{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.0003662109375,
  "eval_steps": 500,
  "global_step": 60000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0,
      "model_forward_time": 1.068114995956421,
      "step": 0
    },
    {
      "epoch": 0,
      "step": 0,
      "training_step_time": 2.589256525039673
    },
    {
      "epoch": 6.103515625e-09,
      "model_forward_time": 0.11818861961364746,
      "step": 1
    },
    {
      "epoch": 6.103515625e-09,
      "step": 1,
      "training_step_time": 0.6016848087310791
    },
    {
      "epoch": 1.220703125e-08,
      "model_forward_time": 0.1183462142944336,
      "step": 2
    },
    {
      "epoch": 1.220703125e-08,
      "step": 2,
      "training_step_time": 0.623734712600708
    },
    {
      "epoch": 1.8310546875e-08,
      "model_forward_time": 0.11508297920227051,
      "step": 3
    },
    {
      "epoch": 1.8310546875e-08,
      "step": 3,
      "training_step_time": 0.6447749137878418
    },
    {
      "epoch": 2.44140625e-08,
      "model_forward_time": 0.12184286117553711,
      "step": 4
    },
    {
      "epoch": 2.44140625e-08,
      "step": 4,
      "training_step_time": 0.6861662864685059
    },
    {
      "epoch": 3.0517578125e-08,
      "model_forward_time": 0.11852264404296875,
      "step": 5
    },
    {
      "epoch": 3.0517578125e-08,
      "step": 5,
      "training_step_time": 0.6042606830596924
    },
    {
      "epoch": 3.662109375e-08,
      "model_forward_time": 0.12703275680541992,
      "step": 6
    },
    {
      "epoch": 3.662109375e-08,
      "step": 6,
      "training_step_time": 0.6070358753204346
    },
    {
      "epoch": 4.2724609375e-08,
      "model_forward_time": 0.1186676025390625,
      "step": 7
    },
    {
      "epoch": 4.2724609375e-08,
      "step": 7,
      "training_step_time": 0.6701970100402832
    },
    {
      "epoch": 4.8828125e-08,
      "model_forward_time": 0.1185917854309082,
      "step": 8
    },
    {
      "epoch": 4.8828125e-08,
      "step": 8,
      "training_step_time": 0.7149603366851807
    },
    {
      "epoch": 5.4931640625e-08,
      "model_forward_time": 0.11905789375305176,
      "step": 9
    },
    {
      "epoch": 5.4931640625e-08,
      "step": 9,
      "training_step_time": 0.6597833633422852
    },
    {
      "epoch": 6.103515625e-08,
      "grad_norm": 3.20660400390625,
      "learning_rate": 3.3333333333333335e-07,
      "loss": 1.1415,
      "step": 10
    },
    {
      "epoch": 6.103515625e-08,
      "model_forward_time": 0.11483073234558105,
      "step": 10
    },
    {
      "epoch": 6.103515625e-08,
      "step": 10,
      "training_step_time": 0.4154784679412842
    },
    {
      "epoch": 6.7138671875e-08,
      "model_forward_time": 0.11591076850891113,
      "step": 11
    },
    {
      "epoch": 6.7138671875e-08,
      "step": 11,
      "training_step_time": 0.4010653495788574
    },
    {
      "epoch": 7.32421875e-08,
      "model_forward_time": 0.1168971061706543,
      "step": 12
    },
    {
      "epoch": 7.32421875e-08,
      "step": 12,
      "training_step_time": 0.42385172843933105
    },
    {
      "epoch": 7.9345703125e-08,
      "model_forward_time": 0.11537981033325195,
      "step": 13
    },
    {
      "epoch": 7.9345703125e-08,
      "step": 13,
      "training_step_time": 0.3866758346557617
    },
    {
      "epoch": 8.544921875e-08,
      "model_forward_time": 0.11792111396789551,
      "step": 14
    },
    {
      "epoch": 8.544921875e-08,
      "step": 14,
      "training_step_time": 0.37596726417541504
    },
    {
      "epoch": 9.1552734375e-08,
      "model_forward_time": 0.11576986312866211,
      "step": 15
    },
    {
      "epoch": 9.1552734375e-08,
      "step": 15,
      "training_step_time": 0.37999916076660156
    },
    {
      "epoch": 9.765625e-08,
      "model_forward_time": 0.1158742904663086,
      "step": 16
    },
    {
      "epoch": 9.765625e-08,
      "step": 16,
      "training_step_time": 0.38437342643737793
    },
    {
      "epoch": 1.03759765625e-07,
      "model_forward_time": 0.11525082588195801,
      "step": 17
    },
    {
      "epoch": 1.03759765625e-07,
      "step": 17,
      "training_step_time": 0.3863189220428467
    },
    {
      "epoch": 1.0986328125e-07,
      "model_forward_time": 0.11482095718383789,
      "step": 18
    },
    {
      "epoch": 1.0986328125e-07,
      "step": 18,
      "training_step_time": 0.37443113327026367
    },
    {
      "epoch": 1.15966796875e-07,
      "model_forward_time": 0.11534333229064941,
      "step": 19
    },
    {
      "epoch": 1.15966796875e-07,
      "step": 19,
      "training_step_time": 0.3900470733642578
    },
    {
      "epoch": 1.220703125e-07,
      "grad_norm": 2.7341535091400146,
      "learning_rate": 6.666666666666667e-07,
      "loss": 1.1403,
      "step": 20
    },
    {
      "epoch": 1.220703125e-07,
      "model_forward_time": 0.11592388153076172,
      "step": 20
    },
    {
      "epoch": 1.220703125e-07,
      "step": 20,
      "training_step_time": 0.38483095169067383
    },
    {
      "epoch": 1.28173828125e-07,
      "model_forward_time": 0.11508989334106445,
      "step": 21
    },
    {
      "epoch": 1.28173828125e-07,
      "step": 21,
      "training_step_time": 0.4196774959564209
    },
    {
      "epoch": 1.3427734375e-07,
      "model_forward_time": 0.11707210540771484,
      "step": 22
    },
    {
      "epoch": 1.3427734375e-07,
      "step": 22,
      "training_step_time": 0.3898200988769531
    },
    {
      "epoch": 1.40380859375e-07,
      "model_forward_time": 0.11579680442810059,
      "step": 23
    },
    {
      "epoch": 1.40380859375e-07,
      "step": 23,
      "training_step_time": 0.3943495750427246
    },
    {
      "epoch": 1.46484375e-07,
      "model_forward_time": 0.11540102958679199,
      "step": 24
    },
    {
      "epoch": 1.46484375e-07,
      "step": 24,
      "training_step_time": 0.39543747901916504
    },
    {
      "epoch": 1.52587890625e-07,
      "model_forward_time": 0.11506319046020508,
      "step": 25
    },
    {
      "epoch": 1.52587890625e-07,
      "step": 25,
      "training_step_time": 0.36852145195007324
    },
    {
      "epoch": 1.5869140625e-07,
      "model_forward_time": 0.11621689796447754,
      "step": 26
    },
    {
      "epoch": 1.5869140625e-07,
      "step": 26,
      "training_step_time": 0.5130612850189209
    },
    {
      "epoch": 1.64794921875e-07,
      "model_forward_time": 0.11519479751586914,
      "step": 27
    },
    {
      "epoch": 1.64794921875e-07,
      "step": 27,
      "training_step_time": 0.4082813262939453
    },
    {
      "epoch": 1.708984375e-07,
      "model_forward_time": 0.11501049995422363,
      "step": 28
    },
    {
      "epoch": 1.708984375e-07,
      "step": 28,
      "training_step_time": 0.4623394012451172
    },
    {
      "epoch": 1.77001953125e-07,
      "model_forward_time": 0.1153416633605957,
      "step": 29
    },
    {
      "epoch": 1.77001953125e-07,
      "step": 29,
      "training_step_time": 0.4514892101287842
    },
    {
      "epoch": 1.8310546875e-07,
      "grad_norm": 1.3699910640716553,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 1.1147,
      "step": 30
    },
    {
      "epoch": 1.8310546875e-07,
      "model_forward_time": 0.11532425880432129,
      "step": 30
    },
    {
      "epoch": 1.8310546875e-07,
      "step": 30,
      "training_step_time": 0.3906402587890625
    },
    {
      "epoch": 1.89208984375e-07,
      "model_forward_time": 0.11443281173706055,
      "step": 31
    },
    {
      "epoch": 1.89208984375e-07,
      "step": 31,
      "training_step_time": 0.37802577018737793
    },
    {
      "epoch": 1.953125e-07,
      "model_forward_time": 0.11498308181762695,
      "step": 32
    },
    {
      "epoch": 1.953125e-07,
      "step": 32,
      "training_step_time": 0.3959472179412842
    },
    {
      "epoch": 2.01416015625e-07,
      "model_forward_time": 0.11619114875793457,
      "step": 33
    },
    {
      "epoch": 2.01416015625e-07,
      "step": 33,
      "training_step_time": 0.38831543922424316
    },
    {
      "epoch": 2.0751953125e-07,
      "model_forward_time": 0.1152961254119873,
      "step": 34
    },
    {
      "epoch": 2.0751953125e-07,
      "step": 34,
      "training_step_time": 0.4221816062927246
    },
    {
      "epoch": 2.13623046875e-07,
      "model_forward_time": 0.11505675315856934,
      "step": 35
    },
    {
      "epoch": 2.13623046875e-07,
      "step": 35,
      "training_step_time": 0.392470121383667
    },
    {
      "epoch": 2.197265625e-07,
      "model_forward_time": 0.11516642570495605,
      "step": 36
    },
    {
      "epoch": 2.197265625e-07,
      "step": 36,
      "training_step_time": 0.37971949577331543
    },
    {
      "epoch": 2.25830078125e-07,
      "model_forward_time": 0.1155698299407959,
      "step": 37
    },
    {
      "epoch": 2.25830078125e-07,
      "step": 37,
      "training_step_time": 0.3932678699493408
    },
    {
      "epoch": 2.3193359375e-07,
      "model_forward_time": 0.11491966247558594,
      "step": 38
    },
    {
      "epoch": 2.3193359375e-07,
      "step": 38,
      "training_step_time": 0.39644932746887207
    },
    {
      "epoch": 2.38037109375e-07,
      "model_forward_time": 0.11670994758605957,
      "step": 39
    },
    {
      "epoch": 2.38037109375e-07,
      "step": 39,
      "training_step_time": 0.4466543197631836
    },
    {
      "epoch": 2.44140625e-07,
      "grad_norm": 2.245981216430664,
      "learning_rate": 1.3333333333333334e-06,
      "loss": 1.1064,
      "step": 40
    },
    {
      "epoch": 2.44140625e-07,
      "model_forward_time": 0.11648201942443848,
      "step": 40
    },
    {
      "epoch": 2.44140625e-07,
      "step": 40,
      "training_step_time": 0.45275115966796875
    },
    {
      "epoch": 2.50244140625e-07,
      "model_forward_time": 0.11647915840148926,
      "step": 41
    },
    {
      "epoch": 2.50244140625e-07,
      "step": 41,
      "training_step_time": 0.5123140811920166
    },
    {
      "epoch": 2.5634765625e-07,
      "model_forward_time": 0.11549043655395508,
      "step": 42
    },
    {
      "epoch": 2.5634765625e-07,
      "step": 42,
      "training_step_time": 0.4326655864715576
    },
    {
      "epoch": 2.62451171875e-07,
      "model_forward_time": 0.11516141891479492,
      "step": 43
    },
    {
      "epoch": 2.62451171875e-07,
      "step": 43,
      "training_step_time": 0.43152308464050293
    },
    {
      "epoch": 2.685546875e-07,
      "model_forward_time": 0.11457085609436035,
      "step": 44
    },
    {
      "epoch": 2.685546875e-07,
      "step": 44,
      "training_step_time": 0.41228318214416504
    },
    {
      "epoch": 2.74658203125e-07,
      "model_forward_time": 0.11463332176208496,
      "step": 45
    },
    {
      "epoch": 2.74658203125e-07,
      "step": 45,
      "training_step_time": 0.3999054431915283
    },
    {
      "epoch": 2.8076171875e-07,
      "model_forward_time": 0.11532402038574219,
      "step": 46
    },
    {
      "epoch": 2.8076171875e-07,
      "step": 46,
      "training_step_time": 0.38355445861816406
    },
    {
      "epoch": 2.86865234375e-07,
      "model_forward_time": 0.11488199234008789,
      "step": 47
    },
    {
      "epoch": 2.86865234375e-07,
      "step": 47,
      "training_step_time": 0.42302799224853516
    },
    {
      "epoch": 2.9296875e-07,
      "model_forward_time": 0.11518740653991699,
      "step": 48
    },
    {
      "epoch": 2.9296875e-07,
      "step": 48,
      "training_step_time": 0.4027693271636963
    },
    {
      "epoch": 2.99072265625e-07,
      "model_forward_time": 0.11568665504455566,
      "step": 49
    },
    {
      "epoch": 2.99072265625e-07,
      "step": 49,
      "training_step_time": 0.38857388496398926
    },
    {
      "epoch": 3.0517578125e-07,
      "grad_norm": 1.864699125289917,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 1.1028,
      "step": 50
    },
    {
      "epoch": 3.0517578125e-07,
      "model_forward_time": 0.11533093452453613,
      "step": 50
    },
    {
      "epoch": 3.0517578125e-07,
      "step": 50,
      "training_step_time": 0.39000535011291504
    },
    {
      "epoch": 3.11279296875e-07,
      "model_forward_time": 0.11539459228515625,
      "step": 51
    },
    {
      "epoch": 3.11279296875e-07,
      "step": 51,
      "training_step_time": 0.384906530380249
    },
    {
      "epoch": 3.173828125e-07,
      "model_forward_time": 0.11522030830383301,
      "step": 52
    },
    {
      "epoch": 3.173828125e-07,
      "step": 52,
      "training_step_time": 0.3911309242248535
    },
    {
      "epoch": 3.23486328125e-07,
      "model_forward_time": 0.1147012710571289,
      "step": 53
    },
    {
      "epoch": 3.23486328125e-07,
      "step": 53,
      "training_step_time": 0.39182257652282715
    },
    {
      "epoch": 3.2958984375e-07,
      "model_forward_time": 0.11491584777832031,
      "step": 54
    },
    {
      "epoch": 3.2958984375e-07,
      "step": 54,
      "training_step_time": 0.37348365783691406
    },
    {
      "epoch": 3.35693359375e-07,
      "model_forward_time": 0.11493754386901855,
      "step": 55
    },
    {
      "epoch": 3.35693359375e-07,
      "step": 55,
      "training_step_time": 0.44512248039245605
    },
    {
      "epoch": 3.41796875e-07,
      "model_forward_time": 0.11546707153320312,
      "step": 56
    },
    {
      "epoch": 3.41796875e-07,
      "step": 56,
      "training_step_time": 0.4514331817626953
    },
    {
      "epoch": 3.47900390625e-07,
      "model_forward_time": 0.11531686782836914,
      "step": 57
    },
    {
      "epoch": 3.47900390625e-07,
      "step": 57,
      "training_step_time": 0.4207191467285156
    },
    {
      "epoch": 3.5400390625e-07,
      "model_forward_time": 0.1156759262084961,
      "step": 58
    },
    {
      "epoch": 3.5400390625e-07,
      "step": 58,
      "training_step_time": 0.44870424270629883
    },
    {
      "epoch": 3.60107421875e-07,
      "model_forward_time": 0.11504125595092773,
      "step": 59
    },
    {
      "epoch": 3.60107421875e-07,
      "step": 59,
      "training_step_time": 0.3938467502593994
    },
    {
      "epoch": 3.662109375e-07,
      "grad_norm": 1.1708608865737915,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 1.1034,
      "step": 60
    },
    {
      "epoch": 3.662109375e-07,
      "model_forward_time": 0.11585211753845215,
      "step": 60
    },
    {
      "epoch": 3.662109375e-07,
      "step": 60,
      "training_step_time": 0.3741302490234375
    },
    {
      "epoch": 3.72314453125e-07,
      "model_forward_time": 0.11566853523254395,
      "step": 61
    },
    {
      "epoch": 3.72314453125e-07,
      "step": 61,
      "training_step_time": 0.3932523727416992
    },
    {
      "epoch": 3.7841796875e-07,
      "model_forward_time": 0.11485910415649414,
      "step": 62
    },
    {
      "epoch": 3.7841796875e-07,
      "step": 62,
      "training_step_time": 0.4378373622894287
    },
    {
      "epoch": 3.84521484375e-07,
      "model_forward_time": 0.11550307273864746,
      "step": 63
    },
    {
      "epoch": 3.84521484375e-07,
      "step": 63,
      "training_step_time": 0.4017314910888672
    },
    {
      "epoch": 3.90625e-07,
      "model_forward_time": 0.11620187759399414,
      "step": 64
    },
    {
      "epoch": 3.90625e-07,
      "step": 64,
      "training_step_time": 0.3781619071960449
    },
    {
      "epoch": 3.96728515625e-07,
      "model_forward_time": 0.11490416526794434,
      "step": 65
    },
    {
      "epoch": 3.96728515625e-07,
      "step": 65,
      "training_step_time": 0.3902404308319092
    },
    {
      "epoch": 4.0283203125e-07,
      "model_forward_time": 0.11493992805480957,
      "step": 66
    },
    {
      "epoch": 4.0283203125e-07,
      "step": 66,
      "training_step_time": 0.5718536376953125
    },
    {
      "epoch": 4.08935546875e-07,
      "model_forward_time": 0.11537694931030273,
      "step": 67
    },
    {
      "epoch": 4.08935546875e-07,
      "step": 67,
      "training_step_time": 0.3937966823577881
    },
    {
      "epoch": 4.150390625e-07,
      "model_forward_time": 0.13338971138000488,
      "step": 68
    },
    {
      "epoch": 4.150390625e-07,
      "step": 68,
      "training_step_time": 0.40157079696655273
    },
    {
      "epoch": 4.21142578125e-07,
      "model_forward_time": 0.11652278900146484,
      "step": 69
    },
    {
      "epoch": 4.21142578125e-07,
      "step": 69,
      "training_step_time": 0.37198972702026367
    },
    {
      "epoch": 4.2724609375e-07,
      "grad_norm": 0.9959396123886108,
      "learning_rate": 2.3333333333333336e-06,
      "loss": 1.0827,
      "step": 70
    },
    {
      "epoch": 4.2724609375e-07,
      "model_forward_time": 0.11605143547058105,
      "step": 70
    },
    {
      "epoch": 4.2724609375e-07,
      "step": 70,
      "training_step_time": 0.46333885192871094
    },
    {
      "epoch": 4.33349609375e-07,
      "model_forward_time": 0.11564302444458008,
      "step": 71
    },
    {
      "epoch": 4.33349609375e-07,
      "step": 71,
      "training_step_time": 0.46513843536376953
    },
    {
      "epoch": 4.39453125e-07,
      "model_forward_time": 0.11678409576416016,
      "step": 72
    },
    {
      "epoch": 4.39453125e-07,
      "step": 72,
      "training_step_time": 0.4105799198150635
    },
    {
      "epoch": 4.45556640625e-07,
      "model_forward_time": 0.11667442321777344,
      "step": 73
    },
    {
      "epoch": 4.45556640625e-07,
      "step": 73,
      "training_step_time": 0.4111361503601074
    },
    {
      "epoch": 4.5166015625e-07,
      "model_forward_time": 0.11957383155822754,
      "step": 74
    },
    {
      "epoch": 4.5166015625e-07,
      "step": 74,
      "training_step_time": 0.38088154792785645
    },
    {
      "epoch": 4.57763671875e-07,
      "model_forward_time": 0.11765217781066895,
      "step": 75
    },
    {
      "epoch": 4.57763671875e-07,
      "step": 75,
      "training_step_time": 0.40162134170532227
    },
    {
      "epoch": 4.638671875e-07,
      "model_forward_time": 0.12216591835021973,
      "step": 76
    },
    {
      "epoch": 4.638671875e-07,
      "step": 76,
      "training_step_time": 0.4454476833343506
    },
    {
      "epoch": 4.69970703125e-07,
      "model_forward_time": 0.11712646484375,
      "step": 77
    },
    {
      "epoch": 4.69970703125e-07,
      "step": 77,
      "training_step_time": 0.3794717788696289
    },
    {
      "epoch": 4.7607421875e-07,
      "model_forward_time": 0.11604928970336914,
      "step": 78
    },
    {
      "epoch": 4.7607421875e-07,
      "step": 78,
      "training_step_time": 0.47694873809814453
    },
    {
      "epoch": 4.82177734375e-07,
      "model_forward_time": 0.11615514755249023,
      "step": 79
    },
    {
      "epoch": 4.82177734375e-07,
      "step": 79,
      "training_step_time": 0.3859405517578125
    },
    {
      "epoch": 4.8828125e-07,
      "grad_norm": 0.9222889542579651,
      "learning_rate": 2.666666666666667e-06,
      "loss": 1.101,
      "step": 80
    },
    {
      "epoch": 4.8828125e-07,
      "model_forward_time": 0.11620926856994629,
      "step": 80
    },
    {
      "epoch": 4.8828125e-07,
      "step": 80,
      "training_step_time": 0.39353346824645996
    },
    {
      "epoch": 4.94384765625e-07,
      "model_forward_time": 0.11679577827453613,
      "step": 81
    },
    {
      "epoch": 4.94384765625e-07,
      "step": 81,
      "training_step_time": 0.3935065269470215
    },
    {
      "epoch": 5.0048828125e-07,
      "model_forward_time": 0.1158757209777832,
      "step": 82
    },
    {
      "epoch": 5.0048828125e-07,
      "step": 82,
      "training_step_time": 0.7667293548583984
    },
    {
      "epoch": 5.06591796875e-07,
      "model_forward_time": 0.1166234016418457,
      "step": 83
    },
    {
      "epoch": 5.06591796875e-07,
      "step": 83,
      "training_step_time": 0.3930344581604004
    },
    {
      "epoch": 5.126953125e-07,
      "model_forward_time": 0.11566543579101562,
      "step": 84
    },
    {
      "epoch": 5.126953125e-07,
      "step": 84,
      "training_step_time": 0.44940900802612305
    },
    {
      "epoch": 5.18798828125e-07,
      "model_forward_time": 0.11564779281616211,
      "step": 85
    },
    {
      "epoch": 5.18798828125e-07,
      "step": 85,
      "training_step_time": 0.43939638137817383
    },
    {
      "epoch": 5.2490234375e-07,
      "model_forward_time": 0.11632061004638672,
      "step": 86
    },
    {
      "epoch": 5.2490234375e-07,
      "step": 86,
      "training_step_time": 0.41818690299987793
    },
    {
      "epoch": 5.31005859375e-07,
      "model_forward_time": 0.11898040771484375,
      "step": 87
    },
    {
      "epoch": 5.31005859375e-07,
      "step": 87,
      "training_step_time": 0.3908510208129883
    },
    {
      "epoch": 5.37109375e-07,
      "model_forward_time": 0.1161806583404541,
      "step": 88
    },
    {
      "epoch": 5.37109375e-07,
      "step": 88,
      "training_step_time": 0.4997539520263672
    },
    {
      "epoch": 5.43212890625e-07,
      "model_forward_time": 0.11960649490356445,
      "step": 89
    },
    {
      "epoch": 5.43212890625e-07,
      "step": 89,
      "training_step_time": 0.37975454330444336
    },
    {
      "epoch": 5.4931640625e-07,
      "grad_norm": 0.9185659885406494,
      "learning_rate": 3e-06,
      "loss": 1.0985,
      "step": 90
    },
    {
      "epoch": 5.4931640625e-07,
      "model_forward_time": 0.11632680892944336,
      "step": 90
    },
    {
      "epoch": 5.4931640625e-07,
      "step": 90,
      "training_step_time": 0.3774988651275635
    },
    {
      "epoch": 5.55419921875e-07,
      "model_forward_time": 0.11469078063964844,
      "step": 91
    },
    {
      "epoch": 5.55419921875e-07,
      "step": 91,
      "training_step_time": 0.38294219970703125
    },
    {
      "epoch": 5.615234375e-07,
      "model_forward_time": 0.11697244644165039,
      "step": 92
    },
    {
      "epoch": 5.615234375e-07,
      "step": 92,
      "training_step_time": 0.3933894634246826
    },
    {
      "epoch": 5.67626953125e-07,
      "model_forward_time": 0.11675834655761719,
      "step": 93
    },
    {
      "epoch": 5.67626953125e-07,
      "step": 93,
      "training_step_time": 0.39702534675598145
    },
    {
      "epoch": 5.7373046875e-07,
      "model_forward_time": 0.11692357063293457,
      "step": 94
    },
    {
      "epoch": 5.7373046875e-07,
      "step": 94,
      "training_step_time": 0.7299199104309082
    },
    {
      "epoch": 5.79833984375e-07,
      "model_forward_time": 0.11531281471252441,
      "step": 95
    },
    {
      "epoch": 5.79833984375e-07,
      "step": 95,
      "training_step_time": 0.3751537799835205
    },
    {
      "epoch": 5.859375e-07,
      "model_forward_time": 0.11531281471252441,
      "step": 96
    },
    {
      "epoch": 5.859375e-07,
      "step": 96,
      "training_step_time": 0.43257570266723633
    },
    {
      "epoch": 5.92041015625e-07,
      "model_forward_time": 0.11770200729370117,
      "step": 97
    },
    {
      "epoch": 5.92041015625e-07,
      "step": 97,
      "training_step_time": 0.4919912815093994
    },
    {
      "epoch": 5.9814453125e-07,
      "model_forward_time": 0.11592936515808105,
      "step": 98
    },
    {
      "epoch": 5.9814453125e-07,
      "step": 98,
      "training_step_time": 0.4533522129058838
    },
    {
      "epoch": 6.04248046875e-07,
      "model_forward_time": 0.11517858505249023,
      "step": 99
    },
    {
      "epoch": 6.04248046875e-07,
      "step": 99,
      "training_step_time": 0.4861295223236084
    },
    {
      "epoch": 6.103515625e-07,
      "grad_norm": 0.8675374984741211,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 1.0784,
      "step": 100
    },
    {
      "epoch": 6.103515625e-07,
      "model_forward_time": 0.11546468734741211,
      "step": 100
    },
    {
      "epoch": 6.103515625e-07,
      "step": 100,
      "training_step_time": 0.41651415824890137
    },
    {
      "epoch": 6.16455078125e-07,
      "model_forward_time": 0.12558674812316895,
      "step": 101
    },
    {
      "epoch": 6.16455078125e-07,
      "step": 101,
      "training_step_time": 0.4089362621307373
    },
    {
      "epoch": 6.2255859375e-07,
      "model_forward_time": 0.1152200698852539,
      "step": 102
    },
    {
      "epoch": 6.2255859375e-07,
      "step": 102,
      "training_step_time": 0.37114930152893066
    },
    {
      "epoch": 6.28662109375e-07,
      "model_forward_time": 0.11505866050720215,
      "step": 103
    },
    {
      "epoch": 6.28662109375e-07,
      "step": 103,
      "training_step_time": 0.38558053970336914
    },
    {
      "epoch": 6.34765625e-07,
      "model_forward_time": 0.11448240280151367,
      "step": 104
    },
    {
      "epoch": 6.34765625e-07,
      "step": 104,
      "training_step_time": 0.3960094451904297
    },
    {
      "epoch": 6.40869140625e-07,
      "model_forward_time": 0.11595582962036133,
      "step": 105
    },
    {
      "epoch": 6.40869140625e-07,
      "step": 105,
      "training_step_time": 0.39528322219848633
    },
    {
      "epoch": 6.4697265625e-07,
      "model_forward_time": 0.11477375030517578,
      "step": 106
    },
    {
      "epoch": 6.4697265625e-07,
      "step": 106,
      "training_step_time": 0.6875863075256348
    },
    {
      "epoch": 6.53076171875e-07,
      "model_forward_time": 0.11549687385559082,
      "step": 107
    },
    {
      "epoch": 6.53076171875e-07,
      "step": 107,
      "training_step_time": 0.40080904960632324
    },
    {
      "epoch": 6.591796875e-07,
      "model_forward_time": 0.11452841758728027,
      "step": 108
    },
    {
      "epoch": 6.591796875e-07,
      "step": 108,
      "training_step_time": 0.37557125091552734
    },
    {
      "epoch": 6.65283203125e-07,
      "model_forward_time": 0.11435246467590332,
      "step": 109
    },
    {
      "epoch": 6.65283203125e-07,
      "step": 109,
      "training_step_time": 0.38553404808044434
    },
    {
      "epoch": 6.7138671875e-07,
      "grad_norm": 0.9986009001731873,
      "learning_rate": 3.666666666666667e-06,
      "loss": 1.097,
      "step": 110
    },
    {
      "epoch": 6.7138671875e-07,
      "model_forward_time": 0.11495828628540039,
      "step": 110
    },
    {
      "epoch": 6.7138671875e-07,
      "step": 110,
      "training_step_time": 0.40414929389953613
    },
    {
      "epoch": 6.77490234375e-07,
      "model_forward_time": 0.11605715751647949,
      "step": 111
    },
    {
      "epoch": 6.77490234375e-07,
      "step": 111,
      "training_step_time": 0.47589612007141113
    },
    {
      "epoch": 6.8359375e-07,
      "model_forward_time": 0.11532139778137207,
      "step": 112
    },
    {
      "epoch": 6.8359375e-07,
      "step": 112,
      "training_step_time": 0.48401641845703125
    },
    {
      "epoch": 6.89697265625e-07,
      "model_forward_time": 0.11460566520690918,
      "step": 113
    },
    {
      "epoch": 6.89697265625e-07,
      "step": 113,
      "training_step_time": 0.46838951110839844
    },
    {
      "epoch": 6.9580078125e-07,
      "model_forward_time": 0.11559104919433594,
      "step": 114
    },
    {
      "epoch": 6.9580078125e-07,
      "step": 114,
      "training_step_time": 0.4173548221588135
    },
    {
      "epoch": 7.01904296875e-07,
      "model_forward_time": 0.11461186408996582,
      "step": 115
    },
    {
      "epoch": 7.01904296875e-07,
      "step": 115,
      "training_step_time": 0.3700261116027832
    },
    {
      "epoch": 7.080078125e-07,
      "model_forward_time": 0.11505532264709473,
      "step": 116
    },
    {
      "epoch": 7.080078125e-07,
      "step": 116,
      "training_step_time": 0.38633275032043457
    },
    {
      "epoch": 7.14111328125e-07,
      "model_forward_time": 0.11482644081115723,
      "step": 117
    },
    {
      "epoch": 7.14111328125e-07,
      "step": 117,
      "training_step_time": 0.3838527202606201
    },
    {
      "epoch": 7.2021484375e-07,
      "model_forward_time": 0.11556458473205566,
      "step": 118
    },
    {
      "epoch": 7.2021484375e-07,
      "step": 118,
      "training_step_time": 0.5511746406555176
    },
    {
      "epoch": 7.26318359375e-07,
      "model_forward_time": 0.11603236198425293,
      "step": 119
    },
    {
      "epoch": 7.26318359375e-07,
      "step": 119,
      "training_step_time": 0.39818620681762695
    },
    {
      "epoch": 7.32421875e-07,
      "grad_norm": 0.6874130964279175,
      "learning_rate": 4.000000000000001e-06,
      "loss": 1.0989,
      "step": 120
    },
    {
      "epoch": 7.32421875e-07,
      "model_forward_time": 0.11471819877624512,
      "step": 120
    },
    {
      "epoch": 7.32421875e-07,
      "step": 120,
      "training_step_time": 0.37912678718566895
    },
    {
      "epoch": 7.38525390625e-07,
      "model_forward_time": 0.11503839492797852,
      "step": 121
    },
    {
      "epoch": 7.38525390625e-07,
      "step": 121,
      "training_step_time": 0.3943634033203125
    },
    {
      "epoch": 7.4462890625e-07,
      "model_forward_time": 0.11544203758239746,
      "step": 122
    },
    {
      "epoch": 7.4462890625e-07,
      "step": 122,
      "training_step_time": 0.4057338237762451
    },
    {
      "epoch": 7.50732421875e-07,
      "model_forward_time": 0.11469459533691406,
      "step": 123
    },
    {
      "epoch": 7.50732421875e-07,
      "step": 123,
      "training_step_time": 0.40770912170410156
    },
    {
      "epoch": 7.568359375e-07,
      "model_forward_time": 0.11475181579589844,
      "step": 124
    },
    {
      "epoch": 7.568359375e-07,
      "step": 124,
      "training_step_time": 0.8845586776733398
    },
    {
      "epoch": 7.62939453125e-07,
      "model_forward_time": 0.1157224178314209,
      "step": 125
    },
    {
      "epoch": 7.62939453125e-07,
      "step": 125,
      "training_step_time": 0.4198455810546875
    },
    {
      "epoch": 7.6904296875e-07,
      "model_forward_time": 0.11398935317993164,
      "step": 126
    },
    {
      "epoch": 7.6904296875e-07,
      "step": 126,
      "training_step_time": 0.4576399326324463
    },
    {
      "epoch": 7.75146484375e-07,
      "model_forward_time": 0.11431336402893066,
      "step": 127
    },
    {
      "epoch": 7.75146484375e-07,
      "step": 127,
      "training_step_time": 0.4242744445800781
    },
    {
      "epoch": 7.8125e-07,
      "model_forward_time": 0.11383605003356934,
      "step": 128
    },
    {
      "epoch": 7.8125e-07,
      "step": 128,
      "training_step_time": 0.38214850425720215
    },
    {
      "epoch": 7.87353515625e-07,
      "model_forward_time": 0.1144101619720459,
      "step": 129
    },
    {
      "epoch": 7.87353515625e-07,
      "step": 129,
      "training_step_time": 0.3962855339050293
    },
    {
      "epoch": 7.9345703125e-07,
      "grad_norm": 0.684953510761261,
      "learning_rate": 4.333333333333334e-06,
      "loss": 1.0886,
      "step": 130
    },
    {
      "epoch": 7.9345703125e-07,
      "model_forward_time": 0.1148521900177002,
      "step": 130
    },
    {
      "epoch": 7.9345703125e-07,
      "step": 130,
      "training_step_time": 0.45325279235839844
    },
    {
      "epoch": 7.99560546875e-07,
      "model_forward_time": 0.11472845077514648,
      "step": 131
    },
    {
      "epoch": 7.99560546875e-07,
      "step": 131,
      "training_step_time": 0.3886139392852783
    },
    {
      "epoch": 8.056640625e-07,
      "model_forward_time": 0.11590051651000977,
      "step": 132
    },
    {
      "epoch": 8.056640625e-07,
      "step": 132,
      "training_step_time": 0.3731698989868164
    },
    {
      "epoch": 8.11767578125e-07,
      "model_forward_time": 0.11515355110168457,
      "step": 133
    },
    {
      "epoch": 8.11767578125e-07,
      "step": 133,
      "training_step_time": 0.39522218704223633
    },
    {
      "epoch": 8.1787109375e-07,
      "model_forward_time": 0.11657094955444336,
      "step": 134
    },
    {
      "epoch": 8.1787109375e-07,
      "step": 134,
      "training_step_time": 0.39534425735473633
    },
    {
      "epoch": 8.23974609375e-07,
      "model_forward_time": 0.11681270599365234,
      "step": 135
    },
    {
      "epoch": 8.23974609375e-07,
      "step": 135,
      "training_step_time": 0.40212297439575195
    },
    {
      "epoch": 8.30078125e-07,
      "model_forward_time": 0.1158607006072998,
      "step": 136
    },
    {
      "epoch": 8.30078125e-07,
      "step": 136,
      "training_step_time": 0.8470561504364014
    },
    {
      "epoch": 8.36181640625e-07,
      "model_forward_time": 0.11458015441894531,
      "step": 137
    },
    {
      "epoch": 8.36181640625e-07,
      "step": 137,
      "training_step_time": 0.38942909240722656
    },
    {
      "epoch": 8.4228515625e-07,
      "model_forward_time": 0.11476469039916992,
      "step": 138
    },
    {
      "epoch": 8.4228515625e-07,
      "step": 138,
      "training_step_time": 0.36739611625671387
    },
    {
      "epoch": 8.48388671875e-07,
      "model_forward_time": 0.11401867866516113,
      "step": 139
    },
    {
      "epoch": 8.48388671875e-07,
      "step": 139,
      "training_step_time": 0.4375300407409668
    },
    {
      "epoch": 8.544921875e-07,
      "grad_norm": 0.9070088863372803,
      "learning_rate": 4.666666666666667e-06,
      "loss": 1.0826,
      "step": 140
    },
    {
      "epoch": 8.544921875e-07,
      "model_forward_time": 0.11393165588378906,
      "step": 140
    },
    {
      "epoch": 8.544921875e-07,
      "step": 140,
      "training_step_time": 0.45546555519104004
    },
    {
      "epoch": 8.60595703125e-07,
      "model_forward_time": 0.11522674560546875,
      "step": 141
    },
    {
      "epoch": 8.60595703125e-07,
      "step": 141,
      "training_step_time": 0.4061553478240967
    },
    {
      "epoch": 8.6669921875e-07,
      "model_forward_time": 0.11503744125366211,
      "step": 142
    },
    {
      "epoch": 8.6669921875e-07,
      "step": 142,
      "training_step_time": 0.577354907989502
    },
    {
      "epoch": 8.72802734375e-07,
      "model_forward_time": 0.11489534378051758,
      "step": 143
    },
    {
      "epoch": 8.72802734375e-07,
      "step": 143,
      "training_step_time": 0.38277602195739746
    },
    {
      "epoch": 8.7890625e-07,
      "model_forward_time": 0.11599564552307129,
      "step": 144
    },
    {
      "epoch": 8.7890625e-07,
      "step": 144,
      "training_step_time": 0.3741450309753418
    },
    {
      "epoch": 8.85009765625e-07,
      "model_forward_time": 0.11546158790588379,
      "step": 145
    },
    {
      "epoch": 8.85009765625e-07,
      "step": 145,
      "training_step_time": 0.39592862129211426
    },
    {
      "epoch": 8.9111328125e-07,
      "model_forward_time": 0.11505413055419922,
      "step": 146
    },
    {
      "epoch": 8.9111328125e-07,
      "step": 146,
      "training_step_time": 0.3923978805541992
    },
    {
      "epoch": 8.97216796875e-07,
      "model_forward_time": 0.1151113510131836,
      "step": 147
    },
    {
      "epoch": 8.97216796875e-07,
      "step": 147,
      "training_step_time": 0.4056394100189209
    },
    {
      "epoch": 9.033203125e-07,
      "model_forward_time": 0.11479473114013672,
      "step": 148
    },
    {
      "epoch": 9.033203125e-07,
      "step": 148,
      "training_step_time": 0.4639747142791748
    },
    {
      "epoch": 9.09423828125e-07,
      "model_forward_time": 0.11571240425109863,
      "step": 149
    },
    {
      "epoch": 9.09423828125e-07,
      "step": 149,
      "training_step_time": 0.3916938304901123
    },
    {
      "epoch": 9.1552734375e-07,
      "grad_norm": 0.9361276626586914,
      "learning_rate": 5e-06,
      "loss": 1.0675,
      "step": 150
    },
    {
      "epoch": 9.1552734375e-07,
      "model_forward_time": 0.11612796783447266,
      "step": 150
    },
    {
      "epoch": 9.1552734375e-07,
      "step": 150,
      "training_step_time": 0.37497472763061523
    },
    {
      "epoch": 9.21630859375e-07,
      "model_forward_time": 0.11450076103210449,
      "step": 151
    },
    {
      "epoch": 9.21630859375e-07,
      "step": 151,
      "training_step_time": 0.3900718688964844
    },
    {
      "epoch": 9.27734375e-07,
      "model_forward_time": 0.11519312858581543,
      "step": 152
    },
    {
      "epoch": 9.27734375e-07,
      "step": 152,
      "training_step_time": 0.4006524085998535
    },
    {
      "epoch": 9.33837890625e-07,
      "model_forward_time": 0.11603665351867676,
      "step": 153
    },
    {
      "epoch": 9.33837890625e-07,
      "step": 153,
      "training_step_time": 0.4487955570220947
    },
    {
      "epoch": 9.3994140625e-07,
      "model_forward_time": 0.11505365371704102,
      "step": 154
    },
    {
      "epoch": 9.3994140625e-07,
      "step": 154,
      "training_step_time": 0.5257163047790527
    },
    {
      "epoch": 9.46044921875e-07,
      "model_forward_time": 0.11529231071472168,
      "step": 155
    },
    {
      "epoch": 9.46044921875e-07,
      "step": 155,
      "training_step_time": 0.5101108551025391
    },
    {
      "epoch": 9.521484375e-07,
      "model_forward_time": 0.11587309837341309,
      "step": 156
    },
    {
      "epoch": 9.521484375e-07,
      "step": 156,
      "training_step_time": 0.3784027099609375
    },
    {
      "epoch": 9.58251953125e-07,
      "model_forward_time": 0.11438512802124023,
      "step": 157
    },
    {
      "epoch": 9.58251953125e-07,
      "step": 157,
      "training_step_time": 0.39696574211120605
    },
    {
      "epoch": 9.6435546875e-07,
      "model_forward_time": 0.11533093452453613,
      "step": 158
    },
    {
      "epoch": 9.6435546875e-07,
      "step": 158,
      "training_step_time": 0.3955390453338623
    },
    {
      "epoch": 9.70458984375e-07,
      "model_forward_time": 0.11495327949523926,
      "step": 159
    },
    {
      "epoch": 9.70458984375e-07,
      "step": 159,
      "training_step_time": 0.40442419052124023
    },
    {
      "epoch": 9.765625e-07,
      "grad_norm": 0.9113616347312927,
      "learning_rate": 5.333333333333334e-06,
      "loss": 1.0735,
      "step": 160
    },
    {
      "epoch": 9.765625e-07,
      "model_forward_time": 0.11531543731689453,
      "step": 160
    },
    {
      "epoch": 9.765625e-07,
      "step": 160,
      "training_step_time": 0.4158656597137451
    },
    {
      "epoch": 9.82666015625e-07,
      "model_forward_time": 0.11614513397216797,
      "step": 161
    },
    {
      "epoch": 9.82666015625e-07,
      "step": 161,
      "training_step_time": 0.4047403335571289
    },
    {
      "epoch": 9.8876953125e-07,
      "model_forward_time": 0.1150507926940918,
      "step": 162
    },
    {
      "epoch": 9.8876953125e-07,
      "step": 162,
      "training_step_time": 0.38225626945495605
    },
    {
      "epoch": 9.94873046875e-07,
      "model_forward_time": 0.11783170700073242,
      "step": 163
    },
    {
      "epoch": 9.94873046875e-07,
      "step": 163,
      "training_step_time": 0.3915858268737793
    },
    {
      "epoch": 1.0009765625e-06,
      "model_forward_time": 0.11484837532043457,
      "step": 164
    },
    {
      "epoch": 1.0009765625e-06,
      "step": 164,
      "training_step_time": 0.39934802055358887
    },
    {
      "epoch": 1.007080078125e-06,
      "model_forward_time": 0.11464881896972656,
      "step": 165
    },
    {
      "epoch": 1.007080078125e-06,
      "step": 165,
      "training_step_time": 0.40863537788391113
    },
    {
      "epoch": 1.01318359375e-06,
      "model_forward_time": 0.11444234848022461,
      "step": 166
    },
    {
      "epoch": 1.01318359375e-06,
      "step": 166,
      "training_step_time": 0.460219144821167
    },
    {
      "epoch": 1.019287109375e-06,
      "model_forward_time": 0.11568903923034668,
      "step": 167
    },
    {
      "epoch": 1.019287109375e-06,
      "step": 167,
      "training_step_time": 0.3726162910461426
    },
    {
      "epoch": 1.025390625e-06,
      "model_forward_time": 0.11686825752258301,
      "step": 168
    },
    {
      "epoch": 1.025390625e-06,
      "step": 168,
      "training_step_time": 0.47736644744873047
    },
    {
      "epoch": 1.031494140625e-06,
      "model_forward_time": 0.11485862731933594,
      "step": 169
    },
    {
      "epoch": 1.031494140625e-06,
      "step": 169,
      "training_step_time": 0.4991581439971924
    },
    {
      "epoch": 1.03759765625e-06,
      "grad_norm": 1.3245502710342407,
      "learning_rate": 5.666666666666667e-06,
      "loss": 1.0551,
      "step": 170
    },
    {
      "epoch": 1.03759765625e-06,
      "model_forward_time": 0.11477184295654297,
      "step": 170
    },
    {
      "epoch": 1.03759765625e-06,
      "step": 170,
      "training_step_time": 0.41733479499816895
    },
    {
      "epoch": 1.043701171875e-06,
      "model_forward_time": 0.11424970626831055,
      "step": 171
    },
    {
      "epoch": 1.043701171875e-06,
      "step": 171,
      "training_step_time": 0.3999342918395996
    },
    {
      "epoch": 1.0498046875e-06,
      "model_forward_time": 0.11442828178405762,
      "step": 172
    },
    {
      "epoch": 1.0498046875e-06,
      "step": 172,
      "training_step_time": 0.37691688537597656
    },
    {
      "epoch": 1.055908203125e-06,
      "model_forward_time": 0.11475181579589844,
      "step": 173
    },
    {
      "epoch": 1.055908203125e-06,
      "step": 173,
      "training_step_time": 0.38630127906799316
    },
    {
      "epoch": 1.06201171875e-06,
      "model_forward_time": 0.1147909164428711,
      "step": 174
    },
    {
      "epoch": 1.06201171875e-06,
      "step": 174,
      "training_step_time": 0.5447337627410889
    },
    {
      "epoch": 1.068115234375e-06,
      "model_forward_time": 0.11503028869628906,
      "step": 175
    },
    {
      "epoch": 1.068115234375e-06,
      "step": 175,
      "training_step_time": 0.40401220321655273
    },
    {
      "epoch": 1.07421875e-06,
      "model_forward_time": 0.11458373069763184,
      "step": 176
    },
    {
      "epoch": 1.07421875e-06,
      "step": 176,
      "training_step_time": 0.38637423515319824
    },
    {
      "epoch": 1.080322265625e-06,
      "model_forward_time": 0.1145637035369873,
      "step": 177
    },
    {
      "epoch": 1.080322265625e-06,
      "step": 177,
      "training_step_time": 0.39459848403930664
    },
    {
      "epoch": 1.08642578125e-06,
      "model_forward_time": 0.11463475227355957,
      "step": 178
    },
    {
      "epoch": 1.08642578125e-06,
      "step": 178,
      "training_step_time": 0.3720533847808838
    },
    {
      "epoch": 1.092529296875e-06,
      "model_forward_time": 0.1150355339050293,
      "step": 179
    },
    {
      "epoch": 1.092529296875e-06,
      "step": 179,
      "training_step_time": 0.4315612316131592
    },
    {
      "epoch": 1.0986328125e-06,
      "grad_norm": 1.1121952533721924,
      "learning_rate": 6e-06,
      "loss": 1.0563,
      "step": 180
    },
    {
      "epoch": 1.0986328125e-06,
      "model_forward_time": 0.11504411697387695,
      "step": 180
    },
    {
      "epoch": 1.0986328125e-06,
      "step": 180,
      "training_step_time": 0.7046372890472412
    },
    {
      "epoch": 1.104736328125e-06,
      "model_forward_time": 0.11559343338012695,
      "step": 181
    },
    {
      "epoch": 1.104736328125e-06,
      "step": 181,
      "training_step_time": 0.3700881004333496
    },
    {
      "epoch": 1.11083984375e-06,
      "model_forward_time": 0.1145017147064209,
      "step": 182
    },
    {
      "epoch": 1.11083984375e-06,
      "step": 182,
      "training_step_time": 0.4783205986022949
    },
    {
      "epoch": 1.116943359375e-06,
      "model_forward_time": 0.11443424224853516,
      "step": 183
    },
    {
      "epoch": 1.116943359375e-06,
      "step": 183,
      "training_step_time": 0.47341036796569824
    },
    {
      "epoch": 1.123046875e-06,
      "model_forward_time": 0.11427044868469238,
      "step": 184
    },
    {
      "epoch": 1.123046875e-06,
      "step": 184,
      "training_step_time": 0.46605920791625977
    },
    {
      "epoch": 1.129150390625e-06,
      "model_forward_time": 0.11432981491088867,
      "step": 185
    },
    {
      "epoch": 1.129150390625e-06,
      "step": 185,
      "training_step_time": 0.39018964767456055
    },
    {
      "epoch": 1.13525390625e-06,
      "model_forward_time": 0.11474132537841797,
      "step": 186
    },
    {
      "epoch": 1.13525390625e-06,
      "step": 186,
      "training_step_time": 0.37169694900512695
    },
    {
      "epoch": 1.141357421875e-06,
      "model_forward_time": 0.11504173278808594,
      "step": 187
    },
    {
      "epoch": 1.141357421875e-06,
      "step": 187,
      "training_step_time": 0.4109828472137451
    },
    {
      "epoch": 1.1474609375e-06,
      "model_forward_time": 0.11550092697143555,
      "step": 188
    },
    {
      "epoch": 1.1474609375e-06,
      "step": 188,
      "training_step_time": 0.3842589855194092
    },
    {
      "epoch": 1.153564453125e-06,
      "model_forward_time": 0.11573171615600586,
      "step": 189
    },
    {
      "epoch": 1.153564453125e-06,
      "step": 189,
      "training_step_time": 0.37532615661621094
    },
    {
      "epoch": 1.15966796875e-06,
      "grad_norm": 0.8655436635017395,
      "learning_rate": 6.333333333333334e-06,
      "loss": 1.0635,
      "step": 190
    },
    {
      "epoch": 1.15966796875e-06,
      "model_forward_time": 0.11618971824645996,
      "step": 190
    },
    {
      "epoch": 1.15966796875e-06,
      "step": 190,
      "training_step_time": 0.3744802474975586
    },
    {
      "epoch": 1.165771484375e-06,
      "model_forward_time": 0.11573386192321777,
      "step": 191
    },
    {
      "epoch": 1.165771484375e-06,
      "step": 191,
      "training_step_time": 0.3906838893890381
    },
    {
      "epoch": 1.171875e-06,
      "model_forward_time": 0.11568379402160645,
      "step": 192
    },
    {
      "epoch": 1.171875e-06,
      "step": 192,
      "training_step_time": 0.5501444339752197
    },
    {
      "epoch": 1.177978515625e-06,
      "model_forward_time": 0.1149601936340332,
      "step": 193
    },
    {
      "epoch": 1.177978515625e-06,
      "step": 193,
      "training_step_time": 0.39635682106018066
    },
    {
      "epoch": 1.18408203125e-06,
      "model_forward_time": 0.11491584777832031,
      "step": 194
    },
    {
      "epoch": 1.18408203125e-06,
      "step": 194,
      "training_step_time": 0.39296865463256836
    },
    {
      "epoch": 1.190185546875e-06,
      "model_forward_time": 0.11526107788085938,
      "step": 195
    },
    {
      "epoch": 1.190185546875e-06,
      "step": 195,
      "training_step_time": 0.40963101387023926
    },
    {
      "epoch": 1.1962890625e-06,
      "model_forward_time": 0.1151425838470459,
      "step": 196
    },
    {
      "epoch": 1.1962890625e-06,
      "step": 196,
      "training_step_time": 0.43520498275756836
    },
    {
      "epoch": 1.202392578125e-06,
      "model_forward_time": 0.11563801765441895,
      "step": 197
    },
    {
      "epoch": 1.202392578125e-06,
      "step": 197,
      "training_step_time": 0.4893817901611328
    },
    {
      "epoch": 1.20849609375e-06,
      "model_forward_time": 0.11477446556091309,
      "step": 198
    },
    {
      "epoch": 1.20849609375e-06,
      "step": 198,
      "training_step_time": 0.4732680320739746
    },
    {
      "epoch": 1.214599609375e-06,
      "model_forward_time": 0.11478948593139648,
      "step": 199
    },
    {
      "epoch": 1.214599609375e-06,
      "step": 199,
      "training_step_time": 0.4001801013946533
    },
    {
      "epoch": 1.220703125e-06,
      "grad_norm": 0.9394235610961914,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.0502,
      "step": 200
    },
    {
      "epoch": 1.220703125e-06,
      "model_forward_time": 0.11527085304260254,
      "step": 200
    },
    {
      "epoch": 1.220703125e-06,
      "step": 200,
      "training_step_time": 0.3977320194244385
    },
    {
      "epoch": 1.226806640625e-06,
      "model_forward_time": 0.1151735782623291,
      "step": 201
    },
    {
      "epoch": 1.226806640625e-06,
      "step": 201,
      "training_step_time": 0.39919018745422363
    },
    {
      "epoch": 1.23291015625e-06,
      "model_forward_time": 0.11563372611999512,
      "step": 202
    },
    {
      "epoch": 1.23291015625e-06,
      "step": 202,
      "training_step_time": 0.3754732608795166
    },
    {
      "epoch": 1.239013671875e-06,
      "model_forward_time": 0.11567974090576172,
      "step": 203
    },
    {
      "epoch": 1.239013671875e-06,
      "step": 203,
      "training_step_time": 0.38751745223999023
    },
    {
      "epoch": 1.2451171875e-06,
      "model_forward_time": 0.11516833305358887,
      "step": 204
    },
    {
      "epoch": 1.2451171875e-06,
      "step": 204,
      "training_step_time": 0.3797776699066162
    },
    {
      "epoch": 1.251220703125e-06,
      "model_forward_time": 0.11648273468017578,
      "step": 205
    },
    {
      "epoch": 1.251220703125e-06,
      "step": 205,
      "training_step_time": 0.4007999897003174
    },
    {
      "epoch": 1.25732421875e-06,
      "model_forward_time": 0.11669182777404785,
      "step": 206
    },
    {
      "epoch": 1.25732421875e-06,
      "step": 206,
      "training_step_time": 0.4395749568939209
    },
    {
      "epoch": 1.263427734375e-06,
      "model_forward_time": 0.11550474166870117,
      "step": 207
    },
    {
      "epoch": 1.263427734375e-06,
      "step": 207,
      "training_step_time": 0.4002079963684082
    },
    {
      "epoch": 1.26953125e-06,
      "model_forward_time": 0.11477136611938477,
      "step": 208
    },
    {
      "epoch": 1.26953125e-06,
      "step": 208,
      "training_step_time": 0.3781282901763916
    },
    {
      "epoch": 1.275634765625e-06,
      "model_forward_time": 0.11577773094177246,
      "step": 209
    },
    {
      "epoch": 1.275634765625e-06,
      "step": 209,
      "training_step_time": 0.3939056396484375
    },
    {
      "epoch": 1.28173828125e-06,
      "grad_norm": 0.9746418595314026,
      "learning_rate": 7.000000000000001e-06,
      "loss": 1.0571,
      "step": 210
    },
    {
      "epoch": 1.28173828125e-06,
      "model_forward_time": 0.1148829460144043,
      "step": 210
    },
    {
      "epoch": 1.28173828125e-06,
      "step": 210,
      "training_step_time": 0.5113632678985596
    },
    {
      "epoch": 1.287841796875e-06,
      "model_forward_time": 0.11513948440551758,
      "step": 211
    },
    {
      "epoch": 1.287841796875e-06,
      "step": 211,
      "training_step_time": 0.4815220832824707
    },
    {
      "epoch": 1.2939453125e-06,
      "model_forward_time": 0.11507868766784668,
      "step": 212
    },
    {
      "epoch": 1.2939453125e-06,
      "step": 212,
      "training_step_time": 0.49545717239379883
    },
    {
      "epoch": 1.300048828125e-06,
      "model_forward_time": 0.11533188819885254,
      "step": 213
    },
    {
      "epoch": 1.300048828125e-06,
      "step": 213,
      "training_step_time": 0.4843175411224365
    },
    {
      "epoch": 1.30615234375e-06,
      "model_forward_time": 0.11482644081115723,
      "step": 214
    },
    {
      "epoch": 1.30615234375e-06,
      "step": 214,
      "training_step_time": 0.3737821578979492
    },
    {
      "epoch": 1.312255859375e-06,
      "model_forward_time": 0.11453557014465332,
      "step": 215
    },
    {
      "epoch": 1.312255859375e-06,
      "step": 215,
      "training_step_time": 0.38806819915771484
    },
    {
      "epoch": 1.318359375e-06,
      "model_forward_time": 0.11631965637207031,
      "step": 216
    },
    {
      "epoch": 1.318359375e-06,
      "step": 216,
      "training_step_time": 0.39367222785949707
    },
    {
      "epoch": 1.324462890625e-06,
      "model_forward_time": 0.1146860122680664,
      "step": 217
    },
    {
      "epoch": 1.324462890625e-06,
      "step": 217,
      "training_step_time": 0.3886542320251465
    },
    {
      "epoch": 1.33056640625e-06,
      "model_forward_time": 0.11536622047424316,
      "step": 218
    },
    {
      "epoch": 1.33056640625e-06,
      "step": 218,
      "training_step_time": 0.3862628936767578
    },
    {
      "epoch": 1.336669921875e-06,
      "model_forward_time": 0.11481308937072754,
      "step": 219
    },
    {
      "epoch": 1.336669921875e-06,
      "step": 219,
      "training_step_time": 0.42119693756103516
    },
    {
      "epoch": 1.3427734375e-06,
      "grad_norm": 0.8484799861907959,
      "learning_rate": 7.333333333333334e-06,
      "loss": 1.0436,
      "step": 220
    },
    {
      "epoch": 1.3427734375e-06,
      "model_forward_time": 0.11536002159118652,
      "step": 220
    },
    {
      "epoch": 1.3427734375e-06,
      "step": 220,
      "training_step_time": 0.4166278839111328
    },
    {
      "epoch": 1.348876953125e-06,
      "model_forward_time": 0.11530804634094238,
      "step": 221
    },
    {
      "epoch": 1.348876953125e-06,
      "step": 221,
      "training_step_time": 0.38692307472229004
    },
    {
      "epoch": 1.35498046875e-06,
      "model_forward_time": 0.11536264419555664,
      "step": 222
    },
    {
      "epoch": 1.35498046875e-06,
      "step": 222,
      "training_step_time": 0.6305625438690186
    },
    {
      "epoch": 1.361083984375e-06,
      "model_forward_time": 0.11577963829040527,
      "step": 223
    },
    {
      "epoch": 1.361083984375e-06,
      "step": 223,
      "training_step_time": 0.3869893550872803
    },
    {
      "epoch": 1.3671875e-06,
      "model_forward_time": 0.11487174034118652,
      "step": 224
    },
    {
      "epoch": 1.3671875e-06,
      "step": 224,
      "training_step_time": 0.4414210319519043
    },
    {
      "epoch": 1.373291015625e-06,
      "model_forward_time": 0.1151268482208252,
      "step": 225
    },
    {
      "epoch": 1.373291015625e-06,
      "step": 225,
      "training_step_time": 0.45107436180114746
    },
    {
      "epoch": 1.37939453125e-06,
      "model_forward_time": 0.11471724510192871,
      "step": 226
    },
    {
      "epoch": 1.37939453125e-06,
      "step": 226,
      "training_step_time": 0.44527363777160645
    },
    {
      "epoch": 1.385498046875e-06,
      "model_forward_time": 0.11517739295959473,
      "step": 227
    },
    {
      "epoch": 1.385498046875e-06,
      "step": 227,
      "training_step_time": 0.5065436363220215
    },
    {
      "epoch": 1.3916015625e-06,
      "model_forward_time": 0.11533665657043457,
      "step": 228
    },
    {
      "epoch": 1.3916015625e-06,
      "step": 228,
      "training_step_time": 0.3840501308441162
    },
    {
      "epoch": 1.397705078125e-06,
      "model_forward_time": 0.11507654190063477,
      "step": 229
    },
    {
      "epoch": 1.397705078125e-06,
      "step": 229,
      "training_step_time": 0.40604519844055176
    },
    {
      "epoch": 1.40380859375e-06,
      "grad_norm": 0.8787742257118225,
      "learning_rate": 7.666666666666667e-06,
      "loss": 1.0567,
      "step": 230
    },
    {
      "epoch": 1.40380859375e-06,
      "model_forward_time": 0.11568522453308105,
      "step": 230
    },
    {
      "epoch": 1.40380859375e-06,
      "step": 230,
      "training_step_time": 0.38828420639038086
    },
    {
      "epoch": 1.409912109375e-06,
      "model_forward_time": 0.11481356620788574,
      "step": 231
    },
    {
      "epoch": 1.409912109375e-06,
      "step": 231,
      "training_step_time": 0.39077138900756836
    },
    {
      "epoch": 1.416015625e-06,
      "model_forward_time": 0.11563873291015625,
      "step": 232
    },
    {
      "epoch": 1.416015625e-06,
      "step": 232,
      "training_step_time": 0.42352986335754395
    },
    {
      "epoch": 1.422119140625e-06,
      "model_forward_time": 0.11491012573242188,
      "step": 233
    },
    {
      "epoch": 1.422119140625e-06,
      "step": 233,
      "training_step_time": 0.39761972427368164
    },
    {
      "epoch": 1.42822265625e-06,
      "model_forward_time": 0.11585688591003418,
      "step": 234
    },
    {
      "epoch": 1.42822265625e-06,
      "step": 234,
      "training_step_time": 0.5555145740509033
    },
    {
      "epoch": 1.434326171875e-06,
      "model_forward_time": 0.11471819877624512,
      "step": 235
    },
    {
      "epoch": 1.434326171875e-06,
      "step": 235,
      "training_step_time": 0.3919389247894287
    },
    {
      "epoch": 1.4404296875e-06,
      "model_forward_time": 0.11476516723632812,
      "step": 236
    },
    {
      "epoch": 1.4404296875e-06,
      "step": 236,
      "training_step_time": 0.3865072727203369
    },
    {
      "epoch": 1.446533203125e-06,
      "model_forward_time": 0.11535263061523438,
      "step": 237
    },
    {
      "epoch": 1.446533203125e-06,
      "step": 237,
      "training_step_time": 0.39117860794067383
    },
    {
      "epoch": 1.45263671875e-06,
      "model_forward_time": 0.11495280265808105,
      "step": 238
    },
    {
      "epoch": 1.45263671875e-06,
      "step": 238,
      "training_step_time": 0.3721606731414795
    },
    {
      "epoch": 1.458740234375e-06,
      "model_forward_time": 0.11493420600891113,
      "step": 239
    },
    {
      "epoch": 1.458740234375e-06,
      "step": 239,
      "training_step_time": 0.3840489387512207
    },
    {
      "epoch": 1.46484375e-06,
      "grad_norm": 0.8165913820266724,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.0391,
      "step": 240
    },
    {
      "epoch": 1.46484375e-06,
      "model_forward_time": 0.11487054824829102,
      "step": 240
    },
    {
      "epoch": 1.46484375e-06,
      "step": 240,
      "training_step_time": 0.932342529296875
    },
    {
      "epoch": 1.470947265625e-06,
      "model_forward_time": 0.11491799354553223,
      "step": 241
    },
    {
      "epoch": 1.470947265625e-06,
      "step": 241,
      "training_step_time": 0.40691137313842773
    },
    {
      "epoch": 1.47705078125e-06,
      "model_forward_time": 0.11755084991455078,
      "step": 242
    },
    {
      "epoch": 1.47705078125e-06,
      "step": 242,
      "training_step_time": 0.38867878913879395
    },
    {
      "epoch": 1.483154296875e-06,
      "model_forward_time": 0.11739420890808105,
      "step": 243
    },
    {
      "epoch": 1.483154296875e-06,
      "step": 243,
      "training_step_time": 0.3926882743835449
    },
    {
      "epoch": 1.4892578125e-06,
      "model_forward_time": 0.11746048927307129,
      "step": 244
    },
    {
      "epoch": 1.4892578125e-06,
      "step": 244,
      "training_step_time": 0.3883647918701172
    },
    {
      "epoch": 1.495361328125e-06,
      "model_forward_time": 0.11671185493469238,
      "step": 245
    },
    {
      "epoch": 1.495361328125e-06,
      "step": 245,
      "training_step_time": 0.4067988395690918
    },
    {
      "epoch": 1.50146484375e-06,
      "model_forward_time": 0.11591649055480957,
      "step": 246
    },
    {
      "epoch": 1.50146484375e-06,
      "step": 246,
      "training_step_time": 0.5880584716796875
    },
    {
      "epoch": 1.507568359375e-06,
      "model_forward_time": 0.11439228057861328,
      "step": 247
    },
    {
      "epoch": 1.507568359375e-06,
      "step": 247,
      "training_step_time": 0.4029383659362793
    },
    {
      "epoch": 1.513671875e-06,
      "model_forward_time": 0.11419987678527832,
      "step": 248
    },
    {
      "epoch": 1.513671875e-06,
      "step": 248,
      "training_step_time": 0.39243483543395996
    },
    {
      "epoch": 1.519775390625e-06,
      "model_forward_time": 0.1140899658203125,
      "step": 249
    },
    {
      "epoch": 1.519775390625e-06,
      "step": 249,
      "training_step_time": 0.3874843120574951
    },
    {
      "epoch": 1.52587890625e-06,
      "grad_norm": 1.1641093492507935,
      "learning_rate": 8.333333333333334e-06,
      "loss": 1.0417,
      "step": 250
    },
    {
      "epoch": 1.52587890625e-06,
      "model_forward_time": 0.1152043342590332,
      "step": 250
    },
    {
      "epoch": 1.52587890625e-06,
      "step": 250,
      "training_step_time": 0.3740217685699463
    },
    {
      "epoch": 1.531982421875e-06,
      "model_forward_time": 0.11501741409301758,
      "step": 251
    },
    {
      "epoch": 1.531982421875e-06,
      "step": 251,
      "training_step_time": 0.38243675231933594
    },
    {
      "epoch": 1.5380859375e-06,
      "model_forward_time": 0.1146695613861084,
      "step": 252
    },
    {
      "epoch": 1.5380859375e-06,
      "step": 252,
      "training_step_time": 0.949505090713501
    },
    {
      "epoch": 1.544189453125e-06,
      "model_forward_time": 0.11469507217407227,
      "step": 253
    },
    {
      "epoch": 1.544189453125e-06,
      "step": 253,
      "training_step_time": 0.46765685081481934
    },
    {
      "epoch": 1.55029296875e-06,
      "model_forward_time": 0.11463570594787598,
      "step": 254
    },
    {
      "epoch": 1.55029296875e-06,
      "step": 254,
      "training_step_time": 0.41571640968322754
    },
    {
      "epoch": 1.556396484375e-06,
      "model_forward_time": 0.1140434741973877,
      "step": 255
    },
    {
      "epoch": 1.556396484375e-06,
      "step": 255,
      "training_step_time": 0.3965110778808594
    },
    {
      "epoch": 1.5625e-06,
      "model_forward_time": 0.11451244354248047,
      "step": 256
    },
    {
      "epoch": 1.5625e-06,
      "step": 256,
      "training_step_time": 0.3785820007324219
    },
    {
      "epoch": 1.568603515625e-06,
      "model_forward_time": 0.11441612243652344,
      "step": 257
    },
    {
      "epoch": 1.568603515625e-06,
      "step": 257,
      "training_step_time": 0.4099724292755127
    },
    {
      "epoch": 1.57470703125e-06,
      "model_forward_time": 0.11485147476196289,
      "step": 258
    },
    {
      "epoch": 1.57470703125e-06,
      "step": 258,
      "training_step_time": 0.5916268825531006
    },
    {
      "epoch": 1.580810546875e-06,
      "model_forward_time": 0.11454272270202637,
      "step": 259
    },
    {
      "epoch": 1.580810546875e-06,
      "step": 259,
      "training_step_time": 0.39170265197753906
    },
    {
      "epoch": 1.5869140625e-06,
      "grad_norm": 1.5555168390274048,
      "learning_rate": 8.666666666666668e-06,
      "loss": 1.0437,
      "step": 260
    },
    {
      "epoch": 1.5869140625e-06,
      "model_forward_time": 0.11530041694641113,
      "step": 260
    },
    {
      "epoch": 1.5869140625e-06,
      "step": 260,
      "training_step_time": 0.39473581314086914
    },
    {
      "epoch": 1.593017578125e-06,
      "model_forward_time": 0.11443495750427246,
      "step": 261
    },
    {
      "epoch": 1.593017578125e-06,
      "step": 261,
      "training_step_time": 0.39850497245788574
    },
    {
      "epoch": 1.59912109375e-06,
      "model_forward_time": 0.1152350902557373,
      "step": 262
    },
    {
      "epoch": 1.59912109375e-06,
      "step": 262,
      "training_step_time": 0.3729217052459717
    },
    {
      "epoch": 1.605224609375e-06,
      "model_forward_time": 0.1160733699798584,
      "step": 263
    },
    {
      "epoch": 1.605224609375e-06,
      "step": 263,
      "training_step_time": 0.38193225860595703
    },
    {
      "epoch": 1.611328125e-06,
      "model_forward_time": 0.11509418487548828,
      "step": 264
    },
    {
      "epoch": 1.611328125e-06,
      "step": 264,
      "training_step_time": 0.9337189197540283
    },
    {
      "epoch": 1.617431640625e-06,
      "model_forward_time": 0.11531400680541992,
      "step": 265
    },
    {
      "epoch": 1.617431640625e-06,
      "step": 265,
      "training_step_time": 0.4394803047180176
    },
    {
      "epoch": 1.62353515625e-06,
      "model_forward_time": 0.11427927017211914,
      "step": 266
    },
    {
      "epoch": 1.62353515625e-06,
      "step": 266,
      "training_step_time": 0.42351865768432617
    },
    {
      "epoch": 1.629638671875e-06,
      "model_forward_time": 0.1151270866394043,
      "step": 267
    },
    {
      "epoch": 1.629638671875e-06,
      "step": 267,
      "training_step_time": 0.4302976131439209
    },
    {
      "epoch": 1.6357421875e-06,
      "model_forward_time": 0.11448359489440918,
      "step": 268
    },
    {
      "epoch": 1.6357421875e-06,
      "step": 268,
      "training_step_time": 0.37930870056152344
    },
    {
      "epoch": 1.641845703125e-06,
      "model_forward_time": 0.11451125144958496,
      "step": 269
    },
    {
      "epoch": 1.641845703125e-06,
      "step": 269,
      "training_step_time": 0.4037342071533203
    },
    {
      "epoch": 1.64794921875e-06,
      "grad_norm": 1.4390380382537842,
      "learning_rate": 9e-06,
      "loss": 1.0364,
      "step": 270
    },
    {
      "epoch": 1.64794921875e-06,
      "model_forward_time": 0.11592864990234375,
      "step": 270
    },
    {
      "epoch": 1.64794921875e-06,
      "step": 270,
      "training_step_time": 0.4952352046966553
    },
    {
      "epoch": 1.654052734375e-06,
      "model_forward_time": 0.11445164680480957,
      "step": 271
    },
    {
      "epoch": 1.654052734375e-06,
      "step": 271,
      "training_step_time": 0.39828920364379883
    },
    {
      "epoch": 1.66015625e-06,
      "model_forward_time": 0.11777210235595703,
      "step": 272
    },
    {
      "epoch": 1.66015625e-06,
      "step": 272,
      "training_step_time": 0.37882161140441895
    },
    {
      "epoch": 1.666259765625e-06,
      "model_forward_time": 0.11700558662414551,
      "step": 273
    },
    {
      "epoch": 1.666259765625e-06,
      "step": 273,
      "training_step_time": 0.39322495460510254
    },
    {
      "epoch": 1.67236328125e-06,
      "model_forward_time": 0.11529731750488281,
      "step": 274
    },
    {
      "epoch": 1.67236328125e-06,
      "step": 274,
      "training_step_time": 0.38054323196411133
    },
    {
      "epoch": 1.678466796875e-06,
      "model_forward_time": 0.11497092247009277,
      "step": 275
    },
    {
      "epoch": 1.678466796875e-06,
      "step": 275,
      "training_step_time": 0.4028785228729248
    },
    {
      "epoch": 1.6845703125e-06,
      "model_forward_time": 0.11503481864929199,
      "step": 276
    },
    {
      "epoch": 1.6845703125e-06,
      "step": 276,
      "training_step_time": 0.8320791721343994
    },
    {
      "epoch": 1.690673828125e-06,
      "model_forward_time": 0.11439132690429688,
      "step": 277
    },
    {
      "epoch": 1.690673828125e-06,
      "step": 277,
      "training_step_time": 0.39356303215026855
    },
    {
      "epoch": 1.69677734375e-06,
      "model_forward_time": 0.11444664001464844,
      "step": 278
    },
    {
      "epoch": 1.69677734375e-06,
      "step": 278,
      "training_step_time": 0.39139819145202637
    },
    {
      "epoch": 1.702880859375e-06,
      "model_forward_time": 0.11535835266113281,
      "step": 279
    },
    {
      "epoch": 1.702880859375e-06,
      "step": 279,
      "training_step_time": 0.5020012855529785
    },
    {
      "epoch": 1.708984375e-06,
      "grad_norm": 1.7222793102264404,
      "learning_rate": 9.333333333333334e-06,
      "loss": 1.005,
      "step": 280
    },
    {
      "epoch": 1.708984375e-06,
      "model_forward_time": 0.11412596702575684,
      "step": 280
    },
    {
      "epoch": 1.708984375e-06,
      "step": 280,
      "training_step_time": 0.47095632553100586
    },
    {
      "epoch": 1.715087890625e-06,
      "model_forward_time": 0.11490535736083984,
      "step": 281
    },
    {
      "epoch": 1.715087890625e-06,
      "step": 281,
      "training_step_time": 0.460268497467041
    },
    {
      "epoch": 1.72119140625e-06,
      "model_forward_time": 0.1148078441619873,
      "step": 282
    },
    {
      "epoch": 1.72119140625e-06,
      "step": 282,
      "training_step_time": 0.6337311267852783
    },
    {
      "epoch": 1.727294921875e-06,
      "model_forward_time": 0.11408376693725586,
      "step": 283
    },
    {
      "epoch": 1.727294921875e-06,
      "step": 283,
      "training_step_time": 0.393413782119751
    },
    {
      "epoch": 1.7333984375e-06,
      "model_forward_time": 0.11493062973022461,
      "step": 284
    },
    {
      "epoch": 1.7333984375e-06,
      "step": 284,
      "training_step_time": 0.3903994560241699
    },
    {
      "epoch": 1.739501953125e-06,
      "model_forward_time": 0.11521506309509277,
      "step": 285
    },
    {
      "epoch": 1.739501953125e-06,
      "step": 285,
      "training_step_time": 0.3930370807647705
    },
    {
      "epoch": 1.74560546875e-06,
      "model_forward_time": 0.11496591567993164,
      "step": 286
    },
    {
      "epoch": 1.74560546875e-06,
      "step": 286,
      "training_step_time": 0.37085723876953125
    },
    {
      "epoch": 1.751708984375e-06,
      "model_forward_time": 0.11531543731689453,
      "step": 287
    },
    {
      "epoch": 1.751708984375e-06,
      "step": 287,
      "training_step_time": 0.39652371406555176
    },
    {
      "epoch": 1.7578125e-06,
      "model_forward_time": 0.11650872230529785,
      "step": 288
    },
    {
      "epoch": 1.7578125e-06,
      "step": 288,
      "training_step_time": 0.6935832500457764
    },
    {
      "epoch": 1.763916015625e-06,
      "model_forward_time": 0.1157982349395752,
      "step": 289
    },
    {
      "epoch": 1.763916015625e-06,
      "step": 289,
      "training_step_time": 0.4344601631164551
    },
    {
      "epoch": 1.77001953125e-06,
      "grad_norm": 1.7756845951080322,
      "learning_rate": 9.666666666666667e-06,
      "loss": 0.9818,
      "step": 290
    },
    {
      "epoch": 1.77001953125e-06,
      "model_forward_time": 0.11451268196105957,
      "step": 290
    },
    {
      "epoch": 1.77001953125e-06,
      "step": 290,
      "training_step_time": 0.3898651599884033
    },
    {
      "epoch": 1.776123046875e-06,
      "model_forward_time": 0.11544656753540039,
      "step": 291
    },
    {
      "epoch": 1.776123046875e-06,
      "step": 291,
      "training_step_time": 0.4000208377838135
    },
    {
      "epoch": 1.7822265625e-06,
      "model_forward_time": 0.11480379104614258,
      "step": 292
    },
    {
      "epoch": 1.7822265625e-06,
      "step": 292,
      "training_step_time": 0.4215672016143799
    },
    {
      "epoch": 1.788330078125e-06,
      "model_forward_time": 0.11434102058410645,
      "step": 293
    },
    {
      "epoch": 1.788330078125e-06,
      "step": 293,
      "training_step_time": 0.46752071380615234
    },
    {
      "epoch": 1.79443359375e-06,
      "model_forward_time": 0.11586356163024902,
      "step": 294
    },
    {
      "epoch": 1.79443359375e-06,
      "step": 294,
      "training_step_time": 0.49666404724121094
    },
    {
      "epoch": 1.800537109375e-06,
      "model_forward_time": 0.11437082290649414,
      "step": 295
    },
    {
      "epoch": 1.800537109375e-06,
      "step": 295,
      "training_step_time": 0.4551877975463867
    },
    {
      "epoch": 1.806640625e-06,
      "model_forward_time": 0.11517953872680664,
      "step": 296
    },
    {
      "epoch": 1.806640625e-06,
      "step": 296,
      "training_step_time": 0.39525938034057617
    },
    {
      "epoch": 1.812744140625e-06,
      "model_forward_time": 0.11512923240661621,
      "step": 297
    },
    {
      "epoch": 1.812744140625e-06,
      "step": 297,
      "training_step_time": 0.38171958923339844
    },
    {
      "epoch": 1.81884765625e-06,
      "model_forward_time": 0.1152348518371582,
      "step": 298
    },
    {
      "epoch": 1.81884765625e-06,
      "step": 298,
      "training_step_time": 0.3724544048309326
    },
    {
      "epoch": 1.824951171875e-06,
      "model_forward_time": 0.11461877822875977,
      "step": 299
    },
    {
      "epoch": 1.824951171875e-06,
      "step": 299,
      "training_step_time": 0.39290308952331543
    },
    {
      "epoch": 1.8310546875e-06,
      "grad_norm": 2.1607892513275146,
      "learning_rate": 1e-05,
      "loss": 0.9536,
      "step": 300
    },
    {
      "epoch": 1.8310546875e-06,
      "model_forward_time": 0.11583638191223145,
      "step": 300
    },
    {
      "epoch": 1.8310546875e-06,
      "step": 300,
      "training_step_time": 0.9979956150054932
    },
    {
      "epoch": 1.837158203125e-06,
      "model_forward_time": 0.11451363563537598,
      "step": 301
    },
    {
      "epoch": 1.837158203125e-06,
      "step": 301,
      "training_step_time": 0.39142799377441406
    },
    {
      "epoch": 1.84326171875e-06,
      "model_forward_time": 0.1144559383392334,
      "step": 302
    },
    {
      "epoch": 1.84326171875e-06,
      "step": 302,
      "training_step_time": 0.38706159591674805
    },
    {
      "epoch": 1.849365234375e-06,
      "model_forward_time": 0.11560297012329102,
      "step": 303
    },
    {
      "epoch": 1.849365234375e-06,
      "step": 303,
      "training_step_time": 0.3805837631225586
    },
    {
      "epoch": 1.85546875e-06,
      "model_forward_time": 0.11442828178405762,
      "step": 304
    },
    {
      "epoch": 1.85546875e-06,
      "step": 304,
      "training_step_time": 0.36758995056152344
    },
    {
      "epoch": 1.861572265625e-06,
      "model_forward_time": 0.11496233940124512,
      "step": 305
    },
    {
      "epoch": 1.861572265625e-06,
      "step": 305,
      "training_step_time": 0.3826580047607422
    },
    {
      "epoch": 1.86767578125e-06,
      "model_forward_time": 0.11548900604248047,
      "step": 306
    },
    {
      "epoch": 1.86767578125e-06,
      "step": 306,
      "training_step_time": 0.6065328121185303
    },
    {
      "epoch": 1.873779296875e-06,
      "model_forward_time": 0.11546111106872559,
      "step": 307
    },
    {
      "epoch": 1.873779296875e-06,
      "step": 307,
      "training_step_time": 0.47936034202575684
    },
    {
      "epoch": 1.8798828125e-06,
      "model_forward_time": 0.11574101448059082,
      "step": 308
    },
    {
      "epoch": 1.8798828125e-06,
      "step": 308,
      "training_step_time": 0.44551730155944824
    },
    {
      "epoch": 1.885986328125e-06,
      "model_forward_time": 0.11486148834228516,
      "step": 309
    },
    {
      "epoch": 1.885986328125e-06,
      "step": 309,
      "training_step_time": 0.3834512233734131
    },
    {
      "epoch": 1.89208984375e-06,
      "grad_norm": 3.193880796432495,
      "learning_rate": 1.0333333333333333e-05,
      "loss": 0.9101,
      "step": 310
    },
    {
      "epoch": 1.89208984375e-06,
      "model_forward_time": 0.11483144760131836,
      "step": 310
    },
    {
      "epoch": 1.89208984375e-06,
      "step": 310,
      "training_step_time": 0.38506340980529785
    },
    {
      "epoch": 1.898193359375e-06,
      "model_forward_time": 0.11476469039916992,
      "step": 311
    },
    {
      "epoch": 1.898193359375e-06,
      "step": 311,
      "training_step_time": 0.38016748428344727
    },
    {
      "epoch": 1.904296875e-06,
      "model_forward_time": 0.11484289169311523,
      "step": 312
    },
    {
      "epoch": 1.904296875e-06,
      "step": 312,
      "training_step_time": 0.7698545455932617
    },
    {
      "epoch": 1.910400390625e-06,
      "model_forward_time": 0.11474227905273438,
      "step": 313
    },
    {
      "epoch": 1.910400390625e-06,
      "step": 313,
      "training_step_time": 0.4051187038421631
    },
    {
      "epoch": 1.91650390625e-06,
      "model_forward_time": 0.11493134498596191,
      "step": 314
    },
    {
      "epoch": 1.91650390625e-06,
      "step": 314,
      "training_step_time": 0.3951985836029053
    },
    {
      "epoch": 1.922607421875e-06,
      "model_forward_time": 0.11424589157104492,
      "step": 315
    },
    {
      "epoch": 1.922607421875e-06,
      "step": 315,
      "training_step_time": 0.38913869857788086
    },
    {
      "epoch": 1.9287109375e-06,
      "model_forward_time": 0.11481451988220215,
      "step": 316
    },
    {
      "epoch": 1.9287109375e-06,
      "step": 316,
      "training_step_time": 0.38080525398254395
    },
    {
      "epoch": 1.934814453125e-06,
      "model_forward_time": 0.11468362808227539,
      "step": 317
    },
    {
      "epoch": 1.934814453125e-06,
      "step": 317,
      "training_step_time": 0.38344883918762207
    },
    {
      "epoch": 1.94091796875e-06,
      "model_forward_time": 0.11505961418151855,
      "step": 318
    },
    {
      "epoch": 1.94091796875e-06,
      "step": 318,
      "training_step_time": 1.0097649097442627
    },
    {
      "epoch": 1.947021484375e-06,
      "model_forward_time": 0.11440062522888184,
      "step": 319
    },
    {
      "epoch": 1.947021484375e-06,
      "step": 319,
      "training_step_time": 0.46561551094055176
    },
    {
      "epoch": 1.953125e-06,
      "grad_norm": 3.008812427520752,
      "learning_rate": 1.0666666666666667e-05,
      "loss": 0.8467,
      "step": 320
    },
    {
      "epoch": 1.953125e-06,
      "model_forward_time": 0.11417460441589355,
      "step": 320
    },
    {
      "epoch": 1.953125e-06,
      "step": 320,
      "training_step_time": 0.47393274307250977
    },
    {
      "epoch": 1.959228515625e-06,
      "model_forward_time": 0.11428117752075195,
      "step": 321
    },
    {
      "epoch": 1.959228515625e-06,
      "step": 321,
      "training_step_time": 0.43343424797058105
    },
    {
      "epoch": 1.96533203125e-06,
      "model_forward_time": 0.11428236961364746,
      "step": 322
    },
    {
      "epoch": 1.96533203125e-06,
      "step": 322,
      "training_step_time": 0.364940881729126
    },
    {
      "epoch": 1.971435546875e-06,
      "model_forward_time": 0.11456608772277832,
      "step": 323
    },
    {
      "epoch": 1.971435546875e-06,
      "step": 323,
      "training_step_time": 0.3818483352661133
    },
    {
      "epoch": 1.9775390625e-06,
      "model_forward_time": 0.11494898796081543,
      "step": 324
    },
    {
      "epoch": 1.9775390625e-06,
      "step": 324,
      "training_step_time": 0.3974335193634033
    },
    {
      "epoch": 1.983642578125e-06,
      "model_forward_time": 0.11592388153076172,
      "step": 325
    },
    {
      "epoch": 1.983642578125e-06,
      "step": 325,
      "training_step_time": 0.4194471836090088
    },
    {
      "epoch": 1.98974609375e-06,
      "model_forward_time": 0.11534452438354492,
      "step": 326
    },
    {
      "epoch": 1.98974609375e-06,
      "step": 326,
      "training_step_time": 0.389819860458374
    },
    {
      "epoch": 1.995849609375e-06,
      "model_forward_time": 0.11648797988891602,
      "step": 327
    },
    {
      "epoch": 1.995849609375e-06,
      "step": 327,
      "training_step_time": 0.3876533508300781
    },
    {
      "epoch": 2.001953125e-06,
      "model_forward_time": 0.1155099868774414,
      "step": 328
    },
    {
      "epoch": 2.001953125e-06,
      "step": 328,
      "training_step_time": 0.3773231506347656
    },
    {
      "epoch": 2.008056640625e-06,
      "model_forward_time": 0.11543774604797363,
      "step": 329
    },
    {
      "epoch": 2.008056640625e-06,
      "step": 329,
      "training_step_time": 0.38866257667541504
    },
    {
      "epoch": 2.01416015625e-06,
      "grad_norm": 3.463630199432373,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 0.7792,
      "step": 330
    },
    {
      "epoch": 2.01416015625e-06,
      "model_forward_time": 0.1152040958404541,
      "step": 330
    },
    {
      "epoch": 2.01416015625e-06,
      "step": 330,
      "training_step_time": 0.8720221519470215
    },
    {
      "epoch": 2.020263671875e-06,
      "model_forward_time": 0.11435365676879883,
      "step": 331
    },
    {
      "epoch": 2.020263671875e-06,
      "step": 331,
      "training_step_time": 0.3894689083099365
    },
    {
      "epoch": 2.0263671875e-06,
      "model_forward_time": 0.1154336929321289,
      "step": 332
    },
    {
      "epoch": 2.0263671875e-06,
      "step": 332,
      "training_step_time": 0.4298818111419678
    },
    {
      "epoch": 2.032470703125e-06,
      "model_forward_time": 0.11475872993469238,
      "step": 333
    },
    {
      "epoch": 2.032470703125e-06,
      "step": 333,
      "training_step_time": 0.49071693420410156
    },
    {
      "epoch": 2.03857421875e-06,
      "model_forward_time": 0.1144723892211914,
      "step": 334
    },
    {
      "epoch": 2.03857421875e-06,
      "step": 334,
      "training_step_time": 0.4544203281402588
    },
    {
      "epoch": 2.044677734375e-06,
      "model_forward_time": 0.1147758960723877,
      "step": 335
    },
    {
      "epoch": 2.044677734375e-06,
      "step": 335,
      "training_step_time": 0.5030477046966553
    },
    {
      "epoch": 2.05078125e-06,
      "model_forward_time": 0.11476683616638184,
      "step": 336
    },
    {
      "epoch": 2.05078125e-06,
      "step": 336,
      "training_step_time": 0.4605107307434082
    },
    {
      "epoch": 2.056884765625e-06,
      "model_forward_time": 0.11400866508483887,
      "step": 337
    },
    {
      "epoch": 2.056884765625e-06,
      "step": 337,
      "training_step_time": 0.3926842212677002
    },
    {
      "epoch": 2.06298828125e-06,
      "model_forward_time": 0.11470556259155273,
      "step": 338
    },
    {
      "epoch": 2.06298828125e-06,
      "step": 338,
      "training_step_time": 0.3879520893096924
    },
    {
      "epoch": 2.069091796875e-06,
      "model_forward_time": 0.11496186256408691,
      "step": 339
    },
    {
      "epoch": 2.069091796875e-06,
      "step": 339,
      "training_step_time": 0.39146852493286133
    },
    {
      "epoch": 2.0751953125e-06,
      "grad_norm": 4.9554009437561035,
      "learning_rate": 1.1333333333333334e-05,
      "loss": 0.6854,
      "step": 340
    },
    {
      "epoch": 2.0751953125e-06,
      "model_forward_time": 0.11470460891723633,
      "step": 340
    },
    {
      "epoch": 2.0751953125e-06,
      "step": 340,
      "training_step_time": 0.3755669593811035
    },
    {
      "epoch": 2.081298828125e-06,
      "model_forward_time": 0.11524558067321777,
      "step": 341
    },
    {
      "epoch": 2.081298828125e-06,
      "step": 341,
      "training_step_time": 0.3942830562591553
    },
    {
      "epoch": 2.08740234375e-06,
      "model_forward_time": 0.11485600471496582,
      "step": 342
    },
    {
      "epoch": 2.08740234375e-06,
      "step": 342,
      "training_step_time": 0.836296558380127
    },
    {
      "epoch": 2.093505859375e-06,
      "model_forward_time": 0.1144862174987793,
      "step": 343
    },
    {
      "epoch": 2.093505859375e-06,
      "step": 343,
      "training_step_time": 0.3811347484588623
    },
    {
      "epoch": 2.099609375e-06,
      "model_forward_time": 0.11569333076477051,
      "step": 344
    },
    {
      "epoch": 2.099609375e-06,
      "step": 344,
      "training_step_time": 0.39063429832458496
    },
    {
      "epoch": 2.105712890625e-06,
      "model_forward_time": 0.11427116394042969,
      "step": 345
    },
    {
      "epoch": 2.105712890625e-06,
      "step": 345,
      "training_step_time": 0.4093630313873291
    },
    {
      "epoch": 2.11181640625e-06,
      "model_forward_time": 0.11456823348999023,
      "step": 346
    },
    {
      "epoch": 2.11181640625e-06,
      "step": 346,
      "training_step_time": 0.4323740005493164
    },
    {
      "epoch": 2.117919921875e-06,
      "model_forward_time": 0.11489129066467285,
      "step": 347
    },
    {
      "epoch": 2.117919921875e-06,
      "step": 347,
      "training_step_time": 0.46034741401672363
    },
    {
      "epoch": 2.1240234375e-06,
      "model_forward_time": 0.11460638046264648,
      "step": 348
    },
    {
      "epoch": 2.1240234375e-06,
      "step": 348,
      "training_step_time": 0.7846474647521973
    },
    {
      "epoch": 2.130126953125e-06,
      "model_forward_time": 0.11425900459289551,
      "step": 349
    },
    {
      "epoch": 2.130126953125e-06,
      "step": 349,
      "training_step_time": 0.38395118713378906
    },
    {
      "epoch": 2.13623046875e-06,
      "grad_norm": 4.425543308258057,
      "learning_rate": 1.1666666666666668e-05,
      "loss": 0.6046,
      "step": 350
    },
    {
      "epoch": 2.13623046875e-06,
      "model_forward_time": 0.1173861026763916,
      "step": 350
    },
    {
      "epoch": 2.13623046875e-06,
      "step": 350,
      "training_step_time": 0.38132572174072266
    },
    {
      "epoch": 2.142333984375e-06,
      "model_forward_time": 0.11438584327697754,
      "step": 351
    },
    {
      "epoch": 2.142333984375e-06,
      "step": 351,
      "training_step_time": 0.3942146301269531
    },
    {
      "epoch": 2.1484375e-06,
      "model_forward_time": 0.11480975151062012,
      "step": 352
    },
    {
      "epoch": 2.1484375e-06,
      "step": 352,
      "training_step_time": 0.3747520446777344
    },
    {
      "epoch": 2.154541015625e-06,
      "model_forward_time": 0.11454463005065918,
      "step": 353
    },
    {
      "epoch": 2.154541015625e-06,
      "step": 353,
      "training_step_time": 0.38141322135925293
    },
    {
      "epoch": 2.16064453125e-06,
      "model_forward_time": 0.11531186103820801,
      "step": 354
    },
    {
      "epoch": 2.16064453125e-06,
      "step": 354,
      "training_step_time": 0.7380213737487793
    },
    {
      "epoch": 2.166748046875e-06,
      "model_forward_time": 0.11505675315856934,
      "step": 355
    },
    {
      "epoch": 2.166748046875e-06,
      "step": 355,
      "training_step_time": 0.38903164863586426
    },
    {
      "epoch": 2.1728515625e-06,
      "model_forward_time": 0.11519360542297363,
      "step": 356
    },
    {
      "epoch": 2.1728515625e-06,
      "step": 356,
      "training_step_time": 0.3865818977355957
    },
    {
      "epoch": 2.178955078125e-06,
      "model_forward_time": 0.1154181957244873,
      "step": 357
    },
    {
      "epoch": 2.178955078125e-06,
      "step": 357,
      "training_step_time": 0.40325307846069336
    },
    {
      "epoch": 2.18505859375e-06,
      "model_forward_time": 0.11524176597595215,
      "step": 358
    },
    {
      "epoch": 2.18505859375e-06,
      "step": 358,
      "training_step_time": 0.3821876049041748
    },
    {
      "epoch": 2.191162109375e-06,
      "model_forward_time": 0.11650800704956055,
      "step": 359
    },
    {
      "epoch": 2.191162109375e-06,
      "step": 359,
      "training_step_time": 0.3899984359741211
    },
    {
      "epoch": 2.197265625e-06,
      "grad_norm": 4.272157192230225,
      "learning_rate": 1.2e-05,
      "loss": 0.5317,
      "step": 360
    },
    {
      "epoch": 2.197265625e-06,
      "model_forward_time": 0.11662793159484863,
      "step": 360
    },
    {
      "epoch": 2.197265625e-06,
      "step": 360,
      "training_step_time": 0.9096043109893799
    },
    {
      "epoch": 2.203369140625e-06,
      "model_forward_time": 0.11567425727844238,
      "step": 361
    },
    {
      "epoch": 2.203369140625e-06,
      "step": 361,
      "training_step_time": 0.4642343521118164
    },
    {
      "epoch": 2.20947265625e-06,
      "model_forward_time": 0.11431145668029785,
      "step": 362
    },
    {
      "epoch": 2.20947265625e-06,
      "step": 362,
      "training_step_time": 0.4661977291107178
    },
    {
      "epoch": 2.215576171875e-06,
      "model_forward_time": 0.11415863037109375,
      "step": 363
    },
    {
      "epoch": 2.215576171875e-06,
      "step": 363,
      "training_step_time": 0.3953514099121094
    },
    {
      "epoch": 2.2216796875e-06,
      "model_forward_time": 0.11388468742370605,
      "step": 364
    },
    {
      "epoch": 2.2216796875e-06,
      "step": 364,
      "training_step_time": 0.36801838874816895
    },
    {
      "epoch": 2.227783203125e-06,
      "model_forward_time": 0.11401772499084473,
      "step": 365
    },
    {
      "epoch": 2.227783203125e-06,
      "step": 365,
      "training_step_time": 0.3809692859649658
    },
    {
      "epoch": 2.23388671875e-06,
      "model_forward_time": 0.11563754081726074,
      "step": 366
    },
    {
      "epoch": 2.23388671875e-06,
      "step": 366,
      "training_step_time": 0.6340248584747314
    },
    {
      "epoch": 2.239990234375e-06,
      "model_forward_time": 0.1140294075012207,
      "step": 367
    },
    {
      "epoch": 2.239990234375e-06,
      "step": 367,
      "training_step_time": 0.37574243545532227
    },
    {
      "epoch": 2.24609375e-06,
      "model_forward_time": 0.114654541015625,
      "step": 368
    },
    {
      "epoch": 2.24609375e-06,
      "step": 368,
      "training_step_time": 0.40468692779541016
    },
    {
      "epoch": 2.252197265625e-06,
      "model_forward_time": 0.11530518531799316,
      "step": 369
    },
    {
      "epoch": 2.252197265625e-06,
      "step": 369,
      "training_step_time": 0.40299415588378906
    },
    {
      "epoch": 2.25830078125e-06,
      "grad_norm": 5.367913246154785,
      "learning_rate": 1.2333333333333334e-05,
      "loss": 0.487,
      "step": 370
    },
    {
      "epoch": 2.25830078125e-06,
      "model_forward_time": 0.11486983299255371,
      "step": 370
    },
    {
      "epoch": 2.25830078125e-06,
      "step": 370,
      "training_step_time": 0.43947815895080566
    },
    {
      "epoch": 2.264404296875e-06,
      "model_forward_time": 0.1139993667602539,
      "step": 371
    },
    {
      "epoch": 2.264404296875e-06,
      "step": 371,
      "training_step_time": 0.38419485092163086
    },
    {
      "epoch": 2.2705078125e-06,
      "model_forward_time": 0.11539125442504883,
      "step": 372
    },
    {
      "epoch": 2.2705078125e-06,
      "step": 372,
      "training_step_time": 0.9370241165161133
    },
    {
      "epoch": 2.276611328125e-06,
      "model_forward_time": 0.1152334213256836,
      "step": 373
    },
    {
      "epoch": 2.276611328125e-06,
      "step": 373,
      "training_step_time": 0.36850833892822266
    },
    {
      "epoch": 2.28271484375e-06,
      "model_forward_time": 0.1140432357788086,
      "step": 374
    },
    {
      "epoch": 2.28271484375e-06,
      "step": 374,
      "training_step_time": 0.4952704906463623
    },
    {
      "epoch": 2.288818359375e-06,
      "model_forward_time": 0.11413002014160156,
      "step": 375
    },
    {
      "epoch": 2.288818359375e-06,
      "step": 375,
      "training_step_time": 0.5003020763397217
    },
    {
      "epoch": 2.294921875e-06,
      "model_forward_time": 0.11458373069763184,
      "step": 376
    },
    {
      "epoch": 2.294921875e-06,
      "step": 376,
      "training_step_time": 0.3691234588623047
    },
    {
      "epoch": 2.301025390625e-06,
      "model_forward_time": 0.11439037322998047,
      "step": 377
    },
    {
      "epoch": 2.301025390625e-06,
      "step": 377,
      "training_step_time": 0.3975706100463867
    },
    {
      "epoch": 2.30712890625e-06,
      "model_forward_time": 0.11490440368652344,
      "step": 378
    },
    {
      "epoch": 2.30712890625e-06,
      "step": 378,
      "training_step_time": 0.5839526653289795
    },
    {
      "epoch": 2.313232421875e-06,
      "model_forward_time": 0.11446809768676758,
      "step": 379
    },
    {
      "epoch": 2.313232421875e-06,
      "step": 379,
      "training_step_time": 0.5042972564697266
    },
    {
      "epoch": 2.3193359375e-06,
      "grad_norm": 3.218459129333496,
      "learning_rate": 1.2666666666666668e-05,
      "loss": 0.4209,
      "step": 380
    },
    {
      "epoch": 2.3193359375e-06,
      "model_forward_time": 0.11417579650878906,
      "step": 380
    },
    {
      "epoch": 2.3193359375e-06,
      "step": 380,
      "training_step_time": 0.39469265937805176
    },
    {
      "epoch": 2.325439453125e-06,
      "model_forward_time": 0.11655545234680176,
      "step": 381
    },
    {
      "epoch": 2.325439453125e-06,
      "step": 381,
      "training_step_time": 0.41895461082458496
    },
    {
      "epoch": 2.33154296875e-06,
      "model_forward_time": 0.11441302299499512,
      "step": 382
    },
    {
      "epoch": 2.33154296875e-06,
      "step": 382,
      "training_step_time": 0.41220569610595703
    },
    {
      "epoch": 2.337646484375e-06,
      "model_forward_time": 0.11497020721435547,
      "step": 383
    },
    {
      "epoch": 2.337646484375e-06,
      "step": 383,
      "training_step_time": 0.3802766799926758
    },
    {
      "epoch": 2.34375e-06,
      "model_forward_time": 0.11501622200012207,
      "step": 384
    },
    {
      "epoch": 2.34375e-06,
      "step": 384,
      "training_step_time": 0.5634384155273438
    },
    {
      "epoch": 2.349853515625e-06,
      "model_forward_time": 0.11563467979431152,
      "step": 385
    },
    {
      "epoch": 2.349853515625e-06,
      "step": 385,
      "training_step_time": 0.4958324432373047
    },
    {
      "epoch": 2.35595703125e-06,
      "model_forward_time": 0.11486268043518066,
      "step": 386
    },
    {
      "epoch": 2.35595703125e-06,
      "step": 386,
      "training_step_time": 0.48305559158325195
    },
    {
      "epoch": 2.362060546875e-06,
      "model_forward_time": 0.11491823196411133,
      "step": 387
    },
    {
      "epoch": 2.362060546875e-06,
      "step": 387,
      "training_step_time": 0.43328380584716797
    },
    {
      "epoch": 2.3681640625e-06,
      "model_forward_time": 0.11453008651733398,
      "step": 388
    },
    {
      "epoch": 2.3681640625e-06,
      "step": 388,
      "training_step_time": 0.45809054374694824
    },
    {
      "epoch": 2.374267578125e-06,
      "model_forward_time": 0.11431550979614258,
      "step": 389
    },
    {
      "epoch": 2.374267578125e-06,
      "step": 389,
      "training_step_time": 0.4695010185241699
    },
    {
      "epoch": 2.38037109375e-06,
      "grad_norm": 2.358853340148926,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 0.3765,
      "step": 390
    },
    {
      "epoch": 2.38037109375e-06,
      "model_forward_time": 0.11401200294494629,
      "step": 390
    },
    {
      "epoch": 2.38037109375e-06,
      "step": 390,
      "training_step_time": 0.39287710189819336
    },
    {
      "epoch": 2.386474609375e-06,
      "model_forward_time": 0.1142129898071289,
      "step": 391
    },
    {
      "epoch": 2.386474609375e-06,
      "step": 391,
      "training_step_time": 0.40686964988708496
    },
    {
      "epoch": 2.392578125e-06,
      "model_forward_time": 0.11540389060974121,
      "step": 392
    },
    {
      "epoch": 2.392578125e-06,
      "step": 392,
      "training_step_time": 0.3903920650482178
    },
    {
      "epoch": 2.398681640625e-06,
      "model_forward_time": 0.11488842964172363,
      "step": 393
    },
    {
      "epoch": 2.398681640625e-06,
      "step": 393,
      "training_step_time": 0.38411903381347656
    },
    {
      "epoch": 2.40478515625e-06,
      "model_forward_time": 0.11570453643798828,
      "step": 394
    },
    {
      "epoch": 2.40478515625e-06,
      "step": 394,
      "training_step_time": 0.4349946975708008
    },
    {
      "epoch": 2.410888671875e-06,
      "model_forward_time": 0.11466097831726074,
      "step": 395
    },
    {
      "epoch": 2.410888671875e-06,
      "step": 395,
      "training_step_time": 0.4003572463989258
    },
    {
      "epoch": 2.4169921875e-06,
      "model_forward_time": 0.11553573608398438,
      "step": 396
    },
    {
      "epoch": 2.4169921875e-06,
      "step": 396,
      "training_step_time": 0.8028266429901123
    },
    {
      "epoch": 2.423095703125e-06,
      "model_forward_time": 0.1146552562713623,
      "step": 397
    },
    {
      "epoch": 2.423095703125e-06,
      "step": 397,
      "training_step_time": 0.3805079460144043
    },
    {
      "epoch": 2.42919921875e-06,
      "model_forward_time": 0.11561703681945801,
      "step": 398
    },
    {
      "epoch": 2.42919921875e-06,
      "step": 398,
      "training_step_time": 0.39784836769104004
    },
    {
      "epoch": 2.435302734375e-06,
      "model_forward_time": 0.11460399627685547,
      "step": 399
    },
    {
      "epoch": 2.435302734375e-06,
      "step": 399,
      "training_step_time": 0.40634655952453613
    },
    {
      "epoch": 2.44140625e-06,
      "grad_norm": 3.0905919075012207,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 0.3557,
      "step": 400
    },
    {
      "epoch": 2.44140625e-06,
      "model_forward_time": 0.11464214324951172,
      "step": 400
    },
    {
      "epoch": 2.44140625e-06,
      "step": 400,
      "training_step_time": 0.44780564308166504
    },
    {
      "epoch": 2.447509765625e-06,
      "model_forward_time": 0.11475229263305664,
      "step": 401
    },
    {
      "epoch": 2.447509765625e-06,
      "step": 401,
      "training_step_time": 0.46521902084350586
    },
    {
      "epoch": 2.45361328125e-06,
      "model_forward_time": 0.11522626876831055,
      "step": 402
    },
    {
      "epoch": 2.45361328125e-06,
      "step": 402,
      "training_step_time": 0.9017741680145264
    },
    {
      "epoch": 2.459716796875e-06,
      "model_forward_time": 0.11471390724182129,
      "step": 403
    },
    {
      "epoch": 2.459716796875e-06,
      "step": 403,
      "training_step_time": 0.39224863052368164
    },
    {
      "epoch": 2.4658203125e-06,
      "model_forward_time": 0.11420774459838867,
      "step": 404
    },
    {
      "epoch": 2.4658203125e-06,
      "step": 404,
      "training_step_time": 0.39765024185180664
    },
    {
      "epoch": 2.471923828125e-06,
      "model_forward_time": 0.11526036262512207,
      "step": 405
    },
    {
      "epoch": 2.471923828125e-06,
      "step": 405,
      "training_step_time": 0.3939051628112793
    },
    {
      "epoch": 2.47802734375e-06,
      "model_forward_time": 0.11545109748840332,
      "step": 406
    },
    {
      "epoch": 2.47802734375e-06,
      "step": 406,
      "training_step_time": 0.388141393661499
    },
    {
      "epoch": 2.484130859375e-06,
      "model_forward_time": 0.11448454856872559,
      "step": 407
    },
    {
      "epoch": 2.484130859375e-06,
      "step": 407,
      "training_step_time": 0.4678635597229004
    },
    {
      "epoch": 2.490234375e-06,
      "model_forward_time": 0.1157217025756836,
      "step": 408
    },
    {
      "epoch": 2.490234375e-06,
      "step": 408,
      "training_step_time": 0.7627737522125244
    },
    {
      "epoch": 2.496337890625e-06,
      "model_forward_time": 0.11421442031860352,
      "step": 409
    },
    {
      "epoch": 2.496337890625e-06,
      "step": 409,
      "training_step_time": 0.4002971649169922
    },
    {
      "epoch": 2.50244140625e-06,
      "grad_norm": 2.8451051712036133,
      "learning_rate": 1.3666666666666666e-05,
      "loss": 0.3439,
      "step": 410
    },
    {
      "epoch": 2.50244140625e-06,
      "model_forward_time": 0.11511683464050293,
      "step": 410
    },
    {
      "epoch": 2.50244140625e-06,
      "step": 410,
      "training_step_time": 0.3896017074584961
    },
    {
      "epoch": 2.508544921875e-06,
      "model_forward_time": 0.11524724960327148,
      "step": 411
    },
    {
      "epoch": 2.508544921875e-06,
      "step": 411,
      "training_step_time": 0.3877708911895752
    },
    {
      "epoch": 2.5146484375e-06,
      "model_forward_time": 0.11475658416748047,
      "step": 412
    },
    {
      "epoch": 2.5146484375e-06,
      "step": 412,
      "training_step_time": 0.49294114112854004
    },
    {
      "epoch": 2.520751953125e-06,
      "model_forward_time": 0.1144876480102539,
      "step": 413
    },
    {
      "epoch": 2.520751953125e-06,
      "step": 413,
      "training_step_time": 0.36890077590942383
    },
    {
      "epoch": 2.52685546875e-06,
      "model_forward_time": 0.11528635025024414,
      "step": 414
    },
    {
      "epoch": 2.52685546875e-06,
      "step": 414,
      "training_step_time": 0.5730295181274414
    },
    {
      "epoch": 2.532958984375e-06,
      "model_forward_time": 0.11571216583251953,
      "step": 415
    },
    {
      "epoch": 2.532958984375e-06,
      "step": 415,
      "training_step_time": 0.3993690013885498
    },
    {
      "epoch": 2.5390625e-06,
      "model_forward_time": 0.11530518531799316,
      "step": 416
    },
    {
      "epoch": 2.5390625e-06,
      "step": 416,
      "training_step_time": 0.3732020854949951
    },
    {
      "epoch": 2.545166015625e-06,
      "model_forward_time": 0.11451959609985352,
      "step": 417
    },
    {
      "epoch": 2.545166015625e-06,
      "step": 417,
      "training_step_time": 0.39104652404785156
    },
    {
      "epoch": 2.55126953125e-06,
      "model_forward_time": 0.11501240730285645,
      "step": 418
    },
    {
      "epoch": 2.55126953125e-06,
      "step": 418,
      "training_step_time": 0.41527485847473145
    },
    {
      "epoch": 2.557373046875e-06,
      "model_forward_time": 0.11534976959228516,
      "step": 419
    },
    {
      "epoch": 2.557373046875e-06,
      "step": 419,
      "training_step_time": 0.4441235065460205
    },
    {
      "epoch": 2.5634765625e-06,
      "grad_norm": 2.7128443717956543,
      "learning_rate": 1.4000000000000001e-05,
      "loss": 0.3245,
      "step": 420
    },
    {
      "epoch": 2.5634765625e-06,
      "model_forward_time": 0.11697030067443848,
      "step": 420
    },
    {
      "epoch": 2.5634765625e-06,
      "step": 420,
      "training_step_time": 0.9133481979370117
    },
    {
      "epoch": 2.569580078125e-06,
      "model_forward_time": 0.11438894271850586,
      "step": 421
    },
    {
      "epoch": 2.569580078125e-06,
      "step": 421,
      "training_step_time": 0.3907439708709717
    },
    {
      "epoch": 2.57568359375e-06,
      "model_forward_time": 0.11499524116516113,
      "step": 422
    },
    {
      "epoch": 2.57568359375e-06,
      "step": 422,
      "training_step_time": 0.389312744140625
    },
    {
      "epoch": 2.581787109375e-06,
      "model_forward_time": 0.11439752578735352,
      "step": 423
    },
    {
      "epoch": 2.581787109375e-06,
      "step": 423,
      "training_step_time": 0.37870144844055176
    },
    {
      "epoch": 2.587890625e-06,
      "model_forward_time": 0.11504793167114258,
      "step": 424
    },
    {
      "epoch": 2.587890625e-06,
      "step": 424,
      "training_step_time": 0.37754201889038086
    },
    {
      "epoch": 2.593994140625e-06,
      "model_forward_time": 0.11444473266601562,
      "step": 425
    },
    {
      "epoch": 2.593994140625e-06,
      "step": 425,
      "training_step_time": 0.4886929988861084
    },
    {
      "epoch": 2.60009765625e-06,
      "model_forward_time": 0.11523199081420898,
      "step": 426
    },
    {
      "epoch": 2.60009765625e-06,
      "step": 426,
      "training_step_time": 0.7421483993530273
    },
    {
      "epoch": 2.606201171875e-06,
      "model_forward_time": 0.11459732055664062,
      "step": 427
    },
    {
      "epoch": 2.606201171875e-06,
      "step": 427,
      "training_step_time": 0.47810864448547363
    },
    {
      "epoch": 2.6123046875e-06,
      "model_forward_time": 0.11502552032470703,
      "step": 428
    },
    {
      "epoch": 2.6123046875e-06,
      "step": 428,
      "training_step_time": 0.45731258392333984
    },
    {
      "epoch": 2.618408203125e-06,
      "model_forward_time": 0.1135098934173584,
      "step": 429
    },
    {
      "epoch": 2.618408203125e-06,
      "step": 429,
      "training_step_time": 0.3953084945678711
    },
    {
      "epoch": 2.62451171875e-06,
      "grad_norm": 2.3049941062927246,
      "learning_rate": 1.4333333333333334e-05,
      "loss": 0.3186,
      "step": 430
    },
    {
      "epoch": 2.62451171875e-06,
      "model_forward_time": 0.11450433731079102,
      "step": 430
    },
    {
      "epoch": 2.62451171875e-06,
      "step": 430,
      "training_step_time": 0.3925163745880127
    },
    {
      "epoch": 2.630615234375e-06,
      "model_forward_time": 0.11422157287597656,
      "step": 431
    },
    {
      "epoch": 2.630615234375e-06,
      "step": 431,
      "training_step_time": 0.43565893173217773
    },
    {
      "epoch": 2.63671875e-06,
      "model_forward_time": 0.1147928237915039,
      "step": 432
    },
    {
      "epoch": 2.63671875e-06,
      "step": 432,
      "training_step_time": 0.39562058448791504
    },
    {
      "epoch": 2.642822265625e-06,
      "model_forward_time": 0.11468172073364258,
      "step": 433
    },
    {
      "epoch": 2.642822265625e-06,
      "step": 433,
      "training_step_time": 0.3889007568359375
    },
    {
      "epoch": 2.64892578125e-06,
      "model_forward_time": 0.11544656753540039,
      "step": 434
    },
    {
      "epoch": 2.64892578125e-06,
      "step": 434,
      "training_step_time": 0.3949720859527588
    },
    {
      "epoch": 2.655029296875e-06,
      "model_forward_time": 0.1151885986328125,
      "step": 435
    },
    {
      "epoch": 2.655029296875e-06,
      "step": 435,
      "training_step_time": 0.386685848236084
    },
    {
      "epoch": 2.6611328125e-06,
      "model_forward_time": 0.1151738166809082,
      "step": 436
    },
    {
      "epoch": 2.6611328125e-06,
      "step": 436,
      "training_step_time": 0.3915064334869385
    },
    {
      "epoch": 2.667236328125e-06,
      "model_forward_time": 0.11553096771240234,
      "step": 437
    },
    {
      "epoch": 2.667236328125e-06,
      "step": 437,
      "training_step_time": 0.40921831130981445
    },
    {
      "epoch": 2.67333984375e-06,
      "model_forward_time": 0.11492443084716797,
      "step": 438
    },
    {
      "epoch": 2.67333984375e-06,
      "step": 438,
      "training_step_time": 1.0480387210845947
    },
    {
      "epoch": 2.679443359375e-06,
      "model_forward_time": 0.11437797546386719,
      "step": 439
    },
    {
      "epoch": 2.679443359375e-06,
      "step": 439,
      "training_step_time": 0.4509541988372803
    },
    {
      "epoch": 2.685546875e-06,
      "grad_norm": 2.637598752975464,
      "learning_rate": 1.4666666666666668e-05,
      "loss": 0.2988,
      "step": 440
    },
    {
      "epoch": 2.685546875e-06,
      "model_forward_time": 0.1149747371673584,
      "step": 440
    },
    {
      "epoch": 2.685546875e-06,
      "step": 440,
      "training_step_time": 0.43091869354248047
    },
    {
      "epoch": 2.691650390625e-06,
      "model_forward_time": 0.1144556999206543,
      "step": 441
    },
    {
      "epoch": 2.691650390625e-06,
      "step": 441,
      "training_step_time": 0.439434289932251
    },
    {
      "epoch": 2.69775390625e-06,
      "model_forward_time": 0.11468100547790527,
      "step": 442
    },
    {
      "epoch": 2.69775390625e-06,
      "step": 442,
      "training_step_time": 0.3885769844055176
    },
    {
      "epoch": 2.703857421875e-06,
      "model_forward_time": 0.11502814292907715,
      "step": 443
    },
    {
      "epoch": 2.703857421875e-06,
      "step": 443,
      "training_step_time": 0.39815354347229004
    },
    {
      "epoch": 2.7099609375e-06,
      "model_forward_time": 0.11469435691833496,
      "step": 444
    },
    {
      "epoch": 2.7099609375e-06,
      "step": 444,
      "training_step_time": 0.7831234931945801
    },
    {
      "epoch": 2.716064453125e-06,
      "model_forward_time": 0.11444997787475586,
      "step": 445
    },
    {
      "epoch": 2.716064453125e-06,
      "step": 445,
      "training_step_time": 0.39215731620788574
    },
    {
      "epoch": 2.72216796875e-06,
      "model_forward_time": 0.11421561241149902,
      "step": 446
    },
    {
      "epoch": 2.72216796875e-06,
      "step": 446,
      "training_step_time": 0.40018439292907715
    },
    {
      "epoch": 2.728271484375e-06,
      "model_forward_time": 0.11414265632629395,
      "step": 447
    },
    {
      "epoch": 2.728271484375e-06,
      "step": 447,
      "training_step_time": 0.3966343402862549
    },
    {
      "epoch": 2.734375e-06,
      "model_forward_time": 0.11454343795776367,
      "step": 448
    },
    {
      "epoch": 2.734375e-06,
      "step": 448,
      "training_step_time": 0.38767576217651367
    },
    {
      "epoch": 2.740478515625e-06,
      "model_forward_time": 0.11511421203613281,
      "step": 449
    },
    {
      "epoch": 2.740478515625e-06,
      "step": 449,
      "training_step_time": 0.379849910736084
    },
    {
      "epoch": 2.74658203125e-06,
      "grad_norm": 1.5352064371109009,
      "learning_rate": 1.5e-05,
      "loss": 0.2859,
      "step": 450
    },
    {
      "epoch": 2.74658203125e-06,
      "model_forward_time": 0.11494064331054688,
      "step": 450
    },
    {
      "epoch": 2.74658203125e-06,
      "step": 450,
      "training_step_time": 0.7662777900695801
    },
    {
      "epoch": 2.752685546875e-06,
      "model_forward_time": 0.11521530151367188,
      "step": 451
    },
    {
      "epoch": 2.752685546875e-06,
      "step": 451,
      "training_step_time": 0.44028735160827637
    },
    {
      "epoch": 2.7587890625e-06,
      "model_forward_time": 0.11453509330749512,
      "step": 452
    },
    {
      "epoch": 2.7587890625e-06,
      "step": 452,
      "training_step_time": 0.4488070011138916
    },
    {
      "epoch": 2.764892578125e-06,
      "model_forward_time": 0.11544632911682129,
      "step": 453
    },
    {
      "epoch": 2.764892578125e-06,
      "step": 453,
      "training_step_time": 0.4806375503540039
    },
    {
      "epoch": 2.77099609375e-06,
      "model_forward_time": 0.11527633666992188,
      "step": 454
    },
    {
      "epoch": 2.77099609375e-06,
      "step": 454,
      "training_step_time": 0.477977991104126
    },
    {
      "epoch": 2.777099609375e-06,
      "model_forward_time": 0.11484336853027344,
      "step": 455
    },
    {
      "epoch": 2.777099609375e-06,
      "step": 455,
      "training_step_time": 0.4521033763885498
    },
    {
      "epoch": 2.783203125e-06,
      "model_forward_time": 0.1149899959564209,
      "step": 456
    },
    {
      "epoch": 2.783203125e-06,
      "step": 456,
      "training_step_time": 0.43650126457214355
    },
    {
      "epoch": 2.789306640625e-06,
      "model_forward_time": 0.11433529853820801,
      "step": 457
    },
    {
      "epoch": 2.789306640625e-06,
      "step": 457,
      "training_step_time": 0.38315820693969727
    },
    {
      "epoch": 2.79541015625e-06,
      "model_forward_time": 0.11518144607543945,
      "step": 458
    },
    {
      "epoch": 2.79541015625e-06,
      "step": 458,
      "training_step_time": 0.4002246856689453
    },
    {
      "epoch": 2.801513671875e-06,
      "model_forward_time": 0.11493611335754395,
      "step": 459
    },
    {
      "epoch": 2.801513671875e-06,
      "step": 459,
      "training_step_time": 0.39005279541015625
    },
    {
      "epoch": 2.8076171875e-06,
      "grad_norm": 1.790676236152649,
      "learning_rate": 1.5333333333333334e-05,
      "loss": 0.2813,
      "step": 460
    },
    {
      "epoch": 2.8076171875e-06,
      "model_forward_time": 0.1152338981628418,
      "step": 460
    },
    {
      "epoch": 2.8076171875e-06,
      "step": 460,
      "training_step_time": 0.38473963737487793
    },
    {
      "epoch": 2.813720703125e-06,
      "model_forward_time": 0.1155843734741211,
      "step": 461
    },
    {
      "epoch": 2.813720703125e-06,
      "step": 461,
      "training_step_time": 0.3913297653198242
    },
    {
      "epoch": 2.81982421875e-06,
      "model_forward_time": 0.1150665283203125,
      "step": 462
    },
    {
      "epoch": 2.81982421875e-06,
      "step": 462,
      "training_step_time": 0.8664777278900146
    },
    {
      "epoch": 2.825927734375e-06,
      "model_forward_time": 0.11434125900268555,
      "step": 463
    },
    {
      "epoch": 2.825927734375e-06,
      "step": 463,
      "training_step_time": 0.3906588554382324
    },
    {
      "epoch": 2.83203125e-06,
      "model_forward_time": 0.11431097984313965,
      "step": 464
    },
    {
      "epoch": 2.83203125e-06,
      "step": 464,
      "training_step_time": 0.4337351322174072
    },
    {
      "epoch": 2.838134765625e-06,
      "model_forward_time": 0.11440777778625488,
      "step": 465
    },
    {
      "epoch": 2.838134765625e-06,
      "step": 465,
      "training_step_time": 0.45081639289855957
    },
    {
      "epoch": 2.84423828125e-06,
      "model_forward_time": 0.11419343948364258,
      "step": 466
    },
    {
      "epoch": 2.84423828125e-06,
      "step": 466,
      "training_step_time": 0.44078660011291504
    },
    {
      "epoch": 2.850341796875e-06,
      "model_forward_time": 0.1140141487121582,
      "step": 467
    },
    {
      "epoch": 2.850341796875e-06,
      "step": 467,
      "training_step_time": 0.45360493659973145
    },
    {
      "epoch": 2.8564453125e-06,
      "model_forward_time": 0.11445951461791992,
      "step": 468
    },
    {
      "epoch": 2.8564453125e-06,
      "step": 468,
      "training_step_time": 0.5421149730682373
    },
    {
      "epoch": 2.862548828125e-06,
      "model_forward_time": 0.11451148986816406,
      "step": 469
    },
    {
      "epoch": 2.862548828125e-06,
      "step": 469,
      "training_step_time": 0.3919374942779541
    },
    {
      "epoch": 2.86865234375e-06,
      "grad_norm": 1.624840259552002,
      "learning_rate": 1.5666666666666667e-05,
      "loss": 0.292,
      "step": 470
    },
    {
      "epoch": 2.86865234375e-06,
      "model_forward_time": 0.11458516120910645,
      "step": 470
    },
    {
      "epoch": 2.86865234375e-06,
      "step": 470,
      "training_step_time": 0.38745570182800293
    },
    {
      "epoch": 2.874755859375e-06,
      "model_forward_time": 0.1147465705871582,
      "step": 471
    },
    {
      "epoch": 2.874755859375e-06,
      "step": 471,
      "training_step_time": 0.4077305793762207
    },
    {
      "epoch": 2.880859375e-06,
      "model_forward_time": 0.11496472358703613,
      "step": 472
    },
    {
      "epoch": 2.880859375e-06,
      "step": 472,
      "training_step_time": 0.3953673839569092
    },
    {
      "epoch": 2.886962890625e-06,
      "model_forward_time": 0.11610627174377441,
      "step": 473
    },
    {
      "epoch": 2.886962890625e-06,
      "step": 473,
      "training_step_time": 0.3977010250091553
    },
    {
      "epoch": 2.89306640625e-06,
      "model_forward_time": 0.11577200889587402,
      "step": 474
    },
    {
      "epoch": 2.89306640625e-06,
      "step": 474,
      "training_step_time": 0.746037483215332
    },
    {
      "epoch": 2.899169921875e-06,
      "model_forward_time": 0.11458039283752441,
      "step": 475
    },
    {
      "epoch": 2.899169921875e-06,
      "step": 475,
      "training_step_time": 0.38939833641052246
    },
    {
      "epoch": 2.9052734375e-06,
      "model_forward_time": 0.11522030830383301,
      "step": 476
    },
    {
      "epoch": 2.9052734375e-06,
      "step": 476,
      "training_step_time": 0.3961446285247803
    },
    {
      "epoch": 2.911376953125e-06,
      "model_forward_time": 0.11505699157714844,
      "step": 477
    },
    {
      "epoch": 2.911376953125e-06,
      "step": 477,
      "training_step_time": 0.39431238174438477
    },
    {
      "epoch": 2.91748046875e-06,
      "model_forward_time": 0.11474299430847168,
      "step": 478
    },
    {
      "epoch": 2.91748046875e-06,
      "step": 478,
      "training_step_time": 0.4558565616607666
    },
    {
      "epoch": 2.923583984375e-06,
      "model_forward_time": 0.1154623031616211,
      "step": 479
    },
    {
      "epoch": 2.923583984375e-06,
      "step": 479,
      "training_step_time": 0.4000086784362793
    },
    {
      "epoch": 2.9296875e-06,
      "grad_norm": 1.1301449537277222,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.2729,
      "step": 480
    },
    {
      "epoch": 2.9296875e-06,
      "model_forward_time": 0.11472558975219727,
      "step": 480
    },
    {
      "epoch": 2.9296875e-06,
      "step": 480,
      "training_step_time": 0.9751715660095215
    },
    {
      "epoch": 2.935791015625e-06,
      "model_forward_time": 0.11497640609741211,
      "step": 481
    },
    {
      "epoch": 2.935791015625e-06,
      "step": 481,
      "training_step_time": 0.4278600215911865
    },
    {
      "epoch": 2.94189453125e-06,
      "model_forward_time": 0.11485695838928223,
      "step": 482
    },
    {
      "epoch": 2.94189453125e-06,
      "step": 482,
      "training_step_time": 0.3990294933319092
    },
    {
      "epoch": 2.947998046875e-06,
      "model_forward_time": 0.11493730545043945,
      "step": 483
    },
    {
      "epoch": 2.947998046875e-06,
      "step": 483,
      "training_step_time": 0.3819444179534912
    },
    {
      "epoch": 2.9541015625e-06,
      "model_forward_time": 0.11432480812072754,
      "step": 484
    },
    {
      "epoch": 2.9541015625e-06,
      "step": 484,
      "training_step_time": 0.3887064456939697
    },
    {
      "epoch": 2.960205078125e-06,
      "model_forward_time": 0.1152503490447998,
      "step": 485
    },
    {
      "epoch": 2.960205078125e-06,
      "step": 485,
      "training_step_time": 0.39098668098449707
    },
    {
      "epoch": 2.96630859375e-06,
      "model_forward_time": 0.11463785171508789,
      "step": 486
    },
    {
      "epoch": 2.96630859375e-06,
      "step": 486,
      "training_step_time": 0.6724040508270264
    },
    {
      "epoch": 2.972412109375e-06,
      "model_forward_time": 0.11455202102661133,
      "step": 487
    },
    {
      "epoch": 2.972412109375e-06,
      "step": 487,
      "training_step_time": 0.3986837863922119
    },
    {
      "epoch": 2.978515625e-06,
      "model_forward_time": 0.11515021324157715,
      "step": 488
    },
    {
      "epoch": 2.978515625e-06,
      "step": 488,
      "training_step_time": 0.39199280738830566
    },
    {
      "epoch": 2.984619140625e-06,
      "model_forward_time": 0.11510086059570312,
      "step": 489
    },
    {
      "epoch": 2.984619140625e-06,
      "step": 489,
      "training_step_time": 0.3972151279449463
    },
    {
      "epoch": 2.99072265625e-06,
      "grad_norm": 1.2684446573257446,
      "learning_rate": 1.6333333333333335e-05,
      "loss": 0.2633,
      "step": 490
    },
    {
      "epoch": 2.99072265625e-06,
      "model_forward_time": 0.11527562141418457,
      "step": 490
    },
    {
      "epoch": 2.99072265625e-06,
      "step": 490,
      "training_step_time": 0.38922762870788574
    },
    {
      "epoch": 2.996826171875e-06,
      "model_forward_time": 0.11585640907287598,
      "step": 491
    },
    {
      "epoch": 2.996826171875e-06,
      "step": 491,
      "training_step_time": 0.49173927307128906
    },
    {
      "epoch": 3.0029296875e-06,
      "model_forward_time": 0.11505556106567383,
      "step": 492
    },
    {
      "epoch": 3.0029296875e-06,
      "step": 492,
      "training_step_time": 1.141871452331543
    },
    {
      "epoch": 3.009033203125e-06,
      "model_forward_time": 0.11384820938110352,
      "step": 493
    },
    {
      "epoch": 3.009033203125e-06,
      "step": 493,
      "training_step_time": 0.46054506301879883
    },
    {
      "epoch": 3.01513671875e-06,
      "model_forward_time": 0.11405706405639648,
      "step": 494
    },
    {
      "epoch": 3.01513671875e-06,
      "step": 494,
      "training_step_time": 0.4306952953338623
    },
    {
      "epoch": 3.021240234375e-06,
      "model_forward_time": 0.11364340782165527,
      "step": 495
    },
    {
      "epoch": 3.021240234375e-06,
      "step": 495,
      "training_step_time": 0.3759188652038574
    },
    {
      "epoch": 3.02734375e-06,
      "model_forward_time": 0.11406397819519043,
      "step": 496
    },
    {
      "epoch": 3.02734375e-06,
      "step": 496,
      "training_step_time": 0.38660287857055664
    },
    {
      "epoch": 3.033447265625e-06,
      "model_forward_time": 0.11492204666137695,
      "step": 497
    },
    {
      "epoch": 3.033447265625e-06,
      "step": 497,
      "training_step_time": 0.40532922744750977
    },
    {
      "epoch": 3.03955078125e-06,
      "model_forward_time": 0.11502814292907715,
      "step": 498
    },
    {
      "epoch": 3.03955078125e-06,
      "step": 498,
      "training_step_time": 0.5025286674499512
    },
    {
      "epoch": 3.045654296875e-06,
      "model_forward_time": 0.1149754524230957,
      "step": 499
    },
    {
      "epoch": 3.045654296875e-06,
      "step": 499,
      "training_step_time": 0.38861584663391113
    },
    {
      "epoch": 3.0517578125e-06,
      "grad_norm": 0.8355977535247803,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.2453,
      "step": 500
    },
    {
      "epoch": 3.0517578125e-06,
      "model_forward_time": 0.11531305313110352,
      "step": 500
    },
    {
      "epoch": 3.0517578125e-06,
      "step": 500,
      "training_step_time": 0.3871915340423584
    },
    {
      "epoch": 3.057861328125e-06,
      "model_forward_time": 0.11527585983276367,
      "step": 501
    },
    {
      "epoch": 3.057861328125e-06,
      "step": 501,
      "training_step_time": 0.3926115036010742
    },
    {
      "epoch": 3.06396484375e-06,
      "model_forward_time": 0.11523556709289551,
      "step": 502
    },
    {
      "epoch": 3.06396484375e-06,
      "step": 502,
      "training_step_time": 0.38888978958129883
    },
    {
      "epoch": 3.070068359375e-06,
      "model_forward_time": 0.1151893138885498,
      "step": 503
    },
    {
      "epoch": 3.070068359375e-06,
      "step": 503,
      "training_step_time": 0.39560961723327637
    },
    {
      "epoch": 3.076171875e-06,
      "model_forward_time": 0.1157078742980957,
      "step": 504
    },
    {
      "epoch": 3.076171875e-06,
      "step": 504,
      "training_step_time": 0.9668362140655518
    },
    {
      "epoch": 3.082275390625e-06,
      "model_forward_time": 0.11472773551940918,
      "step": 505
    },
    {
      "epoch": 3.082275390625e-06,
      "step": 505,
      "training_step_time": 0.4433412551879883
    },
    {
      "epoch": 3.08837890625e-06,
      "model_forward_time": 0.11452102661132812,
      "step": 506
    },
    {
      "epoch": 3.08837890625e-06,
      "step": 506,
      "training_step_time": 0.4506411552429199
    },
    {
      "epoch": 3.094482421875e-06,
      "model_forward_time": 0.11470580101013184,
      "step": 507
    },
    {
      "epoch": 3.094482421875e-06,
      "step": 507,
      "training_step_time": 0.4426255226135254
    },
    {
      "epoch": 3.1005859375e-06,
      "model_forward_time": 0.11363601684570312,
      "step": 508
    },
    {
      "epoch": 3.1005859375e-06,
      "step": 508,
      "training_step_time": 0.4605371952056885
    },
    {
      "epoch": 3.106689453125e-06,
      "model_forward_time": 0.11435723304748535,
      "step": 509
    },
    {
      "epoch": 3.106689453125e-06,
      "step": 509,
      "training_step_time": 0.39173030853271484
    },
    {
      "epoch": 3.11279296875e-06,
      "grad_norm": 1.3344290256500244,
      "learning_rate": 1.7000000000000003e-05,
      "loss": 0.2514,
      "step": 510
    },
    {
      "epoch": 3.11279296875e-06,
      "model_forward_time": 0.11434721946716309,
      "step": 510
    },
    {
      "epoch": 3.11279296875e-06,
      "step": 510,
      "training_step_time": 0.4182741641998291
    },
    {
      "epoch": 3.118896484375e-06,
      "model_forward_time": 0.1150064468383789,
      "step": 511
    },
    {
      "epoch": 3.118896484375e-06,
      "step": 511,
      "training_step_time": 0.39836621284484863
    },
    {
      "epoch": 3.125e-06,
      "model_forward_time": 0.1150665283203125,
      "step": 512
    },
    {
      "epoch": 3.125e-06,
      "step": 512,
      "training_step_time": 0.3956592082977295
    },
    {
      "epoch": 3.131103515625e-06,
      "model_forward_time": 0.11536908149719238,
      "step": 513
    },
    {
      "epoch": 3.131103515625e-06,
      "step": 513,
      "training_step_time": 0.39478588104248047
    },
    {
      "epoch": 3.13720703125e-06,
      "model_forward_time": 0.11506414413452148,
      "step": 514
    },
    {
      "epoch": 3.13720703125e-06,
      "step": 514,
      "training_step_time": 0.39980006217956543
    },
    {
      "epoch": 3.143310546875e-06,
      "model_forward_time": 0.11469435691833496,
      "step": 515
    },
    {
      "epoch": 3.143310546875e-06,
      "step": 515,
      "training_step_time": 0.39145898818969727
    },
    {
      "epoch": 3.1494140625e-06,
      "model_forward_time": 0.11477446556091309,
      "step": 516
    },
    {
      "epoch": 3.1494140625e-06,
      "step": 516,
      "training_step_time": 0.9185752868652344
    },
    {
      "epoch": 3.155517578125e-06,
      "model_forward_time": 0.11391377449035645,
      "step": 517
    },
    {
      "epoch": 3.155517578125e-06,
      "step": 517,
      "training_step_time": 0.4090244770050049
    },
    {
      "epoch": 3.16162109375e-06,
      "model_forward_time": 0.11456441879272461,
      "step": 518
    },
    {
      "epoch": 3.16162109375e-06,
      "step": 518,
      "training_step_time": 0.4405789375305176
    },
    {
      "epoch": 3.167724609375e-06,
      "model_forward_time": 0.11423921585083008,
      "step": 519
    },
    {
      "epoch": 3.167724609375e-06,
      "step": 519,
      "training_step_time": 0.4722890853881836
    },
    {
      "epoch": 3.173828125e-06,
      "grad_norm": 1.888867735862732,
      "learning_rate": 1.7333333333333336e-05,
      "loss": 0.2386,
      "step": 520
    },
    {
      "epoch": 3.173828125e-06,
      "model_forward_time": 0.1148369312286377,
      "step": 520
    },
    {
      "epoch": 3.173828125e-06,
      "step": 520,
      "training_step_time": 0.46411991119384766
    },
    {
      "epoch": 3.179931640625e-06,
      "model_forward_time": 0.11393117904663086,
      "step": 521
    },
    {
      "epoch": 3.179931640625e-06,
      "step": 521,
      "training_step_time": 0.435884952545166
    },
    {
      "epoch": 3.18603515625e-06,
      "model_forward_time": 0.1146690845489502,
      "step": 522
    },
    {
      "epoch": 3.18603515625e-06,
      "step": 522,
      "training_step_time": 0.5017662048339844
    },
    {
      "epoch": 3.192138671875e-06,
      "model_forward_time": 0.11457705497741699,
      "step": 523
    },
    {
      "epoch": 3.192138671875e-06,
      "step": 523,
      "training_step_time": 0.3902895450592041
    },
    {
      "epoch": 3.1982421875e-06,
      "model_forward_time": 0.11489009857177734,
      "step": 524
    },
    {
      "epoch": 3.1982421875e-06,
      "step": 524,
      "training_step_time": 0.3939518928527832
    },
    {
      "epoch": 3.204345703125e-06,
      "model_forward_time": 0.11489534378051758,
      "step": 525
    },
    {
      "epoch": 3.204345703125e-06,
      "step": 525,
      "training_step_time": 0.39987778663635254
    },
    {
      "epoch": 3.21044921875e-06,
      "model_forward_time": 0.11510801315307617,
      "step": 526
    },
    {
      "epoch": 3.21044921875e-06,
      "step": 526,
      "training_step_time": 0.3956606388092041
    },
    {
      "epoch": 3.216552734375e-06,
      "model_forward_time": 0.1154179573059082,
      "step": 527
    },
    {
      "epoch": 3.216552734375e-06,
      "step": 527,
      "training_step_time": 0.38172364234924316
    },
    {
      "epoch": 3.22265625e-06,
      "model_forward_time": 0.11524081230163574,
      "step": 528
    },
    {
      "epoch": 3.22265625e-06,
      "step": 528,
      "training_step_time": 0.8925614356994629
    },
    {
      "epoch": 3.228759765625e-06,
      "model_forward_time": 0.11437821388244629,
      "step": 529
    },
    {
      "epoch": 3.228759765625e-06,
      "step": 529,
      "training_step_time": 0.42867040634155273
    },
    {
      "epoch": 3.23486328125e-06,
      "grad_norm": 1.3376423120498657,
      "learning_rate": 1.7666666666666668e-05,
      "loss": 0.253,
      "step": 530
    },
    {
      "epoch": 3.23486328125e-06,
      "model_forward_time": 0.1146855354309082,
      "step": 530
    },
    {
      "epoch": 3.23486328125e-06,
      "step": 530,
      "training_step_time": 0.43072009086608887
    },
    {
      "epoch": 3.240966796875e-06,
      "model_forward_time": 0.11470270156860352,
      "step": 531
    },
    {
      "epoch": 3.240966796875e-06,
      "step": 531,
      "training_step_time": 0.40853285789489746
    },
    {
      "epoch": 3.2470703125e-06,
      "model_forward_time": 0.11479043960571289,
      "step": 532
    },
    {
      "epoch": 3.2470703125e-06,
      "step": 532,
      "training_step_time": 0.4743916988372803
    },
    {
      "epoch": 3.253173828125e-06,
      "model_forward_time": 0.11493396759033203,
      "step": 533
    },
    {
      "epoch": 3.253173828125e-06,
      "step": 533,
      "training_step_time": 0.36933398246765137
    },
    {
      "epoch": 3.25927734375e-06,
      "model_forward_time": 0.11451840400695801,
      "step": 534
    },
    {
      "epoch": 3.25927734375e-06,
      "step": 534,
      "training_step_time": 0.5508582592010498
    },
    {
      "epoch": 3.265380859375e-06,
      "model_forward_time": 0.11455392837524414,
      "step": 535
    },
    {
      "epoch": 3.265380859375e-06,
      "step": 535,
      "training_step_time": 0.39676547050476074
    },
    {
      "epoch": 3.271484375e-06,
      "model_forward_time": 0.11622142791748047,
      "step": 536
    },
    {
      "epoch": 3.271484375e-06,
      "step": 536,
      "training_step_time": 0.37085628509521484
    },
    {
      "epoch": 3.277587890625e-06,
      "model_forward_time": 0.11432194709777832,
      "step": 537
    },
    {
      "epoch": 3.277587890625e-06,
      "step": 537,
      "training_step_time": 0.4046807289123535
    },
    {
      "epoch": 3.28369140625e-06,
      "model_forward_time": 0.11498427391052246,
      "step": 538
    },
    {
      "epoch": 3.28369140625e-06,
      "step": 538,
      "training_step_time": 0.3947782516479492
    },
    {
      "epoch": 3.289794921875e-06,
      "model_forward_time": 0.1150062084197998,
      "step": 539
    },
    {
      "epoch": 3.289794921875e-06,
      "step": 539,
      "training_step_time": 0.3902144432067871
    },
    {
      "epoch": 3.2958984375e-06,
      "grad_norm": 1.7164231538772583,
      "learning_rate": 1.8e-05,
      "loss": 0.2353,
      "step": 540
    },
    {
      "epoch": 3.2958984375e-06,
      "model_forward_time": 0.11544561386108398,
      "step": 540
    },
    {
      "epoch": 3.2958984375e-06,
      "step": 540,
      "training_step_time": 0.8031148910522461
    },
    {
      "epoch": 3.302001953125e-06,
      "model_forward_time": 0.11452388763427734,
      "step": 541
    },
    {
      "epoch": 3.302001953125e-06,
      "step": 541,
      "training_step_time": 0.42194056510925293
    },
    {
      "epoch": 3.30810546875e-06,
      "model_forward_time": 0.11462903022766113,
      "step": 542
    },
    {
      "epoch": 3.30810546875e-06,
      "step": 542,
      "training_step_time": 0.4254634380340576
    },
    {
      "epoch": 3.314208984375e-06,
      "model_forward_time": 0.1151266098022461,
      "step": 543
    },
    {
      "epoch": 3.314208984375e-06,
      "step": 543,
      "training_step_time": 0.42095470428466797
    },
    {
      "epoch": 3.3203125e-06,
      "model_forward_time": 0.11473727226257324,
      "step": 544
    },
    {
      "epoch": 3.3203125e-06,
      "step": 544,
      "training_step_time": 0.38925790786743164
    },
    {
      "epoch": 3.326416015625e-06,
      "model_forward_time": 0.11407017707824707,
      "step": 545
    },
    {
      "epoch": 3.326416015625e-06,
      "step": 545,
      "training_step_time": 0.4827752113342285
    },
    {
      "epoch": 3.33251953125e-06,
      "model_forward_time": 0.11474823951721191,
      "step": 546
    },
    {
      "epoch": 3.33251953125e-06,
      "step": 546,
      "training_step_time": 0.5980148315429688
    },
    {
      "epoch": 3.338623046875e-06,
      "model_forward_time": 0.11549091339111328,
      "step": 547
    },
    {
      "epoch": 3.338623046875e-06,
      "step": 547,
      "training_step_time": 0.3973391056060791
    },
    {
      "epoch": 3.3447265625e-06,
      "model_forward_time": 0.11449670791625977,
      "step": 548
    },
    {
      "epoch": 3.3447265625e-06,
      "step": 548,
      "training_step_time": 0.4557342529296875
    },
    {
      "epoch": 3.350830078125e-06,
      "model_forward_time": 0.11473917961120605,
      "step": 549
    },
    {
      "epoch": 3.350830078125e-06,
      "step": 549,
      "training_step_time": 0.39772462844848633
    },
    {
      "epoch": 3.35693359375e-06,
      "grad_norm": 1.2527589797973633,
      "learning_rate": 1.8333333333333333e-05,
      "loss": 0.2321,
      "step": 550
    },
    {
      "epoch": 3.35693359375e-06,
      "model_forward_time": 0.11419010162353516,
      "step": 550
    },
    {
      "epoch": 3.35693359375e-06,
      "step": 550,
      "training_step_time": 0.3839294910430908
    },
    {
      "epoch": 3.363037109375e-06,
      "model_forward_time": 0.1149752140045166,
      "step": 551
    },
    {
      "epoch": 3.363037109375e-06,
      "step": 551,
      "training_step_time": 0.39248204231262207
    },
    {
      "epoch": 3.369140625e-06,
      "model_forward_time": 0.11519026756286621,
      "step": 552
    },
    {
      "epoch": 3.369140625e-06,
      "step": 552,
      "training_step_time": 0.5746562480926514
    },
    {
      "epoch": 3.375244140625e-06,
      "model_forward_time": 0.11463522911071777,
      "step": 553
    },
    {
      "epoch": 3.375244140625e-06,
      "step": 553,
      "training_step_time": 0.39179420471191406
    },
    {
      "epoch": 3.38134765625e-06,
      "model_forward_time": 0.11556339263916016,
      "step": 554
    },
    {
      "epoch": 3.38134765625e-06,
      "step": 554,
      "training_step_time": 0.4257619380950928
    },
    {
      "epoch": 3.387451171875e-06,
      "model_forward_time": 0.11431384086608887,
      "step": 555
    },
    {
      "epoch": 3.387451171875e-06,
      "step": 555,
      "training_step_time": 0.43113160133361816
    },
    {
      "epoch": 3.3935546875e-06,
      "model_forward_time": 0.1145789623260498,
      "step": 556
    },
    {
      "epoch": 3.3935546875e-06,
      "step": 556,
      "training_step_time": 0.3920266628265381
    },
    {
      "epoch": 3.399658203125e-06,
      "model_forward_time": 0.11499357223510742,
      "step": 557
    },
    {
      "epoch": 3.399658203125e-06,
      "step": 557,
      "training_step_time": 0.38733792304992676
    },
    {
      "epoch": 3.40576171875e-06,
      "model_forward_time": 0.11517620086669922,
      "step": 558
    },
    {
      "epoch": 3.40576171875e-06,
      "step": 558,
      "training_step_time": 1.0650510787963867
    },
    {
      "epoch": 3.411865234375e-06,
      "model_forward_time": 0.11463260650634766,
      "step": 559
    },
    {
      "epoch": 3.411865234375e-06,
      "step": 559,
      "training_step_time": 0.394334077835083
    },
    {
      "epoch": 3.41796875e-06,
      "grad_norm": 1.3132437467575073,
      "learning_rate": 1.866666666666667e-05,
      "loss": 0.2217,
      "step": 560
    },
    {
      "epoch": 3.41796875e-06,
      "model_forward_time": 0.11604499816894531,
      "step": 560
    },
    {
      "epoch": 3.41796875e-06,
      "step": 560,
      "training_step_time": 0.477492094039917
    },
    {
      "epoch": 3.424072265625e-06,
      "model_forward_time": 0.1149442195892334,
      "step": 561
    },
    {
      "epoch": 3.424072265625e-06,
      "step": 561,
      "training_step_time": 0.3876047134399414
    },
    {
      "epoch": 3.43017578125e-06,
      "model_forward_time": 0.11386609077453613,
      "step": 562
    },
    {
      "epoch": 3.43017578125e-06,
      "step": 562,
      "training_step_time": 0.4524097442626953
    },
    {
      "epoch": 3.436279296875e-06,
      "model_forward_time": 0.11429429054260254,
      "step": 563
    },
    {
      "epoch": 3.436279296875e-06,
      "step": 563,
      "training_step_time": 0.3886075019836426
    },
    {
      "epoch": 3.4423828125e-06,
      "model_forward_time": 0.11464476585388184,
      "step": 564
    },
    {
      "epoch": 3.4423828125e-06,
      "step": 564,
      "training_step_time": 0.713111162185669
    },
    {
      "epoch": 3.448486328125e-06,
      "model_forward_time": 0.11426377296447754,
      "step": 565
    },
    {
      "epoch": 3.448486328125e-06,
      "step": 565,
      "training_step_time": 0.38864827156066895
    },
    {
      "epoch": 3.45458984375e-06,
      "model_forward_time": 0.1145784854888916,
      "step": 566
    },
    {
      "epoch": 3.45458984375e-06,
      "step": 566,
      "training_step_time": 0.43290090560913086
    },
    {
      "epoch": 3.460693359375e-06,
      "model_forward_time": 0.11420655250549316,
      "step": 567
    },
    {
      "epoch": 3.460693359375e-06,
      "step": 567,
      "training_step_time": 0.39628005027770996
    },
    {
      "epoch": 3.466796875e-06,
      "model_forward_time": 0.11459994316101074,
      "step": 568
    },
    {
      "epoch": 3.466796875e-06,
      "step": 568,
      "training_step_time": 0.4111490249633789
    },
    {
      "epoch": 3.472900390625e-06,
      "model_forward_time": 0.11494016647338867,
      "step": 569
    },
    {
      "epoch": 3.472900390625e-06,
      "step": 569,
      "training_step_time": 0.3944590091705322
    },
    {
      "epoch": 3.47900390625e-06,
      "grad_norm": 1.6555219888687134,
      "learning_rate": 1.9e-05,
      "loss": 0.2275,
      "step": 570
    },
    {
      "epoch": 3.47900390625e-06,
      "model_forward_time": 0.11446309089660645,
      "step": 570
    },
    {
      "epoch": 3.47900390625e-06,
      "step": 570,
      "training_step_time": 0.8769803047180176
    },
    {
      "epoch": 3.485107421875e-06,
      "model_forward_time": 0.11412262916564941,
      "step": 571
    },
    {
      "epoch": 3.485107421875e-06,
      "step": 571,
      "training_step_time": 0.47476983070373535
    },
    {
      "epoch": 3.4912109375e-06,
      "model_forward_time": 0.11426734924316406,
      "step": 572
    },
    {
      "epoch": 3.4912109375e-06,
      "step": 572,
      "training_step_time": 0.4591202735900879
    },
    {
      "epoch": 3.497314453125e-06,
      "model_forward_time": 0.11437368392944336,
      "step": 573
    },
    {
      "epoch": 3.497314453125e-06,
      "step": 573,
      "training_step_time": 0.4957904815673828
    },
    {
      "epoch": 3.50341796875e-06,
      "model_forward_time": 0.11419534683227539,
      "step": 574
    },
    {
      "epoch": 3.50341796875e-06,
      "step": 574,
      "training_step_time": 0.4110407829284668
    },
    {
      "epoch": 3.509521484375e-06,
      "model_forward_time": 0.11476802825927734,
      "step": 575
    },
    {
      "epoch": 3.509521484375e-06,
      "step": 575,
      "training_step_time": 0.3785746097564697
    },
    {
      "epoch": 3.515625e-06,
      "model_forward_time": 0.11440706253051758,
      "step": 576
    },
    {
      "epoch": 3.515625e-06,
      "step": 576,
      "training_step_time": 0.48397231101989746
    },
    {
      "epoch": 3.521728515625e-06,
      "model_forward_time": 0.11464142799377441,
      "step": 577
    },
    {
      "epoch": 3.521728515625e-06,
      "step": 577,
      "training_step_time": 0.3799736499786377
    },
    {
      "epoch": 3.52783203125e-06,
      "model_forward_time": 0.11503195762634277,
      "step": 578
    },
    {
      "epoch": 3.52783203125e-06,
      "step": 578,
      "training_step_time": 0.3924062252044678
    },
    {
      "epoch": 3.533935546875e-06,
      "model_forward_time": 0.11479473114013672,
      "step": 579
    },
    {
      "epoch": 3.533935546875e-06,
      "step": 579,
      "training_step_time": 0.42032718658447266
    },
    {
      "epoch": 3.5400390625e-06,
      "grad_norm": 1.4820743799209595,
      "learning_rate": 1.9333333333333333e-05,
      "loss": 0.2283,
      "step": 580
    },
    {
      "epoch": 3.5400390625e-06,
      "model_forward_time": 0.1146235466003418,
      "step": 580
    },
    {
      "epoch": 3.5400390625e-06,
      "step": 580,
      "training_step_time": 0.39569520950317383
    },
    {
      "epoch": 3.546142578125e-06,
      "model_forward_time": 0.11496782302856445,
      "step": 581
    },
    {
      "epoch": 3.546142578125e-06,
      "step": 581,
      "training_step_time": 0.38707590103149414
    },
    {
      "epoch": 3.55224609375e-06,
      "model_forward_time": 0.11537456512451172,
      "step": 582
    },
    {
      "epoch": 3.55224609375e-06,
      "step": 582,
      "training_step_time": 1.051706314086914
    },
    {
      "epoch": 3.558349609375e-06,
      "model_forward_time": 0.11417937278747559,
      "step": 583
    },
    {
      "epoch": 3.558349609375e-06,
      "step": 583,
      "training_step_time": 0.44466280937194824
    },
    {
      "epoch": 3.564453125e-06,
      "model_forward_time": 0.11390805244445801,
      "step": 584
    },
    {
      "epoch": 3.564453125e-06,
      "step": 584,
      "training_step_time": 0.44254326820373535
    },
    {
      "epoch": 3.570556640625e-06,
      "model_forward_time": 0.11447834968566895,
      "step": 585
    },
    {
      "epoch": 3.570556640625e-06,
      "step": 585,
      "training_step_time": 0.40255069732666016
    },
    {
      "epoch": 3.57666015625e-06,
      "model_forward_time": 0.11458992958068848,
      "step": 586
    },
    {
      "epoch": 3.57666015625e-06,
      "step": 586,
      "training_step_time": 0.4442124366760254
    },
    {
      "epoch": 3.582763671875e-06,
      "model_forward_time": 0.11461234092712402,
      "step": 587
    },
    {
      "epoch": 3.582763671875e-06,
      "step": 587,
      "training_step_time": 0.46125364303588867
    },
    {
      "epoch": 3.5888671875e-06,
      "model_forward_time": 0.11419367790222168,
      "step": 588
    },
    {
      "epoch": 3.5888671875e-06,
      "step": 588,
      "training_step_time": 0.47649526596069336
    },
    {
      "epoch": 3.594970703125e-06,
      "model_forward_time": 0.11465167999267578,
      "step": 589
    },
    {
      "epoch": 3.594970703125e-06,
      "step": 589,
      "training_step_time": 0.38735413551330566
    },
    {
      "epoch": 3.60107421875e-06,
      "grad_norm": 1.483670711517334,
      "learning_rate": 1.9666666666666666e-05,
      "loss": 0.2356,
      "step": 590
    },
    {
      "epoch": 3.60107421875e-06,
      "model_forward_time": 0.1145169734954834,
      "step": 590
    },
    {
      "epoch": 3.60107421875e-06,
      "step": 590,
      "training_step_time": 0.3894333839416504
    },
    {
      "epoch": 3.607177734375e-06,
      "model_forward_time": 0.11491036415100098,
      "step": 591
    },
    {
      "epoch": 3.607177734375e-06,
      "step": 591,
      "training_step_time": 0.38866305351257324
    },
    {
      "epoch": 3.61328125e-06,
      "model_forward_time": 0.11521053314208984,
      "step": 592
    },
    {
      "epoch": 3.61328125e-06,
      "step": 592,
      "training_step_time": 0.4136691093444824
    },
    {
      "epoch": 3.619384765625e-06,
      "model_forward_time": 0.11613607406616211,
      "step": 593
    },
    {
      "epoch": 3.619384765625e-06,
      "step": 593,
      "training_step_time": 0.39809513092041016
    },
    {
      "epoch": 3.62548828125e-06,
      "model_forward_time": 0.11578798294067383,
      "step": 594
    },
    {
      "epoch": 3.62548828125e-06,
      "step": 594,
      "training_step_time": 0.7903368473052979
    },
    {
      "epoch": 3.631591796875e-06,
      "model_forward_time": 0.11459565162658691,
      "step": 595
    },
    {
      "epoch": 3.631591796875e-06,
      "step": 595,
      "training_step_time": 0.3869779109954834
    },
    {
      "epoch": 3.6376953125e-06,
      "model_forward_time": 0.1145322322845459,
      "step": 596
    },
    {
      "epoch": 3.6376953125e-06,
      "step": 596,
      "training_step_time": 0.38456273078918457
    },
    {
      "epoch": 3.643798828125e-06,
      "model_forward_time": 0.1143341064453125,
      "step": 597
    },
    {
      "epoch": 3.643798828125e-06,
      "step": 597,
      "training_step_time": 0.3848428726196289
    },
    {
      "epoch": 3.64990234375e-06,
      "model_forward_time": 0.1153724193572998,
      "step": 598
    },
    {
      "epoch": 3.64990234375e-06,
      "step": 598,
      "training_step_time": 0.3829677104949951
    },
    {
      "epoch": 3.656005859375e-06,
      "model_forward_time": 0.11503076553344727,
      "step": 599
    },
    {
      "epoch": 3.656005859375e-06,
      "step": 599,
      "training_step_time": 0.42778611183166504
    },
    {
      "epoch": 3.662109375e-06,
      "grad_norm": 1.0638790130615234,
      "learning_rate": 2e-05,
      "loss": 0.2307,
      "step": 600
    },
    {
      "epoch": 3.662109375e-06,
      "model_forward_time": 0.11533856391906738,
      "step": 600
    },
    {
      "epoch": 3.662109375e-06,
      "step": 600,
      "training_step_time": 0.7526195049285889
    },
    {
      "epoch": 3.668212890625e-06,
      "model_forward_time": 0.11502456665039062,
      "step": 601
    },
    {
      "epoch": 3.668212890625e-06,
      "step": 601,
      "training_step_time": 0.4527757167816162
    },
    {
      "epoch": 3.67431640625e-06,
      "model_forward_time": 0.11461043357849121,
      "step": 602
    },
    {
      "epoch": 3.67431640625e-06,
      "step": 602,
      "training_step_time": 0.3927931785583496
    },
    {
      "epoch": 3.680419921875e-06,
      "model_forward_time": 0.11458539962768555,
      "step": 603
    },
    {
      "epoch": 3.680419921875e-06,
      "step": 603,
      "training_step_time": 0.38584208488464355
    },
    {
      "epoch": 3.6865234375e-06,
      "model_forward_time": 0.1142127513885498,
      "step": 604
    },
    {
      "epoch": 3.6865234375e-06,
      "step": 604,
      "training_step_time": 0.3869616985321045
    },
    {
      "epoch": 3.692626953125e-06,
      "model_forward_time": 0.11447525024414062,
      "step": 605
    },
    {
      "epoch": 3.692626953125e-06,
      "step": 605,
      "training_step_time": 0.41910743713378906
    },
    {
      "epoch": 3.69873046875e-06,
      "model_forward_time": 0.11529159545898438,
      "step": 606
    },
    {
      "epoch": 3.69873046875e-06,
      "step": 606,
      "training_step_time": 0.622201681137085
    },
    {
      "epoch": 3.704833984375e-06,
      "model_forward_time": 0.11460638046264648,
      "step": 607
    },
    {
      "epoch": 3.704833984375e-06,
      "step": 607,
      "training_step_time": 0.39683055877685547
    },
    {
      "epoch": 3.7109375e-06,
      "model_forward_time": 0.11435103416442871,
      "step": 608
    },
    {
      "epoch": 3.7109375e-06,
      "step": 608,
      "training_step_time": 0.3855562210083008
    },
    {
      "epoch": 3.717041015625e-06,
      "model_forward_time": 0.11526775360107422,
      "step": 609
    },
    {
      "epoch": 3.717041015625e-06,
      "step": 609,
      "training_step_time": 0.3867762088775635
    },
    {
      "epoch": 3.72314453125e-06,
      "grad_norm": 1.5238053798675537,
      "learning_rate": 2.0333333333333334e-05,
      "loss": 0.2555,
      "step": 610
    },
    {
      "epoch": 3.72314453125e-06,
      "model_forward_time": 0.11478900909423828,
      "step": 610
    },
    {
      "epoch": 3.72314453125e-06,
      "step": 610,
      "training_step_time": 0.38146352767944336
    },
    {
      "epoch": 3.729248046875e-06,
      "model_forward_time": 0.1147453784942627,
      "step": 611
    },
    {
      "epoch": 3.729248046875e-06,
      "step": 611,
      "training_step_time": 0.45026326179504395
    },
    {
      "epoch": 3.7353515625e-06,
      "model_forward_time": 0.11485075950622559,
      "step": 612
    },
    {
      "epoch": 3.7353515625e-06,
      "step": 612,
      "training_step_time": 0.9995315074920654
    },
    {
      "epoch": 3.741455078125e-06,
      "model_forward_time": 0.11340832710266113,
      "step": 613
    },
    {
      "epoch": 3.741455078125e-06,
      "step": 613,
      "training_step_time": 0.41344618797302246
    },
    {
      "epoch": 3.74755859375e-06,
      "model_forward_time": 0.11388397216796875,
      "step": 614
    },
    {
      "epoch": 3.74755859375e-06,
      "step": 614,
      "training_step_time": 0.4691460132598877
    },
    {
      "epoch": 3.753662109375e-06,
      "model_forward_time": 0.11407780647277832,
      "step": 615
    },
    {
      "epoch": 3.753662109375e-06,
      "step": 615,
      "training_step_time": 0.38997840881347656
    },
    {
      "epoch": 3.759765625e-06,
      "model_forward_time": 0.11487007141113281,
      "step": 616
    },
    {
      "epoch": 3.759765625e-06,
      "step": 616,
      "training_step_time": 0.4148843288421631
    },
    {
      "epoch": 3.765869140625e-06,
      "model_forward_time": 0.11463642120361328,
      "step": 617
    },
    {
      "epoch": 3.765869140625e-06,
      "step": 617,
      "training_step_time": 0.4156360626220703
    },
    {
      "epoch": 3.77197265625e-06,
      "model_forward_time": 0.11504411697387695,
      "step": 618
    },
    {
      "epoch": 3.77197265625e-06,
      "step": 618,
      "training_step_time": 0.6627392768859863
    },
    {
      "epoch": 3.778076171875e-06,
      "model_forward_time": 0.11462116241455078,
      "step": 619
    },
    {
      "epoch": 3.778076171875e-06,
      "step": 619,
      "training_step_time": 0.3985583782196045
    },
    {
      "epoch": 3.7841796875e-06,
      "grad_norm": 1.22978675365448,
      "learning_rate": 2.0666666666666666e-05,
      "loss": 0.2198,
      "step": 620
    },
    {
      "epoch": 3.7841796875e-06,
      "model_forward_time": 0.11427831649780273,
      "step": 620
    },
    {
      "epoch": 3.7841796875e-06,
      "step": 620,
      "training_step_time": 0.38040971755981445
    },
    {
      "epoch": 3.790283203125e-06,
      "model_forward_time": 0.11515402793884277,
      "step": 621
    },
    {
      "epoch": 3.790283203125e-06,
      "step": 621,
      "training_step_time": 0.3828287124633789
    },
    {
      "epoch": 3.79638671875e-06,
      "model_forward_time": 0.11470842361450195,
      "step": 622
    },
    {
      "epoch": 3.79638671875e-06,
      "step": 622,
      "training_step_time": 0.3986325263977051
    },
    {
      "epoch": 3.802490234375e-06,
      "model_forward_time": 0.11456751823425293,
      "step": 623
    },
    {
      "epoch": 3.802490234375e-06,
      "step": 623,
      "training_step_time": 0.3842782974243164
    },
    {
      "epoch": 3.80859375e-06,
      "model_forward_time": 0.11475300788879395,
      "step": 624
    },
    {
      "epoch": 3.80859375e-06,
      "step": 624,
      "training_step_time": 0.8656482696533203
    },
    {
      "epoch": 3.814697265625e-06,
      "model_forward_time": 0.11548805236816406,
      "step": 625
    },
    {
      "epoch": 3.814697265625e-06,
      "step": 625,
      "training_step_time": 0.4500412940979004
    },
    {
      "epoch": 3.82080078125e-06,
      "model_forward_time": 0.11388373374938965,
      "step": 626
    },
    {
      "epoch": 3.82080078125e-06,
      "step": 626,
      "training_step_time": 0.43526744842529297
    },
    {
      "epoch": 3.826904296875e-06,
      "model_forward_time": 0.1141214370727539,
      "step": 627
    },
    {
      "epoch": 3.826904296875e-06,
      "step": 627,
      "training_step_time": 0.43099284172058105
    },
    {
      "epoch": 3.8330078125e-06,
      "model_forward_time": 0.11428070068359375,
      "step": 628
    },
    {
      "epoch": 3.8330078125e-06,
      "step": 628,
      "training_step_time": 0.38146066665649414
    },
    {
      "epoch": 3.839111328125e-06,
      "model_forward_time": 0.11422848701477051,
      "step": 629
    },
    {
      "epoch": 3.839111328125e-06,
      "step": 629,
      "training_step_time": 0.4095780849456787
    },
    {
      "epoch": 3.84521484375e-06,
      "grad_norm": 1.1359565258026123,
      "learning_rate": 2.1e-05,
      "loss": 0.2004,
      "step": 630
    },
    {
      "epoch": 3.84521484375e-06,
      "model_forward_time": 0.11436867713928223,
      "step": 630
    },
    {
      "epoch": 3.84521484375e-06,
      "step": 630,
      "training_step_time": 0.5177855491638184
    },
    {
      "epoch": 3.851318359375e-06,
      "model_forward_time": 0.11436867713928223,
      "step": 631
    },
    {
      "epoch": 3.851318359375e-06,
      "step": 631,
      "training_step_time": 0.3852500915527344
    },
    {
      "epoch": 3.857421875e-06,
      "model_forward_time": 0.11519646644592285,
      "step": 632
    },
    {
      "epoch": 3.857421875e-06,
      "step": 632,
      "training_step_time": 0.38747406005859375
    },
    {
      "epoch": 3.863525390625e-06,
      "model_forward_time": 0.11539220809936523,
      "step": 633
    },
    {
      "epoch": 3.863525390625e-06,
      "step": 633,
      "training_step_time": 0.384143590927124
    },
    {
      "epoch": 3.86962890625e-06,
      "model_forward_time": 0.11512422561645508,
      "step": 634
    },
    {
      "epoch": 3.86962890625e-06,
      "step": 634,
      "training_step_time": 0.3964996337890625
    },
    {
      "epoch": 3.875732421875e-06,
      "model_forward_time": 0.11458778381347656,
      "step": 635
    },
    {
      "epoch": 3.875732421875e-06,
      "step": 635,
      "training_step_time": 0.38544344902038574
    },
    {
      "epoch": 3.8818359375e-06,
      "model_forward_time": 0.11517786979675293,
      "step": 636
    },
    {
      "epoch": 3.8818359375e-06,
      "step": 636,
      "training_step_time": 0.6350977420806885
    },
    {
      "epoch": 3.887939453125e-06,
      "model_forward_time": 0.11524248123168945,
      "step": 637
    },
    {
      "epoch": 3.887939453125e-06,
      "step": 637,
      "training_step_time": 0.40221524238586426
    },
    {
      "epoch": 3.89404296875e-06,
      "model_forward_time": 0.11452865600585938,
      "step": 638
    },
    {
      "epoch": 3.89404296875e-06,
      "step": 638,
      "training_step_time": 0.48713064193725586
    },
    {
      "epoch": 3.900146484375e-06,
      "model_forward_time": 0.11438226699829102,
      "step": 639
    },
    {
      "epoch": 3.900146484375e-06,
      "step": 639,
      "training_step_time": 0.4547595977783203
    },
    {
      "epoch": 3.90625e-06,
      "grad_norm": 1.2750078439712524,
      "learning_rate": 2.1333333333333335e-05,
      "loss": 0.2141,
      "step": 640
    },
    {
      "epoch": 3.90625e-06,
      "model_forward_time": 0.11919713020324707,
      "step": 640
    },
    {
      "epoch": 3.90625e-06,
      "step": 640,
      "training_step_time": 0.4258546829223633
    },
    {
      "epoch": 3.912353515625e-06,
      "model_forward_time": 0.11417603492736816,
      "step": 641
    },
    {
      "epoch": 3.912353515625e-06,
      "step": 641,
      "training_step_time": 0.47188878059387207
    },
    {
      "epoch": 3.91845703125e-06,
      "model_forward_time": 0.11559247970581055,
      "step": 642
    },
    {
      "epoch": 3.91845703125e-06,
      "step": 642,
      "training_step_time": 0.4772982597351074
    },
    {
      "epoch": 3.924560546875e-06,
      "model_forward_time": 0.1150364875793457,
      "step": 643
    },
    {
      "epoch": 3.924560546875e-06,
      "step": 643,
      "training_step_time": 0.42975926399230957
    },
    {
      "epoch": 3.9306640625e-06,
      "model_forward_time": 0.1150212287902832,
      "step": 644
    },
    {
      "epoch": 3.9306640625e-06,
      "step": 644,
      "training_step_time": 0.3932809829711914
    },
    {
      "epoch": 3.936767578125e-06,
      "model_forward_time": 0.11490225791931152,
      "step": 645
    },
    {
      "epoch": 3.936767578125e-06,
      "step": 645,
      "training_step_time": 0.40143418312072754
    },
    {
      "epoch": 3.94287109375e-06,
      "model_forward_time": 0.11632990837097168,
      "step": 646
    },
    {
      "epoch": 3.94287109375e-06,
      "step": 646,
      "training_step_time": 0.393754243850708
    },
    {
      "epoch": 3.948974609375e-06,
      "model_forward_time": 0.11573195457458496,
      "step": 647
    },
    {
      "epoch": 3.948974609375e-06,
      "step": 647,
      "training_step_time": 0.3891105651855469
    },
    {
      "epoch": 3.955078125e-06,
      "model_forward_time": 0.1145317554473877,
      "step": 648
    },
    {
      "epoch": 3.955078125e-06,
      "step": 648,
      "training_step_time": 0.5031626224517822
    },
    {
      "epoch": 3.961181640625e-06,
      "model_forward_time": 0.11536860466003418,
      "step": 649
    },
    {
      "epoch": 3.961181640625e-06,
      "step": 649,
      "training_step_time": 0.38752031326293945
    },
    {
      "epoch": 3.96728515625e-06,
      "grad_norm": 1.7488712072372437,
      "learning_rate": 2.1666666666666667e-05,
      "loss": 0.2235,
      "step": 650
    },
    {
      "epoch": 3.96728515625e-06,
      "model_forward_time": 0.11577033996582031,
      "step": 650
    },
    {
      "epoch": 3.96728515625e-06,
      "step": 650,
      "training_step_time": 0.3898482322692871
    },
    {
      "epoch": 3.973388671875e-06,
      "model_forward_time": 0.11502623558044434,
      "step": 651
    },
    {
      "epoch": 3.973388671875e-06,
      "step": 651,
      "training_step_time": 0.42479968070983887
    },
    {
      "epoch": 3.9794921875e-06,
      "model_forward_time": 0.1160433292388916,
      "step": 652
    },
    {
      "epoch": 3.9794921875e-06,
      "step": 652,
      "training_step_time": 0.41515421867370605
    },
    {
      "epoch": 3.985595703125e-06,
      "model_forward_time": 0.11683416366577148,
      "step": 653
    },
    {
      "epoch": 3.985595703125e-06,
      "step": 653,
      "training_step_time": 0.4534158706665039
    },
    {
      "epoch": 3.99169921875e-06,
      "model_forward_time": 0.11544656753540039,
      "step": 654
    },
    {
      "epoch": 3.99169921875e-06,
      "step": 654,
      "training_step_time": 0.6196043491363525
    },
    {
      "epoch": 3.997802734375e-06,
      "model_forward_time": 0.11478424072265625,
      "step": 655
    },
    {
      "epoch": 3.997802734375e-06,
      "step": 655,
      "training_step_time": 0.4748518466949463
    },
    {
      "epoch": 4.00390625e-06,
      "model_forward_time": 0.11471986770629883,
      "step": 656
    },
    {
      "epoch": 4.00390625e-06,
      "step": 656,
      "training_step_time": 0.434464693069458
    },
    {
      "epoch": 4.010009765625e-06,
      "model_forward_time": 0.11451840400695801,
      "step": 657
    },
    {
      "epoch": 4.010009765625e-06,
      "step": 657,
      "training_step_time": 0.3881657123565674
    },
    {
      "epoch": 4.01611328125e-06,
      "model_forward_time": 0.11436963081359863,
      "step": 658
    },
    {
      "epoch": 4.01611328125e-06,
      "step": 658,
      "training_step_time": 0.38692474365234375
    },
    {
      "epoch": 4.022216796875e-06,
      "model_forward_time": 0.11414051055908203,
      "step": 659
    },
    {
      "epoch": 4.022216796875e-06,
      "step": 659,
      "training_step_time": 0.389251708984375
    },
    {
      "epoch": 4.0283203125e-06,
      "grad_norm": 1.5419056415557861,
      "learning_rate": 2.2000000000000003e-05,
      "loss": 0.2196,
      "step": 660
    },
    {
      "epoch": 4.0283203125e-06,
      "model_forward_time": 0.11526918411254883,
      "step": 660
    },
    {
      "epoch": 4.0283203125e-06,
      "step": 660,
      "training_step_time": 0.5548181533813477
    },
    {
      "epoch": 4.034423828125e-06,
      "model_forward_time": 0.11499857902526855,
      "step": 661
    },
    {
      "epoch": 4.034423828125e-06,
      "step": 661,
      "training_step_time": 0.397735595703125
    },
    {
      "epoch": 4.04052734375e-06,
      "model_forward_time": 0.11534523963928223,
      "step": 662
    },
    {
      "epoch": 4.04052734375e-06,
      "step": 662,
      "training_step_time": 0.3911898136138916
    },
    {
      "epoch": 4.046630859375e-06,
      "model_forward_time": 0.11553311347961426,
      "step": 663
    },
    {
      "epoch": 4.046630859375e-06,
      "step": 663,
      "training_step_time": 0.39119887351989746
    },
    {
      "epoch": 4.052734375e-06,
      "model_forward_time": 0.11493921279907227,
      "step": 664
    },
    {
      "epoch": 4.052734375e-06,
      "step": 664,
      "training_step_time": 0.39086151123046875
    },
    {
      "epoch": 4.058837890625e-06,
      "model_forward_time": 0.11547327041625977,
      "step": 665
    },
    {
      "epoch": 4.058837890625e-06,
      "step": 665,
      "training_step_time": 0.38899803161621094
    },
    {
      "epoch": 4.06494140625e-06,
      "model_forward_time": 0.11473274230957031,
      "step": 666
    },
    {
      "epoch": 4.06494140625e-06,
      "step": 666,
      "training_step_time": 0.6974160671234131
    },
    {
      "epoch": 4.071044921875e-06,
      "model_forward_time": 0.11462163925170898,
      "step": 667
    },
    {
      "epoch": 4.071044921875e-06,
      "step": 667,
      "training_step_time": 0.47119140625
    },
    {
      "epoch": 4.0771484375e-06,
      "model_forward_time": 0.1160728931427002,
      "step": 668
    },
    {
      "epoch": 4.0771484375e-06,
      "step": 668,
      "training_step_time": 0.47118043899536133
    },
    {
      "epoch": 4.083251953125e-06,
      "model_forward_time": 0.11498260498046875,
      "step": 669
    },
    {
      "epoch": 4.083251953125e-06,
      "step": 669,
      "training_step_time": 0.48494839668273926
    },
    {
      "epoch": 4.08935546875e-06,
      "grad_norm": 1.611844539642334,
      "learning_rate": 2.2333333333333335e-05,
      "loss": 0.2469,
      "step": 670
    },
    {
      "epoch": 4.08935546875e-06,
      "model_forward_time": 0.11465907096862793,
      "step": 670
    },
    {
      "epoch": 4.08935546875e-06,
      "step": 670,
      "training_step_time": 0.46581482887268066
    },
    {
      "epoch": 4.095458984375e-06,
      "model_forward_time": 0.11399531364440918,
      "step": 671
    },
    {
      "epoch": 4.095458984375e-06,
      "step": 671,
      "training_step_time": 0.38835859298706055
    },
    {
      "epoch": 4.1015625e-06,
      "model_forward_time": 0.11579632759094238,
      "step": 672
    },
    {
      "epoch": 4.1015625e-06,
      "step": 672,
      "training_step_time": 0.7511868476867676
    },
    {
      "epoch": 4.107666015625e-06,
      "model_forward_time": 0.11452817916870117,
      "step": 673
    },
    {
      "epoch": 4.107666015625e-06,
      "step": 673,
      "training_step_time": 0.3929097652435303
    },
    {
      "epoch": 4.11376953125e-06,
      "model_forward_time": 0.11442780494689941,
      "step": 674
    },
    {
      "epoch": 4.11376953125e-06,
      "step": 674,
      "training_step_time": 0.4068775177001953
    },
    {
      "epoch": 4.119873046875e-06,
      "model_forward_time": 0.11467218399047852,
      "step": 675
    },
    {
      "epoch": 4.119873046875e-06,
      "step": 675,
      "training_step_time": 0.39038729667663574
    },
    {
      "epoch": 4.1259765625e-06,
      "model_forward_time": 0.11443614959716797,
      "step": 676
    },
    {
      "epoch": 4.1259765625e-06,
      "step": 676,
      "training_step_time": 0.39723730087280273
    },
    {
      "epoch": 4.132080078125e-06,
      "model_forward_time": 0.11467361450195312,
      "step": 677
    },
    {
      "epoch": 4.132080078125e-06,
      "step": 677,
      "training_step_time": 0.39386582374572754
    },
    {
      "epoch": 4.13818359375e-06,
      "model_forward_time": 0.11531352996826172,
      "step": 678
    },
    {
      "epoch": 4.13818359375e-06,
      "step": 678,
      "training_step_time": 0.8291652202606201
    },
    {
      "epoch": 4.144287109375e-06,
      "model_forward_time": 0.11460256576538086,
      "step": 679
    },
    {
      "epoch": 4.144287109375e-06,
      "step": 679,
      "training_step_time": 0.4808773994445801
    },
    {
      "epoch": 4.150390625e-06,
      "grad_norm": 0.9757271409034729,
      "learning_rate": 2.2666666666666668e-05,
      "loss": 0.2232,
      "step": 680
    },
    {
      "epoch": 4.150390625e-06,
      "model_forward_time": 0.11409497261047363,
      "step": 680
    },
    {
      "epoch": 4.150390625e-06,
      "step": 680,
      "training_step_time": 0.44171714782714844
    },
    {
      "epoch": 4.156494140625e-06,
      "model_forward_time": 0.11597514152526855,
      "step": 681
    },
    {
      "epoch": 4.156494140625e-06,
      "step": 681,
      "training_step_time": 0.48319196701049805
    },
    {
      "epoch": 4.16259765625e-06,
      "model_forward_time": 0.11494016647338867,
      "step": 682
    },
    {
      "epoch": 4.16259765625e-06,
      "step": 682,
      "training_step_time": 0.4891955852508545
    },
    {
      "epoch": 4.168701171875e-06,
      "model_forward_time": 0.11401939392089844,
      "step": 683
    },
    {
      "epoch": 4.168701171875e-06,
      "step": 683,
      "training_step_time": 0.4642150402069092
    },
    {
      "epoch": 4.1748046875e-06,
      "model_forward_time": 0.11492538452148438,
      "step": 684
    },
    {
      "epoch": 4.1748046875e-06,
      "step": 684,
      "training_step_time": 0.39071202278137207
    },
    {
      "epoch": 4.180908203125e-06,
      "model_forward_time": 0.11431026458740234,
      "step": 685
    },
    {
      "epoch": 4.180908203125e-06,
      "step": 685,
      "training_step_time": 0.3938117027282715
    },
    {
      "epoch": 4.18701171875e-06,
      "model_forward_time": 0.11454319953918457,
      "step": 686
    },
    {
      "epoch": 4.18701171875e-06,
      "step": 686,
      "training_step_time": 0.39244890213012695
    },
    {
      "epoch": 4.193115234375e-06,
      "model_forward_time": 0.11607646942138672,
      "step": 687
    },
    {
      "epoch": 4.193115234375e-06,
      "step": 687,
      "training_step_time": 0.39053988456726074
    },
    {
      "epoch": 4.19921875e-06,
      "model_forward_time": 0.11532211303710938,
      "step": 688
    },
    {
      "epoch": 4.19921875e-06,
      "step": 688,
      "training_step_time": 0.39333510398864746
    },
    {
      "epoch": 4.205322265625e-06,
      "model_forward_time": 0.11500382423400879,
      "step": 689
    },
    {
      "epoch": 4.205322265625e-06,
      "step": 689,
      "training_step_time": 0.40746593475341797
    },
    {
      "epoch": 4.21142578125e-06,
      "grad_norm": 1.134266972541809,
      "learning_rate": 2.3000000000000003e-05,
      "loss": 0.2133,
      "step": 690
    },
    {
      "epoch": 4.21142578125e-06,
      "model_forward_time": 0.11594295501708984,
      "step": 690
    },
    {
      "epoch": 4.21142578125e-06,
      "step": 690,
      "training_step_time": 1.0402541160583496
    },
    {
      "epoch": 4.217529296875e-06,
      "model_forward_time": 0.11566829681396484,
      "step": 691
    },
    {
      "epoch": 4.217529296875e-06,
      "step": 691,
      "training_step_time": 0.45081424713134766
    },
    {
      "epoch": 4.2236328125e-06,
      "model_forward_time": 0.1142435073852539,
      "step": 692
    },
    {
      "epoch": 4.2236328125e-06,
      "step": 692,
      "training_step_time": 0.46073412895202637
    },
    {
      "epoch": 4.229736328125e-06,
      "model_forward_time": 0.11435055732727051,
      "step": 693
    },
    {
      "epoch": 4.229736328125e-06,
      "step": 693,
      "training_step_time": 0.4176619052886963
    },
    {
      "epoch": 4.23583984375e-06,
      "model_forward_time": 0.11449122428894043,
      "step": 694
    },
    {
      "epoch": 4.23583984375e-06,
      "step": 694,
      "training_step_time": 0.3631930351257324
    },
    {
      "epoch": 4.241943359375e-06,
      "model_forward_time": 0.1147005558013916,
      "step": 695
    },
    {
      "epoch": 4.241943359375e-06,
      "step": 695,
      "training_step_time": 0.47155165672302246
    },
    {
      "epoch": 4.248046875e-06,
      "model_forward_time": 0.1149137020111084,
      "step": 696
    },
    {
      "epoch": 4.248046875e-06,
      "step": 696,
      "training_step_time": 0.47505760192871094
    },
    {
      "epoch": 4.254150390625e-06,
      "model_forward_time": 0.11440491676330566,
      "step": 697
    },
    {
      "epoch": 4.254150390625e-06,
      "step": 697,
      "training_step_time": 0.3917713165283203
    },
    {
      "epoch": 4.26025390625e-06,
      "model_forward_time": 0.1147150993347168,
      "step": 698
    },
    {
      "epoch": 4.26025390625e-06,
      "step": 698,
      "training_step_time": 0.38777613639831543
    },
    {
      "epoch": 4.266357421875e-06,
      "model_forward_time": 0.11414742469787598,
      "step": 699
    },
    {
      "epoch": 4.266357421875e-06,
      "step": 699,
      "training_step_time": 0.391556978225708
    },
    {
      "epoch": 4.2724609375e-06,
      "grad_norm": 1.0388742685317993,
      "learning_rate": 2.3333333333333336e-05,
      "loss": 0.216,
      "step": 700
    },
    {
      "epoch": 4.2724609375e-06,
      "model_forward_time": 0.11535048484802246,
      "step": 700
    },
    {
      "epoch": 4.2724609375e-06,
      "step": 700,
      "training_step_time": 0.39750003814697266
    },
    {
      "epoch": 4.278564453125e-06,
      "model_forward_time": 0.11499977111816406,
      "step": 701
    },
    {
      "epoch": 4.278564453125e-06,
      "step": 701,
      "training_step_time": 0.38860058784484863
    },
    {
      "epoch": 4.28466796875e-06,
      "model_forward_time": 0.11496710777282715,
      "step": 702
    },
    {
      "epoch": 4.28466796875e-06,
      "step": 702,
      "training_step_time": 0.7900667190551758
    },
    {
      "epoch": 4.290771484375e-06,
      "model_forward_time": 0.11507081985473633,
      "step": 703
    },
    {
      "epoch": 4.290771484375e-06,
      "step": 703,
      "training_step_time": 0.3712747097015381
    },
    {
      "epoch": 4.296875e-06,
      "model_forward_time": 0.11493372917175293,
      "step": 704
    },
    {
      "epoch": 4.296875e-06,
      "step": 704,
      "training_step_time": 0.3849306106567383
    },
    {
      "epoch": 4.302978515625e-06,
      "model_forward_time": 0.11483979225158691,
      "step": 705
    },
    {
      "epoch": 4.302978515625e-06,
      "step": 705,
      "training_step_time": 0.4602165222167969
    },
    {
      "epoch": 4.30908203125e-06,
      "model_forward_time": 0.11526775360107422,
      "step": 706
    },
    {
      "epoch": 4.30908203125e-06,
      "step": 706,
      "training_step_time": 0.46991682052612305
    },
    {
      "epoch": 4.315185546875e-06,
      "model_forward_time": 0.11450052261352539,
      "step": 707
    },
    {
      "epoch": 4.315185546875e-06,
      "step": 707,
      "training_step_time": 0.43523430824279785
    },
    {
      "epoch": 4.3212890625e-06,
      "model_forward_time": 0.11641955375671387,
      "step": 708
    },
    {
      "epoch": 4.3212890625e-06,
      "step": 708,
      "training_step_time": 0.9474716186523438
    },
    {
      "epoch": 4.327392578125e-06,
      "model_forward_time": 0.11502647399902344,
      "step": 709
    },
    {
      "epoch": 4.327392578125e-06,
      "step": 709,
      "training_step_time": 0.4270970821380615
    },
    {
      "epoch": 4.33349609375e-06,
      "grad_norm": 0.9237807393074036,
      "learning_rate": 2.3666666666666668e-05,
      "loss": 0.2262,
      "step": 710
    },
    {
      "epoch": 4.33349609375e-06,
      "model_forward_time": 0.11446785926818848,
      "step": 710
    },
    {
      "epoch": 4.33349609375e-06,
      "step": 710,
      "training_step_time": 0.3929901123046875
    },
    {
      "epoch": 4.339599609375e-06,
      "model_forward_time": 0.11401987075805664,
      "step": 711
    },
    {
      "epoch": 4.339599609375e-06,
      "step": 711,
      "training_step_time": 0.3863034248352051
    },
    {
      "epoch": 4.345703125e-06,
      "model_forward_time": 0.11408472061157227,
      "step": 712
    },
    {
      "epoch": 4.345703125e-06,
      "step": 712,
      "training_step_time": 0.38372302055358887
    },
    {
      "epoch": 4.351806640625e-06,
      "model_forward_time": 0.1144404411315918,
      "step": 713
    },
    {
      "epoch": 4.351806640625e-06,
      "step": 713,
      "training_step_time": 0.3851799964904785
    },
    {
      "epoch": 4.35791015625e-06,
      "model_forward_time": 0.11566424369812012,
      "step": 714
    },
    {
      "epoch": 4.35791015625e-06,
      "step": 714,
      "training_step_time": 0.8741405010223389
    },
    {
      "epoch": 4.364013671875e-06,
      "model_forward_time": 0.11482954025268555,
      "step": 715
    },
    {
      "epoch": 4.364013671875e-06,
      "step": 715,
      "training_step_time": 0.3902623653411865
    },
    {
      "epoch": 4.3701171875e-06,
      "model_forward_time": 0.11447572708129883,
      "step": 716
    },
    {
      "epoch": 4.3701171875e-06,
      "step": 716,
      "training_step_time": 0.39763426780700684
    },
    {
      "epoch": 4.376220703125e-06,
      "model_forward_time": 0.11455631256103516,
      "step": 717
    },
    {
      "epoch": 4.376220703125e-06,
      "step": 717,
      "training_step_time": 0.421905517578125
    },
    {
      "epoch": 4.38232421875e-06,
      "model_forward_time": 0.11491656303405762,
      "step": 718
    },
    {
      "epoch": 4.38232421875e-06,
      "step": 718,
      "training_step_time": 0.3941354751586914
    },
    {
      "epoch": 4.388427734375e-06,
      "model_forward_time": 0.11481499671936035,
      "step": 719
    },
    {
      "epoch": 4.388427734375e-06,
      "step": 719,
      "training_step_time": 0.37660837173461914
    },
    {
      "epoch": 4.39453125e-06,
      "grad_norm": 1.1396279335021973,
      "learning_rate": 2.4e-05,
      "loss": 0.213,
      "step": 720
    },
    {
      "epoch": 4.39453125e-06,
      "model_forward_time": 0.11510682106018066,
      "step": 720
    },
    {
      "epoch": 4.39453125e-06,
      "step": 720,
      "training_step_time": 0.7226572036743164
    },
    {
      "epoch": 4.400634765625e-06,
      "model_forward_time": 0.11492085456848145,
      "step": 721
    },
    {
      "epoch": 4.400634765625e-06,
      "step": 721,
      "training_step_time": 0.492279052734375
    },
    {
      "epoch": 4.40673828125e-06,
      "model_forward_time": 0.11520051956176758,
      "step": 722
    },
    {
      "epoch": 4.40673828125e-06,
      "step": 722,
      "training_step_time": 0.42713022232055664
    },
    {
      "epoch": 4.412841796875e-06,
      "model_forward_time": 0.11494112014770508,
      "step": 723
    },
    {
      "epoch": 4.412841796875e-06,
      "step": 723,
      "training_step_time": 0.46254611015319824
    },
    {
      "epoch": 4.4189453125e-06,
      "model_forward_time": 0.11437559127807617,
      "step": 724
    },
    {
      "epoch": 4.4189453125e-06,
      "step": 724,
      "training_step_time": 0.38756346702575684
    },
    {
      "epoch": 4.425048828125e-06,
      "model_forward_time": 0.1139519214630127,
      "step": 725
    },
    {
      "epoch": 4.425048828125e-06,
      "step": 725,
      "training_step_time": 0.3887197971343994
    },
    {
      "epoch": 4.43115234375e-06,
      "model_forward_time": 0.11541748046875,
      "step": 726
    },
    {
      "epoch": 4.43115234375e-06,
      "step": 726,
      "training_step_time": 0.9174158573150635
    },
    {
      "epoch": 4.437255859375e-06,
      "model_forward_time": 0.11436223983764648,
      "step": 727
    },
    {
      "epoch": 4.437255859375e-06,
      "step": 727,
      "training_step_time": 0.4072108268737793
    },
    {
      "epoch": 4.443359375e-06,
      "model_forward_time": 0.11396527290344238,
      "step": 728
    },
    {
      "epoch": 4.443359375e-06,
      "step": 728,
      "training_step_time": 0.3907294273376465
    },
    {
      "epoch": 4.449462890625e-06,
      "model_forward_time": 0.11464524269104004,
      "step": 729
    },
    {
      "epoch": 4.449462890625e-06,
      "step": 729,
      "training_step_time": 0.40229368209838867
    },
    {
      "epoch": 4.45556640625e-06,
      "grad_norm": 0.9114917516708374,
      "learning_rate": 2.4333333333333336e-05,
      "loss": 0.2095,
      "step": 730
    },
    {
      "epoch": 4.45556640625e-06,
      "model_forward_time": 0.1145620346069336,
      "step": 730
    },
    {
      "epoch": 4.45556640625e-06,
      "step": 730,
      "training_step_time": 0.4147024154663086
    },
    {
      "epoch": 4.461669921875e-06,
      "model_forward_time": 0.1140897274017334,
      "step": 731
    },
    {
      "epoch": 4.461669921875e-06,
      "step": 731,
      "training_step_time": 0.439772367477417
    },
    {
      "epoch": 4.4677734375e-06,
      "model_forward_time": 0.11451244354248047,
      "step": 732
    },
    {
      "epoch": 4.4677734375e-06,
      "step": 732,
      "training_step_time": 0.8656237125396729
    },
    {
      "epoch": 4.473876953125e-06,
      "model_forward_time": 0.11390256881713867,
      "step": 733
    },
    {
      "epoch": 4.473876953125e-06,
      "step": 733,
      "training_step_time": 0.42742085456848145
    },
    {
      "epoch": 4.47998046875e-06,
      "model_forward_time": 0.11426734924316406,
      "step": 734
    },
    {
      "epoch": 4.47998046875e-06,
      "step": 734,
      "training_step_time": 0.48584651947021484
    },
    {
      "epoch": 4.486083984375e-06,
      "model_forward_time": 0.11437749862670898,
      "step": 735
    },
    {
      "epoch": 4.486083984375e-06,
      "step": 735,
      "training_step_time": 0.44229650497436523
    },
    {
      "epoch": 4.4921875e-06,
      "model_forward_time": 0.13263964653015137,
      "step": 736
    },
    {
      "epoch": 4.4921875e-06,
      "step": 736,
      "training_step_time": 0.3920409679412842
    },
    {
      "epoch": 4.498291015625e-06,
      "model_forward_time": 0.11415219306945801,
      "step": 737
    },
    {
      "epoch": 4.498291015625e-06,
      "step": 737,
      "training_step_time": 0.3837161064147949
    },
    {
      "epoch": 4.50439453125e-06,
      "model_forward_time": 0.11458873748779297,
      "step": 738
    },
    {
      "epoch": 4.50439453125e-06,
      "step": 738,
      "training_step_time": 0.4686777591705322
    },
    {
      "epoch": 4.510498046875e-06,
      "model_forward_time": 0.11451888084411621,
      "step": 739
    },
    {
      "epoch": 4.510498046875e-06,
      "step": 739,
      "training_step_time": 0.38858461380004883
    },
    {
      "epoch": 4.5166015625e-06,
      "grad_norm": 1.14071524143219,
      "learning_rate": 2.466666666666667e-05,
      "loss": 0.1823,
      "step": 740
    },
    {
      "epoch": 4.5166015625e-06,
      "model_forward_time": 0.11510992050170898,
      "step": 740
    },
    {
      "epoch": 4.5166015625e-06,
      "step": 740,
      "training_step_time": 0.3916921615600586
    },
    {
      "epoch": 4.522705078125e-06,
      "model_forward_time": 0.11547541618347168,
      "step": 741
    },
    {
      "epoch": 4.522705078125e-06,
      "step": 741,
      "training_step_time": 0.4101419448852539
    },
    {
      "epoch": 4.52880859375e-06,
      "model_forward_time": 0.11446809768676758,
      "step": 742
    },
    {
      "epoch": 4.52880859375e-06,
      "step": 742,
      "training_step_time": 0.39273834228515625
    },
    {
      "epoch": 4.534912109375e-06,
      "model_forward_time": 0.11576223373413086,
      "step": 743
    },
    {
      "epoch": 4.534912109375e-06,
      "step": 743,
      "training_step_time": 0.40291404724121094
    },
    {
      "epoch": 4.541015625e-06,
      "model_forward_time": 0.11540412902832031,
      "step": 744
    },
    {
      "epoch": 4.541015625e-06,
      "step": 744,
      "training_step_time": 1.0789213180541992
    },
    {
      "epoch": 4.547119140625e-06,
      "model_forward_time": 0.11392641067504883,
      "step": 745
    },
    {
      "epoch": 4.547119140625e-06,
      "step": 745,
      "training_step_time": 0.4692103862762451
    },
    {
      "epoch": 4.55322265625e-06,
      "model_forward_time": 0.11476659774780273,
      "step": 746
    },
    {
      "epoch": 4.55322265625e-06,
      "step": 746,
      "training_step_time": 0.436542272567749
    },
    {
      "epoch": 4.559326171875e-06,
      "model_forward_time": 0.1140294075012207,
      "step": 747
    },
    {
      "epoch": 4.559326171875e-06,
      "step": 747,
      "training_step_time": 0.47097039222717285
    },
    {
      "epoch": 4.5654296875e-06,
      "model_forward_time": 0.11348581314086914,
      "step": 748
    },
    {
      "epoch": 4.5654296875e-06,
      "step": 748,
      "training_step_time": 0.4494435787200928
    },
    {
      "epoch": 4.571533203125e-06,
      "model_forward_time": 0.11387515068054199,
      "step": 749
    },
    {
      "epoch": 4.571533203125e-06,
      "step": 749,
      "training_step_time": 0.3917350769042969
    },
    {
      "epoch": 4.57763671875e-06,
      "grad_norm": 1.1281847953796387,
      "learning_rate": 2.5e-05,
      "loss": 0.2118,
      "step": 750
    },
    {
      "epoch": 4.57763671875e-06,
      "model_forward_time": 0.11513996124267578,
      "step": 750
    },
    {
      "epoch": 4.57763671875e-06,
      "step": 750,
      "training_step_time": 0.557905912399292
    },
    {
      "epoch": 4.583740234375e-06,
      "model_forward_time": 0.11420559883117676,
      "step": 751
    },
    {
      "epoch": 4.583740234375e-06,
      "step": 751,
      "training_step_time": 0.38519835472106934
    },
    {
      "epoch": 4.58984375e-06,
      "model_forward_time": 0.11474752426147461,
      "step": 752
    },
    {
      "epoch": 4.58984375e-06,
      "step": 752,
      "training_step_time": 0.3843095302581787
    },
    {
      "epoch": 4.595947265625e-06,
      "model_forward_time": 0.11447930335998535,
      "step": 753
    },
    {
      "epoch": 4.595947265625e-06,
      "step": 753,
      "training_step_time": 0.38863086700439453
    },
    {
      "epoch": 4.60205078125e-06,
      "model_forward_time": 0.11496376991271973,
      "step": 754
    },
    {
      "epoch": 4.60205078125e-06,
      "step": 754,
      "training_step_time": 0.41975831985473633
    },
    {
      "epoch": 4.608154296875e-06,
      "model_forward_time": 0.11579108238220215,
      "step": 755
    },
    {
      "epoch": 4.608154296875e-06,
      "step": 755,
      "training_step_time": 0.4317324161529541
    },
    {
      "epoch": 4.6142578125e-06,
      "model_forward_time": 0.11571049690246582,
      "step": 756
    },
    {
      "epoch": 4.6142578125e-06,
      "step": 756,
      "training_step_time": 0.8276469707489014
    },
    {
      "epoch": 4.620361328125e-06,
      "model_forward_time": 0.11437726020812988,
      "step": 757
    },
    {
      "epoch": 4.620361328125e-06,
      "step": 757,
      "training_step_time": 0.39843058586120605
    },
    {
      "epoch": 4.62646484375e-06,
      "model_forward_time": 0.11427640914916992,
      "step": 758
    },
    {
      "epoch": 4.62646484375e-06,
      "step": 758,
      "training_step_time": 0.4338092803955078
    },
    {
      "epoch": 4.632568359375e-06,
      "model_forward_time": 0.11449265480041504,
      "step": 759
    },
    {
      "epoch": 4.632568359375e-06,
      "step": 759,
      "training_step_time": 0.46454310417175293
    },
    {
      "epoch": 4.638671875e-06,
      "grad_norm": 1.200852870941162,
      "learning_rate": 2.5333333333333337e-05,
      "loss": 0.1907,
      "step": 760
    },
    {
      "epoch": 4.638671875e-06,
      "model_forward_time": 0.11425542831420898,
      "step": 760
    },
    {
      "epoch": 4.638671875e-06,
      "step": 760,
      "training_step_time": 0.46249985694885254
    },
    {
      "epoch": 4.644775390625e-06,
      "model_forward_time": 0.11553335189819336,
      "step": 761
    },
    {
      "epoch": 4.644775390625e-06,
      "step": 761,
      "training_step_time": 0.4080183506011963
    },
    {
      "epoch": 4.65087890625e-06,
      "model_forward_time": 0.1153712272644043,
      "step": 762
    },
    {
      "epoch": 4.65087890625e-06,
      "step": 762,
      "training_step_time": 0.7668821811676025
    },
    {
      "epoch": 4.656982421875e-06,
      "model_forward_time": 0.11428046226501465,
      "step": 763
    },
    {
      "epoch": 4.656982421875e-06,
      "step": 763,
      "training_step_time": 0.3866140842437744
    },
    {
      "epoch": 4.6630859375e-06,
      "model_forward_time": 0.11416912078857422,
      "step": 764
    },
    {
      "epoch": 4.6630859375e-06,
      "step": 764,
      "training_step_time": 0.3914520740509033
    },
    {
      "epoch": 4.669189453125e-06,
      "model_forward_time": 0.11435413360595703,
      "step": 765
    },
    {
      "epoch": 4.669189453125e-06,
      "step": 765,
      "training_step_time": 0.38828086853027344
    },
    {
      "epoch": 4.67529296875e-06,
      "model_forward_time": 0.11478233337402344,
      "step": 766
    },
    {
      "epoch": 4.67529296875e-06,
      "step": 766,
      "training_step_time": 0.404193639755249
    },
    {
      "epoch": 4.681396484375e-06,
      "model_forward_time": 0.11525440216064453,
      "step": 767
    },
    {
      "epoch": 4.681396484375e-06,
      "step": 767,
      "training_step_time": 0.4171023368835449
    },
    {
      "epoch": 4.6875e-06,
      "model_forward_time": 0.1153264045715332,
      "step": 768
    },
    {
      "epoch": 4.6875e-06,
      "step": 768,
      "training_step_time": 0.8709359169006348
    },
    {
      "epoch": 4.693603515625e-06,
      "model_forward_time": 0.11492609977722168,
      "step": 769
    },
    {
      "epoch": 4.693603515625e-06,
      "step": 769,
      "training_step_time": 0.3969275951385498
    },
    {
      "epoch": 4.69970703125e-06,
      "grad_norm": 1.1810721158981323,
      "learning_rate": 2.5666666666666666e-05,
      "loss": 0.2002,
      "step": 770
    },
    {
      "epoch": 4.69970703125e-06,
      "model_forward_time": 0.11464214324951172,
      "step": 770
    },
    {
      "epoch": 4.69970703125e-06,
      "step": 770,
      "training_step_time": 0.39827561378479004
    },
    {
      "epoch": 4.705810546875e-06,
      "model_forward_time": 0.11446213722229004,
      "step": 771
    },
    {
      "epoch": 4.705810546875e-06,
      "step": 771,
      "training_step_time": 0.4102485179901123
    },
    {
      "epoch": 4.7119140625e-06,
      "model_forward_time": 0.11467790603637695,
      "step": 772
    },
    {
      "epoch": 4.7119140625e-06,
      "step": 772,
      "training_step_time": 0.4799201488494873
    },
    {
      "epoch": 4.718017578125e-06,
      "model_forward_time": 0.11503124237060547,
      "step": 773
    },
    {
      "epoch": 4.718017578125e-06,
      "step": 773,
      "training_step_time": 0.4643130302429199
    },
    {
      "epoch": 4.72412109375e-06,
      "model_forward_time": 0.11492681503295898,
      "step": 774
    },
    {
      "epoch": 4.72412109375e-06,
      "step": 774,
      "training_step_time": 0.7083098888397217
    },
    {
      "epoch": 4.730224609375e-06,
      "model_forward_time": 0.11395478248596191,
      "step": 775
    },
    {
      "epoch": 4.730224609375e-06,
      "step": 775,
      "training_step_time": 0.3874657154083252
    },
    {
      "epoch": 4.736328125e-06,
      "model_forward_time": 0.11447858810424805,
      "step": 776
    },
    {
      "epoch": 4.736328125e-06,
      "step": 776,
      "training_step_time": 0.3906562328338623
    },
    {
      "epoch": 4.742431640625e-06,
      "model_forward_time": 0.11425042152404785,
      "step": 777
    },
    {
      "epoch": 4.742431640625e-06,
      "step": 777,
      "training_step_time": 0.38308286666870117
    },
    {
      "epoch": 4.74853515625e-06,
      "model_forward_time": 0.11501693725585938,
      "step": 778
    },
    {
      "epoch": 4.74853515625e-06,
      "step": 778,
      "training_step_time": 0.3897566795349121
    },
    {
      "epoch": 4.754638671875e-06,
      "model_forward_time": 0.11501502990722656,
      "step": 779
    },
    {
      "epoch": 4.754638671875e-06,
      "step": 779,
      "training_step_time": 0.4418761730194092
    },
    {
      "epoch": 4.7607421875e-06,
      "grad_norm": 1.0061239004135132,
      "learning_rate": 2.6000000000000002e-05,
      "loss": 0.1955,
      "step": 780
    },
    {
      "epoch": 4.7607421875e-06,
      "model_forward_time": 0.11557483673095703,
      "step": 780
    },
    {
      "epoch": 4.7607421875e-06,
      "step": 780,
      "training_step_time": 0.7428178787231445
    },
    {
      "epoch": 4.766845703125e-06,
      "model_forward_time": 0.1145772933959961,
      "step": 781
    },
    {
      "epoch": 4.766845703125e-06,
      "step": 781,
      "training_step_time": 0.3926115036010742
    },
    {
      "epoch": 4.77294921875e-06,
      "model_forward_time": 0.11444711685180664,
      "step": 782
    },
    {
      "epoch": 4.77294921875e-06,
      "step": 782,
      "training_step_time": 0.47801756858825684
    },
    {
      "epoch": 4.779052734375e-06,
      "model_forward_time": 0.1150197982788086,
      "step": 783
    },
    {
      "epoch": 4.779052734375e-06,
      "step": 783,
      "training_step_time": 0.4757692813873291
    },
    {
      "epoch": 4.78515625e-06,
      "model_forward_time": 0.11415505409240723,
      "step": 784
    },
    {
      "epoch": 4.78515625e-06,
      "step": 784,
      "training_step_time": 0.4339113235473633
    },
    {
      "epoch": 4.791259765625e-06,
      "model_forward_time": 0.11578011512756348,
      "step": 785
    },
    {
      "epoch": 4.791259765625e-06,
      "step": 785,
      "training_step_time": 0.442230224609375
    },
    {
      "epoch": 4.79736328125e-06,
      "model_forward_time": 0.11577081680297852,
      "step": 786
    },
    {
      "epoch": 4.79736328125e-06,
      "step": 786,
      "training_step_time": 0.7941992282867432
    },
    {
      "epoch": 4.803466796875e-06,
      "model_forward_time": 0.11453771591186523,
      "step": 787
    },
    {
      "epoch": 4.803466796875e-06,
      "step": 787,
      "training_step_time": 0.4374866485595703
    },
    {
      "epoch": 4.8095703125e-06,
      "model_forward_time": 0.11404800415039062,
      "step": 788
    },
    {
      "epoch": 4.8095703125e-06,
      "step": 788,
      "training_step_time": 0.3989837169647217
    },
    {
      "epoch": 4.815673828125e-06,
      "model_forward_time": 0.11457443237304688,
      "step": 789
    },
    {
      "epoch": 4.815673828125e-06,
      "step": 789,
      "training_step_time": 0.38931870460510254
    },
    {
      "epoch": 4.82177734375e-06,
      "grad_norm": 0.9708206653594971,
      "learning_rate": 2.633333333333333e-05,
      "loss": 0.189,
      "step": 790
    },
    {
      "epoch": 4.82177734375e-06,
      "model_forward_time": 0.11448907852172852,
      "step": 790
    },
    {
      "epoch": 4.82177734375e-06,
      "step": 790,
      "training_step_time": 0.3892989158630371
    },
    {
      "epoch": 4.827880859375e-06,
      "model_forward_time": 0.11504769325256348,
      "step": 791
    },
    {
      "epoch": 4.827880859375e-06,
      "step": 791,
      "training_step_time": 0.4421093463897705
    },
    {
      "epoch": 4.833984375e-06,
      "model_forward_time": 0.11487317085266113,
      "step": 792
    },
    {
      "epoch": 4.833984375e-06,
      "step": 792,
      "training_step_time": 0.9034712314605713
    },
    {
      "epoch": 4.840087890625e-06,
      "model_forward_time": 0.11379289627075195,
      "step": 793
    },
    {
      "epoch": 4.840087890625e-06,
      "step": 793,
      "training_step_time": 0.38620877265930176
    },
    {
      "epoch": 4.84619140625e-06,
      "model_forward_time": 0.11493420600891113,
      "step": 794
    },
    {
      "epoch": 4.84619140625e-06,
      "step": 794,
      "training_step_time": 0.38274073600769043
    },
    {
      "epoch": 4.852294921875e-06,
      "model_forward_time": 0.11398935317993164,
      "step": 795
    },
    {
      "epoch": 4.852294921875e-06,
      "step": 795,
      "training_step_time": 0.43085169792175293
    },
    {
      "epoch": 4.8583984375e-06,
      "model_forward_time": 0.11439037322998047,
      "step": 796
    },
    {
      "epoch": 4.8583984375e-06,
      "step": 796,
      "training_step_time": 0.38135385513305664
    },
    {
      "epoch": 4.864501953125e-06,
      "model_forward_time": 0.11393857002258301,
      "step": 797
    },
    {
      "epoch": 4.864501953125e-06,
      "step": 797,
      "training_step_time": 0.38854074478149414
    },
    {
      "epoch": 4.87060546875e-06,
      "model_forward_time": 0.11564111709594727,
      "step": 798
    },
    {
      "epoch": 4.87060546875e-06,
      "step": 798,
      "training_step_time": 0.5898542404174805
    },
    {
      "epoch": 4.876708984375e-06,
      "model_forward_time": 0.1152811050415039,
      "step": 799
    },
    {
      "epoch": 4.876708984375e-06,
      "step": 799,
      "training_step_time": 0.4154932498931885
    },
    {
      "epoch": 4.8828125e-06,
      "grad_norm": 1.3665679693222046,
      "learning_rate": 2.6666666666666667e-05,
      "loss": 0.1975,
      "step": 800
    },
    {
      "epoch": 4.8828125e-06,
      "model_forward_time": 0.11565589904785156,
      "step": 800
    },
    {
      "epoch": 4.8828125e-06,
      "step": 800,
      "training_step_time": 0.4743387699127197
    },
    {
      "epoch": 4.888916015625e-06,
      "model_forward_time": 0.11549830436706543,
      "step": 801
    },
    {
      "epoch": 4.888916015625e-06,
      "step": 801,
      "training_step_time": 0.48876237869262695
    },
    {
      "epoch": 4.89501953125e-06,
      "model_forward_time": 0.11457276344299316,
      "step": 802
    },
    {
      "epoch": 4.89501953125e-06,
      "step": 802,
      "training_step_time": 0.3849601745605469
    },
    {
      "epoch": 4.901123046875e-06,
      "model_forward_time": 0.1147761344909668,
      "step": 803
    },
    {
      "epoch": 4.901123046875e-06,
      "step": 803,
      "training_step_time": 0.4215848445892334
    },
    {
      "epoch": 4.9072265625e-06,
      "model_forward_time": 0.11492776870727539,
      "step": 804
    },
    {
      "epoch": 4.9072265625e-06,
      "step": 804,
      "training_step_time": 0.7033753395080566
    },
    {
      "epoch": 4.913330078125e-06,
      "model_forward_time": 0.11414575576782227,
      "step": 805
    },
    {
      "epoch": 4.913330078125e-06,
      "step": 805,
      "training_step_time": 0.39503955841064453
    },
    {
      "epoch": 4.91943359375e-06,
      "model_forward_time": 0.11479449272155762,
      "step": 806
    },
    {
      "epoch": 4.91943359375e-06,
      "step": 806,
      "training_step_time": 0.3792712688446045
    },
    {
      "epoch": 4.925537109375e-06,
      "model_forward_time": 0.11445212364196777,
      "step": 807
    },
    {
      "epoch": 4.925537109375e-06,
      "step": 807,
      "training_step_time": 0.3871276378631592
    },
    {
      "epoch": 4.931640625e-06,
      "model_forward_time": 0.11452388763427734,
      "step": 808
    },
    {
      "epoch": 4.931640625e-06,
      "step": 808,
      "training_step_time": 0.38632774353027344
    },
    {
      "epoch": 4.937744140625e-06,
      "model_forward_time": 0.11467528343200684,
      "step": 809
    },
    {
      "epoch": 4.937744140625e-06,
      "step": 809,
      "training_step_time": 0.38372159004211426
    },
    {
      "epoch": 4.94384765625e-06,
      "grad_norm": 1.370382308959961,
      "learning_rate": 2.7000000000000002e-05,
      "loss": 0.2104,
      "step": 810
    },
    {
      "epoch": 4.94384765625e-06,
      "model_forward_time": 0.1146998405456543,
      "step": 810
    },
    {
      "epoch": 4.94384765625e-06,
      "step": 810,
      "training_step_time": 0.6059484481811523
    },
    {
      "epoch": 4.949951171875e-06,
      "model_forward_time": 0.11470341682434082,
      "step": 811
    },
    {
      "epoch": 4.949951171875e-06,
      "step": 811,
      "training_step_time": 0.39766860008239746
    },
    {
      "epoch": 4.9560546875e-06,
      "model_forward_time": 0.11531186103820801,
      "step": 812
    },
    {
      "epoch": 4.9560546875e-06,
      "step": 812,
      "training_step_time": 0.45047521591186523
    },
    {
      "epoch": 4.962158203125e-06,
      "model_forward_time": 0.11546492576599121,
      "step": 813
    },
    {
      "epoch": 4.962158203125e-06,
      "step": 813,
      "training_step_time": 0.4485912322998047
    },
    {
      "epoch": 4.96826171875e-06,
      "model_forward_time": 0.11486530303955078,
      "step": 814
    },
    {
      "epoch": 4.96826171875e-06,
      "step": 814,
      "training_step_time": 0.47475481033325195
    },
    {
      "epoch": 4.974365234375e-06,
      "model_forward_time": 0.11455416679382324,
      "step": 815
    },
    {
      "epoch": 4.974365234375e-06,
      "step": 815,
      "training_step_time": 0.47914719581604004
    },
    {
      "epoch": 4.98046875e-06,
      "model_forward_time": 0.11516952514648438,
      "step": 816
    },
    {
      "epoch": 4.98046875e-06,
      "step": 816,
      "training_step_time": 0.8154816627502441
    },
    {
      "epoch": 4.986572265625e-06,
      "model_forward_time": 0.11442756652832031,
      "step": 817
    },
    {
      "epoch": 4.986572265625e-06,
      "step": 817,
      "training_step_time": 0.38348937034606934
    },
    {
      "epoch": 4.99267578125e-06,
      "model_forward_time": 0.11452269554138184,
      "step": 818
    },
    {
      "epoch": 4.99267578125e-06,
      "step": 818,
      "training_step_time": 0.3895587921142578
    },
    {
      "epoch": 4.998779296875e-06,
      "model_forward_time": 0.11414337158203125,
      "step": 819
    },
    {
      "epoch": 4.998779296875e-06,
      "step": 819,
      "training_step_time": 0.38072872161865234
    },
    {
      "epoch": 5.0048828125e-06,
      "grad_norm": 1.7305934429168701,
      "learning_rate": 2.733333333333333e-05,
      "loss": 0.1955,
      "step": 820
    },
    {
      "epoch": 5.0048828125e-06,
      "model_forward_time": 0.11434102058410645,
      "step": 820
    },
    {
      "epoch": 5.0048828125e-06,
      "step": 820,
      "training_step_time": 0.3829789161682129
    },
    {
      "epoch": 5.010986328125e-06,
      "model_forward_time": 0.11462950706481934,
      "step": 821
    },
    {
      "epoch": 5.010986328125e-06,
      "step": 821,
      "training_step_time": 0.3789865970611572
    },
    {
      "epoch": 5.01708984375e-06,
      "model_forward_time": 0.11468124389648438,
      "step": 822
    },
    {
      "epoch": 5.01708984375e-06,
      "step": 822,
      "training_step_time": 0.7821769714355469
    },
    {
      "epoch": 5.023193359375e-06,
      "model_forward_time": 0.11489105224609375,
      "step": 823
    },
    {
      "epoch": 5.023193359375e-06,
      "step": 823,
      "training_step_time": 0.4568188190460205
    },
    {
      "epoch": 5.029296875e-06,
      "model_forward_time": 0.11509585380554199,
      "step": 824
    },
    {
      "epoch": 5.029296875e-06,
      "step": 824,
      "training_step_time": 0.3935730457305908
    },
    {
      "epoch": 5.035400390625e-06,
      "model_forward_time": 0.11458683013916016,
      "step": 825
    },
    {
      "epoch": 5.035400390625e-06,
      "step": 825,
      "training_step_time": 0.3959836959838867
    },
    {
      "epoch": 5.04150390625e-06,
      "model_forward_time": 0.11542487144470215,
      "step": 826
    },
    {
      "epoch": 5.04150390625e-06,
      "step": 826,
      "training_step_time": 0.44175052642822266
    },
    {
      "epoch": 5.047607421875e-06,
      "model_forward_time": 0.11466670036315918,
      "step": 827
    },
    {
      "epoch": 5.047607421875e-06,
      "step": 827,
      "training_step_time": 0.49220895767211914
    },
    {
      "epoch": 5.0537109375e-06,
      "model_forward_time": 0.11552143096923828,
      "step": 828
    },
    {
      "epoch": 5.0537109375e-06,
      "step": 828,
      "training_step_time": 0.7473299503326416
    },
    {
      "epoch": 5.059814453125e-06,
      "model_forward_time": 0.114990234375,
      "step": 829
    },
    {
      "epoch": 5.059814453125e-06,
      "step": 829,
      "training_step_time": 0.39185214042663574
    },
    {
      "epoch": 5.06591796875e-06,
      "grad_norm": 1.3036689758300781,
      "learning_rate": 2.7666666666666667e-05,
      "loss": 0.2036,
      "step": 830
    },
    {
      "epoch": 5.06591796875e-06,
      "model_forward_time": 0.11466598510742188,
      "step": 830
    },
    {
      "epoch": 5.06591796875e-06,
      "step": 830,
      "training_step_time": 0.38861870765686035
    },
    {
      "epoch": 5.072021484375e-06,
      "model_forward_time": 0.11423587799072266,
      "step": 831
    },
    {
      "epoch": 5.072021484375e-06,
      "step": 831,
      "training_step_time": 0.3868858814239502
    },
    {
      "epoch": 5.078125e-06,
      "model_forward_time": 0.11506962776184082,
      "step": 832
    },
    {
      "epoch": 5.078125e-06,
      "step": 832,
      "training_step_time": 0.38451123237609863
    },
    {
      "epoch": 5.084228515625e-06,
      "model_forward_time": 0.11401653289794922,
      "step": 833
    },
    {
      "epoch": 5.084228515625e-06,
      "step": 833,
      "training_step_time": 0.382490873336792
    },
    {
      "epoch": 5.09033203125e-06,
      "model_forward_time": 0.11545228958129883,
      "step": 834
    },
    {
      "epoch": 5.09033203125e-06,
      "step": 834,
      "training_step_time": 0.9015848636627197
    },
    {
      "epoch": 5.096435546875e-06,
      "model_forward_time": 0.11435174942016602,
      "step": 835
    },
    {
      "epoch": 5.096435546875e-06,
      "step": 835,
      "training_step_time": 0.488523006439209
    },
    {
      "epoch": 5.1025390625e-06,
      "model_forward_time": 0.1147923469543457,
      "step": 836
    },
    {
      "epoch": 5.1025390625e-06,
      "step": 836,
      "training_step_time": 0.43741345405578613
    },
    {
      "epoch": 5.108642578125e-06,
      "model_forward_time": 0.11477994918823242,
      "step": 837
    },
    {
      "epoch": 5.108642578125e-06,
      "step": 837,
      "training_step_time": 0.3849339485168457
    },
    {
      "epoch": 5.11474609375e-06,
      "model_forward_time": 0.11453533172607422,
      "step": 838
    },
    {
      "epoch": 5.11474609375e-06,
      "step": 838,
      "training_step_time": 0.4994654655456543
    },
    {
      "epoch": 5.120849609375e-06,
      "model_forward_time": 0.11456012725830078,
      "step": 839
    },
    {
      "epoch": 5.120849609375e-06,
      "step": 839,
      "training_step_time": 0.4512672424316406
    },
    {
      "epoch": 5.126953125e-06,
      "grad_norm": 0.7597894668579102,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 0.1886,
      "step": 840
    },
    {
      "epoch": 5.126953125e-06,
      "model_forward_time": 0.11572957038879395,
      "step": 840
    },
    {
      "epoch": 5.126953125e-06,
      "step": 840,
      "training_step_time": 0.4781656265258789
    },
    {
      "epoch": 5.133056640625e-06,
      "model_forward_time": 0.11525750160217285,
      "step": 841
    },
    {
      "epoch": 5.133056640625e-06,
      "step": 841,
      "training_step_time": 0.39075660705566406
    },
    {
      "epoch": 5.13916015625e-06,
      "model_forward_time": 0.11507511138916016,
      "step": 842
    },
    {
      "epoch": 5.13916015625e-06,
      "step": 842,
      "training_step_time": 0.3878328800201416
    },
    {
      "epoch": 5.145263671875e-06,
      "model_forward_time": 0.11440730094909668,
      "step": 843
    },
    {
      "epoch": 5.145263671875e-06,
      "step": 843,
      "training_step_time": 0.40123915672302246
    },
    {
      "epoch": 5.1513671875e-06,
      "model_forward_time": 0.11471295356750488,
      "step": 844
    },
    {
      "epoch": 5.1513671875e-06,
      "step": 844,
      "training_step_time": 0.3948545455932617
    },
    {
      "epoch": 5.157470703125e-06,
      "model_forward_time": 0.11529135704040527,
      "step": 845
    },
    {
      "epoch": 5.157470703125e-06,
      "step": 845,
      "training_step_time": 0.40227794647216797
    },
    {
      "epoch": 5.16357421875e-06,
      "model_forward_time": 0.11491107940673828,
      "step": 846
    },
    {
      "epoch": 5.16357421875e-06,
      "step": 846,
      "training_step_time": 0.9151999950408936
    },
    {
      "epoch": 5.169677734375e-06,
      "model_forward_time": 0.11477470397949219,
      "step": 847
    },
    {
      "epoch": 5.169677734375e-06,
      "step": 847,
      "training_step_time": 0.38953137397766113
    },
    {
      "epoch": 5.17578125e-06,
      "model_forward_time": 0.11409521102905273,
      "step": 848
    },
    {
      "epoch": 5.17578125e-06,
      "step": 848,
      "training_step_time": 0.39414548873901367
    },
    {
      "epoch": 5.181884765625e-06,
      "model_forward_time": 0.11422586441040039,
      "step": 849
    },
    {
      "epoch": 5.181884765625e-06,
      "step": 849,
      "training_step_time": 0.4469892978668213
    },
    {
      "epoch": 5.18798828125e-06,
      "grad_norm": 1.0537887811660767,
      "learning_rate": 2.8333333333333335e-05,
      "loss": 0.2134,
      "step": 850
    },
    {
      "epoch": 5.18798828125e-06,
      "model_forward_time": 0.11395049095153809,
      "step": 850
    },
    {
      "epoch": 5.18798828125e-06,
      "step": 850,
      "training_step_time": 0.4194159507751465
    },
    {
      "epoch": 5.194091796875e-06,
      "model_forward_time": 0.11429953575134277,
      "step": 851
    },
    {
      "epoch": 5.194091796875e-06,
      "step": 851,
      "training_step_time": 0.3988308906555176
    },
    {
      "epoch": 5.2001953125e-06,
      "model_forward_time": 0.11521100997924805,
      "step": 852
    },
    {
      "epoch": 5.2001953125e-06,
      "step": 852,
      "training_step_time": 0.5345206260681152
    },
    {
      "epoch": 5.206298828125e-06,
      "model_forward_time": 0.11415672302246094,
      "step": 853
    },
    {
      "epoch": 5.206298828125e-06,
      "step": 853,
      "training_step_time": 0.4048902988433838
    },
    {
      "epoch": 5.21240234375e-06,
      "model_forward_time": 0.11466455459594727,
      "step": 854
    },
    {
      "epoch": 5.21240234375e-06,
      "step": 854,
      "training_step_time": 0.46231698989868164
    },
    {
      "epoch": 5.218505859375e-06,
      "model_forward_time": 0.11453914642333984,
      "step": 855
    },
    {
      "epoch": 5.218505859375e-06,
      "step": 855,
      "training_step_time": 0.38641810417175293
    },
    {
      "epoch": 5.224609375e-06,
      "model_forward_time": 0.11478853225708008,
      "step": 856
    },
    {
      "epoch": 5.224609375e-06,
      "step": 856,
      "training_step_time": 0.39278650283813477
    },
    {
      "epoch": 5.230712890625e-06,
      "model_forward_time": 0.11417865753173828,
      "step": 857
    },
    {
      "epoch": 5.230712890625e-06,
      "step": 857,
      "training_step_time": 0.38535547256469727
    },
    {
      "epoch": 5.23681640625e-06,
      "model_forward_time": 0.11596894264221191,
      "step": 858
    },
    {
      "epoch": 5.23681640625e-06,
      "step": 858,
      "training_step_time": 0.9643454551696777
    },
    {
      "epoch": 5.242919921875e-06,
      "model_forward_time": 0.11454272270202637,
      "step": 859
    },
    {
      "epoch": 5.242919921875e-06,
      "step": 859,
      "training_step_time": 0.38855791091918945
    },
    {
      "epoch": 5.2490234375e-06,
      "grad_norm": 0.9492697715759277,
      "learning_rate": 2.8666666666666668e-05,
      "loss": 0.1938,
      "step": 860
    },
    {
      "epoch": 5.2490234375e-06,
      "model_forward_time": 0.11416935920715332,
      "step": 860
    },
    {
      "epoch": 5.2490234375e-06,
      "step": 860,
      "training_step_time": 0.4047386646270752
    },
    {
      "epoch": 5.255126953125e-06,
      "model_forward_time": 0.11386895179748535,
      "step": 861
    },
    {
      "epoch": 5.255126953125e-06,
      "step": 861,
      "training_step_time": 0.43465447425842285
    },
    {
      "epoch": 5.26123046875e-06,
      "model_forward_time": 0.11425399780273438,
      "step": 862
    },
    {
      "epoch": 5.26123046875e-06,
      "step": 862,
      "training_step_time": 0.46036386489868164
    },
    {
      "epoch": 5.267333984375e-06,
      "model_forward_time": 0.11457252502441406,
      "step": 863
    },
    {
      "epoch": 5.267333984375e-06,
      "step": 863,
      "training_step_time": 0.4200270175933838
    },
    {
      "epoch": 5.2734375e-06,
      "model_forward_time": 0.11517906188964844,
      "step": 864
    },
    {
      "epoch": 5.2734375e-06,
      "step": 864,
      "training_step_time": 0.41338324546813965
    },
    {
      "epoch": 5.279541015625e-06,
      "model_forward_time": 0.11500263214111328,
      "step": 865
    },
    {
      "epoch": 5.279541015625e-06,
      "step": 865,
      "training_step_time": 0.4258298873901367
    },
    {
      "epoch": 5.28564453125e-06,
      "model_forward_time": 0.11466455459594727,
      "step": 866
    },
    {
      "epoch": 5.28564453125e-06,
      "step": 866,
      "training_step_time": 0.47531747817993164
    },
    {
      "epoch": 5.291748046875e-06,
      "model_forward_time": 0.11557602882385254,
      "step": 867
    },
    {
      "epoch": 5.291748046875e-06,
      "step": 867,
      "training_step_time": 0.48044371604919434
    },
    {
      "epoch": 5.2978515625e-06,
      "model_forward_time": 0.11573171615600586,
      "step": 868
    },
    {
      "epoch": 5.2978515625e-06,
      "step": 868,
      "training_step_time": 0.48190736770629883
    },
    {
      "epoch": 5.303955078125e-06,
      "model_forward_time": 0.11837625503540039,
      "step": 869
    },
    {
      "epoch": 5.303955078125e-06,
      "step": 869,
      "training_step_time": 0.4000992774963379
    },
    {
      "epoch": 5.31005859375e-06,
      "grad_norm": 1.2442091703414917,
      "learning_rate": 2.9e-05,
      "loss": 0.1937,
      "step": 870
    },
    {
      "epoch": 5.31005859375e-06,
      "model_forward_time": 0.11516308784484863,
      "step": 870
    },
    {
      "epoch": 5.31005859375e-06,
      "step": 870,
      "training_step_time": 0.6378076076507568
    },
    {
      "epoch": 5.316162109375e-06,
      "model_forward_time": 0.11417579650878906,
      "step": 871
    },
    {
      "epoch": 5.316162109375e-06,
      "step": 871,
      "training_step_time": 0.38768815994262695
    },
    {
      "epoch": 5.322265625e-06,
      "model_forward_time": 0.11507940292358398,
      "step": 872
    },
    {
      "epoch": 5.322265625e-06,
      "step": 872,
      "training_step_time": 0.38657641410827637
    },
    {
      "epoch": 5.328369140625e-06,
      "model_forward_time": 0.11464929580688477,
      "step": 873
    },
    {
      "epoch": 5.328369140625e-06,
      "step": 873,
      "training_step_time": 0.3841593265533447
    },
    {
      "epoch": 5.33447265625e-06,
      "model_forward_time": 0.11534476280212402,
      "step": 874
    },
    {
      "epoch": 5.33447265625e-06,
      "step": 874,
      "training_step_time": 0.3945729732513428
    },
    {
      "epoch": 5.340576171875e-06,
      "model_forward_time": 0.11488795280456543,
      "step": 875
    },
    {
      "epoch": 5.340576171875e-06,
      "step": 875,
      "training_step_time": 0.4256577491760254
    },
    {
      "epoch": 5.3466796875e-06,
      "model_forward_time": 0.11511683464050293,
      "step": 876
    },
    {
      "epoch": 5.3466796875e-06,
      "step": 876,
      "training_step_time": 1.0902419090270996
    },
    {
      "epoch": 5.352783203125e-06,
      "model_forward_time": 0.1147611141204834,
      "step": 877
    },
    {
      "epoch": 5.352783203125e-06,
      "step": 877,
      "training_step_time": 0.39672112464904785
    },
    {
      "epoch": 5.35888671875e-06,
      "model_forward_time": 0.11446619033813477,
      "step": 878
    },
    {
      "epoch": 5.35888671875e-06,
      "step": 878,
      "training_step_time": 0.4099297523498535
    },
    {
      "epoch": 5.364990234375e-06,
      "model_forward_time": 0.11379504203796387,
      "step": 879
    },
    {
      "epoch": 5.364990234375e-06,
      "step": 879,
      "training_step_time": 0.4339272975921631
    },
    {
      "epoch": 5.37109375e-06,
      "grad_norm": 1.1346290111541748,
      "learning_rate": 2.9333333333333336e-05,
      "loss": 0.1821,
      "step": 880
    },
    {
      "epoch": 5.37109375e-06,
      "model_forward_time": 0.11380195617675781,
      "step": 880
    },
    {
      "epoch": 5.37109375e-06,
      "step": 880,
      "training_step_time": 0.4340369701385498
    },
    {
      "epoch": 5.377197265625e-06,
      "model_forward_time": 0.1150052547454834,
      "step": 881
    },
    {
      "epoch": 5.377197265625e-06,
      "step": 881,
      "training_step_time": 0.38088011741638184
    },
    {
      "epoch": 5.38330078125e-06,
      "model_forward_time": 0.11539530754089355,
      "step": 882
    },
    {
      "epoch": 5.38330078125e-06,
      "step": 882,
      "training_step_time": 0.5232093334197998
    },
    {
      "epoch": 5.389404296875e-06,
      "model_forward_time": 0.1146841049194336,
      "step": 883
    },
    {
      "epoch": 5.389404296875e-06,
      "step": 883,
      "training_step_time": 0.3846278190612793
    },
    {
      "epoch": 5.3955078125e-06,
      "model_forward_time": 0.11605119705200195,
      "step": 884
    },
    {
      "epoch": 5.3955078125e-06,
      "step": 884,
      "training_step_time": 0.3934760093688965
    },
    {
      "epoch": 5.401611328125e-06,
      "model_forward_time": 0.11437058448791504,
      "step": 885
    },
    {
      "epoch": 5.401611328125e-06,
      "step": 885,
      "training_step_time": 0.4018540382385254
    },
    {
      "epoch": 5.40771484375e-06,
      "model_forward_time": 0.11487793922424316,
      "step": 886
    },
    {
      "epoch": 5.40771484375e-06,
      "step": 886,
      "training_step_time": 0.39193153381347656
    },
    {
      "epoch": 5.413818359375e-06,
      "model_forward_time": 0.11504173278808594,
      "step": 887
    },
    {
      "epoch": 5.413818359375e-06,
      "step": 887,
      "training_step_time": 0.3954653739929199
    },
    {
      "epoch": 5.419921875e-06,
      "model_forward_time": 0.1149284839630127,
      "step": 888
    },
    {
      "epoch": 5.419921875e-06,
      "step": 888,
      "training_step_time": 1.041215181350708
    },
    {
      "epoch": 5.426025390625e-06,
      "model_forward_time": 0.1144258975982666,
      "step": 889
    },
    {
      "epoch": 5.426025390625e-06,
      "step": 889,
      "training_step_time": 0.4702632427215576
    },
    {
      "epoch": 5.43212890625e-06,
      "grad_norm": 0.9311594367027283,
      "learning_rate": 2.9666666666666672e-05,
      "loss": 0.1729,
      "step": 890
    },
    {
      "epoch": 5.43212890625e-06,
      "model_forward_time": 0.11401963233947754,
      "step": 890
    },
    {
      "epoch": 5.43212890625e-06,
      "step": 890,
      "training_step_time": 0.3893754482269287
    },
    {
      "epoch": 5.438232421875e-06,
      "model_forward_time": 0.11430549621582031,
      "step": 891
    },
    {
      "epoch": 5.438232421875e-06,
      "step": 891,
      "training_step_time": 0.42136359214782715
    },
    {
      "epoch": 5.4443359375e-06,
      "model_forward_time": 0.11406159400939941,
      "step": 892
    },
    {
      "epoch": 5.4443359375e-06,
      "step": 892,
      "training_step_time": 0.3622548580169678
    },
    {
      "epoch": 5.450439453125e-06,
      "model_forward_time": 0.11361479759216309,
      "step": 893
    },
    {
      "epoch": 5.450439453125e-06,
      "step": 893,
      "training_step_time": 0.4555354118347168
    },
    {
      "epoch": 5.45654296875e-06,
      "model_forward_time": 0.1146855354309082,
      "step": 894
    },
    {
      "epoch": 5.45654296875e-06,
      "step": 894,
      "training_step_time": 0.4749162197113037
    },
    {
      "epoch": 5.462646484375e-06,
      "model_forward_time": 0.11455869674682617,
      "step": 895
    },
    {
      "epoch": 5.462646484375e-06,
      "step": 895,
      "training_step_time": 0.40015339851379395
    },
    {
      "epoch": 5.46875e-06,
      "model_forward_time": 0.11472702026367188,
      "step": 896
    },
    {
      "epoch": 5.46875e-06,
      "step": 896,
      "training_step_time": 0.39221858978271484
    },
    {
      "epoch": 5.474853515625e-06,
      "model_forward_time": 0.11528801918029785,
      "step": 897
    },
    {
      "epoch": 5.474853515625e-06,
      "step": 897,
      "training_step_time": 0.39566683769226074
    },
    {
      "epoch": 5.48095703125e-06,
      "model_forward_time": 0.11553812026977539,
      "step": 898
    },
    {
      "epoch": 5.48095703125e-06,
      "step": 898,
      "training_step_time": 0.38982415199279785
    },
    {
      "epoch": 5.487060546875e-06,
      "model_forward_time": 0.11493945121765137,
      "step": 899
    },
    {
      "epoch": 5.487060546875e-06,
      "step": 899,
      "training_step_time": 0.39887499809265137
    },
    {
      "epoch": 5.4931640625e-06,
      "grad_norm": 1.1311533451080322,
      "learning_rate": 3e-05,
      "loss": 0.1997,
      "step": 900
    },
    {
      "epoch": 5.4931640625e-06,
      "model_forward_time": 0.11487984657287598,
      "step": 900
    },
    {
      "epoch": 5.4931640625e-06,
      "step": 900,
      "training_step_time": 0.8561239242553711
    },
    {
      "epoch": 5.499267578125e-06,
      "model_forward_time": 0.1149911880493164,
      "step": 901
    },
    {
      "epoch": 5.499267578125e-06,
      "step": 901,
      "training_step_time": 0.428555965423584
    },
    {
      "epoch": 5.50537109375e-06,
      "model_forward_time": 0.11474180221557617,
      "step": 902
    },
    {
      "epoch": 5.50537109375e-06,
      "step": 902,
      "training_step_time": 0.42240142822265625
    },
    {
      "epoch": 5.511474609375e-06,
      "model_forward_time": 0.11457204818725586,
      "step": 903
    },
    {
      "epoch": 5.511474609375e-06,
      "step": 903,
      "training_step_time": 0.393108606338501
    },
    {
      "epoch": 5.517578125e-06,
      "model_forward_time": 0.1146693229675293,
      "step": 904
    },
    {
      "epoch": 5.517578125e-06,
      "step": 904,
      "training_step_time": 0.3856184482574463
    },
    {
      "epoch": 5.523681640625e-06,
      "model_forward_time": 0.11481499671936035,
      "step": 905
    },
    {
      "epoch": 5.523681640625e-06,
      "step": 905,
      "training_step_time": 0.43091535568237305
    },
    {
      "epoch": 5.52978515625e-06,
      "model_forward_time": 0.11647391319274902,
      "step": 906
    },
    {
      "epoch": 5.52978515625e-06,
      "step": 906,
      "training_step_time": 0.7464518547058105
    },
    {
      "epoch": 5.535888671875e-06,
      "model_forward_time": 0.11488175392150879,
      "step": 907
    },
    {
      "epoch": 5.535888671875e-06,
      "step": 907,
      "training_step_time": 0.4492800235748291
    },
    {
      "epoch": 5.5419921875e-06,
      "model_forward_time": 0.11418390274047852,
      "step": 908
    },
    {
      "epoch": 5.5419921875e-06,
      "step": 908,
      "training_step_time": 0.39121294021606445
    },
    {
      "epoch": 5.548095703125e-06,
      "model_forward_time": 0.11547517776489258,
      "step": 909
    },
    {
      "epoch": 5.548095703125e-06,
      "step": 909,
      "training_step_time": 0.3848092555999756
    },
    {
      "epoch": 5.55419921875e-06,
      "grad_norm": 1.1262726783752441,
      "learning_rate": 3.0333333333333337e-05,
      "loss": 0.1764,
      "step": 910
    },
    {
      "epoch": 5.55419921875e-06,
      "model_forward_time": 0.11484861373901367,
      "step": 910
    },
    {
      "epoch": 5.55419921875e-06,
      "step": 910,
      "training_step_time": 0.3767552375793457
    },
    {
      "epoch": 5.560302734375e-06,
      "model_forward_time": 0.1143958568572998,
      "step": 911
    },
    {
      "epoch": 5.560302734375e-06,
      "step": 911,
      "training_step_time": 0.38242197036743164
    },
    {
      "epoch": 5.56640625e-06,
      "model_forward_time": 0.11536073684692383,
      "step": 912
    },
    {
      "epoch": 5.56640625e-06,
      "step": 912,
      "training_step_time": 0.9834694862365723
    },
    {
      "epoch": 5.572509765625e-06,
      "model_forward_time": 0.11498689651489258,
      "step": 913
    },
    {
      "epoch": 5.572509765625e-06,
      "step": 913,
      "training_step_time": 0.4878072738647461
    },
    {
      "epoch": 5.57861328125e-06,
      "model_forward_time": 0.11420655250549316,
      "step": 914
    },
    {
      "epoch": 5.57861328125e-06,
      "step": 914,
      "training_step_time": 0.4664890766143799
    },
    {
      "epoch": 5.584716796875e-06,
      "model_forward_time": 0.1145317554473877,
      "step": 915
    },
    {
      "epoch": 5.584716796875e-06,
      "step": 915,
      "training_step_time": 0.41030120849609375
    },
    {
      "epoch": 5.5908203125e-06,
      "model_forward_time": 0.11409592628479004,
      "step": 916
    },
    {
      "epoch": 5.5908203125e-06,
      "step": 916,
      "training_step_time": 0.3850674629211426
    },
    {
      "epoch": 5.596923828125e-06,
      "model_forward_time": 0.11410307884216309,
      "step": 917
    },
    {
      "epoch": 5.596923828125e-06,
      "step": 917,
      "training_step_time": 0.38909149169921875
    },
    {
      "epoch": 5.60302734375e-06,
      "model_forward_time": 0.11499667167663574,
      "step": 918
    },
    {
      "epoch": 5.60302734375e-06,
      "step": 918,
      "training_step_time": 0.6462681293487549
    },
    {
      "epoch": 5.609130859375e-06,
      "model_forward_time": 0.1138467788696289,
      "step": 919
    },
    {
      "epoch": 5.609130859375e-06,
      "step": 919,
      "training_step_time": 0.4889335632324219
    },
    {
      "epoch": 5.615234375e-06,
      "grad_norm": 1.1378653049468994,
      "learning_rate": 3.066666666666667e-05,
      "loss": 0.194,
      "step": 920
    },
    {
      "epoch": 5.615234375e-06,
      "model_forward_time": 0.11497902870178223,
      "step": 920
    },
    {
      "epoch": 5.615234375e-06,
      "step": 920,
      "training_step_time": 0.4454929828643799
    },
    {
      "epoch": 5.621337890625e-06,
      "model_forward_time": 0.11562657356262207,
      "step": 921
    },
    {
      "epoch": 5.621337890625e-06,
      "step": 921,
      "training_step_time": 0.395632266998291
    },
    {
      "epoch": 5.62744140625e-06,
      "model_forward_time": 0.11469316482543945,
      "step": 922
    },
    {
      "epoch": 5.62744140625e-06,
      "step": 922,
      "training_step_time": 0.3835489749908447
    },
    {
      "epoch": 5.633544921875e-06,
      "model_forward_time": 0.1142117977142334,
      "step": 923
    },
    {
      "epoch": 5.633544921875e-06,
      "step": 923,
      "training_step_time": 0.37861037254333496
    },
    {
      "epoch": 5.6396484375e-06,
      "model_forward_time": 0.11508750915527344,
      "step": 924
    },
    {
      "epoch": 5.6396484375e-06,
      "step": 924,
      "training_step_time": 0.8555681705474854
    },
    {
      "epoch": 5.645751953125e-06,
      "model_forward_time": 0.1164543628692627,
      "step": 925
    },
    {
      "epoch": 5.645751953125e-06,
      "step": 925,
      "training_step_time": 0.4171617031097412
    },
    {
      "epoch": 5.65185546875e-06,
      "model_forward_time": 0.11619257926940918,
      "step": 926
    },
    {
      "epoch": 5.65185546875e-06,
      "step": 926,
      "training_step_time": 0.49977922439575195
    },
    {
      "epoch": 5.657958984375e-06,
      "model_forward_time": 0.11584758758544922,
      "step": 927
    },
    {
      "epoch": 5.657958984375e-06,
      "step": 927,
      "training_step_time": 0.5240564346313477
    },
    {
      "epoch": 5.6640625e-06,
      "model_forward_time": 0.12263870239257812,
      "step": 928
    },
    {
      "epoch": 5.6640625e-06,
      "step": 928,
      "training_step_time": 0.6709740161895752
    },
    {
      "epoch": 5.670166015625e-06,
      "model_forward_time": 0.12528347969055176,
      "step": 929
    },
    {
      "epoch": 5.670166015625e-06,
      "step": 929,
      "training_step_time": 0.6654930114746094
    },
    {
      "epoch": 5.67626953125e-06,
      "grad_norm": 1.0557554960250854,
      "learning_rate": 3.1e-05,
      "loss": 0.1838,
      "step": 930
    },
    {
      "epoch": 5.67626953125e-06,
      "model_forward_time": 0.1192469596862793,
      "step": 930
    },
    {
      "epoch": 5.67626953125e-06,
      "step": 930,
      "training_step_time": 0.8219482898712158
    },
    {
      "epoch": 5.682373046875e-06,
      "model_forward_time": 0.11838865280151367,
      "step": 931
    },
    {
      "epoch": 5.682373046875e-06,
      "step": 931,
      "training_step_time": 0.6494407653808594
    },
    {
      "epoch": 5.6884765625e-06,
      "model_forward_time": 0.12479758262634277,
      "step": 932
    },
    {
      "epoch": 5.6884765625e-06,
      "step": 932,
      "training_step_time": 0.7652781009674072
    },
    {
      "epoch": 5.694580078125e-06,
      "model_forward_time": 0.11899137496948242,
      "step": 933
    },
    {
      "epoch": 5.694580078125e-06,
      "step": 933,
      "training_step_time": 0.6822638511657715
    },
    {
      "epoch": 5.70068359375e-06,
      "model_forward_time": 0.11855387687683105,
      "step": 934
    },
    {
      "epoch": 5.70068359375e-06,
      "step": 934,
      "training_step_time": 0.7408745288848877
    },
    {
      "epoch": 5.706787109375e-06,
      "model_forward_time": 0.11853456497192383,
      "step": 935
    },
    {
      "epoch": 5.706787109375e-06,
      "step": 935,
      "training_step_time": 0.6630668640136719
    },
    {
      "epoch": 5.712890625e-06,
      "model_forward_time": 0.12116479873657227,
      "step": 936
    },
    {
      "epoch": 5.712890625e-06,
      "step": 936,
      "training_step_time": 0.7307074069976807
    },
    {
      "epoch": 5.718994140625e-06,
      "model_forward_time": 0.11587190628051758,
      "step": 937
    },
    {
      "epoch": 5.718994140625e-06,
      "step": 937,
      "training_step_time": 0.7249820232391357
    },
    {
      "epoch": 5.72509765625e-06,
      "model_forward_time": 0.1181330680847168,
      "step": 938
    },
    {
      "epoch": 5.72509765625e-06,
      "step": 938,
      "training_step_time": 0.652517557144165
    },
    {
      "epoch": 5.731201171875e-06,
      "model_forward_time": 0.1179494857788086,
      "step": 939
    },
    {
      "epoch": 5.731201171875e-06,
      "step": 939,
      "training_step_time": 0.7245683670043945
    },
    {
      "epoch": 5.7373046875e-06,
      "grad_norm": 0.7217634916305542,
      "learning_rate": 3.1333333333333334e-05,
      "loss": 0.1756,
      "step": 940
    },
    {
      "epoch": 5.7373046875e-06,
      "model_forward_time": 0.1192777156829834,
      "step": 940
    },
    {
      "epoch": 5.7373046875e-06,
      "step": 940,
      "training_step_time": 0.6484079360961914
    },
    {
      "epoch": 5.743408203125e-06,
      "model_forward_time": 0.11749863624572754,
      "step": 941
    },
    {
      "epoch": 5.743408203125e-06,
      "step": 941,
      "training_step_time": 0.6554031372070312
    },
    {
      "epoch": 5.74951171875e-06,
      "model_forward_time": 0.11931180953979492,
      "step": 942
    },
    {
      "epoch": 5.74951171875e-06,
      "step": 942,
      "training_step_time": 0.6660497188568115
    },
    {
      "epoch": 5.755615234375e-06,
      "model_forward_time": 0.1167445182800293,
      "step": 943
    },
    {
      "epoch": 5.755615234375e-06,
      "step": 943,
      "training_step_time": 0.655674934387207
    },
    {
      "epoch": 5.76171875e-06,
      "model_forward_time": 0.12101149559020996,
      "step": 944
    },
    {
      "epoch": 5.76171875e-06,
      "step": 944,
      "training_step_time": 0.6366870403289795
    },
    {
      "epoch": 5.767822265625e-06,
      "model_forward_time": 0.11698770523071289,
      "step": 945
    },
    {
      "epoch": 5.767822265625e-06,
      "step": 945,
      "training_step_time": 0.6275789737701416
    },
    {
      "epoch": 5.77392578125e-06,
      "model_forward_time": 0.11841392517089844,
      "step": 946
    },
    {
      "epoch": 5.77392578125e-06,
      "step": 946,
      "training_step_time": 0.725883960723877
    },
    {
      "epoch": 5.780029296875e-06,
      "model_forward_time": 0.13086795806884766,
      "step": 947
    },
    {
      "epoch": 5.780029296875e-06,
      "step": 947,
      "training_step_time": 0.5906555652618408
    },
    {
      "epoch": 5.7861328125e-06,
      "model_forward_time": 0.11817431449890137,
      "step": 948
    },
    {
      "epoch": 5.7861328125e-06,
      "step": 948,
      "training_step_time": 0.6535568237304688
    },
    {
      "epoch": 5.792236328125e-06,
      "model_forward_time": 0.12622809410095215,
      "step": 949
    },
    {
      "epoch": 5.792236328125e-06,
      "step": 949,
      "training_step_time": 0.7652688026428223
    },
    {
      "epoch": 5.79833984375e-06,
      "grad_norm": 1.2397196292877197,
      "learning_rate": 3.1666666666666666e-05,
      "loss": 0.1954,
      "step": 950
    },
    {
      "epoch": 5.79833984375e-06,
      "model_forward_time": 0.11728882789611816,
      "step": 950
    },
    {
      "epoch": 5.79833984375e-06,
      "step": 950,
      "training_step_time": 0.7122070789337158
    },
    {
      "epoch": 5.804443359375e-06,
      "model_forward_time": 0.12205624580383301,
      "step": 951
    },
    {
      "epoch": 5.804443359375e-06,
      "step": 951,
      "training_step_time": 0.6639015674591064
    },
    {
      "epoch": 5.810546875e-06,
      "model_forward_time": 0.11885857582092285,
      "step": 952
    },
    {
      "epoch": 5.810546875e-06,
      "step": 952,
      "training_step_time": 0.6615447998046875
    },
    {
      "epoch": 5.816650390625e-06,
      "model_forward_time": 0.12073707580566406,
      "step": 953
    },
    {
      "epoch": 5.816650390625e-06,
      "step": 953,
      "training_step_time": 0.6239645481109619
    },
    {
      "epoch": 5.82275390625e-06,
      "model_forward_time": 0.127349853515625,
      "step": 954
    },
    {
      "epoch": 5.82275390625e-06,
      "step": 954,
      "training_step_time": 0.7061307430267334
    },
    {
      "epoch": 5.828857421875e-06,
      "model_forward_time": 0.11623072624206543,
      "step": 955
    },
    {
      "epoch": 5.828857421875e-06,
      "step": 955,
      "training_step_time": 0.6264078617095947
    },
    {
      "epoch": 5.8349609375e-06,
      "model_forward_time": 0.12861323356628418,
      "step": 956
    },
    {
      "epoch": 5.8349609375e-06,
      "step": 956,
      "training_step_time": 0.7606816291809082
    },
    {
      "epoch": 5.841064453125e-06,
      "model_forward_time": 0.1225430965423584,
      "step": 957
    },
    {
      "epoch": 5.841064453125e-06,
      "step": 957,
      "training_step_time": 0.6518685817718506
    },
    {
      "epoch": 5.84716796875e-06,
      "model_forward_time": 0.11966204643249512,
      "step": 958
    },
    {
      "epoch": 5.84716796875e-06,
      "step": 958,
      "training_step_time": 0.6118063926696777
    },
    {
      "epoch": 5.853271484375e-06,
      "model_forward_time": 0.12067294120788574,
      "step": 959
    },
    {
      "epoch": 5.853271484375e-06,
      "step": 959,
      "training_step_time": 0.5510463714599609
    },
    {
      "epoch": 5.859375e-06,
      "grad_norm": 0.8774025440216064,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 0.1725,
      "step": 960
    },
    {
      "epoch": 5.859375e-06,
      "model_forward_time": 0.11942839622497559,
      "step": 960
    },
    {
      "epoch": 5.859375e-06,
      "step": 960,
      "training_step_time": 0.7392368316650391
    },
    {
      "epoch": 5.865478515625e-06,
      "model_forward_time": 0.11819744110107422,
      "step": 961
    },
    {
      "epoch": 5.865478515625e-06,
      "step": 961,
      "training_step_time": 0.672661304473877
    },
    {
      "epoch": 5.87158203125e-06,
      "model_forward_time": 0.12305736541748047,
      "step": 962
    },
    {
      "epoch": 5.87158203125e-06,
      "step": 962,
      "training_step_time": 0.6077322959899902
    },
    {
      "epoch": 5.877685546875e-06,
      "model_forward_time": 0.12895584106445312,
      "step": 963
    },
    {
      "epoch": 5.877685546875e-06,
      "step": 963,
      "training_step_time": 0.662039041519165
    },
    {
      "epoch": 5.8837890625e-06,
      "model_forward_time": 0.11788535118103027,
      "step": 964
    },
    {
      "epoch": 5.8837890625e-06,
      "step": 964,
      "training_step_time": 0.6420321464538574
    },
    {
      "epoch": 5.889892578125e-06,
      "model_forward_time": 0.11666154861450195,
      "step": 965
    },
    {
      "epoch": 5.889892578125e-06,
      "step": 965,
      "training_step_time": 0.6698830127716064
    },
    {
      "epoch": 5.89599609375e-06,
      "model_forward_time": 0.13130640983581543,
      "step": 966
    },
    {
      "epoch": 5.89599609375e-06,
      "step": 966,
      "training_step_time": 0.7010674476623535
    },
    {
      "epoch": 5.902099609375e-06,
      "model_forward_time": 0.12295722961425781,
      "step": 967
    },
    {
      "epoch": 5.902099609375e-06,
      "step": 967,
      "training_step_time": 0.6792657375335693
    },
    {
      "epoch": 5.908203125e-06,
      "model_forward_time": 0.11853671073913574,
      "step": 968
    },
    {
      "epoch": 5.908203125e-06,
      "step": 968,
      "training_step_time": 0.6396629810333252
    },
    {
      "epoch": 5.914306640625e-06,
      "model_forward_time": 0.11986231803894043,
      "step": 969
    },
    {
      "epoch": 5.914306640625e-06,
      "step": 969,
      "training_step_time": 0.6388187408447266
    },
    {
      "epoch": 5.92041015625e-06,
      "grad_norm": 0.7846910953521729,
      "learning_rate": 3.233333333333333e-05,
      "loss": 0.1767,
      "step": 970
    },
    {
      "epoch": 5.92041015625e-06,
      "model_forward_time": 0.12044882774353027,
      "step": 970
    },
    {
      "epoch": 5.92041015625e-06,
      "step": 970,
      "training_step_time": 0.7654087543487549
    },
    {
      "epoch": 5.926513671875e-06,
      "model_forward_time": 0.12412881851196289,
      "step": 971
    },
    {
      "epoch": 5.926513671875e-06,
      "step": 971,
      "training_step_time": 0.6442046165466309
    },
    {
      "epoch": 5.9326171875e-06,
      "model_forward_time": 0.12096619606018066,
      "step": 972
    },
    {
      "epoch": 5.9326171875e-06,
      "step": 972,
      "training_step_time": 0.6712021827697754
    },
    {
      "epoch": 5.938720703125e-06,
      "model_forward_time": 0.12372469902038574,
      "step": 973
    },
    {
      "epoch": 5.938720703125e-06,
      "step": 973,
      "training_step_time": 0.6515345573425293
    },
    {
      "epoch": 5.94482421875e-06,
      "model_forward_time": 0.12470340728759766,
      "step": 974
    },
    {
      "epoch": 5.94482421875e-06,
      "step": 974,
      "training_step_time": 0.6525802612304688
    },
    {
      "epoch": 5.950927734375e-06,
      "model_forward_time": 0.12180900573730469,
      "step": 975
    },
    {
      "epoch": 5.950927734375e-06,
      "step": 975,
      "training_step_time": 0.6605820655822754
    },
    {
      "epoch": 5.95703125e-06,
      "model_forward_time": 0.1173245906829834,
      "step": 976
    },
    {
      "epoch": 5.95703125e-06,
      "step": 976,
      "training_step_time": 0.6759047508239746
    },
    {
      "epoch": 5.963134765625e-06,
      "model_forward_time": 0.12062525749206543,
      "step": 977
    },
    {
      "epoch": 5.963134765625e-06,
      "step": 977,
      "training_step_time": 0.6418213844299316
    },
    {
      "epoch": 5.96923828125e-06,
      "model_forward_time": 0.11670041084289551,
      "step": 978
    },
    {
      "epoch": 5.96923828125e-06,
      "step": 978,
      "training_step_time": 0.7223889827728271
    },
    {
      "epoch": 5.975341796875e-06,
      "model_forward_time": 0.12154579162597656,
      "step": 979
    },
    {
      "epoch": 5.975341796875e-06,
      "step": 979,
      "training_step_time": 0.7106490135192871
    },
    {
      "epoch": 5.9814453125e-06,
      "grad_norm": 1.122510313987732,
      "learning_rate": 3.266666666666667e-05,
      "loss": 0.1874,
      "step": 980
    },
    {
      "epoch": 5.9814453125e-06,
      "model_forward_time": 0.12106752395629883,
      "step": 980
    },
    {
      "epoch": 5.9814453125e-06,
      "step": 980,
      "training_step_time": 0.7105462551116943
    },
    {
      "epoch": 5.987548828125e-06,
      "model_forward_time": 0.11945891380310059,
      "step": 981
    },
    {
      "epoch": 5.987548828125e-06,
      "step": 981,
      "training_step_time": 0.6739258766174316
    },
    {
      "epoch": 5.99365234375e-06,
      "model_forward_time": 0.11818790435791016,
      "step": 982
    },
    {
      "epoch": 5.99365234375e-06,
      "step": 982,
      "training_step_time": 0.6283535957336426
    },
    {
      "epoch": 5.999755859375e-06,
      "model_forward_time": 0.11742901802062988,
      "step": 983
    },
    {
      "epoch": 5.999755859375e-06,
      "step": 983,
      "training_step_time": 0.793632984161377
    },
    {
      "epoch": 6.005859375e-06,
      "model_forward_time": 0.11923956871032715,
      "step": 984
    },
    {
      "epoch": 6.005859375e-06,
      "step": 984,
      "training_step_time": 0.6377861499786377
    },
    {
      "epoch": 6.011962890625e-06,
      "model_forward_time": 0.12425589561462402,
      "step": 985
    },
    {
      "epoch": 6.011962890625e-06,
      "step": 985,
      "training_step_time": 0.7118980884552002
    },
    {
      "epoch": 6.01806640625e-06,
      "model_forward_time": 0.1192317008972168,
      "step": 986
    },
    {
      "epoch": 6.01806640625e-06,
      "step": 986,
      "training_step_time": 0.7081990242004395
    },
    {
      "epoch": 6.024169921875e-06,
      "model_forward_time": 0.11885929107666016,
      "step": 987
    },
    {
      "epoch": 6.024169921875e-06,
      "step": 987,
      "training_step_time": 0.7701773643493652
    },
    {
      "epoch": 6.0302734375e-06,
      "model_forward_time": 0.11955380439758301,
      "step": 988
    },
    {
      "epoch": 6.0302734375e-06,
      "step": 988,
      "training_step_time": 0.6516082286834717
    },
    {
      "epoch": 6.036376953125e-06,
      "model_forward_time": 0.1187448501586914,
      "step": 989
    },
    {
      "epoch": 6.036376953125e-06,
      "step": 989,
      "training_step_time": 0.6952617168426514
    },
    {
      "epoch": 6.04248046875e-06,
      "grad_norm": 1.109555959701538,
      "learning_rate": 3.3e-05,
      "loss": 0.1841,
      "step": 990
    },
    {
      "epoch": 6.04248046875e-06,
      "model_forward_time": 0.11623859405517578,
      "step": 990
    },
    {
      "epoch": 6.04248046875e-06,
      "step": 990,
      "training_step_time": 0.6878018379211426
    },
    {
      "epoch": 6.048583984375e-06,
      "model_forward_time": 0.1180112361907959,
      "step": 991
    },
    {
      "epoch": 6.048583984375e-06,
      "step": 991,
      "training_step_time": 0.6597878932952881
    },
    {
      "epoch": 6.0546875e-06,
      "model_forward_time": 0.12439966201782227,
      "step": 992
    },
    {
      "epoch": 6.0546875e-06,
      "step": 992,
      "training_step_time": 0.5926949977874756
    },
    {
      "epoch": 6.060791015625e-06,
      "model_forward_time": 0.11913084983825684,
      "step": 993
    },
    {
      "epoch": 6.060791015625e-06,
      "step": 993,
      "training_step_time": 0.6148390769958496
    },
    {
      "epoch": 6.06689453125e-06,
      "model_forward_time": 0.12468981742858887,
      "step": 994
    },
    {
      "epoch": 6.06689453125e-06,
      "step": 994,
      "training_step_time": 0.62862229347229
    },
    {
      "epoch": 6.072998046875e-06,
      "model_forward_time": 0.12328410148620605,
      "step": 995
    },
    {
      "epoch": 6.072998046875e-06,
      "step": 995,
      "training_step_time": 0.5934100151062012
    },
    {
      "epoch": 6.0791015625e-06,
      "model_forward_time": 0.12945866584777832,
      "step": 996
    },
    {
      "epoch": 6.0791015625e-06,
      "step": 996,
      "training_step_time": 0.6289079189300537
    },
    {
      "epoch": 6.085205078125e-06,
      "model_forward_time": 0.1304028034210205,
      "step": 997
    },
    {
      "epoch": 6.085205078125e-06,
      "step": 997,
      "training_step_time": 0.5414283275604248
    },
    {
      "epoch": 6.09130859375e-06,
      "model_forward_time": 0.1269855499267578,
      "step": 998
    },
    {
      "epoch": 6.09130859375e-06,
      "step": 998,
      "training_step_time": 0.600609302520752
    },
    {
      "epoch": 6.097412109375e-06,
      "model_forward_time": 0.11773371696472168,
      "step": 999
    },
    {
      "epoch": 6.097412109375e-06,
      "step": 999,
      "training_step_time": 0.54563307762146
    },
    {
      "epoch": 6.103515625e-06,
      "grad_norm": 0.9202449321746826,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.1799,
      "step": 1000
    },
    {
      "epoch": 6.103515625e-06,
      "model_forward_time": 0.11466312408447266,
      "step": 1000
    },
    {
      "epoch": 6.103515625e-06,
      "step": 1000,
      "training_step_time": 0.3600168228149414
    },
    {
      "epoch": 6.109619140625e-06,
      "model_forward_time": 0.11264967918395996,
      "step": 1001
    },
    {
      "epoch": 6.109619140625e-06,
      "step": 1001,
      "training_step_time": 0.4184916019439697
    },
    {
      "epoch": 6.11572265625e-06,
      "model_forward_time": 0.11320281028747559,
      "step": 1002
    },
    {
      "epoch": 6.11572265625e-06,
      "step": 1002,
      "training_step_time": 0.37920427322387695
    },
    {
      "epoch": 6.121826171875e-06,
      "model_forward_time": 0.11413240432739258,
      "step": 1003
    },
    {
      "epoch": 6.121826171875e-06,
      "step": 1003,
      "training_step_time": 0.38296055793762207
    },
    {
      "epoch": 6.1279296875e-06,
      "model_forward_time": 0.11664915084838867,
      "step": 1004
    },
    {
      "epoch": 6.1279296875e-06,
      "step": 1004,
      "training_step_time": 0.3885791301727295
    },
    {
      "epoch": 6.134033203125e-06,
      "model_forward_time": 0.11530208587646484,
      "step": 1005
    },
    {
      "epoch": 6.134033203125e-06,
      "step": 1005,
      "training_step_time": 0.3967764377593994
    },
    {
      "epoch": 6.14013671875e-06,
      "model_forward_time": 0.11543560028076172,
      "step": 1006
    },
    {
      "epoch": 6.14013671875e-06,
      "step": 1006,
      "training_step_time": 0.39858198165893555
    },
    {
      "epoch": 6.146240234375e-06,
      "model_forward_time": 0.11614274978637695,
      "step": 1007
    },
    {
      "epoch": 6.146240234375e-06,
      "step": 1007,
      "training_step_time": 0.4799633026123047
    },
    {
      "epoch": 6.15234375e-06,
      "model_forward_time": 0.11484646797180176,
      "step": 1008
    },
    {
      "epoch": 6.15234375e-06,
      "step": 1008,
      "training_step_time": 0.39124202728271484
    },
    {
      "epoch": 6.158447265625e-06,
      "model_forward_time": 0.11509203910827637,
      "step": 1009
    },
    {
      "epoch": 6.158447265625e-06,
      "step": 1009,
      "training_step_time": 0.392533540725708
    },
    {
      "epoch": 6.16455078125e-06,
      "grad_norm": 0.9100350141525269,
      "learning_rate": 3.366666666666667e-05,
      "loss": 0.1686,
      "step": 1010
    },
    {
      "epoch": 6.16455078125e-06,
      "model_forward_time": 0.11488747596740723,
      "step": 1010
    },
    {
      "epoch": 6.16455078125e-06,
      "step": 1010,
      "training_step_time": 0.4162418842315674
    },
    {
      "epoch": 6.170654296875e-06,
      "model_forward_time": 0.11464881896972656,
      "step": 1011
    },
    {
      "epoch": 6.170654296875e-06,
      "step": 1011,
      "training_step_time": 0.40639781951904297
    },
    {
      "epoch": 6.1767578125e-06,
      "model_forward_time": 0.11526226997375488,
      "step": 1012
    },
    {
      "epoch": 6.1767578125e-06,
      "step": 1012,
      "training_step_time": 0.3919839859008789
    },
    {
      "epoch": 6.182861328125e-06,
      "model_forward_time": 0.11561822891235352,
      "step": 1013
    },
    {
      "epoch": 6.182861328125e-06,
      "step": 1013,
      "training_step_time": 0.39577174186706543
    },
    {
      "epoch": 6.18896484375e-06,
      "model_forward_time": 0.1150815486907959,
      "step": 1014
    },
    {
      "epoch": 6.18896484375e-06,
      "step": 1014,
      "training_step_time": 0.4586150646209717
    },
    {
      "epoch": 6.195068359375e-06,
      "model_forward_time": 0.11605978012084961,
      "step": 1015
    },
    {
      "epoch": 6.195068359375e-06,
      "step": 1015,
      "training_step_time": 0.4894587993621826
    },
    {
      "epoch": 6.201171875e-06,
      "model_forward_time": 0.11547255516052246,
      "step": 1016
    },
    {
      "epoch": 6.201171875e-06,
      "step": 1016,
      "training_step_time": 0.44758033752441406
    },
    {
      "epoch": 6.207275390625e-06,
      "model_forward_time": 0.11527514457702637,
      "step": 1017
    },
    {
      "epoch": 6.207275390625e-06,
      "step": 1017,
      "training_step_time": 0.5073788166046143
    },
    {
      "epoch": 6.21337890625e-06,
      "model_forward_time": 0.11547565460205078,
      "step": 1018
    },
    {
      "epoch": 6.21337890625e-06,
      "step": 1018,
      "training_step_time": 0.38573670387268066
    },
    {
      "epoch": 6.219482421875e-06,
      "model_forward_time": 0.1147148609161377,
      "step": 1019
    },
    {
      "epoch": 6.219482421875e-06,
      "step": 1019,
      "training_step_time": 0.3843710422515869
    },
    {
      "epoch": 6.2255859375e-06,
      "grad_norm": 1.2308323383331299,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 0.1795,
      "step": 1020
    },
    {
      "epoch": 6.2255859375e-06,
      "model_forward_time": 0.11514663696289062,
      "step": 1020
    },
    {
      "epoch": 6.2255859375e-06,
      "step": 1020,
      "training_step_time": 0.3891129493713379
    },
    {
      "epoch": 6.231689453125e-06,
      "model_forward_time": 0.11550116539001465,
      "step": 1021
    },
    {
      "epoch": 6.231689453125e-06,
      "step": 1021,
      "training_step_time": 0.4549579620361328
    },
    {
      "epoch": 6.23779296875e-06,
      "model_forward_time": 0.11533474922180176,
      "step": 1022
    },
    {
      "epoch": 6.23779296875e-06,
      "step": 1022,
      "training_step_time": 0.3892486095428467
    },
    {
      "epoch": 6.243896484375e-06,
      "model_forward_time": 0.11545825004577637,
      "step": 1023
    },
    {
      "epoch": 6.243896484375e-06,
      "step": 1023,
      "training_step_time": 0.41174840927124023
    },
    {
      "epoch": 6.25e-06,
      "model_forward_time": 0.1149451732635498,
      "step": 1024
    },
    {
      "epoch": 6.25e-06,
      "step": 1024,
      "training_step_time": 0.39980268478393555
    },
    {
      "epoch": 6.256103515625e-06,
      "model_forward_time": 0.11485719680786133,
      "step": 1025
    },
    {
      "epoch": 6.256103515625e-06,
      "step": 1025,
      "training_step_time": 0.4134330749511719
    },
    {
      "epoch": 6.26220703125e-06,
      "model_forward_time": 0.1148231029510498,
      "step": 1026
    },
    {
      "epoch": 6.26220703125e-06,
      "step": 1026,
      "training_step_time": 0.40212178230285645
    },
    {
      "epoch": 6.268310546875e-06,
      "model_forward_time": 0.11498665809631348,
      "step": 1027
    },
    {
      "epoch": 6.268310546875e-06,
      "step": 1027,
      "training_step_time": 0.39458250999450684
    },
    {
      "epoch": 6.2744140625e-06,
      "model_forward_time": 0.1154470443725586,
      "step": 1028
    },
    {
      "epoch": 6.2744140625e-06,
      "step": 1028,
      "training_step_time": 0.41354894638061523
    },
    {
      "epoch": 6.280517578125e-06,
      "model_forward_time": 0.11529421806335449,
      "step": 1029
    },
    {
      "epoch": 6.280517578125e-06,
      "step": 1029,
      "training_step_time": 0.47569751739501953
    },
    {
      "epoch": 6.28662109375e-06,
      "grad_norm": 0.8971108198165894,
      "learning_rate": 3.433333333333333e-05,
      "loss": 0.1742,
      "step": 1030
    },
    {
      "epoch": 6.28662109375e-06,
      "model_forward_time": 0.11557149887084961,
      "step": 1030
    },
    {
      "epoch": 6.28662109375e-06,
      "step": 1030,
      "training_step_time": 0.5086121559143066
    },
    {
      "epoch": 6.292724609375e-06,
      "model_forward_time": 0.11466860771179199,
      "step": 1031
    },
    {
      "epoch": 6.292724609375e-06,
      "step": 1031,
      "training_step_time": 0.4938638210296631
    },
    {
      "epoch": 6.298828125e-06,
      "model_forward_time": 0.11556339263916016,
      "step": 1032
    },
    {
      "epoch": 6.298828125e-06,
      "step": 1032,
      "training_step_time": 0.45630788803100586
    },
    {
      "epoch": 6.304931640625e-06,
      "model_forward_time": 0.11551451683044434,
      "step": 1033
    },
    {
      "epoch": 6.304931640625e-06,
      "step": 1033,
      "training_step_time": 0.3883843421936035
    },
    {
      "epoch": 6.31103515625e-06,
      "model_forward_time": 0.11461472511291504,
      "step": 1034
    },
    {
      "epoch": 6.31103515625e-06,
      "step": 1034,
      "training_step_time": 0.3880574703216553
    },
    {
      "epoch": 6.317138671875e-06,
      "model_forward_time": 0.11533117294311523,
      "step": 1035
    },
    {
      "epoch": 6.317138671875e-06,
      "step": 1035,
      "training_step_time": 0.3954436779022217
    },
    {
      "epoch": 6.3232421875e-06,
      "model_forward_time": 0.11587882041931152,
      "step": 1036
    },
    {
      "epoch": 6.3232421875e-06,
      "step": 1036,
      "training_step_time": 0.3985409736633301
    },
    {
      "epoch": 6.329345703125e-06,
      "model_forward_time": 0.11508584022521973,
      "step": 1037
    },
    {
      "epoch": 6.329345703125e-06,
      "step": 1037,
      "training_step_time": 0.4033997058868408
    },
    {
      "epoch": 6.33544921875e-06,
      "model_forward_time": 0.1155388355255127,
      "step": 1038
    },
    {
      "epoch": 6.33544921875e-06,
      "step": 1038,
      "training_step_time": 0.39670538902282715
    },
    {
      "epoch": 6.341552734375e-06,
      "model_forward_time": 0.11732816696166992,
      "step": 1039
    },
    {
      "epoch": 6.341552734375e-06,
      "step": 1039,
      "training_step_time": 0.4600255489349365
    },
    {
      "epoch": 6.34765625e-06,
      "grad_norm": 0.8450629711151123,
      "learning_rate": 3.466666666666667e-05,
      "loss": 0.1868,
      "step": 1040
    },
    {
      "epoch": 6.34765625e-06,
      "model_forward_time": 0.11526632308959961,
      "step": 1040
    },
    {
      "epoch": 6.34765625e-06,
      "step": 1040,
      "training_step_time": 0.4006624221801758
    },
    {
      "epoch": 6.353759765625e-06,
      "model_forward_time": 0.11501812934875488,
      "step": 1041
    },
    {
      "epoch": 6.353759765625e-06,
      "step": 1041,
      "training_step_time": 0.3970608711242676
    },
    {
      "epoch": 6.35986328125e-06,
      "model_forward_time": 0.11519503593444824,
      "step": 1042
    },
    {
      "epoch": 6.35986328125e-06,
      "step": 1042,
      "training_step_time": 0.39319539070129395
    },
    {
      "epoch": 6.365966796875e-06,
      "model_forward_time": 0.11530852317810059,
      "step": 1043
    },
    {
      "epoch": 6.365966796875e-06,
      "step": 1043,
      "training_step_time": 0.41364145278930664
    },
    {
      "epoch": 6.3720703125e-06,
      "model_forward_time": 0.11508822441101074,
      "step": 1044
    },
    {
      "epoch": 6.3720703125e-06,
      "step": 1044,
      "training_step_time": 0.4380166530609131
    },
    {
      "epoch": 6.378173828125e-06,
      "model_forward_time": 0.11515164375305176,
      "step": 1045
    },
    {
      "epoch": 6.378173828125e-06,
      "step": 1045,
      "training_step_time": 0.38819265365600586
    },
    {
      "epoch": 6.38427734375e-06,
      "model_forward_time": 0.11524820327758789,
      "step": 1046
    },
    {
      "epoch": 6.38427734375e-06,
      "step": 1046,
      "training_step_time": 0.46350526809692383
    },
    {
      "epoch": 6.390380859375e-06,
      "model_forward_time": 0.11556577682495117,
      "step": 1047
    },
    {
      "epoch": 6.390380859375e-06,
      "step": 1047,
      "training_step_time": 0.4149808883666992
    },
    {
      "epoch": 6.396484375e-06,
      "model_forward_time": 0.1158294677734375,
      "step": 1048
    },
    {
      "epoch": 6.396484375e-06,
      "step": 1048,
      "training_step_time": 0.39243006706237793
    },
    {
      "epoch": 6.402587890625e-06,
      "model_forward_time": 0.11582756042480469,
      "step": 1049
    },
    {
      "epoch": 6.402587890625e-06,
      "step": 1049,
      "training_step_time": 0.3913247585296631
    },
    {
      "epoch": 6.40869140625e-06,
      "grad_norm": 1.211618185043335,
      "learning_rate": 3.5e-05,
      "loss": 0.1683,
      "step": 1050
    },
    {
      "epoch": 6.40869140625e-06,
      "model_forward_time": 0.11516213417053223,
      "step": 1050
    },
    {
      "epoch": 6.40869140625e-06,
      "step": 1050,
      "training_step_time": 0.448411226272583
    },
    {
      "epoch": 6.414794921875e-06,
      "model_forward_time": 0.11505722999572754,
      "step": 1051
    },
    {
      "epoch": 6.414794921875e-06,
      "step": 1051,
      "training_step_time": 0.40756750106811523
    },
    {
      "epoch": 6.4208984375e-06,
      "model_forward_time": 0.11594414710998535,
      "step": 1052
    },
    {
      "epoch": 6.4208984375e-06,
      "step": 1052,
      "training_step_time": 0.45964527130126953
    },
    {
      "epoch": 6.427001953125e-06,
      "model_forward_time": 0.11562299728393555,
      "step": 1053
    },
    {
      "epoch": 6.427001953125e-06,
      "step": 1053,
      "training_step_time": 0.4134070873260498
    },
    {
      "epoch": 6.43310546875e-06,
      "model_forward_time": 0.11607074737548828,
      "step": 1054
    },
    {
      "epoch": 6.43310546875e-06,
      "step": 1054,
      "training_step_time": 0.3985002040863037
    },
    {
      "epoch": 6.439208984375e-06,
      "model_forward_time": 0.11572647094726562,
      "step": 1055
    },
    {
      "epoch": 6.439208984375e-06,
      "step": 1055,
      "training_step_time": 0.401792049407959
    },
    {
      "epoch": 6.4453125e-06,
      "model_forward_time": 0.11632013320922852,
      "step": 1056
    },
    {
      "epoch": 6.4453125e-06,
      "step": 1056,
      "training_step_time": 0.40786290168762207
    },
    {
      "epoch": 6.451416015625e-06,
      "model_forward_time": 0.11521553993225098,
      "step": 1057
    },
    {
      "epoch": 6.451416015625e-06,
      "step": 1057,
      "training_step_time": 0.39685654640197754
    },
    {
      "epoch": 6.45751953125e-06,
      "model_forward_time": 0.11565661430358887,
      "step": 1058
    },
    {
      "epoch": 6.45751953125e-06,
      "step": 1058,
      "training_step_time": 0.3981287479400635
    },
    {
      "epoch": 6.463623046875e-06,
      "model_forward_time": 0.11531877517700195,
      "step": 1059
    },
    {
      "epoch": 6.463623046875e-06,
      "step": 1059,
      "training_step_time": 0.40364599227905273
    },
    {
      "epoch": 6.4697265625e-06,
      "grad_norm": 1.0413010120391846,
      "learning_rate": 3.5333333333333336e-05,
      "loss": 0.1913,
      "step": 1060
    },
    {
      "epoch": 6.4697265625e-06,
      "model_forward_time": 0.11560821533203125,
      "step": 1060
    },
    {
      "epoch": 6.4697265625e-06,
      "step": 1060,
      "training_step_time": 0.46651148796081543
    },
    {
      "epoch": 6.475830078125e-06,
      "model_forward_time": 0.1148691177368164,
      "step": 1061
    },
    {
      "epoch": 6.475830078125e-06,
      "step": 1061,
      "training_step_time": 0.4461808204650879
    },
    {
      "epoch": 6.48193359375e-06,
      "model_forward_time": 0.11551856994628906,
      "step": 1062
    },
    {
      "epoch": 6.48193359375e-06,
      "step": 1062,
      "training_step_time": 0.8132777214050293
    },
    {
      "epoch": 6.488037109375e-06,
      "model_forward_time": 0.11530828475952148,
      "step": 1063
    },
    {
      "epoch": 6.488037109375e-06,
      "step": 1063,
      "training_step_time": 0.38811469078063965
    },
    {
      "epoch": 6.494140625e-06,
      "model_forward_time": 0.11449217796325684,
      "step": 1064
    },
    {
      "epoch": 6.494140625e-06,
      "step": 1064,
      "training_step_time": 0.4637172222137451
    },
    {
      "epoch": 6.500244140625e-06,
      "model_forward_time": 0.1145639419555664,
      "step": 1065
    },
    {
      "epoch": 6.500244140625e-06,
      "step": 1065,
      "training_step_time": 0.48302340507507324
    },
    {
      "epoch": 6.50634765625e-06,
      "model_forward_time": 0.13065338134765625,
      "step": 1066
    },
    {
      "epoch": 6.50634765625e-06,
      "step": 1066,
      "training_step_time": 0.4445536136627197
    },
    {
      "epoch": 6.512451171875e-06,
      "model_forward_time": 0.11377453804016113,
      "step": 1067
    },
    {
      "epoch": 6.512451171875e-06,
      "step": 1067,
      "training_step_time": 0.3857574462890625
    },
    {
      "epoch": 6.5185546875e-06,
      "model_forward_time": 0.11567473411560059,
      "step": 1068
    },
    {
      "epoch": 6.5185546875e-06,
      "step": 1068,
      "training_step_time": 0.3914523124694824
    },
    {
      "epoch": 6.524658203125e-06,
      "model_forward_time": 0.11489105224609375,
      "step": 1069
    },
    {
      "epoch": 6.524658203125e-06,
      "step": 1069,
      "training_step_time": 0.38837170600891113
    },
    {
      "epoch": 6.53076171875e-06,
      "grad_norm": 1.33501398563385,
      "learning_rate": 3.566666666666667e-05,
      "loss": 0.1822,
      "step": 1070
    },
    {
      "epoch": 6.53076171875e-06,
      "model_forward_time": 0.11590743064880371,
      "step": 1070
    },
    {
      "epoch": 6.53076171875e-06,
      "step": 1070,
      "training_step_time": 0.3929166793823242
    },
    {
      "epoch": 6.536865234375e-06,
      "model_forward_time": 0.11570096015930176,
      "step": 1071
    },
    {
      "epoch": 6.536865234375e-06,
      "step": 1071,
      "training_step_time": 0.39581990242004395
    },
    {
      "epoch": 6.54296875e-06,
      "model_forward_time": 0.11656999588012695,
      "step": 1072
    },
    {
      "epoch": 6.54296875e-06,
      "step": 1072,
      "training_step_time": 0.38996386528015137
    },
    {
      "epoch": 6.549072265625e-06,
      "model_forward_time": 0.1157839298248291,
      "step": 1073
    },
    {
      "epoch": 6.549072265625e-06,
      "step": 1073,
      "training_step_time": 0.44647932052612305
    },
    {
      "epoch": 6.55517578125e-06,
      "model_forward_time": 0.116058349609375,
      "step": 1074
    },
    {
      "epoch": 6.55517578125e-06,
      "step": 1074,
      "training_step_time": 0.4520566463470459
    },
    {
      "epoch": 6.561279296875e-06,
      "model_forward_time": 0.12159585952758789,
      "step": 1075
    },
    {
      "epoch": 6.561279296875e-06,
      "step": 1075,
      "training_step_time": 0.4205973148345947
    },
    {
      "epoch": 6.5673828125e-06,
      "model_forward_time": 0.11950922012329102,
      "step": 1076
    },
    {
      "epoch": 6.5673828125e-06,
      "step": 1076,
      "training_step_time": 0.38843536376953125
    },
    {
      "epoch": 6.573486328125e-06,
      "model_forward_time": 0.1179506778717041,
      "step": 1077
    },
    {
      "epoch": 6.573486328125e-06,
      "step": 1077,
      "training_step_time": 0.383495569229126
    },
    {
      "epoch": 6.57958984375e-06,
      "model_forward_time": 0.11905646324157715,
      "step": 1078
    },
    {
      "epoch": 6.57958984375e-06,
      "step": 1078,
      "training_step_time": 0.38498973846435547
    },
    {
      "epoch": 6.585693359375e-06,
      "model_forward_time": 0.11870098114013672,
      "step": 1079
    },
    {
      "epoch": 6.585693359375e-06,
      "step": 1079,
      "training_step_time": 0.4217362403869629
    },
    {
      "epoch": 6.591796875e-06,
      "grad_norm": 1.0258756875991821,
      "learning_rate": 3.6e-05,
      "loss": 0.1806,
      "step": 1080
    },
    {
      "epoch": 6.591796875e-06,
      "model_forward_time": 0.11790728569030762,
      "step": 1080
    },
    {
      "epoch": 6.591796875e-06,
      "step": 1080,
      "training_step_time": 0.4241170883178711
    },
    {
      "epoch": 6.597900390625e-06,
      "model_forward_time": 0.11642122268676758,
      "step": 1081
    },
    {
      "epoch": 6.597900390625e-06,
      "step": 1081,
      "training_step_time": 0.4102156162261963
    },
    {
      "epoch": 6.60400390625e-06,
      "model_forward_time": 0.1154625415802002,
      "step": 1082
    },
    {
      "epoch": 6.60400390625e-06,
      "step": 1082,
      "training_step_time": 0.41055798530578613
    },
    {
      "epoch": 6.610107421875e-06,
      "model_forward_time": 0.11597967147827148,
      "step": 1083
    },
    {
      "epoch": 6.610107421875e-06,
      "step": 1083,
      "training_step_time": 0.41323280334472656
    },
    {
      "epoch": 6.6162109375e-06,
      "model_forward_time": 0.11619234085083008,
      "step": 1084
    },
    {
      "epoch": 6.6162109375e-06,
      "step": 1084,
      "training_step_time": 0.3940911293029785
    },
    {
      "epoch": 6.622314453125e-06,
      "model_forward_time": 0.11680340766906738,
      "step": 1085
    },
    {
      "epoch": 6.622314453125e-06,
      "step": 1085,
      "training_step_time": 0.3814432621002197
    },
    {
      "epoch": 6.62841796875e-06,
      "model_forward_time": 0.11658072471618652,
      "step": 1086
    },
    {
      "epoch": 6.62841796875e-06,
      "step": 1086,
      "training_step_time": 0.39995431900024414
    },
    {
      "epoch": 6.634521484375e-06,
      "model_forward_time": 0.11707854270935059,
      "step": 1087
    },
    {
      "epoch": 6.634521484375e-06,
      "step": 1087,
      "training_step_time": 0.4005546569824219
    },
    {
      "epoch": 6.640625e-06,
      "model_forward_time": 0.11600470542907715,
      "step": 1088
    },
    {
      "epoch": 6.640625e-06,
      "step": 1088,
      "training_step_time": 0.4582540988922119
    },
    {
      "epoch": 6.646728515625e-06,
      "model_forward_time": 0.11614322662353516,
      "step": 1089
    },
    {
      "epoch": 6.646728515625e-06,
      "step": 1089,
      "training_step_time": 0.44124650955200195
    },
    {
      "epoch": 6.65283203125e-06,
      "grad_norm": 0.9015865921974182,
      "learning_rate": 3.633333333333333e-05,
      "loss": 0.1573,
      "step": 1090
    },
    {
      "epoch": 6.65283203125e-06,
      "model_forward_time": 0.11623263359069824,
      "step": 1090
    },
    {
      "epoch": 6.65283203125e-06,
      "step": 1090,
      "training_step_time": 0.5087239742279053
    },
    {
      "epoch": 6.658935546875e-06,
      "model_forward_time": 0.11743640899658203,
      "step": 1091
    },
    {
      "epoch": 6.658935546875e-06,
      "step": 1091,
      "training_step_time": 0.37833690643310547
    },
    {
      "epoch": 6.6650390625e-06,
      "model_forward_time": 0.11678409576416016,
      "step": 1092
    },
    {
      "epoch": 6.6650390625e-06,
      "step": 1092,
      "training_step_time": 0.44183778762817383
    },
    {
      "epoch": 6.671142578125e-06,
      "model_forward_time": 0.11615276336669922,
      "step": 1093
    },
    {
      "epoch": 6.671142578125e-06,
      "step": 1093,
      "training_step_time": 0.39885520935058594
    },
    {
      "epoch": 6.67724609375e-06,
      "model_forward_time": 0.11572933197021484,
      "step": 1094
    },
    {
      "epoch": 6.67724609375e-06,
      "step": 1094,
      "training_step_time": 0.43032169342041016
    },
    {
      "epoch": 6.683349609375e-06,
      "model_forward_time": 0.11547350883483887,
      "step": 1095
    },
    {
      "epoch": 6.683349609375e-06,
      "step": 1095,
      "training_step_time": 0.39392924308776855
    },
    {
      "epoch": 6.689453125e-06,
      "model_forward_time": 0.11593413352966309,
      "step": 1096
    },
    {
      "epoch": 6.689453125e-06,
      "step": 1096,
      "training_step_time": 0.4581727981567383
    },
    {
      "epoch": 6.695556640625e-06,
      "model_forward_time": 0.11581921577453613,
      "step": 1097
    },
    {
      "epoch": 6.695556640625e-06,
      "step": 1097,
      "training_step_time": 0.38172364234924316
    },
    {
      "epoch": 6.70166015625e-06,
      "model_forward_time": 0.11595869064331055,
      "step": 1098
    },
    {
      "epoch": 6.70166015625e-06,
      "step": 1098,
      "training_step_time": 0.3973233699798584
    },
    {
      "epoch": 6.707763671875e-06,
      "model_forward_time": 0.11574316024780273,
      "step": 1099
    },
    {
      "epoch": 6.707763671875e-06,
      "step": 1099,
      "training_step_time": 0.39921021461486816
    },
    {
      "epoch": 6.7138671875e-06,
      "grad_norm": 1.3102848529815674,
      "learning_rate": 3.6666666666666666e-05,
      "loss": 0.1738,
      "step": 1100
    },
    {
      "epoch": 6.7138671875e-06,
      "model_forward_time": 0.11578679084777832,
      "step": 1100
    },
    {
      "epoch": 6.7138671875e-06,
      "step": 1100,
      "training_step_time": 0.3979766368865967
    },
    {
      "epoch": 6.719970703125e-06,
      "model_forward_time": 0.11651182174682617,
      "step": 1101
    },
    {
      "epoch": 6.719970703125e-06,
      "step": 1101,
      "training_step_time": 0.382432222366333
    },
    {
      "epoch": 6.72607421875e-06,
      "model_forward_time": 0.11629533767700195,
      "step": 1102
    },
    {
      "epoch": 6.72607421875e-06,
      "step": 1102,
      "training_step_time": 0.38358616828918457
    },
    {
      "epoch": 6.732177734375e-06,
      "model_forward_time": 0.12142324447631836,
      "step": 1103
    },
    {
      "epoch": 6.732177734375e-06,
      "step": 1103,
      "training_step_time": 0.43462061882019043
    },
    {
      "epoch": 6.73828125e-06,
      "model_forward_time": 0.11676526069641113,
      "step": 1104
    },
    {
      "epoch": 6.73828125e-06,
      "step": 1104,
      "training_step_time": 0.44774913787841797
    },
    {
      "epoch": 6.744384765625e-06,
      "model_forward_time": 0.1168220043182373,
      "step": 1105
    },
    {
      "epoch": 6.744384765625e-06,
      "step": 1105,
      "training_step_time": 0.4820425510406494
    },
    {
      "epoch": 6.75048828125e-06,
      "model_forward_time": 0.11659717559814453,
      "step": 1106
    },
    {
      "epoch": 6.75048828125e-06,
      "step": 1106,
      "training_step_time": 0.4838066101074219
    },
    {
      "epoch": 6.756591796875e-06,
      "model_forward_time": 0.12355589866638184,
      "step": 1107
    },
    {
      "epoch": 6.756591796875e-06,
      "step": 1107,
      "training_step_time": 0.39911961555480957
    },
    {
      "epoch": 6.7626953125e-06,
      "model_forward_time": 0.1170351505279541,
      "step": 1108
    },
    {
      "epoch": 6.7626953125e-06,
      "step": 1108,
      "training_step_time": 0.3966188430786133
    },
    {
      "epoch": 6.768798828125e-06,
      "model_forward_time": 0.11568164825439453,
      "step": 1109
    },
    {
      "epoch": 6.768798828125e-06,
      "step": 1109,
      "training_step_time": 0.4136841297149658
    },
    {
      "epoch": 6.77490234375e-06,
      "grad_norm": 1.0690021514892578,
      "learning_rate": 3.7e-05,
      "loss": 0.18,
      "step": 1110
    },
    {
      "epoch": 6.77490234375e-06,
      "model_forward_time": 0.11715483665466309,
      "step": 1110
    },
    {
      "epoch": 6.77490234375e-06,
      "step": 1110,
      "training_step_time": 0.4924354553222656
    },
    {
      "epoch": 6.781005859375e-06,
      "model_forward_time": 0.11491918563842773,
      "step": 1111
    },
    {
      "epoch": 6.781005859375e-06,
      "step": 1111,
      "training_step_time": 0.4411191940307617
    },
    {
      "epoch": 6.787109375e-06,
      "model_forward_time": 0.11530947685241699,
      "step": 1112
    },
    {
      "epoch": 6.787109375e-06,
      "step": 1112,
      "training_step_time": 0.4034080505371094
    },
    {
      "epoch": 6.793212890625e-06,
      "model_forward_time": 0.11541104316711426,
      "step": 1113
    },
    {
      "epoch": 6.793212890625e-06,
      "step": 1113,
      "training_step_time": 0.3960731029510498
    },
    {
      "epoch": 6.79931640625e-06,
      "model_forward_time": 0.11567211151123047,
      "step": 1114
    },
    {
      "epoch": 6.79931640625e-06,
      "step": 1114,
      "training_step_time": 0.39372968673706055
    },
    {
      "epoch": 6.805419921875e-06,
      "model_forward_time": 0.11595296859741211,
      "step": 1115
    },
    {
      "epoch": 6.805419921875e-06,
      "step": 1115,
      "training_step_time": 0.39931225776672363
    },
    {
      "epoch": 6.8115234375e-06,
      "model_forward_time": 0.11524128913879395,
      "step": 1116
    },
    {
      "epoch": 6.8115234375e-06,
      "step": 1116,
      "training_step_time": 0.4003419876098633
    },
    {
      "epoch": 6.817626953125e-06,
      "model_forward_time": 0.115234375,
      "step": 1117
    },
    {
      "epoch": 6.817626953125e-06,
      "step": 1117,
      "training_step_time": 0.39649248123168945
    },
    {
      "epoch": 6.82373046875e-06,
      "model_forward_time": 0.1156923770904541,
      "step": 1118
    },
    {
      "epoch": 6.82373046875e-06,
      "step": 1118,
      "training_step_time": 0.43784403800964355
    },
    {
      "epoch": 6.829833984375e-06,
      "model_forward_time": 0.11566305160522461,
      "step": 1119
    },
    {
      "epoch": 6.829833984375e-06,
      "step": 1119,
      "training_step_time": 0.49810028076171875
    },
    {
      "epoch": 6.8359375e-06,
      "grad_norm": 0.5876435041427612,
      "learning_rate": 3.733333333333334e-05,
      "loss": 0.169,
      "step": 1120
    },
    {
      "epoch": 6.8359375e-06,
      "model_forward_time": 0.11555027961730957,
      "step": 1120
    },
    {
      "epoch": 6.8359375e-06,
      "step": 1120,
      "training_step_time": 0.5109388828277588
    },
    {
      "epoch": 6.842041015625e-06,
      "model_forward_time": 0.11536645889282227,
      "step": 1121
    },
    {
      "epoch": 6.842041015625e-06,
      "step": 1121,
      "training_step_time": 0.3991374969482422
    },
    {
      "epoch": 6.84814453125e-06,
      "model_forward_time": 0.1160421371459961,
      "step": 1122
    },
    {
      "epoch": 6.84814453125e-06,
      "step": 1122,
      "training_step_time": 0.39334774017333984
    },
    {
      "epoch": 6.854248046875e-06,
      "model_forward_time": 0.11617326736450195,
      "step": 1123
    },
    {
      "epoch": 6.854248046875e-06,
      "step": 1123,
      "training_step_time": 0.4012877941131592
    },
    {
      "epoch": 6.8603515625e-06,
      "model_forward_time": 0.11599564552307129,
      "step": 1124
    },
    {
      "epoch": 6.8603515625e-06,
      "step": 1124,
      "training_step_time": 0.5197644233703613
    },
    {
      "epoch": 6.866455078125e-06,
      "model_forward_time": 0.115203857421875,
      "step": 1125
    },
    {
      "epoch": 6.866455078125e-06,
      "step": 1125,
      "training_step_time": 0.4401249885559082
    },
    {
      "epoch": 6.87255859375e-06,
      "model_forward_time": 0.11619067192077637,
      "step": 1126
    },
    {
      "epoch": 6.87255859375e-06,
      "step": 1126,
      "training_step_time": 0.39460182189941406
    },
    {
      "epoch": 6.878662109375e-06,
      "model_forward_time": 0.11669445037841797,
      "step": 1127
    },
    {
      "epoch": 6.878662109375e-06,
      "step": 1127,
      "training_step_time": 0.40361475944519043
    },
    {
      "epoch": 6.884765625e-06,
      "model_forward_time": 0.1158897876739502,
      "step": 1128
    },
    {
      "epoch": 6.884765625e-06,
      "step": 1128,
      "training_step_time": 0.4048452377319336
    },
    {
      "epoch": 6.890869140625e-06,
      "model_forward_time": 0.11555314064025879,
      "step": 1129
    },
    {
      "epoch": 6.890869140625e-06,
      "step": 1129,
      "training_step_time": 0.40424466133117676
    },
    {
      "epoch": 6.89697265625e-06,
      "grad_norm": 0.5458449721336365,
      "learning_rate": 3.766666666666667e-05,
      "loss": 0.1791,
      "step": 1130
    },
    {
      "epoch": 6.89697265625e-06,
      "model_forward_time": 0.11553001403808594,
      "step": 1130
    },
    {
      "epoch": 6.89697265625e-06,
      "step": 1130,
      "training_step_time": 0.39935874938964844
    },
    {
      "epoch": 6.903076171875e-06,
      "model_forward_time": 0.11888766288757324,
      "step": 1131
    },
    {
      "epoch": 6.903076171875e-06,
      "step": 1131,
      "training_step_time": 0.3917713165283203
    },
    {
      "epoch": 6.9091796875e-06,
      "model_forward_time": 0.11680006980895996,
      "step": 1132
    },
    {
      "epoch": 6.9091796875e-06,
      "step": 1132,
      "training_step_time": 0.39139389991760254
    },
    {
      "epoch": 6.915283203125e-06,
      "model_forward_time": 0.11610245704650879,
      "step": 1133
    },
    {
      "epoch": 6.915283203125e-06,
      "step": 1133,
      "training_step_time": 0.4160335063934326
    },
    {
      "epoch": 6.92138671875e-06,
      "model_forward_time": 0.11591482162475586,
      "step": 1134
    },
    {
      "epoch": 6.92138671875e-06,
      "step": 1134,
      "training_step_time": 0.4054887294769287
    },
    {
      "epoch": 6.927490234375e-06,
      "model_forward_time": 0.11521649360656738,
      "step": 1135
    },
    {
      "epoch": 6.927490234375e-06,
      "step": 1135,
      "training_step_time": 0.5907728672027588
    },
    {
      "epoch": 6.93359375e-06,
      "model_forward_time": 0.11511564254760742,
      "step": 1136
    },
    {
      "epoch": 6.93359375e-06,
      "step": 1136,
      "training_step_time": 0.40250253677368164
    },
    {
      "epoch": 6.939697265625e-06,
      "model_forward_time": 0.11536550521850586,
      "step": 1137
    },
    {
      "epoch": 6.939697265625e-06,
      "step": 1137,
      "training_step_time": 0.39401865005493164
    },
    {
      "epoch": 6.94580078125e-06,
      "model_forward_time": 0.11778807640075684,
      "step": 1138
    },
    {
      "epoch": 6.94580078125e-06,
      "step": 1138,
      "training_step_time": 0.4136688709259033
    },
    {
      "epoch": 6.951904296875e-06,
      "model_forward_time": 0.11738801002502441,
      "step": 1139
    },
    {
      "epoch": 6.951904296875e-06,
      "step": 1139,
      "training_step_time": 0.41550421714782715
    },
    {
      "epoch": 6.9580078125e-06,
      "grad_norm": 0.6635262966156006,
      "learning_rate": 3.8e-05,
      "loss": 0.1754,
      "step": 1140
    },
    {
      "epoch": 6.9580078125e-06,
      "model_forward_time": 0.11522102355957031,
      "step": 1140
    },
    {
      "epoch": 6.9580078125e-06,
      "step": 1140,
      "training_step_time": 0.4783942699432373
    },
    {
      "epoch": 6.964111328125e-06,
      "model_forward_time": 0.11513590812683105,
      "step": 1141
    },
    {
      "epoch": 6.964111328125e-06,
      "step": 1141,
      "training_step_time": 0.40140795707702637
    },
    {
      "epoch": 6.97021484375e-06,
      "model_forward_time": 0.11604166030883789,
      "step": 1142
    },
    {
      "epoch": 6.97021484375e-06,
      "step": 1142,
      "training_step_time": 0.40622401237487793
    },
    {
      "epoch": 6.976318359375e-06,
      "model_forward_time": 0.11564779281616211,
      "step": 1143
    },
    {
      "epoch": 6.976318359375e-06,
      "step": 1143,
      "training_step_time": 0.40254855155944824
    },
    {
      "epoch": 6.982421875e-06,
      "model_forward_time": 0.11503982543945312,
      "step": 1144
    },
    {
      "epoch": 6.982421875e-06,
      "step": 1144,
      "training_step_time": 0.4089181423187256
    },
    {
      "epoch": 6.988525390625e-06,
      "model_forward_time": 0.11491966247558594,
      "step": 1145
    },
    {
      "epoch": 6.988525390625e-06,
      "step": 1145,
      "training_step_time": 0.4199204444885254
    },
    {
      "epoch": 6.99462890625e-06,
      "model_forward_time": 0.11727738380432129,
      "step": 1146
    },
    {
      "epoch": 6.99462890625e-06,
      "step": 1146,
      "training_step_time": 0.4047691822052002
    },
    {
      "epoch": 7.000732421875e-06,
      "model_forward_time": 0.11519002914428711,
      "step": 1147
    },
    {
      "epoch": 7.000732421875e-06,
      "step": 1147,
      "training_step_time": 0.4446990489959717
    },
    {
      "epoch": 7.0068359375e-06,
      "model_forward_time": 0.11479067802429199,
      "step": 1148
    },
    {
      "epoch": 7.0068359375e-06,
      "step": 1148,
      "training_step_time": 0.42665886878967285
    },
    {
      "epoch": 7.012939453125e-06,
      "model_forward_time": 0.11558103561401367,
      "step": 1149
    },
    {
      "epoch": 7.012939453125e-06,
      "step": 1149,
      "training_step_time": 0.4496777057647705
    },
    {
      "epoch": 7.01904296875e-06,
      "grad_norm": 0.5162835121154785,
      "learning_rate": 3.8333333333333334e-05,
      "loss": 0.176,
      "step": 1150
    },
    {
      "epoch": 7.01904296875e-06,
      "model_forward_time": 0.11496543884277344,
      "step": 1150
    },
    {
      "epoch": 7.01904296875e-06,
      "step": 1150,
      "training_step_time": 0.5006966590881348
    },
    {
      "epoch": 7.025146484375e-06,
      "model_forward_time": 0.11538147926330566,
      "step": 1151
    },
    {
      "epoch": 7.025146484375e-06,
      "step": 1151,
      "training_step_time": 0.5223019123077393
    },
    {
      "epoch": 7.03125e-06,
      "model_forward_time": 0.11485147476196289,
      "step": 1152
    },
    {
      "epoch": 7.03125e-06,
      "step": 1152,
      "training_step_time": 0.4248175621032715
    },
    {
      "epoch": 7.037353515625e-06,
      "model_forward_time": 0.11562013626098633,
      "step": 1153
    },
    {
      "epoch": 7.037353515625e-06,
      "step": 1153,
      "training_step_time": 0.3938486576080322
    },
    {
      "epoch": 7.04345703125e-06,
      "model_forward_time": 0.11536860466003418,
      "step": 1154
    },
    {
      "epoch": 7.04345703125e-06,
      "step": 1154,
      "training_step_time": 0.39521265029907227
    },
    {
      "epoch": 7.049560546875e-06,
      "model_forward_time": 0.1160891056060791,
      "step": 1155
    },
    {
      "epoch": 7.049560546875e-06,
      "step": 1155,
      "training_step_time": 0.4019191265106201
    },
    {
      "epoch": 7.0556640625e-06,
      "model_forward_time": 0.11530494689941406,
      "step": 1156
    },
    {
      "epoch": 7.0556640625e-06,
      "step": 1156,
      "training_step_time": 0.39373064041137695
    },
    {
      "epoch": 7.061767578125e-06,
      "model_forward_time": 0.11579275131225586,
      "step": 1157
    },
    {
      "epoch": 7.061767578125e-06,
      "step": 1157,
      "training_step_time": 0.4023418426513672
    },
    {
      "epoch": 7.06787109375e-06,
      "model_forward_time": 0.11513400077819824,
      "step": 1158
    },
    {
      "epoch": 7.06787109375e-06,
      "step": 1158,
      "training_step_time": 0.4289278984069824
    },
    {
      "epoch": 7.073974609375e-06,
      "model_forward_time": 0.11510848999023438,
      "step": 1159
    },
    {
      "epoch": 7.073974609375e-06,
      "step": 1159,
      "training_step_time": 0.39293622970581055
    },
    {
      "epoch": 7.080078125e-06,
      "grad_norm": 1.0081863403320312,
      "learning_rate": 3.866666666666667e-05,
      "loss": 0.1719,
      "step": 1160
    },
    {
      "epoch": 7.080078125e-06,
      "model_forward_time": 0.11519885063171387,
      "step": 1160
    },
    {
      "epoch": 7.080078125e-06,
      "step": 1160,
      "training_step_time": 0.3961303234100342
    },
    {
      "epoch": 7.086181640625e-06,
      "model_forward_time": 0.11592435836791992,
      "step": 1161
    },
    {
      "epoch": 7.086181640625e-06,
      "step": 1161,
      "training_step_time": 0.42728590965270996
    },
    {
      "epoch": 7.09228515625e-06,
      "model_forward_time": 0.1154170036315918,
      "step": 1162
    },
    {
      "epoch": 7.09228515625e-06,
      "step": 1162,
      "training_step_time": 0.4257981777191162
    },
    {
      "epoch": 7.098388671875e-06,
      "model_forward_time": 0.11524128913879395,
      "step": 1163
    },
    {
      "epoch": 7.098388671875e-06,
      "step": 1163,
      "training_step_time": 0.3694326877593994
    },
    {
      "epoch": 7.1044921875e-06,
      "model_forward_time": 0.11631131172180176,
      "step": 1164
    },
    {
      "epoch": 7.1044921875e-06,
      "step": 1164,
      "training_step_time": 0.46671247482299805
    },
    {
      "epoch": 7.110595703125e-06,
      "model_forward_time": 0.11519241333007812,
      "step": 1165
    },
    {
      "epoch": 7.110595703125e-06,
      "step": 1165,
      "training_step_time": 0.39738893508911133
    },
    {
      "epoch": 7.11669921875e-06,
      "model_forward_time": 0.1165013313293457,
      "step": 1166
    },
    {
      "epoch": 7.11669921875e-06,
      "step": 1166,
      "training_step_time": 0.3958313465118408
    },
    {
      "epoch": 7.122802734375e-06,
      "model_forward_time": 0.11519789695739746,
      "step": 1167
    },
    {
      "epoch": 7.122802734375e-06,
      "step": 1167,
      "training_step_time": 0.40741825103759766
    },
    {
      "epoch": 7.12890625e-06,
      "model_forward_time": 0.11623358726501465,
      "step": 1168
    },
    {
      "epoch": 7.12890625e-06,
      "step": 1168,
      "training_step_time": 0.4653937816619873
    },
    {
      "epoch": 7.135009765625e-06,
      "model_forward_time": 0.11552953720092773,
      "step": 1169
    },
    {
      "epoch": 7.135009765625e-06,
      "step": 1169,
      "training_step_time": 0.4527561664581299
    },
    {
      "epoch": 7.14111328125e-06,
      "grad_norm": 0.6205767393112183,
      "learning_rate": 3.9000000000000006e-05,
      "loss": 0.1725,
      "step": 1170
    },
    {
      "epoch": 7.14111328125e-06,
      "model_forward_time": 0.11516690254211426,
      "step": 1170
    },
    {
      "epoch": 7.14111328125e-06,
      "step": 1170,
      "training_step_time": 0.4040720462799072
    },
    {
      "epoch": 7.147216796875e-06,
      "model_forward_time": 0.11460185050964355,
      "step": 1171
    },
    {
      "epoch": 7.147216796875e-06,
      "step": 1171,
      "training_step_time": 0.391817569732666
    },
    {
      "epoch": 7.1533203125e-06,
      "model_forward_time": 0.11540818214416504,
      "step": 1172
    },
    {
      "epoch": 7.1533203125e-06,
      "step": 1172,
      "training_step_time": 0.39200592041015625
    },
    {
      "epoch": 7.159423828125e-06,
      "model_forward_time": 0.11506915092468262,
      "step": 1173
    },
    {
      "epoch": 7.159423828125e-06,
      "step": 1173,
      "training_step_time": 0.39421749114990234
    },
    {
      "epoch": 7.16552734375e-06,
      "model_forward_time": 0.1149294376373291,
      "step": 1174
    },
    {
      "epoch": 7.16552734375e-06,
      "step": 1174,
      "training_step_time": 0.38620471954345703
    },
    {
      "epoch": 7.171630859375e-06,
      "model_forward_time": 0.1154184341430664,
      "step": 1175
    },
    {
      "epoch": 7.171630859375e-06,
      "step": 1175,
      "training_step_time": 0.42575979232788086
    },
    {
      "epoch": 7.177734375e-06,
      "model_forward_time": 0.11480402946472168,
      "step": 1176
    },
    {
      "epoch": 7.177734375e-06,
      "step": 1176,
      "training_step_time": 0.4111051559448242
    },
    {
      "epoch": 7.183837890625e-06,
      "model_forward_time": 0.11467194557189941,
      "step": 1177
    },
    {
      "epoch": 7.183837890625e-06,
      "step": 1177,
      "training_step_time": 0.49785351753234863
    },
    {
      "epoch": 7.18994140625e-06,
      "model_forward_time": 0.11565637588500977,
      "step": 1178
    },
    {
      "epoch": 7.18994140625e-06,
      "step": 1178,
      "training_step_time": 0.36801624298095703
    },
    {
      "epoch": 7.196044921875e-06,
      "model_forward_time": 0.11564207077026367,
      "step": 1179
    },
    {
      "epoch": 7.196044921875e-06,
      "step": 1179,
      "training_step_time": 0.3942434787750244
    },
    {
      "epoch": 7.2021484375e-06,
      "grad_norm": 0.73110032081604,
      "learning_rate": 3.933333333333333e-05,
      "loss": 0.1657,
      "step": 1180
    },
    {
      "epoch": 7.2021484375e-06,
      "model_forward_time": 0.11558818817138672,
      "step": 1180
    },
    {
      "epoch": 7.2021484375e-06,
      "step": 1180,
      "training_step_time": 0.4152531623840332
    },
    {
      "epoch": 7.208251953125e-06,
      "model_forward_time": 0.11538910865783691,
      "step": 1181
    },
    {
      "epoch": 7.208251953125e-06,
      "step": 1181,
      "training_step_time": 0.3823263645172119
    },
    {
      "epoch": 7.21435546875e-06,
      "model_forward_time": 0.11567091941833496,
      "step": 1182
    },
    {
      "epoch": 7.21435546875e-06,
      "step": 1182,
      "training_step_time": 0.4095151424407959
    },
    {
      "epoch": 7.220458984375e-06,
      "model_forward_time": 0.1153414249420166,
      "step": 1183
    },
    {
      "epoch": 7.220458984375e-06,
      "step": 1183,
      "training_step_time": 0.4781758785247803
    },
    {
      "epoch": 7.2265625e-06,
      "model_forward_time": 0.11531257629394531,
      "step": 1184
    },
    {
      "epoch": 7.2265625e-06,
      "step": 1184,
      "training_step_time": 0.42758774757385254
    },
    {
      "epoch": 7.232666015625e-06,
      "model_forward_time": 0.1156008243560791,
      "step": 1185
    },
    {
      "epoch": 7.232666015625e-06,
      "step": 1185,
      "training_step_time": 0.3905599117279053
    },
    {
      "epoch": 7.23876953125e-06,
      "model_forward_time": 0.11482429504394531,
      "step": 1186
    },
    {
      "epoch": 7.23876953125e-06,
      "step": 1186,
      "training_step_time": 0.39171910285949707
    },
    {
      "epoch": 7.244873046875e-06,
      "model_forward_time": 0.11512231826782227,
      "step": 1187
    },
    {
      "epoch": 7.244873046875e-06,
      "step": 1187,
      "training_step_time": 0.39319276809692383
    },
    {
      "epoch": 7.2509765625e-06,
      "model_forward_time": 0.1150660514831543,
      "step": 1188
    },
    {
      "epoch": 7.2509765625e-06,
      "step": 1188,
      "training_step_time": 0.4198298454284668
    },
    {
      "epoch": 7.257080078125e-06,
      "model_forward_time": 0.11490130424499512,
      "step": 1189
    },
    {
      "epoch": 7.257080078125e-06,
      "step": 1189,
      "training_step_time": 0.4476754665374756
    },
    {
      "epoch": 7.26318359375e-06,
      "grad_norm": 0.8407962918281555,
      "learning_rate": 3.966666666666667e-05,
      "loss": 0.1691,
      "step": 1190
    },
    {
      "epoch": 7.26318359375e-06,
      "model_forward_time": 0.11531329154968262,
      "step": 1190
    },
    {
      "epoch": 7.26318359375e-06,
      "step": 1190,
      "training_step_time": 0.3951883316040039
    },
    {
      "epoch": 7.269287109375e-06,
      "model_forward_time": 0.1149590015411377,
      "step": 1191
    },
    {
      "epoch": 7.269287109375e-06,
      "step": 1191,
      "training_step_time": 0.4838399887084961
    },
    {
      "epoch": 7.275390625e-06,
      "model_forward_time": 0.11513066291809082,
      "step": 1192
    },
    {
      "epoch": 7.275390625e-06,
      "step": 1192,
      "training_step_time": 0.4770545959472656
    },
    {
      "epoch": 7.281494140625e-06,
      "model_forward_time": 0.11590313911437988,
      "step": 1193
    },
    {
      "epoch": 7.281494140625e-06,
      "step": 1193,
      "training_step_time": 0.473752498626709
    },
    {
      "epoch": 7.28759765625e-06,
      "model_forward_time": 0.11533093452453613,
      "step": 1194
    },
    {
      "epoch": 7.28759765625e-06,
      "step": 1194,
      "training_step_time": 0.4351189136505127
    },
    {
      "epoch": 7.293701171875e-06,
      "model_forward_time": 0.11469626426696777,
      "step": 1195
    },
    {
      "epoch": 7.293701171875e-06,
      "step": 1195,
      "training_step_time": 0.4023115634918213
    },
    {
      "epoch": 7.2998046875e-06,
      "model_forward_time": 0.11486005783081055,
      "step": 1196
    },
    {
      "epoch": 7.2998046875e-06,
      "step": 1196,
      "training_step_time": 0.43505239486694336
    },
    {
      "epoch": 7.305908203125e-06,
      "model_forward_time": 0.11548709869384766,
      "step": 1197
    },
    {
      "epoch": 7.305908203125e-06,
      "step": 1197,
      "training_step_time": 0.40651512145996094
    },
    {
      "epoch": 7.31201171875e-06,
      "model_forward_time": 0.115234375,
      "step": 1198
    },
    {
      "epoch": 7.31201171875e-06,
      "step": 1198,
      "training_step_time": 0.3911886215209961
    },
    {
      "epoch": 7.318115234375e-06,
      "model_forward_time": 0.11690235137939453,
      "step": 1199
    },
    {
      "epoch": 7.318115234375e-06,
      "step": 1199,
      "training_step_time": 0.39272403717041016
    },
    {
      "epoch": 7.32421875e-06,
      "grad_norm": 0.7564777731895447,
      "learning_rate": 4e-05,
      "loss": 0.1743,
      "step": 1200
    },
    {
      "epoch": 7.32421875e-06,
      "model_forward_time": 0.11467814445495605,
      "step": 1200
    },
    {
      "epoch": 7.32421875e-06,
      "step": 1200,
      "training_step_time": 0.39711475372314453
    },
    {
      "epoch": 7.330322265625e-06,
      "model_forward_time": 0.11589193344116211,
      "step": 1201
    },
    {
      "epoch": 7.330322265625e-06,
      "step": 1201,
      "training_step_time": 0.39394545555114746
    },
    {
      "epoch": 7.33642578125e-06,
      "model_forward_time": 0.11576056480407715,
      "step": 1202
    },
    {
      "epoch": 7.33642578125e-06,
      "step": 1202,
      "training_step_time": 0.527240514755249
    },
    {
      "epoch": 7.342529296875e-06,
      "model_forward_time": 0.11447310447692871,
      "step": 1203
    },
    {
      "epoch": 7.342529296875e-06,
      "step": 1203,
      "training_step_time": 0.4488542079925537
    },
    {
      "epoch": 7.3486328125e-06,
      "model_forward_time": 0.11551666259765625,
      "step": 1204
    },
    {
      "epoch": 7.3486328125e-06,
      "step": 1204,
      "training_step_time": 0.39281678199768066
    },
    {
      "epoch": 7.354736328125e-06,
      "model_forward_time": 0.11556148529052734,
      "step": 1205
    },
    {
      "epoch": 7.354736328125e-06,
      "step": 1205,
      "training_step_time": 0.3953878879547119
    },
    {
      "epoch": 7.36083984375e-06,
      "model_forward_time": 0.11513495445251465,
      "step": 1206
    },
    {
      "epoch": 7.36083984375e-06,
      "step": 1206,
      "training_step_time": 0.4387211799621582
    },
    {
      "epoch": 7.366943359375e-06,
      "model_forward_time": 0.11571598052978516,
      "step": 1207
    },
    {
      "epoch": 7.366943359375e-06,
      "step": 1207,
      "training_step_time": 0.41816091537475586
    },
    {
      "epoch": 7.373046875e-06,
      "model_forward_time": 0.11519312858581543,
      "step": 1208
    },
    {
      "epoch": 7.373046875e-06,
      "step": 1208,
      "training_step_time": 0.6336548328399658
    },
    {
      "epoch": 7.379150390625e-06,
      "model_forward_time": 0.11545944213867188,
      "step": 1209
    },
    {
      "epoch": 7.379150390625e-06,
      "step": 1209,
      "training_step_time": 0.5467319488525391
    },
    {
      "epoch": 7.38525390625e-06,
      "grad_norm": 0.7832808494567871,
      "learning_rate": 4.0333333333333336e-05,
      "loss": 0.1728,
      "step": 1210
    },
    {
      "epoch": 7.38525390625e-06,
      "model_forward_time": 0.11440539360046387,
      "step": 1210
    },
    {
      "epoch": 7.38525390625e-06,
      "step": 1210,
      "training_step_time": 0.42884254455566406
    },
    {
      "epoch": 7.391357421875e-06,
      "model_forward_time": 0.1148221492767334,
      "step": 1211
    },
    {
      "epoch": 7.391357421875e-06,
      "step": 1211,
      "training_step_time": 0.39890575408935547
    },
    {
      "epoch": 7.3974609375e-06,
      "model_forward_time": 0.11415457725524902,
      "step": 1212
    },
    {
      "epoch": 7.3974609375e-06,
      "step": 1212,
      "training_step_time": 0.4930877685546875
    },
    {
      "epoch": 7.403564453125e-06,
      "model_forward_time": 0.11452364921569824,
      "step": 1213
    },
    {
      "epoch": 7.403564453125e-06,
      "step": 1213,
      "training_step_time": 0.3987298011779785
    },
    {
      "epoch": 7.40966796875e-06,
      "model_forward_time": 0.11483073234558105,
      "step": 1214
    },
    {
      "epoch": 7.40966796875e-06,
      "step": 1214,
      "training_step_time": 0.4100682735443115
    },
    {
      "epoch": 7.415771484375e-06,
      "model_forward_time": 0.11477804183959961,
      "step": 1215
    },
    {
      "epoch": 7.415771484375e-06,
      "step": 1215,
      "training_step_time": 0.43980908393859863
    },
    {
      "epoch": 7.421875e-06,
      "model_forward_time": 0.11489224433898926,
      "step": 1216
    },
    {
      "epoch": 7.421875e-06,
      "step": 1216,
      "training_step_time": 0.4314918518066406
    },
    {
      "epoch": 7.427978515625e-06,
      "model_forward_time": 0.11566567420959473,
      "step": 1217
    },
    {
      "epoch": 7.427978515625e-06,
      "step": 1217,
      "training_step_time": 0.39628124237060547
    },
    {
      "epoch": 7.43408203125e-06,
      "model_forward_time": 0.11537551879882812,
      "step": 1218
    },
    {
      "epoch": 7.43408203125e-06,
      "step": 1218,
      "training_step_time": 0.3923485279083252
    },
    {
      "epoch": 7.440185546875e-06,
      "model_forward_time": 0.11491107940673828,
      "step": 1219
    },
    {
      "epoch": 7.440185546875e-06,
      "step": 1219,
      "training_step_time": 0.40149784088134766
    },
    {
      "epoch": 7.4462890625e-06,
      "grad_norm": 0.7816764712333679,
      "learning_rate": 4.066666666666667e-05,
      "loss": 0.1605,
      "step": 1220
    },
    {
      "epoch": 7.4462890625e-06,
      "model_forward_time": 0.11541152000427246,
      "step": 1220
    },
    {
      "epoch": 7.4462890625e-06,
      "step": 1220,
      "training_step_time": 0.4459569454193115
    },
    {
      "epoch": 7.452392578125e-06,
      "model_forward_time": 0.11554765701293945,
      "step": 1221
    },
    {
      "epoch": 7.452392578125e-06,
      "step": 1221,
      "training_step_time": 0.4795713424682617
    },
    {
      "epoch": 7.45849609375e-06,
      "model_forward_time": 0.11464834213256836,
      "step": 1222
    },
    {
      "epoch": 7.45849609375e-06,
      "step": 1222,
      "training_step_time": 0.42885661125183105
    },
    {
      "epoch": 7.464599609375e-06,
      "model_forward_time": 0.11668848991394043,
      "step": 1223
    },
    {
      "epoch": 7.464599609375e-06,
      "step": 1223,
      "training_step_time": 0.40541696548461914
    },
    {
      "epoch": 7.470703125e-06,
      "model_forward_time": 0.11602544784545898,
      "step": 1224
    },
    {
      "epoch": 7.470703125e-06,
      "step": 1224,
      "training_step_time": 0.5009636878967285
    },
    {
      "epoch": 7.476806640625e-06,
      "model_forward_time": 0.11499762535095215,
      "step": 1225
    },
    {
      "epoch": 7.476806640625e-06,
      "step": 1225,
      "training_step_time": 0.3993360996246338
    },
    {
      "epoch": 7.48291015625e-06,
      "model_forward_time": 0.11684441566467285,
      "step": 1226
    },
    {
      "epoch": 7.48291015625e-06,
      "step": 1226,
      "training_step_time": 0.4123103618621826
    },
    {
      "epoch": 7.489013671875e-06,
      "model_forward_time": 0.11546158790588379,
      "step": 1227
    },
    {
      "epoch": 7.489013671875e-06,
      "step": 1227,
      "training_step_time": 0.3906402587890625
    },
    {
      "epoch": 7.4951171875e-06,
      "model_forward_time": 0.11518430709838867,
      "step": 1228
    },
    {
      "epoch": 7.4951171875e-06,
      "step": 1228,
      "training_step_time": 0.3926835060119629
    },
    {
      "epoch": 7.501220703125e-06,
      "model_forward_time": 0.11544656753540039,
      "step": 1229
    },
    {
      "epoch": 7.501220703125e-06,
      "step": 1229,
      "training_step_time": 0.5904548168182373
    },
    {
      "epoch": 7.50732421875e-06,
      "grad_norm": 0.7419109344482422,
      "learning_rate": 4.1e-05,
      "loss": 0.1707,
      "step": 1230
    },
    {
      "epoch": 7.50732421875e-06,
      "model_forward_time": 0.114898681640625,
      "step": 1230
    },
    {
      "epoch": 7.50732421875e-06,
      "step": 1230,
      "training_step_time": 0.4038071632385254
    },
    {
      "epoch": 7.513427734375e-06,
      "model_forward_time": 0.11571097373962402,
      "step": 1231
    },
    {
      "epoch": 7.513427734375e-06,
      "step": 1231,
      "training_step_time": 0.39194655418395996
    },
    {
      "epoch": 7.51953125e-06,
      "model_forward_time": 0.11536860466003418,
      "step": 1232
    },
    {
      "epoch": 7.51953125e-06,
      "step": 1232,
      "training_step_time": 0.3930075168609619
    },
    {
      "epoch": 7.525634765625e-06,
      "model_forward_time": 0.1143805980682373,
      "step": 1233
    },
    {
      "epoch": 7.525634765625e-06,
      "step": 1233,
      "training_step_time": 0.395963191986084
    },
    {
      "epoch": 7.53173828125e-06,
      "model_forward_time": 0.11520671844482422,
      "step": 1234
    },
    {
      "epoch": 7.53173828125e-06,
      "step": 1234,
      "training_step_time": 0.3922388553619385
    },
    {
      "epoch": 7.537841796875e-06,
      "model_forward_time": 0.11505270004272461,
      "step": 1235
    },
    {
      "epoch": 7.537841796875e-06,
      "step": 1235,
      "training_step_time": 0.4526844024658203
    },
    {
      "epoch": 7.5439453125e-06,
      "model_forward_time": 0.11555814743041992,
      "step": 1236
    },
    {
      "epoch": 7.5439453125e-06,
      "step": 1236,
      "training_step_time": 0.37662625312805176
    },
    {
      "epoch": 7.550048828125e-06,
      "model_forward_time": 0.11513566970825195,
      "step": 1237
    },
    {
      "epoch": 7.550048828125e-06,
      "step": 1237,
      "training_step_time": 0.47516560554504395
    },
    {
      "epoch": 7.55615234375e-06,
      "model_forward_time": 0.11557555198669434,
      "step": 1238
    },
    {
      "epoch": 7.55615234375e-06,
      "step": 1238,
      "training_step_time": 0.44618988037109375
    },
    {
      "epoch": 7.562255859375e-06,
      "model_forward_time": 0.11535310745239258,
      "step": 1239
    },
    {
      "epoch": 7.562255859375e-06,
      "step": 1239,
      "training_step_time": 0.5367350578308105
    },
    {
      "epoch": 7.568359375e-06,
      "grad_norm": 0.7105250358581543,
      "learning_rate": 4.133333333333333e-05,
      "loss": 0.1634,
      "step": 1240
    },
    {
      "epoch": 7.568359375e-06,
      "model_forward_time": 0.11478209495544434,
      "step": 1240
    },
    {
      "epoch": 7.568359375e-06,
      "step": 1240,
      "training_step_time": 0.4489479064941406
    },
    {
      "epoch": 7.574462890625e-06,
      "model_forward_time": 0.11552691459655762,
      "step": 1241
    },
    {
      "epoch": 7.574462890625e-06,
      "step": 1241,
      "training_step_time": 0.4392387866973877
    },
    {
      "epoch": 7.58056640625e-06,
      "model_forward_time": 0.11469006538391113,
      "step": 1242
    },
    {
      "epoch": 7.58056640625e-06,
      "step": 1242,
      "training_step_time": 0.3991208076477051
    },
    {
      "epoch": 7.586669921875e-06,
      "model_forward_time": 0.11534905433654785,
      "step": 1243
    },
    {
      "epoch": 7.586669921875e-06,
      "step": 1243,
      "training_step_time": 0.3976304531097412
    },
    {
      "epoch": 7.5927734375e-06,
      "model_forward_time": 0.11506772041320801,
      "step": 1244
    },
    {
      "epoch": 7.5927734375e-06,
      "step": 1244,
      "training_step_time": 0.39360499382019043
    },
    {
      "epoch": 7.598876953125e-06,
      "model_forward_time": 0.11601686477661133,
      "step": 1245
    },
    {
      "epoch": 7.598876953125e-06,
      "step": 1245,
      "training_step_time": 0.39349889755249023
    },
    {
      "epoch": 7.60498046875e-06,
      "model_forward_time": 0.11521434783935547,
      "step": 1246
    },
    {
      "epoch": 7.60498046875e-06,
      "step": 1246,
      "training_step_time": 0.3921194076538086
    },
    {
      "epoch": 7.611083984375e-06,
      "model_forward_time": 0.11576700210571289,
      "step": 1247
    },
    {
      "epoch": 7.611083984375e-06,
      "step": 1247,
      "training_step_time": 0.7390682697296143
    },
    {
      "epoch": 7.6171875e-06,
      "model_forward_time": 0.11455035209655762,
      "step": 1248
    },
    {
      "epoch": 7.6171875e-06,
      "step": 1248,
      "training_step_time": 0.43691515922546387
    },
    {
      "epoch": 7.623291015625e-06,
      "model_forward_time": 0.11551046371459961,
      "step": 1249
    },
    {
      "epoch": 7.623291015625e-06,
      "step": 1249,
      "training_step_time": 0.41979122161865234
    },
    {
      "epoch": 7.62939453125e-06,
      "grad_norm": 0.8034347295761108,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.1766,
      "step": 1250
    },
    {
      "epoch": 7.62939453125e-06,
      "model_forward_time": 0.11538910865783691,
      "step": 1250
    },
    {
      "epoch": 7.62939453125e-06,
      "step": 1250,
      "training_step_time": 0.45081186294555664
    },
    {
      "epoch": 7.635498046875e-06,
      "model_forward_time": 0.11433982849121094,
      "step": 1251
    },
    {
      "epoch": 7.635498046875e-06,
      "step": 1251,
      "training_step_time": 0.4044160842895508
    },
    {
      "epoch": 7.6416015625e-06,
      "model_forward_time": 0.11478209495544434,
      "step": 1252
    },
    {
      "epoch": 7.6416015625e-06,
      "step": 1252,
      "training_step_time": 0.48001956939697266
    },
    {
      "epoch": 7.647705078125e-06,
      "model_forward_time": 0.11441326141357422,
      "step": 1253
    },
    {
      "epoch": 7.647705078125e-06,
      "step": 1253,
      "training_step_time": 0.7264089584350586
    },
    {
      "epoch": 7.65380859375e-06,
      "model_forward_time": 0.11481356620788574,
      "step": 1254
    },
    {
      "epoch": 7.65380859375e-06,
      "step": 1254,
      "training_step_time": 0.40602898597717285
    },
    {
      "epoch": 7.659912109375e-06,
      "model_forward_time": 0.1150217056274414,
      "step": 1255
    },
    {
      "epoch": 7.659912109375e-06,
      "step": 1255,
      "training_step_time": 0.4226405620574951
    },
    {
      "epoch": 7.666015625e-06,
      "model_forward_time": 0.1151888370513916,
      "step": 1256
    },
    {
      "epoch": 7.666015625e-06,
      "step": 1256,
      "training_step_time": 0.4079780578613281
    },
    {
      "epoch": 7.672119140625e-06,
      "model_forward_time": 0.11499762535095215,
      "step": 1257
    },
    {
      "epoch": 7.672119140625e-06,
      "step": 1257,
      "training_step_time": 0.4006073474884033
    },
    {
      "epoch": 7.67822265625e-06,
      "model_forward_time": 0.11412811279296875,
      "step": 1258
    },
    {
      "epoch": 7.67822265625e-06,
      "step": 1258,
      "training_step_time": 0.38584065437316895
    },
    {
      "epoch": 7.684326171875e-06,
      "model_forward_time": 0.11559128761291504,
      "step": 1259
    },
    {
      "epoch": 7.684326171875e-06,
      "step": 1259,
      "training_step_time": 0.5461585521697998
    },
    {
      "epoch": 7.6904296875e-06,
      "grad_norm": 0.8935962319374084,
      "learning_rate": 4.2e-05,
      "loss": 0.1645,
      "step": 1260
    },
    {
      "epoch": 7.6904296875e-06,
      "model_forward_time": 0.11509847640991211,
      "step": 1260
    },
    {
      "epoch": 7.6904296875e-06,
      "step": 1260,
      "training_step_time": 0.3973574638366699
    },
    {
      "epoch": 7.696533203125e-06,
      "model_forward_time": 0.11516118049621582,
      "step": 1261
    },
    {
      "epoch": 7.696533203125e-06,
      "step": 1261,
      "training_step_time": 0.3944840431213379
    },
    {
      "epoch": 7.70263671875e-06,
      "model_forward_time": 0.1153099536895752,
      "step": 1262
    },
    {
      "epoch": 7.70263671875e-06,
      "step": 1262,
      "training_step_time": 0.4147484302520752
    },
    {
      "epoch": 7.708740234375e-06,
      "model_forward_time": 0.11563706398010254,
      "step": 1263
    },
    {
      "epoch": 7.708740234375e-06,
      "step": 1263,
      "training_step_time": 0.4469790458679199
    },
    {
      "epoch": 7.71484375e-06,
      "model_forward_time": 0.11605262756347656,
      "step": 1264
    },
    {
      "epoch": 7.71484375e-06,
      "step": 1264,
      "training_step_time": 0.40889620780944824
    },
    {
      "epoch": 7.720947265625e-06,
      "model_forward_time": 0.11513137817382812,
      "step": 1265
    },
    {
      "epoch": 7.720947265625e-06,
      "step": 1265,
      "training_step_time": 0.650850772857666
    },
    {
      "epoch": 7.72705078125e-06,
      "model_forward_time": 0.11538195610046387,
      "step": 1266
    },
    {
      "epoch": 7.72705078125e-06,
      "step": 1266,
      "training_step_time": 0.494368314743042
    },
    {
      "epoch": 7.733154296875e-06,
      "model_forward_time": 0.11414718627929688,
      "step": 1267
    },
    {
      "epoch": 7.733154296875e-06,
      "step": 1267,
      "training_step_time": 0.46417951583862305
    },
    {
      "epoch": 7.7392578125e-06,
      "model_forward_time": 0.1148841381072998,
      "step": 1268
    },
    {
      "epoch": 7.7392578125e-06,
      "step": 1268,
      "training_step_time": 0.4285435676574707
    },
    {
      "epoch": 7.745361328125e-06,
      "model_forward_time": 0.1148688793182373,
      "step": 1269
    },
    {
      "epoch": 7.745361328125e-06,
      "step": 1269,
      "training_step_time": 0.38616299629211426
    },
    {
      "epoch": 7.75146484375e-06,
      "grad_norm": 0.6239719986915588,
      "learning_rate": 4.233333333333334e-05,
      "loss": 0.1735,
      "step": 1270
    },
    {
      "epoch": 7.75146484375e-06,
      "model_forward_time": 0.11512374877929688,
      "step": 1270
    },
    {
      "epoch": 7.75146484375e-06,
      "step": 1270,
      "training_step_time": 0.38739466667175293
    },
    {
      "epoch": 7.757568359375e-06,
      "model_forward_time": 0.11568737030029297,
      "step": 1271
    },
    {
      "epoch": 7.757568359375e-06,
      "step": 1271,
      "training_step_time": 0.39150547981262207
    },
    {
      "epoch": 7.763671875e-06,
      "model_forward_time": 0.11523914337158203,
      "step": 1272
    },
    {
      "epoch": 7.763671875e-06,
      "step": 1272,
      "training_step_time": 0.3987443447113037
    },
    {
      "epoch": 7.769775390625e-06,
      "model_forward_time": 0.11487126350402832,
      "step": 1273
    },
    {
      "epoch": 7.769775390625e-06,
      "step": 1273,
      "training_step_time": 0.41489291191101074
    },
    {
      "epoch": 7.77587890625e-06,
      "model_forward_time": 0.1152498722076416,
      "step": 1274
    },
    {
      "epoch": 7.77587890625e-06,
      "step": 1274,
      "training_step_time": 0.3963358402252197
    },
    {
      "epoch": 7.781982421875e-06,
      "model_forward_time": 0.11684894561767578,
      "step": 1275
    },
    {
      "epoch": 7.781982421875e-06,
      "step": 1275,
      "training_step_time": 0.4062204360961914
    },
    {
      "epoch": 7.7880859375e-06,
      "model_forward_time": 0.11518096923828125,
      "step": 1276
    },
    {
      "epoch": 7.7880859375e-06,
      "step": 1276,
      "training_step_time": 0.4064669609069824
    },
    {
      "epoch": 7.794189453125e-06,
      "model_forward_time": 0.1153414249420166,
      "step": 1277
    },
    {
      "epoch": 7.794189453125e-06,
      "step": 1277,
      "training_step_time": 0.4705502986907959
    },
    {
      "epoch": 7.80029296875e-06,
      "model_forward_time": 0.11488127708435059,
      "step": 1278
    },
    {
      "epoch": 7.80029296875e-06,
      "step": 1278,
      "training_step_time": 0.47042059898376465
    },
    {
      "epoch": 7.806396484375e-06,
      "model_forward_time": 0.1151890754699707,
      "step": 1279
    },
    {
      "epoch": 7.806396484375e-06,
      "step": 1279,
      "training_step_time": 0.37293362617492676
    },
    {
      "epoch": 7.8125e-06,
      "grad_norm": 0.6374793648719788,
      "learning_rate": 4.266666666666667e-05,
      "loss": 0.1693,
      "step": 1280
    },
    {
      "epoch": 7.8125e-06,
      "model_forward_time": 0.11547684669494629,
      "step": 1280
    },
    {
      "epoch": 7.8125e-06,
      "step": 1280,
      "training_step_time": 0.4864318370819092
    },
    {
      "epoch": 7.818603515625e-06,
      "model_forward_time": 0.1148374080657959,
      "step": 1281
    },
    {
      "epoch": 7.818603515625e-06,
      "step": 1281,
      "training_step_time": 0.5042605400085449
    },
    {
      "epoch": 7.82470703125e-06,
      "model_forward_time": 0.11467385292053223,
      "step": 1282
    },
    {
      "epoch": 7.82470703125e-06,
      "step": 1282,
      "training_step_time": 0.41266942024230957
    },
    {
      "epoch": 7.830810546875e-06,
      "model_forward_time": 0.11502289772033691,
      "step": 1283
    },
    {
      "epoch": 7.830810546875e-06,
      "step": 1283,
      "training_step_time": 0.4006023406982422
    },
    {
      "epoch": 7.8369140625e-06,
      "model_forward_time": 0.11515998840332031,
      "step": 1284
    },
    {
      "epoch": 7.8369140625e-06,
      "step": 1284,
      "training_step_time": 0.39290547370910645
    },
    {
      "epoch": 7.843017578125e-06,
      "model_forward_time": 0.1151282787322998,
      "step": 1285
    },
    {
      "epoch": 7.843017578125e-06,
      "step": 1285,
      "training_step_time": 0.3934509754180908
    },
    {
      "epoch": 7.84912109375e-06,
      "model_forward_time": 0.11574554443359375,
      "step": 1286
    },
    {
      "epoch": 7.84912109375e-06,
      "step": 1286,
      "training_step_time": 0.3910961151123047
    },
    {
      "epoch": 7.855224609375e-06,
      "model_forward_time": 0.11521625518798828,
      "step": 1287
    },
    {
      "epoch": 7.855224609375e-06,
      "step": 1287,
      "training_step_time": 0.40825676918029785
    },
    {
      "epoch": 7.861328125e-06,
      "model_forward_time": 0.11530470848083496,
      "step": 1288
    },
    {
      "epoch": 7.861328125e-06,
      "step": 1288,
      "training_step_time": 0.4009084701538086
    },
    {
      "epoch": 7.867431640625e-06,
      "model_forward_time": 0.11597061157226562,
      "step": 1289
    },
    {
      "epoch": 7.867431640625e-06,
      "step": 1289,
      "training_step_time": 0.3975238800048828
    },
    {
      "epoch": 7.87353515625e-06,
      "grad_norm": 0.5592852234840393,
      "learning_rate": 4.3e-05,
      "loss": 0.1773,
      "step": 1290
    },
    {
      "epoch": 7.87353515625e-06,
      "model_forward_time": 0.11573648452758789,
      "step": 1290
    },
    {
      "epoch": 7.87353515625e-06,
      "step": 1290,
      "training_step_time": 0.39718151092529297
    },
    {
      "epoch": 7.879638671875e-06,
      "model_forward_time": 0.11474990844726562,
      "step": 1291
    },
    {
      "epoch": 7.879638671875e-06,
      "step": 1291,
      "training_step_time": 0.4044344425201416
    },
    {
      "epoch": 7.8857421875e-06,
      "model_forward_time": 0.11549162864685059,
      "step": 1292
    },
    {
      "epoch": 7.8857421875e-06,
      "step": 1292,
      "training_step_time": 0.45020580291748047
    },
    {
      "epoch": 7.891845703125e-06,
      "model_forward_time": 0.11591792106628418,
      "step": 1293
    },
    {
      "epoch": 7.891845703125e-06,
      "step": 1293,
      "training_step_time": 0.4165318012237549
    },
    {
      "epoch": 7.89794921875e-06,
      "model_forward_time": 0.11554598808288574,
      "step": 1294
    },
    {
      "epoch": 7.89794921875e-06,
      "step": 1294,
      "training_step_time": 0.3700387477874756
    },
    {
      "epoch": 7.904052734375e-06,
      "model_forward_time": 0.11493539810180664,
      "step": 1295
    },
    {
      "epoch": 7.904052734375e-06,
      "step": 1295,
      "training_step_time": 0.4190986156463623
    },
    {
      "epoch": 7.91015625e-06,
      "model_forward_time": 0.11710143089294434,
      "step": 1296
    },
    {
      "epoch": 7.91015625e-06,
      "step": 1296,
      "training_step_time": 0.48522233963012695
    },
    {
      "epoch": 7.916259765625e-06,
      "model_forward_time": 0.11469626426696777,
      "step": 1297
    },
    {
      "epoch": 7.916259765625e-06,
      "step": 1297,
      "training_step_time": 0.4609072208404541
    },
    {
      "epoch": 7.92236328125e-06,
      "model_forward_time": 0.1144716739654541,
      "step": 1298
    },
    {
      "epoch": 7.92236328125e-06,
      "step": 1298,
      "training_step_time": 0.4069700241088867
    },
    {
      "epoch": 7.928466796875e-06,
      "model_forward_time": 0.11493825912475586,
      "step": 1299
    },
    {
      "epoch": 7.928466796875e-06,
      "step": 1299,
      "training_step_time": 0.39900970458984375
    },
    {
      "epoch": 7.9345703125e-06,
      "grad_norm": 0.8232478499412537,
      "learning_rate": 4.3333333333333334e-05,
      "loss": 0.1667,
      "step": 1300
    },
    {
      "epoch": 7.9345703125e-06,
      "model_forward_time": 0.11521363258361816,
      "step": 1300
    },
    {
      "epoch": 7.9345703125e-06,
      "step": 1300,
      "training_step_time": 0.3949086666107178
    },
    {
      "epoch": 7.940673828125e-06,
      "model_forward_time": 0.11531472206115723,
      "step": 1301
    },
    {
      "epoch": 7.940673828125e-06,
      "step": 1301,
      "training_step_time": 0.39579319953918457
    },
    {
      "epoch": 7.94677734375e-06,
      "model_forward_time": 0.11543774604797363,
      "step": 1302
    },
    {
      "epoch": 7.94677734375e-06,
      "step": 1302,
      "training_step_time": 0.40082597732543945
    },
    {
      "epoch": 7.952880859375e-06,
      "model_forward_time": 0.11488056182861328,
      "step": 1303
    },
    {
      "epoch": 7.952880859375e-06,
      "step": 1303,
      "training_step_time": 0.4007909297943115
    },
    {
      "epoch": 7.958984375e-06,
      "model_forward_time": 0.11556077003479004,
      "step": 1304
    },
    {
      "epoch": 7.958984375e-06,
      "step": 1304,
      "training_step_time": 0.39730095863342285
    },
    {
      "epoch": 7.965087890625e-06,
      "model_forward_time": 0.11515474319458008,
      "step": 1305
    },
    {
      "epoch": 7.965087890625e-06,
      "step": 1305,
      "training_step_time": 0.419649600982666
    },
    {
      "epoch": 7.97119140625e-06,
      "model_forward_time": 0.11601877212524414,
      "step": 1306
    },
    {
      "epoch": 7.97119140625e-06,
      "step": 1306,
      "training_step_time": 0.4004032611846924
    },
    {
      "epoch": 7.977294921875e-06,
      "model_forward_time": 0.11528205871582031,
      "step": 1307
    },
    {
      "epoch": 7.977294921875e-06,
      "step": 1307,
      "training_step_time": 0.48099684715270996
    },
    {
      "epoch": 7.9833984375e-06,
      "model_forward_time": 0.11461329460144043,
      "step": 1308
    },
    {
      "epoch": 7.9833984375e-06,
      "step": 1308,
      "training_step_time": 0.5013775825500488
    },
    {
      "epoch": 7.989501953125e-06,
      "model_forward_time": 0.11567807197570801,
      "step": 1309
    },
    {
      "epoch": 7.989501953125e-06,
      "step": 1309,
      "training_step_time": 0.4380619525909424
    },
    {
      "epoch": 7.99560546875e-06,
      "grad_norm": 0.7374643087387085,
      "learning_rate": 4.3666666666666666e-05,
      "loss": 0.1691,
      "step": 1310
    },
    {
      "epoch": 7.99560546875e-06,
      "model_forward_time": 0.11599159240722656,
      "step": 1310
    },
    {
      "epoch": 7.99560546875e-06,
      "step": 1310,
      "training_step_time": 0.494504451751709
    },
    {
      "epoch": 8.001708984375e-06,
      "model_forward_time": 0.11615347862243652,
      "step": 1311
    },
    {
      "epoch": 8.001708984375e-06,
      "step": 1311,
      "training_step_time": 0.41882872581481934
    },
    {
      "epoch": 8.0078125e-06,
      "model_forward_time": 0.11415386199951172,
      "step": 1312
    },
    {
      "epoch": 8.0078125e-06,
      "step": 1312,
      "training_step_time": 0.4840550422668457
    },
    {
      "epoch": 8.013916015625e-06,
      "model_forward_time": 0.1151275634765625,
      "step": 1313
    },
    {
      "epoch": 8.013916015625e-06,
      "step": 1313,
      "training_step_time": 0.392805814743042
    },
    {
      "epoch": 8.02001953125e-06,
      "model_forward_time": 0.11513376235961914,
      "step": 1314
    },
    {
      "epoch": 8.02001953125e-06,
      "step": 1314,
      "training_step_time": 0.3801414966583252
    },
    {
      "epoch": 8.026123046875e-06,
      "model_forward_time": 0.114654541015625,
      "step": 1315
    },
    {
      "epoch": 8.026123046875e-06,
      "step": 1315,
      "training_step_time": 0.39557313919067383
    },
    {
      "epoch": 8.0322265625e-06,
      "model_forward_time": 0.11487603187561035,
      "step": 1316
    },
    {
      "epoch": 8.0322265625e-06,
      "step": 1316,
      "training_step_time": 0.39962053298950195
    },
    {
      "epoch": 8.038330078125e-06,
      "model_forward_time": 0.11523747444152832,
      "step": 1317
    },
    {
      "epoch": 8.038330078125e-06,
      "step": 1317,
      "training_step_time": 0.40201854705810547
    },
    {
      "epoch": 8.04443359375e-06,
      "model_forward_time": 0.11554980278015137,
      "step": 1318
    },
    {
      "epoch": 8.04443359375e-06,
      "step": 1318,
      "training_step_time": 0.39690446853637695
    },
    {
      "epoch": 8.050537109375e-06,
      "model_forward_time": 0.11508464813232422,
      "step": 1319
    },
    {
      "epoch": 8.050537109375e-06,
      "step": 1319,
      "training_step_time": 0.40001583099365234
    },
    {
      "epoch": 8.056640625e-06,
      "grad_norm": 0.8675338625907898,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 0.1749,
      "step": 1320
    },
    {
      "epoch": 8.056640625e-06,
      "model_forward_time": 0.11539435386657715,
      "step": 1320
    },
    {
      "epoch": 8.056640625e-06,
      "step": 1320,
      "training_step_time": 0.38598084449768066
    },
    {
      "epoch": 8.062744140625e-06,
      "model_forward_time": 0.11606240272521973,
      "step": 1321
    },
    {
      "epoch": 8.062744140625e-06,
      "step": 1321,
      "training_step_time": 0.40241289138793945
    },
    {
      "epoch": 8.06884765625e-06,
      "model_forward_time": 0.11548256874084473,
      "step": 1322
    },
    {
      "epoch": 8.06884765625e-06,
      "step": 1322,
      "training_step_time": 0.4410891532897949
    },
    {
      "epoch": 8.074951171875e-06,
      "model_forward_time": 0.11545372009277344,
      "step": 1323
    },
    {
      "epoch": 8.074951171875e-06,
      "step": 1323,
      "training_step_time": 0.44417452812194824
    },
    {
      "epoch": 8.0810546875e-06,
      "model_forward_time": 0.1151580810546875,
      "step": 1324
    },
    {
      "epoch": 8.0810546875e-06,
      "step": 1324,
      "training_step_time": 0.4180119037628174
    },
    {
      "epoch": 8.087158203125e-06,
      "model_forward_time": 0.11504626274108887,
      "step": 1325
    },
    {
      "epoch": 8.087158203125e-06,
      "step": 1325,
      "training_step_time": 0.44541430473327637
    },
    {
      "epoch": 8.09326171875e-06,
      "model_forward_time": 0.11552667617797852,
      "step": 1326
    },
    {
      "epoch": 8.09326171875e-06,
      "step": 1326,
      "training_step_time": 0.4839332103729248
    },
    {
      "epoch": 8.099365234375e-06,
      "model_forward_time": 0.11505961418151855,
      "step": 1327
    },
    {
      "epoch": 8.099365234375e-06,
      "step": 1327,
      "training_step_time": 0.4677104949951172
    },
    {
      "epoch": 8.10546875e-06,
      "model_forward_time": 0.11481833457946777,
      "step": 1328
    },
    {
      "epoch": 8.10546875e-06,
      "step": 1328,
      "training_step_time": 0.3955819606781006
    },
    {
      "epoch": 8.111572265625e-06,
      "model_forward_time": 0.11533784866333008,
      "step": 1329
    },
    {
      "epoch": 8.111572265625e-06,
      "step": 1329,
      "training_step_time": 0.3947124481201172
    },
    {
      "epoch": 8.11767578125e-06,
      "grad_norm": 0.6675403714179993,
      "learning_rate": 4.433333333333334e-05,
      "loss": 0.1543,
      "step": 1330
    },
    {
      "epoch": 8.11767578125e-06,
      "model_forward_time": 0.11514687538146973,
      "step": 1330
    },
    {
      "epoch": 8.11767578125e-06,
      "step": 1330,
      "training_step_time": 0.39127159118652344
    },
    {
      "epoch": 8.123779296875e-06,
      "model_forward_time": 0.11569070816040039,
      "step": 1331
    },
    {
      "epoch": 8.123779296875e-06,
      "step": 1331,
      "training_step_time": 0.3920176029205322
    },
    {
      "epoch": 8.1298828125e-06,
      "model_forward_time": 0.11509919166564941,
      "step": 1332
    },
    {
      "epoch": 8.1298828125e-06,
      "step": 1332,
      "training_step_time": 0.40317296981811523
    },
    {
      "epoch": 8.135986328125e-06,
      "model_forward_time": 0.11516499519348145,
      "step": 1333
    },
    {
      "epoch": 8.135986328125e-06,
      "step": 1333,
      "training_step_time": 0.3965420722961426
    },
    {
      "epoch": 8.14208984375e-06,
      "model_forward_time": 0.11559700965881348,
      "step": 1334
    },
    {
      "epoch": 8.14208984375e-06,
      "step": 1334,
      "training_step_time": 0.39336085319519043
    },
    {
      "epoch": 8.148193359375e-06,
      "model_forward_time": 0.11585235595703125,
      "step": 1335
    },
    {
      "epoch": 8.148193359375e-06,
      "step": 1335,
      "training_step_time": 0.4353921413421631
    },
    {
      "epoch": 8.154296875e-06,
      "model_forward_time": 0.11554288864135742,
      "step": 1336
    },
    {
      "epoch": 8.154296875e-06,
      "step": 1336,
      "training_step_time": 0.4274864196777344
    },
    {
      "epoch": 8.160400390625e-06,
      "model_forward_time": 0.11519837379455566,
      "step": 1337
    },
    {
      "epoch": 8.160400390625e-06,
      "step": 1337,
      "training_step_time": 0.4202854633331299
    },
    {
      "epoch": 8.16650390625e-06,
      "model_forward_time": 0.11582684516906738,
      "step": 1338
    },
    {
      "epoch": 8.16650390625e-06,
      "step": 1338,
      "training_step_time": 0.45694875717163086
    },
    {
      "epoch": 8.172607421875e-06,
      "model_forward_time": 0.11557531356811523,
      "step": 1339
    },
    {
      "epoch": 8.172607421875e-06,
      "step": 1339,
      "training_step_time": 0.43560028076171875
    },
    {
      "epoch": 8.1787109375e-06,
      "grad_norm": 0.9887769222259521,
      "learning_rate": 4.466666666666667e-05,
      "loss": 0.1647,
      "step": 1340
    },
    {
      "epoch": 8.1787109375e-06,
      "model_forward_time": 0.11560511589050293,
      "step": 1340
    },
    {
      "epoch": 8.1787109375e-06,
      "step": 1340,
      "training_step_time": 0.42524123191833496
    },
    {
      "epoch": 8.184814453125e-06,
      "model_forward_time": 0.11520600318908691,
      "step": 1341
    },
    {
      "epoch": 8.184814453125e-06,
      "step": 1341,
      "training_step_time": 0.4857659339904785
    },
    {
      "epoch": 8.19091796875e-06,
      "model_forward_time": 0.11489248275756836,
      "step": 1342
    },
    {
      "epoch": 8.19091796875e-06,
      "step": 1342,
      "training_step_time": 0.4209568500518799
    },
    {
      "epoch": 8.197021484375e-06,
      "model_forward_time": 0.11472558975219727,
      "step": 1343
    },
    {
      "epoch": 8.197021484375e-06,
      "step": 1343,
      "training_step_time": 0.39816975593566895
    },
    {
      "epoch": 8.203125e-06,
      "model_forward_time": 0.11563658714294434,
      "step": 1344
    },
    {
      "epoch": 8.203125e-06,
      "step": 1344,
      "training_step_time": 0.3974735736846924
    },
    {
      "epoch": 8.209228515625e-06,
      "model_forward_time": 0.11538457870483398,
      "step": 1345
    },
    {
      "epoch": 8.209228515625e-06,
      "step": 1345,
      "training_step_time": 0.3957233428955078
    },
    {
      "epoch": 8.21533203125e-06,
      "model_forward_time": 0.11556148529052734,
      "step": 1346
    },
    {
      "epoch": 8.21533203125e-06,
      "step": 1346,
      "training_step_time": 0.409731388092041
    },
    {
      "epoch": 8.221435546875e-06,
      "model_forward_time": 0.11558961868286133,
      "step": 1347
    },
    {
      "epoch": 8.221435546875e-06,
      "step": 1347,
      "training_step_time": 0.5055994987487793
    },
    {
      "epoch": 8.2275390625e-06,
      "model_forward_time": 0.11539530754089355,
      "step": 1348
    },
    {
      "epoch": 8.2275390625e-06,
      "step": 1348,
      "training_step_time": 0.3976860046386719
    },
    {
      "epoch": 8.233642578125e-06,
      "model_forward_time": 0.1151425838470459,
      "step": 1349
    },
    {
      "epoch": 8.233642578125e-06,
      "step": 1349,
      "training_step_time": 0.3901364803314209
    },
    {
      "epoch": 8.23974609375e-06,
      "grad_norm": 0.8821226954460144,
      "learning_rate": 4.5e-05,
      "loss": 0.153,
      "step": 1350
    },
    {
      "epoch": 8.23974609375e-06,
      "model_forward_time": 0.11494803428649902,
      "step": 1350
    },
    {
      "epoch": 8.23974609375e-06,
      "step": 1350,
      "training_step_time": 0.479388952255249
    },
    {
      "epoch": 8.245849609375e-06,
      "model_forward_time": 0.11470389366149902,
      "step": 1351
    },
    {
      "epoch": 8.245849609375e-06,
      "step": 1351,
      "training_step_time": 0.4766843318939209
    },
    {
      "epoch": 8.251953125e-06,
      "model_forward_time": 0.11493182182312012,
      "step": 1352
    },
    {
      "epoch": 8.251953125e-06,
      "step": 1352,
      "training_step_time": 0.48047375679016113
    },
    {
      "epoch": 8.258056640625e-06,
      "model_forward_time": 0.11422610282897949,
      "step": 1353
    },
    {
      "epoch": 8.258056640625e-06,
      "step": 1353,
      "training_step_time": 0.36698412895202637
    },
    {
      "epoch": 8.26416015625e-06,
      "model_forward_time": 0.11477422714233398,
      "step": 1354
    },
    {
      "epoch": 8.26416015625e-06,
      "step": 1354,
      "training_step_time": 0.47293567657470703
    },
    {
      "epoch": 8.270263671875e-06,
      "model_forward_time": 0.11411833763122559,
      "step": 1355
    },
    {
      "epoch": 8.270263671875e-06,
      "step": 1355,
      "training_step_time": 0.45593738555908203
    },
    {
      "epoch": 8.2763671875e-06,
      "model_forward_time": 0.11499619483947754,
      "step": 1356
    },
    {
      "epoch": 8.2763671875e-06,
      "step": 1356,
      "training_step_time": 0.39960432052612305
    },
    {
      "epoch": 8.282470703125e-06,
      "model_forward_time": 0.11429572105407715,
      "step": 1357
    },
    {
      "epoch": 8.282470703125e-06,
      "step": 1357,
      "training_step_time": 0.4023475646972656
    },
    {
      "epoch": 8.28857421875e-06,
      "model_forward_time": 0.11466026306152344,
      "step": 1358
    },
    {
      "epoch": 8.28857421875e-06,
      "step": 1358,
      "training_step_time": 0.3861370086669922
    },
    {
      "epoch": 8.294677734375e-06,
      "model_forward_time": 0.11531209945678711,
      "step": 1359
    },
    {
      "epoch": 8.294677734375e-06,
      "step": 1359,
      "training_step_time": 0.38807058334350586
    },
    {
      "epoch": 8.30078125e-06,
      "grad_norm": 0.8778019547462463,
      "learning_rate": 4.5333333333333335e-05,
      "loss": 0.1627,
      "step": 1360
    },
    {
      "epoch": 8.30078125e-06,
      "model_forward_time": 0.11536741256713867,
      "step": 1360
    },
    {
      "epoch": 8.30078125e-06,
      "step": 1360,
      "training_step_time": 0.39740514755249023
    },
    {
      "epoch": 8.306884765625e-06,
      "model_forward_time": 0.11529064178466797,
      "step": 1361
    },
    {
      "epoch": 8.306884765625e-06,
      "step": 1361,
      "training_step_time": 0.4046921730041504
    },
    {
      "epoch": 8.31298828125e-06,
      "model_forward_time": 0.11526799201965332,
      "step": 1362
    },
    {
      "epoch": 8.31298828125e-06,
      "step": 1362,
      "training_step_time": 0.39876365661621094
    },
    {
      "epoch": 8.319091796875e-06,
      "model_forward_time": 0.11551022529602051,
      "step": 1363
    },
    {
      "epoch": 8.319091796875e-06,
      "step": 1363,
      "training_step_time": 0.4621739387512207
    },
    {
      "epoch": 8.3251953125e-06,
      "model_forward_time": 0.11538004875183105,
      "step": 1364
    },
    {
      "epoch": 8.3251953125e-06,
      "step": 1364,
      "training_step_time": 0.39601588249206543
    },
    {
      "epoch": 8.331298828125e-06,
      "model_forward_time": 0.11754345893859863,
      "step": 1365
    },
    {
      "epoch": 8.331298828125e-06,
      "step": 1365,
      "training_step_time": 0.5513746738433838
    },
    {
      "epoch": 8.33740234375e-06,
      "model_forward_time": 0.11600065231323242,
      "step": 1366
    },
    {
      "epoch": 8.33740234375e-06,
      "step": 1366,
      "training_step_time": 0.4253842830657959
    },
    {
      "epoch": 8.343505859375e-06,
      "model_forward_time": 0.11648964881896973,
      "step": 1367
    },
    {
      "epoch": 8.343505859375e-06,
      "step": 1367,
      "training_step_time": 0.532545804977417
    },
    {
      "epoch": 8.349609375e-06,
      "model_forward_time": 0.11520886421203613,
      "step": 1368
    },
    {
      "epoch": 8.349609375e-06,
      "step": 1368,
      "training_step_time": 0.4819474220275879
    },
    {
      "epoch": 8.355712890625e-06,
      "model_forward_time": 0.11577343940734863,
      "step": 1369
    },
    {
      "epoch": 8.355712890625e-06,
      "step": 1369,
      "training_step_time": 0.4816269874572754
    },
    {
      "epoch": 8.36181640625e-06,
      "grad_norm": 0.853164792060852,
      "learning_rate": 4.566666666666667e-05,
      "loss": 0.1485,
      "step": 1370
    },
    {
      "epoch": 8.36181640625e-06,
      "model_forward_time": 0.11446690559387207,
      "step": 1370
    },
    {
      "epoch": 8.36181640625e-06,
      "step": 1370,
      "training_step_time": 0.4374055862426758
    },
    {
      "epoch": 8.367919921875e-06,
      "model_forward_time": 0.11479544639587402,
      "step": 1371
    },
    {
      "epoch": 8.367919921875e-06,
      "step": 1371,
      "training_step_time": 0.38926243782043457
    },
    {
      "epoch": 8.3740234375e-06,
      "model_forward_time": 0.11429882049560547,
      "step": 1372
    },
    {
      "epoch": 8.3740234375e-06,
      "step": 1372,
      "training_step_time": 0.3943746089935303
    },
    {
      "epoch": 8.380126953125e-06,
      "model_forward_time": 0.11509132385253906,
      "step": 1373
    },
    {
      "epoch": 8.380126953125e-06,
      "step": 1373,
      "training_step_time": 0.3980262279510498
    },
    {
      "epoch": 8.38623046875e-06,
      "model_forward_time": 0.1163477897644043,
      "step": 1374
    },
    {
      "epoch": 8.38623046875e-06,
      "step": 1374,
      "training_step_time": 0.39809584617614746
    },
    {
      "epoch": 8.392333984375e-06,
      "model_forward_time": 0.11517500877380371,
      "step": 1375
    },
    {
      "epoch": 8.392333984375e-06,
      "step": 1375,
      "training_step_time": 0.39718103408813477
    },
    {
      "epoch": 8.3984375e-06,
      "model_forward_time": 0.1156766414642334,
      "step": 1376
    },
    {
      "epoch": 8.3984375e-06,
      "step": 1376,
      "training_step_time": 0.4056549072265625
    },
    {
      "epoch": 8.404541015625e-06,
      "model_forward_time": 0.11958646774291992,
      "step": 1377
    },
    {
      "epoch": 8.404541015625e-06,
      "step": 1377,
      "training_step_time": 0.5205910205841064
    },
    {
      "epoch": 8.41064453125e-06,
      "model_forward_time": 0.11916065216064453,
      "step": 1378
    },
    {
      "epoch": 8.41064453125e-06,
      "step": 1378,
      "training_step_time": 0.3811459541320801
    },
    {
      "epoch": 8.416748046875e-06,
      "model_forward_time": 0.11576223373413086,
      "step": 1379
    },
    {
      "epoch": 8.416748046875e-06,
      "step": 1379,
      "training_step_time": 0.38619494438171387
    },
    {
      "epoch": 8.4228515625e-06,
      "grad_norm": 0.617634117603302,
      "learning_rate": 4.600000000000001e-05,
      "loss": 0.1632,
      "step": 1380
    },
    {
      "epoch": 8.4228515625e-06,
      "model_forward_time": 0.1154780387878418,
      "step": 1380
    },
    {
      "epoch": 8.4228515625e-06,
      "step": 1380,
      "training_step_time": 0.40378403663635254
    },
    {
      "epoch": 8.428955078125e-06,
      "model_forward_time": 0.11490988731384277,
      "step": 1381
    },
    {
      "epoch": 8.428955078125e-06,
      "step": 1381,
      "training_step_time": 0.4783482551574707
    },
    {
      "epoch": 8.43505859375e-06,
      "model_forward_time": 0.11513710021972656,
      "step": 1382
    },
    {
      "epoch": 8.43505859375e-06,
      "step": 1382,
      "training_step_time": 0.4139423370361328
    },
    {
      "epoch": 8.441162109375e-06,
      "model_forward_time": 0.11566734313964844,
      "step": 1383
    },
    {
      "epoch": 8.441162109375e-06,
      "step": 1383,
      "training_step_time": 0.4788210391998291
    },
    {
      "epoch": 8.447265625e-06,
      "model_forward_time": 0.11513662338256836,
      "step": 1384
    },
    {
      "epoch": 8.447265625e-06,
      "step": 1384,
      "training_step_time": 0.46393322944641113
    },
    {
      "epoch": 8.453369140625e-06,
      "model_forward_time": 0.11603140830993652,
      "step": 1385
    },
    {
      "epoch": 8.453369140625e-06,
      "step": 1385,
      "training_step_time": 0.43648433685302734
    },
    {
      "epoch": 8.45947265625e-06,
      "model_forward_time": 0.11509823799133301,
      "step": 1386
    },
    {
      "epoch": 8.45947265625e-06,
      "step": 1386,
      "training_step_time": 0.3966352939605713
    },
    {
      "epoch": 8.465576171875e-06,
      "model_forward_time": 0.11559391021728516,
      "step": 1387
    },
    {
      "epoch": 8.465576171875e-06,
      "step": 1387,
      "training_step_time": 0.39789915084838867
    },
    {
      "epoch": 8.4716796875e-06,
      "model_forward_time": 0.11687946319580078,
      "step": 1388
    },
    {
      "epoch": 8.4716796875e-06,
      "step": 1388,
      "training_step_time": 0.3986835479736328
    },
    {
      "epoch": 8.477783203125e-06,
      "model_forward_time": 0.11607027053833008,
      "step": 1389
    },
    {
      "epoch": 8.477783203125e-06,
      "step": 1389,
      "training_step_time": 0.4041759967803955
    },
    {
      "epoch": 8.48388671875e-06,
      "grad_norm": 0.7520446181297302,
      "learning_rate": 4.633333333333333e-05,
      "loss": 0.1552,
      "step": 1390
    },
    {
      "epoch": 8.48388671875e-06,
      "model_forward_time": 0.11542367935180664,
      "step": 1390
    },
    {
      "epoch": 8.48388671875e-06,
      "step": 1390,
      "training_step_time": 0.42499566078186035
    },
    {
      "epoch": 8.489990234375e-06,
      "model_forward_time": 0.11544203758239746,
      "step": 1391
    },
    {
      "epoch": 8.489990234375e-06,
      "step": 1391,
      "training_step_time": 0.3993685245513916
    },
    {
      "epoch": 8.49609375e-06,
      "model_forward_time": 0.11576437950134277,
      "step": 1392
    },
    {
      "epoch": 8.49609375e-06,
      "step": 1392,
      "training_step_time": 0.39933133125305176
    },
    {
      "epoch": 8.502197265625e-06,
      "model_forward_time": 0.11509490013122559,
      "step": 1393
    },
    {
      "epoch": 8.502197265625e-06,
      "step": 1393,
      "training_step_time": 0.41196417808532715
    },
    {
      "epoch": 8.50830078125e-06,
      "model_forward_time": 0.11582279205322266,
      "step": 1394
    },
    {
      "epoch": 8.50830078125e-06,
      "step": 1394,
      "training_step_time": 0.4016401767730713
    },
    {
      "epoch": 8.514404296875e-06,
      "model_forward_time": 0.11507606506347656,
      "step": 1395
    },
    {
      "epoch": 8.514404296875e-06,
      "step": 1395,
      "training_step_time": 0.5665712356567383
    },
    {
      "epoch": 8.5205078125e-06,
      "model_forward_time": 0.11589765548706055,
      "step": 1396
    },
    {
      "epoch": 8.5205078125e-06,
      "step": 1396,
      "training_step_time": 0.40050721168518066
    },
    {
      "epoch": 8.526611328125e-06,
      "model_forward_time": 0.1166081428527832,
      "step": 1397
    },
    {
      "epoch": 8.526611328125e-06,
      "step": 1397,
      "training_step_time": 0.49790191650390625
    },
    {
      "epoch": 8.53271484375e-06,
      "model_forward_time": 0.11488676071166992,
      "step": 1398
    },
    {
      "epoch": 8.53271484375e-06,
      "step": 1398,
      "training_step_time": 0.4798853397369385
    },
    {
      "epoch": 8.538818359375e-06,
      "model_forward_time": 0.11500883102416992,
      "step": 1399
    },
    {
      "epoch": 8.538818359375e-06,
      "step": 1399,
      "training_step_time": 0.5145010948181152
    },
    {
      "epoch": 8.544921875e-06,
      "grad_norm": 0.5595810413360596,
      "learning_rate": 4.666666666666667e-05,
      "loss": 0.1644,
      "step": 1400
    },
    {
      "epoch": 8.544921875e-06,
      "model_forward_time": 0.11544942855834961,
      "step": 1400
    },
    {
      "epoch": 8.544921875e-06,
      "step": 1400,
      "training_step_time": 0.40616822242736816
    },
    {
      "epoch": 8.551025390625e-06,
      "model_forward_time": 0.11459660530090332,
      "step": 1401
    },
    {
      "epoch": 8.551025390625e-06,
      "step": 1401,
      "training_step_time": 0.39261627197265625
    },
    {
      "epoch": 8.55712890625e-06,
      "model_forward_time": 0.11495637893676758,
      "step": 1402
    },
    {
      "epoch": 8.55712890625e-06,
      "step": 1402,
      "training_step_time": 0.4018824100494385
    },
    {
      "epoch": 8.563232421875e-06,
      "model_forward_time": 0.11509227752685547,
      "step": 1403
    },
    {
      "epoch": 8.563232421875e-06,
      "step": 1403,
      "training_step_time": 0.3998422622680664
    },
    {
      "epoch": 8.5693359375e-06,
      "model_forward_time": 0.11574482917785645,
      "step": 1404
    },
    {
      "epoch": 8.5693359375e-06,
      "step": 1404,
      "training_step_time": 0.3900444507598877
    },
    {
      "epoch": 8.575439453125e-06,
      "model_forward_time": 0.11568641662597656,
      "step": 1405
    },
    {
      "epoch": 8.575439453125e-06,
      "step": 1405,
      "training_step_time": 0.3987751007080078
    },
    {
      "epoch": 8.58154296875e-06,
      "model_forward_time": 0.11509919166564941,
      "step": 1406
    },
    {
      "epoch": 8.58154296875e-06,
      "step": 1406,
      "training_step_time": 0.40744805335998535
    },
    {
      "epoch": 8.587646484375e-06,
      "model_forward_time": 0.11587333679199219,
      "step": 1407
    },
    {
      "epoch": 8.587646484375e-06,
      "step": 1407,
      "training_step_time": 0.5238609313964844
    },
    {
      "epoch": 8.59375e-06,
      "model_forward_time": 0.115875244140625,
      "step": 1408
    },
    {
      "epoch": 8.59375e-06,
      "step": 1408,
      "training_step_time": 0.39795732498168945
    },
    {
      "epoch": 8.599853515625e-06,
      "model_forward_time": 0.11504483222961426,
      "step": 1409
    },
    {
      "epoch": 8.599853515625e-06,
      "step": 1409,
      "training_step_time": 0.4203073978424072
    },
    {
      "epoch": 8.60595703125e-06,
      "grad_norm": 0.77544766664505,
      "learning_rate": 4.7e-05,
      "loss": 0.1542,
      "step": 1410
    },
    {
      "epoch": 8.60595703125e-06,
      "model_forward_time": 0.11437582969665527,
      "step": 1410
    },
    {
      "epoch": 8.60595703125e-06,
      "step": 1410,
      "training_step_time": 0.4402337074279785
    },
    {
      "epoch": 8.612060546875e-06,
      "model_forward_time": 0.11487221717834473,
      "step": 1411
    },
    {
      "epoch": 8.612060546875e-06,
      "step": 1411,
      "training_step_time": 0.40975141525268555
    },
    {
      "epoch": 8.6181640625e-06,
      "model_forward_time": 0.11666154861450195,
      "step": 1412
    },
    {
      "epoch": 8.6181640625e-06,
      "step": 1412,
      "training_step_time": 0.46944165229797363
    },
    {
      "epoch": 8.624267578125e-06,
      "model_forward_time": 0.11497211456298828,
      "step": 1413
    },
    {
      "epoch": 8.624267578125e-06,
      "step": 1413,
      "training_step_time": 0.4973921775817871
    },
    {
      "epoch": 8.63037109375e-06,
      "model_forward_time": 0.11538076400756836,
      "step": 1414
    },
    {
      "epoch": 8.63037109375e-06,
      "step": 1414,
      "training_step_time": 0.4263932704925537
    },
    {
      "epoch": 8.636474609375e-06,
      "model_forward_time": 0.11448836326599121,
      "step": 1415
    },
    {
      "epoch": 8.636474609375e-06,
      "step": 1415,
      "training_step_time": 0.4083845615386963
    },
    {
      "epoch": 8.642578125e-06,
      "model_forward_time": 0.1150364875793457,
      "step": 1416
    },
    {
      "epoch": 8.642578125e-06,
      "step": 1416,
      "training_step_time": 0.40094947814941406
    },
    {
      "epoch": 8.648681640625e-06,
      "model_forward_time": 0.11494588851928711,
      "step": 1417
    },
    {
      "epoch": 8.648681640625e-06,
      "step": 1417,
      "training_step_time": 0.391237735748291
    },
    {
      "epoch": 8.65478515625e-06,
      "model_forward_time": 0.11527204513549805,
      "step": 1418
    },
    {
      "epoch": 8.65478515625e-06,
      "step": 1418,
      "training_step_time": 0.3988490104675293
    },
    {
      "epoch": 8.660888671875e-06,
      "model_forward_time": 0.11508560180664062,
      "step": 1419
    },
    {
      "epoch": 8.660888671875e-06,
      "step": 1419,
      "training_step_time": 0.543917179107666
    },
    {
      "epoch": 8.6669921875e-06,
      "grad_norm": 0.6109082698822021,
      "learning_rate": 4.7333333333333336e-05,
      "loss": 0.1608,
      "step": 1420
    },
    {
      "epoch": 8.6669921875e-06,
      "model_forward_time": 0.11705160140991211,
      "step": 1420
    },
    {
      "epoch": 8.6669921875e-06,
      "step": 1420,
      "training_step_time": 0.3995487689971924
    },
    {
      "epoch": 8.673095703125e-06,
      "model_forward_time": 0.11548686027526855,
      "step": 1421
    },
    {
      "epoch": 8.673095703125e-06,
      "step": 1421,
      "training_step_time": 0.39668774604797363
    },
    {
      "epoch": 8.67919921875e-06,
      "model_forward_time": 0.1154475212097168,
      "step": 1422
    },
    {
      "epoch": 8.67919921875e-06,
      "step": 1422,
      "training_step_time": 0.40253257751464844
    },
    {
      "epoch": 8.685302734375e-06,
      "model_forward_time": 0.11651492118835449,
      "step": 1423
    },
    {
      "epoch": 8.685302734375e-06,
      "step": 1423,
      "training_step_time": 0.4088454246520996
    },
    {
      "epoch": 8.69140625e-06,
      "model_forward_time": 0.11602449417114258,
      "step": 1424
    },
    {
      "epoch": 8.69140625e-06,
      "step": 1424,
      "training_step_time": 0.4896202087402344
    },
    {
      "epoch": 8.697509765625e-06,
      "model_forward_time": 0.11531376838684082,
      "step": 1425
    },
    {
      "epoch": 8.697509765625e-06,
      "step": 1425,
      "training_step_time": 0.458967924118042
    },
    {
      "epoch": 8.70361328125e-06,
      "model_forward_time": 0.1147458553314209,
      "step": 1426
    },
    {
      "epoch": 8.70361328125e-06,
      "step": 1426,
      "training_step_time": 0.37271976470947266
    },
    {
      "epoch": 8.709716796875e-06,
      "model_forward_time": 0.11571216583251953,
      "step": 1427
    },
    {
      "epoch": 8.709716796875e-06,
      "step": 1427,
      "training_step_time": 0.5152580738067627
    },
    {
      "epoch": 8.7158203125e-06,
      "model_forward_time": 0.11526346206665039,
      "step": 1428
    },
    {
      "epoch": 8.7158203125e-06,
      "step": 1428,
      "training_step_time": 0.48931455612182617
    },
    {
      "epoch": 8.721923828125e-06,
      "model_forward_time": 0.11484551429748535,
      "step": 1429
    },
    {
      "epoch": 8.721923828125e-06,
      "step": 1429,
      "training_step_time": 0.4173407554626465
    },
    {
      "epoch": 8.72802734375e-06,
      "grad_norm": 0.7244499325752258,
      "learning_rate": 4.766666666666667e-05,
      "loss": 0.1541,
      "step": 1430
    },
    {
      "epoch": 8.72802734375e-06,
      "model_forward_time": 0.11472964286804199,
      "step": 1430
    },
    {
      "epoch": 8.72802734375e-06,
      "step": 1430,
      "training_step_time": 0.39411187171936035
    },
    {
      "epoch": 8.734130859375e-06,
      "model_forward_time": 0.1152796745300293,
      "step": 1431
    },
    {
      "epoch": 8.734130859375e-06,
      "step": 1431,
      "training_step_time": 0.39572882652282715
    },
    {
      "epoch": 8.740234375e-06,
      "model_forward_time": 0.11484169960021973,
      "step": 1432
    },
    {
      "epoch": 8.740234375e-06,
      "step": 1432,
      "training_step_time": 0.402299165725708
    },
    {
      "epoch": 8.746337890625e-06,
      "model_forward_time": 0.11550664901733398,
      "step": 1433
    },
    {
      "epoch": 8.746337890625e-06,
      "step": 1433,
      "training_step_time": 0.39737439155578613
    },
    {
      "epoch": 8.75244140625e-06,
      "model_forward_time": 0.11566424369812012,
      "step": 1434
    },
    {
      "epoch": 8.75244140625e-06,
      "step": 1434,
      "training_step_time": 0.39750099182128906
    },
    {
      "epoch": 8.758544921875e-06,
      "model_forward_time": 0.11492681503295898,
      "step": 1435
    },
    {
      "epoch": 8.758544921875e-06,
      "step": 1435,
      "training_step_time": 0.39423489570617676
    },
    {
      "epoch": 8.7646484375e-06,
      "model_forward_time": 0.11611104011535645,
      "step": 1436
    },
    {
      "epoch": 8.7646484375e-06,
      "step": 1436,
      "training_step_time": 0.4082601070404053
    },
    {
      "epoch": 8.770751953125e-06,
      "model_forward_time": 0.11493492126464844,
      "step": 1437
    },
    {
      "epoch": 8.770751953125e-06,
      "step": 1437,
      "training_step_time": 0.3973560333251953
    },
    {
      "epoch": 8.77685546875e-06,
      "model_forward_time": 0.11518192291259766,
      "step": 1438
    },
    {
      "epoch": 8.77685546875e-06,
      "step": 1438,
      "training_step_time": 0.39944887161254883
    },
    {
      "epoch": 8.782958984375e-06,
      "model_forward_time": 0.11606431007385254,
      "step": 1439
    },
    {
      "epoch": 8.782958984375e-06,
      "step": 1439,
      "training_step_time": 0.49666404724121094
    },
    {
      "epoch": 8.7890625e-06,
      "grad_norm": 1.182771921157837,
      "learning_rate": 4.8e-05,
      "loss": 0.1604,
      "step": 1440
    },
    {
      "epoch": 8.7890625e-06,
      "model_forward_time": 0.11440777778625488,
      "step": 1440
    },
    {
      "epoch": 8.7890625e-06,
      "step": 1440,
      "training_step_time": 0.5167431831359863
    },
    {
      "epoch": 8.795166015625e-06,
      "model_forward_time": 0.11612939834594727,
      "step": 1441
    },
    {
      "epoch": 8.795166015625e-06,
      "step": 1441,
      "training_step_time": 0.4477672576904297
    },
    {
      "epoch": 8.80126953125e-06,
      "model_forward_time": 0.11515641212463379,
      "step": 1442
    },
    {
      "epoch": 8.80126953125e-06,
      "step": 1442,
      "training_step_time": 0.5050239562988281
    },
    {
      "epoch": 8.807373046875e-06,
      "model_forward_time": 0.11486983299255371,
      "step": 1443
    },
    {
      "epoch": 8.807373046875e-06,
      "step": 1443,
      "training_step_time": 0.4942164421081543
    },
    {
      "epoch": 8.8134765625e-06,
      "model_forward_time": 0.11467099189758301,
      "step": 1444
    },
    {
      "epoch": 8.8134765625e-06,
      "step": 1444,
      "training_step_time": 0.39721226692199707
    },
    {
      "epoch": 8.819580078125e-06,
      "model_forward_time": 0.11519503593444824,
      "step": 1445
    },
    {
      "epoch": 8.819580078125e-06,
      "step": 1445,
      "training_step_time": 0.40380072593688965
    },
    {
      "epoch": 8.82568359375e-06,
      "model_forward_time": 0.11509823799133301,
      "step": 1446
    },
    {
      "epoch": 8.82568359375e-06,
      "step": 1446,
      "training_step_time": 0.39476537704467773
    },
    {
      "epoch": 8.831787109375e-06,
      "model_forward_time": 0.11519455909729004,
      "step": 1447
    },
    {
      "epoch": 8.831787109375e-06,
      "step": 1447,
      "training_step_time": 0.39577293395996094
    },
    {
      "epoch": 8.837890625e-06,
      "model_forward_time": 0.11550354957580566,
      "step": 1448
    },
    {
      "epoch": 8.837890625e-06,
      "step": 1448,
      "training_step_time": 0.3966536521911621
    },
    {
      "epoch": 8.843994140625e-06,
      "model_forward_time": 0.11544442176818848,
      "step": 1449
    },
    {
      "epoch": 8.843994140625e-06,
      "step": 1449,
      "training_step_time": 0.39815378189086914
    },
    {
      "epoch": 8.85009765625e-06,
      "grad_norm": 1.1987322568893433,
      "learning_rate": 4.8333333333333334e-05,
      "loss": 0.1679,
      "step": 1450
    },
    {
      "epoch": 8.85009765625e-06,
      "model_forward_time": 0.11474037170410156,
      "step": 1450
    },
    {
      "epoch": 8.85009765625e-06,
      "step": 1450,
      "training_step_time": 0.40040087699890137
    },
    {
      "epoch": 8.856201171875e-06,
      "model_forward_time": 0.11518120765686035,
      "step": 1451
    },
    {
      "epoch": 8.856201171875e-06,
      "step": 1451,
      "training_step_time": 0.390073299407959
    },
    {
      "epoch": 8.8623046875e-06,
      "model_forward_time": 0.1148838996887207,
      "step": 1452
    },
    {
      "epoch": 8.8623046875e-06,
      "step": 1452,
      "training_step_time": 0.40448999404907227
    },
    {
      "epoch": 8.868408203125e-06,
      "model_forward_time": 0.1150825023651123,
      "step": 1453
    },
    {
      "epoch": 8.868408203125e-06,
      "step": 1453,
      "training_step_time": 0.39680051803588867
    },
    {
      "epoch": 8.87451171875e-06,
      "model_forward_time": 0.11541223526000977,
      "step": 1454
    },
    {
      "epoch": 8.87451171875e-06,
      "step": 1454,
      "training_step_time": 0.46023106575012207
    },
    {
      "epoch": 8.880615234375e-06,
      "model_forward_time": 0.11517047882080078,
      "step": 1455
    },
    {
      "epoch": 8.880615234375e-06,
      "step": 1455,
      "training_step_time": 0.44629359245300293
    },
    {
      "epoch": 8.88671875e-06,
      "model_forward_time": 0.11510658264160156,
      "step": 1456
    },
    {
      "epoch": 8.88671875e-06,
      "step": 1456,
      "training_step_time": 0.4449167251586914
    },
    {
      "epoch": 8.892822265625e-06,
      "model_forward_time": 0.11548590660095215,
      "step": 1457
    },
    {
      "epoch": 8.892822265625e-06,
      "step": 1457,
      "training_step_time": 0.3972151279449463
    },
    {
      "epoch": 8.89892578125e-06,
      "model_forward_time": 0.1158437728881836,
      "step": 1458
    },
    {
      "epoch": 8.89892578125e-06,
      "step": 1458,
      "training_step_time": 0.5101423263549805
    },
    {
      "epoch": 8.905029296875e-06,
      "model_forward_time": 0.11495590209960938,
      "step": 1459
    },
    {
      "epoch": 8.905029296875e-06,
      "step": 1459,
      "training_step_time": 0.400223970413208
    },
    {
      "epoch": 8.9111328125e-06,
      "grad_norm": 0.9067109823226929,
      "learning_rate": 4.866666666666667e-05,
      "loss": 0.1638,
      "step": 1460
    },
    {
      "epoch": 8.9111328125e-06,
      "model_forward_time": 0.11577868461608887,
      "step": 1460
    },
    {
      "epoch": 8.9111328125e-06,
      "step": 1460,
      "training_step_time": 0.3966081142425537
    },
    {
      "epoch": 8.917236328125e-06,
      "model_forward_time": 0.11538076400756836,
      "step": 1461
    },
    {
      "epoch": 8.917236328125e-06,
      "step": 1461,
      "training_step_time": 0.3975813388824463
    },
    {
      "epoch": 8.92333984375e-06,
      "model_forward_time": 0.1159524917602539,
      "step": 1462
    },
    {
      "epoch": 8.92333984375e-06,
      "step": 1462,
      "training_step_time": 0.3944587707519531
    },
    {
      "epoch": 8.929443359375e-06,
      "model_forward_time": 0.11570286750793457,
      "step": 1463
    },
    {
      "epoch": 8.929443359375e-06,
      "step": 1463,
      "training_step_time": 0.408552885055542
    },
    {
      "epoch": 8.935546875e-06,
      "model_forward_time": 0.11512207984924316,
      "step": 1464
    },
    {
      "epoch": 8.935546875e-06,
      "step": 1464,
      "training_step_time": 0.40044164657592773
    },
    {
      "epoch": 8.941650390625e-06,
      "model_forward_time": 0.11642575263977051,
      "step": 1465
    },
    {
      "epoch": 8.941650390625e-06,
      "step": 1465,
      "training_step_time": 0.4087235927581787
    },
    {
      "epoch": 8.94775390625e-06,
      "model_forward_time": 0.1347649097442627,
      "step": 1466
    },
    {
      "epoch": 8.94775390625e-06,
      "step": 1466,
      "training_step_time": 0.3985600471496582
    },
    {
      "epoch": 8.953857421875e-06,
      "model_forward_time": 0.11645865440368652,
      "step": 1467
    },
    {
      "epoch": 8.953857421875e-06,
      "step": 1467,
      "training_step_time": 0.39533352851867676
    },
    {
      "epoch": 8.9599609375e-06,
      "model_forward_time": 0.11562323570251465,
      "step": 1468
    },
    {
      "epoch": 8.9599609375e-06,
      "step": 1468,
      "training_step_time": 0.4138205051422119
    },
    {
      "epoch": 8.966064453125e-06,
      "model_forward_time": 0.1172945499420166,
      "step": 1469
    },
    {
      "epoch": 8.966064453125e-06,
      "step": 1469,
      "training_step_time": 0.450516939163208
    },
    {
      "epoch": 8.97216796875e-06,
      "grad_norm": 0.6850046515464783,
      "learning_rate": 4.9e-05,
      "loss": 0.1671,
      "step": 1470
    },
    {
      "epoch": 8.97216796875e-06,
      "model_forward_time": 0.11521100997924805,
      "step": 1470
    },
    {
      "epoch": 8.97216796875e-06,
      "step": 1470,
      "training_step_time": 0.4984252452850342
    },
    {
      "epoch": 8.978271484375e-06,
      "model_forward_time": 0.11571192741394043,
      "step": 1471
    },
    {
      "epoch": 8.978271484375e-06,
      "step": 1471,
      "training_step_time": 0.4675750732421875
    },
    {
      "epoch": 8.984375e-06,
      "model_forward_time": 0.11588644981384277,
      "step": 1472
    },
    {
      "epoch": 8.984375e-06,
      "step": 1472,
      "training_step_time": 0.45714306831359863
    },
    {
      "epoch": 8.990478515625e-06,
      "model_forward_time": 0.11514782905578613,
      "step": 1473
    },
    {
      "epoch": 8.990478515625e-06,
      "step": 1473,
      "training_step_time": 0.5192117691040039
    },
    {
      "epoch": 8.99658203125e-06,
      "model_forward_time": 0.11445975303649902,
      "step": 1474
    },
    {
      "epoch": 8.99658203125e-06,
      "step": 1474,
      "training_step_time": 0.3844764232635498
    },
    {
      "epoch": 9.002685546875e-06,
      "model_forward_time": 0.11495780944824219,
      "step": 1475
    },
    {
      "epoch": 9.002685546875e-06,
      "step": 1475,
      "training_step_time": 0.39164161682128906
    },
    {
      "epoch": 9.0087890625e-06,
      "model_forward_time": 0.11512470245361328,
      "step": 1476
    },
    {
      "epoch": 9.0087890625e-06,
      "step": 1476,
      "training_step_time": 0.38918614387512207
    },
    {
      "epoch": 9.014892578125e-06,
      "model_forward_time": 0.11519503593444824,
      "step": 1477
    },
    {
      "epoch": 9.014892578125e-06,
      "step": 1477,
      "training_step_time": 0.43102526664733887
    },
    {
      "epoch": 9.02099609375e-06,
      "model_forward_time": 0.11543869972229004,
      "step": 1478
    },
    {
      "epoch": 9.02099609375e-06,
      "step": 1478,
      "training_step_time": 0.39055442810058594
    },
    {
      "epoch": 9.027099609375e-06,
      "model_forward_time": 0.11483120918273926,
      "step": 1479
    },
    {
      "epoch": 9.027099609375e-06,
      "step": 1479,
      "training_step_time": 0.38944315910339355
    },
    {
      "epoch": 9.033203125e-06,
      "grad_norm": 0.6996373534202576,
      "learning_rate": 4.933333333333334e-05,
      "loss": 0.1672,
      "step": 1480
    },
    {
      "epoch": 9.033203125e-06,
      "model_forward_time": 0.11534905433654785,
      "step": 1480
    },
    {
      "epoch": 9.033203125e-06,
      "step": 1480,
      "training_step_time": 0.3993704319000244
    },
    {
      "epoch": 9.039306640625e-06,
      "model_forward_time": 0.11492419242858887,
      "step": 1481
    },
    {
      "epoch": 9.039306640625e-06,
      "step": 1481,
      "training_step_time": 0.39822983741760254
    },
    {
      "epoch": 9.04541015625e-06,
      "model_forward_time": 0.11571002006530762,
      "step": 1482
    },
    {
      "epoch": 9.04541015625e-06,
      "step": 1482,
      "training_step_time": 0.3979377746582031
    },
    {
      "epoch": 9.051513671875e-06,
      "model_forward_time": 0.11656045913696289,
      "step": 1483
    },
    {
      "epoch": 9.051513671875e-06,
      "step": 1483,
      "training_step_time": 0.5302259922027588
    },
    {
      "epoch": 9.0576171875e-06,
      "model_forward_time": 0.11490345001220703,
      "step": 1484
    },
    {
      "epoch": 9.0576171875e-06,
      "step": 1484,
      "training_step_time": 0.49254894256591797
    },
    {
      "epoch": 9.063720703125e-06,
      "model_forward_time": 0.11531710624694824,
      "step": 1485
    },
    {
      "epoch": 9.063720703125e-06,
      "step": 1485,
      "training_step_time": 0.4572873115539551
    },
    {
      "epoch": 9.06982421875e-06,
      "model_forward_time": 0.1146695613861084,
      "step": 1486
    },
    {
      "epoch": 9.06982421875e-06,
      "step": 1486,
      "training_step_time": 0.49406909942626953
    },
    {
      "epoch": 9.075927734375e-06,
      "model_forward_time": 0.11468744277954102,
      "step": 1487
    },
    {
      "epoch": 9.075927734375e-06,
      "step": 1487,
      "training_step_time": 0.4601759910583496
    },
    {
      "epoch": 9.08203125e-06,
      "model_forward_time": 0.11527347564697266,
      "step": 1488
    },
    {
      "epoch": 9.08203125e-06,
      "step": 1488,
      "training_step_time": 0.42661190032958984
    },
    {
      "epoch": 9.088134765625e-06,
      "model_forward_time": 0.11496710777282715,
      "step": 1489
    },
    {
      "epoch": 9.088134765625e-06,
      "step": 1489,
      "training_step_time": 0.3893270492553711
    },
    {
      "epoch": 9.09423828125e-06,
      "grad_norm": 0.7380015850067139,
      "learning_rate": 4.966666666666667e-05,
      "loss": 0.1564,
      "step": 1490
    },
    {
      "epoch": 9.09423828125e-06,
      "model_forward_time": 0.11490154266357422,
      "step": 1490
    },
    {
      "epoch": 9.09423828125e-06,
      "step": 1490,
      "training_step_time": 0.39681315422058105
    },
    {
      "epoch": 9.100341796875e-06,
      "model_forward_time": 0.11513757705688477,
      "step": 1491
    },
    {
      "epoch": 9.100341796875e-06,
      "step": 1491,
      "training_step_time": 0.3843357563018799
    },
    {
      "epoch": 9.1064453125e-06,
      "model_forward_time": 0.11552047729492188,
      "step": 1492
    },
    {
      "epoch": 9.1064453125e-06,
      "step": 1492,
      "training_step_time": 0.3976128101348877
    },
    {
      "epoch": 9.112548828125e-06,
      "model_forward_time": 0.11542463302612305,
      "step": 1493
    },
    {
      "epoch": 9.112548828125e-06,
      "step": 1493,
      "training_step_time": 0.3925344944000244
    },
    {
      "epoch": 9.11865234375e-06,
      "model_forward_time": 0.11496496200561523,
      "step": 1494
    },
    {
      "epoch": 9.11865234375e-06,
      "step": 1494,
      "training_step_time": 0.9114370346069336
    },
    {
      "epoch": 9.124755859375e-06,
      "model_forward_time": 0.11478734016418457,
      "step": 1495
    },
    {
      "epoch": 9.124755859375e-06,
      "step": 1495,
      "training_step_time": 0.42180347442626953
    },
    {
      "epoch": 9.130859375e-06,
      "model_forward_time": 0.1143193244934082,
      "step": 1496
    },
    {
      "epoch": 9.130859375e-06,
      "step": 1496,
      "training_step_time": 0.42072391510009766
    },
    {
      "epoch": 9.136962890625e-06,
      "model_forward_time": 0.11409544944763184,
      "step": 1497
    },
    {
      "epoch": 9.136962890625e-06,
      "step": 1497,
      "training_step_time": 0.45273280143737793
    },
    {
      "epoch": 9.14306640625e-06,
      "model_forward_time": 0.11441349983215332,
      "step": 1498
    },
    {
      "epoch": 9.14306640625e-06,
      "step": 1498,
      "training_step_time": 0.462888240814209
    },
    {
      "epoch": 9.149169921875e-06,
      "model_forward_time": 0.11441993713378906,
      "step": 1499
    },
    {
      "epoch": 9.149169921875e-06,
      "step": 1499,
      "training_step_time": 0.4682765007019043
    },
    {
      "epoch": 9.1552734375e-06,
      "grad_norm": 0.9579315185546875,
      "learning_rate": 5e-05,
      "loss": 0.1782,
      "step": 1500
    },
    {
      "epoch": 9.1552734375e-06,
      "model_forward_time": 0.11477899551391602,
      "step": 1500
    },
    {
      "epoch": 9.1552734375e-06,
      "step": 1500,
      "training_step_time": 0.5255227088928223
    },
    {
      "epoch": 9.161376953125e-06,
      "model_forward_time": 0.11532950401306152,
      "step": 1501
    },
    {
      "epoch": 9.161376953125e-06,
      "step": 1501,
      "training_step_time": 0.4857447147369385
    },
    {
      "epoch": 9.16748046875e-06,
      "model_forward_time": 0.11441898345947266,
      "step": 1502
    },
    {
      "epoch": 9.16748046875e-06,
      "step": 1502,
      "training_step_time": 0.3847205638885498
    },
    {
      "epoch": 9.173583984375e-06,
      "model_forward_time": 0.11530518531799316,
      "step": 1503
    },
    {
      "epoch": 9.173583984375e-06,
      "step": 1503,
      "training_step_time": 0.38791894912719727
    },
    {
      "epoch": 9.1796875e-06,
      "model_forward_time": 0.11487197875976562,
      "step": 1504
    },
    {
      "epoch": 9.1796875e-06,
      "step": 1504,
      "training_step_time": 0.3880343437194824
    },
    {
      "epoch": 9.185791015625e-06,
      "model_forward_time": 0.1143808364868164,
      "step": 1505
    },
    {
      "epoch": 9.185791015625e-06,
      "step": 1505,
      "training_step_time": 0.3931114673614502
    },
    {
      "epoch": 9.19189453125e-06,
      "model_forward_time": 0.11495065689086914,
      "step": 1506
    },
    {
      "epoch": 9.19189453125e-06,
      "step": 1506,
      "training_step_time": 0.5671420097351074
    },
    {
      "epoch": 9.197998046875e-06,
      "model_forward_time": 0.1149740219116211,
      "step": 1507
    },
    {
      "epoch": 9.197998046875e-06,
      "step": 1507,
      "training_step_time": 0.3916015625
    },
    {
      "epoch": 9.2041015625e-06,
      "model_forward_time": 0.11507320404052734,
      "step": 1508
    },
    {
      "epoch": 9.2041015625e-06,
      "step": 1508,
      "training_step_time": 0.3896901607513428
    },
    {
      "epoch": 9.210205078125e-06,
      "model_forward_time": 0.1149601936340332,
      "step": 1509
    },
    {
      "epoch": 9.210205078125e-06,
      "step": 1509,
      "training_step_time": 0.4034273624420166
    },
    {
      "epoch": 9.21630859375e-06,
      "grad_norm": 0.643334686756134,
      "learning_rate": 5.0333333333333335e-05,
      "loss": 0.1639,
      "step": 1510
    },
    {
      "epoch": 9.21630859375e-06,
      "model_forward_time": 0.11589884757995605,
      "step": 1510
    },
    {
      "epoch": 9.21630859375e-06,
      "step": 1510,
      "training_step_time": 0.3896005153656006
    },
    {
      "epoch": 9.222412109375e-06,
      "model_forward_time": 0.1148538589477539,
      "step": 1511
    },
    {
      "epoch": 9.222412109375e-06,
      "step": 1511,
      "training_step_time": 0.4687221050262451
    },
    {
      "epoch": 9.228515625e-06,
      "model_forward_time": 0.11519312858581543,
      "step": 1512
    },
    {
      "epoch": 9.228515625e-06,
      "step": 1512,
      "training_step_time": 0.7629122734069824
    },
    {
      "epoch": 9.234619140625e-06,
      "model_forward_time": 0.11520624160766602,
      "step": 1513
    },
    {
      "epoch": 9.234619140625e-06,
      "step": 1513,
      "training_step_time": 0.5272655487060547
    },
    {
      "epoch": 9.24072265625e-06,
      "model_forward_time": 0.11457180976867676,
      "step": 1514
    },
    {
      "epoch": 9.24072265625e-06,
      "step": 1514,
      "training_step_time": 0.4076368808746338
    },
    {
      "epoch": 9.246826171875e-06,
      "model_forward_time": 0.11693859100341797,
      "step": 1515
    },
    {
      "epoch": 9.246826171875e-06,
      "step": 1515,
      "training_step_time": 0.4175090789794922
    },
    {
      "epoch": 9.2529296875e-06,
      "model_forward_time": 0.1152350902557373,
      "step": 1516
    },
    {
      "epoch": 9.2529296875e-06,
      "step": 1516,
      "training_step_time": 0.3819997310638428
    },
    {
      "epoch": 9.259033203125e-06,
      "model_forward_time": 0.11458754539489746,
      "step": 1517
    },
    {
      "epoch": 9.259033203125e-06,
      "step": 1517,
      "training_step_time": 0.3913307189941406
    },
    {
      "epoch": 9.26513671875e-06,
      "model_forward_time": 0.11551213264465332,
      "step": 1518
    },
    {
      "epoch": 9.26513671875e-06,
      "step": 1518,
      "training_step_time": 0.5157124996185303
    },
    {
      "epoch": 9.271240234375e-06,
      "model_forward_time": 0.11473751068115234,
      "step": 1519
    },
    {
      "epoch": 9.271240234375e-06,
      "step": 1519,
      "training_step_time": 0.39431309700012207
    },
    {
      "epoch": 9.27734375e-06,
      "grad_norm": 0.7413501739501953,
      "learning_rate": 5.0666666666666674e-05,
      "loss": 0.167,
      "step": 1520
    },
    {
      "epoch": 9.27734375e-06,
      "model_forward_time": 0.11480307579040527,
      "step": 1520
    },
    {
      "epoch": 9.27734375e-06,
      "step": 1520,
      "training_step_time": 0.3922104835510254
    },
    {
      "epoch": 9.283447265625e-06,
      "model_forward_time": 0.11538124084472656,
      "step": 1521
    },
    {
      "epoch": 9.283447265625e-06,
      "step": 1521,
      "training_step_time": 0.4183521270751953
    },
    {
      "epoch": 9.28955078125e-06,
      "model_forward_time": 0.1154625415802002,
      "step": 1522
    },
    {
      "epoch": 9.28955078125e-06,
      "step": 1522,
      "training_step_time": 0.39485979080200195
    },
    {
      "epoch": 9.295654296875e-06,
      "model_forward_time": 0.11556386947631836,
      "step": 1523
    },
    {
      "epoch": 9.295654296875e-06,
      "step": 1523,
      "training_step_time": 0.3916330337524414
    },
    {
      "epoch": 9.3017578125e-06,
      "model_forward_time": 0.11628937721252441,
      "step": 1524
    },
    {
      "epoch": 9.3017578125e-06,
      "step": 1524,
      "training_step_time": 0.876727819442749
    },
    {
      "epoch": 9.307861328125e-06,
      "model_forward_time": 0.11496448516845703,
      "step": 1525
    },
    {
      "epoch": 9.307861328125e-06,
      "step": 1525,
      "training_step_time": 0.4020049571990967
    },
    {
      "epoch": 9.31396484375e-06,
      "model_forward_time": 0.11470484733581543,
      "step": 1526
    },
    {
      "epoch": 9.31396484375e-06,
      "step": 1526,
      "training_step_time": 0.4657626152038574
    },
    {
      "epoch": 9.320068359375e-06,
      "model_forward_time": 0.11547017097473145,
      "step": 1527
    },
    {
      "epoch": 9.320068359375e-06,
      "step": 1527,
      "training_step_time": 0.4577937126159668
    },
    {
      "epoch": 9.326171875e-06,
      "model_forward_time": 0.11515474319458008,
      "step": 1528
    },
    {
      "epoch": 9.326171875e-06,
      "step": 1528,
      "training_step_time": 0.4062325954437256
    },
    {
      "epoch": 9.332275390625e-06,
      "model_forward_time": 0.11454963684082031,
      "step": 1529
    },
    {
      "epoch": 9.332275390625e-06,
      "step": 1529,
      "training_step_time": 0.4695930480957031
    },
    {
      "epoch": 9.33837890625e-06,
      "grad_norm": 0.6293384432792664,
      "learning_rate": 5.1000000000000006e-05,
      "loss": 0.1623,
      "step": 1530
    },
    {
      "epoch": 9.33837890625e-06,
      "model_forward_time": 0.11454105377197266,
      "step": 1530
    },
    {
      "epoch": 9.33837890625e-06,
      "step": 1530,
      "training_step_time": 0.3939945697784424
    },
    {
      "epoch": 9.344482421875e-06,
      "model_forward_time": 0.11529755592346191,
      "step": 1531
    },
    {
      "epoch": 9.344482421875e-06,
      "step": 1531,
      "training_step_time": 0.3934600353240967
    },
    {
      "epoch": 9.3505859375e-06,
      "model_forward_time": 0.11526656150817871,
      "step": 1532
    },
    {
      "epoch": 9.3505859375e-06,
      "step": 1532,
      "training_step_time": 0.3918490409851074
    },
    {
      "epoch": 9.356689453125e-06,
      "model_forward_time": 0.11616992950439453,
      "step": 1533
    },
    {
      "epoch": 9.356689453125e-06,
      "step": 1533,
      "training_step_time": 0.3906378746032715
    },
    {
      "epoch": 9.36279296875e-06,
      "model_forward_time": 0.11560964584350586,
      "step": 1534
    },
    {
      "epoch": 9.36279296875e-06,
      "step": 1534,
      "training_step_time": 0.3934452533721924
    },
    {
      "epoch": 9.368896484375e-06,
      "model_forward_time": 0.11535763740539551,
      "step": 1535
    },
    {
      "epoch": 9.368896484375e-06,
      "step": 1535,
      "training_step_time": 0.404177188873291
    },
    {
      "epoch": 9.375e-06,
      "model_forward_time": 0.11541914939880371,
      "step": 1536
    },
    {
      "epoch": 9.375e-06,
      "step": 1536,
      "training_step_time": 0.865502119064331
    },
    {
      "epoch": 9.381103515625e-06,
      "model_forward_time": 0.11467361450195312,
      "step": 1537
    },
    {
      "epoch": 9.381103515625e-06,
      "step": 1537,
      "training_step_time": 0.39703965187072754
    },
    {
      "epoch": 9.38720703125e-06,
      "model_forward_time": 0.1146097183227539,
      "step": 1538
    },
    {
      "epoch": 9.38720703125e-06,
      "step": 1538,
      "training_step_time": 0.49009037017822266
    },
    {
      "epoch": 9.393310546875e-06,
      "model_forward_time": 0.11402273178100586,
      "step": 1539
    },
    {
      "epoch": 9.393310546875e-06,
      "step": 1539,
      "training_step_time": 0.4422333240509033
    },
    {
      "epoch": 9.3994140625e-06,
      "grad_norm": 0.859408438205719,
      "learning_rate": 5.133333333333333e-05,
      "loss": 0.1657,
      "step": 1540
    },
    {
      "epoch": 9.3994140625e-06,
      "model_forward_time": 0.11387753486633301,
      "step": 1540
    },
    {
      "epoch": 9.3994140625e-06,
      "step": 1540,
      "training_step_time": 0.4174633026123047
    },
    {
      "epoch": 9.405517578125e-06,
      "model_forward_time": 0.11417412757873535,
      "step": 1541
    },
    {
      "epoch": 9.405517578125e-06,
      "step": 1541,
      "training_step_time": 0.40361714363098145
    },
    {
      "epoch": 9.41162109375e-06,
      "model_forward_time": 0.1146707534790039,
      "step": 1542
    },
    {
      "epoch": 9.41162109375e-06,
      "step": 1542,
      "training_step_time": 0.59317946434021
    },
    {
      "epoch": 9.417724609375e-06,
      "model_forward_time": 0.11555290222167969,
      "step": 1543
    },
    {
      "epoch": 9.417724609375e-06,
      "step": 1543,
      "training_step_time": 0.3946549892425537
    },
    {
      "epoch": 9.423828125e-06,
      "model_forward_time": 0.1145787239074707,
      "step": 1544
    },
    {
      "epoch": 9.423828125e-06,
      "step": 1544,
      "training_step_time": 0.3960142135620117
    },
    {
      "epoch": 9.429931640625e-06,
      "model_forward_time": 0.11526942253112793,
      "step": 1545
    },
    {
      "epoch": 9.429931640625e-06,
      "step": 1545,
      "training_step_time": 0.391956090927124
    },
    {
      "epoch": 9.43603515625e-06,
      "model_forward_time": 0.1152033805847168,
      "step": 1546
    },
    {
      "epoch": 9.43603515625e-06,
      "step": 1546,
      "training_step_time": 0.38898563385009766
    },
    {
      "epoch": 9.442138671875e-06,
      "model_forward_time": 0.11514711380004883,
      "step": 1547
    },
    {
      "epoch": 9.442138671875e-06,
      "step": 1547,
      "training_step_time": 0.41428637504577637
    },
    {
      "epoch": 9.4482421875e-06,
      "model_forward_time": 0.11507606506347656,
      "step": 1548
    },
    {
      "epoch": 9.4482421875e-06,
      "step": 1548,
      "training_step_time": 0.5363538265228271
    },
    {
      "epoch": 9.454345703125e-06,
      "model_forward_time": 0.1151731014251709,
      "step": 1549
    },
    {
      "epoch": 9.454345703125e-06,
      "step": 1549,
      "training_step_time": 0.39575648307800293
    },
    {
      "epoch": 9.46044921875e-06,
      "grad_norm": 0.9897884726524353,
      "learning_rate": 5.166666666666667e-05,
      "loss": 0.1623,
      "step": 1550
    },
    {
      "epoch": 9.46044921875e-06,
      "model_forward_time": 0.11562061309814453,
      "step": 1550
    },
    {
      "epoch": 9.46044921875e-06,
      "step": 1550,
      "training_step_time": 0.3917231559753418
    },
    {
      "epoch": 9.466552734375e-06,
      "model_forward_time": 0.12639474868774414,
      "step": 1551
    },
    {
      "epoch": 9.466552734375e-06,
      "step": 1551,
      "training_step_time": 0.3912320137023926
    },
    {
      "epoch": 9.47265625e-06,
      "model_forward_time": 0.1158452033996582,
      "step": 1552
    },
    {
      "epoch": 9.47265625e-06,
      "step": 1552,
      "training_step_time": 0.5119509696960449
    },
    {
      "epoch": 9.478759765625e-06,
      "model_forward_time": 0.11631464958190918,
      "step": 1553
    },
    {
      "epoch": 9.478759765625e-06,
      "step": 1553,
      "training_step_time": 0.48965024948120117
    },
    {
      "epoch": 9.48486328125e-06,
      "model_forward_time": 0.11519217491149902,
      "step": 1554
    },
    {
      "epoch": 9.48486328125e-06,
      "step": 1554,
      "training_step_time": 0.6980812549591064
    },
    {
      "epoch": 9.490966796875e-06,
      "model_forward_time": 0.11474180221557617,
      "step": 1555
    },
    {
      "epoch": 9.490966796875e-06,
      "step": 1555,
      "training_step_time": 0.5074498653411865
    },
    {
      "epoch": 9.4970703125e-06,
      "model_forward_time": 0.11513662338256836,
      "step": 1556
    },
    {
      "epoch": 9.4970703125e-06,
      "step": 1556,
      "training_step_time": 0.43369007110595703
    },
    {
      "epoch": 9.503173828125e-06,
      "model_forward_time": 0.11500692367553711,
      "step": 1557
    },
    {
      "epoch": 9.503173828125e-06,
      "step": 1557,
      "training_step_time": 0.39340639114379883
    },
    {
      "epoch": 9.50927734375e-06,
      "model_forward_time": 0.11530065536499023,
      "step": 1558
    },
    {
      "epoch": 9.50927734375e-06,
      "step": 1558,
      "training_step_time": 0.37912940979003906
    },
    {
      "epoch": 9.515380859375e-06,
      "model_forward_time": 0.11465597152709961,
      "step": 1559
    },
    {
      "epoch": 9.515380859375e-06,
      "step": 1559,
      "training_step_time": 0.415255069732666
    },
    {
      "epoch": 9.521484375e-06,
      "grad_norm": 0.8952525854110718,
      "learning_rate": 5.2000000000000004e-05,
      "loss": 0.1596,
      "step": 1560
    },
    {
      "epoch": 9.521484375e-06,
      "model_forward_time": 0.11514019966125488,
      "step": 1560
    },
    {
      "epoch": 9.521484375e-06,
      "step": 1560,
      "training_step_time": 0.6236681938171387
    },
    {
      "epoch": 9.527587890625e-06,
      "model_forward_time": 0.11614370346069336,
      "step": 1561
    },
    {
      "epoch": 9.527587890625e-06,
      "step": 1561,
      "training_step_time": 0.3938412666320801
    },
    {
      "epoch": 9.53369140625e-06,
      "model_forward_time": 0.11538982391357422,
      "step": 1562
    },
    {
      "epoch": 9.53369140625e-06,
      "step": 1562,
      "training_step_time": 0.39008426666259766
    },
    {
      "epoch": 9.539794921875e-06,
      "model_forward_time": 0.1151432991027832,
      "step": 1563
    },
    {
      "epoch": 9.539794921875e-06,
      "step": 1563,
      "training_step_time": 0.39394426345825195
    },
    {
      "epoch": 9.5458984375e-06,
      "model_forward_time": 0.11579561233520508,
      "step": 1564
    },
    {
      "epoch": 9.5458984375e-06,
      "step": 1564,
      "training_step_time": 0.39343762397766113
    },
    {
      "epoch": 9.552001953125e-06,
      "model_forward_time": 0.12123680114746094,
      "step": 1565
    },
    {
      "epoch": 9.552001953125e-06,
      "step": 1565,
      "training_step_time": 0.40604233741760254
    },
    {
      "epoch": 9.55810546875e-06,
      "model_forward_time": 0.11681890487670898,
      "step": 1566
    },
    {
      "epoch": 9.55810546875e-06,
      "step": 1566,
      "training_step_time": 0.8510773181915283
    },
    {
      "epoch": 9.564208984375e-06,
      "model_forward_time": 0.11521577835083008,
      "step": 1567
    },
    {
      "epoch": 9.564208984375e-06,
      "step": 1567,
      "training_step_time": 0.49648427963256836
    },
    {
      "epoch": 9.5703125e-06,
      "model_forward_time": 0.1157982349395752,
      "step": 1568
    },
    {
      "epoch": 9.5703125e-06,
      "step": 1568,
      "training_step_time": 0.3878004550933838
    },
    {
      "epoch": 9.576416015625e-06,
      "model_forward_time": 0.11443686485290527,
      "step": 1569
    },
    {
      "epoch": 9.576416015625e-06,
      "step": 1569,
      "training_step_time": 0.4238710403442383
    },
    {
      "epoch": 9.58251953125e-06,
      "grad_norm": 0.8370407819747925,
      "learning_rate": 5.2333333333333336e-05,
      "loss": 0.16,
      "step": 1570
    },
    {
      "epoch": 9.58251953125e-06,
      "model_forward_time": 0.11560630798339844,
      "step": 1570
    },
    {
      "epoch": 9.58251953125e-06,
      "step": 1570,
      "training_step_time": 0.4632246494293213
    },
    {
      "epoch": 9.588623046875e-06,
      "model_forward_time": 0.11491608619689941,
      "step": 1571
    },
    {
      "epoch": 9.588623046875e-06,
      "step": 1571,
      "training_step_time": 0.4080033302307129
    },
    {
      "epoch": 9.5947265625e-06,
      "model_forward_time": 0.11620283126831055,
      "step": 1572
    },
    {
      "epoch": 9.5947265625e-06,
      "step": 1572,
      "training_step_time": 0.6541023254394531
    },
    {
      "epoch": 9.600830078125e-06,
      "model_forward_time": 0.11468005180358887,
      "step": 1573
    },
    {
      "epoch": 9.600830078125e-06,
      "step": 1573,
      "training_step_time": 0.39123964309692383
    },
    {
      "epoch": 9.60693359375e-06,
      "model_forward_time": 0.11559295654296875,
      "step": 1574
    },
    {
      "epoch": 9.60693359375e-06,
      "step": 1574,
      "training_step_time": 0.37993741035461426
    },
    {
      "epoch": 9.613037109375e-06,
      "model_forward_time": 0.1151587963104248,
      "step": 1575
    },
    {
      "epoch": 9.613037109375e-06,
      "step": 1575,
      "training_step_time": 0.3793783187866211
    },
    {
      "epoch": 9.619140625e-06,
      "model_forward_time": 0.11598467826843262,
      "step": 1576
    },
    {
      "epoch": 9.619140625e-06,
      "step": 1576,
      "training_step_time": 0.37630271911621094
    },
    {
      "epoch": 9.625244140625e-06,
      "model_forward_time": 0.11546611785888672,
      "step": 1577
    },
    {
      "epoch": 9.625244140625e-06,
      "step": 1577,
      "training_step_time": 0.3945157527923584
    },
    {
      "epoch": 9.63134765625e-06,
      "model_forward_time": 0.1161203384399414,
      "step": 1578
    },
    {
      "epoch": 9.63134765625e-06,
      "step": 1578,
      "training_step_time": 0.7463135719299316
    },
    {
      "epoch": 9.637451171875e-06,
      "model_forward_time": 0.11507225036621094,
      "step": 1579
    },
    {
      "epoch": 9.637451171875e-06,
      "step": 1579,
      "training_step_time": 0.4499497413635254
    },
    {
      "epoch": 9.6435546875e-06,
      "grad_norm": 0.6783791780471802,
      "learning_rate": 5.266666666666666e-05,
      "loss": 0.1525,
      "step": 1580
    },
    {
      "epoch": 9.6435546875e-06,
      "model_forward_time": 0.11609172821044922,
      "step": 1580
    },
    {
      "epoch": 9.6435546875e-06,
      "step": 1580,
      "training_step_time": 0.4806520938873291
    },
    {
      "epoch": 9.649658203125e-06,
      "model_forward_time": 0.11466407775878906,
      "step": 1581
    },
    {
      "epoch": 9.649658203125e-06,
      "step": 1581,
      "training_step_time": 0.3851139545440674
    },
    {
      "epoch": 9.65576171875e-06,
      "model_forward_time": 0.11454963684082031,
      "step": 1582
    },
    {
      "epoch": 9.65576171875e-06,
      "step": 1582,
      "training_step_time": 0.36717700958251953
    },
    {
      "epoch": 9.661865234375e-06,
      "model_forward_time": 0.1146697998046875,
      "step": 1583
    },
    {
      "epoch": 9.661865234375e-06,
      "step": 1583,
      "training_step_time": 0.46657633781433105
    },
    {
      "epoch": 9.66796875e-06,
      "model_forward_time": 0.11427879333496094,
      "step": 1584
    },
    {
      "epoch": 9.66796875e-06,
      "step": 1584,
      "training_step_time": 0.48915886878967285
    },
    {
      "epoch": 9.674072265625e-06,
      "model_forward_time": 0.11501383781433105,
      "step": 1585
    },
    {
      "epoch": 9.674072265625e-06,
      "step": 1585,
      "training_step_time": 0.42224860191345215
    },
    {
      "epoch": 9.68017578125e-06,
      "model_forward_time": 0.11571907997131348,
      "step": 1586
    },
    {
      "epoch": 9.68017578125e-06,
      "step": 1586,
      "training_step_time": 0.3716123104095459
    },
    {
      "epoch": 9.686279296875e-06,
      "model_forward_time": 0.11587691307067871,
      "step": 1587
    },
    {
      "epoch": 9.686279296875e-06,
      "step": 1587,
      "training_step_time": 0.3945033550262451
    },
    {
      "epoch": 9.6923828125e-06,
      "model_forward_time": 0.11514091491699219,
      "step": 1588
    },
    {
      "epoch": 9.6923828125e-06,
      "step": 1588,
      "training_step_time": 0.40181684494018555
    },
    {
      "epoch": 9.698486328125e-06,
      "model_forward_time": 0.11648845672607422,
      "step": 1589
    },
    {
      "epoch": 9.698486328125e-06,
      "step": 1589,
      "training_step_time": 0.40695714950561523
    },
    {
      "epoch": 9.70458984375e-06,
      "grad_norm": 0.5877509117126465,
      "learning_rate": 5.300000000000001e-05,
      "loss": 0.156,
      "step": 1590
    },
    {
      "epoch": 9.70458984375e-06,
      "model_forward_time": 0.11558938026428223,
      "step": 1590
    },
    {
      "epoch": 9.70458984375e-06,
      "step": 1590,
      "training_step_time": 0.40425992012023926
    },
    {
      "epoch": 9.710693359375e-06,
      "model_forward_time": 0.11524033546447754,
      "step": 1591
    },
    {
      "epoch": 9.710693359375e-06,
      "step": 1591,
      "training_step_time": 0.3977193832397461
    },
    {
      "epoch": 9.716796875e-06,
      "model_forward_time": 0.11782526969909668,
      "step": 1592
    },
    {
      "epoch": 9.716796875e-06,
      "step": 1592,
      "training_step_time": 0.4315526485443115
    },
    {
      "epoch": 9.722900390625e-06,
      "model_forward_time": 0.11523318290710449,
      "step": 1593
    },
    {
      "epoch": 9.722900390625e-06,
      "step": 1593,
      "training_step_time": 0.49390697479248047
    },
    {
      "epoch": 9.72900390625e-06,
      "model_forward_time": 0.11640238761901855,
      "step": 1594
    },
    {
      "epoch": 9.72900390625e-06,
      "step": 1594,
      "training_step_time": 0.4702725410461426
    },
    {
      "epoch": 9.735107421875e-06,
      "model_forward_time": 0.11494755744934082,
      "step": 1595
    },
    {
      "epoch": 9.735107421875e-06,
      "step": 1595,
      "training_step_time": 0.41637277603149414
    },
    {
      "epoch": 9.7412109375e-06,
      "model_forward_time": 0.11505675315856934,
      "step": 1596
    },
    {
      "epoch": 9.7412109375e-06,
      "step": 1596,
      "training_step_time": 0.5673575401306152
    },
    {
      "epoch": 9.747314453125e-06,
      "model_forward_time": 0.11463689804077148,
      "step": 1597
    },
    {
      "epoch": 9.747314453125e-06,
      "step": 1597,
      "training_step_time": 0.48801565170288086
    },
    {
      "epoch": 9.75341796875e-06,
      "model_forward_time": 0.11537027359008789,
      "step": 1598
    },
    {
      "epoch": 9.75341796875e-06,
      "step": 1598,
      "training_step_time": 0.49333858489990234
    },
    {
      "epoch": 9.759521484375e-06,
      "model_forward_time": 0.11480998992919922,
      "step": 1599
    },
    {
      "epoch": 9.759521484375e-06,
      "step": 1599,
      "training_step_time": 0.3897116184234619
    },
    {
      "epoch": 9.765625e-06,
      "grad_norm": 0.542512834072113,
      "learning_rate": 5.333333333333333e-05,
      "loss": 0.1551,
      "step": 1600
    },
    {
      "epoch": 9.765625e-06,
      "model_forward_time": 0.11446022987365723,
      "step": 1600
    },
    {
      "epoch": 9.765625e-06,
      "step": 1600,
      "training_step_time": 0.3903219699859619
    },
    {
      "epoch": 9.771728515625e-06,
      "model_forward_time": 0.1147317886352539,
      "step": 1601
    },
    {
      "epoch": 9.771728515625e-06,
      "step": 1601,
      "training_step_time": 0.38817381858825684
    },
    {
      "epoch": 9.77783203125e-06,
      "model_forward_time": 0.1152043342590332,
      "step": 1602
    },
    {
      "epoch": 9.77783203125e-06,
      "step": 1602,
      "training_step_time": 0.5874900817871094
    },
    {
      "epoch": 9.783935546875e-06,
      "model_forward_time": 0.11467623710632324,
      "step": 1603
    },
    {
      "epoch": 9.783935546875e-06,
      "step": 1603,
      "training_step_time": 0.3913383483886719
    },
    {
      "epoch": 9.7900390625e-06,
      "model_forward_time": 0.11702227592468262,
      "step": 1604
    },
    {
      "epoch": 9.7900390625e-06,
      "step": 1604,
      "training_step_time": 0.3795318603515625
    },
    {
      "epoch": 9.796142578125e-06,
      "model_forward_time": 0.11536049842834473,
      "step": 1605
    },
    {
      "epoch": 9.796142578125e-06,
      "step": 1605,
      "training_step_time": 0.39008116722106934
    },
    {
      "epoch": 9.80224609375e-06,
      "model_forward_time": 0.11711239814758301,
      "step": 1606
    },
    {
      "epoch": 9.80224609375e-06,
      "step": 1606,
      "training_step_time": 0.41993045806884766
    },
    {
      "epoch": 9.808349609375e-06,
      "model_forward_time": 0.11684179306030273,
      "step": 1607
    },
    {
      "epoch": 9.808349609375e-06,
      "step": 1607,
      "training_step_time": 0.49219489097595215
    },
    {
      "epoch": 9.814453125e-06,
      "model_forward_time": 0.11598658561706543,
      "step": 1608
    },
    {
      "epoch": 9.814453125e-06,
      "step": 1608,
      "training_step_time": 0.6437854766845703
    },
    {
      "epoch": 9.820556640625e-06,
      "model_forward_time": 0.11497378349304199,
      "step": 1609
    },
    {
      "epoch": 9.820556640625e-06,
      "step": 1609,
      "training_step_time": 0.4435851573944092
    },
    {
      "epoch": 9.82666015625e-06,
      "grad_norm": 0.8453152775764465,
      "learning_rate": 5.3666666666666666e-05,
      "loss": 0.1471,
      "step": 1610
    },
    {
      "epoch": 9.82666015625e-06,
      "model_forward_time": 0.11579275131225586,
      "step": 1610
    },
    {
      "epoch": 9.82666015625e-06,
      "step": 1610,
      "training_step_time": 0.4089663028717041
    },
    {
      "epoch": 9.832763671875e-06,
      "model_forward_time": 0.11526107788085938,
      "step": 1611
    },
    {
      "epoch": 9.832763671875e-06,
      "step": 1611,
      "training_step_time": 0.47849535942077637
    },
    {
      "epoch": 9.8388671875e-06,
      "model_forward_time": 0.11537837982177734,
      "step": 1612
    },
    {
      "epoch": 9.8388671875e-06,
      "step": 1612,
      "training_step_time": 0.5082056522369385
    },
    {
      "epoch": 9.844970703125e-06,
      "model_forward_time": 0.11440038681030273,
      "step": 1613
    },
    {
      "epoch": 9.844970703125e-06,
      "step": 1613,
      "training_step_time": 0.3814406394958496
    },
    {
      "epoch": 9.85107421875e-06,
      "model_forward_time": 0.11573338508605957,
      "step": 1614
    },
    {
      "epoch": 9.85107421875e-06,
      "step": 1614,
      "training_step_time": 0.4092228412628174
    },
    {
      "epoch": 9.857177734375e-06,
      "model_forward_time": 0.11509895324707031,
      "step": 1615
    },
    {
      "epoch": 9.857177734375e-06,
      "step": 1615,
      "training_step_time": 0.39578914642333984
    },
    {
      "epoch": 9.86328125e-06,
      "model_forward_time": 0.11712408065795898,
      "step": 1616
    },
    {
      "epoch": 9.86328125e-06,
      "step": 1616,
      "training_step_time": 0.3785278797149658
    },
    {
      "epoch": 9.869384765625e-06,
      "model_forward_time": 0.11495447158813477,
      "step": 1617
    },
    {
      "epoch": 9.869384765625e-06,
      "step": 1617,
      "training_step_time": 0.39947032928466797
    },
    {
      "epoch": 9.87548828125e-06,
      "model_forward_time": 0.11515402793884277,
      "step": 1618
    },
    {
      "epoch": 9.87548828125e-06,
      "step": 1618,
      "training_step_time": 0.3859732151031494
    },
    {
      "epoch": 9.881591796875e-06,
      "model_forward_time": 0.11525368690490723,
      "step": 1619
    },
    {
      "epoch": 9.881591796875e-06,
      "step": 1619,
      "training_step_time": 0.3991978168487549
    },
    {
      "epoch": 9.8876953125e-06,
      "grad_norm": 0.4816384017467499,
      "learning_rate": 5.4000000000000005e-05,
      "loss": 0.1453,
      "step": 1620
    },
    {
      "epoch": 9.8876953125e-06,
      "model_forward_time": 0.11591672897338867,
      "step": 1620
    },
    {
      "epoch": 9.8876953125e-06,
      "step": 1620,
      "training_step_time": 0.918682336807251
    },
    {
      "epoch": 9.893798828125e-06,
      "model_forward_time": 0.11475181579589844,
      "step": 1621
    },
    {
      "epoch": 9.893798828125e-06,
      "step": 1621,
      "training_step_time": 0.4345357418060303
    },
    {
      "epoch": 9.89990234375e-06,
      "model_forward_time": 0.1145627498626709,
      "step": 1622
    },
    {
      "epoch": 9.89990234375e-06,
      "step": 1622,
      "training_step_time": 0.4670979976654053
    },
    {
      "epoch": 9.906005859375e-06,
      "model_forward_time": 0.11889219284057617,
      "step": 1623
    },
    {
      "epoch": 9.906005859375e-06,
      "step": 1623,
      "training_step_time": 0.40383243560791016
    },
    {
      "epoch": 9.912109375e-06,
      "model_forward_time": 0.11447453498840332,
      "step": 1624
    },
    {
      "epoch": 9.912109375e-06,
      "step": 1624,
      "training_step_time": 0.38701891899108887
    },
    {
      "epoch": 9.918212890625e-06,
      "model_forward_time": 0.1152195930480957,
      "step": 1625
    },
    {
      "epoch": 9.918212890625e-06,
      "step": 1625,
      "training_step_time": 0.45748114585876465
    },
    {
      "epoch": 9.92431640625e-06,
      "model_forward_time": 0.11536002159118652,
      "step": 1626
    },
    {
      "epoch": 9.92431640625e-06,
      "step": 1626,
      "training_step_time": 0.5877077579498291
    },
    {
      "epoch": 9.930419921875e-06,
      "model_forward_time": 0.11469221115112305,
      "step": 1627
    },
    {
      "epoch": 9.930419921875e-06,
      "step": 1627,
      "training_step_time": 0.3868122100830078
    },
    {
      "epoch": 9.9365234375e-06,
      "model_forward_time": 0.11465191841125488,
      "step": 1628
    },
    {
      "epoch": 9.9365234375e-06,
      "step": 1628,
      "training_step_time": 0.39994120597839355
    },
    {
      "epoch": 9.942626953125e-06,
      "model_forward_time": 0.11452460289001465,
      "step": 1629
    },
    {
      "epoch": 9.942626953125e-06,
      "step": 1629,
      "training_step_time": 0.4103584289550781
    },
    {
      "epoch": 9.94873046875e-06,
      "grad_norm": 0.7247220277786255,
      "learning_rate": 5.433333333333334e-05,
      "loss": 0.152,
      "step": 1630
    },
    {
      "epoch": 9.94873046875e-06,
      "model_forward_time": 0.11474180221557617,
      "step": 1630
    },
    {
      "epoch": 9.94873046875e-06,
      "step": 1630,
      "training_step_time": 0.39876556396484375
    },
    {
      "epoch": 9.954833984375e-06,
      "model_forward_time": 0.11495590209960938,
      "step": 1631
    },
    {
      "epoch": 9.954833984375e-06,
      "step": 1631,
      "training_step_time": 0.39369821548461914
    },
    {
      "epoch": 9.9609375e-06,
      "model_forward_time": 0.1147458553314209,
      "step": 1632
    },
    {
      "epoch": 9.9609375e-06,
      "step": 1632,
      "training_step_time": 0.8165853023529053
    },
    {
      "epoch": 9.967041015625e-06,
      "model_forward_time": 0.11437511444091797,
      "step": 1633
    },
    {
      "epoch": 9.967041015625e-06,
      "step": 1633,
      "training_step_time": 0.45563578605651855
    },
    {
      "epoch": 9.97314453125e-06,
      "model_forward_time": 0.11390948295593262,
      "step": 1634
    },
    {
      "epoch": 9.97314453125e-06,
      "step": 1634,
      "training_step_time": 0.4410274028778076
    },
    {
      "epoch": 9.979248046875e-06,
      "model_forward_time": 0.11466431617736816,
      "step": 1635
    },
    {
      "epoch": 9.979248046875e-06,
      "step": 1635,
      "training_step_time": 0.3985402584075928
    },
    {
      "epoch": 9.9853515625e-06,
      "model_forward_time": 0.11518454551696777,
      "step": 1636
    },
    {
      "epoch": 9.9853515625e-06,
      "step": 1636,
      "training_step_time": 0.47106075286865234
    },
    {
      "epoch": 9.991455078125e-06,
      "model_forward_time": 0.11477994918823242,
      "step": 1637
    },
    {
      "epoch": 9.991455078125e-06,
      "step": 1637,
      "training_step_time": 0.36962175369262695
    },
    {
      "epoch": 9.99755859375e-06,
      "model_forward_time": 0.11563301086425781,
      "step": 1638
    },
    {
      "epoch": 9.99755859375e-06,
      "step": 1638,
      "training_step_time": 0.4772617816925049
    },
    {
      "epoch": 1.0003662109375e-05,
      "model_forward_time": 0.11568760871887207,
      "step": 1639
    },
    {
      "epoch": 1.0003662109375e-05,
      "step": 1639,
      "training_step_time": 0.5081350803375244
    },
    {
      "epoch": 1.0009765625e-05,
      "grad_norm": 0.575721025466919,
      "learning_rate": 5.466666666666666e-05,
      "loss": 0.1594,
      "step": 1640
    },
    {
      "epoch": 1.0009765625e-05,
      "model_forward_time": 0.11483311653137207,
      "step": 1640
    },
    {
      "epoch": 1.0009765625e-05,
      "step": 1640,
      "training_step_time": 0.39552736282348633
    },
    {
      "epoch": 1.0015869140625e-05,
      "model_forward_time": 0.11503124237060547,
      "step": 1641
    },
    {
      "epoch": 1.0015869140625e-05,
      "step": 1641,
      "training_step_time": 0.39690208435058594
    },
    {
      "epoch": 1.002197265625e-05,
      "model_forward_time": 0.11558008193969727,
      "step": 1642
    },
    {
      "epoch": 1.002197265625e-05,
      "step": 1642,
      "training_step_time": 0.3845522403717041
    },
    {
      "epoch": 1.0028076171875e-05,
      "model_forward_time": 0.11575078964233398,
      "step": 1643
    },
    {
      "epoch": 1.0028076171875e-05,
      "step": 1643,
      "training_step_time": 0.3930983543395996
    },
    {
      "epoch": 1.00341796875e-05,
      "model_forward_time": 0.11507415771484375,
      "step": 1644
    },
    {
      "epoch": 1.00341796875e-05,
      "step": 1644,
      "training_step_time": 0.46282458305358887
    },
    {
      "epoch": 1.0040283203125e-05,
      "model_forward_time": 0.11540555953979492,
      "step": 1645
    },
    {
      "epoch": 1.0040283203125e-05,
      "step": 1645,
      "training_step_time": 0.39373350143432617
    },
    {
      "epoch": 1.004638671875e-05,
      "model_forward_time": 0.11613607406616211,
      "step": 1646
    },
    {
      "epoch": 1.004638671875e-05,
      "step": 1646,
      "training_step_time": 0.39322638511657715
    },
    {
      "epoch": 1.0052490234375e-05,
      "model_forward_time": 0.1157844066619873,
      "step": 1647
    },
    {
      "epoch": 1.0052490234375e-05,
      "step": 1647,
      "training_step_time": 0.40816354751586914
    },
    {
      "epoch": 1.005859375e-05,
      "model_forward_time": 0.11542677879333496,
      "step": 1648
    },
    {
      "epoch": 1.005859375e-05,
      "step": 1648,
      "training_step_time": 0.44310879707336426
    },
    {
      "epoch": 1.0064697265625e-05,
      "model_forward_time": 0.1154015064239502,
      "step": 1649
    },
    {
      "epoch": 1.0064697265625e-05,
      "step": 1649,
      "training_step_time": 0.47403669357299805
    },
    {
      "epoch": 1.007080078125e-05,
      "grad_norm": 0.3702383041381836,
      "learning_rate": 5.500000000000001e-05,
      "loss": 0.1647,
      "step": 1650
    },
    {
      "epoch": 1.007080078125e-05,
      "model_forward_time": 0.11527061462402344,
      "step": 1650
    },
    {
      "epoch": 1.007080078125e-05,
      "step": 1650,
      "training_step_time": 0.5304281711578369
    },
    {
      "epoch": 1.0076904296875e-05,
      "model_forward_time": 0.1149909496307373,
      "step": 1651
    },
    {
      "epoch": 1.0076904296875e-05,
      "step": 1651,
      "training_step_time": 0.37003564834594727
    },
    {
      "epoch": 1.00830078125e-05,
      "model_forward_time": 0.1147928237915039,
      "step": 1652
    },
    {
      "epoch": 1.00830078125e-05,
      "step": 1652,
      "training_step_time": 0.4328281879425049
    },
    {
      "epoch": 1.0089111328125e-05,
      "model_forward_time": 0.11610054969787598,
      "step": 1653
    },
    {
      "epoch": 1.0089111328125e-05,
      "step": 1653,
      "training_step_time": 0.4638948440551758
    },
    {
      "epoch": 1.009521484375e-05,
      "model_forward_time": 0.11534810066223145,
      "step": 1654
    },
    {
      "epoch": 1.009521484375e-05,
      "step": 1654,
      "training_step_time": 0.3870851993560791
    },
    {
      "epoch": 1.0101318359375e-05,
      "model_forward_time": 0.11412382125854492,
      "step": 1655
    },
    {
      "epoch": 1.0101318359375e-05,
      "step": 1655,
      "training_step_time": 0.3912036418914795
    },
    {
      "epoch": 1.0107421875e-05,
      "model_forward_time": 0.11543822288513184,
      "step": 1656
    },
    {
      "epoch": 1.0107421875e-05,
      "step": 1656,
      "training_step_time": 0.4041774272918701
    },
    {
      "epoch": 1.0113525390625e-05,
      "model_forward_time": 0.11496186256408691,
      "step": 1657
    },
    {
      "epoch": 1.0113525390625e-05,
      "step": 1657,
      "training_step_time": 0.3852198123931885
    },
    {
      "epoch": 1.011962890625e-05,
      "model_forward_time": 0.11593389511108398,
      "step": 1658
    },
    {
      "epoch": 1.011962890625e-05,
      "step": 1658,
      "training_step_time": 0.3818497657775879
    },
    {
      "epoch": 1.0125732421875e-05,
      "model_forward_time": 0.1150979995727539,
      "step": 1659
    },
    {
      "epoch": 1.0125732421875e-05,
      "step": 1659,
      "training_step_time": 0.38900327682495117
    },
    {
      "epoch": 1.01318359375e-05,
      "grad_norm": 0.5039880275726318,
      "learning_rate": 5.5333333333333334e-05,
      "loss": 0.1404,
      "step": 1660
    },
    {
      "epoch": 1.01318359375e-05,
      "model_forward_time": 0.11480045318603516,
      "step": 1660
    },
    {
      "epoch": 1.01318359375e-05,
      "step": 1660,
      "training_step_time": 0.39037251472473145
    },
    {
      "epoch": 1.0137939453125e-05,
      "model_forward_time": 0.11634039878845215,
      "step": 1661
    },
    {
      "epoch": 1.0137939453125e-05,
      "step": 1661,
      "training_step_time": 0.4313337802886963
    },
    {
      "epoch": 1.014404296875e-05,
      "model_forward_time": 0.1157076358795166,
      "step": 1662
    },
    {
      "epoch": 1.014404296875e-05,
      "step": 1662,
      "training_step_time": 0.4820899963378906
    },
    {
      "epoch": 1.0150146484375e-05,
      "model_forward_time": 0.11475491523742676,
      "step": 1663
    },
    {
      "epoch": 1.0150146484375e-05,
      "step": 1663,
      "training_step_time": 0.5005357265472412
    },
    {
      "epoch": 1.015625e-05,
      "model_forward_time": 0.11511516571044922,
      "step": 1664
    },
    {
      "epoch": 1.015625e-05,
      "step": 1664,
      "training_step_time": 0.46167659759521484
    },
    {
      "epoch": 1.0162353515625e-05,
      "model_forward_time": 0.11646747589111328,
      "step": 1665
    },
    {
      "epoch": 1.0162353515625e-05,
      "step": 1665,
      "training_step_time": 0.4295339584350586
    },
    {
      "epoch": 1.016845703125e-05,
      "model_forward_time": 0.11545467376708984,
      "step": 1666
    },
    {
      "epoch": 1.016845703125e-05,
      "step": 1666,
      "training_step_time": 0.4010627269744873
    },
    {
      "epoch": 1.0174560546875e-05,
      "model_forward_time": 0.11764907836914062,
      "step": 1667
    },
    {
      "epoch": 1.0174560546875e-05,
      "step": 1667,
      "training_step_time": 0.4073145389556885
    },
    {
      "epoch": 1.01806640625e-05,
      "model_forward_time": 0.11479544639587402,
      "step": 1668
    },
    {
      "epoch": 1.01806640625e-05,
      "step": 1668,
      "training_step_time": 0.4672567844390869
    },
    {
      "epoch": 1.0186767578125e-05,
      "model_forward_time": 0.11538052558898926,
      "step": 1669
    },
    {
      "epoch": 1.0186767578125e-05,
      "step": 1669,
      "training_step_time": 0.3926351070404053
    },
    {
      "epoch": 1.019287109375e-05,
      "grad_norm": 0.6142540574073792,
      "learning_rate": 5.566666666666667e-05,
      "loss": 0.1534,
      "step": 1670
    },
    {
      "epoch": 1.019287109375e-05,
      "model_forward_time": 0.11556792259216309,
      "step": 1670
    },
    {
      "epoch": 1.019287109375e-05,
      "step": 1670,
      "training_step_time": 0.40210747718811035
    },
    {
      "epoch": 1.0198974609375e-05,
      "model_forward_time": 0.11557292938232422,
      "step": 1671
    },
    {
      "epoch": 1.0198974609375e-05,
      "step": 1671,
      "training_step_time": 0.39672279357910156
    },
    {
      "epoch": 1.0205078125e-05,
      "model_forward_time": 0.1162867546081543,
      "step": 1672
    },
    {
      "epoch": 1.0205078125e-05,
      "step": 1672,
      "training_step_time": 0.40140724182128906
    },
    {
      "epoch": 1.0211181640625e-05,
      "model_forward_time": 0.11639642715454102,
      "step": 1673
    },
    {
      "epoch": 1.0211181640625e-05,
      "step": 1673,
      "training_step_time": 0.40097689628601074
    },
    {
      "epoch": 1.021728515625e-05,
      "model_forward_time": 0.11528730392456055,
      "step": 1674
    },
    {
      "epoch": 1.021728515625e-05,
      "step": 1674,
      "training_step_time": 0.46393275260925293
    },
    {
      "epoch": 1.0223388671875e-05,
      "model_forward_time": 0.11568641662597656,
      "step": 1675
    },
    {
      "epoch": 1.0223388671875e-05,
      "step": 1675,
      "training_step_time": 0.39868593215942383
    },
    {
      "epoch": 1.02294921875e-05,
      "model_forward_time": 0.11556506156921387,
      "step": 1676
    },
    {
      "epoch": 1.02294921875e-05,
      "step": 1676,
      "training_step_time": 0.4829823970794678
    },
    {
      "epoch": 1.0235595703125e-05,
      "model_forward_time": 0.11505746841430664,
      "step": 1677
    },
    {
      "epoch": 1.0235595703125e-05,
      "step": 1677,
      "training_step_time": 0.4420797824859619
    },
    {
      "epoch": 1.024169921875e-05,
      "model_forward_time": 0.1149744987487793,
      "step": 1678
    },
    {
      "epoch": 1.024169921875e-05,
      "step": 1678,
      "training_step_time": 0.44224071502685547
    },
    {
      "epoch": 1.0247802734375e-05,
      "model_forward_time": 0.1167752742767334,
      "step": 1679
    },
    {
      "epoch": 1.0247802734375e-05,
      "step": 1679,
      "training_step_time": 0.3939933776855469
    },
    {
      "epoch": 1.025390625e-05,
      "grad_norm": 0.7021307945251465,
      "learning_rate": 5.6000000000000006e-05,
      "loss": 0.1559,
      "step": 1680
    },
    {
      "epoch": 1.025390625e-05,
      "model_forward_time": 0.11532020568847656,
      "step": 1680
    },
    {
      "epoch": 1.025390625e-05,
      "step": 1680,
      "training_step_time": 0.4936661720275879
    },
    {
      "epoch": 1.0260009765625e-05,
      "model_forward_time": 0.1160879135131836,
      "step": 1681
    },
    {
      "epoch": 1.0260009765625e-05,
      "step": 1681,
      "training_step_time": 0.40401268005371094
    },
    {
      "epoch": 1.026611328125e-05,
      "model_forward_time": 0.11833953857421875,
      "step": 1682
    },
    {
      "epoch": 1.026611328125e-05,
      "step": 1682,
      "training_step_time": 0.47489380836486816
    },
    {
      "epoch": 1.0272216796875e-05,
      "model_forward_time": 0.11999845504760742,
      "step": 1683
    },
    {
      "epoch": 1.0272216796875e-05,
      "step": 1683,
      "training_step_time": 0.4073503017425537
    },
    {
      "epoch": 1.02783203125e-05,
      "model_forward_time": 0.11613869667053223,
      "step": 1684
    },
    {
      "epoch": 1.02783203125e-05,
      "step": 1684,
      "training_step_time": 0.38300633430480957
    },
    {
      "epoch": 1.0284423828125e-05,
      "model_forward_time": 0.11500167846679688,
      "step": 1685
    },
    {
      "epoch": 1.0284423828125e-05,
      "step": 1685,
      "training_step_time": 0.40446996688842773
    },
    {
      "epoch": 1.029052734375e-05,
      "model_forward_time": 0.11542987823486328,
      "step": 1686
    },
    {
      "epoch": 1.029052734375e-05,
      "step": 1686,
      "training_step_time": 0.39396119117736816
    },
    {
      "epoch": 1.0296630859375e-05,
      "model_forward_time": 0.11699461936950684,
      "step": 1687
    },
    {
      "epoch": 1.0296630859375e-05,
      "step": 1687,
      "training_step_time": 0.3950047492980957
    },
    {
      "epoch": 1.0302734375e-05,
      "model_forward_time": 0.1174154281616211,
      "step": 1688
    },
    {
      "epoch": 1.0302734375e-05,
      "step": 1688,
      "training_step_time": 0.4383091926574707
    },
    {
      "epoch": 1.0308837890625e-05,
      "model_forward_time": 0.11675000190734863,
      "step": 1689
    },
    {
      "epoch": 1.0308837890625e-05,
      "step": 1689,
      "training_step_time": 0.4396402835845947
    },
    {
      "epoch": 1.031494140625e-05,
      "grad_norm": 0.7907035946846008,
      "learning_rate": 5.633333333333334e-05,
      "loss": 0.1558,
      "step": 1690
    },
    {
      "epoch": 1.031494140625e-05,
      "model_forward_time": 0.11919593811035156,
      "step": 1690
    },
    {
      "epoch": 1.031494140625e-05,
      "step": 1690,
      "training_step_time": 0.44063806533813477
    },
    {
      "epoch": 1.0321044921875e-05,
      "model_forward_time": 0.11684393882751465,
      "step": 1691
    },
    {
      "epoch": 1.0321044921875e-05,
      "step": 1691,
      "training_step_time": 0.3975822925567627
    },
    {
      "epoch": 1.03271484375e-05,
      "model_forward_time": 0.11542558670043945,
      "step": 1692
    },
    {
      "epoch": 1.03271484375e-05,
      "step": 1692,
      "training_step_time": 0.4021275043487549
    },
    {
      "epoch": 1.0333251953125e-05,
      "model_forward_time": 0.11569547653198242,
      "step": 1693
    },
    {
      "epoch": 1.0333251953125e-05,
      "step": 1693,
      "training_step_time": 0.4008316993713379
    },
    {
      "epoch": 1.033935546875e-05,
      "model_forward_time": 0.1156914234161377,
      "step": 1694
    },
    {
      "epoch": 1.033935546875e-05,
      "step": 1694,
      "training_step_time": 0.4455089569091797
    },
    {
      "epoch": 1.0345458984375e-05,
      "model_forward_time": 0.11679482460021973,
      "step": 1695
    },
    {
      "epoch": 1.0345458984375e-05,
      "step": 1695,
      "training_step_time": 0.40491414070129395
    },
    {
      "epoch": 1.03515625e-05,
      "model_forward_time": 0.11585736274719238,
      "step": 1696
    },
    {
      "epoch": 1.03515625e-05,
      "step": 1696,
      "training_step_time": 0.4699702262878418
    },
    {
      "epoch": 1.0357666015625e-05,
      "model_forward_time": 0.11672663688659668,
      "step": 1697
    },
    {
      "epoch": 1.0357666015625e-05,
      "step": 1697,
      "training_step_time": 0.44263672828674316
    },
    {
      "epoch": 1.036376953125e-05,
      "model_forward_time": 0.11664462089538574,
      "step": 1698
    },
    {
      "epoch": 1.036376953125e-05,
      "step": 1698,
      "training_step_time": 0.4852430820465088
    },
    {
      "epoch": 1.0369873046875e-05,
      "model_forward_time": 0.11534309387207031,
      "step": 1699
    },
    {
      "epoch": 1.0369873046875e-05,
      "step": 1699,
      "training_step_time": 0.387437105178833
    },
    {
      "epoch": 1.03759765625e-05,
      "grad_norm": 0.7053083181381226,
      "learning_rate": 5.666666666666667e-05,
      "loss": 0.1505,
      "step": 1700
    },
    {
      "epoch": 1.03759765625e-05,
      "model_forward_time": 0.11511850357055664,
      "step": 1700
    },
    {
      "epoch": 1.03759765625e-05,
      "step": 1700,
      "training_step_time": 0.3881363868713379
    },
    {
      "epoch": 1.0382080078125e-05,
      "model_forward_time": 0.11678504943847656,
      "step": 1701
    },
    {
      "epoch": 1.0382080078125e-05,
      "step": 1701,
      "training_step_time": 0.41332125663757324
    },
    {
      "epoch": 1.038818359375e-05,
      "model_forward_time": 0.11733269691467285,
      "step": 1702
    },
    {
      "epoch": 1.038818359375e-05,
      "step": 1702,
      "training_step_time": 0.45493435859680176
    },
    {
      "epoch": 1.0394287109375e-05,
      "model_forward_time": 0.11520504951477051,
      "step": 1703
    },
    {
      "epoch": 1.0394287109375e-05,
      "step": 1703,
      "training_step_time": 0.3866417407989502
    },
    {
      "epoch": 1.0400390625e-05,
      "model_forward_time": 0.11553406715393066,
      "step": 1704
    },
    {
      "epoch": 1.0400390625e-05,
      "step": 1704,
      "training_step_time": 0.43112850189208984
    },
    {
      "epoch": 1.0406494140625e-05,
      "model_forward_time": 0.11503267288208008,
      "step": 1705
    },
    {
      "epoch": 1.0406494140625e-05,
      "step": 1705,
      "training_step_time": 0.42557263374328613
    },
    {
      "epoch": 1.041259765625e-05,
      "model_forward_time": 0.11505460739135742,
      "step": 1706
    },
    {
      "epoch": 1.041259765625e-05,
      "step": 1706,
      "training_step_time": 0.46311283111572266
    },
    {
      "epoch": 1.0418701171875e-05,
      "model_forward_time": 0.1155390739440918,
      "step": 1707
    },
    {
      "epoch": 1.0418701171875e-05,
      "step": 1707,
      "training_step_time": 0.4156680107116699
    },
    {
      "epoch": 1.04248046875e-05,
      "model_forward_time": 0.11576271057128906,
      "step": 1708
    },
    {
      "epoch": 1.04248046875e-05,
      "step": 1708,
      "training_step_time": 0.44700002670288086
    },
    {
      "epoch": 1.0430908203125e-05,
      "model_forward_time": 0.11649155616760254,
      "step": 1709
    },
    {
      "epoch": 1.0430908203125e-05,
      "step": 1709,
      "training_step_time": 0.4944448471069336
    },
    {
      "epoch": 1.043701171875e-05,
      "grad_norm": 0.5829038619995117,
      "learning_rate": 5.6999999999999996e-05,
      "loss": 0.1552,
      "step": 1710
    },
    {
      "epoch": 1.043701171875e-05,
      "model_forward_time": 0.13476133346557617,
      "step": 1710
    },
    {
      "epoch": 1.043701171875e-05,
      "step": 1710,
      "training_step_time": 0.3897993564605713
    },
    {
      "epoch": 1.0443115234375e-05,
      "model_forward_time": 0.11477947235107422,
      "step": 1711
    },
    {
      "epoch": 1.0443115234375e-05,
      "step": 1711,
      "training_step_time": 0.43736815452575684
    },
    {
      "epoch": 1.044921875e-05,
      "model_forward_time": 0.11636090278625488,
      "step": 1712
    },
    {
      "epoch": 1.044921875e-05,
      "step": 1712,
      "training_step_time": 0.4098172187805176
    },
    {
      "epoch": 1.0455322265625e-05,
      "model_forward_time": 0.11909222602844238,
      "step": 1713
    },
    {
      "epoch": 1.0455322265625e-05,
      "step": 1713,
      "training_step_time": 0.39570045471191406
    },
    {
      "epoch": 1.046142578125e-05,
      "model_forward_time": 0.11464166641235352,
      "step": 1714
    },
    {
      "epoch": 1.046142578125e-05,
      "step": 1714,
      "training_step_time": 0.3862314224243164
    },
    {
      "epoch": 1.0467529296875e-05,
      "model_forward_time": 0.11508321762084961,
      "step": 1715
    },
    {
      "epoch": 1.0467529296875e-05,
      "step": 1715,
      "training_step_time": 0.3878915309906006
    },
    {
      "epoch": 1.04736328125e-05,
      "model_forward_time": 0.11578798294067383,
      "step": 1716
    },
    {
      "epoch": 1.04736328125e-05,
      "step": 1716,
      "training_step_time": 0.3957962989807129
    },
    {
      "epoch": 1.0479736328125e-05,
      "model_forward_time": 0.11616730690002441,
      "step": 1717
    },
    {
      "epoch": 1.0479736328125e-05,
      "step": 1717,
      "training_step_time": 0.3985867500305176
    },
    {
      "epoch": 1.048583984375e-05,
      "model_forward_time": 0.11638379096984863,
      "step": 1718
    },
    {
      "epoch": 1.048583984375e-05,
      "step": 1718,
      "training_step_time": 0.3934292793273926
    },
    {
      "epoch": 1.0491943359375e-05,
      "model_forward_time": 0.11887073516845703,
      "step": 1719
    },
    {
      "epoch": 1.0491943359375e-05,
      "step": 1719,
      "training_step_time": 0.45106935501098633
    },
    {
      "epoch": 1.0498046875e-05,
      "grad_norm": 0.639657735824585,
      "learning_rate": 5.7333333333333336e-05,
      "loss": 0.156,
      "step": 1720
    },
    {
      "epoch": 1.0498046875e-05,
      "model_forward_time": 0.11823844909667969,
      "step": 1720
    },
    {
      "epoch": 1.0498046875e-05,
      "step": 1720,
      "training_step_time": 0.39144253730773926
    },
    {
      "epoch": 1.0504150390625e-05,
      "model_forward_time": 0.11570572853088379,
      "step": 1721
    },
    {
      "epoch": 1.0504150390625e-05,
      "step": 1721,
      "training_step_time": 0.41113710403442383
    },
    {
      "epoch": 1.051025390625e-05,
      "model_forward_time": 0.11844944953918457,
      "step": 1722
    },
    {
      "epoch": 1.051025390625e-05,
      "step": 1722,
      "training_step_time": 0.4513576030731201
    },
    {
      "epoch": 1.0516357421875e-05,
      "model_forward_time": 0.11837172508239746,
      "step": 1723
    },
    {
      "epoch": 1.0516357421875e-05,
      "step": 1723,
      "training_step_time": 0.49047231674194336
    },
    {
      "epoch": 1.05224609375e-05,
      "model_forward_time": 0.11767244338989258,
      "step": 1724
    },
    {
      "epoch": 1.05224609375e-05,
      "step": 1724,
      "training_step_time": 0.49065542221069336
    },
    {
      "epoch": 1.0528564453125e-05,
      "model_forward_time": 0.11616897583007812,
      "step": 1725
    },
    {
      "epoch": 1.0528564453125e-05,
      "step": 1725,
      "training_step_time": 0.3698601722717285
    },
    {
      "epoch": 1.053466796875e-05,
      "model_forward_time": 0.11499142646789551,
      "step": 1726
    },
    {
      "epoch": 1.053466796875e-05,
      "step": 1726,
      "training_step_time": 0.49652600288391113
    },
    {
      "epoch": 1.0540771484375e-05,
      "model_forward_time": 0.11528825759887695,
      "step": 1727
    },
    {
      "epoch": 1.0540771484375e-05,
      "step": 1727,
      "training_step_time": 0.4994618892669678
    },
    {
      "epoch": 1.0546875e-05,
      "model_forward_time": 0.11441421508789062,
      "step": 1728
    },
    {
      "epoch": 1.0546875e-05,
      "step": 1728,
      "training_step_time": 0.42122387886047363
    },
    {
      "epoch": 1.0552978515625e-05,
      "model_forward_time": 0.11588764190673828,
      "step": 1729
    },
    {
      "epoch": 1.0552978515625e-05,
      "step": 1729,
      "training_step_time": 0.42310428619384766
    },
    {
      "epoch": 1.055908203125e-05,
      "grad_norm": 0.5905861854553223,
      "learning_rate": 5.766666666666667e-05,
      "loss": 0.1563,
      "step": 1730
    },
    {
      "epoch": 1.055908203125e-05,
      "model_forward_time": 0.11837410926818848,
      "step": 1730
    },
    {
      "epoch": 1.055908203125e-05,
      "step": 1730,
      "training_step_time": 0.4045827388763428
    },
    {
      "epoch": 1.0565185546875e-05,
      "model_forward_time": 0.11852741241455078,
      "step": 1731
    },
    {
      "epoch": 1.0565185546875e-05,
      "step": 1731,
      "training_step_time": 0.38385915756225586
    },
    {
      "epoch": 1.05712890625e-05,
      "model_forward_time": 0.11909270286560059,
      "step": 1732
    },
    {
      "epoch": 1.05712890625e-05,
      "step": 1732,
      "training_step_time": 0.40235328674316406
    },
    {
      "epoch": 1.0577392578125e-05,
      "model_forward_time": 0.1178433895111084,
      "step": 1733
    },
    {
      "epoch": 1.0577392578125e-05,
      "step": 1733,
      "training_step_time": 0.4291713237762451
    },
    {
      "epoch": 1.058349609375e-05,
      "model_forward_time": 0.11575889587402344,
      "step": 1734
    },
    {
      "epoch": 1.058349609375e-05,
      "step": 1734,
      "training_step_time": 0.4860241413116455
    },
    {
      "epoch": 1.0589599609375e-05,
      "model_forward_time": 0.11504602432250977,
      "step": 1735
    },
    {
      "epoch": 1.0589599609375e-05,
      "step": 1735,
      "training_step_time": 0.45496654510498047
    },
    {
      "epoch": 1.0595703125e-05,
      "model_forward_time": 0.115692138671875,
      "step": 1736
    },
    {
      "epoch": 1.0595703125e-05,
      "step": 1736,
      "training_step_time": 0.45821595191955566
    },
    {
      "epoch": 1.0601806640625e-05,
      "model_forward_time": 0.11554980278015137,
      "step": 1737
    },
    {
      "epoch": 1.0601806640625e-05,
      "step": 1737,
      "training_step_time": 0.48636698722839355
    },
    {
      "epoch": 1.060791015625e-05,
      "model_forward_time": 0.1192014217376709,
      "step": 1738
    },
    {
      "epoch": 1.060791015625e-05,
      "step": 1738,
      "training_step_time": 0.4962599277496338
    },
    {
      "epoch": 1.0614013671875e-05,
      "model_forward_time": 0.114959716796875,
      "step": 1739
    },
    {
      "epoch": 1.0614013671875e-05,
      "step": 1739,
      "training_step_time": 0.3679478168487549
    },
    {
      "epoch": 1.06201171875e-05,
      "grad_norm": 0.7563685774803162,
      "learning_rate": 5.8e-05,
      "loss": 0.1531,
      "step": 1740
    },
    {
      "epoch": 1.06201171875e-05,
      "model_forward_time": 0.11557245254516602,
      "step": 1740
    },
    {
      "epoch": 1.06201171875e-05,
      "step": 1740,
      "training_step_time": 0.4694857597351074
    },
    {
      "epoch": 1.0626220703125e-05,
      "model_forward_time": 0.11537885665893555,
      "step": 1741
    },
    {
      "epoch": 1.0626220703125e-05,
      "step": 1741,
      "training_step_time": 0.4859962463378906
    },
    {
      "epoch": 1.063232421875e-05,
      "model_forward_time": 0.11530184745788574,
      "step": 1742
    },
    {
      "epoch": 1.063232421875e-05,
      "step": 1742,
      "training_step_time": 0.39348673820495605
    },
    {
      "epoch": 1.0638427734375e-05,
      "model_forward_time": 0.11629223823547363,
      "step": 1743
    },
    {
      "epoch": 1.0638427734375e-05,
      "step": 1743,
      "training_step_time": 0.40189290046691895
    },
    {
      "epoch": 1.064453125e-05,
      "model_forward_time": 0.11517024040222168,
      "step": 1744
    },
    {
      "epoch": 1.064453125e-05,
      "step": 1744,
      "training_step_time": 0.3923327922821045
    },
    {
      "epoch": 1.0650634765625e-05,
      "model_forward_time": 0.12132382392883301,
      "step": 1745
    },
    {
      "epoch": 1.0650634765625e-05,
      "step": 1745,
      "training_step_time": 0.3984041213989258
    },
    {
      "epoch": 1.065673828125e-05,
      "model_forward_time": 0.11932730674743652,
      "step": 1746
    },
    {
      "epoch": 1.065673828125e-05,
      "step": 1746,
      "training_step_time": 0.38994669914245605
    },
    {
      "epoch": 1.0662841796875e-05,
      "model_forward_time": 0.11808586120605469,
      "step": 1747
    },
    {
      "epoch": 1.0662841796875e-05,
      "step": 1747,
      "training_step_time": 0.4452493190765381
    },
    {
      "epoch": 1.06689453125e-05,
      "model_forward_time": 0.11664819717407227,
      "step": 1748
    },
    {
      "epoch": 1.06689453125e-05,
      "step": 1748,
      "training_step_time": 0.49622035026550293
    },
    {
      "epoch": 1.0675048828125e-05,
      "model_forward_time": 0.11791610717773438,
      "step": 1749
    },
    {
      "epoch": 1.0675048828125e-05,
      "step": 1749,
      "training_step_time": 0.44608545303344727
    },
    {
      "epoch": 1.068115234375e-05,
      "grad_norm": 0.8233485221862793,
      "learning_rate": 5.833333333333334e-05,
      "loss": 0.1562,
      "step": 1750
    },
    {
      "epoch": 1.068115234375e-05,
      "model_forward_time": 0.11554265022277832,
      "step": 1750
    },
    {
      "epoch": 1.068115234375e-05,
      "step": 1750,
      "training_step_time": 0.41042494773864746
    },
    {
      "epoch": 1.0687255859375e-05,
      "model_forward_time": 0.11534643173217773,
      "step": 1751
    },
    {
      "epoch": 1.0687255859375e-05,
      "step": 1751,
      "training_step_time": 0.43719053268432617
    },
    {
      "epoch": 1.0693359375e-05,
      "model_forward_time": 0.11546015739440918,
      "step": 1752
    },
    {
      "epoch": 1.0693359375e-05,
      "step": 1752,
      "training_step_time": 0.41799354553222656
    },
    {
      "epoch": 1.0699462890625e-05,
      "model_forward_time": 0.11492466926574707,
      "step": 1753
    },
    {
      "epoch": 1.0699462890625e-05,
      "step": 1753,
      "training_step_time": 0.4215052127838135
    },
    {
      "epoch": 1.070556640625e-05,
      "model_forward_time": 0.11578536033630371,
      "step": 1754
    },
    {
      "epoch": 1.070556640625e-05,
      "step": 1754,
      "training_step_time": 0.4226212501525879
    },
    {
      "epoch": 1.0711669921875e-05,
      "model_forward_time": 0.11565566062927246,
      "step": 1755
    },
    {
      "epoch": 1.0711669921875e-05,
      "step": 1755,
      "training_step_time": 0.4900848865509033
    },
    {
      "epoch": 1.07177734375e-05,
      "model_forward_time": 0.11568617820739746,
      "step": 1756
    },
    {
      "epoch": 1.07177734375e-05,
      "step": 1756,
      "training_step_time": 0.4954390525817871
    },
    {
      "epoch": 1.0723876953125e-05,
      "model_forward_time": 0.11461615562438965,
      "step": 1757
    },
    {
      "epoch": 1.0723876953125e-05,
      "step": 1757,
      "training_step_time": 0.39432740211486816
    },
    {
      "epoch": 1.072998046875e-05,
      "model_forward_time": 0.11786198616027832,
      "step": 1758
    },
    {
      "epoch": 1.072998046875e-05,
      "step": 1758,
      "training_step_time": 0.4010586738586426
    },
    {
      "epoch": 1.0736083984375e-05,
      "model_forward_time": 0.11603093147277832,
      "step": 1759
    },
    {
      "epoch": 1.0736083984375e-05,
      "step": 1759,
      "training_step_time": 0.39844250679016113
    },
    {
      "epoch": 1.07421875e-05,
      "grad_norm": 0.6831129789352417,
      "learning_rate": 5.866666666666667e-05,
      "loss": 0.1613,
      "step": 1760
    },
    {
      "epoch": 1.07421875e-05,
      "model_forward_time": 0.1155240535736084,
      "step": 1760
    },
    {
      "epoch": 1.07421875e-05,
      "step": 1760,
      "training_step_time": 0.41140294075012207
    },
    {
      "epoch": 1.0748291015625e-05,
      "model_forward_time": 0.11821508407592773,
      "step": 1761
    },
    {
      "epoch": 1.0748291015625e-05,
      "step": 1761,
      "training_step_time": 0.4292275905609131
    },
    {
      "epoch": 1.075439453125e-05,
      "model_forward_time": 0.1152045726776123,
      "step": 1762
    },
    {
      "epoch": 1.075439453125e-05,
      "step": 1762,
      "training_step_time": 0.4194173812866211
    },
    {
      "epoch": 1.0760498046875e-05,
      "model_forward_time": 0.12029170989990234,
      "step": 1763
    },
    {
      "epoch": 1.0760498046875e-05,
      "step": 1763,
      "training_step_time": 0.4645366668701172
    },
    {
      "epoch": 1.07666015625e-05,
      "model_forward_time": 0.11548018455505371,
      "step": 1764
    },
    {
      "epoch": 1.07666015625e-05,
      "step": 1764,
      "training_step_time": 0.3929004669189453
    },
    {
      "epoch": 1.0772705078125e-05,
      "model_forward_time": 0.1168978214263916,
      "step": 1765
    },
    {
      "epoch": 1.0772705078125e-05,
      "step": 1765,
      "training_step_time": 0.5164663791656494
    },
    {
      "epoch": 1.077880859375e-05,
      "model_forward_time": 0.11446094512939453,
      "step": 1766
    },
    {
      "epoch": 1.077880859375e-05,
      "step": 1766,
      "training_step_time": 0.49076247215270996
    },
    {
      "epoch": 1.0784912109375e-05,
      "model_forward_time": 0.11521053314208984,
      "step": 1767
    },
    {
      "epoch": 1.0784912109375e-05,
      "step": 1767,
      "training_step_time": 0.5131738185882568
    },
    {
      "epoch": 1.0791015625e-05,
      "model_forward_time": 0.11491775512695312,
      "step": 1768
    },
    {
      "epoch": 1.0791015625e-05,
      "step": 1768,
      "training_step_time": 0.46049952507019043
    },
    {
      "epoch": 1.0797119140625e-05,
      "model_forward_time": 0.11593008041381836,
      "step": 1769
    },
    {
      "epoch": 1.0797119140625e-05,
      "step": 1769,
      "training_step_time": 0.48558521270751953
    },
    {
      "epoch": 1.080322265625e-05,
      "grad_norm": 0.8308389186859131,
      "learning_rate": 5.9e-05,
      "loss": 0.1561,
      "step": 1770
    },
    {
      "epoch": 1.080322265625e-05,
      "model_forward_time": 0.11475396156311035,
      "step": 1770
    },
    {
      "epoch": 1.080322265625e-05,
      "step": 1770,
      "training_step_time": 0.47432398796081543
    },
    {
      "epoch": 1.0809326171875e-05,
      "model_forward_time": 0.1148824691772461,
      "step": 1771
    },
    {
      "epoch": 1.0809326171875e-05,
      "step": 1771,
      "training_step_time": 0.3891916275024414
    },
    {
      "epoch": 1.08154296875e-05,
      "model_forward_time": 0.11496877670288086,
      "step": 1772
    },
    {
      "epoch": 1.08154296875e-05,
      "step": 1772,
      "training_step_time": 0.39858293533325195
    },
    {
      "epoch": 1.0821533203125e-05,
      "model_forward_time": 0.1150200366973877,
      "step": 1773
    },
    {
      "epoch": 1.0821533203125e-05,
      "step": 1773,
      "training_step_time": 0.38509464263916016
    },
    {
      "epoch": 1.082763671875e-05,
      "model_forward_time": 0.1157069206237793,
      "step": 1774
    },
    {
      "epoch": 1.082763671875e-05,
      "step": 1774,
      "training_step_time": 0.39029955863952637
    },
    {
      "epoch": 1.0833740234375e-05,
      "model_forward_time": 0.11502432823181152,
      "step": 1775
    },
    {
      "epoch": 1.0833740234375e-05,
      "step": 1775,
      "training_step_time": 0.43617892265319824
    },
    {
      "epoch": 1.083984375e-05,
      "model_forward_time": 0.11844301223754883,
      "step": 1776
    },
    {
      "epoch": 1.083984375e-05,
      "step": 1776,
      "training_step_time": 0.42825794219970703
    },
    {
      "epoch": 1.0845947265625e-05,
      "model_forward_time": 0.11537528038024902,
      "step": 1777
    },
    {
      "epoch": 1.0845947265625e-05,
      "step": 1777,
      "training_step_time": 0.4539926052093506
    },
    {
      "epoch": 1.085205078125e-05,
      "model_forward_time": 0.11560988426208496,
      "step": 1778
    },
    {
      "epoch": 1.085205078125e-05,
      "step": 1778,
      "training_step_time": 0.38974499702453613
    },
    {
      "epoch": 1.0858154296875e-05,
      "model_forward_time": 0.11540579795837402,
      "step": 1779
    },
    {
      "epoch": 1.0858154296875e-05,
      "step": 1779,
      "training_step_time": 0.39714956283569336
    },
    {
      "epoch": 1.08642578125e-05,
      "grad_norm": 0.3986964225769043,
      "learning_rate": 5.9333333333333343e-05,
      "loss": 0.1528,
      "step": 1780
    },
    {
      "epoch": 1.08642578125e-05,
      "model_forward_time": 0.11503839492797852,
      "step": 1780
    },
    {
      "epoch": 1.08642578125e-05,
      "step": 1780,
      "training_step_time": 0.4997730255126953
    },
    {
      "epoch": 1.0870361328125e-05,
      "model_forward_time": 0.11505961418151855,
      "step": 1781
    },
    {
      "epoch": 1.0870361328125e-05,
      "step": 1781,
      "training_step_time": 0.4991919994354248
    },
    {
      "epoch": 1.087646484375e-05,
      "model_forward_time": 0.11492109298706055,
      "step": 1782
    },
    {
      "epoch": 1.087646484375e-05,
      "step": 1782,
      "training_step_time": 0.37102174758911133
    },
    {
      "epoch": 1.0882568359375e-05,
      "model_forward_time": 0.11525845527648926,
      "step": 1783
    },
    {
      "epoch": 1.0882568359375e-05,
      "step": 1783,
      "training_step_time": 0.43497228622436523
    },
    {
      "epoch": 1.0888671875e-05,
      "model_forward_time": 0.11606526374816895,
      "step": 1784
    },
    {
      "epoch": 1.0888671875e-05,
      "step": 1784,
      "training_step_time": 0.44069528579711914
    },
    {
      "epoch": 1.0894775390625e-05,
      "model_forward_time": 0.11473250389099121,
      "step": 1785
    },
    {
      "epoch": 1.0894775390625e-05,
      "step": 1785,
      "training_step_time": 0.38619041442871094
    },
    {
      "epoch": 1.090087890625e-05,
      "model_forward_time": 0.11492037773132324,
      "step": 1786
    },
    {
      "epoch": 1.090087890625e-05,
      "step": 1786,
      "training_step_time": 0.3973097801208496
    },
    {
      "epoch": 1.0906982421875e-05,
      "model_forward_time": 0.11494207382202148,
      "step": 1787
    },
    {
      "epoch": 1.0906982421875e-05,
      "step": 1787,
      "training_step_time": 0.39122843742370605
    },
    {
      "epoch": 1.09130859375e-05,
      "model_forward_time": 0.11627006530761719,
      "step": 1788
    },
    {
      "epoch": 1.09130859375e-05,
      "step": 1788,
      "training_step_time": 0.39066219329833984
    },
    {
      "epoch": 1.0919189453125e-05,
      "model_forward_time": 0.11558055877685547,
      "step": 1789
    },
    {
      "epoch": 1.0919189453125e-05,
      "step": 1789,
      "training_step_time": 0.4177067279815674
    },
    {
      "epoch": 1.092529296875e-05,
      "grad_norm": 0.4383661150932312,
      "learning_rate": 5.966666666666667e-05,
      "loss": 0.1593,
      "step": 1790
    },
    {
      "epoch": 1.092529296875e-05,
      "model_forward_time": 0.11624574661254883,
      "step": 1790
    },
    {
      "epoch": 1.092529296875e-05,
      "step": 1790,
      "training_step_time": 0.4059150218963623
    },
    {
      "epoch": 1.0931396484375e-05,
      "model_forward_time": 0.11545801162719727,
      "step": 1791
    },
    {
      "epoch": 1.0931396484375e-05,
      "step": 1791,
      "training_step_time": 0.41043710708618164
    },
    {
      "epoch": 1.09375e-05,
      "model_forward_time": 0.1163320541381836,
      "step": 1792
    },
    {
      "epoch": 1.09375e-05,
      "step": 1792,
      "training_step_time": 0.40931248664855957
    },
    {
      "epoch": 1.0943603515625e-05,
      "model_forward_time": 0.1149754524230957,
      "step": 1793
    },
    {
      "epoch": 1.0943603515625e-05,
      "step": 1793,
      "training_step_time": 0.4029078483581543
    },
    {
      "epoch": 1.094970703125e-05,
      "model_forward_time": 0.11531352996826172,
      "step": 1794
    },
    {
      "epoch": 1.094970703125e-05,
      "step": 1794,
      "training_step_time": 0.5227620601654053
    },
    {
      "epoch": 1.0955810546875e-05,
      "model_forward_time": 0.11529064178466797,
      "step": 1795
    },
    {
      "epoch": 1.0955810546875e-05,
      "step": 1795,
      "training_step_time": 0.474168062210083
    },
    {
      "epoch": 1.09619140625e-05,
      "model_forward_time": 0.11478257179260254,
      "step": 1796
    },
    {
      "epoch": 1.09619140625e-05,
      "step": 1796,
      "training_step_time": 0.41262102127075195
    },
    {
      "epoch": 1.0968017578125e-05,
      "model_forward_time": 0.11579155921936035,
      "step": 1797
    },
    {
      "epoch": 1.0968017578125e-05,
      "step": 1797,
      "training_step_time": 0.4855222702026367
    },
    {
      "epoch": 1.097412109375e-05,
      "model_forward_time": 0.11526346206665039,
      "step": 1798
    },
    {
      "epoch": 1.097412109375e-05,
      "step": 1798,
      "training_step_time": 0.4991731643676758
    },
    {
      "epoch": 1.0980224609375e-05,
      "model_forward_time": 0.11548805236816406,
      "step": 1799
    },
    {
      "epoch": 1.0980224609375e-05,
      "step": 1799,
      "training_step_time": 0.48024797439575195
    },
    {
      "epoch": 1.0986328125e-05,
      "grad_norm": 0.4701133072376251,
      "learning_rate": 6e-05,
      "loss": 0.1549,
      "step": 1800
    },
    {
      "epoch": 1.0986328125e-05,
      "model_forward_time": 0.11437439918518066,
      "step": 1800
    },
    {
      "epoch": 1.0986328125e-05,
      "step": 1800,
      "training_step_time": 0.39446234703063965
    },
    {
      "epoch": 1.0992431640625e-05,
      "model_forward_time": 0.11643385887145996,
      "step": 1801
    },
    {
      "epoch": 1.0992431640625e-05,
      "step": 1801,
      "training_step_time": 0.3867177963256836
    },
    {
      "epoch": 1.099853515625e-05,
      "model_forward_time": 0.11436963081359863,
      "step": 1802
    },
    {
      "epoch": 1.099853515625e-05,
      "step": 1802,
      "training_step_time": 0.3923661708831787
    },
    {
      "epoch": 1.1004638671875e-05,
      "model_forward_time": 0.11605358123779297,
      "step": 1803
    },
    {
      "epoch": 1.1004638671875e-05,
      "step": 1803,
      "training_step_time": 0.39449548721313477
    },
    {
      "epoch": 1.10107421875e-05,
      "model_forward_time": 0.11493039131164551,
      "step": 1804
    },
    {
      "epoch": 1.10107421875e-05,
      "step": 1804,
      "training_step_time": 0.4470639228820801
    },
    {
      "epoch": 1.1016845703125e-05,
      "model_forward_time": 0.11585855484008789,
      "step": 1805
    },
    {
      "epoch": 1.1016845703125e-05,
      "step": 1805,
      "training_step_time": 0.4240422248840332
    },
    {
      "epoch": 1.102294921875e-05,
      "model_forward_time": 0.11617112159729004,
      "step": 1806
    },
    {
      "epoch": 1.102294921875e-05,
      "step": 1806,
      "training_step_time": 0.46775341033935547
    },
    {
      "epoch": 1.1029052734375e-05,
      "model_forward_time": 0.1151883602142334,
      "step": 1807
    },
    {
      "epoch": 1.1029052734375e-05,
      "step": 1807,
      "training_step_time": 0.40615010261535645
    },
    {
      "epoch": 1.103515625e-05,
      "model_forward_time": 0.11553215980529785,
      "step": 1808
    },
    {
      "epoch": 1.103515625e-05,
      "step": 1808,
      "training_step_time": 0.400209903717041
    },
    {
      "epoch": 1.1041259765625e-05,
      "model_forward_time": 0.11519980430603027,
      "step": 1809
    },
    {
      "epoch": 1.1041259765625e-05,
      "step": 1809,
      "training_step_time": 0.4006073474884033
    },
    {
      "epoch": 1.104736328125e-05,
      "grad_norm": 0.8044312596321106,
      "learning_rate": 6.033333333333334e-05,
      "loss": 0.1543,
      "step": 1810
    },
    {
      "epoch": 1.104736328125e-05,
      "model_forward_time": 0.1165928840637207,
      "step": 1810
    },
    {
      "epoch": 1.104736328125e-05,
      "step": 1810,
      "training_step_time": 0.3898787498474121
    },
    {
      "epoch": 1.1053466796875e-05,
      "model_forward_time": 0.11488771438598633,
      "step": 1811
    },
    {
      "epoch": 1.1053466796875e-05,
      "step": 1811,
      "training_step_time": 0.3679068088531494
    },
    {
      "epoch": 1.10595703125e-05,
      "model_forward_time": 0.11574864387512207,
      "step": 1812
    },
    {
      "epoch": 1.10595703125e-05,
      "step": 1812,
      "training_step_time": 0.4025552272796631
    },
    {
      "epoch": 1.1065673828125e-05,
      "model_forward_time": 0.1146245002746582,
      "step": 1813
    },
    {
      "epoch": 1.1065673828125e-05,
      "step": 1813,
      "training_step_time": 0.42873120307922363
    },
    {
      "epoch": 1.107177734375e-05,
      "model_forward_time": 0.11602425575256348,
      "step": 1814
    },
    {
      "epoch": 1.107177734375e-05,
      "step": 1814,
      "training_step_time": 0.41942477226257324
    },
    {
      "epoch": 1.1077880859375e-05,
      "model_forward_time": 0.11575007438659668,
      "step": 1815
    },
    {
      "epoch": 1.1077880859375e-05,
      "step": 1815,
      "training_step_time": 0.3925962448120117
    },
    {
      "epoch": 1.1083984375e-05,
      "model_forward_time": 0.11579036712646484,
      "step": 1816
    },
    {
      "epoch": 1.1083984375e-05,
      "step": 1816,
      "training_step_time": 0.39565062522888184
    },
    {
      "epoch": 1.1090087890625e-05,
      "model_forward_time": 0.11533260345458984,
      "step": 1817
    },
    {
      "epoch": 1.1090087890625e-05,
      "step": 1817,
      "training_step_time": 0.38827037811279297
    },
    {
      "epoch": 1.109619140625e-05,
      "model_forward_time": 0.11551904678344727,
      "step": 1818
    },
    {
      "epoch": 1.109619140625e-05,
      "step": 1818,
      "training_step_time": 0.39388394355773926
    },
    {
      "epoch": 1.1102294921875e-05,
      "model_forward_time": 0.11569690704345703,
      "step": 1819
    },
    {
      "epoch": 1.1102294921875e-05,
      "step": 1819,
      "training_step_time": 0.4426760673522949
    },
    {
      "epoch": 1.11083984375e-05,
      "grad_norm": 0.4627319872379303,
      "learning_rate": 6.066666666666667e-05,
      "loss": 0.1475,
      "step": 1820
    },
    {
      "epoch": 1.11083984375e-05,
      "model_forward_time": 0.11527299880981445,
      "step": 1820
    },
    {
      "epoch": 1.11083984375e-05,
      "step": 1820,
      "training_step_time": 0.40908360481262207
    },
    {
      "epoch": 1.1114501953125e-05,
      "model_forward_time": 0.11569070816040039,
      "step": 1821
    },
    {
      "epoch": 1.1114501953125e-05,
      "step": 1821,
      "training_step_time": 0.4122941493988037
    },
    {
      "epoch": 1.112060546875e-05,
      "model_forward_time": 0.11502504348754883,
      "step": 1822
    },
    {
      "epoch": 1.112060546875e-05,
      "step": 1822,
      "training_step_time": 0.40068960189819336
    },
    {
      "epoch": 1.1126708984375e-05,
      "model_forward_time": 0.11859607696533203,
      "step": 1823
    },
    {
      "epoch": 1.1126708984375e-05,
      "step": 1823,
      "training_step_time": 0.39189934730529785
    },
    {
      "epoch": 1.11328125e-05,
      "model_forward_time": 0.11798501014709473,
      "step": 1824
    },
    {
      "epoch": 1.11328125e-05,
      "step": 1824,
      "training_step_time": 0.39380788803100586
    },
    {
      "epoch": 1.1138916015625e-05,
      "model_forward_time": 0.11583209037780762,
      "step": 1825
    },
    {
      "epoch": 1.1138916015625e-05,
      "step": 1825,
      "training_step_time": 0.45311784744262695
    },
    {
      "epoch": 1.114501953125e-05,
      "model_forward_time": 0.11548995971679688,
      "step": 1826
    },
    {
      "epoch": 1.114501953125e-05,
      "step": 1826,
      "training_step_time": 0.37725377082824707
    },
    {
      "epoch": 1.1151123046875e-05,
      "model_forward_time": 0.11555624008178711,
      "step": 1827
    },
    {
      "epoch": 1.1151123046875e-05,
      "step": 1827,
      "training_step_time": 0.40863513946533203
    },
    {
      "epoch": 1.11572265625e-05,
      "model_forward_time": 0.11458587646484375,
      "step": 1828
    },
    {
      "epoch": 1.11572265625e-05,
      "step": 1828,
      "training_step_time": 0.4386410713195801
    },
    {
      "epoch": 1.1163330078125e-05,
      "model_forward_time": 0.11573052406311035,
      "step": 1829
    },
    {
      "epoch": 1.1163330078125e-05,
      "step": 1829,
      "training_step_time": 0.39145684242248535
    },
    {
      "epoch": 1.116943359375e-05,
      "grad_norm": 0.650722861289978,
      "learning_rate": 6.1e-05,
      "loss": 0.1464,
      "step": 1830
    },
    {
      "epoch": 1.116943359375e-05,
      "model_forward_time": 0.11557221412658691,
      "step": 1830
    },
    {
      "epoch": 1.116943359375e-05,
      "step": 1830,
      "training_step_time": 0.39046597480773926
    },
    {
      "epoch": 1.1175537109375e-05,
      "model_forward_time": 0.11526918411254883,
      "step": 1831
    },
    {
      "epoch": 1.1175537109375e-05,
      "step": 1831,
      "training_step_time": 0.39350295066833496
    },
    {
      "epoch": 1.1181640625e-05,
      "model_forward_time": 0.11565637588500977,
      "step": 1832
    },
    {
      "epoch": 1.1181640625e-05,
      "step": 1832,
      "training_step_time": 0.3872206211090088
    },
    {
      "epoch": 1.1187744140625e-05,
      "model_forward_time": 0.11657118797302246,
      "step": 1833
    },
    {
      "epoch": 1.1187744140625e-05,
      "step": 1833,
      "training_step_time": 0.39379048347473145
    },
    {
      "epoch": 1.119384765625e-05,
      "model_forward_time": 0.11516737937927246,
      "step": 1834
    },
    {
      "epoch": 1.119384765625e-05,
      "step": 1834,
      "training_step_time": 0.40602636337280273
    },
    {
      "epoch": 1.1199951171875e-05,
      "model_forward_time": 0.11572670936584473,
      "step": 1835
    },
    {
      "epoch": 1.1199951171875e-05,
      "step": 1835,
      "training_step_time": 0.4317812919616699
    },
    {
      "epoch": 1.12060546875e-05,
      "model_forward_time": 0.1150357723236084,
      "step": 1836
    },
    {
      "epoch": 1.12060546875e-05,
      "step": 1836,
      "training_step_time": 0.4914054870605469
    },
    {
      "epoch": 1.1212158203125e-05,
      "model_forward_time": 0.11507678031921387,
      "step": 1837
    },
    {
      "epoch": 1.1212158203125e-05,
      "step": 1837,
      "training_step_time": 0.397998571395874
    },
    {
      "epoch": 1.121826171875e-05,
      "model_forward_time": 0.11511826515197754,
      "step": 1838
    },
    {
      "epoch": 1.121826171875e-05,
      "step": 1838,
      "training_step_time": 0.6424741744995117
    },
    {
      "epoch": 1.1224365234375e-05,
      "model_forward_time": 0.11445760726928711,
      "step": 1839
    },
    {
      "epoch": 1.1224365234375e-05,
      "step": 1839,
      "training_step_time": 0.4029574394226074
    },
    {
      "epoch": 1.123046875e-05,
      "grad_norm": 0.4624830484390259,
      "learning_rate": 6.133333333333334e-05,
      "loss": 0.155,
      "step": 1840
    },
    {
      "epoch": 1.123046875e-05,
      "model_forward_time": 0.11498475074768066,
      "step": 1840
    },
    {
      "epoch": 1.123046875e-05,
      "step": 1840,
      "training_step_time": 0.4950673580169678
    },
    {
      "epoch": 1.1236572265625e-05,
      "model_forward_time": 0.11509227752685547,
      "step": 1841
    },
    {
      "epoch": 1.1236572265625e-05,
      "step": 1841,
      "training_step_time": 0.47687649726867676
    },
    {
      "epoch": 1.124267578125e-05,
      "model_forward_time": 0.11464858055114746,
      "step": 1842
    },
    {
      "epoch": 1.124267578125e-05,
      "step": 1842,
      "training_step_time": 0.4108395576477051
    },
    {
      "epoch": 1.1248779296875e-05,
      "model_forward_time": 0.1150827407836914,
      "step": 1843
    },
    {
      "epoch": 1.1248779296875e-05,
      "step": 1843,
      "training_step_time": 0.4798457622528076
    },
    {
      "epoch": 1.12548828125e-05,
      "model_forward_time": 0.11497354507446289,
      "step": 1844
    },
    {
      "epoch": 1.12548828125e-05,
      "step": 1844,
      "training_step_time": 0.6414015293121338
    },
    {
      "epoch": 1.1260986328125e-05,
      "model_forward_time": 0.11458635330200195,
      "step": 1845
    },
    {
      "epoch": 1.1260986328125e-05,
      "step": 1845,
      "training_step_time": 0.38202714920043945
    },
    {
      "epoch": 1.126708984375e-05,
      "model_forward_time": 0.11453485488891602,
      "step": 1846
    },
    {
      "epoch": 1.126708984375e-05,
      "step": 1846,
      "training_step_time": 0.3856637477874756
    },
    {
      "epoch": 1.1273193359375e-05,
      "model_forward_time": 0.1215512752532959,
      "step": 1847
    },
    {
      "epoch": 1.1273193359375e-05,
      "step": 1847,
      "training_step_time": 0.4646718502044678
    },
    {
      "epoch": 1.1279296875e-05,
      "model_forward_time": 0.11759090423583984,
      "step": 1848
    },
    {
      "epoch": 1.1279296875e-05,
      "step": 1848,
      "training_step_time": 0.4059174060821533
    },
    {
      "epoch": 1.1285400390625e-05,
      "model_forward_time": 0.11942696571350098,
      "step": 1849
    },
    {
      "epoch": 1.1285400390625e-05,
      "step": 1849,
      "training_step_time": 0.4243144989013672
    },
    {
      "epoch": 1.129150390625e-05,
      "grad_norm": 0.4328053891658783,
      "learning_rate": 6.166666666666667e-05,
      "loss": 0.1513,
      "step": 1850
    },
    {
      "epoch": 1.129150390625e-05,
      "model_forward_time": 0.11787009239196777,
      "step": 1850
    },
    {
      "epoch": 1.129150390625e-05,
      "step": 1850,
      "training_step_time": 0.5081167221069336
    },
    {
      "epoch": 1.1297607421875e-05,
      "model_forward_time": 0.1150820255279541,
      "step": 1851
    },
    {
      "epoch": 1.1297607421875e-05,
      "step": 1851,
      "training_step_time": 0.3880434036254883
    },
    {
      "epoch": 1.13037109375e-05,
      "model_forward_time": 0.11571478843688965,
      "step": 1852
    },
    {
      "epoch": 1.13037109375e-05,
      "step": 1852,
      "training_step_time": 0.4388391971588135
    },
    {
      "epoch": 1.1309814453125e-05,
      "model_forward_time": 0.11537003517150879,
      "step": 1853
    },
    {
      "epoch": 1.1309814453125e-05,
      "step": 1853,
      "training_step_time": 0.44632744789123535
    },
    {
      "epoch": 1.131591796875e-05,
      "model_forward_time": 0.11584091186523438,
      "step": 1854
    },
    {
      "epoch": 1.131591796875e-05,
      "step": 1854,
      "training_step_time": 0.460925817489624
    },
    {
      "epoch": 1.1322021484375e-05,
      "model_forward_time": 0.11523222923278809,
      "step": 1855
    },
    {
      "epoch": 1.1322021484375e-05,
      "step": 1855,
      "training_step_time": 0.42758703231811523
    },
    {
      "epoch": 1.1328125e-05,
      "model_forward_time": 0.11536288261413574,
      "step": 1856
    },
    {
      "epoch": 1.1328125e-05,
      "step": 1856,
      "training_step_time": 0.4914877414703369
    },
    {
      "epoch": 1.1334228515625e-05,
      "model_forward_time": 0.11564135551452637,
      "step": 1857
    },
    {
      "epoch": 1.1334228515625e-05,
      "step": 1857,
      "training_step_time": 0.40554022789001465
    },
    {
      "epoch": 1.134033203125e-05,
      "model_forward_time": 0.11598515510559082,
      "step": 1858
    },
    {
      "epoch": 1.134033203125e-05,
      "step": 1858,
      "training_step_time": 0.38105177879333496
    },
    {
      "epoch": 1.1346435546875e-05,
      "model_forward_time": 0.11551237106323242,
      "step": 1859
    },
    {
      "epoch": 1.1346435546875e-05,
      "step": 1859,
      "training_step_time": 0.3835935592651367
    },
    {
      "epoch": 1.13525390625e-05,
      "grad_norm": 0.5620033740997314,
      "learning_rate": 6.2e-05,
      "loss": 0.1654,
      "step": 1860
    },
    {
      "epoch": 1.13525390625e-05,
      "model_forward_time": 0.11566829681396484,
      "step": 1860
    },
    {
      "epoch": 1.13525390625e-05,
      "step": 1860,
      "training_step_time": 0.39537882804870605
    },
    {
      "epoch": 1.1358642578125e-05,
      "model_forward_time": 0.11562371253967285,
      "step": 1861
    },
    {
      "epoch": 1.1358642578125e-05,
      "step": 1861,
      "training_step_time": 0.42516374588012695
    },
    {
      "epoch": 1.136474609375e-05,
      "model_forward_time": 0.11571955680847168,
      "step": 1862
    },
    {
      "epoch": 1.136474609375e-05,
      "step": 1862,
      "training_step_time": 0.6177146434783936
    },
    {
      "epoch": 1.1370849609375e-05,
      "model_forward_time": 0.11828994750976562,
      "step": 1863
    },
    {
      "epoch": 1.1370849609375e-05,
      "step": 1863,
      "training_step_time": 0.5557286739349365
    },
    {
      "epoch": 1.1376953125e-05,
      "model_forward_time": 0.11665654182434082,
      "step": 1864
    },
    {
      "epoch": 1.1376953125e-05,
      "step": 1864,
      "training_step_time": 0.5668668746948242
    },
    {
      "epoch": 1.1383056640625e-05,
      "model_forward_time": 0.11629152297973633,
      "step": 1865
    },
    {
      "epoch": 1.1383056640625e-05,
      "step": 1865,
      "training_step_time": 0.5915772914886475
    },
    {
      "epoch": 1.138916015625e-05,
      "model_forward_time": 0.1286473274230957,
      "step": 1866
    },
    {
      "epoch": 1.138916015625e-05,
      "step": 1866,
      "training_step_time": 0.6199760437011719
    },
    {
      "epoch": 1.1395263671875e-05,
      "model_forward_time": 0.12285947799682617,
      "step": 1867
    },
    {
      "epoch": 1.1395263671875e-05,
      "step": 1867,
      "training_step_time": 0.7603096961975098
    },
    {
      "epoch": 1.14013671875e-05,
      "model_forward_time": 0.11905980110168457,
      "step": 1868
    },
    {
      "epoch": 1.14013671875e-05,
      "step": 1868,
      "training_step_time": 0.6797528266906738
    },
    {
      "epoch": 1.1407470703125e-05,
      "model_forward_time": 0.1216120719909668,
      "step": 1869
    },
    {
      "epoch": 1.1407470703125e-05,
      "step": 1869,
      "training_step_time": 0.814063310623169
    },
    {
      "epoch": 1.141357421875e-05,
      "grad_norm": 0.6283659338951111,
      "learning_rate": 6.233333333333334e-05,
      "loss": 0.1535,
      "step": 1870
    },
    {
      "epoch": 1.141357421875e-05,
      "model_forward_time": 0.11735224723815918,
      "step": 1870
    },
    {
      "epoch": 1.141357421875e-05,
      "step": 1870,
      "training_step_time": 0.6651830673217773
    },
    {
      "epoch": 1.1419677734375e-05,
      "model_forward_time": 0.11856818199157715,
      "step": 1871
    },
    {
      "epoch": 1.1419677734375e-05,
      "step": 1871,
      "training_step_time": 0.6457281112670898
    },
    {
      "epoch": 1.142578125e-05,
      "model_forward_time": 0.11847805976867676,
      "step": 1872
    },
    {
      "epoch": 1.142578125e-05,
      "step": 1872,
      "training_step_time": 0.6300632953643799
    },
    {
      "epoch": 1.1431884765625e-05,
      "model_forward_time": 0.11729979515075684,
      "step": 1873
    },
    {
      "epoch": 1.1431884765625e-05,
      "step": 1873,
      "training_step_time": 0.6765830516815186
    },
    {
      "epoch": 1.143798828125e-05,
      "model_forward_time": 0.12352108955383301,
      "step": 1874
    },
    {
      "epoch": 1.143798828125e-05,
      "step": 1874,
      "training_step_time": 0.6296885013580322
    },
    {
      "epoch": 1.1444091796875e-05,
      "model_forward_time": 0.11798596382141113,
      "step": 1875
    },
    {
      "epoch": 1.1444091796875e-05,
      "step": 1875,
      "training_step_time": 0.6960463523864746
    },
    {
      "epoch": 1.14501953125e-05,
      "model_forward_time": 0.12247753143310547,
      "step": 1876
    },
    {
      "epoch": 1.14501953125e-05,
      "step": 1876,
      "training_step_time": 0.6123142242431641
    },
    {
      "epoch": 1.1456298828125e-05,
      "model_forward_time": 0.11917757987976074,
      "step": 1877
    },
    {
      "epoch": 1.1456298828125e-05,
      "step": 1877,
      "training_step_time": 0.7203490734100342
    },
    {
      "epoch": 1.146240234375e-05,
      "model_forward_time": 0.11859250068664551,
      "step": 1878
    },
    {
      "epoch": 1.146240234375e-05,
      "step": 1878,
      "training_step_time": 0.6219446659088135
    },
    {
      "epoch": 1.1468505859375e-05,
      "model_forward_time": 0.12323355674743652,
      "step": 1879
    },
    {
      "epoch": 1.1468505859375e-05,
      "step": 1879,
      "training_step_time": 0.6742579936981201
    },
    {
      "epoch": 1.1474609375e-05,
      "grad_norm": 0.8101016283035278,
      "learning_rate": 6.266666666666667e-05,
      "loss": 0.147,
      "step": 1880
    },
    {
      "epoch": 1.1474609375e-05,
      "model_forward_time": 0.11777567863464355,
      "step": 1880
    },
    {
      "epoch": 1.1474609375e-05,
      "step": 1880,
      "training_step_time": 0.6511926651000977
    },
    {
      "epoch": 1.1480712890625e-05,
      "model_forward_time": 0.12042903900146484,
      "step": 1881
    },
    {
      "epoch": 1.1480712890625e-05,
      "step": 1881,
      "training_step_time": 0.6560304164886475
    },
    {
      "epoch": 1.148681640625e-05,
      "model_forward_time": 0.11837649345397949,
      "step": 1882
    },
    {
      "epoch": 1.148681640625e-05,
      "step": 1882,
      "training_step_time": 0.6859352588653564
    },
    {
      "epoch": 1.1492919921875e-05,
      "model_forward_time": 0.12067866325378418,
      "step": 1883
    },
    {
      "epoch": 1.1492919921875e-05,
      "step": 1883,
      "training_step_time": 0.67574143409729
    },
    {
      "epoch": 1.14990234375e-05,
      "model_forward_time": 0.12752056121826172,
      "step": 1884
    },
    {
      "epoch": 1.14990234375e-05,
      "step": 1884,
      "training_step_time": 0.6719436645507812
    },
    {
      "epoch": 1.1505126953125e-05,
      "model_forward_time": 0.1163482666015625,
      "step": 1885
    },
    {
      "epoch": 1.1505126953125e-05,
      "step": 1885,
      "training_step_time": 0.7108156681060791
    },
    {
      "epoch": 1.151123046875e-05,
      "model_forward_time": 0.11868476867675781,
      "step": 1886
    },
    {
      "epoch": 1.151123046875e-05,
      "step": 1886,
      "training_step_time": 0.5450513362884521
    },
    {
      "epoch": 1.1517333984375e-05,
      "model_forward_time": 0.11765694618225098,
      "step": 1887
    },
    {
      "epoch": 1.1517333984375e-05,
      "step": 1887,
      "training_step_time": 0.6661350727081299
    },
    {
      "epoch": 1.15234375e-05,
      "model_forward_time": 0.12636923789978027,
      "step": 1888
    },
    {
      "epoch": 1.15234375e-05,
      "step": 1888,
      "training_step_time": 0.7037100791931152
    },
    {
      "epoch": 1.1529541015625e-05,
      "model_forward_time": 0.11811089515686035,
      "step": 1889
    },
    {
      "epoch": 1.1529541015625e-05,
      "step": 1889,
      "training_step_time": 0.6388041973114014
    },
    {
      "epoch": 1.153564453125e-05,
      "grad_norm": 0.5666515827178955,
      "learning_rate": 6.3e-05,
      "loss": 0.138,
      "step": 1890
    },
    {
      "epoch": 1.153564453125e-05,
      "model_forward_time": 0.12379813194274902,
      "step": 1890
    },
    {
      "epoch": 1.153564453125e-05,
      "step": 1890,
      "training_step_time": 0.7137699127197266
    },
    {
      "epoch": 1.1541748046875e-05,
      "model_forward_time": 0.11775708198547363,
      "step": 1891
    },
    {
      "epoch": 1.1541748046875e-05,
      "step": 1891,
      "training_step_time": 0.775233268737793
    },
    {
      "epoch": 1.15478515625e-05,
      "model_forward_time": 0.12048983573913574,
      "step": 1892
    },
    {
      "epoch": 1.15478515625e-05,
      "step": 1892,
      "training_step_time": 0.7394039630889893
    },
    {
      "epoch": 1.1553955078125e-05,
      "model_forward_time": 0.11798381805419922,
      "step": 1893
    },
    {
      "epoch": 1.1553955078125e-05,
      "step": 1893,
      "training_step_time": 0.6808276176452637
    },
    {
      "epoch": 1.156005859375e-05,
      "model_forward_time": 0.11767172813415527,
      "step": 1894
    },
    {
      "epoch": 1.156005859375e-05,
      "step": 1894,
      "training_step_time": 0.7383110523223877
    },
    {
      "epoch": 1.1566162109375e-05,
      "model_forward_time": 0.12024903297424316,
      "step": 1895
    },
    {
      "epoch": 1.1566162109375e-05,
      "step": 1895,
      "training_step_time": 0.7741611003875732
    },
    {
      "epoch": 1.1572265625e-05,
      "model_forward_time": 0.12022590637207031,
      "step": 1896
    },
    {
      "epoch": 1.1572265625e-05,
      "step": 1896,
      "training_step_time": 0.7660167217254639
    },
    {
      "epoch": 1.1578369140625e-05,
      "model_forward_time": 0.11847972869873047,
      "step": 1897
    },
    {
      "epoch": 1.1578369140625e-05,
      "step": 1897,
      "training_step_time": 0.6606960296630859
    },
    {
      "epoch": 1.158447265625e-05,
      "model_forward_time": 0.11660599708557129,
      "step": 1898
    },
    {
      "epoch": 1.158447265625e-05,
      "step": 1898,
      "training_step_time": 0.7244722843170166
    },
    {
      "epoch": 1.1590576171875e-05,
      "model_forward_time": 0.1220095157623291,
      "step": 1899
    },
    {
      "epoch": 1.1590576171875e-05,
      "step": 1899,
      "training_step_time": 0.6800642013549805
    },
    {
      "epoch": 1.15966796875e-05,
      "grad_norm": 0.7164374589920044,
      "learning_rate": 6.333333333333333e-05,
      "loss": 0.1612,
      "step": 1900
    },
    {
      "epoch": 1.15966796875e-05,
      "model_forward_time": 0.11779570579528809,
      "step": 1900
    },
    {
      "epoch": 1.15966796875e-05,
      "step": 1900,
      "training_step_time": 0.723508358001709
    },
    {
      "epoch": 1.1602783203125e-05,
      "model_forward_time": 0.12003421783447266,
      "step": 1901
    },
    {
      "epoch": 1.1602783203125e-05,
      "step": 1901,
      "training_step_time": 0.7061009407043457
    },
    {
      "epoch": 1.160888671875e-05,
      "model_forward_time": 0.1193547248840332,
      "step": 1902
    },
    {
      "epoch": 1.160888671875e-05,
      "step": 1902,
      "training_step_time": 0.6598236560821533
    },
    {
      "epoch": 1.1614990234375e-05,
      "model_forward_time": 0.12322616577148438,
      "step": 1903
    },
    {
      "epoch": 1.1614990234375e-05,
      "step": 1903,
      "training_step_time": 0.6880936622619629
    },
    {
      "epoch": 1.162109375e-05,
      "model_forward_time": 0.11674690246582031,
      "step": 1904
    },
    {
      "epoch": 1.162109375e-05,
      "step": 1904,
      "training_step_time": 0.6729726791381836
    },
    {
      "epoch": 1.1627197265625e-05,
      "model_forward_time": 0.11980462074279785,
      "step": 1905
    },
    {
      "epoch": 1.1627197265625e-05,
      "step": 1905,
      "training_step_time": 0.6594040393829346
    },
    {
      "epoch": 1.163330078125e-05,
      "model_forward_time": 0.11802220344543457,
      "step": 1906
    },
    {
      "epoch": 1.163330078125e-05,
      "step": 1906,
      "training_step_time": 0.709963321685791
    },
    {
      "epoch": 1.1639404296875e-05,
      "model_forward_time": 0.12902450561523438,
      "step": 1907
    },
    {
      "epoch": 1.1639404296875e-05,
      "step": 1907,
      "training_step_time": 0.7159276008605957
    },
    {
      "epoch": 1.16455078125e-05,
      "model_forward_time": 0.12010765075683594,
      "step": 1908
    },
    {
      "epoch": 1.16455078125e-05,
      "step": 1908,
      "training_step_time": 0.70493483543396
    },
    {
      "epoch": 1.1651611328125e-05,
      "model_forward_time": 0.11833667755126953,
      "step": 1909
    },
    {
      "epoch": 1.1651611328125e-05,
      "step": 1909,
      "training_step_time": 0.7046914100646973
    },
    {
      "epoch": 1.165771484375e-05,
      "grad_norm": 0.6372619867324829,
      "learning_rate": 6.366666666666668e-05,
      "loss": 0.1634,
      "step": 1910
    },
    {
      "epoch": 1.165771484375e-05,
      "model_forward_time": 0.11970090866088867,
      "step": 1910
    },
    {
      "epoch": 1.165771484375e-05,
      "step": 1910,
      "training_step_time": 0.7183010578155518
    },
    {
      "epoch": 1.1663818359375e-05,
      "model_forward_time": 0.11668276786804199,
      "step": 1911
    },
    {
      "epoch": 1.1663818359375e-05,
      "step": 1911,
      "training_step_time": 0.6686837673187256
    },
    {
      "epoch": 1.1669921875e-05,
      "model_forward_time": 0.12524652481079102,
      "step": 1912
    },
    {
      "epoch": 1.1669921875e-05,
      "step": 1912,
      "training_step_time": 0.6305932998657227
    },
    {
      "epoch": 1.1676025390625e-05,
      "model_forward_time": 0.11709928512573242,
      "step": 1913
    },
    {
      "epoch": 1.1676025390625e-05,
      "step": 1913,
      "training_step_time": 0.712928056716919
    },
    {
      "epoch": 1.168212890625e-05,
      "model_forward_time": 0.12413668632507324,
      "step": 1914
    },
    {
      "epoch": 1.168212890625e-05,
      "step": 1914,
      "training_step_time": 0.7547869682312012
    },
    {
      "epoch": 1.1688232421875e-05,
      "model_forward_time": 0.12343382835388184,
      "step": 1915
    },
    {
      "epoch": 1.1688232421875e-05,
      "step": 1915,
      "training_step_time": 0.7287640571594238
    },
    {
      "epoch": 1.16943359375e-05,
      "model_forward_time": 0.12604594230651855,
      "step": 1916
    },
    {
      "epoch": 1.16943359375e-05,
      "step": 1916,
      "training_step_time": 0.724785566329956
    },
    {
      "epoch": 1.1700439453125e-05,
      "model_forward_time": 0.13025856018066406,
      "step": 1917
    },
    {
      "epoch": 1.1700439453125e-05,
      "step": 1917,
      "training_step_time": 0.695911169052124
    },
    {
      "epoch": 1.170654296875e-05,
      "model_forward_time": 0.1214454174041748,
      "step": 1918
    },
    {
      "epoch": 1.170654296875e-05,
      "step": 1918,
      "training_step_time": 0.7093498706817627
    },
    {
      "epoch": 1.1712646484375e-05,
      "model_forward_time": 0.1193692684173584,
      "step": 1919
    },
    {
      "epoch": 1.1712646484375e-05,
      "step": 1919,
      "training_step_time": 0.8410453796386719
    },
    {
      "epoch": 1.171875e-05,
      "grad_norm": 0.6048005223274231,
      "learning_rate": 6.400000000000001e-05,
      "loss": 0.1559,
      "step": 1920
    },
    {
      "epoch": 1.171875e-05,
      "model_forward_time": 0.11846065521240234,
      "step": 1920
    },
    {
      "epoch": 1.171875e-05,
      "step": 1920,
      "training_step_time": 0.680610179901123
    },
    {
      "epoch": 1.1724853515625e-05,
      "model_forward_time": 0.11881327629089355,
      "step": 1921
    },
    {
      "epoch": 1.1724853515625e-05,
      "step": 1921,
      "training_step_time": 0.6465902328491211
    },
    {
      "epoch": 1.173095703125e-05,
      "model_forward_time": 0.1216592788696289,
      "step": 1922
    },
    {
      "epoch": 1.173095703125e-05,
      "step": 1922,
      "training_step_time": 0.6230175495147705
    },
    {
      "epoch": 1.1737060546875e-05,
      "model_forward_time": 0.1171565055847168,
      "step": 1923
    },
    {
      "epoch": 1.1737060546875e-05,
      "step": 1923,
      "training_step_time": 0.7770602703094482
    },
    {
      "epoch": 1.17431640625e-05,
      "model_forward_time": 0.11812853813171387,
      "step": 1924
    },
    {
      "epoch": 1.17431640625e-05,
      "step": 1924,
      "training_step_time": 0.6363656520843506
    },
    {
      "epoch": 1.1749267578125e-05,
      "model_forward_time": 0.11913561820983887,
      "step": 1925
    },
    {
      "epoch": 1.1749267578125e-05,
      "step": 1925,
      "training_step_time": 0.6628353595733643
    },
    {
      "epoch": 1.175537109375e-05,
      "model_forward_time": 0.11710429191589355,
      "step": 1926
    },
    {
      "epoch": 1.175537109375e-05,
      "step": 1926,
      "training_step_time": 0.60280442237854
    },
    {
      "epoch": 1.1761474609375e-05,
      "model_forward_time": 0.11704587936401367,
      "step": 1927
    },
    {
      "epoch": 1.1761474609375e-05,
      "step": 1927,
      "training_step_time": 0.671001672744751
    },
    {
      "epoch": 1.1767578125e-05,
      "model_forward_time": 0.1185159683227539,
      "step": 1928
    },
    {
      "epoch": 1.1767578125e-05,
      "step": 1928,
      "training_step_time": 0.6369936466217041
    },
    {
      "epoch": 1.1773681640625e-05,
      "model_forward_time": 0.1219480037689209,
      "step": 1929
    },
    {
      "epoch": 1.1773681640625e-05,
      "step": 1929,
      "training_step_time": 0.6574859619140625
    },
    {
      "epoch": 1.177978515625e-05,
      "grad_norm": 0.6911097764968872,
      "learning_rate": 6.433333333333333e-05,
      "loss": 0.1586,
      "step": 1930
    },
    {
      "epoch": 1.177978515625e-05,
      "model_forward_time": 0.12128782272338867,
      "step": 1930
    },
    {
      "epoch": 1.177978515625e-05,
      "step": 1930,
      "training_step_time": 0.5922577381134033
    },
    {
      "epoch": 1.1785888671875e-05,
      "model_forward_time": 0.12803053855895996,
      "step": 1931
    },
    {
      "epoch": 1.1785888671875e-05,
      "step": 1931,
      "training_step_time": 0.6248490810394287
    },
    {
      "epoch": 1.17919921875e-05,
      "model_forward_time": 0.11716604232788086,
      "step": 1932
    },
    {
      "epoch": 1.17919921875e-05,
      "step": 1932,
      "training_step_time": 0.5904524326324463
    },
    {
      "epoch": 1.1798095703125e-05,
      "model_forward_time": 0.11822366714477539,
      "step": 1933
    },
    {
      "epoch": 1.1798095703125e-05,
      "step": 1933,
      "training_step_time": 0.569652795791626
    },
    {
      "epoch": 1.180419921875e-05,
      "model_forward_time": 0.12116861343383789,
      "step": 1934
    },
    {
      "epoch": 1.180419921875e-05,
      "step": 1934,
      "training_step_time": 0.45828723907470703
    },
    {
      "epoch": 1.1810302734375e-05,
      "model_forward_time": 0.11688590049743652,
      "step": 1935
    },
    {
      "epoch": 1.1810302734375e-05,
      "step": 1935,
      "training_step_time": 0.43642091751098633
    },
    {
      "epoch": 1.181640625e-05,
      "model_forward_time": 0.1188821792602539,
      "step": 1936
    },
    {
      "epoch": 1.181640625e-05,
      "step": 1936,
      "training_step_time": 0.46594738960266113
    },
    {
      "epoch": 1.1822509765625e-05,
      "model_forward_time": 0.11633110046386719,
      "step": 1937
    },
    {
      "epoch": 1.1822509765625e-05,
      "step": 1937,
      "training_step_time": 0.42636632919311523
    },
    {
      "epoch": 1.182861328125e-05,
      "model_forward_time": 0.11722016334533691,
      "step": 1938
    },
    {
      "epoch": 1.182861328125e-05,
      "step": 1938,
      "training_step_time": 0.4147920608520508
    },
    {
      "epoch": 1.1834716796875e-05,
      "model_forward_time": 0.11646604537963867,
      "step": 1939
    },
    {
      "epoch": 1.1834716796875e-05,
      "step": 1939,
      "training_step_time": 0.4213521480560303
    },
    {
      "epoch": 1.18408203125e-05,
      "grad_norm": 0.4561288058757782,
      "learning_rate": 6.466666666666666e-05,
      "loss": 0.1557,
      "step": 1940
    },
    {
      "epoch": 1.18408203125e-05,
      "model_forward_time": 0.11702895164489746,
      "step": 1940
    },
    {
      "epoch": 1.18408203125e-05,
      "step": 1940,
      "training_step_time": 0.44889092445373535
    },
    {
      "epoch": 1.1846923828125e-05,
      "model_forward_time": 0.11597752571105957,
      "step": 1941
    },
    {
      "epoch": 1.1846923828125e-05,
      "step": 1941,
      "training_step_time": 0.47251367568969727
    },
    {
      "epoch": 1.185302734375e-05,
      "model_forward_time": 0.11599588394165039,
      "step": 1942
    },
    {
      "epoch": 1.185302734375e-05,
      "step": 1942,
      "training_step_time": 0.3897392749786377
    },
    {
      "epoch": 1.1859130859375e-05,
      "model_forward_time": 0.11630749702453613,
      "step": 1943
    },
    {
      "epoch": 1.1859130859375e-05,
      "step": 1943,
      "training_step_time": 0.3999927043914795
    },
    {
      "epoch": 1.1865234375e-05,
      "model_forward_time": 0.11565852165222168,
      "step": 1944
    },
    {
      "epoch": 1.1865234375e-05,
      "step": 1944,
      "training_step_time": 0.4129157066345215
    },
    {
      "epoch": 1.1871337890625e-05,
      "model_forward_time": 0.11550617218017578,
      "step": 1945
    },
    {
      "epoch": 1.1871337890625e-05,
      "step": 1945,
      "training_step_time": 0.4024848937988281
    },
    {
      "epoch": 1.187744140625e-05,
      "model_forward_time": 0.11717987060546875,
      "step": 1946
    },
    {
      "epoch": 1.187744140625e-05,
      "step": 1946,
      "training_step_time": 0.43645215034484863
    },
    {
      "epoch": 1.1883544921875e-05,
      "model_forward_time": 0.11537671089172363,
      "step": 1947
    },
    {
      "epoch": 1.1883544921875e-05,
      "step": 1947,
      "training_step_time": 0.4289515018463135
    },
    {
      "epoch": 1.18896484375e-05,
      "model_forward_time": 0.11551570892333984,
      "step": 1948
    },
    {
      "epoch": 1.18896484375e-05,
      "step": 1948,
      "training_step_time": 0.49445486068725586
    },
    {
      "epoch": 1.1895751953125e-05,
      "model_forward_time": 0.11557817459106445,
      "step": 1949
    },
    {
      "epoch": 1.1895751953125e-05,
      "step": 1949,
      "training_step_time": 0.36995625495910645
    },
    {
      "epoch": 1.190185546875e-05,
      "grad_norm": 0.6462470889091492,
      "learning_rate": 6.500000000000001e-05,
      "loss": 0.1549,
      "step": 1950
    },
    {
      "epoch": 1.190185546875e-05,
      "model_forward_time": 0.11598539352416992,
      "step": 1950
    },
    {
      "epoch": 1.190185546875e-05,
      "step": 1950,
      "training_step_time": 0.44890689849853516
    },
    {
      "epoch": 1.1907958984375e-05,
      "model_forward_time": 0.11468124389648438,
      "step": 1951
    },
    {
      "epoch": 1.1907958984375e-05,
      "step": 1951,
      "training_step_time": 0.43664121627807617
    },
    {
      "epoch": 1.19140625e-05,
      "model_forward_time": 0.11630487442016602,
      "step": 1952
    },
    {
      "epoch": 1.19140625e-05,
      "step": 1952,
      "training_step_time": 0.4177980422973633
    },
    {
      "epoch": 1.1920166015625e-05,
      "model_forward_time": 0.11558175086975098,
      "step": 1953
    },
    {
      "epoch": 1.1920166015625e-05,
      "step": 1953,
      "training_step_time": 0.40299057960510254
    },
    {
      "epoch": 1.192626953125e-05,
      "model_forward_time": 0.11668205261230469,
      "step": 1954
    },
    {
      "epoch": 1.192626953125e-05,
      "step": 1954,
      "training_step_time": 0.46321678161621094
    },
    {
      "epoch": 1.1932373046875e-05,
      "model_forward_time": 0.11635494232177734,
      "step": 1955
    },
    {
      "epoch": 1.1932373046875e-05,
      "step": 1955,
      "training_step_time": 0.41686201095581055
    },
    {
      "epoch": 1.19384765625e-05,
      "model_forward_time": 0.11616992950439453,
      "step": 1956
    },
    {
      "epoch": 1.19384765625e-05,
      "step": 1956,
      "training_step_time": 0.4278397560119629
    },
    {
      "epoch": 1.1944580078125e-05,
      "model_forward_time": 0.11476612091064453,
      "step": 1957
    },
    {
      "epoch": 1.1944580078125e-05,
      "step": 1957,
      "training_step_time": 0.39665675163269043
    },
    {
      "epoch": 1.195068359375e-05,
      "model_forward_time": 0.11678814888000488,
      "step": 1958
    },
    {
      "epoch": 1.195068359375e-05,
      "step": 1958,
      "training_step_time": 0.3908565044403076
    },
    {
      "epoch": 1.1956787109375e-05,
      "model_forward_time": 0.1154630184173584,
      "step": 1959
    },
    {
      "epoch": 1.1956787109375e-05,
      "step": 1959,
      "training_step_time": 0.3951292037963867
    },
    {
      "epoch": 1.1962890625e-05,
      "grad_norm": 0.467575341463089,
      "learning_rate": 6.533333333333334e-05,
      "loss": 0.1414,
      "step": 1960
    },
    {
      "epoch": 1.1962890625e-05,
      "model_forward_time": 0.11520004272460938,
      "step": 1960
    },
    {
      "epoch": 1.1962890625e-05,
      "step": 1960,
      "training_step_time": 0.43898987770080566
    },
    {
      "epoch": 1.1968994140625e-05,
      "model_forward_time": 0.11562299728393555,
      "step": 1961
    },
    {
      "epoch": 1.1968994140625e-05,
      "step": 1961,
      "training_step_time": 0.4271736145019531
    },
    {
      "epoch": 1.197509765625e-05,
      "model_forward_time": 0.11563658714294434,
      "step": 1962
    },
    {
      "epoch": 1.197509765625e-05,
      "step": 1962,
      "training_step_time": 0.4718949794769287
    },
    {
      "epoch": 1.1981201171875e-05,
      "model_forward_time": 0.12027645111083984,
      "step": 1963
    },
    {
      "epoch": 1.1981201171875e-05,
      "step": 1963,
      "training_step_time": 0.49269795417785645
    },
    {
      "epoch": 1.19873046875e-05,
      "model_forward_time": 0.11508488655090332,
      "step": 1964
    },
    {
      "epoch": 1.19873046875e-05,
      "step": 1964,
      "training_step_time": 0.368929386138916
    },
    {
      "epoch": 1.1993408203125e-05,
      "model_forward_time": 0.11540675163269043,
      "step": 1965
    },
    {
      "epoch": 1.1993408203125e-05,
      "step": 1965,
      "training_step_time": 0.4378652572631836
    },
    {
      "epoch": 1.199951171875e-05,
      "model_forward_time": 0.11478352546691895,
      "step": 1966
    },
    {
      "epoch": 1.199951171875e-05,
      "step": 1966,
      "training_step_time": 0.38449931144714355
    },
    {
      "epoch": 1.2005615234375e-05,
      "model_forward_time": 0.11586427688598633,
      "step": 1967
    },
    {
      "epoch": 1.2005615234375e-05,
      "step": 1967,
      "training_step_time": 0.38753843307495117
    },
    {
      "epoch": 1.201171875e-05,
      "model_forward_time": 0.11696958541870117,
      "step": 1968
    },
    {
      "epoch": 1.201171875e-05,
      "step": 1968,
      "training_step_time": 0.4072225093841553
    },
    {
      "epoch": 1.2017822265625e-05,
      "model_forward_time": 0.1155858039855957,
      "step": 1969
    },
    {
      "epoch": 1.2017822265625e-05,
      "step": 1969,
      "training_step_time": 0.5020184516906738
    },
    {
      "epoch": 1.202392578125e-05,
      "grad_norm": 0.5875524282455444,
      "learning_rate": 6.566666666666666e-05,
      "loss": 0.1649,
      "step": 1970
    },
    {
      "epoch": 1.202392578125e-05,
      "model_forward_time": 0.11536383628845215,
      "step": 1970
    },
    {
      "epoch": 1.202392578125e-05,
      "step": 1970,
      "training_step_time": 0.48984670639038086
    },
    {
      "epoch": 1.2030029296875e-05,
      "model_forward_time": 0.11538934707641602,
      "step": 1971
    },
    {
      "epoch": 1.2030029296875e-05,
      "step": 1971,
      "training_step_time": 0.39570188522338867
    },
    {
      "epoch": 1.20361328125e-05,
      "model_forward_time": 0.11550259590148926,
      "step": 1972
    },
    {
      "epoch": 1.20361328125e-05,
      "step": 1972,
      "training_step_time": 0.3942446708679199
    },
    {
      "epoch": 1.2042236328125e-05,
      "model_forward_time": 0.11696410179138184,
      "step": 1973
    },
    {
      "epoch": 1.2042236328125e-05,
      "step": 1973,
      "training_step_time": 0.4063730239868164
    },
    {
      "epoch": 1.204833984375e-05,
      "model_forward_time": 0.11593270301818848,
      "step": 1974
    },
    {
      "epoch": 1.204833984375e-05,
      "step": 1974,
      "training_step_time": 0.4433467388153076
    },
    {
      "epoch": 1.2054443359375e-05,
      "model_forward_time": 0.11655998229980469,
      "step": 1975
    },
    {
      "epoch": 1.2054443359375e-05,
      "step": 1975,
      "training_step_time": 0.41675376892089844
    },
    {
      "epoch": 1.2060546875e-05,
      "model_forward_time": 0.11519074440002441,
      "step": 1976
    },
    {
      "epoch": 1.2060546875e-05,
      "step": 1976,
      "training_step_time": 0.6887991428375244
    },
    {
      "epoch": 1.2066650390625e-05,
      "model_forward_time": 0.11502718925476074,
      "step": 1977
    },
    {
      "epoch": 1.2066650390625e-05,
      "step": 1977,
      "training_step_time": 0.4498307704925537
    },
    {
      "epoch": 1.207275390625e-05,
      "model_forward_time": 0.11511874198913574,
      "step": 1978
    },
    {
      "epoch": 1.207275390625e-05,
      "step": 1978,
      "training_step_time": 0.37415194511413574
    },
    {
      "epoch": 1.2078857421875e-05,
      "model_forward_time": 0.11512207984924316,
      "step": 1979
    },
    {
      "epoch": 1.2078857421875e-05,
      "step": 1979,
      "training_step_time": 0.49526047706604004
    },
    {
      "epoch": 1.20849609375e-05,
      "grad_norm": 0.6227431297302246,
      "learning_rate": 6.6e-05,
      "loss": 0.1587,
      "step": 1980
    },
    {
      "epoch": 1.20849609375e-05,
      "model_forward_time": 0.11516427993774414,
      "step": 1980
    },
    {
      "epoch": 1.20849609375e-05,
      "step": 1980,
      "training_step_time": 0.3907492160797119
    },
    {
      "epoch": 1.2091064453125e-05,
      "model_forward_time": 0.11524105072021484,
      "step": 1981
    },
    {
      "epoch": 1.2091064453125e-05,
      "step": 1981,
      "training_step_time": 0.39199042320251465
    },
    {
      "epoch": 1.209716796875e-05,
      "model_forward_time": 0.11439967155456543,
      "step": 1982
    },
    {
      "epoch": 1.209716796875e-05,
      "step": 1982,
      "training_step_time": 0.48569488525390625
    },
    {
      "epoch": 1.2103271484375e-05,
      "model_forward_time": 0.11494779586791992,
      "step": 1983
    },
    {
      "epoch": 1.2103271484375e-05,
      "step": 1983,
      "training_step_time": 0.44881224632263184
    },
    {
      "epoch": 1.2109375e-05,
      "model_forward_time": 0.11447978019714355,
      "step": 1984
    },
    {
      "epoch": 1.2109375e-05,
      "step": 1984,
      "training_step_time": 0.4396185874938965
    },
    {
      "epoch": 1.2115478515625e-05,
      "model_forward_time": 0.11483120918273926,
      "step": 1985
    },
    {
      "epoch": 1.2115478515625e-05,
      "step": 1985,
      "training_step_time": 0.3921825885772705
    },
    {
      "epoch": 1.212158203125e-05,
      "model_forward_time": 0.11522769927978516,
      "step": 1986
    },
    {
      "epoch": 1.212158203125e-05,
      "step": 1986,
      "training_step_time": 0.40953731536865234
    },
    {
      "epoch": 1.2127685546875e-05,
      "model_forward_time": 0.11532092094421387,
      "step": 1987
    },
    {
      "epoch": 1.2127685546875e-05,
      "step": 1987,
      "training_step_time": 0.4307515621185303
    },
    {
      "epoch": 1.21337890625e-05,
      "model_forward_time": 0.11541032791137695,
      "step": 1988
    },
    {
      "epoch": 1.21337890625e-05,
      "step": 1988,
      "training_step_time": 0.6145317554473877
    },
    {
      "epoch": 1.2139892578125e-05,
      "model_forward_time": 0.11477351188659668,
      "step": 1989
    },
    {
      "epoch": 1.2139892578125e-05,
      "step": 1989,
      "training_step_time": 0.4194798469543457
    },
    {
      "epoch": 1.214599609375e-05,
      "grad_norm": 0.6207889318466187,
      "learning_rate": 6.633333333333334e-05,
      "loss": 0.1524,
      "step": 1990
    },
    {
      "epoch": 1.214599609375e-05,
      "model_forward_time": 0.11484885215759277,
      "step": 1990
    },
    {
      "epoch": 1.214599609375e-05,
      "step": 1990,
      "training_step_time": 0.4294579029083252
    },
    {
      "epoch": 1.2152099609375e-05,
      "model_forward_time": 0.11483287811279297,
      "step": 1991
    },
    {
      "epoch": 1.2152099609375e-05,
      "step": 1991,
      "training_step_time": 0.5200700759887695
    },
    {
      "epoch": 1.2158203125e-05,
      "model_forward_time": 0.11482477188110352,
      "step": 1992
    },
    {
      "epoch": 1.2158203125e-05,
      "step": 1992,
      "training_step_time": 0.4555203914642334
    },
    {
      "epoch": 1.2164306640625e-05,
      "model_forward_time": 0.11496138572692871,
      "step": 1993
    },
    {
      "epoch": 1.2164306640625e-05,
      "step": 1993,
      "training_step_time": 0.4665818214416504
    },
    {
      "epoch": 1.217041015625e-05,
      "model_forward_time": 0.11612510681152344,
      "step": 1994
    },
    {
      "epoch": 1.217041015625e-05,
      "step": 1994,
      "training_step_time": 0.45200562477111816
    },
    {
      "epoch": 1.2176513671875e-05,
      "model_forward_time": 0.11463642120361328,
      "step": 1995
    },
    {
      "epoch": 1.2176513671875e-05,
      "step": 1995,
      "training_step_time": 0.3884551525115967
    },
    {
      "epoch": 1.21826171875e-05,
      "model_forward_time": 0.11504030227661133,
      "step": 1996
    },
    {
      "epoch": 1.21826171875e-05,
      "step": 1996,
      "training_step_time": 0.4735076427459717
    },
    {
      "epoch": 1.2188720703125e-05,
      "model_forward_time": 0.11526370048522949,
      "step": 1997
    },
    {
      "epoch": 1.2188720703125e-05,
      "step": 1997,
      "training_step_time": 0.8525793552398682
    },
    {
      "epoch": 1.219482421875e-05,
      "model_forward_time": 0.11461567878723145,
      "step": 1998
    },
    {
      "epoch": 1.219482421875e-05,
      "step": 1998,
      "training_step_time": 0.4176828861236572
    },
    {
      "epoch": 1.2200927734375e-05,
      "model_forward_time": 0.11416912078857422,
      "step": 1999
    },
    {
      "epoch": 1.2200927734375e-05,
      "step": 1999,
      "training_step_time": 0.3935525417327881
    },
    {
      "epoch": 1.220703125e-05,
      "grad_norm": 0.5634704232215881,
      "learning_rate": 6.666666666666667e-05,
      "loss": 0.1495,
      "step": 2000
    },
    {
      "epoch": 1.220703125e-05,
      "model_forward_time": 0.12705302238464355,
      "step": 2000
    },
    {
      "epoch": 1.220703125e-05,
      "step": 2000,
      "training_step_time": 0.38051867485046387
    },
    {
      "epoch": 1.2213134765625e-05,
      "model_forward_time": 0.11472392082214355,
      "step": 2001
    },
    {
      "epoch": 1.2213134765625e-05,
      "step": 2001,
      "training_step_time": 0.3642914295196533
    },
    {
      "epoch": 1.221923828125e-05,
      "model_forward_time": 0.11593151092529297,
      "step": 2002
    },
    {
      "epoch": 1.221923828125e-05,
      "step": 2002,
      "training_step_time": 0.37477803230285645
    },
    {
      "epoch": 1.2225341796875e-05,
      "model_forward_time": 0.11681723594665527,
      "step": 2003
    },
    {
      "epoch": 1.2225341796875e-05,
      "step": 2003,
      "training_step_time": 0.38707447052001953
    },
    {
      "epoch": 1.22314453125e-05,
      "model_forward_time": 0.12290811538696289,
      "step": 2004
    },
    {
      "epoch": 1.22314453125e-05,
      "step": 2004,
      "training_step_time": 0.38848161697387695
    },
    {
      "epoch": 1.2237548828125e-05,
      "model_forward_time": 0.11619424819946289,
      "step": 2005
    },
    {
      "epoch": 1.2237548828125e-05,
      "step": 2005,
      "training_step_time": 0.39182424545288086
    },
    {
      "epoch": 1.224365234375e-05,
      "model_forward_time": 0.11600089073181152,
      "step": 2006
    },
    {
      "epoch": 1.224365234375e-05,
      "step": 2006,
      "training_step_time": 0.40059852600097656
    },
    {
      "epoch": 1.2249755859375e-05,
      "model_forward_time": 0.1160128116607666,
      "step": 2007
    },
    {
      "epoch": 1.2249755859375e-05,
      "step": 2007,
      "training_step_time": 0.3967413902282715
    },
    {
      "epoch": 1.2255859375e-05,
      "model_forward_time": 0.11695051193237305,
      "step": 2008
    },
    {
      "epoch": 1.2255859375e-05,
      "step": 2008,
      "training_step_time": 0.38269591331481934
    },
    {
      "epoch": 1.2261962890625e-05,
      "model_forward_time": 0.11696839332580566,
      "step": 2009
    },
    {
      "epoch": 1.2261962890625e-05,
      "step": 2009,
      "training_step_time": 0.48766541481018066
    },
    {
      "epoch": 1.226806640625e-05,
      "grad_norm": 0.3595292866230011,
      "learning_rate": 6.7e-05,
      "loss": 0.145,
      "step": 2010
    },
    {
      "epoch": 1.226806640625e-05,
      "model_forward_time": 0.11737537384033203,
      "step": 2010
    },
    {
      "epoch": 1.226806640625e-05,
      "step": 2010,
      "training_step_time": 0.5308332443237305
    },
    {
      "epoch": 1.2274169921875e-05,
      "model_forward_time": 0.11606144905090332,
      "step": 2011
    },
    {
      "epoch": 1.2274169921875e-05,
      "step": 2011,
      "training_step_time": 0.39514851570129395
    },
    {
      "epoch": 1.22802734375e-05,
      "model_forward_time": 0.11625504493713379,
      "step": 2012
    },
    {
      "epoch": 1.22802734375e-05,
      "step": 2012,
      "training_step_time": 0.4733998775482178
    },
    {
      "epoch": 1.2286376953125e-05,
      "model_forward_time": 0.1166069507598877,
      "step": 2013
    },
    {
      "epoch": 1.2286376953125e-05,
      "step": 2013,
      "training_step_time": 0.453171968460083
    },
    {
      "epoch": 1.229248046875e-05,
      "model_forward_time": 0.1208646297454834,
      "step": 2014
    },
    {
      "epoch": 1.229248046875e-05,
      "step": 2014,
      "training_step_time": 0.405750036239624
    },
    {
      "epoch": 1.2298583984375e-05,
      "model_forward_time": 0.11998605728149414,
      "step": 2015
    },
    {
      "epoch": 1.2298583984375e-05,
      "step": 2015,
      "training_step_time": 0.3875117301940918
    },
    {
      "epoch": 1.23046875e-05,
      "model_forward_time": 0.12181997299194336,
      "step": 2016
    },
    {
      "epoch": 1.23046875e-05,
      "step": 2016,
      "training_step_time": 0.38516831398010254
    },
    {
      "epoch": 1.2310791015625e-05,
      "model_forward_time": 0.11800384521484375,
      "step": 2017
    },
    {
      "epoch": 1.2310791015625e-05,
      "step": 2017,
      "training_step_time": 0.3828105926513672
    },
    {
      "epoch": 1.231689453125e-05,
      "model_forward_time": 0.12057995796203613,
      "step": 2018
    },
    {
      "epoch": 1.231689453125e-05,
      "step": 2018,
      "training_step_time": 0.40004849433898926
    },
    {
      "epoch": 1.2322998046875e-05,
      "model_forward_time": 0.11691045761108398,
      "step": 2019
    },
    {
      "epoch": 1.2322998046875e-05,
      "step": 2019,
      "training_step_time": 0.40618133544921875
    },
    {
      "epoch": 1.23291015625e-05,
      "grad_norm": 0.45891010761260986,
      "learning_rate": 6.733333333333333e-05,
      "loss": 0.1545,
      "step": 2020
    },
    {
      "epoch": 1.23291015625e-05,
      "model_forward_time": 0.11672210693359375,
      "step": 2020
    },
    {
      "epoch": 1.23291015625e-05,
      "step": 2020,
      "training_step_time": 0.4733145236968994
    },
    {
      "epoch": 1.2335205078125e-05,
      "model_forward_time": 0.11700654029846191,
      "step": 2021
    },
    {
      "epoch": 1.2335205078125e-05,
      "step": 2021,
      "training_step_time": 0.46415185928344727
    },
    {
      "epoch": 1.234130859375e-05,
      "model_forward_time": 0.11754703521728516,
      "step": 2022
    },
    {
      "epoch": 1.234130859375e-05,
      "step": 2022,
      "training_step_time": 0.3745732307434082
    },
    {
      "epoch": 1.2347412109375e-05,
      "model_forward_time": 0.11700892448425293,
      "step": 2023
    },
    {
      "epoch": 1.2347412109375e-05,
      "step": 2023,
      "training_step_time": 0.4031028747558594
    },
    {
      "epoch": 1.2353515625e-05,
      "model_forward_time": 0.11824703216552734,
      "step": 2024
    },
    {
      "epoch": 1.2353515625e-05,
      "step": 2024,
      "training_step_time": 0.42312049865722656
    },
    {
      "epoch": 1.2359619140625e-05,
      "model_forward_time": 0.11814212799072266,
      "step": 2025
    },
    {
      "epoch": 1.2359619140625e-05,
      "step": 2025,
      "training_step_time": 0.4018876552581787
    },
    {
      "epoch": 1.236572265625e-05,
      "model_forward_time": 0.1170356273651123,
      "step": 2026
    },
    {
      "epoch": 1.236572265625e-05,
      "step": 2026,
      "training_step_time": 0.42371034622192383
    },
    {
      "epoch": 1.2371826171875e-05,
      "model_forward_time": 0.11762642860412598,
      "step": 2027
    },
    {
      "epoch": 1.2371826171875e-05,
      "step": 2027,
      "training_step_time": 0.4215831756591797
    },
    {
      "epoch": 1.23779296875e-05,
      "model_forward_time": 0.11707615852355957,
      "step": 2028
    },
    {
      "epoch": 1.23779296875e-05,
      "step": 2028,
      "training_step_time": 0.46160888671875
    },
    {
      "epoch": 1.2384033203125e-05,
      "model_forward_time": 0.12070012092590332,
      "step": 2029
    },
    {
      "epoch": 1.2384033203125e-05,
      "step": 2029,
      "training_step_time": 0.42275261878967285
    },
    {
      "epoch": 1.239013671875e-05,
      "grad_norm": 0.43716320395469666,
      "learning_rate": 6.766666666666667e-05,
      "loss": 0.156,
      "step": 2030
    },
    {
      "epoch": 1.239013671875e-05,
      "model_forward_time": 0.11657238006591797,
      "step": 2030
    },
    {
      "epoch": 1.239013671875e-05,
      "step": 2030,
      "training_step_time": 0.401181697845459
    },
    {
      "epoch": 1.2396240234375e-05,
      "model_forward_time": 0.11617302894592285,
      "step": 2031
    },
    {
      "epoch": 1.2396240234375e-05,
      "step": 2031,
      "training_step_time": 0.40174031257629395
    },
    {
      "epoch": 1.240234375e-05,
      "model_forward_time": 0.11710739135742188,
      "step": 2032
    },
    {
      "epoch": 1.240234375e-05,
      "step": 2032,
      "training_step_time": 0.40237951278686523
    },
    {
      "epoch": 1.2408447265625e-05,
      "model_forward_time": 0.11918830871582031,
      "step": 2033
    },
    {
      "epoch": 1.2408447265625e-05,
      "step": 2033,
      "training_step_time": 0.3882584571838379
    },
    {
      "epoch": 1.241455078125e-05,
      "model_forward_time": 0.11787629127502441,
      "step": 2034
    },
    {
      "epoch": 1.241455078125e-05,
      "step": 2034,
      "training_step_time": 0.3890089988708496
    },
    {
      "epoch": 1.2420654296875e-05,
      "model_forward_time": 0.11707043647766113,
      "step": 2035
    },
    {
      "epoch": 1.2420654296875e-05,
      "step": 2035,
      "training_step_time": 0.39782238006591797
    },
    {
      "epoch": 1.24267578125e-05,
      "model_forward_time": 0.11755728721618652,
      "step": 2036
    },
    {
      "epoch": 1.24267578125e-05,
      "step": 2036,
      "training_step_time": 0.45558881759643555
    },
    {
      "epoch": 1.2432861328125e-05,
      "model_forward_time": 0.11835575103759766,
      "step": 2037
    },
    {
      "epoch": 1.2432861328125e-05,
      "step": 2037,
      "training_step_time": 0.3925955295562744
    },
    {
      "epoch": 1.243896484375e-05,
      "model_forward_time": 0.11703276634216309,
      "step": 2038
    },
    {
      "epoch": 1.243896484375e-05,
      "step": 2038,
      "training_step_time": 0.47913193702697754
    },
    {
      "epoch": 1.2445068359375e-05,
      "model_forward_time": 0.1170802116394043,
      "step": 2039
    },
    {
      "epoch": 1.2445068359375e-05,
      "step": 2039,
      "training_step_time": 0.5450031757354736
    },
    {
      "epoch": 1.2451171875e-05,
      "grad_norm": 0.5399112105369568,
      "learning_rate": 6.800000000000001e-05,
      "loss": 0.1615,
      "step": 2040
    },
    {
      "epoch": 1.2451171875e-05,
      "model_forward_time": 0.11721181869506836,
      "step": 2040
    },
    {
      "epoch": 1.2451171875e-05,
      "step": 2040,
      "training_step_time": 0.41489434242248535
    },
    {
      "epoch": 1.2457275390625e-05,
      "model_forward_time": 0.12030911445617676,
      "step": 2041
    },
    {
      "epoch": 1.2457275390625e-05,
      "step": 2041,
      "training_step_time": 0.48329830169677734
    },
    {
      "epoch": 1.246337890625e-05,
      "model_forward_time": 0.11668801307678223,
      "step": 2042
    },
    {
      "epoch": 1.246337890625e-05,
      "step": 2042,
      "training_step_time": 0.43520665168762207
    },
    {
      "epoch": 1.2469482421875e-05,
      "model_forward_time": 0.11684298515319824,
      "step": 2043
    },
    {
      "epoch": 1.2469482421875e-05,
      "step": 2043,
      "training_step_time": 0.3967287540435791
    },
    {
      "epoch": 1.24755859375e-05,
      "model_forward_time": 0.11698079109191895,
      "step": 2044
    },
    {
      "epoch": 1.24755859375e-05,
      "step": 2044,
      "training_step_time": 0.3880891799926758
    },
    {
      "epoch": 1.2481689453125e-05,
      "model_forward_time": 0.11802291870117188,
      "step": 2045
    },
    {
      "epoch": 1.2481689453125e-05,
      "step": 2045,
      "training_step_time": 0.3925902843475342
    },
    {
      "epoch": 1.248779296875e-05,
      "model_forward_time": 0.11899113655090332,
      "step": 2046
    },
    {
      "epoch": 1.248779296875e-05,
      "step": 2046,
      "training_step_time": 0.39895033836364746
    },
    {
      "epoch": 1.2493896484375e-05,
      "model_forward_time": 0.12057352066040039,
      "step": 2047
    },
    {
      "epoch": 1.2493896484375e-05,
      "step": 2047,
      "training_step_time": 0.39320898056030273
    },
    {
      "epoch": 1.25e-05,
      "model_forward_time": 0.12404751777648926,
      "step": 2048
    },
    {
      "epoch": 1.25e-05,
      "step": 2048,
      "training_step_time": 0.3981053829193115
    },
    {
      "epoch": 1.2506103515625e-05,
      "model_forward_time": 0.11899566650390625,
      "step": 2049
    },
    {
      "epoch": 1.2506103515625e-05,
      "step": 2049,
      "training_step_time": 0.43347835540771484
    },
    {
      "epoch": 1.251220703125e-05,
      "grad_norm": 0.6124165058135986,
      "learning_rate": 6.833333333333333e-05,
      "loss": 0.1518,
      "step": 2050
    },
    {
      "epoch": 1.251220703125e-05,
      "model_forward_time": 0.11722636222839355,
      "step": 2050
    },
    {
      "epoch": 1.251220703125e-05,
      "step": 2050,
      "training_step_time": 0.48014402389526367
    },
    {
      "epoch": 1.2518310546875e-05,
      "model_forward_time": 0.11523914337158203,
      "step": 2051
    },
    {
      "epoch": 1.2518310546875e-05,
      "step": 2051,
      "training_step_time": 0.4994969367980957
    },
    {
      "epoch": 1.25244140625e-05,
      "model_forward_time": 0.11762571334838867,
      "step": 2052
    },
    {
      "epoch": 1.25244140625e-05,
      "step": 2052,
      "training_step_time": 0.4343605041503906
    },
    {
      "epoch": 1.2530517578125e-05,
      "model_forward_time": 0.11628389358520508,
      "step": 2053
    },
    {
      "epoch": 1.2530517578125e-05,
      "step": 2053,
      "training_step_time": 0.4172542095184326
    },
    {
      "epoch": 1.253662109375e-05,
      "model_forward_time": 0.11690759658813477,
      "step": 2054
    },
    {
      "epoch": 1.253662109375e-05,
      "step": 2054,
      "training_step_time": 0.42142319679260254
    },
    {
      "epoch": 1.2542724609375e-05,
      "model_forward_time": 0.11543488502502441,
      "step": 2055
    },
    {
      "epoch": 1.2542724609375e-05,
      "step": 2055,
      "training_step_time": 0.44783878326416016
    },
    {
      "epoch": 1.2548828125e-05,
      "model_forward_time": 0.1176149845123291,
      "step": 2056
    },
    {
      "epoch": 1.2548828125e-05,
      "step": 2056,
      "training_step_time": 0.4509608745574951
    },
    {
      "epoch": 1.2554931640625e-05,
      "model_forward_time": 0.11773490905761719,
      "step": 2057
    },
    {
      "epoch": 1.2554931640625e-05,
      "step": 2057,
      "training_step_time": 0.3873779773712158
    },
    {
      "epoch": 1.256103515625e-05,
      "model_forward_time": 0.11634969711303711,
      "step": 2058
    },
    {
      "epoch": 1.256103515625e-05,
      "step": 2058,
      "training_step_time": 0.3913431167602539
    },
    {
      "epoch": 1.2567138671875e-05,
      "model_forward_time": 0.11588525772094727,
      "step": 2059
    },
    {
      "epoch": 1.2567138671875e-05,
      "step": 2059,
      "training_step_time": 0.39464712142944336
    },
    {
      "epoch": 1.25732421875e-05,
      "grad_norm": 0.4071783125400543,
      "learning_rate": 6.866666666666666e-05,
      "loss": 0.1415,
      "step": 2060
    },
    {
      "epoch": 1.25732421875e-05,
      "model_forward_time": 0.11835789680480957,
      "step": 2060
    },
    {
      "epoch": 1.25732421875e-05,
      "step": 2060,
      "training_step_time": 0.4143228530883789
    },
    {
      "epoch": 1.2579345703125e-05,
      "model_forward_time": 0.1163787841796875,
      "step": 2061
    },
    {
      "epoch": 1.2579345703125e-05,
      "step": 2061,
      "training_step_time": 0.390871524810791
    },
    {
      "epoch": 1.258544921875e-05,
      "model_forward_time": 0.11558079719543457,
      "step": 2062
    },
    {
      "epoch": 1.258544921875e-05,
      "step": 2062,
      "training_step_time": 0.38768434524536133
    },
    {
      "epoch": 1.2591552734375e-05,
      "model_forward_time": 0.1162714958190918,
      "step": 2063
    },
    {
      "epoch": 1.2591552734375e-05,
      "step": 2063,
      "training_step_time": 0.494091272354126
    },
    {
      "epoch": 1.259765625e-05,
      "model_forward_time": 0.12674522399902344,
      "step": 2064
    },
    {
      "epoch": 1.259765625e-05,
      "step": 2064,
      "training_step_time": 0.5085582733154297
    },
    {
      "epoch": 1.2603759765625e-05,
      "model_forward_time": 0.11539030075073242,
      "step": 2065
    },
    {
      "epoch": 1.2603759765625e-05,
      "step": 2065,
      "training_step_time": 0.4428670406341553
    },
    {
      "epoch": 1.260986328125e-05,
      "model_forward_time": 0.11600852012634277,
      "step": 2066
    },
    {
      "epoch": 1.260986328125e-05,
      "step": 2066,
      "training_step_time": 0.4743049144744873
    },
    {
      "epoch": 1.2615966796875e-05,
      "model_forward_time": 0.11646842956542969,
      "step": 2067
    },
    {
      "epoch": 1.2615966796875e-05,
      "step": 2067,
      "training_step_time": 0.4679558277130127
    },
    {
      "epoch": 1.26220703125e-05,
      "model_forward_time": 0.11523866653442383,
      "step": 2068
    },
    {
      "epoch": 1.26220703125e-05,
      "step": 2068,
      "training_step_time": 0.4950411319732666
    },
    {
      "epoch": 1.2628173828125e-05,
      "model_forward_time": 0.11507225036621094,
      "step": 2069
    },
    {
      "epoch": 1.2628173828125e-05,
      "step": 2069,
      "training_step_time": 0.5420711040496826
    },
    {
      "epoch": 1.263427734375e-05,
      "grad_norm": 0.4463164210319519,
      "learning_rate": 6.9e-05,
      "loss": 0.1469,
      "step": 2070
    },
    {
      "epoch": 1.263427734375e-05,
      "model_forward_time": 0.11473870277404785,
      "step": 2070
    },
    {
      "epoch": 1.263427734375e-05,
      "step": 2070,
      "training_step_time": 0.4818692207336426
    },
    {
      "epoch": 1.2640380859375e-05,
      "model_forward_time": 0.11487364768981934,
      "step": 2071
    },
    {
      "epoch": 1.2640380859375e-05,
      "step": 2071,
      "training_step_time": 0.3941214084625244
    },
    {
      "epoch": 1.2646484375e-05,
      "model_forward_time": 0.11730194091796875,
      "step": 2072
    },
    {
      "epoch": 1.2646484375e-05,
      "step": 2072,
      "training_step_time": 0.39936375617980957
    },
    {
      "epoch": 1.2652587890625e-05,
      "model_forward_time": 0.11849665641784668,
      "step": 2073
    },
    {
      "epoch": 1.2652587890625e-05,
      "step": 2073,
      "training_step_time": 0.3761579990386963
    },
    {
      "epoch": 1.265869140625e-05,
      "model_forward_time": 0.11909246444702148,
      "step": 2074
    },
    {
      "epoch": 1.265869140625e-05,
      "step": 2074,
      "training_step_time": 0.3794217109680176
    },
    {
      "epoch": 1.2664794921875e-05,
      "model_forward_time": 0.11631011962890625,
      "step": 2075
    },
    {
      "epoch": 1.2664794921875e-05,
      "step": 2075,
      "training_step_time": 0.8422160148620605
    },
    {
      "epoch": 1.26708984375e-05,
      "model_forward_time": 0.11592888832092285,
      "step": 2076
    },
    {
      "epoch": 1.26708984375e-05,
      "step": 2076,
      "training_step_time": 0.38584089279174805
    },
    {
      "epoch": 1.2677001953125e-05,
      "model_forward_time": 0.11489605903625488,
      "step": 2077
    },
    {
      "epoch": 1.2677001953125e-05,
      "step": 2077,
      "training_step_time": 0.43529343605041504
    },
    {
      "epoch": 1.268310546875e-05,
      "model_forward_time": 0.11549019813537598,
      "step": 2078
    },
    {
      "epoch": 1.268310546875e-05,
      "step": 2078,
      "training_step_time": 0.40041446685791016
    },
    {
      "epoch": 1.2689208984375e-05,
      "model_forward_time": 0.11450481414794922,
      "step": 2079
    },
    {
      "epoch": 1.2689208984375e-05,
      "step": 2079,
      "training_step_time": 0.373852014541626
    },
    {
      "epoch": 1.26953125e-05,
      "grad_norm": 0.41721639037132263,
      "learning_rate": 6.933333333333334e-05,
      "loss": 0.1402,
      "step": 2080
    },
    {
      "epoch": 1.26953125e-05,
      "model_forward_time": 0.11361956596374512,
      "step": 2080
    },
    {
      "epoch": 1.26953125e-05,
      "step": 2080,
      "training_step_time": 0.4514913558959961
    },
    {
      "epoch": 1.2701416015625e-05,
      "model_forward_time": 0.11491036415100098,
      "step": 2081
    },
    {
      "epoch": 1.2701416015625e-05,
      "step": 2081,
      "training_step_time": 0.6481828689575195
    },
    {
      "epoch": 1.270751953125e-05,
      "model_forward_time": 0.11495518684387207,
      "step": 2082
    },
    {
      "epoch": 1.270751953125e-05,
      "step": 2082,
      "training_step_time": 0.44625425338745117
    },
    {
      "epoch": 1.2713623046875e-05,
      "model_forward_time": 0.1152045726776123,
      "step": 2083
    },
    {
      "epoch": 1.2713623046875e-05,
      "step": 2083,
      "training_step_time": 0.4282865524291992
    },
    {
      "epoch": 1.27197265625e-05,
      "model_forward_time": 0.11394500732421875,
      "step": 2084
    },
    {
      "epoch": 1.27197265625e-05,
      "step": 2084,
      "training_step_time": 0.3792686462402344
    },
    {
      "epoch": 1.2725830078125e-05,
      "model_forward_time": 0.11486148834228516,
      "step": 2085
    },
    {
      "epoch": 1.2725830078125e-05,
      "step": 2085,
      "training_step_time": 0.39090514183044434
    },
    {
      "epoch": 1.273193359375e-05,
      "model_forward_time": 0.11659383773803711,
      "step": 2086
    },
    {
      "epoch": 1.273193359375e-05,
      "step": 2086,
      "training_step_time": 0.39689183235168457
    },
    {
      "epoch": 1.2738037109375e-05,
      "model_forward_time": 0.11560750007629395,
      "step": 2087
    },
    {
      "epoch": 1.2738037109375e-05,
      "step": 2087,
      "training_step_time": 0.6205589771270752
    },
    {
      "epoch": 1.2744140625e-05,
      "model_forward_time": 0.11475944519042969,
      "step": 2088
    },
    {
      "epoch": 1.2744140625e-05,
      "step": 2088,
      "training_step_time": 0.3880140781402588
    },
    {
      "epoch": 1.2750244140625e-05,
      "model_forward_time": 0.11528134346008301,
      "step": 2089
    },
    {
      "epoch": 1.2750244140625e-05,
      "step": 2089,
      "training_step_time": 0.3893308639526367
    },
    {
      "epoch": 1.275634765625e-05,
      "grad_norm": 0.40439894795417786,
      "learning_rate": 6.966666666666668e-05,
      "loss": 0.1401,
      "step": 2090
    },
    {
      "epoch": 1.275634765625e-05,
      "model_forward_time": 0.1156620979309082,
      "step": 2090
    },
    {
      "epoch": 1.275634765625e-05,
      "step": 2090,
      "training_step_time": 0.38623046875
    },
    {
      "epoch": 1.2762451171875e-05,
      "model_forward_time": 0.1148073673248291,
      "step": 2091
    },
    {
      "epoch": 1.2762451171875e-05,
      "step": 2091,
      "training_step_time": 0.4110860824584961
    },
    {
      "epoch": 1.27685546875e-05,
      "model_forward_time": 0.11529135704040527,
      "step": 2092
    },
    {
      "epoch": 1.27685546875e-05,
      "step": 2092,
      "training_step_time": 0.43140745162963867
    },
    {
      "epoch": 1.2774658203125e-05,
      "model_forward_time": 0.11578559875488281,
      "step": 2093
    },
    {
      "epoch": 1.2774658203125e-05,
      "step": 2093,
      "training_step_time": 1.394498586654663
    },
    {
      "epoch": 1.278076171875e-05,
      "model_forward_time": 0.11470985412597656,
      "step": 2094
    },
    {
      "epoch": 1.278076171875e-05,
      "step": 2094,
      "training_step_time": 0.4161567687988281
    },
    {
      "epoch": 1.2786865234375e-05,
      "model_forward_time": 0.11383175849914551,
      "step": 2095
    },
    {
      "epoch": 1.2786865234375e-05,
      "step": 2095,
      "training_step_time": 0.4062387943267822
    },
    {
      "epoch": 1.279296875e-05,
      "model_forward_time": 0.11391687393188477,
      "step": 2096
    },
    {
      "epoch": 1.279296875e-05,
      "step": 2096,
      "training_step_time": 0.3882460594177246
    },
    {
      "epoch": 1.2799072265625e-05,
      "model_forward_time": 0.11366391181945801,
      "step": 2097
    },
    {
      "epoch": 1.2799072265625e-05,
      "step": 2097,
      "training_step_time": 0.38400936126708984
    },
    {
      "epoch": 1.280517578125e-05,
      "model_forward_time": 0.1151118278503418,
      "step": 2098
    },
    {
      "epoch": 1.280517578125e-05,
      "step": 2098,
      "training_step_time": 0.3803675174713135
    },
    {
      "epoch": 1.2811279296875e-05,
      "model_forward_time": 0.11523056030273438,
      "step": 2099
    },
    {
      "epoch": 1.2811279296875e-05,
      "step": 2099,
      "training_step_time": 0.8138835430145264
    },
    {
      "epoch": 1.28173828125e-05,
      "grad_norm": 0.6051329374313354,
      "learning_rate": 7e-05,
      "loss": 0.1405,
      "step": 2100
    },
    {
      "epoch": 1.28173828125e-05,
      "model_forward_time": 0.11780858039855957,
      "step": 2100
    },
    {
      "epoch": 1.28173828125e-05,
      "step": 2100,
      "training_step_time": 0.4084784984588623
    },
    {
      "epoch": 1.2823486328125e-05,
      "model_forward_time": 0.11890220642089844,
      "step": 2101
    },
    {
      "epoch": 1.2823486328125e-05,
      "step": 2101,
      "training_step_time": 0.37792468070983887
    },
    {
      "epoch": 1.282958984375e-05,
      "model_forward_time": 0.11878633499145508,
      "step": 2102
    },
    {
      "epoch": 1.282958984375e-05,
      "step": 2102,
      "training_step_time": 0.3824586868286133
    },
    {
      "epoch": 1.2835693359375e-05,
      "model_forward_time": 0.11835885047912598,
      "step": 2103
    },
    {
      "epoch": 1.2835693359375e-05,
      "step": 2103,
      "training_step_time": 0.4048497676849365
    },
    {
      "epoch": 1.2841796875e-05,
      "model_forward_time": 0.11701679229736328,
      "step": 2104
    },
    {
      "epoch": 1.2841796875e-05,
      "step": 2104,
      "training_step_time": 0.45615601539611816
    },
    {
      "epoch": 1.2847900390625e-05,
      "model_forward_time": 0.1154019832611084,
      "step": 2105
    },
    {
      "epoch": 1.2847900390625e-05,
      "step": 2105,
      "training_step_time": 0.9363758563995361
    },
    {
      "epoch": 1.285400390625e-05,
      "model_forward_time": 0.11481428146362305,
      "step": 2106
    },
    {
      "epoch": 1.285400390625e-05,
      "step": 2106,
      "training_step_time": 0.40723705291748047
    },
    {
      "epoch": 1.2860107421875e-05,
      "model_forward_time": 0.11565780639648438,
      "step": 2107
    },
    {
      "epoch": 1.2860107421875e-05,
      "step": 2107,
      "training_step_time": 0.4021625518798828
    },
    {
      "epoch": 1.28662109375e-05,
      "model_forward_time": 0.11430168151855469,
      "step": 2108
    },
    {
      "epoch": 1.28662109375e-05,
      "step": 2108,
      "training_step_time": 0.4294261932373047
    },
    {
      "epoch": 1.2872314453125e-05,
      "model_forward_time": 0.11409759521484375,
      "step": 2109
    },
    {
      "epoch": 1.2872314453125e-05,
      "step": 2109,
      "training_step_time": 0.3888700008392334
    },
    {
      "epoch": 1.287841796875e-05,
      "grad_norm": 0.41937610507011414,
      "learning_rate": 7.033333333333334e-05,
      "loss": 0.1469,
      "step": 2110
    },
    {
      "epoch": 1.287841796875e-05,
      "model_forward_time": 0.11800360679626465,
      "step": 2110
    },
    {
      "epoch": 1.287841796875e-05,
      "step": 2110,
      "training_step_time": 0.3971118927001953
    },
    {
      "epoch": 1.2884521484375e-05,
      "model_forward_time": 0.11821746826171875,
      "step": 2111
    },
    {
      "epoch": 1.2884521484375e-05,
      "step": 2111,
      "training_step_time": 0.9435486793518066
    },
    {
      "epoch": 1.2890625e-05,
      "model_forward_time": 0.11675405502319336,
      "step": 2112
    },
    {
      "epoch": 1.2890625e-05,
      "step": 2112,
      "training_step_time": 0.3834559917449951
    },
    {
      "epoch": 1.2896728515625e-05,
      "model_forward_time": 0.11499285697937012,
      "step": 2113
    },
    {
      "epoch": 1.2896728515625e-05,
      "step": 2113,
      "training_step_time": 0.37306904792785645
    },
    {
      "epoch": 1.290283203125e-05,
      "model_forward_time": 0.11456060409545898,
      "step": 2114
    },
    {
      "epoch": 1.290283203125e-05,
      "step": 2114,
      "training_step_time": 0.41495680809020996
    },
    {
      "epoch": 1.2908935546875e-05,
      "model_forward_time": 0.11512541770935059,
      "step": 2115
    },
    {
      "epoch": 1.2908935546875e-05,
      "step": 2115,
      "training_step_time": 0.4312326908111572
    },
    {
      "epoch": 1.29150390625e-05,
      "model_forward_time": 0.11583971977233887,
      "step": 2116
    },
    {
      "epoch": 1.29150390625e-05,
      "step": 2116,
      "training_step_time": 0.45360708236694336
    },
    {
      "epoch": 1.2921142578125e-05,
      "model_forward_time": 0.11521434783935547,
      "step": 2117
    },
    {
      "epoch": 1.2921142578125e-05,
      "step": 2117,
      "training_step_time": 0.9577634334564209
    },
    {
      "epoch": 1.292724609375e-05,
      "model_forward_time": 0.11526274681091309,
      "step": 2118
    },
    {
      "epoch": 1.292724609375e-05,
      "step": 2118,
      "training_step_time": 0.3958089351654053
    },
    {
      "epoch": 1.2933349609375e-05,
      "model_forward_time": 0.11548471450805664,
      "step": 2119
    },
    {
      "epoch": 1.2933349609375e-05,
      "step": 2119,
      "training_step_time": 0.4746367931365967
    },
    {
      "epoch": 1.2939453125e-05,
      "grad_norm": 0.5356715321540833,
      "learning_rate": 7.066666666666667e-05,
      "loss": 0.1399,
      "step": 2120
    },
    {
      "epoch": 1.2939453125e-05,
      "model_forward_time": 0.11463069915771484,
      "step": 2120
    },
    {
      "epoch": 1.2939453125e-05,
      "step": 2120,
      "training_step_time": 0.38492417335510254
    },
    {
      "epoch": 1.2945556640625e-05,
      "model_forward_time": 0.11475324630737305,
      "step": 2121
    },
    {
      "epoch": 1.2945556640625e-05,
      "step": 2121,
      "training_step_time": 0.42920970916748047
    },
    {
      "epoch": 1.295166015625e-05,
      "model_forward_time": 0.11490654945373535,
      "step": 2122
    },
    {
      "epoch": 1.295166015625e-05,
      "step": 2122,
      "training_step_time": 0.38985443115234375
    },
    {
      "epoch": 1.2957763671875e-05,
      "model_forward_time": 0.11576008796691895,
      "step": 2123
    },
    {
      "epoch": 1.2957763671875e-05,
      "step": 2123,
      "training_step_time": 0.5020542144775391
    },
    {
      "epoch": 1.29638671875e-05,
      "model_forward_time": 0.11817455291748047,
      "step": 2124
    },
    {
      "epoch": 1.29638671875e-05,
      "step": 2124,
      "training_step_time": 0.3913555145263672
    },
    {
      "epoch": 1.2969970703125e-05,
      "model_forward_time": 0.11980223655700684,
      "step": 2125
    },
    {
      "epoch": 1.2969970703125e-05,
      "step": 2125,
      "training_step_time": 0.3908240795135498
    },
    {
      "epoch": 1.297607421875e-05,
      "model_forward_time": 0.13777542114257812,
      "step": 2126
    },
    {
      "epoch": 1.297607421875e-05,
      "step": 2126,
      "training_step_time": 0.4017035961151123
    },
    {
      "epoch": 1.2982177734375e-05,
      "model_forward_time": 0.11723852157592773,
      "step": 2127
    },
    {
      "epoch": 1.2982177734375e-05,
      "step": 2127,
      "training_step_time": 0.39318013191223145
    },
    {
      "epoch": 1.298828125e-05,
      "model_forward_time": 0.11804032325744629,
      "step": 2128
    },
    {
      "epoch": 1.298828125e-05,
      "step": 2128,
      "training_step_time": 0.4335310459136963
    },
    {
      "epoch": 1.2994384765625e-05,
      "model_forward_time": 0.11637759208679199,
      "step": 2129
    },
    {
      "epoch": 1.2994384765625e-05,
      "step": 2129,
      "training_step_time": 0.8732728958129883
    },
    {
      "epoch": 1.300048828125e-05,
      "grad_norm": 0.8602519035339355,
      "learning_rate": 7.1e-05,
      "loss": 0.1504,
      "step": 2130
    },
    {
      "epoch": 1.300048828125e-05,
      "model_forward_time": 0.11561036109924316,
      "step": 2130
    },
    {
      "epoch": 1.300048828125e-05,
      "step": 2130,
      "training_step_time": 0.3673276901245117
    },
    {
      "epoch": 1.3006591796875e-05,
      "model_forward_time": 0.1146700382232666,
      "step": 2131
    },
    {
      "epoch": 1.3006591796875e-05,
      "step": 2131,
      "training_step_time": 0.4267299175262451
    },
    {
      "epoch": 1.30126953125e-05,
      "model_forward_time": 0.11493778228759766,
      "step": 2132
    },
    {
      "epoch": 1.30126953125e-05,
      "step": 2132,
      "training_step_time": 0.45365047454833984
    },
    {
      "epoch": 1.3018798828125e-05,
      "model_forward_time": 0.11499142646789551,
      "step": 2133
    },
    {
      "epoch": 1.3018798828125e-05,
      "step": 2133,
      "training_step_time": 0.3912825584411621
    },
    {
      "epoch": 1.302490234375e-05,
      "model_forward_time": 0.11457037925720215,
      "step": 2134
    },
    {
      "epoch": 1.302490234375e-05,
      "step": 2134,
      "training_step_time": 0.43122005462646484
    },
    {
      "epoch": 1.3031005859375e-05,
      "model_forward_time": 0.11544656753540039,
      "step": 2135
    },
    {
      "epoch": 1.3031005859375e-05,
      "step": 2135,
      "training_step_time": 0.6032135486602783
    },
    {
      "epoch": 1.3037109375e-05,
      "model_forward_time": 0.11512303352355957,
      "step": 2136
    },
    {
      "epoch": 1.3037109375e-05,
      "step": 2136,
      "training_step_time": 0.3956320285797119
    },
    {
      "epoch": 1.3043212890625e-05,
      "model_forward_time": 0.11540770530700684,
      "step": 2137
    },
    {
      "epoch": 1.3043212890625e-05,
      "step": 2137,
      "training_step_time": 0.39179563522338867
    },
    {
      "epoch": 1.304931640625e-05,
      "model_forward_time": 0.11658048629760742,
      "step": 2138
    },
    {
      "epoch": 1.304931640625e-05,
      "step": 2138,
      "training_step_time": 0.39955782890319824
    },
    {
      "epoch": 1.3055419921875e-05,
      "model_forward_time": 0.11881685256958008,
      "step": 2139
    },
    {
      "epoch": 1.3055419921875e-05,
      "step": 2139,
      "training_step_time": 0.41748619079589844
    },
    {
      "epoch": 1.30615234375e-05,
      "grad_norm": 0.4984835088253021,
      "learning_rate": 7.133333333333334e-05,
      "loss": 0.155,
      "step": 2140
    },
    {
      "epoch": 1.30615234375e-05,
      "model_forward_time": 0.11804914474487305,
      "step": 2140
    },
    {
      "epoch": 1.30615234375e-05,
      "step": 2140,
      "training_step_time": 0.4145545959472656
    },
    {
      "epoch": 1.3067626953125e-05,
      "model_forward_time": 0.11604571342468262,
      "step": 2141
    },
    {
      "epoch": 1.3067626953125e-05,
      "step": 2141,
      "training_step_time": 0.9733245372772217
    },
    {
      "epoch": 1.307373046875e-05,
      "model_forward_time": 0.11488938331604004,
      "step": 2142
    },
    {
      "epoch": 1.307373046875e-05,
      "step": 2142,
      "training_step_time": 0.4029982089996338
    },
    {
      "epoch": 1.3079833984375e-05,
      "model_forward_time": 0.11508655548095703,
      "step": 2143
    },
    {
      "epoch": 1.3079833984375e-05,
      "step": 2143,
      "training_step_time": 0.4717390537261963
    },
    {
      "epoch": 1.30859375e-05,
      "model_forward_time": 0.11595559120178223,
      "step": 2144
    },
    {
      "epoch": 1.30859375e-05,
      "step": 2144,
      "training_step_time": 0.39481234550476074
    },
    {
      "epoch": 1.3092041015625e-05,
      "model_forward_time": 0.11575841903686523,
      "step": 2145
    },
    {
      "epoch": 1.3092041015625e-05,
      "step": 2145,
      "training_step_time": 0.44320225715637207
    },
    {
      "epoch": 1.309814453125e-05,
      "model_forward_time": 0.11611437797546387,
      "step": 2146
    },
    {
      "epoch": 1.309814453125e-05,
      "step": 2146,
      "training_step_time": 0.47974300384521484
    },
    {
      "epoch": 1.3104248046875e-05,
      "model_forward_time": 0.11584258079528809,
      "step": 2147
    },
    {
      "epoch": 1.3104248046875e-05,
      "step": 2147,
      "training_step_time": 0.6087906360626221
    },
    {
      "epoch": 1.31103515625e-05,
      "model_forward_time": 0.11520862579345703,
      "step": 2148
    },
    {
      "epoch": 1.31103515625e-05,
      "step": 2148,
      "training_step_time": 0.39087533950805664
    },
    {
      "epoch": 1.3116455078125e-05,
      "model_forward_time": 0.1162557601928711,
      "step": 2149
    },
    {
      "epoch": 1.3116455078125e-05,
      "step": 2149,
      "training_step_time": 0.38810038566589355
    },
    {
      "epoch": 1.312255859375e-05,
      "grad_norm": 0.41887167096138,
      "learning_rate": 7.166666666666667e-05,
      "loss": 0.1587,
      "step": 2150
    },
    {
      "epoch": 1.312255859375e-05,
      "model_forward_time": 0.11495113372802734,
      "step": 2150
    },
    {
      "epoch": 1.312255859375e-05,
      "step": 2150,
      "training_step_time": 0.39579319953918457
    },
    {
      "epoch": 1.3128662109375e-05,
      "model_forward_time": 0.11539363861083984,
      "step": 2151
    },
    {
      "epoch": 1.3128662109375e-05,
      "step": 2151,
      "training_step_time": 0.40238475799560547
    },
    {
      "epoch": 1.3134765625e-05,
      "model_forward_time": 0.11547398567199707,
      "step": 2152
    },
    {
      "epoch": 1.3134765625e-05,
      "step": 2152,
      "training_step_time": 0.3996412754058838
    },
    {
      "epoch": 1.3140869140625e-05,
      "model_forward_time": 0.11661124229431152,
      "step": 2153
    },
    {
      "epoch": 1.3140869140625e-05,
      "step": 2153,
      "training_step_time": 0.7721433639526367
    },
    {
      "epoch": 1.314697265625e-05,
      "model_forward_time": 0.1155240535736084,
      "step": 2154
    },
    {
      "epoch": 1.314697265625e-05,
      "step": 2154,
      "training_step_time": 0.39993834495544434
    },
    {
      "epoch": 1.3153076171875e-05,
      "model_forward_time": 0.11481714248657227,
      "step": 2155
    },
    {
      "epoch": 1.3153076171875e-05,
      "step": 2155,
      "training_step_time": 0.42899465560913086
    },
    {
      "epoch": 1.31591796875e-05,
      "model_forward_time": 0.11549878120422363,
      "step": 2156
    },
    {
      "epoch": 1.31591796875e-05,
      "step": 2156,
      "training_step_time": 0.3940086364746094
    },
    {
      "epoch": 1.3165283203125e-05,
      "model_forward_time": 0.11487221717834473,
      "step": 2157
    },
    {
      "epoch": 1.3165283203125e-05,
      "step": 2157,
      "training_step_time": 0.407238245010376
    },
    {
      "epoch": 1.317138671875e-05,
      "model_forward_time": 0.11482048034667969,
      "step": 2158
    },
    {
      "epoch": 1.317138671875e-05,
      "step": 2158,
      "training_step_time": 0.5145001411437988
    },
    {
      "epoch": 1.3177490234375e-05,
      "model_forward_time": 0.11717629432678223,
      "step": 2159
    },
    {
      "epoch": 1.3177490234375e-05,
      "step": 2159,
      "training_step_time": 0.846268892288208
    },
    {
      "epoch": 1.318359375e-05,
      "grad_norm": 0.5998401641845703,
      "learning_rate": 7.2e-05,
      "loss": 0.1496,
      "step": 2160
    },
    {
      "epoch": 1.318359375e-05,
      "model_forward_time": 0.1197195053100586,
      "step": 2160
    },
    {
      "epoch": 1.318359375e-05,
      "step": 2160,
      "training_step_time": 0.4916374683380127
    },
    {
      "epoch": 1.3189697265625e-05,
      "model_forward_time": 0.11476373672485352,
      "step": 2161
    },
    {
      "epoch": 1.3189697265625e-05,
      "step": 2161,
      "training_step_time": 0.38442420959472656
    },
    {
      "epoch": 1.319580078125e-05,
      "model_forward_time": 0.11437129974365234,
      "step": 2162
    },
    {
      "epoch": 1.319580078125e-05,
      "step": 2162,
      "training_step_time": 0.393904447555542
    },
    {
      "epoch": 1.3201904296875e-05,
      "model_forward_time": 0.11424994468688965,
      "step": 2163
    },
    {
      "epoch": 1.3201904296875e-05,
      "step": 2163,
      "training_step_time": 0.3943028450012207
    },
    {
      "epoch": 1.32080078125e-05,
      "model_forward_time": 0.11453390121459961,
      "step": 2164
    },
    {
      "epoch": 1.32080078125e-05,
      "step": 2164,
      "training_step_time": 0.4293019771575928
    },
    {
      "epoch": 1.3214111328125e-05,
      "model_forward_time": 0.11626005172729492,
      "step": 2165
    },
    {
      "epoch": 1.3214111328125e-05,
      "step": 2165,
      "training_step_time": 0.4593956470489502
    },
    {
      "epoch": 1.322021484375e-05,
      "model_forward_time": 0.1156010627746582,
      "step": 2166
    },
    {
      "epoch": 1.322021484375e-05,
      "step": 2166,
      "training_step_time": 0.3906552791595459
    },
    {
      "epoch": 1.3226318359375e-05,
      "model_forward_time": 0.1148681640625,
      "step": 2167
    },
    {
      "epoch": 1.3226318359375e-05,
      "step": 2167,
      "training_step_time": 0.38448095321655273
    },
    {
      "epoch": 1.3232421875e-05,
      "model_forward_time": 0.11616253852844238,
      "step": 2168
    },
    {
      "epoch": 1.3232421875e-05,
      "step": 2168,
      "training_step_time": 0.40235209465026855
    },
    {
      "epoch": 1.3238525390625e-05,
      "model_forward_time": 0.11567306518554688,
      "step": 2169
    },
    {
      "epoch": 1.3238525390625e-05,
      "step": 2169,
      "training_step_time": 0.4857063293457031
    },
    {
      "epoch": 1.324462890625e-05,
      "grad_norm": 0.47781822085380554,
      "learning_rate": 7.233333333333335e-05,
      "loss": 0.1419,
      "step": 2170
    },
    {
      "epoch": 1.324462890625e-05,
      "model_forward_time": 0.11643862724304199,
      "step": 2170
    },
    {
      "epoch": 1.324462890625e-05,
      "step": 2170,
      "training_step_time": 0.47984886169433594
    },
    {
      "epoch": 1.3250732421875e-05,
      "model_forward_time": 0.1153557300567627,
      "step": 2171
    },
    {
      "epoch": 1.3250732421875e-05,
      "step": 2171,
      "training_step_time": 0.3946998119354248
    },
    {
      "epoch": 1.32568359375e-05,
      "model_forward_time": 0.11572957038879395,
      "step": 2172
    },
    {
      "epoch": 1.32568359375e-05,
      "step": 2172,
      "training_step_time": 0.43323659896850586
    },
    {
      "epoch": 1.3262939453125e-05,
      "model_forward_time": 0.1146845817565918,
      "step": 2173
    },
    {
      "epoch": 1.3262939453125e-05,
      "step": 2173,
      "training_step_time": 0.4464151859283447
    },
    {
      "epoch": 1.326904296875e-05,
      "model_forward_time": 0.11481785774230957,
      "step": 2174
    },
    {
      "epoch": 1.326904296875e-05,
      "step": 2174,
      "training_step_time": 0.4318883419036865
    },
    {
      "epoch": 1.3275146484375e-05,
      "model_forward_time": 0.11535072326660156,
      "step": 2175
    },
    {
      "epoch": 1.3275146484375e-05,
      "step": 2175,
      "training_step_time": 0.38861942291259766
    },
    {
      "epoch": 1.328125e-05,
      "model_forward_time": 0.11604881286621094,
      "step": 2176
    },
    {
      "epoch": 1.328125e-05,
      "step": 2176,
      "training_step_time": 0.3884449005126953
    },
    {
      "epoch": 1.3287353515625e-05,
      "model_forward_time": 0.11485075950622559,
      "step": 2177
    },
    {
      "epoch": 1.3287353515625e-05,
      "step": 2177,
      "training_step_time": 0.6962118148803711
    },
    {
      "epoch": 1.329345703125e-05,
      "model_forward_time": 0.11481928825378418,
      "step": 2178
    },
    {
      "epoch": 1.329345703125e-05,
      "step": 2178,
      "training_step_time": 0.4166419506072998
    },
    {
      "epoch": 1.3299560546875e-05,
      "model_forward_time": 0.11521267890930176,
      "step": 2179
    },
    {
      "epoch": 1.3299560546875e-05,
      "step": 2179,
      "training_step_time": 0.3888061046600342
    },
    {
      "epoch": 1.33056640625e-05,
      "grad_norm": 0.5060789585113525,
      "learning_rate": 7.266666666666667e-05,
      "loss": 0.1528,
      "step": 2180
    },
    {
      "epoch": 1.33056640625e-05,
      "model_forward_time": 0.11479568481445312,
      "step": 2180
    },
    {
      "epoch": 1.33056640625e-05,
      "step": 2180,
      "training_step_time": 0.3957359790802002
    },
    {
      "epoch": 1.3311767578125e-05,
      "model_forward_time": 0.11421346664428711,
      "step": 2181
    },
    {
      "epoch": 1.3311767578125e-05,
      "step": 2181,
      "training_step_time": 0.3903179168701172
    },
    {
      "epoch": 1.331787109375e-05,
      "model_forward_time": 0.11528587341308594,
      "step": 2182
    },
    {
      "epoch": 1.331787109375e-05,
      "step": 2182,
      "training_step_time": 0.4193532466888428
    },
    {
      "epoch": 1.3323974609375e-05,
      "model_forward_time": 0.11784124374389648,
      "step": 2183
    },
    {
      "epoch": 1.3323974609375e-05,
      "step": 2183,
      "training_step_time": 0.6880991458892822
    },
    {
      "epoch": 1.3330078125e-05,
      "model_forward_time": 0.11661577224731445,
      "step": 2184
    },
    {
      "epoch": 1.3330078125e-05,
      "step": 2184,
      "training_step_time": 0.42719340324401855
    },
    {
      "epoch": 1.3336181640625e-05,
      "model_forward_time": 0.11506962776184082,
      "step": 2185
    },
    {
      "epoch": 1.3336181640625e-05,
      "step": 2185,
      "training_step_time": 0.41416287422180176
    },
    {
      "epoch": 1.334228515625e-05,
      "model_forward_time": 0.1151740550994873,
      "step": 2186
    },
    {
      "epoch": 1.334228515625e-05,
      "step": 2186,
      "training_step_time": 0.4906003475189209
    },
    {
      "epoch": 1.3348388671875e-05,
      "model_forward_time": 0.11488008499145508,
      "step": 2187
    },
    {
      "epoch": 1.3348388671875e-05,
      "step": 2187,
      "training_step_time": 0.48882532119750977
    },
    {
      "epoch": 1.33544921875e-05,
      "model_forward_time": 0.11431217193603516,
      "step": 2188
    },
    {
      "epoch": 1.33544921875e-05,
      "step": 2188,
      "training_step_time": 0.40822410583496094
    },
    {
      "epoch": 1.3360595703125e-05,
      "model_forward_time": 0.11432790756225586,
      "step": 2189
    },
    {
      "epoch": 1.3360595703125e-05,
      "step": 2189,
      "training_step_time": 0.5510740280151367
    },
    {
      "epoch": 1.336669921875e-05,
      "grad_norm": 0.45794302225112915,
      "learning_rate": 7.3e-05,
      "loss": 0.138,
      "step": 2190
    },
    {
      "epoch": 1.336669921875e-05,
      "model_forward_time": 0.11482954025268555,
      "step": 2190
    },
    {
      "epoch": 1.336669921875e-05,
      "step": 2190,
      "training_step_time": 0.4521520137786865
    },
    {
      "epoch": 1.3372802734375e-05,
      "model_forward_time": 0.11426353454589844,
      "step": 2191
    },
    {
      "epoch": 1.3372802734375e-05,
      "step": 2191,
      "training_step_time": 0.42008423805236816
    },
    {
      "epoch": 1.337890625e-05,
      "model_forward_time": 0.11432218551635742,
      "step": 2192
    },
    {
      "epoch": 1.337890625e-05,
      "step": 2192,
      "training_step_time": 0.39509057998657227
    },
    {
      "epoch": 1.3385009765625e-05,
      "model_forward_time": 0.11469006538391113,
      "step": 2193
    },
    {
      "epoch": 1.3385009765625e-05,
      "step": 2193,
      "training_step_time": 0.3944971561431885
    },
    {
      "epoch": 1.339111328125e-05,
      "model_forward_time": 0.11559033393859863,
      "step": 2194
    },
    {
      "epoch": 1.339111328125e-05,
      "step": 2194,
      "training_step_time": 0.39039087295532227
    },
    {
      "epoch": 1.3397216796875e-05,
      "model_forward_time": 0.11538910865783691,
      "step": 2195
    },
    {
      "epoch": 1.3397216796875e-05,
      "step": 2195,
      "training_step_time": 0.6819915771484375
    },
    {
      "epoch": 1.34033203125e-05,
      "model_forward_time": 0.11521673202514648,
      "step": 2196
    },
    {
      "epoch": 1.34033203125e-05,
      "step": 2196,
      "training_step_time": 0.44937801361083984
    },
    {
      "epoch": 1.3409423828125e-05,
      "model_forward_time": 0.1148366928100586,
      "step": 2197
    },
    {
      "epoch": 1.3409423828125e-05,
      "step": 2197,
      "training_step_time": 0.4833207130432129
    },
    {
      "epoch": 1.341552734375e-05,
      "model_forward_time": 0.11601686477661133,
      "step": 2198
    },
    {
      "epoch": 1.341552734375e-05,
      "step": 2198,
      "training_step_time": 0.36697959899902344
    },
    {
      "epoch": 1.3421630859375e-05,
      "model_forward_time": 0.11487674713134766,
      "step": 2199
    },
    {
      "epoch": 1.3421630859375e-05,
      "step": 2199,
      "training_step_time": 0.47585463523864746
    },
    {
      "epoch": 1.3427734375e-05,
      "grad_norm": 0.43966543674468994,
      "learning_rate": 7.333333333333333e-05,
      "loss": 0.1438,
      "step": 2200
    },
    {
      "epoch": 1.3427734375e-05,
      "model_forward_time": 0.11559128761291504,
      "step": 2200
    },
    {
      "epoch": 1.3427734375e-05,
      "step": 2200,
      "training_step_time": 0.4161968231201172
    },
    {
      "epoch": 1.3433837890625e-05,
      "model_forward_time": 0.1150960922241211,
      "step": 2201
    },
    {
      "epoch": 1.3433837890625e-05,
      "step": 2201,
      "training_step_time": 0.4138631820678711
    },
    {
      "epoch": 1.343994140625e-05,
      "model_forward_time": 0.11456084251403809,
      "step": 2202
    },
    {
      "epoch": 1.343994140625e-05,
      "step": 2202,
      "training_step_time": 0.42847180366516113
    },
    {
      "epoch": 1.3446044921875e-05,
      "model_forward_time": 0.1147315502166748,
      "step": 2203
    },
    {
      "epoch": 1.3446044921875e-05,
      "step": 2203,
      "training_step_time": 0.3858463764190674
    },
    {
      "epoch": 1.34521484375e-05,
      "model_forward_time": 0.11469101905822754,
      "step": 2204
    },
    {
      "epoch": 1.34521484375e-05,
      "step": 2204,
      "training_step_time": 0.4345273971557617
    },
    {
      "epoch": 1.3458251953125e-05,
      "model_forward_time": 0.11550331115722656,
      "step": 2205
    },
    {
      "epoch": 1.3458251953125e-05,
      "step": 2205,
      "training_step_time": 0.3975648880004883
    },
    {
      "epoch": 1.346435546875e-05,
      "model_forward_time": 0.11598086357116699,
      "step": 2206
    },
    {
      "epoch": 1.346435546875e-05,
      "step": 2206,
      "training_step_time": 0.3931865692138672
    },
    {
      "epoch": 1.3470458984375e-05,
      "model_forward_time": 0.1158914566040039,
      "step": 2207
    },
    {
      "epoch": 1.3470458984375e-05,
      "step": 2207,
      "training_step_time": 0.6218745708465576
    },
    {
      "epoch": 1.34765625e-05,
      "model_forward_time": 0.1143653392791748,
      "step": 2208
    },
    {
      "epoch": 1.34765625e-05,
      "step": 2208,
      "training_step_time": 0.39103174209594727
    },
    {
      "epoch": 1.3482666015625e-05,
      "model_forward_time": 0.1150972843170166,
      "step": 2209
    },
    {
      "epoch": 1.3482666015625e-05,
      "step": 2209,
      "training_step_time": 0.3882169723510742
    },
    {
      "epoch": 1.348876953125e-05,
      "grad_norm": 0.31443387269973755,
      "learning_rate": 7.366666666666668e-05,
      "loss": 0.1407,
      "step": 2210
    },
    {
      "epoch": 1.348876953125e-05,
      "model_forward_time": 0.1151895523071289,
      "step": 2210
    },
    {
      "epoch": 1.348876953125e-05,
      "step": 2210,
      "training_step_time": 0.4418001174926758
    },
    {
      "epoch": 1.3494873046875e-05,
      "model_forward_time": 0.11532330513000488,
      "step": 2211
    },
    {
      "epoch": 1.3494873046875e-05,
      "step": 2211,
      "training_step_time": 0.4475734233856201
    },
    {
      "epoch": 1.35009765625e-05,
      "model_forward_time": 0.11473727226257324,
      "step": 2212
    },
    {
      "epoch": 1.35009765625e-05,
      "step": 2212,
      "training_step_time": 0.3729257583618164
    },
    {
      "epoch": 1.3507080078125e-05,
      "model_forward_time": 0.1149606704711914,
      "step": 2213
    },
    {
      "epoch": 1.3507080078125e-05,
      "step": 2213,
      "training_step_time": 0.6700565814971924
    },
    {
      "epoch": 1.351318359375e-05,
      "model_forward_time": 0.11443829536437988,
      "step": 2214
    },
    {
      "epoch": 1.351318359375e-05,
      "step": 2214,
      "training_step_time": 0.5042531490325928
    },
    {
      "epoch": 1.3519287109375e-05,
      "model_forward_time": 0.11406064033508301,
      "step": 2215
    },
    {
      "epoch": 1.3519287109375e-05,
      "step": 2215,
      "training_step_time": 0.407503604888916
    },
    {
      "epoch": 1.3525390625e-05,
      "model_forward_time": 0.11424899101257324,
      "step": 2216
    },
    {
      "epoch": 1.3525390625e-05,
      "step": 2216,
      "training_step_time": 0.3876938819885254
    },
    {
      "epoch": 1.3531494140625e-05,
      "model_forward_time": 0.11416912078857422,
      "step": 2217
    },
    {
      "epoch": 1.3531494140625e-05,
      "step": 2217,
      "training_step_time": 0.3974635601043701
    },
    {
      "epoch": 1.353759765625e-05,
      "model_forward_time": 0.11490368843078613,
      "step": 2218
    },
    {
      "epoch": 1.353759765625e-05,
      "step": 2218,
      "training_step_time": 0.3909029960632324
    },
    {
      "epoch": 1.3543701171875e-05,
      "model_forward_time": 0.11594367027282715,
      "step": 2219
    },
    {
      "epoch": 1.3543701171875e-05,
      "step": 2219,
      "training_step_time": 0.6063237190246582
    },
    {
      "epoch": 1.35498046875e-05,
      "grad_norm": 0.5427930951118469,
      "learning_rate": 7.4e-05,
      "loss": 0.1392,
      "step": 2220
    },
    {
      "epoch": 1.35498046875e-05,
      "model_forward_time": 0.11513185501098633,
      "step": 2220
    },
    {
      "epoch": 1.35498046875e-05,
      "step": 2220,
      "training_step_time": 0.3840799331665039
    },
    {
      "epoch": 1.3555908203125e-05,
      "model_forward_time": 0.1154336929321289,
      "step": 2221
    },
    {
      "epoch": 1.3555908203125e-05,
      "step": 2221,
      "training_step_time": 0.3962388038635254
    },
    {
      "epoch": 1.356201171875e-05,
      "model_forward_time": 0.11546754837036133,
      "step": 2222
    },
    {
      "epoch": 1.356201171875e-05,
      "step": 2222,
      "training_step_time": 0.38635826110839844
    },
    {
      "epoch": 1.3568115234375e-05,
      "model_forward_time": 0.11527466773986816,
      "step": 2223
    },
    {
      "epoch": 1.3568115234375e-05,
      "step": 2223,
      "training_step_time": 0.3955404758453369
    },
    {
      "epoch": 1.357421875e-05,
      "model_forward_time": 0.11495780944824219,
      "step": 2224
    },
    {
      "epoch": 1.357421875e-05,
      "step": 2224,
      "training_step_time": 0.49486613273620605
    },
    {
      "epoch": 1.3580322265625e-05,
      "model_forward_time": 0.11506271362304688,
      "step": 2225
    },
    {
      "epoch": 1.3580322265625e-05,
      "step": 2225,
      "training_step_time": 0.8324277400970459
    },
    {
      "epoch": 1.358642578125e-05,
      "model_forward_time": 0.11546134948730469,
      "step": 2226
    },
    {
      "epoch": 1.358642578125e-05,
      "step": 2226,
      "training_step_time": 0.4703376293182373
    },
    {
      "epoch": 1.3592529296875e-05,
      "model_forward_time": 0.11479020118713379,
      "step": 2227
    },
    {
      "epoch": 1.3592529296875e-05,
      "step": 2227,
      "training_step_time": 0.4546501636505127
    },
    {
      "epoch": 1.35986328125e-05,
      "model_forward_time": 0.11406874656677246,
      "step": 2228
    },
    {
      "epoch": 1.35986328125e-05,
      "step": 2228,
      "training_step_time": 0.43236398696899414
    },
    {
      "epoch": 1.3604736328125e-05,
      "model_forward_time": 0.11649227142333984,
      "step": 2229
    },
    {
      "epoch": 1.3604736328125e-05,
      "step": 2229,
      "training_step_time": 0.4027218818664551
    },
    {
      "epoch": 1.361083984375e-05,
      "grad_norm": 0.4510776400566101,
      "learning_rate": 7.433333333333333e-05,
      "loss": 0.1459,
      "step": 2230
    },
    {
      "epoch": 1.361083984375e-05,
      "model_forward_time": 0.11503958702087402,
      "step": 2230
    },
    {
      "epoch": 1.361083984375e-05,
      "step": 2230,
      "training_step_time": 0.38288283348083496
    },
    {
      "epoch": 1.3616943359375e-05,
      "model_forward_time": 0.11457586288452148,
      "step": 2231
    },
    {
      "epoch": 1.3616943359375e-05,
      "step": 2231,
      "training_step_time": 0.634864091873169
    },
    {
      "epoch": 1.3623046875e-05,
      "model_forward_time": 0.1148231029510498,
      "step": 2232
    },
    {
      "epoch": 1.3623046875e-05,
      "step": 2232,
      "training_step_time": 0.38959813117980957
    },
    {
      "epoch": 1.3629150390625e-05,
      "model_forward_time": 0.11525917053222656,
      "step": 2233
    },
    {
      "epoch": 1.3629150390625e-05,
      "step": 2233,
      "training_step_time": 0.38818955421447754
    },
    {
      "epoch": 1.363525390625e-05,
      "model_forward_time": 0.1155397891998291,
      "step": 2234
    },
    {
      "epoch": 1.363525390625e-05,
      "step": 2234,
      "training_step_time": 0.3971843719482422
    },
    {
      "epoch": 1.3641357421875e-05,
      "model_forward_time": 0.1148533821105957,
      "step": 2235
    },
    {
      "epoch": 1.3641357421875e-05,
      "step": 2235,
      "training_step_time": 0.40412378311157227
    },
    {
      "epoch": 1.36474609375e-05,
      "model_forward_time": 0.11524772644042969,
      "step": 2236
    },
    {
      "epoch": 1.36474609375e-05,
      "step": 2236,
      "training_step_time": 0.3864905834197998
    },
    {
      "epoch": 1.3653564453125e-05,
      "model_forward_time": 0.11515212059020996,
      "step": 2237
    },
    {
      "epoch": 1.3653564453125e-05,
      "step": 2237,
      "training_step_time": 0.7087998390197754
    },
    {
      "epoch": 1.365966796875e-05,
      "model_forward_time": 0.11529779434204102,
      "step": 2238
    },
    {
      "epoch": 1.365966796875e-05,
      "step": 2238,
      "training_step_time": 0.4128711223602295
    },
    {
      "epoch": 1.3665771484375e-05,
      "model_forward_time": 0.11516046524047852,
      "step": 2239
    },
    {
      "epoch": 1.3665771484375e-05,
      "step": 2239,
      "training_step_time": 0.36570000648498535
    },
    {
      "epoch": 1.3671875e-05,
      "grad_norm": 0.4043273329734802,
      "learning_rate": 7.466666666666667e-05,
      "loss": 0.1363,
      "step": 2240
    },
    {
      "epoch": 1.3671875e-05,
      "model_forward_time": 0.1158139705657959,
      "step": 2240
    },
    {
      "epoch": 1.3671875e-05,
      "step": 2240,
      "training_step_time": 0.4799318313598633
    },
    {
      "epoch": 1.3677978515625e-05,
      "model_forward_time": 0.11449837684631348,
      "step": 2241
    },
    {
      "epoch": 1.3677978515625e-05,
      "step": 2241,
      "training_step_time": 0.45772743225097656
    },
    {
      "epoch": 1.368408203125e-05,
      "model_forward_time": 0.11527371406555176,
      "step": 2242
    },
    {
      "epoch": 1.368408203125e-05,
      "step": 2242,
      "training_step_time": 0.4059586524963379
    },
    {
      "epoch": 1.3690185546875e-05,
      "model_forward_time": 0.11570143699645996,
      "step": 2243
    },
    {
      "epoch": 1.3690185546875e-05,
      "step": 2243,
      "training_step_time": 0.5995206832885742
    },
    {
      "epoch": 1.36962890625e-05,
      "model_forward_time": 0.11468935012817383,
      "step": 2244
    },
    {
      "epoch": 1.36962890625e-05,
      "step": 2244,
      "training_step_time": 0.3844630718231201
    },
    {
      "epoch": 1.3702392578125e-05,
      "model_forward_time": 0.11441469192504883,
      "step": 2245
    },
    {
      "epoch": 1.3702392578125e-05,
      "step": 2245,
      "training_step_time": 0.39490509033203125
    },
    {
      "epoch": 1.370849609375e-05,
      "model_forward_time": 0.11493802070617676,
      "step": 2246
    },
    {
      "epoch": 1.370849609375e-05,
      "step": 2246,
      "training_step_time": 0.3986680507659912
    },
    {
      "epoch": 1.3714599609375e-05,
      "model_forward_time": 0.11516356468200684,
      "step": 2247
    },
    {
      "epoch": 1.3714599609375e-05,
      "step": 2247,
      "training_step_time": 0.3956279754638672
    },
    {
      "epoch": 1.3720703125e-05,
      "model_forward_time": 0.11550641059875488,
      "step": 2248
    },
    {
      "epoch": 1.3720703125e-05,
      "step": 2248,
      "training_step_time": 0.39099669456481934
    },
    {
      "epoch": 1.3726806640625e-05,
      "model_forward_time": 0.11585760116577148,
      "step": 2249
    },
    {
      "epoch": 1.3726806640625e-05,
      "step": 2249,
      "training_step_time": 0.7619256973266602
    },
    {
      "epoch": 1.373291015625e-05,
      "grad_norm": 0.4419693648815155,
      "learning_rate": 7.500000000000001e-05,
      "loss": 0.1506,
      "step": 2250
    },
    {
      "epoch": 1.373291015625e-05,
      "model_forward_time": 0.11459970474243164,
      "step": 2250
    },
    {
      "epoch": 1.373291015625e-05,
      "step": 2250,
      "training_step_time": 0.42285990715026855
    },
    {
      "epoch": 1.3739013671875e-05,
      "model_forward_time": 0.11517572402954102,
      "step": 2251
    },
    {
      "epoch": 1.3739013671875e-05,
      "step": 2251,
      "training_step_time": 0.41747021675109863
    },
    {
      "epoch": 1.37451171875e-05,
      "model_forward_time": 0.11498785018920898,
      "step": 2252
    },
    {
      "epoch": 1.37451171875e-05,
      "step": 2252,
      "training_step_time": 0.4778921604156494
    },
    {
      "epoch": 1.3751220703125e-05,
      "model_forward_time": 0.11473798751831055,
      "step": 2253
    },
    {
      "epoch": 1.3751220703125e-05,
      "step": 2253,
      "training_step_time": 0.49335741996765137
    },
    {
      "epoch": 1.375732421875e-05,
      "model_forward_time": 0.11496233940124512,
      "step": 2254
    },
    {
      "epoch": 1.375732421875e-05,
      "step": 2254,
      "training_step_time": 0.469710111618042
    },
    {
      "epoch": 1.3763427734375e-05,
      "model_forward_time": 0.11528563499450684,
      "step": 2255
    },
    {
      "epoch": 1.3763427734375e-05,
      "step": 2255,
      "training_step_time": 0.5120131969451904
    },
    {
      "epoch": 1.376953125e-05,
      "model_forward_time": 0.11454010009765625,
      "step": 2256
    },
    {
      "epoch": 1.376953125e-05,
      "step": 2256,
      "training_step_time": 0.3895537853240967
    },
    {
      "epoch": 1.3775634765625e-05,
      "model_forward_time": 0.1146090030670166,
      "step": 2257
    },
    {
      "epoch": 1.3775634765625e-05,
      "step": 2257,
      "training_step_time": 0.3876535892486572
    },
    {
      "epoch": 1.378173828125e-05,
      "model_forward_time": 0.11494708061218262,
      "step": 2258
    },
    {
      "epoch": 1.378173828125e-05,
      "step": 2258,
      "training_step_time": 0.39002060890197754
    },
    {
      "epoch": 1.3787841796875e-05,
      "model_forward_time": 0.11551713943481445,
      "step": 2259
    },
    {
      "epoch": 1.3787841796875e-05,
      "step": 2259,
      "training_step_time": 0.39716053009033203
    },
    {
      "epoch": 1.37939453125e-05,
      "grad_norm": 0.3534468710422516,
      "learning_rate": 7.533333333333334e-05,
      "loss": 0.146,
      "step": 2260
    },
    {
      "epoch": 1.37939453125e-05,
      "model_forward_time": 0.11671733856201172,
      "step": 2260
    },
    {
      "epoch": 1.37939453125e-05,
      "step": 2260,
      "training_step_time": 0.3680267333984375
    },
    {
      "epoch": 1.3800048828125e-05,
      "model_forward_time": 0.11545085906982422,
      "step": 2261
    },
    {
      "epoch": 1.3800048828125e-05,
      "step": 2261,
      "training_step_time": 0.7995553016662598
    },
    {
      "epoch": 1.380615234375e-05,
      "model_forward_time": 0.11509847640991211,
      "step": 2262
    },
    {
      "epoch": 1.380615234375e-05,
      "step": 2262,
      "training_step_time": 0.38672518730163574
    },
    {
      "epoch": 1.3812255859375e-05,
      "model_forward_time": 0.11505556106567383,
      "step": 2263
    },
    {
      "epoch": 1.3812255859375e-05,
      "step": 2263,
      "training_step_time": 0.397052526473999
    },
    {
      "epoch": 1.3818359375e-05,
      "model_forward_time": 0.11469578742980957,
      "step": 2264
    },
    {
      "epoch": 1.3818359375e-05,
      "step": 2264,
      "training_step_time": 0.39568305015563965
    },
    {
      "epoch": 1.3824462890625e-05,
      "model_forward_time": 0.11446666717529297,
      "step": 2265
    },
    {
      "epoch": 1.3824462890625e-05,
      "step": 2265,
      "training_step_time": 0.427107572555542
    },
    {
      "epoch": 1.383056640625e-05,
      "model_forward_time": 0.11529231071472168,
      "step": 2266
    },
    {
      "epoch": 1.383056640625e-05,
      "step": 2266,
      "training_step_time": 0.4742133617401123
    },
    {
      "epoch": 1.3836669921875e-05,
      "model_forward_time": 0.11518597602844238,
      "step": 2267
    },
    {
      "epoch": 1.3836669921875e-05,
      "step": 2267,
      "training_step_time": 0.7763869762420654
    },
    {
      "epoch": 1.38427734375e-05,
      "model_forward_time": 0.11470293998718262,
      "step": 2268
    },
    {
      "epoch": 1.38427734375e-05,
      "step": 2268,
      "training_step_time": 0.4215278625488281
    },
    {
      "epoch": 1.3848876953125e-05,
      "model_forward_time": 0.11622333526611328,
      "step": 2269
    },
    {
      "epoch": 1.3848876953125e-05,
      "step": 2269,
      "training_step_time": 0.3836650848388672
    },
    {
      "epoch": 1.385498046875e-05,
      "grad_norm": 0.41389721632003784,
      "learning_rate": 7.566666666666667e-05,
      "loss": 0.1386,
      "step": 2270
    },
    {
      "epoch": 1.385498046875e-05,
      "model_forward_time": 0.11428046226501465,
      "step": 2270
    },
    {
      "epoch": 1.385498046875e-05,
      "step": 2270,
      "training_step_time": 0.38016319274902344
    },
    {
      "epoch": 1.3861083984375e-05,
      "model_forward_time": 0.1143035888671875,
      "step": 2271
    },
    {
      "epoch": 1.3861083984375e-05,
      "step": 2271,
      "training_step_time": 0.3932220935821533
    },
    {
      "epoch": 1.38671875e-05,
      "model_forward_time": 0.11428499221801758,
      "step": 2272
    },
    {
      "epoch": 1.38671875e-05,
      "step": 2272,
      "training_step_time": 0.3827078342437744
    },
    {
      "epoch": 1.3873291015625e-05,
      "model_forward_time": 0.11541509628295898,
      "step": 2273
    },
    {
      "epoch": 1.3873291015625e-05,
      "step": 2273,
      "training_step_time": 0.7858397960662842
    },
    {
      "epoch": 1.387939453125e-05,
      "model_forward_time": 0.11562800407409668,
      "step": 2274
    },
    {
      "epoch": 1.387939453125e-05,
      "step": 2274,
      "training_step_time": 0.38822174072265625
    },
    {
      "epoch": 1.3885498046875e-05,
      "model_forward_time": 0.11529254913330078,
      "step": 2275
    },
    {
      "epoch": 1.3885498046875e-05,
      "step": 2275,
      "training_step_time": 0.39064645767211914
    },
    {
      "epoch": 1.38916015625e-05,
      "model_forward_time": 0.11421632766723633,
      "step": 2276
    },
    {
      "epoch": 1.38916015625e-05,
      "step": 2276,
      "training_step_time": 0.39086294174194336
    },
    {
      "epoch": 1.3897705078125e-05,
      "model_forward_time": 0.11468815803527832,
      "step": 2277
    },
    {
      "epoch": 1.3897705078125e-05,
      "step": 2277,
      "training_step_time": 0.39189887046813965
    },
    {
      "epoch": 1.390380859375e-05,
      "model_forward_time": 0.11468958854675293,
      "step": 2278
    },
    {
      "epoch": 1.390380859375e-05,
      "step": 2278,
      "training_step_time": 0.4024240970611572
    },
    {
      "epoch": 1.3909912109375e-05,
      "model_forward_time": 0.11529088020324707,
      "step": 2279
    },
    {
      "epoch": 1.3909912109375e-05,
      "step": 2279,
      "training_step_time": 0.711083173751831
    },
    {
      "epoch": 1.3916015625e-05,
      "grad_norm": 0.4570159614086151,
      "learning_rate": 7.6e-05,
      "loss": 0.1538,
      "step": 2280
    },
    {
      "epoch": 1.3916015625e-05,
      "model_forward_time": 0.11590218544006348,
      "step": 2280
    },
    {
      "epoch": 1.3916015625e-05,
      "step": 2280,
      "training_step_time": 0.48742008209228516
    },
    {
      "epoch": 1.3922119140625e-05,
      "model_forward_time": 0.1164097785949707,
      "step": 2281
    },
    {
      "epoch": 1.3922119140625e-05,
      "step": 2281,
      "training_step_time": 0.45826148986816406
    },
    {
      "epoch": 1.392822265625e-05,
      "model_forward_time": 0.11548066139221191,
      "step": 2282
    },
    {
      "epoch": 1.392822265625e-05,
      "step": 2282,
      "training_step_time": 0.3875606060028076
    },
    {
      "epoch": 1.3934326171875e-05,
      "model_forward_time": 0.11506271362304688,
      "step": 2283
    },
    {
      "epoch": 1.3934326171875e-05,
      "step": 2283,
      "training_step_time": 0.3907806873321533
    },
    {
      "epoch": 1.39404296875e-05,
      "model_forward_time": 0.1145944595336914,
      "step": 2284
    },
    {
      "epoch": 1.39404296875e-05,
      "step": 2284,
      "training_step_time": 0.38116025924682617
    },
    {
      "epoch": 1.3946533203125e-05,
      "model_forward_time": 0.1156318187713623,
      "step": 2285
    },
    {
      "epoch": 1.3946533203125e-05,
      "step": 2285,
      "training_step_time": 0.47736597061157227
    },
    {
      "epoch": 1.395263671875e-05,
      "model_forward_time": 0.11520099639892578,
      "step": 2286
    },
    {
      "epoch": 1.395263671875e-05,
      "step": 2286,
      "training_step_time": 0.3986058235168457
    },
    {
      "epoch": 1.3958740234375e-05,
      "model_forward_time": 0.11546635627746582,
      "step": 2287
    },
    {
      "epoch": 1.3958740234375e-05,
      "step": 2287,
      "training_step_time": 0.39321088790893555
    },
    {
      "epoch": 1.396484375e-05,
      "model_forward_time": 0.11620283126831055,
      "step": 2288
    },
    {
      "epoch": 1.396484375e-05,
      "step": 2288,
      "training_step_time": 0.3996281623840332
    },
    {
      "epoch": 1.3970947265625e-05,
      "model_forward_time": 0.11557531356811523,
      "step": 2289
    },
    {
      "epoch": 1.3970947265625e-05,
      "step": 2289,
      "training_step_time": 0.39532899856567383
    },
    {
      "epoch": 1.397705078125e-05,
      "grad_norm": 0.40584173798561096,
      "learning_rate": 7.633333333333334e-05,
      "loss": 0.1403,
      "step": 2290
    },
    {
      "epoch": 1.397705078125e-05,
      "model_forward_time": 0.11519885063171387,
      "step": 2290
    },
    {
      "epoch": 1.397705078125e-05,
      "step": 2290,
      "training_step_time": 0.39316558837890625
    },
    {
      "epoch": 1.3983154296875e-05,
      "model_forward_time": 0.11491084098815918,
      "step": 2291
    },
    {
      "epoch": 1.3983154296875e-05,
      "step": 2291,
      "training_step_time": 0.9567945003509521
    },
    {
      "epoch": 1.39892578125e-05,
      "model_forward_time": 0.11418557167053223,
      "step": 2292
    },
    {
      "epoch": 1.39892578125e-05,
      "step": 2292,
      "training_step_time": 0.4893956184387207
    },
    {
      "epoch": 1.3995361328125e-05,
      "model_forward_time": 0.11510968208312988,
      "step": 2293
    },
    {
      "epoch": 1.3995361328125e-05,
      "step": 2293,
      "training_step_time": 0.4075756072998047
    },
    {
      "epoch": 1.400146484375e-05,
      "model_forward_time": 0.11467671394348145,
      "step": 2294
    },
    {
      "epoch": 1.400146484375e-05,
      "step": 2294,
      "training_step_time": 0.41927170753479004
    },
    {
      "epoch": 1.4007568359375e-05,
      "model_forward_time": 0.11331439018249512,
      "step": 2295
    },
    {
      "epoch": 1.4007568359375e-05,
      "step": 2295,
      "training_step_time": 0.39655065536499023
    },
    {
      "epoch": 1.4013671875e-05,
      "model_forward_time": 0.11520147323608398,
      "step": 2296
    },
    {
      "epoch": 1.4013671875e-05,
      "step": 2296,
      "training_step_time": 0.38153553009033203
    },
    {
      "epoch": 1.4019775390625e-05,
      "model_forward_time": 0.11455607414245605,
      "step": 2297
    },
    {
      "epoch": 1.4019775390625e-05,
      "step": 2297,
      "training_step_time": 0.38457250595092773
    },
    {
      "epoch": 1.402587890625e-05,
      "model_forward_time": 0.11474347114562988,
      "step": 2298
    },
    {
      "epoch": 1.402587890625e-05,
      "step": 2298,
      "training_step_time": 0.39541077613830566
    },
    {
      "epoch": 1.4031982421875e-05,
      "model_forward_time": 0.11502671241760254,
      "step": 2299
    },
    {
      "epoch": 1.4031982421875e-05,
      "step": 2299,
      "training_step_time": 0.396010160446167
    },
    {
      "epoch": 1.40380859375e-05,
      "grad_norm": 0.5737215280532837,
      "learning_rate": 7.666666666666667e-05,
      "loss": 0.132,
      "step": 2300
    },
    {
      "epoch": 1.40380859375e-05,
      "model_forward_time": 0.11491751670837402,
      "step": 2300
    },
    {
      "epoch": 1.40380859375e-05,
      "step": 2300,
      "training_step_time": 0.40188169479370117
    },
    {
      "epoch": 1.4044189453125e-05,
      "model_forward_time": 0.11566781997680664,
      "step": 2301
    },
    {
      "epoch": 1.4044189453125e-05,
      "step": 2301,
      "training_step_time": 0.4051632881164551
    },
    {
      "epoch": 1.405029296875e-05,
      "model_forward_time": 0.11513805389404297,
      "step": 2302
    },
    {
      "epoch": 1.405029296875e-05,
      "step": 2302,
      "training_step_time": 0.4057326316833496
    },
    {
      "epoch": 1.4056396484375e-05,
      "model_forward_time": 0.11562681198120117,
      "step": 2303
    },
    {
      "epoch": 1.4056396484375e-05,
      "step": 2303,
      "training_step_time": 1.0108113288879395
    },
    {
      "epoch": 1.40625e-05,
      "model_forward_time": 0.11411404609680176,
      "step": 2304
    },
    {
      "epoch": 1.40625e-05,
      "step": 2304,
      "training_step_time": 0.3990046977996826
    },
    {
      "epoch": 1.4068603515625e-05,
      "model_forward_time": 0.11420321464538574,
      "step": 2305
    },
    {
      "epoch": 1.4068603515625e-05,
      "step": 2305,
      "training_step_time": 0.4214644432067871
    },
    {
      "epoch": 1.407470703125e-05,
      "model_forward_time": 0.11443758010864258,
      "step": 2306
    },
    {
      "epoch": 1.407470703125e-05,
      "step": 2306,
      "training_step_time": 0.3955075740814209
    },
    {
      "epoch": 1.4080810546875e-05,
      "model_forward_time": 0.1139218807220459,
      "step": 2307
    },
    {
      "epoch": 1.4080810546875e-05,
      "step": 2307,
      "training_step_time": 0.41934800148010254
    },
    {
      "epoch": 1.40869140625e-05,
      "model_forward_time": 0.11385750770568848,
      "step": 2308
    },
    {
      "epoch": 1.40869140625e-05,
      "step": 2308,
      "training_step_time": 0.46123552322387695
    },
    {
      "epoch": 1.4093017578125e-05,
      "model_forward_time": 0.11594820022583008,
      "step": 2309
    },
    {
      "epoch": 1.4093017578125e-05,
      "step": 2309,
      "training_step_time": 0.752544641494751
    },
    {
      "epoch": 1.409912109375e-05,
      "grad_norm": 0.5840413570404053,
      "learning_rate": 7.7e-05,
      "loss": 0.1446,
      "step": 2310
    },
    {
      "epoch": 1.409912109375e-05,
      "model_forward_time": 0.1157674789428711,
      "step": 2310
    },
    {
      "epoch": 1.409912109375e-05,
      "step": 2310,
      "training_step_time": 0.39667797088623047
    },
    {
      "epoch": 1.4105224609375e-05,
      "model_forward_time": 0.11410808563232422,
      "step": 2311
    },
    {
      "epoch": 1.4105224609375e-05,
      "step": 2311,
      "training_step_time": 0.3942134380340576
    },
    {
      "epoch": 1.4111328125e-05,
      "model_forward_time": 0.11546158790588379,
      "step": 2312
    },
    {
      "epoch": 1.4111328125e-05,
      "step": 2312,
      "training_step_time": 0.390303373336792
    },
    {
      "epoch": 1.4117431640625e-05,
      "model_forward_time": 0.11454224586486816,
      "step": 2313
    },
    {
      "epoch": 1.4117431640625e-05,
      "step": 2313,
      "training_step_time": 0.3963737487792969
    },
    {
      "epoch": 1.412353515625e-05,
      "model_forward_time": 0.11458349227905273,
      "step": 2314
    },
    {
      "epoch": 1.412353515625e-05,
      "step": 2314,
      "training_step_time": 0.38530778884887695
    },
    {
      "epoch": 1.4129638671875e-05,
      "model_forward_time": 0.1154332160949707,
      "step": 2315
    },
    {
      "epoch": 1.4129638671875e-05,
      "step": 2315,
      "training_step_time": 0.8724143505096436
    },
    {
      "epoch": 1.41357421875e-05,
      "model_forward_time": 0.1148219108581543,
      "step": 2316
    },
    {
      "epoch": 1.41357421875e-05,
      "step": 2316,
      "training_step_time": 0.398754358291626
    },
    {
      "epoch": 1.4141845703125e-05,
      "model_forward_time": 0.11438298225402832,
      "step": 2317
    },
    {
      "epoch": 1.4141845703125e-05,
      "step": 2317,
      "training_step_time": 0.3837127685546875
    },
    {
      "epoch": 1.414794921875e-05,
      "model_forward_time": 0.11477780342102051,
      "step": 2318
    },
    {
      "epoch": 1.414794921875e-05,
      "step": 2318,
      "training_step_time": 0.39270758628845215
    },
    {
      "epoch": 1.4154052734375e-05,
      "model_forward_time": 0.11414098739624023,
      "step": 2319
    },
    {
      "epoch": 1.4154052734375e-05,
      "step": 2319,
      "training_step_time": 0.44477415084838867
    },
    {
      "epoch": 1.416015625e-05,
      "grad_norm": 0.607746422290802,
      "learning_rate": 7.733333333333333e-05,
      "loss": 0.1341,
      "step": 2320
    },
    {
      "epoch": 1.416015625e-05,
      "model_forward_time": 0.11419343948364258,
      "step": 2320
    },
    {
      "epoch": 1.416015625e-05,
      "step": 2320,
      "training_step_time": 0.4808671474456787
    },
    {
      "epoch": 1.4166259765625e-05,
      "model_forward_time": 0.11561441421508789,
      "step": 2321
    },
    {
      "epoch": 1.4166259765625e-05,
      "step": 2321,
      "training_step_time": 0.5588493347167969
    },
    {
      "epoch": 1.417236328125e-05,
      "model_forward_time": 0.11511611938476562,
      "step": 2322
    },
    {
      "epoch": 1.417236328125e-05,
      "step": 2322,
      "training_step_time": 0.4041023254394531
    },
    {
      "epoch": 1.4178466796875e-05,
      "model_forward_time": 0.11492180824279785,
      "step": 2323
    },
    {
      "epoch": 1.4178466796875e-05,
      "step": 2323,
      "training_step_time": 0.4161515235900879
    },
    {
      "epoch": 1.41845703125e-05,
      "model_forward_time": 0.11436200141906738,
      "step": 2324
    },
    {
      "epoch": 1.41845703125e-05,
      "step": 2324,
      "training_step_time": 0.41008830070495605
    },
    {
      "epoch": 1.4190673828125e-05,
      "model_forward_time": 0.11473441123962402,
      "step": 2325
    },
    {
      "epoch": 1.4190673828125e-05,
      "step": 2325,
      "training_step_time": 0.39144301414489746
    },
    {
      "epoch": 1.419677734375e-05,
      "model_forward_time": 0.11422562599182129,
      "step": 2326
    },
    {
      "epoch": 1.419677734375e-05,
      "step": 2326,
      "training_step_time": 0.40181851387023926
    },
    {
      "epoch": 1.4202880859375e-05,
      "model_forward_time": 0.1151123046875,
      "step": 2327
    },
    {
      "epoch": 1.4202880859375e-05,
      "step": 2327,
      "training_step_time": 0.686795711517334
    },
    {
      "epoch": 1.4208984375e-05,
      "model_forward_time": 0.11400890350341797,
      "step": 2328
    },
    {
      "epoch": 1.4208984375e-05,
      "step": 2328,
      "training_step_time": 0.3817167282104492
    },
    {
      "epoch": 1.4215087890625e-05,
      "model_forward_time": 0.11454892158508301,
      "step": 2329
    },
    {
      "epoch": 1.4215087890625e-05,
      "step": 2329,
      "training_step_time": 0.4070861339569092
    },
    {
      "epoch": 1.422119140625e-05,
      "grad_norm": 0.5959402322769165,
      "learning_rate": 7.766666666666667e-05,
      "loss": 0.1508,
      "step": 2330
    },
    {
      "epoch": 1.422119140625e-05,
      "model_forward_time": 0.11378169059753418,
      "step": 2330
    },
    {
      "epoch": 1.422119140625e-05,
      "step": 2330,
      "training_step_time": 0.42420530319213867
    },
    {
      "epoch": 1.4227294921875e-05,
      "model_forward_time": 0.11472654342651367,
      "step": 2331
    },
    {
      "epoch": 1.4227294921875e-05,
      "step": 2331,
      "training_step_time": 0.3923075199127197
    },
    {
      "epoch": 1.42333984375e-05,
      "model_forward_time": 0.11382055282592773,
      "step": 2332
    },
    {
      "epoch": 1.42333984375e-05,
      "step": 2332,
      "training_step_time": 0.4669320583343506
    },
    {
      "epoch": 1.4239501953125e-05,
      "model_forward_time": 0.11511659622192383,
      "step": 2333
    },
    {
      "epoch": 1.4239501953125e-05,
      "step": 2333,
      "training_step_time": 0.7492408752441406
    },
    {
      "epoch": 1.424560546875e-05,
      "model_forward_time": 0.11411333084106445,
      "step": 2334
    },
    {
      "epoch": 1.424560546875e-05,
      "step": 2334,
      "training_step_time": 0.46096062660217285
    },
    {
      "epoch": 1.4251708984375e-05,
      "model_forward_time": 0.1146843433380127,
      "step": 2335
    },
    {
      "epoch": 1.4251708984375e-05,
      "step": 2335,
      "training_step_time": 0.4358551502227783
    },
    {
      "epoch": 1.42578125e-05,
      "model_forward_time": 0.11531758308410645,
      "step": 2336
    },
    {
      "epoch": 1.42578125e-05,
      "step": 2336,
      "training_step_time": 0.45780324935913086
    },
    {
      "epoch": 1.4263916015625e-05,
      "model_forward_time": 0.11458230018615723,
      "step": 2337
    },
    {
      "epoch": 1.4263916015625e-05,
      "step": 2337,
      "training_step_time": 0.38405847549438477
    },
    {
      "epoch": 1.427001953125e-05,
      "model_forward_time": 0.11420035362243652,
      "step": 2338
    },
    {
      "epoch": 1.427001953125e-05,
      "step": 2338,
      "training_step_time": 0.39426231384277344
    },
    {
      "epoch": 1.4276123046875e-05,
      "model_forward_time": 0.11638832092285156,
      "step": 2339
    },
    {
      "epoch": 1.4276123046875e-05,
      "step": 2339,
      "training_step_time": 0.6541035175323486
    },
    {
      "epoch": 1.42822265625e-05,
      "grad_norm": 0.6173917055130005,
      "learning_rate": 7.800000000000001e-05,
      "loss": 0.1399,
      "step": 2340
    },
    {
      "epoch": 1.42822265625e-05,
      "model_forward_time": 0.1148078441619873,
      "step": 2340
    },
    {
      "epoch": 1.42822265625e-05,
      "step": 2340,
      "training_step_time": 0.39679956436157227
    },
    {
      "epoch": 1.4288330078125e-05,
      "model_forward_time": 0.11466097831726074,
      "step": 2341
    },
    {
      "epoch": 1.4288330078125e-05,
      "step": 2341,
      "training_step_time": 0.43271446228027344
    },
    {
      "epoch": 1.429443359375e-05,
      "model_forward_time": 0.11513328552246094,
      "step": 2342
    },
    {
      "epoch": 1.429443359375e-05,
      "step": 2342,
      "training_step_time": 0.4033064842224121
    },
    {
      "epoch": 1.4300537109375e-05,
      "model_forward_time": 0.11507821083068848,
      "step": 2343
    },
    {
      "epoch": 1.4300537109375e-05,
      "step": 2343,
      "training_step_time": 0.3965115547180176
    },
    {
      "epoch": 1.4306640625e-05,
      "model_forward_time": 0.1148681640625,
      "step": 2344
    },
    {
      "epoch": 1.4306640625e-05,
      "step": 2344,
      "training_step_time": 0.3868739604949951
    },
    {
      "epoch": 1.4312744140625e-05,
      "model_forward_time": 0.11506366729736328,
      "step": 2345
    },
    {
      "epoch": 1.4312744140625e-05,
      "step": 2345,
      "training_step_time": 0.7370791435241699
    },
    {
      "epoch": 1.431884765625e-05,
      "model_forward_time": 0.11478137969970703,
      "step": 2346
    },
    {
      "epoch": 1.431884765625e-05,
      "step": 2346,
      "training_step_time": 0.40314149856567383
    },
    {
      "epoch": 1.4324951171875e-05,
      "model_forward_time": 0.11598730087280273,
      "step": 2347
    },
    {
      "epoch": 1.4324951171875e-05,
      "step": 2347,
      "training_step_time": 0.4535961151123047
    },
    {
      "epoch": 1.43310546875e-05,
      "model_forward_time": 0.11460638046264648,
      "step": 2348
    },
    {
      "epoch": 1.43310546875e-05,
      "step": 2348,
      "training_step_time": 0.3637669086456299
    },
    {
      "epoch": 1.4337158203125e-05,
      "model_forward_time": 0.11445760726928711,
      "step": 2349
    },
    {
      "epoch": 1.4337158203125e-05,
      "step": 2349,
      "training_step_time": 0.42766880989074707
    },
    {
      "epoch": 1.434326171875e-05,
      "grad_norm": 0.5223784446716309,
      "learning_rate": 7.833333333333333e-05,
      "loss": 0.1434,
      "step": 2350
    },
    {
      "epoch": 1.434326171875e-05,
      "model_forward_time": 0.11428356170654297,
      "step": 2350
    },
    {
      "epoch": 1.434326171875e-05,
      "step": 2350,
      "training_step_time": 0.4732632637023926
    },
    {
      "epoch": 1.4349365234375e-05,
      "model_forward_time": 0.1152503490447998,
      "step": 2351
    },
    {
      "epoch": 1.4349365234375e-05,
      "step": 2351,
      "training_step_time": 0.4172043800354004
    },
    {
      "epoch": 1.435546875e-05,
      "model_forward_time": 0.11483311653137207,
      "step": 2352
    },
    {
      "epoch": 1.435546875e-05,
      "step": 2352,
      "training_step_time": 0.39458465576171875
    },
    {
      "epoch": 1.4361572265625e-05,
      "model_forward_time": 0.11537718772888184,
      "step": 2353
    },
    {
      "epoch": 1.4361572265625e-05,
      "step": 2353,
      "training_step_time": 0.38988161087036133
    },
    {
      "epoch": 1.436767578125e-05,
      "model_forward_time": 0.11477279663085938,
      "step": 2354
    },
    {
      "epoch": 1.436767578125e-05,
      "step": 2354,
      "training_step_time": 0.3972916603088379
    },
    {
      "epoch": 1.4373779296875e-05,
      "model_forward_time": 0.11544513702392578,
      "step": 2355
    },
    {
      "epoch": 1.4373779296875e-05,
      "step": 2355,
      "training_step_time": 0.4193460941314697
    },
    {
      "epoch": 1.43798828125e-05,
      "model_forward_time": 0.11544322967529297,
      "step": 2356
    },
    {
      "epoch": 1.43798828125e-05,
      "step": 2356,
      "training_step_time": 0.38967204093933105
    },
    {
      "epoch": 1.4385986328125e-05,
      "model_forward_time": 0.11506843566894531,
      "step": 2357
    },
    {
      "epoch": 1.4385986328125e-05,
      "step": 2357,
      "training_step_time": 0.8300654888153076
    },
    {
      "epoch": 1.439208984375e-05,
      "model_forward_time": 0.11471891403198242,
      "step": 2358
    },
    {
      "epoch": 1.439208984375e-05,
      "step": 2358,
      "training_step_time": 0.38521552085876465
    },
    {
      "epoch": 1.4398193359375e-05,
      "model_forward_time": 0.11469149589538574,
      "step": 2359
    },
    {
      "epoch": 1.4398193359375e-05,
      "step": 2359,
      "training_step_time": 0.4100196361541748
    },
    {
      "epoch": 1.4404296875e-05,
      "grad_norm": 0.5949745774269104,
      "learning_rate": 7.866666666666666e-05,
      "loss": 0.14,
      "step": 2360
    },
    {
      "epoch": 1.4404296875e-05,
      "model_forward_time": 0.11375808715820312,
      "step": 2360
    },
    {
      "epoch": 1.4404296875e-05,
      "step": 2360,
      "training_step_time": 0.46447324752807617
    },
    {
      "epoch": 1.4410400390625e-05,
      "model_forward_time": 0.11398696899414062,
      "step": 2361
    },
    {
      "epoch": 1.4410400390625e-05,
      "step": 2361,
      "training_step_time": 0.492922306060791
    },
    {
      "epoch": 1.441650390625e-05,
      "model_forward_time": 0.11440896987915039,
      "step": 2362
    },
    {
      "epoch": 1.441650390625e-05,
      "step": 2362,
      "training_step_time": 0.4563577175140381
    },
    {
      "epoch": 1.4422607421875e-05,
      "model_forward_time": 0.11511778831481934,
      "step": 2363
    },
    {
      "epoch": 1.4422607421875e-05,
      "step": 2363,
      "training_step_time": 0.5663292407989502
    },
    {
      "epoch": 1.44287109375e-05,
      "model_forward_time": 0.11475467681884766,
      "step": 2364
    },
    {
      "epoch": 1.44287109375e-05,
      "step": 2364,
      "training_step_time": 0.44655513763427734
    },
    {
      "epoch": 1.4434814453125e-05,
      "model_forward_time": 0.11407780647277832,
      "step": 2365
    },
    {
      "epoch": 1.4434814453125e-05,
      "step": 2365,
      "training_step_time": 0.38770604133605957
    },
    {
      "epoch": 1.444091796875e-05,
      "model_forward_time": 0.11405539512634277,
      "step": 2366
    },
    {
      "epoch": 1.444091796875e-05,
      "step": 2366,
      "training_step_time": 0.39879894256591797
    },
    {
      "epoch": 1.4447021484375e-05,
      "model_forward_time": 0.11533093452453613,
      "step": 2367
    },
    {
      "epoch": 1.4447021484375e-05,
      "step": 2367,
      "training_step_time": 0.42423009872436523
    },
    {
      "epoch": 1.4453125e-05,
      "model_forward_time": 0.11586546897888184,
      "step": 2368
    },
    {
      "epoch": 1.4453125e-05,
      "step": 2368,
      "training_step_time": 0.4009988307952881
    },
    {
      "epoch": 1.4459228515625e-05,
      "model_forward_time": 0.11567425727844238,
      "step": 2369
    },
    {
      "epoch": 1.4459228515625e-05,
      "step": 2369,
      "training_step_time": 0.6333999633789062
    },
    {
      "epoch": 1.446533203125e-05,
      "grad_norm": 0.3977593779563904,
      "learning_rate": 7.900000000000001e-05,
      "loss": 0.1335,
      "step": 2370
    },
    {
      "epoch": 1.446533203125e-05,
      "model_forward_time": 0.11418795585632324,
      "step": 2370
    },
    {
      "epoch": 1.446533203125e-05,
      "step": 2370,
      "training_step_time": 0.39452147483825684
    },
    {
      "epoch": 1.4471435546875e-05,
      "model_forward_time": 0.11564064025878906,
      "step": 2371
    },
    {
      "epoch": 1.4471435546875e-05,
      "step": 2371,
      "training_step_time": 0.4009096622467041
    },
    {
      "epoch": 1.44775390625e-05,
      "model_forward_time": 0.11458158493041992,
      "step": 2372
    },
    {
      "epoch": 1.44775390625e-05,
      "step": 2372,
      "training_step_time": 0.3972604274749756
    },
    {
      "epoch": 1.4483642578125e-05,
      "model_forward_time": 0.11495399475097656,
      "step": 2373
    },
    {
      "epoch": 1.4483642578125e-05,
      "step": 2373,
      "training_step_time": 0.47345685958862305
    },
    {
      "epoch": 1.448974609375e-05,
      "model_forward_time": 0.11529970169067383,
      "step": 2374
    },
    {
      "epoch": 1.448974609375e-05,
      "step": 2374,
      "training_step_time": 0.451305627822876
    },
    {
      "epoch": 1.4495849609375e-05,
      "model_forward_time": 0.11434769630432129,
      "step": 2375
    },
    {
      "epoch": 1.4495849609375e-05,
      "step": 2375,
      "training_step_time": 0.6833727359771729
    },
    {
      "epoch": 1.4501953125e-05,
      "model_forward_time": 0.11485099792480469,
      "step": 2376
    },
    {
      "epoch": 1.4501953125e-05,
      "step": 2376,
      "training_step_time": 0.4288957118988037
    },
    {
      "epoch": 1.4508056640625e-05,
      "model_forward_time": 0.11465263366699219,
      "step": 2377
    },
    {
      "epoch": 1.4508056640625e-05,
      "step": 2377,
      "training_step_time": 0.46848106384277344
    },
    {
      "epoch": 1.451416015625e-05,
      "model_forward_time": 0.11418795585632324,
      "step": 2378
    },
    {
      "epoch": 1.451416015625e-05,
      "step": 2378,
      "training_step_time": 0.38321900367736816
    },
    {
      "epoch": 1.4520263671875e-05,
      "model_forward_time": 0.11463642120361328,
      "step": 2379
    },
    {
      "epoch": 1.4520263671875e-05,
      "step": 2379,
      "training_step_time": 0.46192455291748047
    },
    {
      "epoch": 1.45263671875e-05,
      "grad_norm": 0.34166935086250305,
      "learning_rate": 7.933333333333334e-05,
      "loss": 0.128,
      "step": 2380
    },
    {
      "epoch": 1.45263671875e-05,
      "model_forward_time": 0.1153268814086914,
      "step": 2380
    },
    {
      "epoch": 1.45263671875e-05,
      "step": 2380,
      "training_step_time": 0.41818785667419434
    },
    {
      "epoch": 1.4532470703125e-05,
      "model_forward_time": 0.11492156982421875,
      "step": 2381
    },
    {
      "epoch": 1.4532470703125e-05,
      "step": 2381,
      "training_step_time": 0.5509686470031738
    },
    {
      "epoch": 1.453857421875e-05,
      "model_forward_time": 0.1154792308807373,
      "step": 2382
    },
    {
      "epoch": 1.453857421875e-05,
      "step": 2382,
      "training_step_time": 0.390458345413208
    },
    {
      "epoch": 1.4544677734375e-05,
      "model_forward_time": 0.11485028266906738,
      "step": 2383
    },
    {
      "epoch": 1.4544677734375e-05,
      "step": 2383,
      "training_step_time": 0.39319801330566406
    },
    {
      "epoch": 1.455078125e-05,
      "model_forward_time": 0.11521220207214355,
      "step": 2384
    },
    {
      "epoch": 1.455078125e-05,
      "step": 2384,
      "training_step_time": 0.38994646072387695
    },
    {
      "epoch": 1.4556884765625e-05,
      "model_forward_time": 0.11514616012573242,
      "step": 2385
    },
    {
      "epoch": 1.4556884765625e-05,
      "step": 2385,
      "training_step_time": 0.3942234516143799
    },
    {
      "epoch": 1.456298828125e-05,
      "model_forward_time": 0.11578845977783203,
      "step": 2386
    },
    {
      "epoch": 1.456298828125e-05,
      "step": 2386,
      "training_step_time": 0.3960292339324951
    },
    {
      "epoch": 1.4569091796875e-05,
      "model_forward_time": 0.11498332023620605,
      "step": 2387
    },
    {
      "epoch": 1.4569091796875e-05,
      "step": 2387,
      "training_step_time": 0.8465025424957275
    },
    {
      "epoch": 1.45751953125e-05,
      "model_forward_time": 0.11420536041259766,
      "step": 2388
    },
    {
      "epoch": 1.45751953125e-05,
      "step": 2388,
      "training_step_time": 0.4287729263305664
    },
    {
      "epoch": 1.4581298828125e-05,
      "model_forward_time": 0.11509084701538086,
      "step": 2389
    },
    {
      "epoch": 1.4581298828125e-05,
      "step": 2389,
      "training_step_time": 0.37438249588012695
    },
    {
      "epoch": 1.458740234375e-05,
      "grad_norm": 0.42962968349456787,
      "learning_rate": 7.966666666666666e-05,
      "loss": 0.1478,
      "step": 2390
    },
    {
      "epoch": 1.458740234375e-05,
      "model_forward_time": 0.11426711082458496,
      "step": 2390
    },
    {
      "epoch": 1.458740234375e-05,
      "step": 2390,
      "training_step_time": 0.45406293869018555
    },
    {
      "epoch": 1.4593505859375e-05,
      "model_forward_time": 0.11413335800170898,
      "step": 2391
    },
    {
      "epoch": 1.4593505859375e-05,
      "step": 2391,
      "training_step_time": 0.4095945358276367
    },
    {
      "epoch": 1.4599609375e-05,
      "model_forward_time": 0.11463022232055664,
      "step": 2392
    },
    {
      "epoch": 1.4599609375e-05,
      "step": 2392,
      "training_step_time": 0.412600040435791
    },
    {
      "epoch": 1.4605712890625e-05,
      "model_forward_time": 0.11470866203308105,
      "step": 2393
    },
    {
      "epoch": 1.4605712890625e-05,
      "step": 2393,
      "training_step_time": 0.5831282138824463
    },
    {
      "epoch": 1.461181640625e-05,
      "model_forward_time": 0.11585712432861328,
      "step": 2394
    },
    {
      "epoch": 1.461181640625e-05,
      "step": 2394,
      "training_step_time": 0.39278531074523926
    },
    {
      "epoch": 1.4617919921875e-05,
      "model_forward_time": 0.11492252349853516,
      "step": 2395
    },
    {
      "epoch": 1.4617919921875e-05,
      "step": 2395,
      "training_step_time": 0.39132237434387207
    },
    {
      "epoch": 1.46240234375e-05,
      "model_forward_time": 0.11518979072570801,
      "step": 2396
    },
    {
      "epoch": 1.46240234375e-05,
      "step": 2396,
      "training_step_time": 0.3958098888397217
    },
    {
      "epoch": 1.4630126953125e-05,
      "model_forward_time": 0.11510205268859863,
      "step": 2397
    },
    {
      "epoch": 1.4630126953125e-05,
      "step": 2397,
      "training_step_time": 0.39762282371520996
    },
    {
      "epoch": 1.463623046875e-05,
      "model_forward_time": 0.11492562294006348,
      "step": 2398
    },
    {
      "epoch": 1.463623046875e-05,
      "step": 2398,
      "training_step_time": 0.39042186737060547
    },
    {
      "epoch": 1.4642333984375e-05,
      "model_forward_time": 0.11528635025024414,
      "step": 2399
    },
    {
      "epoch": 1.4642333984375e-05,
      "step": 2399,
      "training_step_time": 0.8481488227844238
    },
    {
      "epoch": 1.46484375e-05,
      "grad_norm": 0.42388248443603516,
      "learning_rate": 8e-05,
      "loss": 0.1463,
      "step": 2400
    },
    {
      "epoch": 1.46484375e-05,
      "model_forward_time": 0.11447834968566895,
      "step": 2400
    },
    {
      "epoch": 1.46484375e-05,
      "step": 2400,
      "training_step_time": 0.4511401653289795
    },
    {
      "epoch": 1.4654541015625e-05,
      "model_forward_time": 0.11464548110961914,
      "step": 2401
    },
    {
      "epoch": 1.4654541015625e-05,
      "step": 2401,
      "training_step_time": 0.47190022468566895
    },
    {
      "epoch": 1.466064453125e-05,
      "model_forward_time": 0.11448550224304199,
      "step": 2402
    },
    {
      "epoch": 1.466064453125e-05,
      "step": 2402,
      "training_step_time": 0.3662538528442383
    },
    {
      "epoch": 1.4666748046875e-05,
      "model_forward_time": 0.11479854583740234,
      "step": 2403
    },
    {
      "epoch": 1.4666748046875e-05,
      "step": 2403,
      "training_step_time": 0.4528212547302246
    },
    {
      "epoch": 1.46728515625e-05,
      "model_forward_time": 0.1141958236694336,
      "step": 2404
    },
    {
      "epoch": 1.46728515625e-05,
      "step": 2404,
      "training_step_time": 0.4635765552520752
    },
    {
      "epoch": 1.4678955078125e-05,
      "model_forward_time": 0.11452889442443848,
      "step": 2405
    },
    {
      "epoch": 1.4678955078125e-05,
      "step": 2405,
      "training_step_time": 0.4112975597381592
    },
    {
      "epoch": 1.468505859375e-05,
      "model_forward_time": 0.11472606658935547,
      "step": 2406
    },
    {
      "epoch": 1.468505859375e-05,
      "step": 2406,
      "training_step_time": 0.3881714344024658
    },
    {
      "epoch": 1.4691162109375e-05,
      "model_forward_time": 0.11522340774536133,
      "step": 2407
    },
    {
      "epoch": 1.4691162109375e-05,
      "step": 2407,
      "training_step_time": 0.39377737045288086
    },
    {
      "epoch": 1.4697265625e-05,
      "model_forward_time": 0.11465764045715332,
      "step": 2408
    },
    {
      "epoch": 1.4697265625e-05,
      "step": 2408,
      "training_step_time": 0.39911437034606934
    },
    {
      "epoch": 1.4703369140625e-05,
      "model_forward_time": 0.11570096015930176,
      "step": 2409
    },
    {
      "epoch": 1.4703369140625e-05,
      "step": 2409,
      "training_step_time": 0.3953087329864502
    },
    {
      "epoch": 1.470947265625e-05,
      "grad_norm": 0.6055220365524292,
      "learning_rate": 8.033333333333334e-05,
      "loss": 0.1341,
      "step": 2410
    },
    {
      "epoch": 1.470947265625e-05,
      "model_forward_time": 0.1147913932800293,
      "step": 2410
    },
    {
      "epoch": 1.470947265625e-05,
      "step": 2410,
      "training_step_time": 0.37856173515319824
    },
    {
      "epoch": 1.4715576171875e-05,
      "model_forward_time": 0.11460328102111816,
      "step": 2411
    },
    {
      "epoch": 1.4715576171875e-05,
      "step": 2411,
      "training_step_time": 0.5227961540222168
    },
    {
      "epoch": 1.47216796875e-05,
      "model_forward_time": 0.1150197982788086,
      "step": 2412
    },
    {
      "epoch": 1.47216796875e-05,
      "step": 2412,
      "training_step_time": 0.3964390754699707
    },
    {
      "epoch": 1.4727783203125e-05,
      "model_forward_time": 0.1146540641784668,
      "step": 2413
    },
    {
      "epoch": 1.4727783203125e-05,
      "step": 2413,
      "training_step_time": 0.3996164798736572
    },
    {
      "epoch": 1.473388671875e-05,
      "model_forward_time": 0.11540913581848145,
      "step": 2414
    },
    {
      "epoch": 1.473388671875e-05,
      "step": 2414,
      "training_step_time": 0.47406721115112305
    },
    {
      "epoch": 1.4739990234375e-05,
      "model_forward_time": 0.11520814895629883,
      "step": 2415
    },
    {
      "epoch": 1.4739990234375e-05,
      "step": 2415,
      "training_step_time": 0.4619438648223877
    },
    {
      "epoch": 1.474609375e-05,
      "model_forward_time": 0.11550211906433105,
      "step": 2416
    },
    {
      "epoch": 1.474609375e-05,
      "step": 2416,
      "training_step_time": 0.45143938064575195
    },
    {
      "epoch": 1.4752197265625e-05,
      "model_forward_time": 0.11543869972229004,
      "step": 2417
    },
    {
      "epoch": 1.4752197265625e-05,
      "step": 2417,
      "training_step_time": 0.6799092292785645
    },
    {
      "epoch": 1.475830078125e-05,
      "model_forward_time": 0.11479973793029785,
      "step": 2418
    },
    {
      "epoch": 1.475830078125e-05,
      "step": 2418,
      "training_step_time": 0.4228827953338623
    },
    {
      "epoch": 1.4764404296875e-05,
      "model_forward_time": 0.11447358131408691,
      "step": 2419
    },
    {
      "epoch": 1.4764404296875e-05,
      "step": 2419,
      "training_step_time": 0.39436912536621094
    },
    {
      "epoch": 1.47705078125e-05,
      "grad_norm": 0.27308639883995056,
      "learning_rate": 8.066666666666667e-05,
      "loss": 0.1335,
      "step": 2420
    },
    {
      "epoch": 1.47705078125e-05,
      "model_forward_time": 0.11394619941711426,
      "step": 2420
    },
    {
      "epoch": 1.47705078125e-05,
      "step": 2420,
      "training_step_time": 0.3937265872955322
    },
    {
      "epoch": 1.4776611328125e-05,
      "model_forward_time": 0.11457371711730957,
      "step": 2421
    },
    {
      "epoch": 1.4776611328125e-05,
      "step": 2421,
      "training_step_time": 0.3971407413482666
    },
    {
      "epoch": 1.478271484375e-05,
      "model_forward_time": 0.11462163925170898,
      "step": 2422
    },
    {
      "epoch": 1.478271484375e-05,
      "step": 2422,
      "training_step_time": 0.3793942928314209
    },
    {
      "epoch": 1.4788818359375e-05,
      "model_forward_time": 0.11462831497192383,
      "step": 2423
    },
    {
      "epoch": 1.4788818359375e-05,
      "step": 2423,
      "training_step_time": 0.68166184425354
    },
    {
      "epoch": 1.4794921875e-05,
      "model_forward_time": 0.11477851867675781,
      "step": 2424
    },
    {
      "epoch": 1.4794921875e-05,
      "step": 2424,
      "training_step_time": 0.3883798122406006
    },
    {
      "epoch": 1.4801025390625e-05,
      "model_forward_time": 0.11508393287658691,
      "step": 2425
    },
    {
      "epoch": 1.4801025390625e-05,
      "step": 2425,
      "training_step_time": 0.3881072998046875
    },
    {
      "epoch": 1.480712890625e-05,
      "model_forward_time": 0.11491727828979492,
      "step": 2426
    },
    {
      "epoch": 1.480712890625e-05,
      "step": 2426,
      "training_step_time": 0.39472460746765137
    },
    {
      "epoch": 1.4813232421875e-05,
      "model_forward_time": 0.11491227149963379,
      "step": 2427
    },
    {
      "epoch": 1.4813232421875e-05,
      "step": 2427,
      "training_step_time": 0.3836324214935303
    },
    {
      "epoch": 1.48193359375e-05,
      "model_forward_time": 0.11669135093688965,
      "step": 2428
    },
    {
      "epoch": 1.48193359375e-05,
      "step": 2428,
      "training_step_time": 0.48638033866882324
    },
    {
      "epoch": 1.4825439453125e-05,
      "model_forward_time": 0.1153101921081543,
      "step": 2429
    },
    {
      "epoch": 1.4825439453125e-05,
      "step": 2429,
      "training_step_time": 0.577582836151123
    },
    {
      "epoch": 1.483154296875e-05,
      "grad_norm": 0.39970043301582336,
      "learning_rate": 8.1e-05,
      "loss": 0.1366,
      "step": 2430
    },
    {
      "epoch": 1.483154296875e-05,
      "model_forward_time": 0.11469745635986328,
      "step": 2430
    },
    {
      "epoch": 1.483154296875e-05,
      "step": 2430,
      "training_step_time": 0.36515188217163086
    },
    {
      "epoch": 1.4837646484375e-05,
      "model_forward_time": 0.11426734924316406,
      "step": 2431
    },
    {
      "epoch": 1.4837646484375e-05,
      "step": 2431,
      "training_step_time": 0.48929762840270996
    },
    {
      "epoch": 1.484375e-05,
      "model_forward_time": 0.11522269248962402,
      "step": 2432
    },
    {
      "epoch": 1.484375e-05,
      "step": 2432,
      "training_step_time": 0.41288304328918457
    },
    {
      "epoch": 1.4849853515625e-05,
      "model_forward_time": 0.11517047882080078,
      "step": 2433
    },
    {
      "epoch": 1.4849853515625e-05,
      "step": 2433,
      "training_step_time": 0.38978099822998047
    },
    {
      "epoch": 1.485595703125e-05,
      "model_forward_time": 0.11413359642028809,
      "step": 2434
    },
    {
      "epoch": 1.485595703125e-05,
      "step": 2434,
      "training_step_time": 0.38344764709472656
    },
    {
      "epoch": 1.4862060546875e-05,
      "model_forward_time": 0.11536431312561035,
      "step": 2435
    },
    {
      "epoch": 1.4862060546875e-05,
      "step": 2435,
      "training_step_time": 0.5987606048583984
    },
    {
      "epoch": 1.48681640625e-05,
      "model_forward_time": 0.1148233413696289,
      "step": 2436
    },
    {
      "epoch": 1.48681640625e-05,
      "step": 2436,
      "training_step_time": 0.3893582820892334
    },
    {
      "epoch": 1.4874267578125e-05,
      "model_forward_time": 0.11530256271362305,
      "step": 2437
    },
    {
      "epoch": 1.4874267578125e-05,
      "step": 2437,
      "training_step_time": 0.3922851085662842
    },
    {
      "epoch": 1.488037109375e-05,
      "model_forward_time": 0.11529994010925293,
      "step": 2438
    },
    {
      "epoch": 1.488037109375e-05,
      "step": 2438,
      "training_step_time": 0.3934650421142578
    },
    {
      "epoch": 1.4886474609375e-05,
      "model_forward_time": 0.11446285247802734,
      "step": 2439
    },
    {
      "epoch": 1.4886474609375e-05,
      "step": 2439,
      "training_step_time": 0.39322519302368164
    },
    {
      "epoch": 1.4892578125e-05,
      "grad_norm": 0.47629398107528687,
      "learning_rate": 8.133333333333334e-05,
      "loss": 0.143,
      "step": 2440
    },
    {
      "epoch": 1.4892578125e-05,
      "model_forward_time": 0.1148064136505127,
      "step": 2440
    },
    {
      "epoch": 1.4892578125e-05,
      "step": 2440,
      "training_step_time": 0.607079267501831
    },
    {
      "epoch": 1.4898681640625e-05,
      "model_forward_time": 0.11493706703186035,
      "step": 2441
    },
    {
      "epoch": 1.4898681640625e-05,
      "step": 2441,
      "training_step_time": 0.5242204666137695
    },
    {
      "epoch": 1.490478515625e-05,
      "model_forward_time": 0.11524677276611328,
      "step": 2442
    },
    {
      "epoch": 1.490478515625e-05,
      "step": 2442,
      "training_step_time": 0.4948999881744385
    },
    {
      "epoch": 1.4910888671875e-05,
      "model_forward_time": 0.11467695236206055,
      "step": 2443
    },
    {
      "epoch": 1.4910888671875e-05,
      "step": 2443,
      "training_step_time": 0.4757843017578125
    },
    {
      "epoch": 1.49169921875e-05,
      "model_forward_time": 0.11429381370544434,
      "step": 2444
    },
    {
      "epoch": 1.49169921875e-05,
      "step": 2444,
      "training_step_time": 0.4314908981323242
    },
    {
      "epoch": 1.4923095703125e-05,
      "model_forward_time": 0.11421608924865723,
      "step": 2445
    },
    {
      "epoch": 1.4923095703125e-05,
      "step": 2445,
      "training_step_time": 0.414539098739624
    },
    {
      "epoch": 1.492919921875e-05,
      "model_forward_time": 0.11497950553894043,
      "step": 2446
    },
    {
      "epoch": 1.492919921875e-05,
      "step": 2446,
      "training_step_time": 0.5218744277954102
    },
    {
      "epoch": 1.4935302734375e-05,
      "model_forward_time": 0.11434555053710938,
      "step": 2447
    },
    {
      "epoch": 1.4935302734375e-05,
      "step": 2447,
      "training_step_time": 0.40632104873657227
    },
    {
      "epoch": 1.494140625e-05,
      "model_forward_time": 0.1140139102935791,
      "step": 2448
    },
    {
      "epoch": 1.494140625e-05,
      "step": 2448,
      "training_step_time": 0.39267563819885254
    },
    {
      "epoch": 1.4947509765625e-05,
      "model_forward_time": 0.11472058296203613,
      "step": 2449
    },
    {
      "epoch": 1.4947509765625e-05,
      "step": 2449,
      "training_step_time": 0.38817524909973145
    },
    {
      "epoch": 1.495361328125e-05,
      "grad_norm": 0.4305209815502167,
      "learning_rate": 8.166666666666667e-05,
      "loss": 0.1388,
      "step": 2450
    },
    {
      "epoch": 1.495361328125e-05,
      "model_forward_time": 0.11505961418151855,
      "step": 2450
    },
    {
      "epoch": 1.495361328125e-05,
      "step": 2450,
      "training_step_time": 0.389418363571167
    },
    {
      "epoch": 1.4959716796875e-05,
      "model_forward_time": 0.11551070213317871,
      "step": 2451
    },
    {
      "epoch": 1.4959716796875e-05,
      "step": 2451,
      "training_step_time": 0.3960869312286377
    },
    {
      "epoch": 1.49658203125e-05,
      "model_forward_time": 0.1155695915222168,
      "step": 2452
    },
    {
      "epoch": 1.49658203125e-05,
      "step": 2452,
      "training_step_time": 0.8399202823638916
    },
    {
      "epoch": 1.4971923828125e-05,
      "model_forward_time": 0.1147453784942627,
      "step": 2453
    },
    {
      "epoch": 1.4971923828125e-05,
      "step": 2453,
      "training_step_time": 0.4403083324432373
    },
    {
      "epoch": 1.497802734375e-05,
      "model_forward_time": 0.11425924301147461,
      "step": 2454
    },
    {
      "epoch": 1.497802734375e-05,
      "step": 2454,
      "training_step_time": 0.38626790046691895
    },
    {
      "epoch": 1.4984130859375e-05,
      "model_forward_time": 0.11436700820922852,
      "step": 2455
    },
    {
      "epoch": 1.4984130859375e-05,
      "step": 2455,
      "training_step_time": 0.42406344413757324
    },
    {
      "epoch": 1.4990234375e-05,
      "model_forward_time": 0.11518502235412598,
      "step": 2456
    },
    {
      "epoch": 1.4990234375e-05,
      "step": 2456,
      "training_step_time": 0.41197729110717773
    },
    {
      "epoch": 1.4996337890625e-05,
      "model_forward_time": 0.11431288719177246,
      "step": 2457
    },
    {
      "epoch": 1.4996337890625e-05,
      "step": 2457,
      "training_step_time": 0.4702129364013672
    },
    {
      "epoch": 1.500244140625e-05,
      "model_forward_time": 0.11485624313354492,
      "step": 2458
    },
    {
      "epoch": 1.500244140625e-05,
      "step": 2458,
      "training_step_time": 0.38962411880493164
    },
    {
      "epoch": 1.5008544921875e-05,
      "model_forward_time": 0.11618757247924805,
      "step": 2459
    },
    {
      "epoch": 1.5008544921875e-05,
      "step": 2459,
      "training_step_time": 0.8656532764434814
    },
    {
      "epoch": 1.50146484375e-05,
      "grad_norm": 0.6467142105102539,
      "learning_rate": 8.2e-05,
      "loss": 0.1428,
      "step": 2460
    },
    {
      "epoch": 1.50146484375e-05,
      "model_forward_time": 0.11397695541381836,
      "step": 2460
    },
    {
      "epoch": 1.50146484375e-05,
      "step": 2460,
      "training_step_time": 0.3860585689544678
    },
    {
      "epoch": 1.5020751953125e-05,
      "model_forward_time": 0.11639690399169922,
      "step": 2461
    },
    {
      "epoch": 1.5020751953125e-05,
      "step": 2461,
      "training_step_time": 0.3861112594604492
    },
    {
      "epoch": 1.502685546875e-05,
      "model_forward_time": 0.11418318748474121,
      "step": 2462
    },
    {
      "epoch": 1.502685546875e-05,
      "step": 2462,
      "training_step_time": 0.3848721981048584
    },
    {
      "epoch": 1.5032958984375e-05,
      "model_forward_time": 0.11379837989807129,
      "step": 2463
    },
    {
      "epoch": 1.5032958984375e-05,
      "step": 2463,
      "training_step_time": 0.3922312259674072
    },
    {
      "epoch": 1.50390625e-05,
      "model_forward_time": 0.11439800262451172,
      "step": 2464
    },
    {
      "epoch": 1.50390625e-05,
      "step": 2464,
      "training_step_time": 0.3901822566986084
    },
    {
      "epoch": 1.5045166015625e-05,
      "model_forward_time": 0.11495018005371094,
      "step": 2465
    },
    {
      "epoch": 1.5045166015625e-05,
      "step": 2465,
      "training_step_time": 0.7135899066925049
    },
    {
      "epoch": 1.505126953125e-05,
      "model_forward_time": 0.11443042755126953,
      "step": 2466
    },
    {
      "epoch": 1.505126953125e-05,
      "step": 2466,
      "training_step_time": 0.40442323684692383
    },
    {
      "epoch": 1.5057373046875e-05,
      "model_forward_time": 0.11465716361999512,
      "step": 2467
    },
    {
      "epoch": 1.5057373046875e-05,
      "step": 2467,
      "training_step_time": 0.3862886428833008
    },
    {
      "epoch": 1.50634765625e-05,
      "model_forward_time": 0.11481761932373047,
      "step": 2468
    },
    {
      "epoch": 1.50634765625e-05,
      "step": 2468,
      "training_step_time": 0.432788610458374
    },
    {
      "epoch": 1.5069580078125e-05,
      "model_forward_time": 0.11487913131713867,
      "step": 2469
    },
    {
      "epoch": 1.5069580078125e-05,
      "step": 2469,
      "training_step_time": 0.45270633697509766
    },
    {
      "epoch": 1.507568359375e-05,
      "grad_norm": 0.48534655570983887,
      "learning_rate": 8.233333333333333e-05,
      "loss": 0.1344,
      "step": 2470
    },
    {
      "epoch": 1.507568359375e-05,
      "model_forward_time": 0.11460089683532715,
      "step": 2470
    },
    {
      "epoch": 1.507568359375e-05,
      "step": 2470,
      "training_step_time": 0.4488039016723633
    },
    {
      "epoch": 1.5081787109375e-05,
      "model_forward_time": 0.11596798896789551,
      "step": 2471
    },
    {
      "epoch": 1.5081787109375e-05,
      "step": 2471,
      "training_step_time": 0.512211799621582
    },
    {
      "epoch": 1.5087890625e-05,
      "model_forward_time": 0.11481308937072754,
      "step": 2472
    },
    {
      "epoch": 1.5087890625e-05,
      "step": 2472,
      "training_step_time": 0.45339035987854004
    },
    {
      "epoch": 1.5093994140625e-05,
      "model_forward_time": 0.11511850357055664,
      "step": 2473
    },
    {
      "epoch": 1.5093994140625e-05,
      "step": 2473,
      "training_step_time": 0.38881492614746094
    },
    {
      "epoch": 1.510009765625e-05,
      "model_forward_time": 0.11496281623840332,
      "step": 2474
    },
    {
      "epoch": 1.510009765625e-05,
      "step": 2474,
      "training_step_time": 0.3989865779876709
    },
    {
      "epoch": 1.5106201171875e-05,
      "model_forward_time": 0.11427736282348633,
      "step": 2475
    },
    {
      "epoch": 1.5106201171875e-05,
      "step": 2475,
      "training_step_time": 0.3961296081542969
    },
    {
      "epoch": 1.51123046875e-05,
      "model_forward_time": 0.11468243598937988,
      "step": 2476
    },
    {
      "epoch": 1.51123046875e-05,
      "step": 2476,
      "training_step_time": 0.7114214897155762
    },
    {
      "epoch": 1.5118408203125e-05,
      "model_forward_time": 0.11452817916870117,
      "step": 2477
    },
    {
      "epoch": 1.5118408203125e-05,
      "step": 2477,
      "training_step_time": 0.4756350517272949
    },
    {
      "epoch": 1.512451171875e-05,
      "model_forward_time": 0.11458444595336914,
      "step": 2478
    },
    {
      "epoch": 1.512451171875e-05,
      "step": 2478,
      "training_step_time": 0.38956546783447266
    },
    {
      "epoch": 1.5130615234375e-05,
      "model_forward_time": 0.11493968963623047,
      "step": 2479
    },
    {
      "epoch": 1.5130615234375e-05,
      "step": 2479,
      "training_step_time": 0.38840222358703613
    },
    {
      "epoch": 1.513671875e-05,
      "grad_norm": 0.45099076628685,
      "learning_rate": 8.266666666666667e-05,
      "loss": 0.1348,
      "step": 2480
    },
    {
      "epoch": 1.513671875e-05,
      "model_forward_time": 0.11447834968566895,
      "step": 2480
    },
    {
      "epoch": 1.513671875e-05,
      "step": 2480,
      "training_step_time": 0.3969414234161377
    },
    {
      "epoch": 1.5142822265625e-05,
      "model_forward_time": 0.1144096851348877,
      "step": 2481
    },
    {
      "epoch": 1.5142822265625e-05,
      "step": 2481,
      "training_step_time": 0.43709373474121094
    },
    {
      "epoch": 1.514892578125e-05,
      "model_forward_time": 0.11505460739135742,
      "step": 2482
    },
    {
      "epoch": 1.514892578125e-05,
      "step": 2482,
      "training_step_time": 0.7257740497589111
    },
    {
      "epoch": 1.5155029296875e-05,
      "model_forward_time": 0.1147162914276123,
      "step": 2483
    },
    {
      "epoch": 1.5155029296875e-05,
      "step": 2483,
      "training_step_time": 0.45643115043640137
    },
    {
      "epoch": 1.51611328125e-05,
      "model_forward_time": 0.11495089530944824,
      "step": 2484
    },
    {
      "epoch": 1.51611328125e-05,
      "step": 2484,
      "training_step_time": 0.38979649543762207
    },
    {
      "epoch": 1.5167236328125e-05,
      "model_forward_time": 0.11452889442443848,
      "step": 2485
    },
    {
      "epoch": 1.5167236328125e-05,
      "step": 2485,
      "training_step_time": 0.4421241283416748
    },
    {
      "epoch": 1.517333984375e-05,
      "model_forward_time": 0.1145784854888916,
      "step": 2486
    },
    {
      "epoch": 1.517333984375e-05,
      "step": 2486,
      "training_step_time": 0.41530585289001465
    },
    {
      "epoch": 1.5179443359375e-05,
      "model_forward_time": 0.1143801212310791,
      "step": 2487
    },
    {
      "epoch": 1.5179443359375e-05,
      "step": 2487,
      "training_step_time": 0.38982701301574707
    },
    {
      "epoch": 1.5185546875e-05,
      "model_forward_time": 0.11427974700927734,
      "step": 2488
    },
    {
      "epoch": 1.5185546875e-05,
      "step": 2488,
      "training_step_time": 0.6551811695098877
    },
    {
      "epoch": 1.5191650390625e-05,
      "model_forward_time": 0.11436915397644043,
      "step": 2489
    },
    {
      "epoch": 1.5191650390625e-05,
      "step": 2489,
      "training_step_time": 0.38789868354797363
    },
    {
      "epoch": 1.519775390625e-05,
      "grad_norm": 0.48140519857406616,
      "learning_rate": 8.3e-05,
      "loss": 0.1299,
      "step": 2490
    },
    {
      "epoch": 1.519775390625e-05,
      "model_forward_time": 0.11472535133361816,
      "step": 2490
    },
    {
      "epoch": 1.519775390625e-05,
      "step": 2490,
      "training_step_time": 0.38533711433410645
    },
    {
      "epoch": 1.5203857421875e-05,
      "model_forward_time": 0.11469197273254395,
      "step": 2491
    },
    {
      "epoch": 1.5203857421875e-05,
      "step": 2491,
      "training_step_time": 0.39731383323669434
    },
    {
      "epoch": 1.52099609375e-05,
      "model_forward_time": 0.114990234375,
      "step": 2492
    },
    {
      "epoch": 1.52099609375e-05,
      "step": 2492,
      "training_step_time": 0.38409876823425293
    },
    {
      "epoch": 1.5216064453125e-05,
      "model_forward_time": 0.11517119407653809,
      "step": 2493
    },
    {
      "epoch": 1.5216064453125e-05,
      "step": 2493,
      "training_step_time": 0.43152499198913574
    },
    {
      "epoch": 1.522216796875e-05,
      "model_forward_time": 0.1160728931427002,
      "step": 2494
    },
    {
      "epoch": 1.522216796875e-05,
      "step": 2494,
      "training_step_time": 1.0634188652038574
    },
    {
      "epoch": 1.5228271484375e-05,
      "model_forward_time": 0.11407327651977539,
      "step": 2495
    },
    {
      "epoch": 1.5228271484375e-05,
      "step": 2495,
      "training_step_time": 0.43866467475891113
    },
    {
      "epoch": 1.5234375e-05,
      "model_forward_time": 0.11435627937316895,
      "step": 2496
    },
    {
      "epoch": 1.5234375e-05,
      "step": 2496,
      "training_step_time": 0.42378854751586914
    },
    {
      "epoch": 1.5240478515625e-05,
      "model_forward_time": 0.11396670341491699,
      "step": 2497
    },
    {
      "epoch": 1.5240478515625e-05,
      "step": 2497,
      "training_step_time": 0.36398959159851074
    },
    {
      "epoch": 1.524658203125e-05,
      "model_forward_time": 0.11362242698669434,
      "step": 2498
    },
    {
      "epoch": 1.524658203125e-05,
      "step": 2498,
      "training_step_time": 0.40549278259277344
    },
    {
      "epoch": 1.5252685546875e-05,
      "model_forward_time": 0.11520743370056152,
      "step": 2499
    },
    {
      "epoch": 1.5252685546875e-05,
      "step": 2499,
      "training_step_time": 0.442004919052124
    },
    {
      "epoch": 1.52587890625e-05,
      "grad_norm": 0.46489280462265015,
      "learning_rate": 8.333333333333334e-05,
      "loss": 0.1399,
      "step": 2500
    },
    {
      "epoch": 1.52587890625e-05,
      "model_forward_time": 0.11587190628051758,
      "step": 2500
    },
    {
      "epoch": 1.52587890625e-05,
      "step": 2500,
      "training_step_time": 0.397000789642334
    },
    {
      "epoch": 1.5264892578125e-05,
      "model_forward_time": 0.11472415924072266,
      "step": 2501
    },
    {
      "epoch": 1.5264892578125e-05,
      "step": 2501,
      "training_step_time": 0.39494800567626953
    },
    {
      "epoch": 1.527099609375e-05,
      "model_forward_time": 0.11532878875732422,
      "step": 2502
    },
    {
      "epoch": 1.527099609375e-05,
      "step": 2502,
      "training_step_time": 0.3914010524749756
    },
    {
      "epoch": 1.5277099609375e-05,
      "model_forward_time": 0.11624431610107422,
      "step": 2503
    },
    {
      "epoch": 1.5277099609375e-05,
      "step": 2503,
      "training_step_time": 0.4219245910644531
    },
    {
      "epoch": 1.5283203125e-05,
      "model_forward_time": 0.11581993103027344,
      "step": 2504
    },
    {
      "epoch": 1.5283203125e-05,
      "step": 2504,
      "training_step_time": 0.38701391220092773
    },
    {
      "epoch": 1.5289306640625e-05,
      "model_forward_time": 0.11519336700439453,
      "step": 2505
    },
    {
      "epoch": 1.5289306640625e-05,
      "step": 2505,
      "training_step_time": 0.42973780632019043
    },
    {
      "epoch": 1.529541015625e-05,
      "model_forward_time": 0.11490058898925781,
      "step": 2506
    },
    {
      "epoch": 1.529541015625e-05,
      "step": 2506,
      "training_step_time": 0.77608323097229
    },
    {
      "epoch": 1.5301513671875e-05,
      "model_forward_time": 0.11451482772827148,
      "step": 2507
    },
    {
      "epoch": 1.5301513671875e-05,
      "step": 2507,
      "training_step_time": 0.3847372531890869
    },
    {
      "epoch": 1.53076171875e-05,
      "model_forward_time": 0.11433267593383789,
      "step": 2508
    },
    {
      "epoch": 1.53076171875e-05,
      "step": 2508,
      "training_step_time": 0.4140045642852783
    },
    {
      "epoch": 1.5313720703125e-05,
      "model_forward_time": 0.11469411849975586,
      "step": 2509
    },
    {
      "epoch": 1.5313720703125e-05,
      "step": 2509,
      "training_step_time": 0.4795665740966797
    },
    {
      "epoch": 1.531982421875e-05,
      "grad_norm": 0.3503076732158661,
      "learning_rate": 8.366666666666668e-05,
      "loss": 0.141,
      "step": 2510
    },
    {
      "epoch": 1.531982421875e-05,
      "model_forward_time": 0.1152186393737793,
      "step": 2510
    },
    {
      "epoch": 1.531982421875e-05,
      "step": 2510,
      "training_step_time": 0.4586153030395508
    },
    {
      "epoch": 1.5325927734375e-05,
      "model_forward_time": 0.11465954780578613,
      "step": 2511
    },
    {
      "epoch": 1.5325927734375e-05,
      "step": 2511,
      "training_step_time": 0.4507887363433838
    },
    {
      "epoch": 1.533203125e-05,
      "model_forward_time": 0.11516118049621582,
      "step": 2512
    },
    {
      "epoch": 1.533203125e-05,
      "step": 2512,
      "training_step_time": 0.6533675193786621
    },
    {
      "epoch": 1.5338134765625e-05,
      "model_forward_time": 0.11439371109008789,
      "step": 2513
    },
    {
      "epoch": 1.5338134765625e-05,
      "step": 2513,
      "training_step_time": 0.49055027961730957
    },
    {
      "epoch": 1.534423828125e-05,
      "model_forward_time": 0.11442804336547852,
      "step": 2514
    },
    {
      "epoch": 1.534423828125e-05,
      "step": 2514,
      "training_step_time": 0.38234877586364746
    },
    {
      "epoch": 1.5350341796875e-05,
      "model_forward_time": 0.11507344245910645,
      "step": 2515
    },
    {
      "epoch": 1.5350341796875e-05,
      "step": 2515,
      "training_step_time": 0.3872959613800049
    },
    {
      "epoch": 1.53564453125e-05,
      "model_forward_time": 0.11480593681335449,
      "step": 2516
    },
    {
      "epoch": 1.53564453125e-05,
      "step": 2516,
      "training_step_time": 0.3915739059448242
    },
    {
      "epoch": 1.5362548828125e-05,
      "model_forward_time": 0.11457538604736328,
      "step": 2517
    },
    {
      "epoch": 1.5362548828125e-05,
      "step": 2517,
      "training_step_time": 0.40949416160583496
    },
    {
      "epoch": 1.536865234375e-05,
      "model_forward_time": 0.11474204063415527,
      "step": 2518
    },
    {
      "epoch": 1.536865234375e-05,
      "step": 2518,
      "training_step_time": 0.7528424263000488
    },
    {
      "epoch": 1.5374755859375e-05,
      "model_forward_time": 0.11460542678833008,
      "step": 2519
    },
    {
      "epoch": 1.5374755859375e-05,
      "step": 2519,
      "training_step_time": 0.41347193717956543
    },
    {
      "epoch": 1.5380859375e-05,
      "grad_norm": 0.4466148316860199,
      "learning_rate": 8.4e-05,
      "loss": 0.1311,
      "step": 2520
    },
    {
      "epoch": 1.5380859375e-05,
      "model_forward_time": 0.1145792007446289,
      "step": 2520
    },
    {
      "epoch": 1.5380859375e-05,
      "step": 2520,
      "training_step_time": 0.3803112506866455
    },
    {
      "epoch": 1.5386962890625e-05,
      "model_forward_time": 0.11465668678283691,
      "step": 2521
    },
    {
      "epoch": 1.5386962890625e-05,
      "step": 2521,
      "training_step_time": 0.4140641689300537
    },
    {
      "epoch": 1.539306640625e-05,
      "model_forward_time": 0.1142418384552002,
      "step": 2522
    },
    {
      "epoch": 1.539306640625e-05,
      "step": 2522,
      "training_step_time": 0.46148157119750977
    },
    {
      "epoch": 1.5399169921875e-05,
      "model_forward_time": 0.11446142196655273,
      "step": 2523
    },
    {
      "epoch": 1.5399169921875e-05,
      "step": 2523,
      "training_step_time": 0.4136040210723877
    },
    {
      "epoch": 1.54052734375e-05,
      "model_forward_time": 0.11546754837036133,
      "step": 2524
    },
    {
      "epoch": 1.54052734375e-05,
      "step": 2524,
      "training_step_time": 0.7606258392333984
    },
    {
      "epoch": 1.5411376953125e-05,
      "model_forward_time": 0.11477184295654297,
      "step": 2525
    },
    {
      "epoch": 1.5411376953125e-05,
      "step": 2525,
      "training_step_time": 0.38500142097473145
    },
    {
      "epoch": 1.541748046875e-05,
      "model_forward_time": 0.11429929733276367,
      "step": 2526
    },
    {
      "epoch": 1.541748046875e-05,
      "step": 2526,
      "training_step_time": 0.4456615447998047
    },
    {
      "epoch": 1.5423583984375e-05,
      "model_forward_time": 0.1147618293762207,
      "step": 2527
    },
    {
      "epoch": 1.5423583984375e-05,
      "step": 2527,
      "training_step_time": 0.383098840713501
    },
    {
      "epoch": 1.54296875e-05,
      "model_forward_time": 0.11414861679077148,
      "step": 2528
    },
    {
      "epoch": 1.54296875e-05,
      "step": 2528,
      "training_step_time": 0.38387513160705566
    },
    {
      "epoch": 1.5435791015625e-05,
      "model_forward_time": 0.11509585380554199,
      "step": 2529
    },
    {
      "epoch": 1.5435791015625e-05,
      "step": 2529,
      "training_step_time": 0.3836987018585205
    },
    {
      "epoch": 1.544189453125e-05,
      "grad_norm": 0.6963123083114624,
      "learning_rate": 8.433333333333334e-05,
      "loss": 0.1291,
      "step": 2530
    },
    {
      "epoch": 1.544189453125e-05,
      "model_forward_time": 0.11490750312805176,
      "step": 2530
    },
    {
      "epoch": 1.544189453125e-05,
      "step": 2530,
      "training_step_time": 0.7181613445281982
    },
    {
      "epoch": 1.5447998046875e-05,
      "model_forward_time": 0.11523747444152832,
      "step": 2531
    },
    {
      "epoch": 1.5447998046875e-05,
      "step": 2531,
      "training_step_time": 0.39356136322021484
    },
    {
      "epoch": 1.54541015625e-05,
      "model_forward_time": 0.11492609977722168,
      "step": 2532
    },
    {
      "epoch": 1.54541015625e-05,
      "step": 2532,
      "training_step_time": 0.38829922676086426
    },
    {
      "epoch": 1.5460205078125e-05,
      "model_forward_time": 0.11486244201660156,
      "step": 2533
    },
    {
      "epoch": 1.5460205078125e-05,
      "step": 2533,
      "training_step_time": 0.3939826488494873
    },
    {
      "epoch": 1.546630859375e-05,
      "model_forward_time": 0.11506319046020508,
      "step": 2534
    },
    {
      "epoch": 1.546630859375e-05,
      "step": 2534,
      "training_step_time": 0.3944573402404785
    },
    {
      "epoch": 1.5472412109375e-05,
      "model_forward_time": 0.11524271965026855,
      "step": 2535
    },
    {
      "epoch": 1.5472412109375e-05,
      "step": 2535,
      "training_step_time": 0.45599985122680664
    },
    {
      "epoch": 1.5478515625e-05,
      "model_forward_time": 0.11510825157165527,
      "step": 2536
    },
    {
      "epoch": 1.5478515625e-05,
      "step": 2536,
      "training_step_time": 0.8411054611206055
    },
    {
      "epoch": 1.5484619140625e-05,
      "model_forward_time": 0.1141812801361084,
      "step": 2537
    },
    {
      "epoch": 1.5484619140625e-05,
      "step": 2537,
      "training_step_time": 0.4710512161254883
    },
    {
      "epoch": 1.549072265625e-05,
      "model_forward_time": 0.11443901062011719,
      "step": 2538
    },
    {
      "epoch": 1.549072265625e-05,
      "step": 2538,
      "training_step_time": 0.46400904655456543
    },
    {
      "epoch": 1.5496826171875e-05,
      "model_forward_time": 0.114501953125,
      "step": 2539
    },
    {
      "epoch": 1.5496826171875e-05,
      "step": 2539,
      "training_step_time": 0.42897677421569824
    },
    {
      "epoch": 1.55029296875e-05,
      "grad_norm": 0.5536630749702454,
      "learning_rate": 8.466666666666667e-05,
      "loss": 0.1407,
      "step": 2540
    },
    {
      "epoch": 1.55029296875e-05,
      "model_forward_time": 0.1154778003692627,
      "step": 2540
    },
    {
      "epoch": 1.55029296875e-05,
      "step": 2540,
      "training_step_time": 0.39136624336242676
    },
    {
      "epoch": 1.5509033203125e-05,
      "model_forward_time": 0.11484003067016602,
      "step": 2541
    },
    {
      "epoch": 1.5509033203125e-05,
      "step": 2541,
      "training_step_time": 0.394122838973999
    },
    {
      "epoch": 1.551513671875e-05,
      "model_forward_time": 0.1143960952758789,
      "step": 2542
    },
    {
      "epoch": 1.551513671875e-05,
      "step": 2542,
      "training_step_time": 0.653296709060669
    },
    {
      "epoch": 1.5521240234375e-05,
      "model_forward_time": 0.11473965644836426,
      "step": 2543
    },
    {
      "epoch": 1.5521240234375e-05,
      "step": 2543,
      "training_step_time": 0.3913249969482422
    },
    {
      "epoch": 1.552734375e-05,
      "model_forward_time": 0.11438107490539551,
      "step": 2544
    },
    {
      "epoch": 1.552734375e-05,
      "step": 2544,
      "training_step_time": 0.3831217288970947
    },
    {
      "epoch": 1.5533447265625e-05,
      "model_forward_time": 0.11467146873474121,
      "step": 2545
    },
    {
      "epoch": 1.5533447265625e-05,
      "step": 2545,
      "training_step_time": 0.39142584800720215
    },
    {
      "epoch": 1.553955078125e-05,
      "model_forward_time": 0.1143960952758789,
      "step": 2546
    },
    {
      "epoch": 1.553955078125e-05,
      "step": 2546,
      "training_step_time": 0.4068596363067627
    },
    {
      "epoch": 1.5545654296875e-05,
      "model_forward_time": 0.11449360847473145,
      "step": 2547
    },
    {
      "epoch": 1.5545654296875e-05,
      "step": 2547,
      "training_step_time": 0.3948380947113037
    },
    {
      "epoch": 1.55517578125e-05,
      "model_forward_time": 0.11499595642089844,
      "step": 2548
    },
    {
      "epoch": 1.55517578125e-05,
      "step": 2548,
      "training_step_time": 1.0174450874328613
    },
    {
      "epoch": 1.5557861328125e-05,
      "model_forward_time": 0.11412930488586426,
      "step": 2549
    },
    {
      "epoch": 1.5557861328125e-05,
      "step": 2549,
      "training_step_time": 0.48146677017211914
    },
    {
      "epoch": 1.556396484375e-05,
      "grad_norm": 0.4746797978878021,
      "learning_rate": 8.5e-05,
      "loss": 0.147,
      "step": 2550
    },
    {
      "epoch": 1.556396484375e-05,
      "model_forward_time": 0.11419153213500977,
      "step": 2550
    },
    {
      "epoch": 1.556396484375e-05,
      "step": 2550,
      "training_step_time": 0.3926057815551758
    },
    {
      "epoch": 1.5570068359375e-05,
      "model_forward_time": 0.11402058601379395,
      "step": 2551
    },
    {
      "epoch": 1.5570068359375e-05,
      "step": 2551,
      "training_step_time": 0.43956708908081055
    },
    {
      "epoch": 1.5576171875e-05,
      "model_forward_time": 0.11426377296447754,
      "step": 2552
    },
    {
      "epoch": 1.5576171875e-05,
      "step": 2552,
      "training_step_time": 0.4157295227050781
    },
    {
      "epoch": 1.5582275390625e-05,
      "model_forward_time": 0.11491250991821289,
      "step": 2553
    },
    {
      "epoch": 1.5582275390625e-05,
      "step": 2553,
      "training_step_time": 0.38654518127441406
    },
    {
      "epoch": 1.558837890625e-05,
      "model_forward_time": 0.1147611141204834,
      "step": 2554
    },
    {
      "epoch": 1.558837890625e-05,
      "step": 2554,
      "training_step_time": 0.8176355361938477
    },
    {
      "epoch": 1.5594482421875e-05,
      "model_forward_time": 0.1143646240234375,
      "step": 2555
    },
    {
      "epoch": 1.5594482421875e-05,
      "step": 2555,
      "training_step_time": 0.4143857955932617
    },
    {
      "epoch": 1.56005859375e-05,
      "model_forward_time": 0.11418962478637695,
      "step": 2556
    },
    {
      "epoch": 1.56005859375e-05,
      "step": 2556,
      "training_step_time": 0.3813948631286621
    },
    {
      "epoch": 1.5606689453125e-05,
      "model_forward_time": 0.11432790756225586,
      "step": 2557
    },
    {
      "epoch": 1.5606689453125e-05,
      "step": 2557,
      "training_step_time": 0.394197940826416
    },
    {
      "epoch": 1.561279296875e-05,
      "model_forward_time": 0.11423277854919434,
      "step": 2558
    },
    {
      "epoch": 1.561279296875e-05,
      "step": 2558,
      "training_step_time": 0.3877241611480713
    },
    {
      "epoch": 1.5618896484375e-05,
      "model_forward_time": 0.11436724662780762,
      "step": 2559
    },
    {
      "epoch": 1.5618896484375e-05,
      "step": 2559,
      "training_step_time": 0.3885211944580078
    },
    {
      "epoch": 1.5625e-05,
      "grad_norm": 0.504030704498291,
      "learning_rate": 8.533333333333334e-05,
      "loss": 0.1373,
      "step": 2560
    },
    {
      "epoch": 1.5625e-05,
      "model_forward_time": 0.11512517929077148,
      "step": 2560
    },
    {
      "epoch": 1.5625e-05,
      "step": 2560,
      "training_step_time": 0.8865962028503418
    },
    {
      "epoch": 1.5631103515625e-05,
      "model_forward_time": 0.11432576179504395,
      "step": 2561
    },
    {
      "epoch": 1.5631103515625e-05,
      "step": 2561,
      "training_step_time": 0.47712254524230957
    },
    {
      "epoch": 1.563720703125e-05,
      "model_forward_time": 0.11457204818725586,
      "step": 2562
    },
    {
      "epoch": 1.563720703125e-05,
      "step": 2562,
      "training_step_time": 0.44318509101867676
    },
    {
      "epoch": 1.5643310546875e-05,
      "model_forward_time": 0.11399364471435547,
      "step": 2563
    },
    {
      "epoch": 1.5643310546875e-05,
      "step": 2563,
      "training_step_time": 0.4279031753540039
    },
    {
      "epoch": 1.56494140625e-05,
      "model_forward_time": 0.1136939525604248,
      "step": 2564
    },
    {
      "epoch": 1.56494140625e-05,
      "step": 2564,
      "training_step_time": 0.46425390243530273
    },
    {
      "epoch": 1.5655517578125e-05,
      "model_forward_time": 0.11437821388244629,
      "step": 2565
    },
    {
      "epoch": 1.5655517578125e-05,
      "step": 2565,
      "training_step_time": 0.4522855281829834
    },
    {
      "epoch": 1.566162109375e-05,
      "model_forward_time": 0.11497950553894043,
      "step": 2566
    },
    {
      "epoch": 1.566162109375e-05,
      "step": 2566,
      "training_step_time": 0.5077369213104248
    },
    {
      "epoch": 1.5667724609375e-05,
      "model_forward_time": 0.11401128768920898,
      "step": 2567
    },
    {
      "epoch": 1.5667724609375e-05,
      "step": 2567,
      "training_step_time": 0.39718055725097656
    },
    {
      "epoch": 1.5673828125e-05,
      "model_forward_time": 0.11440587043762207,
      "step": 2568
    },
    {
      "epoch": 1.5673828125e-05,
      "step": 2568,
      "training_step_time": 0.3916466236114502
    },
    {
      "epoch": 1.5679931640625e-05,
      "model_forward_time": 0.11512970924377441,
      "step": 2569
    },
    {
      "epoch": 1.5679931640625e-05,
      "step": 2569,
      "training_step_time": 0.38515400886535645
    },
    {
      "epoch": 1.568603515625e-05,
      "grad_norm": 0.46948590874671936,
      "learning_rate": 8.566666666666667e-05,
      "loss": 0.1372,
      "step": 2570
    },
    {
      "epoch": 1.568603515625e-05,
      "model_forward_time": 0.114593505859375,
      "step": 2570
    },
    {
      "epoch": 1.568603515625e-05,
      "step": 2570,
      "training_step_time": 0.38887810707092285
    },
    {
      "epoch": 1.5692138671875e-05,
      "model_forward_time": 0.11502289772033691,
      "step": 2571
    },
    {
      "epoch": 1.5692138671875e-05,
      "step": 2571,
      "training_step_time": 0.3997342586517334
    },
    {
      "epoch": 1.56982421875e-05,
      "model_forward_time": 0.11515569686889648,
      "step": 2572
    },
    {
      "epoch": 1.56982421875e-05,
      "step": 2572,
      "training_step_time": 0.7262363433837891
    },
    {
      "epoch": 1.5704345703125e-05,
      "model_forward_time": 0.11461734771728516,
      "step": 2573
    },
    {
      "epoch": 1.5704345703125e-05,
      "step": 2573,
      "training_step_time": 0.3838200569152832
    },
    {
      "epoch": 1.571044921875e-05,
      "model_forward_time": 0.11412811279296875,
      "step": 2574
    },
    {
      "epoch": 1.571044921875e-05,
      "step": 2574,
      "training_step_time": 0.403200626373291
    },
    {
      "epoch": 1.5716552734375e-05,
      "model_forward_time": 0.11483478546142578,
      "step": 2575
    },
    {
      "epoch": 1.5716552734375e-05,
      "step": 2575,
      "training_step_time": 0.39911937713623047
    },
    {
      "epoch": 1.572265625e-05,
      "model_forward_time": 0.11458754539489746,
      "step": 2576
    },
    {
      "epoch": 1.572265625e-05,
      "step": 2576,
      "training_step_time": 0.42894816398620605
    },
    {
      "epoch": 1.5728759765625e-05,
      "model_forward_time": 0.11497306823730469,
      "step": 2577
    },
    {
      "epoch": 1.5728759765625e-05,
      "step": 2577,
      "training_step_time": 0.422285795211792
    },
    {
      "epoch": 1.573486328125e-05,
      "model_forward_time": 0.11600399017333984,
      "step": 2578
    },
    {
      "epoch": 1.573486328125e-05,
      "step": 2578,
      "training_step_time": 0.7090420722961426
    },
    {
      "epoch": 1.5740966796875e-05,
      "model_forward_time": 0.11450839042663574,
      "step": 2579
    },
    {
      "epoch": 1.5740966796875e-05,
      "step": 2579,
      "training_step_time": 0.42789745330810547
    },
    {
      "epoch": 1.57470703125e-05,
      "grad_norm": 0.43274304270744324,
      "learning_rate": 8.6e-05,
      "loss": 0.1346,
      "step": 2580
    },
    {
      "epoch": 1.57470703125e-05,
      "model_forward_time": 0.11386394500732422,
      "step": 2580
    },
    {
      "epoch": 1.57470703125e-05,
      "step": 2580,
      "training_step_time": 0.4493260383605957
    },
    {
      "epoch": 1.5753173828125e-05,
      "model_forward_time": 0.11451077461242676,
      "step": 2581
    },
    {
      "epoch": 1.5753173828125e-05,
      "step": 2581,
      "training_step_time": 0.38722729682922363
    },
    {
      "epoch": 1.575927734375e-05,
      "model_forward_time": 0.11444830894470215,
      "step": 2582
    },
    {
      "epoch": 1.575927734375e-05,
      "step": 2582,
      "training_step_time": 0.3884875774383545
    },
    {
      "epoch": 1.5765380859375e-05,
      "model_forward_time": 0.11436343193054199,
      "step": 2583
    },
    {
      "epoch": 1.5765380859375e-05,
      "step": 2583,
      "training_step_time": 0.3939378261566162
    },
    {
      "epoch": 1.5771484375e-05,
      "model_forward_time": 0.11557459831237793,
      "step": 2584
    },
    {
      "epoch": 1.5771484375e-05,
      "step": 2584,
      "training_step_time": 0.8153862953186035
    },
    {
      "epoch": 1.5777587890625e-05,
      "model_forward_time": 0.11458921432495117,
      "step": 2585
    },
    {
      "epoch": 1.5777587890625e-05,
      "step": 2585,
      "training_step_time": 0.37940454483032227
    },
    {
      "epoch": 1.578369140625e-05,
      "model_forward_time": 0.11441993713378906,
      "step": 2586
    },
    {
      "epoch": 1.578369140625e-05,
      "step": 2586,
      "training_step_time": 0.3857266902923584
    },
    {
      "epoch": 1.5789794921875e-05,
      "model_forward_time": 0.11427640914916992,
      "step": 2587
    },
    {
      "epoch": 1.5789794921875e-05,
      "step": 2587,
      "training_step_time": 0.3899359703063965
    },
    {
      "epoch": 1.57958984375e-05,
      "model_forward_time": 0.11437869071960449,
      "step": 2588
    },
    {
      "epoch": 1.57958984375e-05,
      "step": 2588,
      "training_step_time": 0.4115030765533447
    },
    {
      "epoch": 1.5802001953125e-05,
      "model_forward_time": 0.11464166641235352,
      "step": 2589
    },
    {
      "epoch": 1.5802001953125e-05,
      "step": 2589,
      "training_step_time": 0.44129133224487305
    },
    {
      "epoch": 1.580810546875e-05,
      "grad_norm": 0.4241025149822235,
      "learning_rate": 8.633333333333334e-05,
      "loss": 0.1357,
      "step": 2590
    },
    {
      "epoch": 1.580810546875e-05,
      "model_forward_time": 0.11477422714233398,
      "step": 2590
    },
    {
      "epoch": 1.580810546875e-05,
      "step": 2590,
      "training_step_time": 0.7477941513061523
    },
    {
      "epoch": 1.5814208984375e-05,
      "model_forward_time": 0.11510753631591797,
      "step": 2591
    },
    {
      "epoch": 1.5814208984375e-05,
      "step": 2591,
      "training_step_time": 0.4357781410217285
    },
    {
      "epoch": 1.58203125e-05,
      "model_forward_time": 0.11473488807678223,
      "step": 2592
    },
    {
      "epoch": 1.58203125e-05,
      "step": 2592,
      "training_step_time": 0.41365838050842285
    },
    {
      "epoch": 1.5826416015625e-05,
      "model_forward_time": 0.11496257781982422,
      "step": 2593
    },
    {
      "epoch": 1.5826416015625e-05,
      "step": 2593,
      "training_step_time": 0.40805983543395996
    },
    {
      "epoch": 1.583251953125e-05,
      "model_forward_time": 0.11491823196411133,
      "step": 2594
    },
    {
      "epoch": 1.583251953125e-05,
      "step": 2594,
      "training_step_time": 0.3938024044036865
    },
    {
      "epoch": 1.5838623046875e-05,
      "model_forward_time": 0.11450552940368652,
      "step": 2595
    },
    {
      "epoch": 1.5838623046875e-05,
      "step": 2595,
      "training_step_time": 0.3870561122894287
    },
    {
      "epoch": 1.58447265625e-05,
      "model_forward_time": 0.11680150032043457,
      "step": 2596
    },
    {
      "epoch": 1.58447265625e-05,
      "step": 2596,
      "training_step_time": 0.8603558540344238
    },
    {
      "epoch": 1.5850830078125e-05,
      "model_forward_time": 0.11371707916259766,
      "step": 2597
    },
    {
      "epoch": 1.5850830078125e-05,
      "step": 2597,
      "training_step_time": 0.3957788944244385
    },
    {
      "epoch": 1.585693359375e-05,
      "model_forward_time": 0.11443328857421875,
      "step": 2598
    },
    {
      "epoch": 1.585693359375e-05,
      "step": 2598,
      "training_step_time": 0.38593530654907227
    },
    {
      "epoch": 1.5863037109375e-05,
      "model_forward_time": 0.11389446258544922,
      "step": 2599
    },
    {
      "epoch": 1.5863037109375e-05,
      "step": 2599,
      "training_step_time": 0.38429975509643555
    },
    {
      "epoch": 1.5869140625e-05,
      "grad_norm": 0.6060633063316345,
      "learning_rate": 8.666666666666667e-05,
      "loss": 0.1357,
      "step": 2600
    },
    {
      "epoch": 1.5869140625e-05,
      "model_forward_time": 0.11417913436889648,
      "step": 2600
    },
    {
      "epoch": 1.5869140625e-05,
      "step": 2600,
      "training_step_time": 0.39221620559692383
    },
    {
      "epoch": 1.5875244140625e-05,
      "model_forward_time": 0.11403346061706543,
      "step": 2601
    },
    {
      "epoch": 1.5875244140625e-05,
      "step": 2601,
      "training_step_time": 0.4781057834625244
    },
    {
      "epoch": 1.588134765625e-05,
      "model_forward_time": 0.11470198631286621,
      "step": 2602
    },
    {
      "epoch": 1.588134765625e-05,
      "step": 2602,
      "training_step_time": 0.8731112480163574
    },
    {
      "epoch": 1.5887451171875e-05,
      "model_forward_time": 0.11435484886169434,
      "step": 2603
    },
    {
      "epoch": 1.5887451171875e-05,
      "step": 2603,
      "training_step_time": 0.41037940979003906
    },
    {
      "epoch": 1.58935546875e-05,
      "model_forward_time": 0.11600160598754883,
      "step": 2604
    },
    {
      "epoch": 1.58935546875e-05,
      "step": 2604,
      "training_step_time": 0.44748854637145996
    },
    {
      "epoch": 1.5899658203125e-05,
      "model_forward_time": 0.11368346214294434,
      "step": 2605
    },
    {
      "epoch": 1.5899658203125e-05,
      "step": 2605,
      "training_step_time": 0.383929967880249
    },
    {
      "epoch": 1.590576171875e-05,
      "model_forward_time": 0.11447453498840332,
      "step": 2606
    },
    {
      "epoch": 1.590576171875e-05,
      "step": 2606,
      "training_step_time": 0.41101765632629395
    },
    {
      "epoch": 1.5911865234375e-05,
      "model_forward_time": 0.11474847793579102,
      "step": 2607
    },
    {
      "epoch": 1.5911865234375e-05,
      "step": 2607,
      "training_step_time": 0.3841562271118164
    },
    {
      "epoch": 1.591796875e-05,
      "model_forward_time": 0.11593937873840332,
      "step": 2608
    },
    {
      "epoch": 1.591796875e-05,
      "step": 2608,
      "training_step_time": 0.44014835357666016
    },
    {
      "epoch": 1.5924072265625e-05,
      "model_forward_time": 0.11576247215270996,
      "step": 2609
    },
    {
      "epoch": 1.5924072265625e-05,
      "step": 2609,
      "training_step_time": 0.3883528709411621
    },
    {
      "epoch": 1.593017578125e-05,
      "grad_norm": 0.408560574054718,
      "learning_rate": 8.7e-05,
      "loss": 0.1255,
      "step": 2610
    },
    {
      "epoch": 1.593017578125e-05,
      "model_forward_time": 0.11578726768493652,
      "step": 2610
    },
    {
      "epoch": 1.593017578125e-05,
      "step": 2610,
      "training_step_time": 0.39223575592041016
    },
    {
      "epoch": 1.5936279296875e-05,
      "model_forward_time": 0.11503887176513672,
      "step": 2611
    },
    {
      "epoch": 1.5936279296875e-05,
      "step": 2611,
      "training_step_time": 0.394442081451416
    },
    {
      "epoch": 1.59423828125e-05,
      "model_forward_time": 0.11520195007324219,
      "step": 2612
    },
    {
      "epoch": 1.59423828125e-05,
      "step": 2612,
      "training_step_time": 0.4023096561431885
    },
    {
      "epoch": 1.5948486328125e-05,
      "model_forward_time": 0.11562466621398926,
      "step": 2613
    },
    {
      "epoch": 1.5948486328125e-05,
      "step": 2613,
      "training_step_time": 0.4019920825958252
    },
    {
      "epoch": 1.595458984375e-05,
      "model_forward_time": 0.11494040489196777,
      "step": 2614
    },
    {
      "epoch": 1.595458984375e-05,
      "step": 2614,
      "training_step_time": 1.2806072235107422
    },
    {
      "epoch": 1.5960693359375e-05,
      "model_forward_time": 0.11393523216247559,
      "step": 2615
    },
    {
      "epoch": 1.5960693359375e-05,
      "step": 2615,
      "training_step_time": 0.4116637706756592
    },
    {
      "epoch": 1.5966796875e-05,
      "model_forward_time": 0.1144406795501709,
      "step": 2616
    },
    {
      "epoch": 1.5966796875e-05,
      "step": 2616,
      "training_step_time": 0.44283223152160645
    },
    {
      "epoch": 1.5972900390625e-05,
      "model_forward_time": 0.11452102661132812,
      "step": 2617
    },
    {
      "epoch": 1.5972900390625e-05,
      "step": 2617,
      "training_step_time": 0.469620943069458
    },
    {
      "epoch": 1.597900390625e-05,
      "model_forward_time": 0.11445260047912598,
      "step": 2618
    },
    {
      "epoch": 1.597900390625e-05,
      "step": 2618,
      "training_step_time": 0.41037869453430176
    },
    {
      "epoch": 1.5985107421875e-05,
      "model_forward_time": 0.11396956443786621,
      "step": 2619
    },
    {
      "epoch": 1.5985107421875e-05,
      "step": 2619,
      "training_step_time": 0.39092040061950684
    },
    {
      "epoch": 1.59912109375e-05,
      "grad_norm": 0.385897159576416,
      "learning_rate": 8.733333333333333e-05,
      "loss": 0.1333,
      "step": 2620
    },
    {
      "epoch": 1.59912109375e-05,
      "model_forward_time": 0.11437654495239258,
      "step": 2620
    },
    {
      "epoch": 1.59912109375e-05,
      "step": 2620,
      "training_step_time": 0.38951730728149414
    },
    {
      "epoch": 1.5997314453125e-05,
      "model_forward_time": 0.11487388610839844,
      "step": 2621
    },
    {
      "epoch": 1.5997314453125e-05,
      "step": 2621,
      "training_step_time": 0.3868579864501953
    },
    {
      "epoch": 1.600341796875e-05,
      "model_forward_time": 0.11488866806030273,
      "step": 2622
    },
    {
      "epoch": 1.600341796875e-05,
      "step": 2622,
      "training_step_time": 0.3868086338043213
    },
    {
      "epoch": 1.6009521484375e-05,
      "model_forward_time": 0.11475014686584473,
      "step": 2623
    },
    {
      "epoch": 1.6009521484375e-05,
      "step": 2623,
      "training_step_time": 0.4036099910736084
    },
    {
      "epoch": 1.6015625e-05,
      "model_forward_time": 0.1156003475189209,
      "step": 2624
    },
    {
      "epoch": 1.6015625e-05,
      "step": 2624,
      "training_step_time": 0.39214634895324707
    },
    {
      "epoch": 1.6021728515625e-05,
      "model_forward_time": 0.11558151245117188,
      "step": 2625
    },
    {
      "epoch": 1.6021728515625e-05,
      "step": 2625,
      "training_step_time": 0.3953263759613037
    },
    {
      "epoch": 1.602783203125e-05,
      "model_forward_time": 0.11532330513000488,
      "step": 2626
    },
    {
      "epoch": 1.602783203125e-05,
      "step": 2626,
      "training_step_time": 1.0613603591918945
    },
    {
      "epoch": 1.6033935546875e-05,
      "model_forward_time": 0.11510205268859863,
      "step": 2627
    },
    {
      "epoch": 1.6033935546875e-05,
      "step": 2627,
      "training_step_time": 0.38835644721984863
    },
    {
      "epoch": 1.60400390625e-05,
      "model_forward_time": 0.11452174186706543,
      "step": 2628
    },
    {
      "epoch": 1.60400390625e-05,
      "step": 2628,
      "training_step_time": 0.4390099048614502
    },
    {
      "epoch": 1.6046142578125e-05,
      "model_forward_time": 0.11580228805541992,
      "step": 2629
    },
    {
      "epoch": 1.6046142578125e-05,
      "step": 2629,
      "training_step_time": 0.4337961673736572
    },
    {
      "epoch": 1.605224609375e-05,
      "grad_norm": 0.3554571270942688,
      "learning_rate": 8.766666666666668e-05,
      "loss": 0.124,
      "step": 2630
    },
    {
      "epoch": 1.605224609375e-05,
      "model_forward_time": 0.11441373825073242,
      "step": 2630
    },
    {
      "epoch": 1.605224609375e-05,
      "step": 2630,
      "training_step_time": 0.4175848960876465
    },
    {
      "epoch": 1.6058349609375e-05,
      "model_forward_time": 0.11439061164855957,
      "step": 2631
    },
    {
      "epoch": 1.6058349609375e-05,
      "step": 2631,
      "training_step_time": 0.42563605308532715
    },
    {
      "epoch": 1.6064453125e-05,
      "model_forward_time": 0.11551547050476074,
      "step": 2632
    },
    {
      "epoch": 1.6064453125e-05,
      "step": 2632,
      "training_step_time": 0.554694652557373
    },
    {
      "epoch": 1.6070556640625e-05,
      "model_forward_time": 0.1144251823425293,
      "step": 2633
    },
    {
      "epoch": 1.6070556640625e-05,
      "step": 2633,
      "training_step_time": 0.3865995407104492
    },
    {
      "epoch": 1.607666015625e-05,
      "model_forward_time": 0.11471939086914062,
      "step": 2634
    },
    {
      "epoch": 1.607666015625e-05,
      "step": 2634,
      "training_step_time": 0.3877134323120117
    },
    {
      "epoch": 1.6082763671875e-05,
      "model_forward_time": 0.11477494239807129,
      "step": 2635
    },
    {
      "epoch": 1.6082763671875e-05,
      "step": 2635,
      "training_step_time": 0.39534664154052734
    },
    {
      "epoch": 1.60888671875e-05,
      "model_forward_time": 0.11522221565246582,
      "step": 2636
    },
    {
      "epoch": 1.60888671875e-05,
      "step": 2636,
      "training_step_time": 0.38967275619506836
    },
    {
      "epoch": 1.6094970703125e-05,
      "model_forward_time": 0.11578035354614258,
      "step": 2637
    },
    {
      "epoch": 1.6094970703125e-05,
      "step": 2637,
      "training_step_time": 0.39264607429504395
    },
    {
      "epoch": 1.610107421875e-05,
      "model_forward_time": 0.11555647850036621,
      "step": 2638
    },
    {
      "epoch": 1.610107421875e-05,
      "step": 2638,
      "training_step_time": 1.2416317462921143
    },
    {
      "epoch": 1.6107177734375e-05,
      "model_forward_time": 0.11525654792785645,
      "step": 2639
    },
    {
      "epoch": 1.6107177734375e-05,
      "step": 2639,
      "training_step_time": 0.3915531635284424
    },
    {
      "epoch": 1.611328125e-05,
      "grad_norm": 0.37573689222335815,
      "learning_rate": 8.800000000000001e-05,
      "loss": 0.1173,
      "step": 2640
    },
    {
      "epoch": 1.611328125e-05,
      "model_forward_time": 0.11404681205749512,
      "step": 2640
    },
    {
      "epoch": 1.611328125e-05,
      "step": 2640,
      "training_step_time": 0.4230775833129883
    },
    {
      "epoch": 1.6119384765625e-05,
      "model_forward_time": 0.11425662040710449,
      "step": 2641
    },
    {
      "epoch": 1.6119384765625e-05,
      "step": 2641,
      "training_step_time": 0.4390738010406494
    },
    {
      "epoch": 1.612548828125e-05,
      "model_forward_time": 0.11382150650024414,
      "step": 2642
    },
    {
      "epoch": 1.612548828125e-05,
      "step": 2642,
      "training_step_time": 0.47719383239746094
    },
    {
      "epoch": 1.6131591796875e-05,
      "model_forward_time": 0.11546850204467773,
      "step": 2643
    },
    {
      "epoch": 1.6131591796875e-05,
      "step": 2643,
      "training_step_time": 0.4297664165496826
    },
    {
      "epoch": 1.61376953125e-05,
      "model_forward_time": 0.11466646194458008,
      "step": 2644
    },
    {
      "epoch": 1.61376953125e-05,
      "step": 2644,
      "training_step_time": 0.7364621162414551
    },
    {
      "epoch": 1.6143798828125e-05,
      "model_forward_time": 0.11421084403991699,
      "step": 2645
    },
    {
      "epoch": 1.6143798828125e-05,
      "step": 2645,
      "training_step_time": 0.42518138885498047
    },
    {
      "epoch": 1.614990234375e-05,
      "model_forward_time": 0.11480450630187988,
      "step": 2646
    },
    {
      "epoch": 1.614990234375e-05,
      "step": 2646,
      "training_step_time": 0.37775540351867676
    },
    {
      "epoch": 1.6156005859375e-05,
      "model_forward_time": 0.11430120468139648,
      "step": 2647
    },
    {
      "epoch": 1.6156005859375e-05,
      "step": 2647,
      "training_step_time": 0.38683629035949707
    },
    {
      "epoch": 1.6162109375e-05,
      "model_forward_time": 0.1143636703491211,
      "step": 2648
    },
    {
      "epoch": 1.6162109375e-05,
      "step": 2648,
      "training_step_time": 0.3907032012939453
    },
    {
      "epoch": 1.6168212890625e-05,
      "model_forward_time": 0.11463546752929688,
      "step": 2649
    },
    {
      "epoch": 1.6168212890625e-05,
      "step": 2649,
      "training_step_time": 0.39087986946105957
    },
    {
      "epoch": 1.617431640625e-05,
      "grad_norm": 0.4416767358779907,
      "learning_rate": 8.833333333333333e-05,
      "loss": 0.1331,
      "step": 2650
    },
    {
      "epoch": 1.617431640625e-05,
      "model_forward_time": 0.1150665283203125,
      "step": 2650
    },
    {
      "epoch": 1.617431640625e-05,
      "step": 2650,
      "training_step_time": 0.7224154472351074
    },
    {
      "epoch": 1.6180419921875e-05,
      "model_forward_time": 0.1147160530090332,
      "step": 2651
    },
    {
      "epoch": 1.6180419921875e-05,
      "step": 2651,
      "training_step_time": 0.38269591331481934
    },
    {
      "epoch": 1.61865234375e-05,
      "model_forward_time": 0.11514139175415039,
      "step": 2652
    },
    {
      "epoch": 1.61865234375e-05,
      "step": 2652,
      "training_step_time": 0.4069671630859375
    },
    {
      "epoch": 1.6192626953125e-05,
      "model_forward_time": 0.11465144157409668,
      "step": 2653
    },
    {
      "epoch": 1.6192626953125e-05,
      "step": 2653,
      "training_step_time": 0.4203767776489258
    },
    {
      "epoch": 1.619873046875e-05,
      "model_forward_time": 0.11492300033569336,
      "step": 2654
    },
    {
      "epoch": 1.619873046875e-05,
      "step": 2654,
      "training_step_time": 0.4412257671356201
    },
    {
      "epoch": 1.6204833984375e-05,
      "model_forward_time": 0.11491942405700684,
      "step": 2655
    },
    {
      "epoch": 1.6204833984375e-05,
      "step": 2655,
      "training_step_time": 0.44057416915893555
    },
    {
      "epoch": 1.62109375e-05,
      "model_forward_time": 0.11565613746643066,
      "step": 2656
    },
    {
      "epoch": 1.62109375e-05,
      "step": 2656,
      "training_step_time": 0.5913152694702148
    },
    {
      "epoch": 1.6217041015625e-05,
      "model_forward_time": 0.1155545711517334,
      "step": 2657
    },
    {
      "epoch": 1.6217041015625e-05,
      "step": 2657,
      "training_step_time": 0.40770649909973145
    },
    {
      "epoch": 1.622314453125e-05,
      "model_forward_time": 0.11486077308654785,
      "step": 2658
    },
    {
      "epoch": 1.622314453125e-05,
      "step": 2658,
      "training_step_time": 0.4600396156311035
    },
    {
      "epoch": 1.6229248046875e-05,
      "model_forward_time": 0.11523938179016113,
      "step": 2659
    },
    {
      "epoch": 1.6229248046875e-05,
      "step": 2659,
      "training_step_time": 0.4970066547393799
    },
    {
      "epoch": 1.62353515625e-05,
      "grad_norm": 0.40430817008018494,
      "learning_rate": 8.866666666666668e-05,
      "loss": 0.1399,
      "step": 2660
    },
    {
      "epoch": 1.62353515625e-05,
      "model_forward_time": 0.11437582969665527,
      "step": 2660
    },
    {
      "epoch": 1.62353515625e-05,
      "step": 2660,
      "training_step_time": 0.39228081703186035
    },
    {
      "epoch": 1.6241455078125e-05,
      "model_forward_time": 0.11504602432250977,
      "step": 2661
    },
    {
      "epoch": 1.6241455078125e-05,
      "step": 2661,
      "training_step_time": 0.3925514221191406
    },
    {
      "epoch": 1.624755859375e-05,
      "model_forward_time": 0.11500310897827148,
      "step": 2662
    },
    {
      "epoch": 1.624755859375e-05,
      "step": 2662,
      "training_step_time": 0.46225881576538086
    },
    {
      "epoch": 1.6253662109375e-05,
      "model_forward_time": 0.11510848999023438,
      "step": 2663
    },
    {
      "epoch": 1.6253662109375e-05,
      "step": 2663,
      "training_step_time": 0.403656005859375
    },
    {
      "epoch": 1.6259765625e-05,
      "model_forward_time": 0.11543107032775879,
      "step": 2664
    },
    {
      "epoch": 1.6259765625e-05,
      "step": 2664,
      "training_step_time": 0.39597010612487793
    },
    {
      "epoch": 1.6265869140625e-05,
      "model_forward_time": 0.11464786529541016,
      "step": 2665
    },
    {
      "epoch": 1.6265869140625e-05,
      "step": 2665,
      "training_step_time": 0.4097907543182373
    },
    {
      "epoch": 1.627197265625e-05,
      "model_forward_time": 0.11493587493896484,
      "step": 2666
    },
    {
      "epoch": 1.627197265625e-05,
      "step": 2666,
      "training_step_time": 0.4474194049835205
    },
    {
      "epoch": 1.6278076171875e-05,
      "model_forward_time": 0.11526751518249512,
      "step": 2667
    },
    {
      "epoch": 1.6278076171875e-05,
      "step": 2667,
      "training_step_time": 0.4849982261657715
    },
    {
      "epoch": 1.62841796875e-05,
      "model_forward_time": 0.11503839492797852,
      "step": 2668
    },
    {
      "epoch": 1.62841796875e-05,
      "step": 2668,
      "training_step_time": 1.1407418251037598
    },
    {
      "epoch": 1.6290283203125e-05,
      "model_forward_time": 0.11458158493041992,
      "step": 2669
    },
    {
      "epoch": 1.6290283203125e-05,
      "step": 2669,
      "training_step_time": 0.3981814384460449
    },
    {
      "epoch": 1.629638671875e-05,
      "grad_norm": 0.4977336823940277,
      "learning_rate": 8.900000000000001e-05,
      "loss": 0.1298,
      "step": 2670
    },
    {
      "epoch": 1.629638671875e-05,
      "model_forward_time": 0.11458563804626465,
      "step": 2670
    },
    {
      "epoch": 1.629638671875e-05,
      "step": 2670,
      "training_step_time": 0.4286963939666748
    },
    {
      "epoch": 1.6302490234375e-05,
      "model_forward_time": 0.11438846588134766,
      "step": 2671
    },
    {
      "epoch": 1.6302490234375e-05,
      "step": 2671,
      "training_step_time": 0.42696380615234375
    },
    {
      "epoch": 1.630859375e-05,
      "model_forward_time": 0.1139981746673584,
      "step": 2672
    },
    {
      "epoch": 1.630859375e-05,
      "step": 2672,
      "training_step_time": 0.4564480781555176
    },
    {
      "epoch": 1.6314697265625e-05,
      "model_forward_time": 0.11437368392944336,
      "step": 2673
    },
    {
      "epoch": 1.6314697265625e-05,
      "step": 2673,
      "training_step_time": 0.3900458812713623
    },
    {
      "epoch": 1.632080078125e-05,
      "model_forward_time": 0.11539936065673828,
      "step": 2674
    },
    {
      "epoch": 1.632080078125e-05,
      "step": 2674,
      "training_step_time": 0.7208232879638672
    },
    {
      "epoch": 1.6326904296875e-05,
      "model_forward_time": 0.11438965797424316,
      "step": 2675
    },
    {
      "epoch": 1.6326904296875e-05,
      "step": 2675,
      "training_step_time": 0.38469839096069336
    },
    {
      "epoch": 1.63330078125e-05,
      "model_forward_time": 0.11411476135253906,
      "step": 2676
    },
    {
      "epoch": 1.63330078125e-05,
      "step": 2676,
      "training_step_time": 0.3938136100769043
    },
    {
      "epoch": 1.6339111328125e-05,
      "model_forward_time": 0.11466312408447266,
      "step": 2677
    },
    {
      "epoch": 1.6339111328125e-05,
      "step": 2677,
      "training_step_time": 0.40439629554748535
    },
    {
      "epoch": 1.634521484375e-05,
      "model_forward_time": 0.1149754524230957,
      "step": 2678
    },
    {
      "epoch": 1.634521484375e-05,
      "step": 2678,
      "training_step_time": 0.3929121494293213
    },
    {
      "epoch": 1.6351318359375e-05,
      "model_forward_time": 0.11458468437194824,
      "step": 2679
    },
    {
      "epoch": 1.6351318359375e-05,
      "step": 2679,
      "training_step_time": 0.40309762954711914
    },
    {
      "epoch": 1.6357421875e-05,
      "grad_norm": 0.4260495603084564,
      "learning_rate": 8.933333333333334e-05,
      "loss": 0.1265,
      "step": 2680
    },
    {
      "epoch": 1.6357421875e-05,
      "model_forward_time": 0.11478018760681152,
      "step": 2680
    },
    {
      "epoch": 1.6357421875e-05,
      "step": 2680,
      "training_step_time": 0.7957990169525146
    },
    {
      "epoch": 1.6363525390625e-05,
      "model_forward_time": 0.11464977264404297,
      "step": 2681
    },
    {
      "epoch": 1.6363525390625e-05,
      "step": 2681,
      "training_step_time": 0.4790229797363281
    },
    {
      "epoch": 1.636962890625e-05,
      "model_forward_time": 0.11483168601989746,
      "step": 2682
    },
    {
      "epoch": 1.636962890625e-05,
      "step": 2682,
      "training_step_time": 0.40756964683532715
    },
    {
      "epoch": 1.6375732421875e-05,
      "model_forward_time": 0.1152336597442627,
      "step": 2683
    },
    {
      "epoch": 1.6375732421875e-05,
      "step": 2683,
      "training_step_time": 0.4001810550689697
    },
    {
      "epoch": 1.63818359375e-05,
      "model_forward_time": 0.11510181427001953,
      "step": 2684
    },
    {
      "epoch": 1.63818359375e-05,
      "step": 2684,
      "training_step_time": 0.3962719440460205
    },
    {
      "epoch": 1.6387939453125e-05,
      "model_forward_time": 0.11458921432495117,
      "step": 2685
    },
    {
      "epoch": 1.6387939453125e-05,
      "step": 2685,
      "training_step_time": 0.48335766792297363
    },
    {
      "epoch": 1.639404296875e-05,
      "model_forward_time": 0.11486530303955078,
      "step": 2686
    },
    {
      "epoch": 1.639404296875e-05,
      "step": 2686,
      "training_step_time": 0.671576976776123
    },
    {
      "epoch": 1.6400146484375e-05,
      "model_forward_time": 0.11463117599487305,
      "step": 2687
    },
    {
      "epoch": 1.6400146484375e-05,
      "step": 2687,
      "training_step_time": 0.3887345790863037
    },
    {
      "epoch": 1.640625e-05,
      "model_forward_time": 0.11441802978515625,
      "step": 2688
    },
    {
      "epoch": 1.640625e-05,
      "step": 2688,
      "training_step_time": 0.39460015296936035
    },
    {
      "epoch": 1.6412353515625e-05,
      "model_forward_time": 0.11430931091308594,
      "step": 2689
    },
    {
      "epoch": 1.6412353515625e-05,
      "step": 2689,
      "training_step_time": 0.3940889835357666
    },
    {
      "epoch": 1.641845703125e-05,
      "grad_norm": 0.6012243032455444,
      "learning_rate": 8.966666666666666e-05,
      "loss": 0.1251,
      "step": 2690
    },
    {
      "epoch": 1.641845703125e-05,
      "model_forward_time": 0.11474251747131348,
      "step": 2690
    },
    {
      "epoch": 1.641845703125e-05,
      "step": 2690,
      "training_step_time": 0.4352593421936035
    },
    {
      "epoch": 1.6424560546875e-05,
      "model_forward_time": 0.11440134048461914,
      "step": 2691
    },
    {
      "epoch": 1.6424560546875e-05,
      "step": 2691,
      "training_step_time": 0.38600921630859375
    },
    {
      "epoch": 1.64306640625e-05,
      "model_forward_time": 0.1151418685913086,
      "step": 2692
    },
    {
      "epoch": 1.64306640625e-05,
      "step": 2692,
      "training_step_time": 1.0021309852600098
    },
    {
      "epoch": 1.6436767578125e-05,
      "model_forward_time": 0.11443185806274414,
      "step": 2693
    },
    {
      "epoch": 1.6436767578125e-05,
      "step": 2693,
      "training_step_time": 0.44441819190979004
    },
    {
      "epoch": 1.644287109375e-05,
      "model_forward_time": 0.11452317237854004,
      "step": 2694
    },
    {
      "epoch": 1.644287109375e-05,
      "step": 2694,
      "training_step_time": 0.3819005489349365
    },
    {
      "epoch": 1.6448974609375e-05,
      "model_forward_time": 0.1137545108795166,
      "step": 2695
    },
    {
      "epoch": 1.6448974609375e-05,
      "step": 2695,
      "training_step_time": 0.45716309547424316
    },
    {
      "epoch": 1.6455078125e-05,
      "model_forward_time": 0.11382317543029785,
      "step": 2696
    },
    {
      "epoch": 1.6455078125e-05,
      "step": 2696,
      "training_step_time": 0.4378030300140381
    },
    {
      "epoch": 1.6461181640625e-05,
      "model_forward_time": 0.11440348625183105,
      "step": 2697
    },
    {
      "epoch": 1.6461181640625e-05,
      "step": 2697,
      "training_step_time": 0.3894035816192627
    },
    {
      "epoch": 1.646728515625e-05,
      "model_forward_time": 0.11469864845275879,
      "step": 2698
    },
    {
      "epoch": 1.646728515625e-05,
      "step": 2698,
      "training_step_time": 0.4773223400115967
    },
    {
      "epoch": 1.6473388671875e-05,
      "model_forward_time": 0.11462879180908203,
      "step": 2699
    },
    {
      "epoch": 1.6473388671875e-05,
      "step": 2699,
      "training_step_time": 0.3857724666595459
    },
    {
      "epoch": 1.64794921875e-05,
      "grad_norm": 0.40862175822257996,
      "learning_rate": 9e-05,
      "loss": 0.1335,
      "step": 2700
    },
    {
      "epoch": 1.64794921875e-05,
      "model_forward_time": 0.114593505859375,
      "step": 2700
    },
    {
      "epoch": 1.64794921875e-05,
      "step": 2700,
      "training_step_time": 0.3771016597747803
    },
    {
      "epoch": 1.6485595703125e-05,
      "model_forward_time": 0.11460256576538086,
      "step": 2701
    },
    {
      "epoch": 1.6485595703125e-05,
      "step": 2701,
      "training_step_time": 0.3884916305541992
    },
    {
      "epoch": 1.649169921875e-05,
      "model_forward_time": 0.11619019508361816,
      "step": 2702
    },
    {
      "epoch": 1.649169921875e-05,
      "step": 2702,
      "training_step_time": 0.4257950782775879
    },
    {
      "epoch": 1.6497802734375e-05,
      "model_forward_time": 0.11536574363708496,
      "step": 2703
    },
    {
      "epoch": 1.6497802734375e-05,
      "step": 2703,
      "training_step_time": 0.4116501808166504
    },
    {
      "epoch": 1.650390625e-05,
      "model_forward_time": 0.1147313117980957,
      "step": 2704
    },
    {
      "epoch": 1.650390625e-05,
      "step": 2704,
      "training_step_time": 0.5225999355316162
    },
    {
      "epoch": 1.6510009765625e-05,
      "model_forward_time": 0.11557769775390625,
      "step": 2705
    },
    {
      "epoch": 1.6510009765625e-05,
      "step": 2705,
      "training_step_time": 0.4158613681793213
    },
    {
      "epoch": 1.651611328125e-05,
      "model_forward_time": 0.11539649963378906,
      "step": 2706
    },
    {
      "epoch": 1.651611328125e-05,
      "step": 2706,
      "training_step_time": 0.420534610748291
    },
    {
      "epoch": 1.6522216796875e-05,
      "model_forward_time": 0.11504268646240234,
      "step": 2707
    },
    {
      "epoch": 1.6522216796875e-05,
      "step": 2707,
      "training_step_time": 0.4609379768371582
    },
    {
      "epoch": 1.65283203125e-05,
      "model_forward_time": 0.11473226547241211,
      "step": 2708
    },
    {
      "epoch": 1.65283203125e-05,
      "step": 2708,
      "training_step_time": 0.4525332450866699
    },
    {
      "epoch": 1.6534423828125e-05,
      "model_forward_time": 0.1153726577758789,
      "step": 2709
    },
    {
      "epoch": 1.6534423828125e-05,
      "step": 2709,
      "training_step_time": 0.39158105850219727
    },
    {
      "epoch": 1.654052734375e-05,
      "grad_norm": 0.5262507796287537,
      "learning_rate": 9.033333333333334e-05,
      "loss": 0.1261,
      "step": 2710
    },
    {
      "epoch": 1.654052734375e-05,
      "model_forward_time": 0.11532044410705566,
      "step": 2710
    },
    {
      "epoch": 1.654052734375e-05,
      "step": 2710,
      "training_step_time": 0.5602507591247559
    },
    {
      "epoch": 1.6546630859375e-05,
      "model_forward_time": 0.11462616920471191,
      "step": 2711
    },
    {
      "epoch": 1.6546630859375e-05,
      "step": 2711,
      "training_step_time": 0.3882317543029785
    },
    {
      "epoch": 1.6552734375e-05,
      "model_forward_time": 0.11450314521789551,
      "step": 2712
    },
    {
      "epoch": 1.6552734375e-05,
      "step": 2712,
      "training_step_time": 0.46105384826660156
    },
    {
      "epoch": 1.6558837890625e-05,
      "model_forward_time": 0.11447024345397949,
      "step": 2713
    },
    {
      "epoch": 1.6558837890625e-05,
      "step": 2713,
      "training_step_time": 0.39041781425476074
    },
    {
      "epoch": 1.656494140625e-05,
      "model_forward_time": 0.11513161659240723,
      "step": 2714
    },
    {
      "epoch": 1.656494140625e-05,
      "step": 2714,
      "training_step_time": 0.39537978172302246
    },
    {
      "epoch": 1.6571044921875e-05,
      "model_forward_time": 0.1146397590637207,
      "step": 2715
    },
    {
      "epoch": 1.6571044921875e-05,
      "step": 2715,
      "training_step_time": 0.4146602153778076
    },
    {
      "epoch": 1.65771484375e-05,
      "model_forward_time": 0.11587929725646973,
      "step": 2716
    },
    {
      "epoch": 1.65771484375e-05,
      "step": 2716,
      "training_step_time": 1.116992473602295
    },
    {
      "epoch": 1.6583251953125e-05,
      "model_forward_time": 0.11452651023864746,
      "step": 2717
    },
    {
      "epoch": 1.6583251953125e-05,
      "step": 2717,
      "training_step_time": 0.38393521308898926
    },
    {
      "epoch": 1.658935546875e-05,
      "model_forward_time": 0.11425232887268066,
      "step": 2718
    },
    {
      "epoch": 1.658935546875e-05,
      "step": 2718,
      "training_step_time": 0.40184497833251953
    },
    {
      "epoch": 1.6595458984375e-05,
      "model_forward_time": 0.11404061317443848,
      "step": 2719
    },
    {
      "epoch": 1.6595458984375e-05,
      "step": 2719,
      "training_step_time": 0.3920471668243408
    },
    {
      "epoch": 1.66015625e-05,
      "grad_norm": 0.27889353036880493,
      "learning_rate": 9.066666666666667e-05,
      "loss": 0.123,
      "step": 2720
    },
    {
      "epoch": 1.66015625e-05,
      "model_forward_time": 0.11406326293945312,
      "step": 2720
    },
    {
      "epoch": 1.66015625e-05,
      "step": 2720,
      "training_step_time": 0.41524171829223633
    },
    {
      "epoch": 1.6607666015625e-05,
      "model_forward_time": 0.11422014236450195,
      "step": 2721
    },
    {
      "epoch": 1.6607666015625e-05,
      "step": 2721,
      "training_step_time": 0.38888120651245117
    },
    {
      "epoch": 1.661376953125e-05,
      "model_forward_time": 0.1149592399597168,
      "step": 2722
    },
    {
      "epoch": 1.661376953125e-05,
      "step": 2722,
      "training_step_time": 0.9285247325897217
    },
    {
      "epoch": 1.6619873046875e-05,
      "model_forward_time": 0.11561751365661621,
      "step": 2723
    },
    {
      "epoch": 1.6619873046875e-05,
      "step": 2723,
      "training_step_time": 0.43143677711486816
    },
    {
      "epoch": 1.66259765625e-05,
      "model_forward_time": 0.1150825023651123,
      "step": 2724
    },
    {
      "epoch": 1.66259765625e-05,
      "step": 2724,
      "training_step_time": 0.46445536613464355
    },
    {
      "epoch": 1.6632080078125e-05,
      "model_forward_time": 0.11538219451904297,
      "step": 2725
    },
    {
      "epoch": 1.6632080078125e-05,
      "step": 2725,
      "training_step_time": 0.4856703281402588
    },
    {
      "epoch": 1.663818359375e-05,
      "model_forward_time": 0.11426925659179688,
      "step": 2726
    },
    {
      "epoch": 1.663818359375e-05,
      "step": 2726,
      "training_step_time": 0.4342515468597412
    },
    {
      "epoch": 1.6644287109375e-05,
      "model_forward_time": 0.11481833457946777,
      "step": 2727
    },
    {
      "epoch": 1.6644287109375e-05,
      "step": 2727,
      "training_step_time": 0.3864872455596924
    },
    {
      "epoch": 1.6650390625e-05,
      "model_forward_time": 0.11390876770019531,
      "step": 2728
    },
    {
      "epoch": 1.6650390625e-05,
      "step": 2728,
      "training_step_time": 0.3867528438568115
    },
    {
      "epoch": 1.6656494140625e-05,
      "model_forward_time": 0.11547183990478516,
      "step": 2729
    },
    {
      "epoch": 1.6656494140625e-05,
      "step": 2729,
      "training_step_time": 0.38897061347961426
    },
    {
      "epoch": 1.666259765625e-05,
      "grad_norm": 0.2934762239456177,
      "learning_rate": 9.1e-05,
      "loss": 0.1262,
      "step": 2730
    },
    {
      "epoch": 1.666259765625e-05,
      "model_forward_time": 0.11444902420043945,
      "step": 2730
    },
    {
      "epoch": 1.666259765625e-05,
      "step": 2730,
      "training_step_time": 0.39243531227111816
    },
    {
      "epoch": 1.6668701171875e-05,
      "model_forward_time": 0.1157069206237793,
      "step": 2731
    },
    {
      "epoch": 1.6668701171875e-05,
      "step": 2731,
      "training_step_time": 0.39208984375
    },
    {
      "epoch": 1.66748046875e-05,
      "model_forward_time": 0.1151590347290039,
      "step": 2732
    },
    {
      "epoch": 1.66748046875e-05,
      "step": 2732,
      "training_step_time": 0.4128742218017578
    },
    {
      "epoch": 1.6680908203125e-05,
      "model_forward_time": 0.11625909805297852,
      "step": 2733
    },
    {
      "epoch": 1.6680908203125e-05,
      "step": 2733,
      "training_step_time": 0.4212534427642822
    },
    {
      "epoch": 1.668701171875e-05,
      "model_forward_time": 0.11529183387756348,
      "step": 2734
    },
    {
      "epoch": 1.668701171875e-05,
      "step": 2734,
      "training_step_time": 0.9287879467010498
    },
    {
      "epoch": 1.6693115234375e-05,
      "model_forward_time": 0.11764812469482422,
      "step": 2735
    },
    {
      "epoch": 1.6693115234375e-05,
      "step": 2735,
      "training_step_time": 0.3872029781341553
    },
    {
      "epoch": 1.669921875e-05,
      "model_forward_time": 0.11511731147766113,
      "step": 2736
    },
    {
      "epoch": 1.669921875e-05,
      "step": 2736,
      "training_step_time": 0.4221510887145996
    },
    {
      "epoch": 1.6705322265625e-05,
      "model_forward_time": 0.11459684371948242,
      "step": 2737
    },
    {
      "epoch": 1.6705322265625e-05,
      "step": 2737,
      "training_step_time": 0.387099027633667
    },
    {
      "epoch": 1.671142578125e-05,
      "model_forward_time": 0.11423325538635254,
      "step": 2738
    },
    {
      "epoch": 1.671142578125e-05,
      "step": 2738,
      "training_step_time": 0.43514132499694824
    },
    {
      "epoch": 1.6717529296875e-05,
      "model_forward_time": 0.11478972434997559,
      "step": 2739
    },
    {
      "epoch": 1.6717529296875e-05,
      "step": 2739,
      "training_step_time": 0.42417216300964355
    },
    {
      "epoch": 1.67236328125e-05,
      "grad_norm": 0.5571705102920532,
      "learning_rate": 9.133333333333334e-05,
      "loss": 0.1288,
      "step": 2740
    },
    {
      "epoch": 1.67236328125e-05,
      "model_forward_time": 0.11490845680236816,
      "step": 2740
    },
    {
      "epoch": 1.67236328125e-05,
      "step": 2740,
      "training_step_time": 0.634868860244751
    },
    {
      "epoch": 1.6729736328125e-05,
      "model_forward_time": 0.11526775360107422,
      "step": 2741
    },
    {
      "epoch": 1.6729736328125e-05,
      "step": 2741,
      "training_step_time": 0.38608527183532715
    },
    {
      "epoch": 1.673583984375e-05,
      "model_forward_time": 0.11429286003112793,
      "step": 2742
    },
    {
      "epoch": 1.673583984375e-05,
      "step": 2742,
      "training_step_time": 0.38703012466430664
    },
    {
      "epoch": 1.6741943359375e-05,
      "model_forward_time": 0.11484956741333008,
      "step": 2743
    },
    {
      "epoch": 1.6741943359375e-05,
      "step": 2743,
      "training_step_time": 0.39365220069885254
    },
    {
      "epoch": 1.6748046875e-05,
      "model_forward_time": 0.11494970321655273,
      "step": 2744
    },
    {
      "epoch": 1.6748046875e-05,
      "step": 2744,
      "training_step_time": 0.3958616256713867
    },
    {
      "epoch": 1.6754150390625e-05,
      "model_forward_time": 0.11522650718688965,
      "step": 2745
    },
    {
      "epoch": 1.6754150390625e-05,
      "step": 2745,
      "training_step_time": 0.3924543857574463
    },
    {
      "epoch": 1.676025390625e-05,
      "model_forward_time": 0.11451458930969238,
      "step": 2746
    },
    {
      "epoch": 1.676025390625e-05,
      "step": 2746,
      "training_step_time": 0.8215148448944092
    },
    {
      "epoch": 1.6766357421875e-05,
      "model_forward_time": 0.11518144607543945,
      "step": 2747
    },
    {
      "epoch": 1.6766357421875e-05,
      "step": 2747,
      "training_step_time": 0.38610386848449707
    },
    {
      "epoch": 1.67724609375e-05,
      "model_forward_time": 0.11639881134033203,
      "step": 2748
    },
    {
      "epoch": 1.67724609375e-05,
      "step": 2748,
      "training_step_time": 0.40937352180480957
    },
    {
      "epoch": 1.6778564453125e-05,
      "model_forward_time": 0.11526727676391602,
      "step": 2749
    },
    {
      "epoch": 1.6778564453125e-05,
      "step": 2749,
      "training_step_time": 0.44680333137512207
    },
    {
      "epoch": 1.678466796875e-05,
      "grad_norm": 0.4228571057319641,
      "learning_rate": 9.166666666666667e-05,
      "loss": 0.1187,
      "step": 2750
    },
    {
      "epoch": 1.678466796875e-05,
      "model_forward_time": 0.11450934410095215,
      "step": 2750
    },
    {
      "epoch": 1.678466796875e-05,
      "step": 2750,
      "training_step_time": 0.3635098934173584
    },
    {
      "epoch": 1.6790771484375e-05,
      "model_forward_time": 0.1143641471862793,
      "step": 2751
    },
    {
      "epoch": 1.6790771484375e-05,
      "step": 2751,
      "training_step_time": 0.43129825592041016
    },
    {
      "epoch": 1.6796875e-05,
      "model_forward_time": 0.11533164978027344,
      "step": 2752
    },
    {
      "epoch": 1.6796875e-05,
      "step": 2752,
      "training_step_time": 0.5358107089996338
    },
    {
      "epoch": 1.6802978515625e-05,
      "model_forward_time": 0.1146695613861084,
      "step": 2753
    },
    {
      "epoch": 1.6802978515625e-05,
      "step": 2753,
      "training_step_time": 0.4123222827911377
    },
    {
      "epoch": 1.680908203125e-05,
      "model_forward_time": 0.11507010459899902,
      "step": 2754
    },
    {
      "epoch": 1.680908203125e-05,
      "step": 2754,
      "training_step_time": 0.4029874801635742
    },
    {
      "epoch": 1.6815185546875e-05,
      "model_forward_time": 0.11459660530090332,
      "step": 2755
    },
    {
      "epoch": 1.6815185546875e-05,
      "step": 2755,
      "training_step_time": 0.39618539810180664
    },
    {
      "epoch": 1.68212890625e-05,
      "model_forward_time": 0.11504149436950684,
      "step": 2756
    },
    {
      "epoch": 1.68212890625e-05,
      "step": 2756,
      "training_step_time": 0.39522862434387207
    },
    {
      "epoch": 1.6827392578125e-05,
      "model_forward_time": 0.1148383617401123,
      "step": 2757
    },
    {
      "epoch": 1.6827392578125e-05,
      "step": 2757,
      "training_step_time": 0.3901035785675049
    },
    {
      "epoch": 1.683349609375e-05,
      "model_forward_time": 0.11525297164916992,
      "step": 2758
    },
    {
      "epoch": 1.683349609375e-05,
      "step": 2758,
      "training_step_time": 1.0389938354492188
    },
    {
      "epoch": 1.6839599609375e-05,
      "model_forward_time": 0.11469578742980957,
      "step": 2759
    },
    {
      "epoch": 1.6839599609375e-05,
      "step": 2759,
      "training_step_time": 0.4776763916015625
    },
    {
      "epoch": 1.6845703125e-05,
      "grad_norm": 0.41523122787475586,
      "learning_rate": 9.200000000000001e-05,
      "loss": 0.1249,
      "step": 2760
    },
    {
      "epoch": 1.6845703125e-05,
      "model_forward_time": 0.11410665512084961,
      "step": 2760
    },
    {
      "epoch": 1.6845703125e-05,
      "step": 2760,
      "training_step_time": 0.3860745429992676
    },
    {
      "epoch": 1.6851806640625e-05,
      "model_forward_time": 0.1141207218170166,
      "step": 2761
    },
    {
      "epoch": 1.6851806640625e-05,
      "step": 2761,
      "training_step_time": 0.3890223503112793
    },
    {
      "epoch": 1.685791015625e-05,
      "model_forward_time": 0.11378788948059082,
      "step": 2762
    },
    {
      "epoch": 1.685791015625e-05,
      "step": 2762,
      "training_step_time": 0.4135899543762207
    },
    {
      "epoch": 1.6864013671875e-05,
      "model_forward_time": 0.11444807052612305,
      "step": 2763
    },
    {
      "epoch": 1.6864013671875e-05,
      "step": 2763,
      "training_step_time": 0.40334224700927734
    },
    {
      "epoch": 1.68701171875e-05,
      "model_forward_time": 0.1154937744140625,
      "step": 2764
    },
    {
      "epoch": 1.68701171875e-05,
      "step": 2764,
      "training_step_time": 0.6110455989837646
    },
    {
      "epoch": 1.6876220703125e-05,
      "model_forward_time": 0.11454319953918457,
      "step": 2765
    },
    {
      "epoch": 1.6876220703125e-05,
      "step": 2765,
      "training_step_time": 0.41657114028930664
    },
    {
      "epoch": 1.688232421875e-05,
      "model_forward_time": 0.11448860168457031,
      "step": 2766
    },
    {
      "epoch": 1.688232421875e-05,
      "step": 2766,
      "training_step_time": 0.39000844955444336
    },
    {
      "epoch": 1.6888427734375e-05,
      "model_forward_time": 0.1148080825805664,
      "step": 2767
    },
    {
      "epoch": 1.6888427734375e-05,
      "step": 2767,
      "training_step_time": 0.3889915943145752
    },
    {
      "epoch": 1.689453125e-05,
      "model_forward_time": 0.11518120765686035,
      "step": 2768
    },
    {
      "epoch": 1.689453125e-05,
      "step": 2768,
      "training_step_time": 0.3892683982849121
    },
    {
      "epoch": 1.6900634765625e-05,
      "model_forward_time": 0.11520123481750488,
      "step": 2769
    },
    {
      "epoch": 1.6900634765625e-05,
      "step": 2769,
      "training_step_time": 0.39660191535949707
    },
    {
      "epoch": 1.690673828125e-05,
      "grad_norm": 0.505501925945282,
      "learning_rate": 9.233333333333333e-05,
      "loss": 0.1219,
      "step": 2770
    },
    {
      "epoch": 1.690673828125e-05,
      "model_forward_time": 0.11490011215209961,
      "step": 2770
    },
    {
      "epoch": 1.690673828125e-05,
      "step": 2770,
      "training_step_time": 1.1691391468048096
    },
    {
      "epoch": 1.6912841796875e-05,
      "model_forward_time": 0.11409902572631836,
      "step": 2771
    },
    {
      "epoch": 1.6912841796875e-05,
      "step": 2771,
      "training_step_time": 0.41805315017700195
    },
    {
      "epoch": 1.69189453125e-05,
      "model_forward_time": 0.11404061317443848,
      "step": 2772
    },
    {
      "epoch": 1.69189453125e-05,
      "step": 2772,
      "training_step_time": 0.3939988613128662
    },
    {
      "epoch": 1.6925048828125e-05,
      "model_forward_time": 0.11399030685424805,
      "step": 2773
    },
    {
      "epoch": 1.6925048828125e-05,
      "step": 2773,
      "training_step_time": 0.38407349586486816
    },
    {
      "epoch": 1.693115234375e-05,
      "model_forward_time": 0.11396455764770508,
      "step": 2774
    },
    {
      "epoch": 1.693115234375e-05,
      "step": 2774,
      "training_step_time": 0.4033544063568115
    },
    {
      "epoch": 1.6937255859375e-05,
      "model_forward_time": 0.11445403099060059,
      "step": 2775
    },
    {
      "epoch": 1.6937255859375e-05,
      "step": 2775,
      "training_step_time": 0.44818639755249023
    },
    {
      "epoch": 1.6943359375e-05,
      "model_forward_time": 0.11461639404296875,
      "step": 2776
    },
    {
      "epoch": 1.6943359375e-05,
      "step": 2776,
      "training_step_time": 0.6115663051605225
    },
    {
      "epoch": 1.6949462890625e-05,
      "model_forward_time": 0.11471748352050781,
      "step": 2777
    },
    {
      "epoch": 1.6949462890625e-05,
      "step": 2777,
      "training_step_time": 0.41943979263305664
    },
    {
      "epoch": 1.695556640625e-05,
      "model_forward_time": 0.11475729942321777,
      "step": 2778
    },
    {
      "epoch": 1.695556640625e-05,
      "step": 2778,
      "training_step_time": 0.46090173721313477
    },
    {
      "epoch": 1.6961669921875e-05,
      "model_forward_time": 0.11495375633239746,
      "step": 2779
    },
    {
      "epoch": 1.6961669921875e-05,
      "step": 2779,
      "training_step_time": 0.4124116897583008
    },
    {
      "epoch": 1.69677734375e-05,
      "grad_norm": 0.44913250207901,
      "learning_rate": 9.266666666666666e-05,
      "loss": 0.1224,
      "step": 2780
    },
    {
      "epoch": 1.69677734375e-05,
      "model_forward_time": 0.11495113372802734,
      "step": 2780
    },
    {
      "epoch": 1.69677734375e-05,
      "step": 2780,
      "training_step_time": 0.4020853042602539
    },
    {
      "epoch": 1.6973876953125e-05,
      "model_forward_time": 0.11524343490600586,
      "step": 2781
    },
    {
      "epoch": 1.6973876953125e-05,
      "step": 2781,
      "training_step_time": 0.38643503189086914
    },
    {
      "epoch": 1.697998046875e-05,
      "model_forward_time": 0.11530685424804688,
      "step": 2782
    },
    {
      "epoch": 1.697998046875e-05,
      "step": 2782,
      "training_step_time": 0.81610107421875
    },
    {
      "epoch": 1.6986083984375e-05,
      "model_forward_time": 0.11430191993713379,
      "step": 2783
    },
    {
      "epoch": 1.6986083984375e-05,
      "step": 2783,
      "training_step_time": 0.4268481731414795
    },
    {
      "epoch": 1.69921875e-05,
      "model_forward_time": 0.11447834968566895,
      "step": 2784
    },
    {
      "epoch": 1.69921875e-05,
      "step": 2784,
      "training_step_time": 0.42768001556396484
    },
    {
      "epoch": 1.6998291015625e-05,
      "model_forward_time": 0.11433219909667969,
      "step": 2785
    },
    {
      "epoch": 1.6998291015625e-05,
      "step": 2785,
      "training_step_time": 0.40469884872436523
    },
    {
      "epoch": 1.700439453125e-05,
      "model_forward_time": 0.11422491073608398,
      "step": 2786
    },
    {
      "epoch": 1.700439453125e-05,
      "step": 2786,
      "training_step_time": 0.38396215438842773
    },
    {
      "epoch": 1.7010498046875e-05,
      "model_forward_time": 0.11477065086364746,
      "step": 2787
    },
    {
      "epoch": 1.7010498046875e-05,
      "step": 2787,
      "training_step_time": 0.38236546516418457
    },
    {
      "epoch": 1.70166015625e-05,
      "model_forward_time": 0.11567354202270508,
      "step": 2788
    },
    {
      "epoch": 1.70166015625e-05,
      "step": 2788,
      "training_step_time": 0.6949186325073242
    },
    {
      "epoch": 1.7022705078125e-05,
      "model_forward_time": 0.11439085006713867,
      "step": 2789
    },
    {
      "epoch": 1.7022705078125e-05,
      "step": 2789,
      "training_step_time": 0.5395410060882568
    },
    {
      "epoch": 1.702880859375e-05,
      "grad_norm": 0.5357421636581421,
      "learning_rate": 9.300000000000001e-05,
      "loss": 0.1185,
      "step": 2790
    },
    {
      "epoch": 1.702880859375e-05,
      "model_forward_time": 0.11416316032409668,
      "step": 2790
    },
    {
      "epoch": 1.702880859375e-05,
      "step": 2790,
      "training_step_time": 0.4411349296569824
    },
    {
      "epoch": 1.7034912109375e-05,
      "model_forward_time": 0.11439061164855957,
      "step": 2791
    },
    {
      "epoch": 1.7034912109375e-05,
      "step": 2791,
      "training_step_time": 0.4720454216003418
    },
    {
      "epoch": 1.7041015625e-05,
      "model_forward_time": 0.11476731300354004,
      "step": 2792
    },
    {
      "epoch": 1.7041015625e-05,
      "step": 2792,
      "training_step_time": 0.4623875617980957
    },
    {
      "epoch": 1.7047119140625e-05,
      "model_forward_time": 0.1143646240234375,
      "step": 2793
    },
    {
      "epoch": 1.7047119140625e-05,
      "step": 2793,
      "training_step_time": 0.39144039154052734
    },
    {
      "epoch": 1.705322265625e-05,
      "model_forward_time": 0.11472296714782715,
      "step": 2794
    },
    {
      "epoch": 1.705322265625e-05,
      "step": 2794,
      "training_step_time": 0.45089101791381836
    },
    {
      "epoch": 1.7059326171875e-05,
      "model_forward_time": 0.11509323120117188,
      "step": 2795
    },
    {
      "epoch": 1.7059326171875e-05,
      "step": 2795,
      "training_step_time": 0.3870856761932373
    },
    {
      "epoch": 1.70654296875e-05,
      "model_forward_time": 0.11588788032531738,
      "step": 2796
    },
    {
      "epoch": 1.70654296875e-05,
      "step": 2796,
      "training_step_time": 0.39177989959716797
    },
    {
      "epoch": 1.7071533203125e-05,
      "model_forward_time": 0.11489725112915039,
      "step": 2797
    },
    {
      "epoch": 1.7071533203125e-05,
      "step": 2797,
      "training_step_time": 0.45595812797546387
    },
    {
      "epoch": 1.707763671875e-05,
      "model_forward_time": 0.11554765701293945,
      "step": 2798
    },
    {
      "epoch": 1.707763671875e-05,
      "step": 2798,
      "training_step_time": 0.4241790771484375
    },
    {
      "epoch": 1.7083740234375e-05,
      "model_forward_time": 0.1151421070098877,
      "step": 2799
    },
    {
      "epoch": 1.7083740234375e-05,
      "step": 2799,
      "training_step_time": 0.44629979133605957
    },
    {
      "epoch": 1.708984375e-05,
      "grad_norm": 0.3678658902645111,
      "learning_rate": 9.333333333333334e-05,
      "loss": 0.1244,
      "step": 2800
    },
    {
      "epoch": 1.708984375e-05,
      "model_forward_time": 0.11550068855285645,
      "step": 2800
    },
    {
      "epoch": 1.708984375e-05,
      "step": 2800,
      "training_step_time": 0.8051025867462158
    },
    {
      "epoch": 1.7095947265625e-05,
      "model_forward_time": 0.11559486389160156,
      "step": 2801
    },
    {
      "epoch": 1.7095947265625e-05,
      "step": 2801,
      "training_step_time": 0.5954604148864746
    },
    {
      "epoch": 1.710205078125e-05,
      "model_forward_time": 0.12118172645568848,
      "step": 2802
    },
    {
      "epoch": 1.710205078125e-05,
      "step": 2802,
      "training_step_time": 0.7179338932037354
    },
    {
      "epoch": 1.7108154296875e-05,
      "model_forward_time": 0.12289166450500488,
      "step": 2803
    },
    {
      "epoch": 1.7108154296875e-05,
      "step": 2803,
      "training_step_time": 0.6040511131286621
    },
    {
      "epoch": 1.71142578125e-05,
      "model_forward_time": 0.11639165878295898,
      "step": 2804
    },
    {
      "epoch": 1.71142578125e-05,
      "step": 2804,
      "training_step_time": 0.7638218402862549
    },
    {
      "epoch": 1.7120361328125e-05,
      "model_forward_time": 0.11679768562316895,
      "step": 2805
    },
    {
      "epoch": 1.7120361328125e-05,
      "step": 2805,
      "training_step_time": 0.6688432693481445
    },
    {
      "epoch": 1.712646484375e-05,
      "model_forward_time": 0.12501096725463867,
      "step": 2806
    },
    {
      "epoch": 1.712646484375e-05,
      "step": 2806,
      "training_step_time": 0.663444995880127
    },
    {
      "epoch": 1.7132568359375e-05,
      "model_forward_time": 0.11913657188415527,
      "step": 2807
    },
    {
      "epoch": 1.7132568359375e-05,
      "step": 2807,
      "training_step_time": 0.6579642295837402
    },
    {
      "epoch": 1.7138671875e-05,
      "model_forward_time": 0.1205134391784668,
      "step": 2808
    },
    {
      "epoch": 1.7138671875e-05,
      "step": 2808,
      "training_step_time": 0.6911187171936035
    },
    {
      "epoch": 1.7144775390625e-05,
      "model_forward_time": 0.11821675300598145,
      "step": 2809
    },
    {
      "epoch": 1.7144775390625e-05,
      "step": 2809,
      "training_step_time": 0.6582918167114258
    },
    {
      "epoch": 1.715087890625e-05,
      "grad_norm": 0.3431757092475891,
      "learning_rate": 9.366666666666668e-05,
      "loss": 0.1245,
      "step": 2810
    },
    {
      "epoch": 1.715087890625e-05,
      "model_forward_time": 0.13089513778686523,
      "step": 2810
    },
    {
      "epoch": 1.715087890625e-05,
      "step": 2810,
      "training_step_time": 0.7189614772796631
    },
    {
      "epoch": 1.7156982421875e-05,
      "model_forward_time": 0.12270545959472656,
      "step": 2811
    },
    {
      "epoch": 1.7156982421875e-05,
      "step": 2811,
      "training_step_time": 0.6731827259063721
    },
    {
      "epoch": 1.71630859375e-05,
      "model_forward_time": 0.11721491813659668,
      "step": 2812
    },
    {
      "epoch": 1.71630859375e-05,
      "step": 2812,
      "training_step_time": 0.6278407573699951
    },
    {
      "epoch": 1.7169189453125e-05,
      "model_forward_time": 0.1361980438232422,
      "step": 2813
    },
    {
      "epoch": 1.7169189453125e-05,
      "step": 2813,
      "training_step_time": 0.6509943008422852
    },
    {
      "epoch": 1.717529296875e-05,
      "model_forward_time": 0.12105989456176758,
      "step": 2814
    },
    {
      "epoch": 1.717529296875e-05,
      "step": 2814,
      "training_step_time": 0.6769986152648926
    },
    {
      "epoch": 1.7181396484375e-05,
      "model_forward_time": 0.11919093132019043,
      "step": 2815
    },
    {
      "epoch": 1.7181396484375e-05,
      "step": 2815,
      "training_step_time": 0.6831300258636475
    },
    {
      "epoch": 1.71875e-05,
      "model_forward_time": 0.12156105041503906,
      "step": 2816
    },
    {
      "epoch": 1.71875e-05,
      "step": 2816,
      "training_step_time": 0.6698267459869385
    },
    {
      "epoch": 1.7193603515625e-05,
      "model_forward_time": 0.11926889419555664,
      "step": 2817
    },
    {
      "epoch": 1.7193603515625e-05,
      "step": 2817,
      "training_step_time": 0.7428948879241943
    },
    {
      "epoch": 1.719970703125e-05,
      "model_forward_time": 0.11772394180297852,
      "step": 2818
    },
    {
      "epoch": 1.719970703125e-05,
      "step": 2818,
      "training_step_time": 0.725867509841919
    },
    {
      "epoch": 1.7205810546875e-05,
      "model_forward_time": 0.119171142578125,
      "step": 2819
    },
    {
      "epoch": 1.7205810546875e-05,
      "step": 2819,
      "training_step_time": 0.6299479007720947
    },
    {
      "epoch": 1.72119140625e-05,
      "grad_norm": 0.37067529559135437,
      "learning_rate": 9.4e-05,
      "loss": 0.1215,
      "step": 2820
    },
    {
      "epoch": 1.72119140625e-05,
      "model_forward_time": 0.11979794502258301,
      "step": 2820
    },
    {
      "epoch": 1.72119140625e-05,
      "step": 2820,
      "training_step_time": 0.7119941711425781
    },
    {
      "epoch": 1.7218017578125e-05,
      "model_forward_time": 0.11850857734680176,
      "step": 2821
    },
    {
      "epoch": 1.7218017578125e-05,
      "step": 2821,
      "training_step_time": 0.8034045696258545
    },
    {
      "epoch": 1.722412109375e-05,
      "model_forward_time": 0.11857867240905762,
      "step": 2822
    },
    {
      "epoch": 1.722412109375e-05,
      "step": 2822,
      "training_step_time": 0.7054595947265625
    },
    {
      "epoch": 1.7230224609375e-05,
      "model_forward_time": 0.11846351623535156,
      "step": 2823
    },
    {
      "epoch": 1.7230224609375e-05,
      "step": 2823,
      "training_step_time": 0.6499767303466797
    },
    {
      "epoch": 1.7236328125e-05,
      "model_forward_time": 0.11655282974243164,
      "step": 2824
    },
    {
      "epoch": 1.7236328125e-05,
      "step": 2824,
      "training_step_time": 0.6272873878479004
    },
    {
      "epoch": 1.7242431640625e-05,
      "model_forward_time": 0.11923813819885254,
      "step": 2825
    },
    {
      "epoch": 1.7242431640625e-05,
      "step": 2825,
      "training_step_time": 0.6986947059631348
    },
    {
      "epoch": 1.724853515625e-05,
      "model_forward_time": 0.1206202507019043,
      "step": 2826
    },
    {
      "epoch": 1.724853515625e-05,
      "step": 2826,
      "training_step_time": 0.7017765045166016
    },
    {
      "epoch": 1.7254638671875e-05,
      "model_forward_time": 0.11985397338867188,
      "step": 2827
    },
    {
      "epoch": 1.7254638671875e-05,
      "step": 2827,
      "training_step_time": 0.7083420753479004
    },
    {
      "epoch": 1.72607421875e-05,
      "model_forward_time": 0.12133288383483887,
      "step": 2828
    },
    {
      "epoch": 1.72607421875e-05,
      "step": 2828,
      "training_step_time": 0.6665081977844238
    },
    {
      "epoch": 1.7266845703125e-05,
      "model_forward_time": 0.12168002128601074,
      "step": 2829
    },
    {
      "epoch": 1.7266845703125e-05,
      "step": 2829,
      "training_step_time": 0.6333777904510498
    },
    {
      "epoch": 1.727294921875e-05,
      "grad_norm": 0.3687140941619873,
      "learning_rate": 9.433333333333334e-05,
      "loss": 0.1294,
      "step": 2830
    },
    {
      "epoch": 1.727294921875e-05,
      "model_forward_time": 0.12170052528381348,
      "step": 2830
    },
    {
      "epoch": 1.727294921875e-05,
      "step": 2830,
      "training_step_time": 0.7576100826263428
    },
    {
      "epoch": 1.7279052734375e-05,
      "model_forward_time": 0.1328287124633789,
      "step": 2831
    },
    {
      "epoch": 1.7279052734375e-05,
      "step": 2831,
      "training_step_time": 0.6386523246765137
    },
    {
      "epoch": 1.728515625e-05,
      "model_forward_time": 0.12073659896850586,
      "step": 2832
    },
    {
      "epoch": 1.728515625e-05,
      "step": 2832,
      "training_step_time": 0.7333533763885498
    },
    {
      "epoch": 1.7291259765625e-05,
      "model_forward_time": 0.11836838722229004,
      "step": 2833
    },
    {
      "epoch": 1.7291259765625e-05,
      "step": 2833,
      "training_step_time": 0.7115457057952881
    },
    {
      "epoch": 1.729736328125e-05,
      "model_forward_time": 0.12045431137084961,
      "step": 2834
    },
    {
      "epoch": 1.729736328125e-05,
      "step": 2834,
      "training_step_time": 0.6889045238494873
    },
    {
      "epoch": 1.7303466796875e-05,
      "model_forward_time": 0.1237952709197998,
      "step": 2835
    },
    {
      "epoch": 1.7303466796875e-05,
      "step": 2835,
      "training_step_time": 0.6849784851074219
    },
    {
      "epoch": 1.73095703125e-05,
      "model_forward_time": 0.11973357200622559,
      "step": 2836
    },
    {
      "epoch": 1.73095703125e-05,
      "step": 2836,
      "training_step_time": 0.7381584644317627
    },
    {
      "epoch": 1.7315673828125e-05,
      "model_forward_time": 0.11792135238647461,
      "step": 2837
    },
    {
      "epoch": 1.7315673828125e-05,
      "step": 2837,
      "training_step_time": 0.720069169998169
    },
    {
      "epoch": 1.732177734375e-05,
      "model_forward_time": 0.1192924976348877,
      "step": 2838
    },
    {
      "epoch": 1.732177734375e-05,
      "step": 2838,
      "training_step_time": 0.7559809684753418
    },
    {
      "epoch": 1.7327880859375e-05,
      "model_forward_time": 0.11858201026916504,
      "step": 2839
    },
    {
      "epoch": 1.7327880859375e-05,
      "step": 2839,
      "training_step_time": 0.697706937789917
    },
    {
      "epoch": 1.7333984375e-05,
      "grad_norm": 0.37371280789375305,
      "learning_rate": 9.466666666666667e-05,
      "loss": 0.1254,
      "step": 2840
    },
    {
      "epoch": 1.7333984375e-05,
      "model_forward_time": 0.11942791938781738,
      "step": 2840
    },
    {
      "epoch": 1.7333984375e-05,
      "step": 2840,
      "training_step_time": 0.5859827995300293
    },
    {
      "epoch": 1.7340087890625e-05,
      "model_forward_time": 0.12287425994873047,
      "step": 2841
    },
    {
      "epoch": 1.7340087890625e-05,
      "step": 2841,
      "training_step_time": 0.7506735324859619
    },
    {
      "epoch": 1.734619140625e-05,
      "model_forward_time": 0.12746763229370117,
      "step": 2842
    },
    {
      "epoch": 1.734619140625e-05,
      "step": 2842,
      "training_step_time": 0.6543600559234619
    },
    {
      "epoch": 1.7352294921875e-05,
      "model_forward_time": 0.12101197242736816,
      "step": 2843
    },
    {
      "epoch": 1.7352294921875e-05,
      "step": 2843,
      "training_step_time": 0.6618483066558838
    },
    {
      "epoch": 1.73583984375e-05,
      "model_forward_time": 0.12492632865905762,
      "step": 2844
    },
    {
      "epoch": 1.73583984375e-05,
      "step": 2844,
      "training_step_time": 0.6851532459259033
    },
    {
      "epoch": 1.7364501953125e-05,
      "model_forward_time": 0.11712980270385742,
      "step": 2845
    },
    {
      "epoch": 1.7364501953125e-05,
      "step": 2845,
      "training_step_time": 0.675626277923584
    },
    {
      "epoch": 1.737060546875e-05,
      "model_forward_time": 0.12027621269226074,
      "step": 2846
    },
    {
      "epoch": 1.737060546875e-05,
      "step": 2846,
      "training_step_time": 0.7202212810516357
    },
    {
      "epoch": 1.7376708984375e-05,
      "model_forward_time": 0.11952733993530273,
      "step": 2847
    },
    {
      "epoch": 1.7376708984375e-05,
      "step": 2847,
      "training_step_time": 0.6491947174072266
    },
    {
      "epoch": 1.73828125e-05,
      "model_forward_time": 0.12018084526062012,
      "step": 2848
    },
    {
      "epoch": 1.73828125e-05,
      "step": 2848,
      "training_step_time": 0.7154581546783447
    },
    {
      "epoch": 1.7388916015625e-05,
      "model_forward_time": 0.12609028816223145,
      "step": 2849
    },
    {
      "epoch": 1.7388916015625e-05,
      "step": 2849,
      "training_step_time": 0.781912088394165
    },
    {
      "epoch": 1.739501953125e-05,
      "grad_norm": 0.5943578481674194,
      "learning_rate": 9.5e-05,
      "loss": 0.1291,
      "step": 2850
    },
    {
      "epoch": 1.739501953125e-05,
      "model_forward_time": 0.12208390235900879,
      "step": 2850
    },
    {
      "epoch": 1.739501953125e-05,
      "step": 2850,
      "training_step_time": 0.6919076442718506
    },
    {
      "epoch": 1.7401123046875e-05,
      "model_forward_time": 0.12463188171386719,
      "step": 2851
    },
    {
      "epoch": 1.7401123046875e-05,
      "step": 2851,
      "training_step_time": 0.7232887744903564
    },
    {
      "epoch": 1.74072265625e-05,
      "model_forward_time": 0.12410092353820801,
      "step": 2852
    },
    {
      "epoch": 1.74072265625e-05,
      "step": 2852,
      "training_step_time": 0.6722548007965088
    },
    {
      "epoch": 1.7413330078125e-05,
      "model_forward_time": 0.11933636665344238,
      "step": 2853
    },
    {
      "epoch": 1.7413330078125e-05,
      "step": 2853,
      "training_step_time": 0.6865859031677246
    },
    {
      "epoch": 1.741943359375e-05,
      "model_forward_time": 0.11753416061401367,
      "step": 2854
    },
    {
      "epoch": 1.741943359375e-05,
      "step": 2854,
      "training_step_time": 0.721712589263916
    },
    {
      "epoch": 1.7425537109375e-05,
      "model_forward_time": 0.11701726913452148,
      "step": 2855
    },
    {
      "epoch": 1.7425537109375e-05,
      "step": 2855,
      "training_step_time": 0.7058818340301514
    },
    {
      "epoch": 1.7431640625e-05,
      "model_forward_time": 0.11895895004272461,
      "step": 2856
    },
    {
      "epoch": 1.7431640625e-05,
      "step": 2856,
      "training_step_time": 0.6380226612091064
    },
    {
      "epoch": 1.7437744140625e-05,
      "model_forward_time": 0.11849308013916016,
      "step": 2857
    },
    {
      "epoch": 1.7437744140625e-05,
      "step": 2857,
      "training_step_time": 0.741337776184082
    },
    {
      "epoch": 1.744384765625e-05,
      "model_forward_time": 0.1180727481842041,
      "step": 2858
    },
    {
      "epoch": 1.744384765625e-05,
      "step": 2858,
      "training_step_time": 0.6835470199584961
    },
    {
      "epoch": 1.7449951171875e-05,
      "model_forward_time": 0.12487244606018066,
      "step": 2859
    },
    {
      "epoch": 1.7449951171875e-05,
      "step": 2859,
      "training_step_time": 0.6589596271514893
    },
    {
      "epoch": 1.74560546875e-05,
      "grad_norm": 0.5648719072341919,
      "learning_rate": 9.533333333333334e-05,
      "loss": 0.1239,
      "step": 2860
    },
    {
      "epoch": 1.74560546875e-05,
      "model_forward_time": 0.12215232849121094,
      "step": 2860
    },
    {
      "epoch": 1.74560546875e-05,
      "step": 2860,
      "training_step_time": 0.7260441780090332
    },
    {
      "epoch": 1.7462158203125e-05,
      "model_forward_time": 0.12096309661865234,
      "step": 2861
    },
    {
      "epoch": 1.7462158203125e-05,
      "step": 2861,
      "training_step_time": 0.6200153827667236
    },
    {
      "epoch": 1.746826171875e-05,
      "model_forward_time": 0.12290287017822266,
      "step": 2862
    },
    {
      "epoch": 1.746826171875e-05,
      "step": 2862,
      "training_step_time": 0.6175808906555176
    },
    {
      "epoch": 1.7474365234375e-05,
      "model_forward_time": 0.11615467071533203,
      "step": 2863
    },
    {
      "epoch": 1.7474365234375e-05,
      "step": 2863,
      "training_step_time": 0.7231404781341553
    },
    {
      "epoch": 1.748046875e-05,
      "model_forward_time": 0.11732244491577148,
      "step": 2864
    },
    {
      "epoch": 1.748046875e-05,
      "step": 2864,
      "training_step_time": 0.6123616695404053
    },
    {
      "epoch": 1.7486572265625e-05,
      "model_forward_time": 0.12225627899169922,
      "step": 2865
    },
    {
      "epoch": 1.7486572265625e-05,
      "step": 2865,
      "training_step_time": 0.5964419841766357
    },
    {
      "epoch": 1.749267578125e-05,
      "model_forward_time": 0.1208333969116211,
      "step": 2866
    },
    {
      "epoch": 1.749267578125e-05,
      "step": 2866,
      "training_step_time": 0.5704185962677002
    },
    {
      "epoch": 1.7498779296875e-05,
      "model_forward_time": 0.13067626953125,
      "step": 2867
    },
    {
      "epoch": 1.7498779296875e-05,
      "step": 2867,
      "training_step_time": 0.6105523109436035
    },
    {
      "epoch": 1.75048828125e-05,
      "model_forward_time": 0.12644410133361816,
      "step": 2868
    },
    {
      "epoch": 1.75048828125e-05,
      "step": 2868,
      "training_step_time": 0.5940251350402832
    },
    {
      "epoch": 1.7510986328125e-05,
      "model_forward_time": 0.12017440795898438,
      "step": 2869
    },
    {
      "epoch": 1.7510986328125e-05,
      "step": 2869,
      "training_step_time": 0.48426365852355957
    },
    {
      "epoch": 1.751708984375e-05,
      "grad_norm": 0.5149357914924622,
      "learning_rate": 9.566666666666667e-05,
      "loss": 0.1244,
      "step": 2870
    },
    {
      "epoch": 1.751708984375e-05,
      "model_forward_time": 0.12834954261779785,
      "step": 2870
    },
    {
      "epoch": 1.751708984375e-05,
      "step": 2870,
      "training_step_time": 0.5700678825378418
    },
    {
      "epoch": 1.7523193359375e-05,
      "model_forward_time": 0.11807680130004883,
      "step": 2871
    },
    {
      "epoch": 1.7523193359375e-05,
      "step": 2871,
      "training_step_time": 0.5165095329284668
    },
    {
      "epoch": 1.7529296875e-05,
      "model_forward_time": 0.11997175216674805,
      "step": 2872
    },
    {
      "epoch": 1.7529296875e-05,
      "step": 2872,
      "training_step_time": 0.5069892406463623
    },
    {
      "epoch": 1.7535400390625e-05,
      "model_forward_time": 0.11657977104187012,
      "step": 2873
    },
    {
      "epoch": 1.7535400390625e-05,
      "step": 2873,
      "training_step_time": 0.4037957191467285
    },
    {
      "epoch": 1.754150390625e-05,
      "model_forward_time": 0.11750483512878418,
      "step": 2874
    },
    {
      "epoch": 1.754150390625e-05,
      "step": 2874,
      "training_step_time": 0.4006028175354004
    },
    {
      "epoch": 1.7547607421875e-05,
      "model_forward_time": 0.11722302436828613,
      "step": 2875
    },
    {
      "epoch": 1.7547607421875e-05,
      "step": 2875,
      "training_step_time": 0.4497981071472168
    },
    {
      "epoch": 1.75537109375e-05,
      "model_forward_time": 0.11714792251586914,
      "step": 2876
    },
    {
      "epoch": 1.75537109375e-05,
      "step": 2876,
      "training_step_time": 0.41001367568969727
    },
    {
      "epoch": 1.7559814453125e-05,
      "model_forward_time": 0.11522936820983887,
      "step": 2877
    },
    {
      "epoch": 1.7559814453125e-05,
      "step": 2877,
      "training_step_time": 0.3947875499725342
    },
    {
      "epoch": 1.756591796875e-05,
      "model_forward_time": 0.1157984733581543,
      "step": 2878
    },
    {
      "epoch": 1.756591796875e-05,
      "step": 2878,
      "training_step_time": 0.3800075054168701
    },
    {
      "epoch": 1.7572021484375e-05,
      "model_forward_time": 0.11510062217712402,
      "step": 2879
    },
    {
      "epoch": 1.7572021484375e-05,
      "step": 2879,
      "training_step_time": 0.3956584930419922
    },
    {
      "epoch": 1.7578125e-05,
      "grad_norm": 0.3896704316139221,
      "learning_rate": 9.6e-05,
      "loss": 0.1247,
      "step": 2880
    },
    {
      "epoch": 1.7578125e-05,
      "model_forward_time": 0.11553835868835449,
      "step": 2880
    },
    {
      "epoch": 1.7578125e-05,
      "step": 2880,
      "training_step_time": 0.46152186393737793
    },
    {
      "epoch": 1.7584228515625e-05,
      "model_forward_time": 0.11577415466308594,
      "step": 2881
    },
    {
      "epoch": 1.7584228515625e-05,
      "step": 2881,
      "training_step_time": 0.4738450050354004
    },
    {
      "epoch": 1.759033203125e-05,
      "model_forward_time": 0.11511826515197754,
      "step": 2882
    },
    {
      "epoch": 1.759033203125e-05,
      "step": 2882,
      "training_step_time": 0.40428662300109863
    },
    {
      "epoch": 1.7596435546875e-05,
      "model_forward_time": 0.11562061309814453,
      "step": 2883
    },
    {
      "epoch": 1.7596435546875e-05,
      "step": 2883,
      "training_step_time": 0.3941936492919922
    },
    {
      "epoch": 1.76025390625e-05,
      "model_forward_time": 0.11564898490905762,
      "step": 2884
    },
    {
      "epoch": 1.76025390625e-05,
      "step": 2884,
      "training_step_time": 0.38396692276000977
    },
    {
      "epoch": 1.7608642578125e-05,
      "model_forward_time": 0.11580324172973633,
      "step": 2885
    },
    {
      "epoch": 1.7608642578125e-05,
      "step": 2885,
      "training_step_time": 0.48996615409851074
    },
    {
      "epoch": 1.761474609375e-05,
      "model_forward_time": 0.11514973640441895,
      "step": 2886
    },
    {
      "epoch": 1.761474609375e-05,
      "step": 2886,
      "training_step_time": 0.5203235149383545
    },
    {
      "epoch": 1.7620849609375e-05,
      "model_forward_time": 0.11660027503967285,
      "step": 2887
    },
    {
      "epoch": 1.7620849609375e-05,
      "step": 2887,
      "training_step_time": 0.3984487056732178
    },
    {
      "epoch": 1.7626953125e-05,
      "model_forward_time": 0.11517786979675293,
      "step": 2888
    },
    {
      "epoch": 1.7626953125e-05,
      "step": 2888,
      "training_step_time": 0.39024949073791504
    },
    {
      "epoch": 1.7633056640625e-05,
      "model_forward_time": 0.11551022529602051,
      "step": 2889
    },
    {
      "epoch": 1.7633056640625e-05,
      "step": 2889,
      "training_step_time": 0.3926243782043457
    },
    {
      "epoch": 1.763916015625e-05,
      "grad_norm": 0.36102667450904846,
      "learning_rate": 9.633333333333335e-05,
      "loss": 0.1276,
      "step": 2890
    },
    {
      "epoch": 1.763916015625e-05,
      "model_forward_time": 0.11479449272155762,
      "step": 2890
    },
    {
      "epoch": 1.763916015625e-05,
      "step": 2890,
      "training_step_time": 0.43492889404296875
    },
    {
      "epoch": 1.7645263671875e-05,
      "model_forward_time": 0.1154634952545166,
      "step": 2891
    },
    {
      "epoch": 1.7645263671875e-05,
      "step": 2891,
      "training_step_time": 0.48375749588012695
    },
    {
      "epoch": 1.76513671875e-05,
      "model_forward_time": 0.13284063339233398,
      "step": 2892
    },
    {
      "epoch": 1.76513671875e-05,
      "step": 2892,
      "training_step_time": 0.4601776599884033
    },
    {
      "epoch": 1.7657470703125e-05,
      "model_forward_time": 0.11563897132873535,
      "step": 2893
    },
    {
      "epoch": 1.7657470703125e-05,
      "step": 2893,
      "training_step_time": 0.38488054275512695
    },
    {
      "epoch": 1.766357421875e-05,
      "model_forward_time": 0.11539840698242188,
      "step": 2894
    },
    {
      "epoch": 1.766357421875e-05,
      "step": 2894,
      "training_step_time": 0.4098379611968994
    },
    {
      "epoch": 1.7669677734375e-05,
      "model_forward_time": 0.11523079872131348,
      "step": 2895
    },
    {
      "epoch": 1.7669677734375e-05,
      "step": 2895,
      "training_step_time": 0.43010830879211426
    },
    {
      "epoch": 1.767578125e-05,
      "model_forward_time": 0.11494302749633789,
      "step": 2896
    },
    {
      "epoch": 1.767578125e-05,
      "step": 2896,
      "training_step_time": 0.49537110328674316
    },
    {
      "epoch": 1.7681884765625e-05,
      "model_forward_time": 0.11459231376647949,
      "step": 2897
    },
    {
      "epoch": 1.7681884765625e-05,
      "step": 2897,
      "training_step_time": 0.38468146324157715
    },
    {
      "epoch": 1.768798828125e-05,
      "model_forward_time": 0.11523056030273438,
      "step": 2898
    },
    {
      "epoch": 1.768798828125e-05,
      "step": 2898,
      "training_step_time": 0.3678145408630371
    },
    {
      "epoch": 1.7694091796875e-05,
      "model_forward_time": 0.1151266098022461,
      "step": 2899
    },
    {
      "epoch": 1.7694091796875e-05,
      "step": 2899,
      "training_step_time": 0.4494972229003906
    },
    {
      "epoch": 1.77001953125e-05,
      "grad_norm": 0.43634799122810364,
      "learning_rate": 9.666666666666667e-05,
      "loss": 0.1263,
      "step": 2900
    },
    {
      "epoch": 1.77001953125e-05,
      "model_forward_time": 0.11555790901184082,
      "step": 2900
    },
    {
      "epoch": 1.77001953125e-05,
      "step": 2900,
      "training_step_time": 0.4300873279571533
    },
    {
      "epoch": 1.7706298828125e-05,
      "model_forward_time": 0.11588454246520996,
      "step": 2901
    },
    {
      "epoch": 1.7706298828125e-05,
      "step": 2901,
      "training_step_time": 0.3819394111633301
    },
    {
      "epoch": 1.771240234375e-05,
      "model_forward_time": 0.11526250839233398,
      "step": 2902
    },
    {
      "epoch": 1.771240234375e-05,
      "step": 2902,
      "training_step_time": 0.39165759086608887
    },
    {
      "epoch": 1.7718505859375e-05,
      "model_forward_time": 0.11526083946228027,
      "step": 2903
    },
    {
      "epoch": 1.7718505859375e-05,
      "step": 2903,
      "training_step_time": 0.3969001770019531
    },
    {
      "epoch": 1.7724609375e-05,
      "model_forward_time": 0.11590194702148438,
      "step": 2904
    },
    {
      "epoch": 1.7724609375e-05,
      "step": 2904,
      "training_step_time": 0.3929297924041748
    },
    {
      "epoch": 1.7730712890625e-05,
      "model_forward_time": 0.11570239067077637,
      "step": 2905
    },
    {
      "epoch": 1.7730712890625e-05,
      "step": 2905,
      "training_step_time": 0.42603206634521484
    },
    {
      "epoch": 1.773681640625e-05,
      "model_forward_time": 0.11524033546447754,
      "step": 2906
    },
    {
      "epoch": 1.773681640625e-05,
      "step": 2906,
      "training_step_time": 0.4354860782623291
    },
    {
      "epoch": 1.7742919921875e-05,
      "model_forward_time": 0.11580038070678711,
      "step": 2907
    },
    {
      "epoch": 1.7742919921875e-05,
      "step": 2907,
      "training_step_time": 0.39792299270629883
    },
    {
      "epoch": 1.77490234375e-05,
      "model_forward_time": 0.11495089530944824,
      "step": 2908
    },
    {
      "epoch": 1.77490234375e-05,
      "step": 2908,
      "training_step_time": 0.4584167003631592
    },
    {
      "epoch": 1.7755126953125e-05,
      "model_forward_time": 0.11499595642089844,
      "step": 2909
    },
    {
      "epoch": 1.7755126953125e-05,
      "step": 2909,
      "training_step_time": 0.4101135730743408
    },
    {
      "epoch": 1.776123046875e-05,
      "grad_norm": 0.4332195222377777,
      "learning_rate": 9.7e-05,
      "loss": 0.1418,
      "step": 2910
    },
    {
      "epoch": 1.776123046875e-05,
      "model_forward_time": 0.11466646194458008,
      "step": 2910
    },
    {
      "epoch": 1.776123046875e-05,
      "step": 2910,
      "training_step_time": 0.4807915687561035
    },
    {
      "epoch": 1.7767333984375e-05,
      "model_forward_time": 0.11549496650695801,
      "step": 2911
    },
    {
      "epoch": 1.7767333984375e-05,
      "step": 2911,
      "training_step_time": 0.39401841163635254
    },
    {
      "epoch": 1.77734375e-05,
      "model_forward_time": 0.11489057540893555,
      "step": 2912
    },
    {
      "epoch": 1.77734375e-05,
      "step": 2912,
      "training_step_time": 0.39494991302490234
    },
    {
      "epoch": 1.7779541015625e-05,
      "model_forward_time": 0.11523771286010742,
      "step": 2913
    },
    {
      "epoch": 1.7779541015625e-05,
      "step": 2913,
      "training_step_time": 0.3698768615722656
    },
    {
      "epoch": 1.778564453125e-05,
      "model_forward_time": 0.11584877967834473,
      "step": 2914
    },
    {
      "epoch": 1.778564453125e-05,
      "step": 2914,
      "training_step_time": 1.108396053314209
    },
    {
      "epoch": 1.7791748046875e-05,
      "model_forward_time": 0.1137552261352539,
      "step": 2915
    },
    {
      "epoch": 1.7791748046875e-05,
      "step": 2915,
      "training_step_time": 0.39594459533691406
    },
    {
      "epoch": 1.77978515625e-05,
      "model_forward_time": 0.11478424072265625,
      "step": 2916
    },
    {
      "epoch": 1.77978515625e-05,
      "step": 2916,
      "training_step_time": 0.38779377937316895
    },
    {
      "epoch": 1.7803955078125e-05,
      "model_forward_time": 0.11387896537780762,
      "step": 2917
    },
    {
      "epoch": 1.7803955078125e-05,
      "step": 2917,
      "training_step_time": 0.38433384895324707
    },
    {
      "epoch": 1.781005859375e-05,
      "model_forward_time": 0.11396479606628418,
      "step": 2918
    },
    {
      "epoch": 1.781005859375e-05,
      "step": 2918,
      "training_step_time": 0.4379763603210449
    },
    {
      "epoch": 1.7816162109375e-05,
      "model_forward_time": 0.1141355037689209,
      "step": 2919
    },
    {
      "epoch": 1.7816162109375e-05,
      "step": 2919,
      "training_step_time": 0.44392824172973633
    },
    {
      "epoch": 1.7822265625e-05,
      "grad_norm": 0.27103403210639954,
      "learning_rate": 9.733333333333335e-05,
      "loss": 0.1281,
      "step": 2920
    },
    {
      "epoch": 1.7822265625e-05,
      "model_forward_time": 0.1150665283203125,
      "step": 2920
    },
    {
      "epoch": 1.7822265625e-05,
      "step": 2920,
      "training_step_time": 0.6208665370941162
    },
    {
      "epoch": 1.7828369140625e-05,
      "model_forward_time": 0.11368513107299805,
      "step": 2921
    },
    {
      "epoch": 1.7828369140625e-05,
      "step": 2921,
      "training_step_time": 0.38112735748291016
    },
    {
      "epoch": 1.783447265625e-05,
      "model_forward_time": 0.11461353302001953,
      "step": 2922
    },
    {
      "epoch": 1.783447265625e-05,
      "step": 2922,
      "training_step_time": 0.40949344635009766
    },
    {
      "epoch": 1.7840576171875e-05,
      "model_forward_time": 0.11447334289550781,
      "step": 2923
    },
    {
      "epoch": 1.7840576171875e-05,
      "step": 2923,
      "training_step_time": 0.40079569816589355
    },
    {
      "epoch": 1.78466796875e-05,
      "model_forward_time": 0.1145792007446289,
      "step": 2924
    },
    {
      "epoch": 1.78466796875e-05,
      "step": 2924,
      "training_step_time": 0.502565860748291
    },
    {
      "epoch": 1.7852783203125e-05,
      "model_forward_time": 0.11530709266662598,
      "step": 2925
    },
    {
      "epoch": 1.7852783203125e-05,
      "step": 2925,
      "training_step_time": 0.4096951484680176
    },
    {
      "epoch": 1.785888671875e-05,
      "model_forward_time": 0.1166234016418457,
      "step": 2926
    },
    {
      "epoch": 1.785888671875e-05,
      "step": 2926,
      "training_step_time": 0.9055006504058838
    },
    {
      "epoch": 1.7864990234375e-05,
      "model_forward_time": 0.11509203910827637,
      "step": 2927
    },
    {
      "epoch": 1.7864990234375e-05,
      "step": 2927,
      "training_step_time": 0.4728832244873047
    },
    {
      "epoch": 1.787109375e-05,
      "model_forward_time": 0.11468911170959473,
      "step": 2928
    },
    {
      "epoch": 1.787109375e-05,
      "step": 2928,
      "training_step_time": 0.3745293617248535
    },
    {
      "epoch": 1.7877197265625e-05,
      "model_forward_time": 0.1144554615020752,
      "step": 2929
    },
    {
      "epoch": 1.7877197265625e-05,
      "step": 2929,
      "training_step_time": 0.37933874130249023
    },
    {
      "epoch": 1.788330078125e-05,
      "grad_norm": 0.4580288529396057,
      "learning_rate": 9.766666666666668e-05,
      "loss": 0.1219,
      "step": 2930
    },
    {
      "epoch": 1.788330078125e-05,
      "model_forward_time": 0.11435079574584961,
      "step": 2930
    },
    {
      "epoch": 1.788330078125e-05,
      "step": 2930,
      "training_step_time": 0.3877997398376465
    },
    {
      "epoch": 1.7889404296875e-05,
      "model_forward_time": 0.1145620346069336,
      "step": 2931
    },
    {
      "epoch": 1.7889404296875e-05,
      "step": 2931,
      "training_step_time": 0.38887834548950195
    },
    {
      "epoch": 1.78955078125e-05,
      "model_forward_time": 0.11518144607543945,
      "step": 2932
    },
    {
      "epoch": 1.78955078125e-05,
      "step": 2932,
      "training_step_time": 0.4941718578338623
    },
    {
      "epoch": 1.7901611328125e-05,
      "model_forward_time": 0.11608624458312988,
      "step": 2933
    },
    {
      "epoch": 1.7901611328125e-05,
      "step": 2933,
      "training_step_time": 0.49414944648742676
    },
    {
      "epoch": 1.790771484375e-05,
      "model_forward_time": 0.11468315124511719,
      "step": 2934
    },
    {
      "epoch": 1.790771484375e-05,
      "step": 2934,
      "training_step_time": 0.39772820472717285
    },
    {
      "epoch": 1.7913818359375e-05,
      "model_forward_time": 0.1163015365600586,
      "step": 2935
    },
    {
      "epoch": 1.7913818359375e-05,
      "step": 2935,
      "training_step_time": 0.3902320861816406
    },
    {
      "epoch": 1.7919921875e-05,
      "model_forward_time": 0.11568856239318848,
      "step": 2936
    },
    {
      "epoch": 1.7919921875e-05,
      "step": 2936,
      "training_step_time": 0.4031550884246826
    },
    {
      "epoch": 1.7926025390625e-05,
      "model_forward_time": 0.1153104305267334,
      "step": 2937
    },
    {
      "epoch": 1.7926025390625e-05,
      "step": 2937,
      "training_step_time": 0.40803003311157227
    },
    {
      "epoch": 1.793212890625e-05,
      "model_forward_time": 0.11683320999145508,
      "step": 2938
    },
    {
      "epoch": 1.793212890625e-05,
      "step": 2938,
      "training_step_time": 0.6754209995269775
    },
    {
      "epoch": 1.7938232421875e-05,
      "model_forward_time": 0.11544966697692871,
      "step": 2939
    },
    {
      "epoch": 1.7938232421875e-05,
      "step": 2939,
      "training_step_time": 0.36838293075561523
    },
    {
      "epoch": 1.79443359375e-05,
      "grad_norm": 0.401104211807251,
      "learning_rate": 9.8e-05,
      "loss": 0.1231,
      "step": 2940
    },
    {
      "epoch": 1.79443359375e-05,
      "model_forward_time": 0.11531186103820801,
      "step": 2940
    },
    {
      "epoch": 1.79443359375e-05,
      "step": 2940,
      "training_step_time": 0.4777233600616455
    },
    {
      "epoch": 1.7950439453125e-05,
      "model_forward_time": 0.1150360107421875,
      "step": 2941
    },
    {
      "epoch": 1.7950439453125e-05,
      "step": 2941,
      "training_step_time": 0.48810601234436035
    },
    {
      "epoch": 1.795654296875e-05,
      "model_forward_time": 0.11434364318847656,
      "step": 2942
    },
    {
      "epoch": 1.795654296875e-05,
      "step": 2942,
      "training_step_time": 0.39689183235168457
    },
    {
      "epoch": 1.7962646484375e-05,
      "model_forward_time": 0.11537432670593262,
      "step": 2943
    },
    {
      "epoch": 1.7962646484375e-05,
      "step": 2943,
      "training_step_time": 0.392611026763916
    },
    {
      "epoch": 1.796875e-05,
      "model_forward_time": 0.1151282787322998,
      "step": 2944
    },
    {
      "epoch": 1.796875e-05,
      "step": 2944,
      "training_step_time": 0.38828134536743164
    },
    {
      "epoch": 1.7974853515625e-05,
      "model_forward_time": 0.11539745330810547,
      "step": 2945
    },
    {
      "epoch": 1.7974853515625e-05,
      "step": 2945,
      "training_step_time": 0.42139697074890137
    },
    {
      "epoch": 1.798095703125e-05,
      "model_forward_time": 0.1153256893157959,
      "step": 2946
    },
    {
      "epoch": 1.798095703125e-05,
      "step": 2946,
      "training_step_time": 0.41509222984313965
    },
    {
      "epoch": 1.7987060546875e-05,
      "model_forward_time": 0.11510348320007324,
      "step": 2947
    },
    {
      "epoch": 1.7987060546875e-05,
      "step": 2947,
      "training_step_time": 0.39447474479675293
    },
    {
      "epoch": 1.79931640625e-05,
      "model_forward_time": 0.11568427085876465,
      "step": 2948
    },
    {
      "epoch": 1.79931640625e-05,
      "step": 2948,
      "training_step_time": 0.3891744613647461
    },
    {
      "epoch": 1.7999267578125e-05,
      "model_forward_time": 0.11524724960327148,
      "step": 2949
    },
    {
      "epoch": 1.7999267578125e-05,
      "step": 2949,
      "training_step_time": 0.39330053329467773
    },
    {
      "epoch": 1.800537109375e-05,
      "grad_norm": 0.3879845142364502,
      "learning_rate": 9.833333333333333e-05,
      "loss": 0.1211,
      "step": 2950
    },
    {
      "epoch": 1.800537109375e-05,
      "model_forward_time": 0.11577391624450684,
      "step": 2950
    },
    {
      "epoch": 1.800537109375e-05,
      "step": 2950,
      "training_step_time": 0.4879930019378662
    },
    {
      "epoch": 1.8011474609375e-05,
      "model_forward_time": 0.11493921279907227,
      "step": 2951
    },
    {
      "epoch": 1.8011474609375e-05,
      "step": 2951,
      "training_step_time": 0.42406630516052246
    },
    {
      "epoch": 1.8017578125e-05,
      "model_forward_time": 0.11501049995422363,
      "step": 2952
    },
    {
      "epoch": 1.8017578125e-05,
      "step": 2952,
      "training_step_time": 0.4881112575531006
    },
    {
      "epoch": 1.8023681640625e-05,
      "model_forward_time": 0.11551022529602051,
      "step": 2953
    },
    {
      "epoch": 1.8023681640625e-05,
      "step": 2953,
      "training_step_time": 0.415616512298584
    },
    {
      "epoch": 1.802978515625e-05,
      "model_forward_time": 0.11684679985046387,
      "step": 2954
    },
    {
      "epoch": 1.802978515625e-05,
      "step": 2954,
      "training_step_time": 0.4882171154022217
    },
    {
      "epoch": 1.8035888671875e-05,
      "model_forward_time": 0.11499476432800293,
      "step": 2955
    },
    {
      "epoch": 1.8035888671875e-05,
      "step": 2955,
      "training_step_time": 0.4839341640472412
    },
    {
      "epoch": 1.80419921875e-05,
      "model_forward_time": 0.11522793769836426,
      "step": 2956
    },
    {
      "epoch": 1.80419921875e-05,
      "step": 2956,
      "training_step_time": 0.4987785816192627
    },
    {
      "epoch": 1.8048095703125e-05,
      "model_forward_time": 0.11490821838378906,
      "step": 2957
    },
    {
      "epoch": 1.8048095703125e-05,
      "step": 2957,
      "training_step_time": 0.3967475891113281
    },
    {
      "epoch": 1.805419921875e-05,
      "model_forward_time": 0.1152336597442627,
      "step": 2958
    },
    {
      "epoch": 1.805419921875e-05,
      "step": 2958,
      "training_step_time": 0.3912489414215088
    },
    {
      "epoch": 1.8060302734375e-05,
      "model_forward_time": 0.11516475677490234,
      "step": 2959
    },
    {
      "epoch": 1.8060302734375e-05,
      "step": 2959,
      "training_step_time": 0.4664785861968994
    },
    {
      "epoch": 1.806640625e-05,
      "grad_norm": 0.2797454595565796,
      "learning_rate": 9.866666666666668e-05,
      "loss": 0.132,
      "step": 2960
    },
    {
      "epoch": 1.806640625e-05,
      "model_forward_time": 0.11481714248657227,
      "step": 2960
    },
    {
      "epoch": 1.806640625e-05,
      "step": 2960,
      "training_step_time": 0.41297316551208496
    },
    {
      "epoch": 1.8072509765625e-05,
      "model_forward_time": 0.11543154716491699,
      "step": 2961
    },
    {
      "epoch": 1.8072509765625e-05,
      "step": 2961,
      "training_step_time": 0.44640135765075684
    },
    {
      "epoch": 1.807861328125e-05,
      "model_forward_time": 0.11626815795898438,
      "step": 2962
    },
    {
      "epoch": 1.807861328125e-05,
      "step": 2962,
      "training_step_time": 0.3980240821838379
    },
    {
      "epoch": 1.8084716796875e-05,
      "model_forward_time": 0.11501693725585938,
      "step": 2963
    },
    {
      "epoch": 1.8084716796875e-05,
      "step": 2963,
      "training_step_time": 0.45688676834106445
    },
    {
      "epoch": 1.80908203125e-05,
      "model_forward_time": 0.11551976203918457,
      "step": 2964
    },
    {
      "epoch": 1.80908203125e-05,
      "step": 2964,
      "training_step_time": 0.4884810447692871
    },
    {
      "epoch": 1.8096923828125e-05,
      "model_forward_time": 0.1151878833770752,
      "step": 2965
    },
    {
      "epoch": 1.8096923828125e-05,
      "step": 2965,
      "training_step_time": 0.42319297790527344
    },
    {
      "epoch": 1.810302734375e-05,
      "model_forward_time": 0.1150045394897461,
      "step": 2966
    },
    {
      "epoch": 1.810302734375e-05,
      "step": 2966,
      "training_step_time": 0.46479344367980957
    },
    {
      "epoch": 1.8109130859375e-05,
      "model_forward_time": 0.1154019832611084,
      "step": 2967
    },
    {
      "epoch": 1.8109130859375e-05,
      "step": 2967,
      "training_step_time": 0.39692091941833496
    },
    {
      "epoch": 1.8115234375e-05,
      "model_forward_time": 0.11562490463256836,
      "step": 2968
    },
    {
      "epoch": 1.8115234375e-05,
      "step": 2968,
      "training_step_time": 0.4785490036010742
    },
    {
      "epoch": 1.8121337890625e-05,
      "model_forward_time": 0.11533379554748535,
      "step": 2969
    },
    {
      "epoch": 1.8121337890625e-05,
      "step": 2969,
      "training_step_time": 0.48093461990356445
    },
    {
      "epoch": 1.812744140625e-05,
      "grad_norm": 0.33814626932144165,
      "learning_rate": 9.900000000000001e-05,
      "loss": 0.1265,
      "step": 2970
    },
    {
      "epoch": 1.812744140625e-05,
      "model_forward_time": 0.11554908752441406,
      "step": 2970
    },
    {
      "epoch": 1.812744140625e-05,
      "step": 2970,
      "training_step_time": 0.49480581283569336
    },
    {
      "epoch": 1.8133544921875e-05,
      "model_forward_time": 0.11467480659484863,
      "step": 2971
    },
    {
      "epoch": 1.8133544921875e-05,
      "step": 2971,
      "training_step_time": 0.395693302154541
    },
    {
      "epoch": 1.81396484375e-05,
      "model_forward_time": 0.11503863334655762,
      "step": 2972
    },
    {
      "epoch": 1.81396484375e-05,
      "step": 2972,
      "training_step_time": 0.37335824966430664
    },
    {
      "epoch": 1.8145751953125e-05,
      "model_forward_time": 0.1154029369354248,
      "step": 2973
    },
    {
      "epoch": 1.8145751953125e-05,
      "step": 2973,
      "training_step_time": 0.39245080947875977
    },
    {
      "epoch": 1.815185546875e-05,
      "model_forward_time": 0.1147911548614502,
      "step": 2974
    },
    {
      "epoch": 1.815185546875e-05,
      "step": 2974,
      "training_step_time": 0.47659754753112793
    },
    {
      "epoch": 1.8157958984375e-05,
      "model_forward_time": 0.11485576629638672,
      "step": 2975
    },
    {
      "epoch": 1.8157958984375e-05,
      "step": 2975,
      "training_step_time": 0.453965425491333
    },
    {
      "epoch": 1.81640625e-05,
      "model_forward_time": 0.11570096015930176,
      "step": 2976
    },
    {
      "epoch": 1.81640625e-05,
      "step": 2976,
      "training_step_time": 0.3971881866455078
    },
    {
      "epoch": 1.8170166015625e-05,
      "model_forward_time": 0.11491727828979492,
      "step": 2977
    },
    {
      "epoch": 1.8170166015625e-05,
      "step": 2977,
      "training_step_time": 0.41183042526245117
    },
    {
      "epoch": 1.817626953125e-05,
      "model_forward_time": 0.11543536186218262,
      "step": 2978
    },
    {
      "epoch": 1.817626953125e-05,
      "step": 2978,
      "training_step_time": 0.4096834659576416
    },
    {
      "epoch": 1.8182373046875e-05,
      "model_forward_time": 0.11564493179321289,
      "step": 2979
    },
    {
      "epoch": 1.8182373046875e-05,
      "step": 2979,
      "training_step_time": 0.4371654987335205
    },
    {
      "epoch": 1.81884765625e-05,
      "grad_norm": 0.35019293427467346,
      "learning_rate": 9.933333333333334e-05,
      "loss": 0.1248,
      "step": 2980
    },
    {
      "epoch": 1.81884765625e-05,
      "model_forward_time": 0.11569094657897949,
      "step": 2980
    },
    {
      "epoch": 1.81884765625e-05,
      "step": 2980,
      "training_step_time": 0.49103474617004395
    },
    {
      "epoch": 1.8194580078125e-05,
      "model_forward_time": 0.11522936820983887,
      "step": 2981
    },
    {
      "epoch": 1.8194580078125e-05,
      "step": 2981,
      "training_step_time": 0.3948826789855957
    },
    {
      "epoch": 1.820068359375e-05,
      "model_forward_time": 0.1157388687133789,
      "step": 2982
    },
    {
      "epoch": 1.820068359375e-05,
      "step": 2982,
      "training_step_time": 0.4105689525604248
    },
    {
      "epoch": 1.8206787109375e-05,
      "model_forward_time": 0.11658239364624023,
      "step": 2983
    },
    {
      "epoch": 1.8206787109375e-05,
      "step": 2983,
      "training_step_time": 0.4185371398925781
    },
    {
      "epoch": 1.8212890625e-05,
      "model_forward_time": 0.11511015892028809,
      "step": 2984
    },
    {
      "epoch": 1.8212890625e-05,
      "step": 2984,
      "training_step_time": 0.4958970546722412
    },
    {
      "epoch": 1.8218994140625e-05,
      "model_forward_time": 0.11548280715942383,
      "step": 2985
    },
    {
      "epoch": 1.8218994140625e-05,
      "step": 2985,
      "training_step_time": 0.3970918655395508
    },
    {
      "epoch": 1.822509765625e-05,
      "model_forward_time": 0.11551928520202637,
      "step": 2986
    },
    {
      "epoch": 1.822509765625e-05,
      "step": 2986,
      "training_step_time": 0.4043843746185303
    },
    {
      "epoch": 1.8231201171875e-05,
      "model_forward_time": 0.11499190330505371,
      "step": 2987
    },
    {
      "epoch": 1.8231201171875e-05,
      "step": 2987,
      "training_step_time": 0.39992785453796387
    },
    {
      "epoch": 1.82373046875e-05,
      "model_forward_time": 0.11535239219665527,
      "step": 2988
    },
    {
      "epoch": 1.82373046875e-05,
      "step": 2988,
      "training_step_time": 0.4273567199707031
    },
    {
      "epoch": 1.8243408203125e-05,
      "model_forward_time": 0.11527228355407715,
      "step": 2989
    },
    {
      "epoch": 1.8243408203125e-05,
      "step": 2989,
      "training_step_time": 0.429424524307251
    },
    {
      "epoch": 1.824951171875e-05,
      "grad_norm": 0.30974888801574707,
      "learning_rate": 9.966666666666667e-05,
      "loss": 0.1136,
      "step": 2990
    },
    {
      "epoch": 1.824951171875e-05,
      "model_forward_time": 0.11495518684387207,
      "step": 2990
    },
    {
      "epoch": 1.824951171875e-05,
      "step": 2990,
      "training_step_time": 0.49561119079589844
    },
    {
      "epoch": 1.8255615234375e-05,
      "model_forward_time": 0.11479306221008301,
      "step": 2991
    },
    {
      "epoch": 1.8255615234375e-05,
      "step": 2991,
      "training_step_time": 0.410383939743042
    },
    {
      "epoch": 1.826171875e-05,
      "model_forward_time": 0.1148538589477539,
      "step": 2992
    },
    {
      "epoch": 1.826171875e-05,
      "step": 2992,
      "training_step_time": 0.38289308547973633
    },
    {
      "epoch": 1.8267822265625e-05,
      "model_forward_time": 0.1147603988647461,
      "step": 2993
    },
    {
      "epoch": 1.8267822265625e-05,
      "step": 2993,
      "training_step_time": 0.42527341842651367
    },
    {
      "epoch": 1.827392578125e-05,
      "model_forward_time": 0.1157541275024414,
      "step": 2994
    },
    {
      "epoch": 1.827392578125e-05,
      "step": 2994,
      "training_step_time": 0.42635083198547363
    },
    {
      "epoch": 1.8280029296875e-05,
      "model_forward_time": 0.1150667667388916,
      "step": 2995
    },
    {
      "epoch": 1.8280029296875e-05,
      "step": 2995,
      "training_step_time": 0.5126509666442871
    },
    {
      "epoch": 1.82861328125e-05,
      "model_forward_time": 0.11515474319458008,
      "step": 2996
    },
    {
      "epoch": 1.82861328125e-05,
      "step": 2996,
      "training_step_time": 0.3676116466522217
    },
    {
      "epoch": 1.8292236328125e-05,
      "model_forward_time": 0.11518073081970215,
      "step": 2997
    },
    {
      "epoch": 1.8292236328125e-05,
      "step": 2997,
      "training_step_time": 0.39452600479125977
    },
    {
      "epoch": 1.829833984375e-05,
      "model_forward_time": 0.11508321762084961,
      "step": 2998
    },
    {
      "epoch": 1.829833984375e-05,
      "step": 2998,
      "training_step_time": 0.420398473739624
    },
    {
      "epoch": 1.8304443359375e-05,
      "model_forward_time": 0.11510920524597168,
      "step": 2999
    },
    {
      "epoch": 1.8304443359375e-05,
      "step": 2999,
      "training_step_time": 0.4055778980255127
    },
    {
      "epoch": 1.8310546875e-05,
      "grad_norm": 0.43700122833251953,
      "learning_rate": 0.0001,
      "loss": 0.1266,
      "step": 3000
    },
    {
      "epoch": 1.8310546875e-05,
      "model_forward_time": 0.1191556453704834,
      "step": 3000
    },
    {
      "epoch": 1.8310546875e-05,
      "step": 3000,
      "training_step_time": 0.3704206943511963
    },
    {
      "epoch": 1.8316650390625e-05,
      "model_forward_time": 0.11536145210266113,
      "step": 3001
    },
    {
      "epoch": 1.8316650390625e-05,
      "step": 3001,
      "training_step_time": 0.40317702293395996
    },
    {
      "epoch": 1.832275390625e-05,
      "model_forward_time": 0.12579107284545898,
      "step": 3002
    },
    {
      "epoch": 1.832275390625e-05,
      "step": 3002,
      "training_step_time": 0.39415884017944336
    },
    {
      "epoch": 1.8328857421875e-05,
      "model_forward_time": 0.11670637130737305,
      "step": 3003
    },
    {
      "epoch": 1.8328857421875e-05,
      "step": 3003,
      "training_step_time": 0.3769950866699219
    },
    {
      "epoch": 1.83349609375e-05,
      "model_forward_time": 0.11591410636901855,
      "step": 3004
    },
    {
      "epoch": 1.83349609375e-05,
      "step": 3004,
      "training_step_time": 0.47484397888183594
    },
    {
      "epoch": 1.8341064453125e-05,
      "model_forward_time": 0.11568522453308105,
      "step": 3005
    },
    {
      "epoch": 1.8341064453125e-05,
      "step": 3005,
      "training_step_time": 0.42533445358276367
    },
    {
      "epoch": 1.834716796875e-05,
      "model_forward_time": 0.11582040786743164,
      "step": 3006
    },
    {
      "epoch": 1.834716796875e-05,
      "step": 3006,
      "training_step_time": 0.4509005546569824
    },
    {
      "epoch": 1.8353271484375e-05,
      "model_forward_time": 0.11733508110046387,
      "step": 3007
    },
    {
      "epoch": 1.8353271484375e-05,
      "step": 3007,
      "training_step_time": 0.39339637756347656
    },
    {
      "epoch": 1.8359375e-05,
      "model_forward_time": 0.11695003509521484,
      "step": 3008
    },
    {
      "epoch": 1.8359375e-05,
      "step": 3008,
      "training_step_time": 0.3916592597961426
    },
    {
      "epoch": 1.8365478515625e-05,
      "model_forward_time": 0.11729240417480469,
      "step": 3009
    },
    {
      "epoch": 1.8365478515625e-05,
      "step": 3009,
      "training_step_time": 0.4066610336303711
    },
    {
      "epoch": 1.837158203125e-05,
      "grad_norm": 0.3381696939468384,
      "learning_rate": 9.99999924056601e-05,
      "loss": 0.1201,
      "step": 3010
    },
    {
      "epoch": 1.837158203125e-05,
      "model_forward_time": 0.11711835861206055,
      "step": 3010
    },
    {
      "epoch": 1.837158203125e-05,
      "step": 3010,
      "training_step_time": 0.40357017517089844
    },
    {
      "epoch": 1.8377685546875e-05,
      "model_forward_time": 0.11730718612670898,
      "step": 3011
    },
    {
      "epoch": 1.8377685546875e-05,
      "step": 3011,
      "training_step_time": 0.4791693687438965
    },
    {
      "epoch": 1.83837890625e-05,
      "model_forward_time": 0.11722779273986816,
      "step": 3012
    },
    {
      "epoch": 1.83837890625e-05,
      "step": 3012,
      "training_step_time": 0.3948404788970947
    },
    {
      "epoch": 1.8389892578125e-05,
      "model_forward_time": 0.11803579330444336,
      "step": 3013
    },
    {
      "epoch": 1.8389892578125e-05,
      "step": 3013,
      "training_step_time": 0.39963531494140625
    },
    {
      "epoch": 1.839599609375e-05,
      "model_forward_time": 0.11849617958068848,
      "step": 3014
    },
    {
      "epoch": 1.839599609375e-05,
      "step": 3014,
      "training_step_time": 0.4847903251647949
    },
    {
      "epoch": 1.8402099609375e-05,
      "model_forward_time": 0.11734580993652344,
      "step": 3015
    },
    {
      "epoch": 1.8402099609375e-05,
      "step": 3015,
      "training_step_time": 0.4738430976867676
    },
    {
      "epoch": 1.8408203125e-05,
      "model_forward_time": 0.11657524108886719,
      "step": 3016
    },
    {
      "epoch": 1.8408203125e-05,
      "step": 3016,
      "training_step_time": 0.510657787322998
    },
    {
      "epoch": 1.8414306640625e-05,
      "model_forward_time": 0.11667466163635254,
      "step": 3017
    },
    {
      "epoch": 1.8414306640625e-05,
      "step": 3017,
      "training_step_time": 0.4009580612182617
    },
    {
      "epoch": 1.842041015625e-05,
      "model_forward_time": 0.11559176445007324,
      "step": 3018
    },
    {
      "epoch": 1.842041015625e-05,
      "step": 3018,
      "training_step_time": 0.43802475929260254
    },
    {
      "epoch": 1.8426513671875e-05,
      "model_forward_time": 0.1153709888458252,
      "step": 3019
    },
    {
      "epoch": 1.8426513671875e-05,
      "step": 3019,
      "training_step_time": 0.4098961353302002
    },
    {
      "epoch": 1.84326171875e-05,
      "grad_norm": 0.4005891680717468,
      "learning_rate": 9.999996962264266e-05,
      "loss": 0.1274,
      "step": 3020
    },
    {
      "epoch": 1.84326171875e-05,
      "model_forward_time": 0.11539506912231445,
      "step": 3020
    },
    {
      "epoch": 1.84326171875e-05,
      "step": 3020,
      "training_step_time": 0.4253857135772705
    },
    {
      "epoch": 1.8438720703125e-05,
      "model_forward_time": 0.11657285690307617,
      "step": 3021
    },
    {
      "epoch": 1.8438720703125e-05,
      "step": 3021,
      "training_step_time": 0.3945908546447754
    },
    {
      "epoch": 1.844482421875e-05,
      "model_forward_time": 0.1159052848815918,
      "step": 3022
    },
    {
      "epoch": 1.844482421875e-05,
      "step": 3022,
      "training_step_time": 0.3904151916503906
    },
    {
      "epoch": 1.8450927734375e-05,
      "model_forward_time": 0.11589479446411133,
      "step": 3023
    },
    {
      "epoch": 1.8450927734375e-05,
      "step": 3023,
      "training_step_time": 0.39198780059814453
    },
    {
      "epoch": 1.845703125e-05,
      "model_forward_time": 0.11551809310913086,
      "step": 3024
    },
    {
      "epoch": 1.845703125e-05,
      "step": 3024,
      "training_step_time": 0.44356274604797363
    },
    {
      "epoch": 1.8463134765625e-05,
      "model_forward_time": 0.11649084091186523,
      "step": 3025
    },
    {
      "epoch": 1.8463134765625e-05,
      "step": 3025,
      "training_step_time": 0.45984864234924316
    },
    {
      "epoch": 1.846923828125e-05,
      "model_forward_time": 0.11532258987426758,
      "step": 3026
    },
    {
      "epoch": 1.846923828125e-05,
      "step": 3026,
      "training_step_time": 0.4442133903503418
    },
    {
      "epoch": 1.8475341796875e-05,
      "model_forward_time": 0.11537981033325195,
      "step": 3027
    },
    {
      "epoch": 1.8475341796875e-05,
      "step": 3027,
      "training_step_time": 0.3960247039794922
    },
    {
      "epoch": 1.84814453125e-05,
      "model_forward_time": 0.11592364311218262,
      "step": 3028
    },
    {
      "epoch": 1.84814453125e-05,
      "step": 3028,
      "training_step_time": 0.37268614768981934
    },
    {
      "epoch": 1.8487548828125e-05,
      "model_forward_time": 0.11620044708251953,
      "step": 3029
    },
    {
      "epoch": 1.8487548828125e-05,
      "step": 3029,
      "training_step_time": 0.4907844066619873
    },
    {
      "epoch": 1.849365234375e-05,
      "grad_norm": 0.47514164447784424,
      "learning_rate": 9.999993165095463e-05,
      "loss": 0.1168,
      "step": 3030
    },
    {
      "epoch": 1.849365234375e-05,
      "model_forward_time": 0.11552906036376953,
      "step": 3030
    },
    {
      "epoch": 1.849365234375e-05,
      "step": 3030,
      "training_step_time": 0.47647786140441895
    },
    {
      "epoch": 1.8499755859375e-05,
      "model_forward_time": 0.11563444137573242,
      "step": 3031
    },
    {
      "epoch": 1.8499755859375e-05,
      "step": 3031,
      "training_step_time": 0.41292905807495117
    },
    {
      "epoch": 1.8505859375e-05,
      "model_forward_time": 0.11708426475524902,
      "step": 3032
    },
    {
      "epoch": 1.8505859375e-05,
      "step": 3032,
      "training_step_time": 0.44313716888427734
    },
    {
      "epoch": 1.8511962890625e-05,
      "model_forward_time": 0.1159510612487793,
      "step": 3033
    },
    {
      "epoch": 1.8511962890625e-05,
      "step": 3033,
      "training_step_time": 0.47637009620666504
    },
    {
      "epoch": 1.851806640625e-05,
      "model_forward_time": 0.1154639720916748,
      "step": 3034
    },
    {
      "epoch": 1.851806640625e-05,
      "step": 3034,
      "training_step_time": 0.44059157371520996
    },
    {
      "epoch": 1.8524169921875e-05,
      "model_forward_time": 0.11553716659545898,
      "step": 3035
    },
    {
      "epoch": 1.8524169921875e-05,
      "step": 3035,
      "training_step_time": 0.38881611824035645
    },
    {
      "epoch": 1.85302734375e-05,
      "model_forward_time": 0.11696004867553711,
      "step": 3036
    },
    {
      "epoch": 1.85302734375e-05,
      "step": 3036,
      "training_step_time": 0.3881068229675293
    },
    {
      "epoch": 1.8536376953125e-05,
      "model_forward_time": 0.11658644676208496,
      "step": 3037
    },
    {
      "epoch": 1.8536376953125e-05,
      "step": 3037,
      "training_step_time": 0.39411091804504395
    },
    {
      "epoch": 1.854248046875e-05,
      "model_forward_time": 0.11655306816101074,
      "step": 3038
    },
    {
      "epoch": 1.854248046875e-05,
      "step": 3038,
      "training_step_time": 0.4558389186859131
    },
    {
      "epoch": 1.8548583984375e-05,
      "model_forward_time": 0.11573171615600586,
      "step": 3039
    },
    {
      "epoch": 1.8548583984375e-05,
      "step": 3039,
      "training_step_time": 0.4001936912536621
    },
    {
      "epoch": 1.85546875e-05,
      "grad_norm": 0.35208821296691895,
      "learning_rate": 9.999987849060753e-05,
      "loss": 0.1185,
      "step": 3040
    },
    {
      "epoch": 1.85546875e-05,
      "model_forward_time": 0.11561870574951172,
      "step": 3040
    },
    {
      "epoch": 1.85546875e-05,
      "step": 3040,
      "training_step_time": 0.40762901306152344
    },
    {
      "epoch": 1.8560791015625e-05,
      "model_forward_time": 0.11640071868896484,
      "step": 3041
    },
    {
      "epoch": 1.8560791015625e-05,
      "step": 3041,
      "training_step_time": 0.3941361904144287
    },
    {
      "epoch": 1.856689453125e-05,
      "model_forward_time": 0.11747217178344727,
      "step": 3042
    },
    {
      "epoch": 1.856689453125e-05,
      "step": 3042,
      "training_step_time": 0.40288448333740234
    },
    {
      "epoch": 1.8572998046875e-05,
      "model_forward_time": 0.11718916893005371,
      "step": 3043
    },
    {
      "epoch": 1.8572998046875e-05,
      "step": 3043,
      "training_step_time": 0.4168407917022705
    },
    {
      "epoch": 1.85791015625e-05,
      "model_forward_time": 0.11630892753601074,
      "step": 3044
    },
    {
      "epoch": 1.85791015625e-05,
      "step": 3044,
      "training_step_time": 0.4192991256713867
    },
    {
      "epoch": 1.8585205078125e-05,
      "model_forward_time": 0.1169428825378418,
      "step": 3045
    },
    {
      "epoch": 1.8585205078125e-05,
      "step": 3045,
      "training_step_time": 0.43099522590637207
    },
    {
      "epoch": 1.859130859375e-05,
      "model_forward_time": 0.11653780937194824,
      "step": 3046
    },
    {
      "epoch": 1.859130859375e-05,
      "step": 3046,
      "training_step_time": 0.3940882682800293
    },
    {
      "epoch": 1.8597412109375e-05,
      "model_forward_time": 0.11592912673950195,
      "step": 3047
    },
    {
      "epoch": 1.8597412109375e-05,
      "step": 3047,
      "training_step_time": 0.4637429714202881
    },
    {
      "epoch": 1.8603515625e-05,
      "model_forward_time": 0.11614203453063965,
      "step": 3048
    },
    {
      "epoch": 1.8603515625e-05,
      "step": 3048,
      "training_step_time": 0.4103281497955322
    },
    {
      "epoch": 1.8609619140625e-05,
      "model_forward_time": 0.11635828018188477,
      "step": 3049
    },
    {
      "epoch": 1.8609619140625e-05,
      "step": 3049,
      "training_step_time": 0.40186619758605957
    },
    {
      "epoch": 1.861572265625e-05,
      "grad_norm": 0.48997214436531067,
      "learning_rate": 9.999981014161752e-05,
      "loss": 0.1295,
      "step": 3050
    },
    {
      "epoch": 1.861572265625e-05,
      "model_forward_time": 0.11666727066040039,
      "step": 3050
    },
    {
      "epoch": 1.861572265625e-05,
      "step": 3050,
      "training_step_time": 0.40026044845581055
    },
    {
      "epoch": 1.8621826171875e-05,
      "model_forward_time": 0.11819720268249512,
      "step": 3051
    },
    {
      "epoch": 1.8621826171875e-05,
      "step": 3051,
      "training_step_time": 0.39644289016723633
    },
    {
      "epoch": 1.86279296875e-05,
      "model_forward_time": 0.12077689170837402,
      "step": 3052
    },
    {
      "epoch": 1.86279296875e-05,
      "step": 3052,
      "training_step_time": 0.38617444038391113
    },
    {
      "epoch": 1.8634033203125e-05,
      "model_forward_time": 0.11890268325805664,
      "step": 3053
    },
    {
      "epoch": 1.8634033203125e-05,
      "step": 3053,
      "training_step_time": 0.47720909118652344
    },
    {
      "epoch": 1.864013671875e-05,
      "model_forward_time": 0.11593985557556152,
      "step": 3054
    },
    {
      "epoch": 1.864013671875e-05,
      "step": 3054,
      "training_step_time": 0.4630725383758545
    },
    {
      "epoch": 1.8646240234375e-05,
      "model_forward_time": 0.1155710220336914,
      "step": 3055
    },
    {
      "epoch": 1.8646240234375e-05,
      "step": 3055,
      "training_step_time": 0.4123656749725342
    },
    {
      "epoch": 1.865234375e-05,
      "model_forward_time": 0.11574602127075195,
      "step": 3056
    },
    {
      "epoch": 1.865234375e-05,
      "step": 3056,
      "training_step_time": 0.3895249366760254
    },
    {
      "epoch": 1.8658447265625e-05,
      "model_forward_time": 0.11544299125671387,
      "step": 3057
    },
    {
      "epoch": 1.8658447265625e-05,
      "step": 3057,
      "training_step_time": 0.3700892925262451
    },
    {
      "epoch": 1.866455078125e-05,
      "model_forward_time": 0.11570549011230469,
      "step": 3058
    },
    {
      "epoch": 1.866455078125e-05,
      "step": 3058,
      "training_step_time": 0.43437814712524414
    },
    {
      "epoch": 1.8670654296875e-05,
      "model_forward_time": 0.1153709888458252,
      "step": 3059
    },
    {
      "epoch": 1.8670654296875e-05,
      "step": 3059,
      "training_step_time": 0.41912341117858887
    },
    {
      "epoch": 1.86767578125e-05,
      "grad_norm": 0.3747941553592682,
      "learning_rate": 9.999972660400536e-05,
      "loss": 0.1313,
      "step": 3060
    },
    {
      "epoch": 1.86767578125e-05,
      "model_forward_time": 0.1160888671875,
      "step": 3060
    },
    {
      "epoch": 1.86767578125e-05,
      "step": 3060,
      "training_step_time": 0.41640305519104004
    },
    {
      "epoch": 1.8682861328125e-05,
      "model_forward_time": 0.11557531356811523,
      "step": 3061
    },
    {
      "epoch": 1.8682861328125e-05,
      "step": 3061,
      "training_step_time": 0.384047269821167
    },
    {
      "epoch": 1.868896484375e-05,
      "model_forward_time": 0.11575055122375488,
      "step": 3062
    },
    {
      "epoch": 1.868896484375e-05,
      "step": 3062,
      "training_step_time": 0.47115111351013184
    },
    {
      "epoch": 1.8695068359375e-05,
      "model_forward_time": 0.11556363105773926,
      "step": 3063
    },
    {
      "epoch": 1.8695068359375e-05,
      "step": 3063,
      "training_step_time": 0.45372796058654785
    },
    {
      "epoch": 1.8701171875e-05,
      "model_forward_time": 0.11547136306762695,
      "step": 3064
    },
    {
      "epoch": 1.8701171875e-05,
      "step": 3064,
      "training_step_time": 0.3850395679473877
    },
    {
      "epoch": 1.8707275390625e-05,
      "model_forward_time": 0.11470770835876465,
      "step": 3065
    },
    {
      "epoch": 1.8707275390625e-05,
      "step": 3065,
      "training_step_time": 0.38945627212524414
    },
    {
      "epoch": 1.871337890625e-05,
      "model_forward_time": 0.11513972282409668,
      "step": 3066
    },
    {
      "epoch": 1.871337890625e-05,
      "step": 3066,
      "training_step_time": 0.3940927982330322
    },
    {
      "epoch": 1.8719482421875e-05,
      "model_forward_time": 0.11548852920532227,
      "step": 3067
    },
    {
      "epoch": 1.8719482421875e-05,
      "step": 3067,
      "training_step_time": 0.397305965423584
    },
    {
      "epoch": 1.87255859375e-05,
      "model_forward_time": 0.11545062065124512,
      "step": 3068
    },
    {
      "epoch": 1.87255859375e-05,
      "step": 3068,
      "training_step_time": 0.41202783584594727
    },
    {
      "epoch": 1.8731689453125e-05,
      "model_forward_time": 0.11560583114624023,
      "step": 3069
    },
    {
      "epoch": 1.8731689453125e-05,
      "step": 3069,
      "training_step_time": 0.4224724769592285
    },
    {
      "epoch": 1.873779296875e-05,
      "grad_norm": 0.39982640743255615,
      "learning_rate": 9.999962787779642e-05,
      "loss": 0.1334,
      "step": 3070
    },
    {
      "epoch": 1.873779296875e-05,
      "model_forward_time": 0.11452031135559082,
      "step": 3070
    },
    {
      "epoch": 1.873779296875e-05,
      "step": 3070,
      "training_step_time": 0.4973335266113281
    },
    {
      "epoch": 1.8743896484375e-05,
      "model_forward_time": 0.11685037612915039,
      "step": 3071
    },
    {
      "epoch": 1.8743896484375e-05,
      "step": 3071,
      "training_step_time": 0.4019472599029541
    },
    {
      "epoch": 1.875e-05,
      "model_forward_time": 0.11639022827148438,
      "step": 3072
    },
    {
      "epoch": 1.875e-05,
      "step": 3072,
      "training_step_time": 0.3995332717895508
    },
    {
      "epoch": 1.8756103515625e-05,
      "model_forward_time": 0.1151740550994873,
      "step": 3073
    },
    {
      "epoch": 1.8756103515625e-05,
      "step": 3073,
      "training_step_time": 0.3677859306335449
    },
    {
      "epoch": 1.876220703125e-05,
      "model_forward_time": 0.11588025093078613,
      "step": 3074
    },
    {
      "epoch": 1.876220703125e-05,
      "step": 3074,
      "training_step_time": 0.4873542785644531
    },
    {
      "epoch": 1.8768310546875e-05,
      "model_forward_time": 0.11598896980285645,
      "step": 3075
    },
    {
      "epoch": 1.8768310546875e-05,
      "step": 3075,
      "training_step_time": 0.4944586753845215
    },
    {
      "epoch": 1.87744140625e-05,
      "model_forward_time": 0.11571311950683594,
      "step": 3076
    },
    {
      "epoch": 1.87744140625e-05,
      "step": 3076,
      "training_step_time": 0.3978538513183594
    },
    {
      "epoch": 1.8780517578125e-05,
      "model_forward_time": 0.11456942558288574,
      "step": 3077
    },
    {
      "epoch": 1.8780517578125e-05,
      "step": 3077,
      "training_step_time": 0.44365668296813965
    },
    {
      "epoch": 1.878662109375e-05,
      "model_forward_time": 0.11569547653198242,
      "step": 3078
    },
    {
      "epoch": 1.878662109375e-05,
      "step": 3078,
      "training_step_time": 0.525399923324585
    },
    {
      "epoch": 1.8792724609375e-05,
      "model_forward_time": 0.11483931541442871,
      "step": 3079
    },
    {
      "epoch": 1.8792724609375e-05,
      "step": 3079,
      "training_step_time": 0.3909275531768799
    },
    {
      "epoch": 1.8798828125e-05,
      "grad_norm": 0.3584403395652771,
      "learning_rate": 9.999951396302069e-05,
      "loss": 0.127,
      "step": 3080
    },
    {
      "epoch": 1.8798828125e-05,
      "model_forward_time": 0.11538577079772949,
      "step": 3080
    },
    {
      "epoch": 1.8798828125e-05,
      "step": 3080,
      "training_step_time": 0.3945732116699219
    },
    {
      "epoch": 1.8804931640625e-05,
      "model_forward_time": 0.11585474014282227,
      "step": 3081
    },
    {
      "epoch": 1.8804931640625e-05,
      "step": 3081,
      "training_step_time": 0.38589954376220703
    },
    {
      "epoch": 1.881103515625e-05,
      "model_forward_time": 0.11543726921081543,
      "step": 3082
    },
    {
      "epoch": 1.881103515625e-05,
      "step": 3082,
      "training_step_time": 0.403667688369751
    },
    {
      "epoch": 1.8817138671875e-05,
      "model_forward_time": 0.11689352989196777,
      "step": 3083
    },
    {
      "epoch": 1.8817138671875e-05,
      "step": 3083,
      "training_step_time": 0.48206424713134766
    },
    {
      "epoch": 1.88232421875e-05,
      "model_forward_time": 0.11494255065917969,
      "step": 3084
    },
    {
      "epoch": 1.88232421875e-05,
      "step": 3084,
      "training_step_time": 0.5342190265655518
    },
    {
      "epoch": 1.8829345703125e-05,
      "model_forward_time": 0.11573314666748047,
      "step": 3085
    },
    {
      "epoch": 1.8829345703125e-05,
      "step": 3085,
      "training_step_time": 0.41310763359069824
    },
    {
      "epoch": 1.883544921875e-05,
      "model_forward_time": 0.11472868919372559,
      "step": 3086
    },
    {
      "epoch": 1.883544921875e-05,
      "step": 3086,
      "training_step_time": 0.40143752098083496
    },
    {
      "epoch": 1.8841552734375e-05,
      "model_forward_time": 0.11566352844238281,
      "step": 3087
    },
    {
      "epoch": 1.8841552734375e-05,
      "step": 3087,
      "training_step_time": 0.36905670166015625
    },
    {
      "epoch": 1.884765625e-05,
      "model_forward_time": 0.11541080474853516,
      "step": 3088
    },
    {
      "epoch": 1.884765625e-05,
      "step": 3088,
      "training_step_time": 0.4957139492034912
    },
    {
      "epoch": 1.8853759765625e-05,
      "model_forward_time": 0.11497235298156738,
      "step": 3089
    },
    {
      "epoch": 1.8853759765625e-05,
      "step": 3089,
      "training_step_time": 0.5029561519622803
    },
    {
      "epoch": 1.885986328125e-05,
      "grad_norm": 0.3387565314769745,
      "learning_rate": 9.999938485971279e-05,
      "loss": 0.1298,
      "step": 3090
    },
    {
      "epoch": 1.885986328125e-05,
      "model_forward_time": 0.11445808410644531,
      "step": 3090
    },
    {
      "epoch": 1.885986328125e-05,
      "step": 3090,
      "training_step_time": 0.4032912254333496
    },
    {
      "epoch": 1.8865966796875e-05,
      "model_forward_time": 0.11532855033874512,
      "step": 3091
    },
    {
      "epoch": 1.8865966796875e-05,
      "step": 3091,
      "training_step_time": 0.4039592742919922
    },
    {
      "epoch": 1.88720703125e-05,
      "model_forward_time": 0.11529207229614258,
      "step": 3092
    },
    {
      "epoch": 1.88720703125e-05,
      "step": 3092,
      "training_step_time": 0.3970479965209961
    },
    {
      "epoch": 1.8878173828125e-05,
      "model_forward_time": 0.11533284187316895,
      "step": 3093
    },
    {
      "epoch": 1.8878173828125e-05,
      "step": 3093,
      "training_step_time": 0.3995816707611084
    },
    {
      "epoch": 1.888427734375e-05,
      "model_forward_time": 0.11685061454772949,
      "step": 3094
    },
    {
      "epoch": 1.888427734375e-05,
      "step": 3094,
      "training_step_time": 0.39400506019592285
    },
    {
      "epoch": 1.8890380859375e-05,
      "model_forward_time": 0.11587834358215332,
      "step": 3095
    },
    {
      "epoch": 1.8890380859375e-05,
      "step": 3095,
      "training_step_time": 0.39570093154907227
    },
    {
      "epoch": 1.8896484375e-05,
      "model_forward_time": 0.11548113822937012,
      "step": 3096
    },
    {
      "epoch": 1.8896484375e-05,
      "step": 3096,
      "training_step_time": 0.3864893913269043
    },
    {
      "epoch": 1.8902587890625e-05,
      "model_forward_time": 0.11651062965393066,
      "step": 3097
    },
    {
      "epoch": 1.8902587890625e-05,
      "step": 3097,
      "training_step_time": 0.48229026794433594
    },
    {
      "epoch": 1.890869140625e-05,
      "model_forward_time": 0.11522173881530762,
      "step": 3098
    },
    {
      "epoch": 1.890869140625e-05,
      "step": 3098,
      "training_step_time": 0.42572569847106934
    },
    {
      "epoch": 1.8914794921875e-05,
      "model_forward_time": 0.11530137062072754,
      "step": 3099
    },
    {
      "epoch": 1.8914794921875e-05,
      "step": 3099,
      "training_step_time": 0.46134281158447266
    },
    {
      "epoch": 1.89208984375e-05,
      "grad_norm": 0.40571749210357666,
      "learning_rate": 9.999924056791192e-05,
      "loss": 0.1303,
      "step": 3100
    },
    {
      "epoch": 1.89208984375e-05,
      "model_forward_time": 0.11536192893981934,
      "step": 3100
    },
    {
      "epoch": 1.89208984375e-05,
      "step": 3100,
      "training_step_time": 0.44234418869018555
    },
    {
      "epoch": 1.8927001953125e-05,
      "model_forward_time": 0.1156461238861084,
      "step": 3101
    },
    {
      "epoch": 1.8927001953125e-05,
      "step": 3101,
      "training_step_time": 0.38055944442749023
    },
    {
      "epoch": 1.893310546875e-05,
      "model_forward_time": 0.11544537544250488,
      "step": 3102
    },
    {
      "epoch": 1.893310546875e-05,
      "step": 3102,
      "training_step_time": 0.39107751846313477
    },
    {
      "epoch": 1.8939208984375e-05,
      "model_forward_time": 0.1159060001373291,
      "step": 3103
    },
    {
      "epoch": 1.8939208984375e-05,
      "step": 3103,
      "training_step_time": 0.4631030559539795
    },
    {
      "epoch": 1.89453125e-05,
      "model_forward_time": 0.11530876159667969,
      "step": 3104
    },
    {
      "epoch": 1.89453125e-05,
      "step": 3104,
      "training_step_time": 0.4492509365081787
    },
    {
      "epoch": 1.8951416015625e-05,
      "model_forward_time": 0.11730480194091797,
      "step": 3105
    },
    {
      "epoch": 1.8951416015625e-05,
      "step": 3105,
      "training_step_time": 0.4149210453033447
    },
    {
      "epoch": 1.895751953125e-05,
      "model_forward_time": 0.11449551582336426,
      "step": 3106
    },
    {
      "epoch": 1.895751953125e-05,
      "step": 3106,
      "training_step_time": 0.4536139965057373
    },
    {
      "epoch": 1.8963623046875e-05,
      "model_forward_time": 0.1166377067565918,
      "step": 3107
    },
    {
      "epoch": 1.8963623046875e-05,
      "step": 3107,
      "training_step_time": 0.38239264488220215
    },
    {
      "epoch": 1.89697265625e-05,
      "model_forward_time": 0.11545372009277344,
      "step": 3108
    },
    {
      "epoch": 1.89697265625e-05,
      "step": 3108,
      "training_step_time": 0.3874201774597168
    },
    {
      "epoch": 1.8975830078125e-05,
      "model_forward_time": 0.11587667465209961,
      "step": 3109
    },
    {
      "epoch": 1.8975830078125e-05,
      "step": 3109,
      "training_step_time": 0.39160919189453125
    },
    {
      "epoch": 1.898193359375e-05,
      "grad_norm": 0.5898085832595825,
      "learning_rate": 9.999908108766191e-05,
      "loss": 0.1375,
      "step": 3110
    },
    {
      "epoch": 1.898193359375e-05,
      "model_forward_time": 0.1156306266784668,
      "step": 3110
    },
    {
      "epoch": 1.898193359375e-05,
      "step": 3110,
      "training_step_time": 0.3957531452178955
    },
    {
      "epoch": 1.8988037109375e-05,
      "model_forward_time": 0.1153721809387207,
      "step": 3111
    },
    {
      "epoch": 1.8988037109375e-05,
      "step": 3111,
      "training_step_time": 0.39482641220092773
    },
    {
      "epoch": 1.8994140625e-05,
      "model_forward_time": 0.11625862121582031,
      "step": 3112
    },
    {
      "epoch": 1.8994140625e-05,
      "step": 3112,
      "training_step_time": 0.48901987075805664
    },
    {
      "epoch": 1.9000244140625e-05,
      "model_forward_time": 0.11548709869384766,
      "step": 3113
    },
    {
      "epoch": 1.9000244140625e-05,
      "step": 3113,
      "training_step_time": 0.4403567314147949
    },
    {
      "epoch": 1.900634765625e-05,
      "model_forward_time": 0.11552309989929199,
      "step": 3114
    },
    {
      "epoch": 1.900634765625e-05,
      "step": 3114,
      "training_step_time": 0.41320347785949707
    },
    {
      "epoch": 1.9012451171875e-05,
      "model_forward_time": 0.11525988578796387,
      "step": 3115
    },
    {
      "epoch": 1.9012451171875e-05,
      "step": 3115,
      "training_step_time": 0.3957557678222656
    },
    {
      "epoch": 1.90185546875e-05,
      "model_forward_time": 0.11544680595397949,
      "step": 3116
    },
    {
      "epoch": 1.90185546875e-05,
      "step": 3116,
      "training_step_time": 0.3944535255432129
    },
    {
      "epoch": 1.9024658203125e-05,
      "model_forward_time": 0.1148078441619873,
      "step": 3117
    },
    {
      "epoch": 1.9024658203125e-05,
      "step": 3117,
      "training_step_time": 0.4031181335449219
    },
    {
      "epoch": 1.903076171875e-05,
      "model_forward_time": 0.1155388355255127,
      "step": 3118
    },
    {
      "epoch": 1.903076171875e-05,
      "step": 3118,
      "training_step_time": 0.4798707962036133
    },
    {
      "epoch": 1.9036865234375e-05,
      "model_forward_time": 0.11571311950683594,
      "step": 3119
    },
    {
      "epoch": 1.9036865234375e-05,
      "step": 3119,
      "training_step_time": 0.45881128311157227
    },
    {
      "epoch": 1.904296875e-05,
      "grad_norm": 0.3650141656398773,
      "learning_rate": 9.999890641901125e-05,
      "loss": 0.1265,
      "step": 3120
    },
    {
      "epoch": 1.904296875e-05,
      "model_forward_time": 0.11507177352905273,
      "step": 3120
    },
    {
      "epoch": 1.904296875e-05,
      "step": 3120,
      "training_step_time": 0.4721958637237549
    },
    {
      "epoch": 1.9049072265625e-05,
      "model_forward_time": 0.11526894569396973,
      "step": 3121
    },
    {
      "epoch": 1.9049072265625e-05,
      "step": 3121,
      "training_step_time": 0.4255485534667969
    },
    {
      "epoch": 1.905517578125e-05,
      "model_forward_time": 0.1173088550567627,
      "step": 3122
    },
    {
      "epoch": 1.905517578125e-05,
      "step": 3122,
      "training_step_time": 0.3958559036254883
    },
    {
      "epoch": 1.9061279296875e-05,
      "model_forward_time": 0.11981797218322754,
      "step": 3123
    },
    {
      "epoch": 1.9061279296875e-05,
      "step": 3123,
      "training_step_time": 0.39156174659729004
    },
    {
      "epoch": 1.90673828125e-05,
      "model_forward_time": 0.12033438682556152,
      "step": 3124
    },
    {
      "epoch": 1.90673828125e-05,
      "step": 3124,
      "training_step_time": 0.38104724884033203
    },
    {
      "epoch": 1.9073486328125e-05,
      "model_forward_time": 0.1163933277130127,
      "step": 3125
    },
    {
      "epoch": 1.9073486328125e-05,
      "step": 3125,
      "training_step_time": 0.40519213676452637
    },
    {
      "epoch": 1.907958984375e-05,
      "model_forward_time": 0.11543822288513184,
      "step": 3126
    },
    {
      "epoch": 1.907958984375e-05,
      "step": 3126,
      "training_step_time": 0.4450571537017822
    },
    {
      "epoch": 1.9085693359375e-05,
      "model_forward_time": 0.11567044258117676,
      "step": 3127
    },
    {
      "epoch": 1.9085693359375e-05,
      "step": 3127,
      "training_step_time": 0.4440629482269287
    },
    {
      "epoch": 1.9091796875e-05,
      "model_forward_time": 0.11545777320861816,
      "step": 3128
    },
    {
      "epoch": 1.9091796875e-05,
      "step": 3128,
      "training_step_time": 0.4479842185974121
    },
    {
      "epoch": 1.9097900390625e-05,
      "model_forward_time": 0.11571359634399414,
      "step": 3129
    },
    {
      "epoch": 1.9097900390625e-05,
      "step": 3129,
      "training_step_time": 0.3983910083770752
    },
    {
      "epoch": 1.910400390625e-05,
      "grad_norm": 0.4937800467014313,
      "learning_rate": 9.999871656201292e-05,
      "loss": 0.1335,
      "step": 3130
    },
    {
      "epoch": 1.910400390625e-05,
      "model_forward_time": 0.11461257934570312,
      "step": 3130
    },
    {
      "epoch": 1.910400390625e-05,
      "step": 3130,
      "training_step_time": 0.3916666507720947
    },
    {
      "epoch": 1.9110107421875e-05,
      "model_forward_time": 0.11553430557250977,
      "step": 3131
    },
    {
      "epoch": 1.9110107421875e-05,
      "step": 3131,
      "training_step_time": 0.36968469619750977
    },
    {
      "epoch": 1.91162109375e-05,
      "model_forward_time": 0.11566662788391113,
      "step": 3132
    },
    {
      "epoch": 1.91162109375e-05,
      "step": 3132,
      "training_step_time": 0.4354262351989746
    },
    {
      "epoch": 1.9122314453125e-05,
      "model_forward_time": 0.11597895622253418,
      "step": 3133
    },
    {
      "epoch": 1.9122314453125e-05,
      "step": 3133,
      "training_step_time": 0.39135003089904785
    },
    {
      "epoch": 1.912841796875e-05,
      "model_forward_time": 0.11630725860595703,
      "step": 3134
    },
    {
      "epoch": 1.912841796875e-05,
      "step": 3134,
      "training_step_time": 0.40018272399902344
    },
    {
      "epoch": 1.9134521484375e-05,
      "model_forward_time": 0.11504554748535156,
      "step": 3135
    },
    {
      "epoch": 1.9134521484375e-05,
      "step": 3135,
      "training_step_time": 0.43561553955078125
    },
    {
      "epoch": 1.9140625e-05,
      "model_forward_time": 0.11530756950378418,
      "step": 3136
    },
    {
      "epoch": 1.9140625e-05,
      "step": 3136,
      "training_step_time": 0.5058705806732178
    },
    {
      "epoch": 1.9146728515625e-05,
      "model_forward_time": 0.11496877670288086,
      "step": 3137
    },
    {
      "epoch": 1.9146728515625e-05,
      "step": 3137,
      "training_step_time": 0.3947906494140625
    },
    {
      "epoch": 1.915283203125e-05,
      "model_forward_time": 0.11695122718811035,
      "step": 3138
    },
    {
      "epoch": 1.915283203125e-05,
      "step": 3138,
      "training_step_time": 0.39412927627563477
    },
    {
      "epoch": 1.9158935546875e-05,
      "model_forward_time": 0.11746644973754883,
      "step": 3139
    },
    {
      "epoch": 1.9158935546875e-05,
      "step": 3139,
      "training_step_time": 0.40349364280700684
    },
    {
      "epoch": 1.91650390625e-05,
      "grad_norm": 0.47110259532928467,
      "learning_rate": 9.999851151672466e-05,
      "loss": 0.1368,
      "step": 3140
    },
    {
      "epoch": 1.91650390625e-05,
      "model_forward_time": 0.11548399925231934,
      "step": 3140
    },
    {
      "epoch": 1.91650390625e-05,
      "step": 3140,
      "training_step_time": 0.39733457565307617
    },
    {
      "epoch": 1.9171142578125e-05,
      "model_forward_time": 0.1154642105102539,
      "step": 3141
    },
    {
      "epoch": 1.9171142578125e-05,
      "step": 3141,
      "training_step_time": 0.3994588851928711
    },
    {
      "epoch": 1.917724609375e-05,
      "model_forward_time": 0.11583518981933594,
      "step": 3142
    },
    {
      "epoch": 1.917724609375e-05,
      "step": 3142,
      "training_step_time": 0.45316576957702637
    },
    {
      "epoch": 1.9183349609375e-05,
      "model_forward_time": 0.11567163467407227,
      "step": 3143
    },
    {
      "epoch": 1.9183349609375e-05,
      "step": 3143,
      "training_step_time": 0.44733285903930664
    },
    {
      "epoch": 1.9189453125e-05,
      "model_forward_time": 0.11557197570800781,
      "step": 3144
    },
    {
      "epoch": 1.9189453125e-05,
      "step": 3144,
      "training_step_time": 0.4021728038787842
    },
    {
      "epoch": 1.9195556640625e-05,
      "model_forward_time": 0.11552071571350098,
      "step": 3145
    },
    {
      "epoch": 1.9195556640625e-05,
      "step": 3145,
      "training_step_time": 0.398576021194458
    },
    {
      "epoch": 1.920166015625e-05,
      "model_forward_time": 0.11623215675354004,
      "step": 3146
    },
    {
      "epoch": 1.920166015625e-05,
      "step": 3146,
      "training_step_time": 0.3728320598602295
    },
    {
      "epoch": 1.9207763671875e-05,
      "model_forward_time": 0.11522936820983887,
      "step": 3147
    },
    {
      "epoch": 1.9207763671875e-05,
      "step": 3147,
      "training_step_time": 0.4705328941345215
    },
    {
      "epoch": 1.92138671875e-05,
      "model_forward_time": 0.1153261661529541,
      "step": 3148
    },
    {
      "epoch": 1.92138671875e-05,
      "step": 3148,
      "training_step_time": 0.45723962783813477
    },
    {
      "epoch": 1.9219970703125e-05,
      "model_forward_time": 0.1158759593963623,
      "step": 3149
    },
    {
      "epoch": 1.9219970703125e-05,
      "step": 3149,
      "training_step_time": 0.47935938835144043
    },
    {
      "epoch": 1.922607421875e-05,
      "grad_norm": 0.655949592590332,
      "learning_rate": 9.999829128320874e-05,
      "loss": 0.1436,
      "step": 3150
    },
    {
      "epoch": 1.922607421875e-05,
      "model_forward_time": 0.11481142044067383,
      "step": 3150
    },
    {
      "epoch": 1.922607421875e-05,
      "step": 3150,
      "training_step_time": 0.4678776264190674
    },
    {
      "epoch": 1.9232177734375e-05,
      "model_forward_time": 0.11548781394958496,
      "step": 3151
    },
    {
      "epoch": 1.9232177734375e-05,
      "step": 3151,
      "training_step_time": 0.4102447032928467
    },
    {
      "epoch": 1.923828125e-05,
      "model_forward_time": 0.11537361145019531,
      "step": 3152
    },
    {
      "epoch": 1.923828125e-05,
      "step": 3152,
      "training_step_time": 0.39245104789733887
    },
    {
      "epoch": 1.9244384765625e-05,
      "model_forward_time": 0.11660122871398926,
      "step": 3153
    },
    {
      "epoch": 1.9244384765625e-05,
      "step": 3153,
      "training_step_time": 0.4242284297943115
    },
    {
      "epoch": 1.925048828125e-05,
      "model_forward_time": 0.11533856391906738,
      "step": 3154
    },
    {
      "epoch": 1.925048828125e-05,
      "step": 3154,
      "training_step_time": 0.39292287826538086
    },
    {
      "epoch": 1.9256591796875e-05,
      "model_forward_time": 0.11590743064880371,
      "step": 3155
    },
    {
      "epoch": 1.9256591796875e-05,
      "step": 3155,
      "training_step_time": 0.4774143695831299
    },
    {
      "epoch": 1.92626953125e-05,
      "model_forward_time": 0.11616230010986328,
      "step": 3156
    },
    {
      "epoch": 1.92626953125e-05,
      "step": 3156,
      "training_step_time": 0.49386048316955566
    },
    {
      "epoch": 1.9268798828125e-05,
      "model_forward_time": 0.1147608757019043,
      "step": 3157
    },
    {
      "epoch": 1.9268798828125e-05,
      "step": 3157,
      "training_step_time": 0.4010274410247803
    },
    {
      "epoch": 1.927490234375e-05,
      "model_forward_time": 0.11574935913085938,
      "step": 3158
    },
    {
      "epoch": 1.927490234375e-05,
      "step": 3158,
      "training_step_time": 0.4008297920227051
    },
    {
      "epoch": 1.9281005859375e-05,
      "model_forward_time": 0.11602306365966797,
      "step": 3159
    },
    {
      "epoch": 1.9281005859375e-05,
      "step": 3159,
      "training_step_time": 0.3824465274810791
    },
    {
      "epoch": 1.9287109375e-05,
      "grad_norm": 0.6035019755363464,
      "learning_rate": 9.999805586153205e-05,
      "loss": 0.1449,
      "step": 3160
    },
    {
      "epoch": 1.9287109375e-05,
      "model_forward_time": 0.11595988273620605,
      "step": 3160
    },
    {
      "epoch": 1.9287109375e-05,
      "step": 3160,
      "training_step_time": 0.38953590393066406
    },
    {
      "epoch": 1.9293212890625e-05,
      "model_forward_time": 0.11618399620056152,
      "step": 3161
    },
    {
      "epoch": 1.9293212890625e-05,
      "step": 3161,
      "training_step_time": 0.4184260368347168
    },
    {
      "epoch": 1.929931640625e-05,
      "model_forward_time": 0.11635017395019531,
      "step": 3162
    },
    {
      "epoch": 1.929931640625e-05,
      "step": 3162,
      "training_step_time": 0.445603609085083
    },
    {
      "epoch": 1.9305419921875e-05,
      "model_forward_time": 0.11632585525512695,
      "step": 3163
    },
    {
      "epoch": 1.9305419921875e-05,
      "step": 3163,
      "training_step_time": 0.5107176303863525
    },
    {
      "epoch": 1.93115234375e-05,
      "model_forward_time": 0.11590099334716797,
      "step": 3164
    },
    {
      "epoch": 1.93115234375e-05,
      "step": 3164,
      "training_step_time": 0.4397563934326172
    },
    {
      "epoch": 1.9317626953125e-05,
      "model_forward_time": 0.11495065689086914,
      "step": 3165
    },
    {
      "epoch": 1.9317626953125e-05,
      "step": 3165,
      "training_step_time": 0.39283132553100586
    },
    {
      "epoch": 1.932373046875e-05,
      "model_forward_time": 0.11556291580200195,
      "step": 3166
    },
    {
      "epoch": 1.932373046875e-05,
      "step": 3166,
      "training_step_time": 0.42684316635131836
    },
    {
      "epoch": 1.9329833984375e-05,
      "model_forward_time": 0.11500811576843262,
      "step": 3167
    },
    {
      "epoch": 1.9329833984375e-05,
      "step": 3167,
      "training_step_time": 0.41171813011169434
    },
    {
      "epoch": 1.93359375e-05,
      "model_forward_time": 0.11536717414855957,
      "step": 3168
    },
    {
      "epoch": 1.93359375e-05,
      "step": 3168,
      "training_step_time": 0.3970012664794922
    },
    {
      "epoch": 1.9342041015625e-05,
      "model_forward_time": 0.11574578285217285,
      "step": 3169
    },
    {
      "epoch": 1.9342041015625e-05,
      "step": 3169,
      "training_step_time": 0.4070422649383545
    },
    {
      "epoch": 1.934814453125e-05,
      "grad_norm": 0.301326721906662,
      "learning_rate": 9.99978052517661e-05,
      "loss": 0.1366,
      "step": 3170
    },
    {
      "epoch": 1.934814453125e-05,
      "model_forward_time": 0.11565256118774414,
      "step": 3170
    },
    {
      "epoch": 1.934814453125e-05,
      "step": 3170,
      "training_step_time": 0.46495985984802246
    },
    {
      "epoch": 1.9354248046875e-05,
      "model_forward_time": 0.11542987823486328,
      "step": 3171
    },
    {
      "epoch": 1.9354248046875e-05,
      "step": 3171,
      "training_step_time": 0.44991278648376465
    },
    {
      "epoch": 1.93603515625e-05,
      "model_forward_time": 0.11571669578552246,
      "step": 3172
    },
    {
      "epoch": 1.93603515625e-05,
      "step": 3172,
      "training_step_time": 0.3933100700378418
    },
    {
      "epoch": 1.9366455078125e-05,
      "model_forward_time": 0.11538100242614746,
      "step": 3173
    },
    {
      "epoch": 1.9366455078125e-05,
      "step": 3173,
      "training_step_time": 0.38616347312927246
    },
    {
      "epoch": 1.937255859375e-05,
      "model_forward_time": 0.11695456504821777,
      "step": 3174
    },
    {
      "epoch": 1.937255859375e-05,
      "step": 3174,
      "training_step_time": 0.39479875564575195
    },
    {
      "epoch": 1.9378662109375e-05,
      "model_forward_time": 0.11535978317260742,
      "step": 3175
    },
    {
      "epoch": 1.9378662109375e-05,
      "step": 3175,
      "training_step_time": 0.3705141544342041
    },
    {
      "epoch": 1.9384765625e-05,
      "model_forward_time": 0.11519217491149902,
      "step": 3176
    },
    {
      "epoch": 1.9384765625e-05,
      "step": 3176,
      "training_step_time": 0.4800865650177002
    },
    {
      "epoch": 1.9390869140625e-05,
      "model_forward_time": 0.1159970760345459,
      "step": 3177
    },
    {
      "epoch": 1.9390869140625e-05,
      "step": 3177,
      "training_step_time": 0.45584678649902344
    },
    {
      "epoch": 1.939697265625e-05,
      "model_forward_time": 0.11542701721191406,
      "step": 3178
    },
    {
      "epoch": 1.939697265625e-05,
      "step": 3178,
      "training_step_time": 0.4724695682525635
    },
    {
      "epoch": 1.9403076171875e-05,
      "model_forward_time": 0.1169123649597168,
      "step": 3179
    },
    {
      "epoch": 1.9403076171875e-05,
      "step": 3179,
      "training_step_time": 0.4854714870452881
    },
    {
      "epoch": 1.94091796875e-05,
      "grad_norm": 0.43525663018226624,
      "learning_rate": 9.999753945398704e-05,
      "loss": 0.1329,
      "step": 3180
    },
    {
      "epoch": 1.94091796875e-05,
      "model_forward_time": 0.11501550674438477,
      "step": 3180
    },
    {
      "epoch": 1.94091796875e-05,
      "step": 3180,
      "training_step_time": 0.39350032806396484
    },
    {
      "epoch": 1.9415283203125e-05,
      "model_forward_time": 0.11509251594543457,
      "step": 3181
    },
    {
      "epoch": 1.9415283203125e-05,
      "step": 3181,
      "training_step_time": 0.4042785167694092
    },
    {
      "epoch": 1.942138671875e-05,
      "model_forward_time": 0.11472630500793457,
      "step": 3182
    },
    {
      "epoch": 1.942138671875e-05,
      "step": 3182,
      "training_step_time": 0.39275145530700684
    },
    {
      "epoch": 1.9427490234375e-05,
      "model_forward_time": 0.11548352241516113,
      "step": 3183
    },
    {
      "epoch": 1.9427490234375e-05,
      "step": 3183,
      "training_step_time": 0.40822649002075195
    },
    {
      "epoch": 1.943359375e-05,
      "model_forward_time": 0.11514902114868164,
      "step": 3184
    },
    {
      "epoch": 1.943359375e-05,
      "step": 3184,
      "training_step_time": 0.4444270133972168
    },
    {
      "epoch": 1.9439697265625e-05,
      "model_forward_time": 0.11526346206665039,
      "step": 3185
    },
    {
      "epoch": 1.9439697265625e-05,
      "step": 3185,
      "training_step_time": 0.48259687423706055
    },
    {
      "epoch": 1.944580078125e-05,
      "model_forward_time": 0.11561417579650879,
      "step": 3186
    },
    {
      "epoch": 1.944580078125e-05,
      "step": 3186,
      "training_step_time": 0.5314106941223145
    },
    {
      "epoch": 1.9451904296875e-05,
      "model_forward_time": 0.11508393287658691,
      "step": 3187
    },
    {
      "epoch": 1.9451904296875e-05,
      "step": 3187,
      "training_step_time": 0.3913583755493164
    },
    {
      "epoch": 1.94580078125e-05,
      "model_forward_time": 0.11490559577941895,
      "step": 3188
    },
    {
      "epoch": 1.94580078125e-05,
      "step": 3188,
      "training_step_time": 0.3975954055786133
    },
    {
      "epoch": 1.9464111328125e-05,
      "model_forward_time": 0.11547708511352539,
      "step": 3189
    },
    {
      "epoch": 1.9464111328125e-05,
      "step": 3189,
      "training_step_time": 0.3701162338256836
    },
    {
      "epoch": 1.947021484375e-05,
      "grad_norm": 0.35275959968566895,
      "learning_rate": 9.999725846827562e-05,
      "loss": 0.1375,
      "step": 3190
    },
    {
      "epoch": 1.947021484375e-05,
      "model_forward_time": 0.11498379707336426,
      "step": 3190
    },
    {
      "epoch": 1.947021484375e-05,
      "step": 3190,
      "training_step_time": 0.4246394634246826
    },
    {
      "epoch": 1.9476318359375e-05,
      "model_forward_time": 0.11555194854736328,
      "step": 3191
    },
    {
      "epoch": 1.9476318359375e-05,
      "step": 3191,
      "training_step_time": 0.4720497131347656
    },
    {
      "epoch": 1.9482421875e-05,
      "model_forward_time": 0.11680340766906738,
      "step": 3192
    },
    {
      "epoch": 1.9482421875e-05,
      "step": 3192,
      "training_step_time": 0.4279041290283203
    },
    {
      "epoch": 1.9488525390625e-05,
      "model_forward_time": 0.11538195610046387,
      "step": 3193
    },
    {
      "epoch": 1.9488525390625e-05,
      "step": 3193,
      "training_step_time": 0.44573354721069336
    },
    {
      "epoch": 1.949462890625e-05,
      "model_forward_time": 0.11465764045715332,
      "step": 3194
    },
    {
      "epoch": 1.949462890625e-05,
      "step": 3194,
      "training_step_time": 0.39505720138549805
    },
    {
      "epoch": 1.9500732421875e-05,
      "model_forward_time": 0.11547136306762695,
      "step": 3195
    },
    {
      "epoch": 1.9500732421875e-05,
      "step": 3195,
      "training_step_time": 0.3914034366607666
    },
    {
      "epoch": 1.95068359375e-05,
      "model_forward_time": 0.11548233032226562,
      "step": 3196
    },
    {
      "epoch": 1.95068359375e-05,
      "step": 3196,
      "training_step_time": 0.39209866523742676
    },
    {
      "epoch": 1.9512939453125e-05,
      "model_forward_time": 0.11544632911682129,
      "step": 3197
    },
    {
      "epoch": 1.9512939453125e-05,
      "step": 3197,
      "training_step_time": 0.39629077911376953
    },
    {
      "epoch": 1.951904296875e-05,
      "model_forward_time": 0.11536002159118652,
      "step": 3198
    },
    {
      "epoch": 1.951904296875e-05,
      "step": 3198,
      "training_step_time": 0.401186466217041
    },
    {
      "epoch": 1.9525146484375e-05,
      "model_forward_time": 0.11576080322265625,
      "step": 3199
    },
    {
      "epoch": 1.9525146484375e-05,
      "step": 3199,
      "training_step_time": 0.4611330032348633
    },
    {
      "epoch": 1.953125e-05,
      "grad_norm": 0.31576210260391235,
      "learning_rate": 9.999696229471716e-05,
      "loss": 0.1396,
      "step": 3200
    },
    {
      "epoch": 1.953125e-05,
      "model_forward_time": 0.11528706550598145,
      "step": 3200
    },
    {
      "epoch": 1.953125e-05,
      "step": 3200,
      "training_step_time": 0.44339680671691895
    },
    {
      "epoch": 1.9537353515625e-05,
      "model_forward_time": 0.11523175239562988,
      "step": 3201
    },
    {
      "epoch": 1.9537353515625e-05,
      "step": 3201,
      "training_step_time": 0.4407501220703125
    },
    {
      "epoch": 1.954345703125e-05,
      "model_forward_time": 0.11517000198364258,
      "step": 3202
    },
    {
      "epoch": 1.954345703125e-05,
      "step": 3202,
      "training_step_time": 0.3961036205291748
    },
    {
      "epoch": 1.9549560546875e-05,
      "model_forward_time": 0.11527013778686523,
      "step": 3203
    },
    {
      "epoch": 1.9549560546875e-05,
      "step": 3203,
      "training_step_time": 0.3912672996520996
    },
    {
      "epoch": 1.95556640625e-05,
      "model_forward_time": 0.11499977111816406,
      "step": 3204
    },
    {
      "epoch": 1.95556640625e-05,
      "step": 3204,
      "training_step_time": 0.36847996711730957
    },
    {
      "epoch": 1.9561767578125e-05,
      "model_forward_time": 0.11541104316711426,
      "step": 3205
    },
    {
      "epoch": 1.9561767578125e-05,
      "step": 3205,
      "training_step_time": 0.478069543838501
    },
    {
      "epoch": 1.956787109375e-05,
      "model_forward_time": 0.11580681800842285,
      "step": 3206
    },
    {
      "epoch": 1.956787109375e-05,
      "step": 3206,
      "training_step_time": 0.5023922920227051
    },
    {
      "epoch": 1.9573974609375e-05,
      "model_forward_time": 0.11438393592834473,
      "step": 3207
    },
    {
      "epoch": 1.9573974609375e-05,
      "step": 3207,
      "training_step_time": 0.45595383644104004
    },
    {
      "epoch": 1.9580078125e-05,
      "model_forward_time": 0.11524391174316406,
      "step": 3208
    },
    {
      "epoch": 1.9580078125e-05,
      "step": 3208,
      "training_step_time": 0.3956897258758545
    },
    {
      "epoch": 1.9586181640625e-05,
      "model_forward_time": 0.1148216724395752,
      "step": 3209
    },
    {
      "epoch": 1.9586181640625e-05,
      "step": 3209,
      "training_step_time": 0.39198946952819824
    },
    {
      "epoch": 1.959228515625e-05,
      "grad_norm": 0.34488123655319214,
      "learning_rate": 9.999665093340165e-05,
      "loss": 0.1305,
      "step": 3210
    },
    {
      "epoch": 1.959228515625e-05,
      "model_forward_time": 0.1160893440246582,
      "step": 3210
    },
    {
      "epoch": 1.959228515625e-05,
      "step": 3210,
      "training_step_time": 0.3995988368988037
    },
    {
      "epoch": 1.9598388671875e-05,
      "model_forward_time": 0.11520814895629883,
      "step": 3211
    },
    {
      "epoch": 1.9598388671875e-05,
      "step": 3211,
      "training_step_time": 0.39791107177734375
    },
    {
      "epoch": 1.96044921875e-05,
      "model_forward_time": 0.11564111709594727,
      "step": 3212
    },
    {
      "epoch": 1.96044921875e-05,
      "step": 3212,
      "training_step_time": 0.39317846298217773
    },
    {
      "epoch": 1.9610595703125e-05,
      "model_forward_time": 0.1153116226196289,
      "step": 3213
    },
    {
      "epoch": 1.9610595703125e-05,
      "step": 3213,
      "training_step_time": 0.3899376392364502
    },
    {
      "epoch": 1.961669921875e-05,
      "model_forward_time": 0.11618256568908691,
      "step": 3214
    },
    {
      "epoch": 1.961669921875e-05,
      "step": 3214,
      "training_step_time": 0.39061570167541504
    },
    {
      "epoch": 1.9622802734375e-05,
      "model_forward_time": 0.13776159286499023,
      "step": 3215
    },
    {
      "epoch": 1.9622802734375e-05,
      "step": 3215,
      "training_step_time": 0.4506964683532715
    },
    {
      "epoch": 1.962890625e-05,
      "model_forward_time": 0.11518001556396484,
      "step": 3216
    },
    {
      "epoch": 1.962890625e-05,
      "step": 3216,
      "training_step_time": 0.482677698135376
    },
    {
      "epoch": 1.9635009765625e-05,
      "model_forward_time": 0.11525869369506836,
      "step": 3217
    },
    {
      "epoch": 1.9635009765625e-05,
      "step": 3217,
      "training_step_time": 0.39673328399658203
    },
    {
      "epoch": 1.964111328125e-05,
      "model_forward_time": 0.11510562896728516,
      "step": 3218
    },
    {
      "epoch": 1.964111328125e-05,
      "step": 3218,
      "training_step_time": 0.3888697624206543
    },
    {
      "epoch": 1.9647216796875e-05,
      "model_forward_time": 0.1157369613647461,
      "step": 3219
    },
    {
      "epoch": 1.9647216796875e-05,
      "step": 3219,
      "training_step_time": 0.4100513458251953
    },
    {
      "epoch": 1.96533203125e-05,
      "grad_norm": 0.4240483045578003,
      "learning_rate": 9.999632438442367e-05,
      "loss": 0.126,
      "step": 3220
    },
    {
      "epoch": 1.96533203125e-05,
      "model_forward_time": 0.11409997940063477,
      "step": 3220
    },
    {
      "epoch": 1.96533203125e-05,
      "step": 3220,
      "training_step_time": 0.47733235359191895
    },
    {
      "epoch": 1.9659423828125e-05,
      "model_forward_time": 0.1155552864074707,
      "step": 3221
    },
    {
      "epoch": 1.9659423828125e-05,
      "step": 3221,
      "training_step_time": 0.5122325420379639
    },
    {
      "epoch": 1.966552734375e-05,
      "model_forward_time": 0.11487245559692383,
      "step": 3222
    },
    {
      "epoch": 1.966552734375e-05,
      "step": 3222,
      "training_step_time": 0.5099046230316162
    },
    {
      "epoch": 1.9671630859375e-05,
      "model_forward_time": 0.11456084251403809,
      "step": 3223
    },
    {
      "epoch": 1.9671630859375e-05,
      "step": 3223,
      "training_step_time": 0.4020099639892578
    },
    {
      "epoch": 1.9677734375e-05,
      "model_forward_time": 0.11468982696533203,
      "step": 3224
    },
    {
      "epoch": 1.9677734375e-05,
      "step": 3224,
      "training_step_time": 0.39495038986206055
    },
    {
      "epoch": 1.9683837890625e-05,
      "model_forward_time": 0.11531400680541992,
      "step": 3225
    },
    {
      "epoch": 1.9683837890625e-05,
      "step": 3225,
      "training_step_time": 0.39069104194641113
    },
    {
      "epoch": 1.968994140625e-05,
      "model_forward_time": 0.11498141288757324,
      "step": 3226
    },
    {
      "epoch": 1.968994140625e-05,
      "step": 3226,
      "training_step_time": 0.3845858573913574
    },
    {
      "epoch": 1.9696044921875e-05,
      "model_forward_time": 0.11676311492919922,
      "step": 3227
    },
    {
      "epoch": 1.9696044921875e-05,
      "step": 3227,
      "training_step_time": 0.39420557022094727
    },
    {
      "epoch": 1.97021484375e-05,
      "model_forward_time": 0.11535310745239258,
      "step": 3228
    },
    {
      "epoch": 1.97021484375e-05,
      "step": 3228,
      "training_step_time": 0.5143187046051025
    },
    {
      "epoch": 1.9708251953125e-05,
      "model_forward_time": 0.1151585578918457,
      "step": 3229
    },
    {
      "epoch": 1.9708251953125e-05,
      "step": 3229,
      "training_step_time": 0.47112298011779785
    },
    {
      "epoch": 1.971435546875e-05,
      "grad_norm": 0.4277900755405426,
      "learning_rate": 9.999598264788241e-05,
      "loss": 0.1319,
      "step": 3230
    },
    {
      "epoch": 1.971435546875e-05,
      "model_forward_time": 0.11507034301757812,
      "step": 3230
    },
    {
      "epoch": 1.971435546875e-05,
      "step": 3230,
      "training_step_time": 0.4100651741027832
    },
    {
      "epoch": 1.9720458984375e-05,
      "model_forward_time": 0.1149752140045166,
      "step": 3231
    },
    {
      "epoch": 1.9720458984375e-05,
      "step": 3231,
      "training_step_time": 0.38657593727111816
    },
    {
      "epoch": 1.97265625e-05,
      "model_forward_time": 0.1164543628692627,
      "step": 3232
    },
    {
      "epoch": 1.97265625e-05,
      "step": 3232,
      "training_step_time": 0.38765716552734375
    },
    {
      "epoch": 1.9732666015625e-05,
      "model_forward_time": 0.1151590347290039,
      "step": 3233
    },
    {
      "epoch": 1.9732666015625e-05,
      "step": 3233,
      "training_step_time": 0.40390920639038086
    },
    {
      "epoch": 1.973876953125e-05,
      "model_forward_time": 0.11541366577148438,
      "step": 3234
    },
    {
      "epoch": 1.973876953125e-05,
      "step": 3234,
      "training_step_time": 0.5980324745178223
    },
    {
      "epoch": 1.9744873046875e-05,
      "model_forward_time": 0.11438941955566406,
      "step": 3235
    },
    {
      "epoch": 1.9744873046875e-05,
      "step": 3235,
      "training_step_time": 0.4262733459472656
    },
    {
      "epoch": 1.97509765625e-05,
      "model_forward_time": 0.1170194149017334,
      "step": 3236
    },
    {
      "epoch": 1.97509765625e-05,
      "step": 3236,
      "training_step_time": 0.4184131622314453
    },
    {
      "epoch": 1.9757080078125e-05,
      "model_forward_time": 0.11532402038574219,
      "step": 3237
    },
    {
      "epoch": 1.9757080078125e-05,
      "step": 3237,
      "training_step_time": 0.3933532238006592
    },
    {
      "epoch": 1.976318359375e-05,
      "model_forward_time": 0.11501836776733398,
      "step": 3238
    },
    {
      "epoch": 1.976318359375e-05,
      "step": 3238,
      "training_step_time": 0.3974635601043701
    },
    {
      "epoch": 1.9769287109375e-05,
      "model_forward_time": 0.11542272567749023,
      "step": 3239
    },
    {
      "epoch": 1.9769287109375e-05,
      "step": 3239,
      "training_step_time": 0.39320898056030273
    },
    {
      "epoch": 1.9775390625e-05,
      "grad_norm": 0.3508949279785156,
      "learning_rate": 9.99956257238817e-05,
      "loss": 0.1239,
      "step": 3240
    },
    {
      "epoch": 1.9775390625e-05,
      "model_forward_time": 0.11455225944519043,
      "step": 3240
    },
    {
      "epoch": 1.9775390625e-05,
      "step": 3240,
      "training_step_time": 0.6563575267791748
    },
    {
      "epoch": 1.9781494140625e-05,
      "model_forward_time": 0.11487555503845215,
      "step": 3241
    },
    {
      "epoch": 1.9781494140625e-05,
      "step": 3241,
      "training_step_time": 0.4068584442138672
    },
    {
      "epoch": 1.978759765625e-05,
      "model_forward_time": 0.11447525024414062,
      "step": 3242
    },
    {
      "epoch": 1.978759765625e-05,
      "step": 3242,
      "training_step_time": 0.4015359878540039
    },
    {
      "epoch": 1.9793701171875e-05,
      "model_forward_time": 0.1152198314666748,
      "step": 3243
    },
    {
      "epoch": 1.9793701171875e-05,
      "step": 3243,
      "training_step_time": 0.481065034866333
    },
    {
      "epoch": 1.97998046875e-05,
      "model_forward_time": 0.11459803581237793,
      "step": 3244
    },
    {
      "epoch": 1.97998046875e-05,
      "step": 3244,
      "training_step_time": 0.4968986511230469
    },
    {
      "epoch": 1.9805908203125e-05,
      "model_forward_time": 0.11452579498291016,
      "step": 3245
    },
    {
      "epoch": 1.9805908203125e-05,
      "step": 3245,
      "training_step_time": 0.3937814235687256
    },
    {
      "epoch": 1.981201171875e-05,
      "model_forward_time": 0.1148984432220459,
      "step": 3246
    },
    {
      "epoch": 1.981201171875e-05,
      "step": 3246,
      "training_step_time": 0.4810786247253418
    },
    {
      "epoch": 1.9818115234375e-05,
      "model_forward_time": 0.11531400680541992,
      "step": 3247
    },
    {
      "epoch": 1.9818115234375e-05,
      "step": 3247,
      "training_step_time": 0.4130895137786865
    },
    {
      "epoch": 1.982421875e-05,
      "model_forward_time": 0.11414480209350586,
      "step": 3248
    },
    {
      "epoch": 1.982421875e-05,
      "step": 3248,
      "training_step_time": 0.45717835426330566
    },
    {
      "epoch": 1.9830322265625e-05,
      "model_forward_time": 0.11441349983215332,
      "step": 3249
    },
    {
      "epoch": 1.9830322265625e-05,
      "step": 3249,
      "training_step_time": 0.48316359519958496
    },
    {
      "epoch": 1.983642578125e-05,
      "grad_norm": 0.5045982599258423,
      "learning_rate": 9.999525361252996e-05,
      "loss": 0.1224,
      "step": 3250
    },
    {
      "epoch": 1.983642578125e-05,
      "model_forward_time": 0.11447763442993164,
      "step": 3250
    },
    {
      "epoch": 1.983642578125e-05,
      "step": 3250,
      "training_step_time": 0.40717411041259766
    },
    {
      "epoch": 1.9842529296875e-05,
      "model_forward_time": 0.11455917358398438,
      "step": 3251
    },
    {
      "epoch": 1.9842529296875e-05,
      "step": 3251,
      "training_step_time": 0.402524471282959
    },
    {
      "epoch": 1.98486328125e-05,
      "model_forward_time": 0.11520552635192871,
      "step": 3252
    },
    {
      "epoch": 1.98486328125e-05,
      "step": 3252,
      "training_step_time": 0.40528202056884766
    },
    {
      "epoch": 1.9854736328125e-05,
      "model_forward_time": 0.11492633819580078,
      "step": 3253
    },
    {
      "epoch": 1.9854736328125e-05,
      "step": 3253,
      "training_step_time": 0.3878746032714844
    },
    {
      "epoch": 1.986083984375e-05,
      "model_forward_time": 0.11516523361206055,
      "step": 3254
    },
    {
      "epoch": 1.986083984375e-05,
      "step": 3254,
      "training_step_time": 0.3923604488372803
    },
    {
      "epoch": 1.9866943359375e-05,
      "model_forward_time": 0.11489343643188477,
      "step": 3255
    },
    {
      "epoch": 1.9866943359375e-05,
      "step": 3255,
      "training_step_time": 0.3995945453643799
    },
    {
      "epoch": 1.9873046875e-05,
      "model_forward_time": 0.11575865745544434,
      "step": 3256
    },
    {
      "epoch": 1.9873046875e-05,
      "step": 3256,
      "training_step_time": 0.39702391624450684
    },
    {
      "epoch": 1.9879150390625e-05,
      "model_forward_time": 0.11542057991027832,
      "step": 3257
    },
    {
      "epoch": 1.9879150390625e-05,
      "step": 3257,
      "training_step_time": 0.470029354095459
    },
    {
      "epoch": 1.988525390625e-05,
      "model_forward_time": 0.11593127250671387,
      "step": 3258
    },
    {
      "epoch": 1.988525390625e-05,
      "step": 3258,
      "training_step_time": 0.5381247997283936
    },
    {
      "epoch": 1.9891357421875e-05,
      "model_forward_time": 0.11500048637390137,
      "step": 3259
    },
    {
      "epoch": 1.9891357421875e-05,
      "step": 3259,
      "training_step_time": 0.46756887435913086
    },
    {
      "epoch": 1.98974609375e-05,
      "grad_norm": 0.41363024711608887,
      "learning_rate": 9.999486631394021e-05,
      "loss": 0.1346,
      "step": 3260
    },
    {
      "epoch": 1.98974609375e-05,
      "model_forward_time": 0.11457538604736328,
      "step": 3260
    },
    {
      "epoch": 1.98974609375e-05,
      "step": 3260,
      "training_step_time": 0.3879561424255371
    },
    {
      "epoch": 1.9903564453125e-05,
      "model_forward_time": 0.11545944213867188,
      "step": 3261
    },
    {
      "epoch": 1.9903564453125e-05,
      "step": 3261,
      "training_step_time": 0.44149303436279297
    },
    {
      "epoch": 1.990966796875e-05,
      "model_forward_time": 0.1152963638305664,
      "step": 3262
    },
    {
      "epoch": 1.990966796875e-05,
      "step": 3262,
      "training_step_time": 0.47467970848083496
    },
    {
      "epoch": 1.9915771484375e-05,
      "model_forward_time": 0.1154484748840332,
      "step": 3263
    },
    {
      "epoch": 1.9915771484375e-05,
      "step": 3263,
      "training_step_time": 0.49245405197143555
    },
    {
      "epoch": 1.9921875e-05,
      "model_forward_time": 0.11561012268066406,
      "step": 3264
    },
    {
      "epoch": 1.9921875e-05,
      "step": 3264,
      "training_step_time": 0.5074079036712646
    },
    {
      "epoch": 1.9927978515625e-05,
      "model_forward_time": 0.11538386344909668,
      "step": 3265
    },
    {
      "epoch": 1.9927978515625e-05,
      "step": 3265,
      "training_step_time": 0.41054463386535645
    },
    {
      "epoch": 1.993408203125e-05,
      "model_forward_time": 0.11484241485595703,
      "step": 3266
    },
    {
      "epoch": 1.993408203125e-05,
      "step": 3266,
      "training_step_time": 0.3908045291900635
    },
    {
      "epoch": 1.9940185546875e-05,
      "model_forward_time": 0.11522722244262695,
      "step": 3267
    },
    {
      "epoch": 1.9940185546875e-05,
      "step": 3267,
      "training_step_time": 0.38822507858276367
    },
    {
      "epoch": 1.99462890625e-05,
      "model_forward_time": 0.11517620086669922,
      "step": 3268
    },
    {
      "epoch": 1.99462890625e-05,
      "step": 3268,
      "training_step_time": 0.3925292491912842
    },
    {
      "epoch": 1.9952392578125e-05,
      "model_forward_time": 0.11490654945373535,
      "step": 3269
    },
    {
      "epoch": 1.9952392578125e-05,
      "step": 3269,
      "training_step_time": 0.3905823230743408
    },
    {
      "epoch": 1.995849609375e-05,
      "grad_norm": 0.30719318985939026,
      "learning_rate": 9.999446382823013e-05,
      "loss": 0.1256,
      "step": 3270
    },
    {
      "epoch": 1.995849609375e-05,
      "model_forward_time": 0.11519956588745117,
      "step": 3270
    },
    {
      "epoch": 1.995849609375e-05,
      "step": 3270,
      "training_step_time": 0.42725324630737305
    },
    {
      "epoch": 1.9964599609375e-05,
      "model_forward_time": 0.11510014533996582,
      "step": 3271
    },
    {
      "epoch": 1.9964599609375e-05,
      "step": 3271,
      "training_step_time": 0.39406847953796387
    },
    {
      "epoch": 1.9970703125e-05,
      "model_forward_time": 0.11606216430664062,
      "step": 3272
    },
    {
      "epoch": 1.9970703125e-05,
      "step": 3272,
      "training_step_time": 0.4745817184448242
    },
    {
      "epoch": 1.9976806640625e-05,
      "model_forward_time": 0.11571598052978516,
      "step": 3273
    },
    {
      "epoch": 1.9976806640625e-05,
      "step": 3273,
      "training_step_time": 0.5139906406402588
    },
    {
      "epoch": 1.998291015625e-05,
      "model_forward_time": 0.1144416332244873,
      "step": 3274
    },
    {
      "epoch": 1.998291015625e-05,
      "step": 3274,
      "training_step_time": 0.4306473731994629
    },
    {
      "epoch": 1.9989013671875e-05,
      "model_forward_time": 0.1155099868774414,
      "step": 3275
    },
    {
      "epoch": 1.9989013671875e-05,
      "step": 3275,
      "training_step_time": 0.41071510314941406
    },
    {
      "epoch": 1.99951171875e-05,
      "model_forward_time": 0.11432003974914551,
      "step": 3276
    },
    {
      "epoch": 1.99951171875e-05,
      "step": 3276,
      "training_step_time": 0.4131455421447754
    },
    {
      "epoch": 2.0001220703125e-05,
      "model_forward_time": 0.11556720733642578,
      "step": 3277
    },
    {
      "epoch": 2.0001220703125e-05,
      "step": 3277,
      "training_step_time": 0.4217541217803955
    },
    {
      "epoch": 2.000732421875e-05,
      "model_forward_time": 0.11630415916442871,
      "step": 3278
    },
    {
      "epoch": 2.000732421875e-05,
      "step": 3278,
      "training_step_time": 0.48757457733154297
    },
    {
      "epoch": 2.0013427734375e-05,
      "model_forward_time": 0.11602020263671875,
      "step": 3279
    },
    {
      "epoch": 2.0013427734375e-05,
      "step": 3279,
      "training_step_time": 0.4174783229827881
    },
    {
      "epoch": 2.001953125e-05,
      "grad_norm": 0.39607682824134827,
      "learning_rate": 9.999404615552194e-05,
      "loss": 0.1225,
      "step": 3280
    },
    {
      "epoch": 2.001953125e-05,
      "model_forward_time": 0.11493492126464844,
      "step": 3280
    },
    {
      "epoch": 2.001953125e-05,
      "step": 3280,
      "training_step_time": 0.3953080177307129
    },
    {
      "epoch": 2.0025634765625e-05,
      "model_forward_time": 0.1156005859375,
      "step": 3281
    },
    {
      "epoch": 2.0025634765625e-05,
      "step": 3281,
      "training_step_time": 0.39742016792297363
    },
    {
      "epoch": 2.003173828125e-05,
      "model_forward_time": 0.11561369895935059,
      "step": 3282
    },
    {
      "epoch": 2.003173828125e-05,
      "step": 3282,
      "training_step_time": 0.4257376194000244
    },
    {
      "epoch": 2.0037841796875e-05,
      "model_forward_time": 0.1150503158569336,
      "step": 3283
    },
    {
      "epoch": 2.0037841796875e-05,
      "step": 3283,
      "training_step_time": 0.39304399490356445
    },
    {
      "epoch": 2.00439453125e-05,
      "model_forward_time": 0.11533498764038086,
      "step": 3284
    },
    {
      "epoch": 2.00439453125e-05,
      "step": 3284,
      "training_step_time": 0.40543437004089355
    },
    {
      "epoch": 2.0050048828125e-05,
      "model_forward_time": 0.11560606956481934,
      "step": 3285
    },
    {
      "epoch": 2.0050048828125e-05,
      "step": 3285,
      "training_step_time": 0.3934152126312256
    },
    {
      "epoch": 2.005615234375e-05,
      "model_forward_time": 0.11508321762084961,
      "step": 3286
    },
    {
      "epoch": 2.005615234375e-05,
      "step": 3286,
      "training_step_time": 0.4027252197265625
    },
    {
      "epoch": 2.0062255859375e-05,
      "model_forward_time": 0.11573147773742676,
      "step": 3287
    },
    {
      "epoch": 2.0062255859375e-05,
      "step": 3287,
      "training_step_time": 0.4772987365722656
    },
    {
      "epoch": 2.0068359375e-05,
      "model_forward_time": 0.1157674789428711,
      "step": 3288
    },
    {
      "epoch": 2.0068359375e-05,
      "step": 3288,
      "training_step_time": 0.4663107395172119
    },
    {
      "epoch": 2.0074462890625e-05,
      "model_forward_time": 0.11534690856933594,
      "step": 3289
    },
    {
      "epoch": 2.0074462890625e-05,
      "step": 3289,
      "training_step_time": 0.4382462501525879
    },
    {
      "epoch": 2.008056640625e-05,
      "grad_norm": 0.29803258180618286,
      "learning_rate": 9.999361329594254e-05,
      "loss": 0.1275,
      "step": 3290
    },
    {
      "epoch": 2.008056640625e-05,
      "model_forward_time": 0.11518001556396484,
      "step": 3290
    },
    {
      "epoch": 2.008056640625e-05,
      "step": 3290,
      "training_step_time": 0.3901405334472656
    },
    {
      "epoch": 2.0086669921875e-05,
      "model_forward_time": 0.11438679695129395,
      "step": 3291
    },
    {
      "epoch": 2.0086669921875e-05,
      "step": 3291,
      "training_step_time": 0.3690755367279053
    },
    {
      "epoch": 2.00927734375e-05,
      "model_forward_time": 0.11477494239807129,
      "step": 3292
    },
    {
      "epoch": 2.00927734375e-05,
      "step": 3292,
      "training_step_time": 0.48868727684020996
    },
    {
      "epoch": 2.0098876953125e-05,
      "model_forward_time": 0.11487960815429688,
      "step": 3293
    },
    {
      "epoch": 2.0098876953125e-05,
      "step": 3293,
      "training_step_time": 0.48481011390686035
    },
    {
      "epoch": 2.010498046875e-05,
      "model_forward_time": 0.11536312103271484,
      "step": 3294
    },
    {
      "epoch": 2.010498046875e-05,
      "step": 3294,
      "training_step_time": 0.3890376091003418
    },
    {
      "epoch": 2.0111083984375e-05,
      "model_forward_time": 0.11478543281555176,
      "step": 3295
    },
    {
      "epoch": 2.0111083984375e-05,
      "step": 3295,
      "training_step_time": 0.3892686367034912
    },
    {
      "epoch": 2.01171875e-05,
      "model_forward_time": 0.11492061614990234,
      "step": 3296
    },
    {
      "epoch": 2.01171875e-05,
      "step": 3296,
      "training_step_time": 0.39637184143066406
    },
    {
      "epoch": 2.0123291015625e-05,
      "model_forward_time": 0.11496639251708984,
      "step": 3297
    },
    {
      "epoch": 2.0123291015625e-05,
      "step": 3297,
      "training_step_time": 0.38997960090637207
    },
    {
      "epoch": 2.012939453125e-05,
      "model_forward_time": 0.11591815948486328,
      "step": 3298
    },
    {
      "epoch": 2.012939453125e-05,
      "step": 3298,
      "training_step_time": 0.3947470188140869
    },
    {
      "epoch": 2.0135498046875e-05,
      "model_forward_time": 0.11556410789489746,
      "step": 3299
    },
    {
      "epoch": 2.0135498046875e-05,
      "step": 3299,
      "training_step_time": 0.37152099609375
    },
    {
      "epoch": 2.01416015625e-05,
      "grad_norm": 0.28598812222480774,
      "learning_rate": 9.999316524962345e-05,
      "loss": 0.1214,
      "step": 3300
    },
    {
      "epoch": 2.01416015625e-05,
      "model_forward_time": 0.11460256576538086,
      "step": 3300
    },
    {
      "epoch": 2.01416015625e-05,
      "step": 3300,
      "training_step_time": 0.5112662315368652
    },
    {
      "epoch": 2.0147705078125e-05,
      "model_forward_time": 0.11538481712341309,
      "step": 3301
    },
    {
      "epoch": 2.0147705078125e-05,
      "step": 3301,
      "training_step_time": 0.45815515518188477
    },
    {
      "epoch": 2.015380859375e-05,
      "model_forward_time": 0.11574602127075195,
      "step": 3302
    },
    {
      "epoch": 2.015380859375e-05,
      "step": 3302,
      "training_step_time": 0.4383542537689209
    },
    {
      "epoch": 2.0159912109375e-05,
      "model_forward_time": 0.1153411865234375,
      "step": 3303
    },
    {
      "epoch": 2.0159912109375e-05,
      "step": 3303,
      "training_step_time": 0.392702579498291
    },
    {
      "epoch": 2.0166015625e-05,
      "model_forward_time": 0.11525416374206543,
      "step": 3304
    },
    {
      "epoch": 2.0166015625e-05,
      "step": 3304,
      "training_step_time": 0.3866848945617676
    },
    {
      "epoch": 2.0172119140625e-05,
      "model_forward_time": 0.11672806739807129,
      "step": 3305
    },
    {
      "epoch": 2.0172119140625e-05,
      "step": 3305,
      "training_step_time": 0.3880152702331543
    },
    {
      "epoch": 2.017822265625e-05,
      "model_forward_time": 0.11733388900756836,
      "step": 3306
    },
    {
      "epoch": 2.017822265625e-05,
      "step": 3306,
      "training_step_time": 0.40921807289123535
    },
    {
      "epoch": 2.0184326171875e-05,
      "model_forward_time": 0.11571621894836426,
      "step": 3307
    },
    {
      "epoch": 2.0184326171875e-05,
      "step": 3307,
      "training_step_time": 0.4141721725463867
    },
    {
      "epoch": 2.01904296875e-05,
      "model_forward_time": 0.11670756340026855,
      "step": 3308
    },
    {
      "epoch": 2.01904296875e-05,
      "step": 3308,
      "training_step_time": 0.4234793186187744
    },
    {
      "epoch": 2.0196533203125e-05,
      "model_forward_time": 0.11577463150024414,
      "step": 3309
    },
    {
      "epoch": 2.0196533203125e-05,
      "step": 3309,
      "training_step_time": 0.39369964599609375
    },
    {
      "epoch": 2.020263671875e-05,
      "grad_norm": 0.3873397707939148,
      "learning_rate": 9.999270201670074e-05,
      "loss": 0.1198,
      "step": 3310
    },
    {
      "epoch": 2.020263671875e-05,
      "model_forward_time": 0.11545610427856445,
      "step": 3310
    },
    {
      "epoch": 2.020263671875e-05,
      "step": 3310,
      "training_step_time": 0.4028041362762451
    },
    {
      "epoch": 2.0208740234375e-05,
      "model_forward_time": 0.11547708511352539,
      "step": 3311
    },
    {
      "epoch": 2.0208740234375e-05,
      "step": 3311,
      "training_step_time": 0.40025925636291504
    },
    {
      "epoch": 2.021484375e-05,
      "model_forward_time": 0.11533975601196289,
      "step": 3312
    },
    {
      "epoch": 2.021484375e-05,
      "step": 3312,
      "training_step_time": 0.6712379455566406
    },
    {
      "epoch": 2.0220947265625e-05,
      "model_forward_time": 0.11494159698486328,
      "step": 3313
    },
    {
      "epoch": 2.0220947265625e-05,
      "step": 3313,
      "training_step_time": 0.3984506130218506
    },
    {
      "epoch": 2.022705078125e-05,
      "model_forward_time": 0.11561083793640137,
      "step": 3314
    },
    {
      "epoch": 2.022705078125e-05,
      "step": 3314,
      "training_step_time": 0.48082876205444336
    },
    {
      "epoch": 2.0233154296875e-05,
      "model_forward_time": 0.11508798599243164,
      "step": 3315
    },
    {
      "epoch": 2.0233154296875e-05,
      "step": 3315,
      "training_step_time": 0.432422399520874
    },
    {
      "epoch": 2.02392578125e-05,
      "model_forward_time": 0.11482071876525879,
      "step": 3316
    },
    {
      "epoch": 2.02392578125e-05,
      "step": 3316,
      "training_step_time": 0.41779136657714844
    },
    {
      "epoch": 2.0245361328125e-05,
      "model_forward_time": 0.1143336296081543,
      "step": 3317
    },
    {
      "epoch": 2.0245361328125e-05,
      "step": 3317,
      "training_step_time": 0.38185596466064453
    },
    {
      "epoch": 2.025146484375e-05,
      "model_forward_time": 0.11532783508300781,
      "step": 3318
    },
    {
      "epoch": 2.025146484375e-05,
      "step": 3318,
      "training_step_time": 0.47499918937683105
    },
    {
      "epoch": 2.0257568359375e-05,
      "model_forward_time": 0.11490797996520996,
      "step": 3319
    },
    {
      "epoch": 2.0257568359375e-05,
      "step": 3319,
      "training_step_time": 0.39253664016723633
    },
    {
      "epoch": 2.0263671875e-05,
      "grad_norm": 0.4183133840560913,
      "learning_rate": 9.999222359731514e-05,
      "loss": 0.1217,
      "step": 3320
    },
    {
      "epoch": 2.0263671875e-05,
      "model_forward_time": 0.11520552635192871,
      "step": 3320
    },
    {
      "epoch": 2.0263671875e-05,
      "step": 3320,
      "training_step_time": 0.37216758728027344
    },
    {
      "epoch": 2.0269775390625e-05,
      "model_forward_time": 0.11511874198913574,
      "step": 3321
    },
    {
      "epoch": 2.0269775390625e-05,
      "step": 3321,
      "training_step_time": 0.40808963775634766
    },
    {
      "epoch": 2.027587890625e-05,
      "model_forward_time": 0.1156914234161377,
      "step": 3322
    },
    {
      "epoch": 2.027587890625e-05,
      "step": 3322,
      "training_step_time": 0.47231602668762207
    },
    {
      "epoch": 2.0281982421875e-05,
      "model_forward_time": 0.11533474922180176,
      "step": 3323
    },
    {
      "epoch": 2.0281982421875e-05,
      "step": 3323,
      "training_step_time": 0.4003901481628418
    },
    {
      "epoch": 2.02880859375e-05,
      "model_forward_time": 0.11519742012023926,
      "step": 3324
    },
    {
      "epoch": 2.02880859375e-05,
      "step": 3324,
      "training_step_time": 0.5298898220062256
    },
    {
      "epoch": 2.0294189453125e-05,
      "model_forward_time": 0.1149439811706543,
      "step": 3325
    },
    {
      "epoch": 2.0294189453125e-05,
      "step": 3325,
      "training_step_time": 0.397747278213501
    },
    {
      "epoch": 2.030029296875e-05,
      "model_forward_time": 0.11514830589294434,
      "step": 3326
    },
    {
      "epoch": 2.030029296875e-05,
      "step": 3326,
      "training_step_time": 0.39208364486694336
    },
    {
      "epoch": 2.0306396484375e-05,
      "model_forward_time": 0.11495137214660645,
      "step": 3327
    },
    {
      "epoch": 2.0306396484375e-05,
      "step": 3327,
      "training_step_time": 0.39923715591430664
    },
    {
      "epoch": 2.03125e-05,
      "model_forward_time": 0.11473894119262695,
      "step": 3328
    },
    {
      "epoch": 2.03125e-05,
      "step": 3328,
      "training_step_time": 0.41765832901000977
    },
    {
      "epoch": 2.0318603515625e-05,
      "model_forward_time": 0.11502671241760254,
      "step": 3329
    },
    {
      "epoch": 2.0318603515625e-05,
      "step": 3329,
      "training_step_time": 0.4167461395263672
    },
    {
      "epoch": 2.032470703125e-05,
      "grad_norm": 0.5686017274856567,
      "learning_rate": 9.999172999161198e-05,
      "loss": 0.1228,
      "step": 3330
    },
    {
      "epoch": 2.032470703125e-05,
      "model_forward_time": 0.11489152908325195,
      "step": 3330
    },
    {
      "epoch": 2.032470703125e-05,
      "step": 3330,
      "training_step_time": 0.6187644004821777
    },
    {
      "epoch": 2.0330810546875e-05,
      "model_forward_time": 0.11472654342651367,
      "step": 3331
    },
    {
      "epoch": 2.0330810546875e-05,
      "step": 3331,
      "training_step_time": 0.4237973690032959
    },
    {
      "epoch": 2.03369140625e-05,
      "model_forward_time": 0.11521625518798828,
      "step": 3332
    },
    {
      "epoch": 2.03369140625e-05,
      "step": 3332,
      "training_step_time": 0.3925511837005615
    },
    {
      "epoch": 2.0343017578125e-05,
      "model_forward_time": 0.11492419242858887,
      "step": 3333
    },
    {
      "epoch": 2.0343017578125e-05,
      "step": 3333,
      "training_step_time": 0.3974635601043701
    },
    {
      "epoch": 2.034912109375e-05,
      "model_forward_time": 0.11632227897644043,
      "step": 3334
    },
    {
      "epoch": 2.034912109375e-05,
      "step": 3334,
      "training_step_time": 0.381986141204834
    },
    {
      "epoch": 2.0355224609375e-05,
      "model_forward_time": 0.11460161209106445,
      "step": 3335
    },
    {
      "epoch": 2.0355224609375e-05,
      "step": 3335,
      "training_step_time": 0.47863030433654785
    },
    {
      "epoch": 2.0361328125e-05,
      "model_forward_time": 0.115264892578125,
      "step": 3336
    },
    {
      "epoch": 2.0361328125e-05,
      "step": 3336,
      "training_step_time": 0.5282816886901855
    },
    {
      "epoch": 2.0367431640625e-05,
      "model_forward_time": 0.11519670486450195,
      "step": 3337
    },
    {
      "epoch": 2.0367431640625e-05,
      "step": 3337,
      "training_step_time": 0.406749963760376
    },
    {
      "epoch": 2.037353515625e-05,
      "model_forward_time": 0.11537003517150879,
      "step": 3338
    },
    {
      "epoch": 2.037353515625e-05,
      "step": 3338,
      "training_step_time": 0.39623117446899414
    },
    {
      "epoch": 2.0379638671875e-05,
      "model_forward_time": 0.1154942512512207,
      "step": 3339
    },
    {
      "epoch": 2.0379638671875e-05,
      "step": 3339,
      "training_step_time": 0.3872222900390625
    },
    {
      "epoch": 2.03857421875e-05,
      "grad_norm": 0.312966525554657,
      "learning_rate": 9.999122119974121e-05,
      "loss": 0.1261,
      "step": 3340
    },
    {
      "epoch": 2.03857421875e-05,
      "model_forward_time": 0.11505937576293945,
      "step": 3340
    },
    {
      "epoch": 2.03857421875e-05,
      "step": 3340,
      "training_step_time": 0.3856785297393799
    },
    {
      "epoch": 2.0391845703125e-05,
      "model_forward_time": 0.11563229560852051,
      "step": 3341
    },
    {
      "epoch": 2.0391845703125e-05,
      "step": 3341,
      "training_step_time": 0.43134546279907227
    },
    {
      "epoch": 2.039794921875e-05,
      "model_forward_time": 0.11532711982727051,
      "step": 3342
    },
    {
      "epoch": 2.039794921875e-05,
      "step": 3342,
      "training_step_time": 0.5883092880249023
    },
    {
      "epoch": 2.0404052734375e-05,
      "model_forward_time": 0.11476707458496094,
      "step": 3343
    },
    {
      "epoch": 2.0404052734375e-05,
      "step": 3343,
      "training_step_time": 0.45909738540649414
    },
    {
      "epoch": 2.041015625e-05,
      "model_forward_time": 0.11463642120361328,
      "step": 3344
    },
    {
      "epoch": 2.041015625e-05,
      "step": 3344,
      "training_step_time": 0.4790513515472412
    },
    {
      "epoch": 2.0416259765625e-05,
      "model_forward_time": 0.11438202857971191,
      "step": 3345
    },
    {
      "epoch": 2.0416259765625e-05,
      "step": 3345,
      "training_step_time": 0.504317045211792
    },
    {
      "epoch": 2.042236328125e-05,
      "model_forward_time": 0.11441898345947266,
      "step": 3346
    },
    {
      "epoch": 2.042236328125e-05,
      "step": 3346,
      "training_step_time": 0.3832995891571045
    },
    {
      "epoch": 2.0428466796875e-05,
      "model_forward_time": 0.11507844924926758,
      "step": 3347
    },
    {
      "epoch": 2.0428466796875e-05,
      "step": 3347,
      "training_step_time": 0.38928651809692383
    },
    {
      "epoch": 2.04345703125e-05,
      "model_forward_time": 0.11448097229003906,
      "step": 3348
    },
    {
      "epoch": 2.04345703125e-05,
      "step": 3348,
      "training_step_time": 0.3702993392944336
    },
    {
      "epoch": 2.0440673828125e-05,
      "model_forward_time": 0.1158897876739502,
      "step": 3349
    },
    {
      "epoch": 2.0440673828125e-05,
      "step": 3349,
      "training_step_time": 0.4894373416900635
    },
    {
      "epoch": 2.044677734375e-05,
      "grad_norm": 0.4141256809234619,
      "learning_rate": 9.999069722185737e-05,
      "loss": 0.1252,
      "step": 3350
    },
    {
      "epoch": 2.044677734375e-05,
      "model_forward_time": 0.1146993637084961,
      "step": 3350
    },
    {
      "epoch": 2.044677734375e-05,
      "step": 3350,
      "training_step_time": 0.4890296459197998
    },
    {
      "epoch": 2.0452880859375e-05,
      "model_forward_time": 0.1147623062133789,
      "step": 3351
    },
    {
      "epoch": 2.0452880859375e-05,
      "step": 3351,
      "training_step_time": 0.43548107147216797
    },
    {
      "epoch": 2.0458984375e-05,
      "model_forward_time": 0.11455345153808594,
      "step": 3352
    },
    {
      "epoch": 2.0458984375e-05,
      "step": 3352,
      "training_step_time": 0.38991856575012207
    },
    {
      "epoch": 2.0465087890625e-05,
      "model_forward_time": 0.11494302749633789,
      "step": 3353
    },
    {
      "epoch": 2.0465087890625e-05,
      "step": 3353,
      "training_step_time": 0.39100027084350586
    },
    {
      "epoch": 2.047119140625e-05,
      "model_forward_time": 0.11555242538452148,
      "step": 3354
    },
    {
      "epoch": 2.047119140625e-05,
      "step": 3354,
      "training_step_time": 0.42413878440856934
    },
    {
      "epoch": 2.0477294921875e-05,
      "model_forward_time": 0.11513233184814453,
      "step": 3355
    },
    {
      "epoch": 2.0477294921875e-05,
      "step": 3355,
      "training_step_time": 0.4272465705871582
    },
    {
      "epoch": 2.04833984375e-05,
      "model_forward_time": 0.11495161056518555,
      "step": 3356
    },
    {
      "epoch": 2.04833984375e-05,
      "step": 3356,
      "training_step_time": 0.3947031497955322
    },
    {
      "epoch": 2.0489501953125e-05,
      "model_forward_time": 0.11532926559448242,
      "step": 3357
    },
    {
      "epoch": 2.0489501953125e-05,
      "step": 3357,
      "training_step_time": 0.3964877128601074
    },
    {
      "epoch": 2.049560546875e-05,
      "model_forward_time": 0.11549186706542969,
      "step": 3358
    },
    {
      "epoch": 2.049560546875e-05,
      "step": 3358,
      "training_step_time": 0.3902628421783447
    },
    {
      "epoch": 2.0501708984375e-05,
      "model_forward_time": 0.11504244804382324,
      "step": 3359
    },
    {
      "epoch": 2.0501708984375e-05,
      "step": 3359,
      "training_step_time": 0.456371545791626
    },
    {
      "epoch": 2.05078125e-05,
      "grad_norm": 0.3395267724990845,
      "learning_rate": 9.999015805811965e-05,
      "loss": 0.1258,
      "step": 3360
    },
    {
      "epoch": 2.05078125e-05,
      "model_forward_time": 0.1152341365814209,
      "step": 3360
    },
    {
      "epoch": 2.05078125e-05,
      "step": 3360,
      "training_step_time": 0.43126416206359863
    },
    {
      "epoch": 2.0513916015625e-05,
      "model_forward_time": 0.11532783508300781,
      "step": 3361
    },
    {
      "epoch": 2.0513916015625e-05,
      "step": 3361,
      "training_step_time": 0.41173601150512695
    },
    {
      "epoch": 2.052001953125e-05,
      "model_forward_time": 0.11527180671691895,
      "step": 3362
    },
    {
      "epoch": 2.052001953125e-05,
      "step": 3362,
      "training_step_time": 0.4027369022369385
    },
    {
      "epoch": 2.0526123046875e-05,
      "model_forward_time": 0.11516380310058594,
      "step": 3363
    },
    {
      "epoch": 2.0526123046875e-05,
      "step": 3363,
      "training_step_time": 0.4238142967224121
    },
    {
      "epoch": 2.05322265625e-05,
      "model_forward_time": 0.11451220512390137,
      "step": 3364
    },
    {
      "epoch": 2.05322265625e-05,
      "step": 3364,
      "training_step_time": 0.44289493560791016
    },
    {
      "epoch": 2.0538330078125e-05,
      "model_forward_time": 0.1152801513671875,
      "step": 3365
    },
    {
      "epoch": 2.0538330078125e-05,
      "step": 3365,
      "training_step_time": 0.510202169418335
    },
    {
      "epoch": 2.054443359375e-05,
      "model_forward_time": 0.11528134346008301,
      "step": 3366
    },
    {
      "epoch": 2.054443359375e-05,
      "step": 3366,
      "training_step_time": 0.46028757095336914
    },
    {
      "epoch": 2.0550537109375e-05,
      "model_forward_time": 0.11545610427856445,
      "step": 3367
    },
    {
      "epoch": 2.0550537109375e-05,
      "step": 3367,
      "training_step_time": 0.3939523696899414
    },
    {
      "epoch": 2.0556640625e-05,
      "model_forward_time": 0.1148841381072998,
      "step": 3368
    },
    {
      "epoch": 2.0556640625e-05,
      "step": 3368,
      "training_step_time": 0.42342543601989746
    },
    {
      "epoch": 2.0562744140625e-05,
      "model_forward_time": 0.11554932594299316,
      "step": 3369
    },
    {
      "epoch": 2.0562744140625e-05,
      "step": 3369,
      "training_step_time": 0.4159712791442871
    },
    {
      "epoch": 2.056884765625e-05,
      "grad_norm": 0.28095707297325134,
      "learning_rate": 9.998960370869181e-05,
      "loss": 0.1326,
      "step": 3370
    },
    {
      "epoch": 2.056884765625e-05,
      "model_forward_time": 0.1152794361114502,
      "step": 3370
    },
    {
      "epoch": 2.056884765625e-05,
      "step": 3370,
      "training_step_time": 0.39977145195007324
    },
    {
      "epoch": 2.0574951171875e-05,
      "model_forward_time": 0.11539697647094727,
      "step": 3371
    },
    {
      "epoch": 2.0574951171875e-05,
      "step": 3371,
      "training_step_time": 0.40051841735839844
    },
    {
      "epoch": 2.05810546875e-05,
      "model_forward_time": 0.11536097526550293,
      "step": 3372
    },
    {
      "epoch": 2.05810546875e-05,
      "step": 3372,
      "training_step_time": 0.5687212944030762
    },
    {
      "epoch": 2.0587158203125e-05,
      "model_forward_time": 0.1144406795501709,
      "step": 3373
    },
    {
      "epoch": 2.0587158203125e-05,
      "step": 3373,
      "training_step_time": 0.425661563873291
    },
    {
      "epoch": 2.059326171875e-05,
      "model_forward_time": 0.11565685272216797,
      "step": 3374
    },
    {
      "epoch": 2.059326171875e-05,
      "step": 3374,
      "training_step_time": 0.5026743412017822
    },
    {
      "epoch": 2.0599365234375e-05,
      "model_forward_time": 0.1143960952758789,
      "step": 3375
    },
    {
      "epoch": 2.0599365234375e-05,
      "step": 3375,
      "training_step_time": 0.3864707946777344
    },
    {
      "epoch": 2.060546875e-05,
      "model_forward_time": 0.11511802673339844,
      "step": 3376
    },
    {
      "epoch": 2.060546875e-05,
      "step": 3376,
      "training_step_time": 0.3844778537750244
    },
    {
      "epoch": 2.0611572265625e-05,
      "model_forward_time": 0.11456823348999023,
      "step": 3377
    },
    {
      "epoch": 2.0611572265625e-05,
      "step": 3377,
      "training_step_time": 0.3655414581298828
    },
    {
      "epoch": 2.061767578125e-05,
      "model_forward_time": 0.11528325080871582,
      "step": 3378
    },
    {
      "epoch": 2.061767578125e-05,
      "step": 3378,
      "training_step_time": 0.4923431873321533
    },
    {
      "epoch": 2.0623779296875e-05,
      "model_forward_time": 0.11517620086669922,
      "step": 3379
    },
    {
      "epoch": 2.0623779296875e-05,
      "step": 3379,
      "training_step_time": 0.4965076446533203
    },
    {
      "epoch": 2.06298828125e-05,
      "grad_norm": 0.46810394525527954,
      "learning_rate": 9.998903417374228e-05,
      "loss": 0.1314,
      "step": 3380
    },
    {
      "epoch": 2.06298828125e-05,
      "model_forward_time": 0.1146690845489502,
      "step": 3380
    },
    {
      "epoch": 2.06298828125e-05,
      "step": 3380,
      "training_step_time": 0.3939228057861328
    },
    {
      "epoch": 2.0635986328125e-05,
      "model_forward_time": 0.11514592170715332,
      "step": 3381
    },
    {
      "epoch": 2.0635986328125e-05,
      "step": 3381,
      "training_step_time": 0.420518159866333
    },
    {
      "epoch": 2.064208984375e-05,
      "model_forward_time": 0.11455368995666504,
      "step": 3382
    },
    {
      "epoch": 2.064208984375e-05,
      "step": 3382,
      "training_step_time": 0.3988921642303467
    },
    {
      "epoch": 2.0648193359375e-05,
      "model_forward_time": 0.11612057685852051,
      "step": 3383
    },
    {
      "epoch": 2.0648193359375e-05,
      "step": 3383,
      "training_step_time": 0.39430785179138184
    },
    {
      "epoch": 2.0654296875e-05,
      "model_forward_time": 0.11541128158569336,
      "step": 3384
    },
    {
      "epoch": 2.0654296875e-05,
      "step": 3384,
      "training_step_time": 0.4311232566833496
    },
    {
      "epoch": 2.0660400390625e-05,
      "model_forward_time": 0.11472725868225098,
      "step": 3385
    },
    {
      "epoch": 2.0660400390625e-05,
      "step": 3385,
      "training_step_time": 0.3964247703552246
    },
    {
      "epoch": 2.066650390625e-05,
      "model_forward_time": 0.11501073837280273,
      "step": 3386
    },
    {
      "epoch": 2.066650390625e-05,
      "step": 3386,
      "training_step_time": 0.3971126079559326
    },
    {
      "epoch": 2.0672607421875e-05,
      "model_forward_time": 0.11518979072570801,
      "step": 3387
    },
    {
      "epoch": 2.0672607421875e-05,
      "step": 3387,
      "training_step_time": 0.4252762794494629
    },
    {
      "epoch": 2.06787109375e-05,
      "model_forward_time": 0.11517858505249023,
      "step": 3388
    },
    {
      "epoch": 2.06787109375e-05,
      "step": 3388,
      "training_step_time": 0.40169596672058105
    },
    {
      "epoch": 2.0684814453125e-05,
      "model_forward_time": 0.11591887474060059,
      "step": 3389
    },
    {
      "epoch": 2.0684814453125e-05,
      "step": 3389,
      "training_step_time": 0.43841552734375
    },
    {
      "epoch": 2.069091796875e-05,
      "grad_norm": 0.28879645466804504,
      "learning_rate": 9.998844945344405e-05,
      "loss": 0.1393,
      "step": 3390
    },
    {
      "epoch": 2.069091796875e-05,
      "model_forward_time": 0.11565876007080078,
      "step": 3390
    },
    {
      "epoch": 2.069091796875e-05,
      "step": 3390,
      "training_step_time": 0.6467339992523193
    },
    {
      "epoch": 2.0697021484375e-05,
      "model_forward_time": 0.11531782150268555,
      "step": 3391
    },
    {
      "epoch": 2.0697021484375e-05,
      "step": 3391,
      "training_step_time": 0.37392640113830566
    },
    {
      "epoch": 2.0703125e-05,
      "model_forward_time": 0.1146845817565918,
      "step": 3392
    },
    {
      "epoch": 2.0703125e-05,
      "step": 3392,
      "training_step_time": 0.4361743927001953
    },
    {
      "epoch": 2.0709228515625e-05,
      "model_forward_time": 0.1144106388092041,
      "step": 3393
    },
    {
      "epoch": 2.0709228515625e-05,
      "step": 3393,
      "training_step_time": 0.46972155570983887
    },
    {
      "epoch": 2.071533203125e-05,
      "model_forward_time": 0.11470222473144531,
      "step": 3394
    },
    {
      "epoch": 2.071533203125e-05,
      "step": 3394,
      "training_step_time": 0.4101881980895996
    },
    {
      "epoch": 2.0721435546875e-05,
      "model_forward_time": 0.11500048637390137,
      "step": 3395
    },
    {
      "epoch": 2.0721435546875e-05,
      "step": 3395,
      "training_step_time": 0.3942680358886719
    },
    {
      "epoch": 2.07275390625e-05,
      "model_forward_time": 0.11514425277709961,
      "step": 3396
    },
    {
      "epoch": 2.07275390625e-05,
      "step": 3396,
      "training_step_time": 0.4503166675567627
    },
    {
      "epoch": 2.0733642578125e-05,
      "model_forward_time": 0.11487007141113281,
      "step": 3397
    },
    {
      "epoch": 2.0733642578125e-05,
      "step": 3397,
      "training_step_time": 0.40050840377807617
    },
    {
      "epoch": 2.073974609375e-05,
      "model_forward_time": 0.11561727523803711,
      "step": 3398
    },
    {
      "epoch": 2.073974609375e-05,
      "step": 3398,
      "training_step_time": 0.3990492820739746
    },
    {
      "epoch": 2.0745849609375e-05,
      "model_forward_time": 0.1147615909576416,
      "step": 3399
    },
    {
      "epoch": 2.0745849609375e-05,
      "step": 3399,
      "training_step_time": 0.3971574306488037
    },
    {
      "epoch": 2.0751953125e-05,
      "grad_norm": 0.36559268832206726,
      "learning_rate": 9.998784954797474e-05,
      "loss": 0.1427,
      "step": 3400
    },
    {
      "epoch": 2.0751953125e-05,
      "model_forward_time": 0.1152651309967041,
      "step": 3400
    },
    {
      "epoch": 2.0751953125e-05,
      "step": 3400,
      "training_step_time": 0.4041597843170166
    },
    {
      "epoch": 2.0758056640625e-05,
      "model_forward_time": 0.11516189575195312,
      "step": 3401
    },
    {
      "epoch": 2.0758056640625e-05,
      "step": 3401,
      "training_step_time": 0.4147653579711914
    },
    {
      "epoch": 2.076416015625e-05,
      "model_forward_time": 0.11463785171508789,
      "step": 3402
    },
    {
      "epoch": 2.076416015625e-05,
      "step": 3402,
      "training_step_time": 0.6080532073974609
    },
    {
      "epoch": 2.0770263671875e-05,
      "model_forward_time": 0.11469268798828125,
      "step": 3403
    },
    {
      "epoch": 2.0770263671875e-05,
      "step": 3403,
      "training_step_time": 0.41139793395996094
    },
    {
      "epoch": 2.07763671875e-05,
      "model_forward_time": 0.11463761329650879,
      "step": 3404
    },
    {
      "epoch": 2.07763671875e-05,
      "step": 3404,
      "training_step_time": 0.39423179626464844
    },
    {
      "epoch": 2.0782470703125e-05,
      "model_forward_time": 0.11531829833984375,
      "step": 3405
    },
    {
      "epoch": 2.0782470703125e-05,
      "step": 3405,
      "training_step_time": 0.40027427673339844
    },
    {
      "epoch": 2.078857421875e-05,
      "model_forward_time": 0.1145181655883789,
      "step": 3406
    },
    {
      "epoch": 2.078857421875e-05,
      "step": 3406,
      "training_step_time": 0.4823453426361084
    },
    {
      "epoch": 2.0794677734375e-05,
      "model_forward_time": 0.11522078514099121,
      "step": 3407
    },
    {
      "epoch": 2.0794677734375e-05,
      "step": 3407,
      "training_step_time": 0.4517030715942383
    },
    {
      "epoch": 2.080078125e-05,
      "model_forward_time": 0.11549067497253418,
      "step": 3408
    },
    {
      "epoch": 2.080078125e-05,
      "step": 3408,
      "training_step_time": 0.42606329917907715
    },
    {
      "epoch": 2.0806884765625e-05,
      "model_forward_time": 0.11457967758178711,
      "step": 3409
    },
    {
      "epoch": 2.0806884765625e-05,
      "step": 3409,
      "training_step_time": 0.42192864418029785
    },
    {
      "epoch": 2.081298828125e-05,
      "grad_norm": 0.42327508330345154,
      "learning_rate": 9.998723445751658e-05,
      "loss": 0.1266,
      "step": 3410
    },
    {
      "epoch": 2.081298828125e-05,
      "model_forward_time": 0.11534881591796875,
      "step": 3410
    },
    {
      "epoch": 2.081298828125e-05,
      "step": 3410,
      "training_step_time": 0.3951270580291748
    },
    {
      "epoch": 2.0819091796875e-05,
      "model_forward_time": 0.11539316177368164,
      "step": 3411
    },
    {
      "epoch": 2.0819091796875e-05,
      "step": 3411,
      "training_step_time": 0.4062483310699463
    },
    {
      "epoch": 2.08251953125e-05,
      "model_forward_time": 0.11452937126159668,
      "step": 3412
    },
    {
      "epoch": 2.08251953125e-05,
      "step": 3412,
      "training_step_time": 0.39443373680114746
    },
    {
      "epoch": 2.0831298828125e-05,
      "model_forward_time": 0.11556124687194824,
      "step": 3413
    },
    {
      "epoch": 2.0831298828125e-05,
      "step": 3413,
      "training_step_time": 0.40156054496765137
    },
    {
      "epoch": 2.083740234375e-05,
      "model_forward_time": 0.11573123931884766,
      "step": 3414
    },
    {
      "epoch": 2.083740234375e-05,
      "step": 3414,
      "training_step_time": 0.557314395904541
    },
    {
      "epoch": 2.0843505859375e-05,
      "model_forward_time": 0.1158294677734375,
      "step": 3415
    },
    {
      "epoch": 2.0843505859375e-05,
      "step": 3415,
      "training_step_time": 0.48641514778137207
    },
    {
      "epoch": 2.0849609375e-05,
      "model_forward_time": 0.11429023742675781,
      "step": 3416
    },
    {
      "epoch": 2.0849609375e-05,
      "step": 3416,
      "training_step_time": 0.48925352096557617
    },
    {
      "epoch": 2.0855712890625e-05,
      "model_forward_time": 0.11515116691589355,
      "step": 3417
    },
    {
      "epoch": 2.0855712890625e-05,
      "step": 3417,
      "training_step_time": 0.5074594020843506
    },
    {
      "epoch": 2.086181640625e-05,
      "model_forward_time": 0.11478948593139648,
      "step": 3418
    },
    {
      "epoch": 2.086181640625e-05,
      "step": 3418,
      "training_step_time": 0.38005924224853516
    },
    {
      "epoch": 2.0867919921875e-05,
      "model_forward_time": 0.11464643478393555,
      "step": 3419
    },
    {
      "epoch": 2.0867919921875e-05,
      "step": 3419,
      "training_step_time": 0.3840353488922119
    },
    {
      "epoch": 2.08740234375e-05,
      "grad_norm": 0.33703115582466125,
      "learning_rate": 9.998660418225645e-05,
      "loss": 0.1333,
      "step": 3420
    },
    {
      "epoch": 2.08740234375e-05,
      "model_forward_time": 0.11543679237365723,
      "step": 3420
    },
    {
      "epoch": 2.08740234375e-05,
      "step": 3420,
      "training_step_time": 0.5134634971618652
    },
    {
      "epoch": 2.0880126953125e-05,
      "model_forward_time": 0.1154942512512207,
      "step": 3421
    },
    {
      "epoch": 2.0880126953125e-05,
      "step": 3421,
      "training_step_time": 0.4811553955078125
    },
    {
      "epoch": 2.088623046875e-05,
      "model_forward_time": 0.11473941802978516,
      "step": 3422
    },
    {
      "epoch": 2.088623046875e-05,
      "step": 3422,
      "training_step_time": 0.47916197776794434
    },
    {
      "epoch": 2.0892333984375e-05,
      "model_forward_time": 0.1150519847869873,
      "step": 3423
    },
    {
      "epoch": 2.0892333984375e-05,
      "step": 3423,
      "training_step_time": 0.39133119583129883
    },
    {
      "epoch": 2.08984375e-05,
      "model_forward_time": 0.11483550071716309,
      "step": 3424
    },
    {
      "epoch": 2.08984375e-05,
      "step": 3424,
      "training_step_time": 0.39200615882873535
    },
    {
      "epoch": 2.0904541015625e-05,
      "model_forward_time": 0.11574387550354004,
      "step": 3425
    },
    {
      "epoch": 2.0904541015625e-05,
      "step": 3425,
      "training_step_time": 0.3974618911743164
    },
    {
      "epoch": 2.091064453125e-05,
      "model_forward_time": 0.11476969718933105,
      "step": 3426
    },
    {
      "epoch": 2.091064453125e-05,
      "step": 3426,
      "training_step_time": 0.3974273204803467
    },
    {
      "epoch": 2.0916748046875e-05,
      "model_forward_time": 0.11595678329467773,
      "step": 3427
    },
    {
      "epoch": 2.0916748046875e-05,
      "step": 3427,
      "training_step_time": 0.39791226387023926
    },
    {
      "epoch": 2.09228515625e-05,
      "model_forward_time": 0.11589360237121582,
      "step": 3428
    },
    {
      "epoch": 2.09228515625e-05,
      "step": 3428,
      "training_step_time": 0.40282511711120605
    },
    {
      "epoch": 2.0928955078125e-05,
      "model_forward_time": 0.11518239974975586,
      "step": 3429
    },
    {
      "epoch": 2.0928955078125e-05,
      "step": 3429,
      "training_step_time": 0.39779162406921387
    },
    {
      "epoch": 2.093505859375e-05,
      "grad_norm": 0.40411001443862915,
      "learning_rate": 9.998595872238577e-05,
      "loss": 0.1203,
      "step": 3430
    },
    {
      "epoch": 2.093505859375e-05,
      "model_forward_time": 0.1151270866394043,
      "step": 3430
    },
    {
      "epoch": 2.093505859375e-05,
      "step": 3430,
      "training_step_time": 0.4318661689758301
    },
    {
      "epoch": 2.0941162109375e-05,
      "model_forward_time": 0.11523604393005371,
      "step": 3431
    },
    {
      "epoch": 2.0941162109375e-05,
      "step": 3431,
      "training_step_time": 0.40442514419555664
    },
    {
      "epoch": 2.0947265625e-05,
      "model_forward_time": 0.1155550479888916,
      "step": 3432
    },
    {
      "epoch": 2.0947265625e-05,
      "step": 3432,
      "training_step_time": 0.5364887714385986
    },
    {
      "epoch": 2.0953369140625e-05,
      "model_forward_time": 0.11563420295715332,
      "step": 3433
    },
    {
      "epoch": 2.0953369140625e-05,
      "step": 3433,
      "training_step_time": 0.3920881748199463
    },
    {
      "epoch": 2.095947265625e-05,
      "model_forward_time": 0.11694121360778809,
      "step": 3434
    },
    {
      "epoch": 2.095947265625e-05,
      "step": 3434,
      "training_step_time": 0.42908620834350586
    },
    {
      "epoch": 2.0965576171875e-05,
      "model_forward_time": 0.1154935359954834,
      "step": 3435
    },
    {
      "epoch": 2.0965576171875e-05,
      "step": 3435,
      "training_step_time": 0.44388675689697266
    },
    {
      "epoch": 2.09716796875e-05,
      "model_forward_time": 0.11592411994934082,
      "step": 3436
    },
    {
      "epoch": 2.09716796875e-05,
      "step": 3436,
      "training_step_time": 0.4856288433074951
    },
    {
      "epoch": 2.0977783203125e-05,
      "model_forward_time": 0.11592841148376465,
      "step": 3437
    },
    {
      "epoch": 2.0977783203125e-05,
      "step": 3437,
      "training_step_time": 0.5043063163757324
    },
    {
      "epoch": 2.098388671875e-05,
      "model_forward_time": 0.11520504951477051,
      "step": 3438
    },
    {
      "epoch": 2.098388671875e-05,
      "step": 3438,
      "training_step_time": 0.45632457733154297
    },
    {
      "epoch": 2.0989990234375e-05,
      "model_forward_time": 0.11440229415893555,
      "step": 3439
    },
    {
      "epoch": 2.0989990234375e-05,
      "step": 3439,
      "training_step_time": 0.40497732162475586
    },
    {
      "epoch": 2.099609375e-05,
      "grad_norm": 0.36491093039512634,
      "learning_rate": 9.998529807810064e-05,
      "loss": 0.1283,
      "step": 3440
    },
    {
      "epoch": 2.099609375e-05,
      "model_forward_time": 0.11456632614135742,
      "step": 3440
    },
    {
      "epoch": 2.099609375e-05,
      "step": 3440,
      "training_step_time": 0.3934321403503418
    },
    {
      "epoch": 2.1002197265625e-05,
      "model_forward_time": 0.11593079566955566,
      "step": 3441
    },
    {
      "epoch": 2.1002197265625e-05,
      "step": 3441,
      "training_step_time": 0.3998293876647949
    },
    {
      "epoch": 2.100830078125e-05,
      "model_forward_time": 0.1152946949005127,
      "step": 3442
    },
    {
      "epoch": 2.100830078125e-05,
      "step": 3442,
      "training_step_time": 0.3949422836303711
    },
    {
      "epoch": 2.1014404296875e-05,
      "model_forward_time": 0.11556744575500488,
      "step": 3443
    },
    {
      "epoch": 2.1014404296875e-05,
      "step": 3443,
      "training_step_time": 0.40636372566223145
    },
    {
      "epoch": 2.10205078125e-05,
      "model_forward_time": 0.1156618595123291,
      "step": 3444
    },
    {
      "epoch": 2.10205078125e-05,
      "step": 3444,
      "training_step_time": 0.5966668128967285
    },
    {
      "epoch": 2.1026611328125e-05,
      "model_forward_time": 0.11529731750488281,
      "step": 3445
    },
    {
      "epoch": 2.1026611328125e-05,
      "step": 3445,
      "training_step_time": 0.47060513496398926
    },
    {
      "epoch": 2.103271484375e-05,
      "model_forward_time": 0.11496257781982422,
      "step": 3446
    },
    {
      "epoch": 2.103271484375e-05,
      "step": 3446,
      "training_step_time": 0.4457969665527344
    },
    {
      "epoch": 2.1038818359375e-05,
      "model_forward_time": 0.11530256271362305,
      "step": 3447
    },
    {
      "epoch": 2.1038818359375e-05,
      "step": 3447,
      "training_step_time": 0.46296072006225586
    },
    {
      "epoch": 2.1044921875e-05,
      "model_forward_time": 0.11445784568786621,
      "step": 3448
    },
    {
      "epoch": 2.1044921875e-05,
      "step": 3448,
      "training_step_time": 0.4503207206726074
    },
    {
      "epoch": 2.1051025390625e-05,
      "model_forward_time": 0.11459922790527344,
      "step": 3449
    },
    {
      "epoch": 2.1051025390625e-05,
      "step": 3449,
      "training_step_time": 0.43662118911743164
    },
    {
      "epoch": 2.105712890625e-05,
      "grad_norm": 0.29307812452316284,
      "learning_rate": 9.998462224960175e-05,
      "loss": 0.1192,
      "step": 3450
    },
    {
      "epoch": 2.105712890625e-05,
      "model_forward_time": 0.11550331115722656,
      "step": 3450
    },
    {
      "epoch": 2.105712890625e-05,
      "step": 3450,
      "training_step_time": 0.5112805366516113
    },
    {
      "epoch": 2.1063232421875e-05,
      "model_forward_time": 0.1144871711730957,
      "step": 3451
    },
    {
      "epoch": 2.1063232421875e-05,
      "step": 3451,
      "training_step_time": 0.3943972587585449
    },
    {
      "epoch": 2.10693359375e-05,
      "model_forward_time": 0.11474156379699707,
      "step": 3452
    },
    {
      "epoch": 2.10693359375e-05,
      "step": 3452,
      "training_step_time": 0.39078330993652344
    },
    {
      "epoch": 2.1075439453125e-05,
      "model_forward_time": 0.11562466621398926,
      "step": 3453
    },
    {
      "epoch": 2.1075439453125e-05,
      "step": 3453,
      "training_step_time": 0.39057183265686035
    },
    {
      "epoch": 2.108154296875e-05,
      "model_forward_time": 0.11545085906982422,
      "step": 3454
    },
    {
      "epoch": 2.108154296875e-05,
      "step": 3454,
      "training_step_time": 0.39765167236328125
    },
    {
      "epoch": 2.1087646484375e-05,
      "model_forward_time": 0.11548066139221191,
      "step": 3455
    },
    {
      "epoch": 2.1087646484375e-05,
      "step": 3455,
      "training_step_time": 0.3850109577178955
    },
    {
      "epoch": 2.109375e-05,
      "model_forward_time": 0.11554956436157227,
      "step": 3456
    },
    {
      "epoch": 2.109375e-05,
      "step": 3456,
      "training_step_time": 0.39239954948425293
    },
    {
      "epoch": 2.1099853515625e-05,
      "model_forward_time": 0.11550307273864746,
      "step": 3457
    },
    {
      "epoch": 2.1099853515625e-05,
      "step": 3457,
      "training_step_time": 0.3929879665374756
    },
    {
      "epoch": 2.110595703125e-05,
      "model_forward_time": 0.11601662635803223,
      "step": 3458
    },
    {
      "epoch": 2.110595703125e-05,
      "step": 3458,
      "training_step_time": 0.43691158294677734
    },
    {
      "epoch": 2.1112060546875e-05,
      "model_forward_time": 0.11574649810791016,
      "step": 3459
    },
    {
      "epoch": 2.1112060546875e-05,
      "step": 3459,
      "training_step_time": 0.4352247714996338
    },
    {
      "epoch": 2.11181640625e-05,
      "grad_norm": 0.9897426962852478,
      "learning_rate": 9.998393123709438e-05,
      "loss": 0.1367,
      "step": 3460
    },
    {
      "epoch": 2.11181640625e-05,
      "model_forward_time": 0.11492562294006348,
      "step": 3460
    },
    {
      "epoch": 2.11181640625e-05,
      "step": 3460,
      "training_step_time": 0.5077383518218994
    },
    {
      "epoch": 2.1124267578125e-05,
      "model_forward_time": 0.11467647552490234,
      "step": 3461
    },
    {
      "epoch": 2.1124267578125e-05,
      "step": 3461,
      "training_step_time": 0.3978433609008789
    },
    {
      "epoch": 2.113037109375e-05,
      "model_forward_time": 0.11572933197021484,
      "step": 3462
    },
    {
      "epoch": 2.113037109375e-05,
      "step": 3462,
      "training_step_time": 0.4397706985473633
    },
    {
      "epoch": 2.1136474609375e-05,
      "model_forward_time": 0.11494636535644531,
      "step": 3463
    },
    {
      "epoch": 2.1136474609375e-05,
      "step": 3463,
      "training_step_time": 0.3651711940765381
    },
    {
      "epoch": 2.1142578125e-05,
      "model_forward_time": 0.1147146224975586,
      "step": 3464
    },
    {
      "epoch": 2.1142578125e-05,
      "step": 3464,
      "training_step_time": 0.43979907035827637
    },
    {
      "epoch": 2.1148681640625e-05,
      "model_forward_time": 0.11506414413452148,
      "step": 3465
    },
    {
      "epoch": 2.1148681640625e-05,
      "step": 3465,
      "training_step_time": 0.39574718475341797
    },
    {
      "epoch": 2.115478515625e-05,
      "model_forward_time": 0.11636853218078613,
      "step": 3466
    },
    {
      "epoch": 2.115478515625e-05,
      "step": 3466,
      "training_step_time": 0.39540982246398926
    },
    {
      "epoch": 2.1160888671875e-05,
      "model_forward_time": 0.11450028419494629,
      "step": 3467
    },
    {
      "epoch": 2.1160888671875e-05,
      "step": 3467,
      "training_step_time": 0.40716981887817383
    },
    {
      "epoch": 2.11669921875e-05,
      "model_forward_time": 0.11493182182312012,
      "step": 3468
    },
    {
      "epoch": 2.11669921875e-05,
      "step": 3468,
      "training_step_time": 0.4978320598602295
    },
    {
      "epoch": 2.1173095703125e-05,
      "model_forward_time": 0.11541175842285156,
      "step": 3469
    },
    {
      "epoch": 2.1173095703125e-05,
      "step": 3469,
      "training_step_time": 0.38542604446411133
    },
    {
      "epoch": 2.117919921875e-05,
      "grad_norm": 0.31605127453804016,
      "learning_rate": 9.998322504078843e-05,
      "loss": 0.1279,
      "step": 3470
    },
    {
      "epoch": 2.117919921875e-05,
      "model_forward_time": 0.11496734619140625,
      "step": 3470
    },
    {
      "epoch": 2.117919921875e-05,
      "step": 3470,
      "training_step_time": 0.3939473628997803
    },
    {
      "epoch": 2.1185302734375e-05,
      "model_forward_time": 0.11560678482055664,
      "step": 3471
    },
    {
      "epoch": 2.1185302734375e-05,
      "step": 3471,
      "training_step_time": 0.3939340114593506
    },
    {
      "epoch": 2.119140625e-05,
      "model_forward_time": 0.11555242538452148,
      "step": 3472
    },
    {
      "epoch": 2.119140625e-05,
      "step": 3472,
      "training_step_time": 0.389359712600708
    },
    {
      "epoch": 2.1197509765625e-05,
      "model_forward_time": 0.11520719528198242,
      "step": 3473
    },
    {
      "epoch": 2.1197509765625e-05,
      "step": 3473,
      "training_step_time": 0.3921535015106201
    },
    {
      "epoch": 2.120361328125e-05,
      "model_forward_time": 0.11632442474365234,
      "step": 3474
    },
    {
      "epoch": 2.120361328125e-05,
      "step": 3474,
      "training_step_time": 0.6244852542877197
    },
    {
      "epoch": 2.1209716796875e-05,
      "model_forward_time": 0.11509418487548828,
      "step": 3475
    },
    {
      "epoch": 2.1209716796875e-05,
      "step": 3475,
      "training_step_time": 0.45972347259521484
    },
    {
      "epoch": 2.12158203125e-05,
      "model_forward_time": 0.11527824401855469,
      "step": 3476
    },
    {
      "epoch": 2.12158203125e-05,
      "step": 3476,
      "training_step_time": 0.3968651294708252
    },
    {
      "epoch": 2.1221923828125e-05,
      "model_forward_time": 0.11517214775085449,
      "step": 3477
    },
    {
      "epoch": 2.1221923828125e-05,
      "step": 3477,
      "training_step_time": 0.4212527275085449
    },
    {
      "epoch": 2.122802734375e-05,
      "model_forward_time": 0.1152348518371582,
      "step": 3478
    },
    {
      "epoch": 2.122802734375e-05,
      "step": 3478,
      "training_step_time": 0.4201076030731201
    },
    {
      "epoch": 2.1234130859375e-05,
      "model_forward_time": 0.11452221870422363,
      "step": 3479
    },
    {
      "epoch": 2.1234130859375e-05,
      "step": 3479,
      "training_step_time": 0.5024237632751465
    },
    {
      "epoch": 2.1240234375e-05,
      "grad_norm": 0.4355277121067047,
      "learning_rate": 9.998250366089848e-05,
      "loss": 0.1372,
      "step": 3480
    },
    {
      "epoch": 2.1240234375e-05,
      "model_forward_time": 0.11508512496948242,
      "step": 3480
    },
    {
      "epoch": 2.1240234375e-05,
      "step": 3480,
      "training_step_time": 0.47360992431640625
    },
    {
      "epoch": 2.1246337890625e-05,
      "model_forward_time": 0.11613059043884277,
      "step": 3481
    },
    {
      "epoch": 2.1246337890625e-05,
      "step": 3481,
      "training_step_time": 0.39511561393737793
    },
    {
      "epoch": 2.125244140625e-05,
      "model_forward_time": 0.11519908905029297,
      "step": 3482
    },
    {
      "epoch": 2.125244140625e-05,
      "step": 3482,
      "training_step_time": 0.39264988899230957
    },
    {
      "epoch": 2.1258544921875e-05,
      "model_forward_time": 0.11531352996826172,
      "step": 3483
    },
    {
      "epoch": 2.1258544921875e-05,
      "step": 3483,
      "training_step_time": 0.39408183097839355
    },
    {
      "epoch": 2.12646484375e-05,
      "model_forward_time": 0.11553335189819336,
      "step": 3484
    },
    {
      "epoch": 2.12646484375e-05,
      "step": 3484,
      "training_step_time": 0.3962550163269043
    },
    {
      "epoch": 2.1270751953125e-05,
      "model_forward_time": 0.11500978469848633,
      "step": 3485
    },
    {
      "epoch": 2.1270751953125e-05,
      "step": 3485,
      "training_step_time": 0.3882143497467041
    },
    {
      "epoch": 2.127685546875e-05,
      "model_forward_time": 0.11571192741394043,
      "step": 3486
    },
    {
      "epoch": 2.127685546875e-05,
      "step": 3486,
      "training_step_time": 0.6832301616668701
    },
    {
      "epoch": 2.1282958984375e-05,
      "model_forward_time": 0.11588072776794434,
      "step": 3487
    },
    {
      "epoch": 2.1282958984375e-05,
      "step": 3487,
      "training_step_time": 0.39899158477783203
    },
    {
      "epoch": 2.12890625e-05,
      "model_forward_time": 0.1146697998046875,
      "step": 3488
    },
    {
      "epoch": 2.12890625e-05,
      "step": 3488,
      "training_step_time": 0.4218864440917969
    },
    {
      "epoch": 2.1295166015625e-05,
      "model_forward_time": 0.11514401435852051,
      "step": 3489
    },
    {
      "epoch": 2.1295166015625e-05,
      "step": 3489,
      "training_step_time": 0.4781181812286377
    },
    {
      "epoch": 2.130126953125e-05,
      "grad_norm": 0.3787548542022705,
      "learning_rate": 9.99817670976436e-05,
      "loss": 0.1335,
      "step": 3490
    },
    {
      "epoch": 2.130126953125e-05,
      "model_forward_time": 0.11474180221557617,
      "step": 3490
    },
    {
      "epoch": 2.130126953125e-05,
      "step": 3490,
      "training_step_time": 0.3889353275299072
    },
    {
      "epoch": 2.1307373046875e-05,
      "model_forward_time": 0.1156167984008789,
      "step": 3491
    },
    {
      "epoch": 2.1307373046875e-05,
      "step": 3491,
      "training_step_time": 0.48044538497924805
    },
    {
      "epoch": 2.13134765625e-05,
      "model_forward_time": 0.11585617065429688,
      "step": 3492
    },
    {
      "epoch": 2.13134765625e-05,
      "step": 3492,
      "training_step_time": 0.4865844249725342
    },
    {
      "epoch": 2.1319580078125e-05,
      "model_forward_time": 0.11495327949523926,
      "step": 3493
    },
    {
      "epoch": 2.1319580078125e-05,
      "step": 3493,
      "training_step_time": 0.4127209186553955
    },
    {
      "epoch": 2.132568359375e-05,
      "model_forward_time": 0.11553812026977539,
      "step": 3494
    },
    {
      "epoch": 2.132568359375e-05,
      "step": 3494,
      "training_step_time": 0.49556636810302734
    },
    {
      "epoch": 2.1331787109375e-05,
      "model_forward_time": 0.11451172828674316,
      "step": 3495
    },
    {
      "epoch": 2.1331787109375e-05,
      "step": 3495,
      "training_step_time": 0.39845871925354004
    },
    {
      "epoch": 2.1337890625e-05,
      "model_forward_time": 0.115692138671875,
      "step": 3496
    },
    {
      "epoch": 2.1337890625e-05,
      "step": 3496,
      "training_step_time": 0.38729119300842285
    },
    {
      "epoch": 2.1343994140625e-05,
      "model_forward_time": 0.11521267890930176,
      "step": 3497
    },
    {
      "epoch": 2.1343994140625e-05,
      "step": 3497,
      "training_step_time": 0.3899397850036621
    },
    {
      "epoch": 2.135009765625e-05,
      "model_forward_time": 0.1155083179473877,
      "step": 3498
    },
    {
      "epoch": 2.135009765625e-05,
      "step": 3498,
      "training_step_time": 0.50262451171875
    },
    {
      "epoch": 2.1356201171875e-05,
      "model_forward_time": 0.11512398719787598,
      "step": 3499
    },
    {
      "epoch": 2.1356201171875e-05,
      "step": 3499,
      "training_step_time": 0.3935418128967285
    },
    {
      "epoch": 2.13623046875e-05,
      "grad_norm": 0.4797550439834595,
      "learning_rate": 9.998101535124758e-05,
      "loss": 0.141,
      "step": 3500
    },
    {
      "epoch": 2.13623046875e-05,
      "model_forward_time": 0.1155397891998291,
      "step": 3500
    },
    {
      "epoch": 2.13623046875e-05,
      "step": 3500,
      "training_step_time": 0.39381837844848633
    },
    {
      "epoch": 2.1368408203125e-05,
      "model_forward_time": 0.11583542823791504,
      "step": 3501
    },
    {
      "epoch": 2.1368408203125e-05,
      "step": 3501,
      "training_step_time": 0.46178364753723145
    },
    {
      "epoch": 2.137451171875e-05,
      "model_forward_time": 0.1147911548614502,
      "step": 3502
    },
    {
      "epoch": 2.137451171875e-05,
      "step": 3502,
      "training_step_time": 0.49019360542297363
    },
    {
      "epoch": 2.1380615234375e-05,
      "model_forward_time": 0.11541152000427246,
      "step": 3503
    },
    {
      "epoch": 2.1380615234375e-05,
      "step": 3503,
      "training_step_time": 0.49936485290527344
    },
    {
      "epoch": 2.138671875e-05,
      "model_forward_time": 0.11517977714538574,
      "step": 3504
    },
    {
      "epoch": 2.138671875e-05,
      "step": 3504,
      "training_step_time": 0.3939683437347412
    },
    {
      "epoch": 2.1392822265625e-05,
      "model_forward_time": 0.11471796035766602,
      "step": 3505
    },
    {
      "epoch": 2.1392822265625e-05,
      "step": 3505,
      "training_step_time": 0.4145519733428955
    },
    {
      "epoch": 2.139892578125e-05,
      "model_forward_time": 0.11511349678039551,
      "step": 3506
    },
    {
      "epoch": 2.139892578125e-05,
      "step": 3506,
      "training_step_time": 0.3651890754699707
    },
    {
      "epoch": 2.1405029296875e-05,
      "model_forward_time": 0.11513376235961914,
      "step": 3507
    },
    {
      "epoch": 2.1405029296875e-05,
      "step": 3507,
      "training_step_time": 0.4790353775024414
    },
    {
      "epoch": 2.14111328125e-05,
      "model_forward_time": 0.11478209495544434,
      "step": 3508
    },
    {
      "epoch": 2.14111328125e-05,
      "step": 3508,
      "training_step_time": 0.49452733993530273
    },
    {
      "epoch": 2.1417236328125e-05,
      "model_forward_time": 0.11473417282104492,
      "step": 3509
    },
    {
      "epoch": 2.1417236328125e-05,
      "step": 3509,
      "training_step_time": 0.387190580368042
    },
    {
      "epoch": 2.142333984375e-05,
      "grad_norm": 0.4705619812011719,
      "learning_rate": 9.998024842193876e-05,
      "loss": 0.1307,
      "step": 3510
    },
    {
      "epoch": 2.142333984375e-05,
      "model_forward_time": 0.11470150947570801,
      "step": 3510
    },
    {
      "epoch": 2.142333984375e-05,
      "step": 3510,
      "training_step_time": 0.39499640464782715
    },
    {
      "epoch": 2.1429443359375e-05,
      "model_forward_time": 0.11497020721435547,
      "step": 3511
    },
    {
      "epoch": 2.1429443359375e-05,
      "step": 3511,
      "training_step_time": 0.40086960792541504
    },
    {
      "epoch": 2.1435546875e-05,
      "model_forward_time": 0.115234375,
      "step": 3512
    },
    {
      "epoch": 2.1435546875e-05,
      "step": 3512,
      "training_step_time": 0.40548229217529297
    },
    {
      "epoch": 2.1441650390625e-05,
      "model_forward_time": 0.11592698097229004,
      "step": 3513
    },
    {
      "epoch": 2.1441650390625e-05,
      "step": 3513,
      "training_step_time": 0.396806001663208
    },
    {
      "epoch": 2.144775390625e-05,
      "model_forward_time": 0.11582183837890625,
      "step": 3514
    },
    {
      "epoch": 2.144775390625e-05,
      "step": 3514,
      "training_step_time": 0.3932027816772461
    },
    {
      "epoch": 2.1453857421875e-05,
      "model_forward_time": 0.1157383918762207,
      "step": 3515
    },
    {
      "epoch": 2.1453857421875e-05,
      "step": 3515,
      "training_step_time": 0.4209907054901123
    },
    {
      "epoch": 2.14599609375e-05,
      "model_forward_time": 0.11485433578491211,
      "step": 3516
    },
    {
      "epoch": 2.14599609375e-05,
      "step": 3516,
      "training_step_time": 0.5415818691253662
    },
    {
      "epoch": 2.1466064453125e-05,
      "model_forward_time": 0.11743617057800293,
      "step": 3517
    },
    {
      "epoch": 2.1466064453125e-05,
      "step": 3517,
      "training_step_time": 0.49489378929138184
    },
    {
      "epoch": 2.147216796875e-05,
      "model_forward_time": 0.11629319190979004,
      "step": 3518
    },
    {
      "epoch": 2.147216796875e-05,
      "step": 3518,
      "training_step_time": 0.39558887481689453
    },
    {
      "epoch": 2.1478271484375e-05,
      "model_forward_time": 0.11551213264465332,
      "step": 3519
    },
    {
      "epoch": 2.1478271484375e-05,
      "step": 3519,
      "training_step_time": 0.42659640312194824
    },
    {
      "epoch": 2.1484375e-05,
      "grad_norm": 0.36119821667671204,
      "learning_rate": 9.997946630995013e-05,
      "loss": 0.1223,
      "step": 3520
    },
    {
      "epoch": 2.1484375e-05,
      "model_forward_time": 0.11504769325256348,
      "step": 3520
    },
    {
      "epoch": 2.1484375e-05,
      "step": 3520,
      "training_step_time": 0.4096856117248535
    },
    {
      "epoch": 2.1490478515625e-05,
      "model_forward_time": 0.11554384231567383,
      "step": 3521
    },
    {
      "epoch": 2.1490478515625e-05,
      "step": 3521,
      "training_step_time": 0.4352753162384033
    },
    {
      "epoch": 2.149658203125e-05,
      "model_forward_time": 0.11510562896728516,
      "step": 3522
    },
    {
      "epoch": 2.149658203125e-05,
      "step": 3522,
      "training_step_time": 0.488739013671875
    },
    {
      "epoch": 2.1502685546875e-05,
      "model_forward_time": 0.115447998046875,
      "step": 3523
    },
    {
      "epoch": 2.1502685546875e-05,
      "step": 3523,
      "training_step_time": 0.49556803703308105
    },
    {
      "epoch": 2.15087890625e-05,
      "model_forward_time": 0.11519122123718262,
      "step": 3524
    },
    {
      "epoch": 2.15087890625e-05,
      "step": 3524,
      "training_step_time": 0.40424180030822754
    },
    {
      "epoch": 2.1514892578125e-05,
      "model_forward_time": 0.11528515815734863,
      "step": 3525
    },
    {
      "epoch": 2.1514892578125e-05,
      "step": 3525,
      "training_step_time": 0.3942372798919678
    },
    {
      "epoch": 2.152099609375e-05,
      "model_forward_time": 0.11472511291503906,
      "step": 3526
    },
    {
      "epoch": 2.152099609375e-05,
      "step": 3526,
      "training_step_time": 0.3871889114379883
    },
    {
      "epoch": 2.1527099609375e-05,
      "model_forward_time": 0.11543607711791992,
      "step": 3527
    },
    {
      "epoch": 2.1527099609375e-05,
      "step": 3527,
      "training_step_time": 0.38774633407592773
    },
    {
      "epoch": 2.1533203125e-05,
      "model_forward_time": 0.11516475677490234,
      "step": 3528
    },
    {
      "epoch": 2.1533203125e-05,
      "step": 3528,
      "training_step_time": 0.41177988052368164
    },
    {
      "epoch": 2.1539306640625e-05,
      "model_forward_time": 0.11555981636047363,
      "step": 3529
    },
    {
      "epoch": 2.1539306640625e-05,
      "step": 3529,
      "training_step_time": 0.39935898780822754
    },
    {
      "epoch": 2.154541015625e-05,
      "grad_norm": 0.3078767955303192,
      "learning_rate": 9.997866901551926e-05,
      "loss": 0.121,
      "step": 3530
    },
    {
      "epoch": 2.154541015625e-05,
      "model_forward_time": 0.11545515060424805,
      "step": 3530
    },
    {
      "epoch": 2.154541015625e-05,
      "step": 3530,
      "training_step_time": 0.4872424602508545
    },
    {
      "epoch": 2.1551513671875e-05,
      "model_forward_time": 0.1151723861694336,
      "step": 3531
    },
    {
      "epoch": 2.1551513671875e-05,
      "step": 3531,
      "training_step_time": 0.4340076446533203
    },
    {
      "epoch": 2.15576171875e-05,
      "model_forward_time": 0.11577343940734863,
      "step": 3532
    },
    {
      "epoch": 2.15576171875e-05,
      "step": 3532,
      "training_step_time": 0.46360325813293457
    },
    {
      "epoch": 2.1563720703125e-05,
      "model_forward_time": 0.11543917655944824,
      "step": 3533
    },
    {
      "epoch": 2.1563720703125e-05,
      "step": 3533,
      "training_step_time": 0.4800283908843994
    },
    {
      "epoch": 2.156982421875e-05,
      "model_forward_time": 0.11512517929077148,
      "step": 3534
    },
    {
      "epoch": 2.156982421875e-05,
      "step": 3534,
      "training_step_time": 0.5060887336730957
    },
    {
      "epoch": 2.1575927734375e-05,
      "model_forward_time": 0.11526703834533691,
      "step": 3535
    },
    {
      "epoch": 2.1575927734375e-05,
      "step": 3535,
      "training_step_time": 0.5225098133087158
    },
    {
      "epoch": 2.158203125e-05,
      "model_forward_time": 0.11442041397094727,
      "step": 3536
    },
    {
      "epoch": 2.158203125e-05,
      "step": 3536,
      "training_step_time": 0.48319578170776367
    },
    {
      "epoch": 2.1588134765625e-05,
      "model_forward_time": 0.11627388000488281,
      "step": 3537
    },
    {
      "epoch": 2.1588134765625e-05,
      "step": 3537,
      "training_step_time": 0.48264360427856445
    },
    {
      "epoch": 2.159423828125e-05,
      "model_forward_time": 0.11474275588989258,
      "step": 3538
    },
    {
      "epoch": 2.159423828125e-05,
      "step": 3538,
      "training_step_time": 0.39409852027893066
    },
    {
      "epoch": 2.1600341796875e-05,
      "model_forward_time": 0.11444401741027832,
      "step": 3539
    },
    {
      "epoch": 2.1600341796875e-05,
      "step": 3539,
      "training_step_time": 0.3986842632293701
    },
    {
      "epoch": 2.16064453125e-05,
      "grad_norm": 0.531217634677887,
      "learning_rate": 9.997785653888835e-05,
      "loss": 0.1309,
      "step": 3540
    },
    {
      "epoch": 2.16064453125e-05,
      "model_forward_time": 0.11489057540893555,
      "step": 3540
    },
    {
      "epoch": 2.16064453125e-05,
      "step": 3540,
      "training_step_time": 0.3789680004119873
    },
    {
      "epoch": 2.1612548828125e-05,
      "model_forward_time": 0.11492085456848145,
      "step": 3541
    },
    {
      "epoch": 2.1612548828125e-05,
      "step": 3541,
      "training_step_time": 0.3994290828704834
    },
    {
      "epoch": 2.161865234375e-05,
      "model_forward_time": 0.11520814895629883,
      "step": 3542
    },
    {
      "epoch": 2.161865234375e-05,
      "step": 3542,
      "training_step_time": 0.39940762519836426
    },
    {
      "epoch": 2.1624755859375e-05,
      "model_forward_time": 0.11478018760681152,
      "step": 3543
    },
    {
      "epoch": 2.1624755859375e-05,
      "step": 3543,
      "training_step_time": 0.4069480895996094
    },
    {
      "epoch": 2.1630859375e-05,
      "model_forward_time": 0.11538028717041016,
      "step": 3544
    },
    {
      "epoch": 2.1630859375e-05,
      "step": 3544,
      "training_step_time": 0.4166741371154785
    },
    {
      "epoch": 2.1636962890625e-05,
      "model_forward_time": 0.1172952651977539,
      "step": 3545
    },
    {
      "epoch": 2.1636962890625e-05,
      "step": 3545,
      "training_step_time": 0.5055444240570068
    },
    {
      "epoch": 2.164306640625e-05,
      "model_forward_time": 0.11571693420410156,
      "step": 3546
    },
    {
      "epoch": 2.164306640625e-05,
      "step": 3546,
      "training_step_time": 0.5088391304016113
    },
    {
      "epoch": 2.1649169921875e-05,
      "model_forward_time": 0.1149299144744873,
      "step": 3547
    },
    {
      "epoch": 2.1649169921875e-05,
      "step": 3547,
      "training_step_time": 0.4083738327026367
    },
    {
      "epoch": 2.16552734375e-05,
      "model_forward_time": 0.11512017250061035,
      "step": 3548
    },
    {
      "epoch": 2.16552734375e-05,
      "step": 3548,
      "training_step_time": 0.4192841053009033
    },
    {
      "epoch": 2.1661376953125e-05,
      "model_forward_time": 0.11492681503295898,
      "step": 3549
    },
    {
      "epoch": 2.1661376953125e-05,
      "step": 3549,
      "training_step_time": 0.4513669013977051
    },
    {
      "epoch": 2.166748046875e-05,
      "grad_norm": 0.2788081467151642,
      "learning_rate": 9.997702888030423e-05,
      "loss": 0.1251,
      "step": 3550
    },
    {
      "epoch": 2.166748046875e-05,
      "model_forward_time": 0.11491560935974121,
      "step": 3550
    },
    {
      "epoch": 2.166748046875e-05,
      "step": 3550,
      "training_step_time": 0.4389212131500244
    },
    {
      "epoch": 2.1673583984375e-05,
      "model_forward_time": 0.11511659622192383,
      "step": 3551
    },
    {
      "epoch": 2.1673583984375e-05,
      "step": 3551,
      "training_step_time": 0.4050276279449463
    },
    {
      "epoch": 2.16796875e-05,
      "model_forward_time": 0.1152641773223877,
      "step": 3552
    },
    {
      "epoch": 2.16796875e-05,
      "step": 3552,
      "training_step_time": 0.3924899101257324
    },
    {
      "epoch": 2.1685791015625e-05,
      "model_forward_time": 0.11540079116821289,
      "step": 3553
    },
    {
      "epoch": 2.1685791015625e-05,
      "step": 3553,
      "training_step_time": 0.38924169540405273
    },
    {
      "epoch": 2.169189453125e-05,
      "model_forward_time": 0.11567425727844238,
      "step": 3554
    },
    {
      "epoch": 2.169189453125e-05,
      "step": 3554,
      "training_step_time": 0.39099955558776855
    },
    {
      "epoch": 2.1697998046875e-05,
      "model_forward_time": 0.11500287055969238,
      "step": 3555
    },
    {
      "epoch": 2.1697998046875e-05,
      "step": 3555,
      "training_step_time": 0.4201011657714844
    },
    {
      "epoch": 2.17041015625e-05,
      "model_forward_time": 0.11495685577392578,
      "step": 3556
    },
    {
      "epoch": 2.17041015625e-05,
      "step": 3556,
      "training_step_time": 0.39073610305786133
    },
    {
      "epoch": 2.1710205078125e-05,
      "model_forward_time": 0.11510205268859863,
      "step": 3557
    },
    {
      "epoch": 2.1710205078125e-05,
      "step": 3557,
      "training_step_time": 0.3923978805541992
    },
    {
      "epoch": 2.171630859375e-05,
      "model_forward_time": 0.11582565307617188,
      "step": 3558
    },
    {
      "epoch": 2.171630859375e-05,
      "step": 3558,
      "training_step_time": 0.5040667057037354
    },
    {
      "epoch": 2.1722412109375e-05,
      "model_forward_time": 0.11502337455749512,
      "step": 3559
    },
    {
      "epoch": 2.1722412109375e-05,
      "step": 3559,
      "training_step_time": 0.4173765182495117
    },
    {
      "epoch": 2.1728515625e-05,
      "grad_norm": 0.353547602891922,
      "learning_rate": 9.997618604001829e-05,
      "loss": 0.1234,
      "step": 3560
    },
    {
      "epoch": 2.1728515625e-05,
      "model_forward_time": 0.11593818664550781,
      "step": 3560
    },
    {
      "epoch": 2.1728515625e-05,
      "step": 3560,
      "training_step_time": 0.3946504592895508
    },
    {
      "epoch": 2.1734619140625e-05,
      "model_forward_time": 0.11541986465454102,
      "step": 3561
    },
    {
      "epoch": 2.1734619140625e-05,
      "step": 3561,
      "training_step_time": 0.42913222312927246
    },
    {
      "epoch": 2.174072265625e-05,
      "model_forward_time": 0.11599278450012207,
      "step": 3562
    },
    {
      "epoch": 2.174072265625e-05,
      "step": 3562,
      "training_step_time": 0.4382517337799072
    },
    {
      "epoch": 2.1746826171875e-05,
      "model_forward_time": 0.11685061454772949,
      "step": 3563
    },
    {
      "epoch": 2.1746826171875e-05,
      "step": 3563,
      "training_step_time": 0.40146827697753906
    },
    {
      "epoch": 2.17529296875e-05,
      "model_forward_time": 0.1154940128326416,
      "step": 3564
    },
    {
      "epoch": 2.17529296875e-05,
      "step": 3564,
      "training_step_time": 0.48081183433532715
    },
    {
      "epoch": 2.1759033203125e-05,
      "model_forward_time": 0.11517524719238281,
      "step": 3565
    },
    {
      "epoch": 2.1759033203125e-05,
      "step": 3565,
      "training_step_time": 0.3933243751525879
    },
    {
      "epoch": 2.176513671875e-05,
      "model_forward_time": 0.11520123481750488,
      "step": 3566
    },
    {
      "epoch": 2.176513671875e-05,
      "step": 3566,
      "training_step_time": 0.4909708499908447
    },
    {
      "epoch": 2.1771240234375e-05,
      "model_forward_time": 0.1157679557800293,
      "step": 3567
    },
    {
      "epoch": 2.1771240234375e-05,
      "step": 3567,
      "training_step_time": 0.3928391933441162
    },
    {
      "epoch": 2.177734375e-05,
      "model_forward_time": 0.11529731750488281,
      "step": 3568
    },
    {
      "epoch": 2.177734375e-05,
      "step": 3568,
      "training_step_time": 0.41458630561828613
    },
    {
      "epoch": 2.1783447265625e-05,
      "model_forward_time": 0.11477136611938477,
      "step": 3569
    },
    {
      "epoch": 2.1783447265625e-05,
      "step": 3569,
      "training_step_time": 0.4007115364074707
    },
    {
      "epoch": 2.178955078125e-05,
      "grad_norm": 0.418510377407074,
      "learning_rate": 9.997532801828658e-05,
      "loss": 0.1296,
      "step": 3570
    },
    {
      "epoch": 2.178955078125e-05,
      "model_forward_time": 0.1151571273803711,
      "step": 3570
    },
    {
      "epoch": 2.178955078125e-05,
      "step": 3570,
      "training_step_time": 0.41697168350219727
    },
    {
      "epoch": 2.1795654296875e-05,
      "model_forward_time": 0.11600351333618164,
      "step": 3571
    },
    {
      "epoch": 2.1795654296875e-05,
      "step": 3571,
      "training_step_time": 0.38411831855773926
    },
    {
      "epoch": 2.18017578125e-05,
      "model_forward_time": 0.1156313419342041,
      "step": 3572
    },
    {
      "epoch": 2.18017578125e-05,
      "step": 3572,
      "training_step_time": 0.3838067054748535
    },
    {
      "epoch": 2.1807861328125e-05,
      "model_forward_time": 0.11646294593811035,
      "step": 3573
    },
    {
      "epoch": 2.1807861328125e-05,
      "step": 3573,
      "training_step_time": 0.49076414108276367
    },
    {
      "epoch": 2.181396484375e-05,
      "model_forward_time": 0.1151571273803711,
      "step": 3574
    },
    {
      "epoch": 2.181396484375e-05,
      "step": 3574,
      "training_step_time": 0.48142337799072266
    },
    {
      "epoch": 2.1820068359375e-05,
      "model_forward_time": 0.11466288566589355,
      "step": 3575
    },
    {
      "epoch": 2.1820068359375e-05,
      "step": 3575,
      "training_step_time": 0.5015361309051514
    },
    {
      "epoch": 2.1826171875e-05,
      "model_forward_time": 0.11509013175964355,
      "step": 3576
    },
    {
      "epoch": 2.1826171875e-05,
      "step": 3576,
      "training_step_time": 0.4894075393676758
    },
    {
      "epoch": 2.1832275390625e-05,
      "model_forward_time": 0.11432194709777832,
      "step": 3577
    },
    {
      "epoch": 2.1832275390625e-05,
      "step": 3577,
      "training_step_time": 0.4070851802825928
    },
    {
      "epoch": 2.183837890625e-05,
      "model_forward_time": 0.11514616012573242,
      "step": 3578
    },
    {
      "epoch": 2.183837890625e-05,
      "step": 3578,
      "training_step_time": 0.4042811393737793
    },
    {
      "epoch": 2.1844482421875e-05,
      "model_forward_time": 0.11454010009765625,
      "step": 3579
    },
    {
      "epoch": 2.1844482421875e-05,
      "step": 3579,
      "training_step_time": 0.4724149703979492
    },
    {
      "epoch": 2.18505859375e-05,
      "grad_norm": 0.2877735197544098,
      "learning_rate": 9.997445481536973e-05,
      "loss": 0.1237,
      "step": 3580
    },
    {
      "epoch": 2.18505859375e-05,
      "model_forward_time": 0.1145021915435791,
      "step": 3580
    },
    {
      "epoch": 2.18505859375e-05,
      "step": 3580,
      "training_step_time": 0.4727897644042969
    },
    {
      "epoch": 2.1856689453125e-05,
      "model_forward_time": 0.11472630500793457,
      "step": 3581
    },
    {
      "epoch": 2.1856689453125e-05,
      "step": 3581,
      "training_step_time": 0.39922022819519043
    },
    {
      "epoch": 2.186279296875e-05,
      "model_forward_time": 0.11476659774780273,
      "step": 3582
    },
    {
      "epoch": 2.186279296875e-05,
      "step": 3582,
      "training_step_time": 0.4001128673553467
    },
    {
      "epoch": 2.1868896484375e-05,
      "model_forward_time": 0.11527323722839355,
      "step": 3583
    },
    {
      "epoch": 2.1868896484375e-05,
      "step": 3583,
      "training_step_time": 0.4015626907348633
    },
    {
      "epoch": 2.1875e-05,
      "model_forward_time": 0.11491751670837402,
      "step": 3584
    },
    {
      "epoch": 2.1875e-05,
      "step": 3584,
      "training_step_time": 0.38891148567199707
    },
    {
      "epoch": 2.1881103515625e-05,
      "model_forward_time": 0.11506414413452148,
      "step": 3585
    },
    {
      "epoch": 2.1881103515625e-05,
      "step": 3585,
      "training_step_time": 0.39743614196777344
    },
    {
      "epoch": 2.188720703125e-05,
      "model_forward_time": 0.11509346961975098,
      "step": 3586
    },
    {
      "epoch": 2.188720703125e-05,
      "step": 3586,
      "training_step_time": 0.38811755180358887
    },
    {
      "epoch": 2.1893310546875e-05,
      "model_forward_time": 0.1152181625366211,
      "step": 3587
    },
    {
      "epoch": 2.1893310546875e-05,
      "step": 3587,
      "training_step_time": 0.38683509826660156
    },
    {
      "epoch": 2.18994140625e-05,
      "model_forward_time": 0.11535215377807617,
      "step": 3588
    },
    {
      "epoch": 2.18994140625e-05,
      "step": 3588,
      "training_step_time": 0.45618247985839844
    },
    {
      "epoch": 2.1905517578125e-05,
      "model_forward_time": 0.11572265625,
      "step": 3589
    },
    {
      "epoch": 2.1905517578125e-05,
      "step": 3589,
      "training_step_time": 0.4416697025299072
    },
    {
      "epoch": 2.191162109375e-05,
      "grad_norm": 0.3215029835700989,
      "learning_rate": 9.997356643153303e-05,
      "loss": 0.1169,
      "step": 3590
    },
    {
      "epoch": 2.191162109375e-05,
      "model_forward_time": 0.11696362495422363,
      "step": 3590
    },
    {
      "epoch": 2.191162109375e-05,
      "step": 3590,
      "training_step_time": 0.4586944580078125
    },
    {
      "epoch": 2.1917724609375e-05,
      "model_forward_time": 0.1159067153930664,
      "step": 3591
    },
    {
      "epoch": 2.1917724609375e-05,
      "step": 3591,
      "training_step_time": 0.48627161979675293
    },
    {
      "epoch": 2.1923828125e-05,
      "model_forward_time": 0.11500883102416992,
      "step": 3592
    },
    {
      "epoch": 2.1923828125e-05,
      "step": 3592,
      "training_step_time": 0.3682997226715088
    },
    {
      "epoch": 2.1929931640625e-05,
      "model_forward_time": 0.11547446250915527,
      "step": 3593
    },
    {
      "epoch": 2.1929931640625e-05,
      "step": 3593,
      "training_step_time": 0.49416041374206543
    },
    {
      "epoch": 2.193603515625e-05,
      "model_forward_time": 0.11535167694091797,
      "step": 3594
    },
    {
      "epoch": 2.193603515625e-05,
      "step": 3594,
      "training_step_time": 0.5002520084381104
    },
    {
      "epoch": 2.1942138671875e-05,
      "model_forward_time": 0.11457276344299316,
      "step": 3595
    },
    {
      "epoch": 2.1942138671875e-05,
      "step": 3595,
      "training_step_time": 0.395611047744751
    },
    {
      "epoch": 2.19482421875e-05,
      "model_forward_time": 0.11540937423706055,
      "step": 3596
    },
    {
      "epoch": 2.19482421875e-05,
      "step": 3596,
      "training_step_time": 0.388439416885376
    },
    {
      "epoch": 2.1954345703125e-05,
      "model_forward_time": 0.11480212211608887,
      "step": 3597
    },
    {
      "epoch": 2.1954345703125e-05,
      "step": 3597,
      "training_step_time": 0.3883211612701416
    },
    {
      "epoch": 2.196044921875e-05,
      "model_forward_time": 0.11548709869384766,
      "step": 3598
    },
    {
      "epoch": 2.196044921875e-05,
      "step": 3598,
      "training_step_time": 0.3851175308227539
    },
    {
      "epoch": 2.1966552734375e-05,
      "model_forward_time": 0.11612820625305176,
      "step": 3599
    },
    {
      "epoch": 2.1966552734375e-05,
      "step": 3599,
      "training_step_time": 0.38980793952941895
    },
    {
      "epoch": 2.197265625e-05,
      "grad_norm": 0.32068580389022827,
      "learning_rate": 9.997266286704631e-05,
      "loss": 0.1246,
      "step": 3600
    },
    {
      "epoch": 2.197265625e-05,
      "model_forward_time": 0.11523151397705078,
      "step": 3600
    },
    {
      "epoch": 2.197265625e-05,
      "step": 3600,
      "training_step_time": 0.3911004066467285
    },
    {
      "epoch": 2.1978759765625e-05,
      "model_forward_time": 0.11516094207763672,
      "step": 3601
    },
    {
      "epoch": 2.1978759765625e-05,
      "step": 3601,
      "training_step_time": 0.38938140869140625
    },
    {
      "epoch": 2.198486328125e-05,
      "model_forward_time": 0.11566352844238281,
      "step": 3602
    },
    {
      "epoch": 2.198486328125e-05,
      "step": 3602,
      "training_step_time": 0.4108555316925049
    },
    {
      "epoch": 2.1990966796875e-05,
      "model_forward_time": 0.11518597602844238,
      "step": 3603
    },
    {
      "epoch": 2.1990966796875e-05,
      "step": 3603,
      "training_step_time": 0.43732309341430664
    },
    {
      "epoch": 2.19970703125e-05,
      "model_forward_time": 0.11509251594543457,
      "step": 3604
    },
    {
      "epoch": 2.19970703125e-05,
      "step": 3604,
      "training_step_time": 0.5115125179290771
    },
    {
      "epoch": 2.2003173828125e-05,
      "model_forward_time": 0.11512184143066406,
      "step": 3605
    },
    {
      "epoch": 2.2003173828125e-05,
      "step": 3605,
      "training_step_time": 0.41152429580688477
    },
    {
      "epoch": 2.200927734375e-05,
      "model_forward_time": 0.1163794994354248,
      "step": 3606
    },
    {
      "epoch": 2.200927734375e-05,
      "step": 3606,
      "training_step_time": 0.4606642723083496
    },
    {
      "epoch": 2.2015380859375e-05,
      "model_forward_time": 0.11583209037780762,
      "step": 3607
    },
    {
      "epoch": 2.2015380859375e-05,
      "step": 3607,
      "training_step_time": 0.48398280143737793
    },
    {
      "epoch": 2.2021484375e-05,
      "model_forward_time": 0.1155247688293457,
      "step": 3608
    },
    {
      "epoch": 2.2021484375e-05,
      "step": 3608,
      "training_step_time": 0.4905664920806885
    },
    {
      "epoch": 2.2027587890625e-05,
      "model_forward_time": 0.11495828628540039,
      "step": 3609
    },
    {
      "epoch": 2.2027587890625e-05,
      "step": 3609,
      "training_step_time": 0.5088341236114502
    },
    {
      "epoch": 2.203369140625e-05,
      "grad_norm": 0.3670293092727661,
      "learning_rate": 9.997174412218406e-05,
      "loss": 0.1127,
      "step": 3610
    },
    {
      "epoch": 2.203369140625e-05,
      "model_forward_time": 0.11465740203857422,
      "step": 3610
    },
    {
      "epoch": 2.203369140625e-05,
      "step": 3610,
      "training_step_time": 0.38527846336364746
    },
    {
      "epoch": 2.2039794921875e-05,
      "model_forward_time": 0.11468744277954102,
      "step": 3611
    },
    {
      "epoch": 2.2039794921875e-05,
      "step": 3611,
      "training_step_time": 0.3878135681152344
    },
    {
      "epoch": 2.20458984375e-05,
      "model_forward_time": 0.11531877517700195,
      "step": 3612
    },
    {
      "epoch": 2.20458984375e-05,
      "step": 3612,
      "training_step_time": 0.3981297016143799
    },
    {
      "epoch": 2.2052001953125e-05,
      "model_forward_time": 0.11559104919433594,
      "step": 3613
    },
    {
      "epoch": 2.2052001953125e-05,
      "step": 3613,
      "training_step_time": 0.3972349166870117
    },
    {
      "epoch": 2.205810546875e-05,
      "model_forward_time": 0.1150050163269043,
      "step": 3614
    },
    {
      "epoch": 2.205810546875e-05,
      "step": 3614,
      "training_step_time": 0.39653515815734863
    },
    {
      "epoch": 2.2064208984375e-05,
      "model_forward_time": 0.11543679237365723,
      "step": 3615
    },
    {
      "epoch": 2.2064208984375e-05,
      "step": 3615,
      "training_step_time": 0.3983752727508545
    },
    {
      "epoch": 2.20703125e-05,
      "model_forward_time": 0.11589956283569336,
      "step": 3616
    },
    {
      "epoch": 2.20703125e-05,
      "step": 3616,
      "training_step_time": 0.4077756404876709
    },
    {
      "epoch": 2.2076416015625e-05,
      "model_forward_time": 0.11507606506347656,
      "step": 3617
    },
    {
      "epoch": 2.2076416015625e-05,
      "step": 3617,
      "training_step_time": 0.4293630123138428
    },
    {
      "epoch": 2.208251953125e-05,
      "model_forward_time": 0.11557459831237793,
      "step": 3618
    },
    {
      "epoch": 2.208251953125e-05,
      "step": 3618,
      "training_step_time": 0.4075355529785156
    },
    {
      "epoch": 2.2088623046875e-05,
      "model_forward_time": 0.11569881439208984,
      "step": 3619
    },
    {
      "epoch": 2.2088623046875e-05,
      "step": 3619,
      "training_step_time": 0.4392578601837158
    },
    {
      "epoch": 2.20947265625e-05,
      "grad_norm": 0.40554362535476685,
      "learning_rate": 9.997081019722537e-05,
      "loss": 0.1226,
      "step": 3620
    },
    {
      "epoch": 2.20947265625e-05,
      "model_forward_time": 0.11501359939575195,
      "step": 3620
    },
    {
      "epoch": 2.20947265625e-05,
      "step": 3620,
      "training_step_time": 0.40161705017089844
    },
    {
      "epoch": 2.2100830078125e-05,
      "model_forward_time": 0.11545753479003906,
      "step": 3621
    },
    {
      "epoch": 2.2100830078125e-05,
      "step": 3621,
      "training_step_time": 0.41614460945129395
    },
    {
      "epoch": 2.210693359375e-05,
      "model_forward_time": 0.11519169807434082,
      "step": 3622
    },
    {
      "epoch": 2.210693359375e-05,
      "step": 3622,
      "training_step_time": 0.4267709255218506
    },
    {
      "epoch": 2.2113037109375e-05,
      "model_forward_time": 0.11511397361755371,
      "step": 3623
    },
    {
      "epoch": 2.2113037109375e-05,
      "step": 3623,
      "training_step_time": 0.5089356899261475
    },
    {
      "epoch": 2.2119140625e-05,
      "model_forward_time": 0.11561870574951172,
      "step": 3624
    },
    {
      "epoch": 2.2119140625e-05,
      "step": 3624,
      "training_step_time": 0.4951198101043701
    },
    {
      "epoch": 2.2125244140625e-05,
      "model_forward_time": 0.11524009704589844,
      "step": 3625
    },
    {
      "epoch": 2.2125244140625e-05,
      "step": 3625,
      "training_step_time": 0.3974189758300781
    },
    {
      "epoch": 2.213134765625e-05,
      "model_forward_time": 0.11447310447692871,
      "step": 3626
    },
    {
      "epoch": 2.213134765625e-05,
      "step": 3626,
      "training_step_time": 0.3923158645629883
    },
    {
      "epoch": 2.2137451171875e-05,
      "model_forward_time": 0.11491227149963379,
      "step": 3627
    },
    {
      "epoch": 2.2137451171875e-05,
      "step": 3627,
      "training_step_time": 0.3896489143371582
    },
    {
      "epoch": 2.21435546875e-05,
      "model_forward_time": 0.11501717567443848,
      "step": 3628
    },
    {
      "epoch": 2.21435546875e-05,
      "step": 3628,
      "training_step_time": 0.3944668769836426
    },
    {
      "epoch": 2.2149658203125e-05,
      "model_forward_time": 0.11508345603942871,
      "step": 3629
    },
    {
      "epoch": 2.2149658203125e-05,
      "step": 3629,
      "training_step_time": 0.392672061920166
    },
    {
      "epoch": 2.215576171875e-05,
      "grad_norm": 0.21074055135250092,
      "learning_rate": 9.996986109245395e-05,
      "loss": 0.12,
      "step": 3630
    },
    {
      "epoch": 2.215576171875e-05,
      "model_forward_time": 0.11574959754943848,
      "step": 3630
    },
    {
      "epoch": 2.215576171875e-05,
      "step": 3630,
      "training_step_time": 0.5277652740478516
    },
    {
      "epoch": 2.2161865234375e-05,
      "model_forward_time": 0.1151435375213623,
      "step": 3631
    },
    {
      "epoch": 2.2161865234375e-05,
      "step": 3631,
      "training_step_time": 0.39888620376586914
    },
    {
      "epoch": 2.216796875e-05,
      "model_forward_time": 0.11557316780090332,
      "step": 3632
    },
    {
      "epoch": 2.216796875e-05,
      "step": 3632,
      "training_step_time": 0.4526994228363037
    },
    {
      "epoch": 2.2174072265625e-05,
      "model_forward_time": 0.1146078109741211,
      "step": 3633
    },
    {
      "epoch": 2.2174072265625e-05,
      "step": 3633,
      "training_step_time": 0.4519038200378418
    },
    {
      "epoch": 2.218017578125e-05,
      "model_forward_time": 0.11467361450195312,
      "step": 3634
    },
    {
      "epoch": 2.218017578125e-05,
      "step": 3634,
      "training_step_time": 0.49323010444641113
    },
    {
      "epoch": 2.2186279296875e-05,
      "model_forward_time": 0.11436057090759277,
      "step": 3635
    },
    {
      "epoch": 2.2186279296875e-05,
      "step": 3635,
      "training_step_time": 0.4665720462799072
    },
    {
      "epoch": 2.21923828125e-05,
      "model_forward_time": 0.11663579940795898,
      "step": 3636
    },
    {
      "epoch": 2.21923828125e-05,
      "step": 3636,
      "training_step_time": 0.4332761764526367
    },
    {
      "epoch": 2.2198486328125e-05,
      "model_forward_time": 0.11601948738098145,
      "step": 3637
    },
    {
      "epoch": 2.2198486328125e-05,
      "step": 3637,
      "training_step_time": 0.3961825370788574
    },
    {
      "epoch": 2.220458984375e-05,
      "model_forward_time": 0.11493253707885742,
      "step": 3638
    },
    {
      "epoch": 2.220458984375e-05,
      "step": 3638,
      "training_step_time": 0.5459182262420654
    },
    {
      "epoch": 2.2210693359375e-05,
      "model_forward_time": 0.1152033805847168,
      "step": 3639
    },
    {
      "epoch": 2.2210693359375e-05,
      "step": 3639,
      "training_step_time": 0.40940022468566895
    },
    {
      "epoch": 2.2216796875e-05,
      "grad_norm": 0.3045071065425873,
      "learning_rate": 9.99688968081581e-05,
      "loss": 0.1211,
      "step": 3640
    },
    {
      "epoch": 2.2216796875e-05,
      "model_forward_time": 0.11462879180908203,
      "step": 3640
    },
    {
      "epoch": 2.2216796875e-05,
      "step": 3640,
      "training_step_time": 0.396409273147583
    },
    {
      "epoch": 2.2222900390625e-05,
      "model_forward_time": 0.11519527435302734,
      "step": 3641
    },
    {
      "epoch": 2.2222900390625e-05,
      "step": 3641,
      "training_step_time": 0.39753270149230957
    },
    {
      "epoch": 2.222900390625e-05,
      "model_forward_time": 0.11459851264953613,
      "step": 3642
    },
    {
      "epoch": 2.222900390625e-05,
      "step": 3642,
      "training_step_time": 0.38558197021484375
    },
    {
      "epoch": 2.2235107421875e-05,
      "model_forward_time": 0.11577677726745605,
      "step": 3643
    },
    {
      "epoch": 2.2235107421875e-05,
      "step": 3643,
      "training_step_time": 0.3948242664337158
    },
    {
      "epoch": 2.22412109375e-05,
      "model_forward_time": 0.11498332023620605,
      "step": 3644
    },
    {
      "epoch": 2.22412109375e-05,
      "step": 3644,
      "training_step_time": 0.3967607021331787
    },
    {
      "epoch": 2.2247314453125e-05,
      "model_forward_time": 0.1148824691772461,
      "step": 3645
    },
    {
      "epoch": 2.2247314453125e-05,
      "step": 3645,
      "training_step_time": 0.39705705642700195
    },
    {
      "epoch": 2.225341796875e-05,
      "model_forward_time": 0.11572408676147461,
      "step": 3646
    },
    {
      "epoch": 2.225341796875e-05,
      "step": 3646,
      "training_step_time": 0.38655877113342285
    },
    {
      "epoch": 2.2259521484375e-05,
      "model_forward_time": 0.11530137062072754,
      "step": 3647
    },
    {
      "epoch": 2.2259521484375e-05,
      "step": 3647,
      "training_step_time": 0.42533326148986816
    },
    {
      "epoch": 2.2265625e-05,
      "model_forward_time": 0.11584734916687012,
      "step": 3648
    },
    {
      "epoch": 2.2265625e-05,
      "step": 3648,
      "training_step_time": 0.47458505630493164
    },
    {
      "epoch": 2.2271728515625e-05,
      "model_forward_time": 0.11495280265808105,
      "step": 3649
    },
    {
      "epoch": 2.2271728515625e-05,
      "step": 3649,
      "training_step_time": 0.44637179374694824
    },
    {
      "epoch": 2.227783203125e-05,
      "grad_norm": 0.329451322555542,
      "learning_rate": 9.996791734463077e-05,
      "loss": 0.1199,
      "step": 3650
    },
    {
      "epoch": 2.227783203125e-05,
      "model_forward_time": 0.11431527137756348,
      "step": 3650
    },
    {
      "epoch": 2.227783203125e-05,
      "step": 3650,
      "training_step_time": 0.5069575309753418
    },
    {
      "epoch": 2.2283935546875e-05,
      "model_forward_time": 0.11466670036315918,
      "step": 3651
    },
    {
      "epoch": 2.2283935546875e-05,
      "step": 3651,
      "training_step_time": 0.4511752128601074
    },
    {
      "epoch": 2.22900390625e-05,
      "model_forward_time": 0.11476755142211914,
      "step": 3652
    },
    {
      "epoch": 2.22900390625e-05,
      "step": 3652,
      "training_step_time": 0.4730653762817383
    },
    {
      "epoch": 2.2296142578125e-05,
      "model_forward_time": 0.11570930480957031,
      "step": 3653
    },
    {
      "epoch": 2.2296142578125e-05,
      "step": 3653,
      "training_step_time": 0.39094972610473633
    },
    {
      "epoch": 2.230224609375e-05,
      "model_forward_time": 0.1161048412322998,
      "step": 3654
    },
    {
      "epoch": 2.230224609375e-05,
      "step": 3654,
      "training_step_time": 0.39572978019714355
    },
    {
      "epoch": 2.2308349609375e-05,
      "model_forward_time": 0.11584687232971191,
      "step": 3655
    },
    {
      "epoch": 2.2308349609375e-05,
      "step": 3655,
      "training_step_time": 0.3913438320159912
    },
    {
      "epoch": 2.2314453125e-05,
      "model_forward_time": 0.11514544486999512,
      "step": 3656
    },
    {
      "epoch": 2.2314453125e-05,
      "step": 3656,
      "training_step_time": 0.39351677894592285
    },
    {
      "epoch": 2.2320556640625e-05,
      "model_forward_time": 0.11540341377258301,
      "step": 3657
    },
    {
      "epoch": 2.2320556640625e-05,
      "step": 3657,
      "training_step_time": 0.3925604820251465
    },
    {
      "epoch": 2.232666015625e-05,
      "model_forward_time": 0.1148366928100586,
      "step": 3658
    },
    {
      "epoch": 2.232666015625e-05,
      "step": 3658,
      "training_step_time": 0.3978431224822998
    },
    {
      "epoch": 2.2332763671875e-05,
      "model_forward_time": 0.11516284942626953,
      "step": 3659
    },
    {
      "epoch": 2.2332763671875e-05,
      "step": 3659,
      "training_step_time": 0.39505815505981445
    },
    {
      "epoch": 2.23388671875e-05,
      "grad_norm": 0.2513203024864197,
      "learning_rate": 9.996692270216947e-05,
      "loss": 0.1067,
      "step": 3660
    },
    {
      "epoch": 2.23388671875e-05,
      "model_forward_time": 0.11556887626647949,
      "step": 3660
    },
    {
      "epoch": 2.23388671875e-05,
      "step": 3660,
      "training_step_time": 0.581575870513916
    },
    {
      "epoch": 2.2344970703125e-05,
      "model_forward_time": 0.11553287506103516,
      "step": 3661
    },
    {
      "epoch": 2.2344970703125e-05,
      "step": 3661,
      "training_step_time": 0.3923807144165039
    },
    {
      "epoch": 2.235107421875e-05,
      "model_forward_time": 0.11535143852233887,
      "step": 3662
    },
    {
      "epoch": 2.235107421875e-05,
      "step": 3662,
      "training_step_time": 0.4792170524597168
    },
    {
      "epoch": 2.2357177734375e-05,
      "model_forward_time": 0.11519980430603027,
      "step": 3663
    },
    {
      "epoch": 2.2357177734375e-05,
      "step": 3663,
      "training_step_time": 0.43795061111450195
    },
    {
      "epoch": 2.236328125e-05,
      "model_forward_time": 0.11505627632141113,
      "step": 3664
    },
    {
      "epoch": 2.236328125e-05,
      "step": 3664,
      "training_step_time": 0.4408276081085205
    },
    {
      "epoch": 2.2369384765625e-05,
      "model_forward_time": 0.11541056632995605,
      "step": 3665
    },
    {
      "epoch": 2.2369384765625e-05,
      "step": 3665,
      "training_step_time": 0.4801614284515381
    },
    {
      "epoch": 2.237548828125e-05,
      "model_forward_time": 0.1155996322631836,
      "step": 3666
    },
    {
      "epoch": 2.237548828125e-05,
      "step": 3666,
      "training_step_time": 0.4842219352722168
    },
    {
      "epoch": 2.2381591796875e-05,
      "model_forward_time": 0.11533975601196289,
      "step": 3667
    },
    {
      "epoch": 2.2381591796875e-05,
      "step": 3667,
      "training_step_time": 0.5185832977294922
    },
    {
      "epoch": 2.23876953125e-05,
      "model_forward_time": 0.1151115894317627,
      "step": 3668
    },
    {
      "epoch": 2.23876953125e-05,
      "step": 3668,
      "training_step_time": 0.392122745513916
    },
    {
      "epoch": 2.2393798828125e-05,
      "model_forward_time": 0.11500692367553711,
      "step": 3669
    },
    {
      "epoch": 2.2393798828125e-05,
      "step": 3669,
      "training_step_time": 0.39066433906555176
    },
    {
      "epoch": 2.239990234375e-05,
      "grad_norm": 0.38557761907577515,
      "learning_rate": 9.996591288107635e-05,
      "loss": 0.1164,
      "step": 3670
    },
    {
      "epoch": 2.239990234375e-05,
      "model_forward_time": 0.1152944564819336,
      "step": 3670
    },
    {
      "epoch": 2.239990234375e-05,
      "step": 3670,
      "training_step_time": 0.39525890350341797
    },
    {
      "epoch": 2.2406005859375e-05,
      "model_forward_time": 0.1152188777923584,
      "step": 3671
    },
    {
      "epoch": 2.2406005859375e-05,
      "step": 3671,
      "training_step_time": 0.3946530818939209
    },
    {
      "epoch": 2.2412109375e-05,
      "model_forward_time": 0.11490845680236816,
      "step": 3672
    },
    {
      "epoch": 2.2412109375e-05,
      "step": 3672,
      "training_step_time": 0.3989558219909668
    },
    {
      "epoch": 2.2418212890625e-05,
      "model_forward_time": 0.11671280860900879,
      "step": 3673
    },
    {
      "epoch": 2.2418212890625e-05,
      "step": 3673,
      "training_step_time": 0.3946349620819092
    },
    {
      "epoch": 2.242431640625e-05,
      "model_forward_time": 0.11553716659545898,
      "step": 3674
    },
    {
      "epoch": 2.242431640625e-05,
      "step": 3674,
      "training_step_time": 0.394376277923584
    },
    {
      "epoch": 2.2430419921875e-05,
      "model_forward_time": 0.11587047576904297,
      "step": 3675
    },
    {
      "epoch": 2.2430419921875e-05,
      "step": 3675,
      "training_step_time": 0.40091466903686523
    },
    {
      "epoch": 2.24365234375e-05,
      "model_forward_time": 0.11499691009521484,
      "step": 3676
    },
    {
      "epoch": 2.24365234375e-05,
      "step": 3676,
      "training_step_time": 0.4280424118041992
    },
    {
      "epoch": 2.2442626953125e-05,
      "model_forward_time": 0.11518716812133789,
      "step": 3677
    },
    {
      "epoch": 2.2442626953125e-05,
      "step": 3677,
      "training_step_time": 0.44553327560424805
    },
    {
      "epoch": 2.244873046875e-05,
      "model_forward_time": 0.11556005477905273,
      "step": 3678
    },
    {
      "epoch": 2.244873046875e-05,
      "step": 3678,
      "training_step_time": 0.7680678367614746
    },
    {
      "epoch": 2.2454833984375e-05,
      "model_forward_time": 0.11521291732788086,
      "step": 3679
    },
    {
      "epoch": 2.2454833984375e-05,
      "step": 3679,
      "training_step_time": 0.39376258850097656
    },
    {
      "epoch": 2.24609375e-05,
      "grad_norm": 0.41717761754989624,
      "learning_rate": 9.996488788165816e-05,
      "loss": 0.1198,
      "step": 3680
    },
    {
      "epoch": 2.24609375e-05,
      "model_forward_time": 0.11434507369995117,
      "step": 3680
    },
    {
      "epoch": 2.24609375e-05,
      "step": 3680,
      "training_step_time": 0.47167372703552246
    },
    {
      "epoch": 2.2467041015625e-05,
      "model_forward_time": 0.11559009552001953,
      "step": 3681
    },
    {
      "epoch": 2.2467041015625e-05,
      "step": 3681,
      "training_step_time": 0.4761514663696289
    },
    {
      "epoch": 2.247314453125e-05,
      "model_forward_time": 0.11591100692749023,
      "step": 3682
    },
    {
      "epoch": 2.247314453125e-05,
      "step": 3682,
      "training_step_time": 0.379504919052124
    },
    {
      "epoch": 2.2479248046875e-05,
      "model_forward_time": 0.11498451232910156,
      "step": 3683
    },
    {
      "epoch": 2.2479248046875e-05,
      "step": 3683,
      "training_step_time": 0.3845252990722656
    },
    {
      "epoch": 2.24853515625e-05,
      "model_forward_time": 0.11501717567443848,
      "step": 3684
    },
    {
      "epoch": 2.24853515625e-05,
      "step": 3684,
      "training_step_time": 0.39821696281433105
    },
    {
      "epoch": 2.2491455078125e-05,
      "model_forward_time": 0.11444807052612305,
      "step": 3685
    },
    {
      "epoch": 2.2491455078125e-05,
      "step": 3685,
      "training_step_time": 0.39283084869384766
    },
    {
      "epoch": 2.249755859375e-05,
      "model_forward_time": 0.1149590015411377,
      "step": 3686
    },
    {
      "epoch": 2.249755859375e-05,
      "step": 3686,
      "training_step_time": 0.3922905921936035
    },
    {
      "epoch": 2.2503662109375e-05,
      "model_forward_time": 0.11532855033874512,
      "step": 3687
    },
    {
      "epoch": 2.2503662109375e-05,
      "step": 3687,
      "training_step_time": 0.4022352695465088
    },
    {
      "epoch": 2.2509765625e-05,
      "model_forward_time": 0.11551642417907715,
      "step": 3688
    },
    {
      "epoch": 2.2509765625e-05,
      "step": 3688,
      "training_step_time": 0.3870272636413574
    },
    {
      "epoch": 2.2515869140625e-05,
      "model_forward_time": 0.11688613891601562,
      "step": 3689
    },
    {
      "epoch": 2.2515869140625e-05,
      "step": 3689,
      "training_step_time": 0.4039309024810791
    },
    {
      "epoch": 2.252197265625e-05,
      "grad_norm": 0.31178760528564453,
      "learning_rate": 9.996384770422629e-05,
      "loss": 0.1232,
      "step": 3690
    },
    {
      "epoch": 2.252197265625e-05,
      "model_forward_time": 0.1154167652130127,
      "step": 3690
    },
    {
      "epoch": 2.252197265625e-05,
      "step": 3690,
      "training_step_time": 0.6624138355255127
    },
    {
      "epoch": 2.2528076171875e-05,
      "model_forward_time": 0.1148841381072998,
      "step": 3691
    },
    {
      "epoch": 2.2528076171875e-05,
      "step": 3691,
      "training_step_time": 0.487454891204834
    },
    {
      "epoch": 2.25341796875e-05,
      "model_forward_time": 0.11426830291748047,
      "step": 3692
    },
    {
      "epoch": 2.25341796875e-05,
      "step": 3692,
      "training_step_time": 0.4368932247161865
    },
    {
      "epoch": 2.2540283203125e-05,
      "model_forward_time": 0.11580133438110352,
      "step": 3693
    },
    {
      "epoch": 2.2540283203125e-05,
      "step": 3693,
      "training_step_time": 0.43958115577697754
    },
    {
      "epoch": 2.254638671875e-05,
      "model_forward_time": 0.11552667617797852,
      "step": 3694
    },
    {
      "epoch": 2.254638671875e-05,
      "step": 3694,
      "training_step_time": 0.4133265018463135
    },
    {
      "epoch": 2.2552490234375e-05,
      "model_forward_time": 0.11579346656799316,
      "step": 3695
    },
    {
      "epoch": 2.2552490234375e-05,
      "step": 3695,
      "training_step_time": 0.4175539016723633
    },
    {
      "epoch": 2.255859375e-05,
      "model_forward_time": 0.11549067497253418,
      "step": 3696
    },
    {
      "epoch": 2.255859375e-05,
      "step": 3696,
      "training_step_time": 0.40096282958984375
    },
    {
      "epoch": 2.2564697265625e-05,
      "model_forward_time": 0.11545443534851074,
      "step": 3697
    },
    {
      "epoch": 2.2564697265625e-05,
      "step": 3697,
      "training_step_time": 0.3971388339996338
    },
    {
      "epoch": 2.257080078125e-05,
      "model_forward_time": 0.11551690101623535,
      "step": 3698
    },
    {
      "epoch": 2.257080078125e-05,
      "step": 3698,
      "training_step_time": 0.3980116844177246
    },
    {
      "epoch": 2.2576904296875e-05,
      "model_forward_time": 0.11577081680297852,
      "step": 3699
    },
    {
      "epoch": 2.2576904296875e-05,
      "step": 3699,
      "training_step_time": 0.40158772468566895
    },
    {
      "epoch": 2.25830078125e-05,
      "grad_norm": 0.3392006456851959,
      "learning_rate": 9.996279234909671e-05,
      "loss": 0.1149,
      "step": 3700
    },
    {
      "epoch": 2.25830078125e-05,
      "model_forward_time": 0.11476349830627441,
      "step": 3700
    },
    {
      "epoch": 2.25830078125e-05,
      "step": 3700,
      "training_step_time": 0.39749956130981445
    },
    {
      "epoch": 2.2589111328125e-05,
      "model_forward_time": 0.11550569534301758,
      "step": 3701
    },
    {
      "epoch": 2.2589111328125e-05,
      "step": 3701,
      "training_step_time": 0.40190768241882324
    },
    {
      "epoch": 2.259521484375e-05,
      "model_forward_time": 0.11536955833435059,
      "step": 3702
    },
    {
      "epoch": 2.259521484375e-05,
      "step": 3702,
      "training_step_time": 0.5783586502075195
    },
    {
      "epoch": 2.2601318359375e-05,
      "model_forward_time": 0.1149449348449707,
      "step": 3703
    },
    {
      "epoch": 2.2601318359375e-05,
      "step": 3703,
      "training_step_time": 0.4326138496398926
    },
    {
      "epoch": 2.2607421875e-05,
      "model_forward_time": 0.11487221717834473,
      "step": 3704
    },
    {
      "epoch": 2.2607421875e-05,
      "step": 3704,
      "training_step_time": 0.41849493980407715
    },
    {
      "epoch": 2.2613525390625e-05,
      "model_forward_time": 0.11519718170166016,
      "step": 3705
    },
    {
      "epoch": 2.2613525390625e-05,
      "step": 3705,
      "training_step_time": 0.44918346405029297
    },
    {
      "epoch": 2.261962890625e-05,
      "model_forward_time": 0.11487698554992676,
      "step": 3706
    },
    {
      "epoch": 2.261962890625e-05,
      "step": 3706,
      "training_step_time": 0.47365403175354004
    },
    {
      "epoch": 2.2625732421875e-05,
      "model_forward_time": 0.11577510833740234,
      "step": 3707
    },
    {
      "epoch": 2.2625732421875e-05,
      "step": 3707,
      "training_step_time": 0.37270665168762207
    },
    {
      "epoch": 2.26318359375e-05,
      "model_forward_time": 0.11525893211364746,
      "step": 3708
    },
    {
      "epoch": 2.26318359375e-05,
      "step": 3708,
      "training_step_time": 0.3972129821777344
    },
    {
      "epoch": 2.2637939453125e-05,
      "model_forward_time": 0.11474490165710449,
      "step": 3709
    },
    {
      "epoch": 2.2637939453125e-05,
      "step": 3709,
      "training_step_time": 0.42172718048095703
    },
    {
      "epoch": 2.264404296875e-05,
      "grad_norm": 0.38945308327674866,
      "learning_rate": 9.996172181659e-05,
      "loss": 0.1168,
      "step": 3710
    },
    {
      "epoch": 2.264404296875e-05,
      "model_forward_time": 0.11510992050170898,
      "step": 3710
    },
    {
      "epoch": 2.264404296875e-05,
      "step": 3710,
      "training_step_time": 0.3885786533355713
    },
    {
      "epoch": 2.2650146484375e-05,
      "model_forward_time": 0.11520004272460938,
      "step": 3711
    },
    {
      "epoch": 2.2650146484375e-05,
      "step": 3711,
      "training_step_time": 0.3882710933685303
    },
    {
      "epoch": 2.265625e-05,
      "model_forward_time": 0.11487817764282227,
      "step": 3712
    },
    {
      "epoch": 2.265625e-05,
      "step": 3712,
      "training_step_time": 0.4059104919433594
    },
    {
      "epoch": 2.2662353515625e-05,
      "model_forward_time": 0.11499333381652832,
      "step": 3713
    },
    {
      "epoch": 2.2662353515625e-05,
      "step": 3713,
      "training_step_time": 0.3905963897705078
    },
    {
      "epoch": 2.266845703125e-05,
      "model_forward_time": 0.1163027286529541,
      "step": 3714
    },
    {
      "epoch": 2.266845703125e-05,
      "step": 3714,
      "training_step_time": 0.5454263687133789
    },
    {
      "epoch": 2.2674560546875e-05,
      "model_forward_time": 0.11568403244018555,
      "step": 3715
    },
    {
      "epoch": 2.2674560546875e-05,
      "step": 3715,
      "training_step_time": 0.41826963424682617
    },
    {
      "epoch": 2.26806640625e-05,
      "model_forward_time": 0.11516475677490234,
      "step": 3716
    },
    {
      "epoch": 2.26806640625e-05,
      "step": 3716,
      "training_step_time": 0.40914011001586914
    },
    {
      "epoch": 2.2686767578125e-05,
      "model_forward_time": 0.11643052101135254,
      "step": 3717
    },
    {
      "epoch": 2.2686767578125e-05,
      "step": 3717,
      "training_step_time": 0.38945698738098145
    },
    {
      "epoch": 2.269287109375e-05,
      "model_forward_time": 0.1156313419342041,
      "step": 3718
    },
    {
      "epoch": 2.269287109375e-05,
      "step": 3718,
      "training_step_time": 0.4039428234100342
    },
    {
      "epoch": 2.2698974609375e-05,
      "model_forward_time": 0.11570930480957031,
      "step": 3719
    },
    {
      "epoch": 2.2698974609375e-05,
      "step": 3719,
      "training_step_time": 0.39787745475769043
    },
    {
      "epoch": 2.2705078125e-05,
      "grad_norm": 0.2935015857219696,
      "learning_rate": 9.996063610703137e-05,
      "loss": 0.1211,
      "step": 3720
    },
    {
      "epoch": 2.2705078125e-05,
      "model_forward_time": 0.11531829833984375,
      "step": 3720
    },
    {
      "epoch": 2.2705078125e-05,
      "step": 3720,
      "training_step_time": 0.598459005355835
    },
    {
      "epoch": 2.2711181640625e-05,
      "model_forward_time": 0.11578822135925293,
      "step": 3721
    },
    {
      "epoch": 2.2711181640625e-05,
      "step": 3721,
      "training_step_time": 0.3689570426940918
    },
    {
      "epoch": 2.271728515625e-05,
      "model_forward_time": 0.11495757102966309,
      "step": 3722
    },
    {
      "epoch": 2.271728515625e-05,
      "step": 3722,
      "training_step_time": 0.48542356491088867
    },
    {
      "epoch": 2.2723388671875e-05,
      "model_forward_time": 0.11459970474243164,
      "step": 3723
    },
    {
      "epoch": 2.2723388671875e-05,
      "step": 3723,
      "training_step_time": 0.4842102527618408
    },
    {
      "epoch": 2.27294921875e-05,
      "model_forward_time": 0.11389541625976562,
      "step": 3724
    },
    {
      "epoch": 2.27294921875e-05,
      "step": 3724,
      "training_step_time": 0.3808708190917969
    },
    {
      "epoch": 2.2735595703125e-05,
      "model_forward_time": 0.11481261253356934,
      "step": 3725
    },
    {
      "epoch": 2.2735595703125e-05,
      "step": 3725,
      "training_step_time": 0.3819303512573242
    },
    {
      "epoch": 2.274169921875e-05,
      "model_forward_time": 0.1152951717376709,
      "step": 3726
    },
    {
      "epoch": 2.274169921875e-05,
      "step": 3726,
      "training_step_time": 0.398409366607666
    },
    {
      "epoch": 2.2747802734375e-05,
      "model_forward_time": 0.11562585830688477,
      "step": 3727
    },
    {
      "epoch": 2.2747802734375e-05,
      "step": 3727,
      "training_step_time": 0.38994646072387695
    },
    {
      "epoch": 2.275390625e-05,
      "model_forward_time": 0.11588740348815918,
      "step": 3728
    },
    {
      "epoch": 2.275390625e-05,
      "step": 3728,
      "training_step_time": 0.39780378341674805
    },
    {
      "epoch": 2.2760009765625e-05,
      "model_forward_time": 0.11534357070922852,
      "step": 3729
    },
    {
      "epoch": 2.2760009765625e-05,
      "step": 3729,
      "training_step_time": 0.45115065574645996
    },
    {
      "epoch": 2.276611328125e-05,
      "grad_norm": 0.4675724506378174,
      "learning_rate": 9.995953522075061e-05,
      "loss": 0.1103,
      "step": 3730
    },
    {
      "epoch": 2.276611328125e-05,
      "model_forward_time": 0.11540699005126953,
      "step": 3730
    },
    {
      "epoch": 2.276611328125e-05,
      "step": 3730,
      "training_step_time": 0.4727451801300049
    },
    {
      "epoch": 2.2772216796875e-05,
      "model_forward_time": 0.11446928977966309,
      "step": 3731
    },
    {
      "epoch": 2.2772216796875e-05,
      "step": 3731,
      "training_step_time": 0.39077115058898926
    },
    {
      "epoch": 2.27783203125e-05,
      "model_forward_time": 0.11527276039123535,
      "step": 3732
    },
    {
      "epoch": 2.27783203125e-05,
      "step": 3732,
      "training_step_time": 0.5337810516357422
    },
    {
      "epoch": 2.2784423828125e-05,
      "model_forward_time": 0.11450934410095215,
      "step": 3733
    },
    {
      "epoch": 2.2784423828125e-05,
      "step": 3733,
      "training_step_time": 0.41962623596191406
    },
    {
      "epoch": 2.279052734375e-05,
      "model_forward_time": 0.1152200698852539,
      "step": 3734
    },
    {
      "epoch": 2.279052734375e-05,
      "step": 3734,
      "training_step_time": 0.42496371269226074
    },
    {
      "epoch": 2.2796630859375e-05,
      "model_forward_time": 0.11509227752685547,
      "step": 3735
    },
    {
      "epoch": 2.2796630859375e-05,
      "step": 3735,
      "training_step_time": 0.4042942523956299
    },
    {
      "epoch": 2.2802734375e-05,
      "model_forward_time": 0.11525917053222656,
      "step": 3736
    },
    {
      "epoch": 2.2802734375e-05,
      "step": 3736,
      "training_step_time": 0.4003427028656006
    },
    {
      "epoch": 2.2808837890625e-05,
      "model_forward_time": 0.11525368690490723,
      "step": 3737
    },
    {
      "epoch": 2.2808837890625e-05,
      "step": 3737,
      "training_step_time": 0.47493958473205566
    },
    {
      "epoch": 2.281494140625e-05,
      "model_forward_time": 0.11579251289367676,
      "step": 3738
    },
    {
      "epoch": 2.281494140625e-05,
      "step": 3738,
      "training_step_time": 0.5229341983795166
    },
    {
      "epoch": 2.2821044921875e-05,
      "model_forward_time": 0.11665558815002441,
      "step": 3739
    },
    {
      "epoch": 2.2821044921875e-05,
      "step": 3739,
      "training_step_time": 0.4689924716949463
    },
    {
      "epoch": 2.28271484375e-05,
      "grad_norm": 0.31484225392341614,
      "learning_rate": 9.995841915808218e-05,
      "loss": 0.117,
      "step": 3740
    },
    {
      "epoch": 2.28271484375e-05,
      "model_forward_time": 0.1195833683013916,
      "step": 3740
    },
    {
      "epoch": 2.28271484375e-05,
      "step": 3740,
      "training_step_time": 0.5789620876312256
    },
    {
      "epoch": 2.2833251953125e-05,
      "model_forward_time": 0.11866283416748047,
      "step": 3741
    },
    {
      "epoch": 2.2833251953125e-05,
      "step": 3741,
      "training_step_time": 0.5782816410064697
    },
    {
      "epoch": 2.283935546875e-05,
      "model_forward_time": 0.12098932266235352,
      "step": 3742
    },
    {
      "epoch": 2.283935546875e-05,
      "step": 3742,
      "training_step_time": 0.6511125564575195
    },
    {
      "epoch": 2.2845458984375e-05,
      "model_forward_time": 0.12132501602172852,
      "step": 3743
    },
    {
      "epoch": 2.2845458984375e-05,
      "step": 3743,
      "training_step_time": 0.6683506965637207
    },
    {
      "epoch": 2.28515625e-05,
      "model_forward_time": 0.11882233619689941,
      "step": 3744
    },
    {
      "epoch": 2.28515625e-05,
      "step": 3744,
      "training_step_time": 0.6979489326477051
    },
    {
      "epoch": 2.2857666015625e-05,
      "model_forward_time": 0.12042951583862305,
      "step": 3745
    },
    {
      "epoch": 2.2857666015625e-05,
      "step": 3745,
      "training_step_time": 0.7089288234710693
    },
    {
      "epoch": 2.286376953125e-05,
      "model_forward_time": 0.12053561210632324,
      "step": 3746
    },
    {
      "epoch": 2.286376953125e-05,
      "step": 3746,
      "training_step_time": 0.7249736785888672
    },
    {
      "epoch": 2.2869873046875e-05,
      "model_forward_time": 0.12193155288696289,
      "step": 3747
    },
    {
      "epoch": 2.2869873046875e-05,
      "step": 3747,
      "training_step_time": 0.730518102645874
    },
    {
      "epoch": 2.28759765625e-05,
      "model_forward_time": 0.12143087387084961,
      "step": 3748
    },
    {
      "epoch": 2.28759765625e-05,
      "step": 3748,
      "training_step_time": 0.6920473575592041
    },
    {
      "epoch": 2.2882080078125e-05,
      "model_forward_time": 0.1201622486114502,
      "step": 3749
    },
    {
      "epoch": 2.2882080078125e-05,
      "step": 3749,
      "training_step_time": 0.6052961349487305
    },
    {
      "epoch": 2.288818359375e-05,
      "grad_norm": 0.40565183758735657,
      "learning_rate": 9.995728791936504e-05,
      "loss": 0.1146,
      "step": 3750
    },
    {
      "epoch": 2.288818359375e-05,
      "model_forward_time": 0.11991381645202637,
      "step": 3750
    },
    {
      "epoch": 2.288818359375e-05,
      "step": 3750,
      "training_step_time": 0.7050454616546631
    },
    {
      "epoch": 2.2894287109375e-05,
      "model_forward_time": 0.1215047836303711,
      "step": 3751
    },
    {
      "epoch": 2.2894287109375e-05,
      "step": 3751,
      "training_step_time": 0.6748349666595459
    },
    {
      "epoch": 2.2900390625e-05,
      "model_forward_time": 0.1186990737915039,
      "step": 3752
    },
    {
      "epoch": 2.2900390625e-05,
      "step": 3752,
      "training_step_time": 0.6181936264038086
    },
    {
      "epoch": 2.2906494140625e-05,
      "model_forward_time": 0.12252283096313477,
      "step": 3753
    },
    {
      "epoch": 2.2906494140625e-05,
      "step": 3753,
      "training_step_time": 0.6147165298461914
    },
    {
      "epoch": 2.291259765625e-05,
      "model_forward_time": 0.1276400089263916,
      "step": 3754
    },
    {
      "epoch": 2.291259765625e-05,
      "step": 3754,
      "training_step_time": 0.6004753112792969
    },
    {
      "epoch": 2.2918701171875e-05,
      "model_forward_time": 0.11905980110168457,
      "step": 3755
    },
    {
      "epoch": 2.2918701171875e-05,
      "step": 3755,
      "training_step_time": 0.6942610740661621
    },
    {
      "epoch": 2.29248046875e-05,
      "model_forward_time": 0.12436461448669434,
      "step": 3756
    },
    {
      "epoch": 2.29248046875e-05,
      "step": 3756,
      "training_step_time": 0.7567586898803711
    },
    {
      "epoch": 2.2930908203125e-05,
      "model_forward_time": 0.11955761909484863,
      "step": 3757
    },
    {
      "epoch": 2.2930908203125e-05,
      "step": 3757,
      "training_step_time": 0.6857798099517822
    },
    {
      "epoch": 2.293701171875e-05,
      "model_forward_time": 0.1212620735168457,
      "step": 3758
    },
    {
      "epoch": 2.293701171875e-05,
      "step": 3758,
      "training_step_time": 0.7173752784729004
    },
    {
      "epoch": 2.2943115234375e-05,
      "model_forward_time": 0.11598753929138184,
      "step": 3759
    },
    {
      "epoch": 2.2943115234375e-05,
      "step": 3759,
      "training_step_time": 0.6476571559906006
    },
    {
      "epoch": 2.294921875e-05,
      "grad_norm": 0.2696101665496826,
      "learning_rate": 9.995614150494293e-05,
      "loss": 0.1138,
      "step": 3760
    },
    {
      "epoch": 2.294921875e-05,
      "model_forward_time": 0.12007021903991699,
      "step": 3760
    },
    {
      "epoch": 2.294921875e-05,
      "step": 3760,
      "training_step_time": 0.633655309677124
    },
    {
      "epoch": 2.2955322265625e-05,
      "model_forward_time": 0.11671185493469238,
      "step": 3761
    },
    {
      "epoch": 2.2955322265625e-05,
      "step": 3761,
      "training_step_time": 0.6761183738708496
    },
    {
      "epoch": 2.296142578125e-05,
      "model_forward_time": 0.12263917922973633,
      "step": 3762
    },
    {
      "epoch": 2.296142578125e-05,
      "step": 3762,
      "training_step_time": 0.6228182315826416
    },
    {
      "epoch": 2.2967529296875e-05,
      "model_forward_time": 0.1213996410369873,
      "step": 3763
    },
    {
      "epoch": 2.2967529296875e-05,
      "step": 3763,
      "training_step_time": 0.6621992588043213
    },
    {
      "epoch": 2.29736328125e-05,
      "model_forward_time": 0.11605048179626465,
      "step": 3764
    },
    {
      "epoch": 2.29736328125e-05,
      "step": 3764,
      "training_step_time": 0.7185068130493164
    },
    {
      "epoch": 2.2979736328125e-05,
      "model_forward_time": 0.12580466270446777,
      "step": 3765
    },
    {
      "epoch": 2.2979736328125e-05,
      "step": 3765,
      "training_step_time": 0.7841424942016602
    },
    {
      "epoch": 2.298583984375e-05,
      "model_forward_time": 0.1165151596069336,
      "step": 3766
    },
    {
      "epoch": 2.298583984375e-05,
      "step": 3766,
      "training_step_time": 0.5862023830413818
    },
    {
      "epoch": 2.2991943359375e-05,
      "model_forward_time": 0.12056851387023926,
      "step": 3767
    },
    {
      "epoch": 2.2991943359375e-05,
      "step": 3767,
      "training_step_time": 0.6629431247711182
    },
    {
      "epoch": 2.2998046875e-05,
      "model_forward_time": 0.12034034729003906,
      "step": 3768
    },
    {
      "epoch": 2.2998046875e-05,
      "step": 3768,
      "training_step_time": 0.6763756275177002
    },
    {
      "epoch": 2.3004150390625e-05,
      "model_forward_time": 0.11890220642089844,
      "step": 3769
    },
    {
      "epoch": 2.3004150390625e-05,
      "step": 3769,
      "training_step_time": 0.655998706817627
    },
    {
      "epoch": 2.301025390625e-05,
      "grad_norm": 0.30390113592147827,
      "learning_rate": 9.995497991516401e-05,
      "loss": 0.126,
      "step": 3770
    },
    {
      "epoch": 2.301025390625e-05,
      "model_forward_time": 0.11974430084228516,
      "step": 3770
    },
    {
      "epoch": 2.301025390625e-05,
      "step": 3770,
      "training_step_time": 0.6714603900909424
    },
    {
      "epoch": 2.3016357421875e-05,
      "model_forward_time": 0.11611533164978027,
      "step": 3771
    },
    {
      "epoch": 2.3016357421875e-05,
      "step": 3771,
      "training_step_time": 0.6766695976257324
    },
    {
      "epoch": 2.30224609375e-05,
      "model_forward_time": 0.12297701835632324,
      "step": 3772
    },
    {
      "epoch": 2.30224609375e-05,
      "step": 3772,
      "training_step_time": 0.7731945514678955
    },
    {
      "epoch": 2.3028564453125e-05,
      "model_forward_time": 0.11678075790405273,
      "step": 3773
    },
    {
      "epoch": 2.3028564453125e-05,
      "step": 3773,
      "training_step_time": 0.647719144821167
    },
    {
      "epoch": 2.303466796875e-05,
      "model_forward_time": 0.11914563179016113,
      "step": 3774
    },
    {
      "epoch": 2.303466796875e-05,
      "step": 3774,
      "training_step_time": 0.6825511455535889
    },
    {
      "epoch": 2.3040771484375e-05,
      "model_forward_time": 0.12148356437683105,
      "step": 3775
    },
    {
      "epoch": 2.3040771484375e-05,
      "step": 3775,
      "training_step_time": 0.7469308376312256
    },
    {
      "epoch": 2.3046875e-05,
      "model_forward_time": 0.1191251277923584,
      "step": 3776
    },
    {
      "epoch": 2.3046875e-05,
      "step": 3776,
      "training_step_time": 0.6859393119812012
    },
    {
      "epoch": 2.3052978515625e-05,
      "model_forward_time": 0.12006354331970215,
      "step": 3777
    },
    {
      "epoch": 2.3052978515625e-05,
      "step": 3777,
      "training_step_time": 0.7324643135070801
    },
    {
      "epoch": 2.305908203125e-05,
      "model_forward_time": 0.11695289611816406,
      "step": 3778
    },
    {
      "epoch": 2.305908203125e-05,
      "step": 3778,
      "training_step_time": 0.7122762203216553
    },
    {
      "epoch": 2.3065185546875e-05,
      "model_forward_time": 0.11850333213806152,
      "step": 3779
    },
    {
      "epoch": 2.3065185546875e-05,
      "step": 3779,
      "training_step_time": 0.7102344036102295
    },
    {
      "epoch": 2.30712890625e-05,
      "grad_norm": 0.3397531509399414,
      "learning_rate": 9.995380315038119e-05,
      "loss": 0.1174,
      "step": 3780
    },
    {
      "epoch": 2.30712890625e-05,
      "model_forward_time": 0.11551141738891602,
      "step": 3780
    },
    {
      "epoch": 2.30712890625e-05,
      "step": 3780,
      "training_step_time": 0.7349772453308105
    },
    {
      "epoch": 2.3077392578125e-05,
      "model_forward_time": 0.12362217903137207,
      "step": 3781
    },
    {
      "epoch": 2.3077392578125e-05,
      "step": 3781,
      "training_step_time": 0.7486567497253418
    },
    {
      "epoch": 2.308349609375e-05,
      "model_forward_time": 0.11967158317565918,
      "step": 3782
    },
    {
      "epoch": 2.308349609375e-05,
      "step": 3782,
      "training_step_time": 0.6443221569061279
    },
    {
      "epoch": 2.3089599609375e-05,
      "model_forward_time": 0.12056994438171387,
      "step": 3783
    },
    {
      "epoch": 2.3089599609375e-05,
      "step": 3783,
      "training_step_time": 0.7424130439758301
    },
    {
      "epoch": 2.3095703125e-05,
      "model_forward_time": 0.11747550964355469,
      "step": 3784
    },
    {
      "epoch": 2.3095703125e-05,
      "step": 3784,
      "training_step_time": 0.6285367012023926
    },
    {
      "epoch": 2.3101806640625e-05,
      "model_forward_time": 0.12003135681152344,
      "step": 3785
    },
    {
      "epoch": 2.3101806640625e-05,
      "step": 3785,
      "training_step_time": 0.689359188079834
    },
    {
      "epoch": 2.310791015625e-05,
      "model_forward_time": 0.12381958961486816,
      "step": 3786
    },
    {
      "epoch": 2.310791015625e-05,
      "step": 3786,
      "training_step_time": 0.6537144184112549
    },
    {
      "epoch": 2.3114013671875e-05,
      "model_forward_time": 0.11804795265197754,
      "step": 3787
    },
    {
      "epoch": 2.3114013671875e-05,
      "step": 3787,
      "training_step_time": 0.6500279903411865
    },
    {
      "epoch": 2.31201171875e-05,
      "model_forward_time": 0.11636686325073242,
      "step": 3788
    },
    {
      "epoch": 2.31201171875e-05,
      "step": 3788,
      "training_step_time": 0.6818699836730957
    },
    {
      "epoch": 2.3126220703125e-05,
      "model_forward_time": 0.12122607231140137,
      "step": 3789
    },
    {
      "epoch": 2.3126220703125e-05,
      "step": 3789,
      "training_step_time": 0.6580138206481934
    },
    {
      "epoch": 2.313232421875e-05,
      "grad_norm": 0.3532073497772217,
      "learning_rate": 9.995261121095194e-05,
      "loss": 0.1123,
      "step": 3790
    },
    {
      "epoch": 2.313232421875e-05,
      "model_forward_time": 0.13574624061584473,
      "step": 3790
    },
    {
      "epoch": 2.313232421875e-05,
      "step": 3790,
      "training_step_time": 0.6884160041809082
    },
    {
      "epoch": 2.3138427734375e-05,
      "model_forward_time": 0.11882877349853516,
      "step": 3791
    },
    {
      "epoch": 2.3138427734375e-05,
      "step": 3791,
      "training_step_time": 0.6272006034851074
    },
    {
      "epoch": 2.314453125e-05,
      "model_forward_time": 0.11739993095397949,
      "step": 3792
    },
    {
      "epoch": 2.314453125e-05,
      "step": 3792,
      "training_step_time": 0.5975673198699951
    },
    {
      "epoch": 2.3150634765625e-05,
      "model_forward_time": 0.12118339538574219,
      "step": 3793
    },
    {
      "epoch": 2.3150634765625e-05,
      "step": 3793,
      "training_step_time": 0.7700879573822021
    },
    {
      "epoch": 2.315673828125e-05,
      "model_forward_time": 0.12004780769348145,
      "step": 3794
    },
    {
      "epoch": 2.315673828125e-05,
      "step": 3794,
      "training_step_time": 0.7230923175811768
    },
    {
      "epoch": 2.3162841796875e-05,
      "model_forward_time": 0.11746883392333984,
      "step": 3795
    },
    {
      "epoch": 2.3162841796875e-05,
      "step": 3795,
      "training_step_time": 0.7234148979187012
    },
    {
      "epoch": 2.31689453125e-05,
      "model_forward_time": 0.11881709098815918,
      "step": 3796
    },
    {
      "epoch": 2.31689453125e-05,
      "step": 3796,
      "training_step_time": 0.726581335067749
    },
    {
      "epoch": 2.3175048828125e-05,
      "model_forward_time": 0.11954641342163086,
      "step": 3797
    },
    {
      "epoch": 2.3175048828125e-05,
      "step": 3797,
      "training_step_time": 0.6644699573516846
    },
    {
      "epoch": 2.318115234375e-05,
      "model_forward_time": 0.1320028305053711,
      "step": 3798
    },
    {
      "epoch": 2.318115234375e-05,
      "step": 3798,
      "training_step_time": 0.5882141590118408
    },
    {
      "epoch": 2.3187255859375e-05,
      "model_forward_time": 0.12050676345825195,
      "step": 3799
    },
    {
      "epoch": 2.3187255859375e-05,
      "step": 3799,
      "training_step_time": 0.654799222946167
    },
    {
      "epoch": 2.3193359375e-05,
      "grad_norm": 0.2812345027923584,
      "learning_rate": 9.99514040972383e-05,
      "loss": 0.1175,
      "step": 3800
    },
    {
      "epoch": 2.3193359375e-05,
      "model_forward_time": 0.11786127090454102,
      "step": 3800
    },
    {
      "epoch": 2.3193359375e-05,
      "step": 3800,
      "training_step_time": 0.675126314163208
    },
    {
      "epoch": 2.3199462890625e-05,
      "model_forward_time": 0.12087750434875488,
      "step": 3801
    },
    {
      "epoch": 2.3199462890625e-05,
      "step": 3801,
      "training_step_time": 0.6582715511322021
    },
    {
      "epoch": 2.320556640625e-05,
      "model_forward_time": 0.12542319297790527,
      "step": 3802
    },
    {
      "epoch": 2.320556640625e-05,
      "step": 3802,
      "training_step_time": 0.5925278663635254
    },
    {
      "epoch": 2.3211669921875e-05,
      "model_forward_time": 0.13200616836547852,
      "step": 3803
    },
    {
      "epoch": 2.3211669921875e-05,
      "step": 3803,
      "training_step_time": 0.6580698490142822
    },
    {
      "epoch": 2.32177734375e-05,
      "model_forward_time": 0.1210637092590332,
      "step": 3804
    },
    {
      "epoch": 2.32177734375e-05,
      "step": 3804,
      "training_step_time": 0.7545955181121826
    },
    {
      "epoch": 2.3223876953125e-05,
      "model_forward_time": 0.1318066120147705,
      "step": 3805
    },
    {
      "epoch": 2.3223876953125e-05,
      "step": 3805,
      "training_step_time": 0.6303126811981201
    },
    {
      "epoch": 2.322998046875e-05,
      "model_forward_time": 0.12494802474975586,
      "step": 3806
    },
    {
      "epoch": 2.322998046875e-05,
      "step": 3806,
      "training_step_time": 0.6462769508361816
    },
    {
      "epoch": 2.3236083984375e-05,
      "model_forward_time": 0.12149930000305176,
      "step": 3807
    },
    {
      "epoch": 2.3236083984375e-05,
      "step": 3807,
      "training_step_time": 0.7295668125152588
    },
    {
      "epoch": 2.32421875e-05,
      "model_forward_time": 0.11870360374450684,
      "step": 3808
    },
    {
      "epoch": 2.32421875e-05,
      "step": 3808,
      "training_step_time": 0.5377602577209473
    },
    {
      "epoch": 2.3248291015625e-05,
      "model_forward_time": 0.12178659439086914,
      "step": 3809
    },
    {
      "epoch": 2.3248291015625e-05,
      "step": 3809,
      "training_step_time": 0.5399799346923828
    },
    {
      "epoch": 2.325439453125e-05,
      "grad_norm": 0.34627267718315125,
      "learning_rate": 9.9950181809607e-05,
      "loss": 0.1052,
      "step": 3810
    },
    {
      "epoch": 2.325439453125e-05,
      "model_forward_time": 0.11947488784790039,
      "step": 3810
    },
    {
      "epoch": 2.325439453125e-05,
      "step": 3810,
      "training_step_time": 0.5328185558319092
    },
    {
      "epoch": 2.3260498046875e-05,
      "model_forward_time": 0.11841583251953125,
      "step": 3811
    },
    {
      "epoch": 2.3260498046875e-05,
      "step": 3811,
      "training_step_time": 0.5509247779846191
    },
    {
      "epoch": 2.32666015625e-05,
      "model_forward_time": 0.11819124221801758,
      "step": 3812
    },
    {
      "epoch": 2.32666015625e-05,
      "step": 3812,
      "training_step_time": 0.5059607028961182
    },
    {
      "epoch": 2.3272705078125e-05,
      "model_forward_time": 0.11777162551879883,
      "step": 3813
    },
    {
      "epoch": 2.3272705078125e-05,
      "step": 3813,
      "training_step_time": 0.46866297721862793
    },
    {
      "epoch": 2.327880859375e-05,
      "model_forward_time": 0.11736822128295898,
      "step": 3814
    },
    {
      "epoch": 2.327880859375e-05,
      "step": 3814,
      "training_step_time": 0.4391360282897949
    },
    {
      "epoch": 2.3284912109375e-05,
      "model_forward_time": 0.11565899848937988,
      "step": 3815
    },
    {
      "epoch": 2.3284912109375e-05,
      "step": 3815,
      "training_step_time": 0.4920170307159424
    },
    {
      "epoch": 2.3291015625e-05,
      "model_forward_time": 0.11566567420959473,
      "step": 3816
    },
    {
      "epoch": 2.3291015625e-05,
      "step": 3816,
      "training_step_time": 0.485457181930542
    },
    {
      "epoch": 2.3297119140625e-05,
      "model_forward_time": 0.11622118949890137,
      "step": 3817
    },
    {
      "epoch": 2.3297119140625e-05,
      "step": 3817,
      "training_step_time": 0.38762593269348145
    },
    {
      "epoch": 2.330322265625e-05,
      "model_forward_time": 0.1151885986328125,
      "step": 3818
    },
    {
      "epoch": 2.330322265625e-05,
      "step": 3818,
      "training_step_time": 0.4952714443206787
    },
    {
      "epoch": 2.3309326171875e-05,
      "model_forward_time": 0.11543035507202148,
      "step": 3819
    },
    {
      "epoch": 2.3309326171875e-05,
      "step": 3819,
      "training_step_time": 0.49715232849121094
    },
    {
      "epoch": 2.33154296875e-05,
      "grad_norm": 0.1764538437128067,
      "learning_rate": 9.994894434842932e-05,
      "loss": 0.1154,
      "step": 3820
    },
    {
      "epoch": 2.33154296875e-05,
      "model_forward_time": 0.11539697647094727,
      "step": 3820
    },
    {
      "epoch": 2.33154296875e-05,
      "step": 3820,
      "training_step_time": 0.3836371898651123
    },
    {
      "epoch": 2.3321533203125e-05,
      "model_forward_time": 0.11700916290283203,
      "step": 3821
    },
    {
      "epoch": 2.3321533203125e-05,
      "step": 3821,
      "training_step_time": 0.37767982482910156
    },
    {
      "epoch": 2.332763671875e-05,
      "model_forward_time": 0.11529111862182617,
      "step": 3822
    },
    {
      "epoch": 2.332763671875e-05,
      "step": 3822,
      "training_step_time": 0.40202879905700684
    },
    {
      "epoch": 2.3333740234375e-05,
      "model_forward_time": 0.11506533622741699,
      "step": 3823
    },
    {
      "epoch": 2.3333740234375e-05,
      "step": 3823,
      "training_step_time": 0.40561413764953613
    },
    {
      "epoch": 2.333984375e-05,
      "model_forward_time": 0.11530804634094238,
      "step": 3824
    },
    {
      "epoch": 2.333984375e-05,
      "step": 3824,
      "training_step_time": 0.4014008045196533
    },
    {
      "epoch": 2.3345947265625e-05,
      "model_forward_time": 0.11522459983825684,
      "step": 3825
    },
    {
      "epoch": 2.3345947265625e-05,
      "step": 3825,
      "training_step_time": 0.40666627883911133
    },
    {
      "epoch": 2.335205078125e-05,
      "model_forward_time": 0.11611485481262207,
      "step": 3826
    },
    {
      "epoch": 2.335205078125e-05,
      "step": 3826,
      "training_step_time": 0.41153454780578613
    },
    {
      "epoch": 2.3358154296875e-05,
      "model_forward_time": 0.11575460433959961,
      "step": 3827
    },
    {
      "epoch": 2.3358154296875e-05,
      "step": 3827,
      "training_step_time": 0.3925187587738037
    },
    {
      "epoch": 2.33642578125e-05,
      "model_forward_time": 0.1152656078338623,
      "step": 3828
    },
    {
      "epoch": 2.33642578125e-05,
      "step": 3828,
      "training_step_time": 0.4022843837738037
    },
    {
      "epoch": 2.3370361328125e-05,
      "model_forward_time": 0.11669206619262695,
      "step": 3829
    },
    {
      "epoch": 2.3370361328125e-05,
      "step": 3829,
      "training_step_time": 0.42920351028442383
    },
    {
      "epoch": 2.337646484375e-05,
      "grad_norm": 0.233937069773674,
      "learning_rate": 9.994769171408118e-05,
      "loss": 0.1183,
      "step": 3830
    },
    {
      "epoch": 2.337646484375e-05,
      "model_forward_time": 0.11565804481506348,
      "step": 3830
    },
    {
      "epoch": 2.337646484375e-05,
      "step": 3830,
      "training_step_time": 0.4316294193267822
    },
    {
      "epoch": 2.3382568359375e-05,
      "model_forward_time": 0.11652064323425293,
      "step": 3831
    },
    {
      "epoch": 2.3382568359375e-05,
      "step": 3831,
      "training_step_time": 0.4822044372558594
    },
    {
      "epoch": 2.3388671875e-05,
      "model_forward_time": 0.11569452285766602,
      "step": 3832
    },
    {
      "epoch": 2.3388671875e-05,
      "step": 3832,
      "training_step_time": 0.46557044982910156
    },
    {
      "epoch": 2.3394775390625e-05,
      "model_forward_time": 0.11574220657348633,
      "step": 3833
    },
    {
      "epoch": 2.3394775390625e-05,
      "step": 3833,
      "training_step_time": 0.45070409774780273
    },
    {
      "epoch": 2.340087890625e-05,
      "model_forward_time": 0.11599111557006836,
      "step": 3834
    },
    {
      "epoch": 2.340087890625e-05,
      "step": 3834,
      "training_step_time": 0.4163846969604492
    },
    {
      "epoch": 2.3406982421875e-05,
      "model_forward_time": 0.11502504348754883,
      "step": 3835
    },
    {
      "epoch": 2.3406982421875e-05,
      "step": 3835,
      "training_step_time": 0.3904552459716797
    },
    {
      "epoch": 2.34130859375e-05,
      "model_forward_time": 0.11561417579650879,
      "step": 3836
    },
    {
      "epoch": 2.34130859375e-05,
      "step": 3836,
      "training_step_time": 0.3916606903076172
    },
    {
      "epoch": 2.3419189453125e-05,
      "model_forward_time": 0.11535167694091797,
      "step": 3837
    },
    {
      "epoch": 2.3419189453125e-05,
      "step": 3837,
      "training_step_time": 0.4063239097595215
    },
    {
      "epoch": 2.342529296875e-05,
      "model_forward_time": 0.11590361595153809,
      "step": 3838
    },
    {
      "epoch": 2.342529296875e-05,
      "step": 3838,
      "training_step_time": 0.3965165615081787
    },
    {
      "epoch": 2.3431396484375e-05,
      "model_forward_time": 0.11495041847229004,
      "step": 3839
    },
    {
      "epoch": 2.3431396484375e-05,
      "step": 3839,
      "training_step_time": 0.38884806632995605
    },
    {
      "epoch": 2.34375e-05,
      "grad_norm": 0.26253941655158997,
      "learning_rate": 9.994642390694308e-05,
      "loss": 0.1155,
      "step": 3840
    },
    {
      "epoch": 2.34375e-05,
      "model_forward_time": 0.11554813385009766,
      "step": 3840
    },
    {
      "epoch": 2.34375e-05,
      "step": 3840,
      "training_step_time": 0.39322710037231445
    },
    {
      "epoch": 2.3443603515625e-05,
      "model_forward_time": 0.11535501480102539,
      "step": 3841
    },
    {
      "epoch": 2.3443603515625e-05,
      "step": 3841,
      "training_step_time": 0.39487433433532715
    },
    {
      "epoch": 2.344970703125e-05,
      "model_forward_time": 0.11538243293762207,
      "step": 3842
    },
    {
      "epoch": 2.344970703125e-05,
      "step": 3842,
      "training_step_time": 0.395888090133667
    },
    {
      "epoch": 2.3455810546875e-05,
      "model_forward_time": 0.11657547950744629,
      "step": 3843
    },
    {
      "epoch": 2.3455810546875e-05,
      "step": 3843,
      "training_step_time": 0.4587829113006592
    },
    {
      "epoch": 2.34619140625e-05,
      "model_forward_time": 0.1153261661529541,
      "step": 3844
    },
    {
      "epoch": 2.34619140625e-05,
      "step": 3844,
      "training_step_time": 0.5145914554595947
    },
    {
      "epoch": 2.3468017578125e-05,
      "model_forward_time": 0.1154172420501709,
      "step": 3845
    },
    {
      "epoch": 2.3468017578125e-05,
      "step": 3845,
      "training_step_time": 0.4685232639312744
    },
    {
      "epoch": 2.347412109375e-05,
      "model_forward_time": 0.11509060859680176,
      "step": 3846
    },
    {
      "epoch": 2.347412109375e-05,
      "step": 3846,
      "training_step_time": 0.43354296684265137
    },
    {
      "epoch": 2.3480224609375e-05,
      "model_forward_time": 0.11705589294433594,
      "step": 3847
    },
    {
      "epoch": 2.3480224609375e-05,
      "step": 3847,
      "training_step_time": 0.44123005867004395
    },
    {
      "epoch": 2.3486328125e-05,
      "model_forward_time": 0.11524844169616699,
      "step": 3848
    },
    {
      "epoch": 2.3486328125e-05,
      "step": 3848,
      "training_step_time": 0.4640641212463379
    },
    {
      "epoch": 2.3492431640625e-05,
      "model_forward_time": 0.11559271812438965,
      "step": 3849
    },
    {
      "epoch": 2.3492431640625e-05,
      "step": 3849,
      "training_step_time": 0.5036940574645996
    },
    {
      "epoch": 2.349853515625e-05,
      "grad_norm": 0.28171053528785706,
      "learning_rate": 9.994514092740015e-05,
      "loss": 0.1102,
      "step": 3850
    },
    {
      "epoch": 2.349853515625e-05,
      "model_forward_time": 0.1151118278503418,
      "step": 3850
    },
    {
      "epoch": 2.349853515625e-05,
      "step": 3850,
      "training_step_time": 0.400587797164917
    },
    {
      "epoch": 2.3504638671875e-05,
      "model_forward_time": 0.11464262008666992,
      "step": 3851
    },
    {
      "epoch": 2.3504638671875e-05,
      "step": 3851,
      "training_step_time": 0.413135290145874
    },
    {
      "epoch": 2.35107421875e-05,
      "model_forward_time": 0.11514616012573242,
      "step": 3852
    },
    {
      "epoch": 2.35107421875e-05,
      "step": 3852,
      "training_step_time": 0.39284205436706543
    },
    {
      "epoch": 2.3516845703125e-05,
      "model_forward_time": 0.11520791053771973,
      "step": 3853
    },
    {
      "epoch": 2.3516845703125e-05,
      "step": 3853,
      "training_step_time": 0.40378260612487793
    },
    {
      "epoch": 2.352294921875e-05,
      "model_forward_time": 0.11593914031982422,
      "step": 3854
    },
    {
      "epoch": 2.352294921875e-05,
      "step": 3854,
      "training_step_time": 0.44784998893737793
    },
    {
      "epoch": 2.3529052734375e-05,
      "model_forward_time": 0.11585617065429688,
      "step": 3855
    },
    {
      "epoch": 2.3529052734375e-05,
      "step": 3855,
      "training_step_time": 0.40575289726257324
    },
    {
      "epoch": 2.353515625e-05,
      "model_forward_time": 0.11584806442260742,
      "step": 3856
    },
    {
      "epoch": 2.353515625e-05,
      "step": 3856,
      "training_step_time": 0.38984203338623047
    },
    {
      "epoch": 2.3541259765625e-05,
      "model_forward_time": 0.114654541015625,
      "step": 3857
    },
    {
      "epoch": 2.3541259765625e-05,
      "step": 3857,
      "training_step_time": 0.3910691738128662
    },
    {
      "epoch": 2.354736328125e-05,
      "model_forward_time": 0.11567807197570801,
      "step": 3858
    },
    {
      "epoch": 2.354736328125e-05,
      "step": 3858,
      "training_step_time": 0.48448872566223145
    },
    {
      "epoch": 2.3553466796875e-05,
      "model_forward_time": 0.11517882347106934,
      "step": 3859
    },
    {
      "epoch": 2.3553466796875e-05,
      "step": 3859,
      "training_step_time": 0.5048925876617432
    },
    {
      "epoch": 2.35595703125e-05,
      "grad_norm": 0.2972756624221802,
      "learning_rate": 9.994384277584214e-05,
      "loss": 0.1151,
      "step": 3860
    },
    {
      "epoch": 2.35595703125e-05,
      "model_forward_time": 0.11508655548095703,
      "step": 3860
    },
    {
      "epoch": 2.35595703125e-05,
      "step": 3860,
      "training_step_time": 0.40773725509643555
    },
    {
      "epoch": 2.3565673828125e-05,
      "model_forward_time": 0.11496186256408691,
      "step": 3861
    },
    {
      "epoch": 2.3565673828125e-05,
      "step": 3861,
      "training_step_time": 0.3687262535095215
    },
    {
      "epoch": 2.357177734375e-05,
      "model_forward_time": 0.11504030227661133,
      "step": 3862
    },
    {
      "epoch": 2.357177734375e-05,
      "step": 3862,
      "training_step_time": 0.4474337100982666
    },
    {
      "epoch": 2.3577880859375e-05,
      "model_forward_time": 0.11543059349060059,
      "step": 3863
    },
    {
      "epoch": 2.3577880859375e-05,
      "step": 3863,
      "training_step_time": 0.3929741382598877
    },
    {
      "epoch": 2.3583984375e-05,
      "model_forward_time": 0.11566925048828125,
      "step": 3864
    },
    {
      "epoch": 2.3583984375e-05,
      "step": 3864,
      "training_step_time": 0.3863041400909424
    },
    {
      "epoch": 2.3590087890625e-05,
      "model_forward_time": 0.11628460884094238,
      "step": 3865
    },
    {
      "epoch": 2.3590087890625e-05,
      "step": 3865,
      "training_step_time": 0.3877744674682617
    },
    {
      "epoch": 2.359619140625e-05,
      "model_forward_time": 0.1153562068939209,
      "step": 3866
    },
    {
      "epoch": 2.359619140625e-05,
      "step": 3866,
      "training_step_time": 0.4028589725494385
    },
    {
      "epoch": 2.3602294921875e-05,
      "model_forward_time": 0.11618494987487793,
      "step": 3867
    },
    {
      "epoch": 2.3602294921875e-05,
      "step": 3867,
      "training_step_time": 0.44686198234558105
    },
    {
      "epoch": 2.36083984375e-05,
      "model_forward_time": 0.11620402336120605,
      "step": 3868
    },
    {
      "epoch": 2.36083984375e-05,
      "step": 3868,
      "training_step_time": 0.3973698616027832
    },
    {
      "epoch": 2.3614501953125e-05,
      "model_forward_time": 0.11489510536193848,
      "step": 3869
    },
    {
      "epoch": 2.3614501953125e-05,
      "step": 3869,
      "training_step_time": 0.39624667167663574
    },
    {
      "epoch": 2.362060546875e-05,
      "grad_norm": 0.39445531368255615,
      "learning_rate": 9.99425294526634e-05,
      "loss": 0.1131,
      "step": 3870
    },
    {
      "epoch": 2.362060546875e-05,
      "model_forward_time": 0.1150367259979248,
      "step": 3870
    },
    {
      "epoch": 2.362060546875e-05,
      "step": 3870,
      "training_step_time": 0.3945920467376709
    },
    {
      "epoch": 2.3626708984375e-05,
      "model_forward_time": 0.11602449417114258,
      "step": 3871
    },
    {
      "epoch": 2.3626708984375e-05,
      "step": 3871,
      "training_step_time": 0.3911149501800537
    },
    {
      "epoch": 2.36328125e-05,
      "model_forward_time": 0.11513757705688477,
      "step": 3872
    },
    {
      "epoch": 2.36328125e-05,
      "step": 3872,
      "training_step_time": 0.45820116996765137
    },
    {
      "epoch": 2.3638916015625e-05,
      "model_forward_time": 0.1158454418182373,
      "step": 3873
    },
    {
      "epoch": 2.3638916015625e-05,
      "step": 3873,
      "training_step_time": 0.42682623863220215
    },
    {
      "epoch": 2.364501953125e-05,
      "model_forward_time": 0.11507272720336914,
      "step": 3874
    },
    {
      "epoch": 2.364501953125e-05,
      "step": 3874,
      "training_step_time": 0.48821449279785156
    },
    {
      "epoch": 2.3651123046875e-05,
      "model_forward_time": 0.11550664901733398,
      "step": 3875
    },
    {
      "epoch": 2.3651123046875e-05,
      "step": 3875,
      "training_step_time": 0.4556145668029785
    },
    {
      "epoch": 2.36572265625e-05,
      "model_forward_time": 0.11527252197265625,
      "step": 3876
    },
    {
      "epoch": 2.36572265625e-05,
      "step": 3876,
      "training_step_time": 0.37059783935546875
    },
    {
      "epoch": 2.3663330078125e-05,
      "model_forward_time": 0.11576008796691895,
      "step": 3877
    },
    {
      "epoch": 2.3663330078125e-05,
      "step": 3877,
      "training_step_time": 0.49423956871032715
    },
    {
      "epoch": 2.366943359375e-05,
      "model_forward_time": 0.11561393737792969,
      "step": 3878
    },
    {
      "epoch": 2.366943359375e-05,
      "step": 3878,
      "training_step_time": 0.48996400833129883
    },
    {
      "epoch": 2.3675537109375e-05,
      "model_forward_time": 0.11450791358947754,
      "step": 3879
    },
    {
      "epoch": 2.3675537109375e-05,
      "step": 3879,
      "training_step_time": 0.3853647708892822
    },
    {
      "epoch": 2.3681640625e-05,
      "grad_norm": 0.26572754979133606,
      "learning_rate": 9.994120095826285e-05,
      "loss": 0.1109,
      "step": 3880
    },
    {
      "epoch": 2.3681640625e-05,
      "model_forward_time": 0.11480879783630371,
      "step": 3880
    },
    {
      "epoch": 2.3681640625e-05,
      "step": 3880,
      "training_step_time": 0.42044758796691895
    },
    {
      "epoch": 2.3687744140625e-05,
      "model_forward_time": 0.11489510536193848,
      "step": 3881
    },
    {
      "epoch": 2.3687744140625e-05,
      "step": 3881,
      "training_step_time": 0.40558815002441406
    },
    {
      "epoch": 2.369384765625e-05,
      "model_forward_time": 0.11665177345275879,
      "step": 3882
    },
    {
      "epoch": 2.369384765625e-05,
      "step": 3882,
      "training_step_time": 0.3955237865447998
    },
    {
      "epoch": 2.3699951171875e-05,
      "model_forward_time": 0.11574029922485352,
      "step": 3883
    },
    {
      "epoch": 2.3699951171875e-05,
      "step": 3883,
      "training_step_time": 0.3872544765472412
    },
    {
      "epoch": 2.37060546875e-05,
      "model_forward_time": 0.11691403388977051,
      "step": 3884
    },
    {
      "epoch": 2.37060546875e-05,
      "step": 3884,
      "training_step_time": 0.3976125717163086
    },
    {
      "epoch": 2.3712158203125e-05,
      "model_forward_time": 0.11661434173583984,
      "step": 3885
    },
    {
      "epoch": 2.3712158203125e-05,
      "step": 3885,
      "training_step_time": 0.39115309715270996
    },
    {
      "epoch": 2.371826171875e-05,
      "model_forward_time": 0.11551260948181152,
      "step": 3886
    },
    {
      "epoch": 2.371826171875e-05,
      "step": 3886,
      "training_step_time": 0.38828539848327637
    },
    {
      "epoch": 2.3724365234375e-05,
      "model_forward_time": 0.1157686710357666,
      "step": 3887
    },
    {
      "epoch": 2.3724365234375e-05,
      "step": 3887,
      "training_step_time": 0.43453192710876465
    },
    {
      "epoch": 2.373046875e-05,
      "model_forward_time": 0.11566424369812012,
      "step": 3888
    },
    {
      "epoch": 2.373046875e-05,
      "step": 3888,
      "training_step_time": 0.39275622367858887
    },
    {
      "epoch": 2.3736572265625e-05,
      "model_forward_time": 0.11538076400756836,
      "step": 3889
    },
    {
      "epoch": 2.3736572265625e-05,
      "step": 3889,
      "training_step_time": 0.40406084060668945
    },
    {
      "epoch": 2.374267578125e-05,
      "grad_norm": 0.21599625051021576,
      "learning_rate": 9.993985729304408e-05,
      "loss": 0.1074,
      "step": 3890
    },
    {
      "epoch": 2.374267578125e-05,
      "model_forward_time": 0.11571502685546875,
      "step": 3890
    },
    {
      "epoch": 2.374267578125e-05,
      "step": 3890,
      "training_step_time": 0.39447474479675293
    },
    {
      "epoch": 2.3748779296875e-05,
      "model_forward_time": 0.11676764488220215,
      "step": 3891
    },
    {
      "epoch": 2.3748779296875e-05,
      "step": 3891,
      "training_step_time": 0.4159879684448242
    },
    {
      "epoch": 2.37548828125e-05,
      "model_forward_time": 0.11515927314758301,
      "step": 3892
    },
    {
      "epoch": 2.37548828125e-05,
      "step": 3892,
      "training_step_time": 0.45359086990356445
    },
    {
      "epoch": 2.3760986328125e-05,
      "model_forward_time": 0.11577296257019043,
      "step": 3893
    },
    {
      "epoch": 2.3760986328125e-05,
      "step": 3893,
      "training_step_time": 0.5082764625549316
    },
    {
      "epoch": 2.376708984375e-05,
      "model_forward_time": 0.11488533020019531,
      "step": 3894
    },
    {
      "epoch": 2.376708984375e-05,
      "step": 3894,
      "training_step_time": 0.38698649406433105
    },
    {
      "epoch": 2.3773193359375e-05,
      "model_forward_time": 0.11519575119018555,
      "step": 3895
    },
    {
      "epoch": 2.3773193359375e-05,
      "step": 3895,
      "training_step_time": 0.4198722839355469
    },
    {
      "epoch": 2.3779296875e-05,
      "model_forward_time": 0.11577916145324707,
      "step": 3896
    },
    {
      "epoch": 2.3779296875e-05,
      "step": 3896,
      "training_step_time": 0.3867533206939697
    },
    {
      "epoch": 2.3785400390625e-05,
      "model_forward_time": 0.11560535430908203,
      "step": 3897
    },
    {
      "epoch": 2.3785400390625e-05,
      "step": 3897,
      "training_step_time": 0.3935582637786865
    },
    {
      "epoch": 2.379150390625e-05,
      "model_forward_time": 0.1163322925567627,
      "step": 3898
    },
    {
      "epoch": 2.379150390625e-05,
      "step": 3898,
      "training_step_time": 0.38971471786499023
    },
    {
      "epoch": 2.3797607421875e-05,
      "model_forward_time": 0.11555051803588867,
      "step": 3899
    },
    {
      "epoch": 2.3797607421875e-05,
      "step": 3899,
      "training_step_time": 0.39478564262390137
    },
    {
      "epoch": 2.38037109375e-05,
      "grad_norm": 0.2821347713470459,
      "learning_rate": 9.993849845741524e-05,
      "loss": 0.114,
      "step": 3900
    },
    {
      "epoch": 2.38037109375e-05,
      "model_forward_time": 0.11688041687011719,
      "step": 3900
    },
    {
      "epoch": 2.38037109375e-05,
      "step": 3900,
      "training_step_time": 0.3881382942199707
    },
    {
      "epoch": 2.3809814453125e-05,
      "model_forward_time": 0.11540031433105469,
      "step": 3901
    },
    {
      "epoch": 2.3809814453125e-05,
      "step": 3901,
      "training_step_time": 0.4623863697052002
    },
    {
      "epoch": 2.381591796875e-05,
      "model_forward_time": 0.11580228805541992,
      "step": 3902
    },
    {
      "epoch": 2.381591796875e-05,
      "step": 3902,
      "training_step_time": 0.4416172504425049
    },
    {
      "epoch": 2.3822021484375e-05,
      "model_forward_time": 0.11544609069824219,
      "step": 3903
    },
    {
      "epoch": 2.3822021484375e-05,
      "step": 3903,
      "training_step_time": 0.39206552505493164
    },
    {
      "epoch": 2.3828125e-05,
      "model_forward_time": 0.11521720886230469,
      "step": 3904
    },
    {
      "epoch": 2.3828125e-05,
      "step": 3904,
      "training_step_time": 0.4001939296722412
    },
    {
      "epoch": 2.3834228515625e-05,
      "model_forward_time": 0.11587905883789062,
      "step": 3905
    },
    {
      "epoch": 2.3834228515625e-05,
      "step": 3905,
      "training_step_time": 0.3939681053161621
    },
    {
      "epoch": 2.384033203125e-05,
      "model_forward_time": 0.11646246910095215,
      "step": 3906
    },
    {
      "epoch": 2.384033203125e-05,
      "step": 3906,
      "training_step_time": 0.4210829734802246
    },
    {
      "epoch": 2.3846435546875e-05,
      "model_forward_time": 0.11476349830627441,
      "step": 3907
    },
    {
      "epoch": 2.3846435546875e-05,
      "step": 3907,
      "training_step_time": 0.4185061454772949
    },
    {
      "epoch": 2.38525390625e-05,
      "model_forward_time": 0.11558198928833008,
      "step": 3908
    },
    {
      "epoch": 2.38525390625e-05,
      "step": 3908,
      "training_step_time": 0.5342276096343994
    },
    {
      "epoch": 2.3858642578125e-05,
      "model_forward_time": 0.11559605598449707,
      "step": 3909
    },
    {
      "epoch": 2.3858642578125e-05,
      "step": 3909,
      "training_step_time": 0.39165163040161133
    },
    {
      "epoch": 2.386474609375e-05,
      "grad_norm": 0.35962000489234924,
      "learning_rate": 9.993712445178913e-05,
      "loss": 0.1138,
      "step": 3910
    },
    {
      "epoch": 2.386474609375e-05,
      "model_forward_time": 0.11517214775085449,
      "step": 3910
    },
    {
      "epoch": 2.386474609375e-05,
      "step": 3910,
      "training_step_time": 0.3867335319519043
    },
    {
      "epoch": 2.3870849609375e-05,
      "model_forward_time": 0.11537051200866699,
      "step": 3911
    },
    {
      "epoch": 2.3870849609375e-05,
      "step": 3911,
      "training_step_time": 0.3920412063598633
    },
    {
      "epoch": 2.3876953125e-05,
      "model_forward_time": 0.11514854431152344,
      "step": 3912
    },
    {
      "epoch": 2.3876953125e-05,
      "step": 3912,
      "training_step_time": 0.39418816566467285
    },
    {
      "epoch": 2.3883056640625e-05,
      "model_forward_time": 0.11537885665893555,
      "step": 3913
    },
    {
      "epoch": 2.3883056640625e-05,
      "step": 3913,
      "training_step_time": 0.3904235363006592
    },
    {
      "epoch": 2.388916015625e-05,
      "model_forward_time": 0.1155252456665039,
      "step": 3914
    },
    {
      "epoch": 2.388916015625e-05,
      "step": 3914,
      "training_step_time": 0.3943977355957031
    },
    {
      "epoch": 2.3895263671875e-05,
      "model_forward_time": 0.1155242919921875,
      "step": 3915
    },
    {
      "epoch": 2.3895263671875e-05,
      "step": 3915,
      "training_step_time": 0.39284229278564453
    },
    {
      "epoch": 2.39013671875e-05,
      "model_forward_time": 0.11534428596496582,
      "step": 3916
    },
    {
      "epoch": 2.39013671875e-05,
      "step": 3916,
      "training_step_time": 0.4042174816131592
    },
    {
      "epoch": 2.3907470703125e-05,
      "model_forward_time": 0.1156930923461914,
      "step": 3917
    },
    {
      "epoch": 2.3907470703125e-05,
      "step": 3917,
      "training_step_time": 0.402540922164917
    },
    {
      "epoch": 2.391357421875e-05,
      "model_forward_time": 0.1159818172454834,
      "step": 3918
    },
    {
      "epoch": 2.391357421875e-05,
      "step": 3918,
      "training_step_time": 0.3919401168823242
    },
    {
      "epoch": 2.3919677734375e-05,
      "model_forward_time": 0.11533117294311523,
      "step": 3919
    },
    {
      "epoch": 2.3919677734375e-05,
      "step": 3919,
      "training_step_time": 0.3902242183685303
    },
    {
      "epoch": 2.392578125e-05,
      "grad_norm": 0.3787918984889984,
      "learning_rate": 9.99357352765831e-05,
      "loss": 0.1127,
      "step": 3920
    },
    {
      "epoch": 2.392578125e-05,
      "model_forward_time": 0.11572909355163574,
      "step": 3920
    },
    {
      "epoch": 2.392578125e-05,
      "step": 3920,
      "training_step_time": 0.5043337345123291
    },
    {
      "epoch": 2.3931884765625e-05,
      "model_forward_time": 0.11549997329711914,
      "step": 3921
    },
    {
      "epoch": 2.3931884765625e-05,
      "step": 3921,
      "training_step_time": 0.4162449836730957
    },
    {
      "epoch": 2.393798828125e-05,
      "model_forward_time": 0.1155245304107666,
      "step": 3922
    },
    {
      "epoch": 2.393798828125e-05,
      "step": 3922,
      "training_step_time": 0.452465295791626
    },
    {
      "epoch": 2.3944091796875e-05,
      "model_forward_time": 0.11574459075927734,
      "step": 3923
    },
    {
      "epoch": 2.3944091796875e-05,
      "step": 3923,
      "training_step_time": 0.5283236503601074
    },
    {
      "epoch": 2.39501953125e-05,
      "model_forward_time": 0.11494040489196777,
      "step": 3924
    },
    {
      "epoch": 2.39501953125e-05,
      "step": 3924,
      "training_step_time": 0.3866536617279053
    },
    {
      "epoch": 2.3956298828125e-05,
      "model_forward_time": 0.11523151397705078,
      "step": 3925
    },
    {
      "epoch": 2.3956298828125e-05,
      "step": 3925,
      "training_step_time": 0.39498257637023926
    },
    {
      "epoch": 2.396240234375e-05,
      "model_forward_time": 0.11461353302001953,
      "step": 3926
    },
    {
      "epoch": 2.396240234375e-05,
      "step": 3926,
      "training_step_time": 0.38892245292663574
    },
    {
      "epoch": 2.3968505859375e-05,
      "model_forward_time": 0.11670660972595215,
      "step": 3927
    },
    {
      "epoch": 2.3968505859375e-05,
      "step": 3927,
      "training_step_time": 0.3729422092437744
    },
    {
      "epoch": 2.3974609375e-05,
      "model_forward_time": 0.11555671691894531,
      "step": 3928
    },
    {
      "epoch": 2.3974609375e-05,
      "step": 3928,
      "training_step_time": 0.39395594596862793
    },
    {
      "epoch": 2.3980712890625e-05,
      "model_forward_time": 0.11539745330810547,
      "step": 3929
    },
    {
      "epoch": 2.3980712890625e-05,
      "step": 3929,
      "training_step_time": 0.40879106521606445
    },
    {
      "epoch": 2.398681640625e-05,
      "grad_norm": 0.3459290862083435,
      "learning_rate": 9.99343309322192e-05,
      "loss": 0.1123,
      "step": 3930
    },
    {
      "epoch": 2.398681640625e-05,
      "model_forward_time": 0.11580133438110352,
      "step": 3930
    },
    {
      "epoch": 2.398681640625e-05,
      "step": 3930,
      "training_step_time": 0.4113020896911621
    },
    {
      "epoch": 2.3992919921875e-05,
      "model_forward_time": 0.12129497528076172,
      "step": 3931
    },
    {
      "epoch": 2.3992919921875e-05,
      "step": 3931,
      "training_step_time": 0.40225744247436523
    },
    {
      "epoch": 2.39990234375e-05,
      "model_forward_time": 0.11575150489807129,
      "step": 3932
    },
    {
      "epoch": 2.39990234375e-05,
      "step": 3932,
      "training_step_time": 1.055983304977417
    },
    {
      "epoch": 2.4005126953125e-05,
      "model_forward_time": 0.11516714096069336,
      "step": 3933
    },
    {
      "epoch": 2.4005126953125e-05,
      "step": 3933,
      "training_step_time": 0.4233286380767822
    },
    {
      "epoch": 2.401123046875e-05,
      "model_forward_time": 0.11434578895568848,
      "step": 3934
    },
    {
      "epoch": 2.401123046875e-05,
      "step": 3934,
      "training_step_time": 0.36530423164367676
    },
    {
      "epoch": 2.4017333984375e-05,
      "model_forward_time": 0.11384773254394531,
      "step": 3935
    },
    {
      "epoch": 2.4017333984375e-05,
      "step": 3935,
      "training_step_time": 0.4113636016845703
    },
    {
      "epoch": 2.40234375e-05,
      "model_forward_time": 0.11397600173950195,
      "step": 3936
    },
    {
      "epoch": 2.40234375e-05,
      "step": 3936,
      "training_step_time": 0.44779133796691895
    },
    {
      "epoch": 2.4029541015625e-05,
      "model_forward_time": 0.11427831649780273,
      "step": 3937
    },
    {
      "epoch": 2.4029541015625e-05,
      "step": 3937,
      "training_step_time": 0.39020538330078125
    },
    {
      "epoch": 2.403564453125e-05,
      "model_forward_time": 0.11420679092407227,
      "step": 3938
    },
    {
      "epoch": 2.403564453125e-05,
      "step": 3938,
      "training_step_time": 0.39252662658691406
    },
    {
      "epoch": 2.4041748046875e-05,
      "model_forward_time": 0.11480927467346191,
      "step": 3939
    },
    {
      "epoch": 2.4041748046875e-05,
      "step": 3939,
      "training_step_time": 0.39740705490112305
    },
    {
      "epoch": 2.40478515625e-05,
      "grad_norm": 0.43506431579589844,
      "learning_rate": 9.9932911419124e-05,
      "loss": 0.1167,
      "step": 3940
    },
    {
      "epoch": 2.40478515625e-05,
      "model_forward_time": 0.11474800109863281,
      "step": 3940
    },
    {
      "epoch": 2.40478515625e-05,
      "step": 3940,
      "training_step_time": 0.3864321708679199
    },
    {
      "epoch": 2.4053955078125e-05,
      "model_forward_time": 0.1154336929321289,
      "step": 3941
    },
    {
      "epoch": 2.4053955078125e-05,
      "step": 3941,
      "training_step_time": 0.3884706497192383
    },
    {
      "epoch": 2.406005859375e-05,
      "model_forward_time": 0.1159214973449707,
      "step": 3942
    },
    {
      "epoch": 2.406005859375e-05,
      "step": 3942,
      "training_step_time": 0.39301156997680664
    },
    {
      "epoch": 2.4066162109375e-05,
      "model_forward_time": 0.11571717262268066,
      "step": 3943
    },
    {
      "epoch": 2.4066162109375e-05,
      "step": 3943,
      "training_step_time": 0.45875978469848633
    },
    {
      "epoch": 2.4072265625e-05,
      "model_forward_time": 0.1155242919921875,
      "step": 3944
    },
    {
      "epoch": 2.4072265625e-05,
      "step": 3944,
      "training_step_time": 1.1200578212738037
    },
    {
      "epoch": 2.4078369140625e-05,
      "model_forward_time": 0.11405396461486816,
      "step": 3945
    },
    {
      "epoch": 2.4078369140625e-05,
      "step": 3945,
      "training_step_time": 0.3877522945404053
    },
    {
      "epoch": 2.408447265625e-05,
      "model_forward_time": 0.1140604019165039,
      "step": 3946
    },
    {
      "epoch": 2.408447265625e-05,
      "step": 3946,
      "training_step_time": 0.4131484031677246
    },
    {
      "epoch": 2.4090576171875e-05,
      "model_forward_time": 0.11536574363708496,
      "step": 3947
    },
    {
      "epoch": 2.4090576171875e-05,
      "step": 3947,
      "training_step_time": 0.437558650970459
    },
    {
      "epoch": 2.40966796875e-05,
      "model_forward_time": 0.11448979377746582,
      "step": 3948
    },
    {
      "epoch": 2.40966796875e-05,
      "step": 3948,
      "training_step_time": 0.42299962043762207
    },
    {
      "epoch": 2.4102783203125e-05,
      "model_forward_time": 0.11469817161560059,
      "step": 3949
    },
    {
      "epoch": 2.4102783203125e-05,
      "step": 3949,
      "training_step_time": 0.39872097969055176
    },
    {
      "epoch": 2.410888671875e-05,
      "grad_norm": 0.37718498706817627,
      "learning_rate": 9.99314767377287e-05,
      "loss": 0.1166,
      "step": 3950
    },
    {
      "epoch": 2.410888671875e-05,
      "model_forward_time": 0.11464238166809082,
      "step": 3950
    },
    {
      "epoch": 2.410888671875e-05,
      "step": 3950,
      "training_step_time": 0.9141643047332764
    },
    {
      "epoch": 2.4114990234375e-05,
      "model_forward_time": 0.1141042709350586,
      "step": 3951
    },
    {
      "epoch": 2.4114990234375e-05,
      "step": 3951,
      "training_step_time": 0.3841836452484131
    },
    {
      "epoch": 2.412109375e-05,
      "model_forward_time": 0.11528706550598145,
      "step": 3952
    },
    {
      "epoch": 2.412109375e-05,
      "step": 3952,
      "training_step_time": 0.39032793045043945
    },
    {
      "epoch": 2.4127197265625e-05,
      "model_forward_time": 0.1146695613861084,
      "step": 3953
    },
    {
      "epoch": 2.4127197265625e-05,
      "step": 3953,
      "training_step_time": 0.3823204040527344
    },
    {
      "epoch": 2.413330078125e-05,
      "model_forward_time": 0.11461281776428223,
      "step": 3954
    },
    {
      "epoch": 2.413330078125e-05,
      "step": 3954,
      "training_step_time": 0.3855133056640625
    },
    {
      "epoch": 2.4139404296875e-05,
      "model_forward_time": 0.1143944263458252,
      "step": 3955
    },
    {
      "epoch": 2.4139404296875e-05,
      "step": 3955,
      "training_step_time": 0.39891767501831055
    },
    {
      "epoch": 2.41455078125e-05,
      "model_forward_time": 0.1154482364654541,
      "step": 3956
    },
    {
      "epoch": 2.41455078125e-05,
      "step": 3956,
      "training_step_time": 0.5210421085357666
    },
    {
      "epoch": 2.4151611328125e-05,
      "model_forward_time": 0.11522579193115234,
      "step": 3957
    },
    {
      "epoch": 2.4151611328125e-05,
      "step": 3957,
      "training_step_time": 0.43390464782714844
    },
    {
      "epoch": 2.415771484375e-05,
      "model_forward_time": 0.11529111862182617,
      "step": 3958
    },
    {
      "epoch": 2.415771484375e-05,
      "step": 3958,
      "training_step_time": 0.39496636390686035
    },
    {
      "epoch": 2.4163818359375e-05,
      "model_forward_time": 0.11552643775939941,
      "step": 3959
    },
    {
      "epoch": 2.4163818359375e-05,
      "step": 3959,
      "training_step_time": 0.3841102123260498
    },
    {
      "epoch": 2.4169921875e-05,
      "grad_norm": 0.3757854402065277,
      "learning_rate": 9.993002688846913e-05,
      "loss": 0.1094,
      "step": 3960
    },
    {
      "epoch": 2.4169921875e-05,
      "model_forward_time": 0.11466503143310547,
      "step": 3960
    },
    {
      "epoch": 2.4169921875e-05,
      "step": 3960,
      "training_step_time": 0.38782596588134766
    },
    {
      "epoch": 2.4176025390625e-05,
      "model_forward_time": 0.11524271965026855,
      "step": 3961
    },
    {
      "epoch": 2.4176025390625e-05,
      "step": 3961,
      "training_step_time": 0.4509162902832031
    },
    {
      "epoch": 2.418212890625e-05,
      "model_forward_time": 0.11544656753540039,
      "step": 3962
    },
    {
      "epoch": 2.418212890625e-05,
      "step": 3962,
      "training_step_time": 0.9073681831359863
    },
    {
      "epoch": 2.4188232421875e-05,
      "model_forward_time": 0.11457061767578125,
      "step": 3963
    },
    {
      "epoch": 2.4188232421875e-05,
      "step": 3963,
      "training_step_time": 0.4253668785095215
    },
    {
      "epoch": 2.41943359375e-05,
      "model_forward_time": 0.11455869674682617,
      "step": 3964
    },
    {
      "epoch": 2.41943359375e-05,
      "step": 3964,
      "training_step_time": 0.37446045875549316
    },
    {
      "epoch": 2.4200439453125e-05,
      "model_forward_time": 0.11427116394042969,
      "step": 3965
    },
    {
      "epoch": 2.4200439453125e-05,
      "step": 3965,
      "training_step_time": 0.3845686912536621
    },
    {
      "epoch": 2.420654296875e-05,
      "model_forward_time": 0.1150665283203125,
      "step": 3966
    },
    {
      "epoch": 2.420654296875e-05,
      "step": 3966,
      "training_step_time": 0.3779792785644531
    },
    {
      "epoch": 2.4212646484375e-05,
      "model_forward_time": 0.11480450630187988,
      "step": 3967
    },
    {
      "epoch": 2.4212646484375e-05,
      "step": 3967,
      "training_step_time": 0.40023064613342285
    },
    {
      "epoch": 2.421875e-05,
      "model_forward_time": 0.11492228507995605,
      "step": 3968
    },
    {
      "epoch": 2.421875e-05,
      "step": 3968,
      "training_step_time": 0.9578263759613037
    },
    {
      "epoch": 2.4224853515625e-05,
      "model_forward_time": 0.11446928977966309,
      "step": 3969
    },
    {
      "epoch": 2.4224853515625e-05,
      "step": 3969,
      "training_step_time": 0.413405179977417
    },
    {
      "epoch": 2.423095703125e-05,
      "grad_norm": 0.20404258370399475,
      "learning_rate": 9.992856187178572e-05,
      "loss": 0.1129,
      "step": 3970
    },
    {
      "epoch": 2.423095703125e-05,
      "model_forward_time": 0.11435627937316895,
      "step": 3970
    },
    {
      "epoch": 2.423095703125e-05,
      "step": 3970,
      "training_step_time": 0.42427754402160645
    },
    {
      "epoch": 2.4237060546875e-05,
      "model_forward_time": 0.1141059398651123,
      "step": 3971
    },
    {
      "epoch": 2.4237060546875e-05,
      "step": 3971,
      "training_step_time": 0.3898749351501465
    },
    {
      "epoch": 2.42431640625e-05,
      "model_forward_time": 0.11425423622131348,
      "step": 3972
    },
    {
      "epoch": 2.42431640625e-05,
      "step": 3972,
      "training_step_time": 0.38719725608825684
    },
    {
      "epoch": 2.4249267578125e-05,
      "model_forward_time": 0.11415410041809082,
      "step": 3973
    },
    {
      "epoch": 2.4249267578125e-05,
      "step": 3973,
      "training_step_time": 0.4557628631591797
    },
    {
      "epoch": 2.425537109375e-05,
      "model_forward_time": 0.11504459381103516,
      "step": 3974
    },
    {
      "epoch": 2.425537109375e-05,
      "step": 3974,
      "training_step_time": 0.6438171863555908
    },
    {
      "epoch": 2.4261474609375e-05,
      "model_forward_time": 0.11454510688781738,
      "step": 3975
    },
    {
      "epoch": 2.4261474609375e-05,
      "step": 3975,
      "training_step_time": 0.4266233444213867
    },
    {
      "epoch": 2.4267578125e-05,
      "model_forward_time": 0.1156618595123291,
      "step": 3976
    },
    {
      "epoch": 2.4267578125e-05,
      "step": 3976,
      "training_step_time": 0.47988033294677734
    },
    {
      "epoch": 2.4273681640625e-05,
      "model_forward_time": 0.11411762237548828,
      "step": 3977
    },
    {
      "epoch": 2.4273681640625e-05,
      "step": 3977,
      "training_step_time": 0.39350461959838867
    },
    {
      "epoch": 2.427978515625e-05,
      "model_forward_time": 0.11406087875366211,
      "step": 3978
    },
    {
      "epoch": 2.427978515625e-05,
      "step": 3978,
      "training_step_time": 0.38266968727111816
    },
    {
      "epoch": 2.4285888671875e-05,
      "model_forward_time": 0.11496376991271973,
      "step": 3979
    },
    {
      "epoch": 2.4285888671875e-05,
      "step": 3979,
      "training_step_time": 0.4051637649536133
    },
    {
      "epoch": 2.42919921875e-05,
      "grad_norm": 0.2059016227722168,
      "learning_rate": 9.99270816881235e-05,
      "loss": 0.1087,
      "step": 3980
    },
    {
      "epoch": 2.42919921875e-05,
      "model_forward_time": 0.11473822593688965,
      "step": 3980
    },
    {
      "epoch": 2.42919921875e-05,
      "step": 3980,
      "training_step_time": 0.5933132171630859
    },
    {
      "epoch": 2.4298095703125e-05,
      "model_forward_time": 0.1150352954864502,
      "step": 3981
    },
    {
      "epoch": 2.4298095703125e-05,
      "step": 3981,
      "training_step_time": 0.38883042335510254
    },
    {
      "epoch": 2.430419921875e-05,
      "model_forward_time": 0.11443734169006348,
      "step": 3982
    },
    {
      "epoch": 2.430419921875e-05,
      "step": 3982,
      "training_step_time": 0.38147687911987305
    },
    {
      "epoch": 2.4310302734375e-05,
      "model_forward_time": 0.11542820930480957,
      "step": 3983
    },
    {
      "epoch": 2.4310302734375e-05,
      "step": 3983,
      "training_step_time": 0.48923778533935547
    },
    {
      "epoch": 2.431640625e-05,
      "model_forward_time": 0.11525917053222656,
      "step": 3984
    },
    {
      "epoch": 2.431640625e-05,
      "step": 3984,
      "training_step_time": 0.44574904441833496
    },
    {
      "epoch": 2.4322509765625e-05,
      "model_forward_time": 0.11685895919799805,
      "step": 3985
    },
    {
      "epoch": 2.4322509765625e-05,
      "step": 3985,
      "training_step_time": 0.3896667957305908
    },
    {
      "epoch": 2.432861328125e-05,
      "model_forward_time": 0.11606812477111816,
      "step": 3986
    },
    {
      "epoch": 2.432861328125e-05,
      "step": 3986,
      "training_step_time": 0.5201621055603027
    },
    {
      "epoch": 2.4334716796875e-05,
      "model_forward_time": 0.11663532257080078,
      "step": 3987
    },
    {
      "epoch": 2.4334716796875e-05,
      "step": 3987,
      "training_step_time": 0.4832577705383301
    },
    {
      "epoch": 2.43408203125e-05,
      "model_forward_time": 0.11498522758483887,
      "step": 3988
    },
    {
      "epoch": 2.43408203125e-05,
      "step": 3988,
      "training_step_time": 0.4730055332183838
    },
    {
      "epoch": 2.4346923828125e-05,
      "model_forward_time": 0.11495280265808105,
      "step": 3989
    },
    {
      "epoch": 2.4346923828125e-05,
      "step": 3989,
      "training_step_time": 0.3865540027618408
    },
    {
      "epoch": 2.435302734375e-05,
      "grad_norm": 0.29679444432258606,
      "learning_rate": 9.992558633793212e-05,
      "loss": 0.1093,
      "step": 3990
    },
    {
      "epoch": 2.435302734375e-05,
      "model_forward_time": 0.11538267135620117,
      "step": 3990
    },
    {
      "epoch": 2.435302734375e-05,
      "step": 3990,
      "training_step_time": 0.4806838035583496
    },
    {
      "epoch": 2.4359130859375e-05,
      "model_forward_time": 0.11453461647033691,
      "step": 3991
    },
    {
      "epoch": 2.4359130859375e-05,
      "step": 3991,
      "training_step_time": 0.3891730308532715
    },
    {
      "epoch": 2.4365234375e-05,
      "model_forward_time": 0.11494255065917969,
      "step": 3992
    },
    {
      "epoch": 2.4365234375e-05,
      "step": 3992,
      "training_step_time": 0.39249682426452637
    },
    {
      "epoch": 2.4371337890625e-05,
      "model_forward_time": 0.11661362648010254,
      "step": 3993
    },
    {
      "epoch": 2.4371337890625e-05,
      "step": 3993,
      "training_step_time": 0.3891892433166504
    },
    {
      "epoch": 2.437744140625e-05,
      "model_forward_time": 0.11793398857116699,
      "step": 3994
    },
    {
      "epoch": 2.437744140625e-05,
      "step": 3994,
      "training_step_time": 0.40186166763305664
    },
    {
      "epoch": 2.4383544921875e-05,
      "model_forward_time": 0.11532735824584961,
      "step": 3995
    },
    {
      "epoch": 2.4383544921875e-05,
      "step": 3995,
      "training_step_time": 0.39919304847717285
    },
    {
      "epoch": 2.43896484375e-05,
      "model_forward_time": 0.11757755279541016,
      "step": 3996
    },
    {
      "epoch": 2.43896484375e-05,
      "step": 3996,
      "training_step_time": 0.40117573738098145
    },
    {
      "epoch": 2.4395751953125e-05,
      "model_forward_time": 0.11589360237121582,
      "step": 3997
    },
    {
      "epoch": 2.4395751953125e-05,
      "step": 3997,
      "training_step_time": 0.48740363121032715
    },
    {
      "epoch": 2.440185546875e-05,
      "model_forward_time": 0.11554527282714844,
      "step": 3998
    },
    {
      "epoch": 2.440185546875e-05,
      "step": 3998,
      "training_step_time": 0.6029667854309082
    },
    {
      "epoch": 2.4407958984375e-05,
      "model_forward_time": 0.11478161811828613,
      "step": 3999
    },
    {
      "epoch": 2.4407958984375e-05,
      "step": 3999,
      "training_step_time": 0.39434814453125
    },
    {
      "epoch": 2.44140625e-05,
      "grad_norm": 0.3520660996437073,
      "learning_rate": 9.992407582166581e-05,
      "loss": 0.1132,
      "step": 4000
    },
    {
      "epoch": 2.44140625e-05,
      "model_forward_time": 0.11400055885314941,
      "step": 4000
    },
    {
      "epoch": 2.44140625e-05,
      "step": 4000,
      "training_step_time": 0.3553802967071533
    },
    {
      "epoch": 2.4420166015625e-05,
      "model_forward_time": 0.11290526390075684,
      "step": 4001
    },
    {
      "epoch": 2.4420166015625e-05,
      "step": 4001,
      "training_step_time": 0.39650487899780273
    },
    {
      "epoch": 2.442626953125e-05,
      "model_forward_time": 0.1139380931854248,
      "step": 4002
    },
    {
      "epoch": 2.442626953125e-05,
      "step": 4002,
      "training_step_time": 0.3794865608215332
    },
    {
      "epoch": 2.4432373046875e-05,
      "model_forward_time": 0.11373591423034668,
      "step": 4003
    },
    {
      "epoch": 2.4432373046875e-05,
      "step": 4003,
      "training_step_time": 0.39151906967163086
    },
    {
      "epoch": 2.44384765625e-05,
      "model_forward_time": 0.11410355567932129,
      "step": 4004
    },
    {
      "epoch": 2.44384765625e-05,
      "step": 4004,
      "training_step_time": 0.4023425579071045
    },
    {
      "epoch": 2.4444580078125e-05,
      "model_forward_time": 0.11407732963562012,
      "step": 4005
    },
    {
      "epoch": 2.4444580078125e-05,
      "step": 4005,
      "training_step_time": 0.3675994873046875
    },
    {
      "epoch": 2.445068359375e-05,
      "model_forward_time": 0.11417484283447266,
      "step": 4006
    },
    {
      "epoch": 2.445068359375e-05,
      "step": 4006,
      "training_step_time": 0.4295051097869873
    },
    {
      "epoch": 2.4456787109375e-05,
      "model_forward_time": 0.1147618293762207,
      "step": 4007
    },
    {
      "epoch": 2.4456787109375e-05,
      "step": 4007,
      "training_step_time": 0.42430710792541504
    },
    {
      "epoch": 2.4462890625e-05,
      "model_forward_time": 0.11463236808776855,
      "step": 4008
    },
    {
      "epoch": 2.4462890625e-05,
      "step": 4008,
      "training_step_time": 0.40581202507019043
    },
    {
      "epoch": 2.4468994140625e-05,
      "model_forward_time": 0.11552619934082031,
      "step": 4009
    },
    {
      "epoch": 2.4468994140625e-05,
      "step": 4009,
      "training_step_time": 0.3894765377044678
    },
    {
      "epoch": 2.447509765625e-05,
      "grad_norm": 0.36294281482696533,
      "learning_rate": 9.992255013978344e-05,
      "loss": 0.1076,
      "step": 4010
    },
    {
      "epoch": 2.447509765625e-05,
      "model_forward_time": 0.11500024795532227,
      "step": 4010
    },
    {
      "epoch": 2.447509765625e-05,
      "step": 4010,
      "training_step_time": 0.3948638439178467
    },
    {
      "epoch": 2.4481201171875e-05,
      "model_forward_time": 0.11532163619995117,
      "step": 4011
    },
    {
      "epoch": 2.4481201171875e-05,
      "step": 4011,
      "training_step_time": 0.39192819595336914
    },
    {
      "epoch": 2.44873046875e-05,
      "model_forward_time": 0.11490011215209961,
      "step": 4012
    },
    {
      "epoch": 2.44873046875e-05,
      "step": 4012,
      "training_step_time": 0.4216763973236084
    },
    {
      "epoch": 2.4493408203125e-05,
      "model_forward_time": 0.11453056335449219,
      "step": 4013
    },
    {
      "epoch": 2.4493408203125e-05,
      "step": 4013,
      "training_step_time": 0.4253857135772705
    },
    {
      "epoch": 2.449951171875e-05,
      "model_forward_time": 0.11557459831237793,
      "step": 4014
    },
    {
      "epoch": 2.449951171875e-05,
      "step": 4014,
      "training_step_time": 0.43023014068603516
    },
    {
      "epoch": 2.4505615234375e-05,
      "model_forward_time": 0.11549925804138184,
      "step": 4015
    },
    {
      "epoch": 2.4505615234375e-05,
      "step": 4015,
      "training_step_time": 0.4402310848236084
    },
    {
      "epoch": 2.451171875e-05,
      "model_forward_time": 0.11531972885131836,
      "step": 4016
    },
    {
      "epoch": 2.451171875e-05,
      "step": 4016,
      "training_step_time": 0.47550487518310547
    },
    {
      "epoch": 2.4517822265625e-05,
      "model_forward_time": 0.11467838287353516,
      "step": 4017
    },
    {
      "epoch": 2.4517822265625e-05,
      "step": 4017,
      "training_step_time": 0.3993499279022217
    },
    {
      "epoch": 2.452392578125e-05,
      "model_forward_time": 0.11518287658691406,
      "step": 4018
    },
    {
      "epoch": 2.452392578125e-05,
      "step": 4018,
      "training_step_time": 0.40544819831848145
    },
    {
      "epoch": 2.4530029296875e-05,
      "model_forward_time": 0.11520218849182129,
      "step": 4019
    },
    {
      "epoch": 2.4530029296875e-05,
      "step": 4019,
      "training_step_time": 0.42414355278015137
    },
    {
      "epoch": 2.45361328125e-05,
      "grad_norm": 0.24916695058345795,
      "learning_rate": 9.992100929274846e-05,
      "loss": 0.109,
      "step": 4020
    },
    {
      "epoch": 2.45361328125e-05,
      "model_forward_time": 0.11460232734680176,
      "step": 4020
    },
    {
      "epoch": 2.45361328125e-05,
      "step": 4020,
      "training_step_time": 0.5104715824127197
    },
    {
      "epoch": 2.4542236328125e-05,
      "model_forward_time": 0.11482048034667969,
      "step": 4021
    },
    {
      "epoch": 2.4542236328125e-05,
      "step": 4021,
      "training_step_time": 0.46256113052368164
    },
    {
      "epoch": 2.454833984375e-05,
      "model_forward_time": 0.11591577529907227,
      "step": 4022
    },
    {
      "epoch": 2.454833984375e-05,
      "step": 4022,
      "training_step_time": 0.4972827434539795
    },
    {
      "epoch": 2.4554443359375e-05,
      "model_forward_time": 0.11454033851623535,
      "step": 4023
    },
    {
      "epoch": 2.4554443359375e-05,
      "step": 4023,
      "training_step_time": 0.38896679878234863
    },
    {
      "epoch": 2.4560546875e-05,
      "model_forward_time": 0.11479997634887695,
      "step": 4024
    },
    {
      "epoch": 2.4560546875e-05,
      "step": 4024,
      "training_step_time": 0.3962078094482422
    },
    {
      "epoch": 2.4566650390625e-05,
      "model_forward_time": 0.1148982048034668,
      "step": 4025
    },
    {
      "epoch": 2.4566650390625e-05,
      "step": 4025,
      "training_step_time": 0.38354921340942383
    },
    {
      "epoch": 2.457275390625e-05,
      "model_forward_time": 0.11506175994873047,
      "step": 4026
    },
    {
      "epoch": 2.457275390625e-05,
      "step": 4026,
      "training_step_time": 0.434802770614624
    },
    {
      "epoch": 2.4578857421875e-05,
      "model_forward_time": 0.11466026306152344,
      "step": 4027
    },
    {
      "epoch": 2.4578857421875e-05,
      "step": 4027,
      "training_step_time": 0.40856456756591797
    },
    {
      "epoch": 2.45849609375e-05,
      "model_forward_time": 0.11459875106811523,
      "step": 4028
    },
    {
      "epoch": 2.45849609375e-05,
      "step": 4028,
      "training_step_time": 0.4276547431945801
    },
    {
      "epoch": 2.4591064453125e-05,
      "model_forward_time": 0.11546683311462402,
      "step": 4029
    },
    {
      "epoch": 2.4591064453125e-05,
      "step": 4029,
      "training_step_time": 0.5012056827545166
    },
    {
      "epoch": 2.459716796875e-05,
      "grad_norm": 0.2697557806968689,
      "learning_rate": 9.991945328102897e-05,
      "loss": 0.1173,
      "step": 4030
    },
    {
      "epoch": 2.459716796875e-05,
      "model_forward_time": 0.11448121070861816,
      "step": 4030
    },
    {
      "epoch": 2.459716796875e-05,
      "step": 4030,
      "training_step_time": 0.46100902557373047
    },
    {
      "epoch": 2.4603271484375e-05,
      "model_forward_time": 0.11525344848632812,
      "step": 4031
    },
    {
      "epoch": 2.4603271484375e-05,
      "step": 4031,
      "training_step_time": 0.39455294609069824
    },
    {
      "epoch": 2.4609375e-05,
      "model_forward_time": 0.11447930335998535,
      "step": 4032
    },
    {
      "epoch": 2.4609375e-05,
      "step": 4032,
      "training_step_time": 0.48316407203674316
    },
    {
      "epoch": 2.4615478515625e-05,
      "model_forward_time": 0.11507201194763184,
      "step": 4033
    },
    {
      "epoch": 2.4615478515625e-05,
      "step": 4033,
      "training_step_time": 0.4417738914489746
    },
    {
      "epoch": 2.462158203125e-05,
      "model_forward_time": 0.11499977111816406,
      "step": 4034
    },
    {
      "epoch": 2.462158203125e-05,
      "step": 4034,
      "training_step_time": 0.36606860160827637
    },
    {
      "epoch": 2.4627685546875e-05,
      "model_forward_time": 0.11440253257751465,
      "step": 4035
    },
    {
      "epoch": 2.4627685546875e-05,
      "step": 4035,
      "training_step_time": 0.40033602714538574
    },
    {
      "epoch": 2.46337890625e-05,
      "model_forward_time": 0.1154792308807373,
      "step": 4036
    },
    {
      "epoch": 2.46337890625e-05,
      "step": 4036,
      "training_step_time": 0.416287899017334
    },
    {
      "epoch": 2.4639892578125e-05,
      "model_forward_time": 0.11546874046325684,
      "step": 4037
    },
    {
      "epoch": 2.4639892578125e-05,
      "step": 4037,
      "training_step_time": 0.4216735363006592
    },
    {
      "epoch": 2.464599609375e-05,
      "model_forward_time": 0.13011741638183594,
      "step": 4038
    },
    {
      "epoch": 2.464599609375e-05,
      "step": 4038,
      "training_step_time": 0.3969283103942871
    },
    {
      "epoch": 2.4652099609375e-05,
      "model_forward_time": 0.11479854583740234,
      "step": 4039
    },
    {
      "epoch": 2.4652099609375e-05,
      "step": 4039,
      "training_step_time": 0.4143798351287842
    },
    {
      "epoch": 2.4658203125e-05,
      "grad_norm": 0.21852846443653107,
      "learning_rate": 9.991788210509758e-05,
      "loss": 0.11,
      "step": 4040
    },
    {
      "epoch": 2.4658203125e-05,
      "model_forward_time": 0.11527681350708008,
      "step": 4040
    },
    {
      "epoch": 2.4658203125e-05,
      "step": 4040,
      "training_step_time": 0.4634740352630615
    },
    {
      "epoch": 2.4664306640625e-05,
      "model_forward_time": 0.11524629592895508,
      "step": 4041
    },
    {
      "epoch": 2.4664306640625e-05,
      "step": 4041,
      "training_step_time": 0.3987746238708496
    },
    {
      "epoch": 2.467041015625e-05,
      "model_forward_time": 0.11460089683532715,
      "step": 4042
    },
    {
      "epoch": 2.467041015625e-05,
      "step": 4042,
      "training_step_time": 0.3977622985839844
    },
    {
      "epoch": 2.4676513671875e-05,
      "model_forward_time": 0.11582207679748535,
      "step": 4043
    },
    {
      "epoch": 2.4676513671875e-05,
      "step": 4043,
      "training_step_time": 0.4838137626647949
    },
    {
      "epoch": 2.46826171875e-05,
      "model_forward_time": 0.11462521553039551,
      "step": 4044
    },
    {
      "epoch": 2.46826171875e-05,
      "step": 4044,
      "training_step_time": 0.4131443500518799
    },
    {
      "epoch": 2.4688720703125e-05,
      "model_forward_time": 0.11587715148925781,
      "step": 4045
    },
    {
      "epoch": 2.4688720703125e-05,
      "step": 4045,
      "training_step_time": 0.4275093078613281
    },
    {
      "epoch": 2.469482421875e-05,
      "model_forward_time": 0.11507654190063477,
      "step": 4046
    },
    {
      "epoch": 2.469482421875e-05,
      "step": 4046,
      "training_step_time": 0.39574146270751953
    },
    {
      "epoch": 2.4700927734375e-05,
      "model_forward_time": 0.1148688793182373,
      "step": 4047
    },
    {
      "epoch": 2.4700927734375e-05,
      "step": 4047,
      "training_step_time": 0.4226493835449219
    },
    {
      "epoch": 2.470703125e-05,
      "model_forward_time": 0.11507987976074219,
      "step": 4048
    },
    {
      "epoch": 2.470703125e-05,
      "step": 4048,
      "training_step_time": 0.4259982109069824
    },
    {
      "epoch": 2.4713134765625e-05,
      "model_forward_time": 0.1154024600982666,
      "step": 4049
    },
    {
      "epoch": 2.4713134765625e-05,
      "step": 4049,
      "training_step_time": 0.4958813190460205
    },
    {
      "epoch": 2.471923828125e-05,
      "grad_norm": 0.17924758791923523,
      "learning_rate": 9.991629576543163e-05,
      "loss": 0.105,
      "step": 4050
    },
    {
      "epoch": 2.471923828125e-05,
      "model_forward_time": 0.11493110656738281,
      "step": 4050
    },
    {
      "epoch": 2.471923828125e-05,
      "step": 4050,
      "training_step_time": 0.41924309730529785
    },
    {
      "epoch": 2.4725341796875e-05,
      "model_forward_time": 0.11480712890625,
      "step": 4051
    },
    {
      "epoch": 2.4725341796875e-05,
      "step": 4051,
      "training_step_time": 0.42371273040771484
    },
    {
      "epoch": 2.47314453125e-05,
      "model_forward_time": 0.11567854881286621,
      "step": 4052
    },
    {
      "epoch": 2.47314453125e-05,
      "step": 4052,
      "training_step_time": 0.6010987758636475
    },
    {
      "epoch": 2.4737548828125e-05,
      "model_forward_time": 0.11454391479492188,
      "step": 4053
    },
    {
      "epoch": 2.4737548828125e-05,
      "step": 4053,
      "training_step_time": 0.40354037284851074
    },
    {
      "epoch": 2.474365234375e-05,
      "model_forward_time": 0.11510539054870605,
      "step": 4054
    },
    {
      "epoch": 2.474365234375e-05,
      "step": 4054,
      "training_step_time": 0.3912532329559326
    },
    {
      "epoch": 2.4749755859375e-05,
      "model_forward_time": 0.11458945274353027,
      "step": 4055
    },
    {
      "epoch": 2.4749755859375e-05,
      "step": 4055,
      "training_step_time": 0.3895909786224365
    },
    {
      "epoch": 2.4755859375e-05,
      "model_forward_time": 0.1146397590637207,
      "step": 4056
    },
    {
      "epoch": 2.4755859375e-05,
      "step": 4056,
      "training_step_time": 0.3864138126373291
    },
    {
      "epoch": 2.4761962890625e-05,
      "model_forward_time": 0.1155238151550293,
      "step": 4057
    },
    {
      "epoch": 2.4761962890625e-05,
      "step": 4057,
      "training_step_time": 0.45896220207214355
    },
    {
      "epoch": 2.476806640625e-05,
      "model_forward_time": 0.11550545692443848,
      "step": 4058
    },
    {
      "epoch": 2.476806640625e-05,
      "step": 4058,
      "training_step_time": 0.48325014114379883
    },
    {
      "epoch": 2.4774169921875e-05,
      "model_forward_time": 0.11481547355651855,
      "step": 4059
    },
    {
      "epoch": 2.4774169921875e-05,
      "step": 4059,
      "training_step_time": 0.4612410068511963
    },
    {
      "epoch": 2.47802734375e-05,
      "grad_norm": 0.2595427632331848,
      "learning_rate": 9.9914694262513e-05,
      "loss": 0.1129,
      "step": 4060
    },
    {
      "epoch": 2.47802734375e-05,
      "model_forward_time": 0.11443352699279785,
      "step": 4060
    },
    {
      "epoch": 2.47802734375e-05,
      "step": 4060,
      "training_step_time": 0.38976097106933594
    },
    {
      "epoch": 2.4786376953125e-05,
      "model_forward_time": 0.11564278602600098,
      "step": 4061
    },
    {
      "epoch": 2.4786376953125e-05,
      "step": 4061,
      "training_step_time": 0.4429605007171631
    },
    {
      "epoch": 2.479248046875e-05,
      "model_forward_time": 0.1148841381072998,
      "step": 4062
    },
    {
      "epoch": 2.479248046875e-05,
      "step": 4062,
      "training_step_time": 0.4821479320526123
    },
    {
      "epoch": 2.4798583984375e-05,
      "model_forward_time": 0.11467933654785156,
      "step": 4063
    },
    {
      "epoch": 2.4798583984375e-05,
      "step": 4063,
      "training_step_time": 0.4405782222747803
    },
    {
      "epoch": 2.48046875e-05,
      "model_forward_time": 0.11508560180664062,
      "step": 4064
    },
    {
      "epoch": 2.48046875e-05,
      "step": 4064,
      "training_step_time": 0.4578220844268799
    },
    {
      "epoch": 2.4810791015625e-05,
      "model_forward_time": 0.11427927017211914,
      "step": 4065
    },
    {
      "epoch": 2.4810791015625e-05,
      "step": 4065,
      "training_step_time": 0.43316197395324707
    },
    {
      "epoch": 2.481689453125e-05,
      "model_forward_time": 0.11442708969116211,
      "step": 4066
    },
    {
      "epoch": 2.481689453125e-05,
      "step": 4066,
      "training_step_time": 0.43047404289245605
    },
    {
      "epoch": 2.4822998046875e-05,
      "model_forward_time": 0.11429357528686523,
      "step": 4067
    },
    {
      "epoch": 2.4822998046875e-05,
      "step": 4067,
      "training_step_time": 0.42257142066955566
    },
    {
      "epoch": 2.48291015625e-05,
      "model_forward_time": 0.11403179168701172,
      "step": 4068
    },
    {
      "epoch": 2.48291015625e-05,
      "step": 4068,
      "training_step_time": 0.39387011528015137
    },
    {
      "epoch": 2.4835205078125e-05,
      "model_forward_time": 0.11455774307250977,
      "step": 4069
    },
    {
      "epoch": 2.4835205078125e-05,
      "step": 4069,
      "training_step_time": 0.39316487312316895
    },
    {
      "epoch": 2.484130859375e-05,
      "grad_norm": 0.22363148629665375,
      "learning_rate": 9.991307759682815e-05,
      "loss": 0.1097,
      "step": 4070
    },
    {
      "epoch": 2.484130859375e-05,
      "model_forward_time": 0.11522531509399414,
      "step": 4070
    },
    {
      "epoch": 2.484130859375e-05,
      "step": 4070,
      "training_step_time": 0.3920862674713135
    },
    {
      "epoch": 2.4847412109375e-05,
      "model_forward_time": 0.11510753631591797,
      "step": 4071
    },
    {
      "epoch": 2.4847412109375e-05,
      "step": 4071,
      "training_step_time": 0.4054558277130127
    },
    {
      "epoch": 2.4853515625e-05,
      "model_forward_time": 0.11509180068969727,
      "step": 4072
    },
    {
      "epoch": 2.4853515625e-05,
      "step": 4072,
      "training_step_time": 0.44165802001953125
    },
    {
      "epoch": 2.4859619140625e-05,
      "model_forward_time": 0.11493539810180664,
      "step": 4073
    },
    {
      "epoch": 2.4859619140625e-05,
      "step": 4073,
      "training_step_time": 0.3978004455566406
    },
    {
      "epoch": 2.486572265625e-05,
      "model_forward_time": 0.11562490463256836,
      "step": 4074
    },
    {
      "epoch": 2.486572265625e-05,
      "step": 4074,
      "training_step_time": 0.5102264881134033
    },
    {
      "epoch": 2.4871826171875e-05,
      "model_forward_time": 0.11481690406799316,
      "step": 4075
    },
    {
      "epoch": 2.4871826171875e-05,
      "step": 4075,
      "training_step_time": 0.4025390148162842
    },
    {
      "epoch": 2.48779296875e-05,
      "model_forward_time": 0.11485862731933594,
      "step": 4076
    },
    {
      "epoch": 2.48779296875e-05,
      "step": 4076,
      "training_step_time": 0.410778284072876
    },
    {
      "epoch": 2.4884033203125e-05,
      "model_forward_time": 0.11484456062316895,
      "step": 4077
    },
    {
      "epoch": 2.4884033203125e-05,
      "step": 4077,
      "training_step_time": 0.39603304862976074
    },
    {
      "epoch": 2.489013671875e-05,
      "model_forward_time": 0.11533737182617188,
      "step": 4078
    },
    {
      "epoch": 2.489013671875e-05,
      "step": 4078,
      "training_step_time": 0.43450355529785156
    },
    {
      "epoch": 2.4896240234375e-05,
      "model_forward_time": 0.11474919319152832,
      "step": 4079
    },
    {
      "epoch": 2.4896240234375e-05,
      "step": 4079,
      "training_step_time": 0.46724367141723633
    },
    {
      "epoch": 2.490234375e-05,
      "grad_norm": 0.2301221489906311,
      "learning_rate": 9.991144576886823e-05,
      "loss": 0.1063,
      "step": 4080
    },
    {
      "epoch": 2.490234375e-05,
      "model_forward_time": 0.11512899398803711,
      "step": 4080
    },
    {
      "epoch": 2.490234375e-05,
      "step": 4080,
      "training_step_time": 0.44777989387512207
    },
    {
      "epoch": 2.4908447265625e-05,
      "model_forward_time": 0.11533474922180176,
      "step": 4081
    },
    {
      "epoch": 2.4908447265625e-05,
      "step": 4081,
      "training_step_time": 0.39101171493530273
    },
    {
      "epoch": 2.491455078125e-05,
      "model_forward_time": 0.11574769020080566,
      "step": 4082
    },
    {
      "epoch": 2.491455078125e-05,
      "step": 4082,
      "training_step_time": 0.3900306224822998
    },
    {
      "epoch": 2.4920654296875e-05,
      "model_forward_time": 0.11525440216064453,
      "step": 4083
    },
    {
      "epoch": 2.4920654296875e-05,
      "step": 4083,
      "training_step_time": 0.396343469619751
    },
    {
      "epoch": 2.49267578125e-05,
      "model_forward_time": 0.1149439811706543,
      "step": 4084
    },
    {
      "epoch": 2.49267578125e-05,
      "step": 4084,
      "training_step_time": 0.3996591567993164
    },
    {
      "epoch": 2.4932861328125e-05,
      "model_forward_time": 0.11530208587646484,
      "step": 4085
    },
    {
      "epoch": 2.4932861328125e-05,
      "step": 4085,
      "training_step_time": 0.39772748947143555
    },
    {
      "epoch": 2.493896484375e-05,
      "model_forward_time": 0.11554551124572754,
      "step": 4086
    },
    {
      "epoch": 2.493896484375e-05,
      "step": 4086,
      "training_step_time": 0.47133708000183105
    },
    {
      "epoch": 2.4945068359375e-05,
      "model_forward_time": 0.11560726165771484,
      "step": 4087
    },
    {
      "epoch": 2.4945068359375e-05,
      "step": 4087,
      "training_step_time": 0.5099227428436279
    },
    {
      "epoch": 2.4951171875e-05,
      "model_forward_time": 0.11529159545898438,
      "step": 4088
    },
    {
      "epoch": 2.4951171875e-05,
      "step": 4088,
      "training_step_time": 0.42475056648254395
    },
    {
      "epoch": 2.4957275390625e-05,
      "model_forward_time": 0.11450695991516113,
      "step": 4089
    },
    {
      "epoch": 2.4957275390625e-05,
      "step": 4089,
      "training_step_time": 0.3933696746826172
    },
    {
      "epoch": 2.496337890625e-05,
      "grad_norm": 0.21253716945648193,
      "learning_rate": 9.990979877912891e-05,
      "loss": 0.1127,
      "step": 4090
    },
    {
      "epoch": 2.496337890625e-05,
      "model_forward_time": 0.11524462699890137,
      "step": 4090
    },
    {
      "epoch": 2.496337890625e-05,
      "step": 4090,
      "training_step_time": 0.3842954635620117
    },
    {
      "epoch": 2.4969482421875e-05,
      "model_forward_time": 0.11493802070617676,
      "step": 4091
    },
    {
      "epoch": 2.4969482421875e-05,
      "step": 4091,
      "training_step_time": 0.43097686767578125
    },
    {
      "epoch": 2.49755859375e-05,
      "model_forward_time": 0.11490035057067871,
      "step": 4092
    },
    {
      "epoch": 2.49755859375e-05,
      "step": 4092,
      "training_step_time": 0.4049406051635742
    },
    {
      "epoch": 2.4981689453125e-05,
      "model_forward_time": 0.11631989479064941,
      "step": 4093
    },
    {
      "epoch": 2.4981689453125e-05,
      "step": 4093,
      "training_step_time": 0.4326460361480713
    },
    {
      "epoch": 2.498779296875e-05,
      "model_forward_time": 0.11568522453308105,
      "step": 4094
    },
    {
      "epoch": 2.498779296875e-05,
      "step": 4094,
      "training_step_time": 0.5764000415802002
    },
    {
      "epoch": 2.4993896484375e-05,
      "model_forward_time": 0.11407709121704102,
      "step": 4095
    },
    {
      "epoch": 2.4993896484375e-05,
      "step": 4095,
      "training_step_time": 0.39237356185913086
    },
    {
      "epoch": 2.5e-05,
      "model_forward_time": 0.1150364875793457,
      "step": 4096
    },
    {
      "epoch": 2.5e-05,
      "step": 4096,
      "training_step_time": 0.3899204730987549
    },
    {
      "epoch": 2.5006103515625e-05,
      "model_forward_time": 0.11519932746887207,
      "step": 4097
    },
    {
      "epoch": 2.5006103515625e-05,
      "step": 4097,
      "training_step_time": 0.3901937007904053
    },
    {
      "epoch": 2.501220703125e-05,
      "model_forward_time": 0.1153264045715332,
      "step": 4098
    },
    {
      "epoch": 2.501220703125e-05,
      "step": 4098,
      "training_step_time": 0.39629173278808594
    },
    {
      "epoch": 2.5018310546875e-05,
      "model_forward_time": 0.11499500274658203,
      "step": 4099
    },
    {
      "epoch": 2.5018310546875e-05,
      "step": 4099,
      "training_step_time": 0.39493632316589355
    },
    {
      "epoch": 2.50244140625e-05,
      "grad_norm": 0.3338877558708191,
      "learning_rate": 9.990813662811051e-05,
      "loss": 0.1165,
      "step": 4100
    },
    {
      "epoch": 2.50244140625e-05,
      "model_forward_time": 0.11562967300415039,
      "step": 4100
    },
    {
      "epoch": 2.50244140625e-05,
      "step": 4100,
      "training_step_time": 0.6359715461730957
    },
    {
      "epoch": 2.5030517578125e-05,
      "model_forward_time": 0.11484313011169434,
      "step": 4101
    },
    {
      "epoch": 2.5030517578125e-05,
      "step": 4101,
      "training_step_time": 0.3972456455230713
    },
    {
      "epoch": 2.503662109375e-05,
      "model_forward_time": 0.11444520950317383,
      "step": 4102
    },
    {
      "epoch": 2.503662109375e-05,
      "step": 4102,
      "training_step_time": 0.38584089279174805
    },
    {
      "epoch": 2.5042724609375e-05,
      "model_forward_time": 0.1158301830291748,
      "step": 4103
    },
    {
      "epoch": 2.5042724609375e-05,
      "step": 4103,
      "training_step_time": 0.4012417793273926
    },
    {
      "epoch": 2.5048828125e-05,
      "model_forward_time": 0.11481237411499023,
      "step": 4104
    },
    {
      "epoch": 2.5048828125e-05,
      "step": 4104,
      "training_step_time": 0.3894472122192383
    },
    {
      "epoch": 2.5054931640625e-05,
      "model_forward_time": 0.11644911766052246,
      "step": 4105
    },
    {
      "epoch": 2.5054931640625e-05,
      "step": 4105,
      "training_step_time": 0.43804216384887695
    },
    {
      "epoch": 2.506103515625e-05,
      "model_forward_time": 0.11474990844726562,
      "step": 4106
    },
    {
      "epoch": 2.506103515625e-05,
      "step": 4106,
      "training_step_time": 0.8395447731018066
    },
    {
      "epoch": 2.5067138671875e-05,
      "model_forward_time": 0.11391592025756836,
      "step": 4107
    },
    {
      "epoch": 2.5067138671875e-05,
      "step": 4107,
      "training_step_time": 0.4325559139251709
    },
    {
      "epoch": 2.50732421875e-05,
      "model_forward_time": 0.11425280570983887,
      "step": 4108
    },
    {
      "epoch": 2.50732421875e-05,
      "step": 4108,
      "training_step_time": 0.4212913513183594
    },
    {
      "epoch": 2.5079345703125e-05,
      "model_forward_time": 0.11471700668334961,
      "step": 4109
    },
    {
      "epoch": 2.5079345703125e-05,
      "step": 4109,
      "training_step_time": 0.3898956775665283
    },
    {
      "epoch": 2.508544921875e-05,
      "grad_norm": 0.419734925031662,
      "learning_rate": 9.990645931631796e-05,
      "loss": 0.1115,
      "step": 4110
    },
    {
      "epoch": 2.508544921875e-05,
      "model_forward_time": 0.11391067504882812,
      "step": 4110
    },
    {
      "epoch": 2.508544921875e-05,
      "step": 4110,
      "training_step_time": 0.3871445655822754
    },
    {
      "epoch": 2.5091552734375e-05,
      "model_forward_time": 0.11474800109863281,
      "step": 4111
    },
    {
      "epoch": 2.5091552734375e-05,
      "step": 4111,
      "training_step_time": 0.3860945701599121
    },
    {
      "epoch": 2.509765625e-05,
      "model_forward_time": 0.11624264717102051,
      "step": 4112
    },
    {
      "epoch": 2.509765625e-05,
      "step": 4112,
      "training_step_time": 0.5924310684204102
    },
    {
      "epoch": 2.5103759765625e-05,
      "model_forward_time": 0.11556005477905273,
      "step": 4113
    },
    {
      "epoch": 2.5103759765625e-05,
      "step": 4113,
      "training_step_time": 0.3951752185821533
    },
    {
      "epoch": 2.510986328125e-05,
      "model_forward_time": 0.11437487602233887,
      "step": 4114
    },
    {
      "epoch": 2.510986328125e-05,
      "step": 4114,
      "training_step_time": 0.49407148361206055
    },
    {
      "epoch": 2.5115966796875e-05,
      "model_forward_time": 0.11526799201965332,
      "step": 4115
    },
    {
      "epoch": 2.5115966796875e-05,
      "step": 4115,
      "training_step_time": 0.4826626777648926
    },
    {
      "epoch": 2.51220703125e-05,
      "model_forward_time": 0.11410641670227051,
      "step": 4116
    },
    {
      "epoch": 2.51220703125e-05,
      "step": 4116,
      "training_step_time": 0.4847543239593506
    },
    {
      "epoch": 2.5128173828125e-05,
      "model_forward_time": 0.11381864547729492,
      "step": 4117
    },
    {
      "epoch": 2.5128173828125e-05,
      "step": 4117,
      "training_step_time": 0.39249348640441895
    },
    {
      "epoch": 2.513427734375e-05,
      "model_forward_time": 0.11526679992675781,
      "step": 4118
    },
    {
      "epoch": 2.513427734375e-05,
      "step": 4118,
      "training_step_time": 0.689577579498291
    },
    {
      "epoch": 2.5140380859375e-05,
      "model_forward_time": 0.11460041999816895,
      "step": 4119
    },
    {
      "epoch": 2.5140380859375e-05,
      "step": 4119,
      "training_step_time": 0.404247522354126
    },
    {
      "epoch": 2.5146484375e-05,
      "grad_norm": 0.3762415647506714,
      "learning_rate": 9.990476684426075e-05,
      "loss": 0.1084,
      "step": 4120
    },
    {
      "epoch": 2.5146484375e-05,
      "model_forward_time": 0.11480283737182617,
      "step": 4120
    },
    {
      "epoch": 2.5146484375e-05,
      "step": 4120,
      "training_step_time": 0.47269248962402344
    },
    {
      "epoch": 2.5152587890625e-05,
      "model_forward_time": 0.11545181274414062,
      "step": 4121
    },
    {
      "epoch": 2.5152587890625e-05,
      "step": 4121,
      "training_step_time": 0.391573429107666
    },
    {
      "epoch": 2.515869140625e-05,
      "model_forward_time": 0.11493444442749023,
      "step": 4122
    },
    {
      "epoch": 2.515869140625e-05,
      "step": 4122,
      "training_step_time": 0.49141550064086914
    },
    {
      "epoch": 2.5164794921875e-05,
      "model_forward_time": 0.11475586891174316,
      "step": 4123
    },
    {
      "epoch": 2.5164794921875e-05,
      "step": 4123,
      "training_step_time": 0.3875389099121094
    },
    {
      "epoch": 2.51708984375e-05,
      "model_forward_time": 0.11510491371154785,
      "step": 4124
    },
    {
      "epoch": 2.51708984375e-05,
      "step": 4124,
      "training_step_time": 0.683830976486206
    },
    {
      "epoch": 2.5177001953125e-05,
      "model_forward_time": 0.11466145515441895,
      "step": 4125
    },
    {
      "epoch": 2.5177001953125e-05,
      "step": 4125,
      "training_step_time": 0.38904857635498047
    },
    {
      "epoch": 2.518310546875e-05,
      "model_forward_time": 0.11499214172363281,
      "step": 4126
    },
    {
      "epoch": 2.518310546875e-05,
      "step": 4126,
      "training_step_time": 0.39043569564819336
    },
    {
      "epoch": 2.5189208984375e-05,
      "model_forward_time": 0.11480283737182617,
      "step": 4127
    },
    {
      "epoch": 2.5189208984375e-05,
      "step": 4127,
      "training_step_time": 0.3832681179046631
    },
    {
      "epoch": 2.51953125e-05,
      "model_forward_time": 0.11534595489501953,
      "step": 4128
    },
    {
      "epoch": 2.51953125e-05,
      "step": 4128,
      "training_step_time": 0.40206480026245117
    },
    {
      "epoch": 2.5201416015625e-05,
      "model_forward_time": 0.11487174034118652,
      "step": 4129
    },
    {
      "epoch": 2.5201416015625e-05,
      "step": 4129,
      "training_step_time": 0.4041013717651367
    },
    {
      "epoch": 2.520751953125e-05,
      "grad_norm": 0.2985150218009949,
      "learning_rate": 9.990305921245306e-05,
      "loss": 0.1099,
      "step": 4130
    },
    {
      "epoch": 2.520751953125e-05,
      "model_forward_time": 0.11528301239013672,
      "step": 4130
    },
    {
      "epoch": 2.520751953125e-05,
      "step": 4130,
      "training_step_time": 0.5427849292755127
    },
    {
      "epoch": 2.5213623046875e-05,
      "model_forward_time": 0.11565375328063965,
      "step": 4131
    },
    {
      "epoch": 2.5213623046875e-05,
      "step": 4131,
      "training_step_time": 0.4518890380859375
    },
    {
      "epoch": 2.52197265625e-05,
      "model_forward_time": 0.11518025398254395,
      "step": 4132
    },
    {
      "epoch": 2.52197265625e-05,
      "step": 4132,
      "training_step_time": 0.43342065811157227
    },
    {
      "epoch": 2.5225830078125e-05,
      "model_forward_time": 0.11549115180969238,
      "step": 4133
    },
    {
      "epoch": 2.5225830078125e-05,
      "step": 4133,
      "training_step_time": 0.3980681896209717
    },
    {
      "epoch": 2.523193359375e-05,
      "model_forward_time": 0.11562037467956543,
      "step": 4134
    },
    {
      "epoch": 2.523193359375e-05,
      "step": 4134,
      "training_step_time": 0.4865729808807373
    },
    {
      "epoch": 2.5238037109375e-05,
      "model_forward_time": 0.11601138114929199,
      "step": 4135
    },
    {
      "epoch": 2.5238037109375e-05,
      "step": 4135,
      "training_step_time": 0.38947010040283203
    },
    {
      "epoch": 2.5244140625e-05,
      "model_forward_time": 0.11639833450317383,
      "step": 4136
    },
    {
      "epoch": 2.5244140625e-05,
      "step": 4136,
      "training_step_time": 0.6300475597381592
    },
    {
      "epoch": 2.5250244140625e-05,
      "model_forward_time": 0.11476802825927734,
      "step": 4137
    },
    {
      "epoch": 2.5250244140625e-05,
      "step": 4137,
      "training_step_time": 0.39441728591918945
    },
    {
      "epoch": 2.525634765625e-05,
      "model_forward_time": 0.11465787887573242,
      "step": 4138
    },
    {
      "epoch": 2.525634765625e-05,
      "step": 4138,
      "training_step_time": 0.384033203125
    },
    {
      "epoch": 2.5262451171875e-05,
      "model_forward_time": 0.11486959457397461,
      "step": 4139
    },
    {
      "epoch": 2.5262451171875e-05,
      "step": 4139,
      "training_step_time": 0.3918919563293457
    },
    {
      "epoch": 2.52685546875e-05,
      "grad_norm": 0.24934016168117523,
      "learning_rate": 9.990133642141359e-05,
      "loss": 0.1092,
      "step": 4140
    },
    {
      "epoch": 2.52685546875e-05,
      "model_forward_time": 0.11491537094116211,
      "step": 4140
    },
    {
      "epoch": 2.52685546875e-05,
      "step": 4140,
      "training_step_time": 0.3934907913208008
    },
    {
      "epoch": 2.5274658203125e-05,
      "model_forward_time": 0.11527514457702637,
      "step": 4141
    },
    {
      "epoch": 2.5274658203125e-05,
      "step": 4141,
      "training_step_time": 0.39575886726379395
    },
    {
      "epoch": 2.528076171875e-05,
      "model_forward_time": 0.11505651473999023,
      "step": 4142
    },
    {
      "epoch": 2.528076171875e-05,
      "step": 4142,
      "training_step_time": 0.5498502254486084
    },
    {
      "epoch": 2.5286865234375e-05,
      "model_forward_time": 0.11596536636352539,
      "step": 4143
    },
    {
      "epoch": 2.5286865234375e-05,
      "step": 4143,
      "training_step_time": 0.4203946590423584
    },
    {
      "epoch": 2.529296875e-05,
      "model_forward_time": 0.11573553085327148,
      "step": 4144
    },
    {
      "epoch": 2.529296875e-05,
      "step": 4144,
      "training_step_time": 0.44598388671875
    },
    {
      "epoch": 2.5299072265625e-05,
      "model_forward_time": 0.11553525924682617,
      "step": 4145
    },
    {
      "epoch": 2.5299072265625e-05,
      "step": 4145,
      "training_step_time": 0.3984689712524414
    },
    {
      "epoch": 2.530517578125e-05,
      "model_forward_time": 0.11469316482543945,
      "step": 4146
    },
    {
      "epoch": 2.530517578125e-05,
      "step": 4146,
      "training_step_time": 0.3909261226654053
    },
    {
      "epoch": 2.5311279296875e-05,
      "model_forward_time": 0.11583280563354492,
      "step": 4147
    },
    {
      "epoch": 2.5311279296875e-05,
      "step": 4147,
      "training_step_time": 0.4044973850250244
    },
    {
      "epoch": 2.53173828125e-05,
      "model_forward_time": 0.11521458625793457,
      "step": 4148
    },
    {
      "epoch": 2.53173828125e-05,
      "step": 4148,
      "training_step_time": 0.41286516189575195
    },
    {
      "epoch": 2.5323486328125e-05,
      "model_forward_time": 0.1159982681274414,
      "step": 4149
    },
    {
      "epoch": 2.5323486328125e-05,
      "step": 4149,
      "training_step_time": 0.5126876831054688
    },
    {
      "epoch": 2.532958984375e-05,
      "grad_norm": 0.3180139362812042,
      "learning_rate": 9.989959847166567e-05,
      "loss": 0.1071,
      "step": 4150
    },
    {
      "epoch": 2.532958984375e-05,
      "model_forward_time": 0.11555933952331543,
      "step": 4150
    },
    {
      "epoch": 2.532958984375e-05,
      "step": 4150,
      "training_step_time": 0.4474043846130371
    },
    {
      "epoch": 2.5335693359375e-05,
      "model_forward_time": 0.11467456817626953,
      "step": 4151
    },
    {
      "epoch": 2.5335693359375e-05,
      "step": 4151,
      "training_step_time": 0.38915228843688965
    },
    {
      "epoch": 2.5341796875e-05,
      "model_forward_time": 0.11544108390808105,
      "step": 4152
    },
    {
      "epoch": 2.5341796875e-05,
      "step": 4152,
      "training_step_time": 0.39548826217651367
    },
    {
      "epoch": 2.5347900390625e-05,
      "model_forward_time": 0.11488032341003418,
      "step": 4153
    },
    {
      "epoch": 2.5347900390625e-05,
      "step": 4153,
      "training_step_time": 0.392183780670166
    },
    {
      "epoch": 2.535400390625e-05,
      "model_forward_time": 0.11514782905578613,
      "step": 4154
    },
    {
      "epoch": 2.535400390625e-05,
      "step": 4154,
      "training_step_time": 0.5985217094421387
    },
    {
      "epoch": 2.5360107421875e-05,
      "model_forward_time": 0.11484909057617188,
      "step": 4155
    },
    {
      "epoch": 2.5360107421875e-05,
      "step": 4155,
      "training_step_time": 0.38622212409973145
    },
    {
      "epoch": 2.53662109375e-05,
      "model_forward_time": 0.11544203758239746,
      "step": 4156
    },
    {
      "epoch": 2.53662109375e-05,
      "step": 4156,
      "training_step_time": 0.41940903663635254
    },
    {
      "epoch": 2.5372314453125e-05,
      "model_forward_time": 0.11531352996826172,
      "step": 4157
    },
    {
      "epoch": 2.5372314453125e-05,
      "step": 4157,
      "training_step_time": 0.48912644386291504
    },
    {
      "epoch": 2.537841796875e-05,
      "model_forward_time": 0.11471271514892578,
      "step": 4158
    },
    {
      "epoch": 2.537841796875e-05,
      "step": 4158,
      "training_step_time": 0.4567289352416992
    },
    {
      "epoch": 2.5384521484375e-05,
      "model_forward_time": 0.1147620677947998,
      "step": 4159
    },
    {
      "epoch": 2.5384521484375e-05,
      "step": 4159,
      "training_step_time": 0.3932230472564697
    },
    {
      "epoch": 2.5390625e-05,
      "grad_norm": 0.3006206452846527,
      "learning_rate": 9.989784536373726e-05,
      "loss": 0.0995,
      "step": 4160
    },
    {
      "epoch": 2.5390625e-05,
      "model_forward_time": 0.11478638648986816,
      "step": 4160
    },
    {
      "epoch": 2.5390625e-05,
      "step": 4160,
      "training_step_time": 0.3896050453186035
    },
    {
      "epoch": 2.5396728515625e-05,
      "model_forward_time": 0.11504483222961426,
      "step": 4161
    },
    {
      "epoch": 2.5396728515625e-05,
      "step": 4161,
      "training_step_time": 0.46273136138916016
    },
    {
      "epoch": 2.540283203125e-05,
      "model_forward_time": 0.11496615409851074,
      "step": 4162
    },
    {
      "epoch": 2.540283203125e-05,
      "step": 4162,
      "training_step_time": 0.38330793380737305
    },
    {
      "epoch": 2.5408935546875e-05,
      "model_forward_time": 0.11538028717041016,
      "step": 4163
    },
    {
      "epoch": 2.5408935546875e-05,
      "step": 4163,
      "training_step_time": 0.5079953670501709
    },
    {
      "epoch": 2.54150390625e-05,
      "model_forward_time": 0.11518001556396484,
      "step": 4164
    },
    {
      "epoch": 2.54150390625e-05,
      "step": 4164,
      "training_step_time": 0.4474482536315918
    },
    {
      "epoch": 2.5421142578125e-05,
      "model_forward_time": 0.11837887763977051,
      "step": 4165
    },
    {
      "epoch": 2.5421142578125e-05,
      "step": 4165,
      "training_step_time": 0.4088752269744873
    },
    {
      "epoch": 2.542724609375e-05,
      "model_forward_time": 0.11458969116210938,
      "step": 4166
    },
    {
      "epoch": 2.542724609375e-05,
      "step": 4166,
      "training_step_time": 0.38518643379211426
    },
    {
      "epoch": 2.5433349609375e-05,
      "model_forward_time": 0.11512231826782227,
      "step": 4167
    },
    {
      "epoch": 2.5433349609375e-05,
      "step": 4167,
      "training_step_time": 0.38919734954833984
    },
    {
      "epoch": 2.5439453125e-05,
      "model_forward_time": 0.11514854431152344,
      "step": 4168
    },
    {
      "epoch": 2.5439453125e-05,
      "step": 4168,
      "training_step_time": 0.38454389572143555
    },
    {
      "epoch": 2.5445556640625e-05,
      "model_forward_time": 0.11463737487792969,
      "step": 4169
    },
    {
      "epoch": 2.5445556640625e-05,
      "step": 4169,
      "training_step_time": 0.3880925178527832
    },
    {
      "epoch": 2.545166015625e-05,
      "grad_norm": 0.3119199872016907,
      "learning_rate": 9.989607709816091e-05,
      "loss": 0.1111,
      "step": 4170
    },
    {
      "epoch": 2.545166015625e-05,
      "model_forward_time": 0.11426210403442383,
      "step": 4170
    },
    {
      "epoch": 2.545166015625e-05,
      "step": 4170,
      "training_step_time": 0.39215612411499023
    },
    {
      "epoch": 2.5457763671875e-05,
      "model_forward_time": 0.11501240730285645,
      "step": 4171
    },
    {
      "epoch": 2.5457763671875e-05,
      "step": 4171,
      "training_step_time": 0.42943239212036133
    },
    {
      "epoch": 2.54638671875e-05,
      "model_forward_time": 0.11682939529418945,
      "step": 4172
    },
    {
      "epoch": 2.54638671875e-05,
      "step": 4172,
      "training_step_time": 0.4451720714569092
    },
    {
      "epoch": 2.5469970703125e-05,
      "model_forward_time": 0.11653852462768555,
      "step": 4173
    },
    {
      "epoch": 2.5469970703125e-05,
      "step": 4173,
      "training_step_time": 0.5074682235717773
    },
    {
      "epoch": 2.547607421875e-05,
      "model_forward_time": 0.11491131782531738,
      "step": 4174
    },
    {
      "epoch": 2.547607421875e-05,
      "step": 4174,
      "training_step_time": 0.39055752754211426
    },
    {
      "epoch": 2.5482177734375e-05,
      "model_forward_time": 0.11541557312011719,
      "step": 4175
    },
    {
      "epoch": 2.5482177734375e-05,
      "step": 4175,
      "training_step_time": 0.3898022174835205
    },
    {
      "epoch": 2.548828125e-05,
      "model_forward_time": 0.11544108390808105,
      "step": 4176
    },
    {
      "epoch": 2.548828125e-05,
      "step": 4176,
      "training_step_time": 0.4064621925354004
    },
    {
      "epoch": 2.5494384765625e-05,
      "model_forward_time": 0.11525607109069824,
      "step": 4177
    },
    {
      "epoch": 2.5494384765625e-05,
      "step": 4177,
      "training_step_time": 0.4555819034576416
    },
    {
      "epoch": 2.550048828125e-05,
      "model_forward_time": 0.11549878120422363,
      "step": 4178
    },
    {
      "epoch": 2.550048828125e-05,
      "step": 4178,
      "training_step_time": 0.47178101539611816
    },
    {
      "epoch": 2.5506591796875e-05,
      "model_forward_time": 0.1164083480834961,
      "step": 4179
    },
    {
      "epoch": 2.5506591796875e-05,
      "step": 4179,
      "training_step_time": 0.49851369857788086
    },
    {
      "epoch": 2.55126953125e-05,
      "grad_norm": 0.21686910092830658,
      "learning_rate": 9.989429367547377e-05,
      "loss": 0.1132,
      "step": 4180
    },
    {
      "epoch": 2.55126953125e-05,
      "model_forward_time": 0.11487770080566406,
      "step": 4180
    },
    {
      "epoch": 2.55126953125e-05,
      "step": 4180,
      "training_step_time": 0.39814329147338867
    },
    {
      "epoch": 2.5518798828125e-05,
      "model_forward_time": 0.11601114273071289,
      "step": 4181
    },
    {
      "epoch": 2.5518798828125e-05,
      "step": 4181,
      "training_step_time": 0.39256954193115234
    },
    {
      "epoch": 2.552490234375e-05,
      "model_forward_time": 0.1152803897857666,
      "step": 4182
    },
    {
      "epoch": 2.552490234375e-05,
      "step": 4182,
      "training_step_time": 0.3901505470275879
    },
    {
      "epoch": 2.5531005859375e-05,
      "model_forward_time": 0.11537766456604004,
      "step": 4183
    },
    {
      "epoch": 2.5531005859375e-05,
      "step": 4183,
      "training_step_time": 0.3965592384338379
    },
    {
      "epoch": 2.5537109375e-05,
      "model_forward_time": 0.11493039131164551,
      "step": 4184
    },
    {
      "epoch": 2.5537109375e-05,
      "step": 4184,
      "training_step_time": 0.39085865020751953
    },
    {
      "epoch": 2.5543212890625e-05,
      "model_forward_time": 0.11494827270507812,
      "step": 4185
    },
    {
      "epoch": 2.5543212890625e-05,
      "step": 4185,
      "training_step_time": 0.4197885990142822
    },
    {
      "epoch": 2.554931640625e-05,
      "model_forward_time": 0.11578774452209473,
      "step": 4186
    },
    {
      "epoch": 2.554931640625e-05,
      "step": 4186,
      "training_step_time": 0.4168524742126465
    },
    {
      "epoch": 2.5555419921875e-05,
      "model_forward_time": 0.11535859107971191,
      "step": 4187
    },
    {
      "epoch": 2.5555419921875e-05,
      "step": 4187,
      "training_step_time": 0.4419276714324951
    },
    {
      "epoch": 2.55615234375e-05,
      "model_forward_time": 0.11523985862731934,
      "step": 4188
    },
    {
      "epoch": 2.55615234375e-05,
      "step": 4188,
      "training_step_time": 0.5253067016601562
    },
    {
      "epoch": 2.5567626953125e-05,
      "model_forward_time": 0.11483287811279297,
      "step": 4189
    },
    {
      "epoch": 2.5567626953125e-05,
      "step": 4189,
      "training_step_time": 0.4000823497772217
    },
    {
      "epoch": 2.557373046875e-05,
      "grad_norm": 0.3207443654537201,
      "learning_rate": 9.989249509621759e-05,
      "loss": 0.1076,
      "step": 4190
    },
    {
      "epoch": 2.557373046875e-05,
      "model_forward_time": 0.11493206024169922,
      "step": 4190
    },
    {
      "epoch": 2.557373046875e-05,
      "step": 4190,
      "training_step_time": 0.39829301834106445
    },
    {
      "epoch": 2.5579833984375e-05,
      "model_forward_time": 0.11526179313659668,
      "step": 4191
    },
    {
      "epoch": 2.5579833984375e-05,
      "step": 4191,
      "training_step_time": 0.3686232566833496
    },
    {
      "epoch": 2.55859375e-05,
      "model_forward_time": 0.11503219604492188,
      "step": 4192
    },
    {
      "epoch": 2.55859375e-05,
      "step": 4192,
      "training_step_time": 0.49802565574645996
    },
    {
      "epoch": 2.5592041015625e-05,
      "model_forward_time": 0.11716699600219727,
      "step": 4193
    },
    {
      "epoch": 2.5592041015625e-05,
      "step": 4193,
      "training_step_time": 0.4899258613586426
    },
    {
      "epoch": 2.559814453125e-05,
      "model_forward_time": 0.11527752876281738,
      "step": 4194
    },
    {
      "epoch": 2.559814453125e-05,
      "step": 4194,
      "training_step_time": 0.3881242275238037
    },
    {
      "epoch": 2.5604248046875e-05,
      "model_forward_time": 0.11490678787231445,
      "step": 4195
    },
    {
      "epoch": 2.5604248046875e-05,
      "step": 4195,
      "training_step_time": 0.38519716262817383
    },
    {
      "epoch": 2.56103515625e-05,
      "model_forward_time": 0.11439013481140137,
      "step": 4196
    },
    {
      "epoch": 2.56103515625e-05,
      "step": 4196,
      "training_step_time": 0.4074747562408447
    },
    {
      "epoch": 2.5616455078125e-05,
      "model_forward_time": 0.11529397964477539,
      "step": 4197
    },
    {
      "epoch": 2.5616455078125e-05,
      "step": 4197,
      "training_step_time": 0.39321351051330566
    },
    {
      "epoch": 2.562255859375e-05,
      "model_forward_time": 0.11695194244384766,
      "step": 4198
    },
    {
      "epoch": 2.562255859375e-05,
      "step": 4198,
      "training_step_time": 0.41538119316101074
    },
    {
      "epoch": 2.5628662109375e-05,
      "model_forward_time": 0.11483168601989746,
      "step": 4199
    },
    {
      "epoch": 2.5628662109375e-05,
      "step": 4199,
      "training_step_time": 0.4313700199127197
    },
    {
      "epoch": 2.5634765625e-05,
      "grad_norm": 0.35547974705696106,
      "learning_rate": 9.989068136093873e-05,
      "loss": 0.1082,
      "step": 4200
    },
    {
      "epoch": 2.5634765625e-05,
      "model_forward_time": 0.116943359375,
      "step": 4200
    },
    {
      "epoch": 2.5634765625e-05,
      "step": 4200,
      "training_step_time": 0.4409360885620117
    },
    {
      "epoch": 2.5640869140625e-05,
      "model_forward_time": 0.11547160148620605,
      "step": 4201
    },
    {
      "epoch": 2.5640869140625e-05,
      "step": 4201,
      "training_step_time": 0.4144904613494873
    },
    {
      "epoch": 2.564697265625e-05,
      "model_forward_time": 0.11496257781982422,
      "step": 4202
    },
    {
      "epoch": 2.564697265625e-05,
      "step": 4202,
      "training_step_time": 0.46719837188720703
    },
    {
      "epoch": 2.5653076171875e-05,
      "model_forward_time": 0.11580824851989746,
      "step": 4203
    },
    {
      "epoch": 2.5653076171875e-05,
      "step": 4203,
      "training_step_time": 0.3931589126586914
    },
    {
      "epoch": 2.56591796875e-05,
      "model_forward_time": 0.11560392379760742,
      "step": 4204
    },
    {
      "epoch": 2.56591796875e-05,
      "step": 4204,
      "training_step_time": 0.3946878910064697
    },
    {
      "epoch": 2.5665283203125e-05,
      "model_forward_time": 0.11546874046325684,
      "step": 4205
    },
    {
      "epoch": 2.5665283203125e-05,
      "step": 4205,
      "training_step_time": 0.3705925941467285
    },
    {
      "epoch": 2.567138671875e-05,
      "model_forward_time": 0.11546015739440918,
      "step": 4206
    },
    {
      "epoch": 2.567138671875e-05,
      "step": 4206,
      "training_step_time": 0.44074010848999023
    },
    {
      "epoch": 2.5677490234375e-05,
      "model_forward_time": 0.11523056030273438,
      "step": 4207
    },
    {
      "epoch": 2.5677490234375e-05,
      "step": 4207,
      "training_step_time": 0.5179927349090576
    },
    {
      "epoch": 2.568359375e-05,
      "model_forward_time": 0.11521506309509277,
      "step": 4208
    },
    {
      "epoch": 2.568359375e-05,
      "step": 4208,
      "training_step_time": 0.3938331604003906
    },
    {
      "epoch": 2.5689697265625e-05,
      "model_forward_time": 0.11498188972473145,
      "step": 4209
    },
    {
      "epoch": 2.5689697265625e-05,
      "step": 4209,
      "training_step_time": 0.4026174545288086
    },
    {
      "epoch": 2.569580078125e-05,
      "grad_norm": 0.28489169478416443,
      "learning_rate": 9.988885247018817e-05,
      "loss": 0.1003,
      "step": 4210
    },
    {
      "epoch": 2.569580078125e-05,
      "model_forward_time": 0.11471056938171387,
      "step": 4210
    },
    {
      "epoch": 2.569580078125e-05,
      "step": 4210,
      "training_step_time": 0.3911912441253662
    },
    {
      "epoch": 2.5701904296875e-05,
      "model_forward_time": 0.11454439163208008,
      "step": 4211
    },
    {
      "epoch": 2.5701904296875e-05,
      "step": 4211,
      "training_step_time": 0.39011645317077637
    },
    {
      "epoch": 2.57080078125e-05,
      "model_forward_time": 0.1151573657989502,
      "step": 4212
    },
    {
      "epoch": 2.57080078125e-05,
      "step": 4212,
      "training_step_time": 0.39705538749694824
    },
    {
      "epoch": 2.5714111328125e-05,
      "model_forward_time": 0.11515474319458008,
      "step": 4213
    },
    {
      "epoch": 2.5714111328125e-05,
      "step": 4213,
      "training_step_time": 0.40691137313842773
    },
    {
      "epoch": 2.572021484375e-05,
      "model_forward_time": 0.11478137969970703,
      "step": 4214
    },
    {
      "epoch": 2.572021484375e-05,
      "step": 4214,
      "training_step_time": 0.39714527130126953
    },
    {
      "epoch": 2.5726318359375e-05,
      "model_forward_time": 0.11574769020080566,
      "step": 4215
    },
    {
      "epoch": 2.5726318359375e-05,
      "step": 4215,
      "training_step_time": 0.4150693416595459
    },
    {
      "epoch": 2.5732421875e-05,
      "model_forward_time": 0.11528992652893066,
      "step": 4216
    },
    {
      "epoch": 2.5732421875e-05,
      "step": 4216,
      "training_step_time": 0.43704867362976074
    },
    {
      "epoch": 2.5738525390625e-05,
      "model_forward_time": 0.11478853225708008,
      "step": 4217
    },
    {
      "epoch": 2.5738525390625e-05,
      "step": 4217,
      "training_step_time": 0.4330332279205322
    },
    {
      "epoch": 2.574462890625e-05,
      "model_forward_time": 0.11473345756530762,
      "step": 4218
    },
    {
      "epoch": 2.574462890625e-05,
      "step": 4218,
      "training_step_time": 0.395066499710083
    },
    {
      "epoch": 2.5750732421875e-05,
      "model_forward_time": 0.11550354957580566,
      "step": 4219
    },
    {
      "epoch": 2.5750732421875e-05,
      "step": 4219,
      "training_step_time": 0.38947415351867676
    },
    {
      "epoch": 2.57568359375e-05,
      "grad_norm": 0.336209774017334,
      "learning_rate": 9.988700842452146e-05,
      "loss": 0.1058,
      "step": 4220
    },
    {
      "epoch": 2.57568359375e-05,
      "model_forward_time": 0.11666417121887207,
      "step": 4220
    },
    {
      "epoch": 2.57568359375e-05,
      "step": 4220,
      "training_step_time": 0.3672161102294922
    },
    {
      "epoch": 2.5762939453125e-05,
      "model_forward_time": 0.11540746688842773,
      "step": 4221
    },
    {
      "epoch": 2.5762939453125e-05,
      "step": 4221,
      "training_step_time": 0.4419887065887451
    },
    {
      "epoch": 2.576904296875e-05,
      "model_forward_time": 0.11577796936035156,
      "step": 4222
    },
    {
      "epoch": 2.576904296875e-05,
      "step": 4222,
      "training_step_time": 0.49199748039245605
    },
    {
      "epoch": 2.5775146484375e-05,
      "model_forward_time": 0.11638975143432617,
      "step": 4223
    },
    {
      "epoch": 2.5775146484375e-05,
      "step": 4223,
      "training_step_time": 0.3986225128173828
    },
    {
      "epoch": 2.578125e-05,
      "model_forward_time": 0.11498236656188965,
      "step": 4224
    },
    {
      "epoch": 2.578125e-05,
      "step": 4224,
      "training_step_time": 0.3867344856262207
    },
    {
      "epoch": 2.5787353515625e-05,
      "model_forward_time": 0.11540007591247559,
      "step": 4225
    },
    {
      "epoch": 2.5787353515625e-05,
      "step": 4225,
      "training_step_time": 0.3939967155456543
    },
    {
      "epoch": 2.579345703125e-05,
      "model_forward_time": 0.11519050598144531,
      "step": 4226
    },
    {
      "epoch": 2.579345703125e-05,
      "step": 4226,
      "training_step_time": 0.433734655380249
    },
    {
      "epoch": 2.5799560546875e-05,
      "model_forward_time": 0.1149129867553711,
      "step": 4227
    },
    {
      "epoch": 2.5799560546875e-05,
      "step": 4227,
      "training_step_time": 0.40231847763061523
    },
    {
      "epoch": 2.58056640625e-05,
      "model_forward_time": 0.11567258834838867,
      "step": 4228
    },
    {
      "epoch": 2.58056640625e-05,
      "step": 4228,
      "training_step_time": 0.39471435546875
    },
    {
      "epoch": 2.5811767578125e-05,
      "model_forward_time": 0.1158599853515625,
      "step": 4229
    },
    {
      "epoch": 2.5811767578125e-05,
      "step": 4229,
      "training_step_time": 0.3869960308074951
    },
    {
      "epoch": 2.581787109375e-05,
      "grad_norm": 0.2929883301258087,
      "learning_rate": 9.988514922449879e-05,
      "loss": 0.0997,
      "step": 4230
    },
    {
      "epoch": 2.581787109375e-05,
      "model_forward_time": 0.1156303882598877,
      "step": 4230
    },
    {
      "epoch": 2.581787109375e-05,
      "step": 4230,
      "training_step_time": 0.5020678043365479
    },
    {
      "epoch": 2.5823974609375e-05,
      "model_forward_time": 0.11496257781982422,
      "step": 4231
    },
    {
      "epoch": 2.5823974609375e-05,
      "step": 4231,
      "training_step_time": 0.468843936920166
    },
    {
      "epoch": 2.5830078125e-05,
      "model_forward_time": 0.11471199989318848,
      "step": 4232
    },
    {
      "epoch": 2.5830078125e-05,
      "step": 4232,
      "training_step_time": 0.39716649055480957
    },
    {
      "epoch": 2.5836181640625e-05,
      "model_forward_time": 0.1149756908416748,
      "step": 4233
    },
    {
      "epoch": 2.5836181640625e-05,
      "step": 4233,
      "training_step_time": 0.40136265754699707
    },
    {
      "epoch": 2.584228515625e-05,
      "model_forward_time": 0.11509084701538086,
      "step": 4234
    },
    {
      "epoch": 2.584228515625e-05,
      "step": 4234,
      "training_step_time": 0.3963143825531006
    },
    {
      "epoch": 2.5848388671875e-05,
      "model_forward_time": 0.1164100170135498,
      "step": 4235
    },
    {
      "epoch": 2.5848388671875e-05,
      "step": 4235,
      "training_step_time": 0.4243755340576172
    },
    {
      "epoch": 2.58544921875e-05,
      "model_forward_time": 0.11545300483703613,
      "step": 4236
    },
    {
      "epoch": 2.58544921875e-05,
      "step": 4236,
      "training_step_time": 0.4442927837371826
    },
    {
      "epoch": 2.5860595703125e-05,
      "model_forward_time": 0.11546635627746582,
      "step": 4237
    },
    {
      "epoch": 2.5860595703125e-05,
      "step": 4237,
      "training_step_time": 0.5004994869232178
    },
    {
      "epoch": 2.586669921875e-05,
      "model_forward_time": 0.11504340171813965,
      "step": 4238
    },
    {
      "epoch": 2.586669921875e-05,
      "step": 4238,
      "training_step_time": 0.3978085517883301
    },
    {
      "epoch": 2.5872802734375e-05,
      "model_forward_time": 0.11510109901428223,
      "step": 4239
    },
    {
      "epoch": 2.5872802734375e-05,
      "step": 4239,
      "training_step_time": 0.40570592880249023
    },
    {
      "epoch": 2.587890625e-05,
      "grad_norm": 0.3122323155403137,
      "learning_rate": 9.988327487068492e-05,
      "loss": 0.1119,
      "step": 4240
    },
    {
      "epoch": 2.587890625e-05,
      "model_forward_time": 0.11473870277404785,
      "step": 4240
    },
    {
      "epoch": 2.587890625e-05,
      "step": 4240,
      "training_step_time": 0.41904640197753906
    },
    {
      "epoch": 2.5885009765625e-05,
      "model_forward_time": 0.11474227905273438,
      "step": 4241
    },
    {
      "epoch": 2.5885009765625e-05,
      "step": 4241,
      "training_step_time": 0.4053664207458496
    },
    {
      "epoch": 2.589111328125e-05,
      "model_forward_time": 0.11475992202758789,
      "step": 4242
    },
    {
      "epoch": 2.589111328125e-05,
      "step": 4242,
      "training_step_time": 0.39092016220092773
    },
    {
      "epoch": 2.5897216796875e-05,
      "model_forward_time": 0.11481928825378418,
      "step": 4243
    },
    {
      "epoch": 2.5897216796875e-05,
      "step": 4243,
      "training_step_time": 0.3964200019836426
    },
    {
      "epoch": 2.59033203125e-05,
      "model_forward_time": 0.11595702171325684,
      "step": 4244
    },
    {
      "epoch": 2.59033203125e-05,
      "step": 4244,
      "training_step_time": 0.44341373443603516
    },
    {
      "epoch": 2.5909423828125e-05,
      "model_forward_time": 0.11560583114624023,
      "step": 4245
    },
    {
      "epoch": 2.5909423828125e-05,
      "step": 4245,
      "training_step_time": 0.4897596836090088
    },
    {
      "epoch": 2.591552734375e-05,
      "model_forward_time": 0.11456632614135742,
      "step": 4246
    },
    {
      "epoch": 2.591552734375e-05,
      "step": 4246,
      "training_step_time": 0.48169517517089844
    },
    {
      "epoch": 2.5921630859375e-05,
      "model_forward_time": 0.11521363258361816,
      "step": 4247
    },
    {
      "epoch": 2.5921630859375e-05,
      "step": 4247,
      "training_step_time": 0.38681530952453613
    },
    {
      "epoch": 2.5927734375e-05,
      "model_forward_time": 0.11505270004272461,
      "step": 4248
    },
    {
      "epoch": 2.5927734375e-05,
      "step": 4248,
      "training_step_time": 0.3968536853790283
    },
    {
      "epoch": 2.5933837890625e-05,
      "model_forward_time": 0.11640739440917969,
      "step": 4249
    },
    {
      "epoch": 2.5933837890625e-05,
      "step": 4249,
      "training_step_time": 0.4118518829345703
    },
    {
      "epoch": 2.593994140625e-05,
      "grad_norm": 0.32206031680107117,
      "learning_rate": 9.988138536364922e-05,
      "loss": 0.1103,
      "step": 4250
    },
    {
      "epoch": 2.593994140625e-05,
      "model_forward_time": 0.11605548858642578,
      "step": 4250
    },
    {
      "epoch": 2.593994140625e-05,
      "step": 4250,
      "training_step_time": 0.48409557342529297
    },
    {
      "epoch": 2.5946044921875e-05,
      "model_forward_time": 0.1162717342376709,
      "step": 4251
    },
    {
      "epoch": 2.5946044921875e-05,
      "step": 4251,
      "training_step_time": 0.487807035446167
    },
    {
      "epoch": 2.59521484375e-05,
      "model_forward_time": 0.11518740653991699,
      "step": 4252
    },
    {
      "epoch": 2.59521484375e-05,
      "step": 4252,
      "training_step_time": 0.4286937713623047
    },
    {
      "epoch": 2.5958251953125e-05,
      "model_forward_time": 0.11482882499694824,
      "step": 4253
    },
    {
      "epoch": 2.5958251953125e-05,
      "step": 4253,
      "training_step_time": 0.4488377571105957
    },
    {
      "epoch": 2.596435546875e-05,
      "model_forward_time": 0.1149604320526123,
      "step": 4254
    },
    {
      "epoch": 2.596435546875e-05,
      "step": 4254,
      "training_step_time": 0.4538130760192871
    },
    {
      "epoch": 2.5970458984375e-05,
      "model_forward_time": 0.11397695541381836,
      "step": 4255
    },
    {
      "epoch": 2.5970458984375e-05,
      "step": 4255,
      "training_step_time": 0.3996725082397461
    },
    {
      "epoch": 2.59765625e-05,
      "model_forward_time": 0.11535978317260742,
      "step": 4256
    },
    {
      "epoch": 2.59765625e-05,
      "step": 4256,
      "training_step_time": 0.7426095008850098
    },
    {
      "epoch": 2.5982666015625e-05,
      "model_forward_time": 0.11501121520996094,
      "step": 4257
    },
    {
      "epoch": 2.5982666015625e-05,
      "step": 4257,
      "training_step_time": 0.39785146713256836
    },
    {
      "epoch": 2.598876953125e-05,
      "model_forward_time": 0.11453747749328613,
      "step": 4258
    },
    {
      "epoch": 2.598876953125e-05,
      "step": 4258,
      "training_step_time": 0.4032869338989258
    },
    {
      "epoch": 2.5994873046875e-05,
      "model_forward_time": 0.11443400382995605,
      "step": 4259
    },
    {
      "epoch": 2.5994873046875e-05,
      "step": 4259,
      "training_step_time": 0.44099950790405273
    },
    {
      "epoch": 2.60009765625e-05,
      "grad_norm": 0.20108716189861298,
      "learning_rate": 9.987948070396571e-05,
      "loss": 0.106,
      "step": 4260
    },
    {
      "epoch": 2.60009765625e-05,
      "model_forward_time": 0.11420392990112305,
      "step": 4260
    },
    {
      "epoch": 2.60009765625e-05,
      "step": 4260,
      "training_step_time": 0.49865007400512695
    },
    {
      "epoch": 2.6007080078125e-05,
      "model_forward_time": 0.11436223983764648,
      "step": 4261
    },
    {
      "epoch": 2.6007080078125e-05,
      "step": 4261,
      "training_step_time": 0.3841822147369385
    },
    {
      "epoch": 2.601318359375e-05,
      "model_forward_time": 0.11466860771179199,
      "step": 4262
    },
    {
      "epoch": 2.601318359375e-05,
      "step": 4262,
      "training_step_time": 0.9453308582305908
    },
    {
      "epoch": 2.6019287109375e-05,
      "model_forward_time": 0.1140444278717041,
      "step": 4263
    },
    {
      "epoch": 2.6019287109375e-05,
      "step": 4263,
      "training_step_time": 0.42609453201293945
    },
    {
      "epoch": 2.6025390625e-05,
      "model_forward_time": 0.11365365982055664,
      "step": 4264
    },
    {
      "epoch": 2.6025390625e-05,
      "step": 4264,
      "training_step_time": 0.4493906497955322
    },
    {
      "epoch": 2.6031494140625e-05,
      "model_forward_time": 0.11485910415649414,
      "step": 4265
    },
    {
      "epoch": 2.6031494140625e-05,
      "step": 4265,
      "training_step_time": 0.3891887664794922
    },
    {
      "epoch": 2.603759765625e-05,
      "model_forward_time": 0.11430811882019043,
      "step": 4266
    },
    {
      "epoch": 2.603759765625e-05,
      "step": 4266,
      "training_step_time": 0.37727975845336914
    },
    {
      "epoch": 2.6043701171875e-05,
      "model_forward_time": 0.11434721946716309,
      "step": 4267
    },
    {
      "epoch": 2.6043701171875e-05,
      "step": 4267,
      "training_step_time": 0.38394689559936523
    },
    {
      "epoch": 2.60498046875e-05,
      "model_forward_time": 0.1151580810546875,
      "step": 4268
    },
    {
      "epoch": 2.60498046875e-05,
      "step": 4268,
      "training_step_time": 0.6105844974517822
    },
    {
      "epoch": 2.6055908203125e-05,
      "model_forward_time": 0.11490559577941895,
      "step": 4269
    },
    {
      "epoch": 2.6055908203125e-05,
      "step": 4269,
      "training_step_time": 0.38877296447753906
    },
    {
      "epoch": 2.606201171875e-05,
      "grad_norm": 0.34491705894470215,
      "learning_rate": 9.987756089221296e-05,
      "loss": 0.1027,
      "step": 4270
    },
    {
      "epoch": 2.606201171875e-05,
      "model_forward_time": 0.11500978469848633,
      "step": 4270
    },
    {
      "epoch": 2.606201171875e-05,
      "step": 4270,
      "training_step_time": 0.38538694381713867
    },
    {
      "epoch": 2.6068115234375e-05,
      "model_forward_time": 0.11464738845825195,
      "step": 4271
    },
    {
      "epoch": 2.6068115234375e-05,
      "step": 4271,
      "training_step_time": 0.43262290954589844
    },
    {
      "epoch": 2.607421875e-05,
      "model_forward_time": 0.11500024795532227,
      "step": 4272
    },
    {
      "epoch": 2.607421875e-05,
      "step": 4272,
      "training_step_time": 0.41099071502685547
    },
    {
      "epoch": 2.6080322265625e-05,
      "model_forward_time": 0.11523866653442383,
      "step": 4273
    },
    {
      "epoch": 2.6080322265625e-05,
      "step": 4273,
      "training_step_time": 0.39299678802490234
    },
    {
      "epoch": 2.608642578125e-05,
      "model_forward_time": 0.1156930923461914,
      "step": 4274
    },
    {
      "epoch": 2.608642578125e-05,
      "step": 4274,
      "training_step_time": 0.9307732582092285
    },
    {
      "epoch": 2.6092529296875e-05,
      "model_forward_time": 0.11623573303222656,
      "step": 4275
    },
    {
      "epoch": 2.6092529296875e-05,
      "step": 4275,
      "training_step_time": 0.4725339412689209
    },
    {
      "epoch": 2.60986328125e-05,
      "model_forward_time": 0.11474800109863281,
      "step": 4276
    },
    {
      "epoch": 2.60986328125e-05,
      "step": 4276,
      "training_step_time": 0.4841153621673584
    },
    {
      "epoch": 2.6104736328125e-05,
      "model_forward_time": 0.11411499977111816,
      "step": 4277
    },
    {
      "epoch": 2.6104736328125e-05,
      "step": 4277,
      "training_step_time": 0.4664590358734131
    },
    {
      "epoch": 2.611083984375e-05,
      "model_forward_time": 0.11513376235961914,
      "step": 4278
    },
    {
      "epoch": 2.611083984375e-05,
      "step": 4278,
      "training_step_time": 0.45638012886047363
    },
    {
      "epoch": 2.6116943359375e-05,
      "model_forward_time": 0.11442875862121582,
      "step": 4279
    },
    {
      "epoch": 2.6116943359375e-05,
      "step": 4279,
      "training_step_time": 0.3916664123535156
    },
    {
      "epoch": 2.6123046875e-05,
      "grad_norm": 0.4100404977798462,
      "learning_rate": 9.987562592897413e-05,
      "loss": 0.1055,
      "step": 4280
    },
    {
      "epoch": 2.6123046875e-05,
      "model_forward_time": 0.11525225639343262,
      "step": 4280
    },
    {
      "epoch": 2.6123046875e-05,
      "step": 4280,
      "training_step_time": 0.39462971687316895
    },
    {
      "epoch": 2.6129150390625e-05,
      "model_forward_time": 0.11457943916320801,
      "step": 4281
    },
    {
      "epoch": 2.6129150390625e-05,
      "step": 4281,
      "training_step_time": 0.39226508140563965
    },
    {
      "epoch": 2.613525390625e-05,
      "model_forward_time": 0.11430931091308594,
      "step": 4282
    },
    {
      "epoch": 2.613525390625e-05,
      "step": 4282,
      "training_step_time": 0.38335561752319336
    },
    {
      "epoch": 2.6141357421875e-05,
      "model_forward_time": 0.11523103713989258,
      "step": 4283
    },
    {
      "epoch": 2.6141357421875e-05,
      "step": 4283,
      "training_step_time": 0.38764047622680664
    },
    {
      "epoch": 2.61474609375e-05,
      "model_forward_time": 0.11545681953430176,
      "step": 4284
    },
    {
      "epoch": 2.61474609375e-05,
      "step": 4284,
      "training_step_time": 0.48288965225219727
    },
    {
      "epoch": 2.6153564453125e-05,
      "model_forward_time": 0.11520814895629883,
      "step": 4285
    },
    {
      "epoch": 2.6153564453125e-05,
      "step": 4285,
      "training_step_time": 0.4387533664703369
    },
    {
      "epoch": 2.615966796875e-05,
      "model_forward_time": 0.11500215530395508,
      "step": 4286
    },
    {
      "epoch": 2.615966796875e-05,
      "step": 4286,
      "training_step_time": 0.7979631423950195
    },
    {
      "epoch": 2.6165771484375e-05,
      "model_forward_time": 0.11465048789978027,
      "step": 4287
    },
    {
      "epoch": 2.6165771484375e-05,
      "step": 4287,
      "training_step_time": 0.3873591423034668
    },
    {
      "epoch": 2.6171875e-05,
      "model_forward_time": 0.11419463157653809,
      "step": 4288
    },
    {
      "epoch": 2.6171875e-05,
      "step": 4288,
      "training_step_time": 0.3866307735443115
    },
    {
      "epoch": 2.6177978515625e-05,
      "model_forward_time": 0.11454558372497559,
      "step": 4289
    },
    {
      "epoch": 2.6177978515625e-05,
      "step": 4289,
      "training_step_time": 0.47567081451416016
    },
    {
      "epoch": 2.618408203125e-05,
      "grad_norm": 0.34920942783355713,
      "learning_rate": 9.987367581483705e-05,
      "loss": 0.104,
      "step": 4290
    },
    {
      "epoch": 2.618408203125e-05,
      "model_forward_time": 0.11446166038513184,
      "step": 4290
    },
    {
      "epoch": 2.618408203125e-05,
      "step": 4290,
      "training_step_time": 0.45978569984436035
    },
    {
      "epoch": 2.6190185546875e-05,
      "model_forward_time": 0.11455464363098145,
      "step": 4291
    },
    {
      "epoch": 2.6190185546875e-05,
      "step": 4291,
      "training_step_time": 0.45781564712524414
    },
    {
      "epoch": 2.61962890625e-05,
      "model_forward_time": 0.11464262008666992,
      "step": 4292
    },
    {
      "epoch": 2.61962890625e-05,
      "step": 4292,
      "training_step_time": 0.6212589740753174
    },
    {
      "epoch": 2.6202392578125e-05,
      "model_forward_time": 0.11458539962768555,
      "step": 4293
    },
    {
      "epoch": 2.6202392578125e-05,
      "step": 4293,
      "training_step_time": 0.38779306411743164
    },
    {
      "epoch": 2.620849609375e-05,
      "model_forward_time": 0.11410236358642578,
      "step": 4294
    },
    {
      "epoch": 2.620849609375e-05,
      "step": 4294,
      "training_step_time": 0.3814988136291504
    },
    {
      "epoch": 2.6214599609375e-05,
      "model_forward_time": 0.11468887329101562,
      "step": 4295
    },
    {
      "epoch": 2.6214599609375e-05,
      "step": 4295,
      "training_step_time": 0.38504648208618164
    },
    {
      "epoch": 2.6220703125e-05,
      "model_forward_time": 0.1147162914276123,
      "step": 4296
    },
    {
      "epoch": 2.6220703125e-05,
      "step": 4296,
      "training_step_time": 0.3873708248138428
    },
    {
      "epoch": 2.6226806640625e-05,
      "model_forward_time": 0.11504697799682617,
      "step": 4297
    },
    {
      "epoch": 2.6226806640625e-05,
      "step": 4297,
      "training_step_time": 0.3911280632019043
    },
    {
      "epoch": 2.623291015625e-05,
      "model_forward_time": 0.11434197425842285,
      "step": 4298
    },
    {
      "epoch": 2.623291015625e-05,
      "step": 4298,
      "training_step_time": 0.7978558540344238
    },
    {
      "epoch": 2.6239013671875e-05,
      "model_forward_time": 0.11461496353149414,
      "step": 4299
    },
    {
      "epoch": 2.6239013671875e-05,
      "step": 4299,
      "training_step_time": 0.42549705505371094
    },
    {
      "epoch": 2.62451171875e-05,
      "grad_norm": 0.40596872568130493,
      "learning_rate": 9.987171055039408e-05,
      "loss": 0.1062,
      "step": 4300
    },
    {
      "epoch": 2.62451171875e-05,
      "model_forward_time": 0.11442303657531738,
      "step": 4300
    },
    {
      "epoch": 2.62451171875e-05,
      "step": 4300,
      "training_step_time": 0.3860499858856201
    },
    {
      "epoch": 2.6251220703125e-05,
      "model_forward_time": 0.11442923545837402,
      "step": 4301
    },
    {
      "epoch": 2.6251220703125e-05,
      "step": 4301,
      "training_step_time": 0.39261364936828613
    },
    {
      "epoch": 2.625732421875e-05,
      "model_forward_time": 0.11468696594238281,
      "step": 4302
    },
    {
      "epoch": 2.625732421875e-05,
      "step": 4302,
      "training_step_time": 0.39165353775024414
    },
    {
      "epoch": 2.6263427734375e-05,
      "model_forward_time": 0.11473894119262695,
      "step": 4303
    },
    {
      "epoch": 2.6263427734375e-05,
      "step": 4303,
      "training_step_time": 0.36345624923706055
    },
    {
      "epoch": 2.626953125e-05,
      "model_forward_time": 0.1150200366973877,
      "step": 4304
    },
    {
      "epoch": 2.626953125e-05,
      "step": 4304,
      "training_step_time": 0.49187612533569336
    },
    {
      "epoch": 2.6275634765625e-05,
      "model_forward_time": 0.11506891250610352,
      "step": 4305
    },
    {
      "epoch": 2.6275634765625e-05,
      "step": 4305,
      "training_step_time": 0.48175764083862305
    },
    {
      "epoch": 2.628173828125e-05,
      "model_forward_time": 0.11442732810974121,
      "step": 4306
    },
    {
      "epoch": 2.628173828125e-05,
      "step": 4306,
      "training_step_time": 0.3841209411621094
    },
    {
      "epoch": 2.6287841796875e-05,
      "model_forward_time": 0.11543989181518555,
      "step": 4307
    },
    {
      "epoch": 2.6287841796875e-05,
      "step": 4307,
      "training_step_time": 0.3892021179199219
    },
    {
      "epoch": 2.62939453125e-05,
      "model_forward_time": 0.1151130199432373,
      "step": 4308
    },
    {
      "epoch": 2.62939453125e-05,
      "step": 4308,
      "training_step_time": 0.38744473457336426
    },
    {
      "epoch": 2.6300048828125e-05,
      "model_forward_time": 0.1150362491607666,
      "step": 4309
    },
    {
      "epoch": 2.6300048828125e-05,
      "step": 4309,
      "training_step_time": 0.3918333053588867
    },
    {
      "epoch": 2.630615234375e-05,
      "grad_norm": 0.2685665786266327,
      "learning_rate": 9.986973013624224e-05,
      "loss": 0.1029,
      "step": 4310
    },
    {
      "epoch": 2.630615234375e-05,
      "model_forward_time": 0.11871623992919922,
      "step": 4310
    },
    {
      "epoch": 2.630615234375e-05,
      "step": 4310,
      "training_step_time": 0.39611148834228516
    },
    {
      "epoch": 2.6312255859375e-05,
      "model_forward_time": 0.11619400978088379,
      "step": 4311
    },
    {
      "epoch": 2.6312255859375e-05,
      "step": 4311,
      "training_step_time": 0.423137903213501
    },
    {
      "epoch": 2.6318359375e-05,
      "model_forward_time": 0.11528229713439941,
      "step": 4312
    },
    {
      "epoch": 2.6318359375e-05,
      "step": 4312,
      "training_step_time": 0.4303264617919922
    },
    {
      "epoch": 2.6324462890625e-05,
      "model_forward_time": 0.11482644081115723,
      "step": 4313
    },
    {
      "epoch": 2.6324462890625e-05,
      "step": 4313,
      "training_step_time": 0.39867448806762695
    },
    {
      "epoch": 2.633056640625e-05,
      "model_forward_time": 0.11545777320861816,
      "step": 4314
    },
    {
      "epoch": 2.633056640625e-05,
      "step": 4314,
      "training_step_time": 0.424083948135376
    },
    {
      "epoch": 2.6336669921875e-05,
      "model_forward_time": 0.11508584022521973,
      "step": 4315
    },
    {
      "epoch": 2.6336669921875e-05,
      "step": 4315,
      "training_step_time": 0.39028453826904297
    },
    {
      "epoch": 2.63427734375e-05,
      "model_forward_time": 0.11498308181762695,
      "step": 4316
    },
    {
      "epoch": 2.63427734375e-05,
      "step": 4316,
      "training_step_time": 0.602834939956665
    },
    {
      "epoch": 2.6348876953125e-05,
      "model_forward_time": 0.11456513404846191,
      "step": 4317
    },
    {
      "epoch": 2.6348876953125e-05,
      "step": 4317,
      "training_step_time": 0.45764875411987305
    },
    {
      "epoch": 2.635498046875e-05,
      "model_forward_time": 0.11519646644592285,
      "step": 4318
    },
    {
      "epoch": 2.635498046875e-05,
      "step": 4318,
      "training_step_time": 0.48009467124938965
    },
    {
      "epoch": 2.6361083984375e-05,
      "model_forward_time": 0.11474847793579102,
      "step": 4319
    },
    {
      "epoch": 2.6361083984375e-05,
      "step": 4319,
      "training_step_time": 0.469097375869751
    },
    {
      "epoch": 2.63671875e-05,
      "grad_norm": 0.16081883013248444,
      "learning_rate": 9.986773457298311e-05,
      "loss": 0.1,
      "step": 4320
    },
    {
      "epoch": 2.63671875e-05,
      "model_forward_time": 0.11447310447692871,
      "step": 4320
    },
    {
      "epoch": 2.63671875e-05,
      "step": 4320,
      "training_step_time": 0.39404797554016113
    },
    {
      "epoch": 2.6373291015625e-05,
      "model_forward_time": 0.1148366928100586,
      "step": 4321
    },
    {
      "epoch": 2.6373291015625e-05,
      "step": 4321,
      "training_step_time": 0.3908958435058594
    },
    {
      "epoch": 2.637939453125e-05,
      "model_forward_time": 0.11488795280456543,
      "step": 4322
    },
    {
      "epoch": 2.637939453125e-05,
      "step": 4322,
      "training_step_time": 0.39311838150024414
    },
    {
      "epoch": 2.6385498046875e-05,
      "model_forward_time": 0.11524748802185059,
      "step": 4323
    },
    {
      "epoch": 2.6385498046875e-05,
      "step": 4323,
      "training_step_time": 0.3972916603088379
    },
    {
      "epoch": 2.63916015625e-05,
      "model_forward_time": 0.11510491371154785,
      "step": 4324
    },
    {
      "epoch": 2.63916015625e-05,
      "step": 4324,
      "training_step_time": 0.39209818840026855
    },
    {
      "epoch": 2.6397705078125e-05,
      "model_forward_time": 0.11568522453308105,
      "step": 4325
    },
    {
      "epoch": 2.6397705078125e-05,
      "step": 4325,
      "training_step_time": 0.42591071128845215
    },
    {
      "epoch": 2.640380859375e-05,
      "model_forward_time": 0.1147005558013916,
      "step": 4326
    },
    {
      "epoch": 2.640380859375e-05,
      "step": 4326,
      "training_step_time": 0.4173557758331299
    },
    {
      "epoch": 2.6409912109375e-05,
      "model_forward_time": 0.11641526222229004,
      "step": 4327
    },
    {
      "epoch": 2.6409912109375e-05,
      "step": 4327,
      "training_step_time": 0.4345273971557617
    },
    {
      "epoch": 2.6416015625e-05,
      "model_forward_time": 0.11600756645202637,
      "step": 4328
    },
    {
      "epoch": 2.6416015625e-05,
      "step": 4328,
      "training_step_time": 0.49437999725341797
    },
    {
      "epoch": 2.6422119140625e-05,
      "model_forward_time": 0.1148526668548584,
      "step": 4329
    },
    {
      "epoch": 2.6422119140625e-05,
      "step": 4329,
      "training_step_time": 0.3907144069671631
    },
    {
      "epoch": 2.642822265625e-05,
      "grad_norm": 0.3570311665534973,
      "learning_rate": 9.986572386122291e-05,
      "loss": 0.0984,
      "step": 4330
    },
    {
      "epoch": 2.642822265625e-05,
      "model_forward_time": 0.11543869972229004,
      "step": 4330
    },
    {
      "epoch": 2.642822265625e-05,
      "step": 4330,
      "training_step_time": 0.3921518325805664
    },
    {
      "epoch": 2.6434326171875e-05,
      "model_forward_time": 0.11492180824279785,
      "step": 4331
    },
    {
      "epoch": 2.6434326171875e-05,
      "step": 4331,
      "training_step_time": 0.4367244243621826
    },
    {
      "epoch": 2.64404296875e-05,
      "model_forward_time": 0.11490225791931152,
      "step": 4332
    },
    {
      "epoch": 2.64404296875e-05,
      "step": 4332,
      "training_step_time": 0.3689894676208496
    },
    {
      "epoch": 2.6446533203125e-05,
      "model_forward_time": 0.11609625816345215,
      "step": 4333
    },
    {
      "epoch": 2.6446533203125e-05,
      "step": 4333,
      "training_step_time": 0.4734206199645996
    },
    {
      "epoch": 2.645263671875e-05,
      "model_forward_time": 0.11496400833129883,
      "step": 4334
    },
    {
      "epoch": 2.645263671875e-05,
      "step": 4334,
      "training_step_time": 0.48268604278564453
    },
    {
      "epoch": 2.6458740234375e-05,
      "model_forward_time": 0.11508917808532715,
      "step": 4335
    },
    {
      "epoch": 2.6458740234375e-05,
      "step": 4335,
      "training_step_time": 0.3916897773742676
    },
    {
      "epoch": 2.646484375e-05,
      "model_forward_time": 0.11550474166870117,
      "step": 4336
    },
    {
      "epoch": 2.646484375e-05,
      "step": 4336,
      "training_step_time": 0.3887772560119629
    },
    {
      "epoch": 2.6470947265625e-05,
      "model_forward_time": 0.11544013023376465,
      "step": 4337
    },
    {
      "epoch": 2.6470947265625e-05,
      "step": 4337,
      "training_step_time": 0.395599365234375
    },
    {
      "epoch": 2.647705078125e-05,
      "model_forward_time": 0.11507558822631836,
      "step": 4338
    },
    {
      "epoch": 2.647705078125e-05,
      "step": 4338,
      "training_step_time": 0.3887488842010498
    },
    {
      "epoch": 2.6483154296875e-05,
      "model_forward_time": 0.11522746086120605,
      "step": 4339
    },
    {
      "epoch": 2.6483154296875e-05,
      "step": 4339,
      "training_step_time": 0.4062328338623047
    },
    {
      "epoch": 2.64892578125e-05,
      "grad_norm": 0.21811053156852722,
      "learning_rate": 9.986369800157242e-05,
      "loss": 0.1032,
      "step": 4340
    },
    {
      "epoch": 2.64892578125e-05,
      "model_forward_time": 0.11444854736328125,
      "step": 4340
    },
    {
      "epoch": 2.64892578125e-05,
      "step": 4340,
      "training_step_time": 0.39710330963134766
    },
    {
      "epoch": 2.6495361328125e-05,
      "model_forward_time": 0.1161646842956543,
      "step": 4341
    },
    {
      "epoch": 2.6495361328125e-05,
      "step": 4341,
      "training_step_time": 0.4256870746612549
    },
    {
      "epoch": 2.650146484375e-05,
      "model_forward_time": 0.11536860466003418,
      "step": 4342
    },
    {
      "epoch": 2.650146484375e-05,
      "step": 4342,
      "training_step_time": 0.4973182678222656
    },
    {
      "epoch": 2.6507568359375e-05,
      "model_forward_time": 0.11531567573547363,
      "step": 4343
    },
    {
      "epoch": 2.6507568359375e-05,
      "step": 4343,
      "training_step_time": 0.46848344802856445
    },
    {
      "epoch": 2.6513671875e-05,
      "model_forward_time": 0.11523079872131348,
      "step": 4344
    },
    {
      "epoch": 2.6513671875e-05,
      "step": 4344,
      "training_step_time": 0.4337303638458252
    },
    {
      "epoch": 2.6519775390625e-05,
      "model_forward_time": 0.11545395851135254,
      "step": 4345
    },
    {
      "epoch": 2.6519775390625e-05,
      "step": 4345,
      "training_step_time": 0.3963944911956787
    },
    {
      "epoch": 2.652587890625e-05,
      "model_forward_time": 0.11639118194580078,
      "step": 4346
    },
    {
      "epoch": 2.652587890625e-05,
      "step": 4346,
      "training_step_time": 0.6392700672149658
    },
    {
      "epoch": 2.6531982421875e-05,
      "model_forward_time": 0.11440658569335938,
      "step": 4347
    },
    {
      "epoch": 2.6531982421875e-05,
      "step": 4347,
      "training_step_time": 0.48426342010498047
    },
    {
      "epoch": 2.65380859375e-05,
      "model_forward_time": 0.11558842658996582,
      "step": 4348
    },
    {
      "epoch": 2.65380859375e-05,
      "step": 4348,
      "training_step_time": 0.5103278160095215
    },
    {
      "epoch": 2.6544189453125e-05,
      "model_forward_time": 0.11461257934570312,
      "step": 4349
    },
    {
      "epoch": 2.6544189453125e-05,
      "step": 4349,
      "training_step_time": 0.392242431640625
    },
    {
      "epoch": 2.655029296875e-05,
      "grad_norm": 0.30380532145500183,
      "learning_rate": 9.986165699464705e-05,
      "loss": 0.1022,
      "step": 4350
    },
    {
      "epoch": 2.655029296875e-05,
      "model_forward_time": 0.1144247055053711,
      "step": 4350
    },
    {
      "epoch": 2.655029296875e-05,
      "step": 4350,
      "training_step_time": 0.3915822505950928
    },
    {
      "epoch": 2.6556396484375e-05,
      "model_forward_time": 0.11484813690185547,
      "step": 4351
    },
    {
      "epoch": 2.6556396484375e-05,
      "step": 4351,
      "training_step_time": 0.3903768062591553
    },
    {
      "epoch": 2.65625e-05,
      "model_forward_time": 0.11503744125366211,
      "step": 4352
    },
    {
      "epoch": 2.65625e-05,
      "step": 4352,
      "training_step_time": 0.3949618339538574
    },
    {
      "epoch": 2.6568603515625e-05,
      "model_forward_time": 0.11535453796386719,
      "step": 4353
    },
    {
      "epoch": 2.6568603515625e-05,
      "step": 4353,
      "training_step_time": 0.3937516212463379
    },
    {
      "epoch": 2.657470703125e-05,
      "model_forward_time": 0.11492180824279785,
      "step": 4354
    },
    {
      "epoch": 2.657470703125e-05,
      "step": 4354,
      "training_step_time": 0.38556599617004395
    },
    {
      "epoch": 2.6580810546875e-05,
      "model_forward_time": 0.1153109073638916,
      "step": 4355
    },
    {
      "epoch": 2.6580810546875e-05,
      "step": 4355,
      "training_step_time": 0.4116642475128174
    },
    {
      "epoch": 2.65869140625e-05,
      "model_forward_time": 0.11590075492858887,
      "step": 4356
    },
    {
      "epoch": 2.65869140625e-05,
      "step": 4356,
      "training_step_time": 0.5025467872619629
    },
    {
      "epoch": 2.6593017578125e-05,
      "model_forward_time": 0.11510539054870605,
      "step": 4357
    },
    {
      "epoch": 2.6593017578125e-05,
      "step": 4357,
      "training_step_time": 0.4676368236541748
    },
    {
      "epoch": 2.659912109375e-05,
      "model_forward_time": 0.11497926712036133,
      "step": 4358
    },
    {
      "epoch": 2.659912109375e-05,
      "step": 4358,
      "training_step_time": 1.0508301258087158
    },
    {
      "epoch": 2.6605224609375e-05,
      "model_forward_time": 0.11401057243347168,
      "step": 4359
    },
    {
      "epoch": 2.6605224609375e-05,
      "step": 4359,
      "training_step_time": 0.3843264579772949
    },
    {
      "epoch": 2.6611328125e-05,
      "grad_norm": 0.2589091658592224,
      "learning_rate": 9.985960084106682e-05,
      "loss": 0.0988,
      "step": 4360
    },
    {
      "epoch": 2.6611328125e-05,
      "model_forward_time": 0.11449575424194336,
      "step": 4360
    },
    {
      "epoch": 2.6611328125e-05,
      "step": 4360,
      "training_step_time": 0.39540839195251465
    },
    {
      "epoch": 2.6617431640625e-05,
      "model_forward_time": 0.11436343193054199,
      "step": 4361
    },
    {
      "epoch": 2.6617431640625e-05,
      "step": 4361,
      "training_step_time": 0.43408942222595215
    },
    {
      "epoch": 2.662353515625e-05,
      "model_forward_time": 0.1142129898071289,
      "step": 4362
    },
    {
      "epoch": 2.662353515625e-05,
      "step": 4362,
      "training_step_time": 0.46437978744506836
    },
    {
      "epoch": 2.6629638671875e-05,
      "model_forward_time": 0.11455202102661133,
      "step": 4363
    },
    {
      "epoch": 2.6629638671875e-05,
      "step": 4363,
      "training_step_time": 0.3924696445465088
    },
    {
      "epoch": 2.66357421875e-05,
      "model_forward_time": 0.11525154113769531,
      "step": 4364
    },
    {
      "epoch": 2.66357421875e-05,
      "step": 4364,
      "training_step_time": 0.6483688354492188
    },
    {
      "epoch": 2.6641845703125e-05,
      "model_forward_time": 0.11449980735778809,
      "step": 4365
    },
    {
      "epoch": 2.6641845703125e-05,
      "step": 4365,
      "training_step_time": 0.3871653079986572
    },
    {
      "epoch": 2.664794921875e-05,
      "model_forward_time": 0.11414361000061035,
      "step": 4366
    },
    {
      "epoch": 2.664794921875e-05,
      "step": 4366,
      "training_step_time": 0.3793761730194092
    },
    {
      "epoch": 2.6654052734375e-05,
      "model_forward_time": 0.11508345603942871,
      "step": 4367
    },
    {
      "epoch": 2.6654052734375e-05,
      "step": 4367,
      "training_step_time": 0.38834118843078613
    },
    {
      "epoch": 2.666015625e-05,
      "model_forward_time": 0.11546587944030762,
      "step": 4368
    },
    {
      "epoch": 2.666015625e-05,
      "step": 4368,
      "training_step_time": 0.4491891860961914
    },
    {
      "epoch": 2.6666259765625e-05,
      "model_forward_time": 0.1150510311126709,
      "step": 4369
    },
    {
      "epoch": 2.6666259765625e-05,
      "step": 4369,
      "training_step_time": 0.4440009593963623
    },
    {
      "epoch": 2.667236328125e-05,
      "grad_norm": 0.21052496135234833,
      "learning_rate": 9.98575295414563e-05,
      "loss": 0.1008,
      "step": 4370
    },
    {
      "epoch": 2.667236328125e-05,
      "model_forward_time": 0.11537909507751465,
      "step": 4370
    },
    {
      "epoch": 2.667236328125e-05,
      "step": 4370,
      "training_step_time": 0.8134958744049072
    },
    {
      "epoch": 2.6678466796875e-05,
      "model_forward_time": 0.11429667472839355,
      "step": 4371
    },
    {
      "epoch": 2.6678466796875e-05,
      "step": 4371,
      "training_step_time": 0.3933298587799072
    },
    {
      "epoch": 2.66845703125e-05,
      "model_forward_time": 0.11467432975769043,
      "step": 4372
    },
    {
      "epoch": 2.66845703125e-05,
      "step": 4372,
      "training_step_time": 0.3848855495452881
    },
    {
      "epoch": 2.6690673828125e-05,
      "model_forward_time": 0.11470818519592285,
      "step": 4373
    },
    {
      "epoch": 2.6690673828125e-05,
      "step": 4373,
      "training_step_time": 0.3641982078552246
    },
    {
      "epoch": 2.669677734375e-05,
      "model_forward_time": 0.11454463005065918,
      "step": 4374
    },
    {
      "epoch": 2.669677734375e-05,
      "step": 4374,
      "training_step_time": 0.4740731716156006
    },
    {
      "epoch": 2.6702880859375e-05,
      "model_forward_time": 0.11424684524536133,
      "step": 4375
    },
    {
      "epoch": 2.6702880859375e-05,
      "step": 4375,
      "training_step_time": 0.46739888191223145
    },
    {
      "epoch": 2.6708984375e-05,
      "model_forward_time": 0.11479735374450684,
      "step": 4376
    },
    {
      "epoch": 2.6708984375e-05,
      "step": 4376,
      "training_step_time": 0.5424478054046631
    },
    {
      "epoch": 2.6715087890625e-05,
      "model_forward_time": 0.1139669418334961,
      "step": 4377
    },
    {
      "epoch": 2.6715087890625e-05,
      "step": 4377,
      "training_step_time": 0.38865041732788086
    },
    {
      "epoch": 2.672119140625e-05,
      "model_forward_time": 0.11396598815917969,
      "step": 4378
    },
    {
      "epoch": 2.672119140625e-05,
      "step": 4378,
      "training_step_time": 0.3857402801513672
    },
    {
      "epoch": 2.6727294921875e-05,
      "model_forward_time": 0.11449050903320312,
      "step": 4379
    },
    {
      "epoch": 2.6727294921875e-05,
      "step": 4379,
      "training_step_time": 0.3864748477935791
    },
    {
      "epoch": 2.67333984375e-05,
      "grad_norm": 0.4113611876964569,
      "learning_rate": 9.985544309644475e-05,
      "loss": 0.0976,
      "step": 4380
    },
    {
      "epoch": 2.67333984375e-05,
      "model_forward_time": 0.11464166641235352,
      "step": 4380
    },
    {
      "epoch": 2.67333984375e-05,
      "step": 4380,
      "training_step_time": 0.390580415725708
    },
    {
      "epoch": 2.6739501953125e-05,
      "model_forward_time": 0.11552023887634277,
      "step": 4381
    },
    {
      "epoch": 2.6739501953125e-05,
      "step": 4381,
      "training_step_time": 0.4461073875427246
    },
    {
      "epoch": 2.674560546875e-05,
      "model_forward_time": 0.11511898040771484,
      "step": 4382
    },
    {
      "epoch": 2.674560546875e-05,
      "step": 4382,
      "training_step_time": 0.8317201137542725
    },
    {
      "epoch": 2.6751708984375e-05,
      "model_forward_time": 0.11451959609985352,
      "step": 4383
    },
    {
      "epoch": 2.6751708984375e-05,
      "step": 4383,
      "training_step_time": 0.4034385681152344
    },
    {
      "epoch": 2.67578125e-05,
      "model_forward_time": 0.11502242088317871,
      "step": 4384
    },
    {
      "epoch": 2.67578125e-05,
      "step": 4384,
      "training_step_time": 0.3822026252746582
    },
    {
      "epoch": 2.6763916015625e-05,
      "model_forward_time": 0.11415314674377441,
      "step": 4385
    },
    {
      "epoch": 2.6763916015625e-05,
      "step": 4385,
      "training_step_time": 0.38069820404052734
    },
    {
      "epoch": 2.677001953125e-05,
      "model_forward_time": 0.11438560485839844,
      "step": 4386
    },
    {
      "epoch": 2.677001953125e-05,
      "step": 4386,
      "training_step_time": 0.38410377502441406
    },
    {
      "epoch": 2.6776123046875e-05,
      "model_forward_time": 0.11504745483398438,
      "step": 4387
    },
    {
      "epoch": 2.6776123046875e-05,
      "step": 4387,
      "training_step_time": 0.47396183013916016
    },
    {
      "epoch": 2.67822265625e-05,
      "model_forward_time": 0.11731147766113281,
      "step": 4388
    },
    {
      "epoch": 2.67822265625e-05,
      "step": 4388,
      "training_step_time": 0.9788863658905029
    },
    {
      "epoch": 2.6788330078125e-05,
      "model_forward_time": 0.11430621147155762,
      "step": 4389
    },
    {
      "epoch": 2.6788330078125e-05,
      "step": 4389,
      "training_step_time": 0.38504791259765625
    },
    {
      "epoch": 2.679443359375e-05,
      "grad_norm": 0.27904078364372253,
      "learning_rate": 9.985334150666592e-05,
      "loss": 0.1072,
      "step": 4390
    },
    {
      "epoch": 2.679443359375e-05,
      "model_forward_time": 0.1150667667388916,
      "step": 4390
    },
    {
      "epoch": 2.679443359375e-05,
      "step": 4390,
      "training_step_time": 0.3767669200897217
    },
    {
      "epoch": 2.6800537109375e-05,
      "model_forward_time": 0.11428499221801758,
      "step": 4391
    },
    {
      "epoch": 2.6800537109375e-05,
      "step": 4391,
      "training_step_time": 0.3785068988800049
    },
    {
      "epoch": 2.6806640625e-05,
      "model_forward_time": 0.11429381370544434,
      "step": 4392
    },
    {
      "epoch": 2.6806640625e-05,
      "step": 4392,
      "training_step_time": 0.39072561264038086
    },
    {
      "epoch": 2.6812744140625e-05,
      "model_forward_time": 0.11420059204101562,
      "step": 4393
    },
    {
      "epoch": 2.6812744140625e-05,
      "step": 4393,
      "training_step_time": 0.3924117088317871
    },
    {
      "epoch": 2.681884765625e-05,
      "model_forward_time": 0.11540889739990234,
      "step": 4394
    },
    {
      "epoch": 2.681884765625e-05,
      "step": 4394,
      "training_step_time": 0.7252001762390137
    },
    {
      "epoch": 2.6824951171875e-05,
      "model_forward_time": 0.11540555953979492,
      "step": 4395
    },
    {
      "epoch": 2.6824951171875e-05,
      "step": 4395,
      "training_step_time": 0.4025592803955078
    },
    {
      "epoch": 2.68310546875e-05,
      "model_forward_time": 0.11481785774230957,
      "step": 4396
    },
    {
      "epoch": 2.68310546875e-05,
      "step": 4396,
      "training_step_time": 0.49553847312927246
    },
    {
      "epoch": 2.6837158203125e-05,
      "model_forward_time": 0.11467123031616211,
      "step": 4397
    },
    {
      "epoch": 2.6837158203125e-05,
      "step": 4397,
      "training_step_time": 0.38092684745788574
    },
    {
      "epoch": 2.684326171875e-05,
      "model_forward_time": 0.114288330078125,
      "step": 4398
    },
    {
      "epoch": 2.684326171875e-05,
      "step": 4398,
      "training_step_time": 0.37914490699768066
    },
    {
      "epoch": 2.6849365234375e-05,
      "model_forward_time": 0.11463785171508789,
      "step": 4399
    },
    {
      "epoch": 2.6849365234375e-05,
      "step": 4399,
      "training_step_time": 0.3916811943054199
    },
    {
      "epoch": 2.685546875e-05,
      "grad_norm": 0.3904087543487549,
      "learning_rate": 9.985122477275824e-05,
      "loss": 0.113,
      "step": 4400
    },
    {
      "epoch": 2.685546875e-05,
      "model_forward_time": 0.11518383026123047,
      "step": 4400
    },
    {
      "epoch": 2.685546875e-05,
      "step": 4400,
      "training_step_time": 0.7151951789855957
    },
    {
      "epoch": 2.6861572265625e-05,
      "model_forward_time": 0.11517930030822754,
      "step": 4401
    },
    {
      "epoch": 2.6861572265625e-05,
      "step": 4401,
      "training_step_time": 0.45162129402160645
    },
    {
      "epoch": 2.686767578125e-05,
      "model_forward_time": 0.11466193199157715,
      "step": 4402
    },
    {
      "epoch": 2.686767578125e-05,
      "step": 4402,
      "training_step_time": 0.4142446517944336
    },
    {
      "epoch": 2.6873779296875e-05,
      "model_forward_time": 0.11478853225708008,
      "step": 4403
    },
    {
      "epoch": 2.6873779296875e-05,
      "step": 4403,
      "training_step_time": 0.3824326992034912
    },
    {
      "epoch": 2.68798828125e-05,
      "model_forward_time": 0.1145176887512207,
      "step": 4404
    },
    {
      "epoch": 2.68798828125e-05,
      "step": 4404,
      "training_step_time": 0.38367486000061035
    },
    {
      "epoch": 2.6885986328125e-05,
      "model_forward_time": 0.11446189880371094,
      "step": 4405
    },
    {
      "epoch": 2.6885986328125e-05,
      "step": 4405,
      "training_step_time": 0.4132728576660156
    },
    {
      "epoch": 2.689208984375e-05,
      "model_forward_time": 0.11534452438354492,
      "step": 4406
    },
    {
      "epoch": 2.689208984375e-05,
      "step": 4406,
      "training_step_time": 0.6498520374298096
    },
    {
      "epoch": 2.6898193359375e-05,
      "model_forward_time": 0.11505389213562012,
      "step": 4407
    },
    {
      "epoch": 2.6898193359375e-05,
      "step": 4407,
      "training_step_time": 0.3931005001068115
    },
    {
      "epoch": 2.6904296875e-05,
      "model_forward_time": 0.11447906494140625,
      "step": 4408
    },
    {
      "epoch": 2.6904296875e-05,
      "step": 4408,
      "training_step_time": 0.4724905490875244
    },
    {
      "epoch": 2.6910400390625e-05,
      "model_forward_time": 0.11505603790283203,
      "step": 4409
    },
    {
      "epoch": 2.6910400390625e-05,
      "step": 4409,
      "training_step_time": 0.41124534606933594
    },
    {
      "epoch": 2.691650390625e-05,
      "grad_norm": 0.2319435477256775,
      "learning_rate": 9.984909289536473e-05,
      "loss": 0.1045,
      "step": 4410
    },
    {
      "epoch": 2.691650390625e-05,
      "model_forward_time": 0.11507010459899902,
      "step": 4410
    },
    {
      "epoch": 2.691650390625e-05,
      "step": 4410,
      "training_step_time": 0.3898611068725586
    },
    {
      "epoch": 2.6922607421875e-05,
      "model_forward_time": 0.11462950706481934,
      "step": 4411
    },
    {
      "epoch": 2.6922607421875e-05,
      "step": 4411,
      "training_step_time": 0.41341137886047363
    },
    {
      "epoch": 2.69287109375e-05,
      "model_forward_time": 0.11625933647155762,
      "step": 4412
    },
    {
      "epoch": 2.69287109375e-05,
      "step": 4412,
      "training_step_time": 0.6245551109313965
    },
    {
      "epoch": 2.6934814453125e-05,
      "model_forward_time": 0.11455178260803223,
      "step": 4413
    },
    {
      "epoch": 2.6934814453125e-05,
      "step": 4413,
      "training_step_time": 0.37911510467529297
    },
    {
      "epoch": 2.694091796875e-05,
      "model_forward_time": 0.11516547203063965,
      "step": 4414
    },
    {
      "epoch": 2.694091796875e-05,
      "step": 4414,
      "training_step_time": 0.46617674827575684
    },
    {
      "epoch": 2.6947021484375e-05,
      "model_forward_time": 0.11546015739440918,
      "step": 4415
    },
    {
      "epoch": 2.6947021484375e-05,
      "step": 4415,
      "training_step_time": 0.450364351272583
    },
    {
      "epoch": 2.6953125e-05,
      "model_forward_time": 0.11496424674987793,
      "step": 4416
    },
    {
      "epoch": 2.6953125e-05,
      "step": 4416,
      "training_step_time": 0.4632296562194824
    },
    {
      "epoch": 2.6959228515625e-05,
      "model_forward_time": 0.11429262161254883,
      "step": 4417
    },
    {
      "epoch": 2.6959228515625e-05,
      "step": 4417,
      "training_step_time": 0.4216318130493164
    },
    {
      "epoch": 2.696533203125e-05,
      "model_forward_time": 0.11647629737854004,
      "step": 4418
    },
    {
      "epoch": 2.696533203125e-05,
      "step": 4418,
      "training_step_time": 0.4283580780029297
    },
    {
      "epoch": 2.6971435546875e-05,
      "model_forward_time": 0.11555290222167969,
      "step": 4419
    },
    {
      "epoch": 2.6971435546875e-05,
      "step": 4419,
      "training_step_time": 0.4310481548309326
    },
    {
      "epoch": 2.69775390625e-05,
      "grad_norm": 0.2948521077632904,
      "learning_rate": 9.984694587513298e-05,
      "loss": 0.1038,
      "step": 4420
    },
    {
      "epoch": 2.69775390625e-05,
      "model_forward_time": 0.11555743217468262,
      "step": 4420
    },
    {
      "epoch": 2.69775390625e-05,
      "step": 4420,
      "training_step_time": 0.3893089294433594
    },
    {
      "epoch": 2.6983642578125e-05,
      "model_forward_time": 0.11596512794494629,
      "step": 4421
    },
    {
      "epoch": 2.6983642578125e-05,
      "step": 4421,
      "training_step_time": 0.39259958267211914
    },
    {
      "epoch": 2.698974609375e-05,
      "model_forward_time": 0.11479330062866211,
      "step": 4422
    },
    {
      "epoch": 2.698974609375e-05,
      "step": 4422,
      "training_step_time": 0.4962646961212158
    },
    {
      "epoch": 2.6995849609375e-05,
      "model_forward_time": 0.11520624160766602,
      "step": 4423
    },
    {
      "epoch": 2.6995849609375e-05,
      "step": 4423,
      "training_step_time": 0.4581336975097656
    },
    {
      "epoch": 2.7001953125e-05,
      "model_forward_time": 0.11577820777893066,
      "step": 4424
    },
    {
      "epoch": 2.7001953125e-05,
      "step": 4424,
      "training_step_time": 0.6395623683929443
    },
    {
      "epoch": 2.7008056640625e-05,
      "model_forward_time": 0.11460518836975098,
      "step": 4425
    },
    {
      "epoch": 2.7008056640625e-05,
      "step": 4425,
      "training_step_time": 0.3872354030609131
    },
    {
      "epoch": 2.701416015625e-05,
      "model_forward_time": 0.11411881446838379,
      "step": 4426
    },
    {
      "epoch": 2.701416015625e-05,
      "step": 4426,
      "training_step_time": 0.38292813301086426
    },
    {
      "epoch": 2.7020263671875e-05,
      "model_forward_time": 0.11509227752685547,
      "step": 4427
    },
    {
      "epoch": 2.7020263671875e-05,
      "step": 4427,
      "training_step_time": 0.38555145263671875
    },
    {
      "epoch": 2.70263671875e-05,
      "model_forward_time": 0.1150662899017334,
      "step": 4428
    },
    {
      "epoch": 2.70263671875e-05,
      "step": 4428,
      "training_step_time": 0.47939538955688477
    },
    {
      "epoch": 2.7032470703125e-05,
      "model_forward_time": 0.11467933654785156,
      "step": 4429
    },
    {
      "epoch": 2.7032470703125e-05,
      "step": 4429,
      "training_step_time": 0.45555615425109863
    },
    {
      "epoch": 2.703857421875e-05,
      "grad_norm": 0.2474593073129654,
      "learning_rate": 9.984478371271521e-05,
      "loss": 0.0968,
      "step": 4430
    },
    {
      "epoch": 2.703857421875e-05,
      "model_forward_time": 0.11486554145812988,
      "step": 4430
    },
    {
      "epoch": 2.703857421875e-05,
      "step": 4430,
      "training_step_time": 0.987795352935791
    },
    {
      "epoch": 2.7044677734375e-05,
      "model_forward_time": 0.11385011672973633,
      "step": 4431
    },
    {
      "epoch": 2.7044677734375e-05,
      "step": 4431,
      "training_step_time": 0.4004194736480713
    },
    {
      "epoch": 2.705078125e-05,
      "model_forward_time": 0.11418676376342773,
      "step": 4432
    },
    {
      "epoch": 2.705078125e-05,
      "step": 4432,
      "training_step_time": 0.386322021484375
    },
    {
      "epoch": 2.7056884765625e-05,
      "model_forward_time": 0.11479544639587402,
      "step": 4433
    },
    {
      "epoch": 2.7056884765625e-05,
      "step": 4433,
      "training_step_time": 0.3907613754272461
    },
    {
      "epoch": 2.706298828125e-05,
      "model_forward_time": 0.11442065238952637,
      "step": 4434
    },
    {
      "epoch": 2.706298828125e-05,
      "step": 4434,
      "training_step_time": 0.38408756256103516
    },
    {
      "epoch": 2.7069091796875e-05,
      "model_forward_time": 0.11438632011413574,
      "step": 4435
    },
    {
      "epoch": 2.7069091796875e-05,
      "step": 4435,
      "training_step_time": 0.4647848606109619
    },
    {
      "epoch": 2.70751953125e-05,
      "model_forward_time": 0.11462616920471191,
      "step": 4436
    },
    {
      "epoch": 2.70751953125e-05,
      "step": 4436,
      "training_step_time": 0.5899472236633301
    },
    {
      "epoch": 2.7081298828125e-05,
      "model_forward_time": 0.11499929428100586,
      "step": 4437
    },
    {
      "epoch": 2.7081298828125e-05,
      "step": 4437,
      "training_step_time": 0.395190954208374
    },
    {
      "epoch": 2.708740234375e-05,
      "model_forward_time": 0.11514067649841309,
      "step": 4438
    },
    {
      "epoch": 2.708740234375e-05,
      "step": 4438,
      "training_step_time": 0.3865549564361572
    },
    {
      "epoch": 2.7093505859375e-05,
      "model_forward_time": 0.11485457420349121,
      "step": 4439
    },
    {
      "epoch": 2.7093505859375e-05,
      "step": 4439,
      "training_step_time": 0.386641263961792
    },
    {
      "epoch": 2.7099609375e-05,
      "grad_norm": 0.31704461574554443,
      "learning_rate": 9.984260640876821e-05,
      "loss": 0.1097,
      "step": 4440
    },
    {
      "epoch": 2.7099609375e-05,
      "model_forward_time": 0.11496996879577637,
      "step": 4440
    },
    {
      "epoch": 2.7099609375e-05,
      "step": 4440,
      "training_step_time": 0.39209413528442383
    },
    {
      "epoch": 2.7105712890625e-05,
      "model_forward_time": 0.11530542373657227,
      "step": 4441
    },
    {
      "epoch": 2.7105712890625e-05,
      "step": 4441,
      "training_step_time": 0.3642096519470215
    },
    {
      "epoch": 2.711181640625e-05,
      "model_forward_time": 0.11507296562194824,
      "step": 4442
    },
    {
      "epoch": 2.711181640625e-05,
      "step": 4442,
      "training_step_time": 0.8367125988006592
    },
    {
      "epoch": 2.7117919921875e-05,
      "model_forward_time": 0.11461925506591797,
      "step": 4443
    },
    {
      "epoch": 2.7117919921875e-05,
      "step": 4443,
      "training_step_time": 0.45081305503845215
    },
    {
      "epoch": 2.71240234375e-05,
      "model_forward_time": 0.1144723892211914,
      "step": 4444
    },
    {
      "epoch": 2.71240234375e-05,
      "step": 4444,
      "training_step_time": 0.41928625106811523
    },
    {
      "epoch": 2.7130126953125e-05,
      "model_forward_time": 0.11459517478942871,
      "step": 4445
    },
    {
      "epoch": 2.7130126953125e-05,
      "step": 4445,
      "training_step_time": 0.39589643478393555
    },
    {
      "epoch": 2.713623046875e-05,
      "model_forward_time": 0.11527466773986816,
      "step": 4446
    },
    {
      "epoch": 2.713623046875e-05,
      "step": 4446,
      "training_step_time": 0.3755760192871094
    },
    {
      "epoch": 2.7142333984375e-05,
      "model_forward_time": 0.11481952667236328,
      "step": 4447
    },
    {
      "epoch": 2.7142333984375e-05,
      "step": 4447,
      "training_step_time": 0.3949306011199951
    },
    {
      "epoch": 2.71484375e-05,
      "model_forward_time": 0.11504173278808594,
      "step": 4448
    },
    {
      "epoch": 2.71484375e-05,
      "step": 4448,
      "training_step_time": 0.9239208698272705
    },
    {
      "epoch": 2.7154541015625e-05,
      "model_forward_time": 0.11431717872619629,
      "step": 4449
    },
    {
      "epoch": 2.7154541015625e-05,
      "step": 4449,
      "training_step_time": 0.42261314392089844
    },
    {
      "epoch": 2.716064453125e-05,
      "grad_norm": 0.2659796178340912,
      "learning_rate": 9.984041396395343e-05,
      "loss": 0.0967,
      "step": 4450
    },
    {
      "epoch": 2.716064453125e-05,
      "model_forward_time": 0.11454987525939941,
      "step": 4450
    },
    {
      "epoch": 2.716064453125e-05,
      "step": 4450,
      "training_step_time": 0.3923373222351074
    },
    {
      "epoch": 2.7166748046875e-05,
      "model_forward_time": 0.11392712593078613,
      "step": 4451
    },
    {
      "epoch": 2.7166748046875e-05,
      "step": 4451,
      "training_step_time": 0.39511632919311523
    },
    {
      "epoch": 2.71728515625e-05,
      "model_forward_time": 0.11444830894470215,
      "step": 4452
    },
    {
      "epoch": 2.71728515625e-05,
      "step": 4452,
      "training_step_time": 0.38573622703552246
    },
    {
      "epoch": 2.7178955078125e-05,
      "model_forward_time": 0.11433959007263184,
      "step": 4453
    },
    {
      "epoch": 2.7178955078125e-05,
      "step": 4453,
      "training_step_time": 0.38651323318481445
    },
    {
      "epoch": 2.718505859375e-05,
      "model_forward_time": 0.11531400680541992,
      "step": 4454
    },
    {
      "epoch": 2.718505859375e-05,
      "step": 4454,
      "training_step_time": 0.6759169101715088
    },
    {
      "epoch": 2.7191162109375e-05,
      "model_forward_time": 0.11413455009460449,
      "step": 4455
    },
    {
      "epoch": 2.7191162109375e-05,
      "step": 4455,
      "training_step_time": 0.4197962284088135
    },
    {
      "epoch": 2.7197265625e-05,
      "model_forward_time": 0.11470770835876465,
      "step": 4456
    },
    {
      "epoch": 2.7197265625e-05,
      "step": 4456,
      "training_step_time": 0.4651966094970703
    },
    {
      "epoch": 2.7203369140625e-05,
      "model_forward_time": 0.11453080177307129,
      "step": 4457
    },
    {
      "epoch": 2.7203369140625e-05,
      "step": 4457,
      "training_step_time": 0.38206958770751953
    },
    {
      "epoch": 2.720947265625e-05,
      "model_forward_time": 0.1153714656829834,
      "step": 4458
    },
    {
      "epoch": 2.720947265625e-05,
      "step": 4458,
      "training_step_time": 0.38817644119262695
    },
    {
      "epoch": 2.7215576171875e-05,
      "model_forward_time": 0.11474990844726562,
      "step": 4459
    },
    {
      "epoch": 2.7215576171875e-05,
      "step": 4459,
      "training_step_time": 0.3971133232116699
    },
    {
      "epoch": 2.72216796875e-05,
      "grad_norm": 0.29522767663002014,
      "learning_rate": 9.983820637893681e-05,
      "loss": 0.1041,
      "step": 4460
    },
    {
      "epoch": 2.72216796875e-05,
      "model_forward_time": 0.11482405662536621,
      "step": 4460
    },
    {
      "epoch": 2.72216796875e-05,
      "step": 4460,
      "training_step_time": 0.6897938251495361
    },
    {
      "epoch": 2.7227783203125e-05,
      "model_forward_time": 0.11478877067565918,
      "step": 4461
    },
    {
      "epoch": 2.7227783203125e-05,
      "step": 4461,
      "training_step_time": 0.3911440372467041
    },
    {
      "epoch": 2.723388671875e-05,
      "model_forward_time": 0.11507177352905273,
      "step": 4462
    },
    {
      "epoch": 2.723388671875e-05,
      "step": 4462,
      "training_step_time": 0.38261914253234863
    },
    {
      "epoch": 2.7239990234375e-05,
      "model_forward_time": 0.11529803276062012,
      "step": 4463
    },
    {
      "epoch": 2.7239990234375e-05,
      "step": 4463,
      "training_step_time": 0.45073604583740234
    },
    {
      "epoch": 2.724609375e-05,
      "model_forward_time": 0.11495089530944824,
      "step": 4464
    },
    {
      "epoch": 2.724609375e-05,
      "step": 4464,
      "training_step_time": 0.3879971504211426
    },
    {
      "epoch": 2.7252197265625e-05,
      "model_forward_time": 0.11478042602539062,
      "step": 4465
    },
    {
      "epoch": 2.7252197265625e-05,
      "step": 4465,
      "training_step_time": 0.39302492141723633
    },
    {
      "epoch": 2.725830078125e-05,
      "model_forward_time": 0.11465573310852051,
      "step": 4466
    },
    {
      "epoch": 2.725830078125e-05,
      "step": 4466,
      "training_step_time": 0.7548813819885254
    },
    {
      "epoch": 2.7264404296875e-05,
      "model_forward_time": 0.11427545547485352,
      "step": 4467
    },
    {
      "epoch": 2.7264404296875e-05,
      "step": 4467,
      "training_step_time": 0.39026737213134766
    },
    {
      "epoch": 2.72705078125e-05,
      "model_forward_time": 0.11469435691833496,
      "step": 4468
    },
    {
      "epoch": 2.72705078125e-05,
      "step": 4468,
      "training_step_time": 0.45819735527038574
    },
    {
      "epoch": 2.7276611328125e-05,
      "model_forward_time": 0.11445045471191406,
      "step": 4469
    },
    {
      "epoch": 2.7276611328125e-05,
      "step": 4469,
      "training_step_time": 0.47605323791503906
    },
    {
      "epoch": 2.728271484375e-05,
      "grad_norm": 0.23451142013072968,
      "learning_rate": 9.983598365438902e-05,
      "loss": 0.1041,
      "step": 4470
    },
    {
      "epoch": 2.728271484375e-05,
      "model_forward_time": 0.11491203308105469,
      "step": 4470
    },
    {
      "epoch": 2.728271484375e-05,
      "step": 4470,
      "training_step_time": 0.4949164390563965
    },
    {
      "epoch": 2.7288818359375e-05,
      "model_forward_time": 0.1143946647644043,
      "step": 4471
    },
    {
      "epoch": 2.7288818359375e-05,
      "step": 4471,
      "training_step_time": 0.38596510887145996
    },
    {
      "epoch": 2.7294921875e-05,
      "model_forward_time": 0.11532211303710938,
      "step": 4472
    },
    {
      "epoch": 2.7294921875e-05,
      "step": 4472,
      "training_step_time": 0.397233247756958
    },
    {
      "epoch": 2.7301025390625e-05,
      "model_forward_time": 0.11445116996765137,
      "step": 4473
    },
    {
      "epoch": 2.7301025390625e-05,
      "step": 4473,
      "training_step_time": 0.3897840976715088
    },
    {
      "epoch": 2.730712890625e-05,
      "model_forward_time": 0.11566472053527832,
      "step": 4474
    },
    {
      "epoch": 2.730712890625e-05,
      "step": 4474,
      "training_step_time": 0.3880758285522461
    },
    {
      "epoch": 2.7313232421875e-05,
      "model_forward_time": 0.1150517463684082,
      "step": 4475
    },
    {
      "epoch": 2.7313232421875e-05,
      "step": 4475,
      "training_step_time": 0.4375183582305908
    },
    {
      "epoch": 2.73193359375e-05,
      "model_forward_time": 0.11559581756591797,
      "step": 4476
    },
    {
      "epoch": 2.73193359375e-05,
      "step": 4476,
      "training_step_time": 0.5056540966033936
    },
    {
      "epoch": 2.7325439453125e-05,
      "model_forward_time": 0.1152350902557373,
      "step": 4477
    },
    {
      "epoch": 2.7325439453125e-05,
      "step": 4477,
      "training_step_time": 0.42941904067993164
    },
    {
      "epoch": 2.733154296875e-05,
      "model_forward_time": 0.11675095558166504,
      "step": 4478
    },
    {
      "epoch": 2.733154296875e-05,
      "step": 4478,
      "training_step_time": 0.39281582832336426
    },
    {
      "epoch": 2.7337646484375e-05,
      "model_forward_time": 0.11496376991271973,
      "step": 4479
    },
    {
      "epoch": 2.7337646484375e-05,
      "step": 4479,
      "training_step_time": 0.40573573112487793
    },
    {
      "epoch": 2.734375e-05,
      "grad_norm": 0.24403370916843414,
      "learning_rate": 9.983374579098523e-05,
      "loss": 0.099,
      "step": 4480
    },
    {
      "epoch": 2.734375e-05,
      "model_forward_time": 0.11562395095825195,
      "step": 4480
    },
    {
      "epoch": 2.734375e-05,
      "step": 4480,
      "training_step_time": 0.386627197265625
    },
    {
      "epoch": 2.7349853515625e-05,
      "model_forward_time": 0.11568903923034668,
      "step": 4481
    },
    {
      "epoch": 2.7349853515625e-05,
      "step": 4481,
      "training_step_time": 0.4522557258605957
    },
    {
      "epoch": 2.735595703125e-05,
      "model_forward_time": 0.11474251747131348,
      "step": 4482
    },
    {
      "epoch": 2.735595703125e-05,
      "step": 4482,
      "training_step_time": 0.36278438568115234
    },
    {
      "epoch": 2.7362060546875e-05,
      "model_forward_time": 0.1149907112121582,
      "step": 4483
    },
    {
      "epoch": 2.7362060546875e-05,
      "step": 4483,
      "training_step_time": 0.46731996536254883
    },
    {
      "epoch": 2.73681640625e-05,
      "model_forward_time": 0.11548781394958496,
      "step": 4484
    },
    {
      "epoch": 2.73681640625e-05,
      "step": 4484,
      "training_step_time": 0.5128805637359619
    },
    {
      "epoch": 2.7374267578125e-05,
      "model_forward_time": 0.11498832702636719,
      "step": 4485
    },
    {
      "epoch": 2.7374267578125e-05,
      "step": 4485,
      "training_step_time": 0.391193151473999
    },
    {
      "epoch": 2.738037109375e-05,
      "model_forward_time": 0.11482763290405273,
      "step": 4486
    },
    {
      "epoch": 2.738037109375e-05,
      "step": 4486,
      "training_step_time": 0.3801748752593994
    },
    {
      "epoch": 2.7386474609375e-05,
      "model_forward_time": 0.1155860424041748,
      "step": 4487
    },
    {
      "epoch": 2.7386474609375e-05,
      "step": 4487,
      "training_step_time": 0.3894340991973877
    },
    {
      "epoch": 2.7392578125e-05,
      "model_forward_time": 0.11543703079223633,
      "step": 4488
    },
    {
      "epoch": 2.7392578125e-05,
      "step": 4488,
      "training_step_time": 0.3863234519958496
    },
    {
      "epoch": 2.7398681640625e-05,
      "model_forward_time": 0.1150200366973877,
      "step": 4489
    },
    {
      "epoch": 2.7398681640625e-05,
      "step": 4489,
      "training_step_time": 0.39898061752319336
    },
    {
      "epoch": 2.740478515625e-05,
      "grad_norm": 0.25155526399612427,
      "learning_rate": 9.983149278940526e-05,
      "loss": 0.1026,
      "step": 4490
    },
    {
      "epoch": 2.740478515625e-05,
      "model_forward_time": 0.1147618293762207,
      "step": 4490
    },
    {
      "epoch": 2.740478515625e-05,
      "step": 4490,
      "training_step_time": 0.5752675533294678
    },
    {
      "epoch": 2.7410888671875e-05,
      "model_forward_time": 0.11493396759033203,
      "step": 4491
    },
    {
      "epoch": 2.7410888671875e-05,
      "step": 4491,
      "training_step_time": 0.4164860248565674
    },
    {
      "epoch": 2.74169921875e-05,
      "model_forward_time": 0.11531949043273926,
      "step": 4492
    },
    {
      "epoch": 2.74169921875e-05,
      "step": 4492,
      "training_step_time": 0.38894081115722656
    },
    {
      "epoch": 2.7423095703125e-05,
      "model_forward_time": 0.1151418685913086,
      "step": 4493
    },
    {
      "epoch": 2.7423095703125e-05,
      "step": 4493,
      "training_step_time": 0.393296480178833
    },
    {
      "epoch": 2.742919921875e-05,
      "model_forward_time": 0.11585164070129395,
      "step": 4494
    },
    {
      "epoch": 2.742919921875e-05,
      "step": 4494,
      "training_step_time": 0.392780065536499
    },
    {
      "epoch": 2.7435302734375e-05,
      "model_forward_time": 0.11588478088378906,
      "step": 4495
    },
    {
      "epoch": 2.7435302734375e-05,
      "step": 4495,
      "training_step_time": 0.4472942352294922
    },
    {
      "epoch": 2.744140625e-05,
      "model_forward_time": 0.11539030075073242,
      "step": 4496
    },
    {
      "epoch": 2.744140625e-05,
      "step": 4496,
      "training_step_time": 0.5750057697296143
    },
    {
      "epoch": 2.7447509765625e-05,
      "model_forward_time": 0.11477828025817871,
      "step": 4497
    },
    {
      "epoch": 2.7447509765625e-05,
      "step": 4497,
      "training_step_time": 0.45471668243408203
    },
    {
      "epoch": 2.745361328125e-05,
      "model_forward_time": 0.11496663093566895,
      "step": 4498
    },
    {
      "epoch": 2.745361328125e-05,
      "step": 4498,
      "training_step_time": 0.4487781524658203
    },
    {
      "epoch": 2.7459716796875e-05,
      "model_forward_time": 0.11663365364074707,
      "step": 4499
    },
    {
      "epoch": 2.7459716796875e-05,
      "step": 4499,
      "training_step_time": 0.4231107234954834
    },
    {
      "epoch": 2.74658203125e-05,
      "grad_norm": 0.31057366728782654,
      "learning_rate": 9.98292246503335e-05,
      "loss": 0.098,
      "step": 4500
    },
    {
      "epoch": 2.74658203125e-05,
      "model_forward_time": 0.11411380767822266,
      "step": 4500
    },
    {
      "epoch": 2.74658203125e-05,
      "step": 4500,
      "training_step_time": 0.3951244354248047
    },
    {
      "epoch": 2.7471923828125e-05,
      "model_forward_time": 0.11469769477844238,
      "step": 4501
    },
    {
      "epoch": 2.7471923828125e-05,
      "step": 4501,
      "training_step_time": 0.38841986656188965
    },
    {
      "epoch": 2.747802734375e-05,
      "model_forward_time": 0.11452293395996094,
      "step": 4502
    },
    {
      "epoch": 2.747802734375e-05,
      "step": 4502,
      "training_step_time": 0.39997124671936035
    },
    {
      "epoch": 2.7484130859375e-05,
      "model_forward_time": 0.11578488349914551,
      "step": 4503
    },
    {
      "epoch": 2.7484130859375e-05,
      "step": 4503,
      "training_step_time": 0.3892829418182373
    },
    {
      "epoch": 2.7490234375e-05,
      "model_forward_time": 0.11703872680664062,
      "step": 4504
    },
    {
      "epoch": 2.7490234375e-05,
      "step": 4504,
      "training_step_time": 0.38925886154174805
    },
    {
      "epoch": 2.7496337890625e-05,
      "model_forward_time": 0.11522293090820312,
      "step": 4505
    },
    {
      "epoch": 2.7496337890625e-05,
      "step": 4505,
      "training_step_time": 0.46866559982299805
    },
    {
      "epoch": 2.750244140625e-05,
      "model_forward_time": 0.11521673202514648,
      "step": 4506
    },
    {
      "epoch": 2.750244140625e-05,
      "step": 4506,
      "training_step_time": 0.39913177490234375
    },
    {
      "epoch": 2.7508544921875e-05,
      "model_forward_time": 0.11497879028320312,
      "step": 4507
    },
    {
      "epoch": 2.7508544921875e-05,
      "step": 4507,
      "training_step_time": 0.39523839950561523
    },
    {
      "epoch": 2.75146484375e-05,
      "model_forward_time": 0.11527156829833984,
      "step": 4508
    },
    {
      "epoch": 2.75146484375e-05,
      "step": 4508,
      "training_step_time": 0.6479308605194092
    },
    {
      "epoch": 2.7520751953125e-05,
      "model_forward_time": 0.11472415924072266,
      "step": 4509
    },
    {
      "epoch": 2.7520751953125e-05,
      "step": 4509,
      "training_step_time": 0.41919541358947754
    },
    {
      "epoch": 2.752685546875e-05,
      "grad_norm": 0.40936192870140076,
      "learning_rate": 9.982694137445896e-05,
      "loss": 0.1087,
      "step": 4510
    },
    {
      "epoch": 2.752685546875e-05,
      "model_forward_time": 0.11485695838928223,
      "step": 4510
    },
    {
      "epoch": 2.752685546875e-05,
      "step": 4510,
      "training_step_time": 0.3876957893371582
    },
    {
      "epoch": 2.7532958984375e-05,
      "model_forward_time": 0.11513710021972656,
      "step": 4511
    },
    {
      "epoch": 2.7532958984375e-05,
      "step": 4511,
      "training_step_time": 0.4788506031036377
    },
    {
      "epoch": 2.75390625e-05,
      "model_forward_time": 0.1153266429901123,
      "step": 4512
    },
    {
      "epoch": 2.75390625e-05,
      "step": 4512,
      "training_step_time": 0.48986005783081055
    },
    {
      "epoch": 2.7545166015625e-05,
      "model_forward_time": 0.11542844772338867,
      "step": 4513
    },
    {
      "epoch": 2.7545166015625e-05,
      "step": 4513,
      "training_step_time": 0.4914119243621826
    },
    {
      "epoch": 2.755126953125e-05,
      "model_forward_time": 0.11530327796936035,
      "step": 4514
    },
    {
      "epoch": 2.755126953125e-05,
      "step": 4514,
      "training_step_time": 0.396695613861084
    },
    {
      "epoch": 2.7557373046875e-05,
      "model_forward_time": 0.11452102661132812,
      "step": 4515
    },
    {
      "epoch": 2.7557373046875e-05,
      "step": 4515,
      "training_step_time": 0.39044785499572754
    },
    {
      "epoch": 2.75634765625e-05,
      "model_forward_time": 0.11484527587890625,
      "step": 4516
    },
    {
      "epoch": 2.75634765625e-05,
      "step": 4516,
      "training_step_time": 0.38501715660095215
    },
    {
      "epoch": 2.7569580078125e-05,
      "model_forward_time": 0.11552286148071289,
      "step": 4517
    },
    {
      "epoch": 2.7569580078125e-05,
      "step": 4517,
      "training_step_time": 0.4095041751861572
    },
    {
      "epoch": 2.757568359375e-05,
      "model_forward_time": 0.1150507926940918,
      "step": 4518
    },
    {
      "epoch": 2.757568359375e-05,
      "step": 4518,
      "training_step_time": 0.414858341217041
    },
    {
      "epoch": 2.7581787109375e-05,
      "model_forward_time": 0.11567950248718262,
      "step": 4519
    },
    {
      "epoch": 2.7581787109375e-05,
      "step": 4519,
      "training_step_time": 0.4023737907409668
    },
    {
      "epoch": 2.7587890625e-05,
      "grad_norm": 0.2868102788925171,
      "learning_rate": 9.982464296247522e-05,
      "loss": 0.1003,
      "step": 4520
    },
    {
      "epoch": 2.7587890625e-05,
      "model_forward_time": 0.11539053916931152,
      "step": 4520
    },
    {
      "epoch": 2.7587890625e-05,
      "step": 4520,
      "training_step_time": 0.39888787269592285
    },
    {
      "epoch": 2.7593994140625e-05,
      "model_forward_time": 0.11565446853637695,
      "step": 4521
    },
    {
      "epoch": 2.7593994140625e-05,
      "step": 4521,
      "training_step_time": 0.395719051361084
    },
    {
      "epoch": 2.760009765625e-05,
      "model_forward_time": 0.11509275436401367,
      "step": 4522
    },
    {
      "epoch": 2.760009765625e-05,
      "step": 4522,
      "training_step_time": 0.4295475482940674
    },
    {
      "epoch": 2.7606201171875e-05,
      "model_forward_time": 0.11530542373657227,
      "step": 4523
    },
    {
      "epoch": 2.7606201171875e-05,
      "step": 4523,
      "training_step_time": 0.39048123359680176
    },
    {
      "epoch": 2.76123046875e-05,
      "model_forward_time": 0.11517739295959473,
      "step": 4524
    },
    {
      "epoch": 2.76123046875e-05,
      "step": 4524,
      "training_step_time": 0.39302897453308105
    },
    {
      "epoch": 2.7618408203125e-05,
      "model_forward_time": 0.11539626121520996,
      "step": 4525
    },
    {
      "epoch": 2.7618408203125e-05,
      "step": 4525,
      "training_step_time": 0.5497291088104248
    },
    {
      "epoch": 2.762451171875e-05,
      "model_forward_time": 0.1145470142364502,
      "step": 4526
    },
    {
      "epoch": 2.762451171875e-05,
      "step": 4526,
      "training_step_time": 0.4871649742126465
    },
    {
      "epoch": 2.7630615234375e-05,
      "model_forward_time": 0.11480569839477539,
      "step": 4527
    },
    {
      "epoch": 2.7630615234375e-05,
      "step": 4527,
      "training_step_time": 0.48765063285827637
    },
    {
      "epoch": 2.763671875e-05,
      "model_forward_time": 0.11456704139709473,
      "step": 4528
    },
    {
      "epoch": 2.763671875e-05,
      "step": 4528,
      "training_step_time": 0.3911774158477783
    },
    {
      "epoch": 2.7642822265625e-05,
      "model_forward_time": 0.1148843765258789,
      "step": 4529
    },
    {
      "epoch": 2.7642822265625e-05,
      "step": 4529,
      "training_step_time": 0.4008636474609375
    },
    {
      "epoch": 2.764892578125e-05,
      "grad_norm": 0.37852129340171814,
      "learning_rate": 9.98223294150805e-05,
      "loss": 0.1039,
      "step": 4530
    },
    {
      "epoch": 2.764892578125e-05,
      "model_forward_time": 0.11394691467285156,
      "step": 4530
    },
    {
      "epoch": 2.764892578125e-05,
      "step": 4530,
      "training_step_time": 0.3831827640533447
    },
    {
      "epoch": 2.7655029296875e-05,
      "model_forward_time": 0.1154637336730957,
      "step": 4531
    },
    {
      "epoch": 2.7655029296875e-05,
      "step": 4531,
      "training_step_time": 0.8918213844299316
    },
    {
      "epoch": 2.76611328125e-05,
      "model_forward_time": 0.11473536491394043,
      "step": 4532
    },
    {
      "epoch": 2.76611328125e-05,
      "step": 4532,
      "training_step_time": 0.38175368309020996
    },
    {
      "epoch": 2.7667236328125e-05,
      "model_forward_time": 0.11382293701171875,
      "step": 4533
    },
    {
      "epoch": 2.7667236328125e-05,
      "step": 4533,
      "training_step_time": 0.3916802406311035
    },
    {
      "epoch": 2.767333984375e-05,
      "model_forward_time": 0.1146998405456543,
      "step": 4534
    },
    {
      "epoch": 2.767333984375e-05,
      "step": 4534,
      "training_step_time": 0.44121718406677246
    },
    {
      "epoch": 2.7679443359375e-05,
      "model_forward_time": 0.11408567428588867,
      "step": 4535
    },
    {
      "epoch": 2.7679443359375e-05,
      "step": 4535,
      "training_step_time": 0.40101027488708496
    },
    {
      "epoch": 2.7685546875e-05,
      "model_forward_time": 0.11473631858825684,
      "step": 4536
    },
    {
      "epoch": 2.7685546875e-05,
      "step": 4536,
      "training_step_time": 0.39501380920410156
    },
    {
      "epoch": 2.7691650390625e-05,
      "model_forward_time": 0.11447000503540039,
      "step": 4537
    },
    {
      "epoch": 2.7691650390625e-05,
      "step": 4537,
      "training_step_time": 0.8627216815948486
    },
    {
      "epoch": 2.769775390625e-05,
      "model_forward_time": 0.11512565612792969,
      "step": 4538
    },
    {
      "epoch": 2.769775390625e-05,
      "step": 4538,
      "training_step_time": 0.4393036365509033
    },
    {
      "epoch": 2.7703857421875e-05,
      "model_forward_time": 0.11499834060668945,
      "step": 4539
    },
    {
      "epoch": 2.7703857421875e-05,
      "step": 4539,
      "training_step_time": 0.46208763122558594
    },
    {
      "epoch": 2.77099609375e-05,
      "grad_norm": 0.266767680644989,
      "learning_rate": 9.982000073297759e-05,
      "loss": 0.0955,
      "step": 4540
    },
    {
      "epoch": 2.77099609375e-05,
      "model_forward_time": 0.11506009101867676,
      "step": 4540
    },
    {
      "epoch": 2.77099609375e-05,
      "step": 4540,
      "training_step_time": 0.5249660015106201
    },
    {
      "epoch": 2.7716064453125e-05,
      "model_forward_time": 0.11473798751831055,
      "step": 4541
    },
    {
      "epoch": 2.7716064453125e-05,
      "step": 4541,
      "training_step_time": 0.3780937194824219
    },
    {
      "epoch": 2.772216796875e-05,
      "model_forward_time": 0.11405467987060547,
      "step": 4542
    },
    {
      "epoch": 2.772216796875e-05,
      "step": 4542,
      "training_step_time": 0.38272595405578613
    },
    {
      "epoch": 2.7728271484375e-05,
      "model_forward_time": 0.11449980735778809,
      "step": 4543
    },
    {
      "epoch": 2.7728271484375e-05,
      "step": 4543,
      "training_step_time": 0.7062587738037109
    },
    {
      "epoch": 2.7734375e-05,
      "model_forward_time": 0.11400079727172852,
      "step": 4544
    },
    {
      "epoch": 2.7734375e-05,
      "step": 4544,
      "training_step_time": 0.45757293701171875
    },
    {
      "epoch": 2.7740478515625e-05,
      "model_forward_time": 0.11434507369995117,
      "step": 4545
    },
    {
      "epoch": 2.7740478515625e-05,
      "step": 4545,
      "training_step_time": 0.42716193199157715
    },
    {
      "epoch": 2.774658203125e-05,
      "model_forward_time": 0.11432480812072754,
      "step": 4546
    },
    {
      "epoch": 2.774658203125e-05,
      "step": 4546,
      "training_step_time": 0.41464853286743164
    },
    {
      "epoch": 2.7752685546875e-05,
      "model_forward_time": 0.11429071426391602,
      "step": 4547
    },
    {
      "epoch": 2.7752685546875e-05,
      "step": 4547,
      "training_step_time": 0.4006650447845459
    },
    {
      "epoch": 2.77587890625e-05,
      "model_forward_time": 0.11450004577636719,
      "step": 4548
    },
    {
      "epoch": 2.77587890625e-05,
      "step": 4548,
      "training_step_time": 0.3837270736694336
    },
    {
      "epoch": 2.7764892578125e-05,
      "model_forward_time": 0.11540532112121582,
      "step": 4549
    },
    {
      "epoch": 2.7764892578125e-05,
      "step": 4549,
      "training_step_time": 0.7051963806152344
    },
    {
      "epoch": 2.777099609375e-05,
      "grad_norm": 0.2866908311843872,
      "learning_rate": 9.981765691687388e-05,
      "loss": 0.093,
      "step": 4550
    },
    {
      "epoch": 2.777099609375e-05,
      "model_forward_time": 0.1148383617401123,
      "step": 4550
    },
    {
      "epoch": 2.777099609375e-05,
      "step": 4550,
      "training_step_time": 0.3743915557861328
    },
    {
      "epoch": 2.7777099609375e-05,
      "model_forward_time": 0.11450839042663574,
      "step": 4551
    },
    {
      "epoch": 2.7777099609375e-05,
      "step": 4551,
      "training_step_time": 0.3983500003814697
    },
    {
      "epoch": 2.7783203125e-05,
      "model_forward_time": 0.11449933052062988,
      "step": 4552
    },
    {
      "epoch": 2.7783203125e-05,
      "step": 4552,
      "training_step_time": 0.47612929344177246
    },
    {
      "epoch": 2.7789306640625e-05,
      "model_forward_time": 0.11449241638183594,
      "step": 4553
    },
    {
      "epoch": 2.7789306640625e-05,
      "step": 4553,
      "training_step_time": 0.4482731819152832
    },
    {
      "epoch": 2.779541015625e-05,
      "model_forward_time": 0.1146082878112793,
      "step": 4554
    },
    {
      "epoch": 2.779541015625e-05,
      "step": 4554,
      "training_step_time": 0.42169690132141113
    },
    {
      "epoch": 2.7801513671875e-05,
      "model_forward_time": 0.11495637893676758,
      "step": 4555
    },
    {
      "epoch": 2.7801513671875e-05,
      "step": 4555,
      "training_step_time": 0.8416152000427246
    },
    {
      "epoch": 2.78076171875e-05,
      "model_forward_time": 0.11449670791625977,
      "step": 4556
    },
    {
      "epoch": 2.78076171875e-05,
      "step": 4556,
      "training_step_time": 0.3727867603302002
    },
    {
      "epoch": 2.7813720703125e-05,
      "model_forward_time": 0.11482906341552734,
      "step": 4557
    },
    {
      "epoch": 2.7813720703125e-05,
      "step": 4557,
      "training_step_time": 0.4207189083099365
    },
    {
      "epoch": 2.781982421875e-05,
      "model_forward_time": 0.11511707305908203,
      "step": 4558
    },
    {
      "epoch": 2.781982421875e-05,
      "step": 4558,
      "training_step_time": 0.40683770179748535
    },
    {
      "epoch": 2.7825927734375e-05,
      "model_forward_time": 0.11431503295898438,
      "step": 4559
    },
    {
      "epoch": 2.7825927734375e-05,
      "step": 4559,
      "training_step_time": 0.40758371353149414
    },
    {
      "epoch": 2.783203125e-05,
      "grad_norm": 0.29975035786628723,
      "learning_rate": 9.981529796748134e-05,
      "loss": 0.0971,
      "step": 4560
    },
    {
      "epoch": 2.783203125e-05,
      "model_forward_time": 0.1143503189086914,
      "step": 4560
    },
    {
      "epoch": 2.783203125e-05,
      "step": 4560,
      "training_step_time": 0.39686059951782227
    },
    {
      "epoch": 2.7838134765625e-05,
      "model_forward_time": 0.11522412300109863,
      "step": 4561
    },
    {
      "epoch": 2.7838134765625e-05,
      "step": 4561,
      "training_step_time": 1.0826141834259033
    },
    {
      "epoch": 2.784423828125e-05,
      "model_forward_time": 0.1141207218170166,
      "step": 4562
    },
    {
      "epoch": 2.784423828125e-05,
      "step": 4562,
      "training_step_time": 0.3880879878997803
    },
    {
      "epoch": 2.7850341796875e-05,
      "model_forward_time": 0.11423945426940918,
      "step": 4563
    },
    {
      "epoch": 2.7850341796875e-05,
      "step": 4563,
      "training_step_time": 0.36110782623291016
    },
    {
      "epoch": 2.78564453125e-05,
      "model_forward_time": 0.11414384841918945,
      "step": 4564
    },
    {
      "epoch": 2.78564453125e-05,
      "step": 4564,
      "training_step_time": 0.4097630977630615
    },
    {
      "epoch": 2.7862548828125e-05,
      "model_forward_time": 0.11389422416687012,
      "step": 4565
    },
    {
      "epoch": 2.7862548828125e-05,
      "step": 4565,
      "training_step_time": 0.4482851028442383
    },
    {
      "epoch": 2.786865234375e-05,
      "model_forward_time": 0.11418509483337402,
      "step": 4566
    },
    {
      "epoch": 2.786865234375e-05,
      "step": 4566,
      "training_step_time": 0.45610737800598145
    },
    {
      "epoch": 2.7874755859375e-05,
      "model_forward_time": 0.11504340171813965,
      "step": 4567
    },
    {
      "epoch": 2.7874755859375e-05,
      "step": 4567,
      "training_step_time": 0.40288472175598145
    },
    {
      "epoch": 2.7880859375e-05,
      "model_forward_time": 0.11435198783874512,
      "step": 4568
    },
    {
      "epoch": 2.7880859375e-05,
      "step": 4568,
      "training_step_time": 0.41080641746520996
    },
    {
      "epoch": 2.7886962890625e-05,
      "model_forward_time": 0.11533474922180176,
      "step": 4569
    },
    {
      "epoch": 2.7886962890625e-05,
      "step": 4569,
      "training_step_time": 0.3844316005706787
    },
    {
      "epoch": 2.789306640625e-05,
      "grad_norm": 0.39472368359565735,
      "learning_rate": 9.981292388551659e-05,
      "loss": 0.0999,
      "step": 4570
    },
    {
      "epoch": 2.789306640625e-05,
      "model_forward_time": 0.11522078514099121,
      "step": 4570
    },
    {
      "epoch": 2.789306640625e-05,
      "step": 4570,
      "training_step_time": 0.4788830280303955
    },
    {
      "epoch": 2.7899169921875e-05,
      "model_forward_time": 0.11449003219604492,
      "step": 4571
    },
    {
      "epoch": 2.7899169921875e-05,
      "step": 4571,
      "training_step_time": 0.45989155769348145
    },
    {
      "epoch": 2.79052734375e-05,
      "model_forward_time": 0.11515617370605469,
      "step": 4572
    },
    {
      "epoch": 2.79052734375e-05,
      "step": 4572,
      "training_step_time": 0.3964829444885254
    },
    {
      "epoch": 2.7911376953125e-05,
      "model_forward_time": 0.11446571350097656,
      "step": 4573
    },
    {
      "epoch": 2.7911376953125e-05,
      "step": 4573,
      "training_step_time": 0.40108346939086914
    },
    {
      "epoch": 2.791748046875e-05,
      "model_forward_time": 0.11507654190063477,
      "step": 4574
    },
    {
      "epoch": 2.791748046875e-05,
      "step": 4574,
      "training_step_time": 0.395615816116333
    },
    {
      "epoch": 2.7923583984375e-05,
      "model_forward_time": 0.11510276794433594,
      "step": 4575
    },
    {
      "epoch": 2.7923583984375e-05,
      "step": 4575,
      "training_step_time": 0.38936948776245117
    },
    {
      "epoch": 2.79296875e-05,
      "model_forward_time": 0.11496162414550781,
      "step": 4576
    },
    {
      "epoch": 2.79296875e-05,
      "step": 4576,
      "training_step_time": 0.39493775367736816
    },
    {
      "epoch": 2.7935791015625e-05,
      "model_forward_time": 0.11477208137512207,
      "step": 4577
    },
    {
      "epoch": 2.7935791015625e-05,
      "step": 4577,
      "training_step_time": 0.394284725189209
    },
    {
      "epoch": 2.794189453125e-05,
      "model_forward_time": 0.1152956485748291,
      "step": 4578
    },
    {
      "epoch": 2.794189453125e-05,
      "step": 4578,
      "training_step_time": 0.36919641494750977
    },
    {
      "epoch": 2.7947998046875e-05,
      "model_forward_time": 0.11505842208862305,
      "step": 4579
    },
    {
      "epoch": 2.7947998046875e-05,
      "step": 4579,
      "training_step_time": 0.5052945613861084
    },
    {
      "epoch": 2.79541015625e-05,
      "grad_norm": 0.36309197545051575,
      "learning_rate": 9.98105346717008e-05,
      "loss": 0.1009,
      "step": 4580
    },
    {
      "epoch": 2.79541015625e-05,
      "model_forward_time": 0.11557245254516602,
      "step": 4580
    },
    {
      "epoch": 2.79541015625e-05,
      "step": 4580,
      "training_step_time": 0.4772331714630127
    },
    {
      "epoch": 2.7960205078125e-05,
      "model_forward_time": 0.11623048782348633,
      "step": 4581
    },
    {
      "epoch": 2.7960205078125e-05,
      "step": 4581,
      "training_step_time": 0.3924233913421631
    },
    {
      "epoch": 2.796630859375e-05,
      "model_forward_time": 0.11482596397399902,
      "step": 4582
    },
    {
      "epoch": 2.796630859375e-05,
      "step": 4582,
      "training_step_time": 0.3916294574737549
    },
    {
      "epoch": 2.7972412109375e-05,
      "model_forward_time": 0.1153569221496582,
      "step": 4583
    },
    {
      "epoch": 2.7972412109375e-05,
      "step": 4583,
      "training_step_time": 0.39215970039367676
    },
    {
      "epoch": 2.7978515625e-05,
      "model_forward_time": 0.11571979522705078,
      "step": 4584
    },
    {
      "epoch": 2.7978515625e-05,
      "step": 4584,
      "training_step_time": 0.47218966484069824
    },
    {
      "epoch": 2.7984619140625e-05,
      "model_forward_time": 0.11529994010925293,
      "step": 4585
    },
    {
      "epoch": 2.7984619140625e-05,
      "step": 4585,
      "training_step_time": 0.41292405128479004
    },
    {
      "epoch": 2.799072265625e-05,
      "model_forward_time": 0.11521458625793457,
      "step": 4586
    },
    {
      "epoch": 2.799072265625e-05,
      "step": 4586,
      "training_step_time": 0.4071803092956543
    },
    {
      "epoch": 2.7996826171875e-05,
      "model_forward_time": 0.11565303802490234,
      "step": 4587
    },
    {
      "epoch": 2.7996826171875e-05,
      "step": 4587,
      "training_step_time": 0.3955204486846924
    },
    {
      "epoch": 2.80029296875e-05,
      "model_forward_time": 0.11513829231262207,
      "step": 4588
    },
    {
      "epoch": 2.80029296875e-05,
      "step": 4588,
      "training_step_time": 0.39219212532043457
    },
    {
      "epoch": 2.8009033203125e-05,
      "model_forward_time": 0.11617612838745117,
      "step": 4589
    },
    {
      "epoch": 2.8009033203125e-05,
      "step": 4589,
      "training_step_time": 0.39777660369873047
    },
    {
      "epoch": 2.801513671875e-05,
      "grad_norm": 0.2890002727508545,
      "learning_rate": 9.980813032675974e-05,
      "loss": 0.1027,
      "step": 4590
    },
    {
      "epoch": 2.801513671875e-05,
      "model_forward_time": 0.11490607261657715,
      "step": 4590
    },
    {
      "epoch": 2.801513671875e-05,
      "step": 4590,
      "training_step_time": 0.3919534683227539
    },
    {
      "epoch": 2.8021240234375e-05,
      "model_forward_time": 0.11508035659790039,
      "step": 4591
    },
    {
      "epoch": 2.8021240234375e-05,
      "step": 4591,
      "training_step_time": 0.3954470157623291
    },
    {
      "epoch": 2.802734375e-05,
      "model_forward_time": 0.11564517021179199,
      "step": 4592
    },
    {
      "epoch": 2.802734375e-05,
      "step": 4592,
      "training_step_time": 0.3927898406982422
    },
    {
      "epoch": 2.8033447265625e-05,
      "model_forward_time": 0.11508941650390625,
      "step": 4593
    },
    {
      "epoch": 2.8033447265625e-05,
      "step": 4593,
      "training_step_time": 0.4989786148071289
    },
    {
      "epoch": 2.803955078125e-05,
      "model_forward_time": 0.11556291580200195,
      "step": 4594
    },
    {
      "epoch": 2.803955078125e-05,
      "step": 4594,
      "training_step_time": 0.5024130344390869
    },
    {
      "epoch": 2.8045654296875e-05,
      "model_forward_time": 0.11569762229919434,
      "step": 4595
    },
    {
      "epoch": 2.8045654296875e-05,
      "step": 4595,
      "training_step_time": 0.5047669410705566
    },
    {
      "epoch": 2.80517578125e-05,
      "model_forward_time": 0.11457085609436035,
      "step": 4596
    },
    {
      "epoch": 2.80517578125e-05,
      "step": 4596,
      "training_step_time": 0.4225175380706787
    },
    {
      "epoch": 2.8057861328125e-05,
      "model_forward_time": 0.11457324028015137,
      "step": 4597
    },
    {
      "epoch": 2.8057861328125e-05,
      "step": 4597,
      "training_step_time": 0.44503045082092285
    },
    {
      "epoch": 2.806396484375e-05,
      "model_forward_time": 0.11555981636047363,
      "step": 4598
    },
    {
      "epoch": 2.806396484375e-05,
      "step": 4598,
      "training_step_time": 0.4109373092651367
    },
    {
      "epoch": 2.8070068359375e-05,
      "model_forward_time": 0.11472773551940918,
      "step": 4599
    },
    {
      "epoch": 2.8070068359375e-05,
      "step": 4599,
      "training_step_time": 0.40672945976257324
    },
    {
      "epoch": 2.8076171875e-05,
      "grad_norm": 0.34191715717315674,
      "learning_rate": 9.980571085142381e-05,
      "loss": 0.1012,
      "step": 4600
    },
    {
      "epoch": 2.8076171875e-05,
      "model_forward_time": 0.11497020721435547,
      "step": 4600
    },
    {
      "epoch": 2.8076171875e-05,
      "step": 4600,
      "training_step_time": 0.38872718811035156
    },
    {
      "epoch": 2.8082275390625e-05,
      "model_forward_time": 0.11613059043884277,
      "step": 4601
    },
    {
      "epoch": 2.8082275390625e-05,
      "step": 4601,
      "training_step_time": 0.41292262077331543
    },
    {
      "epoch": 2.808837890625e-05,
      "model_forward_time": 0.11551427841186523,
      "step": 4602
    },
    {
      "epoch": 2.808837890625e-05,
      "step": 4602,
      "training_step_time": 0.3896021842956543
    },
    {
      "epoch": 2.8094482421875e-05,
      "model_forward_time": 0.11542224884033203,
      "step": 4603
    },
    {
      "epoch": 2.8094482421875e-05,
      "step": 4603,
      "training_step_time": 0.4118821620941162
    },
    {
      "epoch": 2.81005859375e-05,
      "model_forward_time": 0.11589336395263672,
      "step": 4604
    },
    {
      "epoch": 2.81005859375e-05,
      "step": 4604,
      "training_step_time": 0.39272475242614746
    },
    {
      "epoch": 2.8106689453125e-05,
      "model_forward_time": 0.11533331871032715,
      "step": 4605
    },
    {
      "epoch": 2.8106689453125e-05,
      "step": 4605,
      "training_step_time": 0.4000213146209717
    },
    {
      "epoch": 2.811279296875e-05,
      "model_forward_time": 0.11614871025085449,
      "step": 4606
    },
    {
      "epoch": 2.811279296875e-05,
      "step": 4606,
      "training_step_time": 0.39177465438842773
    },
    {
      "epoch": 2.8118896484375e-05,
      "model_forward_time": 0.1152656078338623,
      "step": 4607
    },
    {
      "epoch": 2.8118896484375e-05,
      "step": 4607,
      "training_step_time": 0.38112330436706543
    },
    {
      "epoch": 2.8125e-05,
      "model_forward_time": 0.11551260948181152,
      "step": 4608
    },
    {
      "epoch": 2.8125e-05,
      "step": 4608,
      "training_step_time": 0.4225127696990967
    },
    {
      "epoch": 2.8131103515625e-05,
      "model_forward_time": 0.11581230163574219,
      "step": 4609
    },
    {
      "epoch": 2.8131103515625e-05,
      "step": 4609,
      "training_step_time": 0.4990696907043457
    },
    {
      "epoch": 2.813720703125e-05,
      "grad_norm": 0.28113576769828796,
      "learning_rate": 9.980327624642795e-05,
      "loss": 0.1014,
      "step": 4610
    },
    {
      "epoch": 2.813720703125e-05,
      "model_forward_time": 0.11512207984924316,
      "step": 4610
    },
    {
      "epoch": 2.813720703125e-05,
      "step": 4610,
      "training_step_time": 0.4515421390533447
    },
    {
      "epoch": 2.8143310546875e-05,
      "model_forward_time": 0.11551618576049805,
      "step": 4611
    },
    {
      "epoch": 2.8143310546875e-05,
      "step": 4611,
      "training_step_time": 0.39592480659484863
    },
    {
      "epoch": 2.81494140625e-05,
      "model_forward_time": 0.11567211151123047,
      "step": 4612
    },
    {
      "epoch": 2.81494140625e-05,
      "step": 4612,
      "training_step_time": 0.4013395309448242
    },
    {
      "epoch": 2.8155517578125e-05,
      "model_forward_time": 0.11494207382202148,
      "step": 4613
    },
    {
      "epoch": 2.8155517578125e-05,
      "step": 4613,
      "training_step_time": 0.42337703704833984
    },
    {
      "epoch": 2.816162109375e-05,
      "model_forward_time": 0.1144266128540039,
      "step": 4614
    },
    {
      "epoch": 2.816162109375e-05,
      "step": 4614,
      "training_step_time": 0.3978545665740967
    },
    {
      "epoch": 2.8167724609375e-05,
      "model_forward_time": 0.11546468734741211,
      "step": 4615
    },
    {
      "epoch": 2.8167724609375e-05,
      "step": 4615,
      "training_step_time": 0.3933851718902588
    },
    {
      "epoch": 2.8173828125e-05,
      "model_forward_time": 0.11502695083618164,
      "step": 4616
    },
    {
      "epoch": 2.8173828125e-05,
      "step": 4616,
      "training_step_time": 0.3938751220703125
    },
    {
      "epoch": 2.8179931640625e-05,
      "model_forward_time": 0.11557269096374512,
      "step": 4617
    },
    {
      "epoch": 2.8179931640625e-05,
      "step": 4617,
      "training_step_time": 0.3858191967010498
    },
    {
      "epoch": 2.818603515625e-05,
      "model_forward_time": 0.1151731014251709,
      "step": 4618
    },
    {
      "epoch": 2.818603515625e-05,
      "step": 4618,
      "training_step_time": 0.38776659965515137
    },
    {
      "epoch": 2.8192138671875e-05,
      "model_forward_time": 0.1153726577758789,
      "step": 4619
    },
    {
      "epoch": 2.8192138671875e-05,
      "step": 4619,
      "training_step_time": 0.38033556938171387
    },
    {
      "epoch": 2.81982421875e-05,
      "grad_norm": 0.2587094008922577,
      "learning_rate": 9.980082651251175e-05,
      "loss": 0.0995,
      "step": 4620
    },
    {
      "epoch": 2.81982421875e-05,
      "model_forward_time": 0.11521220207214355,
      "step": 4620
    },
    {
      "epoch": 2.81982421875e-05,
      "step": 4620,
      "training_step_time": 0.38848447799682617
    },
    {
      "epoch": 2.8204345703125e-05,
      "model_forward_time": 0.11556577682495117,
      "step": 4621
    },
    {
      "epoch": 2.8204345703125e-05,
      "step": 4621,
      "training_step_time": 0.3963589668273926
    },
    {
      "epoch": 2.821044921875e-05,
      "model_forward_time": 0.11553192138671875,
      "step": 4622
    },
    {
      "epoch": 2.821044921875e-05,
      "step": 4622,
      "training_step_time": 0.3944997787475586
    },
    {
      "epoch": 2.8216552734375e-05,
      "model_forward_time": 0.11642575263977051,
      "step": 4623
    },
    {
      "epoch": 2.8216552734375e-05,
      "step": 4623,
      "training_step_time": 0.4262535572052002
    },
    {
      "epoch": 2.822265625e-05,
      "model_forward_time": 0.11536955833435059,
      "step": 4624
    },
    {
      "epoch": 2.822265625e-05,
      "step": 4624,
      "training_step_time": 0.4731411933898926
    },
    {
      "epoch": 2.8228759765625e-05,
      "model_forward_time": 0.11510968208312988,
      "step": 4625
    },
    {
      "epoch": 2.8228759765625e-05,
      "step": 4625,
      "training_step_time": 0.4668159484863281
    },
    {
      "epoch": 2.823486328125e-05,
      "model_forward_time": 0.11559772491455078,
      "step": 4626
    },
    {
      "epoch": 2.823486328125e-05,
      "step": 4626,
      "training_step_time": 0.4403219223022461
    },
    {
      "epoch": 2.8240966796875e-05,
      "model_forward_time": 0.1154935359954834,
      "step": 4627
    },
    {
      "epoch": 2.8240966796875e-05,
      "step": 4627,
      "training_step_time": 0.3926222324371338
    },
    {
      "epoch": 2.82470703125e-05,
      "model_forward_time": 0.11512112617492676,
      "step": 4628
    },
    {
      "epoch": 2.82470703125e-05,
      "step": 4628,
      "training_step_time": 0.4054734706878662
    },
    {
      "epoch": 2.8253173828125e-05,
      "model_forward_time": 0.11543846130371094,
      "step": 4629
    },
    {
      "epoch": 2.8253173828125e-05,
      "step": 4629,
      "training_step_time": 0.40421628952026367
    },
    {
      "epoch": 2.825927734375e-05,
      "grad_norm": 0.3387567400932312,
      "learning_rate": 9.979836165041936e-05,
      "loss": 0.0993,
      "step": 4630
    },
    {
      "epoch": 2.825927734375e-05,
      "model_forward_time": 0.11472177505493164,
      "step": 4630
    },
    {
      "epoch": 2.825927734375e-05,
      "step": 4630,
      "training_step_time": 0.4385225772857666
    },
    {
      "epoch": 2.8265380859375e-05,
      "model_forward_time": 0.1143949031829834,
      "step": 4631
    },
    {
      "epoch": 2.8265380859375e-05,
      "step": 4631,
      "training_step_time": 0.38825178146362305
    },
    {
      "epoch": 2.8271484375e-05,
      "model_forward_time": 0.11547970771789551,
      "step": 4632
    },
    {
      "epoch": 2.8271484375e-05,
      "step": 4632,
      "training_step_time": 0.3896799087524414
    },
    {
      "epoch": 2.8277587890625e-05,
      "model_forward_time": 0.11548924446105957,
      "step": 4633
    },
    {
      "epoch": 2.8277587890625e-05,
      "step": 4633,
      "training_step_time": 0.39206576347351074
    },
    {
      "epoch": 2.828369140625e-05,
      "model_forward_time": 0.11545681953430176,
      "step": 4634
    },
    {
      "epoch": 2.828369140625e-05,
      "step": 4634,
      "training_step_time": 0.9947707653045654
    },
    {
      "epoch": 2.8289794921875e-05,
      "model_forward_time": 0.1139822006225586,
      "step": 4635
    },
    {
      "epoch": 2.8289794921875e-05,
      "step": 4635,
      "training_step_time": 0.3890206813812256
    },
    {
      "epoch": 2.82958984375e-05,
      "model_forward_time": 0.11489582061767578,
      "step": 4636
    },
    {
      "epoch": 2.82958984375e-05,
      "step": 4636,
      "training_step_time": 0.38344764709472656
    },
    {
      "epoch": 2.8302001953125e-05,
      "model_forward_time": 0.11506056785583496,
      "step": 4637
    },
    {
      "epoch": 2.8302001953125e-05,
      "step": 4637,
      "training_step_time": 0.39250755310058594
    },
    {
      "epoch": 2.830810546875e-05,
      "model_forward_time": 0.11441206932067871,
      "step": 4638
    },
    {
      "epoch": 2.830810546875e-05,
      "step": 4638,
      "training_step_time": 0.46933650970458984
    },
    {
      "epoch": 2.8314208984375e-05,
      "model_forward_time": 0.11461591720581055,
      "step": 4639
    },
    {
      "epoch": 2.8314208984375e-05,
      "step": 4639,
      "training_step_time": 0.472736120223999
    },
    {
      "epoch": 2.83203125e-05,
      "grad_norm": 0.324432909488678,
      "learning_rate": 9.979588166089958e-05,
      "loss": 0.1003,
      "step": 4640
    },
    {
      "epoch": 2.83203125e-05,
      "model_forward_time": 0.11511015892028809,
      "step": 4640
    },
    {
      "epoch": 2.83203125e-05,
      "step": 4640,
      "training_step_time": 0.6747429370880127
    },
    {
      "epoch": 2.8326416015625e-05,
      "model_forward_time": 0.11428260803222656,
      "step": 4641
    },
    {
      "epoch": 2.8326416015625e-05,
      "step": 4641,
      "training_step_time": 0.39236903190612793
    },
    {
      "epoch": 2.833251953125e-05,
      "model_forward_time": 0.11417889595031738,
      "step": 4642
    },
    {
      "epoch": 2.833251953125e-05,
      "step": 4642,
      "training_step_time": 0.4392399787902832
    },
    {
      "epoch": 2.8338623046875e-05,
      "model_forward_time": 0.11506986618041992,
      "step": 4643
    },
    {
      "epoch": 2.8338623046875e-05,
      "step": 4643,
      "training_step_time": 0.39196014404296875
    },
    {
      "epoch": 2.83447265625e-05,
      "model_forward_time": 0.11452746391296387,
      "step": 4644
    },
    {
      "epoch": 2.83447265625e-05,
      "step": 4644,
      "training_step_time": 0.39235806465148926
    },
    {
      "epoch": 2.8350830078125e-05,
      "model_forward_time": 0.1152191162109375,
      "step": 4645
    },
    {
      "epoch": 2.8350830078125e-05,
      "step": 4645,
      "training_step_time": 0.3904695510864258
    },
    {
      "epoch": 2.835693359375e-05,
      "model_forward_time": 0.11464118957519531,
      "step": 4646
    },
    {
      "epoch": 2.835693359375e-05,
      "step": 4646,
      "training_step_time": 0.8203821182250977
    },
    {
      "epoch": 2.8363037109375e-05,
      "model_forward_time": 0.11485624313354492,
      "step": 4647
    },
    {
      "epoch": 2.8363037109375e-05,
      "step": 4647,
      "training_step_time": 0.39391088485717773
    },
    {
      "epoch": 2.8369140625e-05,
      "model_forward_time": 0.11461901664733887,
      "step": 4648
    },
    {
      "epoch": 2.8369140625e-05,
      "step": 4648,
      "training_step_time": 0.3855414390563965
    },
    {
      "epoch": 2.8375244140625e-05,
      "model_forward_time": 0.11460041999816895,
      "step": 4649
    },
    {
      "epoch": 2.8375244140625e-05,
      "step": 4649,
      "training_step_time": 0.39017462730407715
    },
    {
      "epoch": 2.838134765625e-05,
      "grad_norm": 0.23858307301998138,
      "learning_rate": 9.979338654470569e-05,
      "loss": 0.0994,
      "step": 4650
    },
    {
      "epoch": 2.838134765625e-05,
      "model_forward_time": 0.11504888534545898,
      "step": 4650
    },
    {
      "epoch": 2.838134765625e-05,
      "step": 4650,
      "training_step_time": 0.36528992652893066
    },
    {
      "epoch": 2.8387451171875e-05,
      "model_forward_time": 0.11433529853820801,
      "step": 4651
    },
    {
      "epoch": 2.8387451171875e-05,
      "step": 4651,
      "training_step_time": 0.47141218185424805
    },
    {
      "epoch": 2.83935546875e-05,
      "model_forward_time": 0.1147012710571289,
      "step": 4652
    },
    {
      "epoch": 2.83935546875e-05,
      "step": 4652,
      "training_step_time": 0.7962160110473633
    },
    {
      "epoch": 2.8399658203125e-05,
      "model_forward_time": 0.11375284194946289,
      "step": 4653
    },
    {
      "epoch": 2.8399658203125e-05,
      "step": 4653,
      "training_step_time": 0.38779330253601074
    },
    {
      "epoch": 2.840576171875e-05,
      "model_forward_time": 0.11441349983215332,
      "step": 4654
    },
    {
      "epoch": 2.840576171875e-05,
      "step": 4654,
      "training_step_time": 0.4305460453033447
    },
    {
      "epoch": 2.8411865234375e-05,
      "model_forward_time": 0.11446881294250488,
      "step": 4655
    },
    {
      "epoch": 2.8411865234375e-05,
      "step": 4655,
      "training_step_time": 0.46745920181274414
    },
    {
      "epoch": 2.841796875e-05,
      "model_forward_time": 0.11499667167663574,
      "step": 4656
    },
    {
      "epoch": 2.841796875e-05,
      "step": 4656,
      "training_step_time": 0.43993496894836426
    },
    {
      "epoch": 2.8424072265625e-05,
      "model_forward_time": 0.11471939086914062,
      "step": 4657
    },
    {
      "epoch": 2.8424072265625e-05,
      "step": 4657,
      "training_step_time": 0.3894827365875244
    },
    {
      "epoch": 2.843017578125e-05,
      "model_forward_time": 0.11468768119812012,
      "step": 4658
    },
    {
      "epoch": 2.843017578125e-05,
      "step": 4658,
      "training_step_time": 0.6122434139251709
    },
    {
      "epoch": 2.8436279296875e-05,
      "model_forward_time": 0.11453056335449219,
      "step": 4659
    },
    {
      "epoch": 2.8436279296875e-05,
      "step": 4659,
      "training_step_time": 0.3943490982055664
    },
    {
      "epoch": 2.84423828125e-05,
      "grad_norm": 0.29037147760391235,
      "learning_rate": 9.979087630259572e-05,
      "loss": 0.1043,
      "step": 4660
    },
    {
      "epoch": 2.84423828125e-05,
      "model_forward_time": 0.11470413208007812,
      "step": 4660
    },
    {
      "epoch": 2.84423828125e-05,
      "step": 4660,
      "training_step_time": 0.37922191619873047
    },
    {
      "epoch": 2.8448486328125e-05,
      "model_forward_time": 0.11478447914123535,
      "step": 4661
    },
    {
      "epoch": 2.8448486328125e-05,
      "step": 4661,
      "training_step_time": 0.39627766609191895
    },
    {
      "epoch": 2.845458984375e-05,
      "model_forward_time": 0.11487889289855957,
      "step": 4662
    },
    {
      "epoch": 2.845458984375e-05,
      "step": 4662,
      "training_step_time": 0.3766822814941406
    },
    {
      "epoch": 2.8460693359375e-05,
      "model_forward_time": 0.11510848999023438,
      "step": 4663
    },
    {
      "epoch": 2.8460693359375e-05,
      "step": 4663,
      "training_step_time": 0.4339776039123535
    },
    {
      "epoch": 2.8466796875e-05,
      "model_forward_time": 0.11529994010925293,
      "step": 4664
    },
    {
      "epoch": 2.8466796875e-05,
      "step": 4664,
      "training_step_time": 0.5452413558959961
    },
    {
      "epoch": 2.8472900390625e-05,
      "model_forward_time": 0.11559081077575684,
      "step": 4665
    },
    {
      "epoch": 2.8472900390625e-05,
      "step": 4665,
      "training_step_time": 0.45459961891174316
    },
    {
      "epoch": 2.847900390625e-05,
      "model_forward_time": 0.11531233787536621,
      "step": 4666
    },
    {
      "epoch": 2.847900390625e-05,
      "step": 4666,
      "training_step_time": 0.4860529899597168
    },
    {
      "epoch": 2.8485107421875e-05,
      "model_forward_time": 0.11439824104309082,
      "step": 4667
    },
    {
      "epoch": 2.8485107421875e-05,
      "step": 4667,
      "training_step_time": 0.38766932487487793
    },
    {
      "epoch": 2.84912109375e-05,
      "model_forward_time": 0.11456036567687988,
      "step": 4668
    },
    {
      "epoch": 2.84912109375e-05,
      "step": 4668,
      "training_step_time": 0.4489297866821289
    },
    {
      "epoch": 2.8497314453125e-05,
      "model_forward_time": 0.11458230018615723,
      "step": 4669
    },
    {
      "epoch": 2.8497314453125e-05,
      "step": 4669,
      "training_step_time": 0.42012691497802734
    },
    {
      "epoch": 2.850341796875e-05,
      "grad_norm": 0.31024280190467834,
      "learning_rate": 9.978835093533216e-05,
      "loss": 0.1002,
      "step": 4670
    },
    {
      "epoch": 2.850341796875e-05,
      "model_forward_time": 0.11419177055358887,
      "step": 4670
    },
    {
      "epoch": 2.850341796875e-05,
      "step": 4670,
      "training_step_time": 0.39425134658813477
    },
    {
      "epoch": 2.8509521484375e-05,
      "model_forward_time": 0.11579704284667969,
      "step": 4671
    },
    {
      "epoch": 2.8509521484375e-05,
      "step": 4671,
      "training_step_time": 0.397946834564209
    },
    {
      "epoch": 2.8515625e-05,
      "model_forward_time": 0.11499714851379395,
      "step": 4672
    },
    {
      "epoch": 2.8515625e-05,
      "step": 4672,
      "training_step_time": 0.3899211883544922
    },
    {
      "epoch": 2.8521728515625e-05,
      "model_forward_time": 0.1155703067779541,
      "step": 4673
    },
    {
      "epoch": 2.8521728515625e-05,
      "step": 4673,
      "training_step_time": 0.3902420997619629
    },
    {
      "epoch": 2.852783203125e-05,
      "model_forward_time": 0.11566591262817383,
      "step": 4674
    },
    {
      "epoch": 2.852783203125e-05,
      "step": 4674,
      "training_step_time": 0.3912932872772217
    },
    {
      "epoch": 2.8533935546875e-05,
      "model_forward_time": 0.11522650718688965,
      "step": 4675
    },
    {
      "epoch": 2.8533935546875e-05,
      "step": 4675,
      "training_step_time": 0.4012458324432373
    },
    {
      "epoch": 2.85400390625e-05,
      "model_forward_time": 0.11781954765319824,
      "step": 4676
    },
    {
      "epoch": 2.85400390625e-05,
      "step": 4676,
      "training_step_time": 0.9032654762268066
    },
    {
      "epoch": 2.8546142578125e-05,
      "model_forward_time": 0.11993575096130371,
      "step": 4677
    },
    {
      "epoch": 2.8546142578125e-05,
      "step": 4677,
      "training_step_time": 0.7022247314453125
    },
    {
      "epoch": 2.855224609375e-05,
      "model_forward_time": 0.11723470687866211,
      "step": 4678
    },
    {
      "epoch": 2.855224609375e-05,
      "step": 4678,
      "training_step_time": 0.6788573265075684
    },
    {
      "epoch": 2.8558349609375e-05,
      "model_forward_time": 0.11809206008911133,
      "step": 4679
    },
    {
      "epoch": 2.8558349609375e-05,
      "step": 4679,
      "training_step_time": 0.7282624244689941
    },
    {
      "epoch": 2.8564453125e-05,
      "grad_norm": 0.15864017605781555,
      "learning_rate": 9.97858104436822e-05,
      "loss": 0.088,
      "step": 4680
    },
    {
      "epoch": 2.8564453125e-05,
      "model_forward_time": 0.11878132820129395,
      "step": 4680
    },
    {
      "epoch": 2.8564453125e-05,
      "step": 4680,
      "training_step_time": 0.8487017154693604
    },
    {
      "epoch": 2.8570556640625e-05,
      "model_forward_time": 0.11945724487304688,
      "step": 4681
    },
    {
      "epoch": 2.8570556640625e-05,
      "step": 4681,
      "training_step_time": 0.7073047161102295
    },
    {
      "epoch": 2.857666015625e-05,
      "model_forward_time": 0.1204078197479248,
      "step": 4682
    },
    {
      "epoch": 2.857666015625e-05,
      "step": 4682,
      "training_step_time": 0.6792783737182617
    },
    {
      "epoch": 2.8582763671875e-05,
      "model_forward_time": 0.12021851539611816,
      "step": 4683
    },
    {
      "epoch": 2.8582763671875e-05,
      "step": 4683,
      "training_step_time": 0.6384890079498291
    },
    {
      "epoch": 2.85888671875e-05,
      "model_forward_time": 0.12626361846923828,
      "step": 4684
    },
    {
      "epoch": 2.85888671875e-05,
      "step": 4684,
      "training_step_time": 0.7013857364654541
    },
    {
      "epoch": 2.8594970703125e-05,
      "model_forward_time": 0.11887311935424805,
      "step": 4685
    },
    {
      "epoch": 2.8594970703125e-05,
      "step": 4685,
      "training_step_time": 0.6893248558044434
    },
    {
      "epoch": 2.860107421875e-05,
      "model_forward_time": 0.12155699729919434,
      "step": 4686
    },
    {
      "epoch": 2.860107421875e-05,
      "step": 4686,
      "training_step_time": 0.7106518745422363
    },
    {
      "epoch": 2.8607177734375e-05,
      "model_forward_time": 0.11716055870056152,
      "step": 4687
    },
    {
      "epoch": 2.8607177734375e-05,
      "step": 4687,
      "training_step_time": 0.7037968635559082
    },
    {
      "epoch": 2.861328125e-05,
      "model_forward_time": 0.1176302433013916,
      "step": 4688
    },
    {
      "epoch": 2.861328125e-05,
      "step": 4688,
      "training_step_time": 0.8123514652252197
    },
    {
      "epoch": 2.8619384765625e-05,
      "model_forward_time": 0.12191486358642578,
      "step": 4689
    },
    {
      "epoch": 2.8619384765625e-05,
      "step": 4689,
      "training_step_time": 0.7464520931243896
    },
    {
      "epoch": 2.862548828125e-05,
      "grad_norm": 0.3167679011821747,
      "learning_rate": 9.978325482841753e-05,
      "loss": 0.1036,
      "step": 4690
    },
    {
      "epoch": 2.862548828125e-05,
      "model_forward_time": 0.12244009971618652,
      "step": 4690
    },
    {
      "epoch": 2.862548828125e-05,
      "step": 4690,
      "training_step_time": 0.6188685894012451
    },
    {
      "epoch": 2.8631591796875e-05,
      "model_forward_time": 0.11961913108825684,
      "step": 4691
    },
    {
      "epoch": 2.8631591796875e-05,
      "step": 4691,
      "training_step_time": 0.6634502410888672
    },
    {
      "epoch": 2.86376953125e-05,
      "model_forward_time": 0.11681079864501953,
      "step": 4692
    },
    {
      "epoch": 2.86376953125e-05,
      "step": 4692,
      "training_step_time": 0.6832478046417236
    },
    {
      "epoch": 2.8643798828125e-05,
      "model_forward_time": 0.11646556854248047,
      "step": 4693
    },
    {
      "epoch": 2.8643798828125e-05,
      "step": 4693,
      "training_step_time": 0.7098853588104248
    },
    {
      "epoch": 2.864990234375e-05,
      "model_forward_time": 0.11908602714538574,
      "step": 4694
    },
    {
      "epoch": 2.864990234375e-05,
      "step": 4694,
      "training_step_time": 0.6605873107910156
    },
    {
      "epoch": 2.8656005859375e-05,
      "model_forward_time": 0.11641287803649902,
      "step": 4695
    },
    {
      "epoch": 2.8656005859375e-05,
      "step": 4695,
      "training_step_time": 0.6700494289398193
    },
    {
      "epoch": 2.8662109375e-05,
      "model_forward_time": 0.11813163757324219,
      "step": 4696
    },
    {
      "epoch": 2.8662109375e-05,
      "step": 4696,
      "training_step_time": 0.7001028060913086
    },
    {
      "epoch": 2.8668212890625e-05,
      "model_forward_time": 0.12167882919311523,
      "step": 4697
    },
    {
      "epoch": 2.8668212890625e-05,
      "step": 4697,
      "training_step_time": 0.6698250770568848
    },
    {
      "epoch": 2.867431640625e-05,
      "model_forward_time": 0.11892580986022949,
      "step": 4698
    },
    {
      "epoch": 2.867431640625e-05,
      "step": 4698,
      "training_step_time": 0.8042316436767578
    },
    {
      "epoch": 2.8680419921875e-05,
      "model_forward_time": 0.11904382705688477,
      "step": 4699
    },
    {
      "epoch": 2.8680419921875e-05,
      "step": 4699,
      "training_step_time": 0.6934096813201904
    },
    {
      "epoch": 2.86865234375e-05,
      "grad_norm": 0.22417768836021423,
      "learning_rate": 9.978068409031449e-05,
      "loss": 0.1104,
      "step": 4700
    },
    {
      "epoch": 2.86865234375e-05,
      "model_forward_time": 0.13219022750854492,
      "step": 4700
    },
    {
      "epoch": 2.86865234375e-05,
      "step": 4700,
      "training_step_time": 0.7252509593963623
    },
    {
      "epoch": 2.8692626953125e-05,
      "model_forward_time": 0.12049460411071777,
      "step": 4701
    },
    {
      "epoch": 2.8692626953125e-05,
      "step": 4701,
      "training_step_time": 0.6849007606506348
    },
    {
      "epoch": 2.869873046875e-05,
      "model_forward_time": 0.12002301216125488,
      "step": 4702
    },
    {
      "epoch": 2.869873046875e-05,
      "step": 4702,
      "training_step_time": 0.6604743003845215
    },
    {
      "epoch": 2.8704833984375e-05,
      "model_forward_time": 0.12045979499816895,
      "step": 4703
    },
    {
      "epoch": 2.8704833984375e-05,
      "step": 4703,
      "training_step_time": 0.6218788623809814
    },
    {
      "epoch": 2.87109375e-05,
      "model_forward_time": 0.11848855018615723,
      "step": 4704
    },
    {
      "epoch": 2.87109375e-05,
      "step": 4704,
      "training_step_time": 0.6461608409881592
    },
    {
      "epoch": 2.8717041015625e-05,
      "model_forward_time": 0.12630629539489746,
      "step": 4705
    },
    {
      "epoch": 2.8717041015625e-05,
      "step": 4705,
      "training_step_time": 0.6788830757141113
    },
    {
      "epoch": 2.872314453125e-05,
      "model_forward_time": 0.1214146614074707,
      "step": 4706
    },
    {
      "epoch": 2.872314453125e-05,
      "step": 4706,
      "training_step_time": 0.7349100112915039
    },
    {
      "epoch": 2.8729248046875e-05,
      "model_forward_time": 0.11745834350585938,
      "step": 4707
    },
    {
      "epoch": 2.8729248046875e-05,
      "step": 4707,
      "training_step_time": 0.6300113201141357
    },
    {
      "epoch": 2.87353515625e-05,
      "model_forward_time": 0.1271193027496338,
      "step": 4708
    },
    {
      "epoch": 2.87353515625e-05,
      "step": 4708,
      "training_step_time": 0.7202343940734863
    },
    {
      "epoch": 2.8741455078125e-05,
      "model_forward_time": 0.11884832382202148,
      "step": 4709
    },
    {
      "epoch": 2.8741455078125e-05,
      "step": 4709,
      "training_step_time": 0.6884515285491943
    },
    {
      "epoch": 2.874755859375e-05,
      "grad_norm": 0.30206039547920227,
      "learning_rate": 9.977809823015401e-05,
      "loss": 0.1091,
      "step": 4710
    },
    {
      "epoch": 2.874755859375e-05,
      "model_forward_time": 0.1168980598449707,
      "step": 4710
    },
    {
      "epoch": 2.874755859375e-05,
      "step": 4710,
      "training_step_time": 0.699998140335083
    },
    {
      "epoch": 2.8753662109375e-05,
      "model_forward_time": 0.11918330192565918,
      "step": 4711
    },
    {
      "epoch": 2.8753662109375e-05,
      "step": 4711,
      "training_step_time": 0.6728301048278809
    },
    {
      "epoch": 2.8759765625e-05,
      "model_forward_time": 0.11847186088562012,
      "step": 4712
    },
    {
      "epoch": 2.8759765625e-05,
      "step": 4712,
      "training_step_time": 0.6790797710418701
    },
    {
      "epoch": 2.8765869140625e-05,
      "model_forward_time": 0.11742472648620605,
      "step": 4713
    },
    {
      "epoch": 2.8765869140625e-05,
      "step": 4713,
      "training_step_time": 0.6261231899261475
    },
    {
      "epoch": 2.877197265625e-05,
      "model_forward_time": 0.11889219284057617,
      "step": 4714
    },
    {
      "epoch": 2.877197265625e-05,
      "step": 4714,
      "training_step_time": 0.6343715190887451
    },
    {
      "epoch": 2.8778076171875e-05,
      "model_forward_time": 0.1219182014465332,
      "step": 4715
    },
    {
      "epoch": 2.8778076171875e-05,
      "step": 4715,
      "training_step_time": 0.6401472091674805
    },
    {
      "epoch": 2.87841796875e-05,
      "model_forward_time": 0.12749075889587402,
      "step": 4716
    },
    {
      "epoch": 2.87841796875e-05,
      "step": 4716,
      "training_step_time": 0.701765775680542
    },
    {
      "epoch": 2.8790283203125e-05,
      "model_forward_time": 0.1217801570892334,
      "step": 4717
    },
    {
      "epoch": 2.8790283203125e-05,
      "step": 4717,
      "training_step_time": 0.7031886577606201
    },
    {
      "epoch": 2.879638671875e-05,
      "model_forward_time": 0.11972737312316895,
      "step": 4718
    },
    {
      "epoch": 2.879638671875e-05,
      "step": 4718,
      "training_step_time": 0.6937909126281738
    },
    {
      "epoch": 2.8802490234375e-05,
      "model_forward_time": 0.12378072738647461,
      "step": 4719
    },
    {
      "epoch": 2.8802490234375e-05,
      "step": 4719,
      "training_step_time": 0.7009925842285156
    },
    {
      "epoch": 2.880859375e-05,
      "grad_norm": 0.27564623951911926,
      "learning_rate": 9.97754972487216e-05,
      "loss": 0.1088,
      "step": 4720
    },
    {
      "epoch": 2.880859375e-05,
      "model_forward_time": 0.12686562538146973,
      "step": 4720
    },
    {
      "epoch": 2.880859375e-05,
      "step": 4720,
      "training_step_time": 0.6976134777069092
    },
    {
      "epoch": 2.8814697265625e-05,
      "model_forward_time": 0.11704015731811523,
      "step": 4721
    },
    {
      "epoch": 2.8814697265625e-05,
      "step": 4721,
      "training_step_time": 0.7199909687042236
    },
    {
      "epoch": 2.882080078125e-05,
      "model_forward_time": 0.12010908126831055,
      "step": 4722
    },
    {
      "epoch": 2.882080078125e-05,
      "step": 4722,
      "training_step_time": 0.6762800216674805
    },
    {
      "epoch": 2.8826904296875e-05,
      "model_forward_time": 0.11842012405395508,
      "step": 4723
    },
    {
      "epoch": 2.8826904296875e-05,
      "step": 4723,
      "training_step_time": 0.6942489147186279
    },
    {
      "epoch": 2.88330078125e-05,
      "model_forward_time": 0.11842727661132812,
      "step": 4724
    },
    {
      "epoch": 2.88330078125e-05,
      "step": 4724,
      "training_step_time": 0.5435745716094971
    },
    {
      "epoch": 2.8839111328125e-05,
      "model_forward_time": 0.11592316627502441,
      "step": 4725
    },
    {
      "epoch": 2.8839111328125e-05,
      "step": 4725,
      "training_step_time": 0.689056396484375
    },
    {
      "epoch": 2.884521484375e-05,
      "model_forward_time": 0.12244963645935059,
      "step": 4726
    },
    {
      "epoch": 2.884521484375e-05,
      "step": 4726,
      "training_step_time": 0.687694787979126
    },
    {
      "epoch": 2.8851318359375e-05,
      "model_forward_time": 0.11877083778381348,
      "step": 4727
    },
    {
      "epoch": 2.8851318359375e-05,
      "step": 4727,
      "training_step_time": 0.6921393871307373
    },
    {
      "epoch": 2.8857421875e-05,
      "model_forward_time": 0.12432098388671875,
      "step": 4728
    },
    {
      "epoch": 2.8857421875e-05,
      "step": 4728,
      "training_step_time": 0.7218508720397949
    },
    {
      "epoch": 2.8863525390625e-05,
      "model_forward_time": 0.11661100387573242,
      "step": 4729
    },
    {
      "epoch": 2.8863525390625e-05,
      "step": 4729,
      "training_step_time": 0.6799070835113525
    },
    {
      "epoch": 2.886962890625e-05,
      "grad_norm": 0.2987068295478821,
      "learning_rate": 9.977288114680737e-05,
      "loss": 0.1047,
      "step": 4730
    },
    {
      "epoch": 2.886962890625e-05,
      "model_forward_time": 0.11859321594238281,
      "step": 4730
    },
    {
      "epoch": 2.886962890625e-05,
      "step": 4730,
      "training_step_time": 0.671459436416626
    },
    {
      "epoch": 2.8875732421875e-05,
      "model_forward_time": 0.11591386795043945,
      "step": 4731
    },
    {
      "epoch": 2.8875732421875e-05,
      "step": 4731,
      "training_step_time": 0.6559534072875977
    },
    {
      "epoch": 2.88818359375e-05,
      "model_forward_time": 0.12501955032348633,
      "step": 4732
    },
    {
      "epoch": 2.88818359375e-05,
      "step": 4732,
      "training_step_time": 0.6469736099243164
    },
    {
      "epoch": 2.8887939453125e-05,
      "model_forward_time": 0.11932897567749023,
      "step": 4733
    },
    {
      "epoch": 2.8887939453125e-05,
      "step": 4733,
      "training_step_time": 0.6641733646392822
    },
    {
      "epoch": 2.889404296875e-05,
      "model_forward_time": 0.11795902252197266,
      "step": 4734
    },
    {
      "epoch": 2.889404296875e-05,
      "step": 4734,
      "training_step_time": 0.6939585208892822
    },
    {
      "epoch": 2.8900146484375e-05,
      "model_forward_time": 0.13556814193725586,
      "step": 4735
    },
    {
      "epoch": 2.8900146484375e-05,
      "step": 4735,
      "training_step_time": 0.7288212776184082
    },
    {
      "epoch": 2.890625e-05,
      "model_forward_time": 0.11819314956665039,
      "step": 4736
    },
    {
      "epoch": 2.890625e-05,
      "step": 4736,
      "training_step_time": 0.7221806049346924
    },
    {
      "epoch": 2.8912353515625e-05,
      "model_forward_time": 0.12409043312072754,
      "step": 4737
    },
    {
      "epoch": 2.8912353515625e-05,
      "step": 4737,
      "training_step_time": 0.6810920238494873
    },
    {
      "epoch": 2.891845703125e-05,
      "model_forward_time": 0.11728215217590332,
      "step": 4738
    },
    {
      "epoch": 2.891845703125e-05,
      "step": 4738,
      "training_step_time": 0.6344234943389893
    },
    {
      "epoch": 2.8924560546875e-05,
      "model_forward_time": 0.12674474716186523,
      "step": 4739
    },
    {
      "epoch": 2.8924560546875e-05,
      "step": 4739,
      "training_step_time": 0.6088893413543701
    },
    {
      "epoch": 2.89306640625e-05,
      "grad_norm": 0.2976251542568207,
      "learning_rate": 9.977024992520602e-05,
      "loss": 0.1044,
      "step": 4740
    },
    {
      "epoch": 2.89306640625e-05,
      "model_forward_time": 0.11707019805908203,
      "step": 4740
    },
    {
      "epoch": 2.89306640625e-05,
      "step": 4740,
      "training_step_time": 0.5962667465209961
    },
    {
      "epoch": 2.8936767578125e-05,
      "model_forward_time": 0.12128973007202148,
      "step": 4741
    },
    {
      "epoch": 2.8936767578125e-05,
      "step": 4741,
      "training_step_time": 0.5855817794799805
    },
    {
      "epoch": 2.894287109375e-05,
      "model_forward_time": 0.13381147384643555,
      "step": 4742
    },
    {
      "epoch": 2.894287109375e-05,
      "step": 4742,
      "training_step_time": 0.5829639434814453
    },
    {
      "epoch": 2.8948974609375e-05,
      "model_forward_time": 0.12525296211242676,
      "step": 4743
    },
    {
      "epoch": 2.8948974609375e-05,
      "step": 4743,
      "training_step_time": 0.5526018142700195
    },
    {
      "epoch": 2.8955078125e-05,
      "model_forward_time": 0.12187552452087402,
      "step": 4744
    },
    {
      "epoch": 2.8955078125e-05,
      "step": 4744,
      "training_step_time": 0.5315611362457275
    },
    {
      "epoch": 2.8961181640625e-05,
      "model_forward_time": 0.12222123146057129,
      "step": 4745
    },
    {
      "epoch": 2.8961181640625e-05,
      "step": 4745,
      "training_step_time": 0.6128215789794922
    },
    {
      "epoch": 2.896728515625e-05,
      "model_forward_time": 0.11744236946105957,
      "step": 4746
    },
    {
      "epoch": 2.896728515625e-05,
      "step": 4746,
      "training_step_time": 0.6085619926452637
    },
    {
      "epoch": 2.8973388671875e-05,
      "model_forward_time": 0.1187582015991211,
      "step": 4747
    },
    {
      "epoch": 2.8973388671875e-05,
      "step": 4747,
      "training_step_time": 0.6048827171325684
    },
    {
      "epoch": 2.89794921875e-05,
      "model_forward_time": 0.11758279800415039,
      "step": 4748
    },
    {
      "epoch": 2.89794921875e-05,
      "step": 4748,
      "training_step_time": 0.5511019229888916
    },
    {
      "epoch": 2.8985595703125e-05,
      "model_forward_time": 0.11611199378967285,
      "step": 4749
    },
    {
      "epoch": 2.8985595703125e-05,
      "step": 4749,
      "training_step_time": 0.4710202217102051
    },
    {
      "epoch": 2.899169921875e-05,
      "grad_norm": 0.2980317175388336,
      "learning_rate": 9.976760358471686e-05,
      "loss": 0.106,
      "step": 4750
    },
    {
      "epoch": 2.899169921875e-05,
      "model_forward_time": 0.11529302597045898,
      "step": 4750
    },
    {
      "epoch": 2.899169921875e-05,
      "step": 4750,
      "training_step_time": 0.41455841064453125
    },
    {
      "epoch": 2.8997802734375e-05,
      "model_forward_time": 0.11474967002868652,
      "step": 4751
    },
    {
      "epoch": 2.8997802734375e-05,
      "step": 4751,
      "training_step_time": 0.41724157333374023
    },
    {
      "epoch": 2.900390625e-05,
      "model_forward_time": 0.11587643623352051,
      "step": 4752
    },
    {
      "epoch": 2.900390625e-05,
      "step": 4752,
      "training_step_time": 0.4033505916595459
    },
    {
      "epoch": 2.9010009765625e-05,
      "model_forward_time": 0.11558365821838379,
      "step": 4753
    },
    {
      "epoch": 2.9010009765625e-05,
      "step": 4753,
      "training_step_time": 0.4134645462036133
    },
    {
      "epoch": 2.901611328125e-05,
      "model_forward_time": 0.11520147323608398,
      "step": 4754
    },
    {
      "epoch": 2.901611328125e-05,
      "step": 4754,
      "training_step_time": 0.4011878967285156
    },
    {
      "epoch": 2.9022216796875e-05,
      "model_forward_time": 0.11502623558044434,
      "step": 4755
    },
    {
      "epoch": 2.9022216796875e-05,
      "step": 4755,
      "training_step_time": 0.40244340896606445
    },
    {
      "epoch": 2.90283203125e-05,
      "model_forward_time": 0.1152033805847168,
      "step": 4756
    },
    {
      "epoch": 2.90283203125e-05,
      "step": 4756,
      "training_step_time": 0.37760281562805176
    },
    {
      "epoch": 2.9034423828125e-05,
      "model_forward_time": 0.11527371406555176,
      "step": 4757
    },
    {
      "epoch": 2.9034423828125e-05,
      "step": 4757,
      "training_step_time": 0.39855051040649414
    },
    {
      "epoch": 2.904052734375e-05,
      "model_forward_time": 0.1167449951171875,
      "step": 4758
    },
    {
      "epoch": 2.904052734375e-05,
      "step": 4758,
      "training_step_time": 0.37189769744873047
    },
    {
      "epoch": 2.9046630859375e-05,
      "model_forward_time": 0.11484789848327637,
      "step": 4759
    },
    {
      "epoch": 2.9046630859375e-05,
      "step": 4759,
      "training_step_time": 0.498828649520874
    },
    {
      "epoch": 2.9052734375e-05,
      "grad_norm": 0.23009061813354492,
      "learning_rate": 9.976494212614377e-05,
      "loss": 0.0995,
      "step": 4760
    },
    {
      "epoch": 2.9052734375e-05,
      "model_forward_time": 0.11431765556335449,
      "step": 4760
    },
    {
      "epoch": 2.9052734375e-05,
      "step": 4760,
      "training_step_time": 0.507469654083252
    },
    {
      "epoch": 2.9058837890625e-05,
      "model_forward_time": 0.11578154563903809,
      "step": 4761
    },
    {
      "epoch": 2.9058837890625e-05,
      "step": 4761,
      "training_step_time": 0.4932222366333008
    },
    {
      "epoch": 2.906494140625e-05,
      "model_forward_time": 0.11407995223999023,
      "step": 4762
    },
    {
      "epoch": 2.906494140625e-05,
      "step": 4762,
      "training_step_time": 0.39898252487182617
    },
    {
      "epoch": 2.9071044921875e-05,
      "model_forward_time": 0.1150810718536377,
      "step": 4763
    },
    {
      "epoch": 2.9071044921875e-05,
      "step": 4763,
      "training_step_time": 0.4017479419708252
    },
    {
      "epoch": 2.90771484375e-05,
      "model_forward_time": 0.11482739448547363,
      "step": 4764
    },
    {
      "epoch": 2.90771484375e-05,
      "step": 4764,
      "training_step_time": 0.3905673027038574
    },
    {
      "epoch": 2.9083251953125e-05,
      "model_forward_time": 0.11536645889282227,
      "step": 4765
    },
    {
      "epoch": 2.9083251953125e-05,
      "step": 4765,
      "training_step_time": 0.39812564849853516
    },
    {
      "epoch": 2.908935546875e-05,
      "model_forward_time": 0.11551570892333984,
      "step": 4766
    },
    {
      "epoch": 2.908935546875e-05,
      "step": 4766,
      "training_step_time": 0.3919820785522461
    },
    {
      "epoch": 2.9095458984375e-05,
      "model_forward_time": 0.11479902267456055,
      "step": 4767
    },
    {
      "epoch": 2.9095458984375e-05,
      "step": 4767,
      "training_step_time": 0.3918492794036865
    },
    {
      "epoch": 2.91015625e-05,
      "model_forward_time": 0.11527752876281738,
      "step": 4768
    },
    {
      "epoch": 2.91015625e-05,
      "step": 4768,
      "training_step_time": 0.3903653621673584
    },
    {
      "epoch": 2.9107666015625e-05,
      "model_forward_time": 0.11500120162963867,
      "step": 4769
    },
    {
      "epoch": 2.9107666015625e-05,
      "step": 4769,
      "training_step_time": 0.3976328372955322
    },
    {
      "epoch": 2.911376953125e-05,
      "grad_norm": 0.33577099442481995,
      "learning_rate": 9.976226555029522e-05,
      "loss": 0.1061,
      "step": 4770
    },
    {
      "epoch": 2.911376953125e-05,
      "model_forward_time": 0.11565113067626953,
      "step": 4770
    },
    {
      "epoch": 2.911376953125e-05,
      "step": 4770,
      "training_step_time": 0.40244460105895996
    },
    {
      "epoch": 2.9119873046875e-05,
      "model_forward_time": 0.11505722999572754,
      "step": 4771
    },
    {
      "epoch": 2.9119873046875e-05,
      "step": 4771,
      "training_step_time": 0.3848111629486084
    },
    {
      "epoch": 2.91259765625e-05,
      "model_forward_time": 0.11611342430114746,
      "step": 4772
    },
    {
      "epoch": 2.91259765625e-05,
      "step": 4772,
      "training_step_time": 0.40551090240478516
    },
    {
      "epoch": 2.9132080078125e-05,
      "model_forward_time": 0.11523199081420898,
      "step": 4773
    },
    {
      "epoch": 2.9132080078125e-05,
      "step": 4773,
      "training_step_time": 0.3671262264251709
    },
    {
      "epoch": 2.913818359375e-05,
      "model_forward_time": 0.11603498458862305,
      "step": 4774
    },
    {
      "epoch": 2.913818359375e-05,
      "step": 4774,
      "training_step_time": 0.47821545600891113
    },
    {
      "epoch": 2.9144287109375e-05,
      "model_forward_time": 0.11558866500854492,
      "step": 4775
    },
    {
      "epoch": 2.9144287109375e-05,
      "step": 4775,
      "training_step_time": 0.49347662925720215
    },
    {
      "epoch": 2.9150390625e-05,
      "model_forward_time": 0.11552715301513672,
      "step": 4776
    },
    {
      "epoch": 2.9150390625e-05,
      "step": 4776,
      "training_step_time": 0.43378686904907227
    },
    {
      "epoch": 2.9156494140625e-05,
      "model_forward_time": 0.11527442932128906,
      "step": 4777
    },
    {
      "epoch": 2.9156494140625e-05,
      "step": 4777,
      "training_step_time": 0.41872429847717285
    },
    {
      "epoch": 2.916259765625e-05,
      "model_forward_time": 0.1158452033996582,
      "step": 4778
    },
    {
      "epoch": 2.916259765625e-05,
      "step": 4778,
      "training_step_time": 0.4013645648956299
    },
    {
      "epoch": 2.9168701171875e-05,
      "model_forward_time": 0.11534810066223145,
      "step": 4779
    },
    {
      "epoch": 2.9168701171875e-05,
      "step": 4779,
      "training_step_time": 0.3957066535949707
    },
    {
      "epoch": 2.91748046875e-05,
      "grad_norm": 0.2878871262073517,
      "learning_rate": 9.97595738579843e-05,
      "loss": 0.1022,
      "step": 4780
    },
    {
      "epoch": 2.91748046875e-05,
      "model_forward_time": 0.114990234375,
      "step": 4780
    },
    {
      "epoch": 2.91748046875e-05,
      "step": 4780,
      "training_step_time": 0.38640904426574707
    },
    {
      "epoch": 2.9180908203125e-05,
      "model_forward_time": 0.11554884910583496,
      "step": 4781
    },
    {
      "epoch": 2.9180908203125e-05,
      "step": 4781,
      "training_step_time": 0.4043691158294678
    },
    {
      "epoch": 2.918701171875e-05,
      "model_forward_time": 0.11517977714538574,
      "step": 4782
    },
    {
      "epoch": 2.918701171875e-05,
      "step": 4782,
      "training_step_time": 0.38925886154174805
    },
    {
      "epoch": 2.9193115234375e-05,
      "model_forward_time": 0.11531305313110352,
      "step": 4783
    },
    {
      "epoch": 2.9193115234375e-05,
      "step": 4783,
      "training_step_time": 0.3939237594604492
    },
    {
      "epoch": 2.919921875e-05,
      "model_forward_time": 0.11573195457458496,
      "step": 4784
    },
    {
      "epoch": 2.919921875e-05,
      "step": 4784,
      "training_step_time": 0.400559663772583
    },
    {
      "epoch": 2.9205322265625e-05,
      "model_forward_time": 0.11641478538513184,
      "step": 4785
    },
    {
      "epoch": 2.9205322265625e-05,
      "step": 4785,
      "training_step_time": 0.39874720573425293
    },
    {
      "epoch": 2.921142578125e-05,
      "model_forward_time": 0.11574339866638184,
      "step": 4786
    },
    {
      "epoch": 2.921142578125e-05,
      "step": 4786,
      "training_step_time": 0.3827853202819824
    },
    {
      "epoch": 2.9217529296875e-05,
      "model_forward_time": 0.11546492576599121,
      "step": 4787
    },
    {
      "epoch": 2.9217529296875e-05,
      "step": 4787,
      "training_step_time": 0.4289970397949219
    },
    {
      "epoch": 2.92236328125e-05,
      "model_forward_time": 0.11501669883728027,
      "step": 4788
    },
    {
      "epoch": 2.92236328125e-05,
      "step": 4788,
      "training_step_time": 0.36594414710998535
    },
    {
      "epoch": 2.9229736328125e-05,
      "model_forward_time": 0.11561846733093262,
      "step": 4789
    },
    {
      "epoch": 2.9229736328125e-05,
      "step": 4789,
      "training_step_time": 0.4482607841491699
    },
    {
      "epoch": 2.923583984375e-05,
      "grad_norm": 0.46230360865592957,
      "learning_rate": 9.975686705002867e-05,
      "loss": 0.1022,
      "step": 4790
    },
    {
      "epoch": 2.923583984375e-05,
      "model_forward_time": 0.11553645133972168,
      "step": 4790
    },
    {
      "epoch": 2.923583984375e-05,
      "step": 4790,
      "training_step_time": 0.4583859443664551
    },
    {
      "epoch": 2.9241943359375e-05,
      "model_forward_time": 0.11551380157470703,
      "step": 4791
    },
    {
      "epoch": 2.9241943359375e-05,
      "step": 4791,
      "training_step_time": 0.4638991355895996
    },
    {
      "epoch": 2.9248046875e-05,
      "model_forward_time": 0.11530900001525879,
      "step": 4792
    },
    {
      "epoch": 2.9248046875e-05,
      "step": 4792,
      "training_step_time": 0.4031350612640381
    },
    {
      "epoch": 2.9254150390625e-05,
      "model_forward_time": 0.11464738845825195,
      "step": 4793
    },
    {
      "epoch": 2.9254150390625e-05,
      "step": 4793,
      "training_step_time": 0.411466121673584
    },
    {
      "epoch": 2.926025390625e-05,
      "model_forward_time": 0.11538028717041016,
      "step": 4794
    },
    {
      "epoch": 2.926025390625e-05,
      "step": 4794,
      "training_step_time": 0.399550199508667
    },
    {
      "epoch": 2.9266357421875e-05,
      "model_forward_time": 0.11464929580688477,
      "step": 4795
    },
    {
      "epoch": 2.9266357421875e-05,
      "step": 4795,
      "training_step_time": 0.3926277160644531
    },
    {
      "epoch": 2.92724609375e-05,
      "model_forward_time": 0.11595749855041504,
      "step": 4796
    },
    {
      "epoch": 2.92724609375e-05,
      "step": 4796,
      "training_step_time": 0.40064144134521484
    },
    {
      "epoch": 2.9278564453125e-05,
      "model_forward_time": 0.11552548408508301,
      "step": 4797
    },
    {
      "epoch": 2.9278564453125e-05,
      "step": 4797,
      "training_step_time": 0.39531397819519043
    },
    {
      "epoch": 2.928466796875e-05,
      "model_forward_time": 0.11530041694641113,
      "step": 4798
    },
    {
      "epoch": 2.928466796875e-05,
      "step": 4798,
      "training_step_time": 0.38937902450561523
    },
    {
      "epoch": 2.9290771484375e-05,
      "model_forward_time": 0.11490559577941895,
      "step": 4799
    },
    {
      "epoch": 2.9290771484375e-05,
      "step": 4799,
      "training_step_time": 0.40050697326660156
    },
    {
      "epoch": 2.9296875e-05,
      "grad_norm": 0.29752522706985474,
      "learning_rate": 9.975414512725057e-05,
      "loss": 0.1002,
      "step": 4800
    },
    {
      "epoch": 2.9296875e-05,
      "model_forward_time": 0.11582207679748535,
      "step": 4800
    },
    {
      "epoch": 2.9296875e-05,
      "step": 4800,
      "training_step_time": 0.40081310272216797
    },
    {
      "epoch": 2.9302978515625e-05,
      "model_forward_time": 0.11530756950378418,
      "step": 4801
    },
    {
      "epoch": 2.9302978515625e-05,
      "step": 4801,
      "training_step_time": 0.39977455139160156
    },
    {
      "epoch": 2.930908203125e-05,
      "model_forward_time": 0.11628508567810059,
      "step": 4802
    },
    {
      "epoch": 2.930908203125e-05,
      "step": 4802,
      "training_step_time": 0.41744422912597656
    },
    {
      "epoch": 2.9315185546875e-05,
      "model_forward_time": 0.11596226692199707,
      "step": 4803
    },
    {
      "epoch": 2.9315185546875e-05,
      "step": 4803,
      "training_step_time": 0.4134514331817627
    },
    {
      "epoch": 2.93212890625e-05,
      "model_forward_time": 0.11519503593444824,
      "step": 4804
    },
    {
      "epoch": 2.93212890625e-05,
      "step": 4804,
      "training_step_time": 0.4279031753540039
    },
    {
      "epoch": 2.9327392578125e-05,
      "model_forward_time": 0.11484932899475098,
      "step": 4805
    },
    {
      "epoch": 2.9327392578125e-05,
      "step": 4805,
      "training_step_time": 0.482607364654541
    },
    {
      "epoch": 2.933349609375e-05,
      "model_forward_time": 0.11562252044677734,
      "step": 4806
    },
    {
      "epoch": 2.933349609375e-05,
      "step": 4806,
      "training_step_time": 0.5040783882141113
    },
    {
      "epoch": 2.9339599609375e-05,
      "model_forward_time": 0.11497235298156738,
      "step": 4807
    },
    {
      "epoch": 2.9339599609375e-05,
      "step": 4807,
      "training_step_time": 0.4250650405883789
    },
    {
      "epoch": 2.9345703125e-05,
      "model_forward_time": 0.11490035057067871,
      "step": 4808
    },
    {
      "epoch": 2.9345703125e-05,
      "step": 4808,
      "training_step_time": 0.4002265930175781
    },
    {
      "epoch": 2.9351806640625e-05,
      "model_forward_time": 0.11469888687133789,
      "step": 4809
    },
    {
      "epoch": 2.9351806640625e-05,
      "step": 4809,
      "training_step_time": 0.39350366592407227
    },
    {
      "epoch": 2.935791015625e-05,
      "grad_norm": 0.3031197488307953,
      "learning_rate": 9.975140809047687e-05,
      "loss": 0.0981,
      "step": 4810
    },
    {
      "epoch": 2.935791015625e-05,
      "model_forward_time": 0.11502289772033691,
      "step": 4810
    },
    {
      "epoch": 2.935791015625e-05,
      "step": 4810,
      "training_step_time": 0.3921482563018799
    },
    {
      "epoch": 2.9364013671875e-05,
      "model_forward_time": 0.11524844169616699,
      "step": 4811
    },
    {
      "epoch": 2.9364013671875e-05,
      "step": 4811,
      "training_step_time": 0.3895416259765625
    },
    {
      "epoch": 2.93701171875e-05,
      "model_forward_time": 0.11438965797424316,
      "step": 4812
    },
    {
      "epoch": 2.93701171875e-05,
      "step": 4812,
      "training_step_time": 0.39368200302124023
    },
    {
      "epoch": 2.9376220703125e-05,
      "model_forward_time": 0.11487221717834473,
      "step": 4813
    },
    {
      "epoch": 2.9376220703125e-05,
      "step": 4813,
      "training_step_time": 0.3907470703125
    },
    {
      "epoch": 2.938232421875e-05,
      "model_forward_time": 0.11592602729797363,
      "step": 4814
    },
    {
      "epoch": 2.938232421875e-05,
      "step": 4814,
      "training_step_time": 0.39360642433166504
    },
    {
      "epoch": 2.9388427734375e-05,
      "model_forward_time": 0.11514735221862793,
      "step": 4815
    },
    {
      "epoch": 2.9388427734375e-05,
      "step": 4815,
      "training_step_time": 0.6623365879058838
    },
    {
      "epoch": 2.939453125e-05,
      "model_forward_time": 0.11506056785583496,
      "step": 4816
    },
    {
      "epoch": 2.939453125e-05,
      "step": 4816,
      "training_step_time": 0.3897130489349365
    },
    {
      "epoch": 2.9400634765625e-05,
      "model_forward_time": 0.11468076705932617,
      "step": 4817
    },
    {
      "epoch": 2.9400634765625e-05,
      "step": 4817,
      "training_step_time": 0.3657371997833252
    },
    {
      "epoch": 2.940673828125e-05,
      "model_forward_time": 0.11508703231811523,
      "step": 4818
    },
    {
      "epoch": 2.940673828125e-05,
      "step": 4818,
      "training_step_time": 0.39144277572631836
    },
    {
      "epoch": 2.9412841796875e-05,
      "model_forward_time": 0.11503100395202637,
      "step": 4819
    },
    {
      "epoch": 2.9412841796875e-05,
      "step": 4819,
      "training_step_time": 0.506281852722168
    },
    {
      "epoch": 2.94189453125e-05,
      "grad_norm": 0.24473492801189423,
      "learning_rate": 9.974865594053902e-05,
      "loss": 0.0958,
      "step": 4820
    },
    {
      "epoch": 2.94189453125e-05,
      "model_forward_time": 0.11488986015319824,
      "step": 4820
    },
    {
      "epoch": 2.94189453125e-05,
      "step": 4820,
      "training_step_time": 0.4856836795806885
    },
    {
      "epoch": 2.9425048828125e-05,
      "model_forward_time": 0.11482357978820801,
      "step": 4821
    },
    {
      "epoch": 2.9425048828125e-05,
      "step": 4821,
      "training_step_time": 0.4558224678039551
    },
    {
      "epoch": 2.943115234375e-05,
      "model_forward_time": 0.11493206024169922,
      "step": 4822
    },
    {
      "epoch": 2.943115234375e-05,
      "step": 4822,
      "training_step_time": 0.3889741897583008
    },
    {
      "epoch": 2.9437255859375e-05,
      "model_forward_time": 0.11592793464660645,
      "step": 4823
    },
    {
      "epoch": 2.9437255859375e-05,
      "step": 4823,
      "training_step_time": 0.3950819969177246
    },
    {
      "epoch": 2.9443359375e-05,
      "model_forward_time": 0.11427640914916992,
      "step": 4824
    },
    {
      "epoch": 2.9443359375e-05,
      "step": 4824,
      "training_step_time": 0.39864444732666016
    },
    {
      "epoch": 2.9449462890625e-05,
      "model_forward_time": 0.11502242088317871,
      "step": 4825
    },
    {
      "epoch": 2.9449462890625e-05,
      "step": 4825,
      "training_step_time": 0.4016389846801758
    },
    {
      "epoch": 2.945556640625e-05,
      "model_forward_time": 0.11465859413146973,
      "step": 4826
    },
    {
      "epoch": 2.945556640625e-05,
      "step": 4826,
      "training_step_time": 0.39320945739746094
    },
    {
      "epoch": 2.9461669921875e-05,
      "model_forward_time": 0.11519765853881836,
      "step": 4827
    },
    {
      "epoch": 2.9461669921875e-05,
      "step": 4827,
      "training_step_time": 0.5999650955200195
    },
    {
      "epoch": 2.94677734375e-05,
      "model_forward_time": 0.11535811424255371,
      "step": 4828
    },
    {
      "epoch": 2.94677734375e-05,
      "step": 4828,
      "training_step_time": 0.42246222496032715
    },
    {
      "epoch": 2.9473876953125e-05,
      "model_forward_time": 0.11480474472045898,
      "step": 4829
    },
    {
      "epoch": 2.9473876953125e-05,
      "step": 4829,
      "training_step_time": 0.3912379741668701
    },
    {
      "epoch": 2.947998046875e-05,
      "grad_norm": 0.34635159373283386,
      "learning_rate": 9.974588867827301e-05,
      "loss": 0.1078,
      "step": 4830
    },
    {
      "epoch": 2.947998046875e-05,
      "model_forward_time": 0.11638927459716797,
      "step": 4830
    },
    {
      "epoch": 2.947998046875e-05,
      "step": 4830,
      "training_step_time": 0.39118194580078125
    },
    {
      "epoch": 2.9486083984375e-05,
      "model_forward_time": 0.1147148609161377,
      "step": 4831
    },
    {
      "epoch": 2.9486083984375e-05,
      "step": 4831,
      "training_step_time": 0.40112996101379395
    },
    {
      "epoch": 2.94921875e-05,
      "model_forward_time": 0.11512470245361328,
      "step": 4832
    },
    {
      "epoch": 2.94921875e-05,
      "step": 4832,
      "training_step_time": 0.4032564163208008
    },
    {
      "epoch": 2.9498291015625e-05,
      "model_forward_time": 0.11514639854431152,
      "step": 4833
    },
    {
      "epoch": 2.9498291015625e-05,
      "step": 4833,
      "training_step_time": 0.6319081783294678
    },
    {
      "epoch": 2.950439453125e-05,
      "model_forward_time": 0.11512446403503418,
      "step": 4834
    },
    {
      "epoch": 2.950439453125e-05,
      "step": 4834,
      "training_step_time": 0.511756181716919
    },
    {
      "epoch": 2.9510498046875e-05,
      "model_forward_time": 0.11452412605285645,
      "step": 4835
    },
    {
      "epoch": 2.9510498046875e-05,
      "step": 4835,
      "training_step_time": 0.47777605056762695
    },
    {
      "epoch": 2.95166015625e-05,
      "model_forward_time": 0.11426043510437012,
      "step": 4836
    },
    {
      "epoch": 2.95166015625e-05,
      "step": 4836,
      "training_step_time": 0.38925600051879883
    },
    {
      "epoch": 2.9522705078125e-05,
      "model_forward_time": 0.11423778533935547,
      "step": 4837
    },
    {
      "epoch": 2.9522705078125e-05,
      "step": 4837,
      "training_step_time": 0.39919304847717285
    },
    {
      "epoch": 2.952880859375e-05,
      "model_forward_time": 0.11457467079162598,
      "step": 4838
    },
    {
      "epoch": 2.952880859375e-05,
      "step": 4838,
      "training_step_time": 0.41245055198669434
    },
    {
      "epoch": 2.9534912109375e-05,
      "model_forward_time": 0.1145026683807373,
      "step": 4839
    },
    {
      "epoch": 2.9534912109375e-05,
      "step": 4839,
      "training_step_time": 0.6530191898345947
    },
    {
      "epoch": 2.9541015625e-05,
      "grad_norm": 0.3451343774795532,
      "learning_rate": 9.974310630451948e-05,
      "loss": 0.1069,
      "step": 4840
    },
    {
      "epoch": 2.9541015625e-05,
      "model_forward_time": 0.11432981491088867,
      "step": 4840
    },
    {
      "epoch": 2.9541015625e-05,
      "step": 4840,
      "training_step_time": 0.3845226764678955
    },
    {
      "epoch": 2.9547119140625e-05,
      "model_forward_time": 0.11445188522338867,
      "step": 4841
    },
    {
      "epoch": 2.9547119140625e-05,
      "step": 4841,
      "training_step_time": 0.38812255859375
    },
    {
      "epoch": 2.955322265625e-05,
      "model_forward_time": 0.11432075500488281,
      "step": 4842
    },
    {
      "epoch": 2.955322265625e-05,
      "step": 4842,
      "training_step_time": 0.4195566177368164
    },
    {
      "epoch": 2.9559326171875e-05,
      "model_forward_time": 0.11453580856323242,
      "step": 4843
    },
    {
      "epoch": 2.9559326171875e-05,
      "step": 4843,
      "training_step_time": 0.396054744720459
    },
    {
      "epoch": 2.95654296875e-05,
      "model_forward_time": 0.1144571304321289,
      "step": 4844
    },
    {
      "epoch": 2.95654296875e-05,
      "step": 4844,
      "training_step_time": 0.3925316333770752
    },
    {
      "epoch": 2.9571533203125e-05,
      "model_forward_time": 0.11522078514099121,
      "step": 4845
    },
    {
      "epoch": 2.9571533203125e-05,
      "step": 4845,
      "training_step_time": 0.8517425060272217
    },
    {
      "epoch": 2.957763671875e-05,
      "model_forward_time": 0.11411881446838379,
      "step": 4846
    },
    {
      "epoch": 2.957763671875e-05,
      "step": 4846,
      "training_step_time": 0.46378636360168457
    },
    {
      "epoch": 2.9583740234375e-05,
      "model_forward_time": 0.11515021324157715,
      "step": 4847
    },
    {
      "epoch": 2.9583740234375e-05,
      "step": 4847,
      "training_step_time": 0.49379849433898926
    },
    {
      "epoch": 2.958984375e-05,
      "model_forward_time": 0.11442184448242188,
      "step": 4848
    },
    {
      "epoch": 2.958984375e-05,
      "step": 4848,
      "training_step_time": 0.43297863006591797
    },
    {
      "epoch": 2.9595947265625e-05,
      "model_forward_time": 0.11439824104309082,
      "step": 4849
    },
    {
      "epoch": 2.9595947265625e-05,
      "step": 4849,
      "training_step_time": 0.4361405372619629
    },
    {
      "epoch": 2.960205078125e-05,
      "grad_norm": 0.24618205428123474,
      "learning_rate": 9.974030882012367e-05,
      "loss": 0.0952,
      "step": 4850
    },
    {
      "epoch": 2.960205078125e-05,
      "model_forward_time": 0.11397910118103027,
      "step": 4850
    },
    {
      "epoch": 2.960205078125e-05,
      "step": 4850,
      "training_step_time": 0.39341235160827637
    },
    {
      "epoch": 2.9608154296875e-05,
      "model_forward_time": 0.11451196670532227,
      "step": 4851
    },
    {
      "epoch": 2.9608154296875e-05,
      "step": 4851,
      "training_step_time": 0.3891921043395996
    },
    {
      "epoch": 2.96142578125e-05,
      "model_forward_time": 0.11503815650939941,
      "step": 4852
    },
    {
      "epoch": 2.96142578125e-05,
      "step": 4852,
      "training_step_time": 0.3875911235809326
    },
    {
      "epoch": 2.9620361328125e-05,
      "model_forward_time": 0.1152338981628418,
      "step": 4853
    },
    {
      "epoch": 2.9620361328125e-05,
      "step": 4853,
      "training_step_time": 0.38595008850097656
    },
    {
      "epoch": 2.962646484375e-05,
      "model_forward_time": 0.11518740653991699,
      "step": 4854
    },
    {
      "epoch": 2.962646484375e-05,
      "step": 4854,
      "training_step_time": 0.4038991928100586
    },
    {
      "epoch": 2.9632568359375e-05,
      "model_forward_time": 0.11511778831481934,
      "step": 4855
    },
    {
      "epoch": 2.9632568359375e-05,
      "step": 4855,
      "training_step_time": 0.3993842601776123
    },
    {
      "epoch": 2.9638671875e-05,
      "model_forward_time": 0.11472558975219727,
      "step": 4856
    },
    {
      "epoch": 2.9638671875e-05,
      "step": 4856,
      "training_step_time": 0.40692567825317383
    },
    {
      "epoch": 2.9644775390625e-05,
      "model_forward_time": 0.11559224128723145,
      "step": 4857
    },
    {
      "epoch": 2.9644775390625e-05,
      "step": 4857,
      "training_step_time": 0.7256629467010498
    },
    {
      "epoch": 2.965087890625e-05,
      "model_forward_time": 0.11487078666687012,
      "step": 4858
    },
    {
      "epoch": 2.965087890625e-05,
      "step": 4858,
      "training_step_time": 0.3907740116119385
    },
    {
      "epoch": 2.9656982421875e-05,
      "model_forward_time": 0.11475229263305664,
      "step": 4859
    },
    {
      "epoch": 2.9656982421875e-05,
      "step": 4859,
      "training_step_time": 0.39382410049438477
    },
    {
      "epoch": 2.96630859375e-05,
      "grad_norm": 0.23580442368984222,
      "learning_rate": 9.973749622593534e-05,
      "loss": 0.0956,
      "step": 4860
    },
    {
      "epoch": 2.96630859375e-05,
      "model_forward_time": 0.11496305465698242,
      "step": 4860
    },
    {
      "epoch": 2.96630859375e-05,
      "step": 4860,
      "training_step_time": 0.4799509048461914
    },
    {
      "epoch": 2.9669189453125e-05,
      "model_forward_time": 0.11491942405700684,
      "step": 4861
    },
    {
      "epoch": 2.9669189453125e-05,
      "step": 4861,
      "training_step_time": 0.45463013648986816
    },
    {
      "epoch": 2.967529296875e-05,
      "model_forward_time": 0.11423206329345703,
      "step": 4862
    },
    {
      "epoch": 2.967529296875e-05,
      "step": 4862,
      "training_step_time": 0.42695116996765137
    },
    {
      "epoch": 2.9681396484375e-05,
      "model_forward_time": 0.11450481414794922,
      "step": 4863
    },
    {
      "epoch": 2.9681396484375e-05,
      "step": 4863,
      "training_step_time": 0.4247269630432129
    },
    {
      "epoch": 2.96875e-05,
      "model_forward_time": 0.11445307731628418,
      "step": 4864
    },
    {
      "epoch": 2.96875e-05,
      "step": 4864,
      "training_step_time": 0.3950023651123047
    },
    {
      "epoch": 2.9693603515625e-05,
      "model_forward_time": 0.11476254463195801,
      "step": 4865
    },
    {
      "epoch": 2.9693603515625e-05,
      "step": 4865,
      "training_step_time": 0.3948812484741211
    },
    {
      "epoch": 2.969970703125e-05,
      "model_forward_time": 0.11503887176513672,
      "step": 4866
    },
    {
      "epoch": 2.969970703125e-05,
      "step": 4866,
      "training_step_time": 0.4012563228607178
    },
    {
      "epoch": 2.9705810546875e-05,
      "model_forward_time": 0.11483263969421387,
      "step": 4867
    },
    {
      "epoch": 2.9705810546875e-05,
      "step": 4867,
      "training_step_time": 0.4071216583251953
    },
    {
      "epoch": 2.97119140625e-05,
      "model_forward_time": 0.11489129066467285,
      "step": 4868
    },
    {
      "epoch": 2.97119140625e-05,
      "step": 4868,
      "training_step_time": 0.4446754455566406
    },
    {
      "epoch": 2.9718017578125e-05,
      "model_forward_time": 0.11507892608642578,
      "step": 4869
    },
    {
      "epoch": 2.9718017578125e-05,
      "step": 4869,
      "training_step_time": 0.40998220443725586
    },
    {
      "epoch": 2.972412109375e-05,
      "grad_norm": 0.27171072363853455,
      "learning_rate": 9.973466852280889e-05,
      "loss": 0.0955,
      "step": 4870
    },
    {
      "epoch": 2.972412109375e-05,
      "model_forward_time": 0.11493635177612305,
      "step": 4870
    },
    {
      "epoch": 2.972412109375e-05,
      "step": 4870,
      "training_step_time": 0.4007863998413086
    },
    {
      "epoch": 2.9730224609375e-05,
      "model_forward_time": 0.11508059501647949,
      "step": 4871
    },
    {
      "epoch": 2.9730224609375e-05,
      "step": 4871,
      "training_step_time": 0.39814257621765137
    },
    {
      "epoch": 2.9736328125e-05,
      "model_forward_time": 0.11534261703491211,
      "step": 4872
    },
    {
      "epoch": 2.9736328125e-05,
      "step": 4872,
      "training_step_time": 0.3988049030303955
    },
    {
      "epoch": 2.9742431640625e-05,
      "model_forward_time": 0.11512613296508789,
      "step": 4873
    },
    {
      "epoch": 2.9742431640625e-05,
      "step": 4873,
      "training_step_time": 0.36771059036254883
    },
    {
      "epoch": 2.974853515625e-05,
      "model_forward_time": 0.1147928237915039,
      "step": 4874
    },
    {
      "epoch": 2.974853515625e-05,
      "step": 4874,
      "training_step_time": 0.3988974094390869
    },
    {
      "epoch": 2.9754638671875e-05,
      "model_forward_time": 0.11538386344909668,
      "step": 4875
    },
    {
      "epoch": 2.9754638671875e-05,
      "step": 4875,
      "training_step_time": 0.5776875019073486
    },
    {
      "epoch": 2.97607421875e-05,
      "model_forward_time": 0.1153421401977539,
      "step": 4876
    },
    {
      "epoch": 2.97607421875e-05,
      "step": 4876,
      "training_step_time": 0.45474982261657715
    },
    {
      "epoch": 2.9766845703125e-05,
      "model_forward_time": 0.11511588096618652,
      "step": 4877
    },
    {
      "epoch": 2.9766845703125e-05,
      "step": 4877,
      "training_step_time": 0.46199679374694824
    },
    {
      "epoch": 2.977294921875e-05,
      "model_forward_time": 0.11524820327758789,
      "step": 4878
    },
    {
      "epoch": 2.977294921875e-05,
      "step": 4878,
      "training_step_time": 0.39491868019104004
    },
    {
      "epoch": 2.9779052734375e-05,
      "model_forward_time": 0.11504030227661133,
      "step": 4879
    },
    {
      "epoch": 2.9779052734375e-05,
      "step": 4879,
      "training_step_time": 0.39206981658935547
    },
    {
      "epoch": 2.978515625e-05,
      "grad_norm": 0.31362950801849365,
      "learning_rate": 9.973182571160332e-05,
      "loss": 0.1,
      "step": 4880
    },
    {
      "epoch": 2.978515625e-05,
      "model_forward_time": 0.1143639087677002,
      "step": 4880
    },
    {
      "epoch": 2.978515625e-05,
      "step": 4880,
      "training_step_time": 0.3995332717895508
    },
    {
      "epoch": 2.9791259765625e-05,
      "model_forward_time": 0.1152651309967041,
      "step": 4881
    },
    {
      "epoch": 2.9791259765625e-05,
      "step": 4881,
      "training_step_time": 0.4122047424316406
    },
    {
      "epoch": 2.979736328125e-05,
      "model_forward_time": 0.11479496955871582,
      "step": 4882
    },
    {
      "epoch": 2.979736328125e-05,
      "step": 4882,
      "training_step_time": 0.42549753189086914
    },
    {
      "epoch": 2.9803466796875e-05,
      "model_forward_time": 0.11476707458496094,
      "step": 4883
    },
    {
      "epoch": 2.9803466796875e-05,
      "step": 4883,
      "training_step_time": 0.43581104278564453
    },
    {
      "epoch": 2.98095703125e-05,
      "model_forward_time": 0.11439728736877441,
      "step": 4884
    },
    {
      "epoch": 2.98095703125e-05,
      "step": 4884,
      "training_step_time": 0.399860143661499
    },
    {
      "epoch": 2.9815673828125e-05,
      "model_forward_time": 0.1148827075958252,
      "step": 4885
    },
    {
      "epoch": 2.9815673828125e-05,
      "step": 4885,
      "training_step_time": 0.3921630382537842
    },
    {
      "epoch": 2.982177734375e-05,
      "model_forward_time": 0.11473393440246582,
      "step": 4886
    },
    {
      "epoch": 2.982177734375e-05,
      "step": 4886,
      "training_step_time": 0.39820313453674316
    },
    {
      "epoch": 2.9827880859375e-05,
      "model_forward_time": 0.11513280868530273,
      "step": 4887
    },
    {
      "epoch": 2.9827880859375e-05,
      "step": 4887,
      "training_step_time": 0.43065500259399414
    },
    {
      "epoch": 2.9833984375e-05,
      "model_forward_time": 0.11511039733886719,
      "step": 4888
    },
    {
      "epoch": 2.9833984375e-05,
      "step": 4888,
      "training_step_time": 0.39682483673095703
    },
    {
      "epoch": 2.9840087890625e-05,
      "model_forward_time": 0.11581897735595703,
      "step": 4889
    },
    {
      "epoch": 2.9840087890625e-05,
      "step": 4889,
      "training_step_time": 0.4647951126098633
    },
    {
      "epoch": 2.984619140625e-05,
      "grad_norm": 0.29096436500549316,
      "learning_rate": 9.972896779318219e-05,
      "loss": 0.1008,
      "step": 4890
    },
    {
      "epoch": 2.984619140625e-05,
      "model_forward_time": 0.11587285995483398,
      "step": 4890
    },
    {
      "epoch": 2.984619140625e-05,
      "step": 4890,
      "training_step_time": 0.5225300788879395
    },
    {
      "epoch": 2.9852294921875e-05,
      "model_forward_time": 0.11475205421447754,
      "step": 4891
    },
    {
      "epoch": 2.9852294921875e-05,
      "step": 4891,
      "training_step_time": 0.4032747745513916
    },
    {
      "epoch": 2.98583984375e-05,
      "model_forward_time": 0.11539244651794434,
      "step": 4892
    },
    {
      "epoch": 2.98583984375e-05,
      "step": 4892,
      "training_step_time": 0.41063714027404785
    },
    {
      "epoch": 2.9864501953125e-05,
      "model_forward_time": 0.11457157135009766,
      "step": 4893
    },
    {
      "epoch": 2.9864501953125e-05,
      "step": 4893,
      "training_step_time": 0.3887522220611572
    },
    {
      "epoch": 2.987060546875e-05,
      "model_forward_time": 0.11523056030273438,
      "step": 4894
    },
    {
      "epoch": 2.987060546875e-05,
      "step": 4894,
      "training_step_time": 0.3955531120300293
    },
    {
      "epoch": 2.9876708984375e-05,
      "model_forward_time": 0.11468815803527832,
      "step": 4895
    },
    {
      "epoch": 2.9876708984375e-05,
      "step": 4895,
      "training_step_time": 0.4038381576538086
    },
    {
      "epoch": 2.98828125e-05,
      "model_forward_time": 0.1151723861694336,
      "step": 4896
    },
    {
      "epoch": 2.98828125e-05,
      "step": 4896,
      "training_step_time": 0.4285695552825928
    },
    {
      "epoch": 2.9888916015625e-05,
      "model_forward_time": 0.11463427543640137,
      "step": 4897
    },
    {
      "epoch": 2.9888916015625e-05,
      "step": 4897,
      "training_step_time": 0.4397401809692383
    },
    {
      "epoch": 2.989501953125e-05,
      "model_forward_time": 0.11546850204467773,
      "step": 4898
    },
    {
      "epoch": 2.989501953125e-05,
      "step": 4898,
      "training_step_time": 0.3993868827819824
    },
    {
      "epoch": 2.9901123046875e-05,
      "model_forward_time": 0.11510944366455078,
      "step": 4899
    },
    {
      "epoch": 2.9901123046875e-05,
      "step": 4899,
      "training_step_time": 0.41742968559265137
    },
    {
      "epoch": 2.99072265625e-05,
      "grad_norm": 0.33501535654067993,
      "learning_rate": 9.972609476841367e-05,
      "loss": 0.1026,
      "step": 4900
    },
    {
      "epoch": 2.99072265625e-05,
      "model_forward_time": 0.11461544036865234,
      "step": 4900
    },
    {
      "epoch": 2.99072265625e-05,
      "step": 4900,
      "training_step_time": 0.41879987716674805
    },
    {
      "epoch": 2.9913330078125e-05,
      "model_forward_time": 0.11442971229553223,
      "step": 4901
    },
    {
      "epoch": 2.9913330078125e-05,
      "step": 4901,
      "training_step_time": 0.3873107433319092
    },
    {
      "epoch": 2.991943359375e-05,
      "model_forward_time": 0.11523795127868652,
      "step": 4902
    },
    {
      "epoch": 2.991943359375e-05,
      "step": 4902,
      "training_step_time": 0.3910658359527588
    },
    {
      "epoch": 2.9925537109375e-05,
      "model_forward_time": 0.11553049087524414,
      "step": 4903
    },
    {
      "epoch": 2.9925537109375e-05,
      "step": 4903,
      "training_step_time": 0.40851497650146484
    },
    {
      "epoch": 2.9931640625e-05,
      "model_forward_time": 0.11517882347106934,
      "step": 4904
    },
    {
      "epoch": 2.9931640625e-05,
      "step": 4904,
      "training_step_time": 0.48354077339172363
    },
    {
      "epoch": 2.9937744140625e-05,
      "model_forward_time": 0.11652708053588867,
      "step": 4905
    },
    {
      "epoch": 2.9937744140625e-05,
      "step": 4905,
      "training_step_time": 0.5204017162322998
    },
    {
      "epoch": 2.994384765625e-05,
      "model_forward_time": 0.11467766761779785,
      "step": 4906
    },
    {
      "epoch": 2.994384765625e-05,
      "step": 4906,
      "training_step_time": 0.4847264289855957
    },
    {
      "epoch": 2.9949951171875e-05,
      "model_forward_time": 0.11604857444763184,
      "step": 4907
    },
    {
      "epoch": 2.9949951171875e-05,
      "step": 4907,
      "training_step_time": 0.4169788360595703
    },
    {
      "epoch": 2.99560546875e-05,
      "model_forward_time": 0.11480975151062012,
      "step": 4908
    },
    {
      "epoch": 2.99560546875e-05,
      "step": 4908,
      "training_step_time": 0.400986909866333
    },
    {
      "epoch": 2.9962158203125e-05,
      "model_forward_time": 0.11489701271057129,
      "step": 4909
    },
    {
      "epoch": 2.9962158203125e-05,
      "step": 4909,
      "training_step_time": 0.43320727348327637
    },
    {
      "epoch": 2.996826171875e-05,
      "grad_norm": 0.2546725571155548,
      "learning_rate": 9.97232066381705e-05,
      "loss": 0.0963,
      "step": 4910
    },
    {
      "epoch": 2.996826171875e-05,
      "model_forward_time": 0.1147618293762207,
      "step": 4910
    },
    {
      "epoch": 2.996826171875e-05,
      "step": 4910,
      "training_step_time": 0.4444551467895508
    },
    {
      "epoch": 2.9974365234375e-05,
      "model_forward_time": 0.11506772041320801,
      "step": 4911
    },
    {
      "epoch": 2.9974365234375e-05,
      "step": 4911,
      "training_step_time": 0.4659006595611572
    },
    {
      "epoch": 2.998046875e-05,
      "model_forward_time": 0.1150972843170166,
      "step": 4912
    },
    {
      "epoch": 2.998046875e-05,
      "step": 4912,
      "training_step_time": 0.3872401714324951
    },
    {
      "epoch": 2.9986572265625e-05,
      "model_forward_time": 0.11509895324707031,
      "step": 4913
    },
    {
      "epoch": 2.9986572265625e-05,
      "step": 4913,
      "training_step_time": 0.38139986991882324
    },
    {
      "epoch": 2.999267578125e-05,
      "model_forward_time": 0.11504292488098145,
      "step": 4914
    },
    {
      "epoch": 2.999267578125e-05,
      "step": 4914,
      "training_step_time": 0.39896225929260254
    },
    {
      "epoch": 2.9998779296875e-05,
      "model_forward_time": 0.11426496505737305,
      "step": 4915
    },
    {
      "epoch": 2.9998779296875e-05,
      "step": 4915,
      "training_step_time": 0.3949131965637207
    },
    {
      "epoch": 3.00048828125e-05,
      "model_forward_time": 0.11470651626586914,
      "step": 4916
    },
    {
      "epoch": 3.00048828125e-05,
      "step": 4916,
      "training_step_time": 0.396622896194458
    },
    {
      "epoch": 3.0010986328125e-05,
      "model_forward_time": 0.11459732055664062,
      "step": 4917
    },
    {
      "epoch": 3.0010986328125e-05,
      "step": 4917,
      "training_step_time": 0.6305594444274902
    },
    {
      "epoch": 3.001708984375e-05,
      "model_forward_time": 0.11459231376647949,
      "step": 4918
    },
    {
      "epoch": 3.001708984375e-05,
      "step": 4918,
      "training_step_time": 0.437924861907959
    },
    {
      "epoch": 3.0023193359375e-05,
      "model_forward_time": 0.11482954025268555,
      "step": 4919
    },
    {
      "epoch": 3.0023193359375e-05,
      "step": 4919,
      "training_step_time": 0.47665977478027344
    },
    {
      "epoch": 3.0029296875e-05,
      "grad_norm": 0.26401275396347046,
      "learning_rate": 9.972030340333001e-05,
      "loss": 0.094,
      "step": 4920
    },
    {
      "epoch": 3.0029296875e-05,
      "model_forward_time": 0.11434435844421387,
      "step": 4920
    },
    {
      "epoch": 3.0029296875e-05,
      "step": 4920,
      "training_step_time": 0.4300572872161865
    },
    {
      "epoch": 3.0035400390625e-05,
      "model_forward_time": 0.1149451732635498,
      "step": 4921
    },
    {
      "epoch": 3.0035400390625e-05,
      "step": 4921,
      "training_step_time": 0.4385080337524414
    },
    {
      "epoch": 3.004150390625e-05,
      "model_forward_time": 0.11432766914367676,
      "step": 4922
    },
    {
      "epoch": 3.004150390625e-05,
      "step": 4922,
      "training_step_time": 0.4210813045501709
    },
    {
      "epoch": 3.0047607421875e-05,
      "model_forward_time": 0.1145627498626709,
      "step": 4923
    },
    {
      "epoch": 3.0047607421875e-05,
      "step": 4923,
      "training_step_time": 0.43140721321105957
    },
    {
      "epoch": 3.00537109375e-05,
      "model_forward_time": 0.11417818069458008,
      "step": 4924
    },
    {
      "epoch": 3.00537109375e-05,
      "step": 4924,
      "training_step_time": 0.39769744873046875
    },
    {
      "epoch": 3.0059814453125e-05,
      "model_forward_time": 0.11506414413452148,
      "step": 4925
    },
    {
      "epoch": 3.0059814453125e-05,
      "step": 4925,
      "training_step_time": 0.38640856742858887
    },
    {
      "epoch": 3.006591796875e-05,
      "model_forward_time": 0.11549854278564453,
      "step": 4926
    },
    {
      "epoch": 3.006591796875e-05,
      "step": 4926,
      "training_step_time": 0.4040415287017822
    },
    {
      "epoch": 3.0072021484375e-05,
      "model_forward_time": 0.11499953269958496,
      "step": 4927
    },
    {
      "epoch": 3.0072021484375e-05,
      "step": 4927,
      "training_step_time": 0.3965718746185303
    },
    {
      "epoch": 3.0078125e-05,
      "model_forward_time": 0.11491823196411133,
      "step": 4928
    },
    {
      "epoch": 3.0078125e-05,
      "step": 4928,
      "training_step_time": 0.39728522300720215
    },
    {
      "epoch": 3.0084228515625e-05,
      "model_forward_time": 0.11442446708679199,
      "step": 4929
    },
    {
      "epoch": 3.0084228515625e-05,
      "step": 4929,
      "training_step_time": 0.3914659023284912
    },
    {
      "epoch": 3.009033203125e-05,
      "grad_norm": 0.3536728024482727,
      "learning_rate": 9.971738506477414e-05,
      "loss": 0.1045,
      "step": 4930
    },
    {
      "epoch": 3.009033203125e-05,
      "model_forward_time": 0.115936279296875,
      "step": 4930
    },
    {
      "epoch": 3.009033203125e-05,
      "step": 4930,
      "training_step_time": 0.3940427303314209
    },
    {
      "epoch": 3.0096435546875e-05,
      "model_forward_time": 0.11531543731689453,
      "step": 4931
    },
    {
      "epoch": 3.0096435546875e-05,
      "step": 4931,
      "training_step_time": 0.3685436248779297
    },
    {
      "epoch": 3.01025390625e-05,
      "model_forward_time": 0.11516022682189941,
      "step": 4932
    },
    {
      "epoch": 3.01025390625e-05,
      "step": 4932,
      "training_step_time": 0.44470930099487305
    },
    {
      "epoch": 3.0108642578125e-05,
      "model_forward_time": 0.11435317993164062,
      "step": 4933
    },
    {
      "epoch": 3.0108642578125e-05,
      "step": 4933,
      "training_step_time": 0.43887877464294434
    },
    {
      "epoch": 3.011474609375e-05,
      "model_forward_time": 0.11492085456848145,
      "step": 4934
    },
    {
      "epoch": 3.011474609375e-05,
      "step": 4934,
      "training_step_time": 0.49779748916625977
    },
    {
      "epoch": 3.0120849609375e-05,
      "model_forward_time": 0.11483144760131836,
      "step": 4935
    },
    {
      "epoch": 3.0120849609375e-05,
      "step": 4935,
      "training_step_time": 0.4574606418609619
    },
    {
      "epoch": 3.0126953125e-05,
      "model_forward_time": 0.11464571952819824,
      "step": 4936
    },
    {
      "epoch": 3.0126953125e-05,
      "step": 4936,
      "training_step_time": 0.46829748153686523
    },
    {
      "epoch": 3.0133056640625e-05,
      "model_forward_time": 0.11476349830627441,
      "step": 4937
    },
    {
      "epoch": 3.0133056640625e-05,
      "step": 4937,
      "training_step_time": 0.4199655055999756
    },
    {
      "epoch": 3.013916015625e-05,
      "model_forward_time": 0.11515498161315918,
      "step": 4938
    },
    {
      "epoch": 3.013916015625e-05,
      "step": 4938,
      "training_step_time": 0.40329861640930176
    },
    {
      "epoch": 3.0145263671875e-05,
      "model_forward_time": 0.12325000762939453,
      "step": 4939
    },
    {
      "epoch": 3.0145263671875e-05,
      "step": 4939,
      "training_step_time": 0.39710545539855957
    },
    {
      "epoch": 3.01513671875e-05,
      "grad_norm": 0.2868027687072754,
      "learning_rate": 9.971445162338939e-05,
      "loss": 0.0965,
      "step": 4940
    },
    {
      "epoch": 3.01513671875e-05,
      "model_forward_time": 0.11560916900634766,
      "step": 4940
    },
    {
      "epoch": 3.01513671875e-05,
      "step": 4940,
      "training_step_time": 0.397167444229126
    },
    {
      "epoch": 3.0157470703125e-05,
      "model_forward_time": 0.11621999740600586,
      "step": 4941
    },
    {
      "epoch": 3.0157470703125e-05,
      "step": 4941,
      "training_step_time": 0.40466928482055664
    },
    {
      "epoch": 3.016357421875e-05,
      "model_forward_time": 0.11456894874572754,
      "step": 4942
    },
    {
      "epoch": 3.016357421875e-05,
      "step": 4942,
      "training_step_time": 0.40221071243286133
    },
    {
      "epoch": 3.0169677734375e-05,
      "model_forward_time": 0.11536192893981934,
      "step": 4943
    },
    {
      "epoch": 3.0169677734375e-05,
      "step": 4943,
      "training_step_time": 0.3989572525024414
    },
    {
      "epoch": 3.017578125e-05,
      "model_forward_time": 0.11510968208312988,
      "step": 4944
    },
    {
      "epoch": 3.017578125e-05,
      "step": 4944,
      "training_step_time": 0.4005255699157715
    },
    {
      "epoch": 3.0181884765625e-05,
      "model_forward_time": 0.1154017448425293,
      "step": 4945
    },
    {
      "epoch": 3.0181884765625e-05,
      "step": 4945,
      "training_step_time": 0.41033363342285156
    },
    {
      "epoch": 3.018798828125e-05,
      "model_forward_time": 0.11496925354003906,
      "step": 4946
    },
    {
      "epoch": 3.018798828125e-05,
      "step": 4946,
      "training_step_time": 0.36648011207580566
    },
    {
      "epoch": 3.0194091796875e-05,
      "model_forward_time": 0.11541438102722168,
      "step": 4947
    },
    {
      "epoch": 3.0194091796875e-05,
      "step": 4947,
      "training_step_time": 0.4338066577911377
    },
    {
      "epoch": 3.02001953125e-05,
      "model_forward_time": 0.11551618576049805,
      "step": 4948
    },
    {
      "epoch": 3.02001953125e-05,
      "step": 4948,
      "training_step_time": 0.5021932125091553
    },
    {
      "epoch": 3.0206298828125e-05,
      "model_forward_time": 0.1151118278503418,
      "step": 4949
    },
    {
      "epoch": 3.0206298828125e-05,
      "step": 4949,
      "training_step_time": 0.46466755867004395
    },
    {
      "epoch": 3.021240234375e-05,
      "grad_norm": 0.3842293620109558,
      "learning_rate": 9.97115030800669e-05,
      "loss": 0.0969,
      "step": 4950
    },
    {
      "epoch": 3.021240234375e-05,
      "model_forward_time": 0.1145167350769043,
      "step": 4950
    },
    {
      "epoch": 3.021240234375e-05,
      "step": 4950,
      "training_step_time": 0.420243501663208
    },
    {
      "epoch": 3.0218505859375e-05,
      "model_forward_time": 0.11471772193908691,
      "step": 4951
    },
    {
      "epoch": 3.0218505859375e-05,
      "step": 4951,
      "training_step_time": 0.3935360908508301
    },
    {
      "epoch": 3.0224609375e-05,
      "model_forward_time": 0.11619687080383301,
      "step": 4952
    },
    {
      "epoch": 3.0224609375e-05,
      "step": 4952,
      "training_step_time": 0.397935152053833
    },
    {
      "epoch": 3.0230712890625e-05,
      "model_forward_time": 0.11463618278503418,
      "step": 4953
    },
    {
      "epoch": 3.0230712890625e-05,
      "step": 4953,
      "training_step_time": 0.39905667304992676
    },
    {
      "epoch": 3.023681640625e-05,
      "model_forward_time": 0.11492395401000977,
      "step": 4954
    },
    {
      "epoch": 3.023681640625e-05,
      "step": 4954,
      "training_step_time": 0.3927462100982666
    },
    {
      "epoch": 3.0242919921875e-05,
      "model_forward_time": 0.11472916603088379,
      "step": 4955
    },
    {
      "epoch": 3.0242919921875e-05,
      "step": 4955,
      "training_step_time": 0.4626126289367676
    },
    {
      "epoch": 3.02490234375e-05,
      "model_forward_time": 0.1146399974822998,
      "step": 4956
    },
    {
      "epoch": 3.02490234375e-05,
      "step": 4956,
      "training_step_time": 0.40018415451049805
    },
    {
      "epoch": 3.0255126953125e-05,
      "model_forward_time": 0.11515378952026367,
      "step": 4957
    },
    {
      "epoch": 3.0255126953125e-05,
      "step": 4957,
      "training_step_time": 0.40375733375549316
    },
    {
      "epoch": 3.026123046875e-05,
      "model_forward_time": 0.1147310733795166,
      "step": 4958
    },
    {
      "epoch": 3.026123046875e-05,
      "step": 4958,
      "training_step_time": 0.4060189723968506
    },
    {
      "epoch": 3.0267333984375e-05,
      "model_forward_time": 0.1150362491607666,
      "step": 4959
    },
    {
      "epoch": 3.0267333984375e-05,
      "step": 4959,
      "training_step_time": 0.3951089382171631
    },
    {
      "epoch": 3.02734375e-05,
      "grad_norm": 0.23066028952598572,
      "learning_rate": 9.97085394357023e-05,
      "loss": 0.0911,
      "step": 4960
    },
    {
      "epoch": 3.02734375e-05,
      "model_forward_time": 0.11437797546386719,
      "step": 4960
    },
    {
      "epoch": 3.02734375e-05,
      "step": 4960,
      "training_step_time": 0.39189648628234863
    },
    {
      "epoch": 3.0279541015625e-05,
      "model_forward_time": 0.11545896530151367,
      "step": 4961
    },
    {
      "epoch": 3.0279541015625e-05,
      "step": 4961,
      "training_step_time": 0.5955064296722412
    },
    {
      "epoch": 3.028564453125e-05,
      "model_forward_time": 0.11475682258605957,
      "step": 4962
    },
    {
      "epoch": 3.028564453125e-05,
      "step": 4962,
      "training_step_time": 0.44731736183166504
    },
    {
      "epoch": 3.0291748046875e-05,
      "model_forward_time": 0.11556506156921387,
      "step": 4963
    },
    {
      "epoch": 3.0291748046875e-05,
      "step": 4963,
      "training_step_time": 0.4886510372161865
    },
    {
      "epoch": 3.02978515625e-05,
      "model_forward_time": 0.11454224586486816,
      "step": 4964
    },
    {
      "epoch": 3.02978515625e-05,
      "step": 4964,
      "training_step_time": 0.4388401508331299
    },
    {
      "epoch": 3.0303955078125e-05,
      "model_forward_time": 0.11518239974975586,
      "step": 4965
    },
    {
      "epoch": 3.0303955078125e-05,
      "step": 4965,
      "training_step_time": 0.4054250717163086
    },
    {
      "epoch": 3.031005859375e-05,
      "model_forward_time": 0.11410117149353027,
      "step": 4966
    },
    {
      "epoch": 3.031005859375e-05,
      "step": 4966,
      "training_step_time": 0.39218878746032715
    },
    {
      "epoch": 3.0316162109375e-05,
      "model_forward_time": 0.11550164222717285,
      "step": 4967
    },
    {
      "epoch": 3.0316162109375e-05,
      "step": 4967,
      "training_step_time": 0.38935399055480957
    },
    {
      "epoch": 3.0322265625e-05,
      "model_forward_time": 0.11544656753540039,
      "step": 4968
    },
    {
      "epoch": 3.0322265625e-05,
      "step": 4968,
      "training_step_time": 0.3836991786956787
    },
    {
      "epoch": 3.0328369140625e-05,
      "model_forward_time": 0.1159672737121582,
      "step": 4969
    },
    {
      "epoch": 3.0328369140625e-05,
      "step": 4969,
      "training_step_time": 0.3996307849884033
    },
    {
      "epoch": 3.033447265625e-05,
      "grad_norm": 0.2587489187717438,
      "learning_rate": 9.97055606911959e-05,
      "loss": 0.103,
      "step": 4970
    },
    {
      "epoch": 3.033447265625e-05,
      "model_forward_time": 0.11564302444458008,
      "step": 4970
    },
    {
      "epoch": 3.033447265625e-05,
      "step": 4970,
      "training_step_time": 0.40328168869018555
    },
    {
      "epoch": 3.0340576171875e-05,
      "model_forward_time": 0.11510157585144043,
      "step": 4971
    },
    {
      "epoch": 3.0340576171875e-05,
      "step": 4971,
      "training_step_time": 0.3941671848297119
    },
    {
      "epoch": 3.03466796875e-05,
      "model_forward_time": 0.11546063423156738,
      "step": 4972
    },
    {
      "epoch": 3.03466796875e-05,
      "step": 4972,
      "training_step_time": 0.3971402645111084
    },
    {
      "epoch": 3.0352783203125e-05,
      "model_forward_time": 0.11566734313964844,
      "step": 4973
    },
    {
      "epoch": 3.0352783203125e-05,
      "step": 4973,
      "training_step_time": 0.49629807472229004
    },
    {
      "epoch": 3.035888671875e-05,
      "model_forward_time": 0.11525821685791016,
      "step": 4974
    },
    {
      "epoch": 3.035888671875e-05,
      "step": 4974,
      "training_step_time": 0.47714686393737793
    },
    {
      "epoch": 3.0364990234375e-05,
      "model_forward_time": 0.11460590362548828,
      "step": 4975
    },
    {
      "epoch": 3.0364990234375e-05,
      "step": 4975,
      "training_step_time": 0.3694891929626465
    },
    {
      "epoch": 3.037109375e-05,
      "model_forward_time": 0.11586403846740723,
      "step": 4976
    },
    {
      "epoch": 3.037109375e-05,
      "step": 4976,
      "training_step_time": 0.49701356887817383
    },
    {
      "epoch": 3.0377197265625e-05,
      "model_forward_time": 0.1147603988647461,
      "step": 4977
    },
    {
      "epoch": 3.0377197265625e-05,
      "step": 4977,
      "training_step_time": 0.4843010902404785
    },
    {
      "epoch": 3.038330078125e-05,
      "model_forward_time": 0.11465668678283691,
      "step": 4978
    },
    {
      "epoch": 3.038330078125e-05,
      "step": 4978,
      "training_step_time": 0.44359803199768066
    },
    {
      "epoch": 3.0389404296875e-05,
      "model_forward_time": 0.11518216133117676,
      "step": 4979
    },
    {
      "epoch": 3.0389404296875e-05,
      "step": 4979,
      "training_step_time": 0.41730690002441406
    },
    {
      "epoch": 3.03955078125e-05,
      "grad_norm": 0.2334640473127365,
      "learning_rate": 9.970256684745258e-05,
      "loss": 0.1006,
      "step": 4980
    },
    {
      "epoch": 3.03955078125e-05,
      "model_forward_time": 0.11576366424560547,
      "step": 4980
    },
    {
      "epoch": 3.03955078125e-05,
      "step": 4980,
      "training_step_time": 0.3814091682434082
    },
    {
      "epoch": 3.0401611328125e-05,
      "model_forward_time": 0.11493754386901855,
      "step": 4981
    },
    {
      "epoch": 3.0401611328125e-05,
      "step": 4981,
      "training_step_time": 0.3878452777862549
    },
    {
      "epoch": 3.040771484375e-05,
      "model_forward_time": 0.11471891403198242,
      "step": 4982
    },
    {
      "epoch": 3.040771484375e-05,
      "step": 4982,
      "training_step_time": 0.403289794921875
    },
    {
      "epoch": 3.0413818359375e-05,
      "model_forward_time": 0.11524796485900879,
      "step": 4983
    },
    {
      "epoch": 3.0413818359375e-05,
      "step": 4983,
      "training_step_time": 0.3924386501312256
    },
    {
      "epoch": 3.0419921875e-05,
      "model_forward_time": 0.11523580551147461,
      "step": 4984
    },
    {
      "epoch": 3.0419921875e-05,
      "step": 4984,
      "training_step_time": 0.3895680904388428
    },
    {
      "epoch": 3.0426025390625e-05,
      "model_forward_time": 0.11557936668395996,
      "step": 4985
    },
    {
      "epoch": 3.0426025390625e-05,
      "step": 4985,
      "training_step_time": 0.3863484859466553
    },
    {
      "epoch": 3.043212890625e-05,
      "model_forward_time": 0.11508011817932129,
      "step": 4986
    },
    {
      "epoch": 3.043212890625e-05,
      "step": 4986,
      "training_step_time": 0.40346527099609375
    },
    {
      "epoch": 3.0438232421875e-05,
      "model_forward_time": 0.11506390571594238,
      "step": 4987
    },
    {
      "epoch": 3.0438232421875e-05,
      "step": 4987,
      "training_step_time": 0.4048006534576416
    },
    {
      "epoch": 3.04443359375e-05,
      "model_forward_time": 0.11529970169067383,
      "step": 4988
    },
    {
      "epoch": 3.04443359375e-05,
      "step": 4988,
      "training_step_time": 0.4235501289367676
    },
    {
      "epoch": 3.0450439453125e-05,
      "model_forward_time": 0.11554694175720215,
      "step": 4989
    },
    {
      "epoch": 3.0450439453125e-05,
      "step": 4989,
      "training_step_time": 0.40180325508117676
    },
    {
      "epoch": 3.045654296875e-05,
      "grad_norm": 0.3534263074398041,
      "learning_rate": 9.969955790538175e-05,
      "loss": 0.0967,
      "step": 4990
    },
    {
      "epoch": 3.045654296875e-05,
      "model_forward_time": 0.11527013778686523,
      "step": 4990
    },
    {
      "epoch": 3.045654296875e-05,
      "step": 4990,
      "training_step_time": 0.48604297637939453
    },
    {
      "epoch": 3.0462646484375e-05,
      "model_forward_time": 0.1152801513671875,
      "step": 4991
    },
    {
      "epoch": 3.0462646484375e-05,
      "step": 4991,
      "training_step_time": 0.47624993324279785
    },
    {
      "epoch": 3.046875e-05,
      "model_forward_time": 0.11565256118774414,
      "step": 4992
    },
    {
      "epoch": 3.046875e-05,
      "step": 4992,
      "training_step_time": 0.5252950191497803
    },
    {
      "epoch": 3.0474853515625e-05,
      "model_forward_time": 0.11506390571594238,
      "step": 4993
    },
    {
      "epoch": 3.0474853515625e-05,
      "step": 4993,
      "training_step_time": 0.43797945976257324
    },
    {
      "epoch": 3.048095703125e-05,
      "model_forward_time": 0.11476707458496094,
      "step": 4994
    },
    {
      "epoch": 3.048095703125e-05,
      "step": 4994,
      "training_step_time": 0.4158031940460205
    },
    {
      "epoch": 3.0487060546875e-05,
      "model_forward_time": 0.11458134651184082,
      "step": 4995
    },
    {
      "epoch": 3.0487060546875e-05,
      "step": 4995,
      "training_step_time": 0.3972339630126953
    },
    {
      "epoch": 3.04931640625e-05,
      "model_forward_time": 0.1154487133026123,
      "step": 4996
    },
    {
      "epoch": 3.04931640625e-05,
      "step": 4996,
      "training_step_time": 0.38126587867736816
    },
    {
      "epoch": 3.0499267578125e-05,
      "model_forward_time": 0.11562323570251465,
      "step": 4997
    },
    {
      "epoch": 3.0499267578125e-05,
      "step": 4997,
      "training_step_time": 0.3880798816680908
    },
    {
      "epoch": 3.050537109375e-05,
      "model_forward_time": 0.11568641662597656,
      "step": 4998
    },
    {
      "epoch": 3.050537109375e-05,
      "step": 4998,
      "training_step_time": 0.3864133358001709
    },
    {
      "epoch": 3.0511474609375e-05,
      "model_forward_time": 0.11505842208862305,
      "step": 4999
    },
    {
      "epoch": 3.0511474609375e-05,
      "step": 4999,
      "training_step_time": 0.39483070373535156
    },
    {
      "epoch": 3.0517578125e-05,
      "grad_norm": 0.2958953380584717,
      "learning_rate": 9.969653386589748e-05,
      "loss": 0.0949,
      "step": 5000
    },
    {
      "epoch": 3.0517578125e-05,
      "model_forward_time": 0.11278510093688965,
      "step": 5000
    },
    {
      "epoch": 3.0517578125e-05,
      "step": 5000,
      "training_step_time": 0.3536078929901123
    },
    {
      "epoch": 3.0523681640625e-05,
      "model_forward_time": 0.11261868476867676,
      "step": 5001
    },
    {
      "epoch": 3.0523681640625e-05,
      "step": 5001,
      "training_step_time": 0.3705103397369385
    },
    {
      "epoch": 3.052978515625e-05,
      "model_forward_time": 0.11293745040893555,
      "step": 5002
    },
    {
      "epoch": 3.052978515625e-05,
      "step": 5002,
      "training_step_time": 0.36532115936279297
    },
    {
      "epoch": 3.0535888671875e-05,
      "model_forward_time": 0.1141502857208252,
      "step": 5003
    },
    {
      "epoch": 3.0535888671875e-05,
      "step": 5003,
      "training_step_time": 0.3972158432006836
    },
    {
      "epoch": 3.05419921875e-05,
      "model_forward_time": 0.11481475830078125,
      "step": 5004
    },
    {
      "epoch": 3.05419921875e-05,
      "step": 5004,
      "training_step_time": 0.3775618076324463
    },
    {
      "epoch": 3.0548095703125e-05,
      "model_forward_time": 0.11421322822570801,
      "step": 5005
    },
    {
      "epoch": 3.0548095703125e-05,
      "step": 5005,
      "training_step_time": 0.38652777671813965
    },
    {
      "epoch": 3.055419921875e-05,
      "model_forward_time": 0.11454629898071289,
      "step": 5006
    },
    {
      "epoch": 3.055419921875e-05,
      "step": 5006,
      "training_step_time": 0.38832902908325195
    },
    {
      "epoch": 3.0560302734375e-05,
      "model_forward_time": 0.11519098281860352,
      "step": 5007
    },
    {
      "epoch": 3.0560302734375e-05,
      "step": 5007,
      "training_step_time": 0.4222078323364258
    },
    {
      "epoch": 3.056640625e-05,
      "model_forward_time": 0.11469793319702148,
      "step": 5008
    },
    {
      "epoch": 3.056640625e-05,
      "step": 5008,
      "training_step_time": 0.36518287658691406
    },
    {
      "epoch": 3.0572509765625e-05,
      "model_forward_time": 0.11489367485046387,
      "step": 5009
    },
    {
      "epoch": 3.0572509765625e-05,
      "step": 5009,
      "training_step_time": 0.44480323791503906
    },
    {
      "epoch": 3.057861328125e-05,
      "grad_norm": 0.3390209674835205,
      "learning_rate": 9.969349472991838e-05,
      "loss": 0.0953,
      "step": 5010
    },
    {
      "epoch": 3.057861328125e-05,
      "model_forward_time": 0.11446547508239746,
      "step": 5010
    },
    {
      "epoch": 3.057861328125e-05,
      "step": 5010,
      "training_step_time": 0.44049692153930664
    },
    {
      "epoch": 3.0584716796875e-05,
      "model_forward_time": 0.11627364158630371,
      "step": 5011
    },
    {
      "epoch": 3.0584716796875e-05,
      "step": 5011,
      "training_step_time": 0.4316596984863281
    },
    {
      "epoch": 3.05908203125e-05,
      "model_forward_time": 0.1147618293762207,
      "step": 5012
    },
    {
      "epoch": 3.05908203125e-05,
      "step": 5012,
      "training_step_time": 0.48548054695129395
    },
    {
      "epoch": 3.0596923828125e-05,
      "model_forward_time": 0.115020751953125,
      "step": 5013
    },
    {
      "epoch": 3.0596923828125e-05,
      "step": 5013,
      "training_step_time": 0.40605735778808594
    },
    {
      "epoch": 3.060302734375e-05,
      "model_forward_time": 0.11499953269958496,
      "step": 5014
    },
    {
      "epoch": 3.060302734375e-05,
      "step": 5014,
      "training_step_time": 0.3888516426086426
    },
    {
      "epoch": 3.0609130859375e-05,
      "model_forward_time": 0.11479020118713379,
      "step": 5015
    },
    {
      "epoch": 3.0609130859375e-05,
      "step": 5015,
      "training_step_time": 0.38902711868286133
    },
    {
      "epoch": 3.0615234375e-05,
      "model_forward_time": 0.11502337455749512,
      "step": 5016
    },
    {
      "epoch": 3.0615234375e-05,
      "step": 5016,
      "training_step_time": 0.40250158309936523
    },
    {
      "epoch": 3.0621337890625e-05,
      "model_forward_time": 0.11521458625793457,
      "step": 5017
    },
    {
      "epoch": 3.0621337890625e-05,
      "step": 5017,
      "training_step_time": 0.39490604400634766
    },
    {
      "epoch": 3.062744140625e-05,
      "model_forward_time": 0.11510586738586426,
      "step": 5018
    },
    {
      "epoch": 3.062744140625e-05,
      "step": 5018,
      "training_step_time": 0.39462709426879883
    },
    {
      "epoch": 3.0633544921875e-05,
      "model_forward_time": 0.11416196823120117,
      "step": 5019
    },
    {
      "epoch": 3.0633544921875e-05,
      "step": 5019,
      "training_step_time": 0.39188289642333984
    },
    {
      "epoch": 3.06396484375e-05,
      "grad_norm": 0.21656997501850128,
      "learning_rate": 9.969044049836767e-05,
      "loss": 0.0976,
      "step": 5020
    },
    {
      "epoch": 3.06396484375e-05,
      "model_forward_time": 0.11553025245666504,
      "step": 5020
    },
    {
      "epoch": 3.06396484375e-05,
      "step": 5020,
      "training_step_time": 0.40409350395202637
    },
    {
      "epoch": 3.0645751953125e-05,
      "model_forward_time": 0.11552715301513672,
      "step": 5021
    },
    {
      "epoch": 3.0645751953125e-05,
      "step": 5021,
      "training_step_time": 0.4676485061645508
    },
    {
      "epoch": 3.065185546875e-05,
      "model_forward_time": 0.1155855655670166,
      "step": 5022
    },
    {
      "epoch": 3.065185546875e-05,
      "step": 5022,
      "training_step_time": 0.4094233512878418
    },
    {
      "epoch": 3.0657958984375e-05,
      "model_forward_time": 0.11473202705383301,
      "step": 5023
    },
    {
      "epoch": 3.0657958984375e-05,
      "step": 5023,
      "training_step_time": 0.47662806510925293
    },
    {
      "epoch": 3.06640625e-05,
      "model_forward_time": 0.115142822265625,
      "step": 5024
    },
    {
      "epoch": 3.06640625e-05,
      "step": 5024,
      "training_step_time": 0.5040795803070068
    },
    {
      "epoch": 3.0670166015625e-05,
      "model_forward_time": 0.11538434028625488,
      "step": 5025
    },
    {
      "epoch": 3.0670166015625e-05,
      "step": 5025,
      "training_step_time": 0.5003540515899658
    },
    {
      "epoch": 3.067626953125e-05,
      "model_forward_time": 0.11457586288452148,
      "step": 5026
    },
    {
      "epoch": 3.067626953125e-05,
      "step": 5026,
      "training_step_time": 0.4396941661834717
    },
    {
      "epoch": 3.0682373046875e-05,
      "model_forward_time": 0.11453032493591309,
      "step": 5027
    },
    {
      "epoch": 3.0682373046875e-05,
      "step": 5027,
      "training_step_time": 0.41084742546081543
    },
    {
      "epoch": 3.06884765625e-05,
      "model_forward_time": 0.1148843765258789,
      "step": 5028
    },
    {
      "epoch": 3.06884765625e-05,
      "step": 5028,
      "training_step_time": 0.3953397274017334
    },
    {
      "epoch": 3.0694580078125e-05,
      "model_forward_time": 0.11484432220458984,
      "step": 5029
    },
    {
      "epoch": 3.0694580078125e-05,
      "step": 5029,
      "training_step_time": 0.406217098236084
    },
    {
      "epoch": 3.070068359375e-05,
      "grad_norm": 0.39605221152305603,
      "learning_rate": 9.968737117217313e-05,
      "loss": 0.093,
      "step": 5030
    },
    {
      "epoch": 3.070068359375e-05,
      "model_forward_time": 0.11524152755737305,
      "step": 5030
    },
    {
      "epoch": 3.070068359375e-05,
      "step": 5030,
      "training_step_time": 0.3984496593475342
    },
    {
      "epoch": 3.0706787109375e-05,
      "model_forward_time": 0.11582589149475098,
      "step": 5031
    },
    {
      "epoch": 3.0706787109375e-05,
      "step": 5031,
      "training_step_time": 0.4025542736053467
    },
    {
      "epoch": 3.0712890625e-05,
      "model_forward_time": 0.11496782302856445,
      "step": 5032
    },
    {
      "epoch": 3.0712890625e-05,
      "step": 5032,
      "training_step_time": 0.396960973739624
    },
    {
      "epoch": 3.0718994140625e-05,
      "model_forward_time": 0.1153724193572998,
      "step": 5033
    },
    {
      "epoch": 3.0718994140625e-05,
      "step": 5033,
      "training_step_time": 0.4002113342285156
    },
    {
      "epoch": 3.072509765625e-05,
      "model_forward_time": 0.11545395851135254,
      "step": 5034
    },
    {
      "epoch": 3.072509765625e-05,
      "step": 5034,
      "training_step_time": 0.39424943923950195
    },
    {
      "epoch": 3.0731201171875e-05,
      "model_forward_time": 0.11525535583496094,
      "step": 5035
    },
    {
      "epoch": 3.0731201171875e-05,
      "step": 5035,
      "training_step_time": 0.4239051342010498
    },
    {
      "epoch": 3.07373046875e-05,
      "model_forward_time": 0.11495828628540039,
      "step": 5036
    },
    {
      "epoch": 3.07373046875e-05,
      "step": 5036,
      "training_step_time": 0.4548313617706299
    },
    {
      "epoch": 3.0743408203125e-05,
      "model_forward_time": 0.11472940444946289,
      "step": 5037
    },
    {
      "epoch": 3.0743408203125e-05,
      "step": 5037,
      "training_step_time": 0.3956477642059326
    },
    {
      "epoch": 3.074951171875e-05,
      "model_forward_time": 0.11599349975585938,
      "step": 5038
    },
    {
      "epoch": 3.074951171875e-05,
      "step": 5038,
      "training_step_time": 0.372422456741333
    },
    {
      "epoch": 3.0755615234375e-05,
      "model_forward_time": 0.11503267288208008,
      "step": 5039
    },
    {
      "epoch": 3.0755615234375e-05,
      "step": 5039,
      "training_step_time": 0.49692249298095703
    },
    {
      "epoch": 3.076171875e-05,
      "grad_norm": 0.31527990102767944,
      "learning_rate": 9.968428675226714e-05,
      "loss": 0.0933,
      "step": 5040
    },
    {
      "epoch": 3.076171875e-05,
      "model_forward_time": 0.11469078063964844,
      "step": 5040
    },
    {
      "epoch": 3.076171875e-05,
      "step": 5040,
      "training_step_time": 0.4924135208129883
    },
    {
      "epoch": 3.0767822265625e-05,
      "model_forward_time": 0.11742520332336426,
      "step": 5041
    },
    {
      "epoch": 3.0767822265625e-05,
      "step": 5041,
      "training_step_time": 0.4544668197631836
    },
    {
      "epoch": 3.077392578125e-05,
      "model_forward_time": 0.1144258975982666,
      "step": 5042
    },
    {
      "epoch": 3.077392578125e-05,
      "step": 5042,
      "training_step_time": 0.39002299308776855
    },
    {
      "epoch": 3.0780029296875e-05,
      "model_forward_time": 0.11475372314453125,
      "step": 5043
    },
    {
      "epoch": 3.0780029296875e-05,
      "step": 5043,
      "training_step_time": 0.3984529972076416
    },
    {
      "epoch": 3.07861328125e-05,
      "model_forward_time": 0.11504554748535156,
      "step": 5044
    },
    {
      "epoch": 3.07861328125e-05,
      "step": 5044,
      "training_step_time": 0.3925762176513672
    },
    {
      "epoch": 3.0792236328125e-05,
      "model_forward_time": 0.11528396606445312,
      "step": 5045
    },
    {
      "epoch": 3.0792236328125e-05,
      "step": 5045,
      "training_step_time": 0.394636869430542
    },
    {
      "epoch": 3.079833984375e-05,
      "model_forward_time": 0.11505317687988281,
      "step": 5046
    },
    {
      "epoch": 3.079833984375e-05,
      "step": 5046,
      "training_step_time": 0.39044737815856934
    },
    {
      "epoch": 3.0804443359375e-05,
      "model_forward_time": 0.11520552635192871,
      "step": 5047
    },
    {
      "epoch": 3.0804443359375e-05,
      "step": 5047,
      "training_step_time": 0.3901660442352295
    },
    {
      "epoch": 3.0810546875e-05,
      "model_forward_time": 0.11602377891540527,
      "step": 5048
    },
    {
      "epoch": 3.0810546875e-05,
      "step": 5048,
      "training_step_time": 0.40917325019836426
    },
    {
      "epoch": 3.0816650390625e-05,
      "model_forward_time": 0.11515188217163086,
      "step": 5049
    },
    {
      "epoch": 3.0816650390625e-05,
      "step": 5049,
      "training_step_time": 0.40853142738342285
    },
    {
      "epoch": 3.082275390625e-05,
      "grad_norm": 0.3194001615047455,
      "learning_rate": 9.968118723958668e-05,
      "loss": 0.0949,
      "step": 5050
    },
    {
      "epoch": 3.082275390625e-05,
      "model_forward_time": 0.11531758308410645,
      "step": 5050
    },
    {
      "epoch": 3.082275390625e-05,
      "step": 5050,
      "training_step_time": 0.3956286907196045
    },
    {
      "epoch": 3.0828857421875e-05,
      "model_forward_time": 0.11514639854431152,
      "step": 5051
    },
    {
      "epoch": 3.0828857421875e-05,
      "step": 5051,
      "training_step_time": 0.3845863342285156
    },
    {
      "epoch": 3.08349609375e-05,
      "model_forward_time": 0.1152200698852539,
      "step": 5052
    },
    {
      "epoch": 3.08349609375e-05,
      "step": 5052,
      "training_step_time": 0.40073347091674805
    },
    {
      "epoch": 3.0841064453125e-05,
      "model_forward_time": 0.11510610580444336,
      "step": 5053
    },
    {
      "epoch": 3.0841064453125e-05,
      "step": 5053,
      "training_step_time": 0.4018990993499756
    },
    {
      "epoch": 3.084716796875e-05,
      "model_forward_time": 0.11571335792541504,
      "step": 5054
    },
    {
      "epoch": 3.084716796875e-05,
      "step": 5054,
      "training_step_time": 0.46132326126098633
    },
    {
      "epoch": 3.0853271484375e-05,
      "model_forward_time": 0.11568737030029297,
      "step": 5055
    },
    {
      "epoch": 3.0853271484375e-05,
      "step": 5055,
      "training_step_time": 0.502007007598877
    },
    {
      "epoch": 3.0859375e-05,
      "model_forward_time": 0.11513376235961914,
      "step": 5056
    },
    {
      "epoch": 3.0859375e-05,
      "step": 5056,
      "training_step_time": 0.48444318771362305
    },
    {
      "epoch": 3.0865478515625e-05,
      "model_forward_time": 0.11457085609436035,
      "step": 5057
    },
    {
      "epoch": 3.0865478515625e-05,
      "step": 5057,
      "training_step_time": 0.39278721809387207
    },
    {
      "epoch": 3.087158203125e-05,
      "model_forward_time": 0.1164865493774414,
      "step": 5058
    },
    {
      "epoch": 3.087158203125e-05,
      "step": 5058,
      "training_step_time": 0.3940701484680176
    },
    {
      "epoch": 3.0877685546875e-05,
      "model_forward_time": 0.11484527587890625,
      "step": 5059
    },
    {
      "epoch": 3.0877685546875e-05,
      "step": 5059,
      "training_step_time": 0.390059232711792
    },
    {
      "epoch": 3.08837890625e-05,
      "grad_norm": 0.2771991193294525,
      "learning_rate": 9.967807263507329e-05,
      "loss": 0.0926,
      "step": 5060
    },
    {
      "epoch": 3.08837890625e-05,
      "model_forward_time": 0.11548471450805664,
      "step": 5060
    },
    {
      "epoch": 3.08837890625e-05,
      "step": 5060,
      "training_step_time": 0.40302062034606934
    },
    {
      "epoch": 3.0889892578125e-05,
      "model_forward_time": 0.11598038673400879,
      "step": 5061
    },
    {
      "epoch": 3.0889892578125e-05,
      "step": 5061,
      "training_step_time": 0.3878045082092285
    },
    {
      "epoch": 3.089599609375e-05,
      "model_forward_time": 0.11545586585998535,
      "step": 5062
    },
    {
      "epoch": 3.089599609375e-05,
      "step": 5062,
      "training_step_time": 0.3933894634246826
    },
    {
      "epoch": 3.0902099609375e-05,
      "model_forward_time": 0.11582612991333008,
      "step": 5063
    },
    {
      "epoch": 3.0902099609375e-05,
      "step": 5063,
      "training_step_time": 0.4136781692504883
    },
    {
      "epoch": 3.0908203125e-05,
      "model_forward_time": 0.11554121971130371,
      "step": 5064
    },
    {
      "epoch": 3.0908203125e-05,
      "step": 5064,
      "training_step_time": 0.39200758934020996
    },
    {
      "epoch": 3.0914306640625e-05,
      "model_forward_time": 0.11493802070617676,
      "step": 5065
    },
    {
      "epoch": 3.0914306640625e-05,
      "step": 5065,
      "training_step_time": 0.40880370140075684
    },
    {
      "epoch": 3.092041015625e-05,
      "model_forward_time": 0.11512589454650879,
      "step": 5066
    },
    {
      "epoch": 3.092041015625e-05,
      "step": 5066,
      "training_step_time": 0.4147365093231201
    },
    {
      "epoch": 3.0926513671875e-05,
      "model_forward_time": 0.11462187767028809,
      "step": 5067
    },
    {
      "epoch": 3.0926513671875e-05,
      "step": 5067,
      "training_step_time": 0.3657517433166504
    },
    {
      "epoch": 3.09326171875e-05,
      "model_forward_time": 0.11526870727539062,
      "step": 5068
    },
    {
      "epoch": 3.09326171875e-05,
      "step": 5068,
      "training_step_time": 0.4299743175506592
    },
    {
      "epoch": 3.0938720703125e-05,
      "model_forward_time": 0.11655426025390625,
      "step": 5069
    },
    {
      "epoch": 3.0938720703125e-05,
      "step": 5069,
      "training_step_time": 0.4834144115447998
    },
    {
      "epoch": 3.094482421875e-05,
      "grad_norm": 0.18782612681388855,
      "learning_rate": 9.967494293967312e-05,
      "loss": 0.0846,
      "step": 5070
    },
    {
      "epoch": 3.094482421875e-05,
      "model_forward_time": 0.11562442779541016,
      "step": 5070
    },
    {
      "epoch": 3.094482421875e-05,
      "step": 5070,
      "training_step_time": 0.42763423919677734
    },
    {
      "epoch": 3.0950927734375e-05,
      "model_forward_time": 0.11500716209411621,
      "step": 5071
    },
    {
      "epoch": 3.0950927734375e-05,
      "step": 5071,
      "training_step_time": 0.4924614429473877
    },
    {
      "epoch": 3.095703125e-05,
      "model_forward_time": 0.11565923690795898,
      "step": 5072
    },
    {
      "epoch": 3.095703125e-05,
      "step": 5072,
      "training_step_time": 0.39289212226867676
    },
    {
      "epoch": 3.0963134765625e-05,
      "model_forward_time": 0.11476588249206543,
      "step": 5073
    },
    {
      "epoch": 3.0963134765625e-05,
      "step": 5073,
      "training_step_time": 0.39573097229003906
    },
    {
      "epoch": 3.096923828125e-05,
      "model_forward_time": 0.11546754837036133,
      "step": 5074
    },
    {
      "epoch": 3.096923828125e-05,
      "step": 5074,
      "training_step_time": 0.3947758674621582
    },
    {
      "epoch": 3.0975341796875e-05,
      "model_forward_time": 0.11629271507263184,
      "step": 5075
    },
    {
      "epoch": 3.0975341796875e-05,
      "step": 5075,
      "training_step_time": 0.39667820930480957
    },
    {
      "epoch": 3.09814453125e-05,
      "model_forward_time": 0.11513137817382812,
      "step": 5076
    },
    {
      "epoch": 3.09814453125e-05,
      "step": 5076,
      "training_step_time": 0.4032466411590576
    },
    {
      "epoch": 3.0987548828125e-05,
      "model_forward_time": 0.11547684669494629,
      "step": 5077
    },
    {
      "epoch": 3.0987548828125e-05,
      "step": 5077,
      "training_step_time": 0.3943443298339844
    },
    {
      "epoch": 3.099365234375e-05,
      "model_forward_time": 0.11577630043029785,
      "step": 5078
    },
    {
      "epoch": 3.099365234375e-05,
      "step": 5078,
      "training_step_time": 0.40055060386657715
    },
    {
      "epoch": 3.0999755859375e-05,
      "model_forward_time": 0.11647176742553711,
      "step": 5079
    },
    {
      "epoch": 3.0999755859375e-05,
      "step": 5079,
      "training_step_time": 0.3929460048675537
    },
    {
      "epoch": 3.1005859375e-05,
      "grad_norm": 0.277910441160202,
      "learning_rate": 9.967179815433685e-05,
      "loss": 0.0911,
      "step": 5080
    },
    {
      "epoch": 3.1005859375e-05,
      "model_forward_time": 0.1150350570678711,
      "step": 5080
    },
    {
      "epoch": 3.1005859375e-05,
      "step": 5080,
      "training_step_time": 0.38873958587646484
    },
    {
      "epoch": 3.1011962890625e-05,
      "model_forward_time": 0.11690735816955566,
      "step": 5081
    },
    {
      "epoch": 3.1011962890625e-05,
      "step": 5081,
      "training_step_time": 0.38443708419799805
    },
    {
      "epoch": 3.101806640625e-05,
      "model_forward_time": 0.11529707908630371,
      "step": 5082
    },
    {
      "epoch": 3.101806640625e-05,
      "step": 5082,
      "training_step_time": 0.4632902145385742
    },
    {
      "epoch": 3.1024169921875e-05,
      "model_forward_time": 0.1159524917602539,
      "step": 5083
    },
    {
      "epoch": 3.1024169921875e-05,
      "step": 5083,
      "training_step_time": 0.5156006813049316
    },
    {
      "epoch": 3.10302734375e-05,
      "model_forward_time": 0.11482691764831543,
      "step": 5084
    },
    {
      "epoch": 3.10302734375e-05,
      "step": 5084,
      "training_step_time": 0.5100686550140381
    },
    {
      "epoch": 3.1036376953125e-05,
      "model_forward_time": 0.11517739295959473,
      "step": 5085
    },
    {
      "epoch": 3.1036376953125e-05,
      "step": 5085,
      "training_step_time": 0.5286900997161865
    },
    {
      "epoch": 3.104248046875e-05,
      "model_forward_time": 0.11454296112060547,
      "step": 5086
    },
    {
      "epoch": 3.104248046875e-05,
      "step": 5086,
      "training_step_time": 0.40572333335876465
    },
    {
      "epoch": 3.1048583984375e-05,
      "model_forward_time": 0.1144411563873291,
      "step": 5087
    },
    {
      "epoch": 3.1048583984375e-05,
      "step": 5087,
      "training_step_time": 0.3917236328125
    },
    {
      "epoch": 3.10546875e-05,
      "model_forward_time": 0.11442732810974121,
      "step": 5088
    },
    {
      "epoch": 3.10546875e-05,
      "step": 5088,
      "training_step_time": 0.402451753616333
    },
    {
      "epoch": 3.1060791015625e-05,
      "model_forward_time": 0.11514449119567871,
      "step": 5089
    },
    {
      "epoch": 3.1060791015625e-05,
      "step": 5089,
      "training_step_time": 0.402435302734375
    },
    {
      "epoch": 3.106689453125e-05,
      "grad_norm": 0.3900544047355652,
      "learning_rate": 9.966863828001982e-05,
      "loss": 0.0953,
      "step": 5090
    },
    {
      "epoch": 3.106689453125e-05,
      "model_forward_time": 0.11585116386413574,
      "step": 5090
    },
    {
      "epoch": 3.106689453125e-05,
      "step": 5090,
      "training_step_time": 0.42803406715393066
    },
    {
      "epoch": 3.1072998046875e-05,
      "model_forward_time": 0.11626911163330078,
      "step": 5091
    },
    {
      "epoch": 3.1072998046875e-05,
      "step": 5091,
      "training_step_time": 0.3960278034210205
    },
    {
      "epoch": 3.10791015625e-05,
      "model_forward_time": 0.11626768112182617,
      "step": 5092
    },
    {
      "epoch": 3.10791015625e-05,
      "step": 5092,
      "training_step_time": 0.3889658451080322
    },
    {
      "epoch": 3.1085205078125e-05,
      "model_forward_time": 0.11511921882629395,
      "step": 5093
    },
    {
      "epoch": 3.1085205078125e-05,
      "step": 5093,
      "training_step_time": 0.4053788185119629
    },
    {
      "epoch": 3.109130859375e-05,
      "model_forward_time": 0.11452174186706543,
      "step": 5094
    },
    {
      "epoch": 3.109130859375e-05,
      "step": 5094,
      "training_step_time": 0.4001641273498535
    },
    {
      "epoch": 3.1097412109375e-05,
      "model_forward_time": 0.11501789093017578,
      "step": 5095
    },
    {
      "epoch": 3.1097412109375e-05,
      "step": 5095,
      "training_step_time": 0.40275073051452637
    },
    {
      "epoch": 3.1103515625e-05,
      "model_forward_time": 0.11534762382507324,
      "step": 5096
    },
    {
      "epoch": 3.1103515625e-05,
      "step": 5096,
      "training_step_time": 0.36518359184265137
    },
    {
      "epoch": 3.1109619140625e-05,
      "model_forward_time": 0.11561799049377441,
      "step": 5097
    },
    {
      "epoch": 3.1109619140625e-05,
      "step": 5097,
      "training_step_time": 0.4380462169647217
    },
    {
      "epoch": 3.111572265625e-05,
      "model_forward_time": 0.11516809463500977,
      "step": 5098
    },
    {
      "epoch": 3.111572265625e-05,
      "step": 5098,
      "training_step_time": 0.4604299068450928
    },
    {
      "epoch": 3.1121826171875e-05,
      "model_forward_time": 0.11554908752441406,
      "step": 5099
    },
    {
      "epoch": 3.1121826171875e-05,
      "step": 5099,
      "training_step_time": 0.49286913871765137
    },
    {
      "epoch": 3.11279296875e-05,
      "grad_norm": 0.30753690004348755,
      "learning_rate": 9.966546331768191e-05,
      "loss": 0.0952,
      "step": 5100
    },
    {
      "epoch": 3.11279296875e-05,
      "model_forward_time": 0.11428260803222656,
      "step": 5100
    },
    {
      "epoch": 3.11279296875e-05,
      "step": 5100,
      "training_step_time": 0.49650096893310547
    },
    {
      "epoch": 3.1134033203125e-05,
      "model_forward_time": 0.11435055732727051,
      "step": 5101
    },
    {
      "epoch": 3.1134033203125e-05,
      "step": 5101,
      "training_step_time": 0.39518094062805176
    },
    {
      "epoch": 3.114013671875e-05,
      "model_forward_time": 0.11496257781982422,
      "step": 5102
    },
    {
      "epoch": 3.114013671875e-05,
      "step": 5102,
      "training_step_time": 0.3859529495239258
    },
    {
      "epoch": 3.1146240234375e-05,
      "model_forward_time": 0.11463785171508789,
      "step": 5103
    },
    {
      "epoch": 3.1146240234375e-05,
      "step": 5103,
      "training_step_time": 0.42530369758605957
    },
    {
      "epoch": 3.115234375e-05,
      "model_forward_time": 0.11552023887634277,
      "step": 5104
    },
    {
      "epoch": 3.115234375e-05,
      "step": 5104,
      "training_step_time": 0.39557695388793945
    },
    {
      "epoch": 3.1158447265625e-05,
      "model_forward_time": 0.1154792308807373,
      "step": 5105
    },
    {
      "epoch": 3.1158447265625e-05,
      "step": 5105,
      "training_step_time": 0.40156078338623047
    },
    {
      "epoch": 3.116455078125e-05,
      "model_forward_time": 0.11598443984985352,
      "step": 5106
    },
    {
      "epoch": 3.116455078125e-05,
      "step": 5106,
      "training_step_time": 0.39305758476257324
    },
    {
      "epoch": 3.1170654296875e-05,
      "model_forward_time": 0.11524224281311035,
      "step": 5107
    },
    {
      "epoch": 3.1170654296875e-05,
      "step": 5107,
      "training_step_time": 0.4035942554473877
    },
    {
      "epoch": 3.11767578125e-05,
      "model_forward_time": 0.11621236801147461,
      "step": 5108
    },
    {
      "epoch": 3.11767578125e-05,
      "step": 5108,
      "training_step_time": 0.40190911293029785
    },
    {
      "epoch": 3.1182861328125e-05,
      "model_forward_time": 0.11530184745788574,
      "step": 5109
    },
    {
      "epoch": 3.1182861328125e-05,
      "step": 5109,
      "training_step_time": 0.3924062252044678
    },
    {
      "epoch": 3.118896484375e-05,
      "grad_norm": 0.24132797122001648,
      "learning_rate": 9.966227326828759e-05,
      "loss": 0.0895,
      "step": 5110
    },
    {
      "epoch": 3.118896484375e-05,
      "model_forward_time": 0.11503982543945312,
      "step": 5110
    },
    {
      "epoch": 3.118896484375e-05,
      "step": 5110,
      "training_step_time": 0.4005312919616699
    },
    {
      "epoch": 3.1195068359375e-05,
      "model_forward_time": 0.1161808967590332,
      "step": 5111
    },
    {
      "epoch": 3.1195068359375e-05,
      "step": 5111,
      "training_step_time": 0.49077820777893066
    },
    {
      "epoch": 3.1201171875e-05,
      "model_forward_time": 0.11569714546203613,
      "step": 5112
    },
    {
      "epoch": 3.1201171875e-05,
      "step": 5112,
      "training_step_time": 0.5044925212860107
    },
    {
      "epoch": 3.1207275390625e-05,
      "model_forward_time": 0.1148982048034668,
      "step": 5113
    },
    {
      "epoch": 3.1207275390625e-05,
      "step": 5113,
      "training_step_time": 0.5093920230865479
    },
    {
      "epoch": 3.121337890625e-05,
      "model_forward_time": 0.11428117752075195,
      "step": 5114
    },
    {
      "epoch": 3.121337890625e-05,
      "step": 5114,
      "training_step_time": 0.49182605743408203
    },
    {
      "epoch": 3.1219482421875e-05,
      "model_forward_time": 0.11410403251647949,
      "step": 5115
    },
    {
      "epoch": 3.1219482421875e-05,
      "step": 5115,
      "training_step_time": 0.4689757823944092
    },
    {
      "epoch": 3.12255859375e-05,
      "model_forward_time": 0.11428046226501465,
      "step": 5116
    },
    {
      "epoch": 3.12255859375e-05,
      "step": 5116,
      "training_step_time": 0.4401583671569824
    },
    {
      "epoch": 3.1231689453125e-05,
      "model_forward_time": 0.11617827415466309,
      "step": 5117
    },
    {
      "epoch": 3.1231689453125e-05,
      "step": 5117,
      "training_step_time": 0.3933403491973877
    },
    {
      "epoch": 3.123779296875e-05,
      "model_forward_time": 0.11441421508789062,
      "step": 5118
    },
    {
      "epoch": 3.123779296875e-05,
      "step": 5118,
      "training_step_time": 0.3908510208129883
    },
    {
      "epoch": 3.1243896484375e-05,
      "model_forward_time": 0.11433172225952148,
      "step": 5119
    },
    {
      "epoch": 3.1243896484375e-05,
      "step": 5119,
      "training_step_time": 0.40181899070739746
    },
    {
      "epoch": 3.125e-05,
      "grad_norm": 0.2713369131088257,
      "learning_rate": 9.96590681328059e-05,
      "loss": 0.1028,
      "step": 5120
    },
    {
      "epoch": 3.125e-05,
      "model_forward_time": 0.11528253555297852,
      "step": 5120
    },
    {
      "epoch": 3.125e-05,
      "step": 5120,
      "training_step_time": 0.41027235984802246
    },
    {
      "epoch": 3.1256103515625e-05,
      "model_forward_time": 0.11500239372253418,
      "step": 5121
    },
    {
      "epoch": 3.1256103515625e-05,
      "step": 5121,
      "training_step_time": 0.39567065238952637
    },
    {
      "epoch": 3.126220703125e-05,
      "model_forward_time": 0.11552977561950684,
      "step": 5122
    },
    {
      "epoch": 3.126220703125e-05,
      "step": 5122,
      "training_step_time": 0.39669275283813477
    },
    {
      "epoch": 3.1268310546875e-05,
      "model_forward_time": 0.11511707305908203,
      "step": 5123
    },
    {
      "epoch": 3.1268310546875e-05,
      "step": 5123,
      "training_step_time": 0.3915879726409912
    },
    {
      "epoch": 3.12744140625e-05,
      "model_forward_time": 0.11472654342651367,
      "step": 5124
    },
    {
      "epoch": 3.12744140625e-05,
      "step": 5124,
      "training_step_time": 0.38239169120788574
    },
    {
      "epoch": 3.1280517578125e-05,
      "model_forward_time": 0.11493778228759766,
      "step": 5125
    },
    {
      "epoch": 3.1280517578125e-05,
      "step": 5125,
      "training_step_time": 0.36481380462646484
    },
    {
      "epoch": 3.128662109375e-05,
      "model_forward_time": 0.11586332321166992,
      "step": 5126
    },
    {
      "epoch": 3.128662109375e-05,
      "step": 5126,
      "training_step_time": 0.43801331520080566
    },
    {
      "epoch": 3.1292724609375e-05,
      "model_forward_time": 0.11493802070617676,
      "step": 5127
    },
    {
      "epoch": 3.1292724609375e-05,
      "step": 5127,
      "training_step_time": 0.4292454719543457
    },
    {
      "epoch": 3.1298828125e-05,
      "model_forward_time": 0.11488151550292969,
      "step": 5128
    },
    {
      "epoch": 3.1298828125e-05,
      "step": 5128,
      "training_step_time": 0.44317102432250977
    },
    {
      "epoch": 3.1304931640625e-05,
      "model_forward_time": 0.1148226261138916,
      "step": 5129
    },
    {
      "epoch": 3.1304931640625e-05,
      "step": 5129,
      "training_step_time": 0.5397887229919434
    },
    {
      "epoch": 3.131103515625e-05,
      "grad_norm": 0.27616530656814575,
      "learning_rate": 9.965584791221048e-05,
      "loss": 0.0876,
      "step": 5130
    },
    {
      "epoch": 3.131103515625e-05,
      "model_forward_time": 0.11527109146118164,
      "step": 5130
    },
    {
      "epoch": 3.131103515625e-05,
      "step": 5130,
      "training_step_time": 0.40384411811828613
    },
    {
      "epoch": 3.1317138671875e-05,
      "model_forward_time": 0.11493420600891113,
      "step": 5131
    },
    {
      "epoch": 3.1317138671875e-05,
      "step": 5131,
      "training_step_time": 0.3903012275695801
    },
    {
      "epoch": 3.13232421875e-05,
      "model_forward_time": 0.11450672149658203,
      "step": 5132
    },
    {
      "epoch": 3.13232421875e-05,
      "step": 5132,
      "training_step_time": 0.3947465419769287
    },
    {
      "epoch": 3.1329345703125e-05,
      "model_forward_time": 0.11517596244812012,
      "step": 5133
    },
    {
      "epoch": 3.1329345703125e-05,
      "step": 5133,
      "training_step_time": 0.3840456008911133
    },
    {
      "epoch": 3.133544921875e-05,
      "model_forward_time": 0.11498427391052246,
      "step": 5134
    },
    {
      "epoch": 3.133544921875e-05,
      "step": 5134,
      "training_step_time": 0.3879098892211914
    },
    {
      "epoch": 3.1341552734375e-05,
      "model_forward_time": 0.11449480056762695,
      "step": 5135
    },
    {
      "epoch": 3.1341552734375e-05,
      "step": 5135,
      "training_step_time": 0.39139699935913086
    },
    {
      "epoch": 3.134765625e-05,
      "model_forward_time": 0.1156008243560791,
      "step": 5136
    },
    {
      "epoch": 3.134765625e-05,
      "step": 5136,
      "training_step_time": 0.3989272117614746
    },
    {
      "epoch": 3.1353759765625e-05,
      "model_forward_time": 0.11526012420654297,
      "step": 5137
    },
    {
      "epoch": 3.1353759765625e-05,
      "step": 5137,
      "training_step_time": 0.4010140895843506
    },
    {
      "epoch": 3.135986328125e-05,
      "model_forward_time": 0.11550211906433105,
      "step": 5138
    },
    {
      "epoch": 3.135986328125e-05,
      "step": 5138,
      "training_step_time": 0.8254580497741699
    },
    {
      "epoch": 3.1365966796875e-05,
      "model_forward_time": 0.11539983749389648,
      "step": 5139
    },
    {
      "epoch": 3.1365966796875e-05,
      "step": 5139,
      "training_step_time": 0.36504507064819336
    },
    {
      "epoch": 3.13720703125e-05,
      "grad_norm": 0.2898329794406891,
      "learning_rate": 9.965261260747956e-05,
      "loss": 0.0909,
      "step": 5140
    },
    {
      "epoch": 3.13720703125e-05,
      "model_forward_time": 0.11399555206298828,
      "step": 5140
    },
    {
      "epoch": 3.13720703125e-05,
      "step": 5140,
      "training_step_time": 0.428391695022583
    },
    {
      "epoch": 3.1378173828125e-05,
      "model_forward_time": 0.115936279296875,
      "step": 5141
    },
    {
      "epoch": 3.1378173828125e-05,
      "step": 5141,
      "training_step_time": 0.4961698055267334
    },
    {
      "epoch": 3.138427734375e-05,
      "model_forward_time": 0.11528611183166504,
      "step": 5142
    },
    {
      "epoch": 3.138427734375e-05,
      "step": 5142,
      "training_step_time": 0.4926755428314209
    },
    {
      "epoch": 3.1390380859375e-05,
      "model_forward_time": 0.11498761177062988,
      "step": 5143
    },
    {
      "epoch": 3.1390380859375e-05,
      "step": 5143,
      "training_step_time": 0.45134568214416504
    },
    {
      "epoch": 3.1396484375e-05,
      "model_forward_time": 0.11440324783325195,
      "step": 5144
    },
    {
      "epoch": 3.1396484375e-05,
      "step": 5144,
      "training_step_time": 0.40007758140563965
    },
    {
      "epoch": 3.1402587890625e-05,
      "model_forward_time": 0.11425948143005371,
      "step": 5145
    },
    {
      "epoch": 3.1402587890625e-05,
      "step": 5145,
      "training_step_time": 0.3905186653137207
    },
    {
      "epoch": 3.140869140625e-05,
      "model_forward_time": 0.1141672134399414,
      "step": 5146
    },
    {
      "epoch": 3.140869140625e-05,
      "step": 5146,
      "training_step_time": 0.38210320472717285
    },
    {
      "epoch": 3.1414794921875e-05,
      "model_forward_time": 0.11497902870178223,
      "step": 5147
    },
    {
      "epoch": 3.1414794921875e-05,
      "step": 5147,
      "training_step_time": 0.3941495418548584
    },
    {
      "epoch": 3.14208984375e-05,
      "model_forward_time": 0.11562156677246094,
      "step": 5148
    },
    {
      "epoch": 3.14208984375e-05,
      "step": 5148,
      "training_step_time": 0.3910367488861084
    },
    {
      "epoch": 3.1427001953125e-05,
      "model_forward_time": 0.11501646041870117,
      "step": 5149
    },
    {
      "epoch": 3.1427001953125e-05,
      "step": 5149,
      "training_step_time": 0.3962235450744629
    },
    {
      "epoch": 3.143310546875e-05,
      "grad_norm": 0.26406434178352356,
      "learning_rate": 9.96493622195959e-05,
      "loss": 0.0891,
      "step": 5150
    },
    {
      "epoch": 3.143310546875e-05,
      "model_forward_time": 0.11525249481201172,
      "step": 5150
    },
    {
      "epoch": 3.143310546875e-05,
      "step": 5150,
      "training_step_time": 0.551990270614624
    },
    {
      "epoch": 3.1439208984375e-05,
      "model_forward_time": 0.11520934104919434,
      "step": 5151
    },
    {
      "epoch": 3.1439208984375e-05,
      "step": 5151,
      "training_step_time": 0.3889622688293457
    },
    {
      "epoch": 3.14453125e-05,
      "model_forward_time": 0.11537432670593262,
      "step": 5152
    },
    {
      "epoch": 3.14453125e-05,
      "step": 5152,
      "training_step_time": 0.39433836936950684
    },
    {
      "epoch": 3.1451416015625e-05,
      "model_forward_time": 0.11545467376708984,
      "step": 5153
    },
    {
      "epoch": 3.1451416015625e-05,
      "step": 5153,
      "training_step_time": 0.3656280040740967
    },
    {
      "epoch": 3.145751953125e-05,
      "model_forward_time": 0.11491823196411133,
      "step": 5154
    },
    {
      "epoch": 3.145751953125e-05,
      "step": 5154,
      "training_step_time": 0.45285582542419434
    },
    {
      "epoch": 3.1463623046875e-05,
      "model_forward_time": 0.11459755897521973,
      "step": 5155
    },
    {
      "epoch": 3.1463623046875e-05,
      "step": 5155,
      "training_step_time": 0.4179058074951172
    },
    {
      "epoch": 3.14697265625e-05,
      "model_forward_time": 0.1160118579864502,
      "step": 5156
    },
    {
      "epoch": 3.14697265625e-05,
      "step": 5156,
      "training_step_time": 0.6247234344482422
    },
    {
      "epoch": 3.1475830078125e-05,
      "model_forward_time": 0.11447882652282715,
      "step": 5157
    },
    {
      "epoch": 3.1475830078125e-05,
      "step": 5157,
      "training_step_time": 0.4618403911590576
    },
    {
      "epoch": 3.148193359375e-05,
      "model_forward_time": 0.11613059043884277,
      "step": 5158
    },
    {
      "epoch": 3.148193359375e-05,
      "step": 5158,
      "training_step_time": 0.39971303939819336
    },
    {
      "epoch": 3.1488037109375e-05,
      "model_forward_time": 0.11448264122009277,
      "step": 5159
    },
    {
      "epoch": 3.1488037109375e-05,
      "step": 5159,
      "training_step_time": 0.3829531669616699
    },
    {
      "epoch": 3.1494140625e-05,
      "grad_norm": 0.2911367416381836,
      "learning_rate": 9.964609674954696e-05,
      "loss": 0.0968,
      "step": 5160
    },
    {
      "epoch": 3.1494140625e-05,
      "model_forward_time": 0.11469316482543945,
      "step": 5160
    },
    {
      "epoch": 3.1494140625e-05,
      "step": 5160,
      "training_step_time": 0.39249444007873535
    },
    {
      "epoch": 3.1500244140625e-05,
      "model_forward_time": 0.11465668678283691,
      "step": 5161
    },
    {
      "epoch": 3.1500244140625e-05,
      "step": 5161,
      "training_step_time": 0.39496946334838867
    },
    {
      "epoch": 3.150634765625e-05,
      "model_forward_time": 0.11472702026367188,
      "step": 5162
    },
    {
      "epoch": 3.150634765625e-05,
      "step": 5162,
      "training_step_time": 0.40583300590515137
    },
    {
      "epoch": 3.1512451171875e-05,
      "model_forward_time": 0.1156623363494873,
      "step": 5163
    },
    {
      "epoch": 3.1512451171875e-05,
      "step": 5163,
      "training_step_time": 0.39778780937194824
    },
    {
      "epoch": 3.15185546875e-05,
      "model_forward_time": 0.11489725112915039,
      "step": 5164
    },
    {
      "epoch": 3.15185546875e-05,
      "step": 5164,
      "training_step_time": 0.3867006301879883
    },
    {
      "epoch": 3.1524658203125e-05,
      "model_forward_time": 0.11552906036376953,
      "step": 5165
    },
    {
      "epoch": 3.1524658203125e-05,
      "step": 5165,
      "training_step_time": 0.40435218811035156
    },
    {
      "epoch": 3.153076171875e-05,
      "model_forward_time": 0.11542749404907227,
      "step": 5166
    },
    {
      "epoch": 3.153076171875e-05,
      "step": 5166,
      "training_step_time": 0.3944835662841797
    },
    {
      "epoch": 3.1536865234375e-05,
      "model_forward_time": 0.11533045768737793,
      "step": 5167
    },
    {
      "epoch": 3.1536865234375e-05,
      "step": 5167,
      "training_step_time": 0.39789700508117676
    },
    {
      "epoch": 3.154296875e-05,
      "model_forward_time": 0.11618709564208984,
      "step": 5168
    },
    {
      "epoch": 3.154296875e-05,
      "step": 5168,
      "training_step_time": 0.4882936477661133
    },
    {
      "epoch": 3.1549072265625e-05,
      "model_forward_time": 0.11509466171264648,
      "step": 5169
    },
    {
      "epoch": 3.1549072265625e-05,
      "step": 5169,
      "training_step_time": 0.4551808834075928
    },
    {
      "epoch": 3.155517578125e-05,
      "grad_norm": 0.2551155388355255,
      "learning_rate": 9.964281619832463e-05,
      "loss": 0.0794,
      "step": 5170
    },
    {
      "epoch": 3.155517578125e-05,
      "model_forward_time": 0.11509990692138672,
      "step": 5170
    },
    {
      "epoch": 3.155517578125e-05,
      "step": 5170,
      "training_step_time": 0.49483776092529297
    },
    {
      "epoch": 3.1561279296875e-05,
      "model_forward_time": 0.11479616165161133,
      "step": 5171
    },
    {
      "epoch": 3.1561279296875e-05,
      "step": 5171,
      "training_step_time": 0.4663684368133545
    },
    {
      "epoch": 3.15673828125e-05,
      "model_forward_time": 0.11481547355651855,
      "step": 5172
    },
    {
      "epoch": 3.15673828125e-05,
      "step": 5172,
      "training_step_time": 0.4846045970916748
    },
    {
      "epoch": 3.1573486328125e-05,
      "model_forward_time": 0.11505937576293945,
      "step": 5173
    },
    {
      "epoch": 3.1573486328125e-05,
      "step": 5173,
      "training_step_time": 0.3890221118927002
    },
    {
      "epoch": 3.157958984375e-05,
      "model_forward_time": 0.11532950401306152,
      "step": 5174
    },
    {
      "epoch": 3.157958984375e-05,
      "step": 5174,
      "training_step_time": 0.3809361457824707
    },
    {
      "epoch": 3.1585693359375e-05,
      "model_forward_time": 0.11420059204101562,
      "step": 5175
    },
    {
      "epoch": 3.1585693359375e-05,
      "step": 5175,
      "training_step_time": 0.3999977111816406
    },
    {
      "epoch": 3.1591796875e-05,
      "model_forward_time": 0.11550641059875488,
      "step": 5176
    },
    {
      "epoch": 3.1591796875e-05,
      "step": 5176,
      "training_step_time": 0.3902301788330078
    },
    {
      "epoch": 3.1597900390625e-05,
      "model_forward_time": 0.11536860466003418,
      "step": 5177
    },
    {
      "epoch": 3.1597900390625e-05,
      "step": 5177,
      "training_step_time": 0.3931303024291992
    },
    {
      "epoch": 3.160400390625e-05,
      "model_forward_time": 0.11522293090820312,
      "step": 5178
    },
    {
      "epoch": 3.160400390625e-05,
      "step": 5178,
      "training_step_time": 0.4037339687347412
    },
    {
      "epoch": 3.1610107421875e-05,
      "model_forward_time": 0.11503934860229492,
      "step": 5179
    },
    {
      "epoch": 3.1610107421875e-05,
      "step": 5179,
      "training_step_time": 0.3997611999511719
    },
    {
      "epoch": 3.16162109375e-05,
      "grad_norm": 0.3633282482624054,
      "learning_rate": 9.963952056692549e-05,
      "loss": 0.0925,
      "step": 5180
    },
    {
      "epoch": 3.16162109375e-05,
      "model_forward_time": 0.11553764343261719,
      "step": 5180
    },
    {
      "epoch": 3.16162109375e-05,
      "step": 5180,
      "training_step_time": 0.681713342666626
    },
    {
      "epoch": 3.1622314453125e-05,
      "model_forward_time": 0.11542844772338867,
      "step": 5181
    },
    {
      "epoch": 3.1622314453125e-05,
      "step": 5181,
      "training_step_time": 0.3918931484222412
    },
    {
      "epoch": 3.162841796875e-05,
      "model_forward_time": 0.11436653137207031,
      "step": 5182
    },
    {
      "epoch": 3.162841796875e-05,
      "step": 5182,
      "training_step_time": 0.37651824951171875
    },
    {
      "epoch": 3.1634521484375e-05,
      "model_forward_time": 0.11454272270202637,
      "step": 5183
    },
    {
      "epoch": 3.1634521484375e-05,
      "step": 5183,
      "training_step_time": 0.4844651222229004
    },
    {
      "epoch": 3.1640625e-05,
      "model_forward_time": 0.11439108848571777,
      "step": 5184
    },
    {
      "epoch": 3.1640625e-05,
      "step": 5184,
      "training_step_time": 0.5057945251464844
    },
    {
      "epoch": 3.1646728515625e-05,
      "model_forward_time": 0.11361861228942871,
      "step": 5185
    },
    {
      "epoch": 3.1646728515625e-05,
      "step": 5185,
      "training_step_time": 0.4539835453033447
    },
    {
      "epoch": 3.165283203125e-05,
      "model_forward_time": 0.11374473571777344,
      "step": 5186
    },
    {
      "epoch": 3.165283203125e-05,
      "step": 5186,
      "training_step_time": 0.4573187828063965
    },
    {
      "epoch": 3.1658935546875e-05,
      "model_forward_time": 0.11540365219116211,
      "step": 5187
    },
    {
      "epoch": 3.1658935546875e-05,
      "step": 5187,
      "training_step_time": 0.3883397579193115
    },
    {
      "epoch": 3.16650390625e-05,
      "model_forward_time": 0.11431431770324707,
      "step": 5188
    },
    {
      "epoch": 3.16650390625e-05,
      "step": 5188,
      "training_step_time": 0.3833038806915283
    },
    {
      "epoch": 3.1671142578125e-05,
      "model_forward_time": 0.1149282455444336,
      "step": 5189
    },
    {
      "epoch": 3.1671142578125e-05,
      "step": 5189,
      "training_step_time": 0.37979960441589355
    },
    {
      "epoch": 3.167724609375e-05,
      "grad_norm": 0.3693482577800751,
      "learning_rate": 9.963620985635065e-05,
      "loss": 0.0949,
      "step": 5190
    },
    {
      "epoch": 3.167724609375e-05,
      "model_forward_time": 0.11601853370666504,
      "step": 5190
    },
    {
      "epoch": 3.167724609375e-05,
      "step": 5190,
      "training_step_time": 0.401486873626709
    },
    {
      "epoch": 3.1683349609375e-05,
      "model_forward_time": 0.11495113372802734,
      "step": 5191
    },
    {
      "epoch": 3.1683349609375e-05,
      "step": 5191,
      "training_step_time": 0.3971140384674072
    },
    {
      "epoch": 3.1689453125e-05,
      "model_forward_time": 0.1151113510131836,
      "step": 5192
    },
    {
      "epoch": 3.1689453125e-05,
      "step": 5192,
      "training_step_time": 0.7715268135070801
    },
    {
      "epoch": 3.1695556640625e-05,
      "model_forward_time": 0.11419987678527832,
      "step": 5193
    },
    {
      "epoch": 3.1695556640625e-05,
      "step": 5193,
      "training_step_time": 0.3958892822265625
    },
    {
      "epoch": 3.170166015625e-05,
      "model_forward_time": 0.11457109451293945,
      "step": 5194
    },
    {
      "epoch": 3.170166015625e-05,
      "step": 5194,
      "training_step_time": 0.39129042625427246
    },
    {
      "epoch": 3.1707763671875e-05,
      "model_forward_time": 0.11455059051513672,
      "step": 5195
    },
    {
      "epoch": 3.1707763671875e-05,
      "step": 5195,
      "training_step_time": 0.3994476795196533
    },
    {
      "epoch": 3.17138671875e-05,
      "model_forward_time": 0.11476874351501465,
      "step": 5196
    },
    {
      "epoch": 3.17138671875e-05,
      "step": 5196,
      "training_step_time": 0.4182443618774414
    },
    {
      "epoch": 3.1719970703125e-05,
      "model_forward_time": 0.11478114128112793,
      "step": 5197
    },
    {
      "epoch": 3.1719970703125e-05,
      "step": 5197,
      "training_step_time": 0.39539408683776855
    },
    {
      "epoch": 3.172607421875e-05,
      "model_forward_time": 0.11456108093261719,
      "step": 5198
    },
    {
      "epoch": 3.172607421875e-05,
      "step": 5198,
      "training_step_time": 0.5763709545135498
    },
    {
      "epoch": 3.1732177734375e-05,
      "model_forward_time": 0.11490607261657715,
      "step": 5199
    },
    {
      "epoch": 3.1732177734375e-05,
      "step": 5199,
      "training_step_time": 0.44841742515563965
    },
    {
      "epoch": 3.173828125e-05,
      "grad_norm": 0.3256244957447052,
      "learning_rate": 9.963288406760582e-05,
      "loss": 0.0991,
      "step": 5200
    },
    {
      "epoch": 3.173828125e-05,
      "model_forward_time": 0.11494326591491699,
      "step": 5200
    },
    {
      "epoch": 3.173828125e-05,
      "step": 5200,
      "training_step_time": 0.48136472702026367
    },
    {
      "epoch": 3.1744384765625e-05,
      "model_forward_time": 0.11452007293701172,
      "step": 5201
    },
    {
      "epoch": 3.1744384765625e-05,
      "step": 5201,
      "training_step_time": 0.3939805030822754
    },
    {
      "epoch": 3.175048828125e-05,
      "model_forward_time": 0.11439633369445801,
      "step": 5202
    },
    {
      "epoch": 3.175048828125e-05,
      "step": 5202,
      "training_step_time": 0.4004340171813965
    },
    {
      "epoch": 3.1756591796875e-05,
      "model_forward_time": 0.11488819122314453,
      "step": 5203
    },
    {
      "epoch": 3.1756591796875e-05,
      "step": 5203,
      "training_step_time": 0.3961303234100342
    },
    {
      "epoch": 3.17626953125e-05,
      "model_forward_time": 0.11518645286560059,
      "step": 5204
    },
    {
      "epoch": 3.17626953125e-05,
      "step": 5204,
      "training_step_time": 0.5033984184265137
    },
    {
      "epoch": 3.1768798828125e-05,
      "model_forward_time": 0.11531448364257812,
      "step": 5205
    },
    {
      "epoch": 3.1768798828125e-05,
      "step": 5205,
      "training_step_time": 0.39134693145751953
    },
    {
      "epoch": 3.177490234375e-05,
      "model_forward_time": 0.11476016044616699,
      "step": 5206
    },
    {
      "epoch": 3.177490234375e-05,
      "step": 5206,
      "training_step_time": 0.38834142684936523
    },
    {
      "epoch": 3.1781005859375e-05,
      "model_forward_time": 0.11486101150512695,
      "step": 5207
    },
    {
      "epoch": 3.1781005859375e-05,
      "step": 5207,
      "training_step_time": 0.3882434368133545
    },
    {
      "epoch": 3.1787109375e-05,
      "model_forward_time": 0.11574816703796387,
      "step": 5208
    },
    {
      "epoch": 3.1787109375e-05,
      "step": 5208,
      "training_step_time": 0.397672176361084
    },
    {
      "epoch": 3.1793212890625e-05,
      "model_forward_time": 0.1147010326385498,
      "step": 5209
    },
    {
      "epoch": 3.1793212890625e-05,
      "step": 5209,
      "training_step_time": 0.4386165142059326
    },
    {
      "epoch": 3.179931640625e-05,
      "grad_norm": 0.38863107562065125,
      "learning_rate": 9.96295432017013e-05,
      "loss": 0.0961,
      "step": 5210
    },
    {
      "epoch": 3.179931640625e-05,
      "model_forward_time": 0.11522674560546875,
      "step": 5210
    },
    {
      "epoch": 3.179931640625e-05,
      "step": 5210,
      "training_step_time": 0.6744997501373291
    },
    {
      "epoch": 3.1805419921875e-05,
      "model_forward_time": 0.11501383781433105,
      "step": 5211
    },
    {
      "epoch": 3.1805419921875e-05,
      "step": 5211,
      "training_step_time": 0.44995927810668945
    },
    {
      "epoch": 3.18115234375e-05,
      "model_forward_time": 0.11500740051269531,
      "step": 5212
    },
    {
      "epoch": 3.18115234375e-05,
      "step": 5212,
      "training_step_time": 0.4784221649169922
    },
    {
      "epoch": 3.1817626953125e-05,
      "model_forward_time": 0.11432671546936035,
      "step": 5213
    },
    {
      "epoch": 3.1817626953125e-05,
      "step": 5213,
      "training_step_time": 0.5061323642730713
    },
    {
      "epoch": 3.182373046875e-05,
      "model_forward_time": 0.11422920227050781,
      "step": 5214
    },
    {
      "epoch": 3.182373046875e-05,
      "step": 5214,
      "training_step_time": 0.4291684627532959
    },
    {
      "epoch": 3.1829833984375e-05,
      "model_forward_time": 0.11388683319091797,
      "step": 5215
    },
    {
      "epoch": 3.1829833984375e-05,
      "step": 5215,
      "training_step_time": 0.39608001708984375
    },
    {
      "epoch": 3.18359375e-05,
      "model_forward_time": 0.11574721336364746,
      "step": 5216
    },
    {
      "epoch": 3.18359375e-05,
      "step": 5216,
      "training_step_time": 0.40773558616638184
    },
    {
      "epoch": 3.1842041015625e-05,
      "model_forward_time": 0.11675524711608887,
      "step": 5217
    },
    {
      "epoch": 3.1842041015625e-05,
      "step": 5217,
      "training_step_time": 0.3924860954284668
    },
    {
      "epoch": 3.184814453125e-05,
      "model_forward_time": 0.11528635025024414,
      "step": 5218
    },
    {
      "epoch": 3.184814453125e-05,
      "step": 5218,
      "training_step_time": 0.3894155025482178
    },
    {
      "epoch": 3.1854248046875e-05,
      "model_forward_time": 0.11513161659240723,
      "step": 5219
    },
    {
      "epoch": 3.1854248046875e-05,
      "step": 5219,
      "training_step_time": 0.40005922317504883
    },
    {
      "epoch": 3.18603515625e-05,
      "grad_norm": 0.17136989533901215,
      "learning_rate": 9.962618725965196e-05,
      "loss": 0.0928,
      "step": 5220
    },
    {
      "epoch": 3.18603515625e-05,
      "model_forward_time": 0.11430883407592773,
      "step": 5220
    },
    {
      "epoch": 3.18603515625e-05,
      "step": 5220,
      "training_step_time": 0.40204310417175293
    },
    {
      "epoch": 3.1866455078125e-05,
      "model_forward_time": 0.11570954322814941,
      "step": 5221
    },
    {
      "epoch": 3.1866455078125e-05,
      "step": 5221,
      "training_step_time": 0.3990898132324219
    },
    {
      "epoch": 3.187255859375e-05,
      "model_forward_time": 0.11512541770935059,
      "step": 5222
    },
    {
      "epoch": 3.187255859375e-05,
      "step": 5222,
      "training_step_time": 0.602910041809082
    },
    {
      "epoch": 3.1878662109375e-05,
      "model_forward_time": 0.11471080780029297,
      "step": 5223
    },
    {
      "epoch": 3.1878662109375e-05,
      "step": 5223,
      "training_step_time": 0.39131951332092285
    },
    {
      "epoch": 3.1884765625e-05,
      "model_forward_time": 0.11565279960632324,
      "step": 5224
    },
    {
      "epoch": 3.1884765625e-05,
      "step": 5224,
      "training_step_time": 0.38254857063293457
    },
    {
      "epoch": 3.1890869140625e-05,
      "model_forward_time": 0.11440634727478027,
      "step": 5225
    },
    {
      "epoch": 3.1890869140625e-05,
      "step": 5225,
      "training_step_time": 0.36449456214904785
    },
    {
      "epoch": 3.189697265625e-05,
      "model_forward_time": 0.11391520500183105,
      "step": 5226
    },
    {
      "epoch": 3.189697265625e-05,
      "step": 5226,
      "training_step_time": 0.4550447463989258
    },
    {
      "epoch": 3.1903076171875e-05,
      "model_forward_time": 0.11544370651245117,
      "step": 5227
    },
    {
      "epoch": 3.1903076171875e-05,
      "step": 5227,
      "training_step_time": 0.46011805534362793
    },
    {
      "epoch": 3.19091796875e-05,
      "model_forward_time": 0.11499619483947754,
      "step": 5228
    },
    {
      "epoch": 3.19091796875e-05,
      "step": 5228,
      "training_step_time": 0.5134875774383545
    },
    {
      "epoch": 3.1915283203125e-05,
      "model_forward_time": 0.11418271064758301,
      "step": 5229
    },
    {
      "epoch": 3.1915283203125e-05,
      "step": 5229,
      "training_step_time": 0.44018006324768066
    },
    {
      "epoch": 3.192138671875e-05,
      "grad_norm": 0.3574534058570862,
      "learning_rate": 9.962281624247722e-05,
      "loss": 0.0909,
      "step": 5230
    },
    {
      "epoch": 3.192138671875e-05,
      "model_forward_time": 0.11501097679138184,
      "step": 5230
    },
    {
      "epoch": 3.192138671875e-05,
      "step": 5230,
      "training_step_time": 0.379286527633667
    },
    {
      "epoch": 3.1927490234375e-05,
      "model_forward_time": 0.11496472358703613,
      "step": 5231
    },
    {
      "epoch": 3.1927490234375e-05,
      "step": 5231,
      "training_step_time": 0.3933751583099365
    },
    {
      "epoch": 3.193359375e-05,
      "model_forward_time": 0.11527061462402344,
      "step": 5232
    },
    {
      "epoch": 3.193359375e-05,
      "step": 5232,
      "training_step_time": 0.39253711700439453
    },
    {
      "epoch": 3.1939697265625e-05,
      "model_forward_time": 0.1150655746459961,
      "step": 5233
    },
    {
      "epoch": 3.1939697265625e-05,
      "step": 5233,
      "training_step_time": 0.3921821117401123
    },
    {
      "epoch": 3.194580078125e-05,
      "model_forward_time": 0.11547183990478516,
      "step": 5234
    },
    {
      "epoch": 3.194580078125e-05,
      "step": 5234,
      "training_step_time": 0.7744128704071045
    },
    {
      "epoch": 3.1951904296875e-05,
      "model_forward_time": 0.11438393592834473,
      "step": 5235
    },
    {
      "epoch": 3.1951904296875e-05,
      "step": 5235,
      "training_step_time": 0.38521885871887207
    },
    {
      "epoch": 3.19580078125e-05,
      "model_forward_time": 0.11493277549743652,
      "step": 5236
    },
    {
      "epoch": 3.19580078125e-05,
      "step": 5236,
      "training_step_time": 0.38619399070739746
    },
    {
      "epoch": 3.1964111328125e-05,
      "model_forward_time": 0.11464953422546387,
      "step": 5237
    },
    {
      "epoch": 3.1964111328125e-05,
      "step": 5237,
      "training_step_time": 0.3974742889404297
    },
    {
      "epoch": 3.197021484375e-05,
      "model_forward_time": 0.1140594482421875,
      "step": 5238
    },
    {
      "epoch": 3.197021484375e-05,
      "step": 5238,
      "training_step_time": 0.39589428901672363
    },
    {
      "epoch": 3.1976318359375e-05,
      "model_forward_time": 0.11443614959716797,
      "step": 5239
    },
    {
      "epoch": 3.1976318359375e-05,
      "step": 5239,
      "training_step_time": 0.3685142993927002
    },
    {
      "epoch": 3.1982421875e-05,
      "grad_norm": 0.4402635395526886,
      "learning_rate": 9.961943015120113e-05,
      "loss": 0.0875,
      "step": 5240
    },
    {
      "epoch": 3.1982421875e-05,
      "model_forward_time": 0.11435079574584961,
      "step": 5240
    },
    {
      "epoch": 3.1982421875e-05,
      "step": 5240,
      "training_step_time": 0.4817214012145996
    },
    {
      "epoch": 3.1988525390625e-05,
      "model_forward_time": 0.11529803276062012,
      "step": 5241
    },
    {
      "epoch": 3.1988525390625e-05,
      "step": 5241,
      "training_step_time": 0.49422383308410645
    },
    {
      "epoch": 3.199462890625e-05,
      "model_forward_time": 0.11466288566589355,
      "step": 5242
    },
    {
      "epoch": 3.199462890625e-05,
      "step": 5242,
      "training_step_time": 0.4495224952697754
    },
    {
      "epoch": 3.2000732421875e-05,
      "model_forward_time": 0.11500239372253418,
      "step": 5243
    },
    {
      "epoch": 3.2000732421875e-05,
      "step": 5243,
      "training_step_time": 0.44394898414611816
    },
    {
      "epoch": 3.20068359375e-05,
      "model_forward_time": 0.11497664451599121,
      "step": 5244
    },
    {
      "epoch": 3.20068359375e-05,
      "step": 5244,
      "training_step_time": 0.3970520496368408
    },
    {
      "epoch": 3.2012939453125e-05,
      "model_forward_time": 0.11492204666137695,
      "step": 5245
    },
    {
      "epoch": 3.2012939453125e-05,
      "step": 5245,
      "training_step_time": 0.3992578983306885
    },
    {
      "epoch": 3.201904296875e-05,
      "model_forward_time": 0.1143803596496582,
      "step": 5246
    },
    {
      "epoch": 3.201904296875e-05,
      "step": 5246,
      "training_step_time": 0.4814929962158203
    },
    {
      "epoch": 3.2025146484375e-05,
      "model_forward_time": 0.11526203155517578,
      "step": 5247
    },
    {
      "epoch": 3.2025146484375e-05,
      "step": 5247,
      "training_step_time": 0.41551995277404785
    },
    {
      "epoch": 3.203125e-05,
      "model_forward_time": 0.11498737335205078,
      "step": 5248
    },
    {
      "epoch": 3.203125e-05,
      "step": 5248,
      "training_step_time": 0.4072422981262207
    },
    {
      "epoch": 3.2037353515625e-05,
      "model_forward_time": 0.11478471755981445,
      "step": 5249
    },
    {
      "epoch": 3.2037353515625e-05,
      "step": 5249,
      "training_step_time": 0.38852596282958984
    },
    {
      "epoch": 3.204345703125e-05,
      "grad_norm": 0.264547199010849,
      "learning_rate": 9.961602898685226e-05,
      "loss": 0.0874,
      "step": 5250
    },
    {
      "epoch": 3.204345703125e-05,
      "model_forward_time": 0.11460566520690918,
      "step": 5250
    },
    {
      "epoch": 3.204345703125e-05,
      "step": 5250,
      "training_step_time": 0.3914191722869873
    },
    {
      "epoch": 3.2049560546875e-05,
      "model_forward_time": 0.11457300186157227,
      "step": 5251
    },
    {
      "epoch": 3.2049560546875e-05,
      "step": 5251,
      "training_step_time": 0.39198875427246094
    },
    {
      "epoch": 3.20556640625e-05,
      "model_forward_time": 0.11538982391357422,
      "step": 5252
    },
    {
      "epoch": 3.20556640625e-05,
      "step": 5252,
      "training_step_time": 0.6784586906433105
    },
    {
      "epoch": 3.2061767578125e-05,
      "model_forward_time": 0.11448144912719727,
      "step": 5253
    },
    {
      "epoch": 3.2061767578125e-05,
      "step": 5253,
      "training_step_time": 0.36511683464050293
    },
    {
      "epoch": 3.206787109375e-05,
      "model_forward_time": 0.11513495445251465,
      "step": 5254
    },
    {
      "epoch": 3.206787109375e-05,
      "step": 5254,
      "training_step_time": 0.41188502311706543
    },
    {
      "epoch": 3.2073974609375e-05,
      "model_forward_time": 0.11504817008972168,
      "step": 5255
    },
    {
      "epoch": 3.2073974609375e-05,
      "step": 5255,
      "training_step_time": 0.4551064968109131
    },
    {
      "epoch": 3.2080078125e-05,
      "model_forward_time": 0.11477112770080566,
      "step": 5256
    },
    {
      "epoch": 3.2080078125e-05,
      "step": 5256,
      "training_step_time": 0.48571252822875977
    },
    {
      "epoch": 3.2086181640625e-05,
      "model_forward_time": 0.11477303504943848,
      "step": 5257
    },
    {
      "epoch": 3.2086181640625e-05,
      "step": 5257,
      "training_step_time": 0.4028751850128174
    },
    {
      "epoch": 3.209228515625e-05,
      "model_forward_time": 0.11424136161804199,
      "step": 5258
    },
    {
      "epoch": 3.209228515625e-05,
      "step": 5258,
      "training_step_time": 0.6087357997894287
    },
    {
      "epoch": 3.2098388671875e-05,
      "model_forward_time": 0.11388134956359863,
      "step": 5259
    },
    {
      "epoch": 3.2098388671875e-05,
      "step": 5259,
      "training_step_time": 0.39167284965515137
    },
    {
      "epoch": 3.21044921875e-05,
      "grad_norm": 0.2480546236038208,
      "learning_rate": 9.961261275046383e-05,
      "loss": 0.0911,
      "step": 5260
    },
    {
      "epoch": 3.21044921875e-05,
      "model_forward_time": 0.11386418342590332,
      "step": 5260
    },
    {
      "epoch": 3.21044921875e-05,
      "step": 5260,
      "training_step_time": 0.4103870391845703
    },
    {
      "epoch": 3.2110595703125e-05,
      "model_forward_time": 0.11505365371704102,
      "step": 5261
    },
    {
      "epoch": 3.2110595703125e-05,
      "step": 5261,
      "training_step_time": 0.3873734474182129
    },
    {
      "epoch": 3.211669921875e-05,
      "model_forward_time": 0.11522483825683594,
      "step": 5262
    },
    {
      "epoch": 3.211669921875e-05,
      "step": 5262,
      "training_step_time": 0.36600279808044434
    },
    {
      "epoch": 3.2122802734375e-05,
      "model_forward_time": 0.1148371696472168,
      "step": 5263
    },
    {
      "epoch": 3.2122802734375e-05,
      "step": 5263,
      "training_step_time": 0.3977696895599365
    },
    {
      "epoch": 3.212890625e-05,
      "model_forward_time": 0.11554932594299316,
      "step": 5264
    },
    {
      "epoch": 3.212890625e-05,
      "step": 5264,
      "training_step_time": 0.810969352722168
    },
    {
      "epoch": 3.2135009765625e-05,
      "model_forward_time": 0.11408329010009766,
      "step": 5265
    },
    {
      "epoch": 3.2135009765625e-05,
      "step": 5265,
      "training_step_time": 0.3874971866607666
    },
    {
      "epoch": 3.214111328125e-05,
      "model_forward_time": 0.11453890800476074,
      "step": 5266
    },
    {
      "epoch": 3.214111328125e-05,
      "step": 5266,
      "training_step_time": 0.3641965389251709
    },
    {
      "epoch": 3.2147216796875e-05,
      "model_forward_time": 0.11433601379394531,
      "step": 5267
    },
    {
      "epoch": 3.2147216796875e-05,
      "step": 5267,
      "training_step_time": 0.4195375442504883
    },
    {
      "epoch": 3.21533203125e-05,
      "model_forward_time": 0.11421012878417969,
      "step": 5268
    },
    {
      "epoch": 3.21533203125e-05,
      "step": 5268,
      "training_step_time": 0.4001946449279785
    },
    {
      "epoch": 3.2159423828125e-05,
      "model_forward_time": 0.11417865753173828,
      "step": 5269
    },
    {
      "epoch": 3.2159423828125e-05,
      "step": 5269,
      "training_step_time": 0.43021583557128906
    },
    {
      "epoch": 3.216552734375e-05,
      "grad_norm": 0.23789764940738678,
      "learning_rate": 9.96091814430736e-05,
      "loss": 0.0918,
      "step": 5270
    },
    {
      "epoch": 3.216552734375e-05,
      "model_forward_time": 0.11452817916870117,
      "step": 5270
    },
    {
      "epoch": 3.216552734375e-05,
      "step": 5270,
      "training_step_time": 0.4559643268585205
    },
    {
      "epoch": 3.2171630859375e-05,
      "model_forward_time": 0.11478090286254883,
      "step": 5271
    },
    {
      "epoch": 3.2171630859375e-05,
      "step": 5271,
      "training_step_time": 0.5149588584899902
    },
    {
      "epoch": 3.2177734375e-05,
      "model_forward_time": 0.11512112617492676,
      "step": 5272
    },
    {
      "epoch": 3.2177734375e-05,
      "step": 5272,
      "training_step_time": 0.4100160598754883
    },
    {
      "epoch": 3.2183837890625e-05,
      "model_forward_time": 0.11426758766174316,
      "step": 5273
    },
    {
      "epoch": 3.2183837890625e-05,
      "step": 5273,
      "training_step_time": 0.4248228073120117
    },
    {
      "epoch": 3.218994140625e-05,
      "model_forward_time": 0.1152040958404541,
      "step": 5274
    },
    {
      "epoch": 3.218994140625e-05,
      "step": 5274,
      "training_step_time": 0.4221150875091553
    },
    {
      "epoch": 3.2196044921875e-05,
      "model_forward_time": 0.11470913887023926,
      "step": 5275
    },
    {
      "epoch": 3.2196044921875e-05,
      "step": 5275,
      "training_step_time": 0.39850902557373047
    },
    {
      "epoch": 3.22021484375e-05,
      "model_forward_time": 0.11419343948364258,
      "step": 5276
    },
    {
      "epoch": 3.22021484375e-05,
      "step": 5276,
      "training_step_time": 0.41791749000549316
    },
    {
      "epoch": 3.2208251953125e-05,
      "model_forward_time": 0.1152031421661377,
      "step": 5277
    },
    {
      "epoch": 3.2208251953125e-05,
      "step": 5277,
      "training_step_time": 0.3929862976074219
    },
    {
      "epoch": 3.221435546875e-05,
      "model_forward_time": 0.11539554595947266,
      "step": 5278
    },
    {
      "epoch": 3.221435546875e-05,
      "step": 5278,
      "training_step_time": 0.3889145851135254
    },
    {
      "epoch": 3.2220458984375e-05,
      "model_forward_time": 0.11557769775390625,
      "step": 5279
    },
    {
      "epoch": 3.2220458984375e-05,
      "step": 5279,
      "training_step_time": 0.3925187587738037
    },
    {
      "epoch": 3.22265625e-05,
      "grad_norm": 0.21392187476158142,
      "learning_rate": 9.96057350657239e-05,
      "loss": 0.0924,
      "step": 5280
    },
    {
      "epoch": 3.22265625e-05,
      "model_forward_time": 0.11518096923828125,
      "step": 5280
    },
    {
      "epoch": 3.22265625e-05,
      "step": 5280,
      "training_step_time": 0.3674447536468506
    },
    {
      "epoch": 3.2232666015625e-05,
      "model_forward_time": 0.11514544486999512,
      "step": 5281
    },
    {
      "epoch": 3.2232666015625e-05,
      "step": 5281,
      "training_step_time": 0.39815521240234375
    },
    {
      "epoch": 3.223876953125e-05,
      "model_forward_time": 0.11502861976623535,
      "step": 5282
    },
    {
      "epoch": 3.223876953125e-05,
      "step": 5282,
      "training_step_time": 0.6305723190307617
    },
    {
      "epoch": 3.2244873046875e-05,
      "model_forward_time": 0.11483049392700195,
      "step": 5283
    },
    {
      "epoch": 3.2244873046875e-05,
      "step": 5283,
      "training_step_time": 0.4217221736907959
    },
    {
      "epoch": 3.22509765625e-05,
      "model_forward_time": 0.11457061767578125,
      "step": 5284
    },
    {
      "epoch": 3.22509765625e-05,
      "step": 5284,
      "training_step_time": 0.40642237663269043
    },
    {
      "epoch": 3.2257080078125e-05,
      "model_forward_time": 0.11506342887878418,
      "step": 5285
    },
    {
      "epoch": 3.2257080078125e-05,
      "step": 5285,
      "training_step_time": 0.395580530166626
    },
    {
      "epoch": 3.226318359375e-05,
      "model_forward_time": 0.11503934860229492,
      "step": 5286
    },
    {
      "epoch": 3.226318359375e-05,
      "step": 5286,
      "training_step_time": 0.4221920967102051
    },
    {
      "epoch": 3.2269287109375e-05,
      "model_forward_time": 0.11432790756225586,
      "step": 5287
    },
    {
      "epoch": 3.2269287109375e-05,
      "step": 5287,
      "training_step_time": 0.39406466484069824
    },
    {
      "epoch": 3.2275390625e-05,
      "model_forward_time": 0.11571931838989258,
      "step": 5288
    },
    {
      "epoch": 3.2275390625e-05,
      "step": 5288,
      "training_step_time": 0.6039366722106934
    },
    {
      "epoch": 3.2281494140625e-05,
      "model_forward_time": 0.11469912528991699,
      "step": 5289
    },
    {
      "epoch": 3.2281494140625e-05,
      "step": 5289,
      "training_step_time": 0.3839681148529053
    },
    {
      "epoch": 3.228759765625e-05,
      "grad_norm": 0.19920575618743896,
      "learning_rate": 9.960227361946164e-05,
      "loss": 0.0904,
      "step": 5290
    },
    {
      "epoch": 3.228759765625e-05,
      "model_forward_time": 0.11479043960571289,
      "step": 5290
    },
    {
      "epoch": 3.228759765625e-05,
      "step": 5290,
      "training_step_time": 0.3835928440093994
    },
    {
      "epoch": 3.2293701171875e-05,
      "model_forward_time": 0.11482405662536621,
      "step": 5291
    },
    {
      "epoch": 3.2293701171875e-05,
      "step": 5291,
      "training_step_time": 0.3857429027557373
    },
    {
      "epoch": 3.22998046875e-05,
      "model_forward_time": 0.11630582809448242,
      "step": 5292
    },
    {
      "epoch": 3.22998046875e-05,
      "step": 5292,
      "training_step_time": 0.3810758590698242
    },
    {
      "epoch": 3.2305908203125e-05,
      "model_forward_time": 0.11470937728881836,
      "step": 5293
    },
    {
      "epoch": 3.2305908203125e-05,
      "step": 5293,
      "training_step_time": 0.39218926429748535
    },
    {
      "epoch": 3.231201171875e-05,
      "model_forward_time": 0.1154167652130127,
      "step": 5294
    },
    {
      "epoch": 3.231201171875e-05,
      "step": 5294,
      "training_step_time": 0.6024525165557861
    },
    {
      "epoch": 3.2318115234375e-05,
      "model_forward_time": 0.11437010765075684,
      "step": 5295
    },
    {
      "epoch": 3.2318115234375e-05,
      "step": 5295,
      "training_step_time": 0.4307088851928711
    },
    {
      "epoch": 3.232421875e-05,
      "model_forward_time": 0.1143941879272461,
      "step": 5296
    },
    {
      "epoch": 3.232421875e-05,
      "step": 5296,
      "training_step_time": 0.4654550552368164
    },
    {
      "epoch": 3.2330322265625e-05,
      "model_forward_time": 0.11439967155456543,
      "step": 5297
    },
    {
      "epoch": 3.2330322265625e-05,
      "step": 5297,
      "training_step_time": 0.438443660736084
    },
    {
      "epoch": 3.233642578125e-05,
      "model_forward_time": 0.11443352699279785,
      "step": 5298
    },
    {
      "epoch": 3.233642578125e-05,
      "step": 5298,
      "training_step_time": 0.46908044815063477
    },
    {
      "epoch": 3.2342529296875e-05,
      "model_forward_time": 0.11374068260192871,
      "step": 5299
    },
    {
      "epoch": 3.2342529296875e-05,
      "step": 5299,
      "training_step_time": 0.445037841796875
    },
    {
      "epoch": 3.23486328125e-05,
      "grad_norm": 0.35642895102500916,
      "learning_rate": 9.959879710533835e-05,
      "loss": 0.0895,
      "step": 5300
    },
    {
      "epoch": 3.23486328125e-05,
      "model_forward_time": 0.11413860321044922,
      "step": 5300
    },
    {
      "epoch": 3.23486328125e-05,
      "step": 5300,
      "training_step_time": 0.4678666591644287
    },
    {
      "epoch": 3.2354736328125e-05,
      "model_forward_time": 0.11480998992919922,
      "step": 5301
    },
    {
      "epoch": 3.2354736328125e-05,
      "step": 5301,
      "training_step_time": 0.38614583015441895
    },
    {
      "epoch": 3.236083984375e-05,
      "model_forward_time": 0.11440086364746094,
      "step": 5302
    },
    {
      "epoch": 3.236083984375e-05,
      "step": 5302,
      "training_step_time": 0.38623762130737305
    },
    {
      "epoch": 3.2366943359375e-05,
      "model_forward_time": 0.11481404304504395,
      "step": 5303
    },
    {
      "epoch": 3.2366943359375e-05,
      "step": 5303,
      "training_step_time": 0.38680076599121094
    },
    {
      "epoch": 3.2373046875e-05,
      "model_forward_time": 0.11554074287414551,
      "step": 5304
    },
    {
      "epoch": 3.2373046875e-05,
      "step": 5304,
      "training_step_time": 0.40354251861572266
    },
    {
      "epoch": 3.2379150390625e-05,
      "model_forward_time": 0.11531400680541992,
      "step": 5305
    },
    {
      "epoch": 3.2379150390625e-05,
      "step": 5305,
      "training_step_time": 0.39231061935424805
    },
    {
      "epoch": 3.238525390625e-05,
      "model_forward_time": 0.11512422561645508,
      "step": 5306
    },
    {
      "epoch": 3.238525390625e-05,
      "step": 5306,
      "training_step_time": 0.609567403793335
    },
    {
      "epoch": 3.2391357421875e-05,
      "model_forward_time": 0.11433243751525879,
      "step": 5307
    },
    {
      "epoch": 3.2391357421875e-05,
      "step": 5307,
      "training_step_time": 0.3999803066253662
    },
    {
      "epoch": 3.23974609375e-05,
      "model_forward_time": 0.11525654792785645,
      "step": 5308
    },
    {
      "epoch": 3.23974609375e-05,
      "step": 5308,
      "training_step_time": 0.3727562427520752
    },
    {
      "epoch": 3.2403564453125e-05,
      "model_forward_time": 0.11496520042419434,
      "step": 5309
    },
    {
      "epoch": 3.2403564453125e-05,
      "step": 5309,
      "training_step_time": 0.49307775497436523
    },
    {
      "epoch": 3.240966796875e-05,
      "grad_norm": 0.28846389055252075,
      "learning_rate": 9.959530552441005e-05,
      "loss": 0.0901,
      "step": 5310
    },
    {
      "epoch": 3.240966796875e-05,
      "model_forward_time": 0.11472034454345703,
      "step": 5310
    },
    {
      "epoch": 3.240966796875e-05,
      "step": 5310,
      "training_step_time": 0.4780464172363281
    },
    {
      "epoch": 3.2415771484375e-05,
      "model_forward_time": 0.11454081535339355,
      "step": 5311
    },
    {
      "epoch": 3.2415771484375e-05,
      "step": 5311,
      "training_step_time": 0.43189120292663574
    },
    {
      "epoch": 3.2421875e-05,
      "model_forward_time": 0.11462688446044922,
      "step": 5312
    },
    {
      "epoch": 3.2421875e-05,
      "step": 5312,
      "training_step_time": 0.40874195098876953
    },
    {
      "epoch": 3.2427978515625e-05,
      "model_forward_time": 0.11498212814331055,
      "step": 5313
    },
    {
      "epoch": 3.2427978515625e-05,
      "step": 5313,
      "training_step_time": 0.4635157585144043
    },
    {
      "epoch": 3.243408203125e-05,
      "model_forward_time": 0.1145782470703125,
      "step": 5314
    },
    {
      "epoch": 3.243408203125e-05,
      "step": 5314,
      "training_step_time": 0.41089534759521484
    },
    {
      "epoch": 3.2440185546875e-05,
      "model_forward_time": 0.11487436294555664,
      "step": 5315
    },
    {
      "epoch": 3.2440185546875e-05,
      "step": 5315,
      "training_step_time": 0.39757847785949707
    },
    {
      "epoch": 3.24462890625e-05,
      "model_forward_time": 0.11518168449401855,
      "step": 5316
    },
    {
      "epoch": 3.24462890625e-05,
      "step": 5316,
      "training_step_time": 0.39327263832092285
    },
    {
      "epoch": 3.2452392578125e-05,
      "model_forward_time": 0.11426424980163574,
      "step": 5317
    },
    {
      "epoch": 3.2452392578125e-05,
      "step": 5317,
      "training_step_time": 0.39142322540283203
    },
    {
      "epoch": 3.245849609375e-05,
      "model_forward_time": 0.11552953720092773,
      "step": 5318
    },
    {
      "epoch": 3.245849609375e-05,
      "step": 5318,
      "training_step_time": 0.48969125747680664
    },
    {
      "epoch": 3.2464599609375e-05,
      "model_forward_time": 0.11418628692626953,
      "step": 5319
    },
    {
      "epoch": 3.2464599609375e-05,
      "step": 5319,
      "training_step_time": 0.39345741271972656
    },
    {
      "epoch": 3.2470703125e-05,
      "grad_norm": 0.26318493485450745,
      "learning_rate": 9.959179887773744e-05,
      "loss": 0.0899,
      "step": 5320
    },
    {
      "epoch": 3.2470703125e-05,
      "model_forward_time": 0.11487817764282227,
      "step": 5320
    },
    {
      "epoch": 3.2470703125e-05,
      "step": 5320,
      "training_step_time": 0.39289379119873047
    },
    {
      "epoch": 3.2476806640625e-05,
      "model_forward_time": 0.11508917808532715,
      "step": 5321
    },
    {
      "epoch": 3.2476806640625e-05,
      "step": 5321,
      "training_step_time": 0.39507150650024414
    },
    {
      "epoch": 3.248291015625e-05,
      "model_forward_time": 0.11487674713134766,
      "step": 5322
    },
    {
      "epoch": 3.248291015625e-05,
      "step": 5322,
      "training_step_time": 0.398456335067749
    },
    {
      "epoch": 3.2489013671875e-05,
      "model_forward_time": 0.1151432991027832,
      "step": 5323
    },
    {
      "epoch": 3.2489013671875e-05,
      "step": 5323,
      "training_step_time": 0.47146105766296387
    },
    {
      "epoch": 3.24951171875e-05,
      "model_forward_time": 0.1154477596282959,
      "step": 5324
    },
    {
      "epoch": 3.24951171875e-05,
      "step": 5324,
      "training_step_time": 0.6984491348266602
    },
    {
      "epoch": 3.2501220703125e-05,
      "model_forward_time": 0.11475515365600586,
      "step": 5325
    },
    {
      "epoch": 3.2501220703125e-05,
      "step": 5325,
      "training_step_time": 0.4798586368560791
    },
    {
      "epoch": 3.250732421875e-05,
      "model_forward_time": 0.11374878883361816,
      "step": 5326
    },
    {
      "epoch": 3.250732421875e-05,
      "step": 5326,
      "training_step_time": 0.41410088539123535
    },
    {
      "epoch": 3.2513427734375e-05,
      "model_forward_time": 0.11504793167114258,
      "step": 5327
    },
    {
      "epoch": 3.2513427734375e-05,
      "step": 5327,
      "training_step_time": 0.4041557312011719
    },
    {
      "epoch": 3.251953125e-05,
      "model_forward_time": 0.11426901817321777,
      "step": 5328
    },
    {
      "epoch": 3.251953125e-05,
      "step": 5328,
      "training_step_time": 0.4922831058502197
    },
    {
      "epoch": 3.2525634765625e-05,
      "model_forward_time": 0.11425542831420898,
      "step": 5329
    },
    {
      "epoch": 3.2525634765625e-05,
      "step": 5329,
      "training_step_time": 0.38216495513916016
    },
    {
      "epoch": 3.253173828125e-05,
      "grad_norm": 0.15655797719955444,
      "learning_rate": 9.958827716638572e-05,
      "loss": 0.0859,
      "step": 5330
    },
    {
      "epoch": 3.253173828125e-05,
      "model_forward_time": 0.11467981338500977,
      "step": 5330
    },
    {
      "epoch": 3.253173828125e-05,
      "step": 5330,
      "training_step_time": 0.5304884910583496
    },
    {
      "epoch": 3.2537841796875e-05,
      "model_forward_time": 0.11494183540344238,
      "step": 5331
    },
    {
      "epoch": 3.2537841796875e-05,
      "step": 5331,
      "training_step_time": 0.3871595859527588
    },
    {
      "epoch": 3.25439453125e-05,
      "model_forward_time": 0.11491513252258301,
      "step": 5332
    },
    {
      "epoch": 3.25439453125e-05,
      "step": 5332,
      "training_step_time": 0.3897216320037842
    },
    {
      "epoch": 3.2550048828125e-05,
      "model_forward_time": 0.11499571800231934,
      "step": 5333
    },
    {
      "epoch": 3.2550048828125e-05,
      "step": 5333,
      "training_step_time": 0.37558865547180176
    },
    {
      "epoch": 3.255615234375e-05,
      "model_forward_time": 0.11519765853881836,
      "step": 5334
    },
    {
      "epoch": 3.255615234375e-05,
      "step": 5334,
      "training_step_time": 0.3941221237182617
    },
    {
      "epoch": 3.2562255859375e-05,
      "model_forward_time": 0.11468839645385742,
      "step": 5335
    },
    {
      "epoch": 3.2562255859375e-05,
      "step": 5335,
      "training_step_time": 0.3884444236755371
    },
    {
      "epoch": 3.2568359375e-05,
      "model_forward_time": 0.11575913429260254,
      "step": 5336
    },
    {
      "epoch": 3.2568359375e-05,
      "step": 5336,
      "training_step_time": 0.6437532901763916
    },
    {
      "epoch": 3.2574462890625e-05,
      "model_forward_time": 0.11496686935424805,
      "step": 5337
    },
    {
      "epoch": 3.2574462890625e-05,
      "step": 5337,
      "training_step_time": 0.44315314292907715
    },
    {
      "epoch": 3.258056640625e-05,
      "model_forward_time": 0.11461830139160156,
      "step": 5338
    },
    {
      "epoch": 3.258056640625e-05,
      "step": 5338,
      "training_step_time": 0.4863858222961426
    },
    {
      "epoch": 3.2586669921875e-05,
      "model_forward_time": 0.1153876781463623,
      "step": 5339
    },
    {
      "epoch": 3.2586669921875e-05,
      "step": 5339,
      "training_step_time": 0.4408454895019531
    },
    {
      "epoch": 3.25927734375e-05,
      "grad_norm": 0.21212370693683624,
      "learning_rate": 9.95847403914247e-05,
      "loss": 0.0894,
      "step": 5340
    },
    {
      "epoch": 3.25927734375e-05,
      "model_forward_time": 0.11507105827331543,
      "step": 5340
    },
    {
      "epoch": 3.25927734375e-05,
      "step": 5340,
      "training_step_time": 0.39829397201538086
    },
    {
      "epoch": 3.2598876953125e-05,
      "model_forward_time": 0.11426377296447754,
      "step": 5341
    },
    {
      "epoch": 3.2598876953125e-05,
      "step": 5341,
      "training_step_time": 0.4176931381225586
    },
    {
      "epoch": 3.260498046875e-05,
      "model_forward_time": 0.11486935615539551,
      "step": 5342
    },
    {
      "epoch": 3.260498046875e-05,
      "step": 5342,
      "training_step_time": 0.46634507179260254
    },
    {
      "epoch": 3.2611083984375e-05,
      "model_forward_time": 0.11452770233154297,
      "step": 5343
    },
    {
      "epoch": 3.2611083984375e-05,
      "step": 5343,
      "training_step_time": 0.39260435104370117
    },
    {
      "epoch": 3.26171875e-05,
      "model_forward_time": 0.1150810718536377,
      "step": 5344
    },
    {
      "epoch": 3.26171875e-05,
      "step": 5344,
      "training_step_time": 0.3807954788208008
    },
    {
      "epoch": 3.2623291015625e-05,
      "model_forward_time": 0.11533045768737793,
      "step": 5345
    },
    {
      "epoch": 3.2623291015625e-05,
      "step": 5345,
      "training_step_time": 0.3909015655517578
    },
    {
      "epoch": 3.262939453125e-05,
      "model_forward_time": 0.11450934410095215,
      "step": 5346
    },
    {
      "epoch": 3.262939453125e-05,
      "step": 5346,
      "training_step_time": 0.3938412666320801
    },
    {
      "epoch": 3.2635498046875e-05,
      "model_forward_time": 0.11514091491699219,
      "step": 5347
    },
    {
      "epoch": 3.2635498046875e-05,
      "step": 5347,
      "training_step_time": 0.3930072784423828
    },
    {
      "epoch": 3.26416015625e-05,
      "model_forward_time": 0.11494970321655273,
      "step": 5348
    },
    {
      "epoch": 3.26416015625e-05,
      "step": 5348,
      "training_step_time": 0.7201430797576904
    },
    {
      "epoch": 3.2647705078125e-05,
      "model_forward_time": 0.1148676872253418,
      "step": 5349
    },
    {
      "epoch": 3.2647705078125e-05,
      "step": 5349,
      "training_step_time": 0.3953735828399658
    },
    {
      "epoch": 3.265380859375e-05,
      "grad_norm": 0.22898134589195251,
      "learning_rate": 9.958118855392876e-05,
      "loss": 0.0877,
      "step": 5350
    },
    {
      "epoch": 3.265380859375e-05,
      "model_forward_time": 0.11518692970275879,
      "step": 5350
    },
    {
      "epoch": 3.265380859375e-05,
      "step": 5350,
      "training_step_time": 0.3867218494415283
    },
    {
      "epoch": 3.2659912109375e-05,
      "model_forward_time": 0.11497306823730469,
      "step": 5351
    },
    {
      "epoch": 3.2659912109375e-05,
      "step": 5351,
      "training_step_time": 0.5011234283447266
    },
    {
      "epoch": 3.2666015625e-05,
      "model_forward_time": 0.11494779586791992,
      "step": 5352
    },
    {
      "epoch": 3.2666015625e-05,
      "step": 5352,
      "training_step_time": 0.4783332347869873
    },
    {
      "epoch": 3.2672119140625e-05,
      "model_forward_time": 0.11507010459899902,
      "step": 5353
    },
    {
      "epoch": 3.2672119140625e-05,
      "step": 5353,
      "training_step_time": 0.477618932723999
    },
    {
      "epoch": 3.267822265625e-05,
      "model_forward_time": 0.11649537086486816,
      "step": 5354
    },
    {
      "epoch": 3.267822265625e-05,
      "step": 5354,
      "training_step_time": 0.41486287117004395
    },
    {
      "epoch": 3.2684326171875e-05,
      "model_forward_time": 0.11486411094665527,
      "step": 5355
    },
    {
      "epoch": 3.2684326171875e-05,
      "step": 5355,
      "training_step_time": 0.39547085762023926
    },
    {
      "epoch": 3.26904296875e-05,
      "model_forward_time": 0.1149289608001709,
      "step": 5356
    },
    {
      "epoch": 3.26904296875e-05,
      "step": 5356,
      "training_step_time": 0.5010156631469727
    },
    {
      "epoch": 3.2696533203125e-05,
      "model_forward_time": 0.11414146423339844,
      "step": 5357
    },
    {
      "epoch": 3.2696533203125e-05,
      "step": 5357,
      "training_step_time": 0.38157057762145996
    },
    {
      "epoch": 3.270263671875e-05,
      "model_forward_time": 0.11561775207519531,
      "step": 5358
    },
    {
      "epoch": 3.270263671875e-05,
      "step": 5358,
      "training_step_time": 0.38562989234924316
    },
    {
      "epoch": 3.2708740234375e-05,
      "model_forward_time": 0.11509251594543457,
      "step": 5359
    },
    {
      "epoch": 3.2708740234375e-05,
      "step": 5359,
      "training_step_time": 0.3987863063812256
    },
    {
      "epoch": 3.271484375e-05,
      "grad_norm": 0.3735746145248413,
      "learning_rate": 9.957762165497686e-05,
      "loss": 0.0918,
      "step": 5360
    },
    {
      "epoch": 3.271484375e-05,
      "model_forward_time": 0.11553049087524414,
      "step": 5360
    },
    {
      "epoch": 3.271484375e-05,
      "step": 5360,
      "training_step_time": 0.4658997058868408
    },
    {
      "epoch": 3.2720947265625e-05,
      "model_forward_time": 0.11519575119018555,
      "step": 5361
    },
    {
      "epoch": 3.2720947265625e-05,
      "step": 5361,
      "training_step_time": 0.39306640625
    },
    {
      "epoch": 3.272705078125e-05,
      "model_forward_time": 0.11538982391357422,
      "step": 5362
    },
    {
      "epoch": 3.272705078125e-05,
      "step": 5362,
      "training_step_time": 0.3818397521972656
    },
    {
      "epoch": 3.2733154296875e-05,
      "model_forward_time": 0.11637639999389648,
      "step": 5363
    },
    {
      "epoch": 3.2733154296875e-05,
      "step": 5363,
      "training_step_time": 0.3939244747161865
    },
    {
      "epoch": 3.27392578125e-05,
      "model_forward_time": 0.1152951717376709,
      "step": 5364
    },
    {
      "epoch": 3.27392578125e-05,
      "step": 5364,
      "training_step_time": 0.39212489128112793
    },
    {
      "epoch": 3.2745361328125e-05,
      "model_forward_time": 0.11641955375671387,
      "step": 5365
    },
    {
      "epoch": 3.2745361328125e-05,
      "step": 5365,
      "training_step_time": 0.43588948249816895
    },
    {
      "epoch": 3.275146484375e-05,
      "model_forward_time": 0.11530089378356934,
      "step": 5366
    },
    {
      "epoch": 3.275146484375e-05,
      "step": 5366,
      "training_step_time": 0.6523606777191162
    },
    {
      "epoch": 3.2757568359375e-05,
      "model_forward_time": 0.11502480506896973,
      "step": 5367
    },
    {
      "epoch": 3.2757568359375e-05,
      "step": 5367,
      "training_step_time": 0.41585302352905273
    },
    {
      "epoch": 3.2763671875e-05,
      "model_forward_time": 0.11460208892822266,
      "step": 5368
    },
    {
      "epoch": 3.2763671875e-05,
      "step": 5368,
      "training_step_time": 0.3923981189727783
    },
    {
      "epoch": 3.2769775390625e-05,
      "model_forward_time": 0.11613225936889648,
      "step": 5369
    },
    {
      "epoch": 3.2769775390625e-05,
      "step": 5369,
      "training_step_time": 0.4846079349517822
    },
    {
      "epoch": 3.277587890625e-05,
      "grad_norm": 0.2537057101726532,
      "learning_rate": 9.95740396956525e-05,
      "loss": 0.0935,
      "step": 5370
    },
    {
      "epoch": 3.277587890625e-05,
      "model_forward_time": 0.11463093757629395,
      "step": 5370
    },
    {
      "epoch": 3.277587890625e-05,
      "step": 5370,
      "training_step_time": 0.46106505393981934
    },
    {
      "epoch": 3.2781982421875e-05,
      "model_forward_time": 0.11540699005126953,
      "step": 5371
    },
    {
      "epoch": 3.2781982421875e-05,
      "step": 5371,
      "training_step_time": 0.4019815921783447
    },
    {
      "epoch": 3.27880859375e-05,
      "model_forward_time": 0.11504101753234863,
      "step": 5372
    },
    {
      "epoch": 3.27880859375e-05,
      "step": 5372,
      "training_step_time": 0.5250730514526367
    },
    {
      "epoch": 3.2794189453125e-05,
      "model_forward_time": 0.11734342575073242,
      "step": 5373
    },
    {
      "epoch": 3.2794189453125e-05,
      "step": 5373,
      "training_step_time": 0.38040757179260254
    },
    {
      "epoch": 3.280029296875e-05,
      "model_forward_time": 0.1183159351348877,
      "step": 5374
    },
    {
      "epoch": 3.280029296875e-05,
      "step": 5374,
      "training_step_time": 0.38091373443603516
    },
    {
      "epoch": 3.2806396484375e-05,
      "model_forward_time": 0.11492586135864258,
      "step": 5375
    },
    {
      "epoch": 3.2806396484375e-05,
      "step": 5375,
      "training_step_time": 0.3761861324310303
    },
    {
      "epoch": 3.28125e-05,
      "model_forward_time": 0.11532831192016602,
      "step": 5376
    },
    {
      "epoch": 3.28125e-05,
      "step": 5376,
      "training_step_time": 0.38957715034484863
    },
    {
      "epoch": 3.2818603515625e-05,
      "model_forward_time": 0.11546182632446289,
      "step": 5377
    },
    {
      "epoch": 3.2818603515625e-05,
      "step": 5377,
      "training_step_time": 0.3920104503631592
    },
    {
      "epoch": 3.282470703125e-05,
      "model_forward_time": 0.11506319046020508,
      "step": 5378
    },
    {
      "epoch": 3.282470703125e-05,
      "step": 5378,
      "training_step_time": 0.6268367767333984
    },
    {
      "epoch": 3.2830810546875e-05,
      "model_forward_time": 0.11482882499694824,
      "step": 5379
    },
    {
      "epoch": 3.2830810546875e-05,
      "step": 5379,
      "training_step_time": 0.3660585880279541
    },
    {
      "epoch": 3.28369140625e-05,
      "grad_norm": 0.2849438786506653,
      "learning_rate": 9.957044267704384e-05,
      "loss": 0.0872,
      "step": 5380
    },
    {
      "epoch": 3.28369140625e-05,
      "model_forward_time": 0.1148681640625,
      "step": 5380
    },
    {
      "epoch": 3.28369140625e-05,
      "step": 5380,
      "training_step_time": 0.46506333351135254
    },
    {
      "epoch": 3.2843017578125e-05,
      "model_forward_time": 0.11469388008117676,
      "step": 5381
    },
    {
      "epoch": 3.2843017578125e-05,
      "step": 5381,
      "training_step_time": 0.45029330253601074
    },
    {
      "epoch": 3.284912109375e-05,
      "model_forward_time": 0.11504840850830078,
      "step": 5382
    },
    {
      "epoch": 3.284912109375e-05,
      "step": 5382,
      "training_step_time": 0.4248158931732178
    },
    {
      "epoch": 3.2855224609375e-05,
      "model_forward_time": 0.1144256591796875,
      "step": 5383
    },
    {
      "epoch": 3.2855224609375e-05,
      "step": 5383,
      "training_step_time": 0.42159271240234375
    },
    {
      "epoch": 3.2861328125e-05,
      "model_forward_time": 0.11445903778076172,
      "step": 5384
    },
    {
      "epoch": 3.2861328125e-05,
      "step": 5384,
      "training_step_time": 0.47197675704956055
    },
    {
      "epoch": 3.2867431640625e-05,
      "model_forward_time": 0.11479973793029785,
      "step": 5385
    },
    {
      "epoch": 3.2867431640625e-05,
      "step": 5385,
      "training_step_time": 0.4017510414123535
    },
    {
      "epoch": 3.287353515625e-05,
      "model_forward_time": 0.11479640007019043,
      "step": 5386
    },
    {
      "epoch": 3.287353515625e-05,
      "step": 5386,
      "training_step_time": 0.386760950088501
    },
    {
      "epoch": 3.2879638671875e-05,
      "model_forward_time": 0.11439204216003418,
      "step": 5387
    },
    {
      "epoch": 3.2879638671875e-05,
      "step": 5387,
      "training_step_time": 0.38507986068725586
    },
    {
      "epoch": 3.28857421875e-05,
      "model_forward_time": 0.11468338966369629,
      "step": 5388
    },
    {
      "epoch": 3.28857421875e-05,
      "step": 5388,
      "training_step_time": 0.3962681293487549
    },
    {
      "epoch": 3.2891845703125e-05,
      "model_forward_time": 0.11473751068115234,
      "step": 5389
    },
    {
      "epoch": 3.2891845703125e-05,
      "step": 5389,
      "training_step_time": 0.47579002380371094
    },
    {
      "epoch": 3.289794921875e-05,
      "grad_norm": 0.2668357491493225,
      "learning_rate": 9.95668306002435e-05,
      "loss": 0.0885,
      "step": 5390
    },
    {
      "epoch": 3.289794921875e-05,
      "model_forward_time": 0.11531925201416016,
      "step": 5390
    },
    {
      "epoch": 3.289794921875e-05,
      "step": 5390,
      "training_step_time": 0.5147213935852051
    },
    {
      "epoch": 3.2904052734375e-05,
      "model_forward_time": 0.11545681953430176,
      "step": 5391
    },
    {
      "epoch": 3.2904052734375e-05,
      "step": 5391,
      "training_step_time": 0.45676517486572266
    },
    {
      "epoch": 3.291015625e-05,
      "model_forward_time": 0.11521315574645996,
      "step": 5392
    },
    {
      "epoch": 3.291015625e-05,
      "step": 5392,
      "training_step_time": 0.44523024559020996
    },
    {
      "epoch": 3.2916259765625e-05,
      "model_forward_time": 0.11480569839477539,
      "step": 5393
    },
    {
      "epoch": 3.2916259765625e-05,
      "step": 5393,
      "training_step_time": 0.3930692672729492
    },
    {
      "epoch": 3.292236328125e-05,
      "model_forward_time": 0.11474180221557617,
      "step": 5394
    },
    {
      "epoch": 3.292236328125e-05,
      "step": 5394,
      "training_step_time": 0.40987586975097656
    },
    {
      "epoch": 3.2928466796875e-05,
      "model_forward_time": 0.11505579948425293,
      "step": 5395
    },
    {
      "epoch": 3.2928466796875e-05,
      "step": 5395,
      "training_step_time": 0.4438352584838867
    },
    {
      "epoch": 3.29345703125e-05,
      "model_forward_time": 0.11511039733886719,
      "step": 5396
    },
    {
      "epoch": 3.29345703125e-05,
      "step": 5396,
      "training_step_time": 0.498507022857666
    },
    {
      "epoch": 3.2940673828125e-05,
      "model_forward_time": 0.11462926864624023,
      "step": 5397
    },
    {
      "epoch": 3.2940673828125e-05,
      "step": 5397,
      "training_step_time": 0.41762733459472656
    },
    {
      "epoch": 3.294677734375e-05,
      "model_forward_time": 0.11568045616149902,
      "step": 5398
    },
    {
      "epoch": 3.294677734375e-05,
      "step": 5398,
      "training_step_time": 0.397998571395874
    },
    {
      "epoch": 3.2952880859375e-05,
      "model_forward_time": 0.11494684219360352,
      "step": 5399
    },
    {
      "epoch": 3.2952880859375e-05,
      "step": 5399,
      "training_step_time": 0.4206545352935791
    },
    {
      "epoch": 3.2958984375e-05,
      "grad_norm": 0.3000977635383606,
      "learning_rate": 9.956320346634876e-05,
      "loss": 0.0886,
      "step": 5400
    },
    {
      "epoch": 3.2958984375e-05,
      "model_forward_time": 0.11737966537475586,
      "step": 5400
    },
    {
      "epoch": 3.2958984375e-05,
      "step": 5400,
      "training_step_time": 0.3892509937286377
    },
    {
      "epoch": 3.2965087890625e-05,
      "model_forward_time": 0.11509180068969727,
      "step": 5401
    },
    {
      "epoch": 3.2965087890625e-05,
      "step": 5401,
      "training_step_time": 0.39141273498535156
    },
    {
      "epoch": 3.297119140625e-05,
      "model_forward_time": 0.11522412300109863,
      "step": 5402
    },
    {
      "epoch": 3.297119140625e-05,
      "step": 5402,
      "training_step_time": 0.5951247215270996
    },
    {
      "epoch": 3.2977294921875e-05,
      "model_forward_time": 0.11443853378295898,
      "step": 5403
    },
    {
      "epoch": 3.2977294921875e-05,
      "step": 5403,
      "training_step_time": 0.39595770835876465
    },
    {
      "epoch": 3.29833984375e-05,
      "model_forward_time": 0.11478304862976074,
      "step": 5404
    },
    {
      "epoch": 3.29833984375e-05,
      "step": 5404,
      "training_step_time": 0.3913569450378418
    },
    {
      "epoch": 3.2989501953125e-05,
      "model_forward_time": 0.11547231674194336,
      "step": 5405
    },
    {
      "epoch": 3.2989501953125e-05,
      "step": 5405,
      "training_step_time": 0.4004976749420166
    },
    {
      "epoch": 3.299560546875e-05,
      "model_forward_time": 0.11547350883483887,
      "step": 5406
    },
    {
      "epoch": 3.299560546875e-05,
      "step": 5406,
      "training_step_time": 0.3968663215637207
    },
    {
      "epoch": 3.3001708984375e-05,
      "model_forward_time": 0.1150205135345459,
      "step": 5407
    },
    {
      "epoch": 3.3001708984375e-05,
      "step": 5407,
      "training_step_time": 0.3698561191558838
    },
    {
      "epoch": 3.30078125e-05,
      "model_forward_time": 0.11529064178466797,
      "step": 5408
    },
    {
      "epoch": 3.30078125e-05,
      "step": 5408,
      "training_step_time": 0.49477529525756836
    },
    {
      "epoch": 3.3013916015625e-05,
      "model_forward_time": 0.11641454696655273,
      "step": 5409
    },
    {
      "epoch": 3.3013916015625e-05,
      "step": 5409,
      "training_step_time": 0.4927959442138672
    },
    {
      "epoch": 3.302001953125e-05,
      "grad_norm": 0.28107115626335144,
      "learning_rate": 9.955956127646147e-05,
      "loss": 0.0922,
      "step": 5410
    },
    {
      "epoch": 3.302001953125e-05,
      "model_forward_time": 0.11487936973571777,
      "step": 5410
    },
    {
      "epoch": 3.302001953125e-05,
      "step": 5410,
      "training_step_time": 0.45070695877075195
    },
    {
      "epoch": 3.3026123046875e-05,
      "model_forward_time": 0.11566710472106934,
      "step": 5411
    },
    {
      "epoch": 3.3026123046875e-05,
      "step": 5411,
      "training_step_time": 0.4585700035095215
    },
    {
      "epoch": 3.30322265625e-05,
      "model_forward_time": 0.11580467224121094,
      "step": 5412
    },
    {
      "epoch": 3.30322265625e-05,
      "step": 5412,
      "training_step_time": 0.4407670497894287
    },
    {
      "epoch": 3.3038330078125e-05,
      "model_forward_time": 0.1145017147064209,
      "step": 5413
    },
    {
      "epoch": 3.3038330078125e-05,
      "step": 5413,
      "training_step_time": 0.4253251552581787
    },
    {
      "epoch": 3.304443359375e-05,
      "model_forward_time": 0.11492228507995605,
      "step": 5414
    },
    {
      "epoch": 3.304443359375e-05,
      "step": 5414,
      "training_step_time": 0.3879396915435791
    },
    {
      "epoch": 3.3050537109375e-05,
      "model_forward_time": 0.11556053161621094,
      "step": 5415
    },
    {
      "epoch": 3.3050537109375e-05,
      "step": 5415,
      "training_step_time": 0.395153284072876
    },
    {
      "epoch": 3.3056640625e-05,
      "model_forward_time": 0.11548471450805664,
      "step": 5416
    },
    {
      "epoch": 3.3056640625e-05,
      "step": 5416,
      "training_step_time": 0.4028775691986084
    },
    {
      "epoch": 3.3062744140625e-05,
      "model_forward_time": 0.11577486991882324,
      "step": 5417
    },
    {
      "epoch": 3.3062744140625e-05,
      "step": 5417,
      "training_step_time": 0.38152194023132324
    },
    {
      "epoch": 3.306884765625e-05,
      "model_forward_time": 0.1163320541381836,
      "step": 5418
    },
    {
      "epoch": 3.306884765625e-05,
      "step": 5418,
      "training_step_time": 0.3985767364501953
    },
    {
      "epoch": 3.3074951171875e-05,
      "model_forward_time": 0.11545920372009277,
      "step": 5419
    },
    {
      "epoch": 3.3074951171875e-05,
      "step": 5419,
      "training_step_time": 0.4021584987640381
    },
    {
      "epoch": 3.30810546875e-05,
      "grad_norm": 0.3873922526836395,
      "learning_rate": 9.955590403168799e-05,
      "loss": 0.0849,
      "step": 5420
    },
    {
      "epoch": 3.30810546875e-05,
      "model_forward_time": 0.11518287658691406,
      "step": 5420
    },
    {
      "epoch": 3.30810546875e-05,
      "step": 5420,
      "training_step_time": 0.6034150123596191
    },
    {
      "epoch": 3.3087158203125e-05,
      "model_forward_time": 0.1145622730255127,
      "step": 5421
    },
    {
      "epoch": 3.3087158203125e-05,
      "step": 5421,
      "training_step_time": 0.39965319633483887
    },
    {
      "epoch": 3.309326171875e-05,
      "model_forward_time": 0.11502242088317871,
      "step": 5422
    },
    {
      "epoch": 3.309326171875e-05,
      "step": 5422,
      "training_step_time": 0.44063258171081543
    },
    {
      "epoch": 3.3099365234375e-05,
      "model_forward_time": 0.11512327194213867,
      "step": 5423
    },
    {
      "epoch": 3.3099365234375e-05,
      "step": 5423,
      "training_step_time": 0.4907515048980713
    },
    {
      "epoch": 3.310546875e-05,
      "model_forward_time": 0.11485528945922852,
      "step": 5424
    },
    {
      "epoch": 3.310546875e-05,
      "step": 5424,
      "training_step_time": 0.4604148864746094
    },
    {
      "epoch": 3.3111572265625e-05,
      "model_forward_time": 0.11421942710876465,
      "step": 5425
    },
    {
      "epoch": 3.3111572265625e-05,
      "step": 5425,
      "training_step_time": 0.5014595985412598
    },
    {
      "epoch": 3.311767578125e-05,
      "model_forward_time": 0.1144404411315918,
      "step": 5426
    },
    {
      "epoch": 3.311767578125e-05,
      "step": 5426,
      "training_step_time": 0.4119603633880615
    },
    {
      "epoch": 3.3123779296875e-05,
      "model_forward_time": 0.1147150993347168,
      "step": 5427
    },
    {
      "epoch": 3.3123779296875e-05,
      "step": 5427,
      "training_step_time": 0.4124302864074707
    },
    {
      "epoch": 3.31298828125e-05,
      "model_forward_time": 0.11488938331604004,
      "step": 5428
    },
    {
      "epoch": 3.31298828125e-05,
      "step": 5428,
      "training_step_time": 0.39113879203796387
    },
    {
      "epoch": 3.3135986328125e-05,
      "model_forward_time": 0.11454510688781738,
      "step": 5429
    },
    {
      "epoch": 3.3135986328125e-05,
      "step": 5429,
      "training_step_time": 0.3870875835418701
    },
    {
      "epoch": 3.314208984375e-05,
      "grad_norm": 0.3881751298904419,
      "learning_rate": 9.955223173313931e-05,
      "loss": 0.0898,
      "step": 5430
    },
    {
      "epoch": 3.314208984375e-05,
      "model_forward_time": 0.11486649513244629,
      "step": 5430
    },
    {
      "epoch": 3.314208984375e-05,
      "step": 5430,
      "training_step_time": 0.3894364833831787
    },
    {
      "epoch": 3.3148193359375e-05,
      "model_forward_time": 0.11526322364807129,
      "step": 5431
    },
    {
      "epoch": 3.3148193359375e-05,
      "step": 5431,
      "training_step_time": 0.4032766819000244
    },
    {
      "epoch": 3.3154296875e-05,
      "model_forward_time": 0.1150670051574707,
      "step": 5432
    },
    {
      "epoch": 3.3154296875e-05,
      "step": 5432,
      "training_step_time": 0.6534926891326904
    },
    {
      "epoch": 3.3160400390625e-05,
      "model_forward_time": 0.11475348472595215,
      "step": 5433
    },
    {
      "epoch": 3.3160400390625e-05,
      "step": 5433,
      "training_step_time": 0.3880751132965088
    },
    {
      "epoch": 3.316650390625e-05,
      "model_forward_time": 0.11520624160766602,
      "step": 5434
    },
    {
      "epoch": 3.316650390625e-05,
      "step": 5434,
      "training_step_time": 0.382047176361084
    },
    {
      "epoch": 3.3172607421875e-05,
      "model_forward_time": 0.11512112617492676,
      "step": 5435
    },
    {
      "epoch": 3.3172607421875e-05,
      "step": 5435,
      "training_step_time": 0.395845890045166
    },
    {
      "epoch": 3.31787109375e-05,
      "model_forward_time": 0.11519813537597656,
      "step": 5436
    },
    {
      "epoch": 3.31787109375e-05,
      "step": 5436,
      "training_step_time": 0.4807310104370117
    },
    {
      "epoch": 3.3184814453125e-05,
      "model_forward_time": 0.11622786521911621,
      "step": 5437
    },
    {
      "epoch": 3.3184814453125e-05,
      "step": 5437,
      "training_step_time": 0.48986101150512695
    },
    {
      "epoch": 3.319091796875e-05,
      "model_forward_time": 0.11526226997375488,
      "step": 5438
    },
    {
      "epoch": 3.319091796875e-05,
      "step": 5438,
      "training_step_time": 0.6230220794677734
    },
    {
      "epoch": 3.3197021484375e-05,
      "model_forward_time": 0.11445474624633789,
      "step": 5439
    },
    {
      "epoch": 3.3197021484375e-05,
      "step": 5439,
      "training_step_time": 0.41591548919677734
    },
    {
      "epoch": 3.3203125e-05,
      "grad_norm": 0.35306504368782043,
      "learning_rate": 9.9548544381931e-05,
      "loss": 0.0859,
      "step": 5440
    },
    {
      "epoch": 3.3203125e-05,
      "model_forward_time": 0.11449146270751953,
      "step": 5440
    },
    {
      "epoch": 3.3203125e-05,
      "step": 5440,
      "training_step_time": 0.40688347816467285
    },
    {
      "epoch": 3.3209228515625e-05,
      "model_forward_time": 0.11493325233459473,
      "step": 5441
    },
    {
      "epoch": 3.3209228515625e-05,
      "step": 5441,
      "training_step_time": 0.46887779235839844
    },
    {
      "epoch": 3.321533203125e-05,
      "model_forward_time": 0.11612439155578613,
      "step": 5442
    },
    {
      "epoch": 3.321533203125e-05,
      "step": 5442,
      "training_step_time": 0.3951907157897949
    },
    {
      "epoch": 3.3221435546875e-05,
      "model_forward_time": 0.11437559127807617,
      "step": 5443
    },
    {
      "epoch": 3.3221435546875e-05,
      "step": 5443,
      "training_step_time": 0.3948171138763428
    },
    {
      "epoch": 3.32275390625e-05,
      "model_forward_time": 0.11564087867736816,
      "step": 5444
    },
    {
      "epoch": 3.32275390625e-05,
      "step": 5444,
      "training_step_time": 0.48946309089660645
    },
    {
      "epoch": 3.3233642578125e-05,
      "model_forward_time": 0.11512541770935059,
      "step": 5445
    },
    {
      "epoch": 3.3233642578125e-05,
      "step": 5445,
      "training_step_time": 0.4058043956756592
    },
    {
      "epoch": 3.323974609375e-05,
      "model_forward_time": 0.1148836612701416,
      "step": 5446
    },
    {
      "epoch": 3.323974609375e-05,
      "step": 5446,
      "training_step_time": 0.3969097137451172
    },
    {
      "epoch": 3.3245849609375e-05,
      "model_forward_time": 0.11543846130371094,
      "step": 5447
    },
    {
      "epoch": 3.3245849609375e-05,
      "step": 5447,
      "training_step_time": 0.3967165946960449
    },
    {
      "epoch": 3.3251953125e-05,
      "model_forward_time": 0.11574339866638184,
      "step": 5448
    },
    {
      "epoch": 3.3251953125e-05,
      "step": 5448,
      "training_step_time": 0.38595008850097656
    },
    {
      "epoch": 3.3258056640625e-05,
      "model_forward_time": 0.11527800559997559,
      "step": 5449
    },
    {
      "epoch": 3.3258056640625e-05,
      "step": 5449,
      "training_step_time": 0.40358686447143555
    },
    {
      "epoch": 3.326416015625e-05,
      "grad_norm": 0.4014861583709717,
      "learning_rate": 9.954484197918315e-05,
      "loss": 0.0958,
      "step": 5450
    },
    {
      "epoch": 3.326416015625e-05,
      "model_forward_time": 0.1161198616027832,
      "step": 5450
    },
    {
      "epoch": 3.326416015625e-05,
      "step": 5450,
      "training_step_time": 0.6473512649536133
    },
    {
      "epoch": 3.3270263671875e-05,
      "model_forward_time": 0.11520600318908691,
      "step": 5451
    },
    {
      "epoch": 3.3270263671875e-05,
      "step": 5451,
      "training_step_time": 0.4961884021759033
    },
    {
      "epoch": 3.32763671875e-05,
      "model_forward_time": 0.11453604698181152,
      "step": 5452
    },
    {
      "epoch": 3.32763671875e-05,
      "step": 5452,
      "training_step_time": 0.44370555877685547
    },
    {
      "epoch": 3.3282470703125e-05,
      "model_forward_time": 0.11484265327453613,
      "step": 5453
    },
    {
      "epoch": 3.3282470703125e-05,
      "step": 5453,
      "training_step_time": 0.41938257217407227
    },
    {
      "epoch": 3.328857421875e-05,
      "model_forward_time": 0.1150662899017334,
      "step": 5454
    },
    {
      "epoch": 3.328857421875e-05,
      "step": 5454,
      "training_step_time": 0.43308472633361816
    },
    {
      "epoch": 3.3294677734375e-05,
      "model_forward_time": 0.11464643478393555,
      "step": 5455
    },
    {
      "epoch": 3.3294677734375e-05,
      "step": 5455,
      "training_step_time": 0.4796288013458252
    },
    {
      "epoch": 3.330078125e-05,
      "model_forward_time": 0.11485671997070312,
      "step": 5456
    },
    {
      "epoch": 3.330078125e-05,
      "step": 5456,
      "training_step_time": 0.4329509735107422
    },
    {
      "epoch": 3.3306884765625e-05,
      "model_forward_time": 0.11460065841674805,
      "step": 5457
    },
    {
      "epoch": 3.3306884765625e-05,
      "step": 5457,
      "training_step_time": 0.40778064727783203
    },
    {
      "epoch": 3.331298828125e-05,
      "model_forward_time": 0.11447000503540039,
      "step": 5458
    },
    {
      "epoch": 3.331298828125e-05,
      "step": 5458,
      "training_step_time": 0.42105674743652344
    },
    {
      "epoch": 3.3319091796875e-05,
      "model_forward_time": 0.1147756576538086,
      "step": 5459
    },
    {
      "epoch": 3.3319091796875e-05,
      "step": 5459,
      "training_step_time": 0.3924121856689453
    },
    {
      "epoch": 3.33251953125e-05,
      "grad_norm": 0.19162505865097046,
      "learning_rate": 9.954112452602045e-05,
      "loss": 0.0828,
      "step": 5460
    },
    {
      "epoch": 3.33251953125e-05,
      "model_forward_time": 0.1152501106262207,
      "step": 5460
    },
    {
      "epoch": 3.33251953125e-05,
      "step": 5460,
      "training_step_time": 0.3979203701019287
    },
    {
      "epoch": 3.3331298828125e-05,
      "model_forward_time": 0.11516261100769043,
      "step": 5461
    },
    {
      "epoch": 3.3331298828125e-05,
      "step": 5461,
      "training_step_time": 0.3931002616882324
    },
    {
      "epoch": 3.333740234375e-05,
      "model_forward_time": 0.11507129669189453,
      "step": 5462
    },
    {
      "epoch": 3.333740234375e-05,
      "step": 5462,
      "training_step_time": 0.7080161571502686
    },
    {
      "epoch": 3.3343505859375e-05,
      "model_forward_time": 0.11507034301757812,
      "step": 5463
    },
    {
      "epoch": 3.3343505859375e-05,
      "step": 5463,
      "training_step_time": 0.40023016929626465
    },
    {
      "epoch": 3.3349609375e-05,
      "model_forward_time": 0.11512970924377441,
      "step": 5464
    },
    {
      "epoch": 3.3349609375e-05,
      "step": 5464,
      "training_step_time": 0.42345643043518066
    },
    {
      "epoch": 3.3355712890625e-05,
      "model_forward_time": 0.11471438407897949,
      "step": 5465
    },
    {
      "epoch": 3.3355712890625e-05,
      "step": 5465,
      "training_step_time": 0.4620509147644043
    },
    {
      "epoch": 3.336181640625e-05,
      "model_forward_time": 0.1148529052734375,
      "step": 5466
    },
    {
      "epoch": 3.336181640625e-05,
      "step": 5466,
      "training_step_time": 0.45222973823547363
    },
    {
      "epoch": 3.3367919921875e-05,
      "model_forward_time": 0.11529350280761719,
      "step": 5467
    },
    {
      "epoch": 3.3367919921875e-05,
      "step": 5467,
      "training_step_time": 0.38748788833618164
    },
    {
      "epoch": 3.33740234375e-05,
      "model_forward_time": 0.11452245712280273,
      "step": 5468
    },
    {
      "epoch": 3.33740234375e-05,
      "step": 5468,
      "training_step_time": 0.4636697769165039
    },
    {
      "epoch": 3.3380126953125e-05,
      "model_forward_time": 0.11483144760131836,
      "step": 5469
    },
    {
      "epoch": 3.3380126953125e-05,
      "step": 5469,
      "training_step_time": 0.5103662014007568
    },
    {
      "epoch": 3.338623046875e-05,
      "grad_norm": 0.23485657572746277,
      "learning_rate": 9.953739202357218e-05,
      "loss": 0.0858,
      "step": 5470
    },
    {
      "epoch": 3.338623046875e-05,
      "model_forward_time": 0.11466860771179199,
      "step": 5470
    },
    {
      "epoch": 3.338623046875e-05,
      "step": 5470,
      "training_step_time": 0.4516632556915283
    },
    {
      "epoch": 3.3392333984375e-05,
      "model_forward_time": 0.11468267440795898,
      "step": 5471
    },
    {
      "epoch": 3.3392333984375e-05,
      "step": 5471,
      "training_step_time": 0.4569852352142334
    },
    {
      "epoch": 3.33984375e-05,
      "model_forward_time": 0.11554503440856934,
      "step": 5472
    },
    {
      "epoch": 3.33984375e-05,
      "step": 5472,
      "training_step_time": 0.38379549980163574
    },
    {
      "epoch": 3.3404541015625e-05,
      "model_forward_time": 0.11512446403503418,
      "step": 5473
    },
    {
      "epoch": 3.3404541015625e-05,
      "step": 5473,
      "training_step_time": 0.3652350902557373
    },
    {
      "epoch": 3.341064453125e-05,
      "model_forward_time": 0.11462640762329102,
      "step": 5474
    },
    {
      "epoch": 3.341064453125e-05,
      "step": 5474,
      "training_step_time": 0.5465133190155029
    },
    {
      "epoch": 3.3416748046875e-05,
      "model_forward_time": 0.11436963081359863,
      "step": 5475
    },
    {
      "epoch": 3.3416748046875e-05,
      "step": 5475,
      "training_step_time": 0.3868095874786377
    },
    {
      "epoch": 3.34228515625e-05,
      "model_forward_time": 0.11509823799133301,
      "step": 5476
    },
    {
      "epoch": 3.34228515625e-05,
      "step": 5476,
      "training_step_time": 0.38501453399658203
    },
    {
      "epoch": 3.3428955078125e-05,
      "model_forward_time": 0.11504864692687988,
      "step": 5477
    },
    {
      "epoch": 3.3428955078125e-05,
      "step": 5477,
      "training_step_time": 0.38577699661254883
    },
    {
      "epoch": 3.343505859375e-05,
      "model_forward_time": 0.11507964134216309,
      "step": 5478
    },
    {
      "epoch": 3.343505859375e-05,
      "step": 5478,
      "training_step_time": 0.3733644485473633
    },
    {
      "epoch": 3.3441162109375e-05,
      "model_forward_time": 0.11485791206359863,
      "step": 5479
    },
    {
      "epoch": 3.3441162109375e-05,
      "step": 5479,
      "training_step_time": 0.4879770278930664
    },
    {
      "epoch": 3.3447265625e-05,
      "grad_norm": 0.1780766099691391,
      "learning_rate": 9.953364447297219e-05,
      "loss": 0.0836,
      "step": 5480
    },
    {
      "epoch": 3.3447265625e-05,
      "model_forward_time": 0.11492538452148438,
      "step": 5480
    },
    {
      "epoch": 3.3447265625e-05,
      "step": 5480,
      "training_step_time": 0.5649533271789551
    },
    {
      "epoch": 3.3453369140625e-05,
      "model_forward_time": 0.11437606811523438,
      "step": 5481
    },
    {
      "epoch": 3.3453369140625e-05,
      "step": 5481,
      "training_step_time": 0.40009593963623047
    },
    {
      "epoch": 3.345947265625e-05,
      "model_forward_time": 0.11493182182312012,
      "step": 5482
    },
    {
      "epoch": 3.345947265625e-05,
      "step": 5482,
      "training_step_time": 0.4600393772125244
    },
    {
      "epoch": 3.3465576171875e-05,
      "model_forward_time": 0.1147756576538086,
      "step": 5483
    },
    {
      "epoch": 3.3465576171875e-05,
      "step": 5483,
      "training_step_time": 0.41289758682250977
    },
    {
      "epoch": 3.34716796875e-05,
      "model_forward_time": 0.11512613296508789,
      "step": 5484
    },
    {
      "epoch": 3.34716796875e-05,
      "step": 5484,
      "training_step_time": 0.41098999977111816
    },
    {
      "epoch": 3.3477783203125e-05,
      "model_forward_time": 0.11477112770080566,
      "step": 5485
    },
    {
      "epoch": 3.3477783203125e-05,
      "step": 5485,
      "training_step_time": 0.3923811912536621
    },
    {
      "epoch": 3.348388671875e-05,
      "model_forward_time": 0.11549854278564453,
      "step": 5486
    },
    {
      "epoch": 3.348388671875e-05,
      "step": 5486,
      "training_step_time": 0.40743374824523926
    },
    {
      "epoch": 3.3489990234375e-05,
      "model_forward_time": 0.11521482467651367,
      "step": 5487
    },
    {
      "epoch": 3.3489990234375e-05,
      "step": 5487,
      "training_step_time": 0.3914341926574707
    },
    {
      "epoch": 3.349609375e-05,
      "model_forward_time": 0.11511611938476562,
      "step": 5488
    },
    {
      "epoch": 3.349609375e-05,
      "step": 5488,
      "training_step_time": 0.3937652111053467
    },
    {
      "epoch": 3.3502197265625e-05,
      "model_forward_time": 0.11508440971374512,
      "step": 5489
    },
    {
      "epoch": 3.3502197265625e-05,
      "step": 5489,
      "training_step_time": 0.39177465438842773
    },
    {
      "epoch": 3.350830078125e-05,
      "grad_norm": 0.28228649497032166,
      "learning_rate": 9.952988187535886e-05,
      "loss": 0.0817,
      "step": 5490
    },
    {
      "epoch": 3.350830078125e-05,
      "model_forward_time": 0.11560702323913574,
      "step": 5490
    },
    {
      "epoch": 3.350830078125e-05,
      "step": 5490,
      "training_step_time": 0.401731014251709
    },
    {
      "epoch": 3.3514404296875e-05,
      "model_forward_time": 0.11608004570007324,
      "step": 5491
    },
    {
      "epoch": 3.3514404296875e-05,
      "step": 5491,
      "training_step_time": 0.39912939071655273
    },
    {
      "epoch": 3.35205078125e-05,
      "model_forward_time": 0.11577534675598145,
      "step": 5492
    },
    {
      "epoch": 3.35205078125e-05,
      "step": 5492,
      "training_step_time": 0.5638213157653809
    },
    {
      "epoch": 3.3526611328125e-05,
      "model_forward_time": 0.11510848999023438,
      "step": 5493
    },
    {
      "epoch": 3.3526611328125e-05,
      "step": 5493,
      "training_step_time": 0.40889883041381836
    },
    {
      "epoch": 3.353271484375e-05,
      "model_forward_time": 0.11526298522949219,
      "step": 5494
    },
    {
      "epoch": 3.353271484375e-05,
      "step": 5494,
      "training_step_time": 0.49020910263061523
    },
    {
      "epoch": 3.3538818359375e-05,
      "model_forward_time": 0.11521553993225098,
      "step": 5495
    },
    {
      "epoch": 3.3538818359375e-05,
      "step": 5495,
      "training_step_time": 0.4583315849304199
    },
    {
      "epoch": 3.3544921875e-05,
      "model_forward_time": 0.11543059349060059,
      "step": 5496
    },
    {
      "epoch": 3.3544921875e-05,
      "step": 5496,
      "training_step_time": 0.43517613410949707
    },
    {
      "epoch": 3.3551025390625e-05,
      "model_forward_time": 0.11392951011657715,
      "step": 5497
    },
    {
      "epoch": 3.3551025390625e-05,
      "step": 5497,
      "training_step_time": 0.43408679962158203
    },
    {
      "epoch": 3.355712890625e-05,
      "model_forward_time": 0.11474847793579102,
      "step": 5498
    },
    {
      "epoch": 3.355712890625e-05,
      "step": 5498,
      "training_step_time": 0.38762521743774414
    },
    {
      "epoch": 3.3563232421875e-05,
      "model_forward_time": 0.11510682106018066,
      "step": 5499
    },
    {
      "epoch": 3.3563232421875e-05,
      "step": 5499,
      "training_step_time": 0.39330267906188965
    },
    {
      "epoch": 3.35693359375e-05,
      "grad_norm": 0.2894534766674042,
      "learning_rate": 9.952610423187516e-05,
      "loss": 0.0809,
      "step": 5500
    },
    {
      "epoch": 3.35693359375e-05,
      "model_forward_time": 0.11472797393798828,
      "step": 5500
    },
    {
      "epoch": 3.35693359375e-05,
      "step": 5500,
      "training_step_time": 0.37598371505737305
    },
    {
      "epoch": 3.3575439453125e-05,
      "model_forward_time": 0.11503863334655762,
      "step": 5501
    },
    {
      "epoch": 3.3575439453125e-05,
      "step": 5501,
      "training_step_time": 0.396470308303833
    },
    {
      "epoch": 3.358154296875e-05,
      "model_forward_time": 0.11532139778137207,
      "step": 5502
    },
    {
      "epoch": 3.358154296875e-05,
      "step": 5502,
      "training_step_time": 0.3950927257537842
    },
    {
      "epoch": 3.3587646484375e-05,
      "model_forward_time": 0.11476421356201172,
      "step": 5503
    },
    {
      "epoch": 3.3587646484375e-05,
      "step": 5503,
      "training_step_time": 0.39397382736206055
    },
    {
      "epoch": 3.359375e-05,
      "model_forward_time": 0.11497378349304199,
      "step": 5504
    },
    {
      "epoch": 3.359375e-05,
      "step": 5504,
      "training_step_time": 0.5912642478942871
    },
    {
      "epoch": 3.3599853515625e-05,
      "model_forward_time": 0.11486220359802246,
      "step": 5505
    },
    {
      "epoch": 3.3599853515625e-05,
      "step": 5505,
      "training_step_time": 0.3956270217895508
    },
    {
      "epoch": 3.360595703125e-05,
      "model_forward_time": 0.11497926712036133,
      "step": 5506
    },
    {
      "epoch": 3.360595703125e-05,
      "step": 5506,
      "training_step_time": 0.3861103057861328
    },
    {
      "epoch": 3.3612060546875e-05,
      "model_forward_time": 0.11558985710144043,
      "step": 5507
    },
    {
      "epoch": 3.3612060546875e-05,
      "step": 5507,
      "training_step_time": 0.37441515922546387
    },
    {
      "epoch": 3.36181640625e-05,
      "model_forward_time": 0.11490941047668457,
      "step": 5508
    },
    {
      "epoch": 3.36181640625e-05,
      "step": 5508,
      "training_step_time": 0.48035717010498047
    },
    {
      "epoch": 3.3624267578125e-05,
      "model_forward_time": 0.11577558517456055,
      "step": 5509
    },
    {
      "epoch": 3.3624267578125e-05,
      "step": 5509,
      "training_step_time": 0.49171996116638184
    },
    {
      "epoch": 3.363037109375e-05,
      "grad_norm": 0.3628395199775696,
      "learning_rate": 9.952231154366868e-05,
      "loss": 0.0822,
      "step": 5510
    },
    {
      "epoch": 3.363037109375e-05,
      "model_forward_time": 0.11457371711730957,
      "step": 5510
    },
    {
      "epoch": 3.363037109375e-05,
      "step": 5510,
      "training_step_time": 0.5486021041870117
    },
    {
      "epoch": 3.3636474609375e-05,
      "model_forward_time": 0.11412358283996582,
      "step": 5511
    },
    {
      "epoch": 3.3636474609375e-05,
      "step": 5511,
      "training_step_time": 0.46624326705932617
    },
    {
      "epoch": 3.3642578125e-05,
      "model_forward_time": 0.11453580856323242,
      "step": 5512
    },
    {
      "epoch": 3.3642578125e-05,
      "step": 5512,
      "training_step_time": 0.4173133373260498
    },
    {
      "epoch": 3.3648681640625e-05,
      "model_forward_time": 0.11347389221191406,
      "step": 5513
    },
    {
      "epoch": 3.3648681640625e-05,
      "step": 5513,
      "training_step_time": 0.39122724533081055
    },
    {
      "epoch": 3.365478515625e-05,
      "model_forward_time": 0.11452102661132812,
      "step": 5514
    },
    {
      "epoch": 3.365478515625e-05,
      "step": 5514,
      "training_step_time": 0.38417482376098633
    },
    {
      "epoch": 3.3660888671875e-05,
      "model_forward_time": 0.11486220359802246,
      "step": 5515
    },
    {
      "epoch": 3.3660888671875e-05,
      "step": 5515,
      "training_step_time": 0.38733506202697754
    },
    {
      "epoch": 3.36669921875e-05,
      "model_forward_time": 0.11597895622253418,
      "step": 5516
    },
    {
      "epoch": 3.36669921875e-05,
      "step": 5516,
      "training_step_time": 0.63602614402771
    },
    {
      "epoch": 3.3673095703125e-05,
      "model_forward_time": 0.11469268798828125,
      "step": 5517
    },
    {
      "epoch": 3.3673095703125e-05,
      "step": 5517,
      "training_step_time": 0.3925364017486572
    },
    {
      "epoch": 3.367919921875e-05,
      "model_forward_time": 0.11538481712341309,
      "step": 5518
    },
    {
      "epoch": 3.367919921875e-05,
      "step": 5518,
      "training_step_time": 0.3838188648223877
    },
    {
      "epoch": 3.3685302734375e-05,
      "model_forward_time": 0.11492919921875,
      "step": 5519
    },
    {
      "epoch": 3.3685302734375e-05,
      "step": 5519,
      "training_step_time": 0.386401891708374
    },
    {
      "epoch": 3.369140625e-05,
      "grad_norm": 0.2567913830280304,
      "learning_rate": 9.95185038118915e-05,
      "loss": 0.0781,
      "step": 5520
    },
    {
      "epoch": 3.369140625e-05,
      "model_forward_time": 0.11420321464538574,
      "step": 5520
    },
    {
      "epoch": 3.369140625e-05,
      "step": 5520,
      "training_step_time": 0.3880186080932617
    },
    {
      "epoch": 3.3697509765625e-05,
      "model_forward_time": 0.11485075950622559,
      "step": 5521
    },
    {
      "epoch": 3.3697509765625e-05,
      "step": 5521,
      "training_step_time": 0.378201961517334
    },
    {
      "epoch": 3.370361328125e-05,
      "model_forward_time": 0.11574935913085938,
      "step": 5522
    },
    {
      "epoch": 3.370361328125e-05,
      "step": 5522,
      "training_step_time": 0.760887622833252
    },
    {
      "epoch": 3.3709716796875e-05,
      "model_forward_time": 0.11510610580444336,
      "step": 5523
    },
    {
      "epoch": 3.3709716796875e-05,
      "step": 5523,
      "training_step_time": 0.41564106941223145
    },
    {
      "epoch": 3.37158203125e-05,
      "model_forward_time": 0.11409592628479004,
      "step": 5524
    },
    {
      "epoch": 3.37158203125e-05,
      "step": 5524,
      "training_step_time": 0.4676077365875244
    },
    {
      "epoch": 3.3721923828125e-05,
      "model_forward_time": 0.11455035209655762,
      "step": 5525
    },
    {
      "epoch": 3.3721923828125e-05,
      "step": 5525,
      "training_step_time": 0.49942588806152344
    },
    {
      "epoch": 3.372802734375e-05,
      "model_forward_time": 0.11460328102111816,
      "step": 5526
    },
    {
      "epoch": 3.372802734375e-05,
      "step": 5526,
      "training_step_time": 0.38439393043518066
    },
    {
      "epoch": 3.3734130859375e-05,
      "model_forward_time": 0.11425924301147461,
      "step": 5527
    },
    {
      "epoch": 3.3734130859375e-05,
      "step": 5527,
      "training_step_time": 0.38358592987060547
    },
    {
      "epoch": 3.3740234375e-05,
      "model_forward_time": 0.11558246612548828,
      "step": 5528
    },
    {
      "epoch": 3.3740234375e-05,
      "step": 5528,
      "training_step_time": 0.38487720489501953
    },
    {
      "epoch": 3.3746337890625e-05,
      "model_forward_time": 0.11486053466796875,
      "step": 5529
    },
    {
      "epoch": 3.3746337890625e-05,
      "step": 5529,
      "training_step_time": 0.3913853168487549
    },
    {
      "epoch": 3.375244140625e-05,
      "grad_norm": 0.2663637399673462,
      "learning_rate": 9.951468103770032e-05,
      "loss": 0.0939,
      "step": 5530
    },
    {
      "epoch": 3.375244140625e-05,
      "model_forward_time": 0.11489391326904297,
      "step": 5530
    },
    {
      "epoch": 3.375244140625e-05,
      "step": 5530,
      "training_step_time": 0.3807969093322754
    },
    {
      "epoch": 3.3758544921875e-05,
      "model_forward_time": 0.11524271965026855,
      "step": 5531
    },
    {
      "epoch": 3.3758544921875e-05,
      "step": 5531,
      "training_step_time": 0.3979370594024658
    },
    {
      "epoch": 3.37646484375e-05,
      "model_forward_time": 0.11522293090820312,
      "step": 5532
    },
    {
      "epoch": 3.37646484375e-05,
      "step": 5532,
      "training_step_time": 0.3974297046661377
    },
    {
      "epoch": 3.3770751953125e-05,
      "model_forward_time": 0.11493229866027832,
      "step": 5533
    },
    {
      "epoch": 3.3770751953125e-05,
      "step": 5533,
      "training_step_time": 0.4119422435760498
    },
    {
      "epoch": 3.377685546875e-05,
      "model_forward_time": 0.11536383628845215,
      "step": 5534
    },
    {
      "epoch": 3.377685546875e-05,
      "step": 5534,
      "training_step_time": 0.5411005020141602
    },
    {
      "epoch": 3.3782958984375e-05,
      "model_forward_time": 0.1164238452911377,
      "step": 5535
    },
    {
      "epoch": 3.3782958984375e-05,
      "step": 5535,
      "training_step_time": 0.36932992935180664
    },
    {
      "epoch": 3.37890625e-05,
      "model_forward_time": 0.1148979663848877,
      "step": 5536
    },
    {
      "epoch": 3.37890625e-05,
      "step": 5536,
      "training_step_time": 0.4942312240600586
    },
    {
      "epoch": 3.3795166015625e-05,
      "model_forward_time": 0.11519002914428711,
      "step": 5537
    },
    {
      "epoch": 3.3795166015625e-05,
      "step": 5537,
      "training_step_time": 0.48441219329833984
    },
    {
      "epoch": 3.380126953125e-05,
      "model_forward_time": 0.11507797241210938,
      "step": 5538
    },
    {
      "epoch": 3.380126953125e-05,
      "step": 5538,
      "training_step_time": 0.4451780319213867
    },
    {
      "epoch": 3.3807373046875e-05,
      "model_forward_time": 0.11591768264770508,
      "step": 5539
    },
    {
      "epoch": 3.3807373046875e-05,
      "step": 5539,
      "training_step_time": 0.42331981658935547
    },
    {
      "epoch": 3.38134765625e-05,
      "grad_norm": 0.37027305364608765,
      "learning_rate": 9.951084322225641e-05,
      "loss": 0.0832,
      "step": 5540
    },
    {
      "epoch": 3.38134765625e-05,
      "model_forward_time": 0.1144874095916748,
      "step": 5540
    },
    {
      "epoch": 3.38134765625e-05,
      "step": 5540,
      "training_step_time": 0.4127082824707031
    },
    {
      "epoch": 3.3819580078125e-05,
      "model_forward_time": 0.11488676071166992,
      "step": 5541
    },
    {
      "epoch": 3.3819580078125e-05,
      "step": 5541,
      "training_step_time": 0.39014124870300293
    },
    {
      "epoch": 3.382568359375e-05,
      "model_forward_time": 0.11601090431213379,
      "step": 5542
    },
    {
      "epoch": 3.382568359375e-05,
      "step": 5542,
      "training_step_time": 0.38746166229248047
    },
    {
      "epoch": 3.3831787109375e-05,
      "model_forward_time": 0.1152193546295166,
      "step": 5543
    },
    {
      "epoch": 3.3831787109375e-05,
      "step": 5543,
      "training_step_time": 0.38491320610046387
    },
    {
      "epoch": 3.3837890625e-05,
      "model_forward_time": 0.11485528945922852,
      "step": 5544
    },
    {
      "epoch": 3.3837890625e-05,
      "step": 5544,
      "training_step_time": 0.38669276237487793
    },
    {
      "epoch": 3.3843994140625e-05,
      "model_forward_time": 0.1148688793182373,
      "step": 5545
    },
    {
      "epoch": 3.3843994140625e-05,
      "step": 5545,
      "training_step_time": 0.40038323402404785
    },
    {
      "epoch": 3.385009765625e-05,
      "model_forward_time": 0.11521339416503906,
      "step": 5546
    },
    {
      "epoch": 3.385009765625e-05,
      "step": 5546,
      "training_step_time": 0.44939613342285156
    },
    {
      "epoch": 3.3856201171875e-05,
      "model_forward_time": 0.11534285545349121,
      "step": 5547
    },
    {
      "epoch": 3.3856201171875e-05,
      "step": 5547,
      "training_step_time": 0.3945004940032959
    },
    {
      "epoch": 3.38623046875e-05,
      "model_forward_time": 0.1163492202758789,
      "step": 5548
    },
    {
      "epoch": 3.38623046875e-05,
      "step": 5548,
      "training_step_time": 0.3948984146118164
    },
    {
      "epoch": 3.3868408203125e-05,
      "model_forward_time": 0.11568856239318848,
      "step": 5549
    },
    {
      "epoch": 3.3868408203125e-05,
      "step": 5549,
      "training_step_time": 0.39960193634033203
    },
    {
      "epoch": 3.387451171875e-05,
      "grad_norm": 0.29956814646720886,
      "learning_rate": 9.950699036672559e-05,
      "loss": 0.0792,
      "step": 5550
    },
    {
      "epoch": 3.387451171875e-05,
      "model_forward_time": 0.11595368385314941,
      "step": 5550
    },
    {
      "epoch": 3.387451171875e-05,
      "step": 5550,
      "training_step_time": 0.4411923885345459
    },
    {
      "epoch": 3.3880615234375e-05,
      "model_forward_time": 0.11594271659851074,
      "step": 5551
    },
    {
      "epoch": 3.3880615234375e-05,
      "step": 5551,
      "training_step_time": 0.46652936935424805
    },
    {
      "epoch": 3.388671875e-05,
      "model_forward_time": 0.11578941345214844,
      "step": 5552
    },
    {
      "epoch": 3.388671875e-05,
      "step": 5552,
      "training_step_time": 0.6314630508422852
    },
    {
      "epoch": 3.3892822265625e-05,
      "model_forward_time": 0.11549973487854004,
      "step": 5553
    },
    {
      "epoch": 3.3892822265625e-05,
      "step": 5553,
      "training_step_time": 0.48429346084594727
    },
    {
      "epoch": 3.389892578125e-05,
      "model_forward_time": 0.11509418487548828,
      "step": 5554
    },
    {
      "epoch": 3.389892578125e-05,
      "step": 5554,
      "training_step_time": 0.422853946685791
    },
    {
      "epoch": 3.3905029296875e-05,
      "model_forward_time": 0.1139838695526123,
      "step": 5555
    },
    {
      "epoch": 3.3905029296875e-05,
      "step": 5555,
      "training_step_time": 0.3879718780517578
    },
    {
      "epoch": 3.39111328125e-05,
      "model_forward_time": 0.11434507369995117,
      "step": 5556
    },
    {
      "epoch": 3.39111328125e-05,
      "step": 5556,
      "training_step_time": 0.4097733497619629
    },
    {
      "epoch": 3.3917236328125e-05,
      "model_forward_time": 0.1147451400756836,
      "step": 5557
    },
    {
      "epoch": 3.3917236328125e-05,
      "step": 5557,
      "training_step_time": 0.3959527015686035
    },
    {
      "epoch": 3.392333984375e-05,
      "model_forward_time": 0.11546921730041504,
      "step": 5558
    },
    {
      "epoch": 3.392333984375e-05,
      "step": 5558,
      "training_step_time": 0.44823384284973145
    },
    {
      "epoch": 3.3929443359375e-05,
      "model_forward_time": 0.11573171615600586,
      "step": 5559
    },
    {
      "epoch": 3.3929443359375e-05,
      "step": 5559,
      "training_step_time": 0.3897411823272705
    },
    {
      "epoch": 3.3935546875e-05,
      "grad_norm": 0.2884554862976074,
      "learning_rate": 9.950312247227825e-05,
      "loss": 0.0881,
      "step": 5560
    },
    {
      "epoch": 3.3935546875e-05,
      "model_forward_time": 0.11438608169555664,
      "step": 5560
    },
    {
      "epoch": 3.3935546875e-05,
      "step": 5560,
      "training_step_time": 0.3899571895599365
    },
    {
      "epoch": 3.3941650390625e-05,
      "model_forward_time": 0.11531996726989746,
      "step": 5561
    },
    {
      "epoch": 3.3941650390625e-05,
      "step": 5561,
      "training_step_time": 0.39384889602661133
    },
    {
      "epoch": 3.394775390625e-05,
      "model_forward_time": 0.11458897590637207,
      "step": 5562
    },
    {
      "epoch": 3.394775390625e-05,
      "step": 5562,
      "training_step_time": 0.3935072422027588
    },
    {
      "epoch": 3.3953857421875e-05,
      "model_forward_time": 0.11503243446350098,
      "step": 5563
    },
    {
      "epoch": 3.3953857421875e-05,
      "step": 5563,
      "training_step_time": 0.4123260974884033
    },
    {
      "epoch": 3.39599609375e-05,
      "model_forward_time": 0.1149137020111084,
      "step": 5564
    },
    {
      "epoch": 3.39599609375e-05,
      "step": 5564,
      "training_step_time": 0.5825889110565186
    },
    {
      "epoch": 3.3966064453125e-05,
      "model_forward_time": 0.11516356468200684,
      "step": 5565
    },
    {
      "epoch": 3.3966064453125e-05,
      "step": 5565,
      "training_step_time": 0.47748756408691406
    },
    {
      "epoch": 3.397216796875e-05,
      "model_forward_time": 0.11466646194458008,
      "step": 5566
    },
    {
      "epoch": 3.397216796875e-05,
      "step": 5566,
      "training_step_time": 0.48513197898864746
    },
    {
      "epoch": 3.3978271484375e-05,
      "model_forward_time": 0.11510276794433594,
      "step": 5567
    },
    {
      "epoch": 3.3978271484375e-05,
      "step": 5567,
      "training_step_time": 0.46882057189941406
    },
    {
      "epoch": 3.3984375e-05,
      "model_forward_time": 0.1143949031829834,
      "step": 5568
    },
    {
      "epoch": 3.3984375e-05,
      "step": 5568,
      "training_step_time": 0.4566471576690674
    },
    {
      "epoch": 3.3990478515625e-05,
      "model_forward_time": 0.11395740509033203,
      "step": 5569
    },
    {
      "epoch": 3.3990478515625e-05,
      "step": 5569,
      "training_step_time": 0.38892531394958496
    },
    {
      "epoch": 3.399658203125e-05,
      "grad_norm": 0.23591193556785583,
      "learning_rate": 9.949923954008935e-05,
      "loss": 0.0815,
      "step": 5570
    },
    {
      "epoch": 3.399658203125e-05,
      "model_forward_time": 0.11473917961120605,
      "step": 5570
    },
    {
      "epoch": 3.399658203125e-05,
      "step": 5570,
      "training_step_time": 0.3936269283294678
    },
    {
      "epoch": 3.4002685546875e-05,
      "model_forward_time": 0.11469340324401855,
      "step": 5571
    },
    {
      "epoch": 3.4002685546875e-05,
      "step": 5571,
      "training_step_time": 0.3753540515899658
    },
    {
      "epoch": 3.40087890625e-05,
      "model_forward_time": 0.11458683013916016,
      "step": 5572
    },
    {
      "epoch": 3.40087890625e-05,
      "step": 5572,
      "training_step_time": 0.3825960159301758
    },
    {
      "epoch": 3.4014892578125e-05,
      "model_forward_time": 0.11529850959777832,
      "step": 5573
    },
    {
      "epoch": 3.4014892578125e-05,
      "step": 5573,
      "training_step_time": 0.3903682231903076
    },
    {
      "epoch": 3.402099609375e-05,
      "model_forward_time": 0.11527824401855469,
      "step": 5574
    },
    {
      "epoch": 3.402099609375e-05,
      "step": 5574,
      "training_step_time": 0.4038417339324951
    },
    {
      "epoch": 3.4027099609375e-05,
      "model_forward_time": 0.11522245407104492,
      "step": 5575
    },
    {
      "epoch": 3.4027099609375e-05,
      "step": 5575,
      "training_step_time": 0.40032386779785156
    },
    {
      "epoch": 3.4033203125e-05,
      "model_forward_time": 0.11432814598083496,
      "step": 5576
    },
    {
      "epoch": 3.4033203125e-05,
      "step": 5576,
      "training_step_time": 0.4106011390686035
    },
    {
      "epoch": 3.4039306640625e-05,
      "model_forward_time": 0.11560535430908203,
      "step": 5577
    },
    {
      "epoch": 3.4039306640625e-05,
      "step": 5577,
      "training_step_time": 0.40967869758605957
    },
    {
      "epoch": 3.404541015625e-05,
      "model_forward_time": 0.11508417129516602,
      "step": 5578
    },
    {
      "epoch": 3.404541015625e-05,
      "step": 5578,
      "training_step_time": 0.36699414253234863
    },
    {
      "epoch": 3.4051513671875e-05,
      "model_forward_time": 0.1152033805847168,
      "step": 5579
    },
    {
      "epoch": 3.4051513671875e-05,
      "step": 5579,
      "training_step_time": 0.46158885955810547
    },
    {
      "epoch": 3.40576171875e-05,
      "grad_norm": 0.24947351217269897,
      "learning_rate": 9.949534157133844e-05,
      "loss": 0.0856,
      "step": 5580
    },
    {
      "epoch": 3.40576171875e-05,
      "model_forward_time": 0.11462903022766113,
      "step": 5580
    },
    {
      "epoch": 3.40576171875e-05,
      "step": 5580,
      "training_step_time": 0.3995521068572998
    },
    {
      "epoch": 3.4063720703125e-05,
      "model_forward_time": 0.11537623405456543,
      "step": 5581
    },
    {
      "epoch": 3.4063720703125e-05,
      "step": 5581,
      "training_step_time": 0.463165283203125
    },
    {
      "epoch": 3.406982421875e-05,
      "model_forward_time": 0.1145319938659668,
      "step": 5582
    },
    {
      "epoch": 3.406982421875e-05,
      "step": 5582,
      "training_step_time": 0.6315922737121582
    },
    {
      "epoch": 3.4075927734375e-05,
      "model_forward_time": 0.11439824104309082,
      "step": 5583
    },
    {
      "epoch": 3.4075927734375e-05,
      "step": 5583,
      "training_step_time": 0.3890833854675293
    },
    {
      "epoch": 3.408203125e-05,
      "model_forward_time": 0.11434507369995117,
      "step": 5584
    },
    {
      "epoch": 3.408203125e-05,
      "step": 5584,
      "training_step_time": 0.3773772716522217
    },
    {
      "epoch": 3.4088134765625e-05,
      "model_forward_time": 0.11495661735534668,
      "step": 5585
    },
    {
      "epoch": 3.4088134765625e-05,
      "step": 5585,
      "training_step_time": 0.3947713375091553
    },
    {
      "epoch": 3.409423828125e-05,
      "model_forward_time": 0.11475300788879395,
      "step": 5586
    },
    {
      "epoch": 3.409423828125e-05,
      "step": 5586,
      "training_step_time": 0.3923153877258301
    },
    {
      "epoch": 3.4100341796875e-05,
      "model_forward_time": 0.11459016799926758,
      "step": 5587
    },
    {
      "epoch": 3.4100341796875e-05,
      "step": 5587,
      "training_step_time": 0.391376256942749
    },
    {
      "epoch": 3.41064453125e-05,
      "model_forward_time": 0.11454319953918457,
      "step": 5588
    },
    {
      "epoch": 3.41064453125e-05,
      "step": 5588,
      "training_step_time": 0.6440417766571045
    },
    {
      "epoch": 3.4112548828125e-05,
      "model_forward_time": 0.11488080024719238,
      "step": 5589
    },
    {
      "epoch": 3.4112548828125e-05,
      "step": 5589,
      "training_step_time": 0.46262431144714355
    },
    {
      "epoch": 3.411865234375e-05,
      "grad_norm": 0.29944515228271484,
      "learning_rate": 9.949142856720961e-05,
      "loss": 0.0819,
      "step": 5590
    },
    {
      "epoch": 3.411865234375e-05,
      "model_forward_time": 0.11511921882629395,
      "step": 5590
    },
    {
      "epoch": 3.411865234375e-05,
      "step": 5590,
      "training_step_time": 0.42185020446777344
    },
    {
      "epoch": 3.4124755859375e-05,
      "model_forward_time": 0.1143503189086914,
      "step": 5591
    },
    {
      "epoch": 3.4124755859375e-05,
      "step": 5591,
      "training_step_time": 0.386852502822876
    },
    {
      "epoch": 3.4130859375e-05,
      "model_forward_time": 0.1150517463684082,
      "step": 5592
    },
    {
      "epoch": 3.4130859375e-05,
      "step": 5592,
      "training_step_time": 0.37683701515197754
    },
    {
      "epoch": 3.4136962890625e-05,
      "model_forward_time": 0.11434721946716309,
      "step": 5593
    },
    {
      "epoch": 3.4136962890625e-05,
      "step": 5593,
      "training_step_time": 0.45661497116088867
    },
    {
      "epoch": 3.414306640625e-05,
      "model_forward_time": 0.1153101921081543,
      "step": 5594
    },
    {
      "epoch": 3.414306640625e-05,
      "step": 5594,
      "training_step_time": 0.4862637519836426
    },
    {
      "epoch": 3.4149169921875e-05,
      "model_forward_time": 0.114593505859375,
      "step": 5595
    },
    {
      "epoch": 3.4149169921875e-05,
      "step": 5595,
      "training_step_time": 0.4517178535461426
    },
    {
      "epoch": 3.41552734375e-05,
      "model_forward_time": 0.1150517463684082,
      "step": 5596
    },
    {
      "epoch": 3.41552734375e-05,
      "step": 5596,
      "training_step_time": 0.4700582027435303
    },
    {
      "epoch": 3.4161376953125e-05,
      "model_forward_time": 0.11476683616638184,
      "step": 5597
    },
    {
      "epoch": 3.4161376953125e-05,
      "step": 5597,
      "training_step_time": 0.38016247749328613
    },
    {
      "epoch": 3.416748046875e-05,
      "model_forward_time": 0.11506199836730957,
      "step": 5598
    },
    {
      "epoch": 3.416748046875e-05,
      "step": 5598,
      "training_step_time": 0.3923523426055908
    },
    {
      "epoch": 3.4173583984375e-05,
      "model_forward_time": 0.11472439765930176,
      "step": 5599
    },
    {
      "epoch": 3.4173583984375e-05,
      "step": 5599,
      "training_step_time": 0.39127683639526367
    },
    {
      "epoch": 3.41796875e-05,
      "grad_norm": 0.3135662376880646,
      "learning_rate": 9.94875005288915e-05,
      "loss": 0.0858,
      "step": 5600
    },
    {
      "epoch": 3.41796875e-05,
      "model_forward_time": 0.11529684066772461,
      "step": 5600
    },
    {
      "epoch": 3.41796875e-05,
      "step": 5600,
      "training_step_time": 0.398266077041626
    },
    {
      "epoch": 3.4185791015625e-05,
      "model_forward_time": 0.11493635177612305,
      "step": 5601
    },
    {
      "epoch": 3.4185791015625e-05,
      "step": 5601,
      "training_step_time": 0.41632652282714844
    },
    {
      "epoch": 3.419189453125e-05,
      "model_forward_time": 0.11428499221801758,
      "step": 5602
    },
    {
      "epoch": 3.419189453125e-05,
      "step": 5602,
      "training_step_time": 0.3883786201477051
    },
    {
      "epoch": 3.4197998046875e-05,
      "model_forward_time": 0.1157841682434082,
      "step": 5603
    },
    {
      "epoch": 3.4197998046875e-05,
      "step": 5603,
      "training_step_time": 0.43964123725891113
    },
    {
      "epoch": 3.42041015625e-05,
      "model_forward_time": 0.11664390563964844,
      "step": 5604
    },
    {
      "epoch": 3.42041015625e-05,
      "step": 5604,
      "training_step_time": 0.4087245464324951
    },
    {
      "epoch": 3.4210205078125e-05,
      "model_forward_time": 0.11621880531311035,
      "step": 5605
    },
    {
      "epoch": 3.4210205078125e-05,
      "step": 5605,
      "training_step_time": 0.39868950843811035
    },
    {
      "epoch": 3.421630859375e-05,
      "model_forward_time": 0.1152658462524414,
      "step": 5606
    },
    {
      "epoch": 3.421630859375e-05,
      "step": 5606,
      "training_step_time": 0.6671185493469238
    },
    {
      "epoch": 3.4222412109375e-05,
      "model_forward_time": 0.11477136611938477,
      "step": 5607
    },
    {
      "epoch": 3.4222412109375e-05,
      "step": 5607,
      "training_step_time": 0.4332253932952881
    },
    {
      "epoch": 3.4228515625e-05,
      "model_forward_time": 0.11490559577941895,
      "step": 5608
    },
    {
      "epoch": 3.4228515625e-05,
      "step": 5608,
      "training_step_time": 0.43166041374206543
    },
    {
      "epoch": 3.4234619140625e-05,
      "model_forward_time": 0.11517047882080078,
      "step": 5609
    },
    {
      "epoch": 3.4234619140625e-05,
      "step": 5609,
      "training_step_time": 0.48594045639038086
    },
    {
      "epoch": 3.424072265625e-05,
      "grad_norm": 0.29303044080734253,
      "learning_rate": 9.948355745757741e-05,
      "loss": 0.0882,
      "step": 5610
    },
    {
      "epoch": 3.424072265625e-05,
      "model_forward_time": 0.11417961120605469,
      "step": 5610
    },
    {
      "epoch": 3.424072265625e-05,
      "step": 5610,
      "training_step_time": 0.38814687728881836
    },
    {
      "epoch": 3.4246826171875e-05,
      "model_forward_time": 0.11477231979370117,
      "step": 5611
    },
    {
      "epoch": 3.4246826171875e-05,
      "step": 5611,
      "training_step_time": 0.3996732234954834
    },
    {
      "epoch": 3.42529296875e-05,
      "model_forward_time": 0.11519241333007812,
      "step": 5612
    },
    {
      "epoch": 3.42529296875e-05,
      "step": 5612,
      "training_step_time": 0.3977315425872803
    },
    {
      "epoch": 3.4259033203125e-05,
      "model_forward_time": 0.115966796875,
      "step": 5613
    },
    {
      "epoch": 3.4259033203125e-05,
      "step": 5613,
      "training_step_time": 0.4130516052246094
    },
    {
      "epoch": 3.426513671875e-05,
      "model_forward_time": 0.11840653419494629,
      "step": 5614
    },
    {
      "epoch": 3.426513671875e-05,
      "step": 5614,
      "training_step_time": 0.4941728115081787
    },
    {
      "epoch": 3.4271240234375e-05,
      "model_forward_time": 0.11905384063720703,
      "step": 5615
    },
    {
      "epoch": 3.4271240234375e-05,
      "step": 5615,
      "training_step_time": 0.615626335144043
    },
    {
      "epoch": 3.427734375e-05,
      "model_forward_time": 0.1367785930633545,
      "step": 5616
    },
    {
      "epoch": 3.427734375e-05,
      "step": 5616,
      "training_step_time": 0.6463172435760498
    },
    {
      "epoch": 3.4283447265625e-05,
      "model_forward_time": 0.11893200874328613,
      "step": 5617
    },
    {
      "epoch": 3.4283447265625e-05,
      "step": 5617,
      "training_step_time": 0.685305118560791
    },
    {
      "epoch": 3.428955078125e-05,
      "model_forward_time": 0.12116074562072754,
      "step": 5618
    },
    {
      "epoch": 3.428955078125e-05,
      "step": 5618,
      "training_step_time": 0.715003490447998
    },
    {
      "epoch": 3.4295654296875e-05,
      "model_forward_time": 0.12935256958007812,
      "step": 5619
    },
    {
      "epoch": 3.4295654296875e-05,
      "step": 5619,
      "training_step_time": 0.6861393451690674
    },
    {
      "epoch": 3.43017578125e-05,
      "grad_norm": 0.29299196600914,
      "learning_rate": 9.947959935446507e-05,
      "loss": 0.0851,
      "step": 5620
    },
    {
      "epoch": 3.43017578125e-05,
      "model_forward_time": 0.12134742736816406,
      "step": 5620
    },
    {
      "epoch": 3.43017578125e-05,
      "step": 5620,
      "training_step_time": 0.6355431079864502
    },
    {
      "epoch": 3.4307861328125e-05,
      "model_forward_time": 0.12038683891296387,
      "step": 5621
    },
    {
      "epoch": 3.4307861328125e-05,
      "step": 5621,
      "training_step_time": 0.6399791240692139
    },
    {
      "epoch": 3.431396484375e-05,
      "model_forward_time": 0.11802244186401367,
      "step": 5622
    },
    {
      "epoch": 3.431396484375e-05,
      "step": 5622,
      "training_step_time": 0.7046554088592529
    },
    {
      "epoch": 3.4320068359375e-05,
      "model_forward_time": 0.12546253204345703,
      "step": 5623
    },
    {
      "epoch": 3.4320068359375e-05,
      "step": 5623,
      "training_step_time": 0.6366000175476074
    },
    {
      "epoch": 3.4326171875e-05,
      "model_forward_time": 0.13181567192077637,
      "step": 5624
    },
    {
      "epoch": 3.4326171875e-05,
      "step": 5624,
      "training_step_time": 0.6177241802215576
    },
    {
      "epoch": 3.4332275390625e-05,
      "model_forward_time": 0.12122321128845215,
      "step": 5625
    },
    {
      "epoch": 3.4332275390625e-05,
      "step": 5625,
      "training_step_time": 0.680793285369873
    },
    {
      "epoch": 3.433837890625e-05,
      "model_forward_time": 0.11600732803344727,
      "step": 5626
    },
    {
      "epoch": 3.433837890625e-05,
      "step": 5626,
      "training_step_time": 0.6879808902740479
    },
    {
      "epoch": 3.4344482421875e-05,
      "model_forward_time": 0.11890721321105957,
      "step": 5627
    },
    {
      "epoch": 3.4344482421875e-05,
      "step": 5627,
      "training_step_time": 0.6734764575958252
    },
    {
      "epoch": 3.43505859375e-05,
      "model_forward_time": 0.11571192741394043,
      "step": 5628
    },
    {
      "epoch": 3.43505859375e-05,
      "step": 5628,
      "training_step_time": 0.5771951675415039
    },
    {
      "epoch": 3.4356689453125e-05,
      "model_forward_time": 0.11982560157775879,
      "step": 5629
    },
    {
      "epoch": 3.4356689453125e-05,
      "step": 5629,
      "training_step_time": 0.8322877883911133
    },
    {
      "epoch": 3.436279296875e-05,
      "grad_norm": 0.27090781927108765,
      "learning_rate": 9.94756262207569e-05,
      "loss": 0.0841,
      "step": 5630
    },
    {
      "epoch": 3.436279296875e-05,
      "model_forward_time": 0.11783552169799805,
      "step": 5630
    },
    {
      "epoch": 3.436279296875e-05,
      "step": 5630,
      "training_step_time": 0.7112767696380615
    },
    {
      "epoch": 3.4368896484375e-05,
      "model_forward_time": 0.1168062686920166,
      "step": 5631
    },
    {
      "epoch": 3.4368896484375e-05,
      "step": 5631,
      "training_step_time": 0.7439701557159424
    },
    {
      "epoch": 3.4375e-05,
      "model_forward_time": 0.11823439598083496,
      "step": 5632
    },
    {
      "epoch": 3.4375e-05,
      "step": 5632,
      "training_step_time": 0.6992137432098389
    },
    {
      "epoch": 3.4381103515625e-05,
      "model_forward_time": 0.1201326847076416,
      "step": 5633
    },
    {
      "epoch": 3.4381103515625e-05,
      "step": 5633,
      "training_step_time": 0.73258376121521
    },
    {
      "epoch": 3.438720703125e-05,
      "model_forward_time": 0.11997485160827637,
      "step": 5634
    },
    {
      "epoch": 3.438720703125e-05,
      "step": 5634,
      "training_step_time": 0.7094933986663818
    },
    {
      "epoch": 3.4393310546875e-05,
      "model_forward_time": 0.12052607536315918,
      "step": 5635
    },
    {
      "epoch": 3.4393310546875e-05,
      "step": 5635,
      "training_step_time": 0.6523985862731934
    },
    {
      "epoch": 3.43994140625e-05,
      "model_forward_time": 0.11981368064880371,
      "step": 5636
    },
    {
      "epoch": 3.43994140625e-05,
      "step": 5636,
      "training_step_time": 0.6858174800872803
    },
    {
      "epoch": 3.4405517578125e-05,
      "model_forward_time": 0.12996959686279297,
      "step": 5637
    },
    {
      "epoch": 3.4405517578125e-05,
      "step": 5637,
      "training_step_time": 0.5877091884613037
    },
    {
      "epoch": 3.441162109375e-05,
      "model_forward_time": 0.12061190605163574,
      "step": 5638
    },
    {
      "epoch": 3.441162109375e-05,
      "step": 5638,
      "training_step_time": 0.7632825374603271
    },
    {
      "epoch": 3.4417724609375e-05,
      "model_forward_time": 0.12421154975891113,
      "step": 5639
    },
    {
      "epoch": 3.4417724609375e-05,
      "step": 5639,
      "training_step_time": 0.7297663688659668
    },
    {
      "epoch": 3.4423828125e-05,
      "grad_norm": 0.22038677334785461,
      "learning_rate": 9.94716380576598e-05,
      "loss": 0.095,
      "step": 5640
    },
    {
      "epoch": 3.4423828125e-05,
      "model_forward_time": 0.1216428279876709,
      "step": 5640
    },
    {
      "epoch": 3.4423828125e-05,
      "step": 5640,
      "training_step_time": 0.7135269641876221
    },
    {
      "epoch": 3.4429931640625e-05,
      "model_forward_time": 0.12656855583190918,
      "step": 5641
    },
    {
      "epoch": 3.4429931640625e-05,
      "step": 5641,
      "training_step_time": 0.5950524806976318
    },
    {
      "epoch": 3.443603515625e-05,
      "model_forward_time": 0.12190103530883789,
      "step": 5642
    },
    {
      "epoch": 3.443603515625e-05,
      "step": 5642,
      "training_step_time": 0.724686861038208
    },
    {
      "epoch": 3.4442138671875e-05,
      "model_forward_time": 0.1224524974822998,
      "step": 5643
    },
    {
      "epoch": 3.4442138671875e-05,
      "step": 5643,
      "training_step_time": 0.6654071807861328
    },
    {
      "epoch": 3.44482421875e-05,
      "model_forward_time": 0.11999034881591797,
      "step": 5644
    },
    {
      "epoch": 3.44482421875e-05,
      "step": 5644,
      "training_step_time": 0.7009575366973877
    },
    {
      "epoch": 3.4454345703125e-05,
      "model_forward_time": 0.11630415916442871,
      "step": 5645
    },
    {
      "epoch": 3.4454345703125e-05,
      "step": 5645,
      "training_step_time": 0.6607379913330078
    },
    {
      "epoch": 3.446044921875e-05,
      "model_forward_time": 0.12320923805236816,
      "step": 5646
    },
    {
      "epoch": 3.446044921875e-05,
      "step": 5646,
      "training_step_time": 0.78153395652771
    },
    {
      "epoch": 3.4466552734375e-05,
      "model_forward_time": 0.12310504913330078,
      "step": 5647
    },
    {
      "epoch": 3.4466552734375e-05,
      "step": 5647,
      "training_step_time": 0.6657993793487549
    },
    {
      "epoch": 3.447265625e-05,
      "model_forward_time": 0.12348484992980957,
      "step": 5648
    },
    {
      "epoch": 3.447265625e-05,
      "step": 5648,
      "training_step_time": 0.7848637104034424
    },
    {
      "epoch": 3.4478759765625e-05,
      "model_forward_time": 0.12472295761108398,
      "step": 5649
    },
    {
      "epoch": 3.4478759765625e-05,
      "step": 5649,
      "training_step_time": 0.646033525466919
    },
    {
      "epoch": 3.448486328125e-05,
      "grad_norm": 0.3708132803440094,
      "learning_rate": 9.946763486638528e-05,
      "loss": 0.0869,
      "step": 5650
    },
    {
      "epoch": 3.448486328125e-05,
      "model_forward_time": 0.11954498291015625,
      "step": 5650
    },
    {
      "epoch": 3.448486328125e-05,
      "step": 5650,
      "training_step_time": 0.6929609775543213
    },
    {
      "epoch": 3.4490966796875e-05,
      "model_forward_time": 0.13327836990356445,
      "step": 5651
    },
    {
      "epoch": 3.4490966796875e-05,
      "step": 5651,
      "training_step_time": 0.6456129550933838
    },
    {
      "epoch": 3.44970703125e-05,
      "model_forward_time": 0.1314077377319336,
      "step": 5652
    },
    {
      "epoch": 3.44970703125e-05,
      "step": 5652,
      "training_step_time": 0.653378963470459
    },
    {
      "epoch": 3.4503173828125e-05,
      "model_forward_time": 0.12063169479370117,
      "step": 5653
    },
    {
      "epoch": 3.4503173828125e-05,
      "step": 5653,
      "training_step_time": 0.6939589977264404
    },
    {
      "epoch": 3.450927734375e-05,
      "model_forward_time": 0.11785364151000977,
      "step": 5654
    },
    {
      "epoch": 3.450927734375e-05,
      "step": 5654,
      "training_step_time": 0.6466758251190186
    },
    {
      "epoch": 3.4515380859375e-05,
      "model_forward_time": 0.12189245223999023,
      "step": 5655
    },
    {
      "epoch": 3.4515380859375e-05,
      "step": 5655,
      "training_step_time": 0.6653392314910889
    },
    {
      "epoch": 3.4521484375e-05,
      "model_forward_time": 0.1189730167388916,
      "step": 5656
    },
    {
      "epoch": 3.4521484375e-05,
      "step": 5656,
      "training_step_time": 0.6303348541259766
    },
    {
      "epoch": 3.4527587890625e-05,
      "model_forward_time": 0.11828017234802246,
      "step": 5657
    },
    {
      "epoch": 3.4527587890625e-05,
      "step": 5657,
      "training_step_time": 0.7718393802642822
    },
    {
      "epoch": 3.453369140625e-05,
      "model_forward_time": 0.11638164520263672,
      "step": 5658
    },
    {
      "epoch": 3.453369140625e-05,
      "step": 5658,
      "training_step_time": 0.6891107559204102
    },
    {
      "epoch": 3.4539794921875e-05,
      "model_forward_time": 0.12458014488220215,
      "step": 5659
    },
    {
      "epoch": 3.4539794921875e-05,
      "step": 5659,
      "training_step_time": 0.7694699764251709
    },
    {
      "epoch": 3.45458984375e-05,
      "grad_norm": 0.26748883724212646,
      "learning_rate": 9.946361664814943e-05,
      "loss": 0.0957,
      "step": 5660
    },
    {
      "epoch": 3.45458984375e-05,
      "model_forward_time": 0.11571526527404785,
      "step": 5660
    },
    {
      "epoch": 3.45458984375e-05,
      "step": 5660,
      "training_step_time": 0.6373651027679443
    },
    {
      "epoch": 3.4552001953125e-05,
      "model_forward_time": 0.13812756538391113,
      "step": 5661
    },
    {
      "epoch": 3.4552001953125e-05,
      "step": 5661,
      "training_step_time": 0.6853582859039307
    },
    {
      "epoch": 3.455810546875e-05,
      "model_forward_time": 0.11990714073181152,
      "step": 5662
    },
    {
      "epoch": 3.455810546875e-05,
      "step": 5662,
      "training_step_time": 0.6688885688781738
    },
    {
      "epoch": 3.4564208984375e-05,
      "model_forward_time": 0.11913871765136719,
      "step": 5663
    },
    {
      "epoch": 3.4564208984375e-05,
      "step": 5663,
      "training_step_time": 0.6406221389770508
    },
    {
      "epoch": 3.45703125e-05,
      "model_forward_time": 0.11933588981628418,
      "step": 5664
    },
    {
      "epoch": 3.45703125e-05,
      "step": 5664,
      "training_step_time": 0.6549334526062012
    },
    {
      "epoch": 3.4576416015625e-05,
      "model_forward_time": 0.11751365661621094,
      "step": 5665
    },
    {
      "epoch": 3.4576416015625e-05,
      "step": 5665,
      "training_step_time": 0.6441338062286377
    },
    {
      "epoch": 3.458251953125e-05,
      "model_forward_time": 0.12320733070373535,
      "step": 5666
    },
    {
      "epoch": 3.458251953125e-05,
      "step": 5666,
      "training_step_time": 0.7376203536987305
    },
    {
      "epoch": 3.4588623046875e-05,
      "model_forward_time": 0.11731648445129395,
      "step": 5667
    },
    {
      "epoch": 3.4588623046875e-05,
      "step": 5667,
      "training_step_time": 0.7155489921569824
    },
    {
      "epoch": 3.45947265625e-05,
      "model_forward_time": 0.1195828914642334,
      "step": 5668
    },
    {
      "epoch": 3.45947265625e-05,
      "step": 5668,
      "training_step_time": 0.7533366680145264
    },
    {
      "epoch": 3.4600830078125e-05,
      "model_forward_time": 0.1195685863494873,
      "step": 5669
    },
    {
      "epoch": 3.4600830078125e-05,
      "step": 5669,
      "training_step_time": 0.7944989204406738
    },
    {
      "epoch": 3.460693359375e-05,
      "grad_norm": 0.3049900531768799,
      "learning_rate": 9.945958340417283e-05,
      "loss": 0.0942,
      "step": 5670
    },
    {
      "epoch": 3.460693359375e-05,
      "model_forward_time": 0.12237763404846191,
      "step": 5670
    },
    {
      "epoch": 3.460693359375e-05,
      "step": 5670,
      "training_step_time": 0.6522948741912842
    },
    {
      "epoch": 3.4613037109375e-05,
      "model_forward_time": 0.11831140518188477,
      "step": 5671
    },
    {
      "epoch": 3.4613037109375e-05,
      "step": 5671,
      "training_step_time": 0.6200687885284424
    },
    {
      "epoch": 3.4619140625e-05,
      "model_forward_time": 0.11610245704650879,
      "step": 5672
    },
    {
      "epoch": 3.4619140625e-05,
      "step": 5672,
      "training_step_time": 0.6455049514770508
    },
    {
      "epoch": 3.4625244140625e-05,
      "model_forward_time": 0.12654376029968262,
      "step": 5673
    },
    {
      "epoch": 3.4625244140625e-05,
      "step": 5673,
      "training_step_time": 0.6360816955566406
    },
    {
      "epoch": 3.463134765625e-05,
      "model_forward_time": 0.12297368049621582,
      "step": 5674
    },
    {
      "epoch": 3.463134765625e-05,
      "step": 5674,
      "training_step_time": 0.6532926559448242
    },
    {
      "epoch": 3.4637451171875e-05,
      "model_forward_time": 0.11633706092834473,
      "step": 5675
    },
    {
      "epoch": 3.4637451171875e-05,
      "step": 5675,
      "training_step_time": 0.6427273750305176
    },
    {
      "epoch": 3.46435546875e-05,
      "model_forward_time": 0.12429428100585938,
      "step": 5676
    },
    {
      "epoch": 3.46435546875e-05,
      "step": 5676,
      "training_step_time": 0.7038753032684326
    },
    {
      "epoch": 3.4649658203125e-05,
      "model_forward_time": 0.12050056457519531,
      "step": 5677
    },
    {
      "epoch": 3.4649658203125e-05,
      "step": 5677,
      "training_step_time": 0.7300689220428467
    },
    {
      "epoch": 3.465576171875e-05,
      "model_forward_time": 0.11925029754638672,
      "step": 5678
    },
    {
      "epoch": 3.465576171875e-05,
      "step": 5678,
      "training_step_time": 0.6946587562561035
    },
    {
      "epoch": 3.4661865234375e-05,
      "model_forward_time": 0.12376737594604492,
      "step": 5679
    },
    {
      "epoch": 3.4661865234375e-05,
      "step": 5679,
      "training_step_time": 0.6524362564086914
    },
    {
      "epoch": 3.466796875e-05,
      "grad_norm": 0.28104791045188904,
      "learning_rate": 9.945553513568068e-05,
      "loss": 0.0898,
      "step": 5680
    },
    {
      "epoch": 3.466796875e-05,
      "model_forward_time": 0.11905431747436523,
      "step": 5680
    },
    {
      "epoch": 3.466796875e-05,
      "step": 5680,
      "training_step_time": 0.560490608215332
    },
    {
      "epoch": 3.4674072265625e-05,
      "model_forward_time": 0.1207587718963623,
      "step": 5681
    },
    {
      "epoch": 3.4674072265625e-05,
      "step": 5681,
      "training_step_time": 0.5402853488922119
    },
    {
      "epoch": 3.468017578125e-05,
      "model_forward_time": 0.12140250205993652,
      "step": 5682
    },
    {
      "epoch": 3.468017578125e-05,
      "step": 5682,
      "training_step_time": 0.5295798778533936
    },
    {
      "epoch": 3.4686279296875e-05,
      "model_forward_time": 0.11944890022277832,
      "step": 5683
    },
    {
      "epoch": 3.4686279296875e-05,
      "step": 5683,
      "training_step_time": 0.5472586154937744
    },
    {
      "epoch": 3.46923828125e-05,
      "model_forward_time": 0.12324118614196777,
      "step": 5684
    },
    {
      "epoch": 3.46923828125e-05,
      "step": 5684,
      "training_step_time": 0.5238921642303467
    },
    {
      "epoch": 3.4698486328125e-05,
      "model_forward_time": 0.12052130699157715,
      "step": 5685
    },
    {
      "epoch": 3.4698486328125e-05,
      "step": 5685,
      "training_step_time": 0.4492073059082031
    },
    {
      "epoch": 3.470458984375e-05,
      "model_forward_time": 0.11806654930114746,
      "step": 5686
    },
    {
      "epoch": 3.470458984375e-05,
      "step": 5686,
      "training_step_time": 0.43163466453552246
    },
    {
      "epoch": 3.4710693359375e-05,
      "model_forward_time": 0.11893630027770996,
      "step": 5687
    },
    {
      "epoch": 3.4710693359375e-05,
      "step": 5687,
      "training_step_time": 0.5445117950439453
    },
    {
      "epoch": 3.4716796875e-05,
      "model_forward_time": 0.11769700050354004,
      "step": 5688
    },
    {
      "epoch": 3.4716796875e-05,
      "step": 5688,
      "training_step_time": 0.43842625617980957
    },
    {
      "epoch": 3.4722900390625e-05,
      "model_forward_time": 0.1172938346862793,
      "step": 5689
    },
    {
      "epoch": 3.4722900390625e-05,
      "step": 5689,
      "training_step_time": 0.46947741508483887
    },
    {
      "epoch": 3.472900390625e-05,
      "grad_norm": 0.34282806515693665,
      "learning_rate": 9.945147184390278e-05,
      "loss": 0.099,
      "step": 5690
    },
    {
      "epoch": 3.472900390625e-05,
      "model_forward_time": 0.11612391471862793,
      "step": 5690
    },
    {
      "epoch": 3.472900390625e-05,
      "step": 5690,
      "training_step_time": 0.5050482749938965
    },
    {
      "epoch": 3.4735107421875e-05,
      "model_forward_time": 0.1157236099243164,
      "step": 5691
    },
    {
      "epoch": 3.4735107421875e-05,
      "step": 5691,
      "training_step_time": 0.4184737205505371
    },
    {
      "epoch": 3.47412109375e-05,
      "model_forward_time": 0.1163034439086914,
      "step": 5692
    },
    {
      "epoch": 3.47412109375e-05,
      "step": 5692,
      "training_step_time": 0.3821072578430176
    },
    {
      "epoch": 3.4747314453125e-05,
      "model_forward_time": 0.11495804786682129,
      "step": 5693
    },
    {
      "epoch": 3.4747314453125e-05,
      "step": 5693,
      "training_step_time": 0.3824591636657715
    },
    {
      "epoch": 3.475341796875e-05,
      "model_forward_time": 0.11557555198669434,
      "step": 5694
    },
    {
      "epoch": 3.475341796875e-05,
      "step": 5694,
      "training_step_time": 0.3856947422027588
    },
    {
      "epoch": 3.4759521484375e-05,
      "model_forward_time": 0.11537480354309082,
      "step": 5695
    },
    {
      "epoch": 3.4759521484375e-05,
      "step": 5695,
      "training_step_time": 0.3982551097869873
    },
    {
      "epoch": 3.4765625e-05,
      "model_forward_time": 0.11596083641052246,
      "step": 5696
    },
    {
      "epoch": 3.4765625e-05,
      "step": 5696,
      "training_step_time": 0.3827805519104004
    },
    {
      "epoch": 3.4771728515625e-05,
      "model_forward_time": 0.11534452438354492,
      "step": 5697
    },
    {
      "epoch": 3.4771728515625e-05,
      "step": 5697,
      "training_step_time": 0.3742225170135498
    },
    {
      "epoch": 3.477783203125e-05,
      "model_forward_time": 0.11537790298461914,
      "step": 5698
    },
    {
      "epoch": 3.477783203125e-05,
      "step": 5698,
      "training_step_time": 0.3887624740600586
    },
    {
      "epoch": 3.4783935546875e-05,
      "model_forward_time": 0.1153097152709961,
      "step": 5699
    },
    {
      "epoch": 3.4783935546875e-05,
      "step": 5699,
      "training_step_time": 0.386599063873291
    },
    {
      "epoch": 3.47900390625e-05,
      "grad_norm": 0.24710234999656677,
      "learning_rate": 9.944739353007344e-05,
      "loss": 0.0851,
      "step": 5700
    },
    {
      "epoch": 3.47900390625e-05,
      "model_forward_time": 0.1153249740600586,
      "step": 5700
    },
    {
      "epoch": 3.47900390625e-05,
      "step": 5700,
      "training_step_time": 0.39560556411743164
    },
    {
      "epoch": 3.4796142578125e-05,
      "model_forward_time": 0.1155385971069336,
      "step": 5701
    },
    {
      "epoch": 3.4796142578125e-05,
      "step": 5701,
      "training_step_time": 0.4410839080810547
    },
    {
      "epoch": 3.480224609375e-05,
      "model_forward_time": 0.11442065238952637,
      "step": 5702
    },
    {
      "epoch": 3.480224609375e-05,
      "step": 5702,
      "training_step_time": 0.3694028854370117
    },
    {
      "epoch": 3.4808349609375e-05,
      "model_forward_time": 0.11483478546142578,
      "step": 5703
    },
    {
      "epoch": 3.4808349609375e-05,
      "step": 5703,
      "training_step_time": 0.41793298721313477
    },
    {
      "epoch": 3.4814453125e-05,
      "model_forward_time": 0.11479949951171875,
      "step": 5704
    },
    {
      "epoch": 3.4814453125e-05,
      "step": 5704,
      "training_step_time": 0.5006446838378906
    },
    {
      "epoch": 3.4820556640625e-05,
      "model_forward_time": 0.11488842964172363,
      "step": 5705
    },
    {
      "epoch": 3.4820556640625e-05,
      "step": 5705,
      "training_step_time": 0.3903484344482422
    },
    {
      "epoch": 3.482666015625e-05,
      "model_forward_time": 0.11473822593688965,
      "step": 5706
    },
    {
      "epoch": 3.482666015625e-05,
      "step": 5706,
      "training_step_time": 0.47103309631347656
    },
    {
      "epoch": 3.4832763671875e-05,
      "model_forward_time": 0.11483907699584961,
      "step": 5707
    },
    {
      "epoch": 3.4832763671875e-05,
      "step": 5707,
      "training_step_time": 0.3889808654785156
    },
    {
      "epoch": 3.48388671875e-05,
      "model_forward_time": 0.11498308181762695,
      "step": 5708
    },
    {
      "epoch": 3.48388671875e-05,
      "step": 5708,
      "training_step_time": 0.40142345428466797
    },
    {
      "epoch": 3.4844970703125e-05,
      "model_forward_time": 0.1148536205291748,
      "step": 5709
    },
    {
      "epoch": 3.4844970703125e-05,
      "step": 5709,
      "training_step_time": 0.39013242721557617
    },
    {
      "epoch": 3.485107421875e-05,
      "grad_norm": 0.3101944625377655,
      "learning_rate": 9.944330019543149e-05,
      "loss": 0.0981,
      "step": 5710
    },
    {
      "epoch": 3.485107421875e-05,
      "model_forward_time": 0.11591744422912598,
      "step": 5710
    },
    {
      "epoch": 3.485107421875e-05,
      "step": 5710,
      "training_step_time": 0.38424253463745117
    },
    {
      "epoch": 3.4857177734375e-05,
      "model_forward_time": 0.11574244499206543,
      "step": 5711
    },
    {
      "epoch": 3.4857177734375e-05,
      "step": 5711,
      "training_step_time": 0.39174461364746094
    },
    {
      "epoch": 3.486328125e-05,
      "model_forward_time": 0.11541104316711426,
      "step": 5712
    },
    {
      "epoch": 3.486328125e-05,
      "step": 5712,
      "training_step_time": 0.39447736740112305
    },
    {
      "epoch": 3.4869384765625e-05,
      "model_forward_time": 0.11544966697692871,
      "step": 5713
    },
    {
      "epoch": 3.4869384765625e-05,
      "step": 5713,
      "training_step_time": 0.39685511589050293
    },
    {
      "epoch": 3.487548828125e-05,
      "model_forward_time": 0.11570262908935547,
      "step": 5714
    },
    {
      "epoch": 3.487548828125e-05,
      "step": 5714,
      "training_step_time": 0.406813383102417
    },
    {
      "epoch": 3.4881591796875e-05,
      "model_forward_time": 0.11502265930175781,
      "step": 5715
    },
    {
      "epoch": 3.4881591796875e-05,
      "step": 5715,
      "training_step_time": 0.3997311592102051
    },
    {
      "epoch": 3.48876953125e-05,
      "model_forward_time": 0.1147761344909668,
      "step": 5716
    },
    {
      "epoch": 3.48876953125e-05,
      "step": 5716,
      "training_step_time": 0.4024937152862549
    },
    {
      "epoch": 3.4893798828125e-05,
      "model_forward_time": 0.11547040939331055,
      "step": 5717
    },
    {
      "epoch": 3.4893798828125e-05,
      "step": 5717,
      "training_step_time": 0.43059802055358887
    },
    {
      "epoch": 3.489990234375e-05,
      "model_forward_time": 0.11517643928527832,
      "step": 5718
    },
    {
      "epoch": 3.489990234375e-05,
      "step": 5718,
      "training_step_time": 0.4666469097137451
    },
    {
      "epoch": 3.4906005859375e-05,
      "model_forward_time": 0.1152184009552002,
      "step": 5719
    },
    {
      "epoch": 3.4906005859375e-05,
      "step": 5719,
      "training_step_time": 0.47330641746520996
    },
    {
      "epoch": 3.4912109375e-05,
      "grad_norm": 0.3736476004123688,
      "learning_rate": 9.943919184122043e-05,
      "loss": 0.0991,
      "step": 5720
    },
    {
      "epoch": 3.4912109375e-05,
      "model_forward_time": 0.11521267890930176,
      "step": 5720
    },
    {
      "epoch": 3.4912109375e-05,
      "step": 5720,
      "training_step_time": 0.4596407413482666
    },
    {
      "epoch": 3.4918212890625e-05,
      "model_forward_time": 0.11529231071472168,
      "step": 5721
    },
    {
      "epoch": 3.4918212890625e-05,
      "step": 5721,
      "training_step_time": 0.60843825340271
    },
    {
      "epoch": 3.492431640625e-05,
      "model_forward_time": 0.11524438858032227,
      "step": 5722
    },
    {
      "epoch": 3.492431640625e-05,
      "step": 5722,
      "training_step_time": 0.3799748420715332
    },
    {
      "epoch": 3.4930419921875e-05,
      "model_forward_time": 0.11515450477600098,
      "step": 5723
    },
    {
      "epoch": 3.4930419921875e-05,
      "step": 5723,
      "training_step_time": 0.38611793518066406
    },
    {
      "epoch": 3.49365234375e-05,
      "model_forward_time": 0.11442279815673828,
      "step": 5724
    },
    {
      "epoch": 3.49365234375e-05,
      "step": 5724,
      "training_step_time": 0.3879106044769287
    },
    {
      "epoch": 3.4942626953125e-05,
      "model_forward_time": 0.1160123348236084,
      "step": 5725
    },
    {
      "epoch": 3.4942626953125e-05,
      "step": 5725,
      "training_step_time": 0.39371323585510254
    },
    {
      "epoch": 3.494873046875e-05,
      "model_forward_time": 0.1147301197052002,
      "step": 5726
    },
    {
      "epoch": 3.494873046875e-05,
      "step": 5726,
      "training_step_time": 0.39556145668029785
    },
    {
      "epoch": 3.4954833984375e-05,
      "model_forward_time": 0.11481976509094238,
      "step": 5727
    },
    {
      "epoch": 3.4954833984375e-05,
      "step": 5727,
      "training_step_time": 0.8849730491638184
    },
    {
      "epoch": 3.49609375e-05,
      "model_forward_time": 0.11418700218200684,
      "step": 5728
    },
    {
      "epoch": 3.49609375e-05,
      "step": 5728,
      "training_step_time": 0.4171714782714844
    },
    {
      "epoch": 3.4967041015625e-05,
      "model_forward_time": 0.11373686790466309,
      "step": 5729
    },
    {
      "epoch": 3.4967041015625e-05,
      "step": 5729,
      "training_step_time": 0.43709635734558105
    },
    {
      "epoch": 3.497314453125e-05,
      "grad_norm": 0.24994973838329315,
      "learning_rate": 9.943506846868826e-05,
      "loss": 0.0989,
      "step": 5730
    },
    {
      "epoch": 3.497314453125e-05,
      "model_forward_time": 0.11521220207214355,
      "step": 5730
    },
    {
      "epoch": 3.497314453125e-05,
      "step": 5730,
      "training_step_time": 0.40675878524780273
    },
    {
      "epoch": 3.4979248046875e-05,
      "model_forward_time": 0.11428523063659668,
      "step": 5731
    },
    {
      "epoch": 3.4979248046875e-05,
      "step": 5731,
      "training_step_time": 0.3812880516052246
    },
    {
      "epoch": 3.49853515625e-05,
      "model_forward_time": 0.11486029624938965,
      "step": 5732
    },
    {
      "epoch": 3.49853515625e-05,
      "step": 5732,
      "training_step_time": 0.47679591178894043
    },
    {
      "epoch": 3.4991455078125e-05,
      "model_forward_time": 0.11429834365844727,
      "step": 5733
    },
    {
      "epoch": 3.4991455078125e-05,
      "step": 5733,
      "training_step_time": 0.4912080764770508
    },
    {
      "epoch": 3.499755859375e-05,
      "model_forward_time": 0.11429095268249512,
      "step": 5734
    },
    {
      "epoch": 3.499755859375e-05,
      "step": 5734,
      "training_step_time": 0.4840991497039795
    },
    {
      "epoch": 3.5003662109375e-05,
      "model_forward_time": 0.11392712593078613,
      "step": 5735
    },
    {
      "epoch": 3.5003662109375e-05,
      "step": 5735,
      "training_step_time": 0.3982212543487549
    },
    {
      "epoch": 3.5009765625e-05,
      "model_forward_time": 0.11425924301147461,
      "step": 5736
    },
    {
      "epoch": 3.5009765625e-05,
      "step": 5736,
      "training_step_time": 0.3853452205657959
    },
    {
      "epoch": 3.5015869140625e-05,
      "model_forward_time": 0.11471986770629883,
      "step": 5737
    },
    {
      "epoch": 3.5015869140625e-05,
      "step": 5737,
      "training_step_time": 0.3934962749481201
    },
    {
      "epoch": 3.502197265625e-05,
      "model_forward_time": 0.11442089080810547,
      "step": 5738
    },
    {
      "epoch": 3.502197265625e-05,
      "step": 5738,
      "training_step_time": 0.38499975204467773
    },
    {
      "epoch": 3.5028076171875e-05,
      "model_forward_time": 0.11449837684631348,
      "step": 5739
    },
    {
      "epoch": 3.5028076171875e-05,
      "step": 5739,
      "training_step_time": 0.7152080535888672
    },
    {
      "epoch": 3.50341796875e-05,
      "grad_norm": 0.2745978534221649,
      "learning_rate": 9.943093007908755e-05,
      "loss": 0.0913,
      "step": 5740
    },
    {
      "epoch": 3.50341796875e-05,
      "model_forward_time": 0.11395931243896484,
      "step": 5740
    },
    {
      "epoch": 3.50341796875e-05,
      "step": 5740,
      "training_step_time": 0.38137221336364746
    },
    {
      "epoch": 3.5040283203125e-05,
      "model_forward_time": 0.11465859413146973,
      "step": 5741
    },
    {
      "epoch": 3.5040283203125e-05,
      "step": 5741,
      "training_step_time": 0.38538384437561035
    },
    {
      "epoch": 3.504638671875e-05,
      "model_forward_time": 0.11449670791625977,
      "step": 5742
    },
    {
      "epoch": 3.504638671875e-05,
      "step": 5742,
      "training_step_time": 0.42144298553466797
    },
    {
      "epoch": 3.5052490234375e-05,
      "model_forward_time": 0.11499261856079102,
      "step": 5743
    },
    {
      "epoch": 3.5052490234375e-05,
      "step": 5743,
      "training_step_time": 0.38922715187072754
    },
    {
      "epoch": 3.505859375e-05,
      "model_forward_time": 0.1140131950378418,
      "step": 5744
    },
    {
      "epoch": 3.505859375e-05,
      "step": 5744,
      "training_step_time": 0.4107334613800049
    },
    {
      "epoch": 3.5064697265625e-05,
      "model_forward_time": 0.1154937744140625,
      "step": 5745
    },
    {
      "epoch": 3.5064697265625e-05,
      "step": 5745,
      "training_step_time": 0.36586928367614746
    },
    {
      "epoch": 3.507080078125e-05,
      "model_forward_time": 0.11548852920532227,
      "step": 5746
    },
    {
      "epoch": 3.507080078125e-05,
      "step": 5746,
      "training_step_time": 0.46314048767089844
    },
    {
      "epoch": 3.5076904296875e-05,
      "model_forward_time": 0.11464691162109375,
      "step": 5747
    },
    {
      "epoch": 3.5076904296875e-05,
      "step": 5747,
      "training_step_time": 0.4495978355407715
    },
    {
      "epoch": 3.50830078125e-05,
      "model_forward_time": 0.11490845680236816,
      "step": 5748
    },
    {
      "epoch": 3.50830078125e-05,
      "step": 5748,
      "training_step_time": 0.41155171394348145
    },
    {
      "epoch": 3.5089111328125e-05,
      "model_forward_time": 0.1147758960723877,
      "step": 5749
    },
    {
      "epoch": 3.5089111328125e-05,
      "step": 5749,
      "training_step_time": 0.39186906814575195
    },
    {
      "epoch": 3.509521484375e-05,
      "grad_norm": 0.21036255359649658,
      "learning_rate": 9.942677667367541e-05,
      "loss": 0.0895,
      "step": 5750
    },
    {
      "epoch": 3.509521484375e-05,
      "model_forward_time": 0.11474490165710449,
      "step": 5750
    },
    {
      "epoch": 3.509521484375e-05,
      "step": 5750,
      "training_step_time": 0.38974761962890625
    },
    {
      "epoch": 3.5101318359375e-05,
      "model_forward_time": 0.11489677429199219,
      "step": 5751
    },
    {
      "epoch": 3.5101318359375e-05,
      "step": 5751,
      "training_step_time": 0.38902759552001953
    },
    {
      "epoch": 3.5107421875e-05,
      "model_forward_time": 0.11492657661437988,
      "step": 5752
    },
    {
      "epoch": 3.5107421875e-05,
      "step": 5752,
      "training_step_time": 0.38885068893432617
    },
    {
      "epoch": 3.5113525390625e-05,
      "model_forward_time": 0.1152186393737793,
      "step": 5753
    },
    {
      "epoch": 3.5113525390625e-05,
      "step": 5753,
      "training_step_time": 0.3974275588989258
    },
    {
      "epoch": 3.511962890625e-05,
      "model_forward_time": 0.11520719528198242,
      "step": 5754
    },
    {
      "epoch": 3.511962890625e-05,
      "step": 5754,
      "training_step_time": 0.3971552848815918
    },
    {
      "epoch": 3.5125732421875e-05,
      "model_forward_time": 0.11499691009521484,
      "step": 5755
    },
    {
      "epoch": 3.5125732421875e-05,
      "step": 5755,
      "training_step_time": 0.4097118377685547
    },
    {
      "epoch": 3.51318359375e-05,
      "model_forward_time": 0.11533808708190918,
      "step": 5756
    },
    {
      "epoch": 3.51318359375e-05,
      "step": 5756,
      "training_step_time": 0.3984842300415039
    },
    {
      "epoch": 3.5137939453125e-05,
      "model_forward_time": 0.11605978012084961,
      "step": 5757
    },
    {
      "epoch": 3.5137939453125e-05,
      "step": 5757,
      "training_step_time": 0.3990309238433838
    },
    {
      "epoch": 3.514404296875e-05,
      "model_forward_time": 0.11542367935180664,
      "step": 5758
    },
    {
      "epoch": 3.514404296875e-05,
      "step": 5758,
      "training_step_time": 0.386646032333374
    },
    {
      "epoch": 3.5150146484375e-05,
      "model_forward_time": 0.11487460136413574,
      "step": 5759
    },
    {
      "epoch": 3.5150146484375e-05,
      "step": 5759,
      "training_step_time": 0.4566013813018799
    },
    {
      "epoch": 3.515625e-05,
      "grad_norm": 0.23121069371700287,
      "learning_rate": 9.942260825371358e-05,
      "loss": 0.0877,
      "step": 5760
    },
    {
      "epoch": 3.515625e-05,
      "model_forward_time": 0.11564755439758301,
      "step": 5760
    },
    {
      "epoch": 3.515625e-05,
      "step": 5760,
      "training_step_time": 0.36980390548706055
    },
    {
      "epoch": 3.5162353515625e-05,
      "model_forward_time": 0.1149442195892334,
      "step": 5761
    },
    {
      "epoch": 3.5162353515625e-05,
      "step": 5761,
      "training_step_time": 0.4366037845611572
    },
    {
      "epoch": 3.516845703125e-05,
      "model_forward_time": 0.11503124237060547,
      "step": 5762
    },
    {
      "epoch": 3.516845703125e-05,
      "step": 5762,
      "training_step_time": 0.42075085639953613
    },
    {
      "epoch": 3.5174560546875e-05,
      "model_forward_time": 0.11515927314758301,
      "step": 5763
    },
    {
      "epoch": 3.5174560546875e-05,
      "step": 5763,
      "training_step_time": 0.4883613586425781
    },
    {
      "epoch": 3.51806640625e-05,
      "model_forward_time": 0.11471772193908691,
      "step": 5764
    },
    {
      "epoch": 3.51806640625e-05,
      "step": 5764,
      "training_step_time": 0.38362741470336914
    },
    {
      "epoch": 3.5186767578125e-05,
      "model_forward_time": 0.11484503746032715,
      "step": 5765
    },
    {
      "epoch": 3.5186767578125e-05,
      "step": 5765,
      "training_step_time": 0.3974294662475586
    },
    {
      "epoch": 3.519287109375e-05,
      "model_forward_time": 0.11451840400695801,
      "step": 5766
    },
    {
      "epoch": 3.519287109375e-05,
      "step": 5766,
      "training_step_time": 0.391920804977417
    },
    {
      "epoch": 3.5198974609375e-05,
      "model_forward_time": 0.11522173881530762,
      "step": 5767
    },
    {
      "epoch": 3.5198974609375e-05,
      "step": 5767,
      "training_step_time": 0.39049339294433594
    },
    {
      "epoch": 3.5205078125e-05,
      "model_forward_time": 0.11505341529846191,
      "step": 5768
    },
    {
      "epoch": 3.5205078125e-05,
      "step": 5768,
      "training_step_time": 0.4048807621002197
    },
    {
      "epoch": 3.5211181640625e-05,
      "model_forward_time": 0.11466360092163086,
      "step": 5769
    },
    {
      "epoch": 3.5211181640625e-05,
      "step": 5769,
      "training_step_time": 0.38633131980895996
    },
    {
      "epoch": 3.521728515625e-05,
      "grad_norm": 0.23065687716007233,
      "learning_rate": 9.941842482046828e-05,
      "loss": 0.0881,
      "step": 5770
    },
    {
      "epoch": 3.521728515625e-05,
      "model_forward_time": 0.11542201042175293,
      "step": 5770
    },
    {
      "epoch": 3.521728515625e-05,
      "step": 5770,
      "training_step_time": 0.4418618679046631
    },
    {
      "epoch": 3.5223388671875e-05,
      "model_forward_time": 0.11483073234558105,
      "step": 5771
    },
    {
      "epoch": 3.5223388671875e-05,
      "step": 5771,
      "training_step_time": 0.3872537612915039
    },
    {
      "epoch": 3.52294921875e-05,
      "model_forward_time": 0.11527156829833984,
      "step": 5772
    },
    {
      "epoch": 3.52294921875e-05,
      "step": 5772,
      "training_step_time": 0.38778114318847656
    },
    {
      "epoch": 3.5235595703125e-05,
      "model_forward_time": 0.11563229560852051,
      "step": 5773
    },
    {
      "epoch": 3.5235595703125e-05,
      "step": 5773,
      "training_step_time": 0.39528369903564453
    },
    {
      "epoch": 3.524169921875e-05,
      "model_forward_time": 0.1152806282043457,
      "step": 5774
    },
    {
      "epoch": 3.524169921875e-05,
      "step": 5774,
      "training_step_time": 0.4456770420074463
    },
    {
      "epoch": 3.5247802734375e-05,
      "model_forward_time": 0.11570334434509277,
      "step": 5775
    },
    {
      "epoch": 3.5247802734375e-05,
      "step": 5775,
      "training_step_time": 0.41368961334228516
    },
    {
      "epoch": 3.525390625e-05,
      "model_forward_time": 0.11541295051574707,
      "step": 5776
    },
    {
      "epoch": 3.525390625e-05,
      "step": 5776,
      "training_step_time": 0.458219051361084
    },
    {
      "epoch": 3.5260009765625e-05,
      "model_forward_time": 0.11473655700683594,
      "step": 5777
    },
    {
      "epoch": 3.5260009765625e-05,
      "step": 5777,
      "training_step_time": 0.5137536525726318
    },
    {
      "epoch": 3.526611328125e-05,
      "model_forward_time": 0.11459994316101074,
      "step": 5778
    },
    {
      "epoch": 3.526611328125e-05,
      "step": 5778,
      "training_step_time": 0.5025589466094971
    },
    {
      "epoch": 3.5272216796875e-05,
      "model_forward_time": 0.11420011520385742,
      "step": 5779
    },
    {
      "epoch": 3.5272216796875e-05,
      "step": 5779,
      "training_step_time": 0.3914041519165039
    },
    {
      "epoch": 3.52783203125e-05,
      "grad_norm": 0.21559879183769226,
      "learning_rate": 9.941422637521035e-05,
      "loss": 0.09,
      "step": 5780
    },
    {
      "epoch": 3.52783203125e-05,
      "model_forward_time": 0.1145939826965332,
      "step": 5780
    },
    {
      "epoch": 3.52783203125e-05,
      "step": 5780,
      "training_step_time": 0.39965057373046875
    },
    {
      "epoch": 3.5284423828125e-05,
      "model_forward_time": 0.1148691177368164,
      "step": 5781
    },
    {
      "epoch": 3.5284423828125e-05,
      "step": 5781,
      "training_step_time": 0.389240026473999
    },
    {
      "epoch": 3.529052734375e-05,
      "model_forward_time": 0.11457586288452148,
      "step": 5782
    },
    {
      "epoch": 3.529052734375e-05,
      "step": 5782,
      "training_step_time": 0.40076112747192383
    },
    {
      "epoch": 3.5296630859375e-05,
      "model_forward_time": 0.11477160453796387,
      "step": 5783
    },
    {
      "epoch": 3.5296630859375e-05,
      "step": 5783,
      "training_step_time": 0.40734243392944336
    },
    {
      "epoch": 3.5302734375e-05,
      "model_forward_time": 0.11470651626586914,
      "step": 5784
    },
    {
      "epoch": 3.5302734375e-05,
      "step": 5784,
      "training_step_time": 0.39696431159973145
    },
    {
      "epoch": 3.5308837890625e-05,
      "model_forward_time": 0.1152653694152832,
      "step": 5785
    },
    {
      "epoch": 3.5308837890625e-05,
      "step": 5785,
      "training_step_time": 0.4011867046356201
    },
    {
      "epoch": 3.531494140625e-05,
      "model_forward_time": 0.11488604545593262,
      "step": 5786
    },
    {
      "epoch": 3.531494140625e-05,
      "step": 5786,
      "training_step_time": 0.3987290859222412
    },
    {
      "epoch": 3.5321044921875e-05,
      "model_forward_time": 0.11562061309814453,
      "step": 5787
    },
    {
      "epoch": 3.5321044921875e-05,
      "step": 5787,
      "training_step_time": 0.39327144622802734
    },
    {
      "epoch": 3.53271484375e-05,
      "model_forward_time": 0.11501836776733398,
      "step": 5788
    },
    {
      "epoch": 3.53271484375e-05,
      "step": 5788,
      "training_step_time": 0.3903331756591797
    },
    {
      "epoch": 3.5333251953125e-05,
      "model_forward_time": 0.1153097152709961,
      "step": 5789
    },
    {
      "epoch": 3.5333251953125e-05,
      "step": 5789,
      "training_step_time": 0.37088799476623535
    },
    {
      "epoch": 3.533935546875e-05,
      "grad_norm": 0.24528732895851135,
      "learning_rate": 9.941001291921512e-05,
      "loss": 0.0859,
      "step": 5790
    },
    {
      "epoch": 3.533935546875e-05,
      "model_forward_time": 0.11573338508605957,
      "step": 5790
    },
    {
      "epoch": 3.533935546875e-05,
      "step": 5790,
      "training_step_time": 0.4199965000152588
    },
    {
      "epoch": 3.5345458984375e-05,
      "model_forward_time": 0.11554622650146484,
      "step": 5791
    },
    {
      "epoch": 3.5345458984375e-05,
      "step": 5791,
      "training_step_time": 0.4111299514770508
    },
    {
      "epoch": 3.53515625e-05,
      "model_forward_time": 0.11543750762939453,
      "step": 5792
    },
    {
      "epoch": 3.53515625e-05,
      "step": 5792,
      "training_step_time": 0.39150261878967285
    },
    {
      "epoch": 3.5357666015625e-05,
      "model_forward_time": 0.11494588851928711,
      "step": 5793
    },
    {
      "epoch": 3.5357666015625e-05,
      "step": 5793,
      "training_step_time": 0.39940786361694336
    },
    {
      "epoch": 3.536376953125e-05,
      "model_forward_time": 0.11493372917175293,
      "step": 5794
    },
    {
      "epoch": 3.536376953125e-05,
      "step": 5794,
      "training_step_time": 0.3938019275665283
    },
    {
      "epoch": 3.5369873046875e-05,
      "model_forward_time": 0.12471318244934082,
      "step": 5795
    },
    {
      "epoch": 3.5369873046875e-05,
      "step": 5795,
      "training_step_time": 0.39676856994628906
    },
    {
      "epoch": 3.53759765625e-05,
      "model_forward_time": 0.11502361297607422,
      "step": 5796
    },
    {
      "epoch": 3.53759765625e-05,
      "step": 5796,
      "training_step_time": 0.3944664001464844
    },
    {
      "epoch": 3.5382080078125e-05,
      "model_forward_time": 0.11498451232910156,
      "step": 5797
    },
    {
      "epoch": 3.5382080078125e-05,
      "step": 5797,
      "training_step_time": 0.41986560821533203
    },
    {
      "epoch": 3.538818359375e-05,
      "model_forward_time": 0.11676907539367676,
      "step": 5798
    },
    {
      "epoch": 3.538818359375e-05,
      "step": 5798,
      "training_step_time": 0.43712806701660156
    },
    {
      "epoch": 3.5394287109375e-05,
      "model_forward_time": 0.11534810066223145,
      "step": 5799
    },
    {
      "epoch": 3.5394287109375e-05,
      "step": 5799,
      "training_step_time": 0.3870103359222412
    },
    {
      "epoch": 3.5400390625e-05,
      "grad_norm": 0.2588684856891632,
      "learning_rate": 9.940578445376258e-05,
      "loss": 0.0861,
      "step": 5800
    },
    {
      "epoch": 3.5400390625e-05,
      "model_forward_time": 0.11738801002502441,
      "step": 5800
    },
    {
      "epoch": 3.5400390625e-05,
      "step": 5800,
      "training_step_time": 0.3781614303588867
    },
    {
      "epoch": 3.5406494140625e-05,
      "model_forward_time": 0.11536145210266113,
      "step": 5801
    },
    {
      "epoch": 3.5406494140625e-05,
      "step": 5801,
      "training_step_time": 0.3882322311401367
    },
    {
      "epoch": 3.541259765625e-05,
      "model_forward_time": 0.11524033546447754,
      "step": 5802
    },
    {
      "epoch": 3.541259765625e-05,
      "step": 5802,
      "training_step_time": 0.3910865783691406
    },
    {
      "epoch": 3.5418701171875e-05,
      "model_forward_time": 0.11575150489807129,
      "step": 5803
    },
    {
      "epoch": 3.5418701171875e-05,
      "step": 5803,
      "training_step_time": 0.40715885162353516
    },
    {
      "epoch": 3.54248046875e-05,
      "model_forward_time": 0.11453771591186523,
      "step": 5804
    },
    {
      "epoch": 3.54248046875e-05,
      "step": 5804,
      "training_step_time": 0.4316122531890869
    },
    {
      "epoch": 3.5430908203125e-05,
      "model_forward_time": 0.11511588096618652,
      "step": 5805
    },
    {
      "epoch": 3.5430908203125e-05,
      "step": 5805,
      "training_step_time": 0.4745032787322998
    },
    {
      "epoch": 3.543701171875e-05,
      "model_forward_time": 0.11529660224914551,
      "step": 5806
    },
    {
      "epoch": 3.543701171875e-05,
      "step": 5806,
      "training_step_time": 0.4629507064819336
    },
    {
      "epoch": 3.5443115234375e-05,
      "model_forward_time": 0.11508417129516602,
      "step": 5807
    },
    {
      "epoch": 3.5443115234375e-05,
      "step": 5807,
      "training_step_time": 0.4457738399505615
    },
    {
      "epoch": 3.544921875e-05,
      "model_forward_time": 0.11504554748535156,
      "step": 5808
    },
    {
      "epoch": 3.544921875e-05,
      "step": 5808,
      "training_step_time": 0.4949817657470703
    },
    {
      "epoch": 3.5455322265625e-05,
      "model_forward_time": 0.11487889289855957,
      "step": 5809
    },
    {
      "epoch": 3.5455322265625e-05,
      "step": 5809,
      "training_step_time": 0.38710808753967285
    },
    {
      "epoch": 3.546142578125e-05,
      "grad_norm": 0.22784455120563507,
      "learning_rate": 9.940154098013723e-05,
      "loss": 0.0851,
      "step": 5810
    },
    {
      "epoch": 3.546142578125e-05,
      "model_forward_time": 0.11461353302001953,
      "step": 5810
    },
    {
      "epoch": 3.546142578125e-05,
      "step": 5810,
      "training_step_time": 0.43004894256591797
    },
    {
      "epoch": 3.5467529296875e-05,
      "model_forward_time": 0.11458539962768555,
      "step": 5811
    },
    {
      "epoch": 3.5467529296875e-05,
      "step": 5811,
      "training_step_time": 0.3938870429992676
    },
    {
      "epoch": 3.54736328125e-05,
      "model_forward_time": 0.11462569236755371,
      "step": 5812
    },
    {
      "epoch": 3.54736328125e-05,
      "step": 5812,
      "training_step_time": 0.3753187656402588
    },
    {
      "epoch": 3.5479736328125e-05,
      "model_forward_time": 0.11480593681335449,
      "step": 5813
    },
    {
      "epoch": 3.5479736328125e-05,
      "step": 5813,
      "training_step_time": 0.38898587226867676
    },
    {
      "epoch": 3.548583984375e-05,
      "model_forward_time": 0.1152336597442627,
      "step": 5814
    },
    {
      "epoch": 3.548583984375e-05,
      "step": 5814,
      "training_step_time": 0.4085559844970703
    },
    {
      "epoch": 3.5491943359375e-05,
      "model_forward_time": 0.11518096923828125,
      "step": 5815
    },
    {
      "epoch": 3.5491943359375e-05,
      "step": 5815,
      "training_step_time": 0.39435648918151855
    },
    {
      "epoch": 3.5498046875e-05,
      "model_forward_time": 0.11499905586242676,
      "step": 5816
    },
    {
      "epoch": 3.5498046875e-05,
      "step": 5816,
      "training_step_time": 0.401871919631958
    },
    {
      "epoch": 3.5504150390625e-05,
      "model_forward_time": 0.11489629745483398,
      "step": 5817
    },
    {
      "epoch": 3.5504150390625e-05,
      "step": 5817,
      "training_step_time": 0.3925321102142334
    },
    {
      "epoch": 3.551025390625e-05,
      "model_forward_time": 0.11488509178161621,
      "step": 5818
    },
    {
      "epoch": 3.551025390625e-05,
      "step": 5818,
      "training_step_time": 0.3749263286590576
    },
    {
      "epoch": 3.5516357421875e-05,
      "model_forward_time": 0.11554837226867676,
      "step": 5819
    },
    {
      "epoch": 3.5516357421875e-05,
      "step": 5819,
      "training_step_time": 0.46662354469299316
    },
    {
      "epoch": 3.55224609375e-05,
      "grad_norm": 0.3768160045146942,
      "learning_rate": 9.939728249962807e-05,
      "loss": 0.0812,
      "step": 5820
    },
    {
      "epoch": 3.55224609375e-05,
      "model_forward_time": 0.11492133140563965,
      "step": 5820
    },
    {
      "epoch": 3.55224609375e-05,
      "step": 5820,
      "training_step_time": 0.45917248725891113
    },
    {
      "epoch": 3.5528564453125e-05,
      "model_forward_time": 0.11480188369750977,
      "step": 5821
    },
    {
      "epoch": 3.5528564453125e-05,
      "step": 5821,
      "training_step_time": 0.4393179416656494
    },
    {
      "epoch": 3.553466796875e-05,
      "model_forward_time": 0.11560773849487305,
      "step": 5822
    },
    {
      "epoch": 3.553466796875e-05,
      "step": 5822,
      "training_step_time": 0.40184855461120605
    },
    {
      "epoch": 3.5540771484375e-05,
      "model_forward_time": 0.11478972434997559,
      "step": 5823
    },
    {
      "epoch": 3.5540771484375e-05,
      "step": 5823,
      "training_step_time": 0.41700100898742676
    },
    {
      "epoch": 3.5546875e-05,
      "model_forward_time": 0.11480069160461426,
      "step": 5824
    },
    {
      "epoch": 3.5546875e-05,
      "step": 5824,
      "training_step_time": 0.39025425910949707
    },
    {
      "epoch": 3.5552978515625e-05,
      "model_forward_time": 0.11495828628540039,
      "step": 5825
    },
    {
      "epoch": 3.5552978515625e-05,
      "step": 5825,
      "training_step_time": 0.42992258071899414
    },
    {
      "epoch": 3.555908203125e-05,
      "model_forward_time": 0.11478543281555176,
      "step": 5826
    },
    {
      "epoch": 3.555908203125e-05,
      "step": 5826,
      "training_step_time": 0.3843846321105957
    },
    {
      "epoch": 3.5565185546875e-05,
      "model_forward_time": 0.11532449722290039,
      "step": 5827
    },
    {
      "epoch": 3.5565185546875e-05,
      "step": 5827,
      "training_step_time": 0.38999390602111816
    },
    {
      "epoch": 3.55712890625e-05,
      "model_forward_time": 0.11507678031921387,
      "step": 5828
    },
    {
      "epoch": 3.55712890625e-05,
      "step": 5828,
      "training_step_time": 0.3960287570953369
    },
    {
      "epoch": 3.5577392578125e-05,
      "model_forward_time": 0.11513113975524902,
      "step": 5829
    },
    {
      "epoch": 3.5577392578125e-05,
      "step": 5829,
      "training_step_time": 0.390758752822876
    },
    {
      "epoch": 3.558349609375e-05,
      "grad_norm": 0.21917422115802765,
      "learning_rate": 9.939300901352876e-05,
      "loss": 0.0922,
      "step": 5830
    },
    {
      "epoch": 3.558349609375e-05,
      "model_forward_time": 0.11487197875976562,
      "step": 5830
    },
    {
      "epoch": 3.558349609375e-05,
      "step": 5830,
      "training_step_time": 0.3869776725769043
    },
    {
      "epoch": 3.5589599609375e-05,
      "model_forward_time": 0.11495208740234375,
      "step": 5831
    },
    {
      "epoch": 3.5589599609375e-05,
      "step": 5831,
      "training_step_time": 0.3897974491119385
    },
    {
      "epoch": 3.5595703125e-05,
      "model_forward_time": 0.1155850887298584,
      "step": 5832
    },
    {
      "epoch": 3.5595703125e-05,
      "step": 5832,
      "training_step_time": 0.39217185974121094
    },
    {
      "epoch": 3.5601806640625e-05,
      "model_forward_time": 0.11596202850341797,
      "step": 5833
    },
    {
      "epoch": 3.5601806640625e-05,
      "step": 5833,
      "training_step_time": 0.38434624671936035
    },
    {
      "epoch": 3.560791015625e-05,
      "model_forward_time": 0.11646461486816406,
      "step": 5834
    },
    {
      "epoch": 3.560791015625e-05,
      "step": 5834,
      "training_step_time": 0.47082018852233887
    },
    {
      "epoch": 3.5614013671875e-05,
      "model_forward_time": 0.11580085754394531,
      "step": 5835
    },
    {
      "epoch": 3.5614013671875e-05,
      "step": 5835,
      "training_step_time": 0.49759864807128906
    },
    {
      "epoch": 3.56201171875e-05,
      "model_forward_time": 0.11547970771789551,
      "step": 5836
    },
    {
      "epoch": 3.56201171875e-05,
      "step": 5836,
      "training_step_time": 0.41140294075012207
    },
    {
      "epoch": 3.5626220703125e-05,
      "model_forward_time": 0.11468005180358887,
      "step": 5837
    },
    {
      "epoch": 3.5626220703125e-05,
      "step": 5837,
      "training_step_time": 0.4847278594970703
    },
    {
      "epoch": 3.563232421875e-05,
      "model_forward_time": 0.11533188819885254,
      "step": 5838
    },
    {
      "epoch": 3.563232421875e-05,
      "step": 5838,
      "training_step_time": 0.4971141815185547
    },
    {
      "epoch": 3.5638427734375e-05,
      "model_forward_time": 0.1149599552154541,
      "step": 5839
    },
    {
      "epoch": 3.5638427734375e-05,
      "step": 5839,
      "training_step_time": 0.3883523941040039
    },
    {
      "epoch": 3.564453125e-05,
      "grad_norm": 0.26247385144233704,
      "learning_rate": 9.938872052313746e-05,
      "loss": 0.0873,
      "step": 5840
    },
    {
      "epoch": 3.564453125e-05,
      "model_forward_time": 0.11439204216003418,
      "step": 5840
    },
    {
      "epoch": 3.564453125e-05,
      "step": 5840,
      "training_step_time": 0.38969874382019043
    },
    {
      "epoch": 3.5650634765625e-05,
      "model_forward_time": 0.11460614204406738,
      "step": 5841
    },
    {
      "epoch": 3.5650634765625e-05,
      "step": 5841,
      "training_step_time": 0.40053534507751465
    },
    {
      "epoch": 3.565673828125e-05,
      "model_forward_time": 0.11538529396057129,
      "step": 5842
    },
    {
      "epoch": 3.565673828125e-05,
      "step": 5842,
      "training_step_time": 0.3818941116333008
    },
    {
      "epoch": 3.5662841796875e-05,
      "model_forward_time": 0.11570954322814941,
      "step": 5843
    },
    {
      "epoch": 3.5662841796875e-05,
      "step": 5843,
      "training_step_time": 0.39411449432373047
    },
    {
      "epoch": 3.56689453125e-05,
      "model_forward_time": 0.11464118957519531,
      "step": 5844
    },
    {
      "epoch": 3.56689453125e-05,
      "step": 5844,
      "training_step_time": 0.39356231689453125
    },
    {
      "epoch": 3.5675048828125e-05,
      "model_forward_time": 0.11521482467651367,
      "step": 5845
    },
    {
      "epoch": 3.5675048828125e-05,
      "step": 5845,
      "training_step_time": 0.3966183662414551
    },
    {
      "epoch": 3.568115234375e-05,
      "model_forward_time": 0.11448264122009277,
      "step": 5846
    },
    {
      "epoch": 3.568115234375e-05,
      "step": 5846,
      "training_step_time": 0.38932156562805176
    },
    {
      "epoch": 3.5687255859375e-05,
      "model_forward_time": 0.11539077758789062,
      "step": 5847
    },
    {
      "epoch": 3.5687255859375e-05,
      "step": 5847,
      "training_step_time": 0.38858771324157715
    },
    {
      "epoch": 3.5693359375e-05,
      "model_forward_time": 0.11608338356018066,
      "step": 5848
    },
    {
      "epoch": 3.5693359375e-05,
      "step": 5848,
      "training_step_time": 0.46784353256225586
    },
    {
      "epoch": 3.5699462890625e-05,
      "model_forward_time": 0.11587762832641602,
      "step": 5849
    },
    {
      "epoch": 3.5699462890625e-05,
      "step": 5849,
      "training_step_time": 0.5014348030090332
    },
    {
      "epoch": 3.570556640625e-05,
      "grad_norm": 0.2781771719455719,
      "learning_rate": 9.938441702975689e-05,
      "loss": 0.0905,
      "step": 5850
    },
    {
      "epoch": 3.570556640625e-05,
      "model_forward_time": 0.11488699913024902,
      "step": 5850
    },
    {
      "epoch": 3.570556640625e-05,
      "step": 5850,
      "training_step_time": 0.4641551971435547
    },
    {
      "epoch": 3.5711669921875e-05,
      "model_forward_time": 0.11481261253356934,
      "step": 5851
    },
    {
      "epoch": 3.5711669921875e-05,
      "step": 5851,
      "training_step_time": 0.47820258140563965
    },
    {
      "epoch": 3.57177734375e-05,
      "model_forward_time": 0.11523175239562988,
      "step": 5852
    },
    {
      "epoch": 3.57177734375e-05,
      "step": 5852,
      "training_step_time": 0.44033336639404297
    },
    {
      "epoch": 3.5723876953125e-05,
      "model_forward_time": 0.11444354057312012,
      "step": 5853
    },
    {
      "epoch": 3.5723876953125e-05,
      "step": 5853,
      "training_step_time": 0.4078845977783203
    },
    {
      "epoch": 3.572998046875e-05,
      "model_forward_time": 0.11515259742736816,
      "step": 5854
    },
    {
      "epoch": 3.572998046875e-05,
      "step": 5854,
      "training_step_time": 0.38043665885925293
    },
    {
      "epoch": 3.5736083984375e-05,
      "model_forward_time": 0.11491703987121582,
      "step": 5855
    },
    {
      "epoch": 3.5736083984375e-05,
      "step": 5855,
      "training_step_time": 0.3889009952545166
    },
    {
      "epoch": 3.57421875e-05,
      "model_forward_time": 0.11513066291809082,
      "step": 5856
    },
    {
      "epoch": 3.57421875e-05,
      "step": 5856,
      "training_step_time": 0.39463329315185547
    },
    {
      "epoch": 3.5748291015625e-05,
      "model_forward_time": 0.11513018608093262,
      "step": 5857
    },
    {
      "epoch": 3.5748291015625e-05,
      "step": 5857,
      "training_step_time": 0.3902461528778076
    },
    {
      "epoch": 3.575439453125e-05,
      "model_forward_time": 0.11519527435302734,
      "step": 5858
    },
    {
      "epoch": 3.575439453125e-05,
      "step": 5858,
      "training_step_time": 0.3906075954437256
    },
    {
      "epoch": 3.5760498046875e-05,
      "model_forward_time": 0.11565303802490234,
      "step": 5859
    },
    {
      "epoch": 3.5760498046875e-05,
      "step": 5859,
      "training_step_time": 0.40266871452331543
    },
    {
      "epoch": 3.57666015625e-05,
      "grad_norm": 0.23744770884513855,
      "learning_rate": 9.938009853469436e-05,
      "loss": 0.0834,
      "step": 5860
    },
    {
      "epoch": 3.57666015625e-05,
      "model_forward_time": 0.11544060707092285,
      "step": 5860
    },
    {
      "epoch": 3.57666015625e-05,
      "step": 5860,
      "training_step_time": 0.3878653049468994
    },
    {
      "epoch": 3.5772705078125e-05,
      "model_forward_time": 0.1153113842010498,
      "step": 5861
    },
    {
      "epoch": 3.5772705078125e-05,
      "step": 5861,
      "training_step_time": 0.3907921314239502
    },
    {
      "epoch": 3.577880859375e-05,
      "model_forward_time": 0.11501002311706543,
      "step": 5862
    },
    {
      "epoch": 3.577880859375e-05,
      "step": 5862,
      "training_step_time": 0.39006733894348145
    },
    {
      "epoch": 3.5784912109375e-05,
      "model_forward_time": 0.11557579040527344,
      "step": 5863
    },
    {
      "epoch": 3.5784912109375e-05,
      "step": 5863,
      "training_step_time": 0.4879183769226074
    },
    {
      "epoch": 3.5791015625e-05,
      "model_forward_time": 0.1150054931640625,
      "step": 5864
    },
    {
      "epoch": 3.5791015625e-05,
      "step": 5864,
      "training_step_time": 0.4812595844268799
    },
    {
      "epoch": 3.5797119140625e-05,
      "model_forward_time": 0.11555218696594238,
      "step": 5865
    },
    {
      "epoch": 3.5797119140625e-05,
      "step": 5865,
      "training_step_time": 0.4692404270172119
    },
    {
      "epoch": 3.580322265625e-05,
      "model_forward_time": 0.1149909496307373,
      "step": 5866
    },
    {
      "epoch": 3.580322265625e-05,
      "step": 5866,
      "training_step_time": 0.4530322551727295
    },
    {
      "epoch": 3.5809326171875e-05,
      "model_forward_time": 0.11535906791687012,
      "step": 5867
    },
    {
      "epoch": 3.5809326171875e-05,
      "step": 5867,
      "training_step_time": 0.42909669876098633
    },
    {
      "epoch": 3.58154296875e-05,
      "model_forward_time": 0.11464738845825195,
      "step": 5868
    },
    {
      "epoch": 3.58154296875e-05,
      "step": 5868,
      "training_step_time": 0.4541819095611572
    },
    {
      "epoch": 3.5821533203125e-05,
      "model_forward_time": 0.1147150993347168,
      "step": 5869
    },
    {
      "epoch": 3.5821533203125e-05,
      "step": 5869,
      "training_step_time": 0.3959798812866211
    },
    {
      "epoch": 3.582763671875e-05,
      "grad_norm": 0.3373236358165741,
      "learning_rate": 9.93757650392617e-05,
      "loss": 0.0835,
      "step": 5870
    },
    {
      "epoch": 3.582763671875e-05,
      "model_forward_time": 0.11464405059814453,
      "step": 5870
    },
    {
      "epoch": 3.582763671875e-05,
      "step": 5870,
      "training_step_time": 0.3996603488922119
    },
    {
      "epoch": 3.5833740234375e-05,
      "model_forward_time": 0.11478590965270996,
      "step": 5871
    },
    {
      "epoch": 3.5833740234375e-05,
      "step": 5871,
      "training_step_time": 0.39339590072631836
    },
    {
      "epoch": 3.583984375e-05,
      "model_forward_time": 0.11562132835388184,
      "step": 5872
    },
    {
      "epoch": 3.583984375e-05,
      "step": 5872,
      "training_step_time": 0.3850276470184326
    },
    {
      "epoch": 3.5845947265625e-05,
      "model_forward_time": 0.1154477596282959,
      "step": 5873
    },
    {
      "epoch": 3.5845947265625e-05,
      "step": 5873,
      "training_step_time": 0.3882434368133545
    },
    {
      "epoch": 3.585205078125e-05,
      "model_forward_time": 0.11483025550842285,
      "step": 5874
    },
    {
      "epoch": 3.585205078125e-05,
      "step": 5874,
      "training_step_time": 0.38469624519348145
    },
    {
      "epoch": 3.5858154296875e-05,
      "model_forward_time": 0.11545705795288086,
      "step": 5875
    },
    {
      "epoch": 3.5858154296875e-05,
      "step": 5875,
      "training_step_time": 0.3935120105743408
    },
    {
      "epoch": 3.58642578125e-05,
      "model_forward_time": 0.1151266098022461,
      "step": 5876
    },
    {
      "epoch": 3.58642578125e-05,
      "step": 5876,
      "training_step_time": 0.38355374336242676
    },
    {
      "epoch": 3.5870361328125e-05,
      "model_forward_time": 0.11680865287780762,
      "step": 5877
    },
    {
      "epoch": 3.5870361328125e-05,
      "step": 5877,
      "training_step_time": 0.3948338031768799
    },
    {
      "epoch": 3.587646484375e-05,
      "model_forward_time": 0.11525917053222656,
      "step": 5878
    },
    {
      "epoch": 3.587646484375e-05,
      "step": 5878,
      "training_step_time": 0.480609655380249
    },
    {
      "epoch": 3.5882568359375e-05,
      "model_forward_time": 0.11508965492248535,
      "step": 5879
    },
    {
      "epoch": 3.5882568359375e-05,
      "step": 5879,
      "training_step_time": 0.4734618663787842
    },
    {
      "epoch": 3.5888671875e-05,
      "grad_norm": 0.29471203684806824,
      "learning_rate": 9.937141654477528e-05,
      "loss": 0.0936,
      "step": 5880
    },
    {
      "epoch": 3.5888671875e-05,
      "model_forward_time": 0.11658573150634766,
      "step": 5880
    },
    {
      "epoch": 3.5888671875e-05,
      "step": 5880,
      "training_step_time": 0.5150811672210693
    },
    {
      "epoch": 3.5894775390625e-05,
      "model_forward_time": 0.11494255065917969,
      "step": 5881
    },
    {
      "epoch": 3.5894775390625e-05,
      "step": 5881,
      "training_step_time": 0.4661691188812256
    },
    {
      "epoch": 3.590087890625e-05,
      "model_forward_time": 0.11606192588806152,
      "step": 5882
    },
    {
      "epoch": 3.590087890625e-05,
      "step": 5882,
      "training_step_time": 0.48323655128479004
    },
    {
      "epoch": 3.5906982421875e-05,
      "model_forward_time": 0.11477780342102051,
      "step": 5883
    },
    {
      "epoch": 3.5906982421875e-05,
      "step": 5883,
      "training_step_time": 0.38535356521606445
    },
    {
      "epoch": 3.59130859375e-05,
      "model_forward_time": 0.11513590812683105,
      "step": 5884
    },
    {
      "epoch": 3.59130859375e-05,
      "step": 5884,
      "training_step_time": 0.3979825973510742
    },
    {
      "epoch": 3.5919189453125e-05,
      "model_forward_time": 0.11543822288513184,
      "step": 5885
    },
    {
      "epoch": 3.5919189453125e-05,
      "step": 5885,
      "training_step_time": 0.38828420639038086
    },
    {
      "epoch": 3.592529296875e-05,
      "model_forward_time": 0.11455798149108887,
      "step": 5886
    },
    {
      "epoch": 3.592529296875e-05,
      "step": 5886,
      "training_step_time": 0.3875746726989746
    },
    {
      "epoch": 3.5931396484375e-05,
      "model_forward_time": 0.11480045318603516,
      "step": 5887
    },
    {
      "epoch": 3.5931396484375e-05,
      "step": 5887,
      "training_step_time": 0.4387168884277344
    },
    {
      "epoch": 3.59375e-05,
      "model_forward_time": 0.11480045318603516,
      "step": 5888
    },
    {
      "epoch": 3.59375e-05,
      "step": 5888,
      "training_step_time": 0.39239501953125
    },
    {
      "epoch": 3.5943603515625e-05,
      "model_forward_time": 0.11526775360107422,
      "step": 5889
    },
    {
      "epoch": 3.5943603515625e-05,
      "step": 5889,
      "training_step_time": 0.3882451057434082
    },
    {
      "epoch": 3.594970703125e-05,
      "grad_norm": 0.33340466022491455,
      "learning_rate": 9.936705305255612e-05,
      "loss": 0.0851,
      "step": 5890
    },
    {
      "epoch": 3.594970703125e-05,
      "model_forward_time": 0.11492300033569336,
      "step": 5890
    },
    {
      "epoch": 3.594970703125e-05,
      "step": 5890,
      "training_step_time": 0.389270544052124
    },
    {
      "epoch": 3.5955810546875e-05,
      "model_forward_time": 0.11518144607543945,
      "step": 5891
    },
    {
      "epoch": 3.5955810546875e-05,
      "step": 5891,
      "training_step_time": 0.3949766159057617
    },
    {
      "epoch": 3.59619140625e-05,
      "model_forward_time": 0.11506438255310059,
      "step": 5892
    },
    {
      "epoch": 3.59619140625e-05,
      "step": 5892,
      "training_step_time": 0.41204309463500977
    },
    {
      "epoch": 3.5968017578125e-05,
      "model_forward_time": 0.1152040958404541,
      "step": 5893
    },
    {
      "epoch": 3.5968017578125e-05,
      "step": 5893,
      "training_step_time": 0.4979822635650635
    },
    {
      "epoch": 3.597412109375e-05,
      "model_forward_time": 0.11616373062133789,
      "step": 5894
    },
    {
      "epoch": 3.597412109375e-05,
      "step": 5894,
      "training_step_time": 0.4299638271331787
    },
    {
      "epoch": 3.5980224609375e-05,
      "model_forward_time": 0.11555123329162598,
      "step": 5895
    },
    {
      "epoch": 3.5980224609375e-05,
      "step": 5895,
      "training_step_time": 0.47554588317871094
    },
    {
      "epoch": 3.5986328125e-05,
      "model_forward_time": 0.11514496803283691,
      "step": 5896
    },
    {
      "epoch": 3.5986328125e-05,
      "step": 5896,
      "training_step_time": 0.42765140533447266
    },
    {
      "epoch": 3.5992431640625e-05,
      "model_forward_time": 0.11526250839233398,
      "step": 5897
    },
    {
      "epoch": 3.5992431640625e-05,
      "step": 5897,
      "training_step_time": 0.45883893966674805
    },
    {
      "epoch": 3.599853515625e-05,
      "model_forward_time": 0.11470890045166016,
      "step": 5898
    },
    {
      "epoch": 3.599853515625e-05,
      "step": 5898,
      "training_step_time": 0.3831629753112793
    },
    {
      "epoch": 3.6004638671875e-05,
      "model_forward_time": 0.11493182182312012,
      "step": 5899
    },
    {
      "epoch": 3.6004638671875e-05,
      "step": 5899,
      "training_step_time": 0.39407777786254883
    },
    {
      "epoch": 3.60107421875e-05,
      "grad_norm": 0.25396445393562317,
      "learning_rate": 9.936267456392971e-05,
      "loss": 0.0879,
      "step": 5900
    },
    {
      "epoch": 3.60107421875e-05,
      "model_forward_time": 0.11511874198913574,
      "step": 5900
    },
    {
      "epoch": 3.60107421875e-05,
      "step": 5900,
      "training_step_time": 0.3876931667327881
    },
    {
      "epoch": 3.6016845703125e-05,
      "model_forward_time": 0.1156001091003418,
      "step": 5901
    },
    {
      "epoch": 3.6016845703125e-05,
      "step": 5901,
      "training_step_time": 0.38300609588623047
    },
    {
      "epoch": 3.602294921875e-05,
      "model_forward_time": 0.11515116691589355,
      "step": 5902
    },
    {
      "epoch": 3.602294921875e-05,
      "step": 5902,
      "training_step_time": 0.3872091770172119
    },
    {
      "epoch": 3.6029052734375e-05,
      "model_forward_time": 0.11572432518005371,
      "step": 5903
    },
    {
      "epoch": 3.6029052734375e-05,
      "step": 5903,
      "training_step_time": 0.3923490047454834
    },
    {
      "epoch": 3.603515625e-05,
      "model_forward_time": 0.1154778003692627,
      "step": 5904
    },
    {
      "epoch": 3.603515625e-05,
      "step": 5904,
      "training_step_time": 0.39925336837768555
    },
    {
      "epoch": 3.6041259765625e-05,
      "model_forward_time": 0.1158289909362793,
      "step": 5905
    },
    {
      "epoch": 3.6041259765625e-05,
      "step": 5905,
      "training_step_time": 0.39339184761047363
    },
    {
      "epoch": 3.604736328125e-05,
      "model_forward_time": 0.11477184295654297,
      "step": 5906
    },
    {
      "epoch": 3.604736328125e-05,
      "step": 5906,
      "training_step_time": 0.4249460697174072
    },
    {
      "epoch": 3.6053466796875e-05,
      "model_forward_time": 0.11469697952270508,
      "step": 5907
    },
    {
      "epoch": 3.6053466796875e-05,
      "step": 5907,
      "training_step_time": 0.3670637607574463
    },
    {
      "epoch": 3.60595703125e-05,
      "model_forward_time": 0.11505389213562012,
      "step": 5908
    },
    {
      "epoch": 3.60595703125e-05,
      "step": 5908,
      "training_step_time": 0.4405794143676758
    },
    {
      "epoch": 3.6065673828125e-05,
      "model_forward_time": 0.11482858657836914,
      "step": 5909
    },
    {
      "epoch": 3.6065673828125e-05,
      "step": 5909,
      "training_step_time": 0.4802682399749756
    },
    {
      "epoch": 3.607177734375e-05,
      "grad_norm": 0.29818129539489746,
      "learning_rate": 9.93582810802261e-05,
      "loss": 0.085,
      "step": 5910
    },
    {
      "epoch": 3.607177734375e-05,
      "model_forward_time": 0.11456179618835449,
      "step": 5910
    },
    {
      "epoch": 3.607177734375e-05,
      "step": 5910,
      "training_step_time": 0.4411919116973877
    },
    {
      "epoch": 3.6077880859375e-05,
      "model_forward_time": 0.1143341064453125,
      "step": 5911
    },
    {
      "epoch": 3.6077880859375e-05,
      "step": 5911,
      "training_step_time": 0.4360690116882324
    },
    {
      "epoch": 3.6083984375e-05,
      "model_forward_time": 0.11550402641296387,
      "step": 5912
    },
    {
      "epoch": 3.6083984375e-05,
      "step": 5912,
      "training_step_time": 0.4257819652557373
    },
    {
      "epoch": 3.6090087890625e-05,
      "model_forward_time": 0.11447644233703613,
      "step": 5913
    },
    {
      "epoch": 3.6090087890625e-05,
      "step": 5913,
      "training_step_time": 0.3876223564147949
    },
    {
      "epoch": 3.609619140625e-05,
      "model_forward_time": 0.11471128463745117,
      "step": 5914
    },
    {
      "epoch": 3.609619140625e-05,
      "step": 5914,
      "training_step_time": 0.39846277236938477
    },
    {
      "epoch": 3.6102294921875e-05,
      "model_forward_time": 0.11548161506652832,
      "step": 5915
    },
    {
      "epoch": 3.6102294921875e-05,
      "step": 5915,
      "training_step_time": 0.3895540237426758
    },
    {
      "epoch": 3.61083984375e-05,
      "model_forward_time": 0.11522841453552246,
      "step": 5916
    },
    {
      "epoch": 3.61083984375e-05,
      "step": 5916,
      "training_step_time": 0.39812493324279785
    },
    {
      "epoch": 3.6114501953125e-05,
      "model_forward_time": 0.11487460136413574,
      "step": 5917
    },
    {
      "epoch": 3.6114501953125e-05,
      "step": 5917,
      "training_step_time": 0.39694738388061523
    },
    {
      "epoch": 3.612060546875e-05,
      "model_forward_time": 0.11518979072570801,
      "step": 5918
    },
    {
      "epoch": 3.612060546875e-05,
      "step": 5918,
      "training_step_time": 0.3934805393218994
    },
    {
      "epoch": 3.6126708984375e-05,
      "model_forward_time": 0.1145169734954834,
      "step": 5919
    },
    {
      "epoch": 3.6126708984375e-05,
      "step": 5919,
      "training_step_time": 0.3974487781524658
    },
    {
      "epoch": 3.61328125e-05,
      "grad_norm": 0.2876518666744232,
      "learning_rate": 9.935387260277993e-05,
      "loss": 0.0845,
      "step": 5920
    },
    {
      "epoch": 3.61328125e-05,
      "model_forward_time": 0.1158151626586914,
      "step": 5920
    },
    {
      "epoch": 3.61328125e-05,
      "step": 5920,
      "training_step_time": 0.4141514301300049
    },
    {
      "epoch": 3.6138916015625e-05,
      "model_forward_time": 0.1149301528930664,
      "step": 5921
    },
    {
      "epoch": 3.6138916015625e-05,
      "step": 5921,
      "training_step_time": 0.511873722076416
    },
    {
      "epoch": 3.614501953125e-05,
      "model_forward_time": 0.11543655395507812,
      "step": 5922
    },
    {
      "epoch": 3.614501953125e-05,
      "step": 5922,
      "training_step_time": 0.4975254535675049
    },
    {
      "epoch": 3.6151123046875e-05,
      "model_forward_time": 0.11485838890075684,
      "step": 5923
    },
    {
      "epoch": 3.6151123046875e-05,
      "step": 5923,
      "training_step_time": 0.4840424060821533
    },
    {
      "epoch": 3.61572265625e-05,
      "model_forward_time": 0.11478614807128906,
      "step": 5924
    },
    {
      "epoch": 3.61572265625e-05,
      "step": 5924,
      "training_step_time": 0.5108752250671387
    },
    {
      "epoch": 3.6163330078125e-05,
      "model_forward_time": 0.11468005180358887,
      "step": 5925
    },
    {
      "epoch": 3.6163330078125e-05,
      "step": 5925,
      "training_step_time": 0.41365909576416016
    },
    {
      "epoch": 3.616943359375e-05,
      "model_forward_time": 0.11432981491088867,
      "step": 5926
    },
    {
      "epoch": 3.616943359375e-05,
      "step": 5926,
      "training_step_time": 0.39917540550231934
    },
    {
      "epoch": 3.6175537109375e-05,
      "model_forward_time": 0.11466717720031738,
      "step": 5927
    },
    {
      "epoch": 3.6175537109375e-05,
      "step": 5927,
      "training_step_time": 0.3887979984283447
    },
    {
      "epoch": 3.6181640625e-05,
      "model_forward_time": 0.11493206024169922,
      "step": 5928
    },
    {
      "epoch": 3.6181640625e-05,
      "step": 5928,
      "training_step_time": 0.39442944526672363
    },
    {
      "epoch": 3.6187744140625e-05,
      "model_forward_time": 0.11507296562194824,
      "step": 5929
    },
    {
      "epoch": 3.6187744140625e-05,
      "step": 5929,
      "training_step_time": 0.3997647762298584
    },
    {
      "epoch": 3.619384765625e-05,
      "grad_norm": 0.22541870176792145,
      "learning_rate": 9.934944913293038e-05,
      "loss": 0.0817,
      "step": 5930
    },
    {
      "epoch": 3.619384765625e-05,
      "model_forward_time": 0.11495280265808105,
      "step": 5930
    },
    {
      "epoch": 3.619384765625e-05,
      "step": 5930,
      "training_step_time": 0.385483980178833
    },
    {
      "epoch": 3.6199951171875e-05,
      "model_forward_time": 0.1158599853515625,
      "step": 5931
    },
    {
      "epoch": 3.6199951171875e-05,
      "step": 5931,
      "training_step_time": 0.38887882232666016
    },
    {
      "epoch": 3.62060546875e-05,
      "model_forward_time": 0.11547565460205078,
      "step": 5932
    },
    {
      "epoch": 3.62060546875e-05,
      "step": 5932,
      "training_step_time": 0.3858342170715332
    },
    {
      "epoch": 3.6212158203125e-05,
      "model_forward_time": 0.11480093002319336,
      "step": 5933
    },
    {
      "epoch": 3.6212158203125e-05,
      "step": 5933,
      "training_step_time": 0.39684414863586426
    },
    {
      "epoch": 3.621826171875e-05,
      "model_forward_time": 0.11529874801635742,
      "step": 5934
    },
    {
      "epoch": 3.621826171875e-05,
      "step": 5934,
      "training_step_time": 0.44116806983947754
    },
    {
      "epoch": 3.6224365234375e-05,
      "model_forward_time": 0.11507225036621094,
      "step": 5935
    },
    {
      "epoch": 3.6224365234375e-05,
      "step": 5935,
      "training_step_time": 0.4114375114440918
    },
    {
      "epoch": 3.623046875e-05,
      "model_forward_time": 0.11542820930480957,
      "step": 5936
    },
    {
      "epoch": 3.623046875e-05,
      "step": 5936,
      "training_step_time": 0.39890313148498535
    },
    {
      "epoch": 3.6236572265625e-05,
      "model_forward_time": 0.1156928539276123,
      "step": 5937
    },
    {
      "epoch": 3.6236572265625e-05,
      "step": 5937,
      "training_step_time": 0.45124173164367676
    },
    {
      "epoch": 3.624267578125e-05,
      "model_forward_time": 0.11525273323059082,
      "step": 5938
    },
    {
      "epoch": 3.624267578125e-05,
      "step": 5938,
      "training_step_time": 0.47837185859680176
    },
    {
      "epoch": 3.6248779296875e-05,
      "model_forward_time": 0.11549901962280273,
      "step": 5939
    },
    {
      "epoch": 3.6248779296875e-05,
      "step": 5939,
      "training_step_time": 0.5264298915863037
    },
    {
      "epoch": 3.62548828125e-05,
      "grad_norm": 0.3483678996562958,
      "learning_rate": 9.934501067202117e-05,
      "loss": 0.0827,
      "step": 5940
    },
    {
      "epoch": 3.62548828125e-05,
      "model_forward_time": 0.11609268188476562,
      "step": 5940
    },
    {
      "epoch": 3.62548828125e-05,
      "step": 5940,
      "training_step_time": 0.41394805908203125
    },
    {
      "epoch": 3.6260986328125e-05,
      "model_forward_time": 0.11506938934326172,
      "step": 5941
    },
    {
      "epoch": 3.6260986328125e-05,
      "step": 5941,
      "training_step_time": 0.44727611541748047
    },
    {
      "epoch": 3.626708984375e-05,
      "model_forward_time": 0.11452054977416992,
      "step": 5942
    },
    {
      "epoch": 3.626708984375e-05,
      "step": 5942,
      "training_step_time": 0.3880908489227295
    },
    {
      "epoch": 3.6273193359375e-05,
      "model_forward_time": 0.11485409736633301,
      "step": 5943
    },
    {
      "epoch": 3.6273193359375e-05,
      "step": 5943,
      "training_step_time": 0.3912813663482666
    },
    {
      "epoch": 3.6279296875e-05,
      "model_forward_time": 0.11548876762390137,
      "step": 5944
    },
    {
      "epoch": 3.6279296875e-05,
      "step": 5944,
      "training_step_time": 0.3793222904205322
    },
    {
      "epoch": 3.6285400390625e-05,
      "model_forward_time": 0.1158437728881836,
      "step": 5945
    },
    {
      "epoch": 3.6285400390625e-05,
      "step": 5945,
      "training_step_time": 0.392789363861084
    },
    {
      "epoch": 3.629150390625e-05,
      "model_forward_time": 0.11518120765686035,
      "step": 5946
    },
    {
      "epoch": 3.629150390625e-05,
      "step": 5946,
      "training_step_time": 0.3927290439605713
    },
    {
      "epoch": 3.6297607421875e-05,
      "model_forward_time": 0.11537981033325195,
      "step": 5947
    },
    {
      "epoch": 3.6297607421875e-05,
      "step": 5947,
      "training_step_time": 0.4296102523803711
    },
    {
      "epoch": 3.63037109375e-05,
      "model_forward_time": 0.1160895824432373,
      "step": 5948
    },
    {
      "epoch": 3.63037109375e-05,
      "step": 5948,
      "training_step_time": 0.4536869525909424
    },
    {
      "epoch": 3.6309814453125e-05,
      "model_forward_time": 0.11580157279968262,
      "step": 5949
    },
    {
      "epoch": 3.6309814453125e-05,
      "step": 5949,
      "training_step_time": 0.3951711654663086
    },
    {
      "epoch": 3.631591796875e-05,
      "grad_norm": 0.21032288670539856,
      "learning_rate": 9.934055722140061e-05,
      "loss": 0.0843,
      "step": 5950
    },
    {
      "epoch": 3.631591796875e-05,
      "model_forward_time": 0.11460995674133301,
      "step": 5950
    },
    {
      "epoch": 3.631591796875e-05,
      "step": 5950,
      "training_step_time": 0.3885798454284668
    },
    {
      "epoch": 3.6322021484375e-05,
      "model_forward_time": 0.11504006385803223,
      "step": 5951
    },
    {
      "epoch": 3.6322021484375e-05,
      "step": 5951,
      "training_step_time": 0.4049057960510254
    },
    {
      "epoch": 3.6328125e-05,
      "model_forward_time": 0.1153562068939209,
      "step": 5952
    },
    {
      "epoch": 3.6328125e-05,
      "step": 5952,
      "training_step_time": 0.47818636894226074
    },
    {
      "epoch": 3.6334228515625e-05,
      "model_forward_time": 0.11474394798278809,
      "step": 5953
    },
    {
      "epoch": 3.6334228515625e-05,
      "step": 5953,
      "training_step_time": 0.4937174320220947
    },
    {
      "epoch": 3.634033203125e-05,
      "model_forward_time": 0.11467862129211426,
      "step": 5954
    },
    {
      "epoch": 3.634033203125e-05,
      "step": 5954,
      "training_step_time": 0.4186117649078369
    },
    {
      "epoch": 3.6346435546875e-05,
      "model_forward_time": 0.11444091796875,
      "step": 5955
    },
    {
      "epoch": 3.6346435546875e-05,
      "step": 5955,
      "training_step_time": 0.41303253173828125
    },
    {
      "epoch": 3.63525390625e-05,
      "model_forward_time": 0.11435413360595703,
      "step": 5956
    },
    {
      "epoch": 3.63525390625e-05,
      "step": 5956,
      "training_step_time": 0.3800323009490967
    },
    {
      "epoch": 3.6358642578125e-05,
      "model_forward_time": 0.1143805980682373,
      "step": 5957
    },
    {
      "epoch": 3.6358642578125e-05,
      "step": 5957,
      "training_step_time": 0.3843085765838623
    },
    {
      "epoch": 3.636474609375e-05,
      "model_forward_time": 0.1148374080657959,
      "step": 5958
    },
    {
      "epoch": 3.636474609375e-05,
      "step": 5958,
      "training_step_time": 0.39075493812561035
    },
    {
      "epoch": 3.6370849609375e-05,
      "model_forward_time": 0.11627197265625,
      "step": 5959
    },
    {
      "epoch": 3.6370849609375e-05,
      "step": 5959,
      "training_step_time": 0.4002804756164551
    },
    {
      "epoch": 3.6376953125e-05,
      "grad_norm": 0.3639170527458191,
      "learning_rate": 9.933608878242153e-05,
      "loss": 0.0834,
      "step": 5960
    },
    {
      "epoch": 3.6376953125e-05,
      "model_forward_time": 0.11506938934326172,
      "step": 5960
    },
    {
      "epoch": 3.6376953125e-05,
      "step": 5960,
      "training_step_time": 0.39017820358276367
    },
    {
      "epoch": 3.6383056640625e-05,
      "model_forward_time": 0.11572527885437012,
      "step": 5961
    },
    {
      "epoch": 3.6383056640625e-05,
      "step": 5961,
      "training_step_time": 0.4058501720428467
    },
    {
      "epoch": 3.638916015625e-05,
      "model_forward_time": 0.11562228202819824,
      "step": 5962
    },
    {
      "epoch": 3.638916015625e-05,
      "step": 5962,
      "training_step_time": 0.4131901264190674
    },
    {
      "epoch": 3.6395263671875e-05,
      "model_forward_time": 0.11446976661682129,
      "step": 5963
    },
    {
      "epoch": 3.6395263671875e-05,
      "step": 5963,
      "training_step_time": 0.387786865234375
    },
    {
      "epoch": 3.64013671875e-05,
      "model_forward_time": 0.11501932144165039,
      "step": 5964
    },
    {
      "epoch": 3.64013671875e-05,
      "step": 5964,
      "training_step_time": 0.38646554946899414
    },
    {
      "epoch": 3.6407470703125e-05,
      "model_forward_time": 0.11544346809387207,
      "step": 5965
    },
    {
      "epoch": 3.6407470703125e-05,
      "step": 5965,
      "training_step_time": 0.4336659908294678
    },
    {
      "epoch": 3.641357421875e-05,
      "model_forward_time": 0.11489415168762207,
      "step": 5966
    },
    {
      "epoch": 3.641357421875e-05,
      "step": 5966,
      "training_step_time": 0.3770711421966553
    },
    {
      "epoch": 3.6419677734375e-05,
      "model_forward_time": 0.11489701271057129,
      "step": 5967
    },
    {
      "epoch": 3.6419677734375e-05,
      "step": 5967,
      "training_step_time": 0.48933935165405273
    },
    {
      "epoch": 3.642578125e-05,
      "model_forward_time": 0.11488676071166992,
      "step": 5968
    },
    {
      "epoch": 3.642578125e-05,
      "step": 5968,
      "training_step_time": 0.4947032928466797
    },
    {
      "epoch": 3.6431884765625e-05,
      "model_forward_time": 0.11507868766784668,
      "step": 5969
    },
    {
      "epoch": 3.6431884765625e-05,
      "step": 5969,
      "training_step_time": 0.40294313430786133
    },
    {
      "epoch": 3.643798828125e-05,
      "grad_norm": 0.2827535569667816,
      "learning_rate": 9.93316053564413e-05,
      "loss": 0.08,
      "step": 5970
    },
    {
      "epoch": 3.643798828125e-05,
      "model_forward_time": 0.11439013481140137,
      "step": 5970
    },
    {
      "epoch": 3.643798828125e-05,
      "step": 5970,
      "training_step_time": 0.42330241203308105
    },
    {
      "epoch": 3.6444091796875e-05,
      "model_forward_time": 0.11549997329711914,
      "step": 5971
    },
    {
      "epoch": 3.6444091796875e-05,
      "step": 5971,
      "training_step_time": 0.39565134048461914
    },
    {
      "epoch": 3.64501953125e-05,
      "model_forward_time": 0.11457252502441406,
      "step": 5972
    },
    {
      "epoch": 3.64501953125e-05,
      "step": 5972,
      "training_step_time": 0.39096689224243164
    },
    {
      "epoch": 3.6456298828125e-05,
      "model_forward_time": 0.11466050148010254,
      "step": 5973
    },
    {
      "epoch": 3.6456298828125e-05,
      "step": 5973,
      "training_step_time": 0.3912167549133301
    },
    {
      "epoch": 3.646240234375e-05,
      "model_forward_time": 0.11499953269958496,
      "step": 5974
    },
    {
      "epoch": 3.646240234375e-05,
      "step": 5974,
      "training_step_time": 0.4747195243835449
    },
    {
      "epoch": 3.6468505859375e-05,
      "model_forward_time": 0.11484599113464355,
      "step": 5975
    },
    {
      "epoch": 3.6468505859375e-05,
      "step": 5975,
      "training_step_time": 0.44553709030151367
    },
    {
      "epoch": 3.6474609375e-05,
      "model_forward_time": 0.11588382720947266,
      "step": 5976
    },
    {
      "epoch": 3.6474609375e-05,
      "step": 5976,
      "training_step_time": 0.4011218547821045
    },
    {
      "epoch": 3.6480712890625e-05,
      "model_forward_time": 0.11539697647094727,
      "step": 5977
    },
    {
      "epoch": 3.6480712890625e-05,
      "step": 5977,
      "training_step_time": 0.40937137603759766
    },
    {
      "epoch": 3.648681640625e-05,
      "model_forward_time": 0.11542558670043945,
      "step": 5978
    },
    {
      "epoch": 3.648681640625e-05,
      "step": 5978,
      "training_step_time": 0.3961963653564453
    },
    {
      "epoch": 3.6492919921875e-05,
      "model_forward_time": 0.1153876781463623,
      "step": 5979
    },
    {
      "epoch": 3.6492919921875e-05,
      "step": 5979,
      "training_step_time": 0.39299678802490234
    },
    {
      "epoch": 3.64990234375e-05,
      "grad_norm": 0.3384322226047516,
      "learning_rate": 9.932710694482191e-05,
      "loss": 0.0845,
      "step": 5980
    },
    {
      "epoch": 3.64990234375e-05,
      "model_forward_time": 0.11606860160827637,
      "step": 5980
    },
    {
      "epoch": 3.64990234375e-05,
      "step": 5980,
      "training_step_time": 0.5784218311309814
    },
    {
      "epoch": 3.6505126953125e-05,
      "model_forward_time": 0.11549758911132812,
      "step": 5981
    },
    {
      "epoch": 3.6505126953125e-05,
      "step": 5981,
      "training_step_time": 0.47918152809143066
    },
    {
      "epoch": 3.651123046875e-05,
      "model_forward_time": 0.11443734169006348,
      "step": 5982
    },
    {
      "epoch": 3.651123046875e-05,
      "step": 5982,
      "training_step_time": 0.42333984375
    },
    {
      "epoch": 3.6517333984375e-05,
      "model_forward_time": 0.11469173431396484,
      "step": 5983
    },
    {
      "epoch": 3.6517333984375e-05,
      "step": 5983,
      "training_step_time": 0.41414713859558105
    },
    {
      "epoch": 3.65234375e-05,
      "model_forward_time": 0.11486577987670898,
      "step": 5984
    },
    {
      "epoch": 3.65234375e-05,
      "step": 5984,
      "training_step_time": 0.45066332817077637
    },
    {
      "epoch": 3.6529541015625e-05,
      "model_forward_time": 0.1146235466003418,
      "step": 5985
    },
    {
      "epoch": 3.6529541015625e-05,
      "step": 5985,
      "training_step_time": 0.4508812427520752
    },
    {
      "epoch": 3.653564453125e-05,
      "model_forward_time": 0.1145334243774414,
      "step": 5986
    },
    {
      "epoch": 3.653564453125e-05,
      "step": 5986,
      "training_step_time": 0.3891618251800537
    },
    {
      "epoch": 3.6541748046875e-05,
      "model_forward_time": 0.11518573760986328,
      "step": 5987
    },
    {
      "epoch": 3.6541748046875e-05,
      "step": 5987,
      "training_step_time": 0.3890833854675293
    },
    {
      "epoch": 3.65478515625e-05,
      "model_forward_time": 0.11477375030517578,
      "step": 5988
    },
    {
      "epoch": 3.65478515625e-05,
      "step": 5988,
      "training_step_time": 0.40164756774902344
    },
    {
      "epoch": 3.6553955078125e-05,
      "model_forward_time": 0.11516833305358887,
      "step": 5989
    },
    {
      "epoch": 3.6553955078125e-05,
      "step": 5989,
      "training_step_time": 0.39336514472961426
    },
    {
      "epoch": 3.656005859375e-05,
      "grad_norm": 0.3036302328109741,
      "learning_rate": 9.932259354892984e-05,
      "loss": 0.0797,
      "step": 5990
    },
    {
      "epoch": 3.656005859375e-05,
      "model_forward_time": 0.11538815498352051,
      "step": 5990
    },
    {
      "epoch": 3.656005859375e-05,
      "step": 5990,
      "training_step_time": 0.4163477420806885
    },
    {
      "epoch": 3.6566162109375e-05,
      "model_forward_time": 0.1144249439239502,
      "step": 5991
    },
    {
      "epoch": 3.6566162109375e-05,
      "step": 5991,
      "training_step_time": 0.3943750858306885
    },
    {
      "epoch": 3.6572265625e-05,
      "model_forward_time": 0.11500668525695801,
      "step": 5992
    },
    {
      "epoch": 3.6572265625e-05,
      "step": 5992,
      "training_step_time": 0.6314163208007812
    },
    {
      "epoch": 3.6578369140625e-05,
      "model_forward_time": 0.11433529853820801,
      "step": 5993
    },
    {
      "epoch": 3.6578369140625e-05,
      "step": 5993,
      "training_step_time": 0.39923810958862305
    },
    {
      "epoch": 3.658447265625e-05,
      "model_forward_time": 0.1154177188873291,
      "step": 5994
    },
    {
      "epoch": 3.658447265625e-05,
      "step": 5994,
      "training_step_time": 0.45552611351013184
    },
    {
      "epoch": 3.6590576171875e-05,
      "model_forward_time": 0.11508727073669434,
      "step": 5995
    },
    {
      "epoch": 3.6590576171875e-05,
      "step": 5995,
      "training_step_time": 0.48746776580810547
    },
    {
      "epoch": 3.65966796875e-05,
      "model_forward_time": 0.11485981941223145,
      "step": 5996
    },
    {
      "epoch": 3.65966796875e-05,
      "step": 5996,
      "training_step_time": 0.49283623695373535
    },
    {
      "epoch": 3.6602783203125e-05,
      "model_forward_time": 0.11528182029724121,
      "step": 5997
    },
    {
      "epoch": 3.6602783203125e-05,
      "step": 5997,
      "training_step_time": 0.4870314598083496
    },
    {
      "epoch": 3.660888671875e-05,
      "model_forward_time": 0.11681413650512695,
      "step": 5998
    },
    {
      "epoch": 3.660888671875e-05,
      "step": 5998,
      "training_step_time": 0.4885838031768799
    },
    {
      "epoch": 3.6614990234375e-05,
      "model_forward_time": 0.11476302146911621,
      "step": 5999
    },
    {
      "epoch": 3.6614990234375e-05,
      "step": 5999,
      "training_step_time": 0.3827354907989502
    },
    {
      "epoch": 3.662109375e-05,
      "grad_norm": 0.2397361844778061,
      "learning_rate": 9.931806517013612e-05,
      "loss": 0.0848,
      "step": 6000
    },
    {
      "epoch": 3.662109375e-05,
      "model_forward_time": 0.11354565620422363,
      "step": 6000
    },
    {
      "epoch": 3.662109375e-05,
      "step": 6000,
      "training_step_time": 0.3601648807525635
    },
    {
      "epoch": 3.6627197265625e-05,
      "model_forward_time": 0.11283993721008301,
      "step": 6001
    },
    {
      "epoch": 3.6627197265625e-05,
      "step": 6001,
      "training_step_time": 0.37166571617126465
    },
    {
      "epoch": 3.663330078125e-05,
      "model_forward_time": 0.1127772331237793,
      "step": 6002
    },
    {
      "epoch": 3.663330078125e-05,
      "step": 6002,
      "training_step_time": 0.3840465545654297
    },
    {
      "epoch": 3.6639404296875e-05,
      "model_forward_time": 0.11370420455932617,
      "step": 6003
    },
    {
      "epoch": 3.6639404296875e-05,
      "step": 6003,
      "training_step_time": 0.41939616203308105
    },
    {
      "epoch": 3.66455078125e-05,
      "model_forward_time": 0.11480402946472168,
      "step": 6004
    },
    {
      "epoch": 3.66455078125e-05,
      "step": 6004,
      "training_step_time": 0.40106701850891113
    },
    {
      "epoch": 3.6651611328125e-05,
      "model_forward_time": 0.1155087947845459,
      "step": 6005
    },
    {
      "epoch": 3.6651611328125e-05,
      "step": 6005,
      "training_step_time": 0.38320279121398926
    },
    {
      "epoch": 3.665771484375e-05,
      "model_forward_time": 0.11486244201660156,
      "step": 6006
    },
    {
      "epoch": 3.665771484375e-05,
      "step": 6006,
      "training_step_time": 0.396881103515625
    },
    {
      "epoch": 3.6663818359375e-05,
      "model_forward_time": 0.11535954475402832,
      "step": 6007
    },
    {
      "epoch": 3.6663818359375e-05,
      "step": 6007,
      "training_step_time": 0.38843512535095215
    },
    {
      "epoch": 3.6669921875e-05,
      "model_forward_time": 0.11529064178466797,
      "step": 6008
    },
    {
      "epoch": 3.6669921875e-05,
      "step": 6008,
      "training_step_time": 0.3786768913269043
    },
    {
      "epoch": 3.6676025390625e-05,
      "model_forward_time": 0.11548376083374023,
      "step": 6009
    },
    {
      "epoch": 3.6676025390625e-05,
      "step": 6009,
      "training_step_time": 0.3875296115875244
    },
    {
      "epoch": 3.668212890625e-05,
      "grad_norm": 0.2963692843914032,
      "learning_rate": 9.931352180981637e-05,
      "loss": 0.078,
      "step": 6010
    },
    {
      "epoch": 3.668212890625e-05,
      "model_forward_time": 0.1152198314666748,
      "step": 6010
    },
    {
      "epoch": 3.668212890625e-05,
      "step": 6010,
      "training_step_time": 0.3868601322174072
    },
    {
      "epoch": 3.6688232421875e-05,
      "model_forward_time": 0.11492085456848145,
      "step": 6011
    },
    {
      "epoch": 3.6688232421875e-05,
      "step": 6011,
      "training_step_time": 0.397418737411499
    },
    {
      "epoch": 3.66943359375e-05,
      "model_forward_time": 0.11586713790893555,
      "step": 6012
    },
    {
      "epoch": 3.66943359375e-05,
      "step": 6012,
      "training_step_time": 0.3955683708190918
    },
    {
      "epoch": 3.6700439453125e-05,
      "model_forward_time": 0.11560678482055664,
      "step": 6013
    },
    {
      "epoch": 3.6700439453125e-05,
      "step": 6013,
      "training_step_time": 0.5155529975891113
    },
    {
      "epoch": 3.670654296875e-05,
      "model_forward_time": 0.11500811576843262,
      "step": 6014
    },
    {
      "epoch": 3.670654296875e-05,
      "step": 6014,
      "training_step_time": 0.46446704864501953
    },
    {
      "epoch": 3.6712646484375e-05,
      "model_forward_time": 0.11520195007324219,
      "step": 6015
    },
    {
      "epoch": 3.6712646484375e-05,
      "step": 6015,
      "training_step_time": 0.39445018768310547
    },
    {
      "epoch": 3.671875e-05,
      "model_forward_time": 0.11571836471557617,
      "step": 6016
    },
    {
      "epoch": 3.671875e-05,
      "step": 6016,
      "training_step_time": 0.4975099563598633
    },
    {
      "epoch": 3.6724853515625e-05,
      "model_forward_time": 0.11491990089416504,
      "step": 6017
    },
    {
      "epoch": 3.6724853515625e-05,
      "step": 6017,
      "training_step_time": 0.5030786991119385
    },
    {
      "epoch": 3.673095703125e-05,
      "model_forward_time": 0.11414217948913574,
      "step": 6018
    },
    {
      "epoch": 3.673095703125e-05,
      "step": 6018,
      "training_step_time": 0.4245340824127197
    },
    {
      "epoch": 3.6737060546875e-05,
      "model_forward_time": 0.11459660530090332,
      "step": 6019
    },
    {
      "epoch": 3.6737060546875e-05,
      "step": 6019,
      "training_step_time": 0.4356496334075928
    },
    {
      "epoch": 3.67431640625e-05,
      "grad_norm": 0.24499326944351196,
      "learning_rate": 9.930896346935077e-05,
      "loss": 0.0829,
      "step": 6020
    },
    {
      "epoch": 3.67431640625e-05,
      "model_forward_time": 0.11498594284057617,
      "step": 6020
    },
    {
      "epoch": 3.67431640625e-05,
      "step": 6020,
      "training_step_time": 0.3905820846557617
    },
    {
      "epoch": 3.6749267578125e-05,
      "model_forward_time": 0.11472821235656738,
      "step": 6021
    },
    {
      "epoch": 3.6749267578125e-05,
      "step": 6021,
      "training_step_time": 0.3826775550842285
    },
    {
      "epoch": 3.675537109375e-05,
      "model_forward_time": 0.11484313011169434,
      "step": 6022
    },
    {
      "epoch": 3.675537109375e-05,
      "step": 6022,
      "training_step_time": 0.3907618522644043
    },
    {
      "epoch": 3.6761474609375e-05,
      "model_forward_time": 0.11487269401550293,
      "step": 6023
    },
    {
      "epoch": 3.6761474609375e-05,
      "step": 6023,
      "training_step_time": 0.3939085006713867
    },
    {
      "epoch": 3.6767578125e-05,
      "model_forward_time": 0.11517000198364258,
      "step": 6024
    },
    {
      "epoch": 3.6767578125e-05,
      "step": 6024,
      "training_step_time": 0.3977813720703125
    },
    {
      "epoch": 3.6773681640625e-05,
      "model_forward_time": 0.11492276191711426,
      "step": 6025
    },
    {
      "epoch": 3.6773681640625e-05,
      "step": 6025,
      "training_step_time": 0.3942391872406006
    },
    {
      "epoch": 3.677978515625e-05,
      "model_forward_time": 0.11448001861572266,
      "step": 6026
    },
    {
      "epoch": 3.677978515625e-05,
      "step": 6026,
      "training_step_time": 0.39722609519958496
    },
    {
      "epoch": 3.6785888671875e-05,
      "model_forward_time": 0.11570239067077637,
      "step": 6027
    },
    {
      "epoch": 3.6785888671875e-05,
      "step": 6027,
      "training_step_time": 0.43667006492614746
    },
    {
      "epoch": 3.67919921875e-05,
      "model_forward_time": 0.11527538299560547,
      "step": 6028
    },
    {
      "epoch": 3.67919921875e-05,
      "step": 6028,
      "training_step_time": 0.3773026466369629
    },
    {
      "epoch": 3.6798095703125e-05,
      "model_forward_time": 0.11514639854431152,
      "step": 6029
    },
    {
      "epoch": 3.6798095703125e-05,
      "step": 6029,
      "training_step_time": 0.4587843418121338
    },
    {
      "epoch": 3.680419921875e-05,
      "grad_norm": 0.29364854097366333,
      "learning_rate": 9.930439015012396e-05,
      "loss": 0.0793,
      "step": 6030
    },
    {
      "epoch": 3.680419921875e-05,
      "model_forward_time": 0.11520576477050781,
      "step": 6030
    },
    {
      "epoch": 3.680419921875e-05,
      "step": 6030,
      "training_step_time": 0.510568380355835
    },
    {
      "epoch": 3.6810302734375e-05,
      "model_forward_time": 0.11484456062316895,
      "step": 6031
    },
    {
      "epoch": 3.6810302734375e-05,
      "step": 6031,
      "training_step_time": 0.5092434883117676
    },
    {
      "epoch": 3.681640625e-05,
      "model_forward_time": 0.11538314819335938,
      "step": 6032
    },
    {
      "epoch": 3.681640625e-05,
      "step": 6032,
      "training_step_time": 0.47800135612487793
    },
    {
      "epoch": 3.6822509765625e-05,
      "model_forward_time": 0.11487102508544922,
      "step": 6033
    },
    {
      "epoch": 3.6822509765625e-05,
      "step": 6033,
      "training_step_time": 0.3867452144622803
    },
    {
      "epoch": 3.682861328125e-05,
      "model_forward_time": 0.11479306221008301,
      "step": 6034
    },
    {
      "epoch": 3.682861328125e-05,
      "step": 6034,
      "training_step_time": 0.3822159767150879
    },
    {
      "epoch": 3.6834716796875e-05,
      "model_forward_time": 0.11428976058959961,
      "step": 6035
    },
    {
      "epoch": 3.6834716796875e-05,
      "step": 6035,
      "training_step_time": 0.4007136821746826
    },
    {
      "epoch": 3.68408203125e-05,
      "model_forward_time": 0.11527514457702637,
      "step": 6036
    },
    {
      "epoch": 3.68408203125e-05,
      "step": 6036,
      "training_step_time": 0.3901631832122803
    },
    {
      "epoch": 3.6846923828125e-05,
      "model_forward_time": 0.11478543281555176,
      "step": 6037
    },
    {
      "epoch": 3.6846923828125e-05,
      "step": 6037,
      "training_step_time": 0.39345788955688477
    },
    {
      "epoch": 3.685302734375e-05,
      "model_forward_time": 0.11502265930175781,
      "step": 6038
    },
    {
      "epoch": 3.685302734375e-05,
      "step": 6038,
      "training_step_time": 0.3892209529876709
    },
    {
      "epoch": 3.6859130859375e-05,
      "model_forward_time": 0.11532163619995117,
      "step": 6039
    },
    {
      "epoch": 3.6859130859375e-05,
      "step": 6039,
      "training_step_time": 0.38373470306396484
    },
    {
      "epoch": 3.6865234375e-05,
      "grad_norm": 0.23209451138973236,
      "learning_rate": 9.929980185352526e-05,
      "loss": 0.0846,
      "step": 6040
    },
    {
      "epoch": 3.6865234375e-05,
      "model_forward_time": 0.11530447006225586,
      "step": 6040
    },
    {
      "epoch": 3.6865234375e-05,
      "step": 6040,
      "training_step_time": 0.401381254196167
    },
    {
      "epoch": 3.6871337890625e-05,
      "model_forward_time": 0.11526203155517578,
      "step": 6041
    },
    {
      "epoch": 3.6871337890625e-05,
      "step": 6041,
      "training_step_time": 0.39977312088012695
    },
    {
      "epoch": 3.687744140625e-05,
      "model_forward_time": 0.11488127708435059,
      "step": 6042
    },
    {
      "epoch": 3.687744140625e-05,
      "step": 6042,
      "training_step_time": 0.40769505500793457
    },
    {
      "epoch": 3.6883544921875e-05,
      "model_forward_time": 0.11438727378845215,
      "step": 6043
    },
    {
      "epoch": 3.6883544921875e-05,
      "step": 6043,
      "training_step_time": 0.36951375007629395
    },
    {
      "epoch": 3.68896484375e-05,
      "model_forward_time": 0.11491656303405762,
      "step": 6044
    },
    {
      "epoch": 3.68896484375e-05,
      "step": 6044,
      "training_step_time": 0.49880385398864746
    },
    {
      "epoch": 3.6895751953125e-05,
      "model_forward_time": 0.1156759262084961,
      "step": 6045
    },
    {
      "epoch": 3.6895751953125e-05,
      "step": 6045,
      "training_step_time": 0.4779539108276367
    },
    {
      "epoch": 3.690185546875e-05,
      "model_forward_time": 0.11519455909729004,
      "step": 6046
    },
    {
      "epoch": 3.690185546875e-05,
      "step": 6046,
      "training_step_time": 0.45949411392211914
    },
    {
      "epoch": 3.6907958984375e-05,
      "model_forward_time": 0.11472487449645996,
      "step": 6047
    },
    {
      "epoch": 3.6907958984375e-05,
      "step": 6047,
      "training_step_time": 0.420851469039917
    },
    {
      "epoch": 3.69140625e-05,
      "model_forward_time": 0.11386895179748535,
      "step": 6048
    },
    {
      "epoch": 3.69140625e-05,
      "step": 6048,
      "training_step_time": 0.3984391689300537
    },
    {
      "epoch": 3.6920166015625e-05,
      "model_forward_time": 0.11469650268554688,
      "step": 6049
    },
    {
      "epoch": 3.6920166015625e-05,
      "step": 6049,
      "training_step_time": 0.3881542682647705
    },
    {
      "epoch": 3.692626953125e-05,
      "grad_norm": 0.3722728192806244,
      "learning_rate": 9.929519858094843e-05,
      "loss": 0.0769,
      "step": 6050
    },
    {
      "epoch": 3.692626953125e-05,
      "model_forward_time": 0.1149899959564209,
      "step": 6050
    },
    {
      "epoch": 3.692626953125e-05,
      "step": 6050,
      "training_step_time": 0.3934500217437744
    },
    {
      "epoch": 3.6932373046875e-05,
      "model_forward_time": 0.1149590015411377,
      "step": 6051
    },
    {
      "epoch": 3.6932373046875e-05,
      "step": 6051,
      "training_step_time": 0.37985682487487793
    },
    {
      "epoch": 3.69384765625e-05,
      "model_forward_time": 0.11517715454101562,
      "step": 6052
    },
    {
      "epoch": 3.69384765625e-05,
      "step": 6052,
      "training_step_time": 0.3884146213531494
    },
    {
      "epoch": 3.6944580078125e-05,
      "model_forward_time": 0.11564993858337402,
      "step": 6053
    },
    {
      "epoch": 3.6944580078125e-05,
      "step": 6053,
      "training_step_time": 0.3933744430541992
    },
    {
      "epoch": 3.695068359375e-05,
      "model_forward_time": 0.11500215530395508,
      "step": 6054
    },
    {
      "epoch": 3.695068359375e-05,
      "step": 6054,
      "training_step_time": 0.39264345169067383
    },
    {
      "epoch": 3.6956787109375e-05,
      "model_forward_time": 0.11574268341064453,
      "step": 6055
    },
    {
      "epoch": 3.6956787109375e-05,
      "step": 6055,
      "training_step_time": 0.39527034759521484
    },
    {
      "epoch": 3.6962890625e-05,
      "model_forward_time": 0.11474990844726562,
      "step": 6056
    },
    {
      "epoch": 3.6962890625e-05,
      "step": 6056,
      "training_step_time": 0.3909308910369873
    },
    {
      "epoch": 3.6968994140625e-05,
      "model_forward_time": 0.11543893814086914,
      "step": 6057
    },
    {
      "epoch": 3.6968994140625e-05,
      "step": 6057,
      "training_step_time": 0.3727855682373047
    },
    {
      "epoch": 3.697509765625e-05,
      "model_forward_time": 0.11549067497253418,
      "step": 6058
    },
    {
      "epoch": 3.697509765625e-05,
      "step": 6058,
      "training_step_time": 0.5059945583343506
    },
    {
      "epoch": 3.6981201171875e-05,
      "model_forward_time": 0.11486577987670898,
      "step": 6059
    },
    {
      "epoch": 3.6981201171875e-05,
      "step": 6059,
      "training_step_time": 0.4264695644378662
    },
    {
      "epoch": 3.69873046875e-05,
      "grad_norm": 0.19458144903182983,
      "learning_rate": 9.929058033379181e-05,
      "loss": 0.0805,
      "step": 6060
    },
    {
      "epoch": 3.69873046875e-05,
      "model_forward_time": 0.11601924896240234,
      "step": 6060
    },
    {
      "epoch": 3.69873046875e-05,
      "step": 6060,
      "training_step_time": 0.42876243591308594
    },
    {
      "epoch": 3.6993408203125e-05,
      "model_forward_time": 0.11531186103820801,
      "step": 6061
    },
    {
      "epoch": 3.6993408203125e-05,
      "step": 6061,
      "training_step_time": 0.4468502998352051
    },
    {
      "epoch": 3.699951171875e-05,
      "model_forward_time": 0.11496114730834961,
      "step": 6062
    },
    {
      "epoch": 3.699951171875e-05,
      "step": 6062,
      "training_step_time": 0.4364335536956787
    },
    {
      "epoch": 3.7005615234375e-05,
      "model_forward_time": 0.11578106880187988,
      "step": 6063
    },
    {
      "epoch": 3.7005615234375e-05,
      "step": 6063,
      "training_step_time": 0.3887135982513428
    },
    {
      "epoch": 3.701171875e-05,
      "model_forward_time": 0.11471176147460938,
      "step": 6064
    },
    {
      "epoch": 3.701171875e-05,
      "step": 6064,
      "training_step_time": 0.40976762771606445
    },
    {
      "epoch": 3.7017822265625e-05,
      "model_forward_time": 0.11510324478149414,
      "step": 6065
    },
    {
      "epoch": 3.7017822265625e-05,
      "step": 6065,
      "training_step_time": 0.40143895149230957
    },
    {
      "epoch": 3.702392578125e-05,
      "model_forward_time": 0.11464309692382812,
      "step": 6066
    },
    {
      "epoch": 3.702392578125e-05,
      "step": 6066,
      "training_step_time": 0.40297961235046387
    },
    {
      "epoch": 3.7030029296875e-05,
      "model_forward_time": 0.1145474910736084,
      "step": 6067
    },
    {
      "epoch": 3.7030029296875e-05,
      "step": 6067,
      "training_step_time": 0.40018606185913086
    },
    {
      "epoch": 3.70361328125e-05,
      "model_forward_time": 0.11523938179016113,
      "step": 6068
    },
    {
      "epoch": 3.70361328125e-05,
      "step": 6068,
      "training_step_time": 0.39525508880615234
    },
    {
      "epoch": 3.7042236328125e-05,
      "model_forward_time": 0.1156771183013916,
      "step": 6069
    },
    {
      "epoch": 3.7042236328125e-05,
      "step": 6069,
      "training_step_time": 0.38141441345214844
    },
    {
      "epoch": 3.704833984375e-05,
      "grad_norm": 0.31701818108558655,
      "learning_rate": 9.928594711345836e-05,
      "loss": 0.0814,
      "step": 6070
    },
    {
      "epoch": 3.704833984375e-05,
      "model_forward_time": 0.11509346961975098,
      "step": 6070
    },
    {
      "epoch": 3.704833984375e-05,
      "step": 6070,
      "training_step_time": 0.3871588706970215
    },
    {
      "epoch": 3.7054443359375e-05,
      "model_forward_time": 0.1159813404083252,
      "step": 6071
    },
    {
      "epoch": 3.7054443359375e-05,
      "step": 6071,
      "training_step_time": 0.3836984634399414
    },
    {
      "epoch": 3.7060546875e-05,
      "model_forward_time": 0.11547684669494629,
      "step": 6072
    },
    {
      "epoch": 3.7060546875e-05,
      "step": 6072,
      "training_step_time": 0.46481871604919434
    },
    {
      "epoch": 3.7066650390625e-05,
      "model_forward_time": 0.11552619934082031,
      "step": 6073
    },
    {
      "epoch": 3.7066650390625e-05,
      "step": 6073,
      "training_step_time": 0.4908175468444824
    },
    {
      "epoch": 3.707275390625e-05,
      "model_forward_time": 0.11650848388671875,
      "step": 6074
    },
    {
      "epoch": 3.707275390625e-05,
      "step": 6074,
      "training_step_time": 0.4990363121032715
    },
    {
      "epoch": 3.7078857421875e-05,
      "model_forward_time": 0.1151723861694336,
      "step": 6075
    },
    {
      "epoch": 3.7078857421875e-05,
      "step": 6075,
      "training_step_time": 0.5109410285949707
    },
    {
      "epoch": 3.70849609375e-05,
      "model_forward_time": 0.11467385292053223,
      "step": 6076
    },
    {
      "epoch": 3.70849609375e-05,
      "step": 6076,
      "training_step_time": 0.44580578804016113
    },
    {
      "epoch": 3.7091064453125e-05,
      "model_forward_time": 0.11402106285095215,
      "step": 6077
    },
    {
      "epoch": 3.7091064453125e-05,
      "step": 6077,
      "training_step_time": 0.38512563705444336
    },
    {
      "epoch": 3.709716796875e-05,
      "model_forward_time": 0.11493539810180664,
      "step": 6078
    },
    {
      "epoch": 3.709716796875e-05,
      "step": 6078,
      "training_step_time": 0.40749382972717285
    },
    {
      "epoch": 3.7103271484375e-05,
      "model_forward_time": 0.1147456169128418,
      "step": 6079
    },
    {
      "epoch": 3.7103271484375e-05,
      "step": 6079,
      "training_step_time": 0.4019153118133545
    },
    {
      "epoch": 3.7109375e-05,
      "grad_norm": 0.34499987959861755,
      "learning_rate": 9.92812989213555e-05,
      "loss": 0.0886,
      "step": 6080
    },
    {
      "epoch": 3.7109375e-05,
      "model_forward_time": 0.11461448669433594,
      "step": 6080
    },
    {
      "epoch": 3.7109375e-05,
      "step": 6080,
      "training_step_time": 0.4293787479400635
    },
    {
      "epoch": 3.7115478515625e-05,
      "model_forward_time": 0.11532092094421387,
      "step": 6081
    },
    {
      "epoch": 3.7115478515625e-05,
      "step": 6081,
      "training_step_time": 0.38983631134033203
    },
    {
      "epoch": 3.712158203125e-05,
      "model_forward_time": 0.11494326591491699,
      "step": 6082
    },
    {
      "epoch": 3.712158203125e-05,
      "step": 6082,
      "training_step_time": 0.3964722156524658
    },
    {
      "epoch": 3.7127685546875e-05,
      "model_forward_time": 0.11547970771789551,
      "step": 6083
    },
    {
      "epoch": 3.7127685546875e-05,
      "step": 6083,
      "training_step_time": 0.3962538242340088
    },
    {
      "epoch": 3.71337890625e-05,
      "model_forward_time": 0.11485505104064941,
      "step": 6084
    },
    {
      "epoch": 3.71337890625e-05,
      "step": 6084,
      "training_step_time": 0.39446163177490234
    },
    {
      "epoch": 3.7139892578125e-05,
      "model_forward_time": 0.11497974395751953,
      "step": 6085
    },
    {
      "epoch": 3.7139892578125e-05,
      "step": 6085,
      "training_step_time": 0.39899230003356934
    },
    {
      "epoch": 3.714599609375e-05,
      "model_forward_time": 0.11501097679138184,
      "step": 6086
    },
    {
      "epoch": 3.714599609375e-05,
      "step": 6086,
      "training_step_time": 0.39347386360168457
    },
    {
      "epoch": 3.7152099609375e-05,
      "model_forward_time": 0.11613035202026367,
      "step": 6087
    },
    {
      "epoch": 3.7152099609375e-05,
      "step": 6087,
      "training_step_time": 0.6230661869049072
    },
    {
      "epoch": 3.7158203125e-05,
      "model_forward_time": 0.11468195915222168,
      "step": 6088
    },
    {
      "epoch": 3.7158203125e-05,
      "step": 6088,
      "training_step_time": 0.467179536819458
    },
    {
      "epoch": 3.7164306640625e-05,
      "model_forward_time": 0.1149587631225586,
      "step": 6089
    },
    {
      "epoch": 3.7164306640625e-05,
      "step": 6089,
      "training_step_time": 0.4922924041748047
    },
    {
      "epoch": 3.717041015625e-05,
      "grad_norm": 0.2420942783355713,
      "learning_rate": 9.927663575889521e-05,
      "loss": 0.0819,
      "step": 6090
    },
    {
      "epoch": 3.717041015625e-05,
      "model_forward_time": 0.1144707202911377,
      "step": 6090
    },
    {
      "epoch": 3.717041015625e-05,
      "step": 6090,
      "training_step_time": 0.41725850105285645
    },
    {
      "epoch": 3.7176513671875e-05,
      "model_forward_time": 0.11484384536743164,
      "step": 6091
    },
    {
      "epoch": 3.7176513671875e-05,
      "step": 6091,
      "training_step_time": 0.40947413444519043
    },
    {
      "epoch": 3.71826171875e-05,
      "model_forward_time": 0.11464715003967285,
      "step": 6092
    },
    {
      "epoch": 3.71826171875e-05,
      "step": 6092,
      "training_step_time": 0.397510290145874
    },
    {
      "epoch": 3.7188720703125e-05,
      "model_forward_time": 0.1151421070098877,
      "step": 6093
    },
    {
      "epoch": 3.7188720703125e-05,
      "step": 6093,
      "training_step_time": 0.4004349708557129
    },
    {
      "epoch": 3.719482421875e-05,
      "model_forward_time": 0.11516070365905762,
      "step": 6094
    },
    {
      "epoch": 3.719482421875e-05,
      "step": 6094,
      "training_step_time": 0.4023163318634033
    },
    {
      "epoch": 3.7200927734375e-05,
      "model_forward_time": 0.11497712135314941,
      "step": 6095
    },
    {
      "epoch": 3.7200927734375e-05,
      "step": 6095,
      "training_step_time": 0.389601469039917
    },
    {
      "epoch": 3.720703125e-05,
      "model_forward_time": 0.11504697799682617,
      "step": 6096
    },
    {
      "epoch": 3.720703125e-05,
      "step": 6096,
      "training_step_time": 0.3898453712463379
    },
    {
      "epoch": 3.7213134765625e-05,
      "model_forward_time": 0.1149911880493164,
      "step": 6097
    },
    {
      "epoch": 3.7213134765625e-05,
      "step": 6097,
      "training_step_time": 0.3941640853881836
    },
    {
      "epoch": 3.721923828125e-05,
      "model_forward_time": 0.11519336700439453,
      "step": 6098
    },
    {
      "epoch": 3.721923828125e-05,
      "step": 6098,
      "training_step_time": 0.401869535446167
    },
    {
      "epoch": 3.7225341796875e-05,
      "model_forward_time": 0.11504840850830078,
      "step": 6099
    },
    {
      "epoch": 3.7225341796875e-05,
      "step": 6099,
      "training_step_time": 0.6345558166503906
    },
    {
      "epoch": 3.72314453125e-05,
      "grad_norm": 0.31335756182670593,
      "learning_rate": 9.927195762749405e-05,
      "loss": 0.0777,
      "step": 6100
    },
    {
      "epoch": 3.72314453125e-05,
      "model_forward_time": 0.11481285095214844,
      "step": 6100
    },
    {
      "epoch": 3.72314453125e-05,
      "step": 6100,
      "training_step_time": 0.4167022705078125
    },
    {
      "epoch": 3.7237548828125e-05,
      "model_forward_time": 0.11481237411499023,
      "step": 6101
    },
    {
      "epoch": 3.7237548828125e-05,
      "step": 6101,
      "training_step_time": 0.45076465606689453
    },
    {
      "epoch": 3.724365234375e-05,
      "model_forward_time": 0.1142737865447998,
      "step": 6102
    },
    {
      "epoch": 3.724365234375e-05,
      "step": 6102,
      "training_step_time": 0.44352293014526367
    },
    {
      "epoch": 3.7249755859375e-05,
      "model_forward_time": 0.1150808334350586,
      "step": 6103
    },
    {
      "epoch": 3.7249755859375e-05,
      "step": 6103,
      "training_step_time": 0.4583566188812256
    },
    {
      "epoch": 3.7255859375e-05,
      "model_forward_time": 0.11436343193054199,
      "step": 6104
    },
    {
      "epoch": 3.7255859375e-05,
      "step": 6104,
      "training_step_time": 0.4155266284942627
    },
    {
      "epoch": 3.7261962890625e-05,
      "model_forward_time": 0.11541414260864258,
      "step": 6105
    },
    {
      "epoch": 3.7261962890625e-05,
      "step": 6105,
      "training_step_time": 0.5092775821685791
    },
    {
      "epoch": 3.726806640625e-05,
      "model_forward_time": 0.1150064468383789,
      "step": 6106
    },
    {
      "epoch": 3.726806640625e-05,
      "step": 6106,
      "training_step_time": 0.3896300792694092
    },
    {
      "epoch": 3.7274169921875e-05,
      "model_forward_time": 0.11467528343200684,
      "step": 6107
    },
    {
      "epoch": 3.7274169921875e-05,
      "step": 6107,
      "training_step_time": 0.38988614082336426
    },
    {
      "epoch": 3.72802734375e-05,
      "model_forward_time": 0.11504554748535156,
      "step": 6108
    },
    {
      "epoch": 3.72802734375e-05,
      "step": 6108,
      "training_step_time": 0.39247584342956543
    },
    {
      "epoch": 3.7286376953125e-05,
      "model_forward_time": 0.11465001106262207,
      "step": 6109
    },
    {
      "epoch": 3.7286376953125e-05,
      "step": 6109,
      "training_step_time": 0.39408326148986816
    },
    {
      "epoch": 3.729248046875e-05,
      "grad_norm": 0.30985966324806213,
      "learning_rate": 9.926726452857312e-05,
      "loss": 0.0766,
      "step": 6110
    },
    {
      "epoch": 3.729248046875e-05,
      "model_forward_time": 0.11446976661682129,
      "step": 6110
    },
    {
      "epoch": 3.729248046875e-05,
      "step": 6110,
      "training_step_time": 0.40151500701904297
    },
    {
      "epoch": 3.7298583984375e-05,
      "model_forward_time": 0.11527109146118164,
      "step": 6111
    },
    {
      "epoch": 3.7298583984375e-05,
      "step": 6111,
      "training_step_time": 0.6641855239868164
    },
    {
      "epoch": 3.73046875e-05,
      "model_forward_time": 0.11553573608398438,
      "step": 6112
    },
    {
      "epoch": 3.73046875e-05,
      "step": 6112,
      "training_step_time": 0.39589738845825195
    },
    {
      "epoch": 3.7310791015625e-05,
      "model_forward_time": 0.1150047779083252,
      "step": 6113
    },
    {
      "epoch": 3.7310791015625e-05,
      "step": 6113,
      "training_step_time": 0.39360475540161133
    },
    {
      "epoch": 3.731689453125e-05,
      "model_forward_time": 0.11482477188110352,
      "step": 6114
    },
    {
      "epoch": 3.731689453125e-05,
      "step": 6114,
      "training_step_time": 0.43861865997314453
    },
    {
      "epoch": 3.7322998046875e-05,
      "model_forward_time": 0.11450409889221191,
      "step": 6115
    },
    {
      "epoch": 3.7322998046875e-05,
      "step": 6115,
      "training_step_time": 0.44165682792663574
    },
    {
      "epoch": 3.73291015625e-05,
      "model_forward_time": 0.11529731750488281,
      "step": 6116
    },
    {
      "epoch": 3.73291015625e-05,
      "step": 6116,
      "training_step_time": 0.5128405094146729
    },
    {
      "epoch": 3.7335205078125e-05,
      "model_forward_time": 0.11641716957092285,
      "step": 6117
    },
    {
      "epoch": 3.7335205078125e-05,
      "step": 6117,
      "training_step_time": 0.5086634159088135
    },
    {
      "epoch": 3.734130859375e-05,
      "model_forward_time": 0.11495709419250488,
      "step": 6118
    },
    {
      "epoch": 3.734130859375e-05,
      "step": 6118,
      "training_step_time": 0.4722893238067627
    },
    {
      "epoch": 3.7347412109375e-05,
      "model_forward_time": 0.11432671546936035,
      "step": 6119
    },
    {
      "epoch": 3.7347412109375e-05,
      "step": 6119,
      "training_step_time": 0.42040133476257324
    },
    {
      "epoch": 3.7353515625e-05,
      "grad_norm": 0.2941863238811493,
      "learning_rate": 9.926255646355804e-05,
      "loss": 0.0844,
      "step": 6120
    },
    {
      "epoch": 3.7353515625e-05,
      "model_forward_time": 0.11404299736022949,
      "step": 6120
    },
    {
      "epoch": 3.7353515625e-05,
      "step": 6120,
      "training_step_time": 0.4106619358062744
    },
    {
      "epoch": 3.7359619140625e-05,
      "model_forward_time": 0.11420893669128418,
      "step": 6121
    },
    {
      "epoch": 3.7359619140625e-05,
      "step": 6121,
      "training_step_time": 0.39748144149780273
    },
    {
      "epoch": 3.736572265625e-05,
      "model_forward_time": 0.1149754524230957,
      "step": 6122
    },
    {
      "epoch": 3.736572265625e-05,
      "step": 6122,
      "training_step_time": 0.3939208984375
    },
    {
      "epoch": 3.7371826171875e-05,
      "model_forward_time": 0.11571860313415527,
      "step": 6123
    },
    {
      "epoch": 3.7371826171875e-05,
      "step": 6123,
      "training_step_time": 0.5210955142974854
    },
    {
      "epoch": 3.73779296875e-05,
      "model_forward_time": 0.11505866050720215,
      "step": 6124
    },
    {
      "epoch": 3.73779296875e-05,
      "step": 6124,
      "training_step_time": 0.38507819175720215
    },
    {
      "epoch": 3.7384033203125e-05,
      "model_forward_time": 0.11718058586120605,
      "step": 6125
    },
    {
      "epoch": 3.7384033203125e-05,
      "step": 6125,
      "training_step_time": 0.3926517963409424
    },
    {
      "epoch": 3.739013671875e-05,
      "model_forward_time": 0.11420893669128418,
      "step": 6126
    },
    {
      "epoch": 3.739013671875e-05,
      "step": 6126,
      "training_step_time": 0.3935120105743408
    },
    {
      "epoch": 3.7396240234375e-05,
      "model_forward_time": 0.11470556259155273,
      "step": 6127
    },
    {
      "epoch": 3.7396240234375e-05,
      "step": 6127,
      "training_step_time": 0.3972024917602539
    },
    {
      "epoch": 3.740234375e-05,
      "model_forward_time": 0.11526966094970703,
      "step": 6128
    },
    {
      "epoch": 3.740234375e-05,
      "step": 6128,
      "training_step_time": 0.39483189582824707
    },
    {
      "epoch": 3.7408447265625e-05,
      "model_forward_time": 0.11529040336608887,
      "step": 6129
    },
    {
      "epoch": 3.7408447265625e-05,
      "step": 6129,
      "training_step_time": 0.39850306510925293
    },
    {
      "epoch": 3.741455078125e-05,
      "grad_norm": 0.2950305640697479,
      "learning_rate": 9.925783343387903e-05,
      "loss": 0.0837,
      "step": 6130
    },
    {
      "epoch": 3.741455078125e-05,
      "model_forward_time": 0.11527633666992188,
      "step": 6130
    },
    {
      "epoch": 3.741455078125e-05,
      "step": 6130,
      "training_step_time": 0.4221312999725342
    },
    {
      "epoch": 3.7420654296875e-05,
      "model_forward_time": 0.1148076057434082,
      "step": 6131
    },
    {
      "epoch": 3.7420654296875e-05,
      "step": 6131,
      "training_step_time": 0.45049190521240234
    },
    {
      "epoch": 3.74267578125e-05,
      "model_forward_time": 0.1148831844329834,
      "step": 6132
    },
    {
      "epoch": 3.74267578125e-05,
      "step": 6132,
      "training_step_time": 0.5135962963104248
    },
    {
      "epoch": 3.7432861328125e-05,
      "model_forward_time": 0.11477875709533691,
      "step": 6133
    },
    {
      "epoch": 3.7432861328125e-05,
      "step": 6133,
      "training_step_time": 0.49128007888793945
    },
    {
      "epoch": 3.743896484375e-05,
      "model_forward_time": 0.11434435844421387,
      "step": 6134
    },
    {
      "epoch": 3.743896484375e-05,
      "step": 6134,
      "training_step_time": 0.3895835876464844
    },
    {
      "epoch": 3.7445068359375e-05,
      "model_forward_time": 0.11494040489196777,
      "step": 6135
    },
    {
      "epoch": 3.7445068359375e-05,
      "step": 6135,
      "training_step_time": 0.3932175636291504
    },
    {
      "epoch": 3.7451171875e-05,
      "model_forward_time": 0.11408877372741699,
      "step": 6136
    },
    {
      "epoch": 3.7451171875e-05,
      "step": 6136,
      "training_step_time": 0.39024853706359863
    },
    {
      "epoch": 3.7457275390625e-05,
      "model_forward_time": 0.11532115936279297,
      "step": 6137
    },
    {
      "epoch": 3.7457275390625e-05,
      "step": 6137,
      "training_step_time": 0.3838787078857422
    },
    {
      "epoch": 3.746337890625e-05,
      "model_forward_time": 0.11482095718383789,
      "step": 6138
    },
    {
      "epoch": 3.746337890625e-05,
      "step": 6138,
      "training_step_time": 0.4068026542663574
    },
    {
      "epoch": 3.7469482421875e-05,
      "model_forward_time": 0.11497831344604492,
      "step": 6139
    },
    {
      "epoch": 3.7469482421875e-05,
      "step": 6139,
      "training_step_time": 0.393343448638916
    },
    {
      "epoch": 3.74755859375e-05,
      "grad_norm": 0.3621252477169037,
      "learning_rate": 9.925309544097078e-05,
      "loss": 0.0778,
      "step": 6140
    },
    {
      "epoch": 3.74755859375e-05,
      "model_forward_time": 0.11500954627990723,
      "step": 6140
    },
    {
      "epoch": 3.74755859375e-05,
      "step": 6140,
      "training_step_time": 0.4316747188568115
    },
    {
      "epoch": 3.7481689453125e-05,
      "model_forward_time": 0.11446785926818848,
      "step": 6141
    },
    {
      "epoch": 3.7481689453125e-05,
      "step": 6141,
      "training_step_time": 0.3913094997406006
    },
    {
      "epoch": 3.748779296875e-05,
      "model_forward_time": 0.11531519889831543,
      "step": 6142
    },
    {
      "epoch": 3.748779296875e-05,
      "step": 6142,
      "training_step_time": 0.3980233669281006
    },
    {
      "epoch": 3.7493896484375e-05,
      "model_forward_time": 0.11573171615600586,
      "step": 6143
    },
    {
      "epoch": 3.7493896484375e-05,
      "step": 6143,
      "training_step_time": 0.3966672420501709
    },
    {
      "epoch": 3.75e-05,
      "model_forward_time": 0.11484050750732422,
      "step": 6144
    },
    {
      "epoch": 3.75e-05,
      "step": 6144,
      "training_step_time": 0.4956233501434326
    },
    {
      "epoch": 3.7506103515625e-05,
      "model_forward_time": 0.11550450325012207,
      "step": 6145
    },
    {
      "epoch": 3.7506103515625e-05,
      "step": 6145,
      "training_step_time": 0.482957124710083
    },
    {
      "epoch": 3.751220703125e-05,
      "model_forward_time": 0.11551094055175781,
      "step": 6146
    },
    {
      "epoch": 3.751220703125e-05,
      "step": 6146,
      "training_step_time": 0.45450305938720703
    },
    {
      "epoch": 3.7518310546875e-05,
      "model_forward_time": 0.11486482620239258,
      "step": 6147
    },
    {
      "epoch": 3.7518310546875e-05,
      "step": 6147,
      "training_step_time": 0.5088748931884766
    },
    {
      "epoch": 3.75244140625e-05,
      "model_forward_time": 0.11468887329101562,
      "step": 6148
    },
    {
      "epoch": 3.75244140625e-05,
      "step": 6148,
      "training_step_time": 0.46490979194641113
    },
    {
      "epoch": 3.7530517578125e-05,
      "model_forward_time": 0.11618375778198242,
      "step": 6149
    },
    {
      "epoch": 3.7530517578125e-05,
      "step": 6149,
      "training_step_time": 0.37030959129333496
    },
    {
      "epoch": 3.753662109375e-05,
      "grad_norm": 0.21072690188884735,
      "learning_rate": 9.92483424862726e-05,
      "loss": 0.0736,
      "step": 6150
    },
    {
      "epoch": 3.753662109375e-05,
      "model_forward_time": 0.1144254207611084,
      "step": 6150
    },
    {
      "epoch": 3.753662109375e-05,
      "step": 6150,
      "training_step_time": 0.395641565322876
    },
    {
      "epoch": 3.7542724609375e-05,
      "model_forward_time": 0.11533284187316895,
      "step": 6151
    },
    {
      "epoch": 3.7542724609375e-05,
      "step": 6151,
      "training_step_time": 0.4025883674621582
    },
    {
      "epoch": 3.7548828125e-05,
      "model_forward_time": 0.11556792259216309,
      "step": 6152
    },
    {
      "epoch": 3.7548828125e-05,
      "step": 6152,
      "training_step_time": 0.3831954002380371
    },
    {
      "epoch": 3.7554931640625e-05,
      "model_forward_time": 0.11489510536193848,
      "step": 6153
    },
    {
      "epoch": 3.7554931640625e-05,
      "step": 6153,
      "training_step_time": 0.39370179176330566
    },
    {
      "epoch": 3.756103515625e-05,
      "model_forward_time": 0.11458277702331543,
      "step": 6154
    },
    {
      "epoch": 3.756103515625e-05,
      "step": 6154,
      "training_step_time": 0.42000603675842285
    },
    {
      "epoch": 3.7567138671875e-05,
      "model_forward_time": 0.11479449272155762,
      "step": 6155
    },
    {
      "epoch": 3.7567138671875e-05,
      "step": 6155,
      "training_step_time": 0.407071590423584
    },
    {
      "epoch": 3.75732421875e-05,
      "model_forward_time": 0.11655306816101074,
      "step": 6156
    },
    {
      "epoch": 3.75732421875e-05,
      "step": 6156,
      "training_step_time": 0.3892207145690918
    },
    {
      "epoch": 3.7579345703125e-05,
      "model_forward_time": 0.11609482765197754,
      "step": 6157
    },
    {
      "epoch": 3.7579345703125e-05,
      "step": 6157,
      "training_step_time": 0.4052746295928955
    },
    {
      "epoch": 3.758544921875e-05,
      "model_forward_time": 0.1158597469329834,
      "step": 6158
    },
    {
      "epoch": 3.758544921875e-05,
      "step": 6158,
      "training_step_time": 0.4412343502044678
    },
    {
      "epoch": 3.7591552734375e-05,
      "model_forward_time": 0.11508917808532715,
      "step": 6159
    },
    {
      "epoch": 3.7591552734375e-05,
      "step": 6159,
      "training_step_time": 0.39909911155700684
    },
    {
      "epoch": 3.759765625e-05,
      "grad_norm": 0.1925504356622696,
      "learning_rate": 9.924357457122828e-05,
      "loss": 0.0857,
      "step": 6160
    },
    {
      "epoch": 3.759765625e-05,
      "model_forward_time": 0.11590695381164551,
      "step": 6160
    },
    {
      "epoch": 3.759765625e-05,
      "step": 6160,
      "training_step_time": 0.39153051376342773
    },
    {
      "epoch": 3.7603759765625e-05,
      "model_forward_time": 0.11510491371154785,
      "step": 6161
    },
    {
      "epoch": 3.7603759765625e-05,
      "step": 6161,
      "training_step_time": 0.49544215202331543
    },
    {
      "epoch": 3.760986328125e-05,
      "model_forward_time": 0.11555194854736328,
      "step": 6162
    },
    {
      "epoch": 3.760986328125e-05,
      "step": 6162,
      "training_step_time": 0.5020201206207275
    },
    {
      "epoch": 3.7615966796875e-05,
      "model_forward_time": 0.11551451683044434,
      "step": 6163
    },
    {
      "epoch": 3.7615966796875e-05,
      "step": 6163,
      "training_step_time": 0.528712272644043
    },
    {
      "epoch": 3.76220703125e-05,
      "model_forward_time": 0.11448550224304199,
      "step": 6164
    },
    {
      "epoch": 3.76220703125e-05,
      "step": 6164,
      "training_step_time": 0.388425350189209
    },
    {
      "epoch": 3.7628173828125e-05,
      "model_forward_time": 0.11502623558044434,
      "step": 6165
    },
    {
      "epoch": 3.7628173828125e-05,
      "step": 6165,
      "training_step_time": 0.4048905372619629
    },
    {
      "epoch": 3.763427734375e-05,
      "model_forward_time": 0.11429333686828613,
      "step": 6166
    },
    {
      "epoch": 3.763427734375e-05,
      "step": 6166,
      "training_step_time": 0.37848401069641113
    },
    {
      "epoch": 3.7640380859375e-05,
      "model_forward_time": 0.11565542221069336,
      "step": 6167
    },
    {
      "epoch": 3.7640380859375e-05,
      "step": 6167,
      "training_step_time": 0.39021921157836914
    },
    {
      "epoch": 3.7646484375e-05,
      "model_forward_time": 0.11548733711242676,
      "step": 6168
    },
    {
      "epoch": 3.7646484375e-05,
      "step": 6168,
      "training_step_time": 0.43994879722595215
    },
    {
      "epoch": 3.7652587890625e-05,
      "model_forward_time": 0.1155707836151123,
      "step": 6169
    },
    {
      "epoch": 3.7652587890625e-05,
      "step": 6169,
      "training_step_time": 0.40017151832580566
    },
    {
      "epoch": 3.765869140625e-05,
      "grad_norm": 0.21035976707935333,
      "learning_rate": 9.923879169728622e-05,
      "loss": 0.0823,
      "step": 6170
    },
    {
      "epoch": 3.765869140625e-05,
      "model_forward_time": 0.11539506912231445,
      "step": 6170
    },
    {
      "epoch": 3.765869140625e-05,
      "step": 6170,
      "training_step_time": 0.41383814811706543
    },
    {
      "epoch": 3.7664794921875e-05,
      "model_forward_time": 0.11560225486755371,
      "step": 6171
    },
    {
      "epoch": 3.7664794921875e-05,
      "step": 6171,
      "training_step_time": 0.3928394317626953
    },
    {
      "epoch": 3.76708984375e-05,
      "model_forward_time": 0.11576318740844727,
      "step": 6172
    },
    {
      "epoch": 3.76708984375e-05,
      "step": 6172,
      "training_step_time": 0.3869047164916992
    },
    {
      "epoch": 3.7677001953125e-05,
      "model_forward_time": 0.1147165298461914,
      "step": 6173
    },
    {
      "epoch": 3.7677001953125e-05,
      "step": 6173,
      "training_step_time": 0.3963582515716553
    },
    {
      "epoch": 3.768310546875e-05,
      "model_forward_time": 0.11552953720092773,
      "step": 6174
    },
    {
      "epoch": 3.768310546875e-05,
      "step": 6174,
      "training_step_time": 0.37134289741516113
    },
    {
      "epoch": 3.7689208984375e-05,
      "model_forward_time": 0.11532831192016602,
      "step": 6175
    },
    {
      "epoch": 3.7689208984375e-05,
      "step": 6175,
      "training_step_time": 0.4864950180053711
    },
    {
      "epoch": 3.76953125e-05,
      "model_forward_time": 0.11466836929321289,
      "step": 6176
    },
    {
      "epoch": 3.76953125e-05,
      "step": 6176,
      "training_step_time": 0.49475955963134766
    },
    {
      "epoch": 3.7701416015625e-05,
      "model_forward_time": 0.1148538589477539,
      "step": 6177
    },
    {
      "epoch": 3.7701416015625e-05,
      "step": 6177,
      "training_step_time": 0.42235708236694336
    },
    {
      "epoch": 3.770751953125e-05,
      "model_forward_time": 0.1149439811706543,
      "step": 6178
    },
    {
      "epoch": 3.770751953125e-05,
      "step": 6178,
      "training_step_time": 0.45705676078796387
    },
    {
      "epoch": 3.7713623046875e-05,
      "model_forward_time": 0.1145775318145752,
      "step": 6179
    },
    {
      "epoch": 3.7713623046875e-05,
      "step": 6179,
      "training_step_time": 0.38411855697631836
    },
    {
      "epoch": 3.77197265625e-05,
      "grad_norm": 0.2576007843017578,
      "learning_rate": 9.923399386589933e-05,
      "loss": 0.0824,
      "step": 6180
    },
    {
      "epoch": 3.77197265625e-05,
      "model_forward_time": 0.11410331726074219,
      "step": 6180
    },
    {
      "epoch": 3.77197265625e-05,
      "step": 6180,
      "training_step_time": 0.39579200744628906
    },
    {
      "epoch": 3.7725830078125e-05,
      "model_forward_time": 0.1147305965423584,
      "step": 6181
    },
    {
      "epoch": 3.7725830078125e-05,
      "step": 6181,
      "training_step_time": 0.3952450752258301
    },
    {
      "epoch": 3.773193359375e-05,
      "model_forward_time": 0.11438536643981934,
      "step": 6182
    },
    {
      "epoch": 3.773193359375e-05,
      "step": 6182,
      "training_step_time": 0.38963770866394043
    },
    {
      "epoch": 3.7738037109375e-05,
      "model_forward_time": 0.11522126197814941,
      "step": 6183
    },
    {
      "epoch": 3.7738037109375e-05,
      "step": 6183,
      "training_step_time": 0.38684868812561035
    },
    {
      "epoch": 3.7744140625e-05,
      "model_forward_time": 0.11568737030029297,
      "step": 6184
    },
    {
      "epoch": 3.7744140625e-05,
      "step": 6184,
      "training_step_time": 0.3819417953491211
    },
    {
      "epoch": 3.7750244140625e-05,
      "model_forward_time": 0.11702752113342285,
      "step": 6185
    },
    {
      "epoch": 3.7750244140625e-05,
      "step": 6185,
      "training_step_time": 0.3899803161621094
    },
    {
      "epoch": 3.775634765625e-05,
      "model_forward_time": 0.11546683311462402,
      "step": 6186
    },
    {
      "epoch": 3.775634765625e-05,
      "step": 6186,
      "training_step_time": 0.3933980464935303
    },
    {
      "epoch": 3.7762451171875e-05,
      "model_forward_time": 0.1157996654510498,
      "step": 6187
    },
    {
      "epoch": 3.7762451171875e-05,
      "step": 6187,
      "training_step_time": 0.40154600143432617
    },
    {
      "epoch": 3.77685546875e-05,
      "model_forward_time": 0.11546897888183594,
      "step": 6188
    },
    {
      "epoch": 3.77685546875e-05,
      "step": 6188,
      "training_step_time": 0.434950590133667
    },
    {
      "epoch": 3.7774658203125e-05,
      "model_forward_time": 0.11653351783752441,
      "step": 6189
    },
    {
      "epoch": 3.7774658203125e-05,
      "step": 6189,
      "training_step_time": 0.40087366104125977
    },
    {
      "epoch": 3.778076171875e-05,
      "grad_norm": 0.3283666670322418,
      "learning_rate": 9.922918107852504e-05,
      "loss": 0.0864,
      "step": 6190
    },
    {
      "epoch": 3.778076171875e-05,
      "model_forward_time": 0.11543655395507812,
      "step": 6190
    },
    {
      "epoch": 3.778076171875e-05,
      "step": 6190,
      "training_step_time": 0.45997047424316406
    },
    {
      "epoch": 3.7786865234375e-05,
      "model_forward_time": 0.11547422409057617,
      "step": 6191
    },
    {
      "epoch": 3.7786865234375e-05,
      "step": 6191,
      "training_step_time": 0.5019958019256592
    },
    {
      "epoch": 3.779296875e-05,
      "model_forward_time": 0.11533069610595703,
      "step": 6192
    },
    {
      "epoch": 3.779296875e-05,
      "step": 6192,
      "training_step_time": 0.46235203742980957
    },
    {
      "epoch": 3.7799072265625e-05,
      "model_forward_time": 0.11522626876831055,
      "step": 6193
    },
    {
      "epoch": 3.7799072265625e-05,
      "step": 6193,
      "training_step_time": 0.387005090713501
    },
    {
      "epoch": 3.780517578125e-05,
      "model_forward_time": 0.11507701873779297,
      "step": 6194
    },
    {
      "epoch": 3.780517578125e-05,
      "step": 6194,
      "training_step_time": 0.43311190605163574
    },
    {
      "epoch": 3.7811279296875e-05,
      "model_forward_time": 0.11439943313598633,
      "step": 6195
    },
    {
      "epoch": 3.7811279296875e-05,
      "step": 6195,
      "training_step_time": 0.39404797554016113
    },
    {
      "epoch": 3.78173828125e-05,
      "model_forward_time": 0.115234375,
      "step": 6196
    },
    {
      "epoch": 3.78173828125e-05,
      "step": 6196,
      "training_step_time": 0.41149330139160156
    },
    {
      "epoch": 3.7823486328125e-05,
      "model_forward_time": 0.1152505874633789,
      "step": 6197
    },
    {
      "epoch": 3.7823486328125e-05,
      "step": 6197,
      "training_step_time": 0.38835835456848145
    },
    {
      "epoch": 3.782958984375e-05,
      "model_forward_time": 0.11493396759033203,
      "step": 6198
    },
    {
      "epoch": 3.782958984375e-05,
      "step": 6198,
      "training_step_time": 0.3912835121154785
    },
    {
      "epoch": 3.7835693359375e-05,
      "model_forward_time": 0.11504912376403809,
      "step": 6199
    },
    {
      "epoch": 3.7835693359375e-05,
      "step": 6199,
      "training_step_time": 0.3943142890930176
    },
    {
      "epoch": 3.7841796875e-05,
      "grad_norm": 0.35816752910614014,
      "learning_rate": 9.922435333662536e-05,
      "loss": 0.0802,
      "step": 6200
    },
    {
      "epoch": 3.7841796875e-05,
      "model_forward_time": 0.11486029624938965,
      "step": 6200
    },
    {
      "epoch": 3.7841796875e-05,
      "step": 6200,
      "training_step_time": 0.3856465816497803
    },
    {
      "epoch": 3.7847900390625e-05,
      "model_forward_time": 0.11579680442810059,
      "step": 6201
    },
    {
      "epoch": 3.7847900390625e-05,
      "step": 6201,
      "training_step_time": 0.39191651344299316
    },
    {
      "epoch": 3.785400390625e-05,
      "model_forward_time": 0.11548042297363281,
      "step": 6202
    },
    {
      "epoch": 3.785400390625e-05,
      "step": 6202,
      "training_step_time": 0.3972208499908447
    },
    {
      "epoch": 3.7860107421875e-05,
      "model_forward_time": 0.11516594886779785,
      "step": 6203
    },
    {
      "epoch": 3.7860107421875e-05,
      "step": 6203,
      "training_step_time": 0.5116455554962158
    },
    {
      "epoch": 3.78662109375e-05,
      "model_forward_time": 0.11475467681884766,
      "step": 6204
    },
    {
      "epoch": 3.78662109375e-05,
      "step": 6204,
      "training_step_time": 0.4633352756500244
    },
    {
      "epoch": 3.7872314453125e-05,
      "model_forward_time": 0.11587643623352051,
      "step": 6205
    },
    {
      "epoch": 3.7872314453125e-05,
      "step": 6205,
      "training_step_time": 0.49311184883117676
    },
    {
      "epoch": 3.787841796875e-05,
      "model_forward_time": 0.11708331108093262,
      "step": 6206
    },
    {
      "epoch": 3.787841796875e-05,
      "step": 6206,
      "training_step_time": 0.4985835552215576
    },
    {
      "epoch": 3.7884521484375e-05,
      "model_forward_time": 0.11446595191955566,
      "step": 6207
    },
    {
      "epoch": 3.7884521484375e-05,
      "step": 6207,
      "training_step_time": 0.4271070957183838
    },
    {
      "epoch": 3.7890625e-05,
      "model_forward_time": 0.1147010326385498,
      "step": 6208
    },
    {
      "epoch": 3.7890625e-05,
      "step": 6208,
      "training_step_time": 0.3720853328704834
    },
    {
      "epoch": 3.7896728515625e-05,
      "model_forward_time": 0.11478400230407715,
      "step": 6209
    },
    {
      "epoch": 3.7896728515625e-05,
      "step": 6209,
      "training_step_time": 0.38878583908081055
    },
    {
      "epoch": 3.790283203125e-05,
      "grad_norm": 0.2943185567855835,
      "learning_rate": 9.921951064166684e-05,
      "loss": 0.081,
      "step": 6210
    },
    {
      "epoch": 3.790283203125e-05,
      "model_forward_time": 0.11491847038269043,
      "step": 6210
    },
    {
      "epoch": 3.790283203125e-05,
      "step": 6210,
      "training_step_time": 0.39009690284729004
    },
    {
      "epoch": 3.7908935546875e-05,
      "model_forward_time": 0.11613583564758301,
      "step": 6211
    },
    {
      "epoch": 3.7908935546875e-05,
      "step": 6211,
      "training_step_time": 0.39054107666015625
    },
    {
      "epoch": 3.79150390625e-05,
      "model_forward_time": 0.11543512344360352,
      "step": 6212
    },
    {
      "epoch": 3.79150390625e-05,
      "step": 6212,
      "training_step_time": 0.39040422439575195
    },
    {
      "epoch": 3.7921142578125e-05,
      "model_forward_time": 0.11571812629699707,
      "step": 6213
    },
    {
      "epoch": 3.7921142578125e-05,
      "step": 6213,
      "training_step_time": 0.40033769607543945
    },
    {
      "epoch": 3.792724609375e-05,
      "model_forward_time": 0.11504125595092773,
      "step": 6214
    },
    {
      "epoch": 3.792724609375e-05,
      "step": 6214,
      "training_step_time": 0.37710070610046387
    },
    {
      "epoch": 3.7933349609375e-05,
      "model_forward_time": 0.1150815486907959,
      "step": 6215
    },
    {
      "epoch": 3.7933349609375e-05,
      "step": 6215,
      "training_step_time": 0.3940141201019287
    },
    {
      "epoch": 3.7939453125e-05,
      "model_forward_time": 0.11576080322265625,
      "step": 6216
    },
    {
      "epoch": 3.7939453125e-05,
      "step": 6216,
      "training_step_time": 0.3980369567871094
    },
    {
      "epoch": 3.7945556640625e-05,
      "model_forward_time": 0.11511015892028809,
      "step": 6217
    },
    {
      "epoch": 3.7945556640625e-05,
      "step": 6217,
      "training_step_time": 0.4467628002166748
    },
    {
      "epoch": 3.795166015625e-05,
      "model_forward_time": 0.11496114730834961,
      "step": 6218
    },
    {
      "epoch": 3.795166015625e-05,
      "step": 6218,
      "training_step_time": 0.43233656883239746
    },
    {
      "epoch": 3.7957763671875e-05,
      "model_forward_time": 0.11449265480041504,
      "step": 6219
    },
    {
      "epoch": 3.7957763671875e-05,
      "step": 6219,
      "training_step_time": 0.4585413932800293
    },
    {
      "epoch": 3.79638671875e-05,
      "grad_norm": 0.23427259922027588,
      "learning_rate": 9.921465299512054e-05,
      "loss": 0.0761,
      "step": 6220
    },
    {
      "epoch": 3.79638671875e-05,
      "model_forward_time": 0.11548018455505371,
      "step": 6220
    },
    {
      "epoch": 3.79638671875e-05,
      "step": 6220,
      "training_step_time": 0.46515870094299316
    },
    {
      "epoch": 3.7969970703125e-05,
      "model_forward_time": 0.11508703231811523,
      "step": 6221
    },
    {
      "epoch": 3.7969970703125e-05,
      "step": 6221,
      "training_step_time": 0.4461629390716553
    },
    {
      "epoch": 3.797607421875e-05,
      "model_forward_time": 0.11469316482543945,
      "step": 6222
    },
    {
      "epoch": 3.797607421875e-05,
      "step": 6222,
      "training_step_time": 0.4411044120788574
    },
    {
      "epoch": 3.7982177734375e-05,
      "model_forward_time": 0.11453962326049805,
      "step": 6223
    },
    {
      "epoch": 3.7982177734375e-05,
      "step": 6223,
      "training_step_time": 0.4350459575653076
    },
    {
      "epoch": 3.798828125e-05,
      "model_forward_time": 0.11419415473937988,
      "step": 6224
    },
    {
      "epoch": 3.798828125e-05,
      "step": 6224,
      "training_step_time": 0.39185309410095215
    },
    {
      "epoch": 3.7994384765625e-05,
      "model_forward_time": 0.11484861373901367,
      "step": 6225
    },
    {
      "epoch": 3.7994384765625e-05,
      "step": 6225,
      "training_step_time": 0.38886523246765137
    },
    {
      "epoch": 3.800048828125e-05,
      "model_forward_time": 0.11495828628540039,
      "step": 6226
    },
    {
      "epoch": 3.800048828125e-05,
      "step": 6226,
      "training_step_time": 0.38344240188598633
    },
    {
      "epoch": 3.8006591796875e-05,
      "model_forward_time": 0.11537790298461914,
      "step": 6227
    },
    {
      "epoch": 3.8006591796875e-05,
      "step": 6227,
      "training_step_time": 0.39166903495788574
    },
    {
      "epoch": 3.80126953125e-05,
      "model_forward_time": 0.11607623100280762,
      "step": 6228
    },
    {
      "epoch": 3.80126953125e-05,
      "step": 6228,
      "training_step_time": 0.40500569343566895
    },
    {
      "epoch": 3.8018798828125e-05,
      "model_forward_time": 0.11501169204711914,
      "step": 6229
    },
    {
      "epoch": 3.8018798828125e-05,
      "step": 6229,
      "training_step_time": 0.39816784858703613
    },
    {
      "epoch": 3.802490234375e-05,
      "grad_norm": 0.2352094054222107,
      "learning_rate": 9.92097803984621e-05,
      "loss": 0.0807,
      "step": 6230
    },
    {
      "epoch": 3.802490234375e-05,
      "model_forward_time": 0.1149294376373291,
      "step": 6230
    },
    {
      "epoch": 3.802490234375e-05,
      "step": 6230,
      "training_step_time": 0.3942594528198242
    },
    {
      "epoch": 3.8031005859375e-05,
      "model_forward_time": 0.11550545692443848,
      "step": 6231
    },
    {
      "epoch": 3.8031005859375e-05,
      "step": 6231,
      "training_step_time": 0.4005465507507324
    },
    {
      "epoch": 3.8037109375e-05,
      "model_forward_time": 0.11470794677734375,
      "step": 6232
    },
    {
      "epoch": 3.8037109375e-05,
      "step": 6232,
      "training_step_time": 0.3951592445373535
    },
    {
      "epoch": 3.8043212890625e-05,
      "model_forward_time": 0.11578822135925293,
      "step": 6233
    },
    {
      "epoch": 3.8043212890625e-05,
      "step": 6233,
      "training_step_time": 0.39869117736816406
    },
    {
      "epoch": 3.804931640625e-05,
      "model_forward_time": 0.11476016044616699,
      "step": 6234
    },
    {
      "epoch": 3.804931640625e-05,
      "step": 6234,
      "training_step_time": 0.4663667678833008
    },
    {
      "epoch": 3.8055419921875e-05,
      "model_forward_time": 0.11499333381652832,
      "step": 6235
    },
    {
      "epoch": 3.8055419921875e-05,
      "step": 6235,
      "training_step_time": 0.4969797134399414
    },
    {
      "epoch": 3.80615234375e-05,
      "model_forward_time": 0.11480021476745605,
      "step": 6236
    },
    {
      "epoch": 3.80615234375e-05,
      "step": 6236,
      "training_step_time": 0.41721510887145996
    },
    {
      "epoch": 3.8067626953125e-05,
      "model_forward_time": 0.11524319648742676,
      "step": 6237
    },
    {
      "epoch": 3.8067626953125e-05,
      "step": 6237,
      "training_step_time": 0.4165627956390381
    },
    {
      "epoch": 3.807373046875e-05,
      "model_forward_time": 0.1146993637084961,
      "step": 6238
    },
    {
      "epoch": 3.807373046875e-05,
      "step": 6238,
      "training_step_time": 0.37781763076782227
    },
    {
      "epoch": 3.8079833984375e-05,
      "model_forward_time": 0.11499762535095215,
      "step": 6239
    },
    {
      "epoch": 3.8079833984375e-05,
      "step": 6239,
      "training_step_time": 0.4005110263824463
    },
    {
      "epoch": 3.80859375e-05,
      "grad_norm": 0.2525123357772827,
      "learning_rate": 9.92048928531717e-05,
      "loss": 0.0777,
      "step": 6240
    },
    {
      "epoch": 3.80859375e-05,
      "model_forward_time": 0.1150212287902832,
      "step": 6240
    },
    {
      "epoch": 3.80859375e-05,
      "step": 6240,
      "training_step_time": 0.40230441093444824
    },
    {
      "epoch": 3.8092041015625e-05,
      "model_forward_time": 0.11535191535949707,
      "step": 6241
    },
    {
      "epoch": 3.8092041015625e-05,
      "step": 6241,
      "training_step_time": 0.40499281883239746
    },
    {
      "epoch": 3.809814453125e-05,
      "model_forward_time": 0.11609244346618652,
      "step": 6242
    },
    {
      "epoch": 3.809814453125e-05,
      "step": 6242,
      "training_step_time": 0.39813661575317383
    },
    {
      "epoch": 3.8104248046875e-05,
      "model_forward_time": 0.11502814292907715,
      "step": 6243
    },
    {
      "epoch": 3.8104248046875e-05,
      "step": 6243,
      "training_step_time": 0.39394402503967285
    },
    {
      "epoch": 3.81103515625e-05,
      "model_forward_time": 0.11549091339111328,
      "step": 6244
    },
    {
      "epoch": 3.81103515625e-05,
      "step": 6244,
      "training_step_time": 0.39203691482543945
    },
    {
      "epoch": 3.8116455078125e-05,
      "model_forward_time": 0.11516499519348145,
      "step": 6245
    },
    {
      "epoch": 3.8116455078125e-05,
      "step": 6245,
      "training_step_time": 0.38842177391052246
    },
    {
      "epoch": 3.812255859375e-05,
      "model_forward_time": 0.11485409736633301,
      "step": 6246
    },
    {
      "epoch": 3.812255859375e-05,
      "step": 6246,
      "training_step_time": 0.3872387409210205
    },
    {
      "epoch": 3.8128662109375e-05,
      "model_forward_time": 0.11497211456298828,
      "step": 6247
    },
    {
      "epoch": 3.8128662109375e-05,
      "step": 6247,
      "training_step_time": 0.4400665760040283
    },
    {
      "epoch": 3.8134765625e-05,
      "model_forward_time": 0.11548328399658203,
      "step": 6248
    },
    {
      "epoch": 3.8134765625e-05,
      "step": 6248,
      "training_step_time": 0.36649560928344727
    },
    {
      "epoch": 3.8140869140625e-05,
      "model_forward_time": 0.11535382270812988,
      "step": 6249
    },
    {
      "epoch": 3.8140869140625e-05,
      "step": 6249,
      "training_step_time": 0.45714306831359863
    },
    {
      "epoch": 3.814697265625e-05,
      "grad_norm": 0.17544591426849365,
      "learning_rate": 9.9199990360734e-05,
      "loss": 0.0801,
      "step": 6250
    },
    {
      "epoch": 3.814697265625e-05,
      "model_forward_time": 0.11464381217956543,
      "step": 6250
    },
    {
      "epoch": 3.814697265625e-05,
      "step": 6250,
      "training_step_time": 0.4602205753326416
    },
    {
      "epoch": 3.8153076171875e-05,
      "model_forward_time": 0.1150047779083252,
      "step": 6251
    },
    {
      "epoch": 3.8153076171875e-05,
      "step": 6251,
      "training_step_time": 0.46717333793640137
    },
    {
      "epoch": 3.81591796875e-05,
      "model_forward_time": 0.11488175392150879,
      "step": 6252
    },
    {
      "epoch": 3.81591796875e-05,
      "step": 6252,
      "training_step_time": 0.4201619625091553
    },
    {
      "epoch": 3.8165283203125e-05,
      "model_forward_time": 0.1149749755859375,
      "step": 6253
    },
    {
      "epoch": 3.8165283203125e-05,
      "step": 6253,
      "training_step_time": 0.4038350582122803
    },
    {
      "epoch": 3.817138671875e-05,
      "model_forward_time": 0.11499905586242676,
      "step": 6254
    },
    {
      "epoch": 3.817138671875e-05,
      "step": 6254,
      "training_step_time": 0.4002234935760498
    },
    {
      "epoch": 3.8177490234375e-05,
      "model_forward_time": 0.1147770881652832,
      "step": 6255
    },
    {
      "epoch": 3.8177490234375e-05,
      "step": 6255,
      "training_step_time": 0.3906068801879883
    },
    {
      "epoch": 3.818359375e-05,
      "model_forward_time": 0.11554384231567383,
      "step": 6256
    },
    {
      "epoch": 3.818359375e-05,
      "step": 6256,
      "training_step_time": 0.39035511016845703
    },
    {
      "epoch": 3.8189697265625e-05,
      "model_forward_time": 0.11500120162963867,
      "step": 6257
    },
    {
      "epoch": 3.8189697265625e-05,
      "step": 6257,
      "training_step_time": 0.3926687240600586
    },
    {
      "epoch": 3.819580078125e-05,
      "model_forward_time": 0.11497211456298828,
      "step": 6258
    },
    {
      "epoch": 3.819580078125e-05,
      "step": 6258,
      "training_step_time": 0.39371728897094727
    },
    {
      "epoch": 3.8201904296875e-05,
      "model_forward_time": 0.11539459228515625,
      "step": 6259
    },
    {
      "epoch": 3.8201904296875e-05,
      "step": 6259,
      "training_step_time": 0.395953893661499
    },
    {
      "epoch": 3.82080078125e-05,
      "grad_norm": 0.2442990392446518,
      "learning_rate": 9.91950729226383e-05,
      "loss": 0.0794,
      "step": 6260
    },
    {
      "epoch": 3.82080078125e-05,
      "model_forward_time": 0.11510372161865234,
      "step": 6260
    },
    {
      "epoch": 3.82080078125e-05,
      "step": 6260,
      "training_step_time": 0.39426159858703613
    },
    {
      "epoch": 3.8214111328125e-05,
      "model_forward_time": 0.11568355560302734,
      "step": 6261
    },
    {
      "epoch": 3.8214111328125e-05,
      "step": 6261,
      "training_step_time": 0.38948678970336914
    },
    {
      "epoch": 3.822021484375e-05,
      "model_forward_time": 0.1153557300567627,
      "step": 6262
    },
    {
      "epoch": 3.822021484375e-05,
      "step": 6262,
      "training_step_time": 0.42843031883239746
    },
    {
      "epoch": 3.8226318359375e-05,
      "model_forward_time": 0.11550402641296387,
      "step": 6263
    },
    {
      "epoch": 3.8226318359375e-05,
      "step": 6263,
      "training_step_time": 0.4912254810333252
    },
    {
      "epoch": 3.8232421875e-05,
      "model_forward_time": 0.11525940895080566,
      "step": 6264
    },
    {
      "epoch": 3.8232421875e-05,
      "step": 6264,
      "training_step_time": 0.47032713890075684
    },
    {
      "epoch": 3.8238525390625e-05,
      "model_forward_time": 0.11590957641601562,
      "step": 6265
    },
    {
      "epoch": 3.8238525390625e-05,
      "step": 6265,
      "training_step_time": 0.48877906799316406
    },
    {
      "epoch": 3.824462890625e-05,
      "model_forward_time": 0.11544346809387207,
      "step": 6266
    },
    {
      "epoch": 3.824462890625e-05,
      "step": 6266,
      "training_step_time": 0.4374549388885498
    },
    {
      "epoch": 3.8250732421875e-05,
      "model_forward_time": 0.11477851867675781,
      "step": 6267
    },
    {
      "epoch": 3.8250732421875e-05,
      "step": 6267,
      "training_step_time": 0.4280102252960205
    },
    {
      "epoch": 3.82568359375e-05,
      "model_forward_time": 0.11511611938476562,
      "step": 6268
    },
    {
      "epoch": 3.82568359375e-05,
      "step": 6268,
      "training_step_time": 0.3888125419616699
    },
    {
      "epoch": 3.8262939453125e-05,
      "model_forward_time": 0.11487984657287598,
      "step": 6269
    },
    {
      "epoch": 3.8262939453125e-05,
      "step": 6269,
      "training_step_time": 5.659558057785034
    },
    {
      "epoch": 3.826904296875e-05,
      "grad_norm": 0.3998180031776428,
      "learning_rate": 9.919014054037836e-05,
      "loss": 0.0794,
      "step": 6270
    },
    {
      "epoch": 3.826904296875e-05,
      "model_forward_time": 0.11249613761901855,
      "step": 6270
    },
    {
      "epoch": 3.826904296875e-05,
      "step": 6270,
      "training_step_time": 0.3981301784515381
    },
    {
      "epoch": 3.8275146484375e-05,
      "model_forward_time": 0.11239910125732422,
      "step": 6271
    },
    {
      "epoch": 3.8275146484375e-05,
      "step": 6271,
      "training_step_time": 0.3785402774810791
    },
    {
      "epoch": 3.828125e-05,
      "model_forward_time": 0.11325263977050781,
      "step": 6272
    },
    {
      "epoch": 3.828125e-05,
      "step": 6272,
      "training_step_time": 0.37723708152770996
    },
    {
      "epoch": 3.8287353515625e-05,
      "model_forward_time": 0.1140897274017334,
      "step": 6273
    },
    {
      "epoch": 3.8287353515625e-05,
      "step": 6273,
      "training_step_time": 0.3741164207458496
    },
    {
      "epoch": 3.829345703125e-05,
      "model_forward_time": 0.11442399024963379,
      "step": 6274
    },
    {
      "epoch": 3.829345703125e-05,
      "step": 6274,
      "training_step_time": 0.37813401222229004
    },
    {
      "epoch": 3.8299560546875e-05,
      "model_forward_time": 0.1157686710357666,
      "step": 6275
    },
    {
      "epoch": 3.8299560546875e-05,
      "step": 6275,
      "training_step_time": 0.3954761028289795
    },
    {
      "epoch": 3.83056640625e-05,
      "model_forward_time": 0.1148989200592041,
      "step": 6276
    },
    {
      "epoch": 3.83056640625e-05,
      "step": 6276,
      "training_step_time": 0.38849949836730957
    },
    {
      "epoch": 3.8311767578125e-05,
      "model_forward_time": 0.11483454704284668,
      "step": 6277
    },
    {
      "epoch": 3.8311767578125e-05,
      "step": 6277,
      "training_step_time": 0.3880918025970459
    },
    {
      "epoch": 3.831787109375e-05,
      "model_forward_time": 0.11579251289367676,
      "step": 6278
    },
    {
      "epoch": 3.831787109375e-05,
      "step": 6278,
      "training_step_time": 0.4066147804260254
    },
    {
      "epoch": 3.8323974609375e-05,
      "model_forward_time": 0.11487221717834473,
      "step": 6279
    },
    {
      "epoch": 3.8323974609375e-05,
      "step": 6279,
      "training_step_time": 0.43554234504699707
    },
    {
      "epoch": 3.8330078125e-05,
      "grad_norm": 0.27381622791290283,
      "learning_rate": 9.918519321545251e-05,
      "loss": 0.0819,
      "step": 6280
    },
    {
      "epoch": 3.8330078125e-05,
      "model_forward_time": 0.11472415924072266,
      "step": 6280
    },
    {
      "epoch": 3.8330078125e-05,
      "step": 6280,
      "training_step_time": 0.36988019943237305
    },
    {
      "epoch": 3.8336181640625e-05,
      "model_forward_time": 0.11507964134216309,
      "step": 6281
    },
    {
      "epoch": 3.8336181640625e-05,
      "step": 6281,
      "training_step_time": 0.41158437728881836
    },
    {
      "epoch": 3.834228515625e-05,
      "model_forward_time": 0.11507177352905273,
      "step": 6282
    },
    {
      "epoch": 3.834228515625e-05,
      "step": 6282,
      "training_step_time": 0.4181218147277832
    },
    {
      "epoch": 3.8348388671875e-05,
      "model_forward_time": 0.1150050163269043,
      "step": 6283
    },
    {
      "epoch": 3.8348388671875e-05,
      "step": 6283,
      "training_step_time": 0.44824767112731934
    },
    {
      "epoch": 3.83544921875e-05,
      "model_forward_time": 0.11616230010986328,
      "step": 6284
    },
    {
      "epoch": 3.83544921875e-05,
      "step": 6284,
      "training_step_time": 0.42612409591674805
    },
    {
      "epoch": 3.8360595703125e-05,
      "model_forward_time": 0.11485147476196289,
      "step": 6285
    },
    {
      "epoch": 3.8360595703125e-05,
      "step": 6285,
      "training_step_time": 0.41814661026000977
    },
    {
      "epoch": 3.836669921875e-05,
      "model_forward_time": 0.11526274681091309,
      "step": 6286
    },
    {
      "epoch": 3.836669921875e-05,
      "step": 6286,
      "training_step_time": 0.38204073905944824
    },
    {
      "epoch": 3.8372802734375e-05,
      "model_forward_time": 0.11575698852539062,
      "step": 6287
    },
    {
      "epoch": 3.8372802734375e-05,
      "step": 6287,
      "training_step_time": 0.39242029190063477
    },
    {
      "epoch": 3.837890625e-05,
      "model_forward_time": 0.11492371559143066,
      "step": 6288
    },
    {
      "epoch": 3.837890625e-05,
      "step": 6288,
      "training_step_time": 0.39813852310180664
    },
    {
      "epoch": 3.8385009765625e-05,
      "model_forward_time": 0.11530089378356934,
      "step": 6289
    },
    {
      "epoch": 3.8385009765625e-05,
      "step": 6289,
      "training_step_time": 0.3979682922363281
    },
    {
      "epoch": 3.839111328125e-05,
      "grad_norm": 0.16509085893630981,
      "learning_rate": 9.918023094936363e-05,
      "loss": 0.0859,
      "step": 6290
    },
    {
      "epoch": 3.839111328125e-05,
      "model_forward_time": 0.11581182479858398,
      "step": 6290
    },
    {
      "epoch": 3.839111328125e-05,
      "step": 6290,
      "training_step_time": 0.4129815101623535
    },
    {
      "epoch": 3.8397216796875e-05,
      "model_forward_time": 0.11486315727233887,
      "step": 6291
    },
    {
      "epoch": 3.8397216796875e-05,
      "step": 6291,
      "training_step_time": 0.393613338470459
    },
    {
      "epoch": 3.84033203125e-05,
      "model_forward_time": 0.11510086059570312,
      "step": 6292
    },
    {
      "epoch": 3.84033203125e-05,
      "step": 6292,
      "training_step_time": 0.4021034240722656
    },
    {
      "epoch": 3.8409423828125e-05,
      "model_forward_time": 0.11476945877075195,
      "step": 6293
    },
    {
      "epoch": 3.8409423828125e-05,
      "step": 6293,
      "training_step_time": 0.47263407707214355
    },
    {
      "epoch": 3.841552734375e-05,
      "model_forward_time": 0.11484265327453613,
      "step": 6294
    },
    {
      "epoch": 3.841552734375e-05,
      "step": 6294,
      "training_step_time": 0.4444570541381836
    },
    {
      "epoch": 3.8421630859375e-05,
      "model_forward_time": 0.11454892158508301,
      "step": 6295
    },
    {
      "epoch": 3.8421630859375e-05,
      "step": 6295,
      "training_step_time": 0.4222297668457031
    },
    {
      "epoch": 3.8427734375e-05,
      "model_forward_time": 0.11423540115356445,
      "step": 6296
    },
    {
      "epoch": 3.8427734375e-05,
      "step": 6296,
      "training_step_time": 0.4802560806274414
    },
    {
      "epoch": 3.8433837890625e-05,
      "model_forward_time": 0.1148538589477539,
      "step": 6297
    },
    {
      "epoch": 3.8433837890625e-05,
      "step": 6297,
      "training_step_time": 0.4804704189300537
    },
    {
      "epoch": 3.843994140625e-05,
      "model_forward_time": 0.11503219604492188,
      "step": 6298
    },
    {
      "epoch": 3.843994140625e-05,
      "step": 6298,
      "training_step_time": 0.4429178237915039
    },
    {
      "epoch": 3.8446044921875e-05,
      "model_forward_time": 0.11416316032409668,
      "step": 6299
    },
    {
      "epoch": 3.8446044921875e-05,
      "step": 6299,
      "training_step_time": 0.4939098358154297
    },
    {
      "epoch": 3.84521484375e-05,
      "grad_norm": 0.1977270245552063,
      "learning_rate": 9.917525374361912e-05,
      "loss": 0.0745,
      "step": 6300
    },
    {
      "epoch": 3.84521484375e-05,
      "model_forward_time": 0.11423635482788086,
      "step": 6300
    },
    {
      "epoch": 3.84521484375e-05,
      "step": 6300,
      "training_step_time": 0.38785862922668457
    },
    {
      "epoch": 3.8458251953125e-05,
      "model_forward_time": 0.11465978622436523,
      "step": 6301
    },
    {
      "epoch": 3.8458251953125e-05,
      "step": 6301,
      "training_step_time": 0.3905675411224365
    },
    {
      "epoch": 3.846435546875e-05,
      "model_forward_time": 0.11472487449645996,
      "step": 6302
    },
    {
      "epoch": 3.846435546875e-05,
      "step": 6302,
      "training_step_time": 0.3852212429046631
    },
    {
      "epoch": 3.8470458984375e-05,
      "model_forward_time": 0.11542081832885742,
      "step": 6303
    },
    {
      "epoch": 3.8470458984375e-05,
      "step": 6303,
      "training_step_time": 0.38323330879211426
    },
    {
      "epoch": 3.84765625e-05,
      "model_forward_time": 0.11560487747192383,
      "step": 6304
    },
    {
      "epoch": 3.84765625e-05,
      "step": 6304,
      "training_step_time": 0.3955559730529785
    },
    {
      "epoch": 3.8482666015625e-05,
      "model_forward_time": 0.11499619483947754,
      "step": 6305
    },
    {
      "epoch": 3.8482666015625e-05,
      "step": 6305,
      "training_step_time": 0.3960907459259033
    },
    {
      "epoch": 3.848876953125e-05,
      "model_forward_time": 0.11522960662841797,
      "step": 6306
    },
    {
      "epoch": 3.848876953125e-05,
      "step": 6306,
      "training_step_time": 0.3987462520599365
    },
    {
      "epoch": 3.8494873046875e-05,
      "model_forward_time": 0.1157386302947998,
      "step": 6307
    },
    {
      "epoch": 3.8494873046875e-05,
      "step": 6307,
      "training_step_time": 0.41959500312805176
    },
    {
      "epoch": 3.85009765625e-05,
      "model_forward_time": 0.11456084251403809,
      "step": 6308
    },
    {
      "epoch": 3.85009765625e-05,
      "step": 6308,
      "training_step_time": 0.47000718116760254
    },
    {
      "epoch": 3.8507080078125e-05,
      "model_forward_time": 0.11488890647888184,
      "step": 6309
    },
    {
      "epoch": 3.8507080078125e-05,
      "step": 6309,
      "training_step_time": 0.4626171588897705
    },
    {
      "epoch": 3.851318359375e-05,
      "grad_norm": 0.23993247747421265,
      "learning_rate": 9.917026159973092e-05,
      "loss": 0.0804,
      "step": 6310
    },
    {
      "epoch": 3.851318359375e-05,
      "model_forward_time": 0.11574959754943848,
      "step": 6310
    },
    {
      "epoch": 3.851318359375e-05,
      "step": 6310,
      "training_step_time": 0.45566701889038086
    },
    {
      "epoch": 3.8519287109375e-05,
      "model_forward_time": 0.11478519439697266,
      "step": 6311
    },
    {
      "epoch": 3.8519287109375e-05,
      "step": 6311,
      "training_step_time": 0.48345351219177246
    },
    {
      "epoch": 3.8525390625e-05,
      "model_forward_time": 0.11462903022766113,
      "step": 6312
    },
    {
      "epoch": 3.8525390625e-05,
      "step": 6312,
      "training_step_time": 0.39726877212524414
    },
    {
      "epoch": 3.8531494140625e-05,
      "model_forward_time": 0.11501812934875488,
      "step": 6313
    },
    {
      "epoch": 3.8531494140625e-05,
      "step": 6313,
      "training_step_time": 0.43604016304016113
    },
    {
      "epoch": 3.853759765625e-05,
      "model_forward_time": 0.11496734619140625,
      "step": 6314
    },
    {
      "epoch": 3.853759765625e-05,
      "step": 6314,
      "training_step_time": 0.46143341064453125
    },
    {
      "epoch": 3.8543701171875e-05,
      "model_forward_time": 0.11486124992370605,
      "step": 6315
    },
    {
      "epoch": 3.8543701171875e-05,
      "step": 6315,
      "training_step_time": 0.3859283924102783
    },
    {
      "epoch": 3.85498046875e-05,
      "model_forward_time": 0.11524009704589844,
      "step": 6316
    },
    {
      "epoch": 3.85498046875e-05,
      "step": 6316,
      "training_step_time": 0.39917945861816406
    },
    {
      "epoch": 3.8555908203125e-05,
      "model_forward_time": 0.11449337005615234,
      "step": 6317
    },
    {
      "epoch": 3.8555908203125e-05,
      "step": 6317,
      "training_step_time": 0.3919260501861572
    },
    {
      "epoch": 3.856201171875e-05,
      "model_forward_time": 0.11494660377502441,
      "step": 6318
    },
    {
      "epoch": 3.856201171875e-05,
      "step": 6318,
      "training_step_time": 0.40526437759399414
    },
    {
      "epoch": 3.8568115234375e-05,
      "model_forward_time": 0.11475253105163574,
      "step": 6319
    },
    {
      "epoch": 3.8568115234375e-05,
      "step": 6319,
      "training_step_time": 0.40082550048828125
    },
    {
      "epoch": 3.857421875e-05,
      "grad_norm": 0.3143400251865387,
      "learning_rate": 9.91652545192155e-05,
      "loss": 0.076,
      "step": 6320
    },
    {
      "epoch": 3.857421875e-05,
      "model_forward_time": 0.11485576629638672,
      "step": 6320
    },
    {
      "epoch": 3.857421875e-05,
      "step": 6320,
      "training_step_time": 0.4137990474700928
    },
    {
      "epoch": 3.8580322265625e-05,
      "model_forward_time": 0.11524319648742676,
      "step": 6321
    },
    {
      "epoch": 3.8580322265625e-05,
      "step": 6321,
      "training_step_time": 0.3988373279571533
    },
    {
      "epoch": 3.858642578125e-05,
      "model_forward_time": 0.11516046524047852,
      "step": 6322
    },
    {
      "epoch": 3.858642578125e-05,
      "step": 6322,
      "training_step_time": 0.3892631530761719
    },
    {
      "epoch": 3.8592529296875e-05,
      "model_forward_time": 0.11522960662841797,
      "step": 6323
    },
    {
      "epoch": 3.8592529296875e-05,
      "step": 6323,
      "training_step_time": 0.4272031784057617
    },
    {
      "epoch": 3.85986328125e-05,
      "model_forward_time": 0.11550211906433105,
      "step": 6324
    },
    {
      "epoch": 3.85986328125e-05,
      "step": 6324,
      "training_step_time": 0.4842560291290283
    },
    {
      "epoch": 3.8604736328125e-05,
      "model_forward_time": 0.1151726245880127,
      "step": 6325
    },
    {
      "epoch": 3.8604736328125e-05,
      "step": 6325,
      "training_step_time": 0.4657864570617676
    },
    {
      "epoch": 3.861083984375e-05,
      "model_forward_time": 0.11454606056213379,
      "step": 6326
    },
    {
      "epoch": 3.861083984375e-05,
      "step": 6326,
      "training_step_time": 0.46381521224975586
    },
    {
      "epoch": 3.8616943359375e-05,
      "model_forward_time": 0.11539053916931152,
      "step": 6327
    },
    {
      "epoch": 3.8616943359375e-05,
      "step": 6327,
      "training_step_time": 0.4377000331878662
    },
    {
      "epoch": 3.8623046875e-05,
      "model_forward_time": 0.11472249031066895,
      "step": 6328
    },
    {
      "epoch": 3.8623046875e-05,
      "step": 6328,
      "training_step_time": 0.48378443717956543
    },
    {
      "epoch": 3.8629150390625e-05,
      "model_forward_time": 0.11434769630432129,
      "step": 6329
    },
    {
      "epoch": 3.8629150390625e-05,
      "step": 6329,
      "training_step_time": 0.38875317573547363
    },
    {
      "epoch": 3.863525390625e-05,
      "grad_norm": 0.2330155223608017,
      "learning_rate": 9.91602325035939e-05,
      "loss": 0.0748,
      "step": 6330
    },
    {
      "epoch": 3.863525390625e-05,
      "model_forward_time": 0.11474180221557617,
      "step": 6330
    },
    {
      "epoch": 3.863525390625e-05,
      "step": 6330,
      "training_step_time": 0.3997619152069092
    },
    {
      "epoch": 3.8641357421875e-05,
      "model_forward_time": 0.11514019966125488,
      "step": 6331
    },
    {
      "epoch": 3.8641357421875e-05,
      "step": 6331,
      "training_step_time": 0.398287296295166
    },
    {
      "epoch": 3.86474609375e-05,
      "model_forward_time": 0.11549615859985352,
      "step": 6332
    },
    {
      "epoch": 3.86474609375e-05,
      "step": 6332,
      "training_step_time": 0.40331363677978516
    },
    {
      "epoch": 3.8653564453125e-05,
      "model_forward_time": 0.11525344848632812,
      "step": 6333
    },
    {
      "epoch": 3.8653564453125e-05,
      "step": 6333,
      "training_step_time": 0.41753363609313965
    },
    {
      "epoch": 3.865966796875e-05,
      "model_forward_time": 0.11595320701599121,
      "step": 6334
    },
    {
      "epoch": 3.865966796875e-05,
      "step": 6334,
      "training_step_time": 0.45581626892089844
    },
    {
      "epoch": 3.8665771484375e-05,
      "model_forward_time": 0.11503243446350098,
      "step": 6335
    },
    {
      "epoch": 3.8665771484375e-05,
      "step": 6335,
      "training_step_time": 0.4060556888580322
    },
    {
      "epoch": 3.8671875e-05,
      "model_forward_time": 0.11558842658996582,
      "step": 6336
    },
    {
      "epoch": 3.8671875e-05,
      "step": 6336,
      "training_step_time": 0.3984096050262451
    },
    {
      "epoch": 3.8677978515625e-05,
      "model_forward_time": 0.1157693862915039,
      "step": 6337
    },
    {
      "epoch": 3.8677978515625e-05,
      "step": 6337,
      "training_step_time": 0.3805990219116211
    },
    {
      "epoch": 3.868408203125e-05,
      "model_forward_time": 0.11564803123474121,
      "step": 6338
    },
    {
      "epoch": 3.868408203125e-05,
      "step": 6338,
      "training_step_time": 0.5114104747772217
    },
    {
      "epoch": 3.8690185546875e-05,
      "model_forward_time": 0.11526703834533691,
      "step": 6339
    },
    {
      "epoch": 3.8690185546875e-05,
      "step": 6339,
      "training_step_time": 0.4553804397583008
    },
    {
      "epoch": 3.86962890625e-05,
      "grad_norm": 0.2933768033981323,
      "learning_rate": 9.915519555439166e-05,
      "loss": 0.0757,
      "step": 6340
    },
    {
      "epoch": 3.86962890625e-05,
      "model_forward_time": 0.11534571647644043,
      "step": 6340
    },
    {
      "epoch": 3.86962890625e-05,
      "step": 6340,
      "training_step_time": 0.4055812358856201
    },
    {
      "epoch": 3.8702392578125e-05,
      "model_forward_time": 0.11579155921936035,
      "step": 6341
    },
    {
      "epoch": 3.8702392578125e-05,
      "step": 6341,
      "training_step_time": 0.4958336353302002
    },
    {
      "epoch": 3.870849609375e-05,
      "model_forward_time": 0.11490726470947266,
      "step": 6342
    },
    {
      "epoch": 3.870849609375e-05,
      "step": 6342,
      "training_step_time": 0.49506282806396484
    },
    {
      "epoch": 3.8714599609375e-05,
      "model_forward_time": 0.11452889442443848,
      "step": 6343
    },
    {
      "epoch": 3.8714599609375e-05,
      "step": 6343,
      "training_step_time": 0.40390825271606445
    },
    {
      "epoch": 3.8720703125e-05,
      "model_forward_time": 0.11454081535339355,
      "step": 6344
    },
    {
      "epoch": 3.8720703125e-05,
      "step": 6344,
      "training_step_time": 0.40982604026794434
    },
    {
      "epoch": 3.8726806640625e-05,
      "model_forward_time": 0.11490154266357422,
      "step": 6345
    },
    {
      "epoch": 3.8726806640625e-05,
      "step": 6345,
      "training_step_time": 0.4024674892425537
    },
    {
      "epoch": 3.873291015625e-05,
      "model_forward_time": 0.11505913734436035,
      "step": 6346
    },
    {
      "epoch": 3.873291015625e-05,
      "step": 6346,
      "training_step_time": 0.38326382637023926
    },
    {
      "epoch": 3.8739013671875e-05,
      "model_forward_time": 0.11535143852233887,
      "step": 6347
    },
    {
      "epoch": 3.8739013671875e-05,
      "step": 6347,
      "training_step_time": 0.39361000061035156
    },
    {
      "epoch": 3.87451171875e-05,
      "model_forward_time": 0.1148829460144043,
      "step": 6348
    },
    {
      "epoch": 3.87451171875e-05,
      "step": 6348,
      "training_step_time": 0.42252516746520996
    },
    {
      "epoch": 3.8751220703125e-05,
      "model_forward_time": 0.11560964584350586,
      "step": 6349
    },
    {
      "epoch": 3.8751220703125e-05,
      "step": 6349,
      "training_step_time": 0.42043113708496094
    },
    {
      "epoch": 3.875732421875e-05,
      "grad_norm": 0.3652785122394562,
      "learning_rate": 9.915014367313888e-05,
      "loss": 0.0716,
      "step": 6350
    },
    {
      "epoch": 3.875732421875e-05,
      "model_forward_time": 0.11507105827331543,
      "step": 6350
    },
    {
      "epoch": 3.875732421875e-05,
      "step": 6350,
      "training_step_time": 0.39769983291625977
    },
    {
      "epoch": 3.8763427734375e-05,
      "model_forward_time": 0.1156153678894043,
      "step": 6351
    },
    {
      "epoch": 3.8763427734375e-05,
      "step": 6351,
      "training_step_time": 0.39333391189575195
    },
    {
      "epoch": 3.876953125e-05,
      "model_forward_time": 0.1145315170288086,
      "step": 6352
    },
    {
      "epoch": 3.876953125e-05,
      "step": 6352,
      "training_step_time": 0.3943781852722168
    },
    {
      "epoch": 3.8775634765625e-05,
      "model_forward_time": 0.11526846885681152,
      "step": 6353
    },
    {
      "epoch": 3.8775634765625e-05,
      "step": 6353,
      "training_step_time": 0.4171442985534668
    },
    {
      "epoch": 3.878173828125e-05,
      "model_forward_time": 0.11536359786987305,
      "step": 6354
    },
    {
      "epoch": 3.878173828125e-05,
      "step": 6354,
      "training_step_time": 0.473006010055542
    },
    {
      "epoch": 3.8787841796875e-05,
      "model_forward_time": 0.11523079872131348,
      "step": 6355
    },
    {
      "epoch": 3.8787841796875e-05,
      "step": 6355,
      "training_step_time": 0.49163103103637695
    },
    {
      "epoch": 3.87939453125e-05,
      "model_forward_time": 0.11469602584838867,
      "step": 6356
    },
    {
      "epoch": 3.87939453125e-05,
      "step": 6356,
      "training_step_time": 0.46975183486938477
    },
    {
      "epoch": 3.8800048828125e-05,
      "model_forward_time": 0.11532044410705566,
      "step": 6357
    },
    {
      "epoch": 3.8800048828125e-05,
      "step": 6357,
      "training_step_time": 0.49309563636779785
    },
    {
      "epoch": 3.880615234375e-05,
      "model_forward_time": 0.11452269554138184,
      "step": 6358
    },
    {
      "epoch": 3.880615234375e-05,
      "step": 6358,
      "training_step_time": 0.4034426212310791
    },
    {
      "epoch": 3.8812255859375e-05,
      "model_forward_time": 0.11458730697631836,
      "step": 6359
    },
    {
      "epoch": 3.8812255859375e-05,
      "step": 6359,
      "training_step_time": 0.39482975006103516
    },
    {
      "epoch": 3.8818359375e-05,
      "grad_norm": 0.20537322759628296,
      "learning_rate": 9.914507686137019e-05,
      "loss": 0.0798,
      "step": 6360
    },
    {
      "epoch": 3.8818359375e-05,
      "model_forward_time": 0.11469602584838867,
      "step": 6360
    },
    {
      "epoch": 3.8818359375e-05,
      "step": 6360,
      "training_step_time": 0.41800642013549805
    },
    {
      "epoch": 3.8824462890625e-05,
      "model_forward_time": 0.11518669128417969,
      "step": 6361
    },
    {
      "epoch": 3.8824462890625e-05,
      "step": 6361,
      "training_step_time": 0.40371274948120117
    },
    {
      "epoch": 3.883056640625e-05,
      "model_forward_time": 0.11483907699584961,
      "step": 6362
    },
    {
      "epoch": 3.883056640625e-05,
      "step": 6362,
      "training_step_time": 0.5004780292510986
    },
    {
      "epoch": 3.8836669921875e-05,
      "model_forward_time": 0.11463785171508789,
      "step": 6363
    },
    {
      "epoch": 3.8836669921875e-05,
      "step": 6363,
      "training_step_time": 0.3955345153808594
    },
    {
      "epoch": 3.88427734375e-05,
      "model_forward_time": 0.11536931991577148,
      "step": 6364
    },
    {
      "epoch": 3.88427734375e-05,
      "step": 6364,
      "training_step_time": 0.405580997467041
    },
    {
      "epoch": 3.8848876953125e-05,
      "model_forward_time": 0.11490774154663086,
      "step": 6365
    },
    {
      "epoch": 3.8848876953125e-05,
      "step": 6365,
      "training_step_time": 0.4037027359008789
    },
    {
      "epoch": 3.885498046875e-05,
      "model_forward_time": 0.11520838737487793,
      "step": 6366
    },
    {
      "epoch": 3.885498046875e-05,
      "step": 6366,
      "training_step_time": 0.476367712020874
    },
    {
      "epoch": 3.8861083984375e-05,
      "model_forward_time": 0.11681890487670898,
      "step": 6367
    },
    {
      "epoch": 3.8861083984375e-05,
      "step": 6367,
      "training_step_time": 0.47962403297424316
    },
    {
      "epoch": 3.88671875e-05,
      "model_forward_time": 0.11504960060119629,
      "step": 6368
    },
    {
      "epoch": 3.88671875e-05,
      "step": 6368,
      "training_step_time": 0.46481919288635254
    },
    {
      "epoch": 3.8873291015625e-05,
      "model_forward_time": 0.11497902870178223,
      "step": 6369
    },
    {
      "epoch": 3.8873291015625e-05,
      "step": 6369,
      "training_step_time": 0.45719480514526367
    },
    {
      "epoch": 3.887939453125e-05,
      "grad_norm": 0.24856138229370117,
      "learning_rate": 9.913999512062475e-05,
      "loss": 0.0757,
      "step": 6370
    },
    {
      "epoch": 3.887939453125e-05,
      "model_forward_time": 0.11501860618591309,
      "step": 6370
    },
    {
      "epoch": 3.887939453125e-05,
      "step": 6370,
      "training_step_time": 0.4528648853302002
    },
    {
      "epoch": 3.8885498046875e-05,
      "model_forward_time": 0.11455106735229492,
      "step": 6371
    },
    {
      "epoch": 3.8885498046875e-05,
      "step": 6371,
      "training_step_time": 0.4966244697570801
    },
    {
      "epoch": 3.88916015625e-05,
      "model_forward_time": 0.11505746841430664,
      "step": 6372
    },
    {
      "epoch": 3.88916015625e-05,
      "step": 6372,
      "training_step_time": 0.3840363025665283
    },
    {
      "epoch": 3.8897705078125e-05,
      "model_forward_time": 0.11487269401550293,
      "step": 6373
    },
    {
      "epoch": 3.8897705078125e-05,
      "step": 6373,
      "training_step_time": 0.40398430824279785
    },
    {
      "epoch": 3.890380859375e-05,
      "model_forward_time": 0.11570405960083008,
      "step": 6374
    },
    {
      "epoch": 3.890380859375e-05,
      "step": 6374,
      "training_step_time": 0.41588687896728516
    },
    {
      "epoch": 3.8909912109375e-05,
      "model_forward_time": 0.11438155174255371,
      "step": 6375
    },
    {
      "epoch": 3.8909912109375e-05,
      "step": 6375,
      "training_step_time": 0.3915088176727295
    },
    {
      "epoch": 3.8916015625e-05,
      "model_forward_time": 0.11514639854431152,
      "step": 6376
    },
    {
      "epoch": 3.8916015625e-05,
      "step": 6376,
      "training_step_time": 0.38275790214538574
    },
    {
      "epoch": 3.8922119140625e-05,
      "model_forward_time": 0.1158149242401123,
      "step": 6377
    },
    {
      "epoch": 3.8922119140625e-05,
      "step": 6377,
      "training_step_time": 0.3953089714050293
    },
    {
      "epoch": 3.892822265625e-05,
      "model_forward_time": 0.11513280868530273,
      "step": 6378
    },
    {
      "epoch": 3.892822265625e-05,
      "step": 6378,
      "training_step_time": 0.41579627990722656
    },
    {
      "epoch": 3.8934326171875e-05,
      "model_forward_time": 0.11548161506652832,
      "step": 6379
    },
    {
      "epoch": 3.8934326171875e-05,
      "step": 6379,
      "training_step_time": 0.39635133743286133
    },
    {
      "epoch": 3.89404296875e-05,
      "grad_norm": 0.27422022819519043,
      "learning_rate": 9.913489845244626e-05,
      "loss": 0.0743,
      "step": 6380
    },
    {
      "epoch": 3.89404296875e-05,
      "model_forward_time": 0.11552047729492188,
      "step": 6380
    },
    {
      "epoch": 3.89404296875e-05,
      "step": 6380,
      "training_step_time": 0.40076303482055664
    },
    {
      "epoch": 3.8946533203125e-05,
      "model_forward_time": 0.11533975601196289,
      "step": 6381
    },
    {
      "epoch": 3.8946533203125e-05,
      "step": 6381,
      "training_step_time": 0.49911975860595703
    },
    {
      "epoch": 3.895263671875e-05,
      "model_forward_time": 0.11488199234008789,
      "step": 6382
    },
    {
      "epoch": 3.895263671875e-05,
      "step": 6382,
      "training_step_time": 0.46575164794921875
    },
    {
      "epoch": 3.8958740234375e-05,
      "model_forward_time": 0.11574935913085938,
      "step": 6383
    },
    {
      "epoch": 3.8958740234375e-05,
      "step": 6383,
      "training_step_time": 0.4269556999206543
    },
    {
      "epoch": 3.896484375e-05,
      "model_forward_time": 0.11500072479248047,
      "step": 6384
    },
    {
      "epoch": 3.896484375e-05,
      "step": 6384,
      "training_step_time": 0.4211547374725342
    },
    {
      "epoch": 3.8970947265625e-05,
      "model_forward_time": 0.11456561088562012,
      "step": 6385
    },
    {
      "epoch": 3.8970947265625e-05,
      "step": 6385,
      "training_step_time": 0.4739096164703369
    },
    {
      "epoch": 3.897705078125e-05,
      "model_forward_time": 0.11582231521606445,
      "step": 6386
    },
    {
      "epoch": 3.897705078125e-05,
      "step": 6386,
      "training_step_time": 0.496265172958374
    },
    {
      "epoch": 3.8983154296875e-05,
      "model_forward_time": 0.11490702629089355,
      "step": 6387
    },
    {
      "epoch": 3.8983154296875e-05,
      "step": 6387,
      "training_step_time": 0.3971424102783203
    },
    {
      "epoch": 3.89892578125e-05,
      "model_forward_time": 0.11459088325500488,
      "step": 6388
    },
    {
      "epoch": 3.89892578125e-05,
      "step": 6388,
      "training_step_time": 0.38433170318603516
    },
    {
      "epoch": 3.8995361328125e-05,
      "model_forward_time": 0.11561822891235352,
      "step": 6389
    },
    {
      "epoch": 3.8995361328125e-05,
      "step": 6389,
      "training_step_time": 0.3893406391143799
    },
    {
      "epoch": 3.900146484375e-05,
      "grad_norm": 0.3514620363712311,
      "learning_rate": 9.912978685838294e-05,
      "loss": 0.0792,
      "step": 6390
    },
    {
      "epoch": 3.900146484375e-05,
      "model_forward_time": 0.11507773399353027,
      "step": 6390
    },
    {
      "epoch": 3.900146484375e-05,
      "step": 6390,
      "training_step_time": 0.3992929458618164
    },
    {
      "epoch": 3.9007568359375e-05,
      "model_forward_time": 0.11517548561096191,
      "step": 6391
    },
    {
      "epoch": 3.9007568359375e-05,
      "step": 6391,
      "training_step_time": 0.40543651580810547
    },
    {
      "epoch": 3.9013671875e-05,
      "model_forward_time": 0.11499595642089844,
      "step": 6392
    },
    {
      "epoch": 3.9013671875e-05,
      "step": 6392,
      "training_step_time": 0.5230295658111572
    },
    {
      "epoch": 3.9019775390625e-05,
      "model_forward_time": 0.11416459083557129,
      "step": 6393
    },
    {
      "epoch": 3.9019775390625e-05,
      "step": 6393,
      "training_step_time": 0.40634846687316895
    },
    {
      "epoch": 3.902587890625e-05,
      "model_forward_time": 0.11472678184509277,
      "step": 6394
    },
    {
      "epoch": 3.902587890625e-05,
      "step": 6394,
      "training_step_time": 0.39510488510131836
    },
    {
      "epoch": 3.9031982421875e-05,
      "model_forward_time": 0.11520552635192871,
      "step": 6395
    },
    {
      "epoch": 3.9031982421875e-05,
      "step": 6395,
      "training_step_time": 0.42482686042785645
    },
    {
      "epoch": 3.90380859375e-05,
      "model_forward_time": 0.11502909660339355,
      "step": 6396
    },
    {
      "epoch": 3.90380859375e-05,
      "step": 6396,
      "training_step_time": 0.4163510799407959
    },
    {
      "epoch": 3.9044189453125e-05,
      "model_forward_time": 0.11522626876831055,
      "step": 6397
    },
    {
      "epoch": 3.9044189453125e-05,
      "step": 6397,
      "training_step_time": 0.44710254669189453
    },
    {
      "epoch": 3.905029296875e-05,
      "model_forward_time": 0.11667776107788086,
      "step": 6398
    },
    {
      "epoch": 3.905029296875e-05,
      "step": 6398,
      "training_step_time": 1.0253188610076904
    },
    {
      "epoch": 3.9056396484375e-05,
      "model_forward_time": 0.11452937126159668,
      "step": 6399
    },
    {
      "epoch": 3.9056396484375e-05,
      "step": 6399,
      "training_step_time": 0.43146395683288574
    },
    {
      "epoch": 3.90625e-05,
      "grad_norm": 0.20933504402637482,
      "learning_rate": 9.912466033998757e-05,
      "loss": 0.076,
      "step": 6400
    },
    {
      "epoch": 3.90625e-05,
      "model_forward_time": 0.1147613525390625,
      "step": 6400
    },
    {
      "epoch": 3.90625e-05,
      "step": 6400,
      "training_step_time": 0.3961458206176758
    },
    {
      "epoch": 3.9068603515625e-05,
      "model_forward_time": 0.11434364318847656,
      "step": 6401
    },
    {
      "epoch": 3.9068603515625e-05,
      "step": 6401,
      "training_step_time": 0.4327995777130127
    },
    {
      "epoch": 3.907470703125e-05,
      "model_forward_time": 0.11395001411437988,
      "step": 6402
    },
    {
      "epoch": 3.907470703125e-05,
      "step": 6402,
      "training_step_time": 0.39414000511169434
    },
    {
      "epoch": 3.9080810546875e-05,
      "model_forward_time": 0.11441826820373535,
      "step": 6403
    },
    {
      "epoch": 3.9080810546875e-05,
      "step": 6403,
      "training_step_time": 0.38923001289367676
    },
    {
      "epoch": 3.90869140625e-05,
      "model_forward_time": 0.1151127815246582,
      "step": 6404
    },
    {
      "epoch": 3.90869140625e-05,
      "step": 6404,
      "training_step_time": 0.5254452228546143
    },
    {
      "epoch": 3.9093017578125e-05,
      "model_forward_time": 0.1147012710571289,
      "step": 6405
    },
    {
      "epoch": 3.9093017578125e-05,
      "step": 6405,
      "training_step_time": 0.4150509834289551
    },
    {
      "epoch": 3.909912109375e-05,
      "model_forward_time": 0.11498808860778809,
      "step": 6406
    },
    {
      "epoch": 3.909912109375e-05,
      "step": 6406,
      "training_step_time": 0.3811507225036621
    },
    {
      "epoch": 3.9105224609375e-05,
      "model_forward_time": 0.11496424674987793,
      "step": 6407
    },
    {
      "epoch": 3.9105224609375e-05,
      "step": 6407,
      "training_step_time": 0.3960568904876709
    },
    {
      "epoch": 3.9111328125e-05,
      "model_forward_time": 0.11509442329406738,
      "step": 6408
    },
    {
      "epoch": 3.9111328125e-05,
      "step": 6408,
      "training_step_time": 0.4823036193847656
    },
    {
      "epoch": 3.9117431640625e-05,
      "model_forward_time": 0.11504364013671875,
      "step": 6409
    },
    {
      "epoch": 3.9117431640625e-05,
      "step": 6409,
      "training_step_time": 0.49286413192749023
    },
    {
      "epoch": 3.912353515625e-05,
      "grad_norm": 0.2778359055519104,
      "learning_rate": 9.911951889881747e-05,
      "loss": 0.0799,
      "step": 6410
    },
    {
      "epoch": 3.912353515625e-05,
      "model_forward_time": 0.11589932441711426,
      "step": 6410
    },
    {
      "epoch": 3.912353515625e-05,
      "step": 6410,
      "training_step_time": 0.8015391826629639
    },
    {
      "epoch": 3.9129638671875e-05,
      "model_forward_time": 0.11524105072021484,
      "step": 6411
    },
    {
      "epoch": 3.9129638671875e-05,
      "step": 6411,
      "training_step_time": 0.48612213134765625
    },
    {
      "epoch": 3.91357421875e-05,
      "model_forward_time": 0.1142117977142334,
      "step": 6412
    },
    {
      "epoch": 3.91357421875e-05,
      "step": 6412,
      "training_step_time": 0.4828016757965088
    },
    {
      "epoch": 3.9141845703125e-05,
      "model_forward_time": 0.11424946784973145,
      "step": 6413
    },
    {
      "epoch": 3.9141845703125e-05,
      "step": 6413,
      "training_step_time": 0.4149186611175537
    },
    {
      "epoch": 3.914794921875e-05,
      "model_forward_time": 0.11408352851867676,
      "step": 6414
    },
    {
      "epoch": 3.914794921875e-05,
      "step": 6414,
      "training_step_time": 0.39587879180908203
    },
    {
      "epoch": 3.9154052734375e-05,
      "model_forward_time": 0.1143195629119873,
      "step": 6415
    },
    {
      "epoch": 3.9154052734375e-05,
      "step": 6415,
      "training_step_time": 0.39070558547973633
    },
    {
      "epoch": 3.916015625e-05,
      "model_forward_time": 0.11458873748779297,
      "step": 6416
    },
    {
      "epoch": 3.916015625e-05,
      "step": 6416,
      "training_step_time": 0.4362971782684326
    },
    {
      "epoch": 3.9166259765625e-05,
      "model_forward_time": 0.1143636703491211,
      "step": 6417
    },
    {
      "epoch": 3.9166259765625e-05,
      "step": 6417,
      "training_step_time": 0.3979837894439697
    },
    {
      "epoch": 3.917236328125e-05,
      "model_forward_time": 0.11592721939086914,
      "step": 6418
    },
    {
      "epoch": 3.917236328125e-05,
      "step": 6418,
      "training_step_time": 0.39682745933532715
    },
    {
      "epoch": 3.9178466796875e-05,
      "model_forward_time": 0.1150212287902832,
      "step": 6419
    },
    {
      "epoch": 3.9178466796875e-05,
      "step": 6419,
      "training_step_time": 0.3986968994140625
    },
    {
      "epoch": 3.91845703125e-05,
      "grad_norm": 0.2147146463394165,
      "learning_rate": 9.911436253643445e-05,
      "loss": 0.0787,
      "step": 6420
    },
    {
      "epoch": 3.91845703125e-05,
      "model_forward_time": 0.1146392822265625,
      "step": 6420
    },
    {
      "epoch": 3.91845703125e-05,
      "step": 6420,
      "training_step_time": 0.4055452346801758
    },
    {
      "epoch": 3.9190673828125e-05,
      "model_forward_time": 0.11534857749938965,
      "step": 6421
    },
    {
      "epoch": 3.9190673828125e-05,
      "step": 6421,
      "training_step_time": 0.3946976661682129
    },
    {
      "epoch": 3.919677734375e-05,
      "model_forward_time": 0.11466360092163086,
      "step": 6422
    },
    {
      "epoch": 3.919677734375e-05,
      "step": 6422,
      "training_step_time": 0.8234319686889648
    },
    {
      "epoch": 3.9202880859375e-05,
      "model_forward_time": 0.11486291885375977,
      "step": 6423
    },
    {
      "epoch": 3.9202880859375e-05,
      "step": 6423,
      "training_step_time": 0.3892538547515869
    },
    {
      "epoch": 3.9208984375e-05,
      "model_forward_time": 0.11479425430297852,
      "step": 6424
    },
    {
      "epoch": 3.9208984375e-05,
      "step": 6424,
      "training_step_time": 0.42589378356933594
    },
    {
      "epoch": 3.9215087890625e-05,
      "model_forward_time": 0.11536407470703125,
      "step": 6425
    },
    {
      "epoch": 3.9215087890625e-05,
      "step": 6425,
      "training_step_time": 0.4579191207885742
    },
    {
      "epoch": 3.922119140625e-05,
      "model_forward_time": 0.11499476432800293,
      "step": 6426
    },
    {
      "epoch": 3.922119140625e-05,
      "step": 6426,
      "training_step_time": 0.4758763313293457
    },
    {
      "epoch": 3.9227294921875e-05,
      "model_forward_time": 0.1136484146118164,
      "step": 6427
    },
    {
      "epoch": 3.9227294921875e-05,
      "step": 6427,
      "training_step_time": 0.39220190048217773
    },
    {
      "epoch": 3.92333984375e-05,
      "model_forward_time": 0.11492609977722168,
      "step": 6428
    },
    {
      "epoch": 3.92333984375e-05,
      "step": 6428,
      "training_step_time": 0.40126609802246094
    },
    {
      "epoch": 3.9239501953125e-05,
      "model_forward_time": 0.11528253555297852,
      "step": 6429
    },
    {
      "epoch": 3.9239501953125e-05,
      "step": 6429,
      "training_step_time": 0.39522504806518555
    },
    {
      "epoch": 3.924560546875e-05,
      "grad_norm": 0.14858834445476532,
      "learning_rate": 9.910919125440485e-05,
      "loss": 0.0728,
      "step": 6430
    },
    {
      "epoch": 3.924560546875e-05,
      "model_forward_time": 0.11535906791687012,
      "step": 6430
    },
    {
      "epoch": 3.924560546875e-05,
      "step": 6430,
      "training_step_time": 0.39455318450927734
    },
    {
      "epoch": 3.9251708984375e-05,
      "model_forward_time": 0.11485028266906738,
      "step": 6431
    },
    {
      "epoch": 3.9251708984375e-05,
      "step": 6431,
      "training_step_time": 0.408200740814209
    },
    {
      "epoch": 3.92578125e-05,
      "model_forward_time": 0.11552143096923828,
      "step": 6432
    },
    {
      "epoch": 3.92578125e-05,
      "step": 6432,
      "training_step_time": 0.3914299011230469
    },
    {
      "epoch": 3.9263916015625e-05,
      "model_forward_time": 0.11560273170471191,
      "step": 6433
    },
    {
      "epoch": 3.9263916015625e-05,
      "step": 6433,
      "training_step_time": 0.39271116256713867
    },
    {
      "epoch": 3.927001953125e-05,
      "model_forward_time": 0.1153099536895752,
      "step": 6434
    },
    {
      "epoch": 3.927001953125e-05,
      "step": 6434,
      "training_step_time": 0.4003927707672119
    },
    {
      "epoch": 3.9276123046875e-05,
      "model_forward_time": 0.1150968074798584,
      "step": 6435
    },
    {
      "epoch": 3.9276123046875e-05,
      "step": 6435,
      "training_step_time": 0.40483784675598145
    },
    {
      "epoch": 3.92822265625e-05,
      "model_forward_time": 0.11537623405456543,
      "step": 6436
    },
    {
      "epoch": 3.92822265625e-05,
      "step": 6436,
      "training_step_time": 0.4854414463043213
    },
    {
      "epoch": 3.9288330078125e-05,
      "model_forward_time": 0.11556220054626465,
      "step": 6437
    },
    {
      "epoch": 3.9288330078125e-05,
      "step": 6437,
      "training_step_time": 0.4008290767669678
    },
    {
      "epoch": 3.929443359375e-05,
      "model_forward_time": 0.11543917655944824,
      "step": 6438
    },
    {
      "epoch": 3.929443359375e-05,
      "step": 6438,
      "training_step_time": 0.39005374908447266
    },
    {
      "epoch": 3.9300537109375e-05,
      "model_forward_time": 0.11570930480957031,
      "step": 6439
    },
    {
      "epoch": 3.9300537109375e-05,
      "step": 6439,
      "training_step_time": 0.4673027992248535
    },
    {
      "epoch": 3.9306640625e-05,
      "grad_norm": 0.22883924841880798,
      "learning_rate": 9.910400505429965e-05,
      "loss": 0.0767,
      "step": 6440
    },
    {
      "epoch": 3.9306640625e-05,
      "model_forward_time": 0.1154029369354248,
      "step": 6440
    },
    {
      "epoch": 3.9306640625e-05,
      "step": 6440,
      "training_step_time": 0.4587728977203369
    },
    {
      "epoch": 3.9312744140625e-05,
      "model_forward_time": 0.11542201042175293,
      "step": 6441
    },
    {
      "epoch": 3.9312744140625e-05,
      "step": 6441,
      "training_step_time": 0.4980790615081787
    },
    {
      "epoch": 3.931884765625e-05,
      "model_forward_time": 0.1147763729095459,
      "step": 6442
    },
    {
      "epoch": 3.931884765625e-05,
      "step": 6442,
      "training_step_time": 0.39048337936401367
    },
    {
      "epoch": 3.9324951171875e-05,
      "model_forward_time": 0.11536359786987305,
      "step": 6443
    },
    {
      "epoch": 3.9324951171875e-05,
      "step": 6443,
      "training_step_time": 0.40668702125549316
    },
    {
      "epoch": 3.93310546875e-05,
      "model_forward_time": 0.11440491676330566,
      "step": 6444
    },
    {
      "epoch": 3.93310546875e-05,
      "step": 6444,
      "training_step_time": 0.3938734531402588
    },
    {
      "epoch": 3.9337158203125e-05,
      "model_forward_time": 0.1161797046661377,
      "step": 6445
    },
    {
      "epoch": 3.9337158203125e-05,
      "step": 6445,
      "training_step_time": 0.39350366592407227
    },
    {
      "epoch": 3.934326171875e-05,
      "model_forward_time": 0.11505508422851562,
      "step": 6446
    },
    {
      "epoch": 3.934326171875e-05,
      "step": 6446,
      "training_step_time": 0.4005403518676758
    },
    {
      "epoch": 3.9349365234375e-05,
      "model_forward_time": 0.11573672294616699,
      "step": 6447
    },
    {
      "epoch": 3.9349365234375e-05,
      "step": 6447,
      "training_step_time": 0.3977046012878418
    },
    {
      "epoch": 3.935546875e-05,
      "model_forward_time": 0.11491203308105469,
      "step": 6448
    },
    {
      "epoch": 3.935546875e-05,
      "step": 6448,
      "training_step_time": 0.3960273265838623
    },
    {
      "epoch": 3.9361572265625e-05,
      "model_forward_time": 0.11541938781738281,
      "step": 6449
    },
    {
      "epoch": 3.9361572265625e-05,
      "step": 6449,
      "training_step_time": 0.4075324535369873
    },
    {
      "epoch": 3.936767578125e-05,
      "grad_norm": 0.3620823919773102,
      "learning_rate": 9.90988039376942e-05,
      "loss": 0.0708,
      "step": 6450
    },
    {
      "epoch": 3.936767578125e-05,
      "model_forward_time": 0.11485433578491211,
      "step": 6450
    },
    {
      "epoch": 3.936767578125e-05,
      "step": 6450,
      "training_step_time": 0.44382619857788086
    },
    {
      "epoch": 3.9373779296875e-05,
      "model_forward_time": 0.11500811576843262,
      "step": 6451
    },
    {
      "epoch": 3.9373779296875e-05,
      "step": 6451,
      "training_step_time": 0.43943142890930176
    },
    {
      "epoch": 3.93798828125e-05,
      "model_forward_time": 0.11525368690490723,
      "step": 6452
    },
    {
      "epoch": 3.93798828125e-05,
      "step": 6452,
      "training_step_time": 0.4496438503265381
    },
    {
      "epoch": 3.9385986328125e-05,
      "model_forward_time": 0.11528372764587402,
      "step": 6453
    },
    {
      "epoch": 3.9385986328125e-05,
      "step": 6453,
      "training_step_time": 0.44262075424194336
    },
    {
      "epoch": 3.939208984375e-05,
      "model_forward_time": 0.11640524864196777,
      "step": 6454
    },
    {
      "epoch": 3.939208984375e-05,
      "step": 6454,
      "training_step_time": 0.4061596393585205
    },
    {
      "epoch": 3.9398193359375e-05,
      "model_forward_time": 0.11529016494750977,
      "step": 6455
    },
    {
      "epoch": 3.9398193359375e-05,
      "step": 6455,
      "training_step_time": 0.48090362548828125
    },
    {
      "epoch": 3.9404296875e-05,
      "model_forward_time": 0.11595368385314941,
      "step": 6456
    },
    {
      "epoch": 3.9404296875e-05,
      "step": 6456,
      "training_step_time": 0.49535536766052246
    },
    {
      "epoch": 3.9410400390625e-05,
      "model_forward_time": 0.1148073673248291,
      "step": 6457
    },
    {
      "epoch": 3.9410400390625e-05,
      "step": 6457,
      "training_step_time": 0.3959317207336426
    },
    {
      "epoch": 3.941650390625e-05,
      "model_forward_time": 0.1151893138885498,
      "step": 6458
    },
    {
      "epoch": 3.941650390625e-05,
      "step": 6458,
      "training_step_time": 0.397477388381958
    },
    {
      "epoch": 3.9422607421875e-05,
      "model_forward_time": 0.11538147926330566,
      "step": 6459
    },
    {
      "epoch": 3.9422607421875e-05,
      "step": 6459,
      "training_step_time": 0.39817047119140625
    },
    {
      "epoch": 3.94287109375e-05,
      "grad_norm": 0.22971919178962708,
      "learning_rate": 9.909358790616849e-05,
      "loss": 0.073,
      "step": 6460
    },
    {
      "epoch": 3.94287109375e-05,
      "model_forward_time": 0.11487817764282227,
      "step": 6460
    },
    {
      "epoch": 3.94287109375e-05,
      "step": 6460,
      "training_step_time": 0.37847208976745605
    },
    {
      "epoch": 3.9434814453125e-05,
      "model_forward_time": 0.11557722091674805,
      "step": 6461
    },
    {
      "epoch": 3.9434814453125e-05,
      "step": 6461,
      "training_step_time": 0.4025454521179199
    },
    {
      "epoch": 3.944091796875e-05,
      "model_forward_time": 0.11537837982177734,
      "step": 6462
    },
    {
      "epoch": 3.944091796875e-05,
      "step": 6462,
      "training_step_time": 0.39058780670166016
    },
    {
      "epoch": 3.9447021484375e-05,
      "model_forward_time": 0.1154022216796875,
      "step": 6463
    },
    {
      "epoch": 3.9447021484375e-05,
      "step": 6463,
      "training_step_time": 0.39824867248535156
    },
    {
      "epoch": 3.9453125e-05,
      "model_forward_time": 0.11537003517150879,
      "step": 6464
    },
    {
      "epoch": 3.9453125e-05,
      "step": 6464,
      "training_step_time": 0.4957423210144043
    },
    {
      "epoch": 3.9459228515625e-05,
      "model_forward_time": 0.11561107635498047,
      "step": 6465
    },
    {
      "epoch": 3.9459228515625e-05,
      "step": 6465,
      "training_step_time": 0.46091794967651367
    },
    {
      "epoch": 3.946533203125e-05,
      "model_forward_time": 0.1149747371673584,
      "step": 6466
    },
    {
      "epoch": 3.946533203125e-05,
      "step": 6466,
      "training_step_time": 0.4076266288757324
    },
    {
      "epoch": 3.9471435546875e-05,
      "model_forward_time": 0.11470556259155273,
      "step": 6467
    },
    {
      "epoch": 3.9471435546875e-05,
      "step": 6467,
      "training_step_time": 0.3934168815612793
    },
    {
      "epoch": 3.94775390625e-05,
      "model_forward_time": 0.11569881439208984,
      "step": 6468
    },
    {
      "epoch": 3.94775390625e-05,
      "step": 6468,
      "training_step_time": 0.38875913619995117
    },
    {
      "epoch": 3.9483642578125e-05,
      "model_forward_time": 0.11495327949523926,
      "step": 6469
    },
    {
      "epoch": 3.9483642578125e-05,
      "step": 6469,
      "training_step_time": 0.4536111354827881
    },
    {
      "epoch": 3.948974609375e-05,
      "grad_norm": 0.17873123288154602,
      "learning_rate": 9.908835696130701e-05,
      "loss": 0.0795,
      "step": 6470
    },
    {
      "epoch": 3.948974609375e-05,
      "model_forward_time": 0.11481165885925293,
      "step": 6470
    },
    {
      "epoch": 3.948974609375e-05,
      "step": 6470,
      "training_step_time": 0.39589929580688477
    },
    {
      "epoch": 3.9495849609375e-05,
      "model_forward_time": 0.11543035507202148,
      "step": 6471
    },
    {
      "epoch": 3.9495849609375e-05,
      "step": 6471,
      "training_step_time": 0.48503994941711426
    },
    {
      "epoch": 3.9501953125e-05,
      "model_forward_time": 0.11526823043823242,
      "step": 6472
    },
    {
      "epoch": 3.9501953125e-05,
      "step": 6472,
      "training_step_time": 0.39286279678344727
    },
    {
      "epoch": 3.9508056640625e-05,
      "model_forward_time": 0.11534237861633301,
      "step": 6473
    },
    {
      "epoch": 3.9508056640625e-05,
      "step": 6473,
      "training_step_time": 0.3876011371612549
    },
    {
      "epoch": 3.951416015625e-05,
      "model_forward_time": 0.1155710220336914,
      "step": 6474
    },
    {
      "epoch": 3.951416015625e-05,
      "step": 6474,
      "training_step_time": 0.41890764236450195
    },
    {
      "epoch": 3.9520263671875e-05,
      "model_forward_time": 0.11545014381408691,
      "step": 6475
    },
    {
      "epoch": 3.9520263671875e-05,
      "step": 6475,
      "training_step_time": 0.3955047130584717
    },
    {
      "epoch": 3.95263671875e-05,
      "model_forward_time": 0.11563706398010254,
      "step": 6476
    },
    {
      "epoch": 3.95263671875e-05,
      "step": 6476,
      "training_step_time": 0.4073221683502197
    },
    {
      "epoch": 3.9532470703125e-05,
      "model_forward_time": 0.11471819877624512,
      "step": 6477
    },
    {
      "epoch": 3.9532470703125e-05,
      "step": 6477,
      "training_step_time": 0.4041769504547119
    },
    {
      "epoch": 3.953857421875e-05,
      "model_forward_time": 0.11496090888977051,
      "step": 6478
    },
    {
      "epoch": 3.953857421875e-05,
      "step": 6478,
      "training_step_time": 0.4197666645050049
    },
    {
      "epoch": 3.9544677734375e-05,
      "model_forward_time": 0.11511588096618652,
      "step": 6479
    },
    {
      "epoch": 3.9544677734375e-05,
      "step": 6479,
      "training_step_time": 0.49704718589782715
    },
    {
      "epoch": 3.955078125e-05,
      "grad_norm": 0.243406280875206,
      "learning_rate": 9.90831111046988e-05,
      "loss": 0.0709,
      "step": 6480
    },
    {
      "epoch": 3.955078125e-05,
      "model_forward_time": 0.11481237411499023,
      "step": 6480
    },
    {
      "epoch": 3.955078125e-05,
      "step": 6480,
      "training_step_time": 0.44721102714538574
    },
    {
      "epoch": 3.9556884765625e-05,
      "model_forward_time": 0.11514449119567871,
      "step": 6481
    },
    {
      "epoch": 3.9556884765625e-05,
      "step": 6481,
      "training_step_time": 0.40440845489501953
    },
    {
      "epoch": 3.956298828125e-05,
      "model_forward_time": 0.11533570289611816,
      "step": 6482
    },
    {
      "epoch": 3.956298828125e-05,
      "step": 6482,
      "training_step_time": 0.41638994216918945
    },
    {
      "epoch": 3.9569091796875e-05,
      "model_forward_time": 0.11515688896179199,
      "step": 6483
    },
    {
      "epoch": 3.9569091796875e-05,
      "step": 6483,
      "training_step_time": 0.3775908946990967
    },
    {
      "epoch": 3.95751953125e-05,
      "model_forward_time": 0.11458086967468262,
      "step": 6484
    },
    {
      "epoch": 3.95751953125e-05,
      "step": 6484,
      "training_step_time": 0.46469807624816895
    },
    {
      "epoch": 3.9581298828125e-05,
      "model_forward_time": 0.11512994766235352,
      "step": 6485
    },
    {
      "epoch": 3.9581298828125e-05,
      "step": 6485,
      "training_step_time": 0.48209381103515625
    },
    {
      "epoch": 3.958740234375e-05,
      "model_forward_time": 0.1147468090057373,
      "step": 6486
    },
    {
      "epoch": 3.958740234375e-05,
      "step": 6486,
      "training_step_time": 0.4071383476257324
    },
    {
      "epoch": 3.9593505859375e-05,
      "model_forward_time": 0.11455512046813965,
      "step": 6487
    },
    {
      "epoch": 3.9593505859375e-05,
      "step": 6487,
      "training_step_time": 0.3905456066131592
    },
    {
      "epoch": 3.9599609375e-05,
      "model_forward_time": 0.11483216285705566,
      "step": 6488
    },
    {
      "epoch": 3.9599609375e-05,
      "step": 6488,
      "training_step_time": 0.39606451988220215
    },
    {
      "epoch": 3.9605712890625e-05,
      "model_forward_time": 0.11468505859375,
      "step": 6489
    },
    {
      "epoch": 3.9605712890625e-05,
      "step": 6489,
      "training_step_time": 0.39718127250671387
    },
    {
      "epoch": 3.961181640625e-05,
      "grad_norm": 0.20740395784378052,
      "learning_rate": 9.90778503379374e-05,
      "loss": 0.0734,
      "step": 6490
    },
    {
      "epoch": 3.961181640625e-05,
      "model_forward_time": 0.11554527282714844,
      "step": 6490
    },
    {
      "epoch": 3.961181640625e-05,
      "step": 6490,
      "training_step_time": 0.39067697525024414
    },
    {
      "epoch": 3.9617919921875e-05,
      "model_forward_time": 0.11501216888427734,
      "step": 6491
    },
    {
      "epoch": 3.9617919921875e-05,
      "step": 6491,
      "training_step_time": 0.3936278820037842
    },
    {
      "epoch": 3.96240234375e-05,
      "model_forward_time": 0.11498546600341797,
      "step": 6492
    },
    {
      "epoch": 3.96240234375e-05,
      "step": 6492,
      "training_step_time": 0.3850526809692383
    },
    {
      "epoch": 3.9630126953125e-05,
      "model_forward_time": 0.11561393737792969,
      "step": 6493
    },
    {
      "epoch": 3.9630126953125e-05,
      "step": 6493,
      "training_step_time": 0.40834474563598633
    },
    {
      "epoch": 3.963623046875e-05,
      "model_forward_time": 0.11500811576843262,
      "step": 6494
    },
    {
      "epoch": 3.963623046875e-05,
      "step": 6494,
      "training_step_time": 0.40725111961364746
    },
    {
      "epoch": 3.9642333984375e-05,
      "model_forward_time": 0.11562561988830566,
      "step": 6495
    },
    {
      "epoch": 3.9642333984375e-05,
      "step": 6495,
      "training_step_time": 0.4570653438568115
    },
    {
      "epoch": 3.96484375e-05,
      "model_forward_time": 0.11545634269714355,
      "step": 6496
    },
    {
      "epoch": 3.96484375e-05,
      "step": 6496,
      "training_step_time": 0.4046323299407959
    },
    {
      "epoch": 3.9654541015625e-05,
      "model_forward_time": 0.11531472206115723,
      "step": 6497
    },
    {
      "epoch": 3.9654541015625e-05,
      "step": 6497,
      "training_step_time": 0.4049050807952881
    },
    {
      "epoch": 3.966064453125e-05,
      "model_forward_time": 0.1147909164428711,
      "step": 6498
    },
    {
      "epoch": 3.966064453125e-05,
      "step": 6498,
      "training_step_time": 0.47093939781188965
    },
    {
      "epoch": 3.9666748046875e-05,
      "model_forward_time": 0.11594223976135254,
      "step": 6499
    },
    {
      "epoch": 3.9666748046875e-05,
      "step": 6499,
      "training_step_time": 0.4820554256439209
    },
    {
      "epoch": 3.96728515625e-05,
      "grad_norm": 0.27711221575737,
      "learning_rate": 9.90725746626209e-05,
      "loss": 0.0731,
      "step": 6500
    },
    {
      "epoch": 3.96728515625e-05,
      "model_forward_time": 0.11518120765686035,
      "step": 6500
    },
    {
      "epoch": 3.96728515625e-05,
      "step": 6500,
      "training_step_time": 0.5026545524597168
    },
    {
      "epoch": 3.9678955078125e-05,
      "model_forward_time": 0.11487150192260742,
      "step": 6501
    },
    {
      "epoch": 3.9678955078125e-05,
      "step": 6501,
      "training_step_time": 0.388446569442749
    },
    {
      "epoch": 3.968505859375e-05,
      "model_forward_time": 0.11528968811035156,
      "step": 6502
    },
    {
      "epoch": 3.968505859375e-05,
      "step": 6502,
      "training_step_time": 0.3918623924255371
    },
    {
      "epoch": 3.9691162109375e-05,
      "model_forward_time": 0.1145792007446289,
      "step": 6503
    },
    {
      "epoch": 3.9691162109375e-05,
      "step": 6503,
      "training_step_time": 0.3865654468536377
    },
    {
      "epoch": 3.9697265625e-05,
      "model_forward_time": 0.11500430107116699,
      "step": 6504
    },
    {
      "epoch": 3.9697265625e-05,
      "step": 6504,
      "training_step_time": 0.38068437576293945
    },
    {
      "epoch": 3.9703369140625e-05,
      "model_forward_time": 0.11509966850280762,
      "step": 6505
    },
    {
      "epoch": 3.9703369140625e-05,
      "step": 6505,
      "training_step_time": 0.3909292221069336
    },
    {
      "epoch": 3.970947265625e-05,
      "model_forward_time": 0.11511659622192383,
      "step": 6506
    },
    {
      "epoch": 3.970947265625e-05,
      "step": 6506,
      "training_step_time": 0.402141809463501
    },
    {
      "epoch": 3.9715576171875e-05,
      "model_forward_time": 0.11502337455749512,
      "step": 6507
    },
    {
      "epoch": 3.9715576171875e-05,
      "step": 6507,
      "training_step_time": 0.4740622043609619
    },
    {
      "epoch": 3.97216796875e-05,
      "model_forward_time": 0.11524295806884766,
      "step": 6508
    },
    {
      "epoch": 3.97216796875e-05,
      "step": 6508,
      "training_step_time": 0.48549461364746094
    },
    {
      "epoch": 3.9727783203125e-05,
      "model_forward_time": 0.11657571792602539,
      "step": 6509
    },
    {
      "epoch": 3.9727783203125e-05,
      "step": 6509,
      "training_step_time": 0.4695398807525635
    },
    {
      "epoch": 3.973388671875e-05,
      "grad_norm": 0.2991316020488739,
      "learning_rate": 9.90672840803519e-05,
      "loss": 0.0776,
      "step": 6510
    },
    {
      "epoch": 3.973388671875e-05,
      "model_forward_time": 0.11518192291259766,
      "step": 6510
    },
    {
      "epoch": 3.973388671875e-05,
      "step": 6510,
      "training_step_time": 0.38785719871520996
    },
    {
      "epoch": 3.9739990234375e-05,
      "model_forward_time": 0.11498022079467773,
      "step": 6511
    },
    {
      "epoch": 3.9739990234375e-05,
      "step": 6511,
      "training_step_time": 0.3925626277923584
    },
    {
      "epoch": 3.974609375e-05,
      "model_forward_time": 0.1149282455444336,
      "step": 6512
    },
    {
      "epoch": 3.974609375e-05,
      "step": 6512,
      "training_step_time": 0.36866188049316406
    },
    {
      "epoch": 3.9752197265625e-05,
      "model_forward_time": 0.11507630348205566,
      "step": 6513
    },
    {
      "epoch": 3.9752197265625e-05,
      "step": 6513,
      "training_step_time": 0.46614742279052734
    },
    {
      "epoch": 3.975830078125e-05,
      "model_forward_time": 0.11468267440795898,
      "step": 6514
    },
    {
      "epoch": 3.975830078125e-05,
      "step": 6514,
      "training_step_time": 0.4751570224761963
    },
    {
      "epoch": 3.9764404296875e-05,
      "model_forward_time": 0.11489081382751465,
      "step": 6515
    },
    {
      "epoch": 3.9764404296875e-05,
      "step": 6515,
      "training_step_time": 0.40779662132263184
    },
    {
      "epoch": 3.97705078125e-05,
      "model_forward_time": 0.11514949798583984,
      "step": 6516
    },
    {
      "epoch": 3.97705078125e-05,
      "step": 6516,
      "training_step_time": 0.3918919563293457
    },
    {
      "epoch": 3.9776611328125e-05,
      "model_forward_time": 0.11542272567749023,
      "step": 6517
    },
    {
      "epoch": 3.9776611328125e-05,
      "step": 6517,
      "training_step_time": 0.3892982006072998
    },
    {
      "epoch": 3.978271484375e-05,
      "model_forward_time": 0.11539912223815918,
      "step": 6518
    },
    {
      "epoch": 3.978271484375e-05,
      "step": 6518,
      "training_step_time": 0.39788150787353516
    },
    {
      "epoch": 3.9788818359375e-05,
      "model_forward_time": 0.11440873146057129,
      "step": 6519
    },
    {
      "epoch": 3.9788818359375e-05,
      "step": 6519,
      "training_step_time": 0.39848828315734863
    },
    {
      "epoch": 3.9794921875e-05,
      "grad_norm": 0.357748419046402,
      "learning_rate": 9.906197859273753e-05,
      "loss": 0.0805,
      "step": 6520
    },
    {
      "epoch": 3.9794921875e-05,
      "model_forward_time": 0.11521315574645996,
      "step": 6520
    },
    {
      "epoch": 3.9794921875e-05,
      "step": 6520,
      "training_step_time": 0.3887319564819336
    },
    {
      "epoch": 3.9801025390625e-05,
      "model_forward_time": 0.11556172370910645,
      "step": 6521
    },
    {
      "epoch": 3.9801025390625e-05,
      "step": 6521,
      "training_step_time": 0.4180905818939209
    },
    {
      "epoch": 3.980712890625e-05,
      "model_forward_time": 0.11536312103271484,
      "step": 6522
    },
    {
      "epoch": 3.980712890625e-05,
      "step": 6522,
      "training_step_time": 0.42055773735046387
    },
    {
      "epoch": 3.9813232421875e-05,
      "model_forward_time": 0.1158287525177002,
      "step": 6523
    },
    {
      "epoch": 3.9813232421875e-05,
      "step": 6523,
      "training_step_time": 0.4662177562713623
    },
    {
      "epoch": 3.98193359375e-05,
      "model_forward_time": 0.11521410942077637,
      "step": 6524
    },
    {
      "epoch": 3.98193359375e-05,
      "step": 6524,
      "training_step_time": 0.49695277214050293
    },
    {
      "epoch": 3.9825439453125e-05,
      "model_forward_time": 0.11525654792785645,
      "step": 6525
    },
    {
      "epoch": 3.9825439453125e-05,
      "step": 6525,
      "training_step_time": 0.4116520881652832
    },
    {
      "epoch": 3.983154296875e-05,
      "model_forward_time": 0.11515164375305176,
      "step": 6526
    },
    {
      "epoch": 3.983154296875e-05,
      "step": 6526,
      "training_step_time": 0.38846611976623535
    },
    {
      "epoch": 3.9837646484375e-05,
      "model_forward_time": 0.11518144607543945,
      "step": 6527
    },
    {
      "epoch": 3.9837646484375e-05,
      "step": 6527,
      "training_step_time": 0.44635486602783203
    },
    {
      "epoch": 3.984375e-05,
      "model_forward_time": 0.11479616165161133,
      "step": 6528
    },
    {
      "epoch": 3.984375e-05,
      "step": 6528,
      "training_step_time": 0.4120299816131592
    },
    {
      "epoch": 3.9849853515625e-05,
      "model_forward_time": 0.11556577682495117,
      "step": 6529
    },
    {
      "epoch": 3.9849853515625e-05,
      "step": 6529,
      "training_step_time": 0.492185115814209
    },
    {
      "epoch": 3.985595703125e-05,
      "grad_norm": 0.28743261098861694,
      "learning_rate": 9.905665820138949e-05,
      "loss": 0.0785,
      "step": 6530
    },
    {
      "epoch": 3.985595703125e-05,
      "model_forward_time": 0.1150522232055664,
      "step": 6530
    },
    {
      "epoch": 3.985595703125e-05,
      "step": 6530,
      "training_step_time": 0.39746546745300293
    },
    {
      "epoch": 3.9862060546875e-05,
      "model_forward_time": 0.11510968208312988,
      "step": 6531
    },
    {
      "epoch": 3.9862060546875e-05,
      "step": 6531,
      "training_step_time": 0.38880348205566406
    },
    {
      "epoch": 3.98681640625e-05,
      "model_forward_time": 0.11569762229919434,
      "step": 6532
    },
    {
      "epoch": 3.98681640625e-05,
      "step": 6532,
      "training_step_time": 0.385251522064209
    },
    {
      "epoch": 3.9874267578125e-05,
      "model_forward_time": 0.11546707153320312,
      "step": 6533
    },
    {
      "epoch": 3.9874267578125e-05,
      "step": 6533,
      "training_step_time": 0.3988039493560791
    },
    {
      "epoch": 3.988037109375e-05,
      "model_forward_time": 0.11536884307861328,
      "step": 6534
    },
    {
      "epoch": 3.988037109375e-05,
      "step": 6534,
      "training_step_time": 0.41503381729125977
    },
    {
      "epoch": 3.9886474609375e-05,
      "model_forward_time": 0.11496734619140625,
      "step": 6535
    },
    {
      "epoch": 3.9886474609375e-05,
      "step": 6535,
      "training_step_time": 0.4033334255218506
    },
    {
      "epoch": 3.9892578125e-05,
      "model_forward_time": 0.11513400077819824,
      "step": 6536
    },
    {
      "epoch": 3.9892578125e-05,
      "step": 6536,
      "training_step_time": 0.46779799461364746
    },
    {
      "epoch": 3.9898681640625e-05,
      "model_forward_time": 0.11495280265808105,
      "step": 6537
    },
    {
      "epoch": 3.9898681640625e-05,
      "step": 6537,
      "training_step_time": 0.4676041603088379
    },
    {
      "epoch": 3.990478515625e-05,
      "model_forward_time": 0.11585068702697754,
      "step": 6538
    },
    {
      "epoch": 3.990478515625e-05,
      "step": 6538,
      "training_step_time": 0.47051501274108887
    },
    {
      "epoch": 3.9910888671875e-05,
      "model_forward_time": 0.11494207382202148,
      "step": 6539
    },
    {
      "epoch": 3.9910888671875e-05,
      "step": 6539,
      "training_step_time": 0.39365577697753906
    },
    {
      "epoch": 3.99169921875e-05,
      "grad_norm": 0.19510583579540253,
      "learning_rate": 9.905132290792394e-05,
      "loss": 0.0713,
      "step": 6540
    },
    {
      "epoch": 3.99169921875e-05,
      "model_forward_time": 0.11548328399658203,
      "step": 6540
    },
    {
      "epoch": 3.99169921875e-05,
      "step": 6540,
      "training_step_time": 0.4010961055755615
    },
    {
      "epoch": 3.9923095703125e-05,
      "model_forward_time": 0.11424589157104492,
      "step": 6541
    },
    {
      "epoch": 3.9923095703125e-05,
      "step": 6541,
      "training_step_time": 0.36595582962036133
    },
    {
      "epoch": 3.992919921875e-05,
      "model_forward_time": 0.11486601829528809,
      "step": 6542
    },
    {
      "epoch": 3.992919921875e-05,
      "step": 6542,
      "training_step_time": 0.48311495780944824
    },
    {
      "epoch": 3.9935302734375e-05,
      "model_forward_time": 0.11523795127868652,
      "step": 6543
    },
    {
      "epoch": 3.9935302734375e-05,
      "step": 6543,
      "training_step_time": 0.4772613048553467
    },
    {
      "epoch": 3.994140625e-05,
      "model_forward_time": 0.11400032043457031,
      "step": 6544
    },
    {
      "epoch": 3.994140625e-05,
      "step": 6544,
      "training_step_time": 0.3955864906311035
    },
    {
      "epoch": 3.9947509765625e-05,
      "model_forward_time": 0.11516499519348145,
      "step": 6545
    },
    {
      "epoch": 3.9947509765625e-05,
      "step": 6545,
      "training_step_time": 0.38487768173217773
    },
    {
      "epoch": 3.995361328125e-05,
      "model_forward_time": 0.1152498722076416,
      "step": 6546
    },
    {
      "epoch": 3.995361328125e-05,
      "step": 6546,
      "training_step_time": 0.38794636726379395
    },
    {
      "epoch": 3.9959716796875e-05,
      "model_forward_time": 0.11505937576293945,
      "step": 6547
    },
    {
      "epoch": 3.9959716796875e-05,
      "step": 6547,
      "training_step_time": 0.40204501152038574
    },
    {
      "epoch": 3.99658203125e-05,
      "model_forward_time": 0.11504125595092773,
      "step": 6548
    },
    {
      "epoch": 3.99658203125e-05,
      "step": 6548,
      "training_step_time": 0.4266316890716553
    },
    {
      "epoch": 3.9971923828125e-05,
      "model_forward_time": 0.11547660827636719,
      "step": 6549
    },
    {
      "epoch": 3.9971923828125e-05,
      "step": 6549,
      "training_step_time": 0.402249813079834
    },
    {
      "epoch": 3.997802734375e-05,
      "grad_norm": 0.17925101518630981,
      "learning_rate": 9.904597271396162e-05,
      "loss": 0.0748,
      "step": 6550
    },
    {
      "epoch": 3.997802734375e-05,
      "model_forward_time": 0.11562705039978027,
      "step": 6550
    },
    {
      "epoch": 3.997802734375e-05,
      "step": 6550,
      "training_step_time": 0.4327714443206787
    },
    {
      "epoch": 3.9984130859375e-05,
      "model_forward_time": 0.11611604690551758,
      "step": 6551
    },
    {
      "epoch": 3.9984130859375e-05,
      "step": 6551,
      "training_step_time": 0.512627124786377
    },
    {
      "epoch": 3.9990234375e-05,
      "model_forward_time": 0.11732244491577148,
      "step": 6552
    },
    {
      "epoch": 3.9990234375e-05,
      "step": 6552,
      "training_step_time": 0.6144602298736572
    },
    {
      "epoch": 3.9996337890625e-05,
      "model_forward_time": 0.12015652656555176,
      "step": 6553
    },
    {
      "epoch": 3.9996337890625e-05,
      "step": 6553,
      "training_step_time": 0.7084190845489502
    },
    {
      "epoch": 4.000244140625e-05,
      "model_forward_time": 0.11801815032958984,
      "step": 6554
    },
    {
      "epoch": 4.000244140625e-05,
      "step": 6554,
      "training_step_time": 0.5839834213256836
    },
    {
      "epoch": 4.0008544921875e-05,
      "model_forward_time": 0.12046265602111816,
      "step": 6555
    },
    {
      "epoch": 4.0008544921875e-05,
      "step": 6555,
      "training_step_time": 0.7135255336761475
    },
    {
      "epoch": 4.00146484375e-05,
      "model_forward_time": 0.11806511878967285,
      "step": 6556
    },
    {
      "epoch": 4.00146484375e-05,
      "step": 6556,
      "training_step_time": 0.6706395149230957
    },
    {
      "epoch": 4.0020751953125e-05,
      "model_forward_time": 0.12058854103088379,
      "step": 6557
    },
    {
      "epoch": 4.0020751953125e-05,
      "step": 6557,
      "training_step_time": 0.6708242893218994
    },
    {
      "epoch": 4.002685546875e-05,
      "model_forward_time": 0.11910343170166016,
      "step": 6558
    },
    {
      "epoch": 4.002685546875e-05,
      "step": 6558,
      "training_step_time": 0.635467529296875
    },
    {
      "epoch": 4.0032958984375e-05,
      "model_forward_time": 0.12510156631469727,
      "step": 6559
    },
    {
      "epoch": 4.0032958984375e-05,
      "step": 6559,
      "training_step_time": 0.6300179958343506
    },
    {
      "epoch": 4.00390625e-05,
      "grad_norm": 0.2540580630302429,
      "learning_rate": 9.904060762112777e-05,
      "loss": 0.0808,
      "step": 6560
    },
    {
      "epoch": 4.00390625e-05,
      "model_forward_time": 0.12220168113708496,
      "step": 6560
    },
    {
      "epoch": 4.00390625e-05,
      "step": 6560,
      "training_step_time": 0.7095222473144531
    },
    {
      "epoch": 4.0045166015625e-05,
      "model_forward_time": 0.1407163143157959,
      "step": 6561
    },
    {
      "epoch": 4.0045166015625e-05,
      "step": 6561,
      "training_step_time": 0.658130407333374
    },
    {
      "epoch": 4.005126953125e-05,
      "model_forward_time": 0.11954545974731445,
      "step": 6562
    },
    {
      "epoch": 4.005126953125e-05,
      "step": 6562,
      "training_step_time": 0.7099583148956299
    },
    {
      "epoch": 4.0057373046875e-05,
      "model_forward_time": 0.12148451805114746,
      "step": 6563
    },
    {
      "epoch": 4.0057373046875e-05,
      "step": 6563,
      "training_step_time": 0.6653623580932617
    },
    {
      "epoch": 4.00634765625e-05,
      "model_forward_time": 0.12084746360778809,
      "step": 6564
    },
    {
      "epoch": 4.00634765625e-05,
      "step": 6564,
      "training_step_time": 0.710655689239502
    },
    {
      "epoch": 4.0069580078125e-05,
      "model_forward_time": 0.11708927154541016,
      "step": 6565
    },
    {
      "epoch": 4.0069580078125e-05,
      "step": 6565,
      "training_step_time": 0.6628496646881104
    },
    {
      "epoch": 4.007568359375e-05,
      "model_forward_time": 0.11671733856201172,
      "step": 6566
    },
    {
      "epoch": 4.007568359375e-05,
      "step": 6566,
      "training_step_time": 0.6367874145507812
    },
    {
      "epoch": 4.0081787109375e-05,
      "model_forward_time": 0.11684775352478027,
      "step": 6567
    },
    {
      "epoch": 4.0081787109375e-05,
      "step": 6567,
      "training_step_time": 0.642568826675415
    },
    {
      "epoch": 4.0087890625e-05,
      "model_forward_time": 0.11989593505859375,
      "step": 6568
    },
    {
      "epoch": 4.0087890625e-05,
      "step": 6568,
      "training_step_time": 0.7176644802093506
    },
    {
      "epoch": 4.0093994140625e-05,
      "model_forward_time": 0.11765003204345703,
      "step": 6569
    },
    {
      "epoch": 4.0093994140625e-05,
      "step": 6569,
      "training_step_time": 0.6426205635070801
    },
    {
      "epoch": 4.010009765625e-05,
      "grad_norm": 0.2755386531352997,
      "learning_rate": 9.903522763105218e-05,
      "loss": 0.0867,
      "step": 6570
    },
    {
      "epoch": 4.010009765625e-05,
      "model_forward_time": 0.11973690986633301,
      "step": 6570
    },
    {
      "epoch": 4.010009765625e-05,
      "step": 6570,
      "training_step_time": 0.7322852611541748
    },
    {
      "epoch": 4.0106201171875e-05,
      "model_forward_time": 0.12297797203063965,
      "step": 6571
    },
    {
      "epoch": 4.0106201171875e-05,
      "step": 6571,
      "training_step_time": 0.6789882183074951
    },
    {
      "epoch": 4.01123046875e-05,
      "model_forward_time": 0.11852812767028809,
      "step": 6572
    },
    {
      "epoch": 4.01123046875e-05,
      "step": 6572,
      "training_step_time": 0.6389946937561035
    },
    {
      "epoch": 4.0118408203125e-05,
      "model_forward_time": 0.1200399398803711,
      "step": 6573
    },
    {
      "epoch": 4.0118408203125e-05,
      "step": 6573,
      "training_step_time": 0.5931494235992432
    },
    {
      "epoch": 4.012451171875e-05,
      "model_forward_time": 0.12063121795654297,
      "step": 6574
    },
    {
      "epoch": 4.012451171875e-05,
      "step": 6574,
      "training_step_time": 0.7602095603942871
    },
    {
      "epoch": 4.0130615234375e-05,
      "model_forward_time": 0.12068939208984375,
      "step": 6575
    },
    {
      "epoch": 4.0130615234375e-05,
      "step": 6575,
      "training_step_time": 0.7384424209594727
    },
    {
      "epoch": 4.013671875e-05,
      "model_forward_time": 0.11675405502319336,
      "step": 6576
    },
    {
      "epoch": 4.013671875e-05,
      "step": 6576,
      "training_step_time": 0.6471009254455566
    },
    {
      "epoch": 4.0142822265625e-05,
      "model_forward_time": 0.1173551082611084,
      "step": 6577
    },
    {
      "epoch": 4.0142822265625e-05,
      "step": 6577,
      "training_step_time": 0.6685690879821777
    },
    {
      "epoch": 4.014892578125e-05,
      "model_forward_time": 0.11743426322937012,
      "step": 6578
    },
    {
      "epoch": 4.014892578125e-05,
      "step": 6578,
      "training_step_time": 0.7088556289672852
    },
    {
      "epoch": 4.0155029296875e-05,
      "model_forward_time": 0.12287425994873047,
      "step": 6579
    },
    {
      "epoch": 4.0155029296875e-05,
      "step": 6579,
      "training_step_time": 0.722081184387207
    },
    {
      "epoch": 4.01611328125e-05,
      "grad_norm": 0.33370503783226013,
      "learning_rate": 9.902983274536912e-05,
      "loss": 0.0966,
      "step": 6580
    },
    {
      "epoch": 4.01611328125e-05,
      "model_forward_time": 0.12787961959838867,
      "step": 6580
    },
    {
      "epoch": 4.01611328125e-05,
      "step": 6580,
      "training_step_time": 0.734874963760376
    },
    {
      "epoch": 4.0167236328125e-05,
      "model_forward_time": 0.11856508255004883,
      "step": 6581
    },
    {
      "epoch": 4.0167236328125e-05,
      "step": 6581,
      "training_step_time": 0.7259447574615479
    },
    {
      "epoch": 4.017333984375e-05,
      "model_forward_time": 0.1185002326965332,
      "step": 6582
    },
    {
      "epoch": 4.017333984375e-05,
      "step": 6582,
      "training_step_time": 0.6458115577697754
    },
    {
      "epoch": 4.0179443359375e-05,
      "model_forward_time": 0.12036371231079102,
      "step": 6583
    },
    {
      "epoch": 4.0179443359375e-05,
      "step": 6583,
      "training_step_time": 0.8736588954925537
    },
    {
      "epoch": 4.0185546875e-05,
      "model_forward_time": 0.11828184127807617,
      "step": 6584
    },
    {
      "epoch": 4.0185546875e-05,
      "step": 6584,
      "training_step_time": 0.7374148368835449
    },
    {
      "epoch": 4.0191650390625e-05,
      "model_forward_time": 0.11736655235290527,
      "step": 6585
    },
    {
      "epoch": 4.0191650390625e-05,
      "step": 6585,
      "training_step_time": 0.711144208908081
    },
    {
      "epoch": 4.019775390625e-05,
      "model_forward_time": 0.11766910552978516,
      "step": 6586
    },
    {
      "epoch": 4.019775390625e-05,
      "step": 6586,
      "training_step_time": 0.6368565559387207
    },
    {
      "epoch": 4.0203857421875e-05,
      "model_forward_time": 0.11948323249816895,
      "step": 6587
    },
    {
      "epoch": 4.0203857421875e-05,
      "step": 6587,
      "training_step_time": 0.632164478302002
    },
    {
      "epoch": 4.02099609375e-05,
      "model_forward_time": 0.11901450157165527,
      "step": 6588
    },
    {
      "epoch": 4.02099609375e-05,
      "step": 6588,
      "training_step_time": 0.7405846118927002
    },
    {
      "epoch": 4.0216064453125e-05,
      "model_forward_time": 0.12102460861206055,
      "step": 6589
    },
    {
      "epoch": 4.0216064453125e-05,
      "step": 6589,
      "training_step_time": 0.7847287654876709
    },
    {
      "epoch": 4.022216796875e-05,
      "grad_norm": 0.2861916422843933,
      "learning_rate": 9.902442296571743e-05,
      "loss": 0.0875,
      "step": 6590
    },
    {
      "epoch": 4.022216796875e-05,
      "model_forward_time": 0.11621665954589844,
      "step": 6590
    },
    {
      "epoch": 4.022216796875e-05,
      "step": 6590,
      "training_step_time": 0.6070847511291504
    },
    {
      "epoch": 4.0228271484375e-05,
      "model_forward_time": 0.11926603317260742,
      "step": 6591
    },
    {
      "epoch": 4.0228271484375e-05,
      "step": 6591,
      "training_step_time": 0.6407742500305176
    },
    {
      "epoch": 4.0234375e-05,
      "model_forward_time": 0.11809253692626953,
      "step": 6592
    },
    {
      "epoch": 4.0234375e-05,
      "step": 6592,
      "training_step_time": 0.6934614181518555
    },
    {
      "epoch": 4.0240478515625e-05,
      "model_forward_time": 0.11725497245788574,
      "step": 6593
    },
    {
      "epoch": 4.0240478515625e-05,
      "step": 6593,
      "training_step_time": 0.6519343852996826
    },
    {
      "epoch": 4.024658203125e-05,
      "model_forward_time": 0.12001967430114746,
      "step": 6594
    },
    {
      "epoch": 4.024658203125e-05,
      "step": 6594,
      "training_step_time": 0.6487958431243896
    },
    {
      "epoch": 4.0252685546875e-05,
      "model_forward_time": 0.11951422691345215,
      "step": 6595
    },
    {
      "epoch": 4.0252685546875e-05,
      "step": 6595,
      "training_step_time": 0.6291482448577881
    },
    {
      "epoch": 4.02587890625e-05,
      "model_forward_time": 0.11962175369262695,
      "step": 6596
    },
    {
      "epoch": 4.02587890625e-05,
      "step": 6596,
      "training_step_time": 0.5947535037994385
    },
    {
      "epoch": 4.0264892578125e-05,
      "model_forward_time": 0.1208641529083252,
      "step": 6597
    },
    {
      "epoch": 4.0264892578125e-05,
      "step": 6597,
      "training_step_time": 0.7186503410339355
    },
    {
      "epoch": 4.027099609375e-05,
      "model_forward_time": 0.1163029670715332,
      "step": 6598
    },
    {
      "epoch": 4.027099609375e-05,
      "step": 6598,
      "training_step_time": 0.6723189353942871
    },
    {
      "epoch": 4.0277099609375e-05,
      "model_forward_time": 0.1237490177154541,
      "step": 6599
    },
    {
      "epoch": 4.0277099609375e-05,
      "step": 6599,
      "training_step_time": 0.6520822048187256
    },
    {
      "epoch": 4.0283203125e-05,
      "grad_norm": 0.3580460548400879,
      "learning_rate": 9.901899829374047e-05,
      "loss": 0.083,
      "step": 6600
    },
    {
      "epoch": 4.0283203125e-05,
      "model_forward_time": 0.12081527709960938,
      "step": 6600
    },
    {
      "epoch": 4.0283203125e-05,
      "step": 6600,
      "training_step_time": 0.665569543838501
    },
    {
      "epoch": 4.0289306640625e-05,
      "model_forward_time": 0.11894893646240234,
      "step": 6601
    },
    {
      "epoch": 4.0289306640625e-05,
      "step": 6601,
      "training_step_time": 0.767998218536377
    },
    {
      "epoch": 4.029541015625e-05,
      "model_forward_time": 0.1224818229675293,
      "step": 6602
    },
    {
      "epoch": 4.029541015625e-05,
      "step": 6602,
      "training_step_time": 0.6436243057250977
    },
    {
      "epoch": 4.0301513671875e-05,
      "model_forward_time": 0.12043309211730957,
      "step": 6603
    },
    {
      "epoch": 4.0301513671875e-05,
      "step": 6603,
      "training_step_time": 0.7011973857879639
    },
    {
      "epoch": 4.03076171875e-05,
      "model_forward_time": 0.12109208106994629,
      "step": 6604
    },
    {
      "epoch": 4.03076171875e-05,
      "step": 6604,
      "training_step_time": 0.6440656185150146
    },
    {
      "epoch": 4.0313720703125e-05,
      "model_forward_time": 0.12180876731872559,
      "step": 6605
    },
    {
      "epoch": 4.0313720703125e-05,
      "step": 6605,
      "training_step_time": 0.6972646713256836
    },
    {
      "epoch": 4.031982421875e-05,
      "model_forward_time": 0.13723158836364746,
      "step": 6606
    },
    {
      "epoch": 4.031982421875e-05,
      "step": 6606,
      "training_step_time": 0.6658289432525635
    },
    {
      "epoch": 4.0325927734375e-05,
      "model_forward_time": 0.11951041221618652,
      "step": 6607
    },
    {
      "epoch": 4.0325927734375e-05,
      "step": 6607,
      "training_step_time": 0.7230358123779297
    },
    {
      "epoch": 4.033203125e-05,
      "model_forward_time": 0.12121438980102539,
      "step": 6608
    },
    {
      "epoch": 4.033203125e-05,
      "step": 6608,
      "training_step_time": 0.7082486152648926
    },
    {
      "epoch": 4.0338134765625e-05,
      "model_forward_time": 0.11701464653015137,
      "step": 6609
    },
    {
      "epoch": 4.0338134765625e-05,
      "step": 6609,
      "training_step_time": 0.6410982608795166
    },
    {
      "epoch": 4.034423828125e-05,
      "grad_norm": 0.43103310465812683,
      "learning_rate": 9.901355873108609e-05,
      "loss": 0.085,
      "step": 6610
    },
    {
      "epoch": 4.034423828125e-05,
      "model_forward_time": 0.12365603446960449,
      "step": 6610
    },
    {
      "epoch": 4.034423828125e-05,
      "step": 6610,
      "training_step_time": 0.6258821487426758
    },
    {
      "epoch": 4.0350341796875e-05,
      "model_forward_time": 0.1273653507232666,
      "step": 6611
    },
    {
      "epoch": 4.0350341796875e-05,
      "step": 6611,
      "training_step_time": 0.6639120578765869
    },
    {
      "epoch": 4.03564453125e-05,
      "model_forward_time": 0.11692690849304199,
      "step": 6612
    },
    {
      "epoch": 4.03564453125e-05,
      "step": 6612,
      "training_step_time": 0.7199711799621582
    },
    {
      "epoch": 4.0362548828125e-05,
      "model_forward_time": 0.14770293235778809,
      "step": 6613
    },
    {
      "epoch": 4.0362548828125e-05,
      "step": 6613,
      "training_step_time": 0.6761329174041748
    },
    {
      "epoch": 4.036865234375e-05,
      "model_forward_time": 0.11895346641540527,
      "step": 6614
    },
    {
      "epoch": 4.036865234375e-05,
      "step": 6614,
      "training_step_time": 0.6536641120910645
    },
    {
      "epoch": 4.0374755859375e-05,
      "model_forward_time": 0.1187293529510498,
      "step": 6615
    },
    {
      "epoch": 4.0374755859375e-05,
      "step": 6615,
      "training_step_time": 0.6449112892150879
    },
    {
      "epoch": 4.0380859375e-05,
      "model_forward_time": 0.13765645027160645,
      "step": 6616
    },
    {
      "epoch": 4.0380859375e-05,
      "step": 6616,
      "training_step_time": 0.6425192356109619
    },
    {
      "epoch": 4.0386962890625e-05,
      "model_forward_time": 0.12504935264587402,
      "step": 6617
    },
    {
      "epoch": 4.0386962890625e-05,
      "step": 6617,
      "training_step_time": 0.6228179931640625
    },
    {
      "epoch": 4.039306640625e-05,
      "model_forward_time": 0.1211402416229248,
      "step": 6618
    },
    {
      "epoch": 4.039306640625e-05,
      "step": 6618,
      "training_step_time": 0.6486482620239258
    },
    {
      "epoch": 4.0399169921875e-05,
      "model_forward_time": 0.11871552467346191,
      "step": 6619
    },
    {
      "epoch": 4.0399169921875e-05,
      "step": 6619,
      "training_step_time": 0.6020462512969971
    },
    {
      "epoch": 4.04052734375e-05,
      "grad_norm": 0.29238349199295044,
      "learning_rate": 9.90081042794067e-05,
      "loss": 0.0798,
      "step": 6620
    },
    {
      "epoch": 4.04052734375e-05,
      "model_forward_time": 0.12225794792175293,
      "step": 6620
    },
    {
      "epoch": 4.04052734375e-05,
      "step": 6620,
      "training_step_time": 0.6219058036804199
    },
    {
      "epoch": 4.0411376953125e-05,
      "model_forward_time": 0.13045978546142578,
      "step": 6621
    },
    {
      "epoch": 4.0411376953125e-05,
      "step": 6621,
      "training_step_time": 0.4976058006286621
    },
    {
      "epoch": 4.041748046875e-05,
      "model_forward_time": 0.12840580940246582,
      "step": 6622
    },
    {
      "epoch": 4.041748046875e-05,
      "step": 6622,
      "training_step_time": 0.5515503883361816
    },
    {
      "epoch": 4.0423583984375e-05,
      "model_forward_time": 0.11858797073364258,
      "step": 6623
    },
    {
      "epoch": 4.0423583984375e-05,
      "step": 6623,
      "training_step_time": 0.549351692199707
    },
    {
      "epoch": 4.04296875e-05,
      "model_forward_time": 0.11806726455688477,
      "step": 6624
    },
    {
      "epoch": 4.04296875e-05,
      "step": 6624,
      "training_step_time": 0.5061278343200684
    },
    {
      "epoch": 4.0435791015625e-05,
      "model_forward_time": 0.11676168441772461,
      "step": 6625
    },
    {
      "epoch": 4.0435791015625e-05,
      "step": 6625,
      "training_step_time": 0.4257349967956543
    },
    {
      "epoch": 4.044189453125e-05,
      "model_forward_time": 0.12011170387268066,
      "step": 6626
    },
    {
      "epoch": 4.044189453125e-05,
      "step": 6626,
      "training_step_time": 0.43582773208618164
    },
    {
      "epoch": 4.0447998046875e-05,
      "model_forward_time": 0.1158287525177002,
      "step": 6627
    },
    {
      "epoch": 4.0447998046875e-05,
      "step": 6627,
      "training_step_time": 0.4628019332885742
    },
    {
      "epoch": 4.04541015625e-05,
      "model_forward_time": 0.11582350730895996,
      "step": 6628
    },
    {
      "epoch": 4.04541015625e-05,
      "step": 6628,
      "training_step_time": 0.41426539421081543
    },
    {
      "epoch": 4.0460205078125e-05,
      "model_forward_time": 0.11732172966003418,
      "step": 6629
    },
    {
      "epoch": 4.0460205078125e-05,
      "step": 6629,
      "training_step_time": 0.41577672958374023
    },
    {
      "epoch": 4.046630859375e-05,
      "grad_norm": 0.14347833395004272,
      "learning_rate": 9.900263494035921e-05,
      "loss": 0.0883,
      "step": 6630
    },
    {
      "epoch": 4.046630859375e-05,
      "model_forward_time": 0.11745333671569824,
      "step": 6630
    },
    {
      "epoch": 4.046630859375e-05,
      "step": 6630,
      "training_step_time": 0.45486950874328613
    },
    {
      "epoch": 4.0472412109375e-05,
      "model_forward_time": 0.1149282455444336,
      "step": 6631
    },
    {
      "epoch": 4.0472412109375e-05,
      "step": 6631,
      "training_step_time": 0.41727113723754883
    },
    {
      "epoch": 4.0478515625e-05,
      "model_forward_time": 0.11502218246459961,
      "step": 6632
    },
    {
      "epoch": 4.0478515625e-05,
      "step": 6632,
      "training_step_time": 0.5128293037414551
    },
    {
      "epoch": 4.0484619140625e-05,
      "model_forward_time": 0.11465120315551758,
      "step": 6633
    },
    {
      "epoch": 4.0484619140625e-05,
      "step": 6633,
      "training_step_time": 0.3962373733520508
    },
    {
      "epoch": 4.049072265625e-05,
      "model_forward_time": 0.11548376083374023,
      "step": 6634
    },
    {
      "epoch": 4.049072265625e-05,
      "step": 6634,
      "training_step_time": 0.40172290802001953
    },
    {
      "epoch": 4.0496826171875e-05,
      "model_forward_time": 0.1150052547454834,
      "step": 6635
    },
    {
      "epoch": 4.0496826171875e-05,
      "step": 6635,
      "training_step_time": 0.3989715576171875
    },
    {
      "epoch": 4.05029296875e-05,
      "model_forward_time": 0.11503839492797852,
      "step": 6636
    },
    {
      "epoch": 4.05029296875e-05,
      "step": 6636,
      "training_step_time": 0.44606995582580566
    },
    {
      "epoch": 4.0509033203125e-05,
      "model_forward_time": 0.11517572402954102,
      "step": 6637
    },
    {
      "epoch": 4.0509033203125e-05,
      "step": 6637,
      "training_step_time": 0.4879577159881592
    },
    {
      "epoch": 4.051513671875e-05,
      "model_forward_time": 0.11531424522399902,
      "step": 6638
    },
    {
      "epoch": 4.051513671875e-05,
      "step": 6638,
      "training_step_time": 0.47727537155151367
    },
    {
      "epoch": 4.0521240234375e-05,
      "model_forward_time": 0.11512637138366699,
      "step": 6639
    },
    {
      "epoch": 4.0521240234375e-05,
      "step": 6639,
      "training_step_time": 0.3814668655395508
    },
    {
      "epoch": 4.052734375e-05,
      "grad_norm": 0.2868571877479553,
      "learning_rate": 9.899715071560508e-05,
      "loss": 0.0831,
      "step": 6640
    },
    {
      "epoch": 4.052734375e-05,
      "model_forward_time": 0.11445903778076172,
      "step": 6640
    },
    {
      "epoch": 4.052734375e-05,
      "step": 6640,
      "training_step_time": 0.40218424797058105
    },
    {
      "epoch": 4.0533447265625e-05,
      "model_forward_time": 0.11496281623840332,
      "step": 6641
    },
    {
      "epoch": 4.0533447265625e-05,
      "step": 6641,
      "training_step_time": 0.3947467803955078
    },
    {
      "epoch": 4.053955078125e-05,
      "model_forward_time": 0.11489415168762207,
      "step": 6642
    },
    {
      "epoch": 4.053955078125e-05,
      "step": 6642,
      "training_step_time": 0.39136219024658203
    },
    {
      "epoch": 4.0545654296875e-05,
      "model_forward_time": 0.1149446964263916,
      "step": 6643
    },
    {
      "epoch": 4.0545654296875e-05,
      "step": 6643,
      "training_step_time": 0.4273521900177002
    },
    {
      "epoch": 4.05517578125e-05,
      "model_forward_time": 0.1146547794342041,
      "step": 6644
    },
    {
      "epoch": 4.05517578125e-05,
      "step": 6644,
      "training_step_time": 0.4140503406524658
    },
    {
      "epoch": 4.0557861328125e-05,
      "model_forward_time": 0.11442732810974121,
      "step": 6645
    },
    {
      "epoch": 4.0557861328125e-05,
      "step": 6645,
      "training_step_time": 0.5043444633483887
    },
    {
      "epoch": 4.056396484375e-05,
      "model_forward_time": 0.11526274681091309,
      "step": 6646
    },
    {
      "epoch": 4.056396484375e-05,
      "step": 6646,
      "training_step_time": 0.41097593307495117
    },
    {
      "epoch": 4.0570068359375e-05,
      "model_forward_time": 0.11483573913574219,
      "step": 6647
    },
    {
      "epoch": 4.0570068359375e-05,
      "step": 6647,
      "training_step_time": 0.38770031929016113
    },
    {
      "epoch": 4.0576171875e-05,
      "model_forward_time": 0.11531472206115723,
      "step": 6648
    },
    {
      "epoch": 4.0576171875e-05,
      "step": 6648,
      "training_step_time": 0.39302897453308105
    },
    {
      "epoch": 4.0582275390625e-05,
      "model_forward_time": 0.11535358428955078,
      "step": 6649
    },
    {
      "epoch": 4.0582275390625e-05,
      "step": 6649,
      "training_step_time": 0.39609670639038086
    },
    {
      "epoch": 4.058837890625e-05,
      "grad_norm": 0.18488092720508575,
      "learning_rate": 9.899165160681025e-05,
      "loss": 0.077,
      "step": 6650
    },
    {
      "epoch": 4.058837890625e-05,
      "model_forward_time": 0.115997314453125,
      "step": 6650
    },
    {
      "epoch": 4.058837890625e-05,
      "step": 6650,
      "training_step_time": 0.36938023567199707
    },
    {
      "epoch": 4.0594482421875e-05,
      "model_forward_time": 0.11475753784179688,
      "step": 6651
    },
    {
      "epoch": 4.0594482421875e-05,
      "step": 6651,
      "training_step_time": 0.4844942092895508
    },
    {
      "epoch": 4.06005859375e-05,
      "model_forward_time": 0.11602210998535156,
      "step": 6652
    },
    {
      "epoch": 4.06005859375e-05,
      "step": 6652,
      "training_step_time": 0.4383237361907959
    },
    {
      "epoch": 4.0606689453125e-05,
      "model_forward_time": 0.11486506462097168,
      "step": 6653
    },
    {
      "epoch": 4.0606689453125e-05,
      "step": 6653,
      "training_step_time": 0.3925173282623291
    },
    {
      "epoch": 4.061279296875e-05,
      "model_forward_time": 0.11472558975219727,
      "step": 6654
    },
    {
      "epoch": 4.061279296875e-05,
      "step": 6654,
      "training_step_time": 0.3977501392364502
    },
    {
      "epoch": 4.0618896484375e-05,
      "model_forward_time": 0.1149606704711914,
      "step": 6655
    },
    {
      "epoch": 4.0618896484375e-05,
      "step": 6655,
      "training_step_time": 0.40645647048950195
    },
    {
      "epoch": 4.0625e-05,
      "model_forward_time": 0.11539268493652344,
      "step": 6656
    },
    {
      "epoch": 4.0625e-05,
      "step": 6656,
      "training_step_time": 0.39356422424316406
    },
    {
      "epoch": 4.0631103515625e-05,
      "model_forward_time": 0.11529326438903809,
      "step": 6657
    },
    {
      "epoch": 4.0631103515625e-05,
      "step": 6657,
      "training_step_time": 0.40227842330932617
    },
    {
      "epoch": 4.063720703125e-05,
      "model_forward_time": 0.11556267738342285,
      "step": 6658
    },
    {
      "epoch": 4.063720703125e-05,
      "step": 6658,
      "training_step_time": 0.42366766929626465
    },
    {
      "epoch": 4.0643310546875e-05,
      "model_forward_time": 0.11460018157958984,
      "step": 6659
    },
    {
      "epoch": 4.0643310546875e-05,
      "step": 6659,
      "training_step_time": 0.45578646659851074
    },
    {
      "epoch": 4.06494140625e-05,
      "grad_norm": 0.3010686933994293,
      "learning_rate": 9.89861376156452e-05,
      "loss": 0.0834,
      "step": 6660
    },
    {
      "epoch": 4.06494140625e-05,
      "model_forward_time": 0.11471915245056152,
      "step": 6660
    },
    {
      "epoch": 4.06494140625e-05,
      "step": 6660,
      "training_step_time": 0.42522382736206055
    },
    {
      "epoch": 4.0655517578125e-05,
      "model_forward_time": 0.11438798904418945,
      "step": 6661
    },
    {
      "epoch": 4.0655517578125e-05,
      "step": 6661,
      "training_step_time": 0.3967249393463135
    },
    {
      "epoch": 4.066162109375e-05,
      "model_forward_time": 0.11493134498596191,
      "step": 6662
    },
    {
      "epoch": 4.066162109375e-05,
      "step": 6662,
      "training_step_time": 0.3966846466064453
    },
    {
      "epoch": 4.0667724609375e-05,
      "model_forward_time": 0.1147003173828125,
      "step": 6663
    },
    {
      "epoch": 4.0667724609375e-05,
      "step": 6663,
      "training_step_time": 0.39015960693359375
    },
    {
      "epoch": 4.0673828125e-05,
      "model_forward_time": 0.11598086357116699,
      "step": 6664
    },
    {
      "epoch": 4.0673828125e-05,
      "step": 6664,
      "training_step_time": 0.38486814498901367
    },
    {
      "epoch": 4.0679931640625e-05,
      "model_forward_time": 0.11538004875183105,
      "step": 6665
    },
    {
      "epoch": 4.0679931640625e-05,
      "step": 6665,
      "training_step_time": 0.45307374000549316
    },
    {
      "epoch": 4.068603515625e-05,
      "model_forward_time": 0.11649489402770996,
      "step": 6666
    },
    {
      "epoch": 4.068603515625e-05,
      "step": 6666,
      "training_step_time": 0.49756598472595215
    },
    {
      "epoch": 4.0692138671875e-05,
      "model_forward_time": 0.1152498722076416,
      "step": 6667
    },
    {
      "epoch": 4.0692138671875e-05,
      "step": 6667,
      "training_step_time": 0.5219979286193848
    },
    {
      "epoch": 4.06982421875e-05,
      "model_forward_time": 0.11559271812438965,
      "step": 6668
    },
    {
      "epoch": 4.06982421875e-05,
      "step": 6668,
      "training_step_time": 0.4000120162963867
    },
    {
      "epoch": 4.0704345703125e-05,
      "model_forward_time": 0.11497139930725098,
      "step": 6669
    },
    {
      "epoch": 4.0704345703125e-05,
      "step": 6669,
      "training_step_time": 0.6158215999603271
    },
    {
      "epoch": 4.071044921875e-05,
      "grad_norm": 0.22007393836975098,
      "learning_rate": 9.898060874378496e-05,
      "loss": 0.0799,
      "step": 6670
    },
    {
      "epoch": 4.071044921875e-05,
      "model_forward_time": 0.11430764198303223,
      "step": 6670
    },
    {
      "epoch": 4.071044921875e-05,
      "step": 6670,
      "training_step_time": 0.3854527473449707
    },
    {
      "epoch": 4.0716552734375e-05,
      "model_forward_time": 0.11464643478393555,
      "step": 6671
    },
    {
      "epoch": 4.0716552734375e-05,
      "step": 6671,
      "training_step_time": 0.42862749099731445
    },
    {
      "epoch": 4.072265625e-05,
      "model_forward_time": 0.11557793617248535,
      "step": 6672
    },
    {
      "epoch": 4.072265625e-05,
      "step": 6672,
      "training_step_time": 0.4150104522705078
    },
    {
      "epoch": 4.0728759765625e-05,
      "model_forward_time": 0.11396646499633789,
      "step": 6673
    },
    {
      "epoch": 4.0728759765625e-05,
      "step": 6673,
      "training_step_time": 0.42986631393432617
    },
    {
      "epoch": 4.073486328125e-05,
      "model_forward_time": 0.11408305168151855,
      "step": 6674
    },
    {
      "epoch": 4.073486328125e-05,
      "step": 6674,
      "training_step_time": 0.3961341381072998
    },
    {
      "epoch": 4.0740966796875e-05,
      "model_forward_time": 0.1148843765258789,
      "step": 6675
    },
    {
      "epoch": 4.0740966796875e-05,
      "step": 6675,
      "training_step_time": 0.5497183799743652
    },
    {
      "epoch": 4.07470703125e-05,
      "model_forward_time": 0.11650824546813965,
      "step": 6676
    },
    {
      "epoch": 4.07470703125e-05,
      "step": 6676,
      "training_step_time": 0.39917469024658203
    },
    {
      "epoch": 4.0753173828125e-05,
      "model_forward_time": 0.11566901206970215,
      "step": 6677
    },
    {
      "epoch": 4.0753173828125e-05,
      "step": 6677,
      "training_step_time": 0.38698267936706543
    },
    {
      "epoch": 4.075927734375e-05,
      "model_forward_time": 0.1145932674407959,
      "step": 6678
    },
    {
      "epoch": 4.075927734375e-05,
      "step": 6678,
      "training_step_time": 0.4008495807647705
    },
    {
      "epoch": 4.0765380859375e-05,
      "model_forward_time": 0.11530232429504395,
      "step": 6679
    },
    {
      "epoch": 4.0765380859375e-05,
      "step": 6679,
      "training_step_time": 0.4909641742706299
    },
    {
      "epoch": 4.0771484375e-05,
      "grad_norm": 0.3751073181629181,
      "learning_rate": 9.897506499290902e-05,
      "loss": 0.0858,
      "step": 6680
    },
    {
      "epoch": 4.0771484375e-05,
      "model_forward_time": 0.11481833457946777,
      "step": 6680
    },
    {
      "epoch": 4.0771484375e-05,
      "step": 6680,
      "training_step_time": 0.48505425453186035
    },
    {
      "epoch": 4.0777587890625e-05,
      "model_forward_time": 0.11688733100891113,
      "step": 6681
    },
    {
      "epoch": 4.0777587890625e-05,
      "step": 6681,
      "training_step_time": 0.47579097747802734
    },
    {
      "epoch": 4.078369140625e-05,
      "model_forward_time": 0.11468839645385742,
      "step": 6682
    },
    {
      "epoch": 4.078369140625e-05,
      "step": 6682,
      "training_step_time": 0.3892977237701416
    },
    {
      "epoch": 4.0789794921875e-05,
      "model_forward_time": 0.11473250389099121,
      "step": 6683
    },
    {
      "epoch": 4.0789794921875e-05,
      "step": 6683,
      "training_step_time": 0.3823678493499756
    },
    {
      "epoch": 4.07958984375e-05,
      "model_forward_time": 0.11502671241760254,
      "step": 6684
    },
    {
      "epoch": 4.07958984375e-05,
      "step": 6684,
      "training_step_time": 0.3923759460449219
    },
    {
      "epoch": 4.0802001953125e-05,
      "model_forward_time": 0.11439275741577148,
      "step": 6685
    },
    {
      "epoch": 4.0802001953125e-05,
      "step": 6685,
      "training_step_time": 0.3923685550689697
    },
    {
      "epoch": 4.080810546875e-05,
      "model_forward_time": 0.11495542526245117,
      "step": 6686
    },
    {
      "epoch": 4.080810546875e-05,
      "step": 6686,
      "training_step_time": 0.4481515884399414
    },
    {
      "epoch": 4.0814208984375e-05,
      "model_forward_time": 0.11537623405456543,
      "step": 6687
    },
    {
      "epoch": 4.0814208984375e-05,
      "step": 6687,
      "training_step_time": 0.5132095813751221
    },
    {
      "epoch": 4.08203125e-05,
      "model_forward_time": 0.11553645133972168,
      "step": 6688
    },
    {
      "epoch": 4.08203125e-05,
      "step": 6688,
      "training_step_time": 0.39626383781433105
    },
    {
      "epoch": 4.0826416015625e-05,
      "model_forward_time": 0.11535930633544922,
      "step": 6689
    },
    {
      "epoch": 4.0826416015625e-05,
      "step": 6689,
      "training_step_time": 0.4012424945831299
    },
    {
      "epoch": 4.083251953125e-05,
      "grad_norm": 0.2719419598579407,
      "learning_rate": 9.896950636470147e-05,
      "loss": 0.0813,
      "step": 6690
    },
    {
      "epoch": 4.083251953125e-05,
      "model_forward_time": 0.11564779281616211,
      "step": 6690
    },
    {
      "epoch": 4.083251953125e-05,
      "step": 6690,
      "training_step_time": 0.39061737060546875
    },
    {
      "epoch": 4.0838623046875e-05,
      "model_forward_time": 0.11502838134765625,
      "step": 6691
    },
    {
      "epoch": 4.0838623046875e-05,
      "step": 6691,
      "training_step_time": 0.3925495147705078
    },
    {
      "epoch": 4.08447265625e-05,
      "model_forward_time": 0.11486506462097168,
      "step": 6692
    },
    {
      "epoch": 4.08447265625e-05,
      "step": 6692,
      "training_step_time": 0.3969254493713379
    },
    {
      "epoch": 4.0850830078125e-05,
      "model_forward_time": 0.1148684024810791,
      "step": 6693
    },
    {
      "epoch": 4.0850830078125e-05,
      "step": 6693,
      "training_step_time": 0.7081751823425293
    },
    {
      "epoch": 4.085693359375e-05,
      "model_forward_time": 0.11459827423095703,
      "step": 6694
    },
    {
      "epoch": 4.085693359375e-05,
      "step": 6694,
      "training_step_time": 0.4252181053161621
    },
    {
      "epoch": 4.0863037109375e-05,
      "model_forward_time": 0.11405181884765625,
      "step": 6695
    },
    {
      "epoch": 4.0863037109375e-05,
      "step": 6695,
      "training_step_time": 0.41196107864379883
    },
    {
      "epoch": 4.0869140625e-05,
      "model_forward_time": 0.11479926109313965,
      "step": 6696
    },
    {
      "epoch": 4.0869140625e-05,
      "step": 6696,
      "training_step_time": 0.40270352363586426
    },
    {
      "epoch": 4.0875244140625e-05,
      "model_forward_time": 0.11475563049316406,
      "step": 6697
    },
    {
      "epoch": 4.0875244140625e-05,
      "step": 6697,
      "training_step_time": 0.39251089096069336
    },
    {
      "epoch": 4.088134765625e-05,
      "model_forward_time": 0.11540913581848145,
      "step": 6698
    },
    {
      "epoch": 4.088134765625e-05,
      "step": 6698,
      "training_step_time": 0.38526105880737305
    },
    {
      "epoch": 4.0887451171875e-05,
      "model_forward_time": 0.11501336097717285,
      "step": 6699
    },
    {
      "epoch": 4.0887451171875e-05,
      "step": 6699,
      "training_step_time": 0.42395639419555664
    },
    {
      "epoch": 4.08935546875e-05,
      "grad_norm": 0.23012468218803406,
      "learning_rate": 9.896393286085084e-05,
      "loss": 0.0801,
      "step": 6700
    },
    {
      "epoch": 4.08935546875e-05,
      "model_forward_time": 0.11514115333557129,
      "step": 6700
    },
    {
      "epoch": 4.08935546875e-05,
      "step": 6700,
      "training_step_time": 0.3948829174041748
    },
    {
      "epoch": 4.0899658203125e-05,
      "model_forward_time": 0.1145937442779541,
      "step": 6701
    },
    {
      "epoch": 4.0899658203125e-05,
      "step": 6701,
      "training_step_time": 0.41440701484680176
    },
    {
      "epoch": 4.090576171875e-05,
      "model_forward_time": 0.11523723602294922,
      "step": 6702
    },
    {
      "epoch": 4.090576171875e-05,
      "step": 6702,
      "training_step_time": 0.5218091011047363
    },
    {
      "epoch": 4.0911865234375e-05,
      "model_forward_time": 0.11523795127868652,
      "step": 6703
    },
    {
      "epoch": 4.0911865234375e-05,
      "step": 6703,
      "training_step_time": 0.4061756134033203
    },
    {
      "epoch": 4.091796875e-05,
      "model_forward_time": 0.11493349075317383,
      "step": 6704
    },
    {
      "epoch": 4.091796875e-05,
      "step": 6704,
      "training_step_time": 0.39241862297058105
    },
    {
      "epoch": 4.0924072265625e-05,
      "model_forward_time": 0.11535358428955078,
      "step": 6705
    },
    {
      "epoch": 4.0924072265625e-05,
      "step": 6705,
      "training_step_time": 0.9019696712493896
    },
    {
      "epoch": 4.093017578125e-05,
      "model_forward_time": 0.11421728134155273,
      "step": 6706
    },
    {
      "epoch": 4.093017578125e-05,
      "step": 6706,
      "training_step_time": 0.38279175758361816
    },
    {
      "epoch": 4.0936279296875e-05,
      "model_forward_time": 0.11531543731689453,
      "step": 6707
    },
    {
      "epoch": 4.0936279296875e-05,
      "step": 6707,
      "training_step_time": 0.39731764793395996
    },
    {
      "epoch": 4.09423828125e-05,
      "model_forward_time": 0.11396026611328125,
      "step": 6708
    },
    {
      "epoch": 4.09423828125e-05,
      "step": 6708,
      "training_step_time": 0.45420193672180176
    },
    {
      "epoch": 4.0948486328125e-05,
      "model_forward_time": 0.11400103569030762,
      "step": 6709
    },
    {
      "epoch": 4.0948486328125e-05,
      "step": 6709,
      "training_step_time": 0.4722442626953125
    },
    {
      "epoch": 4.095458984375e-05,
      "grad_norm": 0.2586442828178406,
      "learning_rate": 9.895834448305024e-05,
      "loss": 0.0792,
      "step": 6710
    },
    {
      "epoch": 4.095458984375e-05,
      "model_forward_time": 0.11437082290649414,
      "step": 6710
    },
    {
      "epoch": 4.095458984375e-05,
      "step": 6710,
      "training_step_time": 0.3854696750640869
    },
    {
      "epoch": 4.0960693359375e-05,
      "model_forward_time": 0.11456775665283203,
      "step": 6711
    },
    {
      "epoch": 4.0960693359375e-05,
      "step": 6711,
      "training_step_time": 0.6133849620819092
    },
    {
      "epoch": 4.0966796875e-05,
      "model_forward_time": 0.11514878273010254,
      "step": 6712
    },
    {
      "epoch": 4.0966796875e-05,
      "step": 6712,
      "training_step_time": 0.38686704635620117
    },
    {
      "epoch": 4.0972900390625e-05,
      "model_forward_time": 0.11455702781677246,
      "step": 6713
    },
    {
      "epoch": 4.0972900390625e-05,
      "step": 6713,
      "training_step_time": 0.5079975128173828
    },
    {
      "epoch": 4.097900390625e-05,
      "model_forward_time": 0.1139070987701416,
      "step": 6714
    },
    {
      "epoch": 4.097900390625e-05,
      "step": 6714,
      "training_step_time": 0.39833498001098633
    },
    {
      "epoch": 4.0985107421875e-05,
      "model_forward_time": 0.11422348022460938,
      "step": 6715
    },
    {
      "epoch": 4.0985107421875e-05,
      "step": 6715,
      "training_step_time": 0.39070558547973633
    },
    {
      "epoch": 4.09912109375e-05,
      "model_forward_time": 0.11417984962463379,
      "step": 6716
    },
    {
      "epoch": 4.09912109375e-05,
      "step": 6716,
      "training_step_time": 0.4091837406158447
    },
    {
      "epoch": 4.0997314453125e-05,
      "model_forward_time": 0.11499762535095215,
      "step": 6717
    },
    {
      "epoch": 4.0997314453125e-05,
      "step": 6717,
      "training_step_time": 0.603553056716919
    },
    {
      "epoch": 4.100341796875e-05,
      "model_forward_time": 0.11466526985168457,
      "step": 6718
    },
    {
      "epoch": 4.100341796875e-05,
      "step": 6718,
      "training_step_time": 0.38697075843811035
    },
    {
      "epoch": 4.1009521484375e-05,
      "model_forward_time": 0.1146700382232666,
      "step": 6719
    },
    {
      "epoch": 4.1009521484375e-05,
      "step": 6719,
      "training_step_time": 0.3989584445953369
    },
    {
      "epoch": 4.1015625e-05,
      "grad_norm": 0.2990333139896393,
      "learning_rate": 9.895274123299723e-05,
      "loss": 0.0791,
      "step": 6720
    },
    {
      "epoch": 4.1015625e-05,
      "model_forward_time": 0.1146090030670166,
      "step": 6720
    },
    {
      "epoch": 4.1015625e-05,
      "step": 6720,
      "training_step_time": 0.36841464042663574
    },
    {
      "epoch": 4.1021728515625e-05,
      "model_forward_time": 0.11481785774230957,
      "step": 6721
    },
    {
      "epoch": 4.1021728515625e-05,
      "step": 6721,
      "training_step_time": 0.407900333404541
    },
    {
      "epoch": 4.102783203125e-05,
      "model_forward_time": 0.11436843872070312,
      "step": 6722
    },
    {
      "epoch": 4.102783203125e-05,
      "step": 6722,
      "training_step_time": 0.4632759094238281
    },
    {
      "epoch": 4.1033935546875e-05,
      "model_forward_time": 0.11576604843139648,
      "step": 6723
    },
    {
      "epoch": 4.1033935546875e-05,
      "step": 6723,
      "training_step_time": 0.78688645362854
    },
    {
      "epoch": 4.10400390625e-05,
      "model_forward_time": 0.1147470474243164,
      "step": 6724
    },
    {
      "epoch": 4.10400390625e-05,
      "step": 6724,
      "training_step_time": 0.44292116165161133
    },
    {
      "epoch": 4.1046142578125e-05,
      "model_forward_time": 0.11483240127563477,
      "step": 6725
    },
    {
      "epoch": 4.1046142578125e-05,
      "step": 6725,
      "training_step_time": 0.378124475479126
    },
    {
      "epoch": 4.105224609375e-05,
      "model_forward_time": 0.11403727531433105,
      "step": 6726
    },
    {
      "epoch": 4.105224609375e-05,
      "step": 6726,
      "training_step_time": 0.43297839164733887
    },
    {
      "epoch": 4.1058349609375e-05,
      "model_forward_time": 0.11480188369750977,
      "step": 6727
    },
    {
      "epoch": 4.1058349609375e-05,
      "step": 6727,
      "training_step_time": 0.39374351501464844
    },
    {
      "epoch": 4.1064453125e-05,
      "model_forward_time": 0.11495280265808105,
      "step": 6728
    },
    {
      "epoch": 4.1064453125e-05,
      "step": 6728,
      "training_step_time": 0.42562389373779297
    },
    {
      "epoch": 4.1070556640625e-05,
      "model_forward_time": 0.11482548713684082,
      "step": 6729
    },
    {
      "epoch": 4.1070556640625e-05,
      "step": 6729,
      "training_step_time": 0.5090577602386475
    },
    {
      "epoch": 4.107666015625e-05,
      "grad_norm": 0.3336773216724396,
      "learning_rate": 9.894712311239398e-05,
      "loss": 0.0816,
      "step": 6730
    },
    {
      "epoch": 4.107666015625e-05,
      "model_forward_time": 0.1148676872253418,
      "step": 6730
    },
    {
      "epoch": 4.107666015625e-05,
      "step": 6730,
      "training_step_time": 0.38099122047424316
    },
    {
      "epoch": 4.1082763671875e-05,
      "model_forward_time": 0.11518096923828125,
      "step": 6731
    },
    {
      "epoch": 4.1082763671875e-05,
      "step": 6731,
      "training_step_time": 0.38077235221862793
    },
    {
      "epoch": 4.10888671875e-05,
      "model_forward_time": 0.11481928825378418,
      "step": 6732
    },
    {
      "epoch": 4.10888671875e-05,
      "step": 6732,
      "training_step_time": 0.38881516456604004
    },
    {
      "epoch": 4.1094970703125e-05,
      "model_forward_time": 0.11446285247802734,
      "step": 6733
    },
    {
      "epoch": 4.1094970703125e-05,
      "step": 6733,
      "training_step_time": 0.38913655281066895
    },
    {
      "epoch": 4.110107421875e-05,
      "model_forward_time": 0.11496472358703613,
      "step": 6734
    },
    {
      "epoch": 4.110107421875e-05,
      "step": 6734,
      "training_step_time": 0.3683617115020752
    },
    {
      "epoch": 4.1107177734375e-05,
      "model_forward_time": 0.11469125747680664,
      "step": 6735
    },
    {
      "epoch": 4.1107177734375e-05,
      "step": 6735,
      "training_step_time": 0.8792386054992676
    },
    {
      "epoch": 4.111328125e-05,
      "model_forward_time": 0.11577224731445312,
      "step": 6736
    },
    {
      "epoch": 4.111328125e-05,
      "step": 6736,
      "training_step_time": 0.41128063201904297
    },
    {
      "epoch": 4.1119384765625e-05,
      "model_forward_time": 0.1144721508026123,
      "step": 6737
    },
    {
      "epoch": 4.1119384765625e-05,
      "step": 6737,
      "training_step_time": 0.40057826042175293
    },
    {
      "epoch": 4.112548828125e-05,
      "model_forward_time": 0.11429047584533691,
      "step": 6738
    },
    {
      "epoch": 4.112548828125e-05,
      "step": 6738,
      "training_step_time": 0.3845789432525635
    },
    {
      "epoch": 4.1131591796875e-05,
      "model_forward_time": 0.11407780647277832,
      "step": 6739
    },
    {
      "epoch": 4.1131591796875e-05,
      "step": 6739,
      "training_step_time": 0.3921971321105957
    },
    {
      "epoch": 4.11376953125e-05,
      "grad_norm": 0.304877370595932,
      "learning_rate": 9.894149012294708e-05,
      "loss": 0.0851,
      "step": 6740
    },
    {
      "epoch": 4.11376953125e-05,
      "model_forward_time": 0.11415863037109375,
      "step": 6740
    },
    {
      "epoch": 4.11376953125e-05,
      "step": 6740,
      "training_step_time": 0.47464561462402344
    },
    {
      "epoch": 4.1143798828125e-05,
      "model_forward_time": 0.11517763137817383,
      "step": 6741
    },
    {
      "epoch": 4.1143798828125e-05,
      "step": 6741,
      "training_step_time": 0.8570327758789062
    },
    {
      "epoch": 4.114990234375e-05,
      "model_forward_time": 0.1145775318145752,
      "step": 6742
    },
    {
      "epoch": 4.114990234375e-05,
      "step": 6742,
      "training_step_time": 0.3859889507293701
    },
    {
      "epoch": 4.1156005859375e-05,
      "model_forward_time": 0.11378097534179688,
      "step": 6743
    },
    {
      "epoch": 4.1156005859375e-05,
      "step": 6743,
      "training_step_time": 0.3910090923309326
    },
    {
      "epoch": 4.1162109375e-05,
      "model_forward_time": 0.11369609832763672,
      "step": 6744
    },
    {
      "epoch": 4.1162109375e-05,
      "step": 6744,
      "training_step_time": 0.38491177558898926
    },
    {
      "epoch": 4.1168212890625e-05,
      "model_forward_time": 0.11439824104309082,
      "step": 6745
    },
    {
      "epoch": 4.1168212890625e-05,
      "step": 6745,
      "training_step_time": 0.3913156986236572
    },
    {
      "epoch": 4.117431640625e-05,
      "model_forward_time": 0.11380410194396973,
      "step": 6746
    },
    {
      "epoch": 4.117431640625e-05,
      "step": 6746,
      "training_step_time": 0.39165163040161133
    },
    {
      "epoch": 4.1180419921875e-05,
      "model_forward_time": 0.11510133743286133,
      "step": 6747
    },
    {
      "epoch": 4.1180419921875e-05,
      "step": 6747,
      "training_step_time": 0.8431634902954102
    },
    {
      "epoch": 4.11865234375e-05,
      "model_forward_time": 0.11445856094360352,
      "step": 6748
    },
    {
      "epoch": 4.11865234375e-05,
      "step": 6748,
      "training_step_time": 0.4472651481628418
    },
    {
      "epoch": 4.1192626953125e-05,
      "model_forward_time": 0.11500310897827148,
      "step": 6749
    },
    {
      "epoch": 4.1192626953125e-05,
      "step": 6749,
      "training_step_time": 0.4285573959350586
    },
    {
      "epoch": 4.119873046875e-05,
      "grad_norm": 0.23441973328590393,
      "learning_rate": 9.893584226636772e-05,
      "loss": 0.074,
      "step": 6750
    },
    {
      "epoch": 4.119873046875e-05,
      "model_forward_time": 0.11478018760681152,
      "step": 6750
    },
    {
      "epoch": 4.119873046875e-05,
      "step": 6750,
      "training_step_time": 0.3971445560455322
    },
    {
      "epoch": 4.1204833984375e-05,
      "model_forward_time": 0.11394906044006348,
      "step": 6751
    },
    {
      "epoch": 4.1204833984375e-05,
      "step": 6751,
      "training_step_time": 0.40840673446655273
    },
    {
      "epoch": 4.12109375e-05,
      "model_forward_time": 0.11455082893371582,
      "step": 6752
    },
    {
      "epoch": 4.12109375e-05,
      "step": 6752,
      "training_step_time": 0.40662193298339844
    },
    {
      "epoch": 4.1217041015625e-05,
      "model_forward_time": 0.11545252799987793,
      "step": 6753
    },
    {
      "epoch": 4.1217041015625e-05,
      "step": 6753,
      "training_step_time": 0.40560388565063477
    },
    {
      "epoch": 4.122314453125e-05,
      "model_forward_time": 0.11573243141174316,
      "step": 6754
    },
    {
      "epoch": 4.122314453125e-05,
      "step": 6754,
      "training_step_time": 0.39784693717956543
    },
    {
      "epoch": 4.1229248046875e-05,
      "model_forward_time": 0.11472296714782715,
      "step": 6755
    },
    {
      "epoch": 4.1229248046875e-05,
      "step": 6755,
      "training_step_time": 0.4002799987792969
    },
    {
      "epoch": 4.12353515625e-05,
      "model_forward_time": 0.11533093452453613,
      "step": 6756
    },
    {
      "epoch": 4.12353515625e-05,
      "step": 6756,
      "training_step_time": 0.3967092037200928
    },
    {
      "epoch": 4.1241455078125e-05,
      "model_forward_time": 0.11562705039978027,
      "step": 6757
    },
    {
      "epoch": 4.1241455078125e-05,
      "step": 6757,
      "training_step_time": 0.3998236656188965
    },
    {
      "epoch": 4.124755859375e-05,
      "model_forward_time": 0.11567449569702148,
      "step": 6758
    },
    {
      "epoch": 4.124755859375e-05,
      "step": 6758,
      "training_step_time": 0.3977353572845459
    },
    {
      "epoch": 4.1253662109375e-05,
      "model_forward_time": 0.11499738693237305,
      "step": 6759
    },
    {
      "epoch": 4.1253662109375e-05,
      "step": 6759,
      "training_step_time": 0.6666874885559082
    },
    {
      "epoch": 4.1259765625e-05,
      "grad_norm": 0.2729036509990692,
      "learning_rate": 9.893017954437156e-05,
      "loss": 0.0757,
      "step": 6760
    },
    {
      "epoch": 4.1259765625e-05,
      "model_forward_time": 0.1143500804901123,
      "step": 6760
    },
    {
      "epoch": 4.1259765625e-05,
      "step": 6760,
      "training_step_time": 0.40579962730407715
    },
    {
      "epoch": 4.1265869140625e-05,
      "model_forward_time": 0.11611723899841309,
      "step": 6761
    },
    {
      "epoch": 4.1265869140625e-05,
      "step": 6761,
      "training_step_time": 0.3758885860443115
    },
    {
      "epoch": 4.127197265625e-05,
      "model_forward_time": 0.11437845230102539,
      "step": 6762
    },
    {
      "epoch": 4.127197265625e-05,
      "step": 6762,
      "training_step_time": 0.4641408920288086
    },
    {
      "epoch": 4.1278076171875e-05,
      "model_forward_time": 0.11464810371398926,
      "step": 6763
    },
    {
      "epoch": 4.1278076171875e-05,
      "step": 6763,
      "training_step_time": 0.4786112308502197
    },
    {
      "epoch": 4.12841796875e-05,
      "model_forward_time": 0.1146852970123291,
      "step": 6764
    },
    {
      "epoch": 4.12841796875e-05,
      "step": 6764,
      "training_step_time": 0.39449334144592285
    },
    {
      "epoch": 4.1290283203125e-05,
      "model_forward_time": 0.11510586738586426,
      "step": 6765
    },
    {
      "epoch": 4.1290283203125e-05,
      "step": 6765,
      "training_step_time": 0.40326666831970215
    },
    {
      "epoch": 4.129638671875e-05,
      "model_forward_time": 0.11635518074035645,
      "step": 6766
    },
    {
      "epoch": 4.129638671875e-05,
      "step": 6766,
      "training_step_time": 0.3913404941558838
    },
    {
      "epoch": 4.1302490234375e-05,
      "model_forward_time": 0.11454987525939941,
      "step": 6767
    },
    {
      "epoch": 4.1302490234375e-05,
      "step": 6767,
      "training_step_time": 0.4194934368133545
    },
    {
      "epoch": 4.130859375e-05,
      "model_forward_time": 0.11559224128723145,
      "step": 6768
    },
    {
      "epoch": 4.130859375e-05,
      "step": 6768,
      "training_step_time": 0.43553733825683594
    },
    {
      "epoch": 4.1314697265625e-05,
      "model_forward_time": 0.1147928237915039,
      "step": 6769
    },
    {
      "epoch": 4.1314697265625e-05,
      "step": 6769,
      "training_step_time": 0.5143675804138184
    },
    {
      "epoch": 4.132080078125e-05,
      "grad_norm": 0.2490294724702835,
      "learning_rate": 9.892450195867877e-05,
      "loss": 0.0717,
      "step": 6770
    },
    {
      "epoch": 4.132080078125e-05,
      "model_forward_time": 0.11577391624450684,
      "step": 6770
    },
    {
      "epoch": 4.132080078125e-05,
      "step": 6770,
      "training_step_time": 0.39455533027648926
    },
    {
      "epoch": 4.1326904296875e-05,
      "model_forward_time": 0.11487030982971191,
      "step": 6771
    },
    {
      "epoch": 4.1326904296875e-05,
      "step": 6771,
      "training_step_time": 0.5673537254333496
    },
    {
      "epoch": 4.13330078125e-05,
      "model_forward_time": 0.11436152458190918,
      "step": 6772
    },
    {
      "epoch": 4.13330078125e-05,
      "step": 6772,
      "training_step_time": 0.3827075958251953
    },
    {
      "epoch": 4.1339111328125e-05,
      "model_forward_time": 0.11539816856384277,
      "step": 6773
    },
    {
      "epoch": 4.1339111328125e-05,
      "step": 6773,
      "training_step_time": 0.38765883445739746
    },
    {
      "epoch": 4.134521484375e-05,
      "model_forward_time": 0.11497020721435547,
      "step": 6774
    },
    {
      "epoch": 4.134521484375e-05,
      "step": 6774,
      "training_step_time": 0.40457677841186523
    },
    {
      "epoch": 4.1351318359375e-05,
      "model_forward_time": 0.1153569221496582,
      "step": 6775
    },
    {
      "epoch": 4.1351318359375e-05,
      "step": 6775,
      "training_step_time": 0.46398162841796875
    },
    {
      "epoch": 4.1357421875e-05,
      "model_forward_time": 0.11508536338806152,
      "step": 6776
    },
    {
      "epoch": 4.1357421875e-05,
      "step": 6776,
      "training_step_time": 0.44086337089538574
    },
    {
      "epoch": 4.1363525390625e-05,
      "model_forward_time": 0.11529707908630371,
      "step": 6777
    },
    {
      "epoch": 4.1363525390625e-05,
      "step": 6777,
      "training_step_time": 0.47167444229125977
    },
    {
      "epoch": 4.136962890625e-05,
      "model_forward_time": 0.11503005027770996,
      "step": 6778
    },
    {
      "epoch": 4.136962890625e-05,
      "step": 6778,
      "training_step_time": 0.39435815811157227
    },
    {
      "epoch": 4.1375732421875e-05,
      "model_forward_time": 0.11470198631286621,
      "step": 6779
    },
    {
      "epoch": 4.1375732421875e-05,
      "step": 6779,
      "training_step_time": 0.3927645683288574
    },
    {
      "epoch": 4.13818359375e-05,
      "grad_norm": 0.25190216302871704,
      "learning_rate": 9.891880951101407e-05,
      "loss": 0.0752,
      "step": 6780
    },
    {
      "epoch": 4.13818359375e-05,
      "model_forward_time": 0.11507058143615723,
      "step": 6780
    },
    {
      "epoch": 4.13818359375e-05,
      "step": 6780,
      "training_step_time": 0.3986074924468994
    },
    {
      "epoch": 4.1387939453125e-05,
      "model_forward_time": 0.11507391929626465,
      "step": 6781
    },
    {
      "epoch": 4.1387939453125e-05,
      "step": 6781,
      "training_step_time": 0.44473814964294434
    },
    {
      "epoch": 4.139404296875e-05,
      "model_forward_time": 0.11454343795776367,
      "step": 6782
    },
    {
      "epoch": 4.139404296875e-05,
      "step": 6782,
      "training_step_time": 0.4023761749267578
    },
    {
      "epoch": 4.1400146484375e-05,
      "model_forward_time": 0.11498594284057617,
      "step": 6783
    },
    {
      "epoch": 4.1400146484375e-05,
      "step": 6783,
      "training_step_time": 0.5613372325897217
    },
    {
      "epoch": 4.140625e-05,
      "model_forward_time": 0.1148529052734375,
      "step": 6784
    },
    {
      "epoch": 4.140625e-05,
      "step": 6784,
      "training_step_time": 0.39163827896118164
    },
    {
      "epoch": 4.1412353515625e-05,
      "model_forward_time": 0.11498188972473145,
      "step": 6785
    },
    {
      "epoch": 4.1412353515625e-05,
      "step": 6785,
      "training_step_time": 0.38657379150390625
    },
    {
      "epoch": 4.141845703125e-05,
      "model_forward_time": 0.1144247055053711,
      "step": 6786
    },
    {
      "epoch": 4.141845703125e-05,
      "step": 6786,
      "training_step_time": 0.3910858631134033
    },
    {
      "epoch": 4.1424560546875e-05,
      "model_forward_time": 0.11467719078063965,
      "step": 6787
    },
    {
      "epoch": 4.1424560546875e-05,
      "step": 6787,
      "training_step_time": 0.3940260410308838
    },
    {
      "epoch": 4.14306640625e-05,
      "model_forward_time": 0.11513495445251465,
      "step": 6788
    },
    {
      "epoch": 4.14306640625e-05,
      "step": 6788,
      "training_step_time": 0.38590097427368164
    },
    {
      "epoch": 4.1436767578125e-05,
      "model_forward_time": 0.11496353149414062,
      "step": 6789
    },
    {
      "epoch": 4.1436767578125e-05,
      "step": 6789,
      "training_step_time": 0.9118912220001221
    },
    {
      "epoch": 4.144287109375e-05,
      "grad_norm": 0.32915204763412476,
      "learning_rate": 9.891310220310666e-05,
      "loss": 0.0781,
      "step": 6790
    },
    {
      "epoch": 4.144287109375e-05,
      "model_forward_time": 0.1142587661743164,
      "step": 6790
    },
    {
      "epoch": 4.144287109375e-05,
      "step": 6790,
      "training_step_time": 0.4188411235809326
    },
    {
      "epoch": 4.1448974609375e-05,
      "model_forward_time": 0.11454987525939941,
      "step": 6791
    },
    {
      "epoch": 4.1448974609375e-05,
      "step": 6791,
      "training_step_time": 0.42934155464172363
    },
    {
      "epoch": 4.1455078125e-05,
      "model_forward_time": 0.11445260047912598,
      "step": 6792
    },
    {
      "epoch": 4.1455078125e-05,
      "step": 6792,
      "training_step_time": 0.3778352737426758
    },
    {
      "epoch": 4.1461181640625e-05,
      "model_forward_time": 0.11396503448486328,
      "step": 6793
    },
    {
      "epoch": 4.1461181640625e-05,
      "step": 6793,
      "training_step_time": 0.38942551612854004
    },
    {
      "epoch": 4.146728515625e-05,
      "model_forward_time": 0.11484289169311523,
      "step": 6794
    },
    {
      "epoch": 4.146728515625e-05,
      "step": 6794,
      "training_step_time": 0.39966344833374023
    },
    {
      "epoch": 4.1473388671875e-05,
      "model_forward_time": 0.11417293548583984,
      "step": 6795
    },
    {
      "epoch": 4.1473388671875e-05,
      "step": 6795,
      "training_step_time": 0.47261524200439453
    },
    {
      "epoch": 4.14794921875e-05,
      "model_forward_time": 0.11539173126220703,
      "step": 6796
    },
    {
      "epoch": 4.14794921875e-05,
      "step": 6796,
      "training_step_time": 0.3979015350341797
    },
    {
      "epoch": 4.1485595703125e-05,
      "model_forward_time": 0.11453461647033691,
      "step": 6797
    },
    {
      "epoch": 4.1485595703125e-05,
      "step": 6797,
      "training_step_time": 0.47326159477233887
    },
    {
      "epoch": 4.149169921875e-05,
      "model_forward_time": 0.11466765403747559,
      "step": 6798
    },
    {
      "epoch": 4.149169921875e-05,
      "step": 6798,
      "training_step_time": 0.39835071563720703
    },
    {
      "epoch": 4.1497802734375e-05,
      "model_forward_time": 0.11498713493347168,
      "step": 6799
    },
    {
      "epoch": 4.1497802734375e-05,
      "step": 6799,
      "training_step_time": 0.4051954746246338
    },
    {
      "epoch": 4.150390625e-05,
      "grad_norm": 0.25957199931144714,
      "learning_rate": 9.890738003669029e-05,
      "loss": 0.0874,
      "step": 6800
    },
    {
      "epoch": 4.150390625e-05,
      "model_forward_time": 0.11436891555786133,
      "step": 6800
    },
    {
      "epoch": 4.150390625e-05,
      "step": 6800,
      "training_step_time": 0.3982551097869873
    },
    {
      "epoch": 4.1510009765625e-05,
      "model_forward_time": 0.11522316932678223,
      "step": 6801
    },
    {
      "epoch": 4.1510009765625e-05,
      "step": 6801,
      "training_step_time": 0.7516226768493652
    },
    {
      "epoch": 4.151611328125e-05,
      "model_forward_time": 0.1145167350769043,
      "step": 6802
    },
    {
      "epoch": 4.151611328125e-05,
      "step": 6802,
      "training_step_time": 0.45269012451171875
    },
    {
      "epoch": 4.1522216796875e-05,
      "model_forward_time": 0.1155242919921875,
      "step": 6803
    },
    {
      "epoch": 4.1522216796875e-05,
      "step": 6803,
      "training_step_time": 0.4790375232696533
    },
    {
      "epoch": 4.15283203125e-05,
      "model_forward_time": 0.11451029777526855,
      "step": 6804
    },
    {
      "epoch": 4.15283203125e-05,
      "step": 6804,
      "training_step_time": 0.45053839683532715
    },
    {
      "epoch": 4.1534423828125e-05,
      "model_forward_time": 0.11477351188659668,
      "step": 6805
    },
    {
      "epoch": 4.1534423828125e-05,
      "step": 6805,
      "training_step_time": 0.4680182933807373
    },
    {
      "epoch": 4.154052734375e-05,
      "model_forward_time": 0.11457967758178711,
      "step": 6806
    },
    {
      "epoch": 4.154052734375e-05,
      "step": 6806,
      "training_step_time": 0.39611220359802246
    },
    {
      "epoch": 4.1546630859375e-05,
      "model_forward_time": 0.11485958099365234,
      "step": 6807
    },
    {
      "epoch": 4.1546630859375e-05,
      "step": 6807,
      "training_step_time": 0.5271894931793213
    },
    {
      "epoch": 4.1552734375e-05,
      "model_forward_time": 0.11441802978515625,
      "step": 6808
    },
    {
      "epoch": 4.1552734375e-05,
      "step": 6808,
      "training_step_time": 0.40320348739624023
    },
    {
      "epoch": 4.1558837890625e-05,
      "model_forward_time": 0.11420345306396484,
      "step": 6809
    },
    {
      "epoch": 4.1558837890625e-05,
      "step": 6809,
      "training_step_time": 0.38393664360046387
    },
    {
      "epoch": 4.156494140625e-05,
      "grad_norm": 0.2582229673862457,
      "learning_rate": 9.890164301350318e-05,
      "loss": 0.072,
      "step": 6810
    },
    {
      "epoch": 4.156494140625e-05,
      "model_forward_time": 0.11514639854431152,
      "step": 6810
    },
    {
      "epoch": 4.156494140625e-05,
      "step": 6810,
      "training_step_time": 0.4215831756591797
    },
    {
      "epoch": 4.1571044921875e-05,
      "model_forward_time": 0.1149284839630127,
      "step": 6811
    },
    {
      "epoch": 4.1571044921875e-05,
      "step": 6811,
      "training_step_time": 0.4060382843017578
    },
    {
      "epoch": 4.15771484375e-05,
      "model_forward_time": 0.11499857902526855,
      "step": 6812
    },
    {
      "epoch": 4.15771484375e-05,
      "step": 6812,
      "training_step_time": 0.39763307571411133
    },
    {
      "epoch": 4.1583251953125e-05,
      "model_forward_time": 0.1145784854888916,
      "step": 6813
    },
    {
      "epoch": 4.1583251953125e-05,
      "step": 6813,
      "training_step_time": 1.0880305767059326
    },
    {
      "epoch": 4.158935546875e-05,
      "model_forward_time": 0.11421608924865723,
      "step": 6814
    },
    {
      "epoch": 4.158935546875e-05,
      "step": 6814,
      "training_step_time": 0.4059276580810547
    },
    {
      "epoch": 4.1595458984375e-05,
      "model_forward_time": 0.11387515068054199,
      "step": 6815
    },
    {
      "epoch": 4.1595458984375e-05,
      "step": 6815,
      "training_step_time": 0.3853757381439209
    },
    {
      "epoch": 4.16015625e-05,
      "model_forward_time": 0.11427831649780273,
      "step": 6816
    },
    {
      "epoch": 4.16015625e-05,
      "step": 6816,
      "training_step_time": 0.4170358180999756
    },
    {
      "epoch": 4.1607666015625e-05,
      "model_forward_time": 0.11407709121704102,
      "step": 6817
    },
    {
      "epoch": 4.1607666015625e-05,
      "step": 6817,
      "training_step_time": 0.4440310001373291
    },
    {
      "epoch": 4.161376953125e-05,
      "model_forward_time": 0.11414384841918945,
      "step": 6818
    },
    {
      "epoch": 4.161376953125e-05,
      "step": 6818,
      "training_step_time": 0.4622833728790283
    },
    {
      "epoch": 4.1619873046875e-05,
      "model_forward_time": 0.1145467758178711,
      "step": 6819
    },
    {
      "epoch": 4.1619873046875e-05,
      "step": 6819,
      "training_step_time": 0.4637589454650879
    },
    {
      "epoch": 4.16259765625e-05,
      "grad_norm": 0.44846081733703613,
      "learning_rate": 9.889589113528809e-05,
      "loss": 0.0751,
      "step": 6820
    },
    {
      "epoch": 4.16259765625e-05,
      "model_forward_time": 0.11479997634887695,
      "step": 6820
    },
    {
      "epoch": 4.16259765625e-05,
      "step": 6820,
      "training_step_time": 0.3902561664581299
    },
    {
      "epoch": 4.1632080078125e-05,
      "model_forward_time": 0.11474156379699707,
      "step": 6821
    },
    {
      "epoch": 4.1632080078125e-05,
      "step": 6821,
      "training_step_time": 0.4151651859283447
    },
    {
      "epoch": 4.163818359375e-05,
      "model_forward_time": 0.11557793617248535,
      "step": 6822
    },
    {
      "epoch": 4.163818359375e-05,
      "step": 6822,
      "training_step_time": 0.4817540645599365
    },
    {
      "epoch": 4.1644287109375e-05,
      "model_forward_time": 0.11485528945922852,
      "step": 6823
    },
    {
      "epoch": 4.1644287109375e-05,
      "step": 6823,
      "training_step_time": 0.40940403938293457
    },
    {
      "epoch": 4.1650390625e-05,
      "model_forward_time": 0.11599850654602051,
      "step": 6824
    },
    {
      "epoch": 4.1650390625e-05,
      "step": 6824,
      "training_step_time": 0.3885476589202881
    },
    {
      "epoch": 4.1656494140625e-05,
      "model_forward_time": 0.11499404907226562,
      "step": 6825
    },
    {
      "epoch": 4.1656494140625e-05,
      "step": 6825,
      "training_step_time": 0.7186121940612793
    },
    {
      "epoch": 4.166259765625e-05,
      "model_forward_time": 0.1137094497680664,
      "step": 6826
    },
    {
      "epoch": 4.166259765625e-05,
      "step": 6826,
      "training_step_time": 0.41905808448791504
    },
    {
      "epoch": 4.1668701171875e-05,
      "model_forward_time": 0.1139225959777832,
      "step": 6827
    },
    {
      "epoch": 4.1668701171875e-05,
      "step": 6827,
      "training_step_time": 0.3807992935180664
    },
    {
      "epoch": 4.16748046875e-05,
      "model_forward_time": 0.11450362205505371,
      "step": 6828
    },
    {
      "epoch": 4.16748046875e-05,
      "step": 6828,
      "training_step_time": 0.3851335048675537
    },
    {
      "epoch": 4.1680908203125e-05,
      "model_forward_time": 0.11553478240966797,
      "step": 6829
    },
    {
      "epoch": 4.1680908203125e-05,
      "step": 6829,
      "training_step_time": 0.40268492698669434
    },
    {
      "epoch": 4.168701171875e-05,
      "grad_norm": 0.2776511013507843,
      "learning_rate": 9.88901244037923e-05,
      "loss": 0.07,
      "step": 6830
    },
    {
      "epoch": 4.168701171875e-05,
      "model_forward_time": 0.1157381534576416,
      "step": 6830
    },
    {
      "epoch": 4.168701171875e-05,
      "step": 6830,
      "training_step_time": 0.49576592445373535
    },
    {
      "epoch": 4.1693115234375e-05,
      "model_forward_time": 0.11534523963928223,
      "step": 6831
    },
    {
      "epoch": 4.1693115234375e-05,
      "step": 6831,
      "training_step_time": 0.6633334159851074
    },
    {
      "epoch": 4.169921875e-05,
      "model_forward_time": 0.11649179458618164,
      "step": 6832
    },
    {
      "epoch": 4.169921875e-05,
      "step": 6832,
      "training_step_time": 0.384690523147583
    },
    {
      "epoch": 4.1705322265625e-05,
      "model_forward_time": 0.11410665512084961,
      "step": 6833
    },
    {
      "epoch": 4.1705322265625e-05,
      "step": 6833,
      "training_step_time": 0.39009690284729004
    },
    {
      "epoch": 4.171142578125e-05,
      "model_forward_time": 0.11404991149902344,
      "step": 6834
    },
    {
      "epoch": 4.171142578125e-05,
      "step": 6834,
      "training_step_time": 0.3896458148956299
    },
    {
      "epoch": 4.1717529296875e-05,
      "model_forward_time": 0.1148383617401123,
      "step": 6835
    },
    {
      "epoch": 4.1717529296875e-05,
      "step": 6835,
      "training_step_time": 0.41649723052978516
    },
    {
      "epoch": 4.17236328125e-05,
      "model_forward_time": 0.11426687240600586,
      "step": 6836
    },
    {
      "epoch": 4.17236328125e-05,
      "step": 6836,
      "training_step_time": 0.43878841400146484
    },
    {
      "epoch": 4.1729736328125e-05,
      "model_forward_time": 0.11507844924926758,
      "step": 6837
    },
    {
      "epoch": 4.1729736328125e-05,
      "step": 6837,
      "training_step_time": 0.9742636680603027
    },
    {
      "epoch": 4.173583984375e-05,
      "model_forward_time": 0.11420631408691406,
      "step": 6838
    },
    {
      "epoch": 4.173583984375e-05,
      "step": 6838,
      "training_step_time": 0.427203893661499
    },
    {
      "epoch": 4.1741943359375e-05,
      "model_forward_time": 0.11458468437194824,
      "step": 6839
    },
    {
      "epoch": 4.1741943359375e-05,
      "step": 6839,
      "training_step_time": 0.37989020347595215
    },
    {
      "epoch": 4.1748046875e-05,
      "grad_norm": 0.22314178943634033,
      "learning_rate": 9.888434282076758e-05,
      "loss": 0.0733,
      "step": 6840
    },
    {
      "epoch": 4.1748046875e-05,
      "model_forward_time": 0.11415719985961914,
      "step": 6840
    },
    {
      "epoch": 4.1748046875e-05,
      "step": 6840,
      "training_step_time": 0.3911755084991455
    },
    {
      "epoch": 4.1754150390625e-05,
      "model_forward_time": 0.11381769180297852,
      "step": 6841
    },
    {
      "epoch": 4.1754150390625e-05,
      "step": 6841,
      "training_step_time": 0.39005613327026367
    },
    {
      "epoch": 4.176025390625e-05,
      "model_forward_time": 0.11426782608032227,
      "step": 6842
    },
    {
      "epoch": 4.176025390625e-05,
      "step": 6842,
      "training_step_time": 0.38758325576782227
    },
    {
      "epoch": 4.1766357421875e-05,
      "model_forward_time": 0.1151571273803711,
      "step": 6843
    },
    {
      "epoch": 4.1766357421875e-05,
      "step": 6843,
      "training_step_time": 0.6513795852661133
    },
    {
      "epoch": 4.17724609375e-05,
      "model_forward_time": 0.11492061614990234,
      "step": 6844
    },
    {
      "epoch": 4.17724609375e-05,
      "step": 6844,
      "training_step_time": 0.4652855396270752
    },
    {
      "epoch": 4.1778564453125e-05,
      "model_forward_time": 0.11481785774230957,
      "step": 6845
    },
    {
      "epoch": 4.1778564453125e-05,
      "step": 6845,
      "training_step_time": 0.38896846771240234
    },
    {
      "epoch": 4.178466796875e-05,
      "model_forward_time": 0.11449646949768066,
      "step": 6846
    },
    {
      "epoch": 4.178466796875e-05,
      "step": 6846,
      "training_step_time": 0.390150785446167
    },
    {
      "epoch": 4.1790771484375e-05,
      "model_forward_time": 0.11460089683532715,
      "step": 6847
    },
    {
      "epoch": 4.1790771484375e-05,
      "step": 6847,
      "training_step_time": 0.38446664810180664
    },
    {
      "epoch": 4.1796875e-05,
      "model_forward_time": 0.1145467758178711,
      "step": 6848
    },
    {
      "epoch": 4.1796875e-05,
      "step": 6848,
      "training_step_time": 0.489638090133667
    },
    {
      "epoch": 4.1802978515625e-05,
      "model_forward_time": 0.11481809616088867,
      "step": 6849
    },
    {
      "epoch": 4.1802978515625e-05,
      "step": 6849,
      "training_step_time": 0.4896528720855713
    },
    {
      "epoch": 4.180908203125e-05,
      "grad_norm": 0.22385133802890778,
      "learning_rate": 9.887854638797023e-05,
      "loss": 0.0734,
      "step": 6850
    },
    {
      "epoch": 4.180908203125e-05,
      "model_forward_time": 0.1151123046875,
      "step": 6850
    },
    {
      "epoch": 4.180908203125e-05,
      "step": 6850,
      "training_step_time": 0.41097474098205566
    },
    {
      "epoch": 4.1815185546875e-05,
      "model_forward_time": 0.11449241638183594,
      "step": 6851
    },
    {
      "epoch": 4.1815185546875e-05,
      "step": 6851,
      "training_step_time": 0.40416669845581055
    },
    {
      "epoch": 4.18212890625e-05,
      "model_forward_time": 0.11503338813781738,
      "step": 6852
    },
    {
      "epoch": 4.18212890625e-05,
      "step": 6852,
      "training_step_time": 0.41034984588623047
    },
    {
      "epoch": 4.1827392578125e-05,
      "model_forward_time": 0.11470341682434082,
      "step": 6853
    },
    {
      "epoch": 4.1827392578125e-05,
      "step": 6853,
      "training_step_time": 0.39102864265441895
    },
    {
      "epoch": 4.183349609375e-05,
      "model_forward_time": 0.11560845375061035,
      "step": 6854
    },
    {
      "epoch": 4.183349609375e-05,
      "step": 6854,
      "training_step_time": 0.39073991775512695
    },
    {
      "epoch": 4.1839599609375e-05,
      "model_forward_time": 0.11440324783325195,
      "step": 6855
    },
    {
      "epoch": 4.1839599609375e-05,
      "step": 6855,
      "training_step_time": 0.9128973484039307
    },
    {
      "epoch": 4.1845703125e-05,
      "model_forward_time": 0.11432456970214844,
      "step": 6856
    },
    {
      "epoch": 4.1845703125e-05,
      "step": 6856,
      "training_step_time": 0.42117786407470703
    },
    {
      "epoch": 4.1851806640625e-05,
      "model_forward_time": 0.11411046981811523,
      "step": 6857
    },
    {
      "epoch": 4.1851806640625e-05,
      "step": 6857,
      "training_step_time": 0.4008157253265381
    },
    {
      "epoch": 4.185791015625e-05,
      "model_forward_time": 0.11412811279296875,
      "step": 6858
    },
    {
      "epoch": 4.185791015625e-05,
      "step": 6858,
      "training_step_time": 0.4362015724182129
    },
    {
      "epoch": 4.1864013671875e-05,
      "model_forward_time": 0.1139063835144043,
      "step": 6859
    },
    {
      "epoch": 4.1864013671875e-05,
      "step": 6859,
      "training_step_time": 0.38973164558410645
    },
    {
      "epoch": 4.18701171875e-05,
      "grad_norm": 0.3239944875240326,
      "learning_rate": 9.887273510716107e-05,
      "loss": 0.0824,
      "step": 6860
    },
    {
      "epoch": 4.18701171875e-05,
      "model_forward_time": 0.11468839645385742,
      "step": 6860
    },
    {
      "epoch": 4.18701171875e-05,
      "step": 6860,
      "training_step_time": 0.3807849884033203
    },
    {
      "epoch": 4.1876220703125e-05,
      "model_forward_time": 0.11511683464050293,
      "step": 6861
    },
    {
      "epoch": 4.1876220703125e-05,
      "step": 6861,
      "training_step_time": 0.7016994953155518
    },
    {
      "epoch": 4.188232421875e-05,
      "model_forward_time": 0.11600899696350098,
      "step": 6862
    },
    {
      "epoch": 4.188232421875e-05,
      "step": 6862,
      "training_step_time": 0.49625539779663086
    },
    {
      "epoch": 4.1888427734375e-05,
      "model_forward_time": 0.11448264122009277,
      "step": 6863
    },
    {
      "epoch": 4.1888427734375e-05,
      "step": 6863,
      "training_step_time": 0.4256322383880615
    },
    {
      "epoch": 4.189453125e-05,
      "model_forward_time": 0.11403870582580566,
      "step": 6864
    },
    {
      "epoch": 4.189453125e-05,
      "step": 6864,
      "training_step_time": 0.39260220527648926
    },
    {
      "epoch": 4.1900634765625e-05,
      "model_forward_time": 0.11466407775878906,
      "step": 6865
    },
    {
      "epoch": 4.1900634765625e-05,
      "step": 6865,
      "training_step_time": 0.38590288162231445
    },
    {
      "epoch": 4.190673828125e-05,
      "model_forward_time": 0.11427688598632812,
      "step": 6866
    },
    {
      "epoch": 4.190673828125e-05,
      "step": 6866,
      "training_step_time": 0.38814425468444824
    },
    {
      "epoch": 4.1912841796875e-05,
      "model_forward_time": 0.11514043807983398,
      "step": 6867
    },
    {
      "epoch": 4.1912841796875e-05,
      "step": 6867,
      "training_step_time": 0.6155080795288086
    },
    {
      "epoch": 4.19189453125e-05,
      "model_forward_time": 0.11454510688781738,
      "step": 6868
    },
    {
      "epoch": 4.19189453125e-05,
      "step": 6868,
      "training_step_time": 0.39377260208129883
    },
    {
      "epoch": 4.1925048828125e-05,
      "model_forward_time": 0.1150822639465332,
      "step": 6869
    },
    {
      "epoch": 4.1925048828125e-05,
      "step": 6869,
      "training_step_time": 0.3876326084136963
    },
    {
      "epoch": 4.193115234375e-05,
      "grad_norm": 0.22923576831817627,
      "learning_rate": 9.886690898010535e-05,
      "loss": 0.0796,
      "step": 6870
    },
    {
      "epoch": 4.193115234375e-05,
      "model_forward_time": 0.11514687538146973,
      "step": 6870
    },
    {
      "epoch": 4.193115234375e-05,
      "step": 6870,
      "training_step_time": 0.48763179779052734
    },
    {
      "epoch": 4.1937255859375e-05,
      "model_forward_time": 0.11455750465393066,
      "step": 6871
    },
    {
      "epoch": 4.1937255859375e-05,
      "step": 6871,
      "training_step_time": 0.5052487850189209
    },
    {
      "epoch": 4.1943359375e-05,
      "model_forward_time": 0.11429619789123535,
      "step": 6872
    },
    {
      "epoch": 4.1943359375e-05,
      "step": 6872,
      "training_step_time": 0.4515080451965332
    },
    {
      "epoch": 4.1949462890625e-05,
      "model_forward_time": 0.11450791358947754,
      "step": 6873
    },
    {
      "epoch": 4.1949462890625e-05,
      "step": 6873,
      "training_step_time": 0.3920435905456543
    },
    {
      "epoch": 4.195556640625e-05,
      "model_forward_time": 0.1151583194732666,
      "step": 6874
    },
    {
      "epoch": 4.195556640625e-05,
      "step": 6874,
      "training_step_time": 0.44457411766052246
    },
    {
      "epoch": 4.1961669921875e-05,
      "model_forward_time": 0.1146538257598877,
      "step": 6875
    },
    {
      "epoch": 4.1961669921875e-05,
      "step": 6875,
      "training_step_time": 0.4632840156555176
    },
    {
      "epoch": 4.19677734375e-05,
      "model_forward_time": 0.11434650421142578,
      "step": 6876
    },
    {
      "epoch": 4.19677734375e-05,
      "step": 6876,
      "training_step_time": 0.4522833824157715
    },
    {
      "epoch": 4.1973876953125e-05,
      "model_forward_time": 0.11474394798278809,
      "step": 6877
    },
    {
      "epoch": 4.1973876953125e-05,
      "step": 6877,
      "training_step_time": 0.43259215354919434
    },
    {
      "epoch": 4.197998046875e-05,
      "model_forward_time": 0.1151728630065918,
      "step": 6878
    },
    {
      "epoch": 4.197998046875e-05,
      "step": 6878,
      "training_step_time": 0.3859124183654785
    },
    {
      "epoch": 4.1986083984375e-05,
      "model_forward_time": 0.11494326591491699,
      "step": 6879
    },
    {
      "epoch": 4.1986083984375e-05,
      "step": 6879,
      "training_step_time": 0.5935192108154297
    },
    {
      "epoch": 4.19921875e-05,
      "grad_norm": 0.23771034181118011,
      "learning_rate": 9.886106800857298e-05,
      "loss": 0.0823,
      "step": 6880
    },
    {
      "epoch": 4.19921875e-05,
      "model_forward_time": 0.11414122581481934,
      "step": 6880
    },
    {
      "epoch": 4.19921875e-05,
      "step": 6880,
      "training_step_time": 0.3885924816131592
    },
    {
      "epoch": 4.1998291015625e-05,
      "model_forward_time": 0.11469793319702148,
      "step": 6881
    },
    {
      "epoch": 4.1998291015625e-05,
      "step": 6881,
      "training_step_time": 0.38764429092407227
    },
    {
      "epoch": 4.200439453125e-05,
      "model_forward_time": 0.1144251823425293,
      "step": 6882
    },
    {
      "epoch": 4.200439453125e-05,
      "step": 6882,
      "training_step_time": 0.386249303817749
    },
    {
      "epoch": 4.2010498046875e-05,
      "model_forward_time": 0.11470246315002441,
      "step": 6883
    },
    {
      "epoch": 4.2010498046875e-05,
      "step": 6883,
      "training_step_time": 0.38686656951904297
    },
    {
      "epoch": 4.20166015625e-05,
      "model_forward_time": 0.11687040328979492,
      "step": 6884
    },
    {
      "epoch": 4.20166015625e-05,
      "step": 6884,
      "training_step_time": 0.43819308280944824
    },
    {
      "epoch": 4.2022705078125e-05,
      "model_forward_time": 0.1147603988647461,
      "step": 6885
    },
    {
      "epoch": 4.2022705078125e-05,
      "step": 6885,
      "training_step_time": 0.4676804542541504
    },
    {
      "epoch": 4.202880859375e-05,
      "model_forward_time": 0.11495304107666016,
      "step": 6886
    },
    {
      "epoch": 4.202880859375e-05,
      "step": 6886,
      "training_step_time": 0.4960196018218994
    },
    {
      "epoch": 4.2034912109375e-05,
      "model_forward_time": 0.11516332626342773,
      "step": 6887
    },
    {
      "epoch": 4.2034912109375e-05,
      "step": 6887,
      "training_step_time": 0.377716064453125
    },
    {
      "epoch": 4.2041015625e-05,
      "model_forward_time": 0.11501145362854004,
      "step": 6888
    },
    {
      "epoch": 4.2041015625e-05,
      "step": 6888,
      "training_step_time": 0.4071657657623291
    },
    {
      "epoch": 4.2047119140625e-05,
      "model_forward_time": 0.11503219604492188,
      "step": 6889
    },
    {
      "epoch": 4.2047119140625e-05,
      "step": 6889,
      "training_step_time": 0.4889957904815674
    },
    {
      "epoch": 4.205322265625e-05,
      "grad_norm": 0.2492363601922989,
      "learning_rate": 9.885521219433823e-05,
      "loss": 0.0796,
      "step": 6890
    },
    {
      "epoch": 4.205322265625e-05,
      "model_forward_time": 0.114227294921875,
      "step": 6890
    },
    {
      "epoch": 4.205322265625e-05,
      "step": 6890,
      "training_step_time": 0.42572522163391113
    },
    {
      "epoch": 4.2059326171875e-05,
      "model_forward_time": 0.1146383285522461,
      "step": 6891
    },
    {
      "epoch": 4.2059326171875e-05,
      "step": 6891,
      "training_step_time": 0.3819434642791748
    },
    {
      "epoch": 4.20654296875e-05,
      "model_forward_time": 0.1151731014251709,
      "step": 6892
    },
    {
      "epoch": 4.20654296875e-05,
      "step": 6892,
      "training_step_time": 0.38741111755371094
    },
    {
      "epoch": 4.2071533203125e-05,
      "model_forward_time": 0.11454105377197266,
      "step": 6893
    },
    {
      "epoch": 4.2071533203125e-05,
      "step": 6893,
      "training_step_time": 0.38428568840026855
    },
    {
      "epoch": 4.207763671875e-05,
      "model_forward_time": 0.11512994766235352,
      "step": 6894
    },
    {
      "epoch": 4.207763671875e-05,
      "step": 6894,
      "training_step_time": 0.39044189453125
    },
    {
      "epoch": 4.2083740234375e-05,
      "model_forward_time": 0.11459708213806152,
      "step": 6895
    },
    {
      "epoch": 4.2083740234375e-05,
      "step": 6895,
      "training_step_time": 0.3961665630340576
    },
    {
      "epoch": 4.208984375e-05,
      "model_forward_time": 0.11515402793884277,
      "step": 6896
    },
    {
      "epoch": 4.208984375e-05,
      "step": 6896,
      "training_step_time": 0.4053466320037842
    },
    {
      "epoch": 4.2095947265625e-05,
      "model_forward_time": 0.11524701118469238,
      "step": 6897
    },
    {
      "epoch": 4.2095947265625e-05,
      "step": 6897,
      "training_step_time": 0.40103793144226074
    },
    {
      "epoch": 4.210205078125e-05,
      "model_forward_time": 0.11525583267211914,
      "step": 6898
    },
    {
      "epoch": 4.210205078125e-05,
      "step": 6898,
      "training_step_time": 0.44921374320983887
    },
    {
      "epoch": 4.2108154296875e-05,
      "model_forward_time": 0.11469745635986328,
      "step": 6899
    },
    {
      "epoch": 4.2108154296875e-05,
      "step": 6899,
      "training_step_time": 0.4498915672302246
    },
    {
      "epoch": 4.21142578125e-05,
      "grad_norm": 0.17764230072498322,
      "learning_rate": 9.884934153917997e-05,
      "loss": 0.0745,
      "step": 6900
    },
    {
      "epoch": 4.21142578125e-05,
      "model_forward_time": 0.11470174789428711,
      "step": 6900
    },
    {
      "epoch": 4.21142578125e-05,
      "step": 6900,
      "training_step_time": 0.43453359603881836
    },
    {
      "epoch": 4.2120361328125e-05,
      "model_forward_time": 0.11457514762878418,
      "step": 6901
    },
    {
      "epoch": 4.2120361328125e-05,
      "step": 6901,
      "training_step_time": 0.4370396137237549
    },
    {
      "epoch": 4.212646484375e-05,
      "model_forward_time": 0.11459970474243164,
      "step": 6902
    },
    {
      "epoch": 4.212646484375e-05,
      "step": 6902,
      "training_step_time": 0.39325690269470215
    },
    {
      "epoch": 4.2132568359375e-05,
      "model_forward_time": 0.1157221794128418,
      "step": 6903
    },
    {
      "epoch": 4.2132568359375e-05,
      "step": 6903,
      "training_step_time": 0.48484349250793457
    },
    {
      "epoch": 4.2138671875e-05,
      "model_forward_time": 0.11475133895874023,
      "step": 6904
    },
    {
      "epoch": 4.2138671875e-05,
      "step": 6904,
      "training_step_time": 0.3972446918487549
    },
    {
      "epoch": 4.2144775390625e-05,
      "model_forward_time": 0.11464643478393555,
      "step": 6905
    },
    {
      "epoch": 4.2144775390625e-05,
      "step": 6905,
      "training_step_time": 0.40288805961608887
    },
    {
      "epoch": 4.215087890625e-05,
      "model_forward_time": 0.11460304260253906,
      "step": 6906
    },
    {
      "epoch": 4.215087890625e-05,
      "step": 6906,
      "training_step_time": 0.39704275131225586
    },
    {
      "epoch": 4.2156982421875e-05,
      "model_forward_time": 0.11543583869934082,
      "step": 6907
    },
    {
      "epoch": 4.2156982421875e-05,
      "step": 6907,
      "training_step_time": 0.4095001220703125
    },
    {
      "epoch": 4.21630859375e-05,
      "model_forward_time": 0.1154031753540039,
      "step": 6908
    },
    {
      "epoch": 4.21630859375e-05,
      "step": 6908,
      "training_step_time": 0.39940977096557617
    },
    {
      "epoch": 4.2169189453125e-05,
      "model_forward_time": 0.11526942253112793,
      "step": 6909
    },
    {
      "epoch": 4.2169189453125e-05,
      "step": 6909,
      "training_step_time": 0.7882480621337891
    },
    {
      "epoch": 4.217529296875e-05,
      "grad_norm": 0.2141934633255005,
      "learning_rate": 9.884345604488155e-05,
      "loss": 0.0753,
      "step": 6910
    },
    {
      "epoch": 4.217529296875e-05,
      "model_forward_time": 0.11453914642333984,
      "step": 6910
    },
    {
      "epoch": 4.217529296875e-05,
      "step": 6910,
      "training_step_time": 0.3894979953765869
    },
    {
      "epoch": 4.2181396484375e-05,
      "model_forward_time": 0.11437439918518066,
      "step": 6911
    },
    {
      "epoch": 4.2181396484375e-05,
      "step": 6911,
      "training_step_time": 0.39703845977783203
    },
    {
      "epoch": 4.21875e-05,
      "model_forward_time": 0.11409330368041992,
      "step": 6912
    },
    {
      "epoch": 4.21875e-05,
      "step": 6912,
      "training_step_time": 0.39459896087646484
    },
    {
      "epoch": 4.2193603515625e-05,
      "model_forward_time": 0.11414670944213867,
      "step": 6913
    },
    {
      "epoch": 4.2193603515625e-05,
      "step": 6913,
      "training_step_time": 0.4562368392944336
    },
    {
      "epoch": 4.219970703125e-05,
      "model_forward_time": 0.11417341232299805,
      "step": 6914
    },
    {
      "epoch": 4.219970703125e-05,
      "step": 6914,
      "training_step_time": 0.4399993419647217
    },
    {
      "epoch": 4.2205810546875e-05,
      "model_forward_time": 0.11607742309570312,
      "step": 6915
    },
    {
      "epoch": 4.2205810546875e-05,
      "step": 6915,
      "training_step_time": 0.48323702812194824
    },
    {
      "epoch": 4.22119140625e-05,
      "model_forward_time": 0.11544489860534668,
      "step": 6916
    },
    {
      "epoch": 4.22119140625e-05,
      "step": 6916,
      "training_step_time": 0.4665088653564453
    },
    {
      "epoch": 4.2218017578125e-05,
      "model_forward_time": 0.11477947235107422,
      "step": 6917
    },
    {
      "epoch": 4.2218017578125e-05,
      "step": 6917,
      "training_step_time": 0.48154258728027344
    },
    {
      "epoch": 4.222412109375e-05,
      "model_forward_time": 0.1147010326385498,
      "step": 6918
    },
    {
      "epoch": 4.222412109375e-05,
      "step": 6918,
      "training_step_time": 0.4615952968597412
    },
    {
      "epoch": 4.2230224609375e-05,
      "model_forward_time": 0.11467123031616211,
      "step": 6919
    },
    {
      "epoch": 4.2230224609375e-05,
      "step": 6919,
      "training_step_time": 0.387099027633667
    },
    {
      "epoch": 4.2236328125e-05,
      "grad_norm": 0.27914294600486755,
      "learning_rate": 9.88375557132308e-05,
      "loss": 0.0841,
      "step": 6920
    },
    {
      "epoch": 4.2236328125e-05,
      "model_forward_time": 0.11394309997558594,
      "step": 6920
    },
    {
      "epoch": 4.2236328125e-05,
      "step": 6920,
      "training_step_time": 0.39386630058288574
    },
    {
      "epoch": 4.2242431640625e-05,
      "model_forward_time": 0.11521697044372559,
      "step": 6921
    },
    {
      "epoch": 4.2242431640625e-05,
      "step": 6921,
      "training_step_time": 0.8396768569946289
    },
    {
      "epoch": 4.224853515625e-05,
      "model_forward_time": 0.11400914192199707,
      "step": 6922
    },
    {
      "epoch": 4.224853515625e-05,
      "step": 6922,
      "training_step_time": 0.38541293144226074
    },
    {
      "epoch": 4.2254638671875e-05,
      "model_forward_time": 0.11457133293151855,
      "step": 6923
    },
    {
      "epoch": 4.2254638671875e-05,
      "step": 6923,
      "training_step_time": 0.3824796676635742
    },
    {
      "epoch": 4.22607421875e-05,
      "model_forward_time": 0.1148529052734375,
      "step": 6924
    },
    {
      "epoch": 4.22607421875e-05,
      "step": 6924,
      "training_step_time": 0.4057576656341553
    },
    {
      "epoch": 4.2266845703125e-05,
      "model_forward_time": 0.11495304107666016,
      "step": 6925
    },
    {
      "epoch": 4.2266845703125e-05,
      "step": 6925,
      "training_step_time": 0.3880753517150879
    },
    {
      "epoch": 4.227294921875e-05,
      "model_forward_time": 0.11521697044372559,
      "step": 6926
    },
    {
      "epoch": 4.227294921875e-05,
      "step": 6926,
      "training_step_time": 0.36383056640625
    },
    {
      "epoch": 4.2279052734375e-05,
      "model_forward_time": 0.11518478393554688,
      "step": 6927
    },
    {
      "epoch": 4.2279052734375e-05,
      "step": 6927,
      "training_step_time": 0.9216597080230713
    },
    {
      "epoch": 4.228515625e-05,
      "model_forward_time": 0.11389565467834473,
      "step": 6928
    },
    {
      "epoch": 4.228515625e-05,
      "step": 6928,
      "training_step_time": 0.4258122444152832
    },
    {
      "epoch": 4.2291259765625e-05,
      "model_forward_time": 0.1141357421875,
      "step": 6929
    },
    {
      "epoch": 4.2291259765625e-05,
      "step": 6929,
      "training_step_time": 0.4392843246459961
    },
    {
      "epoch": 4.229736328125e-05,
      "grad_norm": 0.26551634073257446,
      "learning_rate": 9.883164054602012e-05,
      "loss": 0.0708,
      "step": 6930
    },
    {
      "epoch": 4.229736328125e-05,
      "model_forward_time": 0.11441183090209961,
      "step": 6930
    },
    {
      "epoch": 4.229736328125e-05,
      "step": 6930,
      "training_step_time": 0.3902101516723633
    },
    {
      "epoch": 4.2303466796875e-05,
      "model_forward_time": 0.11366438865661621,
      "step": 6931
    },
    {
      "epoch": 4.2303466796875e-05,
      "step": 6931,
      "training_step_time": 0.4161670207977295
    },
    {
      "epoch": 4.23095703125e-05,
      "model_forward_time": 0.11371612548828125,
      "step": 6932
    },
    {
      "epoch": 4.23095703125e-05,
      "step": 6932,
      "training_step_time": 0.38991308212280273
    },
    {
      "epoch": 4.2315673828125e-05,
      "model_forward_time": 0.1146388053894043,
      "step": 6933
    },
    {
      "epoch": 4.2315673828125e-05,
      "step": 6933,
      "training_step_time": 0.6486499309539795
    },
    {
      "epoch": 4.232177734375e-05,
      "model_forward_time": 0.1150047779083252,
      "step": 6934
    },
    {
      "epoch": 4.232177734375e-05,
      "step": 6934,
      "training_step_time": 0.4031665325164795
    },
    {
      "epoch": 4.2327880859375e-05,
      "model_forward_time": 0.11388564109802246,
      "step": 6935
    },
    {
      "epoch": 4.2327880859375e-05,
      "step": 6935,
      "training_step_time": 0.38017964363098145
    },
    {
      "epoch": 4.2333984375e-05,
      "model_forward_time": 0.11448860168457031,
      "step": 6936
    },
    {
      "epoch": 4.2333984375e-05,
      "step": 6936,
      "training_step_time": 0.38611912727355957
    },
    {
      "epoch": 4.2340087890625e-05,
      "model_forward_time": 0.11498641967773438,
      "step": 6937
    },
    {
      "epoch": 4.2340087890625e-05,
      "step": 6937,
      "training_step_time": 0.3997941017150879
    },
    {
      "epoch": 4.234619140625e-05,
      "model_forward_time": 0.11460471153259277,
      "step": 6938
    },
    {
      "epoch": 4.234619140625e-05,
      "step": 6938,
      "training_step_time": 0.38579392433166504
    },
    {
      "epoch": 4.2352294921875e-05,
      "model_forward_time": 0.11455821990966797,
      "step": 6939
    },
    {
      "epoch": 4.2352294921875e-05,
      "step": 6939,
      "training_step_time": 0.8803601264953613
    },
    {
      "epoch": 4.23583984375e-05,
      "grad_norm": 0.2616161108016968,
      "learning_rate": 9.882571054504636e-05,
      "loss": 0.072,
      "step": 6940
    },
    {
      "epoch": 4.23583984375e-05,
      "model_forward_time": 0.11430501937866211,
      "step": 6940
    },
    {
      "epoch": 4.23583984375e-05,
      "step": 6940,
      "training_step_time": 0.4468116760253906
    },
    {
      "epoch": 4.2364501953125e-05,
      "model_forward_time": 0.11435270309448242,
      "step": 6941
    },
    {
      "epoch": 4.2364501953125e-05,
      "step": 6941,
      "training_step_time": 0.4821133613586426
    },
    {
      "epoch": 4.237060546875e-05,
      "model_forward_time": 0.11389827728271484,
      "step": 6942
    },
    {
      "epoch": 4.237060546875e-05,
      "step": 6942,
      "training_step_time": 0.4134705066680908
    },
    {
      "epoch": 4.2376708984375e-05,
      "model_forward_time": 0.11348533630371094,
      "step": 6943
    },
    {
      "epoch": 4.2376708984375e-05,
      "step": 6943,
      "training_step_time": 0.447434663772583
    },
    {
      "epoch": 4.23828125e-05,
      "model_forward_time": 0.1140749454498291,
      "step": 6944
    },
    {
      "epoch": 4.23828125e-05,
      "step": 6944,
      "training_step_time": 0.4345259666442871
    },
    {
      "epoch": 4.2388916015625e-05,
      "model_forward_time": 0.11430692672729492,
      "step": 6945
    },
    {
      "epoch": 4.2388916015625e-05,
      "step": 6945,
      "training_step_time": 0.6304905414581299
    },
    {
      "epoch": 4.239501953125e-05,
      "model_forward_time": 0.11444854736328125,
      "step": 6946
    },
    {
      "epoch": 4.239501953125e-05,
      "step": 6946,
      "training_step_time": 0.3781092166900635
    },
    {
      "epoch": 4.2401123046875e-05,
      "model_forward_time": 0.11401081085205078,
      "step": 6947
    },
    {
      "epoch": 4.2401123046875e-05,
      "step": 6947,
      "training_step_time": 0.379288911819458
    },
    {
      "epoch": 4.24072265625e-05,
      "model_forward_time": 0.11492133140563965,
      "step": 6948
    },
    {
      "epoch": 4.24072265625e-05,
      "step": 6948,
      "training_step_time": 0.3848235607147217
    },
    {
      "epoch": 4.2413330078125e-05,
      "model_forward_time": 0.11438202857971191,
      "step": 6949
    },
    {
      "epoch": 4.2413330078125e-05,
      "step": 6949,
      "training_step_time": 0.3829996585845947
    },
    {
      "epoch": 4.241943359375e-05,
      "grad_norm": 0.29414039850234985,
      "learning_rate": 9.88197657121109e-05,
      "loss": 0.0805,
      "step": 6950
    },
    {
      "epoch": 4.241943359375e-05,
      "model_forward_time": 0.11522841453552246,
      "step": 6950
    },
    {
      "epoch": 4.241943359375e-05,
      "step": 6950,
      "training_step_time": 0.3827807903289795
    },
    {
      "epoch": 4.2425537109375e-05,
      "model_forward_time": 0.11592555046081543,
      "step": 6951
    },
    {
      "epoch": 4.2425537109375e-05,
      "step": 6951,
      "training_step_time": 0.8498926162719727
    },
    {
      "epoch": 4.2431640625e-05,
      "model_forward_time": 0.11603713035583496,
      "step": 6952
    },
    {
      "epoch": 4.2431640625e-05,
      "step": 6952,
      "training_step_time": 0.3686251640319824
    },
    {
      "epoch": 4.2437744140625e-05,
      "model_forward_time": 0.1150515079498291,
      "step": 6953
    },
    {
      "epoch": 4.2437744140625e-05,
      "step": 6953,
      "training_step_time": 0.43627476692199707
    },
    {
      "epoch": 4.244384765625e-05,
      "model_forward_time": 0.11459183692932129,
      "step": 6954
    },
    {
      "epoch": 4.244384765625e-05,
      "step": 6954,
      "training_step_time": 0.48869800567626953
    },
    {
      "epoch": 4.2449951171875e-05,
      "model_forward_time": 0.11383223533630371,
      "step": 6955
    },
    {
      "epoch": 4.2449951171875e-05,
      "step": 6955,
      "training_step_time": 0.3902456760406494
    },
    {
      "epoch": 4.24560546875e-05,
      "model_forward_time": 0.11400485038757324,
      "step": 6956
    },
    {
      "epoch": 4.24560546875e-05,
      "step": 6956,
      "training_step_time": 0.41378355026245117
    },
    {
      "epoch": 4.2462158203125e-05,
      "model_forward_time": 0.11389827728271484,
      "step": 6957
    },
    {
      "epoch": 4.2462158203125e-05,
      "step": 6957,
      "training_step_time": 0.48243117332458496
    },
    {
      "epoch": 4.246826171875e-05,
      "model_forward_time": 0.11617875099182129,
      "step": 6958
    },
    {
      "epoch": 4.246826171875e-05,
      "step": 6958,
      "training_step_time": 0.3889484405517578
    },
    {
      "epoch": 4.2474365234375e-05,
      "model_forward_time": 0.11419391632080078,
      "step": 6959
    },
    {
      "epoch": 4.2474365234375e-05,
      "step": 6959,
      "training_step_time": 0.39597010612487793
    },
    {
      "epoch": 4.248046875e-05,
      "grad_norm": 0.2597789764404297,
      "learning_rate": 9.881380604901964e-05,
      "loss": 0.0757,
      "step": 6960
    },
    {
      "epoch": 4.248046875e-05,
      "model_forward_time": 0.11486291885375977,
      "step": 6960
    },
    {
      "epoch": 4.248046875e-05,
      "step": 6960,
      "training_step_time": 0.396740198135376
    },
    {
      "epoch": 4.2486572265625e-05,
      "model_forward_time": 0.11490678787231445,
      "step": 6961
    },
    {
      "epoch": 4.2486572265625e-05,
      "step": 6961,
      "training_step_time": 0.39925336837768555
    },
    {
      "epoch": 4.249267578125e-05,
      "model_forward_time": 0.11496090888977051,
      "step": 6962
    },
    {
      "epoch": 4.249267578125e-05,
      "step": 6962,
      "training_step_time": 0.3907170295715332
    },
    {
      "epoch": 4.2498779296875e-05,
      "model_forward_time": 0.11496615409851074,
      "step": 6963
    },
    {
      "epoch": 4.2498779296875e-05,
      "step": 6963,
      "training_step_time": 0.39153432846069336
    },
    {
      "epoch": 4.25048828125e-05,
      "model_forward_time": 0.11427545547485352,
      "step": 6964
    },
    {
      "epoch": 4.25048828125e-05,
      "step": 6964,
      "training_step_time": 0.38933682441711426
    },
    {
      "epoch": 4.2510986328125e-05,
      "model_forward_time": 0.1151113510131836,
      "step": 6965
    },
    {
      "epoch": 4.2510986328125e-05,
      "step": 6965,
      "training_step_time": 0.39702725410461426
    },
    {
      "epoch": 4.251708984375e-05,
      "model_forward_time": 0.11528778076171875,
      "step": 6966
    },
    {
      "epoch": 4.251708984375e-05,
      "step": 6966,
      "training_step_time": 0.4177134037017822
    },
    {
      "epoch": 4.2523193359375e-05,
      "model_forward_time": 0.11519598960876465,
      "step": 6967
    },
    {
      "epoch": 4.2523193359375e-05,
      "step": 6967,
      "training_step_time": 0.468064546585083
    },
    {
      "epoch": 4.2529296875e-05,
      "model_forward_time": 0.11488056182861328,
      "step": 6968
    },
    {
      "epoch": 4.2529296875e-05,
      "step": 6968,
      "training_step_time": 0.47991156578063965
    },
    {
      "epoch": 4.2535400390625e-05,
      "model_forward_time": 0.11549901962280273,
      "step": 6969
    },
    {
      "epoch": 4.2535400390625e-05,
      "step": 6969,
      "training_step_time": 0.5225365161895752
    },
    {
      "epoch": 4.254150390625e-05,
      "grad_norm": 0.2690414488315582,
      "learning_rate": 9.880783155758296e-05,
      "loss": 0.0719,
      "step": 6970
    },
    {
      "epoch": 4.254150390625e-05,
      "model_forward_time": 0.11530780792236328,
      "step": 6970
    },
    {
      "epoch": 4.254150390625e-05,
      "step": 6970,
      "training_step_time": 0.3881664276123047
    },
    {
      "epoch": 4.2547607421875e-05,
      "model_forward_time": 0.11474847793579102,
      "step": 6971
    },
    {
      "epoch": 4.2547607421875e-05,
      "step": 6971,
      "training_step_time": 0.40152573585510254
    },
    {
      "epoch": 4.25537109375e-05,
      "model_forward_time": 0.11521267890930176,
      "step": 6972
    },
    {
      "epoch": 4.25537109375e-05,
      "step": 6972,
      "training_step_time": 0.40453290939331055
    },
    {
      "epoch": 4.2559814453125e-05,
      "model_forward_time": 0.11430191993713379,
      "step": 6973
    },
    {
      "epoch": 4.2559814453125e-05,
      "step": 6973,
      "training_step_time": 0.39592909812927246
    },
    {
      "epoch": 4.256591796875e-05,
      "model_forward_time": 0.11473608016967773,
      "step": 6974
    },
    {
      "epoch": 4.256591796875e-05,
      "step": 6974,
      "training_step_time": 0.39162659645080566
    },
    {
      "epoch": 4.2572021484375e-05,
      "model_forward_time": 0.11666226387023926,
      "step": 6975
    },
    {
      "epoch": 4.2572021484375e-05,
      "step": 6975,
      "training_step_time": 1.0228767395019531
    },
    {
      "epoch": 4.2578125e-05,
      "model_forward_time": 0.11429834365844727,
      "step": 6976
    },
    {
      "epoch": 4.2578125e-05,
      "step": 6976,
      "training_step_time": 0.3901376724243164
    },
    {
      "epoch": 4.2584228515625e-05,
      "model_forward_time": 0.11374783515930176,
      "step": 6977
    },
    {
      "epoch": 4.2584228515625e-05,
      "step": 6977,
      "training_step_time": 0.3891150951385498
    },
    {
      "epoch": 4.259033203125e-05,
      "model_forward_time": 0.11454391479492188,
      "step": 6978
    },
    {
      "epoch": 4.259033203125e-05,
      "step": 6978,
      "training_step_time": 0.40303874015808105
    },
    {
      "epoch": 4.2596435546875e-05,
      "model_forward_time": 0.11405205726623535,
      "step": 6979
    },
    {
      "epoch": 4.2596435546875e-05,
      "step": 6979,
      "training_step_time": 0.40083956718444824
    },
    {
      "epoch": 4.26025390625e-05,
      "grad_norm": 0.2062709480524063,
      "learning_rate": 9.880184223961573e-05,
      "loss": 0.0713,
      "step": 6980
    },
    {
      "epoch": 4.26025390625e-05,
      "model_forward_time": 0.11426520347595215,
      "step": 6980
    },
    {
      "epoch": 4.26025390625e-05,
      "step": 6980,
      "training_step_time": 0.3640868663787842
    },
    {
      "epoch": 4.2608642578125e-05,
      "model_forward_time": 0.11468076705932617,
      "step": 6981
    },
    {
      "epoch": 4.2608642578125e-05,
      "step": 6981,
      "training_step_time": 0.9362139701843262
    },
    {
      "epoch": 4.261474609375e-05,
      "model_forward_time": 0.11379861831665039,
      "step": 6982
    },
    {
      "epoch": 4.261474609375e-05,
      "step": 6982,
      "training_step_time": 0.4426276683807373
    },
    {
      "epoch": 4.2620849609375e-05,
      "model_forward_time": 0.11388325691223145,
      "step": 6983
    },
    {
      "epoch": 4.2620849609375e-05,
      "step": 6983,
      "training_step_time": 0.43886590003967285
    },
    {
      "epoch": 4.2626953125e-05,
      "model_forward_time": 0.1134653091430664,
      "step": 6984
    },
    {
      "epoch": 4.2626953125e-05,
      "step": 6984,
      "training_step_time": 0.41040587425231934
    },
    {
      "epoch": 4.2633056640625e-05,
      "model_forward_time": 0.11411046981811523,
      "step": 6985
    },
    {
      "epoch": 4.2633056640625e-05,
      "step": 6985,
      "training_step_time": 0.3834972381591797
    },
    {
      "epoch": 4.263916015625e-05,
      "model_forward_time": 0.1149742603302002,
      "step": 6986
    },
    {
      "epoch": 4.263916015625e-05,
      "step": 6986,
      "training_step_time": 0.3855409622192383
    },
    {
      "epoch": 4.2645263671875e-05,
      "model_forward_time": 0.11491084098815918,
      "step": 6987
    },
    {
      "epoch": 4.2645263671875e-05,
      "step": 6987,
      "training_step_time": 0.6323356628417969
    },
    {
      "epoch": 4.26513671875e-05,
      "model_forward_time": 0.11453890800476074,
      "step": 6988
    },
    {
      "epoch": 4.26513671875e-05,
      "step": 6988,
      "training_step_time": 0.39114928245544434
    },
    {
      "epoch": 4.2657470703125e-05,
      "model_forward_time": 0.1148219108581543,
      "step": 6989
    },
    {
      "epoch": 4.2657470703125e-05,
      "step": 6989,
      "training_step_time": 0.3883395195007324
    },
    {
      "epoch": 4.266357421875e-05,
      "grad_norm": 0.2519131004810333,
      "learning_rate": 9.879583809693738e-05,
      "loss": 0.0756,
      "step": 6990
    },
    {
      "epoch": 4.266357421875e-05,
      "model_forward_time": 0.11459875106811523,
      "step": 6990
    },
    {
      "epoch": 4.266357421875e-05,
      "step": 6990,
      "training_step_time": 0.41693806648254395
    },
    {
      "epoch": 4.2669677734375e-05,
      "model_forward_time": 0.11491942405700684,
      "step": 6991
    },
    {
      "epoch": 4.2669677734375e-05,
      "step": 6991,
      "training_step_time": 0.43811988830566406
    },
    {
      "epoch": 4.267578125e-05,
      "model_forward_time": 0.11460614204406738,
      "step": 6992
    },
    {
      "epoch": 4.267578125e-05,
      "step": 6992,
      "training_step_time": 0.41512060165405273
    },
    {
      "epoch": 4.2681884765625e-05,
      "model_forward_time": 0.11493086814880371,
      "step": 6993
    },
    {
      "epoch": 4.2681884765625e-05,
      "step": 6993,
      "training_step_time": 0.6518445014953613
    },
    {
      "epoch": 4.268798828125e-05,
      "model_forward_time": 0.11460137367248535,
      "step": 6994
    },
    {
      "epoch": 4.268798828125e-05,
      "step": 6994,
      "training_step_time": 0.4136018753051758
    },
    {
      "epoch": 4.2694091796875e-05,
      "model_forward_time": 0.11505508422851562,
      "step": 6995
    },
    {
      "epoch": 4.2694091796875e-05,
      "step": 6995,
      "training_step_time": 0.4517557621002197
    },
    {
      "epoch": 4.27001953125e-05,
      "model_forward_time": 0.11446928977966309,
      "step": 6996
    },
    {
      "epoch": 4.27001953125e-05,
      "step": 6996,
      "training_step_time": 0.4259765148162842
    },
    {
      "epoch": 4.2706298828125e-05,
      "model_forward_time": 0.11407327651977539,
      "step": 6997
    },
    {
      "epoch": 4.2706298828125e-05,
      "step": 6997,
      "training_step_time": 0.39716434478759766
    },
    {
      "epoch": 4.271240234375e-05,
      "model_forward_time": 0.11457943916320801,
      "step": 6998
    },
    {
      "epoch": 4.271240234375e-05,
      "step": 6998,
      "training_step_time": 0.38335752487182617
    },
    {
      "epoch": 4.2718505859375e-05,
      "model_forward_time": 0.11483263969421387,
      "step": 6999
    },
    {
      "epoch": 4.2718505859375e-05,
      "step": 6999,
      "training_step_time": 0.6100900173187256
    },
    {
      "epoch": 4.2724609375e-05,
      "grad_norm": 0.33764582872390747,
      "learning_rate": 9.878981913137179e-05,
      "loss": 0.0712,
      "step": 7000
    },
    {
      "epoch": 4.2724609375e-05,
      "model_forward_time": 0.11335539817810059,
      "step": 7000
    },
    {
      "epoch": 4.2724609375e-05,
      "step": 7000,
      "training_step_time": 0.3544433116912842
    },
    {
      "epoch": 4.2730712890625e-05,
      "model_forward_time": 0.11303329467773438,
      "step": 7001
    },
    {
      "epoch": 4.2730712890625e-05,
      "step": 7001,
      "training_step_time": 0.46615099906921387
    },
    {
      "epoch": 4.273681640625e-05,
      "model_forward_time": 0.1127009391784668,
      "step": 7002
    },
    {
      "epoch": 4.273681640625e-05,
      "step": 7002,
      "training_step_time": 0.373502254486084
    },
    {
      "epoch": 4.2742919921875e-05,
      "model_forward_time": 0.11321449279785156,
      "step": 7003
    },
    {
      "epoch": 4.2742919921875e-05,
      "step": 7003,
      "training_step_time": 0.38126158714294434
    },
    {
      "epoch": 4.27490234375e-05,
      "model_forward_time": 0.11426901817321777,
      "step": 7004
    },
    {
      "epoch": 4.27490234375e-05,
      "step": 7004,
      "training_step_time": 0.3839590549468994
    },
    {
      "epoch": 4.2755126953125e-05,
      "model_forward_time": 0.11421322822570801,
      "step": 7005
    },
    {
      "epoch": 4.2755126953125e-05,
      "step": 7005,
      "training_step_time": 0.38863587379455566
    },
    {
      "epoch": 4.276123046875e-05,
      "model_forward_time": 0.11476850509643555,
      "step": 7006
    },
    {
      "epoch": 4.276123046875e-05,
      "step": 7006,
      "training_step_time": 0.3810586929321289
    },
    {
      "epoch": 4.2767333984375e-05,
      "model_forward_time": 0.11466622352600098,
      "step": 7007
    },
    {
      "epoch": 4.2767333984375e-05,
      "step": 7007,
      "training_step_time": 0.42621445655822754
    },
    {
      "epoch": 4.27734375e-05,
      "model_forward_time": 0.11490345001220703,
      "step": 7008
    },
    {
      "epoch": 4.27734375e-05,
      "step": 7008,
      "training_step_time": 0.3992347717285156
    },
    {
      "epoch": 4.2779541015625e-05,
      "model_forward_time": 0.1149454116821289,
      "step": 7009
    },
    {
      "epoch": 4.2779541015625e-05,
      "step": 7009,
      "training_step_time": 0.39455318450927734
    },
    {
      "epoch": 4.278564453125e-05,
      "grad_norm": 0.19997134804725647,
      "learning_rate": 9.878378534474737e-05,
      "loss": 0.0756,
      "step": 7010
    },
    {
      "epoch": 4.278564453125e-05,
      "model_forward_time": 0.11510610580444336,
      "step": 7010
    },
    {
      "epoch": 4.278564453125e-05,
      "step": 7010,
      "training_step_time": 0.3910486698150635
    },
    {
      "epoch": 4.2791748046875e-05,
      "model_forward_time": 0.11472868919372559,
      "step": 7011
    },
    {
      "epoch": 4.2791748046875e-05,
      "step": 7011,
      "training_step_time": 0.4608428478240967
    },
    {
      "epoch": 4.27978515625e-05,
      "model_forward_time": 0.11486387252807617,
      "step": 7012
    },
    {
      "epoch": 4.27978515625e-05,
      "step": 7012,
      "training_step_time": 0.4611673355102539
    },
    {
      "epoch": 4.2803955078125e-05,
      "model_forward_time": 0.11547017097473145,
      "step": 7013
    },
    {
      "epoch": 4.2803955078125e-05,
      "step": 7013,
      "training_step_time": 0.502244234085083
    },
    {
      "epoch": 4.281005859375e-05,
      "model_forward_time": 0.11530423164367676,
      "step": 7014
    },
    {
      "epoch": 4.281005859375e-05,
      "step": 7014,
      "training_step_time": 0.4678173065185547
    },
    {
      "epoch": 4.2816162109375e-05,
      "model_forward_time": 0.11544013023376465,
      "step": 7015
    },
    {
      "epoch": 4.2816162109375e-05,
      "step": 7015,
      "training_step_time": 0.4432182312011719
    },
    {
      "epoch": 4.2822265625e-05,
      "model_forward_time": 0.1147925853729248,
      "step": 7016
    },
    {
      "epoch": 4.2822265625e-05,
      "step": 7016,
      "training_step_time": 0.38896679878234863
    },
    {
      "epoch": 4.2828369140625e-05,
      "model_forward_time": 0.11472153663635254,
      "step": 7017
    },
    {
      "epoch": 4.2828369140625e-05,
      "step": 7017,
      "training_step_time": 0.40726423263549805
    },
    {
      "epoch": 4.283447265625e-05,
      "model_forward_time": 0.11491727828979492,
      "step": 7018
    },
    {
      "epoch": 4.283447265625e-05,
      "step": 7018,
      "training_step_time": 0.3796555995941162
    },
    {
      "epoch": 4.2840576171875e-05,
      "model_forward_time": 0.11461758613586426,
      "step": 7019
    },
    {
      "epoch": 4.2840576171875e-05,
      "step": 7019,
      "training_step_time": 0.39397239685058594
    },
    {
      "epoch": 4.28466796875e-05,
      "grad_norm": 0.2620401680469513,
      "learning_rate": 9.877773673889701e-05,
      "loss": 0.0675,
      "step": 7020
    },
    {
      "epoch": 4.28466796875e-05,
      "model_forward_time": 0.11427497863769531,
      "step": 7020
    },
    {
      "epoch": 4.28466796875e-05,
      "step": 7020,
      "training_step_time": 0.39752793312072754
    },
    {
      "epoch": 4.2852783203125e-05,
      "model_forward_time": 0.11554837226867676,
      "step": 7021
    },
    {
      "epoch": 4.2852783203125e-05,
      "step": 7021,
      "training_step_time": 0.3971834182739258
    },
    {
      "epoch": 4.285888671875e-05,
      "model_forward_time": 0.11470508575439453,
      "step": 7022
    },
    {
      "epoch": 4.285888671875e-05,
      "step": 7022,
      "training_step_time": 0.401125431060791
    },
    {
      "epoch": 4.2864990234375e-05,
      "model_forward_time": 0.11495232582092285,
      "step": 7023
    },
    {
      "epoch": 4.2864990234375e-05,
      "step": 7023,
      "training_step_time": 0.3992893695831299
    },
    {
      "epoch": 4.287109375e-05,
      "model_forward_time": 0.11473274230957031,
      "step": 7024
    },
    {
      "epoch": 4.287109375e-05,
      "step": 7024,
      "training_step_time": 0.38680481910705566
    },
    {
      "epoch": 4.2877197265625e-05,
      "model_forward_time": 0.11464738845825195,
      "step": 7025
    },
    {
      "epoch": 4.2877197265625e-05,
      "step": 7025,
      "training_step_time": 0.38889479637145996
    },
    {
      "epoch": 4.288330078125e-05,
      "model_forward_time": 0.11492228507995605,
      "step": 7026
    },
    {
      "epoch": 4.288330078125e-05,
      "step": 7026,
      "training_step_time": 0.3917427062988281
    },
    {
      "epoch": 4.2889404296875e-05,
      "model_forward_time": 0.11455917358398438,
      "step": 7027
    },
    {
      "epoch": 4.2889404296875e-05,
      "step": 7027,
      "training_step_time": 0.41953086853027344
    },
    {
      "epoch": 4.28955078125e-05,
      "model_forward_time": 0.11488723754882812,
      "step": 7028
    },
    {
      "epoch": 4.28955078125e-05,
      "step": 7028,
      "training_step_time": 0.44048094749450684
    },
    {
      "epoch": 4.2901611328125e-05,
      "model_forward_time": 0.11469244956970215,
      "step": 7029
    },
    {
      "epoch": 4.2901611328125e-05,
      "step": 7029,
      "training_step_time": 0.41809749603271484
    },
    {
      "epoch": 4.290771484375e-05,
      "grad_norm": 0.26657918095588684,
      "learning_rate": 9.877167331565816e-05,
      "loss": 0.074,
      "step": 7030
    },
    {
      "epoch": 4.290771484375e-05,
      "model_forward_time": 0.11512970924377441,
      "step": 7030
    },
    {
      "epoch": 4.290771484375e-05,
      "step": 7030,
      "training_step_time": 0.39975905418395996
    },
    {
      "epoch": 4.2913818359375e-05,
      "model_forward_time": 0.11499238014221191,
      "step": 7031
    },
    {
      "epoch": 4.2913818359375e-05,
      "step": 7031,
      "training_step_time": 0.38225698471069336
    },
    {
      "epoch": 4.2919921875e-05,
      "model_forward_time": 0.11489343643188477,
      "step": 7032
    },
    {
      "epoch": 4.2919921875e-05,
      "step": 7032,
      "training_step_time": 0.3951551914215088
    },
    {
      "epoch": 4.2926025390625e-05,
      "model_forward_time": 0.11480164527893066,
      "step": 7033
    },
    {
      "epoch": 4.2926025390625e-05,
      "step": 7033,
      "training_step_time": 0.390444278717041
    },
    {
      "epoch": 4.293212890625e-05,
      "model_forward_time": 0.11510157585144043,
      "step": 7034
    },
    {
      "epoch": 4.293212890625e-05,
      "step": 7034,
      "training_step_time": 0.3945577144622803
    },
    {
      "epoch": 4.2938232421875e-05,
      "model_forward_time": 0.1152651309967041,
      "step": 7035
    },
    {
      "epoch": 4.2938232421875e-05,
      "step": 7035,
      "training_step_time": 0.4095475673675537
    },
    {
      "epoch": 4.29443359375e-05,
      "model_forward_time": 0.11535429954528809,
      "step": 7036
    },
    {
      "epoch": 4.29443359375e-05,
      "step": 7036,
      "training_step_time": 0.39010119438171387
    },
    {
      "epoch": 4.2950439453125e-05,
      "model_forward_time": 0.11556744575500488,
      "step": 7037
    },
    {
      "epoch": 4.2950439453125e-05,
      "step": 7037,
      "training_step_time": 0.4043538570404053
    },
    {
      "epoch": 4.295654296875e-05,
      "model_forward_time": 0.11501240730285645,
      "step": 7038
    },
    {
      "epoch": 4.295654296875e-05,
      "step": 7038,
      "training_step_time": 0.3934650421142578
    },
    {
      "epoch": 4.2962646484375e-05,
      "model_forward_time": 0.11524009704589844,
      "step": 7039
    },
    {
      "epoch": 4.2962646484375e-05,
      "step": 7039,
      "training_step_time": 0.3876528739929199
    },
    {
      "epoch": 4.296875e-05,
      "grad_norm": 0.2848794460296631,
      "learning_rate": 9.876559507687267e-05,
      "loss": 0.0752,
      "step": 7040
    },
    {
      "epoch": 4.296875e-05,
      "model_forward_time": 0.11536145210266113,
      "step": 7040
    },
    {
      "epoch": 4.296875e-05,
      "step": 7040,
      "training_step_time": 0.40313720703125
    },
    {
      "epoch": 4.2974853515625e-05,
      "model_forward_time": 0.11587715148925781,
      "step": 7041
    },
    {
      "epoch": 4.2974853515625e-05,
      "step": 7041,
      "training_step_time": 0.39973974227905273
    },
    {
      "epoch": 4.298095703125e-05,
      "model_forward_time": 0.11562919616699219,
      "step": 7042
    },
    {
      "epoch": 4.298095703125e-05,
      "step": 7042,
      "training_step_time": 0.46783876419067383
    },
    {
      "epoch": 4.2987060546875e-05,
      "model_forward_time": 0.11520576477050781,
      "step": 7043
    },
    {
      "epoch": 4.2987060546875e-05,
      "step": 7043,
      "training_step_time": 0.5143241882324219
    },
    {
      "epoch": 4.29931640625e-05,
      "model_forward_time": 0.11530208587646484,
      "step": 7044
    },
    {
      "epoch": 4.29931640625e-05,
      "step": 7044,
      "training_step_time": 0.4355313777923584
    },
    {
      "epoch": 4.2999267578125e-05,
      "model_forward_time": 0.11505770683288574,
      "step": 7045
    },
    {
      "epoch": 4.2999267578125e-05,
      "step": 7045,
      "training_step_time": 0.4283766746520996
    },
    {
      "epoch": 4.300537109375e-05,
      "model_forward_time": 0.11487102508544922,
      "step": 7046
    },
    {
      "epoch": 4.300537109375e-05,
      "step": 7046,
      "training_step_time": 0.39158177375793457
    },
    {
      "epoch": 4.3011474609375e-05,
      "model_forward_time": 0.11520266532897949,
      "step": 7047
    },
    {
      "epoch": 4.3011474609375e-05,
      "step": 7047,
      "training_step_time": 0.44811320304870605
    },
    {
      "epoch": 4.3017578125e-05,
      "model_forward_time": 0.11480283737182617,
      "step": 7048
    },
    {
      "epoch": 4.3017578125e-05,
      "step": 7048,
      "training_step_time": 0.38622283935546875
    },
    {
      "epoch": 4.3023681640625e-05,
      "model_forward_time": 0.11509966850280762,
      "step": 7049
    },
    {
      "epoch": 4.3023681640625e-05,
      "step": 7049,
      "training_step_time": 0.42121195793151855
    },
    {
      "epoch": 4.302978515625e-05,
      "grad_norm": 0.27561700344085693,
      "learning_rate": 9.8759502024387e-05,
      "loss": 0.0808,
      "step": 7050
    },
    {
      "epoch": 4.302978515625e-05,
      "model_forward_time": 0.11524677276611328,
      "step": 7050
    },
    {
      "epoch": 4.302978515625e-05,
      "step": 7050,
      "training_step_time": 0.3939073085784912
    },
    {
      "epoch": 4.3035888671875e-05,
      "model_forward_time": 0.11494135856628418,
      "step": 7051
    },
    {
      "epoch": 4.3035888671875e-05,
      "step": 7051,
      "training_step_time": 0.3943138122558594
    },
    {
      "epoch": 4.30419921875e-05,
      "model_forward_time": 0.11537361145019531,
      "step": 7052
    },
    {
      "epoch": 4.30419921875e-05,
      "step": 7052,
      "training_step_time": 0.3936502933502197
    },
    {
      "epoch": 4.3048095703125e-05,
      "model_forward_time": 0.11489057540893555,
      "step": 7053
    },
    {
      "epoch": 4.3048095703125e-05,
      "step": 7053,
      "training_step_time": 0.4018070697784424
    },
    {
      "epoch": 4.305419921875e-05,
      "model_forward_time": 0.11497378349304199,
      "step": 7054
    },
    {
      "epoch": 4.305419921875e-05,
      "step": 7054,
      "training_step_time": 0.40278029441833496
    },
    {
      "epoch": 4.3060302734375e-05,
      "model_forward_time": 0.11508035659790039,
      "step": 7055
    },
    {
      "epoch": 4.3060302734375e-05,
      "step": 7055,
      "training_step_time": 0.3840065002441406
    },
    {
      "epoch": 4.306640625e-05,
      "model_forward_time": 0.11548304557800293,
      "step": 7056
    },
    {
      "epoch": 4.306640625e-05,
      "step": 7056,
      "training_step_time": 0.45664095878601074
    },
    {
      "epoch": 4.3072509765625e-05,
      "model_forward_time": 0.11515617370605469,
      "step": 7057
    },
    {
      "epoch": 4.3072509765625e-05,
      "step": 7057,
      "training_step_time": 0.41420912742614746
    },
    {
      "epoch": 4.307861328125e-05,
      "model_forward_time": 0.11515212059020996,
      "step": 7058
    },
    {
      "epoch": 4.307861328125e-05,
      "step": 7058,
      "training_step_time": 0.4841806888580322
    },
    {
      "epoch": 4.3084716796875e-05,
      "model_forward_time": 0.11597156524658203,
      "step": 7059
    },
    {
      "epoch": 4.3084716796875e-05,
      "step": 7059,
      "training_step_time": 0.5052368640899658
    },
    {
      "epoch": 4.30908203125e-05,
      "grad_norm": 0.28276750445365906,
      "learning_rate": 9.875339416005202e-05,
      "loss": 0.0702,
      "step": 7060
    },
    {
      "epoch": 4.30908203125e-05,
      "model_forward_time": 0.11461448669433594,
      "step": 7060
    },
    {
      "epoch": 4.30908203125e-05,
      "step": 7060,
      "training_step_time": 0.4505457878112793
    },
    {
      "epoch": 4.3096923828125e-05,
      "model_forward_time": 0.11531209945678711,
      "step": 7061
    },
    {
      "epoch": 4.3096923828125e-05,
      "step": 7061,
      "training_step_time": 0.39846062660217285
    },
    {
      "epoch": 4.310302734375e-05,
      "model_forward_time": 0.11479306221008301,
      "step": 7062
    },
    {
      "epoch": 4.310302734375e-05,
      "step": 7062,
      "training_step_time": 0.3906874656677246
    },
    {
      "epoch": 4.3109130859375e-05,
      "model_forward_time": 0.11476969718933105,
      "step": 7063
    },
    {
      "epoch": 4.3109130859375e-05,
      "step": 7063,
      "training_step_time": 0.389249324798584
    },
    {
      "epoch": 4.3115234375e-05,
      "model_forward_time": 0.11522841453552246,
      "step": 7064
    },
    {
      "epoch": 4.3115234375e-05,
      "step": 7064,
      "training_step_time": 0.3837850093841553
    },
    {
      "epoch": 4.3121337890625e-05,
      "model_forward_time": 0.11513471603393555,
      "step": 7065
    },
    {
      "epoch": 4.3121337890625e-05,
      "step": 7065,
      "training_step_time": 1.2139549255371094
    },
    {
      "epoch": 4.312744140625e-05,
      "model_forward_time": 0.11405658721923828,
      "step": 7066
    },
    {
      "epoch": 4.312744140625e-05,
      "step": 7066,
      "training_step_time": 0.3813040256500244
    },
    {
      "epoch": 4.3133544921875e-05,
      "model_forward_time": 0.11401820182800293,
      "step": 7067
    },
    {
      "epoch": 4.3133544921875e-05,
      "step": 7067,
      "training_step_time": 0.3893458843231201
    },
    {
      "epoch": 4.31396484375e-05,
      "model_forward_time": 0.11330652236938477,
      "step": 7068
    },
    {
      "epoch": 4.31396484375e-05,
      "step": 7068,
      "training_step_time": 0.3866918087005615
    },
    {
      "epoch": 4.3145751953125e-05,
      "model_forward_time": 0.11386370658874512,
      "step": 7069
    },
    {
      "epoch": 4.3145751953125e-05,
      "step": 7069,
      "training_step_time": 0.39736008644104004
    },
    {
      "epoch": 4.315185546875e-05,
      "grad_norm": 0.20545582473278046,
      "learning_rate": 9.874727148572315e-05,
      "loss": 0.0756,
      "step": 7070
    },
    {
      "epoch": 4.315185546875e-05,
      "model_forward_time": 0.1153404712677002,
      "step": 7070
    },
    {
      "epoch": 4.315185546875e-05,
      "step": 7070,
      "training_step_time": 0.42199063301086426
    },
    {
      "epoch": 4.3157958984375e-05,
      "model_forward_time": 0.11529231071472168,
      "step": 7071
    },
    {
      "epoch": 4.3157958984375e-05,
      "step": 7071,
      "training_step_time": 0.5649263858795166
    },
    {
      "epoch": 4.31640625e-05,
      "model_forward_time": 0.11501812934875488,
      "step": 7072
    },
    {
      "epoch": 4.31640625e-05,
      "step": 7072,
      "training_step_time": 0.39391469955444336
    },
    {
      "epoch": 4.3170166015625e-05,
      "model_forward_time": 0.11473298072814941,
      "step": 7073
    },
    {
      "epoch": 4.3170166015625e-05,
      "step": 7073,
      "training_step_time": 0.4503445625305176
    },
    {
      "epoch": 4.317626953125e-05,
      "model_forward_time": 0.1145164966583252,
      "step": 7074
    },
    {
      "epoch": 4.317626953125e-05,
      "step": 7074,
      "training_step_time": 0.3859274387359619
    },
    {
      "epoch": 4.3182373046875e-05,
      "model_forward_time": 0.11524462699890137,
      "step": 7075
    },
    {
      "epoch": 4.3182373046875e-05,
      "step": 7075,
      "training_step_time": 0.3865659236907959
    },
    {
      "epoch": 4.31884765625e-05,
      "model_forward_time": 0.11528325080871582,
      "step": 7076
    },
    {
      "epoch": 4.31884765625e-05,
      "step": 7076,
      "training_step_time": 0.40955114364624023
    },
    {
      "epoch": 4.3194580078125e-05,
      "model_forward_time": 0.11460518836975098,
      "step": 7077
    },
    {
      "epoch": 4.3194580078125e-05,
      "step": 7077,
      "training_step_time": 0.9024257659912109
    },
    {
      "epoch": 4.320068359375e-05,
      "model_forward_time": 0.11448025703430176,
      "step": 7078
    },
    {
      "epoch": 4.320068359375e-05,
      "step": 7078,
      "training_step_time": 0.38158512115478516
    },
    {
      "epoch": 4.3206787109375e-05,
      "model_forward_time": 0.11430811882019043,
      "step": 7079
    },
    {
      "epoch": 4.3206787109375e-05,
      "step": 7079,
      "training_step_time": 0.3872208595275879
    },
    {
      "epoch": 4.3212890625e-05,
      "grad_norm": 0.229121595621109,
      "learning_rate": 9.87411340032603e-05,
      "loss": 0.0679,
      "step": 7080
    },
    {
      "epoch": 4.3212890625e-05,
      "model_forward_time": 0.11392068862915039,
      "step": 7080
    },
    {
      "epoch": 4.3212890625e-05,
      "step": 7080,
      "training_step_time": 0.38179826736450195
    },
    {
      "epoch": 4.3218994140625e-05,
      "model_forward_time": 0.1142268180847168,
      "step": 7081
    },
    {
      "epoch": 4.3218994140625e-05,
      "step": 7081,
      "training_step_time": 0.38652729988098145
    },
    {
      "epoch": 4.322509765625e-05,
      "model_forward_time": 0.11380124092102051,
      "step": 7082
    },
    {
      "epoch": 4.322509765625e-05,
      "step": 7082,
      "training_step_time": 0.3823373317718506
    },
    {
      "epoch": 4.3231201171875e-05,
      "model_forward_time": 0.11557555198669434,
      "step": 7083
    },
    {
      "epoch": 4.3231201171875e-05,
      "step": 7083,
      "training_step_time": 0.667837381362915
    },
    {
      "epoch": 4.32373046875e-05,
      "model_forward_time": 0.11441898345947266,
      "step": 7084
    },
    {
      "epoch": 4.32373046875e-05,
      "step": 7084,
      "training_step_time": 0.5010812282562256
    },
    {
      "epoch": 4.3243408203125e-05,
      "model_forward_time": 0.11484551429748535,
      "step": 7085
    },
    {
      "epoch": 4.3243408203125e-05,
      "step": 7085,
      "training_step_time": 0.40898585319519043
    },
    {
      "epoch": 4.324951171875e-05,
      "model_forward_time": 0.11410188674926758,
      "step": 7086
    },
    {
      "epoch": 4.324951171875e-05,
      "step": 7086,
      "training_step_time": 0.47583794593811035
    },
    {
      "epoch": 4.3255615234375e-05,
      "model_forward_time": 0.1136777400970459,
      "step": 7087
    },
    {
      "epoch": 4.3255615234375e-05,
      "step": 7087,
      "training_step_time": 0.3980879783630371
    },
    {
      "epoch": 4.326171875e-05,
      "model_forward_time": 0.11450052261352539,
      "step": 7088
    },
    {
      "epoch": 4.326171875e-05,
      "step": 7088,
      "training_step_time": 0.39293861389160156
    },
    {
      "epoch": 4.3267822265625e-05,
      "model_forward_time": 0.11711740493774414,
      "step": 7089
    },
    {
      "epoch": 4.3267822265625e-05,
      "step": 7089,
      "training_step_time": 0.7455556392669678
    },
    {
      "epoch": 4.327392578125e-05,
      "grad_norm": 0.1392516791820526,
      "learning_rate": 9.873498171452789e-05,
      "loss": 0.0696,
      "step": 7090
    },
    {
      "epoch": 4.327392578125e-05,
      "model_forward_time": 0.11448025703430176,
      "step": 7090
    },
    {
      "epoch": 4.327392578125e-05,
      "step": 7090,
      "training_step_time": 0.3841850757598877
    },
    {
      "epoch": 4.3280029296875e-05,
      "model_forward_time": 0.11433219909667969,
      "step": 7091
    },
    {
      "epoch": 4.3280029296875e-05,
      "step": 7091,
      "training_step_time": 0.39414119720458984
    },
    {
      "epoch": 4.32861328125e-05,
      "model_forward_time": 0.11444282531738281,
      "step": 7092
    },
    {
      "epoch": 4.32861328125e-05,
      "step": 7092,
      "training_step_time": 0.3861966133117676
    },
    {
      "epoch": 4.3292236328125e-05,
      "model_forward_time": 0.11444926261901855,
      "step": 7093
    },
    {
      "epoch": 4.3292236328125e-05,
      "step": 7093,
      "training_step_time": 0.3915586471557617
    },
    {
      "epoch": 4.329833984375e-05,
      "model_forward_time": 0.1143486499786377,
      "step": 7094
    },
    {
      "epoch": 4.329833984375e-05,
      "step": 7094,
      "training_step_time": 0.39405298233032227
    },
    {
      "epoch": 4.3304443359375e-05,
      "model_forward_time": 0.11514925956726074,
      "step": 7095
    },
    {
      "epoch": 4.3304443359375e-05,
      "step": 7095,
      "training_step_time": 0.8927669525146484
    },
    {
      "epoch": 4.3310546875e-05,
      "model_forward_time": 0.11430168151855469,
      "step": 7096
    },
    {
      "epoch": 4.3310546875e-05,
      "step": 7096,
      "training_step_time": 0.3862638473510742
    },
    {
      "epoch": 4.3316650390625e-05,
      "model_forward_time": 0.1144261360168457,
      "step": 7097
    },
    {
      "epoch": 4.3316650390625e-05,
      "step": 7097,
      "training_step_time": 0.40982818603515625
    },
    {
      "epoch": 4.332275390625e-05,
      "model_forward_time": 0.1149747371673584,
      "step": 7098
    },
    {
      "epoch": 4.332275390625e-05,
      "step": 7098,
      "training_step_time": 0.47756290435791016
    },
    {
      "epoch": 4.3328857421875e-05,
      "model_forward_time": 0.1140282154083252,
      "step": 7099
    },
    {
      "epoch": 4.3328857421875e-05,
      "step": 7099,
      "training_step_time": 0.41507816314697266
    },
    {
      "epoch": 4.33349609375e-05,
      "grad_norm": 0.2934490442276001,
      "learning_rate": 9.872881462139479e-05,
      "loss": 0.0731,
      "step": 7100
    },
    {
      "epoch": 4.33349609375e-05,
      "model_forward_time": 0.11392998695373535,
      "step": 7100
    },
    {
      "epoch": 4.33349609375e-05,
      "step": 7100,
      "training_step_time": 0.38861894607543945
    },
    {
      "epoch": 4.3341064453125e-05,
      "model_forward_time": 0.11494040489196777,
      "step": 7101
    },
    {
      "epoch": 4.3341064453125e-05,
      "step": 7101,
      "training_step_time": 0.8289699554443359
    },
    {
      "epoch": 4.334716796875e-05,
      "model_forward_time": 0.11425042152404785,
      "step": 7102
    },
    {
      "epoch": 4.334716796875e-05,
      "step": 7102,
      "training_step_time": 0.3714463710784912
    },
    {
      "epoch": 4.3353271484375e-05,
      "model_forward_time": 0.11399722099304199,
      "step": 7103
    },
    {
      "epoch": 4.3353271484375e-05,
      "step": 7103,
      "training_step_time": 0.38494062423706055
    },
    {
      "epoch": 4.3359375e-05,
      "model_forward_time": 0.11391377449035645,
      "step": 7104
    },
    {
      "epoch": 4.3359375e-05,
      "step": 7104,
      "training_step_time": 0.3966646194458008
    },
    {
      "epoch": 4.3365478515625e-05,
      "model_forward_time": 0.11410140991210938,
      "step": 7105
    },
    {
      "epoch": 4.3365478515625e-05,
      "step": 7105,
      "training_step_time": 0.3873724937438965
    },
    {
      "epoch": 4.337158203125e-05,
      "model_forward_time": 0.11422181129455566,
      "step": 7106
    },
    {
      "epoch": 4.337158203125e-05,
      "step": 7106,
      "training_step_time": 0.3988969326019287
    },
    {
      "epoch": 4.3377685546875e-05,
      "model_forward_time": 0.1158442497253418,
      "step": 7107
    },
    {
      "epoch": 4.3377685546875e-05,
      "step": 7107,
      "training_step_time": 0.9444494247436523
    },
    {
      "epoch": 4.33837890625e-05,
      "model_forward_time": 0.1143491268157959,
      "step": 7108
    },
    {
      "epoch": 4.33837890625e-05,
      "step": 7108,
      "training_step_time": 0.36906957626342773
    },
    {
      "epoch": 4.3389892578125e-05,
      "model_forward_time": 0.11428046226501465,
      "step": 7109
    },
    {
      "epoch": 4.3389892578125e-05,
      "step": 7109,
      "training_step_time": 0.3904101848602295
    },
    {
      "epoch": 4.339599609375e-05,
      "grad_norm": 0.18469594419002533,
      "learning_rate": 9.872263272573443e-05,
      "loss": 0.0734,
      "step": 7110
    },
    {
      "epoch": 4.339599609375e-05,
      "model_forward_time": 0.11411619186401367,
      "step": 7110
    },
    {
      "epoch": 4.339599609375e-05,
      "step": 7110,
      "training_step_time": 0.4375340938568115
    },
    {
      "epoch": 4.3402099609375e-05,
      "model_forward_time": 0.11472773551940918,
      "step": 7111
    },
    {
      "epoch": 4.3402099609375e-05,
      "step": 7111,
      "training_step_time": 0.42187047004699707
    },
    {
      "epoch": 4.3408203125e-05,
      "model_forward_time": 0.11442708969116211,
      "step": 7112
    },
    {
      "epoch": 4.3408203125e-05,
      "step": 7112,
      "training_step_time": 0.4197063446044922
    },
    {
      "epoch": 4.3414306640625e-05,
      "model_forward_time": 0.1150052547454834,
      "step": 7113
    },
    {
      "epoch": 4.3414306640625e-05,
      "step": 7113,
      "training_step_time": 0.6723897457122803
    },
    {
      "epoch": 4.342041015625e-05,
      "model_forward_time": 0.1143648624420166,
      "step": 7114
    },
    {
      "epoch": 4.342041015625e-05,
      "step": 7114,
      "training_step_time": 0.37857627868652344
    },
    {
      "epoch": 4.3426513671875e-05,
      "model_forward_time": 0.11424779891967773,
      "step": 7115
    },
    {
      "epoch": 4.3426513671875e-05,
      "step": 7115,
      "training_step_time": 0.3938121795654297
    },
    {
      "epoch": 4.34326171875e-05,
      "model_forward_time": 0.11511540412902832,
      "step": 7116
    },
    {
      "epoch": 4.34326171875e-05,
      "step": 7116,
      "training_step_time": 0.3887028694152832
    },
    {
      "epoch": 4.3438720703125e-05,
      "model_forward_time": 0.11417508125305176,
      "step": 7117
    },
    {
      "epoch": 4.3438720703125e-05,
      "step": 7117,
      "training_step_time": 0.3921699523925781
    },
    {
      "epoch": 4.344482421875e-05,
      "model_forward_time": 0.11475467681884766,
      "step": 7118
    },
    {
      "epoch": 4.344482421875e-05,
      "step": 7118,
      "training_step_time": 0.4060201644897461
    },
    {
      "epoch": 4.3450927734375e-05,
      "model_forward_time": 0.11486697196960449,
      "step": 7119
    },
    {
      "epoch": 4.3450927734375e-05,
      "step": 7119,
      "training_step_time": 0.9492006301879883
    },
    {
      "epoch": 4.345703125e-05,
      "grad_norm": 0.2868138551712036,
      "learning_rate": 9.871643602942469e-05,
      "loss": 0.0722,
      "step": 7120
    },
    {
      "epoch": 4.345703125e-05,
      "model_forward_time": 0.11361026763916016,
      "step": 7120
    },
    {
      "epoch": 4.345703125e-05,
      "step": 7120,
      "training_step_time": 0.3845503330230713
    },
    {
      "epoch": 4.3463134765625e-05,
      "model_forward_time": 0.11406373977661133,
      "step": 7121
    },
    {
      "epoch": 4.3463134765625e-05,
      "step": 7121,
      "training_step_time": 0.39107298851013184
    },
    {
      "epoch": 4.346923828125e-05,
      "model_forward_time": 0.11400127410888672,
      "step": 7122
    },
    {
      "epoch": 4.346923828125e-05,
      "step": 7122,
      "training_step_time": 0.38780975341796875
    },
    {
      "epoch": 4.3475341796875e-05,
      "model_forward_time": 0.11461520195007324,
      "step": 7123
    },
    {
      "epoch": 4.3475341796875e-05,
      "step": 7123,
      "training_step_time": 0.42769527435302734
    },
    {
      "epoch": 4.34814453125e-05,
      "model_forward_time": 0.11478519439697266,
      "step": 7124
    },
    {
      "epoch": 4.34814453125e-05,
      "step": 7124,
      "training_step_time": 0.4484677314758301
    },
    {
      "epoch": 4.3487548828125e-05,
      "model_forward_time": 0.11500382423400879,
      "step": 7125
    },
    {
      "epoch": 4.3487548828125e-05,
      "step": 7125,
      "training_step_time": 0.8755276203155518
    },
    {
      "epoch": 4.349365234375e-05,
      "model_forward_time": 0.11389493942260742,
      "step": 7126
    },
    {
      "epoch": 4.349365234375e-05,
      "step": 7126,
      "training_step_time": 0.37404608726501465
    },
    {
      "epoch": 4.3499755859375e-05,
      "model_forward_time": 0.1142275333404541,
      "step": 7127
    },
    {
      "epoch": 4.3499755859375e-05,
      "step": 7127,
      "training_step_time": 0.39301466941833496
    },
    {
      "epoch": 4.3505859375e-05,
      "model_forward_time": 0.1141808032989502,
      "step": 7128
    },
    {
      "epoch": 4.3505859375e-05,
      "step": 7128,
      "training_step_time": 0.39119696617126465
    },
    {
      "epoch": 4.3511962890625e-05,
      "model_forward_time": 0.1139688491821289,
      "step": 7129
    },
    {
      "epoch": 4.3511962890625e-05,
      "step": 7129,
      "training_step_time": 0.39042115211486816
    },
    {
      "epoch": 4.351806640625e-05,
      "grad_norm": 0.21361446380615234,
      "learning_rate": 9.871022453434798e-05,
      "loss": 0.0675,
      "step": 7130
    },
    {
      "epoch": 4.351806640625e-05,
      "model_forward_time": 0.11435985565185547,
      "step": 7130
    },
    {
      "epoch": 4.351806640625e-05,
      "step": 7130,
      "training_step_time": 0.3859102725982666
    },
    {
      "epoch": 4.3524169921875e-05,
      "model_forward_time": 0.11479377746582031,
      "step": 7131
    },
    {
      "epoch": 4.3524169921875e-05,
      "step": 7131,
      "training_step_time": 0.8984386920928955
    },
    {
      "epoch": 4.35302734375e-05,
      "model_forward_time": 0.1141500473022461,
      "step": 7132
    },
    {
      "epoch": 4.35302734375e-05,
      "step": 7132,
      "training_step_time": 0.3845407962799072
    },
    {
      "epoch": 4.3536376953125e-05,
      "model_forward_time": 0.11386561393737793,
      "step": 7133
    },
    {
      "epoch": 4.3536376953125e-05,
      "step": 7133,
      "training_step_time": 0.3814222812652588
    },
    {
      "epoch": 4.354248046875e-05,
      "model_forward_time": 0.1143805980682373,
      "step": 7134
    },
    {
      "epoch": 4.354248046875e-05,
      "step": 7134,
      "training_step_time": 0.3949711322784424
    },
    {
      "epoch": 4.3548583984375e-05,
      "model_forward_time": 0.11599349975585938,
      "step": 7135
    },
    {
      "epoch": 4.3548583984375e-05,
      "step": 7135,
      "training_step_time": 0.391632080078125
    },
    {
      "epoch": 4.35546875e-05,
      "model_forward_time": 0.11421489715576172,
      "step": 7136
    },
    {
      "epoch": 4.35546875e-05,
      "step": 7136,
      "training_step_time": 0.47711968421936035
    },
    {
      "epoch": 4.3560791015625e-05,
      "model_forward_time": 0.11678147315979004,
      "step": 7137
    },
    {
      "epoch": 4.3560791015625e-05,
      "step": 7137,
      "training_step_time": 0.7325947284698486
    },
    {
      "epoch": 4.356689453125e-05,
      "model_forward_time": 0.11428403854370117,
      "step": 7138
    },
    {
      "epoch": 4.356689453125e-05,
      "step": 7138,
      "training_step_time": 0.44870996475219727
    },
    {
      "epoch": 4.3572998046875e-05,
      "model_forward_time": 0.11481094360351562,
      "step": 7139
    },
    {
      "epoch": 4.3572998046875e-05,
      "step": 7139,
      "training_step_time": 0.38056230545043945
    },
    {
      "epoch": 4.35791015625e-05,
      "grad_norm": 0.27341535687446594,
      "learning_rate": 9.870399824239117e-05,
      "loss": 0.0731,
      "step": 7140
    },
    {
      "epoch": 4.35791015625e-05,
      "model_forward_time": 0.11430668830871582,
      "step": 7140
    },
    {
      "epoch": 4.35791015625e-05,
      "step": 7140,
      "training_step_time": 0.39251065254211426
    },
    {
      "epoch": 4.3585205078125e-05,
      "model_forward_time": 0.11401677131652832,
      "step": 7141
    },
    {
      "epoch": 4.3585205078125e-05,
      "step": 7141,
      "training_step_time": 0.3898165225982666
    },
    {
      "epoch": 4.359130859375e-05,
      "model_forward_time": 0.11490488052368164,
      "step": 7142
    },
    {
      "epoch": 4.359130859375e-05,
      "step": 7142,
      "training_step_time": 0.394122838973999
    },
    {
      "epoch": 4.3597412109375e-05,
      "model_forward_time": 0.11533832550048828,
      "step": 7143
    },
    {
      "epoch": 4.3597412109375e-05,
      "step": 7143,
      "training_step_time": 1.0409626960754395
    },
    {
      "epoch": 4.3603515625e-05,
      "model_forward_time": 0.11423349380493164,
      "step": 7144
    },
    {
      "epoch": 4.3603515625e-05,
      "step": 7144,
      "training_step_time": 0.3829677104949951
    },
    {
      "epoch": 4.3609619140625e-05,
      "model_forward_time": 0.11461758613586426,
      "step": 7145
    },
    {
      "epoch": 4.3609619140625e-05,
      "step": 7145,
      "training_step_time": 0.3805215358734131
    },
    {
      "epoch": 4.361572265625e-05,
      "model_forward_time": 0.113739013671875,
      "step": 7146
    },
    {
      "epoch": 4.361572265625e-05,
      "step": 7146,
      "training_step_time": 0.39362192153930664
    },
    {
      "epoch": 4.3621826171875e-05,
      "model_forward_time": 0.1144096851348877,
      "step": 7147
    },
    {
      "epoch": 4.3621826171875e-05,
      "step": 7147,
      "training_step_time": 0.3940894603729248
    },
    {
      "epoch": 4.36279296875e-05,
      "model_forward_time": 0.11472868919372559,
      "step": 7148
    },
    {
      "epoch": 4.36279296875e-05,
      "step": 7148,
      "training_step_time": 0.4697844982147217
    },
    {
      "epoch": 4.3634033203125e-05,
      "model_forward_time": 0.11542510986328125,
      "step": 7149
    },
    {
      "epoch": 4.3634033203125e-05,
      "step": 7149,
      "training_step_time": 0.6552770137786865
    },
    {
      "epoch": 4.364013671875e-05,
      "grad_norm": 0.2570369243621826,
      "learning_rate": 9.869775715544562e-05,
      "loss": 0.0745,
      "step": 7150
    },
    {
      "epoch": 4.364013671875e-05,
      "model_forward_time": 0.11478090286254883,
      "step": 7150
    },
    {
      "epoch": 4.364013671875e-05,
      "step": 7150,
      "training_step_time": 0.4331505298614502
    },
    {
      "epoch": 4.3646240234375e-05,
      "model_forward_time": 0.11451506614685059,
      "step": 7151
    },
    {
      "epoch": 4.3646240234375e-05,
      "step": 7151,
      "training_step_time": 0.47214722633361816
    },
    {
      "epoch": 4.365234375e-05,
      "model_forward_time": 0.11437773704528809,
      "step": 7152
    },
    {
      "epoch": 4.365234375e-05,
      "step": 7152,
      "training_step_time": 0.40067195892333984
    },
    {
      "epoch": 4.3658447265625e-05,
      "model_forward_time": 0.11463451385498047,
      "step": 7153
    },
    {
      "epoch": 4.3658447265625e-05,
      "step": 7153,
      "training_step_time": 0.38204312324523926
    },
    {
      "epoch": 4.366455078125e-05,
      "model_forward_time": 0.11458301544189453,
      "step": 7154
    },
    {
      "epoch": 4.366455078125e-05,
      "step": 7154,
      "training_step_time": 0.39054155349731445
    },
    {
      "epoch": 4.3670654296875e-05,
      "model_forward_time": 0.11473870277404785,
      "step": 7155
    },
    {
      "epoch": 4.3670654296875e-05,
      "step": 7155,
      "training_step_time": 0.7598781585693359
    },
    {
      "epoch": 4.36767578125e-05,
      "model_forward_time": 0.11429023742675781,
      "step": 7156
    },
    {
      "epoch": 4.36767578125e-05,
      "step": 7156,
      "training_step_time": 0.3858020305633545
    },
    {
      "epoch": 4.3682861328125e-05,
      "model_forward_time": 0.11544561386108398,
      "step": 7157
    },
    {
      "epoch": 4.3682861328125e-05,
      "step": 7157,
      "training_step_time": 0.4009740352630615
    },
    {
      "epoch": 4.368896484375e-05,
      "model_forward_time": 0.1145625114440918,
      "step": 7158
    },
    {
      "epoch": 4.368896484375e-05,
      "step": 7158,
      "training_step_time": 0.39395594596862793
    },
    {
      "epoch": 4.3695068359375e-05,
      "model_forward_time": 0.11368441581726074,
      "step": 7159
    },
    {
      "epoch": 4.3695068359375e-05,
      "step": 7159,
      "training_step_time": 0.3907203674316406
    },
    {
      "epoch": 4.3701171875e-05,
      "grad_norm": 0.2273266315460205,
      "learning_rate": 9.869150127540727e-05,
      "loss": 0.0744,
      "step": 7160
    },
    {
      "epoch": 4.3701171875e-05,
      "model_forward_time": 0.11461424827575684,
      "step": 7160
    },
    {
      "epoch": 4.3701171875e-05,
      "step": 7160,
      "training_step_time": 0.40871214866638184
    },
    {
      "epoch": 4.3707275390625e-05,
      "model_forward_time": 0.11458277702331543,
      "step": 7161
    },
    {
      "epoch": 4.3707275390625e-05,
      "step": 7161,
      "training_step_time": 0.5922696590423584
    },
    {
      "epoch": 4.371337890625e-05,
      "model_forward_time": 0.11428642272949219,
      "step": 7162
    },
    {
      "epoch": 4.371337890625e-05,
      "step": 7162,
      "training_step_time": 0.45217061042785645
    },
    {
      "epoch": 4.3719482421875e-05,
      "model_forward_time": 0.11479520797729492,
      "step": 7163
    },
    {
      "epoch": 4.3719482421875e-05,
      "step": 7163,
      "training_step_time": 0.4753270149230957
    },
    {
      "epoch": 4.37255859375e-05,
      "model_forward_time": 0.11464428901672363,
      "step": 7164
    },
    {
      "epoch": 4.37255859375e-05,
      "step": 7164,
      "training_step_time": 0.46712565422058105
    },
    {
      "epoch": 4.3731689453125e-05,
      "model_forward_time": 0.11407041549682617,
      "step": 7165
    },
    {
      "epoch": 4.3731689453125e-05,
      "step": 7165,
      "training_step_time": 0.4771718978881836
    },
    {
      "epoch": 4.373779296875e-05,
      "model_forward_time": 0.11409807205200195,
      "step": 7166
    },
    {
      "epoch": 4.373779296875e-05,
      "step": 7166,
      "training_step_time": 0.38736844062805176
    },
    {
      "epoch": 4.3743896484375e-05,
      "model_forward_time": 0.11484694480895996,
      "step": 7167
    },
    {
      "epoch": 4.3743896484375e-05,
      "step": 7167,
      "training_step_time": 0.4063565731048584
    },
    {
      "epoch": 4.375e-05,
      "model_forward_time": 0.11510872840881348,
      "step": 7168
    },
    {
      "epoch": 4.375e-05,
      "step": 7168,
      "training_step_time": 0.3857409954071045
    },
    {
      "epoch": 4.3756103515625e-05,
      "model_forward_time": 0.11512327194213867,
      "step": 7169
    },
    {
      "epoch": 4.3756103515625e-05,
      "step": 7169,
      "training_step_time": 0.3975663185119629
    },
    {
      "epoch": 4.376220703125e-05,
      "grad_norm": 0.27688005566596985,
      "learning_rate": 9.868523060417646e-05,
      "loss": 0.0769,
      "step": 7170
    },
    {
      "epoch": 4.376220703125e-05,
      "model_forward_time": 0.11464452743530273,
      "step": 7170
    },
    {
      "epoch": 4.376220703125e-05,
      "step": 7170,
      "training_step_time": 0.39196205139160156
    },
    {
      "epoch": 4.3768310546875e-05,
      "model_forward_time": 0.1149132251739502,
      "step": 7171
    },
    {
      "epoch": 4.3768310546875e-05,
      "step": 7171,
      "training_step_time": 0.39555978775024414
    },
    {
      "epoch": 4.37744140625e-05,
      "model_forward_time": 0.11528563499450684,
      "step": 7172
    },
    {
      "epoch": 4.37744140625e-05,
      "step": 7172,
      "training_step_time": 0.39545392990112305
    },
    {
      "epoch": 4.3780517578125e-05,
      "model_forward_time": 0.11513376235961914,
      "step": 7173
    },
    {
      "epoch": 4.3780517578125e-05,
      "step": 7173,
      "training_step_time": 0.930232048034668
    },
    {
      "epoch": 4.378662109375e-05,
      "model_forward_time": 0.11447453498840332,
      "step": 7174
    },
    {
      "epoch": 4.378662109375e-05,
      "step": 7174,
      "training_step_time": 0.37693357467651367
    },
    {
      "epoch": 4.3792724609375e-05,
      "model_forward_time": 0.11466574668884277,
      "step": 7175
    },
    {
      "epoch": 4.3792724609375e-05,
      "step": 7175,
      "training_step_time": 0.4446566104888916
    },
    {
      "epoch": 4.3798828125e-05,
      "model_forward_time": 0.11449360847473145,
      "step": 7176
    },
    {
      "epoch": 4.3798828125e-05,
      "step": 7176,
      "training_step_time": 0.3905055522918701
    },
    {
      "epoch": 4.3804931640625e-05,
      "model_forward_time": 0.11451196670532227,
      "step": 7177
    },
    {
      "epoch": 4.3804931640625e-05,
      "step": 7177,
      "training_step_time": 0.45825934410095215
    },
    {
      "epoch": 4.381103515625e-05,
      "model_forward_time": 0.11527109146118164,
      "step": 7178
    },
    {
      "epoch": 4.381103515625e-05,
      "step": 7178,
      "training_step_time": 0.4500296115875244
    },
    {
      "epoch": 4.3817138671875e-05,
      "model_forward_time": 0.11487984657287598,
      "step": 7179
    },
    {
      "epoch": 4.3817138671875e-05,
      "step": 7179,
      "training_step_time": 0.5918638706207275
    },
    {
      "epoch": 4.38232421875e-05,
      "grad_norm": 0.2177583873271942,
      "learning_rate": 9.867894514365802e-05,
      "loss": 0.0693,
      "step": 7180
    },
    {
      "epoch": 4.38232421875e-05,
      "model_forward_time": 0.11402297019958496,
      "step": 7180
    },
    {
      "epoch": 4.38232421875e-05,
      "step": 7180,
      "training_step_time": 0.3784472942352295
    },
    {
      "epoch": 4.3829345703125e-05,
      "model_forward_time": 0.11502933502197266,
      "step": 7181
    },
    {
      "epoch": 4.3829345703125e-05,
      "step": 7181,
      "training_step_time": 0.39751696586608887
    },
    {
      "epoch": 4.383544921875e-05,
      "model_forward_time": 0.11433839797973633,
      "step": 7182
    },
    {
      "epoch": 4.383544921875e-05,
      "step": 7182,
      "training_step_time": 0.40323519706726074
    },
    {
      "epoch": 4.3841552734375e-05,
      "model_forward_time": 0.1147317886352539,
      "step": 7183
    },
    {
      "epoch": 4.3841552734375e-05,
      "step": 7183,
      "training_step_time": 0.38762640953063965
    },
    {
      "epoch": 4.384765625e-05,
      "model_forward_time": 0.1147146224975586,
      "step": 7184
    },
    {
      "epoch": 4.384765625e-05,
      "step": 7184,
      "training_step_time": 0.387988805770874
    },
    {
      "epoch": 4.3853759765625e-05,
      "model_forward_time": 0.11508727073669434,
      "step": 7185
    },
    {
      "epoch": 4.3853759765625e-05,
      "step": 7185,
      "training_step_time": 0.9672739505767822
    },
    {
      "epoch": 4.385986328125e-05,
      "model_forward_time": 0.11437296867370605,
      "step": 7186
    },
    {
      "epoch": 4.385986328125e-05,
      "step": 7186,
      "training_step_time": 0.3817431926727295
    },
    {
      "epoch": 4.3865966796875e-05,
      "model_forward_time": 0.11438632011413574,
      "step": 7187
    },
    {
      "epoch": 4.3865966796875e-05,
      "step": 7187,
      "training_step_time": 0.3935425281524658
    },
    {
      "epoch": 4.38720703125e-05,
      "model_forward_time": 0.11462903022766113,
      "step": 7188
    },
    {
      "epoch": 4.38720703125e-05,
      "step": 7188,
      "training_step_time": 0.36331963539123535
    },
    {
      "epoch": 4.3878173828125e-05,
      "model_forward_time": 0.11530280113220215,
      "step": 7189
    },
    {
      "epoch": 4.3878173828125e-05,
      "step": 7189,
      "training_step_time": 0.4078075885772705
    },
    {
      "epoch": 4.388427734375e-05,
      "grad_norm": 0.3022494912147522,
      "learning_rate": 9.867264489576135e-05,
      "loss": 0.0759,
      "step": 7190
    },
    {
      "epoch": 4.388427734375e-05,
      "model_forward_time": 0.11432743072509766,
      "step": 7190
    },
    {
      "epoch": 4.388427734375e-05,
      "step": 7190,
      "training_step_time": 0.46987318992614746
    },
    {
      "epoch": 4.3890380859375e-05,
      "model_forward_time": 0.11528539657592773,
      "step": 7191
    },
    {
      "epoch": 4.3890380859375e-05,
      "step": 7191,
      "training_step_time": 0.4520721435546875
    },
    {
      "epoch": 4.3896484375e-05,
      "model_forward_time": 0.11491227149963379,
      "step": 7192
    },
    {
      "epoch": 4.3896484375e-05,
      "step": 7192,
      "training_step_time": 0.4690413475036621
    },
    {
      "epoch": 4.3902587890625e-05,
      "model_forward_time": 0.11467504501342773,
      "step": 7193
    },
    {
      "epoch": 4.3902587890625e-05,
      "step": 7193,
      "training_step_time": 0.38942694664001465
    },
    {
      "epoch": 4.390869140625e-05,
      "model_forward_time": 0.11418986320495605,
      "step": 7194
    },
    {
      "epoch": 4.390869140625e-05,
      "step": 7194,
      "training_step_time": 0.42082881927490234
    },
    {
      "epoch": 4.3914794921875e-05,
      "model_forward_time": 0.11461353302001953,
      "step": 7195
    },
    {
      "epoch": 4.3914794921875e-05,
      "step": 7195,
      "training_step_time": 0.39388084411621094
    },
    {
      "epoch": 4.39208984375e-05,
      "model_forward_time": 0.1143958568572998,
      "step": 7196
    },
    {
      "epoch": 4.39208984375e-05,
      "step": 7196,
      "training_step_time": 0.389603853225708
    },
    {
      "epoch": 4.3927001953125e-05,
      "model_forward_time": 0.11575698852539062,
      "step": 7197
    },
    {
      "epoch": 4.3927001953125e-05,
      "step": 7197,
      "training_step_time": 0.4014599323272705
    },
    {
      "epoch": 4.393310546875e-05,
      "model_forward_time": 0.11484789848327637,
      "step": 7198
    },
    {
      "epoch": 4.393310546875e-05,
      "step": 7198,
      "training_step_time": 0.4375917911529541
    },
    {
      "epoch": 4.3939208984375e-05,
      "model_forward_time": 0.11494231224060059,
      "step": 7199
    },
    {
      "epoch": 4.3939208984375e-05,
      "step": 7199,
      "training_step_time": 0.4322381019592285
    },
    {
      "epoch": 4.39453125e-05,
      "grad_norm": 0.28015702962875366,
      "learning_rate": 9.86663298624003e-05,
      "loss": 0.0705,
      "step": 7200
    },
    {
      "epoch": 4.39453125e-05,
      "model_forward_time": 0.11592245101928711,
      "step": 7200
    },
    {
      "epoch": 4.39453125e-05,
      "step": 7200,
      "training_step_time": 0.39786601066589355
    },
    {
      "epoch": 4.3951416015625e-05,
      "model_forward_time": 0.1148066520690918,
      "step": 7201
    },
    {
      "epoch": 4.3951416015625e-05,
      "step": 7201,
      "training_step_time": 0.3921678066253662
    },
    {
      "epoch": 4.395751953125e-05,
      "model_forward_time": 0.11515498161315918,
      "step": 7202
    },
    {
      "epoch": 4.395751953125e-05,
      "step": 7202,
      "training_step_time": 0.3986678123474121
    },
    {
      "epoch": 4.3963623046875e-05,
      "model_forward_time": 0.11561059951782227,
      "step": 7203
    },
    {
      "epoch": 4.3963623046875e-05,
      "step": 7203,
      "training_step_time": 0.4911618232727051
    },
    {
      "epoch": 4.39697265625e-05,
      "model_forward_time": 0.1148233413696289,
      "step": 7204
    },
    {
      "epoch": 4.39697265625e-05,
      "step": 7204,
      "training_step_time": 0.43700575828552246
    },
    {
      "epoch": 4.3975830078125e-05,
      "model_forward_time": 0.11563444137573242,
      "step": 7205
    },
    {
      "epoch": 4.3975830078125e-05,
      "step": 7205,
      "training_step_time": 0.49911046028137207
    },
    {
      "epoch": 4.398193359375e-05,
      "model_forward_time": 0.1149289608001709,
      "step": 7206
    },
    {
      "epoch": 4.398193359375e-05,
      "step": 7206,
      "training_step_time": 0.498790979385376
    },
    {
      "epoch": 4.3988037109375e-05,
      "model_forward_time": 0.11482453346252441,
      "step": 7207
    },
    {
      "epoch": 4.3988037109375e-05,
      "step": 7207,
      "training_step_time": 0.3928663730621338
    },
    {
      "epoch": 4.3994140625e-05,
      "model_forward_time": 0.11435270309448242,
      "step": 7208
    },
    {
      "epoch": 4.3994140625e-05,
      "step": 7208,
      "training_step_time": 0.39627718925476074
    },
    {
      "epoch": 4.4000244140625e-05,
      "model_forward_time": 0.11452102661132812,
      "step": 7209
    },
    {
      "epoch": 4.4000244140625e-05,
      "step": 7209,
      "training_step_time": 0.8832578659057617
    },
    {
      "epoch": 4.400634765625e-05,
      "grad_norm": 0.265351265668869,
      "learning_rate": 9.866000004549318e-05,
      "loss": 0.0665,
      "step": 7210
    },
    {
      "epoch": 4.400634765625e-05,
      "model_forward_time": 0.1141355037689209,
      "step": 7210
    },
    {
      "epoch": 4.400634765625e-05,
      "step": 7210,
      "training_step_time": 0.38260746002197266
    },
    {
      "epoch": 4.4012451171875e-05,
      "model_forward_time": 0.11355733871459961,
      "step": 7211
    },
    {
      "epoch": 4.4012451171875e-05,
      "step": 7211,
      "training_step_time": 0.4603605270385742
    },
    {
      "epoch": 4.40185546875e-05,
      "model_forward_time": 0.11418986320495605,
      "step": 7212
    },
    {
      "epoch": 4.40185546875e-05,
      "step": 7212,
      "training_step_time": 0.4131288528442383
    },
    {
      "epoch": 4.4024658203125e-05,
      "model_forward_time": 0.11383223533630371,
      "step": 7213
    },
    {
      "epoch": 4.4024658203125e-05,
      "step": 7213,
      "training_step_time": 0.38332080841064453
    },
    {
      "epoch": 4.403076171875e-05,
      "model_forward_time": 0.11474466323852539,
      "step": 7214
    },
    {
      "epoch": 4.403076171875e-05,
      "step": 7214,
      "training_step_time": 0.3840773105621338
    },
    {
      "epoch": 4.4036865234375e-05,
      "model_forward_time": 0.11535263061523438,
      "step": 7215
    },
    {
      "epoch": 4.4036865234375e-05,
      "step": 7215,
      "training_step_time": 1.2315316200256348
    },
    {
      "epoch": 4.404296875e-05,
      "model_forward_time": 0.11410760879516602,
      "step": 7216
    },
    {
      "epoch": 4.404296875e-05,
      "step": 7216,
      "training_step_time": 0.40263891220092773
    },
    {
      "epoch": 4.4049072265625e-05,
      "model_forward_time": 0.1137239933013916,
      "step": 7217
    },
    {
      "epoch": 4.4049072265625e-05,
      "step": 7217,
      "training_step_time": 0.4480316638946533
    },
    {
      "epoch": 4.405517578125e-05,
      "model_forward_time": 0.11351394653320312,
      "step": 7218
    },
    {
      "epoch": 4.405517578125e-05,
      "step": 7218,
      "training_step_time": 0.49089980125427246
    },
    {
      "epoch": 4.4061279296875e-05,
      "model_forward_time": 0.11352229118347168,
      "step": 7219
    },
    {
      "epoch": 4.4061279296875e-05,
      "step": 7219,
      "training_step_time": 0.397106409072876
    },
    {
      "epoch": 4.40673828125e-05,
      "grad_norm": 0.2312273532152176,
      "learning_rate": 9.865365544696285e-05,
      "loss": 0.0745,
      "step": 7220
    },
    {
      "epoch": 4.40673828125e-05,
      "model_forward_time": 0.11549854278564453,
      "step": 7220
    },
    {
      "epoch": 4.40673828125e-05,
      "step": 7220,
      "training_step_time": 0.3864116668701172
    },
    {
      "epoch": 4.4073486328125e-05,
      "model_forward_time": 0.1144552230834961,
      "step": 7221
    },
    {
      "epoch": 4.4073486328125e-05,
      "step": 7221,
      "training_step_time": 0.40932250022888184
    },
    {
      "epoch": 4.407958984375e-05,
      "model_forward_time": 0.11414504051208496,
      "step": 7222
    },
    {
      "epoch": 4.407958984375e-05,
      "step": 7222,
      "training_step_time": 0.3828613758087158
    },
    {
      "epoch": 4.4085693359375e-05,
      "model_forward_time": 0.11499500274658203,
      "step": 7223
    },
    {
      "epoch": 4.4085693359375e-05,
      "step": 7223,
      "training_step_time": 0.39104533195495605
    },
    {
      "epoch": 4.4091796875e-05,
      "model_forward_time": 0.11470460891723633,
      "step": 7224
    },
    {
      "epoch": 4.4091796875e-05,
      "step": 7224,
      "training_step_time": 0.424391508102417
    },
    {
      "epoch": 4.4097900390625e-05,
      "model_forward_time": 0.115234375,
      "step": 7225
    },
    {
      "epoch": 4.4097900390625e-05,
      "step": 7225,
      "training_step_time": 0.3949723243713379
    },
    {
      "epoch": 4.410400390625e-05,
      "model_forward_time": 0.11550259590148926,
      "step": 7226
    },
    {
      "epoch": 4.410400390625e-05,
      "step": 7226,
      "training_step_time": 0.3960103988647461
    },
    {
      "epoch": 4.4110107421875e-05,
      "model_forward_time": 0.11529064178466797,
      "step": 7227
    },
    {
      "epoch": 4.4110107421875e-05,
      "step": 7227,
      "training_step_time": 0.7658669948577881
    },
    {
      "epoch": 4.41162109375e-05,
      "model_forward_time": 0.11461758613586426,
      "step": 7228
    },
    {
      "epoch": 4.41162109375e-05,
      "step": 7228,
      "training_step_time": 0.39110779762268066
    },
    {
      "epoch": 4.4122314453125e-05,
      "model_forward_time": 0.11420416831970215,
      "step": 7229
    },
    {
      "epoch": 4.4122314453125e-05,
      "step": 7229,
      "training_step_time": 0.38451099395751953
    },
    {
      "epoch": 4.412841796875e-05,
      "grad_norm": 0.2148643434047699,
      "learning_rate": 9.864729606873663e-05,
      "loss": 0.0686,
      "step": 7230
    },
    {
      "epoch": 4.412841796875e-05,
      "model_forward_time": 0.11433053016662598,
      "step": 7230
    },
    {
      "epoch": 4.412841796875e-05,
      "step": 7230,
      "training_step_time": 0.4416215419769287
    },
    {
      "epoch": 4.4134521484375e-05,
      "model_forward_time": 0.1144723892211914,
      "step": 7231
    },
    {
      "epoch": 4.4134521484375e-05,
      "step": 7231,
      "training_step_time": 0.4885683059692383
    },
    {
      "epoch": 4.4140625e-05,
      "model_forward_time": 0.11423110961914062,
      "step": 7232
    },
    {
      "epoch": 4.4140625e-05,
      "step": 7232,
      "training_step_time": 0.4167056083679199
    },
    {
      "epoch": 4.4146728515625e-05,
      "model_forward_time": 0.11454582214355469,
      "step": 7233
    },
    {
      "epoch": 4.4146728515625e-05,
      "step": 7233,
      "training_step_time": 0.6882188320159912
    },
    {
      "epoch": 4.415283203125e-05,
      "model_forward_time": 0.11442399024963379,
      "step": 7234
    },
    {
      "epoch": 4.415283203125e-05,
      "step": 7234,
      "training_step_time": 0.38548779487609863
    },
    {
      "epoch": 4.4158935546875e-05,
      "model_forward_time": 0.11384844779968262,
      "step": 7235
    },
    {
      "epoch": 4.4158935546875e-05,
      "step": 7235,
      "training_step_time": 0.38026976585388184
    },
    {
      "epoch": 4.41650390625e-05,
      "model_forward_time": 0.11487674713134766,
      "step": 7236
    },
    {
      "epoch": 4.41650390625e-05,
      "step": 7236,
      "training_step_time": 0.38966917991638184
    },
    {
      "epoch": 4.4171142578125e-05,
      "model_forward_time": 0.11531543731689453,
      "step": 7237
    },
    {
      "epoch": 4.4171142578125e-05,
      "step": 7237,
      "training_step_time": 0.41474270820617676
    },
    {
      "epoch": 4.417724609375e-05,
      "model_forward_time": 0.11424493789672852,
      "step": 7238
    },
    {
      "epoch": 4.417724609375e-05,
      "step": 7238,
      "training_step_time": 0.3943476676940918
    },
    {
      "epoch": 4.4183349609375e-05,
      "model_forward_time": 0.11506509780883789,
      "step": 7239
    },
    {
      "epoch": 4.4183349609375e-05,
      "step": 7239,
      "training_step_time": 1.048945665359497
    },
    {
      "epoch": 4.4189453125e-05,
      "grad_norm": 0.18290410935878754,
      "learning_rate": 9.864092191274632e-05,
      "loss": 0.0673,
      "step": 7240
    },
    {
      "epoch": 4.4189453125e-05,
      "model_forward_time": 0.1142270565032959,
      "step": 7240
    },
    {
      "epoch": 4.4189453125e-05,
      "step": 7240,
      "training_step_time": 0.39054250717163086
    },
    {
      "epoch": 4.4195556640625e-05,
      "model_forward_time": 0.11514401435852051,
      "step": 7241
    },
    {
      "epoch": 4.4195556640625e-05,
      "step": 7241,
      "training_step_time": 0.3970174789428711
    },
    {
      "epoch": 4.420166015625e-05,
      "model_forward_time": 0.11371827125549316,
      "step": 7242
    },
    {
      "epoch": 4.420166015625e-05,
      "step": 7242,
      "training_step_time": 0.3605189323425293
    },
    {
      "epoch": 4.4207763671875e-05,
      "model_forward_time": 0.1156156063079834,
      "step": 7243
    },
    {
      "epoch": 4.4207763671875e-05,
      "step": 7243,
      "training_step_time": 0.42862725257873535
    },
    {
      "epoch": 4.42138671875e-05,
      "model_forward_time": 0.11443901062011719,
      "step": 7244
    },
    {
      "epoch": 4.42138671875e-05,
      "step": 7244,
      "training_step_time": 0.45622968673706055
    },
    {
      "epoch": 4.4219970703125e-05,
      "model_forward_time": 0.11350297927856445,
      "step": 7245
    },
    {
      "epoch": 4.4219970703125e-05,
      "step": 7245,
      "training_step_time": 0.5927326679229736
    },
    {
      "epoch": 4.422607421875e-05,
      "model_forward_time": 0.11448955535888672,
      "step": 7246
    },
    {
      "epoch": 4.422607421875e-05,
      "step": 7246,
      "training_step_time": 0.41274523735046387
    },
    {
      "epoch": 4.4232177734375e-05,
      "model_forward_time": 0.11437034606933594,
      "step": 7247
    },
    {
      "epoch": 4.4232177734375e-05,
      "step": 7247,
      "training_step_time": 0.38478660583496094
    },
    {
      "epoch": 4.423828125e-05,
      "model_forward_time": 0.1140296459197998,
      "step": 7248
    },
    {
      "epoch": 4.423828125e-05,
      "step": 7248,
      "training_step_time": 0.3978276252746582
    },
    {
      "epoch": 4.4244384765625e-05,
      "model_forward_time": 0.11539816856384277,
      "step": 7249
    },
    {
      "epoch": 4.4244384765625e-05,
      "step": 7249,
      "training_step_time": 0.3923914432525635
    },
    {
      "epoch": 4.425048828125e-05,
      "grad_norm": 0.22404254972934723,
      "learning_rate": 9.86345329809282e-05,
      "loss": 0.0685,
      "step": 7250
    },
    {
      "epoch": 4.425048828125e-05,
      "model_forward_time": 0.1148691177368164,
      "step": 7250
    },
    {
      "epoch": 4.425048828125e-05,
      "step": 7250,
      "training_step_time": 0.400862455368042
    },
    {
      "epoch": 4.4256591796875e-05,
      "model_forward_time": 0.11446619033813477,
      "step": 7251
    },
    {
      "epoch": 4.4256591796875e-05,
      "step": 7251,
      "training_step_time": 0.5793607234954834
    },
    {
      "epoch": 4.42626953125e-05,
      "model_forward_time": 0.11489510536193848,
      "step": 7252
    },
    {
      "epoch": 4.42626953125e-05,
      "step": 7252,
      "training_step_time": 0.4007987976074219
    },
    {
      "epoch": 4.4268798828125e-05,
      "model_forward_time": 0.1145939826965332,
      "step": 7253
    },
    {
      "epoch": 4.4268798828125e-05,
      "step": 7253,
      "training_step_time": 0.39588022232055664
    },
    {
      "epoch": 4.427490234375e-05,
      "model_forward_time": 0.11505293846130371,
      "step": 7254
    },
    {
      "epoch": 4.427490234375e-05,
      "step": 7254,
      "training_step_time": 0.4107375144958496
    },
    {
      "epoch": 4.4281005859375e-05,
      "model_forward_time": 0.11428666114807129,
      "step": 7255
    },
    {
      "epoch": 4.4281005859375e-05,
      "step": 7255,
      "training_step_time": 0.3865084648132324
    },
    {
      "epoch": 4.4287109375e-05,
      "model_forward_time": 0.11447715759277344,
      "step": 7256
    },
    {
      "epoch": 4.4287109375e-05,
      "step": 7256,
      "training_step_time": 0.36328673362731934
    },
    {
      "epoch": 4.4293212890625e-05,
      "model_forward_time": 0.11438870429992676,
      "step": 7257
    },
    {
      "epoch": 4.4293212890625e-05,
      "step": 7257,
      "training_step_time": 1.0570597648620605
    },
    {
      "epoch": 4.429931640625e-05,
      "model_forward_time": 0.11389374732971191,
      "step": 7258
    },
    {
      "epoch": 4.429931640625e-05,
      "step": 7258,
      "training_step_time": 0.4910449981689453
    },
    {
      "epoch": 4.4305419921875e-05,
      "model_forward_time": 0.11362338066101074,
      "step": 7259
    },
    {
      "epoch": 4.4305419921875e-05,
      "step": 7259,
      "training_step_time": 0.3764986991882324
    },
    {
      "epoch": 4.43115234375e-05,
      "grad_norm": 0.20427018404006958,
      "learning_rate": 9.862812927522309e-05,
      "loss": 0.0699,
      "step": 7260
    },
    {
      "epoch": 4.43115234375e-05,
      "model_forward_time": 0.11380243301391602,
      "step": 7260
    },
    {
      "epoch": 4.43115234375e-05,
      "step": 7260,
      "training_step_time": 0.38168835639953613
    },
    {
      "epoch": 4.4317626953125e-05,
      "model_forward_time": 0.11448264122009277,
      "step": 7261
    },
    {
      "epoch": 4.4317626953125e-05,
      "step": 7261,
      "training_step_time": 0.3970968723297119
    },
    {
      "epoch": 4.432373046875e-05,
      "model_forward_time": 0.11498427391052246,
      "step": 7262
    },
    {
      "epoch": 4.432373046875e-05,
      "step": 7262,
      "training_step_time": 0.38338494300842285
    },
    {
      "epoch": 4.4329833984375e-05,
      "model_forward_time": 0.11520099639892578,
      "step": 7263
    },
    {
      "epoch": 4.4329833984375e-05,
      "step": 7263,
      "training_step_time": 0.7014486789703369
    },
    {
      "epoch": 4.43359375e-05,
      "model_forward_time": 0.11435079574584961,
      "step": 7264
    },
    {
      "epoch": 4.43359375e-05,
      "step": 7264,
      "training_step_time": 0.38718342781066895
    },
    {
      "epoch": 4.4342041015625e-05,
      "model_forward_time": 0.1147608757019043,
      "step": 7265
    },
    {
      "epoch": 4.4342041015625e-05,
      "step": 7265,
      "training_step_time": 0.3915870189666748
    },
    {
      "epoch": 4.434814453125e-05,
      "model_forward_time": 0.1141045093536377,
      "step": 7266
    },
    {
      "epoch": 4.434814453125e-05,
      "step": 7266,
      "training_step_time": 0.39632344245910645
    },
    {
      "epoch": 4.4354248046875e-05,
      "model_forward_time": 0.11406946182250977,
      "step": 7267
    },
    {
      "epoch": 4.4354248046875e-05,
      "step": 7267,
      "training_step_time": 0.39122891426086426
    },
    {
      "epoch": 4.43603515625e-05,
      "model_forward_time": 0.11452317237854004,
      "step": 7268
    },
    {
      "epoch": 4.43603515625e-05,
      "step": 7268,
      "training_step_time": 0.39566707611083984
    },
    {
      "epoch": 4.4366455078125e-05,
      "model_forward_time": 0.11522030830383301,
      "step": 7269
    },
    {
      "epoch": 4.4366455078125e-05,
      "step": 7269,
      "training_step_time": 0.7377586364746094
    },
    {
      "epoch": 4.437255859375e-05,
      "grad_norm": 0.17226168513298035,
      "learning_rate": 9.862171079757628e-05,
      "loss": 0.072,
      "step": 7270
    },
    {
      "epoch": 4.437255859375e-05,
      "model_forward_time": 0.11461138725280762,
      "step": 7270
    },
    {
      "epoch": 4.437255859375e-05,
      "step": 7270,
      "training_step_time": 0.4780082702636719
    },
    {
      "epoch": 4.4378662109375e-05,
      "model_forward_time": 0.11424469947814941,
      "step": 7271
    },
    {
      "epoch": 4.4378662109375e-05,
      "step": 7271,
      "training_step_time": 0.40981221199035645
    },
    {
      "epoch": 4.4384765625e-05,
      "model_forward_time": 0.11421370506286621,
      "step": 7272
    },
    {
      "epoch": 4.4384765625e-05,
      "step": 7272,
      "training_step_time": 0.40282154083251953
    },
    {
      "epoch": 4.4390869140625e-05,
      "model_forward_time": 0.1141350269317627,
      "step": 7273
    },
    {
      "epoch": 4.4390869140625e-05,
      "step": 7273,
      "training_step_time": 0.43256282806396484
    },
    {
      "epoch": 4.439697265625e-05,
      "model_forward_time": 0.11459708213806152,
      "step": 7274
    },
    {
      "epoch": 4.439697265625e-05,
      "step": 7274,
      "training_step_time": 0.40721845626831055
    },
    {
      "epoch": 4.4403076171875e-05,
      "model_forward_time": 0.1144721508026123,
      "step": 7275
    },
    {
      "epoch": 4.4403076171875e-05,
      "step": 7275,
      "training_step_time": 0.7807745933532715
    },
    {
      "epoch": 4.44091796875e-05,
      "model_forward_time": 0.11454057693481445,
      "step": 7276
    },
    {
      "epoch": 4.44091796875e-05,
      "step": 7276,
      "training_step_time": 0.3805506229400635
    },
    {
      "epoch": 4.4415283203125e-05,
      "model_forward_time": 0.11414885520935059,
      "step": 7277
    },
    {
      "epoch": 4.4415283203125e-05,
      "step": 7277,
      "training_step_time": 0.38660526275634766
    },
    {
      "epoch": 4.442138671875e-05,
      "model_forward_time": 0.11442422866821289,
      "step": 7278
    },
    {
      "epoch": 4.442138671875e-05,
      "step": 7278,
      "training_step_time": 0.3953890800476074
    },
    {
      "epoch": 4.4427490234375e-05,
      "model_forward_time": 0.11416363716125488,
      "step": 7279
    },
    {
      "epoch": 4.4427490234375e-05,
      "step": 7279,
      "training_step_time": 0.4086942672729492
    },
    {
      "epoch": 4.443359375e-05,
      "grad_norm": 0.22631588578224182,
      "learning_rate": 9.861527754993749e-05,
      "loss": 0.0646,
      "step": 7280
    },
    {
      "epoch": 4.443359375e-05,
      "model_forward_time": 0.11452960968017578,
      "step": 7280
    },
    {
      "epoch": 4.443359375e-05,
      "step": 7280,
      "training_step_time": 0.3909144401550293
    },
    {
      "epoch": 4.4439697265625e-05,
      "model_forward_time": 0.11476612091064453,
      "step": 7281
    },
    {
      "epoch": 4.4439697265625e-05,
      "step": 7281,
      "training_step_time": 0.6911749839782715
    },
    {
      "epoch": 4.444580078125e-05,
      "model_forward_time": 0.11437511444091797,
      "step": 7282
    },
    {
      "epoch": 4.444580078125e-05,
      "step": 7282,
      "training_step_time": 0.3645174503326416
    },
    {
      "epoch": 4.4451904296875e-05,
      "model_forward_time": 0.11472892761230469,
      "step": 7283
    },
    {
      "epoch": 4.4451904296875e-05,
      "step": 7283,
      "training_step_time": 0.4515681266784668
    },
    {
      "epoch": 4.44580078125e-05,
      "model_forward_time": 0.11417102813720703,
      "step": 7284
    },
    {
      "epoch": 4.44580078125e-05,
      "step": 7284,
      "training_step_time": 0.47890567779541016
    },
    {
      "epoch": 4.4464111328125e-05,
      "model_forward_time": 0.11444735527038574,
      "step": 7285
    },
    {
      "epoch": 4.4464111328125e-05,
      "step": 7285,
      "training_step_time": 0.4748256206512451
    },
    {
      "epoch": 4.447021484375e-05,
      "model_forward_time": 0.11408114433288574,
      "step": 7286
    },
    {
      "epoch": 4.447021484375e-05,
      "step": 7286,
      "training_step_time": 0.43261218070983887
    },
    {
      "epoch": 4.4476318359375e-05,
      "model_forward_time": 0.11470437049865723,
      "step": 7287
    },
    {
      "epoch": 4.4476318359375e-05,
      "step": 7287,
      "training_step_time": 0.5143375396728516
    },
    {
      "epoch": 4.4482421875e-05,
      "model_forward_time": 0.11414146423339844,
      "step": 7288
    },
    {
      "epoch": 4.4482421875e-05,
      "step": 7288,
      "training_step_time": 0.3927876949310303
    },
    {
      "epoch": 4.4488525390625e-05,
      "model_forward_time": 0.11425638198852539,
      "step": 7289
    },
    {
      "epoch": 4.4488525390625e-05,
      "step": 7289,
      "training_step_time": 0.3890976905822754
    },
    {
      "epoch": 4.449462890625e-05,
      "grad_norm": 0.17179541289806366,
      "learning_rate": 9.860882953426099e-05,
      "loss": 0.0676,
      "step": 7290
    },
    {
      "epoch": 4.449462890625e-05,
      "model_forward_time": 0.11410713195800781,
      "step": 7290
    },
    {
      "epoch": 4.449462890625e-05,
      "step": 7290,
      "training_step_time": 0.4106621742248535
    },
    {
      "epoch": 4.4500732421875e-05,
      "model_forward_time": 0.11462783813476562,
      "step": 7291
    },
    {
      "epoch": 4.4500732421875e-05,
      "step": 7291,
      "training_step_time": 0.3934147357940674
    },
    {
      "epoch": 4.45068359375e-05,
      "model_forward_time": 0.11553454399108887,
      "step": 7292
    },
    {
      "epoch": 4.45068359375e-05,
      "step": 7292,
      "training_step_time": 0.3949000835418701
    },
    {
      "epoch": 4.4512939453125e-05,
      "model_forward_time": 0.1145172119140625,
      "step": 7293
    },
    {
      "epoch": 4.4512939453125e-05,
      "step": 7293,
      "training_step_time": 0.7134311199188232
    },
    {
      "epoch": 4.451904296875e-05,
      "model_forward_time": 0.11395502090454102,
      "step": 7294
    },
    {
      "epoch": 4.451904296875e-05,
      "step": 7294,
      "training_step_time": 0.3862011432647705
    },
    {
      "epoch": 4.4525146484375e-05,
      "model_forward_time": 0.11455845832824707,
      "step": 7295
    },
    {
      "epoch": 4.4525146484375e-05,
      "step": 7295,
      "training_step_time": 0.3823552131652832
    },
    {
      "epoch": 4.453125e-05,
      "model_forward_time": 0.11487007141113281,
      "step": 7296
    },
    {
      "epoch": 4.453125e-05,
      "step": 7296,
      "training_step_time": 0.36591672897338867
    },
    {
      "epoch": 4.4537353515625e-05,
      "model_forward_time": 0.11421680450439453,
      "step": 7297
    },
    {
      "epoch": 4.4537353515625e-05,
      "step": 7297,
      "training_step_time": 0.45854759216308594
    },
    {
      "epoch": 4.454345703125e-05,
      "model_forward_time": 0.11410164833068848,
      "step": 7298
    },
    {
      "epoch": 4.454345703125e-05,
      "step": 7298,
      "training_step_time": 0.47074007987976074
    },
    {
      "epoch": 4.4549560546875e-05,
      "model_forward_time": 0.11487340927124023,
      "step": 7299
    },
    {
      "epoch": 4.4549560546875e-05,
      "step": 7299,
      "training_step_time": 0.42337584495544434
    },
    {
      "epoch": 4.45556640625e-05,
      "grad_norm": 0.1662365347146988,
      "learning_rate": 9.860236675250552e-05,
      "loss": 0.0747,
      "step": 7300
    },
    {
      "epoch": 4.45556640625e-05,
      "model_forward_time": 0.11512565612792969,
      "step": 7300
    },
    {
      "epoch": 4.45556640625e-05,
      "step": 7300,
      "training_step_time": 0.3914320468902588
    },
    {
      "epoch": 4.4561767578125e-05,
      "model_forward_time": 0.11501502990722656,
      "step": 7301
    },
    {
      "epoch": 4.4561767578125e-05,
      "step": 7301,
      "training_step_time": 0.40141773223876953
    },
    {
      "epoch": 4.456787109375e-05,
      "model_forward_time": 0.114532470703125,
      "step": 7302
    },
    {
      "epoch": 4.456787109375e-05,
      "step": 7302,
      "training_step_time": 0.3945331573486328
    },
    {
      "epoch": 4.4573974609375e-05,
      "model_forward_time": 0.11478734016418457,
      "step": 7303
    },
    {
      "epoch": 4.4573974609375e-05,
      "step": 7303,
      "training_step_time": 0.38908958435058594
    },
    {
      "epoch": 4.4580078125e-05,
      "model_forward_time": 0.11465716361999512,
      "step": 7304
    },
    {
      "epoch": 4.4580078125e-05,
      "step": 7304,
      "training_step_time": 0.3871898651123047
    },
    {
      "epoch": 4.4586181640625e-05,
      "model_forward_time": 0.1140596866607666,
      "step": 7305
    },
    {
      "epoch": 4.4586181640625e-05,
      "step": 7305,
      "training_step_time": 0.755706787109375
    },
    {
      "epoch": 4.459228515625e-05,
      "model_forward_time": 0.11442136764526367,
      "step": 7306
    },
    {
      "epoch": 4.459228515625e-05,
      "step": 7306,
      "training_step_time": 0.39265942573547363
    },
    {
      "epoch": 4.4598388671875e-05,
      "model_forward_time": 0.1149141788482666,
      "step": 7307
    },
    {
      "epoch": 4.4598388671875e-05,
      "step": 7307,
      "training_step_time": 0.3879413604736328
    },
    {
      "epoch": 4.46044921875e-05,
      "model_forward_time": 0.11416983604431152,
      "step": 7308
    },
    {
      "epoch": 4.46044921875e-05,
      "step": 7308,
      "training_step_time": 0.3963954448699951
    },
    {
      "epoch": 4.4610595703125e-05,
      "model_forward_time": 0.11425447463989258,
      "step": 7309
    },
    {
      "epoch": 4.4610595703125e-05,
      "step": 7309,
      "training_step_time": 0.399064302444458
    },
    {
      "epoch": 4.461669921875e-05,
      "grad_norm": 0.1898437738418579,
      "learning_rate": 9.859588920663432e-05,
      "loss": 0.0688,
      "step": 7310
    },
    {
      "epoch": 4.461669921875e-05,
      "model_forward_time": 0.1139822006225586,
      "step": 7310
    },
    {
      "epoch": 4.461669921875e-05,
      "step": 7310,
      "training_step_time": 0.39574217796325684
    },
    {
      "epoch": 4.4622802734375e-05,
      "model_forward_time": 0.11473536491394043,
      "step": 7311
    },
    {
      "epoch": 4.4622802734375e-05,
      "step": 7311,
      "training_step_time": 1.3703811168670654
    },
    {
      "epoch": 4.462890625e-05,
      "model_forward_time": 0.11368894577026367,
      "step": 7312
    },
    {
      "epoch": 4.462890625e-05,
      "step": 7312,
      "training_step_time": 0.375685453414917
    },
    {
      "epoch": 4.4635009765625e-05,
      "model_forward_time": 0.11314010620117188,
      "step": 7313
    },
    {
      "epoch": 4.4635009765625e-05,
      "step": 7313,
      "training_step_time": 0.36838507652282715
    },
    {
      "epoch": 4.464111328125e-05,
      "model_forward_time": 0.11330962181091309,
      "step": 7314
    },
    {
      "epoch": 4.464111328125e-05,
      "step": 7314,
      "training_step_time": 0.3806033134460449
    },
    {
      "epoch": 4.4647216796875e-05,
      "model_forward_time": 0.11366009712219238,
      "step": 7315
    },
    {
      "epoch": 4.4647216796875e-05,
      "step": 7315,
      "training_step_time": 0.3875141143798828
    },
    {
      "epoch": 4.46533203125e-05,
      "model_forward_time": 0.11400842666625977,
      "step": 7316
    },
    {
      "epoch": 4.46533203125e-05,
      "step": 7316,
      "training_step_time": 0.3896908760070801
    },
    {
      "epoch": 4.4659423828125e-05,
      "model_forward_time": 0.1146249771118164,
      "step": 7317
    },
    {
      "epoch": 4.4659423828125e-05,
      "step": 7317,
      "training_step_time": 0.5832898616790771
    },
    {
      "epoch": 4.466552734375e-05,
      "model_forward_time": 0.11437225341796875,
      "step": 7318
    },
    {
      "epoch": 4.466552734375e-05,
      "step": 7318,
      "training_step_time": 0.38350772857666016
    },
    {
      "epoch": 4.4671630859375e-05,
      "model_forward_time": 0.11440467834472656,
      "step": 7319
    },
    {
      "epoch": 4.4671630859375e-05,
      "step": 7319,
      "training_step_time": 0.3979530334472656
    },
    {
      "epoch": 4.4677734375e-05,
      "grad_norm": 0.20144999027252197,
      "learning_rate": 9.858939689861506e-05,
      "loss": 0.0685,
      "step": 7320
    },
    {
      "epoch": 4.4677734375e-05,
      "model_forward_time": 0.11423850059509277,
      "step": 7320
    },
    {
      "epoch": 4.4677734375e-05,
      "step": 7320,
      "training_step_time": 0.4054250717163086
    },
    {
      "epoch": 4.4683837890625e-05,
      "model_forward_time": 0.11557555198669434,
      "step": 7321
    },
    {
      "epoch": 4.4683837890625e-05,
      "step": 7321,
      "training_step_time": 0.43236780166625977
    },
    {
      "epoch": 4.468994140625e-05,
      "model_forward_time": 0.1150815486907959,
      "step": 7322
    },
    {
      "epoch": 4.468994140625e-05,
      "step": 7322,
      "training_step_time": 0.4014906883239746
    },
    {
      "epoch": 4.4696044921875e-05,
      "model_forward_time": 0.1146385669708252,
      "step": 7323
    },
    {
      "epoch": 4.4696044921875e-05,
      "step": 7323,
      "training_step_time": 0.7657253742218018
    },
    {
      "epoch": 4.47021484375e-05,
      "model_forward_time": 0.11375117301940918,
      "step": 7324
    },
    {
      "epoch": 4.47021484375e-05,
      "step": 7324,
      "training_step_time": 0.4640491008758545
    },
    {
      "epoch": 4.4708251953125e-05,
      "model_forward_time": 0.11407852172851562,
      "step": 7325
    },
    {
      "epoch": 4.4708251953125e-05,
      "step": 7325,
      "training_step_time": 0.39769577980041504
    },
    {
      "epoch": 4.471435546875e-05,
      "model_forward_time": 0.1144113540649414,
      "step": 7326
    },
    {
      "epoch": 4.471435546875e-05,
      "step": 7326,
      "training_step_time": 0.40357208251953125
    },
    {
      "epoch": 4.4720458984375e-05,
      "model_forward_time": 0.11395978927612305,
      "step": 7327
    },
    {
      "epoch": 4.4720458984375e-05,
      "step": 7327,
      "training_step_time": 0.3893930912017822
    },
    {
      "epoch": 4.47265625e-05,
      "model_forward_time": 0.11461496353149414,
      "step": 7328
    },
    {
      "epoch": 4.47265625e-05,
      "step": 7328,
      "training_step_time": 0.39820122718811035
    },
    {
      "epoch": 4.4732666015625e-05,
      "model_forward_time": 0.11467528343200684,
      "step": 7329
    },
    {
      "epoch": 4.4732666015625e-05,
      "step": 7329,
      "training_step_time": 0.7101080417633057
    },
    {
      "epoch": 4.473876953125e-05,
      "grad_norm": 0.31266483664512634,
      "learning_rate": 9.858288983041996e-05,
      "loss": 0.0689,
      "step": 7330
    },
    {
      "epoch": 4.473876953125e-05,
      "model_forward_time": 0.11453080177307129,
      "step": 7330
    },
    {
      "epoch": 4.473876953125e-05,
      "step": 7330,
      "training_step_time": 0.387645959854126
    },
    {
      "epoch": 4.4744873046875e-05,
      "model_forward_time": 0.11437749862670898,
      "step": 7331
    },
    {
      "epoch": 4.4744873046875e-05,
      "step": 7331,
      "training_step_time": 0.41005420684814453
    },
    {
      "epoch": 4.47509765625e-05,
      "model_forward_time": 0.11411643028259277,
      "step": 7332
    },
    {
      "epoch": 4.47509765625e-05,
      "step": 7332,
      "training_step_time": 0.3999340534210205
    },
    {
      "epoch": 4.4757080078125e-05,
      "model_forward_time": 0.11417198181152344,
      "step": 7333
    },
    {
      "epoch": 4.4757080078125e-05,
      "step": 7333,
      "training_step_time": 0.3915705680847168
    },
    {
      "epoch": 4.476318359375e-05,
      "model_forward_time": 0.11430788040161133,
      "step": 7334
    },
    {
      "epoch": 4.476318359375e-05,
      "step": 7334,
      "training_step_time": 0.39907217025756836
    },
    {
      "epoch": 4.4769287109375e-05,
      "model_forward_time": 0.11534619331359863,
      "step": 7335
    },
    {
      "epoch": 4.4769287109375e-05,
      "step": 7335,
      "training_step_time": 0.8232975006103516
    },
    {
      "epoch": 4.4775390625e-05,
      "model_forward_time": 0.11425304412841797,
      "step": 7336
    },
    {
      "epoch": 4.4775390625e-05,
      "step": 7336,
      "training_step_time": 0.4644296169281006
    },
    {
      "epoch": 4.4781494140625e-05,
      "model_forward_time": 0.11457109451293945,
      "step": 7337
    },
    {
      "epoch": 4.4781494140625e-05,
      "step": 7337,
      "training_step_time": 0.48127055168151855
    },
    {
      "epoch": 4.478759765625e-05,
      "model_forward_time": 0.1141808032989502,
      "step": 7338
    },
    {
      "epoch": 4.478759765625e-05,
      "step": 7338,
      "training_step_time": 0.4393949508666992
    },
    {
      "epoch": 4.4793701171875e-05,
      "model_forward_time": 0.11400532722473145,
      "step": 7339
    },
    {
      "epoch": 4.4793701171875e-05,
      "step": 7339,
      "training_step_time": 0.47847938537597656
    },
    {
      "epoch": 4.47998046875e-05,
      "grad_norm": 0.20110507309436798,
      "learning_rate": 9.857636800402568e-05,
      "loss": 0.0676,
      "step": 7340
    },
    {
      "epoch": 4.47998046875e-05,
      "model_forward_time": 0.11454319953918457,
      "step": 7340
    },
    {
      "epoch": 4.47998046875e-05,
      "step": 7340,
      "training_step_time": 0.3831627368927002
    },
    {
      "epoch": 4.4805908203125e-05,
      "model_forward_time": 0.11460995674133301,
      "step": 7341
    },
    {
      "epoch": 4.4805908203125e-05,
      "step": 7341,
      "training_step_time": 0.3998596668243408
    },
    {
      "epoch": 4.481201171875e-05,
      "model_forward_time": 0.11474418640136719,
      "step": 7342
    },
    {
      "epoch": 4.481201171875e-05,
      "step": 7342,
      "training_step_time": 0.39523768424987793
    },
    {
      "epoch": 4.4818115234375e-05,
      "model_forward_time": 0.11488199234008789,
      "step": 7343
    },
    {
      "epoch": 4.4818115234375e-05,
      "step": 7343,
      "training_step_time": 0.38698482513427734
    },
    {
      "epoch": 4.482421875e-05,
      "model_forward_time": 0.11557912826538086,
      "step": 7344
    },
    {
      "epoch": 4.482421875e-05,
      "step": 7344,
      "training_step_time": 0.40393996238708496
    },
    {
      "epoch": 4.4830322265625e-05,
      "model_forward_time": 0.11500954627990723,
      "step": 7345
    },
    {
      "epoch": 4.4830322265625e-05,
      "step": 7345,
      "training_step_time": 0.4062619209289551
    },
    {
      "epoch": 4.483642578125e-05,
      "model_forward_time": 0.11426901817321777,
      "step": 7346
    },
    {
      "epoch": 4.483642578125e-05,
      "step": 7346,
      "training_step_time": 0.40007543563842773
    },
    {
      "epoch": 4.4842529296875e-05,
      "model_forward_time": 0.11465954780578613,
      "step": 7347
    },
    {
      "epoch": 4.4842529296875e-05,
      "step": 7347,
      "training_step_time": 0.5976946353912354
    },
    {
      "epoch": 4.48486328125e-05,
      "model_forward_time": 0.11501646041870117,
      "step": 7348
    },
    {
      "epoch": 4.48486328125e-05,
      "step": 7348,
      "training_step_time": 0.40041589736938477
    },
    {
      "epoch": 4.4854736328125e-05,
      "model_forward_time": 0.11422419548034668,
      "step": 7349
    },
    {
      "epoch": 4.4854736328125e-05,
      "step": 7349,
      "training_step_time": 0.39950060844421387
    },
    {
      "epoch": 4.486083984375e-05,
      "grad_norm": 0.21119239926338196,
      "learning_rate": 9.856983142141339e-05,
      "loss": 0.0663,
      "step": 7350
    },
    {
      "epoch": 4.486083984375e-05,
      "model_forward_time": 0.11433053016662598,
      "step": 7350
    },
    {
      "epoch": 4.486083984375e-05,
      "step": 7350,
      "training_step_time": 0.3693664073944092
    },
    {
      "epoch": 4.4866943359375e-05,
      "model_forward_time": 0.1142578125,
      "step": 7351
    },
    {
      "epoch": 4.4866943359375e-05,
      "step": 7351,
      "training_step_time": 0.45076751708984375
    },
    {
      "epoch": 4.4873046875e-05,
      "model_forward_time": 0.11469578742980957,
      "step": 7352
    },
    {
      "epoch": 4.4873046875e-05,
      "step": 7352,
      "training_step_time": 0.4900696277618408
    },
    {
      "epoch": 4.4879150390625e-05,
      "model_forward_time": 0.11495828628540039,
      "step": 7353
    },
    {
      "epoch": 4.4879150390625e-05,
      "step": 7353,
      "training_step_time": 0.7005171775817871
    },
    {
      "epoch": 4.488525390625e-05,
      "model_forward_time": 0.1145315170288086,
      "step": 7354
    },
    {
      "epoch": 4.488525390625e-05,
      "step": 7354,
      "training_step_time": 0.4209887981414795
    },
    {
      "epoch": 4.4891357421875e-05,
      "model_forward_time": 0.11422157287597656,
      "step": 7355
    },
    {
      "epoch": 4.4891357421875e-05,
      "step": 7355,
      "training_step_time": 0.38413453102111816
    },
    {
      "epoch": 4.48974609375e-05,
      "model_forward_time": 0.11425089836120605,
      "step": 7356
    },
    {
      "epoch": 4.48974609375e-05,
      "step": 7356,
      "training_step_time": 0.3822770118713379
    },
    {
      "epoch": 4.4903564453125e-05,
      "model_forward_time": 0.11516022682189941,
      "step": 7357
    },
    {
      "epoch": 4.4903564453125e-05,
      "step": 7357,
      "training_step_time": 0.39315080642700195
    },
    {
      "epoch": 4.490966796875e-05,
      "model_forward_time": 0.11411142349243164,
      "step": 7358
    },
    {
      "epoch": 4.490966796875e-05,
      "step": 7358,
      "training_step_time": 0.39183998107910156
    },
    {
      "epoch": 4.4915771484375e-05,
      "model_forward_time": 0.11514067649841309,
      "step": 7359
    },
    {
      "epoch": 4.4915771484375e-05,
      "step": 7359,
      "training_step_time": 0.9961273670196533
    },
    {
      "epoch": 4.4921875e-05,
      "grad_norm": 0.3470989465713501,
      "learning_rate": 9.856328008456872e-05,
      "loss": 0.0654,
      "step": 7360
    },
    {
      "epoch": 4.4921875e-05,
      "model_forward_time": 0.11441516876220703,
      "step": 7360
    },
    {
      "epoch": 4.4921875e-05,
      "step": 7360,
      "training_step_time": 0.4207885265350342
    },
    {
      "epoch": 4.4927978515625e-05,
      "model_forward_time": 0.1144857406616211,
      "step": 7361
    },
    {
      "epoch": 4.4927978515625e-05,
      "step": 7361,
      "training_step_time": 0.39803075790405273
    },
    {
      "epoch": 4.493408203125e-05,
      "model_forward_time": 0.11351943016052246,
      "step": 7362
    },
    {
      "epoch": 4.493408203125e-05,
      "step": 7362,
      "training_step_time": 0.3859553337097168
    },
    {
      "epoch": 4.4940185546875e-05,
      "model_forward_time": 0.1138162612915039,
      "step": 7363
    },
    {
      "epoch": 4.4940185546875e-05,
      "step": 7363,
      "training_step_time": 0.3566913604736328
    },
    {
      "epoch": 4.49462890625e-05,
      "model_forward_time": 0.1138312816619873,
      "step": 7364
    },
    {
      "epoch": 4.49462890625e-05,
      "step": 7364,
      "training_step_time": 0.4410741329193115
    },
    {
      "epoch": 4.4952392578125e-05,
      "model_forward_time": 0.11428284645080566,
      "step": 7365
    },
    {
      "epoch": 4.4952392578125e-05,
      "step": 7365,
      "training_step_time": 0.5566544532775879
    },
    {
      "epoch": 4.495849609375e-05,
      "model_forward_time": 0.11463451385498047,
      "step": 7366
    },
    {
      "epoch": 4.495849609375e-05,
      "step": 7366,
      "training_step_time": 0.47966742515563965
    },
    {
      "epoch": 4.4964599609375e-05,
      "model_forward_time": 0.11400365829467773,
      "step": 7367
    },
    {
      "epoch": 4.4964599609375e-05,
      "step": 7367,
      "training_step_time": 0.3936431407928467
    },
    {
      "epoch": 4.4970703125e-05,
      "model_forward_time": 0.11435103416442871,
      "step": 7368
    },
    {
      "epoch": 4.4970703125e-05,
      "step": 7368,
      "training_step_time": 0.394256591796875
    },
    {
      "epoch": 4.4976806640625e-05,
      "model_forward_time": 0.11486101150512695,
      "step": 7369
    },
    {
      "epoch": 4.4976806640625e-05,
      "step": 7369,
      "training_step_time": 0.3997063636779785
    },
    {
      "epoch": 4.498291015625e-05,
      "grad_norm": 0.20256631076335907,
      "learning_rate": 9.855671399548181e-05,
      "loss": 0.0688,
      "step": 7370
    },
    {
      "epoch": 4.498291015625e-05,
      "model_forward_time": 0.11486530303955078,
      "step": 7370
    },
    {
      "epoch": 4.498291015625e-05,
      "step": 7370,
      "training_step_time": 0.3958437442779541
    },
    {
      "epoch": 4.4989013671875e-05,
      "model_forward_time": 0.11468195915222168,
      "step": 7371
    },
    {
      "epoch": 4.4989013671875e-05,
      "step": 7371,
      "training_step_time": 0.6515307426452637
    },
    {
      "epoch": 4.49951171875e-05,
      "model_forward_time": 0.11487340927124023,
      "step": 7372
    },
    {
      "epoch": 4.49951171875e-05,
      "step": 7372,
      "training_step_time": 0.3917696475982666
    },
    {
      "epoch": 4.5001220703125e-05,
      "model_forward_time": 0.11461877822875977,
      "step": 7373
    },
    {
      "epoch": 4.5001220703125e-05,
      "step": 7373,
      "training_step_time": 0.4031510353088379
    },
    {
      "epoch": 4.500732421875e-05,
      "model_forward_time": 0.11475467681884766,
      "step": 7374
    },
    {
      "epoch": 4.500732421875e-05,
      "step": 7374,
      "training_step_time": 0.4087674617767334
    },
    {
      "epoch": 4.5013427734375e-05,
      "model_forward_time": 0.11407327651977539,
      "step": 7375
    },
    {
      "epoch": 4.5013427734375e-05,
      "step": 7375,
      "training_step_time": 0.3936276435852051
    },
    {
      "epoch": 4.501953125e-05,
      "model_forward_time": 0.11451554298400879,
      "step": 7376
    },
    {
      "epoch": 4.501953125e-05,
      "step": 7376,
      "training_step_time": 0.40589165687561035
    },
    {
      "epoch": 4.5025634765625e-05,
      "model_forward_time": 0.11507582664489746,
      "step": 7377
    },
    {
      "epoch": 4.5025634765625e-05,
      "step": 7377,
      "training_step_time": 0.47901225090026855
    },
    {
      "epoch": 4.503173828125e-05,
      "model_forward_time": 0.11478662490844727,
      "step": 7378
    },
    {
      "epoch": 4.503173828125e-05,
      "step": 7378,
      "training_step_time": 0.41451573371887207
    },
    {
      "epoch": 4.5037841796875e-05,
      "model_forward_time": 0.11441445350646973,
      "step": 7379
    },
    {
      "epoch": 4.5037841796875e-05,
      "step": 7379,
      "training_step_time": 0.48363304138183594
    },
    {
      "epoch": 4.50439453125e-05,
      "grad_norm": 0.18291924893856049,
      "learning_rate": 9.855013315614725e-05,
      "loss": 0.0706,
      "step": 7380
    },
    {
      "epoch": 4.50439453125e-05,
      "model_forward_time": 0.11429095268249512,
      "step": 7380
    },
    {
      "epoch": 4.50439453125e-05,
      "step": 7380,
      "training_step_time": 0.4232330322265625
    },
    {
      "epoch": 4.5050048828125e-05,
      "model_forward_time": 0.11377120018005371,
      "step": 7381
    },
    {
      "epoch": 4.5050048828125e-05,
      "step": 7381,
      "training_step_time": 0.4037771224975586
    },
    {
      "epoch": 4.505615234375e-05,
      "model_forward_time": 0.11418581008911133,
      "step": 7382
    },
    {
      "epoch": 4.505615234375e-05,
      "step": 7382,
      "training_step_time": 0.39067554473876953
    },
    {
      "epoch": 4.5062255859375e-05,
      "model_forward_time": 0.11474013328552246,
      "step": 7383
    },
    {
      "epoch": 4.5062255859375e-05,
      "step": 7383,
      "training_step_time": 0.670809268951416
    },
    {
      "epoch": 4.5068359375e-05,
      "model_forward_time": 0.11445856094360352,
      "step": 7384
    },
    {
      "epoch": 4.5068359375e-05,
      "step": 7384,
      "training_step_time": 0.3829641342163086
    },
    {
      "epoch": 4.5074462890625e-05,
      "model_forward_time": 0.11434507369995117,
      "step": 7385
    },
    {
      "epoch": 4.5074462890625e-05,
      "step": 7385,
      "training_step_time": 0.3829808235168457
    },
    {
      "epoch": 4.508056640625e-05,
      "model_forward_time": 0.11515355110168457,
      "step": 7386
    },
    {
      "epoch": 4.508056640625e-05,
      "step": 7386,
      "training_step_time": 0.4326293468475342
    },
    {
      "epoch": 4.5086669921875e-05,
      "model_forward_time": 0.11424970626831055,
      "step": 7387
    },
    {
      "epoch": 4.5086669921875e-05,
      "step": 7387,
      "training_step_time": 0.39519405364990234
    },
    {
      "epoch": 4.50927734375e-05,
      "model_forward_time": 0.11454987525939941,
      "step": 7388
    },
    {
      "epoch": 4.50927734375e-05,
      "step": 7388,
      "training_step_time": 0.3871798515319824
    },
    {
      "epoch": 4.5098876953125e-05,
      "model_forward_time": 0.11524772644042969,
      "step": 7389
    },
    {
      "epoch": 4.5098876953125e-05,
      "step": 7389,
      "training_step_time": 0.9165549278259277
    },
    {
      "epoch": 4.510498046875e-05,
      "grad_norm": 0.18722814321517944,
      "learning_rate": 9.854353756856412e-05,
      "loss": 0.0696,
      "step": 7390
    },
    {
      "epoch": 4.510498046875e-05,
      "model_forward_time": 0.11430978775024414,
      "step": 7390
    },
    {
      "epoch": 4.510498046875e-05,
      "step": 7390,
      "training_step_time": 0.5120646953582764
    },
    {
      "epoch": 4.5111083984375e-05,
      "model_forward_time": 0.11430239677429199,
      "step": 7391
    },
    {
      "epoch": 4.5111083984375e-05,
      "step": 7391,
      "training_step_time": 0.46610021591186523
    },
    {
      "epoch": 4.51171875e-05,
      "model_forward_time": 0.11320877075195312,
      "step": 7392
    },
    {
      "epoch": 4.51171875e-05,
      "step": 7392,
      "training_step_time": 0.4679276943206787
    },
    {
      "epoch": 4.5123291015625e-05,
      "model_forward_time": 0.11368107795715332,
      "step": 7393
    },
    {
      "epoch": 4.5123291015625e-05,
      "step": 7393,
      "training_step_time": 0.388805627822876
    },
    {
      "epoch": 4.512939453125e-05,
      "model_forward_time": 0.11403083801269531,
      "step": 7394
    },
    {
      "epoch": 4.512939453125e-05,
      "step": 7394,
      "training_step_time": 0.42546796798706055
    },
    {
      "epoch": 4.5135498046875e-05,
      "model_forward_time": 0.11414027214050293,
      "step": 7395
    },
    {
      "epoch": 4.5135498046875e-05,
      "step": 7395,
      "training_step_time": 0.6419219970703125
    },
    {
      "epoch": 4.51416015625e-05,
      "model_forward_time": 0.11415934562683105,
      "step": 7396
    },
    {
      "epoch": 4.51416015625e-05,
      "step": 7396,
      "training_step_time": 0.38373303413391113
    },
    {
      "epoch": 4.5147705078125e-05,
      "model_forward_time": 0.11443686485290527,
      "step": 7397
    },
    {
      "epoch": 4.5147705078125e-05,
      "step": 7397,
      "training_step_time": 0.40122342109680176
    },
    {
      "epoch": 4.515380859375e-05,
      "model_forward_time": 0.11428546905517578,
      "step": 7398
    },
    {
      "epoch": 4.515380859375e-05,
      "step": 7398,
      "training_step_time": 0.4549722671508789
    },
    {
      "epoch": 4.5159912109375e-05,
      "model_forward_time": 0.11383724212646484,
      "step": 7399
    },
    {
      "epoch": 4.5159912109375e-05,
      "step": 7399,
      "training_step_time": 0.39870166778564453
    },
    {
      "epoch": 4.5166015625e-05,
      "grad_norm": 0.26299476623535156,
      "learning_rate": 9.8536927234736e-05,
      "loss": 0.0682,
      "step": 7400
    },
    {
      "epoch": 4.5166015625e-05,
      "model_forward_time": 0.11435890197753906,
      "step": 7400
    },
    {
      "epoch": 4.5166015625e-05,
      "step": 7400,
      "training_step_time": 0.655585765838623
    },
    {
      "epoch": 4.5172119140625e-05,
      "model_forward_time": 0.11538505554199219,
      "step": 7401
    },
    {
      "epoch": 4.5172119140625e-05,
      "step": 7401,
      "training_step_time": 0.547290563583374
    },
    {
      "epoch": 4.517822265625e-05,
      "model_forward_time": 0.1136941909790039,
      "step": 7402
    },
    {
      "epoch": 4.517822265625e-05,
      "step": 7402,
      "training_step_time": 0.39363646507263184
    },
    {
      "epoch": 4.5184326171875e-05,
      "model_forward_time": 0.11415791511535645,
      "step": 7403
    },
    {
      "epoch": 4.5184326171875e-05,
      "step": 7403,
      "training_step_time": 0.4622340202331543
    },
    {
      "epoch": 4.51904296875e-05,
      "model_forward_time": 0.11465954780578613,
      "step": 7404
    },
    {
      "epoch": 4.51904296875e-05,
      "step": 7404,
      "training_step_time": 0.5665972232818604
    },
    {
      "epoch": 4.5196533203125e-05,
      "model_forward_time": 0.11428308486938477,
      "step": 7405
    },
    {
      "epoch": 4.5196533203125e-05,
      "step": 7405,
      "training_step_time": 0.4308297634124756
    },
    {
      "epoch": 4.520263671875e-05,
      "model_forward_time": 0.11397790908813477,
      "step": 7406
    },
    {
      "epoch": 4.520263671875e-05,
      "step": 7406,
      "training_step_time": 0.39046597480773926
    },
    {
      "epoch": 4.5208740234375e-05,
      "model_forward_time": 0.11417317390441895,
      "step": 7407
    },
    {
      "epoch": 4.5208740234375e-05,
      "step": 7407,
      "training_step_time": 0.4773068428039551
    },
    {
      "epoch": 4.521484375e-05,
      "model_forward_time": 0.11384844779968262,
      "step": 7408
    },
    {
      "epoch": 4.521484375e-05,
      "step": 7408,
      "training_step_time": 0.37676143646240234
    },
    {
      "epoch": 4.5220947265625e-05,
      "model_forward_time": 0.11446070671081543,
      "step": 7409
    },
    {
      "epoch": 4.5220947265625e-05,
      "step": 7409,
      "training_step_time": 0.3908407688140869
    },
    {
      "epoch": 4.522705078125e-05,
      "grad_norm": 0.1657324880361557,
      "learning_rate": 9.853030215667093e-05,
      "loss": 0.0632,
      "step": 7410
    },
    {
      "epoch": 4.522705078125e-05,
      "model_forward_time": 0.11589956283569336,
      "step": 7410
    },
    {
      "epoch": 4.522705078125e-05,
      "step": 7410,
      "training_step_time": 0.4034082889556885
    },
    {
      "epoch": 4.5233154296875e-05,
      "model_forward_time": 0.11456060409545898,
      "step": 7411
    },
    {
      "epoch": 4.5233154296875e-05,
      "step": 7411,
      "training_step_time": 0.4120292663574219
    },
    {
      "epoch": 4.52392578125e-05,
      "model_forward_time": 0.11528849601745605,
      "step": 7412
    },
    {
      "epoch": 4.52392578125e-05,
      "step": 7412,
      "training_step_time": 0.4387092590332031
    },
    {
      "epoch": 4.5245361328125e-05,
      "model_forward_time": 0.11483526229858398,
      "step": 7413
    },
    {
      "epoch": 4.5245361328125e-05,
      "step": 7413,
      "training_step_time": 0.9579706192016602
    },
    {
      "epoch": 4.525146484375e-05,
      "model_forward_time": 0.11448311805725098,
      "step": 7414
    },
    {
      "epoch": 4.525146484375e-05,
      "step": 7414,
      "training_step_time": 0.38222193717956543
    },
    {
      "epoch": 4.5257568359375e-05,
      "model_forward_time": 0.11370635032653809,
      "step": 7415
    },
    {
      "epoch": 4.5257568359375e-05,
      "step": 7415,
      "training_step_time": 0.4175848960876465
    },
    {
      "epoch": 4.5263671875e-05,
      "model_forward_time": 0.11461114883422852,
      "step": 7416
    },
    {
      "epoch": 4.5263671875e-05,
      "step": 7416,
      "training_step_time": 0.44946932792663574
    },
    {
      "epoch": 4.5269775390625e-05,
      "model_forward_time": 0.1140449047088623,
      "step": 7417
    },
    {
      "epoch": 4.5269775390625e-05,
      "step": 7417,
      "training_step_time": 0.42592334747314453
    },
    {
      "epoch": 4.527587890625e-05,
      "model_forward_time": 0.1148688793182373,
      "step": 7418
    },
    {
      "epoch": 4.527587890625e-05,
      "step": 7418,
      "training_step_time": 0.440213680267334
    },
    {
      "epoch": 4.5281982421875e-05,
      "model_forward_time": 0.11588311195373535,
      "step": 7419
    },
    {
      "epoch": 4.5281982421875e-05,
      "step": 7419,
      "training_step_time": 0.765061616897583
    },
    {
      "epoch": 4.52880859375e-05,
      "grad_norm": 0.28420746326446533,
      "learning_rate": 9.852366233638144e-05,
      "loss": 0.0725,
      "step": 7420
    },
    {
      "epoch": 4.52880859375e-05,
      "model_forward_time": 0.11470842361450195,
      "step": 7420
    },
    {
      "epoch": 4.52880859375e-05,
      "step": 7420,
      "training_step_time": 0.37925028800964355
    },
    {
      "epoch": 4.5294189453125e-05,
      "model_forward_time": 0.1141519546508789,
      "step": 7421
    },
    {
      "epoch": 4.5294189453125e-05,
      "step": 7421,
      "training_step_time": 0.4018394947052002
    },
    {
      "epoch": 4.530029296875e-05,
      "model_forward_time": 0.11391901969909668,
      "step": 7422
    },
    {
      "epoch": 4.530029296875e-05,
      "step": 7422,
      "training_step_time": 0.4190390110015869
    },
    {
      "epoch": 4.5306396484375e-05,
      "model_forward_time": 0.11378645896911621,
      "step": 7423
    },
    {
      "epoch": 4.5306396484375e-05,
      "step": 7423,
      "training_step_time": 0.4966886043548584
    },
    {
      "epoch": 4.53125e-05,
      "model_forward_time": 0.11484742164611816,
      "step": 7424
    },
    {
      "epoch": 4.53125e-05,
      "step": 7424,
      "training_step_time": 0.4279773235321045
    },
    {
      "epoch": 4.5318603515625e-05,
      "model_forward_time": 0.11461091041564941,
      "step": 7425
    },
    {
      "epoch": 4.5318603515625e-05,
      "step": 7425,
      "training_step_time": 0.47277188301086426
    },
    {
      "epoch": 4.532470703125e-05,
      "model_forward_time": 0.11464428901672363,
      "step": 7426
    },
    {
      "epoch": 4.532470703125e-05,
      "step": 7426,
      "training_step_time": 0.3806650638580322
    },
    {
      "epoch": 4.5330810546875e-05,
      "model_forward_time": 0.11489677429199219,
      "step": 7427
    },
    {
      "epoch": 4.5330810546875e-05,
      "step": 7427,
      "training_step_time": 0.38361358642578125
    },
    {
      "epoch": 4.53369140625e-05,
      "model_forward_time": 0.11443591117858887,
      "step": 7428
    },
    {
      "epoch": 4.53369140625e-05,
      "step": 7428,
      "training_step_time": 0.3903934955596924
    },
    {
      "epoch": 4.5343017578125e-05,
      "model_forward_time": 0.11486601829528809,
      "step": 7429
    },
    {
      "epoch": 4.5343017578125e-05,
      "step": 7429,
      "training_step_time": 0.4323725700378418
    },
    {
      "epoch": 4.534912109375e-05,
      "grad_norm": 0.2454838901758194,
      "learning_rate": 9.851700777588453e-05,
      "loss": 0.0696,
      "step": 7430
    },
    {
      "epoch": 4.534912109375e-05,
      "model_forward_time": 0.11430120468139648,
      "step": 7430
    },
    {
      "epoch": 4.534912109375e-05,
      "step": 7430,
      "training_step_time": 0.4295487403869629
    },
    {
      "epoch": 4.5355224609375e-05,
      "model_forward_time": 0.1146397590637207,
      "step": 7431
    },
    {
      "epoch": 4.5355224609375e-05,
      "step": 7431,
      "training_step_time": 0.8931527137756348
    },
    {
      "epoch": 4.5361328125e-05,
      "model_forward_time": 0.11513137817382812,
      "step": 7432
    },
    {
      "epoch": 4.5361328125e-05,
      "step": 7432,
      "training_step_time": 0.4193747043609619
    },
    {
      "epoch": 4.5367431640625e-05,
      "model_forward_time": 0.11372780799865723,
      "step": 7433
    },
    {
      "epoch": 4.5367431640625e-05,
      "step": 7433,
      "training_step_time": 0.4161221981048584
    },
    {
      "epoch": 4.537353515625e-05,
      "model_forward_time": 0.11391305923461914,
      "step": 7434
    },
    {
      "epoch": 4.537353515625e-05,
      "step": 7434,
      "training_step_time": 0.3875257968902588
    },
    {
      "epoch": 4.5379638671875e-05,
      "model_forward_time": 0.11421847343444824,
      "step": 7435
    },
    {
      "epoch": 4.5379638671875e-05,
      "step": 7435,
      "training_step_time": 0.3970317840576172
    },
    {
      "epoch": 4.53857421875e-05,
      "model_forward_time": 0.1146385669708252,
      "step": 7436
    },
    {
      "epoch": 4.53857421875e-05,
      "step": 7436,
      "training_step_time": 0.4044179916381836
    },
    {
      "epoch": 4.5391845703125e-05,
      "model_forward_time": 0.11480093002319336,
      "step": 7437
    },
    {
      "epoch": 4.5391845703125e-05,
      "step": 7437,
      "training_step_time": 0.49981093406677246
    },
    {
      "epoch": 4.539794921875e-05,
      "model_forward_time": 0.11434292793273926,
      "step": 7438
    },
    {
      "epoch": 4.539794921875e-05,
      "step": 7438,
      "training_step_time": 0.38733792304992676
    },
    {
      "epoch": 4.5404052734375e-05,
      "model_forward_time": 0.11438393592834473,
      "step": 7439
    },
    {
      "epoch": 4.5404052734375e-05,
      "step": 7439,
      "training_step_time": 0.6842272281646729
    },
    {
      "epoch": 4.541015625e-05,
      "grad_norm": 0.1972702294588089,
      "learning_rate": 9.851033847720166e-05,
      "loss": 0.0683,
      "step": 7440
    },
    {
      "epoch": 4.541015625e-05,
      "model_forward_time": 0.11420249938964844,
      "step": 7440
    },
    {
      "epoch": 4.541015625e-05,
      "step": 7440,
      "training_step_time": 0.40363073348999023
    },
    {
      "epoch": 4.5416259765625e-05,
      "model_forward_time": 0.11408543586730957,
      "step": 7441
    },
    {
      "epoch": 4.5416259765625e-05,
      "step": 7441,
      "training_step_time": 0.3886067867279053
    },
    {
      "epoch": 4.542236328125e-05,
      "model_forward_time": 0.11386871337890625,
      "step": 7442
    },
    {
      "epoch": 4.542236328125e-05,
      "step": 7442,
      "training_step_time": 0.45271897315979004
    },
    {
      "epoch": 4.5428466796875e-05,
      "model_forward_time": 0.11440920829772949,
      "step": 7443
    },
    {
      "epoch": 4.5428466796875e-05,
      "step": 7443,
      "training_step_time": 0.6882596015930176
    },
    {
      "epoch": 4.54345703125e-05,
      "model_forward_time": 0.11463665962219238,
      "step": 7444
    },
    {
      "epoch": 4.54345703125e-05,
      "step": 7444,
      "training_step_time": 0.38210153579711914
    },
    {
      "epoch": 4.5440673828125e-05,
      "model_forward_time": 0.11424565315246582,
      "step": 7445
    },
    {
      "epoch": 4.5440673828125e-05,
      "step": 7445,
      "training_step_time": 0.44412899017333984
    },
    {
      "epoch": 4.544677734375e-05,
      "model_forward_time": 0.11450695991516113,
      "step": 7446
    },
    {
      "epoch": 4.544677734375e-05,
      "step": 7446,
      "training_step_time": 0.3975498676300049
    },
    {
      "epoch": 4.5452880859375e-05,
      "model_forward_time": 0.11423349380493164,
      "step": 7447
    },
    {
      "epoch": 4.5452880859375e-05,
      "step": 7447,
      "training_step_time": 0.4432351589202881
    },
    {
      "epoch": 4.5458984375e-05,
      "model_forward_time": 0.11360955238342285,
      "step": 7448
    },
    {
      "epoch": 4.5458984375e-05,
      "step": 7448,
      "training_step_time": 0.43622469902038574
    },
    {
      "epoch": 4.5465087890625e-05,
      "model_forward_time": 0.11486053466796875,
      "step": 7449
    },
    {
      "epoch": 4.5465087890625e-05,
      "step": 7449,
      "training_step_time": 0.5366799831390381
    },
    {
      "epoch": 4.547119140625e-05,
      "grad_norm": 0.2558968663215637,
      "learning_rate": 9.85036544423588e-05,
      "loss": 0.0648,
      "step": 7450
    },
    {
      "epoch": 4.547119140625e-05,
      "model_forward_time": 0.11501455307006836,
      "step": 7450
    },
    {
      "epoch": 4.547119140625e-05,
      "step": 7450,
      "training_step_time": 0.40422534942626953
    },
    {
      "epoch": 4.5477294921875e-05,
      "model_forward_time": 0.1161494255065918,
      "step": 7451
    },
    {
      "epoch": 4.5477294921875e-05,
      "step": 7451,
      "training_step_time": 0.394481897354126
    },
    {
      "epoch": 4.54833984375e-05,
      "model_forward_time": 0.11461663246154785,
      "step": 7452
    },
    {
      "epoch": 4.54833984375e-05,
      "step": 7452,
      "training_step_time": 0.3968489170074463
    },
    {
      "epoch": 4.5489501953125e-05,
      "model_forward_time": 0.11436820030212402,
      "step": 7453
    },
    {
      "epoch": 4.5489501953125e-05,
      "step": 7453,
      "training_step_time": 0.39355015754699707
    },
    {
      "epoch": 4.549560546875e-05,
      "model_forward_time": 0.11616659164428711,
      "step": 7454
    },
    {
      "epoch": 4.549560546875e-05,
      "step": 7454,
      "training_step_time": 0.39384007453918457
    },
    {
      "epoch": 4.5501708984375e-05,
      "model_forward_time": 0.1147758960723877,
      "step": 7455
    },
    {
      "epoch": 4.5501708984375e-05,
      "step": 7455,
      "training_step_time": 0.5882611274719238
    },
    {
      "epoch": 4.55078125e-05,
      "model_forward_time": 0.11463332176208496,
      "step": 7456
    },
    {
      "epoch": 4.55078125e-05,
      "step": 7456,
      "training_step_time": 0.45615386962890625
    },
    {
      "epoch": 4.5513916015625e-05,
      "model_forward_time": 0.11377739906311035,
      "step": 7457
    },
    {
      "epoch": 4.5513916015625e-05,
      "step": 7457,
      "training_step_time": 0.3633420467376709
    },
    {
      "epoch": 4.552001953125e-05,
      "model_forward_time": 0.11413192749023438,
      "step": 7458
    },
    {
      "epoch": 4.552001953125e-05,
      "step": 7458,
      "training_step_time": 0.45543742179870605
    },
    {
      "epoch": 4.5526123046875e-05,
      "model_forward_time": 0.11349725723266602,
      "step": 7459
    },
    {
      "epoch": 4.5526123046875e-05,
      "step": 7459,
      "training_step_time": 0.7261791229248047
    },
    {
      "epoch": 4.55322265625e-05,
      "grad_norm": 0.18826763331890106,
      "learning_rate": 9.849695567338639e-05,
      "loss": 0.0675,
      "step": 7460
    },
    {
      "epoch": 4.55322265625e-05,
      "model_forward_time": 0.1141819953918457,
      "step": 7460
    },
    {
      "epoch": 4.55322265625e-05,
      "step": 7460,
      "training_step_time": 0.46309518814086914
    },
    {
      "epoch": 4.5538330078125e-05,
      "model_forward_time": 0.11407661437988281,
      "step": 7461
    },
    {
      "epoch": 4.5538330078125e-05,
      "step": 7461,
      "training_step_time": 0.5591108798980713
    },
    {
      "epoch": 4.554443359375e-05,
      "model_forward_time": 0.11424398422241211,
      "step": 7462
    },
    {
      "epoch": 4.554443359375e-05,
      "step": 7462,
      "training_step_time": 0.3768038749694824
    },
    {
      "epoch": 4.5550537109375e-05,
      "model_forward_time": 0.11390495300292969,
      "step": 7463
    },
    {
      "epoch": 4.5550537109375e-05,
      "step": 7463,
      "training_step_time": 0.3933281898498535
    },
    {
      "epoch": 4.5556640625e-05,
      "model_forward_time": 0.11411762237548828,
      "step": 7464
    },
    {
      "epoch": 4.5556640625e-05,
      "step": 7464,
      "training_step_time": 0.3913440704345703
    },
    {
      "epoch": 4.5562744140625e-05,
      "model_forward_time": 0.11454248428344727,
      "step": 7465
    },
    {
      "epoch": 4.5562744140625e-05,
      "step": 7465,
      "training_step_time": 0.42014002799987793
    },
    {
      "epoch": 4.556884765625e-05,
      "model_forward_time": 0.11398839950561523,
      "step": 7466
    },
    {
      "epoch": 4.556884765625e-05,
      "step": 7466,
      "training_step_time": 0.3944382667541504
    },
    {
      "epoch": 4.5574951171875e-05,
      "model_forward_time": 0.11503386497497559,
      "step": 7467
    },
    {
      "epoch": 4.5574951171875e-05,
      "step": 7467,
      "training_step_time": 0.43879222869873047
    },
    {
      "epoch": 4.55810546875e-05,
      "model_forward_time": 0.11431741714477539,
      "step": 7468
    },
    {
      "epoch": 4.55810546875e-05,
      "step": 7468,
      "training_step_time": 0.39630722999572754
    },
    {
      "epoch": 4.5587158203125e-05,
      "model_forward_time": 0.11532402038574219,
      "step": 7469
    },
    {
      "epoch": 4.5587158203125e-05,
      "step": 7469,
      "training_step_time": 0.4772350788116455
    },
    {
      "epoch": 4.559326171875e-05,
      "grad_norm": 0.28596213459968567,
      "learning_rate": 9.849024217231935e-05,
      "loss": 0.0695,
      "step": 7470
    },
    {
      "epoch": 4.559326171875e-05,
      "model_forward_time": 0.11503124237060547,
      "step": 7470
    },
    {
      "epoch": 4.559326171875e-05,
      "step": 7470,
      "training_step_time": 0.4119389057159424
    },
    {
      "epoch": 4.5599365234375e-05,
      "model_forward_time": 0.11432957649230957,
      "step": 7471
    },
    {
      "epoch": 4.5599365234375e-05,
      "step": 7471,
      "training_step_time": 0.39635443687438965
    },
    {
      "epoch": 4.560546875e-05,
      "model_forward_time": 0.11502790451049805,
      "step": 7472
    },
    {
      "epoch": 4.560546875e-05,
      "step": 7472,
      "training_step_time": 0.4470639228820801
    },
    {
      "epoch": 4.5611572265625e-05,
      "model_forward_time": 0.11604762077331543,
      "step": 7473
    },
    {
      "epoch": 4.5611572265625e-05,
      "step": 7473,
      "training_step_time": 1.1820602416992188
    },
    {
      "epoch": 4.561767578125e-05,
      "model_forward_time": 0.11358642578125,
      "step": 7474
    },
    {
      "epoch": 4.561767578125e-05,
      "step": 7474,
      "training_step_time": 0.38466835021972656
    },
    {
      "epoch": 4.5623779296875e-05,
      "model_forward_time": 0.1136019229888916,
      "step": 7475
    },
    {
      "epoch": 4.5623779296875e-05,
      "step": 7475,
      "training_step_time": 0.37890124320983887
    },
    {
      "epoch": 4.56298828125e-05,
      "model_forward_time": 0.11386513710021973,
      "step": 7476
    },
    {
      "epoch": 4.56298828125e-05,
      "step": 7476,
      "training_step_time": 0.378894567489624
    },
    {
      "epoch": 4.5635986328125e-05,
      "model_forward_time": 0.11363792419433594,
      "step": 7477
    },
    {
      "epoch": 4.5635986328125e-05,
      "step": 7477,
      "training_step_time": 0.38848042488098145
    },
    {
      "epoch": 4.564208984375e-05,
      "model_forward_time": 0.11461305618286133,
      "step": 7478
    },
    {
      "epoch": 4.564208984375e-05,
      "step": 7478,
      "training_step_time": 0.38493967056274414
    },
    {
      "epoch": 4.5648193359375e-05,
      "model_forward_time": 0.11564874649047852,
      "step": 7479
    },
    {
      "epoch": 4.5648193359375e-05,
      "step": 7479,
      "training_step_time": 0.8603501319885254
    },
    {
      "epoch": 4.5654296875e-05,
      "grad_norm": 0.2658058702945709,
      "learning_rate": 9.848351394119704e-05,
      "loss": 0.0681,
      "step": 7480
    },
    {
      "epoch": 4.5654296875e-05,
      "model_forward_time": 0.11451077461242676,
      "step": 7480
    },
    {
      "epoch": 4.5654296875e-05,
      "step": 7480,
      "training_step_time": 0.5022842884063721
    },
    {
      "epoch": 4.5660400390625e-05,
      "model_forward_time": 0.11385345458984375,
      "step": 7481
    },
    {
      "epoch": 4.5660400390625e-05,
      "step": 7481,
      "training_step_time": 0.5149626731872559
    },
    {
      "epoch": 4.566650390625e-05,
      "model_forward_time": 0.11371874809265137,
      "step": 7482
    },
    {
      "epoch": 4.566650390625e-05,
      "step": 7482,
      "training_step_time": 0.4474937915802002
    },
    {
      "epoch": 4.5672607421875e-05,
      "model_forward_time": 0.1143653392791748,
      "step": 7483
    },
    {
      "epoch": 4.5672607421875e-05,
      "step": 7483,
      "training_step_time": 0.42191338539123535
    },
    {
      "epoch": 4.56787109375e-05,
      "model_forward_time": 0.11438584327697754,
      "step": 7484
    },
    {
      "epoch": 4.56787109375e-05,
      "step": 7484,
      "training_step_time": 0.437732458114624
    },
    {
      "epoch": 4.5684814453125e-05,
      "model_forward_time": 0.11501431465148926,
      "step": 7485
    },
    {
      "epoch": 4.5684814453125e-05,
      "step": 7485,
      "training_step_time": 0.5820350646972656
    },
    {
      "epoch": 4.569091796875e-05,
      "model_forward_time": 0.11441230773925781,
      "step": 7486
    },
    {
      "epoch": 4.569091796875e-05,
      "step": 7486,
      "training_step_time": 0.3890044689178467
    },
    {
      "epoch": 4.5697021484375e-05,
      "model_forward_time": 0.11482715606689453,
      "step": 7487
    },
    {
      "epoch": 4.5697021484375e-05,
      "step": 7487,
      "training_step_time": 0.38451147079467773
    },
    {
      "epoch": 4.5703125e-05,
      "model_forward_time": 0.11473417282104492,
      "step": 7488
    },
    {
      "epoch": 4.5703125e-05,
      "step": 7488,
      "training_step_time": 0.416165828704834
    },
    {
      "epoch": 4.5709228515625e-05,
      "model_forward_time": 0.1162264347076416,
      "step": 7489
    },
    {
      "epoch": 4.5709228515625e-05,
      "step": 7489,
      "training_step_time": 0.46312713623046875
    },
    {
      "epoch": 4.571533203125e-05,
      "grad_norm": 0.2244815230369568,
      "learning_rate": 9.847677098206332e-05,
      "loss": 0.0713,
      "step": 7490
    },
    {
      "epoch": 4.571533203125e-05,
      "model_forward_time": 0.12395548820495605,
      "step": 7490
    },
    {
      "epoch": 4.571533203125e-05,
      "step": 7490,
      "training_step_time": 0.532066822052002
    },
    {
      "epoch": 4.5721435546875e-05,
      "model_forward_time": 0.11617112159729004,
      "step": 7491
    },
    {
      "epoch": 4.5721435546875e-05,
      "step": 7491,
      "training_step_time": 0.7190415859222412
    },
    {
      "epoch": 4.57275390625e-05,
      "model_forward_time": 0.11857795715332031,
      "step": 7492
    },
    {
      "epoch": 4.57275390625e-05,
      "step": 7492,
      "training_step_time": 0.656559944152832
    },
    {
      "epoch": 4.5733642578125e-05,
      "model_forward_time": 0.11955595016479492,
      "step": 7493
    },
    {
      "epoch": 4.5733642578125e-05,
      "step": 7493,
      "training_step_time": 0.7572319507598877
    },
    {
      "epoch": 4.573974609375e-05,
      "model_forward_time": 0.1197969913482666,
      "step": 7494
    },
    {
      "epoch": 4.573974609375e-05,
      "step": 7494,
      "training_step_time": 0.6566481590270996
    },
    {
      "epoch": 4.5745849609375e-05,
      "model_forward_time": 0.11812543869018555,
      "step": 7495
    },
    {
      "epoch": 4.5745849609375e-05,
      "step": 7495,
      "training_step_time": 0.6974411010742188
    },
    {
      "epoch": 4.5751953125e-05,
      "model_forward_time": 0.11552619934082031,
      "step": 7496
    },
    {
      "epoch": 4.5751953125e-05,
      "step": 7496,
      "training_step_time": 0.7539620399475098
    },
    {
      "epoch": 4.5758056640625e-05,
      "model_forward_time": 0.1391909122467041,
      "step": 7497
    },
    {
      "epoch": 4.5758056640625e-05,
      "step": 7497,
      "training_step_time": 0.6255393028259277
    },
    {
      "epoch": 4.576416015625e-05,
      "model_forward_time": 0.12048125267028809,
      "step": 7498
    },
    {
      "epoch": 4.576416015625e-05,
      "step": 7498,
      "training_step_time": 0.6761307716369629
    },
    {
      "epoch": 4.5770263671875e-05,
      "model_forward_time": 0.12464094161987305,
      "step": 7499
    },
    {
      "epoch": 4.5770263671875e-05,
      "step": 7499,
      "training_step_time": 0.6821866035461426
    },
    {
      "epoch": 4.57763671875e-05,
      "grad_norm": 0.25787821412086487,
      "learning_rate": 9.847001329696653e-05,
      "loss": 0.07,
      "step": 7500
    },
    {
      "epoch": 4.57763671875e-05,
      "model_forward_time": 0.12168288230895996,
      "step": 7500
    },
    {
      "epoch": 4.57763671875e-05,
      "step": 7500,
      "training_step_time": 0.6933951377868652
    },
    {
      "epoch": 4.5782470703125e-05,
      "model_forward_time": 0.11750650405883789,
      "step": 7501
    },
    {
      "epoch": 4.5782470703125e-05,
      "step": 7501,
      "training_step_time": 0.6497616767883301
    },
    {
      "epoch": 4.578857421875e-05,
      "model_forward_time": 0.11607670783996582,
      "step": 7502
    },
    {
      "epoch": 4.578857421875e-05,
      "step": 7502,
      "training_step_time": 0.6616675853729248
    },
    {
      "epoch": 4.5794677734375e-05,
      "model_forward_time": 0.11680102348327637,
      "step": 7503
    },
    {
      "epoch": 4.5794677734375e-05,
      "step": 7503,
      "training_step_time": 0.7597198486328125
    },
    {
      "epoch": 4.580078125e-05,
      "model_forward_time": 0.11730670928955078,
      "step": 7504
    },
    {
      "epoch": 4.580078125e-05,
      "step": 7504,
      "training_step_time": 0.7362933158874512
    },
    {
      "epoch": 4.5806884765625e-05,
      "model_forward_time": 0.1235506534576416,
      "step": 7505
    },
    {
      "epoch": 4.5806884765625e-05,
      "step": 7505,
      "training_step_time": 0.7329528331756592
    },
    {
      "epoch": 4.581298828125e-05,
      "model_forward_time": 0.12076044082641602,
      "step": 7506
    },
    {
      "epoch": 4.581298828125e-05,
      "step": 7506,
      "training_step_time": 0.7273385524749756
    },
    {
      "epoch": 4.5819091796875e-05,
      "model_forward_time": 0.11934638023376465,
      "step": 7507
    },
    {
      "epoch": 4.5819091796875e-05,
      "step": 7507,
      "training_step_time": 0.6488194465637207
    },
    {
      "epoch": 4.58251953125e-05,
      "model_forward_time": 0.11654162406921387,
      "step": 7508
    },
    {
      "epoch": 4.58251953125e-05,
      "step": 7508,
      "training_step_time": 0.6413931846618652
    },
    {
      "epoch": 4.5831298828125e-05,
      "model_forward_time": 0.11710476875305176,
      "step": 7509
    },
    {
      "epoch": 4.5831298828125e-05,
      "step": 7509,
      "training_step_time": 0.6336266994476318
    },
    {
      "epoch": 4.583740234375e-05,
      "grad_norm": 0.23304009437561035,
      "learning_rate": 9.846324088795948e-05,
      "loss": 0.0741,
      "step": 7510
    },
    {
      "epoch": 4.583740234375e-05,
      "model_forward_time": 0.11605358123779297,
      "step": 7510
    },
    {
      "epoch": 4.583740234375e-05,
      "step": 7510,
      "training_step_time": 0.7082521915435791
    },
    {
      "epoch": 4.5843505859375e-05,
      "model_forward_time": 0.11640071868896484,
      "step": 7511
    },
    {
      "epoch": 4.5843505859375e-05,
      "step": 7511,
      "training_step_time": 0.743574857711792
    },
    {
      "epoch": 4.5849609375e-05,
      "model_forward_time": 0.11623811721801758,
      "step": 7512
    },
    {
      "epoch": 4.5849609375e-05,
      "step": 7512,
      "training_step_time": 0.790325403213501
    },
    {
      "epoch": 4.5855712890625e-05,
      "model_forward_time": 0.11666059494018555,
      "step": 7513
    },
    {
      "epoch": 4.5855712890625e-05,
      "step": 7513,
      "training_step_time": 0.8084938526153564
    },
    {
      "epoch": 4.586181640625e-05,
      "model_forward_time": 0.11969637870788574,
      "step": 7514
    },
    {
      "epoch": 4.586181640625e-05,
      "step": 7514,
      "training_step_time": 0.745079517364502
    },
    {
      "epoch": 4.5867919921875e-05,
      "model_forward_time": 0.11679673194885254,
      "step": 7515
    },
    {
      "epoch": 4.5867919921875e-05,
      "step": 7515,
      "training_step_time": 0.7541608810424805
    },
    {
      "epoch": 4.58740234375e-05,
      "model_forward_time": 0.11752510070800781,
      "step": 7516
    },
    {
      "epoch": 4.58740234375e-05,
      "step": 7516,
      "training_step_time": 0.649895429611206
    },
    {
      "epoch": 4.5880126953125e-05,
      "model_forward_time": 0.11844968795776367,
      "step": 7517
    },
    {
      "epoch": 4.5880126953125e-05,
      "step": 7517,
      "training_step_time": 0.661064624786377
    },
    {
      "epoch": 4.588623046875e-05,
      "model_forward_time": 0.11896944046020508,
      "step": 7518
    },
    {
      "epoch": 4.588623046875e-05,
      "step": 7518,
      "training_step_time": 0.6930394172668457
    },
    {
      "epoch": 4.5892333984375e-05,
      "model_forward_time": 0.11675453186035156,
      "step": 7519
    },
    {
      "epoch": 4.5892333984375e-05,
      "step": 7519,
      "training_step_time": 0.6989562511444092
    },
    {
      "epoch": 4.58984375e-05,
      "grad_norm": 0.16819658875465393,
      "learning_rate": 9.845645375709945e-05,
      "loss": 0.0845,
      "step": 7520
    },
    {
      "epoch": 4.58984375e-05,
      "model_forward_time": 0.11996841430664062,
      "step": 7520
    },
    {
      "epoch": 4.58984375e-05,
      "step": 7520,
      "training_step_time": 0.6844868659973145
    },
    {
      "epoch": 4.5904541015625e-05,
      "model_forward_time": 0.11627697944641113,
      "step": 7521
    },
    {
      "epoch": 4.5904541015625e-05,
      "step": 7521,
      "training_step_time": 0.694183349609375
    },
    {
      "epoch": 4.591064453125e-05,
      "model_forward_time": 0.11674857139587402,
      "step": 7522
    },
    {
      "epoch": 4.591064453125e-05,
      "step": 7522,
      "training_step_time": 0.54364013671875
    },
    {
      "epoch": 4.5916748046875e-05,
      "model_forward_time": 0.12000298500061035,
      "step": 7523
    },
    {
      "epoch": 4.5916748046875e-05,
      "step": 7523,
      "training_step_time": 0.7035121917724609
    },
    {
      "epoch": 4.59228515625e-05,
      "model_forward_time": 0.11854124069213867,
      "step": 7524
    },
    {
      "epoch": 4.59228515625e-05,
      "step": 7524,
      "training_step_time": 0.7165164947509766
    },
    {
      "epoch": 4.5928955078125e-05,
      "model_forward_time": 0.11786842346191406,
      "step": 7525
    },
    {
      "epoch": 4.5928955078125e-05,
      "step": 7525,
      "training_step_time": 0.7536506652832031
    },
    {
      "epoch": 4.593505859375e-05,
      "model_forward_time": 0.11607503890991211,
      "step": 7526
    },
    {
      "epoch": 4.593505859375e-05,
      "step": 7526,
      "training_step_time": 0.6304488182067871
    },
    {
      "epoch": 4.5941162109375e-05,
      "model_forward_time": 0.11936306953430176,
      "step": 7527
    },
    {
      "epoch": 4.5941162109375e-05,
      "step": 7527,
      "training_step_time": 0.682952880859375
    },
    {
      "epoch": 4.5947265625e-05,
      "model_forward_time": 0.11851692199707031,
      "step": 7528
    },
    {
      "epoch": 4.5947265625e-05,
      "step": 7528,
      "training_step_time": 0.6539757251739502
    },
    {
      "epoch": 4.5953369140625e-05,
      "model_forward_time": 0.1170346736907959,
      "step": 7529
    },
    {
      "epoch": 4.5953369140625e-05,
      "step": 7529,
      "training_step_time": 0.6879720687866211
    },
    {
      "epoch": 4.595947265625e-05,
      "grad_norm": 0.2848677635192871,
      "learning_rate": 9.844965190644817e-05,
      "loss": 0.0823,
      "step": 7530
    },
    {
      "epoch": 4.595947265625e-05,
      "model_forward_time": 0.12228274345397949,
      "step": 7530
    },
    {
      "epoch": 4.595947265625e-05,
      "step": 7530,
      "training_step_time": 0.707627534866333
    },
    {
      "epoch": 4.5965576171875e-05,
      "model_forward_time": 0.11480474472045898,
      "step": 7531
    },
    {
      "epoch": 4.5965576171875e-05,
      "step": 7531,
      "training_step_time": 0.6888935565948486
    },
    {
      "epoch": 4.59716796875e-05,
      "model_forward_time": 0.11868548393249512,
      "step": 7532
    },
    {
      "epoch": 4.59716796875e-05,
      "step": 7532,
      "training_step_time": 0.7271380424499512
    },
    {
      "epoch": 4.5977783203125e-05,
      "model_forward_time": 0.12133312225341797,
      "step": 7533
    },
    {
      "epoch": 4.5977783203125e-05,
      "step": 7533,
      "training_step_time": 0.6901342868804932
    },
    {
      "epoch": 4.598388671875e-05,
      "model_forward_time": 0.11841535568237305,
      "step": 7534
    },
    {
      "epoch": 4.598388671875e-05,
      "step": 7534,
      "training_step_time": 0.6692764759063721
    },
    {
      "epoch": 4.5989990234375e-05,
      "model_forward_time": 0.11884188652038574,
      "step": 7535
    },
    {
      "epoch": 4.5989990234375e-05,
      "step": 7535,
      "training_step_time": 0.710608959197998
    },
    {
      "epoch": 4.599609375e-05,
      "model_forward_time": 0.12288856506347656,
      "step": 7536
    },
    {
      "epoch": 4.599609375e-05,
      "step": 7536,
      "training_step_time": 0.6778542995452881
    },
    {
      "epoch": 4.6002197265625e-05,
      "model_forward_time": 0.12685060501098633,
      "step": 7537
    },
    {
      "epoch": 4.6002197265625e-05,
      "step": 7537,
      "training_step_time": 0.6458685398101807
    },
    {
      "epoch": 4.600830078125e-05,
      "model_forward_time": 0.12315773963928223,
      "step": 7538
    },
    {
      "epoch": 4.600830078125e-05,
      "step": 7538,
      "training_step_time": 0.6660735607147217
    },
    {
      "epoch": 4.6014404296875e-05,
      "model_forward_time": 0.11692380905151367,
      "step": 7539
    },
    {
      "epoch": 4.6014404296875e-05,
      "step": 7539,
      "training_step_time": 0.6491155624389648
    },
    {
      "epoch": 4.60205078125e-05,
      "grad_norm": 0.3227285146713257,
      "learning_rate": 9.84428353380719e-05,
      "loss": 0.0755,
      "step": 7540
    },
    {
      "epoch": 4.60205078125e-05,
      "model_forward_time": 0.11968302726745605,
      "step": 7540
    },
    {
      "epoch": 4.60205078125e-05,
      "step": 7540,
      "training_step_time": 0.6316828727722168
    },
    {
      "epoch": 4.6026611328125e-05,
      "model_forward_time": 0.11974000930786133,
      "step": 7541
    },
    {
      "epoch": 4.6026611328125e-05,
      "step": 7541,
      "training_step_time": 0.7288868427276611
    },
    {
      "epoch": 4.603271484375e-05,
      "model_forward_time": 0.11919307708740234,
      "step": 7542
    },
    {
      "epoch": 4.603271484375e-05,
      "step": 7542,
      "training_step_time": 0.6130728721618652
    },
    {
      "epoch": 4.6038818359375e-05,
      "model_forward_time": 0.11763358116149902,
      "step": 7543
    },
    {
      "epoch": 4.6038818359375e-05,
      "step": 7543,
      "training_step_time": 0.7574725151062012
    },
    {
      "epoch": 4.6044921875e-05,
      "model_forward_time": 0.11796903610229492,
      "step": 7544
    },
    {
      "epoch": 4.6044921875e-05,
      "step": 7544,
      "training_step_time": 0.6216738224029541
    },
    {
      "epoch": 4.6051025390625e-05,
      "model_forward_time": 0.12368059158325195,
      "step": 7545
    },
    {
      "epoch": 4.6051025390625e-05,
      "step": 7545,
      "training_step_time": 0.6768167018890381
    },
    {
      "epoch": 4.605712890625e-05,
      "model_forward_time": 0.11673951148986816,
      "step": 7546
    },
    {
      "epoch": 4.605712890625e-05,
      "step": 7546,
      "training_step_time": 0.6855032444000244
    },
    {
      "epoch": 4.6063232421875e-05,
      "model_forward_time": 0.12356042861938477,
      "step": 7547
    },
    {
      "epoch": 4.6063232421875e-05,
      "step": 7547,
      "training_step_time": 0.6749787330627441
    },
    {
      "epoch": 4.60693359375e-05,
      "model_forward_time": 0.11908984184265137,
      "step": 7548
    },
    {
      "epoch": 4.60693359375e-05,
      "step": 7548,
      "training_step_time": 0.6832542419433594
    },
    {
      "epoch": 4.6075439453125e-05,
      "model_forward_time": 0.12533211708068848,
      "step": 7549
    },
    {
      "epoch": 4.6075439453125e-05,
      "step": 7549,
      "training_step_time": 0.695533275604248
    },
    {
      "epoch": 4.608154296875e-05,
      "grad_norm": 0.2509036660194397,
      "learning_rate": 9.843600405404131e-05,
      "loss": 0.0792,
      "step": 7550
    },
    {
      "epoch": 4.608154296875e-05,
      "model_forward_time": 0.11869406700134277,
      "step": 7550
    },
    {
      "epoch": 4.608154296875e-05,
      "step": 7550,
      "training_step_time": 0.688838005065918
    },
    {
      "epoch": 4.6087646484375e-05,
      "model_forward_time": 0.12101197242736816,
      "step": 7551
    },
    {
      "epoch": 4.6087646484375e-05,
      "step": 7551,
      "training_step_time": 0.6306064128875732
    },
    {
      "epoch": 4.609375e-05,
      "model_forward_time": 0.1224825382232666,
      "step": 7552
    },
    {
      "epoch": 4.609375e-05,
      "step": 7552,
      "training_step_time": 0.76035475730896
    },
    {
      "epoch": 4.6099853515625e-05,
      "model_forward_time": 0.12281012535095215,
      "step": 7553
    },
    {
      "epoch": 4.6099853515625e-05,
      "step": 7553,
      "training_step_time": 0.651296854019165
    },
    {
      "epoch": 4.610595703125e-05,
      "model_forward_time": 0.12238335609436035,
      "step": 7554
    },
    {
      "epoch": 4.610595703125e-05,
      "step": 7554,
      "training_step_time": 0.5934774875640869
    },
    {
      "epoch": 4.6112060546875e-05,
      "model_forward_time": 0.11971426010131836,
      "step": 7555
    },
    {
      "epoch": 4.6112060546875e-05,
      "step": 7555,
      "training_step_time": 0.582115650177002
    },
    {
      "epoch": 4.61181640625e-05,
      "model_forward_time": 0.11769652366638184,
      "step": 7556
    },
    {
      "epoch": 4.61181640625e-05,
      "step": 7556,
      "training_step_time": 0.6010966300964355
    },
    {
      "epoch": 4.6124267578125e-05,
      "model_forward_time": 0.11849713325500488,
      "step": 7557
    },
    {
      "epoch": 4.6124267578125e-05,
      "step": 7557,
      "training_step_time": 0.5591318607330322
    },
    {
      "epoch": 4.613037109375e-05,
      "model_forward_time": 0.12372827529907227,
      "step": 7558
    },
    {
      "epoch": 4.613037109375e-05,
      "step": 7558,
      "training_step_time": 0.583383321762085
    },
    {
      "epoch": 4.6136474609375e-05,
      "model_forward_time": 0.11902952194213867,
      "step": 7559
    },
    {
      "epoch": 4.6136474609375e-05,
      "step": 7559,
      "training_step_time": 0.5349020957946777
    },
    {
      "epoch": 4.6142578125e-05,
      "grad_norm": 0.34053775668144226,
      "learning_rate": 9.842915805643155e-05,
      "loss": 0.0766,
      "step": 7560
    },
    {
      "epoch": 4.6142578125e-05,
      "model_forward_time": 0.11776185035705566,
      "step": 7560
    },
    {
      "epoch": 4.6142578125e-05,
      "step": 7560,
      "training_step_time": 0.6541252136230469
    },
    {
      "epoch": 4.6148681640625e-05,
      "model_forward_time": 0.11777901649475098,
      "step": 7561
    },
    {
      "epoch": 4.6148681640625e-05,
      "step": 7561,
      "training_step_time": 0.5552992820739746
    },
    {
      "epoch": 4.615478515625e-05,
      "model_forward_time": 0.1194159984588623,
      "step": 7562
    },
    {
      "epoch": 4.615478515625e-05,
      "step": 7562,
      "training_step_time": 0.49825215339660645
    },
    {
      "epoch": 4.6160888671875e-05,
      "model_forward_time": 0.11683130264282227,
      "step": 7563
    },
    {
      "epoch": 4.6160888671875e-05,
      "step": 7563,
      "training_step_time": 0.48296308517456055
    },
    {
      "epoch": 4.61669921875e-05,
      "model_forward_time": 0.11642003059387207,
      "step": 7564
    },
    {
      "epoch": 4.61669921875e-05,
      "step": 7564,
      "training_step_time": 0.4672505855560303
    },
    {
      "epoch": 4.6173095703125e-05,
      "model_forward_time": 0.11552119255065918,
      "step": 7565
    },
    {
      "epoch": 4.6173095703125e-05,
      "step": 7565,
      "training_step_time": 0.3899984359741211
    },
    {
      "epoch": 4.617919921875e-05,
      "model_forward_time": 0.11539936065673828,
      "step": 7566
    },
    {
      "epoch": 4.617919921875e-05,
      "step": 7566,
      "training_step_time": 0.4036703109741211
    },
    {
      "epoch": 4.6185302734375e-05,
      "model_forward_time": 0.11536765098571777,
      "step": 7567
    },
    {
      "epoch": 4.6185302734375e-05,
      "step": 7567,
      "training_step_time": 0.4231410026550293
    },
    {
      "epoch": 4.619140625e-05,
      "model_forward_time": 0.11586785316467285,
      "step": 7568
    },
    {
      "epoch": 4.619140625e-05,
      "step": 7568,
      "training_step_time": 0.44458985328674316
    },
    {
      "epoch": 4.6197509765625e-05,
      "model_forward_time": 0.11494016647338867,
      "step": 7569
    },
    {
      "epoch": 4.6197509765625e-05,
      "step": 7569,
      "training_step_time": 0.3866569995880127
    },
    {
      "epoch": 4.620361328125e-05,
      "grad_norm": 0.31340715289115906,
      "learning_rate": 9.842229734732229e-05,
      "loss": 0.0771,
      "step": 7570
    },
    {
      "epoch": 4.620361328125e-05,
      "model_forward_time": 0.12041354179382324,
      "step": 7570
    },
    {
      "epoch": 4.620361328125e-05,
      "step": 7570,
      "training_step_time": 0.39965105056762695
    },
    {
      "epoch": 4.6209716796875e-05,
      "model_forward_time": 0.11526203155517578,
      "step": 7571
    },
    {
      "epoch": 4.6209716796875e-05,
      "step": 7571,
      "training_step_time": 0.3787853717803955
    },
    {
      "epoch": 4.62158203125e-05,
      "model_forward_time": 0.11569809913635254,
      "step": 7572
    },
    {
      "epoch": 4.62158203125e-05,
      "step": 7572,
      "training_step_time": 0.39109230041503906
    },
    {
      "epoch": 4.6221923828125e-05,
      "model_forward_time": 0.11507654190063477,
      "step": 7573
    },
    {
      "epoch": 4.6221923828125e-05,
      "step": 7573,
      "training_step_time": 0.4438619613647461
    },
    {
      "epoch": 4.622802734375e-05,
      "model_forward_time": 0.11541938781738281,
      "step": 7574
    },
    {
      "epoch": 4.622802734375e-05,
      "step": 7574,
      "training_step_time": 0.42476558685302734
    },
    {
      "epoch": 4.6234130859375e-05,
      "model_forward_time": 0.1144411563873291,
      "step": 7575
    },
    {
      "epoch": 4.6234130859375e-05,
      "step": 7575,
      "training_step_time": 0.4137558937072754
    },
    {
      "epoch": 4.6240234375e-05,
      "model_forward_time": 0.11512565612792969,
      "step": 7576
    },
    {
      "epoch": 4.6240234375e-05,
      "step": 7576,
      "training_step_time": 0.39534974098205566
    },
    {
      "epoch": 4.6246337890625e-05,
      "model_forward_time": 0.11598634719848633,
      "step": 7577
    },
    {
      "epoch": 4.6246337890625e-05,
      "step": 7577,
      "training_step_time": 0.4994473457336426
    },
    {
      "epoch": 4.625244140625e-05,
      "model_forward_time": 0.11539840698242188,
      "step": 7578
    },
    {
      "epoch": 4.625244140625e-05,
      "step": 7578,
      "training_step_time": 0.4789559841156006
    },
    {
      "epoch": 4.6258544921875e-05,
      "model_forward_time": 0.11472392082214355,
      "step": 7579
    },
    {
      "epoch": 4.6258544921875e-05,
      "step": 7579,
      "training_step_time": 0.5001583099365234
    },
    {
      "epoch": 4.62646484375e-05,
      "grad_norm": 0.295278400182724,
      "learning_rate": 9.841542192879762e-05,
      "loss": 0.0839,
      "step": 7580
    },
    {
      "epoch": 4.62646484375e-05,
      "model_forward_time": 0.1145792007446289,
      "step": 7580
    },
    {
      "epoch": 4.62646484375e-05,
      "step": 7580,
      "training_step_time": 0.44870758056640625
    },
    {
      "epoch": 4.6270751953125e-05,
      "model_forward_time": 0.1151731014251709,
      "step": 7581
    },
    {
      "epoch": 4.6270751953125e-05,
      "step": 7581,
      "training_step_time": 0.41687822341918945
    },
    {
      "epoch": 4.627685546875e-05,
      "model_forward_time": 0.11491990089416504,
      "step": 7582
    },
    {
      "epoch": 4.627685546875e-05,
      "step": 7582,
      "training_step_time": 0.4122505187988281
    },
    {
      "epoch": 4.6282958984375e-05,
      "model_forward_time": 0.11443018913269043,
      "step": 7583
    },
    {
      "epoch": 4.6282958984375e-05,
      "step": 7583,
      "training_step_time": 0.38686609268188477
    },
    {
      "epoch": 4.62890625e-05,
      "model_forward_time": 0.11531472206115723,
      "step": 7584
    },
    {
      "epoch": 4.62890625e-05,
      "step": 7584,
      "training_step_time": 0.37343716621398926
    },
    {
      "epoch": 4.6295166015625e-05,
      "model_forward_time": 0.11628007888793945,
      "step": 7585
    },
    {
      "epoch": 4.6295166015625e-05,
      "step": 7585,
      "training_step_time": 0.39426326751708984
    },
    {
      "epoch": 4.630126953125e-05,
      "model_forward_time": 0.11472272872924805,
      "step": 7586
    },
    {
      "epoch": 4.630126953125e-05,
      "step": 7586,
      "training_step_time": 0.3959171772003174
    },
    {
      "epoch": 4.6307373046875e-05,
      "model_forward_time": 0.1154623031616211,
      "step": 7587
    },
    {
      "epoch": 4.6307373046875e-05,
      "step": 7587,
      "training_step_time": 0.39322924613952637
    },
    {
      "epoch": 4.63134765625e-05,
      "model_forward_time": 0.11583185195922852,
      "step": 7588
    },
    {
      "epoch": 4.63134765625e-05,
      "step": 7588,
      "training_step_time": 0.45177388191223145
    },
    {
      "epoch": 4.6319580078125e-05,
      "model_forward_time": 0.11526370048522949,
      "step": 7589
    },
    {
      "epoch": 4.6319580078125e-05,
      "step": 7589,
      "training_step_time": 0.38829731941223145
    },
    {
      "epoch": 4.632568359375e-05,
      "grad_norm": 0.2923046946525574,
      "learning_rate": 9.840853180294608e-05,
      "loss": 0.0787,
      "step": 7590
    },
    {
      "epoch": 4.632568359375e-05,
      "model_forward_time": 0.1153717041015625,
      "step": 7590
    },
    {
      "epoch": 4.632568359375e-05,
      "step": 7590,
      "training_step_time": 0.4246180057525635
    },
    {
      "epoch": 4.6331787109375e-05,
      "model_forward_time": 0.11545085906982422,
      "step": 7591
    },
    {
      "epoch": 4.6331787109375e-05,
      "step": 7591,
      "training_step_time": 0.4188542366027832
    },
    {
      "epoch": 4.6337890625e-05,
      "model_forward_time": 0.11428999900817871,
      "step": 7592
    },
    {
      "epoch": 4.6337890625e-05,
      "step": 7592,
      "training_step_time": 0.47475767135620117
    },
    {
      "epoch": 4.6343994140625e-05,
      "model_forward_time": 0.11457943916320801,
      "step": 7593
    },
    {
      "epoch": 4.6343994140625e-05,
      "step": 7593,
      "training_step_time": 0.4833869934082031
    },
    {
      "epoch": 4.635009765625e-05,
      "model_forward_time": 0.11512947082519531,
      "step": 7594
    },
    {
      "epoch": 4.635009765625e-05,
      "step": 7594,
      "training_step_time": 0.4012317657470703
    },
    {
      "epoch": 4.6356201171875e-05,
      "model_forward_time": 0.11451268196105957,
      "step": 7595
    },
    {
      "epoch": 4.6356201171875e-05,
      "step": 7595,
      "training_step_time": 0.42171812057495117
    },
    {
      "epoch": 4.63623046875e-05,
      "model_forward_time": 0.1147451400756836,
      "step": 7596
    },
    {
      "epoch": 4.63623046875e-05,
      "step": 7596,
      "training_step_time": 0.3900635242462158
    },
    {
      "epoch": 4.6368408203125e-05,
      "model_forward_time": 0.11446809768676758,
      "step": 7597
    },
    {
      "epoch": 4.6368408203125e-05,
      "step": 7597,
      "training_step_time": 0.3989298343658447
    },
    {
      "epoch": 4.637451171875e-05,
      "model_forward_time": 0.1154170036315918,
      "step": 7598
    },
    {
      "epoch": 4.637451171875e-05,
      "step": 7598,
      "training_step_time": 0.39362001419067383
    },
    {
      "epoch": 4.6380615234375e-05,
      "model_forward_time": 0.11460518836975098,
      "step": 7599
    },
    {
      "epoch": 4.6380615234375e-05,
      "step": 7599,
      "training_step_time": 0.3860800266265869
    },
    {
      "epoch": 4.638671875e-05,
      "grad_norm": 0.29881513118743896,
      "learning_rate": 9.840162697186075e-05,
      "loss": 0.0816,
      "step": 7600
    },
    {
      "epoch": 4.638671875e-05,
      "model_forward_time": 0.11653947830200195,
      "step": 7600
    },
    {
      "epoch": 4.638671875e-05,
      "step": 7600,
      "training_step_time": 0.3910939693450928
    },
    {
      "epoch": 4.6392822265625e-05,
      "model_forward_time": 0.11561012268066406,
      "step": 7601
    },
    {
      "epoch": 4.6392822265625e-05,
      "step": 7601,
      "training_step_time": 0.39231252670288086
    },
    {
      "epoch": 4.639892578125e-05,
      "model_forward_time": 0.11426949501037598,
      "step": 7602
    },
    {
      "epoch": 4.639892578125e-05,
      "step": 7602,
      "training_step_time": 0.38388586044311523
    },
    {
      "epoch": 4.6405029296875e-05,
      "model_forward_time": 0.11553406715393066,
      "step": 7603
    },
    {
      "epoch": 4.6405029296875e-05,
      "step": 7603,
      "training_step_time": 0.42765378952026367
    },
    {
      "epoch": 4.64111328125e-05,
      "model_forward_time": 0.11479830741882324,
      "step": 7604
    },
    {
      "epoch": 4.64111328125e-05,
      "step": 7604,
      "training_step_time": 0.4295508861541748
    },
    {
      "epoch": 4.6417236328125e-05,
      "model_forward_time": 0.11493420600891113,
      "step": 7605
    },
    {
      "epoch": 4.6417236328125e-05,
      "step": 7605,
      "training_step_time": 0.4116957187652588
    },
    {
      "epoch": 4.642333984375e-05,
      "model_forward_time": 0.1160881519317627,
      "step": 7606
    },
    {
      "epoch": 4.642333984375e-05,
      "step": 7606,
      "training_step_time": 0.369875431060791
    },
    {
      "epoch": 4.6429443359375e-05,
      "model_forward_time": 0.11573290824890137,
      "step": 7607
    },
    {
      "epoch": 4.6429443359375e-05,
      "step": 7607,
      "training_step_time": 0.4507451057434082
    },
    {
      "epoch": 4.6435546875e-05,
      "model_forward_time": 0.11532878875732422,
      "step": 7608
    },
    {
      "epoch": 4.6435546875e-05,
      "step": 7608,
      "training_step_time": 0.518071174621582
    },
    {
      "epoch": 4.6441650390625e-05,
      "model_forward_time": 0.11535835266113281,
      "step": 7609
    },
    {
      "epoch": 4.6441650390625e-05,
      "step": 7609,
      "training_step_time": 0.3931751251220703
    },
    {
      "epoch": 4.644775390625e-05,
      "grad_norm": 0.22245343029499054,
      "learning_rate": 9.83947074376391e-05,
      "loss": 0.0713,
      "step": 7610
    },
    {
      "epoch": 4.644775390625e-05,
      "model_forward_time": 0.1149754524230957,
      "step": 7610
    },
    {
      "epoch": 4.644775390625e-05,
      "step": 7610,
      "training_step_time": 0.40041637420654297
    },
    {
      "epoch": 4.6453857421875e-05,
      "model_forward_time": 0.11544060707092285,
      "step": 7611
    },
    {
      "epoch": 4.6453857421875e-05,
      "step": 7611,
      "training_step_time": 0.6692121028900146
    },
    {
      "epoch": 4.64599609375e-05,
      "model_forward_time": 0.1147305965423584,
      "step": 7612
    },
    {
      "epoch": 4.64599609375e-05,
      "step": 7612,
      "training_step_time": 0.4064452648162842
    },
    {
      "epoch": 4.6466064453125e-05,
      "model_forward_time": 0.1145014762878418,
      "step": 7613
    },
    {
      "epoch": 4.6466064453125e-05,
      "step": 7613,
      "training_step_time": 0.378978967666626
    },
    {
      "epoch": 4.647216796875e-05,
      "model_forward_time": 0.11473608016967773,
      "step": 7614
    },
    {
      "epoch": 4.647216796875e-05,
      "step": 7614,
      "training_step_time": 0.37378525733947754
    },
    {
      "epoch": 4.6478271484375e-05,
      "model_forward_time": 0.11502647399902344,
      "step": 7615
    },
    {
      "epoch": 4.6478271484375e-05,
      "step": 7615,
      "training_step_time": 0.3873732089996338
    },
    {
      "epoch": 4.6484375e-05,
      "model_forward_time": 0.11429524421691895,
      "step": 7616
    },
    {
      "epoch": 4.6484375e-05,
      "step": 7616,
      "training_step_time": 0.4005401134490967
    },
    {
      "epoch": 4.6490478515625e-05,
      "model_forward_time": 0.11471128463745117,
      "step": 7617
    },
    {
      "epoch": 4.6490478515625e-05,
      "step": 7617,
      "training_step_time": 0.9727299213409424
    },
    {
      "epoch": 4.649658203125e-05,
      "model_forward_time": 0.11440277099609375,
      "step": 7618
    },
    {
      "epoch": 4.649658203125e-05,
      "step": 7618,
      "training_step_time": 0.38591432571411133
    },
    {
      "epoch": 4.6502685546875e-05,
      "model_forward_time": 0.11419272422790527,
      "step": 7619
    },
    {
      "epoch": 4.6502685546875e-05,
      "step": 7619,
      "training_step_time": 0.46854472160339355
    },
    {
      "epoch": 4.65087890625e-05,
      "grad_norm": 0.19296959042549133,
      "learning_rate": 9.838777320238312e-05,
      "loss": 0.07,
      "step": 7620
    },
    {
      "epoch": 4.65087890625e-05,
      "model_forward_time": 0.11511349678039551,
      "step": 7620
    },
    {
      "epoch": 4.65087890625e-05,
      "step": 7620,
      "training_step_time": 0.4523649215698242
    },
    {
      "epoch": 4.6514892578125e-05,
      "model_forward_time": 0.11371827125549316,
      "step": 7621
    },
    {
      "epoch": 4.6514892578125e-05,
      "step": 7621,
      "training_step_time": 0.47583651542663574
    },
    {
      "epoch": 4.652099609375e-05,
      "model_forward_time": 0.11396574974060059,
      "step": 7622
    },
    {
      "epoch": 4.652099609375e-05,
      "step": 7622,
      "training_step_time": 0.40289926528930664
    },
    {
      "epoch": 4.6527099609375e-05,
      "model_forward_time": 0.11462879180908203,
      "step": 7623
    },
    {
      "epoch": 4.6527099609375e-05,
      "step": 7623,
      "training_step_time": 0.3971226215362549
    },
    {
      "epoch": 4.6533203125e-05,
      "model_forward_time": 0.1138007640838623,
      "step": 7624
    },
    {
      "epoch": 4.6533203125e-05,
      "step": 7624,
      "training_step_time": 0.3764607906341553
    },
    {
      "epoch": 4.6539306640625e-05,
      "model_forward_time": 0.11395049095153809,
      "step": 7625
    },
    {
      "epoch": 4.6539306640625e-05,
      "step": 7625,
      "training_step_time": 0.3851206302642822
    },
    {
      "epoch": 4.654541015625e-05,
      "model_forward_time": 0.11470508575439453,
      "step": 7626
    },
    {
      "epoch": 4.654541015625e-05,
      "step": 7626,
      "training_step_time": 0.39043641090393066
    },
    {
      "epoch": 4.6551513671875e-05,
      "model_forward_time": 0.11533379554748535,
      "step": 7627
    },
    {
      "epoch": 4.6551513671875e-05,
      "step": 7627,
      "training_step_time": 0.3933277130126953
    },
    {
      "epoch": 4.65576171875e-05,
      "model_forward_time": 0.11480212211608887,
      "step": 7628
    },
    {
      "epoch": 4.65576171875e-05,
      "step": 7628,
      "training_step_time": 0.3980903625488281
    },
    {
      "epoch": 4.6563720703125e-05,
      "model_forward_time": 0.11590814590454102,
      "step": 7629
    },
    {
      "epoch": 4.6563720703125e-05,
      "step": 7629,
      "training_step_time": 0.4692037105560303
    },
    {
      "epoch": 4.656982421875e-05,
      "grad_norm": 0.23182497918605804,
      "learning_rate": 9.838082426819926e-05,
      "loss": 0.0768,
      "step": 7630
    },
    {
      "epoch": 4.656982421875e-05,
      "model_forward_time": 0.1150963306427002,
      "step": 7630
    },
    {
      "epoch": 4.656982421875e-05,
      "step": 7630,
      "training_step_time": 0.42488813400268555
    },
    {
      "epoch": 4.6575927734375e-05,
      "model_forward_time": 0.11422610282897949,
      "step": 7631
    },
    {
      "epoch": 4.6575927734375e-05,
      "step": 7631,
      "training_step_time": 0.42036867141723633
    },
    {
      "epoch": 4.658203125e-05,
      "model_forward_time": 0.11489629745483398,
      "step": 7632
    },
    {
      "epoch": 4.658203125e-05,
      "step": 7632,
      "training_step_time": 0.45818233489990234
    },
    {
      "epoch": 4.6588134765625e-05,
      "model_forward_time": 0.11577796936035156,
      "step": 7633
    },
    {
      "epoch": 4.6588134765625e-05,
      "step": 7633,
      "training_step_time": 0.3990488052368164
    },
    {
      "epoch": 4.659423828125e-05,
      "model_forward_time": 0.11561083793640137,
      "step": 7634
    },
    {
      "epoch": 4.659423828125e-05,
      "step": 7634,
      "training_step_time": 0.4047207832336426
    },
    {
      "epoch": 4.6600341796875e-05,
      "model_forward_time": 0.11440086364746094,
      "step": 7635
    },
    {
      "epoch": 4.6600341796875e-05,
      "step": 7635,
      "training_step_time": 0.44235754013061523
    },
    {
      "epoch": 4.66064453125e-05,
      "model_forward_time": 0.11478376388549805,
      "step": 7636
    },
    {
      "epoch": 4.66064453125e-05,
      "step": 7636,
      "training_step_time": 0.48356175422668457
    },
    {
      "epoch": 4.6612548828125e-05,
      "model_forward_time": 0.11566781997680664,
      "step": 7637
    },
    {
      "epoch": 4.6612548828125e-05,
      "step": 7637,
      "training_step_time": 0.4926741123199463
    },
    {
      "epoch": 4.661865234375e-05,
      "model_forward_time": 0.11469435691833496,
      "step": 7638
    },
    {
      "epoch": 4.661865234375e-05,
      "step": 7638,
      "training_step_time": 0.38666510581970215
    },
    {
      "epoch": 4.6624755859375e-05,
      "model_forward_time": 0.1149289608001709,
      "step": 7639
    },
    {
      "epoch": 4.6624755859375e-05,
      "step": 7639,
      "training_step_time": 0.7588942050933838
    },
    {
      "epoch": 4.6630859375e-05,
      "grad_norm": 0.19510670006275177,
      "learning_rate": 9.83738606371984e-05,
      "loss": 0.0765,
      "step": 7640
    },
    {
      "epoch": 4.6630859375e-05,
      "model_forward_time": 0.11395835876464844,
      "step": 7640
    },
    {
      "epoch": 4.6630859375e-05,
      "step": 7640,
      "training_step_time": 0.3837764263153076
    },
    {
      "epoch": 4.6636962890625e-05,
      "model_forward_time": 0.11388158798217773,
      "step": 7641
    },
    {
      "epoch": 4.6636962890625e-05,
      "step": 7641,
      "training_step_time": 0.38024187088012695
    },
    {
      "epoch": 4.664306640625e-05,
      "model_forward_time": 0.11424779891967773,
      "step": 7642
    },
    {
      "epoch": 4.664306640625e-05,
      "step": 7642,
      "training_step_time": 0.38631272315979004
    },
    {
      "epoch": 4.6649169921875e-05,
      "model_forward_time": 0.11432957649230957,
      "step": 7643
    },
    {
      "epoch": 4.6649169921875e-05,
      "step": 7643,
      "training_step_time": 0.5340321063995361
    },
    {
      "epoch": 4.66552734375e-05,
      "model_forward_time": 0.11487841606140137,
      "step": 7644
    },
    {
      "epoch": 4.66552734375e-05,
      "step": 7644,
      "training_step_time": 0.43920445442199707
    },
    {
      "epoch": 4.6661376953125e-05,
      "model_forward_time": 0.11486268043518066,
      "step": 7645
    },
    {
      "epoch": 4.6661376953125e-05,
      "step": 7645,
      "training_step_time": 0.7965986728668213
    },
    {
      "epoch": 4.666748046875e-05,
      "model_forward_time": 0.11417126655578613,
      "step": 7646
    },
    {
      "epoch": 4.666748046875e-05,
      "step": 7646,
      "training_step_time": 0.41631436347961426
    },
    {
      "epoch": 4.6673583984375e-05,
      "model_forward_time": 0.11443424224853516,
      "step": 7647
    },
    {
      "epoch": 4.6673583984375e-05,
      "step": 7647,
      "training_step_time": 0.45404958724975586
    },
    {
      "epoch": 4.66796875e-05,
      "model_forward_time": 0.11372756958007812,
      "step": 7648
    },
    {
      "epoch": 4.66796875e-05,
      "step": 7648,
      "training_step_time": 0.4858255386352539
    },
    {
      "epoch": 4.6685791015625e-05,
      "model_forward_time": 0.11415314674377441,
      "step": 7649
    },
    {
      "epoch": 4.6685791015625e-05,
      "step": 7649,
      "training_step_time": 0.5104067325592041
    },
    {
      "epoch": 4.669189453125e-05,
      "grad_norm": 0.2343512326478958,
      "learning_rate": 9.836688231149592e-05,
      "loss": 0.0759,
      "step": 7650
    },
    {
      "epoch": 4.669189453125e-05,
      "model_forward_time": 0.11466646194458008,
      "step": 7650
    },
    {
      "epoch": 4.669189453125e-05,
      "step": 7650,
      "training_step_time": 0.3840820789337158
    },
    {
      "epoch": 4.6697998046875e-05,
      "model_forward_time": 0.11378645896911621,
      "step": 7651
    },
    {
      "epoch": 4.6697998046875e-05,
      "step": 7651,
      "training_step_time": 0.3888814449310303
    },
    {
      "epoch": 4.67041015625e-05,
      "model_forward_time": 0.11413741111755371,
      "step": 7652
    },
    {
      "epoch": 4.67041015625e-05,
      "step": 7652,
      "training_step_time": 0.39966320991516113
    },
    {
      "epoch": 4.6710205078125e-05,
      "model_forward_time": 0.11428093910217285,
      "step": 7653
    },
    {
      "epoch": 4.6710205078125e-05,
      "step": 7653,
      "training_step_time": 0.387829065322876
    },
    {
      "epoch": 4.671630859375e-05,
      "model_forward_time": 0.11499333381652832,
      "step": 7654
    },
    {
      "epoch": 4.671630859375e-05,
      "step": 7654,
      "training_step_time": 0.3881256580352783
    },
    {
      "epoch": 4.6722412109375e-05,
      "model_forward_time": 0.1154029369354248,
      "step": 7655
    },
    {
      "epoch": 4.6722412109375e-05,
      "step": 7655,
      "training_step_time": 0.3956871032714844
    },
    {
      "epoch": 4.6728515625e-05,
      "model_forward_time": 0.11489558219909668,
      "step": 7656
    },
    {
      "epoch": 4.6728515625e-05,
      "step": 7656,
      "training_step_time": 0.3876974582672119
    },
    {
      "epoch": 4.6734619140625e-05,
      "model_forward_time": 0.11521434783935547,
      "step": 7657
    },
    {
      "epoch": 4.6734619140625e-05,
      "step": 7657,
      "training_step_time": 0.9657080173492432
    },
    {
      "epoch": 4.674072265625e-05,
      "model_forward_time": 0.11443257331848145,
      "step": 7658
    },
    {
      "epoch": 4.674072265625e-05,
      "step": 7658,
      "training_step_time": 0.4197063446044922
    },
    {
      "epoch": 4.6746826171875e-05,
      "model_forward_time": 0.1147310733795166,
      "step": 7659
    },
    {
      "epoch": 4.6746826171875e-05,
      "step": 7659,
      "training_step_time": 0.4006233215332031
    },
    {
      "epoch": 4.67529296875e-05,
      "grad_norm": 0.184413343667984,
      "learning_rate": 9.835988929321165e-05,
      "loss": 0.0737,
      "step": 7660
    },
    {
      "epoch": 4.67529296875e-05,
      "model_forward_time": 0.11445355415344238,
      "step": 7660
    },
    {
      "epoch": 4.67529296875e-05,
      "step": 7660,
      "training_step_time": 0.46299028396606445
    },
    {
      "epoch": 4.6759033203125e-05,
      "model_forward_time": 0.11384320259094238,
      "step": 7661
    },
    {
      "epoch": 4.6759033203125e-05,
      "step": 7661,
      "training_step_time": 0.38871288299560547
    },
    {
      "epoch": 4.676513671875e-05,
      "model_forward_time": 0.11440515518188477,
      "step": 7662
    },
    {
      "epoch": 4.676513671875e-05,
      "step": 7662,
      "training_step_time": 0.421328067779541
    },
    {
      "epoch": 4.6771240234375e-05,
      "model_forward_time": 0.11496949195861816,
      "step": 7663
    },
    {
      "epoch": 4.6771240234375e-05,
      "step": 7663,
      "training_step_time": 0.6193013191223145
    },
    {
      "epoch": 4.677734375e-05,
      "model_forward_time": 0.11395382881164551,
      "step": 7664
    },
    {
      "epoch": 4.677734375e-05,
      "step": 7664,
      "training_step_time": 0.3849928379058838
    },
    {
      "epoch": 4.6783447265625e-05,
      "model_forward_time": 0.11494779586791992,
      "step": 7665
    },
    {
      "epoch": 4.6783447265625e-05,
      "step": 7665,
      "training_step_time": 0.385664701461792
    },
    {
      "epoch": 4.678955078125e-05,
      "model_forward_time": 0.1147005558013916,
      "step": 7666
    },
    {
      "epoch": 4.678955078125e-05,
      "step": 7666,
      "training_step_time": 0.3820006847381592
    },
    {
      "epoch": 4.6795654296875e-05,
      "model_forward_time": 0.11461806297302246,
      "step": 7667
    },
    {
      "epoch": 4.6795654296875e-05,
      "step": 7667,
      "training_step_time": 0.3808879852294922
    },
    {
      "epoch": 4.68017578125e-05,
      "model_forward_time": 0.11489319801330566,
      "step": 7668
    },
    {
      "epoch": 4.68017578125e-05,
      "step": 7668,
      "training_step_time": 0.39655351638793945
    },
    {
      "epoch": 4.6807861328125e-05,
      "model_forward_time": 0.11449766159057617,
      "step": 7669
    },
    {
      "epoch": 4.6807861328125e-05,
      "step": 7669,
      "training_step_time": 0.6913940906524658
    },
    {
      "epoch": 4.681396484375e-05,
      "grad_norm": 0.2760550081729889,
      "learning_rate": 9.835288158446986e-05,
      "loss": 0.075,
      "step": 7670
    },
    {
      "epoch": 4.681396484375e-05,
      "model_forward_time": 0.11474847793579102,
      "step": 7670
    },
    {
      "epoch": 4.681396484375e-05,
      "step": 7670,
      "training_step_time": 0.38797569274902344
    },
    {
      "epoch": 4.6820068359375e-05,
      "model_forward_time": 0.11471128463745117,
      "step": 7671
    },
    {
      "epoch": 4.6820068359375e-05,
      "step": 7671,
      "training_step_time": 0.4763216972351074
    },
    {
      "epoch": 4.6826171875e-05,
      "model_forward_time": 0.11475372314453125,
      "step": 7672
    },
    {
      "epoch": 4.6826171875e-05,
      "step": 7672,
      "training_step_time": 0.43796849250793457
    },
    {
      "epoch": 4.6832275390625e-05,
      "model_forward_time": 0.11494565010070801,
      "step": 7673
    },
    {
      "epoch": 4.6832275390625e-05,
      "step": 7673,
      "training_step_time": 0.44757819175720215
    },
    {
      "epoch": 4.683837890625e-05,
      "model_forward_time": 0.1142125129699707,
      "step": 7674
    },
    {
      "epoch": 4.683837890625e-05,
      "step": 7674,
      "training_step_time": 0.36589789390563965
    },
    {
      "epoch": 4.6844482421875e-05,
      "model_forward_time": 0.1145014762878418,
      "step": 7675
    },
    {
      "epoch": 4.6844482421875e-05,
      "step": 7675,
      "training_step_time": 0.46514439582824707
    },
    {
      "epoch": 4.68505859375e-05,
      "model_forward_time": 0.11530900001525879,
      "step": 7676
    },
    {
      "epoch": 4.68505859375e-05,
      "step": 7676,
      "training_step_time": 0.40474557876586914
    },
    {
      "epoch": 4.6856689453125e-05,
      "model_forward_time": 0.11614847183227539,
      "step": 7677
    },
    {
      "epoch": 4.6856689453125e-05,
      "step": 7677,
      "training_step_time": 0.39020466804504395
    },
    {
      "epoch": 4.686279296875e-05,
      "model_forward_time": 0.11579298973083496,
      "step": 7678
    },
    {
      "epoch": 4.686279296875e-05,
      "step": 7678,
      "training_step_time": 0.3835299015045166
    },
    {
      "epoch": 4.6868896484375e-05,
      "model_forward_time": 0.11538434028625488,
      "step": 7679
    },
    {
      "epoch": 4.6868896484375e-05,
      "step": 7679,
      "training_step_time": 0.7749850749969482
    },
    {
      "epoch": 4.6875e-05,
      "grad_norm": 0.19347438216209412,
      "learning_rate": 9.834585918739936e-05,
      "loss": 0.0728,
      "step": 7680
    },
    {
      "epoch": 4.6875e-05,
      "model_forward_time": 0.11418890953063965,
      "step": 7680
    },
    {
      "epoch": 4.6875e-05,
      "step": 7680,
      "training_step_time": 0.4118056297302246
    },
    {
      "epoch": 4.6881103515625e-05,
      "model_forward_time": 0.11463403701782227,
      "step": 7681
    },
    {
      "epoch": 4.6881103515625e-05,
      "step": 7681,
      "training_step_time": 0.4046204090118408
    },
    {
      "epoch": 4.688720703125e-05,
      "model_forward_time": 0.1153562068939209,
      "step": 7682
    },
    {
      "epoch": 4.688720703125e-05,
      "step": 7682,
      "training_step_time": 0.39496421813964844
    },
    {
      "epoch": 4.6893310546875e-05,
      "model_forward_time": 0.11472678184509277,
      "step": 7683
    },
    {
      "epoch": 4.6893310546875e-05,
      "step": 7683,
      "training_step_time": 0.38895320892333984
    },
    {
      "epoch": 4.68994140625e-05,
      "model_forward_time": 0.11371994018554688,
      "step": 7684
    },
    {
      "epoch": 4.68994140625e-05,
      "step": 7684,
      "training_step_time": 0.45078253746032715
    },
    {
      "epoch": 4.6905517578125e-05,
      "model_forward_time": 0.11460995674133301,
      "step": 7685
    },
    {
      "epoch": 4.6905517578125e-05,
      "step": 7685,
      "training_step_time": 0.8384959697723389
    },
    {
      "epoch": 4.691162109375e-05,
      "model_forward_time": 0.1146385669708252,
      "step": 7686
    },
    {
      "epoch": 4.691162109375e-05,
      "step": 7686,
      "training_step_time": 0.3826320171356201
    },
    {
      "epoch": 4.6917724609375e-05,
      "model_forward_time": 0.11473441123962402,
      "step": 7687
    },
    {
      "epoch": 4.6917724609375e-05,
      "step": 7687,
      "training_step_time": 0.5214154720306396
    },
    {
      "epoch": 4.6923828125e-05,
      "model_forward_time": 0.11445999145507812,
      "step": 7688
    },
    {
      "epoch": 4.6923828125e-05,
      "step": 7688,
      "training_step_time": 0.5004501342773438
    },
    {
      "epoch": 4.6929931640625e-05,
      "model_forward_time": 0.11378788948059082,
      "step": 7689
    },
    {
      "epoch": 4.6929931640625e-05,
      "step": 7689,
      "training_step_time": 0.4026057720184326
    },
    {
      "epoch": 4.693603515625e-05,
      "grad_norm": 0.3348197340965271,
      "learning_rate": 9.833882210413332e-05,
      "loss": 0.074,
      "step": 7690
    },
    {
      "epoch": 4.693603515625e-05,
      "model_forward_time": 0.1142735481262207,
      "step": 7690
    },
    {
      "epoch": 4.693603515625e-05,
      "step": 7690,
      "training_step_time": 0.3882319927215576
    },
    {
      "epoch": 4.6942138671875e-05,
      "model_forward_time": 0.11444211006164551,
      "step": 7691
    },
    {
      "epoch": 4.6942138671875e-05,
      "step": 7691,
      "training_step_time": 0.4042072296142578
    },
    {
      "epoch": 4.69482421875e-05,
      "model_forward_time": 0.11448097229003906,
      "step": 7692
    },
    {
      "epoch": 4.69482421875e-05,
      "step": 7692,
      "training_step_time": 0.3940587043762207
    },
    {
      "epoch": 4.6954345703125e-05,
      "model_forward_time": 0.11493062973022461,
      "step": 7693
    },
    {
      "epoch": 4.6954345703125e-05,
      "step": 7693,
      "training_step_time": 0.6998450756072998
    },
    {
      "epoch": 4.696044921875e-05,
      "model_forward_time": 0.1145322322845459,
      "step": 7694
    },
    {
      "epoch": 4.696044921875e-05,
      "step": 7694,
      "training_step_time": 0.407773494720459
    },
    {
      "epoch": 4.6966552734375e-05,
      "model_forward_time": 0.11402392387390137,
      "step": 7695
    },
    {
      "epoch": 4.6966552734375e-05,
      "step": 7695,
      "training_step_time": 0.39203429222106934
    },
    {
      "epoch": 4.697265625e-05,
      "model_forward_time": 0.11490178108215332,
      "step": 7696
    },
    {
      "epoch": 4.697265625e-05,
      "step": 7696,
      "training_step_time": 0.39063143730163574
    },
    {
      "epoch": 4.6978759765625e-05,
      "model_forward_time": 0.11446022987365723,
      "step": 7697
    },
    {
      "epoch": 4.6978759765625e-05,
      "step": 7697,
      "training_step_time": 0.7257158756256104
    },
    {
      "epoch": 4.698486328125e-05,
      "model_forward_time": 0.11430597305297852,
      "step": 7698
    },
    {
      "epoch": 4.698486328125e-05,
      "step": 7698,
      "training_step_time": 0.49414944648742676
    },
    {
      "epoch": 4.6990966796875e-05,
      "model_forward_time": 0.11359190940856934,
      "step": 7699
    },
    {
      "epoch": 4.6990966796875e-05,
      "step": 7699,
      "training_step_time": 0.3994467258453369
    },
    {
      "epoch": 4.69970703125e-05,
      "grad_norm": 0.16027367115020752,
      "learning_rate": 9.833177033680944e-05,
      "loss": 0.0731,
      "step": 7700
    },
    {
      "epoch": 4.69970703125e-05,
      "model_forward_time": 0.11378717422485352,
      "step": 7700
    },
    {
      "epoch": 4.69970703125e-05,
      "step": 7700,
      "training_step_time": 0.3915576934814453
    },
    {
      "epoch": 4.7003173828125e-05,
      "model_forward_time": 0.11439108848571777,
      "step": 7701
    },
    {
      "epoch": 4.7003173828125e-05,
      "step": 7701,
      "training_step_time": 0.4971649646759033
    },
    {
      "epoch": 4.700927734375e-05,
      "model_forward_time": 0.11428570747375488,
      "step": 7702
    },
    {
      "epoch": 4.700927734375e-05,
      "step": 7702,
      "training_step_time": 0.4439961910247803
    },
    {
      "epoch": 4.7015380859375e-05,
      "model_forward_time": 0.11482548713684082,
      "step": 7703
    },
    {
      "epoch": 4.7015380859375e-05,
      "step": 7703,
      "training_step_time": 0.8302040100097656
    },
    {
      "epoch": 4.7021484375e-05,
      "model_forward_time": 0.1139078140258789,
      "step": 7704
    },
    {
      "epoch": 4.7021484375e-05,
      "step": 7704,
      "training_step_time": 0.38686156272888184
    },
    {
      "epoch": 4.7027587890625e-05,
      "model_forward_time": 0.11404633522033691,
      "step": 7705
    },
    {
      "epoch": 4.7027587890625e-05,
      "step": 7705,
      "training_step_time": 0.5232834815979004
    },
    {
      "epoch": 4.703369140625e-05,
      "model_forward_time": 0.11426591873168945,
      "step": 7706
    },
    {
      "epoch": 4.703369140625e-05,
      "step": 7706,
      "training_step_time": 0.3868248462677002
    },
    {
      "epoch": 4.7039794921875e-05,
      "model_forward_time": 0.11410284042358398,
      "step": 7707
    },
    {
      "epoch": 4.7039794921875e-05,
      "step": 7707,
      "training_step_time": 0.39716291427612305
    },
    {
      "epoch": 4.70458984375e-05,
      "model_forward_time": 0.11383461952209473,
      "step": 7708
    },
    {
      "epoch": 4.70458984375e-05,
      "step": 7708,
      "training_step_time": 0.3953108787536621
    },
    {
      "epoch": 4.7052001953125e-05,
      "model_forward_time": 0.11482381820678711,
      "step": 7709
    },
    {
      "epoch": 4.7052001953125e-05,
      "step": 7709,
      "training_step_time": 0.7612481117248535
    },
    {
      "epoch": 4.705810546875e-05,
      "grad_norm": 0.23416151106357574,
      "learning_rate": 9.832470388756987e-05,
      "loss": 0.0747,
      "step": 7710
    },
    {
      "epoch": 4.705810546875e-05,
      "model_forward_time": 0.11440134048461914,
      "step": 7710
    },
    {
      "epoch": 4.705810546875e-05,
      "step": 7710,
      "training_step_time": 0.4456446170806885
    },
    {
      "epoch": 4.7064208984375e-05,
      "model_forward_time": 0.11373639106750488,
      "step": 7711
    },
    {
      "epoch": 4.7064208984375e-05,
      "step": 7711,
      "training_step_time": 0.4670886993408203
    },
    {
      "epoch": 4.70703125e-05,
      "model_forward_time": 0.11518287658691406,
      "step": 7712
    },
    {
      "epoch": 4.70703125e-05,
      "step": 7712,
      "training_step_time": 0.38851094245910645
    },
    {
      "epoch": 4.7076416015625e-05,
      "model_forward_time": 0.11419272422790527,
      "step": 7713
    },
    {
      "epoch": 4.7076416015625e-05,
      "step": 7713,
      "training_step_time": 0.42847490310668945
    },
    {
      "epoch": 4.708251953125e-05,
      "model_forward_time": 0.11538028717041016,
      "step": 7714
    },
    {
      "epoch": 4.708251953125e-05,
      "step": 7714,
      "training_step_time": 0.42691850662231445
    },
    {
      "epoch": 4.7088623046875e-05,
      "model_forward_time": 0.11466360092163086,
      "step": 7715
    },
    {
      "epoch": 4.7088623046875e-05,
      "step": 7715,
      "training_step_time": 0.5002093315124512
    },
    {
      "epoch": 4.70947265625e-05,
      "model_forward_time": 0.11450719833374023,
      "step": 7716
    },
    {
      "epoch": 4.70947265625e-05,
      "step": 7716,
      "training_step_time": 0.39409399032592773
    },
    {
      "epoch": 4.7100830078125e-05,
      "model_forward_time": 0.11404585838317871,
      "step": 7717
    },
    {
      "epoch": 4.7100830078125e-05,
      "step": 7717,
      "training_step_time": 0.3955397605895996
    },
    {
      "epoch": 4.710693359375e-05,
      "model_forward_time": 0.11450982093811035,
      "step": 7718
    },
    {
      "epoch": 4.710693359375e-05,
      "step": 7718,
      "training_step_time": 0.4058499336242676
    },
    {
      "epoch": 4.7113037109375e-05,
      "model_forward_time": 0.11442708969116211,
      "step": 7719
    },
    {
      "epoch": 4.7113037109375e-05,
      "step": 7719,
      "training_step_time": 0.3922123908996582
    },
    {
      "epoch": 4.7119140625e-05,
      "grad_norm": 0.2191901057958603,
      "learning_rate": 9.831762275856118e-05,
      "loss": 0.0692,
      "step": 7720
    },
    {
      "epoch": 4.7119140625e-05,
      "model_forward_time": 0.11505866050720215,
      "step": 7720
    },
    {
      "epoch": 4.7119140625e-05,
      "step": 7720,
      "training_step_time": 0.3917505741119385
    },
    {
      "epoch": 4.7125244140625e-05,
      "model_forward_time": 0.11529898643493652,
      "step": 7721
    },
    {
      "epoch": 4.7125244140625e-05,
      "step": 7721,
      "training_step_time": 0.6226637363433838
    },
    {
      "epoch": 4.713134765625e-05,
      "model_forward_time": 0.11535453796386719,
      "step": 7722
    },
    {
      "epoch": 4.713134765625e-05,
      "step": 7722,
      "training_step_time": 0.4223208427429199
    },
    {
      "epoch": 4.7137451171875e-05,
      "model_forward_time": 0.11536669731140137,
      "step": 7723
    },
    {
      "epoch": 4.7137451171875e-05,
      "step": 7723,
      "training_step_time": 0.6222274303436279
    },
    {
      "epoch": 4.71435546875e-05,
      "model_forward_time": 0.11485815048217773,
      "step": 7724
    },
    {
      "epoch": 4.71435546875e-05,
      "step": 7724,
      "training_step_time": 0.44315266609191895
    },
    {
      "epoch": 4.7149658203125e-05,
      "model_forward_time": 0.11414623260498047,
      "step": 7725
    },
    {
      "epoch": 4.7149658203125e-05,
      "step": 7725,
      "training_step_time": 0.39657068252563477
    },
    {
      "epoch": 4.715576171875e-05,
      "model_forward_time": 0.1142115592956543,
      "step": 7726
    },
    {
      "epoch": 4.715576171875e-05,
      "step": 7726,
      "training_step_time": 0.40063023567199707
    },
    {
      "epoch": 4.7161865234375e-05,
      "model_forward_time": 0.11497640609741211,
      "step": 7727
    },
    {
      "epoch": 4.7161865234375e-05,
      "step": 7727,
      "training_step_time": 0.4502592086791992
    },
    {
      "epoch": 4.716796875e-05,
      "model_forward_time": 0.1165611743927002,
      "step": 7728
    },
    {
      "epoch": 4.716796875e-05,
      "step": 7728,
      "training_step_time": 0.43886661529541016
    },
    {
      "epoch": 4.7174072265625e-05,
      "model_forward_time": 0.11548113822937012,
      "step": 7729
    },
    {
      "epoch": 4.7174072265625e-05,
      "step": 7729,
      "training_step_time": 0.4436299800872803
    },
    {
      "epoch": 4.718017578125e-05,
      "grad_norm": 0.1338580995798111,
      "learning_rate": 9.831052695193445e-05,
      "loss": 0.0795,
      "step": 7730
    },
    {
      "epoch": 4.718017578125e-05,
      "model_forward_time": 0.11532282829284668,
      "step": 7730
    },
    {
      "epoch": 4.718017578125e-05,
      "step": 7730,
      "training_step_time": 0.38884806632995605
    },
    {
      "epoch": 4.7186279296875e-05,
      "model_forward_time": 0.11574530601501465,
      "step": 7731
    },
    {
      "epoch": 4.7186279296875e-05,
      "step": 7731,
      "training_step_time": 0.3853945732116699
    },
    {
      "epoch": 4.71923828125e-05,
      "model_forward_time": 0.11553001403808594,
      "step": 7732
    },
    {
      "epoch": 4.71923828125e-05,
      "step": 7732,
      "training_step_time": 0.38404369354248047
    },
    {
      "epoch": 4.7198486328125e-05,
      "model_forward_time": 0.11486458778381348,
      "step": 7733
    },
    {
      "epoch": 4.7198486328125e-05,
      "step": 7733,
      "training_step_time": 0.40038633346557617
    },
    {
      "epoch": 4.720458984375e-05,
      "model_forward_time": 0.1153261661529541,
      "step": 7734
    },
    {
      "epoch": 4.720458984375e-05,
      "step": 7734,
      "training_step_time": 0.3930978775024414
    },
    {
      "epoch": 4.7210693359375e-05,
      "model_forward_time": 0.11527466773986816,
      "step": 7735
    },
    {
      "epoch": 4.7210693359375e-05,
      "step": 7735,
      "training_step_time": 0.5310797691345215
    },
    {
      "epoch": 4.7216796875e-05,
      "model_forward_time": 0.1157083511352539,
      "step": 7736
    },
    {
      "epoch": 4.7216796875e-05,
      "step": 7736,
      "training_step_time": 0.4368891716003418
    },
    {
      "epoch": 4.7222900390625e-05,
      "model_forward_time": 0.1155698299407959,
      "step": 7737
    },
    {
      "epoch": 4.7222900390625e-05,
      "step": 7737,
      "training_step_time": 0.47467470169067383
    },
    {
      "epoch": 4.722900390625e-05,
      "model_forward_time": 0.11543154716491699,
      "step": 7738
    },
    {
      "epoch": 4.722900390625e-05,
      "step": 7738,
      "training_step_time": 0.42490100860595703
    },
    {
      "epoch": 4.7235107421875e-05,
      "model_forward_time": 0.11481881141662598,
      "step": 7739
    },
    {
      "epoch": 4.7235107421875e-05,
      "step": 7739,
      "training_step_time": 0.6501705646514893
    },
    {
      "epoch": 4.72412109375e-05,
      "grad_norm": 0.2794559895992279,
      "learning_rate": 9.830341646984521e-05,
      "loss": 0.0693,
      "step": 7740
    },
    {
      "epoch": 4.72412109375e-05,
      "model_forward_time": 0.11409425735473633,
      "step": 7740
    },
    {
      "epoch": 4.72412109375e-05,
      "step": 7740,
      "training_step_time": 0.42013978958129883
    },
    {
      "epoch": 4.7247314453125e-05,
      "model_forward_time": 0.11590027809143066,
      "step": 7741
    },
    {
      "epoch": 4.7247314453125e-05,
      "step": 7741,
      "training_step_time": 0.3665955066680908
    },
    {
      "epoch": 4.725341796875e-05,
      "model_forward_time": 0.11448097229003906,
      "step": 7742
    },
    {
      "epoch": 4.725341796875e-05,
      "step": 7742,
      "training_step_time": 0.45194077491760254
    },
    {
      "epoch": 4.7259521484375e-05,
      "model_forward_time": 0.11432480812072754,
      "step": 7743
    },
    {
      "epoch": 4.7259521484375e-05,
      "step": 7743,
      "training_step_time": 0.4445078372955322
    },
    {
      "epoch": 4.7265625e-05,
      "model_forward_time": 0.11412549018859863,
      "step": 7744
    },
    {
      "epoch": 4.7265625e-05,
      "step": 7744,
      "training_step_time": 0.38719820976257324
    },
    {
      "epoch": 4.7271728515625e-05,
      "model_forward_time": 0.11481332778930664,
      "step": 7745
    },
    {
      "epoch": 4.7271728515625e-05,
      "step": 7745,
      "training_step_time": 0.698807954788208
    },
    {
      "epoch": 4.727783203125e-05,
      "model_forward_time": 0.11442899703979492,
      "step": 7746
    },
    {
      "epoch": 4.727783203125e-05,
      "step": 7746,
      "training_step_time": 0.385026216506958
    },
    {
      "epoch": 4.7283935546875e-05,
      "model_forward_time": 0.11436653137207031,
      "step": 7747
    },
    {
      "epoch": 4.7283935546875e-05,
      "step": 7747,
      "training_step_time": 0.43029141426086426
    },
    {
      "epoch": 4.72900390625e-05,
      "model_forward_time": 0.11463260650634766,
      "step": 7748
    },
    {
      "epoch": 4.72900390625e-05,
      "step": 7748,
      "training_step_time": 0.40621304512023926
    },
    {
      "epoch": 4.7296142578125e-05,
      "model_forward_time": 0.11412930488586426,
      "step": 7749
    },
    {
      "epoch": 4.7296142578125e-05,
      "step": 7749,
      "training_step_time": 0.3911111354827881
    },
    {
      "epoch": 4.730224609375e-05,
      "grad_norm": 0.16777142882347107,
      "learning_rate": 9.829629131445342e-05,
      "loss": 0.0725,
      "step": 7750
    },
    {
      "epoch": 4.730224609375e-05,
      "model_forward_time": 0.11470723152160645,
      "step": 7750
    },
    {
      "epoch": 4.730224609375e-05,
      "step": 7750,
      "training_step_time": 0.38121676445007324
    },
    {
      "epoch": 4.7308349609375e-05,
      "model_forward_time": 0.11511969566345215,
      "step": 7751
    },
    {
      "epoch": 4.7308349609375e-05,
      "step": 7751,
      "training_step_time": 0.7827796936035156
    },
    {
      "epoch": 4.7314453125e-05,
      "model_forward_time": 0.11455059051513672,
      "step": 7752
    },
    {
      "epoch": 4.7314453125e-05,
      "step": 7752,
      "training_step_time": 0.3860297203063965
    },
    {
      "epoch": 4.7320556640625e-05,
      "model_forward_time": 0.11440896987915039,
      "step": 7753
    },
    {
      "epoch": 4.7320556640625e-05,
      "step": 7753,
      "training_step_time": 0.9090402126312256
    },
    {
      "epoch": 4.732666015625e-05,
      "model_forward_time": 0.11395668983459473,
      "step": 7754
    },
    {
      "epoch": 4.732666015625e-05,
      "step": 7754,
      "training_step_time": 0.39852213859558105
    },
    {
      "epoch": 4.7332763671875e-05,
      "model_forward_time": 0.11434340476989746,
      "step": 7755
    },
    {
      "epoch": 4.7332763671875e-05,
      "step": 7755,
      "training_step_time": 0.42644190788269043
    },
    {
      "epoch": 4.73388671875e-05,
      "model_forward_time": 0.11446237564086914,
      "step": 7756
    },
    {
      "epoch": 4.73388671875e-05,
      "step": 7756,
      "training_step_time": 0.4287099838256836
    },
    {
      "epoch": 4.7344970703125e-05,
      "model_forward_time": 0.11405158042907715,
      "step": 7757
    },
    {
      "epoch": 4.7344970703125e-05,
      "step": 7757,
      "training_step_time": 0.37171101570129395
    },
    {
      "epoch": 4.735107421875e-05,
      "model_forward_time": 0.11483573913574219,
      "step": 7758
    },
    {
      "epoch": 4.735107421875e-05,
      "step": 7758,
      "training_step_time": 0.3854970932006836
    },
    {
      "epoch": 4.7357177734375e-05,
      "model_forward_time": 0.11480593681335449,
      "step": 7759
    },
    {
      "epoch": 4.7357177734375e-05,
      "step": 7759,
      "training_step_time": 0.8384087085723877
    },
    {
      "epoch": 4.736328125e-05,
      "grad_norm": 0.22723035514354706,
      "learning_rate": 9.828915148792352e-05,
      "loss": 0.0708,
      "step": 7760
    },
    {
      "epoch": 4.736328125e-05,
      "model_forward_time": 0.11374950408935547,
      "step": 7760
    },
    {
      "epoch": 4.736328125e-05,
      "step": 7760,
      "training_step_time": 0.43467020988464355
    },
    {
      "epoch": 4.7369384765625e-05,
      "model_forward_time": 0.11452269554138184,
      "step": 7761
    },
    {
      "epoch": 4.7369384765625e-05,
      "step": 7761,
      "training_step_time": 0.38496828079223633
    },
    {
      "epoch": 4.737548828125e-05,
      "model_forward_time": 0.11405014991760254,
      "step": 7762
    },
    {
      "epoch": 4.737548828125e-05,
      "step": 7762,
      "training_step_time": 0.38253307342529297
    },
    {
      "epoch": 4.7381591796875e-05,
      "model_forward_time": 0.114501953125,
      "step": 7763
    },
    {
      "epoch": 4.7381591796875e-05,
      "step": 7763,
      "training_step_time": 0.41518354415893555
    },
    {
      "epoch": 4.73876953125e-05,
      "model_forward_time": 0.1140127182006836,
      "step": 7764
    },
    {
      "epoch": 4.73876953125e-05,
      "step": 7764,
      "training_step_time": 0.43187499046325684
    },
    {
      "epoch": 4.7393798828125e-05,
      "model_forward_time": 0.1166985034942627,
      "step": 7765
    },
    {
      "epoch": 4.7393798828125e-05,
      "step": 7765,
      "training_step_time": 0.41582250595092773
    },
    {
      "epoch": 4.739990234375e-05,
      "model_forward_time": 0.11482119560241699,
      "step": 7766
    },
    {
      "epoch": 4.739990234375e-05,
      "step": 7766,
      "training_step_time": 0.3966236114501953
    },
    {
      "epoch": 4.7406005859375e-05,
      "model_forward_time": 0.11631178855895996,
      "step": 7767
    },
    {
      "epoch": 4.7406005859375e-05,
      "step": 7767,
      "training_step_time": 0.3976118564605713
    },
    {
      "epoch": 4.7412109375e-05,
      "model_forward_time": 0.11527442932128906,
      "step": 7768
    },
    {
      "epoch": 4.7412109375e-05,
      "step": 7768,
      "training_step_time": 0.4308016300201416
    },
    {
      "epoch": 4.7418212890625e-05,
      "model_forward_time": 0.11463475227355957,
      "step": 7769
    },
    {
      "epoch": 4.7418212890625e-05,
      "step": 7769,
      "training_step_time": 0.4673757553100586
    },
    {
      "epoch": 4.742431640625e-05,
      "grad_norm": 0.21207673847675323,
      "learning_rate": 9.82819969924244e-05,
      "loss": 0.0665,
      "step": 7770
    },
    {
      "epoch": 4.742431640625e-05,
      "model_forward_time": 0.11483526229858398,
      "step": 7770
    },
    {
      "epoch": 4.742431640625e-05,
      "step": 7770,
      "training_step_time": 0.4937717914581299
    },
    {
      "epoch": 4.7430419921875e-05,
      "model_forward_time": 0.11471104621887207,
      "step": 7771
    },
    {
      "epoch": 4.7430419921875e-05,
      "step": 7771,
      "training_step_time": 0.7786679267883301
    },
    {
      "epoch": 4.74365234375e-05,
      "model_forward_time": 0.11391019821166992,
      "step": 7772
    },
    {
      "epoch": 4.74365234375e-05,
      "step": 7772,
      "training_step_time": 0.3945651054382324
    },
    {
      "epoch": 4.7442626953125e-05,
      "model_forward_time": 0.11372780799865723,
      "step": 7773
    },
    {
      "epoch": 4.7442626953125e-05,
      "step": 7773,
      "training_step_time": 0.4153430461883545
    },
    {
      "epoch": 4.744873046875e-05,
      "model_forward_time": 0.11451172828674316,
      "step": 7774
    },
    {
      "epoch": 4.744873046875e-05,
      "step": 7774,
      "training_step_time": 0.37798237800598145
    },
    {
      "epoch": 4.7454833984375e-05,
      "model_forward_time": 0.11501049995422363,
      "step": 7775
    },
    {
      "epoch": 4.7454833984375e-05,
      "step": 7775,
      "training_step_time": 0.38491368293762207
    },
    {
      "epoch": 4.74609375e-05,
      "model_forward_time": 0.11462092399597168,
      "step": 7776
    },
    {
      "epoch": 4.74609375e-05,
      "step": 7776,
      "training_step_time": 0.38640666007995605
    },
    {
      "epoch": 4.7467041015625e-05,
      "model_forward_time": 0.1149911880493164,
      "step": 7777
    },
    {
      "epoch": 4.7467041015625e-05,
      "step": 7777,
      "training_step_time": 0.9379067420959473
    },
    {
      "epoch": 4.747314453125e-05,
      "model_forward_time": 0.11457562446594238,
      "step": 7778
    },
    {
      "epoch": 4.747314453125e-05,
      "step": 7778,
      "training_step_time": 0.39035868644714355
    },
    {
      "epoch": 4.7479248046875e-05,
      "model_forward_time": 0.11418700218200684,
      "step": 7779
    },
    {
      "epoch": 4.7479248046875e-05,
      "step": 7779,
      "training_step_time": 0.38303613662719727
    },
    {
      "epoch": 4.74853515625e-05,
      "grad_norm": 0.3072090744972229,
      "learning_rate": 9.82748278301294e-05,
      "loss": 0.077,
      "step": 7780
    },
    {
      "epoch": 4.74853515625e-05,
      "model_forward_time": 0.11372256278991699,
      "step": 7780
    },
    {
      "epoch": 4.74853515625e-05,
      "step": 7780,
      "training_step_time": 0.3803989887237549
    },
    {
      "epoch": 4.7491455078125e-05,
      "model_forward_time": 0.11438751220703125,
      "step": 7781
    },
    {
      "epoch": 4.7491455078125e-05,
      "step": 7781,
      "training_step_time": 0.4573180675506592
    },
    {
      "epoch": 4.749755859375e-05,
      "model_forward_time": 0.11480116844177246,
      "step": 7782
    },
    {
      "epoch": 4.749755859375e-05,
      "step": 7782,
      "training_step_time": 0.46303486824035645
    },
    {
      "epoch": 4.7503662109375e-05,
      "model_forward_time": 0.11464262008666992,
      "step": 7783
    },
    {
      "epoch": 4.7503662109375e-05,
      "step": 7783,
      "training_step_time": 1.1300244331359863
    },
    {
      "epoch": 4.7509765625e-05,
      "model_forward_time": 0.11403226852416992,
      "step": 7784
    },
    {
      "epoch": 4.7509765625e-05,
      "step": 7784,
      "training_step_time": 0.3923921585083008
    },
    {
      "epoch": 4.7515869140625e-05,
      "model_forward_time": 0.11432862281799316,
      "step": 7785
    },
    {
      "epoch": 4.7515869140625e-05,
      "step": 7785,
      "training_step_time": 0.3849778175354004
    },
    {
      "epoch": 4.752197265625e-05,
      "model_forward_time": 0.1136622428894043,
      "step": 7786
    },
    {
      "epoch": 4.752197265625e-05,
      "step": 7786,
      "training_step_time": 0.3944730758666992
    },
    {
      "epoch": 4.7528076171875e-05,
      "model_forward_time": 0.11479592323303223,
      "step": 7787
    },
    {
      "epoch": 4.7528076171875e-05,
      "step": 7787,
      "training_step_time": 0.3752732276916504
    },
    {
      "epoch": 4.75341796875e-05,
      "model_forward_time": 0.11465907096862793,
      "step": 7788
    },
    {
      "epoch": 4.75341796875e-05,
      "step": 7788,
      "training_step_time": 0.3813474178314209
    },
    {
      "epoch": 4.7540283203125e-05,
      "model_forward_time": 0.11462688446044922,
      "step": 7789
    },
    {
      "epoch": 4.7540283203125e-05,
      "step": 7789,
      "training_step_time": 0.6686489582061768
    },
    {
      "epoch": 4.754638671875e-05,
      "grad_norm": 0.2897636294364929,
      "learning_rate": 9.826764400321633e-05,
      "loss": 0.0696,
      "step": 7790
    },
    {
      "epoch": 4.754638671875e-05,
      "model_forward_time": 0.1153116226196289,
      "step": 7790
    },
    {
      "epoch": 4.754638671875e-05,
      "step": 7790,
      "training_step_time": 0.42012619972229004
    },
    {
      "epoch": 4.7552490234375e-05,
      "model_forward_time": 0.1144261360168457,
      "step": 7791
    },
    {
      "epoch": 4.7552490234375e-05,
      "step": 7791,
      "training_step_time": 0.3943178653717041
    },
    {
      "epoch": 4.755859375e-05,
      "model_forward_time": 0.11466598510742188,
      "step": 7792
    },
    {
      "epoch": 4.755859375e-05,
      "step": 7792,
      "training_step_time": 0.38727807998657227
    },
    {
      "epoch": 4.7564697265625e-05,
      "model_forward_time": 0.11524415016174316,
      "step": 7793
    },
    {
      "epoch": 4.7564697265625e-05,
      "step": 7793,
      "training_step_time": 0.3938486576080322
    },
    {
      "epoch": 4.757080078125e-05,
      "model_forward_time": 0.11487889289855957,
      "step": 7794
    },
    {
      "epoch": 4.757080078125e-05,
      "step": 7794,
      "training_step_time": 0.4725971221923828
    },
    {
      "epoch": 4.7576904296875e-05,
      "model_forward_time": 0.1160423755645752,
      "step": 7795
    },
    {
      "epoch": 4.7576904296875e-05,
      "step": 7795,
      "training_step_time": 0.8412706851959229
    },
    {
      "epoch": 4.75830078125e-05,
      "model_forward_time": 0.11410331726074219,
      "step": 7796
    },
    {
      "epoch": 4.75830078125e-05,
      "step": 7796,
      "training_step_time": 0.42798423767089844
    },
    {
      "epoch": 4.7589111328125e-05,
      "model_forward_time": 0.11427736282348633,
      "step": 7797
    },
    {
      "epoch": 4.7589111328125e-05,
      "step": 7797,
      "training_step_time": 0.43026065826416016
    },
    {
      "epoch": 4.759521484375e-05,
      "model_forward_time": 0.11480450630187988,
      "step": 7798
    },
    {
      "epoch": 4.759521484375e-05,
      "step": 7798,
      "training_step_time": 0.3790452480316162
    },
    {
      "epoch": 4.7601318359375e-05,
      "model_forward_time": 0.11440730094909668,
      "step": 7799
    },
    {
      "epoch": 4.7601318359375e-05,
      "step": 7799,
      "training_step_time": 0.38154101371765137
    },
    {
      "epoch": 4.7607421875e-05,
      "grad_norm": 0.2229989469051361,
      "learning_rate": 9.826044551386744e-05,
      "loss": 0.0704,
      "step": 7800
    },
    {
      "epoch": 4.7607421875e-05,
      "model_forward_time": 0.11526107788085938,
      "step": 7800
    },
    {
      "epoch": 4.7607421875e-05,
      "step": 7800,
      "training_step_time": 0.3831191062927246
    },
    {
      "epoch": 4.7613525390625e-05,
      "model_forward_time": 0.11469364166259766,
      "step": 7801
    },
    {
      "epoch": 4.7613525390625e-05,
      "step": 7801,
      "training_step_time": 0.38697338104248047
    },
    {
      "epoch": 4.761962890625e-05,
      "model_forward_time": 0.11495208740234375,
      "step": 7802
    },
    {
      "epoch": 4.761962890625e-05,
      "step": 7802,
      "training_step_time": 0.3935506343841553
    },
    {
      "epoch": 4.7625732421875e-05,
      "model_forward_time": 0.11553645133972168,
      "step": 7803
    },
    {
      "epoch": 4.7625732421875e-05,
      "step": 7803,
      "training_step_time": 0.42743587493896484
    },
    {
      "epoch": 4.76318359375e-05,
      "model_forward_time": 0.11575102806091309,
      "step": 7804
    },
    {
      "epoch": 4.76318359375e-05,
      "step": 7804,
      "training_step_time": 0.41712188720703125
    },
    {
      "epoch": 4.7637939453125e-05,
      "model_forward_time": 0.11490058898925781,
      "step": 7805
    },
    {
      "epoch": 4.7637939453125e-05,
      "step": 7805,
      "training_step_time": 0.3793525695800781
    },
    {
      "epoch": 4.764404296875e-05,
      "model_forward_time": 0.11547517776489258,
      "step": 7806
    },
    {
      "epoch": 4.764404296875e-05,
      "step": 7806,
      "training_step_time": 0.39949703216552734
    },
    {
      "epoch": 4.7650146484375e-05,
      "model_forward_time": 0.11532950401306152,
      "step": 7807
    },
    {
      "epoch": 4.7650146484375e-05,
      "step": 7807,
      "training_step_time": 0.44209718704223633
    },
    {
      "epoch": 4.765625e-05,
      "model_forward_time": 0.11508655548095703,
      "step": 7808
    },
    {
      "epoch": 4.765625e-05,
      "step": 7808,
      "training_step_time": 0.4546992778778076
    },
    {
      "epoch": 4.7662353515625e-05,
      "model_forward_time": 0.1149296760559082,
      "step": 7809
    },
    {
      "epoch": 4.7662353515625e-05,
      "step": 7809,
      "training_step_time": 0.4287745952606201
    },
    {
      "epoch": 4.766845703125e-05,
      "grad_norm": 0.2308916598558426,
      "learning_rate": 9.825323236426944e-05,
      "loss": 0.073,
      "step": 7810
    },
    {
      "epoch": 4.766845703125e-05,
      "model_forward_time": 0.1154787540435791,
      "step": 7810
    },
    {
      "epoch": 4.766845703125e-05,
      "step": 7810,
      "training_step_time": 0.44082021713256836
    },
    {
      "epoch": 4.7674560546875e-05,
      "model_forward_time": 0.11570119857788086,
      "step": 7811
    },
    {
      "epoch": 4.7674560546875e-05,
      "step": 7811,
      "training_step_time": 0.4190835952758789
    },
    {
      "epoch": 4.76806640625e-05,
      "model_forward_time": 0.11455774307250977,
      "step": 7812
    },
    {
      "epoch": 4.76806640625e-05,
      "step": 7812,
      "training_step_time": 0.404665470123291
    },
    {
      "epoch": 4.7686767578125e-05,
      "model_forward_time": 0.11598324775695801,
      "step": 7813
    },
    {
      "epoch": 4.7686767578125e-05,
      "step": 7813,
      "training_step_time": 0.394406795501709
    },
    {
      "epoch": 4.769287109375e-05,
      "model_forward_time": 0.1153421401977539,
      "step": 7814
    },
    {
      "epoch": 4.769287109375e-05,
      "step": 7814,
      "training_step_time": 0.39098381996154785
    },
    {
      "epoch": 4.7698974609375e-05,
      "model_forward_time": 0.11485576629638672,
      "step": 7815
    },
    {
      "epoch": 4.7698974609375e-05,
      "step": 7815,
      "training_step_time": 0.3923037052154541
    },
    {
      "epoch": 4.7705078125e-05,
      "model_forward_time": 0.11525917053222656,
      "step": 7816
    },
    {
      "epoch": 4.7705078125e-05,
      "step": 7816,
      "training_step_time": 0.4047849178314209
    },
    {
      "epoch": 4.7711181640625e-05,
      "model_forward_time": 0.11531519889831543,
      "step": 7817
    },
    {
      "epoch": 4.7711181640625e-05,
      "step": 7817,
      "training_step_time": 1.0486037731170654
    },
    {
      "epoch": 4.771728515625e-05,
      "model_forward_time": 0.11407089233398438,
      "step": 7818
    },
    {
      "epoch": 4.771728515625e-05,
      "step": 7818,
      "training_step_time": 0.39087533950805664
    },
    {
      "epoch": 4.7723388671875e-05,
      "model_forward_time": 0.11460089683532715,
      "step": 7819
    },
    {
      "epoch": 4.7723388671875e-05,
      "step": 7819,
      "training_step_time": 0.378037691116333
    },
    {
      "epoch": 4.77294921875e-05,
      "grad_norm": 0.27092981338500977,
      "learning_rate": 9.824600455661353e-05,
      "loss": 0.0777,
      "step": 7820
    },
    {
      "epoch": 4.77294921875e-05,
      "model_forward_time": 0.11401867866516113,
      "step": 7820
    },
    {
      "epoch": 4.77294921875e-05,
      "step": 7820,
      "training_step_time": 0.405864953994751
    },
    {
      "epoch": 4.7735595703125e-05,
      "model_forward_time": 0.11417770385742188,
      "step": 7821
    },
    {
      "epoch": 4.7735595703125e-05,
      "step": 7821,
      "training_step_time": 0.3647427558898926
    },
    {
      "epoch": 4.774169921875e-05,
      "model_forward_time": 0.11410760879516602,
      "step": 7822
    },
    {
      "epoch": 4.774169921875e-05,
      "step": 7822,
      "training_step_time": 0.46427416801452637
    },
    {
      "epoch": 4.7747802734375e-05,
      "model_forward_time": 0.1148371696472168,
      "step": 7823
    },
    {
      "epoch": 4.7747802734375e-05,
      "step": 7823,
      "training_step_time": 0.6331689357757568
    },
    {
      "epoch": 4.775390625e-05,
      "model_forward_time": 0.11414074897766113,
      "step": 7824
    },
    {
      "epoch": 4.775390625e-05,
      "step": 7824,
      "training_step_time": 0.38181066513061523
    },
    {
      "epoch": 4.7760009765625e-05,
      "model_forward_time": 0.11467576026916504,
      "step": 7825
    },
    {
      "epoch": 4.7760009765625e-05,
      "step": 7825,
      "training_step_time": 0.3908841609954834
    },
    {
      "epoch": 4.776611328125e-05,
      "model_forward_time": 0.11425089836120605,
      "step": 7826
    },
    {
      "epoch": 4.776611328125e-05,
      "step": 7826,
      "training_step_time": 0.3919491767883301
    },
    {
      "epoch": 4.7772216796875e-05,
      "model_forward_time": 0.11426925659179688,
      "step": 7827
    },
    {
      "epoch": 4.7772216796875e-05,
      "step": 7827,
      "training_step_time": 0.3896937370300293
    },
    {
      "epoch": 4.77783203125e-05,
      "model_forward_time": 0.11614060401916504,
      "step": 7828
    },
    {
      "epoch": 4.77783203125e-05,
      "step": 7828,
      "training_step_time": 0.3867042064666748
    },
    {
      "epoch": 4.7784423828125e-05,
      "model_forward_time": 0.11534762382507324,
      "step": 7829
    },
    {
      "epoch": 4.7784423828125e-05,
      "step": 7829,
      "training_step_time": 0.8220305442810059
    },
    {
      "epoch": 4.779052734375e-05,
      "grad_norm": 0.25602254271507263,
      "learning_rate": 9.823876209309527e-05,
      "loss": 0.0805,
      "step": 7830
    },
    {
      "epoch": 4.779052734375e-05,
      "model_forward_time": 0.11424708366394043,
      "step": 7830
    },
    {
      "epoch": 4.779052734375e-05,
      "step": 7830,
      "training_step_time": 0.4094574451446533
    },
    {
      "epoch": 4.7796630859375e-05,
      "model_forward_time": 0.11430692672729492,
      "step": 7831
    },
    {
      "epoch": 4.7796630859375e-05,
      "step": 7831,
      "training_step_time": 0.39116358757019043
    },
    {
      "epoch": 4.7802734375e-05,
      "model_forward_time": 0.11459922790527344,
      "step": 7832
    },
    {
      "epoch": 4.7802734375e-05,
      "step": 7832,
      "training_step_time": 0.4033036231994629
    },
    {
      "epoch": 4.7808837890625e-05,
      "model_forward_time": 0.11391139030456543,
      "step": 7833
    },
    {
      "epoch": 4.7808837890625e-05,
      "step": 7833,
      "training_step_time": 0.3849163055419922
    },
    {
      "epoch": 4.781494140625e-05,
      "model_forward_time": 0.11476325988769531,
      "step": 7834
    },
    {
      "epoch": 4.781494140625e-05,
      "step": 7834,
      "training_step_time": 0.48198652267456055
    },
    {
      "epoch": 4.7821044921875e-05,
      "model_forward_time": 0.11507797241210938,
      "step": 7835
    },
    {
      "epoch": 4.7821044921875e-05,
      "step": 7835,
      "training_step_time": 0.7874102592468262
    },
    {
      "epoch": 4.78271484375e-05,
      "model_forward_time": 0.11439037322998047,
      "step": 7836
    },
    {
      "epoch": 4.78271484375e-05,
      "step": 7836,
      "training_step_time": 0.4017777442932129
    },
    {
      "epoch": 4.7833251953125e-05,
      "model_forward_time": 0.1144247055053711,
      "step": 7837
    },
    {
      "epoch": 4.7833251953125e-05,
      "step": 7837,
      "training_step_time": 0.3969697952270508
    },
    {
      "epoch": 4.783935546875e-05,
      "model_forward_time": 0.1142268180847168,
      "step": 7838
    },
    {
      "epoch": 4.783935546875e-05,
      "step": 7838,
      "training_step_time": 0.3924374580383301
    },
    {
      "epoch": 4.7845458984375e-05,
      "model_forward_time": 0.11435961723327637,
      "step": 7839
    },
    {
      "epoch": 4.7845458984375e-05,
      "step": 7839,
      "training_step_time": 0.3869435787200928
    },
    {
      "epoch": 4.78515625e-05,
      "grad_norm": 0.21268047392368317,
      "learning_rate": 9.823150497591476e-05,
      "loss": 0.0742,
      "step": 7840
    },
    {
      "epoch": 4.78515625e-05,
      "model_forward_time": 0.11412811279296875,
      "step": 7840
    },
    {
      "epoch": 4.78515625e-05,
      "step": 7840,
      "training_step_time": 0.3826305866241455
    },
    {
      "epoch": 4.7857666015625e-05,
      "model_forward_time": 0.11511468887329102,
      "step": 7841
    },
    {
      "epoch": 4.7857666015625e-05,
      "step": 7841,
      "training_step_time": 1.1889793872833252
    },
    {
      "epoch": 4.786376953125e-05,
      "model_forward_time": 0.11392617225646973,
      "step": 7842
    },
    {
      "epoch": 4.786376953125e-05,
      "step": 7842,
      "training_step_time": 0.4292898178100586
    },
    {
      "epoch": 4.7869873046875e-05,
      "model_forward_time": 0.11421704292297363,
      "step": 7843
    },
    {
      "epoch": 4.7869873046875e-05,
      "step": 7843,
      "training_step_time": 0.4427928924560547
    },
    {
      "epoch": 4.78759765625e-05,
      "model_forward_time": 0.11391782760620117,
      "step": 7844
    },
    {
      "epoch": 4.78759765625e-05,
      "step": 7844,
      "training_step_time": 0.3829350471496582
    },
    {
      "epoch": 4.7882080078125e-05,
      "model_forward_time": 0.11321806907653809,
      "step": 7845
    },
    {
      "epoch": 4.7882080078125e-05,
      "step": 7845,
      "training_step_time": 0.3863208293914795
    },
    {
      "epoch": 4.788818359375e-05,
      "model_forward_time": 0.11403751373291016,
      "step": 7846
    },
    {
      "epoch": 4.788818359375e-05,
      "step": 7846,
      "training_step_time": 0.38036417961120605
    },
    {
      "epoch": 4.7894287109375e-05,
      "model_forward_time": 0.11568188667297363,
      "step": 7847
    },
    {
      "epoch": 4.7894287109375e-05,
      "step": 7847,
      "training_step_time": 1.0904922485351562
    },
    {
      "epoch": 4.7900390625e-05,
      "model_forward_time": 0.11367392539978027,
      "step": 7848
    },
    {
      "epoch": 4.7900390625e-05,
      "step": 7848,
      "training_step_time": 0.40926051139831543
    },
    {
      "epoch": 4.7906494140625e-05,
      "model_forward_time": 0.11393308639526367,
      "step": 7849
    },
    {
      "epoch": 4.7906494140625e-05,
      "step": 7849,
      "training_step_time": 0.4041719436645508
    },
    {
      "epoch": 4.791259765625e-05,
      "grad_norm": 0.19523930549621582,
      "learning_rate": 9.822423320727654e-05,
      "loss": 0.0682,
      "step": 7850
    },
    {
      "epoch": 4.791259765625e-05,
      "model_forward_time": 0.11393952369689941,
      "step": 7850
    },
    {
      "epoch": 4.791259765625e-05,
      "step": 7850,
      "training_step_time": 0.40313196182250977
    },
    {
      "epoch": 4.7918701171875e-05,
      "model_forward_time": 0.11401915550231934,
      "step": 7851
    },
    {
      "epoch": 4.7918701171875e-05,
      "step": 7851,
      "training_step_time": 0.38716602325439453
    },
    {
      "epoch": 4.79248046875e-05,
      "model_forward_time": 0.11436223983764648,
      "step": 7852
    },
    {
      "epoch": 4.79248046875e-05,
      "step": 7852,
      "training_step_time": 0.3762667179107666
    },
    {
      "epoch": 4.7930908203125e-05,
      "model_forward_time": 0.11481094360351562,
      "step": 7853
    },
    {
      "epoch": 4.7930908203125e-05,
      "step": 7853,
      "training_step_time": 1.0104329586029053
    },
    {
      "epoch": 4.793701171875e-05,
      "model_forward_time": 0.11391568183898926,
      "step": 7854
    },
    {
      "epoch": 4.793701171875e-05,
      "step": 7854,
      "training_step_time": 0.41186022758483887
    },
    {
      "epoch": 4.7943115234375e-05,
      "model_forward_time": 0.11400818824768066,
      "step": 7855
    },
    {
      "epoch": 4.7943115234375e-05,
      "step": 7855,
      "training_step_time": 0.42853403091430664
    },
    {
      "epoch": 4.794921875e-05,
      "model_forward_time": 0.11441183090209961,
      "step": 7856
    },
    {
      "epoch": 4.794921875e-05,
      "step": 7856,
      "training_step_time": 0.41860008239746094
    },
    {
      "epoch": 4.7955322265625e-05,
      "model_forward_time": 0.11392378807067871,
      "step": 7857
    },
    {
      "epoch": 4.7955322265625e-05,
      "step": 7857,
      "training_step_time": 0.3843824863433838
    },
    {
      "epoch": 4.796142578125e-05,
      "model_forward_time": 0.11475157737731934,
      "step": 7858
    },
    {
      "epoch": 4.796142578125e-05,
      "step": 7858,
      "training_step_time": 0.39437198638916016
    },
    {
      "epoch": 4.7967529296875e-05,
      "model_forward_time": 0.1145622730255127,
      "step": 7859
    },
    {
      "epoch": 4.7967529296875e-05,
      "step": 7859,
      "training_step_time": 0.6756851673126221
    },
    {
      "epoch": 4.79736328125e-05,
      "grad_norm": 0.21132074296474457,
      "learning_rate": 9.821694678938953e-05,
      "loss": 0.0651,
      "step": 7860
    },
    {
      "epoch": 4.79736328125e-05,
      "model_forward_time": 0.11472773551940918,
      "step": 7860
    },
    {
      "epoch": 4.79736328125e-05,
      "step": 7860,
      "training_step_time": 0.44739532470703125
    },
    {
      "epoch": 4.7979736328125e-05,
      "model_forward_time": 0.11383962631225586,
      "step": 7861
    },
    {
      "epoch": 4.7979736328125e-05,
      "step": 7861,
      "training_step_time": 0.4392242431640625
    },
    {
      "epoch": 4.798583984375e-05,
      "model_forward_time": 0.11492323875427246,
      "step": 7862
    },
    {
      "epoch": 4.798583984375e-05,
      "step": 7862,
      "training_step_time": 0.40924668312072754
    },
    {
      "epoch": 4.7991943359375e-05,
      "model_forward_time": 0.11473608016967773,
      "step": 7863
    },
    {
      "epoch": 4.7991943359375e-05,
      "step": 7863,
      "training_step_time": 0.38600993156433105
    },
    {
      "epoch": 4.7998046875e-05,
      "model_forward_time": 0.1143198013305664,
      "step": 7864
    },
    {
      "epoch": 4.7998046875e-05,
      "step": 7864,
      "training_step_time": 0.3812406063079834
    },
    {
      "epoch": 4.8004150390625e-05,
      "model_forward_time": 0.11476016044616699,
      "step": 7865
    },
    {
      "epoch": 4.8004150390625e-05,
      "step": 7865,
      "training_step_time": 0.6605930328369141
    },
    {
      "epoch": 4.801025390625e-05,
      "model_forward_time": 0.11430978775024414,
      "step": 7866
    },
    {
      "epoch": 4.801025390625e-05,
      "step": 7866,
      "training_step_time": 0.39372730255126953
    },
    {
      "epoch": 4.8016357421875e-05,
      "model_forward_time": 0.11653327941894531,
      "step": 7867
    },
    {
      "epoch": 4.8016357421875e-05,
      "step": 7867,
      "training_step_time": 0.4363524913787842
    },
    {
      "epoch": 4.80224609375e-05,
      "model_forward_time": 0.11451482772827148,
      "step": 7868
    },
    {
      "epoch": 4.80224609375e-05,
      "step": 7868,
      "training_step_time": 0.4110839366912842
    },
    {
      "epoch": 4.8028564453125e-05,
      "model_forward_time": 0.11432981491088867,
      "step": 7869
    },
    {
      "epoch": 4.8028564453125e-05,
      "step": 7869,
      "training_step_time": 0.44503355026245117
    },
    {
      "epoch": 4.803466796875e-05,
      "grad_norm": 0.21004325151443481,
      "learning_rate": 9.82096457244672e-05,
      "loss": 0.0676,
      "step": 7870
    },
    {
      "epoch": 4.803466796875e-05,
      "model_forward_time": 0.11466765403747559,
      "step": 7870
    },
    {
      "epoch": 4.803466796875e-05,
      "step": 7870,
      "training_step_time": 0.3933391571044922
    },
    {
      "epoch": 4.8040771484375e-05,
      "model_forward_time": 0.11516880989074707,
      "step": 7871
    },
    {
      "epoch": 4.8040771484375e-05,
      "step": 7871,
      "training_step_time": 0.9581694602966309
    },
    {
      "epoch": 4.8046875e-05,
      "model_forward_time": 0.11371183395385742,
      "step": 7872
    },
    {
      "epoch": 4.8046875e-05,
      "step": 7872,
      "training_step_time": 0.46579408645629883
    },
    {
      "epoch": 4.8052978515625e-05,
      "model_forward_time": 0.11428046226501465,
      "step": 7873
    },
    {
      "epoch": 4.8052978515625e-05,
      "step": 7873,
      "training_step_time": 0.4304320812225342
    },
    {
      "epoch": 4.805908203125e-05,
      "model_forward_time": 0.1139218807220459,
      "step": 7874
    },
    {
      "epoch": 4.805908203125e-05,
      "step": 7874,
      "training_step_time": 0.4603848457336426
    },
    {
      "epoch": 4.8065185546875e-05,
      "model_forward_time": 0.11355304718017578,
      "step": 7875
    },
    {
      "epoch": 4.8065185546875e-05,
      "step": 7875,
      "training_step_time": 0.3800079822540283
    },
    {
      "epoch": 4.80712890625e-05,
      "model_forward_time": 0.1147153377532959,
      "step": 7876
    },
    {
      "epoch": 4.80712890625e-05,
      "step": 7876,
      "training_step_time": 0.3862452507019043
    },
    {
      "epoch": 4.8077392578125e-05,
      "model_forward_time": 0.11470866203308105,
      "step": 7877
    },
    {
      "epoch": 4.8077392578125e-05,
      "step": 7877,
      "training_step_time": 0.8401694297790527
    },
    {
      "epoch": 4.808349609375e-05,
      "model_forward_time": 0.11518478393554688,
      "step": 7878
    },
    {
      "epoch": 4.808349609375e-05,
      "step": 7878,
      "training_step_time": 0.38121819496154785
    },
    {
      "epoch": 4.8089599609375e-05,
      "model_forward_time": 0.11431717872619629,
      "step": 7879
    },
    {
      "epoch": 4.8089599609375e-05,
      "step": 7879,
      "training_step_time": 0.38063549995422363
    },
    {
      "epoch": 4.8095703125e-05,
      "grad_norm": 0.2797534167766571,
      "learning_rate": 9.820233001472738e-05,
      "loss": 0.0751,
      "step": 7880
    },
    {
      "epoch": 4.8095703125e-05,
      "model_forward_time": 0.1140296459197998,
      "step": 7880
    },
    {
      "epoch": 4.8095703125e-05,
      "step": 7880,
      "training_step_time": 0.4161553382873535
    },
    {
      "epoch": 4.8101806640625e-05,
      "model_forward_time": 0.11403608322143555,
      "step": 7881
    },
    {
      "epoch": 4.8101806640625e-05,
      "step": 7881,
      "training_step_time": 0.3884296417236328
    },
    {
      "epoch": 4.810791015625e-05,
      "model_forward_time": 0.11479067802429199,
      "step": 7882
    },
    {
      "epoch": 4.810791015625e-05,
      "step": 7882,
      "training_step_time": 0.38252782821655273
    },
    {
      "epoch": 4.8114013671875e-05,
      "model_forward_time": 0.11471128463745117,
      "step": 7883
    },
    {
      "epoch": 4.8114013671875e-05,
      "step": 7883,
      "training_step_time": 0.8911209106445312
    },
    {
      "epoch": 4.81201171875e-05,
      "model_forward_time": 0.11408686637878418,
      "step": 7884
    },
    {
      "epoch": 4.81201171875e-05,
      "step": 7884,
      "training_step_time": 0.389880895614624
    },
    {
      "epoch": 4.8126220703125e-05,
      "model_forward_time": 0.11388921737670898,
      "step": 7885
    },
    {
      "epoch": 4.8126220703125e-05,
      "step": 7885,
      "training_step_time": 0.43431520462036133
    },
    {
      "epoch": 4.813232421875e-05,
      "model_forward_time": 0.11435842514038086,
      "step": 7886
    },
    {
      "epoch": 4.813232421875e-05,
      "step": 7886,
      "training_step_time": 0.43436312675476074
    },
    {
      "epoch": 4.8138427734375e-05,
      "model_forward_time": 0.11463427543640137,
      "step": 7887
    },
    {
      "epoch": 4.8138427734375e-05,
      "step": 7887,
      "training_step_time": 0.4599268436431885
    },
    {
      "epoch": 4.814453125e-05,
      "model_forward_time": 0.11527371406555176,
      "step": 7888
    },
    {
      "epoch": 4.814453125e-05,
      "step": 7888,
      "training_step_time": 0.4168891906738281
    },
    {
      "epoch": 4.8150634765625e-05,
      "model_forward_time": 0.1148977279663086,
      "step": 7889
    },
    {
      "epoch": 4.8150634765625e-05,
      "step": 7889,
      "training_step_time": 0.6901140213012695
    },
    {
      "epoch": 4.815673828125e-05,
      "grad_norm": 0.18887577950954437,
      "learning_rate": 9.819499966239243e-05,
      "loss": 0.0725,
      "step": 7890
    },
    {
      "epoch": 4.815673828125e-05,
      "model_forward_time": 0.11461520195007324,
      "step": 7890
    },
    {
      "epoch": 4.815673828125e-05,
      "step": 7890,
      "training_step_time": 0.39192628860473633
    },
    {
      "epoch": 4.8162841796875e-05,
      "model_forward_time": 0.11387801170349121,
      "step": 7891
    },
    {
      "epoch": 4.8162841796875e-05,
      "step": 7891,
      "training_step_time": 0.390120267868042
    },
    {
      "epoch": 4.81689453125e-05,
      "model_forward_time": 0.11423087120056152,
      "step": 7892
    },
    {
      "epoch": 4.81689453125e-05,
      "step": 7892,
      "training_step_time": 0.4112234115600586
    },
    {
      "epoch": 4.8175048828125e-05,
      "model_forward_time": 0.11428260803222656,
      "step": 7893
    },
    {
      "epoch": 4.8175048828125e-05,
      "step": 7893,
      "training_step_time": 0.43720483779907227
    },
    {
      "epoch": 4.818115234375e-05,
      "model_forward_time": 0.11486506462097168,
      "step": 7894
    },
    {
      "epoch": 4.818115234375e-05,
      "step": 7894,
      "training_step_time": 0.3847346305847168
    },
    {
      "epoch": 4.8187255859375e-05,
      "model_forward_time": 0.11453723907470703,
      "step": 7895
    },
    {
      "epoch": 4.8187255859375e-05,
      "step": 7895,
      "training_step_time": 0.5214219093322754
    },
    {
      "epoch": 4.8193359375e-05,
      "model_forward_time": 0.1148834228515625,
      "step": 7896
    },
    {
      "epoch": 4.8193359375e-05,
      "step": 7896,
      "training_step_time": 0.3955097198486328
    },
    {
      "epoch": 4.8199462890625e-05,
      "model_forward_time": 0.11479902267456055,
      "step": 7897
    },
    {
      "epoch": 4.8199462890625e-05,
      "step": 7897,
      "training_step_time": 0.38788366317749023
    },
    {
      "epoch": 4.820556640625e-05,
      "model_forward_time": 0.11481904983520508,
      "step": 7898
    },
    {
      "epoch": 4.820556640625e-05,
      "step": 7898,
      "training_step_time": 0.3960089683532715
    },
    {
      "epoch": 4.8211669921875e-05,
      "model_forward_time": 0.11494302749633789,
      "step": 7899
    },
    {
      "epoch": 4.8211669921875e-05,
      "step": 7899,
      "training_step_time": 0.3676793575286865
    },
    {
      "epoch": 4.82177734375e-05,
      "grad_norm": 0.17781051993370056,
      "learning_rate": 9.818765466968909e-05,
      "loss": 0.0681,
      "step": 7900
    },
    {
      "epoch": 4.82177734375e-05,
      "model_forward_time": 0.1151437759399414,
      "step": 7900
    },
    {
      "epoch": 4.82177734375e-05,
      "step": 7900,
      "training_step_time": 0.4190993309020996
    },
    {
      "epoch": 4.8223876953125e-05,
      "model_forward_time": 0.11536931991577148,
      "step": 7901
    },
    {
      "epoch": 4.8223876953125e-05,
      "step": 7901,
      "training_step_time": 1.412708044052124
    },
    {
      "epoch": 4.822998046875e-05,
      "model_forward_time": 0.1136932373046875,
      "step": 7902
    },
    {
      "epoch": 4.822998046875e-05,
      "step": 7902,
      "training_step_time": 0.3717660903930664
    },
    {
      "epoch": 4.8236083984375e-05,
      "model_forward_time": 0.11392068862915039,
      "step": 7903
    },
    {
      "epoch": 4.8236083984375e-05,
      "step": 7903,
      "training_step_time": 0.38176584243774414
    },
    {
      "epoch": 4.82421875e-05,
      "model_forward_time": 0.1137998104095459,
      "step": 7904
    },
    {
      "epoch": 4.82421875e-05,
      "step": 7904,
      "training_step_time": 0.39112186431884766
    },
    {
      "epoch": 4.8248291015625e-05,
      "model_forward_time": 0.11390233039855957,
      "step": 7905
    },
    {
      "epoch": 4.8248291015625e-05,
      "step": 7905,
      "training_step_time": 0.4103739261627197
    },
    {
      "epoch": 4.825439453125e-05,
      "model_forward_time": 0.11437582969665527,
      "step": 7906
    },
    {
      "epoch": 4.825439453125e-05,
      "step": 7906,
      "training_step_time": 0.45615196228027344
    },
    {
      "epoch": 4.8260498046875e-05,
      "model_forward_time": 0.11443209648132324,
      "step": 7907
    },
    {
      "epoch": 4.8260498046875e-05,
      "step": 7907,
      "training_step_time": 0.49294590950012207
    },
    {
      "epoch": 4.82666015625e-05,
      "model_forward_time": 0.11382508277893066,
      "step": 7908
    },
    {
      "epoch": 4.82666015625e-05,
      "step": 7908,
      "training_step_time": 0.39126133918762207
    },
    {
      "epoch": 4.8272705078125e-05,
      "model_forward_time": 0.11450934410095215,
      "step": 7909
    },
    {
      "epoch": 4.8272705078125e-05,
      "step": 7909,
      "training_step_time": 0.38848423957824707
    },
    {
      "epoch": 4.827880859375e-05,
      "grad_norm": 0.3071466386318207,
      "learning_rate": 9.818029503884859e-05,
      "loss": 0.0799,
      "step": 7910
    },
    {
      "epoch": 4.827880859375e-05,
      "model_forward_time": 0.1144869327545166,
      "step": 7910
    },
    {
      "epoch": 4.827880859375e-05,
      "step": 7910,
      "training_step_time": 0.39179444313049316
    },
    {
      "epoch": 4.8284912109375e-05,
      "model_forward_time": 0.11577939987182617,
      "step": 7911
    },
    {
      "epoch": 4.8284912109375e-05,
      "step": 7911,
      "training_step_time": 0.40141844749450684
    },
    {
      "epoch": 4.8291015625e-05,
      "model_forward_time": 0.11531662940979004,
      "step": 7912
    },
    {
      "epoch": 4.8291015625e-05,
      "step": 7912,
      "training_step_time": 0.501157283782959
    },
    {
      "epoch": 4.8297119140625e-05,
      "model_forward_time": 0.11453557014465332,
      "step": 7913
    },
    {
      "epoch": 4.8297119140625e-05,
      "step": 7913,
      "training_step_time": 0.6518721580505371
    },
    {
      "epoch": 4.830322265625e-05,
      "model_forward_time": 0.11449670791625977,
      "step": 7914
    },
    {
      "epoch": 4.830322265625e-05,
      "step": 7914,
      "training_step_time": 0.3906538486480713
    },
    {
      "epoch": 4.8309326171875e-05,
      "model_forward_time": 0.11484527587890625,
      "step": 7915
    },
    {
      "epoch": 4.8309326171875e-05,
      "step": 7915,
      "training_step_time": 0.39128637313842773
    },
    {
      "epoch": 4.83154296875e-05,
      "model_forward_time": 0.1141817569732666,
      "step": 7916
    },
    {
      "epoch": 4.83154296875e-05,
      "step": 7916,
      "training_step_time": 0.3912966251373291
    },
    {
      "epoch": 4.8321533203125e-05,
      "model_forward_time": 0.11384224891662598,
      "step": 7917
    },
    {
      "epoch": 4.8321533203125e-05,
      "step": 7917,
      "training_step_time": 0.4015834331512451
    },
    {
      "epoch": 4.832763671875e-05,
      "model_forward_time": 0.1140742301940918,
      "step": 7918
    },
    {
      "epoch": 4.832763671875e-05,
      "step": 7918,
      "training_step_time": 0.4072422981262207
    },
    {
      "epoch": 4.8333740234375e-05,
      "model_forward_time": 0.11523175239562988,
      "step": 7919
    },
    {
      "epoch": 4.8333740234375e-05,
      "step": 7919,
      "training_step_time": 0.5359649658203125
    },
    {
      "epoch": 4.833984375e-05,
      "grad_norm": 0.23478013277053833,
      "learning_rate": 9.817292077210659e-05,
      "loss": 0.0717,
      "step": 7920
    },
    {
      "epoch": 4.833984375e-05,
      "model_forward_time": 0.11515378952026367,
      "step": 7920
    },
    {
      "epoch": 4.833984375e-05,
      "step": 7920,
      "training_step_time": 0.4298844337463379
    },
    {
      "epoch": 4.8345947265625e-05,
      "model_forward_time": 0.11501407623291016,
      "step": 7921
    },
    {
      "epoch": 4.8345947265625e-05,
      "step": 7921,
      "training_step_time": 0.40122365951538086
    },
    {
      "epoch": 4.835205078125e-05,
      "model_forward_time": 0.11533164978027344,
      "step": 7922
    },
    {
      "epoch": 4.835205078125e-05,
      "step": 7922,
      "training_step_time": 0.39374613761901855
    },
    {
      "epoch": 4.8358154296875e-05,
      "model_forward_time": 0.11527657508850098,
      "step": 7923
    },
    {
      "epoch": 4.8358154296875e-05,
      "step": 7923,
      "training_step_time": 0.3893146514892578
    },
    {
      "epoch": 4.83642578125e-05,
      "model_forward_time": 0.1151585578918457,
      "step": 7924
    },
    {
      "epoch": 4.83642578125e-05,
      "step": 7924,
      "training_step_time": 0.3899681568145752
    },
    {
      "epoch": 4.8370361328125e-05,
      "model_forward_time": 0.11556839942932129,
      "step": 7925
    },
    {
      "epoch": 4.8370361328125e-05,
      "step": 7925,
      "training_step_time": 0.7867827415466309
    },
    {
      "epoch": 4.837646484375e-05,
      "model_forward_time": 0.11514401435852051,
      "step": 7926
    },
    {
      "epoch": 4.837646484375e-05,
      "step": 7926,
      "training_step_time": 0.4084033966064453
    },
    {
      "epoch": 4.8382568359375e-05,
      "model_forward_time": 0.11448216438293457,
      "step": 7927
    },
    {
      "epoch": 4.8382568359375e-05,
      "step": 7927,
      "training_step_time": 0.4032480716705322
    },
    {
      "epoch": 4.8388671875e-05,
      "model_forward_time": 0.11451053619384766,
      "step": 7928
    },
    {
      "epoch": 4.8388671875e-05,
      "step": 7928,
      "training_step_time": 0.4424586296081543
    },
    {
      "epoch": 4.8394775390625e-05,
      "model_forward_time": 0.11487936973571777,
      "step": 7929
    },
    {
      "epoch": 4.8394775390625e-05,
      "step": 7929,
      "training_step_time": 0.3979182243347168
    },
    {
      "epoch": 4.840087890625e-05,
      "grad_norm": 0.19599390029907227,
      "learning_rate": 9.816553187170317e-05,
      "loss": 0.0713,
      "step": 7930
    },
    {
      "epoch": 4.840087890625e-05,
      "model_forward_time": 0.11436748504638672,
      "step": 7930
    },
    {
      "epoch": 4.840087890625e-05,
      "step": 7930,
      "training_step_time": 0.3872675895690918
    },
    {
      "epoch": 4.8406982421875e-05,
      "model_forward_time": 0.11545491218566895,
      "step": 7931
    },
    {
      "epoch": 4.8406982421875e-05,
      "step": 7931,
      "training_step_time": 0.9278469085693359
    },
    {
      "epoch": 4.84130859375e-05,
      "model_forward_time": 0.11517000198364258,
      "step": 7932
    },
    {
      "epoch": 4.84130859375e-05,
      "step": 7932,
      "training_step_time": 0.43124985694885254
    },
    {
      "epoch": 4.8419189453125e-05,
      "model_forward_time": 0.11374568939208984,
      "step": 7933
    },
    {
      "epoch": 4.8419189453125e-05,
      "step": 7933,
      "training_step_time": 0.4362802505493164
    },
    {
      "epoch": 4.842529296875e-05,
      "model_forward_time": 0.11410951614379883,
      "step": 7934
    },
    {
      "epoch": 4.842529296875e-05,
      "step": 7934,
      "training_step_time": 0.4844391345977783
    },
    {
      "epoch": 4.8431396484375e-05,
      "model_forward_time": 0.1140296459197998,
      "step": 7935
    },
    {
      "epoch": 4.8431396484375e-05,
      "step": 7935,
      "training_step_time": 0.3798985481262207
    },
    {
      "epoch": 4.84375e-05,
      "model_forward_time": 0.11465263366699219,
      "step": 7936
    },
    {
      "epoch": 4.84375e-05,
      "step": 7936,
      "training_step_time": 0.3826124668121338
    },
    {
      "epoch": 4.8443603515625e-05,
      "model_forward_time": 0.11514925956726074,
      "step": 7937
    },
    {
      "epoch": 4.8443603515625e-05,
      "step": 7937,
      "training_step_time": 0.7903647422790527
    },
    {
      "epoch": 4.844970703125e-05,
      "model_forward_time": 0.11455655097961426,
      "step": 7938
    },
    {
      "epoch": 4.844970703125e-05,
      "step": 7938,
      "training_step_time": 0.43646240234375
    },
    {
      "epoch": 4.8455810546875e-05,
      "model_forward_time": 0.11466455459594727,
      "step": 7939
    },
    {
      "epoch": 4.8455810546875e-05,
      "step": 7939,
      "training_step_time": 0.4562807083129883
    },
    {
      "epoch": 4.84619140625e-05,
      "grad_norm": 0.20249173045158386,
      "learning_rate": 9.815812833988291e-05,
      "loss": 0.0717,
      "step": 7940
    },
    {
      "epoch": 4.84619140625e-05,
      "model_forward_time": 0.11413264274597168,
      "step": 7940
    },
    {
      "epoch": 4.84619140625e-05,
      "step": 7940,
      "training_step_time": 0.470294713973999
    },
    {
      "epoch": 4.8468017578125e-05,
      "model_forward_time": 0.11439728736877441,
      "step": 7941
    },
    {
      "epoch": 4.8468017578125e-05,
      "step": 7941,
      "training_step_time": 0.37221789360046387
    },
    {
      "epoch": 4.847412109375e-05,
      "model_forward_time": 0.11480855941772461,
      "step": 7942
    },
    {
      "epoch": 4.847412109375e-05,
      "step": 7942,
      "training_step_time": 0.3785722255706787
    },
    {
      "epoch": 4.8480224609375e-05,
      "model_forward_time": 0.11498665809631348,
      "step": 7943
    },
    {
      "epoch": 4.8480224609375e-05,
      "step": 7943,
      "training_step_time": 0.7966818809509277
    },
    {
      "epoch": 4.8486328125e-05,
      "model_forward_time": 0.11441445350646973,
      "step": 7944
    },
    {
      "epoch": 4.8486328125e-05,
      "step": 7944,
      "training_step_time": 0.3968532085418701
    },
    {
      "epoch": 4.8492431640625e-05,
      "model_forward_time": 0.11399435997009277,
      "step": 7945
    },
    {
      "epoch": 4.8492431640625e-05,
      "step": 7945,
      "training_step_time": 0.4005255699157715
    },
    {
      "epoch": 4.849853515625e-05,
      "model_forward_time": 0.11469626426696777,
      "step": 7946
    },
    {
      "epoch": 4.849853515625e-05,
      "step": 7946,
      "training_step_time": 0.3962514400482178
    },
    {
      "epoch": 4.8504638671875e-05,
      "model_forward_time": 0.11409902572631836,
      "step": 7947
    },
    {
      "epoch": 4.8504638671875e-05,
      "step": 7947,
      "training_step_time": 0.4787101745605469
    },
    {
      "epoch": 4.85107421875e-05,
      "model_forward_time": 0.11438417434692383,
      "step": 7948
    },
    {
      "epoch": 4.85107421875e-05,
      "step": 7948,
      "training_step_time": 0.3803269863128662
    },
    {
      "epoch": 4.8516845703125e-05,
      "model_forward_time": 0.11578869819641113,
      "step": 7949
    },
    {
      "epoch": 4.8516845703125e-05,
      "step": 7949,
      "training_step_time": 1.0517189502716064
    },
    {
      "epoch": 4.852294921875e-05,
      "grad_norm": 0.2821812331676483,
      "learning_rate": 9.815071017889482e-05,
      "loss": 0.0794,
      "step": 7950
    },
    {
      "epoch": 4.852294921875e-05,
      "model_forward_time": 0.11428976058959961,
      "step": 7950
    },
    {
      "epoch": 4.852294921875e-05,
      "step": 7950,
      "training_step_time": 0.4785935878753662
    },
    {
      "epoch": 4.8529052734375e-05,
      "model_forward_time": 0.11459207534790039,
      "step": 7951
    },
    {
      "epoch": 4.8529052734375e-05,
      "step": 7951,
      "training_step_time": 0.41092824935913086
    },
    {
      "epoch": 4.853515625e-05,
      "model_forward_time": 0.11404132843017578,
      "step": 7952
    },
    {
      "epoch": 4.853515625e-05,
      "step": 7952,
      "training_step_time": 0.46715211868286133
    },
    {
      "epoch": 4.8541259765625e-05,
      "model_forward_time": 0.1138451099395752,
      "step": 7953
    },
    {
      "epoch": 4.8541259765625e-05,
      "step": 7953,
      "training_step_time": 0.3979146480560303
    },
    {
      "epoch": 4.854736328125e-05,
      "model_forward_time": 0.11411190032958984,
      "step": 7954
    },
    {
      "epoch": 4.854736328125e-05,
      "step": 7954,
      "training_step_time": 0.38636159896850586
    },
    {
      "epoch": 4.8553466796875e-05,
      "model_forward_time": 0.11490201950073242,
      "step": 7955
    },
    {
      "epoch": 4.8553466796875e-05,
      "step": 7955,
      "training_step_time": 0.5868983268737793
    },
    {
      "epoch": 4.85595703125e-05,
      "model_forward_time": 0.1147775650024414,
      "step": 7956
    },
    {
      "epoch": 4.85595703125e-05,
      "step": 7956,
      "training_step_time": 0.45458221435546875
    },
    {
      "epoch": 4.8565673828125e-05,
      "model_forward_time": 0.11494302749633789,
      "step": 7957
    },
    {
      "epoch": 4.8565673828125e-05,
      "step": 7957,
      "training_step_time": 0.40580105781555176
    },
    {
      "epoch": 4.857177734375e-05,
      "model_forward_time": 0.11477994918823242,
      "step": 7958
    },
    {
      "epoch": 4.857177734375e-05,
      "step": 7958,
      "training_step_time": 0.39068007469177246
    },
    {
      "epoch": 4.8577880859375e-05,
      "model_forward_time": 0.11533403396606445,
      "step": 7959
    },
    {
      "epoch": 4.8577880859375e-05,
      "step": 7959,
      "training_step_time": 0.43766117095947266
    },
    {
      "epoch": 4.8583984375e-05,
      "grad_norm": 0.24683503806591034,
      "learning_rate": 9.81432773909923e-05,
      "loss": 0.0827,
      "step": 7960
    },
    {
      "epoch": 4.8583984375e-05,
      "model_forward_time": 0.11460041999816895,
      "step": 7960
    },
    {
      "epoch": 4.8583984375e-05,
      "step": 7960,
      "training_step_time": 0.38330984115600586
    },
    {
      "epoch": 4.8590087890625e-05,
      "model_forward_time": 0.11566448211669922,
      "step": 7961
    },
    {
      "epoch": 4.8590087890625e-05,
      "step": 7961,
      "training_step_time": 0.784625768661499
    },
    {
      "epoch": 4.859619140625e-05,
      "model_forward_time": 0.11488032341003418,
      "step": 7962
    },
    {
      "epoch": 4.859619140625e-05,
      "step": 7962,
      "training_step_time": 0.3900182247161865
    },
    {
      "epoch": 4.8602294921875e-05,
      "model_forward_time": 0.11475038528442383,
      "step": 7963
    },
    {
      "epoch": 4.8602294921875e-05,
      "step": 7963,
      "training_step_time": 0.3895554542541504
    },
    {
      "epoch": 4.86083984375e-05,
      "model_forward_time": 0.11619853973388672,
      "step": 7964
    },
    {
      "epoch": 4.86083984375e-05,
      "step": 7964,
      "training_step_time": 0.40200090408325195
    },
    {
      "epoch": 4.8614501953125e-05,
      "model_forward_time": 0.11472725868225098,
      "step": 7965
    },
    {
      "epoch": 4.8614501953125e-05,
      "step": 7965,
      "training_step_time": 0.4388101100921631
    },
    {
      "epoch": 4.862060546875e-05,
      "model_forward_time": 0.1148841381072998,
      "step": 7966
    },
    {
      "epoch": 4.862060546875e-05,
      "step": 7966,
      "training_step_time": 0.4787485599517822
    },
    {
      "epoch": 4.8626708984375e-05,
      "model_forward_time": 0.11488580703735352,
      "step": 7967
    },
    {
      "epoch": 4.8626708984375e-05,
      "step": 7967,
      "training_step_time": 1.0020620822906494
    },
    {
      "epoch": 4.86328125e-05,
      "model_forward_time": 0.11420583724975586,
      "step": 7968
    },
    {
      "epoch": 4.86328125e-05,
      "step": 7968,
      "training_step_time": 0.3850250244140625
    },
    {
      "epoch": 4.8638916015625e-05,
      "model_forward_time": 0.11329269409179688,
      "step": 7969
    },
    {
      "epoch": 4.8638916015625e-05,
      "step": 7969,
      "training_step_time": 0.3852224349975586
    },
    {
      "epoch": 4.864501953125e-05,
      "grad_norm": 0.21035167574882507,
      "learning_rate": 9.813582997843327e-05,
      "loss": 0.0745,
      "step": 7970
    },
    {
      "epoch": 4.864501953125e-05,
      "model_forward_time": 0.11416363716125488,
      "step": 7970
    },
    {
      "epoch": 4.864501953125e-05,
      "step": 7970,
      "training_step_time": 0.40546178817749023
    },
    {
      "epoch": 4.8651123046875e-05,
      "model_forward_time": 0.11398005485534668,
      "step": 7971
    },
    {
      "epoch": 4.8651123046875e-05,
      "step": 7971,
      "training_step_time": 0.448333740234375
    },
    {
      "epoch": 4.86572265625e-05,
      "model_forward_time": 0.11398863792419434,
      "step": 7972
    },
    {
      "epoch": 4.86572265625e-05,
      "step": 7972,
      "training_step_time": 0.42668914794921875
    },
    {
      "epoch": 4.8663330078125e-05,
      "model_forward_time": 0.11493992805480957,
      "step": 7973
    },
    {
      "epoch": 4.8663330078125e-05,
      "step": 7973,
      "training_step_time": 0.8096446990966797
    },
    {
      "epoch": 4.866943359375e-05,
      "model_forward_time": 0.1148684024810791,
      "step": 7974
    },
    {
      "epoch": 4.866943359375e-05,
      "step": 7974,
      "training_step_time": 0.37087512016296387
    },
    {
      "epoch": 4.8675537109375e-05,
      "model_forward_time": 0.11436867713928223,
      "step": 7975
    },
    {
      "epoch": 4.8675537109375e-05,
      "step": 7975,
      "training_step_time": 0.3861656188964844
    },
    {
      "epoch": 4.8681640625e-05,
      "model_forward_time": 0.11448979377746582,
      "step": 7976
    },
    {
      "epoch": 4.8681640625e-05,
      "step": 7976,
      "training_step_time": 0.45183849334716797
    },
    {
      "epoch": 4.8687744140625e-05,
      "model_forward_time": 0.11480474472045898,
      "step": 7977
    },
    {
      "epoch": 4.8687744140625e-05,
      "step": 7977,
      "training_step_time": 0.42801713943481445
    },
    {
      "epoch": 4.869384765625e-05,
      "model_forward_time": 0.11451458930969238,
      "step": 7978
    },
    {
      "epoch": 4.869384765625e-05,
      "step": 7978,
      "training_step_time": 0.4611978530883789
    },
    {
      "epoch": 4.8699951171875e-05,
      "model_forward_time": 0.11500954627990723,
      "step": 7979
    },
    {
      "epoch": 4.8699951171875e-05,
      "step": 7979,
      "training_step_time": 0.5148375034332275
    },
    {
      "epoch": 4.87060546875e-05,
      "grad_norm": 0.2612982392311096,
      "learning_rate": 9.812836794348004e-05,
      "loss": 0.0833,
      "step": 7980
    },
    {
      "epoch": 4.87060546875e-05,
      "model_forward_time": 0.11471772193908691,
      "step": 7980
    },
    {
      "epoch": 4.87060546875e-05,
      "step": 7980,
      "training_step_time": 0.39603233337402344
    },
    {
      "epoch": 4.8712158203125e-05,
      "model_forward_time": 0.1151576042175293,
      "step": 7981
    },
    {
      "epoch": 4.8712158203125e-05,
      "step": 7981,
      "training_step_time": 0.38471388816833496
    },
    {
      "epoch": 4.871826171875e-05,
      "model_forward_time": 0.11526322364807129,
      "step": 7982
    },
    {
      "epoch": 4.871826171875e-05,
      "step": 7982,
      "training_step_time": 0.38856935501098633
    },
    {
      "epoch": 4.8724365234375e-05,
      "model_forward_time": 0.11729598045349121,
      "step": 7983
    },
    {
      "epoch": 4.8724365234375e-05,
      "step": 7983,
      "training_step_time": 0.39475083351135254
    },
    {
      "epoch": 4.873046875e-05,
      "model_forward_time": 0.11548519134521484,
      "step": 7984
    },
    {
      "epoch": 4.873046875e-05,
      "step": 7984,
      "training_step_time": 0.4850625991821289
    },
    {
      "epoch": 4.8736572265625e-05,
      "model_forward_time": 0.11491537094116211,
      "step": 7985
    },
    {
      "epoch": 4.8736572265625e-05,
      "step": 7985,
      "training_step_time": 0.841545581817627
    },
    {
      "epoch": 4.874267578125e-05,
      "model_forward_time": 0.11432719230651855,
      "step": 7986
    },
    {
      "epoch": 4.874267578125e-05,
      "step": 7986,
      "training_step_time": 0.37392091751098633
    },
    {
      "epoch": 4.8748779296875e-05,
      "model_forward_time": 0.11408233642578125,
      "step": 7987
    },
    {
      "epoch": 4.8748779296875e-05,
      "step": 7987,
      "training_step_time": 0.3809020519256592
    },
    {
      "epoch": 4.87548828125e-05,
      "model_forward_time": 0.11434316635131836,
      "step": 7988
    },
    {
      "epoch": 4.87548828125e-05,
      "step": 7988,
      "training_step_time": 0.38991379737854004
    },
    {
      "epoch": 4.8760986328125e-05,
      "model_forward_time": 0.11435461044311523,
      "step": 7989
    },
    {
      "epoch": 4.8760986328125e-05,
      "step": 7989,
      "training_step_time": 0.39084768295288086
    },
    {
      "epoch": 4.876708984375e-05,
      "grad_norm": 0.28277337551116943,
      "learning_rate": 9.812089128839938e-05,
      "loss": 0.0732,
      "step": 7990
    },
    {
      "epoch": 4.876708984375e-05,
      "model_forward_time": 0.11490416526794434,
      "step": 7990
    },
    {
      "epoch": 4.876708984375e-05,
      "step": 7990,
      "training_step_time": 0.4298367500305176
    },
    {
      "epoch": 4.8773193359375e-05,
      "model_forward_time": 0.11537623405456543,
      "step": 7991
    },
    {
      "epoch": 4.8773193359375e-05,
      "step": 7991,
      "training_step_time": 1.1288745403289795
    },
    {
      "epoch": 4.8779296875e-05,
      "model_forward_time": 0.11363935470581055,
      "step": 7992
    },
    {
      "epoch": 4.8779296875e-05,
      "step": 7992,
      "training_step_time": 0.3680999279022217
    },
    {
      "epoch": 4.8785400390625e-05,
      "model_forward_time": 0.11353659629821777,
      "step": 7993
    },
    {
      "epoch": 4.8785400390625e-05,
      "step": 7993,
      "training_step_time": 0.38173556327819824
    },
    {
      "epoch": 4.879150390625e-05,
      "model_forward_time": 0.11471819877624512,
      "step": 7994
    },
    {
      "epoch": 4.879150390625e-05,
      "step": 7994,
      "training_step_time": 0.38284802436828613
    },
    {
      "epoch": 4.8797607421875e-05,
      "model_forward_time": 0.11349844932556152,
      "step": 7995
    },
    {
      "epoch": 4.8797607421875e-05,
      "step": 7995,
      "training_step_time": 0.3889145851135254
    },
    {
      "epoch": 4.88037109375e-05,
      "model_forward_time": 0.11430621147155762,
      "step": 7996
    },
    {
      "epoch": 4.88037109375e-05,
      "step": 7996,
      "training_step_time": 0.4271509647369385
    },
    {
      "epoch": 4.8809814453125e-05,
      "model_forward_time": 0.11411046981811523,
      "step": 7997
    },
    {
      "epoch": 4.8809814453125e-05,
      "step": 7997,
      "training_step_time": 0.9527628421783447
    },
    {
      "epoch": 4.881591796875e-05,
      "model_forward_time": 0.11418437957763672,
      "step": 7998
    },
    {
      "epoch": 4.881591796875e-05,
      "step": 7998,
      "training_step_time": 0.3817174434661865
    },
    {
      "epoch": 4.8822021484375e-05,
      "model_forward_time": 0.11367535591125488,
      "step": 7999
    },
    {
      "epoch": 4.8822021484375e-05,
      "step": 7999,
      "training_step_time": 0.3845551013946533
    },
    {
      "epoch": 4.8828125e-05,
      "grad_norm": 0.28598707914352417,
      "learning_rate": 9.811340001546251e-05,
      "loss": 0.0703,
      "step": 8000
    },
    {
      "epoch": 4.8828125e-05,
      "model_forward_time": 0.11272883415222168,
      "step": 8000
    },
    {
      "epoch": 4.8828125e-05,
      "step": 8000,
      "training_step_time": 0.35289525985717773
    },
    {
      "epoch": 4.8834228515625e-05,
      "model_forward_time": 0.11234068870544434,
      "step": 8001
    },
    {
      "epoch": 4.8834228515625e-05,
      "step": 8001,
      "training_step_time": 0.43171238899230957
    },
    {
      "epoch": 4.884033203125e-05,
      "model_forward_time": 0.11311841011047363,
      "step": 8002
    },
    {
      "epoch": 4.884033203125e-05,
      "step": 8002,
      "training_step_time": 0.37006688117980957
    },
    {
      "epoch": 4.8846435546875e-05,
      "model_forward_time": 0.11393237113952637,
      "step": 8003
    },
    {
      "epoch": 4.8846435546875e-05,
      "step": 8003,
      "training_step_time": 0.38126468658447266
    },
    {
      "epoch": 4.88525390625e-05,
      "model_forward_time": 0.11422610282897949,
      "step": 8004
    },
    {
      "epoch": 4.88525390625e-05,
      "step": 8004,
      "training_step_time": 0.3746175765991211
    },
    {
      "epoch": 4.8858642578125e-05,
      "model_forward_time": 0.11437368392944336,
      "step": 8005
    },
    {
      "epoch": 4.8858642578125e-05,
      "step": 8005,
      "training_step_time": 0.3908367156982422
    },
    {
      "epoch": 4.886474609375e-05,
      "model_forward_time": 0.11516618728637695,
      "step": 8006
    },
    {
      "epoch": 4.886474609375e-05,
      "step": 8006,
      "training_step_time": 0.4671955108642578
    },
    {
      "epoch": 4.8870849609375e-05,
      "model_forward_time": 0.11504626274108887,
      "step": 8007
    },
    {
      "epoch": 4.8870849609375e-05,
      "step": 8007,
      "training_step_time": 0.4640684127807617
    },
    {
      "epoch": 4.8876953125e-05,
      "model_forward_time": 0.11511373519897461,
      "step": 8008
    },
    {
      "epoch": 4.8876953125e-05,
      "step": 8008,
      "training_step_time": 0.3938422203063965
    },
    {
      "epoch": 4.8883056640625e-05,
      "model_forward_time": 0.11535859107971191,
      "step": 8009
    },
    {
      "epoch": 4.8883056640625e-05,
      "step": 8009,
      "training_step_time": 0.39536118507385254
    },
    {
      "epoch": 4.888916015625e-05,
      "grad_norm": 0.26762548089027405,
      "learning_rate": 9.81058941269451e-05,
      "loss": 0.0754,
      "step": 8010
    },
    {
      "epoch": 4.888916015625e-05,
      "model_forward_time": 0.11460065841674805,
      "step": 8010
    },
    {
      "epoch": 4.888916015625e-05,
      "step": 8010,
      "training_step_time": 0.3818216323852539
    },
    {
      "epoch": 4.8895263671875e-05,
      "model_forward_time": 0.11460423469543457,
      "step": 8011
    },
    {
      "epoch": 4.8895263671875e-05,
      "step": 8011,
      "training_step_time": 0.40523338317871094
    },
    {
      "epoch": 4.89013671875e-05,
      "model_forward_time": 0.11539578437805176,
      "step": 8012
    },
    {
      "epoch": 4.89013671875e-05,
      "step": 8012,
      "training_step_time": 0.38759565353393555
    },
    {
      "epoch": 4.8907470703125e-05,
      "model_forward_time": 0.11437368392944336,
      "step": 8013
    },
    {
      "epoch": 4.8907470703125e-05,
      "step": 8013,
      "training_step_time": 0.3925304412841797
    },
    {
      "epoch": 4.891357421875e-05,
      "model_forward_time": 0.1154780387878418,
      "step": 8014
    },
    {
      "epoch": 4.891357421875e-05,
      "step": 8014,
      "training_step_time": 0.41507625579833984
    },
    {
      "epoch": 4.8919677734375e-05,
      "model_forward_time": 0.11455202102661133,
      "step": 8015
    },
    {
      "epoch": 4.8919677734375e-05,
      "step": 8015,
      "training_step_time": 0.38797926902770996
    },
    {
      "epoch": 4.892578125e-05,
      "model_forward_time": 0.11485505104064941,
      "step": 8016
    },
    {
      "epoch": 4.892578125e-05,
      "step": 8016,
      "training_step_time": 0.3861968517303467
    },
    {
      "epoch": 4.8931884765625e-05,
      "model_forward_time": 0.1145024299621582,
      "step": 8017
    },
    {
      "epoch": 4.8931884765625e-05,
      "step": 8017,
      "training_step_time": 0.39318394660949707
    },
    {
      "epoch": 4.893798828125e-05,
      "model_forward_time": 0.11461639404296875,
      "step": 8018
    },
    {
      "epoch": 4.893798828125e-05,
      "step": 8018,
      "training_step_time": 0.4039311408996582
    },
    {
      "epoch": 4.8944091796875e-05,
      "model_forward_time": 0.11462831497192383,
      "step": 8019
    },
    {
      "epoch": 4.8944091796875e-05,
      "step": 8019,
      "training_step_time": 0.3954765796661377
    },
    {
      "epoch": 4.89501953125e-05,
      "grad_norm": 0.23876190185546875,
      "learning_rate": 9.80983736251272e-05,
      "loss": 0.0724,
      "step": 8020
    },
    {
      "epoch": 4.89501953125e-05,
      "model_forward_time": 0.11469674110412598,
      "step": 8020
    },
    {
      "epoch": 4.89501953125e-05,
      "step": 8020,
      "training_step_time": 0.4482698440551758
    },
    {
      "epoch": 4.8956298828125e-05,
      "model_forward_time": 0.11463522911071777,
      "step": 8021
    },
    {
      "epoch": 4.8956298828125e-05,
      "step": 8021,
      "training_step_time": 0.4281308650970459
    },
    {
      "epoch": 4.896240234375e-05,
      "model_forward_time": 0.11533331871032715,
      "step": 8022
    },
    {
      "epoch": 4.896240234375e-05,
      "step": 8022,
      "training_step_time": 0.49341392517089844
    },
    {
      "epoch": 4.8968505859375e-05,
      "model_forward_time": 0.11525082588195801,
      "step": 8023
    },
    {
      "epoch": 4.8968505859375e-05,
      "step": 8023,
      "training_step_time": 0.4799630641937256
    },
    {
      "epoch": 4.8974609375e-05,
      "model_forward_time": 0.11469244956970215,
      "step": 8024
    },
    {
      "epoch": 4.8974609375e-05,
      "step": 8024,
      "training_step_time": 0.4809577465057373
    },
    {
      "epoch": 4.8980712890625e-05,
      "model_forward_time": 0.11405706405639648,
      "step": 8025
    },
    {
      "epoch": 4.8980712890625e-05,
      "step": 8025,
      "training_step_time": 0.40285658836364746
    },
    {
      "epoch": 4.898681640625e-05,
      "model_forward_time": 0.11460232734680176,
      "step": 8026
    },
    {
      "epoch": 4.898681640625e-05,
      "step": 8026,
      "training_step_time": 0.38115644454956055
    },
    {
      "epoch": 4.8992919921875e-05,
      "model_forward_time": 0.11462998390197754,
      "step": 8027
    },
    {
      "epoch": 4.8992919921875e-05,
      "step": 8027,
      "training_step_time": 0.3761141300201416
    },
    {
      "epoch": 4.89990234375e-05,
      "model_forward_time": 0.11488175392150879,
      "step": 8028
    },
    {
      "epoch": 4.89990234375e-05,
      "step": 8028,
      "training_step_time": 0.4476656913757324
    },
    {
      "epoch": 4.9005126953125e-05,
      "model_forward_time": 0.1148378849029541,
      "step": 8029
    },
    {
      "epoch": 4.9005126953125e-05,
      "step": 8029,
      "training_step_time": 0.47247314453125
    },
    {
      "epoch": 4.901123046875e-05,
      "grad_norm": 0.20290593802928925,
      "learning_rate": 9.809083851229335e-05,
      "loss": 0.0732,
      "step": 8030
    },
    {
      "epoch": 4.901123046875e-05,
      "model_forward_time": 0.11438894271850586,
      "step": 8030
    },
    {
      "epoch": 4.901123046875e-05,
      "step": 8030,
      "training_step_time": 0.44236087799072266
    },
    {
      "epoch": 4.9017333984375e-05,
      "model_forward_time": 0.1150822639465332,
      "step": 8031
    },
    {
      "epoch": 4.9017333984375e-05,
      "step": 8031,
      "training_step_time": 0.38312768936157227
    },
    {
      "epoch": 4.90234375e-05,
      "model_forward_time": 0.11506009101867676,
      "step": 8032
    },
    {
      "epoch": 4.90234375e-05,
      "step": 8032,
      "training_step_time": 0.3936917781829834
    },
    {
      "epoch": 4.9029541015625e-05,
      "model_forward_time": 0.11463522911071777,
      "step": 8033
    },
    {
      "epoch": 4.9029541015625e-05,
      "step": 8033,
      "training_step_time": 0.38938093185424805
    },
    {
      "epoch": 4.903564453125e-05,
      "model_forward_time": 0.1147603988647461,
      "step": 8034
    },
    {
      "epoch": 4.903564453125e-05,
      "step": 8034,
      "training_step_time": 0.39035654067993164
    },
    {
      "epoch": 4.9041748046875e-05,
      "model_forward_time": 0.11484241485595703,
      "step": 8035
    },
    {
      "epoch": 4.9041748046875e-05,
      "step": 8035,
      "training_step_time": 0.4333333969116211
    },
    {
      "epoch": 4.90478515625e-05,
      "model_forward_time": 0.11518049240112305,
      "step": 8036
    },
    {
      "epoch": 4.90478515625e-05,
      "step": 8036,
      "training_step_time": 0.36452817916870117
    },
    {
      "epoch": 4.9053955078125e-05,
      "model_forward_time": 0.11519098281860352,
      "step": 8037
    },
    {
      "epoch": 4.9053955078125e-05,
      "step": 8037,
      "training_step_time": 0.5238332748413086
    },
    {
      "epoch": 4.906005859375e-05,
      "model_forward_time": 0.11487340927124023,
      "step": 8038
    },
    {
      "epoch": 4.906005859375e-05,
      "step": 8038,
      "training_step_time": 0.4452958106994629
    },
    {
      "epoch": 4.9066162109375e-05,
      "model_forward_time": 0.11473870277404785,
      "step": 8039
    },
    {
      "epoch": 4.9066162109375e-05,
      "step": 8039,
      "training_step_time": 0.38002920150756836
    },
    {
      "epoch": 4.9072265625e-05,
      "grad_norm": 0.27513787150382996,
      "learning_rate": 9.808328879073251e-05,
      "loss": 0.0668,
      "step": 8040
    },
    {
      "epoch": 4.9072265625e-05,
      "model_forward_time": 0.11464810371398926,
      "step": 8040
    },
    {
      "epoch": 4.9072265625e-05,
      "step": 8040,
      "training_step_time": 0.37252020835876465
    },
    {
      "epoch": 4.9078369140625e-05,
      "model_forward_time": 0.11489343643188477,
      "step": 8041
    },
    {
      "epoch": 4.9078369140625e-05,
      "step": 8041,
      "training_step_time": 0.391284704208374
    },
    {
      "epoch": 4.908447265625e-05,
      "model_forward_time": 0.11504936218261719,
      "step": 8042
    },
    {
      "epoch": 4.908447265625e-05,
      "step": 8042,
      "training_step_time": 0.393390417098999
    },
    {
      "epoch": 4.9090576171875e-05,
      "model_forward_time": 0.11485934257507324,
      "step": 8043
    },
    {
      "epoch": 4.9090576171875e-05,
      "step": 8043,
      "training_step_time": 0.48700952529907227
    },
    {
      "epoch": 4.90966796875e-05,
      "model_forward_time": 0.11502885818481445,
      "step": 8044
    },
    {
      "epoch": 4.90966796875e-05,
      "step": 8044,
      "training_step_time": 0.39965295791625977
    },
    {
      "epoch": 4.9102783203125e-05,
      "model_forward_time": 0.11472296714782715,
      "step": 8045
    },
    {
      "epoch": 4.9102783203125e-05,
      "step": 8045,
      "training_step_time": 0.3968207836151123
    },
    {
      "epoch": 4.910888671875e-05,
      "model_forward_time": 0.11569833755493164,
      "step": 8046
    },
    {
      "epoch": 4.910888671875e-05,
      "step": 8046,
      "training_step_time": 0.37435483932495117
    },
    {
      "epoch": 4.9114990234375e-05,
      "model_forward_time": 0.11484646797180176,
      "step": 8047
    },
    {
      "epoch": 4.9114990234375e-05,
      "step": 8047,
      "training_step_time": 0.4214968681335449
    },
    {
      "epoch": 4.912109375e-05,
      "model_forward_time": 0.11513924598693848,
      "step": 8048
    },
    {
      "epoch": 4.912109375e-05,
      "step": 8048,
      "training_step_time": 0.40459275245666504
    },
    {
      "epoch": 4.9127197265625e-05,
      "model_forward_time": 0.11480164527893066,
      "step": 8049
    },
    {
      "epoch": 4.9127197265625e-05,
      "step": 8049,
      "training_step_time": 0.4207909107208252
    },
    {
      "epoch": 4.913330078125e-05,
      "grad_norm": 0.24464499950408936,
      "learning_rate": 9.807572446273814e-05,
      "loss": 0.0661,
      "step": 8050
    },
    {
      "epoch": 4.913330078125e-05,
      "model_forward_time": 0.11544322967529297,
      "step": 8050
    },
    {
      "epoch": 4.913330078125e-05,
      "step": 8050,
      "training_step_time": 0.4301567077636719
    },
    {
      "epoch": 4.9139404296875e-05,
      "model_forward_time": 0.11486482620239258,
      "step": 8051
    },
    {
      "epoch": 4.9139404296875e-05,
      "step": 8051,
      "training_step_time": 0.4933176040649414
    },
    {
      "epoch": 4.91455078125e-05,
      "model_forward_time": 0.1146385669708252,
      "step": 8052
    },
    {
      "epoch": 4.91455078125e-05,
      "step": 8052,
      "training_step_time": 0.43759918212890625
    },
    {
      "epoch": 4.9151611328125e-05,
      "model_forward_time": 0.11602187156677246,
      "step": 8053
    },
    {
      "epoch": 4.9151611328125e-05,
      "step": 8053,
      "training_step_time": 0.4479544162750244
    },
    {
      "epoch": 4.915771484375e-05,
      "model_forward_time": 0.11471295356750488,
      "step": 8054
    },
    {
      "epoch": 4.915771484375e-05,
      "step": 8054,
      "training_step_time": 0.38638877868652344
    },
    {
      "epoch": 4.9163818359375e-05,
      "model_forward_time": 0.11529541015625,
      "step": 8055
    },
    {
      "epoch": 4.9163818359375e-05,
      "step": 8055,
      "training_step_time": 0.3967599868774414
    },
    {
      "epoch": 4.9169921875e-05,
      "model_forward_time": 0.11542367935180664,
      "step": 8056
    },
    {
      "epoch": 4.9169921875e-05,
      "step": 8056,
      "training_step_time": 0.37967681884765625
    },
    {
      "epoch": 4.9176025390625e-05,
      "model_forward_time": 0.11531519889831543,
      "step": 8057
    },
    {
      "epoch": 4.9176025390625e-05,
      "step": 8057,
      "training_step_time": 0.4615187644958496
    },
    {
      "epoch": 4.918212890625e-05,
      "model_forward_time": 0.11532735824584961,
      "step": 8058
    },
    {
      "epoch": 4.918212890625e-05,
      "step": 8058,
      "training_step_time": 0.4862039089202881
    },
    {
      "epoch": 4.9188232421875e-05,
      "model_forward_time": 0.11527132987976074,
      "step": 8059
    },
    {
      "epoch": 4.9188232421875e-05,
      "step": 8059,
      "training_step_time": 0.4634237289428711
    },
    {
      "epoch": 4.91943359375e-05,
      "grad_norm": 0.22295406460762024,
      "learning_rate": 9.806814553060801e-05,
      "loss": 0.0702,
      "step": 8060
    },
    {
      "epoch": 4.91943359375e-05,
      "model_forward_time": 0.11442852020263672,
      "step": 8060
    },
    {
      "epoch": 4.91943359375e-05,
      "step": 8060,
      "training_step_time": 0.38806724548339844
    },
    {
      "epoch": 4.9200439453125e-05,
      "model_forward_time": 0.11499738693237305,
      "step": 8061
    },
    {
      "epoch": 4.9200439453125e-05,
      "step": 8061,
      "training_step_time": 0.5868401527404785
    },
    {
      "epoch": 4.920654296875e-05,
      "model_forward_time": 0.11435794830322266,
      "step": 8062
    },
    {
      "epoch": 4.920654296875e-05,
      "step": 8062,
      "training_step_time": 0.38626790046691895
    },
    {
      "epoch": 4.9212646484375e-05,
      "model_forward_time": 0.11450505256652832,
      "step": 8063
    },
    {
      "epoch": 4.9212646484375e-05,
      "step": 8063,
      "training_step_time": 0.38533520698547363
    },
    {
      "epoch": 4.921875e-05,
      "model_forward_time": 0.11483931541442871,
      "step": 8064
    },
    {
      "epoch": 4.921875e-05,
      "step": 8064,
      "training_step_time": 0.49007296562194824
    },
    {
      "epoch": 4.9224853515625e-05,
      "model_forward_time": 0.11525225639343262,
      "step": 8065
    },
    {
      "epoch": 4.9224853515625e-05,
      "step": 8065,
      "training_step_time": 0.5015442371368408
    },
    {
      "epoch": 4.923095703125e-05,
      "model_forward_time": 0.11481595039367676,
      "step": 8066
    },
    {
      "epoch": 4.923095703125e-05,
      "step": 8066,
      "training_step_time": 0.4486124515533447
    },
    {
      "epoch": 4.9237060546875e-05,
      "model_forward_time": 0.11530661582946777,
      "step": 8067
    },
    {
      "epoch": 4.9237060546875e-05,
      "step": 8067,
      "training_step_time": 0.6378777027130127
    },
    {
      "epoch": 4.92431640625e-05,
      "model_forward_time": 0.11421608924865723,
      "step": 8068
    },
    {
      "epoch": 4.92431640625e-05,
      "step": 8068,
      "training_step_time": 0.37700724601745605
    },
    {
      "epoch": 4.9249267578125e-05,
      "model_forward_time": 0.11419463157653809,
      "step": 8069
    },
    {
      "epoch": 4.9249267578125e-05,
      "step": 8069,
      "training_step_time": 0.7058310508728027
    },
    {
      "epoch": 4.925537109375e-05,
      "grad_norm": 0.14404593408107758,
      "learning_rate": 9.806055199664446e-05,
      "loss": 0.0663,
      "step": 8070
    },
    {
      "epoch": 4.925537109375e-05,
      "model_forward_time": 0.11381030082702637,
      "step": 8070
    },
    {
      "epoch": 4.925537109375e-05,
      "step": 8070,
      "training_step_time": 0.41049981117248535
    },
    {
      "epoch": 4.9261474609375e-05,
      "model_forward_time": 0.1138617992401123,
      "step": 8071
    },
    {
      "epoch": 4.9261474609375e-05,
      "step": 8071,
      "training_step_time": 0.39499664306640625
    },
    {
      "epoch": 4.9267578125e-05,
      "model_forward_time": 0.11539530754089355,
      "step": 8072
    },
    {
      "epoch": 4.9267578125e-05,
      "step": 8072,
      "training_step_time": 0.4209163188934326
    },
    {
      "epoch": 4.9273681640625e-05,
      "model_forward_time": 0.11480903625488281,
      "step": 8073
    },
    {
      "epoch": 4.9273681640625e-05,
      "step": 8073,
      "training_step_time": 0.495072603225708
    },
    {
      "epoch": 4.927978515625e-05,
      "model_forward_time": 0.11419415473937988,
      "step": 8074
    },
    {
      "epoch": 4.927978515625e-05,
      "step": 8074,
      "training_step_time": 0.40136003494262695
    },
    {
      "epoch": 4.9285888671875e-05,
      "model_forward_time": 0.11479020118713379,
      "step": 8075
    },
    {
      "epoch": 4.9285888671875e-05,
      "step": 8075,
      "training_step_time": 0.8645057678222656
    },
    {
      "epoch": 4.92919921875e-05,
      "model_forward_time": 0.1147770881652832,
      "step": 8076
    },
    {
      "epoch": 4.92919921875e-05,
      "step": 8076,
      "training_step_time": 0.37445545196533203
    },
    {
      "epoch": 4.9298095703125e-05,
      "model_forward_time": 0.11419439315795898,
      "step": 8077
    },
    {
      "epoch": 4.9298095703125e-05,
      "step": 8077,
      "training_step_time": 0.36431026458740234
    },
    {
      "epoch": 4.930419921875e-05,
      "model_forward_time": 0.11476588249206543,
      "step": 8078
    },
    {
      "epoch": 4.930419921875e-05,
      "step": 8078,
      "training_step_time": 0.41413235664367676
    },
    {
      "epoch": 4.9310302734375e-05,
      "model_forward_time": 0.11473536491394043,
      "step": 8079
    },
    {
      "epoch": 4.9310302734375e-05,
      "step": 8079,
      "training_step_time": 0.38211894035339355
    },
    {
      "epoch": 4.931640625e-05,
      "grad_norm": 0.20591603219509125,
      "learning_rate": 9.805294386315415e-05,
      "loss": 0.0683,
      "step": 8080
    },
    {
      "epoch": 4.931640625e-05,
      "model_forward_time": 0.11459946632385254,
      "step": 8080
    },
    {
      "epoch": 4.931640625e-05,
      "step": 8080,
      "training_step_time": 0.37682223320007324
    },
    {
      "epoch": 4.9322509765625e-05,
      "model_forward_time": 0.11592984199523926,
      "step": 8081
    },
    {
      "epoch": 4.9322509765625e-05,
      "step": 8081,
      "training_step_time": 0.6796693801879883
    },
    {
      "epoch": 4.932861328125e-05,
      "model_forward_time": 0.11437559127807617,
      "step": 8082
    },
    {
      "epoch": 4.932861328125e-05,
      "step": 8082,
      "training_step_time": 0.3843221664428711
    },
    {
      "epoch": 4.9334716796875e-05,
      "model_forward_time": 0.11415791511535645,
      "step": 8083
    },
    {
      "epoch": 4.9334716796875e-05,
      "step": 8083,
      "training_step_time": 0.4618041515350342
    },
    {
      "epoch": 4.93408203125e-05,
      "model_forward_time": 0.11402463912963867,
      "step": 8084
    },
    {
      "epoch": 4.93408203125e-05,
      "step": 8084,
      "training_step_time": 0.43616771697998047
    },
    {
      "epoch": 4.9346923828125e-05,
      "model_forward_time": 0.11510038375854492,
      "step": 8085
    },
    {
      "epoch": 4.9346923828125e-05,
      "step": 8085,
      "training_step_time": 0.43564772605895996
    },
    {
      "epoch": 4.935302734375e-05,
      "model_forward_time": 0.11497735977172852,
      "step": 8086
    },
    {
      "epoch": 4.935302734375e-05,
      "step": 8086,
      "training_step_time": 0.39220547676086426
    },
    {
      "epoch": 4.9359130859375e-05,
      "model_forward_time": 0.11486577987670898,
      "step": 8087
    },
    {
      "epoch": 4.9359130859375e-05,
      "step": 8087,
      "training_step_time": 0.9071743488311768
    },
    {
      "epoch": 4.9365234375e-05,
      "model_forward_time": 0.11409926414489746,
      "step": 8088
    },
    {
      "epoch": 4.9365234375e-05,
      "step": 8088,
      "training_step_time": 0.36968183517456055
    },
    {
      "epoch": 4.9371337890625e-05,
      "model_forward_time": 0.11424660682678223,
      "step": 8089
    },
    {
      "epoch": 4.9371337890625e-05,
      "step": 8089,
      "training_step_time": 0.38266825675964355
    },
    {
      "epoch": 4.937744140625e-05,
      "grad_norm": 0.2215094417333603,
      "learning_rate": 9.804532113244828e-05,
      "loss": 0.0635,
      "step": 8090
    },
    {
      "epoch": 4.937744140625e-05,
      "model_forward_time": 0.11490559577941895,
      "step": 8090
    },
    {
      "epoch": 4.937744140625e-05,
      "step": 8090,
      "training_step_time": 0.4140627384185791
    },
    {
      "epoch": 4.9383544921875e-05,
      "model_forward_time": 0.11429405212402344,
      "step": 8091
    },
    {
      "epoch": 4.9383544921875e-05,
      "step": 8091,
      "training_step_time": 0.5657367706298828
    },
    {
      "epoch": 4.93896484375e-05,
      "model_forward_time": 0.11489343643188477,
      "step": 8092
    },
    {
      "epoch": 4.93896484375e-05,
      "step": 8092,
      "training_step_time": 0.43938159942626953
    },
    {
      "epoch": 4.9395751953125e-05,
      "model_forward_time": 0.11459565162658691,
      "step": 8093
    },
    {
      "epoch": 4.9395751953125e-05,
      "step": 8093,
      "training_step_time": 0.9778413772583008
    },
    {
      "epoch": 4.940185546875e-05,
      "model_forward_time": 0.11426305770874023,
      "step": 8094
    },
    {
      "epoch": 4.940185546875e-05,
      "step": 8094,
      "training_step_time": 0.36591410636901855
    },
    {
      "epoch": 4.9407958984375e-05,
      "model_forward_time": 0.11380958557128906,
      "step": 8095
    },
    {
      "epoch": 4.9407958984375e-05,
      "step": 8095,
      "training_step_time": 0.4400207996368408
    },
    {
      "epoch": 4.94140625e-05,
      "model_forward_time": 0.11459922790527344,
      "step": 8096
    },
    {
      "epoch": 4.94140625e-05,
      "step": 8096,
      "training_step_time": 0.4427638053894043
    },
    {
      "epoch": 4.9420166015625e-05,
      "model_forward_time": 0.11380195617675781,
      "step": 8097
    },
    {
      "epoch": 4.9420166015625e-05,
      "step": 8097,
      "training_step_time": 0.4102644920349121
    },
    {
      "epoch": 4.942626953125e-05,
      "model_forward_time": 0.11421489715576172,
      "step": 8098
    },
    {
      "epoch": 4.942626953125e-05,
      "step": 8098,
      "training_step_time": 0.37561917304992676
    },
    {
      "epoch": 4.9432373046875e-05,
      "model_forward_time": 0.11473798751831055,
      "step": 8099
    },
    {
      "epoch": 4.9432373046875e-05,
      "step": 8099,
      "training_step_time": 0.631995439529419
    },
    {
      "epoch": 4.94384765625e-05,
      "grad_norm": 0.22633065283298492,
      "learning_rate": 9.803768380684242e-05,
      "loss": 0.0631,
      "step": 8100
    },
    {
      "epoch": 4.94384765625e-05,
      "model_forward_time": 0.1145021915435791,
      "step": 8100
    },
    {
      "epoch": 4.94384765625e-05,
      "step": 8100,
      "training_step_time": 0.38034629821777344
    },
    {
      "epoch": 4.9444580078125e-05,
      "model_forward_time": 0.11460494995117188,
      "step": 8101
    },
    {
      "epoch": 4.9444580078125e-05,
      "step": 8101,
      "training_step_time": 0.4069173336029053
    },
    {
      "epoch": 4.945068359375e-05,
      "model_forward_time": 0.11470794677734375,
      "step": 8102
    },
    {
      "epoch": 4.945068359375e-05,
      "step": 8102,
      "training_step_time": 0.4874582290649414
    },
    {
      "epoch": 4.9456787109375e-05,
      "model_forward_time": 0.11482834815979004,
      "step": 8103
    },
    {
      "epoch": 4.9456787109375e-05,
      "step": 8103,
      "training_step_time": 0.49510645866394043
    },
    {
      "epoch": 4.9462890625e-05,
      "model_forward_time": 0.11566567420959473,
      "step": 8104
    },
    {
      "epoch": 4.9462890625e-05,
      "step": 8104,
      "training_step_time": 0.4641401767730713
    },
    {
      "epoch": 4.9468994140625e-05,
      "model_forward_time": 0.11510419845581055,
      "step": 8105
    },
    {
      "epoch": 4.9468994140625e-05,
      "step": 8105,
      "training_step_time": 0.7470464706420898
    },
    {
      "epoch": 4.947509765625e-05,
      "model_forward_time": 0.11444449424743652,
      "step": 8106
    },
    {
      "epoch": 4.947509765625e-05,
      "step": 8106,
      "training_step_time": 0.3802223205566406
    },
    {
      "epoch": 4.9481201171875e-05,
      "model_forward_time": 0.11444783210754395,
      "step": 8107
    },
    {
      "epoch": 4.9481201171875e-05,
      "step": 8107,
      "training_step_time": 0.3863074779510498
    },
    {
      "epoch": 4.94873046875e-05,
      "model_forward_time": 0.11458826065063477,
      "step": 8108
    },
    {
      "epoch": 4.94873046875e-05,
      "step": 8108,
      "training_step_time": 0.4439117908477783
    },
    {
      "epoch": 4.9493408203125e-05,
      "model_forward_time": 0.11443948745727539,
      "step": 8109
    },
    {
      "epoch": 4.9493408203125e-05,
      "step": 8109,
      "training_step_time": 0.4514954090118408
    },
    {
      "epoch": 4.949951171875e-05,
      "grad_norm": 0.189594104886055,
      "learning_rate": 9.803003188865656e-05,
      "loss": 0.0666,
      "step": 8110
    },
    {
      "epoch": 4.949951171875e-05,
      "model_forward_time": 0.11395597457885742,
      "step": 8110
    },
    {
      "epoch": 4.949951171875e-05,
      "step": 8110,
      "training_step_time": 0.4113800525665283
    },
    {
      "epoch": 4.9505615234375e-05,
      "model_forward_time": 0.11548376083374023,
      "step": 8111
    },
    {
      "epoch": 4.9505615234375e-05,
      "step": 8111,
      "training_step_time": 1.0437393188476562
    },
    {
      "epoch": 4.951171875e-05,
      "model_forward_time": 0.11371588706970215,
      "step": 8112
    },
    {
      "epoch": 4.951171875e-05,
      "step": 8112,
      "training_step_time": 0.3767573833465576
    },
    {
      "epoch": 4.9517822265625e-05,
      "model_forward_time": 0.1141822338104248,
      "step": 8113
    },
    {
      "epoch": 4.9517822265625e-05,
      "step": 8113,
      "training_step_time": 0.428558349609375
    },
    {
      "epoch": 4.952392578125e-05,
      "model_forward_time": 0.11439299583435059,
      "step": 8114
    },
    {
      "epoch": 4.952392578125e-05,
      "step": 8114,
      "training_step_time": 0.3875300884246826
    },
    {
      "epoch": 4.9530029296875e-05,
      "model_forward_time": 0.11386704444885254,
      "step": 8115
    },
    {
      "epoch": 4.9530029296875e-05,
      "step": 8115,
      "training_step_time": 0.41068339347839355
    },
    {
      "epoch": 4.95361328125e-05,
      "model_forward_time": 0.11443901062011719,
      "step": 8116
    },
    {
      "epoch": 4.95361328125e-05,
      "step": 8116,
      "training_step_time": 0.4192972183227539
    },
    {
      "epoch": 4.9542236328125e-05,
      "model_forward_time": 0.1152963638305664,
      "step": 8117
    },
    {
      "epoch": 4.9542236328125e-05,
      "step": 8117,
      "training_step_time": 0.9627728462219238
    },
    {
      "epoch": 4.954833984375e-05,
      "model_forward_time": 0.11455607414245605,
      "step": 8118
    },
    {
      "epoch": 4.954833984375e-05,
      "step": 8118,
      "training_step_time": 0.3781607151031494
    },
    {
      "epoch": 4.9554443359375e-05,
      "model_forward_time": 0.11373639106750488,
      "step": 8119
    },
    {
      "epoch": 4.9554443359375e-05,
      "step": 8119,
      "training_step_time": 0.38316798210144043
    },
    {
      "epoch": 4.9560546875e-05,
      "grad_norm": 0.1483820080757141,
      "learning_rate": 9.802236538021518e-05,
      "loss": 0.0669,
      "step": 8120
    },
    {
      "epoch": 4.9560546875e-05,
      "model_forward_time": 0.1140449047088623,
      "step": 8120
    },
    {
      "epoch": 4.9560546875e-05,
      "step": 8120,
      "training_step_time": 0.4377706050872803
    },
    {
      "epoch": 4.9566650390625e-05,
      "model_forward_time": 0.11514616012573242,
      "step": 8121
    },
    {
      "epoch": 4.9566650390625e-05,
      "step": 8121,
      "training_step_time": 0.3982110023498535
    },
    {
      "epoch": 4.957275390625e-05,
      "model_forward_time": 0.11471366882324219,
      "step": 8122
    },
    {
      "epoch": 4.957275390625e-05,
      "step": 8122,
      "training_step_time": 0.3912937641143799
    },
    {
      "epoch": 4.9578857421875e-05,
      "model_forward_time": 0.11463689804077148,
      "step": 8123
    },
    {
      "epoch": 4.9578857421875e-05,
      "step": 8123,
      "training_step_time": 0.8530311584472656
    },
    {
      "epoch": 4.95849609375e-05,
      "model_forward_time": 0.11469602584838867,
      "step": 8124
    },
    {
      "epoch": 4.95849609375e-05,
      "step": 8124,
      "training_step_time": 0.36998748779296875
    },
    {
      "epoch": 4.9591064453125e-05,
      "model_forward_time": 0.1141500473022461,
      "step": 8125
    },
    {
      "epoch": 4.9591064453125e-05,
      "step": 8125,
      "training_step_time": 0.3789334297180176
    },
    {
      "epoch": 4.959716796875e-05,
      "model_forward_time": 0.1146702766418457,
      "step": 8126
    },
    {
      "epoch": 4.959716796875e-05,
      "step": 8126,
      "training_step_time": 0.4011218547821045
    },
    {
      "epoch": 4.9603271484375e-05,
      "model_forward_time": 0.11445760726928711,
      "step": 8127
    },
    {
      "epoch": 4.9603271484375e-05,
      "step": 8127,
      "training_step_time": 0.38970303535461426
    },
    {
      "epoch": 4.9609375e-05,
      "model_forward_time": 0.11446571350097656,
      "step": 8128
    },
    {
      "epoch": 4.9609375e-05,
      "step": 8128,
      "training_step_time": 0.36362624168395996
    },
    {
      "epoch": 4.9615478515625e-05,
      "model_forward_time": 0.11485958099365234,
      "step": 8129
    },
    {
      "epoch": 4.9615478515625e-05,
      "step": 8129,
      "training_step_time": 0.9466490745544434
    },
    {
      "epoch": 4.962158203125e-05,
      "grad_norm": 0.19455486536026,
      "learning_rate": 9.801468428384716e-05,
      "loss": 0.0618,
      "step": 8130
    },
    {
      "epoch": 4.962158203125e-05,
      "model_forward_time": 0.11492538452148438,
      "step": 8130
    },
    {
      "epoch": 4.962158203125e-05,
      "step": 8130,
      "training_step_time": 0.38072657585144043
    },
    {
      "epoch": 4.9627685546875e-05,
      "model_forward_time": 0.11382246017456055,
      "step": 8131
    },
    {
      "epoch": 4.9627685546875e-05,
      "step": 8131,
      "training_step_time": 0.3789951801300049
    },
    {
      "epoch": 4.96337890625e-05,
      "model_forward_time": 0.1145026683807373,
      "step": 8132
    },
    {
      "epoch": 4.96337890625e-05,
      "step": 8132,
      "training_step_time": 0.40514159202575684
    },
    {
      "epoch": 4.9639892578125e-05,
      "model_forward_time": 0.11385345458984375,
      "step": 8133
    },
    {
      "epoch": 4.9639892578125e-05,
      "step": 8133,
      "training_step_time": 0.4348335266113281
    },
    {
      "epoch": 4.964599609375e-05,
      "model_forward_time": 0.11405825614929199,
      "step": 8134
    },
    {
      "epoch": 4.964599609375e-05,
      "step": 8134,
      "training_step_time": 0.43932127952575684
    },
    {
      "epoch": 4.9652099609375e-05,
      "model_forward_time": 0.11452293395996094,
      "step": 8135
    },
    {
      "epoch": 4.9652099609375e-05,
      "step": 8135,
      "training_step_time": 0.906259298324585
    },
    {
      "epoch": 4.9658203125e-05,
      "model_forward_time": 0.1144566535949707,
      "step": 8136
    },
    {
      "epoch": 4.9658203125e-05,
      "step": 8136,
      "training_step_time": 0.3746800422668457
    },
    {
      "epoch": 4.9664306640625e-05,
      "model_forward_time": 0.11373019218444824,
      "step": 8137
    },
    {
      "epoch": 4.9664306640625e-05,
      "step": 8137,
      "training_step_time": 0.3849821090698242
    },
    {
      "epoch": 4.967041015625e-05,
      "model_forward_time": 0.11501073837280273,
      "step": 8138
    },
    {
      "epoch": 4.967041015625e-05,
      "step": 8138,
      "training_step_time": 0.38878393173217773
    },
    {
      "epoch": 4.9676513671875e-05,
      "model_forward_time": 0.11413383483886719,
      "step": 8139
    },
    {
      "epoch": 4.9676513671875e-05,
      "step": 8139,
      "training_step_time": 0.38906049728393555
    },
    {
      "epoch": 4.96826171875e-05,
      "grad_norm": 0.2728213667869568,
      "learning_rate": 9.80069886018858e-05,
      "loss": 0.0701,
      "step": 8140
    },
    {
      "epoch": 4.96826171875e-05,
      "model_forward_time": 0.11378312110900879,
      "step": 8140
    },
    {
      "epoch": 4.96826171875e-05,
      "step": 8140,
      "training_step_time": 0.4642632007598877
    },
    {
      "epoch": 4.9688720703125e-05,
      "model_forward_time": 0.11507105827331543,
      "step": 8141
    },
    {
      "epoch": 4.9688720703125e-05,
      "step": 8141,
      "training_step_time": 0.8503284454345703
    },
    {
      "epoch": 4.969482421875e-05,
      "model_forward_time": 0.11430931091308594,
      "step": 8142
    },
    {
      "epoch": 4.969482421875e-05,
      "step": 8142,
      "training_step_time": 0.3800063133239746
    },
    {
      "epoch": 4.9700927734375e-05,
      "model_forward_time": 0.11579203605651855,
      "step": 8143
    },
    {
      "epoch": 4.9700927734375e-05,
      "step": 8143,
      "training_step_time": 0.38532352447509766
    },
    {
      "epoch": 4.970703125e-05,
      "model_forward_time": 0.11463212966918945,
      "step": 8144
    },
    {
      "epoch": 4.970703125e-05,
      "step": 8144,
      "training_step_time": 0.41902971267700195
    },
    {
      "epoch": 4.9713134765625e-05,
      "model_forward_time": 0.11506009101867676,
      "step": 8145
    },
    {
      "epoch": 4.9713134765625e-05,
      "step": 8145,
      "training_step_time": 0.42246508598327637
    },
    {
      "epoch": 4.971923828125e-05,
      "model_forward_time": 0.11467742919921875,
      "step": 8146
    },
    {
      "epoch": 4.971923828125e-05,
      "step": 8146,
      "training_step_time": 0.47604799270629883
    },
    {
      "epoch": 4.9725341796875e-05,
      "model_forward_time": 0.11459136009216309,
      "step": 8147
    },
    {
      "epoch": 4.9725341796875e-05,
      "step": 8147,
      "training_step_time": 0.7283673286437988
    },
    {
      "epoch": 4.97314453125e-05,
      "model_forward_time": 0.11461162567138672,
      "step": 8148
    },
    {
      "epoch": 4.97314453125e-05,
      "step": 8148,
      "training_step_time": 0.3790595531463623
    },
    {
      "epoch": 4.9737548828125e-05,
      "model_forward_time": 0.11414146423339844,
      "step": 8149
    },
    {
      "epoch": 4.9737548828125e-05,
      "step": 8149,
      "training_step_time": 0.38125038146972656
    },
    {
      "epoch": 4.974365234375e-05,
      "grad_norm": 0.22222432494163513,
      "learning_rate": 9.799927833666887e-05,
      "loss": 0.0683,
      "step": 8150
    },
    {
      "epoch": 4.974365234375e-05,
      "model_forward_time": 0.11462688446044922,
      "step": 8150
    },
    {
      "epoch": 4.974365234375e-05,
      "step": 8150,
      "training_step_time": 0.3902287483215332
    },
    {
      "epoch": 4.9749755859375e-05,
      "model_forward_time": 0.11417388916015625,
      "step": 8151
    },
    {
      "epoch": 4.9749755859375e-05,
      "step": 8151,
      "training_step_time": 0.39768290519714355
    },
    {
      "epoch": 4.9755859375e-05,
      "model_forward_time": 0.11452698707580566,
      "step": 8152
    },
    {
      "epoch": 4.9755859375e-05,
      "step": 8152,
      "training_step_time": 0.3838186264038086
    },
    {
      "epoch": 4.9761962890625e-05,
      "model_forward_time": 0.11473822593688965,
      "step": 8153
    },
    {
      "epoch": 4.9761962890625e-05,
      "step": 8153,
      "training_step_time": 0.9909842014312744
    },
    {
      "epoch": 4.976806640625e-05,
      "model_forward_time": 0.11457085609436035,
      "step": 8154
    },
    {
      "epoch": 4.976806640625e-05,
      "step": 8154,
      "training_step_time": 0.4988367557525635
    },
    {
      "epoch": 4.9774169921875e-05,
      "model_forward_time": 0.11396121978759766,
      "step": 8155
    },
    {
      "epoch": 4.9774169921875e-05,
      "step": 8155,
      "training_step_time": 0.4138007164001465
    },
    {
      "epoch": 4.97802734375e-05,
      "model_forward_time": 0.11487793922424316,
      "step": 8156
    },
    {
      "epoch": 4.97802734375e-05,
      "step": 8156,
      "training_step_time": 0.40561723709106445
    },
    {
      "epoch": 4.9786376953125e-05,
      "model_forward_time": 0.11415505409240723,
      "step": 8157
    },
    {
      "epoch": 4.9786376953125e-05,
      "step": 8157,
      "training_step_time": 0.39876604080200195
    },
    {
      "epoch": 4.979248046875e-05,
      "model_forward_time": 0.11414813995361328,
      "step": 8158
    },
    {
      "epoch": 4.979248046875e-05,
      "step": 8158,
      "training_step_time": 0.40938401222229004
    },
    {
      "epoch": 4.9798583984375e-05,
      "model_forward_time": 0.1149604320526123,
      "step": 8159
    },
    {
      "epoch": 4.9798583984375e-05,
      "step": 8159,
      "training_step_time": 0.8149685859680176
    },
    {
      "epoch": 4.98046875e-05,
      "grad_norm": 0.22291401028633118,
      "learning_rate": 9.799155349053851e-05,
      "loss": 0.0675,
      "step": 8160
    },
    {
      "epoch": 4.98046875e-05,
      "model_forward_time": 0.11386466026306152,
      "step": 8160
    },
    {
      "epoch": 4.98046875e-05,
      "step": 8160,
      "training_step_time": 0.4129025936126709
    },
    {
      "epoch": 4.9810791015625e-05,
      "model_forward_time": 0.11448478698730469,
      "step": 8161
    },
    {
      "epoch": 4.9810791015625e-05,
      "step": 8161,
      "training_step_time": 0.38604140281677246
    },
    {
      "epoch": 4.981689453125e-05,
      "model_forward_time": 0.11445403099060059,
      "step": 8162
    },
    {
      "epoch": 4.981689453125e-05,
      "step": 8162,
      "training_step_time": 0.3915398120880127
    },
    {
      "epoch": 4.9822998046875e-05,
      "model_forward_time": 0.11422896385192871,
      "step": 8163
    },
    {
      "epoch": 4.9822998046875e-05,
      "step": 8163,
      "training_step_time": 0.3874094486236572
    },
    {
      "epoch": 4.98291015625e-05,
      "model_forward_time": 0.11418962478637695,
      "step": 8164
    },
    {
      "epoch": 4.98291015625e-05,
      "step": 8164,
      "training_step_time": 0.3790760040283203
    },
    {
      "epoch": 4.9835205078125e-05,
      "model_forward_time": 0.11585164070129395,
      "step": 8165
    },
    {
      "epoch": 4.9835205078125e-05,
      "step": 8165,
      "training_step_time": 0.8080945014953613
    },
    {
      "epoch": 4.984130859375e-05,
      "model_forward_time": 0.11456060409545898,
      "step": 8166
    },
    {
      "epoch": 4.984130859375e-05,
      "step": 8166,
      "training_step_time": 0.45117902755737305
    },
    {
      "epoch": 4.9847412109375e-05,
      "model_forward_time": 0.11436104774475098,
      "step": 8167
    },
    {
      "epoch": 4.9847412109375e-05,
      "step": 8167,
      "training_step_time": 0.44556427001953125
    },
    {
      "epoch": 4.9853515625e-05,
      "model_forward_time": 0.11406660079956055,
      "step": 8168
    },
    {
      "epoch": 4.9853515625e-05,
      "step": 8168,
      "training_step_time": 0.4687812328338623
    },
    {
      "epoch": 4.9859619140625e-05,
      "model_forward_time": 0.11437702178955078,
      "step": 8169
    },
    {
      "epoch": 4.9859619140625e-05,
      "step": 8169,
      "training_step_time": 0.4645543098449707
    },
    {
      "epoch": 4.986572265625e-05,
      "grad_norm": 0.2751985788345337,
      "learning_rate": 9.798381406584135e-05,
      "loss": 0.0662,
      "step": 8170
    },
    {
      "epoch": 4.986572265625e-05,
      "model_forward_time": 0.11419343948364258,
      "step": 8170
    },
    {
      "epoch": 4.986572265625e-05,
      "step": 8170,
      "training_step_time": 0.3823575973510742
    },
    {
      "epoch": 4.9871826171875e-05,
      "model_forward_time": 0.11487221717834473,
      "step": 8171
    },
    {
      "epoch": 4.9871826171875e-05,
      "step": 8171,
      "training_step_time": 0.5370907783508301
    },
    {
      "epoch": 4.98779296875e-05,
      "model_forward_time": 0.11444759368896484,
      "step": 8172
    },
    {
      "epoch": 4.98779296875e-05,
      "step": 8172,
      "training_step_time": 0.36898231506347656
    },
    {
      "epoch": 4.9884033203125e-05,
      "model_forward_time": 0.11476731300354004,
      "step": 8173
    },
    {
      "epoch": 4.9884033203125e-05,
      "step": 8173,
      "training_step_time": 0.4377129077911377
    },
    {
      "epoch": 4.989013671875e-05,
      "model_forward_time": 0.11585688591003418,
      "step": 8174
    },
    {
      "epoch": 4.989013671875e-05,
      "step": 8174,
      "training_step_time": 0.38870668411254883
    },
    {
      "epoch": 4.9896240234375e-05,
      "model_forward_time": 0.11489987373352051,
      "step": 8175
    },
    {
      "epoch": 4.9896240234375e-05,
      "step": 8175,
      "training_step_time": 0.39209699630737305
    },
    {
      "epoch": 4.990234375e-05,
      "model_forward_time": 0.11528682708740234,
      "step": 8176
    },
    {
      "epoch": 4.990234375e-05,
      "step": 8176,
      "training_step_time": 0.37540531158447266
    },
    {
      "epoch": 4.9908447265625e-05,
      "model_forward_time": 0.11542820930480957,
      "step": 8177
    },
    {
      "epoch": 4.9908447265625e-05,
      "step": 8177,
      "training_step_time": 1.1522560119628906
    },
    {
      "epoch": 4.991455078125e-05,
      "model_forward_time": 0.11438894271850586,
      "step": 8178
    },
    {
      "epoch": 4.991455078125e-05,
      "step": 8178,
      "training_step_time": 0.3722493648529053
    },
    {
      "epoch": 4.9920654296875e-05,
      "model_forward_time": 0.11431169509887695,
      "step": 8179
    },
    {
      "epoch": 4.9920654296875e-05,
      "step": 8179,
      "training_step_time": 0.3996593952178955
    },
    {
      "epoch": 4.99267578125e-05,
      "grad_norm": 0.21352578699588776,
      "learning_rate": 9.797606006492841e-05,
      "loss": 0.0672,
      "step": 8180
    },
    {
      "epoch": 4.99267578125e-05,
      "model_forward_time": 0.11442184448242188,
      "step": 8180
    },
    {
      "epoch": 4.99267578125e-05,
      "step": 8180,
      "training_step_time": 0.500537633895874
    },
    {
      "epoch": 4.9932861328125e-05,
      "model_forward_time": 0.11385440826416016,
      "step": 8181
    },
    {
      "epoch": 4.9932861328125e-05,
      "step": 8181,
      "training_step_time": 0.38265085220336914
    },
    {
      "epoch": 4.993896484375e-05,
      "model_forward_time": 0.11437129974365234,
      "step": 8182
    },
    {
      "epoch": 4.993896484375e-05,
      "step": 8182,
      "training_step_time": 0.37433862686157227
    },
    {
      "epoch": 4.9945068359375e-05,
      "model_forward_time": 0.11517000198364258,
      "step": 8183
    },
    {
      "epoch": 4.9945068359375e-05,
      "step": 8183,
      "training_step_time": 0.9601762294769287
    },
    {
      "epoch": 4.9951171875e-05,
      "model_forward_time": 0.11392831802368164,
      "step": 8184
    },
    {
      "epoch": 4.9951171875e-05,
      "step": 8184,
      "training_step_time": 0.4012877941131592
    },
    {
      "epoch": 4.9957275390625e-05,
      "model_forward_time": 0.11399555206298828,
      "step": 8185
    },
    {
      "epoch": 4.9957275390625e-05,
      "step": 8185,
      "training_step_time": 0.4112069606781006
    },
    {
      "epoch": 4.996337890625e-05,
      "model_forward_time": 0.11465048789978027,
      "step": 8186
    },
    {
      "epoch": 4.996337890625e-05,
      "step": 8186,
      "training_step_time": 0.38161373138427734
    },
    {
      "epoch": 4.9969482421875e-05,
      "model_forward_time": 0.11466717720031738,
      "step": 8187
    },
    {
      "epoch": 4.9969482421875e-05,
      "step": 8187,
      "training_step_time": 0.3793833255767822
    },
    {
      "epoch": 4.99755859375e-05,
      "model_forward_time": 0.1142435073852539,
      "step": 8188
    },
    {
      "epoch": 4.99755859375e-05,
      "step": 8188,
      "training_step_time": 0.40659475326538086
    },
    {
      "epoch": 4.9981689453125e-05,
      "model_forward_time": 0.11533808708190918,
      "step": 8189
    },
    {
      "epoch": 4.9981689453125e-05,
      "step": 8189,
      "training_step_time": 0.7034587860107422
    },
    {
      "epoch": 4.998779296875e-05,
      "grad_norm": 0.2069196105003357,
      "learning_rate": 9.796829149015517e-05,
      "loss": 0.0691,
      "step": 8190
    },
    {
      "epoch": 4.998779296875e-05,
      "model_forward_time": 0.11463308334350586,
      "step": 8190
    },
    {
      "epoch": 4.998779296875e-05,
      "step": 8190,
      "training_step_time": 0.37937068939208984
    },
    {
      "epoch": 4.9993896484375e-05,
      "model_forward_time": 0.11383676528930664,
      "step": 8191
    },
    {
      "epoch": 4.9993896484375e-05,
      "step": 8191,
      "training_step_time": 0.3973731994628906
    },
    {
      "epoch": 5e-05,
      "model_forward_time": 0.11451435089111328,
      "step": 8192
    },
    {
      "epoch": 5e-05,
      "step": 8192,
      "training_step_time": 0.4242827892303467
    },
    {
      "epoch": 5.0006103515625e-05,
      "model_forward_time": 0.1142277717590332,
      "step": 8193
    },
    {
      "epoch": 5.0006103515625e-05,
      "step": 8193,
      "training_step_time": 0.49071168899536133
    },
    {
      "epoch": 5.001220703125e-05,
      "model_forward_time": 0.11482810974121094,
      "step": 8194
    },
    {
      "epoch": 5.001220703125e-05,
      "step": 8194,
      "training_step_time": 0.4172031879425049
    },
    {
      "epoch": 5.0018310546875e-05,
      "model_forward_time": 0.11612987518310547,
      "step": 8195
    },
    {
      "epoch": 5.0018310546875e-05,
      "step": 8195,
      "training_step_time": 0.8572158813476562
    },
    {
      "epoch": 5.00244140625e-05,
      "model_forward_time": 0.11504197120666504,
      "step": 8196
    },
    {
      "epoch": 5.00244140625e-05,
      "step": 8196,
      "training_step_time": 0.4484386444091797
    },
    {
      "epoch": 5.0030517578125e-05,
      "model_forward_time": 0.11548161506652832,
      "step": 8197
    },
    {
      "epoch": 5.0030517578125e-05,
      "step": 8197,
      "training_step_time": 0.453777551651001
    },
    {
      "epoch": 5.003662109375e-05,
      "model_forward_time": 0.11415910720825195,
      "step": 8198
    },
    {
      "epoch": 5.003662109375e-05,
      "step": 8198,
      "training_step_time": 0.4126293659210205
    },
    {
      "epoch": 5.0042724609375e-05,
      "model_forward_time": 0.1146543025970459,
      "step": 8199
    },
    {
      "epoch": 5.0042724609375e-05,
      "step": 8199,
      "training_step_time": 0.39882516860961914
    },
    {
      "epoch": 5.0048828125e-05,
      "grad_norm": 0.23436735570430756,
      "learning_rate": 9.796050834388149e-05,
      "loss": 0.0695,
      "step": 8200
    },
    {
      "epoch": 5.0048828125e-05,
      "model_forward_time": 0.11395645141601562,
      "step": 8200
    },
    {
      "epoch": 5.0048828125e-05,
      "step": 8200,
      "training_step_time": 0.3887772560119629
    },
    {
      "epoch": 5.0054931640625e-05,
      "model_forward_time": 0.1149895191192627,
      "step": 8201
    },
    {
      "epoch": 5.0054931640625e-05,
      "step": 8201,
      "training_step_time": 0.9819920063018799
    },
    {
      "epoch": 5.006103515625e-05,
      "model_forward_time": 0.1139383316040039,
      "step": 8202
    },
    {
      "epoch": 5.006103515625e-05,
      "step": 8202,
      "training_step_time": 0.36621761322021484
    },
    {
      "epoch": 5.0067138671875e-05,
      "model_forward_time": 0.11453771591186523,
      "step": 8203
    },
    {
      "epoch": 5.0067138671875e-05,
      "step": 8203,
      "training_step_time": 0.3830118179321289
    },
    {
      "epoch": 5.00732421875e-05,
      "model_forward_time": 0.11425185203552246,
      "step": 8204
    },
    {
      "epoch": 5.00732421875e-05,
      "step": 8204,
      "training_step_time": 0.444690465927124
    },
    {
      "epoch": 5.0079345703125e-05,
      "model_forward_time": 0.11439394950866699,
      "step": 8205
    },
    {
      "epoch": 5.0079345703125e-05,
      "step": 8205,
      "training_step_time": 0.46074557304382324
    },
    {
      "epoch": 5.008544921875e-05,
      "model_forward_time": 0.1152651309967041,
      "step": 8206
    },
    {
      "epoch": 5.008544921875e-05,
      "step": 8206,
      "training_step_time": 0.3829617500305176
    },
    {
      "epoch": 5.0091552734375e-05,
      "model_forward_time": 0.1151571273803711,
      "step": 8207
    },
    {
      "epoch": 5.0091552734375e-05,
      "step": 8207,
      "training_step_time": 0.8525478839874268
    },
    {
      "epoch": 5.009765625e-05,
      "model_forward_time": 0.11381125450134277,
      "step": 8208
    },
    {
      "epoch": 5.009765625e-05,
      "step": 8208,
      "training_step_time": 0.37237048149108887
    },
    {
      "epoch": 5.0103759765625e-05,
      "model_forward_time": 0.11446285247802734,
      "step": 8209
    },
    {
      "epoch": 5.0103759765625e-05,
      "step": 8209,
      "training_step_time": 0.3904542922973633
    },
    {
      "epoch": 5.010986328125e-05,
      "grad_norm": 0.1812238246202469,
      "learning_rate": 9.795271062847172e-05,
      "loss": 0.064,
      "step": 8210
    },
    {
      "epoch": 5.010986328125e-05,
      "model_forward_time": 0.11373591423034668,
      "step": 8210
    },
    {
      "epoch": 5.010986328125e-05,
      "step": 8210,
      "training_step_time": 0.39441704750061035
    },
    {
      "epoch": 5.0115966796875e-05,
      "model_forward_time": 0.11446881294250488,
      "step": 8211
    },
    {
      "epoch": 5.0115966796875e-05,
      "step": 8211,
      "training_step_time": 0.4704265594482422
    },
    {
      "epoch": 5.01220703125e-05,
      "model_forward_time": 0.11445379257202148,
      "step": 8212
    },
    {
      "epoch": 5.01220703125e-05,
      "step": 8212,
      "training_step_time": 0.37480640411376953
    },
    {
      "epoch": 5.0128173828125e-05,
      "model_forward_time": 0.11493229866027832,
      "step": 8213
    },
    {
      "epoch": 5.0128173828125e-05,
      "step": 8213,
      "training_step_time": 0.9107460975646973
    },
    {
      "epoch": 5.013427734375e-05,
      "model_forward_time": 0.11413884162902832,
      "step": 8214
    },
    {
      "epoch": 5.013427734375e-05,
      "step": 8214,
      "training_step_time": 0.37339115142822266
    },
    {
      "epoch": 5.0140380859375e-05,
      "model_forward_time": 0.1141514778137207,
      "step": 8215
    },
    {
      "epoch": 5.0140380859375e-05,
      "step": 8215,
      "training_step_time": 0.3868403434753418
    },
    {
      "epoch": 5.0146484375e-05,
      "model_forward_time": 0.1143333911895752,
      "step": 8216
    },
    {
      "epoch": 5.0146484375e-05,
      "step": 8216,
      "training_step_time": 0.4425787925720215
    },
    {
      "epoch": 5.0152587890625e-05,
      "model_forward_time": 0.11477422714233398,
      "step": 8217
    },
    {
      "epoch": 5.0152587890625e-05,
      "step": 8217,
      "training_step_time": 0.49648022651672363
    },
    {
      "epoch": 5.015869140625e-05,
      "model_forward_time": 0.11417675018310547,
      "step": 8218
    },
    {
      "epoch": 5.015869140625e-05,
      "step": 8218,
      "training_step_time": 0.4479103088378906
    },
    {
      "epoch": 5.0164794921875e-05,
      "model_forward_time": 0.11448931694030762,
      "step": 8219
    },
    {
      "epoch": 5.0164794921875e-05,
      "step": 8219,
      "training_step_time": 0.9406120777130127
    },
    {
      "epoch": 5.01708984375e-05,
      "grad_norm": 0.2609352767467499,
      "learning_rate": 9.794489834629455e-05,
      "loss": 0.0716,
      "step": 8220
    },
    {
      "epoch": 5.01708984375e-05,
      "model_forward_time": 0.1140439510345459,
      "step": 8220
    },
    {
      "epoch": 5.01708984375e-05,
      "step": 8220,
      "training_step_time": 0.3647291660308838
    },
    {
      "epoch": 5.0177001953125e-05,
      "model_forward_time": 0.11411190032958984,
      "step": 8221
    },
    {
      "epoch": 5.0177001953125e-05,
      "step": 8221,
      "training_step_time": 0.38843441009521484
    },
    {
      "epoch": 5.018310546875e-05,
      "model_forward_time": 0.11400508880615234,
      "step": 8222
    },
    {
      "epoch": 5.018310546875e-05,
      "step": 8222,
      "training_step_time": 0.3898332118988037
    },
    {
      "epoch": 5.0189208984375e-05,
      "model_forward_time": 0.11412549018859863,
      "step": 8223
    },
    {
      "epoch": 5.0189208984375e-05,
      "step": 8223,
      "training_step_time": 0.4115946292877197
    },
    {
      "epoch": 5.01953125e-05,
      "model_forward_time": 0.11420226097106934,
      "step": 8224
    },
    {
      "epoch": 5.01953125e-05,
      "step": 8224,
      "training_step_time": 0.39351773262023926
    },
    {
      "epoch": 5.0201416015625e-05,
      "model_forward_time": 0.11450481414794922,
      "step": 8225
    },
    {
      "epoch": 5.0201416015625e-05,
      "step": 8225,
      "training_step_time": 1.0036561489105225
    },
    {
      "epoch": 5.020751953125e-05,
      "model_forward_time": 0.1142122745513916,
      "step": 8226
    },
    {
      "epoch": 5.020751953125e-05,
      "step": 8226,
      "training_step_time": 0.3725733757019043
    },
    {
      "epoch": 5.0213623046875e-05,
      "model_forward_time": 0.11423087120056152,
      "step": 8227
    },
    {
      "epoch": 5.0213623046875e-05,
      "step": 8227,
      "training_step_time": 0.45264625549316406
    },
    {
      "epoch": 5.02197265625e-05,
      "model_forward_time": 0.11371064186096191,
      "step": 8228
    },
    {
      "epoch": 5.02197265625e-05,
      "step": 8228,
      "training_step_time": 0.38369059562683105
    },
    {
      "epoch": 5.0225830078125e-05,
      "model_forward_time": 0.11539149284362793,
      "step": 8229
    },
    {
      "epoch": 5.0225830078125e-05,
      "step": 8229,
      "training_step_time": 0.36312007904052734
    },
    {
      "epoch": 5.023193359375e-05,
      "grad_norm": 0.23826412856578827,
      "learning_rate": 9.79370714997232e-05,
      "loss": 0.0688,
      "step": 8230
    },
    {
      "epoch": 5.023193359375e-05,
      "model_forward_time": 0.11406755447387695,
      "step": 8230
    },
    {
      "epoch": 5.023193359375e-05,
      "step": 8230,
      "training_step_time": 0.42345643043518066
    },
    {
      "epoch": 5.0238037109375e-05,
      "model_forward_time": 0.11443352699279785,
      "step": 8231
    },
    {
      "epoch": 5.0238037109375e-05,
      "step": 8231,
      "training_step_time": 0.9538946151733398
    },
    {
      "epoch": 5.0244140625e-05,
      "model_forward_time": 0.11435747146606445,
      "step": 8232
    },
    {
      "epoch": 5.0244140625e-05,
      "step": 8232,
      "training_step_time": 0.36699461936950684
    },
    {
      "epoch": 5.0250244140625e-05,
      "model_forward_time": 0.11385655403137207,
      "step": 8233
    },
    {
      "epoch": 5.0250244140625e-05,
      "step": 8233,
      "training_step_time": 0.45160484313964844
    },
    {
      "epoch": 5.025634765625e-05,
      "model_forward_time": 0.11432743072509766,
      "step": 8234
    },
    {
      "epoch": 5.025634765625e-05,
      "step": 8234,
      "training_step_time": 0.3961446285247803
    },
    {
      "epoch": 5.0262451171875e-05,
      "model_forward_time": 0.11469674110412598,
      "step": 8235
    },
    {
      "epoch": 5.0262451171875e-05,
      "step": 8235,
      "training_step_time": 0.41185736656188965
    },
    {
      "epoch": 5.02685546875e-05,
      "model_forward_time": 0.11488699913024902,
      "step": 8236
    },
    {
      "epoch": 5.02685546875e-05,
      "step": 8236,
      "training_step_time": 0.38625335693359375
    },
    {
      "epoch": 5.0274658203125e-05,
      "model_forward_time": 0.11474847793579102,
      "step": 8237
    },
    {
      "epoch": 5.0274658203125e-05,
      "step": 8237,
      "training_step_time": 1.0043745040893555
    },
    {
      "epoch": 5.028076171875e-05,
      "model_forward_time": 0.11391139030456543,
      "step": 8238
    },
    {
      "epoch": 5.028076171875e-05,
      "step": 8238,
      "training_step_time": 0.3723478317260742
    },
    {
      "epoch": 5.0286865234375e-05,
      "model_forward_time": 0.1136007308959961,
      "step": 8239
    },
    {
      "epoch": 5.0286865234375e-05,
      "step": 8239,
      "training_step_time": 0.4004476070404053
    },
    {
      "epoch": 5.029296875e-05,
      "grad_norm": 0.1770312637090683,
      "learning_rate": 9.792923009113522e-05,
      "loss": 0.0655,
      "step": 8240
    },
    {
      "epoch": 5.029296875e-05,
      "model_forward_time": 0.11426329612731934,
      "step": 8240
    },
    {
      "epoch": 5.029296875e-05,
      "step": 8240,
      "training_step_time": 0.3996157646179199
    },
    {
      "epoch": 5.0299072265625e-05,
      "model_forward_time": 0.11509346961975098,
      "step": 8241
    },
    {
      "epoch": 5.0299072265625e-05,
      "step": 8241,
      "training_step_time": 0.44205260276794434
    },
    {
      "epoch": 5.030517578125e-05,
      "model_forward_time": 0.1147165298461914,
      "step": 8242
    },
    {
      "epoch": 5.030517578125e-05,
      "step": 8242,
      "training_step_time": 0.39974427223205566
    },
    {
      "epoch": 5.0311279296875e-05,
      "model_forward_time": 0.11501240730285645,
      "step": 8243
    },
    {
      "epoch": 5.0311279296875e-05,
      "step": 8243,
      "training_step_time": 0.9725866317749023
    },
    {
      "epoch": 5.03173828125e-05,
      "model_forward_time": 0.11381006240844727,
      "step": 8244
    },
    {
      "epoch": 5.03173828125e-05,
      "step": 8244,
      "training_step_time": 0.3745884895324707
    },
    {
      "epoch": 5.0323486328125e-05,
      "model_forward_time": 0.11379790306091309,
      "step": 8245
    },
    {
      "epoch": 5.0323486328125e-05,
      "step": 8245,
      "training_step_time": 0.38254261016845703
    },
    {
      "epoch": 5.032958984375e-05,
      "model_forward_time": 0.11426115036010742,
      "step": 8246
    },
    {
      "epoch": 5.032958984375e-05,
      "step": 8246,
      "training_step_time": 0.42281675338745117
    },
    {
      "epoch": 5.0335693359375e-05,
      "model_forward_time": 0.1144566535949707,
      "step": 8247
    },
    {
      "epoch": 5.0335693359375e-05,
      "step": 8247,
      "training_step_time": 0.42307209968566895
    },
    {
      "epoch": 5.0341796875e-05,
      "model_forward_time": 0.11439204216003418,
      "step": 8248
    },
    {
      "epoch": 5.0341796875e-05,
      "step": 8248,
      "training_step_time": 0.3785431385040283
    },
    {
      "epoch": 5.0347900390625e-05,
      "model_forward_time": 0.11530089378356934,
      "step": 8249
    },
    {
      "epoch": 5.0347900390625e-05,
      "step": 8249,
      "training_step_time": 0.6553366184234619
    },
    {
      "epoch": 5.035400390625e-05,
      "grad_norm": 0.20372606813907623,
      "learning_rate": 9.792137412291265e-05,
      "loss": 0.0658,
      "step": 8250
    },
    {
      "epoch": 5.035400390625e-05,
      "model_forward_time": 0.1143198013305664,
      "step": 8250
    },
    {
      "epoch": 5.035400390625e-05,
      "step": 8250,
      "training_step_time": 0.3868255615234375
    },
    {
      "epoch": 5.0360107421875e-05,
      "model_forward_time": 0.11428999900817871,
      "step": 8251
    },
    {
      "epoch": 5.0360107421875e-05,
      "step": 8251,
      "training_step_time": 0.3996610641479492
    },
    {
      "epoch": 5.03662109375e-05,
      "model_forward_time": 0.11381959915161133,
      "step": 8252
    },
    {
      "epoch": 5.03662109375e-05,
      "step": 8252,
      "training_step_time": 0.3863515853881836
    },
    {
      "epoch": 5.0372314453125e-05,
      "model_forward_time": 0.11561465263366699,
      "step": 8253
    },
    {
      "epoch": 5.0372314453125e-05,
      "step": 8253,
      "training_step_time": 0.38942646980285645
    },
    {
      "epoch": 5.037841796875e-05,
      "model_forward_time": 0.11480498313903809,
      "step": 8254
    },
    {
      "epoch": 5.037841796875e-05,
      "step": 8254,
      "training_step_time": 0.5071330070495605
    },
    {
      "epoch": 5.0384521484375e-05,
      "model_forward_time": 0.11485910415649414,
      "step": 8255
    },
    {
      "epoch": 5.0384521484375e-05,
      "step": 8255,
      "training_step_time": 0.5523550510406494
    },
    {
      "epoch": 5.0390625e-05,
      "model_forward_time": 0.1147146224975586,
      "step": 8256
    },
    {
      "epoch": 5.0390625e-05,
      "step": 8256,
      "training_step_time": 0.47117090225219727
    },
    {
      "epoch": 5.0396728515625e-05,
      "model_forward_time": 0.11431074142456055,
      "step": 8257
    },
    {
      "epoch": 5.0396728515625e-05,
      "step": 8257,
      "training_step_time": 0.39285922050476074
    },
    {
      "epoch": 5.040283203125e-05,
      "model_forward_time": 0.11412811279296875,
      "step": 8258
    },
    {
      "epoch": 5.040283203125e-05,
      "step": 8258,
      "training_step_time": 0.4148685932159424
    },
    {
      "epoch": 5.0408935546875e-05,
      "model_forward_time": 0.11415886878967285,
      "step": 8259
    },
    {
      "epoch": 5.0408935546875e-05,
      "step": 8259,
      "training_step_time": 0.39391136169433594
    },
    {
      "epoch": 5.04150390625e-05,
      "grad_norm": 0.24821072816848755,
      "learning_rate": 9.791350359744189e-05,
      "loss": 0.063,
      "step": 8260
    },
    {
      "epoch": 5.04150390625e-05,
      "model_forward_time": 0.11455488204956055,
      "step": 8260
    },
    {
      "epoch": 5.04150390625e-05,
      "step": 8260,
      "training_step_time": 0.4671916961669922
    },
    {
      "epoch": 5.0421142578125e-05,
      "model_forward_time": 0.11481189727783203,
      "step": 8261
    },
    {
      "epoch": 5.0421142578125e-05,
      "step": 8261,
      "training_step_time": 0.48526620864868164
    },
    {
      "epoch": 5.042724609375e-05,
      "model_forward_time": 0.11469173431396484,
      "step": 8262
    },
    {
      "epoch": 5.042724609375e-05,
      "step": 8262,
      "training_step_time": 0.38561344146728516
    },
    {
      "epoch": 5.0433349609375e-05,
      "model_forward_time": 0.11504626274108887,
      "step": 8263
    },
    {
      "epoch": 5.0433349609375e-05,
      "step": 8263,
      "training_step_time": 0.39333510398864746
    },
    {
      "epoch": 5.0439453125e-05,
      "model_forward_time": 0.11465811729431152,
      "step": 8264
    },
    {
      "epoch": 5.0439453125e-05,
      "step": 8264,
      "training_step_time": 0.43187522888183594
    },
    {
      "epoch": 5.0445556640625e-05,
      "model_forward_time": 0.11508655548095703,
      "step": 8265
    },
    {
      "epoch": 5.0445556640625e-05,
      "step": 8265,
      "training_step_time": 0.39653491973876953
    },
    {
      "epoch": 5.045166015625e-05,
      "model_forward_time": 0.11466360092163086,
      "step": 8266
    },
    {
      "epoch": 5.045166015625e-05,
      "step": 8266,
      "training_step_time": 0.38654232025146484
    },
    {
      "epoch": 5.0457763671875e-05,
      "model_forward_time": 0.11458158493041992,
      "step": 8267
    },
    {
      "epoch": 5.0457763671875e-05,
      "step": 8267,
      "training_step_time": 0.57834792137146
    },
    {
      "epoch": 5.04638671875e-05,
      "model_forward_time": 0.11436796188354492,
      "step": 8268
    },
    {
      "epoch": 5.04638671875e-05,
      "step": 8268,
      "training_step_time": 0.3649141788482666
    },
    {
      "epoch": 5.0469970703125e-05,
      "model_forward_time": 0.11485600471496582,
      "step": 8269
    },
    {
      "epoch": 5.0469970703125e-05,
      "step": 8269,
      "training_step_time": 0.44350457191467285
    },
    {
      "epoch": 5.047607421875e-05,
      "grad_norm": 0.21422179043293,
      "learning_rate": 9.790561851711383e-05,
      "loss": 0.0775,
      "step": 8270
    },
    {
      "epoch": 5.047607421875e-05,
      "model_forward_time": 0.11499381065368652,
      "step": 8270
    },
    {
      "epoch": 5.047607421875e-05,
      "step": 8270,
      "training_step_time": 0.46518826484680176
    },
    {
      "epoch": 5.0482177734375e-05,
      "model_forward_time": 0.1146383285522461,
      "step": 8271
    },
    {
      "epoch": 5.0482177734375e-05,
      "step": 8271,
      "training_step_time": 0.3853933811187744
    },
    {
      "epoch": 5.048828125e-05,
      "model_forward_time": 0.11503982543945312,
      "step": 8272
    },
    {
      "epoch": 5.048828125e-05,
      "step": 8272,
      "training_step_time": 0.3786487579345703
    },
    {
      "epoch": 5.0494384765625e-05,
      "model_forward_time": 0.11501479148864746,
      "step": 8273
    },
    {
      "epoch": 5.0494384765625e-05,
      "step": 8273,
      "training_step_time": 0.5121212005615234
    },
    {
      "epoch": 5.050048828125e-05,
      "model_forward_time": 0.11522889137268066,
      "step": 8274
    },
    {
      "epoch": 5.050048828125e-05,
      "step": 8274,
      "training_step_time": 0.38590312004089355
    },
    {
      "epoch": 5.0506591796875e-05,
      "model_forward_time": 0.11481690406799316,
      "step": 8275
    },
    {
      "epoch": 5.0506591796875e-05,
      "step": 8275,
      "training_step_time": 0.4354524612426758
    },
    {
      "epoch": 5.05126953125e-05,
      "model_forward_time": 0.11531352996826172,
      "step": 8276
    },
    {
      "epoch": 5.05126953125e-05,
      "step": 8276,
      "training_step_time": 0.38521909713745117
    },
    {
      "epoch": 5.0518798828125e-05,
      "model_forward_time": 0.11519074440002441,
      "step": 8277
    },
    {
      "epoch": 5.0518798828125e-05,
      "step": 8277,
      "training_step_time": 0.4115109443664551
    },
    {
      "epoch": 5.052490234375e-05,
      "model_forward_time": 0.11515021324157715,
      "step": 8278
    },
    {
      "epoch": 5.052490234375e-05,
      "step": 8278,
      "training_step_time": 0.4752926826477051
    },
    {
      "epoch": 5.0531005859375e-05,
      "model_forward_time": 0.11493587493896484,
      "step": 8279
    },
    {
      "epoch": 5.0531005859375e-05,
      "step": 8279,
      "training_step_time": 0.7266082763671875
    },
    {
      "epoch": 5.0537109375e-05,
      "grad_norm": 0.18949559330940247,
      "learning_rate": 9.789771888432375e-05,
      "loss": 0.0677,
      "step": 8280
    },
    {
      "epoch": 5.0537109375e-05,
      "model_forward_time": 0.1140291690826416,
      "step": 8280
    },
    {
      "epoch": 5.0537109375e-05,
      "step": 8280,
      "training_step_time": 0.3722100257873535
    },
    {
      "epoch": 5.0543212890625e-05,
      "model_forward_time": 0.11462998390197754,
      "step": 8281
    },
    {
      "epoch": 5.0543212890625e-05,
      "step": 8281,
      "training_step_time": 0.397291898727417
    },
    {
      "epoch": 5.054931640625e-05,
      "model_forward_time": 0.11417484283447266,
      "step": 8282
    },
    {
      "epoch": 5.054931640625e-05,
      "step": 8282,
      "training_step_time": 0.5098099708557129
    },
    {
      "epoch": 5.0555419921875e-05,
      "model_forward_time": 0.11517095565795898,
      "step": 8283
    },
    {
      "epoch": 5.0555419921875e-05,
      "step": 8283,
      "training_step_time": 0.4391789436340332
    },
    {
      "epoch": 5.05615234375e-05,
      "model_forward_time": 0.1141815185546875,
      "step": 8284
    },
    {
      "epoch": 5.05615234375e-05,
      "step": 8284,
      "training_step_time": 0.4678778648376465
    },
    {
      "epoch": 5.0567626953125e-05,
      "model_forward_time": 0.11483240127563477,
      "step": 8285
    },
    {
      "epoch": 5.0567626953125e-05,
      "step": 8285,
      "training_step_time": 0.6334962844848633
    },
    {
      "epoch": 5.057373046875e-05,
      "model_forward_time": 0.11442780494689941,
      "step": 8286
    },
    {
      "epoch": 5.057373046875e-05,
      "step": 8286,
      "training_step_time": 0.3747889995574951
    },
    {
      "epoch": 5.0579833984375e-05,
      "model_forward_time": 0.11379027366638184,
      "step": 8287
    },
    {
      "epoch": 5.0579833984375e-05,
      "step": 8287,
      "training_step_time": 0.3962235450744629
    },
    {
      "epoch": 5.05859375e-05,
      "model_forward_time": 0.11453080177307129,
      "step": 8288
    },
    {
      "epoch": 5.05859375e-05,
      "step": 8288,
      "training_step_time": 0.4095165729522705
    },
    {
      "epoch": 5.0592041015625e-05,
      "model_forward_time": 0.1153573989868164,
      "step": 8289
    },
    {
      "epoch": 5.0592041015625e-05,
      "step": 8289,
      "training_step_time": 0.4153885841369629
    },
    {
      "epoch": 5.059814453125e-05,
      "grad_norm": 0.25023558735847473,
      "learning_rate": 9.788980470147132e-05,
      "loss": 0.0657,
      "step": 8290
    },
    {
      "epoch": 5.059814453125e-05,
      "model_forward_time": 0.11492919921875,
      "step": 8290
    },
    {
      "epoch": 5.059814453125e-05,
      "step": 8290,
      "training_step_time": 0.4198458194732666
    },
    {
      "epoch": 5.0604248046875e-05,
      "model_forward_time": 0.11515522003173828,
      "step": 8291
    },
    {
      "epoch": 5.0604248046875e-05,
      "step": 8291,
      "training_step_time": 0.779102087020874
    },
    {
      "epoch": 5.06103515625e-05,
      "model_forward_time": 0.11435532569885254,
      "step": 8292
    },
    {
      "epoch": 5.06103515625e-05,
      "step": 8292,
      "training_step_time": 0.38590431213378906
    },
    {
      "epoch": 5.0616455078125e-05,
      "model_forward_time": 0.1133718490600586,
      "step": 8293
    },
    {
      "epoch": 5.0616455078125e-05,
      "step": 8293,
      "training_step_time": 0.39197540283203125
    },
    {
      "epoch": 5.062255859375e-05,
      "model_forward_time": 0.11373090744018555,
      "step": 8294
    },
    {
      "epoch": 5.062255859375e-05,
      "step": 8294,
      "training_step_time": 0.38049960136413574
    },
    {
      "epoch": 5.0628662109375e-05,
      "model_forward_time": 0.11407113075256348,
      "step": 8295
    },
    {
      "epoch": 5.0628662109375e-05,
      "step": 8295,
      "training_step_time": 0.36548495292663574
    },
    {
      "epoch": 5.0634765625e-05,
      "model_forward_time": 0.11414170265197754,
      "step": 8296
    },
    {
      "epoch": 5.0634765625e-05,
      "step": 8296,
      "training_step_time": 0.4315509796142578
    },
    {
      "epoch": 5.0640869140625e-05,
      "model_forward_time": 0.11471891403198242,
      "step": 8297
    },
    {
      "epoch": 5.0640869140625e-05,
      "step": 8297,
      "training_step_time": 0.9327764511108398
    },
    {
      "epoch": 5.064697265625e-05,
      "model_forward_time": 0.11415743827819824,
      "step": 8298
    },
    {
      "epoch": 5.064697265625e-05,
      "step": 8298,
      "training_step_time": 0.36782240867614746
    },
    {
      "epoch": 5.0653076171875e-05,
      "model_forward_time": 0.11332130432128906,
      "step": 8299
    },
    {
      "epoch": 5.0653076171875e-05,
      "step": 8299,
      "training_step_time": 0.3948190212249756
    },
    {
      "epoch": 5.06591796875e-05,
      "grad_norm": 0.3082042932510376,
      "learning_rate": 9.788187597096069e-05,
      "loss": 0.0639,
      "step": 8300
    },
    {
      "epoch": 5.06591796875e-05,
      "model_forward_time": 0.11392831802368164,
      "step": 8300
    },
    {
      "epoch": 5.06591796875e-05,
      "step": 8300,
      "training_step_time": 0.39964866638183594
    },
    {
      "epoch": 5.0665283203125e-05,
      "model_forward_time": 0.11391639709472656,
      "step": 8301
    },
    {
      "epoch": 5.0665283203125e-05,
      "step": 8301,
      "training_step_time": 0.42314887046813965
    },
    {
      "epoch": 5.067138671875e-05,
      "model_forward_time": 0.1138918399810791,
      "step": 8302
    },
    {
      "epoch": 5.067138671875e-05,
      "step": 8302,
      "training_step_time": 0.4199817180633545
    },
    {
      "epoch": 5.0677490234375e-05,
      "model_forward_time": 0.11552667617797852,
      "step": 8303
    },
    {
      "epoch": 5.0677490234375e-05,
      "step": 8303,
      "training_step_time": 0.7311339378356934
    },
    {
      "epoch": 5.068359375e-05,
      "model_forward_time": 0.1135258674621582,
      "step": 8304
    },
    {
      "epoch": 5.068359375e-05,
      "step": 8304,
      "training_step_time": 0.37761640548706055
    },
    {
      "epoch": 5.0689697265625e-05,
      "model_forward_time": 0.11421537399291992,
      "step": 8305
    },
    {
      "epoch": 5.0689697265625e-05,
      "step": 8305,
      "training_step_time": 0.3889799118041992
    },
    {
      "epoch": 5.069580078125e-05,
      "model_forward_time": 0.11373376846313477,
      "step": 8306
    },
    {
      "epoch": 5.069580078125e-05,
      "step": 8306,
      "training_step_time": 0.39203786849975586
    },
    {
      "epoch": 5.0701904296875e-05,
      "model_forward_time": 0.11458706855773926,
      "step": 8307
    },
    {
      "epoch": 5.0701904296875e-05,
      "step": 8307,
      "training_step_time": 0.38735437393188477
    },
    {
      "epoch": 5.07080078125e-05,
      "model_forward_time": 0.11484527587890625,
      "step": 8308
    },
    {
      "epoch": 5.07080078125e-05,
      "step": 8308,
      "training_step_time": 0.36182570457458496
    },
    {
      "epoch": 5.0714111328125e-05,
      "model_forward_time": 0.11497139930725098,
      "step": 8309
    },
    {
      "epoch": 5.0714111328125e-05,
      "step": 8309,
      "training_step_time": 0.9146831035614014
    },
    {
      "epoch": 5.072021484375e-05,
      "grad_norm": 0.2305501401424408,
      "learning_rate": 9.787393269520039e-05,
      "loss": 0.0719,
      "step": 8310
    },
    {
      "epoch": 5.072021484375e-05,
      "model_forward_time": 0.11503863334655762,
      "step": 8310
    },
    {
      "epoch": 5.072021484375e-05,
      "step": 8310,
      "training_step_time": 0.37655115127563477
    },
    {
      "epoch": 5.0726318359375e-05,
      "model_forward_time": 0.11402320861816406,
      "step": 8311
    },
    {
      "epoch": 5.0726318359375e-05,
      "step": 8311,
      "training_step_time": 0.39073848724365234
    },
    {
      "epoch": 5.0732421875e-05,
      "model_forward_time": 0.11465287208557129,
      "step": 8312
    },
    {
      "epoch": 5.0732421875e-05,
      "step": 8312,
      "training_step_time": 0.3852822780609131
    },
    {
      "epoch": 5.0738525390625e-05,
      "model_forward_time": 0.11413097381591797,
      "step": 8313
    },
    {
      "epoch": 5.0738525390625e-05,
      "step": 8313,
      "training_step_time": 0.4496593475341797
    },
    {
      "epoch": 5.074462890625e-05,
      "model_forward_time": 0.11443543434143066,
      "step": 8314
    },
    {
      "epoch": 5.074462890625e-05,
      "step": 8314,
      "training_step_time": 0.4445047378540039
    },
    {
      "epoch": 5.0750732421875e-05,
      "model_forward_time": 0.1148841381072998,
      "step": 8315
    },
    {
      "epoch": 5.0750732421875e-05,
      "step": 8315,
      "training_step_time": 0.6095757484436035
    },
    {
      "epoch": 5.07568359375e-05,
      "model_forward_time": 0.11493182182312012,
      "step": 8316
    },
    {
      "epoch": 5.07568359375e-05,
      "step": 8316,
      "training_step_time": 0.37185192108154297
    },
    {
      "epoch": 5.0762939453125e-05,
      "model_forward_time": 0.11452126502990723,
      "step": 8317
    },
    {
      "epoch": 5.0762939453125e-05,
      "step": 8317,
      "training_step_time": 0.39609789848327637
    },
    {
      "epoch": 5.076904296875e-05,
      "model_forward_time": 0.11533689498901367,
      "step": 8318
    },
    {
      "epoch": 5.076904296875e-05,
      "step": 8318,
      "training_step_time": 0.3945443630218506
    },
    {
      "epoch": 5.0775146484375e-05,
      "model_forward_time": 0.11514163017272949,
      "step": 8319
    },
    {
      "epoch": 5.0775146484375e-05,
      "step": 8319,
      "training_step_time": 0.3924119472503662
    },
    {
      "epoch": 5.078125e-05,
      "grad_norm": 0.33097487688064575,
      "learning_rate": 9.786597487660337e-05,
      "loss": 0.0705,
      "step": 8320
    },
    {
      "epoch": 5.078125e-05,
      "model_forward_time": 0.11536693572998047,
      "step": 8320
    },
    {
      "epoch": 5.078125e-05,
      "step": 8320,
      "training_step_time": 0.40046167373657227
    },
    {
      "epoch": 5.0787353515625e-05,
      "model_forward_time": 0.11538243293762207,
      "step": 8321
    },
    {
      "epoch": 5.0787353515625e-05,
      "step": 8321,
      "training_step_time": 0.8302278518676758
    },
    {
      "epoch": 5.079345703125e-05,
      "model_forward_time": 0.11532306671142578,
      "step": 8322
    },
    {
      "epoch": 5.079345703125e-05,
      "step": 8322,
      "training_step_time": 0.4898033142089844
    },
    {
      "epoch": 5.0799560546875e-05,
      "model_forward_time": 0.11425137519836426,
      "step": 8323
    },
    {
      "epoch": 5.0799560546875e-05,
      "step": 8323,
      "training_step_time": 0.4690873622894287
    },
    {
      "epoch": 5.08056640625e-05,
      "model_forward_time": 0.11409163475036621,
      "step": 8324
    },
    {
      "epoch": 5.08056640625e-05,
      "step": 8324,
      "training_step_time": 0.3829517364501953
    },
    {
      "epoch": 5.0811767578125e-05,
      "model_forward_time": 0.11441826820373535,
      "step": 8325
    },
    {
      "epoch": 5.0811767578125e-05,
      "step": 8325,
      "training_step_time": 0.386199951171875
    },
    {
      "epoch": 5.081787109375e-05,
      "model_forward_time": 0.11491847038269043,
      "step": 8326
    },
    {
      "epoch": 5.081787109375e-05,
      "step": 8326,
      "training_step_time": 0.44518208503723145
    },
    {
      "epoch": 5.0823974609375e-05,
      "model_forward_time": 0.11479640007019043,
      "step": 8327
    },
    {
      "epoch": 5.0823974609375e-05,
      "step": 8327,
      "training_step_time": 0.5859341621398926
    },
    {
      "epoch": 5.0830078125e-05,
      "model_forward_time": 0.11474490165710449,
      "step": 8328
    },
    {
      "epoch": 5.0830078125e-05,
      "step": 8328,
      "training_step_time": 0.41089773178100586
    },
    {
      "epoch": 5.0836181640625e-05,
      "model_forward_time": 0.1145026683807373,
      "step": 8329
    },
    {
      "epoch": 5.0836181640625e-05,
      "step": 8329,
      "training_step_time": 0.3965871334075928
    },
    {
      "epoch": 5.084228515625e-05,
      "grad_norm": 0.16541199386119843,
      "learning_rate": 9.785800251758701e-05,
      "loss": 0.0704,
      "step": 8330
    },
    {
      "epoch": 5.084228515625e-05,
      "model_forward_time": 0.1146543025970459,
      "step": 8330
    },
    {
      "epoch": 5.084228515625e-05,
      "step": 8330,
      "training_step_time": 0.387127161026001
    },
    {
      "epoch": 5.0848388671875e-05,
      "model_forward_time": 0.11457324028015137,
      "step": 8331
    },
    {
      "epoch": 5.0848388671875e-05,
      "step": 8331,
      "training_step_time": 0.39724230766296387
    },
    {
      "epoch": 5.08544921875e-05,
      "model_forward_time": 0.1146993637084961,
      "step": 8332
    },
    {
      "epoch": 5.08544921875e-05,
      "step": 8332,
      "training_step_time": 0.3829073905944824
    },
    {
      "epoch": 5.0860595703125e-05,
      "model_forward_time": 0.11701250076293945,
      "step": 8333
    },
    {
      "epoch": 5.0860595703125e-05,
      "step": 8333,
      "training_step_time": 1.0440220832824707
    },
    {
      "epoch": 5.086669921875e-05,
      "model_forward_time": 0.11423754692077637,
      "step": 8334
    },
    {
      "epoch": 5.086669921875e-05,
      "step": 8334,
      "training_step_time": 0.45210766792297363
    },
    {
      "epoch": 5.0872802734375e-05,
      "model_forward_time": 0.11438512802124023,
      "step": 8335
    },
    {
      "epoch": 5.0872802734375e-05,
      "step": 8335,
      "training_step_time": 0.44005346298217773
    },
    {
      "epoch": 5.087890625e-05,
      "model_forward_time": 0.11380767822265625,
      "step": 8336
    },
    {
      "epoch": 5.087890625e-05,
      "step": 8336,
      "training_step_time": 0.4731452465057373
    },
    {
      "epoch": 5.0885009765625e-05,
      "model_forward_time": 0.11425209045410156,
      "step": 8337
    },
    {
      "epoch": 5.0885009765625e-05,
      "step": 8337,
      "training_step_time": 0.3791952133178711
    },
    {
      "epoch": 5.089111328125e-05,
      "model_forward_time": 0.11467266082763672,
      "step": 8338
    },
    {
      "epoch": 5.089111328125e-05,
      "step": 8338,
      "training_step_time": 0.42117905616760254
    },
    {
      "epoch": 5.0897216796875e-05,
      "model_forward_time": 0.11467194557189941,
      "step": 8339
    },
    {
      "epoch": 5.0897216796875e-05,
      "step": 8339,
      "training_step_time": 0.675532341003418
    },
    {
      "epoch": 5.09033203125e-05,
      "grad_norm": 0.28820091485977173,
      "learning_rate": 9.785001562057309e-05,
      "loss": 0.0677,
      "step": 8340
    },
    {
      "epoch": 5.09033203125e-05,
      "model_forward_time": 0.11434698104858398,
      "step": 8340
    },
    {
      "epoch": 5.09033203125e-05,
      "step": 8340,
      "training_step_time": 0.3844568729400635
    },
    {
      "epoch": 5.0909423828125e-05,
      "model_forward_time": 0.11401700973510742,
      "step": 8341
    },
    {
      "epoch": 5.0909423828125e-05,
      "step": 8341,
      "training_step_time": 0.46135425567626953
    },
    {
      "epoch": 5.091552734375e-05,
      "model_forward_time": 0.1144869327545166,
      "step": 8342
    },
    {
      "epoch": 5.091552734375e-05,
      "step": 8342,
      "training_step_time": 0.38861703872680664
    },
    {
      "epoch": 5.0921630859375e-05,
      "model_forward_time": 0.11463737487792969,
      "step": 8343
    },
    {
      "epoch": 5.0921630859375e-05,
      "step": 8343,
      "training_step_time": 0.3963615894317627
    },
    {
      "epoch": 5.0927734375e-05,
      "model_forward_time": 0.11455345153808594,
      "step": 8344
    },
    {
      "epoch": 5.0927734375e-05,
      "step": 8344,
      "training_step_time": 0.3737447261810303
    },
    {
      "epoch": 5.0933837890625e-05,
      "model_forward_time": 0.11520552635192871,
      "step": 8345
    },
    {
      "epoch": 5.0933837890625e-05,
      "step": 8345,
      "training_step_time": 1.1539604663848877
    },
    {
      "epoch": 5.093994140625e-05,
      "model_forward_time": 0.11499404907226562,
      "step": 8346
    },
    {
      "epoch": 5.093994140625e-05,
      "step": 8346,
      "training_step_time": 0.4191281795501709
    },
    {
      "epoch": 5.0946044921875e-05,
      "model_forward_time": 0.11425566673278809,
      "step": 8347
    },
    {
      "epoch": 5.0946044921875e-05,
      "step": 8347,
      "training_step_time": 0.44628453254699707
    },
    {
      "epoch": 5.09521484375e-05,
      "model_forward_time": 0.11373376846313477,
      "step": 8348
    },
    {
      "epoch": 5.09521484375e-05,
      "step": 8348,
      "training_step_time": 0.4205911159515381
    },
    {
      "epoch": 5.0958251953125e-05,
      "model_forward_time": 0.11413264274597168,
      "step": 8349
    },
    {
      "epoch": 5.0958251953125e-05,
      "step": 8349,
      "training_step_time": 0.41649389266967773
    },
    {
      "epoch": 5.096435546875e-05,
      "grad_norm": 0.23190392553806305,
      "learning_rate": 9.784201418798786e-05,
      "loss": 0.0684,
      "step": 8350
    },
    {
      "epoch": 5.096435546875e-05,
      "model_forward_time": 0.11403799057006836,
      "step": 8350
    },
    {
      "epoch": 5.096435546875e-05,
      "step": 8350,
      "training_step_time": 0.4074540138244629
    },
    {
      "epoch": 5.0970458984375e-05,
      "model_forward_time": 0.11490583419799805,
      "step": 8351
    },
    {
      "epoch": 5.0970458984375e-05,
      "step": 8351,
      "training_step_time": 0.7874221801757812
    },
    {
      "epoch": 5.09765625e-05,
      "model_forward_time": 0.11506772041320801,
      "step": 8352
    },
    {
      "epoch": 5.09765625e-05,
      "step": 8352,
      "training_step_time": 0.46625328063964844
    },
    {
      "epoch": 5.0982666015625e-05,
      "model_forward_time": 0.11366915702819824,
      "step": 8353
    },
    {
      "epoch": 5.0982666015625e-05,
      "step": 8353,
      "training_step_time": 0.4201805591583252
    },
    {
      "epoch": 5.098876953125e-05,
      "model_forward_time": 0.11420321464538574,
      "step": 8354
    },
    {
      "epoch": 5.098876953125e-05,
      "step": 8354,
      "training_step_time": 0.3910958766937256
    },
    {
      "epoch": 5.0994873046875e-05,
      "model_forward_time": 0.11360979080200195,
      "step": 8355
    },
    {
      "epoch": 5.0994873046875e-05,
      "step": 8355,
      "training_step_time": 0.3789505958557129
    },
    {
      "epoch": 5.10009765625e-05,
      "model_forward_time": 0.11472439765930176,
      "step": 8356
    },
    {
      "epoch": 5.10009765625e-05,
      "step": 8356,
      "training_step_time": 0.38095617294311523
    },
    {
      "epoch": 5.1007080078125e-05,
      "model_forward_time": 0.11504626274108887,
      "step": 8357
    },
    {
      "epoch": 5.1007080078125e-05,
      "step": 8357,
      "training_step_time": 1.0187289714813232
    },
    {
      "epoch": 5.101318359375e-05,
      "model_forward_time": 0.11464619636535645,
      "step": 8358
    },
    {
      "epoch": 5.101318359375e-05,
      "step": 8358,
      "training_step_time": 0.3693735599517822
    },
    {
      "epoch": 5.1019287109375e-05,
      "model_forward_time": 0.11411690711975098,
      "step": 8359
    },
    {
      "epoch": 5.1019287109375e-05,
      "step": 8359,
      "training_step_time": 0.3773458003997803
    },
    {
      "epoch": 5.1025390625e-05,
      "grad_norm": 0.23451179265975952,
      "learning_rate": 9.783399822226189e-05,
      "loss": 0.0677,
      "step": 8360
    },
    {
      "epoch": 5.1025390625e-05,
      "model_forward_time": 0.11411476135253906,
      "step": 8360
    },
    {
      "epoch": 5.1025390625e-05,
      "step": 8360,
      "training_step_time": 0.40595221519470215
    },
    {
      "epoch": 5.1031494140625e-05,
      "model_forward_time": 0.11401820182800293,
      "step": 8361
    },
    {
      "epoch": 5.1031494140625e-05,
      "step": 8361,
      "training_step_time": 0.44333529472351074
    },
    {
      "epoch": 5.103759765625e-05,
      "model_forward_time": 0.1143794059753418,
      "step": 8362
    },
    {
      "epoch": 5.103759765625e-05,
      "step": 8362,
      "training_step_time": 0.39626288414001465
    },
    {
      "epoch": 5.1043701171875e-05,
      "model_forward_time": 0.11458086967468262,
      "step": 8363
    },
    {
      "epoch": 5.1043701171875e-05,
      "step": 8363,
      "training_step_time": 0.9520816802978516
    },
    {
      "epoch": 5.10498046875e-05,
      "model_forward_time": 0.11423301696777344,
      "step": 8364
    },
    {
      "epoch": 5.10498046875e-05,
      "step": 8364,
      "training_step_time": 0.4091172218322754
    },
    {
      "epoch": 5.1055908203125e-05,
      "model_forward_time": 0.1142878532409668,
      "step": 8365
    },
    {
      "epoch": 5.1055908203125e-05,
      "step": 8365,
      "training_step_time": 0.419292688369751
    },
    {
      "epoch": 5.106201171875e-05,
      "model_forward_time": 0.11384296417236328,
      "step": 8366
    },
    {
      "epoch": 5.106201171875e-05,
      "step": 8366,
      "training_step_time": 0.3943793773651123
    },
    {
      "epoch": 5.1068115234375e-05,
      "model_forward_time": 0.11415767669677734,
      "step": 8367
    },
    {
      "epoch": 5.1068115234375e-05,
      "step": 8367,
      "training_step_time": 0.38661956787109375
    },
    {
      "epoch": 5.107421875e-05,
      "model_forward_time": 0.11453866958618164,
      "step": 8368
    },
    {
      "epoch": 5.107421875e-05,
      "step": 8368,
      "training_step_time": 0.400104284286499
    },
    {
      "epoch": 5.1080322265625e-05,
      "model_forward_time": 0.11533713340759277,
      "step": 8369
    },
    {
      "epoch": 5.1080322265625e-05,
      "step": 8369,
      "training_step_time": 0.7204685211181641
    },
    {
      "epoch": 5.108642578125e-05,
      "grad_norm": 0.2336363047361374,
      "learning_rate": 9.782596772583026e-05,
      "loss": 0.0597,
      "step": 8370
    },
    {
      "epoch": 5.108642578125e-05,
      "model_forward_time": 0.11424803733825684,
      "step": 8370
    },
    {
      "epoch": 5.108642578125e-05,
      "step": 8370,
      "training_step_time": 0.3766360282897949
    },
    {
      "epoch": 5.1092529296875e-05,
      "model_forward_time": 0.11402130126953125,
      "step": 8371
    },
    {
      "epoch": 5.1092529296875e-05,
      "step": 8371,
      "training_step_time": 0.3869285583496094
    },
    {
      "epoch": 5.10986328125e-05,
      "model_forward_time": 0.11499714851379395,
      "step": 8372
    },
    {
      "epoch": 5.10986328125e-05,
      "step": 8372,
      "training_step_time": 0.36185169219970703
    },
    {
      "epoch": 5.1104736328125e-05,
      "model_forward_time": 0.11432886123657227,
      "step": 8373
    },
    {
      "epoch": 5.1104736328125e-05,
      "step": 8373,
      "training_step_time": 0.4247136116027832
    },
    {
      "epoch": 5.111083984375e-05,
      "model_forward_time": 0.11491775512695312,
      "step": 8374
    },
    {
      "epoch": 5.111083984375e-05,
      "step": 8374,
      "training_step_time": 0.45679306983947754
    },
    {
      "epoch": 5.1116943359375e-05,
      "model_forward_time": 0.11468720436096191,
      "step": 8375
    },
    {
      "epoch": 5.1116943359375e-05,
      "step": 8375,
      "training_step_time": 1.0293850898742676
    },
    {
      "epoch": 5.1123046875e-05,
      "model_forward_time": 0.11343598365783691,
      "step": 8376
    },
    {
      "epoch": 5.1123046875e-05,
      "step": 8376,
      "training_step_time": 0.41282200813293457
    },
    {
      "epoch": 5.1129150390625e-05,
      "model_forward_time": 0.11398744583129883,
      "step": 8377
    },
    {
      "epoch": 5.1129150390625e-05,
      "step": 8377,
      "training_step_time": 0.445204496383667
    },
    {
      "epoch": 5.113525390625e-05,
      "model_forward_time": 0.11394715309143066,
      "step": 8378
    },
    {
      "epoch": 5.113525390625e-05,
      "step": 8378,
      "training_step_time": 0.4090123176574707
    },
    {
      "epoch": 5.1141357421875e-05,
      "model_forward_time": 0.11426854133605957,
      "step": 8379
    },
    {
      "epoch": 5.1141357421875e-05,
      "step": 8379,
      "training_step_time": 0.40554356575012207
    },
    {
      "epoch": 5.11474609375e-05,
      "grad_norm": 0.26583966612815857,
      "learning_rate": 9.781792270113241e-05,
      "loss": 0.0664,
      "step": 8380
    },
    {
      "epoch": 5.11474609375e-05,
      "model_forward_time": 0.11424827575683594,
      "step": 8380
    },
    {
      "epoch": 5.11474609375e-05,
      "step": 8380,
      "training_step_time": 0.3831954002380371
    },
    {
      "epoch": 5.1153564453125e-05,
      "model_forward_time": 0.11719131469726562,
      "step": 8381
    },
    {
      "epoch": 5.1153564453125e-05,
      "step": 8381,
      "training_step_time": 0.5961911678314209
    },
    {
      "epoch": 5.115966796875e-05,
      "model_forward_time": 0.11469078063964844,
      "step": 8382
    },
    {
      "epoch": 5.115966796875e-05,
      "step": 8382,
      "training_step_time": 0.38608288764953613
    },
    {
      "epoch": 5.1165771484375e-05,
      "model_forward_time": 0.11449623107910156,
      "step": 8383
    },
    {
      "epoch": 5.1165771484375e-05,
      "step": 8383,
      "training_step_time": 0.3995695114135742
    },
    {
      "epoch": 5.1171875e-05,
      "model_forward_time": 0.11447739601135254,
      "step": 8384
    },
    {
      "epoch": 5.1171875e-05,
      "step": 8384,
      "training_step_time": 0.3925786018371582
    },
    {
      "epoch": 5.1177978515625e-05,
      "model_forward_time": 0.1147012710571289,
      "step": 8385
    },
    {
      "epoch": 5.1177978515625e-05,
      "step": 8385,
      "training_step_time": 0.3660919666290283
    },
    {
      "epoch": 5.118408203125e-05,
      "model_forward_time": 0.11550593376159668,
      "step": 8386
    },
    {
      "epoch": 5.118408203125e-05,
      "step": 8386,
      "training_step_time": 0.4595983028411865
    },
    {
      "epoch": 5.1190185546875e-05,
      "model_forward_time": 0.11517572402954102,
      "step": 8387
    },
    {
      "epoch": 5.1190185546875e-05,
      "step": 8387,
      "training_step_time": 1.1761388778686523
    },
    {
      "epoch": 5.11962890625e-05,
      "model_forward_time": 0.11348509788513184,
      "step": 8388
    },
    {
      "epoch": 5.11962890625e-05,
      "step": 8388,
      "training_step_time": 0.37701892852783203
    },
    {
      "epoch": 5.1202392578125e-05,
      "model_forward_time": 0.11331605911254883,
      "step": 8389
    },
    {
      "epoch": 5.1202392578125e-05,
      "step": 8389,
      "training_step_time": 0.38584256172180176
    },
    {
      "epoch": 5.120849609375e-05,
      "grad_norm": 0.17241519689559937,
      "learning_rate": 9.780986315061218e-05,
      "loss": 0.0665,
      "step": 8390
    },
    {
      "epoch": 5.120849609375e-05,
      "model_forward_time": 0.11430048942565918,
      "step": 8390
    },
    {
      "epoch": 5.120849609375e-05,
      "step": 8390,
      "training_step_time": 0.3917272090911865
    },
    {
      "epoch": 5.1214599609375e-05,
      "model_forward_time": 0.11327552795410156,
      "step": 8391
    },
    {
      "epoch": 5.1214599609375e-05,
      "step": 8391,
      "training_step_time": 0.43605852127075195
    },
    {
      "epoch": 5.1220703125e-05,
      "model_forward_time": 0.11412596702575684,
      "step": 8392
    },
    {
      "epoch": 5.1220703125e-05,
      "step": 8392,
      "training_step_time": 0.38846850395202637
    },
    {
      "epoch": 5.1226806640625e-05,
      "model_forward_time": 0.11459827423095703,
      "step": 8393
    },
    {
      "epoch": 5.1226806640625e-05,
      "step": 8393,
      "training_step_time": 1.2734882831573486
    },
    {
      "epoch": 5.123291015625e-05,
      "model_forward_time": 0.11340045928955078,
      "step": 8394
    },
    {
      "epoch": 5.123291015625e-05,
      "step": 8394,
      "training_step_time": 0.3794867992401123
    },
    {
      "epoch": 5.1239013671875e-05,
      "model_forward_time": 0.11319494247436523,
      "step": 8395
    },
    {
      "epoch": 5.1239013671875e-05,
      "step": 8395,
      "training_step_time": 0.37703800201416016
    },
    {
      "epoch": 5.12451171875e-05,
      "model_forward_time": 0.11366772651672363,
      "step": 8396
    },
    {
      "epoch": 5.12451171875e-05,
      "step": 8396,
      "training_step_time": 0.4756796360015869
    },
    {
      "epoch": 5.1251220703125e-05,
      "model_forward_time": 0.11347651481628418,
      "step": 8397
    },
    {
      "epoch": 5.1251220703125e-05,
      "step": 8397,
      "training_step_time": 0.4263901710510254
    },
    {
      "epoch": 5.125732421875e-05,
      "model_forward_time": 0.11472153663635254,
      "step": 8398
    },
    {
      "epoch": 5.125732421875e-05,
      "step": 8398,
      "training_step_time": 0.4343721866607666
    },
    {
      "epoch": 5.1263427734375e-05,
      "model_forward_time": 0.11502265930175781,
      "step": 8399
    },
    {
      "epoch": 5.1263427734375e-05,
      "step": 8399,
      "training_step_time": 0.6492774486541748
    },
    {
      "epoch": 5.126953125e-05,
      "grad_norm": 0.341817170381546,
      "learning_rate": 9.780178907671789e-05,
      "loss": 0.0571,
      "step": 8400
    },
    {
      "epoch": 5.126953125e-05,
      "model_forward_time": 0.11420774459838867,
      "step": 8400
    },
    {
      "epoch": 5.126953125e-05,
      "step": 8400,
      "training_step_time": 0.38709354400634766
    },
    {
      "epoch": 5.1275634765625e-05,
      "model_forward_time": 0.11401009559631348,
      "step": 8401
    },
    {
      "epoch": 5.1275634765625e-05,
      "step": 8401,
      "training_step_time": 0.453981876373291
    },
    {
      "epoch": 5.128173828125e-05,
      "model_forward_time": 0.11444830894470215,
      "step": 8402
    },
    {
      "epoch": 5.128173828125e-05,
      "step": 8402,
      "training_step_time": 0.4025905132293701
    },
    {
      "epoch": 5.1287841796875e-05,
      "model_forward_time": 0.11475014686584473,
      "step": 8403
    },
    {
      "epoch": 5.1287841796875e-05,
      "step": 8403,
      "training_step_time": 0.4135615825653076
    },
    {
      "epoch": 5.12939453125e-05,
      "model_forward_time": 0.11454415321350098,
      "step": 8404
    },
    {
      "epoch": 5.12939453125e-05,
      "step": 8404,
      "training_step_time": 0.4058115482330322
    },
    {
      "epoch": 5.1300048828125e-05,
      "model_forward_time": 0.11505722999572754,
      "step": 8405
    },
    {
      "epoch": 5.1300048828125e-05,
      "step": 8405,
      "training_step_time": 1.388658046722412
    },
    {
      "epoch": 5.130615234375e-05,
      "model_forward_time": 0.1136786937713623,
      "step": 8406
    },
    {
      "epoch": 5.130615234375e-05,
      "step": 8406,
      "training_step_time": 0.3861241340637207
    },
    {
      "epoch": 5.1312255859375e-05,
      "model_forward_time": 0.11362361907958984,
      "step": 8407
    },
    {
      "epoch": 5.1312255859375e-05,
      "step": 8407,
      "training_step_time": 0.38828134536743164
    },
    {
      "epoch": 5.1318359375e-05,
      "model_forward_time": 0.11354589462280273,
      "step": 8408
    },
    {
      "epoch": 5.1318359375e-05,
      "step": 8408,
      "training_step_time": 0.36224961280822754
    },
    {
      "epoch": 5.1324462890625e-05,
      "model_forward_time": 0.11423134803771973,
      "step": 8409
    },
    {
      "epoch": 5.1324462890625e-05,
      "step": 8409,
      "training_step_time": 0.42969465255737305
    },
    {
      "epoch": 5.133056640625e-05,
      "grad_norm": 0.1721486896276474,
      "learning_rate": 9.77937004819022e-05,
      "loss": 0.0669,
      "step": 8410
    },
    {
      "epoch": 5.133056640625e-05,
      "model_forward_time": 0.11437320709228516,
      "step": 8410
    },
    {
      "epoch": 5.133056640625e-05,
      "step": 8410,
      "training_step_time": 0.4317338466644287
    },
    {
      "epoch": 5.1336669921875e-05,
      "model_forward_time": 0.11468362808227539,
      "step": 8411
    },
    {
      "epoch": 5.1336669921875e-05,
      "step": 8411,
      "training_step_time": 0.4286952018737793
    },
    {
      "epoch": 5.13427734375e-05,
      "model_forward_time": 0.11477422714233398,
      "step": 8412
    },
    {
      "epoch": 5.13427734375e-05,
      "step": 8412,
      "training_step_time": 0.38676881790161133
    },
    {
      "epoch": 5.1348876953125e-05,
      "model_forward_time": 0.11513972282409668,
      "step": 8413
    },
    {
      "epoch": 5.1348876953125e-05,
      "step": 8413,
      "training_step_time": 0.3999471664428711
    },
    {
      "epoch": 5.135498046875e-05,
      "model_forward_time": 0.11561822891235352,
      "step": 8414
    },
    {
      "epoch": 5.135498046875e-05,
      "step": 8414,
      "training_step_time": 0.39859914779663086
    },
    {
      "epoch": 5.1361083984375e-05,
      "model_forward_time": 0.11528420448303223,
      "step": 8415
    },
    {
      "epoch": 5.1361083984375e-05,
      "step": 8415,
      "training_step_time": 0.41230273246765137
    },
    {
      "epoch": 5.13671875e-05,
      "model_forward_time": 0.11505985260009766,
      "step": 8416
    },
    {
      "epoch": 5.13671875e-05,
      "step": 8416,
      "training_step_time": 0.4922351837158203
    },
    {
      "epoch": 5.1373291015625e-05,
      "model_forward_time": 0.11441421508789062,
      "step": 8417
    },
    {
      "epoch": 5.1373291015625e-05,
      "step": 8417,
      "training_step_time": 0.5108850002288818
    },
    {
      "epoch": 5.137939453125e-05,
      "model_forward_time": 0.11565113067626953,
      "step": 8418
    },
    {
      "epoch": 5.137939453125e-05,
      "step": 8418,
      "training_step_time": 0.39243626594543457
    },
    {
      "epoch": 5.1385498046875e-05,
      "model_forward_time": 0.11655521392822266,
      "step": 8419
    },
    {
      "epoch": 5.1385498046875e-05,
      "step": 8419,
      "training_step_time": 0.41167211532592773
    },
    {
      "epoch": 5.13916015625e-05,
      "grad_norm": 0.2220950871706009,
      "learning_rate": 9.778559736862223e-05,
      "loss": 0.0616,
      "step": 8420
    },
    {
      "epoch": 5.13916015625e-05,
      "model_forward_time": 0.11474895477294922,
      "step": 8420
    },
    {
      "epoch": 5.13916015625e-05,
      "step": 8420,
      "training_step_time": 0.4126744270324707
    },
    {
      "epoch": 5.1397705078125e-05,
      "model_forward_time": 0.11533665657043457,
      "step": 8421
    },
    {
      "epoch": 5.1397705078125e-05,
      "step": 8421,
      "training_step_time": 0.46082019805908203
    },
    {
      "epoch": 5.140380859375e-05,
      "model_forward_time": 0.11607623100280762,
      "step": 8422
    },
    {
      "epoch": 5.140380859375e-05,
      "step": 8422,
      "training_step_time": 0.3681209087371826
    },
    {
      "epoch": 5.1409912109375e-05,
      "model_forward_time": 0.11516261100769043,
      "step": 8423
    },
    {
      "epoch": 5.1409912109375e-05,
      "step": 8423,
      "training_step_time": 1.04264497756958
    },
    {
      "epoch": 5.1416015625e-05,
      "model_forward_time": 0.11434340476989746,
      "step": 8424
    },
    {
      "epoch": 5.1416015625e-05,
      "step": 8424,
      "training_step_time": 0.37595081329345703
    },
    {
      "epoch": 5.1422119140625e-05,
      "model_forward_time": 0.11459231376647949,
      "step": 8425
    },
    {
      "epoch": 5.1422119140625e-05,
      "step": 8425,
      "training_step_time": 0.38432884216308594
    },
    {
      "epoch": 5.142822265625e-05,
      "model_forward_time": 0.11449885368347168,
      "step": 8426
    },
    {
      "epoch": 5.142822265625e-05,
      "step": 8426,
      "training_step_time": 0.4125399589538574
    },
    {
      "epoch": 5.1434326171875e-05,
      "model_forward_time": 0.11546468734741211,
      "step": 8427
    },
    {
      "epoch": 5.1434326171875e-05,
      "step": 8427,
      "training_step_time": 0.5229341983795166
    },
    {
      "epoch": 5.14404296875e-05,
      "model_forward_time": 0.1154482364654541,
      "step": 8428
    },
    {
      "epoch": 5.14404296875e-05,
      "step": 8428,
      "training_step_time": 0.5916197299957275
    },
    {
      "epoch": 5.1446533203125e-05,
      "model_forward_time": 0.11917376518249512,
      "step": 8429
    },
    {
      "epoch": 5.1446533203125e-05,
      "step": 8429,
      "training_step_time": 0.7707619667053223
    },
    {
      "epoch": 5.145263671875e-05,
      "grad_norm": 0.2500338852405548,
      "learning_rate": 9.777747973933948e-05,
      "loss": 0.0646,
      "step": 8430
    },
    {
      "epoch": 5.145263671875e-05,
      "model_forward_time": 0.11922669410705566,
      "step": 8430
    },
    {
      "epoch": 5.145263671875e-05,
      "step": 8430,
      "training_step_time": 0.6602809429168701
    },
    {
      "epoch": 5.1458740234375e-05,
      "model_forward_time": 0.12516355514526367,
      "step": 8431
    },
    {
      "epoch": 5.1458740234375e-05,
      "step": 8431,
      "training_step_time": 0.717430591583252
    },
    {
      "epoch": 5.146484375e-05,
      "model_forward_time": 0.11742401123046875,
      "step": 8432
    },
    {
      "epoch": 5.146484375e-05,
      "step": 8432,
      "training_step_time": 0.6402997970581055
    },
    {
      "epoch": 5.1470947265625e-05,
      "model_forward_time": 0.11795997619628906,
      "step": 8433
    },
    {
      "epoch": 5.1470947265625e-05,
      "step": 8433,
      "training_step_time": 0.7036359310150146
    },
    {
      "epoch": 5.147705078125e-05,
      "model_forward_time": 0.11983513832092285,
      "step": 8434
    },
    {
      "epoch": 5.147705078125e-05,
      "step": 8434,
      "training_step_time": 0.6696140766143799
    },
    {
      "epoch": 5.1483154296875e-05,
      "model_forward_time": 0.12545084953308105,
      "step": 8435
    },
    {
      "epoch": 5.1483154296875e-05,
      "step": 8435,
      "training_step_time": 0.6455128192901611
    },
    {
      "epoch": 5.14892578125e-05,
      "model_forward_time": 0.11606025695800781,
      "step": 8436
    },
    {
      "epoch": 5.14892578125e-05,
      "step": 8436,
      "training_step_time": 0.6559953689575195
    },
    {
      "epoch": 5.1495361328125e-05,
      "model_forward_time": 0.11677026748657227,
      "step": 8437
    },
    {
      "epoch": 5.1495361328125e-05,
      "step": 8437,
      "training_step_time": 0.6692063808441162
    },
    {
      "epoch": 5.150146484375e-05,
      "model_forward_time": 0.12031126022338867,
      "step": 8438
    },
    {
      "epoch": 5.150146484375e-05,
      "step": 8438,
      "training_step_time": 0.6398556232452393
    },
    {
      "epoch": 5.1507568359375e-05,
      "model_forward_time": 0.1172330379486084,
      "step": 8439
    },
    {
      "epoch": 5.1507568359375e-05,
      "step": 8439,
      "training_step_time": 0.7013132572174072
    },
    {
      "epoch": 5.1513671875e-05,
      "grad_norm": 0.2254265993833542,
      "learning_rate": 9.776934759651988e-05,
      "loss": 0.0605,
      "step": 8440
    },
    {
      "epoch": 5.1513671875e-05,
      "model_forward_time": 0.11614274978637695,
      "step": 8440
    },
    {
      "epoch": 5.1513671875e-05,
      "step": 8440,
      "training_step_time": 0.6719510555267334
    },
    {
      "epoch": 5.1519775390625e-05,
      "model_forward_time": 0.11738395690917969,
      "step": 8441
    },
    {
      "epoch": 5.1519775390625e-05,
      "step": 8441,
      "training_step_time": 0.7416565418243408
    },
    {
      "epoch": 5.152587890625e-05,
      "model_forward_time": 0.12375402450561523,
      "step": 8442
    },
    {
      "epoch": 5.152587890625e-05,
      "step": 8442,
      "training_step_time": 0.6933376789093018
    },
    {
      "epoch": 5.1531982421875e-05,
      "model_forward_time": 0.11904525756835938,
      "step": 8443
    },
    {
      "epoch": 5.1531982421875e-05,
      "step": 8443,
      "training_step_time": 0.703782320022583
    },
    {
      "epoch": 5.15380859375e-05,
      "model_forward_time": 0.11744856834411621,
      "step": 8444
    },
    {
      "epoch": 5.15380859375e-05,
      "step": 8444,
      "training_step_time": 0.6645054817199707
    },
    {
      "epoch": 5.1544189453125e-05,
      "model_forward_time": 0.12338805198669434,
      "step": 8445
    },
    {
      "epoch": 5.1544189453125e-05,
      "step": 8445,
      "training_step_time": 0.8000669479370117
    },
    {
      "epoch": 5.155029296875e-05,
      "model_forward_time": 0.117919921875,
      "step": 8446
    },
    {
      "epoch": 5.155029296875e-05,
      "step": 8446,
      "training_step_time": 0.722440242767334
    },
    {
      "epoch": 5.1556396484375e-05,
      "model_forward_time": 0.11971521377563477,
      "step": 8447
    },
    {
      "epoch": 5.1556396484375e-05,
      "step": 8447,
      "training_step_time": 0.5969955921173096
    },
    {
      "epoch": 5.15625e-05,
      "model_forward_time": 0.1191091537475586,
      "step": 8448
    },
    {
      "epoch": 5.15625e-05,
      "step": 8448,
      "training_step_time": 0.7959139347076416
    },
    {
      "epoch": 5.1568603515625e-05,
      "model_forward_time": 0.1205146312713623,
      "step": 8449
    },
    {
      "epoch": 5.1568603515625e-05,
      "step": 8449,
      "training_step_time": 0.6842954158782959
    },
    {
      "epoch": 5.157470703125e-05,
      "grad_norm": 0.18816998600959778,
      "learning_rate": 9.776120094263376e-05,
      "loss": 0.0741,
      "step": 8450
    },
    {
      "epoch": 5.157470703125e-05,
      "model_forward_time": 0.12392640113830566,
      "step": 8450
    },
    {
      "epoch": 5.157470703125e-05,
      "step": 8450,
      "training_step_time": 0.676537275314331
    },
    {
      "epoch": 5.1580810546875e-05,
      "model_forward_time": 0.11812138557434082,
      "step": 8451
    },
    {
      "epoch": 5.1580810546875e-05,
      "step": 8451,
      "training_step_time": 0.600771427154541
    },
    {
      "epoch": 5.15869140625e-05,
      "model_forward_time": 0.11814284324645996,
      "step": 8452
    },
    {
      "epoch": 5.15869140625e-05,
      "step": 8452,
      "training_step_time": 0.7206454277038574
    },
    {
      "epoch": 5.1593017578125e-05,
      "model_forward_time": 0.11934065818786621,
      "step": 8453
    },
    {
      "epoch": 5.1593017578125e-05,
      "step": 8453,
      "training_step_time": 0.6817846298217773
    },
    {
      "epoch": 5.159912109375e-05,
      "model_forward_time": 0.11986112594604492,
      "step": 8454
    },
    {
      "epoch": 5.159912109375e-05,
      "step": 8454,
      "training_step_time": 0.7345964908599854
    },
    {
      "epoch": 5.1605224609375e-05,
      "model_forward_time": 0.11919760704040527,
      "step": 8455
    },
    {
      "epoch": 5.1605224609375e-05,
      "step": 8455,
      "training_step_time": 0.6846394538879395
    },
    {
      "epoch": 5.1611328125e-05,
      "model_forward_time": 0.11920619010925293,
      "step": 8456
    },
    {
      "epoch": 5.1611328125e-05,
      "step": 8456,
      "training_step_time": 0.7238194942474365
    },
    {
      "epoch": 5.1617431640625e-05,
      "model_forward_time": 0.1214141845703125,
      "step": 8457
    },
    {
      "epoch": 5.1617431640625e-05,
      "step": 8457,
      "training_step_time": 0.7172832489013672
    },
    {
      "epoch": 5.162353515625e-05,
      "model_forward_time": 0.11853337287902832,
      "step": 8458
    },
    {
      "epoch": 5.162353515625e-05,
      "step": 8458,
      "training_step_time": 0.6786491870880127
    },
    {
      "epoch": 5.1629638671875e-05,
      "model_forward_time": 0.11551928520202637,
      "step": 8459
    },
    {
      "epoch": 5.1629638671875e-05,
      "step": 8459,
      "training_step_time": 0.6441569328308105
    },
    {
      "epoch": 5.16357421875e-05,
      "grad_norm": 0.2601645588874817,
      "learning_rate": 9.775303978015585e-05,
      "loss": 0.0666,
      "step": 8460
    },
    {
      "epoch": 5.16357421875e-05,
      "model_forward_time": 0.13204717636108398,
      "step": 8460
    },
    {
      "epoch": 5.16357421875e-05,
      "step": 8460,
      "training_step_time": 0.6697237491607666
    },
    {
      "epoch": 5.1641845703125e-05,
      "model_forward_time": 0.12038755416870117,
      "step": 8461
    },
    {
      "epoch": 5.1641845703125e-05,
      "step": 8461,
      "training_step_time": 0.6837272644042969
    },
    {
      "epoch": 5.164794921875e-05,
      "model_forward_time": 0.12181425094604492,
      "step": 8462
    },
    {
      "epoch": 5.164794921875e-05,
      "step": 8462,
      "training_step_time": 0.6985819339752197
    },
    {
      "epoch": 5.1654052734375e-05,
      "model_forward_time": 0.11829996109008789,
      "step": 8463
    },
    {
      "epoch": 5.1654052734375e-05,
      "step": 8463,
      "training_step_time": 0.690047025680542
    },
    {
      "epoch": 5.166015625e-05,
      "model_forward_time": 0.12312793731689453,
      "step": 8464
    },
    {
      "epoch": 5.166015625e-05,
      "step": 8464,
      "training_step_time": 0.7032570838928223
    },
    {
      "epoch": 5.1666259765625e-05,
      "model_forward_time": 0.1175539493560791,
      "step": 8465
    },
    {
      "epoch": 5.1666259765625e-05,
      "step": 8465,
      "training_step_time": 0.6795675754547119
    },
    {
      "epoch": 5.167236328125e-05,
      "model_forward_time": 0.1183168888092041,
      "step": 8466
    },
    {
      "epoch": 5.167236328125e-05,
      "step": 8466,
      "training_step_time": 0.6808934211730957
    },
    {
      "epoch": 5.1678466796875e-05,
      "model_forward_time": 0.11789822578430176,
      "step": 8467
    },
    {
      "epoch": 5.1678466796875e-05,
      "step": 8467,
      "training_step_time": 0.7812786102294922
    },
    {
      "epoch": 5.16845703125e-05,
      "model_forward_time": 0.11749601364135742,
      "step": 8468
    },
    {
      "epoch": 5.16845703125e-05,
      "step": 8468,
      "training_step_time": 0.6387505531311035
    },
    {
      "epoch": 5.1690673828125e-05,
      "model_forward_time": 0.13302946090698242,
      "step": 8469
    },
    {
      "epoch": 5.1690673828125e-05,
      "step": 8469,
      "training_step_time": 0.7139372825622559
    },
    {
      "epoch": 5.169677734375e-05,
      "grad_norm": 0.2047189474105835,
      "learning_rate": 9.77448641115653e-05,
      "loss": 0.0738,
      "step": 8470
    },
    {
      "epoch": 5.169677734375e-05,
      "model_forward_time": 0.11696386337280273,
      "step": 8470
    },
    {
      "epoch": 5.169677734375e-05,
      "step": 8470,
      "training_step_time": 0.690842866897583
    },
    {
      "epoch": 5.1702880859375e-05,
      "model_forward_time": 0.12257266044616699,
      "step": 8471
    },
    {
      "epoch": 5.1702880859375e-05,
      "step": 8471,
      "training_step_time": 0.6973631381988525
    },
    {
      "epoch": 5.1708984375e-05,
      "model_forward_time": 0.1213526725769043,
      "step": 8472
    },
    {
      "epoch": 5.1708984375e-05,
      "step": 8472,
      "training_step_time": 0.6088638305664062
    },
    {
      "epoch": 5.1715087890625e-05,
      "model_forward_time": 0.12026858329772949,
      "step": 8473
    },
    {
      "epoch": 5.1715087890625e-05,
      "step": 8473,
      "training_step_time": 0.6905679702758789
    },
    {
      "epoch": 5.172119140625e-05,
      "model_forward_time": 0.1164712905883789,
      "step": 8474
    },
    {
      "epoch": 5.172119140625e-05,
      "step": 8474,
      "training_step_time": 0.6811048984527588
    },
    {
      "epoch": 5.1727294921875e-05,
      "model_forward_time": 0.11805438995361328,
      "step": 8475
    },
    {
      "epoch": 5.1727294921875e-05,
      "step": 8475,
      "training_step_time": 0.6764819622039795
    },
    {
      "epoch": 5.17333984375e-05,
      "model_forward_time": 0.11994385719299316,
      "step": 8476
    },
    {
      "epoch": 5.17333984375e-05,
      "step": 8476,
      "training_step_time": 0.6376821994781494
    },
    {
      "epoch": 5.1739501953125e-05,
      "model_forward_time": 0.11662673950195312,
      "step": 8477
    },
    {
      "epoch": 5.1739501953125e-05,
      "step": 8477,
      "training_step_time": 0.7427854537963867
    },
    {
      "epoch": 5.174560546875e-05,
      "model_forward_time": 0.12315654754638672,
      "step": 8478
    },
    {
      "epoch": 5.174560546875e-05,
      "step": 8478,
      "training_step_time": 0.6469516754150391
    },
    {
      "epoch": 5.1751708984375e-05,
      "model_forward_time": 0.12070298194885254,
      "step": 8479
    },
    {
      "epoch": 5.1751708984375e-05,
      "step": 8479,
      "training_step_time": 0.6010611057281494
    },
    {
      "epoch": 5.17578125e-05,
      "grad_norm": 0.19979363679885864,
      "learning_rate": 9.773667393934567e-05,
      "loss": 0.0774,
      "step": 8480
    },
    {
      "epoch": 5.17578125e-05,
      "model_forward_time": 0.1178438663482666,
      "step": 8480
    },
    {
      "epoch": 5.17578125e-05,
      "step": 8480,
      "training_step_time": 0.6792488098144531
    },
    {
      "epoch": 5.1763916015625e-05,
      "model_forward_time": 0.1234281063079834,
      "step": 8481
    },
    {
      "epoch": 5.1763916015625e-05,
      "step": 8481,
      "training_step_time": 0.6995158195495605
    },
    {
      "epoch": 5.177001953125e-05,
      "model_forward_time": 0.1206824779510498,
      "step": 8482
    },
    {
      "epoch": 5.177001953125e-05,
      "step": 8482,
      "training_step_time": 0.6357510089874268
    },
    {
      "epoch": 5.1776123046875e-05,
      "model_forward_time": 0.11676812171936035,
      "step": 8483
    },
    {
      "epoch": 5.1776123046875e-05,
      "step": 8483,
      "training_step_time": 0.6686515808105469
    },
    {
      "epoch": 5.17822265625e-05,
      "model_forward_time": 0.11934518814086914,
      "step": 8484
    },
    {
      "epoch": 5.17822265625e-05,
      "step": 8484,
      "training_step_time": 0.6348369121551514
    },
    {
      "epoch": 5.1788330078125e-05,
      "model_forward_time": 0.12002992630004883,
      "step": 8485
    },
    {
      "epoch": 5.1788330078125e-05,
      "step": 8485,
      "training_step_time": 0.6790590286254883
    },
    {
      "epoch": 5.179443359375e-05,
      "model_forward_time": 0.12109780311584473,
      "step": 8486
    },
    {
      "epoch": 5.179443359375e-05,
      "step": 8486,
      "training_step_time": 0.6629304885864258
    },
    {
      "epoch": 5.1800537109375e-05,
      "model_forward_time": 0.12201070785522461,
      "step": 8487
    },
    {
      "epoch": 5.1800537109375e-05,
      "step": 8487,
      "training_step_time": 0.6010057926177979
    },
    {
      "epoch": 5.1806640625e-05,
      "model_forward_time": 0.11771559715270996,
      "step": 8488
    },
    {
      "epoch": 5.1806640625e-05,
      "step": 8488,
      "training_step_time": 0.6982781887054443
    },
    {
      "epoch": 5.1812744140625e-05,
      "model_forward_time": 0.12044358253479004,
      "step": 8489
    },
    {
      "epoch": 5.1812744140625e-05,
      "step": 8489,
      "training_step_time": 0.7358183860778809
    },
    {
      "epoch": 5.181884765625e-05,
      "grad_norm": 0.16933253407478333,
      "learning_rate": 9.772846926598491e-05,
      "loss": 0.074,
      "step": 8490
    },
    {
      "epoch": 5.181884765625e-05,
      "model_forward_time": 0.12151908874511719,
      "step": 8490
    },
    {
      "epoch": 5.181884765625e-05,
      "step": 8490,
      "training_step_time": 0.6627254486083984
    },
    {
      "epoch": 5.1824951171875e-05,
      "model_forward_time": 0.12929773330688477,
      "step": 8491
    },
    {
      "epoch": 5.1824951171875e-05,
      "step": 8491,
      "training_step_time": 0.651982307434082
    },
    {
      "epoch": 5.18310546875e-05,
      "model_forward_time": 0.1209723949432373,
      "step": 8492
    },
    {
      "epoch": 5.18310546875e-05,
      "step": 8492,
      "training_step_time": 0.6223254203796387
    },
    {
      "epoch": 5.1837158203125e-05,
      "model_forward_time": 0.12236356735229492,
      "step": 8493
    },
    {
      "epoch": 5.1837158203125e-05,
      "step": 8493,
      "training_step_time": 0.5775277614593506
    },
    {
      "epoch": 5.184326171875e-05,
      "model_forward_time": 0.11967325210571289,
      "step": 8494
    },
    {
      "epoch": 5.184326171875e-05,
      "step": 8494,
      "training_step_time": 0.5539119243621826
    },
    {
      "epoch": 5.1849365234375e-05,
      "model_forward_time": 0.12200927734375,
      "step": 8495
    },
    {
      "epoch": 5.1849365234375e-05,
      "step": 8495,
      "training_step_time": 0.5821001529693604
    },
    {
      "epoch": 5.185546875e-05,
      "model_forward_time": 0.11916470527648926,
      "step": 8496
    },
    {
      "epoch": 5.185546875e-05,
      "step": 8496,
      "training_step_time": 0.5196568965911865
    },
    {
      "epoch": 5.1861572265625e-05,
      "model_forward_time": 0.11756777763366699,
      "step": 8497
    },
    {
      "epoch": 5.1861572265625e-05,
      "step": 8497,
      "training_step_time": 0.5135092735290527
    },
    {
      "epoch": 5.186767578125e-05,
      "model_forward_time": 0.11938619613647461,
      "step": 8498
    },
    {
      "epoch": 5.186767578125e-05,
      "step": 8498,
      "training_step_time": 0.47684192657470703
    },
    {
      "epoch": 5.1873779296875e-05,
      "model_forward_time": 0.11749839782714844,
      "step": 8499
    },
    {
      "epoch": 5.1873779296875e-05,
      "step": 8499,
      "training_step_time": 0.578737735748291
    },
    {
      "epoch": 5.18798828125e-05,
      "grad_norm": 0.23600566387176514,
      "learning_rate": 9.772025009397537e-05,
      "loss": 0.0685,
      "step": 8500
    },
    {
      "epoch": 5.18798828125e-05,
      "model_forward_time": 0.11786651611328125,
      "step": 8500
    },
    {
      "epoch": 5.18798828125e-05,
      "step": 8500,
      "training_step_time": 0.538642406463623
    },
    {
      "epoch": 5.1885986328125e-05,
      "model_forward_time": 0.11673188209533691,
      "step": 8501
    },
    {
      "epoch": 5.1885986328125e-05,
      "step": 8501,
      "training_step_time": 0.43368005752563477
    },
    {
      "epoch": 5.189208984375e-05,
      "model_forward_time": 0.11677694320678711,
      "step": 8502
    },
    {
      "epoch": 5.189208984375e-05,
      "step": 8502,
      "training_step_time": 0.5028157234191895
    },
    {
      "epoch": 5.1898193359375e-05,
      "model_forward_time": 0.11630821228027344,
      "step": 8503
    },
    {
      "epoch": 5.1898193359375e-05,
      "step": 8503,
      "training_step_time": 0.4220261573791504
    },
    {
      "epoch": 5.1904296875e-05,
      "model_forward_time": 0.11846399307250977,
      "step": 8504
    },
    {
      "epoch": 5.1904296875e-05,
      "step": 8504,
      "training_step_time": 0.4160153865814209
    },
    {
      "epoch": 5.1910400390625e-05,
      "model_forward_time": 0.11845970153808594,
      "step": 8505
    },
    {
      "epoch": 5.1910400390625e-05,
      "step": 8505,
      "training_step_time": 0.4205129146575928
    },
    {
      "epoch": 5.191650390625e-05,
      "model_forward_time": 0.11941027641296387,
      "step": 8506
    },
    {
      "epoch": 5.191650390625e-05,
      "step": 8506,
      "training_step_time": 0.38542699813842773
    },
    {
      "epoch": 5.1922607421875e-05,
      "model_forward_time": 0.11836600303649902,
      "step": 8507
    },
    {
      "epoch": 5.1922607421875e-05,
      "step": 8507,
      "training_step_time": 0.42374730110168457
    },
    {
      "epoch": 5.19287109375e-05,
      "model_forward_time": 0.12148833274841309,
      "step": 8508
    },
    {
      "epoch": 5.19287109375e-05,
      "step": 8508,
      "training_step_time": 0.38379788398742676
    },
    {
      "epoch": 5.1934814453125e-05,
      "model_forward_time": 0.11771774291992188,
      "step": 8509
    },
    {
      "epoch": 5.1934814453125e-05,
      "step": 8509,
      "training_step_time": 0.38286590576171875
    },
    {
      "epoch": 5.194091796875e-05,
      "grad_norm": 0.1601993888616562,
      "learning_rate": 9.771201642581385e-05,
      "loss": 0.0747,
      "step": 8510
    },
    {
      "epoch": 5.194091796875e-05,
      "model_forward_time": 0.11802482604980469,
      "step": 8510
    },
    {
      "epoch": 5.194091796875e-05,
      "step": 8510,
      "training_step_time": 0.38837623596191406
    },
    {
      "epoch": 5.1947021484375e-05,
      "model_forward_time": 0.11736774444580078,
      "step": 8511
    },
    {
      "epoch": 5.1947021484375e-05,
      "step": 8511,
      "training_step_time": 0.3810553550720215
    },
    {
      "epoch": 5.1953125e-05,
      "model_forward_time": 0.12645983695983887,
      "step": 8512
    },
    {
      "epoch": 5.1953125e-05,
      "step": 8512,
      "training_step_time": 0.39745450019836426
    },
    {
      "epoch": 5.1959228515625e-05,
      "model_forward_time": 0.11718535423278809,
      "step": 8513
    },
    {
      "epoch": 5.1959228515625e-05,
      "step": 8513,
      "training_step_time": 0.40279245376586914
    },
    {
      "epoch": 5.196533203125e-05,
      "model_forward_time": 0.11807107925415039,
      "step": 8514
    },
    {
      "epoch": 5.196533203125e-05,
      "step": 8514,
      "training_step_time": 0.387148380279541
    },
    {
      "epoch": 5.1971435546875e-05,
      "model_forward_time": 0.11702823638916016,
      "step": 8515
    },
    {
      "epoch": 5.1971435546875e-05,
      "step": 8515,
      "training_step_time": 0.4414553642272949
    },
    {
      "epoch": 5.19775390625e-05,
      "model_forward_time": 0.11583113670349121,
      "step": 8516
    },
    {
      "epoch": 5.19775390625e-05,
      "step": 8516,
      "training_step_time": 0.5056617259979248
    },
    {
      "epoch": 5.1983642578125e-05,
      "model_forward_time": 0.11689877510070801,
      "step": 8517
    },
    {
      "epoch": 5.1983642578125e-05,
      "step": 8517,
      "training_step_time": 0.4645345211029053
    },
    {
      "epoch": 5.198974609375e-05,
      "model_forward_time": 0.11584258079528809,
      "step": 8518
    },
    {
      "epoch": 5.198974609375e-05,
      "step": 8518,
      "training_step_time": 0.41185426712036133
    },
    {
      "epoch": 5.1995849609375e-05,
      "model_forward_time": 0.11566400527954102,
      "step": 8519
    },
    {
      "epoch": 5.1995849609375e-05,
      "step": 8519,
      "training_step_time": 0.3910050392150879
    },
    {
      "epoch": 5.2001953125e-05,
      "grad_norm": 0.33760005235671997,
      "learning_rate": 9.77037682640015e-05,
      "loss": 0.0735,
      "step": 8520
    },
    {
      "epoch": 5.2001953125e-05,
      "model_forward_time": 0.11530518531799316,
      "step": 8520
    },
    {
      "epoch": 5.2001953125e-05,
      "step": 8520,
      "training_step_time": 0.3818542957305908
    },
    {
      "epoch": 5.2008056640625e-05,
      "model_forward_time": 0.1188652515411377,
      "step": 8521
    },
    {
      "epoch": 5.2008056640625e-05,
      "step": 8521,
      "training_step_time": 0.38344717025756836
    },
    {
      "epoch": 5.201416015625e-05,
      "model_forward_time": 0.11877202987670898,
      "step": 8522
    },
    {
      "epoch": 5.201416015625e-05,
      "step": 8522,
      "training_step_time": 0.38285231590270996
    },
    {
      "epoch": 5.2020263671875e-05,
      "model_forward_time": 0.11722445487976074,
      "step": 8523
    },
    {
      "epoch": 5.2020263671875e-05,
      "step": 8523,
      "training_step_time": 0.386944055557251
    },
    {
      "epoch": 5.20263671875e-05,
      "model_forward_time": 0.11833047866821289,
      "step": 8524
    },
    {
      "epoch": 5.20263671875e-05,
      "step": 8524,
      "training_step_time": 0.3968324661254883
    },
    {
      "epoch": 5.2032470703125e-05,
      "model_forward_time": 0.11765813827514648,
      "step": 8525
    },
    {
      "epoch": 5.2032470703125e-05,
      "step": 8525,
      "training_step_time": 0.3913090229034424
    },
    {
      "epoch": 5.203857421875e-05,
      "model_forward_time": 0.11936688423156738,
      "step": 8526
    },
    {
      "epoch": 5.203857421875e-05,
      "step": 8526,
      "training_step_time": 0.3865213394165039
    },
    {
      "epoch": 5.2044677734375e-05,
      "model_forward_time": 0.11828923225402832,
      "step": 8527
    },
    {
      "epoch": 5.2044677734375e-05,
      "step": 8527,
      "training_step_time": 0.3843414783477783
    },
    {
      "epoch": 5.205078125e-05,
      "model_forward_time": 0.11704492568969727,
      "step": 8528
    },
    {
      "epoch": 5.205078125e-05,
      "step": 8528,
      "training_step_time": 0.3898766040802002
    },
    {
      "epoch": 5.2056884765625e-05,
      "model_forward_time": 0.11563968658447266,
      "step": 8529
    },
    {
      "epoch": 5.2056884765625e-05,
      "step": 8529,
      "training_step_time": 0.3970909118652344
    },
    {
      "epoch": 5.206298828125e-05,
      "grad_norm": 0.2692420780658722,
      "learning_rate": 9.769550561104388e-05,
      "loss": 0.0685,
      "step": 8530
    },
    {
      "epoch": 5.206298828125e-05,
      "model_forward_time": 0.11644864082336426,
      "step": 8530
    },
    {
      "epoch": 5.206298828125e-05,
      "step": 8530,
      "training_step_time": 0.43102407455444336
    },
    {
      "epoch": 5.2069091796875e-05,
      "model_forward_time": 0.1158151626586914,
      "step": 8531
    },
    {
      "epoch": 5.2069091796875e-05,
      "step": 8531,
      "training_step_time": 0.4328172206878662
    },
    {
      "epoch": 5.20751953125e-05,
      "model_forward_time": 0.11554360389709473,
      "step": 8532
    },
    {
      "epoch": 5.20751953125e-05,
      "step": 8532,
      "training_step_time": 0.4683072566986084
    },
    {
      "epoch": 5.2081298828125e-05,
      "model_forward_time": 0.11656379699707031,
      "step": 8533
    },
    {
      "epoch": 5.2081298828125e-05,
      "step": 8533,
      "training_step_time": 0.4087998867034912
    },
    {
      "epoch": 5.208740234375e-05,
      "model_forward_time": 0.11701440811157227,
      "step": 8534
    },
    {
      "epoch": 5.208740234375e-05,
      "step": 8534,
      "training_step_time": 0.39481687545776367
    },
    {
      "epoch": 5.2093505859375e-05,
      "model_forward_time": 0.11606526374816895,
      "step": 8535
    },
    {
      "epoch": 5.2093505859375e-05,
      "step": 8535,
      "training_step_time": 0.3845791816711426
    },
    {
      "epoch": 5.2099609375e-05,
      "model_forward_time": 0.11596536636352539,
      "step": 8536
    },
    {
      "epoch": 5.2099609375e-05,
      "step": 8536,
      "training_step_time": 0.38704681396484375
    },
    {
      "epoch": 5.2105712890625e-05,
      "model_forward_time": 0.11592388153076172,
      "step": 8537
    },
    {
      "epoch": 5.2105712890625e-05,
      "step": 8537,
      "training_step_time": 0.38886308670043945
    },
    {
      "epoch": 5.211181640625e-05,
      "model_forward_time": 0.11667847633361816,
      "step": 8538
    },
    {
      "epoch": 5.211181640625e-05,
      "step": 8538,
      "training_step_time": 0.37831807136535645
    },
    {
      "epoch": 5.2117919921875e-05,
      "model_forward_time": 0.11635160446166992,
      "step": 8539
    },
    {
      "epoch": 5.2117919921875e-05,
      "step": 8539,
      "training_step_time": 0.3938140869140625
    },
    {
      "epoch": 5.21240234375e-05,
      "grad_norm": 0.2512180805206299,
      "learning_rate": 9.7687228469451e-05,
      "loss": 0.066,
      "step": 8540
    },
    {
      "epoch": 5.21240234375e-05,
      "model_forward_time": 0.11598849296569824,
      "step": 8540
    },
    {
      "epoch": 5.21240234375e-05,
      "step": 8540,
      "training_step_time": 0.3999326229095459
    },
    {
      "epoch": 5.2130126953125e-05,
      "model_forward_time": 0.11631655693054199,
      "step": 8541
    },
    {
      "epoch": 5.2130126953125e-05,
      "step": 8541,
      "training_step_time": 0.3960433006286621
    },
    {
      "epoch": 5.213623046875e-05,
      "model_forward_time": 0.11615610122680664,
      "step": 8542
    },
    {
      "epoch": 5.213623046875e-05,
      "step": 8542,
      "training_step_time": 0.4079713821411133
    },
    {
      "epoch": 5.2142333984375e-05,
      "model_forward_time": 0.11612772941589355,
      "step": 8543
    },
    {
      "epoch": 5.2142333984375e-05,
      "step": 8543,
      "training_step_time": 0.39900636672973633
    },
    {
      "epoch": 5.21484375e-05,
      "model_forward_time": 0.11712884902954102,
      "step": 8544
    },
    {
      "epoch": 5.21484375e-05,
      "step": 8544,
      "training_step_time": 0.39081668853759766
    },
    {
      "epoch": 5.2154541015625e-05,
      "model_forward_time": 0.11654233932495117,
      "step": 8545
    },
    {
      "epoch": 5.2154541015625e-05,
      "step": 8545,
      "training_step_time": 0.458571195602417
    },
    {
      "epoch": 5.216064453125e-05,
      "model_forward_time": 0.11580681800842285,
      "step": 8546
    },
    {
      "epoch": 5.216064453125e-05,
      "step": 8546,
      "training_step_time": 0.43791747093200684
    },
    {
      "epoch": 5.2166748046875e-05,
      "model_forward_time": 0.11590409278869629,
      "step": 8547
    },
    {
      "epoch": 5.2166748046875e-05,
      "step": 8547,
      "training_step_time": 0.7815337181091309
    },
    {
      "epoch": 5.21728515625e-05,
      "model_forward_time": 0.11534380912780762,
      "step": 8548
    },
    {
      "epoch": 5.21728515625e-05,
      "step": 8548,
      "training_step_time": 0.3822362422943115
    },
    {
      "epoch": 5.2178955078125e-05,
      "model_forward_time": 0.11581611633300781,
      "step": 8549
    },
    {
      "epoch": 5.2178955078125e-05,
      "step": 8549,
      "training_step_time": 0.3944211006164551
    },
    {
      "epoch": 5.218505859375e-05,
      "grad_norm": 0.17809464037418365,
      "learning_rate": 9.767893684173721e-05,
      "loss": 0.0778,
      "step": 8550
    },
    {
      "epoch": 5.218505859375e-05,
      "model_forward_time": 0.11765933036804199,
      "step": 8550
    },
    {
      "epoch": 5.218505859375e-05,
      "step": 8550,
      "training_step_time": 0.3761756420135498
    },
    {
      "epoch": 5.2191162109375e-05,
      "model_forward_time": 0.11723041534423828,
      "step": 8551
    },
    {
      "epoch": 5.2191162109375e-05,
      "step": 8551,
      "training_step_time": 0.3779473304748535
    },
    {
      "epoch": 5.2197265625e-05,
      "model_forward_time": 0.11738848686218262,
      "step": 8552
    },
    {
      "epoch": 5.2197265625e-05,
      "step": 8552,
      "training_step_time": 0.381591796875
    },
    {
      "epoch": 5.2203369140625e-05,
      "model_forward_time": 0.11920809745788574,
      "step": 8553
    },
    {
      "epoch": 5.2203369140625e-05,
      "step": 8553,
      "training_step_time": 0.8046503067016602
    },
    {
      "epoch": 5.220947265625e-05,
      "model_forward_time": 0.11732959747314453,
      "step": 8554
    },
    {
      "epoch": 5.220947265625e-05,
      "step": 8554,
      "training_step_time": 0.37574243545532227
    },
    {
      "epoch": 5.2215576171875e-05,
      "model_forward_time": 0.11705493927001953,
      "step": 8555
    },
    {
      "epoch": 5.2215576171875e-05,
      "step": 8555,
      "training_step_time": 0.37700414657592773
    },
    {
      "epoch": 5.22216796875e-05,
      "model_forward_time": 0.11541938781738281,
      "step": 8556
    },
    {
      "epoch": 5.22216796875e-05,
      "step": 8556,
      "training_step_time": 0.37227606773376465
    },
    {
      "epoch": 5.2227783203125e-05,
      "model_forward_time": 0.11579322814941406,
      "step": 8557
    },
    {
      "epoch": 5.2227783203125e-05,
      "step": 8557,
      "training_step_time": 0.40789318084716797
    },
    {
      "epoch": 5.223388671875e-05,
      "model_forward_time": 0.11526632308959961,
      "step": 8558
    },
    {
      "epoch": 5.223388671875e-05,
      "step": 8558,
      "training_step_time": 0.388059139251709
    },
    {
      "epoch": 5.2239990234375e-05,
      "model_forward_time": 0.11603879928588867,
      "step": 8559
    },
    {
      "epoch": 5.2239990234375e-05,
      "step": 8559,
      "training_step_time": 0.6995577812194824
    },
    {
      "epoch": 5.224609375e-05,
      "grad_norm": 0.20908375084400177,
      "learning_rate": 9.76706307304213e-05,
      "loss": 0.0674,
      "step": 8560
    },
    {
      "epoch": 5.224609375e-05,
      "model_forward_time": 0.11538410186767578,
      "step": 8560
    },
    {
      "epoch": 5.224609375e-05,
      "step": 8560,
      "training_step_time": 0.41760754585266113
    },
    {
      "epoch": 5.2252197265625e-05,
      "model_forward_time": 0.11631631851196289,
      "step": 8561
    },
    {
      "epoch": 5.2252197265625e-05,
      "step": 8561,
      "training_step_time": 0.468839168548584
    },
    {
      "epoch": 5.225830078125e-05,
      "model_forward_time": 0.11744570732116699,
      "step": 8562
    },
    {
      "epoch": 5.225830078125e-05,
      "step": 8562,
      "training_step_time": 0.37038350105285645
    },
    {
      "epoch": 5.2264404296875e-05,
      "model_forward_time": 0.1172640323638916,
      "step": 8563
    },
    {
      "epoch": 5.2264404296875e-05,
      "step": 8563,
      "training_step_time": 0.37825989723205566
    },
    {
      "epoch": 5.22705078125e-05,
      "model_forward_time": 0.11544203758239746,
      "step": 8564
    },
    {
      "epoch": 5.22705078125e-05,
      "step": 8564,
      "training_step_time": 0.3992767333984375
    },
    {
      "epoch": 5.2276611328125e-05,
      "model_forward_time": 0.11943459510803223,
      "step": 8565
    },
    {
      "epoch": 5.2276611328125e-05,
      "step": 8565,
      "training_step_time": 0.7662763595581055
    },
    {
      "epoch": 5.228271484375e-05,
      "model_forward_time": 0.11670660972595215,
      "step": 8566
    },
    {
      "epoch": 5.228271484375e-05,
      "step": 8566,
      "training_step_time": 0.37555861473083496
    },
    {
      "epoch": 5.2288818359375e-05,
      "model_forward_time": 0.11652922630310059,
      "step": 8567
    },
    {
      "epoch": 5.2288818359375e-05,
      "step": 8567,
      "training_step_time": 0.38080835342407227
    },
    {
      "epoch": 5.2294921875e-05,
      "model_forward_time": 0.11761188507080078,
      "step": 8568
    },
    {
      "epoch": 5.2294921875e-05,
      "step": 8568,
      "training_step_time": 0.38188695907592773
    },
    {
      "epoch": 5.2301025390625e-05,
      "model_forward_time": 0.11661911010742188,
      "step": 8569
    },
    {
      "epoch": 5.2301025390625e-05,
      "step": 8569,
      "training_step_time": 0.37451839447021484
    },
    {
      "epoch": 5.230712890625e-05,
      "grad_norm": 0.22564837336540222,
      "learning_rate": 9.766231013802645e-05,
      "loss": 0.073,
      "step": 8570
    },
    {
      "epoch": 5.230712890625e-05,
      "model_forward_time": 0.11686944961547852,
      "step": 8570
    },
    {
      "epoch": 5.230712890625e-05,
      "step": 8570,
      "training_step_time": 0.40233659744262695
    },
    {
      "epoch": 5.2313232421875e-05,
      "model_forward_time": 0.1173095703125,
      "step": 8571
    },
    {
      "epoch": 5.2313232421875e-05,
      "step": 8571,
      "training_step_time": 0.8464562892913818
    },
    {
      "epoch": 5.23193359375e-05,
      "model_forward_time": 0.11835002899169922,
      "step": 8572
    },
    {
      "epoch": 5.23193359375e-05,
      "step": 8572,
      "training_step_time": 0.41238999366760254
    },
    {
      "epoch": 5.2325439453125e-05,
      "model_forward_time": 0.11666512489318848,
      "step": 8573
    },
    {
      "epoch": 5.2325439453125e-05,
      "step": 8573,
      "training_step_time": 0.44658637046813965
    },
    {
      "epoch": 5.233154296875e-05,
      "model_forward_time": 0.11752104759216309,
      "step": 8574
    },
    {
      "epoch": 5.233154296875e-05,
      "step": 8574,
      "training_step_time": 0.41040754318237305
    },
    {
      "epoch": 5.2337646484375e-05,
      "model_forward_time": 0.11728644371032715,
      "step": 8575
    },
    {
      "epoch": 5.2337646484375e-05,
      "step": 8575,
      "training_step_time": 0.37870240211486816
    },
    {
      "epoch": 5.234375e-05,
      "model_forward_time": 0.11829090118408203,
      "step": 8576
    },
    {
      "epoch": 5.234375e-05,
      "step": 8576,
      "training_step_time": 0.3758809566497803
    },
    {
      "epoch": 5.2349853515625e-05,
      "model_forward_time": 0.11767315864562988,
      "step": 8577
    },
    {
      "epoch": 5.2349853515625e-05,
      "step": 8577,
      "training_step_time": 0.7598357200622559
    },
    {
      "epoch": 5.235595703125e-05,
      "model_forward_time": 0.1172640323638916,
      "step": 8578
    },
    {
      "epoch": 5.235595703125e-05,
      "step": 8578,
      "training_step_time": 0.3813796043395996
    },
    {
      "epoch": 5.2362060546875e-05,
      "model_forward_time": 0.11615157127380371,
      "step": 8579
    },
    {
      "epoch": 5.2362060546875e-05,
      "step": 8579,
      "training_step_time": 0.40014004707336426
    },
    {
      "epoch": 5.23681640625e-05,
      "grad_norm": 0.19961535930633545,
      "learning_rate": 9.765397506708023e-05,
      "loss": 0.0669,
      "step": 8580
    },
    {
      "epoch": 5.23681640625e-05,
      "model_forward_time": 0.1172945499420166,
      "step": 8580
    },
    {
      "epoch": 5.23681640625e-05,
      "step": 8580,
      "training_step_time": 0.37491655349731445
    },
    {
      "epoch": 5.2374267578125e-05,
      "model_forward_time": 0.11620855331420898,
      "step": 8581
    },
    {
      "epoch": 5.2374267578125e-05,
      "step": 8581,
      "training_step_time": 0.38140201568603516
    },
    {
      "epoch": 5.238037109375e-05,
      "model_forward_time": 0.11662507057189941,
      "step": 8582
    },
    {
      "epoch": 5.238037109375e-05,
      "step": 8582,
      "training_step_time": 0.4139082431793213
    },
    {
      "epoch": 5.2386474609375e-05,
      "model_forward_time": 0.11748981475830078,
      "step": 8583
    },
    {
      "epoch": 5.2386474609375e-05,
      "step": 8583,
      "training_step_time": 0.7845156192779541
    },
    {
      "epoch": 5.2392578125e-05,
      "model_forward_time": 0.11522078514099121,
      "step": 8584
    },
    {
      "epoch": 5.2392578125e-05,
      "step": 8584,
      "training_step_time": 0.3684580326080322
    },
    {
      "epoch": 5.2398681640625e-05,
      "model_forward_time": 0.11502218246459961,
      "step": 8585
    },
    {
      "epoch": 5.2398681640625e-05,
      "step": 8585,
      "training_step_time": 0.4510066509246826
    },
    {
      "epoch": 5.240478515625e-05,
      "model_forward_time": 0.11590170860290527,
      "step": 8586
    },
    {
      "epoch": 5.240478515625e-05,
      "step": 8586,
      "training_step_time": 0.41281747817993164
    },
    {
      "epoch": 5.2410888671875e-05,
      "model_forward_time": 0.11590147018432617,
      "step": 8587
    },
    {
      "epoch": 5.2410888671875e-05,
      "step": 8587,
      "training_step_time": 0.42314839363098145
    },
    {
      "epoch": 5.24169921875e-05,
      "model_forward_time": 0.11612749099731445,
      "step": 8588
    },
    {
      "epoch": 5.24169921875e-05,
      "step": 8588,
      "training_step_time": 0.40819454193115234
    },
    {
      "epoch": 5.2423095703125e-05,
      "model_forward_time": 0.11606001853942871,
      "step": 8589
    },
    {
      "epoch": 5.2423095703125e-05,
      "step": 8589,
      "training_step_time": 0.48604607582092285
    },
    {
      "epoch": 5.242919921875e-05,
      "grad_norm": 0.29569777846336365,
      "learning_rate": 9.76456255201146e-05,
      "loss": 0.0671,
      "step": 8590
    },
    {
      "epoch": 5.242919921875e-05,
      "model_forward_time": 0.11607503890991211,
      "step": 8590
    },
    {
      "epoch": 5.242919921875e-05,
      "step": 8590,
      "training_step_time": 0.3880314826965332
    },
    {
      "epoch": 5.2435302734375e-05,
      "model_forward_time": 0.11771512031555176,
      "step": 8591
    },
    {
      "epoch": 5.2435302734375e-05,
      "step": 8591,
      "training_step_time": 0.39600276947021484
    },
    {
      "epoch": 5.244140625e-05,
      "model_forward_time": 0.11819911003112793,
      "step": 8592
    },
    {
      "epoch": 5.244140625e-05,
      "step": 8592,
      "training_step_time": 0.37877678871154785
    },
    {
      "epoch": 5.2447509765625e-05,
      "model_forward_time": 0.11782717704772949,
      "step": 8593
    },
    {
      "epoch": 5.2447509765625e-05,
      "step": 8593,
      "training_step_time": 0.3853473663330078
    },
    {
      "epoch": 5.245361328125e-05,
      "model_forward_time": 0.11875343322753906,
      "step": 8594
    },
    {
      "epoch": 5.245361328125e-05,
      "step": 8594,
      "training_step_time": 0.38028836250305176
    },
    {
      "epoch": 5.2459716796875e-05,
      "model_forward_time": 0.11762428283691406,
      "step": 8595
    },
    {
      "epoch": 5.2459716796875e-05,
      "step": 8595,
      "training_step_time": 1.0512630939483643
    },
    {
      "epoch": 5.24658203125e-05,
      "model_forward_time": 0.11661529541015625,
      "step": 8596
    },
    {
      "epoch": 5.24658203125e-05,
      "step": 8596,
      "training_step_time": 0.39830923080444336
    },
    {
      "epoch": 5.2471923828125e-05,
      "model_forward_time": 0.11588072776794434,
      "step": 8597
    },
    {
      "epoch": 5.2471923828125e-05,
      "step": 8597,
      "training_step_time": 0.3701310157775879
    },
    {
      "epoch": 5.247802734375e-05,
      "model_forward_time": 0.11642026901245117,
      "step": 8598
    },
    {
      "epoch": 5.247802734375e-05,
      "step": 8598,
      "training_step_time": 0.3988037109375
    },
    {
      "epoch": 5.2484130859375e-05,
      "model_forward_time": 0.11725711822509766,
      "step": 8599
    },
    {
      "epoch": 5.2484130859375e-05,
      "step": 8599,
      "training_step_time": 0.43367600440979004
    },
    {
      "epoch": 5.2490234375e-05,
      "grad_norm": 0.2991601824760437,
      "learning_rate": 9.763726149966596e-05,
      "loss": 0.0745,
      "step": 8600
    },
    {
      "epoch": 5.2490234375e-05,
      "model_forward_time": 0.1166384220123291,
      "step": 8600
    },
    {
      "epoch": 5.2490234375e-05,
      "step": 8600,
      "training_step_time": 0.3949286937713623
    },
    {
      "epoch": 5.2496337890625e-05,
      "model_forward_time": 0.11684608459472656,
      "step": 8601
    },
    {
      "epoch": 5.2496337890625e-05,
      "step": 8601,
      "training_step_time": 0.8078281879425049
    },
    {
      "epoch": 5.250244140625e-05,
      "model_forward_time": 0.11449217796325684,
      "step": 8602
    },
    {
      "epoch": 5.250244140625e-05,
      "step": 8602,
      "training_step_time": 0.3764674663543701
    },
    {
      "epoch": 5.2508544921875e-05,
      "model_forward_time": 0.11505246162414551,
      "step": 8603
    },
    {
      "epoch": 5.2508544921875e-05,
      "step": 8603,
      "training_step_time": 0.39065122604370117
    },
    {
      "epoch": 5.25146484375e-05,
      "model_forward_time": 0.11542034149169922,
      "step": 8604
    },
    {
      "epoch": 5.25146484375e-05,
      "step": 8604,
      "training_step_time": 0.3793792724609375
    },
    {
      "epoch": 5.2520751953125e-05,
      "model_forward_time": 0.11538386344909668,
      "step": 8605
    },
    {
      "epoch": 5.2520751953125e-05,
      "step": 8605,
      "training_step_time": 0.3784494400024414
    },
    {
      "epoch": 5.252685546875e-05,
      "model_forward_time": 0.11534452438354492,
      "step": 8606
    },
    {
      "epoch": 5.252685546875e-05,
      "step": 8606,
      "training_step_time": 0.4692506790161133
    },
    {
      "epoch": 5.2532958984375e-05,
      "model_forward_time": 0.11613297462463379,
      "step": 8607
    },
    {
      "epoch": 5.2532958984375e-05,
      "step": 8607,
      "training_step_time": 0.6028187274932861
    },
    {
      "epoch": 5.25390625e-05,
      "model_forward_time": 0.1149604320526123,
      "step": 8608
    },
    {
      "epoch": 5.25390625e-05,
      "step": 8608,
      "training_step_time": 0.40198850631713867
    },
    {
      "epoch": 5.2545166015625e-05,
      "model_forward_time": 0.11626863479614258,
      "step": 8609
    },
    {
      "epoch": 5.2545166015625e-05,
      "step": 8609,
      "training_step_time": 0.3873178958892822
    },
    {
      "epoch": 5.255126953125e-05,
      "grad_norm": 0.2383200079202652,
      "learning_rate": 9.762888300827507e-05,
      "loss": 0.0704,
      "step": 8610
    },
    {
      "epoch": 5.255126953125e-05,
      "model_forward_time": 0.11560583114624023,
      "step": 8610
    },
    {
      "epoch": 5.255126953125e-05,
      "step": 8610,
      "training_step_time": 0.3881676197052002
    },
    {
      "epoch": 5.2557373046875e-05,
      "model_forward_time": 0.11579155921936035,
      "step": 8611
    },
    {
      "epoch": 5.2557373046875e-05,
      "step": 8611,
      "training_step_time": 0.42484617233276367
    },
    {
      "epoch": 5.25634765625e-05,
      "model_forward_time": 0.11596918106079102,
      "step": 8612
    },
    {
      "epoch": 5.25634765625e-05,
      "step": 8612,
      "training_step_time": 0.444413423538208
    },
    {
      "epoch": 5.2569580078125e-05,
      "model_forward_time": 0.11612296104431152,
      "step": 8613
    },
    {
      "epoch": 5.2569580078125e-05,
      "step": 8613,
      "training_step_time": 0.7709167003631592
    },
    {
      "epoch": 5.257568359375e-05,
      "model_forward_time": 0.11657214164733887,
      "step": 8614
    },
    {
      "epoch": 5.257568359375e-05,
      "step": 8614,
      "training_step_time": 0.3800935745239258
    },
    {
      "epoch": 5.2581787109375e-05,
      "model_forward_time": 0.11719727516174316,
      "step": 8615
    },
    {
      "epoch": 5.2581787109375e-05,
      "step": 8615,
      "training_step_time": 0.3973665237426758
    },
    {
      "epoch": 5.2587890625e-05,
      "model_forward_time": 0.11711764335632324,
      "step": 8616
    },
    {
      "epoch": 5.2587890625e-05,
      "step": 8616,
      "training_step_time": 0.37712764739990234
    },
    {
      "epoch": 5.2593994140625e-05,
      "model_forward_time": 0.12095141410827637,
      "step": 8617
    },
    {
      "epoch": 5.2593994140625e-05,
      "step": 8617,
      "training_step_time": 0.37851524353027344
    },
    {
      "epoch": 5.260009765625e-05,
      "model_forward_time": 0.1167762279510498,
      "step": 8618
    },
    {
      "epoch": 5.260009765625e-05,
      "step": 8618,
      "training_step_time": 0.38149023056030273
    },
    {
      "epoch": 5.2606201171875e-05,
      "model_forward_time": 0.12156319618225098,
      "step": 8619
    },
    {
      "epoch": 5.2606201171875e-05,
      "step": 8619,
      "training_step_time": 0.7877788543701172
    },
    {
      "epoch": 5.26123046875e-05,
      "grad_norm": 0.24052174389362335,
      "learning_rate": 9.762049004848706e-05,
      "loss": 0.0712,
      "step": 8620
    },
    {
      "epoch": 5.26123046875e-05,
      "model_forward_time": 0.11654472351074219,
      "step": 8620
    },
    {
      "epoch": 5.26123046875e-05,
      "step": 8620,
      "training_step_time": 0.3826315402984619
    },
    {
      "epoch": 5.2618408203125e-05,
      "model_forward_time": 0.11577057838439941,
      "step": 8621
    },
    {
      "epoch": 5.2618408203125e-05,
      "step": 8621,
      "training_step_time": 0.40970301628112793
    },
    {
      "epoch": 5.262451171875e-05,
      "model_forward_time": 0.11632847785949707,
      "step": 8622
    },
    {
      "epoch": 5.262451171875e-05,
      "step": 8622,
      "training_step_time": 0.3769495487213135
    },
    {
      "epoch": 5.2630615234375e-05,
      "model_forward_time": 0.11563372611999512,
      "step": 8623
    },
    {
      "epoch": 5.2630615234375e-05,
      "step": 8623,
      "training_step_time": 0.44902539253234863
    },
    {
      "epoch": 5.263671875e-05,
      "model_forward_time": 0.11591506004333496,
      "step": 8624
    },
    {
      "epoch": 5.263671875e-05,
      "step": 8624,
      "training_step_time": 0.40201878547668457
    },
    {
      "epoch": 5.2642822265625e-05,
      "model_forward_time": 0.11606168746948242,
      "step": 8625
    },
    {
      "epoch": 5.2642822265625e-05,
      "step": 8625,
      "training_step_time": 0.7638390064239502
    },
    {
      "epoch": 5.264892578125e-05,
      "model_forward_time": 0.11582541465759277,
      "step": 8626
    },
    {
      "epoch": 5.264892578125e-05,
      "step": 8626,
      "training_step_time": 0.3939626216888428
    },
    {
      "epoch": 5.2655029296875e-05,
      "model_forward_time": 0.11556124687194824,
      "step": 8627
    },
    {
      "epoch": 5.2655029296875e-05,
      "step": 8627,
      "training_step_time": 0.3812105655670166
    },
    {
      "epoch": 5.26611328125e-05,
      "model_forward_time": 0.1157979965209961,
      "step": 8628
    },
    {
      "epoch": 5.26611328125e-05,
      "step": 8628,
      "training_step_time": 0.37699198722839355
    },
    {
      "epoch": 5.2667236328125e-05,
      "model_forward_time": 0.11583232879638672,
      "step": 8629
    },
    {
      "epoch": 5.2667236328125e-05,
      "step": 8629,
      "training_step_time": 0.3854403495788574
    },
    {
      "epoch": 5.267333984375e-05,
      "grad_norm": 0.19869568943977356,
      "learning_rate": 9.761208262285155e-05,
      "loss": 0.0679,
      "step": 8630
    },
    {
      "epoch": 5.267333984375e-05,
      "model_forward_time": 0.11579370498657227,
      "step": 8630
    },
    {
      "epoch": 5.267333984375e-05,
      "step": 8630,
      "training_step_time": 0.39170408248901367
    },
    {
      "epoch": 5.2679443359375e-05,
      "model_forward_time": 0.11611509323120117,
      "step": 8631
    },
    {
      "epoch": 5.2679443359375e-05,
      "step": 8631,
      "training_step_time": 0.5021917819976807
    },
    {
      "epoch": 5.2685546875e-05,
      "model_forward_time": 0.11721110343933105,
      "step": 8632
    },
    {
      "epoch": 5.2685546875e-05,
      "step": 8632,
      "training_step_time": 0.42341113090515137
    },
    {
      "epoch": 5.2691650390625e-05,
      "model_forward_time": 0.11582517623901367,
      "step": 8633
    },
    {
      "epoch": 5.2691650390625e-05,
      "step": 8633,
      "training_step_time": 0.46151065826416016
    },
    {
      "epoch": 5.269775390625e-05,
      "model_forward_time": 0.11672663688659668,
      "step": 8634
    },
    {
      "epoch": 5.269775390625e-05,
      "step": 8634,
      "training_step_time": 0.3813209533691406
    },
    {
      "epoch": 5.2703857421875e-05,
      "model_forward_time": 0.11536931991577148,
      "step": 8635
    },
    {
      "epoch": 5.2703857421875e-05,
      "step": 8635,
      "training_step_time": 0.40227437019348145
    },
    {
      "epoch": 5.27099609375e-05,
      "model_forward_time": 0.1161048412322998,
      "step": 8636
    },
    {
      "epoch": 5.27099609375e-05,
      "step": 8636,
      "training_step_time": 0.39160895347595215
    },
    {
      "epoch": 5.2716064453125e-05,
      "model_forward_time": 0.11623954772949219,
      "step": 8637
    },
    {
      "epoch": 5.2716064453125e-05,
      "step": 8637,
      "training_step_time": 0.5694363117218018
    },
    {
      "epoch": 5.272216796875e-05,
      "model_forward_time": 0.11624908447265625,
      "step": 8638
    },
    {
      "epoch": 5.272216796875e-05,
      "step": 8638,
      "training_step_time": 0.513141393661499
    },
    {
      "epoch": 5.2728271484375e-05,
      "model_forward_time": 0.11640238761901855,
      "step": 8639
    },
    {
      "epoch": 5.2728271484375e-05,
      "step": 8639,
      "training_step_time": 0.49814558029174805
    },
    {
      "epoch": 5.2734375e-05,
      "grad_norm": 0.2418190836906433,
      "learning_rate": 9.760366073392246e-05,
      "loss": 0.0673,
      "step": 8640
    },
    {
      "epoch": 5.2734375e-05,
      "model_forward_time": 0.1158759593963623,
      "step": 8640
    },
    {
      "epoch": 5.2734375e-05,
      "step": 8640,
      "training_step_time": 0.434664249420166
    },
    {
      "epoch": 5.2740478515625e-05,
      "model_forward_time": 0.11584329605102539,
      "step": 8641
    },
    {
      "epoch": 5.2740478515625e-05,
      "step": 8641,
      "training_step_time": 0.39109230041503906
    },
    {
      "epoch": 5.274658203125e-05,
      "model_forward_time": 0.11544990539550781,
      "step": 8642
    },
    {
      "epoch": 5.274658203125e-05,
      "step": 8642,
      "training_step_time": 0.39995622634887695
    },
    {
      "epoch": 5.2752685546875e-05,
      "model_forward_time": 0.11648774147033691,
      "step": 8643
    },
    {
      "epoch": 5.2752685546875e-05,
      "step": 8643,
      "training_step_time": 0.4315216541290283
    },
    {
      "epoch": 5.27587890625e-05,
      "model_forward_time": 0.12341928482055664,
      "step": 8644
    },
    {
      "epoch": 5.27587890625e-05,
      "step": 8644,
      "training_step_time": 0.40552306175231934
    },
    {
      "epoch": 5.2764892578125e-05,
      "model_forward_time": 0.11693644523620605,
      "step": 8645
    },
    {
      "epoch": 5.2764892578125e-05,
      "step": 8645,
      "training_step_time": 0.41100239753723145
    },
    {
      "epoch": 5.277099609375e-05,
      "model_forward_time": 0.1174478530883789,
      "step": 8646
    },
    {
      "epoch": 5.277099609375e-05,
      "step": 8646,
      "training_step_time": 0.4490809440612793
    },
    {
      "epoch": 5.2777099609375e-05,
      "model_forward_time": 0.11797189712524414,
      "step": 8647
    },
    {
      "epoch": 5.2777099609375e-05,
      "step": 8647,
      "training_step_time": 0.387148380279541
    },
    {
      "epoch": 5.2783203125e-05,
      "model_forward_time": 0.11636114120483398,
      "step": 8648
    },
    {
      "epoch": 5.2783203125e-05,
      "step": 8648,
      "training_step_time": 0.3840343952178955
    },
    {
      "epoch": 5.2789306640625e-05,
      "model_forward_time": 0.11615633964538574,
      "step": 8649
    },
    {
      "epoch": 5.2789306640625e-05,
      "step": 8649,
      "training_step_time": 0.3929331302642822
    },
    {
      "epoch": 5.279541015625e-05,
      "grad_norm": 0.246342271566391,
      "learning_rate": 9.759522438425813e-05,
      "loss": 0.0683,
      "step": 8650
    },
    {
      "epoch": 5.279541015625e-05,
      "model_forward_time": 0.12024855613708496,
      "step": 8650
    },
    {
      "epoch": 5.279541015625e-05,
      "step": 8650,
      "training_step_time": 0.4027886390686035
    },
    {
      "epoch": 5.2801513671875e-05,
      "model_forward_time": 0.11694478988647461,
      "step": 8651
    },
    {
      "epoch": 5.2801513671875e-05,
      "step": 8651,
      "training_step_time": 0.38216137886047363
    },
    {
      "epoch": 5.28076171875e-05,
      "model_forward_time": 0.11829566955566406,
      "step": 8652
    },
    {
      "epoch": 5.28076171875e-05,
      "step": 8652,
      "training_step_time": 0.41961050033569336
    },
    {
      "epoch": 5.2813720703125e-05,
      "model_forward_time": 0.12038302421569824,
      "step": 8653
    },
    {
      "epoch": 5.2813720703125e-05,
      "step": 8653,
      "training_step_time": 0.5021460056304932
    },
    {
      "epoch": 5.281982421875e-05,
      "model_forward_time": 0.14104700088500977,
      "step": 8654
    },
    {
      "epoch": 5.281982421875e-05,
      "step": 8654,
      "training_step_time": 0.45193958282470703
    },
    {
      "epoch": 5.2825927734375e-05,
      "model_forward_time": 0.11751580238342285,
      "step": 8655
    },
    {
      "epoch": 5.2825927734375e-05,
      "step": 8655,
      "training_step_time": 0.4009277820587158
    },
    {
      "epoch": 5.283203125e-05,
      "model_forward_time": 0.11638450622558594,
      "step": 8656
    },
    {
      "epoch": 5.283203125e-05,
      "step": 8656,
      "training_step_time": 0.3889625072479248
    },
    {
      "epoch": 5.2838134765625e-05,
      "model_forward_time": 0.11642932891845703,
      "step": 8657
    },
    {
      "epoch": 5.2838134765625e-05,
      "step": 8657,
      "training_step_time": 0.40360474586486816
    },
    {
      "epoch": 5.284423828125e-05,
      "model_forward_time": 0.11816096305847168,
      "step": 8658
    },
    {
      "epoch": 5.284423828125e-05,
      "step": 8658,
      "training_step_time": 0.3891472816467285
    },
    {
      "epoch": 5.2850341796875e-05,
      "model_forward_time": 0.11622452735900879,
      "step": 8659
    },
    {
      "epoch": 5.2850341796875e-05,
      "step": 8659,
      "training_step_time": 0.38777732849121094
    },
    {
      "epoch": 5.28564453125e-05,
      "grad_norm": 0.2716203033924103,
      "learning_rate": 9.758677357642131e-05,
      "loss": 0.0686,
      "step": 8660
    },
    {
      "epoch": 5.28564453125e-05,
      "model_forward_time": 0.11645221710205078,
      "step": 8660
    },
    {
      "epoch": 5.28564453125e-05,
      "step": 8660,
      "training_step_time": 0.39587831497192383
    },
    {
      "epoch": 5.2862548828125e-05,
      "model_forward_time": 0.11619853973388672,
      "step": 8661
    },
    {
      "epoch": 5.2862548828125e-05,
      "step": 8661,
      "training_step_time": 0.43199872970581055
    },
    {
      "epoch": 5.286865234375e-05,
      "model_forward_time": 0.11809372901916504,
      "step": 8662
    },
    {
      "epoch": 5.286865234375e-05,
      "step": 8662,
      "training_step_time": 0.3868281841278076
    },
    {
      "epoch": 5.2874755859375e-05,
      "model_forward_time": 0.11723780632019043,
      "step": 8663
    },
    {
      "epoch": 5.2874755859375e-05,
      "step": 8663,
      "training_step_time": 0.3929429054260254
    },
    {
      "epoch": 5.2880859375e-05,
      "model_forward_time": 0.11680459976196289,
      "step": 8664
    },
    {
      "epoch": 5.2880859375e-05,
      "step": 8664,
      "training_step_time": 0.381791353225708
    },
    {
      "epoch": 5.2886962890625e-05,
      "model_forward_time": 0.11600399017333984,
      "step": 8665
    },
    {
      "epoch": 5.2886962890625e-05,
      "step": 8665,
      "training_step_time": 0.40595078468322754
    },
    {
      "epoch": 5.289306640625e-05,
      "model_forward_time": 0.11644673347473145,
      "step": 8666
    },
    {
      "epoch": 5.289306640625e-05,
      "step": 8666,
      "training_step_time": 0.4531433582305908
    },
    {
      "epoch": 5.2899169921875e-05,
      "model_forward_time": 0.11696386337280273,
      "step": 8667
    },
    {
      "epoch": 5.2899169921875e-05,
      "step": 8667,
      "training_step_time": 0.4679281711578369
    },
    {
      "epoch": 5.29052734375e-05,
      "model_forward_time": 0.1166071891784668,
      "step": 8668
    },
    {
      "epoch": 5.29052734375e-05,
      "step": 8668,
      "training_step_time": 0.4677393436431885
    },
    {
      "epoch": 5.2911376953125e-05,
      "model_forward_time": 0.11777949333190918,
      "step": 8669
    },
    {
      "epoch": 5.2911376953125e-05,
      "step": 8669,
      "training_step_time": 0.4725170135498047
    },
    {
      "epoch": 5.291748046875e-05,
      "grad_norm": 0.3345835506916046,
      "learning_rate": 9.757830831297914e-05,
      "loss": 0.0686,
      "step": 8670
    },
    {
      "epoch": 5.291748046875e-05,
      "model_forward_time": 0.11616706848144531,
      "step": 8670
    },
    {
      "epoch": 5.291748046875e-05,
      "step": 8670,
      "training_step_time": 0.38625049591064453
    },
    {
      "epoch": 5.2923583984375e-05,
      "model_forward_time": 0.1157984733581543,
      "step": 8671
    },
    {
      "epoch": 5.2923583984375e-05,
      "step": 8671,
      "training_step_time": 0.4000520706176758
    },
    {
      "epoch": 5.29296875e-05,
      "model_forward_time": 0.11614274978637695,
      "step": 8672
    },
    {
      "epoch": 5.29296875e-05,
      "step": 8672,
      "training_step_time": 0.4311835765838623
    },
    {
      "epoch": 5.2935791015625e-05,
      "model_forward_time": 0.11639285087585449,
      "step": 8673
    },
    {
      "epoch": 5.2935791015625e-05,
      "step": 8673,
      "training_step_time": 0.3866243362426758
    },
    {
      "epoch": 5.294189453125e-05,
      "model_forward_time": 0.11864757537841797,
      "step": 8674
    },
    {
      "epoch": 5.294189453125e-05,
      "step": 8674,
      "training_step_time": 0.37982773780822754
    },
    {
      "epoch": 5.2947998046875e-05,
      "model_forward_time": 0.1189737319946289,
      "step": 8675
    },
    {
      "epoch": 5.2947998046875e-05,
      "step": 8675,
      "training_step_time": 0.5943996906280518
    },
    {
      "epoch": 5.29541015625e-05,
      "model_forward_time": 0.11809182167053223,
      "step": 8676
    },
    {
      "epoch": 5.29541015625e-05,
      "step": 8676,
      "training_step_time": 0.37878894805908203
    },
    {
      "epoch": 5.2960205078125e-05,
      "model_forward_time": 0.11739730834960938,
      "step": 8677
    },
    {
      "epoch": 5.2960205078125e-05,
      "step": 8677,
      "training_step_time": 0.3834362030029297
    },
    {
      "epoch": 5.296630859375e-05,
      "model_forward_time": 0.1185615062713623,
      "step": 8678
    },
    {
      "epoch": 5.296630859375e-05,
      "step": 8678,
      "training_step_time": 0.3827033042907715
    },
    {
      "epoch": 5.2972412109375e-05,
      "model_forward_time": 0.1176445484161377,
      "step": 8679
    },
    {
      "epoch": 5.2972412109375e-05,
      "step": 8679,
      "training_step_time": 0.3784489631652832
    },
    {
      "epoch": 5.2978515625e-05,
      "grad_norm": 0.2195359170436859,
      "learning_rate": 9.756982859650314e-05,
      "loss": 0.0771,
      "step": 8680
    },
    {
      "epoch": 5.2978515625e-05,
      "model_forward_time": 0.11742496490478516,
      "step": 8680
    },
    {
      "epoch": 5.2978515625e-05,
      "step": 8680,
      "training_step_time": 0.45722532272338867
    },
    {
      "epoch": 5.2984619140625e-05,
      "model_forward_time": 0.11810803413391113,
      "step": 8681
    },
    {
      "epoch": 5.2984619140625e-05,
      "step": 8681,
      "training_step_time": 0.9202446937561035
    },
    {
      "epoch": 5.299072265625e-05,
      "model_forward_time": 0.11739897727966309,
      "step": 8682
    },
    {
      "epoch": 5.299072265625e-05,
      "step": 8682,
      "training_step_time": 0.4286308288574219
    },
    {
      "epoch": 5.2996826171875e-05,
      "model_forward_time": 0.11652445793151855,
      "step": 8683
    },
    {
      "epoch": 5.2996826171875e-05,
      "step": 8683,
      "training_step_time": 0.3864145278930664
    },
    {
      "epoch": 5.30029296875e-05,
      "model_forward_time": 0.11600899696350098,
      "step": 8684
    },
    {
      "epoch": 5.30029296875e-05,
      "step": 8684,
      "training_step_time": 0.38759422302246094
    },
    {
      "epoch": 5.3009033203125e-05,
      "model_forward_time": 0.11656498908996582,
      "step": 8685
    },
    {
      "epoch": 5.3009033203125e-05,
      "step": 8685,
      "training_step_time": 0.40549325942993164
    },
    {
      "epoch": 5.301513671875e-05,
      "model_forward_time": 0.11552667617797852,
      "step": 8686
    },
    {
      "epoch": 5.301513671875e-05,
      "step": 8686,
      "training_step_time": 0.37312769889831543
    },
    {
      "epoch": 5.3021240234375e-05,
      "model_forward_time": 0.11629295349121094,
      "step": 8687
    },
    {
      "epoch": 5.3021240234375e-05,
      "step": 8687,
      "training_step_time": 0.7797024250030518
    },
    {
      "epoch": 5.302734375e-05,
      "model_forward_time": 0.11529088020324707,
      "step": 8688
    },
    {
      "epoch": 5.302734375e-05,
      "step": 8688,
      "training_step_time": 0.3758571147918701
    },
    {
      "epoch": 5.3033447265625e-05,
      "model_forward_time": 0.11518096923828125,
      "step": 8689
    },
    {
      "epoch": 5.3033447265625e-05,
      "step": 8689,
      "training_step_time": 0.38223695755004883
    },
    {
      "epoch": 5.303955078125e-05,
      "grad_norm": 0.20983853936195374,
      "learning_rate": 9.756133442956923e-05,
      "loss": 0.0701,
      "step": 8690
    },
    {
      "epoch": 5.303955078125e-05,
      "model_forward_time": 0.11603927612304688,
      "step": 8690
    },
    {
      "epoch": 5.303955078125e-05,
      "step": 8690,
      "training_step_time": 0.39397096633911133
    },
    {
      "epoch": 5.3045654296875e-05,
      "model_forward_time": 0.11811065673828125,
      "step": 8691
    },
    {
      "epoch": 5.3045654296875e-05,
      "step": 8691,
      "training_step_time": 0.3858475685119629
    },
    {
      "epoch": 5.30517578125e-05,
      "model_forward_time": 0.1166384220123291,
      "step": 8692
    },
    {
      "epoch": 5.30517578125e-05,
      "step": 8692,
      "training_step_time": 0.37938976287841797
    },
    {
      "epoch": 5.3057861328125e-05,
      "model_forward_time": 0.11846351623535156,
      "step": 8693
    },
    {
      "epoch": 5.3057861328125e-05,
      "step": 8693,
      "training_step_time": 0.9373021125793457
    },
    {
      "epoch": 5.306396484375e-05,
      "model_forward_time": 0.1163938045501709,
      "step": 8694
    },
    {
      "epoch": 5.306396484375e-05,
      "step": 8694,
      "training_step_time": 0.4322245121002197
    },
    {
      "epoch": 5.3070068359375e-05,
      "model_forward_time": 0.11649847030639648,
      "step": 8695
    },
    {
      "epoch": 5.3070068359375e-05,
      "step": 8695,
      "training_step_time": 0.4107701778411865
    },
    {
      "epoch": 5.3076171875e-05,
      "model_forward_time": 0.11523890495300293,
      "step": 8696
    },
    {
      "epoch": 5.3076171875e-05,
      "step": 8696,
      "training_step_time": 0.39124035835266113
    },
    {
      "epoch": 5.3082275390625e-05,
      "model_forward_time": 0.11624550819396973,
      "step": 8697
    },
    {
      "epoch": 5.3082275390625e-05,
      "step": 8697,
      "training_step_time": 0.40781497955322266
    },
    {
      "epoch": 5.308837890625e-05,
      "model_forward_time": 0.11532139778137207,
      "step": 8698
    },
    {
      "epoch": 5.308837890625e-05,
      "step": 8698,
      "training_step_time": 0.37603092193603516
    },
    {
      "epoch": 5.3094482421875e-05,
      "model_forward_time": 0.11634230613708496,
      "step": 8699
    },
    {
      "epoch": 5.3094482421875e-05,
      "step": 8699,
      "training_step_time": 0.44501185417175293
    },
    {
      "epoch": 5.31005859375e-05,
      "grad_norm": 0.19482511281967163,
      "learning_rate": 9.755282581475769e-05,
      "loss": 0.0661,
      "step": 8700
    },
    {
      "epoch": 5.31005859375e-05,
      "model_forward_time": 0.11578607559204102,
      "step": 8700
    },
    {
      "epoch": 5.31005859375e-05,
      "step": 8700,
      "training_step_time": 0.37569236755371094
    },
    {
      "epoch": 5.3106689453125e-05,
      "model_forward_time": 0.11830258369445801,
      "step": 8701
    },
    {
      "epoch": 5.3106689453125e-05,
      "step": 8701,
      "training_step_time": 0.3979506492614746
    },
    {
      "epoch": 5.311279296875e-05,
      "model_forward_time": 0.11842632293701172,
      "step": 8702
    },
    {
      "epoch": 5.311279296875e-05,
      "step": 8702,
      "training_step_time": 0.3822641372680664
    },
    {
      "epoch": 5.3118896484375e-05,
      "model_forward_time": 0.11826372146606445,
      "step": 8703
    },
    {
      "epoch": 5.3118896484375e-05,
      "step": 8703,
      "training_step_time": 0.3904721736907959
    },
    {
      "epoch": 5.3125e-05,
      "model_forward_time": 0.1164553165435791,
      "step": 8704
    },
    {
      "epoch": 5.3125e-05,
      "step": 8704,
      "training_step_time": 0.39118123054504395
    },
    {
      "epoch": 5.3131103515625e-05,
      "model_forward_time": 0.11666655540466309,
      "step": 8705
    },
    {
      "epoch": 5.3131103515625e-05,
      "step": 8705,
      "training_step_time": 1.3492023944854736
    },
    {
      "epoch": 5.313720703125e-05,
      "model_forward_time": 0.11491990089416504,
      "step": 8706
    },
    {
      "epoch": 5.313720703125e-05,
      "step": 8706,
      "training_step_time": 0.3660316467285156
    },
    {
      "epoch": 5.3143310546875e-05,
      "model_forward_time": 0.11547541618347168,
      "step": 8707
    },
    {
      "epoch": 5.3143310546875e-05,
      "step": 8707,
      "training_step_time": 0.4193401336669922
    },
    {
      "epoch": 5.31494140625e-05,
      "model_forward_time": 0.11405706405639648,
      "step": 8708
    },
    {
      "epoch": 5.31494140625e-05,
      "step": 8708,
      "training_step_time": 0.4197385311126709
    },
    {
      "epoch": 5.3155517578125e-05,
      "model_forward_time": 0.11486124992370605,
      "step": 8709
    },
    {
      "epoch": 5.3155517578125e-05,
      "step": 8709,
      "training_step_time": 0.3813643455505371
    },
    {
      "epoch": 5.316162109375e-05,
      "grad_norm": 0.21464970707893372,
      "learning_rate": 9.754430275465323e-05,
      "loss": 0.0638,
      "step": 8710
    },
    {
      "epoch": 5.316162109375e-05,
      "model_forward_time": 0.11504197120666504,
      "step": 8710
    },
    {
      "epoch": 5.316162109375e-05,
      "step": 8710,
      "training_step_time": 0.40114307403564453
    },
    {
      "epoch": 5.3167724609375e-05,
      "model_forward_time": 0.11803340911865234,
      "step": 8711
    },
    {
      "epoch": 5.3167724609375e-05,
      "step": 8711,
      "training_step_time": 0.3931436538696289
    },
    {
      "epoch": 5.3173828125e-05,
      "model_forward_time": 0.11562585830688477,
      "step": 8712
    },
    {
      "epoch": 5.3173828125e-05,
      "step": 8712,
      "training_step_time": 0.5053825378417969
    },
    {
      "epoch": 5.3179931640625e-05,
      "model_forward_time": 0.11504673957824707,
      "step": 8713
    },
    {
      "epoch": 5.3179931640625e-05,
      "step": 8713,
      "training_step_time": 0.39223480224609375
    },
    {
      "epoch": 5.318603515625e-05,
      "model_forward_time": 0.11572766304016113,
      "step": 8714
    },
    {
      "epoch": 5.318603515625e-05,
      "step": 8714,
      "training_step_time": 0.3951761722564697
    },
    {
      "epoch": 5.3192138671875e-05,
      "model_forward_time": 0.12324237823486328,
      "step": 8715
    },
    {
      "epoch": 5.3192138671875e-05,
      "step": 8715,
      "training_step_time": 0.4113891124725342
    },
    {
      "epoch": 5.31982421875e-05,
      "model_forward_time": 0.1152646541595459,
      "step": 8716
    },
    {
      "epoch": 5.31982421875e-05,
      "step": 8716,
      "training_step_time": 0.3746757507324219
    },
    {
      "epoch": 5.3204345703125e-05,
      "model_forward_time": 0.11506962776184082,
      "step": 8717
    },
    {
      "epoch": 5.3204345703125e-05,
      "step": 8717,
      "training_step_time": 1.096184492111206
    },
    {
      "epoch": 5.321044921875e-05,
      "model_forward_time": 0.11494588851928711,
      "step": 8718
    },
    {
      "epoch": 5.321044921875e-05,
      "step": 8718,
      "training_step_time": 0.44960451126098633
    },
    {
      "epoch": 5.3216552734375e-05,
      "model_forward_time": 0.11465668678283691,
      "step": 8719
    },
    {
      "epoch": 5.3216552734375e-05,
      "step": 8719,
      "training_step_time": 0.48615431785583496
    },
    {
      "epoch": 5.322265625e-05,
      "grad_norm": 0.28762194514274597,
      "learning_rate": 9.753576525184492e-05,
      "loss": 0.0699,
      "step": 8720
    },
    {
      "epoch": 5.322265625e-05,
      "model_forward_time": 0.11440181732177734,
      "step": 8720
    },
    {
      "epoch": 5.322265625e-05,
      "step": 8720,
      "training_step_time": 0.43264317512512207
    },
    {
      "epoch": 5.3228759765625e-05,
      "model_forward_time": 0.11418366432189941,
      "step": 8721
    },
    {
      "epoch": 5.3228759765625e-05,
      "step": 8721,
      "training_step_time": 0.4210963249206543
    },
    {
      "epoch": 5.323486328125e-05,
      "model_forward_time": 0.11485552787780762,
      "step": 8722
    },
    {
      "epoch": 5.323486328125e-05,
      "step": 8722,
      "training_step_time": 0.39129209518432617
    },
    {
      "epoch": 5.3240966796875e-05,
      "model_forward_time": 0.11566281318664551,
      "step": 8723
    },
    {
      "epoch": 5.3240966796875e-05,
      "step": 8723,
      "training_step_time": 0.3763556480407715
    },
    {
      "epoch": 5.32470703125e-05,
      "model_forward_time": 0.11504435539245605,
      "step": 8724
    },
    {
      "epoch": 5.32470703125e-05,
      "step": 8724,
      "training_step_time": 0.4003031253814697
    },
    {
      "epoch": 5.3253173828125e-05,
      "model_forward_time": 0.1156771183013916,
      "step": 8725
    },
    {
      "epoch": 5.3253173828125e-05,
      "step": 8725,
      "training_step_time": 0.3889279365539551
    },
    {
      "epoch": 5.325927734375e-05,
      "model_forward_time": 0.11563467979431152,
      "step": 8726
    },
    {
      "epoch": 5.325927734375e-05,
      "step": 8726,
      "training_step_time": 0.3973100185394287
    },
    {
      "epoch": 5.3265380859375e-05,
      "model_forward_time": 0.11588788032531738,
      "step": 8727
    },
    {
      "epoch": 5.3265380859375e-05,
      "step": 8727,
      "training_step_time": 0.38047027587890625
    },
    {
      "epoch": 5.3271484375e-05,
      "model_forward_time": 0.11598873138427734,
      "step": 8728
    },
    {
      "epoch": 5.3271484375e-05,
      "step": 8728,
      "training_step_time": 0.3950929641723633
    },
    {
      "epoch": 5.3277587890625e-05,
      "model_forward_time": 0.11586141586303711,
      "step": 8729
    },
    {
      "epoch": 5.3277587890625e-05,
      "step": 8729,
      "training_step_time": 0.65313720703125
    },
    {
      "epoch": 5.328369140625e-05,
      "grad_norm": 0.2538050413131714,
      "learning_rate": 9.752721330892624e-05,
      "loss": 0.0706,
      "step": 8730
    },
    {
      "epoch": 5.328369140625e-05,
      "model_forward_time": 0.11586165428161621,
      "step": 8730
    },
    {
      "epoch": 5.328369140625e-05,
      "step": 8730,
      "training_step_time": 0.3825812339782715
    },
    {
      "epoch": 5.3289794921875e-05,
      "model_forward_time": 0.11556863784790039,
      "step": 8731
    },
    {
      "epoch": 5.3289794921875e-05,
      "step": 8731,
      "training_step_time": 0.40354132652282715
    },
    {
      "epoch": 5.32958984375e-05,
      "model_forward_time": 0.11530756950378418,
      "step": 8732
    },
    {
      "epoch": 5.32958984375e-05,
      "step": 8732,
      "training_step_time": 0.3865644931793213
    },
    {
      "epoch": 5.3302001953125e-05,
      "model_forward_time": 0.11799263954162598,
      "step": 8733
    },
    {
      "epoch": 5.3302001953125e-05,
      "step": 8733,
      "training_step_time": 0.44234299659729004
    },
    {
      "epoch": 5.330810546875e-05,
      "model_forward_time": 0.11662054061889648,
      "step": 8734
    },
    {
      "epoch": 5.330810546875e-05,
      "step": 8734,
      "training_step_time": 0.4498608112335205
    },
    {
      "epoch": 5.3314208984375e-05,
      "model_forward_time": 0.1152799129486084,
      "step": 8735
    },
    {
      "epoch": 5.3314208984375e-05,
      "step": 8735,
      "training_step_time": 0.58262038230896
    },
    {
      "epoch": 5.33203125e-05,
      "model_forward_time": 0.11545252799987793,
      "step": 8736
    },
    {
      "epoch": 5.33203125e-05,
      "step": 8736,
      "training_step_time": 0.378279447555542
    },
    {
      "epoch": 5.3326416015625e-05,
      "model_forward_time": 0.11712527275085449,
      "step": 8737
    },
    {
      "epoch": 5.3326416015625e-05,
      "step": 8737,
      "training_step_time": 0.3947024345397949
    },
    {
      "epoch": 5.333251953125e-05,
      "model_forward_time": 0.11481904983520508,
      "step": 8738
    },
    {
      "epoch": 5.333251953125e-05,
      "step": 8738,
      "training_step_time": 0.38936758041381836
    },
    {
      "epoch": 5.3338623046875e-05,
      "model_forward_time": 0.11557626724243164,
      "step": 8739
    },
    {
      "epoch": 5.3338623046875e-05,
      "step": 8739,
      "training_step_time": 0.40109801292419434
    },
    {
      "epoch": 5.33447265625e-05,
      "grad_norm": 0.25599855184555054,
      "learning_rate": 9.751864692849504e-05,
      "loss": 0.0674,
      "step": 8740
    },
    {
      "epoch": 5.33447265625e-05,
      "model_forward_time": 0.11512351036071777,
      "step": 8740
    },
    {
      "epoch": 5.33447265625e-05,
      "step": 8740,
      "training_step_time": 0.392179012298584
    },
    {
      "epoch": 5.3350830078125e-05,
      "model_forward_time": 0.11537885665893555,
      "step": 8741
    },
    {
      "epoch": 5.3350830078125e-05,
      "step": 8741,
      "training_step_time": 1.1384656429290771
    },
    {
      "epoch": 5.335693359375e-05,
      "model_forward_time": 0.11439371109008789,
      "step": 8742
    },
    {
      "epoch": 5.335693359375e-05,
      "step": 8742,
      "training_step_time": 0.3793058395385742
    },
    {
      "epoch": 5.3363037109375e-05,
      "model_forward_time": 0.11482119560241699,
      "step": 8743
    },
    {
      "epoch": 5.3363037109375e-05,
      "step": 8743,
      "training_step_time": 0.3816049098968506
    },
    {
      "epoch": 5.3369140625e-05,
      "model_forward_time": 0.11455512046813965,
      "step": 8744
    },
    {
      "epoch": 5.3369140625e-05,
      "step": 8744,
      "training_step_time": 0.3833141326904297
    },
    {
      "epoch": 5.3375244140625e-05,
      "model_forward_time": 0.11605978012084961,
      "step": 8745
    },
    {
      "epoch": 5.3375244140625e-05,
      "step": 8745,
      "training_step_time": 0.38251495361328125
    },
    {
      "epoch": 5.338134765625e-05,
      "model_forward_time": 0.11607885360717773,
      "step": 8746
    },
    {
      "epoch": 5.338134765625e-05,
      "step": 8746,
      "training_step_time": 0.43233203887939453
    },
    {
      "epoch": 5.3387451171875e-05,
      "model_forward_time": 0.11551260948181152,
      "step": 8747
    },
    {
      "epoch": 5.3387451171875e-05,
      "step": 8747,
      "training_step_time": 0.7764763832092285
    },
    {
      "epoch": 5.33935546875e-05,
      "model_forward_time": 0.11542725563049316,
      "step": 8748
    },
    {
      "epoch": 5.33935546875e-05,
      "step": 8748,
      "training_step_time": 0.43650102615356445
    },
    {
      "epoch": 5.3399658203125e-05,
      "model_forward_time": 0.11479926109313965,
      "step": 8749
    },
    {
      "epoch": 5.3399658203125e-05,
      "step": 8749,
      "training_step_time": 0.41117119789123535
    },
    {
      "epoch": 5.340576171875e-05,
      "grad_norm": 0.23450924456119537,
      "learning_rate": 9.751006611315356e-05,
      "loss": 0.0693,
      "step": 8750
    },
    {
      "epoch": 5.340576171875e-05,
      "model_forward_time": 0.11485075950622559,
      "step": 8750
    },
    {
      "epoch": 5.340576171875e-05,
      "step": 8750,
      "training_step_time": 0.38369178771972656
    },
    {
      "epoch": 5.3411865234375e-05,
      "model_forward_time": 0.11473894119262695,
      "step": 8751
    },
    {
      "epoch": 5.3411865234375e-05,
      "step": 8751,
      "training_step_time": 0.3916301727294922
    },
    {
      "epoch": 5.341796875e-05,
      "model_forward_time": 0.11467194557189941,
      "step": 8752
    },
    {
      "epoch": 5.341796875e-05,
      "step": 8752,
      "training_step_time": 0.38782215118408203
    },
    {
      "epoch": 5.3424072265625e-05,
      "model_forward_time": 0.1161956787109375,
      "step": 8753
    },
    {
      "epoch": 5.3424072265625e-05,
      "step": 8753,
      "training_step_time": 0.72737717628479
    },
    {
      "epoch": 5.343017578125e-05,
      "model_forward_time": 0.11879706382751465,
      "step": 8754
    },
    {
      "epoch": 5.343017578125e-05,
      "step": 8754,
      "training_step_time": 0.37975239753723145
    },
    {
      "epoch": 5.3436279296875e-05,
      "model_forward_time": 0.11778950691223145,
      "step": 8755
    },
    {
      "epoch": 5.3436279296875e-05,
      "step": 8755,
      "training_step_time": 0.3791482448577881
    },
    {
      "epoch": 5.34423828125e-05,
      "model_forward_time": 0.11846399307250977,
      "step": 8756
    },
    {
      "epoch": 5.34423828125e-05,
      "step": 8756,
      "training_step_time": 0.38933897018432617
    },
    {
      "epoch": 5.3448486328125e-05,
      "model_forward_time": 0.11777663230895996,
      "step": 8757
    },
    {
      "epoch": 5.3448486328125e-05,
      "step": 8757,
      "training_step_time": 0.375321626663208
    },
    {
      "epoch": 5.345458984375e-05,
      "model_forward_time": 0.1175084114074707,
      "step": 8758
    },
    {
      "epoch": 5.345458984375e-05,
      "step": 8758,
      "training_step_time": 0.40359997749328613
    },
    {
      "epoch": 5.3460693359375e-05,
      "model_forward_time": 0.11731314659118652,
      "step": 8759
    },
    {
      "epoch": 5.3460693359375e-05,
      "step": 8759,
      "training_step_time": 1.1176323890686035
    },
    {
      "epoch": 5.3466796875e-05,
      "grad_norm": 0.3348061442375183,
      "learning_rate": 9.750147086550844e-05,
      "loss": 0.0715,
      "step": 8760
    },
    {
      "epoch": 5.3466796875e-05,
      "model_forward_time": 0.1173858642578125,
      "step": 8760
    },
    {
      "epoch": 5.3466796875e-05,
      "step": 8760,
      "training_step_time": 0.45412540435791016
    },
    {
      "epoch": 5.3472900390625e-05,
      "model_forward_time": 0.11503767967224121,
      "step": 8761
    },
    {
      "epoch": 5.3472900390625e-05,
      "step": 8761,
      "training_step_time": 0.40103697776794434
    },
    {
      "epoch": 5.347900390625e-05,
      "model_forward_time": 0.11472630500793457,
      "step": 8762
    },
    {
      "epoch": 5.347900390625e-05,
      "step": 8762,
      "training_step_time": 0.38193607330322266
    },
    {
      "epoch": 5.3485107421875e-05,
      "model_forward_time": 0.11518192291259766,
      "step": 8763
    },
    {
      "epoch": 5.3485107421875e-05,
      "step": 8763,
      "training_step_time": 0.37746644020080566
    },
    {
      "epoch": 5.34912109375e-05,
      "model_forward_time": 0.11423325538635254,
      "step": 8764
    },
    {
      "epoch": 5.34912109375e-05,
      "step": 8764,
      "training_step_time": 0.38881373405456543
    },
    {
      "epoch": 5.3497314453125e-05,
      "model_forward_time": 0.11591458320617676,
      "step": 8765
    },
    {
      "epoch": 5.3497314453125e-05,
      "step": 8765,
      "training_step_time": 0.3996102809906006
    },
    {
      "epoch": 5.350341796875e-05,
      "model_forward_time": 0.11529541015625,
      "step": 8766
    },
    {
      "epoch": 5.350341796875e-05,
      "step": 8766,
      "training_step_time": 0.39608025550842285
    },
    {
      "epoch": 5.3509521484375e-05,
      "model_forward_time": 0.12120437622070312,
      "step": 8767
    },
    {
      "epoch": 5.3509521484375e-05,
      "step": 8767,
      "training_step_time": 0.38736677169799805
    },
    {
      "epoch": 5.3515625e-05,
      "model_forward_time": 0.11943793296813965,
      "step": 8768
    },
    {
      "epoch": 5.3515625e-05,
      "step": 8768,
      "training_step_time": 0.3891122341156006
    },
    {
      "epoch": 5.3521728515625e-05,
      "model_forward_time": 0.11773800849914551,
      "step": 8769
    },
    {
      "epoch": 5.3521728515625e-05,
      "step": 8769,
      "training_step_time": 0.38115572929382324
    },
    {
      "epoch": 5.352783203125e-05,
      "grad_norm": 0.21496833860874176,
      "learning_rate": 9.749286118817067e-05,
      "loss": 0.0651,
      "step": 8770
    },
    {
      "epoch": 5.352783203125e-05,
      "model_forward_time": 0.11876702308654785,
      "step": 8770
    },
    {
      "epoch": 5.352783203125e-05,
      "step": 8770,
      "training_step_time": 0.3859372138977051
    },
    {
      "epoch": 5.3533935546875e-05,
      "model_forward_time": 0.11873292922973633,
      "step": 8771
    },
    {
      "epoch": 5.3533935546875e-05,
      "step": 8771,
      "training_step_time": 0.6521186828613281
    },
    {
      "epoch": 5.35400390625e-05,
      "model_forward_time": 0.1186673641204834,
      "step": 8772
    },
    {
      "epoch": 5.35400390625e-05,
      "step": 8772,
      "training_step_time": 0.44197559356689453
    },
    {
      "epoch": 5.3546142578125e-05,
      "model_forward_time": 0.1173095703125,
      "step": 8773
    },
    {
      "epoch": 5.3546142578125e-05,
      "step": 8773,
      "training_step_time": 0.42766642570495605
    },
    {
      "epoch": 5.355224609375e-05,
      "model_forward_time": 0.11542010307312012,
      "step": 8774
    },
    {
      "epoch": 5.355224609375e-05,
      "step": 8774,
      "training_step_time": 0.4782445430755615
    },
    {
      "epoch": 5.3558349609375e-05,
      "model_forward_time": 0.11471319198608398,
      "step": 8775
    },
    {
      "epoch": 5.3558349609375e-05,
      "step": 8775,
      "training_step_time": 0.4205503463745117
    },
    {
      "epoch": 5.3564453125e-05,
      "model_forward_time": 0.11483597755432129,
      "step": 8776
    },
    {
      "epoch": 5.3564453125e-05,
      "step": 8776,
      "training_step_time": 0.3896019458770752
    },
    {
      "epoch": 5.3570556640625e-05,
      "model_forward_time": 0.11556649208068848,
      "step": 8777
    },
    {
      "epoch": 5.3570556640625e-05,
      "step": 8777,
      "training_step_time": 0.4094877243041992
    },
    {
      "epoch": 5.357666015625e-05,
      "model_forward_time": 0.11509537696838379,
      "step": 8778
    },
    {
      "epoch": 5.357666015625e-05,
      "step": 8778,
      "training_step_time": 0.37410855293273926
    },
    {
      "epoch": 5.3582763671875e-05,
      "model_forward_time": 0.11595392227172852,
      "step": 8779
    },
    {
      "epoch": 5.3582763671875e-05,
      "step": 8779,
      "training_step_time": 0.3894062042236328
    },
    {
      "epoch": 5.35888671875e-05,
      "grad_norm": 0.2900720536708832,
      "learning_rate": 9.748423708375563e-05,
      "loss": 0.0695,
      "step": 8780
    },
    {
      "epoch": 5.35888671875e-05,
      "model_forward_time": 0.11595010757446289,
      "step": 8780
    },
    {
      "epoch": 5.35888671875e-05,
      "step": 8780,
      "training_step_time": 0.3890855312347412
    },
    {
      "epoch": 5.3594970703125e-05,
      "model_forward_time": 0.11586737632751465,
      "step": 8781
    },
    {
      "epoch": 5.3594970703125e-05,
      "step": 8781,
      "training_step_time": 0.38466811180114746
    },
    {
      "epoch": 5.360107421875e-05,
      "model_forward_time": 0.11566448211669922,
      "step": 8782
    },
    {
      "epoch": 5.360107421875e-05,
      "step": 8782,
      "training_step_time": 0.38957738876342773
    },
    {
      "epoch": 5.3607177734375e-05,
      "model_forward_time": 0.11548280715942383,
      "step": 8783
    },
    {
      "epoch": 5.3607177734375e-05,
      "step": 8783,
      "training_step_time": 0.7823333740234375
    },
    {
      "epoch": 5.361328125e-05,
      "model_forward_time": 0.1156313419342041,
      "step": 8784
    },
    {
      "epoch": 5.361328125e-05,
      "step": 8784,
      "training_step_time": 0.3738417625427246
    },
    {
      "epoch": 5.3619384765625e-05,
      "model_forward_time": 0.11492800712585449,
      "step": 8785
    },
    {
      "epoch": 5.3619384765625e-05,
      "step": 8785,
      "training_step_time": 0.3943302631378174
    },
    {
      "epoch": 5.362548828125e-05,
      "model_forward_time": 0.11527824401855469,
      "step": 8786
    },
    {
      "epoch": 5.362548828125e-05,
      "step": 8786,
      "training_step_time": 0.4604976177215576
    },
    {
      "epoch": 5.3631591796875e-05,
      "model_forward_time": 0.1147303581237793,
      "step": 8787
    },
    {
      "epoch": 5.3631591796875e-05,
      "step": 8787,
      "training_step_time": 0.41361021995544434
    },
    {
      "epoch": 5.36376953125e-05,
      "model_forward_time": 0.11503458023071289,
      "step": 8788
    },
    {
      "epoch": 5.36376953125e-05,
      "step": 8788,
      "training_step_time": 0.4785587787628174
    },
    {
      "epoch": 5.3643798828125e-05,
      "model_forward_time": 0.11454916000366211,
      "step": 8789
    },
    {
      "epoch": 5.3643798828125e-05,
      "step": 8789,
      "training_step_time": 0.615631103515625
    },
    {
      "epoch": 5.364990234375e-05,
      "grad_norm": 0.26830536127090454,
      "learning_rate": 9.747559855488313e-05,
      "loss": 0.0735,
      "step": 8790
    },
    {
      "epoch": 5.364990234375e-05,
      "model_forward_time": 0.11443543434143066,
      "step": 8790
    },
    {
      "epoch": 5.364990234375e-05,
      "step": 8790,
      "training_step_time": 0.3713517189025879
    },
    {
      "epoch": 5.3656005859375e-05,
      "model_forward_time": 0.11446046829223633,
      "step": 8791
    },
    {
      "epoch": 5.3656005859375e-05,
      "step": 8791,
      "training_step_time": 0.41639137268066406
    },
    {
      "epoch": 5.3662109375e-05,
      "model_forward_time": 0.11442232131958008,
      "step": 8792
    },
    {
      "epoch": 5.3662109375e-05,
      "step": 8792,
      "training_step_time": 0.3916938304901123
    },
    {
      "epoch": 5.3668212890625e-05,
      "model_forward_time": 0.11529850959777832,
      "step": 8793
    },
    {
      "epoch": 5.3668212890625e-05,
      "step": 8793,
      "training_step_time": 0.3891327381134033
    },
    {
      "epoch": 5.367431640625e-05,
      "model_forward_time": 0.11519527435302734,
      "step": 8794
    },
    {
      "epoch": 5.367431640625e-05,
      "step": 8794,
      "training_step_time": 0.3834846019744873
    },
    {
      "epoch": 5.3680419921875e-05,
      "model_forward_time": 0.11548352241516113,
      "step": 8795
    },
    {
      "epoch": 5.3680419921875e-05,
      "step": 8795,
      "training_step_time": 0.9343862533569336
    },
    {
      "epoch": 5.36865234375e-05,
      "model_forward_time": 0.1177208423614502,
      "step": 8796
    },
    {
      "epoch": 5.36865234375e-05,
      "step": 8796,
      "training_step_time": 0.37699317932128906
    },
    {
      "epoch": 5.3692626953125e-05,
      "model_forward_time": 0.11770749092102051,
      "step": 8797
    },
    {
      "epoch": 5.3692626953125e-05,
      "step": 8797,
      "training_step_time": 0.3910243511199951
    },
    {
      "epoch": 5.369873046875e-05,
      "model_forward_time": 0.11671257019042969,
      "step": 8798
    },
    {
      "epoch": 5.369873046875e-05,
      "step": 8798,
      "training_step_time": 0.37941884994506836
    },
    {
      "epoch": 5.3704833984375e-05,
      "model_forward_time": 0.11748337745666504,
      "step": 8799
    },
    {
      "epoch": 5.3704833984375e-05,
      "step": 8799,
      "training_step_time": 0.43409085273742676
    },
    {
      "epoch": 5.37109375e-05,
      "grad_norm": 0.22071528434753418,
      "learning_rate": 9.746694560417731e-05,
      "loss": 0.066,
      "step": 8800
    },
    {
      "epoch": 5.37109375e-05,
      "model_forward_time": 0.117919921875,
      "step": 8800
    },
    {
      "epoch": 5.37109375e-05,
      "step": 8800,
      "training_step_time": 0.41138577461242676
    },
    {
      "epoch": 5.3717041015625e-05,
      "model_forward_time": 0.1181342601776123,
      "step": 8801
    },
    {
      "epoch": 5.3717041015625e-05,
      "step": 8801,
      "training_step_time": 0.720853328704834
    },
    {
      "epoch": 5.372314453125e-05,
      "model_forward_time": 0.11828351020812988,
      "step": 8802
    },
    {
      "epoch": 5.372314453125e-05,
      "step": 8802,
      "training_step_time": 0.4019455909729004
    },
    {
      "epoch": 5.3729248046875e-05,
      "model_forward_time": 0.11769437789916992,
      "step": 8803
    },
    {
      "epoch": 5.3729248046875e-05,
      "step": 8803,
      "training_step_time": 0.38243842124938965
    },
    {
      "epoch": 5.37353515625e-05,
      "model_forward_time": 0.11625981330871582,
      "step": 8804
    },
    {
      "epoch": 5.37353515625e-05,
      "step": 8804,
      "training_step_time": 0.3859841823577881
    },
    {
      "epoch": 5.3741455078125e-05,
      "model_forward_time": 0.1170036792755127,
      "step": 8805
    },
    {
      "epoch": 5.3741455078125e-05,
      "step": 8805,
      "training_step_time": 0.3797910213470459
    },
    {
      "epoch": 5.374755859375e-05,
      "model_forward_time": 0.11783361434936523,
      "step": 8806
    },
    {
      "epoch": 5.374755859375e-05,
      "step": 8806,
      "training_step_time": 0.39539551734924316
    },
    {
      "epoch": 5.3753662109375e-05,
      "model_forward_time": 0.11516189575195312,
      "step": 8807
    },
    {
      "epoch": 5.3753662109375e-05,
      "step": 8807,
      "training_step_time": 0.9507408142089844
    },
    {
      "epoch": 5.3759765625e-05,
      "model_forward_time": 0.11484861373901367,
      "step": 8808
    },
    {
      "epoch": 5.3759765625e-05,
      "step": 8808,
      "training_step_time": 0.4100816249847412
    },
    {
      "epoch": 5.3765869140625e-05,
      "model_forward_time": 0.11704373359680176,
      "step": 8809
    },
    {
      "epoch": 5.3765869140625e-05,
      "step": 8809,
      "training_step_time": 0.3919074535369873
    },
    {
      "epoch": 5.377197265625e-05,
      "grad_norm": 0.1289728879928589,
      "learning_rate": 9.74582782342667e-05,
      "loss": 0.067,
      "step": 8810
    },
    {
      "epoch": 5.377197265625e-05,
      "model_forward_time": 0.11685705184936523,
      "step": 8810
    },
    {
      "epoch": 5.377197265625e-05,
      "step": 8810,
      "training_step_time": 0.37506103515625
    },
    {
      "epoch": 5.3778076171875e-05,
      "model_forward_time": 0.11693716049194336,
      "step": 8811
    },
    {
      "epoch": 5.3778076171875e-05,
      "step": 8811,
      "training_step_time": 0.38826942443847656
    },
    {
      "epoch": 5.37841796875e-05,
      "model_forward_time": 0.12063956260681152,
      "step": 8812
    },
    {
      "epoch": 5.37841796875e-05,
      "step": 8812,
      "training_step_time": 0.42244958877563477
    },
    {
      "epoch": 5.3790283203125e-05,
      "model_forward_time": 0.11823725700378418,
      "step": 8813
    },
    {
      "epoch": 5.3790283203125e-05,
      "step": 8813,
      "training_step_time": 0.6805622577667236
    },
    {
      "epoch": 5.379638671875e-05,
      "model_forward_time": 0.11953973770141602,
      "step": 8814
    },
    {
      "epoch": 5.379638671875e-05,
      "step": 8814,
      "training_step_time": 0.3913612365722656
    },
    {
      "epoch": 5.3802490234375e-05,
      "model_forward_time": 0.11533474922180176,
      "step": 8815
    },
    {
      "epoch": 5.3802490234375e-05,
      "step": 8815,
      "training_step_time": 0.4107363224029541
    },
    {
      "epoch": 5.380859375e-05,
      "model_forward_time": 0.11510205268859863,
      "step": 8816
    },
    {
      "epoch": 5.380859375e-05,
      "step": 8816,
      "training_step_time": 0.3885037899017334
    },
    {
      "epoch": 5.3814697265625e-05,
      "model_forward_time": 0.1155247688293457,
      "step": 8817
    },
    {
      "epoch": 5.3814697265625e-05,
      "step": 8817,
      "training_step_time": 0.38857269287109375
    },
    {
      "epoch": 5.382080078125e-05,
      "model_forward_time": 0.11830663681030273,
      "step": 8818
    },
    {
      "epoch": 5.382080078125e-05,
      "step": 8818,
      "training_step_time": 0.37662220001220703
    },
    {
      "epoch": 5.3826904296875e-05,
      "model_forward_time": 0.12136316299438477,
      "step": 8819
    },
    {
      "epoch": 5.3826904296875e-05,
      "step": 8819,
      "training_step_time": 0.7735505104064941
    },
    {
      "epoch": 5.38330078125e-05,
      "grad_norm": 0.2761629521846771,
      "learning_rate": 9.744959644778422e-05,
      "loss": 0.0702,
      "step": 8820
    },
    {
      "epoch": 5.38330078125e-05,
      "model_forward_time": 0.11867165565490723,
      "step": 8820
    },
    {
      "epoch": 5.38330078125e-05,
      "step": 8820,
      "training_step_time": 0.37732696533203125
    },
    {
      "epoch": 5.3839111328125e-05,
      "model_forward_time": 0.11711597442626953,
      "step": 8821
    },
    {
      "epoch": 5.3839111328125e-05,
      "step": 8821,
      "training_step_time": 0.42832183837890625
    },
    {
      "epoch": 5.384521484375e-05,
      "model_forward_time": 0.11558389663696289,
      "step": 8822
    },
    {
      "epoch": 5.384521484375e-05,
      "step": 8822,
      "training_step_time": 0.4048748016357422
    },
    {
      "epoch": 5.3851318359375e-05,
      "model_forward_time": 0.11573648452758789,
      "step": 8823
    },
    {
      "epoch": 5.3851318359375e-05,
      "step": 8823,
      "training_step_time": 0.3752772808074951
    },
    {
      "epoch": 5.3857421875e-05,
      "model_forward_time": 0.11510157585144043,
      "step": 8824
    },
    {
      "epoch": 5.3857421875e-05,
      "step": 8824,
      "training_step_time": 0.37763524055480957
    },
    {
      "epoch": 5.3863525390625e-05,
      "model_forward_time": 0.11780166625976562,
      "step": 8825
    },
    {
      "epoch": 5.3863525390625e-05,
      "step": 8825,
      "training_step_time": 1.2134151458740234
    },
    {
      "epoch": 5.386962890625e-05,
      "model_forward_time": 0.1150212287902832,
      "step": 8826
    },
    {
      "epoch": 5.386962890625e-05,
      "step": 8826,
      "training_step_time": 0.38772082328796387
    },
    {
      "epoch": 5.3875732421875e-05,
      "model_forward_time": 0.11481022834777832,
      "step": 8827
    },
    {
      "epoch": 5.3875732421875e-05,
      "step": 8827,
      "training_step_time": 0.418536901473999
    },
    {
      "epoch": 5.38818359375e-05,
      "model_forward_time": 0.11449503898620605,
      "step": 8828
    },
    {
      "epoch": 5.38818359375e-05,
      "step": 8828,
      "training_step_time": 0.3993687629699707
    },
    {
      "epoch": 5.3887939453125e-05,
      "model_forward_time": 0.1146702766418457,
      "step": 8829
    },
    {
      "epoch": 5.3887939453125e-05,
      "step": 8829,
      "training_step_time": 0.3815746307373047
    },
    {
      "epoch": 5.389404296875e-05,
      "grad_norm": 0.17429935932159424,
      "learning_rate": 9.744090024736719e-05,
      "loss": 0.0678,
      "step": 8830
    },
    {
      "epoch": 5.389404296875e-05,
      "model_forward_time": 0.11508607864379883,
      "step": 8830
    },
    {
      "epoch": 5.389404296875e-05,
      "step": 8830,
      "training_step_time": 0.37778639793395996
    },
    {
      "epoch": 5.3900146484375e-05,
      "model_forward_time": 0.11643695831298828,
      "step": 8831
    },
    {
      "epoch": 5.3900146484375e-05,
      "step": 8831,
      "training_step_time": 0.5143370628356934
    },
    {
      "epoch": 5.390625e-05,
      "model_forward_time": 0.11578106880187988,
      "step": 8832
    },
    {
      "epoch": 5.390625e-05,
      "step": 8832,
      "training_step_time": 0.37311577796936035
    },
    {
      "epoch": 5.3912353515625e-05,
      "model_forward_time": 0.11575937271118164,
      "step": 8833
    },
    {
      "epoch": 5.3912353515625e-05,
      "step": 8833,
      "training_step_time": 0.4243168830871582
    },
    {
      "epoch": 5.391845703125e-05,
      "model_forward_time": 0.11621689796447754,
      "step": 8834
    },
    {
      "epoch": 5.391845703125e-05,
      "step": 8834,
      "training_step_time": 0.4527726173400879
    },
    {
      "epoch": 5.3924560546875e-05,
      "model_forward_time": 0.11583542823791504,
      "step": 8835
    },
    {
      "epoch": 5.3924560546875e-05,
      "step": 8835,
      "training_step_time": 0.39285922050476074
    },
    {
      "epoch": 5.39306640625e-05,
      "model_forward_time": 0.11635613441467285,
      "step": 8836
    },
    {
      "epoch": 5.39306640625e-05,
      "step": 8836,
      "training_step_time": 0.39205002784729004
    },
    {
      "epoch": 5.3936767578125e-05,
      "model_forward_time": 0.11592221260070801,
      "step": 8837
    },
    {
      "epoch": 5.3936767578125e-05,
      "step": 8837,
      "training_step_time": 1.0191216468811035
    },
    {
      "epoch": 5.394287109375e-05,
      "model_forward_time": 0.11521267890930176,
      "step": 8838
    },
    {
      "epoch": 5.394287109375e-05,
      "step": 8838,
      "training_step_time": 0.4501204490661621
    },
    {
      "epoch": 5.3948974609375e-05,
      "model_forward_time": 0.11478042602539062,
      "step": 8839
    },
    {
      "epoch": 5.3948974609375e-05,
      "step": 8839,
      "training_step_time": 0.4225749969482422
    },
    {
      "epoch": 5.3955078125e-05,
      "grad_norm": 0.22161339223384857,
      "learning_rate": 9.743218963565725e-05,
      "loss": 0.063,
      "step": 8840
    },
    {
      "epoch": 5.3955078125e-05,
      "model_forward_time": 0.11474108695983887,
      "step": 8840
    },
    {
      "epoch": 5.3955078125e-05,
      "step": 8840,
      "training_step_time": 0.3913133144378662
    },
    {
      "epoch": 5.3961181640625e-05,
      "model_forward_time": 0.11470746994018555,
      "step": 8841
    },
    {
      "epoch": 5.3961181640625e-05,
      "step": 8841,
      "training_step_time": 0.3850100040435791
    },
    {
      "epoch": 5.396728515625e-05,
      "model_forward_time": 0.11477279663085938,
      "step": 8842
    },
    {
      "epoch": 5.396728515625e-05,
      "step": 8842,
      "training_step_time": 0.38921594619750977
    },
    {
      "epoch": 5.3973388671875e-05,
      "model_forward_time": 0.1156468391418457,
      "step": 8843
    },
    {
      "epoch": 5.3973388671875e-05,
      "step": 8843,
      "training_step_time": 0.6630575656890869
    },
    {
      "epoch": 5.39794921875e-05,
      "model_forward_time": 0.11528921127319336,
      "step": 8844
    },
    {
      "epoch": 5.39794921875e-05,
      "step": 8844,
      "training_step_time": 0.3763270378112793
    },
    {
      "epoch": 5.3985595703125e-05,
      "model_forward_time": 0.1155846118927002,
      "step": 8845
    },
    {
      "epoch": 5.3985595703125e-05,
      "step": 8845,
      "training_step_time": 0.4379997253417969
    },
    {
      "epoch": 5.399169921875e-05,
      "model_forward_time": 0.11496233940124512,
      "step": 8846
    },
    {
      "epoch": 5.399169921875e-05,
      "step": 8846,
      "training_step_time": 0.38842296600341797
    },
    {
      "epoch": 5.3997802734375e-05,
      "model_forward_time": 0.11526083946228027,
      "step": 8847
    },
    {
      "epoch": 5.3997802734375e-05,
      "step": 8847,
      "training_step_time": 0.39307641983032227
    },
    {
      "epoch": 5.400390625e-05,
      "model_forward_time": 0.11651086807250977,
      "step": 8848
    },
    {
      "epoch": 5.400390625e-05,
      "step": 8848,
      "training_step_time": 0.3849647045135498
    },
    {
      "epoch": 5.4010009765625e-05,
      "model_forward_time": 0.11632585525512695,
      "step": 8849
    },
    {
      "epoch": 5.4010009765625e-05,
      "step": 8849,
      "training_step_time": 0.942500114440918
    },
    {
      "epoch": 5.401611328125e-05,
      "grad_norm": 0.22391924262046814,
      "learning_rate": 9.742346461530048e-05,
      "loss": 0.0693,
      "step": 8850
    },
    {
      "epoch": 5.401611328125e-05,
      "model_forward_time": 0.11528396606445312,
      "step": 8850
    },
    {
      "epoch": 5.401611328125e-05,
      "step": 8850,
      "training_step_time": 0.42702484130859375
    },
    {
      "epoch": 5.4022216796875e-05,
      "model_forward_time": 0.11612510681152344,
      "step": 8851
    },
    {
      "epoch": 5.4022216796875e-05,
      "step": 8851,
      "training_step_time": 0.43340349197387695
    },
    {
      "epoch": 5.40283203125e-05,
      "model_forward_time": 0.11527156829833984,
      "step": 8852
    },
    {
      "epoch": 5.40283203125e-05,
      "step": 8852,
      "training_step_time": 0.4385526180267334
    },
    {
      "epoch": 5.4034423828125e-05,
      "model_forward_time": 0.11491250991821289,
      "step": 8853
    },
    {
      "epoch": 5.4034423828125e-05,
      "step": 8853,
      "training_step_time": 0.4655442237854004
    },
    {
      "epoch": 5.404052734375e-05,
      "model_forward_time": 0.11545729637145996,
      "step": 8854
    },
    {
      "epoch": 5.404052734375e-05,
      "step": 8854,
      "training_step_time": 0.38927340507507324
    },
    {
      "epoch": 5.4046630859375e-05,
      "model_forward_time": 0.11522698402404785,
      "step": 8855
    },
    {
      "epoch": 5.4046630859375e-05,
      "step": 8855,
      "training_step_time": 0.39252138137817383
    },
    {
      "epoch": 5.4052734375e-05,
      "model_forward_time": 0.11609482765197754,
      "step": 8856
    },
    {
      "epoch": 5.4052734375e-05,
      "step": 8856,
      "training_step_time": 0.38649749755859375
    },
    {
      "epoch": 5.4058837890625e-05,
      "model_forward_time": 0.11533331871032715,
      "step": 8857
    },
    {
      "epoch": 5.4058837890625e-05,
      "step": 8857,
      "training_step_time": 0.3927772045135498
    },
    {
      "epoch": 5.406494140625e-05,
      "model_forward_time": 0.11716151237487793,
      "step": 8858
    },
    {
      "epoch": 5.406494140625e-05,
      "step": 8858,
      "training_step_time": 0.3959689140319824
    },
    {
      "epoch": 5.4071044921875e-05,
      "model_forward_time": 0.11655211448669434,
      "step": 8859
    },
    {
      "epoch": 5.4071044921875e-05,
      "step": 8859,
      "training_step_time": 0.4160456657409668
    },
    {
      "epoch": 5.40771484375e-05,
      "grad_norm": 0.2011716663837433,
      "learning_rate": 9.74147251889473e-05,
      "loss": 0.067,
      "step": 8860
    },
    {
      "epoch": 5.40771484375e-05,
      "model_forward_time": 0.11604166030883789,
      "step": 8860
    },
    {
      "epoch": 5.40771484375e-05,
      "step": 8860,
      "training_step_time": 0.3935091495513916
    },
    {
      "epoch": 5.4083251953125e-05,
      "model_forward_time": 0.11593389511108398,
      "step": 8861
    },
    {
      "epoch": 5.4083251953125e-05,
      "step": 8861,
      "training_step_time": 0.6821138858795166
    },
    {
      "epoch": 5.408935546875e-05,
      "model_forward_time": 0.11519241333007812,
      "step": 8862
    },
    {
      "epoch": 5.408935546875e-05,
      "step": 8862,
      "training_step_time": 0.37656402587890625
    },
    {
      "epoch": 5.4095458984375e-05,
      "model_forward_time": 0.11576056480407715,
      "step": 8863
    },
    {
      "epoch": 5.4095458984375e-05,
      "step": 8863,
      "training_step_time": 0.40247607231140137
    },
    {
      "epoch": 5.41015625e-05,
      "model_forward_time": 0.11593055725097656,
      "step": 8864
    },
    {
      "epoch": 5.41015625e-05,
      "step": 8864,
      "training_step_time": 0.3964526653289795
    },
    {
      "epoch": 5.4107666015625e-05,
      "model_forward_time": 0.11516666412353516,
      "step": 8865
    },
    {
      "epoch": 5.4107666015625e-05,
      "step": 8865,
      "training_step_time": 0.3666510581970215
    },
    {
      "epoch": 5.411376953125e-05,
      "model_forward_time": 0.11548733711242676,
      "step": 8866
    },
    {
      "epoch": 5.411376953125e-05,
      "step": 8866,
      "training_step_time": 0.44704437255859375
    },
    {
      "epoch": 5.4119873046875e-05,
      "model_forward_time": 0.11562514305114746,
      "step": 8867
    },
    {
      "epoch": 5.4119873046875e-05,
      "step": 8867,
      "training_step_time": 0.4908483028411865
    },
    {
      "epoch": 5.41259765625e-05,
      "model_forward_time": 0.11580204963684082,
      "step": 8868
    },
    {
      "epoch": 5.41259765625e-05,
      "step": 8868,
      "training_step_time": 0.38001227378845215
    },
    {
      "epoch": 5.4132080078125e-05,
      "model_forward_time": 0.11581802368164062,
      "step": 8869
    },
    {
      "epoch": 5.4132080078125e-05,
      "step": 8869,
      "training_step_time": 0.39226651191711426
    },
    {
      "epoch": 5.413818359375e-05,
      "grad_norm": 0.34431999921798706,
      "learning_rate": 9.740597135925253e-05,
      "loss": 0.0619,
      "step": 8870
    },
    {
      "epoch": 5.413818359375e-05,
      "model_forward_time": 0.11570525169372559,
      "step": 8870
    },
    {
      "epoch": 5.413818359375e-05,
      "step": 8870,
      "training_step_time": 0.3853952884674072
    },
    {
      "epoch": 5.4144287109375e-05,
      "model_forward_time": 0.11578631401062012,
      "step": 8871
    },
    {
      "epoch": 5.4144287109375e-05,
      "step": 8871,
      "training_step_time": 0.4252464771270752
    },
    {
      "epoch": 5.4150390625e-05,
      "model_forward_time": 0.11633086204528809,
      "step": 8872
    },
    {
      "epoch": 5.4150390625e-05,
      "step": 8872,
      "training_step_time": 0.4134480953216553
    },
    {
      "epoch": 5.4156494140625e-05,
      "model_forward_time": 0.11639237403869629,
      "step": 8873
    },
    {
      "epoch": 5.4156494140625e-05,
      "step": 8873,
      "training_step_time": 0.7145757675170898
    },
    {
      "epoch": 5.416259765625e-05,
      "model_forward_time": 0.11532163619995117,
      "step": 8874
    },
    {
      "epoch": 5.416259765625e-05,
      "step": 8874,
      "training_step_time": 0.37688493728637695
    },
    {
      "epoch": 5.4168701171875e-05,
      "model_forward_time": 0.1166071891784668,
      "step": 8875
    },
    {
      "epoch": 5.4168701171875e-05,
      "step": 8875,
      "training_step_time": 0.391521692276001
    },
    {
      "epoch": 5.41748046875e-05,
      "model_forward_time": 0.1153407096862793,
      "step": 8876
    },
    {
      "epoch": 5.41748046875e-05,
      "step": 8876,
      "training_step_time": 0.4008631706237793
    },
    {
      "epoch": 5.4180908203125e-05,
      "model_forward_time": 0.1149904727935791,
      "step": 8877
    },
    {
      "epoch": 5.4180908203125e-05,
      "step": 8877,
      "training_step_time": 0.402146577835083
    },
    {
      "epoch": 5.418701171875e-05,
      "model_forward_time": 0.11664223670959473,
      "step": 8878
    },
    {
      "epoch": 5.418701171875e-05,
      "step": 8878,
      "training_step_time": 0.4120657444000244
    },
    {
      "epoch": 5.4193115234375e-05,
      "model_forward_time": 0.11756134033203125,
      "step": 8879
    },
    {
      "epoch": 5.4193115234375e-05,
      "step": 8879,
      "training_step_time": 0.41202783584594727
    },
    {
      "epoch": 5.419921875e-05,
      "grad_norm": 0.24584713578224182,
      "learning_rate": 9.739720312887535e-05,
      "loss": 0.0674,
      "step": 8880
    },
    {
      "epoch": 5.419921875e-05,
      "model_forward_time": 0.11549043655395508,
      "step": 8880
    },
    {
      "epoch": 5.419921875e-05,
      "step": 8880,
      "training_step_time": 0.4511096477508545
    },
    {
      "epoch": 5.4205322265625e-05,
      "model_forward_time": 0.11602354049682617,
      "step": 8881
    },
    {
      "epoch": 5.4205322265625e-05,
      "step": 8881,
      "training_step_time": 0.4378495216369629
    },
    {
      "epoch": 5.421142578125e-05,
      "model_forward_time": 0.11741304397583008,
      "step": 8882
    },
    {
      "epoch": 5.421142578125e-05,
      "step": 8882,
      "training_step_time": 0.39220333099365234
    },
    {
      "epoch": 5.4217529296875e-05,
      "model_forward_time": 0.11584115028381348,
      "step": 8883
    },
    {
      "epoch": 5.4217529296875e-05,
      "step": 8883,
      "training_step_time": 0.3899805545806885
    },
    {
      "epoch": 5.42236328125e-05,
      "model_forward_time": 0.11570286750793457,
      "step": 8884
    },
    {
      "epoch": 5.42236328125e-05,
      "step": 8884,
      "training_step_time": 0.3981742858886719
    },
    {
      "epoch": 5.4229736328125e-05,
      "model_forward_time": 0.11517000198364258,
      "step": 8885
    },
    {
      "epoch": 5.4229736328125e-05,
      "step": 8885,
      "training_step_time": 0.682178258895874
    },
    {
      "epoch": 5.423583984375e-05,
      "model_forward_time": 0.1158149242401123,
      "step": 8886
    },
    {
      "epoch": 5.423583984375e-05,
      "step": 8886,
      "training_step_time": 0.3723785877227783
    },
    {
      "epoch": 5.4241943359375e-05,
      "model_forward_time": 0.11573553085327148,
      "step": 8887
    },
    {
      "epoch": 5.4241943359375e-05,
      "step": 8887,
      "training_step_time": 0.3925137519836426
    },
    {
      "epoch": 5.4248046875e-05,
      "model_forward_time": 0.11522269248962402,
      "step": 8888
    },
    {
      "epoch": 5.4248046875e-05,
      "step": 8888,
      "training_step_time": 0.3934001922607422
    },
    {
      "epoch": 5.4254150390625e-05,
      "model_forward_time": 0.11548423767089844,
      "step": 8889
    },
    {
      "epoch": 5.4254150390625e-05,
      "step": 8889,
      "training_step_time": 0.39789700508117676
    },
    {
      "epoch": 5.426025390625e-05,
      "grad_norm": 0.2946909964084625,
      "learning_rate": 9.73884205004793e-05,
      "loss": 0.0618,
      "step": 8890
    },
    {
      "epoch": 5.426025390625e-05,
      "model_forward_time": 0.11566948890686035,
      "step": 8890
    },
    {
      "epoch": 5.426025390625e-05,
      "step": 8890,
      "training_step_time": 0.39078378677368164
    },
    {
      "epoch": 5.4266357421875e-05,
      "model_forward_time": 0.11551976203918457,
      "step": 8891
    },
    {
      "epoch": 5.4266357421875e-05,
      "step": 8891,
      "training_step_time": 0.7220637798309326
    },
    {
      "epoch": 5.42724609375e-05,
      "model_forward_time": 0.1151435375213623,
      "step": 8892
    },
    {
      "epoch": 5.42724609375e-05,
      "step": 8892,
      "training_step_time": 0.4493081569671631
    },
    {
      "epoch": 5.4278564453125e-05,
      "model_forward_time": 0.11440205574035645,
      "step": 8893
    },
    {
      "epoch": 5.4278564453125e-05,
      "step": 8893,
      "training_step_time": 0.454089879989624
    },
    {
      "epoch": 5.428466796875e-05,
      "model_forward_time": 0.11464452743530273,
      "step": 8894
    },
    {
      "epoch": 5.428466796875e-05,
      "step": 8894,
      "training_step_time": 0.4395792484283447
    },
    {
      "epoch": 5.4290771484375e-05,
      "model_forward_time": 0.11472916603088379,
      "step": 8895
    },
    {
      "epoch": 5.4290771484375e-05,
      "step": 8895,
      "training_step_time": 0.4073004722595215
    },
    {
      "epoch": 5.4296875e-05,
      "model_forward_time": 0.11488485336303711,
      "step": 8896
    },
    {
      "epoch": 5.4296875e-05,
      "step": 8896,
      "training_step_time": 0.4384152889251709
    },
    {
      "epoch": 5.4302978515625e-05,
      "model_forward_time": 0.11530709266662598,
      "step": 8897
    },
    {
      "epoch": 5.4302978515625e-05,
      "step": 8897,
      "training_step_time": 0.9988863468170166
    },
    {
      "epoch": 5.430908203125e-05,
      "model_forward_time": 0.11478567123413086,
      "step": 8898
    },
    {
      "epoch": 5.430908203125e-05,
      "step": 8898,
      "training_step_time": 0.36783862113952637
    },
    {
      "epoch": 5.4315185546875e-05,
      "model_forward_time": 0.11452603340148926,
      "step": 8899
    },
    {
      "epoch": 5.4315185546875e-05,
      "step": 8899,
      "training_step_time": 0.3874223232269287
    },
    {
      "epoch": 5.43212890625e-05,
      "grad_norm": 0.31378307938575745,
      "learning_rate": 9.737962347673231e-05,
      "loss": 0.071,
      "step": 8900
    },
    {
      "epoch": 5.43212890625e-05,
      "model_forward_time": 0.11666345596313477,
      "step": 8900
    },
    {
      "epoch": 5.43212890625e-05,
      "step": 8900,
      "training_step_time": 0.3919517993927002
    },
    {
      "epoch": 5.4327392578125e-05,
      "model_forward_time": 0.1168372631072998,
      "step": 8901
    },
    {
      "epoch": 5.4327392578125e-05,
      "step": 8901,
      "training_step_time": 0.37866640090942383
    },
    {
      "epoch": 5.433349609375e-05,
      "model_forward_time": 0.11675190925598145,
      "step": 8902
    },
    {
      "epoch": 5.433349609375e-05,
      "step": 8902,
      "training_step_time": 0.3743255138397217
    },
    {
      "epoch": 5.4339599609375e-05,
      "model_forward_time": 0.11528801918029785,
      "step": 8903
    },
    {
      "epoch": 5.4339599609375e-05,
      "step": 8903,
      "training_step_time": 0.42007946968078613
    },
    {
      "epoch": 5.4345703125e-05,
      "model_forward_time": 0.11522841453552246,
      "step": 8904
    },
    {
      "epoch": 5.4345703125e-05,
      "step": 8904,
      "training_step_time": 0.3886072635650635
    },
    {
      "epoch": 5.4351806640625e-05,
      "model_forward_time": 0.11536169052124023,
      "step": 8905
    },
    {
      "epoch": 5.4351806640625e-05,
      "step": 8905,
      "training_step_time": 0.4097898006439209
    },
    {
      "epoch": 5.435791015625e-05,
      "model_forward_time": 0.11501741409301758,
      "step": 8906
    },
    {
      "epoch": 5.435791015625e-05,
      "step": 8906,
      "training_step_time": 0.40466976165771484
    },
    {
      "epoch": 5.4364013671875e-05,
      "model_forward_time": 0.11559700965881348,
      "step": 8907
    },
    {
      "epoch": 5.4364013671875e-05,
      "step": 8907,
      "training_step_time": 0.42780470848083496
    },
    {
      "epoch": 5.43701171875e-05,
      "model_forward_time": 0.11567902565002441,
      "step": 8908
    },
    {
      "epoch": 5.43701171875e-05,
      "step": 8908,
      "training_step_time": 0.45964765548706055
    },
    {
      "epoch": 5.4376220703125e-05,
      "model_forward_time": 0.1162106990814209,
      "step": 8909
    },
    {
      "epoch": 5.4376220703125e-05,
      "step": 8909,
      "training_step_time": 0.3960535526275635
    },
    {
      "epoch": 5.438232421875e-05,
      "grad_norm": 0.2065199613571167,
      "learning_rate": 9.73708120603067e-05,
      "loss": 0.0621,
      "step": 8910
    },
    {
      "epoch": 5.438232421875e-05,
      "model_forward_time": 0.11728930473327637,
      "step": 8910
    },
    {
      "epoch": 5.438232421875e-05,
      "step": 8910,
      "training_step_time": 0.40679097175598145
    },
    {
      "epoch": 5.4388427734375e-05,
      "model_forward_time": 0.11568903923034668,
      "step": 8911
    },
    {
      "epoch": 5.4388427734375e-05,
      "step": 8911,
      "training_step_time": 0.3997800350189209
    },
    {
      "epoch": 5.439453125e-05,
      "model_forward_time": 0.1185908317565918,
      "step": 8912
    },
    {
      "epoch": 5.439453125e-05,
      "step": 8912,
      "training_step_time": 0.381760835647583
    },
    {
      "epoch": 5.4400634765625e-05,
      "model_forward_time": 0.1173250675201416,
      "step": 8913
    },
    {
      "epoch": 5.4400634765625e-05,
      "step": 8913,
      "training_step_time": 0.37975049018859863
    },
    {
      "epoch": 5.440673828125e-05,
      "model_forward_time": 0.11817359924316406,
      "step": 8914
    },
    {
      "epoch": 5.440673828125e-05,
      "step": 8914,
      "training_step_time": 0.3891324996948242
    },
    {
      "epoch": 5.4412841796875e-05,
      "model_forward_time": 0.11782622337341309,
      "step": 8915
    },
    {
      "epoch": 5.4412841796875e-05,
      "step": 8915,
      "training_step_time": 0.6201345920562744
    },
    {
      "epoch": 5.44189453125e-05,
      "model_forward_time": 0.11878347396850586,
      "step": 8916
    },
    {
      "epoch": 5.44189453125e-05,
      "step": 8916,
      "training_step_time": 0.3843972682952881
    },
    {
      "epoch": 5.4425048828125e-05,
      "model_forward_time": 0.11739110946655273,
      "step": 8917
    },
    {
      "epoch": 5.4425048828125e-05,
      "step": 8917,
      "training_step_time": 0.3816490173339844
    },
    {
      "epoch": 5.443115234375e-05,
      "model_forward_time": 0.11911821365356445,
      "step": 8918
    },
    {
      "epoch": 5.443115234375e-05,
      "step": 8918,
      "training_step_time": 0.40172696113586426
    },
    {
      "epoch": 5.4437255859375e-05,
      "model_forward_time": 0.1195986270904541,
      "step": 8919
    },
    {
      "epoch": 5.4437255859375e-05,
      "step": 8919,
      "training_step_time": 0.3979685306549072
    },
    {
      "epoch": 5.4443359375e-05,
      "grad_norm": 0.20168857276439667,
      "learning_rate": 9.736198625387916e-05,
      "loss": 0.0646,
      "step": 8920
    },
    {
      "epoch": 5.4443359375e-05,
      "model_forward_time": 0.11627936363220215,
      "step": 8920
    },
    {
      "epoch": 5.4443359375e-05,
      "step": 8920,
      "training_step_time": 0.47863125801086426
    },
    {
      "epoch": 5.4449462890625e-05,
      "model_forward_time": 0.11667537689208984,
      "step": 8921
    },
    {
      "epoch": 5.4449462890625e-05,
      "step": 8921,
      "training_step_time": 0.4772529602050781
    },
    {
      "epoch": 5.445556640625e-05,
      "model_forward_time": 0.11766695976257324,
      "step": 8922
    },
    {
      "epoch": 5.445556640625e-05,
      "step": 8922,
      "training_step_time": 0.47666168212890625
    },
    {
      "epoch": 5.4461669921875e-05,
      "model_forward_time": 0.11602282524108887,
      "step": 8923
    },
    {
      "epoch": 5.4461669921875e-05,
      "step": 8923,
      "training_step_time": 0.42238521575927734
    },
    {
      "epoch": 5.44677734375e-05,
      "model_forward_time": 0.11593365669250488,
      "step": 8924
    },
    {
      "epoch": 5.44677734375e-05,
      "step": 8924,
      "training_step_time": 0.38994693756103516
    },
    {
      "epoch": 5.4473876953125e-05,
      "model_forward_time": 0.11542296409606934,
      "step": 8925
    },
    {
      "epoch": 5.4473876953125e-05,
      "step": 8925,
      "training_step_time": 0.3882303237915039
    },
    {
      "epoch": 5.447998046875e-05,
      "model_forward_time": 0.11592388153076172,
      "step": 8926
    },
    {
      "epoch": 5.447998046875e-05,
      "step": 8926,
      "training_step_time": 0.3907203674316406
    },
    {
      "epoch": 5.4486083984375e-05,
      "model_forward_time": 0.11506891250610352,
      "step": 8927
    },
    {
      "epoch": 5.4486083984375e-05,
      "step": 8927,
      "training_step_time": 0.3987922668457031
    },
    {
      "epoch": 5.44921875e-05,
      "model_forward_time": 0.11659932136535645,
      "step": 8928
    },
    {
      "epoch": 5.44921875e-05,
      "step": 8928,
      "training_step_time": 0.397418737411499
    },
    {
      "epoch": 5.4498291015625e-05,
      "model_forward_time": 0.11602115631103516,
      "step": 8929
    },
    {
      "epoch": 5.4498291015625e-05,
      "step": 8929,
      "training_step_time": 0.3974018096923828
    },
    {
      "epoch": 5.450439453125e-05,
      "grad_norm": 0.20964482426643372,
      "learning_rate": 9.735314606013068e-05,
      "loss": 0.0593,
      "step": 8930
    },
    {
      "epoch": 5.450439453125e-05,
      "model_forward_time": 0.1169893741607666,
      "step": 8930
    },
    {
      "epoch": 5.450439453125e-05,
      "step": 8930,
      "training_step_time": 0.3882899284362793
    },
    {
      "epoch": 5.4510498046875e-05,
      "model_forward_time": 0.11554169654846191,
      "step": 8931
    },
    {
      "epoch": 5.4510498046875e-05,
      "step": 8931,
      "training_step_time": 0.4077944755554199
    },
    {
      "epoch": 5.45166015625e-05,
      "model_forward_time": 0.11579370498657227,
      "step": 8932
    },
    {
      "epoch": 5.45166015625e-05,
      "step": 8932,
      "training_step_time": 0.4004650115966797
    },
    {
      "epoch": 5.4522705078125e-05,
      "model_forward_time": 0.11647963523864746,
      "step": 8933
    },
    {
      "epoch": 5.4522705078125e-05,
      "step": 8933,
      "training_step_time": 0.47798752784729004
    },
    {
      "epoch": 5.452880859375e-05,
      "model_forward_time": 0.11503815650939941,
      "step": 8934
    },
    {
      "epoch": 5.452880859375e-05,
      "step": 8934,
      "training_step_time": 0.4466860294342041
    },
    {
      "epoch": 5.4534912109375e-05,
      "model_forward_time": 0.11518239974975586,
      "step": 8935
    },
    {
      "epoch": 5.4534912109375e-05,
      "step": 8935,
      "training_step_time": 0.3942074775695801
    },
    {
      "epoch": 5.4541015625e-05,
      "model_forward_time": 0.11673521995544434,
      "step": 8936
    },
    {
      "epoch": 5.4541015625e-05,
      "step": 8936,
      "training_step_time": 0.4437136650085449
    },
    {
      "epoch": 5.4547119140625e-05,
      "model_forward_time": 0.1155998706817627,
      "step": 8937
    },
    {
      "epoch": 5.4547119140625e-05,
      "step": 8937,
      "training_step_time": 0.47172069549560547
    },
    {
      "epoch": 5.455322265625e-05,
      "model_forward_time": 0.11691713333129883,
      "step": 8938
    },
    {
      "epoch": 5.455322265625e-05,
      "step": 8938,
      "training_step_time": 0.3898277282714844
    },
    {
      "epoch": 5.4559326171875e-05,
      "model_forward_time": 0.11661553382873535,
      "step": 8939
    },
    {
      "epoch": 5.4559326171875e-05,
      "step": 8939,
      "training_step_time": 0.39075422286987305
    },
    {
      "epoch": 5.45654296875e-05,
      "grad_norm": 0.2320893108844757,
      "learning_rate": 9.734429148174675e-05,
      "loss": 0.0654,
      "step": 8940
    },
    {
      "epoch": 5.45654296875e-05,
      "model_forward_time": 0.1149435043334961,
      "step": 8940
    },
    {
      "epoch": 5.45654296875e-05,
      "step": 8940,
      "training_step_time": 0.38094401359558105
    },
    {
      "epoch": 5.4571533203125e-05,
      "model_forward_time": 0.11591958999633789,
      "step": 8941
    },
    {
      "epoch": 5.4571533203125e-05,
      "step": 8941,
      "training_step_time": 0.39102935791015625
    },
    {
      "epoch": 5.457763671875e-05,
      "model_forward_time": 0.11575150489807129,
      "step": 8942
    },
    {
      "epoch": 5.457763671875e-05,
      "step": 8942,
      "training_step_time": 0.40018534660339355
    },
    {
      "epoch": 5.4583740234375e-05,
      "model_forward_time": 0.11596441268920898,
      "step": 8943
    },
    {
      "epoch": 5.4583740234375e-05,
      "step": 8943,
      "training_step_time": 0.6289498805999756
    },
    {
      "epoch": 5.458984375e-05,
      "model_forward_time": 0.11565876007080078,
      "step": 8944
    },
    {
      "epoch": 5.458984375e-05,
      "step": 8944,
      "training_step_time": 0.3832981586456299
    },
    {
      "epoch": 5.4595947265625e-05,
      "model_forward_time": 0.11574339866638184,
      "step": 8945
    },
    {
      "epoch": 5.4595947265625e-05,
      "step": 8945,
      "training_step_time": 0.4100074768066406
    },
    {
      "epoch": 5.460205078125e-05,
      "model_forward_time": 0.11775875091552734,
      "step": 8946
    },
    {
      "epoch": 5.460205078125e-05,
      "step": 8946,
      "training_step_time": 0.3986201286315918
    },
    {
      "epoch": 5.4608154296875e-05,
      "model_forward_time": 0.1183321475982666,
      "step": 8947
    },
    {
      "epoch": 5.4608154296875e-05,
      "step": 8947,
      "training_step_time": 0.4089493751525879
    },
    {
      "epoch": 5.46142578125e-05,
      "model_forward_time": 0.11918902397155762,
      "step": 8948
    },
    {
      "epoch": 5.46142578125e-05,
      "step": 8948,
      "training_step_time": 0.3823113441467285
    },
    {
      "epoch": 5.4620361328125e-05,
      "model_forward_time": 0.11915135383605957,
      "step": 8949
    },
    {
      "epoch": 5.4620361328125e-05,
      "step": 8949,
      "training_step_time": 0.6473162174224854
    },
    {
      "epoch": 5.462646484375e-05,
      "grad_norm": 0.2217789888381958,
      "learning_rate": 9.733542252141711e-05,
      "loss": 0.0654,
      "step": 8950
    },
    {
      "epoch": 5.462646484375e-05,
      "model_forward_time": 0.12236261367797852,
      "step": 8950
    },
    {
      "epoch": 5.462646484375e-05,
      "step": 8950,
      "training_step_time": 0.4469783306121826
    },
    {
      "epoch": 5.4632568359375e-05,
      "model_forward_time": 0.12299084663391113,
      "step": 8951
    },
    {
      "epoch": 5.4632568359375e-05,
      "step": 8951,
      "training_step_time": 0.4471628665924072
    },
    {
      "epoch": 5.4638671875e-05,
      "model_forward_time": 0.11568069458007812,
      "step": 8952
    },
    {
      "epoch": 5.4638671875e-05,
      "step": 8952,
      "training_step_time": 0.3764679431915283
    },
    {
      "epoch": 5.4644775390625e-05,
      "model_forward_time": 0.11618518829345703,
      "step": 8953
    },
    {
      "epoch": 5.4644775390625e-05,
      "step": 8953,
      "training_step_time": 0.38701391220092773
    },
    {
      "epoch": 5.465087890625e-05,
      "model_forward_time": 0.1150519847869873,
      "step": 8954
    },
    {
      "epoch": 5.465087890625e-05,
      "step": 8954,
      "training_step_time": 0.3811514377593994
    },
    {
      "epoch": 5.4656982421875e-05,
      "model_forward_time": 0.11635065078735352,
      "step": 8955
    },
    {
      "epoch": 5.4656982421875e-05,
      "step": 8955,
      "training_step_time": 1.0375893115997314
    },
    {
      "epoch": 5.46630859375e-05,
      "model_forward_time": 0.11546587944030762,
      "step": 8956
    },
    {
      "epoch": 5.46630859375e-05,
      "step": 8956,
      "training_step_time": 0.3873634338378906
    },
    {
      "epoch": 5.4669189453125e-05,
      "model_forward_time": 0.11660265922546387,
      "step": 8957
    },
    {
      "epoch": 5.4669189453125e-05,
      "step": 8957,
      "training_step_time": 0.38181138038635254
    },
    {
      "epoch": 5.467529296875e-05,
      "model_forward_time": 0.1148073673248291,
      "step": 8958
    },
    {
      "epoch": 5.467529296875e-05,
      "step": 8958,
      "training_step_time": 0.38739681243896484
    },
    {
      "epoch": 5.4681396484375e-05,
      "model_forward_time": 0.11469435691833496,
      "step": 8959
    },
    {
      "epoch": 5.4681396484375e-05,
      "step": 8959,
      "training_step_time": 0.38034892082214355
    },
    {
      "epoch": 5.46875e-05,
      "grad_norm": 0.2682740390300751,
      "learning_rate": 9.732653918183592e-05,
      "loss": 0.0703,
      "step": 8960
    },
    {
      "epoch": 5.46875e-05,
      "model_forward_time": 0.1169426441192627,
      "step": 8960
    },
    {
      "epoch": 5.46875e-05,
      "step": 8960,
      "training_step_time": 0.41961669921875
    },
    {
      "epoch": 5.4693603515625e-05,
      "model_forward_time": 0.1155848503112793,
      "step": 8961
    },
    {
      "epoch": 5.4693603515625e-05,
      "step": 8961,
      "training_step_time": 0.9431867599487305
    },
    {
      "epoch": 5.469970703125e-05,
      "model_forward_time": 0.11484575271606445,
      "step": 8962
    },
    {
      "epoch": 5.469970703125e-05,
      "step": 8962,
      "training_step_time": 0.4000699520111084
    },
    {
      "epoch": 5.4705810546875e-05,
      "model_forward_time": 0.11462044715881348,
      "step": 8963
    },
    {
      "epoch": 5.4705810546875e-05,
      "step": 8963,
      "training_step_time": 0.48784732818603516
    },
    {
      "epoch": 5.47119140625e-05,
      "model_forward_time": 0.11466169357299805,
      "step": 8964
    },
    {
      "epoch": 5.47119140625e-05,
      "step": 8964,
      "training_step_time": 0.374159574508667
    },
    {
      "epoch": 5.4718017578125e-05,
      "model_forward_time": 0.11478734016418457,
      "step": 8965
    },
    {
      "epoch": 5.4718017578125e-05,
      "step": 8965,
      "training_step_time": 0.38424181938171387
    },
    {
      "epoch": 5.472412109375e-05,
      "model_forward_time": 0.11472535133361816,
      "step": 8966
    },
    {
      "epoch": 5.472412109375e-05,
      "step": 8966,
      "training_step_time": 0.3724546432495117
    },
    {
      "epoch": 5.4730224609375e-05,
      "model_forward_time": 0.11526894569396973,
      "step": 8967
    },
    {
      "epoch": 5.4730224609375e-05,
      "step": 8967,
      "training_step_time": 0.45030713081359863
    },
    {
      "epoch": 5.4736328125e-05,
      "model_forward_time": 0.11556839942932129,
      "step": 8968
    },
    {
      "epoch": 5.4736328125e-05,
      "step": 8968,
      "training_step_time": 0.38321971893310547
    },
    {
      "epoch": 5.4742431640625e-05,
      "model_forward_time": 0.11781716346740723,
      "step": 8969
    },
    {
      "epoch": 5.4742431640625e-05,
      "step": 8969,
      "training_step_time": 0.39178991317749023
    },
    {
      "epoch": 5.474853515625e-05,
      "grad_norm": 0.26158738136291504,
      "learning_rate": 9.731764146570173e-05,
      "loss": 0.0599,
      "step": 8970
    },
    {
      "epoch": 5.474853515625e-05,
      "model_forward_time": 0.1191864013671875,
      "step": 8970
    },
    {
      "epoch": 5.474853515625e-05,
      "step": 8970,
      "training_step_time": 0.3800022602081299
    },
    {
      "epoch": 5.4754638671875e-05,
      "model_forward_time": 0.1180415153503418,
      "step": 8971
    },
    {
      "epoch": 5.4754638671875e-05,
      "step": 8971,
      "training_step_time": 0.3810429573059082
    },
    {
      "epoch": 5.47607421875e-05,
      "model_forward_time": 0.11802458763122559,
      "step": 8972
    },
    {
      "epoch": 5.47607421875e-05,
      "step": 8972,
      "training_step_time": 0.3829798698425293
    },
    {
      "epoch": 5.4766845703125e-05,
      "model_forward_time": 0.1174924373626709,
      "step": 8973
    },
    {
      "epoch": 5.4766845703125e-05,
      "step": 8973,
      "training_step_time": 0.8769147396087646
    },
    {
      "epoch": 5.477294921875e-05,
      "model_forward_time": 0.11775660514831543,
      "step": 8974
    },
    {
      "epoch": 5.477294921875e-05,
      "step": 8974,
      "training_step_time": 0.4019942283630371
    },
    {
      "epoch": 5.4779052734375e-05,
      "model_forward_time": 0.11713194847106934,
      "step": 8975
    },
    {
      "epoch": 5.4779052734375e-05,
      "step": 8975,
      "training_step_time": 0.3713967800140381
    },
    {
      "epoch": 5.478515625e-05,
      "model_forward_time": 0.11697268486022949,
      "step": 8976
    },
    {
      "epoch": 5.478515625e-05,
      "step": 8976,
      "training_step_time": 0.41725921630859375
    },
    {
      "epoch": 5.4791259765625e-05,
      "model_forward_time": 0.11690258979797363,
      "step": 8977
    },
    {
      "epoch": 5.4791259765625e-05,
      "step": 8977,
      "training_step_time": 0.45612359046936035
    },
    {
      "epoch": 5.479736328125e-05,
      "model_forward_time": 0.1170511245727539,
      "step": 8978
    },
    {
      "epoch": 5.479736328125e-05,
      "step": 8978,
      "training_step_time": 0.3791067600250244
    },
    {
      "epoch": 5.4803466796875e-05,
      "model_forward_time": 0.11739706993103027,
      "step": 8979
    },
    {
      "epoch": 5.4803466796875e-05,
      "step": 8979,
      "training_step_time": 0.8718094825744629
    },
    {
      "epoch": 5.48095703125e-05,
      "grad_norm": 0.19395993649959564,
      "learning_rate": 9.730872937571739e-05,
      "loss": 0.0652,
      "step": 8980
    },
    {
      "epoch": 5.48095703125e-05,
      "model_forward_time": 0.11482357978820801,
      "step": 8980
    },
    {
      "epoch": 5.48095703125e-05,
      "step": 8980,
      "training_step_time": 0.3883552551269531
    },
    {
      "epoch": 5.4815673828125e-05,
      "model_forward_time": 0.11578917503356934,
      "step": 8981
    },
    {
      "epoch": 5.4815673828125e-05,
      "step": 8981,
      "training_step_time": 0.38539910316467285
    },
    {
      "epoch": 5.482177734375e-05,
      "model_forward_time": 0.11504817008972168,
      "step": 8982
    },
    {
      "epoch": 5.482177734375e-05,
      "step": 8982,
      "training_step_time": 0.3799424171447754
    },
    {
      "epoch": 5.4827880859375e-05,
      "model_forward_time": 0.1145331859588623,
      "step": 8983
    },
    {
      "epoch": 5.4827880859375e-05,
      "step": 8983,
      "training_step_time": 0.3825192451477051
    },
    {
      "epoch": 5.4833984375e-05,
      "model_forward_time": 0.11423969268798828,
      "step": 8984
    },
    {
      "epoch": 5.4833984375e-05,
      "step": 8984,
      "training_step_time": 0.37635159492492676
    },
    {
      "epoch": 5.4840087890625e-05,
      "model_forward_time": 0.1168978214263916,
      "step": 8985
    },
    {
      "epoch": 5.4840087890625e-05,
      "step": 8985,
      "training_step_time": 0.8610882759094238
    },
    {
      "epoch": 5.484619140625e-05,
      "model_forward_time": 0.11505746841430664,
      "step": 8986
    },
    {
      "epoch": 5.484619140625e-05,
      "step": 8986,
      "training_step_time": 0.4491860866546631
    },
    {
      "epoch": 5.4852294921875e-05,
      "model_forward_time": 0.1150515079498291,
      "step": 8987
    },
    {
      "epoch": 5.4852294921875e-05,
      "step": 8987,
      "training_step_time": 0.39757680892944336
    },
    {
      "epoch": 5.48583984375e-05,
      "model_forward_time": 0.11464881896972656,
      "step": 8988
    },
    {
      "epoch": 5.48583984375e-05,
      "step": 8988,
      "training_step_time": 0.45054030418395996
    },
    {
      "epoch": 5.4864501953125e-05,
      "model_forward_time": 0.11713004112243652,
      "step": 8989
    },
    {
      "epoch": 5.4864501953125e-05,
      "step": 8989,
      "training_step_time": 0.41173481941223145
    },
    {
      "epoch": 5.487060546875e-05,
      "grad_norm": 0.18815825879573822,
      "learning_rate": 9.729980291459019e-05,
      "loss": 0.0657,
      "step": 8990
    },
    {
      "epoch": 5.487060546875e-05,
      "model_forward_time": 0.11467218399047852,
      "step": 8990
    },
    {
      "epoch": 5.487060546875e-05,
      "step": 8990,
      "training_step_time": 0.4750204086303711
    },
    {
      "epoch": 5.4876708984375e-05,
      "model_forward_time": 0.11542725563049316,
      "step": 8991
    },
    {
      "epoch": 5.4876708984375e-05,
      "step": 8991,
      "training_step_time": 0.5843369960784912
    },
    {
      "epoch": 5.48828125e-05,
      "model_forward_time": 0.11582589149475098,
      "step": 8992
    },
    {
      "epoch": 5.48828125e-05,
      "step": 8992,
      "training_step_time": 0.3865947723388672
    },
    {
      "epoch": 5.4888916015625e-05,
      "model_forward_time": 0.11491942405700684,
      "step": 8993
    },
    {
      "epoch": 5.4888916015625e-05,
      "step": 8993,
      "training_step_time": 0.38829708099365234
    },
    {
      "epoch": 5.489501953125e-05,
      "model_forward_time": 0.11537909507751465,
      "step": 8994
    },
    {
      "epoch": 5.489501953125e-05,
      "step": 8994,
      "training_step_time": 0.42245912551879883
    },
    {
      "epoch": 5.4901123046875e-05,
      "model_forward_time": 0.1157383918762207,
      "step": 8995
    },
    {
      "epoch": 5.4901123046875e-05,
      "step": 8995,
      "training_step_time": 0.39504170417785645
    },
    {
      "epoch": 5.49072265625e-05,
      "model_forward_time": 0.11524057388305664,
      "step": 8996
    },
    {
      "epoch": 5.49072265625e-05,
      "step": 8996,
      "training_step_time": 0.39991259574890137
    },
    {
      "epoch": 5.4913330078125e-05,
      "model_forward_time": 0.11662721633911133,
      "step": 8997
    },
    {
      "epoch": 5.4913330078125e-05,
      "step": 8997,
      "training_step_time": 0.5233049392700195
    },
    {
      "epoch": 5.491943359375e-05,
      "model_forward_time": 0.11576151847839355,
      "step": 8998
    },
    {
      "epoch": 5.491943359375e-05,
      "step": 8998,
      "training_step_time": 0.4213387966156006
    },
    {
      "epoch": 5.4925537109375e-05,
      "model_forward_time": 0.11545419692993164,
      "step": 8999
    },
    {
      "epoch": 5.4925537109375e-05,
      "step": 8999,
      "training_step_time": 0.42482757568359375
    },
    {
      "epoch": 5.4931640625e-05,
      "grad_norm": 0.2681264877319336,
      "learning_rate": 9.729086208503174e-05,
      "loss": 0.072,
      "step": 9000
    },
    {
      "epoch": 5.4931640625e-05,
      "model_forward_time": 0.11771893501281738,
      "step": 9000
    },
    {
      "epoch": 5.4931640625e-05,
      "step": 9000,
      "training_step_time": 0.36934638023376465
    },
    {
      "epoch": 5.4937744140625e-05,
      "model_forward_time": 0.11334443092346191,
      "step": 9001
    },
    {
      "epoch": 5.4937744140625e-05,
      "step": 9001,
      "training_step_time": 0.38375210762023926
    },
    {
      "epoch": 5.494384765625e-05,
      "model_forward_time": 0.11399674415588379,
      "step": 9002
    },
    {
      "epoch": 5.494384765625e-05,
      "step": 9002,
      "training_step_time": 0.372067928314209
    },
    {
      "epoch": 5.4949951171875e-05,
      "model_forward_time": 0.11440205574035645,
      "step": 9003
    },
    {
      "epoch": 5.4949951171875e-05,
      "step": 9003,
      "training_step_time": 0.39852309226989746
    },
    {
      "epoch": 5.49560546875e-05,
      "model_forward_time": 0.11490249633789062,
      "step": 9004
    },
    {
      "epoch": 5.49560546875e-05,
      "step": 9004,
      "training_step_time": 0.43723249435424805
    },
    {
      "epoch": 5.4962158203125e-05,
      "model_forward_time": 0.11548876762390137,
      "step": 9005
    },
    {
      "epoch": 5.4962158203125e-05,
      "step": 9005,
      "training_step_time": 0.430936336517334
    },
    {
      "epoch": 5.496826171875e-05,
      "model_forward_time": 0.11504387855529785,
      "step": 9006
    },
    {
      "epoch": 5.496826171875e-05,
      "step": 9006,
      "training_step_time": 0.39524292945861816
    },
    {
      "epoch": 5.4974365234375e-05,
      "model_forward_time": 0.11568784713745117,
      "step": 9007
    },
    {
      "epoch": 5.4974365234375e-05,
      "step": 9007,
      "training_step_time": 0.38770341873168945
    },
    {
      "epoch": 5.498046875e-05,
      "model_forward_time": 0.11562991142272949,
      "step": 9008
    },
    {
      "epoch": 5.498046875e-05,
      "step": 9008,
      "training_step_time": 0.3915691375732422
    },
    {
      "epoch": 5.4986572265625e-05,
      "model_forward_time": 0.11533617973327637,
      "step": 9009
    },
    {
      "epoch": 5.4986572265625e-05,
      "step": 9009,
      "training_step_time": 0.39357900619506836
    },
    {
      "epoch": 5.499267578125e-05,
      "grad_norm": 0.3236185610294342,
      "learning_rate": 9.728190688975804e-05,
      "loss": 0.059,
      "step": 9010
    },
    {
      "epoch": 5.499267578125e-05,
      "model_forward_time": 0.11622476577758789,
      "step": 9010
    },
    {
      "epoch": 5.499267578125e-05,
      "step": 9010,
      "training_step_time": 0.46393394470214844
    },
    {
      "epoch": 5.4998779296875e-05,
      "model_forward_time": 0.11740255355834961,
      "step": 9011
    },
    {
      "epoch": 5.4998779296875e-05,
      "step": 9011,
      "training_step_time": 0.3874013423919678
    },
    {
      "epoch": 5.50048828125e-05,
      "model_forward_time": 0.11916518211364746,
      "step": 9012
    },
    {
      "epoch": 5.50048828125e-05,
      "step": 9012,
      "training_step_time": 0.3820481300354004
    },
    {
      "epoch": 5.5010986328125e-05,
      "model_forward_time": 0.11797714233398438,
      "step": 9013
    },
    {
      "epoch": 5.5010986328125e-05,
      "step": 9013,
      "training_step_time": 0.3938024044036865
    },
    {
      "epoch": 5.501708984375e-05,
      "model_forward_time": 0.11824774742126465,
      "step": 9014
    },
    {
      "epoch": 5.501708984375e-05,
      "step": 9014,
      "training_step_time": 0.3923811912536621
    },
    {
      "epoch": 5.5023193359375e-05,
      "model_forward_time": 0.11737513542175293,
      "step": 9015
    },
    {
      "epoch": 5.5023193359375e-05,
      "step": 9015,
      "training_step_time": 0.48114895820617676
    },
    {
      "epoch": 5.5029296875e-05,
      "model_forward_time": 0.14236736297607422,
      "step": 9016
    },
    {
      "epoch": 5.5029296875e-05,
      "step": 9016,
      "training_step_time": 0.46076440811157227
    },
    {
      "epoch": 5.5035400390625e-05,
      "model_forward_time": 0.11940860748291016,
      "step": 9017
    },
    {
      "epoch": 5.5035400390625e-05,
      "step": 9017,
      "training_step_time": 0.3807337284088135
    },
    {
      "epoch": 5.504150390625e-05,
      "model_forward_time": 0.11674618721008301,
      "step": 9018
    },
    {
      "epoch": 5.504150390625e-05,
      "step": 9018,
      "training_step_time": 0.4008965492248535
    },
    {
      "epoch": 5.5047607421875e-05,
      "model_forward_time": 0.11690330505371094,
      "step": 9019
    },
    {
      "epoch": 5.5047607421875e-05,
      "step": 9019,
      "training_step_time": 0.4062483310699463
    },
    {
      "epoch": 5.50537109375e-05,
      "grad_norm": 0.2918623089790344,
      "learning_rate": 9.727293733148942e-05,
      "loss": 0.0602,
      "step": 9020
    },
    {
      "epoch": 5.50537109375e-05,
      "model_forward_time": 0.11584806442260742,
      "step": 9020
    },
    {
      "epoch": 5.50537109375e-05,
      "step": 9020,
      "training_step_time": 0.46903467178344727
    },
    {
      "epoch": 5.5059814453125e-05,
      "model_forward_time": 0.1166074275970459,
      "step": 9021
    },
    {
      "epoch": 5.5059814453125e-05,
      "step": 9021,
      "training_step_time": 0.39542269706726074
    },
    {
      "epoch": 5.506591796875e-05,
      "model_forward_time": 0.11651325225830078,
      "step": 9022
    },
    {
      "epoch": 5.506591796875e-05,
      "step": 9022,
      "training_step_time": 0.39269089698791504
    },
    {
      "epoch": 5.5072021484375e-05,
      "model_forward_time": 0.11669111251831055,
      "step": 9023
    },
    {
      "epoch": 5.5072021484375e-05,
      "step": 9023,
      "training_step_time": 0.39885544776916504
    },
    {
      "epoch": 5.5078125e-05,
      "model_forward_time": 0.11596035957336426,
      "step": 9024
    },
    {
      "epoch": 5.5078125e-05,
      "step": 9024,
      "training_step_time": 0.3856315612792969
    },
    {
      "epoch": 5.5084228515625e-05,
      "model_forward_time": 0.11637353897094727,
      "step": 9025
    },
    {
      "epoch": 5.5084228515625e-05,
      "step": 9025,
      "training_step_time": 0.3957846164703369
    },
    {
      "epoch": 5.509033203125e-05,
      "model_forward_time": 0.11783146858215332,
      "step": 9026
    },
    {
      "epoch": 5.509033203125e-05,
      "step": 9026,
      "training_step_time": 0.40784740447998047
    },
    {
      "epoch": 5.5096435546875e-05,
      "model_forward_time": 0.11667919158935547,
      "step": 9027
    },
    {
      "epoch": 5.5096435546875e-05,
      "step": 9027,
      "training_step_time": 0.39714980125427246
    },
    {
      "epoch": 5.51025390625e-05,
      "model_forward_time": 0.11865520477294922,
      "step": 9028
    },
    {
      "epoch": 5.51025390625e-05,
      "step": 9028,
      "training_step_time": 0.3990781307220459
    },
    {
      "epoch": 5.5108642578125e-05,
      "model_forward_time": 0.11606860160827637,
      "step": 9029
    },
    {
      "epoch": 5.5108642578125e-05,
      "step": 9029,
      "training_step_time": 0.4061264991760254
    },
    {
      "epoch": 5.511474609375e-05,
      "grad_norm": 0.19444561004638672,
      "learning_rate": 9.726395341295062e-05,
      "loss": 0.061,
      "step": 9030
    },
    {
      "epoch": 5.511474609375e-05,
      "model_forward_time": 0.11907172203063965,
      "step": 9030
    },
    {
      "epoch": 5.511474609375e-05,
      "step": 9030,
      "training_step_time": 0.41385579109191895
    },
    {
      "epoch": 5.5120849609375e-05,
      "model_forward_time": 0.11664438247680664,
      "step": 9031
    },
    {
      "epoch": 5.5120849609375e-05,
      "step": 9031,
      "training_step_time": 0.3908107280731201
    },
    {
      "epoch": 5.5126953125e-05,
      "model_forward_time": 0.11771297454833984,
      "step": 9032
    },
    {
      "epoch": 5.5126953125e-05,
      "step": 9032,
      "training_step_time": 0.3854079246520996
    },
    {
      "epoch": 5.5133056640625e-05,
      "model_forward_time": 0.11737465858459473,
      "step": 9033
    },
    {
      "epoch": 5.5133056640625e-05,
      "step": 9033,
      "training_step_time": 0.44339776039123535
    },
    {
      "epoch": 5.513916015625e-05,
      "model_forward_time": 0.12204170227050781,
      "step": 9034
    },
    {
      "epoch": 5.513916015625e-05,
      "step": 9034,
      "training_step_time": 0.4934723377227783
    },
    {
      "epoch": 5.5145263671875e-05,
      "model_forward_time": 0.1178746223449707,
      "step": 9035
    },
    {
      "epoch": 5.5145263671875e-05,
      "step": 9035,
      "training_step_time": 0.42290377616882324
    },
    {
      "epoch": 5.51513671875e-05,
      "model_forward_time": 0.1164863109588623,
      "step": 9036
    },
    {
      "epoch": 5.51513671875e-05,
      "step": 9036,
      "training_step_time": 0.38062071800231934
    },
    {
      "epoch": 5.5157470703125e-05,
      "model_forward_time": 0.11739802360534668,
      "step": 9037
    },
    {
      "epoch": 5.5157470703125e-05,
      "step": 9037,
      "training_step_time": 0.7134463787078857
    },
    {
      "epoch": 5.516357421875e-05,
      "model_forward_time": 0.11700725555419922,
      "step": 9038
    },
    {
      "epoch": 5.516357421875e-05,
      "step": 9038,
      "training_step_time": 0.3732483386993408
    },
    {
      "epoch": 5.5169677734375e-05,
      "model_forward_time": 0.11754894256591797,
      "step": 9039
    },
    {
      "epoch": 5.5169677734375e-05,
      "step": 9039,
      "training_step_time": 0.39882826805114746
    },
    {
      "epoch": 5.517578125e-05,
      "grad_norm": 0.20167475938796997,
      "learning_rate": 9.72549551368707e-05,
      "loss": 0.0617,
      "step": 9040
    },
    {
      "epoch": 5.517578125e-05,
      "model_forward_time": 0.11976480484008789,
      "step": 9040
    },
    {
      "epoch": 5.517578125e-05,
      "step": 9040,
      "training_step_time": 0.3881819248199463
    },
    {
      "epoch": 5.5181884765625e-05,
      "model_forward_time": 0.11692523956298828,
      "step": 9041
    },
    {
      "epoch": 5.5181884765625e-05,
      "step": 9041,
      "training_step_time": 0.38399696350097656
    },
    {
      "epoch": 5.518798828125e-05,
      "model_forward_time": 0.11751818656921387,
      "step": 9042
    },
    {
      "epoch": 5.518798828125e-05,
      "step": 9042,
      "training_step_time": 0.38742589950561523
    },
    {
      "epoch": 5.5194091796875e-05,
      "model_forward_time": 0.11764860153198242,
      "step": 9043
    },
    {
      "epoch": 5.5194091796875e-05,
      "step": 9043,
      "training_step_time": 0.4798297882080078
    },
    {
      "epoch": 5.52001953125e-05,
      "model_forward_time": 0.1184546947479248,
      "step": 9044
    },
    {
      "epoch": 5.52001953125e-05,
      "step": 9044,
      "training_step_time": 0.47434258460998535
    },
    {
      "epoch": 5.5206298828125e-05,
      "model_forward_time": 0.11792826652526855,
      "step": 9045
    },
    {
      "epoch": 5.5206298828125e-05,
      "step": 9045,
      "training_step_time": 0.3780512809753418
    },
    {
      "epoch": 5.521240234375e-05,
      "model_forward_time": 0.11829471588134766,
      "step": 9046
    },
    {
      "epoch": 5.521240234375e-05,
      "step": 9046,
      "training_step_time": 0.375931978225708
    },
    {
      "epoch": 5.5218505859375e-05,
      "model_forward_time": 0.11763978004455566,
      "step": 9047
    },
    {
      "epoch": 5.5218505859375e-05,
      "step": 9047,
      "training_step_time": 0.46323680877685547
    },
    {
      "epoch": 5.5224609375e-05,
      "model_forward_time": 0.1192789077758789,
      "step": 9048
    },
    {
      "epoch": 5.5224609375e-05,
      "step": 9048,
      "training_step_time": 0.44974470138549805
    },
    {
      "epoch": 5.5230712890625e-05,
      "model_forward_time": 0.11707568168640137,
      "step": 9049
    },
    {
      "epoch": 5.5230712890625e-05,
      "step": 9049,
      "training_step_time": 0.4534268379211426
    },
    {
      "epoch": 5.523681640625e-05,
      "grad_norm": 0.24490822851657867,
      "learning_rate": 9.724594250598311e-05,
      "loss": 0.0623,
      "step": 9050
    },
    {
      "epoch": 5.523681640625e-05,
      "model_forward_time": 0.11789417266845703,
      "step": 9050
    },
    {
      "epoch": 5.523681640625e-05,
      "step": 9050,
      "training_step_time": 0.38150763511657715
    },
    {
      "epoch": 5.5242919921875e-05,
      "model_forward_time": 0.11762118339538574,
      "step": 9051
    },
    {
      "epoch": 5.5242919921875e-05,
      "step": 9051,
      "training_step_time": 0.3823425769805908
    },
    {
      "epoch": 5.52490234375e-05,
      "model_forward_time": 0.11739730834960938,
      "step": 9052
    },
    {
      "epoch": 5.52490234375e-05,
      "step": 9052,
      "training_step_time": 0.37749552726745605
    },
    {
      "epoch": 5.5255126953125e-05,
      "model_forward_time": 0.11740946769714355,
      "step": 9053
    },
    {
      "epoch": 5.5255126953125e-05,
      "step": 9053,
      "training_step_time": 0.40816807746887207
    },
    {
      "epoch": 5.526123046875e-05,
      "model_forward_time": 0.11880373954772949,
      "step": 9054
    },
    {
      "epoch": 5.526123046875e-05,
      "step": 9054,
      "training_step_time": 0.43900012969970703
    },
    {
      "epoch": 5.5267333984375e-05,
      "model_forward_time": 0.1178438663482666,
      "step": 9055
    },
    {
      "epoch": 5.5267333984375e-05,
      "step": 9055,
      "training_step_time": 0.4716510772705078
    },
    {
      "epoch": 5.52734375e-05,
      "model_forward_time": 0.12051224708557129,
      "step": 9056
    },
    {
      "epoch": 5.52734375e-05,
      "step": 9056,
      "training_step_time": 0.3838672637939453
    },
    {
      "epoch": 5.5279541015625e-05,
      "model_forward_time": 0.12262797355651855,
      "step": 9057
    },
    {
      "epoch": 5.5279541015625e-05,
      "step": 9057,
      "training_step_time": 0.39832639694213867
    },
    {
      "epoch": 5.528564453125e-05,
      "model_forward_time": 0.11881422996520996,
      "step": 9058
    },
    {
      "epoch": 5.528564453125e-05,
      "step": 9058,
      "training_step_time": 0.38357019424438477
    },
    {
      "epoch": 5.5291748046875e-05,
      "model_forward_time": 0.11700558662414551,
      "step": 9059
    },
    {
      "epoch": 5.5291748046875e-05,
      "step": 9059,
      "training_step_time": 0.3778049945831299
    },
    {
      "epoch": 5.52978515625e-05,
      "grad_norm": 0.19002647697925568,
      "learning_rate": 9.723691552302562e-05,
      "loss": 0.063,
      "step": 9060
    },
    {
      "epoch": 5.52978515625e-05,
      "model_forward_time": 0.11720037460327148,
      "step": 9060
    },
    {
      "epoch": 5.52978515625e-05,
      "step": 9060,
      "training_step_time": 0.3763554096221924
    },
    {
      "epoch": 5.5303955078125e-05,
      "model_forward_time": 0.11733198165893555,
      "step": 9061
    },
    {
      "epoch": 5.5303955078125e-05,
      "step": 9061,
      "training_step_time": 0.753394365310669
    },
    {
      "epoch": 5.531005859375e-05,
      "model_forward_time": 0.1172940731048584,
      "step": 9062
    },
    {
      "epoch": 5.531005859375e-05,
      "step": 9062,
      "training_step_time": 0.4096057415008545
    },
    {
      "epoch": 5.5316162109375e-05,
      "model_forward_time": 0.12041473388671875,
      "step": 9063
    },
    {
      "epoch": 5.5316162109375e-05,
      "step": 9063,
      "training_step_time": 0.3819293975830078
    },
    {
      "epoch": 5.5322265625e-05,
      "model_forward_time": 0.11594486236572266,
      "step": 9064
    },
    {
      "epoch": 5.5322265625e-05,
      "step": 9064,
      "training_step_time": 0.3802957534790039
    },
    {
      "epoch": 5.5328369140625e-05,
      "model_forward_time": 0.1182107925415039,
      "step": 9065
    },
    {
      "epoch": 5.5328369140625e-05,
      "step": 9065,
      "training_step_time": 0.38722944259643555
    },
    {
      "epoch": 5.533447265625e-05,
      "model_forward_time": 0.11814069747924805,
      "step": 9066
    },
    {
      "epoch": 5.533447265625e-05,
      "step": 9066,
      "training_step_time": 0.38138294219970703
    },
    {
      "epoch": 5.5340576171875e-05,
      "model_forward_time": 0.11633920669555664,
      "step": 9067
    },
    {
      "epoch": 5.5340576171875e-05,
      "step": 9067,
      "training_step_time": 0.5871360301971436
    },
    {
      "epoch": 5.53466796875e-05,
      "model_forward_time": 0.11671113967895508,
      "step": 9068
    },
    {
      "epoch": 5.53466796875e-05,
      "step": 9068,
      "training_step_time": 0.3708493709564209
    },
    {
      "epoch": 5.5352783203125e-05,
      "model_forward_time": 0.1165621280670166,
      "step": 9069
    },
    {
      "epoch": 5.5352783203125e-05,
      "step": 9069,
      "training_step_time": 0.37728047370910645
    },
    {
      "epoch": 5.535888671875e-05,
      "grad_norm": 0.20538510382175446,
      "learning_rate": 9.722787419074044e-05,
      "loss": 0.063,
      "step": 9070
    },
    {
      "epoch": 5.535888671875e-05,
      "model_forward_time": 0.11752796173095703,
      "step": 9070
    },
    {
      "epoch": 5.535888671875e-05,
      "step": 9070,
      "training_step_time": 0.38605570793151855
    },
    {
      "epoch": 5.5364990234375e-05,
      "model_forward_time": 0.11676478385925293,
      "step": 9071
    },
    {
      "epoch": 5.5364990234375e-05,
      "step": 9071,
      "training_step_time": 0.49626588821411133
    },
    {
      "epoch": 5.537109375e-05,
      "model_forward_time": 0.11822628974914551,
      "step": 9072
    },
    {
      "epoch": 5.537109375e-05,
      "step": 9072,
      "training_step_time": 0.42566347122192383
    },
    {
      "epoch": 5.5377197265625e-05,
      "model_forward_time": 0.11697244644165039,
      "step": 9073
    },
    {
      "epoch": 5.5377197265625e-05,
      "step": 9073,
      "training_step_time": 0.4894979000091553
    },
    {
      "epoch": 5.538330078125e-05,
      "model_forward_time": 0.11711502075195312,
      "step": 9074
    },
    {
      "epoch": 5.538330078125e-05,
      "step": 9074,
      "training_step_time": 0.37175846099853516
    },
    {
      "epoch": 5.5389404296875e-05,
      "model_forward_time": 0.11663603782653809,
      "step": 9075
    },
    {
      "epoch": 5.5389404296875e-05,
      "step": 9075,
      "training_step_time": 0.383312463760376
    },
    {
      "epoch": 5.53955078125e-05,
      "model_forward_time": 0.11720848083496094,
      "step": 9076
    },
    {
      "epoch": 5.53955078125e-05,
      "step": 9076,
      "training_step_time": 0.4356420040130615
    },
    {
      "epoch": 5.5401611328125e-05,
      "model_forward_time": 0.11806774139404297,
      "step": 9077
    },
    {
      "epoch": 5.5401611328125e-05,
      "step": 9077,
      "training_step_time": 0.47335100173950195
    },
    {
      "epoch": 5.540771484375e-05,
      "model_forward_time": 0.11818265914916992,
      "step": 9078
    },
    {
      "epoch": 5.540771484375e-05,
      "step": 9078,
      "training_step_time": 0.37694454193115234
    },
    {
      "epoch": 5.5413818359375e-05,
      "model_forward_time": 0.11792755126953125,
      "step": 9079
    },
    {
      "epoch": 5.5413818359375e-05,
      "step": 9079,
      "training_step_time": 1.1957025527954102
    },
    {
      "epoch": 5.5419921875e-05,
      "grad_norm": 0.33016276359558105,
      "learning_rate": 9.721881851187406e-05,
      "loss": 0.0598,
      "step": 9080
    },
    {
      "epoch": 5.5419921875e-05,
      "model_forward_time": 0.11591577529907227,
      "step": 9080
    },
    {
      "epoch": 5.5419921875e-05,
      "step": 9080,
      "training_step_time": 0.36492323875427246
    },
    {
      "epoch": 5.5426025390625e-05,
      "model_forward_time": 0.11683940887451172,
      "step": 9081
    },
    {
      "epoch": 5.5426025390625e-05,
      "step": 9081,
      "training_step_time": 0.37650179862976074
    },
    {
      "epoch": 5.543212890625e-05,
      "model_forward_time": 0.11676716804504395,
      "step": 9082
    },
    {
      "epoch": 5.543212890625e-05,
      "step": 9082,
      "training_step_time": 0.36818981170654297
    },
    {
      "epoch": 5.5438232421875e-05,
      "model_forward_time": 0.11610984802246094,
      "step": 9083
    },
    {
      "epoch": 5.5438232421875e-05,
      "step": 9083,
      "training_step_time": 0.393352746963501
    },
    {
      "epoch": 5.54443359375e-05,
      "model_forward_time": 0.11720538139343262,
      "step": 9084
    },
    {
      "epoch": 5.54443359375e-05,
      "step": 9084,
      "training_step_time": 0.36830806732177734
    },
    {
      "epoch": 5.5450439453125e-05,
      "model_forward_time": 0.11870193481445312,
      "step": 9085
    },
    {
      "epoch": 5.5450439453125e-05,
      "step": 9085,
      "training_step_time": 0.41867780685424805
    },
    {
      "epoch": 5.545654296875e-05,
      "model_forward_time": 0.11706972122192383,
      "step": 9086
    },
    {
      "epoch": 5.545654296875e-05,
      "step": 9086,
      "training_step_time": 0.3867940902709961
    },
    {
      "epoch": 5.5462646484375e-05,
      "model_forward_time": 0.1170506477355957,
      "step": 9087
    },
    {
      "epoch": 5.5462646484375e-05,
      "step": 9087,
      "training_step_time": 0.39022374153137207
    },
    {
      "epoch": 5.546875e-05,
      "model_forward_time": 0.11684584617614746,
      "step": 9088
    },
    {
      "epoch": 5.546875e-05,
      "step": 9088,
      "training_step_time": 0.3683507442474365
    },
    {
      "epoch": 5.5474853515625e-05,
      "model_forward_time": 0.11667537689208984,
      "step": 9089
    },
    {
      "epoch": 5.5474853515625e-05,
      "step": 9089,
      "training_step_time": 0.41604185104370117
    },
    {
      "epoch": 5.548095703125e-05,
      "grad_norm": 0.21408230066299438,
      "learning_rate": 9.720974848917735e-05,
      "loss": 0.0644,
      "step": 9090
    },
    {
      "epoch": 5.548095703125e-05,
      "model_forward_time": 0.11788034439086914,
      "step": 9090
    },
    {
      "epoch": 5.548095703125e-05,
      "step": 9090,
      "training_step_time": 0.3867344856262207
    },
    {
      "epoch": 5.5487060546875e-05,
      "model_forward_time": 0.1172935962677002,
      "step": 9091
    },
    {
      "epoch": 5.5487060546875e-05,
      "step": 9091,
      "training_step_time": 1.0691421031951904
    },
    {
      "epoch": 5.54931640625e-05,
      "model_forward_time": 0.11521291732788086,
      "step": 9092
    },
    {
      "epoch": 5.54931640625e-05,
      "step": 9092,
      "training_step_time": 0.38161349296569824
    },
    {
      "epoch": 5.5499267578125e-05,
      "model_forward_time": 0.11522698402404785,
      "step": 9093
    },
    {
      "epoch": 5.5499267578125e-05,
      "step": 9093,
      "training_step_time": 0.38504695892333984
    },
    {
      "epoch": 5.550537109375e-05,
      "model_forward_time": 0.11605215072631836,
      "step": 9094
    },
    {
      "epoch": 5.550537109375e-05,
      "step": 9094,
      "training_step_time": 0.3863544464111328
    },
    {
      "epoch": 5.5511474609375e-05,
      "model_forward_time": 0.11562418937683105,
      "step": 9095
    },
    {
      "epoch": 5.5511474609375e-05,
      "step": 9095,
      "training_step_time": 0.3911457061767578
    },
    {
      "epoch": 5.5517578125e-05,
      "model_forward_time": 0.11671233177185059,
      "step": 9096
    },
    {
      "epoch": 5.5517578125e-05,
      "step": 9096,
      "training_step_time": 0.37350940704345703
    },
    {
      "epoch": 5.5523681640625e-05,
      "model_forward_time": 0.11673378944396973,
      "step": 9097
    },
    {
      "epoch": 5.5523681640625e-05,
      "step": 9097,
      "training_step_time": 0.9431722164154053
    },
    {
      "epoch": 5.552978515625e-05,
      "model_forward_time": 0.13359355926513672,
      "step": 9098
    },
    {
      "epoch": 5.552978515625e-05,
      "step": 9098,
      "training_step_time": 0.39492368698120117
    },
    {
      "epoch": 5.5535888671875e-05,
      "model_forward_time": 0.11598777770996094,
      "step": 9099
    },
    {
      "epoch": 5.5535888671875e-05,
      "step": 9099,
      "training_step_time": 0.3771224021911621
    },
    {
      "epoch": 5.55419921875e-05,
      "grad_norm": 0.21429874002933502,
      "learning_rate": 9.720066412540554e-05,
      "loss": 0.065,
      "step": 9100
    },
    {
      "epoch": 5.55419921875e-05,
      "model_forward_time": 0.11776614189147949,
      "step": 9100
    },
    {
      "epoch": 5.55419921875e-05,
      "step": 9100,
      "training_step_time": 0.38656067848205566
    },
    {
      "epoch": 5.5548095703125e-05,
      "model_forward_time": 0.1162257194519043,
      "step": 9101
    },
    {
      "epoch": 5.5548095703125e-05,
      "step": 9101,
      "training_step_time": 0.44707679748535156
    },
    {
      "epoch": 5.555419921875e-05,
      "model_forward_time": 0.1177511215209961,
      "step": 9102
    },
    {
      "epoch": 5.555419921875e-05,
      "step": 9102,
      "training_step_time": 0.4836697578430176
    },
    {
      "epoch": 5.5560302734375e-05,
      "model_forward_time": 0.11722826957702637,
      "step": 9103
    },
    {
      "epoch": 5.5560302734375e-05,
      "step": 9103,
      "training_step_time": 0.681797981262207
    },
    {
      "epoch": 5.556640625e-05,
      "model_forward_time": 0.11686134338378906,
      "step": 9104
    },
    {
      "epoch": 5.556640625e-05,
      "step": 9104,
      "training_step_time": 0.3744008541107178
    },
    {
      "epoch": 5.5572509765625e-05,
      "model_forward_time": 0.11598014831542969,
      "step": 9105
    },
    {
      "epoch": 5.5572509765625e-05,
      "step": 9105,
      "training_step_time": 0.3874647617340088
    },
    {
      "epoch": 5.557861328125e-05,
      "model_forward_time": 0.1164391040802002,
      "step": 9106
    },
    {
      "epoch": 5.557861328125e-05,
      "step": 9106,
      "training_step_time": 0.37345457077026367
    },
    {
      "epoch": 5.5584716796875e-05,
      "model_forward_time": 0.11652255058288574,
      "step": 9107
    },
    {
      "epoch": 5.5584716796875e-05,
      "step": 9107,
      "training_step_time": 0.3835465908050537
    },
    {
      "epoch": 5.55908203125e-05,
      "model_forward_time": 0.11726665496826172,
      "step": 9108
    },
    {
      "epoch": 5.55908203125e-05,
      "step": 9108,
      "training_step_time": 0.39125633239746094
    },
    {
      "epoch": 5.5596923828125e-05,
      "model_forward_time": 0.11825823783874512,
      "step": 9109
    },
    {
      "epoch": 5.5596923828125e-05,
      "step": 9109,
      "training_step_time": 0.7907130718231201
    },
    {
      "epoch": 5.560302734375e-05,
      "grad_norm": 0.20475292205810547,
      "learning_rate": 9.719156542331824e-05,
      "loss": 0.068,
      "step": 9110
    },
    {
      "epoch": 5.560302734375e-05,
      "model_forward_time": 0.11680006980895996,
      "step": 9110
    },
    {
      "epoch": 5.560302734375e-05,
      "step": 9110,
      "training_step_time": 0.3767218589782715
    },
    {
      "epoch": 5.5609130859375e-05,
      "model_forward_time": 0.11730694770812988,
      "step": 9111
    },
    {
      "epoch": 5.5609130859375e-05,
      "step": 9111,
      "training_step_time": 0.4574868679046631
    },
    {
      "epoch": 5.5615234375e-05,
      "model_forward_time": 0.11616754531860352,
      "step": 9112
    },
    {
      "epoch": 5.5615234375e-05,
      "step": 9112,
      "training_step_time": 0.3825345039367676
    },
    {
      "epoch": 5.5621337890625e-05,
      "model_forward_time": 0.11548113822937012,
      "step": 9113
    },
    {
      "epoch": 5.5621337890625e-05,
      "step": 9113,
      "training_step_time": 0.3867509365081787
    },
    {
      "epoch": 5.562744140625e-05,
      "model_forward_time": 0.11906862258911133,
      "step": 9114
    },
    {
      "epoch": 5.562744140625e-05,
      "step": 9114,
      "training_step_time": 0.40720272064208984
    },
    {
      "epoch": 5.5633544921875e-05,
      "model_forward_time": 0.1176457405090332,
      "step": 9115
    },
    {
      "epoch": 5.5633544921875e-05,
      "step": 9115,
      "training_step_time": 0.8344902992248535
    },
    {
      "epoch": 5.56396484375e-05,
      "model_forward_time": 0.11563706398010254,
      "step": 9116
    },
    {
      "epoch": 5.56396484375e-05,
      "step": 9116,
      "training_step_time": 0.38932323455810547
    },
    {
      "epoch": 5.5645751953125e-05,
      "model_forward_time": 0.1152656078338623,
      "step": 9117
    },
    {
      "epoch": 5.5645751953125e-05,
      "step": 9117,
      "training_step_time": 0.3679006099700928
    },
    {
      "epoch": 5.565185546875e-05,
      "model_forward_time": 0.11616873741149902,
      "step": 9118
    },
    {
      "epoch": 5.565185546875e-05,
      "step": 9118,
      "training_step_time": 0.3696000576019287
    },
    {
      "epoch": 5.5657958984375e-05,
      "model_forward_time": 0.11559510231018066,
      "step": 9119
    },
    {
      "epoch": 5.5657958984375e-05,
      "step": 9119,
      "training_step_time": 0.37344956398010254
    },
    {
      "epoch": 5.56640625e-05,
      "grad_norm": 0.20601247251033783,
      "learning_rate": 9.718245238567939e-05,
      "loss": 0.0631,
      "step": 9120
    },
    {
      "epoch": 5.56640625e-05,
      "model_forward_time": 0.11689424514770508,
      "step": 9120
    },
    {
      "epoch": 5.56640625e-05,
      "step": 9120,
      "training_step_time": 0.37102413177490234
    },
    {
      "epoch": 5.5670166015625e-05,
      "model_forward_time": 0.11696434020996094,
      "step": 9121
    },
    {
      "epoch": 5.5670166015625e-05,
      "step": 9121,
      "training_step_time": 0.7823958396911621
    },
    {
      "epoch": 5.567626953125e-05,
      "model_forward_time": 0.11600923538208008,
      "step": 9122
    },
    {
      "epoch": 5.567626953125e-05,
      "step": 9122,
      "training_step_time": 0.3821535110473633
    },
    {
      "epoch": 5.5682373046875e-05,
      "model_forward_time": 0.11579012870788574,
      "step": 9123
    },
    {
      "epoch": 5.5682373046875e-05,
      "step": 9123,
      "training_step_time": 0.41564440727233887
    },
    {
      "epoch": 5.56884765625e-05,
      "model_forward_time": 0.11638116836547852,
      "step": 9124
    },
    {
      "epoch": 5.56884765625e-05,
      "step": 9124,
      "training_step_time": 0.43480563163757324
    },
    {
      "epoch": 5.5694580078125e-05,
      "model_forward_time": 0.11614751815795898,
      "step": 9125
    },
    {
      "epoch": 5.5694580078125e-05,
      "step": 9125,
      "training_step_time": 0.37105393409729004
    },
    {
      "epoch": 5.570068359375e-05,
      "model_forward_time": 0.11709308624267578,
      "step": 9126
    },
    {
      "epoch": 5.570068359375e-05,
      "step": 9126,
      "training_step_time": 0.4093358516693115
    },
    {
      "epoch": 5.5706787109375e-05,
      "model_forward_time": 0.11803364753723145,
      "step": 9127
    },
    {
      "epoch": 5.5706787109375e-05,
      "step": 9127,
      "training_step_time": 0.5374982357025146
    },
    {
      "epoch": 5.5712890625e-05,
      "model_forward_time": 0.11657500267028809,
      "step": 9128
    },
    {
      "epoch": 5.5712890625e-05,
      "step": 9128,
      "training_step_time": 0.41434717178344727
    },
    {
      "epoch": 5.5718994140625e-05,
      "model_forward_time": 0.11710715293884277,
      "step": 9129
    },
    {
      "epoch": 5.5718994140625e-05,
      "step": 9129,
      "training_step_time": 0.4206709861755371
    },
    {
      "epoch": 5.572509765625e-05,
      "grad_norm": 0.2253658026456833,
      "learning_rate": 9.717332501525729e-05,
      "loss": 0.0543,
      "step": 9130
    },
    {
      "epoch": 5.572509765625e-05,
      "model_forward_time": 0.1163640022277832,
      "step": 9130
    },
    {
      "epoch": 5.572509765625e-05,
      "step": 9130,
      "training_step_time": 0.3943016529083252
    },
    {
      "epoch": 5.5731201171875e-05,
      "model_forward_time": 0.1173403263092041,
      "step": 9131
    },
    {
      "epoch": 5.5731201171875e-05,
      "step": 9131,
      "training_step_time": 0.38634681701660156
    },
    {
      "epoch": 5.57373046875e-05,
      "model_forward_time": 0.11840033531188965,
      "step": 9132
    },
    {
      "epoch": 5.57373046875e-05,
      "step": 9132,
      "training_step_time": 0.3788113594055176
    },
    {
      "epoch": 5.5743408203125e-05,
      "model_forward_time": 0.11653804779052734,
      "step": 9133
    },
    {
      "epoch": 5.5743408203125e-05,
      "step": 9133,
      "training_step_time": 0.4420902729034424
    },
    {
      "epoch": 5.574951171875e-05,
      "model_forward_time": 0.11671757698059082,
      "step": 9134
    },
    {
      "epoch": 5.574951171875e-05,
      "step": 9134,
      "training_step_time": 0.40648722648620605
    },
    {
      "epoch": 5.5755615234375e-05,
      "model_forward_time": 0.11780476570129395,
      "step": 9135
    },
    {
      "epoch": 5.5755615234375e-05,
      "step": 9135,
      "training_step_time": 0.3913753032684326
    },
    {
      "epoch": 5.576171875e-05,
      "model_forward_time": 0.11833739280700684,
      "step": 9136
    },
    {
      "epoch": 5.576171875e-05,
      "step": 9136,
      "training_step_time": 0.3878302574157715
    },
    {
      "epoch": 5.5767822265625e-05,
      "model_forward_time": 0.1181795597076416,
      "step": 9137
    },
    {
      "epoch": 5.5767822265625e-05,
      "step": 9137,
      "training_step_time": 0.420180082321167
    },
    {
      "epoch": 5.577392578125e-05,
      "model_forward_time": 0.11700630187988281,
      "step": 9138
    },
    {
      "epoch": 5.577392578125e-05,
      "step": 9138,
      "training_step_time": 0.4191169738769531
    },
    {
      "epoch": 5.5780029296875e-05,
      "model_forward_time": 0.11920404434204102,
      "step": 9139
    },
    {
      "epoch": 5.5780029296875e-05,
      "step": 9139,
      "training_step_time": 0.5780863761901855
    },
    {
      "epoch": 5.57861328125e-05,
      "grad_norm": 0.1183713749051094,
      "learning_rate": 9.716418331482458e-05,
      "loss": 0.0641,
      "step": 9140
    },
    {
      "epoch": 5.57861328125e-05,
      "model_forward_time": 0.11691832542419434,
      "step": 9140
    },
    {
      "epoch": 5.57861328125e-05,
      "step": 9140,
      "training_step_time": 0.43057775497436523
    },
    {
      "epoch": 5.5792236328125e-05,
      "model_forward_time": 0.11853933334350586,
      "step": 9141
    },
    {
      "epoch": 5.5792236328125e-05,
      "step": 9141,
      "training_step_time": 0.43258047103881836
    },
    {
      "epoch": 5.579833984375e-05,
      "model_forward_time": 0.11648392677307129,
      "step": 9142
    },
    {
      "epoch": 5.579833984375e-05,
      "step": 9142,
      "training_step_time": 0.43353724479675293
    },
    {
      "epoch": 5.5804443359375e-05,
      "model_forward_time": 0.11677789688110352,
      "step": 9143
    },
    {
      "epoch": 5.5804443359375e-05,
      "step": 9143,
      "training_step_time": 0.48446011543273926
    },
    {
      "epoch": 5.5810546875e-05,
      "model_forward_time": 0.11620378494262695,
      "step": 9144
    },
    {
      "epoch": 5.5810546875e-05,
      "step": 9144,
      "training_step_time": 0.46880149841308594
    },
    {
      "epoch": 5.5816650390625e-05,
      "model_forward_time": 0.11861729621887207,
      "step": 9145
    },
    {
      "epoch": 5.5816650390625e-05,
      "step": 9145,
      "training_step_time": 0.38997745513916016
    },
    {
      "epoch": 5.582275390625e-05,
      "model_forward_time": 0.1172637939453125,
      "step": 9146
    },
    {
      "epoch": 5.582275390625e-05,
      "step": 9146,
      "training_step_time": 0.38466858863830566
    },
    {
      "epoch": 5.5828857421875e-05,
      "model_forward_time": 0.11725640296936035,
      "step": 9147
    },
    {
      "epoch": 5.5828857421875e-05,
      "step": 9147,
      "training_step_time": 0.38700222969055176
    },
    {
      "epoch": 5.58349609375e-05,
      "model_forward_time": 0.11618781089782715,
      "step": 9148
    },
    {
      "epoch": 5.58349609375e-05,
      "step": 9148,
      "training_step_time": 0.38906383514404297
    },
    {
      "epoch": 5.5841064453125e-05,
      "model_forward_time": 0.1165473461151123,
      "step": 9149
    },
    {
      "epoch": 5.5841064453125e-05,
      "step": 9149,
      "training_step_time": 0.3881542682647705
    },
    {
      "epoch": 5.584716796875e-05,
      "grad_norm": 0.16840794682502747,
      "learning_rate": 9.715502728715826e-05,
      "loss": 0.0652,
      "step": 9150
    },
    {
      "epoch": 5.584716796875e-05,
      "model_forward_time": 0.11841297149658203,
      "step": 9150
    },
    {
      "epoch": 5.584716796875e-05,
      "step": 9150,
      "training_step_time": 0.3786170482635498
    },
    {
      "epoch": 5.5853271484375e-05,
      "model_forward_time": 0.11831116676330566,
      "step": 9151
    },
    {
      "epoch": 5.5853271484375e-05,
      "step": 9151,
      "training_step_time": 0.461275577545166
    },
    {
      "epoch": 5.5859375e-05,
      "model_forward_time": 0.11739134788513184,
      "step": 9152
    },
    {
      "epoch": 5.5859375e-05,
      "step": 9152,
      "training_step_time": 0.4318687915802002
    },
    {
      "epoch": 5.5865478515625e-05,
      "model_forward_time": 0.11757159233093262,
      "step": 9153
    },
    {
      "epoch": 5.5865478515625e-05,
      "step": 9153,
      "training_step_time": 0.45480966567993164
    },
    {
      "epoch": 5.587158203125e-05,
      "model_forward_time": 0.11689162254333496,
      "step": 9154
    },
    {
      "epoch": 5.587158203125e-05,
      "step": 9154,
      "training_step_time": 0.44402599334716797
    },
    {
      "epoch": 5.5877685546875e-05,
      "model_forward_time": 0.11572575569152832,
      "step": 9155
    },
    {
      "epoch": 5.5877685546875e-05,
      "step": 9155,
      "training_step_time": 0.3786039352416992
    },
    {
      "epoch": 5.58837890625e-05,
      "model_forward_time": 0.1162405014038086,
      "step": 9156
    },
    {
      "epoch": 5.58837890625e-05,
      "step": 9156,
      "training_step_time": 0.3753974437713623
    },
    {
      "epoch": 5.5889892578125e-05,
      "model_forward_time": 0.1165013313293457,
      "step": 9157
    },
    {
      "epoch": 5.5889892578125e-05,
      "step": 9157,
      "training_step_time": 0.4206202030181885
    },
    {
      "epoch": 5.589599609375e-05,
      "model_forward_time": 0.11791586875915527,
      "step": 9158
    },
    {
      "epoch": 5.589599609375e-05,
      "step": 9158,
      "training_step_time": 0.46973085403442383
    },
    {
      "epoch": 5.5902099609375e-05,
      "model_forward_time": 0.11654973030090332,
      "step": 9159
    },
    {
      "epoch": 5.5902099609375e-05,
      "step": 9159,
      "training_step_time": 0.3865644931793213
    },
    {
      "epoch": 5.5908203125e-05,
      "grad_norm": 0.2006361186504364,
      "learning_rate": 9.714585693503974e-05,
      "loss": 0.062,
      "step": 9160
    },
    {
      "epoch": 5.5908203125e-05,
      "model_forward_time": 0.11758971214294434,
      "step": 9160
    },
    {
      "epoch": 5.5908203125e-05,
      "step": 9160,
      "training_step_time": 0.37917137145996094
    },
    {
      "epoch": 5.5914306640625e-05,
      "model_forward_time": 0.11700606346130371,
      "step": 9161
    },
    {
      "epoch": 5.5914306640625e-05,
      "step": 9161,
      "training_step_time": 0.37450599670410156
    },
    {
      "epoch": 5.592041015625e-05,
      "model_forward_time": 0.1168053150177002,
      "step": 9162
    },
    {
      "epoch": 5.592041015625e-05,
      "step": 9162,
      "training_step_time": 0.372882604598999
    },
    {
      "epoch": 5.5926513671875e-05,
      "model_forward_time": 0.11674833297729492,
      "step": 9163
    },
    {
      "epoch": 5.5926513671875e-05,
      "step": 9163,
      "training_step_time": 0.4012277126312256
    },
    {
      "epoch": 5.59326171875e-05,
      "model_forward_time": 0.11595869064331055,
      "step": 9164
    },
    {
      "epoch": 5.59326171875e-05,
      "step": 9164,
      "training_step_time": 0.3784308433532715
    },
    {
      "epoch": 5.5938720703125e-05,
      "model_forward_time": 0.11793017387390137,
      "step": 9165
    },
    {
      "epoch": 5.5938720703125e-05,
      "step": 9165,
      "training_step_time": 0.3780860900878906
    },
    {
      "epoch": 5.594482421875e-05,
      "model_forward_time": 0.1165170669555664,
      "step": 9166
    },
    {
      "epoch": 5.594482421875e-05,
      "step": 9166,
      "training_step_time": 0.413118839263916
    },
    {
      "epoch": 5.5950927734375e-05,
      "model_forward_time": 0.1170198917388916,
      "step": 9167
    },
    {
      "epoch": 5.5950927734375e-05,
      "step": 9167,
      "training_step_time": 0.4168128967285156
    },
    {
      "epoch": 5.595703125e-05,
      "model_forward_time": 0.1181480884552002,
      "step": 9168
    },
    {
      "epoch": 5.595703125e-05,
      "step": 9168,
      "training_step_time": 0.47628092765808105
    },
    {
      "epoch": 5.5963134765625e-05,
      "model_forward_time": 0.11727023124694824,
      "step": 9169
    },
    {
      "epoch": 5.5963134765625e-05,
      "step": 9169,
      "training_step_time": 0.37830424308776855
    },
    {
      "epoch": 5.596923828125e-05,
      "grad_norm": 0.2304399013519287,
      "learning_rate": 9.713667226125467e-05,
      "loss": 0.066,
      "step": 9170
    },
    {
      "epoch": 5.596923828125e-05,
      "model_forward_time": 0.11725115776062012,
      "step": 9170
    },
    {
      "epoch": 5.596923828125e-05,
      "step": 9170,
      "training_step_time": 0.41539621353149414
    },
    {
      "epoch": 5.5975341796875e-05,
      "model_forward_time": 0.11634159088134766,
      "step": 9171
    },
    {
      "epoch": 5.5975341796875e-05,
      "step": 9171,
      "training_step_time": 0.3849763870239258
    },
    {
      "epoch": 5.59814453125e-05,
      "model_forward_time": 0.11782622337341309,
      "step": 9172
    },
    {
      "epoch": 5.59814453125e-05,
      "step": 9172,
      "training_step_time": 0.41546058654785156
    },
    {
      "epoch": 5.5987548828125e-05,
      "model_forward_time": 0.1171870231628418,
      "step": 9173
    },
    {
      "epoch": 5.5987548828125e-05,
      "step": 9173,
      "training_step_time": 0.47399306297302246
    },
    {
      "epoch": 5.599365234375e-05,
      "model_forward_time": 0.11733222007751465,
      "step": 9174
    },
    {
      "epoch": 5.599365234375e-05,
      "step": 9174,
      "training_step_time": 0.3744821548461914
    },
    {
      "epoch": 5.5999755859375e-05,
      "model_forward_time": 0.11837482452392578,
      "step": 9175
    },
    {
      "epoch": 5.5999755859375e-05,
      "step": 9175,
      "training_step_time": 0.3787229061126709
    },
    {
      "epoch": 5.6005859375e-05,
      "model_forward_time": 0.11678099632263184,
      "step": 9176
    },
    {
      "epoch": 5.6005859375e-05,
      "step": 9176,
      "training_step_time": 0.3741159439086914
    },
    {
      "epoch": 5.6011962890625e-05,
      "model_forward_time": 0.11820745468139648,
      "step": 9177
    },
    {
      "epoch": 5.6011962890625e-05,
      "step": 9177,
      "training_step_time": 0.37554144859313965
    },
    {
      "epoch": 5.601806640625e-05,
      "model_forward_time": 0.11698436737060547,
      "step": 9178
    },
    {
      "epoch": 5.601806640625e-05,
      "step": 9178,
      "training_step_time": 0.38957834243774414
    },
    {
      "epoch": 5.6024169921875e-05,
      "model_forward_time": 0.11704730987548828,
      "step": 9179
    },
    {
      "epoch": 5.6024169921875e-05,
      "step": 9179,
      "training_step_time": 0.3850877285003662
    },
    {
      "epoch": 5.60302734375e-05,
      "grad_norm": 0.2741745114326477,
      "learning_rate": 9.712747326859315e-05,
      "loss": 0.0649,
      "step": 9180
    },
    {
      "epoch": 5.60302734375e-05,
      "model_forward_time": 0.11988043785095215,
      "step": 9180
    },
    {
      "epoch": 5.60302734375e-05,
      "step": 9180,
      "training_step_time": 0.5667686462402344
    },
    {
      "epoch": 5.6036376953125e-05,
      "model_forward_time": 0.11913371086120605,
      "step": 9181
    },
    {
      "epoch": 5.6036376953125e-05,
      "step": 9181,
      "training_step_time": 0.4548771381378174
    },
    {
      "epoch": 5.604248046875e-05,
      "model_forward_time": 0.11696147918701172,
      "step": 9182
    },
    {
      "epoch": 5.604248046875e-05,
      "step": 9182,
      "training_step_time": 0.40856003761291504
    },
    {
      "epoch": 5.6048583984375e-05,
      "model_forward_time": 0.11773324012756348,
      "step": 9183
    },
    {
      "epoch": 5.6048583984375e-05,
      "step": 9183,
      "training_step_time": 0.3846573829650879
    },
    {
      "epoch": 5.60546875e-05,
      "model_forward_time": 0.11651897430419922,
      "step": 9184
    },
    {
      "epoch": 5.60546875e-05,
      "step": 9184,
      "training_step_time": 0.378903865814209
    },
    {
      "epoch": 5.6060791015625e-05,
      "model_forward_time": 0.11719250679016113,
      "step": 9185
    },
    {
      "epoch": 5.6060791015625e-05,
      "step": 9185,
      "training_step_time": 0.45176243782043457
    },
    {
      "epoch": 5.606689453125e-05,
      "model_forward_time": 0.11665844917297363,
      "step": 9186
    },
    {
      "epoch": 5.606689453125e-05,
      "step": 9186,
      "training_step_time": 0.7379388809204102
    },
    {
      "epoch": 5.6072998046875e-05,
      "model_forward_time": 0.1164090633392334,
      "step": 9187
    },
    {
      "epoch": 5.6072998046875e-05,
      "step": 9187,
      "training_step_time": 0.5155460834503174
    },
    {
      "epoch": 5.60791015625e-05,
      "model_forward_time": 0.11638903617858887,
      "step": 9188
    },
    {
      "epoch": 5.60791015625e-05,
      "step": 9188,
      "training_step_time": 0.38268446922302246
    },
    {
      "epoch": 5.6085205078125e-05,
      "model_forward_time": 0.11644625663757324,
      "step": 9189
    },
    {
      "epoch": 5.6085205078125e-05,
      "step": 9189,
      "training_step_time": 0.38149404525756836
    },
    {
      "epoch": 5.609130859375e-05,
      "grad_norm": 0.22636695206165314,
      "learning_rate": 9.711825995984957e-05,
      "loss": 0.0583,
      "step": 9190
    },
    {
      "epoch": 5.609130859375e-05,
      "model_forward_time": 0.11675548553466797,
      "step": 9190
    },
    {
      "epoch": 5.609130859375e-05,
      "step": 9190,
      "training_step_time": 0.3747267723083496
    },
    {
      "epoch": 5.6097412109375e-05,
      "model_forward_time": 0.11596369743347168,
      "step": 9191
    },
    {
      "epoch": 5.6097412109375e-05,
      "step": 9191,
      "training_step_time": 0.37694692611694336
    },
    {
      "epoch": 5.6103515625e-05,
      "model_forward_time": 0.11750221252441406,
      "step": 9192
    },
    {
      "epoch": 5.6103515625e-05,
      "step": 9192,
      "training_step_time": 0.8245069980621338
    },
    {
      "epoch": 5.6109619140625e-05,
      "model_forward_time": 0.11606121063232422,
      "step": 9193
    },
    {
      "epoch": 5.6109619140625e-05,
      "step": 9193,
      "training_step_time": 0.4482114315032959
    },
    {
      "epoch": 5.611572265625e-05,
      "model_forward_time": 0.11565709114074707,
      "step": 9194
    },
    {
      "epoch": 5.611572265625e-05,
      "step": 9194,
      "training_step_time": 0.4085564613342285
    },
    {
      "epoch": 5.6121826171875e-05,
      "model_forward_time": 0.11601424217224121,
      "step": 9195
    },
    {
      "epoch": 5.6121826171875e-05,
      "step": 9195,
      "training_step_time": 0.37238097190856934
    },
    {
      "epoch": 5.61279296875e-05,
      "model_forward_time": 0.11597633361816406,
      "step": 9196
    },
    {
      "epoch": 5.61279296875e-05,
      "step": 9196,
      "training_step_time": 0.3750271797180176
    },
    {
      "epoch": 5.6134033203125e-05,
      "model_forward_time": 0.1156916618347168,
      "step": 9197
    },
    {
      "epoch": 5.6134033203125e-05,
      "step": 9197,
      "training_step_time": 0.39440393447875977
    },
    {
      "epoch": 5.614013671875e-05,
      "model_forward_time": 0.11775422096252441,
      "step": 9198
    },
    {
      "epoch": 5.614013671875e-05,
      "step": 9198,
      "training_step_time": 0.7193527221679688
    },
    {
      "epoch": 5.6146240234375e-05,
      "model_forward_time": 0.11648726463317871,
      "step": 9199
    },
    {
      "epoch": 5.6146240234375e-05,
      "step": 9199,
      "training_step_time": 0.46955418586730957
    },
    {
      "epoch": 5.615234375e-05,
      "grad_norm": 0.22279846668243408,
      "learning_rate": 9.710903233782272e-05,
      "loss": 0.0639,
      "step": 9200
    },
    {
      "epoch": 5.615234375e-05,
      "model_forward_time": 0.11635422706604004,
      "step": 9200
    },
    {
      "epoch": 5.615234375e-05,
      "step": 9200,
      "training_step_time": 0.5147812366485596
    },
    {
      "epoch": 5.6158447265625e-05,
      "model_forward_time": 0.11611509323120117,
      "step": 9201
    },
    {
      "epoch": 5.6158447265625e-05,
      "step": 9201,
      "training_step_time": 0.3685588836669922
    },
    {
      "epoch": 5.616455078125e-05,
      "model_forward_time": 0.11647248268127441,
      "step": 9202
    },
    {
      "epoch": 5.616455078125e-05,
      "step": 9202,
      "training_step_time": 0.3685297966003418
    },
    {
      "epoch": 5.6170654296875e-05,
      "model_forward_time": 0.11639165878295898,
      "step": 9203
    },
    {
      "epoch": 5.6170654296875e-05,
      "step": 9203,
      "training_step_time": 0.3759770393371582
    },
    {
      "epoch": 5.61767578125e-05,
      "model_forward_time": 0.11724400520324707,
      "step": 9204
    },
    {
      "epoch": 5.61767578125e-05,
      "step": 9204,
      "training_step_time": 0.6713101863861084
    },
    {
      "epoch": 5.6182861328125e-05,
      "model_forward_time": 0.1166694164276123,
      "step": 9205
    },
    {
      "epoch": 5.6182861328125e-05,
      "step": 9205,
      "training_step_time": 0.4280993938446045
    },
    {
      "epoch": 5.618896484375e-05,
      "model_forward_time": 0.11788439750671387,
      "step": 9206
    },
    {
      "epoch": 5.618896484375e-05,
      "step": 9206,
      "training_step_time": 0.49358272552490234
    },
    {
      "epoch": 5.6195068359375e-05,
      "model_forward_time": 0.11607956886291504,
      "step": 9207
    },
    {
      "epoch": 5.6195068359375e-05,
      "step": 9207,
      "training_step_time": 0.4392545223236084
    },
    {
      "epoch": 5.6201171875e-05,
      "model_forward_time": 0.11586332321166992,
      "step": 9208
    },
    {
      "epoch": 5.6201171875e-05,
      "step": 9208,
      "training_step_time": 0.36725950241088867
    },
    {
      "epoch": 5.6207275390625e-05,
      "model_forward_time": 0.11571049690246582,
      "step": 9209
    },
    {
      "epoch": 5.6207275390625e-05,
      "step": 9209,
      "training_step_time": 0.37101101875305176
    },
    {
      "epoch": 5.621337890625e-05,
      "grad_norm": 0.1948869228363037,
      "learning_rate": 9.709979040531569e-05,
      "loss": 0.0626,
      "step": 9210
    },
    {
      "epoch": 5.621337890625e-05,
      "model_forward_time": 0.11669135093688965,
      "step": 9210
    },
    {
      "epoch": 5.621337890625e-05,
      "step": 9210,
      "training_step_time": 0.4042778015136719
    },
    {
      "epoch": 5.6219482421875e-05,
      "model_forward_time": 0.11641645431518555,
      "step": 9211
    },
    {
      "epoch": 5.6219482421875e-05,
      "step": 9211,
      "training_step_time": 0.43833208084106445
    },
    {
      "epoch": 5.62255859375e-05,
      "model_forward_time": 0.11786127090454102,
      "step": 9212
    },
    {
      "epoch": 5.62255859375e-05,
      "step": 9212,
      "training_step_time": 0.45604658126831055
    },
    {
      "epoch": 5.6231689453125e-05,
      "model_forward_time": 0.11750054359436035,
      "step": 9213
    },
    {
      "epoch": 5.6231689453125e-05,
      "step": 9213,
      "training_step_time": 0.451080322265625
    },
    {
      "epoch": 5.623779296875e-05,
      "model_forward_time": 0.11716485023498535,
      "step": 9214
    },
    {
      "epoch": 5.623779296875e-05,
      "step": 9214,
      "training_step_time": 0.490070104598999
    },
    {
      "epoch": 5.6243896484375e-05,
      "model_forward_time": 0.11696147918701172,
      "step": 9215
    },
    {
      "epoch": 5.6243896484375e-05,
      "step": 9215,
      "training_step_time": 0.38136982917785645
    },
    {
      "epoch": 5.625e-05,
      "model_forward_time": 0.1171560287475586,
      "step": 9216
    },
    {
      "epoch": 5.625e-05,
      "step": 9216,
      "training_step_time": 0.9190330505371094
    },
    {
      "epoch": 5.6256103515625e-05,
      "model_forward_time": 0.11664414405822754,
      "step": 9217
    },
    {
      "epoch": 5.6256103515625e-05,
      "step": 9217,
      "training_step_time": 0.3875908851623535
    },
    {
      "epoch": 5.626220703125e-05,
      "model_forward_time": 0.11883234977722168,
      "step": 9218
    },
    {
      "epoch": 5.626220703125e-05,
      "step": 9218,
      "training_step_time": 0.39307689666748047
    },
    {
      "epoch": 5.6268310546875e-05,
      "model_forward_time": 0.11546063423156738,
      "step": 9219
    },
    {
      "epoch": 5.6268310546875e-05,
      "step": 9219,
      "training_step_time": 0.4790327548980713
    },
    {
      "epoch": 5.62744140625e-05,
      "grad_norm": 0.25807178020477295,
      "learning_rate": 9.709053416513592e-05,
      "loss": 0.0673,
      "step": 9220
    },
    {
      "epoch": 5.62744140625e-05,
      "model_forward_time": 0.11553573608398438,
      "step": 9220
    },
    {
      "epoch": 5.62744140625e-05,
      "step": 9220,
      "training_step_time": 0.39133691787719727
    },
    {
      "epoch": 5.6280517578125e-05,
      "model_forward_time": 0.11594367027282715,
      "step": 9221
    },
    {
      "epoch": 5.6280517578125e-05,
      "step": 9221,
      "training_step_time": 0.3830831050872803
    },
    {
      "epoch": 5.628662109375e-05,
      "model_forward_time": 0.11627054214477539,
      "step": 9222
    },
    {
      "epoch": 5.628662109375e-05,
      "step": 9222,
      "training_step_time": 0.47888708114624023
    },
    {
      "epoch": 5.6292724609375e-05,
      "model_forward_time": 0.11659574508666992,
      "step": 9223
    },
    {
      "epoch": 5.6292724609375e-05,
      "step": 9223,
      "training_step_time": 0.41681408882141113
    },
    {
      "epoch": 5.6298828125e-05,
      "model_forward_time": 0.11655426025390625,
      "step": 9224
    },
    {
      "epoch": 5.6298828125e-05,
      "step": 9224,
      "training_step_time": 0.39348649978637695
    },
    {
      "epoch": 5.6304931640625e-05,
      "model_forward_time": 0.11621880531311035,
      "step": 9225
    },
    {
      "epoch": 5.6304931640625e-05,
      "step": 9225,
      "training_step_time": 0.3748588562011719
    },
    {
      "epoch": 5.631103515625e-05,
      "model_forward_time": 0.11726140975952148,
      "step": 9226
    },
    {
      "epoch": 5.631103515625e-05,
      "step": 9226,
      "training_step_time": 0.48515844345092773
    },
    {
      "epoch": 5.6317138671875e-05,
      "model_forward_time": 0.11667943000793457,
      "step": 9227
    },
    {
      "epoch": 5.6317138671875e-05,
      "step": 9227,
      "training_step_time": 0.43263769149780273
    },
    {
      "epoch": 5.63232421875e-05,
      "model_forward_time": 0.1167140007019043,
      "step": 9228
    },
    {
      "epoch": 5.63232421875e-05,
      "step": 9228,
      "training_step_time": 1.027836799621582
    },
    {
      "epoch": 5.6329345703125e-05,
      "model_forward_time": 0.11592316627502441,
      "step": 9229
    },
    {
      "epoch": 5.6329345703125e-05,
      "step": 9229,
      "training_step_time": 0.37050366401672363
    },
    {
      "epoch": 5.633544921875e-05,
      "grad_norm": 0.24605225026607513,
      "learning_rate": 9.708126362009522e-05,
      "loss": 0.0639,
      "step": 9230
    },
    {
      "epoch": 5.633544921875e-05,
      "model_forward_time": 0.11600017547607422,
      "step": 9230
    },
    {
      "epoch": 5.633544921875e-05,
      "step": 9230,
      "training_step_time": 0.40111303329467773
    },
    {
      "epoch": 5.6341552734375e-05,
      "model_forward_time": 0.1153879165649414,
      "step": 9231
    },
    {
      "epoch": 5.6341552734375e-05,
      "step": 9231,
      "training_step_time": 0.4222707748413086
    },
    {
      "epoch": 5.634765625e-05,
      "model_forward_time": 0.11635661125183105,
      "step": 9232
    },
    {
      "epoch": 5.634765625e-05,
      "step": 9232,
      "training_step_time": 0.42624664306640625
    },
    {
      "epoch": 5.6353759765625e-05,
      "model_forward_time": 0.11612772941589355,
      "step": 9233
    },
    {
      "epoch": 5.6353759765625e-05,
      "step": 9233,
      "training_step_time": 0.42735767364501953
    },
    {
      "epoch": 5.635986328125e-05,
      "model_forward_time": 0.11849141120910645,
      "step": 9234
    },
    {
      "epoch": 5.635986328125e-05,
      "step": 9234,
      "training_step_time": 0.3898000717163086
    },
    {
      "epoch": 5.6365966796875e-05,
      "model_forward_time": 0.1160881519317627,
      "step": 9235
    },
    {
      "epoch": 5.6365966796875e-05,
      "step": 9235,
      "training_step_time": 0.38739848136901855
    },
    {
      "epoch": 5.63720703125e-05,
      "model_forward_time": 0.11635136604309082,
      "step": 9236
    },
    {
      "epoch": 5.63720703125e-05,
      "step": 9236,
      "training_step_time": 0.378049373626709
    },
    {
      "epoch": 5.6378173828125e-05,
      "model_forward_time": 0.11699485778808594,
      "step": 9237
    },
    {
      "epoch": 5.6378173828125e-05,
      "step": 9237,
      "training_step_time": 0.4355144500732422
    },
    {
      "epoch": 5.638427734375e-05,
      "model_forward_time": 0.11606407165527344,
      "step": 9238
    },
    {
      "epoch": 5.638427734375e-05,
      "step": 9238,
      "training_step_time": 0.37447690963745117
    },
    {
      "epoch": 5.6390380859375e-05,
      "model_forward_time": 0.11690425872802734,
      "step": 9239
    },
    {
      "epoch": 5.6390380859375e-05,
      "step": 9239,
      "training_step_time": 0.4792172908782959
    },
    {
      "epoch": 5.6396484375e-05,
      "grad_norm": 0.21946537494659424,
      "learning_rate": 9.707197877300974e-05,
      "loss": 0.0619,
      "step": 9240
    },
    {
      "epoch": 5.6396484375e-05,
      "model_forward_time": 0.11670780181884766,
      "step": 9240
    },
    {
      "epoch": 5.6396484375e-05,
      "step": 9240,
      "training_step_time": 0.9880189895629883
    },
    {
      "epoch": 5.6402587890625e-05,
      "model_forward_time": 0.1155557632446289,
      "step": 9241
    },
    {
      "epoch": 5.6402587890625e-05,
      "step": 9241,
      "training_step_time": 0.38406848907470703
    },
    {
      "epoch": 5.640869140625e-05,
      "model_forward_time": 0.1160423755645752,
      "step": 9242
    },
    {
      "epoch": 5.640869140625e-05,
      "step": 9242,
      "training_step_time": 0.41820240020751953
    },
    {
      "epoch": 5.6414794921875e-05,
      "model_forward_time": 0.1159052848815918,
      "step": 9243
    },
    {
      "epoch": 5.6414794921875e-05,
      "step": 9243,
      "training_step_time": 0.40910935401916504
    },
    {
      "epoch": 5.64208984375e-05,
      "model_forward_time": 0.11653327941894531,
      "step": 9244
    },
    {
      "epoch": 5.64208984375e-05,
      "step": 9244,
      "training_step_time": 0.3869903087615967
    },
    {
      "epoch": 5.6427001953125e-05,
      "model_forward_time": 0.11721444129943848,
      "step": 9245
    },
    {
      "epoch": 5.6427001953125e-05,
      "step": 9245,
      "training_step_time": 0.426375150680542
    },
    {
      "epoch": 5.643310546875e-05,
      "model_forward_time": 0.11620593070983887,
      "step": 9246
    },
    {
      "epoch": 5.643310546875e-05,
      "step": 9246,
      "training_step_time": 0.9639267921447754
    },
    {
      "epoch": 5.6439208984375e-05,
      "model_forward_time": 0.11664438247680664,
      "step": 9247
    },
    {
      "epoch": 5.6439208984375e-05,
      "step": 9247,
      "training_step_time": 0.3833153247833252
    },
    {
      "epoch": 5.64453125e-05,
      "model_forward_time": 0.11544561386108398,
      "step": 9248
    },
    {
      "epoch": 5.64453125e-05,
      "step": 9248,
      "training_step_time": 0.37225842475891113
    },
    {
      "epoch": 5.6451416015625e-05,
      "model_forward_time": 0.11574411392211914,
      "step": 9249
    },
    {
      "epoch": 5.6451416015625e-05,
      "step": 9249,
      "training_step_time": 0.3669257164001465
    },
    {
      "epoch": 5.645751953125e-05,
      "grad_norm": 0.2450469583272934,
      "learning_rate": 9.706267962669998e-05,
      "loss": 0.0645,
      "step": 9250
    },
    {
      "epoch": 5.645751953125e-05,
      "model_forward_time": 0.11623787879943848,
      "step": 9250
    },
    {
      "epoch": 5.645751953125e-05,
      "step": 9250,
      "training_step_time": 0.43900251388549805
    },
    {
      "epoch": 5.6463623046875e-05,
      "model_forward_time": 0.11598706245422363,
      "step": 9251
    },
    {
      "epoch": 5.6463623046875e-05,
      "step": 9251,
      "training_step_time": 0.4078714847564697
    },
    {
      "epoch": 5.64697265625e-05,
      "model_forward_time": 0.11799454689025879,
      "step": 9252
    },
    {
      "epoch": 5.64697265625e-05,
      "step": 9252,
      "training_step_time": 0.6158316135406494
    },
    {
      "epoch": 5.6475830078125e-05,
      "model_forward_time": 0.11626029014587402,
      "step": 9253
    },
    {
      "epoch": 5.6475830078125e-05,
      "step": 9253,
      "training_step_time": 0.38720154762268066
    },
    {
      "epoch": 5.648193359375e-05,
      "model_forward_time": 0.11608576774597168,
      "step": 9254
    },
    {
      "epoch": 5.648193359375e-05,
      "step": 9254,
      "training_step_time": 0.43129873275756836
    },
    {
      "epoch": 5.6488037109375e-05,
      "model_forward_time": 0.11626267433166504,
      "step": 9255
    },
    {
      "epoch": 5.6488037109375e-05,
      "step": 9255,
      "training_step_time": 0.42059779167175293
    },
    {
      "epoch": 5.6494140625e-05,
      "model_forward_time": 0.11718583106994629,
      "step": 9256
    },
    {
      "epoch": 5.6494140625e-05,
      "step": 9256,
      "training_step_time": 0.374908447265625
    },
    {
      "epoch": 5.6500244140625e-05,
      "model_forward_time": 0.1164710521697998,
      "step": 9257
    },
    {
      "epoch": 5.6500244140625e-05,
      "step": 9257,
      "training_step_time": 0.41857337951660156
    },
    {
      "epoch": 5.650634765625e-05,
      "model_forward_time": 0.11905145645141602,
      "step": 9258
    },
    {
      "epoch": 5.650634765625e-05,
      "step": 9258,
      "training_step_time": 0.6131167411804199
    },
    {
      "epoch": 5.6512451171875e-05,
      "model_forward_time": 0.11622500419616699,
      "step": 9259
    },
    {
      "epoch": 5.6512451171875e-05,
      "step": 9259,
      "training_step_time": 0.46355342864990234
    },
    {
      "epoch": 5.65185546875e-05,
      "grad_norm": 0.17054559290409088,
      "learning_rate": 9.705336618399077e-05,
      "loss": 0.0624,
      "step": 9260
    },
    {
      "epoch": 5.65185546875e-05,
      "model_forward_time": 0.11631298065185547,
      "step": 9260
    },
    {
      "epoch": 5.65185546875e-05,
      "step": 9260,
      "training_step_time": 0.3849663734436035
    },
    {
      "epoch": 5.6524658203125e-05,
      "model_forward_time": 0.11767816543579102,
      "step": 9261
    },
    {
      "epoch": 5.6524658203125e-05,
      "step": 9261,
      "training_step_time": 0.4007291793823242
    },
    {
      "epoch": 5.653076171875e-05,
      "model_forward_time": 0.11625003814697266,
      "step": 9262
    },
    {
      "epoch": 5.653076171875e-05,
      "step": 9262,
      "training_step_time": 0.38201475143432617
    },
    {
      "epoch": 5.6536865234375e-05,
      "model_forward_time": 0.11681747436523438,
      "step": 9263
    },
    {
      "epoch": 5.6536865234375e-05,
      "step": 9263,
      "training_step_time": 0.38195300102233887
    },
    {
      "epoch": 5.654296875e-05,
      "model_forward_time": 0.11663150787353516,
      "step": 9264
    },
    {
      "epoch": 5.654296875e-05,
      "step": 9264,
      "training_step_time": 0.5279781818389893
    },
    {
      "epoch": 5.6549072265625e-05,
      "model_forward_time": 0.1161353588104248,
      "step": 9265
    },
    {
      "epoch": 5.6549072265625e-05,
      "step": 9265,
      "training_step_time": 0.45793771743774414
    },
    {
      "epoch": 5.655517578125e-05,
      "model_forward_time": 0.11841249465942383,
      "step": 9266
    },
    {
      "epoch": 5.655517578125e-05,
      "step": 9266,
      "training_step_time": 0.5235626697540283
    },
    {
      "epoch": 5.6561279296875e-05,
      "model_forward_time": 0.11616873741149902,
      "step": 9267
    },
    {
      "epoch": 5.6561279296875e-05,
      "step": 9267,
      "training_step_time": 0.3737318515777588
    },
    {
      "epoch": 5.65673828125e-05,
      "model_forward_time": 0.1180262565612793,
      "step": 9268
    },
    {
      "epoch": 5.65673828125e-05,
      "step": 9268,
      "training_step_time": 0.38510918617248535
    },
    {
      "epoch": 5.6573486328125e-05,
      "model_forward_time": 0.11670804023742676,
      "step": 9269
    },
    {
      "epoch": 5.6573486328125e-05,
      "step": 9269,
      "training_step_time": 0.3847620487213135
    },
    {
      "epoch": 5.657958984375e-05,
      "grad_norm": 0.18135596811771393,
      "learning_rate": 9.704403844771128e-05,
      "loss": 0.0623,
      "step": 9270
    },
    {
      "epoch": 5.657958984375e-05,
      "model_forward_time": 0.11757850646972656,
      "step": 9270
    },
    {
      "epoch": 5.657958984375e-05,
      "step": 9270,
      "training_step_time": 0.37682056427001953
    },
    {
      "epoch": 5.6585693359375e-05,
      "model_forward_time": 0.11612725257873535,
      "step": 9271
    },
    {
      "epoch": 5.6585693359375e-05,
      "step": 9271,
      "training_step_time": 0.40100669860839844
    },
    {
      "epoch": 5.6591796875e-05,
      "model_forward_time": 0.11766886711120605,
      "step": 9272
    },
    {
      "epoch": 5.6591796875e-05,
      "step": 9272,
      "training_step_time": 0.4060485363006592
    },
    {
      "epoch": 5.6597900390625e-05,
      "model_forward_time": 0.11934828758239746,
      "step": 9273
    },
    {
      "epoch": 5.6597900390625e-05,
      "step": 9273,
      "training_step_time": 0.47324132919311523
    },
    {
      "epoch": 5.660400390625e-05,
      "model_forward_time": 0.1172330379486084,
      "step": 9274
    },
    {
      "epoch": 5.660400390625e-05,
      "step": 9274,
      "training_step_time": 0.3770942687988281
    },
    {
      "epoch": 5.6610107421875e-05,
      "model_forward_time": 0.11560916900634766,
      "step": 9275
    },
    {
      "epoch": 5.6610107421875e-05,
      "step": 9275,
      "training_step_time": 0.38001203536987305
    },
    {
      "epoch": 5.66162109375e-05,
      "model_forward_time": 0.11747932434082031,
      "step": 9276
    },
    {
      "epoch": 5.66162109375e-05,
      "step": 9276,
      "training_step_time": 0.44023966789245605
    },
    {
      "epoch": 5.6622314453125e-05,
      "model_forward_time": 0.11637473106384277,
      "step": 9277
    },
    {
      "epoch": 5.6622314453125e-05,
      "step": 9277,
      "training_step_time": 0.3860588073730469
    },
    {
      "epoch": 5.662841796875e-05,
      "model_forward_time": 0.1165304183959961,
      "step": 9278
    },
    {
      "epoch": 5.662841796875e-05,
      "step": 9278,
      "training_step_time": 0.3752889633178711
    },
    {
      "epoch": 5.6634521484375e-05,
      "model_forward_time": 0.11949992179870605,
      "step": 9279
    },
    {
      "epoch": 5.6634521484375e-05,
      "step": 9279,
      "training_step_time": 0.4599180221557617
    },
    {
      "epoch": 5.6640625e-05,
      "grad_norm": 0.17084462940692902,
      "learning_rate": 9.703469642069503e-05,
      "loss": 0.0557,
      "step": 9280
    },
    {
      "epoch": 5.6640625e-05,
      "model_forward_time": 0.11722493171691895,
      "step": 9280
    },
    {
      "epoch": 5.6640625e-05,
      "step": 9280,
      "training_step_time": 0.40051841735839844
    },
    {
      "epoch": 5.6646728515625e-05,
      "model_forward_time": 0.11732172966003418,
      "step": 9281
    },
    {
      "epoch": 5.6646728515625e-05,
      "step": 9281,
      "training_step_time": 0.401308536529541
    },
    {
      "epoch": 5.665283203125e-05,
      "model_forward_time": 0.11689376831054688,
      "step": 9282
    },
    {
      "epoch": 5.665283203125e-05,
      "step": 9282,
      "training_step_time": 0.4740331172943115
    },
    {
      "epoch": 5.6658935546875e-05,
      "model_forward_time": 0.11656999588012695,
      "step": 9283
    },
    {
      "epoch": 5.6658935546875e-05,
      "step": 9283,
      "training_step_time": 0.3866724967956543
    },
    {
      "epoch": 5.66650390625e-05,
      "model_forward_time": 0.11721658706665039,
      "step": 9284
    },
    {
      "epoch": 5.66650390625e-05,
      "step": 9284,
      "training_step_time": 0.38122105598449707
    },
    {
      "epoch": 5.6671142578125e-05,
      "model_forward_time": 0.11719346046447754,
      "step": 9285
    },
    {
      "epoch": 5.6671142578125e-05,
      "step": 9285,
      "training_step_time": 0.3839995861053467
    },
    {
      "epoch": 5.667724609375e-05,
      "model_forward_time": 0.11650538444519043,
      "step": 9286
    },
    {
      "epoch": 5.667724609375e-05,
      "step": 9286,
      "training_step_time": 0.4718289375305176
    },
    {
      "epoch": 5.6683349609375e-05,
      "model_forward_time": 0.11717557907104492,
      "step": 9287
    },
    {
      "epoch": 5.6683349609375e-05,
      "step": 9287,
      "training_step_time": 0.40508127212524414
    },
    {
      "epoch": 5.6689453125e-05,
      "model_forward_time": 0.11690115928649902,
      "step": 9288
    },
    {
      "epoch": 5.6689453125e-05,
      "step": 9288,
      "training_step_time": 0.38196802139282227
    },
    {
      "epoch": 5.6695556640625e-05,
      "model_forward_time": 0.11688065528869629,
      "step": 9289
    },
    {
      "epoch": 5.6695556640625e-05,
      "step": 9289,
      "training_step_time": 0.380723237991333
    },
    {
      "epoch": 5.670166015625e-05,
      "grad_norm": 0.2368001490831375,
      "learning_rate": 9.702534010577991e-05,
      "loss": 0.0576,
      "step": 9290
    },
    {
      "epoch": 5.670166015625e-05,
      "model_forward_time": 0.11662840843200684,
      "step": 9290
    },
    {
      "epoch": 5.670166015625e-05,
      "step": 9290,
      "training_step_time": 0.38408613204956055
    },
    {
      "epoch": 5.6707763671875e-05,
      "model_forward_time": 0.11683225631713867,
      "step": 9291
    },
    {
      "epoch": 5.6707763671875e-05,
      "step": 9291,
      "training_step_time": 0.3757004737854004
    },
    {
      "epoch": 5.67138671875e-05,
      "model_forward_time": 0.1186823844909668,
      "step": 9292
    },
    {
      "epoch": 5.67138671875e-05,
      "step": 9292,
      "training_step_time": 0.4202234745025635
    },
    {
      "epoch": 5.6719970703125e-05,
      "model_forward_time": 0.11627864837646484,
      "step": 9293
    },
    {
      "epoch": 5.6719970703125e-05,
      "step": 9293,
      "training_step_time": 0.37102603912353516
    },
    {
      "epoch": 5.672607421875e-05,
      "model_forward_time": 0.1182851791381836,
      "step": 9294
    },
    {
      "epoch": 5.672607421875e-05,
      "step": 9294,
      "training_step_time": 0.8908064365386963
    },
    {
      "epoch": 5.6732177734375e-05,
      "model_forward_time": 0.11574602127075195,
      "step": 9295
    },
    {
      "epoch": 5.6732177734375e-05,
      "step": 9295,
      "training_step_time": 0.37732553482055664
    },
    {
      "epoch": 5.673828125e-05,
      "model_forward_time": 0.11646461486816406,
      "step": 9296
    },
    {
      "epoch": 5.673828125e-05,
      "step": 9296,
      "training_step_time": 0.3712160587310791
    },
    {
      "epoch": 5.6744384765625e-05,
      "model_forward_time": 0.11642622947692871,
      "step": 9297
    },
    {
      "epoch": 5.6744384765625e-05,
      "step": 9297,
      "training_step_time": 0.37368059158325195
    },
    {
      "epoch": 5.675048828125e-05,
      "model_forward_time": 0.11613154411315918,
      "step": 9298
    },
    {
      "epoch": 5.675048828125e-05,
      "step": 9298,
      "training_step_time": 0.3762474060058594
    },
    {
      "epoch": 5.6756591796875e-05,
      "model_forward_time": 0.11599898338317871,
      "step": 9299
    },
    {
      "epoch": 5.6756591796875e-05,
      "step": 9299,
      "training_step_time": 0.3802194595336914
    },
    {
      "epoch": 5.67626953125e-05,
      "grad_norm": 0.23998720943927765,
      "learning_rate": 9.701596950580806e-05,
      "loss": 0.0642,
      "step": 9300
    },
    {
      "epoch": 5.67626953125e-05,
      "model_forward_time": 0.11733627319335938,
      "step": 9300
    },
    {
      "epoch": 5.67626953125e-05,
      "step": 9300,
      "training_step_time": 1.0988645553588867
    },
    {
      "epoch": 5.6768798828125e-05,
      "model_forward_time": 0.1149146556854248,
      "step": 9301
    },
    {
      "epoch": 5.6768798828125e-05,
      "step": 9301,
      "training_step_time": 0.37497615814208984
    },
    {
      "epoch": 5.677490234375e-05,
      "model_forward_time": 0.1153724193572998,
      "step": 9302
    },
    {
      "epoch": 5.677490234375e-05,
      "step": 9302,
      "training_step_time": 0.38275623321533203
    },
    {
      "epoch": 5.6781005859375e-05,
      "model_forward_time": 0.11587691307067871,
      "step": 9303
    },
    {
      "epoch": 5.6781005859375e-05,
      "step": 9303,
      "training_step_time": 0.37193918228149414
    },
    {
      "epoch": 5.6787109375e-05,
      "model_forward_time": 0.11566042900085449,
      "step": 9304
    },
    {
      "epoch": 5.6787109375e-05,
      "step": 9304,
      "training_step_time": 0.38098859786987305
    },
    {
      "epoch": 5.6793212890625e-05,
      "model_forward_time": 0.11793303489685059,
      "step": 9305
    },
    {
      "epoch": 5.6793212890625e-05,
      "step": 9305,
      "training_step_time": 0.4403066635131836
    },
    {
      "epoch": 5.679931640625e-05,
      "model_forward_time": 0.11675333976745605,
      "step": 9306
    },
    {
      "epoch": 5.679931640625e-05,
      "step": 9306,
      "training_step_time": 0.9100539684295654
    },
    {
      "epoch": 5.6805419921875e-05,
      "model_forward_time": 0.11607837677001953,
      "step": 9307
    },
    {
      "epoch": 5.6805419921875e-05,
      "step": 9307,
      "training_step_time": 0.38023972511291504
    },
    {
      "epoch": 5.68115234375e-05,
      "model_forward_time": 0.1160733699798584,
      "step": 9308
    },
    {
      "epoch": 5.68115234375e-05,
      "step": 9308,
      "training_step_time": 0.401397705078125
    },
    {
      "epoch": 5.6817626953125e-05,
      "model_forward_time": 0.11619734764099121,
      "step": 9309
    },
    {
      "epoch": 5.6817626953125e-05,
      "step": 9309,
      "training_step_time": 0.38645172119140625
    },
    {
      "epoch": 5.682373046875e-05,
      "grad_norm": 0.22096264362335205,
      "learning_rate": 9.700658462362608e-05,
      "loss": 0.0611,
      "step": 9310
    },
    {
      "epoch": 5.682373046875e-05,
      "model_forward_time": 0.11558222770690918,
      "step": 9310
    },
    {
      "epoch": 5.682373046875e-05,
      "step": 9310,
      "training_step_time": 0.37931060791015625
    },
    {
      "epoch": 5.6829833984375e-05,
      "model_forward_time": 0.11593127250671387,
      "step": 9311
    },
    {
      "epoch": 5.6829833984375e-05,
      "step": 9311,
      "training_step_time": 0.42194271087646484
    },
    {
      "epoch": 5.68359375e-05,
      "model_forward_time": 0.11672329902648926,
      "step": 9312
    },
    {
      "epoch": 5.68359375e-05,
      "step": 9312,
      "training_step_time": 0.6083414554595947
    },
    {
      "epoch": 5.6842041015625e-05,
      "model_forward_time": 0.11666250228881836,
      "step": 9313
    },
    {
      "epoch": 5.6842041015625e-05,
      "step": 9313,
      "training_step_time": 0.399569034576416
    },
    {
      "epoch": 5.684814453125e-05,
      "model_forward_time": 0.11691761016845703,
      "step": 9314
    },
    {
      "epoch": 5.684814453125e-05,
      "step": 9314,
      "training_step_time": 0.3937339782714844
    },
    {
      "epoch": 5.6854248046875e-05,
      "model_forward_time": 0.12006425857543945,
      "step": 9315
    },
    {
      "epoch": 5.6854248046875e-05,
      "step": 9315,
      "training_step_time": 0.46221232414245605
    },
    {
      "epoch": 5.68603515625e-05,
      "model_forward_time": 0.11639881134033203,
      "step": 9316
    },
    {
      "epoch": 5.68603515625e-05,
      "step": 9316,
      "training_step_time": 0.38459229469299316
    },
    {
      "epoch": 5.6866455078125e-05,
      "model_forward_time": 0.11642861366271973,
      "step": 9317
    },
    {
      "epoch": 5.6866455078125e-05,
      "step": 9317,
      "training_step_time": 0.3805253505706787
    },
    {
      "epoch": 5.687255859375e-05,
      "model_forward_time": 0.11693120002746582,
      "step": 9318
    },
    {
      "epoch": 5.687255859375e-05,
      "step": 9318,
      "training_step_time": 0.8680276870727539
    },
    {
      "epoch": 5.6878662109375e-05,
      "model_forward_time": 0.1163015365600586,
      "step": 9319
    },
    {
      "epoch": 5.6878662109375e-05,
      "step": 9319,
      "training_step_time": 0.4770700931549072
    },
    {
      "epoch": 5.6884765625e-05,
      "grad_norm": 0.28013691306114197,
      "learning_rate": 9.699718546208484e-05,
      "loss": 0.0616,
      "step": 9320
    },
    {
      "epoch": 5.6884765625e-05,
      "model_forward_time": 0.11629462242126465,
      "step": 9320
    },
    {
      "epoch": 5.6884765625e-05,
      "step": 9320,
      "training_step_time": 0.41294384002685547
    },
    {
      "epoch": 5.6890869140625e-05,
      "model_forward_time": 0.11697673797607422,
      "step": 9321
    },
    {
      "epoch": 5.6890869140625e-05,
      "step": 9321,
      "training_step_time": 0.46711277961730957
    },
    {
      "epoch": 5.689697265625e-05,
      "model_forward_time": 0.1155390739440918,
      "step": 9322
    },
    {
      "epoch": 5.689697265625e-05,
      "step": 9322,
      "training_step_time": 0.3792452812194824
    },
    {
      "epoch": 5.6903076171875e-05,
      "model_forward_time": 0.11624288558959961,
      "step": 9323
    },
    {
      "epoch": 5.6903076171875e-05,
      "step": 9323,
      "training_step_time": 0.37068939208984375
    },
    {
      "epoch": 5.69091796875e-05,
      "model_forward_time": 0.11635494232177734,
      "step": 9324
    },
    {
      "epoch": 5.69091796875e-05,
      "step": 9324,
      "training_step_time": 0.49698615074157715
    },
    {
      "epoch": 5.6915283203125e-05,
      "model_forward_time": 0.11731433868408203,
      "step": 9325
    },
    {
      "epoch": 5.6915283203125e-05,
      "step": 9325,
      "training_step_time": 0.4164743423461914
    },
    {
      "epoch": 5.692138671875e-05,
      "model_forward_time": 0.11650943756103516,
      "step": 9326
    },
    {
      "epoch": 5.692138671875e-05,
      "step": 9326,
      "training_step_time": 0.3854219913482666
    },
    {
      "epoch": 5.6927490234375e-05,
      "model_forward_time": 0.11721563339233398,
      "step": 9327
    },
    {
      "epoch": 5.6927490234375e-05,
      "step": 9327,
      "training_step_time": 0.3931095600128174
    },
    {
      "epoch": 5.693359375e-05,
      "model_forward_time": 0.11725473403930664,
      "step": 9328
    },
    {
      "epoch": 5.693359375e-05,
      "step": 9328,
      "training_step_time": 0.39696383476257324
    },
    {
      "epoch": 5.6939697265625e-05,
      "model_forward_time": 0.1163172721862793,
      "step": 9329
    },
    {
      "epoch": 5.6939697265625e-05,
      "step": 9329,
      "training_step_time": 0.3767216205596924
    },
    {
      "epoch": 5.694580078125e-05,
      "grad_norm": 0.2712906301021576,
      "learning_rate": 9.698777202403953e-05,
      "loss": 0.0691,
      "step": 9330
    },
    {
      "epoch": 5.694580078125e-05,
      "model_forward_time": 0.11766290664672852,
      "step": 9330
    },
    {
      "epoch": 5.694580078125e-05,
      "step": 9330,
      "training_step_time": 1.1816048622131348
    },
    {
      "epoch": 5.6951904296875e-05,
      "model_forward_time": 0.11785459518432617,
      "step": 9331
    },
    {
      "epoch": 5.6951904296875e-05,
      "step": 9331,
      "training_step_time": 0.4398019313812256
    },
    {
      "epoch": 5.69580078125e-05,
      "model_forward_time": 0.11495494842529297,
      "step": 9332
    },
    {
      "epoch": 5.69580078125e-05,
      "step": 9332,
      "training_step_time": 0.3877253532409668
    },
    {
      "epoch": 5.6964111328125e-05,
      "model_forward_time": 0.11479568481445312,
      "step": 9333
    },
    {
      "epoch": 5.6964111328125e-05,
      "step": 9333,
      "training_step_time": 0.40495991706848145
    },
    {
      "epoch": 5.697021484375e-05,
      "model_forward_time": 0.11792135238647461,
      "step": 9334
    },
    {
      "epoch": 5.697021484375e-05,
      "step": 9334,
      "training_step_time": 0.49137449264526367
    },
    {
      "epoch": 5.6976318359375e-05,
      "model_forward_time": 0.11539578437805176,
      "step": 9335
    },
    {
      "epoch": 5.6976318359375e-05,
      "step": 9335,
      "training_step_time": 0.3731038570404053
    },
    {
      "epoch": 5.6982421875e-05,
      "model_forward_time": 0.11789608001708984,
      "step": 9336
    },
    {
      "epoch": 5.6982421875e-05,
      "step": 9336,
      "training_step_time": 0.4097464084625244
    },
    {
      "epoch": 5.6988525390625e-05,
      "model_forward_time": 0.11658453941345215,
      "step": 9337
    },
    {
      "epoch": 5.6988525390625e-05,
      "step": 9337,
      "training_step_time": 0.442455530166626
    },
    {
      "epoch": 5.699462890625e-05,
      "model_forward_time": 0.11641716957092285,
      "step": 9338
    },
    {
      "epoch": 5.699462890625e-05,
      "step": 9338,
      "training_step_time": 0.45743465423583984
    },
    {
      "epoch": 5.7000732421875e-05,
      "model_forward_time": 0.11586928367614746,
      "step": 9339
    },
    {
      "epoch": 5.7000732421875e-05,
      "step": 9339,
      "training_step_time": 0.40492773056030273
    },
    {
      "epoch": 5.70068359375e-05,
      "grad_norm": 0.21001717448234558,
      "learning_rate": 9.697834431234973e-05,
      "loss": 0.0668,
      "step": 9340
    },
    {
      "epoch": 5.70068359375e-05,
      "model_forward_time": 0.11816763877868652,
      "step": 9340
    },
    {
      "epoch": 5.70068359375e-05,
      "step": 9340,
      "training_step_time": 0.3862192630767822
    },
    {
      "epoch": 5.7012939453125e-05,
      "model_forward_time": 0.11736512184143066,
      "step": 9341
    },
    {
      "epoch": 5.7012939453125e-05,
      "step": 9341,
      "training_step_time": 0.3925764560699463
    },
    {
      "epoch": 5.701904296875e-05,
      "model_forward_time": 0.11640119552612305,
      "step": 9342
    },
    {
      "epoch": 5.701904296875e-05,
      "step": 9342,
      "training_step_time": 0.3768901824951172
    },
    {
      "epoch": 5.7025146484375e-05,
      "model_forward_time": 0.11739611625671387,
      "step": 9343
    },
    {
      "epoch": 5.7025146484375e-05,
      "step": 9343,
      "training_step_time": 0.40634584426879883
    },
    {
      "epoch": 5.703125e-05,
      "model_forward_time": 0.11766791343688965,
      "step": 9344
    },
    {
      "epoch": 5.703125e-05,
      "step": 9344,
      "training_step_time": 0.4101221561431885
    },
    {
      "epoch": 5.7037353515625e-05,
      "model_forward_time": 0.1181936264038086,
      "step": 9345
    },
    {
      "epoch": 5.7037353515625e-05,
      "step": 9345,
      "training_step_time": 0.4046006202697754
    },
    {
      "epoch": 5.704345703125e-05,
      "model_forward_time": 0.12016177177429199,
      "step": 9346
    },
    {
      "epoch": 5.704345703125e-05,
      "step": 9346,
      "training_step_time": 0.46325039863586426
    },
    {
      "epoch": 5.7049560546875e-05,
      "model_forward_time": 0.11677384376525879,
      "step": 9347
    },
    {
      "epoch": 5.7049560546875e-05,
      "step": 9347,
      "training_step_time": 0.45043039321899414
    },
    {
      "epoch": 5.70556640625e-05,
      "model_forward_time": 0.11630916595458984,
      "step": 9348
    },
    {
      "epoch": 5.70556640625e-05,
      "step": 9348,
      "training_step_time": 0.9687416553497314
    },
    {
      "epoch": 5.7061767578125e-05,
      "model_forward_time": 0.11585044860839844,
      "step": 9349
    },
    {
      "epoch": 5.7061767578125e-05,
      "step": 9349,
      "training_step_time": 0.3781414031982422
    },
    {
      "epoch": 5.706787109375e-05,
      "grad_norm": 0.20686958730220795,
      "learning_rate": 9.696890232987931e-05,
      "loss": 0.0584,
      "step": 9350
    },
    {
      "epoch": 5.706787109375e-05,
      "model_forward_time": 0.11562728881835938,
      "step": 9350
    },
    {
      "epoch": 5.706787109375e-05,
      "step": 9350,
      "training_step_time": 0.37400388717651367
    },
    {
      "epoch": 5.7073974609375e-05,
      "model_forward_time": 0.11563372611999512,
      "step": 9351
    },
    {
      "epoch": 5.7073974609375e-05,
      "step": 9351,
      "training_step_time": 0.41513752937316895
    },
    {
      "epoch": 5.7080078125e-05,
      "model_forward_time": 0.11832308769226074,
      "step": 9352
    },
    {
      "epoch": 5.7080078125e-05,
      "step": 9352,
      "training_step_time": 0.4155857563018799
    },
    {
      "epoch": 5.7086181640625e-05,
      "model_forward_time": 0.11676740646362305,
      "step": 9353
    },
    {
      "epoch": 5.7086181640625e-05,
      "step": 9353,
      "training_step_time": 0.3838686943054199
    },
    {
      "epoch": 5.709228515625e-05,
      "model_forward_time": 0.11753559112548828,
      "step": 9354
    },
    {
      "epoch": 5.709228515625e-05,
      "step": 9354,
      "training_step_time": 0.5817253589630127
    },
    {
      "epoch": 5.7098388671875e-05,
      "model_forward_time": 0.11622023582458496,
      "step": 9355
    },
    {
      "epoch": 5.7098388671875e-05,
      "step": 9355,
      "training_step_time": 0.3860659599304199
    },
    {
      "epoch": 5.71044921875e-05,
      "model_forward_time": 0.11618661880493164,
      "step": 9356
    },
    {
      "epoch": 5.71044921875e-05,
      "step": 9356,
      "training_step_time": 0.4047684669494629
    },
    {
      "epoch": 5.7110595703125e-05,
      "model_forward_time": 0.11600732803344727,
      "step": 9357
    },
    {
      "epoch": 5.7110595703125e-05,
      "step": 9357,
      "training_step_time": 0.38551855087280273
    },
    {
      "epoch": 5.711669921875e-05,
      "model_forward_time": 0.11650824546813965,
      "step": 9358
    },
    {
      "epoch": 5.711669921875e-05,
      "step": 9358,
      "training_step_time": 0.385303258895874
    },
    {
      "epoch": 5.7122802734375e-05,
      "model_forward_time": 0.11648869514465332,
      "step": 9359
    },
    {
      "epoch": 5.7122802734375e-05,
      "step": 9359,
      "training_step_time": 0.4725668430328369
    },
    {
      "epoch": 5.712890625e-05,
      "grad_norm": 0.2660433351993561,
      "learning_rate": 9.695944607949649e-05,
      "loss": 0.0655,
      "step": 9360
    },
    {
      "epoch": 5.712890625e-05,
      "model_forward_time": 0.11696767807006836,
      "step": 9360
    },
    {
      "epoch": 5.712890625e-05,
      "step": 9360,
      "training_step_time": 1.0492157936096191
    },
    {
      "epoch": 5.7135009765625e-05,
      "model_forward_time": 0.11560988426208496,
      "step": 9361
    },
    {
      "epoch": 5.7135009765625e-05,
      "step": 9361,
      "training_step_time": 0.3785238265991211
    },
    {
      "epoch": 5.714111328125e-05,
      "model_forward_time": 0.11618351936340332,
      "step": 9362
    },
    {
      "epoch": 5.714111328125e-05,
      "step": 9362,
      "training_step_time": 0.38799452781677246
    },
    {
      "epoch": 5.7147216796875e-05,
      "model_forward_time": 0.1159508228302002,
      "step": 9363
    },
    {
      "epoch": 5.7147216796875e-05,
      "step": 9363,
      "training_step_time": 0.395733118057251
    },
    {
      "epoch": 5.71533203125e-05,
      "model_forward_time": 0.11930465698242188,
      "step": 9364
    },
    {
      "epoch": 5.71533203125e-05,
      "step": 9364,
      "training_step_time": 0.4806175231933594
    },
    {
      "epoch": 5.7159423828125e-05,
      "model_forward_time": 0.1181478500366211,
      "step": 9365
    },
    {
      "epoch": 5.7159423828125e-05,
      "step": 9365,
      "training_step_time": 0.5925900936126709
    },
    {
      "epoch": 5.716552734375e-05,
      "model_forward_time": 0.12659621238708496,
      "step": 9366
    },
    {
      "epoch": 5.716552734375e-05,
      "step": 9366,
      "training_step_time": 0.867600679397583
    },
    {
      "epoch": 5.7171630859375e-05,
      "model_forward_time": 0.14911985397338867,
      "step": 9367
    },
    {
      "epoch": 5.7171630859375e-05,
      "step": 9367,
      "training_step_time": 0.7298309803009033
    },
    {
      "epoch": 5.7177734375e-05,
      "model_forward_time": 0.12076473236083984,
      "step": 9368
    },
    {
      "epoch": 5.7177734375e-05,
      "step": 9368,
      "training_step_time": 0.71055006980896
    },
    {
      "epoch": 5.7183837890625e-05,
      "model_forward_time": 0.11957192420959473,
      "step": 9369
    },
    {
      "epoch": 5.7183837890625e-05,
      "step": 9369,
      "training_step_time": 0.7105188369750977
    },
    {
      "epoch": 5.718994140625e-05,
      "grad_norm": 0.1768334060907364,
      "learning_rate": 9.694997556407387e-05,
      "loss": 0.0656,
      "step": 9370
    },
    {
      "epoch": 5.718994140625e-05,
      "model_forward_time": 0.12449026107788086,
      "step": 9370
    },
    {
      "epoch": 5.718994140625e-05,
      "step": 9370,
      "training_step_time": 0.6546838283538818
    },
    {
      "epoch": 5.7196044921875e-05,
      "model_forward_time": 0.12293148040771484,
      "step": 9371
    },
    {
      "epoch": 5.7196044921875e-05,
      "step": 9371,
      "training_step_time": 0.6240277290344238
    },
    {
      "epoch": 5.72021484375e-05,
      "model_forward_time": 0.12048816680908203,
      "step": 9372
    },
    {
      "epoch": 5.72021484375e-05,
      "step": 9372,
      "training_step_time": 0.6253340244293213
    },
    {
      "epoch": 5.7208251953125e-05,
      "model_forward_time": 0.12644433975219727,
      "step": 9373
    },
    {
      "epoch": 5.7208251953125e-05,
      "step": 9373,
      "training_step_time": 0.6959273815155029
    },
    {
      "epoch": 5.721435546875e-05,
      "model_forward_time": 0.11971330642700195,
      "step": 9374
    },
    {
      "epoch": 5.721435546875e-05,
      "step": 9374,
      "training_step_time": 0.7040853500366211
    },
    {
      "epoch": 5.7220458984375e-05,
      "model_forward_time": 0.12467432022094727,
      "step": 9375
    },
    {
      "epoch": 5.7220458984375e-05,
      "step": 9375,
      "training_step_time": 0.6795241832733154
    },
    {
      "epoch": 5.72265625e-05,
      "model_forward_time": 0.12401437759399414,
      "step": 9376
    },
    {
      "epoch": 5.72265625e-05,
      "step": 9376,
      "training_step_time": 0.6522278785705566
    },
    {
      "epoch": 5.7232666015625e-05,
      "model_forward_time": 0.12311792373657227,
      "step": 9377
    },
    {
      "epoch": 5.7232666015625e-05,
      "step": 9377,
      "training_step_time": 0.7210991382598877
    },
    {
      "epoch": 5.723876953125e-05,
      "model_forward_time": 0.1211090087890625,
      "step": 9378
    },
    {
      "epoch": 5.723876953125e-05,
      "step": 9378,
      "training_step_time": 0.5280208587646484
    },
    {
      "epoch": 5.7244873046875e-05,
      "model_forward_time": 0.12128090858459473,
      "step": 9379
    },
    {
      "epoch": 5.7244873046875e-05,
      "step": 9379,
      "training_step_time": 0.7392737865447998
    },
    {
      "epoch": 5.72509765625e-05,
      "grad_norm": 0.1537083089351654,
      "learning_rate": 9.69404907864883e-05,
      "loss": 0.0719,
      "step": 9380
    },
    {
      "epoch": 5.72509765625e-05,
      "model_forward_time": 0.12193131446838379,
      "step": 9380
    },
    {
      "epoch": 5.72509765625e-05,
      "step": 9380,
      "training_step_time": 0.6420683860778809
    },
    {
      "epoch": 5.7257080078125e-05,
      "model_forward_time": 0.1201486587524414,
      "step": 9381
    },
    {
      "epoch": 5.7257080078125e-05,
      "step": 9381,
      "training_step_time": 0.6787774562835693
    },
    {
      "epoch": 5.726318359375e-05,
      "model_forward_time": 0.12320423126220703,
      "step": 9382
    },
    {
      "epoch": 5.726318359375e-05,
      "step": 9382,
      "training_step_time": 0.6799609661102295
    },
    {
      "epoch": 5.7269287109375e-05,
      "model_forward_time": 0.11710596084594727,
      "step": 9383
    },
    {
      "epoch": 5.7269287109375e-05,
      "step": 9383,
      "training_step_time": 0.6767711639404297
    },
    {
      "epoch": 5.7275390625e-05,
      "model_forward_time": 0.11906003952026367,
      "step": 9384
    },
    {
      "epoch": 5.7275390625e-05,
      "step": 9384,
      "training_step_time": 0.6432862281799316
    },
    {
      "epoch": 5.7281494140625e-05,
      "model_forward_time": 0.12209486961364746,
      "step": 9385
    },
    {
      "epoch": 5.7281494140625e-05,
      "step": 9385,
      "training_step_time": 0.6719856262207031
    },
    {
      "epoch": 5.728759765625e-05,
      "model_forward_time": 0.12160181999206543,
      "step": 9386
    },
    {
      "epoch": 5.728759765625e-05,
      "step": 9386,
      "training_step_time": 0.8089916706085205
    },
    {
      "epoch": 5.7293701171875e-05,
      "model_forward_time": 0.1200876235961914,
      "step": 9387
    },
    {
      "epoch": 5.7293701171875e-05,
      "step": 9387,
      "training_step_time": 0.7694389820098877
    },
    {
      "epoch": 5.72998046875e-05,
      "model_forward_time": 0.13207769393920898,
      "step": 9388
    },
    {
      "epoch": 5.72998046875e-05,
      "step": 9388,
      "training_step_time": 0.7214806079864502
    },
    {
      "epoch": 5.7305908203125e-05,
      "model_forward_time": 0.11897134780883789,
      "step": 9389
    },
    {
      "epoch": 5.7305908203125e-05,
      "step": 9389,
      "training_step_time": 0.6757967472076416
    },
    {
      "epoch": 5.731201171875e-05,
      "grad_norm": 0.30815038084983826,
      "learning_rate": 9.693099174962103e-05,
      "loss": 0.0748,
      "step": 9390
    },
    {
      "epoch": 5.731201171875e-05,
      "model_forward_time": 0.1210181713104248,
      "step": 9390
    },
    {
      "epoch": 5.731201171875e-05,
      "step": 9390,
      "training_step_time": 0.6451663970947266
    },
    {
      "epoch": 5.7318115234375e-05,
      "model_forward_time": 0.11758971214294434,
      "step": 9391
    },
    {
      "epoch": 5.7318115234375e-05,
      "step": 9391,
      "training_step_time": 0.6882035732269287
    },
    {
      "epoch": 5.732421875e-05,
      "model_forward_time": 0.12242460250854492,
      "step": 9392
    },
    {
      "epoch": 5.732421875e-05,
      "step": 9392,
      "training_step_time": 0.6176519393920898
    },
    {
      "epoch": 5.7330322265625e-05,
      "model_forward_time": 0.11850547790527344,
      "step": 9393
    },
    {
      "epoch": 5.7330322265625e-05,
      "step": 9393,
      "training_step_time": 0.6914911270141602
    },
    {
      "epoch": 5.733642578125e-05,
      "model_forward_time": 0.15292572975158691,
      "step": 9394
    },
    {
      "epoch": 5.733642578125e-05,
      "step": 9394,
      "training_step_time": 0.6600751876831055
    },
    {
      "epoch": 5.7342529296875e-05,
      "model_forward_time": 0.12561249732971191,
      "step": 9395
    },
    {
      "epoch": 5.7342529296875e-05,
      "step": 9395,
      "training_step_time": 0.6254377365112305
    },
    {
      "epoch": 5.73486328125e-05,
      "model_forward_time": 0.12814640998840332,
      "step": 9396
    },
    {
      "epoch": 5.73486328125e-05,
      "step": 9396,
      "training_step_time": 0.7326631546020508
    },
    {
      "epoch": 5.7354736328125e-05,
      "model_forward_time": 0.1360476016998291,
      "step": 9397
    },
    {
      "epoch": 5.7354736328125e-05,
      "step": 9397,
      "training_step_time": 0.7097718715667725
    },
    {
      "epoch": 5.736083984375e-05,
      "model_forward_time": 0.12179136276245117,
      "step": 9398
    },
    {
      "epoch": 5.736083984375e-05,
      "step": 9398,
      "training_step_time": 0.7307255268096924
    },
    {
      "epoch": 5.7366943359375e-05,
      "model_forward_time": 0.11951804161071777,
      "step": 9399
    },
    {
      "epoch": 5.7366943359375e-05,
      "step": 9399,
      "training_step_time": 0.7628364562988281
    },
    {
      "epoch": 5.7373046875e-05,
      "grad_norm": 0.2847496271133423,
      "learning_rate": 9.692147845635761e-05,
      "loss": 0.0672,
      "step": 9400
    },
    {
      "epoch": 5.7373046875e-05,
      "model_forward_time": 0.11780786514282227,
      "step": 9400
    },
    {
      "epoch": 5.7373046875e-05,
      "step": 9400,
      "training_step_time": 0.6670196056365967
    },
    {
      "epoch": 5.7379150390625e-05,
      "model_forward_time": 0.12202239036560059,
      "step": 9401
    },
    {
      "epoch": 5.7379150390625e-05,
      "step": 9401,
      "training_step_time": 0.680452823638916
    },
    {
      "epoch": 5.738525390625e-05,
      "model_forward_time": 0.1257336139678955,
      "step": 9402
    },
    {
      "epoch": 5.738525390625e-05,
      "step": 9402,
      "training_step_time": 0.7599136829376221
    },
    {
      "epoch": 5.7391357421875e-05,
      "model_forward_time": 0.13783645629882812,
      "step": 9403
    },
    {
      "epoch": 5.7391357421875e-05,
      "step": 9403,
      "training_step_time": 0.5917932987213135
    },
    {
      "epoch": 5.73974609375e-05,
      "model_forward_time": 0.11800909042358398,
      "step": 9404
    },
    {
      "epoch": 5.73974609375e-05,
      "step": 9404,
      "training_step_time": 0.610898494720459
    },
    {
      "epoch": 5.7403564453125e-05,
      "model_forward_time": 0.12357807159423828,
      "step": 9405
    },
    {
      "epoch": 5.7403564453125e-05,
      "step": 9405,
      "training_step_time": 0.7183575630187988
    },
    {
      "epoch": 5.740966796875e-05,
      "model_forward_time": 0.12342238426208496,
      "step": 9406
    },
    {
      "epoch": 5.740966796875e-05,
      "step": 9406,
      "training_step_time": 0.762920618057251
    },
    {
      "epoch": 5.7415771484375e-05,
      "model_forward_time": 0.12146806716918945,
      "step": 9407
    },
    {
      "epoch": 5.7415771484375e-05,
      "step": 9407,
      "training_step_time": 0.6652207374572754
    },
    {
      "epoch": 5.7421875e-05,
      "model_forward_time": 0.12705516815185547,
      "step": 9408
    },
    {
      "epoch": 5.7421875e-05,
      "step": 9408,
      "training_step_time": 0.6680736541748047
    },
    {
      "epoch": 5.7427978515625e-05,
      "model_forward_time": 0.11984467506408691,
      "step": 9409
    },
    {
      "epoch": 5.7427978515625e-05,
      "step": 9409,
      "training_step_time": 0.6661703586578369
    },
    {
      "epoch": 5.743408203125e-05,
      "grad_norm": 0.18912367522716522,
      "learning_rate": 9.691195090958791e-05,
      "loss": 0.0694,
      "step": 9410
    },
    {
      "epoch": 5.743408203125e-05,
      "model_forward_time": 0.11842799186706543,
      "step": 9410
    },
    {
      "epoch": 5.743408203125e-05,
      "step": 9410,
      "training_step_time": 0.8023533821105957
    },
    {
      "epoch": 5.7440185546875e-05,
      "model_forward_time": 0.12005996704101562,
      "step": 9411
    },
    {
      "epoch": 5.7440185546875e-05,
      "step": 9411,
      "training_step_time": 0.6595070362091064
    },
    {
      "epoch": 5.74462890625e-05,
      "model_forward_time": 0.1181800365447998,
      "step": 9412
    },
    {
      "epoch": 5.74462890625e-05,
      "step": 9412,
      "training_step_time": 0.6431405544281006
    },
    {
      "epoch": 5.7452392578125e-05,
      "model_forward_time": 0.11801433563232422,
      "step": 9413
    },
    {
      "epoch": 5.7452392578125e-05,
      "step": 9413,
      "training_step_time": 0.6365773677825928
    },
    {
      "epoch": 5.745849609375e-05,
      "model_forward_time": 0.12399005889892578,
      "step": 9414
    },
    {
      "epoch": 5.745849609375e-05,
      "step": 9414,
      "training_step_time": 0.6439452171325684
    },
    {
      "epoch": 5.7464599609375e-05,
      "model_forward_time": 0.12155437469482422,
      "step": 9415
    },
    {
      "epoch": 5.7464599609375e-05,
      "step": 9415,
      "training_step_time": 0.5582566261291504
    },
    {
      "epoch": 5.7470703125e-05,
      "model_forward_time": 0.12000417709350586,
      "step": 9416
    },
    {
      "epoch": 5.7470703125e-05,
      "step": 9416,
      "training_step_time": 0.7875931262969971
    },
    {
      "epoch": 5.7476806640625e-05,
      "model_forward_time": 0.1214447021484375,
      "step": 9417
    },
    {
      "epoch": 5.7476806640625e-05,
      "step": 9417,
      "training_step_time": 0.6374223232269287
    },
    {
      "epoch": 5.748291015625e-05,
      "model_forward_time": 0.12436223030090332,
      "step": 9418
    },
    {
      "epoch": 5.748291015625e-05,
      "step": 9418,
      "training_step_time": 0.6301140785217285
    },
    {
      "epoch": 5.7489013671875e-05,
      "model_forward_time": 0.14344096183776855,
      "step": 9419
    },
    {
      "epoch": 5.7489013671875e-05,
      "step": 9419,
      "training_step_time": 0.6613790988922119
    },
    {
      "epoch": 5.74951171875e-05,
      "grad_norm": 0.2880002558231354,
      "learning_rate": 9.690240911220618e-05,
      "loss": 0.071,
      "step": 9420
    },
    {
      "epoch": 5.74951171875e-05,
      "model_forward_time": 0.12971043586730957,
      "step": 9420
    },
    {
      "epoch": 5.74951171875e-05,
      "step": 9420,
      "training_step_time": 0.6787221431732178
    },
    {
      "epoch": 5.7501220703125e-05,
      "model_forward_time": 0.12276816368103027,
      "step": 9421
    },
    {
      "epoch": 5.7501220703125e-05,
      "step": 9421,
      "training_step_time": 0.6327991485595703
    },
    {
      "epoch": 5.750732421875e-05,
      "model_forward_time": 0.11812186241149902,
      "step": 9422
    },
    {
      "epoch": 5.750732421875e-05,
      "step": 9422,
      "training_step_time": 0.677945613861084
    },
    {
      "epoch": 5.7513427734375e-05,
      "model_forward_time": 0.12453031539916992,
      "step": 9423
    },
    {
      "epoch": 5.7513427734375e-05,
      "step": 9423,
      "training_step_time": 0.6235489845275879
    },
    {
      "epoch": 5.751953125e-05,
      "model_forward_time": 0.11930394172668457,
      "step": 9424
    },
    {
      "epoch": 5.751953125e-05,
      "step": 9424,
      "training_step_time": 0.641993522644043
    },
    {
      "epoch": 5.7525634765625e-05,
      "model_forward_time": 0.1206204891204834,
      "step": 9425
    },
    {
      "epoch": 5.7525634765625e-05,
      "step": 9425,
      "training_step_time": 0.7237155437469482
    },
    {
      "epoch": 5.753173828125e-05,
      "model_forward_time": 0.12244963645935059,
      "step": 9426
    },
    {
      "epoch": 5.753173828125e-05,
      "step": 9426,
      "training_step_time": 0.6525983810424805
    },
    {
      "epoch": 5.7537841796875e-05,
      "model_forward_time": 0.12472391128540039,
      "step": 9427
    },
    {
      "epoch": 5.7537841796875e-05,
      "step": 9427,
      "training_step_time": 0.6252651214599609
    },
    {
      "epoch": 5.75439453125e-05,
      "model_forward_time": 0.12731337547302246,
      "step": 9428
    },
    {
      "epoch": 5.75439453125e-05,
      "step": 9428,
      "training_step_time": 0.6327764987945557
    },
    {
      "epoch": 5.7550048828125e-05,
      "model_forward_time": 0.1235198974609375,
      "step": 9429
    },
    {
      "epoch": 5.7550048828125e-05,
      "step": 9429,
      "training_step_time": 0.655360221862793
    },
    {
      "epoch": 5.755615234375e-05,
      "grad_norm": 0.2191612869501114,
      "learning_rate": 9.689285306711094e-05,
      "loss": 0.0694,
      "step": 9430
    },
    {
      "epoch": 5.755615234375e-05,
      "model_forward_time": 0.12368559837341309,
      "step": 9430
    },
    {
      "epoch": 5.755615234375e-05,
      "step": 9430,
      "training_step_time": 0.6378939151763916
    },
    {
      "epoch": 5.7562255859375e-05,
      "model_forward_time": 0.12227392196655273,
      "step": 9431
    },
    {
      "epoch": 5.7562255859375e-05,
      "step": 9431,
      "training_step_time": 0.5162143707275391
    },
    {
      "epoch": 5.7568359375e-05,
      "model_forward_time": 0.13411545753479004,
      "step": 9432
    },
    {
      "epoch": 5.7568359375e-05,
      "step": 9432,
      "training_step_time": 0.4658536911010742
    },
    {
      "epoch": 5.7574462890625e-05,
      "model_forward_time": 0.11875462532043457,
      "step": 9433
    },
    {
      "epoch": 5.7574462890625e-05,
      "step": 9433,
      "training_step_time": 0.466153621673584
    },
    {
      "epoch": 5.758056640625e-05,
      "model_forward_time": 0.11843228340148926,
      "step": 9434
    },
    {
      "epoch": 5.758056640625e-05,
      "step": 9434,
      "training_step_time": 0.45662665367126465
    },
    {
      "epoch": 5.7586669921875e-05,
      "model_forward_time": 0.1187586784362793,
      "step": 9435
    },
    {
      "epoch": 5.7586669921875e-05,
      "step": 9435,
      "training_step_time": 0.4929208755493164
    },
    {
      "epoch": 5.75927734375e-05,
      "model_forward_time": 0.1181175708770752,
      "step": 9436
    },
    {
      "epoch": 5.75927734375e-05,
      "step": 9436,
      "training_step_time": 0.47150278091430664
    },
    {
      "epoch": 5.7598876953125e-05,
      "model_forward_time": 0.11723446846008301,
      "step": 9437
    },
    {
      "epoch": 5.7598876953125e-05,
      "step": 9437,
      "training_step_time": 0.4730956554412842
    },
    {
      "epoch": 5.760498046875e-05,
      "model_forward_time": 0.11790752410888672,
      "step": 9438
    },
    {
      "epoch": 5.760498046875e-05,
      "step": 9438,
      "training_step_time": 0.43218135833740234
    },
    {
      "epoch": 5.7611083984375e-05,
      "model_forward_time": 0.11721348762512207,
      "step": 9439
    },
    {
      "epoch": 5.7611083984375e-05,
      "step": 9439,
      "training_step_time": 0.4312410354614258
    },
    {
      "epoch": 5.76171875e-05,
      "grad_norm": 0.2803010046482086,
      "learning_rate": 9.688328277720507e-05,
      "loss": 0.0747,
      "step": 9440
    },
    {
      "epoch": 5.76171875e-05,
      "model_forward_time": 0.11781525611877441,
      "step": 9440
    },
    {
      "epoch": 5.76171875e-05,
      "step": 9440,
      "training_step_time": 0.42047691345214844
    },
    {
      "epoch": 5.7623291015625e-05,
      "model_forward_time": 0.11765551567077637,
      "step": 9441
    },
    {
      "epoch": 5.7623291015625e-05,
      "step": 9441,
      "training_step_time": 0.3861565589904785
    },
    {
      "epoch": 5.762939453125e-05,
      "model_forward_time": 0.11700582504272461,
      "step": 9442
    },
    {
      "epoch": 5.762939453125e-05,
      "step": 9442,
      "training_step_time": 0.4199502468109131
    },
    {
      "epoch": 5.7635498046875e-05,
      "model_forward_time": 0.11671280860900879,
      "step": 9443
    },
    {
      "epoch": 5.7635498046875e-05,
      "step": 9443,
      "training_step_time": 0.4292759895324707
    },
    {
      "epoch": 5.76416015625e-05,
      "model_forward_time": 0.11775970458984375,
      "step": 9444
    },
    {
      "epoch": 5.76416015625e-05,
      "step": 9444,
      "training_step_time": 0.47911715507507324
    },
    {
      "epoch": 5.7647705078125e-05,
      "model_forward_time": 0.11624693870544434,
      "step": 9445
    },
    {
      "epoch": 5.7647705078125e-05,
      "step": 9445,
      "training_step_time": 0.38814330101013184
    },
    {
      "epoch": 5.765380859375e-05,
      "model_forward_time": 0.11796426773071289,
      "step": 9446
    },
    {
      "epoch": 5.765380859375e-05,
      "step": 9446,
      "training_step_time": 0.3896613121032715
    },
    {
      "epoch": 5.7659912109375e-05,
      "model_forward_time": 0.11618924140930176,
      "step": 9447
    },
    {
      "epoch": 5.7659912109375e-05,
      "step": 9447,
      "training_step_time": 0.3796682357788086
    },
    {
      "epoch": 5.7666015625e-05,
      "model_forward_time": 0.1164555549621582,
      "step": 9448
    },
    {
      "epoch": 5.7666015625e-05,
      "step": 9448,
      "training_step_time": 0.3981647491455078
    },
    {
      "epoch": 5.7672119140625e-05,
      "model_forward_time": 0.11690425872802734,
      "step": 9449
    },
    {
      "epoch": 5.7672119140625e-05,
      "step": 9449,
      "training_step_time": 0.38730931282043457
    },
    {
      "epoch": 5.767822265625e-05,
      "grad_norm": 0.26780134439468384,
      "learning_rate": 9.687369824539577e-05,
      "loss": 0.0677,
      "step": 9450
    },
    {
      "epoch": 5.767822265625e-05,
      "model_forward_time": 0.11697864532470703,
      "step": 9450
    },
    {
      "epoch": 5.767822265625e-05,
      "step": 9450,
      "training_step_time": 0.44214630126953125
    },
    {
      "epoch": 5.7684326171875e-05,
      "model_forward_time": 0.11728930473327637,
      "step": 9451
    },
    {
      "epoch": 5.7684326171875e-05,
      "step": 9451,
      "training_step_time": 0.47316431999206543
    },
    {
      "epoch": 5.76904296875e-05,
      "model_forward_time": 0.11707568168640137,
      "step": 9452
    },
    {
      "epoch": 5.76904296875e-05,
      "step": 9452,
      "training_step_time": 0.3917689323425293
    },
    {
      "epoch": 5.7696533203125e-05,
      "model_forward_time": 0.11795401573181152,
      "step": 9453
    },
    {
      "epoch": 5.7696533203125e-05,
      "step": 9453,
      "training_step_time": 0.44582080841064453
    },
    {
      "epoch": 5.770263671875e-05,
      "model_forward_time": 0.11687254905700684,
      "step": 9454
    },
    {
      "epoch": 5.770263671875e-05,
      "step": 9454,
      "training_step_time": 0.4589402675628662
    },
    {
      "epoch": 5.7708740234375e-05,
      "model_forward_time": 0.11688566207885742,
      "step": 9455
    },
    {
      "epoch": 5.7708740234375e-05,
      "step": 9455,
      "training_step_time": 0.40561771392822266
    },
    {
      "epoch": 5.771484375e-05,
      "model_forward_time": 0.11695384979248047,
      "step": 9456
    },
    {
      "epoch": 5.771484375e-05,
      "step": 9456,
      "training_step_time": 0.39952754974365234
    },
    {
      "epoch": 5.7720947265625e-05,
      "model_forward_time": 0.11645936965942383,
      "step": 9457
    },
    {
      "epoch": 5.7720947265625e-05,
      "step": 9457,
      "training_step_time": 0.38648176193237305
    },
    {
      "epoch": 5.772705078125e-05,
      "model_forward_time": 0.11614489555358887,
      "step": 9458
    },
    {
      "epoch": 5.772705078125e-05,
      "step": 9458,
      "training_step_time": 0.4328780174255371
    },
    {
      "epoch": 5.7733154296875e-05,
      "model_forward_time": 0.11652231216430664,
      "step": 9459
    },
    {
      "epoch": 5.7733154296875e-05,
      "step": 9459,
      "training_step_time": 0.39013147354125977
    },
    {
      "epoch": 5.77392578125e-05,
      "grad_norm": 0.3039533495903015,
      "learning_rate": 9.686409947459458e-05,
      "loss": 0.0741,
      "step": 9460
    },
    {
      "epoch": 5.77392578125e-05,
      "model_forward_time": 0.11598372459411621,
      "step": 9460
    },
    {
      "epoch": 5.77392578125e-05,
      "step": 9460,
      "training_step_time": 0.38805365562438965
    },
    {
      "epoch": 5.7745361328125e-05,
      "model_forward_time": 0.11663103103637695,
      "step": 9461
    },
    {
      "epoch": 5.7745361328125e-05,
      "step": 9461,
      "training_step_time": 0.3811643123626709
    },
    {
      "epoch": 5.775146484375e-05,
      "model_forward_time": 0.11619114875793457,
      "step": 9462
    },
    {
      "epoch": 5.775146484375e-05,
      "step": 9462,
      "training_step_time": 0.3744809627532959
    },
    {
      "epoch": 5.7757568359375e-05,
      "model_forward_time": 0.11649370193481445,
      "step": 9463
    },
    {
      "epoch": 5.7757568359375e-05,
      "step": 9463,
      "training_step_time": 0.41745901107788086
    },
    {
      "epoch": 5.7763671875e-05,
      "model_forward_time": 0.11874175071716309,
      "step": 9464
    },
    {
      "epoch": 5.7763671875e-05,
      "step": 9464,
      "training_step_time": 0.5276858806610107
    },
    {
      "epoch": 5.7769775390625e-05,
      "model_forward_time": 0.11609196662902832,
      "step": 9465
    },
    {
      "epoch": 5.7769775390625e-05,
      "step": 9465,
      "training_step_time": 0.37805843353271484
    },
    {
      "epoch": 5.777587890625e-05,
      "model_forward_time": 0.11652135848999023,
      "step": 9466
    },
    {
      "epoch": 5.777587890625e-05,
      "step": 9466,
      "training_step_time": 0.4298877716064453
    },
    {
      "epoch": 5.7781982421875e-05,
      "model_forward_time": 0.1171419620513916,
      "step": 9467
    },
    {
      "epoch": 5.7781982421875e-05,
      "step": 9467,
      "training_step_time": 0.4444882869720459
    },
    {
      "epoch": 5.77880859375e-05,
      "model_forward_time": 0.11682772636413574,
      "step": 9468
    },
    {
      "epoch": 5.77880859375e-05,
      "step": 9468,
      "training_step_time": 0.46605396270751953
    },
    {
      "epoch": 5.7794189453125e-05,
      "model_forward_time": 0.11655092239379883,
      "step": 9469
    },
    {
      "epoch": 5.7794189453125e-05,
      "step": 9469,
      "training_step_time": 0.3961038589477539
    },
    {
      "epoch": 5.780029296875e-05,
      "grad_norm": 0.22055280208587646,
      "learning_rate": 9.685448646771734e-05,
      "loss": 0.07,
      "step": 9470
    },
    {
      "epoch": 5.780029296875e-05,
      "model_forward_time": 0.11766886711120605,
      "step": 9470
    },
    {
      "epoch": 5.780029296875e-05,
      "step": 9470,
      "training_step_time": 0.3874833583831787
    },
    {
      "epoch": 5.7806396484375e-05,
      "model_forward_time": 0.11807513236999512,
      "step": 9471
    },
    {
      "epoch": 5.7806396484375e-05,
      "step": 9471,
      "training_step_time": 0.4350142478942871
    },
    {
      "epoch": 5.78125e-05,
      "model_forward_time": 0.11676430702209473,
      "step": 9472
    },
    {
      "epoch": 5.78125e-05,
      "step": 9472,
      "training_step_time": 0.3935821056365967
    },
    {
      "epoch": 5.7818603515625e-05,
      "model_forward_time": 0.11695075035095215,
      "step": 9473
    },
    {
      "epoch": 5.7818603515625e-05,
      "step": 9473,
      "training_step_time": 0.410388708114624
    },
    {
      "epoch": 5.782470703125e-05,
      "model_forward_time": 0.11671757698059082,
      "step": 9474
    },
    {
      "epoch": 5.782470703125e-05,
      "step": 9474,
      "training_step_time": 0.378002405166626
    },
    {
      "epoch": 5.7830810546875e-05,
      "model_forward_time": 0.1181032657623291,
      "step": 9475
    },
    {
      "epoch": 5.7830810546875e-05,
      "step": 9475,
      "training_step_time": 0.8713293075561523
    },
    {
      "epoch": 5.78369140625e-05,
      "model_forward_time": 0.11677241325378418,
      "step": 9476
    },
    {
      "epoch": 5.78369140625e-05,
      "step": 9476,
      "training_step_time": 0.3863108158111572
    },
    {
      "epoch": 5.7843017578125e-05,
      "model_forward_time": 0.1165308952331543,
      "step": 9477
    },
    {
      "epoch": 5.7843017578125e-05,
      "step": 9477,
      "training_step_time": 0.3740994930267334
    },
    {
      "epoch": 5.784912109375e-05,
      "model_forward_time": 0.1164083480834961,
      "step": 9478
    },
    {
      "epoch": 5.784912109375e-05,
      "step": 9478,
      "training_step_time": 0.3903357982635498
    },
    {
      "epoch": 5.7855224609375e-05,
      "model_forward_time": 0.11645960807800293,
      "step": 9479
    },
    {
      "epoch": 5.7855224609375e-05,
      "step": 9479,
      "training_step_time": 0.43315792083740234
    },
    {
      "epoch": 5.7861328125e-05,
      "grad_norm": 0.1914362609386444,
      "learning_rate": 9.684485922768422e-05,
      "loss": 0.0687,
      "step": 9480
    },
    {
      "epoch": 5.7861328125e-05,
      "model_forward_time": 0.11578750610351562,
      "step": 9480
    },
    {
      "epoch": 5.7861328125e-05,
      "step": 9480,
      "training_step_time": 0.3665149211883545
    },
    {
      "epoch": 5.7867431640625e-05,
      "model_forward_time": 0.11713051795959473,
      "step": 9481
    },
    {
      "epoch": 5.7867431640625e-05,
      "step": 9481,
      "training_step_time": 0.874824047088623
    },
    {
      "epoch": 5.787353515625e-05,
      "model_forward_time": 0.1170954704284668,
      "step": 9482
    },
    {
      "epoch": 5.787353515625e-05,
      "step": 9482,
      "training_step_time": 0.39029884338378906
    },
    {
      "epoch": 5.7879638671875e-05,
      "model_forward_time": 0.11616086959838867,
      "step": 9483
    },
    {
      "epoch": 5.7879638671875e-05,
      "step": 9483,
      "training_step_time": 0.38860487937927246
    },
    {
      "epoch": 5.78857421875e-05,
      "model_forward_time": 0.11601042747497559,
      "step": 9484
    },
    {
      "epoch": 5.78857421875e-05,
      "step": 9484,
      "training_step_time": 0.4139859676361084
    },
    {
      "epoch": 5.7891845703125e-05,
      "model_forward_time": 0.11660528182983398,
      "step": 9485
    },
    {
      "epoch": 5.7891845703125e-05,
      "step": 9485,
      "training_step_time": 0.37779903411865234
    },
    {
      "epoch": 5.789794921875e-05,
      "model_forward_time": 0.11639881134033203,
      "step": 9486
    },
    {
      "epoch": 5.789794921875e-05,
      "step": 9486,
      "training_step_time": 0.48377299308776855
    },
    {
      "epoch": 5.7904052734375e-05,
      "model_forward_time": 0.11725044250488281,
      "step": 9487
    },
    {
      "epoch": 5.7904052734375e-05,
      "step": 9487,
      "training_step_time": 0.7036190032958984
    },
    {
      "epoch": 5.791015625e-05,
      "model_forward_time": 0.1157691478729248,
      "step": 9488
    },
    {
      "epoch": 5.791015625e-05,
      "step": 9488,
      "training_step_time": 0.3826334476470947
    },
    {
      "epoch": 5.7916259765625e-05,
      "model_forward_time": 0.11582183837890625,
      "step": 9489
    },
    {
      "epoch": 5.7916259765625e-05,
      "step": 9489,
      "training_step_time": 0.379410982131958
    },
    {
      "epoch": 5.792236328125e-05,
      "grad_norm": 0.16904878616333008,
      "learning_rate": 9.683521775741977e-05,
      "loss": 0.076,
      "step": 9490
    },
    {
      "epoch": 5.792236328125e-05,
      "model_forward_time": 0.11838221549987793,
      "step": 9490
    },
    {
      "epoch": 5.792236328125e-05,
      "step": 9490,
      "training_step_time": 0.3823251724243164
    },
    {
      "epoch": 5.7928466796875e-05,
      "model_forward_time": 0.11722493171691895,
      "step": 9491
    },
    {
      "epoch": 5.7928466796875e-05,
      "step": 9491,
      "training_step_time": 0.3993949890136719
    },
    {
      "epoch": 5.79345703125e-05,
      "model_forward_time": 0.11725997924804688,
      "step": 9492
    },
    {
      "epoch": 5.79345703125e-05,
      "step": 9492,
      "training_step_time": 0.4142036437988281
    },
    {
      "epoch": 5.7940673828125e-05,
      "model_forward_time": 0.1166839599609375,
      "step": 9493
    },
    {
      "epoch": 5.7940673828125e-05,
      "step": 9493,
      "training_step_time": 1.1435191631317139
    },
    {
      "epoch": 5.794677734375e-05,
      "model_forward_time": 0.11851119995117188,
      "step": 9494
    },
    {
      "epoch": 5.794677734375e-05,
      "step": 9494,
      "training_step_time": 0.3979768753051758
    },
    {
      "epoch": 5.7952880859375e-05,
      "model_forward_time": 0.1161956787109375,
      "step": 9495
    },
    {
      "epoch": 5.7952880859375e-05,
      "step": 9495,
      "training_step_time": 0.37911295890808105
    },
    {
      "epoch": 5.7958984375e-05,
      "model_forward_time": 0.11615657806396484,
      "step": 9496
    },
    {
      "epoch": 5.7958984375e-05,
      "step": 9496,
      "training_step_time": 0.37979960441589355
    },
    {
      "epoch": 5.7965087890625e-05,
      "model_forward_time": 0.11525726318359375,
      "step": 9497
    },
    {
      "epoch": 5.7965087890625e-05,
      "step": 9497,
      "training_step_time": 0.38153696060180664
    },
    {
      "epoch": 5.797119140625e-05,
      "model_forward_time": 0.11847281455993652,
      "step": 9498
    },
    {
      "epoch": 5.797119140625e-05,
      "step": 9498,
      "training_step_time": 0.39170384407043457
    },
    {
      "epoch": 5.7977294921875e-05,
      "model_forward_time": 0.11696982383728027,
      "step": 9499
    },
    {
      "epoch": 5.7977294921875e-05,
      "step": 9499,
      "training_step_time": 0.7373442649841309
    },
    {
      "epoch": 5.79833984375e-05,
      "grad_norm": 0.2697041630744934,
      "learning_rate": 9.682556205985274e-05,
      "loss": 0.0732,
      "step": 9500
    },
    {
      "epoch": 5.79833984375e-05,
      "model_forward_time": 0.1161036491394043,
      "step": 9500
    },
    {
      "epoch": 5.79833984375e-05,
      "step": 9500,
      "training_step_time": 0.39294958114624023
    },
    {
      "epoch": 5.7989501953125e-05,
      "model_forward_time": 0.11611747741699219,
      "step": 9501
    },
    {
      "epoch": 5.7989501953125e-05,
      "step": 9501,
      "training_step_time": 0.39237308502197266
    },
    {
      "epoch": 5.799560546875e-05,
      "model_forward_time": 0.11878728866577148,
      "step": 9502
    },
    {
      "epoch": 5.799560546875e-05,
      "step": 9502,
      "training_step_time": 0.3834877014160156
    },
    {
      "epoch": 5.8001708984375e-05,
      "model_forward_time": 0.1168513298034668,
      "step": 9503
    },
    {
      "epoch": 5.8001708984375e-05,
      "step": 9503,
      "training_step_time": 0.447340726852417
    },
    {
      "epoch": 5.80078125e-05,
      "model_forward_time": 0.11612796783447266,
      "step": 9504
    },
    {
      "epoch": 5.80078125e-05,
      "step": 9504,
      "training_step_time": 0.46581578254699707
    },
    {
      "epoch": 5.8013916015625e-05,
      "model_forward_time": 0.1173713207244873,
      "step": 9505
    },
    {
      "epoch": 5.8013916015625e-05,
      "step": 9505,
      "training_step_time": 0.49932122230529785
    },
    {
      "epoch": 5.802001953125e-05,
      "model_forward_time": 0.11564040184020996,
      "step": 9506
    },
    {
      "epoch": 5.802001953125e-05,
      "step": 9506,
      "training_step_time": 0.4080085754394531
    },
    {
      "epoch": 5.8026123046875e-05,
      "model_forward_time": 0.11705303192138672,
      "step": 9507
    },
    {
      "epoch": 5.8026123046875e-05,
      "step": 9507,
      "training_step_time": 0.4550437927246094
    },
    {
      "epoch": 5.80322265625e-05,
      "model_forward_time": 0.11650657653808594,
      "step": 9508
    },
    {
      "epoch": 5.80322265625e-05,
      "step": 9508,
      "training_step_time": 0.48346972465515137
    },
    {
      "epoch": 5.8038330078125e-05,
      "model_forward_time": 0.11632823944091797,
      "step": 9509
    },
    {
      "epoch": 5.8038330078125e-05,
      "step": 9509,
      "training_step_time": 0.3897392749786377
    },
    {
      "epoch": 5.804443359375e-05,
      "grad_norm": 0.2923339307308197,
      "learning_rate": 9.681589213791633e-05,
      "loss": 0.0659,
      "step": 9510
    },
    {
      "epoch": 5.804443359375e-05,
      "model_forward_time": 0.11851835250854492,
      "step": 9510
    },
    {
      "epoch": 5.804443359375e-05,
      "step": 9510,
      "training_step_time": 0.4239070415496826
    },
    {
      "epoch": 5.8050537109375e-05,
      "model_forward_time": 0.11677789688110352,
      "step": 9511
    },
    {
      "epoch": 5.8050537109375e-05,
      "step": 9511,
      "training_step_time": 0.9121134281158447
    },
    {
      "epoch": 5.8056640625e-05,
      "model_forward_time": 0.11578965187072754,
      "step": 9512
    },
    {
      "epoch": 5.8056640625e-05,
      "step": 9512,
      "training_step_time": 0.37984633445739746
    },
    {
      "epoch": 5.8062744140625e-05,
      "model_forward_time": 0.1157224178314209,
      "step": 9513
    },
    {
      "epoch": 5.8062744140625e-05,
      "step": 9513,
      "training_step_time": 0.37614011764526367
    },
    {
      "epoch": 5.806884765625e-05,
      "model_forward_time": 0.1157228946685791,
      "step": 9514
    },
    {
      "epoch": 5.806884765625e-05,
      "step": 9514,
      "training_step_time": 0.38257837295532227
    },
    {
      "epoch": 5.8074951171875e-05,
      "model_forward_time": 0.11699604988098145,
      "step": 9515
    },
    {
      "epoch": 5.8074951171875e-05,
      "step": 9515,
      "training_step_time": 0.4536314010620117
    },
    {
      "epoch": 5.80810546875e-05,
      "model_forward_time": 0.11700701713562012,
      "step": 9516
    },
    {
      "epoch": 5.80810546875e-05,
      "step": 9516,
      "training_step_time": 0.4200737476348877
    },
    {
      "epoch": 5.8087158203125e-05,
      "model_forward_time": 0.11674880981445312,
      "step": 9517
    },
    {
      "epoch": 5.8087158203125e-05,
      "step": 9517,
      "training_step_time": 0.7622909545898438
    },
    {
      "epoch": 5.809326171875e-05,
      "model_forward_time": 0.11607694625854492,
      "step": 9518
    },
    {
      "epoch": 5.809326171875e-05,
      "step": 9518,
      "training_step_time": 0.40996861457824707
    },
    {
      "epoch": 5.8099365234375e-05,
      "model_forward_time": 0.11675572395324707,
      "step": 9519
    },
    {
      "epoch": 5.8099365234375e-05,
      "step": 9519,
      "training_step_time": 0.3887486457824707
    },
    {
      "epoch": 5.810546875e-05,
      "grad_norm": 0.23632891476154327,
      "learning_rate": 9.6806207994548e-05,
      "loss": 0.0747,
      "step": 9520
    },
    {
      "epoch": 5.810546875e-05,
      "model_forward_time": 0.11649441719055176,
      "step": 9520
    },
    {
      "epoch": 5.810546875e-05,
      "step": 9520,
      "training_step_time": 0.4258885383605957
    },
    {
      "epoch": 5.8111572265625e-05,
      "model_forward_time": 0.11635208129882812,
      "step": 9521
    },
    {
      "epoch": 5.8111572265625e-05,
      "step": 9521,
      "training_step_time": 0.47208666801452637
    },
    {
      "epoch": 5.811767578125e-05,
      "model_forward_time": 0.11711859703063965,
      "step": 9522
    },
    {
      "epoch": 5.811767578125e-05,
      "step": 9522,
      "training_step_time": 0.436598539352417
    },
    {
      "epoch": 5.8123779296875e-05,
      "model_forward_time": 0.11738991737365723,
      "step": 9523
    },
    {
      "epoch": 5.8123779296875e-05,
      "step": 9523,
      "training_step_time": 0.7464852333068848
    },
    {
      "epoch": 5.81298828125e-05,
      "model_forward_time": 0.11539959907531738,
      "step": 9524
    },
    {
      "epoch": 5.81298828125e-05,
      "step": 9524,
      "training_step_time": 0.3833889961242676
    },
    {
      "epoch": 5.8135986328125e-05,
      "model_forward_time": 0.11659049987792969,
      "step": 9525
    },
    {
      "epoch": 5.8135986328125e-05,
      "step": 9525,
      "training_step_time": 0.37415504455566406
    },
    {
      "epoch": 5.814208984375e-05,
      "model_forward_time": 0.11587882041931152,
      "step": 9526
    },
    {
      "epoch": 5.814208984375e-05,
      "step": 9526,
      "training_step_time": 0.3713195323944092
    },
    {
      "epoch": 5.8148193359375e-05,
      "model_forward_time": 0.11644792556762695,
      "step": 9527
    },
    {
      "epoch": 5.8148193359375e-05,
      "step": 9527,
      "training_step_time": 0.3780806064605713
    },
    {
      "epoch": 5.8154296875e-05,
      "model_forward_time": 0.11683535575866699,
      "step": 9528
    },
    {
      "epoch": 5.8154296875e-05,
      "step": 9528,
      "training_step_time": 0.41649532318115234
    },
    {
      "epoch": 5.8160400390625e-05,
      "model_forward_time": 0.11637187004089355,
      "step": 9529
    },
    {
      "epoch": 5.8160400390625e-05,
      "step": 9529,
      "training_step_time": 0.9857354164123535
    },
    {
      "epoch": 5.816650390625e-05,
      "grad_norm": 0.22883743047714233,
      "learning_rate": 9.679650963268951e-05,
      "loss": 0.0727,
      "step": 9530
    },
    {
      "epoch": 5.816650390625e-05,
      "model_forward_time": 0.1149282455444336,
      "step": 9530
    },
    {
      "epoch": 5.816650390625e-05,
      "step": 9530,
      "training_step_time": 0.3946201801300049
    },
    {
      "epoch": 5.8172607421875e-05,
      "model_forward_time": 0.11581611633300781,
      "step": 9531
    },
    {
      "epoch": 5.8172607421875e-05,
      "step": 9531,
      "training_step_time": 0.36579060554504395
    },
    {
      "epoch": 5.81787109375e-05,
      "model_forward_time": 0.1161806583404541,
      "step": 9532
    },
    {
      "epoch": 5.81787109375e-05,
      "step": 9532,
      "training_step_time": 0.38465094566345215
    },
    {
      "epoch": 5.8184814453125e-05,
      "model_forward_time": 0.1154642105102539,
      "step": 9533
    },
    {
      "epoch": 5.8184814453125e-05,
      "step": 9533,
      "training_step_time": 0.44225406646728516
    },
    {
      "epoch": 5.819091796875e-05,
      "model_forward_time": 0.11646056175231934,
      "step": 9534
    },
    {
      "epoch": 5.819091796875e-05,
      "step": 9534,
      "training_step_time": 0.3831338882446289
    },
    {
      "epoch": 5.8197021484375e-05,
      "model_forward_time": 0.11590266227722168,
      "step": 9535
    },
    {
      "epoch": 5.8197021484375e-05,
      "step": 9535,
      "training_step_time": 1.1080021858215332
    },
    {
      "epoch": 5.8203125e-05,
      "model_forward_time": 0.11513733863830566,
      "step": 9536
    },
    {
      "epoch": 5.8203125e-05,
      "step": 9536,
      "training_step_time": 0.3744213581085205
    },
    {
      "epoch": 5.8209228515625e-05,
      "model_forward_time": 0.1152188777923584,
      "step": 9537
    },
    {
      "epoch": 5.8209228515625e-05,
      "step": 9537,
      "training_step_time": 0.3915212154388428
    },
    {
      "epoch": 5.821533203125e-05,
      "model_forward_time": 0.11520910263061523,
      "step": 9538
    },
    {
      "epoch": 5.821533203125e-05,
      "step": 9538,
      "training_step_time": 0.38069629669189453
    },
    {
      "epoch": 5.8221435546875e-05,
      "model_forward_time": 0.11506319046020508,
      "step": 9539
    },
    {
      "epoch": 5.8221435546875e-05,
      "step": 9539,
      "training_step_time": 0.4472053050994873
    },
    {
      "epoch": 5.82275390625e-05,
      "grad_norm": 0.23921914398670197,
      "learning_rate": 9.6786797055287e-05,
      "loss": 0.0714,
      "step": 9540
    },
    {
      "epoch": 5.82275390625e-05,
      "model_forward_time": 0.11561942100524902,
      "step": 9540
    },
    {
      "epoch": 5.82275390625e-05,
      "step": 9540,
      "training_step_time": 0.4337763786315918
    },
    {
      "epoch": 5.8233642578125e-05,
      "model_forward_time": 0.1165931224822998,
      "step": 9541
    },
    {
      "epoch": 5.8233642578125e-05,
      "step": 9541,
      "training_step_time": 0.6809852123260498
    },
    {
      "epoch": 5.823974609375e-05,
      "model_forward_time": 0.11541414260864258,
      "step": 9542
    },
    {
      "epoch": 5.823974609375e-05,
      "step": 9542,
      "training_step_time": 0.3936896324157715
    },
    {
      "epoch": 5.8245849609375e-05,
      "model_forward_time": 0.11678934097290039,
      "step": 9543
    },
    {
      "epoch": 5.8245849609375e-05,
      "step": 9543,
      "training_step_time": 0.3925750255584717
    },
    {
      "epoch": 5.8251953125e-05,
      "model_forward_time": 0.11674213409423828,
      "step": 9544
    },
    {
      "epoch": 5.8251953125e-05,
      "step": 9544,
      "training_step_time": 0.3693535327911377
    },
    {
      "epoch": 5.8258056640625e-05,
      "model_forward_time": 0.11605453491210938,
      "step": 9545
    },
    {
      "epoch": 5.8258056640625e-05,
      "step": 9545,
      "training_step_time": 0.4220314025878906
    },
    {
      "epoch": 5.826416015625e-05,
      "model_forward_time": 0.1163022518157959,
      "step": 9546
    },
    {
      "epoch": 5.826416015625e-05,
      "step": 9546,
      "training_step_time": 0.4514920711517334
    },
    {
      "epoch": 5.8270263671875e-05,
      "model_forward_time": 0.1170191764831543,
      "step": 9547
    },
    {
      "epoch": 5.8270263671875e-05,
      "step": 9547,
      "training_step_time": 0.8492803573608398
    },
    {
      "epoch": 5.82763671875e-05,
      "model_forward_time": 0.11595964431762695,
      "step": 9548
    },
    {
      "epoch": 5.82763671875e-05,
      "step": 9548,
      "training_step_time": 0.467451810836792
    },
    {
      "epoch": 5.8282470703125e-05,
      "model_forward_time": 0.1157844066619873,
      "step": 9549
    },
    {
      "epoch": 5.8282470703125e-05,
      "step": 9549,
      "training_step_time": 0.38329505920410156
    },
    {
      "epoch": 5.828857421875e-05,
      "grad_norm": 0.1840805858373642,
      "learning_rate": 9.677707026529086e-05,
      "loss": 0.0712,
      "step": 9550
    },
    {
      "epoch": 5.828857421875e-05,
      "model_forward_time": 0.11544036865234375,
      "step": 9550
    },
    {
      "epoch": 5.828857421875e-05,
      "step": 9550,
      "training_step_time": 0.3803737163543701
    },
    {
      "epoch": 5.8294677734375e-05,
      "model_forward_time": 0.11699557304382324,
      "step": 9551
    },
    {
      "epoch": 5.8294677734375e-05,
      "step": 9551,
      "training_step_time": 0.37868571281433105
    },
    {
      "epoch": 5.830078125e-05,
      "model_forward_time": 0.11675381660461426,
      "step": 9552
    },
    {
      "epoch": 5.830078125e-05,
      "step": 9552,
      "training_step_time": 0.38691234588623047
    },
    {
      "epoch": 5.8306884765625e-05,
      "model_forward_time": 0.1164848804473877,
      "step": 9553
    },
    {
      "epoch": 5.8306884765625e-05,
      "step": 9553,
      "training_step_time": 0.5684027671813965
    },
    {
      "epoch": 5.831298828125e-05,
      "model_forward_time": 0.11659955978393555,
      "step": 9554
    },
    {
      "epoch": 5.831298828125e-05,
      "step": 9554,
      "training_step_time": 0.38916468620300293
    },
    {
      "epoch": 5.8319091796875e-05,
      "model_forward_time": 0.11963486671447754,
      "step": 9555
    },
    {
      "epoch": 5.8319091796875e-05,
      "step": 9555,
      "training_step_time": 0.4104270935058594
    },
    {
      "epoch": 5.83251953125e-05,
      "model_forward_time": 0.11909222602844238,
      "step": 9556
    },
    {
      "epoch": 5.83251953125e-05,
      "step": 9556,
      "training_step_time": 0.4064962863922119
    },
    {
      "epoch": 5.8331298828125e-05,
      "model_forward_time": 0.11685967445373535,
      "step": 9557
    },
    {
      "epoch": 5.8331298828125e-05,
      "step": 9557,
      "training_step_time": 0.4985060691833496
    },
    {
      "epoch": 5.833740234375e-05,
      "model_forward_time": 0.11827325820922852,
      "step": 9558
    },
    {
      "epoch": 5.833740234375e-05,
      "step": 9558,
      "training_step_time": 0.38904237747192383
    },
    {
      "epoch": 5.8343505859375e-05,
      "model_forward_time": 0.11787009239196777,
      "step": 9559
    },
    {
      "epoch": 5.8343505859375e-05,
      "step": 9559,
      "training_step_time": 0.8881146907806396
    },
    {
      "epoch": 5.8349609375e-05,
      "grad_norm": 0.21581117808818817,
      "learning_rate": 9.676732926565585e-05,
      "loss": 0.0706,
      "step": 9560
    },
    {
      "epoch": 5.8349609375e-05,
      "model_forward_time": 0.11540651321411133,
      "step": 9560
    },
    {
      "epoch": 5.8349609375e-05,
      "step": 9560,
      "training_step_time": 0.43858885765075684
    },
    {
      "epoch": 5.8355712890625e-05,
      "model_forward_time": 0.11602306365966797,
      "step": 9561
    },
    {
      "epoch": 5.8355712890625e-05,
      "step": 9561,
      "training_step_time": 0.40024805068969727
    },
    {
      "epoch": 5.836181640625e-05,
      "model_forward_time": 0.11583685874938965,
      "step": 9562
    },
    {
      "epoch": 5.836181640625e-05,
      "step": 9562,
      "training_step_time": 0.3841536045074463
    },
    {
      "epoch": 5.8367919921875e-05,
      "model_forward_time": 0.11582088470458984,
      "step": 9563
    },
    {
      "epoch": 5.8367919921875e-05,
      "step": 9563,
      "training_step_time": 0.4090249538421631
    },
    {
      "epoch": 5.83740234375e-05,
      "model_forward_time": 0.11675786972045898,
      "step": 9564
    },
    {
      "epoch": 5.83740234375e-05,
      "step": 9564,
      "training_step_time": 0.37223315238952637
    },
    {
      "epoch": 5.8380126953125e-05,
      "model_forward_time": 0.11641860008239746,
      "step": 9565
    },
    {
      "epoch": 5.8380126953125e-05,
      "step": 9565,
      "training_step_time": 0.38636326789855957
    },
    {
      "epoch": 5.838623046875e-05,
      "model_forward_time": 0.11611104011535645,
      "step": 9566
    },
    {
      "epoch": 5.838623046875e-05,
      "step": 9566,
      "training_step_time": 0.3915715217590332
    },
    {
      "epoch": 5.8392333984375e-05,
      "model_forward_time": 0.11790251731872559,
      "step": 9567
    },
    {
      "epoch": 5.8392333984375e-05,
      "step": 9567,
      "training_step_time": 0.40774106979370117
    },
    {
      "epoch": 5.83984375e-05,
      "model_forward_time": 0.11623764038085938,
      "step": 9568
    },
    {
      "epoch": 5.83984375e-05,
      "step": 9568,
      "training_step_time": 0.39200639724731445
    },
    {
      "epoch": 5.8404541015625e-05,
      "model_forward_time": 0.11732888221740723,
      "step": 9569
    },
    {
      "epoch": 5.8404541015625e-05,
      "step": 9569,
      "training_step_time": 0.42965197563171387
    },
    {
      "epoch": 5.841064453125e-05,
      "grad_norm": 0.20585185289382935,
      "learning_rate": 9.675757405934103e-05,
      "loss": 0.0692,
      "step": 9570
    },
    {
      "epoch": 5.841064453125e-05,
      "model_forward_time": 0.11837649345397949,
      "step": 9570
    },
    {
      "epoch": 5.841064453125e-05,
      "step": 9570,
      "training_step_time": 0.3922305107116699
    },
    {
      "epoch": 5.8416748046875e-05,
      "model_forward_time": 0.11707568168640137,
      "step": 9571
    },
    {
      "epoch": 5.8416748046875e-05,
      "step": 9571,
      "training_step_time": 0.459672212600708
    },
    {
      "epoch": 5.84228515625e-05,
      "model_forward_time": 0.11769676208496094,
      "step": 9572
    },
    {
      "epoch": 5.84228515625e-05,
      "step": 9572,
      "training_step_time": 0.40122532844543457
    },
    {
      "epoch": 5.8428955078125e-05,
      "model_forward_time": 0.11644268035888672,
      "step": 9573
    },
    {
      "epoch": 5.8428955078125e-05,
      "step": 9573,
      "training_step_time": 0.4071073532104492
    },
    {
      "epoch": 5.843505859375e-05,
      "model_forward_time": 0.11742305755615234,
      "step": 9574
    },
    {
      "epoch": 5.843505859375e-05,
      "step": 9574,
      "training_step_time": 0.4840724468231201
    },
    {
      "epoch": 5.8441162109375e-05,
      "model_forward_time": 0.11680912971496582,
      "step": 9575
    },
    {
      "epoch": 5.8441162109375e-05,
      "step": 9575,
      "training_step_time": 0.44972729682922363
    },
    {
      "epoch": 5.8447265625e-05,
      "model_forward_time": 0.11744189262390137,
      "step": 9576
    },
    {
      "epoch": 5.8447265625e-05,
      "step": 9576,
      "training_step_time": 0.3851492404937744
    },
    {
      "epoch": 5.8453369140625e-05,
      "model_forward_time": 0.11903190612792969,
      "step": 9577
    },
    {
      "epoch": 5.8453369140625e-05,
      "step": 9577,
      "training_step_time": 0.7789502143859863
    },
    {
      "epoch": 5.845947265625e-05,
      "model_forward_time": 0.11590218544006348,
      "step": 9578
    },
    {
      "epoch": 5.845947265625e-05,
      "step": 9578,
      "training_step_time": 0.37503576278686523
    },
    {
      "epoch": 5.8465576171875e-05,
      "model_forward_time": 0.11596298217773438,
      "step": 9579
    },
    {
      "epoch": 5.8465576171875e-05,
      "step": 9579,
      "training_step_time": 0.41481924057006836
    },
    {
      "epoch": 5.84716796875e-05,
      "grad_norm": 0.19550669193267822,
      "learning_rate": 9.674780464930979e-05,
      "loss": 0.0697,
      "step": 9580
    },
    {
      "epoch": 5.84716796875e-05,
      "model_forward_time": 0.11774325370788574,
      "step": 9580
    },
    {
      "epoch": 5.84716796875e-05,
      "step": 9580,
      "training_step_time": 0.375537633895874
    },
    {
      "epoch": 5.8477783203125e-05,
      "model_forward_time": 0.11675763130187988,
      "step": 9581
    },
    {
      "epoch": 5.8477783203125e-05,
      "step": 9581,
      "training_step_time": 0.37995290756225586
    },
    {
      "epoch": 5.848388671875e-05,
      "model_forward_time": 0.11753225326538086,
      "step": 9582
    },
    {
      "epoch": 5.848388671875e-05,
      "step": 9582,
      "training_step_time": 0.37793946266174316
    },
    {
      "epoch": 5.8489990234375e-05,
      "model_forward_time": 0.11720418930053711,
      "step": 9583
    },
    {
      "epoch": 5.8489990234375e-05,
      "step": 9583,
      "training_step_time": 1.1974186897277832
    },
    {
      "epoch": 5.849609375e-05,
      "model_forward_time": 0.1167142391204834,
      "step": 9584
    },
    {
      "epoch": 5.849609375e-05,
      "step": 9584,
      "training_step_time": 0.3919048309326172
    },
    {
      "epoch": 5.8502197265625e-05,
      "model_forward_time": 0.11607193946838379,
      "step": 9585
    },
    {
      "epoch": 5.8502197265625e-05,
      "step": 9585,
      "training_step_time": 0.42781972885131836
    },
    {
      "epoch": 5.850830078125e-05,
      "model_forward_time": 0.11535239219665527,
      "step": 9586
    },
    {
      "epoch": 5.850830078125e-05,
      "step": 9586,
      "training_step_time": 0.45393919944763184
    },
    {
      "epoch": 5.8514404296875e-05,
      "model_forward_time": 0.11571621894836426,
      "step": 9587
    },
    {
      "epoch": 5.8514404296875e-05,
      "step": 9587,
      "training_step_time": 0.3864631652832031
    },
    {
      "epoch": 5.85205078125e-05,
      "model_forward_time": 0.1161355972290039,
      "step": 9588
    },
    {
      "epoch": 5.85205078125e-05,
      "step": 9588,
      "training_step_time": 0.44353485107421875
    },
    {
      "epoch": 5.8526611328125e-05,
      "model_forward_time": 0.1169590950012207,
      "step": 9589
    },
    {
      "epoch": 5.8526611328125e-05,
      "step": 9589,
      "training_step_time": 0.8217835426330566
    },
    {
      "epoch": 5.853271484375e-05,
      "grad_norm": 0.1814330518245697,
      "learning_rate": 9.673802103852979e-05,
      "loss": 0.0601,
      "step": 9590
    },
    {
      "epoch": 5.853271484375e-05,
      "model_forward_time": 0.11537837982177734,
      "step": 9590
    },
    {
      "epoch": 5.853271484375e-05,
      "step": 9590,
      "training_step_time": 0.3831491470336914
    },
    {
      "epoch": 5.8538818359375e-05,
      "model_forward_time": 0.11611199378967285,
      "step": 9591
    },
    {
      "epoch": 5.8538818359375e-05,
      "step": 9591,
      "training_step_time": 0.38465237617492676
    },
    {
      "epoch": 5.8544921875e-05,
      "model_forward_time": 0.11638116836547852,
      "step": 9592
    },
    {
      "epoch": 5.8544921875e-05,
      "step": 9592,
      "training_step_time": 0.3741452693939209
    },
    {
      "epoch": 5.8551025390625e-05,
      "model_forward_time": 0.11559796333312988,
      "step": 9593
    },
    {
      "epoch": 5.8551025390625e-05,
      "step": 9593,
      "training_step_time": 0.3902475833892822
    },
    {
      "epoch": 5.855712890625e-05,
      "model_forward_time": 0.11573648452758789,
      "step": 9594
    },
    {
      "epoch": 5.855712890625e-05,
      "step": 9594,
      "training_step_time": 0.3755655288696289
    },
    {
      "epoch": 5.8563232421875e-05,
      "model_forward_time": 0.11631298065185547,
      "step": 9595
    },
    {
      "epoch": 5.8563232421875e-05,
      "step": 9595,
      "training_step_time": 0.6838810443878174
    },
    {
      "epoch": 5.85693359375e-05,
      "model_forward_time": 0.11644935607910156,
      "step": 9596
    },
    {
      "epoch": 5.85693359375e-05,
      "step": 9596,
      "training_step_time": 0.40786290168762207
    },
    {
      "epoch": 5.8575439453125e-05,
      "model_forward_time": 0.11667442321777344,
      "step": 9597
    },
    {
      "epoch": 5.8575439453125e-05,
      "step": 9597,
      "training_step_time": 0.4519803524017334
    },
    {
      "epoch": 5.858154296875e-05,
      "model_forward_time": 0.11676335334777832,
      "step": 9598
    },
    {
      "epoch": 5.858154296875e-05,
      "step": 9598,
      "training_step_time": 0.5185956954956055
    },
    {
      "epoch": 5.8587646484375e-05,
      "model_forward_time": 0.11656427383422852,
      "step": 9599
    },
    {
      "epoch": 5.8587646484375e-05,
      "step": 9599,
      "training_step_time": 0.4816007614135742
    },
    {
      "epoch": 5.859375e-05,
      "grad_norm": 0.30822354555130005,
      "learning_rate": 9.672822322997305e-05,
      "loss": 0.0678,
      "step": 9600
    },
    {
      "epoch": 5.859375e-05,
      "model_forward_time": 0.11561417579650879,
      "step": 9600
    },
    {
      "epoch": 5.859375e-05,
      "step": 9600,
      "training_step_time": 0.43799352645874023
    },
    {
      "epoch": 5.8599853515625e-05,
      "model_forward_time": 0.11701226234436035,
      "step": 9601
    },
    {
      "epoch": 5.8599853515625e-05,
      "step": 9601,
      "training_step_time": 0.9163620471954346
    },
    {
      "epoch": 5.860595703125e-05,
      "model_forward_time": 0.11704277992248535,
      "step": 9602
    },
    {
      "epoch": 5.860595703125e-05,
      "step": 9602,
      "training_step_time": 0.3789641857147217
    },
    {
      "epoch": 5.8612060546875e-05,
      "model_forward_time": 0.11712217330932617,
      "step": 9603
    },
    {
      "epoch": 5.8612060546875e-05,
      "step": 9603,
      "training_step_time": 0.386472225189209
    },
    {
      "epoch": 5.86181640625e-05,
      "model_forward_time": 0.1154179573059082,
      "step": 9604
    },
    {
      "epoch": 5.86181640625e-05,
      "step": 9604,
      "training_step_time": 0.3822615146636963
    },
    {
      "epoch": 5.8624267578125e-05,
      "model_forward_time": 0.11559820175170898,
      "step": 9605
    },
    {
      "epoch": 5.8624267578125e-05,
      "step": 9605,
      "training_step_time": 0.38492560386657715
    },
    {
      "epoch": 5.863037109375e-05,
      "model_forward_time": 0.11631584167480469,
      "step": 9606
    },
    {
      "epoch": 5.863037109375e-05,
      "step": 9606,
      "training_step_time": 0.39702630043029785
    },
    {
      "epoch": 5.8636474609375e-05,
      "model_forward_time": 0.11691784858703613,
      "step": 9607
    },
    {
      "epoch": 5.8636474609375e-05,
      "step": 9607,
      "training_step_time": 0.7649931907653809
    },
    {
      "epoch": 5.8642578125e-05,
      "model_forward_time": 0.11722588539123535,
      "step": 9608
    },
    {
      "epoch": 5.8642578125e-05,
      "step": 9608,
      "training_step_time": 0.4403212070465088
    },
    {
      "epoch": 5.8648681640625e-05,
      "model_forward_time": 0.11749911308288574,
      "step": 9609
    },
    {
      "epoch": 5.8648681640625e-05,
      "step": 9609,
      "training_step_time": 0.4683506488800049
    },
    {
      "epoch": 5.865478515625e-05,
      "grad_norm": 0.24073129892349243,
      "learning_rate": 9.671841122661587e-05,
      "loss": 0.0723,
      "step": 9610
    },
    {
      "epoch": 5.865478515625e-05,
      "model_forward_time": 0.11592698097229004,
      "step": 9610
    },
    {
      "epoch": 5.865478515625e-05,
      "step": 9610,
      "training_step_time": 0.4070250988006592
    },
    {
      "epoch": 5.8660888671875e-05,
      "model_forward_time": 0.11586594581604004,
      "step": 9611
    },
    {
      "epoch": 5.8660888671875e-05,
      "step": 9611,
      "training_step_time": 0.44683313369750977
    },
    {
      "epoch": 5.86669921875e-05,
      "model_forward_time": 0.11594772338867188,
      "step": 9612
    },
    {
      "epoch": 5.86669921875e-05,
      "step": 9612,
      "training_step_time": 0.4140133857727051
    },
    {
      "epoch": 5.8673095703125e-05,
      "model_forward_time": 0.11628556251525879,
      "step": 9613
    },
    {
      "epoch": 5.8673095703125e-05,
      "step": 9613,
      "training_step_time": 0.7160289287567139
    },
    {
      "epoch": 5.867919921875e-05,
      "model_forward_time": 0.11581087112426758,
      "step": 9614
    },
    {
      "epoch": 5.867919921875e-05,
      "step": 9614,
      "training_step_time": 0.3867816925048828
    },
    {
      "epoch": 5.8685302734375e-05,
      "model_forward_time": 0.11802864074707031,
      "step": 9615
    },
    {
      "epoch": 5.8685302734375e-05,
      "step": 9615,
      "training_step_time": 0.38951992988586426
    },
    {
      "epoch": 5.869140625e-05,
      "model_forward_time": 0.11703181266784668,
      "step": 9616
    },
    {
      "epoch": 5.869140625e-05,
      "step": 9616,
      "training_step_time": 0.38524460792541504
    },
    {
      "epoch": 5.8697509765625e-05,
      "model_forward_time": 0.11650753021240234,
      "step": 9617
    },
    {
      "epoch": 5.8697509765625e-05,
      "step": 9617,
      "training_step_time": 0.3898134231567383
    },
    {
      "epoch": 5.870361328125e-05,
      "model_forward_time": 0.11692237854003906,
      "step": 9618
    },
    {
      "epoch": 5.870361328125e-05,
      "step": 9618,
      "training_step_time": 0.3761725425720215
    },
    {
      "epoch": 5.8709716796875e-05,
      "model_forward_time": 0.11628556251525879,
      "step": 9619
    },
    {
      "epoch": 5.8709716796875e-05,
      "step": 9619,
      "training_step_time": 1.2644858360290527
    },
    {
      "epoch": 5.87158203125e-05,
      "grad_norm": 0.1701352447271347,
      "learning_rate": 9.67085850314389e-05,
      "loss": 0.0712,
      "step": 9620
    },
    {
      "epoch": 5.87158203125e-05,
      "model_forward_time": 0.11475014686584473,
      "step": 9620
    },
    {
      "epoch": 5.87158203125e-05,
      "step": 9620,
      "training_step_time": 0.4001779556274414
    },
    {
      "epoch": 5.8721923828125e-05,
      "model_forward_time": 0.11524009704589844,
      "step": 9621
    },
    {
      "epoch": 5.8721923828125e-05,
      "step": 9621,
      "training_step_time": 0.43648362159729004
    },
    {
      "epoch": 5.872802734375e-05,
      "model_forward_time": 0.11478137969970703,
      "step": 9622
    },
    {
      "epoch": 5.872802734375e-05,
      "step": 9622,
      "training_step_time": 0.36458420753479004
    },
    {
      "epoch": 5.8734130859375e-05,
      "model_forward_time": 0.11546921730041504,
      "step": 9623
    },
    {
      "epoch": 5.8734130859375e-05,
      "step": 9623,
      "training_step_time": 0.3928413391113281
    },
    {
      "epoch": 5.8740234375e-05,
      "model_forward_time": 0.1162264347076416,
      "step": 9624
    },
    {
      "epoch": 5.8740234375e-05,
      "step": 9624,
      "training_step_time": 0.4260072708129883
    },
    {
      "epoch": 5.8746337890625e-05,
      "model_forward_time": 0.11695456504821777,
      "step": 9625
    },
    {
      "epoch": 5.8746337890625e-05,
      "step": 9625,
      "training_step_time": 0.5791668891906738
    },
    {
      "epoch": 5.875244140625e-05,
      "model_forward_time": 0.11590003967285156,
      "step": 9626
    },
    {
      "epoch": 5.875244140625e-05,
      "step": 9626,
      "training_step_time": 0.3799614906311035
    },
    {
      "epoch": 5.8758544921875e-05,
      "model_forward_time": 0.11603927612304688,
      "step": 9627
    },
    {
      "epoch": 5.8758544921875e-05,
      "step": 9627,
      "training_step_time": 0.4041323661804199
    },
    {
      "epoch": 5.87646484375e-05,
      "model_forward_time": 0.11760902404785156,
      "step": 9628
    },
    {
      "epoch": 5.87646484375e-05,
      "step": 9628,
      "training_step_time": 0.3808903694152832
    },
    {
      "epoch": 5.8770751953125e-05,
      "model_forward_time": 0.1165003776550293,
      "step": 9629
    },
    {
      "epoch": 5.8770751953125e-05,
      "step": 9629,
      "training_step_time": 0.37575483322143555
    },
    {
      "epoch": 5.877685546875e-05,
      "grad_norm": 0.1941530555486679,
      "learning_rate": 9.669874464742705e-05,
      "loss": 0.07,
      "step": 9630
    },
    {
      "epoch": 5.877685546875e-05,
      "model_forward_time": 0.11807847023010254,
      "step": 9630
    },
    {
      "epoch": 5.877685546875e-05,
      "step": 9630,
      "training_step_time": 0.37519001960754395
    },
    {
      "epoch": 5.8782958984375e-05,
      "model_forward_time": 0.11653423309326172,
      "step": 9631
    },
    {
      "epoch": 5.8782958984375e-05,
      "step": 9631,
      "training_step_time": 1.100764274597168
    },
    {
      "epoch": 5.87890625e-05,
      "model_forward_time": 0.11562490463256836,
      "step": 9632
    },
    {
      "epoch": 5.87890625e-05,
      "step": 9632,
      "training_step_time": 0.3749074935913086
    },
    {
      "epoch": 5.8795166015625e-05,
      "model_forward_time": 0.11565208435058594,
      "step": 9633
    },
    {
      "epoch": 5.8795166015625e-05,
      "step": 9633,
      "training_step_time": 0.37952709197998047
    },
    {
      "epoch": 5.880126953125e-05,
      "model_forward_time": 0.11626267433166504,
      "step": 9634
    },
    {
      "epoch": 5.880126953125e-05,
      "step": 9634,
      "training_step_time": 0.4034128189086914
    },
    {
      "epoch": 5.8807373046875e-05,
      "model_forward_time": 0.11597824096679688,
      "step": 9635
    },
    {
      "epoch": 5.8807373046875e-05,
      "step": 9635,
      "training_step_time": 0.36750364303588867
    },
    {
      "epoch": 5.88134765625e-05,
      "model_forward_time": 0.11684179306030273,
      "step": 9636
    },
    {
      "epoch": 5.88134765625e-05,
      "step": 9636,
      "training_step_time": 0.37491846084594727
    },
    {
      "epoch": 5.8819580078125e-05,
      "model_forward_time": 0.1163339614868164,
      "step": 9637
    },
    {
      "epoch": 5.8819580078125e-05,
      "step": 9637,
      "training_step_time": 1.1839964389801025
    },
    {
      "epoch": 5.882568359375e-05,
      "model_forward_time": 0.1153876781463623,
      "step": 9638
    },
    {
      "epoch": 5.882568359375e-05,
      "step": 9638,
      "training_step_time": 0.3762850761413574
    },
    {
      "epoch": 5.8831787109375e-05,
      "model_forward_time": 0.11474323272705078,
      "step": 9639
    },
    {
      "epoch": 5.8831787109375e-05,
      "step": 9639,
      "training_step_time": 0.384706974029541
    },
    {
      "epoch": 5.8837890625e-05,
      "grad_norm": 0.22077298164367676,
      "learning_rate": 9.668889007756961e-05,
      "loss": 0.0632,
      "step": 9640
    },
    {
      "epoch": 5.8837890625e-05,
      "model_forward_time": 0.11504507064819336,
      "step": 9640
    },
    {
      "epoch": 5.8837890625e-05,
      "step": 9640,
      "training_step_time": 0.37862586975097656
    },
    {
      "epoch": 5.8843994140625e-05,
      "model_forward_time": 0.11545228958129883,
      "step": 9641
    },
    {
      "epoch": 5.8843994140625e-05,
      "step": 9641,
      "training_step_time": 0.37517476081848145
    },
    {
      "epoch": 5.885009765625e-05,
      "model_forward_time": 0.11592912673950195,
      "step": 9642
    },
    {
      "epoch": 5.885009765625e-05,
      "step": 9642,
      "training_step_time": 0.3837273120880127
    },
    {
      "epoch": 5.8856201171875e-05,
      "model_forward_time": 0.11849808692932129,
      "step": 9643
    },
    {
      "epoch": 5.8856201171875e-05,
      "step": 9643,
      "training_step_time": 0.7296469211578369
    },
    {
      "epoch": 5.88623046875e-05,
      "model_forward_time": 0.11621570587158203,
      "step": 9644
    },
    {
      "epoch": 5.88623046875e-05,
      "step": 9644,
      "training_step_time": 0.3883054256439209
    },
    {
      "epoch": 5.8868408203125e-05,
      "model_forward_time": 0.11705780029296875,
      "step": 9645
    },
    {
      "epoch": 5.8868408203125e-05,
      "step": 9645,
      "training_step_time": 0.39539599418640137
    },
    {
      "epoch": 5.887451171875e-05,
      "model_forward_time": 0.1156008243560791,
      "step": 9646
    },
    {
      "epoch": 5.887451171875e-05,
      "step": 9646,
      "training_step_time": 0.4083240032196045
    },
    {
      "epoch": 5.8880615234375e-05,
      "model_forward_time": 0.11610960960388184,
      "step": 9647
    },
    {
      "epoch": 5.8880615234375e-05,
      "step": 9647,
      "training_step_time": 0.41375136375427246
    },
    {
      "epoch": 5.888671875e-05,
      "model_forward_time": 0.11819648742675781,
      "step": 9648
    },
    {
      "epoch": 5.888671875e-05,
      "step": 9648,
      "training_step_time": 0.4242439270019531
    },
    {
      "epoch": 5.8892822265625e-05,
      "model_forward_time": 0.11683344841003418,
      "step": 9649
    },
    {
      "epoch": 5.8892822265625e-05,
      "step": 9649,
      "training_step_time": 1.154526710510254
    },
    {
      "epoch": 5.889892578125e-05,
      "grad_norm": 0.2010641098022461,
      "learning_rate": 9.667902132486009e-05,
      "loss": 0.0689,
      "step": 9650
    },
    {
      "epoch": 5.889892578125e-05,
      "model_forward_time": 0.11537694931030273,
      "step": 9650
    },
    {
      "epoch": 5.889892578125e-05,
      "step": 9650,
      "training_step_time": 0.3756885528564453
    },
    {
      "epoch": 5.8905029296875e-05,
      "model_forward_time": 0.11545109748840332,
      "step": 9651
    },
    {
      "epoch": 5.8905029296875e-05,
      "step": 9651,
      "training_step_time": 0.3701643943786621
    },
    {
      "epoch": 5.89111328125e-05,
      "model_forward_time": 0.1151280403137207,
      "step": 9652
    },
    {
      "epoch": 5.89111328125e-05,
      "step": 9652,
      "training_step_time": 0.37800168991088867
    },
    {
      "epoch": 5.8917236328125e-05,
      "model_forward_time": 0.11554312705993652,
      "step": 9653
    },
    {
      "epoch": 5.8917236328125e-05,
      "step": 9653,
      "training_step_time": 0.3925814628601074
    },
    {
      "epoch": 5.892333984375e-05,
      "model_forward_time": 0.1171116828918457,
      "step": 9654
    },
    {
      "epoch": 5.892333984375e-05,
      "step": 9654,
      "training_step_time": 0.373248815536499
    },
    {
      "epoch": 5.8929443359375e-05,
      "model_forward_time": 0.11693596839904785,
      "step": 9655
    },
    {
      "epoch": 5.8929443359375e-05,
      "step": 9655,
      "training_step_time": 0.6434822082519531
    },
    {
      "epoch": 5.8935546875e-05,
      "model_forward_time": 0.11703276634216309,
      "step": 9656
    },
    {
      "epoch": 5.8935546875e-05,
      "step": 9656,
      "training_step_time": 0.39490628242492676
    },
    {
      "epoch": 5.8941650390625e-05,
      "model_forward_time": 0.1164243221282959,
      "step": 9657
    },
    {
      "epoch": 5.8941650390625e-05,
      "step": 9657,
      "training_step_time": 0.39826130867004395
    },
    {
      "epoch": 5.894775390625e-05,
      "model_forward_time": 0.11686372756958008,
      "step": 9658
    },
    {
      "epoch": 5.894775390625e-05,
      "step": 9658,
      "training_step_time": 0.39966297149658203
    },
    {
      "epoch": 5.8953857421875e-05,
      "model_forward_time": 0.11617469787597656,
      "step": 9659
    },
    {
      "epoch": 5.8953857421875e-05,
      "step": 9659,
      "training_step_time": 0.393282413482666
    },
    {
      "epoch": 5.89599609375e-05,
      "grad_norm": 0.22526109218597412,
      "learning_rate": 9.66691383922964e-05,
      "loss": 0.0681,
      "step": 9660
    },
    {
      "epoch": 5.89599609375e-05,
      "model_forward_time": 0.11721611022949219,
      "step": 9660
    },
    {
      "epoch": 5.89599609375e-05,
      "step": 9660,
      "training_step_time": 0.41043949127197266
    },
    {
      "epoch": 5.8966064453125e-05,
      "model_forward_time": 0.11707067489624023,
      "step": 9661
    },
    {
      "epoch": 5.8966064453125e-05,
      "step": 9661,
      "training_step_time": 0.5816307067871094
    },
    {
      "epoch": 5.897216796875e-05,
      "model_forward_time": 0.1177053451538086,
      "step": 9662
    },
    {
      "epoch": 5.897216796875e-05,
      "step": 9662,
      "training_step_time": 0.4601726531982422
    },
    {
      "epoch": 5.8978271484375e-05,
      "model_forward_time": 0.1175835132598877,
      "step": 9663
    },
    {
      "epoch": 5.8978271484375e-05,
      "step": 9663,
      "training_step_time": 0.466655969619751
    },
    {
      "epoch": 5.8984375e-05,
      "model_forward_time": 0.11703252792358398,
      "step": 9664
    },
    {
      "epoch": 5.8984375e-05,
      "step": 9664,
      "training_step_time": 0.3860170841217041
    },
    {
      "epoch": 5.8990478515625e-05,
      "model_forward_time": 0.1162571907043457,
      "step": 9665
    },
    {
      "epoch": 5.8990478515625e-05,
      "step": 9665,
      "training_step_time": 0.3784925937652588
    },
    {
      "epoch": 5.899658203125e-05,
      "model_forward_time": 0.11623287200927734,
      "step": 9666
    },
    {
      "epoch": 5.899658203125e-05,
      "step": 9666,
      "training_step_time": 0.4167623519897461
    },
    {
      "epoch": 5.9002685546875e-05,
      "model_forward_time": 0.11721539497375488,
      "step": 9667
    },
    {
      "epoch": 5.9002685546875e-05,
      "step": 9667,
      "training_step_time": 0.4459245204925537
    },
    {
      "epoch": 5.90087890625e-05,
      "model_forward_time": 0.11648082733154297,
      "step": 9668
    },
    {
      "epoch": 5.90087890625e-05,
      "step": 9668,
      "training_step_time": 0.3767387866973877
    },
    {
      "epoch": 5.9014892578125e-05,
      "model_forward_time": 0.11685895919799805,
      "step": 9669
    },
    {
      "epoch": 5.9014892578125e-05,
      "step": 9669,
      "training_step_time": 0.3863043785095215
    },
    {
      "epoch": 5.902099609375e-05,
      "grad_norm": 0.16943493485450745,
      "learning_rate": 9.665924128288068e-05,
      "loss": 0.0696,
      "step": 9670
    },
    {
      "epoch": 5.902099609375e-05,
      "model_forward_time": 0.1168673038482666,
      "step": 9670
    },
    {
      "epoch": 5.902099609375e-05,
      "step": 9670,
      "training_step_time": 0.39670825004577637
    },
    {
      "epoch": 5.9027099609375e-05,
      "model_forward_time": 0.11655354499816895,
      "step": 9671
    },
    {
      "epoch": 5.9027099609375e-05,
      "step": 9671,
      "training_step_time": 0.37627387046813965
    },
    {
      "epoch": 5.9033203125e-05,
      "model_forward_time": 0.11784982681274414,
      "step": 9672
    },
    {
      "epoch": 5.9033203125e-05,
      "step": 9672,
      "training_step_time": 0.4370572566986084
    },
    {
      "epoch": 5.9039306640625e-05,
      "model_forward_time": 0.11693739891052246,
      "step": 9673
    },
    {
      "epoch": 5.9039306640625e-05,
      "step": 9673,
      "training_step_time": 1.1154992580413818
    },
    {
      "epoch": 5.904541015625e-05,
      "model_forward_time": 0.11574697494506836,
      "step": 9674
    },
    {
      "epoch": 5.904541015625e-05,
      "step": 9674,
      "training_step_time": 0.3808631896972656
    },
    {
      "epoch": 5.9051513671875e-05,
      "model_forward_time": 0.11642217636108398,
      "step": 9675
    },
    {
      "epoch": 5.9051513671875e-05,
      "step": 9675,
      "training_step_time": 0.437089204788208
    },
    {
      "epoch": 5.90576171875e-05,
      "model_forward_time": 0.11620402336120605,
      "step": 9676
    },
    {
      "epoch": 5.90576171875e-05,
      "step": 9676,
      "training_step_time": 0.3689408302307129
    },
    {
      "epoch": 5.9063720703125e-05,
      "model_forward_time": 0.11557912826538086,
      "step": 9677
    },
    {
      "epoch": 5.9063720703125e-05,
      "step": 9677,
      "training_step_time": 0.3697168827056885
    },
    {
      "epoch": 5.906982421875e-05,
      "model_forward_time": 0.11721324920654297,
      "step": 9678
    },
    {
      "epoch": 5.906982421875e-05,
      "step": 9678,
      "training_step_time": 0.37082982063293457
    },
    {
      "epoch": 5.9075927734375e-05,
      "model_forward_time": 0.11661005020141602,
      "step": 9679
    },
    {
      "epoch": 5.9075927734375e-05,
      "step": 9679,
      "training_step_time": 1.0059535503387451
    },
    {
      "epoch": 5.908203125e-05,
      "grad_norm": 0.2756688892841339,
      "learning_rate": 9.664932999961942e-05,
      "loss": 0.0686,
      "step": 9680
    },
    {
      "epoch": 5.908203125e-05,
      "model_forward_time": 0.11626386642456055,
      "step": 9680
    },
    {
      "epoch": 5.908203125e-05,
      "step": 9680,
      "training_step_time": 0.3719813823699951
    },
    {
      "epoch": 5.9088134765625e-05,
      "model_forward_time": 0.1159360408782959,
      "step": 9681
    },
    {
      "epoch": 5.9088134765625e-05,
      "step": 9681,
      "training_step_time": 0.37457799911499023
    },
    {
      "epoch": 5.909423828125e-05,
      "model_forward_time": 0.11824536323547363,
      "step": 9682
    },
    {
      "epoch": 5.909423828125e-05,
      "step": 9682,
      "training_step_time": 0.3832423686981201
    },
    {
      "epoch": 5.9100341796875e-05,
      "model_forward_time": 0.11692953109741211,
      "step": 9683
    },
    {
      "epoch": 5.9100341796875e-05,
      "step": 9683,
      "training_step_time": 0.44090723991394043
    },
    {
      "epoch": 5.91064453125e-05,
      "model_forward_time": 0.11779522895812988,
      "step": 9684
    },
    {
      "epoch": 5.91064453125e-05,
      "step": 9684,
      "training_step_time": 0.4095144271850586
    },
    {
      "epoch": 5.9112548828125e-05,
      "model_forward_time": 0.11823177337646484,
      "step": 9685
    },
    {
      "epoch": 5.9112548828125e-05,
      "step": 9685,
      "training_step_time": 0.7583024501800537
    },
    {
      "epoch": 5.911865234375e-05,
      "model_forward_time": 0.11621856689453125,
      "step": 9686
    },
    {
      "epoch": 5.911865234375e-05,
      "step": 9686,
      "training_step_time": 0.44676995277404785
    },
    {
      "epoch": 5.9124755859375e-05,
      "model_forward_time": 0.11572933197021484,
      "step": 9687
    },
    {
      "epoch": 5.9124755859375e-05,
      "step": 9687,
      "training_step_time": 0.45548224449157715
    },
    {
      "epoch": 5.9130859375e-05,
      "model_forward_time": 0.11615109443664551,
      "step": 9688
    },
    {
      "epoch": 5.9130859375e-05,
      "step": 9688,
      "training_step_time": 0.43998217582702637
    },
    {
      "epoch": 5.9136962890625e-05,
      "model_forward_time": 0.11613249778747559,
      "step": 9689
    },
    {
      "epoch": 5.9136962890625e-05,
      "step": 9689,
      "training_step_time": 0.37880873680114746
    },
    {
      "epoch": 5.914306640625e-05,
      "grad_norm": 0.1914430856704712,
      "learning_rate": 9.663940454552342e-05,
      "loss": 0.0623,
      "step": 9690
    },
    {
      "epoch": 5.914306640625e-05,
      "model_forward_time": 0.11714553833007812,
      "step": 9690
    },
    {
      "epoch": 5.914306640625e-05,
      "step": 9690,
      "training_step_time": 0.37494540214538574
    },
    {
      "epoch": 5.9149169921875e-05,
      "model_forward_time": 0.11639595031738281,
      "step": 9691
    },
    {
      "epoch": 5.9149169921875e-05,
      "step": 9691,
      "training_step_time": 0.6476848125457764
    },
    {
      "epoch": 5.91552734375e-05,
      "model_forward_time": 0.11687493324279785,
      "step": 9692
    },
    {
      "epoch": 5.91552734375e-05,
      "step": 9692,
      "training_step_time": 0.3937978744506836
    },
    {
      "epoch": 5.9161376953125e-05,
      "model_forward_time": 0.11737608909606934,
      "step": 9693
    },
    {
      "epoch": 5.9161376953125e-05,
      "step": 9693,
      "training_step_time": 0.37972211837768555
    },
    {
      "epoch": 5.916748046875e-05,
      "model_forward_time": 0.11706089973449707,
      "step": 9694
    },
    {
      "epoch": 5.916748046875e-05,
      "step": 9694,
      "training_step_time": 0.3815758228302002
    },
    {
      "epoch": 5.9173583984375e-05,
      "model_forward_time": 0.11739778518676758,
      "step": 9695
    },
    {
      "epoch": 5.9173583984375e-05,
      "step": 9695,
      "training_step_time": 0.3932633399963379
    },
    {
      "epoch": 5.91796875e-05,
      "model_forward_time": 0.1179659366607666,
      "step": 9696
    },
    {
      "epoch": 5.91796875e-05,
      "step": 9696,
      "training_step_time": 0.40022706985473633
    },
    {
      "epoch": 5.9185791015625e-05,
      "model_forward_time": 0.11680960655212402,
      "step": 9697
    },
    {
      "epoch": 5.9185791015625e-05,
      "step": 9697,
      "training_step_time": 1.36610746383667
    },
    {
      "epoch": 5.919189453125e-05,
      "model_forward_time": 0.1154630184173584,
      "step": 9698
    },
    {
      "epoch": 5.919189453125e-05,
      "step": 9698,
      "training_step_time": 0.4105842113494873
    },
    {
      "epoch": 5.9197998046875e-05,
      "model_forward_time": 0.1146383285522461,
      "step": 9699
    },
    {
      "epoch": 5.9197998046875e-05,
      "step": 9699,
      "training_step_time": 0.39600276947021484
    },
    {
      "epoch": 5.92041015625e-05,
      "grad_norm": 0.21728742122650146,
      "learning_rate": 9.662946492360776e-05,
      "loss": 0.0608,
      "step": 9700
    },
    {
      "epoch": 5.92041015625e-05,
      "model_forward_time": 0.11521267890930176,
      "step": 9700
    },
    {
      "epoch": 5.92041015625e-05,
      "step": 9700,
      "training_step_time": 0.418503999710083
    },
    {
      "epoch": 5.9210205078125e-05,
      "model_forward_time": 0.11517119407653809,
      "step": 9701
    },
    {
      "epoch": 5.9210205078125e-05,
      "step": 9701,
      "training_step_time": 0.3768460750579834
    },
    {
      "epoch": 5.921630859375e-05,
      "model_forward_time": 0.1163325309753418,
      "step": 9702
    },
    {
      "epoch": 5.921630859375e-05,
      "step": 9702,
      "training_step_time": 0.38361191749572754
    },
    {
      "epoch": 5.9222412109375e-05,
      "model_forward_time": 0.11639213562011719,
      "step": 9703
    },
    {
      "epoch": 5.9222412109375e-05,
      "step": 9703,
      "training_step_time": 0.6185200214385986
    },
    {
      "epoch": 5.9228515625e-05,
      "model_forward_time": 0.11603474617004395,
      "step": 9704
    },
    {
      "epoch": 5.9228515625e-05,
      "step": 9704,
      "training_step_time": 0.3798341751098633
    },
    {
      "epoch": 5.9234619140625e-05,
      "model_forward_time": 0.11591243743896484,
      "step": 9705
    },
    {
      "epoch": 5.9234619140625e-05,
      "step": 9705,
      "training_step_time": 0.37769389152526855
    },
    {
      "epoch": 5.924072265625e-05,
      "model_forward_time": 0.11682295799255371,
      "step": 9706
    },
    {
      "epoch": 5.924072265625e-05,
      "step": 9706,
      "training_step_time": 0.3807549476623535
    },
    {
      "epoch": 5.9246826171875e-05,
      "model_forward_time": 0.11651849746704102,
      "step": 9707
    },
    {
      "epoch": 5.9246826171875e-05,
      "step": 9707,
      "training_step_time": 0.4288976192474365
    },
    {
      "epoch": 5.92529296875e-05,
      "model_forward_time": 0.11719346046447754,
      "step": 9708
    },
    {
      "epoch": 5.92529296875e-05,
      "step": 9708,
      "training_step_time": 0.4046363830566406
    },
    {
      "epoch": 5.9259033203125e-05,
      "model_forward_time": 0.11706042289733887,
      "step": 9709
    },
    {
      "epoch": 5.9259033203125e-05,
      "step": 9709,
      "training_step_time": 1.1387743949890137
    },
    {
      "epoch": 5.926513671875e-05,
      "grad_norm": 0.21266238391399384,
      "learning_rate": 9.661951113689182e-05,
      "loss": 0.0686,
      "step": 9710
    },
    {
      "epoch": 5.926513671875e-05,
      "model_forward_time": 0.11670136451721191,
      "step": 9710
    },
    {
      "epoch": 5.926513671875e-05,
      "step": 9710,
      "training_step_time": 0.45478129386901855
    },
    {
      "epoch": 5.9271240234375e-05,
      "model_forward_time": 0.11580252647399902,
      "step": 9711
    },
    {
      "epoch": 5.9271240234375e-05,
      "step": 9711,
      "training_step_time": 0.42209410667419434
    },
    {
      "epoch": 5.927734375e-05,
      "model_forward_time": 0.11603569984436035,
      "step": 9712
    },
    {
      "epoch": 5.927734375e-05,
      "step": 9712,
      "training_step_time": 0.46286511421203613
    },
    {
      "epoch": 5.9283447265625e-05,
      "model_forward_time": 0.11536216735839844,
      "step": 9713
    },
    {
      "epoch": 5.9283447265625e-05,
      "step": 9713,
      "training_step_time": 0.48696017265319824
    },
    {
      "epoch": 5.928955078125e-05,
      "model_forward_time": 0.11626315116882324,
      "step": 9714
    },
    {
      "epoch": 5.928955078125e-05,
      "step": 9714,
      "training_step_time": 0.37186431884765625
    },
    {
      "epoch": 5.9295654296875e-05,
      "model_forward_time": 0.11562848091125488,
      "step": 9715
    },
    {
      "epoch": 5.9295654296875e-05,
      "step": 9715,
      "training_step_time": 0.530815839767456
    },
    {
      "epoch": 5.93017578125e-05,
      "model_forward_time": 0.11580538749694824,
      "step": 9716
    },
    {
      "epoch": 5.93017578125e-05,
      "step": 9716,
      "training_step_time": 0.38131237030029297
    },
    {
      "epoch": 5.9307861328125e-05,
      "model_forward_time": 0.1156914234161377,
      "step": 9717
    },
    {
      "epoch": 5.9307861328125e-05,
      "step": 9717,
      "training_step_time": 0.39023375511169434
    },
    {
      "epoch": 5.931396484375e-05,
      "model_forward_time": 0.11617016792297363,
      "step": 9718
    },
    {
      "epoch": 5.931396484375e-05,
      "step": 9718,
      "training_step_time": 0.38286709785461426
    },
    {
      "epoch": 5.9320068359375e-05,
      "model_forward_time": 0.11680984497070312,
      "step": 9719
    },
    {
      "epoch": 5.9320068359375e-05,
      "step": 9719,
      "training_step_time": 0.4187440872192383
    },
    {
      "epoch": 5.9326171875e-05,
      "grad_norm": 0.214333176612854,
      "learning_rate": 9.660954318839933e-05,
      "loss": 0.0618,
      "step": 9720
    },
    {
      "epoch": 5.9326171875e-05,
      "model_forward_time": 0.11807990074157715,
      "step": 9720
    },
    {
      "epoch": 5.9326171875e-05,
      "step": 9720,
      "training_step_time": 0.4311800003051758
    },
    {
      "epoch": 5.9332275390625e-05,
      "model_forward_time": 0.11655139923095703,
      "step": 9721
    },
    {
      "epoch": 5.9332275390625e-05,
      "step": 9721,
      "training_step_time": 0.8945977687835693
    },
    {
      "epoch": 5.933837890625e-05,
      "model_forward_time": 0.11528968811035156,
      "step": 9722
    },
    {
      "epoch": 5.933837890625e-05,
      "step": 9722,
      "training_step_time": 0.38962554931640625
    },
    {
      "epoch": 5.9344482421875e-05,
      "model_forward_time": 0.11743855476379395,
      "step": 9723
    },
    {
      "epoch": 5.9344482421875e-05,
      "step": 9723,
      "training_step_time": 0.4023094177246094
    },
    {
      "epoch": 5.93505859375e-05,
      "model_forward_time": 0.11574506759643555,
      "step": 9724
    },
    {
      "epoch": 5.93505859375e-05,
      "step": 9724,
      "training_step_time": 0.4231135845184326
    },
    {
      "epoch": 5.9356689453125e-05,
      "model_forward_time": 0.11591839790344238,
      "step": 9725
    },
    {
      "epoch": 5.9356689453125e-05,
      "step": 9725,
      "training_step_time": 0.43675947189331055
    },
    {
      "epoch": 5.936279296875e-05,
      "model_forward_time": 0.11592912673950195,
      "step": 9726
    },
    {
      "epoch": 5.936279296875e-05,
      "step": 9726,
      "training_step_time": 0.4461548328399658
    },
    {
      "epoch": 5.9368896484375e-05,
      "model_forward_time": 0.11610960960388184,
      "step": 9727
    },
    {
      "epoch": 5.9368896484375e-05,
      "step": 9727,
      "training_step_time": 1.0853240489959717
    },
    {
      "epoch": 5.9375e-05,
      "model_forward_time": 0.11510205268859863,
      "step": 9728
    },
    {
      "epoch": 5.9375e-05,
      "step": 9728,
      "training_step_time": 0.3712935447692871
    },
    {
      "epoch": 5.9381103515625e-05,
      "model_forward_time": 0.11562895774841309,
      "step": 9729
    },
    {
      "epoch": 5.9381103515625e-05,
      "step": 9729,
      "training_step_time": 0.3718299865722656
    },
    {
      "epoch": 5.938720703125e-05,
      "grad_norm": 0.23179176449775696,
      "learning_rate": 9.659956108115827e-05,
      "loss": 0.0674,
      "step": 9730
    },
    {
      "epoch": 5.938720703125e-05,
      "model_forward_time": 0.11554789543151855,
      "step": 9730
    },
    {
      "epoch": 5.938720703125e-05,
      "step": 9730,
      "training_step_time": 0.3789646625518799
    },
    {
      "epoch": 5.9393310546875e-05,
      "model_forward_time": 0.1152946949005127,
      "step": 9731
    },
    {
      "epoch": 5.9393310546875e-05,
      "step": 9731,
      "training_step_time": 0.40055322647094727
    },
    {
      "epoch": 5.93994140625e-05,
      "model_forward_time": 0.11641764640808105,
      "step": 9732
    },
    {
      "epoch": 5.93994140625e-05,
      "step": 9732,
      "training_step_time": 0.4379842281341553
    },
    {
      "epoch": 5.9405517578125e-05,
      "model_forward_time": 0.11656904220581055,
      "step": 9733
    },
    {
      "epoch": 5.9405517578125e-05,
      "step": 9733,
      "training_step_time": 0.5103263854980469
    },
    {
      "epoch": 5.941162109375e-05,
      "model_forward_time": 0.11711549758911133,
      "step": 9734
    },
    {
      "epoch": 5.941162109375e-05,
      "step": 9734,
      "training_step_time": 0.37680649757385254
    },
    {
      "epoch": 5.9417724609375e-05,
      "model_forward_time": 0.11620044708251953,
      "step": 9735
    },
    {
      "epoch": 5.9417724609375e-05,
      "step": 9735,
      "training_step_time": 0.38436341285705566
    },
    {
      "epoch": 5.9423828125e-05,
      "model_forward_time": 0.11734414100646973,
      "step": 9736
    },
    {
      "epoch": 5.9423828125e-05,
      "step": 9736,
      "training_step_time": 0.4094560146331787
    },
    {
      "epoch": 5.9429931640625e-05,
      "model_forward_time": 0.11976766586303711,
      "step": 9737
    },
    {
      "epoch": 5.9429931640625e-05,
      "step": 9737,
      "training_step_time": 0.4499669075012207
    },
    {
      "epoch": 5.943603515625e-05,
      "model_forward_time": 0.11721944808959961,
      "step": 9738
    },
    {
      "epoch": 5.943603515625e-05,
      "step": 9738,
      "training_step_time": 0.4180483818054199
    },
    {
      "epoch": 5.9442138671875e-05,
      "model_forward_time": 0.11722826957702637,
      "step": 9739
    },
    {
      "epoch": 5.9442138671875e-05,
      "step": 9739,
      "training_step_time": 0.5444281101226807
    },
    {
      "epoch": 5.94482421875e-05,
      "grad_norm": 0.17510510981082916,
      "learning_rate": 9.658956481820094e-05,
      "loss": 0.0649,
      "step": 9740
    },
    {
      "epoch": 5.94482421875e-05,
      "model_forward_time": 0.11680960655212402,
      "step": 9740
    },
    {
      "epoch": 5.94482421875e-05,
      "step": 9740,
      "training_step_time": 0.3900935649871826
    },
    {
      "epoch": 5.9454345703125e-05,
      "model_forward_time": 0.11669301986694336,
      "step": 9741
    },
    {
      "epoch": 5.9454345703125e-05,
      "step": 9741,
      "training_step_time": 0.37548184394836426
    },
    {
      "epoch": 5.946044921875e-05,
      "model_forward_time": 0.11674356460571289,
      "step": 9742
    },
    {
      "epoch": 5.946044921875e-05,
      "step": 9742,
      "training_step_time": 0.3878517150878906
    },
    {
      "epoch": 5.9466552734375e-05,
      "model_forward_time": 0.11771726608276367,
      "step": 9743
    },
    {
      "epoch": 5.9466552734375e-05,
      "step": 9743,
      "training_step_time": 0.37583088874816895
    },
    {
      "epoch": 5.947265625e-05,
      "model_forward_time": 0.11765074729919434,
      "step": 9744
    },
    {
      "epoch": 5.947265625e-05,
      "step": 9744,
      "training_step_time": 0.37598466873168945
    },
    {
      "epoch": 5.9478759765625e-05,
      "model_forward_time": 0.11661577224731445,
      "step": 9745
    },
    {
      "epoch": 5.9478759765625e-05,
      "step": 9745,
      "training_step_time": 0.5622701644897461
    },
    {
      "epoch": 5.948486328125e-05,
      "model_forward_time": 0.11776518821716309,
      "step": 9746
    },
    {
      "epoch": 5.948486328125e-05,
      "step": 9746,
      "training_step_time": 0.39812517166137695
    },
    {
      "epoch": 5.9490966796875e-05,
      "model_forward_time": 0.1170346736907959,
      "step": 9747
    },
    {
      "epoch": 5.9490966796875e-05,
      "step": 9747,
      "training_step_time": 0.38613247871398926
    },
    {
      "epoch": 5.94970703125e-05,
      "model_forward_time": 0.11943531036376953,
      "step": 9748
    },
    {
      "epoch": 5.94970703125e-05,
      "step": 9748,
      "training_step_time": 0.4066731929779053
    },
    {
      "epoch": 5.9503173828125e-05,
      "model_forward_time": 0.1186058521270752,
      "step": 9749
    },
    {
      "epoch": 5.9503173828125e-05,
      "step": 9749,
      "training_step_time": 0.390195369720459
    },
    {
      "epoch": 5.950927734375e-05,
      "grad_norm": 0.17423339188098907,
      "learning_rate": 9.657955440256395e-05,
      "loss": 0.0631,
      "step": 9750
    },
    {
      "epoch": 5.950927734375e-05,
      "model_forward_time": 0.11727261543273926,
      "step": 9750
    },
    {
      "epoch": 5.950927734375e-05,
      "step": 9750,
      "training_step_time": 0.37546515464782715
    },
    {
      "epoch": 5.9515380859375e-05,
      "model_forward_time": 0.11693716049194336,
      "step": 9751
    },
    {
      "epoch": 5.9515380859375e-05,
      "step": 9751,
      "training_step_time": 1.2165546417236328
    },
    {
      "epoch": 5.9521484375e-05,
      "model_forward_time": 0.11503362655639648,
      "step": 9752
    },
    {
      "epoch": 5.9521484375e-05,
      "step": 9752,
      "training_step_time": 0.37427306175231934
    },
    {
      "epoch": 5.9527587890625e-05,
      "model_forward_time": 0.11757922172546387,
      "step": 9753
    },
    {
      "epoch": 5.9527587890625e-05,
      "step": 9753,
      "training_step_time": 0.3749361038208008
    },
    {
      "epoch": 5.953369140625e-05,
      "model_forward_time": 0.11575889587402344,
      "step": 9754
    },
    {
      "epoch": 5.953369140625e-05,
      "step": 9754,
      "training_step_time": 0.3724071979522705
    },
    {
      "epoch": 5.9539794921875e-05,
      "model_forward_time": 0.11539077758789062,
      "step": 9755
    },
    {
      "epoch": 5.9539794921875e-05,
      "step": 9755,
      "training_step_time": 0.37934136390686035
    },
    {
      "epoch": 5.95458984375e-05,
      "model_forward_time": 0.11569833755493164,
      "step": 9756
    },
    {
      "epoch": 5.95458984375e-05,
      "step": 9756,
      "training_step_time": 0.37689661979675293
    },
    {
      "epoch": 5.9552001953125e-05,
      "model_forward_time": 0.11676740646362305,
      "step": 9757
    },
    {
      "epoch": 5.9552001953125e-05,
      "step": 9757,
      "training_step_time": 1.1592602729797363
    },
    {
      "epoch": 5.955810546875e-05,
      "model_forward_time": 0.11703991889953613,
      "step": 9758
    },
    {
      "epoch": 5.955810546875e-05,
      "step": 9758,
      "training_step_time": 0.36892056465148926
    },
    {
      "epoch": 5.9564208984375e-05,
      "model_forward_time": 0.11511492729187012,
      "step": 9759
    },
    {
      "epoch": 5.9564208984375e-05,
      "step": 9759,
      "training_step_time": 0.37426114082336426
    },
    {
      "epoch": 5.95703125e-05,
      "grad_norm": 0.22047054767608643,
      "learning_rate": 9.65695298372882e-05,
      "loss": 0.066,
      "step": 9760
    },
    {
      "epoch": 5.95703125e-05,
      "model_forward_time": 0.11597967147827148,
      "step": 9760
    },
    {
      "epoch": 5.95703125e-05,
      "step": 9760,
      "training_step_time": 0.3691294193267822
    },
    {
      "epoch": 5.9576416015625e-05,
      "model_forward_time": 0.11568665504455566,
      "step": 9761
    },
    {
      "epoch": 5.9576416015625e-05,
      "step": 9761,
      "training_step_time": 0.37180328369140625
    },
    {
      "epoch": 5.958251953125e-05,
      "model_forward_time": 0.11606764793395996,
      "step": 9762
    },
    {
      "epoch": 5.958251953125e-05,
      "step": 9762,
      "training_step_time": 0.37708163261413574
    },
    {
      "epoch": 5.9588623046875e-05,
      "model_forward_time": 0.11800146102905273,
      "step": 9763
    },
    {
      "epoch": 5.9588623046875e-05,
      "step": 9763,
      "training_step_time": 0.8248372077941895
    },
    {
      "epoch": 5.95947265625e-05,
      "model_forward_time": 0.1168985366821289,
      "step": 9764
    },
    {
      "epoch": 5.95947265625e-05,
      "step": 9764,
      "training_step_time": 0.45136356353759766
    },
    {
      "epoch": 5.9600830078125e-05,
      "model_forward_time": 0.11574649810791016,
      "step": 9765
    },
    {
      "epoch": 5.9600830078125e-05,
      "step": 9765,
      "training_step_time": 0.3780226707458496
    },
    {
      "epoch": 5.960693359375e-05,
      "model_forward_time": 0.11606144905090332,
      "step": 9766
    },
    {
      "epoch": 5.960693359375e-05,
      "step": 9766,
      "training_step_time": 0.38816237449645996
    },
    {
      "epoch": 5.9613037109375e-05,
      "model_forward_time": 0.11602902412414551,
      "step": 9767
    },
    {
      "epoch": 5.9613037109375e-05,
      "step": 9767,
      "training_step_time": 0.38166093826293945
    },
    {
      "epoch": 5.9619140625e-05,
      "model_forward_time": 0.11755609512329102,
      "step": 9768
    },
    {
      "epoch": 5.9619140625e-05,
      "step": 9768,
      "training_step_time": 0.43200087547302246
    },
    {
      "epoch": 5.9625244140625e-05,
      "model_forward_time": 0.11681461334228516,
      "step": 9769
    },
    {
      "epoch": 5.9625244140625e-05,
      "step": 9769,
      "training_step_time": 1.013852596282959
    },
    {
      "epoch": 5.963134765625e-05,
      "grad_norm": 0.23406751453876495,
      "learning_rate": 9.655949112541887e-05,
      "loss": 0.0625,
      "step": 9770
    },
    {
      "epoch": 5.963134765625e-05,
      "model_forward_time": 0.11565542221069336,
      "step": 9770
    },
    {
      "epoch": 5.963134765625e-05,
      "step": 9770,
      "training_step_time": 0.3803431987762451
    },
    {
      "epoch": 5.9637451171875e-05,
      "model_forward_time": 0.11518311500549316,
      "step": 9771
    },
    {
      "epoch": 5.9637451171875e-05,
      "step": 9771,
      "training_step_time": 0.3754265308380127
    },
    {
      "epoch": 5.96435546875e-05,
      "model_forward_time": 0.11608743667602539,
      "step": 9772
    },
    {
      "epoch": 5.96435546875e-05,
      "step": 9772,
      "training_step_time": 0.3767690658569336
    },
    {
      "epoch": 5.9649658203125e-05,
      "model_forward_time": 0.11535954475402832,
      "step": 9773
    },
    {
      "epoch": 5.9649658203125e-05,
      "step": 9773,
      "training_step_time": 0.3900942802429199
    },
    {
      "epoch": 5.965576171875e-05,
      "model_forward_time": 0.1168067455291748,
      "step": 9774
    },
    {
      "epoch": 5.965576171875e-05,
      "step": 9774,
      "training_step_time": 0.3692004680633545
    },
    {
      "epoch": 5.9661865234375e-05,
      "model_forward_time": 0.11647152900695801,
      "step": 9775
    },
    {
      "epoch": 5.9661865234375e-05,
      "step": 9775,
      "training_step_time": 0.92909836769104
    },
    {
      "epoch": 5.966796875e-05,
      "model_forward_time": 0.11585378646850586,
      "step": 9776
    },
    {
      "epoch": 5.966796875e-05,
      "step": 9776,
      "training_step_time": 0.42163634300231934
    },
    {
      "epoch": 5.9674072265625e-05,
      "model_forward_time": 0.1160283088684082,
      "step": 9777
    },
    {
      "epoch": 5.9674072265625e-05,
      "step": 9777,
      "training_step_time": 0.3824894428253174
    },
    {
      "epoch": 5.968017578125e-05,
      "model_forward_time": 0.11541247367858887,
      "step": 9778
    },
    {
      "epoch": 5.968017578125e-05,
      "step": 9778,
      "training_step_time": 0.3792712688446045
    },
    {
      "epoch": 5.9686279296875e-05,
      "model_forward_time": 0.11527585983276367,
      "step": 9779
    },
    {
      "epoch": 5.9686279296875e-05,
      "step": 9779,
      "training_step_time": 0.3786294460296631
    },
    {
      "epoch": 5.96923828125e-05,
      "grad_norm": 0.22634659707546234,
      "learning_rate": 9.654943827000548e-05,
      "loss": 0.061,
      "step": 9780
    },
    {
      "epoch": 5.96923828125e-05,
      "model_forward_time": 0.1161661148071289,
      "step": 9780
    },
    {
      "epoch": 5.96923828125e-05,
      "step": 9780,
      "training_step_time": 0.3924539089202881
    },
    {
      "epoch": 5.9698486328125e-05,
      "model_forward_time": 0.1167452335357666,
      "step": 9781
    },
    {
      "epoch": 5.9698486328125e-05,
      "step": 9781,
      "training_step_time": 0.6691784858703613
    },
    {
      "epoch": 5.970458984375e-05,
      "model_forward_time": 0.1157994270324707,
      "step": 9782
    },
    {
      "epoch": 5.970458984375e-05,
      "step": 9782,
      "training_step_time": 0.3745615482330322
    },
    {
      "epoch": 5.9710693359375e-05,
      "model_forward_time": 0.11676454544067383,
      "step": 9783
    },
    {
      "epoch": 5.9710693359375e-05,
      "step": 9783,
      "training_step_time": 0.3893294334411621
    },
    {
      "epoch": 5.9716796875e-05,
      "model_forward_time": 0.11680769920349121,
      "step": 9784
    },
    {
      "epoch": 5.9716796875e-05,
      "step": 9784,
      "training_step_time": 0.38289833068847656
    },
    {
      "epoch": 5.9722900390625e-05,
      "model_forward_time": 0.11960268020629883,
      "step": 9785
    },
    {
      "epoch": 5.9722900390625e-05,
      "step": 9785,
      "training_step_time": 0.3894972801208496
    },
    {
      "epoch": 5.972900390625e-05,
      "model_forward_time": 0.11689972877502441,
      "step": 9786
    },
    {
      "epoch": 5.972900390625e-05,
      "step": 9786,
      "training_step_time": 0.38443827629089355
    },
    {
      "epoch": 5.9735107421875e-05,
      "model_forward_time": 0.11687445640563965,
      "step": 9787
    },
    {
      "epoch": 5.9735107421875e-05,
      "step": 9787,
      "training_step_time": 1.4278886318206787
    },
    {
      "epoch": 5.97412109375e-05,
      "model_forward_time": 0.1155550479888916,
      "step": 9788
    },
    {
      "epoch": 5.97412109375e-05,
      "step": 9788,
      "training_step_time": 0.4711322784423828
    },
    {
      "epoch": 5.9747314453125e-05,
      "model_forward_time": 0.11965107917785645,
      "step": 9789
    },
    {
      "epoch": 5.9747314453125e-05,
      "step": 9789,
      "training_step_time": 0.46271753311157227
    },
    {
      "epoch": 5.975341796875e-05,
      "grad_norm": 0.18397438526153564,
      "learning_rate": 9.65393712741018e-05,
      "loss": 0.0653,
      "step": 9790
    },
    {
      "epoch": 5.975341796875e-05,
      "model_forward_time": 0.11449337005615234,
      "step": 9790
    },
    {
      "epoch": 5.975341796875e-05,
      "step": 9790,
      "training_step_time": 0.3740401268005371
    },
    {
      "epoch": 5.9759521484375e-05,
      "model_forward_time": 0.11451148986816406,
      "step": 9791
    },
    {
      "epoch": 5.9759521484375e-05,
      "step": 9791,
      "training_step_time": 0.3916149139404297
    },
    {
      "epoch": 5.9765625e-05,
      "model_forward_time": 0.11530876159667969,
      "step": 9792
    },
    {
      "epoch": 5.9765625e-05,
      "step": 9792,
      "training_step_time": 0.3953824043273926
    },
    {
      "epoch": 5.9771728515625e-05,
      "model_forward_time": 0.11595582962036133,
      "step": 9793
    },
    {
      "epoch": 5.9771728515625e-05,
      "step": 9793,
      "training_step_time": 0.5000886917114258
    },
    {
      "epoch": 5.977783203125e-05,
      "model_forward_time": 0.1159369945526123,
      "step": 9794
    },
    {
      "epoch": 5.977783203125e-05,
      "step": 9794,
      "training_step_time": 0.3925590515136719
    },
    {
      "epoch": 5.9783935546875e-05,
      "model_forward_time": 0.1161198616027832,
      "step": 9795
    },
    {
      "epoch": 5.9783935546875e-05,
      "step": 9795,
      "training_step_time": 0.38850998878479004
    },
    {
      "epoch": 5.97900390625e-05,
      "model_forward_time": 0.11671280860900879,
      "step": 9796
    },
    {
      "epoch": 5.97900390625e-05,
      "step": 9796,
      "training_step_time": 0.38321471214294434
    },
    {
      "epoch": 5.9796142578125e-05,
      "model_forward_time": 0.11717033386230469,
      "step": 9797
    },
    {
      "epoch": 5.9796142578125e-05,
      "step": 9797,
      "training_step_time": 0.40024590492248535
    },
    {
      "epoch": 5.980224609375e-05,
      "model_forward_time": 0.11672139167785645,
      "step": 9798
    },
    {
      "epoch": 5.980224609375e-05,
      "step": 9798,
      "training_step_time": 0.39251255989074707
    },
    {
      "epoch": 5.9808349609375e-05,
      "model_forward_time": 0.11598920822143555,
      "step": 9799
    },
    {
      "epoch": 5.9808349609375e-05,
      "step": 9799,
      "training_step_time": 1.2536933422088623
    },
    {
      "epoch": 5.9814453125e-05,
      "grad_norm": 0.18209917843341827,
      "learning_rate": 9.652929014076593e-05,
      "loss": 0.0594,
      "step": 9800
    },
    {
      "epoch": 5.9814453125e-05,
      "model_forward_time": 0.11514520645141602,
      "step": 9800
    },
    {
      "epoch": 5.9814453125e-05,
      "step": 9800,
      "training_step_time": 0.42014384269714355
    },
    {
      "epoch": 5.9820556640625e-05,
      "model_forward_time": 0.1155538558959961,
      "step": 9801
    },
    {
      "epoch": 5.9820556640625e-05,
      "step": 9801,
      "training_step_time": 0.4303398132324219
    },
    {
      "epoch": 5.982666015625e-05,
      "model_forward_time": 0.11492443084716797,
      "step": 9802
    },
    {
      "epoch": 5.982666015625e-05,
      "step": 9802,
      "training_step_time": 0.4193570613861084
    },
    {
      "epoch": 5.9832763671875e-05,
      "model_forward_time": 0.11512517929077148,
      "step": 9803
    },
    {
      "epoch": 5.9832763671875e-05,
      "step": 9803,
      "training_step_time": 0.4231760501861572
    },
    {
      "epoch": 5.98388671875e-05,
      "model_forward_time": 0.11538863182067871,
      "step": 9804
    },
    {
      "epoch": 5.98388671875e-05,
      "step": 9804,
      "training_step_time": 0.3968830108642578
    },
    {
      "epoch": 5.9844970703125e-05,
      "model_forward_time": 0.11606574058532715,
      "step": 9805
    },
    {
      "epoch": 5.9844970703125e-05,
      "step": 9805,
      "training_step_time": 0.734839916229248
    },
    {
      "epoch": 5.985107421875e-05,
      "model_forward_time": 0.11619710922241211,
      "step": 9806
    },
    {
      "epoch": 5.985107421875e-05,
      "step": 9806,
      "training_step_time": 0.372875452041626
    },
    {
      "epoch": 5.9857177734375e-05,
      "model_forward_time": 0.1158592700958252,
      "step": 9807
    },
    {
      "epoch": 5.9857177734375e-05,
      "step": 9807,
      "training_step_time": 0.3741185665130615
    },
    {
      "epoch": 5.986328125e-05,
      "model_forward_time": 0.11566972732543945,
      "step": 9808
    },
    {
      "epoch": 5.986328125e-05,
      "step": 9808,
      "training_step_time": 0.3749120235443115
    },
    {
      "epoch": 5.9869384765625e-05,
      "model_forward_time": 0.11645078659057617,
      "step": 9809
    },
    {
      "epoch": 5.9869384765625e-05,
      "step": 9809,
      "training_step_time": 0.38338351249694824
    },
    {
      "epoch": 5.987548828125e-05,
      "grad_norm": 0.211614727973938,
      "learning_rate": 9.651919487306025e-05,
      "loss": 0.0656,
      "step": 9810
    },
    {
      "epoch": 5.987548828125e-05,
      "model_forward_time": 0.11894989013671875,
      "step": 9810
    },
    {
      "epoch": 5.987548828125e-05,
      "step": 9810,
      "training_step_time": 0.38063597679138184
    },
    {
      "epoch": 5.9881591796875e-05,
      "model_forward_time": 0.11733865737915039,
      "step": 9811
    },
    {
      "epoch": 5.9881591796875e-05,
      "step": 9811,
      "training_step_time": 1.035785436630249
    },
    {
      "epoch": 5.98876953125e-05,
      "model_forward_time": 0.1156315803527832,
      "step": 9812
    },
    {
      "epoch": 5.98876953125e-05,
      "step": 9812,
      "training_step_time": 0.3644993305206299
    },
    {
      "epoch": 5.9893798828125e-05,
      "model_forward_time": 0.1163020133972168,
      "step": 9813
    },
    {
      "epoch": 5.9893798828125e-05,
      "step": 9813,
      "training_step_time": 0.407764196395874
    },
    {
      "epoch": 5.989990234375e-05,
      "model_forward_time": 0.11561036109924316,
      "step": 9814
    },
    {
      "epoch": 5.989990234375e-05,
      "step": 9814,
      "training_step_time": 0.5116689205169678
    },
    {
      "epoch": 5.9906005859375e-05,
      "model_forward_time": 0.1157374382019043,
      "step": 9815
    },
    {
      "epoch": 5.9906005859375e-05,
      "step": 9815,
      "training_step_time": 0.40311765670776367
    },
    {
      "epoch": 5.9912109375e-05,
      "model_forward_time": 0.11666727066040039,
      "step": 9816
    },
    {
      "epoch": 5.9912109375e-05,
      "step": 9816,
      "training_step_time": 0.4318504333496094
    },
    {
      "epoch": 5.9918212890625e-05,
      "model_forward_time": 0.11764383316040039,
      "step": 9817
    },
    {
      "epoch": 5.9918212890625e-05,
      "step": 9817,
      "training_step_time": 0.46007299423217773
    },
    {
      "epoch": 5.992431640625e-05,
      "model_forward_time": 0.11630988121032715,
      "step": 9818
    },
    {
      "epoch": 5.992431640625e-05,
      "step": 9818,
      "training_step_time": 0.37340521812438965
    },
    {
      "epoch": 5.9930419921875e-05,
      "model_forward_time": 0.11626386642456055,
      "step": 9819
    },
    {
      "epoch": 5.9930419921875e-05,
      "step": 9819,
      "training_step_time": 0.3776547908782959
    },
    {
      "epoch": 5.99365234375e-05,
      "grad_norm": 0.2411986142396927,
      "learning_rate": 9.650908547405144e-05,
      "loss": 0.0599,
      "step": 9820
    },
    {
      "epoch": 5.99365234375e-05,
      "model_forward_time": 0.1165933609008789,
      "step": 9820
    },
    {
      "epoch": 5.99365234375e-05,
      "step": 9820,
      "training_step_time": 0.3989701271057129
    },
    {
      "epoch": 5.9942626953125e-05,
      "model_forward_time": 0.11626625061035156,
      "step": 9821
    },
    {
      "epoch": 5.9942626953125e-05,
      "step": 9821,
      "training_step_time": 0.38033342361450195
    },
    {
      "epoch": 5.994873046875e-05,
      "model_forward_time": 0.11774897575378418,
      "step": 9822
    },
    {
      "epoch": 5.994873046875e-05,
      "step": 9822,
      "training_step_time": 0.37462615966796875
    },
    {
      "epoch": 5.9954833984375e-05,
      "model_forward_time": 0.11643576622009277,
      "step": 9823
    },
    {
      "epoch": 5.9954833984375e-05,
      "step": 9823,
      "training_step_time": 0.7109689712524414
    },
    {
      "epoch": 5.99609375e-05,
      "model_forward_time": 0.11590123176574707,
      "step": 9824
    },
    {
      "epoch": 5.99609375e-05,
      "step": 9824,
      "training_step_time": 0.388110876083374
    },
    {
      "epoch": 5.9967041015625e-05,
      "model_forward_time": 0.11659622192382812,
      "step": 9825
    },
    {
      "epoch": 5.9967041015625e-05,
      "step": 9825,
      "training_step_time": 0.3957240581512451
    },
    {
      "epoch": 5.997314453125e-05,
      "model_forward_time": 0.11739277839660645,
      "step": 9826
    },
    {
      "epoch": 5.997314453125e-05,
      "step": 9826,
      "training_step_time": 0.3718750476837158
    },
    {
      "epoch": 5.9979248046875e-05,
      "model_forward_time": 0.11742472648620605,
      "step": 9827
    },
    {
      "epoch": 5.9979248046875e-05,
      "step": 9827,
      "training_step_time": 0.44942474365234375
    },
    {
      "epoch": 5.99853515625e-05,
      "model_forward_time": 0.11593365669250488,
      "step": 9828
    },
    {
      "epoch": 5.99853515625e-05,
      "step": 9828,
      "training_step_time": 0.4778909683227539
    },
    {
      "epoch": 5.9991455078125e-05,
      "model_forward_time": 0.11686444282531738,
      "step": 9829
    },
    {
      "epoch": 5.9991455078125e-05,
      "step": 9829,
      "training_step_time": 1.17698335647583
    },
    {
      "epoch": 5.999755859375e-05,
      "grad_norm": 0.19987799227237701,
      "learning_rate": 9.649896194681045e-05,
      "loss": 0.0697,
      "step": 9830
    },
    {
      "epoch": 5.999755859375e-05,
      "model_forward_time": 0.12005138397216797,
      "step": 9830
    },
    {
      "epoch": 5.999755859375e-05,
      "step": 9830,
      "training_step_time": 0.3773181438446045
    },
    {
      "epoch": 6.0003662109375e-05,
      "model_forward_time": 0.1148383617401123,
      "step": 9831
    },
    {
      "epoch": 6.0003662109375e-05,
      "step": 9831,
      "training_step_time": 0.37737560272216797
    },
    {
      "epoch": 6.0009765625e-05,
      "model_forward_time": 0.11545991897583008,
      "step": 9832
    },
    {
      "epoch": 6.0009765625e-05,
      "step": 9832,
      "training_step_time": 0.3836336135864258
    },
    {
      "epoch": 6.0015869140625e-05,
      "model_forward_time": 0.11503076553344727,
      "step": 9833
    },
    {
      "epoch": 6.0015869140625e-05,
      "step": 9833,
      "training_step_time": 0.37603282928466797
    },
    {
      "epoch": 6.002197265625e-05,
      "model_forward_time": 0.11530685424804688,
      "step": 9834
    },
    {
      "epoch": 6.002197265625e-05,
      "step": 9834,
      "training_step_time": 0.3713388442993164
    },
    {
      "epoch": 6.0028076171875e-05,
      "model_forward_time": 0.11697268486022949,
      "step": 9835
    },
    {
      "epoch": 6.0028076171875e-05,
      "step": 9835,
      "training_step_time": 0.5762767791748047
    },
    {
      "epoch": 6.00341796875e-05,
      "model_forward_time": 0.11548542976379395,
      "step": 9836
    },
    {
      "epoch": 6.00341796875e-05,
      "step": 9836,
      "training_step_time": 0.4053664207458496
    },
    {
      "epoch": 6.0040283203125e-05,
      "model_forward_time": 0.11571478843688965,
      "step": 9837
    },
    {
      "epoch": 6.0040283203125e-05,
      "step": 9837,
      "training_step_time": 0.4033651351928711
    },
    {
      "epoch": 6.004638671875e-05,
      "model_forward_time": 0.11648249626159668,
      "step": 9838
    },
    {
      "epoch": 6.004638671875e-05,
      "step": 9838,
      "training_step_time": 0.383958101272583
    },
    {
      "epoch": 6.0052490234375e-05,
      "model_forward_time": 0.11656498908996582,
      "step": 9839
    },
    {
      "epoch": 6.0052490234375e-05,
      "step": 9839,
      "training_step_time": 0.41025567054748535
    },
    {
      "epoch": 6.005859375e-05,
      "grad_norm": 0.1841287463903427,
      "learning_rate": 9.648882429441257e-05,
      "loss": 0.0651,
      "step": 9840
    },
    {
      "epoch": 6.005859375e-05,
      "model_forward_time": 0.11773204803466797,
      "step": 9840
    },
    {
      "epoch": 6.005859375e-05,
      "step": 9840,
      "training_step_time": 0.47303080558776855
    },
    {
      "epoch": 6.0064697265625e-05,
      "model_forward_time": 0.11697149276733398,
      "step": 9841
    },
    {
      "epoch": 6.0064697265625e-05,
      "step": 9841,
      "training_step_time": 0.5929055213928223
    },
    {
      "epoch": 6.007080078125e-05,
      "model_forward_time": 0.11649084091186523,
      "step": 9842
    },
    {
      "epoch": 6.007080078125e-05,
      "step": 9842,
      "training_step_time": 0.44345545768737793
    },
    {
      "epoch": 6.0076904296875e-05,
      "model_forward_time": 0.11567044258117676,
      "step": 9843
    },
    {
      "epoch": 6.0076904296875e-05,
      "step": 9843,
      "training_step_time": 0.38774991035461426
    },
    {
      "epoch": 6.00830078125e-05,
      "model_forward_time": 0.11680221557617188,
      "step": 9844
    },
    {
      "epoch": 6.00830078125e-05,
      "step": 9844,
      "training_step_time": 0.3930208683013916
    },
    {
      "epoch": 6.0089111328125e-05,
      "model_forward_time": 0.11684203147888184,
      "step": 9845
    },
    {
      "epoch": 6.0089111328125e-05,
      "step": 9845,
      "training_step_time": 0.3790853023529053
    },
    {
      "epoch": 6.009521484375e-05,
      "model_forward_time": 0.11606121063232422,
      "step": 9846
    },
    {
      "epoch": 6.009521484375e-05,
      "step": 9846,
      "training_step_time": 0.4626274108886719
    },
    {
      "epoch": 6.0101318359375e-05,
      "model_forward_time": 0.1162877082824707,
      "step": 9847
    },
    {
      "epoch": 6.0101318359375e-05,
      "step": 9847,
      "training_step_time": 0.4109823703765869
    },
    {
      "epoch": 6.0107421875e-05,
      "model_forward_time": 0.11624646186828613,
      "step": 9848
    },
    {
      "epoch": 6.0107421875e-05,
      "step": 9848,
      "training_step_time": 0.385500431060791
    },
    {
      "epoch": 6.0113525390625e-05,
      "model_forward_time": 0.11651420593261719,
      "step": 9849
    },
    {
      "epoch": 6.0113525390625e-05,
      "step": 9849,
      "training_step_time": 0.395892858505249
    },
    {
      "epoch": 6.011962890625e-05,
      "grad_norm": 0.1599273830652237,
      "learning_rate": 9.647867251993734e-05,
      "loss": 0.0699,
      "step": 9850
    },
    {
      "epoch": 6.011962890625e-05,
      "model_forward_time": 0.1159367561340332,
      "step": 9850
    },
    {
      "epoch": 6.011962890625e-05,
      "step": 9850,
      "training_step_time": 0.40021657943725586
    },
    {
      "epoch": 6.0125732421875e-05,
      "model_forward_time": 0.11735987663269043,
      "step": 9851
    },
    {
      "epoch": 6.0125732421875e-05,
      "step": 9851,
      "training_step_time": 0.39406609535217285
    },
    {
      "epoch": 6.01318359375e-05,
      "model_forward_time": 0.11678886413574219,
      "step": 9852
    },
    {
      "epoch": 6.01318359375e-05,
      "step": 9852,
      "training_step_time": 0.38750600814819336
    },
    {
      "epoch": 6.0137939453125e-05,
      "model_forward_time": 0.11718630790710449,
      "step": 9853
    },
    {
      "epoch": 6.0137939453125e-05,
      "step": 9853,
      "training_step_time": 1.5102331638336182
    },
    {
      "epoch": 6.014404296875e-05,
      "model_forward_time": 0.11609625816345215,
      "step": 9854
    },
    {
      "epoch": 6.014404296875e-05,
      "step": 9854,
      "training_step_time": 0.4105103015899658
    },
    {
      "epoch": 6.0150146484375e-05,
      "model_forward_time": 0.11496090888977051,
      "step": 9855
    },
    {
      "epoch": 6.0150146484375e-05,
      "step": 9855,
      "training_step_time": 0.3709840774536133
    },
    {
      "epoch": 6.015625e-05,
      "model_forward_time": 0.11517667770385742,
      "step": 9856
    },
    {
      "epoch": 6.015625e-05,
      "step": 9856,
      "training_step_time": 0.3811361789703369
    },
    {
      "epoch": 6.0162353515625e-05,
      "model_forward_time": 0.11560988426208496,
      "step": 9857
    },
    {
      "epoch": 6.0162353515625e-05,
      "step": 9857,
      "training_step_time": 0.3812065124511719
    },
    {
      "epoch": 6.016845703125e-05,
      "model_forward_time": 0.12128496170043945,
      "step": 9858
    },
    {
      "epoch": 6.016845703125e-05,
      "step": 9858,
      "training_step_time": 0.3816075325012207
    },
    {
      "epoch": 6.0174560546875e-05,
      "model_forward_time": 0.11684250831604004,
      "step": 9859
    },
    {
      "epoch": 6.0174560546875e-05,
      "step": 9859,
      "training_step_time": 0.7201211452484131
    },
    {
      "epoch": 6.01806640625e-05,
      "grad_norm": 0.24282360076904297,
      "learning_rate": 9.646850662646859e-05,
      "loss": 0.0635,
      "step": 9860
    },
    {
      "epoch": 6.01806640625e-05,
      "model_forward_time": 0.11533498764038086,
      "step": 9860
    },
    {
      "epoch": 6.01806640625e-05,
      "step": 9860,
      "training_step_time": 0.393054723739624
    },
    {
      "epoch": 6.0186767578125e-05,
      "model_forward_time": 0.11634445190429688,
      "step": 9861
    },
    {
      "epoch": 6.0186767578125e-05,
      "step": 9861,
      "training_step_time": 0.39050841331481934
    },
    {
      "epoch": 6.019287109375e-05,
      "model_forward_time": 0.11589384078979492,
      "step": 9862
    },
    {
      "epoch": 6.019287109375e-05,
      "step": 9862,
      "training_step_time": 0.388674259185791
    },
    {
      "epoch": 6.0198974609375e-05,
      "model_forward_time": 0.11780190467834473,
      "step": 9863
    },
    {
      "epoch": 6.0198974609375e-05,
      "step": 9863,
      "training_step_time": 0.3716859817504883
    },
    {
      "epoch": 6.0205078125e-05,
      "model_forward_time": 0.1178750991821289,
      "step": 9864
    },
    {
      "epoch": 6.0205078125e-05,
      "step": 9864,
      "training_step_time": 0.3921060562133789
    },
    {
      "epoch": 6.0211181640625e-05,
      "model_forward_time": 0.11750602722167969,
      "step": 9865
    },
    {
      "epoch": 6.0211181640625e-05,
      "step": 9865,
      "training_step_time": 0.7480549812316895
    },
    {
      "epoch": 6.021728515625e-05,
      "model_forward_time": 0.11794066429138184,
      "step": 9866
    },
    {
      "epoch": 6.021728515625e-05,
      "step": 9866,
      "training_step_time": 0.37319469451904297
    },
    {
      "epoch": 6.0223388671875e-05,
      "model_forward_time": 0.11776494979858398,
      "step": 9867
    },
    {
      "epoch": 6.0223388671875e-05,
      "step": 9867,
      "training_step_time": 0.46665096282958984
    },
    {
      "epoch": 6.02294921875e-05,
      "model_forward_time": 0.12029218673706055,
      "step": 9868
    },
    {
      "epoch": 6.02294921875e-05,
      "step": 9868,
      "training_step_time": 0.47793078422546387
    },
    {
      "epoch": 6.0235595703125e-05,
      "model_forward_time": 0.1158592700958252,
      "step": 9869
    },
    {
      "epoch": 6.0235595703125e-05,
      "step": 9869,
      "training_step_time": 0.3805365562438965
    },
    {
      "epoch": 6.024169921875e-05,
      "grad_norm": 0.14918723702430725,
      "learning_rate": 9.645832661709444e-05,
      "loss": 0.058,
      "step": 9870
    },
    {
      "epoch": 6.024169921875e-05,
      "model_forward_time": 0.11664867401123047,
      "step": 9870
    },
    {
      "epoch": 6.024169921875e-05,
      "step": 9870,
      "training_step_time": 0.3804051876068115
    },
    {
      "epoch": 6.0247802734375e-05,
      "model_forward_time": 0.1172325611114502,
      "step": 9871
    },
    {
      "epoch": 6.0247802734375e-05,
      "step": 9871,
      "training_step_time": 0.3948237895965576
    },
    {
      "epoch": 6.025390625e-05,
      "model_forward_time": 0.11633014678955078,
      "step": 9872
    },
    {
      "epoch": 6.025390625e-05,
      "step": 9872,
      "training_step_time": 0.38307809829711914
    },
    {
      "epoch": 6.0260009765625e-05,
      "model_forward_time": 0.11857819557189941,
      "step": 9873
    },
    {
      "epoch": 6.0260009765625e-05,
      "step": 9873,
      "training_step_time": 0.37400126457214355
    },
    {
      "epoch": 6.026611328125e-05,
      "model_forward_time": 0.11685395240783691,
      "step": 9874
    },
    {
      "epoch": 6.026611328125e-05,
      "step": 9874,
      "training_step_time": 0.3764209747314453
    },
    {
      "epoch": 6.0272216796875e-05,
      "model_forward_time": 0.1164388656616211,
      "step": 9875
    },
    {
      "epoch": 6.0272216796875e-05,
      "step": 9875,
      "training_step_time": 0.37884020805358887
    },
    {
      "epoch": 6.02783203125e-05,
      "model_forward_time": 0.11754798889160156,
      "step": 9876
    },
    {
      "epoch": 6.02783203125e-05,
      "step": 9876,
      "training_step_time": 0.37688493728637695
    },
    {
      "epoch": 6.0284423828125e-05,
      "model_forward_time": 0.11691498756408691,
      "step": 9877
    },
    {
      "epoch": 6.0284423828125e-05,
      "step": 9877,
      "training_step_time": 1.1310889720916748
    },
    {
      "epoch": 6.029052734375e-05,
      "model_forward_time": 0.11612534523010254,
      "step": 9878
    },
    {
      "epoch": 6.029052734375e-05,
      "step": 9878,
      "training_step_time": 0.4254636764526367
    },
    {
      "epoch": 6.0296630859375e-05,
      "model_forward_time": 0.11577892303466797,
      "step": 9879
    },
    {
      "epoch": 6.0296630859375e-05,
      "step": 9879,
      "training_step_time": 0.369110107421875
    },
    {
      "epoch": 6.0302734375e-05,
      "grad_norm": 0.22585375607013702,
      "learning_rate": 9.644813249490735e-05,
      "loss": 0.0637,
      "step": 9880
    },
    {
      "epoch": 6.0302734375e-05,
      "model_forward_time": 0.11549639701843262,
      "step": 9880
    },
    {
      "epoch": 6.0302734375e-05,
      "step": 9880,
      "training_step_time": 0.4637327194213867
    },
    {
      "epoch": 6.0308837890625e-05,
      "model_forward_time": 0.11586236953735352,
      "step": 9881
    },
    {
      "epoch": 6.0308837890625e-05,
      "step": 9881,
      "training_step_time": 0.39755678176879883
    },
    {
      "epoch": 6.031494140625e-05,
      "model_forward_time": 0.11568188667297363,
      "step": 9882
    },
    {
      "epoch": 6.031494140625e-05,
      "step": 9882,
      "training_step_time": 0.3780057430267334
    },
    {
      "epoch": 6.0321044921875e-05,
      "model_forward_time": 0.11582589149475098,
      "step": 9883
    },
    {
      "epoch": 6.0321044921875e-05,
      "step": 9883,
      "training_step_time": 0.5083675384521484
    },
    {
      "epoch": 6.03271484375e-05,
      "model_forward_time": 0.11538481712341309,
      "step": 9884
    },
    {
      "epoch": 6.03271484375e-05,
      "step": 9884,
      "training_step_time": 0.37523818016052246
    },
    {
      "epoch": 6.0333251953125e-05,
      "model_forward_time": 0.11658787727355957,
      "step": 9885
    },
    {
      "epoch": 6.0333251953125e-05,
      "step": 9885,
      "training_step_time": 0.3840494155883789
    },
    {
      "epoch": 6.033935546875e-05,
      "model_forward_time": 0.11646389961242676,
      "step": 9886
    },
    {
      "epoch": 6.033935546875e-05,
      "step": 9886,
      "training_step_time": 0.3801000118255615
    },
    {
      "epoch": 6.0345458984375e-05,
      "model_forward_time": 0.116851806640625,
      "step": 9887
    },
    {
      "epoch": 6.0345458984375e-05,
      "step": 9887,
      "training_step_time": 0.398681640625
    },
    {
      "epoch": 6.03515625e-05,
      "model_forward_time": 0.11705899238586426,
      "step": 9888
    },
    {
      "epoch": 6.03515625e-05,
      "step": 9888,
      "training_step_time": 0.38048386573791504
    },
    {
      "epoch": 6.0357666015625e-05,
      "model_forward_time": 0.11732769012451172,
      "step": 9889
    },
    {
      "epoch": 6.0357666015625e-05,
      "step": 9889,
      "training_step_time": 0.7371072769165039
    },
    {
      "epoch": 6.036376953125e-05,
      "grad_norm": 0.2311709225177765,
      "learning_rate": 9.6437924263004e-05,
      "loss": 0.069,
      "step": 9890
    },
    {
      "epoch": 6.036376953125e-05,
      "model_forward_time": 0.11663126945495605,
      "step": 9890
    },
    {
      "epoch": 6.036376953125e-05,
      "step": 9890,
      "training_step_time": 0.3742833137512207
    },
    {
      "epoch": 6.0369873046875e-05,
      "model_forward_time": 0.1162266731262207,
      "step": 9891
    },
    {
      "epoch": 6.0369873046875e-05,
      "step": 9891,
      "training_step_time": 0.4185492992401123
    },
    {
      "epoch": 6.03759765625e-05,
      "model_forward_time": 0.11911916732788086,
      "step": 9892
    },
    {
      "epoch": 6.03759765625e-05,
      "step": 9892,
      "training_step_time": 0.3883028030395508
    },
    {
      "epoch": 6.0382080078125e-05,
      "model_forward_time": 0.11573266983032227,
      "step": 9893
    },
    {
      "epoch": 6.0382080078125e-05,
      "step": 9893,
      "training_step_time": 0.38889336585998535
    },
    {
      "epoch": 6.038818359375e-05,
      "model_forward_time": 0.11640524864196777,
      "step": 9894
    },
    {
      "epoch": 6.038818359375e-05,
      "step": 9894,
      "training_step_time": 0.39089202880859375
    },
    {
      "epoch": 6.0394287109375e-05,
      "model_forward_time": 0.11720824241638184,
      "step": 9895
    },
    {
      "epoch": 6.0394287109375e-05,
      "step": 9895,
      "training_step_time": 0.47432947158813477
    },
    {
      "epoch": 6.0400390625e-05,
      "model_forward_time": 0.1176908016204834,
      "step": 9896
    },
    {
      "epoch": 6.0400390625e-05,
      "step": 9896,
      "training_step_time": 0.3795316219329834
    },
    {
      "epoch": 6.0406494140625e-05,
      "model_forward_time": 0.1163482666015625,
      "step": 9897
    },
    {
      "epoch": 6.0406494140625e-05,
      "step": 9897,
      "training_step_time": 0.37927699089050293
    },
    {
      "epoch": 6.041259765625e-05,
      "model_forward_time": 0.11776566505432129,
      "step": 9898
    },
    {
      "epoch": 6.041259765625e-05,
      "step": 9898,
      "training_step_time": 0.3801555633544922
    },
    {
      "epoch": 6.0418701171875e-05,
      "model_forward_time": 0.11577224731445312,
      "step": 9899
    },
    {
      "epoch": 6.0418701171875e-05,
      "step": 9899,
      "training_step_time": 0.37828683853149414
    },
    {
      "epoch": 6.04248046875e-05,
      "grad_norm": 0.23914587497711182,
      "learning_rate": 9.642770192448536e-05,
      "loss": 0.0617,
      "step": 9900
    },
    {
      "epoch": 6.04248046875e-05,
      "model_forward_time": 0.11683177947998047,
      "step": 9900
    },
    {
      "epoch": 6.04248046875e-05,
      "step": 9900,
      "training_step_time": 0.38022780418395996
    },
    {
      "epoch": 6.0430908203125e-05,
      "model_forward_time": 0.11789608001708984,
      "step": 9901
    },
    {
      "epoch": 6.0430908203125e-05,
      "step": 9901,
      "training_step_time": 0.7173104286193848
    },
    {
      "epoch": 6.043701171875e-05,
      "model_forward_time": 0.11646914482116699,
      "step": 9902
    },
    {
      "epoch": 6.043701171875e-05,
      "step": 9902,
      "training_step_time": 0.3725295066833496
    },
    {
      "epoch": 6.0443115234375e-05,
      "model_forward_time": 0.11605644226074219,
      "step": 9903
    },
    {
      "epoch": 6.0443115234375e-05,
      "step": 9903,
      "training_step_time": 0.4017965793609619
    },
    {
      "epoch": 6.044921875e-05,
      "model_forward_time": 0.11891889572143555,
      "step": 9904
    },
    {
      "epoch": 6.044921875e-05,
      "step": 9904,
      "training_step_time": 0.39008450508117676
    },
    {
      "epoch": 6.0455322265625e-05,
      "model_forward_time": 0.1162254810333252,
      "step": 9905
    },
    {
      "epoch": 6.0455322265625e-05,
      "step": 9905,
      "training_step_time": 0.4025566577911377
    },
    {
      "epoch": 6.046142578125e-05,
      "model_forward_time": 0.1165168285369873,
      "step": 9906
    },
    {
      "epoch": 6.046142578125e-05,
      "step": 9906,
      "training_step_time": 0.39910101890563965
    },
    {
      "epoch": 6.0467529296875e-05,
      "model_forward_time": 0.11593437194824219,
      "step": 9907
    },
    {
      "epoch": 6.0467529296875e-05,
      "step": 9907,
      "training_step_time": 0.6886632442474365
    },
    {
      "epoch": 6.04736328125e-05,
      "model_forward_time": 0.11626791954040527,
      "step": 9908
    },
    {
      "epoch": 6.04736328125e-05,
      "step": 9908,
      "training_step_time": 0.4530751705169678
    },
    {
      "epoch": 6.0479736328125e-05,
      "model_forward_time": 0.11646747589111328,
      "step": 9909
    },
    {
      "epoch": 6.0479736328125e-05,
      "step": 9909,
      "training_step_time": 0.3881206512451172
    },
    {
      "epoch": 6.048583984375e-05,
      "grad_norm": 0.1925061047077179,
      "learning_rate": 9.641746548245673e-05,
      "loss": 0.0623,
      "step": 9910
    },
    {
      "epoch": 6.048583984375e-05,
      "model_forward_time": 0.1154329776763916,
      "step": 9910
    },
    {
      "epoch": 6.048583984375e-05,
      "step": 9910,
      "training_step_time": 0.387514591217041
    },
    {
      "epoch": 6.0491943359375e-05,
      "model_forward_time": 0.11687541007995605,
      "step": 9911
    },
    {
      "epoch": 6.0491943359375e-05,
      "step": 9911,
      "training_step_time": 0.37743139266967773
    },
    {
      "epoch": 6.0498046875e-05,
      "model_forward_time": 0.11522459983825684,
      "step": 9912
    },
    {
      "epoch": 6.0498046875e-05,
      "step": 9912,
      "training_step_time": 0.37535905838012695
    },
    {
      "epoch": 6.0504150390625e-05,
      "model_forward_time": 0.11726665496826172,
      "step": 9913
    },
    {
      "epoch": 6.0504150390625e-05,
      "step": 9913,
      "training_step_time": 0.9300024509429932
    },
    {
      "epoch": 6.051025390625e-05,
      "model_forward_time": 0.11693596839904785,
      "step": 9914
    },
    {
      "epoch": 6.051025390625e-05,
      "step": 9914,
      "training_step_time": 0.3911325931549072
    },
    {
      "epoch": 6.0516357421875e-05,
      "model_forward_time": 0.11615157127380371,
      "step": 9915
    },
    {
      "epoch": 6.0516357421875e-05,
      "step": 9915,
      "training_step_time": 0.38340210914611816
    },
    {
      "epoch": 6.05224609375e-05,
      "model_forward_time": 0.11714982986450195,
      "step": 9916
    },
    {
      "epoch": 6.05224609375e-05,
      "step": 9916,
      "training_step_time": 0.380723237991333
    },
    {
      "epoch": 6.0528564453125e-05,
      "model_forward_time": 0.11633896827697754,
      "step": 9917
    },
    {
      "epoch": 6.0528564453125e-05,
      "step": 9917,
      "training_step_time": 0.37848353385925293
    },
    {
      "epoch": 6.053466796875e-05,
      "model_forward_time": 0.11709094047546387,
      "step": 9918
    },
    {
      "epoch": 6.053466796875e-05,
      "step": 9918,
      "training_step_time": 0.397031307220459
    },
    {
      "epoch": 6.0540771484375e-05,
      "model_forward_time": 0.11639523506164551,
      "step": 9919
    },
    {
      "epoch": 6.0540771484375e-05,
      "step": 9919,
      "training_step_time": 0.4687235355377197
    },
    {
      "epoch": 6.0546875e-05,
      "grad_norm": 0.3261011838912964,
      "learning_rate": 9.640721494002769e-05,
      "loss": 0.0637,
      "step": 9920
    },
    {
      "epoch": 6.0546875e-05,
      "model_forward_time": 0.11699342727661133,
      "step": 9920
    },
    {
      "epoch": 6.0546875e-05,
      "step": 9920,
      "training_step_time": 0.38517069816589355
    },
    {
      "epoch": 6.0552978515625e-05,
      "model_forward_time": 0.11694741249084473,
      "step": 9921
    },
    {
      "epoch": 6.0552978515625e-05,
      "step": 9921,
      "training_step_time": 0.45763158798217773
    },
    {
      "epoch": 6.055908203125e-05,
      "model_forward_time": 0.11739397048950195,
      "step": 9922
    },
    {
      "epoch": 6.055908203125e-05,
      "step": 9922,
      "training_step_time": 0.4911987781524658
    },
    {
      "epoch": 6.0565185546875e-05,
      "model_forward_time": 0.12059259414672852,
      "step": 9923
    },
    {
      "epoch": 6.0565185546875e-05,
      "step": 9923,
      "training_step_time": 0.37590837478637695
    },
    {
      "epoch": 6.05712890625e-05,
      "model_forward_time": 0.1172490119934082,
      "step": 9924
    },
    {
      "epoch": 6.05712890625e-05,
      "step": 9924,
      "training_step_time": 0.3728632926940918
    },
    {
      "epoch": 6.0577392578125e-05,
      "model_forward_time": 0.11682891845703125,
      "step": 9925
    },
    {
      "epoch": 6.0577392578125e-05,
      "step": 9925,
      "training_step_time": 0.8465116024017334
    },
    {
      "epoch": 6.058349609375e-05,
      "model_forward_time": 0.11642670631408691,
      "step": 9926
    },
    {
      "epoch": 6.058349609375e-05,
      "step": 9926,
      "training_step_time": 0.41945934295654297
    },
    {
      "epoch": 6.0589599609375e-05,
      "model_forward_time": 0.11820363998413086,
      "step": 9927
    },
    {
      "epoch": 6.0589599609375e-05,
      "step": 9927,
      "training_step_time": 0.4229390621185303
    },
    {
      "epoch": 6.0595703125e-05,
      "model_forward_time": 0.11570334434509277,
      "step": 9928
    },
    {
      "epoch": 6.0595703125e-05,
      "step": 9928,
      "training_step_time": 0.3783571720123291
    },
    {
      "epoch": 6.0601806640625e-05,
      "model_forward_time": 0.11615920066833496,
      "step": 9929
    },
    {
      "epoch": 6.0601806640625e-05,
      "step": 9929,
      "training_step_time": 0.3896784782409668
    },
    {
      "epoch": 6.060791015625e-05,
      "grad_norm": 0.29267680644989014,
      "learning_rate": 9.639695030031204e-05,
      "loss": 0.0587,
      "step": 9930
    },
    {
      "epoch": 6.060791015625e-05,
      "model_forward_time": 0.11504220962524414,
      "step": 9930
    },
    {
      "epoch": 6.060791015625e-05,
      "step": 9930,
      "training_step_time": 0.37197279930114746
    },
    {
      "epoch": 6.0614013671875e-05,
      "model_forward_time": 0.11779928207397461,
      "step": 9931
    },
    {
      "epoch": 6.0614013671875e-05,
      "step": 9931,
      "training_step_time": 0.7746317386627197
    },
    {
      "epoch": 6.06201171875e-05,
      "model_forward_time": 0.11644315719604492,
      "step": 9932
    },
    {
      "epoch": 6.06201171875e-05,
      "step": 9932,
      "training_step_time": 0.3989536762237549
    },
    {
      "epoch": 6.0626220703125e-05,
      "model_forward_time": 0.11610126495361328,
      "step": 9933
    },
    {
      "epoch": 6.0626220703125e-05,
      "step": 9933,
      "training_step_time": 0.37384819984436035
    },
    {
      "epoch": 6.063232421875e-05,
      "model_forward_time": 0.11657214164733887,
      "step": 9934
    },
    {
      "epoch": 6.063232421875e-05,
      "step": 9934,
      "training_step_time": 0.4414961338043213
    },
    {
      "epoch": 6.0638427734375e-05,
      "model_forward_time": 0.11644411087036133,
      "step": 9935
    },
    {
      "epoch": 6.0638427734375e-05,
      "step": 9935,
      "training_step_time": 0.39490842819213867
    },
    {
      "epoch": 6.064453125e-05,
      "model_forward_time": 0.1165614128112793,
      "step": 9936
    },
    {
      "epoch": 6.064453125e-05,
      "step": 9936,
      "training_step_time": 0.3714168071746826
    },
    {
      "epoch": 6.0650634765625e-05,
      "model_forward_time": 0.11580538749694824,
      "step": 9937
    },
    {
      "epoch": 6.0650634765625e-05,
      "step": 9937,
      "training_step_time": 0.9699687957763672
    },
    {
      "epoch": 6.065673828125e-05,
      "model_forward_time": 0.11586332321166992,
      "step": 9938
    },
    {
      "epoch": 6.065673828125e-05,
      "step": 9938,
      "training_step_time": 0.4194338321685791
    },
    {
      "epoch": 6.0662841796875e-05,
      "model_forward_time": 0.11571836471557617,
      "step": 9939
    },
    {
      "epoch": 6.0662841796875e-05,
      "step": 9939,
      "training_step_time": 0.3824779987335205
    },
    {
      "epoch": 6.06689453125e-05,
      "grad_norm": 0.2018343210220337,
      "learning_rate": 9.638667156642794e-05,
      "loss": 0.0714,
      "step": 9940
    },
    {
      "epoch": 6.06689453125e-05,
      "model_forward_time": 0.11467432975769043,
      "step": 9940
    },
    {
      "epoch": 6.06689453125e-05,
      "step": 9940,
      "training_step_time": 0.4061458110809326
    },
    {
      "epoch": 6.0675048828125e-05,
      "model_forward_time": 0.11644959449768066,
      "step": 9941
    },
    {
      "epoch": 6.0675048828125e-05,
      "step": 9941,
      "training_step_time": 0.37057995796203613
    },
    {
      "epoch": 6.068115234375e-05,
      "model_forward_time": 0.11745285987854004,
      "step": 9942
    },
    {
      "epoch": 6.068115234375e-05,
      "step": 9942,
      "training_step_time": 0.3713953495025635
    },
    {
      "epoch": 6.0687255859375e-05,
      "model_forward_time": 0.11695289611816406,
      "step": 9943
    },
    {
      "epoch": 6.0687255859375e-05,
      "step": 9943,
      "training_step_time": 0.9305577278137207
    },
    {
      "epoch": 6.0693359375e-05,
      "model_forward_time": 0.11598372459411621,
      "step": 9944
    },
    {
      "epoch": 6.0693359375e-05,
      "step": 9944,
      "training_step_time": 0.37293076515197754
    },
    {
      "epoch": 6.0699462890625e-05,
      "model_forward_time": 0.11690735816955566,
      "step": 9945
    },
    {
      "epoch": 6.0699462890625e-05,
      "step": 9945,
      "training_step_time": 0.418013334274292
    },
    {
      "epoch": 6.070556640625e-05,
      "model_forward_time": 0.1162419319152832,
      "step": 9946
    },
    {
      "epoch": 6.070556640625e-05,
      "step": 9946,
      "training_step_time": 0.4324374198913574
    },
    {
      "epoch": 6.0711669921875e-05,
      "model_forward_time": 0.11675214767456055,
      "step": 9947
    },
    {
      "epoch": 6.0711669921875e-05,
      "step": 9947,
      "training_step_time": 0.44164204597473145
    },
    {
      "epoch": 6.07177734375e-05,
      "model_forward_time": 0.11773419380187988,
      "step": 9948
    },
    {
      "epoch": 6.07177734375e-05,
      "step": 9948,
      "training_step_time": 0.4086630344390869
    },
    {
      "epoch": 6.0723876953125e-05,
      "model_forward_time": 0.11836957931518555,
      "step": 9949
    },
    {
      "epoch": 6.0723876953125e-05,
      "step": 9949,
      "training_step_time": 0.9949245452880859
    },
    {
      "epoch": 6.072998046875e-05,
      "grad_norm": 0.2535766363143921,
      "learning_rate": 9.637637874149779e-05,
      "loss": 0.0705,
      "step": 9950
    },
    {
      "epoch": 6.072998046875e-05,
      "model_forward_time": 0.11650681495666504,
      "step": 9950
    },
    {
      "epoch": 6.072998046875e-05,
      "step": 9950,
      "training_step_time": 0.40668630599975586
    },
    {
      "epoch": 6.0736083984375e-05,
      "model_forward_time": 0.1160728931427002,
      "step": 9951
    },
    {
      "epoch": 6.0736083984375e-05,
      "step": 9951,
      "training_step_time": 0.3724520206451416
    },
    {
      "epoch": 6.07421875e-05,
      "model_forward_time": 0.11644411087036133,
      "step": 9952
    },
    {
      "epoch": 6.07421875e-05,
      "step": 9952,
      "training_step_time": 0.3841829299926758
    },
    {
      "epoch": 6.0748291015625e-05,
      "model_forward_time": 0.11520171165466309,
      "step": 9953
    },
    {
      "epoch": 6.0748291015625e-05,
      "step": 9953,
      "training_step_time": 0.388049840927124
    },
    {
      "epoch": 6.075439453125e-05,
      "model_forward_time": 0.11574792861938477,
      "step": 9954
    },
    {
      "epoch": 6.075439453125e-05,
      "step": 9954,
      "training_step_time": 0.3733079433441162
    },
    {
      "epoch": 6.0760498046875e-05,
      "model_forward_time": 0.11737346649169922,
      "step": 9955
    },
    {
      "epoch": 6.0760498046875e-05,
      "step": 9955,
      "training_step_time": 1.028350830078125
    },
    {
      "epoch": 6.07666015625e-05,
      "model_forward_time": 0.11629343032836914,
      "step": 9956
    },
    {
      "epoch": 6.07666015625e-05,
      "step": 9956,
      "training_step_time": 0.44812512397766113
    },
    {
      "epoch": 6.0772705078125e-05,
      "model_forward_time": 0.11543607711791992,
      "step": 9957
    },
    {
      "epoch": 6.0772705078125e-05,
      "step": 9957,
      "training_step_time": 0.4213402271270752
    },
    {
      "epoch": 6.077880859375e-05,
      "model_forward_time": 0.11574959754943848,
      "step": 9958
    },
    {
      "epoch": 6.077880859375e-05,
      "step": 9958,
      "training_step_time": 0.4496631622314453
    },
    {
      "epoch": 6.0784912109375e-05,
      "model_forward_time": 0.11516857147216797,
      "step": 9959
    },
    {
      "epoch": 6.0784912109375e-05,
      "step": 9959,
      "training_step_time": 0.46479320526123047
    },
    {
      "epoch": 6.0791015625e-05,
      "grad_norm": 0.18408901989459991,
      "learning_rate": 9.636607182864827e-05,
      "loss": 0.0628,
      "step": 9960
    },
    {
      "epoch": 6.0791015625e-05,
      "model_forward_time": 0.11533856391906738,
      "step": 9960
    },
    {
      "epoch": 6.0791015625e-05,
      "step": 9960,
      "training_step_time": 0.45262694358825684
    },
    {
      "epoch": 6.0797119140625e-05,
      "model_forward_time": 0.11672329902648926,
      "step": 9961
    },
    {
      "epoch": 6.0797119140625e-05,
      "step": 9961,
      "training_step_time": 0.3975410461425781
    },
    {
      "epoch": 6.080322265625e-05,
      "model_forward_time": 0.11612129211425781,
      "step": 9962
    },
    {
      "epoch": 6.080322265625e-05,
      "step": 9962,
      "training_step_time": 0.4127788543701172
    },
    {
      "epoch": 6.0809326171875e-05,
      "model_forward_time": 0.11584949493408203,
      "step": 9963
    },
    {
      "epoch": 6.0809326171875e-05,
      "step": 9963,
      "training_step_time": 0.3841359615325928
    },
    {
      "epoch": 6.08154296875e-05,
      "model_forward_time": 0.11690545082092285,
      "step": 9964
    },
    {
      "epoch": 6.08154296875e-05,
      "step": 9964,
      "training_step_time": 0.38460731506347656
    },
    {
      "epoch": 6.0821533203125e-05,
      "model_forward_time": 0.11661958694458008,
      "step": 9965
    },
    {
      "epoch": 6.0821533203125e-05,
      "step": 9965,
      "training_step_time": 0.39601898193359375
    },
    {
      "epoch": 6.082763671875e-05,
      "model_forward_time": 0.11639118194580078,
      "step": 9966
    },
    {
      "epoch": 6.082763671875e-05,
      "step": 9966,
      "training_step_time": 0.387160062789917
    },
    {
      "epoch": 6.0833740234375e-05,
      "model_forward_time": 0.11631989479064941,
      "step": 9967
    },
    {
      "epoch": 6.0833740234375e-05,
      "step": 9967,
      "training_step_time": 0.9096248149871826
    },
    {
      "epoch": 6.083984375e-05,
      "model_forward_time": 0.11598563194274902,
      "step": 9968
    },
    {
      "epoch": 6.083984375e-05,
      "step": 9968,
      "training_step_time": 0.3909168243408203
    },
    {
      "epoch": 6.0845947265625e-05,
      "model_forward_time": 0.1161348819732666,
      "step": 9969
    },
    {
      "epoch": 6.0845947265625e-05,
      "step": 9969,
      "training_step_time": 0.41016721725463867
    },
    {
      "epoch": 6.085205078125e-05,
      "grad_norm": 0.24483348429203033,
      "learning_rate": 9.635575083101036e-05,
      "loss": 0.0611,
      "step": 9970
    },
    {
      "epoch": 6.085205078125e-05,
      "model_forward_time": 0.11594200134277344,
      "step": 9970
    },
    {
      "epoch": 6.085205078125e-05,
      "step": 9970,
      "training_step_time": 0.4488556385040283
    },
    {
      "epoch": 6.0858154296875e-05,
      "model_forward_time": 0.11841630935668945,
      "step": 9971
    },
    {
      "epoch": 6.0858154296875e-05,
      "step": 9971,
      "training_step_time": 0.4526073932647705
    },
    {
      "epoch": 6.08642578125e-05,
      "model_forward_time": 0.11545777320861816,
      "step": 9972
    },
    {
      "epoch": 6.08642578125e-05,
      "step": 9972,
      "training_step_time": 0.40788865089416504
    },
    {
      "epoch": 6.0870361328125e-05,
      "model_forward_time": 0.11586952209472656,
      "step": 9973
    },
    {
      "epoch": 6.0870361328125e-05,
      "step": 9973,
      "training_step_time": 0.819904088973999
    },
    {
      "epoch": 6.087646484375e-05,
      "model_forward_time": 0.11935830116271973,
      "step": 9974
    },
    {
      "epoch": 6.087646484375e-05,
      "step": 9974,
      "training_step_time": 0.39818859100341797
    },
    {
      "epoch": 6.0882568359375e-05,
      "model_forward_time": 0.11525559425354004,
      "step": 9975
    },
    {
      "epoch": 6.0882568359375e-05,
      "step": 9975,
      "training_step_time": 0.38129544258117676
    },
    {
      "epoch": 6.0888671875e-05,
      "model_forward_time": 0.11531877517700195,
      "step": 9976
    },
    {
      "epoch": 6.0888671875e-05,
      "step": 9976,
      "training_step_time": 0.3828721046447754
    },
    {
      "epoch": 6.0894775390625e-05,
      "model_forward_time": 0.11515069007873535,
      "step": 9977
    },
    {
      "epoch": 6.0894775390625e-05,
      "step": 9977,
      "training_step_time": 0.3814065456390381
    },
    {
      "epoch": 6.090087890625e-05,
      "model_forward_time": 0.11556029319763184,
      "step": 9978
    },
    {
      "epoch": 6.090087890625e-05,
      "step": 9978,
      "training_step_time": 0.3706674575805664
    },
    {
      "epoch": 6.0906982421875e-05,
      "model_forward_time": 0.1170339584350586,
      "step": 9979
    },
    {
      "epoch": 6.0906982421875e-05,
      "step": 9979,
      "training_step_time": 1.3695824146270752
    },
    {
      "epoch": 6.09130859375e-05,
      "grad_norm": 0.16684375703334808,
      "learning_rate": 9.634541575171929e-05,
      "loss": 0.0602,
      "step": 9980
    },
    {
      "epoch": 6.09130859375e-05,
      "model_forward_time": 0.11562609672546387,
      "step": 9980
    },
    {
      "epoch": 6.09130859375e-05,
      "step": 9980,
      "training_step_time": 0.39788389205932617
    },
    {
      "epoch": 6.0919189453125e-05,
      "model_forward_time": 0.11690211296081543,
      "step": 9981
    },
    {
      "epoch": 6.0919189453125e-05,
      "step": 9981,
      "training_step_time": 0.3845217227935791
    },
    {
      "epoch": 6.092529296875e-05,
      "model_forward_time": 0.1162559986114502,
      "step": 9982
    },
    {
      "epoch": 6.092529296875e-05,
      "step": 9982,
      "training_step_time": 0.44768500328063965
    },
    {
      "epoch": 6.0931396484375e-05,
      "model_forward_time": 0.11487364768981934,
      "step": 9983
    },
    {
      "epoch": 6.0931396484375e-05,
      "step": 9983,
      "training_step_time": 0.4326658248901367
    },
    {
      "epoch": 6.09375e-05,
      "model_forward_time": 0.1150507926940918,
      "step": 9984
    },
    {
      "epoch": 6.09375e-05,
      "step": 9984,
      "training_step_time": 0.44234538078308105
    },
    {
      "epoch": 6.0943603515625e-05,
      "model_forward_time": 0.11600041389465332,
      "step": 9985
    },
    {
      "epoch": 6.0943603515625e-05,
      "step": 9985,
      "training_step_time": 0.6603915691375732
    },
    {
      "epoch": 6.094970703125e-05,
      "model_forward_time": 0.11528563499450684,
      "step": 9986
    },
    {
      "epoch": 6.094970703125e-05,
      "step": 9986,
      "training_step_time": 0.38611817359924316
    },
    {
      "epoch": 6.0955810546875e-05,
      "model_forward_time": 0.11532235145568848,
      "step": 9987
    },
    {
      "epoch": 6.0955810546875e-05,
      "step": 9987,
      "training_step_time": 0.3891458511352539
    },
    {
      "epoch": 6.09619140625e-05,
      "model_forward_time": 0.11539697647094727,
      "step": 9988
    },
    {
      "epoch": 6.09619140625e-05,
      "step": 9988,
      "training_step_time": 0.3816111087799072
    },
    {
      "epoch": 6.0968017578125e-05,
      "model_forward_time": 0.11510252952575684,
      "step": 9989
    },
    {
      "epoch": 6.0968017578125e-05,
      "step": 9989,
      "training_step_time": 0.3905496597290039
    },
    {
      "epoch": 6.097412109375e-05,
      "grad_norm": 0.2447948157787323,
      "learning_rate": 9.63350665939146e-05,
      "loss": 0.0635,
      "step": 9990
    },
    {
      "epoch": 6.097412109375e-05,
      "model_forward_time": 0.11776471138000488,
      "step": 9990
    },
    {
      "epoch": 6.097412109375e-05,
      "step": 9990,
      "training_step_time": 0.38328027725219727
    },
    {
      "epoch": 6.0980224609375e-05,
      "model_forward_time": 0.11695599555969238,
      "step": 9991
    },
    {
      "epoch": 6.0980224609375e-05,
      "step": 9991,
      "training_step_time": 0.5963320732116699
    },
    {
      "epoch": 6.0986328125e-05,
      "model_forward_time": 0.11700677871704102,
      "step": 9992
    },
    {
      "epoch": 6.0986328125e-05,
      "step": 9992,
      "training_step_time": 0.38390111923217773
    },
    {
      "epoch": 6.0992431640625e-05,
      "model_forward_time": 0.11794114112854004,
      "step": 9993
    },
    {
      "epoch": 6.0992431640625e-05,
      "step": 9993,
      "training_step_time": 0.39010095596313477
    },
    {
      "epoch": 6.099853515625e-05,
      "model_forward_time": 0.11752629280090332,
      "step": 9994
    },
    {
      "epoch": 6.099853515625e-05,
      "step": 9994,
      "training_step_time": 0.41143250465393066
    },
    {
      "epoch": 6.1004638671875e-05,
      "model_forward_time": 0.11834192276000977,
      "step": 9995
    },
    {
      "epoch": 6.1004638671875e-05,
      "step": 9995,
      "training_step_time": 0.45428943634033203
    },
    {
      "epoch": 6.10107421875e-05,
      "model_forward_time": 0.1163167953491211,
      "step": 9996
    },
    {
      "epoch": 6.10107421875e-05,
      "step": 9996,
      "training_step_time": 0.41321849822998047
    },
    {
      "epoch": 6.1016845703125e-05,
      "model_forward_time": 0.1157386302947998,
      "step": 9997
    },
    {
      "epoch": 6.1016845703125e-05,
      "step": 9997,
      "training_step_time": 0.757652997970581
    },
    {
      "epoch": 6.102294921875e-05,
      "model_forward_time": 0.11636900901794434,
      "step": 9998
    },
    {
      "epoch": 6.102294921875e-05,
      "step": 9998,
      "training_step_time": 0.4917001724243164
    },
    {
      "epoch": 6.1029052734375e-05,
      "model_forward_time": 0.1153712272644043,
      "step": 9999
    },
    {
      "epoch": 6.1029052734375e-05,
      "step": 9999,
      "training_step_time": 0.4641885757446289
    },
    {
      "epoch": 6.103515625e-05,
      "grad_norm": 0.160112664103508,
      "learning_rate": 9.632470336074009e-05,
      "loss": 0.0659,
      "step": 10000
    },
    {
      "epoch": 6.103515625e-05,
      "model_forward_time": 0.11544466018676758,
      "step": 10000
    },
    {
      "epoch": 6.103515625e-05,
      "step": 10000,
      "training_step_time": 0.3635544776916504
    },
    {
      "epoch": 6.1041259765625e-05,
      "model_forward_time": 0.11278557777404785,
      "step": 10001
    },
    {
      "epoch": 6.1041259765625e-05,
      "step": 10001,
      "training_step_time": 0.4276859760284424
    },
    {
      "epoch": 6.104736328125e-05,
      "model_forward_time": 0.11399006843566895,
      "step": 10002
    },
    {
      "epoch": 6.104736328125e-05,
      "step": 10002,
      "training_step_time": 0.40624332427978516
    },
    {
      "epoch": 6.1053466796875e-05,
      "model_forward_time": 0.11504769325256348,
      "step": 10003
    },
    {
      "epoch": 6.1053466796875e-05,
      "step": 10003,
      "training_step_time": 0.3928499221801758
    },
    {
      "epoch": 6.10595703125e-05,
      "model_forward_time": 0.11490988731384277,
      "step": 10004
    },
    {
      "epoch": 6.10595703125e-05,
      "step": 10004,
      "training_step_time": 0.3799281120300293
    },
    {
      "epoch": 6.1065673828125e-05,
      "model_forward_time": 0.11534690856933594,
      "step": 10005
    },
    {
      "epoch": 6.1065673828125e-05,
      "step": 10005,
      "training_step_time": 0.3900885581970215
    },
    {
      "epoch": 6.107177734375e-05,
      "model_forward_time": 0.1169130802154541,
      "step": 10006
    },
    {
      "epoch": 6.107177734375e-05,
      "step": 10006,
      "training_step_time": 0.3902473449707031
    },
    {
      "epoch": 6.1077880859375e-05,
      "model_forward_time": 0.11783123016357422,
      "step": 10007
    },
    {
      "epoch": 6.1077880859375e-05,
      "step": 10007,
      "training_step_time": 0.38089728355407715
    },
    {
      "epoch": 6.1083984375e-05,
      "model_forward_time": 0.11826348304748535,
      "step": 10008
    },
    {
      "epoch": 6.1083984375e-05,
      "step": 10008,
      "training_step_time": 0.3762397766113281
    },
    {
      "epoch": 6.1090087890625e-05,
      "model_forward_time": 0.11738061904907227,
      "step": 10009
    },
    {
      "epoch": 6.1090087890625e-05,
      "step": 10009,
      "training_step_time": 0.3907430171966553
    },
    {
      "epoch": 6.109619140625e-05,
      "grad_norm": 0.2456478625535965,
      "learning_rate": 9.631432605534383e-05,
      "loss": 0.0629,
      "step": 10010
    },
    {
      "epoch": 6.109619140625e-05,
      "model_forward_time": 0.11690044403076172,
      "step": 10010
    },
    {
      "epoch": 6.109619140625e-05,
      "step": 10010,
      "training_step_time": 0.3867340087890625
    },
    {
      "epoch": 6.1102294921875e-05,
      "model_forward_time": 0.11751317977905273,
      "step": 10011
    },
    {
      "epoch": 6.1102294921875e-05,
      "step": 10011,
      "training_step_time": 0.390841007232666
    },
    {
      "epoch": 6.11083984375e-05,
      "model_forward_time": 0.11804413795471191,
      "step": 10012
    },
    {
      "epoch": 6.11083984375e-05,
      "step": 10012,
      "training_step_time": 0.4291105270385742
    },
    {
      "epoch": 6.1114501953125e-05,
      "model_forward_time": 0.11806249618530273,
      "step": 10013
    },
    {
      "epoch": 6.1114501953125e-05,
      "step": 10013,
      "training_step_time": 0.4137148857116699
    },
    {
      "epoch": 6.112060546875e-05,
      "model_forward_time": 0.11791729927062988,
      "step": 10014
    },
    {
      "epoch": 6.112060546875e-05,
      "step": 10014,
      "training_step_time": 0.439359188079834
    },
    {
      "epoch": 6.1126708984375e-05,
      "model_forward_time": 0.11776232719421387,
      "step": 10015
    },
    {
      "epoch": 6.1126708984375e-05,
      "step": 10015,
      "training_step_time": 0.3721179962158203
    },
    {
      "epoch": 6.11328125e-05,
      "model_forward_time": 0.12216973304748535,
      "step": 10016
    },
    {
      "epoch": 6.11328125e-05,
      "step": 10016,
      "training_step_time": 0.44602370262145996
    },
    {
      "epoch": 6.1138916015625e-05,
      "model_forward_time": 0.1235969066619873,
      "step": 10017
    },
    {
      "epoch": 6.1138916015625e-05,
      "step": 10017,
      "training_step_time": 0.45917749404907227
    },
    {
      "epoch": 6.114501953125e-05,
      "model_forward_time": 0.11740517616271973,
      "step": 10018
    },
    {
      "epoch": 6.114501953125e-05,
      "step": 10018,
      "training_step_time": 0.3809528350830078
    },
    {
      "epoch": 6.1151123046875e-05,
      "model_forward_time": 0.11684966087341309,
      "step": 10019
    },
    {
      "epoch": 6.1151123046875e-05,
      "step": 10019,
      "training_step_time": 0.3838789463043213
    },
    {
      "epoch": 6.11572265625e-05,
      "grad_norm": 0.19155353307724,
      "learning_rate": 9.630393468087818e-05,
      "loss": 0.0631,
      "step": 10020
    },
    {
      "epoch": 6.11572265625e-05,
      "model_forward_time": 0.11850905418395996,
      "step": 10020
    },
    {
      "epoch": 6.11572265625e-05,
      "step": 10020,
      "training_step_time": 0.3909282684326172
    },
    {
      "epoch": 6.1163330078125e-05,
      "model_forward_time": 0.1170651912689209,
      "step": 10021
    },
    {
      "epoch": 6.1163330078125e-05,
      "step": 10021,
      "training_step_time": 0.3778061866760254
    },
    {
      "epoch": 6.116943359375e-05,
      "model_forward_time": 0.1171574592590332,
      "step": 10022
    },
    {
      "epoch": 6.116943359375e-05,
      "step": 10022,
      "training_step_time": 0.38758158683776855
    },
    {
      "epoch": 6.1175537109375e-05,
      "model_forward_time": 0.11704015731811523,
      "step": 10023
    },
    {
      "epoch": 6.1175537109375e-05,
      "step": 10023,
      "training_step_time": 0.3937559127807617
    },
    {
      "epoch": 6.1181640625e-05,
      "model_forward_time": 0.11737561225891113,
      "step": 10024
    },
    {
      "epoch": 6.1181640625e-05,
      "step": 10024,
      "training_step_time": 0.39046692848205566
    },
    {
      "epoch": 6.1187744140625e-05,
      "model_forward_time": 0.11751031875610352,
      "step": 10025
    },
    {
      "epoch": 6.1187744140625e-05,
      "step": 10025,
      "training_step_time": 0.458118200302124
    },
    {
      "epoch": 6.119384765625e-05,
      "model_forward_time": 0.1227719783782959,
      "step": 10026
    },
    {
      "epoch": 6.119384765625e-05,
      "step": 10026,
      "training_step_time": 0.46016788482666016
    },
    {
      "epoch": 6.1199951171875e-05,
      "model_forward_time": 0.11923766136169434,
      "step": 10027
    },
    {
      "epoch": 6.1199951171875e-05,
      "step": 10027,
      "training_step_time": 0.4364323616027832
    },
    {
      "epoch": 6.12060546875e-05,
      "model_forward_time": 0.11644506454467773,
      "step": 10028
    },
    {
      "epoch": 6.12060546875e-05,
      "step": 10028,
      "training_step_time": 0.4044218063354492
    },
    {
      "epoch": 6.1212158203125e-05,
      "model_forward_time": 0.11647844314575195,
      "step": 10029
    },
    {
      "epoch": 6.1212158203125e-05,
      "step": 10029,
      "training_step_time": 0.37145423889160156
    },
    {
      "epoch": 6.121826171875e-05,
      "grad_norm": 0.22619904577732086,
      "learning_rate": 9.629352924049975e-05,
      "loss": 0.0599,
      "step": 10030
    },
    {
      "epoch": 6.121826171875e-05,
      "model_forward_time": 0.11853599548339844,
      "step": 10030
    },
    {
      "epoch": 6.121826171875e-05,
      "step": 10030,
      "training_step_time": 0.38689732551574707
    },
    {
      "epoch": 6.1224365234375e-05,
      "model_forward_time": 0.11680960655212402,
      "step": 10031
    },
    {
      "epoch": 6.1224365234375e-05,
      "step": 10031,
      "training_step_time": 0.45266270637512207
    },
    {
      "epoch": 6.123046875e-05,
      "model_forward_time": 0.14586567878723145,
      "step": 10032
    },
    {
      "epoch": 6.123046875e-05,
      "step": 10032,
      "training_step_time": 0.4132668972015381
    },
    {
      "epoch": 6.1236572265625e-05,
      "model_forward_time": 0.11670780181884766,
      "step": 10033
    },
    {
      "epoch": 6.1236572265625e-05,
      "step": 10033,
      "training_step_time": 0.3844413757324219
    },
    {
      "epoch": 6.124267578125e-05,
      "model_forward_time": 0.11889481544494629,
      "step": 10034
    },
    {
      "epoch": 6.124267578125e-05,
      "step": 10034,
      "training_step_time": 0.3947272300720215
    },
    {
      "epoch": 6.1248779296875e-05,
      "model_forward_time": 0.11644268035888672,
      "step": 10035
    },
    {
      "epoch": 6.1248779296875e-05,
      "step": 10035,
      "training_step_time": 0.3971989154815674
    },
    {
      "epoch": 6.12548828125e-05,
      "model_forward_time": 0.11753630638122559,
      "step": 10036
    },
    {
      "epoch": 6.12548828125e-05,
      "step": 10036,
      "training_step_time": 0.3911471366882324
    },
    {
      "epoch": 6.1260986328125e-05,
      "model_forward_time": 0.11668753623962402,
      "step": 10037
    },
    {
      "epoch": 6.1260986328125e-05,
      "step": 10037,
      "training_step_time": 0.3918147087097168
    },
    {
      "epoch": 6.126708984375e-05,
      "model_forward_time": 0.1168813705444336,
      "step": 10038
    },
    {
      "epoch": 6.126708984375e-05,
      "step": 10038,
      "training_step_time": 0.37702417373657227
    },
    {
      "epoch": 6.1273193359375e-05,
      "model_forward_time": 0.11709403991699219,
      "step": 10039
    },
    {
      "epoch": 6.1273193359375e-05,
      "step": 10039,
      "training_step_time": 0.7088851928710938
    },
    {
      "epoch": 6.1279296875e-05,
      "grad_norm": 0.2536637485027313,
      "learning_rate": 9.628310973736943e-05,
      "loss": 0.0693,
      "step": 10040
    },
    {
      "epoch": 6.1279296875e-05,
      "model_forward_time": 0.11601400375366211,
      "step": 10040
    },
    {
      "epoch": 6.1279296875e-05,
      "step": 10040,
      "training_step_time": 0.45960330963134766
    },
    {
      "epoch": 6.1285400390625e-05,
      "model_forward_time": 0.11648178100585938,
      "step": 10041
    },
    {
      "epoch": 6.1285400390625e-05,
      "step": 10041,
      "training_step_time": 0.42408132553100586
    },
    {
      "epoch": 6.129150390625e-05,
      "model_forward_time": 0.11755990982055664,
      "step": 10042
    },
    {
      "epoch": 6.129150390625e-05,
      "step": 10042,
      "training_step_time": 0.3866848945617676
    },
    {
      "epoch": 6.1297607421875e-05,
      "model_forward_time": 0.11774396896362305,
      "step": 10043
    },
    {
      "epoch": 6.1297607421875e-05,
      "step": 10043,
      "training_step_time": 0.41849303245544434
    },
    {
      "epoch": 6.13037109375e-05,
      "model_forward_time": 0.11757850646972656,
      "step": 10044
    },
    {
      "epoch": 6.13037109375e-05,
      "step": 10044,
      "training_step_time": 0.439899206161499
    },
    {
      "epoch": 6.1309814453125e-05,
      "model_forward_time": 0.1172339916229248,
      "step": 10045
    },
    {
      "epoch": 6.1309814453125e-05,
      "step": 10045,
      "training_step_time": 0.6461703777313232
    },
    {
      "epoch": 6.131591796875e-05,
      "model_forward_time": 0.11650586128234863,
      "step": 10046
    },
    {
      "epoch": 6.131591796875e-05,
      "step": 10046,
      "training_step_time": 0.3822143077850342
    },
    {
      "epoch": 6.1322021484375e-05,
      "model_forward_time": 0.11641120910644531,
      "step": 10047
    },
    {
      "epoch": 6.1322021484375e-05,
      "step": 10047,
      "training_step_time": 0.38187670707702637
    },
    {
      "epoch": 6.1328125e-05,
      "model_forward_time": 0.11734485626220703,
      "step": 10048
    },
    {
      "epoch": 6.1328125e-05,
      "step": 10048,
      "training_step_time": 0.39606189727783203
    },
    {
      "epoch": 6.1334228515625e-05,
      "model_forward_time": 0.11723732948303223,
      "step": 10049
    },
    {
      "epoch": 6.1334228515625e-05,
      "step": 10049,
      "training_step_time": 0.3869636058807373
    },
    {
      "epoch": 6.134033203125e-05,
      "grad_norm": 0.23464293777942657,
      "learning_rate": 9.627267617465243e-05,
      "loss": 0.0584,
      "step": 10050
    },
    {
      "epoch": 6.134033203125e-05,
      "model_forward_time": 0.1195070743560791,
      "step": 10050
    },
    {
      "epoch": 6.134033203125e-05,
      "step": 10050,
      "training_step_time": 0.3799459934234619
    },
    {
      "epoch": 6.1346435546875e-05,
      "model_forward_time": 0.11930465698242188,
      "step": 10051
    },
    {
      "epoch": 6.1346435546875e-05,
      "step": 10051,
      "training_step_time": 1.1595537662506104
    },
    {
      "epoch": 6.13525390625e-05,
      "model_forward_time": 0.11580705642700195,
      "step": 10052
    },
    {
      "epoch": 6.13525390625e-05,
      "step": 10052,
      "training_step_time": 0.46786999702453613
    },
    {
      "epoch": 6.1358642578125e-05,
      "model_forward_time": 0.11647295951843262,
      "step": 10053
    },
    {
      "epoch": 6.1358642578125e-05,
      "step": 10053,
      "training_step_time": 0.4372549057006836
    },
    {
      "epoch": 6.136474609375e-05,
      "model_forward_time": 0.11652970314025879,
      "step": 10054
    },
    {
      "epoch": 6.136474609375e-05,
      "step": 10054,
      "training_step_time": 0.4021596908569336
    },
    {
      "epoch": 6.1370849609375e-05,
      "model_forward_time": 0.11618924140930176,
      "step": 10055
    },
    {
      "epoch": 6.1370849609375e-05,
      "step": 10055,
      "training_step_time": 0.3862748146057129
    },
    {
      "epoch": 6.1376953125e-05,
      "model_forward_time": 0.11591076850891113,
      "step": 10056
    },
    {
      "epoch": 6.1376953125e-05,
      "step": 10056,
      "training_step_time": 0.41176295280456543
    },
    {
      "epoch": 6.1383056640625e-05,
      "model_forward_time": 0.11597037315368652,
      "step": 10057
    },
    {
      "epoch": 6.1383056640625e-05,
      "step": 10057,
      "training_step_time": 0.9066522121429443
    },
    {
      "epoch": 6.138916015625e-05,
      "model_forward_time": 0.11526632308959961,
      "step": 10058
    },
    {
      "epoch": 6.138916015625e-05,
      "step": 10058,
      "training_step_time": 0.38577747344970703
    },
    {
      "epoch": 6.1395263671875e-05,
      "model_forward_time": 0.11585831642150879,
      "step": 10059
    },
    {
      "epoch": 6.1395263671875e-05,
      "step": 10059,
      "training_step_time": 0.3927288055419922
    },
    {
      "epoch": 6.14013671875e-05,
      "grad_norm": 0.18771137297153473,
      "learning_rate": 9.626222855551816e-05,
      "loss": 0.0647,
      "step": 10060
    },
    {
      "epoch": 6.14013671875e-05,
      "model_forward_time": 0.11571455001831055,
      "step": 10060
    },
    {
      "epoch": 6.14013671875e-05,
      "step": 10060,
      "training_step_time": 0.3821258544921875
    },
    {
      "epoch": 6.1407470703125e-05,
      "model_forward_time": 0.1164546012878418,
      "step": 10061
    },
    {
      "epoch": 6.1407470703125e-05,
      "step": 10061,
      "training_step_time": 0.37091779708862305
    },
    {
      "epoch": 6.141357421875e-05,
      "model_forward_time": 0.12032175064086914,
      "step": 10062
    },
    {
      "epoch": 6.141357421875e-05,
      "step": 10062,
      "training_step_time": 0.38860654830932617
    },
    {
      "epoch": 6.1419677734375e-05,
      "model_forward_time": 0.11718988418579102,
      "step": 10063
    },
    {
      "epoch": 6.1419677734375e-05,
      "step": 10063,
      "training_step_time": 0.44216322898864746
    },
    {
      "epoch": 6.142578125e-05,
      "model_forward_time": 0.12024283409118652,
      "step": 10064
    },
    {
      "epoch": 6.142578125e-05,
      "step": 10064,
      "training_step_time": 0.43472909927368164
    },
    {
      "epoch": 6.1431884765625e-05,
      "model_forward_time": 0.1200411319732666,
      "step": 10065
    },
    {
      "epoch": 6.1431884765625e-05,
      "step": 10065,
      "training_step_time": 0.4006223678588867
    },
    {
      "epoch": 6.143798828125e-05,
      "model_forward_time": 0.11673855781555176,
      "step": 10066
    },
    {
      "epoch": 6.143798828125e-05,
      "step": 10066,
      "training_step_time": 0.48690319061279297
    },
    {
      "epoch": 6.1444091796875e-05,
      "model_forward_time": 0.11893773078918457,
      "step": 10067
    },
    {
      "epoch": 6.1444091796875e-05,
      "step": 10067,
      "training_step_time": 0.43674755096435547
    },
    {
      "epoch": 6.14501953125e-05,
      "model_forward_time": 0.11758565902709961,
      "step": 10068
    },
    {
      "epoch": 6.14501953125e-05,
      "step": 10068,
      "training_step_time": 0.39920568466186523
    },
    {
      "epoch": 6.1456298828125e-05,
      "model_forward_time": 0.11697196960449219,
      "step": 10069
    },
    {
      "epoch": 6.1456298828125e-05,
      "step": 10069,
      "training_step_time": 0.44483208656311035
    },
    {
      "epoch": 6.146240234375e-05,
      "grad_norm": 0.19503797590732574,
      "learning_rate": 9.625176688314035e-05,
      "loss": 0.063,
      "step": 10070
    },
    {
      "epoch": 6.146240234375e-05,
      "model_forward_time": 0.11777472496032715,
      "step": 10070
    },
    {
      "epoch": 6.146240234375e-05,
      "step": 10070,
      "training_step_time": 0.4357278347015381
    },
    {
      "epoch": 6.1468505859375e-05,
      "model_forward_time": 0.11655354499816895,
      "step": 10071
    },
    {
      "epoch": 6.1468505859375e-05,
      "step": 10071,
      "training_step_time": 0.4333791732788086
    },
    {
      "epoch": 6.1474609375e-05,
      "model_forward_time": 0.11874556541442871,
      "step": 10072
    },
    {
      "epoch": 6.1474609375e-05,
      "step": 10072,
      "training_step_time": 0.4632902145385742
    },
    {
      "epoch": 6.1480712890625e-05,
      "model_forward_time": 0.11657857894897461,
      "step": 10073
    },
    {
      "epoch": 6.1480712890625e-05,
      "step": 10073,
      "training_step_time": 0.38425564765930176
    },
    {
      "epoch": 6.148681640625e-05,
      "model_forward_time": 0.11640429496765137,
      "step": 10074
    },
    {
      "epoch": 6.148681640625e-05,
      "step": 10074,
      "training_step_time": 0.40948987007141113
    },
    {
      "epoch": 6.1492919921875e-05,
      "model_forward_time": 0.11578011512756348,
      "step": 10075
    },
    {
      "epoch": 6.1492919921875e-05,
      "step": 10075,
      "training_step_time": 0.7791039943695068
    },
    {
      "epoch": 6.14990234375e-05,
      "model_forward_time": 0.11643290519714355,
      "step": 10076
    },
    {
      "epoch": 6.14990234375e-05,
      "step": 10076,
      "training_step_time": 0.3834397792816162
    },
    {
      "epoch": 6.1505126953125e-05,
      "model_forward_time": 0.11614513397216797,
      "step": 10077
    },
    {
      "epoch": 6.1505126953125e-05,
      "step": 10077,
      "training_step_time": 0.4093294143676758
    },
    {
      "epoch": 6.151123046875e-05,
      "model_forward_time": 0.11871051788330078,
      "step": 10078
    },
    {
      "epoch": 6.151123046875e-05,
      "step": 10078,
      "training_step_time": 0.38543152809143066
    },
    {
      "epoch": 6.1517333984375e-05,
      "model_forward_time": 0.11602139472961426,
      "step": 10079
    },
    {
      "epoch": 6.1517333984375e-05,
      "step": 10079,
      "training_step_time": 0.4377927780151367
    },
    {
      "epoch": 6.15234375e-05,
      "grad_norm": 0.2220596820116043,
      "learning_rate": 9.624129116069694e-05,
      "loss": 0.0608,
      "step": 10080
    },
    {
      "epoch": 6.15234375e-05,
      "model_forward_time": 0.11705613136291504,
      "step": 10080
    },
    {
      "epoch": 6.15234375e-05,
      "step": 10080,
      "training_step_time": 0.372361421585083
    },
    {
      "epoch": 6.1529541015625e-05,
      "model_forward_time": 0.11845636367797852,
      "step": 10081
    },
    {
      "epoch": 6.1529541015625e-05,
      "step": 10081,
      "training_step_time": 0.747046709060669
    },
    {
      "epoch": 6.153564453125e-05,
      "model_forward_time": 0.11687517166137695,
      "step": 10082
    },
    {
      "epoch": 6.153564453125e-05,
      "step": 10082,
      "training_step_time": 0.41027283668518066
    },
    {
      "epoch": 6.1541748046875e-05,
      "model_forward_time": 0.11666417121887207,
      "step": 10083
    },
    {
      "epoch": 6.1541748046875e-05,
      "step": 10083,
      "training_step_time": 0.3716614246368408
    },
    {
      "epoch": 6.15478515625e-05,
      "model_forward_time": 0.11796784400939941,
      "step": 10084
    },
    {
      "epoch": 6.15478515625e-05,
      "step": 10084,
      "training_step_time": 0.4049968719482422
    },
    {
      "epoch": 6.1553955078125e-05,
      "model_forward_time": 0.11741209030151367,
      "step": 10085
    },
    {
      "epoch": 6.1553955078125e-05,
      "step": 10085,
      "training_step_time": 0.4601621627807617
    },
    {
      "epoch": 6.156005859375e-05,
      "model_forward_time": 0.11545801162719727,
      "step": 10086
    },
    {
      "epoch": 6.156005859375e-05,
      "step": 10086,
      "training_step_time": 0.3743274211883545
    },
    {
      "epoch": 6.1566162109375e-05,
      "model_forward_time": 0.11768674850463867,
      "step": 10087
    },
    {
      "epoch": 6.1566162109375e-05,
      "step": 10087,
      "training_step_time": 0.6878550052642822
    },
    {
      "epoch": 6.1572265625e-05,
      "model_forward_time": 0.11649179458618164,
      "step": 10088
    },
    {
      "epoch": 6.1572265625e-05,
      "step": 10088,
      "training_step_time": 0.3777005672454834
    },
    {
      "epoch": 6.1578369140625e-05,
      "model_forward_time": 0.11587715148925781,
      "step": 10089
    },
    {
      "epoch": 6.1578369140625e-05,
      "step": 10089,
      "training_step_time": 0.38701438903808594
    },
    {
      "epoch": 6.158447265625e-05,
      "grad_norm": 0.17422570288181305,
      "learning_rate": 9.623080139137023e-05,
      "loss": 0.0594,
      "step": 10090
    },
    {
      "epoch": 6.158447265625e-05,
      "model_forward_time": 0.1159968376159668,
      "step": 10090
    },
    {
      "epoch": 6.158447265625e-05,
      "step": 10090,
      "training_step_time": 0.40830016136169434
    },
    {
      "epoch": 6.1590576171875e-05,
      "model_forward_time": 0.1157228946685791,
      "step": 10091
    },
    {
      "epoch": 6.1590576171875e-05,
      "step": 10091,
      "training_step_time": 0.44672250747680664
    },
    {
      "epoch": 6.15966796875e-05,
      "model_forward_time": 0.11568760871887207,
      "step": 10092
    },
    {
      "epoch": 6.15966796875e-05,
      "step": 10092,
      "training_step_time": 0.3992581367492676
    },
    {
      "epoch": 6.1602783203125e-05,
      "model_forward_time": 0.1160116195678711,
      "step": 10093
    },
    {
      "epoch": 6.1602783203125e-05,
      "step": 10093,
      "training_step_time": 0.869767427444458
    },
    {
      "epoch": 6.160888671875e-05,
      "model_forward_time": 0.12169289588928223,
      "step": 10094
    },
    {
      "epoch": 6.160888671875e-05,
      "step": 10094,
      "training_step_time": 0.45206451416015625
    },
    {
      "epoch": 6.1614990234375e-05,
      "model_forward_time": 0.11812925338745117,
      "step": 10095
    },
    {
      "epoch": 6.1614990234375e-05,
      "step": 10095,
      "training_step_time": 0.4269731044769287
    },
    {
      "epoch": 6.162109375e-05,
      "model_forward_time": 0.1181793212890625,
      "step": 10096
    },
    {
      "epoch": 6.162109375e-05,
      "step": 10096,
      "training_step_time": 0.39920663833618164
    },
    {
      "epoch": 6.1627197265625e-05,
      "model_forward_time": 0.11546087265014648,
      "step": 10097
    },
    {
      "epoch": 6.1627197265625e-05,
      "step": 10097,
      "training_step_time": 0.4199538230895996
    },
    {
      "epoch": 6.163330078125e-05,
      "model_forward_time": 0.1168203353881836,
      "step": 10098
    },
    {
      "epoch": 6.163330078125e-05,
      "step": 10098,
      "training_step_time": 0.3776702880859375
    },
    {
      "epoch": 6.1639404296875e-05,
      "model_forward_time": 0.11658000946044922,
      "step": 10099
    },
    {
      "epoch": 6.1639404296875e-05,
      "step": 10099,
      "training_step_time": 1.1752455234527588
    },
    {
      "epoch": 6.16455078125e-05,
      "grad_norm": 0.13420410454273224,
      "learning_rate": 9.62202975783467e-05,
      "loss": 0.0611,
      "step": 10100
    },
    {
      "epoch": 6.16455078125e-05,
      "model_forward_time": 0.11515259742736816,
      "step": 10100
    },
    {
      "epoch": 6.16455078125e-05,
      "step": 10100,
      "training_step_time": 0.37816786766052246
    },
    {
      "epoch": 6.1651611328125e-05,
      "model_forward_time": 0.11527800559997559,
      "step": 10101
    },
    {
      "epoch": 6.1651611328125e-05,
      "step": 10101,
      "training_step_time": 0.3914642333984375
    },
    {
      "epoch": 6.165771484375e-05,
      "model_forward_time": 0.11507749557495117,
      "step": 10102
    },
    {
      "epoch": 6.165771484375e-05,
      "step": 10102,
      "training_step_time": 0.4003884792327881
    },
    {
      "epoch": 6.1663818359375e-05,
      "model_forward_time": 0.11531209945678711,
      "step": 10103
    },
    {
      "epoch": 6.1663818359375e-05,
      "step": 10103,
      "training_step_time": 0.4038407802581787
    },
    {
      "epoch": 6.1669921875e-05,
      "model_forward_time": 0.11490511894226074,
      "step": 10104
    },
    {
      "epoch": 6.1669921875e-05,
      "step": 10104,
      "training_step_time": 0.37033963203430176
    },
    {
      "epoch": 6.1676025390625e-05,
      "model_forward_time": 0.11575150489807129,
      "step": 10105
    },
    {
      "epoch": 6.1676025390625e-05,
      "step": 10105,
      "training_step_time": 0.7224273681640625
    },
    {
      "epoch": 6.168212890625e-05,
      "model_forward_time": 0.11554312705993652,
      "step": 10106
    },
    {
      "epoch": 6.168212890625e-05,
      "step": 10106,
      "training_step_time": 0.4087648391723633
    },
    {
      "epoch": 6.1688232421875e-05,
      "model_forward_time": 0.11796927452087402,
      "step": 10107
    },
    {
      "epoch": 6.1688232421875e-05,
      "step": 10107,
      "training_step_time": 0.48883605003356934
    },
    {
      "epoch": 6.16943359375e-05,
      "model_forward_time": 0.11791753768920898,
      "step": 10108
    },
    {
      "epoch": 6.16943359375e-05,
      "step": 10108,
      "training_step_time": 0.3806040287017822
    },
    {
      "epoch": 6.1700439453125e-05,
      "model_forward_time": 0.11701059341430664,
      "step": 10109
    },
    {
      "epoch": 6.1700439453125e-05,
      "step": 10109,
      "training_step_time": 0.40337467193603516
    },
    {
      "epoch": 6.170654296875e-05,
      "grad_norm": 0.1968509554862976,
      "learning_rate": 9.620977972481716e-05,
      "loss": 0.0604,
      "step": 10110
    },
    {
      "epoch": 6.170654296875e-05,
      "model_forward_time": 0.11623072624206543,
      "step": 10110
    },
    {
      "epoch": 6.170654296875e-05,
      "step": 10110,
      "training_step_time": 0.3740692138671875
    },
    {
      "epoch": 6.1712646484375e-05,
      "model_forward_time": 0.11612391471862793,
      "step": 10111
    },
    {
      "epoch": 6.1712646484375e-05,
      "step": 10111,
      "training_step_time": 0.926964282989502
    },
    {
      "epoch": 6.171875e-05,
      "model_forward_time": 0.11571311950683594,
      "step": 10112
    },
    {
      "epoch": 6.171875e-05,
      "step": 10112,
      "training_step_time": 0.3835947513580322
    },
    {
      "epoch": 6.1724853515625e-05,
      "model_forward_time": 0.11519837379455566,
      "step": 10113
    },
    {
      "epoch": 6.1724853515625e-05,
      "step": 10113,
      "training_step_time": 0.4081308841705322
    },
    {
      "epoch": 6.173095703125e-05,
      "model_forward_time": 0.1150665283203125,
      "step": 10114
    },
    {
      "epoch": 6.173095703125e-05,
      "step": 10114,
      "training_step_time": 0.416501522064209
    },
    {
      "epoch": 6.1737060546875e-05,
      "model_forward_time": 0.1193699836730957,
      "step": 10115
    },
    {
      "epoch": 6.1737060546875e-05,
      "step": 10115,
      "training_step_time": 0.3823659420013428
    },
    {
      "epoch": 6.17431640625e-05,
      "model_forward_time": 0.11519527435302734,
      "step": 10116
    },
    {
      "epoch": 6.17431640625e-05,
      "step": 10116,
      "training_step_time": 0.3798177242279053
    },
    {
      "epoch": 6.1749267578125e-05,
      "model_forward_time": 0.11612343788146973,
      "step": 10117
    },
    {
      "epoch": 6.1749267578125e-05,
      "step": 10117,
      "training_step_time": 0.9562563896179199
    },
    {
      "epoch": 6.175537109375e-05,
      "model_forward_time": 0.11574769020080566,
      "step": 10118
    },
    {
      "epoch": 6.175537109375e-05,
      "step": 10118,
      "training_step_time": 0.40787267684936523
    },
    {
      "epoch": 6.1761474609375e-05,
      "model_forward_time": 0.11631917953491211,
      "step": 10119
    },
    {
      "epoch": 6.1761474609375e-05,
      "step": 10119,
      "training_step_time": 0.39918088912963867
    },
    {
      "epoch": 6.1767578125e-05,
      "grad_norm": 0.17443998157978058,
      "learning_rate": 9.619924783397661e-05,
      "loss": 0.0617,
      "step": 10120
    },
    {
      "epoch": 6.1767578125e-05,
      "model_forward_time": 0.1167752742767334,
      "step": 10120
    },
    {
      "epoch": 6.1767578125e-05,
      "step": 10120,
      "training_step_time": 0.38772082328796387
    },
    {
      "epoch": 6.1773681640625e-05,
      "model_forward_time": 0.11645340919494629,
      "step": 10121
    },
    {
      "epoch": 6.1773681640625e-05,
      "step": 10121,
      "training_step_time": 0.3808894157409668
    },
    {
      "epoch": 6.177978515625e-05,
      "model_forward_time": 0.11586618423461914,
      "step": 10122
    },
    {
      "epoch": 6.177978515625e-05,
      "step": 10122,
      "training_step_time": 0.40431833267211914
    },
    {
      "epoch": 6.1785888671875e-05,
      "model_forward_time": 0.11826848983764648,
      "step": 10123
    },
    {
      "epoch": 6.1785888671875e-05,
      "step": 10123,
      "training_step_time": 0.38204193115234375
    },
    {
      "epoch": 6.17919921875e-05,
      "model_forward_time": 0.11608314514160156,
      "step": 10124
    },
    {
      "epoch": 6.17919921875e-05,
      "step": 10124,
      "training_step_time": 0.40701746940612793
    },
    {
      "epoch": 6.1798095703125e-05,
      "model_forward_time": 0.11856794357299805,
      "step": 10125
    },
    {
      "epoch": 6.1798095703125e-05,
      "step": 10125,
      "training_step_time": 0.39131832122802734
    },
    {
      "epoch": 6.180419921875e-05,
      "model_forward_time": 0.12090945243835449,
      "step": 10126
    },
    {
      "epoch": 6.180419921875e-05,
      "step": 10126,
      "training_step_time": 0.43742918968200684
    },
    {
      "epoch": 6.1810302734375e-05,
      "model_forward_time": 0.11646270751953125,
      "step": 10127
    },
    {
      "epoch": 6.1810302734375e-05,
      "step": 10127,
      "training_step_time": 0.41690611839294434
    },
    {
      "epoch": 6.181640625e-05,
      "model_forward_time": 0.11524510383605957,
      "step": 10128
    },
    {
      "epoch": 6.181640625e-05,
      "step": 10128,
      "training_step_time": 0.3814198970794678
    },
    {
      "epoch": 6.1822509765625e-05,
      "model_forward_time": 0.1165933609008789,
      "step": 10129
    },
    {
      "epoch": 6.1822509765625e-05,
      "step": 10129,
      "training_step_time": 0.6327450275421143
    },
    {
      "epoch": 6.182861328125e-05,
      "grad_norm": 0.17181555926799774,
      "learning_rate": 9.61887019090244e-05,
      "loss": 0.0591,
      "step": 10130
    },
    {
      "epoch": 6.182861328125e-05,
      "model_forward_time": 0.11803579330444336,
      "step": 10130
    },
    {
      "epoch": 6.182861328125e-05,
      "step": 10130,
      "training_step_time": 0.38816118240356445
    },
    {
      "epoch": 6.1834716796875e-05,
      "model_forward_time": 0.11714029312133789,
      "step": 10131
    },
    {
      "epoch": 6.1834716796875e-05,
      "step": 10131,
      "training_step_time": 0.401592493057251
    },
    {
      "epoch": 6.18408203125e-05,
      "model_forward_time": 0.12086153030395508,
      "step": 10132
    },
    {
      "epoch": 6.18408203125e-05,
      "step": 10132,
      "training_step_time": 0.41387057304382324
    },
    {
      "epoch": 6.1846923828125e-05,
      "model_forward_time": 0.11693668365478516,
      "step": 10133
    },
    {
      "epoch": 6.1846923828125e-05,
      "step": 10133,
      "training_step_time": 0.47397375106811523
    },
    {
      "epoch": 6.185302734375e-05,
      "model_forward_time": 0.11732196807861328,
      "step": 10134
    },
    {
      "epoch": 6.185302734375e-05,
      "step": 10134,
      "training_step_time": 0.47116684913635254
    },
    {
      "epoch": 6.1859130859375e-05,
      "model_forward_time": 0.12255358695983887,
      "step": 10135
    },
    {
      "epoch": 6.1859130859375e-05,
      "step": 10135,
      "training_step_time": 0.922966480255127
    },
    {
      "epoch": 6.1865234375e-05,
      "model_forward_time": 0.11609721183776855,
      "step": 10136
    },
    {
      "epoch": 6.1865234375e-05,
      "step": 10136,
      "training_step_time": 0.395068883895874
    },
    {
      "epoch": 6.1871337890625e-05,
      "model_forward_time": 0.11629486083984375,
      "step": 10137
    },
    {
      "epoch": 6.1871337890625e-05,
      "step": 10137,
      "training_step_time": 0.45010995864868164
    },
    {
      "epoch": 6.187744140625e-05,
      "model_forward_time": 0.1151576042175293,
      "step": 10138
    },
    {
      "epoch": 6.187744140625e-05,
      "step": 10138,
      "training_step_time": 0.4421563148498535
    },
    {
      "epoch": 6.1883544921875e-05,
      "model_forward_time": 0.11487746238708496,
      "step": 10139
    },
    {
      "epoch": 6.1883544921875e-05,
      "step": 10139,
      "training_step_time": 0.3956332206726074
    },
    {
      "epoch": 6.18896484375e-05,
      "grad_norm": 0.23514486849308014,
      "learning_rate": 9.617814195316411e-05,
      "loss": 0.0592,
      "step": 10140
    },
    {
      "epoch": 6.18896484375e-05,
      "model_forward_time": 0.11716437339782715,
      "step": 10140
    },
    {
      "epoch": 6.18896484375e-05,
      "step": 10140,
      "training_step_time": 0.3761591911315918
    },
    {
      "epoch": 6.1895751953125e-05,
      "model_forward_time": 0.11711406707763672,
      "step": 10141
    },
    {
      "epoch": 6.1895751953125e-05,
      "step": 10141,
      "training_step_time": 0.9848706722259521
    },
    {
      "epoch": 6.190185546875e-05,
      "model_forward_time": 0.114959716796875,
      "step": 10142
    },
    {
      "epoch": 6.190185546875e-05,
      "step": 10142,
      "training_step_time": 0.3744816780090332
    },
    {
      "epoch": 6.1907958984375e-05,
      "model_forward_time": 0.11681485176086426,
      "step": 10143
    },
    {
      "epoch": 6.1907958984375e-05,
      "step": 10143,
      "training_step_time": 0.46118712425231934
    },
    {
      "epoch": 6.19140625e-05,
      "model_forward_time": 0.11548137664794922,
      "step": 10144
    },
    {
      "epoch": 6.19140625e-05,
      "step": 10144,
      "training_step_time": 0.41576600074768066
    },
    {
      "epoch": 6.1920166015625e-05,
      "model_forward_time": 0.11766338348388672,
      "step": 10145
    },
    {
      "epoch": 6.1920166015625e-05,
      "step": 10145,
      "training_step_time": 0.3867223262786865
    },
    {
      "epoch": 6.192626953125e-05,
      "model_forward_time": 0.11741495132446289,
      "step": 10146
    },
    {
      "epoch": 6.192626953125e-05,
      "step": 10146,
      "training_step_time": 0.3959543704986572
    },
    {
      "epoch": 6.1932373046875e-05,
      "model_forward_time": 0.11616802215576172,
      "step": 10147
    },
    {
      "epoch": 6.1932373046875e-05,
      "step": 10147,
      "training_step_time": 0.7100019454956055
    },
    {
      "epoch": 6.19384765625e-05,
      "model_forward_time": 0.11506414413452148,
      "step": 10148
    },
    {
      "epoch": 6.19384765625e-05,
      "step": 10148,
      "training_step_time": 0.38648486137390137
    },
    {
      "epoch": 6.1944580078125e-05,
      "model_forward_time": 0.11524844169616699,
      "step": 10149
    },
    {
      "epoch": 6.1944580078125e-05,
      "step": 10149,
      "training_step_time": 0.41953253746032715
    },
    {
      "epoch": 6.195068359375e-05,
      "grad_norm": 0.1802094429731369,
      "learning_rate": 9.616756796960353e-05,
      "loss": 0.0587,
      "step": 10150
    },
    {
      "epoch": 6.195068359375e-05,
      "model_forward_time": 0.11530351638793945,
      "step": 10150
    },
    {
      "epoch": 6.195068359375e-05,
      "step": 10150,
      "training_step_time": 0.46623778343200684
    },
    {
      "epoch": 6.1956787109375e-05,
      "model_forward_time": 0.1158149242401123,
      "step": 10151
    },
    {
      "epoch": 6.1956787109375e-05,
      "step": 10151,
      "training_step_time": 0.3746657371520996
    },
    {
      "epoch": 6.1962890625e-05,
      "model_forward_time": 0.11696243286132812,
      "step": 10152
    },
    {
      "epoch": 6.1962890625e-05,
      "step": 10152,
      "training_step_time": 0.3774220943450928
    },
    {
      "epoch": 6.1968994140625e-05,
      "model_forward_time": 0.11823558807373047,
      "step": 10153
    },
    {
      "epoch": 6.1968994140625e-05,
      "step": 10153,
      "training_step_time": 0.7142016887664795
    },
    {
      "epoch": 6.197509765625e-05,
      "model_forward_time": 0.1168370246887207,
      "step": 10154
    },
    {
      "epoch": 6.197509765625e-05,
      "step": 10154,
      "training_step_time": 0.38976049423217773
    },
    {
      "epoch": 6.1981201171875e-05,
      "model_forward_time": 0.11638116836547852,
      "step": 10155
    },
    {
      "epoch": 6.1981201171875e-05,
      "step": 10155,
      "training_step_time": 0.37974047660827637
    },
    {
      "epoch": 6.19873046875e-05,
      "model_forward_time": 0.1171729564666748,
      "step": 10156
    },
    {
      "epoch": 6.19873046875e-05,
      "step": 10156,
      "training_step_time": 0.3941664695739746
    },
    {
      "epoch": 6.1993408203125e-05,
      "model_forward_time": 0.11576986312866211,
      "step": 10157
    },
    {
      "epoch": 6.1993408203125e-05,
      "step": 10157,
      "training_step_time": 0.39643073081970215
    },
    {
      "epoch": 6.199951171875e-05,
      "model_forward_time": 0.11552023887634277,
      "step": 10158
    },
    {
      "epoch": 6.199951171875e-05,
      "step": 10158,
      "training_step_time": 0.39717888832092285
    },
    {
      "epoch": 6.2005615234375e-05,
      "model_forward_time": 0.11602306365966797,
      "step": 10159
    },
    {
      "epoch": 6.2005615234375e-05,
      "step": 10159,
      "training_step_time": 1.2150766849517822
    },
    {
      "epoch": 6.201171875e-05,
      "grad_norm": 0.22222329676151276,
      "learning_rate": 9.61569799615548e-05,
      "loss": 0.0584,
      "step": 10160
    },
    {
      "epoch": 6.201171875e-05,
      "model_forward_time": 0.11453771591186523,
      "step": 10160
    },
    {
      "epoch": 6.201171875e-05,
      "step": 10160,
      "training_step_time": 0.3636200428009033
    },
    {
      "epoch": 6.2017822265625e-05,
      "model_forward_time": 0.11454629898071289,
      "step": 10161
    },
    {
      "epoch": 6.2017822265625e-05,
      "step": 10161,
      "training_step_time": 0.40358686447143555
    },
    {
      "epoch": 6.202392578125e-05,
      "model_forward_time": 0.11373066902160645,
      "step": 10162
    },
    {
      "epoch": 6.202392578125e-05,
      "step": 10162,
      "training_step_time": 0.4367804527282715
    },
    {
      "epoch": 6.2030029296875e-05,
      "model_forward_time": 0.1158742904663086,
      "step": 10163
    },
    {
      "epoch": 6.2030029296875e-05,
      "step": 10163,
      "training_step_time": 0.37752747535705566
    },
    {
      "epoch": 6.20361328125e-05,
      "model_forward_time": 0.11649894714355469,
      "step": 10164
    },
    {
      "epoch": 6.20361328125e-05,
      "step": 10164,
      "training_step_time": 0.3740577697753906
    },
    {
      "epoch": 6.2042236328125e-05,
      "model_forward_time": 0.1176307201385498,
      "step": 10165
    },
    {
      "epoch": 6.2042236328125e-05,
      "step": 10165,
      "training_step_time": 0.7405366897583008
    },
    {
      "epoch": 6.204833984375e-05,
      "model_forward_time": 0.11679935455322266,
      "step": 10166
    },
    {
      "epoch": 6.204833984375e-05,
      "step": 10166,
      "training_step_time": 0.37777066230773926
    },
    {
      "epoch": 6.2054443359375e-05,
      "model_forward_time": 0.11676931381225586,
      "step": 10167
    },
    {
      "epoch": 6.2054443359375e-05,
      "step": 10167,
      "training_step_time": 0.3844480514526367
    },
    {
      "epoch": 6.2060546875e-05,
      "model_forward_time": 0.1190025806427002,
      "step": 10168
    },
    {
      "epoch": 6.2060546875e-05,
      "step": 10168,
      "training_step_time": 0.40467214584350586
    },
    {
      "epoch": 6.2066650390625e-05,
      "model_forward_time": 0.11671900749206543,
      "step": 10169
    },
    {
      "epoch": 6.2066650390625e-05,
      "step": 10169,
      "training_step_time": 0.37801694869995117
    },
    {
      "epoch": 6.207275390625e-05,
      "grad_norm": 0.15987005829811096,
      "learning_rate": 9.614637793223425e-05,
      "loss": 0.0597,
      "step": 10170
    },
    {
      "epoch": 6.207275390625e-05,
      "model_forward_time": 0.11527061462402344,
      "step": 10170
    },
    {
      "epoch": 6.207275390625e-05,
      "step": 10170,
      "training_step_time": 0.4237551689147949
    },
    {
      "epoch": 6.2078857421875e-05,
      "model_forward_time": 0.11767244338989258,
      "step": 10171
    },
    {
      "epoch": 6.2078857421875e-05,
      "step": 10171,
      "training_step_time": 1.3767294883728027
    },
    {
      "epoch": 6.20849609375e-05,
      "model_forward_time": 0.11524653434753418,
      "step": 10172
    },
    {
      "epoch": 6.20849609375e-05,
      "step": 10172,
      "training_step_time": 0.3659653663635254
    },
    {
      "epoch": 6.2091064453125e-05,
      "model_forward_time": 0.11385536193847656,
      "step": 10173
    },
    {
      "epoch": 6.2091064453125e-05,
      "step": 10173,
      "training_step_time": 0.3922605514526367
    },
    {
      "epoch": 6.209716796875e-05,
      "model_forward_time": 0.11598324775695801,
      "step": 10174
    },
    {
      "epoch": 6.209716796875e-05,
      "step": 10174,
      "training_step_time": 0.44556117057800293
    },
    {
      "epoch": 6.2103271484375e-05,
      "model_forward_time": 0.11472892761230469,
      "step": 10175
    },
    {
      "epoch": 6.2103271484375e-05,
      "step": 10175,
      "training_step_time": 0.38266563415527344
    },
    {
      "epoch": 6.2109375e-05,
      "model_forward_time": 0.1157219409942627,
      "step": 10176
    },
    {
      "epoch": 6.2109375e-05,
      "step": 10176,
      "training_step_time": 0.37215304374694824
    },
    {
      "epoch": 6.2115478515625e-05,
      "model_forward_time": 0.11620450019836426,
      "step": 10177
    },
    {
      "epoch": 6.2115478515625e-05,
      "step": 10177,
      "training_step_time": 0.5007896423339844
    },
    {
      "epoch": 6.212158203125e-05,
      "model_forward_time": 0.11518692970275879,
      "step": 10178
    },
    {
      "epoch": 6.212158203125e-05,
      "step": 10178,
      "training_step_time": 0.3834950923919678
    },
    {
      "epoch": 6.2127685546875e-05,
      "model_forward_time": 0.1161036491394043,
      "step": 10179
    },
    {
      "epoch": 6.2127685546875e-05,
      "step": 10179,
      "training_step_time": 0.3783450126647949
    },
    {
      "epoch": 6.21337890625e-05,
      "grad_norm": 0.1991696059703827,
      "learning_rate": 9.613576188486253e-05,
      "loss": 0.0591,
      "step": 10180
    },
    {
      "epoch": 6.21337890625e-05,
      "model_forward_time": 0.11578130722045898,
      "step": 10180
    },
    {
      "epoch": 6.21337890625e-05,
      "step": 10180,
      "training_step_time": 0.39517784118652344
    },
    {
      "epoch": 6.2139892578125e-05,
      "model_forward_time": 0.11673307418823242,
      "step": 10181
    },
    {
      "epoch": 6.2139892578125e-05,
      "step": 10181,
      "training_step_time": 0.4633920192718506
    },
    {
      "epoch": 6.214599609375e-05,
      "model_forward_time": 0.11663174629211426,
      "step": 10182
    },
    {
      "epoch": 6.214599609375e-05,
      "step": 10182,
      "training_step_time": 0.41973018646240234
    },
    {
      "epoch": 6.2152099609375e-05,
      "model_forward_time": 0.11863470077514648,
      "step": 10183
    },
    {
      "epoch": 6.2152099609375e-05,
      "step": 10183,
      "training_step_time": 0.8985893726348877
    },
    {
      "epoch": 6.2158203125e-05,
      "model_forward_time": 0.11622810363769531,
      "step": 10184
    },
    {
      "epoch": 6.2158203125e-05,
      "step": 10184,
      "training_step_time": 0.43890929222106934
    },
    {
      "epoch": 6.2164306640625e-05,
      "model_forward_time": 0.11717414855957031,
      "step": 10185
    },
    {
      "epoch": 6.2164306640625e-05,
      "step": 10185,
      "training_step_time": 0.3964266777038574
    },
    {
      "epoch": 6.217041015625e-05,
      "model_forward_time": 0.11584210395812988,
      "step": 10186
    },
    {
      "epoch": 6.217041015625e-05,
      "step": 10186,
      "training_step_time": 0.4085664749145508
    },
    {
      "epoch": 6.2176513671875e-05,
      "model_forward_time": 0.11851143836975098,
      "step": 10187
    },
    {
      "epoch": 6.2176513671875e-05,
      "step": 10187,
      "training_step_time": 0.39664196968078613
    },
    {
      "epoch": 6.21826171875e-05,
      "model_forward_time": 0.11777925491333008,
      "step": 10188
    },
    {
      "epoch": 6.21826171875e-05,
      "step": 10188,
      "training_step_time": 0.37476634979248047
    },
    {
      "epoch": 6.2188720703125e-05,
      "model_forward_time": 0.11585593223571777,
      "step": 10189
    },
    {
      "epoch": 6.2188720703125e-05,
      "step": 10189,
      "training_step_time": 1.0395855903625488
    },
    {
      "epoch": 6.219482421875e-05,
      "grad_norm": 0.18433867394924164,
      "learning_rate": 9.612513182266447e-05,
      "loss": 0.0629,
      "step": 10190
    },
    {
      "epoch": 6.219482421875e-05,
      "model_forward_time": 0.11992573738098145,
      "step": 10190
    },
    {
      "epoch": 6.219482421875e-05,
      "step": 10190,
      "training_step_time": 0.37744712829589844
    },
    {
      "epoch": 6.2200927734375e-05,
      "model_forward_time": 0.11553001403808594,
      "step": 10191
    },
    {
      "epoch": 6.2200927734375e-05,
      "step": 10191,
      "training_step_time": 0.374556303024292
    },
    {
      "epoch": 6.220703125e-05,
      "model_forward_time": 0.1161036491394043,
      "step": 10192
    },
    {
      "epoch": 6.220703125e-05,
      "step": 10192,
      "training_step_time": 0.37647557258605957
    },
    {
      "epoch": 6.2213134765625e-05,
      "model_forward_time": 0.1159522533416748,
      "step": 10193
    },
    {
      "epoch": 6.2213134765625e-05,
      "step": 10193,
      "training_step_time": 0.4744381904602051
    },
    {
      "epoch": 6.221923828125e-05,
      "model_forward_time": 0.11929798126220703,
      "step": 10194
    },
    {
      "epoch": 6.221923828125e-05,
      "step": 10194,
      "training_step_time": 0.4185619354248047
    },
    {
      "epoch": 6.2225341796875e-05,
      "model_forward_time": 0.11736774444580078,
      "step": 10195
    },
    {
      "epoch": 6.2225341796875e-05,
      "step": 10195,
      "training_step_time": 0.6721804141998291
    },
    {
      "epoch": 6.22314453125e-05,
      "model_forward_time": 0.11646270751953125,
      "step": 10196
    },
    {
      "epoch": 6.22314453125e-05,
      "step": 10196,
      "training_step_time": 0.39526867866516113
    },
    {
      "epoch": 6.2237548828125e-05,
      "model_forward_time": 0.11609506607055664,
      "step": 10197
    },
    {
      "epoch": 6.2237548828125e-05,
      "step": 10197,
      "training_step_time": 0.4618353843688965
    },
    {
      "epoch": 6.224365234375e-05,
      "model_forward_time": 0.11587405204772949,
      "step": 10198
    },
    {
      "epoch": 6.224365234375e-05,
      "step": 10198,
      "training_step_time": 0.3968935012817383
    },
    {
      "epoch": 6.2249755859375e-05,
      "model_forward_time": 0.11649179458618164,
      "step": 10199
    },
    {
      "epoch": 6.2249755859375e-05,
      "step": 10199,
      "training_step_time": 0.4439704418182373
    },
    {
      "epoch": 6.2255859375e-05,
      "grad_norm": 0.16573970019817352,
      "learning_rate": 9.611448774886924e-05,
      "loss": 0.0592,
      "step": 10200
    },
    {
      "epoch": 6.2255859375e-05,
      "model_forward_time": 0.11598730087280273,
      "step": 10200
    },
    {
      "epoch": 6.2255859375e-05,
      "step": 10200,
      "training_step_time": 0.45165252685546875
    },
    {
      "epoch": 6.2261962890625e-05,
      "model_forward_time": 0.1159360408782959,
      "step": 10201
    },
    {
      "epoch": 6.2261962890625e-05,
      "step": 10201,
      "training_step_time": 0.8764064311981201
    },
    {
      "epoch": 6.226806640625e-05,
      "model_forward_time": 0.1155698299407959,
      "step": 10202
    },
    {
      "epoch": 6.226806640625e-05,
      "step": 10202,
      "training_step_time": 0.37769007682800293
    },
    {
      "epoch": 6.2274169921875e-05,
      "model_forward_time": 0.11606478691101074,
      "step": 10203
    },
    {
      "epoch": 6.2274169921875e-05,
      "step": 10203,
      "training_step_time": 0.3859591484069824
    },
    {
      "epoch": 6.22802734375e-05,
      "model_forward_time": 0.11634588241577148,
      "step": 10204
    },
    {
      "epoch": 6.22802734375e-05,
      "step": 10204,
      "training_step_time": 0.37308549880981445
    },
    {
      "epoch": 6.2286376953125e-05,
      "model_forward_time": 0.11566972732543945,
      "step": 10205
    },
    {
      "epoch": 6.2286376953125e-05,
      "step": 10205,
      "training_step_time": 0.4166748523712158
    },
    {
      "epoch": 6.229248046875e-05,
      "model_forward_time": 0.1153717041015625,
      "step": 10206
    },
    {
      "epoch": 6.229248046875e-05,
      "step": 10206,
      "training_step_time": 0.408860445022583
    },
    {
      "epoch": 6.2298583984375e-05,
      "model_forward_time": 0.1162412166595459,
      "step": 10207
    },
    {
      "epoch": 6.2298583984375e-05,
      "step": 10207,
      "training_step_time": 0.48273515701293945
    },
    {
      "epoch": 6.23046875e-05,
      "model_forward_time": 0.11658143997192383,
      "step": 10208
    },
    {
      "epoch": 6.23046875e-05,
      "step": 10208,
      "training_step_time": 0.4013400077819824
    },
    {
      "epoch": 6.2310791015625e-05,
      "model_forward_time": 0.11646127700805664,
      "step": 10209
    },
    {
      "epoch": 6.2310791015625e-05,
      "step": 10209,
      "training_step_time": 0.40380144119262695
    },
    {
      "epoch": 6.231689453125e-05,
      "grad_norm": 0.1504298448562622,
      "learning_rate": 9.610382966671021e-05,
      "loss": 0.0617,
      "step": 10210
    },
    {
      "epoch": 6.231689453125e-05,
      "model_forward_time": 0.11578178405761719,
      "step": 10210
    },
    {
      "epoch": 6.231689453125e-05,
      "step": 10210,
      "training_step_time": 0.4379901885986328
    },
    {
      "epoch": 6.2322998046875e-05,
      "model_forward_time": 0.11626076698303223,
      "step": 10211
    },
    {
      "epoch": 6.2322998046875e-05,
      "step": 10211,
      "training_step_time": 0.36873602867126465
    },
    {
      "epoch": 6.23291015625e-05,
      "model_forward_time": 0.1156625747680664,
      "step": 10212
    },
    {
      "epoch": 6.23291015625e-05,
      "step": 10212,
      "training_step_time": 0.43832850456237793
    },
    {
      "epoch": 6.2335205078125e-05,
      "model_forward_time": 0.11653685569763184,
      "step": 10213
    },
    {
      "epoch": 6.2335205078125e-05,
      "step": 10213,
      "training_step_time": 0.49144959449768066
    },
    {
      "epoch": 6.234130859375e-05,
      "model_forward_time": 0.11610770225524902,
      "step": 10214
    },
    {
      "epoch": 6.234130859375e-05,
      "step": 10214,
      "training_step_time": 0.3919954299926758
    },
    {
      "epoch": 6.2347412109375e-05,
      "model_forward_time": 0.11628341674804688,
      "step": 10215
    },
    {
      "epoch": 6.2347412109375e-05,
      "step": 10215,
      "training_step_time": 0.39205336570739746
    },
    {
      "epoch": 6.2353515625e-05,
      "model_forward_time": 0.11624526977539062,
      "step": 10216
    },
    {
      "epoch": 6.2353515625e-05,
      "step": 10216,
      "training_step_time": 0.375307559967041
    },
    {
      "epoch": 6.2359619140625e-05,
      "model_forward_time": 0.11725950241088867,
      "step": 10217
    },
    {
      "epoch": 6.2359619140625e-05,
      "step": 10217,
      "training_step_time": 0.38314104080200195
    },
    {
      "epoch": 6.236572265625e-05,
      "model_forward_time": 0.11595487594604492,
      "step": 10218
    },
    {
      "epoch": 6.236572265625e-05,
      "step": 10218,
      "training_step_time": 0.37750935554504395
    },
    {
      "epoch": 6.2371826171875e-05,
      "model_forward_time": 0.11739444732666016,
      "step": 10219
    },
    {
      "epoch": 6.2371826171875e-05,
      "step": 10219,
      "training_step_time": 0.5439872741699219
    },
    {
      "epoch": 6.23779296875e-05,
      "grad_norm": 0.2072109580039978,
      "learning_rate": 9.609315757942503e-05,
      "loss": 0.058,
      "step": 10220
    },
    {
      "epoch": 6.23779296875e-05,
      "model_forward_time": 0.11591029167175293,
      "step": 10220
    },
    {
      "epoch": 6.23779296875e-05,
      "step": 10220,
      "training_step_time": 0.48380041122436523
    },
    {
      "epoch": 6.2384033203125e-05,
      "model_forward_time": 0.11619186401367188,
      "step": 10221
    },
    {
      "epoch": 6.2384033203125e-05,
      "step": 10221,
      "training_step_time": 0.43456077575683594
    },
    {
      "epoch": 6.239013671875e-05,
      "model_forward_time": 0.11818623542785645,
      "step": 10222
    },
    {
      "epoch": 6.239013671875e-05,
      "step": 10222,
      "training_step_time": 0.3903017044067383
    },
    {
      "epoch": 6.2396240234375e-05,
      "model_forward_time": 0.11665534973144531,
      "step": 10223
    },
    {
      "epoch": 6.2396240234375e-05,
      "step": 10223,
      "training_step_time": 0.41116905212402344
    },
    {
      "epoch": 6.240234375e-05,
      "model_forward_time": 0.11581254005432129,
      "step": 10224
    },
    {
      "epoch": 6.240234375e-05,
      "step": 10224,
      "training_step_time": 0.42753028869628906
    },
    {
      "epoch": 6.2408447265625e-05,
      "model_forward_time": 0.11596179008483887,
      "step": 10225
    },
    {
      "epoch": 6.2408447265625e-05,
      "step": 10225,
      "training_step_time": 0.4529421329498291
    },
    {
      "epoch": 6.241455078125e-05,
      "model_forward_time": 0.11750078201293945,
      "step": 10226
    },
    {
      "epoch": 6.241455078125e-05,
      "step": 10226,
      "training_step_time": 0.4314267635345459
    },
    {
      "epoch": 6.2420654296875e-05,
      "model_forward_time": 0.11634707450866699,
      "step": 10227
    },
    {
      "epoch": 6.2420654296875e-05,
      "step": 10227,
      "training_step_time": 0.44375085830688477
    },
    {
      "epoch": 6.24267578125e-05,
      "model_forward_time": 0.11579656600952148,
      "step": 10228
    },
    {
      "epoch": 6.24267578125e-05,
      "step": 10228,
      "training_step_time": 0.3825845718383789
    },
    {
      "epoch": 6.2432861328125e-05,
      "model_forward_time": 0.11738824844360352,
      "step": 10229
    },
    {
      "epoch": 6.2432861328125e-05,
      "step": 10229,
      "training_step_time": 0.3913388252258301
    },
    {
      "epoch": 6.243896484375e-05,
      "grad_norm": 0.2306230068206787,
      "learning_rate": 9.60824714902556e-05,
      "loss": 0.062,
      "step": 10230
    },
    {
      "epoch": 6.243896484375e-05,
      "model_forward_time": 0.11878728866577148,
      "step": 10230
    },
    {
      "epoch": 6.243896484375e-05,
      "step": 10230,
      "training_step_time": 0.37948083877563477
    },
    {
      "epoch": 6.2445068359375e-05,
      "model_forward_time": 0.11700201034545898,
      "step": 10231
    },
    {
      "epoch": 6.2445068359375e-05,
      "step": 10231,
      "training_step_time": 1.0544898509979248
    },
    {
      "epoch": 6.2451171875e-05,
      "model_forward_time": 0.11608052253723145,
      "step": 10232
    },
    {
      "epoch": 6.2451171875e-05,
      "step": 10232,
      "training_step_time": 0.40810585021972656
    },
    {
      "epoch": 6.2457275390625e-05,
      "model_forward_time": 0.11744284629821777,
      "step": 10233
    },
    {
      "epoch": 6.2457275390625e-05,
      "step": 10233,
      "training_step_time": 0.37577152252197266
    },
    {
      "epoch": 6.246337890625e-05,
      "model_forward_time": 0.11586427688598633,
      "step": 10234
    },
    {
      "epoch": 6.246337890625e-05,
      "step": 10234,
      "training_step_time": 0.4013547897338867
    },
    {
      "epoch": 6.2469482421875e-05,
      "model_forward_time": 0.11594867706298828,
      "step": 10235
    },
    {
      "epoch": 6.2469482421875e-05,
      "step": 10235,
      "training_step_time": 0.41123366355895996
    },
    {
      "epoch": 6.24755859375e-05,
      "model_forward_time": 0.1161050796508789,
      "step": 10236
    },
    {
      "epoch": 6.24755859375e-05,
      "step": 10236,
      "training_step_time": 0.3916943073272705
    },
    {
      "epoch": 6.2481689453125e-05,
      "model_forward_time": 0.11739206314086914,
      "step": 10237
    },
    {
      "epoch": 6.2481689453125e-05,
      "step": 10237,
      "training_step_time": 0.5989034175872803
    },
    {
      "epoch": 6.248779296875e-05,
      "model_forward_time": 0.11691856384277344,
      "step": 10238
    },
    {
      "epoch": 6.248779296875e-05,
      "step": 10238,
      "training_step_time": 0.39287304878234863
    },
    {
      "epoch": 6.2493896484375e-05,
      "model_forward_time": 0.11639952659606934,
      "step": 10239
    },
    {
      "epoch": 6.2493896484375e-05,
      "step": 10239,
      "training_step_time": 0.38538503646850586
    },
    {
      "epoch": 6.25e-05,
      "grad_norm": 0.2603590190410614,
      "learning_rate": 9.607177140244806e-05,
      "loss": 0.0601,
      "step": 10240
    },
    {
      "epoch": 6.25e-05,
      "model_forward_time": 0.11622476577758789,
      "step": 10240
    },
    {
      "epoch": 6.25e-05,
      "step": 10240,
      "training_step_time": 0.4420304298400879
    },
    {
      "epoch": 6.2506103515625e-05,
      "model_forward_time": 0.11621642112731934,
      "step": 10241
    },
    {
      "epoch": 6.2506103515625e-05,
      "step": 10241,
      "training_step_time": 0.48292088508605957
    },
    {
      "epoch": 6.251220703125e-05,
      "model_forward_time": 0.1149435043334961,
      "step": 10242
    },
    {
      "epoch": 6.251220703125e-05,
      "step": 10242,
      "training_step_time": 0.37774062156677246
    },
    {
      "epoch": 6.2518310546875e-05,
      "model_forward_time": 0.1166996955871582,
      "step": 10243
    },
    {
      "epoch": 6.2518310546875e-05,
      "step": 10243,
      "training_step_time": 1.1474547386169434
    },
    {
      "epoch": 6.25244140625e-05,
      "model_forward_time": 0.11614370346069336,
      "step": 10244
    },
    {
      "epoch": 6.25244140625e-05,
      "step": 10244,
      "training_step_time": 0.37393617630004883
    },
    {
      "epoch": 6.2530517578125e-05,
      "model_forward_time": 0.11512517929077148,
      "step": 10245
    },
    {
      "epoch": 6.2530517578125e-05,
      "step": 10245,
      "training_step_time": 0.40104174613952637
    },
    {
      "epoch": 6.253662109375e-05,
      "model_forward_time": 0.11513924598693848,
      "step": 10246
    },
    {
      "epoch": 6.253662109375e-05,
      "step": 10246,
      "training_step_time": 0.40705180168151855
    },
    {
      "epoch": 6.2542724609375e-05,
      "model_forward_time": 0.12468767166137695,
      "step": 10247
    },
    {
      "epoch": 6.2542724609375e-05,
      "step": 10247,
      "training_step_time": 0.4255406856536865
    },
    {
      "epoch": 6.2548828125e-05,
      "model_forward_time": 0.11523103713989258,
      "step": 10248
    },
    {
      "epoch": 6.2548828125e-05,
      "step": 10248,
      "training_step_time": 0.37549710273742676
    },
    {
      "epoch": 6.2554931640625e-05,
      "model_forward_time": 0.11624288558959961,
      "step": 10249
    },
    {
      "epoch": 6.2554931640625e-05,
      "step": 10249,
      "training_step_time": 0.8561367988586426
    },
    {
      "epoch": 6.256103515625e-05,
      "grad_norm": 0.27857205271720886,
      "learning_rate": 9.606105731925283e-05,
      "loss": 0.0614,
      "step": 10250
    },
    {
      "epoch": 6.256103515625e-05,
      "model_forward_time": 0.11537575721740723,
      "step": 10250
    },
    {
      "epoch": 6.256103515625e-05,
      "step": 10250,
      "training_step_time": 0.4419827461242676
    },
    {
      "epoch": 6.2567138671875e-05,
      "model_forward_time": 0.11641669273376465,
      "step": 10251
    },
    {
      "epoch": 6.2567138671875e-05,
      "step": 10251,
      "training_step_time": 0.445742130279541
    },
    {
      "epoch": 6.25732421875e-05,
      "model_forward_time": 0.11579751968383789,
      "step": 10252
    },
    {
      "epoch": 6.25732421875e-05,
      "step": 10252,
      "training_step_time": 0.45181989669799805
    },
    {
      "epoch": 6.2579345703125e-05,
      "model_forward_time": 0.11477422714233398,
      "step": 10253
    },
    {
      "epoch": 6.2579345703125e-05,
      "step": 10253,
      "training_step_time": 0.4604785442352295
    },
    {
      "epoch": 6.258544921875e-05,
      "model_forward_time": 0.12067580223083496,
      "step": 10254
    },
    {
      "epoch": 6.258544921875e-05,
      "step": 10254,
      "training_step_time": 0.3830454349517822
    },
    {
      "epoch": 6.2591552734375e-05,
      "model_forward_time": 0.11522674560546875,
      "step": 10255
    },
    {
      "epoch": 6.2591552734375e-05,
      "step": 10255,
      "training_step_time": 0.38130688667297363
    },
    {
      "epoch": 6.259765625e-05,
      "model_forward_time": 0.11719942092895508,
      "step": 10256
    },
    {
      "epoch": 6.259765625e-05,
      "step": 10256,
      "training_step_time": 0.37831878662109375
    },
    {
      "epoch": 6.2603759765625e-05,
      "model_forward_time": 0.12577390670776367,
      "step": 10257
    },
    {
      "epoch": 6.2603759765625e-05,
      "step": 10257,
      "training_step_time": 0.40373897552490234
    },
    {
      "epoch": 6.260986328125e-05,
      "model_forward_time": 0.11753511428833008,
      "step": 10258
    },
    {
      "epoch": 6.260986328125e-05,
      "step": 10258,
      "training_step_time": 0.3814866542816162
    },
    {
      "epoch": 6.2615966796875e-05,
      "model_forward_time": 0.11783671379089355,
      "step": 10259
    },
    {
      "epoch": 6.2615966796875e-05,
      "step": 10259,
      "training_step_time": 0.5578505992889404
    },
    {
      "epoch": 6.26220703125e-05,
      "grad_norm": 0.16568657755851746,
      "learning_rate": 9.605032924392457e-05,
      "loss": 0.0605,
      "step": 10260
    },
    {
      "epoch": 6.26220703125e-05,
      "model_forward_time": 0.11780357360839844,
      "step": 10260
    },
    {
      "epoch": 6.26220703125e-05,
      "step": 10260,
      "training_step_time": 0.4659590721130371
    },
    {
      "epoch": 6.2628173828125e-05,
      "model_forward_time": 0.11713862419128418,
      "step": 10261
    },
    {
      "epoch": 6.2628173828125e-05,
      "step": 10261,
      "training_step_time": 0.4828064441680908
    },
    {
      "epoch": 6.263427734375e-05,
      "model_forward_time": 0.11716055870056152,
      "step": 10262
    },
    {
      "epoch": 6.263427734375e-05,
      "step": 10262,
      "training_step_time": 0.4140617847442627
    },
    {
      "epoch": 6.2640380859375e-05,
      "model_forward_time": 0.11696529388427734,
      "step": 10263
    },
    {
      "epoch": 6.2640380859375e-05,
      "step": 10263,
      "training_step_time": 0.38092589378356934
    },
    {
      "epoch": 6.2646484375e-05,
      "model_forward_time": 0.1173696517944336,
      "step": 10264
    },
    {
      "epoch": 6.2646484375e-05,
      "step": 10264,
      "training_step_time": 0.39490771293640137
    },
    {
      "epoch": 6.2652587890625e-05,
      "model_forward_time": 0.11756777763366699,
      "step": 10265
    },
    {
      "epoch": 6.2652587890625e-05,
      "step": 10265,
      "training_step_time": 0.4957270622253418
    },
    {
      "epoch": 6.265869140625e-05,
      "model_forward_time": 0.11673784255981445,
      "step": 10266
    },
    {
      "epoch": 6.265869140625e-05,
      "step": 10266,
      "training_step_time": 0.43302226066589355
    },
    {
      "epoch": 6.2664794921875e-05,
      "model_forward_time": 0.11736440658569336,
      "step": 10267
    },
    {
      "epoch": 6.2664794921875e-05,
      "step": 10267,
      "training_step_time": 0.49759507179260254
    },
    {
      "epoch": 6.26708984375e-05,
      "model_forward_time": 0.11528396606445312,
      "step": 10268
    },
    {
      "epoch": 6.26708984375e-05,
      "step": 10268,
      "training_step_time": 0.40268540382385254
    },
    {
      "epoch": 6.2677001953125e-05,
      "model_forward_time": 0.11595296859741211,
      "step": 10269
    },
    {
      "epoch": 6.2677001953125e-05,
      "step": 10269,
      "training_step_time": 0.3973569869995117
    },
    {
      "epoch": 6.268310546875e-05,
      "grad_norm": 0.15056785941123962,
      "learning_rate": 9.603958717972214e-05,
      "loss": 0.0606,
      "step": 10270
    },
    {
      "epoch": 6.268310546875e-05,
      "model_forward_time": 0.11773967742919922,
      "step": 10270
    },
    {
      "epoch": 6.268310546875e-05,
      "step": 10270,
      "training_step_time": 0.3992648124694824
    },
    {
      "epoch": 6.2689208984375e-05,
      "model_forward_time": 0.11688709259033203,
      "step": 10271
    },
    {
      "epoch": 6.2689208984375e-05,
      "step": 10271,
      "training_step_time": 0.3924744129180908
    },
    {
      "epoch": 6.26953125e-05,
      "model_forward_time": 0.11788415908813477,
      "step": 10272
    },
    {
      "epoch": 6.26953125e-05,
      "step": 10272,
      "training_step_time": 0.5437722206115723
    },
    {
      "epoch": 6.2701416015625e-05,
      "model_forward_time": 0.11743378639221191,
      "step": 10273
    },
    {
      "epoch": 6.2701416015625e-05,
      "step": 10273,
      "training_step_time": 0.4417436122894287
    },
    {
      "epoch": 6.270751953125e-05,
      "model_forward_time": 0.11782598495483398,
      "step": 10274
    },
    {
      "epoch": 6.270751953125e-05,
      "step": 10274,
      "training_step_time": 0.392444372177124
    },
    {
      "epoch": 6.2713623046875e-05,
      "model_forward_time": 0.1169283390045166,
      "step": 10275
    },
    {
      "epoch": 6.2713623046875e-05,
      "step": 10275,
      "training_step_time": 0.4107234477996826
    },
    {
      "epoch": 6.27197265625e-05,
      "model_forward_time": 0.12011551856994629,
      "step": 10276
    },
    {
      "epoch": 6.27197265625e-05,
      "step": 10276,
      "training_step_time": 0.3863241672515869
    },
    {
      "epoch": 6.2725830078125e-05,
      "model_forward_time": 0.12102508544921875,
      "step": 10277
    },
    {
      "epoch": 6.2725830078125e-05,
      "step": 10277,
      "training_step_time": 0.397932767868042
    },
    {
      "epoch": 6.273193359375e-05,
      "model_forward_time": 0.12321281433105469,
      "step": 10278
    },
    {
      "epoch": 6.273193359375e-05,
      "step": 10278,
      "training_step_time": 0.4081685543060303
    },
    {
      "epoch": 6.2738037109375e-05,
      "model_forward_time": 0.11679863929748535,
      "step": 10279
    },
    {
      "epoch": 6.2738037109375e-05,
      "step": 10279,
      "training_step_time": 0.4510014057159424
    },
    {
      "epoch": 6.2744140625e-05,
      "grad_norm": 0.15891078114509583,
      "learning_rate": 9.602883112990875e-05,
      "loss": 0.0708,
      "step": 10280
    },
    {
      "epoch": 6.2744140625e-05,
      "model_forward_time": 0.12039971351623535,
      "step": 10280
    },
    {
      "epoch": 6.2744140625e-05,
      "step": 10280,
      "training_step_time": 0.49376797676086426
    },
    {
      "epoch": 6.2750244140625e-05,
      "model_forward_time": 0.11664414405822754,
      "step": 10281
    },
    {
      "epoch": 6.2750244140625e-05,
      "step": 10281,
      "training_step_time": 0.5151455402374268
    },
    {
      "epoch": 6.275634765625e-05,
      "model_forward_time": 0.11612415313720703,
      "step": 10282
    },
    {
      "epoch": 6.275634765625e-05,
      "step": 10282,
      "training_step_time": 0.40632128715515137
    },
    {
      "epoch": 6.2762451171875e-05,
      "model_forward_time": 0.11635065078735352,
      "step": 10283
    },
    {
      "epoch": 6.2762451171875e-05,
      "step": 10283,
      "training_step_time": 0.3836524486541748
    },
    {
      "epoch": 6.27685546875e-05,
      "model_forward_time": 0.11580896377563477,
      "step": 10284
    },
    {
      "epoch": 6.27685546875e-05,
      "step": 10284,
      "training_step_time": 0.37641286849975586
    },
    {
      "epoch": 6.2774658203125e-05,
      "model_forward_time": 0.11661338806152344,
      "step": 10285
    },
    {
      "epoch": 6.2774658203125e-05,
      "step": 10285,
      "training_step_time": 0.3889133930206299
    },
    {
      "epoch": 6.278076171875e-05,
      "model_forward_time": 0.11713910102844238,
      "step": 10286
    },
    {
      "epoch": 6.278076171875e-05,
      "step": 10286,
      "training_step_time": 0.4713139533996582
    },
    {
      "epoch": 6.2786865234375e-05,
      "model_forward_time": 0.1180565357208252,
      "step": 10287
    },
    {
      "epoch": 6.2786865234375e-05,
      "step": 10287,
      "training_step_time": 0.43100547790527344
    },
    {
      "epoch": 6.279296875e-05,
      "model_forward_time": 0.1188192367553711,
      "step": 10288
    },
    {
      "epoch": 6.279296875e-05,
      "step": 10288,
      "training_step_time": 0.37760114669799805
    },
    {
      "epoch": 6.2799072265625e-05,
      "model_forward_time": 0.11886119842529297,
      "step": 10289
    },
    {
      "epoch": 6.2799072265625e-05,
      "step": 10289,
      "training_step_time": 0.3896811008453369
    },
    {
      "epoch": 6.280517578125e-05,
      "grad_norm": 0.20895379781723022,
      "learning_rate": 9.601806109775179e-05,
      "loss": 0.0631,
      "step": 10290
    },
    {
      "epoch": 6.280517578125e-05,
      "model_forward_time": 0.11907124519348145,
      "step": 10290
    },
    {
      "epoch": 6.280517578125e-05,
      "step": 10290,
      "training_step_time": 0.38133788108825684
    },
    {
      "epoch": 6.2811279296875e-05,
      "model_forward_time": 0.11718916893005371,
      "step": 10291
    },
    {
      "epoch": 6.2811279296875e-05,
      "step": 10291,
      "training_step_time": 0.42345476150512695
    },
    {
      "epoch": 6.28173828125e-05,
      "model_forward_time": 0.11779260635375977,
      "step": 10292
    },
    {
      "epoch": 6.28173828125e-05,
      "step": 10292,
      "training_step_time": 0.4842045307159424
    },
    {
      "epoch": 6.2823486328125e-05,
      "model_forward_time": 0.12784862518310547,
      "step": 10293
    },
    {
      "epoch": 6.2823486328125e-05,
      "step": 10293,
      "training_step_time": 0.41362690925598145
    },
    {
      "epoch": 6.282958984375e-05,
      "model_forward_time": 0.11822271347045898,
      "step": 10294
    },
    {
      "epoch": 6.282958984375e-05,
      "step": 10294,
      "training_step_time": 0.42578125
    },
    {
      "epoch": 6.2835693359375e-05,
      "model_forward_time": 0.11811494827270508,
      "step": 10295
    },
    {
      "epoch": 6.2835693359375e-05,
      "step": 10295,
      "training_step_time": 0.41255950927734375
    },
    {
      "epoch": 6.2841796875e-05,
      "model_forward_time": 0.11649394035339355,
      "step": 10296
    },
    {
      "epoch": 6.2841796875e-05,
      "step": 10296,
      "training_step_time": 0.38358449935913086
    },
    {
      "epoch": 6.2847900390625e-05,
      "model_forward_time": 0.1183474063873291,
      "step": 10297
    },
    {
      "epoch": 6.2847900390625e-05,
      "step": 10297,
      "training_step_time": 0.3815755844116211
    },
    {
      "epoch": 6.285400390625e-05,
      "model_forward_time": 0.11715149879455566,
      "step": 10298
    },
    {
      "epoch": 6.285400390625e-05,
      "step": 10298,
      "training_step_time": 0.402097225189209
    },
    {
      "epoch": 6.2860107421875e-05,
      "model_forward_time": 0.11780285835266113,
      "step": 10299
    },
    {
      "epoch": 6.2860107421875e-05,
      "step": 10299,
      "training_step_time": 0.42459964752197266
    },
    {
      "epoch": 6.28662109375e-05,
      "grad_norm": 0.24253898859024048,
      "learning_rate": 9.600727708652289e-05,
      "loss": 0.0598,
      "step": 10300
    },
    {
      "epoch": 6.28662109375e-05,
      "model_forward_time": 0.11804842948913574,
      "step": 10300
    },
    {
      "epoch": 6.28662109375e-05,
      "step": 10300,
      "training_step_time": 0.45729947090148926
    },
    {
      "epoch": 6.2872314453125e-05,
      "model_forward_time": 0.11992645263671875,
      "step": 10301
    },
    {
      "epoch": 6.2872314453125e-05,
      "step": 10301,
      "training_step_time": 0.46118927001953125
    },
    {
      "epoch": 6.287841796875e-05,
      "model_forward_time": 0.12045812606811523,
      "step": 10302
    },
    {
      "epoch": 6.287841796875e-05,
      "step": 10302,
      "training_step_time": 0.6771214008331299
    },
    {
      "epoch": 6.2884521484375e-05,
      "model_forward_time": 0.12189269065856934,
      "step": 10303
    },
    {
      "epoch": 6.2884521484375e-05,
      "step": 10303,
      "training_step_time": 0.6469912528991699
    },
    {
      "epoch": 6.2890625e-05,
      "model_forward_time": 0.12072944641113281,
      "step": 10304
    },
    {
      "epoch": 6.2890625e-05,
      "step": 10304,
      "training_step_time": 0.7852177619934082
    },
    {
      "epoch": 6.2896728515625e-05,
      "model_forward_time": 0.12188720703125,
      "step": 10305
    },
    {
      "epoch": 6.2896728515625e-05,
      "step": 10305,
      "training_step_time": 0.5860378742218018
    },
    {
      "epoch": 6.290283203125e-05,
      "model_forward_time": 0.12038516998291016,
      "step": 10306
    },
    {
      "epoch": 6.290283203125e-05,
      "step": 10306,
      "training_step_time": 0.7042708396911621
    },
    {
      "epoch": 6.2908935546875e-05,
      "model_forward_time": 0.12081360816955566,
      "step": 10307
    },
    {
      "epoch": 6.2908935546875e-05,
      "step": 10307,
      "training_step_time": 0.6516149044036865
    },
    {
      "epoch": 6.29150390625e-05,
      "model_forward_time": 0.12309575080871582,
      "step": 10308
    },
    {
      "epoch": 6.29150390625e-05,
      "step": 10308,
      "training_step_time": 0.6374716758728027
    },
    {
      "epoch": 6.2921142578125e-05,
      "model_forward_time": 0.12379813194274902,
      "step": 10309
    },
    {
      "epoch": 6.2921142578125e-05,
      "step": 10309,
      "training_step_time": 0.7882027626037598
    },
    {
      "epoch": 6.292724609375e-05,
      "grad_norm": 0.19358281791210175,
      "learning_rate": 9.599647909949798e-05,
      "loss": 0.0586,
      "step": 10310
    },
    {
      "epoch": 6.292724609375e-05,
      "model_forward_time": 0.14078259468078613,
      "step": 10310
    },
    {
      "epoch": 6.292724609375e-05,
      "step": 10310,
      "training_step_time": 0.7311520576477051
    },
    {
      "epoch": 6.2933349609375e-05,
      "model_forward_time": 0.13965225219726562,
      "step": 10311
    },
    {
      "epoch": 6.2933349609375e-05,
      "step": 10311,
      "training_step_time": 0.6863281726837158
    },
    {
      "epoch": 6.2939453125e-05,
      "model_forward_time": 0.12167620658874512,
      "step": 10312
    },
    {
      "epoch": 6.2939453125e-05,
      "step": 10312,
      "training_step_time": 0.6199300289154053
    },
    {
      "epoch": 6.2945556640625e-05,
      "model_forward_time": 0.1185600757598877,
      "step": 10313
    },
    {
      "epoch": 6.2945556640625e-05,
      "step": 10313,
      "training_step_time": 0.6960434913635254
    },
    {
      "epoch": 6.295166015625e-05,
      "model_forward_time": 0.1194307804107666,
      "step": 10314
    },
    {
      "epoch": 6.295166015625e-05,
      "step": 10314,
      "training_step_time": 0.6635696887969971
    },
    {
      "epoch": 6.2957763671875e-05,
      "model_forward_time": 0.12291312217712402,
      "step": 10315
    },
    {
      "epoch": 6.2957763671875e-05,
      "step": 10315,
      "training_step_time": 0.6979565620422363
    },
    {
      "epoch": 6.29638671875e-05,
      "model_forward_time": 0.12320995330810547,
      "step": 10316
    },
    {
      "epoch": 6.29638671875e-05,
      "step": 10316,
      "training_step_time": 0.7746670246124268
    },
    {
      "epoch": 6.2969970703125e-05,
      "model_forward_time": 0.13201570510864258,
      "step": 10317
    },
    {
      "epoch": 6.2969970703125e-05,
      "step": 10317,
      "training_step_time": 0.6467175483703613
    },
    {
      "epoch": 6.297607421875e-05,
      "model_forward_time": 0.11834383010864258,
      "step": 10318
    },
    {
      "epoch": 6.297607421875e-05,
      "step": 10318,
      "training_step_time": 0.6458759307861328
    },
    {
      "epoch": 6.2982177734375e-05,
      "model_forward_time": 0.12549495697021484,
      "step": 10319
    },
    {
      "epoch": 6.2982177734375e-05,
      "step": 10319,
      "training_step_time": 0.6577119827270508
    },
    {
      "epoch": 6.298828125e-05,
      "grad_norm": 0.13216739892959595,
      "learning_rate": 9.598566713995718e-05,
      "loss": 0.0682,
      "step": 10320
    },
    {
      "epoch": 6.298828125e-05,
      "model_forward_time": 0.12313461303710938,
      "step": 10320
    },
    {
      "epoch": 6.298828125e-05,
      "step": 10320,
      "training_step_time": 0.6607627868652344
    },
    {
      "epoch": 6.2994384765625e-05,
      "model_forward_time": 0.11921358108520508,
      "step": 10321
    },
    {
      "epoch": 6.2994384765625e-05,
      "step": 10321,
      "training_step_time": 0.6912996768951416
    },
    {
      "epoch": 6.300048828125e-05,
      "model_forward_time": 0.12266111373901367,
      "step": 10322
    },
    {
      "epoch": 6.300048828125e-05,
      "step": 10322,
      "training_step_time": 0.722184419631958
    },
    {
      "epoch": 6.3006591796875e-05,
      "model_forward_time": 0.12209844589233398,
      "step": 10323
    },
    {
      "epoch": 6.3006591796875e-05,
      "step": 10323,
      "training_step_time": 0.6868624687194824
    },
    {
      "epoch": 6.30126953125e-05,
      "model_forward_time": 0.12291312217712402,
      "step": 10324
    },
    {
      "epoch": 6.30126953125e-05,
      "step": 10324,
      "training_step_time": 0.6719889640808105
    },
    {
      "epoch": 6.3018798828125e-05,
      "model_forward_time": 0.11919450759887695,
      "step": 10325
    },
    {
      "epoch": 6.3018798828125e-05,
      "step": 10325,
      "training_step_time": 0.667898416519165
    },
    {
      "epoch": 6.302490234375e-05,
      "model_forward_time": 0.12392997741699219,
      "step": 10326
    },
    {
      "epoch": 6.302490234375e-05,
      "step": 10326,
      "training_step_time": 0.6991674900054932
    },
    {
      "epoch": 6.3031005859375e-05,
      "model_forward_time": 0.12450814247131348,
      "step": 10327
    },
    {
      "epoch": 6.3031005859375e-05,
      "step": 10327,
      "training_step_time": 0.656630277633667
    },
    {
      "epoch": 6.3037109375e-05,
      "model_forward_time": 0.11986255645751953,
      "step": 10328
    },
    {
      "epoch": 6.3037109375e-05,
      "step": 10328,
      "training_step_time": 0.7078719139099121
    },
    {
      "epoch": 6.3043212890625e-05,
      "model_forward_time": 0.12089061737060547,
      "step": 10329
    },
    {
      "epoch": 6.3043212890625e-05,
      "step": 10329,
      "training_step_time": 0.7153341770172119
    },
    {
      "epoch": 6.304931640625e-05,
      "grad_norm": 0.2189309000968933,
      "learning_rate": 9.597484121118487e-05,
      "loss": 0.0678,
      "step": 10330
    },
    {
      "epoch": 6.304931640625e-05,
      "model_forward_time": 0.12069463729858398,
      "step": 10330
    },
    {
      "epoch": 6.304931640625e-05,
      "step": 10330,
      "training_step_time": 0.6310243606567383
    },
    {
      "epoch": 6.3055419921875e-05,
      "model_forward_time": 0.11909818649291992,
      "step": 10331
    },
    {
      "epoch": 6.3055419921875e-05,
      "step": 10331,
      "training_step_time": 0.652106761932373
    },
    {
      "epoch": 6.30615234375e-05,
      "model_forward_time": 0.12734246253967285,
      "step": 10332
    },
    {
      "epoch": 6.30615234375e-05,
      "step": 10332,
      "training_step_time": 0.7181458473205566
    },
    {
      "epoch": 6.3067626953125e-05,
      "model_forward_time": 0.12585878372192383,
      "step": 10333
    },
    {
      "epoch": 6.3067626953125e-05,
      "step": 10333,
      "training_step_time": 0.6647377014160156
    },
    {
      "epoch": 6.307373046875e-05,
      "model_forward_time": 0.12002897262573242,
      "step": 10334
    },
    {
      "epoch": 6.307373046875e-05,
      "step": 10334,
      "training_step_time": 0.7273983955383301
    },
    {
      "epoch": 6.3079833984375e-05,
      "model_forward_time": 0.12051844596862793,
      "step": 10335
    },
    {
      "epoch": 6.3079833984375e-05,
      "step": 10335,
      "training_step_time": 0.6727371215820312
    },
    {
      "epoch": 6.30859375e-05,
      "model_forward_time": 0.1409001350402832,
      "step": 10336
    },
    {
      "epoch": 6.30859375e-05,
      "step": 10336,
      "training_step_time": 0.6676993370056152
    },
    {
      "epoch": 6.3092041015625e-05,
      "model_forward_time": 0.12079119682312012,
      "step": 10337
    },
    {
      "epoch": 6.3092041015625e-05,
      "step": 10337,
      "training_step_time": 0.6828799247741699
    },
    {
      "epoch": 6.309814453125e-05,
      "model_forward_time": 0.12555623054504395,
      "step": 10338
    },
    {
      "epoch": 6.309814453125e-05,
      "step": 10338,
      "training_step_time": 0.6510398387908936
    },
    {
      "epoch": 6.3104248046875e-05,
      "model_forward_time": 0.125563383102417,
      "step": 10339
    },
    {
      "epoch": 6.3104248046875e-05,
      "step": 10339,
      "training_step_time": 0.6332201957702637
    },
    {
      "epoch": 6.31103515625e-05,
      "grad_norm": 0.244691401720047,
      "learning_rate": 9.596400131646972e-05,
      "loss": 0.0645,
      "step": 10340
    },
    {
      "epoch": 6.31103515625e-05,
      "model_forward_time": 0.11807680130004883,
      "step": 10340
    },
    {
      "epoch": 6.31103515625e-05,
      "step": 10340,
      "training_step_time": 0.7053549289703369
    },
    {
      "epoch": 6.3116455078125e-05,
      "model_forward_time": 0.1257917881011963,
      "step": 10341
    },
    {
      "epoch": 6.3116455078125e-05,
      "step": 10341,
      "training_step_time": 0.6805202960968018
    },
    {
      "epoch": 6.312255859375e-05,
      "model_forward_time": 0.12172245979309082,
      "step": 10342
    },
    {
      "epoch": 6.312255859375e-05,
      "step": 10342,
      "training_step_time": 0.6690759658813477
    },
    {
      "epoch": 6.3128662109375e-05,
      "model_forward_time": 0.11833381652832031,
      "step": 10343
    },
    {
      "epoch": 6.3128662109375e-05,
      "step": 10343,
      "training_step_time": 0.761646032333374
    },
    {
      "epoch": 6.3134765625e-05,
      "model_forward_time": 0.12007808685302734,
      "step": 10344
    },
    {
      "epoch": 6.3134765625e-05,
      "step": 10344,
      "training_step_time": 0.6474809646606445
    },
    {
      "epoch": 6.3140869140625e-05,
      "model_forward_time": 0.12187051773071289,
      "step": 10345
    },
    {
      "epoch": 6.3140869140625e-05,
      "step": 10345,
      "training_step_time": 0.614250659942627
    },
    {
      "epoch": 6.314697265625e-05,
      "model_forward_time": 0.1189734935760498,
      "step": 10346
    },
    {
      "epoch": 6.314697265625e-05,
      "step": 10346,
      "training_step_time": 0.6523289680480957
    },
    {
      "epoch": 6.3153076171875e-05,
      "model_forward_time": 0.12255072593688965,
      "step": 10347
    },
    {
      "epoch": 6.3153076171875e-05,
      "step": 10347,
      "training_step_time": 0.6687710285186768
    },
    {
      "epoch": 6.31591796875e-05,
      "model_forward_time": 0.12063312530517578,
      "step": 10348
    },
    {
      "epoch": 6.31591796875e-05,
      "step": 10348,
      "training_step_time": 0.6566474437713623
    },
    {
      "epoch": 6.3165283203125e-05,
      "model_forward_time": 0.11954450607299805,
      "step": 10349
    },
    {
      "epoch": 6.3165283203125e-05,
      "step": 10349,
      "training_step_time": 0.71441650390625
    },
    {
      "epoch": 6.317138671875e-05,
      "grad_norm": 0.18757113814353943,
      "learning_rate": 9.595314745910456e-05,
      "loss": 0.0686,
      "step": 10350
    },
    {
      "epoch": 6.317138671875e-05,
      "model_forward_time": 0.12485313415527344,
      "step": 10350
    },
    {
      "epoch": 6.317138671875e-05,
      "step": 10350,
      "training_step_time": 0.6878540515899658
    },
    {
      "epoch": 6.3177490234375e-05,
      "model_forward_time": 0.12033939361572266,
      "step": 10351
    },
    {
      "epoch": 6.3177490234375e-05,
      "step": 10351,
      "training_step_time": 0.6275460720062256
    },
    {
      "epoch": 6.318359375e-05,
      "model_forward_time": 0.1246485710144043,
      "step": 10352
    },
    {
      "epoch": 6.318359375e-05,
      "step": 10352,
      "training_step_time": 0.7241532802581787
    },
    {
      "epoch": 6.3189697265625e-05,
      "model_forward_time": 0.12878012657165527,
      "step": 10353
    },
    {
      "epoch": 6.3189697265625e-05,
      "step": 10353,
      "training_step_time": 0.6810736656188965
    },
    {
      "epoch": 6.319580078125e-05,
      "model_forward_time": 0.11914491653442383,
      "step": 10354
    },
    {
      "epoch": 6.319580078125e-05,
      "step": 10354,
      "training_step_time": 0.6656336784362793
    },
    {
      "epoch": 6.3201904296875e-05,
      "model_forward_time": 0.11935639381408691,
      "step": 10355
    },
    {
      "epoch": 6.3201904296875e-05,
      "step": 10355,
      "training_step_time": 0.7128360271453857
    },
    {
      "epoch": 6.32080078125e-05,
      "model_forward_time": 0.1196742057800293,
      "step": 10356
    },
    {
      "epoch": 6.32080078125e-05,
      "step": 10356,
      "training_step_time": 0.6350312232971191
    },
    {
      "epoch": 6.3214111328125e-05,
      "model_forward_time": 0.12329983711242676,
      "step": 10357
    },
    {
      "epoch": 6.3214111328125e-05,
      "step": 10357,
      "training_step_time": 0.655512809753418
    },
    {
      "epoch": 6.322021484375e-05,
      "model_forward_time": 0.12174797058105469,
      "step": 10358
    },
    {
      "epoch": 6.322021484375e-05,
      "step": 10358,
      "training_step_time": 0.6856396198272705
    },
    {
      "epoch": 6.3226318359375e-05,
      "model_forward_time": 0.1182248592376709,
      "step": 10359
    },
    {
      "epoch": 6.3226318359375e-05,
      "step": 10359,
      "training_step_time": 0.6647086143493652
    },
    {
      "epoch": 6.3232421875e-05,
      "grad_norm": 0.17837479710578918,
      "learning_rate": 9.594227964238653e-05,
      "loss": 0.0613,
      "step": 10360
    },
    {
      "epoch": 6.3232421875e-05,
      "model_forward_time": 0.11883139610290527,
      "step": 10360
    },
    {
      "epoch": 6.3232421875e-05,
      "step": 10360,
      "training_step_time": 0.6627798080444336
    },
    {
      "epoch": 6.3238525390625e-05,
      "model_forward_time": 0.12314915657043457,
      "step": 10361
    },
    {
      "epoch": 6.3238525390625e-05,
      "step": 10361,
      "training_step_time": 0.6215605735778809
    },
    {
      "epoch": 6.324462890625e-05,
      "model_forward_time": 0.12012481689453125,
      "step": 10362
    },
    {
      "epoch": 6.324462890625e-05,
      "step": 10362,
      "training_step_time": 0.6298582553863525
    },
    {
      "epoch": 6.3250732421875e-05,
      "model_forward_time": 0.12289261817932129,
      "step": 10363
    },
    {
      "epoch": 6.3250732421875e-05,
      "step": 10363,
      "training_step_time": 0.6565485000610352
    },
    {
      "epoch": 6.32568359375e-05,
      "model_forward_time": 0.12241435050964355,
      "step": 10364
    },
    {
      "epoch": 6.32568359375e-05,
      "step": 10364,
      "training_step_time": 0.6048214435577393
    },
    {
      "epoch": 6.3262939453125e-05,
      "model_forward_time": 0.12209773063659668,
      "step": 10365
    },
    {
      "epoch": 6.3262939453125e-05,
      "step": 10365,
      "training_step_time": 0.6701722145080566
    },
    {
      "epoch": 6.326904296875e-05,
      "model_forward_time": 0.1324176788330078,
      "step": 10366
    },
    {
      "epoch": 6.326904296875e-05,
      "step": 10366,
      "training_step_time": 0.5597002506256104
    },
    {
      "epoch": 6.3275146484375e-05,
      "model_forward_time": 0.12417006492614746,
      "step": 10367
    },
    {
      "epoch": 6.3275146484375e-05,
      "step": 10367,
      "training_step_time": 0.6701607704162598
    },
    {
      "epoch": 6.328125e-05,
      "model_forward_time": 0.1260678768157959,
      "step": 10368
    },
    {
      "epoch": 6.328125e-05,
      "step": 10368,
      "training_step_time": 0.6202645301818848
    },
    {
      "epoch": 6.3287353515625e-05,
      "model_forward_time": 0.12179231643676758,
      "step": 10369
    },
    {
      "epoch": 6.3287353515625e-05,
      "step": 10369,
      "training_step_time": 0.6361217498779297
    },
    {
      "epoch": 6.329345703125e-05,
      "grad_norm": 0.21265950798988342,
      "learning_rate": 9.593139786961697e-05,
      "loss": 0.0723,
      "step": 10370
    },
    {
      "epoch": 6.329345703125e-05,
      "model_forward_time": 0.12140846252441406,
      "step": 10370
    },
    {
      "epoch": 6.329345703125e-05,
      "step": 10370,
      "training_step_time": 0.6010909080505371
    },
    {
      "epoch": 6.3299560546875e-05,
      "model_forward_time": 0.11957311630249023,
      "step": 10371
    },
    {
      "epoch": 6.3299560546875e-05,
      "step": 10371,
      "training_step_time": 0.52825927734375
    },
    {
      "epoch": 6.33056640625e-05,
      "model_forward_time": 0.12125205993652344,
      "step": 10372
    },
    {
      "epoch": 6.33056640625e-05,
      "step": 10372,
      "training_step_time": 0.4339637756347656
    },
    {
      "epoch": 6.3311767578125e-05,
      "model_forward_time": 0.12150287628173828,
      "step": 10373
    },
    {
      "epoch": 6.3311767578125e-05,
      "step": 10373,
      "training_step_time": 0.5230555534362793
    },
    {
      "epoch": 6.331787109375e-05,
      "model_forward_time": 0.11881899833679199,
      "step": 10374
    },
    {
      "epoch": 6.331787109375e-05,
      "step": 10374,
      "training_step_time": 0.4708576202392578
    },
    {
      "epoch": 6.3323974609375e-05,
      "model_forward_time": 0.11891436576843262,
      "step": 10375
    },
    {
      "epoch": 6.3323974609375e-05,
      "step": 10375,
      "training_step_time": 0.42183613777160645
    },
    {
      "epoch": 6.3330078125e-05,
      "model_forward_time": 0.11652731895446777,
      "step": 10376
    },
    {
      "epoch": 6.3330078125e-05,
      "step": 10376,
      "training_step_time": 0.5192155838012695
    },
    {
      "epoch": 6.3336181640625e-05,
      "model_forward_time": 0.1168367862701416,
      "step": 10377
    },
    {
      "epoch": 6.3336181640625e-05,
      "step": 10377,
      "training_step_time": 0.426318883895874
    },
    {
      "epoch": 6.334228515625e-05,
      "model_forward_time": 0.11798763275146484,
      "step": 10378
    },
    {
      "epoch": 6.334228515625e-05,
      "step": 10378,
      "training_step_time": 0.4819817543029785
    },
    {
      "epoch": 6.3348388671875e-05,
      "model_forward_time": 0.11833715438842773,
      "step": 10379
    },
    {
      "epoch": 6.3348388671875e-05,
      "step": 10379,
      "training_step_time": 0.38005566596984863
    },
    {
      "epoch": 6.33544921875e-05,
      "grad_norm": 0.24660873413085938,
      "learning_rate": 9.59205021441015e-05,
      "loss": 0.0687,
      "step": 10380
    },
    {
      "epoch": 6.33544921875e-05,
      "model_forward_time": 0.11729550361633301,
      "step": 10380
    },
    {
      "epoch": 6.33544921875e-05,
      "step": 10380,
      "training_step_time": 0.46441006660461426
    },
    {
      "epoch": 6.3360595703125e-05,
      "model_forward_time": 0.11855244636535645,
      "step": 10381
    },
    {
      "epoch": 6.3360595703125e-05,
      "step": 10381,
      "training_step_time": 0.4240896701812744
    },
    {
      "epoch": 6.336669921875e-05,
      "model_forward_time": 0.11619400978088379,
      "step": 10382
    },
    {
      "epoch": 6.336669921875e-05,
      "step": 10382,
      "training_step_time": 0.49201083183288574
    },
    {
      "epoch": 6.3372802734375e-05,
      "model_forward_time": 0.1168668270111084,
      "step": 10383
    },
    {
      "epoch": 6.3372802734375e-05,
      "step": 10383,
      "training_step_time": 0.4334256649017334
    },
    {
      "epoch": 6.337890625e-05,
      "model_forward_time": 0.11717009544372559,
      "step": 10384
    },
    {
      "epoch": 6.337890625e-05,
      "step": 10384,
      "training_step_time": 0.39009737968444824
    },
    {
      "epoch": 6.3385009765625e-05,
      "model_forward_time": 0.12298774719238281,
      "step": 10385
    },
    {
      "epoch": 6.3385009765625e-05,
      "step": 10385,
      "training_step_time": 0.39173293113708496
    },
    {
      "epoch": 6.339111328125e-05,
      "model_forward_time": 0.11624836921691895,
      "step": 10386
    },
    {
      "epoch": 6.339111328125e-05,
      "step": 10386,
      "training_step_time": 0.3815338611602783
    },
    {
      "epoch": 6.3397216796875e-05,
      "model_forward_time": 0.1168055534362793,
      "step": 10387
    },
    {
      "epoch": 6.3397216796875e-05,
      "step": 10387,
      "training_step_time": 0.4919905662536621
    },
    {
      "epoch": 6.34033203125e-05,
      "model_forward_time": 0.1167290210723877,
      "step": 10388
    },
    {
      "epoch": 6.34033203125e-05,
      "step": 10388,
      "training_step_time": 0.4563024044036865
    },
    {
      "epoch": 6.3409423828125e-05,
      "model_forward_time": 0.11719655990600586,
      "step": 10389
    },
    {
      "epoch": 6.3409423828125e-05,
      "step": 10389,
      "training_step_time": 0.4952249526977539
    },
    {
      "epoch": 6.341552734375e-05,
      "grad_norm": 0.31322938203811646,
      "learning_rate": 9.590959246914995e-05,
      "loss": 0.0663,
      "step": 10390
    },
    {
      "epoch": 6.341552734375e-05,
      "model_forward_time": 0.11661410331726074,
      "step": 10390
    },
    {
      "epoch": 6.341552734375e-05,
      "step": 10390,
      "training_step_time": 0.4036600589752197
    },
    {
      "epoch": 6.3421630859375e-05,
      "model_forward_time": 0.11837363243103027,
      "step": 10391
    },
    {
      "epoch": 6.3421630859375e-05,
      "step": 10391,
      "training_step_time": 0.38546252250671387
    },
    {
      "epoch": 6.3427734375e-05,
      "model_forward_time": 0.11852669715881348,
      "step": 10392
    },
    {
      "epoch": 6.3427734375e-05,
      "step": 10392,
      "training_step_time": 0.4361703395843506
    },
    {
      "epoch": 6.3433837890625e-05,
      "model_forward_time": 0.11655664443969727,
      "step": 10393
    },
    {
      "epoch": 6.3433837890625e-05,
      "step": 10393,
      "training_step_time": 0.3911588191986084
    },
    {
      "epoch": 6.343994140625e-05,
      "model_forward_time": 0.11708307266235352,
      "step": 10394
    },
    {
      "epoch": 6.343994140625e-05,
      "step": 10394,
      "training_step_time": 0.40020251274108887
    },
    {
      "epoch": 6.3446044921875e-05,
      "model_forward_time": 0.11638164520263672,
      "step": 10395
    },
    {
      "epoch": 6.3446044921875e-05,
      "step": 10395,
      "training_step_time": 0.39399218559265137
    },
    {
      "epoch": 6.34521484375e-05,
      "model_forward_time": 0.11767363548278809,
      "step": 10396
    },
    {
      "epoch": 6.34521484375e-05,
      "step": 10396,
      "training_step_time": 0.4626274108886719
    },
    {
      "epoch": 6.3458251953125e-05,
      "model_forward_time": 0.11670470237731934,
      "step": 10397
    },
    {
      "epoch": 6.3458251953125e-05,
      "step": 10397,
      "training_step_time": 0.4080026149749756
    },
    {
      "epoch": 6.346435546875e-05,
      "model_forward_time": 0.11825966835021973,
      "step": 10398
    },
    {
      "epoch": 6.346435546875e-05,
      "step": 10398,
      "training_step_time": 0.37834787368774414
    },
    {
      "epoch": 6.3470458984375e-05,
      "model_forward_time": 0.11827445030212402,
      "step": 10399
    },
    {
      "epoch": 6.3470458984375e-05,
      "step": 10399,
      "training_step_time": 0.39496707916259766
    },
    {
      "epoch": 6.34765625e-05,
      "grad_norm": 0.26482635736465454,
      "learning_rate": 9.589866884807635e-05,
      "loss": 0.0704,
      "step": 10400
    },
    {
      "epoch": 6.34765625e-05,
      "model_forward_time": 0.11709308624267578,
      "step": 10400
    },
    {
      "epoch": 6.34765625e-05,
      "step": 10400,
      "training_step_time": 0.39552783966064453
    },
    {
      "epoch": 6.3482666015625e-05,
      "model_forward_time": 0.11655306816101074,
      "step": 10401
    },
    {
      "epoch": 6.3482666015625e-05,
      "step": 10401,
      "training_step_time": 0.37109875679016113
    },
    {
      "epoch": 6.348876953125e-05,
      "model_forward_time": 0.11736655235290527,
      "step": 10402
    },
    {
      "epoch": 6.348876953125e-05,
      "step": 10402,
      "training_step_time": 0.4266519546508789
    },
    {
      "epoch": 6.3494873046875e-05,
      "model_forward_time": 0.11722707748413086,
      "step": 10403
    },
    {
      "epoch": 6.3494873046875e-05,
      "step": 10403,
      "training_step_time": 0.4312474727630615
    },
    {
      "epoch": 6.35009765625e-05,
      "model_forward_time": 0.1191558837890625,
      "step": 10404
    },
    {
      "epoch": 6.35009765625e-05,
      "step": 10404,
      "training_step_time": 0.37689638137817383
    },
    {
      "epoch": 6.3507080078125e-05,
      "model_forward_time": 0.11638474464416504,
      "step": 10405
    },
    {
      "epoch": 6.3507080078125e-05,
      "step": 10405,
      "training_step_time": 0.3925316333770752
    },
    {
      "epoch": 6.351318359375e-05,
      "model_forward_time": 0.11847043037414551,
      "step": 10406
    },
    {
      "epoch": 6.351318359375e-05,
      "step": 10406,
      "training_step_time": 0.3860018253326416
    },
    {
      "epoch": 6.3519287109375e-05,
      "model_forward_time": 0.11943793296813965,
      "step": 10407
    },
    {
      "epoch": 6.3519287109375e-05,
      "step": 10407,
      "training_step_time": 0.38590455055236816
    },
    {
      "epoch": 6.3525390625e-05,
      "model_forward_time": 0.11705327033996582,
      "step": 10408
    },
    {
      "epoch": 6.3525390625e-05,
      "step": 10408,
      "training_step_time": 0.3856675624847412
    },
    {
      "epoch": 6.3531494140625e-05,
      "model_forward_time": 0.11753582954406738,
      "step": 10409
    },
    {
      "epoch": 6.3531494140625e-05,
      "step": 10409,
      "training_step_time": 0.43675804138183594
    },
    {
      "epoch": 6.353759765625e-05,
      "grad_norm": 0.21254251897335052,
      "learning_rate": 9.588773128419906e-05,
      "loss": 0.0652,
      "step": 10410
    },
    {
      "epoch": 6.353759765625e-05,
      "model_forward_time": 0.11712908744812012,
      "step": 10410
    },
    {
      "epoch": 6.353759765625e-05,
      "step": 10410,
      "training_step_time": 0.4028596878051758
    },
    {
      "epoch": 6.3543701171875e-05,
      "model_forward_time": 0.11712431907653809,
      "step": 10411
    },
    {
      "epoch": 6.3543701171875e-05,
      "step": 10411,
      "training_step_time": 0.492462158203125
    },
    {
      "epoch": 6.35498046875e-05,
      "model_forward_time": 0.11642122268676758,
      "step": 10412
    },
    {
      "epoch": 6.35498046875e-05,
      "step": 10412,
      "training_step_time": 0.39468932151794434
    },
    {
      "epoch": 6.3555908203125e-05,
      "model_forward_time": 0.1169884204864502,
      "step": 10413
    },
    {
      "epoch": 6.3555908203125e-05,
      "step": 10413,
      "training_step_time": 0.38980960845947266
    },
    {
      "epoch": 6.356201171875e-05,
      "model_forward_time": 0.11696028709411621,
      "step": 10414
    },
    {
      "epoch": 6.356201171875e-05,
      "step": 10414,
      "training_step_time": 0.39023566246032715
    },
    {
      "epoch": 6.3568115234375e-05,
      "model_forward_time": 0.11708378791809082,
      "step": 10415
    },
    {
      "epoch": 6.3568115234375e-05,
      "step": 10415,
      "training_step_time": 0.38172483444213867
    },
    {
      "epoch": 6.357421875e-05,
      "model_forward_time": 0.12706589698791504,
      "step": 10416
    },
    {
      "epoch": 6.357421875e-05,
      "step": 10416,
      "training_step_time": 0.8194355964660645
    },
    {
      "epoch": 6.3580322265625e-05,
      "model_forward_time": 0.1156163215637207,
      "step": 10417
    },
    {
      "epoch": 6.3580322265625e-05,
      "step": 10417,
      "training_step_time": 0.3869743347167969
    },
    {
      "epoch": 6.358642578125e-05,
      "model_forward_time": 0.11632156372070312,
      "step": 10418
    },
    {
      "epoch": 6.358642578125e-05,
      "step": 10418,
      "training_step_time": 0.41806745529174805
    },
    {
      "epoch": 6.3592529296875e-05,
      "model_forward_time": 0.12213921546936035,
      "step": 10419
    },
    {
      "epoch": 6.3592529296875e-05,
      "step": 10419,
      "training_step_time": 0.45799994468688965
    },
    {
      "epoch": 6.35986328125e-05,
      "grad_norm": 0.15878483653068542,
      "learning_rate": 9.58767797808406e-05,
      "loss": 0.0661,
      "step": 10420
    },
    {
      "epoch": 6.35986328125e-05,
      "model_forward_time": 0.11563491821289062,
      "step": 10420
    },
    {
      "epoch": 6.35986328125e-05,
      "step": 10420,
      "training_step_time": 0.41625475883483887
    },
    {
      "epoch": 6.3604736328125e-05,
      "model_forward_time": 0.11592364311218262,
      "step": 10421
    },
    {
      "epoch": 6.3604736328125e-05,
      "step": 10421,
      "training_step_time": 0.3893544673919678
    },
    {
      "epoch": 6.361083984375e-05,
      "model_forward_time": 0.11554574966430664,
      "step": 10422
    },
    {
      "epoch": 6.361083984375e-05,
      "step": 10422,
      "training_step_time": 0.5431849956512451
    },
    {
      "epoch": 6.3616943359375e-05,
      "model_forward_time": 0.11643409729003906,
      "step": 10423
    },
    {
      "epoch": 6.3616943359375e-05,
      "step": 10423,
      "training_step_time": 0.43246030807495117
    },
    {
      "epoch": 6.3623046875e-05,
      "model_forward_time": 0.11613154411315918,
      "step": 10424
    },
    {
      "epoch": 6.3623046875e-05,
      "step": 10424,
      "training_step_time": 0.40685415267944336
    },
    {
      "epoch": 6.3629150390625e-05,
      "model_forward_time": 0.11601734161376953,
      "step": 10425
    },
    {
      "epoch": 6.3629150390625e-05,
      "step": 10425,
      "training_step_time": 0.4142265319824219
    },
    {
      "epoch": 6.363525390625e-05,
      "model_forward_time": 0.11610913276672363,
      "step": 10426
    },
    {
      "epoch": 6.363525390625e-05,
      "step": 10426,
      "training_step_time": 0.3872518539428711
    },
    {
      "epoch": 6.3641357421875e-05,
      "model_forward_time": 0.1159815788269043,
      "step": 10427
    },
    {
      "epoch": 6.3641357421875e-05,
      "step": 10427,
      "training_step_time": 0.3842146396636963
    },
    {
      "epoch": 6.36474609375e-05,
      "model_forward_time": 0.11665129661560059,
      "step": 10428
    },
    {
      "epoch": 6.36474609375e-05,
      "step": 10428,
      "training_step_time": 0.7790791988372803
    },
    {
      "epoch": 6.3653564453125e-05,
      "model_forward_time": 0.11542773246765137,
      "step": 10429
    },
    {
      "epoch": 6.3653564453125e-05,
      "step": 10429,
      "training_step_time": 0.4393942356109619
    },
    {
      "epoch": 6.365966796875e-05,
      "grad_norm": 0.20386353135108948,
      "learning_rate": 9.586581434132775e-05,
      "loss": 0.0634,
      "step": 10430
    },
    {
      "epoch": 6.365966796875e-05,
      "model_forward_time": 0.1156623363494873,
      "step": 10430
    },
    {
      "epoch": 6.365966796875e-05,
      "step": 10430,
      "training_step_time": 0.42989373207092285
    },
    {
      "epoch": 6.3665771484375e-05,
      "model_forward_time": 0.11536192893981934,
      "step": 10431
    },
    {
      "epoch": 6.3665771484375e-05,
      "step": 10431,
      "training_step_time": 0.3929269313812256
    },
    {
      "epoch": 6.3671875e-05,
      "model_forward_time": 0.11611413955688477,
      "step": 10432
    },
    {
      "epoch": 6.3671875e-05,
      "step": 10432,
      "training_step_time": 0.4529731273651123
    },
    {
      "epoch": 6.3677978515625e-05,
      "model_forward_time": 0.11612153053283691,
      "step": 10433
    },
    {
      "epoch": 6.3677978515625e-05,
      "step": 10433,
      "training_step_time": 0.41841864585876465
    },
    {
      "epoch": 6.368408203125e-05,
      "model_forward_time": 0.11674714088439941,
      "step": 10434
    },
    {
      "epoch": 6.368408203125e-05,
      "step": 10434,
      "training_step_time": 0.5006639957427979
    },
    {
      "epoch": 6.3690185546875e-05,
      "model_forward_time": 0.11717844009399414,
      "step": 10435
    },
    {
      "epoch": 6.3690185546875e-05,
      "step": 10435,
      "training_step_time": 0.4101994037628174
    },
    {
      "epoch": 6.36962890625e-05,
      "model_forward_time": 0.11648178100585938,
      "step": 10436
    },
    {
      "epoch": 6.36962890625e-05,
      "step": 10436,
      "training_step_time": 0.42767834663391113
    },
    {
      "epoch": 6.3702392578125e-05,
      "model_forward_time": 0.11662435531616211,
      "step": 10437
    },
    {
      "epoch": 6.3702392578125e-05,
      "step": 10437,
      "training_step_time": 0.37828922271728516
    },
    {
      "epoch": 6.370849609375e-05,
      "model_forward_time": 0.1175389289855957,
      "step": 10438
    },
    {
      "epoch": 6.370849609375e-05,
      "step": 10438,
      "training_step_time": 0.43982982635498047
    },
    {
      "epoch": 6.3714599609375e-05,
      "model_forward_time": 0.11677789688110352,
      "step": 10439
    },
    {
      "epoch": 6.3714599609375e-05,
      "step": 10439,
      "training_step_time": 0.38303375244140625
    },
    {
      "epoch": 6.3720703125e-05,
      "grad_norm": 0.15271510183811188,
      "learning_rate": 9.58548349689915e-05,
      "loss": 0.0694,
      "step": 10440
    },
    {
      "epoch": 6.3720703125e-05,
      "model_forward_time": 0.11809206008911133,
      "step": 10440
    },
    {
      "epoch": 6.3720703125e-05,
      "step": 10440,
      "training_step_time": 0.820178747177124
    },
    {
      "epoch": 6.3726806640625e-05,
      "model_forward_time": 0.11667442321777344,
      "step": 10441
    },
    {
      "epoch": 6.3726806640625e-05,
      "step": 10441,
      "training_step_time": 0.3852574825286865
    },
    {
      "epoch": 6.373291015625e-05,
      "model_forward_time": 0.11634683609008789,
      "step": 10442
    },
    {
      "epoch": 6.373291015625e-05,
      "step": 10442,
      "training_step_time": 0.44188928604125977
    },
    {
      "epoch": 6.3739013671875e-05,
      "model_forward_time": 0.11981868743896484,
      "step": 10443
    },
    {
      "epoch": 6.3739013671875e-05,
      "step": 10443,
      "training_step_time": 0.468625545501709
    },
    {
      "epoch": 6.37451171875e-05,
      "model_forward_time": 0.11644697189331055,
      "step": 10444
    },
    {
      "epoch": 6.37451171875e-05,
      "step": 10444,
      "training_step_time": 0.48506689071655273
    },
    {
      "epoch": 6.3751220703125e-05,
      "model_forward_time": 0.11587405204772949,
      "step": 10445
    },
    {
      "epoch": 6.3751220703125e-05,
      "step": 10445,
      "training_step_time": 0.4420759677886963
    },
    {
      "epoch": 6.375732421875e-05,
      "model_forward_time": 0.11608386039733887,
      "step": 10446
    },
    {
      "epoch": 6.375732421875e-05,
      "step": 10446,
      "training_step_time": 0.5732924938201904
    },
    {
      "epoch": 6.3763427734375e-05,
      "model_forward_time": 0.11676454544067383,
      "step": 10447
    },
    {
      "epoch": 6.3763427734375e-05,
      "step": 10447,
      "training_step_time": 0.4129142761230469
    },
    {
      "epoch": 6.376953125e-05,
      "model_forward_time": 0.11519026756286621,
      "step": 10448
    },
    {
      "epoch": 6.376953125e-05,
      "step": 10448,
      "training_step_time": 0.3963344097137451
    },
    {
      "epoch": 6.3775634765625e-05,
      "model_forward_time": 0.11611557006835938,
      "step": 10449
    },
    {
      "epoch": 6.3775634765625e-05,
      "step": 10449,
      "training_step_time": 0.38646626472473145
    },
    {
      "epoch": 6.378173828125e-05,
      "grad_norm": 0.31565552949905396,
      "learning_rate": 9.584384166716714e-05,
      "loss": 0.0672,
      "step": 10450
    },
    {
      "epoch": 6.378173828125e-05,
      "model_forward_time": 0.11745405197143555,
      "step": 10450
    },
    {
      "epoch": 6.378173828125e-05,
      "step": 10450,
      "training_step_time": 0.39530420303344727
    },
    {
      "epoch": 6.3787841796875e-05,
      "model_forward_time": 0.11764407157897949,
      "step": 10451
    },
    {
      "epoch": 6.3787841796875e-05,
      "step": 10451,
      "training_step_time": 0.3887631893157959
    },
    {
      "epoch": 6.37939453125e-05,
      "model_forward_time": 0.11850452423095703,
      "step": 10452
    },
    {
      "epoch": 6.37939453125e-05,
      "step": 10452,
      "training_step_time": 0.7034785747528076
    },
    {
      "epoch": 6.3800048828125e-05,
      "model_forward_time": 0.11669135093688965,
      "step": 10453
    },
    {
      "epoch": 6.3800048828125e-05,
      "step": 10453,
      "training_step_time": 0.3833937644958496
    },
    {
      "epoch": 6.380615234375e-05,
      "model_forward_time": 0.1164388656616211,
      "step": 10454
    },
    {
      "epoch": 6.380615234375e-05,
      "step": 10454,
      "training_step_time": 0.378371000289917
    },
    {
      "epoch": 6.3812255859375e-05,
      "model_forward_time": 0.11656665802001953,
      "step": 10455
    },
    {
      "epoch": 6.3812255859375e-05,
      "step": 10455,
      "training_step_time": 0.3729383945465088
    },
    {
      "epoch": 6.3818359375e-05,
      "model_forward_time": 0.11610531806945801,
      "step": 10456
    },
    {
      "epoch": 6.3818359375e-05,
      "step": 10456,
      "training_step_time": 0.40706658363342285
    },
    {
      "epoch": 6.3824462890625e-05,
      "model_forward_time": 0.11548972129821777,
      "step": 10457
    },
    {
      "epoch": 6.3824462890625e-05,
      "step": 10457,
      "training_step_time": 0.46471285820007324
    },
    {
      "epoch": 6.383056640625e-05,
      "model_forward_time": 0.11625242233276367,
      "step": 10458
    },
    {
      "epoch": 6.383056640625e-05,
      "step": 10458,
      "training_step_time": 0.5898408889770508
    },
    {
      "epoch": 6.3836669921875e-05,
      "model_forward_time": 0.11632347106933594,
      "step": 10459
    },
    {
      "epoch": 6.3836669921875e-05,
      "step": 10459,
      "training_step_time": 0.44066572189331055
    },
    {
      "epoch": 6.38427734375e-05,
      "grad_norm": 0.2629700005054474,
      "learning_rate": 9.583283443919409e-05,
      "loss": 0.063,
      "step": 10460
    },
    {
      "epoch": 6.38427734375e-05,
      "model_forward_time": 0.11563277244567871,
      "step": 10460
    },
    {
      "epoch": 6.38427734375e-05,
      "step": 10460,
      "training_step_time": 0.406951904296875
    },
    {
      "epoch": 6.3848876953125e-05,
      "model_forward_time": 0.11548423767089844,
      "step": 10461
    },
    {
      "epoch": 6.3848876953125e-05,
      "step": 10461,
      "training_step_time": 0.409287691116333
    },
    {
      "epoch": 6.385498046875e-05,
      "model_forward_time": 0.11553502082824707,
      "step": 10462
    },
    {
      "epoch": 6.385498046875e-05,
      "step": 10462,
      "training_step_time": 0.40102481842041016
    },
    {
      "epoch": 6.3861083984375e-05,
      "model_forward_time": 0.11671233177185059,
      "step": 10463
    },
    {
      "epoch": 6.3861083984375e-05,
      "step": 10463,
      "training_step_time": 0.48090529441833496
    },
    {
      "epoch": 6.38671875e-05,
      "model_forward_time": 0.11640143394470215,
      "step": 10464
    },
    {
      "epoch": 6.38671875e-05,
      "step": 10464,
      "training_step_time": 0.5196452140808105
    },
    {
      "epoch": 6.3873291015625e-05,
      "model_forward_time": 0.11656069755554199,
      "step": 10465
    },
    {
      "epoch": 6.3873291015625e-05,
      "step": 10465,
      "training_step_time": 0.395003080368042
    },
    {
      "epoch": 6.387939453125e-05,
      "model_forward_time": 0.11650991439819336,
      "step": 10466
    },
    {
      "epoch": 6.387939453125e-05,
      "step": 10466,
      "training_step_time": 0.39865541458129883
    },
    {
      "epoch": 6.3885498046875e-05,
      "model_forward_time": 0.11640238761901855,
      "step": 10467
    },
    {
      "epoch": 6.3885498046875e-05,
      "step": 10467,
      "training_step_time": 0.39002203941345215
    },
    {
      "epoch": 6.38916015625e-05,
      "model_forward_time": 0.11706805229187012,
      "step": 10468
    },
    {
      "epoch": 6.38916015625e-05,
      "step": 10468,
      "training_step_time": 0.3850569725036621
    },
    {
      "epoch": 6.3897705078125e-05,
      "model_forward_time": 0.11752152442932129,
      "step": 10469
    },
    {
      "epoch": 6.3897705078125e-05,
      "step": 10469,
      "training_step_time": 0.3868889808654785
    },
    {
      "epoch": 6.390380859375e-05,
      "grad_norm": 0.24007153511047363,
      "learning_rate": 9.582181328841611e-05,
      "loss": 0.0648,
      "step": 10470
    },
    {
      "epoch": 6.390380859375e-05,
      "model_forward_time": 0.11780881881713867,
      "step": 10470
    },
    {
      "epoch": 6.390380859375e-05,
      "step": 10470,
      "training_step_time": 0.7401328086853027
    },
    {
      "epoch": 6.3909912109375e-05,
      "model_forward_time": 0.11623430252075195,
      "step": 10471
    },
    {
      "epoch": 6.3909912109375e-05,
      "step": 10471,
      "training_step_time": 0.4013187885284424
    },
    {
      "epoch": 6.3916015625e-05,
      "model_forward_time": 0.1157233715057373,
      "step": 10472
    },
    {
      "epoch": 6.3916015625e-05,
      "step": 10472,
      "training_step_time": 0.4641275405883789
    },
    {
      "epoch": 6.3922119140625e-05,
      "model_forward_time": 0.11606073379516602,
      "step": 10473
    },
    {
      "epoch": 6.3922119140625e-05,
      "step": 10473,
      "training_step_time": 0.432680606842041
    },
    {
      "epoch": 6.392822265625e-05,
      "model_forward_time": 0.11777329444885254,
      "step": 10474
    },
    {
      "epoch": 6.392822265625e-05,
      "step": 10474,
      "training_step_time": 0.38303303718566895
    },
    {
      "epoch": 6.3934326171875e-05,
      "model_forward_time": 0.11532306671142578,
      "step": 10475
    },
    {
      "epoch": 6.3934326171875e-05,
      "step": 10475,
      "training_step_time": 0.39620137214660645
    },
    {
      "epoch": 6.39404296875e-05,
      "model_forward_time": 0.11677050590515137,
      "step": 10476
    },
    {
      "epoch": 6.39404296875e-05,
      "step": 10476,
      "training_step_time": 0.7303962707519531
    },
    {
      "epoch": 6.3946533203125e-05,
      "model_forward_time": 0.11543869972229004,
      "step": 10477
    },
    {
      "epoch": 6.3946533203125e-05,
      "step": 10477,
      "training_step_time": 0.42762327194213867
    },
    {
      "epoch": 6.395263671875e-05,
      "model_forward_time": 0.11579608917236328,
      "step": 10478
    },
    {
      "epoch": 6.395263671875e-05,
      "step": 10478,
      "training_step_time": 0.4758436679840088
    },
    {
      "epoch": 6.3958740234375e-05,
      "model_forward_time": 0.11636877059936523,
      "step": 10479
    },
    {
      "epoch": 6.3958740234375e-05,
      "step": 10479,
      "training_step_time": 0.3894028663635254
    },
    {
      "epoch": 6.396484375e-05,
      "grad_norm": 0.14709483087062836,
      "learning_rate": 9.581077821818109e-05,
      "loss": 0.064,
      "step": 10480
    },
    {
      "epoch": 6.396484375e-05,
      "model_forward_time": 0.11555242538452148,
      "step": 10480
    },
    {
      "epoch": 6.396484375e-05,
      "step": 10480,
      "training_step_time": 0.4037902355194092
    },
    {
      "epoch": 6.3970947265625e-05,
      "model_forward_time": 0.11600017547607422,
      "step": 10481
    },
    {
      "epoch": 6.3970947265625e-05,
      "step": 10481,
      "training_step_time": 0.3965904712677002
    },
    {
      "epoch": 6.397705078125e-05,
      "model_forward_time": 0.11609220504760742,
      "step": 10482
    },
    {
      "epoch": 6.397705078125e-05,
      "step": 10482,
      "training_step_time": 0.4937868118286133
    },
    {
      "epoch": 6.3983154296875e-05,
      "model_forward_time": 0.11651992797851562,
      "step": 10483
    },
    {
      "epoch": 6.3983154296875e-05,
      "step": 10483,
      "training_step_time": 0.42707371711730957
    },
    {
      "epoch": 6.39892578125e-05,
      "model_forward_time": 0.11631631851196289,
      "step": 10484
    },
    {
      "epoch": 6.39892578125e-05,
      "step": 10484,
      "training_step_time": 0.4573986530303955
    },
    {
      "epoch": 6.3995361328125e-05,
      "model_forward_time": 0.11868548393249512,
      "step": 10485
    },
    {
      "epoch": 6.3995361328125e-05,
      "step": 10485,
      "training_step_time": 0.4799227714538574
    },
    {
      "epoch": 6.400146484375e-05,
      "model_forward_time": 0.11631226539611816,
      "step": 10486
    },
    {
      "epoch": 6.400146484375e-05,
      "step": 10486,
      "training_step_time": 0.44125795364379883
    },
    {
      "epoch": 6.4007568359375e-05,
      "model_forward_time": 0.1161813735961914,
      "step": 10487
    },
    {
      "epoch": 6.4007568359375e-05,
      "step": 10487,
      "training_step_time": 0.38904809951782227
    },
    {
      "epoch": 6.4013671875e-05,
      "model_forward_time": 0.11552739143371582,
      "step": 10488
    },
    {
      "epoch": 6.4013671875e-05,
      "step": 10488,
      "training_step_time": 1.013477087020874
    },
    {
      "epoch": 6.4019775390625e-05,
      "model_forward_time": 0.11590576171875,
      "step": 10489
    },
    {
      "epoch": 6.4019775390625e-05,
      "step": 10489,
      "training_step_time": 0.3929438591003418
    },
    {
      "epoch": 6.402587890625e-05,
      "grad_norm": 0.17932185530662537,
      "learning_rate": 9.579972923184122e-05,
      "loss": 0.0621,
      "step": 10490
    },
    {
      "epoch": 6.402587890625e-05,
      "model_forward_time": 0.11505842208862305,
      "step": 10490
    },
    {
      "epoch": 6.402587890625e-05,
      "step": 10490,
      "training_step_time": 0.3914675712585449
    },
    {
      "epoch": 6.4031982421875e-05,
      "model_forward_time": 0.11483502388000488,
      "step": 10491
    },
    {
      "epoch": 6.4031982421875e-05,
      "step": 10491,
      "training_step_time": 0.4617421627044678
    },
    {
      "epoch": 6.40380859375e-05,
      "model_forward_time": 0.11575794219970703,
      "step": 10492
    },
    {
      "epoch": 6.40380859375e-05,
      "step": 10492,
      "training_step_time": 0.37678098678588867
    },
    {
      "epoch": 6.4044189453125e-05,
      "model_forward_time": 0.11527085304260254,
      "step": 10493
    },
    {
      "epoch": 6.4044189453125e-05,
      "step": 10493,
      "training_step_time": 0.38827037811279297
    },
    {
      "epoch": 6.405029296875e-05,
      "model_forward_time": 0.116546630859375,
      "step": 10494
    },
    {
      "epoch": 6.405029296875e-05,
      "step": 10494,
      "training_step_time": 0.6638481616973877
    },
    {
      "epoch": 6.4056396484375e-05,
      "model_forward_time": 0.11543750762939453,
      "step": 10495
    },
    {
      "epoch": 6.4056396484375e-05,
      "step": 10495,
      "training_step_time": 0.3877747058868408
    },
    {
      "epoch": 6.40625e-05,
      "model_forward_time": 0.1155850887298584,
      "step": 10496
    },
    {
      "epoch": 6.40625e-05,
      "step": 10496,
      "training_step_time": 0.3960914611816406
    },
    {
      "epoch": 6.4068603515625e-05,
      "model_forward_time": 0.11502695083618164,
      "step": 10497
    },
    {
      "epoch": 6.4068603515625e-05,
      "step": 10497,
      "training_step_time": 0.4718456268310547
    },
    {
      "epoch": 6.407470703125e-05,
      "model_forward_time": 0.11618995666503906,
      "step": 10498
    },
    {
      "epoch": 6.407470703125e-05,
      "step": 10498,
      "training_step_time": 0.41631293296813965
    },
    {
      "epoch": 6.4080810546875e-05,
      "model_forward_time": 0.11562943458557129,
      "step": 10499
    },
    {
      "epoch": 6.4080810546875e-05,
      "step": 10499,
      "training_step_time": 0.43079519271850586
    },
    {
      "epoch": 6.40869140625e-05,
      "grad_norm": 0.1738240122795105,
      "learning_rate": 9.578866633275288e-05,
      "loss": 0.062,
      "step": 10500
    },
    {
      "epoch": 6.40869140625e-05,
      "model_forward_time": 0.11609506607055664,
      "step": 10500
    },
    {
      "epoch": 6.40869140625e-05,
      "step": 10500,
      "training_step_time": 0.680811882019043
    },
    {
      "epoch": 6.4093017578125e-05,
      "model_forward_time": 0.11587715148925781,
      "step": 10501
    },
    {
      "epoch": 6.4093017578125e-05,
      "step": 10501,
      "training_step_time": 0.3933680057525635
    },
    {
      "epoch": 6.409912109375e-05,
      "model_forward_time": 0.11501121520996094,
      "step": 10502
    },
    {
      "epoch": 6.409912109375e-05,
      "step": 10502,
      "training_step_time": 0.39615917205810547
    },
    {
      "epoch": 6.4105224609375e-05,
      "model_forward_time": 0.11654186248779297,
      "step": 10503
    },
    {
      "epoch": 6.4105224609375e-05,
      "step": 10503,
      "training_step_time": 0.4302985668182373
    },
    {
      "epoch": 6.4111328125e-05,
      "model_forward_time": 0.11529040336608887,
      "step": 10504
    },
    {
      "epoch": 6.4111328125e-05,
      "step": 10504,
      "training_step_time": 0.45769762992858887
    },
    {
      "epoch": 6.4117431640625e-05,
      "model_forward_time": 0.11824870109558105,
      "step": 10505
    },
    {
      "epoch": 6.4117431640625e-05,
      "step": 10505,
      "training_step_time": 0.3836188316345215
    },
    {
      "epoch": 6.412353515625e-05,
      "model_forward_time": 0.11559867858886719,
      "step": 10506
    },
    {
      "epoch": 6.412353515625e-05,
      "step": 10506,
      "training_step_time": 0.9182250499725342
    },
    {
      "epoch": 6.4129638671875e-05,
      "model_forward_time": 0.11820864677429199,
      "step": 10507
    },
    {
      "epoch": 6.4129638671875e-05,
      "step": 10507,
      "training_step_time": 0.37804150581359863
    },
    {
      "epoch": 6.41357421875e-05,
      "model_forward_time": 0.1156914234161377,
      "step": 10508
    },
    {
      "epoch": 6.41357421875e-05,
      "step": 10508,
      "training_step_time": 0.4643361568450928
    },
    {
      "epoch": 6.4141845703125e-05,
      "model_forward_time": 0.11649298667907715,
      "step": 10509
    },
    {
      "epoch": 6.4141845703125e-05,
      "step": 10509,
      "training_step_time": 0.4263005256652832
    },
    {
      "epoch": 6.414794921875e-05,
      "grad_norm": 0.15499281883239746,
      "learning_rate": 9.577758952427669e-05,
      "loss": 0.0656,
      "step": 10510
    },
    {
      "epoch": 6.414794921875e-05,
      "model_forward_time": 0.11523032188415527,
      "step": 10510
    },
    {
      "epoch": 6.414794921875e-05,
      "step": 10510,
      "training_step_time": 0.4537498950958252
    },
    {
      "epoch": 6.4154052734375e-05,
      "model_forward_time": 0.11493515968322754,
      "step": 10511
    },
    {
      "epoch": 6.4154052734375e-05,
      "step": 10511,
      "training_step_time": 0.38259148597717285
    },
    {
      "epoch": 6.416015625e-05,
      "model_forward_time": 0.11531329154968262,
      "step": 10512
    },
    {
      "epoch": 6.416015625e-05,
      "step": 10512,
      "training_step_time": 0.6928458213806152
    },
    {
      "epoch": 6.4166259765625e-05,
      "model_forward_time": 0.11800742149353027,
      "step": 10513
    },
    {
      "epoch": 6.4166259765625e-05,
      "step": 10513,
      "training_step_time": 0.37581920623779297
    },
    {
      "epoch": 6.417236328125e-05,
      "model_forward_time": 0.11760067939758301,
      "step": 10514
    },
    {
      "epoch": 6.417236328125e-05,
      "step": 10514,
      "training_step_time": 0.37673020362854004
    },
    {
      "epoch": 6.4178466796875e-05,
      "model_forward_time": 0.12069940567016602,
      "step": 10515
    },
    {
      "epoch": 6.4178466796875e-05,
      "step": 10515,
      "training_step_time": 0.37783193588256836
    },
    {
      "epoch": 6.41845703125e-05,
      "model_forward_time": 0.11758208274841309,
      "step": 10516
    },
    {
      "epoch": 6.41845703125e-05,
      "step": 10516,
      "training_step_time": 0.435992956161499
    },
    {
      "epoch": 6.4190673828125e-05,
      "model_forward_time": 0.11586236953735352,
      "step": 10517
    },
    {
      "epoch": 6.4190673828125e-05,
      "step": 10517,
      "training_step_time": 0.4501059055328369
    },
    {
      "epoch": 6.419677734375e-05,
      "model_forward_time": 0.11551403999328613,
      "step": 10518
    },
    {
      "epoch": 6.419677734375e-05,
      "step": 10518,
      "training_step_time": 1.001155138015747
    },
    {
      "epoch": 6.4202880859375e-05,
      "model_forward_time": 0.11605620384216309,
      "step": 10519
    },
    {
      "epoch": 6.4202880859375e-05,
      "step": 10519,
      "training_step_time": 0.38140416145324707
    },
    {
      "epoch": 6.4208984375e-05,
      "grad_norm": 0.21871450543403625,
      "learning_rate": 9.576649880977748e-05,
      "loss": 0.0689,
      "step": 10520
    },
    {
      "epoch": 6.4208984375e-05,
      "model_forward_time": 0.1147162914276123,
      "step": 10520
    },
    {
      "epoch": 6.4208984375e-05,
      "step": 10520,
      "training_step_time": 0.38908815383911133
    },
    {
      "epoch": 6.4215087890625e-05,
      "model_forward_time": 0.11542749404907227,
      "step": 10521
    },
    {
      "epoch": 6.4215087890625e-05,
      "step": 10521,
      "training_step_time": 0.403031587600708
    },
    {
      "epoch": 6.422119140625e-05,
      "model_forward_time": 0.11529016494750977,
      "step": 10522
    },
    {
      "epoch": 6.422119140625e-05,
      "step": 10522,
      "training_step_time": 0.4226796627044678
    },
    {
      "epoch": 6.4227294921875e-05,
      "model_forward_time": 0.11579012870788574,
      "step": 10523
    },
    {
      "epoch": 6.4227294921875e-05,
      "step": 10523,
      "training_step_time": 0.45754313468933105
    },
    {
      "epoch": 6.42333984375e-05,
      "model_forward_time": 0.11580395698547363,
      "step": 10524
    },
    {
      "epoch": 6.42333984375e-05,
      "step": 10524,
      "training_step_time": 0.9072432518005371
    },
    {
      "epoch": 6.4239501953125e-05,
      "model_forward_time": 0.11520576477050781,
      "step": 10525
    },
    {
      "epoch": 6.4239501953125e-05,
      "step": 10525,
      "training_step_time": 0.3872823715209961
    },
    {
      "epoch": 6.424560546875e-05,
      "model_forward_time": 0.11498665809631348,
      "step": 10526
    },
    {
      "epoch": 6.424560546875e-05,
      "step": 10526,
      "training_step_time": 0.39522409439086914
    },
    {
      "epoch": 6.4251708984375e-05,
      "model_forward_time": 0.11536908149719238,
      "step": 10527
    },
    {
      "epoch": 6.4251708984375e-05,
      "step": 10527,
      "training_step_time": 0.38710546493530273
    },
    {
      "epoch": 6.42578125e-05,
      "model_forward_time": 0.11456561088562012,
      "step": 10528
    },
    {
      "epoch": 6.42578125e-05,
      "step": 10528,
      "training_step_time": 0.40177083015441895
    },
    {
      "epoch": 6.4263916015625e-05,
      "model_forward_time": 0.11488199234008789,
      "step": 10529
    },
    {
      "epoch": 6.4263916015625e-05,
      "step": 10529,
      "training_step_time": 0.4133493900299072
    },
    {
      "epoch": 6.427001953125e-05,
      "grad_norm": 0.19019603729248047,
      "learning_rate": 9.575539419262434e-05,
      "loss": 0.0669,
      "step": 10530
    },
    {
      "epoch": 6.427001953125e-05,
      "model_forward_time": 0.11557340621948242,
      "step": 10530
    },
    {
      "epoch": 6.427001953125e-05,
      "step": 10530,
      "training_step_time": 0.9350922107696533
    },
    {
      "epoch": 6.4276123046875e-05,
      "model_forward_time": 0.11630749702453613,
      "step": 10531
    },
    {
      "epoch": 6.4276123046875e-05,
      "step": 10531,
      "training_step_time": 0.376422643661499
    },
    {
      "epoch": 6.42822265625e-05,
      "model_forward_time": 0.11502218246459961,
      "step": 10532
    },
    {
      "epoch": 6.42822265625e-05,
      "step": 10532,
      "training_step_time": 0.411942720413208
    },
    {
      "epoch": 6.4288330078125e-05,
      "model_forward_time": 0.11485576629638672,
      "step": 10533
    },
    {
      "epoch": 6.4288330078125e-05,
      "step": 10533,
      "training_step_time": 0.36827993392944336
    },
    {
      "epoch": 6.429443359375e-05,
      "model_forward_time": 0.11511659622192383,
      "step": 10534
    },
    {
      "epoch": 6.429443359375e-05,
      "step": 10534,
      "training_step_time": 0.4039170742034912
    },
    {
      "epoch": 6.4300537109375e-05,
      "model_forward_time": 0.11507797241210938,
      "step": 10535
    },
    {
      "epoch": 6.4300537109375e-05,
      "step": 10535,
      "training_step_time": 0.45506739616394043
    },
    {
      "epoch": 6.4306640625e-05,
      "model_forward_time": 0.1158442497253418,
      "step": 10536
    },
    {
      "epoch": 6.4306640625e-05,
      "step": 10536,
      "training_step_time": 0.38100194931030273
    },
    {
      "epoch": 6.4312744140625e-05,
      "model_forward_time": 0.11519432067871094,
      "step": 10537
    },
    {
      "epoch": 6.4312744140625e-05,
      "step": 10537,
      "training_step_time": 0.47948503494262695
    },
    {
      "epoch": 6.431884765625e-05,
      "model_forward_time": 0.11603546142578125,
      "step": 10538
    },
    {
      "epoch": 6.431884765625e-05,
      "step": 10538,
      "training_step_time": 0.3835165500640869
    },
    {
      "epoch": 6.4324951171875e-05,
      "model_forward_time": 0.11536026000976562,
      "step": 10539
    },
    {
      "epoch": 6.4324951171875e-05,
      "step": 10539,
      "training_step_time": 0.39487195014953613
    },
    {
      "epoch": 6.43310546875e-05,
      "grad_norm": 0.2198268473148346,
      "learning_rate": 9.574427567619053e-05,
      "loss": 0.0612,
      "step": 10540
    },
    {
      "epoch": 6.43310546875e-05,
      "model_forward_time": 0.11514854431152344,
      "step": 10540
    },
    {
      "epoch": 6.43310546875e-05,
      "step": 10540,
      "training_step_time": 0.3893575668334961
    },
    {
      "epoch": 6.4337158203125e-05,
      "model_forward_time": 0.11559319496154785,
      "step": 10541
    },
    {
      "epoch": 6.4337158203125e-05,
      "step": 10541,
      "training_step_time": 0.390700101852417
    },
    {
      "epoch": 6.434326171875e-05,
      "model_forward_time": 0.11578083038330078,
      "step": 10542
    },
    {
      "epoch": 6.434326171875e-05,
      "step": 10542,
      "training_step_time": 0.6657896041870117
    },
    {
      "epoch": 6.4349365234375e-05,
      "model_forward_time": 0.11578917503356934,
      "step": 10543
    },
    {
      "epoch": 6.4349365234375e-05,
      "step": 10543,
      "training_step_time": 0.47232723236083984
    },
    {
      "epoch": 6.435546875e-05,
      "model_forward_time": 0.11554241180419922,
      "step": 10544
    },
    {
      "epoch": 6.435546875e-05,
      "step": 10544,
      "training_step_time": 0.37557387351989746
    },
    {
      "epoch": 6.4361572265625e-05,
      "model_forward_time": 0.12162399291992188,
      "step": 10545
    },
    {
      "epoch": 6.4361572265625e-05,
      "step": 10545,
      "training_step_time": 0.3961665630340576
    },
    {
      "epoch": 6.436767578125e-05,
      "model_forward_time": 0.11523199081420898,
      "step": 10546
    },
    {
      "epoch": 6.436767578125e-05,
      "step": 10546,
      "training_step_time": 0.39929676055908203
    },
    {
      "epoch": 6.4373779296875e-05,
      "model_forward_time": 0.11506175994873047,
      "step": 10547
    },
    {
      "epoch": 6.4373779296875e-05,
      "step": 10547,
      "training_step_time": 0.3692593574523926
    },
    {
      "epoch": 6.43798828125e-05,
      "model_forward_time": 0.11530685424804688,
      "step": 10548
    },
    {
      "epoch": 6.43798828125e-05,
      "step": 10548,
      "training_step_time": 0.5269086360931396
    },
    {
      "epoch": 6.4385986328125e-05,
      "model_forward_time": 0.11607646942138672,
      "step": 10549
    },
    {
      "epoch": 6.4385986328125e-05,
      "step": 10549,
      "training_step_time": 0.4177393913269043
    },
    {
      "epoch": 6.439208984375e-05,
      "grad_norm": 0.304482102394104,
      "learning_rate": 9.573314326385359e-05,
      "loss": 0.0584,
      "step": 10550
    },
    {
      "epoch": 6.439208984375e-05,
      "model_forward_time": 0.11574244499206543,
      "step": 10550
    },
    {
      "epoch": 6.439208984375e-05,
      "step": 10550,
      "training_step_time": 0.4444923400878906
    },
    {
      "epoch": 6.4398193359375e-05,
      "model_forward_time": 0.11541318893432617,
      "step": 10551
    },
    {
      "epoch": 6.4398193359375e-05,
      "step": 10551,
      "training_step_time": 0.39252710342407227
    },
    {
      "epoch": 6.4404296875e-05,
      "model_forward_time": 0.11603355407714844,
      "step": 10552
    },
    {
      "epoch": 6.4404296875e-05,
      "step": 10552,
      "training_step_time": 0.3887789249420166
    },
    {
      "epoch": 6.4410400390625e-05,
      "model_forward_time": 0.11874675750732422,
      "step": 10553
    },
    {
      "epoch": 6.4410400390625e-05,
      "step": 10553,
      "training_step_time": 0.4002559185028076
    },
    {
      "epoch": 6.441650390625e-05,
      "model_forward_time": 0.11868834495544434,
      "step": 10554
    },
    {
      "epoch": 6.441650390625e-05,
      "step": 10554,
      "training_step_time": 0.6704404354095459
    },
    {
      "epoch": 6.4422607421875e-05,
      "model_forward_time": 0.11765670776367188,
      "step": 10555
    },
    {
      "epoch": 6.4422607421875e-05,
      "step": 10555,
      "training_step_time": 0.3804593086242676
    },
    {
      "epoch": 6.44287109375e-05,
      "model_forward_time": 0.11775398254394531,
      "step": 10556
    },
    {
      "epoch": 6.44287109375e-05,
      "step": 10556,
      "training_step_time": 0.3946352005004883
    },
    {
      "epoch": 6.4434814453125e-05,
      "model_forward_time": 0.1150350570678711,
      "step": 10557
    },
    {
      "epoch": 6.4434814453125e-05,
      "step": 10557,
      "training_step_time": 0.44377589225769043
    },
    {
      "epoch": 6.444091796875e-05,
      "model_forward_time": 0.11843991279602051,
      "step": 10558
    },
    {
      "epoch": 6.444091796875e-05,
      "step": 10558,
      "training_step_time": 0.400007963180542
    },
    {
      "epoch": 6.4447021484375e-05,
      "model_forward_time": 0.11733365058898926,
      "step": 10559
    },
    {
      "epoch": 6.4447021484375e-05,
      "step": 10559,
      "training_step_time": 0.4114573001861572
    },
    {
      "epoch": 6.4453125e-05,
      "grad_norm": 0.2328374683856964,
      "learning_rate": 9.572199695899522e-05,
      "loss": 0.0644,
      "step": 10560
    },
    {
      "epoch": 6.4453125e-05,
      "model_forward_time": 0.11860346794128418,
      "step": 10560
    },
    {
      "epoch": 6.4453125e-05,
      "step": 10560,
      "training_step_time": 0.48902463912963867
    },
    {
      "epoch": 6.4459228515625e-05,
      "model_forward_time": 0.11599946022033691,
      "step": 10561
    },
    {
      "epoch": 6.4459228515625e-05,
      "step": 10561,
      "training_step_time": 0.47696924209594727
    },
    {
      "epoch": 6.446533203125e-05,
      "model_forward_time": 0.11534261703491211,
      "step": 10562
    },
    {
      "epoch": 6.446533203125e-05,
      "step": 10562,
      "training_step_time": 0.4495992660522461
    },
    {
      "epoch": 6.4471435546875e-05,
      "model_forward_time": 0.11585760116577148,
      "step": 10563
    },
    {
      "epoch": 6.4471435546875e-05,
      "step": 10563,
      "training_step_time": 0.5008907318115234
    },
    {
      "epoch": 6.44775390625e-05,
      "model_forward_time": 0.11521768569946289,
      "step": 10564
    },
    {
      "epoch": 6.44775390625e-05,
      "step": 10564,
      "training_step_time": 0.45559144020080566
    },
    {
      "epoch": 6.4483642578125e-05,
      "model_forward_time": 0.11502671241760254,
      "step": 10565
    },
    {
      "epoch": 6.4483642578125e-05,
      "step": 10565,
      "training_step_time": 0.40397000312805176
    },
    {
      "epoch": 6.448974609375e-05,
      "model_forward_time": 0.11556720733642578,
      "step": 10566
    },
    {
      "epoch": 6.448974609375e-05,
      "step": 10566,
      "training_step_time": 0.38272809982299805
    },
    {
      "epoch": 6.4495849609375e-05,
      "model_forward_time": 0.1163785457611084,
      "step": 10567
    },
    {
      "epoch": 6.4495849609375e-05,
      "step": 10567,
      "training_step_time": 0.41388654708862305
    },
    {
      "epoch": 6.4501953125e-05,
      "model_forward_time": 0.11591434478759766,
      "step": 10568
    },
    {
      "epoch": 6.4501953125e-05,
      "step": 10568,
      "training_step_time": 0.3928720951080322
    },
    {
      "epoch": 6.4508056640625e-05,
      "model_forward_time": 0.11555838584899902,
      "step": 10569
    },
    {
      "epoch": 6.4508056640625e-05,
      "step": 10569,
      "training_step_time": 0.49491381645202637
    },
    {
      "epoch": 6.451416015625e-05,
      "grad_norm": 0.20224744081497192,
      "learning_rate": 9.571083676500141e-05,
      "loss": 0.0653,
      "step": 10570
    },
    {
      "epoch": 6.451416015625e-05,
      "model_forward_time": 0.11576128005981445,
      "step": 10570
    },
    {
      "epoch": 6.451416015625e-05,
      "step": 10570,
      "training_step_time": 0.4210479259490967
    },
    {
      "epoch": 6.4520263671875e-05,
      "model_forward_time": 0.11561059951782227,
      "step": 10571
    },
    {
      "epoch": 6.4520263671875e-05,
      "step": 10571,
      "training_step_time": 0.49569058418273926
    },
    {
      "epoch": 6.45263671875e-05,
      "model_forward_time": 0.1180276870727539,
      "step": 10572
    },
    {
      "epoch": 6.45263671875e-05,
      "step": 10572,
      "training_step_time": 0.43768978118896484
    },
    {
      "epoch": 6.4532470703125e-05,
      "model_forward_time": 0.11821341514587402,
      "step": 10573
    },
    {
      "epoch": 6.4532470703125e-05,
      "step": 10573,
      "training_step_time": 0.3801712989807129
    },
    {
      "epoch": 6.453857421875e-05,
      "model_forward_time": 0.11811709403991699,
      "step": 10574
    },
    {
      "epoch": 6.453857421875e-05,
      "step": 10574,
      "training_step_time": 0.38281702995300293
    },
    {
      "epoch": 6.4544677734375e-05,
      "model_forward_time": 0.11768960952758789,
      "step": 10575
    },
    {
      "epoch": 6.4544677734375e-05,
      "step": 10575,
      "training_step_time": 0.37123775482177734
    },
    {
      "epoch": 6.455078125e-05,
      "model_forward_time": 0.1178593635559082,
      "step": 10576
    },
    {
      "epoch": 6.455078125e-05,
      "step": 10576,
      "training_step_time": 0.41627025604248047
    },
    {
      "epoch": 6.4556884765625e-05,
      "model_forward_time": 0.11909866333007812,
      "step": 10577
    },
    {
      "epoch": 6.4556884765625e-05,
      "step": 10577,
      "training_step_time": 0.46924376487731934
    },
    {
      "epoch": 6.456298828125e-05,
      "model_forward_time": 0.11608505249023438,
      "step": 10578
    },
    {
      "epoch": 6.456298828125e-05,
      "step": 10578,
      "training_step_time": 0.41078805923461914
    },
    {
      "epoch": 6.4569091796875e-05,
      "model_forward_time": 0.11873269081115723,
      "step": 10579
    },
    {
      "epoch": 6.4569091796875e-05,
      "step": 10579,
      "training_step_time": 0.38625144958496094
    },
    {
      "epoch": 6.45751953125e-05,
      "grad_norm": 0.2092425376176834,
      "learning_rate": 9.569966268526232e-05,
      "loss": 0.0614,
      "step": 10580
    },
    {
      "epoch": 6.45751953125e-05,
      "model_forward_time": 0.11561894416809082,
      "step": 10580
    },
    {
      "epoch": 6.45751953125e-05,
      "step": 10580,
      "training_step_time": 0.38683056831359863
    },
    {
      "epoch": 6.4581298828125e-05,
      "model_forward_time": 0.11759614944458008,
      "step": 10581
    },
    {
      "epoch": 6.4581298828125e-05,
      "step": 10581,
      "training_step_time": 0.3867342472076416
    },
    {
      "epoch": 6.458740234375e-05,
      "model_forward_time": 0.1172335147857666,
      "step": 10582
    },
    {
      "epoch": 6.458740234375e-05,
      "step": 10582,
      "training_step_time": 0.38120174407958984
    },
    {
      "epoch": 6.4593505859375e-05,
      "model_forward_time": 0.11840105056762695,
      "step": 10583
    },
    {
      "epoch": 6.4593505859375e-05,
      "step": 10583,
      "training_step_time": 0.3930172920227051
    },
    {
      "epoch": 6.4599609375e-05,
      "model_forward_time": 0.1184241771697998,
      "step": 10584
    },
    {
      "epoch": 6.4599609375e-05,
      "step": 10584,
      "training_step_time": 0.6520249843597412
    },
    {
      "epoch": 6.4605712890625e-05,
      "model_forward_time": 0.12295770645141602,
      "step": 10585
    },
    {
      "epoch": 6.4605712890625e-05,
      "step": 10585,
      "training_step_time": 0.43503427505493164
    },
    {
      "epoch": 6.461181640625e-05,
      "model_forward_time": 0.11544919013977051,
      "step": 10586
    },
    {
      "epoch": 6.461181640625e-05,
      "step": 10586,
      "training_step_time": 0.38697338104248047
    },
    {
      "epoch": 6.4617919921875e-05,
      "model_forward_time": 0.11656785011291504,
      "step": 10587
    },
    {
      "epoch": 6.4617919921875e-05,
      "step": 10587,
      "training_step_time": 0.3846452236175537
    },
    {
      "epoch": 6.46240234375e-05,
      "model_forward_time": 0.11720752716064453,
      "step": 10588
    },
    {
      "epoch": 6.46240234375e-05,
      "step": 10588,
      "training_step_time": 0.3806765079498291
    },
    {
      "epoch": 6.4630126953125e-05,
      "model_forward_time": 0.1159677505493164,
      "step": 10589
    },
    {
      "epoch": 6.4630126953125e-05,
      "step": 10589,
      "training_step_time": 0.38061046600341797
    },
    {
      "epoch": 6.463623046875e-05,
      "grad_norm": 0.2867467403411865,
      "learning_rate": 9.568847472317232e-05,
      "loss": 0.0659,
      "step": 10590
    },
    {
      "epoch": 6.463623046875e-05,
      "model_forward_time": 0.11644816398620605,
      "step": 10590
    },
    {
      "epoch": 6.463623046875e-05,
      "step": 10590,
      "training_step_time": 0.4076690673828125
    },
    {
      "epoch": 6.4642333984375e-05,
      "model_forward_time": 0.1155540943145752,
      "step": 10591
    },
    {
      "epoch": 6.4642333984375e-05,
      "step": 10591,
      "training_step_time": 0.4673311710357666
    },
    {
      "epoch": 6.46484375e-05,
      "model_forward_time": 0.1157677173614502,
      "step": 10592
    },
    {
      "epoch": 6.46484375e-05,
      "step": 10592,
      "training_step_time": 0.4209916591644287
    },
    {
      "epoch": 6.4654541015625e-05,
      "model_forward_time": 0.1149284839630127,
      "step": 10593
    },
    {
      "epoch": 6.4654541015625e-05,
      "step": 10593,
      "training_step_time": 0.3932173252105713
    },
    {
      "epoch": 6.466064453125e-05,
      "model_forward_time": 0.11591053009033203,
      "step": 10594
    },
    {
      "epoch": 6.466064453125e-05,
      "step": 10594,
      "training_step_time": 0.38908982276916504
    },
    {
      "epoch": 6.4666748046875e-05,
      "model_forward_time": 0.11862325668334961,
      "step": 10595
    },
    {
      "epoch": 6.4666748046875e-05,
      "step": 10595,
      "training_step_time": 0.38182640075683594
    },
    {
      "epoch": 6.46728515625e-05,
      "model_forward_time": 0.11932015419006348,
      "step": 10596
    },
    {
      "epoch": 6.46728515625e-05,
      "step": 10596,
      "training_step_time": 0.5108215808868408
    },
    {
      "epoch": 6.4678955078125e-05,
      "model_forward_time": 0.11814546585083008,
      "step": 10597
    },
    {
      "epoch": 6.4678955078125e-05,
      "step": 10597,
      "training_step_time": 0.4197120666503906
    },
    {
      "epoch": 6.468505859375e-05,
      "model_forward_time": 0.11682915687561035,
      "step": 10598
    },
    {
      "epoch": 6.468505859375e-05,
      "step": 10598,
      "training_step_time": 0.4627079963684082
    },
    {
      "epoch": 6.4691162109375e-05,
      "model_forward_time": 0.11800599098205566,
      "step": 10599
    },
    {
      "epoch": 6.4691162109375e-05,
      "step": 10599,
      "training_step_time": 0.43996286392211914
    },
    {
      "epoch": 6.4697265625e-05,
      "grad_norm": 0.21972548961639404,
      "learning_rate": 9.567727288213005e-05,
      "loss": 0.0638,
      "step": 10600
    },
    {
      "epoch": 6.4697265625e-05,
      "model_forward_time": 0.11995840072631836,
      "step": 10600
    },
    {
      "epoch": 6.4697265625e-05,
      "step": 10600,
      "training_step_time": 0.38243651390075684
    },
    {
      "epoch": 6.4703369140625e-05,
      "model_forward_time": 0.11577558517456055,
      "step": 10601
    },
    {
      "epoch": 6.4703369140625e-05,
      "step": 10601,
      "training_step_time": 0.38977813720703125
    },
    {
      "epoch": 6.470947265625e-05,
      "model_forward_time": 0.11571812629699707,
      "step": 10602
    },
    {
      "epoch": 6.470947265625e-05,
      "step": 10602,
      "training_step_time": 0.4974095821380615
    },
    {
      "epoch": 6.4715576171875e-05,
      "model_forward_time": 0.11588072776794434,
      "step": 10603
    },
    {
      "epoch": 6.4715576171875e-05,
      "step": 10603,
      "training_step_time": 0.3724055290222168
    },
    {
      "epoch": 6.47216796875e-05,
      "model_forward_time": 0.11684465408325195,
      "step": 10604
    },
    {
      "epoch": 6.47216796875e-05,
      "step": 10604,
      "training_step_time": 0.46107029914855957
    },
    {
      "epoch": 6.4727783203125e-05,
      "model_forward_time": 0.11543798446655273,
      "step": 10605
    },
    {
      "epoch": 6.4727783203125e-05,
      "step": 10605,
      "training_step_time": 0.45021772384643555
    },
    {
      "epoch": 6.473388671875e-05,
      "model_forward_time": 0.11528420448303223,
      "step": 10606
    },
    {
      "epoch": 6.473388671875e-05,
      "step": 10606,
      "training_step_time": 0.3982057571411133
    },
    {
      "epoch": 6.4739990234375e-05,
      "model_forward_time": 0.1144418716430664,
      "step": 10607
    },
    {
      "epoch": 6.4739990234375e-05,
      "step": 10607,
      "training_step_time": 0.38312268257141113
    },
    {
      "epoch": 6.474609375e-05,
      "model_forward_time": 0.11571502685546875,
      "step": 10608
    },
    {
      "epoch": 6.474609375e-05,
      "step": 10608,
      "training_step_time": 0.4285306930541992
    },
    {
      "epoch": 6.4752197265625e-05,
      "model_forward_time": 0.11634016036987305,
      "step": 10609
    },
    {
      "epoch": 6.4752197265625e-05,
      "step": 10609,
      "training_step_time": 0.40097689628601074
    },
    {
      "epoch": 6.475830078125e-05,
      "grad_norm": 0.17571179568767548,
      "learning_rate": 9.566605716553831e-05,
      "loss": 0.0641,
      "step": 10610
    },
    {
      "epoch": 6.475830078125e-05,
      "model_forward_time": 0.11507487297058105,
      "step": 10610
    },
    {
      "epoch": 6.475830078125e-05,
      "step": 10610,
      "training_step_time": 0.3885612487792969
    },
    {
      "epoch": 6.4764404296875e-05,
      "model_forward_time": 0.11545205116271973,
      "step": 10611
    },
    {
      "epoch": 6.4764404296875e-05,
      "step": 10611,
      "training_step_time": 0.4228689670562744
    },
    {
      "epoch": 6.47705078125e-05,
      "model_forward_time": 0.11535501480102539,
      "step": 10612
    },
    {
      "epoch": 6.47705078125e-05,
      "step": 10612,
      "training_step_time": 0.43901824951171875
    },
    {
      "epoch": 6.4776611328125e-05,
      "model_forward_time": 0.11483407020568848,
      "step": 10613
    },
    {
      "epoch": 6.4776611328125e-05,
      "step": 10613,
      "training_step_time": 0.42083311080932617
    },
    {
      "epoch": 6.478271484375e-05,
      "model_forward_time": 0.11541604995727539,
      "step": 10614
    },
    {
      "epoch": 6.478271484375e-05,
      "step": 10614,
      "training_step_time": 0.49555397033691406
    },
    {
      "epoch": 6.4788818359375e-05,
      "model_forward_time": 0.11528611183166504,
      "step": 10615
    },
    {
      "epoch": 6.4788818359375e-05,
      "step": 10615,
      "training_step_time": 0.3928706645965576
    },
    {
      "epoch": 6.4794921875e-05,
      "model_forward_time": 0.11574435234069824,
      "step": 10616
    },
    {
      "epoch": 6.4794921875e-05,
      "step": 10616,
      "training_step_time": 0.38883161544799805
    },
    {
      "epoch": 6.4801025390625e-05,
      "model_forward_time": 0.11546611785888672,
      "step": 10617
    },
    {
      "epoch": 6.4801025390625e-05,
      "step": 10617,
      "training_step_time": 0.3883192539215088
    },
    {
      "epoch": 6.480712890625e-05,
      "model_forward_time": 0.11577081680297852,
      "step": 10618
    },
    {
      "epoch": 6.480712890625e-05,
      "step": 10618,
      "training_step_time": 0.42624878883361816
    },
    {
      "epoch": 6.4813232421875e-05,
      "model_forward_time": 0.11571311950683594,
      "step": 10619
    },
    {
      "epoch": 6.4813232421875e-05,
      "step": 10619,
      "training_step_time": 0.46660757064819336
    },
    {
      "epoch": 6.48193359375e-05,
      "grad_norm": 0.2128036767244339,
      "learning_rate": 9.565482757680415e-05,
      "loss": 0.0607,
      "step": 10620
    },
    {
      "epoch": 6.48193359375e-05,
      "model_forward_time": 0.11539745330810547,
      "step": 10620
    },
    {
      "epoch": 6.48193359375e-05,
      "step": 10620,
      "training_step_time": 0.5154898166656494
    },
    {
      "epoch": 6.4825439453125e-05,
      "model_forward_time": 0.11600065231323242,
      "step": 10621
    },
    {
      "epoch": 6.4825439453125e-05,
      "step": 10621,
      "training_step_time": 0.39735913276672363
    },
    {
      "epoch": 6.483154296875e-05,
      "model_forward_time": 0.11493182182312012,
      "step": 10622
    },
    {
      "epoch": 6.483154296875e-05,
      "step": 10622,
      "training_step_time": 0.3875715732574463
    },
    {
      "epoch": 6.4837646484375e-05,
      "model_forward_time": 0.1162266731262207,
      "step": 10623
    },
    {
      "epoch": 6.4837646484375e-05,
      "step": 10623,
      "training_step_time": 0.38025498390197754
    },
    {
      "epoch": 6.484375e-05,
      "model_forward_time": 0.11575627326965332,
      "step": 10624
    },
    {
      "epoch": 6.484375e-05,
      "step": 10624,
      "training_step_time": 0.40805578231811523
    },
    {
      "epoch": 6.4849853515625e-05,
      "model_forward_time": 0.11533069610595703,
      "step": 10625
    },
    {
      "epoch": 6.4849853515625e-05,
      "step": 10625,
      "training_step_time": 0.4204127788543701
    },
    {
      "epoch": 6.485595703125e-05,
      "model_forward_time": 0.1152653694152832,
      "step": 10626
    },
    {
      "epoch": 6.485595703125e-05,
      "step": 10626,
      "training_step_time": 0.7361788749694824
    },
    {
      "epoch": 6.4862060546875e-05,
      "model_forward_time": 0.11514878273010254,
      "step": 10627
    },
    {
      "epoch": 6.4862060546875e-05,
      "step": 10627,
      "training_step_time": 0.41778993606567383
    },
    {
      "epoch": 6.48681640625e-05,
      "model_forward_time": 0.11441755294799805,
      "step": 10628
    },
    {
      "epoch": 6.48681640625e-05,
      "step": 10628,
      "training_step_time": 0.429210901260376
    },
    {
      "epoch": 6.4874267578125e-05,
      "model_forward_time": 0.11507797241210938,
      "step": 10629
    },
    {
      "epoch": 6.4874267578125e-05,
      "step": 10629,
      "training_step_time": 0.38588762283325195
    },
    {
      "epoch": 6.488037109375e-05,
      "grad_norm": 0.28445541858673096,
      "learning_rate": 9.56435841193388e-05,
      "loss": 0.0657,
      "step": 10630
    },
    {
      "epoch": 6.488037109375e-05,
      "model_forward_time": 0.11488604545593262,
      "step": 10630
    },
    {
      "epoch": 6.488037109375e-05,
      "step": 10630,
      "training_step_time": 0.38577985763549805
    },
    {
      "epoch": 6.4886474609375e-05,
      "model_forward_time": 0.11454892158508301,
      "step": 10631
    },
    {
      "epoch": 6.4886474609375e-05,
      "step": 10631,
      "training_step_time": 0.38957762718200684
    },
    {
      "epoch": 6.4892578125e-05,
      "model_forward_time": 0.11529946327209473,
      "step": 10632
    },
    {
      "epoch": 6.4892578125e-05,
      "step": 10632,
      "training_step_time": 0.4480562210083008
    },
    {
      "epoch": 6.4898681640625e-05,
      "model_forward_time": 0.11530399322509766,
      "step": 10633
    },
    {
      "epoch": 6.4898681640625e-05,
      "step": 10633,
      "training_step_time": 0.8453173637390137
    },
    {
      "epoch": 6.490478515625e-05,
      "model_forward_time": 0.11539626121520996,
      "step": 10634
    },
    {
      "epoch": 6.490478515625e-05,
      "step": 10634,
      "training_step_time": 0.39028358459472656
    },
    {
      "epoch": 6.4910888671875e-05,
      "model_forward_time": 0.11532020568847656,
      "step": 10635
    },
    {
      "epoch": 6.4910888671875e-05,
      "step": 10635,
      "training_step_time": 0.37958288192749023
    },
    {
      "epoch": 6.49169921875e-05,
      "model_forward_time": 0.11486959457397461,
      "step": 10636
    },
    {
      "epoch": 6.49169921875e-05,
      "step": 10636,
      "training_step_time": 0.37845754623413086
    },
    {
      "epoch": 6.4923095703125e-05,
      "model_forward_time": 0.11492443084716797,
      "step": 10637
    },
    {
      "epoch": 6.4923095703125e-05,
      "step": 10637,
      "training_step_time": 0.3997189998626709
    },
    {
      "epoch": 6.492919921875e-05,
      "model_forward_time": 0.11833691596984863,
      "step": 10638
    },
    {
      "epoch": 6.492919921875e-05,
      "step": 10638,
      "training_step_time": 0.40253400802612305
    },
    {
      "epoch": 6.4935302734375e-05,
      "model_forward_time": 0.11765646934509277,
      "step": 10639
    },
    {
      "epoch": 6.4935302734375e-05,
      "step": 10639,
      "training_step_time": 0.5046288967132568
    },
    {
      "epoch": 6.494140625e-05,
      "grad_norm": 0.2224886417388916,
      "learning_rate": 9.563232679655776e-05,
      "loss": 0.0616,
      "step": 10640
    },
    {
      "epoch": 6.494140625e-05,
      "model_forward_time": 0.11789441108703613,
      "step": 10640
    },
    {
      "epoch": 6.494140625e-05,
      "step": 10640,
      "training_step_time": 0.3853175640106201
    },
    {
      "epoch": 6.4947509765625e-05,
      "model_forward_time": 0.11560249328613281,
      "step": 10641
    },
    {
      "epoch": 6.4947509765625e-05,
      "step": 10641,
      "training_step_time": 0.3965723514556885
    },
    {
      "epoch": 6.495361328125e-05,
      "model_forward_time": 0.11507272720336914,
      "step": 10642
    },
    {
      "epoch": 6.495361328125e-05,
      "step": 10642,
      "training_step_time": 0.41928577423095703
    },
    {
      "epoch": 6.4959716796875e-05,
      "model_forward_time": 0.11756563186645508,
      "step": 10643
    },
    {
      "epoch": 6.4959716796875e-05,
      "step": 10643,
      "training_step_time": 0.3914508819580078
    },
    {
      "epoch": 6.49658203125e-05,
      "model_forward_time": 0.1181492805480957,
      "step": 10644
    },
    {
      "epoch": 6.49658203125e-05,
      "step": 10644,
      "training_step_time": 0.3768730163574219
    },
    {
      "epoch": 6.4971923828125e-05,
      "model_forward_time": 0.11867356300354004,
      "step": 10645
    },
    {
      "epoch": 6.4971923828125e-05,
      "step": 10645,
      "training_step_time": 0.7059543132781982
    },
    {
      "epoch": 6.497802734375e-05,
      "model_forward_time": 0.11711859703063965,
      "step": 10646
    },
    {
      "epoch": 6.497802734375e-05,
      "step": 10646,
      "training_step_time": 0.44942522048950195
    },
    {
      "epoch": 6.4984130859375e-05,
      "model_forward_time": 0.11431694030761719,
      "step": 10647
    },
    {
      "epoch": 6.4984130859375e-05,
      "step": 10647,
      "training_step_time": 0.4165060520172119
    },
    {
      "epoch": 6.4990234375e-05,
      "model_forward_time": 0.1173098087310791,
      "step": 10648
    },
    {
      "epoch": 6.4990234375e-05,
      "step": 10648,
      "training_step_time": 0.37766408920288086
    },
    {
      "epoch": 6.4996337890625e-05,
      "model_forward_time": 0.11483144760131836,
      "step": 10649
    },
    {
      "epoch": 6.4996337890625e-05,
      "step": 10649,
      "training_step_time": 0.38446855545043945
    },
    {
      "epoch": 6.500244140625e-05,
      "grad_norm": 0.24458855390548706,
      "learning_rate": 9.562105561188069e-05,
      "loss": 0.0566,
      "step": 10650
    },
    {
      "epoch": 6.500244140625e-05,
      "model_forward_time": 0.11705589294433594,
      "step": 10650
    },
    {
      "epoch": 6.500244140625e-05,
      "step": 10650,
      "training_step_time": 0.4799685478210449
    },
    {
      "epoch": 6.5008544921875e-05,
      "model_forward_time": 0.11867213249206543,
      "step": 10651
    },
    {
      "epoch": 6.5008544921875e-05,
      "step": 10651,
      "training_step_time": 0.45955324172973633
    },
    {
      "epoch": 6.50146484375e-05,
      "model_forward_time": 0.11775612831115723,
      "step": 10652
    },
    {
      "epoch": 6.50146484375e-05,
      "step": 10652,
      "training_step_time": 0.3739352226257324
    },
    {
      "epoch": 6.5020751953125e-05,
      "model_forward_time": 0.1172184944152832,
      "step": 10653
    },
    {
      "epoch": 6.5020751953125e-05,
      "step": 10653,
      "training_step_time": 0.3748626708984375
    },
    {
      "epoch": 6.502685546875e-05,
      "model_forward_time": 0.1176447868347168,
      "step": 10654
    },
    {
      "epoch": 6.502685546875e-05,
      "step": 10654,
      "training_step_time": 0.43791794776916504
    },
    {
      "epoch": 6.5032958984375e-05,
      "model_forward_time": 0.11576128005981445,
      "step": 10655
    },
    {
      "epoch": 6.5032958984375e-05,
      "step": 10655,
      "training_step_time": 0.4054110050201416
    },
    {
      "epoch": 6.50390625e-05,
      "model_forward_time": 0.1151127815246582,
      "step": 10656
    },
    {
      "epoch": 6.50390625e-05,
      "step": 10656,
      "training_step_time": 0.40543460845947266
    },
    {
      "epoch": 6.5045166015625e-05,
      "model_forward_time": 0.1167905330657959,
      "step": 10657
    },
    {
      "epoch": 6.5045166015625e-05,
      "step": 10657,
      "training_step_time": 0.5506505966186523
    },
    {
      "epoch": 6.505126953125e-05,
      "model_forward_time": 0.11535024642944336,
      "step": 10658
    },
    {
      "epoch": 6.505126953125e-05,
      "step": 10658,
      "training_step_time": 0.4031555652618408
    },
    {
      "epoch": 6.5057373046875e-05,
      "model_forward_time": 0.11483192443847656,
      "step": 10659
    },
    {
      "epoch": 6.5057373046875e-05,
      "step": 10659,
      "training_step_time": 0.3677191734313965
    },
    {
      "epoch": 6.50634765625e-05,
      "grad_norm": 0.17063674330711365,
      "learning_rate": 9.560977056873149e-05,
      "loss": 0.0632,
      "step": 10660
    },
    {
      "epoch": 6.50634765625e-05,
      "model_forward_time": 0.11526799201965332,
      "step": 10660
    },
    {
      "epoch": 6.50634765625e-05,
      "step": 10660,
      "training_step_time": 0.4218423366546631
    },
    {
      "epoch": 6.5069580078125e-05,
      "model_forward_time": 0.11545991897583008,
      "step": 10661
    },
    {
      "epoch": 6.5069580078125e-05,
      "step": 10661,
      "training_step_time": 0.4685661792755127
    },
    {
      "epoch": 6.507568359375e-05,
      "model_forward_time": 0.11491894721984863,
      "step": 10662
    },
    {
      "epoch": 6.507568359375e-05,
      "step": 10662,
      "training_step_time": 0.38404130935668945
    },
    {
      "epoch": 6.5081787109375e-05,
      "model_forward_time": 0.11632180213928223,
      "step": 10663
    },
    {
      "epoch": 6.5081787109375e-05,
      "step": 10663,
      "training_step_time": 0.5076758861541748
    },
    {
      "epoch": 6.5087890625e-05,
      "model_forward_time": 0.11521530151367188,
      "step": 10664
    },
    {
      "epoch": 6.5087890625e-05,
      "step": 10664,
      "training_step_time": 0.45261144638061523
    },
    {
      "epoch": 6.5093994140625e-05,
      "model_forward_time": 0.11503481864929199,
      "step": 10665
    },
    {
      "epoch": 6.5093994140625e-05,
      "step": 10665,
      "training_step_time": 0.3881959915161133
    },
    {
      "epoch": 6.510009765625e-05,
      "model_forward_time": 0.11771368980407715,
      "step": 10666
    },
    {
      "epoch": 6.510009765625e-05,
      "step": 10666,
      "training_step_time": 0.3870062828063965
    },
    {
      "epoch": 6.5106201171875e-05,
      "model_forward_time": 0.11803770065307617,
      "step": 10667
    },
    {
      "epoch": 6.5106201171875e-05,
      "step": 10667,
      "training_step_time": 0.37422704696655273
    },
    {
      "epoch": 6.51123046875e-05,
      "model_forward_time": 0.11954784393310547,
      "step": 10668
    },
    {
      "epoch": 6.51123046875e-05,
      "step": 10668,
      "training_step_time": 0.5082731246948242
    },
    {
      "epoch": 6.5118408203125e-05,
      "model_forward_time": 0.11759042739868164,
      "step": 10669
    },
    {
      "epoch": 6.5118408203125e-05,
      "step": 10669,
      "training_step_time": 1.0274739265441895
    },
    {
      "epoch": 6.512451171875e-05,
      "grad_norm": 0.2526404857635498,
      "learning_rate": 9.559847167053823e-05,
      "loss": 0.0631,
      "step": 10670
    },
    {
      "epoch": 6.512451171875e-05,
      "model_forward_time": 0.11619877815246582,
      "step": 10670
    },
    {
      "epoch": 6.512451171875e-05,
      "step": 10670,
      "training_step_time": 0.37021684646606445
    },
    {
      "epoch": 6.5130615234375e-05,
      "model_forward_time": 0.1165626049041748,
      "step": 10671
    },
    {
      "epoch": 6.5130615234375e-05,
      "step": 10671,
      "training_step_time": 0.3780479431152344
    },
    {
      "epoch": 6.513671875e-05,
      "model_forward_time": 0.1170041561126709,
      "step": 10672
    },
    {
      "epoch": 6.513671875e-05,
      "step": 10672,
      "training_step_time": 0.4001343250274658
    },
    {
      "epoch": 6.5142822265625e-05,
      "model_forward_time": 0.11648321151733398,
      "step": 10673
    },
    {
      "epoch": 6.5142822265625e-05,
      "step": 10673,
      "training_step_time": 0.40792059898376465
    },
    {
      "epoch": 6.514892578125e-05,
      "model_forward_time": 0.11545324325561523,
      "step": 10674
    },
    {
      "epoch": 6.514892578125e-05,
      "step": 10674,
      "training_step_time": 0.40924882888793945
    },
    {
      "epoch": 6.5155029296875e-05,
      "model_forward_time": 0.11561274528503418,
      "step": 10675
    },
    {
      "epoch": 6.5155029296875e-05,
      "step": 10675,
      "training_step_time": 0.756899356842041
    },
    {
      "epoch": 6.51611328125e-05,
      "model_forward_time": 0.11479043960571289,
      "step": 10676
    },
    {
      "epoch": 6.51611328125e-05,
      "step": 10676,
      "training_step_time": 0.38567233085632324
    },
    {
      "epoch": 6.5167236328125e-05,
      "model_forward_time": 0.11449241638183594,
      "step": 10677
    },
    {
      "epoch": 6.5167236328125e-05,
      "step": 10677,
      "training_step_time": 0.38535642623901367
    },
    {
      "epoch": 6.517333984375e-05,
      "model_forward_time": 0.11557888984680176,
      "step": 10678
    },
    {
      "epoch": 6.517333984375e-05,
      "step": 10678,
      "training_step_time": 0.3813803195953369
    },
    {
      "epoch": 6.5179443359375e-05,
      "model_forward_time": 0.11554312705993652,
      "step": 10679
    },
    {
      "epoch": 6.5179443359375e-05,
      "step": 10679,
      "training_step_time": 0.3736417293548584
    },
    {
      "epoch": 6.5185546875e-05,
      "grad_norm": 0.23788195848464966,
      "learning_rate": 9.558715892073323e-05,
      "loss": 0.0633,
      "step": 10680
    },
    {
      "epoch": 6.5185546875e-05,
      "model_forward_time": 0.11461496353149414,
      "step": 10680
    },
    {
      "epoch": 6.5185546875e-05,
      "step": 10680,
      "training_step_time": 0.413165807723999
    },
    {
      "epoch": 6.5191650390625e-05,
      "model_forward_time": 0.11545586585998535,
      "step": 10681
    },
    {
      "epoch": 6.5191650390625e-05,
      "step": 10681,
      "training_step_time": 1.1383368968963623
    },
    {
      "epoch": 6.519775390625e-05,
      "model_forward_time": 0.11620068550109863,
      "step": 10682
    },
    {
      "epoch": 6.519775390625e-05,
      "step": 10682,
      "training_step_time": 0.3744223117828369
    },
    {
      "epoch": 6.5203857421875e-05,
      "model_forward_time": 0.11622452735900879,
      "step": 10683
    },
    {
      "epoch": 6.5203857421875e-05,
      "step": 10683,
      "training_step_time": 0.3729586601257324
    },
    {
      "epoch": 6.52099609375e-05,
      "model_forward_time": 0.11598682403564453,
      "step": 10684
    },
    {
      "epoch": 6.52099609375e-05,
      "step": 10684,
      "training_step_time": 0.37590742111206055
    },
    {
      "epoch": 6.5216064453125e-05,
      "model_forward_time": 0.1163630485534668,
      "step": 10685
    },
    {
      "epoch": 6.5216064453125e-05,
      "step": 10685,
      "training_step_time": 0.3676161766052246
    },
    {
      "epoch": 6.522216796875e-05,
      "model_forward_time": 0.11719799041748047,
      "step": 10686
    },
    {
      "epoch": 6.522216796875e-05,
      "step": 10686,
      "training_step_time": 0.4136695861816406
    },
    {
      "epoch": 6.5228271484375e-05,
      "model_forward_time": 0.12210369110107422,
      "step": 10687
    },
    {
      "epoch": 6.5228271484375e-05,
      "step": 10687,
      "training_step_time": 0.9791531562805176
    },
    {
      "epoch": 6.5234375e-05,
      "model_forward_time": 0.11393475532531738,
      "step": 10688
    },
    {
      "epoch": 6.5234375e-05,
      "step": 10688,
      "training_step_time": 0.37543320655822754
    },
    {
      "epoch": 6.5240478515625e-05,
      "model_forward_time": 0.11733078956604004,
      "step": 10689
    },
    {
      "epoch": 6.5240478515625e-05,
      "step": 10689,
      "training_step_time": 0.3723287582397461
    },
    {
      "epoch": 6.524658203125e-05,
      "grad_norm": 0.30572113394737244,
      "learning_rate": 9.557583232275303e-05,
      "loss": 0.0608,
      "step": 10690
    },
    {
      "epoch": 6.524658203125e-05,
      "model_forward_time": 0.11564064025878906,
      "step": 10690
    },
    {
      "epoch": 6.524658203125e-05,
      "step": 10690,
      "training_step_time": 0.3796088695526123
    },
    {
      "epoch": 6.5252685546875e-05,
      "model_forward_time": 0.11450791358947754,
      "step": 10691
    },
    {
      "epoch": 6.5252685546875e-05,
      "step": 10691,
      "training_step_time": 0.3859748840332031
    },
    {
      "epoch": 6.52587890625e-05,
      "model_forward_time": 0.11453390121459961,
      "step": 10692
    },
    {
      "epoch": 6.52587890625e-05,
      "step": 10692,
      "training_step_time": 0.367539644241333
    },
    {
      "epoch": 6.5264892578125e-05,
      "model_forward_time": 0.11554384231567383,
      "step": 10693
    },
    {
      "epoch": 6.5264892578125e-05,
      "step": 10693,
      "training_step_time": 1.2334775924682617
    },
    {
      "epoch": 6.527099609375e-05,
      "model_forward_time": 0.11440134048461914,
      "step": 10694
    },
    {
      "epoch": 6.527099609375e-05,
      "step": 10694,
      "training_step_time": 0.374924898147583
    },
    {
      "epoch": 6.5277099609375e-05,
      "model_forward_time": 0.11510324478149414,
      "step": 10695
    },
    {
      "epoch": 6.5277099609375e-05,
      "step": 10695,
      "training_step_time": 0.3804299831390381
    },
    {
      "epoch": 6.5283203125e-05,
      "model_forward_time": 0.11405539512634277,
      "step": 10696
    },
    {
      "epoch": 6.5283203125e-05,
      "step": 10696,
      "training_step_time": 0.38935160636901855
    },
    {
      "epoch": 6.5289306640625e-05,
      "model_forward_time": 0.11414766311645508,
      "step": 10697
    },
    {
      "epoch": 6.5289306640625e-05,
      "step": 10697,
      "training_step_time": 0.42264676094055176
    },
    {
      "epoch": 6.529541015625e-05,
      "model_forward_time": 0.11487865447998047,
      "step": 10698
    },
    {
      "epoch": 6.529541015625e-05,
      "step": 10698,
      "training_step_time": 0.4252181053161621
    },
    {
      "epoch": 6.5301513671875e-05,
      "model_forward_time": 0.11545634269714355,
      "step": 10699
    },
    {
      "epoch": 6.5301513671875e-05,
      "step": 10699,
      "training_step_time": 0.8800094127655029
    },
    {
      "epoch": 6.53076171875e-05,
      "grad_norm": 0.19549845159053802,
      "learning_rate": 9.556449188003831e-05,
      "loss": 0.0614,
      "step": 10700
    },
    {
      "epoch": 6.53076171875e-05,
      "model_forward_time": 0.11655664443969727,
      "step": 10700
    },
    {
      "epoch": 6.53076171875e-05,
      "step": 10700,
      "training_step_time": 0.3773953914642334
    },
    {
      "epoch": 6.5313720703125e-05,
      "model_forward_time": 0.1164543628692627,
      "step": 10701
    },
    {
      "epoch": 6.5313720703125e-05,
      "step": 10701,
      "training_step_time": 0.38016176223754883
    },
    {
      "epoch": 6.531982421875e-05,
      "model_forward_time": 0.11732196807861328,
      "step": 10702
    },
    {
      "epoch": 6.531982421875e-05,
      "step": 10702,
      "training_step_time": 0.37018585205078125
    },
    {
      "epoch": 6.5325927734375e-05,
      "model_forward_time": 0.11764287948608398,
      "step": 10703
    },
    {
      "epoch": 6.5325927734375e-05,
      "step": 10703,
      "training_step_time": 0.3860185146331787
    },
    {
      "epoch": 6.533203125e-05,
      "model_forward_time": 0.11706066131591797,
      "step": 10704
    },
    {
      "epoch": 6.533203125e-05,
      "step": 10704,
      "training_step_time": 0.3758711814880371
    },
    {
      "epoch": 6.5338134765625e-05,
      "model_forward_time": 0.11640620231628418,
      "step": 10705
    },
    {
      "epoch": 6.5338134765625e-05,
      "step": 10705,
      "training_step_time": 1.0417401790618896
    },
    {
      "epoch": 6.534423828125e-05,
      "model_forward_time": 0.11600232124328613,
      "step": 10706
    },
    {
      "epoch": 6.534423828125e-05,
      "step": 10706,
      "training_step_time": 0.3974587917327881
    },
    {
      "epoch": 6.5350341796875e-05,
      "model_forward_time": 0.11648869514465332,
      "step": 10707
    },
    {
      "epoch": 6.5350341796875e-05,
      "step": 10707,
      "training_step_time": 0.3673391342163086
    },
    {
      "epoch": 6.53564453125e-05,
      "model_forward_time": 0.11734247207641602,
      "step": 10708
    },
    {
      "epoch": 6.53564453125e-05,
      "step": 10708,
      "training_step_time": 0.37027955055236816
    },
    {
      "epoch": 6.5362548828125e-05,
      "model_forward_time": 0.11506438255310059,
      "step": 10709
    },
    {
      "epoch": 6.5362548828125e-05,
      "step": 10709,
      "training_step_time": 0.40627002716064453
    },
    {
      "epoch": 6.536865234375e-05,
      "grad_norm": 0.27272170782089233,
      "learning_rate": 9.555313759603402e-05,
      "loss": 0.0679,
      "step": 10710
    },
    {
      "epoch": 6.536865234375e-05,
      "model_forward_time": 0.11485743522644043,
      "step": 10710
    },
    {
      "epoch": 6.536865234375e-05,
      "step": 10710,
      "training_step_time": 0.43223142623901367
    },
    {
      "epoch": 6.5374755859375e-05,
      "model_forward_time": 0.11516261100769043,
      "step": 10711
    },
    {
      "epoch": 6.5374755859375e-05,
      "step": 10711,
      "training_step_time": 0.6916437149047852
    },
    {
      "epoch": 6.5380859375e-05,
      "model_forward_time": 0.11525130271911621,
      "step": 10712
    },
    {
      "epoch": 6.5380859375e-05,
      "step": 10712,
      "training_step_time": 0.38524556159973145
    },
    {
      "epoch": 6.5386962890625e-05,
      "model_forward_time": 0.1144554615020752,
      "step": 10713
    },
    {
      "epoch": 6.5386962890625e-05,
      "step": 10713,
      "training_step_time": 0.3959801197052002
    },
    {
      "epoch": 6.539306640625e-05,
      "model_forward_time": 0.11559319496154785,
      "step": 10714
    },
    {
      "epoch": 6.539306640625e-05,
      "step": 10714,
      "training_step_time": 0.38893961906433105
    },
    {
      "epoch": 6.5399169921875e-05,
      "model_forward_time": 0.1151425838470459,
      "step": 10715
    },
    {
      "epoch": 6.5399169921875e-05,
      "step": 10715,
      "training_step_time": 0.38297080993652344
    },
    {
      "epoch": 6.54052734375e-05,
      "model_forward_time": 0.11517500877380371,
      "step": 10716
    },
    {
      "epoch": 6.54052734375e-05,
      "step": 10716,
      "training_step_time": 0.37700462341308594
    },
    {
      "epoch": 6.5411376953125e-05,
      "model_forward_time": 0.11513018608093262,
      "step": 10717
    },
    {
      "epoch": 6.5411376953125e-05,
      "step": 10717,
      "training_step_time": 1.0765020847320557
    },
    {
      "epoch": 6.541748046875e-05,
      "model_forward_time": 0.11509108543395996,
      "step": 10718
    },
    {
      "epoch": 6.541748046875e-05,
      "step": 10718,
      "training_step_time": 0.44672274589538574
    },
    {
      "epoch": 6.5423583984375e-05,
      "model_forward_time": 0.11469292640686035,
      "step": 10719
    },
    {
      "epoch": 6.5423583984375e-05,
      "step": 10719,
      "training_step_time": 0.4112968444824219
    },
    {
      "epoch": 6.54296875e-05,
      "grad_norm": 0.1597692221403122,
      "learning_rate": 9.554176947418931e-05,
      "loss": 0.0562,
      "step": 10720
    },
    {
      "epoch": 6.54296875e-05,
      "model_forward_time": 0.11412715911865234,
      "step": 10720
    },
    {
      "epoch": 6.54296875e-05,
      "step": 10720,
      "training_step_time": 0.3800232410430908
    },
    {
      "epoch": 6.5435791015625e-05,
      "model_forward_time": 0.1141664981842041,
      "step": 10721
    },
    {
      "epoch": 6.5435791015625e-05,
      "step": 10721,
      "training_step_time": 0.38534045219421387
    },
    {
      "epoch": 6.544189453125e-05,
      "model_forward_time": 0.11482119560241699,
      "step": 10722
    },
    {
      "epoch": 6.544189453125e-05,
      "step": 10722,
      "training_step_time": 0.3984377384185791
    },
    {
      "epoch": 6.5447998046875e-05,
      "model_forward_time": 0.11657071113586426,
      "step": 10723
    },
    {
      "epoch": 6.5447998046875e-05,
      "step": 10723,
      "training_step_time": 1.02657151222229
    },
    {
      "epoch": 6.54541015625e-05,
      "model_forward_time": 0.11417341232299805,
      "step": 10724
    },
    {
      "epoch": 6.54541015625e-05,
      "step": 10724,
      "training_step_time": 0.3815271854400635
    },
    {
      "epoch": 6.5460205078125e-05,
      "model_forward_time": 0.11500740051269531,
      "step": 10725
    },
    {
      "epoch": 6.5460205078125e-05,
      "step": 10725,
      "training_step_time": 0.3841664791107178
    },
    {
      "epoch": 6.546630859375e-05,
      "model_forward_time": 0.11424469947814941,
      "step": 10726
    },
    {
      "epoch": 6.546630859375e-05,
      "step": 10726,
      "training_step_time": 0.3795185089111328
    },
    {
      "epoch": 6.5472412109375e-05,
      "model_forward_time": 0.11527895927429199,
      "step": 10727
    },
    {
      "epoch": 6.5472412109375e-05,
      "step": 10727,
      "training_step_time": 0.37724947929382324
    },
    {
      "epoch": 6.5478515625e-05,
      "model_forward_time": 0.11434650421142578,
      "step": 10728
    },
    {
      "epoch": 6.5478515625e-05,
      "step": 10728,
      "training_step_time": 0.3770127296447754
    },
    {
      "epoch": 6.5484619140625e-05,
      "model_forward_time": 0.11568045616149902,
      "step": 10729
    },
    {
      "epoch": 6.5484619140625e-05,
      "step": 10729,
      "training_step_time": 0.8306007385253906
    },
    {
      "epoch": 6.549072265625e-05,
      "grad_norm": 0.20396485924720764,
      "learning_rate": 9.553038751795746e-05,
      "loss": 0.0586,
      "step": 10730
    },
    {
      "epoch": 6.549072265625e-05,
      "model_forward_time": 0.11499953269958496,
      "step": 10730
    },
    {
      "epoch": 6.549072265625e-05,
      "step": 10730,
      "training_step_time": 0.4553673267364502
    },
    {
      "epoch": 6.5496826171875e-05,
      "model_forward_time": 0.11523175239562988,
      "step": 10731
    },
    {
      "epoch": 6.5496826171875e-05,
      "step": 10731,
      "training_step_time": 0.39395999908447266
    },
    {
      "epoch": 6.55029296875e-05,
      "model_forward_time": 0.11495685577392578,
      "step": 10732
    },
    {
      "epoch": 6.55029296875e-05,
      "step": 10732,
      "training_step_time": 0.40079522132873535
    },
    {
      "epoch": 6.5509033203125e-05,
      "model_forward_time": 0.11471796035766602,
      "step": 10733
    },
    {
      "epoch": 6.5509033203125e-05,
      "step": 10733,
      "training_step_time": 0.3786499500274658
    },
    {
      "epoch": 6.551513671875e-05,
      "model_forward_time": 0.11452221870422363,
      "step": 10734
    },
    {
      "epoch": 6.551513671875e-05,
      "step": 10734,
      "training_step_time": 0.4163997173309326
    },
    {
      "epoch": 6.5521240234375e-05,
      "model_forward_time": 0.11516666412353516,
      "step": 10735
    },
    {
      "epoch": 6.5521240234375e-05,
      "step": 10735,
      "training_step_time": 0.8226501941680908
    },
    {
      "epoch": 6.552734375e-05,
      "model_forward_time": 0.11456084251403809,
      "step": 10736
    },
    {
      "epoch": 6.552734375e-05,
      "step": 10736,
      "training_step_time": 0.4033529758453369
    },
    {
      "epoch": 6.5533447265625e-05,
      "model_forward_time": 0.11491107940673828,
      "step": 10737
    },
    {
      "epoch": 6.5533447265625e-05,
      "step": 10737,
      "training_step_time": 0.3876075744628906
    },
    {
      "epoch": 6.553955078125e-05,
      "model_forward_time": 0.11427569389343262,
      "step": 10738
    },
    {
      "epoch": 6.553955078125e-05,
      "step": 10738,
      "training_step_time": 0.3905007839202881
    },
    {
      "epoch": 6.5545654296875e-05,
      "model_forward_time": 0.11438584327697754,
      "step": 10739
    },
    {
      "epoch": 6.5545654296875e-05,
      "step": 10739,
      "training_step_time": 0.38138580322265625
    },
    {
      "epoch": 6.55517578125e-05,
      "grad_norm": 0.19938893616199493,
      "learning_rate": 9.551899173079607e-05,
      "loss": 0.0624,
      "step": 10740
    },
    {
      "epoch": 6.55517578125e-05,
      "model_forward_time": 0.11467838287353516,
      "step": 10740
    },
    {
      "epoch": 6.55517578125e-05,
      "step": 10740,
      "training_step_time": 0.3729865550994873
    },
    {
      "epoch": 6.5557861328125e-05,
      "model_forward_time": 0.11477422714233398,
      "step": 10741
    },
    {
      "epoch": 6.5557861328125e-05,
      "step": 10741,
      "training_step_time": 1.1972911357879639
    },
    {
      "epoch": 6.556396484375e-05,
      "model_forward_time": 0.11455798149108887,
      "step": 10742
    },
    {
      "epoch": 6.556396484375e-05,
      "step": 10742,
      "training_step_time": 0.46763086318969727
    },
    {
      "epoch": 6.5570068359375e-05,
      "model_forward_time": 0.11409187316894531,
      "step": 10743
    },
    {
      "epoch": 6.5570068359375e-05,
      "step": 10743,
      "training_step_time": 0.38597559928894043
    },
    {
      "epoch": 6.5576171875e-05,
      "model_forward_time": 0.11379361152648926,
      "step": 10744
    },
    {
      "epoch": 6.5576171875e-05,
      "step": 10744,
      "training_step_time": 0.4551260471343994
    },
    {
      "epoch": 6.5582275390625e-05,
      "model_forward_time": 0.11542034149169922,
      "step": 10745
    },
    {
      "epoch": 6.5582275390625e-05,
      "step": 10745,
      "training_step_time": 0.38747119903564453
    },
    {
      "epoch": 6.558837890625e-05,
      "model_forward_time": 0.11489486694335938,
      "step": 10746
    },
    {
      "epoch": 6.558837890625e-05,
      "step": 10746,
      "training_step_time": 0.4340956211090088
    },
    {
      "epoch": 6.5594482421875e-05,
      "model_forward_time": 0.1148068904876709,
      "step": 10747
    },
    {
      "epoch": 6.5594482421875e-05,
      "step": 10747,
      "training_step_time": 0.3651413917541504
    },
    {
      "epoch": 6.56005859375e-05,
      "model_forward_time": 0.1147761344909668,
      "step": 10748
    },
    {
      "epoch": 6.56005859375e-05,
      "step": 10748,
      "training_step_time": 0.4268374443054199
    },
    {
      "epoch": 6.5606689453125e-05,
      "model_forward_time": 0.11515140533447266,
      "step": 10749
    },
    {
      "epoch": 6.5606689453125e-05,
      "step": 10749,
      "training_step_time": 0.47779130935668945
    },
    {
      "epoch": 6.561279296875e-05,
      "grad_norm": 0.2534341812133789,
      "learning_rate": 9.550758211616684e-05,
      "loss": 0.0612,
      "step": 10750
    },
    {
      "epoch": 6.561279296875e-05,
      "model_forward_time": 0.1151421070098877,
      "step": 10750
    },
    {
      "epoch": 6.561279296875e-05,
      "step": 10750,
      "training_step_time": 0.3930165767669678
    },
    {
      "epoch": 6.5618896484375e-05,
      "model_forward_time": 0.11534452438354492,
      "step": 10751
    },
    {
      "epoch": 6.5618896484375e-05,
      "step": 10751,
      "training_step_time": 0.3818695545196533
    },
    {
      "epoch": 6.5625e-05,
      "model_forward_time": 0.11508607864379883,
      "step": 10752
    },
    {
      "epoch": 6.5625e-05,
      "step": 10752,
      "training_step_time": 0.4077000617980957
    },
    {
      "epoch": 6.5631103515625e-05,
      "model_forward_time": 0.11517739295959473,
      "step": 10753
    },
    {
      "epoch": 6.5631103515625e-05,
      "step": 10753,
      "training_step_time": 0.3948063850402832
    },
    {
      "epoch": 6.563720703125e-05,
      "model_forward_time": 0.11479568481445312,
      "step": 10754
    },
    {
      "epoch": 6.563720703125e-05,
      "step": 10754,
      "training_step_time": 0.391859769821167
    },
    {
      "epoch": 6.5643310546875e-05,
      "model_forward_time": 0.11516809463500977,
      "step": 10755
    },
    {
      "epoch": 6.5643310546875e-05,
      "step": 10755,
      "training_step_time": 0.39621901512145996
    },
    {
      "epoch": 6.56494140625e-05,
      "model_forward_time": 0.11584997177124023,
      "step": 10756
    },
    {
      "epoch": 6.56494140625e-05,
      "step": 10756,
      "training_step_time": 0.39235353469848633
    },
    {
      "epoch": 6.5655517578125e-05,
      "model_forward_time": 0.11524605751037598,
      "step": 10757
    },
    {
      "epoch": 6.5655517578125e-05,
      "step": 10757,
      "training_step_time": 0.38803625106811523
    },
    {
      "epoch": 6.566162109375e-05,
      "model_forward_time": 0.11808371543884277,
      "step": 10758
    },
    {
      "epoch": 6.566162109375e-05,
      "step": 10758,
      "training_step_time": 0.38710713386535645
    },
    {
      "epoch": 6.5667724609375e-05,
      "model_forward_time": 0.11649537086486816,
      "step": 10759
    },
    {
      "epoch": 6.5667724609375e-05,
      "step": 10759,
      "training_step_time": 1.153498649597168
    },
    {
      "epoch": 6.5673828125e-05,
      "grad_norm": 0.2004890739917755,
      "learning_rate": 9.549615867753573e-05,
      "loss": 0.0649,
      "step": 10760
    },
    {
      "epoch": 6.5673828125e-05,
      "model_forward_time": 0.11432862281799316,
      "step": 10760
    },
    {
      "epoch": 6.5673828125e-05,
      "step": 10760,
      "training_step_time": 0.4426102638244629
    },
    {
      "epoch": 6.5679931640625e-05,
      "model_forward_time": 0.11416459083557129,
      "step": 10761
    },
    {
      "epoch": 6.5679931640625e-05,
      "step": 10761,
      "training_step_time": 0.4297966957092285
    },
    {
      "epoch": 6.568603515625e-05,
      "model_forward_time": 0.11443805694580078,
      "step": 10762
    },
    {
      "epoch": 6.568603515625e-05,
      "step": 10762,
      "training_step_time": 0.45195937156677246
    },
    {
      "epoch": 6.5692138671875e-05,
      "model_forward_time": 0.1141963005065918,
      "step": 10763
    },
    {
      "epoch": 6.5692138671875e-05,
      "step": 10763,
      "training_step_time": 0.3731250762939453
    },
    {
      "epoch": 6.56982421875e-05,
      "model_forward_time": 0.11367940902709961,
      "step": 10764
    },
    {
      "epoch": 6.56982421875e-05,
      "step": 10764,
      "training_step_time": 0.3671681880950928
    },
    {
      "epoch": 6.5704345703125e-05,
      "model_forward_time": 0.11493229866027832,
      "step": 10765
    },
    {
      "epoch": 6.5704345703125e-05,
      "step": 10765,
      "training_step_time": 0.7909619808197021
    },
    {
      "epoch": 6.571044921875e-05,
      "model_forward_time": 0.1148993968963623,
      "step": 10766
    },
    {
      "epoch": 6.571044921875e-05,
      "step": 10766,
      "training_step_time": 0.3855459690093994
    },
    {
      "epoch": 6.5716552734375e-05,
      "model_forward_time": 0.1151888370513916,
      "step": 10767
    },
    {
      "epoch": 6.5716552734375e-05,
      "step": 10767,
      "training_step_time": 0.38007473945617676
    },
    {
      "epoch": 6.572265625e-05,
      "model_forward_time": 0.11501550674438477,
      "step": 10768
    },
    {
      "epoch": 6.572265625e-05,
      "step": 10768,
      "training_step_time": 0.3865222930908203
    },
    {
      "epoch": 6.5728759765625e-05,
      "model_forward_time": 0.11528396606445312,
      "step": 10769
    },
    {
      "epoch": 6.5728759765625e-05,
      "step": 10769,
      "training_step_time": 0.4006693363189697
    },
    {
      "epoch": 6.573486328125e-05,
      "grad_norm": 0.1636994183063507,
      "learning_rate": 9.548472141837286e-05,
      "loss": 0.0618,
      "step": 10770
    },
    {
      "epoch": 6.573486328125e-05,
      "model_forward_time": 0.1150660514831543,
      "step": 10770
    },
    {
      "epoch": 6.573486328125e-05,
      "step": 10770,
      "training_step_time": 0.39118146896362305
    },
    {
      "epoch": 6.5740966796875e-05,
      "model_forward_time": 0.11507058143615723,
      "step": 10771
    },
    {
      "epoch": 6.5740966796875e-05,
      "step": 10771,
      "training_step_time": 0.8163583278656006
    },
    {
      "epoch": 6.57470703125e-05,
      "model_forward_time": 0.11474013328552246,
      "step": 10772
    },
    {
      "epoch": 6.57470703125e-05,
      "step": 10772,
      "training_step_time": 0.45601606369018555
    },
    {
      "epoch": 6.5753173828125e-05,
      "model_forward_time": 0.11458945274353027,
      "step": 10773
    },
    {
      "epoch": 6.5753173828125e-05,
      "step": 10773,
      "training_step_time": 0.3631629943847656
    },
    {
      "epoch": 6.575927734375e-05,
      "model_forward_time": 0.11412978172302246,
      "step": 10774
    },
    {
      "epoch": 6.575927734375e-05,
      "step": 10774,
      "training_step_time": 0.43970584869384766
    },
    {
      "epoch": 6.5765380859375e-05,
      "model_forward_time": 0.11510944366455078,
      "step": 10775
    },
    {
      "epoch": 6.5765380859375e-05,
      "step": 10775,
      "training_step_time": 0.45238637924194336
    },
    {
      "epoch": 6.5771484375e-05,
      "model_forward_time": 0.11487126350402832,
      "step": 10776
    },
    {
      "epoch": 6.5771484375e-05,
      "step": 10776,
      "training_step_time": 0.3841590881347656
    },
    {
      "epoch": 6.5777587890625e-05,
      "model_forward_time": 0.1148688793182373,
      "step": 10777
    },
    {
      "epoch": 6.5777587890625e-05,
      "step": 10777,
      "training_step_time": 0.384296178817749
    },
    {
      "epoch": 6.578369140625e-05,
      "model_forward_time": 0.11521458625793457,
      "step": 10778
    },
    {
      "epoch": 6.578369140625e-05,
      "step": 10778,
      "training_step_time": 0.38320279121398926
    },
    {
      "epoch": 6.5789794921875e-05,
      "model_forward_time": 0.11528325080871582,
      "step": 10779
    },
    {
      "epoch": 6.5789794921875e-05,
      "step": 10779,
      "training_step_time": 0.39199185371398926
    },
    {
      "epoch": 6.57958984375e-05,
      "grad_norm": 0.19070982933044434,
      "learning_rate": 9.54732703421526e-05,
      "loss": 0.0626,
      "step": 10780
    },
    {
      "epoch": 6.57958984375e-05,
      "model_forward_time": 0.11570024490356445,
      "step": 10780
    },
    {
      "epoch": 6.57958984375e-05,
      "step": 10780,
      "training_step_time": 0.4014739990234375
    },
    {
      "epoch": 6.5802001953125e-05,
      "model_forward_time": 0.11548995971679688,
      "step": 10781
    },
    {
      "epoch": 6.5802001953125e-05,
      "step": 10781,
      "training_step_time": 0.3905515670776367
    },
    {
      "epoch": 6.580810546875e-05,
      "model_forward_time": 0.11571598052978516,
      "step": 10782
    },
    {
      "epoch": 6.580810546875e-05,
      "step": 10782,
      "training_step_time": 0.3773174285888672
    },
    {
      "epoch": 6.5814208984375e-05,
      "model_forward_time": 0.11590290069580078,
      "step": 10783
    },
    {
      "epoch": 6.5814208984375e-05,
      "step": 10783,
      "training_step_time": 0.9243557453155518
    },
    {
      "epoch": 6.58203125e-05,
      "model_forward_time": 0.11520218849182129,
      "step": 10784
    },
    {
      "epoch": 6.58203125e-05,
      "step": 10784,
      "training_step_time": 0.3967771530151367
    },
    {
      "epoch": 6.5826416015625e-05,
      "model_forward_time": 0.11499762535095215,
      "step": 10785
    },
    {
      "epoch": 6.5826416015625e-05,
      "step": 10785,
      "training_step_time": 0.45006680488586426
    },
    {
      "epoch": 6.583251953125e-05,
      "model_forward_time": 0.11465239524841309,
      "step": 10786
    },
    {
      "epoch": 6.583251953125e-05,
      "step": 10786,
      "training_step_time": 0.401275634765625
    },
    {
      "epoch": 6.5838623046875e-05,
      "model_forward_time": 0.11549091339111328,
      "step": 10787
    },
    {
      "epoch": 6.5838623046875e-05,
      "step": 10787,
      "training_step_time": 0.451068639755249
    },
    {
      "epoch": 6.58447265625e-05,
      "model_forward_time": 0.11591935157775879,
      "step": 10788
    },
    {
      "epoch": 6.58447265625e-05,
      "step": 10788,
      "training_step_time": 0.42090725898742676
    },
    {
      "epoch": 6.5850830078125e-05,
      "model_forward_time": 0.11533713340759277,
      "step": 10789
    },
    {
      "epoch": 6.5850830078125e-05,
      "step": 10789,
      "training_step_time": 0.5781240463256836
    },
    {
      "epoch": 6.585693359375e-05,
      "grad_norm": 0.2046118527650833,
      "learning_rate": 9.546180545235344e-05,
      "loss": 0.0616,
      "step": 10790
    },
    {
      "epoch": 6.585693359375e-05,
      "model_forward_time": 0.11484670639038086,
      "step": 10790
    },
    {
      "epoch": 6.585693359375e-05,
      "step": 10790,
      "training_step_time": 0.3838179111480713
    },
    {
      "epoch": 6.5863037109375e-05,
      "model_forward_time": 0.11467719078063965,
      "step": 10791
    },
    {
      "epoch": 6.5863037109375e-05,
      "step": 10791,
      "training_step_time": 0.3913607597351074
    },
    {
      "epoch": 6.5869140625e-05,
      "model_forward_time": 0.1177055835723877,
      "step": 10792
    },
    {
      "epoch": 6.5869140625e-05,
      "step": 10792,
      "training_step_time": 0.38593196868896484
    },
    {
      "epoch": 6.5875244140625e-05,
      "model_forward_time": 0.11784505844116211,
      "step": 10793
    },
    {
      "epoch": 6.5875244140625e-05,
      "step": 10793,
      "training_step_time": 0.3840217590332031
    },
    {
      "epoch": 6.588134765625e-05,
      "model_forward_time": 0.11919116973876953,
      "step": 10794
    },
    {
      "epoch": 6.588134765625e-05,
      "step": 10794,
      "training_step_time": 0.38489747047424316
    },
    {
      "epoch": 6.5887451171875e-05,
      "model_forward_time": 0.11748218536376953,
      "step": 10795
    },
    {
      "epoch": 6.5887451171875e-05,
      "step": 10795,
      "training_step_time": 1.2105967998504639
    },
    {
      "epoch": 6.58935546875e-05,
      "model_forward_time": 0.1166982650756836,
      "step": 10796
    },
    {
      "epoch": 6.58935546875e-05,
      "step": 10796,
      "training_step_time": 0.399094820022583
    },
    {
      "epoch": 6.5899658203125e-05,
      "model_forward_time": 0.11626410484313965,
      "step": 10797
    },
    {
      "epoch": 6.5899658203125e-05,
      "step": 10797,
      "training_step_time": 0.4170827865600586
    },
    {
      "epoch": 6.590576171875e-05,
      "model_forward_time": 0.11454343795776367,
      "step": 10798
    },
    {
      "epoch": 6.590576171875e-05,
      "step": 10798,
      "training_step_time": 0.3917274475097656
    },
    {
      "epoch": 6.5911865234375e-05,
      "model_forward_time": 0.12020993232727051,
      "step": 10799
    },
    {
      "epoch": 6.5911865234375e-05,
      "step": 10799,
      "training_step_time": 0.39775776863098145
    },
    {
      "epoch": 6.591796875e-05,
      "grad_norm": 0.2143387347459793,
      "learning_rate": 9.545032675245813e-05,
      "loss": 0.0602,
      "step": 10800
    },
    {
      "epoch": 6.591796875e-05,
      "model_forward_time": 0.11807131767272949,
      "step": 10800
    },
    {
      "epoch": 6.591796875e-05,
      "step": 10800,
      "training_step_time": 0.3919403553009033
    },
    {
      "epoch": 6.5924072265625e-05,
      "model_forward_time": 0.11717057228088379,
      "step": 10801
    },
    {
      "epoch": 6.5924072265625e-05,
      "step": 10801,
      "training_step_time": 0.5306010246276855
    },
    {
      "epoch": 6.593017578125e-05,
      "model_forward_time": 0.11629772186279297,
      "step": 10802
    },
    {
      "epoch": 6.593017578125e-05,
      "step": 10802,
      "training_step_time": 0.37624335289001465
    },
    {
      "epoch": 6.5936279296875e-05,
      "model_forward_time": 0.11617088317871094,
      "step": 10803
    },
    {
      "epoch": 6.5936279296875e-05,
      "step": 10803,
      "training_step_time": 0.3871126174926758
    },
    {
      "epoch": 6.59423828125e-05,
      "model_forward_time": 0.11581993103027344,
      "step": 10804
    },
    {
      "epoch": 6.59423828125e-05,
      "step": 10804,
      "training_step_time": 0.40554189682006836
    },
    {
      "epoch": 6.5948486328125e-05,
      "model_forward_time": 0.11580109596252441,
      "step": 10805
    },
    {
      "epoch": 6.5948486328125e-05,
      "step": 10805,
      "training_step_time": 0.403972864151001
    },
    {
      "epoch": 6.595458984375e-05,
      "model_forward_time": 0.11625838279724121,
      "step": 10806
    },
    {
      "epoch": 6.595458984375e-05,
      "step": 10806,
      "training_step_time": 0.3894326686859131
    },
    {
      "epoch": 6.5960693359375e-05,
      "model_forward_time": 0.11578178405761719,
      "step": 10807
    },
    {
      "epoch": 6.5960693359375e-05,
      "step": 10807,
      "training_step_time": 1.096451997756958
    },
    {
      "epoch": 6.5966796875e-05,
      "model_forward_time": 0.1149451732635498,
      "step": 10808
    },
    {
      "epoch": 6.5966796875e-05,
      "step": 10808,
      "training_step_time": 0.42992401123046875
    },
    {
      "epoch": 6.5972900390625e-05,
      "model_forward_time": 0.11469125747680664,
      "step": 10809
    },
    {
      "epoch": 6.5972900390625e-05,
      "step": 10809,
      "training_step_time": 0.3853893280029297
    },
    {
      "epoch": 6.597900390625e-05,
      "grad_norm": 0.16920596361160278,
      "learning_rate": 9.543883424595361e-05,
      "loss": 0.0603,
      "step": 10810
    },
    {
      "epoch": 6.597900390625e-05,
      "model_forward_time": 0.11406493186950684,
      "step": 10810
    },
    {
      "epoch": 6.597900390625e-05,
      "step": 10810,
      "training_step_time": 0.4278411865234375
    },
    {
      "epoch": 6.5985107421875e-05,
      "model_forward_time": 0.11475729942321777,
      "step": 10811
    },
    {
      "epoch": 6.5985107421875e-05,
      "step": 10811,
      "training_step_time": 0.3875296115875244
    },
    {
      "epoch": 6.59912109375e-05,
      "model_forward_time": 0.11477541923522949,
      "step": 10812
    },
    {
      "epoch": 6.59912109375e-05,
      "step": 10812,
      "training_step_time": 0.4561493396759033
    },
    {
      "epoch": 6.5997314453125e-05,
      "model_forward_time": 0.11489343643188477,
      "step": 10813
    },
    {
      "epoch": 6.5997314453125e-05,
      "step": 10813,
      "training_step_time": 0.8047792911529541
    },
    {
      "epoch": 6.600341796875e-05,
      "model_forward_time": 0.11507153511047363,
      "step": 10814
    },
    {
      "epoch": 6.600341796875e-05,
      "step": 10814,
      "training_step_time": 0.4105827808380127
    },
    {
      "epoch": 6.6009521484375e-05,
      "model_forward_time": 0.11574172973632812,
      "step": 10815
    },
    {
      "epoch": 6.6009521484375e-05,
      "step": 10815,
      "training_step_time": 0.38144350051879883
    },
    {
      "epoch": 6.6015625e-05,
      "model_forward_time": 0.11662054061889648,
      "step": 10816
    },
    {
      "epoch": 6.6015625e-05,
      "step": 10816,
      "training_step_time": 0.37505054473876953
    },
    {
      "epoch": 6.6021728515625e-05,
      "model_forward_time": 0.11589670181274414,
      "step": 10817
    },
    {
      "epoch": 6.6021728515625e-05,
      "step": 10817,
      "training_step_time": 0.38141965866088867
    },
    {
      "epoch": 6.602783203125e-05,
      "model_forward_time": 0.11774349212646484,
      "step": 10818
    },
    {
      "epoch": 6.602783203125e-05,
      "step": 10818,
      "training_step_time": 0.37505483627319336
    },
    {
      "epoch": 6.6033935546875e-05,
      "model_forward_time": 0.12125158309936523,
      "step": 10819
    },
    {
      "epoch": 6.6033935546875e-05,
      "step": 10819,
      "training_step_time": 0.6655559539794922
    },
    {
      "epoch": 6.60400390625e-05,
      "grad_norm": 0.20239651203155518,
      "learning_rate": 9.542732793633098e-05,
      "loss": 0.0633,
      "step": 10820
    },
    {
      "epoch": 6.60400390625e-05,
      "model_forward_time": 0.11594319343566895,
      "step": 10820
    },
    {
      "epoch": 6.60400390625e-05,
      "step": 10820,
      "training_step_time": 0.4481527805328369
    },
    {
      "epoch": 6.6046142578125e-05,
      "model_forward_time": 0.11826944351196289,
      "step": 10821
    },
    {
      "epoch": 6.6046142578125e-05,
      "step": 10821,
      "training_step_time": 0.4115324020385742
    },
    {
      "epoch": 6.605224609375e-05,
      "model_forward_time": 0.11705923080444336,
      "step": 10822
    },
    {
      "epoch": 6.605224609375e-05,
      "step": 10822,
      "training_step_time": 0.39754700660705566
    },
    {
      "epoch": 6.6058349609375e-05,
      "model_forward_time": 0.1408679485321045,
      "step": 10823
    },
    {
      "epoch": 6.6058349609375e-05,
      "step": 10823,
      "training_step_time": 0.4716060161590576
    },
    {
      "epoch": 6.6064453125e-05,
      "model_forward_time": 0.11675620079040527,
      "step": 10824
    },
    {
      "epoch": 6.6064453125e-05,
      "step": 10824,
      "training_step_time": 0.38869714736938477
    },
    {
      "epoch": 6.6070556640625e-05,
      "model_forward_time": 0.11722207069396973,
      "step": 10825
    },
    {
      "epoch": 6.6070556640625e-05,
      "step": 10825,
      "training_step_time": 0.5884935855865479
    },
    {
      "epoch": 6.607666015625e-05,
      "model_forward_time": 0.1161198616027832,
      "step": 10826
    },
    {
      "epoch": 6.607666015625e-05,
      "step": 10826,
      "training_step_time": 0.43065381050109863
    },
    {
      "epoch": 6.6082763671875e-05,
      "model_forward_time": 0.11531186103820801,
      "step": 10827
    },
    {
      "epoch": 6.6082763671875e-05,
      "step": 10827,
      "training_step_time": 0.3907625675201416
    },
    {
      "epoch": 6.60888671875e-05,
      "model_forward_time": 0.11609268188476562,
      "step": 10828
    },
    {
      "epoch": 6.60888671875e-05,
      "step": 10828,
      "training_step_time": 0.3851487636566162
    },
    {
      "epoch": 6.6094970703125e-05,
      "model_forward_time": 0.11562514305114746,
      "step": 10829
    },
    {
      "epoch": 6.6094970703125e-05,
      "step": 10829,
      "training_step_time": 0.39127206802368164
    },
    {
      "epoch": 6.610107421875e-05,
      "grad_norm": 0.1687299609184265,
      "learning_rate": 9.541580782708557e-05,
      "loss": 0.0662,
      "step": 10830
    },
    {
      "epoch": 6.610107421875e-05,
      "model_forward_time": 0.11593174934387207,
      "step": 10830
    },
    {
      "epoch": 6.610107421875e-05,
      "step": 10830,
      "training_step_time": 0.3763854503631592
    },
    {
      "epoch": 6.6107177734375e-05,
      "model_forward_time": 0.11736178398132324,
      "step": 10831
    },
    {
      "epoch": 6.6107177734375e-05,
      "step": 10831,
      "training_step_time": 0.5700209140777588
    },
    {
      "epoch": 6.611328125e-05,
      "model_forward_time": 0.11605286598205566,
      "step": 10832
    },
    {
      "epoch": 6.611328125e-05,
      "step": 10832,
      "training_step_time": 0.45128369331359863
    },
    {
      "epoch": 6.6119384765625e-05,
      "model_forward_time": 0.11590433120727539,
      "step": 10833
    },
    {
      "epoch": 6.6119384765625e-05,
      "step": 10833,
      "training_step_time": 0.4374854564666748
    },
    {
      "epoch": 6.612548828125e-05,
      "model_forward_time": 0.11579155921936035,
      "step": 10834
    },
    {
      "epoch": 6.612548828125e-05,
      "step": 10834,
      "training_step_time": 0.3938887119293213
    },
    {
      "epoch": 6.6131591796875e-05,
      "model_forward_time": 0.11597347259521484,
      "step": 10835
    },
    {
      "epoch": 6.6131591796875e-05,
      "step": 10835,
      "training_step_time": 0.4164912700653076
    },
    {
      "epoch": 6.61376953125e-05,
      "model_forward_time": 0.1158602237701416,
      "step": 10836
    },
    {
      "epoch": 6.61376953125e-05,
      "step": 10836,
      "training_step_time": 0.4125401973724365
    },
    {
      "epoch": 6.6143798828125e-05,
      "model_forward_time": 0.11698365211486816,
      "step": 10837
    },
    {
      "epoch": 6.6143798828125e-05,
      "step": 10837,
      "training_step_time": 1.0567419528961182
    },
    {
      "epoch": 6.614990234375e-05,
      "model_forward_time": 0.11527824401855469,
      "step": 10838
    },
    {
      "epoch": 6.614990234375e-05,
      "step": 10838,
      "training_step_time": 0.46351003646850586
    },
    {
      "epoch": 6.6156005859375e-05,
      "model_forward_time": 0.11508870124816895,
      "step": 10839
    },
    {
      "epoch": 6.6156005859375e-05,
      "step": 10839,
      "training_step_time": 0.4271824359893799
    },
    {
      "epoch": 6.6162109375e-05,
      "grad_norm": 0.2503608763217926,
      "learning_rate": 9.540427392171688e-05,
      "loss": 0.0629,
      "step": 10840
    },
    {
      "epoch": 6.6162109375e-05,
      "model_forward_time": 0.1152493953704834,
      "step": 10840
    },
    {
      "epoch": 6.6162109375e-05,
      "step": 10840,
      "training_step_time": 0.4444756507873535
    },
    {
      "epoch": 6.6168212890625e-05,
      "model_forward_time": 0.11471152305603027,
      "step": 10841
    },
    {
      "epoch": 6.6168212890625e-05,
      "step": 10841,
      "training_step_time": 0.39336514472961426
    },
    {
      "epoch": 6.617431640625e-05,
      "model_forward_time": 0.11507487297058105,
      "step": 10842
    },
    {
      "epoch": 6.617431640625e-05,
      "step": 10842,
      "training_step_time": 0.3793468475341797
    },
    {
      "epoch": 6.6180419921875e-05,
      "model_forward_time": 0.11692118644714355,
      "step": 10843
    },
    {
      "epoch": 6.6180419921875e-05,
      "step": 10843,
      "training_step_time": 0.644974946975708
    },
    {
      "epoch": 6.61865234375e-05,
      "model_forward_time": 0.11573123931884766,
      "step": 10844
    },
    {
      "epoch": 6.61865234375e-05,
      "step": 10844,
      "training_step_time": 0.4476430416107178
    },
    {
      "epoch": 6.6192626953125e-05,
      "model_forward_time": 0.11563539505004883,
      "step": 10845
    },
    {
      "epoch": 6.6192626953125e-05,
      "step": 10845,
      "training_step_time": 0.42669105529785156
    },
    {
      "epoch": 6.619873046875e-05,
      "model_forward_time": 0.11572265625,
      "step": 10846
    },
    {
      "epoch": 6.619873046875e-05,
      "step": 10846,
      "training_step_time": 0.39368295669555664
    },
    {
      "epoch": 6.6204833984375e-05,
      "model_forward_time": 0.11558246612548828,
      "step": 10847
    },
    {
      "epoch": 6.6204833984375e-05,
      "step": 10847,
      "training_step_time": 0.4000723361968994
    },
    {
      "epoch": 6.62109375e-05,
      "model_forward_time": 0.1151580810546875,
      "step": 10848
    },
    {
      "epoch": 6.62109375e-05,
      "step": 10848,
      "training_step_time": 0.38056492805480957
    },
    {
      "epoch": 6.6217041015625e-05,
      "model_forward_time": 0.11622095108032227,
      "step": 10849
    },
    {
      "epoch": 6.6217041015625e-05,
      "step": 10849,
      "training_step_time": 0.5568616390228271
    },
    {
      "epoch": 6.622314453125e-05,
      "grad_norm": 0.18044009804725647,
      "learning_rate": 9.539272622372858e-05,
      "loss": 0.0597,
      "step": 10850
    },
    {
      "epoch": 6.622314453125e-05,
      "model_forward_time": 0.11681985855102539,
      "step": 10850
    },
    {
      "epoch": 6.622314453125e-05,
      "step": 10850,
      "training_step_time": 0.40448570251464844
    },
    {
      "epoch": 6.6229248046875e-05,
      "model_forward_time": 0.11605620384216309,
      "step": 10851
    },
    {
      "epoch": 6.6229248046875e-05,
      "step": 10851,
      "training_step_time": 0.4651167392730713
    },
    {
      "epoch": 6.62353515625e-05,
      "model_forward_time": 0.11627364158630371,
      "step": 10852
    },
    {
      "epoch": 6.62353515625e-05,
      "step": 10852,
      "training_step_time": 0.39067888259887695
    },
    {
      "epoch": 6.6241455078125e-05,
      "model_forward_time": 0.1157684326171875,
      "step": 10853
    },
    {
      "epoch": 6.6241455078125e-05,
      "step": 10853,
      "training_step_time": 0.4504265785217285
    },
    {
      "epoch": 6.624755859375e-05,
      "model_forward_time": 0.11509060859680176,
      "step": 10854
    },
    {
      "epoch": 6.624755859375e-05,
      "step": 10854,
      "training_step_time": 0.47692251205444336
    },
    {
      "epoch": 6.6253662109375e-05,
      "model_forward_time": 0.11666679382324219,
      "step": 10855
    },
    {
      "epoch": 6.6253662109375e-05,
      "step": 10855,
      "training_step_time": 0.907418966293335
    },
    {
      "epoch": 6.6259765625e-05,
      "model_forward_time": 0.11500930786132812,
      "step": 10856
    },
    {
      "epoch": 6.6259765625e-05,
      "step": 10856,
      "training_step_time": 0.42066264152526855
    },
    {
      "epoch": 6.6265869140625e-05,
      "model_forward_time": 0.11546826362609863,
      "step": 10857
    },
    {
      "epoch": 6.6265869140625e-05,
      "step": 10857,
      "training_step_time": 0.38294029235839844
    },
    {
      "epoch": 6.627197265625e-05,
      "model_forward_time": 0.11552858352661133,
      "step": 10858
    },
    {
      "epoch": 6.627197265625e-05,
      "step": 10858,
      "training_step_time": 0.3802647590637207
    },
    {
      "epoch": 6.6278076171875e-05,
      "model_forward_time": 0.11569595336914062,
      "step": 10859
    },
    {
      "epoch": 6.6278076171875e-05,
      "step": 10859,
      "training_step_time": 0.38149285316467285
    },
    {
      "epoch": 6.62841796875e-05,
      "grad_norm": 0.17051438987255096,
      "learning_rate": 9.538116473662861e-05,
      "loss": 0.057,
      "step": 10860
    },
    {
      "epoch": 6.62841796875e-05,
      "model_forward_time": 0.11562633514404297,
      "step": 10860
    },
    {
      "epoch": 6.62841796875e-05,
      "step": 10860,
      "training_step_time": 0.37856030464172363
    },
    {
      "epoch": 6.6290283203125e-05,
      "model_forward_time": 0.11604928970336914,
      "step": 10861
    },
    {
      "epoch": 6.6290283203125e-05,
      "step": 10861,
      "training_step_time": 0.9133284091949463
    },
    {
      "epoch": 6.629638671875e-05,
      "model_forward_time": 0.11548829078674316,
      "step": 10862
    },
    {
      "epoch": 6.629638671875e-05,
      "step": 10862,
      "training_step_time": 0.4481372833251953
    },
    {
      "epoch": 6.6302490234375e-05,
      "model_forward_time": 0.11544132232666016,
      "step": 10863
    },
    {
      "epoch": 6.6302490234375e-05,
      "step": 10863,
      "training_step_time": 0.39670348167419434
    },
    {
      "epoch": 6.630859375e-05,
      "model_forward_time": 0.11538100242614746,
      "step": 10864
    },
    {
      "epoch": 6.630859375e-05,
      "step": 10864,
      "training_step_time": 0.37256693840026855
    },
    {
      "epoch": 6.6314697265625e-05,
      "model_forward_time": 0.11602115631103516,
      "step": 10865
    },
    {
      "epoch": 6.6314697265625e-05,
      "step": 10865,
      "training_step_time": 0.3827705383300781
    },
    {
      "epoch": 6.632080078125e-05,
      "model_forward_time": 0.11618614196777344,
      "step": 10866
    },
    {
      "epoch": 6.632080078125e-05,
      "step": 10866,
      "training_step_time": 0.40800952911376953
    },
    {
      "epoch": 6.6326904296875e-05,
      "model_forward_time": 0.11585760116577148,
      "step": 10867
    },
    {
      "epoch": 6.6326904296875e-05,
      "step": 10867,
      "training_step_time": 0.47190022468566895
    },
    {
      "epoch": 6.63330078125e-05,
      "model_forward_time": 0.1159970760345459,
      "step": 10868
    },
    {
      "epoch": 6.63330078125e-05,
      "step": 10868,
      "training_step_time": 0.3879551887512207
    },
    {
      "epoch": 6.6339111328125e-05,
      "model_forward_time": 0.11600589752197266,
      "step": 10869
    },
    {
      "epoch": 6.6339111328125e-05,
      "step": 10869,
      "training_step_time": 0.4386250972747803
    },
    {
      "epoch": 6.634521484375e-05,
      "grad_norm": 0.18642370402812958,
      "learning_rate": 9.5369589463929e-05,
      "loss": 0.06,
      "step": 10870
    },
    {
      "epoch": 6.634521484375e-05,
      "model_forward_time": 0.11564850807189941,
      "step": 10870
    },
    {
      "epoch": 6.634521484375e-05,
      "step": 10870,
      "training_step_time": 0.3966045379638672
    },
    {
      "epoch": 6.6351318359375e-05,
      "model_forward_time": 0.1159827709197998,
      "step": 10871
    },
    {
      "epoch": 6.6351318359375e-05,
      "step": 10871,
      "training_step_time": 0.3911583423614502
    },
    {
      "epoch": 6.6357421875e-05,
      "model_forward_time": 0.11619448661804199,
      "step": 10872
    },
    {
      "epoch": 6.6357421875e-05,
      "step": 10872,
      "training_step_time": 0.37772250175476074
    },
    {
      "epoch": 6.6363525390625e-05,
      "model_forward_time": 0.11715245246887207,
      "step": 10873
    },
    {
      "epoch": 6.6363525390625e-05,
      "step": 10873,
      "training_step_time": 0.9447598457336426
    },
    {
      "epoch": 6.636962890625e-05,
      "model_forward_time": 0.11636161804199219,
      "step": 10874
    },
    {
      "epoch": 6.636962890625e-05,
      "step": 10874,
      "training_step_time": 0.4254150390625
    },
    {
      "epoch": 6.6375732421875e-05,
      "model_forward_time": 0.11493039131164551,
      "step": 10875
    },
    {
      "epoch": 6.6375732421875e-05,
      "step": 10875,
      "training_step_time": 0.4409365653991699
    },
    {
      "epoch": 6.63818359375e-05,
      "model_forward_time": 0.11602449417114258,
      "step": 10876
    },
    {
      "epoch": 6.63818359375e-05,
      "step": 10876,
      "training_step_time": 0.40868330001831055
    },
    {
      "epoch": 6.6387939453125e-05,
      "model_forward_time": 0.11498832702636719,
      "step": 10877
    },
    {
      "epoch": 6.6387939453125e-05,
      "step": 10877,
      "training_step_time": 0.40195322036743164
    },
    {
      "epoch": 6.639404296875e-05,
      "model_forward_time": 0.11540555953979492,
      "step": 10878
    },
    {
      "epoch": 6.639404296875e-05,
      "step": 10878,
      "training_step_time": 0.3641195297241211
    },
    {
      "epoch": 6.6400146484375e-05,
      "model_forward_time": 0.11609864234924316,
      "step": 10879
    },
    {
      "epoch": 6.6400146484375e-05,
      "step": 10879,
      "training_step_time": 0.46068525314331055
    },
    {
      "epoch": 6.640625e-05,
      "grad_norm": 0.17006278038024902,
      "learning_rate": 9.535800040914601e-05,
      "loss": 0.059,
      "step": 10880
    },
    {
      "epoch": 6.640625e-05,
      "model_forward_time": 0.1155855655670166,
      "step": 10880
    },
    {
      "epoch": 6.640625e-05,
      "step": 10880,
      "training_step_time": 0.4336585998535156
    },
    {
      "epoch": 6.6412353515625e-05,
      "model_forward_time": 0.11626124382019043,
      "step": 10881
    },
    {
      "epoch": 6.6412353515625e-05,
      "step": 10881,
      "training_step_time": 0.41205263137817383
    },
    {
      "epoch": 6.641845703125e-05,
      "model_forward_time": 0.11528325080871582,
      "step": 10882
    },
    {
      "epoch": 6.641845703125e-05,
      "step": 10882,
      "training_step_time": 0.3981132507324219
    },
    {
      "epoch": 6.6424560546875e-05,
      "model_forward_time": 0.11635088920593262,
      "step": 10883
    },
    {
      "epoch": 6.6424560546875e-05,
      "step": 10883,
      "training_step_time": 0.39894890785217285
    },
    {
      "epoch": 6.64306640625e-05,
      "model_forward_time": 0.11575746536254883,
      "step": 10884
    },
    {
      "epoch": 6.64306640625e-05,
      "step": 10884,
      "training_step_time": 0.38206911087036133
    },
    {
      "epoch": 6.6436767578125e-05,
      "model_forward_time": 0.11664795875549316,
      "step": 10885
    },
    {
      "epoch": 6.6436767578125e-05,
      "step": 10885,
      "training_step_time": 1.0436646938323975
    },
    {
      "epoch": 6.644287109375e-05,
      "model_forward_time": 0.11479544639587402,
      "step": 10886
    },
    {
      "epoch": 6.644287109375e-05,
      "step": 10886,
      "training_step_time": 0.3819005489349365
    },
    {
      "epoch": 6.6448974609375e-05,
      "model_forward_time": 0.11584854125976562,
      "step": 10887
    },
    {
      "epoch": 6.6448974609375e-05,
      "step": 10887,
      "training_step_time": 0.4140324592590332
    },
    {
      "epoch": 6.6455078125e-05,
      "model_forward_time": 0.1151590347290039,
      "step": 10888
    },
    {
      "epoch": 6.6455078125e-05,
      "step": 10888,
      "training_step_time": 0.4894082546234131
    },
    {
      "epoch": 6.6461181640625e-05,
      "model_forward_time": 0.11491870880126953,
      "step": 10889
    },
    {
      "epoch": 6.6461181640625e-05,
      "step": 10889,
      "training_step_time": 0.44985508918762207
    },
    {
      "epoch": 6.646728515625e-05,
      "grad_norm": 0.172703817486763,
      "learning_rate": 9.534639757580013e-05,
      "loss": 0.0594,
      "step": 10890
    },
    {
      "epoch": 6.646728515625e-05,
      "model_forward_time": 0.11588764190673828,
      "step": 10890
    },
    {
      "epoch": 6.646728515625e-05,
      "step": 10890,
      "training_step_time": 0.37073302268981934
    },
    {
      "epoch": 6.6473388671875e-05,
      "model_forward_time": 0.11536169052124023,
      "step": 10891
    },
    {
      "epoch": 6.6473388671875e-05,
      "step": 10891,
      "training_step_time": 0.7627749443054199
    },
    {
      "epoch": 6.64794921875e-05,
      "model_forward_time": 0.11490345001220703,
      "step": 10892
    },
    {
      "epoch": 6.64794921875e-05,
      "step": 10892,
      "training_step_time": 0.4474148750305176
    },
    {
      "epoch": 6.6485595703125e-05,
      "model_forward_time": 0.11470770835876465,
      "step": 10893
    },
    {
      "epoch": 6.6485595703125e-05,
      "step": 10893,
      "training_step_time": 0.42523908615112305
    },
    {
      "epoch": 6.649169921875e-05,
      "model_forward_time": 0.11475825309753418,
      "step": 10894
    },
    {
      "epoch": 6.649169921875e-05,
      "step": 10894,
      "training_step_time": 0.3818671703338623
    },
    {
      "epoch": 6.6497802734375e-05,
      "model_forward_time": 0.11460685729980469,
      "step": 10895
    },
    {
      "epoch": 6.6497802734375e-05,
      "step": 10895,
      "training_step_time": 0.38133716583251953
    },
    {
      "epoch": 6.650390625e-05,
      "model_forward_time": 0.11568522453308105,
      "step": 10896
    },
    {
      "epoch": 6.650390625e-05,
      "step": 10896,
      "training_step_time": 0.3689916133880615
    },
    {
      "epoch": 6.6510009765625e-05,
      "model_forward_time": 0.11676335334777832,
      "step": 10897
    },
    {
      "epoch": 6.6510009765625e-05,
      "step": 10897,
      "training_step_time": 0.40543341636657715
    },
    {
      "epoch": 6.651611328125e-05,
      "model_forward_time": 0.11610865592956543,
      "step": 10898
    },
    {
      "epoch": 6.651611328125e-05,
      "step": 10898,
      "training_step_time": 0.38836121559143066
    },
    {
      "epoch": 6.6522216796875e-05,
      "model_forward_time": 0.11606431007385254,
      "step": 10899
    },
    {
      "epoch": 6.6522216796875e-05,
      "step": 10899,
      "training_step_time": 0.3862760066986084
    },
    {
      "epoch": 6.65283203125e-05,
      "grad_norm": 0.24901214241981506,
      "learning_rate": 9.533478096741597e-05,
      "loss": 0.0598,
      "step": 10900
    },
    {
      "epoch": 6.65283203125e-05,
      "model_forward_time": 0.11661791801452637,
      "step": 10900
    },
    {
      "epoch": 6.65283203125e-05,
      "step": 10900,
      "training_step_time": 0.4382972717285156
    },
    {
      "epoch": 6.6534423828125e-05,
      "model_forward_time": 0.11695528030395508,
      "step": 10901
    },
    {
      "epoch": 6.6534423828125e-05,
      "step": 10901,
      "training_step_time": 0.46011900901794434
    },
    {
      "epoch": 6.654052734375e-05,
      "model_forward_time": 0.11585688591003418,
      "step": 10902
    },
    {
      "epoch": 6.654052734375e-05,
      "step": 10902,
      "training_step_time": 0.3905978202819824
    },
    {
      "epoch": 6.6546630859375e-05,
      "model_forward_time": 0.11588668823242188,
      "step": 10903
    },
    {
      "epoch": 6.6546630859375e-05,
      "step": 10903,
      "training_step_time": 0.5742242336273193
    },
    {
      "epoch": 6.6552734375e-05,
      "model_forward_time": 0.11627435684204102,
      "step": 10904
    },
    {
      "epoch": 6.6552734375e-05,
      "step": 10904,
      "training_step_time": 0.38839173316955566
    },
    {
      "epoch": 6.6558837890625e-05,
      "model_forward_time": 0.11614394187927246,
      "step": 10905
    },
    {
      "epoch": 6.6558837890625e-05,
      "step": 10905,
      "training_step_time": 0.3938727378845215
    },
    {
      "epoch": 6.656494140625e-05,
      "model_forward_time": 0.11611008644104004,
      "step": 10906
    },
    {
      "epoch": 6.656494140625e-05,
      "step": 10906,
      "training_step_time": 0.4302964210510254
    },
    {
      "epoch": 6.6571044921875e-05,
      "model_forward_time": 0.11639142036437988,
      "step": 10907
    },
    {
      "epoch": 6.6571044921875e-05,
      "step": 10907,
      "training_step_time": 0.4768960475921631
    },
    {
      "epoch": 6.65771484375e-05,
      "model_forward_time": 0.11560463905334473,
      "step": 10908
    },
    {
      "epoch": 6.65771484375e-05,
      "step": 10908,
      "training_step_time": 0.38054490089416504
    },
    {
      "epoch": 6.6583251953125e-05,
      "model_forward_time": 0.11633157730102539,
      "step": 10909
    },
    {
      "epoch": 6.6583251953125e-05,
      "step": 10909,
      "training_step_time": 0.47898197174072266
    },
    {
      "epoch": 6.658935546875e-05,
      "grad_norm": 0.21273843944072723,
      "learning_rate": 9.532315058752232e-05,
      "loss": 0.061,
      "step": 10910
    },
    {
      "epoch": 6.658935546875e-05,
      "model_forward_time": 0.11597895622253418,
      "step": 10910
    },
    {
      "epoch": 6.658935546875e-05,
      "step": 10910,
      "training_step_time": 0.39716005325317383
    },
    {
      "epoch": 6.6595458984375e-05,
      "model_forward_time": 0.11561059951782227,
      "step": 10911
    },
    {
      "epoch": 6.6595458984375e-05,
      "step": 10911,
      "training_step_time": 0.4030427932739258
    },
    {
      "epoch": 6.66015625e-05,
      "model_forward_time": 0.1161794662475586,
      "step": 10912
    },
    {
      "epoch": 6.66015625e-05,
      "step": 10912,
      "training_step_time": 0.3981168270111084
    },
    {
      "epoch": 6.6607666015625e-05,
      "model_forward_time": 0.11562609672546387,
      "step": 10913
    },
    {
      "epoch": 6.6607666015625e-05,
      "step": 10913,
      "training_step_time": 0.3919820785522461
    },
    {
      "epoch": 6.661376953125e-05,
      "model_forward_time": 0.11843037605285645,
      "step": 10914
    },
    {
      "epoch": 6.661376953125e-05,
      "step": 10914,
      "training_step_time": 0.3833582401275635
    },
    {
      "epoch": 6.6619873046875e-05,
      "model_forward_time": 0.11725950241088867,
      "step": 10915
    },
    {
      "epoch": 6.6619873046875e-05,
      "step": 10915,
      "training_step_time": 0.8134160041809082
    },
    {
      "epoch": 6.66259765625e-05,
      "model_forward_time": 0.11659431457519531,
      "step": 10916
    },
    {
      "epoch": 6.66259765625e-05,
      "step": 10916,
      "training_step_time": 0.4190328121185303
    },
    {
      "epoch": 6.6632080078125e-05,
      "model_forward_time": 0.11661863327026367,
      "step": 10917
    },
    {
      "epoch": 6.6632080078125e-05,
      "step": 10917,
      "training_step_time": 0.37692904472351074
    },
    {
      "epoch": 6.663818359375e-05,
      "model_forward_time": 0.1168818473815918,
      "step": 10918
    },
    {
      "epoch": 6.663818359375e-05,
      "step": 10918,
      "training_step_time": 0.37782907485961914
    },
    {
      "epoch": 6.6644287109375e-05,
      "model_forward_time": 0.11677718162536621,
      "step": 10919
    },
    {
      "epoch": 6.6644287109375e-05,
      "step": 10919,
      "training_step_time": 0.41683101654052734
    },
    {
      "epoch": 6.6650390625e-05,
      "grad_norm": 0.22784480452537537,
      "learning_rate": 9.531150643965223e-05,
      "loss": 0.0558,
      "step": 10920
    },
    {
      "epoch": 6.6650390625e-05,
      "model_forward_time": 0.11548876762390137,
      "step": 10920
    },
    {
      "epoch": 6.6650390625e-05,
      "step": 10920,
      "training_step_time": 0.426053524017334
    },
    {
      "epoch": 6.6656494140625e-05,
      "model_forward_time": 0.11785173416137695,
      "step": 10921
    },
    {
      "epoch": 6.6656494140625e-05,
      "step": 10921,
      "training_step_time": 0.7139923572540283
    },
    {
      "epoch": 6.666259765625e-05,
      "model_forward_time": 0.1151280403137207,
      "step": 10922
    },
    {
      "epoch": 6.666259765625e-05,
      "step": 10922,
      "training_step_time": 0.3835155963897705
    },
    {
      "epoch": 6.6668701171875e-05,
      "model_forward_time": 0.1152644157409668,
      "step": 10923
    },
    {
      "epoch": 6.6668701171875e-05,
      "step": 10923,
      "training_step_time": 0.3793797492980957
    },
    {
      "epoch": 6.66748046875e-05,
      "model_forward_time": 0.11506319046020508,
      "step": 10924
    },
    {
      "epoch": 6.66748046875e-05,
      "step": 10924,
      "training_step_time": 0.3958775997161865
    },
    {
      "epoch": 6.6680908203125e-05,
      "model_forward_time": 0.11484742164611816,
      "step": 10925
    },
    {
      "epoch": 6.6680908203125e-05,
      "step": 10925,
      "training_step_time": 0.39049720764160156
    },
    {
      "epoch": 6.668701171875e-05,
      "model_forward_time": 0.11533546447753906,
      "step": 10926
    },
    {
      "epoch": 6.668701171875e-05,
      "step": 10926,
      "training_step_time": 0.3799729347229004
    },
    {
      "epoch": 6.6693115234375e-05,
      "model_forward_time": 0.11627078056335449,
      "step": 10927
    },
    {
      "epoch": 6.6693115234375e-05,
      "step": 10927,
      "training_step_time": 0.9211256504058838
    },
    {
      "epoch": 6.669921875e-05,
      "model_forward_time": 0.11554932594299316,
      "step": 10928
    },
    {
      "epoch": 6.669921875e-05,
      "step": 10928,
      "training_step_time": 0.4243016242980957
    },
    {
      "epoch": 6.6705322265625e-05,
      "model_forward_time": 0.11737585067749023,
      "step": 10929
    },
    {
      "epoch": 6.6705322265625e-05,
      "step": 10929,
      "training_step_time": 0.47319507598876953
    },
    {
      "epoch": 6.671142578125e-05,
      "grad_norm": 0.2022397667169571,
      "learning_rate": 9.529984852734285e-05,
      "loss": 0.0629,
      "step": 10930
    },
    {
      "epoch": 6.671142578125e-05,
      "model_forward_time": 0.1167755126953125,
      "step": 10930
    },
    {
      "epoch": 6.671142578125e-05,
      "step": 10930,
      "training_step_time": 0.38069868087768555
    },
    {
      "epoch": 6.6717529296875e-05,
      "model_forward_time": 0.1163473129272461,
      "step": 10931
    },
    {
      "epoch": 6.6717529296875e-05,
      "step": 10931,
      "training_step_time": 0.37552881240844727
    },
    {
      "epoch": 6.67236328125e-05,
      "model_forward_time": 0.11638474464416504,
      "step": 10932
    },
    {
      "epoch": 6.67236328125e-05,
      "step": 10932,
      "training_step_time": 0.4558887481689453
    },
    {
      "epoch": 6.6729736328125e-05,
      "model_forward_time": 0.11757206916809082,
      "step": 10933
    },
    {
      "epoch": 6.6729736328125e-05,
      "step": 10933,
      "training_step_time": 0.4936201572418213
    },
    {
      "epoch": 6.673583984375e-05,
      "model_forward_time": 0.11811423301696777,
      "step": 10934
    },
    {
      "epoch": 6.673583984375e-05,
      "step": 10934,
      "training_step_time": 0.40028834342956543
    },
    {
      "epoch": 6.6741943359375e-05,
      "model_forward_time": 0.11722207069396973,
      "step": 10935
    },
    {
      "epoch": 6.6741943359375e-05,
      "step": 10935,
      "training_step_time": 0.393268346786499
    },
    {
      "epoch": 6.6748046875e-05,
      "model_forward_time": 0.11723971366882324,
      "step": 10936
    },
    {
      "epoch": 6.6748046875e-05,
      "step": 10936,
      "training_step_time": 0.38208794593811035
    },
    {
      "epoch": 6.6754150390625e-05,
      "model_forward_time": 0.1169431209564209,
      "step": 10937
    },
    {
      "epoch": 6.6754150390625e-05,
      "step": 10937,
      "training_step_time": 0.3971443176269531
    },
    {
      "epoch": 6.676025390625e-05,
      "model_forward_time": 0.11626195907592773,
      "step": 10938
    },
    {
      "epoch": 6.676025390625e-05,
      "step": 10938,
      "training_step_time": 0.3874845504760742
    },
    {
      "epoch": 6.6766357421875e-05,
      "model_forward_time": 0.11582088470458984,
      "step": 10939
    },
    {
      "epoch": 6.6766357421875e-05,
      "step": 10939,
      "training_step_time": 0.6353743076324463
    },
    {
      "epoch": 6.67724609375e-05,
      "grad_norm": 0.28057536482810974,
      "learning_rate": 9.528817685413558e-05,
      "loss": 0.0576,
      "step": 10940
    },
    {
      "epoch": 6.67724609375e-05,
      "model_forward_time": 0.1168365478515625,
      "step": 10940
    },
    {
      "epoch": 6.67724609375e-05,
      "step": 10940,
      "training_step_time": 0.40617799758911133
    },
    {
      "epoch": 6.6778564453125e-05,
      "model_forward_time": 0.1157691478729248,
      "step": 10941
    },
    {
      "epoch": 6.6778564453125e-05,
      "step": 10941,
      "training_step_time": 0.4839303493499756
    },
    {
      "epoch": 6.678466796875e-05,
      "model_forward_time": 0.11641502380371094,
      "step": 10942
    },
    {
      "epoch": 6.678466796875e-05,
      "step": 10942,
      "training_step_time": 0.4134049415588379
    },
    {
      "epoch": 6.6790771484375e-05,
      "model_forward_time": 0.11620163917541504,
      "step": 10943
    },
    {
      "epoch": 6.6790771484375e-05,
      "step": 10943,
      "training_step_time": 0.46782708168029785
    },
    {
      "epoch": 6.6796875e-05,
      "model_forward_time": 0.11486196517944336,
      "step": 10944
    },
    {
      "epoch": 6.6796875e-05,
      "step": 10944,
      "training_step_time": 0.36971497535705566
    },
    {
      "epoch": 6.6802978515625e-05,
      "model_forward_time": 0.11788225173950195,
      "step": 10945
    },
    {
      "epoch": 6.6802978515625e-05,
      "step": 10945,
      "training_step_time": 0.4292638301849365
    },
    {
      "epoch": 6.680908203125e-05,
      "model_forward_time": 0.12025094032287598,
      "step": 10946
    },
    {
      "epoch": 6.680908203125e-05,
      "step": 10946,
      "training_step_time": 0.4383971691131592
    },
    {
      "epoch": 6.6815185546875e-05,
      "model_forward_time": 0.11780333518981934,
      "step": 10947
    },
    {
      "epoch": 6.6815185546875e-05,
      "step": 10947,
      "training_step_time": 0.4069087505340576
    },
    {
      "epoch": 6.68212890625e-05,
      "model_forward_time": 0.11634445190429688,
      "step": 10948
    },
    {
      "epoch": 6.68212890625e-05,
      "step": 10948,
      "training_step_time": 0.39018964767456055
    },
    {
      "epoch": 6.6827392578125e-05,
      "model_forward_time": 0.11880612373352051,
      "step": 10949
    },
    {
      "epoch": 6.6827392578125e-05,
      "step": 10949,
      "training_step_time": 0.38956284523010254
    },
    {
      "epoch": 6.683349609375e-05,
      "grad_norm": 0.24310018122196198,
      "learning_rate": 9.527649142357596e-05,
      "loss": 0.0605,
      "step": 10950
    },
    {
      "epoch": 6.683349609375e-05,
      "model_forward_time": 0.11768579483032227,
      "step": 10950
    },
    {
      "epoch": 6.683349609375e-05,
      "step": 10950,
      "training_step_time": 0.38399529457092285
    },
    {
      "epoch": 6.6839599609375e-05,
      "model_forward_time": 0.11909723281860352,
      "step": 10951
    },
    {
      "epoch": 6.6839599609375e-05,
      "step": 10951,
      "training_step_time": 0.4138600826263428
    },
    {
      "epoch": 6.6845703125e-05,
      "model_forward_time": 0.1183626651763916,
      "step": 10952
    },
    {
      "epoch": 6.6845703125e-05,
      "step": 10952,
      "training_step_time": 0.3937201499938965
    },
    {
      "epoch": 6.6851806640625e-05,
      "model_forward_time": 0.11725068092346191,
      "step": 10953
    },
    {
      "epoch": 6.6851806640625e-05,
      "step": 10953,
      "training_step_time": 0.3811206817626953
    },
    {
      "epoch": 6.685791015625e-05,
      "model_forward_time": 0.11693239212036133,
      "step": 10954
    },
    {
      "epoch": 6.685791015625e-05,
      "step": 10954,
      "training_step_time": 0.38182997703552246
    },
    {
      "epoch": 6.6864013671875e-05,
      "model_forward_time": 0.11726117134094238,
      "step": 10955
    },
    {
      "epoch": 6.6864013671875e-05,
      "step": 10955,
      "training_step_time": 0.38795971870422363
    },
    {
      "epoch": 6.68701171875e-05,
      "model_forward_time": 0.12020397186279297,
      "step": 10956
    },
    {
      "epoch": 6.68701171875e-05,
      "step": 10956,
      "training_step_time": 0.4332854747772217
    },
    {
      "epoch": 6.6876220703125e-05,
      "model_forward_time": 0.11809134483337402,
      "step": 10957
    },
    {
      "epoch": 6.6876220703125e-05,
      "step": 10957,
      "training_step_time": 0.6056993007659912
    },
    {
      "epoch": 6.688232421875e-05,
      "model_forward_time": 0.11667418479919434,
      "step": 10958
    },
    {
      "epoch": 6.688232421875e-05,
      "step": 10958,
      "training_step_time": 0.4865105152130127
    },
    {
      "epoch": 6.6888427734375e-05,
      "model_forward_time": 0.11725878715515137,
      "step": 10959
    },
    {
      "epoch": 6.6888427734375e-05,
      "step": 10959,
      "training_step_time": 0.38137316703796387
    },
    {
      "epoch": 6.689453125e-05,
      "grad_norm": 0.14806032180786133,
      "learning_rate": 9.526479223921366e-05,
      "loss": 0.0579,
      "step": 10960
    },
    {
      "epoch": 6.689453125e-05,
      "model_forward_time": 0.11617183685302734,
      "step": 10960
    },
    {
      "epoch": 6.689453125e-05,
      "step": 10960,
      "training_step_time": 0.3732411861419678
    },
    {
      "epoch": 6.6900634765625e-05,
      "model_forward_time": 0.11557292938232422,
      "step": 10961
    },
    {
      "epoch": 6.6900634765625e-05,
      "step": 10961,
      "training_step_time": 0.4275176525115967
    },
    {
      "epoch": 6.690673828125e-05,
      "model_forward_time": 0.11548352241516113,
      "step": 10962
    },
    {
      "epoch": 6.690673828125e-05,
      "step": 10962,
      "training_step_time": 0.44248485565185547
    },
    {
      "epoch": 6.6912841796875e-05,
      "model_forward_time": 0.11515641212463379,
      "step": 10963
    },
    {
      "epoch": 6.6912841796875e-05,
      "step": 10963,
      "training_step_time": 0.5930461883544922
    },
    {
      "epoch": 6.69189453125e-05,
      "model_forward_time": 0.11611485481262207,
      "step": 10964
    },
    {
      "epoch": 6.69189453125e-05,
      "step": 10964,
      "training_step_time": 0.43888378143310547
    },
    {
      "epoch": 6.6925048828125e-05,
      "model_forward_time": 0.11713600158691406,
      "step": 10965
    },
    {
      "epoch": 6.6925048828125e-05,
      "step": 10965,
      "training_step_time": 0.37741971015930176
    },
    {
      "epoch": 6.693115234375e-05,
      "model_forward_time": 0.11616301536560059,
      "step": 10966
    },
    {
      "epoch": 6.693115234375e-05,
      "step": 10966,
      "training_step_time": 0.3738725185394287
    },
    {
      "epoch": 6.6937255859375e-05,
      "model_forward_time": 0.11680102348327637,
      "step": 10967
    },
    {
      "epoch": 6.6937255859375e-05,
      "step": 10967,
      "training_step_time": 0.3814089298248291
    },
    {
      "epoch": 6.6943359375e-05,
      "model_forward_time": 0.11648964881896973,
      "step": 10968
    },
    {
      "epoch": 6.6943359375e-05,
      "step": 10968,
      "training_step_time": 0.3760354518890381
    },
    {
      "epoch": 6.6949462890625e-05,
      "model_forward_time": 0.11696910858154297,
      "step": 10969
    },
    {
      "epoch": 6.6949462890625e-05,
      "step": 10969,
      "training_step_time": 0.6601047515869141
    },
    {
      "epoch": 6.695556640625e-05,
      "grad_norm": 0.22817325592041016,
      "learning_rate": 9.525307930460266e-05,
      "loss": 0.0573,
      "step": 10970
    },
    {
      "epoch": 6.695556640625e-05,
      "model_forward_time": 0.1168680191040039,
      "step": 10970
    },
    {
      "epoch": 6.695556640625e-05,
      "step": 10970,
      "training_step_time": 0.42201828956604004
    },
    {
      "epoch": 6.6961669921875e-05,
      "model_forward_time": 0.11696267127990723,
      "step": 10971
    },
    {
      "epoch": 6.6961669921875e-05,
      "step": 10971,
      "training_step_time": 0.4232649803161621
    },
    {
      "epoch": 6.69677734375e-05,
      "model_forward_time": 0.11688828468322754,
      "step": 10972
    },
    {
      "epoch": 6.69677734375e-05,
      "step": 10972,
      "training_step_time": 0.41619420051574707
    },
    {
      "epoch": 6.6973876953125e-05,
      "model_forward_time": 0.11661815643310547,
      "step": 10973
    },
    {
      "epoch": 6.6973876953125e-05,
      "step": 10973,
      "training_step_time": 0.383068323135376
    },
    {
      "epoch": 6.697998046875e-05,
      "model_forward_time": 0.11560249328613281,
      "step": 10974
    },
    {
      "epoch": 6.697998046875e-05,
      "step": 10974,
      "training_step_time": 0.3929445743560791
    },
    {
      "epoch": 6.6986083984375e-05,
      "model_forward_time": 0.11856389045715332,
      "step": 10975
    },
    {
      "epoch": 6.6986083984375e-05,
      "step": 10975,
      "training_step_time": 0.4244346618652344
    },
    {
      "epoch": 6.69921875e-05,
      "model_forward_time": 0.11670160293579102,
      "step": 10976
    },
    {
      "epoch": 6.69921875e-05,
      "step": 10976,
      "training_step_time": 0.8151755332946777
    },
    {
      "epoch": 6.6998291015625e-05,
      "model_forward_time": 0.1166994571685791,
      "step": 10977
    },
    {
      "epoch": 6.6998291015625e-05,
      "step": 10977,
      "training_step_time": 0.3886725902557373
    },
    {
      "epoch": 6.700439453125e-05,
      "model_forward_time": 0.11707806587219238,
      "step": 10978
    },
    {
      "epoch": 6.700439453125e-05,
      "step": 10978,
      "training_step_time": 0.377683162689209
    },
    {
      "epoch": 6.7010498046875e-05,
      "model_forward_time": 0.1162877082824707,
      "step": 10979
    },
    {
      "epoch": 6.7010498046875e-05,
      "step": 10979,
      "training_step_time": 0.38693690299987793
    },
    {
      "epoch": 6.70166015625e-05,
      "grad_norm": 0.15097226202487946,
      "learning_rate": 9.524135262330098e-05,
      "loss": 0.0625,
      "step": 10980
    },
    {
      "epoch": 6.70166015625e-05,
      "model_forward_time": 0.11731934547424316,
      "step": 10980
    },
    {
      "epoch": 6.70166015625e-05,
      "step": 10980,
      "training_step_time": 0.38168931007385254
    },
    {
      "epoch": 6.7022705078125e-05,
      "model_forward_time": 0.11664772033691406,
      "step": 10981
    },
    {
      "epoch": 6.7022705078125e-05,
      "step": 10981,
      "training_step_time": 0.37296628952026367
    },
    {
      "epoch": 6.702880859375e-05,
      "model_forward_time": 0.11758565902709961,
      "step": 10982
    },
    {
      "epoch": 6.702880859375e-05,
      "step": 10982,
      "training_step_time": 0.5004668235778809
    },
    {
      "epoch": 6.7034912109375e-05,
      "model_forward_time": 0.1172482967376709,
      "step": 10983
    },
    {
      "epoch": 6.7034912109375e-05,
      "step": 10983,
      "training_step_time": 0.4278876781463623
    },
    {
      "epoch": 6.7041015625e-05,
      "model_forward_time": 0.11793828010559082,
      "step": 10984
    },
    {
      "epoch": 6.7041015625e-05,
      "step": 10984,
      "training_step_time": 0.434558629989624
    },
    {
      "epoch": 6.7047119140625e-05,
      "model_forward_time": 0.11748385429382324,
      "step": 10985
    },
    {
      "epoch": 6.7047119140625e-05,
      "step": 10985,
      "training_step_time": 0.3890247344970703
    },
    {
      "epoch": 6.705322265625e-05,
      "model_forward_time": 0.11716437339782715,
      "step": 10986
    },
    {
      "epoch": 6.705322265625e-05,
      "step": 10986,
      "training_step_time": 0.3829209804534912
    },
    {
      "epoch": 6.7059326171875e-05,
      "model_forward_time": 0.11717438697814941,
      "step": 10987
    },
    {
      "epoch": 6.7059326171875e-05,
      "step": 10987,
      "training_step_time": 0.38940978050231934
    },
    {
      "epoch": 6.70654296875e-05,
      "model_forward_time": 0.11684799194335938,
      "step": 10988
    },
    {
      "epoch": 6.70654296875e-05,
      "step": 10988,
      "training_step_time": 0.7745785713195801
    },
    {
      "epoch": 6.7071533203125e-05,
      "model_forward_time": 0.11654448509216309,
      "step": 10989
    },
    {
      "epoch": 6.7071533203125e-05,
      "step": 10989,
      "training_step_time": 0.4855632781982422
    },
    {
      "epoch": 6.707763671875e-05,
      "grad_norm": 0.17977812886238098,
      "learning_rate": 9.522961219887092e-05,
      "loss": 0.0607,
      "step": 10990
    },
    {
      "epoch": 6.707763671875e-05,
      "model_forward_time": 0.11578130722045898,
      "step": 10990
    },
    {
      "epoch": 6.707763671875e-05,
      "step": 10990,
      "training_step_time": 0.38483190536499023
    },
    {
      "epoch": 6.7083740234375e-05,
      "model_forward_time": 0.11595487594604492,
      "step": 10991
    },
    {
      "epoch": 6.7083740234375e-05,
      "step": 10991,
      "training_step_time": 0.40113377571105957
    },
    {
      "epoch": 6.708984375e-05,
      "model_forward_time": 0.11515235900878906,
      "step": 10992
    },
    {
      "epoch": 6.708984375e-05,
      "step": 10992,
      "training_step_time": 0.374082088470459
    },
    {
      "epoch": 6.7095947265625e-05,
      "model_forward_time": 0.11552929878234863,
      "step": 10993
    },
    {
      "epoch": 6.7095947265625e-05,
      "step": 10993,
      "training_step_time": 0.3781261444091797
    },
    {
      "epoch": 6.710205078125e-05,
      "model_forward_time": 0.11633515357971191,
      "step": 10994
    },
    {
      "epoch": 6.710205078125e-05,
      "step": 10994,
      "training_step_time": 0.6210653781890869
    },
    {
      "epoch": 6.7108154296875e-05,
      "model_forward_time": 0.11606931686401367,
      "step": 10995
    },
    {
      "epoch": 6.7108154296875e-05,
      "step": 10995,
      "training_step_time": 0.3922743797302246
    },
    {
      "epoch": 6.71142578125e-05,
      "model_forward_time": 0.11597132682800293,
      "step": 10996
    },
    {
      "epoch": 6.71142578125e-05,
      "step": 10996,
      "training_step_time": 0.43363451957702637
    },
    {
      "epoch": 6.7120361328125e-05,
      "model_forward_time": 0.11593961715698242,
      "step": 10997
    },
    {
      "epoch": 6.7120361328125e-05,
      "step": 10997,
      "training_step_time": 0.45312047004699707
    },
    {
      "epoch": 6.712646484375e-05,
      "model_forward_time": 0.11778664588928223,
      "step": 10998
    },
    {
      "epoch": 6.712646484375e-05,
      "step": 10998,
      "training_step_time": 0.4636051654815674
    },
    {
      "epoch": 6.7132568359375e-05,
      "model_forward_time": 0.11557650566101074,
      "step": 10999
    },
    {
      "epoch": 6.7132568359375e-05,
      "step": 10999,
      "training_step_time": 0.38410162925720215
    },
    {
      "epoch": 6.7138671875e-05,
      "grad_norm": 0.1868220418691635,
      "learning_rate": 9.521785803487889e-05,
      "loss": 0.0687,
      "step": 11000
    },
    {
      "epoch": 6.7138671875e-05,
      "model_forward_time": 0.1139528751373291,
      "step": 11000
    },
    {
      "epoch": 6.7138671875e-05,
      "step": 11000,
      "training_step_time": 0.3823685646057129
    },
    {
      "epoch": 6.7144775390625e-05,
      "model_forward_time": 0.11357617378234863,
      "step": 11001
    },
    {
      "epoch": 6.7144775390625e-05,
      "step": 11001,
      "training_step_time": 0.3636763095855713
    },
    {
      "epoch": 6.715087890625e-05,
      "model_forward_time": 0.11447882652282715,
      "step": 11002
    },
    {
      "epoch": 6.715087890625e-05,
      "step": 11002,
      "training_step_time": 0.3659372329711914
    },
    {
      "epoch": 6.7156982421875e-05,
      "model_forward_time": 0.11477422714233398,
      "step": 11003
    },
    {
      "epoch": 6.7156982421875e-05,
      "step": 11003,
      "training_step_time": 0.38969874382019043
    },
    {
      "epoch": 6.71630859375e-05,
      "model_forward_time": 0.11528253555297852,
      "step": 11004
    },
    {
      "epoch": 6.71630859375e-05,
      "step": 11004,
      "training_step_time": 0.41156768798828125
    },
    {
      "epoch": 6.7169189453125e-05,
      "model_forward_time": 0.11547446250915527,
      "step": 11005
    },
    {
      "epoch": 6.7169189453125e-05,
      "step": 11005,
      "training_step_time": 0.3819694519042969
    },
    {
      "epoch": 6.717529296875e-05,
      "model_forward_time": 0.11597824096679688,
      "step": 11006
    },
    {
      "epoch": 6.717529296875e-05,
      "step": 11006,
      "training_step_time": 0.3900473117828369
    },
    {
      "epoch": 6.7181396484375e-05,
      "model_forward_time": 0.11672806739807129,
      "step": 11007
    },
    {
      "epoch": 6.7181396484375e-05,
      "step": 11007,
      "training_step_time": 0.37558698654174805
    },
    {
      "epoch": 6.71875e-05,
      "model_forward_time": 0.11660933494567871,
      "step": 11008
    },
    {
      "epoch": 6.71875e-05,
      "step": 11008,
      "training_step_time": 0.39931201934814453
    },
    {
      "epoch": 6.7193603515625e-05,
      "model_forward_time": 0.11623382568359375,
      "step": 11009
    },
    {
      "epoch": 6.7193603515625e-05,
      "step": 11009,
      "training_step_time": 0.39205288887023926
    },
    {
      "epoch": 6.719970703125e-05,
      "grad_norm": 0.21700966358184814,
      "learning_rate": 9.520609013489547e-05,
      "loss": 0.0578,
      "step": 11010
    },
    {
      "epoch": 6.719970703125e-05,
      "model_forward_time": 0.11693501472473145,
      "step": 11010
    },
    {
      "epoch": 6.719970703125e-05,
      "step": 11010,
      "training_step_time": 0.3834266662597656
    },
    {
      "epoch": 6.7205810546875e-05,
      "model_forward_time": 0.11672592163085938,
      "step": 11011
    },
    {
      "epoch": 6.7205810546875e-05,
      "step": 11011,
      "training_step_time": 0.3942382335662842
    },
    {
      "epoch": 6.72119140625e-05,
      "model_forward_time": 0.11775374412536621,
      "step": 11012
    },
    {
      "epoch": 6.72119140625e-05,
      "step": 11012,
      "training_step_time": 0.4277517795562744
    },
    {
      "epoch": 6.7218017578125e-05,
      "model_forward_time": 0.11720442771911621,
      "step": 11013
    },
    {
      "epoch": 6.7218017578125e-05,
      "step": 11013,
      "training_step_time": 0.40399670600891113
    },
    {
      "epoch": 6.722412109375e-05,
      "model_forward_time": 0.11718201637268066,
      "step": 11014
    },
    {
      "epoch": 6.722412109375e-05,
      "step": 11014,
      "training_step_time": 0.47392892837524414
    },
    {
      "epoch": 6.7230224609375e-05,
      "model_forward_time": 0.11714887619018555,
      "step": 11015
    },
    {
      "epoch": 6.7230224609375e-05,
      "step": 11015,
      "training_step_time": 0.390608549118042
    },
    {
      "epoch": 6.7236328125e-05,
      "model_forward_time": 0.11912131309509277,
      "step": 11016
    },
    {
      "epoch": 6.7236328125e-05,
      "step": 11016,
      "training_step_time": 0.38662099838256836
    },
    {
      "epoch": 6.7242431640625e-05,
      "model_forward_time": 0.11598682403564453,
      "step": 11017
    },
    {
      "epoch": 6.7242431640625e-05,
      "step": 11017,
      "training_step_time": 0.40679383277893066
    },
    {
      "epoch": 6.724853515625e-05,
      "model_forward_time": 0.12074661254882812,
      "step": 11018
    },
    {
      "epoch": 6.724853515625e-05,
      "step": 11018,
      "training_step_time": 0.44870591163635254
    },
    {
      "epoch": 6.7254638671875e-05,
      "model_forward_time": 0.11658120155334473,
      "step": 11019
    },
    {
      "epoch": 6.7254638671875e-05,
      "step": 11019,
      "training_step_time": 0.49229979515075684
    },
    {
      "epoch": 6.72607421875e-05,
      "grad_norm": 0.23430483043193817,
      "learning_rate": 9.51943085024955e-05,
      "loss": 0.0614,
      "step": 11020
    },
    {
      "epoch": 6.72607421875e-05,
      "model_forward_time": 0.115997314453125,
      "step": 11020
    },
    {
      "epoch": 6.72607421875e-05,
      "step": 11020,
      "training_step_time": 0.3912186622619629
    },
    {
      "epoch": 6.7266845703125e-05,
      "model_forward_time": 0.12115311622619629,
      "step": 11021
    },
    {
      "epoch": 6.7266845703125e-05,
      "step": 11021,
      "training_step_time": 0.39237451553344727
    },
    {
      "epoch": 6.727294921875e-05,
      "model_forward_time": 0.11878561973571777,
      "step": 11022
    },
    {
      "epoch": 6.727294921875e-05,
      "step": 11022,
      "training_step_time": 0.37987422943115234
    },
    {
      "epoch": 6.7279052734375e-05,
      "model_forward_time": 0.11669230461120605,
      "step": 11023
    },
    {
      "epoch": 6.7279052734375e-05,
      "step": 11023,
      "training_step_time": 0.385697603225708
    },
    {
      "epoch": 6.728515625e-05,
      "model_forward_time": 0.1173548698425293,
      "step": 11024
    },
    {
      "epoch": 6.728515625e-05,
      "step": 11024,
      "training_step_time": 0.377774715423584
    },
    {
      "epoch": 6.7291259765625e-05,
      "model_forward_time": 0.11675858497619629,
      "step": 11025
    },
    {
      "epoch": 6.7291259765625e-05,
      "step": 11025,
      "training_step_time": 0.40857434272766113
    },
    {
      "epoch": 6.729736328125e-05,
      "model_forward_time": 0.11666464805603027,
      "step": 11026
    },
    {
      "epoch": 6.729736328125e-05,
      "step": 11026,
      "training_step_time": 0.4048800468444824
    },
    {
      "epoch": 6.7303466796875e-05,
      "model_forward_time": 0.1193699836730957,
      "step": 11027
    },
    {
      "epoch": 6.7303466796875e-05,
      "step": 11027,
      "training_step_time": 0.39974498748779297
    },
    {
      "epoch": 6.73095703125e-05,
      "model_forward_time": 0.1165609359741211,
      "step": 11028
    },
    {
      "epoch": 6.73095703125e-05,
      "step": 11028,
      "training_step_time": 0.41364026069641113
    },
    {
      "epoch": 6.7315673828125e-05,
      "model_forward_time": 0.11666679382324219,
      "step": 11029
    },
    {
      "epoch": 6.7315673828125e-05,
      "step": 11029,
      "training_step_time": 0.4967031478881836
    },
    {
      "epoch": 6.732177734375e-05,
      "grad_norm": 0.17957651615142822,
      "learning_rate": 9.518251314125788e-05,
      "loss": 0.0664,
      "step": 11030
    },
    {
      "epoch": 6.732177734375e-05,
      "model_forward_time": 0.11578226089477539,
      "step": 11030
    },
    {
      "epoch": 6.732177734375e-05,
      "step": 11030,
      "training_step_time": 0.383220911026001
    },
    {
      "epoch": 6.7327880859375e-05,
      "model_forward_time": 0.11629152297973633,
      "step": 11031
    },
    {
      "epoch": 6.7327880859375e-05,
      "step": 11031,
      "training_step_time": 0.3730125427246094
    },
    {
      "epoch": 6.7333984375e-05,
      "model_forward_time": 0.11636638641357422,
      "step": 11032
    },
    {
      "epoch": 6.7333984375e-05,
      "step": 11032,
      "training_step_time": 0.43137025833129883
    },
    {
      "epoch": 6.7340087890625e-05,
      "model_forward_time": 0.11623215675354004,
      "step": 11033
    },
    {
      "epoch": 6.7340087890625e-05,
      "step": 11033,
      "training_step_time": 0.4800269603729248
    },
    {
      "epoch": 6.734619140625e-05,
      "model_forward_time": 0.11570405960083008,
      "step": 11034
    },
    {
      "epoch": 6.734619140625e-05,
      "step": 11034,
      "training_step_time": 0.3804759979248047
    },
    {
      "epoch": 6.7352294921875e-05,
      "model_forward_time": 0.11842584609985352,
      "step": 11035
    },
    {
      "epoch": 6.7352294921875e-05,
      "step": 11035,
      "training_step_time": 0.39533472061157227
    },
    {
      "epoch": 6.73583984375e-05,
      "model_forward_time": 0.11574292182922363,
      "step": 11036
    },
    {
      "epoch": 6.73583984375e-05,
      "step": 11036,
      "training_step_time": 0.3879842758178711
    },
    {
      "epoch": 6.7364501953125e-05,
      "model_forward_time": 0.11925768852233887,
      "step": 11037
    },
    {
      "epoch": 6.7364501953125e-05,
      "step": 11037,
      "training_step_time": 0.3936123847961426
    },
    {
      "epoch": 6.737060546875e-05,
      "model_forward_time": 0.11612486839294434,
      "step": 11038
    },
    {
      "epoch": 6.737060546875e-05,
      "step": 11038,
      "training_step_time": 0.3861103057861328
    },
    {
      "epoch": 6.7376708984375e-05,
      "model_forward_time": 0.1181185245513916,
      "step": 11039
    },
    {
      "epoch": 6.7376708984375e-05,
      "step": 11039,
      "training_step_time": 0.4165041446685791
    },
    {
      "epoch": 6.73828125e-05,
      "grad_norm": 0.14123547077178955,
      "learning_rate": 9.517070405476575e-05,
      "loss": 0.0543,
      "step": 11040
    },
    {
      "epoch": 6.73828125e-05,
      "model_forward_time": 0.11827921867370605,
      "step": 11040
    },
    {
      "epoch": 6.73828125e-05,
      "step": 11040,
      "training_step_time": 0.37868523597717285
    },
    {
      "epoch": 6.7388916015625e-05,
      "model_forward_time": 0.11755895614624023,
      "step": 11041
    },
    {
      "epoch": 6.7388916015625e-05,
      "step": 11041,
      "training_step_time": 0.40227770805358887
    },
    {
      "epoch": 6.739501953125e-05,
      "model_forward_time": 0.11855077743530273,
      "step": 11042
    },
    {
      "epoch": 6.739501953125e-05,
      "step": 11042,
      "training_step_time": 0.45201683044433594
    },
    {
      "epoch": 6.7401123046875e-05,
      "model_forward_time": 0.11762523651123047,
      "step": 11043
    },
    {
      "epoch": 6.7401123046875e-05,
      "step": 11043,
      "training_step_time": 0.46924400329589844
    },
    {
      "epoch": 6.74072265625e-05,
      "model_forward_time": 0.1217951774597168,
      "step": 11044
    },
    {
      "epoch": 6.74072265625e-05,
      "step": 11044,
      "training_step_time": 0.4459376335144043
    },
    {
      "epoch": 6.7413330078125e-05,
      "model_forward_time": 0.11814379692077637,
      "step": 11045
    },
    {
      "epoch": 6.7413330078125e-05,
      "step": 11045,
      "training_step_time": 0.38767337799072266
    },
    {
      "epoch": 6.741943359375e-05,
      "model_forward_time": 0.1163797378540039,
      "step": 11046
    },
    {
      "epoch": 6.741943359375e-05,
      "step": 11046,
      "training_step_time": 0.3866603374481201
    },
    {
      "epoch": 6.7425537109375e-05,
      "model_forward_time": 0.11681008338928223,
      "step": 11047
    },
    {
      "epoch": 6.7425537109375e-05,
      "step": 11047,
      "training_step_time": 0.6817722320556641
    },
    {
      "epoch": 6.7431640625e-05,
      "model_forward_time": 0.11614155769348145,
      "step": 11048
    },
    {
      "epoch": 6.7431640625e-05,
      "step": 11048,
      "training_step_time": 0.8573689460754395
    },
    {
      "epoch": 6.7437744140625e-05,
      "model_forward_time": 0.11616015434265137,
      "step": 11049
    },
    {
      "epoch": 6.7437744140625e-05,
      "step": 11049,
      "training_step_time": 0.37943458557128906
    },
    {
      "epoch": 6.744384765625e-05,
      "grad_norm": 0.21424169838428497,
      "learning_rate": 9.515888124660638e-05,
      "loss": 0.0592,
      "step": 11050
    },
    {
      "epoch": 6.744384765625e-05,
      "model_forward_time": 0.1153407096862793,
      "step": 11050
    },
    {
      "epoch": 6.744384765625e-05,
      "step": 11050,
      "training_step_time": 0.38692641258239746
    },
    {
      "epoch": 6.7449951171875e-05,
      "model_forward_time": 0.11633086204528809,
      "step": 11051
    },
    {
      "epoch": 6.7449951171875e-05,
      "step": 11051,
      "training_step_time": 0.3850288391113281
    },
    {
      "epoch": 6.74560546875e-05,
      "model_forward_time": 0.11622786521911621,
      "step": 11052
    },
    {
      "epoch": 6.74560546875e-05,
      "step": 11052,
      "training_step_time": 0.3829362392425537
    },
    {
      "epoch": 6.7462158203125e-05,
      "model_forward_time": 0.11572909355163574,
      "step": 11053
    },
    {
      "epoch": 6.7462158203125e-05,
      "step": 11053,
      "training_step_time": 0.5128347873687744
    },
    {
      "epoch": 6.746826171875e-05,
      "model_forward_time": 0.11565518379211426,
      "step": 11054
    },
    {
      "epoch": 6.746826171875e-05,
      "step": 11054,
      "training_step_time": 0.5903472900390625
    },
    {
      "epoch": 6.7474365234375e-05,
      "model_forward_time": 0.11504268646240234,
      "step": 11055
    },
    {
      "epoch": 6.7474365234375e-05,
      "step": 11055,
      "training_step_time": 0.41367101669311523
    },
    {
      "epoch": 6.748046875e-05,
      "model_forward_time": 0.11530041694641113,
      "step": 11056
    },
    {
      "epoch": 6.748046875e-05,
      "step": 11056,
      "training_step_time": 0.42813563346862793
    },
    {
      "epoch": 6.7486572265625e-05,
      "model_forward_time": 0.11513400077819824,
      "step": 11057
    },
    {
      "epoch": 6.7486572265625e-05,
      "step": 11057,
      "training_step_time": 0.37517762184143066
    },
    {
      "epoch": 6.749267578125e-05,
      "model_forward_time": 0.1156623363494873,
      "step": 11058
    },
    {
      "epoch": 6.749267578125e-05,
      "step": 11058,
      "training_step_time": 0.3863687515258789
    },
    {
      "epoch": 6.7498779296875e-05,
      "model_forward_time": 0.11653447151184082,
      "step": 11059
    },
    {
      "epoch": 6.7498779296875e-05,
      "step": 11059,
      "training_step_time": 0.9672353267669678
    },
    {
      "epoch": 6.75048828125e-05,
      "grad_norm": 0.17761601507663727,
      "learning_rate": 9.514704472037123e-05,
      "loss": 0.0535,
      "step": 11060
    },
    {
      "epoch": 6.75048828125e-05,
      "model_forward_time": 0.11519742012023926,
      "step": 11060
    },
    {
      "epoch": 6.75048828125e-05,
      "step": 11060,
      "training_step_time": 0.3880298137664795
    },
    {
      "epoch": 6.7510986328125e-05,
      "model_forward_time": 0.11478066444396973,
      "step": 11061
    },
    {
      "epoch": 6.7510986328125e-05,
      "step": 11061,
      "training_step_time": 0.3913891315460205
    },
    {
      "epoch": 6.751708984375e-05,
      "model_forward_time": 0.11548805236816406,
      "step": 11062
    },
    {
      "epoch": 6.751708984375e-05,
      "step": 11062,
      "training_step_time": 0.37824273109436035
    },
    {
      "epoch": 6.7523193359375e-05,
      "model_forward_time": 0.11628556251525879,
      "step": 11063
    },
    {
      "epoch": 6.7523193359375e-05,
      "step": 11063,
      "training_step_time": 0.41056060791015625
    },
    {
      "epoch": 6.7529296875e-05,
      "model_forward_time": 0.1153268814086914,
      "step": 11064
    },
    {
      "epoch": 6.7529296875e-05,
      "step": 11064,
      "training_step_time": 0.3806290626525879
    },
    {
      "epoch": 6.7535400390625e-05,
      "model_forward_time": 0.11687493324279785,
      "step": 11065
    },
    {
      "epoch": 6.7535400390625e-05,
      "step": 11065,
      "training_step_time": 0.736370325088501
    },
    {
      "epoch": 6.754150390625e-05,
      "model_forward_time": 0.11517214775085449,
      "step": 11066
    },
    {
      "epoch": 6.754150390625e-05,
      "step": 11066,
      "training_step_time": 0.4469587802886963
    },
    {
      "epoch": 6.7547607421875e-05,
      "model_forward_time": 0.11580729484558105,
      "step": 11067
    },
    {
      "epoch": 6.7547607421875e-05,
      "step": 11067,
      "training_step_time": 0.4013800621032715
    },
    {
      "epoch": 6.75537109375e-05,
      "model_forward_time": 0.1153564453125,
      "step": 11068
    },
    {
      "epoch": 6.75537109375e-05,
      "step": 11068,
      "training_step_time": 0.43245911598205566
    },
    {
      "epoch": 6.7559814453125e-05,
      "model_forward_time": 0.11578106880187988,
      "step": 11069
    },
    {
      "epoch": 6.7559814453125e-05,
      "step": 11069,
      "training_step_time": 0.44323062896728516
    },
    {
      "epoch": 6.756591796875e-05,
      "grad_norm": 0.14882133901119232,
      "learning_rate": 9.513519447965595e-05,
      "loss": 0.0632,
      "step": 11070
    },
    {
      "epoch": 6.756591796875e-05,
      "model_forward_time": 0.11546564102172852,
      "step": 11070
    },
    {
      "epoch": 6.756591796875e-05,
      "step": 11070,
      "training_step_time": 0.3999624252319336
    },
    {
      "epoch": 6.7572021484375e-05,
      "model_forward_time": 0.11752104759216309,
      "step": 11071
    },
    {
      "epoch": 6.7572021484375e-05,
      "step": 11071,
      "training_step_time": 1.0559256076812744
    },
    {
      "epoch": 6.7578125e-05,
      "model_forward_time": 0.11679840087890625,
      "step": 11072
    },
    {
      "epoch": 6.7578125e-05,
      "step": 11072,
      "training_step_time": 0.42694640159606934
    },
    {
      "epoch": 6.7584228515625e-05,
      "model_forward_time": 0.11587357521057129,
      "step": 11073
    },
    {
      "epoch": 6.7584228515625e-05,
      "step": 11073,
      "training_step_time": 0.39005184173583984
    },
    {
      "epoch": 6.759033203125e-05,
      "model_forward_time": 0.11457610130310059,
      "step": 11074
    },
    {
      "epoch": 6.759033203125e-05,
      "step": 11074,
      "training_step_time": 0.38408875465393066
    },
    {
      "epoch": 6.7596435546875e-05,
      "model_forward_time": 0.11420536041259766,
      "step": 11075
    },
    {
      "epoch": 6.7596435546875e-05,
      "step": 11075,
      "training_step_time": 0.3952474594116211
    },
    {
      "epoch": 6.76025390625e-05,
      "model_forward_time": 0.1158452033996582,
      "step": 11076
    },
    {
      "epoch": 6.76025390625e-05,
      "step": 11076,
      "training_step_time": 0.39618802070617676
    },
    {
      "epoch": 6.7608642578125e-05,
      "model_forward_time": 0.11760425567626953,
      "step": 11077
    },
    {
      "epoch": 6.7608642578125e-05,
      "step": 11077,
      "training_step_time": 0.40478062629699707
    },
    {
      "epoch": 6.761474609375e-05,
      "model_forward_time": 0.11596131324768066,
      "step": 11078
    },
    {
      "epoch": 6.761474609375e-05,
      "step": 11078,
      "training_step_time": 0.39171433448791504
    },
    {
      "epoch": 6.7620849609375e-05,
      "model_forward_time": 0.1157064437866211,
      "step": 11079
    },
    {
      "epoch": 6.7620849609375e-05,
      "step": 11079,
      "training_step_time": 0.3941178321838379
    },
    {
      "epoch": 6.7626953125e-05,
      "grad_norm": 0.24309615790843964,
      "learning_rate": 9.512333052806033e-05,
      "loss": 0.0612,
      "step": 11080
    },
    {
      "epoch": 6.7626953125e-05,
      "model_forward_time": 0.11554217338562012,
      "step": 11080
    },
    {
      "epoch": 6.7626953125e-05,
      "step": 11080,
      "training_step_time": 0.3954613208770752
    },
    {
      "epoch": 6.7633056640625e-05,
      "model_forward_time": 0.11803507804870605,
      "step": 11081
    },
    {
      "epoch": 6.7633056640625e-05,
      "step": 11081,
      "training_step_time": 0.4704415798187256
    },
    {
      "epoch": 6.763916015625e-05,
      "model_forward_time": 0.11816191673278809,
      "step": 11082
    },
    {
      "epoch": 6.763916015625e-05,
      "step": 11082,
      "training_step_time": 0.4192633628845215
    },
    {
      "epoch": 6.7645263671875e-05,
      "model_forward_time": 0.11870884895324707,
      "step": 11083
    },
    {
      "epoch": 6.7645263671875e-05,
      "step": 11083,
      "training_step_time": 0.44095325469970703
    },
    {
      "epoch": 6.76513671875e-05,
      "model_forward_time": 0.11766958236694336,
      "step": 11084
    },
    {
      "epoch": 6.76513671875e-05,
      "step": 11084,
      "training_step_time": 0.45631837844848633
    },
    {
      "epoch": 6.7657470703125e-05,
      "model_forward_time": 0.11842536926269531,
      "step": 11085
    },
    {
      "epoch": 6.7657470703125e-05,
      "step": 11085,
      "training_step_time": 0.44972658157348633
    },
    {
      "epoch": 6.766357421875e-05,
      "model_forward_time": 0.11728167533874512,
      "step": 11086
    },
    {
      "epoch": 6.766357421875e-05,
      "step": 11086,
      "training_step_time": 0.4419736862182617
    },
    {
      "epoch": 6.7669677734375e-05,
      "model_forward_time": 0.11774563789367676,
      "step": 11087
    },
    {
      "epoch": 6.7669677734375e-05,
      "step": 11087,
      "training_step_time": 0.46317338943481445
    },
    {
      "epoch": 6.767578125e-05,
      "model_forward_time": 0.11791229248046875,
      "step": 11088
    },
    {
      "epoch": 6.767578125e-05,
      "step": 11088,
      "training_step_time": 0.3816533088684082
    },
    {
      "epoch": 6.7681884765625e-05,
      "model_forward_time": 0.11754536628723145,
      "step": 11089
    },
    {
      "epoch": 6.7681884765625e-05,
      "step": 11089,
      "training_step_time": 0.3895285129547119
    },
    {
      "epoch": 6.768798828125e-05,
      "grad_norm": 0.23197102546691895,
      "learning_rate": 9.511145286918828e-05,
      "loss": 0.0591,
      "step": 11090
    },
    {
      "epoch": 6.768798828125e-05,
      "model_forward_time": 0.11829686164855957,
      "step": 11090
    },
    {
      "epoch": 6.768798828125e-05,
      "step": 11090,
      "training_step_time": 0.5942482948303223
    },
    {
      "epoch": 6.7694091796875e-05,
      "model_forward_time": 0.11616277694702148,
      "step": 11091
    },
    {
      "epoch": 6.7694091796875e-05,
      "step": 11091,
      "training_step_time": 0.3796505928039551
    },
    {
      "epoch": 6.77001953125e-05,
      "model_forward_time": 0.11764192581176758,
      "step": 11092
    },
    {
      "epoch": 6.77001953125e-05,
      "step": 11092,
      "training_step_time": 0.3902156352996826
    },
    {
      "epoch": 6.7706298828125e-05,
      "model_forward_time": 0.11702418327331543,
      "step": 11093
    },
    {
      "epoch": 6.7706298828125e-05,
      "step": 11093,
      "training_step_time": 0.3760569095611572
    },
    {
      "epoch": 6.771240234375e-05,
      "model_forward_time": 0.1186058521270752,
      "step": 11094
    },
    {
      "epoch": 6.771240234375e-05,
      "step": 11094,
      "training_step_time": 0.3829379081726074
    },
    {
      "epoch": 6.7718505859375e-05,
      "model_forward_time": 0.11775636672973633,
      "step": 11095
    },
    {
      "epoch": 6.7718505859375e-05,
      "step": 11095,
      "training_step_time": 0.46567201614379883
    },
    {
      "epoch": 6.7724609375e-05,
      "model_forward_time": 0.11757802963256836,
      "step": 11096
    },
    {
      "epoch": 6.7724609375e-05,
      "step": 11096,
      "training_step_time": 0.423321008682251
    },
    {
      "epoch": 6.7730712890625e-05,
      "model_forward_time": 0.11706757545471191,
      "step": 11097
    },
    {
      "epoch": 6.7730712890625e-05,
      "step": 11097,
      "training_step_time": 0.38721442222595215
    },
    {
      "epoch": 6.773681640625e-05,
      "model_forward_time": 0.11735391616821289,
      "step": 11098
    },
    {
      "epoch": 6.773681640625e-05,
      "step": 11098,
      "training_step_time": 0.3828554153442383
    },
    {
      "epoch": 6.7742919921875e-05,
      "model_forward_time": 0.1183931827545166,
      "step": 11099
    },
    {
      "epoch": 6.7742919921875e-05,
      "step": 11099,
      "training_step_time": 0.4866304397583008
    },
    {
      "epoch": 6.77490234375e-05,
      "grad_norm": 0.2620666027069092,
      "learning_rate": 9.509956150664796e-05,
      "loss": 0.0618,
      "step": 11100
    },
    {
      "epoch": 6.77490234375e-05,
      "model_forward_time": 0.11838245391845703,
      "step": 11100
    },
    {
      "epoch": 6.77490234375e-05,
      "step": 11100,
      "training_step_time": 0.43133068084716797
    },
    {
      "epoch": 6.7755126953125e-05,
      "model_forward_time": 0.1164700984954834,
      "step": 11101
    },
    {
      "epoch": 6.7755126953125e-05,
      "step": 11101,
      "training_step_time": 0.47411298751831055
    },
    {
      "epoch": 6.776123046875e-05,
      "model_forward_time": 0.11595845222473145,
      "step": 11102
    },
    {
      "epoch": 6.776123046875e-05,
      "step": 11102,
      "training_step_time": 0.386749267578125
    },
    {
      "epoch": 6.7767333984375e-05,
      "model_forward_time": 0.11603784561157227,
      "step": 11103
    },
    {
      "epoch": 6.7767333984375e-05,
      "step": 11103,
      "training_step_time": 0.3880147933959961
    },
    {
      "epoch": 6.77734375e-05,
      "model_forward_time": 0.11525535583496094,
      "step": 11104
    },
    {
      "epoch": 6.77734375e-05,
      "step": 11104,
      "training_step_time": 0.3822193145751953
    },
    {
      "epoch": 6.7779541015625e-05,
      "model_forward_time": 0.11629843711853027,
      "step": 11105
    },
    {
      "epoch": 6.7779541015625e-05,
      "step": 11105,
      "training_step_time": 0.3872199058532715
    },
    {
      "epoch": 6.778564453125e-05,
      "model_forward_time": 0.11808323860168457,
      "step": 11106
    },
    {
      "epoch": 6.778564453125e-05,
      "step": 11106,
      "training_step_time": 0.38239336013793945
    },
    {
      "epoch": 6.7791748046875e-05,
      "model_forward_time": 0.11811614036560059,
      "step": 11107
    },
    {
      "epoch": 6.7791748046875e-05,
      "step": 11107,
      "training_step_time": 0.5521063804626465
    },
    {
      "epoch": 6.77978515625e-05,
      "model_forward_time": 0.1179811954498291,
      "step": 11108
    },
    {
      "epoch": 6.77978515625e-05,
      "step": 11108,
      "training_step_time": 0.7016024589538574
    },
    {
      "epoch": 6.7803955078125e-05,
      "model_forward_time": 0.11671209335327148,
      "step": 11109
    },
    {
      "epoch": 6.7803955078125e-05,
      "step": 11109,
      "training_step_time": 0.44048428535461426
    },
    {
      "epoch": 6.781005859375e-05,
      "grad_norm": 0.22731119394302368,
      "learning_rate": 9.508765644405162e-05,
      "loss": 0.0668,
      "step": 11110
    },
    {
      "epoch": 6.781005859375e-05,
      "model_forward_time": 0.11786556243896484,
      "step": 11110
    },
    {
      "epoch": 6.781005859375e-05,
      "step": 11110,
      "training_step_time": 0.42878127098083496
    },
    {
      "epoch": 6.7816162109375e-05,
      "model_forward_time": 0.11618328094482422,
      "step": 11111
    },
    {
      "epoch": 6.7816162109375e-05,
      "step": 11111,
      "training_step_time": 0.39763951301574707
    },
    {
      "epoch": 6.7822265625e-05,
      "model_forward_time": 0.12377572059631348,
      "step": 11112
    },
    {
      "epoch": 6.7822265625e-05,
      "step": 11112,
      "training_step_time": 0.38500475883483887
    },
    {
      "epoch": 6.7828369140625e-05,
      "model_forward_time": 0.11605143547058105,
      "step": 11113
    },
    {
      "epoch": 6.7828369140625e-05,
      "step": 11113,
      "training_step_time": 0.43523216247558594
    },
    {
      "epoch": 6.783447265625e-05,
      "model_forward_time": 0.11612534523010254,
      "step": 11114
    },
    {
      "epoch": 6.783447265625e-05,
      "step": 11114,
      "training_step_time": 1.0127904415130615
    },
    {
      "epoch": 6.7840576171875e-05,
      "model_forward_time": 0.11599874496459961,
      "step": 11115
    },
    {
      "epoch": 6.7840576171875e-05,
      "step": 11115,
      "training_step_time": 0.38758063316345215
    },
    {
      "epoch": 6.78466796875e-05,
      "model_forward_time": 0.11611795425415039,
      "step": 11116
    },
    {
      "epoch": 6.78466796875e-05,
      "step": 11116,
      "training_step_time": 0.37917542457580566
    },
    {
      "epoch": 6.7852783203125e-05,
      "model_forward_time": 0.11521768569946289,
      "step": 11117
    },
    {
      "epoch": 6.7852783203125e-05,
      "step": 11117,
      "training_step_time": 0.37915635108947754
    },
    {
      "epoch": 6.785888671875e-05,
      "model_forward_time": 0.1155691146850586,
      "step": 11118
    },
    {
      "epoch": 6.785888671875e-05,
      "step": 11118,
      "training_step_time": 0.41108179092407227
    },
    {
      "epoch": 6.7864990234375e-05,
      "model_forward_time": 0.11534833908081055,
      "step": 11119
    },
    {
      "epoch": 6.7864990234375e-05,
      "step": 11119,
      "training_step_time": 0.38512325286865234
    },
    {
      "epoch": 6.787109375e-05,
      "grad_norm": 0.17850741744041443,
      "learning_rate": 9.507573768501574e-05,
      "loss": 0.0591,
      "step": 11120
    },
    {
      "epoch": 6.787109375e-05,
      "model_forward_time": 0.11640191078186035,
      "step": 11120
    },
    {
      "epoch": 6.787109375e-05,
      "step": 11120,
      "training_step_time": 0.8402209281921387
    },
    {
      "epoch": 6.7877197265625e-05,
      "model_forward_time": 0.11538553237915039,
      "step": 11121
    },
    {
      "epoch": 6.7877197265625e-05,
      "step": 11121,
      "training_step_time": 0.49416279792785645
    },
    {
      "epoch": 6.788330078125e-05,
      "model_forward_time": 0.11496281623840332,
      "step": 11122
    },
    {
      "epoch": 6.788330078125e-05,
      "step": 11122,
      "training_step_time": 0.414933443069458
    },
    {
      "epoch": 6.7889404296875e-05,
      "model_forward_time": 0.1153721809387207,
      "step": 11123
    },
    {
      "epoch": 6.7889404296875e-05,
      "step": 11123,
      "training_step_time": 0.4063084125518799
    },
    {
      "epoch": 6.78955078125e-05,
      "model_forward_time": 0.11508393287658691,
      "step": 11124
    },
    {
      "epoch": 6.78955078125e-05,
      "step": 11124,
      "training_step_time": 0.37755465507507324
    },
    {
      "epoch": 6.7901611328125e-05,
      "model_forward_time": 0.11584663391113281,
      "step": 11125
    },
    {
      "epoch": 6.7901611328125e-05,
      "step": 11125,
      "training_step_time": 0.3661768436431885
    },
    {
      "epoch": 6.790771484375e-05,
      "model_forward_time": 0.11543583869934082,
      "step": 11126
    },
    {
      "epoch": 6.790771484375e-05,
      "step": 11126,
      "training_step_time": 0.787574052810669
    },
    {
      "epoch": 6.7913818359375e-05,
      "model_forward_time": 0.11674737930297852,
      "step": 11127
    },
    {
      "epoch": 6.7913818359375e-05,
      "step": 11127,
      "training_step_time": 0.3892238140106201
    },
    {
      "epoch": 6.7919921875e-05,
      "model_forward_time": 0.11532330513000488,
      "step": 11128
    },
    {
      "epoch": 6.7919921875e-05,
      "step": 11128,
      "training_step_time": 0.38028812408447266
    },
    {
      "epoch": 6.7926025390625e-05,
      "model_forward_time": 0.11729574203491211,
      "step": 11129
    },
    {
      "epoch": 6.7926025390625e-05,
      "step": 11129,
      "training_step_time": 0.3802943229675293
    },
    {
      "epoch": 6.793212890625e-05,
      "grad_norm": 0.18353208899497986,
      "learning_rate": 9.50638052331609e-05,
      "loss": 0.0589,
      "step": 11130
    },
    {
      "epoch": 6.793212890625e-05,
      "model_forward_time": 0.11715865135192871,
      "step": 11130
    },
    {
      "epoch": 6.793212890625e-05,
      "step": 11130,
      "training_step_time": 0.37676334381103516
    },
    {
      "epoch": 6.7938232421875e-05,
      "model_forward_time": 0.11703944206237793,
      "step": 11131
    },
    {
      "epoch": 6.7938232421875e-05,
      "step": 11131,
      "training_step_time": 0.386883020401001
    },
    {
      "epoch": 6.79443359375e-05,
      "model_forward_time": 0.11741185188293457,
      "step": 11132
    },
    {
      "epoch": 6.79443359375e-05,
      "step": 11132,
      "training_step_time": 0.9651496410369873
    },
    {
      "epoch": 6.7950439453125e-05,
      "model_forward_time": 0.11893987655639648,
      "step": 11133
    },
    {
      "epoch": 6.7950439453125e-05,
      "step": 11133,
      "training_step_time": 0.38115382194519043
    },
    {
      "epoch": 6.795654296875e-05,
      "model_forward_time": 0.11676287651062012,
      "step": 11134
    },
    {
      "epoch": 6.795654296875e-05,
      "step": 11134,
      "training_step_time": 0.43358802795410156
    },
    {
      "epoch": 6.7962646484375e-05,
      "model_forward_time": 0.11473965644836426,
      "step": 11135
    },
    {
      "epoch": 6.7962646484375e-05,
      "step": 11135,
      "training_step_time": 0.45714902877807617
    },
    {
      "epoch": 6.796875e-05,
      "model_forward_time": 0.11504364013671875,
      "step": 11136
    },
    {
      "epoch": 6.796875e-05,
      "step": 11136,
      "training_step_time": 0.36997127532958984
    },
    {
      "epoch": 6.7974853515625e-05,
      "model_forward_time": 0.11539578437805176,
      "step": 11137
    },
    {
      "epoch": 6.7974853515625e-05,
      "step": 11137,
      "training_step_time": 0.4329202175140381
    },
    {
      "epoch": 6.798095703125e-05,
      "model_forward_time": 0.11570000648498535,
      "step": 11138
    },
    {
      "epoch": 6.798095703125e-05,
      "step": 11138,
      "training_step_time": 0.6161665916442871
    },
    {
      "epoch": 6.7987060546875e-05,
      "model_forward_time": 0.11651134490966797,
      "step": 11139
    },
    {
      "epoch": 6.7987060546875e-05,
      "step": 11139,
      "training_step_time": 0.3836216926574707
    },
    {
      "epoch": 6.79931640625e-05,
      "grad_norm": 0.19897420704364777,
      "learning_rate": 9.505185909211188e-05,
      "loss": 0.0622,
      "step": 11140
    },
    {
      "epoch": 6.79931640625e-05,
      "model_forward_time": 0.11543607711791992,
      "step": 11140
    },
    {
      "epoch": 6.79931640625e-05,
      "step": 11140,
      "training_step_time": 0.4434518814086914
    },
    {
      "epoch": 6.7999267578125e-05,
      "model_forward_time": 0.11543011665344238,
      "step": 11141
    },
    {
      "epoch": 6.7999267578125e-05,
      "step": 11141,
      "training_step_time": 0.3838372230529785
    },
    {
      "epoch": 6.800537109375e-05,
      "model_forward_time": 0.11781835556030273,
      "step": 11142
    },
    {
      "epoch": 6.800537109375e-05,
      "step": 11142,
      "training_step_time": 0.38104748725891113
    },
    {
      "epoch": 6.8011474609375e-05,
      "model_forward_time": 0.11693859100341797,
      "step": 11143
    },
    {
      "epoch": 6.8011474609375e-05,
      "step": 11143,
      "training_step_time": 0.37764859199523926
    },
    {
      "epoch": 6.8017578125e-05,
      "model_forward_time": 0.11774992942810059,
      "step": 11144
    },
    {
      "epoch": 6.8017578125e-05,
      "step": 11144,
      "training_step_time": 0.8402872085571289
    },
    {
      "epoch": 6.8023681640625e-05,
      "model_forward_time": 0.11570048332214355,
      "step": 11145
    },
    {
      "epoch": 6.8023681640625e-05,
      "step": 11145,
      "training_step_time": 0.3896346092224121
    },
    {
      "epoch": 6.802978515625e-05,
      "model_forward_time": 0.11729884147644043,
      "step": 11146
    },
    {
      "epoch": 6.802978515625e-05,
      "step": 11146,
      "training_step_time": 0.4631788730621338
    },
    {
      "epoch": 6.8035888671875e-05,
      "model_forward_time": 0.11561226844787598,
      "step": 11147
    },
    {
      "epoch": 6.8035888671875e-05,
      "step": 11147,
      "training_step_time": 0.4860537052154541
    },
    {
      "epoch": 6.80419921875e-05,
      "model_forward_time": 0.11536645889282227,
      "step": 11148
    },
    {
      "epoch": 6.80419921875e-05,
      "step": 11148,
      "training_step_time": 0.4157378673553467
    },
    {
      "epoch": 6.8048095703125e-05,
      "model_forward_time": 0.11582255363464355,
      "step": 11149
    },
    {
      "epoch": 6.8048095703125e-05,
      "step": 11149,
      "training_step_time": 0.4497957229614258
    },
    {
      "epoch": 6.805419921875e-05,
      "grad_norm": 0.16658635437488556,
      "learning_rate": 9.50398992654976e-05,
      "loss": 0.0671,
      "step": 11150
    },
    {
      "epoch": 6.805419921875e-05,
      "model_forward_time": 0.11540985107421875,
      "step": 11150
    },
    {
      "epoch": 6.805419921875e-05,
      "step": 11150,
      "training_step_time": 0.41519594192504883
    },
    {
      "epoch": 6.8060302734375e-05,
      "model_forward_time": 0.11505007743835449,
      "step": 11151
    },
    {
      "epoch": 6.8060302734375e-05,
      "step": 11151,
      "training_step_time": 0.3767235279083252
    },
    {
      "epoch": 6.806640625e-05,
      "model_forward_time": 0.11489677429199219,
      "step": 11152
    },
    {
      "epoch": 6.806640625e-05,
      "step": 11152,
      "training_step_time": 0.4225039482116699
    },
    {
      "epoch": 6.8072509765625e-05,
      "model_forward_time": 0.11543846130371094,
      "step": 11153
    },
    {
      "epoch": 6.8072509765625e-05,
      "step": 11153,
      "training_step_time": 0.48068928718566895
    },
    {
      "epoch": 6.807861328125e-05,
      "model_forward_time": 0.11534619331359863,
      "step": 11154
    },
    {
      "epoch": 6.807861328125e-05,
      "step": 11154,
      "training_step_time": 0.373915433883667
    },
    {
      "epoch": 6.8084716796875e-05,
      "model_forward_time": 0.11572551727294922,
      "step": 11155
    },
    {
      "epoch": 6.8084716796875e-05,
      "step": 11155,
      "training_step_time": 0.43177175521850586
    },
    {
      "epoch": 6.80908203125e-05,
      "model_forward_time": 0.11652398109436035,
      "step": 11156
    },
    {
      "epoch": 6.80908203125e-05,
      "step": 11156,
      "training_step_time": 0.5831515789031982
    },
    {
      "epoch": 6.8096923828125e-05,
      "model_forward_time": 0.11672568321228027,
      "step": 11157
    },
    {
      "epoch": 6.8096923828125e-05,
      "step": 11157,
      "training_step_time": 0.39577555656433105
    },
    {
      "epoch": 6.810302734375e-05,
      "model_forward_time": 0.1157381534576416,
      "step": 11158
    },
    {
      "epoch": 6.810302734375e-05,
      "step": 11158,
      "training_step_time": 0.45373058319091797
    },
    {
      "epoch": 6.8109130859375e-05,
      "model_forward_time": 0.11653947830200195,
      "step": 11159
    },
    {
      "epoch": 6.8109130859375e-05,
      "step": 11159,
      "training_step_time": 0.38165855407714844
    },
    {
      "epoch": 6.8115234375e-05,
      "grad_norm": 0.23325932025909424,
      "learning_rate": 9.502792575695112e-05,
      "loss": 0.0628,
      "step": 11160
    },
    {
      "epoch": 6.8115234375e-05,
      "model_forward_time": 0.11473202705383301,
      "step": 11160
    },
    {
      "epoch": 6.8115234375e-05,
      "step": 11160,
      "training_step_time": 0.38114047050476074
    },
    {
      "epoch": 6.8121337890625e-05,
      "model_forward_time": 0.11972618103027344,
      "step": 11161
    },
    {
      "epoch": 6.8121337890625e-05,
      "step": 11161,
      "training_step_time": 0.3937809467315674
    },
    {
      "epoch": 6.812744140625e-05,
      "model_forward_time": 0.12218642234802246,
      "step": 11162
    },
    {
      "epoch": 6.812744140625e-05,
      "step": 11162,
      "training_step_time": 0.5582225322723389
    },
    {
      "epoch": 6.8133544921875e-05,
      "model_forward_time": 0.1175382137298584,
      "step": 11163
    },
    {
      "epoch": 6.8133544921875e-05,
      "step": 11163,
      "training_step_time": 0.47287774085998535
    },
    {
      "epoch": 6.81396484375e-05,
      "model_forward_time": 0.11756658554077148,
      "step": 11164
    },
    {
      "epoch": 6.81396484375e-05,
      "step": 11164,
      "training_step_time": 0.38184165954589844
    },
    {
      "epoch": 6.8145751953125e-05,
      "model_forward_time": 0.11669039726257324,
      "step": 11165
    },
    {
      "epoch": 6.8145751953125e-05,
      "step": 11165,
      "training_step_time": 0.3743278980255127
    },
    {
      "epoch": 6.815185546875e-05,
      "model_forward_time": 0.12034106254577637,
      "step": 11166
    },
    {
      "epoch": 6.815185546875e-05,
      "step": 11166,
      "training_step_time": 0.41071200370788574
    },
    {
      "epoch": 6.8157958984375e-05,
      "model_forward_time": 0.11707901954650879,
      "step": 11167
    },
    {
      "epoch": 6.8157958984375e-05,
      "step": 11167,
      "training_step_time": 0.4590780735015869
    },
    {
      "epoch": 6.81640625e-05,
      "model_forward_time": 0.12003207206726074,
      "step": 11168
    },
    {
      "epoch": 6.81640625e-05,
      "step": 11168,
      "training_step_time": 0.9346659183502197
    },
    {
      "epoch": 6.8170166015625e-05,
      "model_forward_time": 0.11623001098632812,
      "step": 11169
    },
    {
      "epoch": 6.8170166015625e-05,
      "step": 11169,
      "training_step_time": 0.37893223762512207
    },
    {
      "epoch": 6.817626953125e-05,
      "grad_norm": 0.20168256759643555,
      "learning_rate": 9.501593857010969e-05,
      "loss": 0.0635,
      "step": 11170
    },
    {
      "epoch": 6.817626953125e-05,
      "model_forward_time": 0.11607146263122559,
      "step": 11170
    },
    {
      "epoch": 6.817626953125e-05,
      "step": 11170,
      "training_step_time": 0.3756289482116699
    },
    {
      "epoch": 6.8182373046875e-05,
      "model_forward_time": 0.1165311336517334,
      "step": 11171
    },
    {
      "epoch": 6.8182373046875e-05,
      "step": 11171,
      "training_step_time": 0.380673885345459
    },
    {
      "epoch": 6.81884765625e-05,
      "model_forward_time": 0.11526775360107422,
      "step": 11172
    },
    {
      "epoch": 6.81884765625e-05,
      "step": 11172,
      "training_step_time": 0.370863676071167
    },
    {
      "epoch": 6.8194580078125e-05,
      "model_forward_time": 0.11754751205444336,
      "step": 11173
    },
    {
      "epoch": 6.8194580078125e-05,
      "step": 11173,
      "training_step_time": 0.45296192169189453
    },
    {
      "epoch": 6.820068359375e-05,
      "model_forward_time": 0.11738014221191406,
      "step": 11174
    },
    {
      "epoch": 6.820068359375e-05,
      "step": 11174,
      "training_step_time": 0.8502354621887207
    },
    {
      "epoch": 6.8206787109375e-05,
      "model_forward_time": 0.11708545684814453,
      "step": 11175
    },
    {
      "epoch": 6.8206787109375e-05,
      "step": 11175,
      "training_step_time": 0.4340941905975342
    },
    {
      "epoch": 6.8212890625e-05,
      "model_forward_time": 0.11593866348266602,
      "step": 11176
    },
    {
      "epoch": 6.8212890625e-05,
      "step": 11176,
      "training_step_time": 0.39016175270080566
    },
    {
      "epoch": 6.8218994140625e-05,
      "model_forward_time": 0.11952733993530273,
      "step": 11177
    },
    {
      "epoch": 6.8218994140625e-05,
      "step": 11177,
      "training_step_time": 0.37940287590026855
    },
    {
      "epoch": 6.822509765625e-05,
      "model_forward_time": 0.11557507514953613,
      "step": 11178
    },
    {
      "epoch": 6.822509765625e-05,
      "step": 11178,
      "training_step_time": 0.44344305992126465
    },
    {
      "epoch": 6.8231201171875e-05,
      "model_forward_time": 0.11641693115234375,
      "step": 11179
    },
    {
      "epoch": 6.8231201171875e-05,
      "step": 11179,
      "training_step_time": 0.43759775161743164
    },
    {
      "epoch": 6.82373046875e-05,
      "grad_norm": 0.12795409560203552,
      "learning_rate": 9.50039377086147e-05,
      "loss": 0.0582,
      "step": 11180
    },
    {
      "epoch": 6.82373046875e-05,
      "model_forward_time": 0.11546802520751953,
      "step": 11180
    },
    {
      "epoch": 6.82373046875e-05,
      "step": 11180,
      "training_step_time": 0.4702167510986328
    },
    {
      "epoch": 6.8243408203125e-05,
      "model_forward_time": 0.11575698852539062,
      "step": 11181
    },
    {
      "epoch": 6.8243408203125e-05,
      "step": 11181,
      "training_step_time": 0.4030146598815918
    },
    {
      "epoch": 6.824951171875e-05,
      "model_forward_time": 0.11571669578552246,
      "step": 11182
    },
    {
      "epoch": 6.824951171875e-05,
      "step": 11182,
      "training_step_time": 0.3985941410064697
    },
    {
      "epoch": 6.8255615234375e-05,
      "model_forward_time": 0.11677861213684082,
      "step": 11183
    },
    {
      "epoch": 6.8255615234375e-05,
      "step": 11183,
      "training_step_time": 0.3946347236633301
    },
    {
      "epoch": 6.826171875e-05,
      "model_forward_time": 0.1163337230682373,
      "step": 11184
    },
    {
      "epoch": 6.826171875e-05,
      "step": 11184,
      "training_step_time": 0.384122371673584
    },
    {
      "epoch": 6.8267822265625e-05,
      "model_forward_time": 0.11621618270874023,
      "step": 11185
    },
    {
      "epoch": 6.8267822265625e-05,
      "step": 11185,
      "training_step_time": 0.39789462089538574
    },
    {
      "epoch": 6.827392578125e-05,
      "model_forward_time": 0.11650300025939941,
      "step": 11186
    },
    {
      "epoch": 6.827392578125e-05,
      "step": 11186,
      "training_step_time": 0.8994593620300293
    },
    {
      "epoch": 6.8280029296875e-05,
      "model_forward_time": 0.11562418937683105,
      "step": 11187
    },
    {
      "epoch": 6.8280029296875e-05,
      "step": 11187,
      "training_step_time": 0.4663846492767334
    },
    {
      "epoch": 6.82861328125e-05,
      "model_forward_time": 0.11490774154663086,
      "step": 11188
    },
    {
      "epoch": 6.82861328125e-05,
      "step": 11188,
      "training_step_time": 0.41300392150878906
    },
    {
      "epoch": 6.8292236328125e-05,
      "model_forward_time": 0.11539196968078613,
      "step": 11189
    },
    {
      "epoch": 6.8292236328125e-05,
      "step": 11189,
      "training_step_time": 0.3936736583709717
    },
    {
      "epoch": 6.829833984375e-05,
      "grad_norm": 0.22722521424293518,
      "learning_rate": 9.499192317611167e-05,
      "loss": 0.0613,
      "step": 11190
    },
    {
      "epoch": 6.829833984375e-05,
      "model_forward_time": 0.11551380157470703,
      "step": 11190
    },
    {
      "epoch": 6.829833984375e-05,
      "step": 11190,
      "training_step_time": 0.37153196334838867
    },
    {
      "epoch": 6.8304443359375e-05,
      "model_forward_time": 0.11561918258666992,
      "step": 11191
    },
    {
      "epoch": 6.8304443359375e-05,
      "step": 11191,
      "training_step_time": 0.38594937324523926
    },
    {
      "epoch": 6.8310546875e-05,
      "model_forward_time": 0.11632990837097168,
      "step": 11192
    },
    {
      "epoch": 6.8310546875e-05,
      "step": 11192,
      "training_step_time": 0.6154820919036865
    },
    {
      "epoch": 6.8316650390625e-05,
      "model_forward_time": 0.11529970169067383,
      "step": 11193
    },
    {
      "epoch": 6.8316650390625e-05,
      "step": 11193,
      "training_step_time": 0.4134237766265869
    },
    {
      "epoch": 6.832275390625e-05,
      "model_forward_time": 0.11533236503601074,
      "step": 11194
    },
    {
      "epoch": 6.832275390625e-05,
      "step": 11194,
      "training_step_time": 0.3892552852630615
    },
    {
      "epoch": 6.8328857421875e-05,
      "model_forward_time": 0.11612606048583984,
      "step": 11195
    },
    {
      "epoch": 6.8328857421875e-05,
      "step": 11195,
      "training_step_time": 0.3892350196838379
    },
    {
      "epoch": 6.83349609375e-05,
      "model_forward_time": 0.11601114273071289,
      "step": 11196
    },
    {
      "epoch": 6.83349609375e-05,
      "step": 11196,
      "training_step_time": 0.37966060638427734
    },
    {
      "epoch": 6.8341064453125e-05,
      "model_forward_time": 0.11581659317016602,
      "step": 11197
    },
    {
      "epoch": 6.8341064453125e-05,
      "step": 11197,
      "training_step_time": 0.3934450149536133
    },
    {
      "epoch": 6.834716796875e-05,
      "model_forward_time": 0.1157829761505127,
      "step": 11198
    },
    {
      "epoch": 6.834716796875e-05,
      "step": 11198,
      "training_step_time": 0.9897105693817139
    },
    {
      "epoch": 6.8353271484375e-05,
      "model_forward_time": 0.11517834663391113,
      "step": 11199
    },
    {
      "epoch": 6.8353271484375e-05,
      "step": 11199,
      "training_step_time": 0.4063231945037842
    },
    {
      "epoch": 6.8359375e-05,
      "grad_norm": 0.1632184088230133,
      "learning_rate": 9.497989497625035e-05,
      "loss": 0.063,
      "step": 11200
    },
    {
      "epoch": 6.8359375e-05,
      "model_forward_time": 0.1154477596282959,
      "step": 11200
    },
    {
      "epoch": 6.8359375e-05,
      "step": 11200,
      "training_step_time": 0.4781050682067871
    },
    {
      "epoch": 6.8365478515625e-05,
      "model_forward_time": 0.11504530906677246,
      "step": 11201
    },
    {
      "epoch": 6.8365478515625e-05,
      "step": 11201,
      "training_step_time": 0.40436220169067383
    },
    {
      "epoch": 6.837158203125e-05,
      "model_forward_time": 0.11487984657287598,
      "step": 11202
    },
    {
      "epoch": 6.837158203125e-05,
      "step": 11202,
      "training_step_time": 0.4771897792816162
    },
    {
      "epoch": 6.8377685546875e-05,
      "model_forward_time": 0.11572432518005371,
      "step": 11203
    },
    {
      "epoch": 6.8377685546875e-05,
      "step": 11203,
      "training_step_time": 0.3850579261779785
    },
    {
      "epoch": 6.83837890625e-05,
      "model_forward_time": 0.11630773544311523,
      "step": 11204
    },
    {
      "epoch": 6.83837890625e-05,
      "step": 11204,
      "training_step_time": 0.6904897689819336
    },
    {
      "epoch": 6.8389892578125e-05,
      "model_forward_time": 0.11470890045166016,
      "step": 11205
    },
    {
      "epoch": 6.8389892578125e-05,
      "step": 11205,
      "training_step_time": 0.41677045822143555
    },
    {
      "epoch": 6.839599609375e-05,
      "model_forward_time": 0.11525511741638184,
      "step": 11206
    },
    {
      "epoch": 6.839599609375e-05,
      "step": 11206,
      "training_step_time": 0.40188074111938477
    },
    {
      "epoch": 6.8402099609375e-05,
      "model_forward_time": 0.11556005477905273,
      "step": 11207
    },
    {
      "epoch": 6.8402099609375e-05,
      "step": 11207,
      "training_step_time": 0.3787539005279541
    },
    {
      "epoch": 6.8408203125e-05,
      "model_forward_time": 0.11527109146118164,
      "step": 11208
    },
    {
      "epoch": 6.8408203125e-05,
      "step": 11208,
      "training_step_time": 0.3811337947845459
    },
    {
      "epoch": 6.8414306640625e-05,
      "model_forward_time": 0.11795639991760254,
      "step": 11209
    },
    {
      "epoch": 6.8414306640625e-05,
      "step": 11209,
      "training_step_time": 0.37906765937805176
    },
    {
      "epoch": 6.842041015625e-05,
      "grad_norm": 0.18451201915740967,
      "learning_rate": 9.496785311268457e-05,
      "loss": 0.0635,
      "step": 11210
    },
    {
      "epoch": 6.842041015625e-05,
      "model_forward_time": 0.11741113662719727,
      "step": 11210
    },
    {
      "epoch": 6.842041015625e-05,
      "step": 11210,
      "training_step_time": 0.9156129360198975
    },
    {
      "epoch": 6.8426513671875e-05,
      "model_forward_time": 0.1170816421508789,
      "step": 11211
    },
    {
      "epoch": 6.8426513671875e-05,
      "step": 11211,
      "training_step_time": 0.3850438594818115
    },
    {
      "epoch": 6.84326171875e-05,
      "model_forward_time": 0.11672425270080566,
      "step": 11212
    },
    {
      "epoch": 6.84326171875e-05,
      "step": 11212,
      "training_step_time": 0.4039912223815918
    },
    {
      "epoch": 6.8438720703125e-05,
      "model_forward_time": 0.11679601669311523,
      "step": 11213
    },
    {
      "epoch": 6.8438720703125e-05,
      "step": 11213,
      "training_step_time": 0.46239304542541504
    },
    {
      "epoch": 6.844482421875e-05,
      "model_forward_time": 0.11697530746459961,
      "step": 11214
    },
    {
      "epoch": 6.844482421875e-05,
      "step": 11214,
      "training_step_time": 0.3790018558502197
    },
    {
      "epoch": 6.8450927734375e-05,
      "model_forward_time": 0.11622142791748047,
      "step": 11215
    },
    {
      "epoch": 6.8450927734375e-05,
      "step": 11215,
      "training_step_time": 0.4206850528717041
    },
    {
      "epoch": 6.845703125e-05,
      "model_forward_time": 0.11803698539733887,
      "step": 11216
    },
    {
      "epoch": 6.845703125e-05,
      "step": 11216,
      "training_step_time": 0.8723628520965576
    },
    {
      "epoch": 6.8463134765625e-05,
      "model_forward_time": 0.1150205135345459,
      "step": 11217
    },
    {
      "epoch": 6.8463134765625e-05,
      "step": 11217,
      "training_step_time": 0.4571268558502197
    },
    {
      "epoch": 6.846923828125e-05,
      "model_forward_time": 0.11515140533447266,
      "step": 11218
    },
    {
      "epoch": 6.846923828125e-05,
      "step": 11218,
      "training_step_time": 0.4235997200012207
    },
    {
      "epoch": 6.8475341796875e-05,
      "model_forward_time": 0.11490511894226074,
      "step": 11219
    },
    {
      "epoch": 6.8475341796875e-05,
      "step": 11219,
      "training_step_time": 0.43460965156555176
    },
    {
      "epoch": 6.84814453125e-05,
      "grad_norm": 0.22261784970760345,
      "learning_rate": 9.49557975890723e-05,
      "loss": 0.0578,
      "step": 11220
    },
    {
      "epoch": 6.84814453125e-05,
      "model_forward_time": 0.11451053619384766,
      "step": 11220
    },
    {
      "epoch": 6.84814453125e-05,
      "step": 11220,
      "training_step_time": 0.3782503604888916
    },
    {
      "epoch": 6.8487548828125e-05,
      "model_forward_time": 0.11523222923278809,
      "step": 11221
    },
    {
      "epoch": 6.8487548828125e-05,
      "step": 11221,
      "training_step_time": 0.3895142078399658
    },
    {
      "epoch": 6.849365234375e-05,
      "model_forward_time": 0.1164407730102539,
      "step": 11222
    },
    {
      "epoch": 6.849365234375e-05,
      "step": 11222,
      "training_step_time": 0.8396162986755371
    },
    {
      "epoch": 6.8499755859375e-05,
      "model_forward_time": 0.11526703834533691,
      "step": 11223
    },
    {
      "epoch": 6.8499755859375e-05,
      "step": 11223,
      "training_step_time": 0.45476508140563965
    },
    {
      "epoch": 6.8505859375e-05,
      "model_forward_time": 0.1151118278503418,
      "step": 11224
    },
    {
      "epoch": 6.8505859375e-05,
      "step": 11224,
      "training_step_time": 0.45113372802734375
    },
    {
      "epoch": 6.8511962890625e-05,
      "model_forward_time": 0.11510777473449707,
      "step": 11225
    },
    {
      "epoch": 6.8511962890625e-05,
      "step": 11225,
      "training_step_time": 0.3874657154083252
    },
    {
      "epoch": 6.851806640625e-05,
      "model_forward_time": 0.11490845680236816,
      "step": 11226
    },
    {
      "epoch": 6.851806640625e-05,
      "step": 11226,
      "training_step_time": 0.3995051383972168
    },
    {
      "epoch": 6.8524169921875e-05,
      "model_forward_time": 0.11520171165466309,
      "step": 11227
    },
    {
      "epoch": 6.8524169921875e-05,
      "step": 11227,
      "training_step_time": 0.4388003349304199
    },
    {
      "epoch": 6.85302734375e-05,
      "model_forward_time": 0.11713647842407227,
      "step": 11228
    },
    {
      "epoch": 6.85302734375e-05,
      "step": 11228,
      "training_step_time": 0.5086750984191895
    },
    {
      "epoch": 6.8536376953125e-05,
      "model_forward_time": 0.11529946327209473,
      "step": 11229
    },
    {
      "epoch": 6.8536376953125e-05,
      "step": 11229,
      "training_step_time": 0.38932180404663086
    },
    {
      "epoch": 6.854248046875e-05,
      "grad_norm": 0.17476530373096466,
      "learning_rate": 9.494372840907572e-05,
      "loss": 0.0643,
      "step": 11230
    },
    {
      "epoch": 6.854248046875e-05,
      "model_forward_time": 0.11594796180725098,
      "step": 11230
    },
    {
      "epoch": 6.854248046875e-05,
      "step": 11230,
      "training_step_time": 0.39136505126953125
    },
    {
      "epoch": 6.8548583984375e-05,
      "model_forward_time": 0.1173102855682373,
      "step": 11231
    },
    {
      "epoch": 6.8548583984375e-05,
      "step": 11231,
      "training_step_time": 0.4409630298614502
    },
    {
      "epoch": 6.85546875e-05,
      "model_forward_time": 0.11606287956237793,
      "step": 11232
    },
    {
      "epoch": 6.85546875e-05,
      "step": 11232,
      "training_step_time": 0.4328649044036865
    },
    {
      "epoch": 6.8560791015625e-05,
      "model_forward_time": 0.1162104606628418,
      "step": 11233
    },
    {
      "epoch": 6.8560791015625e-05,
      "step": 11233,
      "training_step_time": 0.39278459548950195
    },
    {
      "epoch": 6.856689453125e-05,
      "model_forward_time": 0.11658954620361328,
      "step": 11234
    },
    {
      "epoch": 6.856689453125e-05,
      "step": 11234,
      "training_step_time": 0.5783708095550537
    },
    {
      "epoch": 6.8572998046875e-05,
      "model_forward_time": 0.11576986312866211,
      "step": 11235
    },
    {
      "epoch": 6.8572998046875e-05,
      "step": 11235,
      "training_step_time": 0.3951082229614258
    },
    {
      "epoch": 6.85791015625e-05,
      "model_forward_time": 0.11609625816345215,
      "step": 11236
    },
    {
      "epoch": 6.85791015625e-05,
      "step": 11236,
      "training_step_time": 0.4290344715118408
    },
    {
      "epoch": 6.8585205078125e-05,
      "model_forward_time": 0.11634683609008789,
      "step": 11237
    },
    {
      "epoch": 6.8585205078125e-05,
      "step": 11237,
      "training_step_time": 0.39177417755126953
    },
    {
      "epoch": 6.859130859375e-05,
      "model_forward_time": 0.11645793914794922,
      "step": 11238
    },
    {
      "epoch": 6.859130859375e-05,
      "step": 11238,
      "training_step_time": 0.46975278854370117
    },
    {
      "epoch": 6.8597412109375e-05,
      "model_forward_time": 0.11736941337585449,
      "step": 11239
    },
    {
      "epoch": 6.8597412109375e-05,
      "step": 11239,
      "training_step_time": 0.5135498046875
    },
    {
      "epoch": 6.8603515625e-05,
      "grad_norm": 0.21686825156211853,
      "learning_rate": 9.493164557636112e-05,
      "loss": 0.0546,
      "step": 11240
    },
    {
      "epoch": 6.8603515625e-05,
      "model_forward_time": 0.11747384071350098,
      "step": 11240
    },
    {
      "epoch": 6.8603515625e-05,
      "step": 11240,
      "training_step_time": 0.6614871025085449
    },
    {
      "epoch": 6.8609619140625e-05,
      "model_forward_time": 0.11850404739379883,
      "step": 11241
    },
    {
      "epoch": 6.8609619140625e-05,
      "step": 11241,
      "training_step_time": 0.6485843658447266
    },
    {
      "epoch": 6.861572265625e-05,
      "model_forward_time": 0.12092256546020508,
      "step": 11242
    },
    {
      "epoch": 6.861572265625e-05,
      "step": 11242,
      "training_step_time": 0.627802848815918
    },
    {
      "epoch": 6.8621826171875e-05,
      "model_forward_time": 0.11997079849243164,
      "step": 11243
    },
    {
      "epoch": 6.8621826171875e-05,
      "step": 11243,
      "training_step_time": 0.6738448143005371
    },
    {
      "epoch": 6.86279296875e-05,
      "model_forward_time": 0.11826801300048828,
      "step": 11244
    },
    {
      "epoch": 6.86279296875e-05,
      "step": 11244,
      "training_step_time": 0.6259348392486572
    },
    {
      "epoch": 6.8634033203125e-05,
      "model_forward_time": 0.12077999114990234,
      "step": 11245
    },
    {
      "epoch": 6.8634033203125e-05,
      "step": 11245,
      "training_step_time": 0.6239073276519775
    },
    {
      "epoch": 6.864013671875e-05,
      "model_forward_time": 0.12036609649658203,
      "step": 11246
    },
    {
      "epoch": 6.864013671875e-05,
      "step": 11246,
      "training_step_time": 0.6303119659423828
    },
    {
      "epoch": 6.8646240234375e-05,
      "model_forward_time": 0.1193394660949707,
      "step": 11247
    },
    {
      "epoch": 6.8646240234375e-05,
      "step": 11247,
      "training_step_time": 0.6050148010253906
    },
    {
      "epoch": 6.865234375e-05,
      "model_forward_time": 0.11935305595397949,
      "step": 11248
    },
    {
      "epoch": 6.865234375e-05,
      "step": 11248,
      "training_step_time": 0.7658817768096924
    },
    {
      "epoch": 6.8658447265625e-05,
      "model_forward_time": 0.11914491653442383,
      "step": 11249
    },
    {
      "epoch": 6.8658447265625e-05,
      "step": 11249,
      "training_step_time": 0.6867415904998779
    },
    {
      "epoch": 6.866455078125e-05,
      "grad_norm": 0.2208181619644165,
      "learning_rate": 9.491954909459895e-05,
      "loss": 0.0645,
      "step": 11250
    },
    {
      "epoch": 6.866455078125e-05,
      "model_forward_time": 0.12236285209655762,
      "step": 11250
    },
    {
      "epoch": 6.866455078125e-05,
      "step": 11250,
      "training_step_time": 0.6890206336975098
    },
    {
      "epoch": 6.8670654296875e-05,
      "model_forward_time": 0.11756587028503418,
      "step": 11251
    },
    {
      "epoch": 6.8670654296875e-05,
      "step": 11251,
      "training_step_time": 0.6723389625549316
    },
    {
      "epoch": 6.86767578125e-05,
      "model_forward_time": 0.1174936294555664,
      "step": 11252
    },
    {
      "epoch": 6.86767578125e-05,
      "step": 11252,
      "training_step_time": 0.7195489406585693
    },
    {
      "epoch": 6.8682861328125e-05,
      "model_forward_time": 0.12066817283630371,
      "step": 11253
    },
    {
      "epoch": 6.8682861328125e-05,
      "step": 11253,
      "training_step_time": 0.7035937309265137
    },
    {
      "epoch": 6.868896484375e-05,
      "model_forward_time": 0.11757516860961914,
      "step": 11254
    },
    {
      "epoch": 6.868896484375e-05,
      "step": 11254,
      "training_step_time": 0.7665045261383057
    },
    {
      "epoch": 6.8695068359375e-05,
      "model_forward_time": 0.11771941184997559,
      "step": 11255
    },
    {
      "epoch": 6.8695068359375e-05,
      "step": 11255,
      "training_step_time": 0.6851232051849365
    },
    {
      "epoch": 6.8701171875e-05,
      "model_forward_time": 0.11702418327331543,
      "step": 11256
    },
    {
      "epoch": 6.8701171875e-05,
      "step": 11256,
      "training_step_time": 0.6756198406219482
    },
    {
      "epoch": 6.8707275390625e-05,
      "model_forward_time": 0.12406134605407715,
      "step": 11257
    },
    {
      "epoch": 6.8707275390625e-05,
      "step": 11257,
      "training_step_time": 0.7186551094055176
    },
    {
      "epoch": 6.871337890625e-05,
      "model_forward_time": 0.11818814277648926,
      "step": 11258
    },
    {
      "epoch": 6.871337890625e-05,
      "step": 11258,
      "training_step_time": 0.7948074340820312
    },
    {
      "epoch": 6.8719482421875e-05,
      "model_forward_time": 0.12205386161804199,
      "step": 11259
    },
    {
      "epoch": 6.8719482421875e-05,
      "step": 11259,
      "training_step_time": 0.7487962245941162
    },
    {
      "epoch": 6.87255859375e-05,
      "grad_norm": 0.17763367295265198,
      "learning_rate": 9.49074389674638e-05,
      "loss": 0.0663,
      "step": 11260
    },
    {
      "epoch": 6.87255859375e-05,
      "model_forward_time": 0.12021350860595703,
      "step": 11260
    },
    {
      "epoch": 6.87255859375e-05,
      "step": 11260,
      "training_step_time": 0.6518473625183105
    },
    {
      "epoch": 6.8731689453125e-05,
      "model_forward_time": 0.12101125717163086,
      "step": 11261
    },
    {
      "epoch": 6.8731689453125e-05,
      "step": 11261,
      "training_step_time": 0.6141562461853027
    },
    {
      "epoch": 6.873779296875e-05,
      "model_forward_time": 0.1206979751586914,
      "step": 11262
    },
    {
      "epoch": 6.873779296875e-05,
      "step": 11262,
      "training_step_time": 0.6979734897613525
    },
    {
      "epoch": 6.8743896484375e-05,
      "model_forward_time": 0.11771798133850098,
      "step": 11263
    },
    {
      "epoch": 6.8743896484375e-05,
      "step": 11263,
      "training_step_time": 0.6939697265625
    },
    {
      "epoch": 6.875e-05,
      "model_forward_time": 0.11697578430175781,
      "step": 11264
    },
    {
      "epoch": 6.875e-05,
      "step": 11264,
      "training_step_time": 0.7474846839904785
    },
    {
      "epoch": 6.8756103515625e-05,
      "model_forward_time": 0.11810183525085449,
      "step": 11265
    },
    {
      "epoch": 6.8756103515625e-05,
      "step": 11265,
      "training_step_time": 0.6461031436920166
    },
    {
      "epoch": 6.876220703125e-05,
      "model_forward_time": 0.11706066131591797,
      "step": 11266
    },
    {
      "epoch": 6.876220703125e-05,
      "step": 11266,
      "training_step_time": 0.6153435707092285
    },
    {
      "epoch": 6.8768310546875e-05,
      "model_forward_time": 0.12363719940185547,
      "step": 11267
    },
    {
      "epoch": 6.8768310546875e-05,
      "step": 11267,
      "training_step_time": 0.6388030052185059
    },
    {
      "epoch": 6.87744140625e-05,
      "model_forward_time": 0.14093780517578125,
      "step": 11268
    },
    {
      "epoch": 6.87744140625e-05,
      "step": 11268,
      "training_step_time": 0.7090926170349121
    },
    {
      "epoch": 6.8780517578125e-05,
      "model_forward_time": 0.11877608299255371,
      "step": 11269
    },
    {
      "epoch": 6.8780517578125e-05,
      "step": 11269,
      "training_step_time": 0.6285037994384766
    },
    {
      "epoch": 6.878662109375e-05,
      "grad_norm": 0.2579115033149719,
      "learning_rate": 9.48953151986344e-05,
      "loss": 0.0683,
      "step": 11270
    },
    {
      "epoch": 6.878662109375e-05,
      "model_forward_time": 0.11697173118591309,
      "step": 11270
    },
    {
      "epoch": 6.878662109375e-05,
      "step": 11270,
      "training_step_time": 0.6745133399963379
    },
    {
      "epoch": 6.8792724609375e-05,
      "model_forward_time": 0.11852216720581055,
      "step": 11271
    },
    {
      "epoch": 6.8792724609375e-05,
      "step": 11271,
      "training_step_time": 0.5725076198577881
    },
    {
      "epoch": 6.8798828125e-05,
      "model_forward_time": 0.11893415451049805,
      "step": 11272
    },
    {
      "epoch": 6.8798828125e-05,
      "step": 11272,
      "training_step_time": 0.7349302768707275
    },
    {
      "epoch": 6.8804931640625e-05,
      "model_forward_time": 0.1266462802886963,
      "step": 11273
    },
    {
      "epoch": 6.8804931640625e-05,
      "step": 11273,
      "training_step_time": 0.6528768539428711
    },
    {
      "epoch": 6.881103515625e-05,
      "model_forward_time": 0.11768507957458496,
      "step": 11274
    },
    {
      "epoch": 6.881103515625e-05,
      "step": 11274,
      "training_step_time": 0.6132116317749023
    },
    {
      "epoch": 6.8817138671875e-05,
      "model_forward_time": 0.11841964721679688,
      "step": 11275
    },
    {
      "epoch": 6.8817138671875e-05,
      "step": 11275,
      "training_step_time": 0.707343339920044
    },
    {
      "epoch": 6.88232421875e-05,
      "model_forward_time": 0.11957979202270508,
      "step": 11276
    },
    {
      "epoch": 6.88232421875e-05,
      "step": 11276,
      "training_step_time": 0.7011792659759521
    },
    {
      "epoch": 6.8829345703125e-05,
      "model_forward_time": 0.1215822696685791,
      "step": 11277
    },
    {
      "epoch": 6.8829345703125e-05,
      "step": 11277,
      "training_step_time": 0.6875057220458984
    },
    {
      "epoch": 6.883544921875e-05,
      "model_forward_time": 0.11982417106628418,
      "step": 11278
    },
    {
      "epoch": 6.883544921875e-05,
      "step": 11278,
      "training_step_time": 0.7400028705596924
    },
    {
      "epoch": 6.8841552734375e-05,
      "model_forward_time": 0.11947226524353027,
      "step": 11279
    },
    {
      "epoch": 6.8841552734375e-05,
      "step": 11279,
      "training_step_time": 0.6890978813171387
    },
    {
      "epoch": 6.884765625e-05,
      "grad_norm": 0.21738995611667633,
      "learning_rate": 9.488317779179361e-05,
      "loss": 0.0658,
      "step": 11280
    },
    {
      "epoch": 6.884765625e-05,
      "model_forward_time": 0.11949825286865234,
      "step": 11280
    },
    {
      "epoch": 6.884765625e-05,
      "step": 11280,
      "training_step_time": 0.6415526866912842
    },
    {
      "epoch": 6.8853759765625e-05,
      "model_forward_time": 0.11748695373535156,
      "step": 11281
    },
    {
      "epoch": 6.8853759765625e-05,
      "step": 11281,
      "training_step_time": 0.6917119026184082
    },
    {
      "epoch": 6.885986328125e-05,
      "model_forward_time": 0.12596821784973145,
      "step": 11282
    },
    {
      "epoch": 6.885986328125e-05,
      "step": 11282,
      "training_step_time": 0.7536125183105469
    },
    {
      "epoch": 6.8865966796875e-05,
      "model_forward_time": 0.11746859550476074,
      "step": 11283
    },
    {
      "epoch": 6.8865966796875e-05,
      "step": 11283,
      "training_step_time": 0.6302196979522705
    },
    {
      "epoch": 6.88720703125e-05,
      "model_forward_time": 0.11922049522399902,
      "step": 11284
    },
    {
      "epoch": 6.88720703125e-05,
      "step": 11284,
      "training_step_time": 0.7112376689910889
    },
    {
      "epoch": 6.8878173828125e-05,
      "model_forward_time": 0.11815214157104492,
      "step": 11285
    },
    {
      "epoch": 6.8878173828125e-05,
      "step": 11285,
      "training_step_time": 0.6754202842712402
    },
    {
      "epoch": 6.888427734375e-05,
      "model_forward_time": 0.11865592002868652,
      "step": 11286
    },
    {
      "epoch": 6.888427734375e-05,
      "step": 11286,
      "training_step_time": 0.6726999282836914
    },
    {
      "epoch": 6.8890380859375e-05,
      "model_forward_time": 0.1194148063659668,
      "step": 11287
    },
    {
      "epoch": 6.8890380859375e-05,
      "step": 11287,
      "training_step_time": 0.7171835899353027
    },
    {
      "epoch": 6.8896484375e-05,
      "model_forward_time": 0.12065649032592773,
      "step": 11288
    },
    {
      "epoch": 6.8896484375e-05,
      "step": 11288,
      "training_step_time": 0.6573300361633301
    },
    {
      "epoch": 6.8902587890625e-05,
      "model_forward_time": 0.1383354663848877,
      "step": 11289
    },
    {
      "epoch": 6.8902587890625e-05,
      "step": 11289,
      "training_step_time": 0.631829023361206
    },
    {
      "epoch": 6.890869140625e-05,
      "grad_norm": 0.1682446002960205,
      "learning_rate": 9.487102675062851e-05,
      "loss": 0.0703,
      "step": 11290
    },
    {
      "epoch": 6.890869140625e-05,
      "model_forward_time": 0.11774778366088867,
      "step": 11290
    },
    {
      "epoch": 6.890869140625e-05,
      "step": 11290,
      "training_step_time": 0.6927363872528076
    },
    {
      "epoch": 6.8914794921875e-05,
      "model_forward_time": 0.11702084541320801,
      "step": 11291
    },
    {
      "epoch": 6.8914794921875e-05,
      "step": 11291,
      "training_step_time": 0.6848905086517334
    },
    {
      "epoch": 6.89208984375e-05,
      "model_forward_time": 0.12144756317138672,
      "step": 11292
    },
    {
      "epoch": 6.89208984375e-05,
      "step": 11292,
      "training_step_time": 0.6288547515869141
    },
    {
      "epoch": 6.8927001953125e-05,
      "model_forward_time": 0.11831974983215332,
      "step": 11293
    },
    {
      "epoch": 6.8927001953125e-05,
      "step": 11293,
      "training_step_time": 0.7065720558166504
    },
    {
      "epoch": 6.893310546875e-05,
      "model_forward_time": 0.12346267700195312,
      "step": 11294
    },
    {
      "epoch": 6.893310546875e-05,
      "step": 11294,
      "training_step_time": 0.6057538986206055
    },
    {
      "epoch": 6.8939208984375e-05,
      "model_forward_time": 0.11926507949829102,
      "step": 11295
    },
    {
      "epoch": 6.8939208984375e-05,
      "step": 11295,
      "training_step_time": 0.6355452537536621
    },
    {
      "epoch": 6.89453125e-05,
      "model_forward_time": 0.12390780448913574,
      "step": 11296
    },
    {
      "epoch": 6.89453125e-05,
      "step": 11296,
      "training_step_time": 0.7203729152679443
    },
    {
      "epoch": 6.8951416015625e-05,
      "model_forward_time": 0.11783695220947266,
      "step": 11297
    },
    {
      "epoch": 6.8951416015625e-05,
      "step": 11297,
      "training_step_time": 0.6779351234436035
    },
    {
      "epoch": 6.895751953125e-05,
      "model_forward_time": 0.12116575241088867,
      "step": 11298
    },
    {
      "epoch": 6.895751953125e-05,
      "step": 11298,
      "training_step_time": 0.6746220588684082
    },
    {
      "epoch": 6.8963623046875e-05,
      "model_forward_time": 0.11841797828674316,
      "step": 11299
    },
    {
      "epoch": 6.8963623046875e-05,
      "step": 11299,
      "training_step_time": 0.6689915657043457
    },
    {
      "epoch": 6.89697265625e-05,
      "grad_norm": 0.19839902222156525,
      "learning_rate": 9.485886207883022e-05,
      "loss": 0.0717,
      "step": 11300
    },
    {
      "epoch": 6.89697265625e-05,
      "model_forward_time": 0.12577486038208008,
      "step": 11300
    },
    {
      "epoch": 6.89697265625e-05,
      "step": 11300,
      "training_step_time": 0.6258673667907715
    },
    {
      "epoch": 6.8975830078125e-05,
      "model_forward_time": 0.11800217628479004,
      "step": 11301
    },
    {
      "epoch": 6.8975830078125e-05,
      "step": 11301,
      "training_step_time": 0.6958267688751221
    },
    {
      "epoch": 6.898193359375e-05,
      "model_forward_time": 0.1258249282836914,
      "step": 11302
    },
    {
      "epoch": 6.898193359375e-05,
      "step": 11302,
      "training_step_time": 0.5943007469177246
    },
    {
      "epoch": 6.8988037109375e-05,
      "model_forward_time": 0.11666607856750488,
      "step": 11303
    },
    {
      "epoch": 6.8988037109375e-05,
      "step": 11303,
      "training_step_time": 0.6062023639678955
    },
    {
      "epoch": 6.8994140625e-05,
      "model_forward_time": 0.12259411811828613,
      "step": 11304
    },
    {
      "epoch": 6.8994140625e-05,
      "step": 11304,
      "training_step_time": 0.6763930320739746
    },
    {
      "epoch": 6.9000244140625e-05,
      "model_forward_time": 0.11934494972229004,
      "step": 11305
    },
    {
      "epoch": 6.9000244140625e-05,
      "step": 11305,
      "training_step_time": 0.6207973957061768
    },
    {
      "epoch": 6.900634765625e-05,
      "model_forward_time": 0.12098956108093262,
      "step": 11306
    },
    {
      "epoch": 6.900634765625e-05,
      "step": 11306,
      "training_step_time": 0.6248390674591064
    },
    {
      "epoch": 6.9012451171875e-05,
      "model_forward_time": 0.12277102470397949,
      "step": 11307
    },
    {
      "epoch": 6.9012451171875e-05,
      "step": 11307,
      "training_step_time": 0.6390745639801025
    },
    {
      "epoch": 6.90185546875e-05,
      "model_forward_time": 0.12437176704406738,
      "step": 11308
    },
    {
      "epoch": 6.90185546875e-05,
      "step": 11308,
      "training_step_time": 0.6167502403259277
    },
    {
      "epoch": 6.9024658203125e-05,
      "model_forward_time": 0.11879706382751465,
      "step": 11309
    },
    {
      "epoch": 6.9024658203125e-05,
      "step": 11309,
      "training_step_time": 0.5359482765197754
    },
    {
      "epoch": 6.903076171875e-05,
      "grad_norm": 0.17638324201107025,
      "learning_rate": 9.484668378009408e-05,
      "loss": 0.0639,
      "step": 11310
    },
    {
      "epoch": 6.903076171875e-05,
      "model_forward_time": 0.1335306167602539,
      "step": 11310
    },
    {
      "epoch": 6.903076171875e-05,
      "step": 11310,
      "training_step_time": 0.5104653835296631
    },
    {
      "epoch": 6.9036865234375e-05,
      "model_forward_time": 0.12128734588623047,
      "step": 11311
    },
    {
      "epoch": 6.9036865234375e-05,
      "step": 11311,
      "training_step_time": 0.5821926593780518
    },
    {
      "epoch": 6.904296875e-05,
      "model_forward_time": 0.12202048301696777,
      "step": 11312
    },
    {
      "epoch": 6.904296875e-05,
      "step": 11312,
      "training_step_time": 0.5652828216552734
    },
    {
      "epoch": 6.9049072265625e-05,
      "model_forward_time": 0.12148475646972656,
      "step": 11313
    },
    {
      "epoch": 6.9049072265625e-05,
      "step": 11313,
      "training_step_time": 0.43302035331726074
    },
    {
      "epoch": 6.905517578125e-05,
      "model_forward_time": 0.12143445014953613,
      "step": 11314
    },
    {
      "epoch": 6.905517578125e-05,
      "step": 11314,
      "training_step_time": 0.4116630554199219
    },
    {
      "epoch": 6.9061279296875e-05,
      "model_forward_time": 0.12315058708190918,
      "step": 11315
    },
    {
      "epoch": 6.9061279296875e-05,
      "step": 11315,
      "training_step_time": 0.41365694999694824
    },
    {
      "epoch": 6.90673828125e-05,
      "model_forward_time": 0.12057685852050781,
      "step": 11316
    },
    {
      "epoch": 6.90673828125e-05,
      "step": 11316,
      "training_step_time": 0.4870915412902832
    },
    {
      "epoch": 6.9073486328125e-05,
      "model_forward_time": 0.11687922477722168,
      "step": 11317
    },
    {
      "epoch": 6.9073486328125e-05,
      "step": 11317,
      "training_step_time": 0.42369890213012695
    },
    {
      "epoch": 6.907958984375e-05,
      "model_forward_time": 0.11928558349609375,
      "step": 11318
    },
    {
      "epoch": 6.907958984375e-05,
      "step": 11318,
      "training_step_time": 0.4230313301086426
    },
    {
      "epoch": 6.9085693359375e-05,
      "model_forward_time": 0.11848759651184082,
      "step": 11319
    },
    {
      "epoch": 6.9085693359375e-05,
      "step": 11319,
      "training_step_time": 0.3875257968902588
    },
    {
      "epoch": 6.9091796875e-05,
      "grad_norm": 0.2097311168909073,
      "learning_rate": 9.483449185811948e-05,
      "loss": 0.0636,
      "step": 11320
    },
    {
      "epoch": 6.9091796875e-05,
      "model_forward_time": 0.11873412132263184,
      "step": 11320
    },
    {
      "epoch": 6.9091796875e-05,
      "step": 11320,
      "training_step_time": 0.466585636138916
    },
    {
      "epoch": 6.9097900390625e-05,
      "model_forward_time": 0.11978816986083984,
      "step": 11321
    },
    {
      "epoch": 6.9097900390625e-05,
      "step": 11321,
      "training_step_time": 0.3808927536010742
    },
    {
      "epoch": 6.910400390625e-05,
      "model_forward_time": 0.11780929565429688,
      "step": 11322
    },
    {
      "epoch": 6.910400390625e-05,
      "step": 11322,
      "training_step_time": 0.4201371669769287
    },
    {
      "epoch": 6.9110107421875e-05,
      "model_forward_time": 0.11565041542053223,
      "step": 11323
    },
    {
      "epoch": 6.9110107421875e-05,
      "step": 11323,
      "training_step_time": 0.38359642028808594
    },
    {
      "epoch": 6.91162109375e-05,
      "model_forward_time": 0.11792993545532227,
      "step": 11324
    },
    {
      "epoch": 6.91162109375e-05,
      "step": 11324,
      "training_step_time": 0.37032389640808105
    },
    {
      "epoch": 6.9122314453125e-05,
      "model_forward_time": 0.11873936653137207,
      "step": 11325
    },
    {
      "epoch": 6.9122314453125e-05,
      "step": 11325,
      "training_step_time": 0.42787909507751465
    },
    {
      "epoch": 6.912841796875e-05,
      "model_forward_time": 0.11822938919067383,
      "step": 11326
    },
    {
      "epoch": 6.912841796875e-05,
      "step": 11326,
      "training_step_time": 0.4801814556121826
    },
    {
      "epoch": 6.9134521484375e-05,
      "model_forward_time": 0.11859846115112305,
      "step": 11327
    },
    {
      "epoch": 6.9134521484375e-05,
      "step": 11327,
      "training_step_time": 0.38500452041625977
    },
    {
      "epoch": 6.9140625e-05,
      "model_forward_time": 0.1193399429321289,
      "step": 11328
    },
    {
      "epoch": 6.9140625e-05,
      "step": 11328,
      "training_step_time": 0.39254212379455566
    },
    {
      "epoch": 6.9146728515625e-05,
      "model_forward_time": 0.11790895462036133,
      "step": 11329
    },
    {
      "epoch": 6.9146728515625e-05,
      "step": 11329,
      "training_step_time": 0.42166662216186523
    },
    {
      "epoch": 6.915283203125e-05,
      "grad_norm": 0.20160214602947235,
      "learning_rate": 9.482228631661005e-05,
      "loss": 0.0675,
      "step": 11330
    },
    {
      "epoch": 6.915283203125e-05,
      "model_forward_time": 0.11802220344543457,
      "step": 11330
    },
    {
      "epoch": 6.915283203125e-05,
      "step": 11330,
      "training_step_time": 0.43073368072509766
    },
    {
      "epoch": 6.9158935546875e-05,
      "model_forward_time": 0.11780667304992676,
      "step": 11331
    },
    {
      "epoch": 6.9158935546875e-05,
      "step": 11331,
      "training_step_time": 0.37921905517578125
    },
    {
      "epoch": 6.91650390625e-05,
      "model_forward_time": 0.11832404136657715,
      "step": 11332
    },
    {
      "epoch": 6.91650390625e-05,
      "step": 11332,
      "training_step_time": 0.39449214935302734
    },
    {
      "epoch": 6.9171142578125e-05,
      "model_forward_time": 0.11582636833190918,
      "step": 11333
    },
    {
      "epoch": 6.9171142578125e-05,
      "step": 11333,
      "training_step_time": 0.4419379234313965
    },
    {
      "epoch": 6.917724609375e-05,
      "model_forward_time": 0.11992478370666504,
      "step": 11334
    },
    {
      "epoch": 6.917724609375e-05,
      "step": 11334,
      "training_step_time": 0.40471363067626953
    },
    {
      "epoch": 6.9183349609375e-05,
      "model_forward_time": 0.11601805686950684,
      "step": 11335
    },
    {
      "epoch": 6.9183349609375e-05,
      "step": 11335,
      "training_step_time": 0.49137282371520996
    },
    {
      "epoch": 6.9189453125e-05,
      "model_forward_time": 0.11751008033752441,
      "step": 11336
    },
    {
      "epoch": 6.9189453125e-05,
      "step": 11336,
      "training_step_time": 0.39724111557006836
    },
    {
      "epoch": 6.9195556640625e-05,
      "model_forward_time": 0.11808037757873535,
      "step": 11337
    },
    {
      "epoch": 6.9195556640625e-05,
      "step": 11337,
      "training_step_time": 0.3813474178314209
    },
    {
      "epoch": 6.920166015625e-05,
      "model_forward_time": 0.11557674407958984,
      "step": 11338
    },
    {
      "epoch": 6.920166015625e-05,
      "step": 11338,
      "training_step_time": 0.37220144271850586
    },
    {
      "epoch": 6.9207763671875e-05,
      "model_forward_time": 0.11542940139770508,
      "step": 11339
    },
    {
      "epoch": 6.9207763671875e-05,
      "step": 11339,
      "training_step_time": 0.40387535095214844
    },
    {
      "epoch": 6.92138671875e-05,
      "grad_norm": 0.1632717400789261,
      "learning_rate": 9.481006715927351e-05,
      "loss": 0.0736,
      "step": 11340
    },
    {
      "epoch": 6.92138671875e-05,
      "model_forward_time": 0.11518406867980957,
      "step": 11340
    },
    {
      "epoch": 6.92138671875e-05,
      "step": 11340,
      "training_step_time": 0.463519811630249
    },
    {
      "epoch": 6.9219970703125e-05,
      "model_forward_time": 0.11759376525878906,
      "step": 11341
    },
    {
      "epoch": 6.9219970703125e-05,
      "step": 11341,
      "training_step_time": 0.39034056663513184
    },
    {
      "epoch": 6.922607421875e-05,
      "model_forward_time": 0.11509823799133301,
      "step": 11342
    },
    {
      "epoch": 6.922607421875e-05,
      "step": 11342,
      "training_step_time": 0.37911152839660645
    },
    {
      "epoch": 6.9232177734375e-05,
      "model_forward_time": 0.11804080009460449,
      "step": 11343
    },
    {
      "epoch": 6.9232177734375e-05,
      "step": 11343,
      "training_step_time": 0.3837473392486572
    },
    {
      "epoch": 6.923828125e-05,
      "model_forward_time": 0.11926841735839844,
      "step": 11344
    },
    {
      "epoch": 6.923828125e-05,
      "step": 11344,
      "training_step_time": 0.3926656246185303
    },
    {
      "epoch": 6.9244384765625e-05,
      "model_forward_time": 0.11823749542236328,
      "step": 11345
    },
    {
      "epoch": 6.9244384765625e-05,
      "step": 11345,
      "training_step_time": 0.46344900131225586
    },
    {
      "epoch": 6.925048828125e-05,
      "model_forward_time": 0.12088894844055176,
      "step": 11346
    },
    {
      "epoch": 6.925048828125e-05,
      "step": 11346,
      "training_step_time": 0.40295958518981934
    },
    {
      "epoch": 6.9256591796875e-05,
      "model_forward_time": 0.11818623542785645,
      "step": 11347
    },
    {
      "epoch": 6.9256591796875e-05,
      "step": 11347,
      "training_step_time": 0.3919086456298828
    },
    {
      "epoch": 6.92626953125e-05,
      "model_forward_time": 0.11777997016906738,
      "step": 11348
    },
    {
      "epoch": 6.92626953125e-05,
      "step": 11348,
      "training_step_time": 0.38978147506713867
    },
    {
      "epoch": 6.9268798828125e-05,
      "model_forward_time": 0.11553502082824707,
      "step": 11349
    },
    {
      "epoch": 6.9268798828125e-05,
      "step": 11349,
      "training_step_time": 0.4739503860473633
    },
    {
      "epoch": 6.927490234375e-05,
      "grad_norm": 0.18629755079746246,
      "learning_rate": 9.479783438982172e-05,
      "loss": 0.0615,
      "step": 11350
    },
    {
      "epoch": 6.927490234375e-05,
      "model_forward_time": 0.11985063552856445,
      "step": 11350
    },
    {
      "epoch": 6.927490234375e-05,
      "step": 11350,
      "training_step_time": 0.3867990970611572
    },
    {
      "epoch": 6.9281005859375e-05,
      "model_forward_time": 0.11820149421691895,
      "step": 11351
    },
    {
      "epoch": 6.9281005859375e-05,
      "step": 11351,
      "training_step_time": 0.386066198348999
    },
    {
      "epoch": 6.9287109375e-05,
      "model_forward_time": 0.11659550666809082,
      "step": 11352
    },
    {
      "epoch": 6.9287109375e-05,
      "step": 11352,
      "training_step_time": 0.374157190322876
    },
    {
      "epoch": 6.9293212890625e-05,
      "model_forward_time": 0.11565589904785156,
      "step": 11353
    },
    {
      "epoch": 6.9293212890625e-05,
      "step": 11353,
      "training_step_time": 0.4874868392944336
    },
    {
      "epoch": 6.929931640625e-05,
      "model_forward_time": 0.11559510231018066,
      "step": 11354
    },
    {
      "epoch": 6.929931640625e-05,
      "step": 11354,
      "training_step_time": 0.46435546875
    },
    {
      "epoch": 6.9305419921875e-05,
      "model_forward_time": 0.1159977912902832,
      "step": 11355
    },
    {
      "epoch": 6.9305419921875e-05,
      "step": 11355,
      "training_step_time": 0.48494505882263184
    },
    {
      "epoch": 6.93115234375e-05,
      "model_forward_time": 0.11676597595214844,
      "step": 11356
    },
    {
      "epoch": 6.93115234375e-05,
      "step": 11356,
      "training_step_time": 0.3864402770996094
    },
    {
      "epoch": 6.9317626953125e-05,
      "model_forward_time": 0.11887645721435547,
      "step": 11357
    },
    {
      "epoch": 6.9317626953125e-05,
      "step": 11357,
      "training_step_time": 0.38584375381469727
    },
    {
      "epoch": 6.932373046875e-05,
      "model_forward_time": 0.1184086799621582,
      "step": 11358
    },
    {
      "epoch": 6.932373046875e-05,
      "step": 11358,
      "training_step_time": 0.3835182189941406
    },
    {
      "epoch": 6.9329833984375e-05,
      "model_forward_time": 0.1181032657623291,
      "step": 11359
    },
    {
      "epoch": 6.9329833984375e-05,
      "step": 11359,
      "training_step_time": 0.45452451705932617
    },
    {
      "epoch": 6.93359375e-05,
      "grad_norm": 0.23624515533447266,
      "learning_rate": 9.478558801197065e-05,
      "loss": 0.0594,
      "step": 11360
    },
    {
      "epoch": 6.93359375e-05,
      "model_forward_time": 0.11766338348388672,
      "step": 11360
    },
    {
      "epoch": 6.93359375e-05,
      "step": 11360,
      "training_step_time": 0.3847322463989258
    },
    {
      "epoch": 6.9342041015625e-05,
      "model_forward_time": 0.11893844604492188,
      "step": 11361
    },
    {
      "epoch": 6.9342041015625e-05,
      "step": 11361,
      "training_step_time": 0.4514601230621338
    },
    {
      "epoch": 6.934814453125e-05,
      "model_forward_time": 0.12864971160888672,
      "step": 11362
    },
    {
      "epoch": 6.934814453125e-05,
      "step": 11362,
      "training_step_time": 0.43408942222595215
    },
    {
      "epoch": 6.9354248046875e-05,
      "model_forward_time": 0.1187903881072998,
      "step": 11363
    },
    {
      "epoch": 6.9354248046875e-05,
      "step": 11363,
      "training_step_time": 0.46575069427490234
    },
    {
      "epoch": 6.93603515625e-05,
      "model_forward_time": 0.125518798828125,
      "step": 11364
    },
    {
      "epoch": 6.93603515625e-05,
      "step": 11364,
      "training_step_time": 0.39763522148132324
    },
    {
      "epoch": 6.9366455078125e-05,
      "model_forward_time": 0.11785411834716797,
      "step": 11365
    },
    {
      "epoch": 6.9366455078125e-05,
      "step": 11365,
      "training_step_time": 0.3795943260192871
    },
    {
      "epoch": 6.937255859375e-05,
      "model_forward_time": 0.11754679679870605,
      "step": 11366
    },
    {
      "epoch": 6.937255859375e-05,
      "step": 11366,
      "training_step_time": 0.37992143630981445
    },
    {
      "epoch": 6.9378662109375e-05,
      "model_forward_time": 0.11745023727416992,
      "step": 11367
    },
    {
      "epoch": 6.9378662109375e-05,
      "step": 11367,
      "training_step_time": 0.38713550567626953
    },
    {
      "epoch": 6.9384765625e-05,
      "model_forward_time": 0.12004852294921875,
      "step": 11368
    },
    {
      "epoch": 6.9384765625e-05,
      "step": 11368,
      "training_step_time": 0.40983033180236816
    },
    {
      "epoch": 6.9390869140625e-05,
      "model_forward_time": 0.11643028259277344,
      "step": 11369
    },
    {
      "epoch": 6.9390869140625e-05,
      "step": 11369,
      "training_step_time": 0.4954826831817627
    },
    {
      "epoch": 6.939697265625e-05,
      "grad_norm": 0.20055070519447327,
      "learning_rate": 9.477332802944044e-05,
      "loss": 0.0687,
      "step": 11370
    },
    {
      "epoch": 6.939697265625e-05,
      "model_forward_time": 0.11584210395812988,
      "step": 11370
    },
    {
      "epoch": 6.939697265625e-05,
      "step": 11370,
      "training_step_time": 0.3812689781188965
    },
    {
      "epoch": 6.9403076171875e-05,
      "model_forward_time": 0.11504554748535156,
      "step": 11371
    },
    {
      "epoch": 6.9403076171875e-05,
      "step": 11371,
      "training_step_time": 0.4004182815551758
    },
    {
      "epoch": 6.94091796875e-05,
      "model_forward_time": 0.11509132385253906,
      "step": 11372
    },
    {
      "epoch": 6.94091796875e-05,
      "step": 11372,
      "training_step_time": 0.39252400398254395
    },
    {
      "epoch": 6.9415283203125e-05,
      "model_forward_time": 0.11607766151428223,
      "step": 11373
    },
    {
      "epoch": 6.9415283203125e-05,
      "step": 11373,
      "training_step_time": 0.38477206230163574
    },
    {
      "epoch": 6.942138671875e-05,
      "model_forward_time": 0.11624670028686523,
      "step": 11374
    },
    {
      "epoch": 6.942138671875e-05,
      "step": 11374,
      "training_step_time": 0.43287110328674316
    },
    {
      "epoch": 6.9427490234375e-05,
      "model_forward_time": 0.11513566970825195,
      "step": 11375
    },
    {
      "epoch": 6.9427490234375e-05,
      "step": 11375,
      "training_step_time": 0.446744441986084
    },
    {
      "epoch": 6.943359375e-05,
      "model_forward_time": 0.11595010757446289,
      "step": 11376
    },
    {
      "epoch": 6.943359375e-05,
      "step": 11376,
      "training_step_time": 0.40012121200561523
    },
    {
      "epoch": 6.9439697265625e-05,
      "model_forward_time": 0.11645007133483887,
      "step": 11377
    },
    {
      "epoch": 6.9439697265625e-05,
      "step": 11377,
      "training_step_time": 0.3963944911956787
    },
    {
      "epoch": 6.944580078125e-05,
      "model_forward_time": 0.11555886268615723,
      "step": 11378
    },
    {
      "epoch": 6.944580078125e-05,
      "step": 11378,
      "training_step_time": 0.4267587661743164
    },
    {
      "epoch": 6.9451904296875e-05,
      "model_forward_time": 0.11580324172973633,
      "step": 11379
    },
    {
      "epoch": 6.9451904296875e-05,
      "step": 11379,
      "training_step_time": 0.5819401741027832
    },
    {
      "epoch": 6.94580078125e-05,
      "grad_norm": 0.15015149116516113,
      "learning_rate": 9.476105444595534e-05,
      "loss": 0.0613,
      "step": 11380
    },
    {
      "epoch": 6.94580078125e-05,
      "model_forward_time": 0.1155238151550293,
      "step": 11380
    },
    {
      "epoch": 6.94580078125e-05,
      "step": 11380,
      "training_step_time": 0.38869261741638184
    },
    {
      "epoch": 6.9464111328125e-05,
      "model_forward_time": 0.11487936973571777,
      "step": 11381
    },
    {
      "epoch": 6.9464111328125e-05,
      "step": 11381,
      "training_step_time": 0.36818623542785645
    },
    {
      "epoch": 6.947021484375e-05,
      "model_forward_time": 0.11495471000671387,
      "step": 11382
    },
    {
      "epoch": 6.947021484375e-05,
      "step": 11382,
      "training_step_time": 0.41378045082092285
    },
    {
      "epoch": 6.9476318359375e-05,
      "model_forward_time": 0.11503744125366211,
      "step": 11383
    },
    {
      "epoch": 6.9476318359375e-05,
      "step": 11383,
      "training_step_time": 0.47267580032348633
    },
    {
      "epoch": 6.9482421875e-05,
      "model_forward_time": 0.11472535133361816,
      "step": 11384
    },
    {
      "epoch": 6.9482421875e-05,
      "step": 11384,
      "training_step_time": 0.38739514350891113
    },
    {
      "epoch": 6.9488525390625e-05,
      "model_forward_time": 0.11562728881835938,
      "step": 11385
    },
    {
      "epoch": 6.9488525390625e-05,
      "step": 11385,
      "training_step_time": 0.6345031261444092
    },
    {
      "epoch": 6.949462890625e-05,
      "model_forward_time": 0.114654541015625,
      "step": 11386
    },
    {
      "epoch": 6.949462890625e-05,
      "step": 11386,
      "training_step_time": 0.39222049713134766
    },
    {
      "epoch": 6.9500732421875e-05,
      "model_forward_time": 0.11560463905334473,
      "step": 11387
    },
    {
      "epoch": 6.9500732421875e-05,
      "step": 11387,
      "training_step_time": 0.3829987049102783
    },
    {
      "epoch": 6.95068359375e-05,
      "model_forward_time": 0.11505675315856934,
      "step": 11388
    },
    {
      "epoch": 6.95068359375e-05,
      "step": 11388,
      "training_step_time": 0.4267599582672119
    },
    {
      "epoch": 6.9512939453125e-05,
      "model_forward_time": 0.11555862426757812,
      "step": 11389
    },
    {
      "epoch": 6.9512939453125e-05,
      "step": 11389,
      "training_step_time": 0.45116138458251953
    },
    {
      "epoch": 6.951904296875e-05,
      "grad_norm": 0.16596348583698273,
      "learning_rate": 9.474876726524374e-05,
      "loss": 0.0599,
      "step": 11390
    },
    {
      "epoch": 6.951904296875e-05,
      "model_forward_time": 0.11565494537353516,
      "step": 11390
    },
    {
      "epoch": 6.951904296875e-05,
      "step": 11390,
      "training_step_time": 0.41645050048828125
    },
    {
      "epoch": 6.9525146484375e-05,
      "model_forward_time": 0.11512947082519531,
      "step": 11391
    },
    {
      "epoch": 6.9525146484375e-05,
      "step": 11391,
      "training_step_time": 0.9643075466156006
    },
    {
      "epoch": 6.953125e-05,
      "model_forward_time": 0.11488986015319824,
      "step": 11392
    },
    {
      "epoch": 6.953125e-05,
      "step": 11392,
      "training_step_time": 0.38384556770324707
    },
    {
      "epoch": 6.9537353515625e-05,
      "model_forward_time": 0.11455440521240234,
      "step": 11393
    },
    {
      "epoch": 6.9537353515625e-05,
      "step": 11393,
      "training_step_time": 0.3844642639160156
    },
    {
      "epoch": 6.954345703125e-05,
      "model_forward_time": 0.11501002311706543,
      "step": 11394
    },
    {
      "epoch": 6.954345703125e-05,
      "step": 11394,
      "training_step_time": 0.3664510250091553
    },
    {
      "epoch": 6.9549560546875e-05,
      "model_forward_time": 0.11491608619689941,
      "step": 11395
    },
    {
      "epoch": 6.9549560546875e-05,
      "step": 11395,
      "training_step_time": 0.40592408180236816
    },
    {
      "epoch": 6.95556640625e-05,
      "model_forward_time": 0.11596250534057617,
      "step": 11396
    },
    {
      "epoch": 6.95556640625e-05,
      "step": 11396,
      "training_step_time": 0.37578749656677246
    },
    {
      "epoch": 6.9561767578125e-05,
      "model_forward_time": 0.11569976806640625,
      "step": 11397
    },
    {
      "epoch": 6.9561767578125e-05,
      "step": 11397,
      "training_step_time": 0.8116393089294434
    },
    {
      "epoch": 6.956787109375e-05,
      "model_forward_time": 0.11524486541748047,
      "step": 11398
    },
    {
      "epoch": 6.956787109375e-05,
      "step": 11398,
      "training_step_time": 0.38598155975341797
    },
    {
      "epoch": 6.9573974609375e-05,
      "model_forward_time": 0.11486363410949707,
      "step": 11399
    },
    {
      "epoch": 6.9573974609375e-05,
      "step": 11399,
      "training_step_time": 0.3952443599700928
    },
    {
      "epoch": 6.9580078125e-05,
      "grad_norm": 0.20836320519447327,
      "learning_rate": 9.473646649103818e-05,
      "loss": 0.0594,
      "step": 11400
    },
    {
      "epoch": 6.9580078125e-05,
      "model_forward_time": 0.1148378849029541,
      "step": 11400
    },
    {
      "epoch": 6.9580078125e-05,
      "step": 11400,
      "training_step_time": 0.39882469177246094
    },
    {
      "epoch": 6.9586181640625e-05,
      "model_forward_time": 0.11490678787231445,
      "step": 11401
    },
    {
      "epoch": 6.9586181640625e-05,
      "step": 11401,
      "training_step_time": 0.4152557849884033
    },
    {
      "epoch": 6.959228515625e-05,
      "model_forward_time": 0.11512231826782227,
      "step": 11402
    },
    {
      "epoch": 6.959228515625e-05,
      "step": 11402,
      "training_step_time": 0.38756561279296875
    },
    {
      "epoch": 6.9598388671875e-05,
      "model_forward_time": 0.11535429954528809,
      "step": 11403
    },
    {
      "epoch": 6.9598388671875e-05,
      "step": 11403,
      "training_step_time": 0.8358488082885742
    },
    {
      "epoch": 6.96044921875e-05,
      "model_forward_time": 0.11609458923339844,
      "step": 11404
    },
    {
      "epoch": 6.96044921875e-05,
      "step": 11404,
      "training_step_time": 0.3948812484741211
    },
    {
      "epoch": 6.9610595703125e-05,
      "model_forward_time": 0.11513614654541016,
      "step": 11405
    },
    {
      "epoch": 6.9610595703125e-05,
      "step": 11405,
      "training_step_time": 0.3857731819152832
    },
    {
      "epoch": 6.961669921875e-05,
      "model_forward_time": 0.11471366882324219,
      "step": 11406
    },
    {
      "epoch": 6.961669921875e-05,
      "step": 11406,
      "training_step_time": 0.380612850189209
    },
    {
      "epoch": 6.9622802734375e-05,
      "model_forward_time": 0.11549901962280273,
      "step": 11407
    },
    {
      "epoch": 6.9622802734375e-05,
      "step": 11407,
      "training_step_time": 0.3927805423736572
    },
    {
      "epoch": 6.962890625e-05,
      "model_forward_time": 0.11586904525756836,
      "step": 11408
    },
    {
      "epoch": 6.962890625e-05,
      "step": 11408,
      "training_step_time": 0.4614272117614746
    },
    {
      "epoch": 6.9635009765625e-05,
      "model_forward_time": 0.11579775810241699,
      "step": 11409
    },
    {
      "epoch": 6.9635009765625e-05,
      "step": 11409,
      "training_step_time": 0.8142759799957275
    },
    {
      "epoch": 6.964111328125e-05,
      "grad_norm": 0.1718938946723938,
      "learning_rate": 9.47241521270753e-05,
      "loss": 0.0655,
      "step": 11410
    },
    {
      "epoch": 6.964111328125e-05,
      "model_forward_time": 0.11574649810791016,
      "step": 11410
    },
    {
      "epoch": 6.964111328125e-05,
      "step": 11410,
      "training_step_time": 0.381289005279541
    },
    {
      "epoch": 6.9647216796875e-05,
      "model_forward_time": 0.11470961570739746,
      "step": 11411
    },
    {
      "epoch": 6.9647216796875e-05,
      "step": 11411,
      "training_step_time": 0.3842923641204834
    },
    {
      "epoch": 6.96533203125e-05,
      "model_forward_time": 0.11863040924072266,
      "step": 11412
    },
    {
      "epoch": 6.96533203125e-05,
      "step": 11412,
      "training_step_time": 0.41483473777770996
    },
    {
      "epoch": 6.9659423828125e-05,
      "model_forward_time": 0.11511349678039551,
      "step": 11413
    },
    {
      "epoch": 6.9659423828125e-05,
      "step": 11413,
      "training_step_time": 0.48136472702026367
    },
    {
      "epoch": 6.966552734375e-05,
      "model_forward_time": 0.11648082733154297,
      "step": 11414
    },
    {
      "epoch": 6.966552734375e-05,
      "step": 11414,
      "training_step_time": 0.3920481204986572
    },
    {
      "epoch": 6.9671630859375e-05,
      "model_forward_time": 0.11526966094970703,
      "step": 11415
    },
    {
      "epoch": 6.9671630859375e-05,
      "step": 11415,
      "training_step_time": 0.612342119216919
    },
    {
      "epoch": 6.9677734375e-05,
      "model_forward_time": 0.11525464057922363,
      "step": 11416
    },
    {
      "epoch": 6.9677734375e-05,
      "step": 11416,
      "training_step_time": 0.47411537170410156
    },
    {
      "epoch": 6.9683837890625e-05,
      "model_forward_time": 0.1149454116821289,
      "step": 11417
    },
    {
      "epoch": 6.9683837890625e-05,
      "step": 11417,
      "training_step_time": 0.3915421962738037
    },
    {
      "epoch": 6.968994140625e-05,
      "model_forward_time": 0.11558294296264648,
      "step": 11418
    },
    {
      "epoch": 6.968994140625e-05,
      "step": 11418,
      "training_step_time": 0.3788132667541504
    },
    {
      "epoch": 6.9696044921875e-05,
      "model_forward_time": 0.11505842208862305,
      "step": 11419
    },
    {
      "epoch": 6.9696044921875e-05,
      "step": 11419,
      "training_step_time": 0.3874940872192383
    },
    {
      "epoch": 6.97021484375e-05,
      "grad_norm": 0.20824198424816132,
      "learning_rate": 9.471182417709587e-05,
      "loss": 0.0655,
      "step": 11420
    },
    {
      "epoch": 6.97021484375e-05,
      "model_forward_time": 0.11614131927490234,
      "step": 11420
    },
    {
      "epoch": 6.97021484375e-05,
      "step": 11420,
      "training_step_time": 0.36998724937438965
    },
    {
      "epoch": 6.9708251953125e-05,
      "model_forward_time": 0.11652445793151855,
      "step": 11421
    },
    {
      "epoch": 6.9708251953125e-05,
      "step": 11421,
      "training_step_time": 0.419292688369751
    },
    {
      "epoch": 6.971435546875e-05,
      "model_forward_time": 0.11515617370605469,
      "step": 11422
    },
    {
      "epoch": 6.971435546875e-05,
      "step": 11422,
      "training_step_time": 0.3969459533691406
    },
    {
      "epoch": 6.9720458984375e-05,
      "model_forward_time": 0.11618208885192871,
      "step": 11423
    },
    {
      "epoch": 6.9720458984375e-05,
      "step": 11423,
      "training_step_time": 0.4765126705169678
    },
    {
      "epoch": 6.97265625e-05,
      "model_forward_time": 0.11650991439819336,
      "step": 11424
    },
    {
      "epoch": 6.97265625e-05,
      "step": 11424,
      "training_step_time": 0.38566160202026367
    },
    {
      "epoch": 6.9732666015625e-05,
      "model_forward_time": 0.11597299575805664,
      "step": 11425
    },
    {
      "epoch": 6.9732666015625e-05,
      "step": 11425,
      "training_step_time": 0.40830492973327637
    },
    {
      "epoch": 6.973876953125e-05,
      "model_forward_time": 0.1162571907043457,
      "step": 11426
    },
    {
      "epoch": 6.973876953125e-05,
      "step": 11426,
      "training_step_time": 0.3735811710357666
    },
    {
      "epoch": 6.9744873046875e-05,
      "model_forward_time": 0.11567568778991699,
      "step": 11427
    },
    {
      "epoch": 6.9744873046875e-05,
      "step": 11427,
      "training_step_time": 0.44638633728027344
    },
    {
      "epoch": 6.97509765625e-05,
      "model_forward_time": 0.11618924140930176,
      "step": 11428
    },
    {
      "epoch": 6.97509765625e-05,
      "step": 11428,
      "training_step_time": 0.3914508819580078
    },
    {
      "epoch": 6.9757080078125e-05,
      "model_forward_time": 0.1165614128112793,
      "step": 11429
    },
    {
      "epoch": 6.9757080078125e-05,
      "step": 11429,
      "training_step_time": 0.4944169521331787
    },
    {
      "epoch": 6.976318359375e-05,
      "grad_norm": 0.18041616678237915,
      "learning_rate": 9.46994826448448e-05,
      "loss": 0.0637,
      "step": 11430
    },
    {
      "epoch": 6.976318359375e-05,
      "model_forward_time": 0.11716270446777344,
      "step": 11430
    },
    {
      "epoch": 6.976318359375e-05,
      "step": 11430,
      "training_step_time": 0.380474328994751
    },
    {
      "epoch": 6.9769287109375e-05,
      "model_forward_time": 0.11625838279724121,
      "step": 11431
    },
    {
      "epoch": 6.9769287109375e-05,
      "step": 11431,
      "training_step_time": 0.474733829498291
    },
    {
      "epoch": 6.9775390625e-05,
      "model_forward_time": 0.11564087867736816,
      "step": 11432
    },
    {
      "epoch": 6.9775390625e-05,
      "step": 11432,
      "training_step_time": 0.385387659072876
    },
    {
      "epoch": 6.9781494140625e-05,
      "model_forward_time": 0.11527514457702637,
      "step": 11433
    },
    {
      "epoch": 6.9781494140625e-05,
      "step": 11433,
      "training_step_time": 0.3866121768951416
    },
    {
      "epoch": 6.978759765625e-05,
      "model_forward_time": 0.11564779281616211,
      "step": 11434
    },
    {
      "epoch": 6.978759765625e-05,
      "step": 11434,
      "training_step_time": 0.40348386764526367
    },
    {
      "epoch": 6.9793701171875e-05,
      "model_forward_time": 0.11573672294616699,
      "step": 11435
    },
    {
      "epoch": 6.9793701171875e-05,
      "step": 11435,
      "training_step_time": 0.39259934425354004
    },
    {
      "epoch": 6.97998046875e-05,
      "model_forward_time": 0.11735081672668457,
      "step": 11436
    },
    {
      "epoch": 6.97998046875e-05,
      "step": 11436,
      "training_step_time": 0.38210582733154297
    },
    {
      "epoch": 6.9805908203125e-05,
      "model_forward_time": 0.11516857147216797,
      "step": 11437
    },
    {
      "epoch": 6.9805908203125e-05,
      "step": 11437,
      "training_step_time": 0.43045997619628906
    },
    {
      "epoch": 6.981201171875e-05,
      "model_forward_time": 0.11570048332214355,
      "step": 11438
    },
    {
      "epoch": 6.981201171875e-05,
      "step": 11438,
      "training_step_time": 0.4860067367553711
    },
    {
      "epoch": 6.9818115234375e-05,
      "model_forward_time": 0.1157386302947998,
      "step": 11439
    },
    {
      "epoch": 6.9818115234375e-05,
      "step": 11439,
      "training_step_time": 0.39894795417785645
    },
    {
      "epoch": 6.982421875e-05,
      "grad_norm": 0.14556901156902313,
      "learning_rate": 9.468712753407112e-05,
      "loss": 0.0607,
      "step": 11440
    },
    {
      "epoch": 6.982421875e-05,
      "model_forward_time": 0.1156761646270752,
      "step": 11440
    },
    {
      "epoch": 6.982421875e-05,
      "step": 11440,
      "training_step_time": 0.3862314224243164
    },
    {
      "epoch": 6.9830322265625e-05,
      "model_forward_time": 0.11579751968383789,
      "step": 11441
    },
    {
      "epoch": 6.9830322265625e-05,
      "step": 11441,
      "training_step_time": 0.46146368980407715
    },
    {
      "epoch": 6.983642578125e-05,
      "model_forward_time": 0.11588716506958008,
      "step": 11442
    },
    {
      "epoch": 6.983642578125e-05,
      "step": 11442,
      "training_step_time": 0.478318452835083
    },
    {
      "epoch": 6.9842529296875e-05,
      "model_forward_time": 0.11526751518249512,
      "step": 11443
    },
    {
      "epoch": 6.9842529296875e-05,
      "step": 11443,
      "training_step_time": 0.4395570755004883
    },
    {
      "epoch": 6.98486328125e-05,
      "model_forward_time": 0.11577272415161133,
      "step": 11444
    },
    {
      "epoch": 6.98486328125e-05,
      "step": 11444,
      "training_step_time": 0.4241325855255127
    },
    {
      "epoch": 6.9854736328125e-05,
      "model_forward_time": 0.115081787109375,
      "step": 11445
    },
    {
      "epoch": 6.9854736328125e-05,
      "step": 11445,
      "training_step_time": 0.4564197063446045
    },
    {
      "epoch": 6.986083984375e-05,
      "model_forward_time": 0.11478400230407715,
      "step": 11446
    },
    {
      "epoch": 6.986083984375e-05,
      "step": 11446,
      "training_step_time": 0.38013744354248047
    },
    {
      "epoch": 6.9866943359375e-05,
      "model_forward_time": 0.11702966690063477,
      "step": 11447
    },
    {
      "epoch": 6.9866943359375e-05,
      "step": 11447,
      "training_step_time": 0.382826566696167
    },
    {
      "epoch": 6.9873046875e-05,
      "model_forward_time": 0.11572742462158203,
      "step": 11448
    },
    {
      "epoch": 6.9873046875e-05,
      "step": 11448,
      "training_step_time": 0.37105226516723633
    },
    {
      "epoch": 6.9879150390625e-05,
      "model_forward_time": 0.1161198616027832,
      "step": 11449
    },
    {
      "epoch": 6.9879150390625e-05,
      "step": 11449,
      "training_step_time": 0.39098381996154785
    },
    {
      "epoch": 6.988525390625e-05,
      "grad_norm": 0.15350623428821564,
      "learning_rate": 9.4674758848528e-05,
      "loss": 0.0632,
      "step": 11450
    },
    {
      "epoch": 6.988525390625e-05,
      "model_forward_time": 0.11563467979431152,
      "step": 11450
    },
    {
      "epoch": 6.988525390625e-05,
      "step": 11450,
      "training_step_time": 0.37606358528137207
    },
    {
      "epoch": 6.9891357421875e-05,
      "model_forward_time": 0.11622142791748047,
      "step": 11451
    },
    {
      "epoch": 6.9891357421875e-05,
      "step": 11451,
      "training_step_time": 0.4216318130493164
    },
    {
      "epoch": 6.98974609375e-05,
      "model_forward_time": 0.11586189270019531,
      "step": 11452
    },
    {
      "epoch": 6.98974609375e-05,
      "step": 11452,
      "training_step_time": 0.38802385330200195
    },
    {
      "epoch": 6.9903564453125e-05,
      "model_forward_time": 0.1174767017364502,
      "step": 11453
    },
    {
      "epoch": 6.9903564453125e-05,
      "step": 11453,
      "training_step_time": 0.48442959785461426
    },
    {
      "epoch": 6.990966796875e-05,
      "model_forward_time": 0.11669206619262695,
      "step": 11454
    },
    {
      "epoch": 6.990966796875e-05,
      "step": 11454,
      "training_step_time": 0.40821361541748047
    },
    {
      "epoch": 6.9915771484375e-05,
      "model_forward_time": 0.11587738990783691,
      "step": 11455
    },
    {
      "epoch": 6.9915771484375e-05,
      "step": 11455,
      "training_step_time": 0.39014530181884766
    },
    {
      "epoch": 6.9921875e-05,
      "model_forward_time": 0.11575794219970703,
      "step": 11456
    },
    {
      "epoch": 6.9921875e-05,
      "step": 11456,
      "training_step_time": 0.4031801223754883
    },
    {
      "epoch": 6.9927978515625e-05,
      "model_forward_time": 0.11628532409667969,
      "step": 11457
    },
    {
      "epoch": 6.9927978515625e-05,
      "step": 11457,
      "training_step_time": 1.0328142642974854
    },
    {
      "epoch": 6.993408203125e-05,
      "model_forward_time": 0.11533069610595703,
      "step": 11458
    },
    {
      "epoch": 6.993408203125e-05,
      "step": 11458,
      "training_step_time": 0.385556697845459
    },
    {
      "epoch": 6.9940185546875e-05,
      "model_forward_time": 0.1153876781463623,
      "step": 11459
    },
    {
      "epoch": 6.9940185546875e-05,
      "step": 11459,
      "training_step_time": 0.44083189964294434
    },
    {
      "epoch": 6.99462890625e-05,
      "grad_norm": 0.24643702805042267,
      "learning_rate": 9.46623765919727e-05,
      "loss": 0.074,
      "step": 11460
    },
    {
      "epoch": 6.99462890625e-05,
      "model_forward_time": 0.11403036117553711,
      "step": 11460
    },
    {
      "epoch": 6.99462890625e-05,
      "step": 11460,
      "training_step_time": 0.3754129409790039
    },
    {
      "epoch": 6.9952392578125e-05,
      "model_forward_time": 0.11450791358947754,
      "step": 11461
    },
    {
      "epoch": 6.9952392578125e-05,
      "step": 11461,
      "training_step_time": 0.3834807872772217
    },
    {
      "epoch": 6.995849609375e-05,
      "model_forward_time": 0.11519718170166016,
      "step": 11462
    },
    {
      "epoch": 6.995849609375e-05,
      "step": 11462,
      "training_step_time": 0.38625311851501465
    },
    {
      "epoch": 6.9964599609375e-05,
      "model_forward_time": 0.11542630195617676,
      "step": 11463
    },
    {
      "epoch": 6.9964599609375e-05,
      "step": 11463,
      "training_step_time": 0.6184275150299072
    },
    {
      "epoch": 6.9970703125e-05,
      "model_forward_time": 0.11606955528259277,
      "step": 11464
    },
    {
      "epoch": 6.9970703125e-05,
      "step": 11464,
      "training_step_time": 0.40387558937072754
    },
    {
      "epoch": 6.9976806640625e-05,
      "model_forward_time": 0.11597943305969238,
      "step": 11465
    },
    {
      "epoch": 6.9976806640625e-05,
      "step": 11465,
      "training_step_time": 0.4393587112426758
    },
    {
      "epoch": 6.998291015625e-05,
      "model_forward_time": 0.11593174934387207,
      "step": 11466
    },
    {
      "epoch": 6.998291015625e-05,
      "step": 11466,
      "training_step_time": 0.4194917678833008
    },
    {
      "epoch": 6.9989013671875e-05,
      "model_forward_time": 0.11572480201721191,
      "step": 11467
    },
    {
      "epoch": 6.9989013671875e-05,
      "step": 11467,
      "training_step_time": 0.3943333625793457
    },
    {
      "epoch": 6.99951171875e-05,
      "model_forward_time": 0.11514496803283691,
      "step": 11468
    },
    {
      "epoch": 6.99951171875e-05,
      "step": 11468,
      "training_step_time": 0.37995338439941406
    },
    {
      "epoch": 7.0001220703125e-05,
      "model_forward_time": 0.11565017700195312,
      "step": 11469
    },
    {
      "epoch": 7.0001220703125e-05,
      "step": 11469,
      "training_step_time": 0.46221423149108887
    },
    {
      "epoch": 7.000732421875e-05,
      "grad_norm": 0.17620399594306946,
      "learning_rate": 9.464998076816664e-05,
      "loss": 0.065,
      "step": 11470
    },
    {
      "epoch": 7.000732421875e-05,
      "model_forward_time": 0.1163642406463623,
      "step": 11470
    },
    {
      "epoch": 7.000732421875e-05,
      "step": 11470,
      "training_step_time": 0.44588780403137207
    },
    {
      "epoch": 7.0013427734375e-05,
      "model_forward_time": 0.11518430709838867,
      "step": 11471
    },
    {
      "epoch": 7.0013427734375e-05,
      "step": 11471,
      "training_step_time": 0.5103659629821777
    },
    {
      "epoch": 7.001953125e-05,
      "model_forward_time": 0.11534953117370605,
      "step": 11472
    },
    {
      "epoch": 7.001953125e-05,
      "step": 11472,
      "training_step_time": 0.39364147186279297
    },
    {
      "epoch": 7.0025634765625e-05,
      "model_forward_time": 0.11564874649047852,
      "step": 11473
    },
    {
      "epoch": 7.0025634765625e-05,
      "step": 11473,
      "training_step_time": 0.4881911277770996
    },
    {
      "epoch": 7.003173828125e-05,
      "model_forward_time": 0.11633706092834473,
      "step": 11474
    },
    {
      "epoch": 7.003173828125e-05,
      "step": 11474,
      "training_step_time": 0.3951888084411621
    },
    {
      "epoch": 7.0037841796875e-05,
      "model_forward_time": 0.11464381217956543,
      "step": 11475
    },
    {
      "epoch": 7.0037841796875e-05,
      "step": 11475,
      "training_step_time": 0.5021567344665527
    },
    {
      "epoch": 7.00439453125e-05,
      "model_forward_time": 0.11522960662841797,
      "step": 11476
    },
    {
      "epoch": 7.00439453125e-05,
      "step": 11476,
      "training_step_time": 0.3942134380340576
    },
    {
      "epoch": 7.0050048828125e-05,
      "model_forward_time": 0.11535382270812988,
      "step": 11477
    },
    {
      "epoch": 7.0050048828125e-05,
      "step": 11477,
      "training_step_time": 0.3954136371612549
    },
    {
      "epoch": 7.005615234375e-05,
      "model_forward_time": 0.11598706245422363,
      "step": 11478
    },
    {
      "epoch": 7.005615234375e-05,
      "step": 11478,
      "training_step_time": 0.4236588478088379
    },
    {
      "epoch": 7.0062255859375e-05,
      "model_forward_time": 0.11530876159667969,
      "step": 11479
    },
    {
      "epoch": 7.0062255859375e-05,
      "step": 11479,
      "training_step_time": 0.44219446182250977
    },
    {
      "epoch": 7.0068359375e-05,
      "grad_norm": 0.1586388647556305,
      "learning_rate": 9.463757138087535e-05,
      "loss": 0.0618,
      "step": 11480
    },
    {
      "epoch": 7.0068359375e-05,
      "model_forward_time": 0.115631103515625,
      "step": 11480
    },
    {
      "epoch": 7.0068359375e-05,
      "step": 11480,
      "training_step_time": 0.47045159339904785
    },
    {
      "epoch": 7.0074462890625e-05,
      "model_forward_time": 0.11644601821899414,
      "step": 11481
    },
    {
      "epoch": 7.0074462890625e-05,
      "step": 11481,
      "training_step_time": 0.38512563705444336
    },
    {
      "epoch": 7.008056640625e-05,
      "model_forward_time": 0.11539816856384277,
      "step": 11482
    },
    {
      "epoch": 7.008056640625e-05,
      "step": 11482,
      "training_step_time": 0.37947988510131836
    },
    {
      "epoch": 7.0086669921875e-05,
      "model_forward_time": 0.11696171760559082,
      "step": 11483
    },
    {
      "epoch": 7.0086669921875e-05,
      "step": 11483,
      "training_step_time": 0.44411373138427734
    },
    {
      "epoch": 7.00927734375e-05,
      "model_forward_time": 0.11505007743835449,
      "step": 11484
    },
    {
      "epoch": 7.00927734375e-05,
      "step": 11484,
      "training_step_time": 0.41718602180480957
    },
    {
      "epoch": 7.0098876953125e-05,
      "model_forward_time": 0.11533355712890625,
      "step": 11485
    },
    {
      "epoch": 7.0098876953125e-05,
      "step": 11485,
      "training_step_time": 0.40854835510253906
    },
    {
      "epoch": 7.010498046875e-05,
      "model_forward_time": 0.11879611015319824,
      "step": 11486
    },
    {
      "epoch": 7.010498046875e-05,
      "step": 11486,
      "training_step_time": 0.43083834648132324
    },
    {
      "epoch": 7.0111083984375e-05,
      "model_forward_time": 0.11568832397460938,
      "step": 11487
    },
    {
      "epoch": 7.0111083984375e-05,
      "step": 11487,
      "training_step_time": 0.40082645416259766
    },
    {
      "epoch": 7.01171875e-05,
      "model_forward_time": 0.11535334587097168,
      "step": 11488
    },
    {
      "epoch": 7.01171875e-05,
      "step": 11488,
      "training_step_time": 0.39393162727355957
    },
    {
      "epoch": 7.0123291015625e-05,
      "model_forward_time": 0.11480212211608887,
      "step": 11489
    },
    {
      "epoch": 7.0123291015625e-05,
      "step": 11489,
      "training_step_time": 0.8404712677001953
    },
    {
      "epoch": 7.012939453125e-05,
      "grad_norm": 0.24239076673984528,
      "learning_rate": 9.462514843386845e-05,
      "loss": 0.0582,
      "step": 11490
    },
    {
      "epoch": 7.012939453125e-05,
      "model_forward_time": 0.11830520629882812,
      "step": 11490
    },
    {
      "epoch": 7.012939453125e-05,
      "step": 11490,
      "training_step_time": 0.3833773136138916
    },
    {
      "epoch": 7.0135498046875e-05,
      "model_forward_time": 0.11549091339111328,
      "step": 11491
    },
    {
      "epoch": 7.0135498046875e-05,
      "step": 11491,
      "training_step_time": 0.3989415168762207
    },
    {
      "epoch": 7.01416015625e-05,
      "model_forward_time": 0.11471843719482422,
      "step": 11492
    },
    {
      "epoch": 7.01416015625e-05,
      "step": 11492,
      "training_step_time": 0.4295926094055176
    },
    {
      "epoch": 7.0147705078125e-05,
      "model_forward_time": 0.11475253105163574,
      "step": 11493
    },
    {
      "epoch": 7.0147705078125e-05,
      "step": 11493,
      "training_step_time": 0.4539623260498047
    },
    {
      "epoch": 7.015380859375e-05,
      "model_forward_time": 0.11444973945617676,
      "step": 11494
    },
    {
      "epoch": 7.015380859375e-05,
      "step": 11494,
      "training_step_time": 0.38300585746765137
    },
    {
      "epoch": 7.0159912109375e-05,
      "model_forward_time": 0.11559772491455078,
      "step": 11495
    },
    {
      "epoch": 7.0159912109375e-05,
      "step": 11495,
      "training_step_time": 0.6730213165283203
    },
    {
      "epoch": 7.0166015625e-05,
      "model_forward_time": 0.1153409481048584,
      "step": 11496
    },
    {
      "epoch": 7.0166015625e-05,
      "step": 11496,
      "training_step_time": 0.4655301570892334
    },
    {
      "epoch": 7.0172119140625e-05,
      "model_forward_time": 0.11431884765625,
      "step": 11497
    },
    {
      "epoch": 7.0172119140625e-05,
      "step": 11497,
      "training_step_time": 0.4103846549987793
    },
    {
      "epoch": 7.017822265625e-05,
      "model_forward_time": 0.1150197982788086,
      "step": 11498
    },
    {
      "epoch": 7.017822265625e-05,
      "step": 11498,
      "training_step_time": 0.3850822448730469
    },
    {
      "epoch": 7.0184326171875e-05,
      "model_forward_time": 0.11499428749084473,
      "step": 11499
    },
    {
      "epoch": 7.0184326171875e-05,
      "step": 11499,
      "training_step_time": 0.3918163776397705
    },
    {
      "epoch": 7.01904296875e-05,
      "grad_norm": 0.2075490653514862,
      "learning_rate": 9.46127119309197e-05,
      "loss": 0.0632,
      "step": 11500
    },
    {
      "epoch": 7.01904296875e-05,
      "model_forward_time": 0.11453580856323242,
      "step": 11500
    },
    {
      "epoch": 7.01904296875e-05,
      "step": 11500,
      "training_step_time": 0.39002180099487305
    },
    {
      "epoch": 7.0196533203125e-05,
      "model_forward_time": 0.11524009704589844,
      "step": 11501
    },
    {
      "epoch": 7.0196533203125e-05,
      "step": 11501,
      "training_step_time": 0.8732781410217285
    },
    {
      "epoch": 7.020263671875e-05,
      "model_forward_time": 0.11488103866577148,
      "step": 11502
    },
    {
      "epoch": 7.020263671875e-05,
      "step": 11502,
      "training_step_time": 0.3838818073272705
    },
    {
      "epoch": 7.0208740234375e-05,
      "model_forward_time": 0.1142580509185791,
      "step": 11503
    },
    {
      "epoch": 7.0208740234375e-05,
      "step": 11503,
      "training_step_time": 0.390139102935791
    },
    {
      "epoch": 7.021484375e-05,
      "model_forward_time": 0.11456894874572754,
      "step": 11504
    },
    {
      "epoch": 7.021484375e-05,
      "step": 11504,
      "training_step_time": 0.3657646179199219
    },
    {
      "epoch": 7.0220947265625e-05,
      "model_forward_time": 0.11603069305419922,
      "step": 11505
    },
    {
      "epoch": 7.0220947265625e-05,
      "step": 11505,
      "training_step_time": 0.3847663402557373
    },
    {
      "epoch": 7.022705078125e-05,
      "model_forward_time": 0.11415982246398926,
      "step": 11506
    },
    {
      "epoch": 7.022705078125e-05,
      "step": 11506,
      "training_step_time": 0.4047985076904297
    },
    {
      "epoch": 7.0233154296875e-05,
      "model_forward_time": 0.11607003211975098,
      "step": 11507
    },
    {
      "epoch": 7.0233154296875e-05,
      "step": 11507,
      "training_step_time": 0.5455977916717529
    },
    {
      "epoch": 7.02392578125e-05,
      "model_forward_time": 0.11485767364501953,
      "step": 11508
    },
    {
      "epoch": 7.02392578125e-05,
      "step": 11508,
      "training_step_time": 0.3857758045196533
    },
    {
      "epoch": 7.0245361328125e-05,
      "model_forward_time": 0.12067890167236328,
      "step": 11509
    },
    {
      "epoch": 7.0245361328125e-05,
      "step": 11509,
      "training_step_time": 0.4106457233428955
    },
    {
      "epoch": 7.025146484375e-05,
      "grad_norm": 0.21139873564243317,
      "learning_rate": 9.460026187580702e-05,
      "loss": 0.0613,
      "step": 11510
    },
    {
      "epoch": 7.025146484375e-05,
      "model_forward_time": 0.11504912376403809,
      "step": 11510
    },
    {
      "epoch": 7.025146484375e-05,
      "step": 11510,
      "training_step_time": 0.4687020778656006
    },
    {
      "epoch": 7.0257568359375e-05,
      "model_forward_time": 0.11507654190063477,
      "step": 11511
    },
    {
      "epoch": 7.0257568359375e-05,
      "step": 11511,
      "training_step_time": 0.43080782890319824
    },
    {
      "epoch": 7.0263671875e-05,
      "model_forward_time": 0.11532139778137207,
      "step": 11512
    },
    {
      "epoch": 7.0263671875e-05,
      "step": 11512,
      "training_step_time": 0.4011538028717041
    },
    {
      "epoch": 7.0269775390625e-05,
      "model_forward_time": 0.11476898193359375,
      "step": 11513
    },
    {
      "epoch": 7.0269775390625e-05,
      "step": 11513,
      "training_step_time": 0.9778242111206055
    },
    {
      "epoch": 7.027587890625e-05,
      "model_forward_time": 0.11543917655944824,
      "step": 11514
    },
    {
      "epoch": 7.027587890625e-05,
      "step": 11514,
      "training_step_time": 0.38005852699279785
    },
    {
      "epoch": 7.0281982421875e-05,
      "model_forward_time": 0.11432456970214844,
      "step": 11515
    },
    {
      "epoch": 7.0281982421875e-05,
      "step": 11515,
      "training_step_time": 0.38760924339294434
    },
    {
      "epoch": 7.02880859375e-05,
      "model_forward_time": 0.11440420150756836,
      "step": 11516
    },
    {
      "epoch": 7.02880859375e-05,
      "step": 11516,
      "training_step_time": 0.39107418060302734
    },
    {
      "epoch": 7.0294189453125e-05,
      "model_forward_time": 0.11427116394042969,
      "step": 11517
    },
    {
      "epoch": 7.0294189453125e-05,
      "step": 11517,
      "training_step_time": 0.44942688941955566
    },
    {
      "epoch": 7.030029296875e-05,
      "model_forward_time": 0.11520171165466309,
      "step": 11518
    },
    {
      "epoch": 7.030029296875e-05,
      "step": 11518,
      "training_step_time": 0.3915233612060547
    },
    {
      "epoch": 7.0306396484375e-05,
      "model_forward_time": 0.11499834060668945,
      "step": 11519
    },
    {
      "epoch": 7.0306396484375e-05,
      "step": 11519,
      "training_step_time": 0.5902607440948486
    },
    {
      "epoch": 7.03125e-05,
      "grad_norm": 0.17313076555728912,
      "learning_rate": 9.458779827231237e-05,
      "loss": 0.0604,
      "step": 11520
    },
    {
      "epoch": 7.03125e-05,
      "model_forward_time": 0.11444449424743652,
      "step": 11520
    },
    {
      "epoch": 7.03125e-05,
      "step": 11520,
      "training_step_time": 0.38697123527526855
    },
    {
      "epoch": 7.0318603515625e-05,
      "model_forward_time": 0.11510276794433594,
      "step": 11521
    },
    {
      "epoch": 7.0318603515625e-05,
      "step": 11521,
      "training_step_time": 0.3899245262145996
    },
    {
      "epoch": 7.032470703125e-05,
      "model_forward_time": 0.11470413208007812,
      "step": 11522
    },
    {
      "epoch": 7.032470703125e-05,
      "step": 11522,
      "training_step_time": 0.4583768844604492
    },
    {
      "epoch": 7.0330810546875e-05,
      "model_forward_time": 0.11407232284545898,
      "step": 11523
    },
    {
      "epoch": 7.0330810546875e-05,
      "step": 11523,
      "training_step_time": 0.4641125202178955
    },
    {
      "epoch": 7.03369140625e-05,
      "model_forward_time": 0.11380362510681152,
      "step": 11524
    },
    {
      "epoch": 7.03369140625e-05,
      "step": 11524,
      "training_step_time": 0.4240713119506836
    },
    {
      "epoch": 7.0343017578125e-05,
      "model_forward_time": 0.11540985107421875,
      "step": 11525
    },
    {
      "epoch": 7.0343017578125e-05,
      "step": 11525,
      "training_step_time": 0.44272804260253906
    },
    {
      "epoch": 7.034912109375e-05,
      "model_forward_time": 0.11515092849731445,
      "step": 11526
    },
    {
      "epoch": 7.034912109375e-05,
      "step": 11526,
      "training_step_time": 0.404865026473999
    },
    {
      "epoch": 7.0355224609375e-05,
      "model_forward_time": 0.11448526382446289,
      "step": 11527
    },
    {
      "epoch": 7.0355224609375e-05,
      "step": 11527,
      "training_step_time": 0.49748730659484863
    },
    {
      "epoch": 7.0361328125e-05,
      "model_forward_time": 0.11490797996520996,
      "step": 11528
    },
    {
      "epoch": 7.0361328125e-05,
      "step": 11528,
      "training_step_time": 0.4043138027191162
    },
    {
      "epoch": 7.0367431640625e-05,
      "model_forward_time": 0.11517548561096191,
      "step": 11529
    },
    {
      "epoch": 7.0367431640625e-05,
      "step": 11529,
      "training_step_time": 0.4315927028656006
    },
    {
      "epoch": 7.037353515625e-05,
      "grad_norm": 0.241164892911911,
      "learning_rate": 9.457532112422187e-05,
      "loss": 0.0641,
      "step": 11530
    },
    {
      "epoch": 7.037353515625e-05,
      "model_forward_time": 0.11465239524841309,
      "step": 11530
    },
    {
      "epoch": 7.037353515625e-05,
      "step": 11530,
      "training_step_time": 0.39170050621032715
    },
    {
      "epoch": 7.0379638671875e-05,
      "model_forward_time": 0.11541128158569336,
      "step": 11531
    },
    {
      "epoch": 7.0379638671875e-05,
      "step": 11531,
      "training_step_time": 0.3934977054595947
    },
    {
      "epoch": 7.03857421875e-05,
      "model_forward_time": 0.11558890342712402,
      "step": 11532
    },
    {
      "epoch": 7.03857421875e-05,
      "step": 11532,
      "training_step_time": 0.37619805335998535
    },
    {
      "epoch": 7.0391845703125e-05,
      "model_forward_time": 0.11526298522949219,
      "step": 11533
    },
    {
      "epoch": 7.0391845703125e-05,
      "step": 11533,
      "training_step_time": 0.4326162338256836
    },
    {
      "epoch": 7.039794921875e-05,
      "model_forward_time": 0.11556458473205566,
      "step": 11534
    },
    {
      "epoch": 7.039794921875e-05,
      "step": 11534,
      "training_step_time": 0.4949643611907959
    },
    {
      "epoch": 7.0404052734375e-05,
      "model_forward_time": 0.11549258232116699,
      "step": 11535
    },
    {
      "epoch": 7.0404052734375e-05,
      "step": 11535,
      "training_step_time": 0.4301443099975586
    },
    {
      "epoch": 7.041015625e-05,
      "model_forward_time": 0.11444473266601562,
      "step": 11536
    },
    {
      "epoch": 7.041015625e-05,
      "step": 11536,
      "training_step_time": 0.40761518478393555
    },
    {
      "epoch": 7.0416259765625e-05,
      "model_forward_time": 0.11537361145019531,
      "step": 11537
    },
    {
      "epoch": 7.0416259765625e-05,
      "step": 11537,
      "training_step_time": 0.9831116199493408
    },
    {
      "epoch": 7.042236328125e-05,
      "model_forward_time": 0.11487722396850586,
      "step": 11538
    },
    {
      "epoch": 7.042236328125e-05,
      "step": 11538,
      "training_step_time": 0.4363377094268799
    },
    {
      "epoch": 7.0428466796875e-05,
      "model_forward_time": 0.11398792266845703,
      "step": 11539
    },
    {
      "epoch": 7.0428466796875e-05,
      "step": 11539,
      "training_step_time": 0.39769959449768066
    },
    {
      "epoch": 7.04345703125e-05,
      "grad_norm": 0.14429520070552826,
      "learning_rate": 9.456283043532576e-05,
      "loss": 0.0587,
      "step": 11540
    },
    {
      "epoch": 7.04345703125e-05,
      "model_forward_time": 0.11398506164550781,
      "step": 11540
    },
    {
      "epoch": 7.04345703125e-05,
      "step": 11540,
      "training_step_time": 0.3881721496582031
    },
    {
      "epoch": 7.0440673828125e-05,
      "model_forward_time": 0.11470413208007812,
      "step": 11541
    },
    {
      "epoch": 7.0440673828125e-05,
      "step": 11541,
      "training_step_time": 0.43379688262939453
    },
    {
      "epoch": 7.044677734375e-05,
      "model_forward_time": 0.11469531059265137,
      "step": 11542
    },
    {
      "epoch": 7.044677734375e-05,
      "step": 11542,
      "training_step_time": 0.39838314056396484
    },
    {
      "epoch": 7.0452880859375e-05,
      "model_forward_time": 0.11495161056518555,
      "step": 11543
    },
    {
      "epoch": 7.0452880859375e-05,
      "step": 11543,
      "training_step_time": 0.6555235385894775
    },
    {
      "epoch": 7.0458984375e-05,
      "model_forward_time": 0.11415362358093262,
      "step": 11544
    },
    {
      "epoch": 7.0458984375e-05,
      "step": 11544,
      "training_step_time": 0.39892053604125977
    },
    {
      "epoch": 7.0465087890625e-05,
      "model_forward_time": 0.11496710777282715,
      "step": 11545
    },
    {
      "epoch": 7.0465087890625e-05,
      "step": 11545,
      "training_step_time": 0.40433430671691895
    },
    {
      "epoch": 7.047119140625e-05,
      "model_forward_time": 0.11470484733581543,
      "step": 11546
    },
    {
      "epoch": 7.047119140625e-05,
      "step": 11546,
      "training_step_time": 0.45550084114074707
    },
    {
      "epoch": 7.0477294921875e-05,
      "model_forward_time": 0.1146082878112793,
      "step": 11547
    },
    {
      "epoch": 7.0477294921875e-05,
      "step": 11547,
      "training_step_time": 0.46428871154785156
    },
    {
      "epoch": 7.04833984375e-05,
      "model_forward_time": 0.1148996353149414,
      "step": 11548
    },
    {
      "epoch": 7.04833984375e-05,
      "step": 11548,
      "training_step_time": 0.38576316833496094
    },
    {
      "epoch": 7.0489501953125e-05,
      "model_forward_time": 0.1150660514831543,
      "step": 11549
    },
    {
      "epoch": 7.0489501953125e-05,
      "step": 11549,
      "training_step_time": 0.5755674839019775
    },
    {
      "epoch": 7.049560546875e-05,
      "grad_norm": 0.1678122580051422,
      "learning_rate": 9.45503262094184e-05,
      "loss": 0.0645,
      "step": 11550
    },
    {
      "epoch": 7.049560546875e-05,
      "model_forward_time": 0.11453437805175781,
      "step": 11550
    },
    {
      "epoch": 7.049560546875e-05,
      "step": 11550,
      "training_step_time": 0.43703699111938477
    },
    {
      "epoch": 7.0501708984375e-05,
      "model_forward_time": 0.11451244354248047,
      "step": 11551
    },
    {
      "epoch": 7.0501708984375e-05,
      "step": 11551,
      "training_step_time": 0.4647495746612549
    },
    {
      "epoch": 7.05078125e-05,
      "model_forward_time": 0.11458444595336914,
      "step": 11552
    },
    {
      "epoch": 7.05078125e-05,
      "step": 11552,
      "training_step_time": 0.40016937255859375
    },
    {
      "epoch": 7.0513916015625e-05,
      "model_forward_time": 0.11432671546936035,
      "step": 11553
    },
    {
      "epoch": 7.0513916015625e-05,
      "step": 11553,
      "training_step_time": 0.4782676696777344
    },
    {
      "epoch": 7.052001953125e-05,
      "model_forward_time": 0.11472463607788086,
      "step": 11554
    },
    {
      "epoch": 7.052001953125e-05,
      "step": 11554,
      "training_step_time": 0.3948023319244385
    },
    {
      "epoch": 7.0526123046875e-05,
      "model_forward_time": 0.1147763729095459,
      "step": 11555
    },
    {
      "epoch": 7.0526123046875e-05,
      "step": 11555,
      "training_step_time": 0.6218183040618896
    },
    {
      "epoch": 7.05322265625e-05,
      "model_forward_time": 0.11473202705383301,
      "step": 11556
    },
    {
      "epoch": 7.05322265625e-05,
      "step": 11556,
      "training_step_time": 0.40285372734069824
    },
    {
      "epoch": 7.0538330078125e-05,
      "model_forward_time": 0.11480450630187988,
      "step": 11557
    },
    {
      "epoch": 7.0538330078125e-05,
      "step": 11557,
      "training_step_time": 0.39261412620544434
    },
    {
      "epoch": 7.054443359375e-05,
      "model_forward_time": 0.11454200744628906,
      "step": 11558
    },
    {
      "epoch": 7.054443359375e-05,
      "step": 11558,
      "training_step_time": 0.3785369396209717
    },
    {
      "epoch": 7.0550537109375e-05,
      "model_forward_time": 0.11452889442443848,
      "step": 11559
    },
    {
      "epoch": 7.0550537109375e-05,
      "step": 11559,
      "training_step_time": 0.3929164409637451
    },
    {
      "epoch": 7.0556640625e-05,
      "grad_norm": 0.1741352528333664,
      "learning_rate": 9.453780845029821e-05,
      "loss": 0.062,
      "step": 11560
    },
    {
      "epoch": 7.0556640625e-05,
      "model_forward_time": 0.11525893211364746,
      "step": 11560
    },
    {
      "epoch": 7.0556640625e-05,
      "step": 11560,
      "training_step_time": 0.4156148433685303
    },
    {
      "epoch": 7.0562744140625e-05,
      "model_forward_time": 0.11531829833984375,
      "step": 11561
    },
    {
      "epoch": 7.0562744140625e-05,
      "step": 11561,
      "training_step_time": 0.5482001304626465
    },
    {
      "epoch": 7.056884765625e-05,
      "model_forward_time": 0.11525678634643555,
      "step": 11562
    },
    {
      "epoch": 7.056884765625e-05,
      "step": 11562,
      "training_step_time": 0.397141695022583
    },
    {
      "epoch": 7.0574951171875e-05,
      "model_forward_time": 0.1145329475402832,
      "step": 11563
    },
    {
      "epoch": 7.0574951171875e-05,
      "step": 11563,
      "training_step_time": 0.4851670265197754
    },
    {
      "epoch": 7.05810546875e-05,
      "model_forward_time": 0.11498856544494629,
      "step": 11564
    },
    {
      "epoch": 7.05810546875e-05,
      "step": 11564,
      "training_step_time": 0.4301435947418213
    },
    {
      "epoch": 7.0587158203125e-05,
      "model_forward_time": 0.11474394798278809,
      "step": 11565
    },
    {
      "epoch": 7.0587158203125e-05,
      "step": 11565,
      "training_step_time": 0.48578500747680664
    },
    {
      "epoch": 7.059326171875e-05,
      "model_forward_time": 0.11617922782897949,
      "step": 11566
    },
    {
      "epoch": 7.059326171875e-05,
      "step": 11566,
      "training_step_time": 0.38266873359680176
    },
    {
      "epoch": 7.0599365234375e-05,
      "model_forward_time": 0.11514830589294434,
      "step": 11567
    },
    {
      "epoch": 7.0599365234375e-05,
      "step": 11567,
      "training_step_time": 0.543877124786377
    },
    {
      "epoch": 7.060546875e-05,
      "model_forward_time": 0.11457467079162598,
      "step": 11568
    },
    {
      "epoch": 7.060546875e-05,
      "step": 11568,
      "training_step_time": 0.38730287551879883
    },
    {
      "epoch": 7.0611572265625e-05,
      "model_forward_time": 0.11496114730834961,
      "step": 11569
    },
    {
      "epoch": 7.0611572265625e-05,
      "step": 11569,
      "training_step_time": 0.39150118827819824
    },
    {
      "epoch": 7.061767578125e-05,
      "grad_norm": 0.11203926801681519,
      "learning_rate": 9.452527716176776e-05,
      "loss": 0.0624,
      "step": 11570
    },
    {
      "epoch": 7.061767578125e-05,
      "model_forward_time": 0.11491966247558594,
      "step": 11570
    },
    {
      "epoch": 7.061767578125e-05,
      "step": 11570,
      "training_step_time": 0.39876413345336914
    },
    {
      "epoch": 7.0623779296875e-05,
      "model_forward_time": 0.11513996124267578,
      "step": 11571
    },
    {
      "epoch": 7.0623779296875e-05,
      "step": 11571,
      "training_step_time": 0.39762187004089355
    },
    {
      "epoch": 7.06298828125e-05,
      "model_forward_time": 0.11542558670043945,
      "step": 11572
    },
    {
      "epoch": 7.06298828125e-05,
      "step": 11572,
      "training_step_time": 0.3919980525970459
    },
    {
      "epoch": 7.0635986328125e-05,
      "model_forward_time": 0.11589336395263672,
      "step": 11573
    },
    {
      "epoch": 7.0635986328125e-05,
      "step": 11573,
      "training_step_time": 0.555741548538208
    },
    {
      "epoch": 7.064208984375e-05,
      "model_forward_time": 0.11503148078918457,
      "step": 11574
    },
    {
      "epoch": 7.064208984375e-05,
      "step": 11574,
      "training_step_time": 0.42050981521606445
    },
    {
      "epoch": 7.0648193359375e-05,
      "model_forward_time": 0.11474370956420898,
      "step": 11575
    },
    {
      "epoch": 7.0648193359375e-05,
      "step": 11575,
      "training_step_time": 0.48781585693359375
    },
    {
      "epoch": 7.0654296875e-05,
      "model_forward_time": 0.11624741554260254,
      "step": 11576
    },
    {
      "epoch": 7.0654296875e-05,
      "step": 11576,
      "training_step_time": 0.46758413314819336
    },
    {
      "epoch": 7.0660400390625e-05,
      "model_forward_time": 0.11436629295349121,
      "step": 11577
    },
    {
      "epoch": 7.0660400390625e-05,
      "step": 11577,
      "training_step_time": 0.46991848945617676
    },
    {
      "epoch": 7.066650390625e-05,
      "model_forward_time": 0.11529207229614258,
      "step": 11578
    },
    {
      "epoch": 7.066650390625e-05,
      "step": 11578,
      "training_step_time": 0.4237501621246338
    },
    {
      "epoch": 7.0672607421875e-05,
      "model_forward_time": 0.11538505554199219,
      "step": 11579
    },
    {
      "epoch": 7.0672607421875e-05,
      "step": 11579,
      "training_step_time": 0.4597482681274414
    },
    {
      "epoch": 7.06787109375e-05,
      "grad_norm": 0.18690991401672363,
      "learning_rate": 9.451273234763371e-05,
      "loss": 0.0576,
      "step": 11580
    },
    {
      "epoch": 7.06787109375e-05,
      "model_forward_time": 0.11436843872070312,
      "step": 11580
    },
    {
      "epoch": 7.06787109375e-05,
      "step": 11580,
      "training_step_time": 0.418135404586792
    },
    {
      "epoch": 7.0684814453125e-05,
      "model_forward_time": 0.11495256423950195,
      "step": 11581
    },
    {
      "epoch": 7.0684814453125e-05,
      "step": 11581,
      "training_step_time": 0.4311680793762207
    },
    {
      "epoch": 7.069091796875e-05,
      "model_forward_time": 0.11492109298706055,
      "step": 11582
    },
    {
      "epoch": 7.069091796875e-05,
      "step": 11582,
      "training_step_time": 0.3962111473083496
    },
    {
      "epoch": 7.0697021484375e-05,
      "model_forward_time": 0.11449384689331055,
      "step": 11583
    },
    {
      "epoch": 7.0697021484375e-05,
      "step": 11583,
      "training_step_time": 0.39191770553588867
    },
    {
      "epoch": 7.0703125e-05,
      "model_forward_time": 0.11531472206115723,
      "step": 11584
    },
    {
      "epoch": 7.0703125e-05,
      "step": 11584,
      "training_step_time": 0.38968658447265625
    },
    {
      "epoch": 7.0709228515625e-05,
      "model_forward_time": 0.1145930290222168,
      "step": 11585
    },
    {
      "epoch": 7.0709228515625e-05,
      "step": 11585,
      "training_step_time": 0.3930552005767822
    },
    {
      "epoch": 7.071533203125e-05,
      "model_forward_time": 0.11528921127319336,
      "step": 11586
    },
    {
      "epoch": 7.071533203125e-05,
      "step": 11586,
      "training_step_time": 0.4009377956390381
    },
    {
      "epoch": 7.0721435546875e-05,
      "model_forward_time": 0.11481928825378418,
      "step": 11587
    },
    {
      "epoch": 7.0721435546875e-05,
      "step": 11587,
      "training_step_time": 0.3673825263977051
    },
    {
      "epoch": 7.07275390625e-05,
      "model_forward_time": 0.11472129821777344,
      "step": 11588
    },
    {
      "epoch": 7.07275390625e-05,
      "step": 11588,
      "training_step_time": 0.4521818161010742
    },
    {
      "epoch": 7.0733642578125e-05,
      "model_forward_time": 0.11490249633789062,
      "step": 11589
    },
    {
      "epoch": 7.0733642578125e-05,
      "step": 11589,
      "training_step_time": 0.4896228313446045
    },
    {
      "epoch": 7.073974609375e-05,
      "grad_norm": 0.1920263171195984,
      "learning_rate": 9.450017401170689e-05,
      "loss": 0.0638,
      "step": 11590
    },
    {
      "epoch": 7.073974609375e-05,
      "model_forward_time": 0.11555719375610352,
      "step": 11590
    },
    {
      "epoch": 7.073974609375e-05,
      "step": 11590,
      "training_step_time": 0.39714741706848145
    },
    {
      "epoch": 7.0745849609375e-05,
      "model_forward_time": 0.11505293846130371,
      "step": 11591
    },
    {
      "epoch": 7.0745849609375e-05,
      "step": 11591,
      "training_step_time": 0.40447211265563965
    },
    {
      "epoch": 7.0751953125e-05,
      "model_forward_time": 0.11586475372314453,
      "step": 11592
    },
    {
      "epoch": 7.0751953125e-05,
      "step": 11592,
      "training_step_time": 0.4198477268218994
    },
    {
      "epoch": 7.0758056640625e-05,
      "model_forward_time": 0.11504340171813965,
      "step": 11593
    },
    {
      "epoch": 7.0758056640625e-05,
      "step": 11593,
      "training_step_time": 0.45349955558776855
    },
    {
      "epoch": 7.076416015625e-05,
      "model_forward_time": 0.11502814292907715,
      "step": 11594
    },
    {
      "epoch": 7.076416015625e-05,
      "step": 11594,
      "training_step_time": 0.45015931129455566
    },
    {
      "epoch": 7.0770263671875e-05,
      "model_forward_time": 0.1151571273803711,
      "step": 11595
    },
    {
      "epoch": 7.0770263671875e-05,
      "step": 11595,
      "training_step_time": 0.4571671485900879
    },
    {
      "epoch": 7.07763671875e-05,
      "model_forward_time": 0.1148681640625,
      "step": 11596
    },
    {
      "epoch": 7.07763671875e-05,
      "step": 11596,
      "training_step_time": 0.39373111724853516
    },
    {
      "epoch": 7.0782470703125e-05,
      "model_forward_time": 0.11480569839477539,
      "step": 11597
    },
    {
      "epoch": 7.0782470703125e-05,
      "step": 11597,
      "training_step_time": 0.672013521194458
    },
    {
      "epoch": 7.078857421875e-05,
      "model_forward_time": 0.11441922187805176,
      "step": 11598
    },
    {
      "epoch": 7.078857421875e-05,
      "step": 11598,
      "training_step_time": 0.39177513122558594
    },
    {
      "epoch": 7.0794677734375e-05,
      "model_forward_time": 0.11401653289794922,
      "step": 11599
    },
    {
      "epoch": 7.0794677734375e-05,
      "step": 11599,
      "training_step_time": 0.391204833984375
    },
    {
      "epoch": 7.080078125e-05,
      "grad_norm": 0.15596044063568115,
      "learning_rate": 9.448760215780217e-05,
      "loss": 0.0608,
      "step": 11600
    },
    {
      "epoch": 7.080078125e-05,
      "model_forward_time": 0.11467742919921875,
      "step": 11600
    },
    {
      "epoch": 7.080078125e-05,
      "step": 11600,
      "training_step_time": 0.38979339599609375
    },
    {
      "epoch": 7.0806884765625e-05,
      "model_forward_time": 0.11466789245605469,
      "step": 11601
    },
    {
      "epoch": 7.0806884765625e-05,
      "step": 11601,
      "training_step_time": 0.3644695281982422
    },
    {
      "epoch": 7.081298828125e-05,
      "model_forward_time": 0.11448884010314941,
      "step": 11602
    },
    {
      "epoch": 7.081298828125e-05,
      "step": 11602,
      "training_step_time": 0.4736745357513428
    },
    {
      "epoch": 7.0819091796875e-05,
      "model_forward_time": 0.1150810718536377,
      "step": 11603
    },
    {
      "epoch": 7.0819091796875e-05,
      "step": 11603,
      "training_step_time": 0.4218566417694092
    },
    {
      "epoch": 7.08251953125e-05,
      "model_forward_time": 0.11469054222106934,
      "step": 11604
    },
    {
      "epoch": 7.08251953125e-05,
      "step": 11604,
      "training_step_time": 0.4128541946411133
    },
    {
      "epoch": 7.0831298828125e-05,
      "model_forward_time": 0.11564183235168457,
      "step": 11605
    },
    {
      "epoch": 7.0831298828125e-05,
      "step": 11605,
      "training_step_time": 0.42115044593811035
    },
    {
      "epoch": 7.083740234375e-05,
      "model_forward_time": 0.11516547203063965,
      "step": 11606
    },
    {
      "epoch": 7.083740234375e-05,
      "step": 11606,
      "training_step_time": 0.40259361267089844
    },
    {
      "epoch": 7.0843505859375e-05,
      "model_forward_time": 0.11557698249816895,
      "step": 11607
    },
    {
      "epoch": 7.0843505859375e-05,
      "step": 11607,
      "training_step_time": 0.436751127243042
    },
    {
      "epoch": 7.0849609375e-05,
      "model_forward_time": 0.11591124534606934,
      "step": 11608
    },
    {
      "epoch": 7.0849609375e-05,
      "step": 11608,
      "training_step_time": 0.4588940143585205
    },
    {
      "epoch": 7.0855712890625e-05,
      "model_forward_time": 0.1154632568359375,
      "step": 11609
    },
    {
      "epoch": 7.0855712890625e-05,
      "step": 11609,
      "training_step_time": 0.8695580959320068
    },
    {
      "epoch": 7.086181640625e-05,
      "grad_norm": 0.14404703676700592,
      "learning_rate": 9.447501678973852e-05,
      "loss": 0.0603,
      "step": 11610
    },
    {
      "epoch": 7.086181640625e-05,
      "model_forward_time": 0.11444997787475586,
      "step": 11610
    },
    {
      "epoch": 7.086181640625e-05,
      "step": 11610,
      "training_step_time": 0.3858625888824463
    },
    {
      "epoch": 7.0867919921875e-05,
      "model_forward_time": 0.11480140686035156,
      "step": 11611
    },
    {
      "epoch": 7.0867919921875e-05,
      "step": 11611,
      "training_step_time": 0.38541579246520996
    },
    {
      "epoch": 7.08740234375e-05,
      "model_forward_time": 0.11469268798828125,
      "step": 11612
    },
    {
      "epoch": 7.08740234375e-05,
      "step": 11612,
      "training_step_time": 0.3927040100097656
    },
    {
      "epoch": 7.0880126953125e-05,
      "model_forward_time": 0.1145637035369873,
      "step": 11613
    },
    {
      "epoch": 7.0880126953125e-05,
      "step": 11613,
      "training_step_time": 0.3890268802642822
    },
    {
      "epoch": 7.088623046875e-05,
      "model_forward_time": 0.11448454856872559,
      "step": 11614
    },
    {
      "epoch": 7.088623046875e-05,
      "step": 11614,
      "training_step_time": 0.3987727165222168
    },
    {
      "epoch": 7.0892333984375e-05,
      "model_forward_time": 0.11460518836975098,
      "step": 11615
    },
    {
      "epoch": 7.0892333984375e-05,
      "step": 11615,
      "training_step_time": 0.6340103149414062
    },
    {
      "epoch": 7.08984375e-05,
      "model_forward_time": 0.11461448669433594,
      "step": 11616
    },
    {
      "epoch": 7.08984375e-05,
      "step": 11616,
      "training_step_time": 0.39079904556274414
    },
    {
      "epoch": 7.0904541015625e-05,
      "model_forward_time": 0.1141977310180664,
      "step": 11617
    },
    {
      "epoch": 7.0904541015625e-05,
      "step": 11617,
      "training_step_time": 0.4461398124694824
    },
    {
      "epoch": 7.091064453125e-05,
      "model_forward_time": 0.11525511741638184,
      "step": 11618
    },
    {
      "epoch": 7.091064453125e-05,
      "step": 11618,
      "training_step_time": 0.3973870277404785
    },
    {
      "epoch": 7.0916748046875e-05,
      "model_forward_time": 0.11491632461547852,
      "step": 11619
    },
    {
      "epoch": 7.0916748046875e-05,
      "step": 11619,
      "training_step_time": 0.4299771785736084
    },
    {
      "epoch": 7.09228515625e-05,
      "grad_norm": 0.22563187777996063,
      "learning_rate": 9.446241791133907e-05,
      "loss": 0.064,
      "step": 11620
    },
    {
      "epoch": 7.09228515625e-05,
      "model_forward_time": 0.11464619636535645,
      "step": 11620
    },
    {
      "epoch": 7.09228515625e-05,
      "step": 11620,
      "training_step_time": 0.4925343990325928
    },
    {
      "epoch": 7.0928955078125e-05,
      "model_forward_time": 0.11493706703186035,
      "step": 11621
    },
    {
      "epoch": 7.0928955078125e-05,
      "step": 11621,
      "training_step_time": 0.4589672088623047
    },
    {
      "epoch": 7.093505859375e-05,
      "model_forward_time": 0.11499738693237305,
      "step": 11622
    },
    {
      "epoch": 7.093505859375e-05,
      "step": 11622,
      "training_step_time": 0.42948007583618164
    },
    {
      "epoch": 7.0941162109375e-05,
      "model_forward_time": 0.11501312255859375,
      "step": 11623
    },
    {
      "epoch": 7.0941162109375e-05,
      "step": 11623,
      "training_step_time": 0.39406776428222656
    },
    {
      "epoch": 7.0947265625e-05,
      "model_forward_time": 0.11459231376647949,
      "step": 11624
    },
    {
      "epoch": 7.0947265625e-05,
      "step": 11624,
      "training_step_time": 0.4027717113494873
    },
    {
      "epoch": 7.0953369140625e-05,
      "model_forward_time": 0.11467671394348145,
      "step": 11625
    },
    {
      "epoch": 7.0953369140625e-05,
      "step": 11625,
      "training_step_time": 0.38804030418395996
    },
    {
      "epoch": 7.095947265625e-05,
      "model_forward_time": 0.11512160301208496,
      "step": 11626
    },
    {
      "epoch": 7.095947265625e-05,
      "step": 11626,
      "training_step_time": 0.38394713401794434
    },
    {
      "epoch": 7.0965576171875e-05,
      "model_forward_time": 0.11527395248413086,
      "step": 11627
    },
    {
      "epoch": 7.0965576171875e-05,
      "step": 11627,
      "training_step_time": 0.6914093494415283
    },
    {
      "epoch": 7.09716796875e-05,
      "model_forward_time": 0.11457347869873047,
      "step": 11628
    },
    {
      "epoch": 7.09716796875e-05,
      "step": 11628,
      "training_step_time": 0.3822009563446045
    },
    {
      "epoch": 7.0977783203125e-05,
      "model_forward_time": 0.11572647094726562,
      "step": 11629
    },
    {
      "epoch": 7.0977783203125e-05,
      "step": 11629,
      "training_step_time": 0.4535844326019287
    },
    {
      "epoch": 7.098388671875e-05,
      "grad_norm": 0.22584502398967743,
      "learning_rate": 9.444980552643103e-05,
      "loss": 0.0586,
      "step": 11630
    },
    {
      "epoch": 7.098388671875e-05,
      "model_forward_time": 0.11543989181518555,
      "step": 11630
    },
    {
      "epoch": 7.098388671875e-05,
      "step": 11630,
      "training_step_time": 0.44065308570861816
    },
    {
      "epoch": 7.0989990234375e-05,
      "model_forward_time": 0.11500382423400879,
      "step": 11631
    },
    {
      "epoch": 7.0989990234375e-05,
      "step": 11631,
      "training_step_time": 0.47138524055480957
    },
    {
      "epoch": 7.099609375e-05,
      "model_forward_time": 0.1149146556854248,
      "step": 11632
    },
    {
      "epoch": 7.099609375e-05,
      "step": 11632,
      "training_step_time": 0.4748997688293457
    },
    {
      "epoch": 7.1002197265625e-05,
      "model_forward_time": 0.11493372917175293,
      "step": 11633
    },
    {
      "epoch": 7.1002197265625e-05,
      "step": 11633,
      "training_step_time": 0.4098472595214844
    },
    {
      "epoch": 7.100830078125e-05,
      "model_forward_time": 0.11482620239257812,
      "step": 11634
    },
    {
      "epoch": 7.100830078125e-05,
      "step": 11634,
      "training_step_time": 0.411663293838501
    },
    {
      "epoch": 7.1014404296875e-05,
      "model_forward_time": 0.11429357528686523,
      "step": 11635
    },
    {
      "epoch": 7.1014404296875e-05,
      "step": 11635,
      "training_step_time": 0.4135406017303467
    },
    {
      "epoch": 7.10205078125e-05,
      "model_forward_time": 0.11490845680236816,
      "step": 11636
    },
    {
      "epoch": 7.10205078125e-05,
      "step": 11636,
      "training_step_time": 0.45069169998168945
    },
    {
      "epoch": 7.1026611328125e-05,
      "model_forward_time": 0.11509323120117188,
      "step": 11637
    },
    {
      "epoch": 7.1026611328125e-05,
      "step": 11637,
      "training_step_time": 0.4011573791503906
    },
    {
      "epoch": 7.103271484375e-05,
      "model_forward_time": 0.11482429504394531,
      "step": 11638
    },
    {
      "epoch": 7.103271484375e-05,
      "step": 11638,
      "training_step_time": 0.3935422897338867
    },
    {
      "epoch": 7.1038818359375e-05,
      "model_forward_time": 0.11510038375854492,
      "step": 11639
    },
    {
      "epoch": 7.1038818359375e-05,
      "step": 11639,
      "training_step_time": 0.4012792110443115
    },
    {
      "epoch": 7.1044921875e-05,
      "grad_norm": 0.19309936463832855,
      "learning_rate": 9.443717963884569e-05,
      "loss": 0.0623,
      "step": 11640
    },
    {
      "epoch": 7.1044921875e-05,
      "model_forward_time": 0.1148827075958252,
      "step": 11640
    },
    {
      "epoch": 7.1044921875e-05,
      "step": 11640,
      "training_step_time": 0.39376330375671387
    },
    {
      "epoch": 7.1051025390625e-05,
      "model_forward_time": 0.11522221565246582,
      "step": 11641
    },
    {
      "epoch": 7.1051025390625e-05,
      "step": 11641,
      "training_step_time": 0.39690637588500977
    },
    {
      "epoch": 7.105712890625e-05,
      "model_forward_time": 0.1149742603302002,
      "step": 11642
    },
    {
      "epoch": 7.105712890625e-05,
      "step": 11642,
      "training_step_time": 0.3816993236541748
    },
    {
      "epoch": 7.1063232421875e-05,
      "model_forward_time": 0.11569428443908691,
      "step": 11643
    },
    {
      "epoch": 7.1063232421875e-05,
      "step": 11643,
      "training_step_time": 0.40101122856140137
    },
    {
      "epoch": 7.10693359375e-05,
      "model_forward_time": 0.11472344398498535,
      "step": 11644
    },
    {
      "epoch": 7.10693359375e-05,
      "step": 11644,
      "training_step_time": 0.43236660957336426
    },
    {
      "epoch": 7.1075439453125e-05,
      "model_forward_time": 0.11574792861938477,
      "step": 11645
    },
    {
      "epoch": 7.1075439453125e-05,
      "step": 11645,
      "training_step_time": 0.43560218811035156
    },
    {
      "epoch": 7.108154296875e-05,
      "model_forward_time": 0.11635541915893555,
      "step": 11646
    },
    {
      "epoch": 7.108154296875e-05,
      "step": 11646,
      "training_step_time": 0.4799959659576416
    },
    {
      "epoch": 7.1087646484375e-05,
      "model_forward_time": 0.11493110656738281,
      "step": 11647
    },
    {
      "epoch": 7.1087646484375e-05,
      "step": 11647,
      "training_step_time": 0.42084407806396484
    },
    {
      "epoch": 7.109375e-05,
      "model_forward_time": 0.11522960662841797,
      "step": 11648
    },
    {
      "epoch": 7.109375e-05,
      "step": 11648,
      "training_step_time": 0.40901970863342285
    },
    {
      "epoch": 7.1099853515625e-05,
      "model_forward_time": 0.11510610580444336,
      "step": 11649
    },
    {
      "epoch": 7.1099853515625e-05,
      "step": 11649,
      "training_step_time": 0.47286558151245117
    },
    {
      "epoch": 7.110595703125e-05,
      "grad_norm": 0.18094901740550995,
      "learning_rate": 9.442454025241847e-05,
      "loss": 0.0596,
      "step": 11650
    },
    {
      "epoch": 7.110595703125e-05,
      "model_forward_time": 0.1149144172668457,
      "step": 11650
    },
    {
      "epoch": 7.110595703125e-05,
      "step": 11650,
      "training_step_time": 0.39615631103515625
    },
    {
      "epoch": 7.1112060546875e-05,
      "model_forward_time": 0.11551284790039062,
      "step": 11651
    },
    {
      "epoch": 7.1112060546875e-05,
      "step": 11651,
      "training_step_time": 0.4586963653564453
    },
    {
      "epoch": 7.11181640625e-05,
      "model_forward_time": 0.11503267288208008,
      "step": 11652
    },
    {
      "epoch": 7.11181640625e-05,
      "step": 11652,
      "training_step_time": 0.38292860984802246
    },
    {
      "epoch": 7.1124267578125e-05,
      "model_forward_time": 0.1148991584777832,
      "step": 11653
    },
    {
      "epoch": 7.1124267578125e-05,
      "step": 11653,
      "training_step_time": 0.4049220085144043
    },
    {
      "epoch": 7.113037109375e-05,
      "model_forward_time": 0.11545825004577637,
      "step": 11654
    },
    {
      "epoch": 7.113037109375e-05,
      "step": 11654,
      "training_step_time": 0.3952326774597168
    },
    {
      "epoch": 7.1136474609375e-05,
      "model_forward_time": 0.1151881217956543,
      "step": 11655
    },
    {
      "epoch": 7.1136474609375e-05,
      "step": 11655,
      "training_step_time": 0.3967585563659668
    },
    {
      "epoch": 7.1142578125e-05,
      "model_forward_time": 0.11479616165161133,
      "step": 11656
    },
    {
      "epoch": 7.1142578125e-05,
      "step": 11656,
      "training_step_time": 0.39424705505371094
    },
    {
      "epoch": 7.1148681640625e-05,
      "model_forward_time": 0.11526799201965332,
      "step": 11657
    },
    {
      "epoch": 7.1148681640625e-05,
      "step": 11657,
      "training_step_time": 0.3892226219177246
    },
    {
      "epoch": 7.115478515625e-05,
      "model_forward_time": 0.1144869327545166,
      "step": 11658
    },
    {
      "epoch": 7.115478515625e-05,
      "step": 11658,
      "training_step_time": 0.36974668502807617
    },
    {
      "epoch": 7.1160888671875e-05,
      "model_forward_time": 0.11443805694580078,
      "step": 11659
    },
    {
      "epoch": 7.1160888671875e-05,
      "step": 11659,
      "training_step_time": 0.44066500663757324
    },
    {
      "epoch": 7.11669921875e-05,
      "grad_norm": 0.28053033351898193,
      "learning_rate": 9.441188737098889e-05,
      "loss": 0.0608,
      "step": 11660
    },
    {
      "epoch": 7.11669921875e-05,
      "model_forward_time": 0.11480498313903809,
      "step": 11660
    },
    {
      "epoch": 7.11669921875e-05,
      "step": 11660,
      "training_step_time": 0.48610997200012207
    },
    {
      "epoch": 7.1173095703125e-05,
      "model_forward_time": 0.11458706855773926,
      "step": 11661
    },
    {
      "epoch": 7.1173095703125e-05,
      "step": 11661,
      "training_step_time": 0.48088979721069336
    },
    {
      "epoch": 7.117919921875e-05,
      "model_forward_time": 0.11484837532043457,
      "step": 11662
    },
    {
      "epoch": 7.117919921875e-05,
      "step": 11662,
      "training_step_time": 0.42645716667175293
    },
    {
      "epoch": 7.1185302734375e-05,
      "model_forward_time": 0.11528277397155762,
      "step": 11663
    },
    {
      "epoch": 7.1185302734375e-05,
      "step": 11663,
      "training_step_time": 0.39792776107788086
    },
    {
      "epoch": 7.119140625e-05,
      "model_forward_time": 0.11504268646240234,
      "step": 11664
    },
    {
      "epoch": 7.119140625e-05,
      "step": 11664,
      "training_step_time": 0.3945305347442627
    },
    {
      "epoch": 7.1197509765625e-05,
      "model_forward_time": 0.1155691146850586,
      "step": 11665
    },
    {
      "epoch": 7.1197509765625e-05,
      "step": 11665,
      "training_step_time": 0.4307518005371094
    },
    {
      "epoch": 7.120361328125e-05,
      "model_forward_time": 0.11586642265319824,
      "step": 11666
    },
    {
      "epoch": 7.120361328125e-05,
      "step": 11666,
      "training_step_time": 0.42119264602661133
    },
    {
      "epoch": 7.1209716796875e-05,
      "model_forward_time": 0.11531376838684082,
      "step": 11667
    },
    {
      "epoch": 7.1209716796875e-05,
      "step": 11667,
      "training_step_time": 0.3889279365539551
    },
    {
      "epoch": 7.12158203125e-05,
      "model_forward_time": 0.11557316780090332,
      "step": 11668
    },
    {
      "epoch": 7.12158203125e-05,
      "step": 11668,
      "training_step_time": 0.3881218433380127
    },
    {
      "epoch": 7.1221923828125e-05,
      "model_forward_time": 0.11563253402709961,
      "step": 11669
    },
    {
      "epoch": 7.1221923828125e-05,
      "step": 11669,
      "training_step_time": 0.39933276176452637
    },
    {
      "epoch": 7.122802734375e-05,
      "grad_norm": 0.2234204113483429,
      "learning_rate": 9.439922099840054e-05,
      "loss": 0.0643,
      "step": 11670
    },
    {
      "epoch": 7.122802734375e-05,
      "model_forward_time": 0.11523127555847168,
      "step": 11670
    },
    {
      "epoch": 7.122802734375e-05,
      "step": 11670,
      "training_step_time": 0.3879258632659912
    },
    {
      "epoch": 7.1234130859375e-05,
      "model_forward_time": 0.1154787540435791,
      "step": 11671
    },
    {
      "epoch": 7.1234130859375e-05,
      "step": 11671,
      "training_step_time": 0.3997154235839844
    },
    {
      "epoch": 7.1240234375e-05,
      "model_forward_time": 0.1147301197052002,
      "step": 11672
    },
    {
      "epoch": 7.1240234375e-05,
      "step": 11672,
      "training_step_time": 0.39104175567626953
    },
    {
      "epoch": 7.1246337890625e-05,
      "model_forward_time": 0.11512470245361328,
      "step": 11673
    },
    {
      "epoch": 7.1246337890625e-05,
      "step": 11673,
      "training_step_time": 0.476367712020874
    },
    {
      "epoch": 7.125244140625e-05,
      "model_forward_time": 0.11523604393005371,
      "step": 11674
    },
    {
      "epoch": 7.125244140625e-05,
      "step": 11674,
      "training_step_time": 0.4660170078277588
    },
    {
      "epoch": 7.1258544921875e-05,
      "model_forward_time": 0.11565518379211426,
      "step": 11675
    },
    {
      "epoch": 7.1258544921875e-05,
      "step": 11675,
      "training_step_time": 0.4975299835205078
    },
    {
      "epoch": 7.12646484375e-05,
      "model_forward_time": 0.11506533622741699,
      "step": 11676
    },
    {
      "epoch": 7.12646484375e-05,
      "step": 11676,
      "training_step_time": 0.43282222747802734
    },
    {
      "epoch": 7.1270751953125e-05,
      "model_forward_time": 0.11476659774780273,
      "step": 11677
    },
    {
      "epoch": 7.1270751953125e-05,
      "step": 11677,
      "training_step_time": 0.3938751220703125
    },
    {
      "epoch": 7.127685546875e-05,
      "model_forward_time": 0.11498332023620605,
      "step": 11678
    },
    {
      "epoch": 7.127685546875e-05,
      "step": 11678,
      "training_step_time": 0.4852564334869385
    },
    {
      "epoch": 7.1282958984375e-05,
      "model_forward_time": 0.1143951416015625,
      "step": 11679
    },
    {
      "epoch": 7.1282958984375e-05,
      "step": 11679,
      "training_step_time": 0.3988966941833496
    },
    {
      "epoch": 7.12890625e-05,
      "grad_norm": 0.1804225891828537,
      "learning_rate": 9.438654113850118e-05,
      "loss": 0.0565,
      "step": 11680
    },
    {
      "epoch": 7.12890625e-05,
      "model_forward_time": 0.11480021476745605,
      "step": 11680
    },
    {
      "epoch": 7.12890625e-05,
      "step": 11680,
      "training_step_time": 0.4870133399963379
    },
    {
      "epoch": 7.1295166015625e-05,
      "model_forward_time": 0.1145017147064209,
      "step": 11681
    },
    {
      "epoch": 7.1295166015625e-05,
      "step": 11681,
      "training_step_time": 0.3818018436431885
    },
    {
      "epoch": 7.130126953125e-05,
      "model_forward_time": 0.11432647705078125,
      "step": 11682
    },
    {
      "epoch": 7.130126953125e-05,
      "step": 11682,
      "training_step_time": 0.40531373023986816
    },
    {
      "epoch": 7.1307373046875e-05,
      "model_forward_time": 0.11465167999267578,
      "step": 11683
    },
    {
      "epoch": 7.1307373046875e-05,
      "step": 11683,
      "training_step_time": 0.5552263259887695
    },
    {
      "epoch": 7.13134765625e-05,
      "model_forward_time": 0.11491656303405762,
      "step": 11684
    },
    {
      "epoch": 7.13134765625e-05,
      "step": 11684,
      "training_step_time": 0.3906691074371338
    },
    {
      "epoch": 7.1319580078125e-05,
      "model_forward_time": 0.11432242393493652,
      "step": 11685
    },
    {
      "epoch": 7.1319580078125e-05,
      "step": 11685,
      "training_step_time": 0.3875620365142822
    },
    {
      "epoch": 7.132568359375e-05,
      "model_forward_time": 0.11492156982421875,
      "step": 11686
    },
    {
      "epoch": 7.132568359375e-05,
      "step": 11686,
      "training_step_time": 0.415647029876709
    },
    {
      "epoch": 7.1331787109375e-05,
      "model_forward_time": 0.11491036415100098,
      "step": 11687
    },
    {
      "epoch": 7.1331787109375e-05,
      "step": 11687,
      "training_step_time": 0.43717002868652344
    },
    {
      "epoch": 7.1337890625e-05,
      "model_forward_time": 0.11519718170166016,
      "step": 11688
    },
    {
      "epoch": 7.1337890625e-05,
      "step": 11688,
      "training_step_time": 0.47623467445373535
    },
    {
      "epoch": 7.1343994140625e-05,
      "model_forward_time": 0.11641597747802734,
      "step": 11689
    },
    {
      "epoch": 7.1343994140625e-05,
      "step": 11689,
      "training_step_time": 0.6361417770385742
    },
    {
      "epoch": 7.135009765625e-05,
      "grad_norm": 0.1974046677350998,
      "learning_rate": 9.437384779514256e-05,
      "loss": 0.0614,
      "step": 11690
    },
    {
      "epoch": 7.135009765625e-05,
      "model_forward_time": 0.11404275894165039,
      "step": 11690
    },
    {
      "epoch": 7.135009765625e-05,
      "step": 11690,
      "training_step_time": 0.3855152130126953
    },
    {
      "epoch": 7.1356201171875e-05,
      "model_forward_time": 0.11411619186401367,
      "step": 11691
    },
    {
      "epoch": 7.1356201171875e-05,
      "step": 11691,
      "training_step_time": 0.3958420753479004
    },
    {
      "epoch": 7.13623046875e-05,
      "model_forward_time": 0.11545062065124512,
      "step": 11692
    },
    {
      "epoch": 7.13623046875e-05,
      "step": 11692,
      "training_step_time": 0.4438793659210205
    },
    {
      "epoch": 7.1368408203125e-05,
      "model_forward_time": 0.11543464660644531,
      "step": 11693
    },
    {
      "epoch": 7.1368408203125e-05,
      "step": 11693,
      "training_step_time": 0.4402785301208496
    },
    {
      "epoch": 7.137451171875e-05,
      "model_forward_time": 0.11469697952270508,
      "step": 11694
    },
    {
      "epoch": 7.137451171875e-05,
      "step": 11694,
      "training_step_time": 0.4314579963684082
    },
    {
      "epoch": 7.1380615234375e-05,
      "model_forward_time": 0.1153864860534668,
      "step": 11695
    },
    {
      "epoch": 7.1380615234375e-05,
      "step": 11695,
      "training_step_time": 0.5758626461029053
    },
    {
      "epoch": 7.138671875e-05,
      "model_forward_time": 0.11463618278503418,
      "step": 11696
    },
    {
      "epoch": 7.138671875e-05,
      "step": 11696,
      "training_step_time": 0.4070868492126465
    },
    {
      "epoch": 7.1392822265625e-05,
      "model_forward_time": 0.11478948593139648,
      "step": 11697
    },
    {
      "epoch": 7.1392822265625e-05,
      "step": 11697,
      "training_step_time": 0.38086462020874023
    },
    {
      "epoch": 7.139892578125e-05,
      "model_forward_time": 0.11516761779785156,
      "step": 11698
    },
    {
      "epoch": 7.139892578125e-05,
      "step": 11698,
      "training_step_time": 0.39228153228759766
    },
    {
      "epoch": 7.1405029296875e-05,
      "model_forward_time": 0.11501765251159668,
      "step": 11699
    },
    {
      "epoch": 7.1405029296875e-05,
      "step": 11699,
      "training_step_time": 0.4441859722137451
    },
    {
      "epoch": 7.14111328125e-05,
      "grad_norm": 0.23417814075946808,
      "learning_rate": 9.43611409721806e-05,
      "loss": 0.061,
      "step": 11700
    },
    {
      "epoch": 7.14111328125e-05,
      "model_forward_time": 0.11543846130371094,
      "step": 11700
    },
    {
      "epoch": 7.14111328125e-05,
      "step": 11700,
      "training_step_time": 0.4154932498931885
    },
    {
      "epoch": 7.1417236328125e-05,
      "model_forward_time": 0.1146841049194336,
      "step": 11701
    },
    {
      "epoch": 7.1417236328125e-05,
      "step": 11701,
      "training_step_time": 0.6701502799987793
    },
    {
      "epoch": 7.142333984375e-05,
      "model_forward_time": 0.11476993560791016,
      "step": 11702
    },
    {
      "epoch": 7.142333984375e-05,
      "step": 11702,
      "training_step_time": 0.41419053077697754
    },
    {
      "epoch": 7.1429443359375e-05,
      "model_forward_time": 0.11496472358703613,
      "step": 11703
    },
    {
      "epoch": 7.1429443359375e-05,
      "step": 11703,
      "training_step_time": 0.4583258628845215
    },
    {
      "epoch": 7.1435546875e-05,
      "model_forward_time": 0.11390161514282227,
      "step": 11704
    },
    {
      "epoch": 7.1435546875e-05,
      "step": 11704,
      "training_step_time": 0.38399195671081543
    },
    {
      "epoch": 7.1441650390625e-05,
      "model_forward_time": 0.11533617973327637,
      "step": 11705
    },
    {
      "epoch": 7.1441650390625e-05,
      "step": 11705,
      "training_step_time": 0.42355799674987793
    },
    {
      "epoch": 7.144775390625e-05,
      "model_forward_time": 0.11418938636779785,
      "step": 11706
    },
    {
      "epoch": 7.144775390625e-05,
      "step": 11706,
      "training_step_time": 0.4329342842102051
    },
    {
      "epoch": 7.1453857421875e-05,
      "model_forward_time": 0.11526036262512207,
      "step": 11707
    },
    {
      "epoch": 7.1453857421875e-05,
      "step": 11707,
      "training_step_time": 0.5764334201812744
    },
    {
      "epoch": 7.14599609375e-05,
      "model_forward_time": 0.11495256423950195,
      "step": 11708
    },
    {
      "epoch": 7.14599609375e-05,
      "step": 11708,
      "training_step_time": 0.3871634006500244
    },
    {
      "epoch": 7.1466064453125e-05,
      "model_forward_time": 0.11493587493896484,
      "step": 11709
    },
    {
      "epoch": 7.1466064453125e-05,
      "step": 11709,
      "training_step_time": 0.38891053199768066
    },
    {
      "epoch": 7.147216796875e-05,
      "grad_norm": 0.22918613255023956,
      "learning_rate": 9.43484206734753e-05,
      "loss": 0.0669,
      "step": 11710
    },
    {
      "epoch": 7.147216796875e-05,
      "model_forward_time": 0.11520886421203613,
      "step": 11710
    },
    {
      "epoch": 7.147216796875e-05,
      "step": 11710,
      "training_step_time": 0.39905548095703125
    },
    {
      "epoch": 7.1478271484375e-05,
      "model_forward_time": 0.11481237411499023,
      "step": 11711
    },
    {
      "epoch": 7.1478271484375e-05,
      "step": 11711,
      "training_step_time": 0.3904428482055664
    },
    {
      "epoch": 7.1484375e-05,
      "model_forward_time": 0.11574101448059082,
      "step": 11712
    },
    {
      "epoch": 7.1484375e-05,
      "step": 11712,
      "training_step_time": 0.43762898445129395
    },
    {
      "epoch": 7.1490478515625e-05,
      "model_forward_time": 0.11479496955871582,
      "step": 11713
    },
    {
      "epoch": 7.1490478515625e-05,
      "step": 11713,
      "training_step_time": 0.6247200965881348
    },
    {
      "epoch": 7.149658203125e-05,
      "model_forward_time": 0.11443591117858887,
      "step": 11714
    },
    {
      "epoch": 7.149658203125e-05,
      "step": 11714,
      "training_step_time": 0.46303272247314453
    },
    {
      "epoch": 7.1502685546875e-05,
      "model_forward_time": 0.11445379257202148,
      "step": 11715
    },
    {
      "epoch": 7.1502685546875e-05,
      "step": 11715,
      "training_step_time": 0.45038580894470215
    },
    {
      "epoch": 7.15087890625e-05,
      "model_forward_time": 0.11520886421203613,
      "step": 11716
    },
    {
      "epoch": 7.15087890625e-05,
      "step": 11716,
      "training_step_time": 0.4709744453430176
    },
    {
      "epoch": 7.1514892578125e-05,
      "model_forward_time": 0.11473655700683594,
      "step": 11717
    },
    {
      "epoch": 7.1514892578125e-05,
      "step": 11717,
      "training_step_time": 0.4737091064453125
    },
    {
      "epoch": 7.152099609375e-05,
      "model_forward_time": 0.11443448066711426,
      "step": 11718
    },
    {
      "epoch": 7.152099609375e-05,
      "step": 11718,
      "training_step_time": 0.41343045234680176
    },
    {
      "epoch": 7.1527099609375e-05,
      "model_forward_time": 0.11439824104309082,
      "step": 11719
    },
    {
      "epoch": 7.1527099609375e-05,
      "step": 11719,
      "training_step_time": 0.3961353302001953
    },
    {
      "epoch": 7.1533203125e-05,
      "grad_norm": 0.16049689054489136,
      "learning_rate": 9.433568690289075e-05,
      "loss": 0.058,
      "step": 11720
    },
    {
      "epoch": 7.1533203125e-05,
      "model_forward_time": 0.11548328399658203,
      "step": 11720
    },
    {
      "epoch": 7.1533203125e-05,
      "step": 11720,
      "training_step_time": 0.4276003837585449
    },
    {
      "epoch": 7.1539306640625e-05,
      "model_forward_time": 0.11567354202270508,
      "step": 11721
    },
    {
      "epoch": 7.1539306640625e-05,
      "step": 11721,
      "training_step_time": 0.46053171157836914
    },
    {
      "epoch": 7.154541015625e-05,
      "model_forward_time": 0.11530518531799316,
      "step": 11722
    },
    {
      "epoch": 7.154541015625e-05,
      "step": 11722,
      "training_step_time": 0.39105868339538574
    },
    {
      "epoch": 7.1551513671875e-05,
      "model_forward_time": 0.11539459228515625,
      "step": 11723
    },
    {
      "epoch": 7.1551513671875e-05,
      "step": 11723,
      "training_step_time": 0.39116334915161133
    },
    {
      "epoch": 7.15576171875e-05,
      "model_forward_time": 0.11550140380859375,
      "step": 11724
    },
    {
      "epoch": 7.15576171875e-05,
      "step": 11724,
      "training_step_time": 0.39057207107543945
    },
    {
      "epoch": 7.1563720703125e-05,
      "model_forward_time": 0.11560320854187012,
      "step": 11725
    },
    {
      "epoch": 7.1563720703125e-05,
      "step": 11725,
      "training_step_time": 0.5753343105316162
    },
    {
      "epoch": 7.156982421875e-05,
      "model_forward_time": 0.11519145965576172,
      "step": 11726
    },
    {
      "epoch": 7.156982421875e-05,
      "step": 11726,
      "training_step_time": 0.4121696949005127
    },
    {
      "epoch": 7.1575927734375e-05,
      "model_forward_time": 0.11547493934631348,
      "step": 11727
    },
    {
      "epoch": 7.1575927734375e-05,
      "step": 11727,
      "training_step_time": 0.3853893280029297
    },
    {
      "epoch": 7.158203125e-05,
      "model_forward_time": 0.1147298812866211,
      "step": 11728
    },
    {
      "epoch": 7.158203125e-05,
      "step": 11728,
      "training_step_time": 0.36716389656066895
    },
    {
      "epoch": 7.1588134765625e-05,
      "model_forward_time": 0.11615967750549316,
      "step": 11729
    },
    {
      "epoch": 7.1588134765625e-05,
      "step": 11729,
      "training_step_time": 0.44202518463134766
    },
    {
      "epoch": 7.159423828125e-05,
      "grad_norm": 0.25506657361984253,
      "learning_rate": 9.432293966429514e-05,
      "loss": 0.0579,
      "step": 11730
    },
    {
      "epoch": 7.159423828125e-05,
      "model_forward_time": 0.1153862476348877,
      "step": 11730
    },
    {
      "epoch": 7.159423828125e-05,
      "step": 11730,
      "training_step_time": 0.4796483516693115
    },
    {
      "epoch": 7.1600341796875e-05,
      "model_forward_time": 0.11575913429260254,
      "step": 11731
    },
    {
      "epoch": 7.1600341796875e-05,
      "step": 11731,
      "training_step_time": 0.5888986587524414
    },
    {
      "epoch": 7.16064453125e-05,
      "model_forward_time": 0.11535215377807617,
      "step": 11732
    },
    {
      "epoch": 7.16064453125e-05,
      "step": 11732,
      "training_step_time": 0.3866572380065918
    },
    {
      "epoch": 7.1612548828125e-05,
      "model_forward_time": 0.11604690551757812,
      "step": 11733
    },
    {
      "epoch": 7.1612548828125e-05,
      "step": 11733,
      "training_step_time": 0.4407382011413574
    },
    {
      "epoch": 7.161865234375e-05,
      "model_forward_time": 0.11484622955322266,
      "step": 11734
    },
    {
      "epoch": 7.161865234375e-05,
      "step": 11734,
      "training_step_time": 0.38452839851379395
    },
    {
      "epoch": 7.1624755859375e-05,
      "model_forward_time": 0.11452388763427734,
      "step": 11735
    },
    {
      "epoch": 7.1624755859375e-05,
      "step": 11735,
      "training_step_time": 0.44493818283081055
    },
    {
      "epoch": 7.1630859375e-05,
      "model_forward_time": 0.11489129066467285,
      "step": 11736
    },
    {
      "epoch": 7.1630859375e-05,
      "step": 11736,
      "training_step_time": 0.405850887298584
    },
    {
      "epoch": 7.1636962890625e-05,
      "model_forward_time": 0.11520075798034668,
      "step": 11737
    },
    {
      "epoch": 7.1636962890625e-05,
      "step": 11737,
      "training_step_time": 0.6332299709320068
    },
    {
      "epoch": 7.164306640625e-05,
      "model_forward_time": 0.11494040489196777,
      "step": 11738
    },
    {
      "epoch": 7.164306640625e-05,
      "step": 11738,
      "training_step_time": 0.39170098304748535
    },
    {
      "epoch": 7.1649169921875e-05,
      "model_forward_time": 0.11492633819580078,
      "step": 11739
    },
    {
      "epoch": 7.1649169921875e-05,
      "step": 11739,
      "training_step_time": 0.4205927848815918
    },
    {
      "epoch": 7.16552734375e-05,
      "grad_norm": 0.16266122460365295,
      "learning_rate": 9.431017896156074e-05,
      "loss": 0.058,
      "step": 11740
    },
    {
      "epoch": 7.16552734375e-05,
      "model_forward_time": 0.11486077308654785,
      "step": 11740
    },
    {
      "epoch": 7.16552734375e-05,
      "step": 11740,
      "training_step_time": 0.38786745071411133
    },
    {
      "epoch": 7.1661376953125e-05,
      "model_forward_time": 0.11518573760986328,
      "step": 11741
    },
    {
      "epoch": 7.1661376953125e-05,
      "step": 11741,
      "training_step_time": 0.403888463973999
    },
    {
      "epoch": 7.166748046875e-05,
      "model_forward_time": 0.11452579498291016,
      "step": 11742
    },
    {
      "epoch": 7.166748046875e-05,
      "step": 11742,
      "training_step_time": 0.4700472354888916
    },
    {
      "epoch": 7.1673583984375e-05,
      "model_forward_time": 0.11539959907531738,
      "step": 11743
    },
    {
      "epoch": 7.1673583984375e-05,
      "step": 11743,
      "training_step_time": 0.6668062210083008
    },
    {
      "epoch": 7.16796875e-05,
      "model_forward_time": 0.11460471153259277,
      "step": 11744
    },
    {
      "epoch": 7.16796875e-05,
      "step": 11744,
      "training_step_time": 0.45142650604248047
    },
    {
      "epoch": 7.1685791015625e-05,
      "model_forward_time": 0.11470460891723633,
      "step": 11745
    },
    {
      "epoch": 7.1685791015625e-05,
      "step": 11745,
      "training_step_time": 0.4858427047729492
    },
    {
      "epoch": 7.169189453125e-05,
      "model_forward_time": 0.11484384536743164,
      "step": 11746
    },
    {
      "epoch": 7.169189453125e-05,
      "step": 11746,
      "training_step_time": 0.4406285285949707
    },
    {
      "epoch": 7.1697998046875e-05,
      "model_forward_time": 0.11491060256958008,
      "step": 11747
    },
    {
      "epoch": 7.1697998046875e-05,
      "step": 11747,
      "training_step_time": 0.4556286334991455
    },
    {
      "epoch": 7.17041015625e-05,
      "model_forward_time": 0.11483550071716309,
      "step": 11748
    },
    {
      "epoch": 7.17041015625e-05,
      "step": 11748,
      "training_step_time": 0.3996548652648926
    },
    {
      "epoch": 7.1710205078125e-05,
      "model_forward_time": 0.11528325080871582,
      "step": 11749
    },
    {
      "epoch": 7.1710205078125e-05,
      "step": 11749,
      "training_step_time": 0.42829346656799316
    },
    {
      "epoch": 7.171630859375e-05,
      "grad_norm": 0.20869915187358856,
      "learning_rate": 9.42974047985639e-05,
      "loss": 0.06,
      "step": 11750
    },
    {
      "epoch": 7.171630859375e-05,
      "model_forward_time": 0.11527276039123535,
      "step": 11750
    },
    {
      "epoch": 7.171630859375e-05,
      "step": 11750,
      "training_step_time": 0.3803257942199707
    },
    {
      "epoch": 7.1722412109375e-05,
      "model_forward_time": 0.11535906791687012,
      "step": 11751
    },
    {
      "epoch": 7.1722412109375e-05,
      "step": 11751,
      "training_step_time": 0.45789289474487305
    },
    {
      "epoch": 7.1728515625e-05,
      "model_forward_time": 0.1152644157409668,
      "step": 11752
    },
    {
      "epoch": 7.1728515625e-05,
      "step": 11752,
      "training_step_time": 0.41312146186828613
    },
    {
      "epoch": 7.1734619140625e-05,
      "model_forward_time": 0.11525559425354004,
      "step": 11753
    },
    {
      "epoch": 7.1734619140625e-05,
      "step": 11753,
      "training_step_time": 0.3930368423461914
    },
    {
      "epoch": 7.174072265625e-05,
      "model_forward_time": 0.11550736427307129,
      "step": 11754
    },
    {
      "epoch": 7.174072265625e-05,
      "step": 11754,
      "training_step_time": 0.3902890682220459
    },
    {
      "epoch": 7.1746826171875e-05,
      "model_forward_time": 0.11504411697387695,
      "step": 11755
    },
    {
      "epoch": 7.1746826171875e-05,
      "step": 11755,
      "training_step_time": 0.5824649333953857
    },
    {
      "epoch": 7.17529296875e-05,
      "model_forward_time": 0.11516880989074707,
      "step": 11756
    },
    {
      "epoch": 7.17529296875e-05,
      "step": 11756,
      "training_step_time": 0.4227120876312256
    },
    {
      "epoch": 7.1759033203125e-05,
      "model_forward_time": 0.11520743370056152,
      "step": 11757
    },
    {
      "epoch": 7.1759033203125e-05,
      "step": 11757,
      "training_step_time": 0.38965368270874023
    },
    {
      "epoch": 7.176513671875e-05,
      "model_forward_time": 0.1155691146850586,
      "step": 11758
    },
    {
      "epoch": 7.176513671875e-05,
      "step": 11758,
      "training_step_time": 0.38883137702941895
    },
    {
      "epoch": 7.1771240234375e-05,
      "model_forward_time": 0.11531710624694824,
      "step": 11759
    },
    {
      "epoch": 7.1771240234375e-05,
      "step": 11759,
      "training_step_time": 0.4724133014678955
    },
    {
      "epoch": 7.177734375e-05,
      "grad_norm": 0.18101158738136292,
      "learning_rate": 9.428461717918511e-05,
      "loss": 0.06,
      "step": 11760
    },
    {
      "epoch": 7.177734375e-05,
      "model_forward_time": 0.11485433578491211,
      "step": 11760
    },
    {
      "epoch": 7.177734375e-05,
      "step": 11760,
      "training_step_time": 0.4289872646331787
    },
    {
      "epoch": 7.1783447265625e-05,
      "model_forward_time": 0.11504030227661133,
      "step": 11761
    },
    {
      "epoch": 7.1783447265625e-05,
      "step": 11761,
      "training_step_time": 0.6425280570983887
    },
    {
      "epoch": 7.178955078125e-05,
      "model_forward_time": 0.11461496353149414,
      "step": 11762
    },
    {
      "epoch": 7.178955078125e-05,
      "step": 11762,
      "training_step_time": 0.4456663131713867
    },
    {
      "epoch": 7.1795654296875e-05,
      "model_forward_time": 0.11421775817871094,
      "step": 11763
    },
    {
      "epoch": 7.1795654296875e-05,
      "step": 11763,
      "training_step_time": 0.40531301498413086
    },
    {
      "epoch": 7.18017578125e-05,
      "model_forward_time": 0.11472296714782715,
      "step": 11764
    },
    {
      "epoch": 7.18017578125e-05,
      "step": 11764,
      "training_step_time": 0.4025871753692627
    },
    {
      "epoch": 7.1807861328125e-05,
      "model_forward_time": 0.11471724510192871,
      "step": 11765
    },
    {
      "epoch": 7.1807861328125e-05,
      "step": 11765,
      "training_step_time": 0.42017197608947754
    },
    {
      "epoch": 7.181396484375e-05,
      "model_forward_time": 0.11392974853515625,
      "step": 11766
    },
    {
      "epoch": 7.181396484375e-05,
      "step": 11766,
      "training_step_time": 0.38411736488342285
    },
    {
      "epoch": 7.1820068359375e-05,
      "model_forward_time": 0.11538338661193848,
      "step": 11767
    },
    {
      "epoch": 7.1820068359375e-05,
      "step": 11767,
      "training_step_time": 0.5956802368164062
    },
    {
      "epoch": 7.1826171875e-05,
      "model_forward_time": 0.11515116691589355,
      "step": 11768
    },
    {
      "epoch": 7.1826171875e-05,
      "step": 11768,
      "training_step_time": 0.393845796585083
    },
    {
      "epoch": 7.1832275390625e-05,
      "model_forward_time": 0.11592650413513184,
      "step": 11769
    },
    {
      "epoch": 7.1832275390625e-05,
      "step": 11769,
      "training_step_time": 0.4917562007904053
    },
    {
      "epoch": 7.183837890625e-05,
      "grad_norm": 0.18500877916812897,
      "learning_rate": 9.427181610730888e-05,
      "loss": 0.0545,
      "step": 11770
    },
    {
      "epoch": 7.183837890625e-05,
      "model_forward_time": 0.11523199081420898,
      "step": 11770
    },
    {
      "epoch": 7.183837890625e-05,
      "step": 11770,
      "training_step_time": 0.44744133949279785
    },
    {
      "epoch": 7.1844482421875e-05,
      "model_forward_time": 0.11564087867736816,
      "step": 11771
    },
    {
      "epoch": 7.1844482421875e-05,
      "step": 11771,
      "training_step_time": 0.5002849102020264
    },
    {
      "epoch": 7.18505859375e-05,
      "model_forward_time": 0.11482644081115723,
      "step": 11772
    },
    {
      "epoch": 7.18505859375e-05,
      "step": 11772,
      "training_step_time": 0.43277716636657715
    },
    {
      "epoch": 7.1856689453125e-05,
      "model_forward_time": 0.11555194854736328,
      "step": 11773
    },
    {
      "epoch": 7.1856689453125e-05,
      "step": 11773,
      "training_step_time": 0.41168928146362305
    },
    {
      "epoch": 7.186279296875e-05,
      "model_forward_time": 0.11526918411254883,
      "step": 11774
    },
    {
      "epoch": 7.186279296875e-05,
      "step": 11774,
      "training_step_time": 0.3865036964416504
    },
    {
      "epoch": 7.1868896484375e-05,
      "model_forward_time": 0.1145029067993164,
      "step": 11775
    },
    {
      "epoch": 7.1868896484375e-05,
      "step": 11775,
      "training_step_time": 0.40216565132141113
    },
    {
      "epoch": 7.1875e-05,
      "model_forward_time": 0.1156930923461914,
      "step": 11776
    },
    {
      "epoch": 7.1875e-05,
      "step": 11776,
      "training_step_time": 0.4028620719909668
    },
    {
      "epoch": 7.1881103515625e-05,
      "model_forward_time": 0.11488676071166992,
      "step": 11777
    },
    {
      "epoch": 7.1881103515625e-05,
      "step": 11777,
      "training_step_time": 0.4209630489349365
    },
    {
      "epoch": 7.188720703125e-05,
      "model_forward_time": 0.11512517929077148,
      "step": 11778
    },
    {
      "epoch": 7.188720703125e-05,
      "step": 11778,
      "training_step_time": 0.39442873001098633
    },
    {
      "epoch": 7.1893310546875e-05,
      "model_forward_time": 0.11534237861633301,
      "step": 11779
    },
    {
      "epoch": 7.1893310546875e-05,
      "step": 11779,
      "training_step_time": 0.7782137393951416
    },
    {
      "epoch": 7.18994140625e-05,
      "grad_norm": 0.15797504782676697,
      "learning_rate": 9.425900158682385e-05,
      "loss": 0.0565,
      "step": 11780
    },
    {
      "epoch": 7.18994140625e-05,
      "model_forward_time": 0.11448144912719727,
      "step": 11780
    },
    {
      "epoch": 7.18994140625e-05,
      "step": 11780,
      "training_step_time": 0.38430237770080566
    },
    {
      "epoch": 7.1905517578125e-05,
      "model_forward_time": 0.11468696594238281,
      "step": 11781
    },
    {
      "epoch": 7.1905517578125e-05,
      "step": 11781,
      "training_step_time": 0.3876457214355469
    },
    {
      "epoch": 7.191162109375e-05,
      "model_forward_time": 0.1145787239074707,
      "step": 11782
    },
    {
      "epoch": 7.191162109375e-05,
      "step": 11782,
      "training_step_time": 0.39868617057800293
    },
    {
      "epoch": 7.1917724609375e-05,
      "model_forward_time": 0.11498808860778809,
      "step": 11783
    },
    {
      "epoch": 7.1917724609375e-05,
      "step": 11783,
      "training_step_time": 0.425076961517334
    },
    {
      "epoch": 7.1923828125e-05,
      "model_forward_time": 0.11442947387695312,
      "step": 11784
    },
    {
      "epoch": 7.1923828125e-05,
      "step": 11784,
      "training_step_time": 0.44703030586242676
    },
    {
      "epoch": 7.1929931640625e-05,
      "model_forward_time": 0.1156303882598877,
      "step": 11785
    },
    {
      "epoch": 7.1929931640625e-05,
      "step": 11785,
      "training_step_time": 0.5411458015441895
    },
    {
      "epoch": 7.193603515625e-05,
      "model_forward_time": 0.11468863487243652,
      "step": 11786
    },
    {
      "epoch": 7.193603515625e-05,
      "step": 11786,
      "training_step_time": 0.4826998710632324
    },
    {
      "epoch": 7.1942138671875e-05,
      "model_forward_time": 0.11469483375549316,
      "step": 11787
    },
    {
      "epoch": 7.1942138671875e-05,
      "step": 11787,
      "training_step_time": 0.4283108711242676
    },
    {
      "epoch": 7.19482421875e-05,
      "model_forward_time": 0.11457514762878418,
      "step": 11788
    },
    {
      "epoch": 7.19482421875e-05,
      "step": 11788,
      "training_step_time": 0.3929896354675293
    },
    {
      "epoch": 7.1954345703125e-05,
      "model_forward_time": 0.1157219409942627,
      "step": 11789
    },
    {
      "epoch": 7.1954345703125e-05,
      "step": 11789,
      "training_step_time": 0.45197486877441406
    },
    {
      "epoch": 7.196044921875e-05,
      "grad_norm": 0.18934504687786102,
      "learning_rate": 9.424617362162271e-05,
      "loss": 0.0588,
      "step": 11790
    },
    {
      "epoch": 7.196044921875e-05,
      "model_forward_time": 0.11503934860229492,
      "step": 11790
    },
    {
      "epoch": 7.196044921875e-05,
      "step": 11790,
      "training_step_time": 0.42507433891296387
    },
    {
      "epoch": 7.1966552734375e-05,
      "model_forward_time": 0.11562705039978027,
      "step": 11791
    },
    {
      "epoch": 7.1966552734375e-05,
      "step": 11791,
      "training_step_time": 0.47133302688598633
    },
    {
      "epoch": 7.197265625e-05,
      "model_forward_time": 0.1147613525390625,
      "step": 11792
    },
    {
      "epoch": 7.197265625e-05,
      "step": 11792,
      "training_step_time": 0.3951547145843506
    },
    {
      "epoch": 7.1978759765625e-05,
      "model_forward_time": 0.11491894721984863,
      "step": 11793
    },
    {
      "epoch": 7.1978759765625e-05,
      "step": 11793,
      "training_step_time": 0.39830493927001953
    },
    {
      "epoch": 7.198486328125e-05,
      "model_forward_time": 0.11564278602600098,
      "step": 11794
    },
    {
      "epoch": 7.198486328125e-05,
      "step": 11794,
      "training_step_time": 0.3940591812133789
    },
    {
      "epoch": 7.1990966796875e-05,
      "model_forward_time": 0.11469721794128418,
      "step": 11795
    },
    {
      "epoch": 7.1990966796875e-05,
      "step": 11795,
      "training_step_time": 0.39392638206481934
    },
    {
      "epoch": 7.19970703125e-05,
      "model_forward_time": 0.11542630195617676,
      "step": 11796
    },
    {
      "epoch": 7.19970703125e-05,
      "step": 11796,
      "training_step_time": 0.397310733795166
    },
    {
      "epoch": 7.2003173828125e-05,
      "model_forward_time": 0.11521553993225098,
      "step": 11797
    },
    {
      "epoch": 7.2003173828125e-05,
      "step": 11797,
      "training_step_time": 0.7940137386322021
    },
    {
      "epoch": 7.200927734375e-05,
      "model_forward_time": 0.11476278305053711,
      "step": 11798
    },
    {
      "epoch": 7.200927734375e-05,
      "step": 11798,
      "training_step_time": 0.38521575927734375
    },
    {
      "epoch": 7.2015380859375e-05,
      "model_forward_time": 0.11485433578491211,
      "step": 11799
    },
    {
      "epoch": 7.2015380859375e-05,
      "step": 11799,
      "training_step_time": 0.40444111824035645
    },
    {
      "epoch": 7.2021484375e-05,
      "grad_norm": 0.1918867528438568,
      "learning_rate": 9.42333322156023e-05,
      "loss": 0.0579,
      "step": 11800
    },
    {
      "epoch": 7.2021484375e-05,
      "model_forward_time": 0.11406993865966797,
      "step": 11800
    },
    {
      "epoch": 7.2021484375e-05,
      "step": 11800,
      "training_step_time": 0.42623353004455566
    },
    {
      "epoch": 7.2027587890625e-05,
      "model_forward_time": 0.11480355262756348,
      "step": 11801
    },
    {
      "epoch": 7.2027587890625e-05,
      "step": 11801,
      "training_step_time": 0.4192662239074707
    },
    {
      "epoch": 7.203369140625e-05,
      "model_forward_time": 0.11457514762878418,
      "step": 11802
    },
    {
      "epoch": 7.203369140625e-05,
      "step": 11802,
      "training_step_time": 0.42122364044189453
    },
    {
      "epoch": 7.2039794921875e-05,
      "model_forward_time": 0.1156618595123291,
      "step": 11803
    },
    {
      "epoch": 7.2039794921875e-05,
      "step": 11803,
      "training_step_time": 0.6524097919464111
    },
    {
      "epoch": 7.20458984375e-05,
      "model_forward_time": 0.11464238166809082,
      "step": 11804
    },
    {
      "epoch": 7.20458984375e-05,
      "step": 11804,
      "training_step_time": 0.42928099632263184
    },
    {
      "epoch": 7.2052001953125e-05,
      "model_forward_time": 0.11460232734680176,
      "step": 11805
    },
    {
      "epoch": 7.2052001953125e-05,
      "step": 11805,
      "training_step_time": 0.4007711410522461
    },
    {
      "epoch": 7.205810546875e-05,
      "model_forward_time": 0.11464643478393555,
      "step": 11806
    },
    {
      "epoch": 7.205810546875e-05,
      "step": 11806,
      "training_step_time": 0.3937366008758545
    },
    {
      "epoch": 7.2064208984375e-05,
      "model_forward_time": 0.11471891403198242,
      "step": 11807
    },
    {
      "epoch": 7.2064208984375e-05,
      "step": 11807,
      "training_step_time": 0.3910956382751465
    },
    {
      "epoch": 7.20703125e-05,
      "model_forward_time": 0.11417436599731445,
      "step": 11808
    },
    {
      "epoch": 7.20703125e-05,
      "step": 11808,
      "training_step_time": 0.4176023006439209
    },
    {
      "epoch": 7.2076416015625e-05,
      "model_forward_time": 0.11488175392150879,
      "step": 11809
    },
    {
      "epoch": 7.2076416015625e-05,
      "step": 11809,
      "training_step_time": 0.7588083744049072
    },
    {
      "epoch": 7.208251953125e-05,
      "grad_norm": 0.2095392495393753,
      "learning_rate": 9.422047737266347e-05,
      "loss": 0.0566,
      "step": 11810
    },
    {
      "epoch": 7.208251953125e-05,
      "model_forward_time": 0.11506223678588867,
      "step": 11810
    },
    {
      "epoch": 7.208251953125e-05,
      "step": 11810,
      "training_step_time": 0.4293637275695801
    },
    {
      "epoch": 7.2088623046875e-05,
      "model_forward_time": 0.11494898796081543,
      "step": 11811
    },
    {
      "epoch": 7.2088623046875e-05,
      "step": 11811,
      "training_step_time": 0.4348762035369873
    },
    {
      "epoch": 7.20947265625e-05,
      "model_forward_time": 0.11468243598937988,
      "step": 11812
    },
    {
      "epoch": 7.20947265625e-05,
      "step": 11812,
      "training_step_time": 0.47391343116760254
    },
    {
      "epoch": 7.2100830078125e-05,
      "model_forward_time": 0.11440873146057129,
      "step": 11813
    },
    {
      "epoch": 7.2100830078125e-05,
      "step": 11813,
      "training_step_time": 0.38703250885009766
    },
    {
      "epoch": 7.210693359375e-05,
      "model_forward_time": 0.11409664154052734,
      "step": 11814
    },
    {
      "epoch": 7.210693359375e-05,
      "step": 11814,
      "training_step_time": 0.43965816497802734
    },
    {
      "epoch": 7.2113037109375e-05,
      "model_forward_time": 0.11442899703979492,
      "step": 11815
    },
    {
      "epoch": 7.2113037109375e-05,
      "step": 11815,
      "training_step_time": 0.53666090965271
    },
    {
      "epoch": 7.2119140625e-05,
      "model_forward_time": 0.11452221870422363,
      "step": 11816
    },
    {
      "epoch": 7.2119140625e-05,
      "step": 11816,
      "training_step_time": 0.4707224369049072
    },
    {
      "epoch": 7.2125244140625e-05,
      "model_forward_time": 0.11499238014221191,
      "step": 11817
    },
    {
      "epoch": 7.2125244140625e-05,
      "step": 11817,
      "training_step_time": 0.39824938774108887
    },
    {
      "epoch": 7.213134765625e-05,
      "model_forward_time": 0.1150364875793457,
      "step": 11818
    },
    {
      "epoch": 7.213134765625e-05,
      "step": 11818,
      "training_step_time": 0.38482022285461426
    },
    {
      "epoch": 7.2137451171875e-05,
      "model_forward_time": 0.11451101303100586,
      "step": 11819
    },
    {
      "epoch": 7.2137451171875e-05,
      "step": 11819,
      "training_step_time": 0.38461923599243164
    },
    {
      "epoch": 7.21435546875e-05,
      "grad_norm": 0.19799768924713135,
      "learning_rate": 9.420760909671118e-05,
      "loss": 0.0612,
      "step": 11820
    },
    {
      "epoch": 7.21435546875e-05,
      "model_forward_time": 0.11425352096557617,
      "step": 11820
    },
    {
      "epoch": 7.21435546875e-05,
      "step": 11820,
      "training_step_time": 0.39166927337646484
    },
    {
      "epoch": 7.2149658203125e-05,
      "model_forward_time": 0.11465811729431152,
      "step": 11821
    },
    {
      "epoch": 7.2149658203125e-05,
      "step": 11821,
      "training_step_time": 0.5507400035858154
    },
    {
      "epoch": 7.215576171875e-05,
      "model_forward_time": 0.11500263214111328,
      "step": 11822
    },
    {
      "epoch": 7.215576171875e-05,
      "step": 11822,
      "training_step_time": 0.38034605979919434
    },
    {
      "epoch": 7.2161865234375e-05,
      "model_forward_time": 0.11503148078918457,
      "step": 11823
    },
    {
      "epoch": 7.2161865234375e-05,
      "step": 11823,
      "training_step_time": 0.3867642879486084
    },
    {
      "epoch": 7.216796875e-05,
      "model_forward_time": 0.11500740051269531,
      "step": 11824
    },
    {
      "epoch": 7.216796875e-05,
      "step": 11824,
      "training_step_time": 0.40183353424072266
    },
    {
      "epoch": 7.2174072265625e-05,
      "model_forward_time": 0.11542749404907227,
      "step": 11825
    },
    {
      "epoch": 7.2174072265625e-05,
      "step": 11825,
      "training_step_time": 0.39356327056884766
    },
    {
      "epoch": 7.218017578125e-05,
      "model_forward_time": 0.11505818367004395,
      "step": 11826
    },
    {
      "epoch": 7.218017578125e-05,
      "step": 11826,
      "training_step_time": 0.4696483612060547
    },
    {
      "epoch": 7.2186279296875e-05,
      "model_forward_time": 0.11546611785888672,
      "step": 11827
    },
    {
      "epoch": 7.2186279296875e-05,
      "step": 11827,
      "training_step_time": 0.7112290859222412
    },
    {
      "epoch": 7.21923828125e-05,
      "model_forward_time": 0.11479926109313965,
      "step": 11828
    },
    {
      "epoch": 7.21923828125e-05,
      "step": 11828,
      "training_step_time": 0.435774564743042
    },
    {
      "epoch": 7.2198486328125e-05,
      "model_forward_time": 0.11529850959777832,
      "step": 11829
    },
    {
      "epoch": 7.2198486328125e-05,
      "step": 11829,
      "training_step_time": 0.48252224922180176
    },
    {
      "epoch": 7.220458984375e-05,
      "grad_norm": 0.19351333379745483,
      "learning_rate": 9.419472739165449e-05,
      "loss": 0.0622,
      "step": 11830
    },
    {
      "epoch": 7.220458984375e-05,
      "model_forward_time": 0.11446022987365723,
      "step": 11830
    },
    {
      "epoch": 7.220458984375e-05,
      "step": 11830,
      "training_step_time": 0.3893435001373291
    },
    {
      "epoch": 7.2210693359375e-05,
      "model_forward_time": 0.1144099235534668,
      "step": 11831
    },
    {
      "epoch": 7.2210693359375e-05,
      "step": 11831,
      "training_step_time": 0.47245287895202637
    },
    {
      "epoch": 7.2216796875e-05,
      "model_forward_time": 0.11460518836975098,
      "step": 11832
    },
    {
      "epoch": 7.2216796875e-05,
      "step": 11832,
      "training_step_time": 0.38732171058654785
    },
    {
      "epoch": 7.2222900390625e-05,
      "model_forward_time": 0.11521291732788086,
      "step": 11833
    },
    {
      "epoch": 7.2222900390625e-05,
      "step": 11833,
      "training_step_time": 0.467923641204834
    },
    {
      "epoch": 7.222900390625e-05,
      "model_forward_time": 0.1152944564819336,
      "step": 11834
    },
    {
      "epoch": 7.222900390625e-05,
      "step": 11834,
      "training_step_time": 0.37909555435180664
    },
    {
      "epoch": 7.2235107421875e-05,
      "model_forward_time": 0.11536860466003418,
      "step": 11835
    },
    {
      "epoch": 7.2235107421875e-05,
      "step": 11835,
      "training_step_time": 0.38437843322753906
    },
    {
      "epoch": 7.22412109375e-05,
      "model_forward_time": 0.11566925048828125,
      "step": 11836
    },
    {
      "epoch": 7.22412109375e-05,
      "step": 11836,
      "training_step_time": 0.3852384090423584
    },
    {
      "epoch": 7.2247314453125e-05,
      "model_forward_time": 0.11549615859985352,
      "step": 11837
    },
    {
      "epoch": 7.2247314453125e-05,
      "step": 11837,
      "training_step_time": 0.3860917091369629
    },
    {
      "epoch": 7.225341796875e-05,
      "model_forward_time": 0.11517882347106934,
      "step": 11838
    },
    {
      "epoch": 7.225341796875e-05,
      "step": 11838,
      "training_step_time": 0.40462183952331543
    },
    {
      "epoch": 7.2259521484375e-05,
      "model_forward_time": 0.11599493026733398,
      "step": 11839
    },
    {
      "epoch": 7.2259521484375e-05,
      "step": 11839,
      "training_step_time": 0.733292818069458
    },
    {
      "epoch": 7.2265625e-05,
      "grad_norm": 0.17191873490810394,
      "learning_rate": 9.41818322614065e-05,
      "loss": 0.0561,
      "step": 11840
    },
    {
      "epoch": 7.2265625e-05,
      "model_forward_time": 0.114654541015625,
      "step": 11840
    },
    {
      "epoch": 7.2265625e-05,
      "step": 11840,
      "training_step_time": 0.4059138298034668
    },
    {
      "epoch": 7.2271728515625e-05,
      "model_forward_time": 0.11468124389648438,
      "step": 11841
    },
    {
      "epoch": 7.2271728515625e-05,
      "step": 11841,
      "training_step_time": 0.42154765129089355
    },
    {
      "epoch": 7.227783203125e-05,
      "model_forward_time": 0.11491799354553223,
      "step": 11842
    },
    {
      "epoch": 7.227783203125e-05,
      "step": 11842,
      "training_step_time": 0.3953688144683838
    },
    {
      "epoch": 7.2283935546875e-05,
      "model_forward_time": 0.11463618278503418,
      "step": 11843
    },
    {
      "epoch": 7.2283935546875e-05,
      "step": 11843,
      "training_step_time": 0.5183854103088379
    },
    {
      "epoch": 7.22900390625e-05,
      "model_forward_time": 0.11411452293395996,
      "step": 11844
    },
    {
      "epoch": 7.22900390625e-05,
      "step": 11844,
      "training_step_time": 0.3920004367828369
    },
    {
      "epoch": 7.2296142578125e-05,
      "model_forward_time": 0.11461758613586426,
      "step": 11845
    },
    {
      "epoch": 7.2296142578125e-05,
      "step": 11845,
      "training_step_time": 0.491039514541626
    },
    {
      "epoch": 7.230224609375e-05,
      "model_forward_time": 0.11447763442993164,
      "step": 11846
    },
    {
      "epoch": 7.230224609375e-05,
      "step": 11846,
      "training_step_time": 0.3918721675872803
    },
    {
      "epoch": 7.2308349609375e-05,
      "model_forward_time": 0.11456465721130371,
      "step": 11847
    },
    {
      "epoch": 7.2308349609375e-05,
      "step": 11847,
      "training_step_time": 0.39252662658691406
    },
    {
      "epoch": 7.2314453125e-05,
      "model_forward_time": 0.1153264045715332,
      "step": 11848
    },
    {
      "epoch": 7.2314453125e-05,
      "step": 11848,
      "training_step_time": 0.38713622093200684
    },
    {
      "epoch": 7.2320556640625e-05,
      "model_forward_time": 0.11470484733581543,
      "step": 11849
    },
    {
      "epoch": 7.2320556640625e-05,
      "step": 11849,
      "training_step_time": 0.4013943672180176
    },
    {
      "epoch": 7.232666015625e-05,
      "grad_norm": 0.15562771260738373,
      "learning_rate": 9.416892370988444e-05,
      "loss": 0.057,
      "step": 11850
    },
    {
      "epoch": 7.232666015625e-05,
      "model_forward_time": 0.11501884460449219,
      "step": 11850
    },
    {
      "epoch": 7.232666015625e-05,
      "step": 11850,
      "training_step_time": 0.39800262451171875
    },
    {
      "epoch": 7.2332763671875e-05,
      "model_forward_time": 0.11453771591186523,
      "step": 11851
    },
    {
      "epoch": 7.2332763671875e-05,
      "step": 11851,
      "training_step_time": 0.706803560256958
    },
    {
      "epoch": 7.23388671875e-05,
      "model_forward_time": 0.11466240882873535,
      "step": 11852
    },
    {
      "epoch": 7.23388671875e-05,
      "step": 11852,
      "training_step_time": 0.412045955657959
    },
    {
      "epoch": 7.2344970703125e-05,
      "model_forward_time": 0.11420345306396484,
      "step": 11853
    },
    {
      "epoch": 7.2344970703125e-05,
      "step": 11853,
      "training_step_time": 0.4053020477294922
    },
    {
      "epoch": 7.235107421875e-05,
      "model_forward_time": 0.11443710327148438,
      "step": 11854
    },
    {
      "epoch": 7.235107421875e-05,
      "step": 11854,
      "training_step_time": 0.4173111915588379
    },
    {
      "epoch": 7.2357177734375e-05,
      "model_forward_time": 0.11410808563232422,
      "step": 11855
    },
    {
      "epoch": 7.2357177734375e-05,
      "step": 11855,
      "training_step_time": 0.46426916122436523
    },
    {
      "epoch": 7.236328125e-05,
      "model_forward_time": 0.1144554615020752,
      "step": 11856
    },
    {
      "epoch": 7.236328125e-05,
      "step": 11856,
      "training_step_time": 0.418839693069458
    },
    {
      "epoch": 7.2369384765625e-05,
      "model_forward_time": 0.11537456512451172,
      "step": 11857
    },
    {
      "epoch": 7.2369384765625e-05,
      "step": 11857,
      "training_step_time": 0.6285593509674072
    },
    {
      "epoch": 7.237548828125e-05,
      "model_forward_time": 0.11429691314697266,
      "step": 11858
    },
    {
      "epoch": 7.237548828125e-05,
      "step": 11858,
      "training_step_time": 0.39941906929016113
    },
    {
      "epoch": 7.2381591796875e-05,
      "model_forward_time": 0.11526775360107422,
      "step": 11859
    },
    {
      "epoch": 7.2381591796875e-05,
      "step": 11859,
      "training_step_time": 0.4063737392425537
    },
    {
      "epoch": 7.23876953125e-05,
      "grad_norm": 0.15503709018230438,
      "learning_rate": 9.415600174100956e-05,
      "loss": 0.0658,
      "step": 11860
    },
    {
      "epoch": 7.23876953125e-05,
      "model_forward_time": 0.1144256591796875,
      "step": 11860
    },
    {
      "epoch": 7.23876953125e-05,
      "step": 11860,
      "training_step_time": 0.3960902690887451
    },
    {
      "epoch": 7.2393798828125e-05,
      "model_forward_time": 0.11517620086669922,
      "step": 11861
    },
    {
      "epoch": 7.2393798828125e-05,
      "step": 11861,
      "training_step_time": 0.3888983726501465
    },
    {
      "epoch": 7.239990234375e-05,
      "model_forward_time": 0.11501336097717285,
      "step": 11862
    },
    {
      "epoch": 7.239990234375e-05,
      "step": 11862,
      "training_step_time": 0.404555082321167
    },
    {
      "epoch": 7.2406005859375e-05,
      "model_forward_time": 0.11506414413452148,
      "step": 11863
    },
    {
      "epoch": 7.2406005859375e-05,
      "step": 11863,
      "training_step_time": 0.6692709922790527
    },
    {
      "epoch": 7.2412109375e-05,
      "model_forward_time": 0.11428117752075195,
      "step": 11864
    },
    {
      "epoch": 7.2412109375e-05,
      "step": 11864,
      "training_step_time": 0.38648343086242676
    },
    {
      "epoch": 7.2418212890625e-05,
      "model_forward_time": 0.11438751220703125,
      "step": 11865
    },
    {
      "epoch": 7.2418212890625e-05,
      "step": 11865,
      "training_step_time": 0.36571693420410156
    },
    {
      "epoch": 7.242431640625e-05,
      "model_forward_time": 0.11472582817077637,
      "step": 11866
    },
    {
      "epoch": 7.242431640625e-05,
      "step": 11866,
      "training_step_time": 0.43773531913757324
    },
    {
      "epoch": 7.2430419921875e-05,
      "model_forward_time": 0.11494851112365723,
      "step": 11867
    },
    {
      "epoch": 7.2430419921875e-05,
      "step": 11867,
      "training_step_time": 0.47103261947631836
    },
    {
      "epoch": 7.24365234375e-05,
      "model_forward_time": 0.11460542678833008,
      "step": 11868
    },
    {
      "epoch": 7.24365234375e-05,
      "step": 11868,
      "training_step_time": 0.42963457107543945
    },
    {
      "epoch": 7.2442626953125e-05,
      "model_forward_time": 0.11469578742980957,
      "step": 11869
    },
    {
      "epoch": 7.2442626953125e-05,
      "step": 11869,
      "training_step_time": 0.47560596466064453
    },
    {
      "epoch": 7.244873046875e-05,
      "grad_norm": 0.15466588735580444,
      "learning_rate": 9.414306635870722e-05,
      "loss": 0.0629,
      "step": 11870
    },
    {
      "epoch": 7.244873046875e-05,
      "model_forward_time": 0.11509370803833008,
      "step": 11870
    },
    {
      "epoch": 7.244873046875e-05,
      "step": 11870,
      "training_step_time": 0.4015491008758545
    },
    {
      "epoch": 7.2454833984375e-05,
      "model_forward_time": 0.11420488357543945,
      "step": 11871
    },
    {
      "epoch": 7.2454833984375e-05,
      "step": 11871,
      "training_step_time": 0.39153122901916504
    },
    {
      "epoch": 7.24609375e-05,
      "model_forward_time": 0.11511993408203125,
      "step": 11872
    },
    {
      "epoch": 7.24609375e-05,
      "step": 11872,
      "training_step_time": 0.39252686500549316
    },
    {
      "epoch": 7.2467041015625e-05,
      "model_forward_time": 0.11502480506896973,
      "step": 11873
    },
    {
      "epoch": 7.2467041015625e-05,
      "step": 11873,
      "training_step_time": 0.420166015625
    },
    {
      "epoch": 7.247314453125e-05,
      "model_forward_time": 0.11474990844726562,
      "step": 11874
    },
    {
      "epoch": 7.247314453125e-05,
      "step": 11874,
      "training_step_time": 0.38960719108581543
    },
    {
      "epoch": 7.2479248046875e-05,
      "model_forward_time": 0.11515927314758301,
      "step": 11875
    },
    {
      "epoch": 7.2479248046875e-05,
      "step": 11875,
      "training_step_time": 0.6827630996704102
    },
    {
      "epoch": 7.24853515625e-05,
      "model_forward_time": 0.11509895324707031,
      "step": 11876
    },
    {
      "epoch": 7.24853515625e-05,
      "step": 11876,
      "training_step_time": 0.38368964195251465
    },
    {
      "epoch": 7.2491455078125e-05,
      "model_forward_time": 0.11499238014221191,
      "step": 11877
    },
    {
      "epoch": 7.2491455078125e-05,
      "step": 11877,
      "training_step_time": 0.40155529975891113
    },
    {
      "epoch": 7.249755859375e-05,
      "model_forward_time": 0.11497855186462402,
      "step": 11878
    },
    {
      "epoch": 7.249755859375e-05,
      "step": 11878,
      "training_step_time": 0.39020538330078125
    },
    {
      "epoch": 7.2503662109375e-05,
      "model_forward_time": 0.11442732810974121,
      "step": 11879
    },
    {
      "epoch": 7.2503662109375e-05,
      "step": 11879,
      "training_step_time": 0.3628253936767578
    },
    {
      "epoch": 7.2509765625e-05,
      "grad_norm": 0.281992644071579,
      "learning_rate": 9.413011756690685e-05,
      "loss": 0.057,
      "step": 11880
    },
    {
      "epoch": 7.2509765625e-05,
      "model_forward_time": 0.11421322822570801,
      "step": 11880
    },
    {
      "epoch": 7.2509765625e-05,
      "step": 11880,
      "training_step_time": 0.43619751930236816
    },
    {
      "epoch": 7.2515869140625e-05,
      "model_forward_time": 0.11480021476745605,
      "step": 11881
    },
    {
      "epoch": 7.2515869140625e-05,
      "step": 11881,
      "training_step_time": 0.6087436676025391
    },
    {
      "epoch": 7.252197265625e-05,
      "model_forward_time": 0.11482524871826172,
      "step": 11882
    },
    {
      "epoch": 7.252197265625e-05,
      "step": 11882,
      "training_step_time": 0.3860628604888916
    },
    {
      "epoch": 7.2528076171875e-05,
      "model_forward_time": 0.11416888236999512,
      "step": 11883
    },
    {
      "epoch": 7.2528076171875e-05,
      "step": 11883,
      "training_step_time": 0.4037632942199707
    },
    {
      "epoch": 7.25341796875e-05,
      "model_forward_time": 0.11382889747619629,
      "step": 11884
    },
    {
      "epoch": 7.25341796875e-05,
      "step": 11884,
      "training_step_time": 0.38912153244018555
    },
    {
      "epoch": 7.2540283203125e-05,
      "model_forward_time": 0.11485934257507324,
      "step": 11885
    },
    {
      "epoch": 7.2540283203125e-05,
      "step": 11885,
      "training_step_time": 0.4308192729949951
    },
    {
      "epoch": 7.254638671875e-05,
      "model_forward_time": 0.1144249439239502,
      "step": 11886
    },
    {
      "epoch": 7.254638671875e-05,
      "step": 11886,
      "training_step_time": 0.39343690872192383
    },
    {
      "epoch": 7.2552490234375e-05,
      "model_forward_time": 0.11450362205505371,
      "step": 11887
    },
    {
      "epoch": 7.2552490234375e-05,
      "step": 11887,
      "training_step_time": 0.7131996154785156
    },
    {
      "epoch": 7.255859375e-05,
      "model_forward_time": 0.1147303581237793,
      "step": 11888
    },
    {
      "epoch": 7.255859375e-05,
      "step": 11888,
      "training_step_time": 0.39505934715270996
    },
    {
      "epoch": 7.2564697265625e-05,
      "model_forward_time": 0.1150672435760498,
      "step": 11889
    },
    {
      "epoch": 7.2564697265625e-05,
      "step": 11889,
      "training_step_time": 0.39143848419189453
    },
    {
      "epoch": 7.257080078125e-05,
      "grad_norm": 0.17679043114185333,
      "learning_rate": 9.411715536954196e-05,
      "loss": 0.0591,
      "step": 11890
    },
    {
      "epoch": 7.257080078125e-05,
      "model_forward_time": 0.11521577835083008,
      "step": 11890
    },
    {
      "epoch": 7.257080078125e-05,
      "step": 11890,
      "training_step_time": 0.39745020866394043
    },
    {
      "epoch": 7.2576904296875e-05,
      "model_forward_time": 0.11591148376464844,
      "step": 11891
    },
    {
      "epoch": 7.2576904296875e-05,
      "step": 11891,
      "training_step_time": 0.38673877716064453
    },
    {
      "epoch": 7.25830078125e-05,
      "model_forward_time": 0.11660170555114746,
      "step": 11892
    },
    {
      "epoch": 7.25830078125e-05,
      "step": 11892,
      "training_step_time": 0.400285005569458
    },
    {
      "epoch": 7.2589111328125e-05,
      "model_forward_time": 0.11506795883178711,
      "step": 11893
    },
    {
      "epoch": 7.2589111328125e-05,
      "step": 11893,
      "training_step_time": 0.7628486156463623
    },
    {
      "epoch": 7.259521484375e-05,
      "model_forward_time": 0.11440038681030273,
      "step": 11894
    },
    {
      "epoch": 7.259521484375e-05,
      "step": 11894,
      "training_step_time": 0.4405372142791748
    },
    {
      "epoch": 7.2601318359375e-05,
      "model_forward_time": 0.11438512802124023,
      "step": 11895
    },
    {
      "epoch": 7.2601318359375e-05,
      "step": 11895,
      "training_step_time": 0.4150972366333008
    },
    {
      "epoch": 7.2607421875e-05,
      "model_forward_time": 0.11471319198608398,
      "step": 11896
    },
    {
      "epoch": 7.2607421875e-05,
      "step": 11896,
      "training_step_time": 0.4242103099822998
    },
    {
      "epoch": 7.2613525390625e-05,
      "model_forward_time": 0.11362051963806152,
      "step": 11897
    },
    {
      "epoch": 7.2613525390625e-05,
      "step": 11897,
      "training_step_time": 0.38299059867858887
    },
    {
      "epoch": 7.261962890625e-05,
      "model_forward_time": 0.11490297317504883,
      "step": 11898
    },
    {
      "epoch": 7.261962890625e-05,
      "step": 11898,
      "training_step_time": 0.3840627670288086
    },
    {
      "epoch": 7.2625732421875e-05,
      "model_forward_time": 0.11587977409362793,
      "step": 11899
    },
    {
      "epoch": 7.2625732421875e-05,
      "step": 11899,
      "training_step_time": 0.4690361022949219
    },
    {
      "epoch": 7.26318359375e-05,
      "grad_norm": 0.20401492714881897,
      "learning_rate": 9.410417977055011e-05,
      "loss": 0.0608,
      "step": 11900
    },
    {
      "epoch": 7.26318359375e-05,
      "model_forward_time": 0.11567521095275879,
      "step": 11900
    },
    {
      "epoch": 7.26318359375e-05,
      "step": 11900,
      "training_step_time": 0.431429386138916
    },
    {
      "epoch": 7.2637939453125e-05,
      "model_forward_time": 0.1150503158569336,
      "step": 11901
    },
    {
      "epoch": 7.2637939453125e-05,
      "step": 11901,
      "training_step_time": 0.39386606216430664
    },
    {
      "epoch": 7.264404296875e-05,
      "model_forward_time": 0.11556720733642578,
      "step": 11902
    },
    {
      "epoch": 7.264404296875e-05,
      "step": 11902,
      "training_step_time": 0.4021120071411133
    },
    {
      "epoch": 7.2650146484375e-05,
      "model_forward_time": 0.11456179618835449,
      "step": 11903
    },
    {
      "epoch": 7.2650146484375e-05,
      "step": 11903,
      "training_step_time": 0.40166139602661133
    },
    {
      "epoch": 7.265625e-05,
      "model_forward_time": 0.11574101448059082,
      "step": 11904
    },
    {
      "epoch": 7.265625e-05,
      "step": 11904,
      "training_step_time": 0.38438963890075684
    },
    {
      "epoch": 7.2662353515625e-05,
      "model_forward_time": 0.11502838134765625,
      "step": 11905
    },
    {
      "epoch": 7.2662353515625e-05,
      "step": 11905,
      "training_step_time": 0.6934683322906494
    },
    {
      "epoch": 7.266845703125e-05,
      "model_forward_time": 0.11490464210510254,
      "step": 11906
    },
    {
      "epoch": 7.266845703125e-05,
      "step": 11906,
      "training_step_time": 0.3663482666015625
    },
    {
      "epoch": 7.2674560546875e-05,
      "model_forward_time": 0.11478686332702637,
      "step": 11907
    },
    {
      "epoch": 7.2674560546875e-05,
      "step": 11907,
      "training_step_time": 0.42209672927856445
    },
    {
      "epoch": 7.26806640625e-05,
      "model_forward_time": 0.11440467834472656,
      "step": 11908
    },
    {
      "epoch": 7.26806640625e-05,
      "step": 11908,
      "training_step_time": 0.4687988758087158
    },
    {
      "epoch": 7.2686767578125e-05,
      "model_forward_time": 0.11464452743530273,
      "step": 11909
    },
    {
      "epoch": 7.2686767578125e-05,
      "step": 11909,
      "training_step_time": 0.3976905345916748
    },
    {
      "epoch": 7.269287109375e-05,
      "grad_norm": 0.2163207232952118,
      "learning_rate": 9.409119077387294e-05,
      "loss": 0.0587,
      "step": 11910
    },
    {
      "epoch": 7.269287109375e-05,
      "model_forward_time": 0.11432433128356934,
      "step": 11910
    },
    {
      "epoch": 7.269287109375e-05,
      "step": 11910,
      "training_step_time": 0.4210383892059326
    },
    {
      "epoch": 7.2698974609375e-05,
      "model_forward_time": 0.11440396308898926,
      "step": 11911
    },
    {
      "epoch": 7.2698974609375e-05,
      "step": 11911,
      "training_step_time": 0.540665864944458
    },
    {
      "epoch": 7.2705078125e-05,
      "model_forward_time": 0.11433219909667969,
      "step": 11912
    },
    {
      "epoch": 7.2705078125e-05,
      "step": 11912,
      "training_step_time": 0.4479990005493164
    },
    {
      "epoch": 7.2711181640625e-05,
      "model_forward_time": 0.11426353454589844,
      "step": 11913
    },
    {
      "epoch": 7.2711181640625e-05,
      "step": 11913,
      "training_step_time": 0.4636087417602539
    },
    {
      "epoch": 7.271728515625e-05,
      "model_forward_time": 0.11459803581237793,
      "step": 11914
    },
    {
      "epoch": 7.271728515625e-05,
      "step": 11914,
      "training_step_time": 0.38851189613342285
    },
    {
      "epoch": 7.2723388671875e-05,
      "model_forward_time": 0.11476302146911621,
      "step": 11915
    },
    {
      "epoch": 7.2723388671875e-05,
      "step": 11915,
      "training_step_time": 0.38276076316833496
    },
    {
      "epoch": 7.27294921875e-05,
      "model_forward_time": 0.11390256881713867,
      "step": 11916
    },
    {
      "epoch": 7.27294921875e-05,
      "step": 11916,
      "training_step_time": 0.4216904640197754
    },
    {
      "epoch": 7.2735595703125e-05,
      "model_forward_time": 0.11517453193664551,
      "step": 11917
    },
    {
      "epoch": 7.2735595703125e-05,
      "step": 11917,
      "training_step_time": 0.5645480155944824
    },
    {
      "epoch": 7.274169921875e-05,
      "model_forward_time": 0.11534976959228516,
      "step": 11918
    },
    {
      "epoch": 7.274169921875e-05,
      "step": 11918,
      "training_step_time": 0.39250707626342773
    },
    {
      "epoch": 7.2747802734375e-05,
      "model_forward_time": 0.11473798751831055,
      "step": 11919
    },
    {
      "epoch": 7.2747802734375e-05,
      "step": 11919,
      "training_step_time": 0.452286958694458
    },
    {
      "epoch": 7.275390625e-05,
      "grad_norm": 0.1912895143032074,
      "learning_rate": 9.407818838345619e-05,
      "loss": 0.0569,
      "step": 11920
    },
    {
      "epoch": 7.275390625e-05,
      "model_forward_time": 0.11528182029724121,
      "step": 11920
    },
    {
      "epoch": 7.275390625e-05,
      "step": 11920,
      "training_step_time": 0.3883800506591797
    },
    {
      "epoch": 7.2760009765625e-05,
      "model_forward_time": 0.11489582061767578,
      "step": 11921
    },
    {
      "epoch": 7.2760009765625e-05,
      "step": 11921,
      "training_step_time": 0.41865110397338867
    },
    {
      "epoch": 7.276611328125e-05,
      "model_forward_time": 0.11462783813476562,
      "step": 11922
    },
    {
      "epoch": 7.276611328125e-05,
      "step": 11922,
      "training_step_time": 0.4829859733581543
    },
    {
      "epoch": 7.2772216796875e-05,
      "model_forward_time": 0.1152348518371582,
      "step": 11923
    },
    {
      "epoch": 7.2772216796875e-05,
      "step": 11923,
      "training_step_time": 0.6288886070251465
    },
    {
      "epoch": 7.27783203125e-05,
      "model_forward_time": 0.11463403701782227,
      "step": 11924
    },
    {
      "epoch": 7.27783203125e-05,
      "step": 11924,
      "training_step_time": 0.389462947845459
    },
    {
      "epoch": 7.2784423828125e-05,
      "model_forward_time": 0.11443853378295898,
      "step": 11925
    },
    {
      "epoch": 7.2784423828125e-05,
      "step": 11925,
      "training_step_time": 0.4956817626953125
    },
    {
      "epoch": 7.279052734375e-05,
      "model_forward_time": 0.11410403251647949,
      "step": 11926
    },
    {
      "epoch": 7.279052734375e-05,
      "step": 11926,
      "training_step_time": 0.40468931198120117
    },
    {
      "epoch": 7.2796630859375e-05,
      "model_forward_time": 0.11444878578186035,
      "step": 11927
    },
    {
      "epoch": 7.2796630859375e-05,
      "step": 11927,
      "training_step_time": 0.47460007667541504
    },
    {
      "epoch": 7.2802734375e-05,
      "model_forward_time": 0.11460065841674805,
      "step": 11928
    },
    {
      "epoch": 7.2802734375e-05,
      "step": 11928,
      "training_step_time": 0.39203596115112305
    },
    {
      "epoch": 7.2808837890625e-05,
      "model_forward_time": 0.11449503898620605,
      "step": 11929
    },
    {
      "epoch": 7.2808837890625e-05,
      "step": 11929,
      "training_step_time": 0.5067300796508789
    },
    {
      "epoch": 7.281494140625e-05,
      "grad_norm": 0.21475136280059814,
      "learning_rate": 9.40651726032496e-05,
      "loss": 0.0539,
      "step": 11930
    },
    {
      "epoch": 7.281494140625e-05,
      "model_forward_time": 0.11446261405944824,
      "step": 11930
    },
    {
      "epoch": 7.281494140625e-05,
      "step": 11930,
      "training_step_time": 0.39482641220092773
    },
    {
      "epoch": 7.2821044921875e-05,
      "model_forward_time": 0.11442303657531738,
      "step": 11931
    },
    {
      "epoch": 7.2821044921875e-05,
      "step": 11931,
      "training_step_time": 0.39481115341186523
    },
    {
      "epoch": 7.28271484375e-05,
      "model_forward_time": 0.11546611785888672,
      "step": 11932
    },
    {
      "epoch": 7.28271484375e-05,
      "step": 11932,
      "training_step_time": 0.3938436508178711
    },
    {
      "epoch": 7.2833251953125e-05,
      "model_forward_time": 0.11540555953979492,
      "step": 11933
    },
    {
      "epoch": 7.2833251953125e-05,
      "step": 11933,
      "training_step_time": 0.39779019355773926
    },
    {
      "epoch": 7.283935546875e-05,
      "model_forward_time": 0.11458706855773926,
      "step": 11934
    },
    {
      "epoch": 7.283935546875e-05,
      "step": 11934,
      "training_step_time": 0.36614179611206055
    },
    {
      "epoch": 7.2845458984375e-05,
      "model_forward_time": 0.11433720588684082,
      "step": 11935
    },
    {
      "epoch": 7.2845458984375e-05,
      "step": 11935,
      "training_step_time": 0.645423412322998
    },
    {
      "epoch": 7.28515625e-05,
      "model_forward_time": 0.11497306823730469,
      "step": 11936
    },
    {
      "epoch": 7.28515625e-05,
      "step": 11936,
      "training_step_time": 0.4844684600830078
    },
    {
      "epoch": 7.2857666015625e-05,
      "model_forward_time": 0.11608743667602539,
      "step": 11937
    },
    {
      "epoch": 7.2857666015625e-05,
      "step": 11937,
      "training_step_time": 0.502981424331665
    },
    {
      "epoch": 7.286376953125e-05,
      "model_forward_time": 0.11557936668395996,
      "step": 11938
    },
    {
      "epoch": 7.286376953125e-05,
      "step": 11938,
      "training_step_time": 0.4016432762145996
    },
    {
      "epoch": 7.2869873046875e-05,
      "model_forward_time": 0.11406636238098145,
      "step": 11939
    },
    {
      "epoch": 7.2869873046875e-05,
      "step": 11939,
      "training_step_time": 0.4326591491699219
    },
    {
      "epoch": 7.28759765625e-05,
      "grad_norm": 0.16066773235797882,
      "learning_rate": 9.405214343720707e-05,
      "loss": 0.0616,
      "step": 11940
    },
    {
      "epoch": 7.28759765625e-05,
      "model_forward_time": 0.11419677734375,
      "step": 11940
    },
    {
      "epoch": 7.28759765625e-05,
      "step": 11940,
      "training_step_time": 0.391920804977417
    },
    {
      "epoch": 7.2882080078125e-05,
      "model_forward_time": 0.11465787887573242,
      "step": 11941
    },
    {
      "epoch": 7.2882080078125e-05,
      "step": 11941,
      "training_step_time": 0.405426025390625
    },
    {
      "epoch": 7.288818359375e-05,
      "model_forward_time": 0.11484360694885254,
      "step": 11942
    },
    {
      "epoch": 7.288818359375e-05,
      "step": 11942,
      "training_step_time": 0.3945951461791992
    },
    {
      "epoch": 7.2894287109375e-05,
      "model_forward_time": 0.11521553993225098,
      "step": 11943
    },
    {
      "epoch": 7.2894287109375e-05,
      "step": 11943,
      "training_step_time": 0.3843848705291748
    },
    {
      "epoch": 7.2900390625e-05,
      "model_forward_time": 0.11484789848327637,
      "step": 11944
    },
    {
      "epoch": 7.2900390625e-05,
      "step": 11944,
      "training_step_time": 0.39872050285339355
    },
    {
      "epoch": 7.2906494140625e-05,
      "model_forward_time": 0.11590051651000977,
      "step": 11945
    },
    {
      "epoch": 7.2906494140625e-05,
      "step": 11945,
      "training_step_time": 0.4276151657104492
    },
    {
      "epoch": 7.291259765625e-05,
      "model_forward_time": 0.11611819267272949,
      "step": 11946
    },
    {
      "epoch": 7.291259765625e-05,
      "step": 11946,
      "training_step_time": 0.40596938133239746
    },
    {
      "epoch": 7.2918701171875e-05,
      "model_forward_time": 0.11585760116577148,
      "step": 11947
    },
    {
      "epoch": 7.2918701171875e-05,
      "step": 11947,
      "training_step_time": 0.6759781837463379
    },
    {
      "epoch": 7.29248046875e-05,
      "model_forward_time": 0.11489200592041016,
      "step": 11948
    },
    {
      "epoch": 7.29248046875e-05,
      "step": 11948,
      "training_step_time": 0.4133315086364746
    },
    {
      "epoch": 7.2930908203125e-05,
      "model_forward_time": 0.11459016799926758,
      "step": 11949
    },
    {
      "epoch": 7.2930908203125e-05,
      "step": 11949,
      "training_step_time": 0.4342329502105713
    },
    {
      "epoch": 7.293701171875e-05,
      "grad_norm": 0.15696518123149872,
      "learning_rate": 9.403910088928651e-05,
      "loss": 0.0579,
      "step": 11950
    },
    {
      "epoch": 7.293701171875e-05,
      "model_forward_time": 0.1148529052734375,
      "step": 11950
    },
    {
      "epoch": 7.293701171875e-05,
      "step": 11950,
      "training_step_time": 0.4746973514556885
    },
    {
      "epoch": 7.2943115234375e-05,
      "model_forward_time": 0.11456131935119629,
      "step": 11951
    },
    {
      "epoch": 7.2943115234375e-05,
      "step": 11951,
      "training_step_time": 0.4883449077606201
    },
    {
      "epoch": 7.294921875e-05,
      "model_forward_time": 0.1142587661743164,
      "step": 11952
    },
    {
      "epoch": 7.294921875e-05,
      "step": 11952,
      "training_step_time": 0.38820791244506836
    },
    {
      "epoch": 7.2955322265625e-05,
      "model_forward_time": 0.11544919013977051,
      "step": 11953
    },
    {
      "epoch": 7.2955322265625e-05,
      "step": 11953,
      "training_step_time": 0.49944639205932617
    },
    {
      "epoch": 7.296142578125e-05,
      "model_forward_time": 0.11554813385009766,
      "step": 11954
    },
    {
      "epoch": 7.296142578125e-05,
      "step": 11954,
      "training_step_time": 0.4120662212371826
    },
    {
      "epoch": 7.2967529296875e-05,
      "model_forward_time": 0.11428380012512207,
      "step": 11955
    },
    {
      "epoch": 7.2967529296875e-05,
      "step": 11955,
      "training_step_time": 0.44890832901000977
    },
    {
      "epoch": 7.29736328125e-05,
      "model_forward_time": 0.11464905738830566,
      "step": 11956
    },
    {
      "epoch": 7.29736328125e-05,
      "step": 11956,
      "training_step_time": 0.3863532543182373
    },
    {
      "epoch": 7.2979736328125e-05,
      "model_forward_time": 0.11548376083374023,
      "step": 11957
    },
    {
      "epoch": 7.2979736328125e-05,
      "step": 11957,
      "training_step_time": 0.39295125007629395
    },
    {
      "epoch": 7.298583984375e-05,
      "model_forward_time": 0.11491060256958008,
      "step": 11958
    },
    {
      "epoch": 7.298583984375e-05,
      "step": 11958,
      "training_step_time": 0.4598076343536377
    },
    {
      "epoch": 7.2991943359375e-05,
      "model_forward_time": 0.1146693229675293,
      "step": 11959
    },
    {
      "epoch": 7.2991943359375e-05,
      "step": 11959,
      "training_step_time": 0.6315944194793701
    },
    {
      "epoch": 7.2998046875e-05,
      "grad_norm": 0.18049165606498718,
      "learning_rate": 9.402604496344984e-05,
      "loss": 0.0544,
      "step": 11960
    },
    {
      "epoch": 7.2998046875e-05,
      "model_forward_time": 0.11417698860168457,
      "step": 11960
    },
    {
      "epoch": 7.2998046875e-05,
      "step": 11960,
      "training_step_time": 0.37892913818359375
    },
    {
      "epoch": 7.3004150390625e-05,
      "model_forward_time": 0.11435937881469727,
      "step": 11961
    },
    {
      "epoch": 7.3004150390625e-05,
      "step": 11961,
      "training_step_time": 0.398313045501709
    },
    {
      "epoch": 7.301025390625e-05,
      "model_forward_time": 0.11418604850769043,
      "step": 11962
    },
    {
      "epoch": 7.301025390625e-05,
      "step": 11962,
      "training_step_time": 0.36601901054382324
    },
    {
      "epoch": 7.3016357421875e-05,
      "model_forward_time": 0.11470603942871094,
      "step": 11963
    },
    {
      "epoch": 7.3016357421875e-05,
      "step": 11963,
      "training_step_time": 0.4628756046295166
    },
    {
      "epoch": 7.30224609375e-05,
      "model_forward_time": 0.11504721641540527,
      "step": 11964
    },
    {
      "epoch": 7.30224609375e-05,
      "step": 11964,
      "training_step_time": 0.42389822006225586
    },
    {
      "epoch": 7.3028564453125e-05,
      "model_forward_time": 0.11520004272460938,
      "step": 11965
    },
    {
      "epoch": 7.3028564453125e-05,
      "step": 11965,
      "training_step_time": 0.4784073829650879
    },
    {
      "epoch": 7.303466796875e-05,
      "model_forward_time": 0.11553359031677246,
      "step": 11966
    },
    {
      "epoch": 7.303466796875e-05,
      "step": 11966,
      "training_step_time": 0.38700103759765625
    },
    {
      "epoch": 7.3040771484375e-05,
      "model_forward_time": 0.11548376083374023,
      "step": 11967
    },
    {
      "epoch": 7.3040771484375e-05,
      "step": 11967,
      "training_step_time": 0.41765761375427246
    },
    {
      "epoch": 7.3046875e-05,
      "model_forward_time": 0.11525726318359375,
      "step": 11968
    },
    {
      "epoch": 7.3046875e-05,
      "step": 11968,
      "training_step_time": 0.40132951736450195
    },
    {
      "epoch": 7.3052978515625e-05,
      "model_forward_time": 0.1158914566040039,
      "step": 11969
    },
    {
      "epoch": 7.3052978515625e-05,
      "step": 11969,
      "training_step_time": 0.48697948455810547
    },
    {
      "epoch": 7.305908203125e-05,
      "grad_norm": 0.2796829044818878,
      "learning_rate": 9.401297566366318e-05,
      "loss": 0.0589,
      "step": 11970
    },
    {
      "epoch": 7.305908203125e-05,
      "model_forward_time": 0.11489105224609375,
      "step": 11970
    },
    {
      "epoch": 7.305908203125e-05,
      "step": 11970,
      "training_step_time": 0.40035104751586914
    },
    {
      "epoch": 7.3065185546875e-05,
      "model_forward_time": 0.11528730392456055,
      "step": 11971
    },
    {
      "epoch": 7.3065185546875e-05,
      "step": 11971,
      "training_step_time": 0.44455885887145996
    },
    {
      "epoch": 7.30712890625e-05,
      "model_forward_time": 0.11556315422058105,
      "step": 11972
    },
    {
      "epoch": 7.30712890625e-05,
      "step": 11972,
      "training_step_time": 0.397916316986084
    },
    {
      "epoch": 7.3077392578125e-05,
      "model_forward_time": 0.1158761978149414,
      "step": 11973
    },
    {
      "epoch": 7.3077392578125e-05,
      "step": 11973,
      "training_step_time": 0.3966071605682373
    },
    {
      "epoch": 7.308349609375e-05,
      "model_forward_time": 0.11544156074523926,
      "step": 11974
    },
    {
      "epoch": 7.308349609375e-05,
      "step": 11974,
      "training_step_time": 0.39214158058166504
    },
    {
      "epoch": 7.3089599609375e-05,
      "model_forward_time": 0.11519885063171387,
      "step": 11975
    },
    {
      "epoch": 7.3089599609375e-05,
      "step": 11975,
      "training_step_time": 0.4014897346496582
    },
    {
      "epoch": 7.3095703125e-05,
      "model_forward_time": 0.11572480201721191,
      "step": 11976
    },
    {
      "epoch": 7.3095703125e-05,
      "step": 11976,
      "training_step_time": 0.3974466323852539
    },
    {
      "epoch": 7.3101806640625e-05,
      "model_forward_time": 0.11563301086425781,
      "step": 11977
    },
    {
      "epoch": 7.3101806640625e-05,
      "step": 11977,
      "training_step_time": 0.8372077941894531
    },
    {
      "epoch": 7.310791015625e-05,
      "model_forward_time": 0.11480093002319336,
      "step": 11978
    },
    {
      "epoch": 7.310791015625e-05,
      "step": 11978,
      "training_step_time": 0.4222831726074219
    },
    {
      "epoch": 7.3114013671875e-05,
      "model_forward_time": 0.1143808364868164,
      "step": 11979
    },
    {
      "epoch": 7.3114013671875e-05,
      "step": 11979,
      "training_step_time": 0.40419793128967285
    },
    {
      "epoch": 7.31201171875e-05,
      "grad_norm": 0.15775166451931,
      "learning_rate": 9.399989299389661e-05,
      "loss": 0.0588,
      "step": 11980
    },
    {
      "epoch": 7.31201171875e-05,
      "model_forward_time": 0.11462903022766113,
      "step": 11980
    },
    {
      "epoch": 7.31201171875e-05,
      "step": 11980,
      "training_step_time": 0.3950622081756592
    },
    {
      "epoch": 7.3126220703125e-05,
      "model_forward_time": 0.1146245002746582,
      "step": 11981
    },
    {
      "epoch": 7.3126220703125e-05,
      "step": 11981,
      "training_step_time": 0.42273497581481934
    },
    {
      "epoch": 7.313232421875e-05,
      "model_forward_time": 0.11461567878723145,
      "step": 11982
    },
    {
      "epoch": 7.313232421875e-05,
      "step": 11982,
      "training_step_time": 0.40231943130493164
    },
    {
      "epoch": 7.3138427734375e-05,
      "model_forward_time": 0.11523962020874023,
      "step": 11983
    },
    {
      "epoch": 7.3138427734375e-05,
      "step": 11983,
      "training_step_time": 0.6200706958770752
    },
    {
      "epoch": 7.314453125e-05,
      "model_forward_time": 0.1148521900177002,
      "step": 11984
    },
    {
      "epoch": 7.314453125e-05,
      "step": 11984,
      "training_step_time": 0.4532194137573242
    },
    {
      "epoch": 7.3150634765625e-05,
      "model_forward_time": 0.11449432373046875,
      "step": 11985
    },
    {
      "epoch": 7.3150634765625e-05,
      "step": 11985,
      "training_step_time": 0.3863089084625244
    },
    {
      "epoch": 7.315673828125e-05,
      "model_forward_time": 0.11494278907775879,
      "step": 11986
    },
    {
      "epoch": 7.315673828125e-05,
      "step": 11986,
      "training_step_time": 0.39420056343078613
    },
    {
      "epoch": 7.3162841796875e-05,
      "model_forward_time": 0.11479759216308594,
      "step": 11987
    },
    {
      "epoch": 7.3162841796875e-05,
      "step": 11987,
      "training_step_time": 0.3882012367248535
    },
    {
      "epoch": 7.31689453125e-05,
      "model_forward_time": 0.1146540641784668,
      "step": 11988
    },
    {
      "epoch": 7.31689453125e-05,
      "step": 11988,
      "training_step_time": 0.40209269523620605
    },
    {
      "epoch": 7.3175048828125e-05,
      "model_forward_time": 0.11481881141662598,
      "step": 11989
    },
    {
      "epoch": 7.3175048828125e-05,
      "step": 11989,
      "training_step_time": 0.6081826686859131
    },
    {
      "epoch": 7.318115234375e-05,
      "grad_norm": 0.1936543732881546,
      "learning_rate": 9.39867969581243e-05,
      "loss": 0.056,
      "step": 11990
    },
    {
      "epoch": 7.318115234375e-05,
      "model_forward_time": 0.11457705497741699,
      "step": 11990
    },
    {
      "epoch": 7.318115234375e-05,
      "step": 11990,
      "training_step_time": 0.36593151092529297
    },
    {
      "epoch": 7.3187255859375e-05,
      "model_forward_time": 0.11452412605285645,
      "step": 11991
    },
    {
      "epoch": 7.3187255859375e-05,
      "step": 11991,
      "training_step_time": 0.40250158309936523
    },
    {
      "epoch": 7.3193359375e-05,
      "model_forward_time": 0.11484074592590332,
      "step": 11992
    },
    {
      "epoch": 7.3193359375e-05,
      "step": 11992,
      "training_step_time": 0.45406508445739746
    },
    {
      "epoch": 7.3199462890625e-05,
      "model_forward_time": 0.11505317687988281,
      "step": 11993
    },
    {
      "epoch": 7.3199462890625e-05,
      "step": 11993,
      "training_step_time": 0.41113758087158203
    },
    {
      "epoch": 7.320556640625e-05,
      "model_forward_time": 0.11408066749572754,
      "step": 11994
    },
    {
      "epoch": 7.320556640625e-05,
      "step": 11994,
      "training_step_time": 0.39055919647216797
    },
    {
      "epoch": 7.3211669921875e-05,
      "model_forward_time": 0.11455583572387695,
      "step": 11995
    },
    {
      "epoch": 7.3211669921875e-05,
      "step": 11995,
      "training_step_time": 0.7323479652404785
    },
    {
      "epoch": 7.32177734375e-05,
      "model_forward_time": 0.1141667366027832,
      "step": 11996
    },
    {
      "epoch": 7.32177734375e-05,
      "step": 11996,
      "training_step_time": 0.47309017181396484
    },
    {
      "epoch": 7.3223876953125e-05,
      "model_forward_time": 0.11423063278198242,
      "step": 11997
    },
    {
      "epoch": 7.3223876953125e-05,
      "step": 11997,
      "training_step_time": 0.4102661609649658
    },
    {
      "epoch": 7.322998046875e-05,
      "model_forward_time": 0.11489486694335938,
      "step": 11998
    },
    {
      "epoch": 7.322998046875e-05,
      "step": 11998,
      "training_step_time": 0.38227033615112305
    },
    {
      "epoch": 7.3236083984375e-05,
      "model_forward_time": 0.11437058448791504,
      "step": 11999
    },
    {
      "epoch": 7.3236083984375e-05,
      "step": 11999,
      "training_step_time": 0.37955689430236816
    },
    {
      "epoch": 7.32421875e-05,
      "grad_norm": 0.18668408691883087,
      "learning_rate": 9.397368756032445e-05,
      "loss": 0.0543,
      "step": 12000
    },
    {
      "epoch": 7.32421875e-05,
      "model_forward_time": 0.11251664161682129,
      "step": 12000
    },
    {
      "epoch": 7.32421875e-05,
      "step": 12000,
      "training_step_time": 0.35807013511657715
    },
    {
      "epoch": 7.3248291015625e-05,
      "model_forward_time": 0.11263132095336914,
      "step": 12001
    },
    {
      "epoch": 7.3248291015625e-05,
      "step": 12001,
      "training_step_time": 0.3754737377166748
    },
    {
      "epoch": 7.325439453125e-05,
      "model_forward_time": 0.1132051944732666,
      "step": 12002
    },
    {
      "epoch": 7.325439453125e-05,
      "step": 12002,
      "training_step_time": 0.37003064155578613
    },
    {
      "epoch": 7.3260498046875e-05,
      "model_forward_time": 0.11351823806762695,
      "step": 12003
    },
    {
      "epoch": 7.3260498046875e-05,
      "step": 12003,
      "training_step_time": 0.3770596981048584
    },
    {
      "epoch": 7.32666015625e-05,
      "model_forward_time": 0.1142120361328125,
      "step": 12004
    },
    {
      "epoch": 7.32666015625e-05,
      "step": 12004,
      "training_step_time": 0.3860650062561035
    },
    {
      "epoch": 7.3272705078125e-05,
      "model_forward_time": 0.1139376163482666,
      "step": 12005
    },
    {
      "epoch": 7.3272705078125e-05,
      "step": 12005,
      "training_step_time": 0.37920546531677246
    },
    {
      "epoch": 7.327880859375e-05,
      "model_forward_time": 0.11394715309143066,
      "step": 12006
    },
    {
      "epoch": 7.327880859375e-05,
      "step": 12006,
      "training_step_time": 0.39163637161254883
    },
    {
      "epoch": 7.3284912109375e-05,
      "model_forward_time": 0.11522245407104492,
      "step": 12007
    },
    {
      "epoch": 7.3284912109375e-05,
      "step": 12007,
      "training_step_time": 0.38681769371032715
    },
    {
      "epoch": 7.3291015625e-05,
      "model_forward_time": 0.11539220809936523,
      "step": 12008
    },
    {
      "epoch": 7.3291015625e-05,
      "step": 12008,
      "training_step_time": 0.41352367401123047
    },
    {
      "epoch": 7.3297119140625e-05,
      "model_forward_time": 0.11519694328308105,
      "step": 12009
    },
    {
      "epoch": 7.3297119140625e-05,
      "step": 12009,
      "training_step_time": 0.40670132637023926
    },
    {
      "epoch": 7.330322265625e-05,
      "grad_norm": 0.22753381729125977,
      "learning_rate": 9.39605648044794e-05,
      "loss": 0.06,
      "step": 12010
    },
    {
      "epoch": 7.330322265625e-05,
      "model_forward_time": 0.11469674110412598,
      "step": 12010
    },
    {
      "epoch": 7.330322265625e-05,
      "step": 12010,
      "training_step_time": 0.447002649307251
    },
    {
      "epoch": 7.3309326171875e-05,
      "model_forward_time": 0.11575889587402344,
      "step": 12011
    },
    {
      "epoch": 7.3309326171875e-05,
      "step": 12011,
      "training_step_time": 0.3827695846557617
    },
    {
      "epoch": 7.33154296875e-05,
      "model_forward_time": 0.11550021171569824,
      "step": 12012
    },
    {
      "epoch": 7.33154296875e-05,
      "step": 12012,
      "training_step_time": 0.40364861488342285
    },
    {
      "epoch": 7.3321533203125e-05,
      "model_forward_time": 0.11501240730285645,
      "step": 12013
    },
    {
      "epoch": 7.3321533203125e-05,
      "step": 12013,
      "training_step_time": 0.4664418697357178
    },
    {
      "epoch": 7.332763671875e-05,
      "model_forward_time": 0.11555099487304688,
      "step": 12014
    },
    {
      "epoch": 7.332763671875e-05,
      "step": 12014,
      "training_step_time": 0.49233460426330566
    },
    {
      "epoch": 7.3333740234375e-05,
      "model_forward_time": 0.11469340324401855,
      "step": 12015
    },
    {
      "epoch": 7.3333740234375e-05,
      "step": 12015,
      "training_step_time": 0.39252400398254395
    },
    {
      "epoch": 7.333984375e-05,
      "model_forward_time": 0.11557722091674805,
      "step": 12016
    },
    {
      "epoch": 7.333984375e-05,
      "step": 12016,
      "training_step_time": 0.3878359794616699
    },
    {
      "epoch": 7.3345947265625e-05,
      "model_forward_time": 0.1150357723236084,
      "step": 12017
    },
    {
      "epoch": 7.3345947265625e-05,
      "step": 12017,
      "training_step_time": 0.39009952545166016
    },
    {
      "epoch": 7.335205078125e-05,
      "model_forward_time": 0.11516404151916504,
      "step": 12018
    },
    {
      "epoch": 7.335205078125e-05,
      "step": 12018,
      "training_step_time": 0.3965432643890381
    },
    {
      "epoch": 7.3358154296875e-05,
      "model_forward_time": 0.1150197982788086,
      "step": 12019
    },
    {
      "epoch": 7.3358154296875e-05,
      "step": 12019,
      "training_step_time": 0.4122030735015869
    },
    {
      "epoch": 7.33642578125e-05,
      "grad_norm": 0.15026602149009705,
      "learning_rate": 9.394742869457547e-05,
      "loss": 0.0619,
      "step": 12020
    },
    {
      "epoch": 7.33642578125e-05,
      "model_forward_time": 0.11495399475097656,
      "step": 12020
    },
    {
      "epoch": 7.33642578125e-05,
      "step": 12020,
      "training_step_time": 0.38944554328918457
    },
    {
      "epoch": 7.3370361328125e-05,
      "model_forward_time": 0.11540365219116211,
      "step": 12021
    },
    {
      "epoch": 7.3370361328125e-05,
      "step": 12021,
      "training_step_time": 0.39525818824768066
    },
    {
      "epoch": 7.337646484375e-05,
      "model_forward_time": 0.1154947280883789,
      "step": 12022
    },
    {
      "epoch": 7.337646484375e-05,
      "step": 12022,
      "training_step_time": 0.39092111587524414
    },
    {
      "epoch": 7.3382568359375e-05,
      "model_forward_time": 0.11540412902832031,
      "step": 12023
    },
    {
      "epoch": 7.3382568359375e-05,
      "step": 12023,
      "training_step_time": 0.44692254066467285
    },
    {
      "epoch": 7.3388671875e-05,
      "model_forward_time": 0.11566638946533203,
      "step": 12024
    },
    {
      "epoch": 7.3388671875e-05,
      "step": 12024,
      "training_step_time": 0.46540307998657227
    },
    {
      "epoch": 7.3394775390625e-05,
      "model_forward_time": 0.1159520149230957,
      "step": 12025
    },
    {
      "epoch": 7.3394775390625e-05,
      "step": 12025,
      "training_step_time": 0.4917259216308594
    },
    {
      "epoch": 7.340087890625e-05,
      "model_forward_time": 0.11466169357299805,
      "step": 12026
    },
    {
      "epoch": 7.340087890625e-05,
      "step": 12026,
      "training_step_time": 0.42606544494628906
    },
    {
      "epoch": 7.3406982421875e-05,
      "model_forward_time": 0.11480450630187988,
      "step": 12027
    },
    {
      "epoch": 7.3406982421875e-05,
      "step": 12027,
      "training_step_time": 0.463900089263916
    },
    {
      "epoch": 7.34130859375e-05,
      "model_forward_time": 0.11522960662841797,
      "step": 12028
    },
    {
      "epoch": 7.34130859375e-05,
      "step": 12028,
      "training_step_time": 0.44585347175598145
    },
    {
      "epoch": 7.3419189453125e-05,
      "model_forward_time": 0.11751413345336914,
      "step": 12029
    },
    {
      "epoch": 7.3419189453125e-05,
      "step": 12029,
      "training_step_time": 0.3898632526397705
    },
    {
      "epoch": 7.342529296875e-05,
      "grad_norm": 0.17161791026592255,
      "learning_rate": 9.393427923460308e-05,
      "loss": 0.0583,
      "step": 12030
    },
    {
      "epoch": 7.342529296875e-05,
      "model_forward_time": 0.11488699913024902,
      "step": 12030
    },
    {
      "epoch": 7.342529296875e-05,
      "step": 12030,
      "training_step_time": 0.3856375217437744
    },
    {
      "epoch": 7.3431396484375e-05,
      "model_forward_time": 0.11492419242858887,
      "step": 12031
    },
    {
      "epoch": 7.3431396484375e-05,
      "step": 12031,
      "training_step_time": 0.41213345527648926
    },
    {
      "epoch": 7.34375e-05,
      "model_forward_time": 0.1146240234375,
      "step": 12032
    },
    {
      "epoch": 7.34375e-05,
      "step": 12032,
      "training_step_time": 0.38356685638427734
    },
    {
      "epoch": 7.3443603515625e-05,
      "model_forward_time": 0.11555743217468262,
      "step": 12033
    },
    {
      "epoch": 7.3443603515625e-05,
      "step": 12033,
      "training_step_time": 0.398639440536499
    },
    {
      "epoch": 7.344970703125e-05,
      "model_forward_time": 0.11574482917785645,
      "step": 12034
    },
    {
      "epoch": 7.344970703125e-05,
      "step": 12034,
      "training_step_time": 0.3860480785369873
    },
    {
      "epoch": 7.3455810546875e-05,
      "model_forward_time": 0.11522245407104492,
      "step": 12035
    },
    {
      "epoch": 7.3455810546875e-05,
      "step": 12035,
      "training_step_time": 0.38866591453552246
    },
    {
      "epoch": 7.34619140625e-05,
      "model_forward_time": 0.11598992347717285,
      "step": 12036
    },
    {
      "epoch": 7.34619140625e-05,
      "step": 12036,
      "training_step_time": 0.39086461067199707
    },
    {
      "epoch": 7.3468017578125e-05,
      "model_forward_time": 0.11470532417297363,
      "step": 12037
    },
    {
      "epoch": 7.3468017578125e-05,
      "step": 12037,
      "training_step_time": 0.36966776847839355
    },
    {
      "epoch": 7.347412109375e-05,
      "model_forward_time": 0.11533808708190918,
      "step": 12038
    },
    {
      "epoch": 7.347412109375e-05,
      "step": 12038,
      "training_step_time": 0.46613621711730957
    },
    {
      "epoch": 7.3480224609375e-05,
      "model_forward_time": 0.1149895191192627,
      "step": 12039
    },
    {
      "epoch": 7.3480224609375e-05,
      "step": 12039,
      "training_step_time": 0.4819478988647461
    },
    {
      "epoch": 7.3486328125e-05,
      "grad_norm": 0.1752343475818634,
      "learning_rate": 9.392111642855665e-05,
      "loss": 0.0563,
      "step": 12040
    },
    {
      "epoch": 7.3486328125e-05,
      "model_forward_time": 0.11495542526245117,
      "step": 12040
    },
    {
      "epoch": 7.3486328125e-05,
      "step": 12040,
      "training_step_time": 0.38994359970092773
    },
    {
      "epoch": 7.3492431640625e-05,
      "model_forward_time": 0.11583542823791504,
      "step": 12041
    },
    {
      "epoch": 7.3492431640625e-05,
      "step": 12041,
      "training_step_time": 0.43698668479919434
    },
    {
      "epoch": 7.349853515625e-05,
      "model_forward_time": 0.11516857147216797,
      "step": 12042
    },
    {
      "epoch": 7.349853515625e-05,
      "step": 12042,
      "training_step_time": 0.42572975158691406
    },
    {
      "epoch": 7.3504638671875e-05,
      "model_forward_time": 0.11507964134216309,
      "step": 12043
    },
    {
      "epoch": 7.3504638671875e-05,
      "step": 12043,
      "training_step_time": 0.46700286865234375
    },
    {
      "epoch": 7.35107421875e-05,
      "model_forward_time": 0.1149451732635498,
      "step": 12044
    },
    {
      "epoch": 7.35107421875e-05,
      "step": 12044,
      "training_step_time": 0.3829073905944824
    },
    {
      "epoch": 7.3516845703125e-05,
      "model_forward_time": 0.11572408676147461,
      "step": 12045
    },
    {
      "epoch": 7.3516845703125e-05,
      "step": 12045,
      "training_step_time": 0.3954126834869385
    },
    {
      "epoch": 7.352294921875e-05,
      "model_forward_time": 0.11419677734375,
      "step": 12046
    },
    {
      "epoch": 7.352294921875e-05,
      "step": 12046,
      "training_step_time": 0.3813166618347168
    },
    {
      "epoch": 7.3529052734375e-05,
      "model_forward_time": 0.11555647850036621,
      "step": 12047
    },
    {
      "epoch": 7.3529052734375e-05,
      "step": 12047,
      "training_step_time": 0.416717529296875
    },
    {
      "epoch": 7.353515625e-05,
      "model_forward_time": 0.11472654342651367,
      "step": 12048
    },
    {
      "epoch": 7.353515625e-05,
      "step": 12048,
      "training_step_time": 0.3803892135620117
    },
    {
      "epoch": 7.3541259765625e-05,
      "model_forward_time": 0.11497354507446289,
      "step": 12049
    },
    {
      "epoch": 7.3541259765625e-05,
      "step": 12049,
      "training_step_time": 0.393829345703125
    },
    {
      "epoch": 7.354736328125e-05,
      "grad_norm": 0.16481785476207733,
      "learning_rate": 9.390794028043474e-05,
      "loss": 0.0609,
      "step": 12050
    },
    {
      "epoch": 7.354736328125e-05,
      "model_forward_time": 0.11558866500854492,
      "step": 12050
    },
    {
      "epoch": 7.354736328125e-05,
      "step": 12050,
      "training_step_time": 0.3990945816040039
    },
    {
      "epoch": 7.3553466796875e-05,
      "model_forward_time": 0.11538910865783691,
      "step": 12051
    },
    {
      "epoch": 7.3553466796875e-05,
      "step": 12051,
      "training_step_time": 0.3977794647216797
    },
    {
      "epoch": 7.35595703125e-05,
      "model_forward_time": 0.11461091041564941,
      "step": 12052
    },
    {
      "epoch": 7.35595703125e-05,
      "step": 12052,
      "training_step_time": 0.424274206161499
    },
    {
      "epoch": 7.3565673828125e-05,
      "model_forward_time": 0.1147911548614502,
      "step": 12053
    },
    {
      "epoch": 7.3565673828125e-05,
      "step": 12053,
      "training_step_time": 0.4560554027557373
    },
    {
      "epoch": 7.357177734375e-05,
      "model_forward_time": 0.11481022834777832,
      "step": 12054
    },
    {
      "epoch": 7.357177734375e-05,
      "step": 12054,
      "training_step_time": 0.47582125663757324
    },
    {
      "epoch": 7.3577880859375e-05,
      "model_forward_time": 0.1153421401977539,
      "step": 12055
    },
    {
      "epoch": 7.3577880859375e-05,
      "step": 12055,
      "training_step_time": 0.514695405960083
    },
    {
      "epoch": 7.3583984375e-05,
      "model_forward_time": 0.11463189125061035,
      "step": 12056
    },
    {
      "epoch": 7.3583984375e-05,
      "step": 12056,
      "training_step_time": 0.4570150375366211
    },
    {
      "epoch": 7.3590087890625e-05,
      "model_forward_time": 0.11430573463439941,
      "step": 12057
    },
    {
      "epoch": 7.3590087890625e-05,
      "step": 12057,
      "training_step_time": 0.45780467987060547
    },
    {
      "epoch": 7.359619140625e-05,
      "model_forward_time": 0.1151273250579834,
      "step": 12058
    },
    {
      "epoch": 7.359619140625e-05,
      "step": 12058,
      "training_step_time": 0.40903162956237793
    },
    {
      "epoch": 7.3602294921875e-05,
      "model_forward_time": 0.11473345756530762,
      "step": 12059
    },
    {
      "epoch": 7.3602294921875e-05,
      "step": 12059,
      "training_step_time": 0.38349080085754395
    },
    {
      "epoch": 7.36083984375e-05,
      "grad_norm": 0.25680211186408997,
      "learning_rate": 9.389475079423988e-05,
      "loss": 0.0612,
      "step": 12060
    },
    {
      "epoch": 7.36083984375e-05,
      "model_forward_time": 0.11502599716186523,
      "step": 12060
    },
    {
      "epoch": 7.36083984375e-05,
      "step": 12060,
      "training_step_time": 0.3863658905029297
    },
    {
      "epoch": 7.3614501953125e-05,
      "model_forward_time": 0.1157829761505127,
      "step": 12061
    },
    {
      "epoch": 7.3614501953125e-05,
      "step": 12061,
      "training_step_time": 0.7003827095031738
    },
    {
      "epoch": 7.362060546875e-05,
      "model_forward_time": 0.11467933654785156,
      "step": 12062
    },
    {
      "epoch": 7.362060546875e-05,
      "step": 12062,
      "training_step_time": 0.4009110927581787
    },
    {
      "epoch": 7.3626708984375e-05,
      "model_forward_time": 0.11454010009765625,
      "step": 12063
    },
    {
      "epoch": 7.3626708984375e-05,
      "step": 12063,
      "training_step_time": 0.38756799697875977
    },
    {
      "epoch": 7.36328125e-05,
      "model_forward_time": 0.11466073989868164,
      "step": 12064
    },
    {
      "epoch": 7.36328125e-05,
      "step": 12064,
      "training_step_time": 0.39979076385498047
    },
    {
      "epoch": 7.3638916015625e-05,
      "model_forward_time": 0.11516046524047852,
      "step": 12065
    },
    {
      "epoch": 7.3638916015625e-05,
      "step": 12065,
      "training_step_time": 0.4024527072906494
    },
    {
      "epoch": 7.364501953125e-05,
      "model_forward_time": 0.11490774154663086,
      "step": 12066
    },
    {
      "epoch": 7.364501953125e-05,
      "step": 12066,
      "training_step_time": 0.4331519603729248
    },
    {
      "epoch": 7.3651123046875e-05,
      "model_forward_time": 0.11500310897827148,
      "step": 12067
    },
    {
      "epoch": 7.3651123046875e-05,
      "step": 12067,
      "training_step_time": 0.7314989566802979
    },
    {
      "epoch": 7.36572265625e-05,
      "model_forward_time": 0.11448264122009277,
      "step": 12068
    },
    {
      "epoch": 7.36572265625e-05,
      "step": 12068,
      "training_step_time": 0.3915529251098633
    },
    {
      "epoch": 7.3663330078125e-05,
      "model_forward_time": 0.11471843719482422,
      "step": 12069
    },
    {
      "epoch": 7.3663330078125e-05,
      "step": 12069,
      "training_step_time": 0.41265392303466797
    },
    {
      "epoch": 7.366943359375e-05,
      "grad_norm": 0.21731320023536682,
      "learning_rate": 9.388154797397871e-05,
      "loss": 0.0562,
      "step": 12070
    },
    {
      "epoch": 7.366943359375e-05,
      "model_forward_time": 0.11444902420043945,
      "step": 12070
    },
    {
      "epoch": 7.366943359375e-05,
      "step": 12070,
      "training_step_time": 0.39245152473449707
    },
    {
      "epoch": 7.3675537109375e-05,
      "model_forward_time": 0.11444664001464844,
      "step": 12071
    },
    {
      "epoch": 7.3675537109375e-05,
      "step": 12071,
      "training_step_time": 0.42400193214416504
    },
    {
      "epoch": 7.3681640625e-05,
      "model_forward_time": 0.11459755897521973,
      "step": 12072
    },
    {
      "epoch": 7.3681640625e-05,
      "step": 12072,
      "training_step_time": 0.39472365379333496
    },
    {
      "epoch": 7.3687744140625e-05,
      "model_forward_time": 0.11462569236755371,
      "step": 12073
    },
    {
      "epoch": 7.3687744140625e-05,
      "step": 12073,
      "training_step_time": 0.663323163986206
    },
    {
      "epoch": 7.369384765625e-05,
      "model_forward_time": 0.1144402027130127,
      "step": 12074
    },
    {
      "epoch": 7.369384765625e-05,
      "step": 12074,
      "training_step_time": 0.3883242607116699
    },
    {
      "epoch": 7.3699951171875e-05,
      "model_forward_time": 0.1142740249633789,
      "step": 12075
    },
    {
      "epoch": 7.3699951171875e-05,
      "step": 12075,
      "training_step_time": 0.3921849727630615
    },
    {
      "epoch": 7.37060546875e-05,
      "model_forward_time": 0.11472153663635254,
      "step": 12076
    },
    {
      "epoch": 7.37060546875e-05,
      "step": 12076,
      "training_step_time": 0.3846399784088135
    },
    {
      "epoch": 7.3712158203125e-05,
      "model_forward_time": 0.1153113842010498,
      "step": 12077
    },
    {
      "epoch": 7.3712158203125e-05,
      "step": 12077,
      "training_step_time": 0.39187073707580566
    },
    {
      "epoch": 7.371826171875e-05,
      "model_forward_time": 0.11450910568237305,
      "step": 12078
    },
    {
      "epoch": 7.371826171875e-05,
      "step": 12078,
      "training_step_time": 0.37747955322265625
    },
    {
      "epoch": 7.3724365234375e-05,
      "model_forward_time": 0.1147470474243164,
      "step": 12079
    },
    {
      "epoch": 7.3724365234375e-05,
      "step": 12079,
      "training_step_time": 0.7649180889129639
    },
    {
      "epoch": 7.373046875e-05,
      "grad_norm": 0.2832183539867401,
      "learning_rate": 9.38683318236619e-05,
      "loss": 0.057,
      "step": 12080
    },
    {
      "epoch": 7.373046875e-05,
      "model_forward_time": 0.11457538604736328,
      "step": 12080
    },
    {
      "epoch": 7.373046875e-05,
      "step": 12080,
      "training_step_time": 0.4808371067047119
    },
    {
      "epoch": 7.3736572265625e-05,
      "model_forward_time": 0.11421918869018555,
      "step": 12081
    },
    {
      "epoch": 7.3736572265625e-05,
      "step": 12081,
      "training_step_time": 0.39662718772888184
    },
    {
      "epoch": 7.374267578125e-05,
      "model_forward_time": 0.1144552230834961,
      "step": 12082
    },
    {
      "epoch": 7.374267578125e-05,
      "step": 12082,
      "training_step_time": 0.37676143646240234
    },
    {
      "epoch": 7.3748779296875e-05,
      "model_forward_time": 0.11493730545043945,
      "step": 12083
    },
    {
      "epoch": 7.3748779296875e-05,
      "step": 12083,
      "training_step_time": 0.4709951877593994
    },
    {
      "epoch": 7.37548828125e-05,
      "model_forward_time": 0.11571693420410156,
      "step": 12084
    },
    {
      "epoch": 7.37548828125e-05,
      "step": 12084,
      "training_step_time": 0.3649570941925049
    },
    {
      "epoch": 7.3760986328125e-05,
      "model_forward_time": 0.1149601936340332,
      "step": 12085
    },
    {
      "epoch": 7.3760986328125e-05,
      "step": 12085,
      "training_step_time": 0.5554158687591553
    },
    {
      "epoch": 7.376708984375e-05,
      "model_forward_time": 0.11461687088012695,
      "step": 12086
    },
    {
      "epoch": 7.376708984375e-05,
      "step": 12086,
      "training_step_time": 0.3970792293548584
    },
    {
      "epoch": 7.3773193359375e-05,
      "model_forward_time": 0.11505699157714844,
      "step": 12087
    },
    {
      "epoch": 7.3773193359375e-05,
      "step": 12087,
      "training_step_time": 0.4007542133331299
    },
    {
      "epoch": 7.3779296875e-05,
      "model_forward_time": 0.11537623405456543,
      "step": 12088
    },
    {
      "epoch": 7.3779296875e-05,
      "step": 12088,
      "training_step_time": 0.386340856552124
    },
    {
      "epoch": 7.3785400390625e-05,
      "model_forward_time": 0.11550307273864746,
      "step": 12089
    },
    {
      "epoch": 7.3785400390625e-05,
      "step": 12089,
      "training_step_time": 0.4003598690032959
    },
    {
      "epoch": 7.379150390625e-05,
      "grad_norm": 0.16302821040153503,
      "learning_rate": 9.385510234730415e-05,
      "loss": 0.0514,
      "step": 12090
    },
    {
      "epoch": 7.379150390625e-05,
      "model_forward_time": 0.11466002464294434,
      "step": 12090
    },
    {
      "epoch": 7.379150390625e-05,
      "step": 12090,
      "training_step_time": 0.3978235721588135
    },
    {
      "epoch": 7.3797607421875e-05,
      "model_forward_time": 0.11541438102722168,
      "step": 12091
    },
    {
      "epoch": 7.3797607421875e-05,
      "step": 12091,
      "training_step_time": 0.7279813289642334
    },
    {
      "epoch": 7.38037109375e-05,
      "model_forward_time": 0.11457419395446777,
      "step": 12092
    },
    {
      "epoch": 7.38037109375e-05,
      "step": 12092,
      "training_step_time": 0.43751955032348633
    },
    {
      "epoch": 7.3809814453125e-05,
      "model_forward_time": 0.1147317886352539,
      "step": 12093
    },
    {
      "epoch": 7.3809814453125e-05,
      "step": 12093,
      "training_step_time": 0.45551276206970215
    },
    {
      "epoch": 7.381591796875e-05,
      "model_forward_time": 0.11562037467956543,
      "step": 12094
    },
    {
      "epoch": 7.381591796875e-05,
      "step": 12094,
      "training_step_time": 0.48668909072875977
    },
    {
      "epoch": 7.3822021484375e-05,
      "model_forward_time": 0.11472582817077637,
      "step": 12095
    },
    {
      "epoch": 7.3822021484375e-05,
      "step": 12095,
      "training_step_time": 0.4710056781768799
    },
    {
      "epoch": 7.3828125e-05,
      "model_forward_time": 0.11464715003967285,
      "step": 12096
    },
    {
      "epoch": 7.3828125e-05,
      "step": 12096,
      "training_step_time": 0.40059614181518555
    },
    {
      "epoch": 7.3834228515625e-05,
      "model_forward_time": 0.11470580101013184,
      "step": 12097
    },
    {
      "epoch": 7.3834228515625e-05,
      "step": 12097,
      "training_step_time": 0.39327502250671387
    },
    {
      "epoch": 7.384033203125e-05,
      "model_forward_time": 0.11531567573547363,
      "step": 12098
    },
    {
      "epoch": 7.384033203125e-05,
      "step": 12098,
      "training_step_time": 0.39530253410339355
    },
    {
      "epoch": 7.3846435546875e-05,
      "model_forward_time": 0.11454224586486816,
      "step": 12099
    },
    {
      "epoch": 7.3846435546875e-05,
      "step": 12099,
      "training_step_time": 0.4178192615509033
    },
    {
      "epoch": 7.38525390625e-05,
      "grad_norm": 0.16352856159210205,
      "learning_rate": 9.384185954892422e-05,
      "loss": 0.059,
      "step": 12100
    },
    {
      "epoch": 7.38525390625e-05,
      "model_forward_time": 0.1155397891998291,
      "step": 12100
    },
    {
      "epoch": 7.38525390625e-05,
      "step": 12100,
      "training_step_time": 0.3882617950439453
    },
    {
      "epoch": 7.3858642578125e-05,
      "model_forward_time": 0.11617136001586914,
      "step": 12101
    },
    {
      "epoch": 7.3858642578125e-05,
      "step": 12101,
      "training_step_time": 0.3973402976989746
    },
    {
      "epoch": 7.386474609375e-05,
      "model_forward_time": 0.11590027809143066,
      "step": 12102
    },
    {
      "epoch": 7.386474609375e-05,
      "step": 12102,
      "training_step_time": 0.39674949645996094
    },
    {
      "epoch": 7.3870849609375e-05,
      "model_forward_time": 0.11502885818481445,
      "step": 12103
    },
    {
      "epoch": 7.3870849609375e-05,
      "step": 12103,
      "training_step_time": 0.6401195526123047
    },
    {
      "epoch": 7.3876953125e-05,
      "model_forward_time": 0.11446356773376465,
      "step": 12104
    },
    {
      "epoch": 7.3876953125e-05,
      "step": 12104,
      "training_step_time": 0.3761560916900635
    },
    {
      "epoch": 7.3883056640625e-05,
      "model_forward_time": 0.11471056938171387,
      "step": 12105
    },
    {
      "epoch": 7.3883056640625e-05,
      "step": 12105,
      "training_step_time": 0.42041707038879395
    },
    {
      "epoch": 7.388916015625e-05,
      "model_forward_time": 0.11520266532897949,
      "step": 12106
    },
    {
      "epoch": 7.388916015625e-05,
      "step": 12106,
      "training_step_time": 0.39653539657592773
    },
    {
      "epoch": 7.3895263671875e-05,
      "model_forward_time": 0.11526322364807129,
      "step": 12107
    },
    {
      "epoch": 7.3895263671875e-05,
      "step": 12107,
      "training_step_time": 0.3997914791107178
    },
    {
      "epoch": 7.39013671875e-05,
      "model_forward_time": 0.11536145210266113,
      "step": 12108
    },
    {
      "epoch": 7.39013671875e-05,
      "step": 12108,
      "training_step_time": 0.42545366287231445
    },
    {
      "epoch": 7.3907470703125e-05,
      "model_forward_time": 0.11554789543151855,
      "step": 12109
    },
    {
      "epoch": 7.3907470703125e-05,
      "step": 12109,
      "training_step_time": 0.680696964263916
    },
    {
      "epoch": 7.391357421875e-05,
      "grad_norm": 0.1995328813791275,
      "learning_rate": 9.382860343254496e-05,
      "loss": 0.0553,
      "step": 12110
    },
    {
      "epoch": 7.391357421875e-05,
      "model_forward_time": 0.11506891250610352,
      "step": 12110
    },
    {
      "epoch": 7.391357421875e-05,
      "step": 12110,
      "training_step_time": 0.37723350524902344
    },
    {
      "epoch": 7.3919677734375e-05,
      "model_forward_time": 0.11480522155761719,
      "step": 12111
    },
    {
      "epoch": 7.3919677734375e-05,
      "step": 12111,
      "training_step_time": 0.39630818367004395
    },
    {
      "epoch": 7.392578125e-05,
      "model_forward_time": 0.11472725868225098,
      "step": 12112
    },
    {
      "epoch": 7.392578125e-05,
      "step": 12112,
      "training_step_time": 0.4043142795562744
    },
    {
      "epoch": 7.3931884765625e-05,
      "model_forward_time": 0.11444950103759766,
      "step": 12113
    },
    {
      "epoch": 7.3931884765625e-05,
      "step": 12113,
      "training_step_time": 0.45741748809814453
    },
    {
      "epoch": 7.393798828125e-05,
      "model_forward_time": 0.1148681640625,
      "step": 12114
    },
    {
      "epoch": 7.393798828125e-05,
      "step": 12114,
      "training_step_time": 0.3917109966278076
    },
    {
      "epoch": 7.3944091796875e-05,
      "model_forward_time": 0.11539936065673828,
      "step": 12115
    },
    {
      "epoch": 7.3944091796875e-05,
      "step": 12115,
      "training_step_time": 0.7027451992034912
    },
    {
      "epoch": 7.39501953125e-05,
      "model_forward_time": 0.11536550521850586,
      "step": 12116
    },
    {
      "epoch": 7.39501953125e-05,
      "step": 12116,
      "training_step_time": 0.38161373138427734
    },
    {
      "epoch": 7.3956298828125e-05,
      "model_forward_time": 0.11510014533996582,
      "step": 12117
    },
    {
      "epoch": 7.3956298828125e-05,
      "step": 12117,
      "training_step_time": 0.4468190670013428
    },
    {
      "epoch": 7.396240234375e-05,
      "model_forward_time": 0.11439037322998047,
      "step": 12118
    },
    {
      "epoch": 7.396240234375e-05,
      "step": 12118,
      "training_step_time": 0.4594738483428955
    },
    {
      "epoch": 7.3968505859375e-05,
      "model_forward_time": 0.1148993968963623,
      "step": 12119
    },
    {
      "epoch": 7.3968505859375e-05,
      "step": 12119,
      "training_step_time": 0.3797876834869385
    },
    {
      "epoch": 7.3974609375e-05,
      "grad_norm": 0.18377786874771118,
      "learning_rate": 9.381533400219318e-05,
      "loss": 0.0608,
      "step": 12120
    },
    {
      "epoch": 7.3974609375e-05,
      "model_forward_time": 0.11414909362792969,
      "step": 12120
    },
    {
      "epoch": 7.3974609375e-05,
      "step": 12120,
      "training_step_time": 0.418773889541626
    },
    {
      "epoch": 7.3980712890625e-05,
      "model_forward_time": 0.11527895927429199,
      "step": 12121
    },
    {
      "epoch": 7.3980712890625e-05,
      "step": 12121,
      "training_step_time": 0.6714344024658203
    },
    {
      "epoch": 7.398681640625e-05,
      "model_forward_time": 0.11476564407348633,
      "step": 12122
    },
    {
      "epoch": 7.398681640625e-05,
      "step": 12122,
      "training_step_time": 0.44470858573913574
    },
    {
      "epoch": 7.3992919921875e-05,
      "model_forward_time": 0.11454606056213379,
      "step": 12123
    },
    {
      "epoch": 7.3992919921875e-05,
      "step": 12123,
      "training_step_time": 0.4596445560455322
    },
    {
      "epoch": 7.39990234375e-05,
      "model_forward_time": 0.11492395401000977,
      "step": 12124
    },
    {
      "epoch": 7.39990234375e-05,
      "step": 12124,
      "training_step_time": 0.3772885799407959
    },
    {
      "epoch": 7.4005126953125e-05,
      "model_forward_time": 0.11464953422546387,
      "step": 12125
    },
    {
      "epoch": 7.4005126953125e-05,
      "step": 12125,
      "training_step_time": 0.42325329780578613
    },
    {
      "epoch": 7.401123046875e-05,
      "model_forward_time": 0.11475825309753418,
      "step": 12126
    },
    {
      "epoch": 7.401123046875e-05,
      "step": 12126,
      "training_step_time": 0.4862229824066162
    },
    {
      "epoch": 7.4017333984375e-05,
      "model_forward_time": 0.11474180221557617,
      "step": 12127
    },
    {
      "epoch": 7.4017333984375e-05,
      "step": 12127,
      "training_step_time": 0.43839287757873535
    },
    {
      "epoch": 7.40234375e-05,
      "model_forward_time": 0.11499261856079102,
      "step": 12128
    },
    {
      "epoch": 7.40234375e-05,
      "step": 12128,
      "training_step_time": 0.3882458209991455
    },
    {
      "epoch": 7.4029541015625e-05,
      "model_forward_time": 0.11540746688842773,
      "step": 12129
    },
    {
      "epoch": 7.4029541015625e-05,
      "step": 12129,
      "training_step_time": 0.39255332946777344
    },
    {
      "epoch": 7.403564453125e-05,
      "grad_norm": 0.14648103713989258,
      "learning_rate": 9.380205126189983e-05,
      "loss": 0.0547,
      "step": 12130
    },
    {
      "epoch": 7.403564453125e-05,
      "model_forward_time": 0.11465954780578613,
      "step": 12130
    },
    {
      "epoch": 7.403564453125e-05,
      "step": 12130,
      "training_step_time": 0.3938610553741455
    },
    {
      "epoch": 7.4041748046875e-05,
      "model_forward_time": 0.11631917953491211,
      "step": 12131
    },
    {
      "epoch": 7.4041748046875e-05,
      "step": 12131,
      "training_step_time": 0.4075031280517578
    },
    {
      "epoch": 7.40478515625e-05,
      "model_forward_time": 0.11535429954528809,
      "step": 12132
    },
    {
      "epoch": 7.40478515625e-05,
      "step": 12132,
      "training_step_time": 0.38677072525024414
    },
    {
      "epoch": 7.4053955078125e-05,
      "model_forward_time": 0.11521744728088379,
      "step": 12133
    },
    {
      "epoch": 7.4053955078125e-05,
      "step": 12133,
      "training_step_time": 0.7178871631622314
    },
    {
      "epoch": 7.406005859375e-05,
      "model_forward_time": 0.11476445198059082,
      "step": 12134
    },
    {
      "epoch": 7.406005859375e-05,
      "step": 12134,
      "training_step_time": 0.47623491287231445
    },
    {
      "epoch": 7.4066162109375e-05,
      "model_forward_time": 0.1147160530090332,
      "step": 12135
    },
    {
      "epoch": 7.4066162109375e-05,
      "step": 12135,
      "training_step_time": 0.4632580280303955
    },
    {
      "epoch": 7.4072265625e-05,
      "model_forward_time": 0.11454272270202637,
      "step": 12136
    },
    {
      "epoch": 7.4072265625e-05,
      "step": 12136,
      "training_step_time": 0.4779844284057617
    },
    {
      "epoch": 7.4078369140625e-05,
      "model_forward_time": 0.11422300338745117,
      "step": 12137
    },
    {
      "epoch": 7.4078369140625e-05,
      "step": 12137,
      "training_step_time": 0.46766114234924316
    },
    {
      "epoch": 7.408447265625e-05,
      "model_forward_time": 0.11419916152954102,
      "step": 12138
    },
    {
      "epoch": 7.408447265625e-05,
      "step": 12138,
      "training_step_time": 0.40594983100891113
    },
    {
      "epoch": 7.4090576171875e-05,
      "model_forward_time": 0.11400079727172852,
      "step": 12139
    },
    {
      "epoch": 7.4090576171875e-05,
      "step": 12139,
      "training_step_time": 0.40700268745422363
    },
    {
      "epoch": 7.40966796875e-05,
      "grad_norm": 0.1769542396068573,
      "learning_rate": 9.378875521569981e-05,
      "loss": 0.0561,
      "step": 12140
    },
    {
      "epoch": 7.40966796875e-05,
      "model_forward_time": 0.11465096473693848,
      "step": 12140
    },
    {
      "epoch": 7.40966796875e-05,
      "step": 12140,
      "training_step_time": 0.43033647537231445
    },
    {
      "epoch": 7.4102783203125e-05,
      "model_forward_time": 0.11509418487548828,
      "step": 12141
    },
    {
      "epoch": 7.4102783203125e-05,
      "step": 12141,
      "training_step_time": 0.3875603675842285
    },
    {
      "epoch": 7.410888671875e-05,
      "model_forward_time": 0.11489510536193848,
      "step": 12142
    },
    {
      "epoch": 7.410888671875e-05,
      "step": 12142,
      "training_step_time": 0.3846256732940674
    },
    {
      "epoch": 7.4114990234375e-05,
      "model_forward_time": 0.1150214672088623,
      "step": 12143
    },
    {
      "epoch": 7.4114990234375e-05,
      "step": 12143,
      "training_step_time": 0.43510961532592773
    },
    {
      "epoch": 7.412109375e-05,
      "model_forward_time": 0.1152486801147461,
      "step": 12144
    },
    {
      "epoch": 7.412109375e-05,
      "step": 12144,
      "training_step_time": 0.39547300338745117
    },
    {
      "epoch": 7.4127197265625e-05,
      "model_forward_time": 0.11492776870727539,
      "step": 12145
    },
    {
      "epoch": 7.4127197265625e-05,
      "step": 12145,
      "training_step_time": 0.6538307666778564
    },
    {
      "epoch": 7.413330078125e-05,
      "model_forward_time": 0.11461806297302246,
      "step": 12146
    },
    {
      "epoch": 7.413330078125e-05,
      "step": 12146,
      "training_step_time": 0.39188694953918457
    },
    {
      "epoch": 7.4139404296875e-05,
      "model_forward_time": 0.11535239219665527,
      "step": 12147
    },
    {
      "epoch": 7.4139404296875e-05,
      "step": 12147,
      "training_step_time": 0.3973836898803711
    },
    {
      "epoch": 7.41455078125e-05,
      "model_forward_time": 0.1146700382232666,
      "step": 12148
    },
    {
      "epoch": 7.41455078125e-05,
      "step": 12148,
      "training_step_time": 0.398695707321167
    },
    {
      "epoch": 7.4151611328125e-05,
      "model_forward_time": 0.11502838134765625,
      "step": 12149
    },
    {
      "epoch": 7.4151611328125e-05,
      "step": 12149,
      "training_step_time": 0.365614652633667
    },
    {
      "epoch": 7.415771484375e-05,
      "grad_norm": 0.19184498488903046,
      "learning_rate": 9.377544586763215e-05,
      "loss": 0.0572,
      "step": 12150
    },
    {
      "epoch": 7.415771484375e-05,
      "model_forward_time": 0.11506104469299316,
      "step": 12150
    },
    {
      "epoch": 7.415771484375e-05,
      "step": 12150,
      "training_step_time": 0.4788217544555664
    },
    {
      "epoch": 7.4163818359375e-05,
      "model_forward_time": 0.11458683013916016,
      "step": 12151
    },
    {
      "epoch": 7.4163818359375e-05,
      "step": 12151,
      "training_step_time": 0.5889718532562256
    },
    {
      "epoch": 7.4169921875e-05,
      "model_forward_time": 0.1147150993347168,
      "step": 12152
    },
    {
      "epoch": 7.4169921875e-05,
      "step": 12152,
      "training_step_time": 0.41757750511169434
    },
    {
      "epoch": 7.4176025390625e-05,
      "model_forward_time": 0.11419391632080078,
      "step": 12153
    },
    {
      "epoch": 7.4176025390625e-05,
      "step": 12153,
      "training_step_time": 0.41187381744384766
    },
    {
      "epoch": 7.418212890625e-05,
      "model_forward_time": 0.11524081230163574,
      "step": 12154
    },
    {
      "epoch": 7.418212890625e-05,
      "step": 12154,
      "training_step_time": 0.414170503616333
    },
    {
      "epoch": 7.4188232421875e-05,
      "model_forward_time": 0.11508679389953613,
      "step": 12155
    },
    {
      "epoch": 7.4188232421875e-05,
      "step": 12155,
      "training_step_time": 0.3969459533691406
    },
    {
      "epoch": 7.41943359375e-05,
      "model_forward_time": 0.11477303504943848,
      "step": 12156
    },
    {
      "epoch": 7.41943359375e-05,
      "step": 12156,
      "training_step_time": 0.45034170150756836
    },
    {
      "epoch": 7.4200439453125e-05,
      "model_forward_time": 0.11533498764038086,
      "step": 12157
    },
    {
      "epoch": 7.4200439453125e-05,
      "step": 12157,
      "training_step_time": 0.627593994140625
    },
    {
      "epoch": 7.420654296875e-05,
      "model_forward_time": 0.11430072784423828,
      "step": 12158
    },
    {
      "epoch": 7.420654296875e-05,
      "step": 12158,
      "training_step_time": 0.39534997940063477
    },
    {
      "epoch": 7.4212646484375e-05,
      "model_forward_time": 0.11438751220703125,
      "step": 12159
    },
    {
      "epoch": 7.4212646484375e-05,
      "step": 12159,
      "training_step_time": 0.40301513671875
    },
    {
      "epoch": 7.421875e-05,
      "grad_norm": 0.1632259488105774,
      "learning_rate": 9.376212322173985e-05,
      "loss": 0.063,
      "step": 12160
    },
    {
      "epoch": 7.421875e-05,
      "model_forward_time": 0.11462903022766113,
      "step": 12160
    },
    {
      "epoch": 7.421875e-05,
      "step": 12160,
      "training_step_time": 0.4029734134674072
    },
    {
      "epoch": 7.4224853515625e-05,
      "model_forward_time": 0.11482810974121094,
      "step": 12161
    },
    {
      "epoch": 7.4224853515625e-05,
      "step": 12161,
      "training_step_time": 0.40081310272216797
    },
    {
      "epoch": 7.423095703125e-05,
      "model_forward_time": 0.11485099792480469,
      "step": 12162
    },
    {
      "epoch": 7.423095703125e-05,
      "step": 12162,
      "training_step_time": 0.4574429988861084
    },
    {
      "epoch": 7.4237060546875e-05,
      "model_forward_time": 0.11522293090820312,
      "step": 12163
    },
    {
      "epoch": 7.4237060546875e-05,
      "step": 12163,
      "training_step_time": 0.701099157333374
    },
    {
      "epoch": 7.42431640625e-05,
      "model_forward_time": 0.1144869327545166,
      "step": 12164
    },
    {
      "epoch": 7.42431640625e-05,
      "step": 12164,
      "training_step_time": 0.4712662696838379
    },
    {
      "epoch": 7.4249267578125e-05,
      "model_forward_time": 0.11385631561279297,
      "step": 12165
    },
    {
      "epoch": 7.4249267578125e-05,
      "step": 12165,
      "training_step_time": 0.3825099468231201
    },
    {
      "epoch": 7.425537109375e-05,
      "model_forward_time": 0.11441230773925781,
      "step": 12166
    },
    {
      "epoch": 7.425537109375e-05,
      "step": 12166,
      "training_step_time": 0.4318583011627197
    },
    {
      "epoch": 7.4261474609375e-05,
      "model_forward_time": 0.11508440971374512,
      "step": 12167
    },
    {
      "epoch": 7.4261474609375e-05,
      "step": 12167,
      "training_step_time": 0.45267701148986816
    },
    {
      "epoch": 7.4267578125e-05,
      "model_forward_time": 0.11419963836669922,
      "step": 12168
    },
    {
      "epoch": 7.4267578125e-05,
      "step": 12168,
      "training_step_time": 0.41713571548461914
    },
    {
      "epoch": 7.4273681640625e-05,
      "model_forward_time": 0.11549234390258789,
      "step": 12169
    },
    {
      "epoch": 7.4273681640625e-05,
      "step": 12169,
      "training_step_time": 0.4517548084259033
    },
    {
      "epoch": 7.427978515625e-05,
      "grad_norm": 0.14007796347141266,
      "learning_rate": 9.374878728206999e-05,
      "loss": 0.0554,
      "step": 12170
    },
    {
      "epoch": 7.427978515625e-05,
      "model_forward_time": 0.1143953800201416,
      "step": 12170
    },
    {
      "epoch": 7.427978515625e-05,
      "step": 12170,
      "training_step_time": 0.3995521068572998
    },
    {
      "epoch": 7.4285888671875e-05,
      "model_forward_time": 0.11493372917175293,
      "step": 12171
    },
    {
      "epoch": 7.4285888671875e-05,
      "step": 12171,
      "training_step_time": 0.4005565643310547
    },
    {
      "epoch": 7.42919921875e-05,
      "model_forward_time": 0.1148061752319336,
      "step": 12172
    },
    {
      "epoch": 7.42919921875e-05,
      "step": 12172,
      "training_step_time": 0.4003732204437256
    },
    {
      "epoch": 7.4298095703125e-05,
      "model_forward_time": 0.11493659019470215,
      "step": 12173
    },
    {
      "epoch": 7.4298095703125e-05,
      "step": 12173,
      "training_step_time": 0.4017374515533447
    },
    {
      "epoch": 7.430419921875e-05,
      "model_forward_time": 0.11516642570495605,
      "step": 12174
    },
    {
      "epoch": 7.430419921875e-05,
      "step": 12174,
      "training_step_time": 0.38184547424316406
    },
    {
      "epoch": 7.4310302734375e-05,
      "model_forward_time": 0.11509060859680176,
      "step": 12175
    },
    {
      "epoch": 7.4310302734375e-05,
      "step": 12175,
      "training_step_time": 0.6682581901550293
    },
    {
      "epoch": 7.431640625e-05,
      "model_forward_time": 0.11630439758300781,
      "step": 12176
    },
    {
      "epoch": 7.431640625e-05,
      "step": 12176,
      "training_step_time": 0.43059682846069336
    },
    {
      "epoch": 7.4322509765625e-05,
      "model_forward_time": 0.11710309982299805,
      "step": 12177
    },
    {
      "epoch": 7.4322509765625e-05,
      "step": 12177,
      "training_step_time": 0.6274008750915527
    },
    {
      "epoch": 7.432861328125e-05,
      "model_forward_time": 0.11772346496582031,
      "step": 12178
    },
    {
      "epoch": 7.432861328125e-05,
      "step": 12178,
      "training_step_time": 0.6997137069702148
    },
    {
      "epoch": 7.4334716796875e-05,
      "model_forward_time": 0.11840105056762695,
      "step": 12179
    },
    {
      "epoch": 7.4334716796875e-05,
      "step": 12179,
      "training_step_time": 0.5811755657196045
    },
    {
      "epoch": 7.43408203125e-05,
      "grad_norm": 0.15713590383529663,
      "learning_rate": 9.373543805267368e-05,
      "loss": 0.0574,
      "step": 12180
    },
    {
      "epoch": 7.43408203125e-05,
      "model_forward_time": 0.14325189590454102,
      "step": 12180
    },
    {
      "epoch": 7.43408203125e-05,
      "step": 12180,
      "training_step_time": 0.7691769599914551
    },
    {
      "epoch": 7.4346923828125e-05,
      "model_forward_time": 0.14017724990844727,
      "step": 12181
    },
    {
      "epoch": 7.4346923828125e-05,
      "step": 12181,
      "training_step_time": 0.6581466197967529
    },
    {
      "epoch": 7.435302734375e-05,
      "model_forward_time": 0.12562060356140137,
      "step": 12182
    },
    {
      "epoch": 7.435302734375e-05,
      "step": 12182,
      "training_step_time": 0.6502118110656738
    },
    {
      "epoch": 7.4359130859375e-05,
      "model_forward_time": 0.11958622932434082,
      "step": 12183
    },
    {
      "epoch": 7.4359130859375e-05,
      "step": 12183,
      "training_step_time": 0.6975297927856445
    },
    {
      "epoch": 7.4365234375e-05,
      "model_forward_time": 0.11739683151245117,
      "step": 12184
    },
    {
      "epoch": 7.4365234375e-05,
      "step": 12184,
      "training_step_time": 0.6444566249847412
    },
    {
      "epoch": 7.4371337890625e-05,
      "model_forward_time": 0.12302207946777344,
      "step": 12185
    },
    {
      "epoch": 7.4371337890625e-05,
      "step": 12185,
      "training_step_time": 0.6615972518920898
    },
    {
      "epoch": 7.437744140625e-05,
      "model_forward_time": 0.12571024894714355,
      "step": 12186
    },
    {
      "epoch": 7.437744140625e-05,
      "step": 12186,
      "training_step_time": 0.7837913036346436
    },
    {
      "epoch": 7.4383544921875e-05,
      "model_forward_time": 0.11610102653503418,
      "step": 12187
    },
    {
      "epoch": 7.4383544921875e-05,
      "step": 12187,
      "training_step_time": 0.6388797760009766
    },
    {
      "epoch": 7.43896484375e-05,
      "model_forward_time": 0.11937475204467773,
      "step": 12188
    },
    {
      "epoch": 7.43896484375e-05,
      "step": 12188,
      "training_step_time": 0.6590020656585693
    },
    {
      "epoch": 7.4395751953125e-05,
      "model_forward_time": 0.12030529975891113,
      "step": 12189
    },
    {
      "epoch": 7.4395751953125e-05,
      "step": 12189,
      "training_step_time": 0.7945642471313477
    },
    {
      "epoch": 7.440185546875e-05,
      "grad_norm": 0.16391371190547943,
      "learning_rate": 9.372207553760603e-05,
      "loss": 0.0635,
      "step": 12190
    },
    {
      "epoch": 7.440185546875e-05,
      "model_forward_time": 0.12067770957946777,
      "step": 12190
    },
    {
      "epoch": 7.440185546875e-05,
      "step": 12190,
      "training_step_time": 0.6284563541412354
    },
    {
      "epoch": 7.4407958984375e-05,
      "model_forward_time": 0.11868882179260254,
      "step": 12191
    },
    {
      "epoch": 7.4407958984375e-05,
      "step": 12191,
      "training_step_time": 0.6466488838195801
    },
    {
      "epoch": 7.44140625e-05,
      "model_forward_time": 0.12260031700134277,
      "step": 12192
    },
    {
      "epoch": 7.44140625e-05,
      "step": 12192,
      "training_step_time": 0.6651608943939209
    },
    {
      "epoch": 7.4420166015625e-05,
      "model_forward_time": 0.12676787376403809,
      "step": 12193
    },
    {
      "epoch": 7.4420166015625e-05,
      "step": 12193,
      "training_step_time": 0.6524941921234131
    },
    {
      "epoch": 7.442626953125e-05,
      "model_forward_time": 0.12488794326782227,
      "step": 12194
    },
    {
      "epoch": 7.442626953125e-05,
      "step": 12194,
      "training_step_time": 0.726057767868042
    },
    {
      "epoch": 7.4432373046875e-05,
      "model_forward_time": 0.1200096607208252,
      "step": 12195
    },
    {
      "epoch": 7.4432373046875e-05,
      "step": 12195,
      "training_step_time": 0.6166675090789795
    },
    {
      "epoch": 7.44384765625e-05,
      "model_forward_time": 0.11926984786987305,
      "step": 12196
    },
    {
      "epoch": 7.44384765625e-05,
      "step": 12196,
      "training_step_time": 0.7059378623962402
    },
    {
      "epoch": 7.4444580078125e-05,
      "model_forward_time": 0.11728453636169434,
      "step": 12197
    },
    {
      "epoch": 7.4444580078125e-05,
      "step": 12197,
      "training_step_time": 0.6672468185424805
    },
    {
      "epoch": 7.445068359375e-05,
      "model_forward_time": 0.11663508415222168,
      "step": 12198
    },
    {
      "epoch": 7.445068359375e-05,
      "step": 12198,
      "training_step_time": 0.8600246906280518
    },
    {
      "epoch": 7.4456787109375e-05,
      "model_forward_time": 0.11645245552062988,
      "step": 12199
    },
    {
      "epoch": 7.4456787109375e-05,
      "step": 12199,
      "training_step_time": 0.6877725124359131
    },
    {
      "epoch": 7.4462890625e-05,
      "grad_norm": 0.1815832108259201,
      "learning_rate": 9.370869974092629e-05,
      "loss": 0.0607,
      "step": 12200
    },
    {
      "epoch": 7.4462890625e-05,
      "model_forward_time": 0.11865806579589844,
      "step": 12200
    },
    {
      "epoch": 7.4462890625e-05,
      "step": 12200,
      "training_step_time": 0.6989567279815674
    },
    {
      "epoch": 7.4468994140625e-05,
      "model_forward_time": 0.1220698356628418,
      "step": 12201
    },
    {
      "epoch": 7.4468994140625e-05,
      "step": 12201,
      "training_step_time": 0.6591839790344238
    },
    {
      "epoch": 7.447509765625e-05,
      "model_forward_time": 0.11905932426452637,
      "step": 12202
    },
    {
      "epoch": 7.447509765625e-05,
      "step": 12202,
      "training_step_time": 0.6709918975830078
    },
    {
      "epoch": 7.4481201171875e-05,
      "model_forward_time": 0.12035322189331055,
      "step": 12203
    },
    {
      "epoch": 7.4481201171875e-05,
      "step": 12203,
      "training_step_time": 0.616356372833252
    },
    {
      "epoch": 7.44873046875e-05,
      "model_forward_time": 0.12718534469604492,
      "step": 12204
    },
    {
      "epoch": 7.44873046875e-05,
      "step": 12204,
      "training_step_time": 0.6941945552825928
    },
    {
      "epoch": 7.4493408203125e-05,
      "model_forward_time": 0.1269984245300293,
      "step": 12205
    },
    {
      "epoch": 7.4493408203125e-05,
      "step": 12205,
      "training_step_time": 0.690014123916626
    },
    {
      "epoch": 7.449951171875e-05,
      "model_forward_time": 0.11905622482299805,
      "step": 12206
    },
    {
      "epoch": 7.449951171875e-05,
      "step": 12206,
      "training_step_time": 0.7177231311798096
    },
    {
      "epoch": 7.4505615234375e-05,
      "model_forward_time": 0.12239575386047363,
      "step": 12207
    },
    {
      "epoch": 7.4505615234375e-05,
      "step": 12207,
      "training_step_time": 0.669525146484375
    },
    {
      "epoch": 7.451171875e-05,
      "model_forward_time": 0.1190330982208252,
      "step": 12208
    },
    {
      "epoch": 7.451171875e-05,
      "step": 12208,
      "training_step_time": 0.6955134868621826
    },
    {
      "epoch": 7.4517822265625e-05,
      "model_forward_time": 0.11855483055114746,
      "step": 12209
    },
    {
      "epoch": 7.4517822265625e-05,
      "step": 12209,
      "training_step_time": 0.6348614692687988
    },
    {
      "epoch": 7.452392578125e-05,
      "grad_norm": 0.25320306420326233,
      "learning_rate": 9.369531066669758e-05,
      "loss": 0.0673,
      "step": 12210
    },
    {
      "epoch": 7.452392578125e-05,
      "model_forward_time": 0.11958456039428711,
      "step": 12210
    },
    {
      "epoch": 7.452392578125e-05,
      "step": 12210,
      "training_step_time": 0.6505508422851562
    },
    {
      "epoch": 7.4530029296875e-05,
      "model_forward_time": 0.14654326438903809,
      "step": 12211
    },
    {
      "epoch": 7.4530029296875e-05,
      "step": 12211,
      "training_step_time": 0.6175475120544434
    },
    {
      "epoch": 7.45361328125e-05,
      "model_forward_time": 0.1172933578491211,
      "step": 12212
    },
    {
      "epoch": 7.45361328125e-05,
      "step": 12212,
      "training_step_time": 0.7008011341094971
    },
    {
      "epoch": 7.4542236328125e-05,
      "model_forward_time": 0.11846613883972168,
      "step": 12213
    },
    {
      "epoch": 7.4542236328125e-05,
      "step": 12213,
      "training_step_time": 0.6697690486907959
    },
    {
      "epoch": 7.454833984375e-05,
      "model_forward_time": 0.13210821151733398,
      "step": 12214
    },
    {
      "epoch": 7.454833984375e-05,
      "step": 12214,
      "training_step_time": 0.7598638534545898
    },
    {
      "epoch": 7.4554443359375e-05,
      "model_forward_time": 0.12630844116210938,
      "step": 12215
    },
    {
      "epoch": 7.4554443359375e-05,
      "step": 12215,
      "training_step_time": 0.6684315204620361
    },
    {
      "epoch": 7.4560546875e-05,
      "model_forward_time": 0.11734962463378906,
      "step": 12216
    },
    {
      "epoch": 7.4560546875e-05,
      "step": 12216,
      "training_step_time": 0.8347437381744385
    },
    {
      "epoch": 7.4566650390625e-05,
      "model_forward_time": 0.11762452125549316,
      "step": 12217
    },
    {
      "epoch": 7.4566650390625e-05,
      "step": 12217,
      "training_step_time": 0.6316580772399902
    },
    {
      "epoch": 7.457275390625e-05,
      "model_forward_time": 0.11957311630249023,
      "step": 12218
    },
    {
      "epoch": 7.457275390625e-05,
      "step": 12218,
      "training_step_time": 0.5938591957092285
    },
    {
      "epoch": 7.4578857421875e-05,
      "model_forward_time": 0.11541056632995605,
      "step": 12219
    },
    {
      "epoch": 7.4578857421875e-05,
      "step": 12219,
      "training_step_time": 0.6047918796539307
    },
    {
      "epoch": 7.45849609375e-05,
      "grad_norm": 0.17155633866786957,
      "learning_rate": 9.368190831898724e-05,
      "loss": 0.0669,
      "step": 12220
    },
    {
      "epoch": 7.45849609375e-05,
      "model_forward_time": 0.11861491203308105,
      "step": 12220
    },
    {
      "epoch": 7.45849609375e-05,
      "step": 12220,
      "training_step_time": 0.6601948738098145
    },
    {
      "epoch": 7.4591064453125e-05,
      "model_forward_time": 0.13383698463439941,
      "step": 12221
    },
    {
      "epoch": 7.4591064453125e-05,
      "step": 12221,
      "training_step_time": 0.695671796798706
    },
    {
      "epoch": 7.459716796875e-05,
      "model_forward_time": 0.1315314769744873,
      "step": 12222
    },
    {
      "epoch": 7.459716796875e-05,
      "step": 12222,
      "training_step_time": 0.7463588714599609
    },
    {
      "epoch": 7.4603271484375e-05,
      "model_forward_time": 0.12108445167541504,
      "step": 12223
    },
    {
      "epoch": 7.4603271484375e-05,
      "step": 12223,
      "training_step_time": 0.6809256076812744
    },
    {
      "epoch": 7.4609375e-05,
      "model_forward_time": 0.11642646789550781,
      "step": 12224
    },
    {
      "epoch": 7.4609375e-05,
      "step": 12224,
      "training_step_time": 0.6812803745269775
    },
    {
      "epoch": 7.4615478515625e-05,
      "model_forward_time": 0.11848282814025879,
      "step": 12225
    },
    {
      "epoch": 7.4615478515625e-05,
      "step": 12225,
      "training_step_time": 0.719679594039917
    },
    {
      "epoch": 7.462158203125e-05,
      "model_forward_time": 0.11941981315612793,
      "step": 12226
    },
    {
      "epoch": 7.462158203125e-05,
      "step": 12226,
      "training_step_time": 0.6678922176361084
    },
    {
      "epoch": 7.4627685546875e-05,
      "model_forward_time": 0.12164688110351562,
      "step": 12227
    },
    {
      "epoch": 7.4627685546875e-05,
      "step": 12227,
      "training_step_time": 0.8095085620880127
    },
    {
      "epoch": 7.46337890625e-05,
      "model_forward_time": 0.11682486534118652,
      "step": 12228
    },
    {
      "epoch": 7.46337890625e-05,
      "step": 12228,
      "training_step_time": 0.6457998752593994
    },
    {
      "epoch": 7.4639892578125e-05,
      "model_forward_time": 0.12239670753479004,
      "step": 12229
    },
    {
      "epoch": 7.4639892578125e-05,
      "step": 12229,
      "training_step_time": 0.6427671909332275
    },
    {
      "epoch": 7.464599609375e-05,
      "grad_norm": 0.24502591788768768,
      "learning_rate": 9.366849270186649e-05,
      "loss": 0.0724,
      "step": 12230
    },
    {
      "epoch": 7.464599609375e-05,
      "model_forward_time": 0.11849713325500488,
      "step": 12230
    },
    {
      "epoch": 7.464599609375e-05,
      "step": 12230,
      "training_step_time": 0.6506681442260742
    },
    {
      "epoch": 7.4652099609375e-05,
      "model_forward_time": 0.12293457984924316,
      "step": 12231
    },
    {
      "epoch": 7.4652099609375e-05,
      "step": 12231,
      "training_step_time": 0.6196279525756836
    },
    {
      "epoch": 7.4658203125e-05,
      "model_forward_time": 0.11713480949401855,
      "step": 12232
    },
    {
      "epoch": 7.4658203125e-05,
      "step": 12232,
      "training_step_time": 0.6666836738586426
    },
    {
      "epoch": 7.4664306640625e-05,
      "model_forward_time": 0.12537264823913574,
      "step": 12233
    },
    {
      "epoch": 7.4664306640625e-05,
      "step": 12233,
      "training_step_time": 0.6802408695220947
    },
    {
      "epoch": 7.467041015625e-05,
      "model_forward_time": 0.11587953567504883,
      "step": 12234
    },
    {
      "epoch": 7.467041015625e-05,
      "step": 12234,
      "training_step_time": 0.6370861530303955
    },
    {
      "epoch": 7.4676513671875e-05,
      "model_forward_time": 0.11965131759643555,
      "step": 12235
    },
    {
      "epoch": 7.4676513671875e-05,
      "step": 12235,
      "training_step_time": 0.7576866149902344
    },
    {
      "epoch": 7.46826171875e-05,
      "model_forward_time": 0.11878085136413574,
      "step": 12236
    },
    {
      "epoch": 7.46826171875e-05,
      "step": 12236,
      "training_step_time": 0.6352596282958984
    },
    {
      "epoch": 7.4688720703125e-05,
      "model_forward_time": 0.12420988082885742,
      "step": 12237
    },
    {
      "epoch": 7.4688720703125e-05,
      "step": 12237,
      "training_step_time": 0.5799202919006348
    },
    {
      "epoch": 7.469482421875e-05,
      "model_forward_time": 0.11649274826049805,
      "step": 12238
    },
    {
      "epoch": 7.469482421875e-05,
      "step": 12238,
      "training_step_time": 0.6475949287414551
    },
    {
      "epoch": 7.4700927734375e-05,
      "model_forward_time": 0.11858081817626953,
      "step": 12239
    },
    {
      "epoch": 7.4700927734375e-05,
      "step": 12239,
      "training_step_time": 0.6510281562805176
    },
    {
      "epoch": 7.470703125e-05,
      "grad_norm": 0.15460042655467987,
      "learning_rate": 9.365506381941066e-05,
      "loss": 0.062,
      "step": 12240
    },
    {
      "epoch": 7.470703125e-05,
      "model_forward_time": 0.11820435523986816,
      "step": 12240
    },
    {
      "epoch": 7.470703125e-05,
      "step": 12240,
      "training_step_time": 0.6246175765991211
    },
    {
      "epoch": 7.4713134765625e-05,
      "model_forward_time": 0.13894391059875488,
      "step": 12241
    },
    {
      "epoch": 7.4713134765625e-05,
      "step": 12241,
      "training_step_time": 0.6115124225616455
    },
    {
      "epoch": 7.471923828125e-05,
      "model_forward_time": 0.11997866630554199,
      "step": 12242
    },
    {
      "epoch": 7.471923828125e-05,
      "step": 12242,
      "training_step_time": 0.6425070762634277
    },
    {
      "epoch": 7.4725341796875e-05,
      "model_forward_time": 0.11882781982421875,
      "step": 12243
    },
    {
      "epoch": 7.4725341796875e-05,
      "step": 12243,
      "training_step_time": 0.6339871883392334
    },
    {
      "epoch": 7.47314453125e-05,
      "model_forward_time": 0.1198263168334961,
      "step": 12244
    },
    {
      "epoch": 7.47314453125e-05,
      "step": 12244,
      "training_step_time": 0.7035248279571533
    },
    {
      "epoch": 7.4737548828125e-05,
      "model_forward_time": 0.12943387031555176,
      "step": 12245
    },
    {
      "epoch": 7.4737548828125e-05,
      "step": 12245,
      "training_step_time": 0.7059063911437988
    },
    {
      "epoch": 7.474365234375e-05,
      "model_forward_time": 0.1211090087890625,
      "step": 12246
    },
    {
      "epoch": 7.474365234375e-05,
      "step": 12246,
      "training_step_time": 0.5837311744689941
    },
    {
      "epoch": 7.4749755859375e-05,
      "model_forward_time": 0.11768651008605957,
      "step": 12247
    },
    {
      "epoch": 7.4749755859375e-05,
      "step": 12247,
      "training_step_time": 0.4602823257446289
    },
    {
      "epoch": 7.4755859375e-05,
      "model_forward_time": 0.11789846420288086,
      "step": 12248
    },
    {
      "epoch": 7.4755859375e-05,
      "step": 12248,
      "training_step_time": 0.44794631004333496
    },
    {
      "epoch": 7.4761962890625e-05,
      "model_forward_time": 0.11802029609680176,
      "step": 12249
    },
    {
      "epoch": 7.4761962890625e-05,
      "step": 12249,
      "training_step_time": 0.40801453590393066
    },
    {
      "epoch": 7.476806640625e-05,
      "grad_norm": 0.16743110120296478,
      "learning_rate": 9.364162167569907e-05,
      "loss": 0.0635,
      "step": 12250
    },
    {
      "epoch": 7.476806640625e-05,
      "model_forward_time": 0.11699581146240234,
      "step": 12250
    },
    {
      "epoch": 7.476806640625e-05,
      "step": 12250,
      "training_step_time": 0.42584824562072754
    },
    {
      "epoch": 7.4774169921875e-05,
      "model_forward_time": 0.11585807800292969,
      "step": 12251
    },
    {
      "epoch": 7.4774169921875e-05,
      "step": 12251,
      "training_step_time": 0.43329691886901855
    },
    {
      "epoch": 7.47802734375e-05,
      "model_forward_time": 0.11584806442260742,
      "step": 12252
    },
    {
      "epoch": 7.47802734375e-05,
      "step": 12252,
      "training_step_time": 0.43947625160217285
    },
    {
      "epoch": 7.4786376953125e-05,
      "model_forward_time": 0.11533641815185547,
      "step": 12253
    },
    {
      "epoch": 7.4786376953125e-05,
      "step": 12253,
      "training_step_time": 0.4693582057952881
    },
    {
      "epoch": 7.479248046875e-05,
      "model_forward_time": 0.11500382423400879,
      "step": 12254
    },
    {
      "epoch": 7.479248046875e-05,
      "step": 12254,
      "training_step_time": 0.4263269901275635
    },
    {
      "epoch": 7.4798583984375e-05,
      "model_forward_time": 0.11577200889587402,
      "step": 12255
    },
    {
      "epoch": 7.4798583984375e-05,
      "step": 12255,
      "training_step_time": 0.4315836429595947
    },
    {
      "epoch": 7.48046875e-05,
      "model_forward_time": 0.11521720886230469,
      "step": 12256
    },
    {
      "epoch": 7.48046875e-05,
      "step": 12256,
      "training_step_time": 0.40233492851257324
    },
    {
      "epoch": 7.4810791015625e-05,
      "model_forward_time": 0.11544966697692871,
      "step": 12257
    },
    {
      "epoch": 7.4810791015625e-05,
      "step": 12257,
      "training_step_time": 0.4592752456665039
    },
    {
      "epoch": 7.481689453125e-05,
      "model_forward_time": 0.1154026985168457,
      "step": 12258
    },
    {
      "epoch": 7.481689453125e-05,
      "step": 12258,
      "training_step_time": 0.4458634853363037
    },
    {
      "epoch": 7.4822998046875e-05,
      "model_forward_time": 0.11576056480407715,
      "step": 12259
    },
    {
      "epoch": 7.4822998046875e-05,
      "step": 12259,
      "training_step_time": 0.45844364166259766
    },
    {
      "epoch": 7.48291015625e-05,
      "grad_norm": 0.21061943471431732,
      "learning_rate": 9.362816627481512e-05,
      "loss": 0.067,
      "step": 12260
    },
    {
      "epoch": 7.48291015625e-05,
      "model_forward_time": 0.11512231826782227,
      "step": 12260
    },
    {
      "epoch": 7.48291015625e-05,
      "step": 12260,
      "training_step_time": 0.39297962188720703
    },
    {
      "epoch": 7.4835205078125e-05,
      "model_forward_time": 0.13410139083862305,
      "step": 12261
    },
    {
      "epoch": 7.4835205078125e-05,
      "step": 12261,
      "training_step_time": 0.39391183853149414
    },
    {
      "epoch": 7.484130859375e-05,
      "model_forward_time": 0.11458992958068848,
      "step": 12262
    },
    {
      "epoch": 7.484130859375e-05,
      "step": 12262,
      "training_step_time": 0.39353227615356445
    },
    {
      "epoch": 7.4847412109375e-05,
      "model_forward_time": 0.11463165283203125,
      "step": 12263
    },
    {
      "epoch": 7.4847412109375e-05,
      "step": 12263,
      "training_step_time": 0.3903963565826416
    },
    {
      "epoch": 7.4853515625e-05,
      "model_forward_time": 0.11545801162719727,
      "step": 12264
    },
    {
      "epoch": 7.4853515625e-05,
      "step": 12264,
      "training_step_time": 0.38974881172180176
    },
    {
      "epoch": 7.4859619140625e-05,
      "model_forward_time": 0.11537742614746094,
      "step": 12265
    },
    {
      "epoch": 7.4859619140625e-05,
      "step": 12265,
      "training_step_time": 0.43825745582580566
    },
    {
      "epoch": 7.486572265625e-05,
      "model_forward_time": 0.11544418334960938,
      "step": 12266
    },
    {
      "epoch": 7.486572265625e-05,
      "step": 12266,
      "training_step_time": 0.4739112854003906
    },
    {
      "epoch": 7.4871826171875e-05,
      "model_forward_time": 0.11523318290710449,
      "step": 12267
    },
    {
      "epoch": 7.4871826171875e-05,
      "step": 12267,
      "training_step_time": 0.43662118911743164
    },
    {
      "epoch": 7.48779296875e-05,
      "model_forward_time": 0.11547207832336426,
      "step": 12268
    },
    {
      "epoch": 7.48779296875e-05,
      "step": 12268,
      "training_step_time": 0.3955678939819336
    },
    {
      "epoch": 7.4884033203125e-05,
      "model_forward_time": 0.11502647399902344,
      "step": 12269
    },
    {
      "epoch": 7.4884033203125e-05,
      "step": 12269,
      "training_step_time": 0.43036317825317383
    },
    {
      "epoch": 7.489013671875e-05,
      "grad_norm": 0.21182763576507568,
      "learning_rate": 9.36146976208462e-05,
      "loss": 0.0678,
      "step": 12270
    },
    {
      "epoch": 7.489013671875e-05,
      "model_forward_time": 0.11525273323059082,
      "step": 12270
    },
    {
      "epoch": 7.489013671875e-05,
      "step": 12270,
      "training_step_time": 0.40352296829223633
    },
    {
      "epoch": 7.4896240234375e-05,
      "model_forward_time": 0.11511850357055664,
      "step": 12271
    },
    {
      "epoch": 7.4896240234375e-05,
      "step": 12271,
      "training_step_time": 0.36570286750793457
    },
    {
      "epoch": 7.490234375e-05,
      "model_forward_time": 0.11552643775939941,
      "step": 12272
    },
    {
      "epoch": 7.490234375e-05,
      "step": 12272,
      "training_step_time": 0.4373149871826172
    },
    {
      "epoch": 7.4908447265625e-05,
      "model_forward_time": 0.11479806900024414,
      "step": 12273
    },
    {
      "epoch": 7.4908447265625e-05,
      "step": 12273,
      "training_step_time": 0.47378063201904297
    },
    {
      "epoch": 7.491455078125e-05,
      "model_forward_time": 0.11594533920288086,
      "step": 12274
    },
    {
      "epoch": 7.491455078125e-05,
      "step": 12274,
      "training_step_time": 0.44420862197875977
    },
    {
      "epoch": 7.4920654296875e-05,
      "model_forward_time": 0.11519813537597656,
      "step": 12275
    },
    {
      "epoch": 7.4920654296875e-05,
      "step": 12275,
      "training_step_time": 0.38776350021362305
    },
    {
      "epoch": 7.49267578125e-05,
      "model_forward_time": 0.11602544784545898,
      "step": 12276
    },
    {
      "epoch": 7.49267578125e-05,
      "step": 12276,
      "training_step_time": 0.3948831558227539
    },
    {
      "epoch": 7.4932861328125e-05,
      "model_forward_time": 0.11545085906982422,
      "step": 12277
    },
    {
      "epoch": 7.4932861328125e-05,
      "step": 12277,
      "training_step_time": 0.3938577175140381
    },
    {
      "epoch": 7.493896484375e-05,
      "model_forward_time": 0.11537861824035645,
      "step": 12278
    },
    {
      "epoch": 7.493896484375e-05,
      "step": 12278,
      "training_step_time": 0.42605018615722656
    },
    {
      "epoch": 7.4945068359375e-05,
      "model_forward_time": 0.11567974090576172,
      "step": 12279
    },
    {
      "epoch": 7.4945068359375e-05,
      "step": 12279,
      "training_step_time": 0.4038882255554199
    },
    {
      "epoch": 7.4951171875e-05,
      "grad_norm": 0.15520799160003662,
      "learning_rate": 9.360121571788371e-05,
      "loss": 0.0637,
      "step": 12280
    },
    {
      "epoch": 7.4951171875e-05,
      "model_forward_time": 0.11562228202819824,
      "step": 12280
    },
    {
      "epoch": 7.4951171875e-05,
      "step": 12280,
      "training_step_time": 0.39589500427246094
    },
    {
      "epoch": 7.4957275390625e-05,
      "model_forward_time": 0.11518740653991699,
      "step": 12281
    },
    {
      "epoch": 7.4957275390625e-05,
      "step": 12281,
      "training_step_time": 0.39973950386047363
    },
    {
      "epoch": 7.496337890625e-05,
      "model_forward_time": 0.11548495292663574,
      "step": 12282
    },
    {
      "epoch": 7.496337890625e-05,
      "step": 12282,
      "training_step_time": 0.4212636947631836
    },
    {
      "epoch": 7.4969482421875e-05,
      "model_forward_time": 0.11555886268615723,
      "step": 12283
    },
    {
      "epoch": 7.4969482421875e-05,
      "step": 12283,
      "training_step_time": 0.41436171531677246
    },
    {
      "epoch": 7.49755859375e-05,
      "model_forward_time": 0.11594510078430176,
      "step": 12284
    },
    {
      "epoch": 7.49755859375e-05,
      "step": 12284,
      "training_step_time": 0.4434316158294678
    },
    {
      "epoch": 7.4981689453125e-05,
      "model_forward_time": 0.1152498722076416,
      "step": 12285
    },
    {
      "epoch": 7.4981689453125e-05,
      "step": 12285,
      "training_step_time": 0.39743566513061523
    },
    {
      "epoch": 7.498779296875e-05,
      "model_forward_time": 0.11518287658691406,
      "step": 12286
    },
    {
      "epoch": 7.498779296875e-05,
      "step": 12286,
      "training_step_time": 0.3670964241027832
    },
    {
      "epoch": 7.4993896484375e-05,
      "model_forward_time": 0.11527729034423828,
      "step": 12287
    },
    {
      "epoch": 7.4993896484375e-05,
      "step": 12287,
      "training_step_time": 0.7234292030334473
    },
    {
      "epoch": 7.5e-05,
      "model_forward_time": 0.11443066596984863,
      "step": 12288
    },
    {
      "epoch": 7.5e-05,
      "step": 12288,
      "training_step_time": 0.4368422031402588
    },
    {
      "epoch": 7.5006103515625e-05,
      "model_forward_time": 0.11499404907226562,
      "step": 12289
    },
    {
      "epoch": 7.5006103515625e-05,
      "step": 12289,
      "training_step_time": 0.38298845291137695
    },
    {
      "epoch": 7.501220703125e-05,
      "grad_norm": 0.19559241831302643,
      "learning_rate": 9.358772057002312e-05,
      "loss": 0.0616,
      "step": 12290
    },
    {
      "epoch": 7.501220703125e-05,
      "model_forward_time": 0.11434578895568848,
      "step": 12290
    },
    {
      "epoch": 7.501220703125e-05,
      "step": 12290,
      "training_step_time": 0.38124990463256836
    },
    {
      "epoch": 7.5018310546875e-05,
      "model_forward_time": 0.1150350570678711,
      "step": 12291
    },
    {
      "epoch": 7.5018310546875e-05,
      "step": 12291,
      "training_step_time": 0.4053325653076172
    },
    {
      "epoch": 7.50244140625e-05,
      "model_forward_time": 0.11422324180603027,
      "step": 12292
    },
    {
      "epoch": 7.50244140625e-05,
      "step": 12292,
      "training_step_time": 0.4049382209777832
    },
    {
      "epoch": 7.5030517578125e-05,
      "model_forward_time": 0.11480712890625,
      "step": 12293
    },
    {
      "epoch": 7.5030517578125e-05,
      "step": 12293,
      "training_step_time": 0.7275617122650146
    },
    {
      "epoch": 7.503662109375e-05,
      "model_forward_time": 0.11410856246948242,
      "step": 12294
    },
    {
      "epoch": 7.503662109375e-05,
      "step": 12294,
      "training_step_time": 0.39275074005126953
    },
    {
      "epoch": 7.5042724609375e-05,
      "model_forward_time": 0.11470484733581543,
      "step": 12295
    },
    {
      "epoch": 7.5042724609375e-05,
      "step": 12295,
      "training_step_time": 0.3872721195220947
    },
    {
      "epoch": 7.5048828125e-05,
      "model_forward_time": 0.11490178108215332,
      "step": 12296
    },
    {
      "epoch": 7.5048828125e-05,
      "step": 12296,
      "training_step_time": 0.4102189540863037
    },
    {
      "epoch": 7.5054931640625e-05,
      "model_forward_time": 0.11418938636779785,
      "step": 12297
    },
    {
      "epoch": 7.5054931640625e-05,
      "step": 12297,
      "training_step_time": 0.3969719409942627
    },
    {
      "epoch": 7.506103515625e-05,
      "model_forward_time": 0.11463761329650879,
      "step": 12298
    },
    {
      "epoch": 7.506103515625e-05,
      "step": 12298,
      "training_step_time": 0.48359155654907227
    },
    {
      "epoch": 7.5067138671875e-05,
      "model_forward_time": 0.11565542221069336,
      "step": 12299
    },
    {
      "epoch": 7.5067138671875e-05,
      "step": 12299,
      "training_step_time": 0.6070058345794678
    },
    {
      "epoch": 7.50732421875e-05,
      "grad_norm": 0.16840727627277374,
      "learning_rate": 9.357421218136386e-05,
      "loss": 0.0601,
      "step": 12300
    },
    {
      "epoch": 7.50732421875e-05,
      "model_forward_time": 0.1141824722290039,
      "step": 12300
    },
    {
      "epoch": 7.50732421875e-05,
      "step": 12300,
      "training_step_time": 0.43113064765930176
    },
    {
      "epoch": 7.5079345703125e-05,
      "model_forward_time": 0.11473298072814941,
      "step": 12301
    },
    {
      "epoch": 7.5079345703125e-05,
      "step": 12301,
      "training_step_time": 0.42208218574523926
    },
    {
      "epoch": 7.508544921875e-05,
      "model_forward_time": 0.11461806297302246,
      "step": 12302
    },
    {
      "epoch": 7.508544921875e-05,
      "step": 12302,
      "training_step_time": 0.3996720314025879
    },
    {
      "epoch": 7.5091552734375e-05,
      "model_forward_time": 0.11400914192199707,
      "step": 12303
    },
    {
      "epoch": 7.5091552734375e-05,
      "step": 12303,
      "training_step_time": 0.39235806465148926
    },
    {
      "epoch": 7.509765625e-05,
      "model_forward_time": 0.11479425430297852,
      "step": 12304
    },
    {
      "epoch": 7.509765625e-05,
      "step": 12304,
      "training_step_time": 0.4336659908294678
    },
    {
      "epoch": 7.5103759765625e-05,
      "model_forward_time": 0.11524033546447754,
      "step": 12305
    },
    {
      "epoch": 7.5103759765625e-05,
      "step": 12305,
      "training_step_time": 0.5640075206756592
    },
    {
      "epoch": 7.510986328125e-05,
      "model_forward_time": 0.11444258689880371,
      "step": 12306
    },
    {
      "epoch": 7.510986328125e-05,
      "step": 12306,
      "training_step_time": 0.3903672695159912
    },
    {
      "epoch": 7.5115966796875e-05,
      "model_forward_time": 0.11607098579406738,
      "step": 12307
    },
    {
      "epoch": 7.5115966796875e-05,
      "step": 12307,
      "training_step_time": 0.3856210708618164
    },
    {
      "epoch": 7.51220703125e-05,
      "model_forward_time": 0.11479473114013672,
      "step": 12308
    },
    {
      "epoch": 7.51220703125e-05,
      "step": 12308,
      "training_step_time": 0.39418506622314453
    },
    {
      "epoch": 7.5128173828125e-05,
      "model_forward_time": 0.11538553237915039,
      "step": 12309
    },
    {
      "epoch": 7.5128173828125e-05,
      "step": 12309,
      "training_step_time": 0.40010952949523926
    },
    {
      "epoch": 7.513427734375e-05,
      "grad_norm": 0.17971152067184448,
      "learning_rate": 9.356069055600948e-05,
      "loss": 0.0595,
      "step": 12310
    },
    {
      "epoch": 7.513427734375e-05,
      "model_forward_time": 0.11533832550048828,
      "step": 12310
    },
    {
      "epoch": 7.513427734375e-05,
      "step": 12310,
      "training_step_time": 0.43363165855407715
    },
    {
      "epoch": 7.5140380859375e-05,
      "model_forward_time": 0.11528491973876953,
      "step": 12311
    },
    {
      "epoch": 7.5140380859375e-05,
      "step": 12311,
      "training_step_time": 0.9073965549468994
    },
    {
      "epoch": 7.5146484375e-05,
      "model_forward_time": 0.11507534980773926,
      "step": 12312
    },
    {
      "epoch": 7.5146484375e-05,
      "step": 12312,
      "training_step_time": 0.48229122161865234
    },
    {
      "epoch": 7.5152587890625e-05,
      "model_forward_time": 0.11477160453796387,
      "step": 12313
    },
    {
      "epoch": 7.5152587890625e-05,
      "step": 12313,
      "training_step_time": 0.496776819229126
    },
    {
      "epoch": 7.515869140625e-05,
      "model_forward_time": 0.11433577537536621,
      "step": 12314
    },
    {
      "epoch": 7.515869140625e-05,
      "step": 12314,
      "training_step_time": 0.40631985664367676
    },
    {
      "epoch": 7.5164794921875e-05,
      "model_forward_time": 0.1139838695526123,
      "step": 12315
    },
    {
      "epoch": 7.5164794921875e-05,
      "step": 12315,
      "training_step_time": 0.4387524127960205
    },
    {
      "epoch": 7.51708984375e-05,
      "model_forward_time": 0.1140286922454834,
      "step": 12316
    },
    {
      "epoch": 7.51708984375e-05,
      "step": 12316,
      "training_step_time": 0.39853763580322266
    },
    {
      "epoch": 7.5177001953125e-05,
      "model_forward_time": 0.11459660530090332,
      "step": 12317
    },
    {
      "epoch": 7.5177001953125e-05,
      "step": 12317,
      "training_step_time": 0.6022162437438965
    },
    {
      "epoch": 7.518310546875e-05,
      "model_forward_time": 0.11501359939575195,
      "step": 12318
    },
    {
      "epoch": 7.518310546875e-05,
      "step": 12318,
      "training_step_time": 0.3852987289428711
    },
    {
      "epoch": 7.5189208984375e-05,
      "model_forward_time": 0.11491250991821289,
      "step": 12319
    },
    {
      "epoch": 7.5189208984375e-05,
      "step": 12319,
      "training_step_time": 0.384854793548584
    },
    {
      "epoch": 7.51953125e-05,
      "grad_norm": 0.1816098988056183,
      "learning_rate": 9.354715569806744e-05,
      "loss": 0.0614,
      "step": 12320
    },
    {
      "epoch": 7.51953125e-05,
      "model_forward_time": 0.11449098587036133,
      "step": 12320
    },
    {
      "epoch": 7.51953125e-05,
      "step": 12320,
      "training_step_time": 0.38204526901245117
    },
    {
      "epoch": 7.5201416015625e-05,
      "model_forward_time": 0.11465644836425781,
      "step": 12321
    },
    {
      "epoch": 7.5201416015625e-05,
      "step": 12321,
      "training_step_time": 0.39557719230651855
    },
    {
      "epoch": 7.520751953125e-05,
      "model_forward_time": 0.1162266731262207,
      "step": 12322
    },
    {
      "epoch": 7.520751953125e-05,
      "step": 12322,
      "training_step_time": 0.38949155807495117
    },
    {
      "epoch": 7.5213623046875e-05,
      "model_forward_time": 0.11526250839233398,
      "step": 12323
    },
    {
      "epoch": 7.5213623046875e-05,
      "step": 12323,
      "training_step_time": 0.6911437511444092
    },
    {
      "epoch": 7.52197265625e-05,
      "model_forward_time": 0.11441946029663086,
      "step": 12324
    },
    {
      "epoch": 7.52197265625e-05,
      "step": 12324,
      "training_step_time": 0.48429179191589355
    },
    {
      "epoch": 7.5225830078125e-05,
      "model_forward_time": 0.11447310447692871,
      "step": 12325
    },
    {
      "epoch": 7.5225830078125e-05,
      "step": 12325,
      "training_step_time": 0.360532283782959
    },
    {
      "epoch": 7.523193359375e-05,
      "model_forward_time": 0.11490178108215332,
      "step": 12326
    },
    {
      "epoch": 7.523193359375e-05,
      "step": 12326,
      "training_step_time": 0.4287407398223877
    },
    {
      "epoch": 7.5238037109375e-05,
      "model_forward_time": 0.11441826820373535,
      "step": 12327
    },
    {
      "epoch": 7.5238037109375e-05,
      "step": 12327,
      "training_step_time": 0.46574974060058594
    },
    {
      "epoch": 7.5244140625e-05,
      "model_forward_time": 0.1159048080444336,
      "step": 12328
    },
    {
      "epoch": 7.5244140625e-05,
      "step": 12328,
      "training_step_time": 0.45537376403808594
    },
    {
      "epoch": 7.5250244140625e-05,
      "model_forward_time": 0.11462640762329102,
      "step": 12329
    },
    {
      "epoch": 7.5250244140625e-05,
      "step": 12329,
      "training_step_time": 0.4031190872192383
    },
    {
      "epoch": 7.525634765625e-05,
      "grad_norm": 0.17346042394638062,
      "learning_rate": 9.353360761164931e-05,
      "loss": 0.0576,
      "step": 12330
    },
    {
      "epoch": 7.525634765625e-05,
      "model_forward_time": 0.11504411697387695,
      "step": 12330
    },
    {
      "epoch": 7.525634765625e-05,
      "step": 12330,
      "training_step_time": 0.37857532501220703
    },
    {
      "epoch": 7.5262451171875e-05,
      "model_forward_time": 0.11556720733642578,
      "step": 12331
    },
    {
      "epoch": 7.5262451171875e-05,
      "step": 12331,
      "training_step_time": 0.3884866237640381
    },
    {
      "epoch": 7.52685546875e-05,
      "model_forward_time": 0.1142432689666748,
      "step": 12332
    },
    {
      "epoch": 7.52685546875e-05,
      "step": 12332,
      "training_step_time": 0.3862767219543457
    },
    {
      "epoch": 7.5274658203125e-05,
      "model_forward_time": 0.11446237564086914,
      "step": 12333
    },
    {
      "epoch": 7.5274658203125e-05,
      "step": 12333,
      "training_step_time": 0.3920116424560547
    },
    {
      "epoch": 7.528076171875e-05,
      "model_forward_time": 0.11539149284362793,
      "step": 12334
    },
    {
      "epoch": 7.528076171875e-05,
      "step": 12334,
      "training_step_time": 0.3875882625579834
    },
    {
      "epoch": 7.5286865234375e-05,
      "model_forward_time": 0.11585497856140137,
      "step": 12335
    },
    {
      "epoch": 7.5286865234375e-05,
      "step": 12335,
      "training_step_time": 0.6115212440490723
    },
    {
      "epoch": 7.529296875e-05,
      "model_forward_time": 0.1154336929321289,
      "step": 12336
    },
    {
      "epoch": 7.529296875e-05,
      "step": 12336,
      "training_step_time": 0.40559911727905273
    },
    {
      "epoch": 7.5299072265625e-05,
      "model_forward_time": 0.1155390739440918,
      "step": 12337
    },
    {
      "epoch": 7.5299072265625e-05,
      "step": 12337,
      "training_step_time": 0.41591811180114746
    },
    {
      "epoch": 7.530517578125e-05,
      "model_forward_time": 0.11506533622741699,
      "step": 12338
    },
    {
      "epoch": 7.530517578125e-05,
      "step": 12338,
      "training_step_time": 0.4282872676849365
    },
    {
      "epoch": 7.5311279296875e-05,
      "model_forward_time": 0.11447453498840332,
      "step": 12339
    },
    {
      "epoch": 7.5311279296875e-05,
      "step": 12339,
      "training_step_time": 0.3910348415374756
    },
    {
      "epoch": 7.53173828125e-05,
      "grad_norm": 0.10315780341625214,
      "learning_rate": 9.352004630087062e-05,
      "loss": 0.0612,
      "step": 12340
    },
    {
      "epoch": 7.53173828125e-05,
      "model_forward_time": 0.11521649360656738,
      "step": 12340
    },
    {
      "epoch": 7.53173828125e-05,
      "step": 12340,
      "training_step_time": 0.39348506927490234
    },
    {
      "epoch": 7.5323486328125e-05,
      "model_forward_time": 0.11491942405700684,
      "step": 12341
    },
    {
      "epoch": 7.5323486328125e-05,
      "step": 12341,
      "training_step_time": 0.7231855392456055
    },
    {
      "epoch": 7.532958984375e-05,
      "model_forward_time": 0.11448240280151367,
      "step": 12342
    },
    {
      "epoch": 7.532958984375e-05,
      "step": 12342,
      "training_step_time": 0.4389078617095947
    },
    {
      "epoch": 7.5335693359375e-05,
      "model_forward_time": 0.11478972434997559,
      "step": 12343
    },
    {
      "epoch": 7.5335693359375e-05,
      "step": 12343,
      "training_step_time": 0.4085574150085449
    },
    {
      "epoch": 7.5341796875e-05,
      "model_forward_time": 0.11406779289245605,
      "step": 12344
    },
    {
      "epoch": 7.5341796875e-05,
      "step": 12344,
      "training_step_time": 0.3730809688568115
    },
    {
      "epoch": 7.5347900390625e-05,
      "model_forward_time": 0.11517024040222168,
      "step": 12345
    },
    {
      "epoch": 7.5347900390625e-05,
      "step": 12345,
      "training_step_time": 0.39116907119750977
    },
    {
      "epoch": 7.535400390625e-05,
      "model_forward_time": 0.11417722702026367,
      "step": 12346
    },
    {
      "epoch": 7.535400390625e-05,
      "step": 12346,
      "training_step_time": 0.38506603240966797
    },
    {
      "epoch": 7.5360107421875e-05,
      "model_forward_time": 0.11592817306518555,
      "step": 12347
    },
    {
      "epoch": 7.5360107421875e-05,
      "step": 12347,
      "training_step_time": 0.6244993209838867
    },
    {
      "epoch": 7.53662109375e-05,
      "model_forward_time": 0.11496520042419434,
      "step": 12348
    },
    {
      "epoch": 7.53662109375e-05,
      "step": 12348,
      "training_step_time": 0.3917660713195801
    },
    {
      "epoch": 7.5372314453125e-05,
      "model_forward_time": 0.1152806282043457,
      "step": 12349
    },
    {
      "epoch": 7.5372314453125e-05,
      "step": 12349,
      "training_step_time": 0.38953638076782227
    },
    {
      "epoch": 7.537841796875e-05,
      "grad_norm": 0.20341768860816956,
      "learning_rate": 9.350647176985095e-05,
      "loss": 0.0627,
      "step": 12350
    },
    {
      "epoch": 7.537841796875e-05,
      "model_forward_time": 0.11507058143615723,
      "step": 12350
    },
    {
      "epoch": 7.537841796875e-05,
      "step": 12350,
      "training_step_time": 0.4750180244445801
    },
    {
      "epoch": 7.5384521484375e-05,
      "model_forward_time": 0.11443042755126953,
      "step": 12351
    },
    {
      "epoch": 7.5384521484375e-05,
      "step": 12351,
      "training_step_time": 0.4342164993286133
    },
    {
      "epoch": 7.5390625e-05,
      "model_forward_time": 0.11471414566040039,
      "step": 12352
    },
    {
      "epoch": 7.5390625e-05,
      "step": 12352,
      "training_step_time": 0.45851826667785645
    },
    {
      "epoch": 7.5396728515625e-05,
      "model_forward_time": 0.11443638801574707,
      "step": 12353
    },
    {
      "epoch": 7.5396728515625e-05,
      "step": 12353,
      "training_step_time": 0.5763547420501709
    },
    {
      "epoch": 7.540283203125e-05,
      "model_forward_time": 0.11476278305053711,
      "step": 12354
    },
    {
      "epoch": 7.540283203125e-05,
      "step": 12354,
      "training_step_time": 0.4272735118865967
    },
    {
      "epoch": 7.5408935546875e-05,
      "model_forward_time": 0.11480331420898438,
      "step": 12355
    },
    {
      "epoch": 7.5408935546875e-05,
      "step": 12355,
      "training_step_time": 0.4500081539154053
    },
    {
      "epoch": 7.54150390625e-05,
      "model_forward_time": 0.11449146270751953,
      "step": 12356
    },
    {
      "epoch": 7.54150390625e-05,
      "step": 12356,
      "training_step_time": 0.44492626190185547
    },
    {
      "epoch": 7.5421142578125e-05,
      "model_forward_time": 0.114166259765625,
      "step": 12357
    },
    {
      "epoch": 7.5421142578125e-05,
      "step": 12357,
      "training_step_time": 0.39212560653686523
    },
    {
      "epoch": 7.542724609375e-05,
      "model_forward_time": 0.11438417434692383,
      "step": 12358
    },
    {
      "epoch": 7.542724609375e-05,
      "step": 12358,
      "training_step_time": 0.38326239585876465
    },
    {
      "epoch": 7.5433349609375e-05,
      "model_forward_time": 0.11507797241210938,
      "step": 12359
    },
    {
      "epoch": 7.5433349609375e-05,
      "step": 12359,
      "training_step_time": 0.6861381530761719
    },
    {
      "epoch": 7.5439453125e-05,
      "grad_norm": 0.1497870534658432,
      "learning_rate": 9.349288402271388e-05,
      "loss": 0.0578,
      "step": 12360
    },
    {
      "epoch": 7.5439453125e-05,
      "model_forward_time": 0.1144556999206543,
      "step": 12360
    },
    {
      "epoch": 7.5439453125e-05,
      "step": 12360,
      "training_step_time": 0.3922436237335205
    },
    {
      "epoch": 7.5445556640625e-05,
      "model_forward_time": 0.11489343643188477,
      "step": 12361
    },
    {
      "epoch": 7.5445556640625e-05,
      "step": 12361,
      "training_step_time": 0.3832888603210449
    },
    {
      "epoch": 7.545166015625e-05,
      "model_forward_time": 0.11469769477844238,
      "step": 12362
    },
    {
      "epoch": 7.545166015625e-05,
      "step": 12362,
      "training_step_time": 0.3867683410644531
    },
    {
      "epoch": 7.5457763671875e-05,
      "model_forward_time": 0.11462736129760742,
      "step": 12363
    },
    {
      "epoch": 7.5457763671875e-05,
      "step": 12363,
      "training_step_time": 0.40195488929748535
    },
    {
      "epoch": 7.54638671875e-05,
      "model_forward_time": 0.11529302597045898,
      "step": 12364
    },
    {
      "epoch": 7.54638671875e-05,
      "step": 12364,
      "training_step_time": 0.4700593948364258
    },
    {
      "epoch": 7.5469970703125e-05,
      "model_forward_time": 0.11454534530639648,
      "step": 12365
    },
    {
      "epoch": 7.5469970703125e-05,
      "step": 12365,
      "training_step_time": 0.7496111392974854
    },
    {
      "epoch": 7.547607421875e-05,
      "model_forward_time": 0.11488103866577148,
      "step": 12366
    },
    {
      "epoch": 7.547607421875e-05,
      "step": 12366,
      "training_step_time": 0.3954782485961914
    },
    {
      "epoch": 7.5482177734375e-05,
      "model_forward_time": 0.11428284645080566,
      "step": 12367
    },
    {
      "epoch": 7.5482177734375e-05,
      "step": 12367,
      "training_step_time": 0.4652729034423828
    },
    {
      "epoch": 7.548828125e-05,
      "model_forward_time": 0.11472177505493164,
      "step": 12368
    },
    {
      "epoch": 7.548828125e-05,
      "step": 12368,
      "training_step_time": 0.4117012023925781
    },
    {
      "epoch": 7.5494384765625e-05,
      "model_forward_time": 0.11446499824523926,
      "step": 12369
    },
    {
      "epoch": 7.5494384765625e-05,
      "step": 12369,
      "training_step_time": 0.4114999771118164
    },
    {
      "epoch": 7.550048828125e-05,
      "grad_norm": 0.1830076277256012,
      "learning_rate": 9.347928306358699e-05,
      "loss": 0.0557,
      "step": 12370
    },
    {
      "epoch": 7.550048828125e-05,
      "model_forward_time": 0.11453580856323242,
      "step": 12370
    },
    {
      "epoch": 7.550048828125e-05,
      "step": 12370,
      "training_step_time": 0.4046051502227783
    },
    {
      "epoch": 7.5506591796875e-05,
      "model_forward_time": 0.11476635932922363,
      "step": 12371
    },
    {
      "epoch": 7.5506591796875e-05,
      "step": 12371,
      "training_step_time": 0.8678293228149414
    },
    {
      "epoch": 7.55126953125e-05,
      "model_forward_time": 0.11499834060668945,
      "step": 12372
    },
    {
      "epoch": 7.55126953125e-05,
      "step": 12372,
      "training_step_time": 0.38134288787841797
    },
    {
      "epoch": 7.5518798828125e-05,
      "model_forward_time": 0.1140437126159668,
      "step": 12373
    },
    {
      "epoch": 7.5518798828125e-05,
      "step": 12373,
      "training_step_time": 0.3818399906158447
    },
    {
      "epoch": 7.552490234375e-05,
      "model_forward_time": 0.11471796035766602,
      "step": 12374
    },
    {
      "epoch": 7.552490234375e-05,
      "step": 12374,
      "training_step_time": 0.38364195823669434
    },
    {
      "epoch": 7.5531005859375e-05,
      "model_forward_time": 0.11390137672424316,
      "step": 12375
    },
    {
      "epoch": 7.5531005859375e-05,
      "step": 12375,
      "training_step_time": 0.3934764862060547
    },
    {
      "epoch": 7.5537109375e-05,
      "model_forward_time": 0.11438989639282227,
      "step": 12376
    },
    {
      "epoch": 7.5537109375e-05,
      "step": 12376,
      "training_step_time": 0.4256124496459961
    },
    {
      "epoch": 7.5543212890625e-05,
      "model_forward_time": 0.11536955833435059,
      "step": 12377
    },
    {
      "epoch": 7.5543212890625e-05,
      "step": 12377,
      "training_step_time": 0.5837111473083496
    },
    {
      "epoch": 7.554931640625e-05,
      "model_forward_time": 0.1145784854888916,
      "step": 12378
    },
    {
      "epoch": 7.554931640625e-05,
      "step": 12378,
      "training_step_time": 0.39417076110839844
    },
    {
      "epoch": 7.5555419921875e-05,
      "model_forward_time": 0.11480283737182617,
      "step": 12379
    },
    {
      "epoch": 7.5555419921875e-05,
      "step": 12379,
      "training_step_time": 0.3637716770172119
    },
    {
      "epoch": 7.55615234375e-05,
      "grad_norm": 0.21040479838848114,
      "learning_rate": 9.346566889660193e-05,
      "loss": 0.0675,
      "step": 12380
    },
    {
      "epoch": 7.55615234375e-05,
      "model_forward_time": 0.11490392684936523,
      "step": 12380
    },
    {
      "epoch": 7.55615234375e-05,
      "step": 12380,
      "training_step_time": 0.4173705577850342
    },
    {
      "epoch": 7.5567626953125e-05,
      "model_forward_time": 0.1145176887512207,
      "step": 12381
    },
    {
      "epoch": 7.5567626953125e-05,
      "step": 12381,
      "training_step_time": 0.4402129650115967
    },
    {
      "epoch": 7.557373046875e-05,
      "model_forward_time": 0.11431121826171875,
      "step": 12382
    },
    {
      "epoch": 7.557373046875e-05,
      "step": 12382,
      "training_step_time": 0.43601322174072266
    },
    {
      "epoch": 7.5579833984375e-05,
      "model_forward_time": 0.11482977867126465,
      "step": 12383
    },
    {
      "epoch": 7.5579833984375e-05,
      "step": 12383,
      "training_step_time": 0.5438830852508545
    },
    {
      "epoch": 7.55859375e-05,
      "model_forward_time": 0.11493682861328125,
      "step": 12384
    },
    {
      "epoch": 7.55859375e-05,
      "step": 12384,
      "training_step_time": 0.3985481262207031
    },
    {
      "epoch": 7.5592041015625e-05,
      "model_forward_time": 0.11468338966369629,
      "step": 12385
    },
    {
      "epoch": 7.5592041015625e-05,
      "step": 12385,
      "training_step_time": 0.38921189308166504
    },
    {
      "epoch": 7.559814453125e-05,
      "model_forward_time": 0.11504697799682617,
      "step": 12386
    },
    {
      "epoch": 7.559814453125e-05,
      "step": 12386,
      "training_step_time": 0.39246320724487305
    },
    {
      "epoch": 7.5604248046875e-05,
      "model_forward_time": 0.11487388610839844,
      "step": 12387
    },
    {
      "epoch": 7.5604248046875e-05,
      "step": 12387,
      "training_step_time": 0.4042634963989258
    },
    {
      "epoch": 7.56103515625e-05,
      "model_forward_time": 0.11489701271057129,
      "step": 12388
    },
    {
      "epoch": 7.56103515625e-05,
      "step": 12388,
      "training_step_time": 0.40128231048583984
    },
    {
      "epoch": 7.5616455078125e-05,
      "model_forward_time": 0.1153266429901123,
      "step": 12389
    },
    {
      "epoch": 7.5616455078125e-05,
      "step": 12389,
      "training_step_time": 0.9557969570159912
    },
    {
      "epoch": 7.562255859375e-05,
      "grad_norm": 0.1988496035337448,
      "learning_rate": 9.345204152589428e-05,
      "loss": 0.0594,
      "step": 12390
    },
    {
      "epoch": 7.562255859375e-05,
      "model_forward_time": 0.11406970024108887,
      "step": 12390
    },
    {
      "epoch": 7.562255859375e-05,
      "step": 12390,
      "training_step_time": 0.4642782211303711
    },
    {
      "epoch": 7.5628662109375e-05,
      "model_forward_time": 0.11389851570129395,
      "step": 12391
    },
    {
      "epoch": 7.5628662109375e-05,
      "step": 12391,
      "training_step_time": 0.39063239097595215
    },
    {
      "epoch": 7.5634765625e-05,
      "model_forward_time": 0.1139078140258789,
      "step": 12392
    },
    {
      "epoch": 7.5634765625e-05,
      "step": 12392,
      "training_step_time": 0.41208600997924805
    },
    {
      "epoch": 7.5640869140625e-05,
      "model_forward_time": 0.11389446258544922,
      "step": 12393
    },
    {
      "epoch": 7.5640869140625e-05,
      "step": 12393,
      "training_step_time": 0.4193708896636963
    },
    {
      "epoch": 7.564697265625e-05,
      "model_forward_time": 0.11439371109008789,
      "step": 12394
    },
    {
      "epoch": 7.564697265625e-05,
      "step": 12394,
      "training_step_time": 0.4266698360443115
    },
    {
      "epoch": 7.5653076171875e-05,
      "model_forward_time": 0.11497688293457031,
      "step": 12395
    },
    {
      "epoch": 7.5653076171875e-05,
      "step": 12395,
      "training_step_time": 0.6765701770782471
    },
    {
      "epoch": 7.56591796875e-05,
      "model_forward_time": 0.1144399642944336,
      "step": 12396
    },
    {
      "epoch": 7.56591796875e-05,
      "step": 12396,
      "training_step_time": 0.4492030143737793
    },
    {
      "epoch": 7.5665283203125e-05,
      "model_forward_time": 0.11427664756774902,
      "step": 12397
    },
    {
      "epoch": 7.5665283203125e-05,
      "step": 12397,
      "training_step_time": 0.3781435489654541
    },
    {
      "epoch": 7.567138671875e-05,
      "model_forward_time": 0.11447691917419434,
      "step": 12398
    },
    {
      "epoch": 7.567138671875e-05,
      "step": 12398,
      "training_step_time": 0.37919187545776367
    },
    {
      "epoch": 7.5677490234375e-05,
      "model_forward_time": 0.11434769630432129,
      "step": 12399
    },
    {
      "epoch": 7.5677490234375e-05,
      "step": 12399,
      "training_step_time": 0.3776407241821289
    },
    {
      "epoch": 7.568359375e-05,
      "grad_norm": 0.25578367710113525,
      "learning_rate": 9.343840095560372e-05,
      "loss": 0.0672,
      "step": 12400
    },
    {
      "epoch": 7.568359375e-05,
      "model_forward_time": 0.1142120361328125,
      "step": 12400
    },
    {
      "epoch": 7.568359375e-05,
      "step": 12400,
      "training_step_time": 0.39218926429748535
    },
    {
      "epoch": 7.5689697265625e-05,
      "model_forward_time": 0.11511468887329102,
      "step": 12401
    },
    {
      "epoch": 7.5689697265625e-05,
      "step": 12401,
      "training_step_time": 0.7651312351226807
    },
    {
      "epoch": 7.569580078125e-05,
      "model_forward_time": 0.11443138122558594,
      "step": 12402
    },
    {
      "epoch": 7.569580078125e-05,
      "step": 12402,
      "training_step_time": 0.4107985496520996
    },
    {
      "epoch": 7.5701904296875e-05,
      "model_forward_time": 0.11429309844970703,
      "step": 12403
    },
    {
      "epoch": 7.5701904296875e-05,
      "step": 12403,
      "training_step_time": 0.4232015609741211
    },
    {
      "epoch": 7.57080078125e-05,
      "model_forward_time": 0.11433625221252441,
      "step": 12404
    },
    {
      "epoch": 7.57080078125e-05,
      "step": 12404,
      "training_step_time": 0.4022848606109619
    },
    {
      "epoch": 7.5714111328125e-05,
      "model_forward_time": 0.11424946784973145,
      "step": 12405
    },
    {
      "epoch": 7.5714111328125e-05,
      "step": 12405,
      "training_step_time": 0.4000816345214844
    },
    {
      "epoch": 7.572021484375e-05,
      "model_forward_time": 0.11452531814575195,
      "step": 12406
    },
    {
      "epoch": 7.572021484375e-05,
      "step": 12406,
      "training_step_time": 0.3631877899169922
    },
    {
      "epoch": 7.5726318359375e-05,
      "model_forward_time": 0.11476540565490723,
      "step": 12407
    },
    {
      "epoch": 7.5726318359375e-05,
      "step": 12407,
      "training_step_time": 0.5799636840820312
    },
    {
      "epoch": 7.5732421875e-05,
      "model_forward_time": 0.11514091491699219,
      "step": 12408
    },
    {
      "epoch": 7.5732421875e-05,
      "step": 12408,
      "training_step_time": 0.5114798545837402
    },
    {
      "epoch": 7.5738525390625e-05,
      "model_forward_time": 0.11495137214660645,
      "step": 12409
    },
    {
      "epoch": 7.5738525390625e-05,
      "step": 12409,
      "training_step_time": 0.50079345703125
    },
    {
      "epoch": 7.574462890625e-05,
      "grad_norm": 0.21031717956066132,
      "learning_rate": 9.342474718987386e-05,
      "loss": 0.0651,
      "step": 12410
    },
    {
      "epoch": 7.574462890625e-05,
      "model_forward_time": 0.1145944595336914,
      "step": 12410
    },
    {
      "epoch": 7.574462890625e-05,
      "step": 12410,
      "training_step_time": 0.37723398208618164
    },
    {
      "epoch": 7.5750732421875e-05,
      "model_forward_time": 0.11403417587280273,
      "step": 12411
    },
    {
      "epoch": 7.5750732421875e-05,
      "step": 12411,
      "training_step_time": 0.3820607662200928
    },
    {
      "epoch": 7.57568359375e-05,
      "model_forward_time": 0.11419296264648438,
      "step": 12412
    },
    {
      "epoch": 7.57568359375e-05,
      "step": 12412,
      "training_step_time": 0.3831157684326172
    },
    {
      "epoch": 7.5762939453125e-05,
      "model_forward_time": 0.11545467376708984,
      "step": 12413
    },
    {
      "epoch": 7.5762939453125e-05,
      "step": 12413,
      "training_step_time": 0.5236518383026123
    },
    {
      "epoch": 7.576904296875e-05,
      "model_forward_time": 0.11530160903930664,
      "step": 12414
    },
    {
      "epoch": 7.576904296875e-05,
      "step": 12414,
      "training_step_time": 0.39031362533569336
    },
    {
      "epoch": 7.5775146484375e-05,
      "model_forward_time": 0.1150667667388916,
      "step": 12415
    },
    {
      "epoch": 7.5775146484375e-05,
      "step": 12415,
      "training_step_time": 0.3897733688354492
    },
    {
      "epoch": 7.578125e-05,
      "model_forward_time": 0.11512255668640137,
      "step": 12416
    },
    {
      "epoch": 7.578125e-05,
      "step": 12416,
      "training_step_time": 0.4337339401245117
    },
    {
      "epoch": 7.5787353515625e-05,
      "model_forward_time": 0.1154177188873291,
      "step": 12417
    },
    {
      "epoch": 7.5787353515625e-05,
      "step": 12417,
      "training_step_time": 0.4277067184448242
    },
    {
      "epoch": 7.579345703125e-05,
      "model_forward_time": 0.11518311500549316,
      "step": 12418
    },
    {
      "epoch": 7.579345703125e-05,
      "step": 12418,
      "training_step_time": 0.4277060031890869
    },
    {
      "epoch": 7.5799560546875e-05,
      "model_forward_time": 0.11523842811584473,
      "step": 12419
    },
    {
      "epoch": 7.5799560546875e-05,
      "step": 12419,
      "training_step_time": 0.6961493492126465
    },
    {
      "epoch": 7.58056640625e-05,
      "grad_norm": 0.16988374292850494,
      "learning_rate": 9.341108023285238e-05,
      "loss": 0.0609,
      "step": 12420
    },
    {
      "epoch": 7.58056640625e-05,
      "model_forward_time": 0.11480331420898438,
      "step": 12420
    },
    {
      "epoch": 7.58056640625e-05,
      "step": 12420,
      "training_step_time": 0.4677009582519531
    },
    {
      "epoch": 7.5811767578125e-05,
      "model_forward_time": 0.11421608924865723,
      "step": 12421
    },
    {
      "epoch": 7.5811767578125e-05,
      "step": 12421,
      "training_step_time": 0.4346272945404053
    },
    {
      "epoch": 7.581787109375e-05,
      "model_forward_time": 0.11411666870117188,
      "step": 12422
    },
    {
      "epoch": 7.581787109375e-05,
      "step": 12422,
      "training_step_time": 0.39435243606567383
    },
    {
      "epoch": 7.5823974609375e-05,
      "model_forward_time": 0.11442899703979492,
      "step": 12423
    },
    {
      "epoch": 7.5823974609375e-05,
      "step": 12423,
      "training_step_time": 0.3961751461029053
    },
    {
      "epoch": 7.5830078125e-05,
      "model_forward_time": 0.11428666114807129,
      "step": 12424
    },
    {
      "epoch": 7.5830078125e-05,
      "step": 12424,
      "training_step_time": 0.38287854194641113
    },
    {
      "epoch": 7.5836181640625e-05,
      "model_forward_time": 0.11485767364501953,
      "step": 12425
    },
    {
      "epoch": 7.5836181640625e-05,
      "step": 12425,
      "training_step_time": 0.5597748756408691
    },
    {
      "epoch": 7.584228515625e-05,
      "model_forward_time": 0.1145942211151123,
      "step": 12426
    },
    {
      "epoch": 7.584228515625e-05,
      "step": 12426,
      "training_step_time": 0.39171385765075684
    },
    {
      "epoch": 7.5848388671875e-05,
      "model_forward_time": 0.11563444137573242,
      "step": 12427
    },
    {
      "epoch": 7.5848388671875e-05,
      "step": 12427,
      "training_step_time": 0.3868536949157715
    },
    {
      "epoch": 7.58544921875e-05,
      "model_forward_time": 0.1152186393737793,
      "step": 12428
    },
    {
      "epoch": 7.58544921875e-05,
      "step": 12428,
      "training_step_time": 0.38977813720703125
    },
    {
      "epoch": 7.5860595703125e-05,
      "model_forward_time": 0.11580371856689453,
      "step": 12429
    },
    {
      "epoch": 7.5860595703125e-05,
      "step": 12429,
      "training_step_time": 0.39038991928100586
    },
    {
      "epoch": 7.586669921875e-05,
      "grad_norm": 0.19676832854747772,
      "learning_rate": 9.339740008869092e-05,
      "loss": 0.0634,
      "step": 12430
    },
    {
      "epoch": 7.586669921875e-05,
      "model_forward_time": 0.11507892608642578,
      "step": 12430
    },
    {
      "epoch": 7.586669921875e-05,
      "step": 12430,
      "training_step_time": 0.4512598514556885
    },
    {
      "epoch": 7.5872802734375e-05,
      "model_forward_time": 0.1147763729095459,
      "step": 12431
    },
    {
      "epoch": 7.5872802734375e-05,
      "step": 12431,
      "training_step_time": 1.0256004333496094
    },
    {
      "epoch": 7.587890625e-05,
      "model_forward_time": 0.11421418190002441,
      "step": 12432
    },
    {
      "epoch": 7.587890625e-05,
      "step": 12432,
      "training_step_time": 0.3880455493927002
    },
    {
      "epoch": 7.5885009765625e-05,
      "model_forward_time": 0.11425161361694336,
      "step": 12433
    },
    {
      "epoch": 7.5885009765625e-05,
      "step": 12433,
      "training_step_time": 0.4336514472961426
    },
    {
      "epoch": 7.589111328125e-05,
      "model_forward_time": 0.11519050598144531,
      "step": 12434
    },
    {
      "epoch": 7.589111328125e-05,
      "step": 12434,
      "training_step_time": 0.4285733699798584
    },
    {
      "epoch": 7.5897216796875e-05,
      "model_forward_time": 0.11409568786621094,
      "step": 12435
    },
    {
      "epoch": 7.5897216796875e-05,
      "step": 12435,
      "training_step_time": 0.44119930267333984
    },
    {
      "epoch": 7.59033203125e-05,
      "model_forward_time": 0.11487126350402832,
      "step": 12436
    },
    {
      "epoch": 7.59033203125e-05,
      "step": 12436,
      "training_step_time": 0.43770694732666016
    },
    {
      "epoch": 7.5909423828125e-05,
      "model_forward_time": 0.11518716812133789,
      "step": 12437
    },
    {
      "epoch": 7.5909423828125e-05,
      "step": 12437,
      "training_step_time": 0.5955810546875
    },
    {
      "epoch": 7.591552734375e-05,
      "model_forward_time": 0.11455464363098145,
      "step": 12438
    },
    {
      "epoch": 7.591552734375e-05,
      "step": 12438,
      "training_step_time": 0.38284754753112793
    },
    {
      "epoch": 7.5921630859375e-05,
      "model_forward_time": 0.11425924301147461,
      "step": 12439
    },
    {
      "epoch": 7.5921630859375e-05,
      "step": 12439,
      "training_step_time": 0.390535831451416
    },
    {
      "epoch": 7.5927734375e-05,
      "grad_norm": 0.1501203328371048,
      "learning_rate": 9.338370676154516e-05,
      "loss": 0.061,
      "step": 12440
    },
    {
      "epoch": 7.5927734375e-05,
      "model_forward_time": 0.11455297470092773,
      "step": 12440
    },
    {
      "epoch": 7.5927734375e-05,
      "step": 12440,
      "training_step_time": 0.3833591938018799
    },
    {
      "epoch": 7.5933837890625e-05,
      "model_forward_time": 0.11488699913024902,
      "step": 12441
    },
    {
      "epoch": 7.5933837890625e-05,
      "step": 12441,
      "training_step_time": 0.40781164169311523
    },
    {
      "epoch": 7.593994140625e-05,
      "model_forward_time": 0.11530447006225586,
      "step": 12442
    },
    {
      "epoch": 7.593994140625e-05,
      "step": 12442,
      "training_step_time": 0.40898704528808594
    },
    {
      "epoch": 7.5946044921875e-05,
      "model_forward_time": 0.11476969718933105,
      "step": 12443
    },
    {
      "epoch": 7.5946044921875e-05,
      "step": 12443,
      "training_step_time": 0.8624763488769531
    },
    {
      "epoch": 7.59521484375e-05,
      "model_forward_time": 0.11437821388244629,
      "step": 12444
    },
    {
      "epoch": 7.59521484375e-05,
      "step": 12444,
      "training_step_time": 0.39497852325439453
    },
    {
      "epoch": 7.5958251953125e-05,
      "model_forward_time": 0.11509323120117188,
      "step": 12445
    },
    {
      "epoch": 7.5958251953125e-05,
      "step": 12445,
      "training_step_time": 0.38515615463256836
    },
    {
      "epoch": 7.596435546875e-05,
      "model_forward_time": 0.11414170265197754,
      "step": 12446
    },
    {
      "epoch": 7.596435546875e-05,
      "step": 12446,
      "training_step_time": 0.4587562084197998
    },
    {
      "epoch": 7.5970458984375e-05,
      "model_forward_time": 0.11495852470397949,
      "step": 12447
    },
    {
      "epoch": 7.5970458984375e-05,
      "step": 12447,
      "training_step_time": 0.4402275085449219
    },
    {
      "epoch": 7.59765625e-05,
      "model_forward_time": 0.11459708213806152,
      "step": 12448
    },
    {
      "epoch": 7.59765625e-05,
      "step": 12448,
      "training_step_time": 0.4651148319244385
    },
    {
      "epoch": 7.5982666015625e-05,
      "model_forward_time": 0.11480331420898438,
      "step": 12449
    },
    {
      "epoch": 7.5982666015625e-05,
      "step": 12449,
      "training_step_time": 0.6509995460510254
    },
    {
      "epoch": 7.598876953125e-05,
      "grad_norm": 0.17171035706996918,
      "learning_rate": 9.337000025557476e-05,
      "loss": 0.0559,
      "step": 12450
    },
    {
      "epoch": 7.598876953125e-05,
      "model_forward_time": 0.11408114433288574,
      "step": 12450
    },
    {
      "epoch": 7.598876953125e-05,
      "step": 12450,
      "training_step_time": 0.38973355293273926
    },
    {
      "epoch": 7.5994873046875e-05,
      "model_forward_time": 0.11469817161560059,
      "step": 12451
    },
    {
      "epoch": 7.5994873046875e-05,
      "step": 12451,
      "training_step_time": 0.3998136520385742
    },
    {
      "epoch": 7.60009765625e-05,
      "model_forward_time": 0.1147012710571289,
      "step": 12452
    },
    {
      "epoch": 7.60009765625e-05,
      "step": 12452,
      "training_step_time": 0.3945441246032715
    },
    {
      "epoch": 7.6007080078125e-05,
      "model_forward_time": 0.11426520347595215,
      "step": 12453
    },
    {
      "epoch": 7.6007080078125e-05,
      "step": 12453,
      "training_step_time": 0.3924429416656494
    },
    {
      "epoch": 7.601318359375e-05,
      "model_forward_time": 0.11611056327819824,
      "step": 12454
    },
    {
      "epoch": 7.601318359375e-05,
      "step": 12454,
      "training_step_time": 0.40430736541748047
    },
    {
      "epoch": 7.6019287109375e-05,
      "model_forward_time": 0.1145021915435791,
      "step": 12455
    },
    {
      "epoch": 7.6019287109375e-05,
      "step": 12455,
      "training_step_time": 0.8463084697723389
    },
    {
      "epoch": 7.6025390625e-05,
      "model_forward_time": 0.1139225959777832,
      "step": 12456
    },
    {
      "epoch": 7.6025390625e-05,
      "step": 12456,
      "training_step_time": 0.47377562522888184
    },
    {
      "epoch": 7.6031494140625e-05,
      "model_forward_time": 0.11420416831970215,
      "step": 12457
    },
    {
      "epoch": 7.6031494140625e-05,
      "step": 12457,
      "training_step_time": 0.43700242042541504
    },
    {
      "epoch": 7.603759765625e-05,
      "model_forward_time": 0.1142270565032959,
      "step": 12458
    },
    {
      "epoch": 7.603759765625e-05,
      "step": 12458,
      "training_step_time": 0.38105058670043945
    },
    {
      "epoch": 7.6043701171875e-05,
      "model_forward_time": 0.1142721176147461,
      "step": 12459
    },
    {
      "epoch": 7.6043701171875e-05,
      "step": 12459,
      "training_step_time": 0.3616509437561035
    },
    {
      "epoch": 7.60498046875e-05,
      "grad_norm": 0.15491601824760437,
      "learning_rate": 9.335628057494341e-05,
      "loss": 0.0633,
      "step": 12460
    },
    {
      "epoch": 7.60498046875e-05,
      "model_forward_time": 0.11435127258300781,
      "step": 12460
    },
    {
      "epoch": 7.60498046875e-05,
      "step": 12460,
      "training_step_time": 0.410733699798584
    },
    {
      "epoch": 7.6055908203125e-05,
      "model_forward_time": 0.11484217643737793,
      "step": 12461
    },
    {
      "epoch": 7.6055908203125e-05,
      "step": 12461,
      "training_step_time": 0.5048167705535889
    },
    {
      "epoch": 7.606201171875e-05,
      "model_forward_time": 0.11532044410705566,
      "step": 12462
    },
    {
      "epoch": 7.606201171875e-05,
      "step": 12462,
      "training_step_time": 0.49668240547180176
    },
    {
      "epoch": 7.6068115234375e-05,
      "model_forward_time": 0.11536574363708496,
      "step": 12463
    },
    {
      "epoch": 7.6068115234375e-05,
      "step": 12463,
      "training_step_time": 0.39434361457824707
    },
    {
      "epoch": 7.607421875e-05,
      "model_forward_time": 0.11540341377258301,
      "step": 12464
    },
    {
      "epoch": 7.607421875e-05,
      "step": 12464,
      "training_step_time": 0.39296507835388184
    },
    {
      "epoch": 7.6080322265625e-05,
      "model_forward_time": 0.11573147773742676,
      "step": 12465
    },
    {
      "epoch": 7.6080322265625e-05,
      "step": 12465,
      "training_step_time": 0.4215428829193115
    },
    {
      "epoch": 7.608642578125e-05,
      "model_forward_time": 0.11511373519897461,
      "step": 12466
    },
    {
      "epoch": 7.608642578125e-05,
      "step": 12466,
      "training_step_time": 0.3927638530731201
    },
    {
      "epoch": 7.6092529296875e-05,
      "model_forward_time": 0.11487364768981934,
      "step": 12467
    },
    {
      "epoch": 7.6092529296875e-05,
      "step": 12467,
      "training_step_time": 0.7235713005065918
    },
    {
      "epoch": 7.60986328125e-05,
      "model_forward_time": 0.1142575740814209,
      "step": 12468
    },
    {
      "epoch": 7.60986328125e-05,
      "step": 12468,
      "training_step_time": 0.396960973739624
    },
    {
      "epoch": 7.6104736328125e-05,
      "model_forward_time": 0.11439776420593262,
      "step": 12469
    },
    {
      "epoch": 7.6104736328125e-05,
      "step": 12469,
      "training_step_time": 0.409778356552124
    },
    {
      "epoch": 7.611083984375e-05,
      "grad_norm": 0.1887797713279724,
      "learning_rate": 9.334254772381876e-05,
      "loss": 0.06,
      "step": 12470
    },
    {
      "epoch": 7.611083984375e-05,
      "model_forward_time": 0.1145334243774414,
      "step": 12470
    },
    {
      "epoch": 7.611083984375e-05,
      "step": 12470,
      "training_step_time": 0.39519262313842773
    },
    {
      "epoch": 7.6116943359375e-05,
      "model_forward_time": 0.11414098739624023,
      "step": 12471
    },
    {
      "epoch": 7.6116943359375e-05,
      "step": 12471,
      "training_step_time": 0.4740560054779053
    },
    {
      "epoch": 7.6123046875e-05,
      "model_forward_time": 0.1145622730255127,
      "step": 12472
    },
    {
      "epoch": 7.6123046875e-05,
      "step": 12472,
      "training_step_time": 0.383007287979126
    },
    {
      "epoch": 7.6129150390625e-05,
      "model_forward_time": 0.11496138572692871,
      "step": 12473
    },
    {
      "epoch": 7.6129150390625e-05,
      "step": 12473,
      "training_step_time": 0.7401676177978516
    },
    {
      "epoch": 7.613525390625e-05,
      "model_forward_time": 0.11377501487731934,
      "step": 12474
    },
    {
      "epoch": 7.613525390625e-05,
      "step": 12474,
      "training_step_time": 0.38298797607421875
    },
    {
      "epoch": 7.6141357421875e-05,
      "model_forward_time": 0.11437559127807617,
      "step": 12475
    },
    {
      "epoch": 7.6141357421875e-05,
      "step": 12475,
      "training_step_time": 0.4052741527557373
    },
    {
      "epoch": 7.61474609375e-05,
      "model_forward_time": 0.11437463760375977,
      "step": 12476
    },
    {
      "epoch": 7.61474609375e-05,
      "step": 12476,
      "training_step_time": 0.40195369720458984
    },
    {
      "epoch": 7.6153564453125e-05,
      "model_forward_time": 0.11433792114257812,
      "step": 12477
    },
    {
      "epoch": 7.6153564453125e-05,
      "step": 12477,
      "training_step_time": 0.39517927169799805
    },
    {
      "epoch": 7.615966796875e-05,
      "model_forward_time": 0.1147305965423584,
      "step": 12478
    },
    {
      "epoch": 7.615966796875e-05,
      "step": 12478,
      "training_step_time": 0.40950846672058105
    },
    {
      "epoch": 7.6165771484375e-05,
      "model_forward_time": 0.11465120315551758,
      "step": 12479
    },
    {
      "epoch": 7.6165771484375e-05,
      "step": 12479,
      "training_step_time": 0.7005710601806641
    },
    {
      "epoch": 7.6171875e-05,
      "grad_norm": 0.22512505948543549,
      "learning_rate": 9.332880170637252e-05,
      "loss": 0.0632,
      "step": 12480
    },
    {
      "epoch": 7.6171875e-05,
      "model_forward_time": 0.11381959915161133,
      "step": 12480
    },
    {
      "epoch": 7.6171875e-05,
      "step": 12480,
      "training_step_time": 0.38983869552612305
    },
    {
      "epoch": 7.6177978515625e-05,
      "model_forward_time": 0.11497163772583008,
      "step": 12481
    },
    {
      "epoch": 7.6177978515625e-05,
      "step": 12481,
      "training_step_time": 0.3926575183868408
    },
    {
      "epoch": 7.618408203125e-05,
      "model_forward_time": 0.11415481567382812,
      "step": 12482
    },
    {
      "epoch": 7.618408203125e-05,
      "step": 12482,
      "training_step_time": 0.44252729415893555
    },
    {
      "epoch": 7.6190185546875e-05,
      "model_forward_time": 0.1145484447479248,
      "step": 12483
    },
    {
      "epoch": 7.6190185546875e-05,
      "step": 12483,
      "training_step_time": 0.4547295570373535
    },
    {
      "epoch": 7.61962890625e-05,
      "model_forward_time": 0.11348652839660645,
      "step": 12484
    },
    {
      "epoch": 7.61962890625e-05,
      "step": 12484,
      "training_step_time": 0.4313192367553711
    },
    {
      "epoch": 7.6202392578125e-05,
      "model_forward_time": 0.11505699157714844,
      "step": 12485
    },
    {
      "epoch": 7.6202392578125e-05,
      "step": 12485,
      "training_step_time": 0.7640657424926758
    },
    {
      "epoch": 7.620849609375e-05,
      "model_forward_time": 0.11368870735168457,
      "step": 12486
    },
    {
      "epoch": 7.620849609375e-05,
      "step": 12486,
      "training_step_time": 0.4447307586669922
    },
    {
      "epoch": 7.6214599609375e-05,
      "model_forward_time": 0.11471700668334961,
      "step": 12487
    },
    {
      "epoch": 7.6214599609375e-05,
      "step": 12487,
      "training_step_time": 0.4400932788848877
    },
    {
      "epoch": 7.6220703125e-05,
      "model_forward_time": 0.11422443389892578,
      "step": 12488
    },
    {
      "epoch": 7.6220703125e-05,
      "step": 12488,
      "training_step_time": 0.41007471084594727
    },
    {
      "epoch": 7.6226806640625e-05,
      "model_forward_time": 0.11465024948120117,
      "step": 12489
    },
    {
      "epoch": 7.6226806640625e-05,
      "step": 12489,
      "training_step_time": 0.4132547378540039
    },
    {
      "epoch": 7.623291015625e-05,
      "grad_norm": 0.15926092863082886,
      "learning_rate": 9.331504252678037e-05,
      "loss": 0.0596,
      "step": 12490
    },
    {
      "epoch": 7.623291015625e-05,
      "model_forward_time": 0.11420774459838867,
      "step": 12490
    },
    {
      "epoch": 7.623291015625e-05,
      "step": 12490,
      "training_step_time": 0.38394999504089355
    },
    {
      "epoch": 7.6239013671875e-05,
      "model_forward_time": 0.1153569221496582,
      "step": 12491
    },
    {
      "epoch": 7.6239013671875e-05,
      "step": 12491,
      "training_step_time": 0.6357693672180176
    },
    {
      "epoch": 7.62451171875e-05,
      "model_forward_time": 0.11444568634033203,
      "step": 12492
    },
    {
      "epoch": 7.62451171875e-05,
      "step": 12492,
      "training_step_time": 0.38762974739074707
    },
    {
      "epoch": 7.6251220703125e-05,
      "model_forward_time": 0.1149139404296875,
      "step": 12493
    },
    {
      "epoch": 7.6251220703125e-05,
      "step": 12493,
      "training_step_time": 0.3964855670928955
    },
    {
      "epoch": 7.625732421875e-05,
      "model_forward_time": 0.11521530151367188,
      "step": 12494
    },
    {
      "epoch": 7.625732421875e-05,
      "step": 12494,
      "training_step_time": 0.38709568977355957
    },
    {
      "epoch": 7.6263427734375e-05,
      "model_forward_time": 0.11479926109313965,
      "step": 12495
    },
    {
      "epoch": 7.6263427734375e-05,
      "step": 12495,
      "training_step_time": 0.3956282138824463
    },
    {
      "epoch": 7.626953125e-05,
      "model_forward_time": 0.11571884155273438,
      "step": 12496
    },
    {
      "epoch": 7.626953125e-05,
      "step": 12496,
      "training_step_time": 0.4565694332122803
    },
    {
      "epoch": 7.6275634765625e-05,
      "model_forward_time": 0.11534881591796875,
      "step": 12497
    },
    {
      "epoch": 7.6275634765625e-05,
      "step": 12497,
      "training_step_time": 0.6987690925598145
    },
    {
      "epoch": 7.628173828125e-05,
      "model_forward_time": 0.11432242393493652,
      "step": 12498
    },
    {
      "epoch": 7.628173828125e-05,
      "step": 12498,
      "training_step_time": 0.39754605293273926
    },
    {
      "epoch": 7.6287841796875e-05,
      "model_forward_time": 0.11446666717529297,
      "step": 12499
    },
    {
      "epoch": 7.6287841796875e-05,
      "step": 12499,
      "training_step_time": 0.36223530769348145
    },
    {
      "epoch": 7.62939453125e-05,
      "grad_norm": 0.20943675935268402,
      "learning_rate": 9.330127018922194e-05,
      "loss": 0.0594,
      "step": 12500
    },
    {
      "epoch": 7.62939453125e-05,
      "model_forward_time": 0.11428594589233398,
      "step": 12500
    },
    {
      "epoch": 7.62939453125e-05,
      "step": 12500,
      "training_step_time": 0.43817734718322754
    },
    {
      "epoch": 7.6300048828125e-05,
      "model_forward_time": 0.11477065086364746,
      "step": 12501
    },
    {
      "epoch": 7.6300048828125e-05,
      "step": 12501,
      "training_step_time": 0.45595645904541016
    },
    {
      "epoch": 7.630615234375e-05,
      "model_forward_time": 0.11426496505737305,
      "step": 12502
    },
    {
      "epoch": 7.630615234375e-05,
      "step": 12502,
      "training_step_time": 0.39104390144348145
    },
    {
      "epoch": 7.6312255859375e-05,
      "model_forward_time": 0.11489462852478027,
      "step": 12503
    },
    {
      "epoch": 7.6312255859375e-05,
      "step": 12503,
      "training_step_time": 0.7945384979248047
    },
    {
      "epoch": 7.6318359375e-05,
      "model_forward_time": 0.11422967910766602,
      "step": 12504
    },
    {
      "epoch": 7.6318359375e-05,
      "step": 12504,
      "training_step_time": 0.3993675708770752
    },
    {
      "epoch": 7.6324462890625e-05,
      "model_forward_time": 0.11498141288757324,
      "step": 12505
    },
    {
      "epoch": 7.6324462890625e-05,
      "step": 12505,
      "training_step_time": 0.38222336769104004
    },
    {
      "epoch": 7.633056640625e-05,
      "model_forward_time": 0.11406612396240234,
      "step": 12506
    },
    {
      "epoch": 7.633056640625e-05,
      "step": 12506,
      "training_step_time": 0.4138801097869873
    },
    {
      "epoch": 7.6336669921875e-05,
      "model_forward_time": 0.11431217193603516,
      "step": 12507
    },
    {
      "epoch": 7.6336669921875e-05,
      "step": 12507,
      "training_step_time": 0.39129018783569336
    },
    {
      "epoch": 7.63427734375e-05,
      "model_forward_time": 0.11448955535888672,
      "step": 12508
    },
    {
      "epoch": 7.63427734375e-05,
      "step": 12508,
      "training_step_time": 0.3793814182281494
    },
    {
      "epoch": 7.6348876953125e-05,
      "model_forward_time": 0.11512184143066406,
      "step": 12509
    },
    {
      "epoch": 7.6348876953125e-05,
      "step": 12509,
      "training_step_time": 0.863722562789917
    },
    {
      "epoch": 7.635498046875e-05,
      "grad_norm": 0.21733340620994568,
      "learning_rate": 9.328748469788093e-05,
      "loss": 0.0579,
      "step": 12510
    },
    {
      "epoch": 7.635498046875e-05,
      "model_forward_time": 0.11425423622131348,
      "step": 12510
    },
    {
      "epoch": 7.635498046875e-05,
      "step": 12510,
      "training_step_time": 0.4447596073150635
    },
    {
      "epoch": 7.6361083984375e-05,
      "model_forward_time": 0.11433172225952148,
      "step": 12511
    },
    {
      "epoch": 7.6361083984375e-05,
      "step": 12511,
      "training_step_time": 0.3891181945800781
    },
    {
      "epoch": 7.63671875e-05,
      "model_forward_time": 0.11482524871826172,
      "step": 12512
    },
    {
      "epoch": 7.63671875e-05,
      "step": 12512,
      "training_step_time": 0.36250948905944824
    },
    {
      "epoch": 7.6373291015625e-05,
      "model_forward_time": 0.11440753936767578,
      "step": 12513
    },
    {
      "epoch": 7.6373291015625e-05,
      "step": 12513,
      "training_step_time": 0.41846609115600586
    },
    {
      "epoch": 7.637939453125e-05,
      "model_forward_time": 0.11440420150756836,
      "step": 12514
    },
    {
      "epoch": 7.637939453125e-05,
      "step": 12514,
      "training_step_time": 0.45480895042419434
    },
    {
      "epoch": 7.6385498046875e-05,
      "model_forward_time": 0.11626172065734863,
      "step": 12515
    },
    {
      "epoch": 7.6385498046875e-05,
      "step": 12515,
      "training_step_time": 0.8631343841552734
    },
    {
      "epoch": 7.63916015625e-05,
      "model_forward_time": 0.1144876480102539,
      "step": 12516
    },
    {
      "epoch": 7.63916015625e-05,
      "step": 12516,
      "training_step_time": 0.39141082763671875
    },
    {
      "epoch": 7.6397705078125e-05,
      "model_forward_time": 0.11443495750427246,
      "step": 12517
    },
    {
      "epoch": 7.6397705078125e-05,
      "step": 12517,
      "training_step_time": 0.3899495601654053
    },
    {
      "epoch": 7.640380859375e-05,
      "model_forward_time": 0.11518573760986328,
      "step": 12518
    },
    {
      "epoch": 7.640380859375e-05,
      "step": 12518,
      "training_step_time": 0.38984036445617676
    },
    {
      "epoch": 7.6409912109375e-05,
      "model_forward_time": 0.11462521553039551,
      "step": 12519
    },
    {
      "epoch": 7.6409912109375e-05,
      "step": 12519,
      "training_step_time": 0.3814566135406494
    },
    {
      "epoch": 7.6416015625e-05,
      "grad_norm": 0.14485402405261993,
      "learning_rate": 9.327368605694502e-05,
      "loss": 0.0585,
      "step": 12520
    },
    {
      "epoch": 7.6416015625e-05,
      "model_forward_time": 0.11441254615783691,
      "step": 12520
    },
    {
      "epoch": 7.6416015625e-05,
      "step": 12520,
      "training_step_time": 0.38735198974609375
    },
    {
      "epoch": 7.6422119140625e-05,
      "model_forward_time": 0.11547136306762695,
      "step": 12521
    },
    {
      "epoch": 7.6422119140625e-05,
      "step": 12521,
      "training_step_time": 0.6438395977020264
    },
    {
      "epoch": 7.642822265625e-05,
      "model_forward_time": 0.11488556861877441,
      "step": 12522
    },
    {
      "epoch": 7.642822265625e-05,
      "step": 12522,
      "training_step_time": 0.4508819580078125
    },
    {
      "epoch": 7.6434326171875e-05,
      "model_forward_time": 0.11471247673034668,
      "step": 12523
    },
    {
      "epoch": 7.6434326171875e-05,
      "step": 12523,
      "training_step_time": 0.42475008964538574
    },
    {
      "epoch": 7.64404296875e-05,
      "model_forward_time": 0.11456799507141113,
      "step": 12524
    },
    {
      "epoch": 7.64404296875e-05,
      "step": 12524,
      "training_step_time": 0.4067511558532715
    },
    {
      "epoch": 7.6446533203125e-05,
      "model_forward_time": 0.11510419845581055,
      "step": 12525
    },
    {
      "epoch": 7.6446533203125e-05,
      "step": 12525,
      "training_step_time": 0.36531925201416016
    },
    {
      "epoch": 7.645263671875e-05,
      "model_forward_time": 0.11481285095214844,
      "step": 12526
    },
    {
      "epoch": 7.645263671875e-05,
      "step": 12526,
      "training_step_time": 0.38867831230163574
    },
    {
      "epoch": 7.6458740234375e-05,
      "model_forward_time": 0.11500811576843262,
      "step": 12527
    },
    {
      "epoch": 7.6458740234375e-05,
      "step": 12527,
      "training_step_time": 0.7919387817382812
    },
    {
      "epoch": 7.646484375e-05,
      "model_forward_time": 0.11417937278747559,
      "step": 12528
    },
    {
      "epoch": 7.646484375e-05,
      "step": 12528,
      "training_step_time": 0.40091848373413086
    },
    {
      "epoch": 7.6470947265625e-05,
      "model_forward_time": 0.11444854736328125,
      "step": 12529
    },
    {
      "epoch": 7.6470947265625e-05,
      "step": 12529,
      "training_step_time": 0.3913569450378418
    },
    {
      "epoch": 7.647705078125e-05,
      "grad_norm": 0.21466954052448273,
      "learning_rate": 9.325987427060586e-05,
      "loss": 0.0557,
      "step": 12530
    },
    {
      "epoch": 7.647705078125e-05,
      "model_forward_time": 0.11432147026062012,
      "step": 12530
    },
    {
      "epoch": 7.647705078125e-05,
      "step": 12530,
      "training_step_time": 0.3881816864013672
    },
    {
      "epoch": 7.6483154296875e-05,
      "model_forward_time": 0.11480164527893066,
      "step": 12531
    },
    {
      "epoch": 7.6483154296875e-05,
      "step": 12531,
      "training_step_time": 0.3820457458496094
    },
    {
      "epoch": 7.64892578125e-05,
      "model_forward_time": 0.11505770683288574,
      "step": 12532
    },
    {
      "epoch": 7.64892578125e-05,
      "step": 12532,
      "training_step_time": 0.3856041431427002
    },
    {
      "epoch": 7.6495361328125e-05,
      "model_forward_time": 0.11495113372802734,
      "step": 12533
    },
    {
      "epoch": 7.6495361328125e-05,
      "step": 12533,
      "training_step_time": 0.7477691173553467
    },
    {
      "epoch": 7.650146484375e-05,
      "model_forward_time": 0.11413168907165527,
      "step": 12534
    },
    {
      "epoch": 7.650146484375e-05,
      "step": 12534,
      "training_step_time": 0.3972194194793701
    },
    {
      "epoch": 7.6507568359375e-05,
      "model_forward_time": 0.11468195915222168,
      "step": 12535
    },
    {
      "epoch": 7.6507568359375e-05,
      "step": 12535,
      "training_step_time": 0.44869017601013184
    },
    {
      "epoch": 7.6513671875e-05,
      "model_forward_time": 0.11518168449401855,
      "step": 12536
    },
    {
      "epoch": 7.6513671875e-05,
      "step": 12536,
      "training_step_time": 0.4119584560394287
    },
    {
      "epoch": 7.6519775390625e-05,
      "model_forward_time": 0.1141657829284668,
      "step": 12537
    },
    {
      "epoch": 7.6519775390625e-05,
      "step": 12537,
      "training_step_time": 0.39087843894958496
    },
    {
      "epoch": 7.652587890625e-05,
      "model_forward_time": 0.11482834815979004,
      "step": 12538
    },
    {
      "epoch": 7.652587890625e-05,
      "step": 12538,
      "training_step_time": 0.380723237991333
    },
    {
      "epoch": 7.6531982421875e-05,
      "model_forward_time": 0.11528801918029785,
      "step": 12539
    },
    {
      "epoch": 7.6531982421875e-05,
      "step": 12539,
      "training_step_time": 0.8702929019927979
    },
    {
      "epoch": 7.65380859375e-05,
      "grad_norm": 0.12133840471506119,
      "learning_rate": 9.32460493430591e-05,
      "loss": 0.0575,
      "step": 12540
    },
    {
      "epoch": 7.65380859375e-05,
      "model_forward_time": 0.11472415924072266,
      "step": 12540
    },
    {
      "epoch": 7.65380859375e-05,
      "step": 12540,
      "training_step_time": 0.5057582855224609
    },
    {
      "epoch": 7.6544189453125e-05,
      "model_forward_time": 0.11439251899719238,
      "step": 12541
    },
    {
      "epoch": 7.6544189453125e-05,
      "step": 12541,
      "training_step_time": 0.43410634994506836
    },
    {
      "epoch": 7.655029296875e-05,
      "model_forward_time": 0.11417412757873535,
      "step": 12542
    },
    {
      "epoch": 7.655029296875e-05,
      "step": 12542,
      "training_step_time": 0.3943943977355957
    },
    {
      "epoch": 7.6556396484375e-05,
      "model_forward_time": 0.11388731002807617,
      "step": 12543
    },
    {
      "epoch": 7.6556396484375e-05,
      "step": 12543,
      "training_step_time": 0.38819098472595215
    },
    {
      "epoch": 7.65625e-05,
      "model_forward_time": 0.11411333084106445,
      "step": 12544
    },
    {
      "epoch": 7.65625e-05,
      "step": 12544,
      "training_step_time": 0.39355921745300293
    },
    {
      "epoch": 7.6568603515625e-05,
      "model_forward_time": 0.1150052547454834,
      "step": 12545
    },
    {
      "epoch": 7.6568603515625e-05,
      "step": 12545,
      "training_step_time": 0.9379477500915527
    },
    {
      "epoch": 7.657470703125e-05,
      "model_forward_time": 0.1144561767578125,
      "step": 12546
    },
    {
      "epoch": 7.657470703125e-05,
      "step": 12546,
      "training_step_time": 0.38732337951660156
    },
    {
      "epoch": 7.6580810546875e-05,
      "model_forward_time": 0.11404681205749512,
      "step": 12547
    },
    {
      "epoch": 7.6580810546875e-05,
      "step": 12547,
      "training_step_time": 0.42095303535461426
    },
    {
      "epoch": 7.65869140625e-05,
      "model_forward_time": 0.11458396911621094,
      "step": 12548
    },
    {
      "epoch": 7.65869140625e-05,
      "step": 12548,
      "training_step_time": 0.45257568359375
    },
    {
      "epoch": 7.6593017578125e-05,
      "model_forward_time": 0.11401557922363281,
      "step": 12549
    },
    {
      "epoch": 7.6593017578125e-05,
      "step": 12549,
      "training_step_time": 0.4182898998260498
    },
    {
      "epoch": 7.659912109375e-05,
      "grad_norm": 0.14123955368995667,
      "learning_rate": 9.323221127850441e-05,
      "loss": 0.0607,
      "step": 12550
    },
    {
      "epoch": 7.659912109375e-05,
      "model_forward_time": 0.1143805980682373,
      "step": 12550
    },
    {
      "epoch": 7.659912109375e-05,
      "step": 12550,
      "training_step_time": 0.39629316329956055
    },
    {
      "epoch": 7.6605224609375e-05,
      "model_forward_time": 0.11477851867675781,
      "step": 12551
    },
    {
      "epoch": 7.6605224609375e-05,
      "step": 12551,
      "training_step_time": 0.6697111129760742
    },
    {
      "epoch": 7.6611328125e-05,
      "model_forward_time": 0.11422133445739746,
      "step": 12552
    },
    {
      "epoch": 7.6611328125e-05,
      "step": 12552,
      "training_step_time": 0.4472968578338623
    },
    {
      "epoch": 7.6617431640625e-05,
      "model_forward_time": 0.11446142196655273,
      "step": 12553
    },
    {
      "epoch": 7.6617431640625e-05,
      "step": 12553,
      "training_step_time": 0.4062023162841797
    },
    {
      "epoch": 7.662353515625e-05,
      "model_forward_time": 0.11452651023864746,
      "step": 12554
    },
    {
      "epoch": 7.662353515625e-05,
      "step": 12554,
      "training_step_time": 0.47266459465026855
    },
    {
      "epoch": 7.6629638671875e-05,
      "model_forward_time": 0.11454319953918457,
      "step": 12555
    },
    {
      "epoch": 7.6629638671875e-05,
      "step": 12555,
      "training_step_time": 0.38402748107910156
    },
    {
      "epoch": 7.66357421875e-05,
      "model_forward_time": 0.11553788185119629,
      "step": 12556
    },
    {
      "epoch": 7.66357421875e-05,
      "step": 12556,
      "training_step_time": 0.38739800453186035
    },
    {
      "epoch": 7.6641845703125e-05,
      "model_forward_time": 0.11454463005065918,
      "step": 12557
    },
    {
      "epoch": 7.6641845703125e-05,
      "step": 12557,
      "training_step_time": 0.8227884769439697
    },
    {
      "epoch": 7.664794921875e-05,
      "model_forward_time": 0.1143941879272461,
      "step": 12558
    },
    {
      "epoch": 7.664794921875e-05,
      "step": 12558,
      "training_step_time": 0.39690279960632324
    },
    {
      "epoch": 7.6654052734375e-05,
      "model_forward_time": 0.1147928237915039,
      "step": 12559
    },
    {
      "epoch": 7.6654052734375e-05,
      "step": 12559,
      "training_step_time": 0.39191365242004395
    },
    {
      "epoch": 7.666015625e-05,
      "grad_norm": 0.1948423832654953,
      "learning_rate": 9.321836008114539e-05,
      "loss": 0.0519,
      "step": 12560
    },
    {
      "epoch": 7.666015625e-05,
      "model_forward_time": 0.11505508422851562,
      "step": 12560
    },
    {
      "epoch": 7.666015625e-05,
      "step": 12560,
      "training_step_time": 0.43000102043151855
    },
    {
      "epoch": 7.6666259765625e-05,
      "model_forward_time": 0.11461281776428223,
      "step": 12561
    },
    {
      "epoch": 7.6666259765625e-05,
      "step": 12561,
      "training_step_time": 0.4594881534576416
    },
    {
      "epoch": 7.667236328125e-05,
      "model_forward_time": 0.11384701728820801,
      "step": 12562
    },
    {
      "epoch": 7.667236328125e-05,
      "step": 12562,
      "training_step_time": 0.41574668884277344
    },
    {
      "epoch": 7.6678466796875e-05,
      "model_forward_time": 0.11490225791931152,
      "step": 12563
    },
    {
      "epoch": 7.6678466796875e-05,
      "step": 12563,
      "training_step_time": 0.6569719314575195
    },
    {
      "epoch": 7.66845703125e-05,
      "model_forward_time": 0.11469340324401855,
      "step": 12564
    },
    {
      "epoch": 7.66845703125e-05,
      "step": 12564,
      "training_step_time": 0.3627471923828125
    },
    {
      "epoch": 7.6690673828125e-05,
      "model_forward_time": 0.11465597152709961,
      "step": 12565
    },
    {
      "epoch": 7.6690673828125e-05,
      "step": 12565,
      "training_step_time": 0.4267704486846924
    },
    {
      "epoch": 7.669677734375e-05,
      "model_forward_time": 0.11445260047912598,
      "step": 12566
    },
    {
      "epoch": 7.669677734375e-05,
      "step": 12566,
      "training_step_time": 0.4493858814239502
    },
    {
      "epoch": 7.6702880859375e-05,
      "model_forward_time": 0.11448073387145996,
      "step": 12567
    },
    {
      "epoch": 7.6702880859375e-05,
      "step": 12567,
      "training_step_time": 0.44472813606262207
    },
    {
      "epoch": 7.6708984375e-05,
      "model_forward_time": 0.11469507217407227,
      "step": 12568
    },
    {
      "epoch": 7.6708984375e-05,
      "step": 12568,
      "training_step_time": 0.3971538543701172
    },
    {
      "epoch": 7.6715087890625e-05,
      "model_forward_time": 0.11462545394897461,
      "step": 12569
    },
    {
      "epoch": 7.6715087890625e-05,
      "step": 12569,
      "training_step_time": 0.7141597270965576
    },
    {
      "epoch": 7.672119140625e-05,
      "grad_norm": 0.1310776025056839,
      "learning_rate": 9.320449575518972e-05,
      "loss": 0.0616,
      "step": 12570
    },
    {
      "epoch": 7.672119140625e-05,
      "model_forward_time": 0.1141364574432373,
      "step": 12570
    },
    {
      "epoch": 7.672119140625e-05,
      "step": 12570,
      "training_step_time": 0.39624691009521484
    },
    {
      "epoch": 7.6727294921875e-05,
      "model_forward_time": 0.11417818069458008,
      "step": 12571
    },
    {
      "epoch": 7.6727294921875e-05,
      "step": 12571,
      "training_step_time": 0.4009833335876465
    },
    {
      "epoch": 7.67333984375e-05,
      "model_forward_time": 0.11461567878723145,
      "step": 12572
    },
    {
      "epoch": 7.67333984375e-05,
      "step": 12572,
      "training_step_time": 0.389141321182251
    },
    {
      "epoch": 7.6739501953125e-05,
      "model_forward_time": 0.11549520492553711,
      "step": 12573
    },
    {
      "epoch": 7.6739501953125e-05,
      "step": 12573,
      "training_step_time": 0.40172433853149414
    },
    {
      "epoch": 7.674560546875e-05,
      "model_forward_time": 0.11461949348449707,
      "step": 12574
    },
    {
      "epoch": 7.674560546875e-05,
      "step": 12574,
      "training_step_time": 0.4349992275238037
    },
    {
      "epoch": 7.6751708984375e-05,
      "model_forward_time": 0.11492252349853516,
      "step": 12575
    },
    {
      "epoch": 7.6751708984375e-05,
      "step": 12575,
      "training_step_time": 0.740147590637207
    },
    {
      "epoch": 7.67578125e-05,
      "model_forward_time": 0.11395525932312012,
      "step": 12576
    },
    {
      "epoch": 7.67578125e-05,
      "step": 12576,
      "training_step_time": 0.39565134048461914
    },
    {
      "epoch": 7.6763916015625e-05,
      "model_forward_time": 0.11532783508300781,
      "step": 12577
    },
    {
      "epoch": 7.6763916015625e-05,
      "step": 12577,
      "training_step_time": 0.4214353561401367
    },
    {
      "epoch": 7.677001953125e-05,
      "model_forward_time": 0.11455512046813965,
      "step": 12578
    },
    {
      "epoch": 7.677001953125e-05,
      "step": 12578,
      "training_step_time": 0.4610307216644287
    },
    {
      "epoch": 7.6776123046875e-05,
      "model_forward_time": 0.11440753936767578,
      "step": 12579
    },
    {
      "epoch": 7.6776123046875e-05,
      "step": 12579,
      "training_step_time": 0.45078229904174805
    },
    {
      "epoch": 7.67822265625e-05,
      "grad_norm": 0.1497996598482132,
      "learning_rate": 9.319061830484898e-05,
      "loss": 0.0614,
      "step": 12580
    },
    {
      "epoch": 7.67822265625e-05,
      "model_forward_time": 0.11464524269104004,
      "step": 12580
    },
    {
      "epoch": 7.67822265625e-05,
      "step": 12580,
      "training_step_time": 0.3980832099914551
    },
    {
      "epoch": 7.6788330078125e-05,
      "model_forward_time": 0.11450839042663574,
      "step": 12581
    },
    {
      "epoch": 7.6788330078125e-05,
      "step": 12581,
      "training_step_time": 0.5816347599029541
    },
    {
      "epoch": 7.679443359375e-05,
      "model_forward_time": 0.11479949951171875,
      "step": 12582
    },
    {
      "epoch": 7.679443359375e-05,
      "step": 12582,
      "training_step_time": 0.39096856117248535
    },
    {
      "epoch": 7.6800537109375e-05,
      "model_forward_time": 0.11514496803283691,
      "step": 12583
    },
    {
      "epoch": 7.6800537109375e-05,
      "step": 12583,
      "training_step_time": 0.3965423107147217
    },
    {
      "epoch": 7.6806640625e-05,
      "model_forward_time": 0.11420178413391113,
      "step": 12584
    },
    {
      "epoch": 7.6806640625e-05,
      "step": 12584,
      "training_step_time": 0.39490318298339844
    },
    {
      "epoch": 7.6812744140625e-05,
      "model_forward_time": 0.11538529396057129,
      "step": 12585
    },
    {
      "epoch": 7.6812744140625e-05,
      "step": 12585,
      "training_step_time": 0.3938262462615967
    },
    {
      "epoch": 7.681884765625e-05,
      "model_forward_time": 0.11514544486999512,
      "step": 12586
    },
    {
      "epoch": 7.681884765625e-05,
      "step": 12586,
      "training_step_time": 0.3899729251861572
    },
    {
      "epoch": 7.6824951171875e-05,
      "model_forward_time": 0.11503076553344727,
      "step": 12587
    },
    {
      "epoch": 7.6824951171875e-05,
      "step": 12587,
      "training_step_time": 0.878943681716919
    },
    {
      "epoch": 7.68310546875e-05,
      "model_forward_time": 0.11414957046508789,
      "step": 12588
    },
    {
      "epoch": 7.68310546875e-05,
      "step": 12588,
      "training_step_time": 0.4356546401977539
    },
    {
      "epoch": 7.6837158203125e-05,
      "model_forward_time": 0.11447644233703613,
      "step": 12589
    },
    {
      "epoch": 7.6837158203125e-05,
      "step": 12589,
      "training_step_time": 0.4009087085723877
    },
    {
      "epoch": 7.684326171875e-05,
      "grad_norm": 0.19583366811275482,
      "learning_rate": 9.317672773433876e-05,
      "loss": 0.0575,
      "step": 12590
    },
    {
      "epoch": 7.684326171875e-05,
      "model_forward_time": 0.11475801467895508,
      "step": 12590
    },
    {
      "epoch": 7.684326171875e-05,
      "step": 12590,
      "training_step_time": 0.40622472763061523
    },
    {
      "epoch": 7.6849365234375e-05,
      "model_forward_time": 0.1146247386932373,
      "step": 12591
    },
    {
      "epoch": 7.6849365234375e-05,
      "step": 12591,
      "training_step_time": 0.3736560344696045
    },
    {
      "epoch": 7.685546875e-05,
      "model_forward_time": 0.1139678955078125,
      "step": 12592
    },
    {
      "epoch": 7.685546875e-05,
      "step": 12592,
      "training_step_time": 0.4729914665222168
    },
    {
      "epoch": 7.6861572265625e-05,
      "model_forward_time": 0.1147456169128418,
      "step": 12593
    },
    {
      "epoch": 7.6861572265625e-05,
      "step": 12593,
      "training_step_time": 0.5645573139190674
    },
    {
      "epoch": 7.686767578125e-05,
      "model_forward_time": 0.11426782608032227,
      "step": 12594
    },
    {
      "epoch": 7.686767578125e-05,
      "step": 12594,
      "training_step_time": 0.39585161209106445
    },
    {
      "epoch": 7.6873779296875e-05,
      "model_forward_time": 0.11440682411193848,
      "step": 12595
    },
    {
      "epoch": 7.6873779296875e-05,
      "step": 12595,
      "training_step_time": 0.3878612518310547
    },
    {
      "epoch": 7.68798828125e-05,
      "model_forward_time": 0.11499357223510742,
      "step": 12596
    },
    {
      "epoch": 7.68798828125e-05,
      "step": 12596,
      "training_step_time": 0.39077115058898926
    },
    {
      "epoch": 7.6885986328125e-05,
      "model_forward_time": 0.11491775512695312,
      "step": 12597
    },
    {
      "epoch": 7.6885986328125e-05,
      "step": 12597,
      "training_step_time": 0.39575767517089844
    },
    {
      "epoch": 7.689208984375e-05,
      "model_forward_time": 0.11518025398254395,
      "step": 12598
    },
    {
      "epoch": 7.689208984375e-05,
      "step": 12598,
      "training_step_time": 0.39423227310180664
    },
    {
      "epoch": 7.6898193359375e-05,
      "model_forward_time": 0.11558651924133301,
      "step": 12599
    },
    {
      "epoch": 7.6898193359375e-05,
      "step": 12599,
      "training_step_time": 0.9089984893798828
    },
    {
      "epoch": 7.6904296875e-05,
      "grad_norm": 0.1858498752117157,
      "learning_rate": 9.316282404787871e-05,
      "loss": 0.0598,
      "step": 12600
    },
    {
      "epoch": 7.6904296875e-05,
      "model_forward_time": 0.11452531814575195,
      "step": 12600
    },
    {
      "epoch": 7.6904296875e-05,
      "step": 12600,
      "training_step_time": 0.4087948799133301
    },
    {
      "epoch": 7.6910400390625e-05,
      "model_forward_time": 0.11430907249450684,
      "step": 12601
    },
    {
      "epoch": 7.6910400390625e-05,
      "step": 12601,
      "training_step_time": 0.4201793670654297
    },
    {
      "epoch": 7.691650390625e-05,
      "model_forward_time": 0.1140594482421875,
      "step": 12602
    },
    {
      "epoch": 7.691650390625e-05,
      "step": 12602,
      "training_step_time": 0.43776726722717285
    },
    {
      "epoch": 7.6922607421875e-05,
      "model_forward_time": 0.11449623107910156,
      "step": 12603
    },
    {
      "epoch": 7.6922607421875e-05,
      "step": 12603,
      "training_step_time": 0.3874082565307617
    },
    {
      "epoch": 7.69287109375e-05,
      "model_forward_time": 0.11455535888671875,
      "step": 12604
    },
    {
      "epoch": 7.69287109375e-05,
      "step": 12604,
      "training_step_time": 0.3610846996307373
    },
    {
      "epoch": 7.6934814453125e-05,
      "model_forward_time": 0.1146998405456543,
      "step": 12605
    },
    {
      "epoch": 7.6934814453125e-05,
      "step": 12605,
      "training_step_time": 0.5692198276519775
    },
    {
      "epoch": 7.694091796875e-05,
      "model_forward_time": 0.11408352851867676,
      "step": 12606
    },
    {
      "epoch": 7.694091796875e-05,
      "step": 12606,
      "training_step_time": 0.460695743560791
    },
    {
      "epoch": 7.6947021484375e-05,
      "model_forward_time": 0.11507940292358398,
      "step": 12607
    },
    {
      "epoch": 7.6947021484375e-05,
      "step": 12607,
      "training_step_time": 0.44791555404663086
    },
    {
      "epoch": 7.6953125e-05,
      "model_forward_time": 0.11462593078613281,
      "step": 12608
    },
    {
      "epoch": 7.6953125e-05,
      "step": 12608,
      "training_step_time": 0.395397424697876
    },
    {
      "epoch": 7.6959228515625e-05,
      "model_forward_time": 0.11498332023620605,
      "step": 12609
    },
    {
      "epoch": 7.6959228515625e-05,
      "step": 12609,
      "training_step_time": 0.392254114151001
    },
    {
      "epoch": 7.696533203125e-05,
      "grad_norm": 0.1990409940481186,
      "learning_rate": 9.314890724969232e-05,
      "loss": 0.0557,
      "step": 12610
    },
    {
      "epoch": 7.696533203125e-05,
      "model_forward_time": 0.11420106887817383,
      "step": 12610
    },
    {
      "epoch": 7.696533203125e-05,
      "step": 12610,
      "training_step_time": 0.3809034824371338
    },
    {
      "epoch": 7.6971435546875e-05,
      "model_forward_time": 0.11491274833679199,
      "step": 12611
    },
    {
      "epoch": 7.6971435546875e-05,
      "step": 12611,
      "training_step_time": 0.7475295066833496
    },
    {
      "epoch": 7.69775390625e-05,
      "model_forward_time": 0.11438512802124023,
      "step": 12612
    },
    {
      "epoch": 7.69775390625e-05,
      "step": 12612,
      "training_step_time": 0.3899509906768799
    },
    {
      "epoch": 7.6983642578125e-05,
      "model_forward_time": 0.11430835723876953,
      "step": 12613
    },
    {
      "epoch": 7.6983642578125e-05,
      "step": 12613,
      "training_step_time": 0.4380347728729248
    },
    {
      "epoch": 7.698974609375e-05,
      "model_forward_time": 0.11479377746582031,
      "step": 12614
    },
    {
      "epoch": 7.698974609375e-05,
      "step": 12614,
      "training_step_time": 0.46017980575561523
    },
    {
      "epoch": 7.6995849609375e-05,
      "model_forward_time": 0.11385178565979004,
      "step": 12615
    },
    {
      "epoch": 7.6995849609375e-05,
      "step": 12615,
      "training_step_time": 0.42194533348083496
    },
    {
      "epoch": 7.7001953125e-05,
      "model_forward_time": 0.11404824256896973,
      "step": 12616
    },
    {
      "epoch": 7.7001953125e-05,
      "step": 12616,
      "training_step_time": 0.3887317180633545
    },
    {
      "epoch": 7.7008056640625e-05,
      "model_forward_time": 0.11517930030822754,
      "step": 12617
    },
    {
      "epoch": 7.7008056640625e-05,
      "step": 12617,
      "training_step_time": 0.6955816745758057
    },
    {
      "epoch": 7.701416015625e-05,
      "model_forward_time": 0.11482667922973633,
      "step": 12618
    },
    {
      "epoch": 7.701416015625e-05,
      "step": 12618,
      "training_step_time": 0.43541932106018066
    },
    {
      "epoch": 7.7020263671875e-05,
      "model_forward_time": 0.11503911018371582,
      "step": 12619
    },
    {
      "epoch": 7.7020263671875e-05,
      "step": 12619,
      "training_step_time": 0.4444084167480469
    },
    {
      "epoch": 7.70263671875e-05,
      "grad_norm": 0.15429255366325378,
      "learning_rate": 9.313497734400722e-05,
      "loss": 0.0578,
      "step": 12620
    },
    {
      "epoch": 7.70263671875e-05,
      "model_forward_time": 0.11419463157653809,
      "step": 12620
    },
    {
      "epoch": 7.70263671875e-05,
      "step": 12620,
      "training_step_time": 0.408402681350708
    },
    {
      "epoch": 7.7032470703125e-05,
      "model_forward_time": 0.11410403251647949,
      "step": 12621
    },
    {
      "epoch": 7.7032470703125e-05,
      "step": 12621,
      "training_step_time": 0.4104790687561035
    },
    {
      "epoch": 7.703857421875e-05,
      "model_forward_time": 0.11440753936767578,
      "step": 12622
    },
    {
      "epoch": 7.703857421875e-05,
      "step": 12622,
      "training_step_time": 0.38505077362060547
    },
    {
      "epoch": 7.7044677734375e-05,
      "model_forward_time": 0.11514091491699219,
      "step": 12623
    },
    {
      "epoch": 7.7044677734375e-05,
      "step": 12623,
      "training_step_time": 0.6456236839294434
    },
    {
      "epoch": 7.705078125e-05,
      "model_forward_time": 0.11464357376098633,
      "step": 12624
    },
    {
      "epoch": 7.705078125e-05,
      "step": 12624,
      "training_step_time": 0.38573384284973145
    },
    {
      "epoch": 7.7056884765625e-05,
      "model_forward_time": 0.11492466926574707,
      "step": 12625
    },
    {
      "epoch": 7.7056884765625e-05,
      "step": 12625,
      "training_step_time": 0.39478349685668945
    },
    {
      "epoch": 7.706298828125e-05,
      "model_forward_time": 0.11470246315002441,
      "step": 12626
    },
    {
      "epoch": 7.706298828125e-05,
      "step": 12626,
      "training_step_time": 0.38858556747436523
    },
    {
      "epoch": 7.7069091796875e-05,
      "model_forward_time": 0.11488509178161621,
      "step": 12627
    },
    {
      "epoch": 7.7069091796875e-05,
      "step": 12627,
      "training_step_time": 0.4276258945465088
    },
    {
      "epoch": 7.70751953125e-05,
      "model_forward_time": 0.11484360694885254,
      "step": 12628
    },
    {
      "epoch": 7.70751953125e-05,
      "step": 12628,
      "training_step_time": 0.46029138565063477
    },
    {
      "epoch": 7.7081298828125e-05,
      "model_forward_time": 0.11515378952026367,
      "step": 12629
    },
    {
      "epoch": 7.7081298828125e-05,
      "step": 12629,
      "training_step_time": 0.5786139965057373
    },
    {
      "epoch": 7.708740234375e-05,
      "grad_norm": 0.22530589997768402,
      "learning_rate": 9.31210343350549e-05,
      "loss": 0.0581,
      "step": 12630
    },
    {
      "epoch": 7.708740234375e-05,
      "model_forward_time": 0.11567926406860352,
      "step": 12630
    },
    {
      "epoch": 7.708740234375e-05,
      "step": 12630,
      "training_step_time": 0.39990973472595215
    },
    {
      "epoch": 7.7093505859375e-05,
      "model_forward_time": 0.11419105529785156,
      "step": 12631
    },
    {
      "epoch": 7.7093505859375e-05,
      "step": 12631,
      "training_step_time": 0.362811803817749
    },
    {
      "epoch": 7.7099609375e-05,
      "model_forward_time": 0.11468791961669922,
      "step": 12632
    },
    {
      "epoch": 7.7099609375e-05,
      "step": 12632,
      "training_step_time": 0.4340376853942871
    },
    {
      "epoch": 7.7105712890625e-05,
      "model_forward_time": 0.11420917510986328,
      "step": 12633
    },
    {
      "epoch": 7.7105712890625e-05,
      "step": 12633,
      "training_step_time": 0.45848703384399414
    },
    {
      "epoch": 7.711181640625e-05,
      "model_forward_time": 0.11444878578186035,
      "step": 12634
    },
    {
      "epoch": 7.711181640625e-05,
      "step": 12634,
      "training_step_time": 0.4067211151123047
    },
    {
      "epoch": 7.7117919921875e-05,
      "model_forward_time": 0.11459994316101074,
      "step": 12635
    },
    {
      "epoch": 7.7117919921875e-05,
      "step": 12635,
      "training_step_time": 0.6670198440551758
    },
    {
      "epoch": 7.71240234375e-05,
      "model_forward_time": 0.11372494697570801,
      "step": 12636
    },
    {
      "epoch": 7.71240234375e-05,
      "step": 12636,
      "training_step_time": 0.37874674797058105
    },
    {
      "epoch": 7.7130126953125e-05,
      "model_forward_time": 0.11636114120483398,
      "step": 12637
    },
    {
      "epoch": 7.7130126953125e-05,
      "step": 12637,
      "training_step_time": 0.40059351921081543
    },
    {
      "epoch": 7.713623046875e-05,
      "model_forward_time": 0.11473679542541504,
      "step": 12638
    },
    {
      "epoch": 7.713623046875e-05,
      "step": 12638,
      "training_step_time": 0.3885958194732666
    },
    {
      "epoch": 7.7142333984375e-05,
      "model_forward_time": 0.11470150947570801,
      "step": 12639
    },
    {
      "epoch": 7.7142333984375e-05,
      "step": 12639,
      "training_step_time": 0.3883945941925049
    },
    {
      "epoch": 7.71484375e-05,
      "grad_norm": 0.15739625692367554,
      "learning_rate": 9.31070782270709e-05,
      "loss": 0.0585,
      "step": 12640
    },
    {
      "epoch": 7.71484375e-05,
      "model_forward_time": 0.11495184898376465,
      "step": 12640
    },
    {
      "epoch": 7.71484375e-05,
      "step": 12640,
      "training_step_time": 0.4127359390258789
    },
    {
      "epoch": 7.7154541015625e-05,
      "model_forward_time": 0.11435699462890625,
      "step": 12641
    },
    {
      "epoch": 7.7154541015625e-05,
      "step": 12641,
      "training_step_time": 0.7797598838806152
    },
    {
      "epoch": 7.716064453125e-05,
      "model_forward_time": 0.11454439163208008,
      "step": 12642
    },
    {
      "epoch": 7.716064453125e-05,
      "step": 12642,
      "training_step_time": 0.47237515449523926
    },
    {
      "epoch": 7.7166748046875e-05,
      "model_forward_time": 0.11409378051757812,
      "step": 12643
    },
    {
      "epoch": 7.7166748046875e-05,
      "step": 12643,
      "training_step_time": 0.39507412910461426
    },
    {
      "epoch": 7.71728515625e-05,
      "model_forward_time": 0.11394429206848145,
      "step": 12644
    },
    {
      "epoch": 7.71728515625e-05,
      "step": 12644,
      "training_step_time": 0.38065290451049805
    },
    {
      "epoch": 7.7178955078125e-05,
      "model_forward_time": 0.1148381233215332,
      "step": 12645
    },
    {
      "epoch": 7.7178955078125e-05,
      "step": 12645,
      "training_step_time": 0.3823204040527344
    },
    {
      "epoch": 7.718505859375e-05,
      "model_forward_time": 0.11455249786376953,
      "step": 12646
    },
    {
      "epoch": 7.718505859375e-05,
      "step": 12646,
      "training_step_time": 0.42977046966552734
    },
    {
      "epoch": 7.7191162109375e-05,
      "model_forward_time": 0.11531877517700195,
      "step": 12647
    },
    {
      "epoch": 7.7191162109375e-05,
      "step": 12647,
      "training_step_time": 0.7101194858551025
    },
    {
      "epoch": 7.7197265625e-05,
      "model_forward_time": 0.11432027816772461,
      "step": 12648
    },
    {
      "epoch": 7.7197265625e-05,
      "step": 12648,
      "training_step_time": 0.40003418922424316
    },
    {
      "epoch": 7.7203369140625e-05,
      "model_forward_time": 0.1148231029510498,
      "step": 12649
    },
    {
      "epoch": 7.7203369140625e-05,
      "step": 12649,
      "training_step_time": 0.39197587966918945
    },
    {
      "epoch": 7.720947265625e-05,
      "grad_norm": 0.1605243682861328,
      "learning_rate": 9.309310902429472e-05,
      "loss": 0.0605,
      "step": 12650
    },
    {
      "epoch": 7.720947265625e-05,
      "model_forward_time": 0.11475229263305664,
      "step": 12650
    },
    {
      "epoch": 7.720947265625e-05,
      "step": 12650,
      "training_step_time": 0.39382505416870117
    },
    {
      "epoch": 7.7215576171875e-05,
      "model_forward_time": 0.11432075500488281,
      "step": 12651
    },
    {
      "epoch": 7.7215576171875e-05,
      "step": 12651,
      "training_step_time": 0.3898899555206299
    },
    {
      "epoch": 7.72216796875e-05,
      "model_forward_time": 0.11468839645385742,
      "step": 12652
    },
    {
      "epoch": 7.72216796875e-05,
      "step": 12652,
      "training_step_time": 0.40612220764160156
    },
    {
      "epoch": 7.7227783203125e-05,
      "model_forward_time": 0.11484718322753906,
      "step": 12653
    },
    {
      "epoch": 7.7227783203125e-05,
      "step": 12653,
      "training_step_time": 0.6888930797576904
    },
    {
      "epoch": 7.723388671875e-05,
      "model_forward_time": 0.11487936973571777,
      "step": 12654
    },
    {
      "epoch": 7.723388671875e-05,
      "step": 12654,
      "training_step_time": 0.4154789447784424
    },
    {
      "epoch": 7.7239990234375e-05,
      "model_forward_time": 0.11509275436401367,
      "step": 12655
    },
    {
      "epoch": 7.7239990234375e-05,
      "step": 12655,
      "training_step_time": 0.43588924407958984
    },
    {
      "epoch": 7.724609375e-05,
      "model_forward_time": 0.11441707611083984,
      "step": 12656
    },
    {
      "epoch": 7.724609375e-05,
      "step": 12656,
      "training_step_time": 0.3944582939147949
    },
    {
      "epoch": 7.7252197265625e-05,
      "model_forward_time": 0.11488723754882812,
      "step": 12657
    },
    {
      "epoch": 7.7252197265625e-05,
      "step": 12657,
      "training_step_time": 0.3859524726867676
    },
    {
      "epoch": 7.725830078125e-05,
      "model_forward_time": 0.11453557014465332,
      "step": 12658
    },
    {
      "epoch": 7.725830078125e-05,
      "step": 12658,
      "training_step_time": 0.39147019386291504
    },
    {
      "epoch": 7.7264404296875e-05,
      "model_forward_time": 0.11491942405700684,
      "step": 12659
    },
    {
      "epoch": 7.7264404296875e-05,
      "step": 12659,
      "training_step_time": 1.0067877769470215
    },
    {
      "epoch": 7.72705078125e-05,
      "grad_norm": 0.15728098154067993,
      "learning_rate": 9.30791267309698e-05,
      "loss": 0.061,
      "step": 12660
    },
    {
      "epoch": 7.72705078125e-05,
      "model_forward_time": 0.1148674488067627,
      "step": 12660
    },
    {
      "epoch": 7.72705078125e-05,
      "step": 12660,
      "training_step_time": 0.44281768798828125
    },
    {
      "epoch": 7.7276611328125e-05,
      "model_forward_time": 0.11395621299743652,
      "step": 12661
    },
    {
      "epoch": 7.7276611328125e-05,
      "step": 12661,
      "training_step_time": 0.3912620544433594
    },
    {
      "epoch": 7.728271484375e-05,
      "model_forward_time": 0.11633157730102539,
      "step": 12662
    },
    {
      "epoch": 7.728271484375e-05,
      "step": 12662,
      "training_step_time": 0.38873863220214844
    },
    {
      "epoch": 7.7288818359375e-05,
      "model_forward_time": 0.11421966552734375,
      "step": 12663
    },
    {
      "epoch": 7.7288818359375e-05,
      "step": 12663,
      "training_step_time": 0.377610445022583
    },
    {
      "epoch": 7.7294921875e-05,
      "model_forward_time": 0.11452317237854004,
      "step": 12664
    },
    {
      "epoch": 7.7294921875e-05,
      "step": 12664,
      "training_step_time": 0.4178035259246826
    },
    {
      "epoch": 7.7301025390625e-05,
      "model_forward_time": 0.11497235298156738,
      "step": 12665
    },
    {
      "epoch": 7.7301025390625e-05,
      "step": 12665,
      "training_step_time": 0.7025036811828613
    },
    {
      "epoch": 7.730712890625e-05,
      "model_forward_time": 0.11397552490234375,
      "step": 12666
    },
    {
      "epoch": 7.730712890625e-05,
      "step": 12666,
      "training_step_time": 0.4058418273925781
    },
    {
      "epoch": 7.7313232421875e-05,
      "model_forward_time": 0.11460638046264648,
      "step": 12667
    },
    {
      "epoch": 7.7313232421875e-05,
      "step": 12667,
      "training_step_time": 0.4572458267211914
    },
    {
      "epoch": 7.73193359375e-05,
      "model_forward_time": 0.11432099342346191,
      "step": 12668
    },
    {
      "epoch": 7.73193359375e-05,
      "step": 12668,
      "training_step_time": 0.4184401035308838
    },
    {
      "epoch": 7.7325439453125e-05,
      "model_forward_time": 0.11442375183105469,
      "step": 12669
    },
    {
      "epoch": 7.7325439453125e-05,
      "step": 12669,
      "training_step_time": 0.3903534412384033
    },
    {
      "epoch": 7.733154296875e-05,
      "grad_norm": 0.2803137004375458,
      "learning_rate": 9.306513135134362e-05,
      "loss": 0.062,
      "step": 12670
    },
    {
      "epoch": 7.733154296875e-05,
      "model_forward_time": 0.11394000053405762,
      "step": 12670
    },
    {
      "epoch": 7.733154296875e-05,
      "step": 12670,
      "training_step_time": 0.3808290958404541
    },
    {
      "epoch": 7.7337646484375e-05,
      "model_forward_time": 0.11506271362304688,
      "step": 12671
    },
    {
      "epoch": 7.7337646484375e-05,
      "step": 12671,
      "training_step_time": 0.5563602447509766
    },
    {
      "epoch": 7.734375e-05,
      "model_forward_time": 0.11448884010314941,
      "step": 12672
    },
    {
      "epoch": 7.734375e-05,
      "step": 12672,
      "training_step_time": 0.43541646003723145
    },
    {
      "epoch": 7.7349853515625e-05,
      "model_forward_time": 0.11510014533996582,
      "step": 12673
    },
    {
      "epoch": 7.7349853515625e-05,
      "step": 12673,
      "training_step_time": 0.4155600070953369
    },
    {
      "epoch": 7.735595703125e-05,
      "model_forward_time": 0.11503219604492188,
      "step": 12674
    },
    {
      "epoch": 7.735595703125e-05,
      "step": 12674,
      "training_step_time": 0.38799095153808594
    },
    {
      "epoch": 7.7362060546875e-05,
      "model_forward_time": 0.11511754989624023,
      "step": 12675
    },
    {
      "epoch": 7.7362060546875e-05,
      "step": 12675,
      "training_step_time": 0.39209842681884766
    },
    {
      "epoch": 7.73681640625e-05,
      "model_forward_time": 0.11517715454101562,
      "step": 12676
    },
    {
      "epoch": 7.73681640625e-05,
      "step": 12676,
      "training_step_time": 0.3993959426879883
    },
    {
      "epoch": 7.7374267578125e-05,
      "model_forward_time": 0.11577296257019043,
      "step": 12677
    },
    {
      "epoch": 7.7374267578125e-05,
      "step": 12677,
      "training_step_time": 0.7565045356750488
    },
    {
      "epoch": 7.738037109375e-05,
      "model_forward_time": 0.11443090438842773,
      "step": 12678
    },
    {
      "epoch": 7.738037109375e-05,
      "step": 12678,
      "training_step_time": 0.3899350166320801
    },
    {
      "epoch": 7.7386474609375e-05,
      "model_forward_time": 0.11461567878723145,
      "step": 12679
    },
    {
      "epoch": 7.7386474609375e-05,
      "step": 12679,
      "training_step_time": 0.40195655822753906
    },
    {
      "epoch": 7.7392578125e-05,
      "grad_norm": 0.20215538144111633,
      "learning_rate": 9.305112288966761e-05,
      "loss": 0.0553,
      "step": 12680
    },
    {
      "epoch": 7.7392578125e-05,
      "model_forward_time": 0.11394071578979492,
      "step": 12680
    },
    {
      "epoch": 7.7392578125e-05,
      "step": 12680,
      "training_step_time": 0.41856884956359863
    },
    {
      "epoch": 7.7398681640625e-05,
      "model_forward_time": 0.11415624618530273,
      "step": 12681
    },
    {
      "epoch": 7.7398681640625e-05,
      "step": 12681,
      "training_step_time": 0.3859727382659912
    },
    {
      "epoch": 7.740478515625e-05,
      "model_forward_time": 0.11425328254699707,
      "step": 12682
    },
    {
      "epoch": 7.740478515625e-05,
      "step": 12682,
      "training_step_time": 0.41385769844055176
    },
    {
      "epoch": 7.7410888671875e-05,
      "model_forward_time": 0.11528563499450684,
      "step": 12683
    },
    {
      "epoch": 7.7410888671875e-05,
      "step": 12683,
      "training_step_time": 0.666053056716919
    },
    {
      "epoch": 7.74169921875e-05,
      "model_forward_time": 0.1144251823425293,
      "step": 12684
    },
    {
      "epoch": 7.74169921875e-05,
      "step": 12684,
      "training_step_time": 0.3859739303588867
    },
    {
      "epoch": 7.7423095703125e-05,
      "model_forward_time": 0.11486124992370605,
      "step": 12685
    },
    {
      "epoch": 7.7423095703125e-05,
      "step": 12685,
      "training_step_time": 0.3626091480255127
    },
    {
      "epoch": 7.742919921875e-05,
      "model_forward_time": 0.1143960952758789,
      "step": 12686
    },
    {
      "epoch": 7.742919921875e-05,
      "step": 12686,
      "training_step_time": 0.43033266067504883
    },
    {
      "epoch": 7.7435302734375e-05,
      "model_forward_time": 0.11506056785583496,
      "step": 12687
    },
    {
      "epoch": 7.7435302734375e-05,
      "step": 12687,
      "training_step_time": 0.4626889228820801
    },
    {
      "epoch": 7.744140625e-05,
      "model_forward_time": 0.11503291130065918,
      "step": 12688
    },
    {
      "epoch": 7.744140625e-05,
      "step": 12688,
      "training_step_time": 0.42659974098205566
    },
    {
      "epoch": 7.7447509765625e-05,
      "model_forward_time": 0.11537742614746094,
      "step": 12689
    },
    {
      "epoch": 7.7447509765625e-05,
      "step": 12689,
      "training_step_time": 0.6687021255493164
    },
    {
      "epoch": 7.745361328125e-05,
      "grad_norm": 0.2882724404335022,
      "learning_rate": 9.30371013501972e-05,
      "loss": 0.0683,
      "step": 12690
    },
    {
      "epoch": 7.745361328125e-05,
      "model_forward_time": 0.11515259742736816,
      "step": 12690
    },
    {
      "epoch": 7.745361328125e-05,
      "step": 12690,
      "training_step_time": 0.46976733207702637
    },
    {
      "epoch": 7.7459716796875e-05,
      "model_forward_time": 0.11433768272399902,
      "step": 12691
    },
    {
      "epoch": 7.7459716796875e-05,
      "step": 12691,
      "training_step_time": 0.38340115547180176
    },
    {
      "epoch": 7.74658203125e-05,
      "model_forward_time": 0.11475539207458496,
      "step": 12692
    },
    {
      "epoch": 7.74658203125e-05,
      "step": 12692,
      "training_step_time": 0.37525177001953125
    },
    {
      "epoch": 7.7471923828125e-05,
      "model_forward_time": 0.11482429504394531,
      "step": 12693
    },
    {
      "epoch": 7.7471923828125e-05,
      "step": 12693,
      "training_step_time": 0.41814279556274414
    },
    {
      "epoch": 7.747802734375e-05,
      "model_forward_time": 0.1141669750213623,
      "step": 12694
    },
    {
      "epoch": 7.747802734375e-05,
      "step": 12694,
      "training_step_time": 0.42485737800598145
    },
    {
      "epoch": 7.7484130859375e-05,
      "model_forward_time": 0.11484456062316895,
      "step": 12695
    },
    {
      "epoch": 7.7484130859375e-05,
      "step": 12695,
      "training_step_time": 0.43248987197875977
    },
    {
      "epoch": 7.7490234375e-05,
      "model_forward_time": 0.11546206474304199,
      "step": 12696
    },
    {
      "epoch": 7.7490234375e-05,
      "step": 12696,
      "training_step_time": 0.3925609588623047
    },
    {
      "epoch": 7.7496337890625e-05,
      "model_forward_time": 0.11599469184875488,
      "step": 12697
    },
    {
      "epoch": 7.7496337890625e-05,
      "step": 12697,
      "training_step_time": 0.3986203670501709
    },
    {
      "epoch": 7.750244140625e-05,
      "model_forward_time": 0.11515092849731445,
      "step": 12698
    },
    {
      "epoch": 7.750244140625e-05,
      "step": 12698,
      "training_step_time": 0.40953660011291504
    },
    {
      "epoch": 7.7508544921875e-05,
      "model_forward_time": 0.1146092414855957,
      "step": 12699
    },
    {
      "epoch": 7.7508544921875e-05,
      "step": 12699,
      "training_step_time": 0.3627891540527344
    },
    {
      "epoch": 7.75146484375e-05,
      "grad_norm": 0.18819400668144226,
      "learning_rate": 9.30230667371917e-05,
      "loss": 0.0601,
      "step": 12700
    },
    {
      "epoch": 7.75146484375e-05,
      "model_forward_time": 0.11456775665283203,
      "step": 12700
    },
    {
      "epoch": 7.75146484375e-05,
      "step": 12700,
      "training_step_time": 0.44451069831848145
    },
    {
      "epoch": 7.7520751953125e-05,
      "model_forward_time": 0.11511611938476562,
      "step": 12701
    },
    {
      "epoch": 7.7520751953125e-05,
      "step": 12701,
      "training_step_time": 0.79128098487854
    },
    {
      "epoch": 7.752685546875e-05,
      "model_forward_time": 0.11458349227905273,
      "step": 12702
    },
    {
      "epoch": 7.752685546875e-05,
      "step": 12702,
      "training_step_time": 0.43840765953063965
    },
    {
      "epoch": 7.7532958984375e-05,
      "model_forward_time": 0.11540794372558594,
      "step": 12703
    },
    {
      "epoch": 7.7532958984375e-05,
      "step": 12703,
      "training_step_time": 0.38695836067199707
    },
    {
      "epoch": 7.75390625e-05,
      "model_forward_time": 0.11432456970214844,
      "step": 12704
    },
    {
      "epoch": 7.75390625e-05,
      "step": 12704,
      "training_step_time": 0.38292503356933594
    },
    {
      "epoch": 7.7545166015625e-05,
      "model_forward_time": 0.11446404457092285,
      "step": 12705
    },
    {
      "epoch": 7.7545166015625e-05,
      "step": 12705,
      "training_step_time": 0.3975331783294678
    },
    {
      "epoch": 7.755126953125e-05,
      "model_forward_time": 0.11426663398742676,
      "step": 12706
    },
    {
      "epoch": 7.755126953125e-05,
      "step": 12706,
      "training_step_time": 0.3924574851989746
    },
    {
      "epoch": 7.7557373046875e-05,
      "model_forward_time": 0.11477208137512207,
      "step": 12707
    },
    {
      "epoch": 7.7557373046875e-05,
      "step": 12707,
      "training_step_time": 0.9493904113769531
    },
    {
      "epoch": 7.75634765625e-05,
      "model_forward_time": 0.11433553695678711,
      "step": 12708
    },
    {
      "epoch": 7.75634765625e-05,
      "step": 12708,
      "training_step_time": 0.4757208824157715
    },
    {
      "epoch": 7.7569580078125e-05,
      "model_forward_time": 0.11415576934814453,
      "step": 12709
    },
    {
      "epoch": 7.7569580078125e-05,
      "step": 12709,
      "training_step_time": 0.3820459842681885
    },
    {
      "epoch": 7.757568359375e-05,
      "grad_norm": 0.2082192450761795,
      "learning_rate": 9.300901905491447e-05,
      "loss": 0.0537,
      "step": 12710
    },
    {
      "epoch": 7.757568359375e-05,
      "model_forward_time": 0.11411094665527344,
      "step": 12710
    },
    {
      "epoch": 7.757568359375e-05,
      "step": 12710,
      "training_step_time": 0.3838365077972412
    },
    {
      "epoch": 7.7581787109375e-05,
      "model_forward_time": 0.11365652084350586,
      "step": 12711
    },
    {
      "epoch": 7.7581787109375e-05,
      "step": 12711,
      "training_step_time": 0.3869152069091797
    },
    {
      "epoch": 7.7587890625e-05,
      "model_forward_time": 0.11438965797424316,
      "step": 12712
    },
    {
      "epoch": 7.7587890625e-05,
      "step": 12712,
      "training_step_time": 0.461916446685791
    },
    {
      "epoch": 7.7593994140625e-05,
      "model_forward_time": 0.11476540565490723,
      "step": 12713
    },
    {
      "epoch": 7.7593994140625e-05,
      "step": 12713,
      "training_step_time": 0.6254210472106934
    },
    {
      "epoch": 7.760009765625e-05,
      "model_forward_time": 0.11493492126464844,
      "step": 12714
    },
    {
      "epoch": 7.760009765625e-05,
      "step": 12714,
      "training_step_time": 0.3995797634124756
    },
    {
      "epoch": 7.7606201171875e-05,
      "model_forward_time": 0.11519145965576172,
      "step": 12715
    },
    {
      "epoch": 7.7606201171875e-05,
      "step": 12715,
      "training_step_time": 0.42497801780700684
    },
    {
      "epoch": 7.76123046875e-05,
      "model_forward_time": 0.11450767517089844,
      "step": 12716
    },
    {
      "epoch": 7.76123046875e-05,
      "step": 12716,
      "training_step_time": 0.39322686195373535
    },
    {
      "epoch": 7.7618408203125e-05,
      "model_forward_time": 0.11513185501098633,
      "step": 12717
    },
    {
      "epoch": 7.7618408203125e-05,
      "step": 12717,
      "training_step_time": 0.38491344451904297
    },
    {
      "epoch": 7.762451171875e-05,
      "model_forward_time": 0.11503410339355469,
      "step": 12718
    },
    {
      "epoch": 7.762451171875e-05,
      "step": 12718,
      "training_step_time": 0.38976192474365234
    },
    {
      "epoch": 7.7630615234375e-05,
      "model_forward_time": 0.11474728584289551,
      "step": 12719
    },
    {
      "epoch": 7.7630615234375e-05,
      "step": 12719,
      "training_step_time": 0.7642831802368164
    },
    {
      "epoch": 7.763671875e-05,
      "grad_norm": 0.16292127966880798,
      "learning_rate": 9.299495830763286e-05,
      "loss": 0.0587,
      "step": 12720
    },
    {
      "epoch": 7.763671875e-05,
      "model_forward_time": 0.1145179271697998,
      "step": 12720
    },
    {
      "epoch": 7.763671875e-05,
      "step": 12720,
      "training_step_time": 0.3919861316680908
    },
    {
      "epoch": 7.7642822265625e-05,
      "model_forward_time": 0.11445307731628418,
      "step": 12721
    },
    {
      "epoch": 7.7642822265625e-05,
      "step": 12721,
      "training_step_time": 0.42091870307922363
    },
    {
      "epoch": 7.764892578125e-05,
      "model_forward_time": 0.1141047477722168,
      "step": 12722
    },
    {
      "epoch": 7.764892578125e-05,
      "step": 12722,
      "training_step_time": 0.42338109016418457
    },
    {
      "epoch": 7.7655029296875e-05,
      "model_forward_time": 0.11449766159057617,
      "step": 12723
    },
    {
      "epoch": 7.7655029296875e-05,
      "step": 12723,
      "training_step_time": 0.39043188095092773
    },
    {
      "epoch": 7.76611328125e-05,
      "model_forward_time": 0.1139376163482666,
      "step": 12724
    },
    {
      "epoch": 7.76611328125e-05,
      "step": 12724,
      "training_step_time": 0.3886737823486328
    },
    {
      "epoch": 7.7667236328125e-05,
      "model_forward_time": 0.11506795883178711,
      "step": 12725
    },
    {
      "epoch": 7.7667236328125e-05,
      "step": 12725,
      "training_step_time": 0.757659912109375
    },
    {
      "epoch": 7.767333984375e-05,
      "model_forward_time": 0.11367058753967285,
      "step": 12726
    },
    {
      "epoch": 7.767333984375e-05,
      "step": 12726,
      "training_step_time": 0.4416508674621582
    },
    {
      "epoch": 7.7679443359375e-05,
      "model_forward_time": 0.11442375183105469,
      "step": 12727
    },
    {
      "epoch": 7.7679443359375e-05,
      "step": 12727,
      "training_step_time": 0.49793267250061035
    },
    {
      "epoch": 7.7685546875e-05,
      "model_forward_time": 0.11380648612976074,
      "step": 12728
    },
    {
      "epoch": 7.7685546875e-05,
      "step": 12728,
      "training_step_time": 0.3851444721221924
    },
    {
      "epoch": 7.7691650390625e-05,
      "model_forward_time": 0.11412405967712402,
      "step": 12729
    },
    {
      "epoch": 7.7691650390625e-05,
      "step": 12729,
      "training_step_time": 0.3867008686065674
    },
    {
      "epoch": 7.769775390625e-05,
      "grad_norm": 0.21846149861812592,
      "learning_rate": 9.298088449961813e-05,
      "loss": 0.0583,
      "step": 12730
    },
    {
      "epoch": 7.769775390625e-05,
      "model_forward_time": 0.11450076103210449,
      "step": 12730
    },
    {
      "epoch": 7.769775390625e-05,
      "step": 12730,
      "training_step_time": 0.3818533420562744
    },
    {
      "epoch": 7.7703857421875e-05,
      "model_forward_time": 0.11501288414001465,
      "step": 12731
    },
    {
      "epoch": 7.7703857421875e-05,
      "step": 12731,
      "training_step_time": 0.618105411529541
    },
    {
      "epoch": 7.77099609375e-05,
      "model_forward_time": 0.11591815948486328,
      "step": 12732
    },
    {
      "epoch": 7.77099609375e-05,
      "step": 12732,
      "training_step_time": 0.3963203430175781
    },
    {
      "epoch": 7.7716064453125e-05,
      "model_forward_time": 0.11509823799133301,
      "step": 12733
    },
    {
      "epoch": 7.7716064453125e-05,
      "step": 12733,
      "training_step_time": 0.5262529850006104
    },
    {
      "epoch": 7.772216796875e-05,
      "model_forward_time": 0.11436152458190918,
      "step": 12734
    },
    {
      "epoch": 7.772216796875e-05,
      "step": 12734,
      "training_step_time": 0.38505101203918457
    },
    {
      "epoch": 7.7728271484375e-05,
      "model_forward_time": 0.11478829383850098,
      "step": 12735
    },
    {
      "epoch": 7.7728271484375e-05,
      "step": 12735,
      "training_step_time": 0.4267425537109375
    },
    {
      "epoch": 7.7734375e-05,
      "model_forward_time": 0.11510801315307617,
      "step": 12736
    },
    {
      "epoch": 7.7734375e-05,
      "step": 12736,
      "training_step_time": 0.3898353576660156
    },
    {
      "epoch": 7.7740478515625e-05,
      "model_forward_time": 0.11529302597045898,
      "step": 12737
    },
    {
      "epoch": 7.7740478515625e-05,
      "step": 12737,
      "training_step_time": 0.5043339729309082
    },
    {
      "epoch": 7.774658203125e-05,
      "model_forward_time": 0.11451292037963867,
      "step": 12738
    },
    {
      "epoch": 7.774658203125e-05,
      "step": 12738,
      "training_step_time": 0.3648052215576172
    },
    {
      "epoch": 7.7752685546875e-05,
      "model_forward_time": 0.11488008499145508,
      "step": 12739
    },
    {
      "epoch": 7.7752685546875e-05,
      "step": 12739,
      "training_step_time": 0.4156301021575928
    },
    {
      "epoch": 7.77587890625e-05,
      "grad_norm": 0.27069342136383057,
      "learning_rate": 9.296679763514552e-05,
      "loss": 0.0587,
      "step": 12740
    },
    {
      "epoch": 7.77587890625e-05,
      "model_forward_time": 0.1147468090057373,
      "step": 12740
    },
    {
      "epoch": 7.77587890625e-05,
      "step": 12740,
      "training_step_time": 0.44103169441223145
    },
    {
      "epoch": 7.7764892578125e-05,
      "model_forward_time": 0.11530375480651855,
      "step": 12741
    },
    {
      "epoch": 7.7764892578125e-05,
      "step": 12741,
      "training_step_time": 0.43417787551879883
    },
    {
      "epoch": 7.777099609375e-05,
      "model_forward_time": 0.11444520950317383,
      "step": 12742
    },
    {
      "epoch": 7.777099609375e-05,
      "step": 12742,
      "training_step_time": 0.40171337127685547
    },
    {
      "epoch": 7.7777099609375e-05,
      "model_forward_time": 0.11536836624145508,
      "step": 12743
    },
    {
      "epoch": 7.7777099609375e-05,
      "step": 12743,
      "training_step_time": 0.6681997776031494
    },
    {
      "epoch": 7.7783203125e-05,
      "model_forward_time": 0.11396551132202148,
      "step": 12744
    },
    {
      "epoch": 7.7783203125e-05,
      "step": 12744,
      "training_step_time": 0.39525413513183594
    },
    {
      "epoch": 7.7789306640625e-05,
      "model_forward_time": 0.11479496955871582,
      "step": 12745
    },
    {
      "epoch": 7.7789306640625e-05,
      "step": 12745,
      "training_step_time": 0.3959667682647705
    },
    {
      "epoch": 7.779541015625e-05,
      "model_forward_time": 0.11431312561035156,
      "step": 12746
    },
    {
      "epoch": 7.779541015625e-05,
      "step": 12746,
      "training_step_time": 0.39649391174316406
    },
    {
      "epoch": 7.7801513671875e-05,
      "model_forward_time": 0.11482858657836914,
      "step": 12747
    },
    {
      "epoch": 7.7801513671875e-05,
      "step": 12747,
      "training_step_time": 0.4150228500366211
    },
    {
      "epoch": 7.78076171875e-05,
      "model_forward_time": 0.11407780647277832,
      "step": 12748
    },
    {
      "epoch": 7.78076171875e-05,
      "step": 12748,
      "training_step_time": 0.393129825592041
    },
    {
      "epoch": 7.7813720703125e-05,
      "model_forward_time": 0.11545228958129883,
      "step": 12749
    },
    {
      "epoch": 7.7813720703125e-05,
      "step": 12749,
      "training_step_time": 0.9150679111480713
    },
    {
      "epoch": 7.781982421875e-05,
      "grad_norm": 0.17994385957717896,
      "learning_rate": 9.295269771849427e-05,
      "loss": 0.0567,
      "step": 12750
    },
    {
      "epoch": 7.781982421875e-05,
      "model_forward_time": 0.11398720741271973,
      "step": 12750
    },
    {
      "epoch": 7.781982421875e-05,
      "step": 12750,
      "training_step_time": 0.39550065994262695
    },
    {
      "epoch": 7.7825927734375e-05,
      "model_forward_time": 0.11494326591491699,
      "step": 12751
    },
    {
      "epoch": 7.7825927734375e-05,
      "step": 12751,
      "training_step_time": 0.402296781539917
    },
    {
      "epoch": 7.783203125e-05,
      "model_forward_time": 0.11466336250305176,
      "step": 12752
    },
    {
      "epoch": 7.783203125e-05,
      "step": 12752,
      "training_step_time": 0.4197824001312256
    },
    {
      "epoch": 7.7838134765625e-05,
      "model_forward_time": 0.11421704292297363,
      "step": 12753
    },
    {
      "epoch": 7.7838134765625e-05,
      "step": 12753,
      "training_step_time": 0.41852760314941406
    },
    {
      "epoch": 7.784423828125e-05,
      "model_forward_time": 0.11504960060119629,
      "step": 12754
    },
    {
      "epoch": 7.784423828125e-05,
      "step": 12754,
      "training_step_time": 0.5028154850006104
    },
    {
      "epoch": 7.7850341796875e-05,
      "model_forward_time": 0.11420202255249023,
      "step": 12755
    },
    {
      "epoch": 7.7850341796875e-05,
      "step": 12755,
      "training_step_time": 0.5315444469451904
    },
    {
      "epoch": 7.78564453125e-05,
      "model_forward_time": 0.11438369750976562,
      "step": 12756
    },
    {
      "epoch": 7.78564453125e-05,
      "step": 12756,
      "training_step_time": 0.38832950592041016
    },
    {
      "epoch": 7.7862548828125e-05,
      "model_forward_time": 0.11428117752075195,
      "step": 12757
    },
    {
      "epoch": 7.7862548828125e-05,
      "step": 12757,
      "training_step_time": 0.40004467964172363
    },
    {
      "epoch": 7.786865234375e-05,
      "model_forward_time": 0.11488652229309082,
      "step": 12758
    },
    {
      "epoch": 7.786865234375e-05,
      "step": 12758,
      "training_step_time": 0.38483595848083496
    },
    {
      "epoch": 7.7874755859375e-05,
      "model_forward_time": 0.11516237258911133,
      "step": 12759
    },
    {
      "epoch": 7.7874755859375e-05,
      "step": 12759,
      "training_step_time": 0.38814759254455566
    },
    {
      "epoch": 7.7880859375e-05,
      "grad_norm": 0.17246775329113007,
      "learning_rate": 9.293858475394754e-05,
      "loss": 0.0599,
      "step": 12760
    },
    {
      "epoch": 7.7880859375e-05,
      "model_forward_time": 0.11504888534545898,
      "step": 12760
    },
    {
      "epoch": 7.7880859375e-05,
      "step": 12760,
      "training_step_time": 0.3807334899902344
    },
    {
      "epoch": 7.7886962890625e-05,
      "model_forward_time": 0.11525559425354004,
      "step": 12761
    },
    {
      "epoch": 7.7886962890625e-05,
      "step": 12761,
      "training_step_time": 0.8622400760650635
    },
    {
      "epoch": 7.789306640625e-05,
      "model_forward_time": 0.113677978515625,
      "step": 12762
    },
    {
      "epoch": 7.789306640625e-05,
      "step": 12762,
      "training_step_time": 0.3890681266784668
    },
    {
      "epoch": 7.7899169921875e-05,
      "model_forward_time": 0.1140744686126709,
      "step": 12763
    },
    {
      "epoch": 7.7899169921875e-05,
      "step": 12763,
      "training_step_time": 0.3927493095397949
    },
    {
      "epoch": 7.79052734375e-05,
      "model_forward_time": 0.11368894577026367,
      "step": 12764
    },
    {
      "epoch": 7.79052734375e-05,
      "step": 12764,
      "training_step_time": 0.395557165145874
    },
    {
      "epoch": 7.7911376953125e-05,
      "model_forward_time": 0.11370205879211426,
      "step": 12765
    },
    {
      "epoch": 7.7911376953125e-05,
      "step": 12765,
      "training_step_time": 0.3628056049346924
    },
    {
      "epoch": 7.791748046875e-05,
      "model_forward_time": 0.11428165435791016,
      "step": 12766
    },
    {
      "epoch": 7.791748046875e-05,
      "step": 12766,
      "training_step_time": 0.4205207824707031
    },
    {
      "epoch": 7.7923583984375e-05,
      "model_forward_time": 0.1147301197052002,
      "step": 12767
    },
    {
      "epoch": 7.7923583984375e-05,
      "step": 12767,
      "training_step_time": 0.8047935962677002
    },
    {
      "epoch": 7.79296875e-05,
      "model_forward_time": 0.11403679847717285,
      "step": 12768
    },
    {
      "epoch": 7.79296875e-05,
      "step": 12768,
      "training_step_time": 0.38826799392700195
    },
    {
      "epoch": 7.7935791015625e-05,
      "model_forward_time": 0.1143484115600586,
      "step": 12769
    },
    {
      "epoch": 7.7935791015625e-05,
      "step": 12769,
      "training_step_time": 0.38218092918395996
    },
    {
      "epoch": 7.794189453125e-05,
      "grad_norm": 0.18806537985801697,
      "learning_rate": 9.292445874579251e-05,
      "loss": 0.0538,
      "step": 12770
    },
    {
      "epoch": 7.794189453125e-05,
      "model_forward_time": 0.11404156684875488,
      "step": 12770
    },
    {
      "epoch": 7.794189453125e-05,
      "step": 12770,
      "training_step_time": 0.37886881828308105
    },
    {
      "epoch": 7.7947998046875e-05,
      "model_forward_time": 0.1139066219329834,
      "step": 12771
    },
    {
      "epoch": 7.7947998046875e-05,
      "step": 12771,
      "training_step_time": 0.38733816146850586
    },
    {
      "epoch": 7.79541015625e-05,
      "model_forward_time": 0.11407327651977539,
      "step": 12772
    },
    {
      "epoch": 7.79541015625e-05,
      "step": 12772,
      "training_step_time": 0.38132596015930176
    },
    {
      "epoch": 7.7960205078125e-05,
      "model_forward_time": 0.11434555053710938,
      "step": 12773
    },
    {
      "epoch": 7.7960205078125e-05,
      "step": 12773,
      "training_step_time": 0.9131069183349609
    },
    {
      "epoch": 7.796630859375e-05,
      "model_forward_time": 0.11431074142456055,
      "step": 12774
    },
    {
      "epoch": 7.796630859375e-05,
      "step": 12774,
      "training_step_time": 0.41879701614379883
    },
    {
      "epoch": 7.7972412109375e-05,
      "model_forward_time": 0.1144552230834961,
      "step": 12775
    },
    {
      "epoch": 7.7972412109375e-05,
      "step": 12775,
      "training_step_time": 0.38846731185913086
    },
    {
      "epoch": 7.7978515625e-05,
      "model_forward_time": 0.11371111869812012,
      "step": 12776
    },
    {
      "epoch": 7.7978515625e-05,
      "step": 12776,
      "training_step_time": 0.4249606132507324
    },
    {
      "epoch": 7.7984619140625e-05,
      "model_forward_time": 0.11458921432495117,
      "step": 12777
    },
    {
      "epoch": 7.7984619140625e-05,
      "step": 12777,
      "training_step_time": 0.4312138557434082
    },
    {
      "epoch": 7.799072265625e-05,
      "model_forward_time": 0.1157841682434082,
      "step": 12778
    },
    {
      "epoch": 7.799072265625e-05,
      "step": 12778,
      "training_step_time": 0.37020254135131836
    },
    {
      "epoch": 7.7996826171875e-05,
      "model_forward_time": 0.11503887176513672,
      "step": 12779
    },
    {
      "epoch": 7.7996826171875e-05,
      "step": 12779,
      "training_step_time": 0.42238736152648926
    },
    {
      "epoch": 7.80029296875e-05,
      "grad_norm": 0.13674737513065338,
      "learning_rate": 9.291031969832026e-05,
      "loss": 0.0633,
      "step": 12780
    },
    {
      "epoch": 7.80029296875e-05,
      "model_forward_time": 0.1144707202911377,
      "step": 12780
    },
    {
      "epoch": 7.80029296875e-05,
      "step": 12780,
      "training_step_time": 0.4770481586456299
    },
    {
      "epoch": 7.8009033203125e-05,
      "model_forward_time": 0.11522030830383301,
      "step": 12781
    },
    {
      "epoch": 7.8009033203125e-05,
      "step": 12781,
      "training_step_time": 0.3920748233795166
    },
    {
      "epoch": 7.801513671875e-05,
      "model_forward_time": 0.11503934860229492,
      "step": 12782
    },
    {
      "epoch": 7.801513671875e-05,
      "step": 12782,
      "training_step_time": 0.39018678665161133
    },
    {
      "epoch": 7.8021240234375e-05,
      "model_forward_time": 0.11477351188659668,
      "step": 12783
    },
    {
      "epoch": 7.8021240234375e-05,
      "step": 12783,
      "training_step_time": 0.39447593688964844
    },
    {
      "epoch": 7.802734375e-05,
      "model_forward_time": 0.1155843734741211,
      "step": 12784
    },
    {
      "epoch": 7.802734375e-05,
      "step": 12784,
      "training_step_time": 0.39684438705444336
    },
    {
      "epoch": 7.8033447265625e-05,
      "model_forward_time": 0.11551380157470703,
      "step": 12785
    },
    {
      "epoch": 7.8033447265625e-05,
      "step": 12785,
      "training_step_time": 0.6726794242858887
    },
    {
      "epoch": 7.803955078125e-05,
      "model_forward_time": 0.11406517028808594,
      "step": 12786
    },
    {
      "epoch": 7.803955078125e-05,
      "step": 12786,
      "training_step_time": 0.3989443778991699
    },
    {
      "epoch": 7.8045654296875e-05,
      "model_forward_time": 0.11499142646789551,
      "step": 12787
    },
    {
      "epoch": 7.8045654296875e-05,
      "step": 12787,
      "training_step_time": 0.39957594871520996
    },
    {
      "epoch": 7.80517578125e-05,
      "model_forward_time": 0.11390304565429688,
      "step": 12788
    },
    {
      "epoch": 7.80517578125e-05,
      "step": 12788,
      "training_step_time": 0.45011115074157715
    },
    {
      "epoch": 7.8057861328125e-05,
      "model_forward_time": 0.11507725715637207,
      "step": 12789
    },
    {
      "epoch": 7.8057861328125e-05,
      "step": 12789,
      "training_step_time": 0.40245580673217773
    },
    {
      "epoch": 7.806396484375e-05,
      "grad_norm": 0.18096689879894257,
      "learning_rate": 9.289616761582587e-05,
      "loss": 0.0625,
      "step": 12790
    },
    {
      "epoch": 7.806396484375e-05,
      "model_forward_time": 0.11442422866821289,
      "step": 12790
    },
    {
      "epoch": 7.806396484375e-05,
      "step": 12790,
      "training_step_time": 0.4452221393585205
    },
    {
      "epoch": 7.8070068359375e-05,
      "model_forward_time": 0.1155691146850586,
      "step": 12791
    },
    {
      "epoch": 7.8070068359375e-05,
      "step": 12791,
      "training_step_time": 0.8207125663757324
    },
    {
      "epoch": 7.8076171875e-05,
      "model_forward_time": 0.11412215232849121,
      "step": 12792
    },
    {
      "epoch": 7.8076171875e-05,
      "step": 12792,
      "training_step_time": 0.5009655952453613
    },
    {
      "epoch": 7.8082275390625e-05,
      "model_forward_time": 0.11460232734680176,
      "step": 12793
    },
    {
      "epoch": 7.8082275390625e-05,
      "step": 12793,
      "training_step_time": 0.45618486404418945
    },
    {
      "epoch": 7.808837890625e-05,
      "model_forward_time": 0.11376667022705078,
      "step": 12794
    },
    {
      "epoch": 7.808837890625e-05,
      "step": 12794,
      "training_step_time": 0.40480971336364746
    },
    {
      "epoch": 7.8094482421875e-05,
      "model_forward_time": 0.11395525932312012,
      "step": 12795
    },
    {
      "epoch": 7.8094482421875e-05,
      "step": 12795,
      "training_step_time": 0.38849449157714844
    },
    {
      "epoch": 7.81005859375e-05,
      "model_forward_time": 0.11426210403442383,
      "step": 12796
    },
    {
      "epoch": 7.81005859375e-05,
      "step": 12796,
      "training_step_time": 0.3914165496826172
    },
    {
      "epoch": 7.8106689453125e-05,
      "model_forward_time": 0.11444354057312012,
      "step": 12797
    },
    {
      "epoch": 7.8106689453125e-05,
      "step": 12797,
      "training_step_time": 0.5677838325500488
    },
    {
      "epoch": 7.811279296875e-05,
      "model_forward_time": 0.11495351791381836,
      "step": 12798
    },
    {
      "epoch": 7.811279296875e-05,
      "step": 12798,
      "training_step_time": 0.38786983489990234
    },
    {
      "epoch": 7.8118896484375e-05,
      "model_forward_time": 0.11529850959777832,
      "step": 12799
    },
    {
      "epoch": 7.8118896484375e-05,
      "step": 12799,
      "training_step_time": 0.4117751121520996
    },
    {
      "epoch": 7.8125e-05,
      "grad_norm": 0.20327319204807281,
      "learning_rate": 9.288200250260836e-05,
      "loss": 0.0603,
      "step": 12800
    },
    {
      "epoch": 7.8125e-05,
      "model_forward_time": 0.11474275588989258,
      "step": 12800
    },
    {
      "epoch": 7.8125e-05,
      "step": 12800,
      "training_step_time": 0.38899827003479004
    },
    {
      "epoch": 7.8131103515625e-05,
      "model_forward_time": 0.11457467079162598,
      "step": 12801
    },
    {
      "epoch": 7.8131103515625e-05,
      "step": 12801,
      "training_step_time": 0.41240644454956055
    },
    {
      "epoch": 7.813720703125e-05,
      "model_forward_time": 0.11488962173461914,
      "step": 12802
    },
    {
      "epoch": 7.813720703125e-05,
      "step": 12802,
      "training_step_time": 0.40487217903137207
    },
    {
      "epoch": 7.8143310546875e-05,
      "model_forward_time": 0.1147623062133789,
      "step": 12803
    },
    {
      "epoch": 7.8143310546875e-05,
      "step": 12803,
      "training_step_time": 0.9332211017608643
    },
    {
      "epoch": 7.81494140625e-05,
      "model_forward_time": 0.11416482925415039,
      "step": 12804
    },
    {
      "epoch": 7.81494140625e-05,
      "step": 12804,
      "training_step_time": 0.3645310401916504
    },
    {
      "epoch": 7.8155517578125e-05,
      "model_forward_time": 0.11448931694030762,
      "step": 12805
    },
    {
      "epoch": 7.8155517578125e-05,
      "step": 12805,
      "training_step_time": 0.3876168727874756
    },
    {
      "epoch": 7.816162109375e-05,
      "model_forward_time": 0.11449241638183594,
      "step": 12806
    },
    {
      "epoch": 7.816162109375e-05,
      "step": 12806,
      "training_step_time": 0.44223999977111816
    },
    {
      "epoch": 7.8167724609375e-05,
      "model_forward_time": 0.11462736129760742,
      "step": 12807
    },
    {
      "epoch": 7.8167724609375e-05,
      "step": 12807,
      "training_step_time": 0.4298744201660156
    },
    {
      "epoch": 7.8173828125e-05,
      "model_forward_time": 0.11426973342895508,
      "step": 12808
    },
    {
      "epoch": 7.8173828125e-05,
      "step": 12808,
      "training_step_time": 0.3867483139038086
    },
    {
      "epoch": 7.8179931640625e-05,
      "model_forward_time": 0.11511850357055664,
      "step": 12809
    },
    {
      "epoch": 7.8179931640625e-05,
      "step": 12809,
      "training_step_time": 0.7449548244476318
    },
    {
      "epoch": 7.818603515625e-05,
      "grad_norm": 0.19603823125362396,
      "learning_rate": 9.286782436297073e-05,
      "loss": 0.0645,
      "step": 12810
    },
    {
      "epoch": 7.818603515625e-05,
      "model_forward_time": 0.11398935317993164,
      "step": 12810
    },
    {
      "epoch": 7.818603515625e-05,
      "step": 12810,
      "training_step_time": 0.3847942352294922
    },
    {
      "epoch": 7.8192138671875e-05,
      "model_forward_time": 0.11415243148803711,
      "step": 12811
    },
    {
      "epoch": 7.8192138671875e-05,
      "step": 12811,
      "training_step_time": 0.39176034927368164
    },
    {
      "epoch": 7.81982421875e-05,
      "model_forward_time": 0.11487698554992676,
      "step": 12812
    },
    {
      "epoch": 7.81982421875e-05,
      "step": 12812,
      "training_step_time": 0.45412588119506836
    },
    {
      "epoch": 7.8204345703125e-05,
      "model_forward_time": 0.1142275333404541,
      "step": 12813
    },
    {
      "epoch": 7.8204345703125e-05,
      "step": 12813,
      "training_step_time": 0.480912446975708
    },
    {
      "epoch": 7.821044921875e-05,
      "model_forward_time": 0.11479520797729492,
      "step": 12814
    },
    {
      "epoch": 7.821044921875e-05,
      "step": 12814,
      "training_step_time": 0.4205324649810791
    },
    {
      "epoch": 7.8216552734375e-05,
      "model_forward_time": 0.11516094207763672,
      "step": 12815
    },
    {
      "epoch": 7.8216552734375e-05,
      "step": 12815,
      "training_step_time": 0.41727519035339355
    },
    {
      "epoch": 7.822265625e-05,
      "model_forward_time": 0.11463451385498047,
      "step": 12816
    },
    {
      "epoch": 7.822265625e-05,
      "step": 12816,
      "training_step_time": 0.41999363899230957
    },
    {
      "epoch": 7.8228759765625e-05,
      "model_forward_time": 0.11424374580383301,
      "step": 12817
    },
    {
      "epoch": 7.8228759765625e-05,
      "step": 12817,
      "training_step_time": 0.3859596252441406
    },
    {
      "epoch": 7.823486328125e-05,
      "model_forward_time": 0.11490345001220703,
      "step": 12818
    },
    {
      "epoch": 7.823486328125e-05,
      "step": 12818,
      "training_step_time": 0.3682687282562256
    },
    {
      "epoch": 7.8240966796875e-05,
      "model_forward_time": 0.1148691177368164,
      "step": 12819
    },
    {
      "epoch": 7.8240966796875e-05,
      "step": 12819,
      "training_step_time": 0.4041421413421631
    },
    {
      "epoch": 7.82470703125e-05,
      "grad_norm": 0.14293290674686432,
      "learning_rate": 9.285363320121992e-05,
      "loss": 0.0614,
      "step": 12820
    },
    {
      "epoch": 7.82470703125e-05,
      "model_forward_time": 0.11468648910522461,
      "step": 12820
    },
    {
      "epoch": 7.82470703125e-05,
      "step": 12820,
      "training_step_time": 0.45531654357910156
    },
    {
      "epoch": 7.8253173828125e-05,
      "model_forward_time": 0.11503362655639648,
      "step": 12821
    },
    {
      "epoch": 7.8253173828125e-05,
      "step": 12821,
      "training_step_time": 0.8064212799072266
    },
    {
      "epoch": 7.825927734375e-05,
      "model_forward_time": 0.1146402359008789,
      "step": 12822
    },
    {
      "epoch": 7.825927734375e-05,
      "step": 12822,
      "training_step_time": 0.39168429374694824
    },
    {
      "epoch": 7.8265380859375e-05,
      "model_forward_time": 0.11405301094055176,
      "step": 12823
    },
    {
      "epoch": 7.8265380859375e-05,
      "step": 12823,
      "training_step_time": 0.3932359218597412
    },
    {
      "epoch": 7.8271484375e-05,
      "model_forward_time": 0.11429810523986816,
      "step": 12824
    },
    {
      "epoch": 7.8271484375e-05,
      "step": 12824,
      "training_step_time": 0.3874690532684326
    },
    {
      "epoch": 7.8277587890625e-05,
      "model_forward_time": 0.11431431770324707,
      "step": 12825
    },
    {
      "epoch": 7.8277587890625e-05,
      "step": 12825,
      "training_step_time": 0.37929487228393555
    },
    {
      "epoch": 7.828369140625e-05,
      "model_forward_time": 0.11410307884216309,
      "step": 12826
    },
    {
      "epoch": 7.828369140625e-05,
      "step": 12826,
      "training_step_time": 0.3973686695098877
    },
    {
      "epoch": 7.8289794921875e-05,
      "model_forward_time": 0.11473703384399414,
      "step": 12827
    },
    {
      "epoch": 7.8289794921875e-05,
      "step": 12827,
      "training_step_time": 0.9361443519592285
    },
    {
      "epoch": 7.82958984375e-05,
      "model_forward_time": 0.11441969871520996,
      "step": 12828
    },
    {
      "epoch": 7.82958984375e-05,
      "step": 12828,
      "training_step_time": 0.3875899314880371
    },
    {
      "epoch": 7.8302001953125e-05,
      "model_forward_time": 0.11455249786376953,
      "step": 12829
    },
    {
      "epoch": 7.8302001953125e-05,
      "step": 12829,
      "training_step_time": 0.3870701789855957
    },
    {
      "epoch": 7.830810546875e-05,
      "grad_norm": 0.21764527261257172,
      "learning_rate": 9.283942902166681e-05,
      "loss": 0.0596,
      "step": 12830
    },
    {
      "epoch": 7.830810546875e-05,
      "model_forward_time": 0.11380314826965332,
      "step": 12830
    },
    {
      "epoch": 7.830810546875e-05,
      "step": 12830,
      "training_step_time": 0.39961814880371094
    },
    {
      "epoch": 7.8314208984375e-05,
      "model_forward_time": 0.11481595039367676,
      "step": 12831
    },
    {
      "epoch": 7.8314208984375e-05,
      "step": 12831,
      "training_step_time": 0.3817317485809326
    },
    {
      "epoch": 7.83203125e-05,
      "model_forward_time": 0.11453795433044434,
      "step": 12832
    },
    {
      "epoch": 7.83203125e-05,
      "step": 12832,
      "training_step_time": 0.41710925102233887
    },
    {
      "epoch": 7.8326416015625e-05,
      "model_forward_time": 0.11514019966125488,
      "step": 12833
    },
    {
      "epoch": 7.8326416015625e-05,
      "step": 12833,
      "training_step_time": 0.7769818305969238
    },
    {
      "epoch": 7.833251953125e-05,
      "model_forward_time": 0.11466217041015625,
      "step": 12834
    },
    {
      "epoch": 7.833251953125e-05,
      "step": 12834,
      "training_step_time": 0.455125093460083
    },
    {
      "epoch": 7.8338623046875e-05,
      "model_forward_time": 0.11396384239196777,
      "step": 12835
    },
    {
      "epoch": 7.8338623046875e-05,
      "step": 12835,
      "training_step_time": 0.38823723793029785
    },
    {
      "epoch": 7.83447265625e-05,
      "model_forward_time": 0.1141366958618164,
      "step": 12836
    },
    {
      "epoch": 7.83447265625e-05,
      "step": 12836,
      "training_step_time": 0.3956162929534912
    },
    {
      "epoch": 7.8350830078125e-05,
      "model_forward_time": 0.1136624813079834,
      "step": 12837
    },
    {
      "epoch": 7.8350830078125e-05,
      "step": 12837,
      "training_step_time": 0.38100147247314453
    },
    {
      "epoch": 7.835693359375e-05,
      "model_forward_time": 0.11456704139709473,
      "step": 12838
    },
    {
      "epoch": 7.835693359375e-05,
      "step": 12838,
      "training_step_time": 0.3875713348388672
    },
    {
      "epoch": 7.8363037109375e-05,
      "model_forward_time": 0.1148533821105957,
      "step": 12839
    },
    {
      "epoch": 7.8363037109375e-05,
      "step": 12839,
      "training_step_time": 0.716625452041626
    },
    {
      "epoch": 7.8369140625e-05,
      "grad_norm": 0.18928836286067963,
      "learning_rate": 9.282521182862629e-05,
      "loss": 0.0598,
      "step": 12840
    },
    {
      "epoch": 7.8369140625e-05,
      "model_forward_time": 0.11461186408996582,
      "step": 12840
    },
    {
      "epoch": 7.8369140625e-05,
      "step": 12840,
      "training_step_time": 0.4700915813446045
    },
    {
      "epoch": 7.8375244140625e-05,
      "model_forward_time": 0.11426782608032227,
      "step": 12841
    },
    {
      "epoch": 7.8375244140625e-05,
      "step": 12841,
      "training_step_time": 0.388655424118042
    },
    {
      "epoch": 7.838134765625e-05,
      "model_forward_time": 0.11414957046508789,
      "step": 12842
    },
    {
      "epoch": 7.838134765625e-05,
      "step": 12842,
      "training_step_time": 0.38486218452453613
    },
    {
      "epoch": 7.8387451171875e-05,
      "model_forward_time": 0.11449122428894043,
      "step": 12843
    },
    {
      "epoch": 7.8387451171875e-05,
      "step": 12843,
      "training_step_time": 0.39005398750305176
    },
    {
      "epoch": 7.83935546875e-05,
      "model_forward_time": 0.1144566535949707,
      "step": 12844
    },
    {
      "epoch": 7.83935546875e-05,
      "step": 12844,
      "training_step_time": 0.3898632526397705
    },
    {
      "epoch": 7.8399658203125e-05,
      "model_forward_time": 0.11507558822631836,
      "step": 12845
    },
    {
      "epoch": 7.8399658203125e-05,
      "step": 12845,
      "training_step_time": 0.593900203704834
    },
    {
      "epoch": 7.840576171875e-05,
      "model_forward_time": 0.11476731300354004,
      "step": 12846
    },
    {
      "epoch": 7.840576171875e-05,
      "step": 12846,
      "training_step_time": 0.46244096755981445
    },
    {
      "epoch": 7.8411865234375e-05,
      "model_forward_time": 0.11479449272155762,
      "step": 12847
    },
    {
      "epoch": 7.8411865234375e-05,
      "step": 12847,
      "training_step_time": 0.5041639804840088
    },
    {
      "epoch": 7.841796875e-05,
      "model_forward_time": 0.11496329307556152,
      "step": 12848
    },
    {
      "epoch": 7.841796875e-05,
      "step": 12848,
      "training_step_time": 0.3962256908416748
    },
    {
      "epoch": 7.8424072265625e-05,
      "model_forward_time": 0.11428618431091309,
      "step": 12849
    },
    {
      "epoch": 7.8424072265625e-05,
      "step": 12849,
      "training_step_time": 0.3862600326538086
    },
    {
      "epoch": 7.843017578125e-05,
      "grad_norm": 0.14454445242881775,
      "learning_rate": 9.281098162641714e-05,
      "loss": 0.0555,
      "step": 12850
    },
    {
      "epoch": 7.843017578125e-05,
      "model_forward_time": 0.11458396911621094,
      "step": 12850
    },
    {
      "epoch": 7.843017578125e-05,
      "step": 12850,
      "training_step_time": 0.3847672939300537
    },
    {
      "epoch": 7.8436279296875e-05,
      "model_forward_time": 0.11461734771728516,
      "step": 12851
    },
    {
      "epoch": 7.8436279296875e-05,
      "step": 12851,
      "training_step_time": 0.49179649353027344
    },
    {
      "epoch": 7.84423828125e-05,
      "model_forward_time": 0.11518168449401855,
      "step": 12852
    },
    {
      "epoch": 7.84423828125e-05,
      "step": 12852,
      "training_step_time": 0.4090867042541504
    },
    {
      "epoch": 7.8448486328125e-05,
      "model_forward_time": 0.11514830589294434,
      "step": 12853
    },
    {
      "epoch": 7.8448486328125e-05,
      "step": 12853,
      "training_step_time": 0.44098806381225586
    },
    {
      "epoch": 7.845458984375e-05,
      "model_forward_time": 0.11503744125366211,
      "step": 12854
    },
    {
      "epoch": 7.845458984375e-05,
      "step": 12854,
      "training_step_time": 0.4284522533416748
    },
    {
      "epoch": 7.8460693359375e-05,
      "model_forward_time": 0.11558008193969727,
      "step": 12855
    },
    {
      "epoch": 7.8460693359375e-05,
      "step": 12855,
      "training_step_time": 0.38843226432800293
    },
    {
      "epoch": 7.8466796875e-05,
      "model_forward_time": 0.1156613826751709,
      "step": 12856
    },
    {
      "epoch": 7.8466796875e-05,
      "step": 12856,
      "training_step_time": 0.40581512451171875
    },
    {
      "epoch": 7.8472900390625e-05,
      "model_forward_time": 0.1163182258605957,
      "step": 12857
    },
    {
      "epoch": 7.8472900390625e-05,
      "step": 12857,
      "training_step_time": 0.7703135013580322
    },
    {
      "epoch": 7.847900390625e-05,
      "model_forward_time": 0.11476802825927734,
      "step": 12858
    },
    {
      "epoch": 7.847900390625e-05,
      "step": 12858,
      "training_step_time": 0.38742494583129883
    },
    {
      "epoch": 7.8485107421875e-05,
      "model_forward_time": 0.11475133895874023,
      "step": 12859
    },
    {
      "epoch": 7.8485107421875e-05,
      "step": 12859,
      "training_step_time": 0.4536561965942383
    },
    {
      "epoch": 7.84912109375e-05,
      "grad_norm": 0.19099214673042297,
      "learning_rate": 9.279673841936214e-05,
      "loss": 0.0577,
      "step": 12860
    },
    {
      "epoch": 7.84912109375e-05,
      "model_forward_time": 0.11418819427490234,
      "step": 12860
    },
    {
      "epoch": 7.84912109375e-05,
      "step": 12860,
      "training_step_time": 0.45667243003845215
    },
    {
      "epoch": 7.8497314453125e-05,
      "model_forward_time": 0.11436915397644043,
      "step": 12861
    },
    {
      "epoch": 7.8497314453125e-05,
      "step": 12861,
      "training_step_time": 0.4002201557159424
    },
    {
      "epoch": 7.850341796875e-05,
      "model_forward_time": 0.11397337913513184,
      "step": 12862
    },
    {
      "epoch": 7.850341796875e-05,
      "step": 12862,
      "training_step_time": 0.37458372116088867
    },
    {
      "epoch": 7.8509521484375e-05,
      "model_forward_time": 0.1148383617401123,
      "step": 12863
    },
    {
      "epoch": 7.8509521484375e-05,
      "step": 12863,
      "training_step_time": 0.7343804836273193
    },
    {
      "epoch": 7.8515625e-05,
      "model_forward_time": 0.11358165740966797,
      "step": 12864
    },
    {
      "epoch": 7.8515625e-05,
      "step": 12864,
      "training_step_time": 0.4026987552642822
    },
    {
      "epoch": 7.8521728515625e-05,
      "model_forward_time": 0.11451959609985352,
      "step": 12865
    },
    {
      "epoch": 7.8521728515625e-05,
      "step": 12865,
      "training_step_time": 0.3912529945373535
    },
    {
      "epoch": 7.852783203125e-05,
      "model_forward_time": 0.1139369010925293,
      "step": 12866
    },
    {
      "epoch": 7.852783203125e-05,
      "step": 12866,
      "training_step_time": 0.40113067626953125
    },
    {
      "epoch": 7.8533935546875e-05,
      "model_forward_time": 0.11432838439941406,
      "step": 12867
    },
    {
      "epoch": 7.8533935546875e-05,
      "step": 12867,
      "training_step_time": 0.3990044593811035
    },
    {
      "epoch": 7.85400390625e-05,
      "model_forward_time": 0.11469650268554688,
      "step": 12868
    },
    {
      "epoch": 7.85400390625e-05,
      "step": 12868,
      "training_step_time": 0.4716911315917969
    },
    {
      "epoch": 7.8546142578125e-05,
      "model_forward_time": 0.11536836624145508,
      "step": 12869
    },
    {
      "epoch": 7.8546142578125e-05,
      "step": 12869,
      "training_step_time": 0.6514337062835693
    },
    {
      "epoch": 7.855224609375e-05,
      "grad_norm": 0.18610642850399017,
      "learning_rate": 9.278248221178798e-05,
      "loss": 0.0592,
      "step": 12870
    },
    {
      "epoch": 7.855224609375e-05,
      "model_forward_time": 0.11462974548339844,
      "step": 12870
    },
    {
      "epoch": 7.855224609375e-05,
      "step": 12870,
      "training_step_time": 0.38616371154785156
    },
    {
      "epoch": 7.8558349609375e-05,
      "model_forward_time": 0.11413455009460449,
      "step": 12871
    },
    {
      "epoch": 7.8558349609375e-05,
      "step": 12871,
      "training_step_time": 0.4028189182281494
    },
    {
      "epoch": 7.8564453125e-05,
      "model_forward_time": 0.11482429504394531,
      "step": 12872
    },
    {
      "epoch": 7.8564453125e-05,
      "step": 12872,
      "training_step_time": 0.49299144744873047
    },
    {
      "epoch": 7.8570556640625e-05,
      "model_forward_time": 0.11397647857666016,
      "step": 12873
    },
    {
      "epoch": 7.8570556640625e-05,
      "step": 12873,
      "training_step_time": 0.4436664581298828
    },
    {
      "epoch": 7.857666015625e-05,
      "model_forward_time": 0.11606383323669434,
      "step": 12874
    },
    {
      "epoch": 7.857666015625e-05,
      "step": 12874,
      "training_step_time": 0.4812660217285156
    },
    {
      "epoch": 7.8582763671875e-05,
      "model_forward_time": 0.11470413208007812,
      "step": 12875
    },
    {
      "epoch": 7.8582763671875e-05,
      "step": 12875,
      "training_step_time": 0.5201778411865234
    },
    {
      "epoch": 7.85888671875e-05,
      "model_forward_time": 0.11430644989013672,
      "step": 12876
    },
    {
      "epoch": 7.85888671875e-05,
      "step": 12876,
      "training_step_time": 0.40288305282592773
    },
    {
      "epoch": 7.8594970703125e-05,
      "model_forward_time": 0.11473226547241211,
      "step": 12877
    },
    {
      "epoch": 7.8594970703125e-05,
      "step": 12877,
      "training_step_time": 0.4064300060272217
    },
    {
      "epoch": 7.860107421875e-05,
      "model_forward_time": 0.11435294151306152,
      "step": 12878
    },
    {
      "epoch": 7.860107421875e-05,
      "step": 12878,
      "training_step_time": 0.3946034908294678
    },
    {
      "epoch": 7.8607177734375e-05,
      "model_forward_time": 0.11505341529846191,
      "step": 12879
    },
    {
      "epoch": 7.8607177734375e-05,
      "step": 12879,
      "training_step_time": 0.38440918922424316
    },
    {
      "epoch": 7.861328125e-05,
      "grad_norm": 0.18201656639575958,
      "learning_rate": 9.276821300802534e-05,
      "loss": 0.0557,
      "step": 12880
    },
    {
      "epoch": 7.861328125e-05,
      "model_forward_time": 0.11481809616088867,
      "step": 12880
    },
    {
      "epoch": 7.861328125e-05,
      "step": 12880,
      "training_step_time": 0.46280908584594727
    },
    {
      "epoch": 7.8619384765625e-05,
      "model_forward_time": 0.11591720581054688,
      "step": 12881
    },
    {
      "epoch": 7.8619384765625e-05,
      "step": 12881,
      "training_step_time": 0.7838699817657471
    },
    {
      "epoch": 7.862548828125e-05,
      "model_forward_time": 0.11462736129760742,
      "step": 12882
    },
    {
      "epoch": 7.862548828125e-05,
      "step": 12882,
      "training_step_time": 0.3809478282928467
    },
    {
      "epoch": 7.8631591796875e-05,
      "model_forward_time": 0.11467313766479492,
      "step": 12883
    },
    {
      "epoch": 7.8631591796875e-05,
      "step": 12883,
      "training_step_time": 0.387967586517334
    },
    {
      "epoch": 7.86376953125e-05,
      "model_forward_time": 0.11408591270446777,
      "step": 12884
    },
    {
      "epoch": 7.86376953125e-05,
      "step": 12884,
      "training_step_time": 0.3807709217071533
    },
    {
      "epoch": 7.8643798828125e-05,
      "model_forward_time": 0.11434745788574219,
      "step": 12885
    },
    {
      "epoch": 7.8643798828125e-05,
      "step": 12885,
      "training_step_time": 0.36035704612731934
    },
    {
      "epoch": 7.864990234375e-05,
      "model_forward_time": 0.11411070823669434,
      "step": 12886
    },
    {
      "epoch": 7.864990234375e-05,
      "step": 12886,
      "training_step_time": 0.4207122325897217
    },
    {
      "epoch": 7.8656005859375e-05,
      "model_forward_time": 0.1153101921081543,
      "step": 12887
    },
    {
      "epoch": 7.8656005859375e-05,
      "step": 12887,
      "training_step_time": 0.7894501686096191
    },
    {
      "epoch": 7.8662109375e-05,
      "model_forward_time": 0.11406302452087402,
      "step": 12888
    },
    {
      "epoch": 7.8662109375e-05,
      "step": 12888,
      "training_step_time": 0.39467787742614746
    },
    {
      "epoch": 7.8668212890625e-05,
      "model_forward_time": 0.11369943618774414,
      "step": 12889
    },
    {
      "epoch": 7.8668212890625e-05,
      "step": 12889,
      "training_step_time": 0.4345855712890625
    },
    {
      "epoch": 7.867431640625e-05,
      "grad_norm": 0.13112682104110718,
      "learning_rate": 9.275393081240882e-05,
      "loss": 0.0571,
      "step": 12890
    },
    {
      "epoch": 7.867431640625e-05,
      "model_forward_time": 0.11444711685180664,
      "step": 12890
    },
    {
      "epoch": 7.867431640625e-05,
      "step": 12890,
      "training_step_time": 0.3833463191986084
    },
    {
      "epoch": 7.8680419921875e-05,
      "model_forward_time": 0.11413717269897461,
      "step": 12891
    },
    {
      "epoch": 7.8680419921875e-05,
      "step": 12891,
      "training_step_time": 0.3809196949005127
    },
    {
      "epoch": 7.86865234375e-05,
      "model_forward_time": 0.1143333911895752,
      "step": 12892
    },
    {
      "epoch": 7.86865234375e-05,
      "step": 12892,
      "training_step_time": 0.3843095302581787
    },
    {
      "epoch": 7.8692626953125e-05,
      "model_forward_time": 0.11511039733886719,
      "step": 12893
    },
    {
      "epoch": 7.8692626953125e-05,
      "step": 12893,
      "training_step_time": 0.804610013961792
    },
    {
      "epoch": 7.869873046875e-05,
      "model_forward_time": 0.11444377899169922,
      "step": 12894
    },
    {
      "epoch": 7.869873046875e-05,
      "step": 12894,
      "training_step_time": 0.4337034225463867
    },
    {
      "epoch": 7.8704833984375e-05,
      "model_forward_time": 0.11433029174804688,
      "step": 12895
    },
    {
      "epoch": 7.8704833984375e-05,
      "step": 12895,
      "training_step_time": 0.3901793956756592
    },
    {
      "epoch": 7.87109375e-05,
      "model_forward_time": 0.1144266128540039,
      "step": 12896
    },
    {
      "epoch": 7.87109375e-05,
      "step": 12896,
      "training_step_time": 0.3917715549468994
    },
    {
      "epoch": 7.8717041015625e-05,
      "model_forward_time": 0.11451411247253418,
      "step": 12897
    },
    {
      "epoch": 7.8717041015625e-05,
      "step": 12897,
      "training_step_time": 0.3863523006439209
    },
    {
      "epoch": 7.872314453125e-05,
      "model_forward_time": 0.11433696746826172,
      "step": 12898
    },
    {
      "epoch": 7.872314453125e-05,
      "step": 12898,
      "training_step_time": 0.40782928466796875
    },
    {
      "epoch": 7.8729248046875e-05,
      "model_forward_time": 0.11467790603637695,
      "step": 12899
    },
    {
      "epoch": 7.8729248046875e-05,
      "step": 12899,
      "training_step_time": 0.6709730625152588
    },
    {
      "epoch": 7.87353515625e-05,
      "grad_norm": 0.11208663135766983,
      "learning_rate": 9.273963562927695e-05,
      "loss": 0.0523,
      "step": 12900
    },
    {
      "epoch": 7.87353515625e-05,
      "model_forward_time": 0.11454510688781738,
      "step": 12900
    },
    {
      "epoch": 7.87353515625e-05,
      "step": 12900,
      "training_step_time": 0.4058699607849121
    },
    {
      "epoch": 7.8741455078125e-05,
      "model_forward_time": 0.11433053016662598,
      "step": 12901
    },
    {
      "epoch": 7.8741455078125e-05,
      "step": 12901,
      "training_step_time": 0.4418327808380127
    },
    {
      "epoch": 7.874755859375e-05,
      "model_forward_time": 0.11474418640136719,
      "step": 12902
    },
    {
      "epoch": 7.874755859375e-05,
      "step": 12902,
      "training_step_time": 0.4022700786590576
    },
    {
      "epoch": 7.8753662109375e-05,
      "model_forward_time": 0.11444950103759766,
      "step": 12903
    },
    {
      "epoch": 7.8753662109375e-05,
      "step": 12903,
      "training_step_time": 0.3934903144836426
    },
    {
      "epoch": 7.8759765625e-05,
      "model_forward_time": 0.1143193244934082,
      "step": 12904
    },
    {
      "epoch": 7.8759765625e-05,
      "step": 12904,
      "training_step_time": 0.39571499824523926
    },
    {
      "epoch": 7.8765869140625e-05,
      "model_forward_time": 0.11518406867980957,
      "step": 12905
    },
    {
      "epoch": 7.8765869140625e-05,
      "step": 12905,
      "training_step_time": 0.6583342552185059
    },
    {
      "epoch": 7.877197265625e-05,
      "model_forward_time": 0.11436247825622559,
      "step": 12906
    },
    {
      "epoch": 7.877197265625e-05,
      "step": 12906,
      "training_step_time": 0.4031500816345215
    },
    {
      "epoch": 7.8778076171875e-05,
      "model_forward_time": 0.11475634574890137,
      "step": 12907
    },
    {
      "epoch": 7.8778076171875e-05,
      "step": 12907,
      "training_step_time": 0.4663553237915039
    },
    {
      "epoch": 7.87841796875e-05,
      "model_forward_time": 0.11428427696228027,
      "step": 12908
    },
    {
      "epoch": 7.87841796875e-05,
      "step": 12908,
      "training_step_time": 0.42714428901672363
    },
    {
      "epoch": 7.8790283203125e-05,
      "model_forward_time": 0.11492776870727539,
      "step": 12909
    },
    {
      "epoch": 7.8790283203125e-05,
      "step": 12909,
      "training_step_time": 0.36191892623901367
    },
    {
      "epoch": 7.879638671875e-05,
      "grad_norm": 0.18077623844146729,
      "learning_rate": 9.272532746297227e-05,
      "loss": 0.0549,
      "step": 12910
    },
    {
      "epoch": 7.879638671875e-05,
      "model_forward_time": 0.11395668983459473,
      "step": 12910
    },
    {
      "epoch": 7.879638671875e-05,
      "step": 12910,
      "training_step_time": 0.3867015838623047
    },
    {
      "epoch": 7.8802490234375e-05,
      "model_forward_time": 0.11501479148864746,
      "step": 12911
    },
    {
      "epoch": 7.8802490234375e-05,
      "step": 12911,
      "training_step_time": 0.5170996189117432
    },
    {
      "epoch": 7.880859375e-05,
      "model_forward_time": 0.11453104019165039,
      "step": 12912
    },
    {
      "epoch": 7.880859375e-05,
      "step": 12912,
      "training_step_time": 0.3865196704864502
    },
    {
      "epoch": 7.8814697265625e-05,
      "model_forward_time": 0.11483621597290039,
      "step": 12913
    },
    {
      "epoch": 7.8814697265625e-05,
      "step": 12913,
      "training_step_time": 0.4430196285247803
    },
    {
      "epoch": 7.882080078125e-05,
      "model_forward_time": 0.11476564407348633,
      "step": 12914
    },
    {
      "epoch": 7.882080078125e-05,
      "step": 12914,
      "training_step_time": 0.45878028869628906
    },
    {
      "epoch": 7.8826904296875e-05,
      "model_forward_time": 0.11474442481994629,
      "step": 12915
    },
    {
      "epoch": 7.8826904296875e-05,
      "step": 12915,
      "training_step_time": 0.46091413497924805
    },
    {
      "epoch": 7.88330078125e-05,
      "model_forward_time": 0.11503934860229492,
      "step": 12916
    },
    {
      "epoch": 7.88330078125e-05,
      "step": 12916,
      "training_step_time": 0.4003632068634033
    },
    {
      "epoch": 7.8839111328125e-05,
      "model_forward_time": 0.11412620544433594,
      "step": 12917
    },
    {
      "epoch": 7.8839111328125e-05,
      "step": 12917,
      "training_step_time": 0.6795482635498047
    },
    {
      "epoch": 7.884521484375e-05,
      "model_forward_time": 0.11403656005859375,
      "step": 12918
    },
    {
      "epoch": 7.884521484375e-05,
      "step": 12918,
      "training_step_time": 0.38451719284057617
    },
    {
      "epoch": 7.8851318359375e-05,
      "model_forward_time": 0.11474943161010742,
      "step": 12919
    },
    {
      "epoch": 7.8851318359375e-05,
      "step": 12919,
      "training_step_time": 0.38904595375061035
    },
    {
      "epoch": 7.8857421875e-05,
      "grad_norm": 0.2412412166595459,
      "learning_rate": 9.27110063178412e-05,
      "loss": 0.0629,
      "step": 12920
    },
    {
      "epoch": 7.8857421875e-05,
      "model_forward_time": 0.11552000045776367,
      "step": 12920
    },
    {
      "epoch": 7.8857421875e-05,
      "step": 12920,
      "training_step_time": 0.43782520294189453
    },
    {
      "epoch": 7.8863525390625e-05,
      "model_forward_time": 0.11463713645935059,
      "step": 12921
    },
    {
      "epoch": 7.8863525390625e-05,
      "step": 12921,
      "training_step_time": 0.385939359664917
    },
    {
      "epoch": 7.886962890625e-05,
      "model_forward_time": 0.11467409133911133,
      "step": 12922
    },
    {
      "epoch": 7.886962890625e-05,
      "step": 12922,
      "training_step_time": 0.4248378276824951
    },
    {
      "epoch": 7.8875732421875e-05,
      "model_forward_time": 0.11441755294799805,
      "step": 12923
    },
    {
      "epoch": 7.8875732421875e-05,
      "step": 12923,
      "training_step_time": 0.7975740432739258
    },
    {
      "epoch": 7.88818359375e-05,
      "model_forward_time": 0.11451554298400879,
      "step": 12924
    },
    {
      "epoch": 7.88818359375e-05,
      "step": 12924,
      "training_step_time": 0.3831787109375
    },
    {
      "epoch": 7.8887939453125e-05,
      "model_forward_time": 0.11458110809326172,
      "step": 12925
    },
    {
      "epoch": 7.8887939453125e-05,
      "step": 12925,
      "training_step_time": 0.3857259750366211
    },
    {
      "epoch": 7.889404296875e-05,
      "model_forward_time": 0.1139822006225586,
      "step": 12926
    },
    {
      "epoch": 7.889404296875e-05,
      "step": 12926,
      "training_step_time": 0.4607574939727783
    },
    {
      "epoch": 7.8900146484375e-05,
      "model_forward_time": 0.11455416679382324,
      "step": 12927
    },
    {
      "epoch": 7.8900146484375e-05,
      "step": 12927,
      "training_step_time": 0.4992804527282715
    },
    {
      "epoch": 7.890625e-05,
      "model_forward_time": 0.11455583572387695,
      "step": 12928
    },
    {
      "epoch": 7.890625e-05,
      "step": 12928,
      "training_step_time": 0.41536927223205566
    },
    {
      "epoch": 7.8912353515625e-05,
      "model_forward_time": 0.1144876480102539,
      "step": 12929
    },
    {
      "epoch": 7.8912353515625e-05,
      "step": 12929,
      "training_step_time": 0.6412825584411621
    },
    {
      "epoch": 7.891845703125e-05,
      "grad_norm": 0.1851770430803299,
      "learning_rate": 9.269667219823412e-05,
      "loss": 0.0557,
      "step": 12930
    },
    {
      "epoch": 7.891845703125e-05,
      "model_forward_time": 0.11438608169555664,
      "step": 12930
    },
    {
      "epoch": 7.891845703125e-05,
      "step": 12930,
      "training_step_time": 0.40003204345703125
    },
    {
      "epoch": 7.8924560546875e-05,
      "model_forward_time": 0.11443805694580078,
      "step": 12931
    },
    {
      "epoch": 7.8924560546875e-05,
      "step": 12931,
      "training_step_time": 0.37960338592529297
    },
    {
      "epoch": 7.89306640625e-05,
      "model_forward_time": 0.11442255973815918,
      "step": 12932
    },
    {
      "epoch": 7.89306640625e-05,
      "step": 12932,
      "training_step_time": 0.3800089359283447
    },
    {
      "epoch": 7.8936767578125e-05,
      "model_forward_time": 0.1146249771118164,
      "step": 12933
    },
    {
      "epoch": 7.8936767578125e-05,
      "step": 12933,
      "training_step_time": 0.3917527198791504
    },
    {
      "epoch": 7.894287109375e-05,
      "model_forward_time": 0.1148688793182373,
      "step": 12934
    },
    {
      "epoch": 7.894287109375e-05,
      "step": 12934,
      "training_step_time": 0.450228214263916
    },
    {
      "epoch": 7.8948974609375e-05,
      "model_forward_time": 0.11484694480895996,
      "step": 12935
    },
    {
      "epoch": 7.8948974609375e-05,
      "step": 12935,
      "training_step_time": 0.7613561153411865
    },
    {
      "epoch": 7.8955078125e-05,
      "model_forward_time": 0.11414051055908203,
      "step": 12936
    },
    {
      "epoch": 7.8955078125e-05,
      "step": 12936,
      "training_step_time": 0.38779544830322266
    },
    {
      "epoch": 7.8961181640625e-05,
      "model_forward_time": 0.11429405212402344,
      "step": 12937
    },
    {
      "epoch": 7.8961181640625e-05,
      "step": 12937,
      "training_step_time": 0.391451358795166
    },
    {
      "epoch": 7.896728515625e-05,
      "model_forward_time": 0.11465930938720703,
      "step": 12938
    },
    {
      "epoch": 7.896728515625e-05,
      "step": 12938,
      "training_step_time": 0.38732481002807617
    },
    {
      "epoch": 7.8973388671875e-05,
      "model_forward_time": 0.11398553848266602,
      "step": 12939
    },
    {
      "epoch": 7.8973388671875e-05,
      "step": 12939,
      "training_step_time": 0.4300878047943115
    },
    {
      "epoch": 7.89794921875e-05,
      "grad_norm": 0.1094302386045456,
      "learning_rate": 9.268232510850539e-05,
      "loss": 0.0506,
      "step": 12940
    },
    {
      "epoch": 7.89794921875e-05,
      "model_forward_time": 0.11444091796875,
      "step": 12940
    },
    {
      "epoch": 7.89794921875e-05,
      "step": 12940,
      "training_step_time": 0.4870734214782715
    },
    {
      "epoch": 7.8985595703125e-05,
      "model_forward_time": 0.11527585983276367,
      "step": 12941
    },
    {
      "epoch": 7.8985595703125e-05,
      "step": 12941,
      "training_step_time": 0.6066646575927734
    },
    {
      "epoch": 7.899169921875e-05,
      "model_forward_time": 0.11432218551635742,
      "step": 12942
    },
    {
      "epoch": 7.899169921875e-05,
      "step": 12942,
      "training_step_time": 0.3889169692993164
    },
    {
      "epoch": 7.8997802734375e-05,
      "model_forward_time": 0.11476325988769531,
      "step": 12943
    },
    {
      "epoch": 7.8997802734375e-05,
      "step": 12943,
      "training_step_time": 0.3887040615081787
    },
    {
      "epoch": 7.900390625e-05,
      "model_forward_time": 0.11443281173706055,
      "step": 12944
    },
    {
      "epoch": 7.900390625e-05,
      "step": 12944,
      "training_step_time": 0.38283753395080566
    },
    {
      "epoch": 7.9010009765625e-05,
      "model_forward_time": 0.11467885971069336,
      "step": 12945
    },
    {
      "epoch": 7.9010009765625e-05,
      "step": 12945,
      "training_step_time": 0.4035947322845459
    },
    {
      "epoch": 7.901611328125e-05,
      "model_forward_time": 0.11500883102416992,
      "step": 12946
    },
    {
      "epoch": 7.901611328125e-05,
      "step": 12946,
      "training_step_time": 0.3836793899536133
    },
    {
      "epoch": 7.9022216796875e-05,
      "model_forward_time": 0.11466336250305176,
      "step": 12947
    },
    {
      "epoch": 7.9022216796875e-05,
      "step": 12947,
      "training_step_time": 0.834282636642456
    },
    {
      "epoch": 7.90283203125e-05,
      "model_forward_time": 0.11419868469238281,
      "step": 12948
    },
    {
      "epoch": 7.90283203125e-05,
      "step": 12948,
      "training_step_time": 0.4510641098022461
    },
    {
      "epoch": 7.9034423828125e-05,
      "model_forward_time": 0.11417770385742188,
      "step": 12949
    },
    {
      "epoch": 7.9034423828125e-05,
      "step": 12949,
      "training_step_time": 0.3854789733886719
    },
    {
      "epoch": 7.904052734375e-05,
      "grad_norm": 0.15408022701740265,
      "learning_rate": 9.266796505301322e-05,
      "loss": 0.0574,
      "step": 12950
    },
    {
      "epoch": 7.904052734375e-05,
      "model_forward_time": 0.11445808410644531,
      "step": 12950
    },
    {
      "epoch": 7.904052734375e-05,
      "step": 12950,
      "training_step_time": 0.39871788024902344
    },
    {
      "epoch": 7.9046630859375e-05,
      "model_forward_time": 0.11397099494934082,
      "step": 12951
    },
    {
      "epoch": 7.9046630859375e-05,
      "step": 12951,
      "training_step_time": 0.38477110862731934
    },
    {
      "epoch": 7.9052734375e-05,
      "model_forward_time": 0.11426997184753418,
      "step": 12952
    },
    {
      "epoch": 7.9052734375e-05,
      "step": 12952,
      "training_step_time": 0.40901780128479004
    },
    {
      "epoch": 7.9058837890625e-05,
      "model_forward_time": 0.11608314514160156,
      "step": 12953
    },
    {
      "epoch": 7.9058837890625e-05,
      "step": 12953,
      "training_step_time": 0.6787254810333252
    },
    {
      "epoch": 7.906494140625e-05,
      "model_forward_time": 0.11447930335998535,
      "step": 12954
    },
    {
      "epoch": 7.906494140625e-05,
      "step": 12954,
      "training_step_time": 0.484738826751709
    },
    {
      "epoch": 7.9071044921875e-05,
      "model_forward_time": 0.1150979995727539,
      "step": 12955
    },
    {
      "epoch": 7.9071044921875e-05,
      "step": 12955,
      "training_step_time": 0.4186875820159912
    },
    {
      "epoch": 7.90771484375e-05,
      "model_forward_time": 0.11446809768676758,
      "step": 12956
    },
    {
      "epoch": 7.90771484375e-05,
      "step": 12956,
      "training_step_time": 0.3856511116027832
    },
    {
      "epoch": 7.9083251953125e-05,
      "model_forward_time": 0.1147756576538086,
      "step": 12957
    },
    {
      "epoch": 7.9083251953125e-05,
      "step": 12957,
      "training_step_time": 0.390338659286499
    },
    {
      "epoch": 7.908935546875e-05,
      "model_forward_time": 0.11481356620788574,
      "step": 12958
    },
    {
      "epoch": 7.908935546875e-05,
      "step": 12958,
      "training_step_time": 0.37438464164733887
    },
    {
      "epoch": 7.9095458984375e-05,
      "model_forward_time": 0.11499691009521484,
      "step": 12959
    },
    {
      "epoch": 7.9095458984375e-05,
      "step": 12959,
      "training_step_time": 0.7419378757476807
    },
    {
      "epoch": 7.91015625e-05,
      "grad_norm": 0.18569806218147278,
      "learning_rate": 9.265359203611987e-05,
      "loss": 0.0548,
      "step": 12960
    },
    {
      "epoch": 7.91015625e-05,
      "model_forward_time": 0.11454367637634277,
      "step": 12960
    },
    {
      "epoch": 7.91015625e-05,
      "step": 12960,
      "training_step_time": 0.3939793109893799
    },
    {
      "epoch": 7.9107666015625e-05,
      "model_forward_time": 0.1145486831665039,
      "step": 12961
    },
    {
      "epoch": 7.9107666015625e-05,
      "step": 12961,
      "training_step_time": 0.4256012439727783
    },
    {
      "epoch": 7.911376953125e-05,
      "model_forward_time": 0.11453795433044434,
      "step": 12962
    },
    {
      "epoch": 7.911376953125e-05,
      "step": 12962,
      "training_step_time": 0.40773868560791016
    },
    {
      "epoch": 7.9119873046875e-05,
      "model_forward_time": 0.11473226547241211,
      "step": 12963
    },
    {
      "epoch": 7.9119873046875e-05,
      "step": 12963,
      "training_step_time": 0.3905026912689209
    },
    {
      "epoch": 7.91259765625e-05,
      "model_forward_time": 0.11424708366394043,
      "step": 12964
    },
    {
      "epoch": 7.91259765625e-05,
      "step": 12964,
      "training_step_time": 0.39931488037109375
    },
    {
      "epoch": 7.9132080078125e-05,
      "model_forward_time": 0.11538171768188477,
      "step": 12965
    },
    {
      "epoch": 7.9132080078125e-05,
      "step": 12965,
      "training_step_time": 0.6225378513336182
    },
    {
      "epoch": 7.913818359375e-05,
      "model_forward_time": 0.1154627799987793,
      "step": 12966
    },
    {
      "epoch": 7.913818359375e-05,
      "step": 12966,
      "training_step_time": 0.4919157028198242
    },
    {
      "epoch": 7.9144287109375e-05,
      "model_forward_time": 0.11437177658081055,
      "step": 12967
    },
    {
      "epoch": 7.9144287109375e-05,
      "step": 12967,
      "training_step_time": 0.46126866340637207
    },
    {
      "epoch": 7.9150390625e-05,
      "model_forward_time": 0.11467170715332031,
      "step": 12968
    },
    {
      "epoch": 7.9150390625e-05,
      "step": 12968,
      "training_step_time": 0.47988295555114746
    },
    {
      "epoch": 7.9156494140625e-05,
      "model_forward_time": 0.11409187316894531,
      "step": 12969
    },
    {
      "epoch": 7.9156494140625e-05,
      "step": 12969,
      "training_step_time": 0.39214181900024414
    },
    {
      "epoch": 7.916259765625e-05,
      "grad_norm": 0.25258880853652954,
      "learning_rate": 9.263920606219147e-05,
      "loss": 0.062,
      "step": 12970
    },
    {
      "epoch": 7.916259765625e-05,
      "model_forward_time": 0.11417603492736816,
      "step": 12970
    },
    {
      "epoch": 7.916259765625e-05,
      "step": 12970,
      "training_step_time": 0.39333391189575195
    },
    {
      "epoch": 7.9168701171875e-05,
      "model_forward_time": 0.11480522155761719,
      "step": 12971
    },
    {
      "epoch": 7.9168701171875e-05,
      "step": 12971,
      "training_step_time": 0.404494047164917
    },
    {
      "epoch": 7.91748046875e-05,
      "model_forward_time": 0.11495399475097656,
      "step": 12972
    },
    {
      "epoch": 7.91748046875e-05,
      "step": 12972,
      "training_step_time": 0.398775577545166
    },
    {
      "epoch": 7.9180908203125e-05,
      "model_forward_time": 0.1150662899017334,
      "step": 12973
    },
    {
      "epoch": 7.9180908203125e-05,
      "step": 12973,
      "training_step_time": 0.3908524513244629
    },
    {
      "epoch": 7.918701171875e-05,
      "model_forward_time": 0.11505270004272461,
      "step": 12974
    },
    {
      "epoch": 7.918701171875e-05,
      "step": 12974,
      "training_step_time": 0.4836580753326416
    },
    {
      "epoch": 7.9193115234375e-05,
      "model_forward_time": 0.1163640022277832,
      "step": 12975
    },
    {
      "epoch": 7.9193115234375e-05,
      "step": 12975,
      "training_step_time": 0.43768310546875
    },
    {
      "epoch": 7.919921875e-05,
      "model_forward_time": 0.11499166488647461,
      "step": 12976
    },
    {
      "epoch": 7.919921875e-05,
      "step": 12976,
      "training_step_time": 0.39443182945251465
    },
    {
      "epoch": 7.9205322265625e-05,
      "model_forward_time": 0.11491584777832031,
      "step": 12977
    },
    {
      "epoch": 7.9205322265625e-05,
      "step": 12977,
      "training_step_time": 0.8973934650421143
    },
    {
      "epoch": 7.921142578125e-05,
      "model_forward_time": 0.11420321464538574,
      "step": 12978
    },
    {
      "epoch": 7.921142578125e-05,
      "step": 12978,
      "training_step_time": 0.39031052589416504
    },
    {
      "epoch": 7.9217529296875e-05,
      "model_forward_time": 0.11525964736938477,
      "step": 12979
    },
    {
      "epoch": 7.9217529296875e-05,
      "step": 12979,
      "training_step_time": 0.4325258731842041
    },
    {
      "epoch": 7.92236328125e-05,
      "grad_norm": 0.12978006899356842,
      "learning_rate": 9.262480713559808e-05,
      "loss": 0.0557,
      "step": 12980
    },
    {
      "epoch": 7.92236328125e-05,
      "model_forward_time": 0.11478519439697266,
      "step": 12980
    },
    {
      "epoch": 7.92236328125e-05,
      "step": 12980,
      "training_step_time": 0.37256765365600586
    },
    {
      "epoch": 7.9229736328125e-05,
      "model_forward_time": 0.11490011215209961,
      "step": 12981
    },
    {
      "epoch": 7.9229736328125e-05,
      "step": 12981,
      "training_step_time": 0.5101253986358643
    },
    {
      "epoch": 7.923583984375e-05,
      "model_forward_time": 0.11491203308105469,
      "step": 12982
    },
    {
      "epoch": 7.923583984375e-05,
      "step": 12982,
      "training_step_time": 0.41533422470092773
    },
    {
      "epoch": 7.9241943359375e-05,
      "model_forward_time": 0.11515283584594727,
      "step": 12983
    },
    {
      "epoch": 7.9241943359375e-05,
      "step": 12983,
      "training_step_time": 0.5878124237060547
    },
    {
      "epoch": 7.9248046875e-05,
      "model_forward_time": 0.1145634651184082,
      "step": 12984
    },
    {
      "epoch": 7.9248046875e-05,
      "step": 12984,
      "training_step_time": 0.398190975189209
    },
    {
      "epoch": 7.9254150390625e-05,
      "model_forward_time": 0.11393284797668457,
      "step": 12985
    },
    {
      "epoch": 7.9254150390625e-05,
      "step": 12985,
      "training_step_time": 0.4011256694793701
    },
    {
      "epoch": 7.926025390625e-05,
      "model_forward_time": 0.11420726776123047,
      "step": 12986
    },
    {
      "epoch": 7.926025390625e-05,
      "step": 12986,
      "training_step_time": 0.38039159774780273
    },
    {
      "epoch": 7.9266357421875e-05,
      "model_forward_time": 0.11554312705993652,
      "step": 12987
    },
    {
      "epoch": 7.9266357421875e-05,
      "step": 12987,
      "training_step_time": 0.4088308811187744
    },
    {
      "epoch": 7.92724609375e-05,
      "model_forward_time": 0.11575603485107422,
      "step": 12988
    },
    {
      "epoch": 7.92724609375e-05,
      "step": 12988,
      "training_step_time": 0.3881981372833252
    },
    {
      "epoch": 7.9278564453125e-05,
      "model_forward_time": 0.11529994010925293,
      "step": 12989
    },
    {
      "epoch": 7.9278564453125e-05,
      "step": 12989,
      "training_step_time": 0.7622487545013428
    },
    {
      "epoch": 7.928466796875e-05,
      "grad_norm": 0.1507616937160492,
      "learning_rate": 9.261039526071374e-05,
      "loss": 0.0555,
      "step": 12990
    },
    {
      "epoch": 7.928466796875e-05,
      "model_forward_time": 0.11451840400695801,
      "step": 12990
    },
    {
      "epoch": 7.928466796875e-05,
      "step": 12990,
      "training_step_time": 0.41092872619628906
    },
    {
      "epoch": 7.9290771484375e-05,
      "model_forward_time": 0.11435246467590332,
      "step": 12991
    },
    {
      "epoch": 7.9290771484375e-05,
      "step": 12991,
      "training_step_time": 0.46253299713134766
    },
    {
      "epoch": 7.9296875e-05,
      "model_forward_time": 0.1144711971282959,
      "step": 12992
    },
    {
      "epoch": 7.9296875e-05,
      "step": 12992,
      "training_step_time": 0.3841104507446289
    },
    {
      "epoch": 7.9302978515625e-05,
      "model_forward_time": 0.11429142951965332,
      "step": 12993
    },
    {
      "epoch": 7.9302978515625e-05,
      "step": 12993,
      "training_step_time": 0.39553284645080566
    },
    {
      "epoch": 7.930908203125e-05,
      "model_forward_time": 0.11382269859313965,
      "step": 12994
    },
    {
      "epoch": 7.930908203125e-05,
      "step": 12994,
      "training_step_time": 0.4370896816253662
    },
    {
      "epoch": 7.9315185546875e-05,
      "model_forward_time": 0.11451435089111328,
      "step": 12995
    },
    {
      "epoch": 7.9315185546875e-05,
      "step": 12995,
      "training_step_time": 0.7708075046539307
    },
    {
      "epoch": 7.93212890625e-05,
      "model_forward_time": 0.11440467834472656,
      "step": 12996
    },
    {
      "epoch": 7.93212890625e-05,
      "step": 12996,
      "training_step_time": 0.3934943675994873
    },
    {
      "epoch": 7.9327392578125e-05,
      "model_forward_time": 0.11398077011108398,
      "step": 12997
    },
    {
      "epoch": 7.9327392578125e-05,
      "step": 12997,
      "training_step_time": 0.3912384510040283
    },
    {
      "epoch": 7.933349609375e-05,
      "model_forward_time": 0.11430978775024414,
      "step": 12998
    },
    {
      "epoch": 7.933349609375e-05,
      "step": 12998,
      "training_step_time": 0.3968997001647949
    },
    {
      "epoch": 7.9339599609375e-05,
      "model_forward_time": 0.11417531967163086,
      "step": 12999
    },
    {
      "epoch": 7.9339599609375e-05,
      "step": 12999,
      "training_step_time": 0.45731639862060547
    },
    {
      "epoch": 7.9345703125e-05,
      "grad_norm": 0.17368747293949127,
      "learning_rate": 9.259597044191636e-05,
      "loss": 0.0582,
      "step": 13000
    },
    {
      "epoch": 7.9345703125e-05,
      "model_forward_time": 0.11253094673156738,
      "step": 13000
    },
    {
      "epoch": 7.9345703125e-05,
      "step": 13000,
      "training_step_time": 0.35210371017456055
    },
    {
      "epoch": 7.9351806640625e-05,
      "model_forward_time": 0.11245608329772949,
      "step": 13001
    },
    {
      "epoch": 7.9351806640625e-05,
      "step": 13001,
      "training_step_time": 0.3774759769439697
    },
    {
      "epoch": 7.935791015625e-05,
      "model_forward_time": 0.11273407936096191,
      "step": 13002
    },
    {
      "epoch": 7.935791015625e-05,
      "step": 13002,
      "training_step_time": 0.3780043125152588
    },
    {
      "epoch": 7.9364013671875e-05,
      "model_forward_time": 0.11363458633422852,
      "step": 13003
    },
    {
      "epoch": 7.9364013671875e-05,
      "step": 13003,
      "training_step_time": 0.4478268623352051
    },
    {
      "epoch": 7.93701171875e-05,
      "model_forward_time": 0.11340999603271484,
      "step": 13004
    },
    {
      "epoch": 7.93701171875e-05,
      "step": 13004,
      "training_step_time": 0.4305379390716553
    },
    {
      "epoch": 7.9376220703125e-05,
      "model_forward_time": 0.11394667625427246,
      "step": 13005
    },
    {
      "epoch": 7.9376220703125e-05,
      "step": 13005,
      "training_step_time": 0.4639933109283447
    },
    {
      "epoch": 7.938232421875e-05,
      "model_forward_time": 0.11444878578186035,
      "step": 13006
    },
    {
      "epoch": 7.938232421875e-05,
      "step": 13006,
      "training_step_time": 0.3848109245300293
    },
    {
      "epoch": 7.9388427734375e-05,
      "model_forward_time": 0.11461997032165527,
      "step": 13007
    },
    {
      "epoch": 7.9388427734375e-05,
      "step": 13007,
      "training_step_time": 0.3873565196990967
    },
    {
      "epoch": 7.939453125e-05,
      "model_forward_time": 0.1142282485961914,
      "step": 13008
    },
    {
      "epoch": 7.939453125e-05,
      "step": 13008,
      "training_step_time": 0.41814303398132324
    },
    {
      "epoch": 7.9400634765625e-05,
      "model_forward_time": 0.11499857902526855,
      "step": 13009
    },
    {
      "epoch": 7.9400634765625e-05,
      "step": 13009,
      "training_step_time": 0.39257168769836426
    },
    {
      "epoch": 7.940673828125e-05,
      "grad_norm": 0.27193015813827515,
      "learning_rate": 9.258153268358785e-05,
      "loss": 0.0601,
      "step": 13010
    },
    {
      "epoch": 7.940673828125e-05,
      "model_forward_time": 0.11499929428100586,
      "step": 13010
    },
    {
      "epoch": 7.940673828125e-05,
      "step": 13010,
      "training_step_time": 0.4572570323944092
    },
    {
      "epoch": 7.9412841796875e-05,
      "model_forward_time": 0.11422395706176758,
      "step": 13011
    },
    {
      "epoch": 7.9412841796875e-05,
      "step": 13011,
      "training_step_time": 0.4552619457244873
    },
    {
      "epoch": 7.94189453125e-05,
      "model_forward_time": 0.11477327346801758,
      "step": 13012
    },
    {
      "epoch": 7.94189453125e-05,
      "step": 13012,
      "training_step_time": 0.3986775875091553
    },
    {
      "epoch": 7.9425048828125e-05,
      "model_forward_time": 0.11528205871582031,
      "step": 13013
    },
    {
      "epoch": 7.9425048828125e-05,
      "step": 13013,
      "training_step_time": 0.3961296081542969
    },
    {
      "epoch": 7.943115234375e-05,
      "model_forward_time": 0.11518621444702148,
      "step": 13014
    },
    {
      "epoch": 7.943115234375e-05,
      "step": 13014,
      "training_step_time": 0.38977861404418945
    },
    {
      "epoch": 7.9437255859375e-05,
      "model_forward_time": 0.11515927314758301,
      "step": 13015
    },
    {
      "epoch": 7.9437255859375e-05,
      "step": 13015,
      "training_step_time": 0.3938889503479004
    },
    {
      "epoch": 7.9443359375e-05,
      "model_forward_time": 0.11507964134216309,
      "step": 13016
    },
    {
      "epoch": 7.9443359375e-05,
      "step": 13016,
      "training_step_time": 0.39566969871520996
    },
    {
      "epoch": 7.9449462890625e-05,
      "model_forward_time": 0.11542510986328125,
      "step": 13017
    },
    {
      "epoch": 7.9449462890625e-05,
      "step": 13017,
      "training_step_time": 0.39830946922302246
    },
    {
      "epoch": 7.945556640625e-05,
      "model_forward_time": 0.11490488052368164,
      "step": 13018
    },
    {
      "epoch": 7.945556640625e-05,
      "step": 13018,
      "training_step_time": 0.46602845191955566
    },
    {
      "epoch": 7.9461669921875e-05,
      "model_forward_time": 0.11478662490844727,
      "step": 13019
    },
    {
      "epoch": 7.9461669921875e-05,
      "step": 13019,
      "training_step_time": 0.44083356857299805
    },
    {
      "epoch": 7.94677734375e-05,
      "grad_norm": 0.20767058432102203,
      "learning_rate": 9.256708199011401e-05,
      "loss": 0.0563,
      "step": 13020
    },
    {
      "epoch": 7.94677734375e-05,
      "model_forward_time": 0.11499261856079102,
      "step": 13020
    },
    {
      "epoch": 7.94677734375e-05,
      "step": 13020,
      "training_step_time": 0.418410062789917
    },
    {
      "epoch": 7.9473876953125e-05,
      "model_forward_time": 0.1149590015411377,
      "step": 13021
    },
    {
      "epoch": 7.9473876953125e-05,
      "step": 13021,
      "training_step_time": 0.4106161594390869
    },
    {
      "epoch": 7.947998046875e-05,
      "model_forward_time": 0.11481666564941406,
      "step": 13022
    },
    {
      "epoch": 7.947998046875e-05,
      "step": 13022,
      "training_step_time": 0.39710474014282227
    },
    {
      "epoch": 7.9486083984375e-05,
      "model_forward_time": 0.11479949951171875,
      "step": 13023
    },
    {
      "epoch": 7.9486083984375e-05,
      "step": 13023,
      "training_step_time": 0.39568495750427246
    },
    {
      "epoch": 7.94921875e-05,
      "model_forward_time": 0.1147768497467041,
      "step": 13024
    },
    {
      "epoch": 7.94921875e-05,
      "step": 13024,
      "training_step_time": 0.41022610664367676
    },
    {
      "epoch": 7.9498291015625e-05,
      "model_forward_time": 0.11490392684936523,
      "step": 13025
    },
    {
      "epoch": 7.9498291015625e-05,
      "step": 13025,
      "training_step_time": 0.4746055603027344
    },
    {
      "epoch": 7.950439453125e-05,
      "model_forward_time": 0.11528801918029785,
      "step": 13026
    },
    {
      "epoch": 7.950439453125e-05,
      "step": 13026,
      "training_step_time": 0.4629173278808594
    },
    {
      "epoch": 7.9510498046875e-05,
      "model_forward_time": 0.11682248115539551,
      "step": 13027
    },
    {
      "epoch": 7.9510498046875e-05,
      "step": 13027,
      "training_step_time": 0.49252891540527344
    },
    {
      "epoch": 7.95166015625e-05,
      "model_forward_time": 0.11501860618591309,
      "step": 13028
    },
    {
      "epoch": 7.95166015625e-05,
      "step": 13028,
      "training_step_time": 0.388355016708374
    },
    {
      "epoch": 7.9522705078125e-05,
      "model_forward_time": 0.11454534530639648,
      "step": 13029
    },
    {
      "epoch": 7.9522705078125e-05,
      "step": 13029,
      "training_step_time": 0.3926389217376709
    },
    {
      "epoch": 7.952880859375e-05,
      "grad_norm": 0.205380380153656,
      "learning_rate": 9.255261836588458e-05,
      "loss": 0.0576,
      "step": 13030
    },
    {
      "epoch": 7.952880859375e-05,
      "model_forward_time": 0.11520600318908691,
      "step": 13030
    },
    {
      "epoch": 7.952880859375e-05,
      "step": 13030,
      "training_step_time": 0.3916285037994385
    },
    {
      "epoch": 7.9534912109375e-05,
      "model_forward_time": 0.11466026306152344,
      "step": 13031
    },
    {
      "epoch": 7.9534912109375e-05,
      "step": 13031,
      "training_step_time": 0.39525365829467773
    },
    {
      "epoch": 7.9541015625e-05,
      "model_forward_time": 0.11545991897583008,
      "step": 13032
    },
    {
      "epoch": 7.9541015625e-05,
      "step": 13032,
      "training_step_time": 0.5273182392120361
    },
    {
      "epoch": 7.9547119140625e-05,
      "model_forward_time": 0.11508989334106445,
      "step": 13033
    },
    {
      "epoch": 7.9547119140625e-05,
      "step": 13033,
      "training_step_time": 0.42846107482910156
    },
    {
      "epoch": 7.955322265625e-05,
      "model_forward_time": 0.11522793769836426,
      "step": 13034
    },
    {
      "epoch": 7.955322265625e-05,
      "step": 13034,
      "training_step_time": 0.4093754291534424
    },
    {
      "epoch": 7.9559326171875e-05,
      "model_forward_time": 0.11500787734985352,
      "step": 13035
    },
    {
      "epoch": 7.9559326171875e-05,
      "step": 13035,
      "training_step_time": 0.3923797607421875
    },
    {
      "epoch": 7.95654296875e-05,
      "model_forward_time": 0.11486339569091797,
      "step": 13036
    },
    {
      "epoch": 7.95654296875e-05,
      "step": 13036,
      "training_step_time": 0.3941464424133301
    },
    {
      "epoch": 7.9571533203125e-05,
      "model_forward_time": 0.11536026000976562,
      "step": 13037
    },
    {
      "epoch": 7.9571533203125e-05,
      "step": 13037,
      "training_step_time": 0.400820255279541
    },
    {
      "epoch": 7.957763671875e-05,
      "model_forward_time": 0.11474204063415527,
      "step": 13038
    },
    {
      "epoch": 7.957763671875e-05,
      "step": 13038,
      "training_step_time": 0.4577059745788574
    },
    {
      "epoch": 7.9583740234375e-05,
      "model_forward_time": 0.11466336250305176,
      "step": 13039
    },
    {
      "epoch": 7.9583740234375e-05,
      "step": 13039,
      "training_step_time": 0.3667147159576416
    },
    {
      "epoch": 7.958984375e-05,
      "grad_norm": 0.21217595040798187,
      "learning_rate": 9.253814181529323e-05,
      "loss": 0.0543,
      "step": 13040
    },
    {
      "epoch": 7.958984375e-05,
      "model_forward_time": 0.1141808032989502,
      "step": 13040
    },
    {
      "epoch": 7.958984375e-05,
      "step": 13040,
      "training_step_time": 0.41249918937683105
    },
    {
      "epoch": 7.9595947265625e-05,
      "model_forward_time": 0.11670207977294922,
      "step": 13041
    },
    {
      "epoch": 7.9595947265625e-05,
      "step": 13041,
      "training_step_time": 0.4587669372558594
    },
    {
      "epoch": 7.960205078125e-05,
      "model_forward_time": 0.1150045394897461,
      "step": 13042
    },
    {
      "epoch": 7.960205078125e-05,
      "step": 13042,
      "training_step_time": 0.3829991817474365
    },
    {
      "epoch": 7.9608154296875e-05,
      "model_forward_time": 0.11571335792541504,
      "step": 13043
    },
    {
      "epoch": 7.9608154296875e-05,
      "step": 13043,
      "training_step_time": 0.39650487899780273
    },
    {
      "epoch": 7.96142578125e-05,
      "model_forward_time": 0.11558222770690918,
      "step": 13044
    },
    {
      "epoch": 7.96142578125e-05,
      "step": 13044,
      "training_step_time": 0.384765625
    },
    {
      "epoch": 7.9620361328125e-05,
      "model_forward_time": 0.11529374122619629,
      "step": 13045
    },
    {
      "epoch": 7.9620361328125e-05,
      "step": 13045,
      "training_step_time": 0.4278898239135742
    },
    {
      "epoch": 7.962646484375e-05,
      "model_forward_time": 0.11457014083862305,
      "step": 13046
    },
    {
      "epoch": 7.962646484375e-05,
      "step": 13046,
      "training_step_time": 0.4576992988586426
    },
    {
      "epoch": 7.9632568359375e-05,
      "model_forward_time": 0.11559438705444336,
      "step": 13047
    },
    {
      "epoch": 7.9632568359375e-05,
      "step": 13047,
      "training_step_time": 0.48229098320007324
    },
    {
      "epoch": 7.9638671875e-05,
      "model_forward_time": 0.11513400077819824,
      "step": 13048
    },
    {
      "epoch": 7.9638671875e-05,
      "step": 13048,
      "training_step_time": 0.3918464183807373
    },
    {
      "epoch": 7.9644775390625e-05,
      "model_forward_time": 0.11507725715637207,
      "step": 13049
    },
    {
      "epoch": 7.9644775390625e-05,
      "step": 13049,
      "training_step_time": 0.4197254180908203
    },
    {
      "epoch": 7.965087890625e-05,
      "grad_norm": 0.19332049787044525,
      "learning_rate": 9.252365234273755e-05,
      "loss": 0.056,
      "step": 13050
    },
    {
      "epoch": 7.965087890625e-05,
      "model_forward_time": 0.11478233337402344,
      "step": 13050
    },
    {
      "epoch": 7.965087890625e-05,
      "step": 13050,
      "training_step_time": 0.40630674362182617
    },
    {
      "epoch": 7.9656982421875e-05,
      "model_forward_time": 0.11548519134521484,
      "step": 13051
    },
    {
      "epoch": 7.9656982421875e-05,
      "step": 13051,
      "training_step_time": 0.39785122871398926
    },
    {
      "epoch": 7.96630859375e-05,
      "model_forward_time": 0.11491179466247559,
      "step": 13052
    },
    {
      "epoch": 7.96630859375e-05,
      "step": 13052,
      "training_step_time": 0.3898308277130127
    },
    {
      "epoch": 7.9669189453125e-05,
      "model_forward_time": 0.11585688591003418,
      "step": 13053
    },
    {
      "epoch": 7.9669189453125e-05,
      "step": 13053,
      "training_step_time": 0.39821958541870117
    },
    {
      "epoch": 7.967529296875e-05,
      "model_forward_time": 0.11522245407104492,
      "step": 13054
    },
    {
      "epoch": 7.967529296875e-05,
      "step": 13054,
      "training_step_time": 0.36808180809020996
    },
    {
      "epoch": 7.9681396484375e-05,
      "model_forward_time": 0.11537814140319824,
      "step": 13055
    },
    {
      "epoch": 7.9681396484375e-05,
      "step": 13055,
      "training_step_time": 0.4877049922943115
    },
    {
      "epoch": 7.96875e-05,
      "model_forward_time": 0.1148982048034668,
      "step": 13056
    },
    {
      "epoch": 7.96875e-05,
      "step": 13056,
      "training_step_time": 0.43647241592407227
    },
    {
      "epoch": 7.9693603515625e-05,
      "model_forward_time": 0.11497855186462402,
      "step": 13057
    },
    {
      "epoch": 7.9693603515625e-05,
      "step": 13057,
      "training_step_time": 0.398348331451416
    },
    {
      "epoch": 7.969970703125e-05,
      "model_forward_time": 0.11526656150817871,
      "step": 13058
    },
    {
      "epoch": 7.969970703125e-05,
      "step": 13058,
      "training_step_time": 0.38830065727233887
    },
    {
      "epoch": 7.9705810546875e-05,
      "model_forward_time": 0.1147165298461914,
      "step": 13059
    },
    {
      "epoch": 7.9705810546875e-05,
      "step": 13059,
      "training_step_time": 0.4179830551147461
    },
    {
      "epoch": 7.97119140625e-05,
      "grad_norm": 0.17453821003437042,
      "learning_rate": 9.250914995261905e-05,
      "loss": 0.0566,
      "step": 13060
    },
    {
      "epoch": 7.97119140625e-05,
      "model_forward_time": 0.11537551879882812,
      "step": 13060
    },
    {
      "epoch": 7.97119140625e-05,
      "step": 13060,
      "training_step_time": 0.4133169651031494
    },
    {
      "epoch": 7.9718017578125e-05,
      "model_forward_time": 0.11522102355957031,
      "step": 13061
    },
    {
      "epoch": 7.9718017578125e-05,
      "step": 13061,
      "training_step_time": 0.9214637279510498
    },
    {
      "epoch": 7.972412109375e-05,
      "model_forward_time": 0.11488676071166992,
      "step": 13062
    },
    {
      "epoch": 7.972412109375e-05,
      "step": 13062,
      "training_step_time": 0.5121314525604248
    },
    {
      "epoch": 7.9730224609375e-05,
      "model_forward_time": 0.11490797996520996,
      "step": 13063
    },
    {
      "epoch": 7.9730224609375e-05,
      "step": 13063,
      "training_step_time": 0.39577817916870117
    },
    {
      "epoch": 7.9736328125e-05,
      "model_forward_time": 0.11412644386291504,
      "step": 13064
    },
    {
      "epoch": 7.9736328125e-05,
      "step": 13064,
      "training_step_time": 0.3783597946166992
    },
    {
      "epoch": 7.9742431640625e-05,
      "model_forward_time": 0.11445021629333496,
      "step": 13065
    },
    {
      "epoch": 7.9742431640625e-05,
      "step": 13065,
      "training_step_time": 0.3820779323577881
    },
    {
      "epoch": 7.974853515625e-05,
      "model_forward_time": 0.11400032043457031,
      "step": 13066
    },
    {
      "epoch": 7.974853515625e-05,
      "step": 13066,
      "training_step_time": 0.45378637313842773
    },
    {
      "epoch": 7.9754638671875e-05,
      "model_forward_time": 0.11472082138061523,
      "step": 13067
    },
    {
      "epoch": 7.9754638671875e-05,
      "step": 13067,
      "training_step_time": 0.6725327968597412
    },
    {
      "epoch": 7.97607421875e-05,
      "model_forward_time": 0.11446785926818848,
      "step": 13068
    },
    {
      "epoch": 7.97607421875e-05,
      "step": 13068,
      "training_step_time": 0.3907814025878906
    },
    {
      "epoch": 7.9766845703125e-05,
      "model_forward_time": 0.1142573356628418,
      "step": 13069
    },
    {
      "epoch": 7.9766845703125e-05,
      "step": 13069,
      "training_step_time": 0.461841344833374
    },
    {
      "epoch": 7.977294921875e-05,
      "grad_norm": 0.18152332305908203,
      "learning_rate": 9.249463464934321e-05,
      "loss": 0.0473,
      "step": 13070
    },
    {
      "epoch": 7.977294921875e-05,
      "model_forward_time": 0.1145792007446289,
      "step": 13070
    },
    {
      "epoch": 7.977294921875e-05,
      "step": 13070,
      "training_step_time": 0.3922240734100342
    },
    {
      "epoch": 7.9779052734375e-05,
      "model_forward_time": 0.11375904083251953,
      "step": 13071
    },
    {
      "epoch": 7.9779052734375e-05,
      "step": 13071,
      "training_step_time": 0.401561975479126
    },
    {
      "epoch": 7.978515625e-05,
      "model_forward_time": 0.11444616317749023,
      "step": 13072
    },
    {
      "epoch": 7.978515625e-05,
      "step": 13072,
      "training_step_time": 0.3999762535095215
    },
    {
      "epoch": 7.9791259765625e-05,
      "model_forward_time": 0.11478567123413086,
      "step": 13073
    },
    {
      "epoch": 7.9791259765625e-05,
      "step": 13073,
      "training_step_time": 0.7030737400054932
    },
    {
      "epoch": 7.979736328125e-05,
      "model_forward_time": 0.11455774307250977,
      "step": 13074
    },
    {
      "epoch": 7.979736328125e-05,
      "step": 13074,
      "training_step_time": 0.45701122283935547
    },
    {
      "epoch": 7.9803466796875e-05,
      "model_forward_time": 0.11465620994567871,
      "step": 13075
    },
    {
      "epoch": 7.9803466796875e-05,
      "step": 13075,
      "training_step_time": 0.4097480773925781
    },
    {
      "epoch": 7.98095703125e-05,
      "model_forward_time": 0.11440587043762207,
      "step": 13076
    },
    {
      "epoch": 7.98095703125e-05,
      "step": 13076,
      "training_step_time": 0.3719446659088135
    },
    {
      "epoch": 7.9815673828125e-05,
      "model_forward_time": 0.11494612693786621,
      "step": 13077
    },
    {
      "epoch": 7.9815673828125e-05,
      "step": 13077,
      "training_step_time": 0.40256261825561523
    },
    {
      "epoch": 7.982177734375e-05,
      "model_forward_time": 0.11373496055603027,
      "step": 13078
    },
    {
      "epoch": 7.982177734375e-05,
      "step": 13078,
      "training_step_time": 0.38335466384887695
    },
    {
      "epoch": 7.9827880859375e-05,
      "model_forward_time": 0.11558818817138672,
      "step": 13079
    },
    {
      "epoch": 7.9827880859375e-05,
      "step": 13079,
      "training_step_time": 0.5702500343322754
    },
    {
      "epoch": 7.9833984375e-05,
      "grad_norm": 0.18841160833835602,
      "learning_rate": 9.248010643731935e-05,
      "loss": 0.0567,
      "step": 13080
    },
    {
      "epoch": 7.9833984375e-05,
      "model_forward_time": 0.11475324630737305,
      "step": 13080
    },
    {
      "epoch": 7.9833984375e-05,
      "step": 13080,
      "training_step_time": 0.4581751823425293
    },
    {
      "epoch": 7.9840087890625e-05,
      "model_forward_time": 0.11476731300354004,
      "step": 13081
    },
    {
      "epoch": 7.9840087890625e-05,
      "step": 13081,
      "training_step_time": 0.46866273880004883
    },
    {
      "epoch": 7.984619140625e-05,
      "model_forward_time": 0.11455583572387695,
      "step": 13082
    },
    {
      "epoch": 7.984619140625e-05,
      "step": 13082,
      "training_step_time": 0.45197248458862305
    },
    {
      "epoch": 7.9852294921875e-05,
      "model_forward_time": 0.11519742012023926,
      "step": 13083
    },
    {
      "epoch": 7.9852294921875e-05,
      "step": 13083,
      "training_step_time": 0.41497254371643066
    },
    {
      "epoch": 7.98583984375e-05,
      "model_forward_time": 0.1144862174987793,
      "step": 13084
    },
    {
      "epoch": 7.98583984375e-05,
      "step": 13084,
      "training_step_time": 0.3999898433685303
    },
    {
      "epoch": 7.9864501953125e-05,
      "model_forward_time": 0.11497855186462402,
      "step": 13085
    },
    {
      "epoch": 7.9864501953125e-05,
      "step": 13085,
      "training_step_time": 0.45513343811035156
    },
    {
      "epoch": 7.987060546875e-05,
      "model_forward_time": 0.11512613296508789,
      "step": 13086
    },
    {
      "epoch": 7.987060546875e-05,
      "step": 13086,
      "training_step_time": 0.39522647857666016
    },
    {
      "epoch": 7.9876708984375e-05,
      "model_forward_time": 0.11485147476196289,
      "step": 13087
    },
    {
      "epoch": 7.9876708984375e-05,
      "step": 13087,
      "training_step_time": 0.39693260192871094
    },
    {
      "epoch": 7.98828125e-05,
      "model_forward_time": 0.11513853073120117,
      "step": 13088
    },
    {
      "epoch": 7.98828125e-05,
      "step": 13088,
      "training_step_time": 0.43831348419189453
    },
    {
      "epoch": 7.9888916015625e-05,
      "model_forward_time": 0.11513996124267578,
      "step": 13089
    },
    {
      "epoch": 7.9888916015625e-05,
      "step": 13089,
      "training_step_time": 0.4376339912414551
    },
    {
      "epoch": 7.989501953125e-05,
      "grad_norm": 0.1501334309577942,
      "learning_rate": 9.246556532096078e-05,
      "loss": 0.0544,
      "step": 13090
    },
    {
      "epoch": 7.989501953125e-05,
      "model_forward_time": 0.11609840393066406,
      "step": 13090
    },
    {
      "epoch": 7.989501953125e-05,
      "step": 13090,
      "training_step_time": 0.3846933841705322
    },
    {
      "epoch": 7.9901123046875e-05,
      "model_forward_time": 0.11492037773132324,
      "step": 13091
    },
    {
      "epoch": 7.9901123046875e-05,
      "step": 13091,
      "training_step_time": 0.743095874786377
    },
    {
      "epoch": 7.99072265625e-05,
      "model_forward_time": 0.11492800712585449,
      "step": 13092
    },
    {
      "epoch": 7.99072265625e-05,
      "step": 13092,
      "training_step_time": 0.39249110221862793
    },
    {
      "epoch": 7.9913330078125e-05,
      "model_forward_time": 0.11373662948608398,
      "step": 13093
    },
    {
      "epoch": 7.9913330078125e-05,
      "step": 13093,
      "training_step_time": 0.42876195907592773
    },
    {
      "epoch": 7.991943359375e-05,
      "model_forward_time": 0.11426854133605957,
      "step": 13094
    },
    {
      "epoch": 7.991943359375e-05,
      "step": 13094,
      "training_step_time": 0.45880126953125
    },
    {
      "epoch": 7.9925537109375e-05,
      "model_forward_time": 0.11450648307800293,
      "step": 13095
    },
    {
      "epoch": 7.9925537109375e-05,
      "step": 13095,
      "training_step_time": 0.4649834632873535
    },
    {
      "epoch": 7.9931640625e-05,
      "model_forward_time": 0.11493349075317383,
      "step": 13096
    },
    {
      "epoch": 7.9931640625e-05,
      "step": 13096,
      "training_step_time": 0.43235182762145996
    },
    {
      "epoch": 7.9937744140625e-05,
      "model_forward_time": 0.11452317237854004,
      "step": 13097
    },
    {
      "epoch": 7.9937744140625e-05,
      "step": 13097,
      "training_step_time": 0.652515172958374
    },
    {
      "epoch": 7.994384765625e-05,
      "model_forward_time": 0.11417484283447266,
      "step": 13098
    },
    {
      "epoch": 7.994384765625e-05,
      "step": 13098,
      "training_step_time": 0.392810583114624
    },
    {
      "epoch": 7.9949951171875e-05,
      "model_forward_time": 0.11426329612731934,
      "step": 13099
    },
    {
      "epoch": 7.9949951171875e-05,
      "step": 13099,
      "training_step_time": 0.3881494998931885
    },
    {
      "epoch": 7.99560546875e-05,
      "grad_norm": 0.16947698593139648,
      "learning_rate": 9.24510113046847e-05,
      "loss": 0.0582,
      "step": 13100
    },
    {
      "epoch": 7.99560546875e-05,
      "model_forward_time": 0.11451911926269531,
      "step": 13100
    },
    {
      "epoch": 7.99560546875e-05,
      "step": 13100,
      "training_step_time": 0.38585638999938965
    },
    {
      "epoch": 7.9962158203125e-05,
      "model_forward_time": 0.11458253860473633,
      "step": 13101
    },
    {
      "epoch": 7.9962158203125e-05,
      "step": 13101,
      "training_step_time": 0.40204834938049316
    },
    {
      "epoch": 7.996826171875e-05,
      "model_forward_time": 0.11479687690734863,
      "step": 13102
    },
    {
      "epoch": 7.996826171875e-05,
      "step": 13102,
      "training_step_time": 0.39160609245300293
    },
    {
      "epoch": 7.9974365234375e-05,
      "model_forward_time": 0.11515021324157715,
      "step": 13103
    },
    {
      "epoch": 7.9974365234375e-05,
      "step": 13103,
      "training_step_time": 0.8673927783966064
    },
    {
      "epoch": 7.998046875e-05,
      "model_forward_time": 0.11416411399841309,
      "step": 13104
    },
    {
      "epoch": 7.998046875e-05,
      "step": 13104,
      "training_step_time": 0.3868842124938965
    },
    {
      "epoch": 7.9986572265625e-05,
      "model_forward_time": 0.11412954330444336,
      "step": 13105
    },
    {
      "epoch": 7.9986572265625e-05,
      "step": 13105,
      "training_step_time": 0.3881382942199707
    },
    {
      "epoch": 7.999267578125e-05,
      "model_forward_time": 0.11403703689575195,
      "step": 13106
    },
    {
      "epoch": 7.999267578125e-05,
      "step": 13106,
      "training_step_time": 0.40134692192077637
    },
    {
      "epoch": 7.9998779296875e-05,
      "model_forward_time": 0.11467885971069336,
      "step": 13107
    },
    {
      "epoch": 7.9998779296875e-05,
      "step": 13107,
      "training_step_time": 0.41538572311401367
    },
    {
      "epoch": 8.00048828125e-05,
      "model_forward_time": 0.1142275333404541,
      "step": 13108
    },
    {
      "epoch": 8.00048828125e-05,
      "step": 13108,
      "training_step_time": 0.367480993270874
    },
    {
      "epoch": 8.0010986328125e-05,
      "model_forward_time": 0.1157982349395752,
      "step": 13109
    },
    {
      "epoch": 8.0010986328125e-05,
      "step": 13109,
      "training_step_time": 0.7666051387786865
    },
    {
      "epoch": 8.001708984375e-05,
      "grad_norm": 0.1667533665895462,
      "learning_rate": 9.243644439291223e-05,
      "loss": 0.0567,
      "step": 13110
    },
    {
      "epoch": 8.001708984375e-05,
      "model_forward_time": 0.11487984657287598,
      "step": 13110
    },
    {
      "epoch": 8.001708984375e-05,
      "step": 13110,
      "training_step_time": 0.38562583923339844
    },
    {
      "epoch": 8.0023193359375e-05,
      "model_forward_time": 0.11425566673278809,
      "step": 13111
    },
    {
      "epoch": 8.0023193359375e-05,
      "step": 13111,
      "training_step_time": 0.38150596618652344
    },
    {
      "epoch": 8.0029296875e-05,
      "model_forward_time": 0.11420416831970215,
      "step": 13112
    },
    {
      "epoch": 8.0029296875e-05,
      "step": 13112,
      "training_step_time": 0.3965468406677246
    },
    {
      "epoch": 8.0035400390625e-05,
      "model_forward_time": 0.11591935157775879,
      "step": 13113
    },
    {
      "epoch": 8.0035400390625e-05,
      "step": 13113,
      "training_step_time": 0.4012620449066162
    },
    {
      "epoch": 8.004150390625e-05,
      "model_forward_time": 0.11576247215270996,
      "step": 13114
    },
    {
      "epoch": 8.004150390625e-05,
      "step": 13114,
      "training_step_time": 0.4820568561553955
    },
    {
      "epoch": 8.0047607421875e-05,
      "model_forward_time": 0.1197652816772461,
      "step": 13115
    },
    {
      "epoch": 8.0047607421875e-05,
      "step": 13115,
      "training_step_time": 1.0846686363220215
    },
    {
      "epoch": 8.00537109375e-05,
      "model_forward_time": 0.11936116218566895,
      "step": 13116
    },
    {
      "epoch": 8.00537109375e-05,
      "step": 13116,
      "training_step_time": 0.6757204532623291
    },
    {
      "epoch": 8.0059814453125e-05,
      "model_forward_time": 0.11695003509521484,
      "step": 13117
    },
    {
      "epoch": 8.0059814453125e-05,
      "step": 13117,
      "training_step_time": 0.6875112056732178
    },
    {
      "epoch": 8.006591796875e-05,
      "model_forward_time": 0.11677312850952148,
      "step": 13118
    },
    {
      "epoch": 8.006591796875e-05,
      "step": 13118,
      "training_step_time": 0.6983320713043213
    },
    {
      "epoch": 8.0072021484375e-05,
      "model_forward_time": 0.12109041213989258,
      "step": 13119
    },
    {
      "epoch": 8.0072021484375e-05,
      "step": 13119,
      "training_step_time": 0.7221047878265381
    },
    {
      "epoch": 8.0078125e-05,
      "grad_norm": 0.16602279245853424,
      "learning_rate": 9.242186459006845e-05,
      "loss": 0.0612,
      "step": 13120
    },
    {
      "epoch": 8.0078125e-05,
      "model_forward_time": 0.13321542739868164,
      "step": 13120
    },
    {
      "epoch": 8.0078125e-05,
      "step": 13120,
      "training_step_time": 0.7606587409973145
    },
    {
      "epoch": 8.0084228515625e-05,
      "model_forward_time": 0.11696767807006836,
      "step": 13121
    },
    {
      "epoch": 8.0084228515625e-05,
      "step": 13121,
      "training_step_time": 0.6754288673400879
    },
    {
      "epoch": 8.009033203125e-05,
      "model_forward_time": 0.11671209335327148,
      "step": 13122
    },
    {
      "epoch": 8.009033203125e-05,
      "step": 13122,
      "training_step_time": 0.6128799915313721
    },
    {
      "epoch": 8.0096435546875e-05,
      "model_forward_time": 0.11864948272705078,
      "step": 13123
    },
    {
      "epoch": 8.0096435546875e-05,
      "step": 13123,
      "training_step_time": 0.7250919342041016
    },
    {
      "epoch": 8.01025390625e-05,
      "model_forward_time": 0.11817073822021484,
      "step": 13124
    },
    {
      "epoch": 8.01025390625e-05,
      "step": 13124,
      "training_step_time": 0.7144949436187744
    },
    {
      "epoch": 8.0108642578125e-05,
      "model_forward_time": 0.12668776512145996,
      "step": 13125
    },
    {
      "epoch": 8.0108642578125e-05,
      "step": 13125,
      "training_step_time": 0.6898219585418701
    },
    {
      "epoch": 8.011474609375e-05,
      "model_forward_time": 0.1220862865447998,
      "step": 13126
    },
    {
      "epoch": 8.011474609375e-05,
      "step": 13126,
      "training_step_time": 0.6068680286407471
    },
    {
      "epoch": 8.0120849609375e-05,
      "model_forward_time": 0.12263035774230957,
      "step": 13127
    },
    {
      "epoch": 8.0120849609375e-05,
      "step": 13127,
      "training_step_time": 0.6787707805633545
    },
    {
      "epoch": 8.0126953125e-05,
      "model_forward_time": 0.12167119979858398,
      "step": 13128
    },
    {
      "epoch": 8.0126953125e-05,
      "step": 13128,
      "training_step_time": 0.6208188533782959
    },
    {
      "epoch": 8.0133056640625e-05,
      "model_forward_time": 0.1219778060913086,
      "step": 13129
    },
    {
      "epoch": 8.0133056640625e-05,
      "step": 13129,
      "training_step_time": 0.7554049491882324
    },
    {
      "epoch": 8.013916015625e-05,
      "grad_norm": 0.15957152843475342,
      "learning_rate": 9.240727190058227e-05,
      "loss": 0.0608,
      "step": 13130
    },
    {
      "epoch": 8.013916015625e-05,
      "model_forward_time": 0.12273144721984863,
      "step": 13130
    },
    {
      "epoch": 8.013916015625e-05,
      "step": 13130,
      "training_step_time": 0.6444637775421143
    },
    {
      "epoch": 8.0145263671875e-05,
      "model_forward_time": 0.12662887573242188,
      "step": 13131
    },
    {
      "epoch": 8.0145263671875e-05,
      "step": 13131,
      "training_step_time": 0.6101088523864746
    },
    {
      "epoch": 8.01513671875e-05,
      "model_forward_time": 0.12986445426940918,
      "step": 13132
    },
    {
      "epoch": 8.01513671875e-05,
      "step": 13132,
      "training_step_time": 0.6788158416748047
    },
    {
      "epoch": 8.0157470703125e-05,
      "model_forward_time": 0.11637473106384277,
      "step": 13133
    },
    {
      "epoch": 8.0157470703125e-05,
      "step": 13133,
      "training_step_time": 0.6842164993286133
    },
    {
      "epoch": 8.016357421875e-05,
      "model_forward_time": 0.11966085433959961,
      "step": 13134
    },
    {
      "epoch": 8.016357421875e-05,
      "step": 13134,
      "training_step_time": 0.6844725608825684
    },
    {
      "epoch": 8.0169677734375e-05,
      "model_forward_time": 0.12175655364990234,
      "step": 13135
    },
    {
      "epoch": 8.0169677734375e-05,
      "step": 13135,
      "training_step_time": 0.6479077339172363
    },
    {
      "epoch": 8.017578125e-05,
      "model_forward_time": 0.11897516250610352,
      "step": 13136
    },
    {
      "epoch": 8.017578125e-05,
      "step": 13136,
      "training_step_time": 0.6189427375793457
    },
    {
      "epoch": 8.0181884765625e-05,
      "model_forward_time": 0.11734676361083984,
      "step": 13137
    },
    {
      "epoch": 8.0181884765625e-05,
      "step": 13137,
      "training_step_time": 0.7103688716888428
    },
    {
      "epoch": 8.018798828125e-05,
      "model_forward_time": 0.12185883522033691,
      "step": 13138
    },
    {
      "epoch": 8.018798828125e-05,
      "step": 13138,
      "training_step_time": 0.6498141288757324
    },
    {
      "epoch": 8.0194091796875e-05,
      "model_forward_time": 0.11863017082214355,
      "step": 13139
    },
    {
      "epoch": 8.0194091796875e-05,
      "step": 13139,
      "training_step_time": 0.7211008071899414
    },
    {
      "epoch": 8.02001953125e-05,
      "grad_norm": 0.16277898848056793,
      "learning_rate": 9.239266632888659e-05,
      "loss": 0.0636,
      "step": 13140
    },
    {
      "epoch": 8.02001953125e-05,
      "model_forward_time": 0.11710524559020996,
      "step": 13140
    },
    {
      "epoch": 8.02001953125e-05,
      "step": 13140,
      "training_step_time": 0.6652798652648926
    },
    {
      "epoch": 8.0206298828125e-05,
      "model_forward_time": 0.11636662483215332,
      "step": 13141
    },
    {
      "epoch": 8.0206298828125e-05,
      "step": 13141,
      "training_step_time": 0.6544022560119629
    },
    {
      "epoch": 8.021240234375e-05,
      "model_forward_time": 0.1221015453338623,
      "step": 13142
    },
    {
      "epoch": 8.021240234375e-05,
      "step": 13142,
      "training_step_time": 0.7559995651245117
    },
    {
      "epoch": 8.0218505859375e-05,
      "model_forward_time": 0.12338829040527344,
      "step": 13143
    },
    {
      "epoch": 8.0218505859375e-05,
      "step": 13143,
      "training_step_time": 0.7093181610107422
    },
    {
      "epoch": 8.0224609375e-05,
      "model_forward_time": 0.12059426307678223,
      "step": 13144
    },
    {
      "epoch": 8.0224609375e-05,
      "step": 13144,
      "training_step_time": 0.722661018371582
    },
    {
      "epoch": 8.0230712890625e-05,
      "model_forward_time": 0.12018179893493652,
      "step": 13145
    },
    {
      "epoch": 8.0230712890625e-05,
      "step": 13145,
      "training_step_time": 0.6505534648895264
    },
    {
      "epoch": 8.023681640625e-05,
      "model_forward_time": 0.12020468711853027,
      "step": 13146
    },
    {
      "epoch": 8.023681640625e-05,
      "step": 13146,
      "training_step_time": 0.7243802547454834
    },
    {
      "epoch": 8.0242919921875e-05,
      "model_forward_time": 0.14178776741027832,
      "step": 13147
    },
    {
      "epoch": 8.0242919921875e-05,
      "step": 13147,
      "training_step_time": 0.6113622188568115
    },
    {
      "epoch": 8.02490234375e-05,
      "model_forward_time": 0.12134265899658203,
      "step": 13148
    },
    {
      "epoch": 8.02490234375e-05,
      "step": 13148,
      "training_step_time": 0.7345278263092041
    },
    {
      "epoch": 8.0255126953125e-05,
      "model_forward_time": 0.11823034286499023,
      "step": 13149
    },
    {
      "epoch": 8.0255126953125e-05,
      "step": 13149,
      "training_step_time": 0.6697173118591309
    },
    {
      "epoch": 8.026123046875e-05,
      "grad_norm": 0.14595653116703033,
      "learning_rate": 9.237804787941819e-05,
      "loss": 0.0604,
      "step": 13150
    },
    {
      "epoch": 8.026123046875e-05,
      "model_forward_time": 0.12263631820678711,
      "step": 13150
    },
    {
      "epoch": 8.026123046875e-05,
      "step": 13150,
      "training_step_time": 0.705284595489502
    },
    {
      "epoch": 8.0267333984375e-05,
      "model_forward_time": 0.13987517356872559,
      "step": 13151
    },
    {
      "epoch": 8.0267333984375e-05,
      "step": 13151,
      "training_step_time": 0.7636444568634033
    },
    {
      "epoch": 8.02734375e-05,
      "model_forward_time": 0.1360785961151123,
      "step": 13152
    },
    {
      "epoch": 8.02734375e-05,
      "step": 13152,
      "training_step_time": 0.6863937377929688
    },
    {
      "epoch": 8.0279541015625e-05,
      "model_forward_time": 0.11695265769958496,
      "step": 13153
    },
    {
      "epoch": 8.0279541015625e-05,
      "step": 13153,
      "training_step_time": 0.6762046813964844
    },
    {
      "epoch": 8.028564453125e-05,
      "model_forward_time": 0.12225151062011719,
      "step": 13154
    },
    {
      "epoch": 8.028564453125e-05,
      "step": 13154,
      "training_step_time": 0.6771280765533447
    },
    {
      "epoch": 8.0291748046875e-05,
      "model_forward_time": 0.11699414253234863,
      "step": 13155
    },
    {
      "epoch": 8.0291748046875e-05,
      "step": 13155,
      "training_step_time": 0.711066722869873
    },
    {
      "epoch": 8.02978515625e-05,
      "model_forward_time": 0.11854934692382812,
      "step": 13156
    },
    {
      "epoch": 8.02978515625e-05,
      "step": 13156,
      "training_step_time": 0.8025813102722168
    },
    {
      "epoch": 8.0303955078125e-05,
      "model_forward_time": 0.14339590072631836,
      "step": 13157
    },
    {
      "epoch": 8.0303955078125e-05,
      "step": 13157,
      "training_step_time": 0.7107477188110352
    },
    {
      "epoch": 8.031005859375e-05,
      "model_forward_time": 0.11849427223205566,
      "step": 13158
    },
    {
      "epoch": 8.031005859375e-05,
      "step": 13158,
      "training_step_time": 0.6563787460327148
    },
    {
      "epoch": 8.0316162109375e-05,
      "model_forward_time": 0.12201428413391113,
      "step": 13159
    },
    {
      "epoch": 8.0316162109375e-05,
      "step": 13159,
      "training_step_time": 0.608013391494751
    },
    {
      "epoch": 8.0322265625e-05,
      "grad_norm": 0.1265861839056015,
      "learning_rate": 9.236341655661778e-05,
      "loss": 0.0567,
      "step": 13160
    },
    {
      "epoch": 8.0322265625e-05,
      "model_forward_time": 0.11781549453735352,
      "step": 13160
    },
    {
      "epoch": 8.0322265625e-05,
      "step": 13160,
      "training_step_time": 0.6694209575653076
    },
    {
      "epoch": 8.0328369140625e-05,
      "model_forward_time": 0.11783504486083984,
      "step": 13161
    },
    {
      "epoch": 8.0328369140625e-05,
      "step": 13161,
      "training_step_time": 0.6932468414306641
    },
    {
      "epoch": 8.033447265625e-05,
      "model_forward_time": 0.12117266654968262,
      "step": 13162
    },
    {
      "epoch": 8.033447265625e-05,
      "step": 13162,
      "training_step_time": 0.6917383670806885
    },
    {
      "epoch": 8.0340576171875e-05,
      "model_forward_time": 0.11811399459838867,
      "step": 13163
    },
    {
      "epoch": 8.0340576171875e-05,
      "step": 13163,
      "training_step_time": 0.6449599266052246
    },
    {
      "epoch": 8.03466796875e-05,
      "model_forward_time": 0.11856770515441895,
      "step": 13164
    },
    {
      "epoch": 8.03466796875e-05,
      "step": 13164,
      "training_step_time": 0.6993057727813721
    },
    {
      "epoch": 8.0352783203125e-05,
      "model_forward_time": 0.12508177757263184,
      "step": 13165
    },
    {
      "epoch": 8.0352783203125e-05,
      "step": 13165,
      "training_step_time": 0.6914980411529541
    },
    {
      "epoch": 8.035888671875e-05,
      "model_forward_time": 0.11944317817687988,
      "step": 13166
    },
    {
      "epoch": 8.035888671875e-05,
      "step": 13166,
      "training_step_time": 0.680394172668457
    },
    {
      "epoch": 8.0364990234375e-05,
      "model_forward_time": 0.11821556091308594,
      "step": 13167
    },
    {
      "epoch": 8.0364990234375e-05,
      "step": 13167,
      "training_step_time": 0.7377135753631592
    },
    {
      "epoch": 8.037109375e-05,
      "model_forward_time": 0.12974810600280762,
      "step": 13168
    },
    {
      "epoch": 8.037109375e-05,
      "step": 13168,
      "training_step_time": 0.7455182075500488
    },
    {
      "epoch": 8.0377197265625e-05,
      "model_forward_time": 0.11657238006591797,
      "step": 13169
    },
    {
      "epoch": 8.0377197265625e-05,
      "step": 13169,
      "training_step_time": 0.7047357559204102
    },
    {
      "epoch": 8.038330078125e-05,
      "grad_norm": 0.15403307974338531,
      "learning_rate": 9.234877236492997e-05,
      "loss": 0.0668,
      "step": 13170
    },
    {
      "epoch": 8.038330078125e-05,
      "model_forward_time": 0.12127161026000977,
      "step": 13170
    },
    {
      "epoch": 8.038330078125e-05,
      "step": 13170,
      "training_step_time": 0.686211109161377
    },
    {
      "epoch": 8.0389404296875e-05,
      "model_forward_time": 0.12007689476013184,
      "step": 13171
    },
    {
      "epoch": 8.0389404296875e-05,
      "step": 13171,
      "training_step_time": 0.6776823997497559
    },
    {
      "epoch": 8.03955078125e-05,
      "model_forward_time": 0.11801314353942871,
      "step": 13172
    },
    {
      "epoch": 8.03955078125e-05,
      "step": 13172,
      "training_step_time": 0.665982723236084
    },
    {
      "epoch": 8.0401611328125e-05,
      "model_forward_time": 0.11898159980773926,
      "step": 13173
    },
    {
      "epoch": 8.0401611328125e-05,
      "step": 13173,
      "training_step_time": 0.6714785099029541
    },
    {
      "epoch": 8.040771484375e-05,
      "model_forward_time": 0.1204984188079834,
      "step": 13174
    },
    {
      "epoch": 8.040771484375e-05,
      "step": 13174,
      "training_step_time": 0.645383358001709
    },
    {
      "epoch": 8.0413818359375e-05,
      "model_forward_time": 0.1183478832244873,
      "step": 13175
    },
    {
      "epoch": 8.0413818359375e-05,
      "step": 13175,
      "training_step_time": 0.6462662220001221
    },
    {
      "epoch": 8.0419921875e-05,
      "model_forward_time": 0.12482523918151855,
      "step": 13176
    },
    {
      "epoch": 8.0419921875e-05,
      "step": 13176,
      "training_step_time": 0.6128365993499756
    },
    {
      "epoch": 8.0426025390625e-05,
      "model_forward_time": 0.11973023414611816,
      "step": 13177
    },
    {
      "epoch": 8.0426025390625e-05,
      "step": 13177,
      "training_step_time": 0.7246420383453369
    },
    {
      "epoch": 8.043212890625e-05,
      "model_forward_time": 0.11670255661010742,
      "step": 13178
    },
    {
      "epoch": 8.043212890625e-05,
      "step": 13178,
      "training_step_time": 0.6354038715362549
    },
    {
      "epoch": 8.0438232421875e-05,
      "model_forward_time": 0.12645173072814941,
      "step": 13179
    },
    {
      "epoch": 8.0438232421875e-05,
      "step": 13179,
      "training_step_time": 0.5904757976531982
    },
    {
      "epoch": 8.04443359375e-05,
      "grad_norm": 0.15177646279335022,
      "learning_rate": 9.233411530880326e-05,
      "loss": 0.062,
      "step": 13180
    },
    {
      "epoch": 8.04443359375e-05,
      "model_forward_time": 0.12284183502197266,
      "step": 13180
    },
    {
      "epoch": 8.04443359375e-05,
      "step": 13180,
      "training_step_time": 0.6128232479095459
    },
    {
      "epoch": 8.0450439453125e-05,
      "model_forward_time": 0.12005233764648438,
      "step": 13181
    },
    {
      "epoch": 8.0450439453125e-05,
      "step": 13181,
      "training_step_time": 0.6051499843597412
    },
    {
      "epoch": 8.045654296875e-05,
      "model_forward_time": 0.12185454368591309,
      "step": 13182
    },
    {
      "epoch": 8.045654296875e-05,
      "step": 13182,
      "training_step_time": 0.5576283931732178
    },
    {
      "epoch": 8.0462646484375e-05,
      "model_forward_time": 0.11985540390014648,
      "step": 13183
    },
    {
      "epoch": 8.0462646484375e-05,
      "step": 13183,
      "training_step_time": 0.4921705722808838
    },
    {
      "epoch": 8.046875e-05,
      "model_forward_time": 0.12004232406616211,
      "step": 13184
    },
    {
      "epoch": 8.046875e-05,
      "step": 13184,
      "training_step_time": 0.4906435012817383
    },
    {
      "epoch": 8.0474853515625e-05,
      "model_forward_time": 0.11796021461486816,
      "step": 13185
    },
    {
      "epoch": 8.0474853515625e-05,
      "step": 13185,
      "training_step_time": 0.551140546798706
    },
    {
      "epoch": 8.048095703125e-05,
      "model_forward_time": 0.11869692802429199,
      "step": 13186
    },
    {
      "epoch": 8.048095703125e-05,
      "step": 13186,
      "training_step_time": 0.4738788604736328
    },
    {
      "epoch": 8.0487060546875e-05,
      "model_forward_time": 0.11707735061645508,
      "step": 13187
    },
    {
      "epoch": 8.0487060546875e-05,
      "step": 13187,
      "training_step_time": 0.38094377517700195
    },
    {
      "epoch": 8.04931640625e-05,
      "model_forward_time": 0.11509442329406738,
      "step": 13188
    },
    {
      "epoch": 8.04931640625e-05,
      "step": 13188,
      "training_step_time": 0.40546631813049316
    },
    {
      "epoch": 8.0499267578125e-05,
      "model_forward_time": 0.11626863479614258,
      "step": 13189
    },
    {
      "epoch": 8.0499267578125e-05,
      "step": 13189,
      "training_step_time": 0.4152228832244873
    },
    {
      "epoch": 8.050537109375e-05,
      "grad_norm": 0.17302989959716797,
      "learning_rate": 9.231944539269009e-05,
      "loss": 0.0578,
      "step": 13190
    },
    {
      "epoch": 8.050537109375e-05,
      "model_forward_time": 0.11567401885986328,
      "step": 13190
    },
    {
      "epoch": 8.050537109375e-05,
      "step": 13190,
      "training_step_time": 0.424971342086792
    },
    {
      "epoch": 8.0511474609375e-05,
      "model_forward_time": 0.11633920669555664,
      "step": 13191
    },
    {
      "epoch": 8.0511474609375e-05,
      "step": 13191,
      "training_step_time": 0.38590502738952637
    },
    {
      "epoch": 8.0517578125e-05,
      "model_forward_time": 0.11526036262512207,
      "step": 13192
    },
    {
      "epoch": 8.0517578125e-05,
      "step": 13192,
      "training_step_time": 0.39328551292419434
    },
    {
      "epoch": 8.0523681640625e-05,
      "model_forward_time": 0.11521601676940918,
      "step": 13193
    },
    {
      "epoch": 8.0523681640625e-05,
      "step": 13193,
      "training_step_time": 0.40442609786987305
    },
    {
      "epoch": 8.052978515625e-05,
      "model_forward_time": 0.11464238166809082,
      "step": 13194
    },
    {
      "epoch": 8.052978515625e-05,
      "step": 13194,
      "training_step_time": 0.4100019931793213
    },
    {
      "epoch": 8.0535888671875e-05,
      "model_forward_time": 0.11468029022216797,
      "step": 13195
    },
    {
      "epoch": 8.0535888671875e-05,
      "step": 13195,
      "training_step_time": 0.4924452304840088
    },
    {
      "epoch": 8.05419921875e-05,
      "model_forward_time": 0.11452937126159668,
      "step": 13196
    },
    {
      "epoch": 8.05419921875e-05,
      "step": 13196,
      "training_step_time": 0.3840937614440918
    },
    {
      "epoch": 8.0548095703125e-05,
      "model_forward_time": 0.11523270606994629,
      "step": 13197
    },
    {
      "epoch": 8.0548095703125e-05,
      "step": 13197,
      "training_step_time": 0.3879215717315674
    },
    {
      "epoch": 8.055419921875e-05,
      "model_forward_time": 0.11603617668151855,
      "step": 13198
    },
    {
      "epoch": 8.055419921875e-05,
      "step": 13198,
      "training_step_time": 0.39374589920043945
    },
    {
      "epoch": 8.0560302734375e-05,
      "model_forward_time": 0.11538314819335938,
      "step": 13199
    },
    {
      "epoch": 8.0560302734375e-05,
      "step": 13199,
      "training_step_time": 0.4067199230194092
    },
    {
      "epoch": 8.056640625e-05,
      "grad_norm": 0.14802008867263794,
      "learning_rate": 9.230476262104677e-05,
      "loss": 0.0705,
      "step": 13200
    },
    {
      "epoch": 8.056640625e-05,
      "model_forward_time": 0.11491155624389648,
      "step": 13200
    },
    {
      "epoch": 8.056640625e-05,
      "step": 13200,
      "training_step_time": 0.5094540119171143
    },
    {
      "epoch": 8.0572509765625e-05,
      "model_forward_time": 0.11583733558654785,
      "step": 13201
    },
    {
      "epoch": 8.0572509765625e-05,
      "step": 13201,
      "training_step_time": 0.44391703605651855
    },
    {
      "epoch": 8.057861328125e-05,
      "model_forward_time": 0.11533856391906738,
      "step": 13202
    },
    {
      "epoch": 8.057861328125e-05,
      "step": 13202,
      "training_step_time": 0.37189149856567383
    },
    {
      "epoch": 8.0584716796875e-05,
      "model_forward_time": 0.11520671844482422,
      "step": 13203
    },
    {
      "epoch": 8.0584716796875e-05,
      "step": 13203,
      "training_step_time": 0.4410378932952881
    },
    {
      "epoch": 8.05908203125e-05,
      "model_forward_time": 0.1153573989868164,
      "step": 13204
    },
    {
      "epoch": 8.05908203125e-05,
      "step": 13204,
      "training_step_time": 0.4466671943664551
    },
    {
      "epoch": 8.0596923828125e-05,
      "model_forward_time": 0.11544656753540039,
      "step": 13205
    },
    {
      "epoch": 8.0596923828125e-05,
      "step": 13205,
      "training_step_time": 0.39009952545166016
    },
    {
      "epoch": 8.060302734375e-05,
      "model_forward_time": 0.1149454116821289,
      "step": 13206
    },
    {
      "epoch": 8.060302734375e-05,
      "step": 13206,
      "training_step_time": 0.3898329734802246
    },
    {
      "epoch": 8.0609130859375e-05,
      "model_forward_time": 0.11562371253967285,
      "step": 13207
    },
    {
      "epoch": 8.0609130859375e-05,
      "step": 13207,
      "training_step_time": 0.3950519561767578
    },
    {
      "epoch": 8.0615234375e-05,
      "model_forward_time": 0.11506223678588867,
      "step": 13208
    },
    {
      "epoch": 8.0615234375e-05,
      "step": 13208,
      "training_step_time": 0.3858911991119385
    },
    {
      "epoch": 8.0621337890625e-05,
      "model_forward_time": 0.11489415168762207,
      "step": 13209
    },
    {
      "epoch": 8.0621337890625e-05,
      "step": 13209,
      "training_step_time": 0.3925626277923584
    },
    {
      "epoch": 8.062744140625e-05,
      "grad_norm": 0.16471023857593536,
      "learning_rate": 9.229006699833358e-05,
      "loss": 0.0641,
      "step": 13210
    },
    {
      "epoch": 8.062744140625e-05,
      "model_forward_time": 0.11492156982421875,
      "step": 13210
    },
    {
      "epoch": 8.062744140625e-05,
      "step": 13210,
      "training_step_time": 0.39537978172302246
    },
    {
      "epoch": 8.0633544921875e-05,
      "model_forward_time": 0.11505532264709473,
      "step": 13211
    },
    {
      "epoch": 8.0633544921875e-05,
      "step": 13211,
      "training_step_time": 0.38772034645080566
    },
    {
      "epoch": 8.06396484375e-05,
      "model_forward_time": 0.11696839332580566,
      "step": 13212
    },
    {
      "epoch": 8.06396484375e-05,
      "step": 13212,
      "training_step_time": 0.3891425132751465
    },
    {
      "epoch": 8.0645751953125e-05,
      "model_forward_time": 0.11508345603942871,
      "step": 13213
    },
    {
      "epoch": 8.0645751953125e-05,
      "step": 13213,
      "training_step_time": 0.3961374759674072
    },
    {
      "epoch": 8.065185546875e-05,
      "model_forward_time": 0.11484050750732422,
      "step": 13214
    },
    {
      "epoch": 8.065185546875e-05,
      "step": 13214,
      "training_step_time": 0.39209437370300293
    },
    {
      "epoch": 8.0657958984375e-05,
      "model_forward_time": 0.11509585380554199,
      "step": 13215
    },
    {
      "epoch": 8.0657958984375e-05,
      "step": 13215,
      "training_step_time": 0.5092732906341553
    },
    {
      "epoch": 8.06640625e-05,
      "model_forward_time": 0.1156454086303711,
      "step": 13216
    },
    {
      "epoch": 8.06640625e-05,
      "step": 13216,
      "training_step_time": 0.4359762668609619
    },
    {
      "epoch": 8.0670166015625e-05,
      "model_forward_time": 0.11603927612304688,
      "step": 13217
    },
    {
      "epoch": 8.0670166015625e-05,
      "step": 13217,
      "training_step_time": 0.5029911994934082
    },
    {
      "epoch": 8.067626953125e-05,
      "model_forward_time": 0.11466789245605469,
      "step": 13218
    },
    {
      "epoch": 8.067626953125e-05,
      "step": 13218,
      "training_step_time": 0.4652092456817627
    },
    {
      "epoch": 8.0682373046875e-05,
      "model_forward_time": 0.11567282676696777,
      "step": 13219
    },
    {
      "epoch": 8.0682373046875e-05,
      "step": 13219,
      "training_step_time": 0.48810505867004395
    },
    {
      "epoch": 8.06884765625e-05,
      "grad_norm": 0.23820628225803375,
      "learning_rate": 9.227535852901463e-05,
      "loss": 0.0651,
      "step": 13220
    },
    {
      "epoch": 8.06884765625e-05,
      "model_forward_time": 0.11535501480102539,
      "step": 13220
    },
    {
      "epoch": 8.06884765625e-05,
      "step": 13220,
      "training_step_time": 0.4137434959411621
    },
    {
      "epoch": 8.0694580078125e-05,
      "model_forward_time": 0.11496210098266602,
      "step": 13221
    },
    {
      "epoch": 8.0694580078125e-05,
      "step": 13221,
      "training_step_time": 0.39891767501831055
    },
    {
      "epoch": 8.070068359375e-05,
      "model_forward_time": 0.1149437427520752,
      "step": 13222
    },
    {
      "epoch": 8.070068359375e-05,
      "step": 13222,
      "training_step_time": 0.47623181343078613
    },
    {
      "epoch": 8.0706787109375e-05,
      "model_forward_time": 0.1146087646484375,
      "step": 13223
    },
    {
      "epoch": 8.0706787109375e-05,
      "step": 13223,
      "training_step_time": 0.4267244338989258
    },
    {
      "epoch": 8.0712890625e-05,
      "model_forward_time": 0.11554312705993652,
      "step": 13224
    },
    {
      "epoch": 8.0712890625e-05,
      "step": 13224,
      "training_step_time": 0.39075565338134766
    },
    {
      "epoch": 8.0718994140625e-05,
      "model_forward_time": 0.11533474922180176,
      "step": 13225
    },
    {
      "epoch": 8.0718994140625e-05,
      "step": 13225,
      "training_step_time": 0.3926732540130615
    },
    {
      "epoch": 8.072509765625e-05,
      "model_forward_time": 0.11543536186218262,
      "step": 13226
    },
    {
      "epoch": 8.072509765625e-05,
      "step": 13226,
      "training_step_time": 0.38263511657714844
    },
    {
      "epoch": 8.0731201171875e-05,
      "model_forward_time": 0.11489534378051758,
      "step": 13227
    },
    {
      "epoch": 8.0731201171875e-05,
      "step": 13227,
      "training_step_time": 0.3847227096557617
    },
    {
      "epoch": 8.07373046875e-05,
      "model_forward_time": 0.11473321914672852,
      "step": 13228
    },
    {
      "epoch": 8.07373046875e-05,
      "step": 13228,
      "training_step_time": 0.40788888931274414
    },
    {
      "epoch": 8.0743408203125e-05,
      "model_forward_time": 0.11543130874633789,
      "step": 13229
    },
    {
      "epoch": 8.0743408203125e-05,
      "step": 13229,
      "training_step_time": 0.3905305862426758
    },
    {
      "epoch": 8.074951171875e-05,
      "grad_norm": 0.1724148392677307,
      "learning_rate": 9.226063721755799e-05,
      "loss": 0.0644,
      "step": 13230
    },
    {
      "epoch": 8.074951171875e-05,
      "model_forward_time": 0.1156320571899414,
      "step": 13230
    },
    {
      "epoch": 8.074951171875e-05,
      "step": 13230,
      "training_step_time": 0.3938279151916504
    },
    {
      "epoch": 8.0755615234375e-05,
      "model_forward_time": 0.11530923843383789,
      "step": 13231
    },
    {
      "epoch": 8.0755615234375e-05,
      "step": 13231,
      "training_step_time": 0.42174243927001953
    },
    {
      "epoch": 8.076171875e-05,
      "model_forward_time": 0.1154775619506836,
      "step": 13232
    },
    {
      "epoch": 8.076171875e-05,
      "step": 13232,
      "training_step_time": 0.46642184257507324
    },
    {
      "epoch": 8.0767822265625e-05,
      "model_forward_time": 0.11494922637939453,
      "step": 13233
    },
    {
      "epoch": 8.0767822265625e-05,
      "step": 13233,
      "training_step_time": 0.44501209259033203
    },
    {
      "epoch": 8.077392578125e-05,
      "model_forward_time": 0.11530137062072754,
      "step": 13234
    },
    {
      "epoch": 8.077392578125e-05,
      "step": 13234,
      "training_step_time": 0.41691088676452637
    },
    {
      "epoch": 8.0780029296875e-05,
      "model_forward_time": 0.1154325008392334,
      "step": 13235
    },
    {
      "epoch": 8.0780029296875e-05,
      "step": 13235,
      "training_step_time": 0.4032118320465088
    },
    {
      "epoch": 8.07861328125e-05,
      "model_forward_time": 0.11509919166564941,
      "step": 13236
    },
    {
      "epoch": 8.07861328125e-05,
      "step": 13236,
      "training_step_time": 0.4048745632171631
    },
    {
      "epoch": 8.0792236328125e-05,
      "model_forward_time": 0.11582398414611816,
      "step": 13237
    },
    {
      "epoch": 8.0792236328125e-05,
      "step": 13237,
      "training_step_time": 0.40810060501098633
    },
    {
      "epoch": 8.079833984375e-05,
      "model_forward_time": 0.11521720886230469,
      "step": 13238
    },
    {
      "epoch": 8.079833984375e-05,
      "step": 13238,
      "training_step_time": 0.38945841789245605
    },
    {
      "epoch": 8.0804443359375e-05,
      "model_forward_time": 0.11531376838684082,
      "step": 13239
    },
    {
      "epoch": 8.0804443359375e-05,
      "step": 13239,
      "training_step_time": 0.8481273651123047
    },
    {
      "epoch": 8.0810546875e-05,
      "grad_norm": 0.22590389847755432,
      "learning_rate": 9.224590306843558e-05,
      "loss": 0.0627,
      "step": 13240
    },
    {
      "epoch": 8.0810546875e-05,
      "model_forward_time": 0.11428141593933105,
      "step": 13240
    },
    {
      "epoch": 8.0810546875e-05,
      "step": 13240,
      "training_step_time": 0.39557552337646484
    },
    {
      "epoch": 8.0816650390625e-05,
      "model_forward_time": 0.11498141288757324,
      "step": 13241
    },
    {
      "epoch": 8.0816650390625e-05,
      "step": 13241,
      "training_step_time": 0.39797043800354004
    },
    {
      "epoch": 8.082275390625e-05,
      "model_forward_time": 0.11400580406188965,
      "step": 13242
    },
    {
      "epoch": 8.082275390625e-05,
      "step": 13242,
      "training_step_time": 0.387967586517334
    },
    {
      "epoch": 8.0828857421875e-05,
      "model_forward_time": 0.11461210250854492,
      "step": 13243
    },
    {
      "epoch": 8.0828857421875e-05,
      "step": 13243,
      "training_step_time": 0.39862775802612305
    },
    {
      "epoch": 8.08349609375e-05,
      "model_forward_time": 0.1156153678894043,
      "step": 13244
    },
    {
      "epoch": 8.08349609375e-05,
      "step": 13244,
      "training_step_time": 0.3915574550628662
    },
    {
      "epoch": 8.0841064453125e-05,
      "model_forward_time": 0.11556220054626465,
      "step": 13245
    },
    {
      "epoch": 8.0841064453125e-05,
      "step": 13245,
      "training_step_time": 0.8069202899932861
    },
    {
      "epoch": 8.084716796875e-05,
      "model_forward_time": 0.11539101600646973,
      "step": 13246
    },
    {
      "epoch": 8.084716796875e-05,
      "step": 13246,
      "training_step_time": 0.42792677879333496
    },
    {
      "epoch": 8.0853271484375e-05,
      "model_forward_time": 0.1144864559173584,
      "step": 13247
    },
    {
      "epoch": 8.0853271484375e-05,
      "step": 13247,
      "training_step_time": 0.461148738861084
    },
    {
      "epoch": 8.0859375e-05,
      "model_forward_time": 0.11518549919128418,
      "step": 13248
    },
    {
      "epoch": 8.0859375e-05,
      "step": 13248,
      "training_step_time": 0.39001917839050293
    },
    {
      "epoch": 8.0865478515625e-05,
      "model_forward_time": 0.11536240577697754,
      "step": 13249
    },
    {
      "epoch": 8.0865478515625e-05,
      "step": 13249,
      "training_step_time": 0.4546542167663574
    },
    {
      "epoch": 8.087158203125e-05,
      "grad_norm": 0.1612291932106018,
      "learning_rate": 9.223115608612325e-05,
      "loss": 0.0652,
      "step": 13250
    },
    {
      "epoch": 8.087158203125e-05,
      "model_forward_time": 0.11423230171203613,
      "step": 13250
    },
    {
      "epoch": 8.087158203125e-05,
      "step": 13250,
      "training_step_time": 0.3803141117095947
    },
    {
      "epoch": 8.0877685546875e-05,
      "model_forward_time": 0.1150827407836914,
      "step": 13251
    },
    {
      "epoch": 8.0877685546875e-05,
      "step": 13251,
      "training_step_time": 0.6894428730010986
    },
    {
      "epoch": 8.08837890625e-05,
      "model_forward_time": 0.11415410041809082,
      "step": 13252
    },
    {
      "epoch": 8.08837890625e-05,
      "step": 13252,
      "training_step_time": 0.38672828674316406
    },
    {
      "epoch": 8.0889892578125e-05,
      "model_forward_time": 0.11410093307495117,
      "step": 13253
    },
    {
      "epoch": 8.0889892578125e-05,
      "step": 13253,
      "training_step_time": 0.3867645263671875
    },
    {
      "epoch": 8.089599609375e-05,
      "model_forward_time": 0.11492419242858887,
      "step": 13254
    },
    {
      "epoch": 8.089599609375e-05,
      "step": 13254,
      "training_step_time": 0.3915679454803467
    },
    {
      "epoch": 8.0902099609375e-05,
      "model_forward_time": 0.11470293998718262,
      "step": 13255
    },
    {
      "epoch": 8.0902099609375e-05,
      "step": 13255,
      "training_step_time": 0.39125967025756836
    },
    {
      "epoch": 8.0908203125e-05,
      "model_forward_time": 0.1142585277557373,
      "step": 13256
    },
    {
      "epoch": 8.0908203125e-05,
      "step": 13256,
      "training_step_time": 0.3818845748901367
    },
    {
      "epoch": 8.0914306640625e-05,
      "model_forward_time": 0.11544227600097656,
      "step": 13257
    },
    {
      "epoch": 8.0914306640625e-05,
      "step": 13257,
      "training_step_time": 0.764503002166748
    },
    {
      "epoch": 8.092041015625e-05,
      "model_forward_time": 0.11497712135314941,
      "step": 13258
    },
    {
      "epoch": 8.092041015625e-05,
      "step": 13258,
      "training_step_time": 0.4190964698791504
    },
    {
      "epoch": 8.0926513671875e-05,
      "model_forward_time": 0.11487627029418945,
      "step": 13259
    },
    {
      "epoch": 8.0926513671875e-05,
      "step": 13259,
      "training_step_time": 0.48584532737731934
    },
    {
      "epoch": 8.09326171875e-05,
      "grad_norm": 0.15547393262386322,
      "learning_rate": 9.221639627510076e-05,
      "loss": 0.067,
      "step": 13260
    },
    {
      "epoch": 8.09326171875e-05,
      "model_forward_time": 0.11516714096069336,
      "step": 13260
    },
    {
      "epoch": 8.09326171875e-05,
      "step": 13260,
      "training_step_time": 0.3867771625518799
    },
    {
      "epoch": 8.0938720703125e-05,
      "model_forward_time": 0.11448884010314941,
      "step": 13261
    },
    {
      "epoch": 8.0938720703125e-05,
      "step": 13261,
      "training_step_time": 0.3853585720062256
    },
    {
      "epoch": 8.094482421875e-05,
      "model_forward_time": 0.11383461952209473,
      "step": 13262
    },
    {
      "epoch": 8.094482421875e-05,
      "step": 13262,
      "training_step_time": 0.4363417625427246
    },
    {
      "epoch": 8.0950927734375e-05,
      "model_forward_time": 0.11496877670288086,
      "step": 13263
    },
    {
      "epoch": 8.0950927734375e-05,
      "step": 13263,
      "training_step_time": 0.6989943981170654
    },
    {
      "epoch": 8.095703125e-05,
      "model_forward_time": 0.11435723304748535,
      "step": 13264
    },
    {
      "epoch": 8.095703125e-05,
      "step": 13264,
      "training_step_time": 0.38355207443237305
    },
    {
      "epoch": 8.0963134765625e-05,
      "model_forward_time": 0.11494946479797363,
      "step": 13265
    },
    {
      "epoch": 8.0963134765625e-05,
      "step": 13265,
      "training_step_time": 0.3877990245819092
    },
    {
      "epoch": 8.096923828125e-05,
      "model_forward_time": 0.11437606811523438,
      "step": 13266
    },
    {
      "epoch": 8.096923828125e-05,
      "step": 13266,
      "training_step_time": 0.3881840705871582
    },
    {
      "epoch": 8.0975341796875e-05,
      "model_forward_time": 0.11476659774780273,
      "step": 13267
    },
    {
      "epoch": 8.0975341796875e-05,
      "step": 13267,
      "training_step_time": 0.3906376361846924
    },
    {
      "epoch": 8.09814453125e-05,
      "model_forward_time": 0.11476945877075195,
      "step": 13268
    },
    {
      "epoch": 8.09814453125e-05,
      "step": 13268,
      "training_step_time": 0.38167738914489746
    },
    {
      "epoch": 8.0987548828125e-05,
      "model_forward_time": 0.11598825454711914,
      "step": 13269
    },
    {
      "epoch": 8.0987548828125e-05,
      "step": 13269,
      "training_step_time": 0.8864820003509521
    },
    {
      "epoch": 8.099365234375e-05,
      "grad_norm": 0.17020727694034576,
      "learning_rate": 9.220162363985174e-05,
      "loss": 0.0689,
      "step": 13270
    },
    {
      "epoch": 8.099365234375e-05,
      "model_forward_time": 0.1149296760559082,
      "step": 13270
    },
    {
      "epoch": 8.099365234375e-05,
      "step": 13270,
      "training_step_time": 0.4331817626953125
    },
    {
      "epoch": 8.0999755859375e-05,
      "model_forward_time": 0.11441755294799805,
      "step": 13271
    },
    {
      "epoch": 8.0999755859375e-05,
      "step": 13271,
      "training_step_time": 0.38579297065734863
    },
    {
      "epoch": 8.1005859375e-05,
      "model_forward_time": 0.11445450782775879,
      "step": 13272
    },
    {
      "epoch": 8.1005859375e-05,
      "step": 13272,
      "training_step_time": 0.4466390609741211
    },
    {
      "epoch": 8.1011962890625e-05,
      "model_forward_time": 0.11511588096618652,
      "step": 13273
    },
    {
      "epoch": 8.1011962890625e-05,
      "step": 13273,
      "training_step_time": 0.42331624031066895
    },
    {
      "epoch": 8.101806640625e-05,
      "model_forward_time": 0.11439180374145508,
      "step": 13274
    },
    {
      "epoch": 8.101806640625e-05,
      "step": 13274,
      "training_step_time": 0.4592430591583252
    },
    {
      "epoch": 8.1024169921875e-05,
      "model_forward_time": 0.11438345909118652,
      "step": 13275
    },
    {
      "epoch": 8.1024169921875e-05,
      "step": 13275,
      "training_step_time": 0.5952863693237305
    },
    {
      "epoch": 8.10302734375e-05,
      "model_forward_time": 0.11429953575134277,
      "step": 13276
    },
    {
      "epoch": 8.10302734375e-05,
      "step": 13276,
      "training_step_time": 0.4193112850189209
    },
    {
      "epoch": 8.1036376953125e-05,
      "model_forward_time": 0.11486268043518066,
      "step": 13277
    },
    {
      "epoch": 8.1036376953125e-05,
      "step": 13277,
      "training_step_time": 0.4256477355957031
    },
    {
      "epoch": 8.104248046875e-05,
      "model_forward_time": 0.11491227149963379,
      "step": 13278
    },
    {
      "epoch": 8.104248046875e-05,
      "step": 13278,
      "training_step_time": 0.3891334533691406
    },
    {
      "epoch": 8.1048583984375e-05,
      "model_forward_time": 0.11417675018310547,
      "step": 13279
    },
    {
      "epoch": 8.1048583984375e-05,
      "step": 13279,
      "training_step_time": 0.389984130859375
    },
    {
      "epoch": 8.10546875e-05,
      "grad_norm": 0.17884349822998047,
      "learning_rate": 9.218683818486372e-05,
      "loss": 0.0616,
      "step": 13280
    },
    {
      "epoch": 8.10546875e-05,
      "model_forward_time": 0.11452460289001465,
      "step": 13280
    },
    {
      "epoch": 8.10546875e-05,
      "step": 13280,
      "training_step_time": 0.3867313861846924
    },
    {
      "epoch": 8.1060791015625e-05,
      "model_forward_time": 0.11530041694641113,
      "step": 13281
    },
    {
      "epoch": 8.1060791015625e-05,
      "step": 13281,
      "training_step_time": 0.6265895366668701
    },
    {
      "epoch": 8.106689453125e-05,
      "model_forward_time": 0.11458277702331543,
      "step": 13282
    },
    {
      "epoch": 8.106689453125e-05,
      "step": 13282,
      "training_step_time": 0.3895440101623535
    },
    {
      "epoch": 8.1072998046875e-05,
      "model_forward_time": 0.11490011215209961,
      "step": 13283
    },
    {
      "epoch": 8.1072998046875e-05,
      "step": 13283,
      "training_step_time": 0.392411470413208
    },
    {
      "epoch": 8.10791015625e-05,
      "model_forward_time": 0.11495733261108398,
      "step": 13284
    },
    {
      "epoch": 8.10791015625e-05,
      "step": 13284,
      "training_step_time": 0.4073977470397949
    },
    {
      "epoch": 8.1085205078125e-05,
      "model_forward_time": 0.11445236206054688,
      "step": 13285
    },
    {
      "epoch": 8.1085205078125e-05,
      "step": 13285,
      "training_step_time": 0.4234740734100342
    },
    {
      "epoch": 8.109130859375e-05,
      "model_forward_time": 0.11502385139465332,
      "step": 13286
    },
    {
      "epoch": 8.109130859375e-05,
      "step": 13286,
      "training_step_time": 0.42860865592956543
    },
    {
      "epoch": 8.1097412109375e-05,
      "model_forward_time": 0.11520195007324219,
      "step": 13287
    },
    {
      "epoch": 8.1097412109375e-05,
      "step": 13287,
      "training_step_time": 0.7909624576568604
    },
    {
      "epoch": 8.1103515625e-05,
      "model_forward_time": 0.11492538452148438,
      "step": 13288
    },
    {
      "epoch": 8.1103515625e-05,
      "step": 13288,
      "training_step_time": 0.3787097930908203
    },
    {
      "epoch": 8.1109619140625e-05,
      "model_forward_time": 0.11421775817871094,
      "step": 13289
    },
    {
      "epoch": 8.1109619140625e-05,
      "step": 13289,
      "training_step_time": 0.3872692584991455
    },
    {
      "epoch": 8.111572265625e-05,
      "grad_norm": 0.2060902863740921,
      "learning_rate": 9.217203991462815e-05,
      "loss": 0.0664,
      "step": 13290
    },
    {
      "epoch": 8.111572265625e-05,
      "model_forward_time": 0.11426568031311035,
      "step": 13290
    },
    {
      "epoch": 8.111572265625e-05,
      "step": 13290,
      "training_step_time": 0.382169246673584
    },
    {
      "epoch": 8.1121826171875e-05,
      "model_forward_time": 0.11421751976013184,
      "step": 13291
    },
    {
      "epoch": 8.1121826171875e-05,
      "step": 13291,
      "training_step_time": 0.3935739994049072
    },
    {
      "epoch": 8.11279296875e-05,
      "model_forward_time": 0.1147305965423584,
      "step": 13292
    },
    {
      "epoch": 8.11279296875e-05,
      "step": 13292,
      "training_step_time": 0.47319746017456055
    },
    {
      "epoch": 8.1134033203125e-05,
      "model_forward_time": 0.11534523963928223,
      "step": 13293
    },
    {
      "epoch": 8.1134033203125e-05,
      "step": 13293,
      "training_step_time": 0.6835000514984131
    },
    {
      "epoch": 8.114013671875e-05,
      "model_forward_time": 0.11453938484191895,
      "step": 13294
    },
    {
      "epoch": 8.114013671875e-05,
      "step": 13294,
      "training_step_time": 0.3890864849090576
    },
    {
      "epoch": 8.1146240234375e-05,
      "model_forward_time": 0.11422300338745117,
      "step": 13295
    },
    {
      "epoch": 8.1146240234375e-05,
      "step": 13295,
      "training_step_time": 0.3826742172241211
    },
    {
      "epoch": 8.115234375e-05,
      "model_forward_time": 0.11519670486450195,
      "step": 13296
    },
    {
      "epoch": 8.115234375e-05,
      "step": 13296,
      "training_step_time": 0.39689040184020996
    },
    {
      "epoch": 8.1158447265625e-05,
      "model_forward_time": 0.11519336700439453,
      "step": 13297
    },
    {
      "epoch": 8.1158447265625e-05,
      "step": 13297,
      "training_step_time": 0.38739991188049316
    },
    {
      "epoch": 8.116455078125e-05,
      "model_forward_time": 0.11448478698730469,
      "step": 13298
    },
    {
      "epoch": 8.116455078125e-05,
      "step": 13298,
      "training_step_time": 0.45963311195373535
    },
    {
      "epoch": 8.1170654296875e-05,
      "model_forward_time": 0.11534595489501953,
      "step": 13299
    },
    {
      "epoch": 8.1170654296875e-05,
      "step": 13299,
      "training_step_time": 0.7469041347503662
    },
    {
      "epoch": 8.11767578125e-05,
      "grad_norm": 0.14951851963996887,
      "learning_rate": 9.215722883364033e-05,
      "loss": 0.0644,
      "step": 13300
    },
    {
      "epoch": 8.11767578125e-05,
      "model_forward_time": 0.1146388053894043,
      "step": 13300
    },
    {
      "epoch": 8.11767578125e-05,
      "step": 13300,
      "training_step_time": 0.46385765075683594
    },
    {
      "epoch": 8.1182861328125e-05,
      "model_forward_time": 0.11446070671081543,
      "step": 13301
    },
    {
      "epoch": 8.1182861328125e-05,
      "step": 13301,
      "training_step_time": 0.3882486820220947
    },
    {
      "epoch": 8.118896484375e-05,
      "model_forward_time": 0.11411452293395996,
      "step": 13302
    },
    {
      "epoch": 8.118896484375e-05,
      "step": 13302,
      "training_step_time": 0.39060187339782715
    },
    {
      "epoch": 8.1195068359375e-05,
      "model_forward_time": 0.1152946949005127,
      "step": 13303
    },
    {
      "epoch": 8.1195068359375e-05,
      "step": 13303,
      "training_step_time": 0.38956356048583984
    },
    {
      "epoch": 8.1201171875e-05,
      "model_forward_time": 0.11431503295898438,
      "step": 13304
    },
    {
      "epoch": 8.1201171875e-05,
      "step": 13304,
      "training_step_time": 0.42546677589416504
    },
    {
      "epoch": 8.1207275390625e-05,
      "model_forward_time": 0.11476254463195801,
      "step": 13305
    },
    {
      "epoch": 8.1207275390625e-05,
      "step": 13305,
      "training_step_time": 0.6036932468414307
    },
    {
      "epoch": 8.121337890625e-05,
      "model_forward_time": 0.11478662490844727,
      "step": 13306
    },
    {
      "epoch": 8.121337890625e-05,
      "step": 13306,
      "training_step_time": 0.3892807960510254
    },
    {
      "epoch": 8.1219482421875e-05,
      "model_forward_time": 0.11524462699890137,
      "step": 13307
    },
    {
      "epoch": 8.1219482421875e-05,
      "step": 13307,
      "training_step_time": 0.38281965255737305
    },
    {
      "epoch": 8.12255859375e-05,
      "model_forward_time": 0.11457085609436035,
      "step": 13308
    },
    {
      "epoch": 8.12255859375e-05,
      "step": 13308,
      "training_step_time": 0.3895142078399658
    },
    {
      "epoch": 8.1231689453125e-05,
      "model_forward_time": 0.11519193649291992,
      "step": 13309
    },
    {
      "epoch": 8.1231689453125e-05,
      "step": 13309,
      "training_step_time": 0.39212822914123535
    },
    {
      "epoch": 8.123779296875e-05,
      "grad_norm": 0.18907614052295685,
      "learning_rate": 9.214240494639949e-05,
      "loss": 0.0533,
      "step": 13310
    },
    {
      "epoch": 8.123779296875e-05,
      "model_forward_time": 0.11549973487854004,
      "step": 13310
    },
    {
      "epoch": 8.123779296875e-05,
      "step": 13310,
      "training_step_time": 0.3944234848022461
    },
    {
      "epoch": 8.1243896484375e-05,
      "model_forward_time": 0.11522889137268066,
      "step": 13311
    },
    {
      "epoch": 8.1243896484375e-05,
      "step": 13311,
      "training_step_time": 0.8585042953491211
    },
    {
      "epoch": 8.125e-05,
      "model_forward_time": 0.1157989501953125,
      "step": 13312
    },
    {
      "epoch": 8.125e-05,
      "step": 13312,
      "training_step_time": 0.3774375915527344
    },
    {
      "epoch": 8.1256103515625e-05,
      "model_forward_time": 0.11427617073059082,
      "step": 13313
    },
    {
      "epoch": 8.1256103515625e-05,
      "step": 13313,
      "training_step_time": 0.40927577018737793
    },
    {
      "epoch": 8.126220703125e-05,
      "model_forward_time": 0.11464643478393555,
      "step": 13314
    },
    {
      "epoch": 8.126220703125e-05,
      "step": 13314,
      "training_step_time": 0.4097583293914795
    },
    {
      "epoch": 8.1268310546875e-05,
      "model_forward_time": 0.11445951461791992,
      "step": 13315
    },
    {
      "epoch": 8.1268310546875e-05,
      "step": 13315,
      "training_step_time": 0.38730764389038086
    },
    {
      "epoch": 8.12744140625e-05,
      "model_forward_time": 0.11402010917663574,
      "step": 13316
    },
    {
      "epoch": 8.12744140625e-05,
      "step": 13316,
      "training_step_time": 0.4698832035064697
    },
    {
      "epoch": 8.1280517578125e-05,
      "model_forward_time": 0.1147160530090332,
      "step": 13317
    },
    {
      "epoch": 8.1280517578125e-05,
      "step": 13317,
      "training_step_time": 0.6128380298614502
    },
    {
      "epoch": 8.128662109375e-05,
      "model_forward_time": 0.11494278907775879,
      "step": 13318
    },
    {
      "epoch": 8.128662109375e-05,
      "step": 13318,
      "training_step_time": 0.46556615829467773
    },
    {
      "epoch": 8.1292724609375e-05,
      "model_forward_time": 0.11444234848022461,
      "step": 13319
    },
    {
      "epoch": 8.1292724609375e-05,
      "step": 13319,
      "training_step_time": 0.40088725090026855
    },
    {
      "epoch": 8.1298828125e-05,
      "grad_norm": 0.24289411306381226,
      "learning_rate": 9.212756825740873e-05,
      "loss": 0.061,
      "step": 13320
    },
    {
      "epoch": 8.1298828125e-05,
      "model_forward_time": 0.11402368545532227,
      "step": 13320
    },
    {
      "epoch": 8.1298828125e-05,
      "step": 13320,
      "training_step_time": 0.39325428009033203
    },
    {
      "epoch": 8.1304931640625e-05,
      "model_forward_time": 0.11464738845825195,
      "step": 13321
    },
    {
      "epoch": 8.1304931640625e-05,
      "step": 13321,
      "training_step_time": 0.39009690284729004
    },
    {
      "epoch": 8.131103515625e-05,
      "model_forward_time": 0.11483931541442871,
      "step": 13322
    },
    {
      "epoch": 8.131103515625e-05,
      "step": 13322,
      "training_step_time": 0.39222192764282227
    },
    {
      "epoch": 8.1317138671875e-05,
      "model_forward_time": 0.11527180671691895,
      "step": 13323
    },
    {
      "epoch": 8.1317138671875e-05,
      "step": 13323,
      "training_step_time": 0.678760290145874
    },
    {
      "epoch": 8.13232421875e-05,
      "model_forward_time": 0.1145327091217041,
      "step": 13324
    },
    {
      "epoch": 8.13232421875e-05,
      "step": 13324,
      "training_step_time": 0.3874630928039551
    },
    {
      "epoch": 8.1329345703125e-05,
      "model_forward_time": 0.11464047431945801,
      "step": 13325
    },
    {
      "epoch": 8.1329345703125e-05,
      "step": 13325,
      "training_step_time": 0.47567248344421387
    },
    {
      "epoch": 8.133544921875e-05,
      "model_forward_time": 0.11486530303955078,
      "step": 13326
    },
    {
      "epoch": 8.133544921875e-05,
      "step": 13326,
      "training_step_time": 0.45780205726623535
    },
    {
      "epoch": 8.1341552734375e-05,
      "model_forward_time": 0.11500716209411621,
      "step": 13327
    },
    {
      "epoch": 8.1341552734375e-05,
      "step": 13327,
      "training_step_time": 0.44021177291870117
    },
    {
      "epoch": 8.134765625e-05,
      "model_forward_time": 0.11458110809326172,
      "step": 13328
    },
    {
      "epoch": 8.134765625e-05,
      "step": 13328,
      "training_step_time": 0.38377809524536133
    },
    {
      "epoch": 8.1353759765625e-05,
      "model_forward_time": 0.11491751670837402,
      "step": 13329
    },
    {
      "epoch": 8.1353759765625e-05,
      "step": 13329,
      "training_step_time": 0.5820317268371582
    },
    {
      "epoch": 8.135986328125e-05,
      "grad_norm": 0.19061224162578583,
      "learning_rate": 9.211271877117507e-05,
      "loss": 0.0644,
      "step": 13330
    },
    {
      "epoch": 8.135986328125e-05,
      "model_forward_time": 0.11452126502990723,
      "step": 13330
    },
    {
      "epoch": 8.135986328125e-05,
      "step": 13330,
      "training_step_time": 0.4039602279663086
    },
    {
      "epoch": 8.1365966796875e-05,
      "model_forward_time": 0.1150968074798584,
      "step": 13331
    },
    {
      "epoch": 8.1365966796875e-05,
      "step": 13331,
      "training_step_time": 0.41722846031188965
    },
    {
      "epoch": 8.13720703125e-05,
      "model_forward_time": 0.11465954780578613,
      "step": 13332
    },
    {
      "epoch": 8.13720703125e-05,
      "step": 13332,
      "training_step_time": 0.3926103115081787
    },
    {
      "epoch": 8.1378173828125e-05,
      "model_forward_time": 0.11462235450744629,
      "step": 13333
    },
    {
      "epoch": 8.1378173828125e-05,
      "step": 13333,
      "training_step_time": 0.3915085792541504
    },
    {
      "epoch": 8.138427734375e-05,
      "model_forward_time": 0.11480593681335449,
      "step": 13334
    },
    {
      "epoch": 8.138427734375e-05,
      "step": 13334,
      "training_step_time": 0.38956332206726074
    },
    {
      "epoch": 8.1390380859375e-05,
      "model_forward_time": 0.11535000801086426,
      "step": 13335
    },
    {
      "epoch": 8.1390380859375e-05,
      "step": 13335,
      "training_step_time": 0.6920011043548584
    },
    {
      "epoch": 8.1396484375e-05,
      "model_forward_time": 0.11494088172912598,
      "step": 13336
    },
    {
      "epoch": 8.1396484375e-05,
      "step": 13336,
      "training_step_time": 0.4151308536529541
    },
    {
      "epoch": 8.1402587890625e-05,
      "model_forward_time": 0.11417794227600098,
      "step": 13337
    },
    {
      "epoch": 8.1402587890625e-05,
      "step": 13337,
      "training_step_time": 0.39536023139953613
    },
    {
      "epoch": 8.140869140625e-05,
      "model_forward_time": 0.11530351638793945,
      "step": 13338
    },
    {
      "epoch": 8.140869140625e-05,
      "step": 13338,
      "training_step_time": 0.4268062114715576
    },
    {
      "epoch": 8.1414794921875e-05,
      "model_forward_time": 0.11432027816772461,
      "step": 13339
    },
    {
      "epoch": 8.1414794921875e-05,
      "step": 13339,
      "training_step_time": 0.3646419048309326
    },
    {
      "epoch": 8.14208984375e-05,
      "grad_norm": 0.18976399302482605,
      "learning_rate": 9.209785649220935e-05,
      "loss": 0.0595,
      "step": 13340
    },
    {
      "epoch": 8.14208984375e-05,
      "model_forward_time": 0.11428332328796387,
      "step": 13340
    },
    {
      "epoch": 8.14208984375e-05,
      "step": 13340,
      "training_step_time": 0.39218831062316895
    },
    {
      "epoch": 8.1427001953125e-05,
      "model_forward_time": 0.11490106582641602,
      "step": 13341
    },
    {
      "epoch": 8.1427001953125e-05,
      "step": 13341,
      "training_step_time": 0.6927628517150879
    },
    {
      "epoch": 8.143310546875e-05,
      "model_forward_time": 0.11434745788574219,
      "step": 13342
    },
    {
      "epoch": 8.143310546875e-05,
      "step": 13342,
      "training_step_time": 0.38211655616760254
    },
    {
      "epoch": 8.1439208984375e-05,
      "model_forward_time": 0.11472201347351074,
      "step": 13343
    },
    {
      "epoch": 8.1439208984375e-05,
      "step": 13343,
      "training_step_time": 0.3900020122528076
    },
    {
      "epoch": 8.14453125e-05,
      "model_forward_time": 0.11422014236450195,
      "step": 13344
    },
    {
      "epoch": 8.14453125e-05,
      "step": 13344,
      "training_step_time": 0.4809074401855469
    },
    {
      "epoch": 8.1451416015625e-05,
      "model_forward_time": 0.11463499069213867,
      "step": 13345
    },
    {
      "epoch": 8.1451416015625e-05,
      "step": 13345,
      "training_step_time": 0.4221806526184082
    },
    {
      "epoch": 8.145751953125e-05,
      "model_forward_time": 0.1147162914276123,
      "step": 13346
    },
    {
      "epoch": 8.145751953125e-05,
      "step": 13346,
      "training_step_time": 0.38525938987731934
    },
    {
      "epoch": 8.1463623046875e-05,
      "model_forward_time": 0.11538171768188477,
      "step": 13347
    },
    {
      "epoch": 8.1463623046875e-05,
      "step": 13347,
      "training_step_time": 0.5783603191375732
    },
    {
      "epoch": 8.14697265625e-05,
      "model_forward_time": 0.11483931541442871,
      "step": 13348
    },
    {
      "epoch": 8.14697265625e-05,
      "step": 13348,
      "training_step_time": 0.41272735595703125
    },
    {
      "epoch": 8.1475830078125e-05,
      "model_forward_time": 0.11453938484191895,
      "step": 13349
    },
    {
      "epoch": 8.1475830078125e-05,
      "step": 13349,
      "training_step_time": 0.3950660228729248
    },
    {
      "epoch": 8.148193359375e-05,
      "grad_norm": 0.17958477139472961,
      "learning_rate": 9.208298142502636e-05,
      "loss": 0.0566,
      "step": 13350
    },
    {
      "epoch": 8.148193359375e-05,
      "model_forward_time": 0.11484718322753906,
      "step": 13350
    },
    {
      "epoch": 8.148193359375e-05,
      "step": 13350,
      "training_step_time": 0.3930366039276123
    },
    {
      "epoch": 8.1488037109375e-05,
      "model_forward_time": 0.11439895629882812,
      "step": 13351
    },
    {
      "epoch": 8.1488037109375e-05,
      "step": 13351,
      "training_step_time": 0.39496469497680664
    },
    {
      "epoch": 8.1494140625e-05,
      "model_forward_time": 0.11510682106018066,
      "step": 13352
    },
    {
      "epoch": 8.1494140625e-05,
      "step": 13352,
      "training_step_time": 0.4424312114715576
    },
    {
      "epoch": 8.1500244140625e-05,
      "model_forward_time": 0.11507868766784668,
      "step": 13353
    },
    {
      "epoch": 8.1500244140625e-05,
      "step": 13353,
      "training_step_time": 0.6744260787963867
    },
    {
      "epoch": 8.150634765625e-05,
      "model_forward_time": 0.11476612091064453,
      "step": 13354
    },
    {
      "epoch": 8.150634765625e-05,
      "step": 13354,
      "training_step_time": 0.398756742477417
    },
    {
      "epoch": 8.1512451171875e-05,
      "model_forward_time": 0.11439919471740723,
      "step": 13355
    },
    {
      "epoch": 8.1512451171875e-05,
      "step": 13355,
      "training_step_time": 0.4557380676269531
    },
    {
      "epoch": 8.15185546875e-05,
      "model_forward_time": 0.11463737487792969,
      "step": 13356
    },
    {
      "epoch": 8.15185546875e-05,
      "step": 13356,
      "training_step_time": 0.3899087905883789
    },
    {
      "epoch": 8.1524658203125e-05,
      "model_forward_time": 0.11450004577636719,
      "step": 13357
    },
    {
      "epoch": 8.1524658203125e-05,
      "step": 13357,
      "training_step_time": 0.4135735034942627
    },
    {
      "epoch": 8.153076171875e-05,
      "model_forward_time": 0.11482954025268555,
      "step": 13358
    },
    {
      "epoch": 8.153076171875e-05,
      "step": 13358,
      "training_step_time": 0.45377206802368164
    },
    {
      "epoch": 8.1536865234375e-05,
      "model_forward_time": 0.11508631706237793,
      "step": 13359
    },
    {
      "epoch": 8.1536865234375e-05,
      "step": 13359,
      "training_step_time": 0.7272911071777344
    },
    {
      "epoch": 8.154296875e-05,
      "grad_norm": 0.21756909787654877,
      "learning_rate": 9.206809357414474e-05,
      "loss": 0.0575,
      "step": 13360
    },
    {
      "epoch": 8.154296875e-05,
      "model_forward_time": 0.1150972843170166,
      "step": 13360
    },
    {
      "epoch": 8.154296875e-05,
      "step": 13360,
      "training_step_time": 0.41293764114379883
    },
    {
      "epoch": 8.1549072265625e-05,
      "model_forward_time": 0.11431431770324707,
      "step": 13361
    },
    {
      "epoch": 8.1549072265625e-05,
      "step": 13361,
      "training_step_time": 0.3961830139160156
    },
    {
      "epoch": 8.155517578125e-05,
      "model_forward_time": 0.11440277099609375,
      "step": 13362
    },
    {
      "epoch": 8.155517578125e-05,
      "step": 13362,
      "training_step_time": 0.3912670612335205
    },
    {
      "epoch": 8.1561279296875e-05,
      "model_forward_time": 0.11445927619934082,
      "step": 13363
    },
    {
      "epoch": 8.1561279296875e-05,
      "step": 13363,
      "training_step_time": 0.3867626190185547
    },
    {
      "epoch": 8.15673828125e-05,
      "model_forward_time": 0.11466693878173828,
      "step": 13364
    },
    {
      "epoch": 8.15673828125e-05,
      "step": 13364,
      "training_step_time": 0.3812105655670166
    },
    {
      "epoch": 8.1573486328125e-05,
      "model_forward_time": 0.11522984504699707,
      "step": 13365
    },
    {
      "epoch": 8.1573486328125e-05,
      "step": 13365,
      "training_step_time": 0.8120803833007812
    },
    {
      "epoch": 8.157958984375e-05,
      "model_forward_time": 0.11461496353149414,
      "step": 13366
    },
    {
      "epoch": 8.157958984375e-05,
      "step": 13366,
      "training_step_time": 0.3658027648925781
    },
    {
      "epoch": 8.1585693359375e-05,
      "model_forward_time": 0.11465120315551758,
      "step": 13367
    },
    {
      "epoch": 8.1585693359375e-05,
      "step": 13367,
      "training_step_time": 0.4460639953613281
    },
    {
      "epoch": 8.1591796875e-05,
      "model_forward_time": 0.1146085262298584,
      "step": 13368
    },
    {
      "epoch": 8.1591796875e-05,
      "step": 13368,
      "training_step_time": 0.40827488899230957
    },
    {
      "epoch": 8.1597900390625e-05,
      "model_forward_time": 0.11432957649230957,
      "step": 13369
    },
    {
      "epoch": 8.1597900390625e-05,
      "step": 13369,
      "training_step_time": 0.391887903213501
    },
    {
      "epoch": 8.160400390625e-05,
      "grad_norm": 0.16705818474292755,
      "learning_rate": 9.205319294408705e-05,
      "loss": 0.0547,
      "step": 13370
    },
    {
      "epoch": 8.160400390625e-05,
      "model_forward_time": 0.11436176300048828,
      "step": 13370
    },
    {
      "epoch": 8.160400390625e-05,
      "step": 13370,
      "training_step_time": 0.4062013626098633
    },
    {
      "epoch": 8.1610107421875e-05,
      "model_forward_time": 0.11510014533996582,
      "step": 13371
    },
    {
      "epoch": 8.1610107421875e-05,
      "step": 13371,
      "training_step_time": 0.5622711181640625
    },
    {
      "epoch": 8.16162109375e-05,
      "model_forward_time": 0.11437416076660156,
      "step": 13372
    },
    {
      "epoch": 8.16162109375e-05,
      "step": 13372,
      "training_step_time": 0.39427661895751953
    },
    {
      "epoch": 8.1622314453125e-05,
      "model_forward_time": 0.11504983901977539,
      "step": 13373
    },
    {
      "epoch": 8.1622314453125e-05,
      "step": 13373,
      "training_step_time": 0.4086945056915283
    },
    {
      "epoch": 8.162841796875e-05,
      "model_forward_time": 0.11463379859924316,
      "step": 13374
    },
    {
      "epoch": 8.162841796875e-05,
      "step": 13374,
      "training_step_time": 0.38895559310913086
    },
    {
      "epoch": 8.1634521484375e-05,
      "model_forward_time": 0.11510276794433594,
      "step": 13375
    },
    {
      "epoch": 8.1634521484375e-05,
      "step": 13375,
      "training_step_time": 0.3938019275665283
    },
    {
      "epoch": 8.1640625e-05,
      "model_forward_time": 0.1151125431060791,
      "step": 13376
    },
    {
      "epoch": 8.1640625e-05,
      "step": 13376,
      "training_step_time": 0.3859672546386719
    },
    {
      "epoch": 8.1646728515625e-05,
      "model_forward_time": 0.11569070816040039,
      "step": 13377
    },
    {
      "epoch": 8.1646728515625e-05,
      "step": 13377,
      "training_step_time": 0.98240065574646
    },
    {
      "epoch": 8.165283203125e-05,
      "model_forward_time": 0.1146388053894043,
      "step": 13378
    },
    {
      "epoch": 8.165283203125e-05,
      "step": 13378,
      "training_step_time": 0.3804972171783447
    },
    {
      "epoch": 8.1658935546875e-05,
      "model_forward_time": 0.1149590015411377,
      "step": 13379
    },
    {
      "epoch": 8.1658935546875e-05,
      "step": 13379,
      "training_step_time": 0.3928372859954834
    },
    {
      "epoch": 8.16650390625e-05,
      "grad_norm": 0.15603302419185638,
      "learning_rate": 9.20382795393797e-05,
      "loss": 0.0603,
      "step": 13380
    },
    {
      "epoch": 8.16650390625e-05,
      "model_forward_time": 0.1141054630279541,
      "step": 13380
    },
    {
      "epoch": 8.16650390625e-05,
      "step": 13380,
      "training_step_time": 0.4170694351196289
    },
    {
      "epoch": 8.1671142578125e-05,
      "model_forward_time": 0.11398673057556152,
      "step": 13381
    },
    {
      "epoch": 8.1671142578125e-05,
      "step": 13381,
      "training_step_time": 0.4300081729888916
    },
    {
      "epoch": 8.167724609375e-05,
      "model_forward_time": 0.11493134498596191,
      "step": 13382
    },
    {
      "epoch": 8.167724609375e-05,
      "step": 13382,
      "training_step_time": 0.392946720123291
    },
    {
      "epoch": 8.1683349609375e-05,
      "model_forward_time": 0.11553096771240234,
      "step": 13383
    },
    {
      "epoch": 8.1683349609375e-05,
      "step": 13383,
      "training_step_time": 0.7944595813751221
    },
    {
      "epoch": 8.1689453125e-05,
      "model_forward_time": 0.11424660682678223,
      "step": 13384
    },
    {
      "epoch": 8.1689453125e-05,
      "step": 13384,
      "training_step_time": 0.3956315517425537
    },
    {
      "epoch": 8.1695556640625e-05,
      "model_forward_time": 0.1149439811706543,
      "step": 13385
    },
    {
      "epoch": 8.1695556640625e-05,
      "step": 13385,
      "training_step_time": 0.39953112602233887
    },
    {
      "epoch": 8.170166015625e-05,
      "model_forward_time": 0.11440515518188477,
      "step": 13386
    },
    {
      "epoch": 8.170166015625e-05,
      "step": 13386,
      "training_step_time": 0.39711809158325195
    },
    {
      "epoch": 8.1707763671875e-05,
      "model_forward_time": 0.1146235466003418,
      "step": 13387
    },
    {
      "epoch": 8.1707763671875e-05,
      "step": 13387,
      "training_step_time": 0.40123748779296875
    },
    {
      "epoch": 8.17138671875e-05,
      "model_forward_time": 0.11413979530334473,
      "step": 13388
    },
    {
      "epoch": 8.17138671875e-05,
      "step": 13388,
      "training_step_time": 0.37952470779418945
    },
    {
      "epoch": 8.1719970703125e-05,
      "model_forward_time": 0.11546969413757324,
      "step": 13389
    },
    {
      "epoch": 8.1719970703125e-05,
      "step": 13389,
      "training_step_time": 0.6117837429046631
    },
    {
      "epoch": 8.172607421875e-05,
      "grad_norm": 0.17386217415332794,
      "learning_rate": 9.202335336455296e-05,
      "loss": 0.0575,
      "step": 13390
    },
    {
      "epoch": 8.172607421875e-05,
      "model_forward_time": 0.11501383781433105,
      "step": 13390
    },
    {
      "epoch": 8.172607421875e-05,
      "step": 13390,
      "training_step_time": 0.38454723358154297
    },
    {
      "epoch": 8.1732177734375e-05,
      "model_forward_time": 0.11485958099365234,
      "step": 13391
    },
    {
      "epoch": 8.1732177734375e-05,
      "step": 13391,
      "training_step_time": 0.3901054859161377
    },
    {
      "epoch": 8.173828125e-05,
      "model_forward_time": 0.1150352954864502,
      "step": 13392
    },
    {
      "epoch": 8.173828125e-05,
      "step": 13392,
      "training_step_time": 0.48873400688171387
    },
    {
      "epoch": 8.1744384765625e-05,
      "model_forward_time": 0.11451888084411621,
      "step": 13393
    },
    {
      "epoch": 8.1744384765625e-05,
      "step": 13393,
      "training_step_time": 0.36306262016296387
    },
    {
      "epoch": 8.175048828125e-05,
      "model_forward_time": 0.11482620239257812,
      "step": 13394
    },
    {
      "epoch": 8.175048828125e-05,
      "step": 13394,
      "training_step_time": 0.4868459701538086
    },
    {
      "epoch": 8.1756591796875e-05,
      "model_forward_time": 0.11472177505493164,
      "step": 13395
    },
    {
      "epoch": 8.1756591796875e-05,
      "step": 13395,
      "training_step_time": 0.5850241184234619
    },
    {
      "epoch": 8.17626953125e-05,
      "model_forward_time": 0.11457562446594238,
      "step": 13396
    },
    {
      "epoch": 8.17626953125e-05,
      "step": 13396,
      "training_step_time": 0.37944984436035156
    },
    {
      "epoch": 8.1768798828125e-05,
      "model_forward_time": 0.11484718322753906,
      "step": 13397
    },
    {
      "epoch": 8.1768798828125e-05,
      "step": 13397,
      "training_step_time": 0.3836545944213867
    },
    {
      "epoch": 8.177490234375e-05,
      "model_forward_time": 0.11506366729736328,
      "step": 13398
    },
    {
      "epoch": 8.177490234375e-05,
      "step": 13398,
      "training_step_time": 0.43273353576660156
    },
    {
      "epoch": 8.1781005859375e-05,
      "model_forward_time": 0.11437273025512695,
      "step": 13399
    },
    {
      "epoch": 8.1781005859375e-05,
      "step": 13399,
      "training_step_time": 0.41100001335144043
    },
    {
      "epoch": 8.1787109375e-05,
      "grad_norm": 0.12019484490156174,
      "learning_rate": 9.200841442414106e-05,
      "loss": 0.0533,
      "step": 13400
    },
    {
      "epoch": 8.1787109375e-05,
      "model_forward_time": 0.11475586891174316,
      "step": 13400
    },
    {
      "epoch": 8.1787109375e-05,
      "step": 13400,
      "training_step_time": 0.38115882873535156
    },
    {
      "epoch": 8.1793212890625e-05,
      "model_forward_time": 0.11599588394165039,
      "step": 13401
    },
    {
      "epoch": 8.1793212890625e-05,
      "step": 13401,
      "training_step_time": 0.6350886821746826
    },
    {
      "epoch": 8.179931640625e-05,
      "model_forward_time": 0.11454606056213379,
      "step": 13402
    },
    {
      "epoch": 8.179931640625e-05,
      "step": 13402,
      "training_step_time": 0.38419604301452637
    },
    {
      "epoch": 8.1805419921875e-05,
      "model_forward_time": 0.11467957496643066,
      "step": 13403
    },
    {
      "epoch": 8.1805419921875e-05,
      "step": 13403,
      "training_step_time": 0.38338279724121094
    },
    {
      "epoch": 8.18115234375e-05,
      "model_forward_time": 0.11533641815185547,
      "step": 13404
    },
    {
      "epoch": 8.18115234375e-05,
      "step": 13404,
      "training_step_time": 0.3931090831756592
    },
    {
      "epoch": 8.1817626953125e-05,
      "model_forward_time": 0.1148526668548584,
      "step": 13405
    },
    {
      "epoch": 8.1817626953125e-05,
      "step": 13405,
      "training_step_time": 0.3926210403442383
    },
    {
      "epoch": 8.182373046875e-05,
      "model_forward_time": 0.1146388053894043,
      "step": 13406
    },
    {
      "epoch": 8.182373046875e-05,
      "step": 13406,
      "training_step_time": 0.40823912620544434
    },
    {
      "epoch": 8.1829833984375e-05,
      "model_forward_time": 0.11520862579345703,
      "step": 13407
    },
    {
      "epoch": 8.1829833984375e-05,
      "step": 13407,
      "training_step_time": 0.9736976623535156
    },
    {
      "epoch": 8.18359375e-05,
      "model_forward_time": 0.11468005180358887,
      "step": 13408
    },
    {
      "epoch": 8.18359375e-05,
      "step": 13408,
      "training_step_time": 0.5000245571136475
    },
    {
      "epoch": 8.1842041015625e-05,
      "model_forward_time": 0.11380267143249512,
      "step": 13409
    },
    {
      "epoch": 8.1842041015625e-05,
      "step": 13409,
      "training_step_time": 0.37561774253845215
    },
    {
      "epoch": 8.184814453125e-05,
      "grad_norm": 0.16658978164196014,
      "learning_rate": 9.199346272268199e-05,
      "loss": 0.0609,
      "step": 13410
    },
    {
      "epoch": 8.184814453125e-05,
      "model_forward_time": 0.1141207218170166,
      "step": 13410
    },
    {
      "epoch": 8.184814453125e-05,
      "step": 13410,
      "training_step_time": 0.4414658546447754
    },
    {
      "epoch": 8.1854248046875e-05,
      "model_forward_time": 0.1134939193725586,
      "step": 13411
    },
    {
      "epoch": 8.1854248046875e-05,
      "step": 13411,
      "training_step_time": 0.4005730152130127
    },
    {
      "epoch": 8.18603515625e-05,
      "model_forward_time": 0.11428999900817871,
      "step": 13412
    },
    {
      "epoch": 8.18603515625e-05,
      "step": 13412,
      "training_step_time": 0.39971351623535156
    },
    {
      "epoch": 8.1866455078125e-05,
      "model_forward_time": 0.11433005332946777,
      "step": 13413
    },
    {
      "epoch": 8.1866455078125e-05,
      "step": 13413,
      "training_step_time": 0.3925447463989258
    },
    {
      "epoch": 8.187255859375e-05,
      "model_forward_time": 0.11506032943725586,
      "step": 13414
    },
    {
      "epoch": 8.187255859375e-05,
      "step": 13414,
      "training_step_time": 0.39251112937927246
    },
    {
      "epoch": 8.1878662109375e-05,
      "model_forward_time": 0.11560678482055664,
      "step": 13415
    },
    {
      "epoch": 8.1878662109375e-05,
      "step": 13415,
      "training_step_time": 0.4145376682281494
    },
    {
      "epoch": 8.1884765625e-05,
      "model_forward_time": 0.11441755294799805,
      "step": 13416
    },
    {
      "epoch": 8.1884765625e-05,
      "step": 13416,
      "training_step_time": 0.3980579376220703
    },
    {
      "epoch": 8.1890869140625e-05,
      "model_forward_time": 0.11508703231811523,
      "step": 13417
    },
    {
      "epoch": 8.1890869140625e-05,
      "step": 13417,
      "training_step_time": 0.40166735649108887
    },
    {
      "epoch": 8.189697265625e-05,
      "model_forward_time": 0.11416482925415039,
      "step": 13418
    },
    {
      "epoch": 8.189697265625e-05,
      "step": 13418,
      "training_step_time": 0.3886733055114746
    },
    {
      "epoch": 8.1903076171875e-05,
      "model_forward_time": 0.11463570594787598,
      "step": 13419
    },
    {
      "epoch": 8.1903076171875e-05,
      "step": 13419,
      "training_step_time": 0.6679039001464844
    },
    {
      "epoch": 8.19091796875e-05,
      "grad_norm": 0.22856196761131287,
      "learning_rate": 9.197849826471774e-05,
      "loss": 0.0618,
      "step": 13420
    },
    {
      "epoch": 8.19091796875e-05,
      "model_forward_time": 0.11451387405395508,
      "step": 13420
    },
    {
      "epoch": 8.19091796875e-05,
      "step": 13420,
      "training_step_time": 0.3736732006072998
    },
    {
      "epoch": 8.1915283203125e-05,
      "model_forward_time": 0.11470699310302734,
      "step": 13421
    },
    {
      "epoch": 8.1915283203125e-05,
      "step": 13421,
      "training_step_time": 0.457150936126709
    },
    {
      "epoch": 8.192138671875e-05,
      "model_forward_time": 0.11492252349853516,
      "step": 13422
    },
    {
      "epoch": 8.192138671875e-05,
      "step": 13422,
      "training_step_time": 0.4154071807861328
    },
    {
      "epoch": 8.1927490234375e-05,
      "model_forward_time": 0.11483359336853027,
      "step": 13423
    },
    {
      "epoch": 8.1927490234375e-05,
      "step": 13423,
      "training_step_time": 0.40140390396118164
    },
    {
      "epoch": 8.193359375e-05,
      "model_forward_time": 0.11465311050415039,
      "step": 13424
    },
    {
      "epoch": 8.193359375e-05,
      "step": 13424,
      "training_step_time": 0.4684617519378662
    },
    {
      "epoch": 8.1939697265625e-05,
      "model_forward_time": 0.11507487297058105,
      "step": 13425
    },
    {
      "epoch": 8.1939697265625e-05,
      "step": 13425,
      "training_step_time": 0.7872223854064941
    },
    {
      "epoch": 8.194580078125e-05,
      "model_forward_time": 0.11456465721130371,
      "step": 13426
    },
    {
      "epoch": 8.194580078125e-05,
      "step": 13426,
      "training_step_time": 0.38385891914367676
    },
    {
      "epoch": 8.1951904296875e-05,
      "model_forward_time": 0.11504960060119629,
      "step": 13427
    },
    {
      "epoch": 8.1951904296875e-05,
      "step": 13427,
      "training_step_time": 0.3900868892669678
    },
    {
      "epoch": 8.19580078125e-05,
      "model_forward_time": 0.11408472061157227,
      "step": 13428
    },
    {
      "epoch": 8.19580078125e-05,
      "step": 13428,
      "training_step_time": 0.38893747329711914
    },
    {
      "epoch": 8.1964111328125e-05,
      "model_forward_time": 0.11447763442993164,
      "step": 13429
    },
    {
      "epoch": 8.1964111328125e-05,
      "step": 13429,
      "training_step_time": 0.3880012035369873
    },
    {
      "epoch": 8.197021484375e-05,
      "grad_norm": 0.28806397318840027,
      "learning_rate": 9.196352105479409e-05,
      "loss": 0.0628,
      "step": 13430
    },
    {
      "epoch": 8.197021484375e-05,
      "model_forward_time": 0.11430025100708008,
      "step": 13430
    },
    {
      "epoch": 8.197021484375e-05,
      "step": 13430,
      "training_step_time": 0.3872377872467041
    },
    {
      "epoch": 8.1976318359375e-05,
      "model_forward_time": 0.11532163619995117,
      "step": 13431
    },
    {
      "epoch": 8.1976318359375e-05,
      "step": 13431,
      "training_step_time": 0.7652802467346191
    },
    {
      "epoch": 8.1982421875e-05,
      "model_forward_time": 0.11488533020019531,
      "step": 13432
    },
    {
      "epoch": 8.1982421875e-05,
      "step": 13432,
      "training_step_time": 0.38671207427978516
    },
    {
      "epoch": 8.1988525390625e-05,
      "model_forward_time": 0.11486983299255371,
      "step": 13433
    },
    {
      "epoch": 8.1988525390625e-05,
      "step": 13433,
      "training_step_time": 0.41061925888061523
    },
    {
      "epoch": 8.199462890625e-05,
      "model_forward_time": 0.11455273628234863,
      "step": 13434
    },
    {
      "epoch": 8.199462890625e-05,
      "step": 13434,
      "training_step_time": 0.4211606979370117
    },
    {
      "epoch": 8.2000732421875e-05,
      "model_forward_time": 0.11437320709228516,
      "step": 13435
    },
    {
      "epoch": 8.2000732421875e-05,
      "step": 13435,
      "training_step_time": 0.47202253341674805
    },
    {
      "epoch": 8.20068359375e-05,
      "model_forward_time": 0.1143045425415039,
      "step": 13436
    },
    {
      "epoch": 8.20068359375e-05,
      "step": 13436,
      "training_step_time": 0.41939306259155273
    },
    {
      "epoch": 8.2012939453125e-05,
      "model_forward_time": 0.11555695533752441,
      "step": 13437
    },
    {
      "epoch": 8.2012939453125e-05,
      "step": 13437,
      "training_step_time": 0.533240795135498
    },
    {
      "epoch": 8.201904296875e-05,
      "model_forward_time": 0.11497139930725098,
      "step": 13438
    },
    {
      "epoch": 8.201904296875e-05,
      "step": 13438,
      "training_step_time": 0.4097933769226074
    },
    {
      "epoch": 8.2025146484375e-05,
      "model_forward_time": 0.11442184448242188,
      "step": 13439
    },
    {
      "epoch": 8.2025146484375e-05,
      "step": 13439,
      "training_step_time": 0.3929603099822998
    },
    {
      "epoch": 8.203125e-05,
      "grad_norm": 0.2080262154340744,
      "learning_rate": 9.194853109746074e-05,
      "loss": 0.0634,
      "step": 13440
    },
    {
      "epoch": 8.203125e-05,
      "model_forward_time": 0.11493349075317383,
      "step": 13440
    },
    {
      "epoch": 8.203125e-05,
      "step": 13440,
      "training_step_time": 0.40071797370910645
    },
    {
      "epoch": 8.2037353515625e-05,
      "model_forward_time": 0.11544227600097656,
      "step": 13441
    },
    {
      "epoch": 8.2037353515625e-05,
      "step": 13441,
      "training_step_time": 0.3950052261352539
    },
    {
      "epoch": 8.204345703125e-05,
      "model_forward_time": 0.11515355110168457,
      "step": 13442
    },
    {
      "epoch": 8.204345703125e-05,
      "step": 13442,
      "training_step_time": 0.38623738288879395
    },
    {
      "epoch": 8.2049560546875e-05,
      "model_forward_time": 0.11483335494995117,
      "step": 13443
    },
    {
      "epoch": 8.2049560546875e-05,
      "step": 13443,
      "training_step_time": 0.7509822845458984
    },
    {
      "epoch": 8.20556640625e-05,
      "model_forward_time": 0.1142416000366211,
      "step": 13444
    },
    {
      "epoch": 8.20556640625e-05,
      "step": 13444,
      "training_step_time": 0.3850440979003906
    },
    {
      "epoch": 8.2061767578125e-05,
      "model_forward_time": 0.11490774154663086,
      "step": 13445
    },
    {
      "epoch": 8.2061767578125e-05,
      "step": 13445,
      "training_step_time": 0.403240442276001
    },
    {
      "epoch": 8.206787109375e-05,
      "model_forward_time": 0.1145327091217041,
      "step": 13446
    },
    {
      "epoch": 8.206787109375e-05,
      "step": 13446,
      "training_step_time": 0.38849687576293945
    },
    {
      "epoch": 8.2073974609375e-05,
      "model_forward_time": 0.11488223075866699,
      "step": 13447
    },
    {
      "epoch": 8.2073974609375e-05,
      "step": 13447,
      "training_step_time": 0.420623779296875
    },
    {
      "epoch": 8.2080078125e-05,
      "model_forward_time": 0.11470603942871094,
      "step": 13448
    },
    {
      "epoch": 8.2080078125e-05,
      "step": 13448,
      "training_step_time": 0.4632291793823242
    },
    {
      "epoch": 8.2086181640625e-05,
      "model_forward_time": 0.11510276794433594,
      "step": 13449
    },
    {
      "epoch": 8.2086181640625e-05,
      "step": 13449,
      "training_step_time": 0.5426909923553467
    },
    {
      "epoch": 8.209228515625e-05,
      "grad_norm": 0.17326563596725464,
      "learning_rate": 9.193352839727121e-05,
      "loss": 0.0606,
      "step": 13450
    },
    {
      "epoch": 8.209228515625e-05,
      "model_forward_time": 0.11480712890625,
      "step": 13450
    },
    {
      "epoch": 8.209228515625e-05,
      "step": 13450,
      "training_step_time": 0.4498326778411865
    },
    {
      "epoch": 8.2098388671875e-05,
      "model_forward_time": 0.11466407775878906,
      "step": 13451
    },
    {
      "epoch": 8.2098388671875e-05,
      "step": 13451,
      "training_step_time": 0.39046788215637207
    },
    {
      "epoch": 8.21044921875e-05,
      "model_forward_time": 0.11503481864929199,
      "step": 13452
    },
    {
      "epoch": 8.21044921875e-05,
      "step": 13452,
      "training_step_time": 0.41225361824035645
    },
    {
      "epoch": 8.2110595703125e-05,
      "model_forward_time": 0.11505889892578125,
      "step": 13453
    },
    {
      "epoch": 8.2110595703125e-05,
      "step": 13453,
      "training_step_time": 0.391188383102417
    },
    {
      "epoch": 8.211669921875e-05,
      "model_forward_time": 0.1152181625366211,
      "step": 13454
    },
    {
      "epoch": 8.211669921875e-05,
      "step": 13454,
      "training_step_time": 0.411419153213501
    },
    {
      "epoch": 8.2122802734375e-05,
      "model_forward_time": 0.11449861526489258,
      "step": 13455
    },
    {
      "epoch": 8.2122802734375e-05,
      "step": 13455,
      "training_step_time": 0.7613546848297119
    },
    {
      "epoch": 8.212890625e-05,
      "model_forward_time": 0.11401796340942383,
      "step": 13456
    },
    {
      "epoch": 8.212890625e-05,
      "step": 13456,
      "training_step_time": 0.402087926864624
    },
    {
      "epoch": 8.2135009765625e-05,
      "model_forward_time": 0.11427974700927734,
      "step": 13457
    },
    {
      "epoch": 8.2135009765625e-05,
      "step": 13457,
      "training_step_time": 0.38175463676452637
    },
    {
      "epoch": 8.214111328125e-05,
      "model_forward_time": 0.11448359489440918,
      "step": 13458
    },
    {
      "epoch": 8.214111328125e-05,
      "step": 13458,
      "training_step_time": 0.38593292236328125
    },
    {
      "epoch": 8.2147216796875e-05,
      "model_forward_time": 0.11425113677978516,
      "step": 13459
    },
    {
      "epoch": 8.2147216796875e-05,
      "step": 13459,
      "training_step_time": 0.390244722366333
    },
    {
      "epoch": 8.21533203125e-05,
      "grad_norm": 0.18485091626644135,
      "learning_rate": 9.191851295878295e-05,
      "loss": 0.0602,
      "step": 13460
    },
    {
      "epoch": 8.21533203125e-05,
      "model_forward_time": 0.11406993865966797,
      "step": 13460
    },
    {
      "epoch": 8.21533203125e-05,
      "step": 13460,
      "training_step_time": 0.38183045387268066
    },
    {
      "epoch": 8.2159423828125e-05,
      "model_forward_time": 0.11548686027526855,
      "step": 13461
    },
    {
      "epoch": 8.2159423828125e-05,
      "step": 13461,
      "training_step_time": 0.6929991245269775
    },
    {
      "epoch": 8.216552734375e-05,
      "model_forward_time": 0.11457133293151855,
      "step": 13462
    },
    {
      "epoch": 8.216552734375e-05,
      "step": 13462,
      "training_step_time": 0.45632386207580566
    },
    {
      "epoch": 8.2171630859375e-05,
      "model_forward_time": 0.11484217643737793,
      "step": 13463
    },
    {
      "epoch": 8.2171630859375e-05,
      "step": 13463,
      "training_step_time": 0.44304466247558594
    },
    {
      "epoch": 8.2177734375e-05,
      "model_forward_time": 0.11523985862731934,
      "step": 13464
    },
    {
      "epoch": 8.2177734375e-05,
      "step": 13464,
      "training_step_time": 0.42197656631469727
    },
    {
      "epoch": 8.2183837890625e-05,
      "model_forward_time": 0.11494612693786621,
      "step": 13465
    },
    {
      "epoch": 8.2183837890625e-05,
      "step": 13465,
      "training_step_time": 0.44949984550476074
    },
    {
      "epoch": 8.218994140625e-05,
      "model_forward_time": 0.11447525024414062,
      "step": 13466
    },
    {
      "epoch": 8.218994140625e-05,
      "step": 13466,
      "training_step_time": 0.38002610206604004
    },
    {
      "epoch": 8.2196044921875e-05,
      "model_forward_time": 0.11566996574401855,
      "step": 13467
    },
    {
      "epoch": 8.2196044921875e-05,
      "step": 13467,
      "training_step_time": 0.691575288772583
    },
    {
      "epoch": 8.22021484375e-05,
      "model_forward_time": 0.1140739917755127,
      "step": 13468
    },
    {
      "epoch": 8.22021484375e-05,
      "step": 13468,
      "training_step_time": 0.3798086643218994
    },
    {
      "epoch": 8.2208251953125e-05,
      "model_forward_time": 0.1143960952758789,
      "step": 13469
    },
    {
      "epoch": 8.2208251953125e-05,
      "step": 13469,
      "training_step_time": 0.39064502716064453
    },
    {
      "epoch": 8.221435546875e-05,
      "grad_norm": 0.25747203826904297,
      "learning_rate": 9.190348478655724e-05,
      "loss": 0.0615,
      "step": 13470
    },
    {
      "epoch": 8.221435546875e-05,
      "model_forward_time": 0.11588716506958008,
      "step": 13470
    },
    {
      "epoch": 8.221435546875e-05,
      "step": 13470,
      "training_step_time": 0.39366674423217773
    },
    {
      "epoch": 8.2220458984375e-05,
      "model_forward_time": 0.11463356018066406,
      "step": 13471
    },
    {
      "epoch": 8.2220458984375e-05,
      "step": 13471,
      "training_step_time": 0.3969883918762207
    },
    {
      "epoch": 8.22265625e-05,
      "model_forward_time": 0.11489486694335938,
      "step": 13472
    },
    {
      "epoch": 8.22265625e-05,
      "step": 13472,
      "training_step_time": 0.38097715377807617
    },
    {
      "epoch": 8.2232666015625e-05,
      "model_forward_time": 0.11460566520690918,
      "step": 13473
    },
    {
      "epoch": 8.2232666015625e-05,
      "step": 13473,
      "training_step_time": 0.6468997001647949
    },
    {
      "epoch": 8.223876953125e-05,
      "model_forward_time": 0.1151115894317627,
      "step": 13474
    },
    {
      "epoch": 8.223876953125e-05,
      "step": 13474,
      "training_step_time": 0.42083072662353516
    },
    {
      "epoch": 8.2244873046875e-05,
      "model_forward_time": 0.11508536338806152,
      "step": 13475
    },
    {
      "epoch": 8.2244873046875e-05,
      "step": 13475,
      "training_step_time": 0.446331262588501
    },
    {
      "epoch": 8.22509765625e-05,
      "model_forward_time": 0.1147758960723877,
      "step": 13476
    },
    {
      "epoch": 8.22509765625e-05,
      "step": 13476,
      "training_step_time": 0.46958184242248535
    },
    {
      "epoch": 8.2257080078125e-05,
      "model_forward_time": 0.1142892837524414,
      "step": 13477
    },
    {
      "epoch": 8.2257080078125e-05,
      "step": 13477,
      "training_step_time": 0.4616677761077881
    },
    {
      "epoch": 8.226318359375e-05,
      "model_forward_time": 0.11529088020324707,
      "step": 13478
    },
    {
      "epoch": 8.226318359375e-05,
      "step": 13478,
      "training_step_time": 0.4164857864379883
    },
    {
      "epoch": 8.2269287109375e-05,
      "model_forward_time": 0.1145486831665039,
      "step": 13479
    },
    {
      "epoch": 8.2269287109375e-05,
      "step": 13479,
      "training_step_time": 0.38738059997558594
    },
    {
      "epoch": 8.2275390625e-05,
      "grad_norm": 0.17102031409740448,
      "learning_rate": 9.188844388515926e-05,
      "loss": 0.0525,
      "step": 13480
    },
    {
      "epoch": 8.2275390625e-05,
      "model_forward_time": 0.11482739448547363,
      "step": 13480
    },
    {
      "epoch": 8.2275390625e-05,
      "step": 13480,
      "training_step_time": 0.38259363174438477
    },
    {
      "epoch": 8.2281494140625e-05,
      "model_forward_time": 0.11488509178161621,
      "step": 13481
    },
    {
      "epoch": 8.2281494140625e-05,
      "step": 13481,
      "training_step_time": 0.39185190200805664
    },
    {
      "epoch": 8.228759765625e-05,
      "model_forward_time": 0.11536407470703125,
      "step": 13482
    },
    {
      "epoch": 8.228759765625e-05,
      "step": 13482,
      "training_step_time": 0.3869013786315918
    },
    {
      "epoch": 8.2293701171875e-05,
      "model_forward_time": 0.1161346435546875,
      "step": 13483
    },
    {
      "epoch": 8.2293701171875e-05,
      "step": 13483,
      "training_step_time": 0.39165306091308594
    },
    {
      "epoch": 8.22998046875e-05,
      "model_forward_time": 0.11501693725585938,
      "step": 13484
    },
    {
      "epoch": 8.22998046875e-05,
      "step": 13484,
      "training_step_time": 0.3961803913116455
    },
    {
      "epoch": 8.2305908203125e-05,
      "model_forward_time": 0.11497282981872559,
      "step": 13485
    },
    {
      "epoch": 8.2305908203125e-05,
      "step": 13485,
      "training_step_time": 0.9631850719451904
    },
    {
      "epoch": 8.231201171875e-05,
      "model_forward_time": 0.11470460891723633,
      "step": 13486
    },
    {
      "epoch": 8.231201171875e-05,
      "step": 13486,
      "training_step_time": 0.39906954765319824
    },
    {
      "epoch": 8.2318115234375e-05,
      "model_forward_time": 0.11454129219055176,
      "step": 13487
    },
    {
      "epoch": 8.2318115234375e-05,
      "step": 13487,
      "training_step_time": 0.3981356620788574
    },
    {
      "epoch": 8.232421875e-05,
      "model_forward_time": 0.114044189453125,
      "step": 13488
    },
    {
      "epoch": 8.232421875e-05,
      "step": 13488,
      "training_step_time": 0.4091978073120117
    },
    {
      "epoch": 8.2330322265625e-05,
      "model_forward_time": 0.1141057014465332,
      "step": 13489
    },
    {
      "epoch": 8.2330322265625e-05,
      "step": 13489,
      "training_step_time": 0.3679070472717285
    },
    {
      "epoch": 8.233642578125e-05,
      "grad_norm": 0.20086444914340973,
      "learning_rate": 9.187339025915802e-05,
      "loss": 0.0684,
      "step": 13490
    },
    {
      "epoch": 8.233642578125e-05,
      "model_forward_time": 0.11474990844726562,
      "step": 13490
    },
    {
      "epoch": 8.233642578125e-05,
      "step": 13490,
      "training_step_time": 0.44619035720825195
    },
    {
      "epoch": 8.2342529296875e-05,
      "model_forward_time": 0.11503744125366211,
      "step": 13491
    },
    {
      "epoch": 8.2342529296875e-05,
      "step": 13491,
      "training_step_time": 0.48592329025268555
    },
    {
      "epoch": 8.23486328125e-05,
      "model_forward_time": 0.11450791358947754,
      "step": 13492
    },
    {
      "epoch": 8.23486328125e-05,
      "step": 13492,
      "training_step_time": 0.43284130096435547
    },
    {
      "epoch": 8.2354736328125e-05,
      "model_forward_time": 0.11506438255310059,
      "step": 13493
    },
    {
      "epoch": 8.2354736328125e-05,
      "step": 13493,
      "training_step_time": 0.383573055267334
    },
    {
      "epoch": 8.236083984375e-05,
      "model_forward_time": 0.11536026000976562,
      "step": 13494
    },
    {
      "epoch": 8.236083984375e-05,
      "step": 13494,
      "training_step_time": 0.39069700241088867
    },
    {
      "epoch": 8.2366943359375e-05,
      "model_forward_time": 0.11481261253356934,
      "step": 13495
    },
    {
      "epoch": 8.2366943359375e-05,
      "step": 13495,
      "training_step_time": 0.40018200874328613
    },
    {
      "epoch": 8.2373046875e-05,
      "model_forward_time": 0.11458897590637207,
      "step": 13496
    },
    {
      "epoch": 8.2373046875e-05,
      "step": 13496,
      "training_step_time": 0.3967864513397217
    },
    {
      "epoch": 8.2379150390625e-05,
      "model_forward_time": 0.11561250686645508,
      "step": 13497
    },
    {
      "epoch": 8.2379150390625e-05,
      "step": 13497,
      "training_step_time": 0.7723932266235352
    },
    {
      "epoch": 8.238525390625e-05,
      "model_forward_time": 0.11420893669128418,
      "step": 13498
    },
    {
      "epoch": 8.238525390625e-05,
      "step": 13498,
      "training_step_time": 0.3834247589111328
    },
    {
      "epoch": 8.2391357421875e-05,
      "model_forward_time": 0.11454439163208008,
      "step": 13499
    },
    {
      "epoch": 8.2391357421875e-05,
      "step": 13499,
      "training_step_time": 0.3845858573913574
    },
    {
      "epoch": 8.23974609375e-05,
      "grad_norm": 0.12967565655708313,
      "learning_rate": 9.185832391312644e-05,
      "loss": 0.0597,
      "step": 13500
    },
    {
      "epoch": 8.23974609375e-05,
      "model_forward_time": 0.11457347869873047,
      "step": 13500
    },
    {
      "epoch": 8.23974609375e-05,
      "step": 13500,
      "training_step_time": 0.4045264720916748
    },
    {
      "epoch": 8.2403564453125e-05,
      "model_forward_time": 0.1143486499786377,
      "step": 13501
    },
    {
      "epoch": 8.2403564453125e-05,
      "step": 13501,
      "training_step_time": 0.42444372177124023
    },
    {
      "epoch": 8.240966796875e-05,
      "model_forward_time": 0.1140589714050293,
      "step": 13502
    },
    {
      "epoch": 8.240966796875e-05,
      "step": 13502,
      "training_step_time": 0.3958470821380615
    },
    {
      "epoch": 8.2415771484375e-05,
      "model_forward_time": 0.11507630348205566,
      "step": 13503
    },
    {
      "epoch": 8.2415771484375e-05,
      "step": 13503,
      "training_step_time": 0.5606296062469482
    },
    {
      "epoch": 8.2421875e-05,
      "model_forward_time": 0.11424016952514648,
      "step": 13504
    },
    {
      "epoch": 8.2421875e-05,
      "step": 13504,
      "training_step_time": 0.43709683418273926
    },
    {
      "epoch": 8.2427978515625e-05,
      "model_forward_time": 0.11508560180664062,
      "step": 13505
    },
    {
      "epoch": 8.2427978515625e-05,
      "step": 13505,
      "training_step_time": 0.4719808101654053
    },
    {
      "epoch": 8.243408203125e-05,
      "model_forward_time": 0.11421585083007812,
      "step": 13506
    },
    {
      "epoch": 8.243408203125e-05,
      "step": 13506,
      "training_step_time": 0.3869657516479492
    },
    {
      "epoch": 8.2440185546875e-05,
      "model_forward_time": 0.11485934257507324,
      "step": 13507
    },
    {
      "epoch": 8.2440185546875e-05,
      "step": 13507,
      "training_step_time": 0.38930225372314453
    },
    {
      "epoch": 8.24462890625e-05,
      "model_forward_time": 0.11376500129699707,
      "step": 13508
    },
    {
      "epoch": 8.24462890625e-05,
      "step": 13508,
      "training_step_time": 0.38671398162841797
    },
    {
      "epoch": 8.2452392578125e-05,
      "model_forward_time": 0.11516928672790527,
      "step": 13509
    },
    {
      "epoch": 8.2452392578125e-05,
      "step": 13509,
      "training_step_time": 0.7730093002319336
    },
    {
      "epoch": 8.245849609375e-05,
      "grad_norm": 0.18373410403728485,
      "learning_rate": 9.184324485164124e-05,
      "loss": 0.0621,
      "step": 13510
    },
    {
      "epoch": 8.245849609375e-05,
      "model_forward_time": 0.1138143539428711,
      "step": 13510
    },
    {
      "epoch": 8.245849609375e-05,
      "step": 13510,
      "training_step_time": 0.38936376571655273
    },
    {
      "epoch": 8.2464599609375e-05,
      "model_forward_time": 0.1145470142364502,
      "step": 13511
    },
    {
      "epoch": 8.2464599609375e-05,
      "step": 13511,
      "training_step_time": 0.3902125358581543
    },
    {
      "epoch": 8.2470703125e-05,
      "model_forward_time": 0.11480402946472168,
      "step": 13512
    },
    {
      "epoch": 8.2470703125e-05,
      "step": 13512,
      "training_step_time": 0.3908100128173828
    },
    {
      "epoch": 8.2476806640625e-05,
      "model_forward_time": 0.11413407325744629,
      "step": 13513
    },
    {
      "epoch": 8.2476806640625e-05,
      "step": 13513,
      "training_step_time": 0.44001150131225586
    },
    {
      "epoch": 8.248291015625e-05,
      "model_forward_time": 0.11424779891967773,
      "step": 13514
    },
    {
      "epoch": 8.248291015625e-05,
      "step": 13514,
      "training_step_time": 0.432053804397583
    },
    {
      "epoch": 8.2489013671875e-05,
      "model_forward_time": 0.11496543884277344,
      "step": 13515
    },
    {
      "epoch": 8.2489013671875e-05,
      "step": 13515,
      "training_step_time": 0.5350818634033203
    },
    {
      "epoch": 8.24951171875e-05,
      "model_forward_time": 0.1146552562713623,
      "step": 13516
    },
    {
      "epoch": 8.24951171875e-05,
      "step": 13516,
      "training_step_time": 0.3652181625366211
    },
    {
      "epoch": 8.2501220703125e-05,
      "model_forward_time": 0.11439990997314453,
      "step": 13517
    },
    {
      "epoch": 8.2501220703125e-05,
      "step": 13517,
      "training_step_time": 0.4835655689239502
    },
    {
      "epoch": 8.250732421875e-05,
      "model_forward_time": 0.11473679542541504,
      "step": 13518
    },
    {
      "epoch": 8.250732421875e-05,
      "step": 13518,
      "training_step_time": 0.465440034866333
    },
    {
      "epoch": 8.2513427734375e-05,
      "model_forward_time": 0.11427855491638184,
      "step": 13519
    },
    {
      "epoch": 8.2513427734375e-05,
      "step": 13519,
      "training_step_time": 0.41700196266174316
    },
    {
      "epoch": 8.251953125e-05,
      "grad_norm": 0.17217029631137848,
      "learning_rate": 9.182815307928307e-05,
      "loss": 0.0526,
      "step": 13520
    },
    {
      "epoch": 8.251953125e-05,
      "model_forward_time": 0.11507749557495117,
      "step": 13520
    },
    {
      "epoch": 8.251953125e-05,
      "step": 13520,
      "training_step_time": 0.38361549377441406
    },
    {
      "epoch": 8.2525634765625e-05,
      "model_forward_time": 0.11542963981628418,
      "step": 13521
    },
    {
      "epoch": 8.2525634765625e-05,
      "step": 13521,
      "training_step_time": 0.511237382888794
    },
    {
      "epoch": 8.253173828125e-05,
      "model_forward_time": 0.11490678787231445,
      "step": 13522
    },
    {
      "epoch": 8.253173828125e-05,
      "step": 13522,
      "training_step_time": 0.39308953285217285
    },
    {
      "epoch": 8.2537841796875e-05,
      "model_forward_time": 0.1165468692779541,
      "step": 13523
    },
    {
      "epoch": 8.2537841796875e-05,
      "step": 13523,
      "training_step_time": 0.38815760612487793
    },
    {
      "epoch": 8.25439453125e-05,
      "model_forward_time": 0.11517643928527832,
      "step": 13524
    },
    {
      "epoch": 8.25439453125e-05,
      "step": 13524,
      "training_step_time": 0.40358901023864746
    },
    {
      "epoch": 8.2550048828125e-05,
      "model_forward_time": 0.11669516563415527,
      "step": 13525
    },
    {
      "epoch": 8.2550048828125e-05,
      "step": 13525,
      "training_step_time": 0.39903712272644043
    },
    {
      "epoch": 8.255615234375e-05,
      "model_forward_time": 0.11489057540893555,
      "step": 13526
    },
    {
      "epoch": 8.255615234375e-05,
      "step": 13526,
      "training_step_time": 0.39874935150146484
    },
    {
      "epoch": 8.2562255859375e-05,
      "model_forward_time": 0.11524534225463867,
      "step": 13527
    },
    {
      "epoch": 8.2562255859375e-05,
      "step": 13527,
      "training_step_time": 0.9238178730010986
    },
    {
      "epoch": 8.2568359375e-05,
      "model_forward_time": 0.11388564109802246,
      "step": 13528
    },
    {
      "epoch": 8.2568359375e-05,
      "step": 13528,
      "training_step_time": 0.40021705627441406
    },
    {
      "epoch": 8.2574462890625e-05,
      "model_forward_time": 0.1147925853729248,
      "step": 13529
    },
    {
      "epoch": 8.2574462890625e-05,
      "step": 13529,
      "training_step_time": 0.4020678997039795
    },
    {
      "epoch": 8.258056640625e-05,
      "grad_norm": 0.14269573986530304,
      "learning_rate": 9.18130486006364e-05,
      "loss": 0.0561,
      "step": 13530
    },
    {
      "epoch": 8.258056640625e-05,
      "model_forward_time": 0.11516737937927246,
      "step": 13530
    },
    {
      "epoch": 8.258056640625e-05,
      "step": 13530,
      "training_step_time": 0.40786051750183105
    },
    {
      "epoch": 8.2586669921875e-05,
      "model_forward_time": 0.11425995826721191,
      "step": 13531
    },
    {
      "epoch": 8.2586669921875e-05,
      "step": 13531,
      "training_step_time": 0.4456913471221924
    },
    {
      "epoch": 8.25927734375e-05,
      "model_forward_time": 0.1158134937286377,
      "step": 13532
    },
    {
      "epoch": 8.25927734375e-05,
      "step": 13532,
      "training_step_time": 0.3861393928527832
    },
    {
      "epoch": 8.2598876953125e-05,
      "model_forward_time": 0.11480093002319336,
      "step": 13533
    },
    {
      "epoch": 8.2598876953125e-05,
      "step": 13533,
      "training_step_time": 0.669053316116333
    },
    {
      "epoch": 8.260498046875e-05,
      "model_forward_time": 0.11450695991516113,
      "step": 13534
    },
    {
      "epoch": 8.260498046875e-05,
      "step": 13534,
      "training_step_time": 0.3836534023284912
    },
    {
      "epoch": 8.2611083984375e-05,
      "model_forward_time": 0.11404299736022949,
      "step": 13535
    },
    {
      "epoch": 8.2611083984375e-05,
      "step": 13535,
      "training_step_time": 0.38796234130859375
    },
    {
      "epoch": 8.26171875e-05,
      "model_forward_time": 0.11488556861877441,
      "step": 13536
    },
    {
      "epoch": 8.26171875e-05,
      "step": 13536,
      "training_step_time": 0.3929166793823242
    },
    {
      "epoch": 8.2623291015625e-05,
      "model_forward_time": 0.11432242393493652,
      "step": 13537
    },
    {
      "epoch": 8.2623291015625e-05,
      "step": 13537,
      "training_step_time": 0.4016716480255127
    },
    {
      "epoch": 8.262939453125e-05,
      "model_forward_time": 0.11478877067565918,
      "step": 13538
    },
    {
      "epoch": 8.262939453125e-05,
      "step": 13538,
      "training_step_time": 0.4133751392364502
    },
    {
      "epoch": 8.2635498046875e-05,
      "model_forward_time": 0.11469173431396484,
      "step": 13539
    },
    {
      "epoch": 8.2635498046875e-05,
      "step": 13539,
      "training_step_time": 1.0002131462097168
    },
    {
      "epoch": 8.26416015625e-05,
      "grad_norm": 0.1322082132101059,
      "learning_rate": 9.179793142028959e-05,
      "loss": 0.0638,
      "step": 13540
    },
    {
      "epoch": 8.26416015625e-05,
      "model_forward_time": 0.11390948295593262,
      "step": 13540
    },
    {
      "epoch": 8.26416015625e-05,
      "step": 13540,
      "training_step_time": 0.3888890743255615
    },
    {
      "epoch": 8.2647705078125e-05,
      "model_forward_time": 0.11391305923461914,
      "step": 13541
    },
    {
      "epoch": 8.2647705078125e-05,
      "step": 13541,
      "training_step_time": 0.38904714584350586
    },
    {
      "epoch": 8.265380859375e-05,
      "model_forward_time": 0.11386346817016602,
      "step": 13542
    },
    {
      "epoch": 8.265380859375e-05,
      "step": 13542,
      "training_step_time": 0.41998982429504395
    },
    {
      "epoch": 8.2659912109375e-05,
      "model_forward_time": 0.11505246162414551,
      "step": 13543
    },
    {
      "epoch": 8.2659912109375e-05,
      "step": 13543,
      "training_step_time": 0.38911962509155273
    },
    {
      "epoch": 8.2666015625e-05,
      "model_forward_time": 0.11440277099609375,
      "step": 13544
    },
    {
      "epoch": 8.2666015625e-05,
      "step": 13544,
      "training_step_time": 0.47628235816955566
    },
    {
      "epoch": 8.2672119140625e-05,
      "model_forward_time": 0.11531329154968262,
      "step": 13545
    },
    {
      "epoch": 8.2672119140625e-05,
      "step": 13545,
      "training_step_time": 0.7438035011291504
    },
    {
      "epoch": 8.267822265625e-05,
      "model_forward_time": 0.11411833763122559,
      "step": 13546
    },
    {
      "epoch": 8.267822265625e-05,
      "step": 13546,
      "training_step_time": 0.3843808174133301
    },
    {
      "epoch": 8.2684326171875e-05,
      "model_forward_time": 0.11432075500488281,
      "step": 13547
    },
    {
      "epoch": 8.2684326171875e-05,
      "step": 13547,
      "training_step_time": 0.3920109272003174
    },
    {
      "epoch": 8.26904296875e-05,
      "model_forward_time": 0.1139829158782959,
      "step": 13548
    },
    {
      "epoch": 8.26904296875e-05,
      "step": 13548,
      "training_step_time": 0.39257264137268066
    },
    {
      "epoch": 8.2696533203125e-05,
      "model_forward_time": 0.11462783813476562,
      "step": 13549
    },
    {
      "epoch": 8.2696533203125e-05,
      "step": 13549,
      "training_step_time": 0.3867063522338867
    },
    {
      "epoch": 8.270263671875e-05,
      "grad_norm": 0.19892600178718567,
      "learning_rate": 9.17828015428348e-05,
      "loss": 0.0586,
      "step": 13550
    },
    {
      "epoch": 8.270263671875e-05,
      "model_forward_time": 0.11445975303649902,
      "step": 13550
    },
    {
      "epoch": 8.270263671875e-05,
      "step": 13550,
      "training_step_time": 0.40096211433410645
    },
    {
      "epoch": 8.2708740234375e-05,
      "model_forward_time": 0.11580228805541992,
      "step": 13551
    },
    {
      "epoch": 8.2708740234375e-05,
      "step": 13551,
      "training_step_time": 0.6173007488250732
    },
    {
      "epoch": 8.271484375e-05,
      "model_forward_time": 0.1149296760559082,
      "step": 13552
    },
    {
      "epoch": 8.271484375e-05,
      "step": 13552,
      "training_step_time": 0.38977932929992676
    },
    {
      "epoch": 8.2720947265625e-05,
      "model_forward_time": 0.11446356773376465,
      "step": 13553
    },
    {
      "epoch": 8.2720947265625e-05,
      "step": 13553,
      "training_step_time": 0.3941466808319092
    },
    {
      "epoch": 8.272705078125e-05,
      "model_forward_time": 0.11522507667541504,
      "step": 13554
    },
    {
      "epoch": 8.272705078125e-05,
      "step": 13554,
      "training_step_time": 0.39707326889038086
    },
    {
      "epoch": 8.2733154296875e-05,
      "model_forward_time": 0.11490440368652344,
      "step": 13555
    },
    {
      "epoch": 8.2733154296875e-05,
      "step": 13555,
      "training_step_time": 0.3919107913970947
    },
    {
      "epoch": 8.27392578125e-05,
      "model_forward_time": 0.1147913932800293,
      "step": 13556
    },
    {
      "epoch": 8.27392578125e-05,
      "step": 13556,
      "training_step_time": 0.44861507415771484
    },
    {
      "epoch": 8.2745361328125e-05,
      "model_forward_time": 0.11511015892028809,
      "step": 13557
    },
    {
      "epoch": 8.2745361328125e-05,
      "step": 13557,
      "training_step_time": 0.6888329982757568
    },
    {
      "epoch": 8.275146484375e-05,
      "model_forward_time": 0.11448049545288086,
      "step": 13558
    },
    {
      "epoch": 8.275146484375e-05,
      "step": 13558,
      "training_step_time": 0.4684009552001953
    },
    {
      "epoch": 8.2757568359375e-05,
      "model_forward_time": 0.11519432067871094,
      "step": 13559
    },
    {
      "epoch": 8.2757568359375e-05,
      "step": 13559,
      "training_step_time": 0.41170501708984375
    },
    {
      "epoch": 8.2763671875e-05,
      "grad_norm": 0.1311790645122528,
      "learning_rate": 9.176765897286813e-05,
      "loss": 0.0638,
      "step": 13560
    },
    {
      "epoch": 8.2763671875e-05,
      "model_forward_time": 0.11384201049804688,
      "step": 13560
    },
    {
      "epoch": 8.2763671875e-05,
      "step": 13560,
      "training_step_time": 0.39366984367370605
    },
    {
      "epoch": 8.2769775390625e-05,
      "model_forward_time": 0.11450481414794922,
      "step": 13561
    },
    {
      "epoch": 8.2769775390625e-05,
      "step": 13561,
      "training_step_time": 0.39035534858703613
    },
    {
      "epoch": 8.277587890625e-05,
      "model_forward_time": 0.11414361000061035,
      "step": 13562
    },
    {
      "epoch": 8.277587890625e-05,
      "step": 13562,
      "training_step_time": 0.38970088958740234
    },
    {
      "epoch": 8.2781982421875e-05,
      "model_forward_time": 0.11495399475097656,
      "step": 13563
    },
    {
      "epoch": 8.2781982421875e-05,
      "step": 13563,
      "training_step_time": 0.7000014781951904
    },
    {
      "epoch": 8.27880859375e-05,
      "model_forward_time": 0.1154031753540039,
      "step": 13564
    },
    {
      "epoch": 8.27880859375e-05,
      "step": 13564,
      "training_step_time": 0.3822360038757324
    },
    {
      "epoch": 8.2794189453125e-05,
      "model_forward_time": 0.11462044715881348,
      "step": 13565
    },
    {
      "epoch": 8.2794189453125e-05,
      "step": 13565,
      "training_step_time": 0.3869454860687256
    },
    {
      "epoch": 8.280029296875e-05,
      "model_forward_time": 0.11419677734375,
      "step": 13566
    },
    {
      "epoch": 8.280029296875e-05,
      "step": 13566,
      "training_step_time": 0.3891785144805908
    },
    {
      "epoch": 8.2806396484375e-05,
      "model_forward_time": 0.11466646194458008,
      "step": 13567
    },
    {
      "epoch": 8.2806396484375e-05,
      "step": 13567,
      "training_step_time": 0.39020371437072754
    },
    {
      "epoch": 8.28125e-05,
      "model_forward_time": 0.11438345909118652,
      "step": 13568
    },
    {
      "epoch": 8.28125e-05,
      "step": 13568,
      "training_step_time": 0.3849830627441406
    },
    {
      "epoch": 8.2818603515625e-05,
      "model_forward_time": 0.11532449722290039,
      "step": 13569
    },
    {
      "epoch": 8.2818603515625e-05,
      "step": 13569,
      "training_step_time": 0.7287330627441406
    },
    {
      "epoch": 8.282470703125e-05,
      "grad_norm": 0.13810230791568756,
      "learning_rate": 9.175250371498946e-05,
      "loss": 0.0508,
      "step": 13570
    },
    {
      "epoch": 8.282470703125e-05,
      "model_forward_time": 0.1146843433380127,
      "step": 13570
    },
    {
      "epoch": 8.282470703125e-05,
      "step": 13570,
      "training_step_time": 0.3743293285369873
    },
    {
      "epoch": 8.2830810546875e-05,
      "model_forward_time": 0.11449933052062988,
      "step": 13571
    },
    {
      "epoch": 8.2830810546875e-05,
      "step": 13571,
      "training_step_time": 0.4500117301940918
    },
    {
      "epoch": 8.28369140625e-05,
      "model_forward_time": 0.11456513404846191,
      "step": 13572
    },
    {
      "epoch": 8.28369140625e-05,
      "step": 13572,
      "training_step_time": 0.39818763732910156
    },
    {
      "epoch": 8.2843017578125e-05,
      "model_forward_time": 0.1140134334564209,
      "step": 13573
    },
    {
      "epoch": 8.2843017578125e-05,
      "step": 13573,
      "training_step_time": 0.40859460830688477
    },
    {
      "epoch": 8.284912109375e-05,
      "model_forward_time": 0.11439943313598633,
      "step": 13574
    },
    {
      "epoch": 8.284912109375e-05,
      "step": 13574,
      "training_step_time": 0.38402366638183594
    },
    {
      "epoch": 8.2855224609375e-05,
      "model_forward_time": 0.11586904525756836,
      "step": 13575
    },
    {
      "epoch": 8.2855224609375e-05,
      "step": 13575,
      "training_step_time": 0.5294520854949951
    },
    {
      "epoch": 8.2861328125e-05,
      "model_forward_time": 0.11535811424255371,
      "step": 13576
    },
    {
      "epoch": 8.2861328125e-05,
      "step": 13576,
      "training_step_time": 0.39278101921081543
    },
    {
      "epoch": 8.2867431640625e-05,
      "model_forward_time": 0.11475896835327148,
      "step": 13577
    },
    {
      "epoch": 8.2867431640625e-05,
      "step": 13577,
      "training_step_time": 0.4141261577606201
    },
    {
      "epoch": 8.287353515625e-05,
      "model_forward_time": 0.11485433578491211,
      "step": 13578
    },
    {
      "epoch": 8.287353515625e-05,
      "step": 13578,
      "training_step_time": 0.3960847854614258
    },
    {
      "epoch": 8.2879638671875e-05,
      "model_forward_time": 0.11462903022766113,
      "step": 13579
    },
    {
      "epoch": 8.2879638671875e-05,
      "step": 13579,
      "training_step_time": 0.39484596252441406
    },
    {
      "epoch": 8.28857421875e-05,
      "grad_norm": 0.14899680018424988,
      "learning_rate": 9.173733577380258e-05,
      "loss": 0.0607,
      "step": 13580
    },
    {
      "epoch": 8.28857421875e-05,
      "model_forward_time": 0.11437225341796875,
      "step": 13580
    },
    {
      "epoch": 8.28857421875e-05,
      "step": 13580,
      "training_step_time": 0.38306427001953125
    },
    {
      "epoch": 8.2891845703125e-05,
      "model_forward_time": 0.11446595191955566,
      "step": 13581
    },
    {
      "epoch": 8.2891845703125e-05,
      "step": 13581,
      "training_step_time": 0.7697727680206299
    },
    {
      "epoch": 8.289794921875e-05,
      "model_forward_time": 0.11475110054016113,
      "step": 13582
    },
    {
      "epoch": 8.289794921875e-05,
      "step": 13582,
      "training_step_time": 0.38791823387145996
    },
    {
      "epoch": 8.2904052734375e-05,
      "model_forward_time": 0.1142723560333252,
      "step": 13583
    },
    {
      "epoch": 8.2904052734375e-05,
      "step": 13583,
      "training_step_time": 0.4066488742828369
    },
    {
      "epoch": 8.291015625e-05,
      "model_forward_time": 0.11434364318847656,
      "step": 13584
    },
    {
      "epoch": 8.291015625e-05,
      "step": 13584,
      "training_step_time": 0.3649911880493164
    },
    {
      "epoch": 8.2916259765625e-05,
      "model_forward_time": 0.11415624618530273,
      "step": 13585
    },
    {
      "epoch": 8.2916259765625e-05,
      "step": 13585,
      "training_step_time": 0.4546985626220703
    },
    {
      "epoch": 8.292236328125e-05,
      "model_forward_time": 0.11423182487487793,
      "step": 13586
    },
    {
      "epoch": 8.292236328125e-05,
      "step": 13586,
      "training_step_time": 0.4273838996887207
    },
    {
      "epoch": 8.2928466796875e-05,
      "model_forward_time": 0.11527180671691895,
      "step": 13587
    },
    {
      "epoch": 8.2928466796875e-05,
      "step": 13587,
      "training_step_time": 0.49543213844299316
    },
    {
      "epoch": 8.29345703125e-05,
      "model_forward_time": 0.11513805389404297,
      "step": 13588
    },
    {
      "epoch": 8.29345703125e-05,
      "step": 13588,
      "training_step_time": 0.39297962188720703
    },
    {
      "epoch": 8.2940673828125e-05,
      "model_forward_time": 0.11509513854980469,
      "step": 13589
    },
    {
      "epoch": 8.2940673828125e-05,
      "step": 13589,
      "training_step_time": 0.4478635787963867
    },
    {
      "epoch": 8.294677734375e-05,
      "grad_norm": 0.18465489149093628,
      "learning_rate": 9.17221551539151e-05,
      "loss": 0.0563,
      "step": 13590
    },
    {
      "epoch": 8.294677734375e-05,
      "model_forward_time": 0.11436057090759277,
      "step": 13590
    },
    {
      "epoch": 8.294677734375e-05,
      "step": 13590,
      "training_step_time": 0.4221682548522949
    },
    {
      "epoch": 8.2952880859375e-05,
      "model_forward_time": 0.1149439811706543,
      "step": 13591
    },
    {
      "epoch": 8.2952880859375e-05,
      "step": 13591,
      "training_step_time": 0.3963043689727783
    },
    {
      "epoch": 8.2958984375e-05,
      "model_forward_time": 0.11512184143066406,
      "step": 13592
    },
    {
      "epoch": 8.2958984375e-05,
      "step": 13592,
      "training_step_time": 0.39054298400878906
    },
    {
      "epoch": 8.2965087890625e-05,
      "model_forward_time": 0.1148691177368164,
      "step": 13593
    },
    {
      "epoch": 8.2965087890625e-05,
      "step": 13593,
      "training_step_time": 0.7543826103210449
    },
    {
      "epoch": 8.297119140625e-05,
      "model_forward_time": 0.11448454856872559,
      "step": 13594
    },
    {
      "epoch": 8.297119140625e-05,
      "step": 13594,
      "training_step_time": 0.38028454780578613
    },
    {
      "epoch": 8.2977294921875e-05,
      "model_forward_time": 0.11398744583129883,
      "step": 13595
    },
    {
      "epoch": 8.2977294921875e-05,
      "step": 13595,
      "training_step_time": 0.39562416076660156
    },
    {
      "epoch": 8.29833984375e-05,
      "model_forward_time": 0.11492705345153809,
      "step": 13596
    },
    {
      "epoch": 8.29833984375e-05,
      "step": 13596,
      "training_step_time": 0.398681640625
    },
    {
      "epoch": 8.2989501953125e-05,
      "model_forward_time": 0.11457347869873047,
      "step": 13597
    },
    {
      "epoch": 8.2989501953125e-05,
      "step": 13597,
      "training_step_time": 0.412153959274292
    },
    {
      "epoch": 8.299560546875e-05,
      "model_forward_time": 0.11476993560791016,
      "step": 13598
    },
    {
      "epoch": 8.299560546875e-05,
      "step": 13598,
      "training_step_time": 0.39269566535949707
    },
    {
      "epoch": 8.3001708984375e-05,
      "model_forward_time": 0.11515355110168457,
      "step": 13599
    },
    {
      "epoch": 8.3001708984375e-05,
      "step": 13599,
      "training_step_time": 0.9026157855987549
    },
    {
      "epoch": 8.30078125e-05,
      "grad_norm": 0.22749686241149902,
      "learning_rate": 9.17069618599385e-05,
      "loss": 0.0583,
      "step": 13600
    },
    {
      "epoch": 8.30078125e-05,
      "model_forward_time": 0.11463785171508789,
      "step": 13600
    },
    {
      "epoch": 8.30078125e-05,
      "step": 13600,
      "training_step_time": 0.3814840316772461
    },
    {
      "epoch": 8.3013916015625e-05,
      "model_forward_time": 0.11398792266845703,
      "step": 13601
    },
    {
      "epoch": 8.3013916015625e-05,
      "step": 13601,
      "training_step_time": 0.444685697555542
    },
    {
      "epoch": 8.302001953125e-05,
      "model_forward_time": 0.11470651626586914,
      "step": 13602
    },
    {
      "epoch": 8.302001953125e-05,
      "step": 13602,
      "training_step_time": 0.4017961025238037
    },
    {
      "epoch": 8.3026123046875e-05,
      "model_forward_time": 0.11492466926574707,
      "step": 13603
    },
    {
      "epoch": 8.3026123046875e-05,
      "step": 13603,
      "training_step_time": 0.3850679397583008
    },
    {
      "epoch": 8.30322265625e-05,
      "model_forward_time": 0.11422991752624512,
      "step": 13604
    },
    {
      "epoch": 8.30322265625e-05,
      "step": 13604,
      "training_step_time": 0.3921687602996826
    },
    {
      "epoch": 8.3038330078125e-05,
      "model_forward_time": 0.11486434936523438,
      "step": 13605
    },
    {
      "epoch": 8.3038330078125e-05,
      "step": 13605,
      "training_step_time": 0.528914213180542
    },
    {
      "epoch": 8.304443359375e-05,
      "model_forward_time": 0.1148073673248291,
      "step": 13606
    },
    {
      "epoch": 8.304443359375e-05,
      "step": 13606,
      "training_step_time": 0.39984846115112305
    },
    {
      "epoch": 8.3050537109375e-05,
      "model_forward_time": 0.11552619934082031,
      "step": 13607
    },
    {
      "epoch": 8.3050537109375e-05,
      "step": 13607,
      "training_step_time": 0.39562034606933594
    },
    {
      "epoch": 8.3056640625e-05,
      "model_forward_time": 0.1150674819946289,
      "step": 13608
    },
    {
      "epoch": 8.3056640625e-05,
      "step": 13608,
      "training_step_time": 0.3865692615509033
    },
    {
      "epoch": 8.3062744140625e-05,
      "model_forward_time": 0.11485671997070312,
      "step": 13609
    },
    {
      "epoch": 8.3062744140625e-05,
      "step": 13609,
      "training_step_time": 0.4047713279724121
    },
    {
      "epoch": 8.306884765625e-05,
      "grad_norm": 0.16955018043518066,
      "learning_rate": 9.169175589648809e-05,
      "loss": 0.0571,
      "step": 13610
    },
    {
      "epoch": 8.306884765625e-05,
      "model_forward_time": 0.1146540641784668,
      "step": 13610
    },
    {
      "epoch": 8.306884765625e-05,
      "step": 13610,
      "training_step_time": 0.5128655433654785
    },
    {
      "epoch": 8.3074951171875e-05,
      "model_forward_time": 0.11606264114379883,
      "step": 13611
    },
    {
      "epoch": 8.3074951171875e-05,
      "step": 13611,
      "training_step_time": 0.5796115398406982
    },
    {
      "epoch": 8.30810546875e-05,
      "model_forward_time": 0.11477375030517578,
      "step": 13612
    },
    {
      "epoch": 8.30810546875e-05,
      "step": 13612,
      "training_step_time": 0.4817633628845215
    },
    {
      "epoch": 8.3087158203125e-05,
      "model_forward_time": 0.11483407020568848,
      "step": 13613
    },
    {
      "epoch": 8.3087158203125e-05,
      "step": 13613,
      "training_step_time": 0.41576528549194336
    },
    {
      "epoch": 8.309326171875e-05,
      "model_forward_time": 0.11454963684082031,
      "step": 13614
    },
    {
      "epoch": 8.309326171875e-05,
      "step": 13614,
      "training_step_time": 0.4156360626220703
    },
    {
      "epoch": 8.3099365234375e-05,
      "model_forward_time": 0.11500954627990723,
      "step": 13615
    },
    {
      "epoch": 8.3099365234375e-05,
      "step": 13615,
      "training_step_time": 0.4131605625152588
    },
    {
      "epoch": 8.310546875e-05,
      "model_forward_time": 0.11474037170410156,
      "step": 13616
    },
    {
      "epoch": 8.310546875e-05,
      "step": 13616,
      "training_step_time": 0.38697195053100586
    },
    {
      "epoch": 8.3111572265625e-05,
      "model_forward_time": 0.11456608772277832,
      "step": 13617
    },
    {
      "epoch": 8.3111572265625e-05,
      "step": 13617,
      "training_step_time": 0.6602523326873779
    },
    {
      "epoch": 8.311767578125e-05,
      "model_forward_time": 0.11462163925170898,
      "step": 13618
    },
    {
      "epoch": 8.311767578125e-05,
      "step": 13618,
      "training_step_time": 0.3780100345611572
    },
    {
      "epoch": 8.3123779296875e-05,
      "model_forward_time": 0.11428999900817871,
      "step": 13619
    },
    {
      "epoch": 8.3123779296875e-05,
      "step": 13619,
      "training_step_time": 0.39215683937072754
    },
    {
      "epoch": 8.31298828125e-05,
      "grad_norm": 0.2828015387058258,
      "learning_rate": 9.167653726818305e-05,
      "loss": 0.0612,
      "step": 13620
    },
    {
      "epoch": 8.31298828125e-05,
      "model_forward_time": 0.11459970474243164,
      "step": 13620
    },
    {
      "epoch": 8.31298828125e-05,
      "step": 13620,
      "training_step_time": 0.389876127243042
    },
    {
      "epoch": 8.3135986328125e-05,
      "model_forward_time": 0.11494898796081543,
      "step": 13621
    },
    {
      "epoch": 8.3135986328125e-05,
      "step": 13621,
      "training_step_time": 0.3872253894805908
    },
    {
      "epoch": 8.314208984375e-05,
      "model_forward_time": 0.11410641670227051,
      "step": 13622
    },
    {
      "epoch": 8.314208984375e-05,
      "step": 13622,
      "training_step_time": 0.3839435577392578
    },
    {
      "epoch": 8.3148193359375e-05,
      "model_forward_time": 0.11518311500549316,
      "step": 13623
    },
    {
      "epoch": 8.3148193359375e-05,
      "step": 13623,
      "training_step_time": 0.6082866191864014
    },
    {
      "epoch": 8.3154296875e-05,
      "model_forward_time": 0.11441993713378906,
      "step": 13624
    },
    {
      "epoch": 8.3154296875e-05,
      "step": 13624,
      "training_step_time": 0.3953835964202881
    },
    {
      "epoch": 8.3160400390625e-05,
      "model_forward_time": 0.1143646240234375,
      "step": 13625
    },
    {
      "epoch": 8.3160400390625e-05,
      "step": 13625,
      "training_step_time": 0.3917679786682129
    },
    {
      "epoch": 8.316650390625e-05,
      "model_forward_time": 0.11463093757629395,
      "step": 13626
    },
    {
      "epoch": 8.316650390625e-05,
      "step": 13626,
      "training_step_time": 0.45980405807495117
    },
    {
      "epoch": 8.3172607421875e-05,
      "model_forward_time": 0.11430597305297852,
      "step": 13627
    },
    {
      "epoch": 8.3172607421875e-05,
      "step": 13627,
      "training_step_time": 0.41765689849853516
    },
    {
      "epoch": 8.31787109375e-05,
      "model_forward_time": 0.11465835571289062,
      "step": 13628
    },
    {
      "epoch": 8.31787109375e-05,
      "step": 13628,
      "training_step_time": 0.44185853004455566
    },
    {
      "epoch": 8.3184814453125e-05,
      "model_forward_time": 0.11483454704284668,
      "step": 13629
    },
    {
      "epoch": 8.3184814453125e-05,
      "step": 13629,
      "training_step_time": 0.5921766757965088
    },
    {
      "epoch": 8.319091796875e-05,
      "grad_norm": 0.17315898835659027,
      "learning_rate": 9.16613059796464e-05,
      "loss": 0.0638,
      "step": 13630
    },
    {
      "epoch": 8.319091796875e-05,
      "model_forward_time": 0.11480545997619629,
      "step": 13630
    },
    {
      "epoch": 8.319091796875e-05,
      "step": 13630,
      "training_step_time": 0.3949406147003174
    },
    {
      "epoch": 8.3197021484375e-05,
      "model_forward_time": 0.11463451385498047,
      "step": 13631
    },
    {
      "epoch": 8.3197021484375e-05,
      "step": 13631,
      "training_step_time": 0.4006686210632324
    },
    {
      "epoch": 8.3203125e-05,
      "model_forward_time": 0.11450958251953125,
      "step": 13632
    },
    {
      "epoch": 8.3203125e-05,
      "step": 13632,
      "training_step_time": 0.4016444683074951
    },
    {
      "epoch": 8.3209228515625e-05,
      "model_forward_time": 0.11496543884277344,
      "step": 13633
    },
    {
      "epoch": 8.3209228515625e-05,
      "step": 13633,
      "training_step_time": 0.38804078102111816
    },
    {
      "epoch": 8.321533203125e-05,
      "model_forward_time": 0.1152653694152832,
      "step": 13634
    },
    {
      "epoch": 8.321533203125e-05,
      "step": 13634,
      "training_step_time": 0.38384246826171875
    },
    {
      "epoch": 8.3221435546875e-05,
      "model_forward_time": 0.11449551582336426,
      "step": 13635
    },
    {
      "epoch": 8.3221435546875e-05,
      "step": 13635,
      "training_step_time": 0.730828046798706
    },
    {
      "epoch": 8.32275390625e-05,
      "model_forward_time": 0.11484408378601074,
      "step": 13636
    },
    {
      "epoch": 8.32275390625e-05,
      "step": 13636,
      "training_step_time": 0.38837528228759766
    },
    {
      "epoch": 8.3233642578125e-05,
      "model_forward_time": 0.11459684371948242,
      "step": 13637
    },
    {
      "epoch": 8.3233642578125e-05,
      "step": 13637,
      "training_step_time": 0.3985724449157715
    },
    {
      "epoch": 8.323974609375e-05,
      "model_forward_time": 0.11469292640686035,
      "step": 13638
    },
    {
      "epoch": 8.323974609375e-05,
      "step": 13638,
      "training_step_time": 0.4381725788116455
    },
    {
      "epoch": 8.3245849609375e-05,
      "model_forward_time": 0.11514115333557129,
      "step": 13639
    },
    {
      "epoch": 8.3245849609375e-05,
      "step": 13639,
      "training_step_time": 0.4246084690093994
    },
    {
      "epoch": 8.3251953125e-05,
      "grad_norm": 0.1297161728143692,
      "learning_rate": 9.164606203550497e-05,
      "loss": 0.0585,
      "step": 13640
    },
    {
      "epoch": 8.3251953125e-05,
      "model_forward_time": 0.11485552787780762,
      "step": 13640
    },
    {
      "epoch": 8.3251953125e-05,
      "step": 13640,
      "training_step_time": 0.4510335922241211
    },
    {
      "epoch": 8.3258056640625e-05,
      "model_forward_time": 0.11499571800231934,
      "step": 13641
    },
    {
      "epoch": 8.3258056640625e-05,
      "step": 13641,
      "training_step_time": 0.6356735229492188
    },
    {
      "epoch": 8.326416015625e-05,
      "model_forward_time": 0.11466145515441895,
      "step": 13642
    },
    {
      "epoch": 8.326416015625e-05,
      "step": 13642,
      "training_step_time": 0.3825962543487549
    },
    {
      "epoch": 8.3270263671875e-05,
      "model_forward_time": 0.1143491268157959,
      "step": 13643
    },
    {
      "epoch": 8.3270263671875e-05,
      "step": 13643,
      "training_step_time": 0.3813815116882324
    },
    {
      "epoch": 8.32763671875e-05,
      "model_forward_time": 0.11491131782531738,
      "step": 13644
    },
    {
      "epoch": 8.32763671875e-05,
      "step": 13644,
      "training_step_time": 0.38310885429382324
    },
    {
      "epoch": 8.3282470703125e-05,
      "model_forward_time": 0.11620450019836426,
      "step": 13645
    },
    {
      "epoch": 8.3282470703125e-05,
      "step": 13645,
      "training_step_time": 0.38197970390319824
    },
    {
      "epoch": 8.328857421875e-05,
      "model_forward_time": 0.11435842514038086,
      "step": 13646
    },
    {
      "epoch": 8.328857421875e-05,
      "step": 13646,
      "training_step_time": 0.3958606719970703
    },
    {
      "epoch": 8.3294677734375e-05,
      "model_forward_time": 0.11491727828979492,
      "step": 13647
    },
    {
      "epoch": 8.3294677734375e-05,
      "step": 13647,
      "training_step_time": 0.8001019954681396
    },
    {
      "epoch": 8.330078125e-05,
      "model_forward_time": 0.11441898345947266,
      "step": 13648
    },
    {
      "epoch": 8.330078125e-05,
      "step": 13648,
      "training_step_time": 0.39107775688171387
    },
    {
      "epoch": 8.3306884765625e-05,
      "model_forward_time": 0.11447811126708984,
      "step": 13649
    },
    {
      "epoch": 8.3306884765625e-05,
      "step": 13649,
      "training_step_time": 0.37325167655944824
    },
    {
      "epoch": 8.331298828125e-05,
      "grad_norm": 0.13398534059524536,
      "learning_rate": 9.163080544038952e-05,
      "loss": 0.0548,
      "step": 13650
    },
    {
      "epoch": 8.331298828125e-05,
      "model_forward_time": 0.11409640312194824,
      "step": 13650
    },
    {
      "epoch": 8.331298828125e-05,
      "step": 13650,
      "training_step_time": 0.38964390754699707
    },
    {
      "epoch": 8.3319091796875e-05,
      "model_forward_time": 0.11453461647033691,
      "step": 13651
    },
    {
      "epoch": 8.3319091796875e-05,
      "step": 13651,
      "training_step_time": 0.3970158100128174
    },
    {
      "epoch": 8.33251953125e-05,
      "model_forward_time": 0.11437845230102539,
      "step": 13652
    },
    {
      "epoch": 8.33251953125e-05,
      "step": 13652,
      "training_step_time": 0.4257194995880127
    },
    {
      "epoch": 8.3331298828125e-05,
      "model_forward_time": 0.11485862731933594,
      "step": 13653
    },
    {
      "epoch": 8.3331298828125e-05,
      "step": 13653,
      "training_step_time": 0.5975275039672852
    },
    {
      "epoch": 8.333740234375e-05,
      "model_forward_time": 0.11490368843078613,
      "step": 13654
    },
    {
      "epoch": 8.333740234375e-05,
      "step": 13654,
      "training_step_time": 0.403674840927124
    },
    {
      "epoch": 8.3343505859375e-05,
      "model_forward_time": 0.11430025100708008,
      "step": 13655
    },
    {
      "epoch": 8.3343505859375e-05,
      "step": 13655,
      "training_step_time": 0.45615553855895996
    },
    {
      "epoch": 8.3349609375e-05,
      "model_forward_time": 0.11467456817626953,
      "step": 13656
    },
    {
      "epoch": 8.3349609375e-05,
      "step": 13656,
      "training_step_time": 0.3834950923919678
    },
    {
      "epoch": 8.3355712890625e-05,
      "model_forward_time": 0.11489224433898926,
      "step": 13657
    },
    {
      "epoch": 8.3355712890625e-05,
      "step": 13657,
      "training_step_time": 0.3854351043701172
    },
    {
      "epoch": 8.336181640625e-05,
      "model_forward_time": 0.11433863639831543,
      "step": 13658
    },
    {
      "epoch": 8.336181640625e-05,
      "step": 13658,
      "training_step_time": 0.38210415840148926
    },
    {
      "epoch": 8.3367919921875e-05,
      "model_forward_time": 0.11589884757995605,
      "step": 13659
    },
    {
      "epoch": 8.3367919921875e-05,
      "step": 13659,
      "training_step_time": 0.8338050842285156
    },
    {
      "epoch": 8.33740234375e-05,
      "grad_norm": 0.19823579490184784,
      "learning_rate": 9.161553619893457e-05,
      "loss": 0.0588,
      "step": 13660
    },
    {
      "epoch": 8.33740234375e-05,
      "model_forward_time": 0.11435317993164062,
      "step": 13660
    },
    {
      "epoch": 8.33740234375e-05,
      "step": 13660,
      "training_step_time": 0.38913798332214355
    },
    {
      "epoch": 8.3380126953125e-05,
      "model_forward_time": 0.11476492881774902,
      "step": 13661
    },
    {
      "epoch": 8.3380126953125e-05,
      "step": 13661,
      "training_step_time": 0.3740248680114746
    },
    {
      "epoch": 8.338623046875e-05,
      "model_forward_time": 0.11447691917419434,
      "step": 13662
    },
    {
      "epoch": 8.338623046875e-05,
      "step": 13662,
      "training_step_time": 0.3817143440246582
    },
    {
      "epoch": 8.3392333984375e-05,
      "model_forward_time": 0.1149907112121582,
      "step": 13663
    },
    {
      "epoch": 8.3392333984375e-05,
      "step": 13663,
      "training_step_time": 0.38855457305908203
    },
    {
      "epoch": 8.33984375e-05,
      "model_forward_time": 0.11404037475585938,
      "step": 13664
    },
    {
      "epoch": 8.33984375e-05,
      "step": 13664,
      "training_step_time": 0.39963459968566895
    },
    {
      "epoch": 8.3404541015625e-05,
      "model_forward_time": 0.11615371704101562,
      "step": 13665
    },
    {
      "epoch": 8.3404541015625e-05,
      "step": 13665,
      "training_step_time": 0.768521785736084
    },
    {
      "epoch": 8.341064453125e-05,
      "model_forward_time": 0.11508464813232422,
      "step": 13666
    },
    {
      "epoch": 8.341064453125e-05,
      "step": 13666,
      "training_step_time": 0.44136667251586914
    },
    {
      "epoch": 8.3416748046875e-05,
      "model_forward_time": 0.11459493637084961,
      "step": 13667
    },
    {
      "epoch": 8.3416748046875e-05,
      "step": 13667,
      "training_step_time": 0.4742920398712158
    },
    {
      "epoch": 8.34228515625e-05,
      "model_forward_time": 0.11509847640991211,
      "step": 13668
    },
    {
      "epoch": 8.34228515625e-05,
      "step": 13668,
      "training_step_time": 0.48720359802246094
    },
    {
      "epoch": 8.3428955078125e-05,
      "model_forward_time": 0.1142578125,
      "step": 13669
    },
    {
      "epoch": 8.3428955078125e-05,
      "step": 13669,
      "training_step_time": 0.3935420513153076
    },
    {
      "epoch": 8.343505859375e-05,
      "grad_norm": 0.2264798879623413,
      "learning_rate": 9.160025431577851e-05,
      "loss": 0.0527,
      "step": 13670
    },
    {
      "epoch": 8.343505859375e-05,
      "model_forward_time": 0.11416816711425781,
      "step": 13670
    },
    {
      "epoch": 8.343505859375e-05,
      "step": 13670,
      "training_step_time": 0.38767027854919434
    },
    {
      "epoch": 8.3441162109375e-05,
      "model_forward_time": 0.11434674263000488,
      "step": 13671
    },
    {
      "epoch": 8.3441162109375e-05,
      "step": 13671,
      "training_step_time": 0.5376532077789307
    },
    {
      "epoch": 8.3447265625e-05,
      "model_forward_time": 0.11492300033569336,
      "step": 13672
    },
    {
      "epoch": 8.3447265625e-05,
      "step": 13672,
      "training_step_time": 0.39705920219421387
    },
    {
      "epoch": 8.3453369140625e-05,
      "model_forward_time": 0.11406874656677246,
      "step": 13673
    },
    {
      "epoch": 8.3453369140625e-05,
      "step": 13673,
      "training_step_time": 0.39269304275512695
    },
    {
      "epoch": 8.345947265625e-05,
      "model_forward_time": 0.11488103866577148,
      "step": 13674
    },
    {
      "epoch": 8.345947265625e-05,
      "step": 13674,
      "training_step_time": 0.4049804210662842
    },
    {
      "epoch": 8.3465576171875e-05,
      "model_forward_time": 0.11486077308654785,
      "step": 13675
    },
    {
      "epoch": 8.3465576171875e-05,
      "step": 13675,
      "training_step_time": 0.39324474334716797
    },
    {
      "epoch": 8.34716796875e-05,
      "model_forward_time": 0.11482644081115723,
      "step": 13676
    },
    {
      "epoch": 8.34716796875e-05,
      "step": 13676,
      "training_step_time": 0.385634183883667
    },
    {
      "epoch": 8.3477783203125e-05,
      "model_forward_time": 0.11539459228515625,
      "step": 13677
    },
    {
      "epoch": 8.3477783203125e-05,
      "step": 13677,
      "training_step_time": 0.6091573238372803
    },
    {
      "epoch": 8.348388671875e-05,
      "model_forward_time": 0.11537551879882812,
      "step": 13678
    },
    {
      "epoch": 8.348388671875e-05,
      "step": 13678,
      "training_step_time": 0.4213218688964844
    },
    {
      "epoch": 8.3489990234375e-05,
      "model_forward_time": 0.1150200366973877,
      "step": 13679
    },
    {
      "epoch": 8.3489990234375e-05,
      "step": 13679,
      "training_step_time": 0.5204460620880127
    },
    {
      "epoch": 8.349609375e-05,
      "grad_norm": 0.2278164029121399,
      "learning_rate": 9.158495979556358e-05,
      "loss": 0.0586,
      "step": 13680
    },
    {
      "epoch": 8.349609375e-05,
      "model_forward_time": 0.11475062370300293,
      "step": 13680
    },
    {
      "epoch": 8.349609375e-05,
      "step": 13680,
      "training_step_time": 0.4875814914703369
    },
    {
      "epoch": 8.3502197265625e-05,
      "model_forward_time": 0.11470913887023926,
      "step": 13681
    },
    {
      "epoch": 8.3502197265625e-05,
      "step": 13681,
      "training_step_time": 0.44591808319091797
    },
    {
      "epoch": 8.350830078125e-05,
      "model_forward_time": 0.1141519546508789,
      "step": 13682
    },
    {
      "epoch": 8.350830078125e-05,
      "step": 13682,
      "training_step_time": 0.4698140621185303
    },
    {
      "epoch": 8.3514404296875e-05,
      "model_forward_time": 0.11476802825927734,
      "step": 13683
    },
    {
      "epoch": 8.3514404296875e-05,
      "step": 13683,
      "training_step_time": 0.3854241371154785
    },
    {
      "epoch": 8.35205078125e-05,
      "model_forward_time": 0.11464571952819824,
      "step": 13684
    },
    {
      "epoch": 8.35205078125e-05,
      "step": 13684,
      "training_step_time": 0.3903532028198242
    },
    {
      "epoch": 8.3526611328125e-05,
      "model_forward_time": 0.11480331420898438,
      "step": 13685
    },
    {
      "epoch": 8.3526611328125e-05,
      "step": 13685,
      "training_step_time": 0.37775444984436035
    },
    {
      "epoch": 8.353271484375e-05,
      "model_forward_time": 0.1147317886352539,
      "step": 13686
    },
    {
      "epoch": 8.353271484375e-05,
      "step": 13686,
      "training_step_time": 0.3890869617462158
    },
    {
      "epoch": 8.3538818359375e-05,
      "model_forward_time": 0.11588811874389648,
      "step": 13687
    },
    {
      "epoch": 8.3538818359375e-05,
      "step": 13687,
      "training_step_time": 0.3946957588195801
    },
    {
      "epoch": 8.3544921875e-05,
      "model_forward_time": 0.11496710777282715,
      "step": 13688
    },
    {
      "epoch": 8.3544921875e-05,
      "step": 13688,
      "training_step_time": 0.3805582523345947
    },
    {
      "epoch": 8.3551025390625e-05,
      "model_forward_time": 0.11532998085021973,
      "step": 13689
    },
    {
      "epoch": 8.3551025390625e-05,
      "step": 13689,
      "training_step_time": 0.7110412120819092
    },
    {
      "epoch": 8.355712890625e-05,
      "grad_norm": 0.18276576697826385,
      "learning_rate": 9.156965264293586e-05,
      "loss": 0.0562,
      "step": 13690
    },
    {
      "epoch": 8.355712890625e-05,
      "model_forward_time": 0.11510562896728516,
      "step": 13690
    },
    {
      "epoch": 8.355712890625e-05,
      "step": 13690,
      "training_step_time": 0.38399815559387207
    },
    {
      "epoch": 8.3563232421875e-05,
      "model_forward_time": 0.1145780086517334,
      "step": 13691
    },
    {
      "epoch": 8.3563232421875e-05,
      "step": 13691,
      "training_step_time": 0.417097806930542
    },
    {
      "epoch": 8.35693359375e-05,
      "model_forward_time": 0.11433577537536621,
      "step": 13692
    },
    {
      "epoch": 8.35693359375e-05,
      "step": 13692,
      "training_step_time": 0.46097683906555176
    },
    {
      "epoch": 8.3575439453125e-05,
      "model_forward_time": 0.11478662490844727,
      "step": 13693
    },
    {
      "epoch": 8.3575439453125e-05,
      "step": 13693,
      "training_step_time": 0.4916825294494629
    },
    {
      "epoch": 8.358154296875e-05,
      "model_forward_time": 0.11504244804382324,
      "step": 13694
    },
    {
      "epoch": 8.358154296875e-05,
      "step": 13694,
      "training_step_time": 0.42107534408569336
    },
    {
      "epoch": 8.3587646484375e-05,
      "model_forward_time": 0.11490106582641602,
      "step": 13695
    },
    {
      "epoch": 8.3587646484375e-05,
      "step": 13695,
      "training_step_time": 0.494534969329834
    },
    {
      "epoch": 8.359375e-05,
      "model_forward_time": 0.11518740653991699,
      "step": 13696
    },
    {
      "epoch": 8.359375e-05,
      "step": 13696,
      "training_step_time": 0.39435386657714844
    },
    {
      "epoch": 8.3599853515625e-05,
      "model_forward_time": 0.11436891555786133,
      "step": 13697
    },
    {
      "epoch": 8.3599853515625e-05,
      "step": 13697,
      "training_step_time": 0.3891615867614746
    },
    {
      "epoch": 8.360595703125e-05,
      "model_forward_time": 0.11503958702087402,
      "step": 13698
    },
    {
      "epoch": 8.360595703125e-05,
      "step": 13698,
      "training_step_time": 0.40013742446899414
    },
    {
      "epoch": 8.3612060546875e-05,
      "model_forward_time": 0.11652255058288574,
      "step": 13699
    },
    {
      "epoch": 8.3612060546875e-05,
      "step": 13699,
      "training_step_time": 0.40140604972839355
    },
    {
      "epoch": 8.36181640625e-05,
      "grad_norm": 0.1463710218667984,
      "learning_rate": 9.155433286254525e-05,
      "loss": 0.0589,
      "step": 13700
    },
    {
      "epoch": 8.36181640625e-05,
      "model_forward_time": 0.11532902717590332,
      "step": 13700
    },
    {
      "epoch": 8.36181640625e-05,
      "step": 13700,
      "training_step_time": 0.394533634185791
    },
    {
      "epoch": 8.3624267578125e-05,
      "model_forward_time": 0.11535525321960449,
      "step": 13701
    },
    {
      "epoch": 8.3624267578125e-05,
      "step": 13701,
      "training_step_time": 0.6286401748657227
    },
    {
      "epoch": 8.363037109375e-05,
      "model_forward_time": 0.11545181274414062,
      "step": 13702
    },
    {
      "epoch": 8.363037109375e-05,
      "step": 13702,
      "training_step_time": 0.3855006694793701
    },
    {
      "epoch": 8.3636474609375e-05,
      "model_forward_time": 0.1148223876953125,
      "step": 13703
    },
    {
      "epoch": 8.3636474609375e-05,
      "step": 13703,
      "training_step_time": 0.38436412811279297
    },
    {
      "epoch": 8.3642578125e-05,
      "model_forward_time": 0.11464500427246094,
      "step": 13704
    },
    {
      "epoch": 8.3642578125e-05,
      "step": 13704,
      "training_step_time": 0.39544081687927246
    },
    {
      "epoch": 8.3648681640625e-05,
      "model_forward_time": 0.1147911548614502,
      "step": 13705
    },
    {
      "epoch": 8.3648681640625e-05,
      "step": 13705,
      "training_step_time": 0.41844773292541504
    },
    {
      "epoch": 8.365478515625e-05,
      "model_forward_time": 0.11443448066711426,
      "step": 13706
    },
    {
      "epoch": 8.365478515625e-05,
      "step": 13706,
      "training_step_time": 0.44269466400146484
    },
    {
      "epoch": 8.3660888671875e-05,
      "model_forward_time": 0.1144566535949707,
      "step": 13707
    },
    {
      "epoch": 8.3660888671875e-05,
      "step": 13707,
      "training_step_time": 0.9324383735656738
    },
    {
      "epoch": 8.36669921875e-05,
      "model_forward_time": 0.11397981643676758,
      "step": 13708
    },
    {
      "epoch": 8.36669921875e-05,
      "step": 13708,
      "training_step_time": 0.456043004989624
    },
    {
      "epoch": 8.3673095703125e-05,
      "model_forward_time": 0.11466407775878906,
      "step": 13709
    },
    {
      "epoch": 8.3673095703125e-05,
      "step": 13709,
      "training_step_time": 0.4025607109069824
    },
    {
      "epoch": 8.367919921875e-05,
      "grad_norm": 0.19026847183704376,
      "learning_rate": 9.153900045904549e-05,
      "loss": 0.0594,
      "step": 13710
    },
    {
      "epoch": 8.367919921875e-05,
      "model_forward_time": 0.1137702465057373,
      "step": 13710
    },
    {
      "epoch": 8.367919921875e-05,
      "step": 13710,
      "training_step_time": 0.3838067054748535
    },
    {
      "epoch": 8.3685302734375e-05,
      "model_forward_time": 0.11438775062561035,
      "step": 13711
    },
    {
      "epoch": 8.3685302734375e-05,
      "step": 13711,
      "training_step_time": 0.393230676651001
    },
    {
      "epoch": 8.369140625e-05,
      "model_forward_time": 0.11493492126464844,
      "step": 13712
    },
    {
      "epoch": 8.369140625e-05,
      "step": 13712,
      "training_step_time": 0.37968945503234863
    },
    {
      "epoch": 8.3697509765625e-05,
      "model_forward_time": 0.11545634269714355,
      "step": 13713
    },
    {
      "epoch": 8.3697509765625e-05,
      "step": 13713,
      "training_step_time": 0.6438443660736084
    },
    {
      "epoch": 8.370361328125e-05,
      "model_forward_time": 0.11455273628234863,
      "step": 13714
    },
    {
      "epoch": 8.370361328125e-05,
      "step": 13714,
      "training_step_time": 0.3925938606262207
    },
    {
      "epoch": 8.3709716796875e-05,
      "model_forward_time": 0.1147313117980957,
      "step": 13715
    },
    {
      "epoch": 8.3709716796875e-05,
      "step": 13715,
      "training_step_time": 0.3845353126525879
    },
    {
      "epoch": 8.37158203125e-05,
      "model_forward_time": 0.11468219757080078,
      "step": 13716
    },
    {
      "epoch": 8.37158203125e-05,
      "step": 13716,
      "training_step_time": 0.39292097091674805
    },
    {
      "epoch": 8.3721923828125e-05,
      "model_forward_time": 0.11493277549743652,
      "step": 13717
    },
    {
      "epoch": 8.3721923828125e-05,
      "step": 13717,
      "training_step_time": 0.39244604110717773
    },
    {
      "epoch": 8.372802734375e-05,
      "model_forward_time": 0.11455011367797852,
      "step": 13718
    },
    {
      "epoch": 8.372802734375e-05,
      "step": 13718,
      "training_step_time": 0.39314746856689453
    },
    {
      "epoch": 8.3734130859375e-05,
      "model_forward_time": 0.11520648002624512,
      "step": 13719
    },
    {
      "epoch": 8.3734130859375e-05,
      "step": 13719,
      "training_step_time": 0.7028203010559082
    },
    {
      "epoch": 8.3740234375e-05,
      "grad_norm": 0.2612789571285248,
      "learning_rate": 9.152365543709416e-05,
      "loss": 0.052,
      "step": 13720
    },
    {
      "epoch": 8.3740234375e-05,
      "model_forward_time": 0.1142582893371582,
      "step": 13720
    },
    {
      "epoch": 8.3740234375e-05,
      "step": 13720,
      "training_step_time": 0.4434990882873535
    },
    {
      "epoch": 8.3746337890625e-05,
      "model_forward_time": 0.11496734619140625,
      "step": 13721
    },
    {
      "epoch": 8.3746337890625e-05,
      "step": 13721,
      "training_step_time": 0.41521334648132324
    },
    {
      "epoch": 8.375244140625e-05,
      "model_forward_time": 0.114776611328125,
      "step": 13722
    },
    {
      "epoch": 8.375244140625e-05,
      "step": 13722,
      "training_step_time": 0.39508843421936035
    },
    {
      "epoch": 8.3758544921875e-05,
      "model_forward_time": 0.1146998405456543,
      "step": 13723
    },
    {
      "epoch": 8.3758544921875e-05,
      "step": 13723,
      "training_step_time": 0.41332530975341797
    },
    {
      "epoch": 8.37646484375e-05,
      "model_forward_time": 0.11454081535339355,
      "step": 13724
    },
    {
      "epoch": 8.37646484375e-05,
      "step": 13724,
      "training_step_time": 0.38485264778137207
    },
    {
      "epoch": 8.3770751953125e-05,
      "model_forward_time": 0.11552858352661133,
      "step": 13725
    },
    {
      "epoch": 8.3770751953125e-05,
      "step": 13725,
      "training_step_time": 0.6758825778961182
    },
    {
      "epoch": 8.377685546875e-05,
      "model_forward_time": 0.11483168601989746,
      "step": 13726
    },
    {
      "epoch": 8.377685546875e-05,
      "step": 13726,
      "training_step_time": 0.386821985244751
    },
    {
      "epoch": 8.3782958984375e-05,
      "model_forward_time": 0.11501765251159668,
      "step": 13727
    },
    {
      "epoch": 8.3782958984375e-05,
      "step": 13727,
      "training_step_time": 0.38911938667297363
    },
    {
      "epoch": 8.37890625e-05,
      "model_forward_time": 0.11543536186218262,
      "step": 13728
    },
    {
      "epoch": 8.37890625e-05,
      "step": 13728,
      "training_step_time": 0.3934898376464844
    },
    {
      "epoch": 8.3795166015625e-05,
      "model_forward_time": 0.11458897590637207,
      "step": 13729
    },
    {
      "epoch": 8.3795166015625e-05,
      "step": 13729,
      "training_step_time": 0.39057159423828125
    },
    {
      "epoch": 8.380126953125e-05,
      "grad_norm": 0.2892007827758789,
      "learning_rate": 9.150829780135269e-05,
      "loss": 0.0591,
      "step": 13730
    },
    {
      "epoch": 8.380126953125e-05,
      "model_forward_time": 0.11465215682983398,
      "step": 13730
    },
    {
      "epoch": 8.380126953125e-05,
      "step": 13730,
      "training_step_time": 0.4278402328491211
    },
    {
      "epoch": 8.3807373046875e-05,
      "model_forward_time": 0.11537003517150879,
      "step": 13731
    },
    {
      "epoch": 8.3807373046875e-05,
      "step": 13731,
      "training_step_time": 0.7243020534515381
    },
    {
      "epoch": 8.38134765625e-05,
      "model_forward_time": 0.11420249938964844,
      "step": 13732
    },
    {
      "epoch": 8.38134765625e-05,
      "step": 13732,
      "training_step_time": 0.3811824321746826
    },
    {
      "epoch": 8.3819580078125e-05,
      "model_forward_time": 0.11448884010314941,
      "step": 13733
    },
    {
      "epoch": 8.3819580078125e-05,
      "step": 13733,
      "training_step_time": 0.386120080947876
    },
    {
      "epoch": 8.382568359375e-05,
      "model_forward_time": 0.11492037773132324,
      "step": 13734
    },
    {
      "epoch": 8.382568359375e-05,
      "step": 13734,
      "training_step_time": 0.413602352142334
    },
    {
      "epoch": 8.3831787109375e-05,
      "model_forward_time": 0.11435246467590332,
      "step": 13735
    },
    {
      "epoch": 8.3831787109375e-05,
      "step": 13735,
      "training_step_time": 0.4216468334197998
    },
    {
      "epoch": 8.3837890625e-05,
      "model_forward_time": 0.11464476585388184,
      "step": 13736
    },
    {
      "epoch": 8.3837890625e-05,
      "step": 13736,
      "training_step_time": 0.43219423294067383
    },
    {
      "epoch": 8.3843994140625e-05,
      "model_forward_time": 0.11551451683044434,
      "step": 13737
    },
    {
      "epoch": 8.3843994140625e-05,
      "step": 13737,
      "training_step_time": 0.549910306930542
    },
    {
      "epoch": 8.385009765625e-05,
      "model_forward_time": 0.11463451385498047,
      "step": 13738
    },
    {
      "epoch": 8.385009765625e-05,
      "step": 13738,
      "training_step_time": 0.3964569568634033
    },
    {
      "epoch": 8.3856201171875e-05,
      "model_forward_time": 0.1149907112121582,
      "step": 13739
    },
    {
      "epoch": 8.3856201171875e-05,
      "step": 13739,
      "training_step_time": 0.38080477714538574
    },
    {
      "epoch": 8.38623046875e-05,
      "grad_norm": 0.17573033273220062,
      "learning_rate": 9.14929275564863e-05,
      "loss": 0.0545,
      "step": 13740
    },
    {
      "epoch": 8.38623046875e-05,
      "model_forward_time": 0.11455297470092773,
      "step": 13740
    },
    {
      "epoch": 8.38623046875e-05,
      "step": 13740,
      "training_step_time": 0.4022941589355469
    },
    {
      "epoch": 8.3868408203125e-05,
      "model_forward_time": 0.11502480506896973,
      "step": 13741
    },
    {
      "epoch": 8.3868408203125e-05,
      "step": 13741,
      "training_step_time": 0.39375901222229004
    },
    {
      "epoch": 8.387451171875e-05,
      "model_forward_time": 0.11478853225708008,
      "step": 13742
    },
    {
      "epoch": 8.387451171875e-05,
      "step": 13742,
      "training_step_time": 0.39502573013305664
    },
    {
      "epoch": 8.3880615234375e-05,
      "model_forward_time": 0.1156313419342041,
      "step": 13743
    },
    {
      "epoch": 8.3880615234375e-05,
      "step": 13743,
      "training_step_time": 0.8386900424957275
    },
    {
      "epoch": 8.388671875e-05,
      "model_forward_time": 0.11407160758972168,
      "step": 13744
    },
    {
      "epoch": 8.388671875e-05,
      "step": 13744,
      "training_step_time": 0.3859903812408447
    },
    {
      "epoch": 8.3892822265625e-05,
      "model_forward_time": 0.11418581008911133,
      "step": 13745
    },
    {
      "epoch": 8.3892822265625e-05,
      "step": 13745,
      "training_step_time": 0.3809688091278076
    },
    {
      "epoch": 8.389892578125e-05,
      "model_forward_time": 0.11429834365844727,
      "step": 13746
    },
    {
      "epoch": 8.389892578125e-05,
      "step": 13746,
      "training_step_time": 0.3820207118988037
    },
    {
      "epoch": 8.3905029296875e-05,
      "model_forward_time": 0.11472916603088379,
      "step": 13747
    },
    {
      "epoch": 8.3905029296875e-05,
      "step": 13747,
      "training_step_time": 0.37888360023498535
    },
    {
      "epoch": 8.39111328125e-05,
      "model_forward_time": 0.1142277717590332,
      "step": 13748
    },
    {
      "epoch": 8.39111328125e-05,
      "step": 13748,
      "training_step_time": 0.44767045974731445
    },
    {
      "epoch": 8.3917236328125e-05,
      "model_forward_time": 0.11551475524902344,
      "step": 13749
    },
    {
      "epoch": 8.3917236328125e-05,
      "step": 13749,
      "training_step_time": 0.6776299476623535
    },
    {
      "epoch": 8.392333984375e-05,
      "grad_norm": 0.18629677593708038,
      "learning_rate": 9.147754470716408e-05,
      "loss": 0.06,
      "step": 13750
    },
    {
      "epoch": 8.392333984375e-05,
      "model_forward_time": 0.11475610733032227,
      "step": 13750
    },
    {
      "epoch": 8.392333984375e-05,
      "step": 13750,
      "training_step_time": 0.4295227527618408
    },
    {
      "epoch": 8.3929443359375e-05,
      "model_forward_time": 0.1143796443939209,
      "step": 13751
    },
    {
      "epoch": 8.3929443359375e-05,
      "step": 13751,
      "training_step_time": 0.3839139938354492
    },
    {
      "epoch": 8.3935546875e-05,
      "model_forward_time": 0.1143040657043457,
      "step": 13752
    },
    {
      "epoch": 8.3935546875e-05,
      "step": 13752,
      "training_step_time": 0.39241671562194824
    },
    {
      "epoch": 8.3941650390625e-05,
      "model_forward_time": 0.11420249938964844,
      "step": 13753
    },
    {
      "epoch": 8.3941650390625e-05,
      "step": 13753,
      "training_step_time": 0.3808023929595947
    },
    {
      "epoch": 8.394775390625e-05,
      "model_forward_time": 0.11511015892028809,
      "step": 13754
    },
    {
      "epoch": 8.394775390625e-05,
      "step": 13754,
      "training_step_time": 0.392535924911499
    },
    {
      "epoch": 8.3953857421875e-05,
      "model_forward_time": 0.1152501106262207,
      "step": 13755
    },
    {
      "epoch": 8.3953857421875e-05,
      "step": 13755,
      "training_step_time": 0.7408056259155273
    },
    {
      "epoch": 8.39599609375e-05,
      "model_forward_time": 0.11456155776977539,
      "step": 13756
    },
    {
      "epoch": 8.39599609375e-05,
      "step": 13756,
      "training_step_time": 0.3910219669342041
    },
    {
      "epoch": 8.3966064453125e-05,
      "model_forward_time": 0.11406946182250977,
      "step": 13757
    },
    {
      "epoch": 8.3966064453125e-05,
      "step": 13757,
      "training_step_time": 0.39124107360839844
    },
    {
      "epoch": 8.397216796875e-05,
      "model_forward_time": 0.11443185806274414,
      "step": 13758
    },
    {
      "epoch": 8.397216796875e-05,
      "step": 13758,
      "training_step_time": 0.39502668380737305
    },
    {
      "epoch": 8.3978271484375e-05,
      "model_forward_time": 0.11446857452392578,
      "step": 13759
    },
    {
      "epoch": 8.3978271484375e-05,
      "step": 13759,
      "training_step_time": 0.38480257987976074
    },
    {
      "epoch": 8.3984375e-05,
      "grad_norm": 0.14572933316230774,
      "learning_rate": 9.146214925805891e-05,
      "loss": 0.0532,
      "step": 13760
    },
    {
      "epoch": 8.3984375e-05,
      "model_forward_time": 0.11529207229614258,
      "step": 13760
    },
    {
      "epoch": 8.3984375e-05,
      "step": 13760,
      "training_step_time": 0.38337254524230957
    },
    {
      "epoch": 8.3990478515625e-05,
      "model_forward_time": 0.1151571273803711,
      "step": 13761
    },
    {
      "epoch": 8.3990478515625e-05,
      "step": 13761,
      "training_step_time": 0.9327669143676758
    },
    {
      "epoch": 8.399658203125e-05,
      "model_forward_time": 0.1146707534790039,
      "step": 13762
    },
    {
      "epoch": 8.399658203125e-05,
      "step": 13762,
      "training_step_time": 0.47054624557495117
    },
    {
      "epoch": 8.4002685546875e-05,
      "model_forward_time": 0.1145315170288086,
      "step": 13763
    },
    {
      "epoch": 8.4002685546875e-05,
      "step": 13763,
      "training_step_time": 0.43331241607666016
    },
    {
      "epoch": 8.40087890625e-05,
      "model_forward_time": 0.11467790603637695,
      "step": 13764
    },
    {
      "epoch": 8.40087890625e-05,
      "step": 13764,
      "training_step_time": 0.45183396339416504
    },
    {
      "epoch": 8.4014892578125e-05,
      "model_forward_time": 0.11402344703674316,
      "step": 13765
    },
    {
      "epoch": 8.4014892578125e-05,
      "step": 13765,
      "training_step_time": 0.38878774642944336
    },
    {
      "epoch": 8.402099609375e-05,
      "model_forward_time": 0.1141958236694336,
      "step": 13766
    },
    {
      "epoch": 8.402099609375e-05,
      "step": 13766,
      "training_step_time": 0.3795452117919922
    },
    {
      "epoch": 8.4027099609375e-05,
      "model_forward_time": 0.11461758613586426,
      "step": 13767
    },
    {
      "epoch": 8.4027099609375e-05,
      "step": 13767,
      "training_step_time": 0.42977023124694824
    },
    {
      "epoch": 8.4033203125e-05,
      "model_forward_time": 0.11492085456848145,
      "step": 13768
    },
    {
      "epoch": 8.4033203125e-05,
      "step": 13768,
      "training_step_time": 0.3929741382598877
    },
    {
      "epoch": 8.4039306640625e-05,
      "model_forward_time": 0.11488747596740723,
      "step": 13769
    },
    {
      "epoch": 8.4039306640625e-05,
      "step": 13769,
      "training_step_time": 0.39098048210144043
    },
    {
      "epoch": 8.404541015625e-05,
      "grad_norm": 0.23824040591716766,
      "learning_rate": 9.144674121384757e-05,
      "loss": 0.0567,
      "step": 13770
    },
    {
      "epoch": 8.404541015625e-05,
      "model_forward_time": 0.11478781700134277,
      "step": 13770
    },
    {
      "epoch": 8.404541015625e-05,
      "step": 13770,
      "training_step_time": 0.40723562240600586
    },
    {
      "epoch": 8.4051513671875e-05,
      "model_forward_time": 0.11517024040222168,
      "step": 13771
    },
    {
      "epoch": 8.4051513671875e-05,
      "step": 13771,
      "training_step_time": 0.39697837829589844
    },
    {
      "epoch": 8.40576171875e-05,
      "model_forward_time": 0.11508440971374512,
      "step": 13772
    },
    {
      "epoch": 8.40576171875e-05,
      "step": 13772,
      "training_step_time": 0.40700268745422363
    },
    {
      "epoch": 8.4063720703125e-05,
      "model_forward_time": 0.11496186256408691,
      "step": 13773
    },
    {
      "epoch": 8.4063720703125e-05,
      "step": 13773,
      "training_step_time": 0.8099579811096191
    },
    {
      "epoch": 8.406982421875e-05,
      "model_forward_time": 0.11401987075805664,
      "step": 13774
    },
    {
      "epoch": 8.406982421875e-05,
      "step": 13774,
      "training_step_time": 0.4509544372558594
    },
    {
      "epoch": 8.4075927734375e-05,
      "model_forward_time": 0.1134481430053711,
      "step": 13775
    },
    {
      "epoch": 8.4075927734375e-05,
      "step": 13775,
      "training_step_time": 0.4488060474395752
    },
    {
      "epoch": 8.408203125e-05,
      "model_forward_time": 0.11487126350402832,
      "step": 13776
    },
    {
      "epoch": 8.408203125e-05,
      "step": 13776,
      "training_step_time": 0.40126824378967285
    },
    {
      "epoch": 8.4088134765625e-05,
      "model_forward_time": 0.11415457725524902,
      "step": 13777
    },
    {
      "epoch": 8.4088134765625e-05,
      "step": 13777,
      "training_step_time": 0.38691282272338867
    },
    {
      "epoch": 8.409423828125e-05,
      "model_forward_time": 0.1144249439239502,
      "step": 13778
    },
    {
      "epoch": 8.409423828125e-05,
      "step": 13778,
      "training_step_time": 0.4150078296661377
    },
    {
      "epoch": 8.4100341796875e-05,
      "model_forward_time": 0.11414623260498047,
      "step": 13779
    },
    {
      "epoch": 8.4100341796875e-05,
      "step": 13779,
      "training_step_time": 0.6994192600250244
    },
    {
      "epoch": 8.41064453125e-05,
      "grad_norm": 0.174317866563797,
      "learning_rate": 9.143132057921058e-05,
      "loss": 0.0548,
      "step": 13780
    },
    {
      "epoch": 8.41064453125e-05,
      "model_forward_time": 0.11374497413635254,
      "step": 13780
    },
    {
      "epoch": 8.41064453125e-05,
      "step": 13780,
      "training_step_time": 0.3901960849761963
    },
    {
      "epoch": 8.4112548828125e-05,
      "model_forward_time": 0.11463022232055664,
      "step": 13781
    },
    {
      "epoch": 8.4112548828125e-05,
      "step": 13781,
      "training_step_time": 0.38195013999938965
    },
    {
      "epoch": 8.411865234375e-05,
      "model_forward_time": 0.11428046226501465,
      "step": 13782
    },
    {
      "epoch": 8.411865234375e-05,
      "step": 13782,
      "training_step_time": 0.3854665756225586
    },
    {
      "epoch": 8.4124755859375e-05,
      "model_forward_time": 0.11488604545593262,
      "step": 13783
    },
    {
      "epoch": 8.4124755859375e-05,
      "step": 13783,
      "training_step_time": 0.3969578742980957
    },
    {
      "epoch": 8.4130859375e-05,
      "model_forward_time": 0.11464905738830566,
      "step": 13784
    },
    {
      "epoch": 8.4130859375e-05,
      "step": 13784,
      "training_step_time": 0.3885841369628906
    },
    {
      "epoch": 8.4136962890625e-05,
      "model_forward_time": 0.1148679256439209,
      "step": 13785
    },
    {
      "epoch": 8.4136962890625e-05,
      "step": 13785,
      "training_step_time": 0.5679025650024414
    },
    {
      "epoch": 8.414306640625e-05,
      "model_forward_time": 0.11460542678833008,
      "step": 13786
    },
    {
      "epoch": 8.414306640625e-05,
      "step": 13786,
      "training_step_time": 0.3942086696624756
    },
    {
      "epoch": 8.4149169921875e-05,
      "model_forward_time": 0.11603474617004395,
      "step": 13787
    },
    {
      "epoch": 8.4149169921875e-05,
      "step": 13787,
      "training_step_time": 0.3882427215576172
    },
    {
      "epoch": 8.41552734375e-05,
      "model_forward_time": 0.11610174179077148,
      "step": 13788
    },
    {
      "epoch": 8.41552734375e-05,
      "step": 13788,
      "training_step_time": 0.4228837490081787
    },
    {
      "epoch": 8.4161376953125e-05,
      "model_forward_time": 0.11498355865478516,
      "step": 13789
    },
    {
      "epoch": 8.4161376953125e-05,
      "step": 13789,
      "training_step_time": 0.42774367332458496
    },
    {
      "epoch": 8.416748046875e-05,
      "grad_norm": 0.18457511067390442,
      "learning_rate": 9.141588735883232e-05,
      "loss": 0.0574,
      "step": 13790
    },
    {
      "epoch": 8.416748046875e-05,
      "model_forward_time": 0.11682868003845215,
      "step": 13790
    },
    {
      "epoch": 8.416748046875e-05,
      "step": 13790,
      "training_step_time": 0.36959123611450195
    },
    {
      "epoch": 8.4173583984375e-05,
      "model_forward_time": 0.1150960922241211,
      "step": 13791
    },
    {
      "epoch": 8.4173583984375e-05,
      "step": 13791,
      "training_step_time": 0.5338809490203857
    },
    {
      "epoch": 8.41796875e-05,
      "model_forward_time": 0.11491155624389648,
      "step": 13792
    },
    {
      "epoch": 8.41796875e-05,
      "step": 13792,
      "training_step_time": 0.3997499942779541
    },
    {
      "epoch": 8.4185791015625e-05,
      "model_forward_time": 0.11538934707641602,
      "step": 13793
    },
    {
      "epoch": 8.4185791015625e-05,
      "step": 13793,
      "training_step_time": 0.4179813861846924
    },
    {
      "epoch": 8.419189453125e-05,
      "model_forward_time": 0.114654541015625,
      "step": 13794
    },
    {
      "epoch": 8.419189453125e-05,
      "step": 13794,
      "training_step_time": 0.42076683044433594
    },
    {
      "epoch": 8.4197998046875e-05,
      "model_forward_time": 0.11479926109313965,
      "step": 13795
    },
    {
      "epoch": 8.4197998046875e-05,
      "step": 13795,
      "training_step_time": 0.39771342277526855
    },
    {
      "epoch": 8.42041015625e-05,
      "model_forward_time": 0.11472749710083008,
      "step": 13796
    },
    {
      "epoch": 8.42041015625e-05,
      "step": 13796,
      "training_step_time": 0.3911776542663574
    },
    {
      "epoch": 8.4210205078125e-05,
      "model_forward_time": 0.11491870880126953,
      "step": 13797
    },
    {
      "epoch": 8.4210205078125e-05,
      "step": 13797,
      "training_step_time": 0.7542486190795898
    },
    {
      "epoch": 8.421630859375e-05,
      "model_forward_time": 0.11415934562683105,
      "step": 13798
    },
    {
      "epoch": 8.421630859375e-05,
      "step": 13798,
      "training_step_time": 0.3856339454650879
    },
    {
      "epoch": 8.4222412109375e-05,
      "model_forward_time": 0.11402726173400879,
      "step": 13799
    },
    {
      "epoch": 8.4222412109375e-05,
      "step": 13799,
      "training_step_time": 0.3870832920074463
    },
    {
      "epoch": 8.4228515625e-05,
      "grad_norm": 0.23487482964992523,
      "learning_rate": 9.140044155740101e-05,
      "loss": 0.0583,
      "step": 13800
    },
    {
      "epoch": 8.4228515625e-05,
      "model_forward_time": 0.1141812801361084,
      "step": 13800
    },
    {
      "epoch": 8.4228515625e-05,
      "step": 13800,
      "training_step_time": 0.39625024795532227
    },
    {
      "epoch": 8.4234619140625e-05,
      "model_forward_time": 0.11422371864318848,
      "step": 13801
    },
    {
      "epoch": 8.4234619140625e-05,
      "step": 13801,
      "training_step_time": 0.38616943359375
    },
    {
      "epoch": 8.424072265625e-05,
      "model_forward_time": 0.11468768119812012,
      "step": 13802
    },
    {
      "epoch": 8.424072265625e-05,
      "step": 13802,
      "training_step_time": 0.42602014541625977
    },
    {
      "epoch": 8.4246826171875e-05,
      "model_forward_time": 0.1145625114440918,
      "step": 13803
    },
    {
      "epoch": 8.4246826171875e-05,
      "step": 13803,
      "training_step_time": 0.5370924472808838
    },
    {
      "epoch": 8.42529296875e-05,
      "model_forward_time": 0.11603474617004395,
      "step": 13804
    },
    {
      "epoch": 8.42529296875e-05,
      "step": 13804,
      "training_step_time": 0.3800520896911621
    },
    {
      "epoch": 8.4259033203125e-05,
      "model_forward_time": 0.11472153663635254,
      "step": 13805
    },
    {
      "epoch": 8.4259033203125e-05,
      "step": 13805,
      "training_step_time": 0.42078304290771484
    },
    {
      "epoch": 8.426513671875e-05,
      "model_forward_time": 0.11459517478942871,
      "step": 13806
    },
    {
      "epoch": 8.426513671875e-05,
      "step": 13806,
      "training_step_time": 0.4194917678833008
    },
    {
      "epoch": 8.4271240234375e-05,
      "model_forward_time": 0.11447358131408691,
      "step": 13807
    },
    {
      "epoch": 8.4271240234375e-05,
      "step": 13807,
      "training_step_time": 0.46103715896606445
    },
    {
      "epoch": 8.427734375e-05,
      "model_forward_time": 0.11542677879333496,
      "step": 13808
    },
    {
      "epoch": 8.427734375e-05,
      "step": 13808,
      "training_step_time": 0.3839545249938965
    },
    {
      "epoch": 8.4283447265625e-05,
      "model_forward_time": 0.1152031421661377,
      "step": 13809
    },
    {
      "epoch": 8.4283447265625e-05,
      "step": 13809,
      "training_step_time": 0.7413055896759033
    },
    {
      "epoch": 8.428955078125e-05,
      "grad_norm": 0.21589913964271545,
      "learning_rate": 9.138498317960867e-05,
      "loss": 0.0593,
      "step": 13810
    },
    {
      "epoch": 8.428955078125e-05,
      "model_forward_time": 0.11414623260498047,
      "step": 13810
    },
    {
      "epoch": 8.428955078125e-05,
      "step": 13810,
      "training_step_time": 0.3815789222717285
    },
    {
      "epoch": 8.4295654296875e-05,
      "model_forward_time": 0.11415767669677734,
      "step": 13811
    },
    {
      "epoch": 8.4295654296875e-05,
      "step": 13811,
      "training_step_time": 0.3790879249572754
    },
    {
      "epoch": 8.43017578125e-05,
      "model_forward_time": 0.11415839195251465,
      "step": 13812
    },
    {
      "epoch": 8.43017578125e-05,
      "step": 13812,
      "training_step_time": 0.3902146816253662
    },
    {
      "epoch": 8.4307861328125e-05,
      "model_forward_time": 0.1144869327545166,
      "step": 13813
    },
    {
      "epoch": 8.4307861328125e-05,
      "step": 13813,
      "training_step_time": 0.3891136646270752
    },
    {
      "epoch": 8.431396484375e-05,
      "model_forward_time": 0.11460232734680176,
      "step": 13814
    },
    {
      "epoch": 8.431396484375e-05,
      "step": 13814,
      "training_step_time": 0.3988494873046875
    },
    {
      "epoch": 8.4320068359375e-05,
      "model_forward_time": 0.11498188972473145,
      "step": 13815
    },
    {
      "epoch": 8.4320068359375e-05,
      "step": 13815,
      "training_step_time": 0.708451509475708
    },
    {
      "epoch": 8.4326171875e-05,
      "model_forward_time": 0.11443209648132324,
      "step": 13816
    },
    {
      "epoch": 8.4326171875e-05,
      "step": 13816,
      "training_step_time": 0.46590614318847656
    },
    {
      "epoch": 8.4332275390625e-05,
      "model_forward_time": 0.11475920677185059,
      "step": 13817
    },
    {
      "epoch": 8.4332275390625e-05,
      "step": 13817,
      "training_step_time": 0.40949058532714844
    },
    {
      "epoch": 8.433837890625e-05,
      "model_forward_time": 0.11434578895568848,
      "step": 13818
    },
    {
      "epoch": 8.433837890625e-05,
      "step": 13818,
      "training_step_time": 0.43170952796936035
    },
    {
      "epoch": 8.4344482421875e-05,
      "model_forward_time": 0.1140599250793457,
      "step": 13819
    },
    {
      "epoch": 8.4344482421875e-05,
      "step": 13819,
      "training_step_time": 0.4512205123901367
    },
    {
      "epoch": 8.43505859375e-05,
      "grad_norm": 0.2892286777496338,
      "learning_rate": 9.136951223015113e-05,
      "loss": 0.0581,
      "step": 13820
    },
    {
      "epoch": 8.43505859375e-05,
      "model_forward_time": 0.11393070220947266,
      "step": 13820
    },
    {
      "epoch": 8.43505859375e-05,
      "step": 13820,
      "training_step_time": 0.39531421661376953
    },
    {
      "epoch": 8.4356689453125e-05,
      "model_forward_time": 0.11418676376342773,
      "step": 13821
    },
    {
      "epoch": 8.4356689453125e-05,
      "step": 13821,
      "training_step_time": 0.4781310558319092
    },
    {
      "epoch": 8.436279296875e-05,
      "model_forward_time": 0.11420512199401855,
      "step": 13822
    },
    {
      "epoch": 8.436279296875e-05,
      "step": 13822,
      "training_step_time": 0.38391828536987305
    },
    {
      "epoch": 8.4368896484375e-05,
      "model_forward_time": 0.11501646041870117,
      "step": 13823
    },
    {
      "epoch": 8.4368896484375e-05,
      "step": 13823,
      "training_step_time": 0.3892960548400879
    },
    {
      "epoch": 8.4375e-05,
      "model_forward_time": 0.11424422264099121,
      "step": 13824
    },
    {
      "epoch": 8.4375e-05,
      "step": 13824,
      "training_step_time": 0.3969569206237793
    },
    {
      "epoch": 8.4381103515625e-05,
      "model_forward_time": 0.11501026153564453,
      "step": 13825
    },
    {
      "epoch": 8.4381103515625e-05,
      "step": 13825,
      "training_step_time": 0.4044795036315918
    },
    {
      "epoch": 8.438720703125e-05,
      "model_forward_time": 0.11510014533996582,
      "step": 13826
    },
    {
      "epoch": 8.438720703125e-05,
      "step": 13826,
      "training_step_time": 0.3936276435852051
    },
    {
      "epoch": 8.4393310546875e-05,
      "model_forward_time": 0.11465311050415039,
      "step": 13827
    },
    {
      "epoch": 8.4393310546875e-05,
      "step": 13827,
      "training_step_time": 0.9366273880004883
    },
    {
      "epoch": 8.43994140625e-05,
      "model_forward_time": 0.11398696899414062,
      "step": 13828
    },
    {
      "epoch": 8.43994140625e-05,
      "step": 13828,
      "training_step_time": 0.3886699676513672
    },
    {
      "epoch": 8.4405517578125e-05,
      "model_forward_time": 0.1144704818725586,
      "step": 13829
    },
    {
      "epoch": 8.4405517578125e-05,
      "step": 13829,
      "training_step_time": 0.4461488723754883
    },
    {
      "epoch": 8.441162109375e-05,
      "grad_norm": 0.2303529679775238,
      "learning_rate": 9.135402871372808e-05,
      "loss": 0.0544,
      "step": 13830
    },
    {
      "epoch": 8.441162109375e-05,
      "model_forward_time": 0.1141502857208252,
      "step": 13830
    },
    {
      "epoch": 8.441162109375e-05,
      "step": 13830,
      "training_step_time": 0.42695188522338867
    },
    {
      "epoch": 8.4417724609375e-05,
      "model_forward_time": 0.11429429054260254,
      "step": 13831
    },
    {
      "epoch": 8.4417724609375e-05,
      "step": 13831,
      "training_step_time": 0.442122220993042
    },
    {
      "epoch": 8.4423828125e-05,
      "model_forward_time": 0.11462020874023438,
      "step": 13832
    },
    {
      "epoch": 8.4423828125e-05,
      "step": 13832,
      "training_step_time": 0.4369502067565918
    },
    {
      "epoch": 8.4429931640625e-05,
      "model_forward_time": 0.11460351943969727,
      "step": 13833
    },
    {
      "epoch": 8.4429931640625e-05,
      "step": 13833,
      "training_step_time": 0.5293700695037842
    },
    {
      "epoch": 8.443603515625e-05,
      "model_forward_time": 0.11414575576782227,
      "step": 13834
    },
    {
      "epoch": 8.443603515625e-05,
      "step": 13834,
      "training_step_time": 0.3950212001800537
    },
    {
      "epoch": 8.4442138671875e-05,
      "model_forward_time": 0.11463189125061035,
      "step": 13835
    },
    {
      "epoch": 8.4442138671875e-05,
      "step": 13835,
      "training_step_time": 0.3883497714996338
    },
    {
      "epoch": 8.44482421875e-05,
      "model_forward_time": 0.11477351188659668,
      "step": 13836
    },
    {
      "epoch": 8.44482421875e-05,
      "step": 13836,
      "training_step_time": 0.4009735584259033
    },
    {
      "epoch": 8.4454345703125e-05,
      "model_forward_time": 0.11492156982421875,
      "step": 13837
    },
    {
      "epoch": 8.4454345703125e-05,
      "step": 13837,
      "training_step_time": 0.39059972763061523
    },
    {
      "epoch": 8.446044921875e-05,
      "model_forward_time": 0.11474180221557617,
      "step": 13838
    },
    {
      "epoch": 8.446044921875e-05,
      "step": 13838,
      "training_step_time": 0.3896806240081787
    },
    {
      "epoch": 8.4466552734375e-05,
      "model_forward_time": 0.1144719123840332,
      "step": 13839
    },
    {
      "epoch": 8.4466552734375e-05,
      "step": 13839,
      "training_step_time": 0.9028842449188232
    },
    {
      "epoch": 8.447265625e-05,
      "grad_norm": 0.2318999320268631,
      "learning_rate": 9.133853263504302e-05,
      "loss": 0.0552,
      "step": 13840
    },
    {
      "epoch": 8.447265625e-05,
      "model_forward_time": 0.11449956893920898,
      "step": 13840
    },
    {
      "epoch": 8.447265625e-05,
      "step": 13840,
      "training_step_time": 0.3905766010284424
    },
    {
      "epoch": 8.4478759765625e-05,
      "model_forward_time": 0.11444473266601562,
      "step": 13841
    },
    {
      "epoch": 8.4478759765625e-05,
      "step": 13841,
      "training_step_time": 0.38976502418518066
    },
    {
      "epoch": 8.448486328125e-05,
      "model_forward_time": 0.11410260200500488,
      "step": 13842
    },
    {
      "epoch": 8.448486328125e-05,
      "step": 13842,
      "training_step_time": 0.4030904769897461
    },
    {
      "epoch": 8.4490966796875e-05,
      "model_forward_time": 0.11430120468139648,
      "step": 13843
    },
    {
      "epoch": 8.4490966796875e-05,
      "step": 13843,
      "training_step_time": 0.4151194095611572
    },
    {
      "epoch": 8.44970703125e-05,
      "model_forward_time": 0.11486029624938965,
      "step": 13844
    },
    {
      "epoch": 8.44970703125e-05,
      "step": 13844,
      "training_step_time": 0.393413782119751
    },
    {
      "epoch": 8.4503173828125e-05,
      "model_forward_time": 0.1153864860534668,
      "step": 13845
    },
    {
      "epoch": 8.4503173828125e-05,
      "step": 13845,
      "training_step_time": 0.6615617275238037
    },
    {
      "epoch": 8.450927734375e-05,
      "model_forward_time": 0.1147158145904541,
      "step": 13846
    },
    {
      "epoch": 8.450927734375e-05,
      "step": 13846,
      "training_step_time": 0.47586560249328613
    },
    {
      "epoch": 8.4515380859375e-05,
      "model_forward_time": 0.1146240234375,
      "step": 13847
    },
    {
      "epoch": 8.4515380859375e-05,
      "step": 13847,
      "training_step_time": 0.38563966751098633
    },
    {
      "epoch": 8.4521484375e-05,
      "model_forward_time": 0.11401796340942383,
      "step": 13848
    },
    {
      "epoch": 8.4521484375e-05,
      "step": 13848,
      "training_step_time": 0.38948678970336914
    },
    {
      "epoch": 8.4527587890625e-05,
      "model_forward_time": 0.11462807655334473,
      "step": 13849
    },
    {
      "epoch": 8.4527587890625e-05,
      "step": 13849,
      "training_step_time": 0.394756555557251
    },
    {
      "epoch": 8.453369140625e-05,
      "grad_norm": 0.17883920669555664,
      "learning_rate": 9.132302399880321e-05,
      "loss": 0.0516,
      "step": 13850
    },
    {
      "epoch": 8.453369140625e-05,
      "model_forward_time": 0.11397171020507812,
      "step": 13850
    },
    {
      "epoch": 8.453369140625e-05,
      "step": 13850,
      "training_step_time": 0.3871278762817383
    },
    {
      "epoch": 8.4539794921875e-05,
      "model_forward_time": 0.11456704139709473,
      "step": 13851
    },
    {
      "epoch": 8.4539794921875e-05,
      "step": 13851,
      "training_step_time": 0.6808843612670898
    },
    {
      "epoch": 8.45458984375e-05,
      "model_forward_time": 0.11448788642883301,
      "step": 13852
    },
    {
      "epoch": 8.45458984375e-05,
      "step": 13852,
      "training_step_time": 0.38492488861083984
    },
    {
      "epoch": 8.4552001953125e-05,
      "model_forward_time": 0.11469817161560059,
      "step": 13853
    },
    {
      "epoch": 8.4552001953125e-05,
      "step": 13853,
      "training_step_time": 0.4083530902862549
    },
    {
      "epoch": 8.455810546875e-05,
      "model_forward_time": 0.11525750160217285,
      "step": 13854
    },
    {
      "epoch": 8.455810546875e-05,
      "step": 13854,
      "training_step_time": 0.41011667251586914
    },
    {
      "epoch": 8.4564208984375e-05,
      "model_forward_time": 0.11449503898620605,
      "step": 13855
    },
    {
      "epoch": 8.4564208984375e-05,
      "step": 13855,
      "training_step_time": 0.4097309112548828
    },
    {
      "epoch": 8.45703125e-05,
      "model_forward_time": 0.11515259742736816,
      "step": 13856
    },
    {
      "epoch": 8.45703125e-05,
      "step": 13856,
      "training_step_time": 0.4628865718841553
    },
    {
      "epoch": 8.4576416015625e-05,
      "model_forward_time": 0.11497783660888672,
      "step": 13857
    },
    {
      "epoch": 8.4576416015625e-05,
      "step": 13857,
      "training_step_time": 0.6373505592346191
    },
    {
      "epoch": 8.458251953125e-05,
      "model_forward_time": 0.11392045021057129,
      "step": 13858
    },
    {
      "epoch": 8.458251953125e-05,
      "step": 13858,
      "training_step_time": 0.4550042152404785
    },
    {
      "epoch": 8.4588623046875e-05,
      "model_forward_time": 0.1143808364868164,
      "step": 13859
    },
    {
      "epoch": 8.4588623046875e-05,
      "step": 13859,
      "training_step_time": 0.4364795684814453
    },
    {
      "epoch": 8.45947265625e-05,
      "grad_norm": 0.1782657653093338,
      "learning_rate": 9.130750280971978e-05,
      "loss": 0.0565,
      "step": 13860
    },
    {
      "epoch": 8.45947265625e-05,
      "model_forward_time": 0.11388516426086426,
      "step": 13860
    },
    {
      "epoch": 8.45947265625e-05,
      "step": 13860,
      "training_step_time": 0.388690710067749
    },
    {
      "epoch": 8.4600830078125e-05,
      "model_forward_time": 0.11460113525390625,
      "step": 13861
    },
    {
      "epoch": 8.4600830078125e-05,
      "step": 13861,
      "training_step_time": 0.39775705337524414
    },
    {
      "epoch": 8.460693359375e-05,
      "model_forward_time": 0.1138465404510498,
      "step": 13862
    },
    {
      "epoch": 8.460693359375e-05,
      "step": 13862,
      "training_step_time": 0.39614295959472656
    },
    {
      "epoch": 8.4613037109375e-05,
      "model_forward_time": 0.11452984809875488,
      "step": 13863
    },
    {
      "epoch": 8.4613037109375e-05,
      "step": 13863,
      "training_step_time": 0.5323889255523682
    },
    {
      "epoch": 8.4619140625e-05,
      "model_forward_time": 0.11490941047668457,
      "step": 13864
    },
    {
      "epoch": 8.4619140625e-05,
      "step": 13864,
      "training_step_time": 0.3910810947418213
    },
    {
      "epoch": 8.4625244140625e-05,
      "model_forward_time": 0.11523723602294922,
      "step": 13865
    },
    {
      "epoch": 8.4625244140625e-05,
      "step": 13865,
      "training_step_time": 0.396559476852417
    },
    {
      "epoch": 8.463134765625e-05,
      "model_forward_time": 0.11432576179504395,
      "step": 13866
    },
    {
      "epoch": 8.463134765625e-05,
      "step": 13866,
      "training_step_time": 0.39811229705810547
    },
    {
      "epoch": 8.4637451171875e-05,
      "model_forward_time": 0.11559009552001953,
      "step": 13867
    },
    {
      "epoch": 8.4637451171875e-05,
      "step": 13867,
      "training_step_time": 0.39755868911743164
    },
    {
      "epoch": 8.46435546875e-05,
      "model_forward_time": 0.11440467834472656,
      "step": 13868
    },
    {
      "epoch": 8.46435546875e-05,
      "step": 13868,
      "training_step_time": 0.385805606842041
    },
    {
      "epoch": 8.4649658203125e-05,
      "model_forward_time": 0.11505293846130371,
      "step": 13869
    },
    {
      "epoch": 8.4649658203125e-05,
      "step": 13869,
      "training_step_time": 0.7869088649749756
    },
    {
      "epoch": 8.465576171875e-05,
      "grad_norm": 0.17774106562137604,
      "learning_rate": 9.129196907250765e-05,
      "loss": 0.0498,
      "step": 13870
    },
    {
      "epoch": 8.465576171875e-05,
      "model_forward_time": 0.11429452896118164,
      "step": 13870
    },
    {
      "epoch": 8.465576171875e-05,
      "step": 13870,
      "training_step_time": 0.4359879493713379
    },
    {
      "epoch": 8.4661865234375e-05,
      "model_forward_time": 0.11455488204956055,
      "step": 13871
    },
    {
      "epoch": 8.4661865234375e-05,
      "step": 13871,
      "training_step_time": 0.48987245559692383
    },
    {
      "epoch": 8.466796875e-05,
      "model_forward_time": 0.11522102355957031,
      "step": 13872
    },
    {
      "epoch": 8.466796875e-05,
      "step": 13872,
      "training_step_time": 0.38909101486206055
    },
    {
      "epoch": 8.4674072265625e-05,
      "model_forward_time": 0.11445069313049316,
      "step": 13873
    },
    {
      "epoch": 8.4674072265625e-05,
      "step": 13873,
      "training_step_time": 0.42987871170043945
    },
    {
      "epoch": 8.468017578125e-05,
      "model_forward_time": 0.11431622505187988,
      "step": 13874
    },
    {
      "epoch": 8.468017578125e-05,
      "step": 13874,
      "training_step_time": 0.3858060836791992
    },
    {
      "epoch": 8.4686279296875e-05,
      "model_forward_time": 0.11464214324951172,
      "step": 13875
    },
    {
      "epoch": 8.4686279296875e-05,
      "step": 13875,
      "training_step_time": 0.5741608142852783
    },
    {
      "epoch": 8.46923828125e-05,
      "model_forward_time": 0.11473751068115234,
      "step": 13876
    },
    {
      "epoch": 8.46923828125e-05,
      "step": 13876,
      "training_step_time": 0.3845062255859375
    },
    {
      "epoch": 8.4698486328125e-05,
      "model_forward_time": 0.1147911548614502,
      "step": 13877
    },
    {
      "epoch": 8.4698486328125e-05,
      "step": 13877,
      "training_step_time": 0.38443470001220703
    },
    {
      "epoch": 8.470458984375e-05,
      "model_forward_time": 0.1148688793182373,
      "step": 13878
    },
    {
      "epoch": 8.470458984375e-05,
      "step": 13878,
      "training_step_time": 0.39639949798583984
    },
    {
      "epoch": 8.4710693359375e-05,
      "model_forward_time": 0.11558938026428223,
      "step": 13879
    },
    {
      "epoch": 8.4710693359375e-05,
      "step": 13879,
      "training_step_time": 0.39000678062438965
    },
    {
      "epoch": 8.4716796875e-05,
      "grad_norm": 0.24493086338043213,
      "learning_rate": 9.127642279188558e-05,
      "loss": 0.0534,
      "step": 13880
    },
    {
      "epoch": 8.4716796875e-05,
      "model_forward_time": 0.11543083190917969,
      "step": 13880
    },
    {
      "epoch": 8.4716796875e-05,
      "step": 13880,
      "training_step_time": 0.3953397274017334
    },
    {
      "epoch": 8.4722900390625e-05,
      "model_forward_time": 0.11519742012023926,
      "step": 13881
    },
    {
      "epoch": 8.4722900390625e-05,
      "step": 13881,
      "training_step_time": 0.6618545055389404
    },
    {
      "epoch": 8.472900390625e-05,
      "model_forward_time": 0.11489582061767578,
      "step": 13882
    },
    {
      "epoch": 8.472900390625e-05,
      "step": 13882,
      "training_step_time": 0.4391493797302246
    },
    {
      "epoch": 8.4735107421875e-05,
      "model_forward_time": 0.11463093757629395,
      "step": 13883
    },
    {
      "epoch": 8.4735107421875e-05,
      "step": 13883,
      "training_step_time": 0.4672572612762451
    },
    {
      "epoch": 8.47412109375e-05,
      "model_forward_time": 0.11497855186462402,
      "step": 13884
    },
    {
      "epoch": 8.47412109375e-05,
      "step": 13884,
      "training_step_time": 0.4430882930755615
    },
    {
      "epoch": 8.4747314453125e-05,
      "model_forward_time": 0.1145622730255127,
      "step": 13885
    },
    {
      "epoch": 8.4747314453125e-05,
      "step": 13885,
      "training_step_time": 0.36510777473449707
    },
    {
      "epoch": 8.475341796875e-05,
      "model_forward_time": 0.11421704292297363,
      "step": 13886
    },
    {
      "epoch": 8.475341796875e-05,
      "step": 13886,
      "training_step_time": 0.4522066116333008
    },
    {
      "epoch": 8.4759521484375e-05,
      "model_forward_time": 0.11498475074768066,
      "step": 13887
    },
    {
      "epoch": 8.4759521484375e-05,
      "step": 13887,
      "training_step_time": 0.5074920654296875
    },
    {
      "epoch": 8.4765625e-05,
      "model_forward_time": 0.11401009559631348,
      "step": 13888
    },
    {
      "epoch": 8.4765625e-05,
      "step": 13888,
      "training_step_time": 0.38538646697998047
    },
    {
      "epoch": 8.4771728515625e-05,
      "model_forward_time": 0.11445307731628418,
      "step": 13889
    },
    {
      "epoch": 8.4771728515625e-05,
      "step": 13889,
      "training_step_time": 0.38968491554260254
    },
    {
      "epoch": 8.477783203125e-05,
      "grad_norm": 0.15570302307605743,
      "learning_rate": 9.126086397257612e-05,
      "loss": 0.0585,
      "step": 13890
    },
    {
      "epoch": 8.477783203125e-05,
      "model_forward_time": 0.11485815048217773,
      "step": 13890
    },
    {
      "epoch": 8.477783203125e-05,
      "step": 13890,
      "training_step_time": 0.38684868812561035
    },
    {
      "epoch": 8.4783935546875e-05,
      "model_forward_time": 0.11497926712036133,
      "step": 13891
    },
    {
      "epoch": 8.4783935546875e-05,
      "step": 13891,
      "training_step_time": 0.39685916900634766
    },
    {
      "epoch": 8.47900390625e-05,
      "model_forward_time": 0.11489510536193848,
      "step": 13892
    },
    {
      "epoch": 8.47900390625e-05,
      "step": 13892,
      "training_step_time": 0.38723230361938477
    },
    {
      "epoch": 8.4796142578125e-05,
      "model_forward_time": 0.11489415168762207,
      "step": 13893
    },
    {
      "epoch": 8.4796142578125e-05,
      "step": 13893,
      "training_step_time": 0.7581698894500732
    },
    {
      "epoch": 8.480224609375e-05,
      "model_forward_time": 0.11460518836975098,
      "step": 13894
    },
    {
      "epoch": 8.480224609375e-05,
      "step": 13894,
      "training_step_time": 0.4033076763153076
    },
    {
      "epoch": 8.4808349609375e-05,
      "model_forward_time": 0.1138143539428711,
      "step": 13895
    },
    {
      "epoch": 8.4808349609375e-05,
      "step": 13895,
      "training_step_time": 0.5466504096984863
    },
    {
      "epoch": 8.4814453125e-05,
      "model_forward_time": 0.11456036567687988,
      "step": 13896
    },
    {
      "epoch": 8.4814453125e-05,
      "step": 13896,
      "training_step_time": 0.4782836437225342
    },
    {
      "epoch": 8.4820556640625e-05,
      "model_forward_time": 0.1147007942199707,
      "step": 13897
    },
    {
      "epoch": 8.4820556640625e-05,
      "step": 13897,
      "training_step_time": 0.43773841857910156
    },
    {
      "epoch": 8.482666015625e-05,
      "model_forward_time": 0.11450505256652832,
      "step": 13898
    },
    {
      "epoch": 8.482666015625e-05,
      "step": 13898,
      "training_step_time": 0.4486825466156006
    },
    {
      "epoch": 8.4832763671875e-05,
      "model_forward_time": 0.11490011215209961,
      "step": 13899
    },
    {
      "epoch": 8.4832763671875e-05,
      "step": 13899,
      "training_step_time": 0.6015622615814209
    },
    {
      "epoch": 8.48388671875e-05,
      "grad_norm": 0.16227692365646362,
      "learning_rate": 9.124529261930559e-05,
      "loss": 0.0568,
      "step": 13900
    },
    {
      "epoch": 8.48388671875e-05,
      "model_forward_time": 0.11392378807067871,
      "step": 13900
    },
    {
      "epoch": 8.48388671875e-05,
      "step": 13900,
      "training_step_time": 0.4073050022125244
    },
    {
      "epoch": 8.4844970703125e-05,
      "model_forward_time": 0.11437177658081055,
      "step": 13901
    },
    {
      "epoch": 8.4844970703125e-05,
      "step": 13901,
      "training_step_time": 0.37926769256591797
    },
    {
      "epoch": 8.485107421875e-05,
      "model_forward_time": 0.11446142196655273,
      "step": 13902
    },
    {
      "epoch": 8.485107421875e-05,
      "step": 13902,
      "training_step_time": 0.38096189498901367
    },
    {
      "epoch": 8.4857177734375e-05,
      "model_forward_time": 0.11485171318054199,
      "step": 13903
    },
    {
      "epoch": 8.4857177734375e-05,
      "step": 13903,
      "training_step_time": 0.4035036563873291
    },
    {
      "epoch": 8.486328125e-05,
      "model_forward_time": 0.11503911018371582,
      "step": 13904
    },
    {
      "epoch": 8.486328125e-05,
      "step": 13904,
      "training_step_time": 0.38811540603637695
    },
    {
      "epoch": 8.4869384765625e-05,
      "model_forward_time": 0.11498570442199707,
      "step": 13905
    },
    {
      "epoch": 8.4869384765625e-05,
      "step": 13905,
      "training_step_time": 0.788114070892334
    },
    {
      "epoch": 8.487548828125e-05,
      "model_forward_time": 0.11405658721923828,
      "step": 13906
    },
    {
      "epoch": 8.487548828125e-05,
      "step": 13906,
      "training_step_time": 0.39392971992492676
    },
    {
      "epoch": 8.4881591796875e-05,
      "model_forward_time": 0.11505436897277832,
      "step": 13907
    },
    {
      "epoch": 8.4881591796875e-05,
      "step": 13907,
      "training_step_time": 0.38614988327026367
    },
    {
      "epoch": 8.48876953125e-05,
      "model_forward_time": 0.11448121070861816,
      "step": 13908
    },
    {
      "epoch": 8.48876953125e-05,
      "step": 13908,
      "training_step_time": 0.40389251708984375
    },
    {
      "epoch": 8.4893798828125e-05,
      "model_forward_time": 0.1143803596496582,
      "step": 13909
    },
    {
      "epoch": 8.4893798828125e-05,
      "step": 13909,
      "training_step_time": 0.38634419441223145
    },
    {
      "epoch": 8.489990234375e-05,
      "grad_norm": 0.19528229534626007,
      "learning_rate": 9.122970873680419e-05,
      "loss": 0.0526,
      "step": 13910
    },
    {
      "epoch": 8.489990234375e-05,
      "model_forward_time": 0.11466431617736816,
      "step": 13910
    },
    {
      "epoch": 8.489990234375e-05,
      "step": 13910,
      "training_step_time": 0.4220612049102783
    },
    {
      "epoch": 8.4906005859375e-05,
      "model_forward_time": 0.1147615909576416,
      "step": 13911
    },
    {
      "epoch": 8.4906005859375e-05,
      "step": 13911,
      "training_step_time": 0.6965854167938232
    },
    {
      "epoch": 8.4912109375e-05,
      "model_forward_time": 0.11440324783325195,
      "step": 13912
    },
    {
      "epoch": 8.4912109375e-05,
      "step": 13912,
      "training_step_time": 0.40915560722351074
    },
    {
      "epoch": 8.4918212890625e-05,
      "model_forward_time": 0.11499881744384766,
      "step": 13913
    },
    {
      "epoch": 8.4918212890625e-05,
      "step": 13913,
      "training_step_time": 0.4191861152648926
    },
    {
      "epoch": 8.492431640625e-05,
      "model_forward_time": 0.1142876148223877,
      "step": 13914
    },
    {
      "epoch": 8.492431640625e-05,
      "step": 13914,
      "training_step_time": 0.4716658592224121
    },
    {
      "epoch": 8.4930419921875e-05,
      "model_forward_time": 0.11455988883972168,
      "step": 13915
    },
    {
      "epoch": 8.4930419921875e-05,
      "step": 13915,
      "training_step_time": 0.4825878143310547
    },
    {
      "epoch": 8.49365234375e-05,
      "model_forward_time": 0.11392927169799805,
      "step": 13916
    },
    {
      "epoch": 8.49365234375e-05,
      "step": 13916,
      "training_step_time": 0.422152042388916
    },
    {
      "epoch": 8.4942626953125e-05,
      "model_forward_time": 0.11422109603881836,
      "step": 13917
    },
    {
      "epoch": 8.4942626953125e-05,
      "step": 13917,
      "training_step_time": 0.5067226886749268
    },
    {
      "epoch": 8.494873046875e-05,
      "model_forward_time": 0.11413216590881348,
      "step": 13918
    },
    {
      "epoch": 8.494873046875e-05,
      "step": 13918,
      "training_step_time": 0.3804323673248291
    },
    {
      "epoch": 8.4954833984375e-05,
      "model_forward_time": 0.11478066444396973,
      "step": 13919
    },
    {
      "epoch": 8.4954833984375e-05,
      "step": 13919,
      "training_step_time": 0.39876604080200195
    },
    {
      "epoch": 8.49609375e-05,
      "grad_norm": 0.17138755321502686,
      "learning_rate": 9.121411232980588e-05,
      "loss": 0.06,
      "step": 13920
    },
    {
      "epoch": 8.49609375e-05,
      "model_forward_time": 0.1150057315826416,
      "step": 13920
    },
    {
      "epoch": 8.49609375e-05,
      "step": 13920,
      "training_step_time": 0.39119768142700195
    },
    {
      "epoch": 8.4967041015625e-05,
      "model_forward_time": 0.11516833305358887,
      "step": 13921
    },
    {
      "epoch": 8.4967041015625e-05,
      "step": 13921,
      "training_step_time": 0.39120006561279297
    },
    {
      "epoch": 8.497314453125e-05,
      "model_forward_time": 0.11435270309448242,
      "step": 13922
    },
    {
      "epoch": 8.497314453125e-05,
      "step": 13922,
      "training_step_time": 0.38854432106018066
    },
    {
      "epoch": 8.4979248046875e-05,
      "model_forward_time": 0.11515045166015625,
      "step": 13923
    },
    {
      "epoch": 8.4979248046875e-05,
      "step": 13923,
      "training_step_time": 0.7421071529388428
    },
    {
      "epoch": 8.49853515625e-05,
      "model_forward_time": 0.11423873901367188,
      "step": 13924
    },
    {
      "epoch": 8.49853515625e-05,
      "step": 13924,
      "training_step_time": 0.43364667892456055
    },
    {
      "epoch": 8.4991455078125e-05,
      "model_forward_time": 0.11428022384643555,
      "step": 13925
    },
    {
      "epoch": 8.4991455078125e-05,
      "step": 13925,
      "training_step_time": 0.3640725612640381
    },
    {
      "epoch": 8.499755859375e-05,
      "model_forward_time": 0.11429619789123535,
      "step": 13926
    },
    {
      "epoch": 8.499755859375e-05,
      "step": 13926,
      "training_step_time": 0.48839402198791504
    },
    {
      "epoch": 8.5003662109375e-05,
      "model_forward_time": 0.11432695388793945,
      "step": 13927
    },
    {
      "epoch": 8.5003662109375e-05,
      "step": 13927,
      "training_step_time": 0.38326215744018555
    },
    {
      "epoch": 8.5009765625e-05,
      "model_forward_time": 0.11399221420288086,
      "step": 13928
    },
    {
      "epoch": 8.5009765625e-05,
      "step": 13928,
      "training_step_time": 0.3812134265899658
    },
    {
      "epoch": 8.5015869140625e-05,
      "model_forward_time": 0.11443972587585449,
      "step": 13929
    },
    {
      "epoch": 8.5015869140625e-05,
      "step": 13929,
      "training_step_time": 0.5537014007568359
    },
    {
      "epoch": 8.502197265625e-05,
      "grad_norm": 0.1626938134431839,
      "learning_rate": 9.119850340304843e-05,
      "loss": 0.0536,
      "step": 13930
    },
    {
      "epoch": 8.502197265625e-05,
      "model_forward_time": 0.11417317390441895,
      "step": 13930
    },
    {
      "epoch": 8.502197265625e-05,
      "step": 13930,
      "training_step_time": 0.3866441249847412
    },
    {
      "epoch": 8.5028076171875e-05,
      "model_forward_time": 0.11453723907470703,
      "step": 13931
    },
    {
      "epoch": 8.5028076171875e-05,
      "step": 13931,
      "training_step_time": 0.38872694969177246
    },
    {
      "epoch": 8.50341796875e-05,
      "model_forward_time": 0.11553502082824707,
      "step": 13932
    },
    {
      "epoch": 8.50341796875e-05,
      "step": 13932,
      "training_step_time": 0.3898031711578369
    },
    {
      "epoch": 8.5040283203125e-05,
      "model_forward_time": 0.11545872688293457,
      "step": 13933
    },
    {
      "epoch": 8.5040283203125e-05,
      "step": 13933,
      "training_step_time": 0.4410672187805176
    },
    {
      "epoch": 8.504638671875e-05,
      "model_forward_time": 0.11481547355651855,
      "step": 13934
    },
    {
      "epoch": 8.504638671875e-05,
      "step": 13934,
      "training_step_time": 0.5356035232543945
    },
    {
      "epoch": 8.5052490234375e-05,
      "model_forward_time": 0.1150357723236084,
      "step": 13935
    },
    {
      "epoch": 8.5052490234375e-05,
      "step": 13935,
      "training_step_time": 0.6663250923156738
    },
    {
      "epoch": 8.505859375e-05,
      "model_forward_time": 0.11422371864318848,
      "step": 13936
    },
    {
      "epoch": 8.505859375e-05,
      "step": 13936,
      "training_step_time": 0.42055797576904297
    },
    {
      "epoch": 8.5064697265625e-05,
      "model_forward_time": 0.11460494995117188,
      "step": 13937
    },
    {
      "epoch": 8.5064697265625e-05,
      "step": 13937,
      "training_step_time": 0.3981626033782959
    },
    {
      "epoch": 8.507080078125e-05,
      "model_forward_time": 0.11453604698181152,
      "step": 13938
    },
    {
      "epoch": 8.507080078125e-05,
      "step": 13938,
      "training_step_time": 0.40224361419677734
    },
    {
      "epoch": 8.5076904296875e-05,
      "model_forward_time": 0.11415600776672363,
      "step": 13939
    },
    {
      "epoch": 8.5076904296875e-05,
      "step": 13939,
      "training_step_time": 0.444918155670166
    },
    {
      "epoch": 8.50830078125e-05,
      "grad_norm": 0.1946394294500351,
      "learning_rate": 9.118288196127345e-05,
      "loss": 0.0532,
      "step": 13940
    },
    {
      "epoch": 8.50830078125e-05,
      "model_forward_time": 0.11487746238708496,
      "step": 13940
    },
    {
      "epoch": 8.50830078125e-05,
      "step": 13940,
      "training_step_time": 0.4380016326904297
    },
    {
      "epoch": 8.5089111328125e-05,
      "model_forward_time": 0.11484670639038086,
      "step": 13941
    },
    {
      "epoch": 8.5089111328125e-05,
      "step": 13941,
      "training_step_time": 0.6682052612304688
    },
    {
      "epoch": 8.509521484375e-05,
      "model_forward_time": 0.11418271064758301,
      "step": 13942
    },
    {
      "epoch": 8.509521484375e-05,
      "step": 13942,
      "training_step_time": 0.38770604133605957
    },
    {
      "epoch": 8.5101318359375e-05,
      "model_forward_time": 0.114105224609375,
      "step": 13943
    },
    {
      "epoch": 8.5101318359375e-05,
      "step": 13943,
      "training_step_time": 0.3943042755126953
    },
    {
      "epoch": 8.5107421875e-05,
      "model_forward_time": 0.11454010009765625,
      "step": 13944
    },
    {
      "epoch": 8.5107421875e-05,
      "step": 13944,
      "training_step_time": 0.3904728889465332
    },
    {
      "epoch": 8.5113525390625e-05,
      "model_forward_time": 0.11480307579040527,
      "step": 13945
    },
    {
      "epoch": 8.5113525390625e-05,
      "step": 13945,
      "training_step_time": 0.39862966537475586
    },
    {
      "epoch": 8.511962890625e-05,
      "model_forward_time": 0.11482357978820801,
      "step": 13946
    },
    {
      "epoch": 8.511962890625e-05,
      "step": 13946,
      "training_step_time": 0.3922715187072754
    },
    {
      "epoch": 8.5125732421875e-05,
      "model_forward_time": 0.1150977611541748,
      "step": 13947
    },
    {
      "epoch": 8.5125732421875e-05,
      "step": 13947,
      "training_step_time": 0.9266350269317627
    },
    {
      "epoch": 8.51318359375e-05,
      "model_forward_time": 0.11403918266296387,
      "step": 13948
    },
    {
      "epoch": 8.51318359375e-05,
      "step": 13948,
      "training_step_time": 0.38532209396362305
    },
    {
      "epoch": 8.5137939453125e-05,
      "model_forward_time": 0.11414790153503418,
      "step": 13949
    },
    {
      "epoch": 8.5137939453125e-05,
      "step": 13949,
      "training_step_time": 0.38766026496887207
    },
    {
      "epoch": 8.514404296875e-05,
      "grad_norm": 0.21734468638896942,
      "learning_rate": 9.116724800922629e-05,
      "loss": 0.0603,
      "step": 13950
    },
    {
      "epoch": 8.514404296875e-05,
      "model_forward_time": 0.11402249336242676,
      "step": 13950
    },
    {
      "epoch": 8.514404296875e-05,
      "step": 13950,
      "training_step_time": 0.39160633087158203
    },
    {
      "epoch": 8.5150146484375e-05,
      "model_forward_time": 0.11428570747375488,
      "step": 13951
    },
    {
      "epoch": 8.5150146484375e-05,
      "step": 13951,
      "training_step_time": 0.4003591537475586
    },
    {
      "epoch": 8.515625e-05,
      "model_forward_time": 0.11481261253356934,
      "step": 13952
    },
    {
      "epoch": 8.515625e-05,
      "step": 13952,
      "training_step_time": 0.4474644660949707
    },
    {
      "epoch": 8.5162353515625e-05,
      "model_forward_time": 0.11516714096069336,
      "step": 13953
    },
    {
      "epoch": 8.5162353515625e-05,
      "step": 13953,
      "training_step_time": 0.8542637825012207
    },
    {
      "epoch": 8.516845703125e-05,
      "model_forward_time": 0.11510086059570312,
      "step": 13954
    },
    {
      "epoch": 8.516845703125e-05,
      "step": 13954,
      "training_step_time": 0.37914586067199707
    },
    {
      "epoch": 8.5174560546875e-05,
      "model_forward_time": 0.11427116394042969,
      "step": 13955
    },
    {
      "epoch": 8.5174560546875e-05,
      "step": 13955,
      "training_step_time": 0.3869359493255615
    },
    {
      "epoch": 8.51806640625e-05,
      "model_forward_time": 0.11402750015258789,
      "step": 13956
    },
    {
      "epoch": 8.51806640625e-05,
      "step": 13956,
      "training_step_time": 0.38192319869995117
    },
    {
      "epoch": 8.5186767578125e-05,
      "model_forward_time": 0.1139228343963623,
      "step": 13957
    },
    {
      "epoch": 8.5186767578125e-05,
      "step": 13957,
      "training_step_time": 0.44010138511657715
    },
    {
      "epoch": 8.519287109375e-05,
      "model_forward_time": 0.11454319953918457,
      "step": 13958
    },
    {
      "epoch": 8.519287109375e-05,
      "step": 13958,
      "training_step_time": 0.39774298667907715
    },
    {
      "epoch": 8.5198974609375e-05,
      "model_forward_time": 0.11456179618835449,
      "step": 13959
    },
    {
      "epoch": 8.5198974609375e-05,
      "step": 13959,
      "training_step_time": 0.745161771774292
    },
    {
      "epoch": 8.5205078125e-05,
      "grad_norm": 0.1552966833114624,
      "learning_rate": 9.115160155165614e-05,
      "loss": 0.0515,
      "step": 13960
    },
    {
      "epoch": 8.5205078125e-05,
      "model_forward_time": 0.1149904727935791,
      "step": 13960
    },
    {
      "epoch": 8.5205078125e-05,
      "step": 13960,
      "training_step_time": 0.3966233730316162
    },
    {
      "epoch": 8.5211181640625e-05,
      "model_forward_time": 0.11444544792175293,
      "step": 13961
    },
    {
      "epoch": 8.5211181640625e-05,
      "step": 13961,
      "training_step_time": 0.3867323398590088
    },
    {
      "epoch": 8.521728515625e-05,
      "model_forward_time": 0.11388182640075684,
      "step": 13962
    },
    {
      "epoch": 8.521728515625e-05,
      "step": 13962,
      "training_step_time": 0.3901369571685791
    },
    {
      "epoch": 8.5223388671875e-05,
      "model_forward_time": 0.11523222923278809,
      "step": 13963
    },
    {
      "epoch": 8.5223388671875e-05,
      "step": 13963,
      "training_step_time": 0.43742823600769043
    },
    {
      "epoch": 8.52294921875e-05,
      "model_forward_time": 0.11349630355834961,
      "step": 13964
    },
    {
      "epoch": 8.52294921875e-05,
      "step": 13964,
      "training_step_time": 0.44656896591186523
    },
    {
      "epoch": 8.5235595703125e-05,
      "model_forward_time": 0.11456751823425293,
      "step": 13965
    },
    {
      "epoch": 8.5235595703125e-05,
      "step": 13965,
      "training_step_time": 0.49651098251342773
    },
    {
      "epoch": 8.524169921875e-05,
      "model_forward_time": 0.11428546905517578,
      "step": 13966
    },
    {
      "epoch": 8.524169921875e-05,
      "step": 13966,
      "training_step_time": 0.39980101585388184
    },
    {
      "epoch": 8.5247802734375e-05,
      "model_forward_time": 0.11483907699584961,
      "step": 13967
    },
    {
      "epoch": 8.5247802734375e-05,
      "step": 13967,
      "training_step_time": 0.38997316360473633
    },
    {
      "epoch": 8.525390625e-05,
      "model_forward_time": 0.1145944595336914,
      "step": 13968
    },
    {
      "epoch": 8.525390625e-05,
      "step": 13968,
      "training_step_time": 0.3961448669433594
    },
    {
      "epoch": 8.5260009765625e-05,
      "model_forward_time": 0.11421751976013184,
      "step": 13969
    },
    {
      "epoch": 8.5260009765625e-05,
      "step": 13969,
      "training_step_time": 0.39592623710632324
    },
    {
      "epoch": 8.526611328125e-05,
      "grad_norm": 0.18534724414348602,
      "learning_rate": 9.1135942593316e-05,
      "loss": 0.0522,
      "step": 13970
    },
    {
      "epoch": 8.526611328125e-05,
      "model_forward_time": 0.11537003517150879,
      "step": 13970
    },
    {
      "epoch": 8.526611328125e-05,
      "step": 13970,
      "training_step_time": 0.41744184494018555
    },
    {
      "epoch": 8.5272216796875e-05,
      "model_forward_time": 0.1139678955078125,
      "step": 13971
    },
    {
      "epoch": 8.5272216796875e-05,
      "step": 13971,
      "training_step_time": 0.7432777881622314
    },
    {
      "epoch": 8.52783203125e-05,
      "model_forward_time": 0.11495280265808105,
      "step": 13972
    },
    {
      "epoch": 8.52783203125e-05,
      "step": 13972,
      "training_step_time": 0.3954951763153076
    },
    {
      "epoch": 8.5284423828125e-05,
      "model_forward_time": 0.1142570972442627,
      "step": 13973
    },
    {
      "epoch": 8.5284423828125e-05,
      "step": 13973,
      "training_step_time": 0.39356231689453125
    },
    {
      "epoch": 8.529052734375e-05,
      "model_forward_time": 0.11404919624328613,
      "step": 13974
    },
    {
      "epoch": 8.529052734375e-05,
      "step": 13974,
      "training_step_time": 0.39061832427978516
    },
    {
      "epoch": 8.5296630859375e-05,
      "model_forward_time": 0.1151118278503418,
      "step": 13975
    },
    {
      "epoch": 8.5296630859375e-05,
      "step": 13975,
      "training_step_time": 0.409254789352417
    },
    {
      "epoch": 8.5302734375e-05,
      "model_forward_time": 0.11476349830627441,
      "step": 13976
    },
    {
      "epoch": 8.5302734375e-05,
      "step": 13976,
      "training_step_time": 0.4454793930053711
    },
    {
      "epoch": 8.5308837890625e-05,
      "model_forward_time": 0.11483597755432129,
      "step": 13977
    },
    {
      "epoch": 8.5308837890625e-05,
      "step": 13977,
      "training_step_time": 0.5487055778503418
    },
    {
      "epoch": 8.531494140625e-05,
      "model_forward_time": 0.1151576042175293,
      "step": 13978
    },
    {
      "epoch": 8.531494140625e-05,
      "step": 13978,
      "training_step_time": 0.46545982360839844
    },
    {
      "epoch": 8.5321044921875e-05,
      "model_forward_time": 0.11469888687133789,
      "step": 13979
    },
    {
      "epoch": 8.5321044921875e-05,
      "step": 13979,
      "training_step_time": 0.47205686569213867
    },
    {
      "epoch": 8.53271484375e-05,
      "grad_norm": 0.14772562682628632,
      "learning_rate": 9.112027113896262e-05,
      "loss": 0.0526,
      "step": 13980
    },
    {
      "epoch": 8.53271484375e-05,
      "model_forward_time": 0.11403083801269531,
      "step": 13980
    },
    {
      "epoch": 8.53271484375e-05,
      "step": 13980,
      "training_step_time": 0.46329498291015625
    },
    {
      "epoch": 8.5333251953125e-05,
      "model_forward_time": 0.1141510009765625,
      "step": 13981
    },
    {
      "epoch": 8.5333251953125e-05,
      "step": 13981,
      "training_step_time": 0.3908655643463135
    },
    {
      "epoch": 8.533935546875e-05,
      "model_forward_time": 0.11490559577941895,
      "step": 13982
    },
    {
      "epoch": 8.533935546875e-05,
      "step": 13982,
      "training_step_time": 0.40918421745300293
    },
    {
      "epoch": 8.5345458984375e-05,
      "model_forward_time": 0.11415815353393555,
      "step": 13983
    },
    {
      "epoch": 8.5345458984375e-05,
      "step": 13983,
      "training_step_time": 0.42098045349121094
    },
    {
      "epoch": 8.53515625e-05,
      "model_forward_time": 0.11465764045715332,
      "step": 13984
    },
    {
      "epoch": 8.53515625e-05,
      "step": 13984,
      "training_step_time": 0.3957033157348633
    },
    {
      "epoch": 8.5357666015625e-05,
      "model_forward_time": 0.11459946632385254,
      "step": 13985
    },
    {
      "epoch": 8.5357666015625e-05,
      "step": 13985,
      "training_step_time": 0.39954209327697754
    },
    {
      "epoch": 8.536376953125e-05,
      "model_forward_time": 0.11479687690734863,
      "step": 13986
    },
    {
      "epoch": 8.536376953125e-05,
      "step": 13986,
      "training_step_time": 0.39283251762390137
    },
    {
      "epoch": 8.5369873046875e-05,
      "model_forward_time": 0.11558127403259277,
      "step": 13987
    },
    {
      "epoch": 8.5369873046875e-05,
      "step": 13987,
      "training_step_time": 0.39879345893859863
    },
    {
      "epoch": 8.53759765625e-05,
      "model_forward_time": 0.11482000350952148,
      "step": 13988
    },
    {
      "epoch": 8.53759765625e-05,
      "step": 13988,
      "training_step_time": 0.3934612274169922
    },
    {
      "epoch": 8.5382080078125e-05,
      "model_forward_time": 0.11484026908874512,
      "step": 13989
    },
    {
      "epoch": 8.5382080078125e-05,
      "step": 13989,
      "training_step_time": 0.8187015056610107
    },
    {
      "epoch": 8.538818359375e-05,
      "grad_norm": 0.1506882607936859,
      "learning_rate": 9.110458719335659e-05,
      "loss": 0.0523,
      "step": 13990
    },
    {
      "epoch": 8.538818359375e-05,
      "model_forward_time": 0.11411285400390625,
      "step": 13990
    },
    {
      "epoch": 8.538818359375e-05,
      "step": 13990,
      "training_step_time": 0.4351372718811035
    },
    {
      "epoch": 8.5394287109375e-05,
      "model_forward_time": 0.11452889442443848,
      "step": 13991
    },
    {
      "epoch": 8.5394287109375e-05,
      "step": 13991,
      "training_step_time": 0.3967103958129883
    },
    {
      "epoch": 8.5400390625e-05,
      "model_forward_time": 0.11417508125305176,
      "step": 13992
    },
    {
      "epoch": 8.5400390625e-05,
      "step": 13992,
      "training_step_time": 0.5048580169677734
    },
    {
      "epoch": 8.5406494140625e-05,
      "model_forward_time": 0.11438918113708496,
      "step": 13993
    },
    {
      "epoch": 8.5406494140625e-05,
      "step": 13993,
      "training_step_time": 0.3973836898803711
    },
    {
      "epoch": 8.541259765625e-05,
      "model_forward_time": 0.11433911323547363,
      "step": 13994
    },
    {
      "epoch": 8.541259765625e-05,
      "step": 13994,
      "training_step_time": 0.407773494720459
    },
    {
      "epoch": 8.5418701171875e-05,
      "model_forward_time": 0.11460065841674805,
      "step": 13995
    },
    {
      "epoch": 8.5418701171875e-05,
      "step": 13995,
      "training_step_time": 0.6189801692962646
    },
    {
      "epoch": 8.54248046875e-05,
      "model_forward_time": 0.11395478248596191,
      "step": 13996
    },
    {
      "epoch": 8.54248046875e-05,
      "step": 13996,
      "training_step_time": 0.46238136291503906
    },
    {
      "epoch": 8.5430908203125e-05,
      "model_forward_time": 0.11417961120605469,
      "step": 13997
    },
    {
      "epoch": 8.5430908203125e-05,
      "step": 13997,
      "training_step_time": 0.3910946846008301
    },
    {
      "epoch": 8.543701171875e-05,
      "model_forward_time": 0.1145625114440918,
      "step": 13998
    },
    {
      "epoch": 8.543701171875e-05,
      "step": 13998,
      "training_step_time": 0.40215444564819336
    },
    {
      "epoch": 8.5443115234375e-05,
      "model_forward_time": 0.11409521102905273,
      "step": 13999
    },
    {
      "epoch": 8.5443115234375e-05,
      "step": 13999,
      "training_step_time": 0.3970496654510498
    },
    {
      "epoch": 8.544921875e-05,
      "grad_norm": 0.1305714249610901,
      "learning_rate": 9.108889076126226e-05,
      "loss": 0.0489,
      "step": 14000
    },
    {
      "epoch": 8.544921875e-05,
      "model_forward_time": 0.11332869529724121,
      "step": 14000
    },
    {
      "epoch": 8.544921875e-05,
      "step": 14000,
      "training_step_time": 0.3546907901763916
    },
    {
      "epoch": 8.5455322265625e-05,
      "model_forward_time": 0.11286616325378418,
      "step": 14001
    },
    {
      "epoch": 8.5455322265625e-05,
      "step": 14001,
      "training_step_time": 0.37883591651916504
    },
    {
      "epoch": 8.546142578125e-05,
      "model_forward_time": 0.1130063533782959,
      "step": 14002
    },
    {
      "epoch": 8.546142578125e-05,
      "step": 14002,
      "training_step_time": 0.3650341033935547
    },
    {
      "epoch": 8.5467529296875e-05,
      "model_forward_time": 0.1146705150604248,
      "step": 14003
    },
    {
      "epoch": 8.5467529296875e-05,
      "step": 14003,
      "training_step_time": 0.37354230880737305
    },
    {
      "epoch": 8.54736328125e-05,
      "model_forward_time": 0.11438298225402832,
      "step": 14004
    },
    {
      "epoch": 8.54736328125e-05,
      "step": 14004,
      "training_step_time": 0.3805704116821289
    },
    {
      "epoch": 8.5479736328125e-05,
      "model_forward_time": 0.11389851570129395,
      "step": 14005
    },
    {
      "epoch": 8.5479736328125e-05,
      "step": 14005,
      "training_step_time": 0.3812246322631836
    },
    {
      "epoch": 8.548583984375e-05,
      "model_forward_time": 0.11414504051208496,
      "step": 14006
    },
    {
      "epoch": 8.548583984375e-05,
      "step": 14006,
      "training_step_time": 0.3792102336883545
    },
    {
      "epoch": 8.5491943359375e-05,
      "model_forward_time": 0.11483883857727051,
      "step": 14007
    },
    {
      "epoch": 8.5491943359375e-05,
      "step": 14007,
      "training_step_time": 0.4395418167114258
    },
    {
      "epoch": 8.5498046875e-05,
      "model_forward_time": 0.11493182182312012,
      "step": 14008
    },
    {
      "epoch": 8.5498046875e-05,
      "step": 14008,
      "training_step_time": 0.4135289192199707
    },
    {
      "epoch": 8.5504150390625e-05,
      "model_forward_time": 0.11477041244506836,
      "step": 14009
    },
    {
      "epoch": 8.5504150390625e-05,
      "step": 14009,
      "training_step_time": 0.4337332248687744
    },
    {
      "epoch": 8.551025390625e-05,
      "grad_norm": 0.1002214252948761,
      "learning_rate": 9.107318184744781e-05,
      "loss": 0.0542,
      "step": 14010
    },
    {
      "epoch": 8.551025390625e-05,
      "model_forward_time": 0.11556315422058105,
      "step": 14010
    },
    {
      "epoch": 8.551025390625e-05,
      "step": 14010,
      "training_step_time": 0.4084625244140625
    },
    {
      "epoch": 8.5516357421875e-05,
      "model_forward_time": 0.11421728134155273,
      "step": 14011
    },
    {
      "epoch": 8.5516357421875e-05,
      "step": 14011,
      "training_step_time": 0.4750828742980957
    },
    {
      "epoch": 8.55224609375e-05,
      "model_forward_time": 0.11585330963134766,
      "step": 14012
    },
    {
      "epoch": 8.55224609375e-05,
      "step": 14012,
      "training_step_time": 0.4645988941192627
    },
    {
      "epoch": 8.5528564453125e-05,
      "model_forward_time": 0.11502695083618164,
      "step": 14013
    },
    {
      "epoch": 8.5528564453125e-05,
      "step": 14013,
      "training_step_time": 0.39556336402893066
    },
    {
      "epoch": 8.553466796875e-05,
      "model_forward_time": 0.11467313766479492,
      "step": 14014
    },
    {
      "epoch": 8.553466796875e-05,
      "step": 14014,
      "training_step_time": 0.3826406002044678
    },
    {
      "epoch": 8.5540771484375e-05,
      "model_forward_time": 0.11497116088867188,
      "step": 14015
    },
    {
      "epoch": 8.5540771484375e-05,
      "step": 14015,
      "training_step_time": 0.4045279026031494
    },
    {
      "epoch": 8.5546875e-05,
      "model_forward_time": 0.11499857902526855,
      "step": 14016
    },
    {
      "epoch": 8.5546875e-05,
      "step": 14016,
      "training_step_time": 0.3960075378417969
    },
    {
      "epoch": 8.5552978515625e-05,
      "model_forward_time": 0.11535501480102539,
      "step": 14017
    },
    {
      "epoch": 8.5552978515625e-05,
      "step": 14017,
      "training_step_time": 0.3829305171966553
    },
    {
      "epoch": 8.555908203125e-05,
      "model_forward_time": 0.11483240127563477,
      "step": 14018
    },
    {
      "epoch": 8.555908203125e-05,
      "step": 14018,
      "training_step_time": 0.38434576988220215
    },
    {
      "epoch": 8.5565185546875e-05,
      "model_forward_time": 0.11539244651794434,
      "step": 14019
    },
    {
      "epoch": 8.5565185546875e-05,
      "step": 14019,
      "training_step_time": 0.39797353744506836
    },
    {
      "epoch": 8.55712890625e-05,
      "grad_norm": 0.17701369524002075,
      "learning_rate": 9.105746045668521e-05,
      "loss": 0.0574,
      "step": 14020
    },
    {
      "epoch": 8.55712890625e-05,
      "model_forward_time": 0.11457681655883789,
      "step": 14020
    },
    {
      "epoch": 8.55712890625e-05,
      "step": 14020,
      "training_step_time": 0.4039633274078369
    },
    {
      "epoch": 8.5577392578125e-05,
      "model_forward_time": 0.11520624160766602,
      "step": 14021
    },
    {
      "epoch": 8.5577392578125e-05,
      "step": 14021,
      "training_step_time": 0.39490556716918945
    },
    {
      "epoch": 8.558349609375e-05,
      "model_forward_time": 0.11524319648742676,
      "step": 14022
    },
    {
      "epoch": 8.558349609375e-05,
      "step": 14022,
      "training_step_time": 0.44469618797302246
    },
    {
      "epoch": 8.5589599609375e-05,
      "model_forward_time": 0.11558651924133301,
      "step": 14023
    },
    {
      "epoch": 8.5589599609375e-05,
      "step": 14023,
      "training_step_time": 0.38986921310424805
    },
    {
      "epoch": 8.5595703125e-05,
      "model_forward_time": 0.11493182182312012,
      "step": 14024
    },
    {
      "epoch": 8.5595703125e-05,
      "step": 14024,
      "training_step_time": 0.445340633392334
    },
    {
      "epoch": 8.5601806640625e-05,
      "model_forward_time": 0.11516332626342773,
      "step": 14025
    },
    {
      "epoch": 8.5601806640625e-05,
      "step": 14025,
      "training_step_time": 0.4511234760284424
    },
    {
      "epoch": 8.560791015625e-05,
      "model_forward_time": 0.11520791053771973,
      "step": 14026
    },
    {
      "epoch": 8.560791015625e-05,
      "step": 14026,
      "training_step_time": 0.4269683361053467
    },
    {
      "epoch": 8.5614013671875e-05,
      "model_forward_time": 0.11438655853271484,
      "step": 14027
    },
    {
      "epoch": 8.5614013671875e-05,
      "step": 14027,
      "training_step_time": 0.3937981128692627
    },
    {
      "epoch": 8.56201171875e-05,
      "model_forward_time": 0.11496233940124512,
      "step": 14028
    },
    {
      "epoch": 8.56201171875e-05,
      "step": 14028,
      "training_step_time": 0.4029979705810547
    },
    {
      "epoch": 8.5626220703125e-05,
      "model_forward_time": 0.1161339282989502,
      "step": 14029
    },
    {
      "epoch": 8.5626220703125e-05,
      "step": 14029,
      "training_step_time": 0.39824652671813965
    },
    {
      "epoch": 8.563232421875e-05,
      "grad_norm": 0.13959676027297974,
      "learning_rate": 9.104172659375017e-05,
      "loss": 0.054,
      "step": 14030
    },
    {
      "epoch": 8.563232421875e-05,
      "model_forward_time": 0.11482858657836914,
      "step": 14030
    },
    {
      "epoch": 8.563232421875e-05,
      "step": 14030,
      "training_step_time": 0.39620089530944824
    },
    {
      "epoch": 8.5638427734375e-05,
      "model_forward_time": 0.1155710220336914,
      "step": 14031
    },
    {
      "epoch": 8.5638427734375e-05,
      "step": 14031,
      "training_step_time": 0.3902125358581543
    },
    {
      "epoch": 8.564453125e-05,
      "model_forward_time": 0.11508655548095703,
      "step": 14032
    },
    {
      "epoch": 8.564453125e-05,
      "step": 14032,
      "training_step_time": 0.3966341018676758
    },
    {
      "epoch": 8.5650634765625e-05,
      "model_forward_time": 0.11525201797485352,
      "step": 14033
    },
    {
      "epoch": 8.5650634765625e-05,
      "step": 14033,
      "training_step_time": 0.3993833065032959
    },
    {
      "epoch": 8.565673828125e-05,
      "model_forward_time": 0.1148841381072998,
      "step": 14034
    },
    {
      "epoch": 8.565673828125e-05,
      "step": 14034,
      "training_step_time": 0.39195847511291504
    },
    {
      "epoch": 8.5662841796875e-05,
      "model_forward_time": 0.11580276489257812,
      "step": 14035
    },
    {
      "epoch": 8.5662841796875e-05,
      "step": 14035,
      "training_step_time": 0.39007568359375
    },
    {
      "epoch": 8.56689453125e-05,
      "model_forward_time": 0.11505460739135742,
      "step": 14036
    },
    {
      "epoch": 8.56689453125e-05,
      "step": 14036,
      "training_step_time": 0.3972632884979248
    },
    {
      "epoch": 8.5675048828125e-05,
      "model_forward_time": 0.11482524871826172,
      "step": 14037
    },
    {
      "epoch": 8.5675048828125e-05,
      "step": 14037,
      "training_step_time": 0.39742398262023926
    },
    {
      "epoch": 8.568115234375e-05,
      "model_forward_time": 0.11519122123718262,
      "step": 14038
    },
    {
      "epoch": 8.568115234375e-05,
      "step": 14038,
      "training_step_time": 0.4615662097930908
    },
    {
      "epoch": 8.5687255859375e-05,
      "model_forward_time": 0.11436867713928223,
      "step": 14039
    },
    {
      "epoch": 8.5687255859375e-05,
      "step": 14039,
      "training_step_time": 0.422588586807251
    },
    {
      "epoch": 8.5693359375e-05,
      "grad_norm": 0.1628081351518631,
      "learning_rate": 9.102598026342222e-05,
      "loss": 0.0523,
      "step": 14040
    },
    {
      "epoch": 8.5693359375e-05,
      "model_forward_time": 0.11448121070861816,
      "step": 14040
    },
    {
      "epoch": 8.5693359375e-05,
      "step": 14040,
      "training_step_time": 0.3662385940551758
    },
    {
      "epoch": 8.5699462890625e-05,
      "model_forward_time": 0.11477184295654297,
      "step": 14041
    },
    {
      "epoch": 8.5699462890625e-05,
      "step": 14041,
      "training_step_time": 0.4090914726257324
    },
    {
      "epoch": 8.570556640625e-05,
      "model_forward_time": 0.11511373519897461,
      "step": 14042
    },
    {
      "epoch": 8.570556640625e-05,
      "step": 14042,
      "training_step_time": 0.46297740936279297
    },
    {
      "epoch": 8.5711669921875e-05,
      "model_forward_time": 0.11459040641784668,
      "step": 14043
    },
    {
      "epoch": 8.5711669921875e-05,
      "step": 14043,
      "training_step_time": 0.39220714569091797
    },
    {
      "epoch": 8.57177734375e-05,
      "model_forward_time": 0.11523270606994629,
      "step": 14044
    },
    {
      "epoch": 8.57177734375e-05,
      "step": 14044,
      "training_step_time": 0.3956480026245117
    },
    {
      "epoch": 8.5723876953125e-05,
      "model_forward_time": 0.115234375,
      "step": 14045
    },
    {
      "epoch": 8.5723876953125e-05,
      "step": 14045,
      "training_step_time": 0.3871035575866699
    },
    {
      "epoch": 8.572998046875e-05,
      "model_forward_time": 0.11561083793640137,
      "step": 14046
    },
    {
      "epoch": 8.572998046875e-05,
      "step": 14046,
      "training_step_time": 0.40271592140197754
    },
    {
      "epoch": 8.5736083984375e-05,
      "model_forward_time": 0.11547064781188965,
      "step": 14047
    },
    {
      "epoch": 8.5736083984375e-05,
      "step": 14047,
      "training_step_time": 0.3939642906188965
    },
    {
      "epoch": 8.57421875e-05,
      "model_forward_time": 0.11527872085571289,
      "step": 14048
    },
    {
      "epoch": 8.57421875e-05,
      "step": 14048,
      "training_step_time": 0.39478206634521484
    },
    {
      "epoch": 8.5748291015625e-05,
      "model_forward_time": 0.11592507362365723,
      "step": 14049
    },
    {
      "epoch": 8.5748291015625e-05,
      "step": 14049,
      "training_step_time": 0.5795719623565674
    },
    {
      "epoch": 8.575439453125e-05,
      "grad_norm": 0.17152130603790283,
      "learning_rate": 9.101022147048473e-05,
      "loss": 0.0481,
      "step": 14050
    },
    {
      "epoch": 8.575439453125e-05,
      "model_forward_time": 0.11549973487854004,
      "step": 14050
    },
    {
      "epoch": 8.575439453125e-05,
      "step": 14050,
      "training_step_time": 0.4155294895172119
    },
    {
      "epoch": 8.5760498046875e-05,
      "model_forward_time": 0.11812925338745117,
      "step": 14051
    },
    {
      "epoch": 8.5760498046875e-05,
      "step": 14051,
      "training_step_time": 0.43997693061828613
    },
    {
      "epoch": 8.57666015625e-05,
      "model_forward_time": 0.11689019203186035,
      "step": 14052
    },
    {
      "epoch": 8.57666015625e-05,
      "step": 14052,
      "training_step_time": 0.5918359756469727
    },
    {
      "epoch": 8.5772705078125e-05,
      "model_forward_time": 0.11935830116271973,
      "step": 14053
    },
    {
      "epoch": 8.5772705078125e-05,
      "step": 14053,
      "training_step_time": 0.750154972076416
    },
    {
      "epoch": 8.577880859375e-05,
      "model_forward_time": 0.12196993827819824,
      "step": 14054
    },
    {
      "epoch": 8.577880859375e-05,
      "step": 14054,
      "training_step_time": 0.7622578144073486
    },
    {
      "epoch": 8.5784912109375e-05,
      "model_forward_time": 0.11645054817199707,
      "step": 14055
    },
    {
      "epoch": 8.5784912109375e-05,
      "step": 14055,
      "training_step_time": 0.6094644069671631
    },
    {
      "epoch": 8.5791015625e-05,
      "model_forward_time": 0.12586092948913574,
      "step": 14056
    },
    {
      "epoch": 8.5791015625e-05,
      "step": 14056,
      "training_step_time": 0.6584017276763916
    },
    {
      "epoch": 8.5797119140625e-05,
      "model_forward_time": 0.11987113952636719,
      "step": 14057
    },
    {
      "epoch": 8.5797119140625e-05,
      "step": 14057,
      "training_step_time": 0.6539363861083984
    },
    {
      "epoch": 8.580322265625e-05,
      "model_forward_time": 0.11884021759033203,
      "step": 14058
    },
    {
      "epoch": 8.580322265625e-05,
      "step": 14058,
      "training_step_time": 0.6246020793914795
    },
    {
      "epoch": 8.5809326171875e-05,
      "model_forward_time": 0.11850976943969727,
      "step": 14059
    },
    {
      "epoch": 8.5809326171875e-05,
      "step": 14059,
      "training_step_time": 0.6917824745178223
    },
    {
      "epoch": 8.58154296875e-05,
      "grad_norm": 0.18377256393432617,
      "learning_rate": 9.099445021972473e-05,
      "loss": 0.0567,
      "step": 14060
    },
    {
      "epoch": 8.58154296875e-05,
      "model_forward_time": 0.1162111759185791,
      "step": 14060
    },
    {
      "epoch": 8.58154296875e-05,
      "step": 14060,
      "training_step_time": 0.6283290386199951
    },
    {
      "epoch": 8.5821533203125e-05,
      "model_forward_time": 0.12157177925109863,
      "step": 14061
    },
    {
      "epoch": 8.5821533203125e-05,
      "step": 14061,
      "training_step_time": 0.7571723461151123
    },
    {
      "epoch": 8.582763671875e-05,
      "model_forward_time": 0.11736249923706055,
      "step": 14062
    },
    {
      "epoch": 8.582763671875e-05,
      "step": 14062,
      "training_step_time": 0.6709342002868652
    },
    {
      "epoch": 8.5833740234375e-05,
      "model_forward_time": 0.11906051635742188,
      "step": 14063
    },
    {
      "epoch": 8.5833740234375e-05,
      "step": 14063,
      "training_step_time": 0.7260258197784424
    },
    {
      "epoch": 8.583984375e-05,
      "model_forward_time": 0.12166833877563477,
      "step": 14064
    },
    {
      "epoch": 8.583984375e-05,
      "step": 14064,
      "training_step_time": 0.6969375610351562
    },
    {
      "epoch": 8.5845947265625e-05,
      "model_forward_time": 0.11938071250915527,
      "step": 14065
    },
    {
      "epoch": 8.5845947265625e-05,
      "step": 14065,
      "training_step_time": 0.6923596858978271
    },
    {
      "epoch": 8.585205078125e-05,
      "model_forward_time": 0.11648964881896973,
      "step": 14066
    },
    {
      "epoch": 8.585205078125e-05,
      "step": 14066,
      "training_step_time": 0.6207430362701416
    },
    {
      "epoch": 8.5858154296875e-05,
      "model_forward_time": 0.11677050590515137,
      "step": 14067
    },
    {
      "epoch": 8.5858154296875e-05,
      "step": 14067,
      "training_step_time": 0.824859619140625
    },
    {
      "epoch": 8.58642578125e-05,
      "model_forward_time": 0.11985206604003906,
      "step": 14068
    },
    {
      "epoch": 8.58642578125e-05,
      "step": 14068,
      "training_step_time": 0.70465087890625
    },
    {
      "epoch": 8.5870361328125e-05,
      "model_forward_time": 0.11953496932983398,
      "step": 14069
    },
    {
      "epoch": 8.5870361328125e-05,
      "step": 14069,
      "training_step_time": 0.68959641456604
    },
    {
      "epoch": 8.587646484375e-05,
      "grad_norm": 0.1610356867313385,
      "learning_rate": 9.097866651593317e-05,
      "loss": 0.0634,
      "step": 14070
    },
    {
      "epoch": 8.587646484375e-05,
      "model_forward_time": 0.11725735664367676,
      "step": 14070
    },
    {
      "epoch": 8.587646484375e-05,
      "step": 14070,
      "training_step_time": 0.678156852722168
    },
    {
      "epoch": 8.5882568359375e-05,
      "model_forward_time": 0.12103414535522461,
      "step": 14071
    },
    {
      "epoch": 8.5882568359375e-05,
      "step": 14071,
      "training_step_time": 0.754094123840332
    },
    {
      "epoch": 8.5888671875e-05,
      "model_forward_time": 0.11686205863952637,
      "step": 14072
    },
    {
      "epoch": 8.5888671875e-05,
      "step": 14072,
      "training_step_time": 0.7448444366455078
    },
    {
      "epoch": 8.5894775390625e-05,
      "model_forward_time": 0.11721491813659668,
      "step": 14073
    },
    {
      "epoch": 8.5894775390625e-05,
      "step": 14073,
      "training_step_time": 0.6674268245697021
    },
    {
      "epoch": 8.590087890625e-05,
      "model_forward_time": 0.11785697937011719,
      "step": 14074
    },
    {
      "epoch": 8.590087890625e-05,
      "step": 14074,
      "training_step_time": 0.6497952938079834
    },
    {
      "epoch": 8.5906982421875e-05,
      "model_forward_time": 0.1166539192199707,
      "step": 14075
    },
    {
      "epoch": 8.5906982421875e-05,
      "step": 14075,
      "training_step_time": 0.7037806510925293
    },
    {
      "epoch": 8.59130859375e-05,
      "model_forward_time": 0.11978387832641602,
      "step": 14076
    },
    {
      "epoch": 8.59130859375e-05,
      "step": 14076,
      "training_step_time": 0.6394650936126709
    },
    {
      "epoch": 8.5919189453125e-05,
      "model_forward_time": 0.12568068504333496,
      "step": 14077
    },
    {
      "epoch": 8.5919189453125e-05,
      "step": 14077,
      "training_step_time": 0.6736421585083008
    },
    {
      "epoch": 8.592529296875e-05,
      "model_forward_time": 0.12261724472045898,
      "step": 14078
    },
    {
      "epoch": 8.592529296875e-05,
      "step": 14078,
      "training_step_time": 0.7091751098632812
    },
    {
      "epoch": 8.5931396484375e-05,
      "model_forward_time": 0.11787891387939453,
      "step": 14079
    },
    {
      "epoch": 8.5931396484375e-05,
      "step": 14079,
      "training_step_time": 0.665071964263916
    },
    {
      "epoch": 8.59375e-05,
      "grad_norm": 0.15609776973724365,
      "learning_rate": 9.09628703639047e-05,
      "loss": 0.0628,
      "step": 14080
    },
    {
      "epoch": 8.59375e-05,
      "model_forward_time": 0.12483572959899902,
      "step": 14080
    },
    {
      "epoch": 8.59375e-05,
      "step": 14080,
      "training_step_time": 0.7318518161773682
    },
    {
      "epoch": 8.5943603515625e-05,
      "model_forward_time": 0.11877703666687012,
      "step": 14081
    },
    {
      "epoch": 8.5943603515625e-05,
      "step": 14081,
      "training_step_time": 0.6704180240631104
    },
    {
      "epoch": 8.594970703125e-05,
      "model_forward_time": 0.12025594711303711,
      "step": 14082
    },
    {
      "epoch": 8.594970703125e-05,
      "step": 14082,
      "training_step_time": 0.6271121501922607
    },
    {
      "epoch": 8.5955810546875e-05,
      "model_forward_time": 0.11791634559631348,
      "step": 14083
    },
    {
      "epoch": 8.5955810546875e-05,
      "step": 14083,
      "training_step_time": 0.6931614875793457
    },
    {
      "epoch": 8.59619140625e-05,
      "model_forward_time": 0.12655067443847656,
      "step": 14084
    },
    {
      "epoch": 8.59619140625e-05,
      "step": 14084,
      "training_step_time": 0.728813648223877
    },
    {
      "epoch": 8.5968017578125e-05,
      "model_forward_time": 0.11853218078613281,
      "step": 14085
    },
    {
      "epoch": 8.5968017578125e-05,
      "step": 14085,
      "training_step_time": 0.6509983539581299
    },
    {
      "epoch": 8.597412109375e-05,
      "model_forward_time": 0.11615324020385742,
      "step": 14086
    },
    {
      "epoch": 8.597412109375e-05,
      "step": 14086,
      "training_step_time": 0.6533594131469727
    },
    {
      "epoch": 8.5980224609375e-05,
      "model_forward_time": 0.11654424667358398,
      "step": 14087
    },
    {
      "epoch": 8.5980224609375e-05,
      "step": 14087,
      "training_step_time": 0.7460401058197021
    },
    {
      "epoch": 8.5986328125e-05,
      "model_forward_time": 0.12387943267822266,
      "step": 14088
    },
    {
      "epoch": 8.5986328125e-05,
      "step": 14088,
      "training_step_time": 0.6270594596862793
    },
    {
      "epoch": 8.5992431640625e-05,
      "model_forward_time": 0.12352180480957031,
      "step": 14089
    },
    {
      "epoch": 8.5992431640625e-05,
      "step": 14089,
      "training_step_time": 0.7078530788421631
    },
    {
      "epoch": 8.599853515625e-05,
      "grad_norm": 0.1747036874294281,
      "learning_rate": 9.094706176843777e-05,
      "loss": 0.0608,
      "step": 14090
    },
    {
      "epoch": 8.599853515625e-05,
      "model_forward_time": 0.11971139907836914,
      "step": 14090
    },
    {
      "epoch": 8.599853515625e-05,
      "step": 14090,
      "training_step_time": 0.6900298595428467
    },
    {
      "epoch": 8.6004638671875e-05,
      "model_forward_time": 0.14059090614318848,
      "step": 14091
    },
    {
      "epoch": 8.6004638671875e-05,
      "step": 14091,
      "training_step_time": 0.7219347953796387
    },
    {
      "epoch": 8.60107421875e-05,
      "model_forward_time": 0.11765885353088379,
      "step": 14092
    },
    {
      "epoch": 8.60107421875e-05,
      "step": 14092,
      "training_step_time": 0.7193000316619873
    },
    {
      "epoch": 8.6016845703125e-05,
      "model_forward_time": 0.12061905860900879,
      "step": 14093
    },
    {
      "epoch": 8.6016845703125e-05,
      "step": 14093,
      "training_step_time": 0.684312105178833
    },
    {
      "epoch": 8.602294921875e-05,
      "model_forward_time": 0.1160120964050293,
      "step": 14094
    },
    {
      "epoch": 8.602294921875e-05,
      "step": 14094,
      "training_step_time": 0.7384259700775146
    },
    {
      "epoch": 8.6029052734375e-05,
      "model_forward_time": 0.12368607521057129,
      "step": 14095
    },
    {
      "epoch": 8.6029052734375e-05,
      "step": 14095,
      "training_step_time": 0.7372581958770752
    },
    {
      "epoch": 8.603515625e-05,
      "model_forward_time": 0.1185464859008789,
      "step": 14096
    },
    {
      "epoch": 8.603515625e-05,
      "step": 14096,
      "training_step_time": 0.7104799747467041
    },
    {
      "epoch": 8.6041259765625e-05,
      "model_forward_time": 0.12394905090332031,
      "step": 14097
    },
    {
      "epoch": 8.6041259765625e-05,
      "step": 14097,
      "training_step_time": 0.6852662563323975
    },
    {
      "epoch": 8.604736328125e-05,
      "model_forward_time": 0.12870526313781738,
      "step": 14098
    },
    {
      "epoch": 8.604736328125e-05,
      "step": 14098,
      "training_step_time": 0.7226438522338867
    },
    {
      "epoch": 8.6053466796875e-05,
      "model_forward_time": 0.11859655380249023,
      "step": 14099
    },
    {
      "epoch": 8.6053466796875e-05,
      "step": 14099,
      "training_step_time": 0.6694300174713135
    },
    {
      "epoch": 8.60595703125e-05,
      "grad_norm": 0.16367293894290924,
      "learning_rate": 9.093124073433463e-05,
      "loss": 0.0602,
      "step": 14100
    },
    {
      "epoch": 8.60595703125e-05,
      "model_forward_time": 0.11591958999633789,
      "step": 14100
    },
    {
      "epoch": 8.60595703125e-05,
      "step": 14100,
      "training_step_time": 0.700718879699707
    },
    {
      "epoch": 8.6065673828125e-05,
      "model_forward_time": 0.11867547035217285,
      "step": 14101
    },
    {
      "epoch": 8.6065673828125e-05,
      "step": 14101,
      "training_step_time": 0.6941566467285156
    },
    {
      "epoch": 8.607177734375e-05,
      "model_forward_time": 0.1174917221069336,
      "step": 14102
    },
    {
      "epoch": 8.607177734375e-05,
      "step": 14102,
      "training_step_time": 0.6887702941894531
    },
    {
      "epoch": 8.6077880859375e-05,
      "model_forward_time": 0.11703252792358398,
      "step": 14103
    },
    {
      "epoch": 8.6077880859375e-05,
      "step": 14103,
      "training_step_time": 0.6808254718780518
    },
    {
      "epoch": 8.6083984375e-05,
      "model_forward_time": 0.11933708190917969,
      "step": 14104
    },
    {
      "epoch": 8.6083984375e-05,
      "step": 14104,
      "training_step_time": 0.6751317977905273
    },
    {
      "epoch": 8.6090087890625e-05,
      "model_forward_time": 0.12053322792053223,
      "step": 14105
    },
    {
      "epoch": 8.6090087890625e-05,
      "step": 14105,
      "training_step_time": 0.633237361907959
    },
    {
      "epoch": 8.609619140625e-05,
      "model_forward_time": 0.1192617416381836,
      "step": 14106
    },
    {
      "epoch": 8.609619140625e-05,
      "step": 14106,
      "training_step_time": 0.6662998199462891
    },
    {
      "epoch": 8.6102294921875e-05,
      "model_forward_time": 0.12434840202331543,
      "step": 14107
    },
    {
      "epoch": 8.6102294921875e-05,
      "step": 14107,
      "training_step_time": 0.6339786052703857
    },
    {
      "epoch": 8.61083984375e-05,
      "model_forward_time": 0.1272270679473877,
      "step": 14108
    },
    {
      "epoch": 8.61083984375e-05,
      "step": 14108,
      "training_step_time": 0.7094929218292236
    },
    {
      "epoch": 8.6114501953125e-05,
      "model_forward_time": 0.11961102485656738,
      "step": 14109
    },
    {
      "epoch": 8.6114501953125e-05,
      "step": 14109,
      "training_step_time": 0.7959356307983398
    },
    {
      "epoch": 8.612060546875e-05,
      "grad_norm": 0.14628373086452484,
      "learning_rate": 9.091540726640126e-05,
      "loss": 0.0583,
      "step": 14110
    },
    {
      "epoch": 8.612060546875e-05,
      "model_forward_time": 0.11922693252563477,
      "step": 14110
    },
    {
      "epoch": 8.612060546875e-05,
      "step": 14110,
      "training_step_time": 0.6307241916656494
    },
    {
      "epoch": 8.6126708984375e-05,
      "model_forward_time": 0.11964583396911621,
      "step": 14111
    },
    {
      "epoch": 8.6126708984375e-05,
      "step": 14111,
      "training_step_time": 0.6595475673675537
    },
    {
      "epoch": 8.61328125e-05,
      "model_forward_time": 0.11887741088867188,
      "step": 14112
    },
    {
      "epoch": 8.61328125e-05,
      "step": 14112,
      "training_step_time": 0.7583401203155518
    },
    {
      "epoch": 8.6138916015625e-05,
      "model_forward_time": 0.12293529510498047,
      "step": 14113
    },
    {
      "epoch": 8.6138916015625e-05,
      "step": 14113,
      "training_step_time": 0.6928129196166992
    },
    {
      "epoch": 8.614501953125e-05,
      "model_forward_time": 0.11826348304748535,
      "step": 14114
    },
    {
      "epoch": 8.614501953125e-05,
      "step": 14114,
      "training_step_time": 0.6060824394226074
    },
    {
      "epoch": 8.6151123046875e-05,
      "model_forward_time": 0.1183626651763916,
      "step": 14115
    },
    {
      "epoch": 8.6151123046875e-05,
      "step": 14115,
      "training_step_time": 0.64933180809021
    },
    {
      "epoch": 8.61572265625e-05,
      "model_forward_time": 0.11849117279052734,
      "step": 14116
    },
    {
      "epoch": 8.61572265625e-05,
      "step": 14116,
      "training_step_time": 0.6266496181488037
    },
    {
      "epoch": 8.6163330078125e-05,
      "model_forward_time": 0.12339496612548828,
      "step": 14117
    },
    {
      "epoch": 8.6163330078125e-05,
      "step": 14117,
      "training_step_time": 0.7067244052886963
    },
    {
      "epoch": 8.616943359375e-05,
      "model_forward_time": 0.11971092224121094,
      "step": 14118
    },
    {
      "epoch": 8.616943359375e-05,
      "step": 14118,
      "training_step_time": 0.6682887077331543
    },
    {
      "epoch": 8.6175537109375e-05,
      "model_forward_time": 0.12445521354675293,
      "step": 14119
    },
    {
      "epoch": 8.6175537109375e-05,
      "step": 14119,
      "training_step_time": 0.5962345600128174
    },
    {
      "epoch": 8.6181640625e-05,
      "grad_norm": 0.17298056185245514,
      "learning_rate": 9.089956136944751e-05,
      "loss": 0.0583,
      "step": 14120
    },
    {
      "epoch": 8.6181640625e-05,
      "model_forward_time": 0.11857318878173828,
      "step": 14120
    },
    {
      "epoch": 8.6181640625e-05,
      "step": 14120,
      "training_step_time": 0.5115392208099365
    },
    {
      "epoch": 8.6187744140625e-05,
      "model_forward_time": 0.11873340606689453,
      "step": 14121
    },
    {
      "epoch": 8.6187744140625e-05,
      "step": 14121,
      "training_step_time": 0.5492959022521973
    },
    {
      "epoch": 8.619384765625e-05,
      "model_forward_time": 0.11783361434936523,
      "step": 14122
    },
    {
      "epoch": 8.619384765625e-05,
      "step": 14122,
      "training_step_time": 0.5135304927825928
    },
    {
      "epoch": 8.6199951171875e-05,
      "model_forward_time": 0.11752200126647949,
      "step": 14123
    },
    {
      "epoch": 8.6199951171875e-05,
      "step": 14123,
      "training_step_time": 0.47519707679748535
    },
    {
      "epoch": 8.62060546875e-05,
      "model_forward_time": 0.11692166328430176,
      "step": 14124
    },
    {
      "epoch": 8.62060546875e-05,
      "step": 14124,
      "training_step_time": 0.44387006759643555
    },
    {
      "epoch": 8.6212158203125e-05,
      "model_forward_time": 0.11701726913452148,
      "step": 14125
    },
    {
      "epoch": 8.6212158203125e-05,
      "step": 14125,
      "training_step_time": 0.44609618186950684
    },
    {
      "epoch": 8.621826171875e-05,
      "model_forward_time": 0.11530900001525879,
      "step": 14126
    },
    {
      "epoch": 8.621826171875e-05,
      "step": 14126,
      "training_step_time": 0.4252755641937256
    },
    {
      "epoch": 8.6224365234375e-05,
      "model_forward_time": 0.11606001853942871,
      "step": 14127
    },
    {
      "epoch": 8.6224365234375e-05,
      "step": 14127,
      "training_step_time": 0.4011557102203369
    },
    {
      "epoch": 8.623046875e-05,
      "model_forward_time": 0.11618375778198242,
      "step": 14128
    },
    {
      "epoch": 8.623046875e-05,
      "step": 14128,
      "training_step_time": 0.39934206008911133
    },
    {
      "epoch": 8.6236572265625e-05,
      "model_forward_time": 0.11582565307617188,
      "step": 14129
    },
    {
      "epoch": 8.6236572265625e-05,
      "step": 14129,
      "training_step_time": 0.4029526710510254
    },
    {
      "epoch": 8.624267578125e-05,
      "grad_norm": 0.17734362185001373,
      "learning_rate": 9.088370304828685e-05,
      "loss": 0.0622,
      "step": 14130
    },
    {
      "epoch": 8.624267578125e-05,
      "model_forward_time": 0.11552119255065918,
      "step": 14130
    },
    {
      "epoch": 8.624267578125e-05,
      "step": 14130,
      "training_step_time": 0.43956589698791504
    },
    {
      "epoch": 8.6248779296875e-05,
      "model_forward_time": 0.11537551879882812,
      "step": 14131
    },
    {
      "epoch": 8.6248779296875e-05,
      "step": 14131,
      "training_step_time": 0.46598100662231445
    },
    {
      "epoch": 8.62548828125e-05,
      "model_forward_time": 0.11512327194213867,
      "step": 14132
    },
    {
      "epoch": 8.62548828125e-05,
      "step": 14132,
      "training_step_time": 0.4335367679595947
    },
    {
      "epoch": 8.6260986328125e-05,
      "model_forward_time": 0.11543869972229004,
      "step": 14133
    },
    {
      "epoch": 8.6260986328125e-05,
      "step": 14133,
      "training_step_time": 0.5184006690979004
    },
    {
      "epoch": 8.626708984375e-05,
      "model_forward_time": 0.11502599716186523,
      "step": 14134
    },
    {
      "epoch": 8.626708984375e-05,
      "step": 14134,
      "training_step_time": 0.41030359268188477
    },
    {
      "epoch": 8.6273193359375e-05,
      "model_forward_time": 0.11536931991577148,
      "step": 14135
    },
    {
      "epoch": 8.6273193359375e-05,
      "step": 14135,
      "training_step_time": 0.40062570571899414
    },
    {
      "epoch": 8.6279296875e-05,
      "model_forward_time": 0.11536383628845215,
      "step": 14136
    },
    {
      "epoch": 8.6279296875e-05,
      "step": 14136,
      "training_step_time": 0.38231825828552246
    },
    {
      "epoch": 8.6285400390625e-05,
      "model_forward_time": 0.11576604843139648,
      "step": 14137
    },
    {
      "epoch": 8.6285400390625e-05,
      "step": 14137,
      "training_step_time": 0.4738953113555908
    },
    {
      "epoch": 8.629150390625e-05,
      "model_forward_time": 0.11582565307617188,
      "step": 14138
    },
    {
      "epoch": 8.629150390625e-05,
      "step": 14138,
      "training_step_time": 0.3895716667175293
    },
    {
      "epoch": 8.6297607421875e-05,
      "model_forward_time": 0.11469078063964844,
      "step": 14139
    },
    {
      "epoch": 8.6297607421875e-05,
      "step": 14139,
      "training_step_time": 0.42651915550231934
    },
    {
      "epoch": 8.63037109375e-05,
      "grad_norm": 0.16024376451969147,
      "learning_rate": 9.086783230773672e-05,
      "loss": 0.0653,
      "step": 14140
    },
    {
      "epoch": 8.63037109375e-05,
      "model_forward_time": 0.11437702178955078,
      "step": 14140
    },
    {
      "epoch": 8.63037109375e-05,
      "step": 14140,
      "training_step_time": 0.38225674629211426
    },
    {
      "epoch": 8.6309814453125e-05,
      "model_forward_time": 0.1151125431060791,
      "step": 14141
    },
    {
      "epoch": 8.6309814453125e-05,
      "step": 14141,
      "training_step_time": 0.39733314514160156
    },
    {
      "epoch": 8.631591796875e-05,
      "model_forward_time": 0.11524200439453125,
      "step": 14142
    },
    {
      "epoch": 8.631591796875e-05,
      "step": 14142,
      "training_step_time": 0.3907451629638672
    },
    {
      "epoch": 8.6322021484375e-05,
      "model_forward_time": 0.11469531059265137,
      "step": 14143
    },
    {
      "epoch": 8.6322021484375e-05,
      "step": 14143,
      "training_step_time": 0.4208528995513916
    },
    {
      "epoch": 8.6328125e-05,
      "model_forward_time": 0.1154024600982666,
      "step": 14144
    },
    {
      "epoch": 8.6328125e-05,
      "step": 14144,
      "training_step_time": 0.4845616817474365
    },
    {
      "epoch": 8.6334228515625e-05,
      "model_forward_time": 0.11513781547546387,
      "step": 14145
    },
    {
      "epoch": 8.6334228515625e-05,
      "step": 14145,
      "training_step_time": 0.44966840744018555
    },
    {
      "epoch": 8.634033203125e-05,
      "model_forward_time": 0.11484694480895996,
      "step": 14146
    },
    {
      "epoch": 8.634033203125e-05,
      "step": 14146,
      "training_step_time": 0.42182302474975586
    },
    {
      "epoch": 8.6346435546875e-05,
      "model_forward_time": 0.11463499069213867,
      "step": 14147
    },
    {
      "epoch": 8.6346435546875e-05,
      "step": 14147,
      "training_step_time": 0.3994414806365967
    },
    {
      "epoch": 8.63525390625e-05,
      "model_forward_time": 0.11467528343200684,
      "step": 14148
    },
    {
      "epoch": 8.63525390625e-05,
      "step": 14148,
      "training_step_time": 0.4488866329193115
    },
    {
      "epoch": 8.6358642578125e-05,
      "model_forward_time": 0.11517548561096191,
      "step": 14149
    },
    {
      "epoch": 8.6358642578125e-05,
      "step": 14149,
      "training_step_time": 0.4150235652923584
    },
    {
      "epoch": 8.636474609375e-05,
      "grad_norm": 0.1623186618089676,
      "learning_rate": 9.085194915261818e-05,
      "loss": 0.0569,
      "step": 14150
    },
    {
      "epoch": 8.636474609375e-05,
      "model_forward_time": 0.11545991897583008,
      "step": 14150
    },
    {
      "epoch": 8.636474609375e-05,
      "step": 14150,
      "training_step_time": 0.4372107982635498
    },
    {
      "epoch": 8.6370849609375e-05,
      "model_forward_time": 0.11473917961120605,
      "step": 14151
    },
    {
      "epoch": 8.6370849609375e-05,
      "step": 14151,
      "training_step_time": 0.5087699890136719
    },
    {
      "epoch": 8.6376953125e-05,
      "model_forward_time": 0.1151123046875,
      "step": 14152
    },
    {
      "epoch": 8.6376953125e-05,
      "step": 14152,
      "training_step_time": 0.5031821727752686
    },
    {
      "epoch": 8.6383056640625e-05,
      "model_forward_time": 0.11456155776977539,
      "step": 14153
    },
    {
      "epoch": 8.6383056640625e-05,
      "step": 14153,
      "training_step_time": 0.493854284286499
    },
    {
      "epoch": 8.638916015625e-05,
      "model_forward_time": 0.11427044868469238,
      "step": 14154
    },
    {
      "epoch": 8.638916015625e-05,
      "step": 14154,
      "training_step_time": 0.3883399963378906
    },
    {
      "epoch": 8.6395263671875e-05,
      "model_forward_time": 0.1148538589477539,
      "step": 14155
    },
    {
      "epoch": 8.6395263671875e-05,
      "step": 14155,
      "training_step_time": 0.39669251441955566
    },
    {
      "epoch": 8.64013671875e-05,
      "model_forward_time": 0.11513638496398926,
      "step": 14156
    },
    {
      "epoch": 8.64013671875e-05,
      "step": 14156,
      "training_step_time": 0.36579227447509766
    },
    {
      "epoch": 8.6407470703125e-05,
      "model_forward_time": 0.11463475227355957,
      "step": 14157
    },
    {
      "epoch": 8.6407470703125e-05,
      "step": 14157,
      "training_step_time": 0.4074404239654541
    },
    {
      "epoch": 8.641357421875e-05,
      "model_forward_time": 0.11600780487060547,
      "step": 14158
    },
    {
      "epoch": 8.641357421875e-05,
      "step": 14158,
      "training_step_time": 0.40525054931640625
    },
    {
      "epoch": 8.6419677734375e-05,
      "model_forward_time": 0.11521434783935547,
      "step": 14159
    },
    {
      "epoch": 8.6419677734375e-05,
      "step": 14159,
      "training_step_time": 0.5154538154602051
    },
    {
      "epoch": 8.642578125e-05,
      "grad_norm": 0.1593427062034607,
      "learning_rate": 9.083605358775612e-05,
      "loss": 0.0583,
      "step": 14160
    },
    {
      "epoch": 8.642578125e-05,
      "model_forward_time": 0.11476016044616699,
      "step": 14160
    },
    {
      "epoch": 8.642578125e-05,
      "step": 14160,
      "training_step_time": 0.4046950340270996
    },
    {
      "epoch": 8.6431884765625e-05,
      "model_forward_time": 0.11614370346069336,
      "step": 14161
    },
    {
      "epoch": 8.6431884765625e-05,
      "step": 14161,
      "training_step_time": 0.3949923515319824
    },
    {
      "epoch": 8.643798828125e-05,
      "model_forward_time": 0.11529922485351562,
      "step": 14162
    },
    {
      "epoch": 8.643798828125e-05,
      "step": 14162,
      "training_step_time": 0.39780640602111816
    },
    {
      "epoch": 8.6444091796875e-05,
      "model_forward_time": 0.11470341682434082,
      "step": 14163
    },
    {
      "epoch": 8.6444091796875e-05,
      "step": 14163,
      "training_step_time": 0.399705171585083
    },
    {
      "epoch": 8.64501953125e-05,
      "model_forward_time": 0.11520910263061523,
      "step": 14164
    },
    {
      "epoch": 8.64501953125e-05,
      "step": 14164,
      "training_step_time": 0.39604878425598145
    },
    {
      "epoch": 8.6456298828125e-05,
      "model_forward_time": 0.11597871780395508,
      "step": 14165
    },
    {
      "epoch": 8.6456298828125e-05,
      "step": 14165,
      "training_step_time": 0.5089931488037109
    },
    {
      "epoch": 8.646240234375e-05,
      "model_forward_time": 0.1151120662689209,
      "step": 14166
    },
    {
      "epoch": 8.646240234375e-05,
      "step": 14166,
      "training_step_time": 0.400745153427124
    },
    {
      "epoch": 8.6468505859375e-05,
      "model_forward_time": 0.11551547050476074,
      "step": 14167
    },
    {
      "epoch": 8.6468505859375e-05,
      "step": 14167,
      "training_step_time": 0.4277229309082031
    },
    {
      "epoch": 8.6474609375e-05,
      "model_forward_time": 0.11532187461853027,
      "step": 14168
    },
    {
      "epoch": 8.6474609375e-05,
      "step": 14168,
      "training_step_time": 0.3803389072418213
    },
    {
      "epoch": 8.6480712890625e-05,
      "model_forward_time": 0.11495041847229004,
      "step": 14169
    },
    {
      "epoch": 8.6480712890625e-05,
      "step": 14169,
      "training_step_time": 0.40651535987854004
    },
    {
      "epoch": 8.648681640625e-05,
      "grad_norm": 0.14760629832744598,
      "learning_rate": 9.082014561797918e-05,
      "loss": 0.0615,
      "step": 14170
    },
    {
      "epoch": 8.648681640625e-05,
      "model_forward_time": 0.11513113975524902,
      "step": 14170
    },
    {
      "epoch": 8.648681640625e-05,
      "step": 14170,
      "training_step_time": 0.40412116050720215
    },
    {
      "epoch": 8.6492919921875e-05,
      "model_forward_time": 0.11496114730834961,
      "step": 14171
    },
    {
      "epoch": 8.6492919921875e-05,
      "step": 14171,
      "training_step_time": 0.5148513317108154
    },
    {
      "epoch": 8.64990234375e-05,
      "model_forward_time": 0.11466813087463379,
      "step": 14172
    },
    {
      "epoch": 8.64990234375e-05,
      "step": 14172,
      "training_step_time": 0.4119150638580322
    },
    {
      "epoch": 8.6505126953125e-05,
      "model_forward_time": 0.11472058296203613,
      "step": 14173
    },
    {
      "epoch": 8.6505126953125e-05,
      "step": 14173,
      "training_step_time": 0.4215271472930908
    },
    {
      "epoch": 8.651123046875e-05,
      "model_forward_time": 0.11617445945739746,
      "step": 14174
    },
    {
      "epoch": 8.651123046875e-05,
      "step": 14174,
      "training_step_time": 0.46547412872314453
    },
    {
      "epoch": 8.6517333984375e-05,
      "model_forward_time": 0.11486506462097168,
      "step": 14175
    },
    {
      "epoch": 8.6517333984375e-05,
      "step": 14175,
      "training_step_time": 0.4279134273529053
    },
    {
      "epoch": 8.65234375e-05,
      "model_forward_time": 0.11435723304748535,
      "step": 14176
    },
    {
      "epoch": 8.65234375e-05,
      "step": 14176,
      "training_step_time": 0.4188699722290039
    },
    {
      "epoch": 8.6529541015625e-05,
      "model_forward_time": 0.11461710929870605,
      "step": 14177
    },
    {
      "epoch": 8.6529541015625e-05,
      "step": 14177,
      "training_step_time": 0.3841893672943115
    },
    {
      "epoch": 8.653564453125e-05,
      "model_forward_time": 0.11513352394104004,
      "step": 14178
    },
    {
      "epoch": 8.653564453125e-05,
      "step": 14178,
      "training_step_time": 0.39012932777404785
    },
    {
      "epoch": 8.6541748046875e-05,
      "model_forward_time": 0.11540102958679199,
      "step": 14179
    },
    {
      "epoch": 8.6541748046875e-05,
      "step": 14179,
      "training_step_time": 0.3882756233215332
    },
    {
      "epoch": 8.65478515625e-05,
      "grad_norm": 0.13236820697784424,
      "learning_rate": 9.080422524811982e-05,
      "loss": 0.0584,
      "step": 14180
    },
    {
      "epoch": 8.65478515625e-05,
      "model_forward_time": 0.11585068702697754,
      "step": 14180
    },
    {
      "epoch": 8.65478515625e-05,
      "step": 14180,
      "training_step_time": 0.4018702507019043
    },
    {
      "epoch": 8.6553955078125e-05,
      "model_forward_time": 0.11509466171264648,
      "step": 14181
    },
    {
      "epoch": 8.6553955078125e-05,
      "step": 14181,
      "training_step_time": 0.43827223777770996
    },
    {
      "epoch": 8.656005859375e-05,
      "model_forward_time": 0.11547517776489258,
      "step": 14182
    },
    {
      "epoch": 8.656005859375e-05,
      "step": 14182,
      "training_step_time": 0.4530479907989502
    },
    {
      "epoch": 8.6566162109375e-05,
      "model_forward_time": 0.11527705192565918,
      "step": 14183
    },
    {
      "epoch": 8.6566162109375e-05,
      "step": 14183,
      "training_step_time": 0.3938021659851074
    },
    {
      "epoch": 8.6572265625e-05,
      "model_forward_time": 0.11680793762207031,
      "step": 14184
    },
    {
      "epoch": 8.6572265625e-05,
      "step": 14184,
      "training_step_time": 0.39253711700439453
    },
    {
      "epoch": 8.6578369140625e-05,
      "model_forward_time": 0.1156768798828125,
      "step": 14185
    },
    {
      "epoch": 8.6578369140625e-05,
      "step": 14185,
      "training_step_time": 0.3982100486755371
    },
    {
      "epoch": 8.658447265625e-05,
      "model_forward_time": 0.11542940139770508,
      "step": 14186
    },
    {
      "epoch": 8.658447265625e-05,
      "step": 14186,
      "training_step_time": 0.44696831703186035
    },
    {
      "epoch": 8.6590576171875e-05,
      "model_forward_time": 0.11490225791931152,
      "step": 14187
    },
    {
      "epoch": 8.6590576171875e-05,
      "step": 14187,
      "training_step_time": 0.5006334781646729
    },
    {
      "epoch": 8.65966796875e-05,
      "model_forward_time": 0.11464095115661621,
      "step": 14188
    },
    {
      "epoch": 8.65966796875e-05,
      "step": 14188,
      "training_step_time": 0.4789900779724121
    },
    {
      "epoch": 8.6602783203125e-05,
      "model_forward_time": 0.11516332626342773,
      "step": 14189
    },
    {
      "epoch": 8.6602783203125e-05,
      "step": 14189,
      "training_step_time": 0.4026312828063965
    },
    {
      "epoch": 8.660888671875e-05,
      "grad_norm": 0.22240868210792542,
      "learning_rate": 9.078829248301417e-05,
      "loss": 0.0591,
      "step": 14190
    },
    {
      "epoch": 8.660888671875e-05,
      "model_forward_time": 0.11412286758422852,
      "step": 14190
    },
    {
      "epoch": 8.660888671875e-05,
      "step": 14190,
      "training_step_time": 0.6251814365386963
    },
    {
      "epoch": 8.6614990234375e-05,
      "model_forward_time": 0.11432170867919922,
      "step": 14191
    },
    {
      "epoch": 8.6614990234375e-05,
      "step": 14191,
      "training_step_time": 0.3855857849121094
    },
    {
      "epoch": 8.662109375e-05,
      "model_forward_time": 0.11402273178100586,
      "step": 14192
    },
    {
      "epoch": 8.662109375e-05,
      "step": 14192,
      "training_step_time": 0.3839588165283203
    },
    {
      "epoch": 8.6627197265625e-05,
      "model_forward_time": 0.1149601936340332,
      "step": 14193
    },
    {
      "epoch": 8.6627197265625e-05,
      "step": 14193,
      "training_step_time": 0.39902591705322266
    },
    {
      "epoch": 8.663330078125e-05,
      "model_forward_time": 0.11493325233459473,
      "step": 14194
    },
    {
      "epoch": 8.663330078125e-05,
      "step": 14194,
      "training_step_time": 0.4429931640625
    },
    {
      "epoch": 8.6639404296875e-05,
      "model_forward_time": 0.11502790451049805,
      "step": 14195
    },
    {
      "epoch": 8.6639404296875e-05,
      "step": 14195,
      "training_step_time": 0.48641395568847656
    },
    {
      "epoch": 8.66455078125e-05,
      "model_forward_time": 0.11647534370422363,
      "step": 14196
    },
    {
      "epoch": 8.66455078125e-05,
      "step": 14196,
      "training_step_time": 0.8026142120361328
    },
    {
      "epoch": 8.6651611328125e-05,
      "model_forward_time": 0.11414003372192383,
      "step": 14197
    },
    {
      "epoch": 8.6651611328125e-05,
      "step": 14197,
      "training_step_time": 0.38315725326538086
    },
    {
      "epoch": 8.665771484375e-05,
      "model_forward_time": 0.11431145668029785,
      "step": 14198
    },
    {
      "epoch": 8.665771484375e-05,
      "step": 14198,
      "training_step_time": 0.38047075271606445
    },
    {
      "epoch": 8.6663818359375e-05,
      "model_forward_time": 0.11455965042114258,
      "step": 14199
    },
    {
      "epoch": 8.6663818359375e-05,
      "step": 14199,
      "training_step_time": 0.38953614234924316
    },
    {
      "epoch": 8.6669921875e-05,
      "grad_norm": 0.1783551424741745,
      "learning_rate": 9.077234732750224e-05,
      "loss": 0.0593,
      "step": 14200
    },
    {
      "epoch": 8.6669921875e-05,
      "model_forward_time": 0.11413073539733887,
      "step": 14200
    },
    {
      "epoch": 8.6669921875e-05,
      "step": 14200,
      "training_step_time": 0.45339512825012207
    },
    {
      "epoch": 8.6676025390625e-05,
      "model_forward_time": 0.11446666717529297,
      "step": 14201
    },
    {
      "epoch": 8.6676025390625e-05,
      "step": 14201,
      "training_step_time": 0.4205303192138672
    },
    {
      "epoch": 8.668212890625e-05,
      "model_forward_time": 0.11535406112670898,
      "step": 14202
    },
    {
      "epoch": 8.668212890625e-05,
      "step": 14202,
      "training_step_time": 1.0749902725219727
    },
    {
      "epoch": 8.6688232421875e-05,
      "model_forward_time": 0.1145627498626709,
      "step": 14203
    },
    {
      "epoch": 8.6688232421875e-05,
      "step": 14203,
      "training_step_time": 0.40758395195007324
    },
    {
      "epoch": 8.66943359375e-05,
      "model_forward_time": 0.11409378051757812,
      "step": 14204
    },
    {
      "epoch": 8.66943359375e-05,
      "step": 14204,
      "training_step_time": 0.3809938430786133
    },
    {
      "epoch": 8.6700439453125e-05,
      "model_forward_time": 0.11412692070007324,
      "step": 14205
    },
    {
      "epoch": 8.6700439453125e-05,
      "step": 14205,
      "training_step_time": 0.3877084255218506
    },
    {
      "epoch": 8.670654296875e-05,
      "model_forward_time": 0.11438918113708496,
      "step": 14206
    },
    {
      "epoch": 8.670654296875e-05,
      "step": 14206,
      "training_step_time": 0.36341047286987305
    },
    {
      "epoch": 8.6712646484375e-05,
      "model_forward_time": 0.11476325988769531,
      "step": 14207
    },
    {
      "epoch": 8.6712646484375e-05,
      "step": 14207,
      "training_step_time": 0.42258572578430176
    },
    {
      "epoch": 8.671875e-05,
      "model_forward_time": 0.11517620086669922,
      "step": 14208
    },
    {
      "epoch": 8.671875e-05,
      "step": 14208,
      "training_step_time": 0.605480432510376
    },
    {
      "epoch": 8.6724853515625e-05,
      "model_forward_time": 0.11507081985473633,
      "step": 14209
    },
    {
      "epoch": 8.6724853515625e-05,
      "step": 14209,
      "training_step_time": 0.3932178020477295
    },
    {
      "epoch": 8.673095703125e-05,
      "grad_norm": 0.24811257421970367,
      "learning_rate": 9.075638978642771e-05,
      "loss": 0.0593,
      "step": 14210
    },
    {
      "epoch": 8.673095703125e-05,
      "model_forward_time": 0.11473298072814941,
      "step": 14210
    },
    {
      "epoch": 8.673095703125e-05,
      "step": 14210,
      "training_step_time": 0.3866922855377197
    },
    {
      "epoch": 8.6737060546875e-05,
      "model_forward_time": 0.11459183692932129,
      "step": 14211
    },
    {
      "epoch": 8.6737060546875e-05,
      "step": 14211,
      "training_step_time": 0.38454151153564453
    },
    {
      "epoch": 8.67431640625e-05,
      "model_forward_time": 0.11498069763183594,
      "step": 14212
    },
    {
      "epoch": 8.67431640625e-05,
      "step": 14212,
      "training_step_time": 0.46358370780944824
    },
    {
      "epoch": 8.6749267578125e-05,
      "model_forward_time": 0.11444401741027832,
      "step": 14213
    },
    {
      "epoch": 8.6749267578125e-05,
      "step": 14213,
      "training_step_time": 0.4620506763458252
    },
    {
      "epoch": 8.675537109375e-05,
      "model_forward_time": 0.11534285545349121,
      "step": 14214
    },
    {
      "epoch": 8.675537109375e-05,
      "step": 14214,
      "training_step_time": 0.6957857608795166
    },
    {
      "epoch": 8.6761474609375e-05,
      "model_forward_time": 0.11413908004760742,
      "step": 14215
    },
    {
      "epoch": 8.6761474609375e-05,
      "step": 14215,
      "training_step_time": 0.4133460521697998
    },
    {
      "epoch": 8.6767578125e-05,
      "model_forward_time": 0.11412239074707031,
      "step": 14216
    },
    {
      "epoch": 8.6767578125e-05,
      "step": 14216,
      "training_step_time": 0.42371416091918945
    },
    {
      "epoch": 8.6773681640625e-05,
      "model_forward_time": 0.1151273250579834,
      "step": 14217
    },
    {
      "epoch": 8.6773681640625e-05,
      "step": 14217,
      "training_step_time": 0.37947821617126465
    },
    {
      "epoch": 8.677978515625e-05,
      "model_forward_time": 0.11459231376647949,
      "step": 14218
    },
    {
      "epoch": 8.677978515625e-05,
      "step": 14218,
      "training_step_time": 0.3862793445587158
    },
    {
      "epoch": 8.6785888671875e-05,
      "model_forward_time": 0.11446928977966309,
      "step": 14219
    },
    {
      "epoch": 8.6785888671875e-05,
      "step": 14219,
      "training_step_time": 0.38683557510375977
    },
    {
      "epoch": 8.67919921875e-05,
      "grad_norm": 0.155619278550148,
      "learning_rate": 9.074041986463808e-05,
      "loss": 0.0597,
      "step": 14220
    },
    {
      "epoch": 8.67919921875e-05,
      "model_forward_time": 0.11487483978271484,
      "step": 14220
    },
    {
      "epoch": 8.67919921875e-05,
      "step": 14220,
      "training_step_time": 0.47483277320861816
    },
    {
      "epoch": 8.6798095703125e-05,
      "model_forward_time": 0.11505126953125,
      "step": 14221
    },
    {
      "epoch": 8.6798095703125e-05,
      "step": 14221,
      "training_step_time": 0.43558812141418457
    },
    {
      "epoch": 8.680419921875e-05,
      "model_forward_time": 0.11486363410949707,
      "step": 14222
    },
    {
      "epoch": 8.680419921875e-05,
      "step": 14222,
      "training_step_time": 0.4000403881072998
    },
    {
      "epoch": 8.6810302734375e-05,
      "model_forward_time": 0.11517930030822754,
      "step": 14223
    },
    {
      "epoch": 8.6810302734375e-05,
      "step": 14223,
      "training_step_time": 0.38934946060180664
    },
    {
      "epoch": 8.681640625e-05,
      "model_forward_time": 0.11506462097167969,
      "step": 14224
    },
    {
      "epoch": 8.681640625e-05,
      "step": 14224,
      "training_step_time": 0.3846712112426758
    },
    {
      "epoch": 8.6822509765625e-05,
      "model_forward_time": 0.11512255668640137,
      "step": 14225
    },
    {
      "epoch": 8.6822509765625e-05,
      "step": 14225,
      "training_step_time": 0.3869926929473877
    },
    {
      "epoch": 8.682861328125e-05,
      "model_forward_time": 0.11516785621643066,
      "step": 14226
    },
    {
      "epoch": 8.682861328125e-05,
      "step": 14226,
      "training_step_time": 0.6316442489624023
    },
    {
      "epoch": 8.6834716796875e-05,
      "model_forward_time": 0.11524319648742676,
      "step": 14227
    },
    {
      "epoch": 8.6834716796875e-05,
      "step": 14227,
      "training_step_time": 0.43974924087524414
    },
    {
      "epoch": 8.68408203125e-05,
      "model_forward_time": 0.11441993713378906,
      "step": 14228
    },
    {
      "epoch": 8.68408203125e-05,
      "step": 14228,
      "training_step_time": 0.3935067653656006
    },
    {
      "epoch": 8.6846923828125e-05,
      "model_forward_time": 0.11516642570495605,
      "step": 14229
    },
    {
      "epoch": 8.6846923828125e-05,
      "step": 14229,
      "training_step_time": 0.3917820453643799
    },
    {
      "epoch": 8.685302734375e-05,
      "grad_norm": 0.13824400305747986,
      "learning_rate": 9.072443756698459e-05,
      "loss": 0.0635,
      "step": 14230
    },
    {
      "epoch": 8.685302734375e-05,
      "model_forward_time": 0.11467504501342773,
      "step": 14230
    },
    {
      "epoch": 8.685302734375e-05,
      "step": 14230,
      "training_step_time": 0.4096047878265381
    },
    {
      "epoch": 8.6859130859375e-05,
      "model_forward_time": 0.11403369903564453,
      "step": 14231
    },
    {
      "epoch": 8.6859130859375e-05,
      "step": 14231,
      "training_step_time": 0.4629693031311035
    },
    {
      "epoch": 8.6865234375e-05,
      "model_forward_time": 0.1155242919921875,
      "step": 14232
    },
    {
      "epoch": 8.6865234375e-05,
      "step": 14232,
      "training_step_time": 0.6374380588531494
    },
    {
      "epoch": 8.6871337890625e-05,
      "model_forward_time": 0.11522078514099121,
      "step": 14233
    },
    {
      "epoch": 8.6871337890625e-05,
      "step": 14233,
      "training_step_time": 0.39883947372436523
    },
    {
      "epoch": 8.687744140625e-05,
      "model_forward_time": 0.1147611141204834,
      "step": 14234
    },
    {
      "epoch": 8.687744140625e-05,
      "step": 14234,
      "training_step_time": 0.40595173835754395
    },
    {
      "epoch": 8.6883544921875e-05,
      "model_forward_time": 0.11486268043518066,
      "step": 14235
    },
    {
      "epoch": 8.6883544921875e-05,
      "step": 14235,
      "training_step_time": 0.42749619483947754
    },
    {
      "epoch": 8.68896484375e-05,
      "model_forward_time": 0.11487483978271484,
      "step": 14236
    },
    {
      "epoch": 8.68896484375e-05,
      "step": 14236,
      "training_step_time": 0.4029574394226074
    },
    {
      "epoch": 8.6895751953125e-05,
      "model_forward_time": 0.11449837684631348,
      "step": 14237
    },
    {
      "epoch": 8.6895751953125e-05,
      "step": 14237,
      "training_step_time": 0.3836648464202881
    },
    {
      "epoch": 8.690185546875e-05,
      "model_forward_time": 0.11558651924133301,
      "step": 14238
    },
    {
      "epoch": 8.690185546875e-05,
      "step": 14238,
      "training_step_time": 0.40794968605041504
    },
    {
      "epoch": 8.6907958984375e-05,
      "model_forward_time": 0.11552906036376953,
      "step": 14239
    },
    {
      "epoch": 8.6907958984375e-05,
      "step": 14239,
      "training_step_time": 0.4564826488494873
    },
    {
      "epoch": 8.69140625e-05,
      "grad_norm": 0.1997436285018921,
      "learning_rate": 9.070844289832224e-05,
      "loss": 0.0553,
      "step": 14240
    },
    {
      "epoch": 8.69140625e-05,
      "model_forward_time": 0.11467790603637695,
      "step": 14240
    },
    {
      "epoch": 8.69140625e-05,
      "step": 14240,
      "training_step_time": 0.4736306667327881
    },
    {
      "epoch": 8.6920166015625e-05,
      "model_forward_time": 0.11537623405456543,
      "step": 14241
    },
    {
      "epoch": 8.6920166015625e-05,
      "step": 14241,
      "training_step_time": 0.4579496383666992
    },
    {
      "epoch": 8.692626953125e-05,
      "model_forward_time": 0.11464095115661621,
      "step": 14242
    },
    {
      "epoch": 8.692626953125e-05,
      "step": 14242,
      "training_step_time": 0.3988678455352783
    },
    {
      "epoch": 8.6932373046875e-05,
      "model_forward_time": 0.11479353904724121,
      "step": 14243
    },
    {
      "epoch": 8.6932373046875e-05,
      "step": 14243,
      "training_step_time": 0.40224218368530273
    },
    {
      "epoch": 8.69384765625e-05,
      "model_forward_time": 0.11485719680786133,
      "step": 14244
    },
    {
      "epoch": 8.69384765625e-05,
      "step": 14244,
      "training_step_time": 0.395383358001709
    },
    {
      "epoch": 8.6944580078125e-05,
      "model_forward_time": 0.11517977714538574,
      "step": 14245
    },
    {
      "epoch": 8.6944580078125e-05,
      "step": 14245,
      "training_step_time": 0.42491817474365234
    },
    {
      "epoch": 8.695068359375e-05,
      "model_forward_time": 0.11469244956970215,
      "step": 14246
    },
    {
      "epoch": 8.695068359375e-05,
      "step": 14246,
      "training_step_time": 0.3888437747955322
    },
    {
      "epoch": 8.6956787109375e-05,
      "model_forward_time": 0.11469244956970215,
      "step": 14247
    },
    {
      "epoch": 8.6956787109375e-05,
      "step": 14247,
      "training_step_time": 0.4132423400878906
    },
    {
      "epoch": 8.6962890625e-05,
      "model_forward_time": 0.11529326438903809,
      "step": 14248
    },
    {
      "epoch": 8.6962890625e-05,
      "step": 14248,
      "training_step_time": 0.3897700309753418
    },
    {
      "epoch": 8.6968994140625e-05,
      "model_forward_time": 0.11548542976379395,
      "step": 14249
    },
    {
      "epoch": 8.6968994140625e-05,
      "step": 14249,
      "training_step_time": 0.41559720039367676
    },
    {
      "epoch": 8.697509765625e-05,
      "grad_norm": 0.1615772843360901,
      "learning_rate": 9.069243586350975e-05,
      "loss": 0.0636,
      "step": 14250
    },
    {
      "epoch": 8.697509765625e-05,
      "model_forward_time": 0.11536526679992676,
      "step": 14250
    },
    {
      "epoch": 8.697509765625e-05,
      "step": 14250,
      "training_step_time": 0.6239938735961914
    },
    {
      "epoch": 8.6981201171875e-05,
      "model_forward_time": 0.11572098731994629,
      "step": 14251
    },
    {
      "epoch": 8.6981201171875e-05,
      "step": 14251,
      "training_step_time": 0.39522886276245117
    },
    {
      "epoch": 8.69873046875e-05,
      "model_forward_time": 0.11511731147766113,
      "step": 14252
    },
    {
      "epoch": 8.69873046875e-05,
      "step": 14252,
      "training_step_time": 0.405137300491333
    },
    {
      "epoch": 8.6993408203125e-05,
      "model_forward_time": 0.11438250541687012,
      "step": 14253
    },
    {
      "epoch": 8.6993408203125e-05,
      "step": 14253,
      "training_step_time": 0.4296994209289551
    },
    {
      "epoch": 8.699951171875e-05,
      "model_forward_time": 0.1155250072479248,
      "step": 14254
    },
    {
      "epoch": 8.699951171875e-05,
      "step": 14254,
      "training_step_time": 0.42825746536254883
    },
    {
      "epoch": 8.7005615234375e-05,
      "model_forward_time": 0.11424732208251953,
      "step": 14255
    },
    {
      "epoch": 8.7005615234375e-05,
      "step": 14255,
      "training_step_time": 0.4062812328338623
    },
    {
      "epoch": 8.701171875e-05,
      "model_forward_time": 0.11550307273864746,
      "step": 14256
    },
    {
      "epoch": 8.701171875e-05,
      "step": 14256,
      "training_step_time": 0.48134303092956543
    },
    {
      "epoch": 8.7017822265625e-05,
      "model_forward_time": 0.11504864692687988,
      "step": 14257
    },
    {
      "epoch": 8.7017822265625e-05,
      "step": 14257,
      "training_step_time": 0.4755551815032959
    },
    {
      "epoch": 8.702392578125e-05,
      "model_forward_time": 0.11449527740478516,
      "step": 14258
    },
    {
      "epoch": 8.702392578125e-05,
      "step": 14258,
      "training_step_time": 0.4048128128051758
    },
    {
      "epoch": 8.7030029296875e-05,
      "model_forward_time": 0.11448001861572266,
      "step": 14259
    },
    {
      "epoch": 8.7030029296875e-05,
      "step": 14259,
      "training_step_time": 0.42659831047058105
    },
    {
      "epoch": 8.70361328125e-05,
      "grad_norm": 0.19311057031154633,
      "learning_rate": 9.067641646740968e-05,
      "loss": 0.0605,
      "step": 14260
    },
    {
      "epoch": 8.70361328125e-05,
      "model_forward_time": 0.114776611328125,
      "step": 14260
    },
    {
      "epoch": 8.70361328125e-05,
      "step": 14260,
      "training_step_time": 0.40380382537841797
    },
    {
      "epoch": 8.7042236328125e-05,
      "model_forward_time": 0.11483597755432129,
      "step": 14261
    },
    {
      "epoch": 8.7042236328125e-05,
      "step": 14261,
      "training_step_time": 0.40100693702697754
    },
    {
      "epoch": 8.704833984375e-05,
      "model_forward_time": 0.11455130577087402,
      "step": 14262
    },
    {
      "epoch": 8.704833984375e-05,
      "step": 14262,
      "training_step_time": 0.494215726852417
    },
    {
      "epoch": 8.7054443359375e-05,
      "model_forward_time": 0.11514472961425781,
      "step": 14263
    },
    {
      "epoch": 8.7054443359375e-05,
      "step": 14263,
      "training_step_time": 0.5081441402435303
    },
    {
      "epoch": 8.7060546875e-05,
      "model_forward_time": 0.11605358123779297,
      "step": 14264
    },
    {
      "epoch": 8.7060546875e-05,
      "step": 14264,
      "training_step_time": 0.44501280784606934
    },
    {
      "epoch": 8.7066650390625e-05,
      "model_forward_time": 0.12305855751037598,
      "step": 14265
    },
    {
      "epoch": 8.7066650390625e-05,
      "step": 14265,
      "training_step_time": 0.481095552444458
    },
    {
      "epoch": 8.707275390625e-05,
      "model_forward_time": 0.1148383617401123,
      "step": 14266
    },
    {
      "epoch": 8.707275390625e-05,
      "step": 14266,
      "training_step_time": 0.4351058006286621
    },
    {
      "epoch": 8.7078857421875e-05,
      "model_forward_time": 0.11463308334350586,
      "step": 14267
    },
    {
      "epoch": 8.7078857421875e-05,
      "step": 14267,
      "training_step_time": 0.39873337745666504
    },
    {
      "epoch": 8.70849609375e-05,
      "model_forward_time": 0.1146092414855957,
      "step": 14268
    },
    {
      "epoch": 8.70849609375e-05,
      "step": 14268,
      "training_step_time": 0.4474327564239502
    },
    {
      "epoch": 8.7091064453125e-05,
      "model_forward_time": 0.11509513854980469,
      "step": 14269
    },
    {
      "epoch": 8.7091064453125e-05,
      "step": 14269,
      "training_step_time": 0.3938453197479248
    },
    {
      "epoch": 8.709716796875e-05,
      "grad_norm": 0.14048191905021667,
      "learning_rate": 9.066038471488829e-05,
      "loss": 0.0555,
      "step": 14270
    },
    {
      "epoch": 8.709716796875e-05,
      "model_forward_time": 0.11464357376098633,
      "step": 14270
    },
    {
      "epoch": 8.709716796875e-05,
      "step": 14270,
      "training_step_time": 0.4181094169616699
    },
    {
      "epoch": 8.7103271484375e-05,
      "model_forward_time": 0.11438179016113281,
      "step": 14271
    },
    {
      "epoch": 8.7103271484375e-05,
      "step": 14271,
      "training_step_time": 0.4558708667755127
    },
    {
      "epoch": 8.7109375e-05,
      "model_forward_time": 0.11621546745300293,
      "step": 14272
    },
    {
      "epoch": 8.7109375e-05,
      "step": 14272,
      "training_step_time": 0.42325425148010254
    },
    {
      "epoch": 8.7115478515625e-05,
      "model_forward_time": 0.11444473266601562,
      "step": 14273
    },
    {
      "epoch": 8.7115478515625e-05,
      "step": 14273,
      "training_step_time": 0.4920339584350586
    },
    {
      "epoch": 8.712158203125e-05,
      "model_forward_time": 0.11492085456848145,
      "step": 14274
    },
    {
      "epoch": 8.712158203125e-05,
      "step": 14274,
      "training_step_time": 0.38725781440734863
    },
    {
      "epoch": 8.7127685546875e-05,
      "model_forward_time": 0.11551904678344727,
      "step": 14275
    },
    {
      "epoch": 8.7127685546875e-05,
      "step": 14275,
      "training_step_time": 0.3959689140319824
    },
    {
      "epoch": 8.71337890625e-05,
      "model_forward_time": 0.11504411697387695,
      "step": 14276
    },
    {
      "epoch": 8.71337890625e-05,
      "step": 14276,
      "training_step_time": 0.39411067962646484
    },
    {
      "epoch": 8.7139892578125e-05,
      "model_forward_time": 0.11495542526245117,
      "step": 14277
    },
    {
      "epoch": 8.7139892578125e-05,
      "step": 14277,
      "training_step_time": 0.46565866470336914
    },
    {
      "epoch": 8.714599609375e-05,
      "model_forward_time": 0.1152963638305664,
      "step": 14278
    },
    {
      "epoch": 8.714599609375e-05,
      "step": 14278,
      "training_step_time": 0.4249751567840576
    },
    {
      "epoch": 8.7152099609375e-05,
      "model_forward_time": 0.1149590015411377,
      "step": 14279
    },
    {
      "epoch": 8.7152099609375e-05,
      "step": 14279,
      "training_step_time": 0.4588901996612549
    },
    {
      "epoch": 8.7158203125e-05,
      "grad_norm": 0.22267252206802368,
      "learning_rate": 9.064434061081562e-05,
      "loss": 0.057,
      "step": 14280
    },
    {
      "epoch": 8.7158203125e-05,
      "model_forward_time": 0.11523675918579102,
      "step": 14280
    },
    {
      "epoch": 8.7158203125e-05,
      "step": 14280,
      "training_step_time": 0.39976072311401367
    },
    {
      "epoch": 8.7164306640625e-05,
      "model_forward_time": 0.11505579948425293,
      "step": 14281
    },
    {
      "epoch": 8.7164306640625e-05,
      "step": 14281,
      "training_step_time": 0.39453721046447754
    },
    {
      "epoch": 8.717041015625e-05,
      "model_forward_time": 0.1149454116821289,
      "step": 14282
    },
    {
      "epoch": 8.717041015625e-05,
      "step": 14282,
      "training_step_time": 0.44225525856018066
    },
    {
      "epoch": 8.7176513671875e-05,
      "model_forward_time": 0.11517930030822754,
      "step": 14283
    },
    {
      "epoch": 8.7176513671875e-05,
      "step": 14283,
      "training_step_time": 0.38881349563598633
    },
    {
      "epoch": 8.71826171875e-05,
      "model_forward_time": 0.1151421070098877,
      "step": 14284
    },
    {
      "epoch": 8.71826171875e-05,
      "step": 14284,
      "training_step_time": 0.4466090202331543
    },
    {
      "epoch": 8.7188720703125e-05,
      "model_forward_time": 0.11501502990722656,
      "step": 14285
    },
    {
      "epoch": 8.7188720703125e-05,
      "step": 14285,
      "training_step_time": 0.40178346633911133
    },
    {
      "epoch": 8.719482421875e-05,
      "model_forward_time": 0.11513805389404297,
      "step": 14286
    },
    {
      "epoch": 8.719482421875e-05,
      "step": 14286,
      "training_step_time": 0.4006962776184082
    },
    {
      "epoch": 8.7200927734375e-05,
      "model_forward_time": 0.11513876914978027,
      "step": 14287
    },
    {
      "epoch": 8.7200927734375e-05,
      "step": 14287,
      "training_step_time": 0.3975942134857178
    },
    {
      "epoch": 8.720703125e-05,
      "model_forward_time": 0.11502766609191895,
      "step": 14288
    },
    {
      "epoch": 8.720703125e-05,
      "step": 14288,
      "training_step_time": 0.4277651309967041
    },
    {
      "epoch": 8.7213134765625e-05,
      "model_forward_time": 0.11477398872375488,
      "step": 14289
    },
    {
      "epoch": 8.7213134765625e-05,
      "step": 14289,
      "training_step_time": 0.38729405403137207
    },
    {
      "epoch": 8.721923828125e-05,
      "grad_norm": 0.17031896114349365,
      "learning_rate": 9.062828416006539e-05,
      "loss": 0.0507,
      "step": 14290
    },
    {
      "epoch": 8.721923828125e-05,
      "model_forward_time": 0.11486983299255371,
      "step": 14290
    },
    {
      "epoch": 8.721923828125e-05,
      "step": 14290,
      "training_step_time": 0.3925318717956543
    },
    {
      "epoch": 8.7225341796875e-05,
      "model_forward_time": 0.11508011817932129,
      "step": 14291
    },
    {
      "epoch": 8.7225341796875e-05,
      "step": 14291,
      "training_step_time": 0.40575695037841797
    },
    {
      "epoch": 8.72314453125e-05,
      "model_forward_time": 0.11582636833190918,
      "step": 14292
    },
    {
      "epoch": 8.72314453125e-05,
      "step": 14292,
      "training_step_time": 0.4196484088897705
    },
    {
      "epoch": 8.7237548828125e-05,
      "model_forward_time": 0.11491203308105469,
      "step": 14293
    },
    {
      "epoch": 8.7237548828125e-05,
      "step": 14293,
      "training_step_time": 0.4278700351715088
    },
    {
      "epoch": 8.724365234375e-05,
      "model_forward_time": 0.11548924446105957,
      "step": 14294
    },
    {
      "epoch": 8.724365234375e-05,
      "step": 14294,
      "training_step_time": 0.4203505516052246
    },
    {
      "epoch": 8.7249755859375e-05,
      "model_forward_time": 0.11505389213562012,
      "step": 14295
    },
    {
      "epoch": 8.7249755859375e-05,
      "step": 14295,
      "training_step_time": 0.39008069038391113
    },
    {
      "epoch": 8.7255859375e-05,
      "model_forward_time": 0.11533308029174805,
      "step": 14296
    },
    {
      "epoch": 8.7255859375e-05,
      "step": 14296,
      "training_step_time": 0.3964815139770508
    },
    {
      "epoch": 8.7261962890625e-05,
      "model_forward_time": 0.11565756797790527,
      "step": 14297
    },
    {
      "epoch": 8.7261962890625e-05,
      "step": 14297,
      "training_step_time": 0.42147374153137207
    },
    {
      "epoch": 8.726806640625e-05,
      "model_forward_time": 0.11543798446655273,
      "step": 14298
    },
    {
      "epoch": 8.726806640625e-05,
      "step": 14298,
      "training_step_time": 0.3991055488586426
    },
    {
      "epoch": 8.7274169921875e-05,
      "model_forward_time": 0.11511516571044922,
      "step": 14299
    },
    {
      "epoch": 8.7274169921875e-05,
      "step": 14299,
      "training_step_time": 0.4187135696411133
    },
    {
      "epoch": 8.72802734375e-05,
      "grad_norm": 0.18193477392196655,
      "learning_rate": 9.061221536751517e-05,
      "loss": 0.0556,
      "step": 14300
    },
    {
      "epoch": 8.72802734375e-05,
      "model_forward_time": 0.11575531959533691,
      "step": 14300
    },
    {
      "epoch": 8.72802734375e-05,
      "step": 14300,
      "training_step_time": 0.3875894546508789
    },
    {
      "epoch": 8.7286376953125e-05,
      "model_forward_time": 0.11493277549743652,
      "step": 14301
    },
    {
      "epoch": 8.7286376953125e-05,
      "step": 14301,
      "training_step_time": 0.4888489246368408
    },
    {
      "epoch": 8.729248046875e-05,
      "model_forward_time": 0.11551475524902344,
      "step": 14302
    },
    {
      "epoch": 8.729248046875e-05,
      "step": 14302,
      "training_step_time": 0.40444445610046387
    },
    {
      "epoch": 8.7298583984375e-05,
      "model_forward_time": 0.11504197120666504,
      "step": 14303
    },
    {
      "epoch": 8.7298583984375e-05,
      "step": 14303,
      "training_step_time": 0.4456799030303955
    },
    {
      "epoch": 8.73046875e-05,
      "model_forward_time": 0.1145639419555664,
      "step": 14304
    },
    {
      "epoch": 8.73046875e-05,
      "step": 14304,
      "training_step_time": 0.3955419063568115
    },
    {
      "epoch": 8.7310791015625e-05,
      "model_forward_time": 0.11518192291259766,
      "step": 14305
    },
    {
      "epoch": 8.7310791015625e-05,
      "step": 14305,
      "training_step_time": 0.38990020751953125
    },
    {
      "epoch": 8.731689453125e-05,
      "model_forward_time": 0.11479496955871582,
      "step": 14306
    },
    {
      "epoch": 8.731689453125e-05,
      "step": 14306,
      "training_step_time": 0.39867234230041504
    },
    {
      "epoch": 8.7322998046875e-05,
      "model_forward_time": 0.11571526527404785,
      "step": 14307
    },
    {
      "epoch": 8.7322998046875e-05,
      "step": 14307,
      "training_step_time": 0.3895409107208252
    },
    {
      "epoch": 8.73291015625e-05,
      "model_forward_time": 0.11514163017272949,
      "step": 14308
    },
    {
      "epoch": 8.73291015625e-05,
      "step": 14308,
      "training_step_time": 0.4159219264984131
    },
    {
      "epoch": 8.7335205078125e-05,
      "model_forward_time": 0.11510968208312988,
      "step": 14309
    },
    {
      "epoch": 8.7335205078125e-05,
      "step": 14309,
      "training_step_time": 0.488692045211792
    },
    {
      "epoch": 8.734130859375e-05,
      "grad_norm": 0.2559242248535156,
      "learning_rate": 9.059613423804623e-05,
      "loss": 0.0537,
      "step": 14310
    },
    {
      "epoch": 8.734130859375e-05,
      "model_forward_time": 0.11603713035583496,
      "step": 14310
    },
    {
      "epoch": 8.734130859375e-05,
      "step": 14310,
      "training_step_time": 0.39104557037353516
    },
    {
      "epoch": 8.7347412109375e-05,
      "model_forward_time": 0.11509442329406738,
      "step": 14311
    },
    {
      "epoch": 8.7347412109375e-05,
      "step": 14311,
      "training_step_time": 0.3952455520629883
    },
    {
      "epoch": 8.7353515625e-05,
      "model_forward_time": 0.11579036712646484,
      "step": 14312
    },
    {
      "epoch": 8.7353515625e-05,
      "step": 14312,
      "training_step_time": 0.4126002788543701
    },
    {
      "epoch": 8.7359619140625e-05,
      "model_forward_time": 0.11498403549194336,
      "step": 14313
    },
    {
      "epoch": 8.7359619140625e-05,
      "step": 14313,
      "training_step_time": 0.3867785930633545
    },
    {
      "epoch": 8.736572265625e-05,
      "model_forward_time": 0.1244497299194336,
      "step": 14314
    },
    {
      "epoch": 8.736572265625e-05,
      "step": 14314,
      "training_step_time": 0.41303467750549316
    },
    {
      "epoch": 8.7371826171875e-05,
      "model_forward_time": 0.11517167091369629,
      "step": 14315
    },
    {
      "epoch": 8.7371826171875e-05,
      "step": 14315,
      "training_step_time": 0.3943290710449219
    },
    {
      "epoch": 8.73779296875e-05,
      "model_forward_time": 0.11540603637695312,
      "step": 14316
    },
    {
      "epoch": 8.73779296875e-05,
      "step": 14316,
      "training_step_time": 0.45778417587280273
    },
    {
      "epoch": 8.7384033203125e-05,
      "model_forward_time": 0.11540102958679199,
      "step": 14317
    },
    {
      "epoch": 8.7384033203125e-05,
      "step": 14317,
      "training_step_time": 0.4065079689025879
    },
    {
      "epoch": 8.739013671875e-05,
      "model_forward_time": 0.11583566665649414,
      "step": 14318
    },
    {
      "epoch": 8.739013671875e-05,
      "step": 14318,
      "training_step_time": 0.4264521598815918
    },
    {
      "epoch": 8.7396240234375e-05,
      "model_forward_time": 0.11524653434753418,
      "step": 14319
    },
    {
      "epoch": 8.7396240234375e-05,
      "step": 14319,
      "training_step_time": 0.3859598636627197
    },
    {
      "epoch": 8.740234375e-05,
      "grad_norm": 0.19059795141220093,
      "learning_rate": 9.058004077654359e-05,
      "loss": 0.0633,
      "step": 14320
    },
    {
      "epoch": 8.740234375e-05,
      "model_forward_time": 0.11612772941589355,
      "step": 14320
    },
    {
      "epoch": 8.740234375e-05,
      "step": 14320,
      "training_step_time": 0.3884766101837158
    },
    {
      "epoch": 8.7408447265625e-05,
      "model_forward_time": 0.11519813537597656,
      "step": 14321
    },
    {
      "epoch": 8.7408447265625e-05,
      "step": 14321,
      "training_step_time": 0.383131742477417
    },
    {
      "epoch": 8.741455078125e-05,
      "model_forward_time": 0.11518311500549316,
      "step": 14322
    },
    {
      "epoch": 8.741455078125e-05,
      "step": 14322,
      "training_step_time": 0.40129566192626953
    },
    {
      "epoch": 8.7420654296875e-05,
      "model_forward_time": 0.11554765701293945,
      "step": 14323
    },
    {
      "epoch": 8.7420654296875e-05,
      "step": 14323,
      "training_step_time": 0.41419506072998047
    },
    {
      "epoch": 8.74267578125e-05,
      "model_forward_time": 0.11524486541748047,
      "step": 14324
    },
    {
      "epoch": 8.74267578125e-05,
      "step": 14324,
      "training_step_time": 0.42130041122436523
    },
    {
      "epoch": 8.7432861328125e-05,
      "model_forward_time": 0.11460471153259277,
      "step": 14325
    },
    {
      "epoch": 8.7432861328125e-05,
      "step": 14325,
      "training_step_time": 0.38319873809814453
    },
    {
      "epoch": 8.743896484375e-05,
      "model_forward_time": 0.11575174331665039,
      "step": 14326
    },
    {
      "epoch": 8.743896484375e-05,
      "step": 14326,
      "training_step_time": 0.4040842056274414
    },
    {
      "epoch": 8.7445068359375e-05,
      "model_forward_time": 0.1150670051574707,
      "step": 14327
    },
    {
      "epoch": 8.7445068359375e-05,
      "step": 14327,
      "training_step_time": 0.4590463638305664
    },
    {
      "epoch": 8.7451171875e-05,
      "model_forward_time": 0.11470913887023926,
      "step": 14328
    },
    {
      "epoch": 8.7451171875e-05,
      "step": 14328,
      "training_step_time": 0.5894923210144043
    },
    {
      "epoch": 8.7457275390625e-05,
      "model_forward_time": 0.11559057235717773,
      "step": 14329
    },
    {
      "epoch": 8.7457275390625e-05,
      "step": 14329,
      "training_step_time": 0.4014432430267334
    },
    {
      "epoch": 8.746337890625e-05,
      "grad_norm": 0.22410322725772858,
      "learning_rate": 9.056393498789602e-05,
      "loss": 0.0624,
      "step": 14330
    },
    {
      "epoch": 8.746337890625e-05,
      "model_forward_time": 0.11526846885681152,
      "step": 14330
    },
    {
      "epoch": 8.746337890625e-05,
      "step": 14330,
      "training_step_time": 0.3885023593902588
    },
    {
      "epoch": 8.7469482421875e-05,
      "model_forward_time": 0.11604166030883789,
      "step": 14331
    },
    {
      "epoch": 8.7469482421875e-05,
      "step": 14331,
      "training_step_time": 0.3875420093536377
    },
    {
      "epoch": 8.74755859375e-05,
      "model_forward_time": 0.11541962623596191,
      "step": 14332
    },
    {
      "epoch": 8.74755859375e-05,
      "step": 14332,
      "training_step_time": 0.3906667232513428
    },
    {
      "epoch": 8.7481689453125e-05,
      "model_forward_time": 0.11586403846740723,
      "step": 14333
    },
    {
      "epoch": 8.7481689453125e-05,
      "step": 14333,
      "training_step_time": 0.38399767875671387
    },
    {
      "epoch": 8.748779296875e-05,
      "model_forward_time": 0.11503124237060547,
      "step": 14334
    },
    {
      "epoch": 8.748779296875e-05,
      "step": 14334,
      "training_step_time": 0.615623950958252
    },
    {
      "epoch": 8.7493896484375e-05,
      "model_forward_time": 0.11564373970031738,
      "step": 14335
    },
    {
      "epoch": 8.7493896484375e-05,
      "step": 14335,
      "training_step_time": 0.39226222038269043
    },
    {
      "epoch": 8.75e-05,
      "model_forward_time": 0.11553812026977539,
      "step": 14336
    },
    {
      "epoch": 8.75e-05,
      "step": 14336,
      "training_step_time": 0.366666316986084
    },
    {
      "epoch": 8.7506103515625e-05,
      "model_forward_time": 0.11524343490600586,
      "step": 14337
    },
    {
      "epoch": 8.7506103515625e-05,
      "step": 14337,
      "training_step_time": 0.4338250160217285
    },
    {
      "epoch": 8.751220703125e-05,
      "model_forward_time": 0.1150522232055664,
      "step": 14338
    },
    {
      "epoch": 8.751220703125e-05,
      "step": 14338,
      "training_step_time": 0.5124585628509521
    },
    {
      "epoch": 8.7518310546875e-05,
      "model_forward_time": 0.11538004875183105,
      "step": 14339
    },
    {
      "epoch": 8.7518310546875e-05,
      "step": 14339,
      "training_step_time": 0.3841664791107178
    },
    {
      "epoch": 8.75244140625e-05,
      "grad_norm": 0.20142550766468048,
      "learning_rate": 9.0547816876996e-05,
      "loss": 0.0596,
      "step": 14340
    },
    {
      "epoch": 8.75244140625e-05,
      "model_forward_time": 0.11533784866333008,
      "step": 14340
    },
    {
      "epoch": 8.75244140625e-05,
      "step": 14340,
      "training_step_time": 0.3855109214782715
    },
    {
      "epoch": 8.7530517578125e-05,
      "model_forward_time": 0.11534571647644043,
      "step": 14341
    },
    {
      "epoch": 8.7530517578125e-05,
      "step": 14341,
      "training_step_time": 0.387042760848999
    },
    {
      "epoch": 8.753662109375e-05,
      "model_forward_time": 0.1159512996673584,
      "step": 14342
    },
    {
      "epoch": 8.753662109375e-05,
      "step": 14342,
      "training_step_time": 0.397275447845459
    },
    {
      "epoch": 8.7542724609375e-05,
      "model_forward_time": 0.11559367179870605,
      "step": 14343
    },
    {
      "epoch": 8.7542724609375e-05,
      "step": 14343,
      "training_step_time": 0.4854695796966553
    },
    {
      "epoch": 8.7548828125e-05,
      "model_forward_time": 0.11489200592041016,
      "step": 14344
    },
    {
      "epoch": 8.7548828125e-05,
      "step": 14344,
      "training_step_time": 0.4889488220214844
    },
    {
      "epoch": 8.7554931640625e-05,
      "model_forward_time": 0.11475372314453125,
      "step": 14345
    },
    {
      "epoch": 8.7554931640625e-05,
      "step": 14345,
      "training_step_time": 0.4053223133087158
    },
    {
      "epoch": 8.756103515625e-05,
      "model_forward_time": 0.11536026000976562,
      "step": 14346
    },
    {
      "epoch": 8.756103515625e-05,
      "step": 14346,
      "training_step_time": 0.5072662830352783
    },
    {
      "epoch": 8.7567138671875e-05,
      "model_forward_time": 0.11510872840881348,
      "step": 14347
    },
    {
      "epoch": 8.7567138671875e-05,
      "step": 14347,
      "training_step_time": 0.4246680736541748
    },
    {
      "epoch": 8.75732421875e-05,
      "model_forward_time": 0.11482429504394531,
      "step": 14348
    },
    {
      "epoch": 8.75732421875e-05,
      "step": 14348,
      "training_step_time": 0.38477396965026855
    },
    {
      "epoch": 8.7579345703125e-05,
      "model_forward_time": 0.11552214622497559,
      "step": 14349
    },
    {
      "epoch": 8.7579345703125e-05,
      "step": 14349,
      "training_step_time": 0.39520955085754395
    },
    {
      "epoch": 8.758544921875e-05,
      "grad_norm": 0.18556715548038483,
      "learning_rate": 9.053168644873984e-05,
      "loss": 0.056,
      "step": 14350
    },
    {
      "epoch": 8.758544921875e-05,
      "model_forward_time": 0.11542510986328125,
      "step": 14350
    },
    {
      "epoch": 8.758544921875e-05,
      "step": 14350,
      "training_step_time": 0.38607144355773926
    },
    {
      "epoch": 8.7591552734375e-05,
      "model_forward_time": 0.11562132835388184,
      "step": 14351
    },
    {
      "epoch": 8.7591552734375e-05,
      "step": 14351,
      "training_step_time": 0.4206557273864746
    },
    {
      "epoch": 8.759765625e-05,
      "model_forward_time": 0.11535978317260742,
      "step": 14352
    },
    {
      "epoch": 8.759765625e-05,
      "step": 14352,
      "training_step_time": 0.5858080387115479
    },
    {
      "epoch": 8.7603759765625e-05,
      "model_forward_time": 0.11590242385864258,
      "step": 14353
    },
    {
      "epoch": 8.7603759765625e-05,
      "step": 14353,
      "training_step_time": 0.40375494956970215
    },
    {
      "epoch": 8.760986328125e-05,
      "model_forward_time": 0.11591267585754395,
      "step": 14354
    },
    {
      "epoch": 8.760986328125e-05,
      "step": 14354,
      "training_step_time": 0.39311814308166504
    },
    {
      "epoch": 8.7615966796875e-05,
      "model_forward_time": 0.11469125747680664,
      "step": 14355
    },
    {
      "epoch": 8.7615966796875e-05,
      "step": 14355,
      "training_step_time": 0.4125325679779053
    },
    {
      "epoch": 8.76220703125e-05,
      "model_forward_time": 0.11522793769836426,
      "step": 14356
    },
    {
      "epoch": 8.76220703125e-05,
      "step": 14356,
      "training_step_time": 0.408994197845459
    },
    {
      "epoch": 8.7628173828125e-05,
      "model_forward_time": 0.11546945571899414,
      "step": 14357
    },
    {
      "epoch": 8.7628173828125e-05,
      "step": 14357,
      "training_step_time": 0.39171719551086426
    },
    {
      "epoch": 8.763427734375e-05,
      "model_forward_time": 0.11509156227111816,
      "step": 14358
    },
    {
      "epoch": 8.763427734375e-05,
      "step": 14358,
      "training_step_time": 0.6220877170562744
    },
    {
      "epoch": 8.7640380859375e-05,
      "model_forward_time": 0.11524248123168945,
      "step": 14359
    },
    {
      "epoch": 8.7640380859375e-05,
      "step": 14359,
      "training_step_time": 0.3922395706176758
    },
    {
      "epoch": 8.7646484375e-05,
      "grad_norm": 0.15943175554275513,
      "learning_rate": 9.05155437080275e-05,
      "loss": 0.0622,
      "step": 14360
    },
    {
      "epoch": 8.7646484375e-05,
      "model_forward_time": 0.11469817161560059,
      "step": 14360
    },
    {
      "epoch": 8.7646484375e-05,
      "step": 14360,
      "training_step_time": 0.43532490730285645
    },
    {
      "epoch": 8.7652587890625e-05,
      "model_forward_time": 0.11659765243530273,
      "step": 14361
    },
    {
      "epoch": 8.7652587890625e-05,
      "step": 14361,
      "training_step_time": 0.41102147102355957
    },
    {
      "epoch": 8.765869140625e-05,
      "model_forward_time": 0.11498785018920898,
      "step": 14362
    },
    {
      "epoch": 8.765869140625e-05,
      "step": 14362,
      "training_step_time": 0.38507699966430664
    },
    {
      "epoch": 8.7664794921875e-05,
      "model_forward_time": 0.11588859558105469,
      "step": 14363
    },
    {
      "epoch": 8.7664794921875e-05,
      "step": 14363,
      "training_step_time": 0.3870208263397217
    },
    {
      "epoch": 8.76708984375e-05,
      "model_forward_time": 0.11586618423461914,
      "step": 14364
    },
    {
      "epoch": 8.76708984375e-05,
      "step": 14364,
      "training_step_time": 0.5833926200866699
    },
    {
      "epoch": 8.7677001953125e-05,
      "model_forward_time": 0.11521339416503906,
      "step": 14365
    },
    {
      "epoch": 8.7677001953125e-05,
      "step": 14365,
      "training_step_time": 0.48885631561279297
    },
    {
      "epoch": 8.768310546875e-05,
      "model_forward_time": 0.1151120662689209,
      "step": 14366
    },
    {
      "epoch": 8.768310546875e-05,
      "step": 14366,
      "training_step_time": 0.5278754234313965
    },
    {
      "epoch": 8.7689208984375e-05,
      "model_forward_time": 0.11554455757141113,
      "step": 14367
    },
    {
      "epoch": 8.7689208984375e-05,
      "step": 14367,
      "training_step_time": 0.4068174362182617
    },
    {
      "epoch": 8.76953125e-05,
      "model_forward_time": 0.11521482467651367,
      "step": 14368
    },
    {
      "epoch": 8.76953125e-05,
      "step": 14368,
      "training_step_time": 0.383941650390625
    },
    {
      "epoch": 8.7701416015625e-05,
      "model_forward_time": 0.11445808410644531,
      "step": 14369
    },
    {
      "epoch": 8.7701416015625e-05,
      "step": 14369,
      "training_step_time": 0.46663641929626465
    },
    {
      "epoch": 8.770751953125e-05,
      "grad_norm": 0.2010451853275299,
      "learning_rate": 9.049938865976275e-05,
      "loss": 0.0607,
      "step": 14370
    },
    {
      "epoch": 8.770751953125e-05,
      "model_forward_time": 0.11529040336608887,
      "step": 14370
    },
    {
      "epoch": 8.770751953125e-05,
      "step": 14370,
      "training_step_time": 0.42114734649658203
    },
    {
      "epoch": 8.7713623046875e-05,
      "model_forward_time": 0.11542534828186035,
      "step": 14371
    },
    {
      "epoch": 8.7713623046875e-05,
      "step": 14371,
      "training_step_time": 0.38939380645751953
    },
    {
      "epoch": 8.77197265625e-05,
      "model_forward_time": 0.11541962623596191,
      "step": 14372
    },
    {
      "epoch": 8.77197265625e-05,
      "step": 14372,
      "training_step_time": 0.39338254928588867
    },
    {
      "epoch": 8.7725830078125e-05,
      "model_forward_time": 0.11496233940124512,
      "step": 14373
    },
    {
      "epoch": 8.7725830078125e-05,
      "step": 14373,
      "training_step_time": 0.4069845676422119
    },
    {
      "epoch": 8.773193359375e-05,
      "model_forward_time": 0.11524415016174316,
      "step": 14374
    },
    {
      "epoch": 8.773193359375e-05,
      "step": 14374,
      "training_step_time": 0.449962854385376
    },
    {
      "epoch": 8.7738037109375e-05,
      "model_forward_time": 0.1152791976928711,
      "step": 14375
    },
    {
      "epoch": 8.7738037109375e-05,
      "step": 14375,
      "training_step_time": 0.3900926113128662
    },
    {
      "epoch": 8.7744140625e-05,
      "model_forward_time": 0.1151723861694336,
      "step": 14376
    },
    {
      "epoch": 8.7744140625e-05,
      "step": 14376,
      "training_step_time": 0.5659687519073486
    },
    {
      "epoch": 8.7750244140625e-05,
      "model_forward_time": 0.11485624313354492,
      "step": 14377
    },
    {
      "epoch": 8.7750244140625e-05,
      "step": 14377,
      "training_step_time": 0.3907473087310791
    },
    {
      "epoch": 8.775634765625e-05,
      "model_forward_time": 0.11533498764038086,
      "step": 14378
    },
    {
      "epoch": 8.775634765625e-05,
      "step": 14378,
      "training_step_time": 0.3900778293609619
    },
    {
      "epoch": 8.7762451171875e-05,
      "model_forward_time": 0.11489510536193848,
      "step": 14379
    },
    {
      "epoch": 8.7762451171875e-05,
      "step": 14379,
      "training_step_time": 0.36898303031921387
    },
    {
      "epoch": 8.77685546875e-05,
      "grad_norm": 0.14760036766529083,
      "learning_rate": 9.048322130885305e-05,
      "loss": 0.0603,
      "step": 14380
    },
    {
      "epoch": 8.77685546875e-05,
      "model_forward_time": 0.11519742012023926,
      "step": 14380
    },
    {
      "epoch": 8.77685546875e-05,
      "step": 14380,
      "training_step_time": 0.4605283737182617
    },
    {
      "epoch": 8.7774658203125e-05,
      "model_forward_time": 0.11488866806030273,
      "step": 14381
    },
    {
      "epoch": 8.7774658203125e-05,
      "step": 14381,
      "training_step_time": 0.41623616218566895
    },
    {
      "epoch": 8.778076171875e-05,
      "model_forward_time": 0.11552691459655762,
      "step": 14382
    },
    {
      "epoch": 8.778076171875e-05,
      "step": 14382,
      "training_step_time": 0.5106382369995117
    },
    {
      "epoch": 8.7786865234375e-05,
      "model_forward_time": 0.1148519515991211,
      "step": 14383
    },
    {
      "epoch": 8.7786865234375e-05,
      "step": 14383,
      "training_step_time": 0.45490360260009766
    },
    {
      "epoch": 8.779296875e-05,
      "model_forward_time": 0.11461186408996582,
      "step": 14384
    },
    {
      "epoch": 8.779296875e-05,
      "step": 14384,
      "training_step_time": 0.39395570755004883
    },
    {
      "epoch": 8.7799072265625e-05,
      "model_forward_time": 0.1155393123626709,
      "step": 14385
    },
    {
      "epoch": 8.7799072265625e-05,
      "step": 14385,
      "training_step_time": 0.39594197273254395
    },
    {
      "epoch": 8.780517578125e-05,
      "model_forward_time": 0.11567258834838867,
      "step": 14386
    },
    {
      "epoch": 8.780517578125e-05,
      "step": 14386,
      "training_step_time": 0.4640791416168213
    },
    {
      "epoch": 8.7811279296875e-05,
      "model_forward_time": 0.11511111259460449,
      "step": 14387
    },
    {
      "epoch": 8.7811279296875e-05,
      "step": 14387,
      "training_step_time": 0.4241957664489746
    },
    {
      "epoch": 8.78173828125e-05,
      "model_forward_time": 0.11608648300170898,
      "step": 14388
    },
    {
      "epoch": 8.78173828125e-05,
      "step": 14388,
      "training_step_time": 0.5665543079376221
    },
    {
      "epoch": 8.7823486328125e-05,
      "model_forward_time": 0.11493945121765137,
      "step": 14389
    },
    {
      "epoch": 8.7823486328125e-05,
      "step": 14389,
      "training_step_time": 0.4045982360839844
    },
    {
      "epoch": 8.782958984375e-05,
      "grad_norm": 0.1444140523672104,
      "learning_rate": 9.046704166020961e-05,
      "loss": 0.0549,
      "step": 14390
    },
    {
      "epoch": 8.782958984375e-05,
      "model_forward_time": 0.11538100242614746,
      "step": 14390
    },
    {
      "epoch": 8.782958984375e-05,
      "step": 14390,
      "training_step_time": 0.39641523361206055
    },
    {
      "epoch": 8.7835693359375e-05,
      "model_forward_time": 0.1150217056274414,
      "step": 14391
    },
    {
      "epoch": 8.7835693359375e-05,
      "step": 14391,
      "training_step_time": 0.39374232292175293
    },
    {
      "epoch": 8.7841796875e-05,
      "model_forward_time": 0.1152958869934082,
      "step": 14392
    },
    {
      "epoch": 8.7841796875e-05,
      "step": 14392,
      "training_step_time": 0.38806843757629395
    },
    {
      "epoch": 8.7847900390625e-05,
      "model_forward_time": 0.11514163017272949,
      "step": 14393
    },
    {
      "epoch": 8.7847900390625e-05,
      "step": 14393,
      "training_step_time": 0.385514497756958
    },
    {
      "epoch": 8.785400390625e-05,
      "model_forward_time": 0.11720943450927734,
      "step": 14394
    },
    {
      "epoch": 8.785400390625e-05,
      "step": 14394,
      "training_step_time": 0.5810487270355225
    },
    {
      "epoch": 8.7860107421875e-05,
      "model_forward_time": 0.11507654190063477,
      "step": 14395
    },
    {
      "epoch": 8.7860107421875e-05,
      "step": 14395,
      "training_step_time": 0.44612908363342285
    },
    {
      "epoch": 8.78662109375e-05,
      "model_forward_time": 0.1153860092163086,
      "step": 14396
    },
    {
      "epoch": 8.78662109375e-05,
      "step": 14396,
      "training_step_time": 0.38617897033691406
    },
    {
      "epoch": 8.7872314453125e-05,
      "model_forward_time": 0.11503362655639648,
      "step": 14397
    },
    {
      "epoch": 8.7872314453125e-05,
      "step": 14397,
      "training_step_time": 0.46608400344848633
    },
    {
      "epoch": 8.787841796875e-05,
      "model_forward_time": 0.11542844772338867,
      "step": 14398
    },
    {
      "epoch": 8.787841796875e-05,
      "step": 14398,
      "training_step_time": 0.4340474605560303
    },
    {
      "epoch": 8.7884521484375e-05,
      "model_forward_time": 0.11464762687683105,
      "step": 14399
    },
    {
      "epoch": 8.7884521484375e-05,
      "step": 14399,
      "training_step_time": 0.4189474582672119
    },
    {
      "epoch": 8.7890625e-05,
      "grad_norm": 0.17582498490810394,
      "learning_rate": 9.045084971874738e-05,
      "loss": 0.0587,
      "step": 14400
    },
    {
      "epoch": 8.7890625e-05,
      "model_forward_time": 0.11621379852294922,
      "step": 14400
    },
    {
      "epoch": 8.7890625e-05,
      "step": 14400,
      "training_step_time": 0.4224262237548828
    },
    {
      "epoch": 8.7896728515625e-05,
      "model_forward_time": 0.11529684066772461,
      "step": 14401
    },
    {
      "epoch": 8.7896728515625e-05,
      "step": 14401,
      "training_step_time": 0.39948034286499023
    },
    {
      "epoch": 8.790283203125e-05,
      "model_forward_time": 0.11593794822692871,
      "step": 14402
    },
    {
      "epoch": 8.790283203125e-05,
      "step": 14402,
      "training_step_time": 0.46556878089904785
    },
    {
      "epoch": 8.7908935546875e-05,
      "model_forward_time": 0.11492204666137695,
      "step": 14403
    },
    {
      "epoch": 8.7908935546875e-05,
      "step": 14403,
      "training_step_time": 0.3906574249267578
    },
    {
      "epoch": 8.79150390625e-05,
      "model_forward_time": 0.11562204360961914,
      "step": 14404
    },
    {
      "epoch": 8.79150390625e-05,
      "step": 14404,
      "training_step_time": 0.39424705505371094
    },
    {
      "epoch": 8.7921142578125e-05,
      "model_forward_time": 0.1152348518371582,
      "step": 14405
    },
    {
      "epoch": 8.7921142578125e-05,
      "step": 14405,
      "training_step_time": 0.39527463912963867
    },
    {
      "epoch": 8.792724609375e-05,
      "model_forward_time": 0.11550641059875488,
      "step": 14406
    },
    {
      "epoch": 8.792724609375e-05,
      "step": 14406,
      "training_step_time": 0.6170241832733154
    },
    {
      "epoch": 8.7933349609375e-05,
      "model_forward_time": 0.1156468391418457,
      "step": 14407
    },
    {
      "epoch": 8.7933349609375e-05,
      "step": 14407,
      "training_step_time": 0.3642303943634033
    },
    {
      "epoch": 8.7939453125e-05,
      "model_forward_time": 0.11599135398864746,
      "step": 14408
    },
    {
      "epoch": 8.7939453125e-05,
      "step": 14408,
      "training_step_time": 0.4307239055633545
    },
    {
      "epoch": 8.7945556640625e-05,
      "model_forward_time": 0.11493968963623047,
      "step": 14409
    },
    {
      "epoch": 8.7945556640625e-05,
      "step": 14409,
      "training_step_time": 0.4763636589050293
    },
    {
      "epoch": 8.795166015625e-05,
      "grad_norm": 0.1496574580669403,
      "learning_rate": 9.043464548938506e-05,
      "loss": 0.0571,
      "step": 14410
    },
    {
      "epoch": 8.795166015625e-05,
      "model_forward_time": 0.11494827270507812,
      "step": 14410
    },
    {
      "epoch": 8.795166015625e-05,
      "step": 14410,
      "training_step_time": 0.4038574695587158
    },
    {
      "epoch": 8.7957763671875e-05,
      "model_forward_time": 0.11452054977416992,
      "step": 14411
    },
    {
      "epoch": 8.7957763671875e-05,
      "step": 14411,
      "training_step_time": 0.4589986801147461
    },
    {
      "epoch": 8.79638671875e-05,
      "model_forward_time": 0.11494898796081543,
      "step": 14412
    },
    {
      "epoch": 8.79638671875e-05,
      "step": 14412,
      "training_step_time": 0.4256317615509033
    },
    {
      "epoch": 8.7969970703125e-05,
      "model_forward_time": 0.1146543025970459,
      "step": 14413
    },
    {
      "epoch": 8.7969970703125e-05,
      "step": 14413,
      "training_step_time": 0.38622355461120605
    },
    {
      "epoch": 8.797607421875e-05,
      "model_forward_time": 0.11524248123168945,
      "step": 14414
    },
    {
      "epoch": 8.797607421875e-05,
      "step": 14414,
      "training_step_time": 0.4661552906036377
    },
    {
      "epoch": 8.7982177734375e-05,
      "model_forward_time": 0.11456060409545898,
      "step": 14415
    },
    {
      "epoch": 8.7982177734375e-05,
      "step": 14415,
      "training_step_time": 0.39589762687683105
    },
    {
      "epoch": 8.798828125e-05,
      "model_forward_time": 0.11512231826782227,
      "step": 14416
    },
    {
      "epoch": 8.798828125e-05,
      "step": 14416,
      "training_step_time": 0.44177985191345215
    },
    {
      "epoch": 8.7994384765625e-05,
      "model_forward_time": 0.11569833755493164,
      "step": 14417
    },
    {
      "epoch": 8.7994384765625e-05,
      "step": 14417,
      "training_step_time": 0.39193058013916016
    },
    {
      "epoch": 8.800048828125e-05,
      "model_forward_time": 0.11582207679748535,
      "step": 14418
    },
    {
      "epoch": 8.800048828125e-05,
      "step": 14418,
      "training_step_time": 0.42369508743286133
    },
    {
      "epoch": 8.8006591796875e-05,
      "model_forward_time": 0.11485505104064941,
      "step": 14419
    },
    {
      "epoch": 8.8006591796875e-05,
      "step": 14419,
      "training_step_time": 0.39105653762817383
    },
    {
      "epoch": 8.80126953125e-05,
      "grad_norm": 0.24709436297416687,
      "learning_rate": 9.041842897704502e-05,
      "loss": 0.0557,
      "step": 14420
    },
    {
      "epoch": 8.80126953125e-05,
      "model_forward_time": 0.11555171012878418,
      "step": 14420
    },
    {
      "epoch": 8.80126953125e-05,
      "step": 14420,
      "training_step_time": 0.38452863693237305
    },
    {
      "epoch": 8.8018798828125e-05,
      "model_forward_time": 0.11588835716247559,
      "step": 14421
    },
    {
      "epoch": 8.8018798828125e-05,
      "step": 14421,
      "training_step_time": 0.38787364959716797
    },
    {
      "epoch": 8.802490234375e-05,
      "model_forward_time": 0.11501312255859375,
      "step": 14422
    },
    {
      "epoch": 8.802490234375e-05,
      "step": 14422,
      "training_step_time": 0.4664130210876465
    },
    {
      "epoch": 8.8031005859375e-05,
      "model_forward_time": 0.11560416221618652,
      "step": 14423
    },
    {
      "epoch": 8.8031005859375e-05,
      "step": 14423,
      "training_step_time": 0.4689044952392578
    },
    {
      "epoch": 8.8037109375e-05,
      "model_forward_time": 0.11527824401855469,
      "step": 14424
    },
    {
      "epoch": 8.8037109375e-05,
      "step": 14424,
      "training_step_time": 0.5056507587432861
    },
    {
      "epoch": 8.8043212890625e-05,
      "model_forward_time": 0.11527729034423828,
      "step": 14425
    },
    {
      "epoch": 8.8043212890625e-05,
      "step": 14425,
      "training_step_time": 0.4046359062194824
    },
    {
      "epoch": 8.804931640625e-05,
      "model_forward_time": 0.11557817459106445,
      "step": 14426
    },
    {
      "epoch": 8.804931640625e-05,
      "step": 14426,
      "training_step_time": 0.4228382110595703
    },
    {
      "epoch": 8.8055419921875e-05,
      "model_forward_time": 0.11522030830383301,
      "step": 14427
    },
    {
      "epoch": 8.8055419921875e-05,
      "step": 14427,
      "training_step_time": 0.4097483158111572
    },
    {
      "epoch": 8.80615234375e-05,
      "model_forward_time": 0.1152656078338623,
      "step": 14428
    },
    {
      "epoch": 8.80615234375e-05,
      "step": 14428,
      "training_step_time": 0.48541688919067383
    },
    {
      "epoch": 8.8067626953125e-05,
      "model_forward_time": 0.11542868614196777,
      "step": 14429
    },
    {
      "epoch": 8.8067626953125e-05,
      "step": 14429,
      "training_step_time": 0.38802123069763184
    },
    {
      "epoch": 8.807373046875e-05,
      "grad_norm": 0.18627013266086578,
      "learning_rate": 9.040220018665347e-05,
      "loss": 0.0511,
      "step": 14430
    },
    {
      "epoch": 8.807373046875e-05,
      "model_forward_time": 0.11585783958435059,
      "step": 14430
    },
    {
      "epoch": 8.807373046875e-05,
      "step": 14430,
      "training_step_time": 0.49521350860595703
    },
    {
      "epoch": 8.8079833984375e-05,
      "model_forward_time": 0.11509156227111816,
      "step": 14431
    },
    {
      "epoch": 8.8079833984375e-05,
      "step": 14431,
      "training_step_time": 0.3975391387939453
    },
    {
      "epoch": 8.80859375e-05,
      "model_forward_time": 0.11565518379211426,
      "step": 14432
    },
    {
      "epoch": 8.80859375e-05,
      "step": 14432,
      "training_step_time": 0.3827667236328125
    },
    {
      "epoch": 8.8092041015625e-05,
      "model_forward_time": 0.11560678482055664,
      "step": 14433
    },
    {
      "epoch": 8.8092041015625e-05,
      "step": 14433,
      "training_step_time": 0.39108967781066895
    },
    {
      "epoch": 8.809814453125e-05,
      "model_forward_time": 0.11521697044372559,
      "step": 14434
    },
    {
      "epoch": 8.809814453125e-05,
      "step": 14434,
      "training_step_time": 0.3844940662384033
    },
    {
      "epoch": 8.8104248046875e-05,
      "model_forward_time": 0.11551547050476074,
      "step": 14435
    },
    {
      "epoch": 8.8104248046875e-05,
      "step": 14435,
      "training_step_time": 0.3945047855377197
    },
    {
      "epoch": 8.81103515625e-05,
      "model_forward_time": 0.11515450477600098,
      "step": 14436
    },
    {
      "epoch": 8.81103515625e-05,
      "step": 14436,
      "training_step_time": 0.6345076560974121
    },
    {
      "epoch": 8.8116455078125e-05,
      "model_forward_time": 0.1151888370513916,
      "step": 14437
    },
    {
      "epoch": 8.8116455078125e-05,
      "step": 14437,
      "training_step_time": 0.38486337661743164
    },
    {
      "epoch": 8.812255859375e-05,
      "model_forward_time": 0.11507654190063477,
      "step": 14438
    },
    {
      "epoch": 8.812255859375e-05,
      "step": 14438,
      "training_step_time": 0.46854329109191895
    },
    {
      "epoch": 8.8128662109375e-05,
      "model_forward_time": 0.11459469795227051,
      "step": 14439
    },
    {
      "epoch": 8.8128662109375e-05,
      "step": 14439,
      "training_step_time": 0.4661426544189453
    },
    {
      "epoch": 8.8134765625e-05,
      "grad_norm": 0.2251816689968109,
      "learning_rate": 9.038595912314027e-05,
      "loss": 0.0509,
      "step": 14440
    },
    {
      "epoch": 8.8134765625e-05,
      "model_forward_time": 0.11461114883422852,
      "step": 14440
    },
    {
      "epoch": 8.8134765625e-05,
      "step": 14440,
      "training_step_time": 0.40030670166015625
    },
    {
      "epoch": 8.8140869140625e-05,
      "model_forward_time": 0.11462569236755371,
      "step": 14441
    },
    {
      "epoch": 8.8140869140625e-05,
      "step": 14441,
      "training_step_time": 0.3976881504058838
    },
    {
      "epoch": 8.814697265625e-05,
      "model_forward_time": 0.11569452285766602,
      "step": 14442
    },
    {
      "epoch": 8.814697265625e-05,
      "step": 14442,
      "training_step_time": 0.4914696216583252
    },
    {
      "epoch": 8.8153076171875e-05,
      "model_forward_time": 0.11513876914978027,
      "step": 14443
    },
    {
      "epoch": 8.8153076171875e-05,
      "step": 14443,
      "training_step_time": 0.3932309150695801
    },
    {
      "epoch": 8.81591796875e-05,
      "model_forward_time": 0.11530542373657227,
      "step": 14444
    },
    {
      "epoch": 8.81591796875e-05,
      "step": 14444,
      "training_step_time": 0.5006697177886963
    },
    {
      "epoch": 8.8165283203125e-05,
      "model_forward_time": 0.11518311500549316,
      "step": 14445
    },
    {
      "epoch": 8.8165283203125e-05,
      "step": 14445,
      "training_step_time": 0.39126062393188477
    },
    {
      "epoch": 8.817138671875e-05,
      "model_forward_time": 0.11512279510498047,
      "step": 14446
    },
    {
      "epoch": 8.817138671875e-05,
      "step": 14446,
      "training_step_time": 0.3828575611114502
    },
    {
      "epoch": 8.8177490234375e-05,
      "model_forward_time": 0.11533904075622559,
      "step": 14447
    },
    {
      "epoch": 8.8177490234375e-05,
      "step": 14447,
      "training_step_time": 0.39292168617248535
    },
    {
      "epoch": 8.818359375e-05,
      "model_forward_time": 0.11545515060424805,
      "step": 14448
    },
    {
      "epoch": 8.818359375e-05,
      "step": 14448,
      "training_step_time": 0.6728129386901855
    },
    {
      "epoch": 8.8189697265625e-05,
      "model_forward_time": 0.11498641967773438,
      "step": 14449
    },
    {
      "epoch": 8.8189697265625e-05,
      "step": 14449,
      "training_step_time": 0.406080961227417
    },
    {
      "epoch": 8.819580078125e-05,
      "grad_norm": 0.18289200961589813,
      "learning_rate": 9.0369705791439e-05,
      "loss": 0.0644,
      "step": 14450
    },
    {
      "epoch": 8.819580078125e-05,
      "model_forward_time": 0.11539220809936523,
      "step": 14450
    },
    {
      "epoch": 8.819580078125e-05,
      "step": 14450,
      "training_step_time": 0.42346644401550293
    },
    {
      "epoch": 8.8201904296875e-05,
      "model_forward_time": 0.11508607864379883,
      "step": 14451
    },
    {
      "epoch": 8.8201904296875e-05,
      "step": 14451,
      "training_step_time": 0.4513089656829834
    },
    {
      "epoch": 8.82080078125e-05,
      "model_forward_time": 0.12531733512878418,
      "step": 14452
    },
    {
      "epoch": 8.82080078125e-05,
      "step": 14452,
      "training_step_time": 0.48494529724121094
    },
    {
      "epoch": 8.8214111328125e-05,
      "model_forward_time": 0.11493349075317383,
      "step": 14453
    },
    {
      "epoch": 8.8214111328125e-05,
      "step": 14453,
      "training_step_time": 0.47403430938720703
    },
    {
      "epoch": 8.822021484375e-05,
      "model_forward_time": 0.11524581909179688,
      "step": 14454
    },
    {
      "epoch": 8.822021484375e-05,
      "step": 14454,
      "training_step_time": 0.44866323471069336
    },
    {
      "epoch": 8.8226318359375e-05,
      "model_forward_time": 0.11502671241760254,
      "step": 14455
    },
    {
      "epoch": 8.8226318359375e-05,
      "step": 14455,
      "training_step_time": 0.3857419490814209
    },
    {
      "epoch": 8.8232421875e-05,
      "model_forward_time": 0.1151576042175293,
      "step": 14456
    },
    {
      "epoch": 8.8232421875e-05,
      "step": 14456,
      "training_step_time": 0.39300012588500977
    },
    {
      "epoch": 8.8238525390625e-05,
      "model_forward_time": 0.11557149887084961,
      "step": 14457
    },
    {
      "epoch": 8.8238525390625e-05,
      "step": 14457,
      "training_step_time": 0.40518951416015625
    },
    {
      "epoch": 8.824462890625e-05,
      "model_forward_time": 0.11554694175720215,
      "step": 14458
    },
    {
      "epoch": 8.824462890625e-05,
      "step": 14458,
      "training_step_time": 0.48926734924316406
    },
    {
      "epoch": 8.8250732421875e-05,
      "model_forward_time": 0.11502528190612793,
      "step": 14459
    },
    {
      "epoch": 8.8250732421875e-05,
      "step": 14459,
      "training_step_time": 0.39157843589782715
    },
    {
      "epoch": 8.82568359375e-05,
      "grad_norm": 0.1815534234046936,
      "learning_rate": 9.035344019648702e-05,
      "loss": 0.0561,
      "step": 14460
    },
    {
      "epoch": 8.82568359375e-05,
      "model_forward_time": 0.11609077453613281,
      "step": 14460
    },
    {
      "epoch": 8.82568359375e-05,
      "step": 14460,
      "training_step_time": 0.4601597785949707
    },
    {
      "epoch": 8.8262939453125e-05,
      "model_forward_time": 0.11513924598693848,
      "step": 14461
    },
    {
      "epoch": 8.8262939453125e-05,
      "step": 14461,
      "training_step_time": 0.38847994804382324
    },
    {
      "epoch": 8.826904296875e-05,
      "model_forward_time": 0.115142822265625,
      "step": 14462
    },
    {
      "epoch": 8.826904296875e-05,
      "step": 14462,
      "training_step_time": 0.3902254104614258
    },
    {
      "epoch": 8.8275146484375e-05,
      "model_forward_time": 0.1156468391418457,
      "step": 14463
    },
    {
      "epoch": 8.8275146484375e-05,
      "step": 14463,
      "training_step_time": 0.3907895088195801
    },
    {
      "epoch": 8.828125e-05,
      "model_forward_time": 0.1156911849975586,
      "step": 14464
    },
    {
      "epoch": 8.828125e-05,
      "step": 14464,
      "training_step_time": 0.3699967861175537
    },
    {
      "epoch": 8.8287353515625e-05,
      "model_forward_time": 0.11558747291564941,
      "step": 14465
    },
    {
      "epoch": 8.8287353515625e-05,
      "step": 14465,
      "training_step_time": 0.44437384605407715
    },
    {
      "epoch": 8.829345703125e-05,
      "model_forward_time": 0.11510634422302246,
      "step": 14466
    },
    {
      "epoch": 8.829345703125e-05,
      "step": 14466,
      "training_step_time": 0.6765742301940918
    },
    {
      "epoch": 8.8299560546875e-05,
      "model_forward_time": 0.11477971076965332,
      "step": 14467
    },
    {
      "epoch": 8.8299560546875e-05,
      "step": 14467,
      "training_step_time": 0.4642314910888672
    },
    {
      "epoch": 8.83056640625e-05,
      "model_forward_time": 0.11484074592590332,
      "step": 14468
    },
    {
      "epoch": 8.83056640625e-05,
      "step": 14468,
      "training_step_time": 0.4099462032318115
    },
    {
      "epoch": 8.8311767578125e-05,
      "model_forward_time": 0.11525225639343262,
      "step": 14469
    },
    {
      "epoch": 8.8311767578125e-05,
      "step": 14469,
      "training_step_time": 0.3988208770751953
    },
    {
      "epoch": 8.831787109375e-05,
      "grad_norm": 0.2242894023656845,
      "learning_rate": 9.033716234322538e-05,
      "loss": 0.0617,
      "step": 14470
    },
    {
      "epoch": 8.831787109375e-05,
      "model_forward_time": 0.11499404907226562,
      "step": 14470
    },
    {
      "epoch": 8.831787109375e-05,
      "step": 14470,
      "training_step_time": 0.4133131504058838
    },
    {
      "epoch": 8.8323974609375e-05,
      "model_forward_time": 0.11582326889038086,
      "step": 14471
    },
    {
      "epoch": 8.8323974609375e-05,
      "step": 14471,
      "training_step_time": 0.3918337821960449
    },
    {
      "epoch": 8.8330078125e-05,
      "model_forward_time": 0.11572861671447754,
      "step": 14472
    },
    {
      "epoch": 8.8330078125e-05,
      "step": 14472,
      "training_step_time": 0.5099673271179199
    },
    {
      "epoch": 8.8336181640625e-05,
      "model_forward_time": 0.11488056182861328,
      "step": 14473
    },
    {
      "epoch": 8.8336181640625e-05,
      "step": 14473,
      "training_step_time": 0.39521098136901855
    },
    {
      "epoch": 8.834228515625e-05,
      "model_forward_time": 0.11539340019226074,
      "step": 14474
    },
    {
      "epoch": 8.834228515625e-05,
      "step": 14474,
      "training_step_time": 0.38759684562683105
    },
    {
      "epoch": 8.8348388671875e-05,
      "model_forward_time": 0.11521100997924805,
      "step": 14475
    },
    {
      "epoch": 8.8348388671875e-05,
      "step": 14475,
      "training_step_time": 0.3956606388092041
    },
    {
      "epoch": 8.83544921875e-05,
      "model_forward_time": 0.11592674255371094,
      "step": 14476
    },
    {
      "epoch": 8.83544921875e-05,
      "step": 14476,
      "training_step_time": 0.38609862327575684
    },
    {
      "epoch": 8.8360595703125e-05,
      "model_forward_time": 0.11523103713989258,
      "step": 14477
    },
    {
      "epoch": 8.8360595703125e-05,
      "step": 14477,
      "training_step_time": 0.40248799324035645
    },
    {
      "epoch": 8.836669921875e-05,
      "model_forward_time": 0.11484479904174805,
      "step": 14478
    },
    {
      "epoch": 8.836669921875e-05,
      "step": 14478,
      "training_step_time": 0.6164872646331787
    },
    {
      "epoch": 8.8372802734375e-05,
      "model_forward_time": 0.11471939086914062,
      "step": 14479
    },
    {
      "epoch": 8.8372802734375e-05,
      "step": 14479,
      "training_step_time": 0.3991577625274658
    },
    {
      "epoch": 8.837890625e-05,
      "grad_norm": 0.11996948719024658,
      "learning_rate": 9.032087223659885e-05,
      "loss": 0.0603,
      "step": 14480
    },
    {
      "epoch": 8.837890625e-05,
      "model_forward_time": 0.11428403854370117,
      "step": 14480
    },
    {
      "epoch": 8.837890625e-05,
      "step": 14480,
      "training_step_time": 0.47095704078674316
    },
    {
      "epoch": 8.8385009765625e-05,
      "model_forward_time": 0.11502647399902344,
      "step": 14481
    },
    {
      "epoch": 8.8385009765625e-05,
      "step": 14481,
      "training_step_time": 0.3850588798522949
    },
    {
      "epoch": 8.839111328125e-05,
      "model_forward_time": 0.11523175239562988,
      "step": 14482
    },
    {
      "epoch": 8.839111328125e-05,
      "step": 14482,
      "training_step_time": 0.41339874267578125
    },
    {
      "epoch": 8.8397216796875e-05,
      "model_forward_time": 0.11527490615844727,
      "step": 14483
    },
    {
      "epoch": 8.8397216796875e-05,
      "step": 14483,
      "training_step_time": 0.3935277462005615
    },
    {
      "epoch": 8.84033203125e-05,
      "model_forward_time": 0.11501336097717285,
      "step": 14484
    },
    {
      "epoch": 8.84033203125e-05,
      "step": 14484,
      "training_step_time": 0.5947237014770508
    },
    {
      "epoch": 8.8409423828125e-05,
      "model_forward_time": 0.1145939826965332,
      "step": 14485
    },
    {
      "epoch": 8.8409423828125e-05,
      "step": 14485,
      "training_step_time": 0.425001859664917
    },
    {
      "epoch": 8.841552734375e-05,
      "model_forward_time": 0.11449384689331055,
      "step": 14486
    },
    {
      "epoch": 8.841552734375e-05,
      "step": 14486,
      "training_step_time": 0.4224512577056885
    },
    {
      "epoch": 8.8421630859375e-05,
      "model_forward_time": 0.11613917350769043,
      "step": 14487
    },
    {
      "epoch": 8.8421630859375e-05,
      "step": 14487,
      "training_step_time": 0.3960251808166504
    },
    {
      "epoch": 8.8427734375e-05,
      "model_forward_time": 0.11574959754943848,
      "step": 14488
    },
    {
      "epoch": 8.8427734375e-05,
      "step": 14488,
      "training_step_time": 0.38901734352111816
    },
    {
      "epoch": 8.8433837890625e-05,
      "model_forward_time": 0.11485004425048828,
      "step": 14489
    },
    {
      "epoch": 8.8433837890625e-05,
      "step": 14489,
      "training_step_time": 0.3978698253631592
    },
    {
      "epoch": 8.843994140625e-05,
      "grad_norm": 0.14611785113811493,
      "learning_rate": 9.030456988155596e-05,
      "loss": 0.0575,
      "step": 14490
    },
    {
      "epoch": 8.843994140625e-05,
      "model_forward_time": 0.11559891700744629,
      "step": 14490
    },
    {
      "epoch": 8.843994140625e-05,
      "step": 14490,
      "training_step_time": 0.559760570526123
    },
    {
      "epoch": 8.8446044921875e-05,
      "model_forward_time": 0.11477947235107422,
      "step": 14491
    },
    {
      "epoch": 8.8446044921875e-05,
      "step": 14491,
      "training_step_time": 0.4128737449645996
    },
    {
      "epoch": 8.84521484375e-05,
      "model_forward_time": 0.11521697044372559,
      "step": 14492
    },
    {
      "epoch": 8.84521484375e-05,
      "step": 14492,
      "training_step_time": 0.36404967308044434
    },
    {
      "epoch": 8.8458251953125e-05,
      "model_forward_time": 0.11482000350952148,
      "step": 14493
    },
    {
      "epoch": 8.8458251953125e-05,
      "step": 14493,
      "training_step_time": 0.44034790992736816
    },
    {
      "epoch": 8.846435546875e-05,
      "model_forward_time": 0.11508917808532715,
      "step": 14494
    },
    {
      "epoch": 8.846435546875e-05,
      "step": 14494,
      "training_step_time": 0.48322129249572754
    },
    {
      "epoch": 8.8470458984375e-05,
      "model_forward_time": 0.11595749855041504,
      "step": 14495
    },
    {
      "epoch": 8.8470458984375e-05,
      "step": 14495,
      "training_step_time": 0.41529059410095215
    },
    {
      "epoch": 8.84765625e-05,
      "model_forward_time": 0.11484909057617188,
      "step": 14496
    },
    {
      "epoch": 8.84765625e-05,
      "step": 14496,
      "training_step_time": 0.4367642402648926
    },
    {
      "epoch": 8.8482666015625e-05,
      "model_forward_time": 0.11451148986816406,
      "step": 14497
    },
    {
      "epoch": 8.8482666015625e-05,
      "step": 14497,
      "training_step_time": 0.40033531188964844
    },
    {
      "epoch": 8.848876953125e-05,
      "model_forward_time": 0.11483383178710938,
      "step": 14498
    },
    {
      "epoch": 8.848876953125e-05,
      "step": 14498,
      "training_step_time": 0.43195056915283203
    },
    {
      "epoch": 8.8494873046875e-05,
      "model_forward_time": 0.11548256874084473,
      "step": 14499
    },
    {
      "epoch": 8.8494873046875e-05,
      "step": 14499,
      "training_step_time": 0.3925471305847168
    },
    {
      "epoch": 8.85009765625e-05,
      "grad_norm": 0.16616462171077728,
      "learning_rate": 9.028825528304892e-05,
      "loss": 0.0501,
      "step": 14500
    },
    {
      "epoch": 8.85009765625e-05,
      "model_forward_time": 0.1147909164428711,
      "step": 14500
    },
    {
      "epoch": 8.85009765625e-05,
      "step": 14500,
      "training_step_time": 0.4945716857910156
    },
    {
      "epoch": 8.8507080078125e-05,
      "model_forward_time": 0.11539006233215332,
      "step": 14501
    },
    {
      "epoch": 8.8507080078125e-05,
      "step": 14501,
      "training_step_time": 0.40810132026672363
    },
    {
      "epoch": 8.851318359375e-05,
      "model_forward_time": 0.11535906791687012,
      "step": 14502
    },
    {
      "epoch": 8.851318359375e-05,
      "step": 14502,
      "training_step_time": 0.5505337715148926
    },
    {
      "epoch": 8.8519287109375e-05,
      "model_forward_time": 0.1148381233215332,
      "step": 14503
    },
    {
      "epoch": 8.8519287109375e-05,
      "step": 14503,
      "training_step_time": 0.39096879959106445
    },
    {
      "epoch": 8.8525390625e-05,
      "model_forward_time": 0.11459064483642578,
      "step": 14504
    },
    {
      "epoch": 8.8525390625e-05,
      "step": 14504,
      "training_step_time": 0.3986802101135254
    },
    {
      "epoch": 8.8531494140625e-05,
      "model_forward_time": 0.11541056632995605,
      "step": 14505
    },
    {
      "epoch": 8.8531494140625e-05,
      "step": 14505,
      "training_step_time": 0.41698503494262695
    },
    {
      "epoch": 8.853759765625e-05,
      "model_forward_time": 0.11492013931274414,
      "step": 14506
    },
    {
      "epoch": 8.853759765625e-05,
      "step": 14506,
      "training_step_time": 0.36354660987854004
    },
    {
      "epoch": 8.8543701171875e-05,
      "model_forward_time": 0.11550474166870117,
      "step": 14507
    },
    {
      "epoch": 8.8543701171875e-05,
      "step": 14507,
      "training_step_time": 0.42116212844848633
    },
    {
      "epoch": 8.85498046875e-05,
      "model_forward_time": 0.11500191688537598,
      "step": 14508
    },
    {
      "epoch": 8.85498046875e-05,
      "step": 14508,
      "training_step_time": 0.6088838577270508
    },
    {
      "epoch": 8.8555908203125e-05,
      "model_forward_time": 0.11413979530334473,
      "step": 14509
    },
    {
      "epoch": 8.8555908203125e-05,
      "step": 14509,
      "training_step_time": 0.4196145534515381
    },
    {
      "epoch": 8.856201171875e-05,
      "grad_norm": 0.13876652717590332,
      "learning_rate": 9.027192844603365e-05,
      "loss": 0.0567,
      "step": 14510
    },
    {
      "epoch": 8.856201171875e-05,
      "model_forward_time": 0.11431574821472168,
      "step": 14510
    },
    {
      "epoch": 8.856201171875e-05,
      "step": 14510,
      "training_step_time": 0.3889005184173584
    },
    {
      "epoch": 8.8568115234375e-05,
      "model_forward_time": 0.11433696746826172,
      "step": 14511
    },
    {
      "epoch": 8.8568115234375e-05,
      "step": 14511,
      "training_step_time": 0.393801212310791
    },
    {
      "epoch": 8.857421875e-05,
      "model_forward_time": 0.11479663848876953,
      "step": 14512
    },
    {
      "epoch": 8.857421875e-05,
      "step": 14512,
      "training_step_time": 0.4211413860321045
    },
    {
      "epoch": 8.8580322265625e-05,
      "model_forward_time": 0.11511683464050293,
      "step": 14513
    },
    {
      "epoch": 8.8580322265625e-05,
      "step": 14513,
      "training_step_time": 0.39081788063049316
    },
    {
      "epoch": 8.858642578125e-05,
      "model_forward_time": 0.11543464660644531,
      "step": 14514
    },
    {
      "epoch": 8.858642578125e-05,
      "step": 14514,
      "training_step_time": 0.7670383453369141
    },
    {
      "epoch": 8.8592529296875e-05,
      "model_forward_time": 0.11412906646728516,
      "step": 14515
    },
    {
      "epoch": 8.8592529296875e-05,
      "step": 14515,
      "training_step_time": 0.39429378509521484
    },
    {
      "epoch": 8.85986328125e-05,
      "model_forward_time": 0.11429810523986816,
      "step": 14516
    },
    {
      "epoch": 8.85986328125e-05,
      "step": 14516,
      "training_step_time": 0.38529229164123535
    },
    {
      "epoch": 8.8604736328125e-05,
      "model_forward_time": 0.11415958404541016,
      "step": 14517
    },
    {
      "epoch": 8.8604736328125e-05,
      "step": 14517,
      "training_step_time": 0.3906135559082031
    },
    {
      "epoch": 8.861083984375e-05,
      "model_forward_time": 0.11454319953918457,
      "step": 14518
    },
    {
      "epoch": 8.861083984375e-05,
      "step": 14518,
      "training_step_time": 0.38713717460632324
    },
    {
      "epoch": 8.8616943359375e-05,
      "model_forward_time": 0.1141061782836914,
      "step": 14519
    },
    {
      "epoch": 8.8616943359375e-05,
      "step": 14519,
      "training_step_time": 0.3875699043273926
    },
    {
      "epoch": 8.8623046875e-05,
      "grad_norm": 0.15772715210914612,
      "learning_rate": 9.025558937546988e-05,
      "loss": 0.0504,
      "step": 14520
    },
    {
      "epoch": 8.8623046875e-05,
      "model_forward_time": 0.11485433578491211,
      "step": 14520
    },
    {
      "epoch": 8.8623046875e-05,
      "step": 14520,
      "training_step_time": 0.6897189617156982
    },
    {
      "epoch": 8.8629150390625e-05,
      "model_forward_time": 0.1147921085357666,
      "step": 14521
    },
    {
      "epoch": 8.8629150390625e-05,
      "step": 14521,
      "training_step_time": 0.40749406814575195
    },
    {
      "epoch": 8.863525390625e-05,
      "model_forward_time": 0.1146540641784668,
      "step": 14522
    },
    {
      "epoch": 8.863525390625e-05,
      "step": 14522,
      "training_step_time": 0.4067354202270508
    },
    {
      "epoch": 8.8641357421875e-05,
      "model_forward_time": 0.11466503143310547,
      "step": 14523
    },
    {
      "epoch": 8.8641357421875e-05,
      "step": 14523,
      "training_step_time": 0.39101696014404297
    },
    {
      "epoch": 8.86474609375e-05,
      "model_forward_time": 0.11479854583740234,
      "step": 14524
    },
    {
      "epoch": 8.86474609375e-05,
      "step": 14524,
      "training_step_time": 0.48648953437805176
    },
    {
      "epoch": 8.8653564453125e-05,
      "model_forward_time": 0.11478114128112793,
      "step": 14525
    },
    {
      "epoch": 8.8653564453125e-05,
      "step": 14525,
      "training_step_time": 0.42300868034362793
    },
    {
      "epoch": 8.865966796875e-05,
      "model_forward_time": 0.11493611335754395,
      "step": 14526
    },
    {
      "epoch": 8.865966796875e-05,
      "step": 14526,
      "training_step_time": 0.5304656028747559
    },
    {
      "epoch": 8.8665771484375e-05,
      "model_forward_time": 0.11489319801330566,
      "step": 14527
    },
    {
      "epoch": 8.8665771484375e-05,
      "step": 14527,
      "training_step_time": 0.48056793212890625
    },
    {
      "epoch": 8.8671875e-05,
      "model_forward_time": 0.11490559577941895,
      "step": 14528
    },
    {
      "epoch": 8.8671875e-05,
      "step": 14528,
      "training_step_time": 0.40946292877197266
    },
    {
      "epoch": 8.8677978515625e-05,
      "model_forward_time": 0.11447811126708984,
      "step": 14529
    },
    {
      "epoch": 8.8677978515625e-05,
      "step": 14529,
      "training_step_time": 0.38309812545776367
    },
    {
      "epoch": 8.868408203125e-05,
      "grad_norm": 0.1991964429616928,
      "learning_rate": 9.02392380763209e-05,
      "loss": 0.0556,
      "step": 14530
    },
    {
      "epoch": 8.868408203125e-05,
      "model_forward_time": 0.11453700065612793,
      "step": 14530
    },
    {
      "epoch": 8.868408203125e-05,
      "step": 14530,
      "training_step_time": 0.42560362815856934
    },
    {
      "epoch": 8.8690185546875e-05,
      "model_forward_time": 0.11442780494689941,
      "step": 14531
    },
    {
      "epoch": 8.8690185546875e-05,
      "step": 14531,
      "training_step_time": 0.465165376663208
    },
    {
      "epoch": 8.86962890625e-05,
      "model_forward_time": 0.11481451988220215,
      "step": 14532
    },
    {
      "epoch": 8.86962890625e-05,
      "step": 14532,
      "training_step_time": 0.3959043025970459
    },
    {
      "epoch": 8.8702392578125e-05,
      "model_forward_time": 0.11432147026062012,
      "step": 14533
    },
    {
      "epoch": 8.8702392578125e-05,
      "step": 14533,
      "training_step_time": 0.39307117462158203
    },
    {
      "epoch": 8.870849609375e-05,
      "model_forward_time": 0.1153573989868164,
      "step": 14534
    },
    {
      "epoch": 8.870849609375e-05,
      "step": 14534,
      "training_step_time": 0.4805729389190674
    },
    {
      "epoch": 8.8714599609375e-05,
      "model_forward_time": 0.11441183090209961,
      "step": 14535
    },
    {
      "epoch": 8.8714599609375e-05,
      "step": 14535,
      "training_step_time": 0.4531259536743164
    },
    {
      "epoch": 8.8720703125e-05,
      "model_forward_time": 0.11522936820983887,
      "step": 14536
    },
    {
      "epoch": 8.8720703125e-05,
      "step": 14536,
      "training_step_time": 0.49362897872924805
    },
    {
      "epoch": 8.8726806640625e-05,
      "model_forward_time": 0.11501121520996094,
      "step": 14537
    },
    {
      "epoch": 8.8726806640625e-05,
      "step": 14537,
      "training_step_time": 0.4279632568359375
    },
    {
      "epoch": 8.873291015625e-05,
      "model_forward_time": 0.11475133895874023,
      "step": 14538
    },
    {
      "epoch": 8.873291015625e-05,
      "step": 14538,
      "training_step_time": 0.39997005462646484
    },
    {
      "epoch": 8.8739013671875e-05,
      "model_forward_time": 0.1145334243774414,
      "step": 14539
    },
    {
      "epoch": 8.8739013671875e-05,
      "step": 14539,
      "training_step_time": 0.4198644161224365
    },
    {
      "epoch": 8.87451171875e-05,
      "grad_norm": 0.10722903907299042,
      "learning_rate": 9.022287455355387e-05,
      "loss": 0.0612,
      "step": 14540
    },
    {
      "epoch": 8.87451171875e-05,
      "model_forward_time": 0.1143946647644043,
      "step": 14540
    },
    {
      "epoch": 8.87451171875e-05,
      "step": 14540,
      "training_step_time": 0.4197103977203369
    },
    {
      "epoch": 8.8751220703125e-05,
      "model_forward_time": 0.11475372314453125,
      "step": 14541
    },
    {
      "epoch": 8.8751220703125e-05,
      "step": 14541,
      "training_step_time": 0.40113306045532227
    },
    {
      "epoch": 8.875732421875e-05,
      "model_forward_time": 0.11531281471252441,
      "step": 14542
    },
    {
      "epoch": 8.875732421875e-05,
      "step": 14542,
      "training_step_time": 0.38974952697753906
    },
    {
      "epoch": 8.8763427734375e-05,
      "model_forward_time": 0.11439204216003418,
      "step": 14543
    },
    {
      "epoch": 8.8763427734375e-05,
      "step": 14543,
      "training_step_time": 0.4080164432525635
    },
    {
      "epoch": 8.876953125e-05,
      "model_forward_time": 0.11467742919921875,
      "step": 14544
    },
    {
      "epoch": 8.876953125e-05,
      "step": 14544,
      "training_step_time": 0.5490052700042725
    },
    {
      "epoch": 8.8775634765625e-05,
      "model_forward_time": 0.11507010459899902,
      "step": 14545
    },
    {
      "epoch": 8.8775634765625e-05,
      "step": 14545,
      "training_step_time": 0.3894619941711426
    },
    {
      "epoch": 8.878173828125e-05,
      "model_forward_time": 0.11516475677490234,
      "step": 14546
    },
    {
      "epoch": 8.878173828125e-05,
      "step": 14546,
      "training_step_time": 0.38307881355285645
    },
    {
      "epoch": 8.8787841796875e-05,
      "model_forward_time": 0.1150820255279541,
      "step": 14547
    },
    {
      "epoch": 8.8787841796875e-05,
      "step": 14547,
      "training_step_time": 0.4012477397918701
    },
    {
      "epoch": 8.87939453125e-05,
      "model_forward_time": 0.11516785621643066,
      "step": 14548
    },
    {
      "epoch": 8.87939453125e-05,
      "step": 14548,
      "training_step_time": 0.3926122188568115
    },
    {
      "epoch": 8.8800048828125e-05,
      "model_forward_time": 0.11560773849487305,
      "step": 14549
    },
    {
      "epoch": 8.8800048828125e-05,
      "step": 14549,
      "training_step_time": 0.4697599411010742
    },
    {
      "epoch": 8.880615234375e-05,
      "grad_norm": 0.13025245070457458,
      "learning_rate": 9.020649881213958e-05,
      "loss": 0.0589,
      "step": 14550
    },
    {
      "epoch": 8.880615234375e-05,
      "model_forward_time": 0.11534714698791504,
      "step": 14550
    },
    {
      "epoch": 8.880615234375e-05,
      "step": 14550,
      "training_step_time": 0.6229977607727051
    },
    {
      "epoch": 8.8812255859375e-05,
      "model_forward_time": 0.1152658462524414,
      "step": 14551
    },
    {
      "epoch": 8.8812255859375e-05,
      "step": 14551,
      "training_step_time": 0.40604209899902344
    },
    {
      "epoch": 8.8818359375e-05,
      "model_forward_time": 0.11459732055664062,
      "step": 14552
    },
    {
      "epoch": 8.8818359375e-05,
      "step": 14552,
      "training_step_time": 0.4202733039855957
    },
    {
      "epoch": 8.8824462890625e-05,
      "model_forward_time": 0.1157369613647461,
      "step": 14553
    },
    {
      "epoch": 8.8824462890625e-05,
      "step": 14553,
      "training_step_time": 0.37750816345214844
    },
    {
      "epoch": 8.883056640625e-05,
      "model_forward_time": 0.11458539962768555,
      "step": 14554
    },
    {
      "epoch": 8.883056640625e-05,
      "step": 14554,
      "training_step_time": 0.4471554756164551
    },
    {
      "epoch": 8.8836669921875e-05,
      "model_forward_time": 0.11438226699829102,
      "step": 14555
    },
    {
      "epoch": 8.8836669921875e-05,
      "step": 14555,
      "training_step_time": 0.4470987319946289
    },
    {
      "epoch": 8.88427734375e-05,
      "model_forward_time": 0.1159217357635498,
      "step": 14556
    },
    {
      "epoch": 8.88427734375e-05,
      "step": 14556,
      "training_step_time": 0.5461525917053223
    },
    {
      "epoch": 8.8848876953125e-05,
      "model_forward_time": 0.11468863487243652,
      "step": 14557
    },
    {
      "epoch": 8.8848876953125e-05,
      "step": 14557,
      "training_step_time": 0.4183518886566162
    },
    {
      "epoch": 8.885498046875e-05,
      "model_forward_time": 0.11519002914428711,
      "step": 14558
    },
    {
      "epoch": 8.885498046875e-05,
      "step": 14558,
      "training_step_time": 0.3961362838745117
    },
    {
      "epoch": 8.8861083984375e-05,
      "model_forward_time": 0.11456704139709473,
      "step": 14559
    },
    {
      "epoch": 8.8861083984375e-05,
      "step": 14559,
      "training_step_time": 0.3904392719268799
    },
    {
      "epoch": 8.88671875e-05,
      "grad_norm": 0.15688519179821014,
      "learning_rate": 9.019011085705253e-05,
      "loss": 0.0535,
      "step": 14560
    },
    {
      "epoch": 8.88671875e-05,
      "model_forward_time": 0.11506843566894531,
      "step": 14560
    },
    {
      "epoch": 8.88671875e-05,
      "step": 14560,
      "training_step_time": 0.3940095901489258
    },
    {
      "epoch": 8.8873291015625e-05,
      "model_forward_time": 0.1157827377319336,
      "step": 14561
    },
    {
      "epoch": 8.8873291015625e-05,
      "step": 14561,
      "training_step_time": 0.3829953670501709
    },
    {
      "epoch": 8.887939453125e-05,
      "model_forward_time": 0.1161348819732666,
      "step": 14562
    },
    {
      "epoch": 8.887939453125e-05,
      "step": 14562,
      "training_step_time": 0.5411412715911865
    },
    {
      "epoch": 8.8885498046875e-05,
      "model_forward_time": 0.11535906791687012,
      "step": 14563
    },
    {
      "epoch": 8.8885498046875e-05,
      "step": 14563,
      "training_step_time": 0.4180600643157959
    },
    {
      "epoch": 8.88916015625e-05,
      "model_forward_time": 0.11496925354003906,
      "step": 14564
    },
    {
      "epoch": 8.88916015625e-05,
      "step": 14564,
      "training_step_time": 0.45712995529174805
    },
    {
      "epoch": 8.8897705078125e-05,
      "model_forward_time": 0.1154642105102539,
      "step": 14565
    },
    {
      "epoch": 8.8897705078125e-05,
      "step": 14565,
      "training_step_time": 0.47899436950683594
    },
    {
      "epoch": 8.890380859375e-05,
      "model_forward_time": 0.11491727828979492,
      "step": 14566
    },
    {
      "epoch": 8.890380859375e-05,
      "step": 14566,
      "training_step_time": 0.42316460609436035
    },
    {
      "epoch": 8.8909912109375e-05,
      "model_forward_time": 0.11435914039611816,
      "step": 14567
    },
    {
      "epoch": 8.8909912109375e-05,
      "step": 14567,
      "training_step_time": 0.38788509368896484
    },
    {
      "epoch": 8.8916015625e-05,
      "model_forward_time": 0.11729168891906738,
      "step": 14568
    },
    {
      "epoch": 8.8916015625e-05,
      "step": 14568,
      "training_step_time": 0.4553053379058838
    },
    {
      "epoch": 8.8922119140625e-05,
      "model_forward_time": 0.1147608757019043,
      "step": 14569
    },
    {
      "epoch": 8.8922119140625e-05,
      "step": 14569,
      "training_step_time": 0.4517822265625
    },
    {
      "epoch": 8.892822265625e-05,
      "grad_norm": 0.18180666863918304,
      "learning_rate": 9.017371069327096e-05,
      "loss": 0.0574,
      "step": 14570
    },
    {
      "epoch": 8.892822265625e-05,
      "model_forward_time": 0.11517572402954102,
      "step": 14570
    },
    {
      "epoch": 8.892822265625e-05,
      "step": 14570,
      "training_step_time": 0.40203356742858887
    },
    {
      "epoch": 8.8934326171875e-05,
      "model_forward_time": 0.11515569686889648,
      "step": 14571
    },
    {
      "epoch": 8.8934326171875e-05,
      "step": 14571,
      "training_step_time": 0.4068570137023926
    },
    {
      "epoch": 8.89404296875e-05,
      "model_forward_time": 0.11464428901672363,
      "step": 14572
    },
    {
      "epoch": 8.89404296875e-05,
      "step": 14572,
      "training_step_time": 0.40064263343811035
    },
    {
      "epoch": 8.8946533203125e-05,
      "model_forward_time": 0.11540532112121582,
      "step": 14573
    },
    {
      "epoch": 8.8946533203125e-05,
      "step": 14573,
      "training_step_time": 0.3907647132873535
    },
    {
      "epoch": 8.895263671875e-05,
      "model_forward_time": 0.11517667770385742,
      "step": 14574
    },
    {
      "epoch": 8.895263671875e-05,
      "step": 14574,
      "training_step_time": 0.611309289932251
    },
    {
      "epoch": 8.8958740234375e-05,
      "model_forward_time": 0.1144266128540039,
      "step": 14575
    },
    {
      "epoch": 8.8958740234375e-05,
      "step": 14575,
      "training_step_time": 0.3930528163909912
    },
    {
      "epoch": 8.896484375e-05,
      "model_forward_time": 0.11426067352294922,
      "step": 14576
    },
    {
      "epoch": 8.896484375e-05,
      "step": 14576,
      "training_step_time": 0.3658270835876465
    },
    {
      "epoch": 8.8970947265625e-05,
      "model_forward_time": 0.11432409286499023,
      "step": 14577
    },
    {
      "epoch": 8.8970947265625e-05,
      "step": 14577,
      "training_step_time": 0.43799829483032227
    },
    {
      "epoch": 8.897705078125e-05,
      "model_forward_time": 0.11481332778930664,
      "step": 14578
    },
    {
      "epoch": 8.897705078125e-05,
      "step": 14578,
      "training_step_time": 0.46925783157348633
    },
    {
      "epoch": 8.8983154296875e-05,
      "model_forward_time": 0.11451530456542969,
      "step": 14579
    },
    {
      "epoch": 8.8983154296875e-05,
      "step": 14579,
      "training_step_time": 0.3978848457336426
    },
    {
      "epoch": 8.89892578125e-05,
      "grad_norm": 0.1854744851589203,
      "learning_rate": 9.015729832577681e-05,
      "loss": 0.0554,
      "step": 14580
    },
    {
      "epoch": 8.89892578125e-05,
      "model_forward_time": 0.11452126502990723,
      "step": 14580
    },
    {
      "epoch": 8.89892578125e-05,
      "step": 14580,
      "training_step_time": 0.49059224128723145
    },
    {
      "epoch": 8.8995361328125e-05,
      "model_forward_time": 0.1153419017791748,
      "step": 14581
    },
    {
      "epoch": 8.8995361328125e-05,
      "step": 14581,
      "training_step_time": 0.38516831398010254
    },
    {
      "epoch": 8.900146484375e-05,
      "model_forward_time": 0.11484217643737793,
      "step": 14582
    },
    {
      "epoch": 8.900146484375e-05,
      "step": 14582,
      "training_step_time": 0.388629674911499
    },
    {
      "epoch": 8.9007568359375e-05,
      "model_forward_time": 0.1149590015411377,
      "step": 14583
    },
    {
      "epoch": 8.9007568359375e-05,
      "step": 14583,
      "training_step_time": 0.44451141357421875
    },
    {
      "epoch": 8.9013671875e-05,
      "model_forward_time": 0.12031698226928711,
      "step": 14584
    },
    {
      "epoch": 8.9013671875e-05,
      "step": 14584,
      "training_step_time": 0.41595911979675293
    },
    {
      "epoch": 8.9019775390625e-05,
      "model_forward_time": 0.11524391174316406,
      "step": 14585
    },
    {
      "epoch": 8.9019775390625e-05,
      "step": 14585,
      "training_step_time": 0.39050889015197754
    },
    {
      "epoch": 8.902587890625e-05,
      "model_forward_time": 0.11545872688293457,
      "step": 14586
    },
    {
      "epoch": 8.902587890625e-05,
      "step": 14586,
      "training_step_time": 0.6493949890136719
    },
    {
      "epoch": 8.9031982421875e-05,
      "model_forward_time": 0.1151285171508789,
      "step": 14587
    },
    {
      "epoch": 8.9031982421875e-05,
      "step": 14587,
      "training_step_time": 0.3898160457611084
    },
    {
      "epoch": 8.90380859375e-05,
      "model_forward_time": 0.11448788642883301,
      "step": 14588
    },
    {
      "epoch": 8.90380859375e-05,
      "step": 14588,
      "training_step_time": 0.38198208808898926
    },
    {
      "epoch": 8.9044189453125e-05,
      "model_forward_time": 0.11492753028869629,
      "step": 14589
    },
    {
      "epoch": 8.9044189453125e-05,
      "step": 14589,
      "training_step_time": 0.4114987850189209
    },
    {
      "epoch": 8.905029296875e-05,
      "grad_norm": 0.1510145515203476,
      "learning_rate": 9.014087375955573e-05,
      "loss": 0.0515,
      "step": 14590
    },
    {
      "epoch": 8.905029296875e-05,
      "model_forward_time": 0.11440038681030273,
      "step": 14590
    },
    {
      "epoch": 8.905029296875e-05,
      "step": 14590,
      "training_step_time": 0.3666675090789795
    },
    {
      "epoch": 8.9056396484375e-05,
      "model_forward_time": 0.11438703536987305,
      "step": 14591
    },
    {
      "epoch": 8.9056396484375e-05,
      "step": 14591,
      "training_step_time": 0.4689972400665283
    },
    {
      "epoch": 8.90625e-05,
      "model_forward_time": 0.11574387550354004,
      "step": 14592
    },
    {
      "epoch": 8.90625e-05,
      "step": 14592,
      "training_step_time": 0.4809699058532715
    },
    {
      "epoch": 8.9068603515625e-05,
      "model_forward_time": 0.11494088172912598,
      "step": 14593
    },
    {
      "epoch": 8.9068603515625e-05,
      "step": 14593,
      "training_step_time": 0.45704174041748047
    },
    {
      "epoch": 8.907470703125e-05,
      "model_forward_time": 0.11545133590698242,
      "step": 14594
    },
    {
      "epoch": 8.907470703125e-05,
      "step": 14594,
      "training_step_time": 0.38604068756103516
    },
    {
      "epoch": 8.9080810546875e-05,
      "model_forward_time": 0.1145317554473877,
      "step": 14595
    },
    {
      "epoch": 8.9080810546875e-05,
      "step": 14595,
      "training_step_time": 0.3941314220428467
    },
    {
      "epoch": 8.90869140625e-05,
      "model_forward_time": 0.11442255973815918,
      "step": 14596
    },
    {
      "epoch": 8.90869140625e-05,
      "step": 14596,
      "training_step_time": 0.4480304718017578
    },
    {
      "epoch": 8.9093017578125e-05,
      "model_forward_time": 0.11497664451599121,
      "step": 14597
    },
    {
      "epoch": 8.9093017578125e-05,
      "step": 14597,
      "training_step_time": 0.45103883743286133
    },
    {
      "epoch": 8.909912109375e-05,
      "model_forward_time": 0.11524581909179688,
      "step": 14598
    },
    {
      "epoch": 8.909912109375e-05,
      "step": 14598,
      "training_step_time": 0.44865918159484863
    },
    {
      "epoch": 8.9105224609375e-05,
      "model_forward_time": 0.11457586288452148,
      "step": 14599
    },
    {
      "epoch": 8.9105224609375e-05,
      "step": 14599,
      "training_step_time": 0.39606451988220215
    },
    {
      "epoch": 8.9111328125e-05,
      "grad_norm": 0.12754863500595093,
      "learning_rate": 9.012443699959705e-05,
      "loss": 0.0546,
      "step": 14600
    },
    {
      "epoch": 8.9111328125e-05,
      "model_forward_time": 0.11531281471252441,
      "step": 14600
    },
    {
      "epoch": 8.9111328125e-05,
      "step": 14600,
      "training_step_time": 0.39939260482788086
    },
    {
      "epoch": 8.9117431640625e-05,
      "model_forward_time": 0.11545634269714355,
      "step": 14601
    },
    {
      "epoch": 8.9117431640625e-05,
      "step": 14601,
      "training_step_time": 0.3937540054321289
    },
    {
      "epoch": 8.912353515625e-05,
      "model_forward_time": 0.13114500045776367,
      "step": 14602
    },
    {
      "epoch": 8.912353515625e-05,
      "step": 14602,
      "training_step_time": 0.4010195732116699
    },
    {
      "epoch": 8.9129638671875e-05,
      "model_forward_time": 0.11549091339111328,
      "step": 14603
    },
    {
      "epoch": 8.9129638671875e-05,
      "step": 14603,
      "training_step_time": 0.395550012588501
    },
    {
      "epoch": 8.91357421875e-05,
      "model_forward_time": 0.11527705192565918,
      "step": 14604
    },
    {
      "epoch": 8.91357421875e-05,
      "step": 14604,
      "training_step_time": 0.6865208148956299
    },
    {
      "epoch": 8.9141845703125e-05,
      "model_forward_time": 0.11490058898925781,
      "step": 14605
    },
    {
      "epoch": 8.9141845703125e-05,
      "step": 14605,
      "training_step_time": 0.4412386417388916
    },
    {
      "epoch": 8.914794921875e-05,
      "model_forward_time": 0.11492276191711426,
      "step": 14606
    },
    {
      "epoch": 8.914794921875e-05,
      "step": 14606,
      "training_step_time": 0.46872735023498535
    },
    {
      "epoch": 8.9154052734375e-05,
      "model_forward_time": 0.1143643856048584,
      "step": 14607
    },
    {
      "epoch": 8.9154052734375e-05,
      "step": 14607,
      "training_step_time": 0.4854116439819336
    },
    {
      "epoch": 8.916015625e-05,
      "model_forward_time": 0.11416077613830566,
      "step": 14608
    },
    {
      "epoch": 8.916015625e-05,
      "step": 14608,
      "training_step_time": 0.41857194900512695
    },
    {
      "epoch": 8.9166259765625e-05,
      "model_forward_time": 0.11391425132751465,
      "step": 14609
    },
    {
      "epoch": 8.9166259765625e-05,
      "step": 14609,
      "training_step_time": 0.44834160804748535
    },
    {
      "epoch": 8.917236328125e-05,
      "grad_norm": 0.16054916381835938,
      "learning_rate": 9.010798805089384e-05,
      "loss": 0.0578,
      "step": 14610
    },
    {
      "epoch": 8.917236328125e-05,
      "model_forward_time": 0.11456060409545898,
      "step": 14610
    },
    {
      "epoch": 8.917236328125e-05,
      "step": 14610,
      "training_step_time": 0.45256948471069336
    },
    {
      "epoch": 8.9178466796875e-05,
      "model_forward_time": 0.11468386650085449,
      "step": 14611
    },
    {
      "epoch": 8.9178466796875e-05,
      "step": 14611,
      "training_step_time": 0.44063496589660645
    },
    {
      "epoch": 8.91845703125e-05,
      "model_forward_time": 0.11444854736328125,
      "step": 14612
    },
    {
      "epoch": 8.91845703125e-05,
      "step": 14612,
      "training_step_time": 0.3982689380645752
    },
    {
      "epoch": 8.9190673828125e-05,
      "model_forward_time": 0.11555218696594238,
      "step": 14613
    },
    {
      "epoch": 8.9190673828125e-05,
      "step": 14613,
      "training_step_time": 0.39451074600219727
    },
    {
      "epoch": 8.919677734375e-05,
      "model_forward_time": 0.11481237411499023,
      "step": 14614
    },
    {
      "epoch": 8.919677734375e-05,
      "step": 14614,
      "training_step_time": 0.3825244903564453
    },
    {
      "epoch": 8.9202880859375e-05,
      "model_forward_time": 0.11563467979431152,
      "step": 14615
    },
    {
      "epoch": 8.9202880859375e-05,
      "step": 14615,
      "training_step_time": 0.39428138732910156
    },
    {
      "epoch": 8.9208984375e-05,
      "model_forward_time": 0.1156759262084961,
      "step": 14616
    },
    {
      "epoch": 8.9208984375e-05,
      "step": 14616,
      "training_step_time": 0.3909614086151123
    },
    {
      "epoch": 8.9215087890625e-05,
      "model_forward_time": 0.11496376991271973,
      "step": 14617
    },
    {
      "epoch": 8.9215087890625e-05,
      "step": 14617,
      "training_step_time": 0.3942606449127197
    },
    {
      "epoch": 8.922119140625e-05,
      "model_forward_time": 0.11555981636047363,
      "step": 14618
    },
    {
      "epoch": 8.922119140625e-05,
      "step": 14618,
      "training_step_time": 0.39853382110595703
    },
    {
      "epoch": 8.9227294921875e-05,
      "model_forward_time": 0.11527061462402344,
      "step": 14619
    },
    {
      "epoch": 8.9227294921875e-05,
      "step": 14619,
      "training_step_time": 0.39720630645751953
    },
    {
      "epoch": 8.92333984375e-05,
      "grad_norm": 0.1792570799589157,
      "learning_rate": 9.009152691844285e-05,
      "loss": 0.053,
      "step": 14620
    },
    {
      "epoch": 8.92333984375e-05,
      "model_forward_time": 0.11623883247375488,
      "step": 14620
    },
    {
      "epoch": 8.92333984375e-05,
      "step": 14620,
      "training_step_time": 0.4868338108062744
    },
    {
      "epoch": 8.9239501953125e-05,
      "model_forward_time": 0.11539435386657715,
      "step": 14621
    },
    {
      "epoch": 8.9239501953125e-05,
      "step": 14621,
      "training_step_time": 0.4188261032104492
    },
    {
      "epoch": 8.924560546875e-05,
      "model_forward_time": 0.11476016044616699,
      "step": 14622
    },
    {
      "epoch": 8.924560546875e-05,
      "step": 14622,
      "training_step_time": 0.7001926898956299
    },
    {
      "epoch": 8.9251708984375e-05,
      "model_forward_time": 0.1151740550994873,
      "step": 14623
    },
    {
      "epoch": 8.9251708984375e-05,
      "step": 14623,
      "training_step_time": 0.3847775459289551
    },
    {
      "epoch": 8.92578125e-05,
      "model_forward_time": 0.11446976661682129,
      "step": 14624
    },
    {
      "epoch": 8.92578125e-05,
      "step": 14624,
      "training_step_time": 0.38892030715942383
    },
    {
      "epoch": 8.9263916015625e-05,
      "model_forward_time": 0.11481070518493652,
      "step": 14625
    },
    {
      "epoch": 8.9263916015625e-05,
      "step": 14625,
      "training_step_time": 0.47237467765808105
    },
    {
      "epoch": 8.927001953125e-05,
      "model_forward_time": 0.11506271362304688,
      "step": 14626
    },
    {
      "epoch": 8.927001953125e-05,
      "step": 14626,
      "training_step_time": 0.38474369049072266
    },
    {
      "epoch": 8.9276123046875e-05,
      "model_forward_time": 0.11482787132263184,
      "step": 14627
    },
    {
      "epoch": 8.9276123046875e-05,
      "step": 14627,
      "training_step_time": 0.3850703239440918
    },
    {
      "epoch": 8.92822265625e-05,
      "model_forward_time": 0.11744189262390137,
      "step": 14628
    },
    {
      "epoch": 8.92822265625e-05,
      "step": 14628,
      "training_step_time": 0.5341532230377197
    },
    {
      "epoch": 8.9288330078125e-05,
      "model_forward_time": 0.11483931541442871,
      "step": 14629
    },
    {
      "epoch": 8.9288330078125e-05,
      "step": 14629,
      "training_step_time": 0.38590312004089355
    },
    {
      "epoch": 8.929443359375e-05,
      "grad_norm": 0.1268174946308136,
      "learning_rate": 9.007505360724453e-05,
      "loss": 0.0629,
      "step": 14630
    },
    {
      "epoch": 8.929443359375e-05,
      "model_forward_time": 0.11465668678283691,
      "step": 14630
    },
    {
      "epoch": 8.929443359375e-05,
      "step": 14630,
      "training_step_time": 0.39631199836730957
    },
    {
      "epoch": 8.9300537109375e-05,
      "model_forward_time": 0.11552834510803223,
      "step": 14631
    },
    {
      "epoch": 8.9300537109375e-05,
      "step": 14631,
      "training_step_time": 0.38541221618652344
    },
    {
      "epoch": 8.9306640625e-05,
      "model_forward_time": 0.11538505554199219,
      "step": 14632
    },
    {
      "epoch": 8.9306640625e-05,
      "step": 14632,
      "training_step_time": 0.3842964172363281
    },
    {
      "epoch": 8.9312744140625e-05,
      "model_forward_time": 0.11471128463745117,
      "step": 14633
    },
    {
      "epoch": 8.9312744140625e-05,
      "step": 14633,
      "training_step_time": 0.4750816822052002
    },
    {
      "epoch": 8.931884765625e-05,
      "model_forward_time": 0.11561131477355957,
      "step": 14634
    },
    {
      "epoch": 8.931884765625e-05,
      "step": 14634,
      "training_step_time": 0.5839004516601562
    },
    {
      "epoch": 8.9324951171875e-05,
      "model_forward_time": 0.11545920372009277,
      "step": 14635
    },
    {
      "epoch": 8.9324951171875e-05,
      "step": 14635,
      "training_step_time": 0.49472999572753906
    },
    {
      "epoch": 8.93310546875e-05,
      "model_forward_time": 0.11467504501342773,
      "step": 14636
    },
    {
      "epoch": 8.93310546875e-05,
      "step": 14636,
      "training_step_time": 0.4305582046508789
    },
    {
      "epoch": 8.9337158203125e-05,
      "model_forward_time": 0.11441874504089355,
      "step": 14637
    },
    {
      "epoch": 8.9337158203125e-05,
      "step": 14637,
      "training_step_time": 0.436614990234375
    },
    {
      "epoch": 8.934326171875e-05,
      "model_forward_time": 0.11426830291748047,
      "step": 14638
    },
    {
      "epoch": 8.934326171875e-05,
      "step": 14638,
      "training_step_time": 0.38803553581237793
    },
    {
      "epoch": 8.9349365234375e-05,
      "model_forward_time": 0.1145179271697998,
      "step": 14639
    },
    {
      "epoch": 8.9349365234375e-05,
      "step": 14639,
      "training_step_time": 0.3991246223449707
    },
    {
      "epoch": 8.935546875e-05,
      "grad_norm": 0.19358091056346893,
      "learning_rate": 9.005856812230304e-05,
      "loss": 0.0558,
      "step": 14640
    },
    {
      "epoch": 8.935546875e-05,
      "model_forward_time": 0.11428403854370117,
      "step": 14640
    },
    {
      "epoch": 8.935546875e-05,
      "step": 14640,
      "training_step_time": 0.45451807975769043
    },
    {
      "epoch": 8.9361572265625e-05,
      "model_forward_time": 0.11536717414855957,
      "step": 14641
    },
    {
      "epoch": 8.9361572265625e-05,
      "step": 14641,
      "training_step_time": 0.39365053176879883
    },
    {
      "epoch": 8.936767578125e-05,
      "model_forward_time": 0.1148526668548584,
      "step": 14642
    },
    {
      "epoch": 8.936767578125e-05,
      "step": 14642,
      "training_step_time": 0.4092116355895996
    },
    {
      "epoch": 8.9373779296875e-05,
      "model_forward_time": 0.11476826667785645,
      "step": 14643
    },
    {
      "epoch": 8.9373779296875e-05,
      "step": 14643,
      "training_step_time": 0.4093458652496338
    },
    {
      "epoch": 8.93798828125e-05,
      "model_forward_time": 0.11501908302307129,
      "step": 14644
    },
    {
      "epoch": 8.93798828125e-05,
      "step": 14644,
      "training_step_time": 0.39414381980895996
    },
    {
      "epoch": 8.9385986328125e-05,
      "model_forward_time": 0.11461782455444336,
      "step": 14645
    },
    {
      "epoch": 8.9385986328125e-05,
      "step": 14645,
      "training_step_time": 0.39058732986450195
    },
    {
      "epoch": 8.939208984375e-05,
      "model_forward_time": 0.11509871482849121,
      "step": 14646
    },
    {
      "epoch": 8.939208984375e-05,
      "step": 14646,
      "training_step_time": 0.7766809463500977
    },
    {
      "epoch": 8.9398193359375e-05,
      "model_forward_time": 0.11481618881225586,
      "step": 14647
    },
    {
      "epoch": 8.9398193359375e-05,
      "step": 14647,
      "training_step_time": 0.42154979705810547
    },
    {
      "epoch": 8.9404296875e-05,
      "model_forward_time": 0.11403656005859375,
      "step": 14648
    },
    {
      "epoch": 8.9404296875e-05,
      "step": 14648,
      "training_step_time": 0.45451855659484863
    },
    {
      "epoch": 8.9410400390625e-05,
      "model_forward_time": 0.11452198028564453,
      "step": 14649
    },
    {
      "epoch": 8.9410400390625e-05,
      "step": 14649,
      "training_step_time": 0.43619656562805176
    },
    {
      "epoch": 8.941650390625e-05,
      "grad_norm": 0.17394818365573883,
      "learning_rate": 9.004207046862624e-05,
      "loss": 0.0557,
      "step": 14650
    },
    {
      "epoch": 8.941650390625e-05,
      "model_forward_time": 0.11495184898376465,
      "step": 14650
    },
    {
      "epoch": 8.941650390625e-05,
      "step": 14650,
      "training_step_time": 0.4712083339691162
    },
    {
      "epoch": 8.9422607421875e-05,
      "model_forward_time": 0.1144094467163086,
      "step": 14651
    },
    {
      "epoch": 8.9422607421875e-05,
      "step": 14651,
      "training_step_time": 0.42565059661865234
    },
    {
      "epoch": 8.94287109375e-05,
      "model_forward_time": 0.1145317554473877,
      "step": 14652
    },
    {
      "epoch": 8.94287109375e-05,
      "step": 14652,
      "training_step_time": 0.48499393463134766
    },
    {
      "epoch": 8.9434814453125e-05,
      "model_forward_time": 0.11422514915466309,
      "step": 14653
    },
    {
      "epoch": 8.9434814453125e-05,
      "step": 14653,
      "training_step_time": 0.425586462020874
    },
    {
      "epoch": 8.944091796875e-05,
      "model_forward_time": 0.11417555809020996,
      "step": 14654
    },
    {
      "epoch": 8.944091796875e-05,
      "step": 14654,
      "training_step_time": 0.3891184329986572
    },
    {
      "epoch": 8.9447021484375e-05,
      "model_forward_time": 0.11461067199707031,
      "step": 14655
    },
    {
      "epoch": 8.9447021484375e-05,
      "step": 14655,
      "training_step_time": 0.389786958694458
    },
    {
      "epoch": 8.9453125e-05,
      "model_forward_time": 0.11494922637939453,
      "step": 14656
    },
    {
      "epoch": 8.9453125e-05,
      "step": 14656,
      "training_step_time": 0.3802323341369629
    },
    {
      "epoch": 8.9459228515625e-05,
      "model_forward_time": 0.11469745635986328,
      "step": 14657
    },
    {
      "epoch": 8.9459228515625e-05,
      "step": 14657,
      "training_step_time": 0.42661118507385254
    },
    {
      "epoch": 8.946533203125e-05,
      "model_forward_time": 0.11511635780334473,
      "step": 14658
    },
    {
      "epoch": 8.946533203125e-05,
      "step": 14658,
      "training_step_time": 0.5579955577850342
    },
    {
      "epoch": 8.9471435546875e-05,
      "model_forward_time": 0.11490821838378906,
      "step": 14659
    },
    {
      "epoch": 8.9471435546875e-05,
      "step": 14659,
      "training_step_time": 0.38881921768188477
    },
    {
      "epoch": 8.94775390625e-05,
      "grad_norm": 0.15271426737308502,
      "learning_rate": 9.002556065122571e-05,
      "loss": 0.0523,
      "step": 14660
    },
    {
      "epoch": 8.94775390625e-05,
      "model_forward_time": 0.11466169357299805,
      "step": 14660
    },
    {
      "epoch": 8.94775390625e-05,
      "step": 14660,
      "training_step_time": 0.38718175888061523
    },
    {
      "epoch": 8.9483642578125e-05,
      "model_forward_time": 0.11468219757080078,
      "step": 14661
    },
    {
      "epoch": 8.9483642578125e-05,
      "step": 14661,
      "training_step_time": 0.366544246673584
    },
    {
      "epoch": 8.948974609375e-05,
      "model_forward_time": 0.11467146873474121,
      "step": 14662
    },
    {
      "epoch": 8.948974609375e-05,
      "step": 14662,
      "training_step_time": 0.4232192039489746
    },
    {
      "epoch": 8.9495849609375e-05,
      "model_forward_time": 0.1148233413696289,
      "step": 14663
    },
    {
      "epoch": 8.9495849609375e-05,
      "step": 14663,
      "training_step_time": 0.4556140899658203
    },
    {
      "epoch": 8.9501953125e-05,
      "model_forward_time": 0.11581897735595703,
      "step": 14664
    },
    {
      "epoch": 8.9501953125e-05,
      "step": 14664,
      "training_step_time": 0.6002199649810791
    },
    {
      "epoch": 8.9508056640625e-05,
      "model_forward_time": 0.11473989486694336,
      "step": 14665
    },
    {
      "epoch": 8.9508056640625e-05,
      "step": 14665,
      "training_step_time": 0.3908572196960449
    },
    {
      "epoch": 8.951416015625e-05,
      "model_forward_time": 0.11522030830383301,
      "step": 14666
    },
    {
      "epoch": 8.951416015625e-05,
      "step": 14666,
      "training_step_time": 0.40973639488220215
    },
    {
      "epoch": 8.9520263671875e-05,
      "model_forward_time": 0.11507797241210938,
      "step": 14667
    },
    {
      "epoch": 8.9520263671875e-05,
      "step": 14667,
      "training_step_time": 0.45942187309265137
    },
    {
      "epoch": 8.95263671875e-05,
      "model_forward_time": 0.1145484447479248,
      "step": 14668
    },
    {
      "epoch": 8.95263671875e-05,
      "step": 14668,
      "training_step_time": 0.4007754325866699
    },
    {
      "epoch": 8.9532470703125e-05,
      "model_forward_time": 0.11452889442443848,
      "step": 14669
    },
    {
      "epoch": 8.9532470703125e-05,
      "step": 14669,
      "training_step_time": 0.3834841251373291
    },
    {
      "epoch": 8.953857421875e-05,
      "grad_norm": 0.10280763357877731,
      "learning_rate": 9.000903867511666e-05,
      "loss": 0.0516,
      "step": 14670
    },
    {
      "epoch": 8.953857421875e-05,
      "model_forward_time": 0.11629986763000488,
      "step": 14670
    },
    {
      "epoch": 8.953857421875e-05,
      "step": 14670,
      "training_step_time": 0.5179612636566162
    },
    {
      "epoch": 8.9544677734375e-05,
      "model_forward_time": 0.1149301528930664,
      "step": 14671
    },
    {
      "epoch": 8.9544677734375e-05,
      "step": 14671,
      "training_step_time": 0.38745951652526855
    },
    {
      "epoch": 8.955078125e-05,
      "model_forward_time": 0.11490511894226074,
      "step": 14672
    },
    {
      "epoch": 8.955078125e-05,
      "step": 14672,
      "training_step_time": 0.3902919292449951
    },
    {
      "epoch": 8.9556884765625e-05,
      "model_forward_time": 0.11566376686096191,
      "step": 14673
    },
    {
      "epoch": 8.9556884765625e-05,
      "step": 14673,
      "training_step_time": 0.388721227645874
    },
    {
      "epoch": 8.956298828125e-05,
      "model_forward_time": 0.11500835418701172,
      "step": 14674
    },
    {
      "epoch": 8.956298828125e-05,
      "step": 14674,
      "training_step_time": 0.44785618782043457
    },
    {
      "epoch": 8.9569091796875e-05,
      "model_forward_time": 0.11574721336364746,
      "step": 14675
    },
    {
      "epoch": 8.9569091796875e-05,
      "step": 14675,
      "training_step_time": 0.3894779682159424
    },
    {
      "epoch": 8.95751953125e-05,
      "model_forward_time": 0.11693453788757324,
      "step": 14676
    },
    {
      "epoch": 8.95751953125e-05,
      "step": 14676,
      "training_step_time": 0.5697875022888184
    },
    {
      "epoch": 8.9581298828125e-05,
      "model_forward_time": 0.1146090030670166,
      "step": 14677
    },
    {
      "epoch": 8.9581298828125e-05,
      "step": 14677,
      "training_step_time": 0.5019869804382324
    },
    {
      "epoch": 8.958740234375e-05,
      "model_forward_time": 0.11422348022460938,
      "step": 14678
    },
    {
      "epoch": 8.958740234375e-05,
      "step": 14678,
      "training_step_time": 0.3914923667907715
    },
    {
      "epoch": 8.9593505859375e-05,
      "model_forward_time": 0.11444330215454102,
      "step": 14679
    },
    {
      "epoch": 8.9593505859375e-05,
      "step": 14679,
      "training_step_time": 0.39517903327941895
    },
    {
      "epoch": 8.9599609375e-05,
      "grad_norm": 0.12101182341575623,
      "learning_rate": 8.999250454531802e-05,
      "loss": 0.055,
      "step": 14680
    },
    {
      "epoch": 8.9599609375e-05,
      "model_forward_time": 0.11427164077758789,
      "step": 14680
    },
    {
      "epoch": 8.9599609375e-05,
      "step": 14680,
      "training_step_time": 0.4100515842437744
    },
    {
      "epoch": 8.9605712890625e-05,
      "model_forward_time": 0.11546945571899414,
      "step": 14681
    },
    {
      "epoch": 8.9605712890625e-05,
      "step": 14681,
      "training_step_time": 0.4445011615753174
    },
    {
      "epoch": 8.961181640625e-05,
      "model_forward_time": 0.11580586433410645,
      "step": 14682
    },
    {
      "epoch": 8.961181640625e-05,
      "step": 14682,
      "training_step_time": 0.39102745056152344
    },
    {
      "epoch": 8.9617919921875e-05,
      "model_forward_time": 0.1163029670715332,
      "step": 14683
    },
    {
      "epoch": 8.9617919921875e-05,
      "step": 14683,
      "training_step_time": 0.41141200065612793
    },
    {
      "epoch": 8.96240234375e-05,
      "model_forward_time": 0.11524772644042969,
      "step": 14684
    },
    {
      "epoch": 8.96240234375e-05,
      "step": 14684,
      "training_step_time": 0.39011549949645996
    },
    {
      "epoch": 8.9630126953125e-05,
      "model_forward_time": 0.11557888984680176,
      "step": 14685
    },
    {
      "epoch": 8.9630126953125e-05,
      "step": 14685,
      "training_step_time": 0.39297986030578613
    },
    {
      "epoch": 8.963623046875e-05,
      "model_forward_time": 0.11501097679138184,
      "step": 14686
    },
    {
      "epoch": 8.963623046875e-05,
      "step": 14686,
      "training_step_time": 0.3983581066131592
    },
    {
      "epoch": 8.9642333984375e-05,
      "model_forward_time": 0.1156318187713623,
      "step": 14687
    },
    {
      "epoch": 8.9642333984375e-05,
      "step": 14687,
      "training_step_time": 0.3871331214904785
    },
    {
      "epoch": 8.96484375e-05,
      "model_forward_time": 0.1148231029510498,
      "step": 14688
    },
    {
      "epoch": 8.96484375e-05,
      "step": 14688,
      "training_step_time": 0.632253885269165
    },
    {
      "epoch": 8.9654541015625e-05,
      "model_forward_time": 0.11570405960083008,
      "step": 14689
    },
    {
      "epoch": 8.9654541015625e-05,
      "step": 14689,
      "training_step_time": 0.3969533443450928
    },
    {
      "epoch": 8.966064453125e-05,
      "grad_norm": 0.14006389677524567,
      "learning_rate": 8.997595826685243e-05,
      "loss": 0.0523,
      "step": 14690
    },
    {
      "epoch": 8.966064453125e-05,
      "model_forward_time": 0.11613869667053223,
      "step": 14690
    },
    {
      "epoch": 8.966064453125e-05,
      "step": 14690,
      "training_step_time": 0.4777071475982666
    },
    {
      "epoch": 8.9666748046875e-05,
      "model_forward_time": 0.11511492729187012,
      "step": 14691
    },
    {
      "epoch": 8.9666748046875e-05,
      "step": 14691,
      "training_step_time": 0.4398791790008545
    },
    {
      "epoch": 8.96728515625e-05,
      "model_forward_time": 0.11553621292114258,
      "step": 14692
    },
    {
      "epoch": 8.96728515625e-05,
      "step": 14692,
      "training_step_time": 0.4770066738128662
    },
    {
      "epoch": 8.9678955078125e-05,
      "model_forward_time": 0.11429023742675781,
      "step": 14693
    },
    {
      "epoch": 8.9678955078125e-05,
      "step": 14693,
      "training_step_time": 0.4217190742492676
    },
    {
      "epoch": 8.968505859375e-05,
      "model_forward_time": 0.11477160453796387,
      "step": 14694
    },
    {
      "epoch": 8.968505859375e-05,
      "step": 14694,
      "training_step_time": 0.42443156242370605
    },
    {
      "epoch": 8.9691162109375e-05,
      "model_forward_time": 0.11484432220458984,
      "step": 14695
    },
    {
      "epoch": 8.9691162109375e-05,
      "step": 14695,
      "training_step_time": 0.3932349681854248
    },
    {
      "epoch": 8.9697265625e-05,
      "model_forward_time": 0.11539602279663086,
      "step": 14696
    },
    {
      "epoch": 8.9697265625e-05,
      "step": 14696,
      "training_step_time": 0.38478589057922363
    },
    {
      "epoch": 8.9703369140625e-05,
      "model_forward_time": 0.11487579345703125,
      "step": 14697
    },
    {
      "epoch": 8.9703369140625e-05,
      "step": 14697,
      "training_step_time": 0.3919501304626465
    },
    {
      "epoch": 8.970947265625e-05,
      "model_forward_time": 0.11539387702941895,
      "step": 14698
    },
    {
      "epoch": 8.970947265625e-05,
      "step": 14698,
      "training_step_time": 0.3860313892364502
    },
    {
      "epoch": 8.9715576171875e-05,
      "model_forward_time": 0.11661815643310547,
      "step": 14699
    },
    {
      "epoch": 8.9715576171875e-05,
      "step": 14699,
      "training_step_time": 0.3911900520324707
    },
    {
      "epoch": 8.97216796875e-05,
      "grad_norm": 0.19273622334003448,
      "learning_rate": 8.995939984474624e-05,
      "loss": 0.0559,
      "step": 14700
    },
    {
      "epoch": 8.97216796875e-05,
      "model_forward_time": 0.11523628234863281,
      "step": 14700
    },
    {
      "epoch": 8.97216796875e-05,
      "step": 14700,
      "training_step_time": 0.7310726642608643
    },
    {
      "epoch": 8.9727783203125e-05,
      "model_forward_time": 0.11467742919921875,
      "step": 14701
    },
    {
      "epoch": 8.9727783203125e-05,
      "step": 14701,
      "training_step_time": 0.40677809715270996
    },
    {
      "epoch": 8.973388671875e-05,
      "model_forward_time": 0.11442065238952637,
      "step": 14702
    },
    {
      "epoch": 8.973388671875e-05,
      "step": 14702,
      "training_step_time": 0.3782651424407959
    },
    {
      "epoch": 8.9739990234375e-05,
      "model_forward_time": 0.11482453346252441,
      "step": 14703
    },
    {
      "epoch": 8.9739990234375e-05,
      "step": 14703,
      "training_step_time": 0.3853721618652344
    },
    {
      "epoch": 8.974609375e-05,
      "model_forward_time": 0.11501193046569824,
      "step": 14704
    },
    {
      "epoch": 8.974609375e-05,
      "step": 14704,
      "training_step_time": 0.43683600425720215
    },
    {
      "epoch": 8.9752197265625e-05,
      "model_forward_time": 0.11467790603637695,
      "step": 14705
    },
    {
      "epoch": 8.9752197265625e-05,
      "step": 14705,
      "training_step_time": 0.4947226047515869
    },
    {
      "epoch": 8.975830078125e-05,
      "model_forward_time": 0.11500406265258789,
      "step": 14706
    },
    {
      "epoch": 8.975830078125e-05,
      "step": 14706,
      "training_step_time": 0.48918962478637695
    },
    {
      "epoch": 8.9764404296875e-05,
      "model_forward_time": 0.11490035057067871,
      "step": 14707
    },
    {
      "epoch": 8.9764404296875e-05,
      "step": 14707,
      "training_step_time": 0.3948531150817871
    },
    {
      "epoch": 8.97705078125e-05,
      "model_forward_time": 0.11489343643188477,
      "step": 14708
    },
    {
      "epoch": 8.97705078125e-05,
      "step": 14708,
      "training_step_time": 0.41856861114501953
    },
    {
      "epoch": 8.9776611328125e-05,
      "model_forward_time": 0.11539244651794434,
      "step": 14709
    },
    {
      "epoch": 8.9776611328125e-05,
      "step": 14709,
      "training_step_time": 0.41702890396118164
    },
    {
      "epoch": 8.978271484375e-05,
      "grad_norm": 0.12110630422830582,
      "learning_rate": 8.994282928402944e-05,
      "loss": 0.0608,
      "step": 14710
    },
    {
      "epoch": 8.978271484375e-05,
      "model_forward_time": 0.11495327949523926,
      "step": 14710
    },
    {
      "epoch": 8.978271484375e-05,
      "step": 14710,
      "training_step_time": 0.37906885147094727
    },
    {
      "epoch": 8.9788818359375e-05,
      "model_forward_time": 0.11549735069274902,
      "step": 14711
    },
    {
      "epoch": 8.9788818359375e-05,
      "step": 14711,
      "training_step_time": 0.38577699661254883
    },
    {
      "epoch": 8.9794921875e-05,
      "model_forward_time": 0.11535024642944336,
      "step": 14712
    },
    {
      "epoch": 8.9794921875e-05,
      "step": 14712,
      "training_step_time": 0.7001843452453613
    },
    {
      "epoch": 8.9801025390625e-05,
      "model_forward_time": 0.11475658416748047,
      "step": 14713
    },
    {
      "epoch": 8.9801025390625e-05,
      "step": 14713,
      "training_step_time": 0.39258861541748047
    },
    {
      "epoch": 8.980712890625e-05,
      "model_forward_time": 0.11411261558532715,
      "step": 14714
    },
    {
      "epoch": 8.980712890625e-05,
      "step": 14714,
      "training_step_time": 0.39141201972961426
    },
    {
      "epoch": 8.9813232421875e-05,
      "model_forward_time": 0.11450552940368652,
      "step": 14715
    },
    {
      "epoch": 8.9813232421875e-05,
      "step": 14715,
      "training_step_time": 0.38260865211486816
    },
    {
      "epoch": 8.98193359375e-05,
      "model_forward_time": 0.11518096923828125,
      "step": 14716
    },
    {
      "epoch": 8.98193359375e-05,
      "step": 14716,
      "training_step_time": 0.39568448066711426
    },
    {
      "epoch": 8.9825439453125e-05,
      "model_forward_time": 0.114990234375,
      "step": 14717
    },
    {
      "epoch": 8.9825439453125e-05,
      "step": 14717,
      "training_step_time": 0.3861987590789795
    },
    {
      "epoch": 8.983154296875e-05,
      "model_forward_time": 0.11497259140014648,
      "step": 14718
    },
    {
      "epoch": 8.983154296875e-05,
      "step": 14718,
      "training_step_time": 0.7066221237182617
    },
    {
      "epoch": 8.9837646484375e-05,
      "model_forward_time": 0.11437034606933594,
      "step": 14719
    },
    {
      "epoch": 8.9837646484375e-05,
      "step": 14719,
      "training_step_time": 0.4975275993347168
    },
    {
      "epoch": 8.984375e-05,
      "grad_norm": 0.19440238177776337,
      "learning_rate": 8.992624658973574e-05,
      "loss": 0.0609,
      "step": 14720
    },
    {
      "epoch": 8.984375e-05,
      "model_forward_time": 0.11450600624084473,
      "step": 14720
    },
    {
      "epoch": 8.984375e-05,
      "step": 14720,
      "training_step_time": 0.39250802993774414
    },
    {
      "epoch": 8.9849853515625e-05,
      "model_forward_time": 0.1149137020111084,
      "step": 14721
    },
    {
      "epoch": 8.9849853515625e-05,
      "step": 14721,
      "training_step_time": 0.4371645450592041
    },
    {
      "epoch": 8.985595703125e-05,
      "model_forward_time": 0.11447954177856445,
      "step": 14722
    },
    {
      "epoch": 8.985595703125e-05,
      "step": 14722,
      "training_step_time": 0.4514758586883545
    },
    {
      "epoch": 8.9862060546875e-05,
      "model_forward_time": 0.11425495147705078,
      "step": 14723
    },
    {
      "epoch": 8.9862060546875e-05,
      "step": 14723,
      "training_step_time": 0.39101171493530273
    },
    {
      "epoch": 8.98681640625e-05,
      "model_forward_time": 0.11485052108764648,
      "step": 14724
    },
    {
      "epoch": 8.98681640625e-05,
      "step": 14724,
      "training_step_time": 0.3974184989929199
    },
    {
      "epoch": 8.9874267578125e-05,
      "model_forward_time": 0.11493802070617676,
      "step": 14725
    },
    {
      "epoch": 8.9874267578125e-05,
      "step": 14725,
      "training_step_time": 0.38997459411621094
    },
    {
      "epoch": 8.988037109375e-05,
      "model_forward_time": 0.11478328704833984,
      "step": 14726
    },
    {
      "epoch": 8.988037109375e-05,
      "step": 14726,
      "training_step_time": 0.4024088382720947
    },
    {
      "epoch": 8.9886474609375e-05,
      "model_forward_time": 0.11467695236206055,
      "step": 14727
    },
    {
      "epoch": 8.9886474609375e-05,
      "step": 14727,
      "training_step_time": 0.41258955001831055
    },
    {
      "epoch": 8.9892578125e-05,
      "model_forward_time": 0.11468935012817383,
      "step": 14728
    },
    {
      "epoch": 8.9892578125e-05,
      "step": 14728,
      "training_step_time": 0.3825538158416748
    },
    {
      "epoch": 8.9898681640625e-05,
      "model_forward_time": 0.11537742614746094,
      "step": 14729
    },
    {
      "epoch": 8.9898681640625e-05,
      "step": 14729,
      "training_step_time": 0.39282894134521484
    },
    {
      "epoch": 8.990478515625e-05,
      "grad_norm": 0.22810429334640503,
      "learning_rate": 8.990965176690252e-05,
      "loss": 0.0557,
      "step": 14730
    },
    {
      "epoch": 8.990478515625e-05,
      "model_forward_time": 0.11498475074768066,
      "step": 14730
    },
    {
      "epoch": 8.990478515625e-05,
      "step": 14730,
      "training_step_time": 0.6579430103302002
    },
    {
      "epoch": 8.9910888671875e-05,
      "model_forward_time": 0.11473655700683594,
      "step": 14731
    },
    {
      "epoch": 8.9910888671875e-05,
      "step": 14731,
      "training_step_time": 0.39325475692749023
    },
    {
      "epoch": 8.99169921875e-05,
      "model_forward_time": 0.11486315727233887,
      "step": 14732
    },
    {
      "epoch": 8.99169921875e-05,
      "step": 14732,
      "training_step_time": 0.4368929862976074
    },
    {
      "epoch": 8.9923095703125e-05,
      "model_forward_time": 0.11513471603393555,
      "step": 14733
    },
    {
      "epoch": 8.9923095703125e-05,
      "step": 14733,
      "training_step_time": 0.434619665145874
    },
    {
      "epoch": 8.992919921875e-05,
      "model_forward_time": 0.11511635780334473,
      "step": 14734
    },
    {
      "epoch": 8.992919921875e-05,
      "step": 14734,
      "training_step_time": 0.47106456756591797
    },
    {
      "epoch": 8.9935302734375e-05,
      "model_forward_time": 0.11434483528137207,
      "step": 14735
    },
    {
      "epoch": 8.9935302734375e-05,
      "step": 14735,
      "training_step_time": 0.3849468231201172
    },
    {
      "epoch": 8.994140625e-05,
      "model_forward_time": 0.11567997932434082,
      "step": 14736
    },
    {
      "epoch": 8.994140625e-05,
      "step": 14736,
      "training_step_time": 0.58168625831604
    },
    {
      "epoch": 8.9947509765625e-05,
      "model_forward_time": 0.11452841758728027,
      "step": 14737
    },
    {
      "epoch": 8.9947509765625e-05,
      "step": 14737,
      "training_step_time": 0.39958786964416504
    },
    {
      "epoch": 8.995361328125e-05,
      "model_forward_time": 0.1151585578918457,
      "step": 14738
    },
    {
      "epoch": 8.995361328125e-05,
      "step": 14738,
      "training_step_time": 0.40050673484802246
    },
    {
      "epoch": 8.9959716796875e-05,
      "model_forward_time": 0.11504936218261719,
      "step": 14739
    },
    {
      "epoch": 8.9959716796875e-05,
      "step": 14739,
      "training_step_time": 0.3975968360900879
    },
    {
      "epoch": 8.99658203125e-05,
      "grad_norm": 0.15294267237186432,
      "learning_rate": 8.989304482057084e-05,
      "loss": 0.0621,
      "step": 14740
    },
    {
      "epoch": 8.99658203125e-05,
      "model_forward_time": 0.11486649513244629,
      "step": 14740
    },
    {
      "epoch": 8.99658203125e-05,
      "step": 14740,
      "training_step_time": 0.3924446105957031
    },
    {
      "epoch": 8.9971923828125e-05,
      "model_forward_time": 0.11489081382751465,
      "step": 14741
    },
    {
      "epoch": 8.9971923828125e-05,
      "step": 14741,
      "training_step_time": 0.4021732807159424
    },
    {
      "epoch": 8.997802734375e-05,
      "model_forward_time": 0.1150965690612793,
      "step": 14742
    },
    {
      "epoch": 8.997802734375e-05,
      "step": 14742,
      "training_step_time": 0.6515140533447266
    },
    {
      "epoch": 8.9984130859375e-05,
      "model_forward_time": 0.11517953872680664,
      "step": 14743
    },
    {
      "epoch": 8.9984130859375e-05,
      "step": 14743,
      "training_step_time": 0.39675331115722656
    },
    {
      "epoch": 8.9990234375e-05,
      "model_forward_time": 0.11521768569946289,
      "step": 14744
    },
    {
      "epoch": 8.9990234375e-05,
      "step": 14744,
      "training_step_time": 0.3939223289489746
    },
    {
      "epoch": 8.9996337890625e-05,
      "model_forward_time": 0.1167149543762207,
      "step": 14745
    },
    {
      "epoch": 8.9996337890625e-05,
      "step": 14745,
      "training_step_time": 0.44209957122802734
    },
    {
      "epoch": 9.000244140625e-05,
      "model_forward_time": 0.11490654945373535,
      "step": 14746
    },
    {
      "epoch": 9.000244140625e-05,
      "step": 14746,
      "training_step_time": 0.47809648513793945
    },
    {
      "epoch": 9.0008544921875e-05,
      "model_forward_time": 0.1143944263458252,
      "step": 14747
    },
    {
      "epoch": 9.0008544921875e-05,
      "step": 14747,
      "training_step_time": 0.4193241596221924
    },
    {
      "epoch": 9.00146484375e-05,
      "model_forward_time": 0.11518526077270508,
      "step": 14748
    },
    {
      "epoch": 9.00146484375e-05,
      "step": 14748,
      "training_step_time": 0.5629551410675049
    },
    {
      "epoch": 9.0020751953125e-05,
      "model_forward_time": 0.11456584930419922,
      "step": 14749
    },
    {
      "epoch": 9.0020751953125e-05,
      "step": 14749,
      "training_step_time": 0.3985757827758789
    },
    {
      "epoch": 9.002685546875e-05,
      "grad_norm": 0.1453397423028946,
      "learning_rate": 8.987642575578545e-05,
      "loss": 0.0566,
      "step": 14750
    },
    {
      "epoch": 9.002685546875e-05,
      "model_forward_time": 0.11553668975830078,
      "step": 14750
    },
    {
      "epoch": 9.002685546875e-05,
      "step": 14750,
      "training_step_time": 0.471541166305542
    },
    {
      "epoch": 9.0032958984375e-05,
      "model_forward_time": 0.11457705497741699,
      "step": 14751
    },
    {
      "epoch": 9.0032958984375e-05,
      "step": 14751,
      "training_step_time": 0.3845651149749756
    },
    {
      "epoch": 9.00390625e-05,
      "model_forward_time": 0.11477041244506836,
      "step": 14752
    },
    {
      "epoch": 9.00390625e-05,
      "step": 14752,
      "training_step_time": 0.3860480785369873
    },
    {
      "epoch": 9.0045166015625e-05,
      "model_forward_time": 0.11523151397705078,
      "step": 14753
    },
    {
      "epoch": 9.0045166015625e-05,
      "step": 14753,
      "training_step_time": 0.4311177730560303
    },
    {
      "epoch": 9.005126953125e-05,
      "model_forward_time": 0.11618566513061523,
      "step": 14754
    },
    {
      "epoch": 9.005126953125e-05,
      "step": 14754,
      "training_step_time": 0.5235624313354492
    },
    {
      "epoch": 9.0057373046875e-05,
      "model_forward_time": 0.11475539207458496,
      "step": 14755
    },
    {
      "epoch": 9.0057373046875e-05,
      "step": 14755,
      "training_step_time": 0.391277551651001
    },
    {
      "epoch": 9.00634765625e-05,
      "model_forward_time": 0.11522769927978516,
      "step": 14756
    },
    {
      "epoch": 9.00634765625e-05,
      "step": 14756,
      "training_step_time": 0.37874388694763184
    },
    {
      "epoch": 9.0069580078125e-05,
      "model_forward_time": 0.11506533622741699,
      "step": 14757
    },
    {
      "epoch": 9.0069580078125e-05,
      "step": 14757,
      "training_step_time": 0.3863818645477295
    },
    {
      "epoch": 9.007568359375e-05,
      "model_forward_time": 0.11497068405151367,
      "step": 14758
    },
    {
      "epoch": 9.007568359375e-05,
      "step": 14758,
      "training_step_time": 0.3850746154785156
    },
    {
      "epoch": 9.0081787109375e-05,
      "model_forward_time": 0.11573910713195801,
      "step": 14759
    },
    {
      "epoch": 9.0081787109375e-05,
      "step": 14759,
      "training_step_time": 0.40217065811157227
    },
    {
      "epoch": 9.0087890625e-05,
      "grad_norm": 0.13270941376686096,
      "learning_rate": 8.98597945775948e-05,
      "loss": 0.0526,
      "step": 14760
    },
    {
      "epoch": 9.0087890625e-05,
      "model_forward_time": 0.11498785018920898,
      "step": 14760
    },
    {
      "epoch": 9.0087890625e-05,
      "step": 14760,
      "training_step_time": 0.6236426830291748
    },
    {
      "epoch": 9.0093994140625e-05,
      "model_forward_time": 0.11514663696289062,
      "step": 14761
    },
    {
      "epoch": 9.0093994140625e-05,
      "step": 14761,
      "training_step_time": 0.5143816471099854
    },
    {
      "epoch": 9.010009765625e-05,
      "model_forward_time": 0.11465096473693848,
      "step": 14762
    },
    {
      "epoch": 9.010009765625e-05,
      "step": 14762,
      "training_step_time": 0.413907527923584
    },
    {
      "epoch": 9.0106201171875e-05,
      "model_forward_time": 0.11412739753723145,
      "step": 14763
    },
    {
      "epoch": 9.0106201171875e-05,
      "step": 14763,
      "training_step_time": 0.39620208740234375
    },
    {
      "epoch": 9.01123046875e-05,
      "model_forward_time": 0.11460661888122559,
      "step": 14764
    },
    {
      "epoch": 9.01123046875e-05,
      "step": 14764,
      "training_step_time": 0.44677186012268066
    },
    {
      "epoch": 9.0118408203125e-05,
      "model_forward_time": 0.1151418685913086,
      "step": 14765
    },
    {
      "epoch": 9.0118408203125e-05,
      "step": 14765,
      "training_step_time": 0.3843510150909424
    },
    {
      "epoch": 9.012451171875e-05,
      "model_forward_time": 0.11501646041870117,
      "step": 14766
    },
    {
      "epoch": 9.012451171875e-05,
      "step": 14766,
      "training_step_time": 0.446547269821167
    },
    {
      "epoch": 9.0130615234375e-05,
      "model_forward_time": 0.11558961868286133,
      "step": 14767
    },
    {
      "epoch": 9.0130615234375e-05,
      "step": 14767,
      "training_step_time": 0.45287322998046875
    },
    {
      "epoch": 9.013671875e-05,
      "model_forward_time": 0.11449956893920898,
      "step": 14768
    },
    {
      "epoch": 9.013671875e-05,
      "step": 14768,
      "training_step_time": 0.40120410919189453
    },
    {
      "epoch": 9.0142822265625e-05,
      "model_forward_time": 0.11522364616394043,
      "step": 14769
    },
    {
      "epoch": 9.0142822265625e-05,
      "step": 14769,
      "training_step_time": 0.3945019245147705
    },
    {
      "epoch": 9.014892578125e-05,
      "grad_norm": 0.14502651989459991,
      "learning_rate": 8.984315129105099e-05,
      "loss": 0.064,
      "step": 14770
    },
    {
      "epoch": 9.014892578125e-05,
      "model_forward_time": 0.11513853073120117,
      "step": 14770
    },
    {
      "epoch": 9.014892578125e-05,
      "step": 14770,
      "training_step_time": 0.3963310718536377
    },
    {
      "epoch": 9.0155029296875e-05,
      "model_forward_time": 0.11466407775878906,
      "step": 14771
    },
    {
      "epoch": 9.0155029296875e-05,
      "step": 14771,
      "training_step_time": 0.38862085342407227
    },
    {
      "epoch": 9.01611328125e-05,
      "model_forward_time": 0.11506080627441406,
      "step": 14772
    },
    {
      "epoch": 9.01611328125e-05,
      "step": 14772,
      "training_step_time": 0.614140510559082
    },
    {
      "epoch": 9.0167236328125e-05,
      "model_forward_time": 0.11469721794128418,
      "step": 14773
    },
    {
      "epoch": 9.0167236328125e-05,
      "step": 14773,
      "training_step_time": 0.4087839126586914
    },
    {
      "epoch": 9.017333984375e-05,
      "model_forward_time": 0.11541008949279785,
      "step": 14774
    },
    {
      "epoch": 9.017333984375e-05,
      "step": 14774,
      "training_step_time": 0.48044466972351074
    },
    {
      "epoch": 9.0179443359375e-05,
      "model_forward_time": 0.11476492881774902,
      "step": 14775
    },
    {
      "epoch": 9.0179443359375e-05,
      "step": 14775,
      "training_step_time": 0.4991459846496582
    },
    {
      "epoch": 9.0185546875e-05,
      "model_forward_time": 0.11418795585632324,
      "step": 14776
    },
    {
      "epoch": 9.0185546875e-05,
      "step": 14776,
      "training_step_time": 0.45459938049316406
    },
    {
      "epoch": 9.0191650390625e-05,
      "model_forward_time": 0.11462831497192383,
      "step": 14777
    },
    {
      "epoch": 9.0191650390625e-05,
      "step": 14777,
      "training_step_time": 0.44626641273498535
    },
    {
      "epoch": 9.019775390625e-05,
      "model_forward_time": 0.11457109451293945,
      "step": 14778
    },
    {
      "epoch": 9.019775390625e-05,
      "step": 14778,
      "training_step_time": 0.40248584747314453
    },
    {
      "epoch": 9.0203857421875e-05,
      "model_forward_time": 0.11378836631774902,
      "step": 14779
    },
    {
      "epoch": 9.0203857421875e-05,
      "step": 14779,
      "training_step_time": 0.40457892417907715
    },
    {
      "epoch": 9.02099609375e-05,
      "grad_norm": 0.10509517788887024,
      "learning_rate": 8.982649590120982e-05,
      "loss": 0.0521,
      "step": 14780
    },
    {
      "epoch": 9.02099609375e-05,
      "model_forward_time": 0.11435055732727051,
      "step": 14780
    },
    {
      "epoch": 9.02099609375e-05,
      "step": 14780,
      "training_step_time": 0.4427950382232666
    },
    {
      "epoch": 9.0216064453125e-05,
      "model_forward_time": 0.1154623031616211,
      "step": 14781
    },
    {
      "epoch": 9.0216064453125e-05,
      "step": 14781,
      "training_step_time": 0.38483572006225586
    },
    {
      "epoch": 9.022216796875e-05,
      "model_forward_time": 0.11468243598937988,
      "step": 14782
    },
    {
      "epoch": 9.022216796875e-05,
      "step": 14782,
      "training_step_time": 0.3849759101867676
    },
    {
      "epoch": 9.0228271484375e-05,
      "model_forward_time": 0.11557245254516602,
      "step": 14783
    },
    {
      "epoch": 9.0228271484375e-05,
      "step": 14783,
      "training_step_time": 0.39098334312438965
    },
    {
      "epoch": 9.0234375e-05,
      "model_forward_time": 0.11505818367004395,
      "step": 14784
    },
    {
      "epoch": 9.0234375e-05,
      "step": 14784,
      "training_step_time": 0.7308816909790039
    },
    {
      "epoch": 9.0240478515625e-05,
      "model_forward_time": 0.1150820255279541,
      "step": 14785
    },
    {
      "epoch": 9.0240478515625e-05,
      "step": 14785,
      "training_step_time": 0.39542555809020996
    },
    {
      "epoch": 9.024658203125e-05,
      "model_forward_time": 0.11484456062316895,
      "step": 14786
    },
    {
      "epoch": 9.024658203125e-05,
      "step": 14786,
      "training_step_time": 0.38213562965393066
    },
    {
      "epoch": 9.0252685546875e-05,
      "model_forward_time": 0.11485576629638672,
      "step": 14787
    },
    {
      "epoch": 9.0252685546875e-05,
      "step": 14787,
      "training_step_time": 0.44796252250671387
    },
    {
      "epoch": 9.02587890625e-05,
      "model_forward_time": 0.11475658416748047,
      "step": 14788
    },
    {
      "epoch": 9.02587890625e-05,
      "step": 14788,
      "training_step_time": 0.40732789039611816
    },
    {
      "epoch": 9.0264892578125e-05,
      "model_forward_time": 0.11584186553955078,
      "step": 14789
    },
    {
      "epoch": 9.0264892578125e-05,
      "step": 14789,
      "training_step_time": 0.3926084041595459
    },
    {
      "epoch": 9.027099609375e-05,
      "grad_norm": 0.12905912101268768,
      "learning_rate": 8.980982841313074e-05,
      "loss": 0.0567,
      "step": 14790
    },
    {
      "epoch": 9.027099609375e-05,
      "model_forward_time": 0.11470222473144531,
      "step": 14790
    },
    {
      "epoch": 9.027099609375e-05,
      "step": 14790,
      "training_step_time": 0.5672867298126221
    },
    {
      "epoch": 9.0277099609375e-05,
      "model_forward_time": 0.11817264556884766,
      "step": 14791
    },
    {
      "epoch": 9.0277099609375e-05,
      "step": 14791,
      "training_step_time": 0.4128139019012451
    },
    {
      "epoch": 9.0283203125e-05,
      "model_forward_time": 0.11775350570678711,
      "step": 14792
    },
    {
      "epoch": 9.0283203125e-05,
      "step": 14792,
      "training_step_time": 0.43425774574279785
    },
    {
      "epoch": 9.0289306640625e-05,
      "model_forward_time": 0.11822223663330078,
      "step": 14793
    },
    {
      "epoch": 9.0289306640625e-05,
      "step": 14793,
      "training_step_time": 0.3956453800201416
    },
    {
      "epoch": 9.029541015625e-05,
      "model_forward_time": 0.11569499969482422,
      "step": 14794
    },
    {
      "epoch": 9.029541015625e-05,
      "step": 14794,
      "training_step_time": 0.3832967281341553
    },
    {
      "epoch": 9.0301513671875e-05,
      "model_forward_time": 0.11536216735839844,
      "step": 14795
    },
    {
      "epoch": 9.0301513671875e-05,
      "step": 14795,
      "training_step_time": 0.38941192626953125
    },
    {
      "epoch": 9.03076171875e-05,
      "model_forward_time": 0.1149592399597168,
      "step": 14796
    },
    {
      "epoch": 9.03076171875e-05,
      "step": 14796,
      "training_step_time": 0.6090390682220459
    },
    {
      "epoch": 9.0313720703125e-05,
      "model_forward_time": 0.11483168601989746,
      "step": 14797
    },
    {
      "epoch": 9.0313720703125e-05,
      "step": 14797,
      "training_step_time": 0.4000685214996338
    },
    {
      "epoch": 9.031982421875e-05,
      "model_forward_time": 0.11443257331848145,
      "step": 14798
    },
    {
      "epoch": 9.031982421875e-05,
      "step": 14798,
      "training_step_time": 0.3926548957824707
    },
    {
      "epoch": 9.0325927734375e-05,
      "model_forward_time": 0.1152503490447998,
      "step": 14799
    },
    {
      "epoch": 9.0325927734375e-05,
      "step": 14799,
      "training_step_time": 0.39115166664123535
    },
    {
      "epoch": 9.033203125e-05,
      "grad_norm": 0.1128959208726883,
      "learning_rate": 8.979314883187693e-05,
      "loss": 0.0474,
      "step": 14800
    },
    {
      "epoch": 9.033203125e-05,
      "model_forward_time": 0.11521053314208984,
      "step": 14800
    },
    {
      "epoch": 9.033203125e-05,
      "step": 14800,
      "training_step_time": 0.38191914558410645
    },
    {
      "epoch": 9.0338134765625e-05,
      "model_forward_time": 0.11483883857727051,
      "step": 14801
    },
    {
      "epoch": 9.0338134765625e-05,
      "step": 14801,
      "training_step_time": 0.48975467681884766
    },
    {
      "epoch": 9.034423828125e-05,
      "model_forward_time": 0.11606884002685547,
      "step": 14802
    },
    {
      "epoch": 9.034423828125e-05,
      "step": 14802,
      "training_step_time": 0.5316035747528076
    },
    {
      "epoch": 9.0350341796875e-05,
      "model_forward_time": 0.11523056030273438,
      "step": 14803
    },
    {
      "epoch": 9.0350341796875e-05,
      "step": 14803,
      "training_step_time": 0.4510958194732666
    },
    {
      "epoch": 9.03564453125e-05,
      "model_forward_time": 0.11487817764282227,
      "step": 14804
    },
    {
      "epoch": 9.03564453125e-05,
      "step": 14804,
      "training_step_time": 0.42776966094970703
    },
    {
      "epoch": 9.0362548828125e-05,
      "model_forward_time": 0.11558032035827637,
      "step": 14805
    },
    {
      "epoch": 9.0362548828125e-05,
      "step": 14805,
      "training_step_time": 0.4312918186187744
    },
    {
      "epoch": 9.036865234375e-05,
      "model_forward_time": 0.11484932899475098,
      "step": 14806
    },
    {
      "epoch": 9.036865234375e-05,
      "step": 14806,
      "training_step_time": 0.41358518600463867
    },
    {
      "epoch": 9.0374755859375e-05,
      "model_forward_time": 0.11497163772583008,
      "step": 14807
    },
    {
      "epoch": 9.0374755859375e-05,
      "step": 14807,
      "training_step_time": 0.385204553604126
    },
    {
      "epoch": 9.0380859375e-05,
      "model_forward_time": 0.11513805389404297,
      "step": 14808
    },
    {
      "epoch": 9.0380859375e-05,
      "step": 14808,
      "training_step_time": 0.5037009716033936
    },
    {
      "epoch": 9.0386962890625e-05,
      "model_forward_time": 0.11469817161560059,
      "step": 14809
    },
    {
      "epoch": 9.0386962890625e-05,
      "step": 14809,
      "training_step_time": 0.3936476707458496
    },
    {
      "epoch": 9.039306640625e-05,
      "grad_norm": 0.1525934338569641,
      "learning_rate": 8.977645716251518e-05,
      "loss": 0.0519,
      "step": 14810
    },
    {
      "epoch": 9.039306640625e-05,
      "model_forward_time": 0.11535263061523438,
      "step": 14810
    },
    {
      "epoch": 9.039306640625e-05,
      "step": 14810,
      "training_step_time": 0.37619662284851074
    },
    {
      "epoch": 9.0399169921875e-05,
      "model_forward_time": 0.11452722549438477,
      "step": 14811
    },
    {
      "epoch": 9.0399169921875e-05,
      "step": 14811,
      "training_step_time": 0.3949000835418701
    },
    {
      "epoch": 9.04052734375e-05,
      "model_forward_time": 0.11513829231262207,
      "step": 14812
    },
    {
      "epoch": 9.04052734375e-05,
      "step": 14812,
      "training_step_time": 0.39818644523620605
    },
    {
      "epoch": 9.0411376953125e-05,
      "model_forward_time": 0.11431431770324707,
      "step": 14813
    },
    {
      "epoch": 9.0411376953125e-05,
      "step": 14813,
      "training_step_time": 0.3925292491912842
    },
    {
      "epoch": 9.041748046875e-05,
      "model_forward_time": 0.11454248428344727,
      "step": 14814
    },
    {
      "epoch": 9.041748046875e-05,
      "step": 14814,
      "training_step_time": 0.6665565967559814
    },
    {
      "epoch": 9.0423583984375e-05,
      "model_forward_time": 0.11476826667785645,
      "step": 14815
    },
    {
      "epoch": 9.0423583984375e-05,
      "step": 14815,
      "training_step_time": 0.41409969329833984
    },
    {
      "epoch": 9.04296875e-05,
      "model_forward_time": 0.11479401588439941,
      "step": 14816
    },
    {
      "epoch": 9.04296875e-05,
      "step": 14816,
      "training_step_time": 0.369152307510376
    },
    {
      "epoch": 9.0435791015625e-05,
      "model_forward_time": 0.11490106582641602,
      "step": 14817
    },
    {
      "epoch": 9.0435791015625e-05,
      "step": 14817,
      "training_step_time": 0.41034889221191406
    },
    {
      "epoch": 9.044189453125e-05,
      "model_forward_time": 0.11448121070861816,
      "step": 14818
    },
    {
      "epoch": 9.044189453125e-05,
      "step": 14818,
      "training_step_time": 0.42900705337524414
    },
    {
      "epoch": 9.0447998046875e-05,
      "model_forward_time": 0.11414813995361328,
      "step": 14819
    },
    {
      "epoch": 9.0447998046875e-05,
      "step": 14819,
      "training_step_time": 0.4162445068359375
    },
    {
      "epoch": 9.04541015625e-05,
      "grad_norm": 0.17441174387931824,
      "learning_rate": 8.975975341011596e-05,
      "loss": 0.0548,
      "step": 14820
    },
    {
      "epoch": 9.04541015625e-05,
      "model_forward_time": 0.1146857738494873,
      "step": 14820
    },
    {
      "epoch": 9.04541015625e-05,
      "step": 14820,
      "training_step_time": 0.45316004753112793
    },
    {
      "epoch": 9.0460205078125e-05,
      "model_forward_time": 0.11455082893371582,
      "step": 14821
    },
    {
      "epoch": 9.0460205078125e-05,
      "step": 14821,
      "training_step_time": 0.3914611339569092
    },
    {
      "epoch": 9.046630859375e-05,
      "model_forward_time": 0.11469483375549316,
      "step": 14822
    },
    {
      "epoch": 9.046630859375e-05,
      "step": 14822,
      "training_step_time": 0.3930213451385498
    },
    {
      "epoch": 9.0472412109375e-05,
      "model_forward_time": 0.11477828025817871,
      "step": 14823
    },
    {
      "epoch": 9.0472412109375e-05,
      "step": 14823,
      "training_step_time": 0.39560508728027344
    },
    {
      "epoch": 9.0478515625e-05,
      "model_forward_time": 0.11516571044921875,
      "step": 14824
    },
    {
      "epoch": 9.0478515625e-05,
      "step": 14824,
      "training_step_time": 0.39061713218688965
    },
    {
      "epoch": 9.0484619140625e-05,
      "model_forward_time": 0.11554336547851562,
      "step": 14825
    },
    {
      "epoch": 9.0484619140625e-05,
      "step": 14825,
      "training_step_time": 0.3968071937561035
    },
    {
      "epoch": 9.049072265625e-05,
      "model_forward_time": 0.11499357223510742,
      "step": 14826
    },
    {
      "epoch": 9.049072265625e-05,
      "step": 14826,
      "training_step_time": 0.7063863277435303
    },
    {
      "epoch": 9.0496826171875e-05,
      "model_forward_time": 0.11612892150878906,
      "step": 14827
    },
    {
      "epoch": 9.0496826171875e-05,
      "step": 14827,
      "training_step_time": 0.3905010223388672
    },
    {
      "epoch": 9.05029296875e-05,
      "model_forward_time": 0.11431598663330078,
      "step": 14828
    },
    {
      "epoch": 9.05029296875e-05,
      "step": 14828,
      "training_step_time": 0.3974730968475342
    },
    {
      "epoch": 9.0509033203125e-05,
      "model_forward_time": 0.11589431762695312,
      "step": 14829
    },
    {
      "epoch": 9.0509033203125e-05,
      "step": 14829,
      "training_step_time": 0.5382750034332275
    },
    {
      "epoch": 9.051513671875e-05,
      "grad_norm": 0.21267525851726532,
      "learning_rate": 8.974303757975345e-05,
      "loss": 0.0579,
      "step": 14830
    },
    {
      "epoch": 9.051513671875e-05,
      "model_forward_time": 0.11559009552001953,
      "step": 14830
    },
    {
      "epoch": 9.051513671875e-05,
      "step": 14830,
      "training_step_time": 0.36632490158081055
    },
    {
      "epoch": 9.0521240234375e-05,
      "model_forward_time": 0.1140129566192627,
      "step": 14831
    },
    {
      "epoch": 9.0521240234375e-05,
      "step": 14831,
      "training_step_time": 0.4313664436340332
    },
    {
      "epoch": 9.052734375e-05,
      "model_forward_time": 0.11523318290710449,
      "step": 14832
    },
    {
      "epoch": 9.052734375e-05,
      "step": 14832,
      "training_step_time": 0.4721646308898926
    },
    {
      "epoch": 9.0533447265625e-05,
      "model_forward_time": 0.1145772933959961,
      "step": 14833
    },
    {
      "epoch": 9.0533447265625e-05,
      "step": 14833,
      "training_step_time": 0.496462345123291
    },
    {
      "epoch": 9.053955078125e-05,
      "model_forward_time": 0.11475920677185059,
      "step": 14834
    },
    {
      "epoch": 9.053955078125e-05,
      "step": 14834,
      "training_step_time": 0.4126622676849365
    },
    {
      "epoch": 9.0545654296875e-05,
      "model_forward_time": 0.1146233081817627,
      "step": 14835
    },
    {
      "epoch": 9.0545654296875e-05,
      "step": 14835,
      "training_step_time": 0.3856520652770996
    },
    {
      "epoch": 9.05517578125e-05,
      "model_forward_time": 0.11494874954223633,
      "step": 14836
    },
    {
      "epoch": 9.05517578125e-05,
      "step": 14836,
      "training_step_time": 0.3882606029510498
    },
    {
      "epoch": 9.0557861328125e-05,
      "model_forward_time": 0.11553001403808594,
      "step": 14837
    },
    {
      "epoch": 9.0557861328125e-05,
      "step": 14837,
      "training_step_time": 0.3900930881500244
    },
    {
      "epoch": 9.056396484375e-05,
      "model_forward_time": 0.1150350570678711,
      "step": 14838
    },
    {
      "epoch": 9.056396484375e-05,
      "step": 14838,
      "training_step_time": 0.4023771286010742
    },
    {
      "epoch": 9.0570068359375e-05,
      "model_forward_time": 0.11506342887878418,
      "step": 14839
    },
    {
      "epoch": 9.0570068359375e-05,
      "step": 14839,
      "training_step_time": 0.39130735397338867
    },
    {
      "epoch": 9.0576171875e-05,
      "grad_norm": 0.23277707397937775,
      "learning_rate": 8.972630967650548e-05,
      "loss": 0.0548,
      "step": 14840
    },
    {
      "epoch": 9.0576171875e-05,
      "model_forward_time": 0.11540675163269043,
      "step": 14840
    },
    {
      "epoch": 9.0576171875e-05,
      "step": 14840,
      "training_step_time": 0.38884735107421875
    },
    {
      "epoch": 9.0582275390625e-05,
      "model_forward_time": 0.11502885818481445,
      "step": 14841
    },
    {
      "epoch": 9.0582275390625e-05,
      "step": 14841,
      "training_step_time": 0.39556241035461426
    },
    {
      "epoch": 9.058837890625e-05,
      "model_forward_time": 0.1159203052520752,
      "step": 14842
    },
    {
      "epoch": 9.058837890625e-05,
      "step": 14842,
      "training_step_time": 0.3924579620361328
    },
    {
      "epoch": 9.0594482421875e-05,
      "model_forward_time": 0.11553955078125,
      "step": 14843
    },
    {
      "epoch": 9.0594482421875e-05,
      "step": 14843,
      "training_step_time": 0.42699289321899414
    },
    {
      "epoch": 9.06005859375e-05,
      "model_forward_time": 0.11474204063415527,
      "step": 14844
    },
    {
      "epoch": 9.06005859375e-05,
      "step": 14844,
      "training_step_time": 0.6465566158294678
    },
    {
      "epoch": 9.0606689453125e-05,
      "model_forward_time": 0.11554694175720215,
      "step": 14845
    },
    {
      "epoch": 9.0606689453125e-05,
      "step": 14845,
      "training_step_time": 0.4343385696411133
    },
    {
      "epoch": 9.061279296875e-05,
      "model_forward_time": 0.1146690845489502,
      "step": 14846
    },
    {
      "epoch": 9.061279296875e-05,
      "step": 14846,
      "training_step_time": 0.41826510429382324
    },
    {
      "epoch": 9.0618896484375e-05,
      "model_forward_time": 0.11513996124267578,
      "step": 14847
    },
    {
      "epoch": 9.0618896484375e-05,
      "step": 14847,
      "training_step_time": 0.5022375583648682
    },
    {
      "epoch": 9.0625e-05,
      "model_forward_time": 0.11532950401306152,
      "step": 14848
    },
    {
      "epoch": 9.0625e-05,
      "step": 14848,
      "training_step_time": 0.42301249504089355
    },
    {
      "epoch": 9.0631103515625e-05,
      "model_forward_time": 0.11494684219360352,
      "step": 14849
    },
    {
      "epoch": 9.0631103515625e-05,
      "step": 14849,
      "training_step_time": 0.37470531463623047
    },
    {
      "epoch": 9.063720703125e-05,
      "grad_norm": 0.24011071026325226,
      "learning_rate": 8.970956970545355e-05,
      "loss": 0.0523,
      "step": 14850
    },
    {
      "epoch": 9.063720703125e-05,
      "model_forward_time": 0.11496448516845703,
      "step": 14850
    },
    {
      "epoch": 9.063720703125e-05,
      "step": 14850,
      "training_step_time": 0.5875883102416992
    },
    {
      "epoch": 9.0643310546875e-05,
      "model_forward_time": 0.11410260200500488,
      "step": 14851
    },
    {
      "epoch": 9.0643310546875e-05,
      "step": 14851,
      "training_step_time": 0.39170241355895996
    },
    {
      "epoch": 9.06494140625e-05,
      "model_forward_time": 0.11463451385498047,
      "step": 14852
    },
    {
      "epoch": 9.06494140625e-05,
      "step": 14852,
      "training_step_time": 0.40825676918029785
    },
    {
      "epoch": 9.0655517578125e-05,
      "model_forward_time": 0.11536288261413574,
      "step": 14853
    },
    {
      "epoch": 9.0655517578125e-05,
      "step": 14853,
      "training_step_time": 0.4099407196044922
    },
    {
      "epoch": 9.066162109375e-05,
      "model_forward_time": 0.11518645286560059,
      "step": 14854
    },
    {
      "epoch": 9.066162109375e-05,
      "step": 14854,
      "training_step_time": 0.3876502513885498
    },
    {
      "epoch": 9.0667724609375e-05,
      "model_forward_time": 0.11532306671142578,
      "step": 14855
    },
    {
      "epoch": 9.0667724609375e-05,
      "step": 14855,
      "training_step_time": 0.3923213481903076
    },
    {
      "epoch": 9.0673828125e-05,
      "model_forward_time": 0.11644864082336426,
      "step": 14856
    },
    {
      "epoch": 9.0673828125e-05,
      "step": 14856,
      "training_step_time": 0.5518853664398193
    },
    {
      "epoch": 9.0679931640625e-05,
      "model_forward_time": 0.11500835418701172,
      "step": 14857
    },
    {
      "epoch": 9.0679931640625e-05,
      "step": 14857,
      "training_step_time": 0.43834471702575684
    },
    {
      "epoch": 9.068603515625e-05,
      "model_forward_time": 0.11605954170227051,
      "step": 14858
    },
    {
      "epoch": 9.068603515625e-05,
      "step": 14858,
      "training_step_time": 0.42949533462524414
    },
    {
      "epoch": 9.0692138671875e-05,
      "model_forward_time": 0.11473655700683594,
      "step": 14859
    },
    {
      "epoch": 9.0692138671875e-05,
      "step": 14859,
      "training_step_time": 0.36546802520751953
    },
    {
      "epoch": 9.06982421875e-05,
      "grad_norm": 0.24550355970859528,
      "learning_rate": 8.969281767168283e-05,
      "loss": 0.0553,
      "step": 14860
    },
    {
      "epoch": 9.06982421875e-05,
      "model_forward_time": 0.1146395206451416,
      "step": 14860
    },
    {
      "epoch": 9.06982421875e-05,
      "step": 14860,
      "training_step_time": 0.46288418769836426
    },
    {
      "epoch": 9.0704345703125e-05,
      "model_forward_time": 0.1144709587097168,
      "step": 14861
    },
    {
      "epoch": 9.0704345703125e-05,
      "step": 14861,
      "training_step_time": 0.4491422176361084
    },
    {
      "epoch": 9.071044921875e-05,
      "model_forward_time": 0.11498594284057617,
      "step": 14862
    },
    {
      "epoch": 9.071044921875e-05,
      "step": 14862,
      "training_step_time": 0.38933777809143066
    },
    {
      "epoch": 9.0716552734375e-05,
      "model_forward_time": 0.11472034454345703,
      "step": 14863
    },
    {
      "epoch": 9.0716552734375e-05,
      "step": 14863,
      "training_step_time": 0.3904426097869873
    },
    {
      "epoch": 9.072265625e-05,
      "model_forward_time": 0.1146085262298584,
      "step": 14864
    },
    {
      "epoch": 9.072265625e-05,
      "step": 14864,
      "training_step_time": 0.39154911041259766
    },
    {
      "epoch": 9.0728759765625e-05,
      "model_forward_time": 0.11591935157775879,
      "step": 14865
    },
    {
      "epoch": 9.0728759765625e-05,
      "step": 14865,
      "training_step_time": 0.39838123321533203
    },
    {
      "epoch": 9.073486328125e-05,
      "model_forward_time": 0.11538934707641602,
      "step": 14866
    },
    {
      "epoch": 9.073486328125e-05,
      "step": 14866,
      "training_step_time": 0.3914041519165039
    },
    {
      "epoch": 9.0740966796875e-05,
      "model_forward_time": 0.11548542976379395,
      "step": 14867
    },
    {
      "epoch": 9.0740966796875e-05,
      "step": 14867,
      "training_step_time": 0.38602685928344727
    },
    {
      "epoch": 9.07470703125e-05,
      "model_forward_time": 0.11579108238220215,
      "step": 14868
    },
    {
      "epoch": 9.07470703125e-05,
      "step": 14868,
      "training_step_time": 0.580693244934082
    },
    {
      "epoch": 9.0753173828125e-05,
      "model_forward_time": 0.1143953800201416,
      "step": 14869
    },
    {
      "epoch": 9.0753173828125e-05,
      "step": 14869,
      "training_step_time": 0.38617682456970215
    },
    {
      "epoch": 9.075927734375e-05,
      "grad_norm": 0.22223392128944397,
      "learning_rate": 8.967605358028211e-05,
      "loss": 0.0567,
      "step": 14870
    },
    {
      "epoch": 9.075927734375e-05,
      "model_forward_time": 0.11518144607543945,
      "step": 14870
    },
    {
      "epoch": 9.075927734375e-05,
      "step": 14870,
      "training_step_time": 0.38493776321411133
    },
    {
      "epoch": 9.0765380859375e-05,
      "model_forward_time": 0.11473417282104492,
      "step": 14871
    },
    {
      "epoch": 9.0765380859375e-05,
      "step": 14871,
      "training_step_time": 0.49645376205444336
    },
    {
      "epoch": 9.0771484375e-05,
      "model_forward_time": 0.1150507926940918,
      "step": 14872
    },
    {
      "epoch": 9.0771484375e-05,
      "step": 14872,
      "training_step_time": 0.42172765731811523
    },
    {
      "epoch": 9.0777587890625e-05,
      "model_forward_time": 0.11470150947570801,
      "step": 14873
    },
    {
      "epoch": 9.0777587890625e-05,
      "step": 14873,
      "training_step_time": 0.49243593215942383
    },
    {
      "epoch": 9.078369140625e-05,
      "model_forward_time": 0.11511731147766113,
      "step": 14874
    },
    {
      "epoch": 9.078369140625e-05,
      "step": 14874,
      "training_step_time": 0.5863559246063232
    },
    {
      "epoch": 9.0789794921875e-05,
      "model_forward_time": 0.11418581008911133,
      "step": 14875
    },
    {
      "epoch": 9.0789794921875e-05,
      "step": 14875,
      "training_step_time": 0.4412343502044678
    },
    {
      "epoch": 9.07958984375e-05,
      "model_forward_time": 0.11439824104309082,
      "step": 14876
    },
    {
      "epoch": 9.07958984375e-05,
      "step": 14876,
      "training_step_time": 0.3781554698944092
    },
    {
      "epoch": 9.0802001953125e-05,
      "model_forward_time": 0.11492371559143066,
      "step": 14877
    },
    {
      "epoch": 9.0802001953125e-05,
      "step": 14877,
      "training_step_time": 0.39888620376586914
    },
    {
      "epoch": 9.080810546875e-05,
      "model_forward_time": 0.1144723892211914,
      "step": 14878
    },
    {
      "epoch": 9.080810546875e-05,
      "step": 14878,
      "training_step_time": 0.38909482955932617
    },
    {
      "epoch": 9.0814208984375e-05,
      "model_forward_time": 0.11489605903625488,
      "step": 14879
    },
    {
      "epoch": 9.0814208984375e-05,
      "step": 14879,
      "training_step_time": 0.39133238792419434
    },
    {
      "epoch": 9.08203125e-05,
      "grad_norm": 0.19251637160778046,
      "learning_rate": 8.965927743634391e-05,
      "loss": 0.0533,
      "step": 14880
    },
    {
      "epoch": 9.08203125e-05,
      "model_forward_time": 0.11494636535644531,
      "step": 14880
    },
    {
      "epoch": 9.08203125e-05,
      "step": 14880,
      "training_step_time": 0.5247735977172852
    },
    {
      "epoch": 9.0826416015625e-05,
      "model_forward_time": 0.11517643928527832,
      "step": 14881
    },
    {
      "epoch": 9.0826416015625e-05,
      "step": 14881,
      "training_step_time": 0.38463544845581055
    },
    {
      "epoch": 9.083251953125e-05,
      "model_forward_time": 0.11463189125061035,
      "step": 14882
    },
    {
      "epoch": 9.083251953125e-05,
      "step": 14882,
      "training_step_time": 0.3841369152069092
    },
    {
      "epoch": 9.0838623046875e-05,
      "model_forward_time": 0.1157991886138916,
      "step": 14883
    },
    {
      "epoch": 9.0838623046875e-05,
      "step": 14883,
      "training_step_time": 0.3890390396118164
    },
    {
      "epoch": 9.08447265625e-05,
      "model_forward_time": 0.11519289016723633,
      "step": 14884
    },
    {
      "epoch": 9.08447265625e-05,
      "step": 14884,
      "training_step_time": 0.4001157283782959
    },
    {
      "epoch": 9.0850830078125e-05,
      "model_forward_time": 0.11469697952270508,
      "step": 14885
    },
    {
      "epoch": 9.0850830078125e-05,
      "step": 14885,
      "training_step_time": 0.4710712432861328
    },
    {
      "epoch": 9.085693359375e-05,
      "model_forward_time": 0.11492466926574707,
      "step": 14886
    },
    {
      "epoch": 9.085693359375e-05,
      "step": 14886,
      "training_step_time": 0.6355006694793701
    },
    {
      "epoch": 9.0863037109375e-05,
      "model_forward_time": 0.11472606658935547,
      "step": 14887
    },
    {
      "epoch": 9.0863037109375e-05,
      "step": 14887,
      "training_step_time": 0.3704719543457031
    },
    {
      "epoch": 9.0869140625e-05,
      "model_forward_time": 0.11492013931274414,
      "step": 14888
    },
    {
      "epoch": 9.0869140625e-05,
      "step": 14888,
      "training_step_time": 0.44315147399902344
    },
    {
      "epoch": 9.0875244140625e-05,
      "model_forward_time": 0.11505508422851562,
      "step": 14889
    },
    {
      "epoch": 9.0875244140625e-05,
      "step": 14889,
      "training_step_time": 0.3984220027923584
    },
    {
      "epoch": 9.088134765625e-05,
      "grad_norm": 0.18029192090034485,
      "learning_rate": 8.964248924496435e-05,
      "loss": 0.0521,
      "step": 14890
    },
    {
      "epoch": 9.088134765625e-05,
      "model_forward_time": 0.11440086364746094,
      "step": 14890
    },
    {
      "epoch": 9.088134765625e-05,
      "step": 14890,
      "training_step_time": 0.38860201835632324
    },
    {
      "epoch": 9.0887451171875e-05,
      "model_forward_time": 0.11422538757324219,
      "step": 14891
    },
    {
      "epoch": 9.0887451171875e-05,
      "step": 14891,
      "training_step_time": 0.38698267936706543
    },
    {
      "epoch": 9.08935546875e-05,
      "model_forward_time": 0.11497664451599121,
      "step": 14892
    },
    {
      "epoch": 9.08935546875e-05,
      "step": 14892,
      "training_step_time": 0.5564563274383545
    },
    {
      "epoch": 9.0899658203125e-05,
      "model_forward_time": 0.11480855941772461,
      "step": 14893
    },
    {
      "epoch": 9.0899658203125e-05,
      "step": 14893,
      "training_step_time": 0.38665151596069336
    },
    {
      "epoch": 9.090576171875e-05,
      "model_forward_time": 0.11518025398254395,
      "step": 14894
    },
    {
      "epoch": 9.090576171875e-05,
      "step": 14894,
      "training_step_time": 0.44309115409851074
    },
    {
      "epoch": 9.0911865234375e-05,
      "model_forward_time": 0.11500763893127441,
      "step": 14895
    },
    {
      "epoch": 9.0911865234375e-05,
      "step": 14895,
      "training_step_time": 0.39989304542541504
    },
    {
      "epoch": 9.091796875e-05,
      "model_forward_time": 0.11588454246520996,
      "step": 14896
    },
    {
      "epoch": 9.091796875e-05,
      "step": 14896,
      "training_step_time": 0.38277173042297363
    },
    {
      "epoch": 9.0924072265625e-05,
      "model_forward_time": 0.11501383781433105,
      "step": 14897
    },
    {
      "epoch": 9.0924072265625e-05,
      "step": 14897,
      "training_step_time": 0.3822777271270752
    },
    {
      "epoch": 9.093017578125e-05,
      "model_forward_time": 0.11507225036621094,
      "step": 14898
    },
    {
      "epoch": 9.093017578125e-05,
      "step": 14898,
      "training_step_time": 0.644542932510376
    },
    {
      "epoch": 9.0936279296875e-05,
      "model_forward_time": 0.11457014083862305,
      "step": 14899
    },
    {
      "epoch": 9.0936279296875e-05,
      "step": 14899,
      "training_step_time": 0.4504697322845459
    },
    {
      "epoch": 9.09423828125e-05,
      "grad_norm": 0.1628330945968628,
      "learning_rate": 8.962568901124327e-05,
      "loss": 0.0504,
      "step": 14900
    },
    {
      "epoch": 9.09423828125e-05,
      "model_forward_time": 0.11610007286071777,
      "step": 14900
    },
    {
      "epoch": 9.09423828125e-05,
      "step": 14900,
      "training_step_time": 0.4048655033111572
    },
    {
      "epoch": 9.0948486328125e-05,
      "model_forward_time": 0.11452651023864746,
      "step": 14901
    },
    {
      "epoch": 9.0948486328125e-05,
      "step": 14901,
      "training_step_time": 0.3649156093597412
    },
    {
      "epoch": 9.095458984375e-05,
      "model_forward_time": 0.11495089530944824,
      "step": 14902
    },
    {
      "epoch": 9.095458984375e-05,
      "step": 14902,
      "training_step_time": 0.43936681747436523
    },
    {
      "epoch": 9.0960693359375e-05,
      "model_forward_time": 0.11467742919921875,
      "step": 14903
    },
    {
      "epoch": 9.0960693359375e-05,
      "step": 14903,
      "training_step_time": 0.41869091987609863
    },
    {
      "epoch": 9.0966796875e-05,
      "model_forward_time": 0.11481809616088867,
      "step": 14904
    },
    {
      "epoch": 9.0966796875e-05,
      "step": 14904,
      "training_step_time": 0.4369683265686035
    },
    {
      "epoch": 9.0972900390625e-05,
      "model_forward_time": 0.11668634414672852,
      "step": 14905
    },
    {
      "epoch": 9.0972900390625e-05,
      "step": 14905,
      "training_step_time": 0.3916463851928711
    },
    {
      "epoch": 9.097900390625e-05,
      "model_forward_time": 0.11469435691833496,
      "step": 14906
    },
    {
      "epoch": 9.097900390625e-05,
      "step": 14906,
      "training_step_time": 0.38425183296203613
    },
    {
      "epoch": 9.0985107421875e-05,
      "model_forward_time": 0.11545586585998535,
      "step": 14907
    },
    {
      "epoch": 9.0985107421875e-05,
      "step": 14907,
      "training_step_time": 0.3898606300354004
    },
    {
      "epoch": 9.09912109375e-05,
      "model_forward_time": 0.1152791976928711,
      "step": 14908
    },
    {
      "epoch": 9.09912109375e-05,
      "step": 14908,
      "training_step_time": 0.38993215560913086
    },
    {
      "epoch": 9.0997314453125e-05,
      "model_forward_time": 0.11503458023071289,
      "step": 14909
    },
    {
      "epoch": 9.0997314453125e-05,
      "step": 14909,
      "training_step_time": 0.38985562324523926
    },
    {
      "epoch": 9.100341796875e-05,
      "grad_norm": 0.18411193788051605,
      "learning_rate": 8.96088767402841e-05,
      "loss": 0.0509,
      "step": 14910
    },
    {
      "epoch": 9.100341796875e-05,
      "model_forward_time": 0.11537003517150879,
      "step": 14910
    },
    {
      "epoch": 9.100341796875e-05,
      "step": 14910,
      "training_step_time": 0.723160982131958
    },
    {
      "epoch": 9.1009521484375e-05,
      "model_forward_time": 0.11490654945373535,
      "step": 14911
    },
    {
      "epoch": 9.1009521484375e-05,
      "step": 14911,
      "training_step_time": 0.39058518409729004
    },
    {
      "epoch": 9.1015625e-05,
      "model_forward_time": 0.11495757102966309,
      "step": 14912
    },
    {
      "epoch": 9.1015625e-05,
      "step": 14912,
      "training_step_time": 0.43218493461608887
    },
    {
      "epoch": 9.1021728515625e-05,
      "model_forward_time": 0.11535143852233887,
      "step": 14913
    },
    {
      "epoch": 9.1021728515625e-05,
      "step": 14913,
      "training_step_time": 0.5024921894073486
    },
    {
      "epoch": 9.102783203125e-05,
      "model_forward_time": 0.11442303657531738,
      "step": 14914
    },
    {
      "epoch": 9.102783203125e-05,
      "step": 14914,
      "training_step_time": 0.38661861419677734
    },
    {
      "epoch": 9.1033935546875e-05,
      "model_forward_time": 0.11484098434448242,
      "step": 14915
    },
    {
      "epoch": 9.1033935546875e-05,
      "step": 14915,
      "training_step_time": 0.4878809452056885
    },
    {
      "epoch": 9.10400390625e-05,
      "model_forward_time": 0.11482977867126465,
      "step": 14916
    },
    {
      "epoch": 9.10400390625e-05,
      "step": 14916,
      "training_step_time": 0.4806184768676758
    },
    {
      "epoch": 9.1046142578125e-05,
      "model_forward_time": 0.1152188777923584,
      "step": 14917
    },
    {
      "epoch": 9.1046142578125e-05,
      "step": 14917,
      "training_step_time": 0.42359375953674316
    },
    {
      "epoch": 9.105224609375e-05,
      "model_forward_time": 0.11475491523742676,
      "step": 14918
    },
    {
      "epoch": 9.105224609375e-05,
      "step": 14918,
      "training_step_time": 0.39391589164733887
    },
    {
      "epoch": 9.1058349609375e-05,
      "model_forward_time": 0.11450695991516113,
      "step": 14919
    },
    {
      "epoch": 9.1058349609375e-05,
      "step": 14919,
      "training_step_time": 0.41559743881225586
    },
    {
      "epoch": 9.1064453125e-05,
      "grad_norm": 0.19271433353424072,
      "learning_rate": 8.959205243719402e-05,
      "loss": 0.0669,
      "step": 14920
    },
    {
      "epoch": 9.1064453125e-05,
      "model_forward_time": 0.11430954933166504,
      "step": 14920
    },
    {
      "epoch": 9.1064453125e-05,
      "step": 14920,
      "training_step_time": 0.38941049575805664
    },
    {
      "epoch": 9.1070556640625e-05,
      "model_forward_time": 0.11478233337402344,
      "step": 14921
    },
    {
      "epoch": 9.1070556640625e-05,
      "step": 14921,
      "training_step_time": 0.3862118721008301
    },
    {
      "epoch": 9.107666015625e-05,
      "model_forward_time": 0.1147305965423584,
      "step": 14922
    },
    {
      "epoch": 9.107666015625e-05,
      "step": 14922,
      "training_step_time": 0.5143959522247314
    },
    {
      "epoch": 9.1082763671875e-05,
      "model_forward_time": 0.11446762084960938,
      "step": 14923
    },
    {
      "epoch": 9.1082763671875e-05,
      "step": 14923,
      "training_step_time": 0.4338963031768799
    },
    {
      "epoch": 9.10888671875e-05,
      "model_forward_time": 0.11438703536987305,
      "step": 14924
    },
    {
      "epoch": 9.10888671875e-05,
      "step": 14924,
      "training_step_time": 0.3905329704284668
    },
    {
      "epoch": 9.1094970703125e-05,
      "model_forward_time": 0.11649560928344727,
      "step": 14925
    },
    {
      "epoch": 9.1094970703125e-05,
      "step": 14925,
      "training_step_time": 0.39165544509887695
    },
    {
      "epoch": 9.110107421875e-05,
      "model_forward_time": 0.1140127182006836,
      "step": 14926
    },
    {
      "epoch": 9.110107421875e-05,
      "step": 14926,
      "training_step_time": 0.3912346363067627
    },
    {
      "epoch": 9.1107177734375e-05,
      "model_forward_time": 0.11508870124816895,
      "step": 14927
    },
    {
      "epoch": 9.1107177734375e-05,
      "step": 14927,
      "training_step_time": 0.4699842929840088
    },
    {
      "epoch": 9.111328125e-05,
      "model_forward_time": 0.11456155776977539,
      "step": 14928
    },
    {
      "epoch": 9.111328125e-05,
      "step": 14928,
      "training_step_time": 0.5608670711517334
    },
    {
      "epoch": 9.1119384765625e-05,
      "model_forward_time": 0.11503052711486816,
      "step": 14929
    },
    {
      "epoch": 9.1119384765625e-05,
      "step": 14929,
      "training_step_time": 0.36573290824890137
    },
    {
      "epoch": 9.112548828125e-05,
      "grad_norm": 0.16890987753868103,
      "learning_rate": 8.957521610708375e-05,
      "loss": 0.0506,
      "step": 14930
    },
    {
      "epoch": 9.112548828125e-05,
      "model_forward_time": 0.11449360847473145,
      "step": 14930
    },
    {
      "epoch": 9.112548828125e-05,
      "step": 14930,
      "training_step_time": 0.45261263847351074
    },
    {
      "epoch": 9.1131591796875e-05,
      "model_forward_time": 0.11469411849975586,
      "step": 14931
    },
    {
      "epoch": 9.1131591796875e-05,
      "step": 14931,
      "training_step_time": 0.4546480178833008
    },
    {
      "epoch": 9.11376953125e-05,
      "model_forward_time": 0.11483407020568848,
      "step": 14932
    },
    {
      "epoch": 9.11376953125e-05,
      "step": 14932,
      "training_step_time": 0.387087345123291
    },
    {
      "epoch": 9.1143798828125e-05,
      "model_forward_time": 0.11499214172363281,
      "step": 14933
    },
    {
      "epoch": 9.1143798828125e-05,
      "step": 14933,
      "training_step_time": 0.3894352912902832
    },
    {
      "epoch": 9.114990234375e-05,
      "model_forward_time": 0.11420178413391113,
      "step": 14934
    },
    {
      "epoch": 9.114990234375e-05,
      "step": 14934,
      "training_step_time": 0.5470085144042969
    },
    {
      "epoch": 9.1156005859375e-05,
      "model_forward_time": 0.1147623062133789,
      "step": 14935
    },
    {
      "epoch": 9.1156005859375e-05,
      "step": 14935,
      "training_step_time": 0.39581847190856934
    },
    {
      "epoch": 9.1162109375e-05,
      "model_forward_time": 0.11467361450195312,
      "step": 14936
    },
    {
      "epoch": 9.1162109375e-05,
      "step": 14936,
      "training_step_time": 0.42006564140319824
    },
    {
      "epoch": 9.1168212890625e-05,
      "model_forward_time": 0.11510562896728516,
      "step": 14937
    },
    {
      "epoch": 9.1168212890625e-05,
      "step": 14937,
      "training_step_time": 0.40257859230041504
    },
    {
      "epoch": 9.117431640625e-05,
      "model_forward_time": 0.1153111457824707,
      "step": 14938
    },
    {
      "epoch": 9.117431640625e-05,
      "step": 14938,
      "training_step_time": 0.3897132873535156
    },
    {
      "epoch": 9.1180419921875e-05,
      "model_forward_time": 0.11557650566101074,
      "step": 14939
    },
    {
      "epoch": 9.1180419921875e-05,
      "step": 14939,
      "training_step_time": 0.39122581481933594
    },
    {
      "epoch": 9.11865234375e-05,
      "grad_norm": 0.2052527815103531,
      "learning_rate": 8.955836775506776e-05,
      "loss": 0.0593,
      "step": 14940
    },
    {
      "epoch": 9.11865234375e-05,
      "model_forward_time": 0.11537575721740723,
      "step": 14940
    },
    {
      "epoch": 9.11865234375e-05,
      "step": 14940,
      "training_step_time": 0.659433126449585
    },
    {
      "epoch": 9.1192626953125e-05,
      "model_forward_time": 0.11467599868774414,
      "step": 14941
    },
    {
      "epoch": 9.1192626953125e-05,
      "step": 14941,
      "training_step_time": 0.4077298641204834
    },
    {
      "epoch": 9.119873046875e-05,
      "model_forward_time": 0.11478757858276367,
      "step": 14942
    },
    {
      "epoch": 9.119873046875e-05,
      "step": 14942,
      "training_step_time": 0.3892831802368164
    },
    {
      "epoch": 9.1204833984375e-05,
      "model_forward_time": 0.11470460891723633,
      "step": 14943
    },
    {
      "epoch": 9.1204833984375e-05,
      "step": 14943,
      "training_step_time": 0.3637661933898926
    },
    {
      "epoch": 9.12109375e-05,
      "model_forward_time": 0.11445856094360352,
      "step": 14944
    },
    {
      "epoch": 9.12109375e-05,
      "step": 14944,
      "training_step_time": 0.39751505851745605
    },
    {
      "epoch": 9.1217041015625e-05,
      "model_forward_time": 0.11502742767333984,
      "step": 14945
    },
    {
      "epoch": 9.1217041015625e-05,
      "step": 14945,
      "training_step_time": 0.4702911376953125
    },
    {
      "epoch": 9.122314453125e-05,
      "model_forward_time": 0.11488938331604004,
      "step": 14946
    },
    {
      "epoch": 9.122314453125e-05,
      "step": 14946,
      "training_step_time": 0.4801173210144043
    },
    {
      "epoch": 9.1229248046875e-05,
      "model_forward_time": 0.11490058898925781,
      "step": 14947
    },
    {
      "epoch": 9.1229248046875e-05,
      "step": 14947,
      "training_step_time": 0.39325523376464844
    },
    {
      "epoch": 9.12353515625e-05,
      "model_forward_time": 0.11580729484558105,
      "step": 14948
    },
    {
      "epoch": 9.12353515625e-05,
      "step": 14948,
      "training_step_time": 0.38680553436279297
    },
    {
      "epoch": 9.1241455078125e-05,
      "model_forward_time": 0.1150665283203125,
      "step": 14949
    },
    {
      "epoch": 9.1241455078125e-05,
      "step": 14949,
      "training_step_time": 0.42748236656188965
    },
    {
      "epoch": 9.124755859375e-05,
      "grad_norm": 0.19232375919818878,
      "learning_rate": 8.954150738626414e-05,
      "loss": 0.0537,
      "step": 14950
    },
    {
      "epoch": 9.124755859375e-05,
      "model_forward_time": 0.11489200592041016,
      "step": 14950
    },
    {
      "epoch": 9.124755859375e-05,
      "step": 14950,
      "training_step_time": 0.3860032558441162
    },
    {
      "epoch": 9.1253662109375e-05,
      "model_forward_time": 0.1154031753540039,
      "step": 14951
    },
    {
      "epoch": 9.1253662109375e-05,
      "step": 14951,
      "training_step_time": 0.41962146759033203
    },
    {
      "epoch": 9.1259765625e-05,
      "model_forward_time": 0.1148982048034668,
      "step": 14952
    },
    {
      "epoch": 9.1259765625e-05,
      "step": 14952,
      "training_step_time": 0.6107394695281982
    },
    {
      "epoch": 9.1265869140625e-05,
      "model_forward_time": 0.11469078063964844,
      "step": 14953
    },
    {
      "epoch": 9.1265869140625e-05,
      "step": 14953,
      "training_step_time": 0.39090800285339355
    },
    {
      "epoch": 9.127197265625e-05,
      "model_forward_time": 0.11510491371154785,
      "step": 14954
    },
    {
      "epoch": 9.127197265625e-05,
      "step": 14954,
      "training_step_time": 0.3807351589202881
    },
    {
      "epoch": 9.1278076171875e-05,
      "model_forward_time": 0.11458420753479004,
      "step": 14955
    },
    {
      "epoch": 9.1278076171875e-05,
      "step": 14955,
      "training_step_time": 0.476348876953125
    },
    {
      "epoch": 9.12841796875e-05,
      "model_forward_time": 0.11534404754638672,
      "step": 14956
    },
    {
      "epoch": 9.12841796875e-05,
      "step": 14956,
      "training_step_time": 0.47117090225219727
    },
    {
      "epoch": 9.1290283203125e-05,
      "model_forward_time": 0.1150057315826416,
      "step": 14957
    },
    {
      "epoch": 9.1290283203125e-05,
      "step": 14957,
      "training_step_time": 0.3639352321624756
    },
    {
      "epoch": 9.129638671875e-05,
      "model_forward_time": 0.11519527435302734,
      "step": 14958
    },
    {
      "epoch": 9.129638671875e-05,
      "step": 14958,
      "training_step_time": 0.4346194267272949
    },
    {
      "epoch": 9.1302490234375e-05,
      "model_forward_time": 0.11487841606140137,
      "step": 14959
    },
    {
      "epoch": 9.1302490234375e-05,
      "step": 14959,
      "training_step_time": 0.44167351722717285
    },
    {
      "epoch": 9.130859375e-05,
      "grad_norm": 0.1751086711883545,
      "learning_rate": 8.95246350057946e-05,
      "loss": 0.0489,
      "step": 14960
    },
    {
      "epoch": 9.130859375e-05,
      "model_forward_time": 0.1144707202911377,
      "step": 14960
    },
    {
      "epoch": 9.130859375e-05,
      "step": 14960,
      "training_step_time": 0.39297008514404297
    },
    {
      "epoch": 9.1314697265625e-05,
      "model_forward_time": 0.11499404907226562,
      "step": 14961
    },
    {
      "epoch": 9.1314697265625e-05,
      "step": 14961,
      "training_step_time": 0.39326930046081543
    },
    {
      "epoch": 9.132080078125e-05,
      "model_forward_time": 0.11478805541992188,
      "step": 14962
    },
    {
      "epoch": 9.132080078125e-05,
      "step": 14962,
      "training_step_time": 0.38414835929870605
    },
    {
      "epoch": 9.1326904296875e-05,
      "model_forward_time": 0.11484098434448242,
      "step": 14963
    },
    {
      "epoch": 9.1326904296875e-05,
      "step": 14963,
      "training_step_time": 0.3981785774230957
    },
    {
      "epoch": 9.13330078125e-05,
      "model_forward_time": 0.11500000953674316,
      "step": 14964
    },
    {
      "epoch": 9.13330078125e-05,
      "step": 14964,
      "training_step_time": 0.6058557033538818
    },
    {
      "epoch": 9.1339111328125e-05,
      "model_forward_time": 0.11506509780883789,
      "step": 14965
    },
    {
      "epoch": 9.1339111328125e-05,
      "step": 14965,
      "training_step_time": 0.4150557518005371
    },
    {
      "epoch": 9.134521484375e-05,
      "model_forward_time": 0.11481213569641113,
      "step": 14966
    },
    {
      "epoch": 9.134521484375e-05,
      "step": 14966,
      "training_step_time": 0.3883810043334961
    },
    {
      "epoch": 9.1351318359375e-05,
      "model_forward_time": 0.11551880836486816,
      "step": 14967
    },
    {
      "epoch": 9.1351318359375e-05,
      "step": 14967,
      "training_step_time": 0.3956611156463623
    },
    {
      "epoch": 9.1357421875e-05,
      "model_forward_time": 0.11570024490356445,
      "step": 14968
    },
    {
      "epoch": 9.1357421875e-05,
      "step": 14968,
      "training_step_time": 0.39135074615478516
    },
    {
      "epoch": 9.1363525390625e-05,
      "model_forward_time": 0.11539435386657715,
      "step": 14969
    },
    {
      "epoch": 9.1363525390625e-05,
      "step": 14969,
      "training_step_time": 0.3934822082519531
    },
    {
      "epoch": 9.136962890625e-05,
      "grad_norm": 0.12033876776695251,
      "learning_rate": 8.950775061878453e-05,
      "loss": 0.0481,
      "step": 14970
    },
    {
      "epoch": 9.136962890625e-05,
      "model_forward_time": 0.11483573913574219,
      "step": 14970
    },
    {
      "epoch": 9.136962890625e-05,
      "step": 14970,
      "training_step_time": 0.7069785594940186
    },
    {
      "epoch": 9.1375732421875e-05,
      "model_forward_time": 0.11479473114013672,
      "step": 14971
    },
    {
      "epoch": 9.1375732421875e-05,
      "step": 14971,
      "training_step_time": 0.4685666561126709
    },
    {
      "epoch": 9.13818359375e-05,
      "model_forward_time": 0.11438512802124023,
      "step": 14972
    },
    {
      "epoch": 9.13818359375e-05,
      "step": 14972,
      "training_step_time": 0.3894472122192383
    },
    {
      "epoch": 9.1387939453125e-05,
      "model_forward_time": 0.11416006088256836,
      "step": 14973
    },
    {
      "epoch": 9.1387939453125e-05,
      "step": 14973,
      "training_step_time": 0.46228551864624023
    },
    {
      "epoch": 9.139404296875e-05,
      "model_forward_time": 0.11691641807556152,
      "step": 14974
    },
    {
      "epoch": 9.139404296875e-05,
      "step": 14974,
      "training_step_time": 0.3827395439147949
    },
    {
      "epoch": 9.1400146484375e-05,
      "model_forward_time": 0.11400127410888672,
      "step": 14975
    },
    {
      "epoch": 9.1400146484375e-05,
      "step": 14975,
      "training_step_time": 0.38812947273254395
    },
    {
      "epoch": 9.140625e-05,
      "model_forward_time": 0.11496901512145996,
      "step": 14976
    },
    {
      "epoch": 9.140625e-05,
      "step": 14976,
      "training_step_time": 0.5223371982574463
    },
    {
      "epoch": 9.1412353515625e-05,
      "model_forward_time": 0.11484146118164062,
      "step": 14977
    },
    {
      "epoch": 9.1412353515625e-05,
      "step": 14977,
      "training_step_time": 0.3914680480957031
    },
    {
      "epoch": 9.141845703125e-05,
      "model_forward_time": 0.1148538589477539,
      "step": 14978
    },
    {
      "epoch": 9.141845703125e-05,
      "step": 14978,
      "training_step_time": 0.3876156806945801
    },
    {
      "epoch": 9.1424560546875e-05,
      "model_forward_time": 0.11477398872375488,
      "step": 14979
    },
    {
      "epoch": 9.1424560546875e-05,
      "step": 14979,
      "training_step_time": 0.39784836769104004
    },
    {
      "epoch": 9.14306640625e-05,
      "grad_norm": 0.1642753928899765,
      "learning_rate": 8.949085423036296e-05,
      "loss": 0.0516,
      "step": 14980
    },
    {
      "epoch": 9.14306640625e-05,
      "model_forward_time": 0.11477875709533691,
      "step": 14980
    },
    {
      "epoch": 9.14306640625e-05,
      "step": 14980,
      "training_step_time": 0.4058341979980469
    },
    {
      "epoch": 9.1436767578125e-05,
      "model_forward_time": 0.1146841049194336,
      "step": 14981
    },
    {
      "epoch": 9.1436767578125e-05,
      "step": 14981,
      "training_step_time": 0.4131925106048584
    },
    {
      "epoch": 9.144287109375e-05,
      "model_forward_time": 0.11464190483093262,
      "step": 14982
    },
    {
      "epoch": 9.144287109375e-05,
      "step": 14982,
      "training_step_time": 0.7217817306518555
    },
    {
      "epoch": 9.1448974609375e-05,
      "model_forward_time": 0.11450791358947754,
      "step": 14983
    },
    {
      "epoch": 9.1448974609375e-05,
      "step": 14983,
      "training_step_time": 0.3915855884552002
    },
    {
      "epoch": 9.1455078125e-05,
      "model_forward_time": 0.11457991600036621,
      "step": 14984
    },
    {
      "epoch": 9.1455078125e-05,
      "step": 14984,
      "training_step_time": 0.423492431640625
    },
    {
      "epoch": 9.1461181640625e-05,
      "model_forward_time": 0.11485123634338379,
      "step": 14985
    },
    {
      "epoch": 9.1461181640625e-05,
      "step": 14985,
      "training_step_time": 0.4259836673736572
    },
    {
      "epoch": 9.146728515625e-05,
      "model_forward_time": 0.1151282787322998,
      "step": 14986
    },
    {
      "epoch": 9.146728515625e-05,
      "step": 14986,
      "training_step_time": 0.3891563415527344
    },
    {
      "epoch": 9.1473388671875e-05,
      "model_forward_time": 0.11453390121459961,
      "step": 14987
    },
    {
      "epoch": 9.1473388671875e-05,
      "step": 14987,
      "training_step_time": 0.4745783805847168
    },
    {
      "epoch": 9.14794921875e-05,
      "model_forward_time": 0.11551475524902344,
      "step": 14988
    },
    {
      "epoch": 9.14794921875e-05,
      "step": 14988,
      "training_step_time": 0.4460177421569824
    },
    {
      "epoch": 9.1485595703125e-05,
      "model_forward_time": 0.11832094192504883,
      "step": 14989
    },
    {
      "epoch": 9.1485595703125e-05,
      "step": 14989,
      "training_step_time": 0.5001065731048584
    },
    {
      "epoch": 9.149169921875e-05,
      "grad_norm": 0.2146717607975006,
      "learning_rate": 8.947394584566258e-05,
      "loss": 0.059,
      "step": 14990
    },
    {
      "epoch": 9.149169921875e-05,
      "model_forward_time": 0.12226176261901855,
      "step": 14990
    },
    {
      "epoch": 9.149169921875e-05,
      "step": 14990,
      "training_step_time": 0.5894291400909424
    },
    {
      "epoch": 9.1497802734375e-05,
      "model_forward_time": 0.11945939064025879,
      "step": 14991
    },
    {
      "epoch": 9.1497802734375e-05,
      "step": 14991,
      "training_step_time": 0.651801586151123
    },
    {
      "epoch": 9.150390625e-05,
      "model_forward_time": 0.11837291717529297,
      "step": 14992
    },
    {
      "epoch": 9.150390625e-05,
      "step": 14992,
      "training_step_time": 0.6137247085571289
    },
    {
      "epoch": 9.1510009765625e-05,
      "model_forward_time": 0.11891865730285645,
      "step": 14993
    },
    {
      "epoch": 9.1510009765625e-05,
      "step": 14993,
      "training_step_time": 0.6827352046966553
    },
    {
      "epoch": 9.151611328125e-05,
      "model_forward_time": 0.13439202308654785,
      "step": 14994
    },
    {
      "epoch": 9.151611328125e-05,
      "step": 14994,
      "training_step_time": 0.7252633571624756
    },
    {
      "epoch": 9.1522216796875e-05,
      "model_forward_time": 0.11899757385253906,
      "step": 14995
    },
    {
      "epoch": 9.1522216796875e-05,
      "step": 14995,
      "training_step_time": 0.6447587013244629
    },
    {
      "epoch": 9.15283203125e-05,
      "model_forward_time": 0.1166849136352539,
      "step": 14996
    },
    {
      "epoch": 9.15283203125e-05,
      "step": 14996,
      "training_step_time": 0.5823044776916504
    },
    {
      "epoch": 9.1534423828125e-05,
      "model_forward_time": 0.1236422061920166,
      "step": 14997
    },
    {
      "epoch": 9.1534423828125e-05,
      "step": 14997,
      "training_step_time": 0.7506208419799805
    },
    {
      "epoch": 9.154052734375e-05,
      "model_forward_time": 0.11860990524291992,
      "step": 14998
    },
    {
      "epoch": 9.154052734375e-05,
      "step": 14998,
      "training_step_time": 0.6721856594085693
    },
    {
      "epoch": 9.1546630859375e-05,
      "model_forward_time": 0.12364625930786133,
      "step": 14999
    },
    {
      "epoch": 9.1546630859375e-05,
      "step": 14999,
      "training_step_time": 0.6582770347595215
    },
    {
      "epoch": 9.1552734375e-05,
      "grad_norm": 0.18430504202842712,
      "learning_rate": 8.945702546981969e-05,
      "loss": 0.052,
      "step": 15000
    },
    {
      "epoch": 9.1552734375e-05,
      "model_forward_time": 0.11661839485168457,
      "step": 15000
    },
    {
      "epoch": 9.1552734375e-05,
      "step": 15000,
      "training_step_time": 0.5155861377716064
    },
    {
      "epoch": 9.1558837890625e-05,
      "model_forward_time": 0.11828947067260742,
      "step": 15001
    },
    {
      "epoch": 9.1558837890625e-05,
      "step": 15001,
      "training_step_time": 0.6014449596405029
    },
    {
      "epoch": 9.156494140625e-05,
      "model_forward_time": 0.11661505699157715,
      "step": 15002
    },
    {
      "epoch": 9.156494140625e-05,
      "step": 15002,
      "training_step_time": 0.6513471603393555
    },
    {
      "epoch": 9.1571044921875e-05,
      "model_forward_time": 0.11953115463256836,
      "step": 15003
    },
    {
      "epoch": 9.1571044921875e-05,
      "step": 15003,
      "training_step_time": 0.6577026844024658
    },
    {
      "epoch": 9.15771484375e-05,
      "model_forward_time": 0.12220311164855957,
      "step": 15004
    },
    {
      "epoch": 9.15771484375e-05,
      "step": 15004,
      "training_step_time": 0.6792166233062744
    },
    {
      "epoch": 9.1583251953125e-05,
      "model_forward_time": 0.12121963500976562,
      "step": 15005
    },
    {
      "epoch": 9.1583251953125e-05,
      "step": 15005,
      "training_step_time": 0.6655070781707764
    },
    {
      "epoch": 9.158935546875e-05,
      "model_forward_time": 0.12049365043640137,
      "step": 15006
    },
    {
      "epoch": 9.158935546875e-05,
      "step": 15006,
      "training_step_time": 0.558274507522583
    },
    {
      "epoch": 9.1595458984375e-05,
      "model_forward_time": 0.11783409118652344,
      "step": 15007
    },
    {
      "epoch": 9.1595458984375e-05,
      "step": 15007,
      "training_step_time": 0.7288849353790283
    },
    {
      "epoch": 9.16015625e-05,
      "model_forward_time": 0.12113046646118164,
      "step": 15008
    },
    {
      "epoch": 9.16015625e-05,
      "step": 15008,
      "training_step_time": 0.5685195922851562
    },
    {
      "epoch": 9.1607666015625e-05,
      "model_forward_time": 0.11667919158935547,
      "step": 15009
    },
    {
      "epoch": 9.1607666015625e-05,
      "step": 15009,
      "training_step_time": 0.6517472267150879
    },
    {
      "epoch": 9.161376953125e-05,
      "grad_norm": 0.20884697139263153,
      "learning_rate": 8.944009310797426e-05,
      "loss": 0.0626,
      "step": 15010
    },
    {
      "epoch": 9.161376953125e-05,
      "model_forward_time": 0.12497091293334961,
      "step": 15010
    },
    {
      "epoch": 9.161376953125e-05,
      "step": 15010,
      "training_step_time": 0.6455214023590088
    },
    {
      "epoch": 9.1619873046875e-05,
      "model_forward_time": 0.11953377723693848,
      "step": 15011
    },
    {
      "epoch": 9.1619873046875e-05,
      "step": 15011,
      "training_step_time": 0.6474535465240479
    },
    {
      "epoch": 9.16259765625e-05,
      "model_forward_time": 0.1217496395111084,
      "step": 15012
    },
    {
      "epoch": 9.16259765625e-05,
      "step": 15012,
      "training_step_time": 0.70121169090271
    },
    {
      "epoch": 9.1632080078125e-05,
      "model_forward_time": 0.12627816200256348,
      "step": 15013
    },
    {
      "epoch": 9.1632080078125e-05,
      "step": 15013,
      "training_step_time": 0.6355352401733398
    },
    {
      "epoch": 9.163818359375e-05,
      "model_forward_time": 0.12009811401367188,
      "step": 15014
    },
    {
      "epoch": 9.163818359375e-05,
      "step": 15014,
      "training_step_time": 0.696098804473877
    },
    {
      "epoch": 9.1644287109375e-05,
      "model_forward_time": 0.11799335479736328,
      "step": 15015
    },
    {
      "epoch": 9.1644287109375e-05,
      "step": 15015,
      "training_step_time": 0.7509346008300781
    },
    {
      "epoch": 9.1650390625e-05,
      "model_forward_time": 0.12626075744628906,
      "step": 15016
    },
    {
      "epoch": 9.1650390625e-05,
      "step": 15016,
      "training_step_time": 0.699350118637085
    },
    {
      "epoch": 9.1656494140625e-05,
      "model_forward_time": 0.11656641960144043,
      "step": 15017
    },
    {
      "epoch": 9.1656494140625e-05,
      "step": 15017,
      "training_step_time": 0.7472987174987793
    },
    {
      "epoch": 9.166259765625e-05,
      "model_forward_time": 0.11747074127197266,
      "step": 15018
    },
    {
      "epoch": 9.166259765625e-05,
      "step": 15018,
      "training_step_time": 0.6591770648956299
    },
    {
      "epoch": 9.1668701171875e-05,
      "model_forward_time": 0.12266778945922852,
      "step": 15019
    },
    {
      "epoch": 9.1668701171875e-05,
      "step": 15019,
      "training_step_time": 0.6504011154174805
    },
    {
      "epoch": 9.16748046875e-05,
      "grad_norm": 0.15382100641727448,
      "learning_rate": 8.942314876526992e-05,
      "loss": 0.0595,
      "step": 15020
    },
    {
      "epoch": 9.16748046875e-05,
      "model_forward_time": 0.11749887466430664,
      "step": 15020
    },
    {
      "epoch": 9.16748046875e-05,
      "step": 15020,
      "training_step_time": 0.6893081665039062
    },
    {
      "epoch": 9.1680908203125e-05,
      "model_forward_time": 0.12318706512451172,
      "step": 15021
    },
    {
      "epoch": 9.1680908203125e-05,
      "step": 15021,
      "training_step_time": 0.6601462364196777
    },
    {
      "epoch": 9.168701171875e-05,
      "model_forward_time": 0.12691116333007812,
      "step": 15022
    },
    {
      "epoch": 9.168701171875e-05,
      "step": 15022,
      "training_step_time": 0.6937782764434814
    },
    {
      "epoch": 9.1693115234375e-05,
      "model_forward_time": 0.11822867393493652,
      "step": 15023
    },
    {
      "epoch": 9.1693115234375e-05,
      "step": 15023,
      "training_step_time": 0.6704840660095215
    },
    {
      "epoch": 9.169921875e-05,
      "model_forward_time": 0.12712574005126953,
      "step": 15024
    },
    {
      "epoch": 9.169921875e-05,
      "step": 15024,
      "training_step_time": 0.7162773609161377
    },
    {
      "epoch": 9.1705322265625e-05,
      "model_forward_time": 0.12726116180419922,
      "step": 15025
    },
    {
      "epoch": 9.1705322265625e-05,
      "step": 15025,
      "training_step_time": 0.5830395221710205
    },
    {
      "epoch": 9.171142578125e-05,
      "model_forward_time": 0.11870098114013672,
      "step": 15026
    },
    {
      "epoch": 9.171142578125e-05,
      "step": 15026,
      "training_step_time": 0.6910610198974609
    },
    {
      "epoch": 9.1717529296875e-05,
      "model_forward_time": 0.12138032913208008,
      "step": 15027
    },
    {
      "epoch": 9.1717529296875e-05,
      "step": 15027,
      "training_step_time": 0.6538875102996826
    },
    {
      "epoch": 9.17236328125e-05,
      "model_forward_time": 0.11867141723632812,
      "step": 15028
    },
    {
      "epoch": 9.17236328125e-05,
      "step": 15028,
      "training_step_time": 0.6623964309692383
    },
    {
      "epoch": 9.1729736328125e-05,
      "model_forward_time": 0.11815929412841797,
      "step": 15029
    },
    {
      "epoch": 9.1729736328125e-05,
      "step": 15029,
      "training_step_time": 0.7467739582061768
    },
    {
      "epoch": 9.173583984375e-05,
      "grad_norm": 0.1524696797132492,
      "learning_rate": 8.940619244685388e-05,
      "loss": 0.0642,
      "step": 15030
    },
    {
      "epoch": 9.173583984375e-05,
      "model_forward_time": 0.11587738990783691,
      "step": 15030
    },
    {
      "epoch": 9.173583984375e-05,
      "step": 15030,
      "training_step_time": 0.7098395824432373
    },
    {
      "epoch": 9.1741943359375e-05,
      "model_forward_time": 0.12316083908081055,
      "step": 15031
    },
    {
      "epoch": 9.1741943359375e-05,
      "step": 15031,
      "training_step_time": 0.7146272659301758
    },
    {
      "epoch": 9.1748046875e-05,
      "model_forward_time": 0.12851333618164062,
      "step": 15032
    },
    {
      "epoch": 9.1748046875e-05,
      "step": 15032,
      "training_step_time": 0.7164559364318848
    },
    {
      "epoch": 9.1754150390625e-05,
      "model_forward_time": 0.1158442497253418,
      "step": 15033
    },
    {
      "epoch": 9.1754150390625e-05,
      "step": 15033,
      "training_step_time": 0.7708473205566406
    },
    {
      "epoch": 9.176025390625e-05,
      "model_forward_time": 0.1232442855834961,
      "step": 15034
    },
    {
      "epoch": 9.176025390625e-05,
      "step": 15034,
      "training_step_time": 0.6844503879547119
    },
    {
      "epoch": 9.1766357421875e-05,
      "model_forward_time": 0.11993408203125,
      "step": 15035
    },
    {
      "epoch": 9.1766357421875e-05,
      "step": 15035,
      "training_step_time": 0.7341864109039307
    },
    {
      "epoch": 9.17724609375e-05,
      "model_forward_time": 0.11894011497497559,
      "step": 15036
    },
    {
      "epoch": 9.17724609375e-05,
      "step": 15036,
      "training_step_time": 0.7574632167816162
    },
    {
      "epoch": 9.1778564453125e-05,
      "model_forward_time": 0.11968588829040527,
      "step": 15037
    },
    {
      "epoch": 9.1778564453125e-05,
      "step": 15037,
      "training_step_time": 0.6715350151062012
    },
    {
      "epoch": 9.178466796875e-05,
      "model_forward_time": 0.11942911148071289,
      "step": 15038
    },
    {
      "epoch": 9.178466796875e-05,
      "step": 15038,
      "training_step_time": 0.6032121181488037
    },
    {
      "epoch": 9.1790771484375e-05,
      "model_forward_time": 0.1258866786956787,
      "step": 15039
    },
    {
      "epoch": 9.1790771484375e-05,
      "step": 15039,
      "training_step_time": 0.6250948905944824
    },
    {
      "epoch": 9.1796875e-05,
      "grad_norm": 0.193611741065979,
      "learning_rate": 8.938922415787703e-05,
      "loss": 0.0614,
      "step": 15040
    },
    {
      "epoch": 9.1796875e-05,
      "model_forward_time": 0.12187981605529785,
      "step": 15040
    },
    {
      "epoch": 9.1796875e-05,
      "step": 15040,
      "training_step_time": 0.6707010269165039
    },
    {
      "epoch": 9.1802978515625e-05,
      "model_forward_time": 0.12572550773620605,
      "step": 15041
    },
    {
      "epoch": 9.1802978515625e-05,
      "step": 15041,
      "training_step_time": 0.6837570667266846
    },
    {
      "epoch": 9.180908203125e-05,
      "model_forward_time": 0.12022137641906738,
      "step": 15042
    },
    {
      "epoch": 9.180908203125e-05,
      "step": 15042,
      "training_step_time": 0.7218999862670898
    },
    {
      "epoch": 9.1815185546875e-05,
      "model_forward_time": 0.12013387680053711,
      "step": 15043
    },
    {
      "epoch": 9.1815185546875e-05,
      "step": 15043,
      "training_step_time": 0.6981275081634521
    },
    {
      "epoch": 9.18212890625e-05,
      "model_forward_time": 0.12019991874694824,
      "step": 15044
    },
    {
      "epoch": 9.18212890625e-05,
      "step": 15044,
      "training_step_time": 0.6862096786499023
    },
    {
      "epoch": 9.1827392578125e-05,
      "model_forward_time": 0.12358474731445312,
      "step": 15045
    },
    {
      "epoch": 9.1827392578125e-05,
      "step": 15045,
      "training_step_time": 0.6584584712982178
    },
    {
      "epoch": 9.183349609375e-05,
      "model_forward_time": 0.11929583549499512,
      "step": 15046
    },
    {
      "epoch": 9.183349609375e-05,
      "step": 15046,
      "training_step_time": 0.56465744972229
    },
    {
      "epoch": 9.1839599609375e-05,
      "model_forward_time": 0.11777949333190918,
      "step": 15047
    },
    {
      "epoch": 9.1839599609375e-05,
      "step": 15047,
      "training_step_time": 0.44932079315185547
    },
    {
      "epoch": 9.1845703125e-05,
      "model_forward_time": 0.11716723442077637,
      "step": 15048
    },
    {
      "epoch": 9.1845703125e-05,
      "step": 15048,
      "training_step_time": 0.4420511722564697
    },
    {
      "epoch": 9.1851806640625e-05,
      "model_forward_time": 0.11782979965209961,
      "step": 15049
    },
    {
      "epoch": 9.1851806640625e-05,
      "step": 15049,
      "training_step_time": 0.4258091449737549
    },
    {
      "epoch": 9.185791015625e-05,
      "grad_norm": 0.12019986659288406,
      "learning_rate": 8.93722439034939e-05,
      "loss": 0.0615,
      "step": 15050
    },
    {
      "epoch": 9.185791015625e-05,
      "model_forward_time": 0.11667704582214355,
      "step": 15050
    },
    {
      "epoch": 9.185791015625e-05,
      "step": 15050,
      "training_step_time": 0.40212202072143555
    },
    {
      "epoch": 9.1864013671875e-05,
      "model_forward_time": 0.11568760871887207,
      "step": 15051
    },
    {
      "epoch": 9.1864013671875e-05,
      "step": 15051,
      "training_step_time": 0.4134845733642578
    },
    {
      "epoch": 9.18701171875e-05,
      "model_forward_time": 0.11565160751342773,
      "step": 15052
    },
    {
      "epoch": 9.18701171875e-05,
      "step": 15052,
      "training_step_time": 0.42783284187316895
    },
    {
      "epoch": 9.1876220703125e-05,
      "model_forward_time": 0.1145932674407959,
      "step": 15053
    },
    {
      "epoch": 9.1876220703125e-05,
      "step": 15053,
      "training_step_time": 0.44330859184265137
    },
    {
      "epoch": 9.188232421875e-05,
      "model_forward_time": 0.11593127250671387,
      "step": 15054
    },
    {
      "epoch": 9.188232421875e-05,
      "step": 15054,
      "training_step_time": 0.4295344352722168
    },
    {
      "epoch": 9.1888427734375e-05,
      "model_forward_time": 0.11516427993774414,
      "step": 15055
    },
    {
      "epoch": 9.1888427734375e-05,
      "step": 15055,
      "training_step_time": 0.4038543701171875
    },
    {
      "epoch": 9.189453125e-05,
      "model_forward_time": 0.11585712432861328,
      "step": 15056
    },
    {
      "epoch": 9.189453125e-05,
      "step": 15056,
      "training_step_time": 0.38754940032958984
    },
    {
      "epoch": 9.1900634765625e-05,
      "model_forward_time": 0.1174461841583252,
      "step": 15057
    },
    {
      "epoch": 9.1900634765625e-05,
      "step": 15057,
      "training_step_time": 0.3935379981994629
    },
    {
      "epoch": 9.190673828125e-05,
      "model_forward_time": 0.11583232879638672,
      "step": 15058
    },
    {
      "epoch": 9.190673828125e-05,
      "step": 15058,
      "training_step_time": 0.4698653221130371
    },
    {
      "epoch": 9.1912841796875e-05,
      "model_forward_time": 0.11524343490600586,
      "step": 15059
    },
    {
      "epoch": 9.1912841796875e-05,
      "step": 15059,
      "training_step_time": 0.45754289627075195
    },
    {
      "epoch": 9.19189453125e-05,
      "grad_norm": 0.2026972472667694,
      "learning_rate": 8.935525168886262e-05,
      "loss": 0.0596,
      "step": 15060
    },
    {
      "epoch": 9.19189453125e-05,
      "model_forward_time": 0.11484265327453613,
      "step": 15060
    },
    {
      "epoch": 9.19189453125e-05,
      "step": 15060,
      "training_step_time": 0.47989964485168457
    },
    {
      "epoch": 9.1925048828125e-05,
      "model_forward_time": 0.11509013175964355,
      "step": 15061
    },
    {
      "epoch": 9.1925048828125e-05,
      "step": 15061,
      "training_step_time": 0.39853334426879883
    },
    {
      "epoch": 9.193115234375e-05,
      "model_forward_time": 0.11566162109375,
      "step": 15062
    },
    {
      "epoch": 9.193115234375e-05,
      "step": 15062,
      "training_step_time": 0.3770561218261719
    },
    {
      "epoch": 9.1937255859375e-05,
      "model_forward_time": 0.1148526668548584,
      "step": 15063
    },
    {
      "epoch": 9.1937255859375e-05,
      "step": 15063,
      "training_step_time": 0.38849782943725586
    },
    {
      "epoch": 9.1943359375e-05,
      "model_forward_time": 0.11538887023925781,
      "step": 15064
    },
    {
      "epoch": 9.1943359375e-05,
      "step": 15064,
      "training_step_time": 0.39344143867492676
    },
    {
      "epoch": 9.1949462890625e-05,
      "model_forward_time": 0.1153101921081543,
      "step": 15065
    },
    {
      "epoch": 9.1949462890625e-05,
      "step": 15065,
      "training_step_time": 0.3874943256378174
    },
    {
      "epoch": 9.195556640625e-05,
      "model_forward_time": 0.11491513252258301,
      "step": 15066
    },
    {
      "epoch": 9.195556640625e-05,
      "step": 15066,
      "training_step_time": 0.4299781322479248
    },
    {
      "epoch": 9.1961669921875e-05,
      "model_forward_time": 0.11554527282714844,
      "step": 15067
    },
    {
      "epoch": 9.1961669921875e-05,
      "step": 15067,
      "training_step_time": 0.4029572010040283
    },
    {
      "epoch": 9.19677734375e-05,
      "model_forward_time": 0.1156456470489502,
      "step": 15068
    },
    {
      "epoch": 9.19677734375e-05,
      "step": 15068,
      "training_step_time": 0.38327741622924805
    },
    {
      "epoch": 9.1973876953125e-05,
      "model_forward_time": 0.11574363708496094,
      "step": 15069
    },
    {
      "epoch": 9.1973876953125e-05,
      "step": 15069,
      "training_step_time": 0.47206616401672363
    },
    {
      "epoch": 9.197998046875e-05,
      "grad_norm": 0.20656244456768036,
      "learning_rate": 8.933824751914502e-05,
      "loss": 0.0632,
      "step": 15070
    },
    {
      "epoch": 9.197998046875e-05,
      "model_forward_time": 0.11590075492858887,
      "step": 15070
    },
    {
      "epoch": 9.197998046875e-05,
      "step": 15070,
      "training_step_time": 0.4369010925292969
    },
    {
      "epoch": 9.1986083984375e-05,
      "model_forward_time": 0.1148381233215332,
      "step": 15071
    },
    {
      "epoch": 9.1986083984375e-05,
      "step": 15071,
      "training_step_time": 0.39451050758361816
    },
    {
      "epoch": 9.19921875e-05,
      "model_forward_time": 0.11956906318664551,
      "step": 15072
    },
    {
      "epoch": 9.19921875e-05,
      "step": 15072,
      "training_step_time": 0.38278889656066895
    },
    {
      "epoch": 9.1998291015625e-05,
      "model_forward_time": 0.11516714096069336,
      "step": 15073
    },
    {
      "epoch": 9.1998291015625e-05,
      "step": 15073,
      "training_step_time": 0.45638036727905273
    },
    {
      "epoch": 9.200439453125e-05,
      "model_forward_time": 0.11522746086120605,
      "step": 15074
    },
    {
      "epoch": 9.200439453125e-05,
      "step": 15074,
      "training_step_time": 0.449826717376709
    },
    {
      "epoch": 9.2010498046875e-05,
      "model_forward_time": 0.11531805992126465,
      "step": 15075
    },
    {
      "epoch": 9.2010498046875e-05,
      "step": 15075,
      "training_step_time": 0.5021941661834717
    },
    {
      "epoch": 9.20166015625e-05,
      "model_forward_time": 0.11513829231262207,
      "step": 15076
    },
    {
      "epoch": 9.20166015625e-05,
      "step": 15076,
      "training_step_time": 0.3951451778411865
    },
    {
      "epoch": 9.2022705078125e-05,
      "model_forward_time": 0.1158597469329834,
      "step": 15077
    },
    {
      "epoch": 9.2022705078125e-05,
      "step": 15077,
      "training_step_time": 0.39389634132385254
    },
    {
      "epoch": 9.202880859375e-05,
      "model_forward_time": 0.11506509780883789,
      "step": 15078
    },
    {
      "epoch": 9.202880859375e-05,
      "step": 15078,
      "training_step_time": 0.4039325714111328
    },
    {
      "epoch": 9.2034912109375e-05,
      "model_forward_time": 0.11493325233459473,
      "step": 15079
    },
    {
      "epoch": 9.2034912109375e-05,
      "step": 15079,
      "training_step_time": 0.39748215675354004
    },
    {
      "epoch": 9.2041015625e-05,
      "grad_norm": 0.18390078842639923,
      "learning_rate": 8.932123139950648e-05,
      "loss": 0.0566,
      "step": 15080
    },
    {
      "epoch": 9.2041015625e-05,
      "model_forward_time": 0.11469864845275879,
      "step": 15080
    },
    {
      "epoch": 9.2041015625e-05,
      "step": 15080,
      "training_step_time": 0.4063241481781006
    },
    {
      "epoch": 9.2047119140625e-05,
      "model_forward_time": 0.1148214340209961,
      "step": 15081
    },
    {
      "epoch": 9.2047119140625e-05,
      "step": 15081,
      "training_step_time": 0.41136598587036133
    },
    {
      "epoch": 9.205322265625e-05,
      "model_forward_time": 0.11455273628234863,
      "step": 15082
    },
    {
      "epoch": 9.205322265625e-05,
      "step": 15082,
      "training_step_time": 0.38896918296813965
    },
    {
      "epoch": 9.2059326171875e-05,
      "model_forward_time": 0.11514830589294434,
      "step": 15083
    },
    {
      "epoch": 9.2059326171875e-05,
      "step": 15083,
      "training_step_time": 0.4146842956542969
    },
    {
      "epoch": 9.20654296875e-05,
      "model_forward_time": 0.11466765403747559,
      "step": 15084
    },
    {
      "epoch": 9.20654296875e-05,
      "step": 15084,
      "training_step_time": 0.41019368171691895
    },
    {
      "epoch": 9.2071533203125e-05,
      "model_forward_time": 0.1149749755859375,
      "step": 15085
    },
    {
      "epoch": 9.2071533203125e-05,
      "step": 15085,
      "training_step_time": 0.4089357852935791
    },
    {
      "epoch": 9.207763671875e-05,
      "model_forward_time": 0.11461997032165527,
      "step": 15086
    },
    {
      "epoch": 9.207763671875e-05,
      "step": 15086,
      "training_step_time": 0.4027740955352783
    },
    {
      "epoch": 9.2083740234375e-05,
      "model_forward_time": 0.11495447158813477,
      "step": 15087
    },
    {
      "epoch": 9.2083740234375e-05,
      "step": 15087,
      "training_step_time": 0.43790769577026367
    },
    {
      "epoch": 9.208984375e-05,
      "model_forward_time": 0.11644935607910156,
      "step": 15088
    },
    {
      "epoch": 9.208984375e-05,
      "step": 15088,
      "training_step_time": 0.4294869899749756
    },
    {
      "epoch": 9.2095947265625e-05,
      "model_forward_time": 0.11573219299316406,
      "step": 15089
    },
    {
      "epoch": 9.2095947265625e-05,
      "step": 15089,
      "training_step_time": 0.48162269592285156
    },
    {
      "epoch": 9.210205078125e-05,
      "grad_norm": 0.1608247607946396,
      "learning_rate": 8.930420333511606e-05,
      "loss": 0.0596,
      "step": 15090
    },
    {
      "epoch": 9.210205078125e-05,
      "model_forward_time": 0.11550211906433105,
      "step": 15090
    },
    {
      "epoch": 9.210205078125e-05,
      "step": 15090,
      "training_step_time": 0.4289233684539795
    },
    {
      "epoch": 9.2108154296875e-05,
      "model_forward_time": 0.11485838890075684,
      "step": 15091
    },
    {
      "epoch": 9.2108154296875e-05,
      "step": 15091,
      "training_step_time": 0.400418758392334
    },
    {
      "epoch": 9.21142578125e-05,
      "model_forward_time": 0.11519980430603027,
      "step": 15092
    },
    {
      "epoch": 9.21142578125e-05,
      "step": 15092,
      "training_step_time": 0.4004509449005127
    },
    {
      "epoch": 9.2120361328125e-05,
      "model_forward_time": 0.11422967910766602,
      "step": 15093
    },
    {
      "epoch": 9.2120361328125e-05,
      "step": 15093,
      "training_step_time": 0.4354381561279297
    },
    {
      "epoch": 9.212646484375e-05,
      "model_forward_time": 0.11476445198059082,
      "step": 15094
    },
    {
      "epoch": 9.212646484375e-05,
      "step": 15094,
      "training_step_time": 0.4030601978302002
    },
    {
      "epoch": 9.2132568359375e-05,
      "model_forward_time": 0.11513400077819824,
      "step": 15095
    },
    {
      "epoch": 9.2132568359375e-05,
      "step": 15095,
      "training_step_time": 0.3874967098236084
    },
    {
      "epoch": 9.2138671875e-05,
      "model_forward_time": 0.1148223876953125,
      "step": 15096
    },
    {
      "epoch": 9.2138671875e-05,
      "step": 15096,
      "training_step_time": 0.4038522243499756
    },
    {
      "epoch": 9.2144775390625e-05,
      "model_forward_time": 0.1154031753540039,
      "step": 15097
    },
    {
      "epoch": 9.2144775390625e-05,
      "step": 15097,
      "training_step_time": 0.39391589164733887
    },
    {
      "epoch": 9.215087890625e-05,
      "model_forward_time": 0.11531686782836914,
      "step": 15098
    },
    {
      "epoch": 9.215087890625e-05,
      "step": 15098,
      "training_step_time": 0.48834872245788574
    },
    {
      "epoch": 9.2156982421875e-05,
      "model_forward_time": 0.11556005477905273,
      "step": 15099
    },
    {
      "epoch": 9.2156982421875e-05,
      "step": 15099,
      "training_step_time": 0.4488227367401123
    },
    {
      "epoch": 9.21630859375e-05,
      "grad_norm": 0.16243024170398712,
      "learning_rate": 8.928716333114643e-05,
      "loss": 0.0597,
      "step": 15100
    },
    {
      "epoch": 9.21630859375e-05,
      "model_forward_time": 0.11517715454101562,
      "step": 15100
    },
    {
      "epoch": 9.21630859375e-05,
      "step": 15100,
      "training_step_time": 0.39142751693725586
    },
    {
      "epoch": 9.2169189453125e-05,
      "model_forward_time": 0.11533212661743164,
      "step": 15101
    },
    {
      "epoch": 9.2169189453125e-05,
      "step": 15101,
      "training_step_time": 0.40218067169189453
    },
    {
      "epoch": 9.217529296875e-05,
      "model_forward_time": 0.11522459983825684,
      "step": 15102
    },
    {
      "epoch": 9.217529296875e-05,
      "step": 15102,
      "training_step_time": 0.42160844802856445
    },
    {
      "epoch": 9.2181396484375e-05,
      "model_forward_time": 0.11522936820983887,
      "step": 15103
    },
    {
      "epoch": 9.2181396484375e-05,
      "step": 15103,
      "training_step_time": 0.4924802780151367
    },
    {
      "epoch": 9.21875e-05,
      "model_forward_time": 0.11470437049865723,
      "step": 15104
    },
    {
      "epoch": 9.21875e-05,
      "step": 15104,
      "training_step_time": 0.4959228038787842
    },
    {
      "epoch": 9.2193603515625e-05,
      "model_forward_time": 0.1152639389038086,
      "step": 15105
    },
    {
      "epoch": 9.2193603515625e-05,
      "step": 15105,
      "training_step_time": 0.39427828788757324
    },
    {
      "epoch": 9.219970703125e-05,
      "model_forward_time": 0.11517620086669922,
      "step": 15106
    },
    {
      "epoch": 9.219970703125e-05,
      "step": 15106,
      "training_step_time": 0.39811110496520996
    },
    {
      "epoch": 9.2205810546875e-05,
      "model_forward_time": 0.11718344688415527,
      "step": 15107
    },
    {
      "epoch": 9.2205810546875e-05,
      "step": 15107,
      "training_step_time": 0.3834104537963867
    },
    {
      "epoch": 9.22119140625e-05,
      "model_forward_time": 0.11442923545837402,
      "step": 15108
    },
    {
      "epoch": 9.22119140625e-05,
      "step": 15108,
      "training_step_time": 0.422224760055542
    },
    {
      "epoch": 9.2218017578125e-05,
      "model_forward_time": 0.11541962623596191,
      "step": 15109
    },
    {
      "epoch": 9.2218017578125e-05,
      "step": 15109,
      "training_step_time": 0.39594173431396484
    },
    {
      "epoch": 9.222412109375e-05,
      "grad_norm": 0.28801557421684265,
      "learning_rate": 8.927011139277389e-05,
      "loss": 0.0666,
      "step": 15110
    },
    {
      "epoch": 9.222412109375e-05,
      "model_forward_time": 0.11627793312072754,
      "step": 15110
    },
    {
      "epoch": 9.222412109375e-05,
      "step": 15110,
      "training_step_time": 0.3860445022583008
    },
    {
      "epoch": 9.2230224609375e-05,
      "model_forward_time": 0.11535334587097168,
      "step": 15111
    },
    {
      "epoch": 9.2230224609375e-05,
      "step": 15111,
      "training_step_time": 0.5728464126586914
    },
    {
      "epoch": 9.2236328125e-05,
      "model_forward_time": 0.11625552177429199,
      "step": 15112
    },
    {
      "epoch": 9.2236328125e-05,
      "step": 15112,
      "training_step_time": 0.48663997650146484
    },
    {
      "epoch": 9.2242431640625e-05,
      "model_forward_time": 0.1146388053894043,
      "step": 15113
    },
    {
      "epoch": 9.2242431640625e-05,
      "step": 15113,
      "training_step_time": 0.43203043937683105
    },
    {
      "epoch": 9.224853515625e-05,
      "model_forward_time": 0.1143946647644043,
      "step": 15114
    },
    {
      "epoch": 9.224853515625e-05,
      "step": 15114,
      "training_step_time": 0.3909730911254883
    },
    {
      "epoch": 9.2254638671875e-05,
      "model_forward_time": 0.11534547805786133,
      "step": 15115
    },
    {
      "epoch": 9.2254638671875e-05,
      "step": 15115,
      "training_step_time": 0.3895092010498047
    },
    {
      "epoch": 9.22607421875e-05,
      "model_forward_time": 0.1156458854675293,
      "step": 15116
    },
    {
      "epoch": 9.22607421875e-05,
      "step": 15116,
      "training_step_time": 0.4364328384399414
    },
    {
      "epoch": 9.2266845703125e-05,
      "model_forward_time": 0.11504912376403809,
      "step": 15117
    },
    {
      "epoch": 9.2266845703125e-05,
      "step": 15117,
      "training_step_time": 0.6794376373291016
    },
    {
      "epoch": 9.227294921875e-05,
      "model_forward_time": 0.11472892761230469,
      "step": 15118
    },
    {
      "epoch": 9.227294921875e-05,
      "step": 15118,
      "training_step_time": 0.4351615905761719
    },
    {
      "epoch": 9.2279052734375e-05,
      "model_forward_time": 0.11442685127258301,
      "step": 15119
    },
    {
      "epoch": 9.2279052734375e-05,
      "step": 15119,
      "training_step_time": 0.45773935317993164
    },
    {
      "epoch": 9.228515625e-05,
      "grad_norm": 0.16183771193027496,
      "learning_rate": 8.92530475251784e-05,
      "loss": 0.0643,
      "step": 15120
    },
    {
      "epoch": 9.228515625e-05,
      "model_forward_time": 0.1147911548614502,
      "step": 15120
    },
    {
      "epoch": 9.228515625e-05,
      "step": 15120,
      "training_step_time": 0.3896768093109131
    },
    {
      "epoch": 9.2291259765625e-05,
      "model_forward_time": 0.11443805694580078,
      "step": 15121
    },
    {
      "epoch": 9.2291259765625e-05,
      "step": 15121,
      "training_step_time": 0.39343810081481934
    },
    {
      "epoch": 9.229736328125e-05,
      "model_forward_time": 0.11438798904418945,
      "step": 15122
    },
    {
      "epoch": 9.229736328125e-05,
      "step": 15122,
      "training_step_time": 0.3897833824157715
    },
    {
      "epoch": 9.2303466796875e-05,
      "model_forward_time": 0.11496543884277344,
      "step": 15123
    },
    {
      "epoch": 9.2303466796875e-05,
      "step": 15123,
      "training_step_time": 0.49584364891052246
    },
    {
      "epoch": 9.23095703125e-05,
      "model_forward_time": 0.11451840400695801,
      "step": 15124
    },
    {
      "epoch": 9.23095703125e-05,
      "step": 15124,
      "training_step_time": 0.393115758895874
    },
    {
      "epoch": 9.2315673828125e-05,
      "model_forward_time": 0.11651754379272461,
      "step": 15125
    },
    {
      "epoch": 9.2315673828125e-05,
      "step": 15125,
      "training_step_time": 0.38593530654907227
    },
    {
      "epoch": 9.232177734375e-05,
      "model_forward_time": 0.11472678184509277,
      "step": 15126
    },
    {
      "epoch": 9.232177734375e-05,
      "step": 15126,
      "training_step_time": 0.4356999397277832
    },
    {
      "epoch": 9.2327880859375e-05,
      "model_forward_time": 0.1147773265838623,
      "step": 15127
    },
    {
      "epoch": 9.2327880859375e-05,
      "step": 15127,
      "training_step_time": 0.41254758834838867
    },
    {
      "epoch": 9.2333984375e-05,
      "model_forward_time": 0.11447262763977051,
      "step": 15128
    },
    {
      "epoch": 9.2333984375e-05,
      "step": 15128,
      "training_step_time": 0.38721203804016113
    },
    {
      "epoch": 9.2340087890625e-05,
      "model_forward_time": 0.11517214775085449,
      "step": 15129
    },
    {
      "epoch": 9.2340087890625e-05,
      "step": 15129,
      "training_step_time": 0.6745824813842773
    },
    {
      "epoch": 9.234619140625e-05,
      "grad_norm": 0.230233833193779,
      "learning_rate": 8.923597173354345e-05,
      "loss": 0.0622,
      "step": 15130
    },
    {
      "epoch": 9.234619140625e-05,
      "model_forward_time": 0.11508393287658691,
      "step": 15130
    },
    {
      "epoch": 9.234619140625e-05,
      "step": 15130,
      "training_step_time": 0.3935253620147705
    },
    {
      "epoch": 9.2352294921875e-05,
      "model_forward_time": 0.11460542678833008,
      "step": 15131
    },
    {
      "epoch": 9.2352294921875e-05,
      "step": 15131,
      "training_step_time": 0.3689539432525635
    },
    {
      "epoch": 9.23583984375e-05,
      "model_forward_time": 0.11502838134765625,
      "step": 15132
    },
    {
      "epoch": 9.23583984375e-05,
      "step": 15132,
      "training_step_time": 0.4244511127471924
    },
    {
      "epoch": 9.2364501953125e-05,
      "model_forward_time": 0.11444830894470215,
      "step": 15133
    },
    {
      "epoch": 9.2364501953125e-05,
      "step": 15133,
      "training_step_time": 0.43706274032592773
    },
    {
      "epoch": 9.237060546875e-05,
      "model_forward_time": 0.11440801620483398,
      "step": 15134
    },
    {
      "epoch": 9.237060546875e-05,
      "step": 15134,
      "training_step_time": 0.39900779724121094
    },
    {
      "epoch": 9.2376708984375e-05,
      "model_forward_time": 0.11448359489440918,
      "step": 15135
    },
    {
      "epoch": 9.2376708984375e-05,
      "step": 15135,
      "training_step_time": 0.5296018123626709
    },
    {
      "epoch": 9.23828125e-05,
      "model_forward_time": 0.1150214672088623,
      "step": 15136
    },
    {
      "epoch": 9.23828125e-05,
      "step": 15136,
      "training_step_time": 0.3851325511932373
    },
    {
      "epoch": 9.2388916015625e-05,
      "model_forward_time": 0.11485910415649414,
      "step": 15137
    },
    {
      "epoch": 9.2388916015625e-05,
      "step": 15137,
      "training_step_time": 0.38547444343566895
    },
    {
      "epoch": 9.239501953125e-05,
      "model_forward_time": 0.11475133895874023,
      "step": 15138
    },
    {
      "epoch": 9.239501953125e-05,
      "step": 15138,
      "training_step_time": 0.3910200595855713
    },
    {
      "epoch": 9.2401123046875e-05,
      "model_forward_time": 0.11510729789733887,
      "step": 15139
    },
    {
      "epoch": 9.2401123046875e-05,
      "step": 15139,
      "training_step_time": 0.3953113555908203
    },
    {
      "epoch": 9.24072265625e-05,
      "grad_norm": 0.16834361851215363,
      "learning_rate": 8.921888402305628e-05,
      "loss": 0.055,
      "step": 15140
    },
    {
      "epoch": 9.24072265625e-05,
      "model_forward_time": 0.11485505104064941,
      "step": 15140
    },
    {
      "epoch": 9.24072265625e-05,
      "step": 15140,
      "training_step_time": 0.43816471099853516
    },
    {
      "epoch": 9.2413330078125e-05,
      "model_forward_time": 0.11464834213256836,
      "step": 15141
    },
    {
      "epoch": 9.2413330078125e-05,
      "step": 15141,
      "training_step_time": 0.7949860095977783
    },
    {
      "epoch": 9.241943359375e-05,
      "model_forward_time": 0.11523866653442383,
      "step": 15142
    },
    {
      "epoch": 9.241943359375e-05,
      "step": 15142,
      "training_step_time": 0.39075756072998047
    },
    {
      "epoch": 9.2425537109375e-05,
      "model_forward_time": 0.11409330368041992,
      "step": 15143
    },
    {
      "epoch": 9.2425537109375e-05,
      "step": 15143,
      "training_step_time": 0.4050731658935547
    },
    {
      "epoch": 9.2431640625e-05,
      "model_forward_time": 0.1139822006225586,
      "step": 15144
    },
    {
      "epoch": 9.2431640625e-05,
      "step": 15144,
      "training_step_time": 0.40457940101623535
    },
    {
      "epoch": 9.2437744140625e-05,
      "model_forward_time": 0.11450648307800293,
      "step": 15145
    },
    {
      "epoch": 9.2437744140625e-05,
      "step": 15145,
      "training_step_time": 0.40990686416625977
    },
    {
      "epoch": 9.244384765625e-05,
      "model_forward_time": 0.11455702781677246,
      "step": 15146
    },
    {
      "epoch": 9.244384765625e-05,
      "step": 15146,
      "training_step_time": 0.43956756591796875
    },
    {
      "epoch": 9.2449951171875e-05,
      "model_forward_time": 0.11530494689941406,
      "step": 15147
    },
    {
      "epoch": 9.2449951171875e-05,
      "step": 15147,
      "training_step_time": 0.6570279598236084
    },
    {
      "epoch": 9.24560546875e-05,
      "model_forward_time": 0.11400103569030762,
      "step": 15148
    },
    {
      "epoch": 9.24560546875e-05,
      "step": 15148,
      "training_step_time": 0.3848910331726074
    },
    {
      "epoch": 9.2462158203125e-05,
      "model_forward_time": 0.11503338813781738,
      "step": 15149
    },
    {
      "epoch": 9.2462158203125e-05,
      "step": 15149,
      "training_step_time": 0.3910033702850342
    },
    {
      "epoch": 9.246826171875e-05,
      "grad_norm": 0.13739991188049316,
      "learning_rate": 8.920178439890765e-05,
      "loss": 0.0563,
      "step": 15150
    },
    {
      "epoch": 9.246826171875e-05,
      "model_forward_time": 0.11463761329650879,
      "step": 15150
    },
    {
      "epoch": 9.246826171875e-05,
      "step": 15150,
      "training_step_time": 0.39955878257751465
    },
    {
      "epoch": 9.2474365234375e-05,
      "model_forward_time": 0.11426138877868652,
      "step": 15151
    },
    {
      "epoch": 9.2474365234375e-05,
      "step": 15151,
      "training_step_time": 0.3874013423919678
    },
    {
      "epoch": 9.248046875e-05,
      "model_forward_time": 0.11533665657043457,
      "step": 15152
    },
    {
      "epoch": 9.248046875e-05,
      "step": 15152,
      "training_step_time": 0.3879060745239258
    },
    {
      "epoch": 9.2486572265625e-05,
      "model_forward_time": 0.11509060859680176,
      "step": 15153
    },
    {
      "epoch": 9.2486572265625e-05,
      "step": 15153,
      "training_step_time": 0.7964861392974854
    },
    {
      "epoch": 9.249267578125e-05,
      "model_forward_time": 0.11472058296203613,
      "step": 15154
    },
    {
      "epoch": 9.249267578125e-05,
      "step": 15154,
      "training_step_time": 0.46851134300231934
    },
    {
      "epoch": 9.2498779296875e-05,
      "model_forward_time": 0.11403584480285645,
      "step": 15155
    },
    {
      "epoch": 9.2498779296875e-05,
      "step": 15155,
      "training_step_time": 0.4237549304962158
    },
    {
      "epoch": 9.25048828125e-05,
      "model_forward_time": 0.11472034454345703,
      "step": 15156
    },
    {
      "epoch": 9.25048828125e-05,
      "step": 15156,
      "training_step_time": 0.38580799102783203
    },
    {
      "epoch": 9.2510986328125e-05,
      "model_forward_time": 0.11456108093261719,
      "step": 15157
    },
    {
      "epoch": 9.2510986328125e-05,
      "step": 15157,
      "training_step_time": 0.4790530204772949
    },
    {
      "epoch": 9.251708984375e-05,
      "model_forward_time": 0.11486124992370605,
      "step": 15158
    },
    {
      "epoch": 9.251708984375e-05,
      "step": 15158,
      "training_step_time": 0.46864891052246094
    },
    {
      "epoch": 9.2523193359375e-05,
      "model_forward_time": 0.1148843765258789,
      "step": 15159
    },
    {
      "epoch": 9.2523193359375e-05,
      "step": 15159,
      "training_step_time": 0.5017914772033691
    },
    {
      "epoch": 9.2529296875e-05,
      "grad_norm": 0.1646459549665451,
      "learning_rate": 8.9184672866292e-05,
      "loss": 0.056,
      "step": 15160
    },
    {
      "epoch": 9.2529296875e-05,
      "model_forward_time": 0.11491775512695312,
      "step": 15160
    },
    {
      "epoch": 9.2529296875e-05,
      "step": 15160,
      "training_step_time": 0.43176960945129395
    },
    {
      "epoch": 9.2535400390625e-05,
      "model_forward_time": 0.11462593078613281,
      "step": 15161
    },
    {
      "epoch": 9.2535400390625e-05,
      "step": 15161,
      "training_step_time": 0.5136642456054688
    },
    {
      "epoch": 9.254150390625e-05,
      "model_forward_time": 0.11455297470092773,
      "step": 15162
    },
    {
      "epoch": 9.254150390625e-05,
      "step": 15162,
      "training_step_time": 0.38469409942626953
    },
    {
      "epoch": 9.2547607421875e-05,
      "model_forward_time": 0.11489558219909668,
      "step": 15163
    },
    {
      "epoch": 9.2547607421875e-05,
      "step": 15163,
      "training_step_time": 0.3877525329589844
    },
    {
      "epoch": 9.25537109375e-05,
      "model_forward_time": 0.11446237564086914,
      "step": 15164
    },
    {
      "epoch": 9.25537109375e-05,
      "step": 15164,
      "training_step_time": 0.38237953186035156
    },
    {
      "epoch": 9.2559814453125e-05,
      "model_forward_time": 0.11524701118469238,
      "step": 15165
    },
    {
      "epoch": 9.2559814453125e-05,
      "step": 15165,
      "training_step_time": 0.512505054473877
    },
    {
      "epoch": 9.256591796875e-05,
      "model_forward_time": 0.1146242618560791,
      "step": 15166
    },
    {
      "epoch": 9.256591796875e-05,
      "step": 15166,
      "training_step_time": 0.38263416290283203
    },
    {
      "epoch": 9.2572021484375e-05,
      "model_forward_time": 0.11499142646789551,
      "step": 15167
    },
    {
      "epoch": 9.2572021484375e-05,
      "step": 15167,
      "training_step_time": 0.439131498336792
    },
    {
      "epoch": 9.2578125e-05,
      "model_forward_time": 0.11476492881774902,
      "step": 15168
    },
    {
      "epoch": 9.2578125e-05,
      "step": 15168,
      "training_step_time": 0.400876522064209
    },
    {
      "epoch": 9.2584228515625e-05,
      "model_forward_time": 0.11510992050170898,
      "step": 15169
    },
    {
      "epoch": 9.2584228515625e-05,
      "step": 15169,
      "training_step_time": 0.4537827968597412
    },
    {
      "epoch": 9.259033203125e-05,
      "grad_norm": 0.19095827639102936,
      "learning_rate": 8.916754943040732e-05,
      "loss": 0.0581,
      "step": 15170
    },
    {
      "epoch": 9.259033203125e-05,
      "model_forward_time": 0.11510872840881348,
      "step": 15170
    },
    {
      "epoch": 9.259033203125e-05,
      "step": 15170,
      "training_step_time": 0.4042224884033203
    },
    {
      "epoch": 9.2596435546875e-05,
      "model_forward_time": 0.11475038528442383,
      "step": 15171
    },
    {
      "epoch": 9.2596435546875e-05,
      "step": 15171,
      "training_step_time": 0.6579766273498535
    },
    {
      "epoch": 9.26025390625e-05,
      "model_forward_time": 0.11500334739685059,
      "step": 15172
    },
    {
      "epoch": 9.26025390625e-05,
      "step": 15172,
      "training_step_time": 0.4275510311126709
    },
    {
      "epoch": 9.2608642578125e-05,
      "model_forward_time": 0.11446452140808105,
      "step": 15173
    },
    {
      "epoch": 9.2608642578125e-05,
      "step": 15173,
      "training_step_time": 0.3640279769897461
    },
    {
      "epoch": 9.261474609375e-05,
      "model_forward_time": 0.1143953800201416,
      "step": 15174
    },
    {
      "epoch": 9.261474609375e-05,
      "step": 15174,
      "training_step_time": 0.4507882595062256
    },
    {
      "epoch": 9.2620849609375e-05,
      "model_forward_time": 0.11421632766723633,
      "step": 15175
    },
    {
      "epoch": 9.2620849609375e-05,
      "step": 15175,
      "training_step_time": 0.4688224792480469
    },
    {
      "epoch": 9.2626953125e-05,
      "model_forward_time": 0.1142416000366211,
      "step": 15176
    },
    {
      "epoch": 9.2626953125e-05,
      "step": 15176,
      "training_step_time": 0.3829379081726074
    },
    {
      "epoch": 9.2633056640625e-05,
      "model_forward_time": 0.11437320709228516,
      "step": 15177
    },
    {
      "epoch": 9.2633056640625e-05,
      "step": 15177,
      "training_step_time": 0.5376236438751221
    },
    {
      "epoch": 9.263916015625e-05,
      "model_forward_time": 0.11485600471496582,
      "step": 15178
    },
    {
      "epoch": 9.263916015625e-05,
      "step": 15178,
      "training_step_time": 0.38654422760009766
    },
    {
      "epoch": 9.2645263671875e-05,
      "model_forward_time": 0.115631103515625,
      "step": 15179
    },
    {
      "epoch": 9.2645263671875e-05,
      "step": 15179,
      "training_step_time": 0.38724327087402344
    },
    {
      "epoch": 9.26513671875e-05,
      "grad_norm": 0.2643648087978363,
      "learning_rate": 8.91504140964553e-05,
      "loss": 0.0556,
      "step": 15180
    },
    {
      "epoch": 9.26513671875e-05,
      "model_forward_time": 0.11518740653991699,
      "step": 15180
    },
    {
      "epoch": 9.26513671875e-05,
      "step": 15180,
      "training_step_time": 0.390521764755249
    },
    {
      "epoch": 9.2657470703125e-05,
      "model_forward_time": 0.1152496337890625,
      "step": 15181
    },
    {
      "epoch": 9.2657470703125e-05,
      "step": 15181,
      "training_step_time": 0.4051625728607178
    },
    {
      "epoch": 9.266357421875e-05,
      "model_forward_time": 0.11534357070922852,
      "step": 15182
    },
    {
      "epoch": 9.266357421875e-05,
      "step": 15182,
      "training_step_time": 0.42582154273986816
    },
    {
      "epoch": 9.2669677734375e-05,
      "model_forward_time": 0.11455416679382324,
      "step": 15183
    },
    {
      "epoch": 9.2669677734375e-05,
      "step": 15183,
      "training_step_time": 0.7152886390686035
    },
    {
      "epoch": 9.267578125e-05,
      "model_forward_time": 0.11440682411193848,
      "step": 15184
    },
    {
      "epoch": 9.267578125e-05,
      "step": 15184,
      "training_step_time": 0.40831804275512695
    },
    {
      "epoch": 9.2681884765625e-05,
      "model_forward_time": 0.11468911170959473,
      "step": 15185
    },
    {
      "epoch": 9.2681884765625e-05,
      "step": 15185,
      "training_step_time": 0.4349827766418457
    },
    {
      "epoch": 9.268798828125e-05,
      "model_forward_time": 0.11483407020568848,
      "step": 15186
    },
    {
      "epoch": 9.268798828125e-05,
      "step": 15186,
      "training_step_time": 0.38866353034973145
    },
    {
      "epoch": 9.2694091796875e-05,
      "model_forward_time": 0.11439657211303711,
      "step": 15187
    },
    {
      "epoch": 9.2694091796875e-05,
      "step": 15187,
      "training_step_time": 0.3723936080932617
    },
    {
      "epoch": 9.27001953125e-05,
      "model_forward_time": 0.11403465270996094,
      "step": 15188
    },
    {
      "epoch": 9.27001953125e-05,
      "step": 15188,
      "training_step_time": 0.42145442962646484
    },
    {
      "epoch": 9.2706298828125e-05,
      "model_forward_time": 0.11518144607543945,
      "step": 15189
    },
    {
      "epoch": 9.2706298828125e-05,
      "step": 15189,
      "training_step_time": 0.5117795467376709
    },
    {
      "epoch": 9.271240234375e-05,
      "grad_norm": 0.17661724984645844,
      "learning_rate": 8.913326686964117e-05,
      "loss": 0.0553,
      "step": 15190
    },
    {
      "epoch": 9.271240234375e-05,
      "model_forward_time": 0.1148073673248291,
      "step": 15190
    },
    {
      "epoch": 9.271240234375e-05,
      "step": 15190,
      "training_step_time": 0.3898806571960449
    },
    {
      "epoch": 9.2718505859375e-05,
      "model_forward_time": 0.11547040939331055,
      "step": 15191
    },
    {
      "epoch": 9.2718505859375e-05,
      "step": 15191,
      "training_step_time": 0.40799975395202637
    },
    {
      "epoch": 9.2724609375e-05,
      "model_forward_time": 0.11549234390258789,
      "step": 15192
    },
    {
      "epoch": 9.2724609375e-05,
      "step": 15192,
      "training_step_time": 0.389556884765625
    },
    {
      "epoch": 9.2730712890625e-05,
      "model_forward_time": 0.11572766304016113,
      "step": 15193
    },
    {
      "epoch": 9.2730712890625e-05,
      "step": 15193,
      "training_step_time": 0.40438222885131836
    },
    {
      "epoch": 9.273681640625e-05,
      "model_forward_time": 0.1146841049194336,
      "step": 15194
    },
    {
      "epoch": 9.273681640625e-05,
      "step": 15194,
      "training_step_time": 0.39321374893188477
    },
    {
      "epoch": 9.2742919921875e-05,
      "model_forward_time": 0.11504006385803223,
      "step": 15195
    },
    {
      "epoch": 9.2742919921875e-05,
      "step": 15195,
      "training_step_time": 0.8027801513671875
    },
    {
      "epoch": 9.27490234375e-05,
      "model_forward_time": 0.11463785171508789,
      "step": 15196
    },
    {
      "epoch": 9.27490234375e-05,
      "step": 15196,
      "training_step_time": 0.42841172218322754
    },
    {
      "epoch": 9.2755126953125e-05,
      "model_forward_time": 0.11456012725830078,
      "step": 15197
    },
    {
      "epoch": 9.2755126953125e-05,
      "step": 15197,
      "training_step_time": 0.40981626510620117
    },
    {
      "epoch": 9.276123046875e-05,
      "model_forward_time": 0.11427950859069824,
      "step": 15198
    },
    {
      "epoch": 9.276123046875e-05,
      "step": 15198,
      "training_step_time": 0.39032626152038574
    },
    {
      "epoch": 9.2767333984375e-05,
      "model_forward_time": 0.11430525779724121,
      "step": 15199
    },
    {
      "epoch": 9.2767333984375e-05,
      "step": 15199,
      "training_step_time": 0.3978595733642578
    },
    {
      "epoch": 9.27734375e-05,
      "grad_norm": 0.15436197817325592,
      "learning_rate": 8.911610775517382e-05,
      "loss": 0.0606,
      "step": 15200
    },
    {
      "epoch": 9.27734375e-05,
      "model_forward_time": 0.11444497108459473,
      "step": 15200
    },
    {
      "epoch": 9.27734375e-05,
      "step": 15200,
      "training_step_time": 0.40199708938598633
    },
    {
      "epoch": 9.2779541015625e-05,
      "model_forward_time": 0.11511564254760742,
      "step": 15201
    },
    {
      "epoch": 9.2779541015625e-05,
      "step": 15201,
      "training_step_time": 0.651500940322876
    },
    {
      "epoch": 9.278564453125e-05,
      "model_forward_time": 0.11464834213256836,
      "step": 15202
    },
    {
      "epoch": 9.278564453125e-05,
      "step": 15202,
      "training_step_time": 0.4451611042022705
    },
    {
      "epoch": 9.2791748046875e-05,
      "model_forward_time": 0.1151893138885498,
      "step": 15203
    },
    {
      "epoch": 9.2791748046875e-05,
      "step": 15203,
      "training_step_time": 0.4523763656616211
    },
    {
      "epoch": 9.27978515625e-05,
      "model_forward_time": 0.1144399642944336,
      "step": 15204
    },
    {
      "epoch": 9.27978515625e-05,
      "step": 15204,
      "training_step_time": 0.4169158935546875
    },
    {
      "epoch": 9.2803955078125e-05,
      "model_forward_time": 0.11458396911621094,
      "step": 15205
    },
    {
      "epoch": 9.2803955078125e-05,
      "step": 15205,
      "training_step_time": 0.3894929885864258
    },
    {
      "epoch": 9.281005859375e-05,
      "model_forward_time": 0.11438202857971191,
      "step": 15206
    },
    {
      "epoch": 9.281005859375e-05,
      "step": 15206,
      "training_step_time": 0.3983781337738037
    },
    {
      "epoch": 9.2816162109375e-05,
      "model_forward_time": 0.11510825157165527,
      "step": 15207
    },
    {
      "epoch": 9.2816162109375e-05,
      "step": 15207,
      "training_step_time": 0.5254676342010498
    },
    {
      "epoch": 9.2822265625e-05,
      "model_forward_time": 0.11513304710388184,
      "step": 15208
    },
    {
      "epoch": 9.2822265625e-05,
      "step": 15208,
      "training_step_time": 0.44723963737487793
    },
    {
      "epoch": 9.2828369140625e-05,
      "model_forward_time": 0.11490011215209961,
      "step": 15209
    },
    {
      "epoch": 9.2828369140625e-05,
      "step": 15209,
      "training_step_time": 0.47434210777282715
    },
    {
      "epoch": 9.283447265625e-05,
      "grad_norm": 0.18510712683200836,
      "learning_rate": 8.909893675826574e-05,
      "loss": 0.057,
      "step": 15210
    },
    {
      "epoch": 9.283447265625e-05,
      "model_forward_time": 0.11446809768676758,
      "step": 15210
    },
    {
      "epoch": 9.283447265625e-05,
      "step": 15210,
      "training_step_time": 0.4490997791290283
    },
    {
      "epoch": 9.2840576171875e-05,
      "model_forward_time": 0.1150968074798584,
      "step": 15211
    },
    {
      "epoch": 9.2840576171875e-05,
      "step": 15211,
      "training_step_time": 0.4514458179473877
    },
    {
      "epoch": 9.28466796875e-05,
      "model_forward_time": 0.11561346054077148,
      "step": 15212
    },
    {
      "epoch": 9.28466796875e-05,
      "step": 15212,
      "training_step_time": 0.422607421875
    },
    {
      "epoch": 9.2852783203125e-05,
      "model_forward_time": 0.11439108848571777,
      "step": 15213
    },
    {
      "epoch": 9.2852783203125e-05,
      "step": 15213,
      "training_step_time": 0.4182467460632324
    },
    {
      "epoch": 9.285888671875e-05,
      "model_forward_time": 0.11496543884277344,
      "step": 15214
    },
    {
      "epoch": 9.285888671875e-05,
      "step": 15214,
      "training_step_time": 0.4825584888458252
    },
    {
      "epoch": 9.2864990234375e-05,
      "model_forward_time": 0.11499619483947754,
      "step": 15215
    },
    {
      "epoch": 9.2864990234375e-05,
      "step": 15215,
      "training_step_time": 0.45050930976867676
    },
    {
      "epoch": 9.287109375e-05,
      "model_forward_time": 0.1148686408996582,
      "step": 15216
    },
    {
      "epoch": 9.287109375e-05,
      "step": 15216,
      "training_step_time": 0.45818281173706055
    },
    {
      "epoch": 9.2877197265625e-05,
      "model_forward_time": 0.11557745933532715,
      "step": 15217
    },
    {
      "epoch": 9.2877197265625e-05,
      "step": 15217,
      "training_step_time": 0.3955225944519043
    },
    {
      "epoch": 9.288330078125e-05,
      "model_forward_time": 0.11432528495788574,
      "step": 15218
    },
    {
      "epoch": 9.288330078125e-05,
      "step": 15218,
      "training_step_time": 0.39371538162231445
    },
    {
      "epoch": 9.2889404296875e-05,
      "model_forward_time": 0.11579346656799316,
      "step": 15219
    },
    {
      "epoch": 9.2889404296875e-05,
      "step": 15219,
      "training_step_time": 0.4622049331665039
    },
    {
      "epoch": 9.28955078125e-05,
      "grad_norm": 0.1282314658164978,
      "learning_rate": 8.908175388413304e-05,
      "loss": 0.0571,
      "step": 15220
    },
    {
      "epoch": 9.28955078125e-05,
      "model_forward_time": 0.1151437759399414,
      "step": 15220
    },
    {
      "epoch": 9.28955078125e-05,
      "step": 15220,
      "training_step_time": 0.3864288330078125
    },
    {
      "epoch": 9.2901611328125e-05,
      "model_forward_time": 0.1147770881652832,
      "step": 15221
    },
    {
      "epoch": 9.2901611328125e-05,
      "step": 15221,
      "training_step_time": 0.38124918937683105
    },
    {
      "epoch": 9.290771484375e-05,
      "model_forward_time": 0.1147315502166748,
      "step": 15222
    },
    {
      "epoch": 9.290771484375e-05,
      "step": 15222,
      "training_step_time": 0.4260988235473633
    },
    {
      "epoch": 9.2913818359375e-05,
      "model_forward_time": 0.11513376235961914,
      "step": 15223
    },
    {
      "epoch": 9.2913818359375e-05,
      "step": 15223,
      "training_step_time": 0.3971061706542969
    },
    {
      "epoch": 9.2919921875e-05,
      "model_forward_time": 0.1153712272644043,
      "step": 15224
    },
    {
      "epoch": 9.2919921875e-05,
      "step": 15224,
      "training_step_time": 0.42645764350891113
    },
    {
      "epoch": 9.2926025390625e-05,
      "model_forward_time": 0.11522030830383301,
      "step": 15225
    },
    {
      "epoch": 9.2926025390625e-05,
      "step": 15225,
      "training_step_time": 0.7874224185943604
    },
    {
      "epoch": 9.293212890625e-05,
      "model_forward_time": 0.11502599716186523,
      "step": 15226
    },
    {
      "epoch": 9.293212890625e-05,
      "step": 15226,
      "training_step_time": 0.43973445892333984
    },
    {
      "epoch": 9.2938232421875e-05,
      "model_forward_time": 0.11431479454040527,
      "step": 15227
    },
    {
      "epoch": 9.2938232421875e-05,
      "step": 15227,
      "training_step_time": 0.38001537322998047
    },
    {
      "epoch": 9.29443359375e-05,
      "model_forward_time": 0.11407685279846191,
      "step": 15228
    },
    {
      "epoch": 9.29443359375e-05,
      "step": 15228,
      "training_step_time": 0.41416382789611816
    },
    {
      "epoch": 9.2950439453125e-05,
      "model_forward_time": 0.11435127258300781,
      "step": 15229
    },
    {
      "epoch": 9.2950439453125e-05,
      "step": 15229,
      "training_step_time": 0.42694520950317383
    },
    {
      "epoch": 9.295654296875e-05,
      "grad_norm": 0.22542771697044373,
      "learning_rate": 8.906455913799538e-05,
      "loss": 0.0571,
      "step": 15230
    },
    {
      "epoch": 9.295654296875e-05,
      "model_forward_time": 0.11514401435852051,
      "step": 15230
    },
    {
      "epoch": 9.295654296875e-05,
      "step": 15230,
      "training_step_time": 0.5085933208465576
    },
    {
      "epoch": 9.2962646484375e-05,
      "model_forward_time": 0.11508393287658691,
      "step": 15231
    },
    {
      "epoch": 9.2962646484375e-05,
      "step": 15231,
      "training_step_time": 0.4413278102874756
    },
    {
      "epoch": 9.296875e-05,
      "model_forward_time": 0.11563682556152344,
      "step": 15232
    },
    {
      "epoch": 9.296875e-05,
      "step": 15232,
      "training_step_time": 0.3833436965942383
    },
    {
      "epoch": 9.2974853515625e-05,
      "model_forward_time": 0.1152031421661377,
      "step": 15233
    },
    {
      "epoch": 9.2974853515625e-05,
      "step": 15233,
      "training_step_time": 0.3952932357788086
    },
    {
      "epoch": 9.298095703125e-05,
      "model_forward_time": 0.11467814445495605,
      "step": 15234
    },
    {
      "epoch": 9.298095703125e-05,
      "step": 15234,
      "training_step_time": 0.4048750400543213
    },
    {
      "epoch": 9.2987060546875e-05,
      "model_forward_time": 0.11546897888183594,
      "step": 15235
    },
    {
      "epoch": 9.2987060546875e-05,
      "step": 15235,
      "training_step_time": 0.3938612937927246
    },
    {
      "epoch": 9.29931640625e-05,
      "model_forward_time": 0.11536693572998047,
      "step": 15236
    },
    {
      "epoch": 9.29931640625e-05,
      "step": 15236,
      "training_step_time": 0.4310574531555176
    },
    {
      "epoch": 9.2999267578125e-05,
      "model_forward_time": 0.11509847640991211,
      "step": 15237
    },
    {
      "epoch": 9.2999267578125e-05,
      "step": 15237,
      "training_step_time": 0.4859142303466797
    },
    {
      "epoch": 9.300537109375e-05,
      "model_forward_time": 0.11534953117370605,
      "step": 15238
    },
    {
      "epoch": 9.300537109375e-05,
      "step": 15238,
      "training_step_time": 0.4415299892425537
    },
    {
      "epoch": 9.3011474609375e-05,
      "model_forward_time": 0.11509180068969727,
      "step": 15239
    },
    {
      "epoch": 9.3011474609375e-05,
      "step": 15239,
      "training_step_time": 0.38192176818847656
    },
    {
      "epoch": 9.3017578125e-05,
      "grad_norm": 0.2110159546136856,
      "learning_rate": 8.90473525250761e-05,
      "loss": 0.0584,
      "step": 15240
    },
    {
      "epoch": 9.3017578125e-05,
      "model_forward_time": 0.11539626121520996,
      "step": 15240
    },
    {
      "epoch": 9.3017578125e-05,
      "step": 15240,
      "training_step_time": 0.46424436569213867
    },
    {
      "epoch": 9.3023681640625e-05,
      "model_forward_time": 0.11540961265563965,
      "step": 15241
    },
    {
      "epoch": 9.3023681640625e-05,
      "step": 15241,
      "training_step_time": 0.41971540451049805
    },
    {
      "epoch": 9.302978515625e-05,
      "model_forward_time": 0.11415219306945801,
      "step": 15242
    },
    {
      "epoch": 9.302978515625e-05,
      "step": 15242,
      "training_step_time": 0.42998528480529785
    },
    {
      "epoch": 9.3035888671875e-05,
      "model_forward_time": 0.11547636985778809,
      "step": 15243
    },
    {
      "epoch": 9.3035888671875e-05,
      "step": 15243,
      "training_step_time": 0.39357900619506836
    },
    {
      "epoch": 9.30419921875e-05,
      "model_forward_time": 0.11483478546142578,
      "step": 15244
    },
    {
      "epoch": 9.30419921875e-05,
      "step": 15244,
      "training_step_time": 0.41190004348754883
    },
    {
      "epoch": 9.3048095703125e-05,
      "model_forward_time": 0.11501526832580566,
      "step": 15245
    },
    {
      "epoch": 9.3048095703125e-05,
      "step": 15245,
      "training_step_time": 0.42523694038391113
    },
    {
      "epoch": 9.305419921875e-05,
      "model_forward_time": 0.11526775360107422,
      "step": 15246
    },
    {
      "epoch": 9.305419921875e-05,
      "step": 15246,
      "training_step_time": 0.39554476737976074
    },
    {
      "epoch": 9.3060302734375e-05,
      "model_forward_time": 0.11486124992370605,
      "step": 15247
    },
    {
      "epoch": 9.3060302734375e-05,
      "step": 15247,
      "training_step_time": 0.39410877227783203
    },
    {
      "epoch": 9.306640625e-05,
      "model_forward_time": 0.11506319046020508,
      "step": 15248
    },
    {
      "epoch": 9.306640625e-05,
      "step": 15248,
      "training_step_time": 0.3868436813354492
    },
    {
      "epoch": 9.3072509765625e-05,
      "model_forward_time": 0.11532926559448242,
      "step": 15249
    },
    {
      "epoch": 9.3072509765625e-05,
      "step": 15249,
      "training_step_time": 0.6696255207061768
    },
    {
      "epoch": 9.307861328125e-05,
      "grad_norm": 0.1994684338569641,
      "learning_rate": 8.903013405060211e-05,
      "loss": 0.0593,
      "step": 15250
    },
    {
      "epoch": 9.307861328125e-05,
      "model_forward_time": 0.11535429954528809,
      "step": 15250
    },
    {
      "epoch": 9.307861328125e-05,
      "step": 15250,
      "training_step_time": 0.3880026340484619
    },
    {
      "epoch": 9.3084716796875e-05,
      "model_forward_time": 0.11480998992919922,
      "step": 15251
    },
    {
      "epoch": 9.3084716796875e-05,
      "step": 15251,
      "training_step_time": 0.3854801654815674
    },
    {
      "epoch": 9.30908203125e-05,
      "model_forward_time": 0.11491537094116211,
      "step": 15252
    },
    {
      "epoch": 9.30908203125e-05,
      "step": 15252,
      "training_step_time": 0.40094709396362305
    },
    {
      "epoch": 9.3096923828125e-05,
      "model_forward_time": 0.11565780639648438,
      "step": 15253
    },
    {
      "epoch": 9.3096923828125e-05,
      "step": 15253,
      "training_step_time": 0.389693021774292
    },
    {
      "epoch": 9.310302734375e-05,
      "model_forward_time": 0.11474227905273438,
      "step": 15254
    },
    {
      "epoch": 9.310302734375e-05,
      "step": 15254,
      "training_step_time": 0.39305543899536133
    },
    {
      "epoch": 9.3109130859375e-05,
      "model_forward_time": 0.11510205268859863,
      "step": 15255
    },
    {
      "epoch": 9.3109130859375e-05,
      "step": 15255,
      "training_step_time": 0.7643697261810303
    },
    {
      "epoch": 9.3115234375e-05,
      "model_forward_time": 0.11476302146911621,
      "step": 15256
    },
    {
      "epoch": 9.3115234375e-05,
      "step": 15256,
      "training_step_time": 0.4361443519592285
    },
    {
      "epoch": 9.3121337890625e-05,
      "model_forward_time": 0.11447668075561523,
      "step": 15257
    },
    {
      "epoch": 9.3121337890625e-05,
      "step": 15257,
      "training_step_time": 0.36560988426208496
    },
    {
      "epoch": 9.312744140625e-05,
      "model_forward_time": 0.11426711082458496,
      "step": 15258
    },
    {
      "epoch": 9.312744140625e-05,
      "step": 15258,
      "training_step_time": 0.4410736560821533
    },
    {
      "epoch": 9.3133544921875e-05,
      "model_forward_time": 0.11403751373291016,
      "step": 15259
    },
    {
      "epoch": 9.3133544921875e-05,
      "step": 15259,
      "training_step_time": 0.48720812797546387
    },
    {
      "epoch": 9.31396484375e-05,
      "grad_norm": 0.19878259301185608,
      "learning_rate": 8.901290371980393e-05,
      "loss": 0.0559,
      "step": 15260
    },
    {
      "epoch": 9.31396484375e-05,
      "model_forward_time": 0.11504244804382324,
      "step": 15260
    },
    {
      "epoch": 9.31396484375e-05,
      "step": 15260,
      "training_step_time": 0.39160728454589844
    },
    {
      "epoch": 9.3145751953125e-05,
      "model_forward_time": 0.11461591720581055,
      "step": 15261
    },
    {
      "epoch": 9.3145751953125e-05,
      "step": 15261,
      "training_step_time": 0.47376513481140137
    },
    {
      "epoch": 9.315185546875e-05,
      "model_forward_time": 0.1150970458984375,
      "step": 15262
    },
    {
      "epoch": 9.315185546875e-05,
      "step": 15262,
      "training_step_time": 0.42000460624694824
    },
    {
      "epoch": 9.3157958984375e-05,
      "model_forward_time": 0.11485838890075684,
      "step": 15263
    },
    {
      "epoch": 9.3157958984375e-05,
      "step": 15263,
      "training_step_time": 0.3924999237060547
    },
    {
      "epoch": 9.31640625e-05,
      "model_forward_time": 0.11493563652038574,
      "step": 15264
    },
    {
      "epoch": 9.31640625e-05,
      "step": 15264,
      "training_step_time": 0.49317240715026855
    },
    {
      "epoch": 9.3170166015625e-05,
      "model_forward_time": 0.11494922637939453,
      "step": 15265
    },
    {
      "epoch": 9.3170166015625e-05,
      "step": 15265,
      "training_step_time": 0.42264771461486816
    },
    {
      "epoch": 9.317626953125e-05,
      "model_forward_time": 0.11554479598999023,
      "step": 15266
    },
    {
      "epoch": 9.317626953125e-05,
      "step": 15266,
      "training_step_time": 0.3923201560974121
    },
    {
      "epoch": 9.3182373046875e-05,
      "model_forward_time": 0.11602926254272461,
      "step": 15267
    },
    {
      "epoch": 9.3182373046875e-05,
      "step": 15267,
      "training_step_time": 0.52838134765625
    },
    {
      "epoch": 9.31884765625e-05,
      "model_forward_time": 0.1143958568572998,
      "step": 15268
    },
    {
      "epoch": 9.31884765625e-05,
      "step": 15268,
      "training_step_time": 0.47125673294067383
    },
    {
      "epoch": 9.3194580078125e-05,
      "model_forward_time": 0.1145467758178711,
      "step": 15269
    },
    {
      "epoch": 9.3194580078125e-05,
      "step": 15269,
      "training_step_time": 0.4169270992279053
    },
    {
      "epoch": 9.320068359375e-05,
      "grad_norm": 0.20355042815208435,
      "learning_rate": 8.899566153791566e-05,
      "loss": 0.0612,
      "step": 15270
    },
    {
      "epoch": 9.320068359375e-05,
      "model_forward_time": 0.11455297470092773,
      "step": 15270
    },
    {
      "epoch": 9.320068359375e-05,
      "step": 15270,
      "training_step_time": 0.4789113998413086
    },
    {
      "epoch": 9.3206787109375e-05,
      "model_forward_time": 0.11492776870727539,
      "step": 15271
    },
    {
      "epoch": 9.3206787109375e-05,
      "step": 15271,
      "training_step_time": 0.3702256679534912
    },
    {
      "epoch": 9.3212890625e-05,
      "model_forward_time": 0.11441993713378906,
      "step": 15272
    },
    {
      "epoch": 9.3212890625e-05,
      "step": 15272,
      "training_step_time": 0.40308690071105957
    },
    {
      "epoch": 9.3218994140625e-05,
      "model_forward_time": 0.11464643478393555,
      "step": 15273
    },
    {
      "epoch": 9.3218994140625e-05,
      "step": 15273,
      "training_step_time": 0.45376038551330566
    },
    {
      "epoch": 9.322509765625e-05,
      "model_forward_time": 0.1149759292602539,
      "step": 15274
    },
    {
      "epoch": 9.322509765625e-05,
      "step": 15274,
      "training_step_time": 0.41002655029296875
    },
    {
      "epoch": 9.3231201171875e-05,
      "model_forward_time": 0.1143038272857666,
      "step": 15275
    },
    {
      "epoch": 9.3231201171875e-05,
      "step": 15275,
      "training_step_time": 0.3915383815765381
    },
    {
      "epoch": 9.32373046875e-05,
      "model_forward_time": 0.11452174186706543,
      "step": 15276
    },
    {
      "epoch": 9.32373046875e-05,
      "step": 15276,
      "training_step_time": 0.40041685104370117
    },
    {
      "epoch": 9.3243408203125e-05,
      "model_forward_time": 0.1149587631225586,
      "step": 15277
    },
    {
      "epoch": 9.3243408203125e-05,
      "step": 15277,
      "training_step_time": 0.4056663513183594
    },
    {
      "epoch": 9.324951171875e-05,
      "model_forward_time": 0.1145632266998291,
      "step": 15278
    },
    {
      "epoch": 9.324951171875e-05,
      "step": 15278,
      "training_step_time": 0.49935436248779297
    },
    {
      "epoch": 9.3255615234375e-05,
      "model_forward_time": 0.11522054672241211,
      "step": 15279
    },
    {
      "epoch": 9.3255615234375e-05,
      "step": 15279,
      "training_step_time": 0.6358211040496826
    },
    {
      "epoch": 9.326171875e-05,
      "grad_norm": 0.19240164756774902,
      "learning_rate": 8.897840751017506e-05,
      "loss": 0.0568,
      "step": 15280
    },
    {
      "epoch": 9.326171875e-05,
      "model_forward_time": 0.11538267135620117,
      "step": 15280
    },
    {
      "epoch": 9.326171875e-05,
      "step": 15280,
      "training_step_time": 0.3981015682220459
    },
    {
      "epoch": 9.3267822265625e-05,
      "model_forward_time": 0.1145319938659668,
      "step": 15281
    },
    {
      "epoch": 9.3267822265625e-05,
      "step": 15281,
      "training_step_time": 0.3939487934112549
    },
    {
      "epoch": 9.327392578125e-05,
      "model_forward_time": 0.1147301197052002,
      "step": 15282
    },
    {
      "epoch": 9.327392578125e-05,
      "step": 15282,
      "training_step_time": 0.4563729763031006
    },
    {
      "epoch": 9.3280029296875e-05,
      "model_forward_time": 0.11437034606933594,
      "step": 15283
    },
    {
      "epoch": 9.3280029296875e-05,
      "step": 15283,
      "training_step_time": 0.3949403762817383
    },
    {
      "epoch": 9.32861328125e-05,
      "model_forward_time": 0.11437726020812988,
      "step": 15284
    },
    {
      "epoch": 9.32861328125e-05,
      "step": 15284,
      "training_step_time": 0.40845417976379395
    },
    {
      "epoch": 9.3292236328125e-05,
      "model_forward_time": 0.11533117294311523,
      "step": 15285
    },
    {
      "epoch": 9.3292236328125e-05,
      "step": 15285,
      "training_step_time": 0.4781794548034668
    },
    {
      "epoch": 9.329833984375e-05,
      "model_forward_time": 0.11538529396057129,
      "step": 15286
    },
    {
      "epoch": 9.329833984375e-05,
      "step": 15286,
      "training_step_time": 0.42968225479125977
    },
    {
      "epoch": 9.3304443359375e-05,
      "model_forward_time": 0.11487364768981934,
      "step": 15287
    },
    {
      "epoch": 9.3304443359375e-05,
      "step": 15287,
      "training_step_time": 0.4661076068878174
    },
    {
      "epoch": 9.3310546875e-05,
      "model_forward_time": 0.1147618293762207,
      "step": 15288
    },
    {
      "epoch": 9.3310546875e-05,
      "step": 15288,
      "training_step_time": 0.4411895275115967
    },
    {
      "epoch": 9.3316650390625e-05,
      "model_forward_time": 0.11484670639038086,
      "step": 15289
    },
    {
      "epoch": 9.3316650390625e-05,
      "step": 15289,
      "training_step_time": 0.40417933464050293
    },
    {
      "epoch": 9.332275390625e-05,
      "grad_norm": 0.18975532054901123,
      "learning_rate": 8.89611416418234e-05,
      "loss": 0.0597,
      "step": 15290
    },
    {
      "epoch": 9.332275390625e-05,
      "model_forward_time": 0.11423063278198242,
      "step": 15290
    },
    {
      "epoch": 9.332275390625e-05,
      "step": 15290,
      "training_step_time": 0.39467549324035645
    },
    {
      "epoch": 9.3328857421875e-05,
      "model_forward_time": 0.11513996124267578,
      "step": 15291
    },
    {
      "epoch": 9.3328857421875e-05,
      "step": 15291,
      "training_step_time": 0.5994024276733398
    },
    {
      "epoch": 9.33349609375e-05,
      "model_forward_time": 0.11591410636901855,
      "step": 15292
    },
    {
      "epoch": 9.33349609375e-05,
      "step": 15292,
      "training_step_time": 0.4718165397644043
    },
    {
      "epoch": 9.3341064453125e-05,
      "model_forward_time": 0.11433553695678711,
      "step": 15293
    },
    {
      "epoch": 9.3341064453125e-05,
      "step": 15293,
      "training_step_time": 0.42471909523010254
    },
    {
      "epoch": 9.334716796875e-05,
      "model_forward_time": 0.11470603942871094,
      "step": 15294
    },
    {
      "epoch": 9.334716796875e-05,
      "step": 15294,
      "training_step_time": 0.39601898193359375
    },
    {
      "epoch": 9.3353271484375e-05,
      "model_forward_time": 0.11465620994567871,
      "step": 15295
    },
    {
      "epoch": 9.3353271484375e-05,
      "step": 15295,
      "training_step_time": 0.39842796325683594
    },
    {
      "epoch": 9.3359375e-05,
      "model_forward_time": 0.11475396156311035,
      "step": 15296
    },
    {
      "epoch": 9.3359375e-05,
      "step": 15296,
      "training_step_time": 0.4195287227630615
    },
    {
      "epoch": 9.3365478515625e-05,
      "model_forward_time": 0.11500191688537598,
      "step": 15297
    },
    {
      "epoch": 9.3365478515625e-05,
      "step": 15297,
      "training_step_time": 0.6813759803771973
    },
    {
      "epoch": 9.337158203125e-05,
      "model_forward_time": 0.11512017250061035,
      "step": 15298
    },
    {
      "epoch": 9.337158203125e-05,
      "step": 15298,
      "training_step_time": 0.38834285736083984
    },
    {
      "epoch": 9.3377685546875e-05,
      "model_forward_time": 0.11496591567993164,
      "step": 15299
    },
    {
      "epoch": 9.3377685546875e-05,
      "step": 15299,
      "training_step_time": 0.4716973304748535
    },
    {
      "epoch": 9.33837890625e-05,
      "grad_norm": 0.2031819075345993,
      "learning_rate": 8.894386393810563e-05,
      "loss": 0.0565,
      "step": 15300
    },
    {
      "epoch": 9.33837890625e-05,
      "model_forward_time": 0.11417984962463379,
      "step": 15300
    },
    {
      "epoch": 9.33837890625e-05,
      "step": 15300,
      "training_step_time": 0.4060702323913574
    },
    {
      "epoch": 9.3389892578125e-05,
      "model_forward_time": 0.11470150947570801,
      "step": 15301
    },
    {
      "epoch": 9.3389892578125e-05,
      "step": 15301,
      "training_step_time": 0.45375728607177734
    },
    {
      "epoch": 9.339599609375e-05,
      "model_forward_time": 0.11498498916625977,
      "step": 15302
    },
    {
      "epoch": 9.339599609375e-05,
      "step": 15302,
      "training_step_time": 0.4266927242279053
    },
    {
      "epoch": 9.3402099609375e-05,
      "model_forward_time": 0.11456894874572754,
      "step": 15303
    },
    {
      "epoch": 9.3402099609375e-05,
      "step": 15303,
      "training_step_time": 0.6342530250549316
    },
    {
      "epoch": 9.3408203125e-05,
      "model_forward_time": 0.11425352096557617,
      "step": 15304
    },
    {
      "epoch": 9.3408203125e-05,
      "step": 15304,
      "training_step_time": 0.39856839179992676
    },
    {
      "epoch": 9.3414306640625e-05,
      "model_forward_time": 0.1140897274017334,
      "step": 15305
    },
    {
      "epoch": 9.3414306640625e-05,
      "step": 15305,
      "training_step_time": 0.5213725566864014
    },
    {
      "epoch": 9.342041015625e-05,
      "model_forward_time": 0.1145925521850586,
      "step": 15306
    },
    {
      "epoch": 9.342041015625e-05,
      "step": 15306,
      "training_step_time": 0.4336578845977783
    },
    {
      "epoch": 9.3426513671875e-05,
      "model_forward_time": 0.11445069313049316,
      "step": 15307
    },
    {
      "epoch": 9.3426513671875e-05,
      "step": 15307,
      "training_step_time": 0.3926405906677246
    },
    {
      "epoch": 9.34326171875e-05,
      "model_forward_time": 0.11407852172851562,
      "step": 15308
    },
    {
      "epoch": 9.34326171875e-05,
      "step": 15308,
      "training_step_time": 0.3905904293060303
    },
    {
      "epoch": 9.3438720703125e-05,
      "model_forward_time": 0.11436820030212402,
      "step": 15309
    },
    {
      "epoch": 9.3438720703125e-05,
      "step": 15309,
      "training_step_time": 0.4338696002960205
    },
    {
      "epoch": 9.344482421875e-05,
      "grad_norm": 0.21118496358394623,
      "learning_rate": 8.892657440427025e-05,
      "loss": 0.0624,
      "step": 15310
    },
    {
      "epoch": 9.344482421875e-05,
      "model_forward_time": 0.11505365371704102,
      "step": 15310
    },
    {
      "epoch": 9.344482421875e-05,
      "step": 15310,
      "training_step_time": 0.4286918640136719
    },
    {
      "epoch": 9.3450927734375e-05,
      "model_forward_time": 0.11490559577941895,
      "step": 15311
    },
    {
      "epoch": 9.3450927734375e-05,
      "step": 15311,
      "training_step_time": 0.3948657512664795
    },
    {
      "epoch": 9.345703125e-05,
      "model_forward_time": 0.11520171165466309,
      "step": 15312
    },
    {
      "epoch": 9.345703125e-05,
      "step": 15312,
      "training_step_time": 0.401869535446167
    },
    {
      "epoch": 9.3463134765625e-05,
      "model_forward_time": 0.11502647399902344,
      "step": 15313
    },
    {
      "epoch": 9.3463134765625e-05,
      "step": 15313,
      "training_step_time": 0.3651149272918701
    },
    {
      "epoch": 9.346923828125e-05,
      "model_forward_time": 0.11508822441101074,
      "step": 15314
    },
    {
      "epoch": 9.346923828125e-05,
      "step": 15314,
      "training_step_time": 0.44892191886901855
    },
    {
      "epoch": 9.3475341796875e-05,
      "model_forward_time": 0.11507225036621094,
      "step": 15315
    },
    {
      "epoch": 9.3475341796875e-05,
      "step": 15315,
      "training_step_time": 0.5687158107757568
    },
    {
      "epoch": 9.34814453125e-05,
      "model_forward_time": 0.11495041847229004,
      "step": 15316
    },
    {
      "epoch": 9.34814453125e-05,
      "step": 15316,
      "training_step_time": 0.4007914066314697
    },
    {
      "epoch": 9.3487548828125e-05,
      "model_forward_time": 0.11463570594787598,
      "step": 15317
    },
    {
      "epoch": 9.3487548828125e-05,
      "step": 15317,
      "training_step_time": 0.380601167678833
    },
    {
      "epoch": 9.349365234375e-05,
      "model_forward_time": 0.11430215835571289,
      "step": 15318
    },
    {
      "epoch": 9.349365234375e-05,
      "step": 15318,
      "training_step_time": 0.3910353183746338
    },
    {
      "epoch": 9.3499755859375e-05,
      "model_forward_time": 0.11456775665283203,
      "step": 15319
    },
    {
      "epoch": 9.3499755859375e-05,
      "step": 15319,
      "training_step_time": 0.457080602645874
    },
    {
      "epoch": 9.3505859375e-05,
      "grad_norm": 0.24865080416202545,
      "learning_rate": 8.890927304556935e-05,
      "loss": 0.0665,
      "step": 15320
    },
    {
      "epoch": 9.3505859375e-05,
      "model_forward_time": 0.11502552032470703,
      "step": 15320
    },
    {
      "epoch": 9.3505859375e-05,
      "step": 15320,
      "training_step_time": 0.3992643356323242
    },
    {
      "epoch": 9.3511962890625e-05,
      "model_forward_time": 0.11532807350158691,
      "step": 15321
    },
    {
      "epoch": 9.3511962890625e-05,
      "step": 15321,
      "training_step_time": 0.7529423236846924
    },
    {
      "epoch": 9.351806640625e-05,
      "model_forward_time": 0.1144247055053711,
      "step": 15322
    },
    {
      "epoch": 9.351806640625e-05,
      "step": 15322,
      "training_step_time": 0.39403629302978516
    },
    {
      "epoch": 9.3524169921875e-05,
      "model_forward_time": 0.11398482322692871,
      "step": 15323
    },
    {
      "epoch": 9.3524169921875e-05,
      "step": 15323,
      "training_step_time": 0.39383816719055176
    },
    {
      "epoch": 9.35302734375e-05,
      "model_forward_time": 0.11429977416992188,
      "step": 15324
    },
    {
      "epoch": 9.35302734375e-05,
      "step": 15324,
      "training_step_time": 0.44032955169677734
    },
    {
      "epoch": 9.3536376953125e-05,
      "model_forward_time": 0.11489009857177734,
      "step": 15325
    },
    {
      "epoch": 9.3536376953125e-05,
      "step": 15325,
      "training_step_time": 0.41886019706726074
    },
    {
      "epoch": 9.354248046875e-05,
      "model_forward_time": 0.11408185958862305,
      "step": 15326
    },
    {
      "epoch": 9.354248046875e-05,
      "step": 15326,
      "training_step_time": 0.40174221992492676
    },
    {
      "epoch": 9.3548583984375e-05,
      "model_forward_time": 0.11570620536804199,
      "step": 15327
    },
    {
      "epoch": 9.3548583984375e-05,
      "step": 15327,
      "training_step_time": 0.6042087078094482
    },
    {
      "epoch": 9.35546875e-05,
      "model_forward_time": 0.11487698554992676,
      "step": 15328
    },
    {
      "epoch": 9.35546875e-05,
      "step": 15328,
      "training_step_time": 0.41106271743774414
    },
    {
      "epoch": 9.3560791015625e-05,
      "model_forward_time": 0.11452221870422363,
      "step": 15329
    },
    {
      "epoch": 9.3560791015625e-05,
      "step": 15329,
      "training_step_time": 0.47280263900756836
    },
    {
      "epoch": 9.356689453125e-05,
      "grad_norm": 0.28078460693359375,
      "learning_rate": 8.889195986725865e-05,
      "loss": 0.0606,
      "step": 15330
    },
    {
      "epoch": 9.356689453125e-05,
      "model_forward_time": 0.11473894119262695,
      "step": 15330
    },
    {
      "epoch": 9.356689453125e-05,
      "step": 15330,
      "training_step_time": 0.39730024337768555
    },
    {
      "epoch": 9.3572998046875e-05,
      "model_forward_time": 0.11429333686828613,
      "step": 15331
    },
    {
      "epoch": 9.3572998046875e-05,
      "step": 15331,
      "training_step_time": 0.38512611389160156
    },
    {
      "epoch": 9.35791015625e-05,
      "model_forward_time": 0.11410856246948242,
      "step": 15332
    },
    {
      "epoch": 9.35791015625e-05,
      "step": 15332,
      "training_step_time": 0.4781630039215088
    },
    {
      "epoch": 9.3585205078125e-05,
      "model_forward_time": 0.11438608169555664,
      "step": 15333
    },
    {
      "epoch": 9.3585205078125e-05,
      "step": 15333,
      "training_step_time": 0.6414108276367188
    },
    {
      "epoch": 9.359130859375e-05,
      "model_forward_time": 0.11412596702575684,
      "step": 15334
    },
    {
      "epoch": 9.359130859375e-05,
      "step": 15334,
      "training_step_time": 0.4474067687988281
    },
    {
      "epoch": 9.3597412109375e-05,
      "model_forward_time": 0.11418771743774414,
      "step": 15335
    },
    {
      "epoch": 9.3597412109375e-05,
      "step": 15335,
      "training_step_time": 0.39697861671447754
    },
    {
      "epoch": 9.3603515625e-05,
      "model_forward_time": 0.11374855041503906,
      "step": 15336
    },
    {
      "epoch": 9.3603515625e-05,
      "step": 15336,
      "training_step_time": 0.3915116786956787
    },
    {
      "epoch": 9.3609619140625e-05,
      "model_forward_time": 0.1145634651184082,
      "step": 15337
    },
    {
      "epoch": 9.3609619140625e-05,
      "step": 15337,
      "training_step_time": 0.4132401943206787
    },
    {
      "epoch": 9.361572265625e-05,
      "model_forward_time": 0.11480498313903809,
      "step": 15338
    },
    {
      "epoch": 9.361572265625e-05,
      "step": 15338,
      "training_step_time": 0.41656994819641113
    },
    {
      "epoch": 9.3621826171875e-05,
      "model_forward_time": 0.11447644233703613,
      "step": 15339
    },
    {
      "epoch": 9.3621826171875e-05,
      "step": 15339,
      "training_step_time": 0.6018173694610596
    },
    {
      "epoch": 9.36279296875e-05,
      "grad_norm": 0.261919766664505,
      "learning_rate": 8.887463487459742e-05,
      "loss": 0.0593,
      "step": 15340
    },
    {
      "epoch": 9.36279296875e-05,
      "model_forward_time": 0.1145472526550293,
      "step": 15340
    },
    {
      "epoch": 9.36279296875e-05,
      "step": 15340,
      "training_step_time": 0.4038882255554199
    },
    {
      "epoch": 9.3634033203125e-05,
      "model_forward_time": 0.1146085262298584,
      "step": 15341
    },
    {
      "epoch": 9.3634033203125e-05,
      "step": 15341,
      "training_step_time": 0.39790821075439453
    },
    {
      "epoch": 9.364013671875e-05,
      "model_forward_time": 0.11505126953125,
      "step": 15342
    },
    {
      "epoch": 9.364013671875e-05,
      "step": 15342,
      "training_step_time": 0.4505488872528076
    },
    {
      "epoch": 9.3646240234375e-05,
      "model_forward_time": 0.11484003067016602,
      "step": 15343
    },
    {
      "epoch": 9.3646240234375e-05,
      "step": 15343,
      "training_step_time": 0.4817500114440918
    },
    {
      "epoch": 9.365234375e-05,
      "model_forward_time": 0.11460995674133301,
      "step": 15344
    },
    {
      "epoch": 9.365234375e-05,
      "step": 15344,
      "training_step_time": 0.38968777656555176
    },
    {
      "epoch": 9.3658447265625e-05,
      "model_forward_time": 0.1144869327545166,
      "step": 15345
    },
    {
      "epoch": 9.3658447265625e-05,
      "step": 15345,
      "training_step_time": 0.49431419372558594
    },
    {
      "epoch": 9.366455078125e-05,
      "model_forward_time": 0.11514830589294434,
      "step": 15346
    },
    {
      "epoch": 9.366455078125e-05,
      "step": 15346,
      "training_step_time": 0.4390408992767334
    },
    {
      "epoch": 9.3670654296875e-05,
      "model_forward_time": 0.11455225944519043,
      "step": 15347
    },
    {
      "epoch": 9.3670654296875e-05,
      "step": 15347,
      "training_step_time": 0.3936944007873535
    },
    {
      "epoch": 9.36767578125e-05,
      "model_forward_time": 0.11465311050415039,
      "step": 15348
    },
    {
      "epoch": 9.36767578125e-05,
      "step": 15348,
      "training_step_time": 0.4153439998626709
    },
    {
      "epoch": 9.3682861328125e-05,
      "model_forward_time": 0.11523795127868652,
      "step": 15349
    },
    {
      "epoch": 9.3682861328125e-05,
      "step": 15349,
      "training_step_time": 0.402148962020874
    },
    {
      "epoch": 9.368896484375e-05,
      "grad_norm": 0.13054624199867249,
      "learning_rate": 8.885729807284856e-05,
      "loss": 0.0551,
      "step": 15350
    },
    {
      "epoch": 9.368896484375e-05,
      "model_forward_time": 0.11534476280212402,
      "step": 15350
    },
    {
      "epoch": 9.368896484375e-05,
      "step": 15350,
      "training_step_time": 0.38397836685180664
    },
    {
      "epoch": 9.3695068359375e-05,
      "model_forward_time": 0.11584758758544922,
      "step": 15351
    },
    {
      "epoch": 9.3695068359375e-05,
      "step": 15351,
      "training_step_time": 0.6863009929656982
    },
    {
      "epoch": 9.3701171875e-05,
      "model_forward_time": 0.11508297920227051,
      "step": 15352
    },
    {
      "epoch": 9.3701171875e-05,
      "step": 15352,
      "training_step_time": 0.4066798686981201
    },
    {
      "epoch": 9.3707275390625e-05,
      "model_forward_time": 0.11433768272399902,
      "step": 15353
    },
    {
      "epoch": 9.3707275390625e-05,
      "step": 15353,
      "training_step_time": 0.46550679206848145
    },
    {
      "epoch": 9.371337890625e-05,
      "model_forward_time": 0.11480188369750977,
      "step": 15354
    },
    {
      "epoch": 9.371337890625e-05,
      "step": 15354,
      "training_step_time": 0.40473175048828125
    },
    {
      "epoch": 9.3719482421875e-05,
      "model_forward_time": 0.11373448371887207,
      "step": 15355
    },
    {
      "epoch": 9.3719482421875e-05,
      "step": 15355,
      "training_step_time": 0.45429277420043945
    },
    {
      "epoch": 9.37255859375e-05,
      "model_forward_time": 0.11513113975524902,
      "step": 15356
    },
    {
      "epoch": 9.37255859375e-05,
      "step": 15356,
      "training_step_time": 0.4567713737487793
    },
    {
      "epoch": 9.3731689453125e-05,
      "model_forward_time": 0.11454176902770996,
      "step": 15357
    },
    {
      "epoch": 9.3731689453125e-05,
      "step": 15357,
      "training_step_time": 0.5176956653594971
    },
    {
      "epoch": 9.373779296875e-05,
      "model_forward_time": 0.1150503158569336,
      "step": 15358
    },
    {
      "epoch": 9.373779296875e-05,
      "step": 15358,
      "training_step_time": 0.39617180824279785
    },
    {
      "epoch": 9.3743896484375e-05,
      "model_forward_time": 0.11472344398498535,
      "step": 15359
    },
    {
      "epoch": 9.3743896484375e-05,
      "step": 15359,
      "training_step_time": 0.39066243171691895
    },
    {
      "epoch": 9.375e-05,
      "grad_norm": 0.17207522690296173,
      "learning_rate": 8.883994946727849e-05,
      "loss": 0.0546,
      "step": 15360
    },
    {
      "epoch": 9.375e-05,
      "model_forward_time": 0.11450076103210449,
      "step": 15360
    },
    {
      "epoch": 9.375e-05,
      "step": 15360,
      "training_step_time": 0.4149637222290039
    },
    {
      "epoch": 9.3756103515625e-05,
      "model_forward_time": 0.11491966247558594,
      "step": 15361
    },
    {
      "epoch": 9.3756103515625e-05,
      "step": 15361,
      "training_step_time": 0.4060335159301758
    },
    {
      "epoch": 9.376220703125e-05,
      "model_forward_time": 0.11495637893676758,
      "step": 15362
    },
    {
      "epoch": 9.376220703125e-05,
      "step": 15362,
      "training_step_time": 0.4825136661529541
    },
    {
      "epoch": 9.3768310546875e-05,
      "model_forward_time": 0.11480569839477539,
      "step": 15363
    },
    {
      "epoch": 9.3768310546875e-05,
      "step": 15363,
      "training_step_time": 0.6530511379241943
    },
    {
      "epoch": 9.37744140625e-05,
      "model_forward_time": 0.11462259292602539,
      "step": 15364
    },
    {
      "epoch": 9.37744140625e-05,
      "step": 15364,
      "training_step_time": 0.4376487731933594
    },
    {
      "epoch": 9.3780517578125e-05,
      "model_forward_time": 0.11522364616394043,
      "step": 15365
    },
    {
      "epoch": 9.3780517578125e-05,
      "step": 15365,
      "training_step_time": 0.4294114112854004
    },
    {
      "epoch": 9.378662109375e-05,
      "model_forward_time": 0.11485004425048828,
      "step": 15366
    },
    {
      "epoch": 9.378662109375e-05,
      "step": 15366,
      "training_step_time": 0.43323254585266113
    },
    {
      "epoch": 9.3792724609375e-05,
      "model_forward_time": 0.11498188972473145,
      "step": 15367
    },
    {
      "epoch": 9.3792724609375e-05,
      "step": 15367,
      "training_step_time": 0.39741015434265137
    },
    {
      "epoch": 9.3798828125e-05,
      "model_forward_time": 0.1147615909576416,
      "step": 15368
    },
    {
      "epoch": 9.3798828125e-05,
      "step": 15368,
      "training_step_time": 0.36672115325927734
    },
    {
      "epoch": 9.3804931640625e-05,
      "model_forward_time": 0.11425614356994629,
      "step": 15369
    },
    {
      "epoch": 9.3804931640625e-05,
      "step": 15369,
      "training_step_time": 0.663379430770874
    },
    {
      "epoch": 9.381103515625e-05,
      "grad_norm": 0.12360773235559464,
      "learning_rate": 8.882258906315729e-05,
      "loss": 0.0524,
      "step": 15370
    },
    {
      "epoch": 9.381103515625e-05,
      "model_forward_time": 0.11522030830383301,
      "step": 15370
    },
    {
      "epoch": 9.381103515625e-05,
      "step": 15370,
      "training_step_time": 0.4568941593170166
    },
    {
      "epoch": 9.3817138671875e-05,
      "model_forward_time": 0.1145162582397461,
      "step": 15371
    },
    {
      "epoch": 9.3817138671875e-05,
      "step": 15371,
      "training_step_time": 0.38782811164855957
    },
    {
      "epoch": 9.38232421875e-05,
      "model_forward_time": 0.11462831497192383,
      "step": 15372
    },
    {
      "epoch": 9.38232421875e-05,
      "step": 15372,
      "training_step_time": 0.39429354667663574
    },
    {
      "epoch": 9.3829345703125e-05,
      "model_forward_time": 0.11444544792175293,
      "step": 15373
    },
    {
      "epoch": 9.3829345703125e-05,
      "step": 15373,
      "training_step_time": 0.4134833812713623
    },
    {
      "epoch": 9.383544921875e-05,
      "model_forward_time": 0.11427140235900879,
      "step": 15374
    },
    {
      "epoch": 9.383544921875e-05,
      "step": 15374,
      "training_step_time": 0.463914155960083
    },
    {
      "epoch": 9.3841552734375e-05,
      "model_forward_time": 0.11471009254455566,
      "step": 15375
    },
    {
      "epoch": 9.3841552734375e-05,
      "step": 15375,
      "training_step_time": 0.5260617733001709
    },
    {
      "epoch": 9.384765625e-05,
      "model_forward_time": 0.11482048034667969,
      "step": 15376
    },
    {
      "epoch": 9.384765625e-05,
      "step": 15376,
      "training_step_time": 0.3877396583557129
    },
    {
      "epoch": 9.3853759765625e-05,
      "model_forward_time": 0.11525082588195801,
      "step": 15377
    },
    {
      "epoch": 9.3853759765625e-05,
      "step": 15377,
      "training_step_time": 0.40055179595947266
    },
    {
      "epoch": 9.385986328125e-05,
      "model_forward_time": 0.11486268043518066,
      "step": 15378
    },
    {
      "epoch": 9.385986328125e-05,
      "step": 15378,
      "training_step_time": 0.41052675247192383
    },
    {
      "epoch": 9.3865966796875e-05,
      "model_forward_time": 0.11466813087463379,
      "step": 15379
    },
    {
      "epoch": 9.3865966796875e-05,
      "step": 15379,
      "training_step_time": 0.41026878356933594
    },
    {
      "epoch": 9.38720703125e-05,
      "grad_norm": 0.1627439260482788,
      "learning_rate": 8.880521686575857e-05,
      "loss": 0.0546,
      "step": 15380
    },
    {
      "epoch": 9.38720703125e-05,
      "model_forward_time": 0.11628580093383789,
      "step": 15380
    },
    {
      "epoch": 9.38720703125e-05,
      "step": 15380,
      "training_step_time": 0.45394015312194824
    },
    {
      "epoch": 9.3878173828125e-05,
      "model_forward_time": 0.11531281471252441,
      "step": 15381
    },
    {
      "epoch": 9.3878173828125e-05,
      "step": 15381,
      "training_step_time": 0.558521032333374
    },
    {
      "epoch": 9.388427734375e-05,
      "model_forward_time": 0.11471700668334961,
      "step": 15382
    },
    {
      "epoch": 9.388427734375e-05,
      "step": 15382,
      "training_step_time": 0.43660521507263184
    },
    {
      "epoch": 9.3890380859375e-05,
      "model_forward_time": 0.11497020721435547,
      "step": 15383
    },
    {
      "epoch": 9.3890380859375e-05,
      "step": 15383,
      "training_step_time": 0.4107818603515625
    },
    {
      "epoch": 9.3896484375e-05,
      "model_forward_time": 0.11449384689331055,
      "step": 15384
    },
    {
      "epoch": 9.3896484375e-05,
      "step": 15384,
      "training_step_time": 0.38982582092285156
    },
    {
      "epoch": 9.3902587890625e-05,
      "model_forward_time": 0.11443185806274414,
      "step": 15385
    },
    {
      "epoch": 9.3902587890625e-05,
      "step": 15385,
      "training_step_time": 0.3886251449584961
    },
    {
      "epoch": 9.390869140625e-05,
      "model_forward_time": 0.11507487297058105,
      "step": 15386
    },
    {
      "epoch": 9.390869140625e-05,
      "step": 15386,
      "training_step_time": 0.3899812698364258
    },
    {
      "epoch": 9.3914794921875e-05,
      "model_forward_time": 0.11503410339355469,
      "step": 15387
    },
    {
      "epoch": 9.3914794921875e-05,
      "step": 15387,
      "training_step_time": 0.557666540145874
    },
    {
      "epoch": 9.39208984375e-05,
      "model_forward_time": 0.11522531509399414,
      "step": 15388
    },
    {
      "epoch": 9.39208984375e-05,
      "step": 15388,
      "training_step_time": 0.439439058303833
    },
    {
      "epoch": 9.3927001953125e-05,
      "model_forward_time": 0.11469340324401855,
      "step": 15389
    },
    {
      "epoch": 9.3927001953125e-05,
      "step": 15389,
      "training_step_time": 0.43558692932128906
    },
    {
      "epoch": 9.393310546875e-05,
      "grad_norm": 0.14894913136959076,
      "learning_rate": 8.878783288035957e-05,
      "loss": 0.0625,
      "step": 15390
    },
    {
      "epoch": 9.393310546875e-05,
      "model_forward_time": 0.11481571197509766,
      "step": 15390
    },
    {
      "epoch": 9.393310546875e-05,
      "step": 15390,
      "training_step_time": 0.40088963508605957
    },
    {
      "epoch": 9.3939208984375e-05,
      "model_forward_time": 0.11510348320007324,
      "step": 15391
    },
    {
      "epoch": 9.3939208984375e-05,
      "step": 15391,
      "training_step_time": 0.39817094802856445
    },
    {
      "epoch": 9.39453125e-05,
      "model_forward_time": 0.11470365524291992,
      "step": 15392
    },
    {
      "epoch": 9.39453125e-05,
      "step": 15392,
      "training_step_time": 0.3873310089111328
    },
    {
      "epoch": 9.3951416015625e-05,
      "model_forward_time": 0.11493754386901855,
      "step": 15393
    },
    {
      "epoch": 9.3951416015625e-05,
      "step": 15393,
      "training_step_time": 0.7941856384277344
    },
    {
      "epoch": 9.395751953125e-05,
      "model_forward_time": 0.11462783813476562,
      "step": 15394
    },
    {
      "epoch": 9.395751953125e-05,
      "step": 15394,
      "training_step_time": 0.3833954334259033
    },
    {
      "epoch": 9.3963623046875e-05,
      "model_forward_time": 0.11429452896118164,
      "step": 15395
    },
    {
      "epoch": 9.3963623046875e-05,
      "step": 15395,
      "training_step_time": 0.39638614654541016
    },
    {
      "epoch": 9.39697265625e-05,
      "model_forward_time": 0.1144704818725586,
      "step": 15396
    },
    {
      "epoch": 9.39697265625e-05,
      "step": 15396,
      "training_step_time": 0.39066004753112793
    },
    {
      "epoch": 9.3975830078125e-05,
      "model_forward_time": 0.11436772346496582,
      "step": 15397
    },
    {
      "epoch": 9.3975830078125e-05,
      "step": 15397,
      "training_step_time": 0.4457278251647949
    },
    {
      "epoch": 9.398193359375e-05,
      "model_forward_time": 0.11519479751586914,
      "step": 15398
    },
    {
      "epoch": 9.398193359375e-05,
      "step": 15398,
      "training_step_time": 0.47393298149108887
    },
    {
      "epoch": 9.3988037109375e-05,
      "model_forward_time": 0.11475658416748047,
      "step": 15399
    },
    {
      "epoch": 9.3988037109375e-05,
      "step": 15399,
      "training_step_time": 0.6400482654571533
    },
    {
      "epoch": 9.3994140625e-05,
      "grad_norm": 0.143758162856102,
      "learning_rate": 8.877043711224108e-05,
      "loss": 0.062,
      "step": 15400
    },
    {
      "epoch": 9.3994140625e-05,
      "model_forward_time": 0.11425662040710449,
      "step": 15400
    },
    {
      "epoch": 9.3994140625e-05,
      "step": 15400,
      "training_step_time": 0.42788147926330566
    },
    {
      "epoch": 9.4000244140625e-05,
      "model_forward_time": 0.11487746238708496,
      "step": 15401
    },
    {
      "epoch": 9.4000244140625e-05,
      "step": 15401,
      "training_step_time": 0.4591329097747803
    },
    {
      "epoch": 9.400634765625e-05,
      "model_forward_time": 0.11450529098510742,
      "step": 15402
    },
    {
      "epoch": 9.400634765625e-05,
      "step": 15402,
      "training_step_time": 0.4195854663848877
    },
    {
      "epoch": 9.4012451171875e-05,
      "model_forward_time": 0.11452150344848633,
      "step": 15403
    },
    {
      "epoch": 9.4012451171875e-05,
      "step": 15403,
      "training_step_time": 0.40948939323425293
    },
    {
      "epoch": 9.40185546875e-05,
      "model_forward_time": 0.11434221267700195,
      "step": 15404
    },
    {
      "epoch": 9.40185546875e-05,
      "step": 15404,
      "training_step_time": 0.41970205307006836
    },
    {
      "epoch": 9.4024658203125e-05,
      "model_forward_time": 0.11481404304504395,
      "step": 15405
    },
    {
      "epoch": 9.4024658203125e-05,
      "step": 15405,
      "training_step_time": 0.4677259922027588
    },
    {
      "epoch": 9.403076171875e-05,
      "model_forward_time": 0.11499381065368652,
      "step": 15406
    },
    {
      "epoch": 9.403076171875e-05,
      "step": 15406,
      "training_step_time": 0.48450732231140137
    },
    {
      "epoch": 9.4036865234375e-05,
      "model_forward_time": 0.1149446964263916,
      "step": 15407
    },
    {
      "epoch": 9.4036865234375e-05,
      "step": 15407,
      "training_step_time": 0.4888954162597656
    },
    {
      "epoch": 9.404296875e-05,
      "model_forward_time": 0.11441564559936523,
      "step": 15408
    },
    {
      "epoch": 9.404296875e-05,
      "step": 15408,
      "training_step_time": 0.3896803855895996
    },
    {
      "epoch": 9.4049072265625e-05,
      "model_forward_time": 0.11524248123168945,
      "step": 15409
    },
    {
      "epoch": 9.4049072265625e-05,
      "step": 15409,
      "training_step_time": 0.3963460922241211
    },
    {
      "epoch": 9.405517578125e-05,
      "grad_norm": 0.18338890373706818,
      "learning_rate": 8.875302956668747e-05,
      "loss": 0.054,
      "step": 15410
    },
    {
      "epoch": 9.405517578125e-05,
      "model_forward_time": 0.11500263214111328,
      "step": 15410
    },
    {
      "epoch": 9.405517578125e-05,
      "step": 15410,
      "training_step_time": 0.45143938064575195
    },
    {
      "epoch": 9.4061279296875e-05,
      "model_forward_time": 0.11443734169006348,
      "step": 15411
    },
    {
      "epoch": 9.4061279296875e-05,
      "step": 15411,
      "training_step_time": 0.46004152297973633
    },
    {
      "epoch": 9.40673828125e-05,
      "model_forward_time": 0.11507225036621094,
      "step": 15412
    },
    {
      "epoch": 9.40673828125e-05,
      "step": 15412,
      "training_step_time": 0.3896024227142334
    },
    {
      "epoch": 9.4073486328125e-05,
      "model_forward_time": 0.1151571273803711,
      "step": 15413
    },
    {
      "epoch": 9.4073486328125e-05,
      "step": 15413,
      "training_step_time": 0.3947734832763672
    },
    {
      "epoch": 9.407958984375e-05,
      "model_forward_time": 0.11475014686584473,
      "step": 15414
    },
    {
      "epoch": 9.407958984375e-05,
      "step": 15414,
      "training_step_time": 0.46306324005126953
    },
    {
      "epoch": 9.4085693359375e-05,
      "model_forward_time": 0.1146554946899414,
      "step": 15415
    },
    {
      "epoch": 9.4085693359375e-05,
      "step": 15415,
      "training_step_time": 0.48894166946411133
    },
    {
      "epoch": 9.4091796875e-05,
      "model_forward_time": 0.11454892158508301,
      "step": 15416
    },
    {
      "epoch": 9.4091796875e-05,
      "step": 15416,
      "training_step_time": 0.42565417289733887
    },
    {
      "epoch": 9.4097900390625e-05,
      "model_forward_time": 0.11425542831420898,
      "step": 15417
    },
    {
      "epoch": 9.4097900390625e-05,
      "step": 15417,
      "training_step_time": 0.5127396583557129
    },
    {
      "epoch": 9.410400390625e-05,
      "model_forward_time": 0.11541342735290527,
      "step": 15418
    },
    {
      "epoch": 9.410400390625e-05,
      "step": 15418,
      "training_step_time": 0.38437867164611816
    },
    {
      "epoch": 9.4110107421875e-05,
      "model_forward_time": 0.11528134346008301,
      "step": 15419
    },
    {
      "epoch": 9.4110107421875e-05,
      "step": 15419,
      "training_step_time": 0.4899446964263916
    },
    {
      "epoch": 9.41162109375e-05,
      "grad_norm": 0.1286918967962265,
      "learning_rate": 8.873561024898668e-05,
      "loss": 0.0584,
      "step": 15420
    },
    {
      "epoch": 9.41162109375e-05,
      "model_forward_time": 0.11383461952209473,
      "step": 15420
    },
    {
      "epoch": 9.41162109375e-05,
      "step": 15420,
      "training_step_time": 0.4494297504425049
    },
    {
      "epoch": 9.4122314453125e-05,
      "model_forward_time": 0.1145787239074707,
      "step": 15421
    },
    {
      "epoch": 9.4122314453125e-05,
      "step": 15421,
      "training_step_time": 0.47751402854919434
    },
    {
      "epoch": 9.412841796875e-05,
      "model_forward_time": 0.11420822143554688,
      "step": 15422
    },
    {
      "epoch": 9.412841796875e-05,
      "step": 15422,
      "training_step_time": 0.38382673263549805
    },
    {
      "epoch": 9.4134521484375e-05,
      "model_forward_time": 0.114837646484375,
      "step": 15423
    },
    {
      "epoch": 9.4134521484375e-05,
      "step": 15423,
      "training_step_time": 0.5332963466644287
    },
    {
      "epoch": 9.4140625e-05,
      "model_forward_time": 0.11500167846679688,
      "step": 15424
    },
    {
      "epoch": 9.4140625e-05,
      "step": 15424,
      "training_step_time": 0.38248610496520996
    },
    {
      "epoch": 9.4146728515625e-05,
      "model_forward_time": 0.1148538589477539,
      "step": 15425
    },
    {
      "epoch": 9.4146728515625e-05,
      "step": 15425,
      "training_step_time": 0.4519181251525879
    },
    {
      "epoch": 9.415283203125e-05,
      "model_forward_time": 0.11436676979064941,
      "step": 15426
    },
    {
      "epoch": 9.415283203125e-05,
      "step": 15426,
      "training_step_time": 0.38472676277160645
    },
    {
      "epoch": 9.4158935546875e-05,
      "model_forward_time": 0.11458182334899902,
      "step": 15427
    },
    {
      "epoch": 9.4158935546875e-05,
      "step": 15427,
      "training_step_time": 0.3965475559234619
    },
    {
      "epoch": 9.41650390625e-05,
      "model_forward_time": 0.11493277549743652,
      "step": 15428
    },
    {
      "epoch": 9.41650390625e-05,
      "step": 15428,
      "training_step_time": 0.381911039352417
    },
    {
      "epoch": 9.4171142578125e-05,
      "model_forward_time": 0.11555743217468262,
      "step": 15429
    },
    {
      "epoch": 9.4171142578125e-05,
      "step": 15429,
      "training_step_time": 0.5175402164459229
    },
    {
      "epoch": 9.417724609375e-05,
      "grad_norm": 0.10775893181562424,
      "learning_rate": 8.871817916443025e-05,
      "loss": 0.0526,
      "step": 15430
    },
    {
      "epoch": 9.417724609375e-05,
      "model_forward_time": 0.11553311347961426,
      "step": 15430
    },
    {
      "epoch": 9.417724609375e-05,
      "step": 15430,
      "training_step_time": 0.4382932186126709
    },
    {
      "epoch": 9.4183349609375e-05,
      "model_forward_time": 0.1152346134185791,
      "step": 15431
    },
    {
      "epoch": 9.4183349609375e-05,
      "step": 15431,
      "training_step_time": 0.3963801860809326
    },
    {
      "epoch": 9.4189453125e-05,
      "model_forward_time": 0.11541867256164551,
      "step": 15432
    },
    {
      "epoch": 9.4189453125e-05,
      "step": 15432,
      "training_step_time": 0.40550971031188965
    },
    {
      "epoch": 9.4195556640625e-05,
      "model_forward_time": 0.11511564254760742,
      "step": 15433
    },
    {
      "epoch": 9.4195556640625e-05,
      "step": 15433,
      "training_step_time": 0.396953821182251
    },
    {
      "epoch": 9.420166015625e-05,
      "model_forward_time": 0.11530780792236328,
      "step": 15434
    },
    {
      "epoch": 9.420166015625e-05,
      "step": 15434,
      "training_step_time": 0.39532971382141113
    },
    {
      "epoch": 9.4207763671875e-05,
      "model_forward_time": 0.11492633819580078,
      "step": 15435
    },
    {
      "epoch": 9.4207763671875e-05,
      "step": 15435,
      "training_step_time": 0.7338705062866211
    },
    {
      "epoch": 9.42138671875e-05,
      "model_forward_time": 0.11514568328857422,
      "step": 15436
    },
    {
      "epoch": 9.42138671875e-05,
      "step": 15436,
      "training_step_time": 0.39119720458984375
    },
    {
      "epoch": 9.4219970703125e-05,
      "model_forward_time": 0.11539983749389648,
      "step": 15437
    },
    {
      "epoch": 9.4219970703125e-05,
      "step": 15437,
      "training_step_time": 0.36655306816101074
    },
    {
      "epoch": 9.422607421875e-05,
      "model_forward_time": 0.11494016647338867,
      "step": 15438
    },
    {
      "epoch": 9.422607421875e-05,
      "step": 15438,
      "training_step_time": 0.4495096206665039
    },
    {
      "epoch": 9.4232177734375e-05,
      "model_forward_time": 0.11396002769470215,
      "step": 15439
    },
    {
      "epoch": 9.4232177734375e-05,
      "step": 15439,
      "training_step_time": 0.45311570167541504
    },
    {
      "epoch": 9.423828125e-05,
      "grad_norm": 0.15665987133979797,
      "learning_rate": 8.87007363183133e-05,
      "loss": 0.0516,
      "step": 15440
    },
    {
      "epoch": 9.423828125e-05,
      "model_forward_time": 0.11435246467590332,
      "step": 15440
    },
    {
      "epoch": 9.423828125e-05,
      "step": 15440,
      "training_step_time": 0.3914334774017334
    },
    {
      "epoch": 9.4244384765625e-05,
      "model_forward_time": 0.1146705150604248,
      "step": 15441
    },
    {
      "epoch": 9.4244384765625e-05,
      "step": 15441,
      "training_step_time": 0.5292928218841553
    },
    {
      "epoch": 9.425048828125e-05,
      "model_forward_time": 0.11549782752990723,
      "step": 15442
    },
    {
      "epoch": 9.425048828125e-05,
      "step": 15442,
      "training_step_time": 0.46445608139038086
    },
    {
      "epoch": 9.4256591796875e-05,
      "model_forward_time": 0.1146855354309082,
      "step": 15443
    },
    {
      "epoch": 9.4256591796875e-05,
      "step": 15443,
      "training_step_time": 0.42871832847595215
    },
    {
      "epoch": 9.42626953125e-05,
      "model_forward_time": 0.11479806900024414,
      "step": 15444
    },
    {
      "epoch": 9.42626953125e-05,
      "step": 15444,
      "training_step_time": 0.3904538154602051
    },
    {
      "epoch": 9.4268798828125e-05,
      "model_forward_time": 0.11444878578186035,
      "step": 15445
    },
    {
      "epoch": 9.4268798828125e-05,
      "step": 15445,
      "training_step_time": 0.4002034664154053
    },
    {
      "epoch": 9.427490234375e-05,
      "model_forward_time": 0.11510801315307617,
      "step": 15446
    },
    {
      "epoch": 9.427490234375e-05,
      "step": 15446,
      "training_step_time": 0.3898282051086426
    },
    {
      "epoch": 9.4281005859375e-05,
      "model_forward_time": 0.11584782600402832,
      "step": 15447
    },
    {
      "epoch": 9.4281005859375e-05,
      "step": 15447,
      "training_step_time": 0.560025691986084
    },
    {
      "epoch": 9.4287109375e-05,
      "model_forward_time": 0.11476564407348633,
      "step": 15448
    },
    {
      "epoch": 9.4287109375e-05,
      "step": 15448,
      "training_step_time": 0.38419198989868164
    },
    {
      "epoch": 9.4293212890625e-05,
      "model_forward_time": 0.11475515365600586,
      "step": 15449
    },
    {
      "epoch": 9.4293212890625e-05,
      "step": 15449,
      "training_step_time": 0.42409420013427734
    },
    {
      "epoch": 9.429931640625e-05,
      "grad_norm": 0.1336001753807068,
      "learning_rate": 8.868328171593448e-05,
      "loss": 0.0552,
      "step": 15450
    },
    {
      "epoch": 9.429931640625e-05,
      "model_forward_time": 0.11541271209716797,
      "step": 15450
    },
    {
      "epoch": 9.429931640625e-05,
      "step": 15450,
      "training_step_time": 0.39180541038513184
    },
    {
      "epoch": 9.4305419921875e-05,
      "model_forward_time": 0.11430597305297852,
      "step": 15451
    },
    {
      "epoch": 9.4305419921875e-05,
      "step": 15451,
      "training_step_time": 0.3635861873626709
    },
    {
      "epoch": 9.43115234375e-05,
      "model_forward_time": 0.1154625415802002,
      "step": 15452
    },
    {
      "epoch": 9.43115234375e-05,
      "step": 15452,
      "training_step_time": 0.40469789505004883
    },
    {
      "epoch": 9.4317626953125e-05,
      "model_forward_time": 0.11477065086364746,
      "step": 15453
    },
    {
      "epoch": 9.4317626953125e-05,
      "step": 15453,
      "training_step_time": 0.6155579090118408
    },
    {
      "epoch": 9.432373046875e-05,
      "model_forward_time": 0.11502671241760254,
      "step": 15454
    },
    {
      "epoch": 9.432373046875e-05,
      "step": 15454,
      "training_step_time": 0.38282322883605957
    },
    {
      "epoch": 9.4329833984375e-05,
      "model_forward_time": 0.11423134803771973,
      "step": 15455
    },
    {
      "epoch": 9.4329833984375e-05,
      "step": 15455,
      "training_step_time": 0.4601285457611084
    },
    {
      "epoch": 9.43359375e-05,
      "model_forward_time": 0.1144869327545166,
      "step": 15456
    },
    {
      "epoch": 9.43359375e-05,
      "step": 15456,
      "training_step_time": 0.42622852325439453
    },
    {
      "epoch": 9.4342041015625e-05,
      "model_forward_time": 0.11463689804077148,
      "step": 15457
    },
    {
      "epoch": 9.4342041015625e-05,
      "step": 15457,
      "training_step_time": 0.41297245025634766
    },
    {
      "epoch": 9.434814453125e-05,
      "model_forward_time": 0.11459016799926758,
      "step": 15458
    },
    {
      "epoch": 9.434814453125e-05,
      "step": 15458,
      "training_step_time": 0.39135217666625977
    },
    {
      "epoch": 9.4354248046875e-05,
      "model_forward_time": 0.11472177505493164,
      "step": 15459
    },
    {
      "epoch": 9.4354248046875e-05,
      "step": 15459,
      "training_step_time": 0.6613607406616211
    },
    {
      "epoch": 9.43603515625e-05,
      "grad_norm": 0.16053129732608795,
      "learning_rate": 8.866581536259605e-05,
      "loss": 0.0525,
      "step": 15460
    },
    {
      "epoch": 9.43603515625e-05,
      "model_forward_time": 0.1140592098236084,
      "step": 15460
    },
    {
      "epoch": 9.43603515625e-05,
      "step": 15460,
      "training_step_time": 0.38155484199523926
    },
    {
      "epoch": 9.4366455078125e-05,
      "model_forward_time": 0.11443877220153809,
      "step": 15461
    },
    {
      "epoch": 9.4366455078125e-05,
      "step": 15461,
      "training_step_time": 0.39983582496643066
    },
    {
      "epoch": 9.437255859375e-05,
      "model_forward_time": 0.11460304260253906,
      "step": 15462
    },
    {
      "epoch": 9.437255859375e-05,
      "step": 15462,
      "training_step_time": 0.4079318046569824
    },
    {
      "epoch": 9.4378662109375e-05,
      "model_forward_time": 0.11461901664733887,
      "step": 15463
    },
    {
      "epoch": 9.4378662109375e-05,
      "step": 15463,
      "training_step_time": 0.4757421016693115
    },
    {
      "epoch": 9.4384765625e-05,
      "model_forward_time": 0.11418676376342773,
      "step": 15464
    },
    {
      "epoch": 9.4384765625e-05,
      "step": 15464,
      "training_step_time": 0.401824951171875
    },
    {
      "epoch": 9.4390869140625e-05,
      "model_forward_time": 0.11498117446899414,
      "step": 15465
    },
    {
      "epoch": 9.4390869140625e-05,
      "step": 15465,
      "training_step_time": 0.7063884735107422
    },
    {
      "epoch": 9.439697265625e-05,
      "model_forward_time": 0.11410355567932129,
      "step": 15466
    },
    {
      "epoch": 9.439697265625e-05,
      "step": 15466,
      "training_step_time": 0.43889856338500977
    },
    {
      "epoch": 9.4403076171875e-05,
      "model_forward_time": 0.11408877372741699,
      "step": 15467
    },
    {
      "epoch": 9.4403076171875e-05,
      "step": 15467,
      "training_step_time": 0.4259493350982666
    },
    {
      "epoch": 9.44091796875e-05,
      "model_forward_time": 0.11456942558288574,
      "step": 15468
    },
    {
      "epoch": 9.44091796875e-05,
      "step": 15468,
      "training_step_time": 0.41120219230651855
    },
    {
      "epoch": 9.4415283203125e-05,
      "model_forward_time": 0.11415600776672363,
      "step": 15469
    },
    {
      "epoch": 9.4415283203125e-05,
      "step": 15469,
      "training_step_time": 0.40796327590942383
    },
    {
      "epoch": 9.442138671875e-05,
      "grad_norm": 0.2084455043077469,
      "learning_rate": 8.864833726360383e-05,
      "loss": 0.0563,
      "step": 15470
    },
    {
      "epoch": 9.442138671875e-05,
      "model_forward_time": 0.11423850059509277,
      "step": 15470
    },
    {
      "epoch": 9.442138671875e-05,
      "step": 15470,
      "training_step_time": 0.39551758766174316
    },
    {
      "epoch": 9.4427490234375e-05,
      "model_forward_time": 0.11467814445495605,
      "step": 15471
    },
    {
      "epoch": 9.4427490234375e-05,
      "step": 15471,
      "training_step_time": 0.5488111972808838
    },
    {
      "epoch": 9.443359375e-05,
      "model_forward_time": 0.11471295356750488,
      "step": 15472
    },
    {
      "epoch": 9.443359375e-05,
      "step": 15472,
      "training_step_time": 0.3883829116821289
    },
    {
      "epoch": 9.4439697265625e-05,
      "model_forward_time": 0.11547517776489258,
      "step": 15473
    },
    {
      "epoch": 9.4439697265625e-05,
      "step": 15473,
      "training_step_time": 0.38578319549560547
    },
    {
      "epoch": 9.444580078125e-05,
      "model_forward_time": 0.11475706100463867,
      "step": 15474
    },
    {
      "epoch": 9.444580078125e-05,
      "step": 15474,
      "training_step_time": 0.397568941116333
    },
    {
      "epoch": 9.4451904296875e-05,
      "model_forward_time": 0.11460280418395996,
      "step": 15475
    },
    {
      "epoch": 9.4451904296875e-05,
      "step": 15475,
      "training_step_time": 0.4100375175476074
    },
    {
      "epoch": 9.44580078125e-05,
      "model_forward_time": 0.11511683464050293,
      "step": 15476
    },
    {
      "epoch": 9.44580078125e-05,
      "step": 15476,
      "training_step_time": 0.39728832244873047
    },
    {
      "epoch": 9.4464111328125e-05,
      "model_forward_time": 0.1147317886352539,
      "step": 15477
    },
    {
      "epoch": 9.4464111328125e-05,
      "step": 15477,
      "training_step_time": 0.6488573551177979
    },
    {
      "epoch": 9.447021484375e-05,
      "model_forward_time": 0.11490082740783691,
      "step": 15478
    },
    {
      "epoch": 9.447021484375e-05,
      "step": 15478,
      "training_step_time": 0.38576388359069824
    },
    {
      "epoch": 9.4476318359375e-05,
      "model_forward_time": 0.11587691307067871,
      "step": 15479
    },
    {
      "epoch": 9.4476318359375e-05,
      "step": 15479,
      "training_step_time": 0.40900731086730957
    },
    {
      "epoch": 9.4482421875e-05,
      "grad_norm": 0.14941728115081787,
      "learning_rate": 8.863084742426719e-05,
      "loss": 0.0491,
      "step": 15480
    },
    {
      "epoch": 9.4482421875e-05,
      "model_forward_time": 0.11551713943481445,
      "step": 15480
    },
    {
      "epoch": 9.4482421875e-05,
      "step": 15480,
      "training_step_time": 0.4453709125518799
    },
    {
      "epoch": 9.4488525390625e-05,
      "model_forward_time": 0.11492443084716797,
      "step": 15481
    },
    {
      "epoch": 9.4488525390625e-05,
      "step": 15481,
      "training_step_time": 0.47787928581237793
    },
    {
      "epoch": 9.449462890625e-05,
      "model_forward_time": 0.1152811050415039,
      "step": 15482
    },
    {
      "epoch": 9.449462890625e-05,
      "step": 15482,
      "training_step_time": 0.418776273727417
    },
    {
      "epoch": 9.4500732421875e-05,
      "model_forward_time": 0.11443805694580078,
      "step": 15483
    },
    {
      "epoch": 9.4500732421875e-05,
      "step": 15483,
      "training_step_time": 0.5595870018005371
    },
    {
      "epoch": 9.45068359375e-05,
      "model_forward_time": 0.11480832099914551,
      "step": 15484
    },
    {
      "epoch": 9.45068359375e-05,
      "step": 15484,
      "training_step_time": 0.421525239944458
    },
    {
      "epoch": 9.4512939453125e-05,
      "model_forward_time": 0.11462759971618652,
      "step": 15485
    },
    {
      "epoch": 9.4512939453125e-05,
      "step": 15485,
      "training_step_time": 0.3942692279815674
    },
    {
      "epoch": 9.451904296875e-05,
      "model_forward_time": 0.11651968955993652,
      "step": 15486
    },
    {
      "epoch": 9.451904296875e-05,
      "step": 15486,
      "training_step_time": 0.39235925674438477
    },
    {
      "epoch": 9.4525146484375e-05,
      "model_forward_time": 0.11521267890930176,
      "step": 15487
    },
    {
      "epoch": 9.4525146484375e-05,
      "step": 15487,
      "training_step_time": 0.39795947074890137
    },
    {
      "epoch": 9.453125e-05,
      "model_forward_time": 0.11474132537841797,
      "step": 15488
    },
    {
      "epoch": 9.453125e-05,
      "step": 15488,
      "training_step_time": 0.38848876953125
    },
    {
      "epoch": 9.4537353515625e-05,
      "model_forward_time": 0.11488890647888184,
      "step": 15489
    },
    {
      "epoch": 9.4537353515625e-05,
      "step": 15489,
      "training_step_time": 0.7803037166595459
    },
    {
      "epoch": 9.454345703125e-05,
      "grad_norm": 0.13205072283744812,
      "learning_rate": 8.86133458498991e-05,
      "loss": 0.0585,
      "step": 15490
    },
    {
      "epoch": 9.454345703125e-05,
      "model_forward_time": 0.11494660377502441,
      "step": 15490
    },
    {
      "epoch": 9.454345703125e-05,
      "step": 15490,
      "training_step_time": 0.4064788818359375
    },
    {
      "epoch": 9.4549560546875e-05,
      "model_forward_time": 0.11451482772827148,
      "step": 15491
    },
    {
      "epoch": 9.4549560546875e-05,
      "step": 15491,
      "training_step_time": 0.38427305221557617
    },
    {
      "epoch": 9.45556640625e-05,
      "model_forward_time": 0.11497998237609863,
      "step": 15492
    },
    {
      "epoch": 9.45556640625e-05,
      "step": 15492,
      "training_step_time": 0.38498616218566895
    },
    {
      "epoch": 9.4561767578125e-05,
      "model_forward_time": 0.11422562599182129,
      "step": 15493
    },
    {
      "epoch": 9.4561767578125e-05,
      "step": 15493,
      "training_step_time": 0.4203352928161621
    },
    {
      "epoch": 9.456787109375e-05,
      "model_forward_time": 0.1146240234375,
      "step": 15494
    },
    {
      "epoch": 9.456787109375e-05,
      "step": 15494,
      "training_step_time": 0.453855037689209
    },
    {
      "epoch": 9.4573974609375e-05,
      "model_forward_time": 0.11447668075561523,
      "step": 15495
    },
    {
      "epoch": 9.4573974609375e-05,
      "step": 15495,
      "training_step_time": 0.5672168731689453
    },
    {
      "epoch": 9.4580078125e-05,
      "model_forward_time": 0.11521077156066895,
      "step": 15496
    },
    {
      "epoch": 9.4580078125e-05,
      "step": 15496,
      "training_step_time": 0.4124259948730469
    },
    {
      "epoch": 9.4586181640625e-05,
      "model_forward_time": 0.11502933502197266,
      "step": 15497
    },
    {
      "epoch": 9.4586181640625e-05,
      "step": 15497,
      "training_step_time": 0.5088145732879639
    },
    {
      "epoch": 9.459228515625e-05,
      "model_forward_time": 0.1145634651184082,
      "step": 15498
    },
    {
      "epoch": 9.459228515625e-05,
      "step": 15498,
      "training_step_time": 0.4058666229248047
    },
    {
      "epoch": 9.4598388671875e-05,
      "model_forward_time": 0.1138913631439209,
      "step": 15499
    },
    {
      "epoch": 9.4598388671875e-05,
      "step": 15499,
      "training_step_time": 0.3998262882232666
    },
    {
      "epoch": 9.46044921875e-05,
      "grad_norm": 0.1483224779367447,
      "learning_rate": 8.859583254581605e-05,
      "loss": 0.0527,
      "step": 15500
    },
    {
      "epoch": 9.46044921875e-05,
      "model_forward_time": 0.11421775817871094,
      "step": 15500
    },
    {
      "epoch": 9.46044921875e-05,
      "step": 15500,
      "training_step_time": 0.3913400173187256
    },
    {
      "epoch": 9.4610595703125e-05,
      "model_forward_time": 0.11545658111572266,
      "step": 15501
    },
    {
      "epoch": 9.4610595703125e-05,
      "step": 15501,
      "training_step_time": 0.5512995719909668
    },
    {
      "epoch": 9.461669921875e-05,
      "model_forward_time": 0.11411094665527344,
      "step": 15502
    },
    {
      "epoch": 9.461669921875e-05,
      "step": 15502,
      "training_step_time": 0.4442737102508545
    },
    {
      "epoch": 9.4622802734375e-05,
      "model_forward_time": 0.11440825462341309,
      "step": 15503
    },
    {
      "epoch": 9.4622802734375e-05,
      "step": 15503,
      "training_step_time": 0.399259090423584
    },
    {
      "epoch": 9.462890625e-05,
      "model_forward_time": 0.11419796943664551,
      "step": 15504
    },
    {
      "epoch": 9.462890625e-05,
      "step": 15504,
      "training_step_time": 0.4172787666320801
    },
    {
      "epoch": 9.4635009765625e-05,
      "model_forward_time": 0.11461734771728516,
      "step": 15505
    },
    {
      "epoch": 9.4635009765625e-05,
      "step": 15505,
      "training_step_time": 0.4309580326080322
    },
    {
      "epoch": 9.464111328125e-05,
      "model_forward_time": 0.11449098587036133,
      "step": 15506
    },
    {
      "epoch": 9.464111328125e-05,
      "step": 15506,
      "training_step_time": 0.36620068550109863
    },
    {
      "epoch": 9.4647216796875e-05,
      "model_forward_time": 0.11511850357055664,
      "step": 15507
    },
    {
      "epoch": 9.4647216796875e-05,
      "step": 15507,
      "training_step_time": 0.5965628623962402
    },
    {
      "epoch": 9.46533203125e-05,
      "model_forward_time": 0.11559796333312988,
      "step": 15508
    },
    {
      "epoch": 9.46533203125e-05,
      "step": 15508,
      "training_step_time": 0.4919140338897705
    },
    {
      "epoch": 9.4659423828125e-05,
      "model_forward_time": 0.11429286003112793,
      "step": 15509
    },
    {
      "epoch": 9.4659423828125e-05,
      "step": 15509,
      "training_step_time": 0.3906364440917969
    },
    {
      "epoch": 9.466552734375e-05,
      "grad_norm": 0.13735824823379517,
      "learning_rate": 8.857830751733815e-05,
      "loss": 0.0522,
      "step": 15510
    },
    {
      "epoch": 9.466552734375e-05,
      "model_forward_time": 0.11399126052856445,
      "step": 15510
    },
    {
      "epoch": 9.466552734375e-05,
      "step": 15510,
      "training_step_time": 0.4810912609100342
    },
    {
      "epoch": 9.4671630859375e-05,
      "model_forward_time": 0.11405014991760254,
      "step": 15511
    },
    {
      "epoch": 9.4671630859375e-05,
      "step": 15511,
      "training_step_time": 0.44345545768737793
    },
    {
      "epoch": 9.4677734375e-05,
      "model_forward_time": 0.11450457572937012,
      "step": 15512
    },
    {
      "epoch": 9.4677734375e-05,
      "step": 15512,
      "training_step_time": 0.39478325843811035
    },
    {
      "epoch": 9.4683837890625e-05,
      "model_forward_time": 0.1150968074798584,
      "step": 15513
    },
    {
      "epoch": 9.4683837890625e-05,
      "step": 15513,
      "training_step_time": 0.5028989315032959
    },
    {
      "epoch": 9.468994140625e-05,
      "model_forward_time": 0.11462736129760742,
      "step": 15514
    },
    {
      "epoch": 9.468994140625e-05,
      "step": 15514,
      "training_step_time": 0.3927583694458008
    },
    {
      "epoch": 9.4696044921875e-05,
      "model_forward_time": 0.11489152908325195,
      "step": 15515
    },
    {
      "epoch": 9.4696044921875e-05,
      "step": 15515,
      "training_step_time": 0.39538097381591797
    },
    {
      "epoch": 9.47021484375e-05,
      "model_forward_time": 0.11509394645690918,
      "step": 15516
    },
    {
      "epoch": 9.47021484375e-05,
      "step": 15516,
      "training_step_time": 0.4011862277984619
    },
    {
      "epoch": 9.4708251953125e-05,
      "model_forward_time": 0.11428713798522949,
      "step": 15517
    },
    {
      "epoch": 9.4708251953125e-05,
      "step": 15517,
      "training_step_time": 0.3962082862854004
    },
    {
      "epoch": 9.471435546875e-05,
      "model_forward_time": 0.11507248878479004,
      "step": 15518
    },
    {
      "epoch": 9.471435546875e-05,
      "step": 15518,
      "training_step_time": 0.4410746097564697
    },
    {
      "epoch": 9.4720458984375e-05,
      "model_forward_time": 0.11520719528198242,
      "step": 15519
    },
    {
      "epoch": 9.4720458984375e-05,
      "step": 15519,
      "training_step_time": 0.6760196685791016
    },
    {
      "epoch": 9.47265625e-05,
      "grad_norm": 0.13862727582454681,
      "learning_rate": 8.856077076978902e-05,
      "loss": 0.0538,
      "step": 15520
    },
    {
      "epoch": 9.47265625e-05,
      "model_forward_time": 0.11460709571838379,
      "step": 15520
    },
    {
      "epoch": 9.47265625e-05,
      "step": 15520,
      "training_step_time": 0.46239233016967773
    },
    {
      "epoch": 9.4732666015625e-05,
      "model_forward_time": 0.11501145362854004,
      "step": 15521
    },
    {
      "epoch": 9.4732666015625e-05,
      "step": 15521,
      "training_step_time": 0.4459691047668457
    },
    {
      "epoch": 9.473876953125e-05,
      "model_forward_time": 0.11507725715637207,
      "step": 15522
    },
    {
      "epoch": 9.473876953125e-05,
      "step": 15522,
      "training_step_time": 0.3895304203033447
    },
    {
      "epoch": 9.4744873046875e-05,
      "model_forward_time": 0.11450505256652832,
      "step": 15523
    },
    {
      "epoch": 9.4744873046875e-05,
      "step": 15523,
      "training_step_time": 0.4288489818572998
    },
    {
      "epoch": 9.47509765625e-05,
      "model_forward_time": 0.11431598663330078,
      "step": 15524
    },
    {
      "epoch": 9.47509765625e-05,
      "step": 15524,
      "training_step_time": 0.4538412094116211
    },
    {
      "epoch": 9.4757080078125e-05,
      "model_forward_time": 0.11447834968566895,
      "step": 15525
    },
    {
      "epoch": 9.4757080078125e-05,
      "step": 15525,
      "training_step_time": 0.6393969058990479
    },
    {
      "epoch": 9.476318359375e-05,
      "model_forward_time": 0.1158604621887207,
      "step": 15526
    },
    {
      "epoch": 9.476318359375e-05,
      "step": 15526,
      "training_step_time": 0.3886880874633789
    },
    {
      "epoch": 9.4769287109375e-05,
      "model_forward_time": 0.11429929733276367,
      "step": 15527
    },
    {
      "epoch": 9.4769287109375e-05,
      "step": 15527,
      "training_step_time": 0.3846602439880371
    },
    {
      "epoch": 9.4775390625e-05,
      "model_forward_time": 0.11480093002319336,
      "step": 15528
    },
    {
      "epoch": 9.4775390625e-05,
      "step": 15528,
      "training_step_time": 0.3958745002746582
    },
    {
      "epoch": 9.4781494140625e-05,
      "model_forward_time": 0.11490917205810547,
      "step": 15529
    },
    {
      "epoch": 9.4781494140625e-05,
      "step": 15529,
      "training_step_time": 0.41486549377441406
    },
    {
      "epoch": 9.478759765625e-05,
      "grad_norm": 0.180918887257576,
      "learning_rate": 8.854322230849588e-05,
      "loss": 0.0625,
      "step": 15530
    },
    {
      "epoch": 9.478759765625e-05,
      "model_forward_time": 0.1146395206451416,
      "step": 15530
    },
    {
      "epoch": 9.478759765625e-05,
      "step": 15530,
      "training_step_time": 0.4229447841644287
    },
    {
      "epoch": 9.4793701171875e-05,
      "model_forward_time": 0.11495780944824219,
      "step": 15531
    },
    {
      "epoch": 9.4793701171875e-05,
      "step": 15531,
      "training_step_time": 0.7081491947174072
    },
    {
      "epoch": 9.47998046875e-05,
      "model_forward_time": 0.11480593681335449,
      "step": 15532
    },
    {
      "epoch": 9.47998046875e-05,
      "step": 15532,
      "training_step_time": 0.4068789482116699
    },
    {
      "epoch": 9.4805908203125e-05,
      "model_forward_time": 0.11409687995910645,
      "step": 15533
    },
    {
      "epoch": 9.4805908203125e-05,
      "step": 15533,
      "training_step_time": 0.3649129867553711
    },
    {
      "epoch": 9.481201171875e-05,
      "model_forward_time": 0.11449980735778809,
      "step": 15534
    },
    {
      "epoch": 9.481201171875e-05,
      "step": 15534,
      "training_step_time": 0.42000579833984375
    },
    {
      "epoch": 9.4818115234375e-05,
      "model_forward_time": 0.11431074142456055,
      "step": 15535
    },
    {
      "epoch": 9.4818115234375e-05,
      "step": 15535,
      "training_step_time": 0.4537019729614258
    },
    {
      "epoch": 9.482421875e-05,
      "model_forward_time": 0.11349034309387207,
      "step": 15536
    },
    {
      "epoch": 9.482421875e-05,
      "step": 15536,
      "training_step_time": 0.46810388565063477
    },
    {
      "epoch": 9.4830322265625e-05,
      "model_forward_time": 0.11458992958068848,
      "step": 15537
    },
    {
      "epoch": 9.4830322265625e-05,
      "step": 15537,
      "training_step_time": 0.5009114742279053
    },
    {
      "epoch": 9.483642578125e-05,
      "model_forward_time": 0.11494016647338867,
      "step": 15538
    },
    {
      "epoch": 9.483642578125e-05,
      "step": 15538,
      "training_step_time": 0.3993048667907715
    },
    {
      "epoch": 9.4842529296875e-05,
      "model_forward_time": 0.11472105979919434,
      "step": 15539
    },
    {
      "epoch": 9.4842529296875e-05,
      "step": 15539,
      "training_step_time": 0.40444350242614746
    },
    {
      "epoch": 9.48486328125e-05,
      "grad_norm": 0.19195616245269775,
      "learning_rate": 8.852566213878947e-05,
      "loss": 0.0578,
      "step": 15540
    },
    {
      "epoch": 9.48486328125e-05,
      "model_forward_time": 0.11492156982421875,
      "step": 15540
    },
    {
      "epoch": 9.48486328125e-05,
      "step": 15540,
      "training_step_time": 0.3927619457244873
    },
    {
      "epoch": 9.4854736328125e-05,
      "model_forward_time": 0.11513781547546387,
      "step": 15541
    },
    {
      "epoch": 9.4854736328125e-05,
      "step": 15541,
      "training_step_time": 0.3872103691101074
    },
    {
      "epoch": 9.486083984375e-05,
      "model_forward_time": 0.11548614501953125,
      "step": 15542
    },
    {
      "epoch": 9.486083984375e-05,
      "step": 15542,
      "training_step_time": 0.3903172016143799
    },
    {
      "epoch": 9.4866943359375e-05,
      "model_forward_time": 0.11529374122619629,
      "step": 15543
    },
    {
      "epoch": 9.4866943359375e-05,
      "step": 15543,
      "training_step_time": 0.6994571685791016
    },
    {
      "epoch": 9.4873046875e-05,
      "model_forward_time": 0.11458778381347656,
      "step": 15544
    },
    {
      "epoch": 9.4873046875e-05,
      "step": 15544,
      "training_step_time": 0.4328432083129883
    },
    {
      "epoch": 9.4879150390625e-05,
      "model_forward_time": 0.11452651023864746,
      "step": 15545
    },
    {
      "epoch": 9.4879150390625e-05,
      "step": 15545,
      "training_step_time": 0.45024824142456055
    },
    {
      "epoch": 9.488525390625e-05,
      "model_forward_time": 0.11397576332092285,
      "step": 15546
    },
    {
      "epoch": 9.488525390625e-05,
      "step": 15546,
      "training_step_time": 0.3842809200286865
    },
    {
      "epoch": 9.4891357421875e-05,
      "model_forward_time": 0.11530637741088867,
      "step": 15547
    },
    {
      "epoch": 9.4891357421875e-05,
      "step": 15547,
      "training_step_time": 0.4019308090209961
    },
    {
      "epoch": 9.48974609375e-05,
      "model_forward_time": 0.11411666870117188,
      "step": 15548
    },
    {
      "epoch": 9.48974609375e-05,
      "step": 15548,
      "training_step_time": 0.4258265495300293
    },
    {
      "epoch": 9.4903564453125e-05,
      "model_forward_time": 0.11518025398254395,
      "step": 15549
    },
    {
      "epoch": 9.4903564453125e-05,
      "step": 15549,
      "training_step_time": 0.6269290447235107
    },
    {
      "epoch": 9.490966796875e-05,
      "grad_norm": 0.1755245178937912,
      "learning_rate": 8.85080902660041e-05,
      "loss": 0.0525,
      "step": 15550
    },
    {
      "epoch": 9.490966796875e-05,
      "model_forward_time": 0.11463642120361328,
      "step": 15550
    },
    {
      "epoch": 9.490966796875e-05,
      "step": 15550,
      "training_step_time": 0.4111614227294922
    },
    {
      "epoch": 9.4915771484375e-05,
      "model_forward_time": 0.11492133140563965,
      "step": 15551
    },
    {
      "epoch": 9.4915771484375e-05,
      "step": 15551,
      "training_step_time": 0.4686124324798584
    },
    {
      "epoch": 9.4921875e-05,
      "model_forward_time": 0.1149141788482666,
      "step": 15552
    },
    {
      "epoch": 9.4921875e-05,
      "step": 15552,
      "training_step_time": 0.4246387481689453
    },
    {
      "epoch": 9.4927978515625e-05,
      "model_forward_time": 0.11476397514343262,
      "step": 15553
    },
    {
      "epoch": 9.4927978515625e-05,
      "step": 15553,
      "training_step_time": 0.3907170295715332
    },
    {
      "epoch": 9.493408203125e-05,
      "model_forward_time": 0.11452102661132812,
      "step": 15554
    },
    {
      "epoch": 9.493408203125e-05,
      "step": 15554,
      "training_step_time": 0.3953845500946045
    },
    {
      "epoch": 9.4940185546875e-05,
      "model_forward_time": 0.11549687385559082,
      "step": 15555
    },
    {
      "epoch": 9.4940185546875e-05,
      "step": 15555,
      "training_step_time": 0.49596238136291504
    },
    {
      "epoch": 9.49462890625e-05,
      "model_forward_time": 0.11495399475097656,
      "step": 15556
    },
    {
      "epoch": 9.49462890625e-05,
      "step": 15556,
      "training_step_time": 0.437847375869751
    },
    {
      "epoch": 9.4952392578125e-05,
      "model_forward_time": 0.11477208137512207,
      "step": 15557
    },
    {
      "epoch": 9.4952392578125e-05,
      "step": 15557,
      "training_step_time": 0.4324197769165039
    },
    {
      "epoch": 9.495849609375e-05,
      "model_forward_time": 0.11574077606201172,
      "step": 15558
    },
    {
      "epoch": 9.495849609375e-05,
      "step": 15558,
      "training_step_time": 0.48868560791015625
    },
    {
      "epoch": 9.4964599609375e-05,
      "model_forward_time": 0.11428594589233398,
      "step": 15559
    },
    {
      "epoch": 9.4964599609375e-05,
      "step": 15559,
      "training_step_time": 0.3947739601135254
    },
    {
      "epoch": 9.4970703125e-05,
      "grad_norm": 0.2035982459783554,
      "learning_rate": 8.849050669547768e-05,
      "loss": 0.0501,
      "step": 15560
    },
    {
      "epoch": 9.4970703125e-05,
      "model_forward_time": 0.11488080024719238,
      "step": 15560
    },
    {
      "epoch": 9.4970703125e-05,
      "step": 15560,
      "training_step_time": 0.3813178539276123
    },
    {
      "epoch": 9.4976806640625e-05,
      "model_forward_time": 0.11507415771484375,
      "step": 15561
    },
    {
      "epoch": 9.4976806640625e-05,
      "step": 15561,
      "training_step_time": 0.4927639961242676
    },
    {
      "epoch": 9.498291015625e-05,
      "model_forward_time": 0.11516976356506348,
      "step": 15562
    },
    {
      "epoch": 9.498291015625e-05,
      "step": 15562,
      "training_step_time": 0.3874683380126953
    },
    {
      "epoch": 9.4989013671875e-05,
      "model_forward_time": 0.1151728630065918,
      "step": 15563
    },
    {
      "epoch": 9.4989013671875e-05,
      "step": 15563,
      "training_step_time": 0.4914233684539795
    },
    {
      "epoch": 9.49951171875e-05,
      "model_forward_time": 0.11423611640930176,
      "step": 15564
    },
    {
      "epoch": 9.49951171875e-05,
      "step": 15564,
      "training_step_time": 0.4246044158935547
    },
    {
      "epoch": 9.5001220703125e-05,
      "model_forward_time": 0.11434006690979004,
      "step": 15565
    },
    {
      "epoch": 9.5001220703125e-05,
      "step": 15565,
      "training_step_time": 0.4703338146209717
    },
    {
      "epoch": 9.500732421875e-05,
      "model_forward_time": 0.11452150344848633,
      "step": 15566
    },
    {
      "epoch": 9.500732421875e-05,
      "step": 15566,
      "training_step_time": 0.438046932220459
    },
    {
      "epoch": 9.5013427734375e-05,
      "model_forward_time": 0.1150522232055664,
      "step": 15567
    },
    {
      "epoch": 9.5013427734375e-05,
      "step": 15567,
      "training_step_time": 0.45326852798461914
    },
    {
      "epoch": 9.501953125e-05,
      "model_forward_time": 0.11565637588500977,
      "step": 15568
    },
    {
      "epoch": 9.501953125e-05,
      "step": 15568,
      "training_step_time": 0.39436888694763184
    },
    {
      "epoch": 9.5025634765625e-05,
      "model_forward_time": 0.11458373069763184,
      "step": 15569
    },
    {
      "epoch": 9.5025634765625e-05,
      "step": 15569,
      "training_step_time": 0.38957691192626953
    },
    {
      "epoch": 9.503173828125e-05,
      "grad_norm": 0.21199658513069153,
      "learning_rate": 8.84729114325516e-05,
      "loss": 0.0502,
      "step": 15570
    },
    {
      "epoch": 9.503173828125e-05,
      "model_forward_time": 0.1147310733795166,
      "step": 15570
    },
    {
      "epoch": 9.503173828125e-05,
      "step": 15570,
      "training_step_time": 0.5112462043762207
    },
    {
      "epoch": 9.5037841796875e-05,
      "model_forward_time": 0.11396479606628418,
      "step": 15571
    },
    {
      "epoch": 9.5037841796875e-05,
      "step": 15571,
      "training_step_time": 0.4577622413635254
    },
    {
      "epoch": 9.50439453125e-05,
      "model_forward_time": 0.11469006538391113,
      "step": 15572
    },
    {
      "epoch": 9.50439453125e-05,
      "step": 15572,
      "training_step_time": 0.46001172065734863
    },
    {
      "epoch": 9.5050048828125e-05,
      "model_forward_time": 0.11524438858032227,
      "step": 15573
    },
    {
      "epoch": 9.5050048828125e-05,
      "step": 15573,
      "training_step_time": 0.3882744312286377
    },
    {
      "epoch": 9.505615234375e-05,
      "model_forward_time": 0.11488890647888184,
      "step": 15574
    },
    {
      "epoch": 9.505615234375e-05,
      "step": 15574,
      "training_step_time": 0.389819860458374
    },
    {
      "epoch": 9.5062255859375e-05,
      "model_forward_time": 0.11541318893432617,
      "step": 15575
    },
    {
      "epoch": 9.5062255859375e-05,
      "step": 15575,
      "training_step_time": 0.3993098735809326
    },
    {
      "epoch": 9.5068359375e-05,
      "model_forward_time": 0.11481189727783203,
      "step": 15576
    },
    {
      "epoch": 9.5068359375e-05,
      "step": 15576,
      "training_step_time": 0.43056297302246094
    },
    {
      "epoch": 9.5074462890625e-05,
      "model_forward_time": 0.11521697044372559,
      "step": 15577
    },
    {
      "epoch": 9.5074462890625e-05,
      "step": 15577,
      "training_step_time": 0.43352603912353516
    },
    {
      "epoch": 9.508056640625e-05,
      "model_forward_time": 0.11537742614746094,
      "step": 15578
    },
    {
      "epoch": 9.508056640625e-05,
      "step": 15578,
      "training_step_time": 0.4126420021057129
    },
    {
      "epoch": 9.5086669921875e-05,
      "model_forward_time": 0.11516427993774414,
      "step": 15579
    },
    {
      "epoch": 9.5086669921875e-05,
      "step": 15579,
      "training_step_time": 0.8405075073242188
    },
    {
      "epoch": 9.50927734375e-05,
      "grad_norm": 0.170938178896904,
      "learning_rate": 8.845530448257085e-05,
      "loss": 0.0551,
      "step": 15580
    },
    {
      "epoch": 9.50927734375e-05,
      "model_forward_time": 0.11401224136352539,
      "step": 15580
    },
    {
      "epoch": 9.50927734375e-05,
      "step": 15580,
      "training_step_time": 0.38218021392822266
    },
    {
      "epoch": 9.5098876953125e-05,
      "model_forward_time": 0.11419916152954102,
      "step": 15581
    },
    {
      "epoch": 9.5098876953125e-05,
      "step": 15581,
      "training_step_time": 0.38422465324401855
    },
    {
      "epoch": 9.510498046875e-05,
      "model_forward_time": 0.11486172676086426,
      "step": 15582
    },
    {
      "epoch": 9.510498046875e-05,
      "step": 15582,
      "training_step_time": 0.38037776947021484
    },
    {
      "epoch": 9.5111083984375e-05,
      "model_forward_time": 0.11426067352294922,
      "step": 15583
    },
    {
      "epoch": 9.5111083984375e-05,
      "step": 15583,
      "training_step_time": 0.38776540756225586
    },
    {
      "epoch": 9.51171875e-05,
      "model_forward_time": 0.11375117301940918,
      "step": 15584
    },
    {
      "epoch": 9.51171875e-05,
      "step": 15584,
      "training_step_time": 0.4613924026489258
    },
    {
      "epoch": 9.5123291015625e-05,
      "model_forward_time": 0.11527872085571289,
      "step": 15585
    },
    {
      "epoch": 9.5123291015625e-05,
      "step": 15585,
      "training_step_time": 0.5331017971038818
    },
    {
      "epoch": 9.512939453125e-05,
      "model_forward_time": 0.11513376235961914,
      "step": 15586
    },
    {
      "epoch": 9.512939453125e-05,
      "step": 15586,
      "training_step_time": 0.39563894271850586
    },
    {
      "epoch": 9.5135498046875e-05,
      "model_forward_time": 0.11510801315307617,
      "step": 15587
    },
    {
      "epoch": 9.5135498046875e-05,
      "step": 15587,
      "training_step_time": 0.38271331787109375
    },
    {
      "epoch": 9.51416015625e-05,
      "model_forward_time": 0.11514019966125488,
      "step": 15588
    },
    {
      "epoch": 9.51416015625e-05,
      "step": 15588,
      "training_step_time": 0.3931145668029785
    },
    {
      "epoch": 9.5147705078125e-05,
      "model_forward_time": 0.11456608772277832,
      "step": 15589
    },
    {
      "epoch": 9.5147705078125e-05,
      "step": 15589,
      "training_step_time": 0.39177441596984863
    },
    {
      "epoch": 9.515380859375e-05,
      "grad_norm": 0.16885805130004883,
      "learning_rate": 8.843768585088393e-05,
      "loss": 0.0574,
      "step": 15590
    },
    {
      "epoch": 9.515380859375e-05,
      "model_forward_time": 0.11505794525146484,
      "step": 15590
    },
    {
      "epoch": 9.515380859375e-05,
      "step": 15590,
      "training_step_time": 0.39662742614746094
    },
    {
      "epoch": 9.5159912109375e-05,
      "model_forward_time": 0.1149284839630127,
      "step": 15591
    },
    {
      "epoch": 9.5159912109375e-05,
      "step": 15591,
      "training_step_time": 0.8891448974609375
    },
    {
      "epoch": 9.5166015625e-05,
      "model_forward_time": 0.1141824722290039,
      "step": 15592
    },
    {
      "epoch": 9.5166015625e-05,
      "step": 15592,
      "training_step_time": 0.4630138874053955
    },
    {
      "epoch": 9.5172119140625e-05,
      "model_forward_time": 0.11483049392700195,
      "step": 15593
    },
    {
      "epoch": 9.5172119140625e-05,
      "step": 15593,
      "training_step_time": 0.4159681797027588
    },
    {
      "epoch": 9.517822265625e-05,
      "model_forward_time": 0.11362385749816895,
      "step": 15594
    },
    {
      "epoch": 9.517822265625e-05,
      "step": 15594,
      "training_step_time": 0.4033687114715576
    },
    {
      "epoch": 9.5184326171875e-05,
      "model_forward_time": 0.11491942405700684,
      "step": 15595
    },
    {
      "epoch": 9.5184326171875e-05,
      "step": 15595,
      "training_step_time": 0.38225841522216797
    },
    {
      "epoch": 9.51904296875e-05,
      "model_forward_time": 0.114013671875,
      "step": 15596
    },
    {
      "epoch": 9.51904296875e-05,
      "step": 15596,
      "training_step_time": 0.4080770015716553
    },
    {
      "epoch": 9.5196533203125e-05,
      "model_forward_time": 0.11501908302307129,
      "step": 15597
    },
    {
      "epoch": 9.5196533203125e-05,
      "step": 15597,
      "training_step_time": 0.4705629348754883
    },
    {
      "epoch": 9.520263671875e-05,
      "model_forward_time": 0.11484861373901367,
      "step": 15598
    },
    {
      "epoch": 9.520263671875e-05,
      "step": 15598,
      "training_step_time": 0.3918149471282959
    },
    {
      "epoch": 9.5208740234375e-05,
      "model_forward_time": 0.11461448669433594,
      "step": 15599
    },
    {
      "epoch": 9.5208740234375e-05,
      "step": 15599,
      "training_step_time": 0.3951730728149414
    },
    {
      "epoch": 9.521484375e-05,
      "grad_norm": 0.18955934047698975,
      "learning_rate": 8.842005554284296e-05,
      "loss": 0.0522,
      "step": 15600
    },
    {
      "epoch": 9.521484375e-05,
      "model_forward_time": 0.11583280563354492,
      "step": 15600
    },
    {
      "epoch": 9.521484375e-05,
      "step": 15600,
      "training_step_time": 0.4288969039916992
    },
    {
      "epoch": 9.5220947265625e-05,
      "model_forward_time": 0.11467909812927246,
      "step": 15601
    },
    {
      "epoch": 9.5220947265625e-05,
      "step": 15601,
      "training_step_time": 0.4160017967224121
    },
    {
      "epoch": 9.522705078125e-05,
      "model_forward_time": 0.1157529354095459,
      "step": 15602
    },
    {
      "epoch": 9.522705078125e-05,
      "step": 15602,
      "training_step_time": 0.39788365364074707
    },
    {
      "epoch": 9.5233154296875e-05,
      "model_forward_time": 0.11511373519897461,
      "step": 15603
    },
    {
      "epoch": 9.5233154296875e-05,
      "step": 15603,
      "training_step_time": 0.8324823379516602
    },
    {
      "epoch": 9.52392578125e-05,
      "model_forward_time": 0.11419510841369629,
      "step": 15604
    },
    {
      "epoch": 9.52392578125e-05,
      "step": 15604,
      "training_step_time": 0.46546292304992676
    },
    {
      "epoch": 9.5245361328125e-05,
      "model_forward_time": 0.11408424377441406,
      "step": 15605
    },
    {
      "epoch": 9.5245361328125e-05,
      "step": 15605,
      "training_step_time": 0.449596643447876
    },
    {
      "epoch": 9.525146484375e-05,
      "model_forward_time": 0.11422228813171387,
      "step": 15606
    },
    {
      "epoch": 9.525146484375e-05,
      "step": 15606,
      "training_step_time": 0.4094095230102539
    },
    {
      "epoch": 9.5257568359375e-05,
      "model_forward_time": 0.11347150802612305,
      "step": 15607
    },
    {
      "epoch": 9.5257568359375e-05,
      "step": 15607,
      "training_step_time": 0.3917527198791504
    },
    {
      "epoch": 9.5263671875e-05,
      "model_forward_time": 0.11386466026306152,
      "step": 15608
    },
    {
      "epoch": 9.5263671875e-05,
      "step": 15608,
      "training_step_time": 0.40683794021606445
    },
    {
      "epoch": 9.5269775390625e-05,
      "model_forward_time": 0.11488652229309082,
      "step": 15609
    },
    {
      "epoch": 9.5269775390625e-05,
      "step": 15609,
      "training_step_time": 0.44659924507141113
    },
    {
      "epoch": 9.527587890625e-05,
      "grad_norm": 0.21665243804454803,
      "learning_rate": 8.840241356380352e-05,
      "loss": 0.0547,
      "step": 15610
    },
    {
      "epoch": 9.527587890625e-05,
      "model_forward_time": 0.11550617218017578,
      "step": 15610
    },
    {
      "epoch": 9.527587890625e-05,
      "step": 15610,
      "training_step_time": 0.3844437599182129
    },
    {
      "epoch": 9.5281982421875e-05,
      "model_forward_time": 0.11475229263305664,
      "step": 15611
    },
    {
      "epoch": 9.5281982421875e-05,
      "step": 15611,
      "training_step_time": 0.3919239044189453
    },
    {
      "epoch": 9.52880859375e-05,
      "model_forward_time": 0.11542081832885742,
      "step": 15612
    },
    {
      "epoch": 9.52880859375e-05,
      "step": 15612,
      "training_step_time": 0.39872169494628906
    },
    {
      "epoch": 9.5294189453125e-05,
      "model_forward_time": 0.1147770881652832,
      "step": 15613
    },
    {
      "epoch": 9.5294189453125e-05,
      "step": 15613,
      "training_step_time": 0.42627429962158203
    },
    {
      "epoch": 9.530029296875e-05,
      "model_forward_time": 0.11465096473693848,
      "step": 15614
    },
    {
      "epoch": 9.530029296875e-05,
      "step": 15614,
      "training_step_time": 0.4584341049194336
    },
    {
      "epoch": 9.5306396484375e-05,
      "model_forward_time": 0.11499190330505371,
      "step": 15615
    },
    {
      "epoch": 9.5306396484375e-05,
      "step": 15615,
      "training_step_time": 0.6711657047271729
    },
    {
      "epoch": 9.53125e-05,
      "model_forward_time": 0.11570596694946289,
      "step": 15616
    },
    {
      "epoch": 9.53125e-05,
      "step": 15616,
      "training_step_time": 0.4877002239227295
    },
    {
      "epoch": 9.5318603515625e-05,
      "model_forward_time": 0.11425328254699707,
      "step": 15617
    },
    {
      "epoch": 9.5318603515625e-05,
      "step": 15617,
      "training_step_time": 0.44716525077819824
    },
    {
      "epoch": 9.532470703125e-05,
      "model_forward_time": 0.11455464363098145,
      "step": 15618
    },
    {
      "epoch": 9.532470703125e-05,
      "step": 15618,
      "training_step_time": 0.46230268478393555
    },
    {
      "epoch": 9.5330810546875e-05,
      "model_forward_time": 0.11440920829772949,
      "step": 15619
    },
    {
      "epoch": 9.5330810546875e-05,
      "step": 15619,
      "training_step_time": 0.4020986557006836
    },
    {
      "epoch": 9.53369140625e-05,
      "grad_norm": 0.2547939121723175,
      "learning_rate": 8.838475991912482e-05,
      "loss": 0.0553,
      "step": 15620
    },
    {
      "epoch": 9.53369140625e-05,
      "model_forward_time": 0.11447930335998535,
      "step": 15620
    },
    {
      "epoch": 9.53369140625e-05,
      "step": 15620,
      "training_step_time": 0.39165830612182617
    },
    {
      "epoch": 9.5343017578125e-05,
      "model_forward_time": 0.11472225189208984,
      "step": 15621
    },
    {
      "epoch": 9.5343017578125e-05,
      "step": 15621,
      "training_step_time": 0.5169401168823242
    },
    {
      "epoch": 9.534912109375e-05,
      "model_forward_time": 0.11515617370605469,
      "step": 15622
    },
    {
      "epoch": 9.534912109375e-05,
      "step": 15622,
      "training_step_time": 0.39781808853149414
    },
    {
      "epoch": 9.5355224609375e-05,
      "model_forward_time": 0.11505436897277832,
      "step": 15623
    },
    {
      "epoch": 9.5355224609375e-05,
      "step": 15623,
      "training_step_time": 0.3787860870361328
    },
    {
      "epoch": 9.5361328125e-05,
      "model_forward_time": 0.11502575874328613,
      "step": 15624
    },
    {
      "epoch": 9.5361328125e-05,
      "step": 15624,
      "training_step_time": 0.4126405715942383
    },
    {
      "epoch": 9.5367431640625e-05,
      "model_forward_time": 0.11484646797180176,
      "step": 15625
    },
    {
      "epoch": 9.5367431640625e-05,
      "step": 15625,
      "training_step_time": 0.43303823471069336
    },
    {
      "epoch": 9.537353515625e-05,
      "model_forward_time": 0.11542749404907227,
      "step": 15626
    },
    {
      "epoch": 9.537353515625e-05,
      "step": 15626,
      "training_step_time": 0.429720401763916
    },
    {
      "epoch": 9.5379638671875e-05,
      "model_forward_time": 0.11464142799377441,
      "step": 15627
    },
    {
      "epoch": 9.5379638671875e-05,
      "step": 15627,
      "training_step_time": 0.7620251178741455
    },
    {
      "epoch": 9.53857421875e-05,
      "model_forward_time": 0.11461949348449707,
      "step": 15628
    },
    {
      "epoch": 9.53857421875e-05,
      "step": 15628,
      "training_step_time": 0.3913447856903076
    },
    {
      "epoch": 9.5391845703125e-05,
      "model_forward_time": 0.11455273628234863,
      "step": 15629
    },
    {
      "epoch": 9.5391845703125e-05,
      "step": 15629,
      "training_step_time": 0.39450573921203613
    },
    {
      "epoch": 9.539794921875e-05,
      "grad_norm": 0.18019142746925354,
      "learning_rate": 8.836709461416952e-05,
      "loss": 0.0489,
      "step": 15630
    },
    {
      "epoch": 9.539794921875e-05,
      "model_forward_time": 0.11454248428344727,
      "step": 15630
    },
    {
      "epoch": 9.539794921875e-05,
      "step": 15630,
      "training_step_time": 0.36178016662597656
    },
    {
      "epoch": 9.5404052734375e-05,
      "model_forward_time": 0.1143045425415039,
      "step": 15631
    },
    {
      "epoch": 9.5404052734375e-05,
      "step": 15631,
      "training_step_time": 0.3989439010620117
    },
    {
      "epoch": 9.541015625e-05,
      "model_forward_time": 0.11436963081359863,
      "step": 15632
    },
    {
      "epoch": 9.541015625e-05,
      "step": 15632,
      "training_step_time": 0.4486210346221924
    },
    {
      "epoch": 9.5416259765625e-05,
      "model_forward_time": 0.11480307579040527,
      "step": 15633
    },
    {
      "epoch": 9.5416259765625e-05,
      "step": 15633,
      "training_step_time": 0.5220584869384766
    },
    {
      "epoch": 9.542236328125e-05,
      "model_forward_time": 0.11497950553894043,
      "step": 15634
    },
    {
      "epoch": 9.542236328125e-05,
      "step": 15634,
      "training_step_time": 0.41007566452026367
    },
    {
      "epoch": 9.5428466796875e-05,
      "model_forward_time": 0.11417269706726074,
      "step": 15635
    },
    {
      "epoch": 9.5428466796875e-05,
      "step": 15635,
      "training_step_time": 0.3996150493621826
    },
    {
      "epoch": 9.54345703125e-05,
      "model_forward_time": 0.11523175239562988,
      "step": 15636
    },
    {
      "epoch": 9.54345703125e-05,
      "step": 15636,
      "training_step_time": 0.4326496124267578
    },
    {
      "epoch": 9.5440673828125e-05,
      "model_forward_time": 0.11450409889221191,
      "step": 15637
    },
    {
      "epoch": 9.5440673828125e-05,
      "step": 15637,
      "training_step_time": 0.4251217842102051
    },
    {
      "epoch": 9.544677734375e-05,
      "model_forward_time": 0.11469578742980957,
      "step": 15638
    },
    {
      "epoch": 9.544677734375e-05,
      "step": 15638,
      "training_step_time": 0.3946726322174072
    },
    {
      "epoch": 9.5452880859375e-05,
      "model_forward_time": 0.11484575271606445,
      "step": 15639
    },
    {
      "epoch": 9.5452880859375e-05,
      "step": 15639,
      "training_step_time": 0.6935765743255615
    },
    {
      "epoch": 9.5458984375e-05,
      "grad_norm": 0.24357107281684875,
      "learning_rate": 8.834941765430391e-05,
      "loss": 0.0522,
      "step": 15640
    },
    {
      "epoch": 9.5458984375e-05,
      "model_forward_time": 0.11429834365844727,
      "step": 15640
    },
    {
      "epoch": 9.5458984375e-05,
      "step": 15640,
      "training_step_time": 0.4078388214111328
    },
    {
      "epoch": 9.5465087890625e-05,
      "model_forward_time": 0.11450362205505371,
      "step": 15641
    },
    {
      "epoch": 9.5465087890625e-05,
      "step": 15641,
      "training_step_time": 0.45708274841308594
    },
    {
      "epoch": 9.547119140625e-05,
      "model_forward_time": 0.11397290229797363,
      "step": 15642
    },
    {
      "epoch": 9.547119140625e-05,
      "step": 15642,
      "training_step_time": 0.4122605323791504
    },
    {
      "epoch": 9.5477294921875e-05,
      "model_forward_time": 0.11474752426147461,
      "step": 15643
    },
    {
      "epoch": 9.5477294921875e-05,
      "step": 15643,
      "training_step_time": 0.3862903118133545
    },
    {
      "epoch": 9.54833984375e-05,
      "model_forward_time": 0.11491179466247559,
      "step": 15644
    },
    {
      "epoch": 9.54833984375e-05,
      "step": 15644,
      "training_step_time": 0.363537073135376
    },
    {
      "epoch": 9.5489501953125e-05,
      "model_forward_time": 0.11466360092163086,
      "step": 15645
    },
    {
      "epoch": 9.5489501953125e-05,
      "step": 15645,
      "training_step_time": 0.6829266548156738
    },
    {
      "epoch": 9.549560546875e-05,
      "model_forward_time": 0.11489343643188477,
      "step": 15646
    },
    {
      "epoch": 9.549560546875e-05,
      "step": 15646,
      "training_step_time": 0.42911648750305176
    },
    {
      "epoch": 9.5501708984375e-05,
      "model_forward_time": 0.11412858963012695,
      "step": 15647
    },
    {
      "epoch": 9.5501708984375e-05,
      "step": 15647,
      "training_step_time": 0.433643102645874
    },
    {
      "epoch": 9.55078125e-05,
      "model_forward_time": 0.11411356925964355,
      "step": 15648
    },
    {
      "epoch": 9.55078125e-05,
      "step": 15648,
      "training_step_time": 0.3827803134918213
    },
    {
      "epoch": 9.5513916015625e-05,
      "model_forward_time": 0.11435627937316895,
      "step": 15649
    },
    {
      "epoch": 9.5513916015625e-05,
      "step": 15649,
      "training_step_time": 0.39554500579833984
    },
    {
      "epoch": 9.552001953125e-05,
      "grad_norm": 0.17812862992286682,
      "learning_rate": 8.83317290448978e-05,
      "loss": 0.0578,
      "step": 15650
    },
    {
      "epoch": 9.552001953125e-05,
      "model_forward_time": 0.11464834213256836,
      "step": 15650
    },
    {
      "epoch": 9.552001953125e-05,
      "step": 15650,
      "training_step_time": 0.3900480270385742
    },
    {
      "epoch": 9.5526123046875e-05,
      "model_forward_time": 0.11445331573486328,
      "step": 15651
    },
    {
      "epoch": 9.5526123046875e-05,
      "step": 15651,
      "training_step_time": 0.6217217445373535
    },
    {
      "epoch": 9.55322265625e-05,
      "model_forward_time": 0.11496281623840332,
      "step": 15652
    },
    {
      "epoch": 9.55322265625e-05,
      "step": 15652,
      "training_step_time": 0.38532280921936035
    },
    {
      "epoch": 9.5538330078125e-05,
      "model_forward_time": 0.11458206176757812,
      "step": 15653
    },
    {
      "epoch": 9.5538330078125e-05,
      "step": 15653,
      "training_step_time": 0.4458611011505127
    },
    {
      "epoch": 9.554443359375e-05,
      "model_forward_time": 0.11478590965270996,
      "step": 15654
    },
    {
      "epoch": 9.554443359375e-05,
      "step": 15654,
      "training_step_time": 0.40810704231262207
    },
    {
      "epoch": 9.5550537109375e-05,
      "model_forward_time": 0.11464405059814453,
      "step": 15655
    },
    {
      "epoch": 9.5550537109375e-05,
      "step": 15655,
      "training_step_time": 0.4134230613708496
    },
    {
      "epoch": 9.5556640625e-05,
      "model_forward_time": 0.11485815048217773,
      "step": 15656
    },
    {
      "epoch": 9.5556640625e-05,
      "step": 15656,
      "training_step_time": 0.38950610160827637
    },
    {
      "epoch": 9.5562744140625e-05,
      "model_forward_time": 0.11496472358703613,
      "step": 15657
    },
    {
      "epoch": 9.5562744140625e-05,
      "step": 15657,
      "training_step_time": 0.7025291919708252
    },
    {
      "epoch": 9.556884765625e-05,
      "model_forward_time": 0.11467981338500977,
      "step": 15658
    },
    {
      "epoch": 9.556884765625e-05,
      "step": 15658,
      "training_step_time": 0.41676974296569824
    },
    {
      "epoch": 9.5574951171875e-05,
      "model_forward_time": 0.11382436752319336,
      "step": 15659
    },
    {
      "epoch": 9.5574951171875e-05,
      "step": 15659,
      "training_step_time": 0.4509129524230957
    },
    {
      "epoch": 9.55810546875e-05,
      "grad_norm": 0.1381147801876068,
      "learning_rate": 8.831402879132446e-05,
      "loss": 0.0521,
      "step": 15660
    },
    {
      "epoch": 9.55810546875e-05,
      "model_forward_time": 0.11383748054504395,
      "step": 15660
    },
    {
      "epoch": 9.55810546875e-05,
      "step": 15660,
      "training_step_time": 0.4166538715362549
    },
    {
      "epoch": 9.5587158203125e-05,
      "model_forward_time": 0.11477351188659668,
      "step": 15661
    },
    {
      "epoch": 9.5587158203125e-05,
      "step": 15661,
      "training_step_time": 0.3956596851348877
    },
    {
      "epoch": 9.559326171875e-05,
      "model_forward_time": 0.11512184143066406,
      "step": 15662
    },
    {
      "epoch": 9.559326171875e-05,
      "step": 15662,
      "training_step_time": 0.40317606925964355
    },
    {
      "epoch": 9.5599365234375e-05,
      "model_forward_time": 0.11625885963439941,
      "step": 15663
    },
    {
      "epoch": 9.5599365234375e-05,
      "step": 15663,
      "training_step_time": 0.5418314933776855
    },
    {
      "epoch": 9.560546875e-05,
      "model_forward_time": 0.11472511291503906,
      "step": 15664
    },
    {
      "epoch": 9.560546875e-05,
      "step": 15664,
      "training_step_time": 0.39746522903442383
    },
    {
      "epoch": 9.5611572265625e-05,
      "model_forward_time": 0.11665630340576172,
      "step": 15665
    },
    {
      "epoch": 9.5611572265625e-05,
      "step": 15665,
      "training_step_time": 0.4215116500854492
    },
    {
      "epoch": 9.561767578125e-05,
      "model_forward_time": 0.11478400230407715,
      "step": 15666
    },
    {
      "epoch": 9.561767578125e-05,
      "step": 15666,
      "training_step_time": 0.3907592296600342
    },
    {
      "epoch": 9.5623779296875e-05,
      "model_forward_time": 0.11564898490905762,
      "step": 15667
    },
    {
      "epoch": 9.5623779296875e-05,
      "step": 15667,
      "training_step_time": 0.46302032470703125
    },
    {
      "epoch": 9.56298828125e-05,
      "model_forward_time": 0.11498260498046875,
      "step": 15668
    },
    {
      "epoch": 9.56298828125e-05,
      "step": 15668,
      "training_step_time": 0.397963285446167
    },
    {
      "epoch": 9.5635986328125e-05,
      "model_forward_time": 0.11458826065063477,
      "step": 15669
    },
    {
      "epoch": 9.5635986328125e-05,
      "step": 15669,
      "training_step_time": 0.6058049201965332
    },
    {
      "epoch": 9.564208984375e-05,
      "grad_norm": 0.1550069898366928,
      "learning_rate": 8.829631689896082e-05,
      "loss": 0.0535,
      "step": 15670
    },
    {
      "epoch": 9.564208984375e-05,
      "model_forward_time": 0.11468243598937988,
      "step": 15670
    },
    {
      "epoch": 9.564208984375e-05,
      "step": 15670,
      "training_step_time": 0.38448524475097656
    },
    {
      "epoch": 9.5648193359375e-05,
      "model_forward_time": 0.11495590209960938,
      "step": 15671
    },
    {
      "epoch": 9.5648193359375e-05,
      "step": 15671,
      "training_step_time": 0.36574649810791016
    },
    {
      "epoch": 9.5654296875e-05,
      "model_forward_time": 0.11505460739135742,
      "step": 15672
    },
    {
      "epoch": 9.5654296875e-05,
      "step": 15672,
      "training_step_time": 0.44190120697021484
    },
    {
      "epoch": 9.5660400390625e-05,
      "model_forward_time": 0.11421418190002441,
      "step": 15673
    },
    {
      "epoch": 9.5660400390625e-05,
      "step": 15673,
      "training_step_time": 0.4626500606536865
    },
    {
      "epoch": 9.566650390625e-05,
      "model_forward_time": 0.11446070671081543,
      "step": 15674
    },
    {
      "epoch": 9.566650390625e-05,
      "step": 15674,
      "training_step_time": 0.47510695457458496
    },
    {
      "epoch": 9.5672607421875e-05,
      "model_forward_time": 0.11465239524841309,
      "step": 15675
    },
    {
      "epoch": 9.5672607421875e-05,
      "step": 15675,
      "training_step_time": 0.5984807014465332
    },
    {
      "epoch": 9.56787109375e-05,
      "model_forward_time": 0.11467289924621582,
      "step": 15676
    },
    {
      "epoch": 9.56787109375e-05,
      "step": 15676,
      "training_step_time": 0.38541626930236816
    },
    {
      "epoch": 9.5684814453125e-05,
      "model_forward_time": 0.11444425582885742,
      "step": 15677
    },
    {
      "epoch": 9.5684814453125e-05,
      "step": 15677,
      "training_step_time": 0.3876056671142578
    },
    {
      "epoch": 9.569091796875e-05,
      "model_forward_time": 0.11507797241210938,
      "step": 15678
    },
    {
      "epoch": 9.569091796875e-05,
      "step": 15678,
      "training_step_time": 0.38417530059814453
    },
    {
      "epoch": 9.5697021484375e-05,
      "model_forward_time": 0.11491107940673828,
      "step": 15679
    },
    {
      "epoch": 9.5697021484375e-05,
      "step": 15679,
      "training_step_time": 0.3905322551727295
    },
    {
      "epoch": 9.5703125e-05,
      "grad_norm": 0.18306683003902435,
      "learning_rate": 8.827859337318725e-05,
      "loss": 0.0505,
      "step": 15680
    },
    {
      "epoch": 9.5703125e-05,
      "model_forward_time": 0.11521625518798828,
      "step": 15680
    },
    {
      "epoch": 9.5703125e-05,
      "step": 15680,
      "training_step_time": 0.39679884910583496
    },
    {
      "epoch": 9.5709228515625e-05,
      "model_forward_time": 0.11445140838623047,
      "step": 15681
    },
    {
      "epoch": 9.5709228515625e-05,
      "step": 15681,
      "training_step_time": 0.6985855102539062
    },
    {
      "epoch": 9.571533203125e-05,
      "model_forward_time": 0.11439943313598633,
      "step": 15682
    },
    {
      "epoch": 9.571533203125e-05,
      "step": 15682,
      "training_step_time": 0.4497361183166504
    },
    {
      "epoch": 9.5721435546875e-05,
      "model_forward_time": 0.11531543731689453,
      "step": 15683
    },
    {
      "epoch": 9.5721435546875e-05,
      "step": 15683,
      "training_step_time": 0.38824915885925293
    },
    {
      "epoch": 9.57275390625e-05,
      "model_forward_time": 0.11404037475585938,
      "step": 15684
    },
    {
      "epoch": 9.57275390625e-05,
      "step": 15684,
      "training_step_time": 0.39102959632873535
    },
    {
      "epoch": 9.5733642578125e-05,
      "model_forward_time": 0.11490368843078613,
      "step": 15685
    },
    {
      "epoch": 9.5733642578125e-05,
      "step": 15685,
      "training_step_time": 0.39133691787719727
    },
    {
      "epoch": 9.573974609375e-05,
      "model_forward_time": 0.11458301544189453,
      "step": 15686
    },
    {
      "epoch": 9.573974609375e-05,
      "step": 15686,
      "training_step_time": 0.4429514408111572
    },
    {
      "epoch": 9.5745849609375e-05,
      "model_forward_time": 0.11501789093017578,
      "step": 15687
    },
    {
      "epoch": 9.5745849609375e-05,
      "step": 15687,
      "training_step_time": 0.4284486770629883
    },
    {
      "epoch": 9.5751953125e-05,
      "model_forward_time": 0.11507010459899902,
      "step": 15688
    },
    {
      "epoch": 9.5751953125e-05,
      "step": 15688,
      "training_step_time": 0.3898012638092041
    },
    {
      "epoch": 9.5758056640625e-05,
      "model_forward_time": 0.11565327644348145,
      "step": 15689
    },
    {
      "epoch": 9.5758056640625e-05,
      "step": 15689,
      "training_step_time": 0.3886599540710449
    },
    {
      "epoch": 9.576416015625e-05,
      "grad_norm": 0.13506238162517548,
      "learning_rate": 8.82608582193877e-05,
      "loss": 0.0522,
      "step": 15690
    },
    {
      "epoch": 9.576416015625e-05,
      "model_forward_time": 0.11503100395202637,
      "step": 15690
    },
    {
      "epoch": 9.576416015625e-05,
      "step": 15690,
      "training_step_time": 0.38912177085876465
    },
    {
      "epoch": 9.5770263671875e-05,
      "model_forward_time": 0.11636090278625488,
      "step": 15691
    },
    {
      "epoch": 9.5770263671875e-05,
      "step": 15691,
      "training_step_time": 0.39072465896606445
    },
    {
      "epoch": 9.57763671875e-05,
      "model_forward_time": 0.11512255668640137,
      "step": 15692
    },
    {
      "epoch": 9.57763671875e-05,
      "step": 15692,
      "training_step_time": 0.3803257942199707
    },
    {
      "epoch": 9.5782470703125e-05,
      "model_forward_time": 0.1146395206451416,
      "step": 15693
    },
    {
      "epoch": 9.5782470703125e-05,
      "step": 15693,
      "training_step_time": 0.6213932037353516
    },
    {
      "epoch": 9.578857421875e-05,
      "model_forward_time": 0.11479949951171875,
      "step": 15694
    },
    {
      "epoch": 9.578857421875e-05,
      "step": 15694,
      "training_step_time": 0.41518259048461914
    },
    {
      "epoch": 9.5794677734375e-05,
      "model_forward_time": 0.11520934104919434,
      "step": 15695
    },
    {
      "epoch": 9.5794677734375e-05,
      "step": 15695,
      "training_step_time": 0.39085936546325684
    },
    {
      "epoch": 9.580078125e-05,
      "model_forward_time": 0.11526298522949219,
      "step": 15696
    },
    {
      "epoch": 9.580078125e-05,
      "step": 15696,
      "training_step_time": 0.4267551898956299
    },
    {
      "epoch": 9.5806884765625e-05,
      "model_forward_time": 0.11477231979370117,
      "step": 15697
    },
    {
      "epoch": 9.5806884765625e-05,
      "step": 15697,
      "training_step_time": 0.39129090309143066
    },
    {
      "epoch": 9.581298828125e-05,
      "model_forward_time": 0.11548686027526855,
      "step": 15698
    },
    {
      "epoch": 9.581298828125e-05,
      "step": 15698,
      "training_step_time": 0.3948845863342285
    },
    {
      "epoch": 9.5819091796875e-05,
      "model_forward_time": 0.11433792114257812,
      "step": 15699
    },
    {
      "epoch": 9.5819091796875e-05,
      "step": 15699,
      "training_step_time": 0.8173174858093262
    },
    {
      "epoch": 9.58251953125e-05,
      "grad_norm": 0.1578679233789444,
      "learning_rate": 8.824311144294965e-05,
      "loss": 0.0512,
      "step": 15700
    },
    {
      "epoch": 9.58251953125e-05,
      "model_forward_time": 0.11494779586791992,
      "step": 15700
    },
    {
      "epoch": 9.58251953125e-05,
      "step": 15700,
      "training_step_time": 0.46820878982543945
    },
    {
      "epoch": 9.5831298828125e-05,
      "model_forward_time": 0.11413049697875977,
      "step": 15701
    },
    {
      "epoch": 9.5831298828125e-05,
      "step": 15701,
      "training_step_time": 0.4499027729034424
    },
    {
      "epoch": 9.583740234375e-05,
      "model_forward_time": 0.11351799964904785,
      "step": 15702
    },
    {
      "epoch": 9.583740234375e-05,
      "step": 15702,
      "training_step_time": 0.3907787799835205
    },
    {
      "epoch": 9.5843505859375e-05,
      "model_forward_time": 0.1139993667602539,
      "step": 15703
    },
    {
      "epoch": 9.5843505859375e-05,
      "step": 15703,
      "training_step_time": 0.38652801513671875
    },
    {
      "epoch": 9.5849609375e-05,
      "model_forward_time": 0.11426711082458496,
      "step": 15704
    },
    {
      "epoch": 9.5849609375e-05,
      "step": 15704,
      "training_step_time": 0.3829059600830078
    },
    {
      "epoch": 9.5855712890625e-05,
      "model_forward_time": 0.11498093605041504,
      "step": 15705
    },
    {
      "epoch": 9.5855712890625e-05,
      "step": 15705,
      "training_step_time": 0.5421755313873291
    },
    {
      "epoch": 9.586181640625e-05,
      "model_forward_time": 0.11487293243408203,
      "step": 15706
    },
    {
      "epoch": 9.586181640625e-05,
      "step": 15706,
      "training_step_time": 0.3906371593475342
    },
    {
      "epoch": 9.5867919921875e-05,
      "model_forward_time": 0.11551833152770996,
      "step": 15707
    },
    {
      "epoch": 9.5867919921875e-05,
      "step": 15707,
      "training_step_time": 0.38816261291503906
    },
    {
      "epoch": 9.58740234375e-05,
      "model_forward_time": 0.11473464965820312,
      "step": 15708
    },
    {
      "epoch": 9.58740234375e-05,
      "step": 15708,
      "training_step_time": 0.4764106273651123
    },
    {
      "epoch": 9.5880126953125e-05,
      "model_forward_time": 0.11481308937072754,
      "step": 15709
    },
    {
      "epoch": 9.5880126953125e-05,
      "step": 15709,
      "training_step_time": 0.44943833351135254
    },
    {
      "epoch": 9.588623046875e-05,
      "grad_norm": 0.13498327136039734,
      "learning_rate": 8.822535304926409e-05,
      "loss": 0.0522,
      "step": 15710
    },
    {
      "epoch": 9.588623046875e-05,
      "model_forward_time": 0.11456680297851562,
      "step": 15710
    },
    {
      "epoch": 9.588623046875e-05,
      "step": 15710,
      "training_step_time": 0.46721792221069336
    },
    {
      "epoch": 9.5892333984375e-05,
      "model_forward_time": 0.11499691009521484,
      "step": 15711
    },
    {
      "epoch": 9.5892333984375e-05,
      "step": 15711,
      "training_step_time": 0.5104472637176514
    },
    {
      "epoch": 9.58984375e-05,
      "model_forward_time": 0.11504673957824707,
      "step": 15712
    },
    {
      "epoch": 9.58984375e-05,
      "step": 15712,
      "training_step_time": 0.38948726654052734
    },
    {
      "epoch": 9.5904541015625e-05,
      "model_forward_time": 0.11439394950866699,
      "step": 15713
    },
    {
      "epoch": 9.5904541015625e-05,
      "step": 15713,
      "training_step_time": 0.3652641773223877
    },
    {
      "epoch": 9.591064453125e-05,
      "model_forward_time": 0.1147611141204834,
      "step": 15714
    },
    {
      "epoch": 9.591064453125e-05,
      "step": 15714,
      "training_step_time": 0.479891300201416
    },
    {
      "epoch": 9.5916748046875e-05,
      "model_forward_time": 0.11395001411437988,
      "step": 15715
    },
    {
      "epoch": 9.5916748046875e-05,
      "step": 15715,
      "training_step_time": 0.4333059787750244
    },
    {
      "epoch": 9.59228515625e-05,
      "model_forward_time": 0.11512422561645508,
      "step": 15716
    },
    {
      "epoch": 9.59228515625e-05,
      "step": 15716,
      "training_step_time": 0.38700103759765625
    },
    {
      "epoch": 9.5928955078125e-05,
      "model_forward_time": 0.11510801315307617,
      "step": 15717
    },
    {
      "epoch": 9.5928955078125e-05,
      "step": 15717,
      "training_step_time": 0.474353551864624
    },
    {
      "epoch": 9.593505859375e-05,
      "model_forward_time": 0.11526799201965332,
      "step": 15718
    },
    {
      "epoch": 9.593505859375e-05,
      "step": 15718,
      "training_step_time": 0.38754916191101074
    },
    {
      "epoch": 9.5941162109375e-05,
      "model_forward_time": 0.11448359489440918,
      "step": 15719
    },
    {
      "epoch": 9.5941162109375e-05,
      "step": 15719,
      "training_step_time": 0.390453577041626
    },
    {
      "epoch": 9.5947265625e-05,
      "grad_norm": 0.10632487386465073,
      "learning_rate": 8.820758304372557e-05,
      "loss": 0.0541,
      "step": 15720
    },
    {
      "epoch": 9.5947265625e-05,
      "model_forward_time": 0.11525678634643555,
      "step": 15720
    },
    {
      "epoch": 9.5947265625e-05,
      "step": 15720,
      "training_step_time": 0.399399995803833
    },
    {
      "epoch": 9.5953369140625e-05,
      "model_forward_time": 0.1144418716430664,
      "step": 15721
    },
    {
      "epoch": 9.5953369140625e-05,
      "step": 15721,
      "training_step_time": 0.39644527435302734
    },
    {
      "epoch": 9.595947265625e-05,
      "model_forward_time": 0.11540389060974121,
      "step": 15722
    },
    {
      "epoch": 9.595947265625e-05,
      "step": 15722,
      "training_step_time": 0.46054816246032715
    },
    {
      "epoch": 9.5965576171875e-05,
      "model_forward_time": 0.11467242240905762,
      "step": 15723
    },
    {
      "epoch": 9.5965576171875e-05,
      "step": 15723,
      "training_step_time": 0.6545572280883789
    },
    {
      "epoch": 9.59716796875e-05,
      "model_forward_time": 0.11455774307250977,
      "step": 15724
    },
    {
      "epoch": 9.59716796875e-05,
      "step": 15724,
      "training_step_time": 0.3999300003051758
    },
    {
      "epoch": 9.5977783203125e-05,
      "model_forward_time": 0.11428356170654297,
      "step": 15725
    },
    {
      "epoch": 9.5977783203125e-05,
      "step": 15725,
      "training_step_time": 0.43775439262390137
    },
    {
      "epoch": 9.598388671875e-05,
      "model_forward_time": 0.11400485038757324,
      "step": 15726
    },
    {
      "epoch": 9.598388671875e-05,
      "step": 15726,
      "training_step_time": 0.38687992095947266
    },
    {
      "epoch": 9.5989990234375e-05,
      "model_forward_time": 0.11478400230407715,
      "step": 15727
    },
    {
      "epoch": 9.5989990234375e-05,
      "step": 15727,
      "training_step_time": 0.3668649196624756
    },
    {
      "epoch": 9.599609375e-05,
      "model_forward_time": 0.1146554946899414,
      "step": 15728
    },
    {
      "epoch": 9.599609375e-05,
      "step": 15728,
      "training_step_time": 0.4285757541656494
    },
    {
      "epoch": 9.6002197265625e-05,
      "model_forward_time": 0.11542034149169922,
      "step": 15729
    },
    {
      "epoch": 9.6002197265625e-05,
      "step": 15729,
      "training_step_time": 0.6810333728790283
    },
    {
      "epoch": 9.600830078125e-05,
      "grad_norm": 0.12612715363502502,
      "learning_rate": 8.818980143173213e-05,
      "loss": 0.0486,
      "step": 15730
    },
    {
      "epoch": 9.600830078125e-05,
      "model_forward_time": 0.11514711380004883,
      "step": 15730
    },
    {
      "epoch": 9.600830078125e-05,
      "step": 15730,
      "training_step_time": 0.3865482807159424
    },
    {
      "epoch": 9.6014404296875e-05,
      "model_forward_time": 0.11528491973876953,
      "step": 15731
    },
    {
      "epoch": 9.6014404296875e-05,
      "step": 15731,
      "training_step_time": 0.3837473392486572
    },
    {
      "epoch": 9.60205078125e-05,
      "model_forward_time": 0.11480069160461426,
      "step": 15732
    },
    {
      "epoch": 9.60205078125e-05,
      "step": 15732,
      "training_step_time": 0.3883662223815918
    },
    {
      "epoch": 9.6026611328125e-05,
      "model_forward_time": 0.11435103416442871,
      "step": 15733
    },
    {
      "epoch": 9.6026611328125e-05,
      "step": 15733,
      "training_step_time": 0.3969841003417969
    },
    {
      "epoch": 9.603271484375e-05,
      "model_forward_time": 0.11446666717529297,
      "step": 15734
    },
    {
      "epoch": 9.603271484375e-05,
      "step": 15734,
      "training_step_time": 0.38498520851135254
    },
    {
      "epoch": 9.6038818359375e-05,
      "model_forward_time": 0.11490035057067871,
      "step": 15735
    },
    {
      "epoch": 9.6038818359375e-05,
      "step": 15735,
      "training_step_time": 0.7341885566711426
    },
    {
      "epoch": 9.6044921875e-05,
      "model_forward_time": 0.1150963306427002,
      "step": 15736
    },
    {
      "epoch": 9.6044921875e-05,
      "step": 15736,
      "training_step_time": 0.39262986183166504
    },
    {
      "epoch": 9.6051025390625e-05,
      "model_forward_time": 0.1141817569732666,
      "step": 15737
    },
    {
      "epoch": 9.6051025390625e-05,
      "step": 15737,
      "training_step_time": 0.43201446533203125
    },
    {
      "epoch": 9.605712890625e-05,
      "model_forward_time": 0.11430001258850098,
      "step": 15738
    },
    {
      "epoch": 9.605712890625e-05,
      "step": 15738,
      "training_step_time": 0.43616247177124023
    },
    {
      "epoch": 9.6063232421875e-05,
      "model_forward_time": 0.1144857406616211,
      "step": 15739
    },
    {
      "epoch": 9.6063232421875e-05,
      "step": 15739,
      "training_step_time": 0.4009826183319092
    },
    {
      "epoch": 9.60693359375e-05,
      "grad_norm": 0.1838630735874176,
      "learning_rate": 8.817200821868533e-05,
      "loss": 0.0553,
      "step": 15740
    },
    {
      "epoch": 9.60693359375e-05,
      "model_forward_time": 0.11424875259399414,
      "step": 15740
    },
    {
      "epoch": 9.60693359375e-05,
      "step": 15740,
      "training_step_time": 0.3884902000427246
    },
    {
      "epoch": 9.6075439453125e-05,
      "model_forward_time": 0.11558151245117188,
      "step": 15741
    },
    {
      "epoch": 9.6075439453125e-05,
      "step": 15741,
      "training_step_time": 0.5640349388122559
    },
    {
      "epoch": 9.608154296875e-05,
      "model_forward_time": 0.11538171768188477,
      "step": 15742
    },
    {
      "epoch": 9.608154296875e-05,
      "step": 15742,
      "training_step_time": 0.44368958473205566
    },
    {
      "epoch": 9.6087646484375e-05,
      "model_forward_time": 0.11537051200866699,
      "step": 15743
    },
    {
      "epoch": 9.6087646484375e-05,
      "step": 15743,
      "training_step_time": 0.39425063133239746
    },
    {
      "epoch": 9.609375e-05,
      "model_forward_time": 0.11439347267150879,
      "step": 15744
    },
    {
      "epoch": 9.609375e-05,
      "step": 15744,
      "training_step_time": 0.41431212425231934
    },
    {
      "epoch": 9.6099853515625e-05,
      "model_forward_time": 0.11484289169311523,
      "step": 15745
    },
    {
      "epoch": 9.6099853515625e-05,
      "step": 15745,
      "training_step_time": 0.3880636692047119
    },
    {
      "epoch": 9.610595703125e-05,
      "model_forward_time": 0.11509823799133301,
      "step": 15746
    },
    {
      "epoch": 9.610595703125e-05,
      "step": 15746,
      "training_step_time": 0.3872053623199463
    },
    {
      "epoch": 9.6112060546875e-05,
      "model_forward_time": 0.11455082893371582,
      "step": 15747
    },
    {
      "epoch": 9.6112060546875e-05,
      "step": 15747,
      "training_step_time": 0.4802258014678955
    },
    {
      "epoch": 9.61181640625e-05,
      "model_forward_time": 0.11461377143859863,
      "step": 15748
    },
    {
      "epoch": 9.61181640625e-05,
      "step": 15748,
      "training_step_time": 0.3915126323699951
    },
    {
      "epoch": 9.6124267578125e-05,
      "model_forward_time": 0.11537766456604004,
      "step": 15749
    },
    {
      "epoch": 9.6124267578125e-05,
      "step": 15749,
      "training_step_time": 0.3891003131866455
    },
    {
      "epoch": 9.613037109375e-05,
      "grad_norm": 0.19387272000312805,
      "learning_rate": 8.815420340999033e-05,
      "loss": 0.0534,
      "step": 15750
    },
    {
      "epoch": 9.613037109375e-05,
      "model_forward_time": 0.1150047779083252,
      "step": 15750
    },
    {
      "epoch": 9.613037109375e-05,
      "step": 15750,
      "training_step_time": 0.446868896484375
    },
    {
      "epoch": 9.6136474609375e-05,
      "model_forward_time": 0.11516284942626953,
      "step": 15751
    },
    {
      "epoch": 9.6136474609375e-05,
      "step": 15751,
      "training_step_time": 0.4360027313232422
    },
    {
      "epoch": 9.6142578125e-05,
      "model_forward_time": 0.11505722999572754,
      "step": 15752
    },
    {
      "epoch": 9.6142578125e-05,
      "step": 15752,
      "training_step_time": 0.4838745594024658
    },
    {
      "epoch": 9.6148681640625e-05,
      "model_forward_time": 0.11432409286499023,
      "step": 15753
    },
    {
      "epoch": 9.6148681640625e-05,
      "step": 15753,
      "training_step_time": 0.5199007987976074
    },
    {
      "epoch": 9.615478515625e-05,
      "model_forward_time": 0.11513543128967285,
      "step": 15754
    },
    {
      "epoch": 9.615478515625e-05,
      "step": 15754,
      "training_step_time": 0.3898594379425049
    },
    {
      "epoch": 9.6160888671875e-05,
      "model_forward_time": 0.11432433128356934,
      "step": 15755
    },
    {
      "epoch": 9.6160888671875e-05,
      "step": 15755,
      "training_step_time": 0.3880925178527832
    },
    {
      "epoch": 9.61669921875e-05,
      "model_forward_time": 0.11461663246154785,
      "step": 15756
    },
    {
      "epoch": 9.61669921875e-05,
      "step": 15756,
      "training_step_time": 0.41350364685058594
    },
    {
      "epoch": 9.6173095703125e-05,
      "model_forward_time": 0.11429286003112793,
      "step": 15757
    },
    {
      "epoch": 9.6173095703125e-05,
      "step": 15757,
      "training_step_time": 0.47605371475219727
    },
    {
      "epoch": 9.617919921875e-05,
      "model_forward_time": 0.11504769325256348,
      "step": 15758
    },
    {
      "epoch": 9.617919921875e-05,
      "step": 15758,
      "training_step_time": 0.38196539878845215
    },
    {
      "epoch": 9.6185302734375e-05,
      "model_forward_time": 0.11521148681640625,
      "step": 15759
    },
    {
      "epoch": 9.6185302734375e-05,
      "step": 15759,
      "training_step_time": 0.6588377952575684
    },
    {
      "epoch": 9.619140625e-05,
      "grad_norm": 0.1677774041891098,
      "learning_rate": 8.813638701105573e-05,
      "loss": 0.0533,
      "step": 15760
    },
    {
      "epoch": 9.619140625e-05,
      "model_forward_time": 0.11426997184753418,
      "step": 15760
    },
    {
      "epoch": 9.619140625e-05,
      "step": 15760,
      "training_step_time": 0.399442195892334
    },
    {
      "epoch": 9.6197509765625e-05,
      "model_forward_time": 0.11449027061462402,
      "step": 15761
    },
    {
      "epoch": 9.6197509765625e-05,
      "step": 15761,
      "training_step_time": 0.40245890617370605
    },
    {
      "epoch": 9.620361328125e-05,
      "model_forward_time": 0.11466193199157715,
      "step": 15762
    },
    {
      "epoch": 9.620361328125e-05,
      "step": 15762,
      "training_step_time": 0.38873767852783203
    },
    {
      "epoch": 9.6209716796875e-05,
      "model_forward_time": 0.11480236053466797,
      "step": 15763
    },
    {
      "epoch": 9.6209716796875e-05,
      "step": 15763,
      "training_step_time": 0.38604140281677246
    },
    {
      "epoch": 9.62158203125e-05,
      "model_forward_time": 0.11544013023376465,
      "step": 15764
    },
    {
      "epoch": 9.62158203125e-05,
      "step": 15764,
      "training_step_time": 0.4975547790527344
    },
    {
      "epoch": 9.6221923828125e-05,
      "model_forward_time": 0.11510753631591797,
      "step": 15765
    },
    {
      "epoch": 9.6221923828125e-05,
      "step": 15765,
      "training_step_time": 0.7487311363220215
    },
    {
      "epoch": 9.622802734375e-05,
      "model_forward_time": 0.11443948745727539,
      "step": 15766
    },
    {
      "epoch": 9.622802734375e-05,
      "step": 15766,
      "training_step_time": 0.3828878402709961
    },
    {
      "epoch": 9.6234130859375e-05,
      "model_forward_time": 0.11491823196411133,
      "step": 15767
    },
    {
      "epoch": 9.6234130859375e-05,
      "step": 15767,
      "training_step_time": 0.3788325786590576
    },
    {
      "epoch": 9.6240234375e-05,
      "model_forward_time": 0.11452555656433105,
      "step": 15768
    },
    {
      "epoch": 9.6240234375e-05,
      "step": 15768,
      "training_step_time": 0.4414947032928467
    },
    {
      "epoch": 9.6246337890625e-05,
      "model_forward_time": 0.11545014381408691,
      "step": 15769
    },
    {
      "epoch": 9.6246337890625e-05,
      "step": 15769,
      "training_step_time": 0.40757298469543457
    },
    {
      "epoch": 9.625244140625e-05,
      "grad_norm": 0.19941557943820953,
      "learning_rate": 8.81185590272937e-05,
      "loss": 0.0572,
      "step": 15770
    },
    {
      "epoch": 9.625244140625e-05,
      "model_forward_time": 0.11598443984985352,
      "step": 15770
    },
    {
      "epoch": 9.625244140625e-05,
      "step": 15770,
      "training_step_time": 0.412524938583374
    },
    {
      "epoch": 9.6258544921875e-05,
      "model_forward_time": 0.11493158340454102,
      "step": 15771
    },
    {
      "epoch": 9.6258544921875e-05,
      "step": 15771,
      "training_step_time": 0.5094394683837891
    },
    {
      "epoch": 9.62646484375e-05,
      "model_forward_time": 0.11596822738647461,
      "step": 15772
    },
    {
      "epoch": 9.62646484375e-05,
      "step": 15772,
      "training_step_time": 0.3834075927734375
    },
    {
      "epoch": 9.6270751953125e-05,
      "model_forward_time": 0.11512899398803711,
      "step": 15773
    },
    {
      "epoch": 9.6270751953125e-05,
      "step": 15773,
      "training_step_time": 0.3824784755706787
    },
    {
      "epoch": 9.627685546875e-05,
      "model_forward_time": 0.11549854278564453,
      "step": 15774
    },
    {
      "epoch": 9.627685546875e-05,
      "step": 15774,
      "training_step_time": 0.4154970645904541
    },
    {
      "epoch": 9.6282958984375e-05,
      "model_forward_time": 0.11503934860229492,
      "step": 15775
    },
    {
      "epoch": 9.6282958984375e-05,
      "step": 15775,
      "training_step_time": 0.3954465389251709
    },
    {
      "epoch": 9.62890625e-05,
      "model_forward_time": 0.11511516571044922,
      "step": 15776
    },
    {
      "epoch": 9.62890625e-05,
      "step": 15776,
      "training_step_time": 0.4103529453277588
    },
    {
      "epoch": 9.6295166015625e-05,
      "model_forward_time": 0.1150217056274414,
      "step": 15777
    },
    {
      "epoch": 9.6295166015625e-05,
      "step": 15777,
      "training_step_time": 0.9158530235290527
    },
    {
      "epoch": 9.630126953125e-05,
      "model_forward_time": 0.11427450180053711,
      "step": 15778
    },
    {
      "epoch": 9.630126953125e-05,
      "step": 15778,
      "training_step_time": 0.4194798469543457
    },
    {
      "epoch": 9.6307373046875e-05,
      "model_forward_time": 0.1145472526550293,
      "step": 15779
    },
    {
      "epoch": 9.6307373046875e-05,
      "step": 15779,
      "training_step_time": 0.4158656597137451
    },
    {
      "epoch": 9.63134765625e-05,
      "grad_norm": 0.18845930695533752,
      "learning_rate": 8.810071946411989e-05,
      "loss": 0.0564,
      "step": 15780
    },
    {
      "epoch": 9.63134765625e-05,
      "model_forward_time": 0.11406183242797852,
      "step": 15780
    },
    {
      "epoch": 9.63134765625e-05,
      "step": 15780,
      "training_step_time": 0.3803696632385254
    },
    {
      "epoch": 9.6319580078125e-05,
      "model_forward_time": 0.11448502540588379,
      "step": 15781
    },
    {
      "epoch": 9.6319580078125e-05,
      "step": 15781,
      "training_step_time": 0.3855268955230713
    },
    {
      "epoch": 9.632568359375e-05,
      "model_forward_time": 0.11458849906921387,
      "step": 15782
    },
    {
      "epoch": 9.632568359375e-05,
      "step": 15782,
      "training_step_time": 0.41579508781433105
    },
    {
      "epoch": 9.6331787109375e-05,
      "model_forward_time": 0.11492562294006348,
      "step": 15783
    },
    {
      "epoch": 9.6331787109375e-05,
      "step": 15783,
      "training_step_time": 0.7301743030548096
    },
    {
      "epoch": 9.6337890625e-05,
      "model_forward_time": 0.11598372459411621,
      "step": 15784
    },
    {
      "epoch": 9.6337890625e-05,
      "step": 15784,
      "training_step_time": 0.3931765556335449
    },
    {
      "epoch": 9.6343994140625e-05,
      "model_forward_time": 0.1145474910736084,
      "step": 15785
    },
    {
      "epoch": 9.6343994140625e-05,
      "step": 15785,
      "training_step_time": 0.3926115036010742
    },
    {
      "epoch": 9.635009765625e-05,
      "model_forward_time": 0.11525821685791016,
      "step": 15786
    },
    {
      "epoch": 9.635009765625e-05,
      "step": 15786,
      "training_step_time": 0.3877730369567871
    },
    {
      "epoch": 9.6356201171875e-05,
      "model_forward_time": 0.11504626274108887,
      "step": 15787
    },
    {
      "epoch": 9.6356201171875e-05,
      "step": 15787,
      "training_step_time": 0.3878045082092285
    },
    {
      "epoch": 9.63623046875e-05,
      "model_forward_time": 0.1144256591796875,
      "step": 15788
    },
    {
      "epoch": 9.63623046875e-05,
      "step": 15788,
      "training_step_time": 0.3827192783355713
    },
    {
      "epoch": 9.6368408203125e-05,
      "model_forward_time": 0.11442875862121582,
      "step": 15789
    },
    {
      "epoch": 9.6368408203125e-05,
      "step": 15789,
      "training_step_time": 0.6414687633514404
    },
    {
      "epoch": 9.637451171875e-05,
      "grad_norm": 0.10866913944482803,
      "learning_rate": 8.80828683269535e-05,
      "loss": 0.0518,
      "step": 15790
    },
    {
      "epoch": 9.637451171875e-05,
      "model_forward_time": 0.11547732353210449,
      "step": 15790
    },
    {
      "epoch": 9.637451171875e-05,
      "step": 15790,
      "training_step_time": 0.3835930824279785
    },
    {
      "epoch": 9.6380615234375e-05,
      "model_forward_time": 0.11570477485656738,
      "step": 15791
    },
    {
      "epoch": 9.6380615234375e-05,
      "step": 15791,
      "training_step_time": 0.3761930465698242
    },
    {
      "epoch": 9.638671875e-05,
      "model_forward_time": 0.11503124237060547,
      "step": 15792
    },
    {
      "epoch": 9.638671875e-05,
      "step": 15792,
      "training_step_time": 0.39684224128723145
    },
    {
      "epoch": 9.6392822265625e-05,
      "model_forward_time": 0.1151742935180664,
      "step": 15793
    },
    {
      "epoch": 9.6392822265625e-05,
      "step": 15793,
      "training_step_time": 0.4253532886505127
    },
    {
      "epoch": 9.639892578125e-05,
      "model_forward_time": 0.11476826667785645,
      "step": 15794
    },
    {
      "epoch": 9.639892578125e-05,
      "step": 15794,
      "training_step_time": 0.4003632068634033
    },
    {
      "epoch": 9.6405029296875e-05,
      "model_forward_time": 0.11521005630493164,
      "step": 15795
    },
    {
      "epoch": 9.6405029296875e-05,
      "step": 15795,
      "training_step_time": 0.7422440052032471
    },
    {
      "epoch": 9.64111328125e-05,
      "model_forward_time": 0.11535191535949707,
      "step": 15796
    },
    {
      "epoch": 9.64111328125e-05,
      "step": 15796,
      "training_step_time": 0.36583900451660156
    },
    {
      "epoch": 9.6417236328125e-05,
      "model_forward_time": 0.11454200744628906,
      "step": 15797
    },
    {
      "epoch": 9.6417236328125e-05,
      "step": 15797,
      "training_step_time": 0.48009657859802246
    },
    {
      "epoch": 9.642333984375e-05,
      "model_forward_time": 0.11429452896118164,
      "step": 15798
    },
    {
      "epoch": 9.642333984375e-05,
      "step": 15798,
      "training_step_time": 0.43006205558776855
    },
    {
      "epoch": 9.6429443359375e-05,
      "model_forward_time": 0.11485695838928223,
      "step": 15799
    },
    {
      "epoch": 9.6429443359375e-05,
      "step": 15799,
      "training_step_time": 0.39077305793762207
    },
    {
      "epoch": 9.6435546875e-05,
      "grad_norm": 0.15419040620326996,
      "learning_rate": 8.806500562121723e-05,
      "loss": 0.0538,
      "step": 15800
    },
    {
      "epoch": 9.6435546875e-05,
      "model_forward_time": 0.11412739753723145,
      "step": 15800
    },
    {
      "epoch": 9.6435546875e-05,
      "step": 15800,
      "training_step_time": 0.3945729732513428
    },
    {
      "epoch": 9.6441650390625e-05,
      "model_forward_time": 0.11429452896118164,
      "step": 15801
    },
    {
      "epoch": 9.6441650390625e-05,
      "step": 15801,
      "training_step_time": 0.5396161079406738
    },
    {
      "epoch": 9.644775390625e-05,
      "model_forward_time": 0.11522507667541504,
      "step": 15802
    },
    {
      "epoch": 9.644775390625e-05,
      "step": 15802,
      "training_step_time": 0.3947746753692627
    },
    {
      "epoch": 9.6453857421875e-05,
      "model_forward_time": 0.11452245712280273,
      "step": 15803
    },
    {
      "epoch": 9.6453857421875e-05,
      "step": 15803,
      "training_step_time": 0.38431501388549805
    },
    {
      "epoch": 9.64599609375e-05,
      "model_forward_time": 0.11429381370544434,
      "step": 15804
    },
    {
      "epoch": 9.64599609375e-05,
      "step": 15804,
      "training_step_time": 0.3961930274963379
    },
    {
      "epoch": 9.6466064453125e-05,
      "model_forward_time": 0.1155385971069336,
      "step": 15805
    },
    {
      "epoch": 9.6466064453125e-05,
      "step": 15805,
      "training_step_time": 0.3944668769836426
    },
    {
      "epoch": 9.647216796875e-05,
      "model_forward_time": 0.11504292488098145,
      "step": 15806
    },
    {
      "epoch": 9.647216796875e-05,
      "step": 15806,
      "training_step_time": 0.41888856887817383
    },
    {
      "epoch": 9.6478271484375e-05,
      "model_forward_time": 0.1150355339050293,
      "step": 15807
    },
    {
      "epoch": 9.6478271484375e-05,
      "step": 15807,
      "training_step_time": 0.6461467742919922
    },
    {
      "epoch": 9.6484375e-05,
      "model_forward_time": 0.11473417282104492,
      "step": 15808
    },
    {
      "epoch": 9.6484375e-05,
      "step": 15808,
      "training_step_time": 0.37950658798217773
    },
    {
      "epoch": 9.6490478515625e-05,
      "model_forward_time": 0.11530637741088867,
      "step": 15809
    },
    {
      "epoch": 9.6490478515625e-05,
      "step": 15809,
      "training_step_time": 0.43622732162475586
    },
    {
      "epoch": 9.649658203125e-05,
      "grad_norm": 0.15865406394004822,
      "learning_rate": 8.804713135233731e-05,
      "loss": 0.0494,
      "step": 15810
    },
    {
      "epoch": 9.649658203125e-05,
      "model_forward_time": 0.11455607414245605,
      "step": 15810
    },
    {
      "epoch": 9.649658203125e-05,
      "step": 15810,
      "training_step_time": 0.4205286502838135
    },
    {
      "epoch": 9.6502685546875e-05,
      "model_forward_time": 0.11411905288696289,
      "step": 15811
    },
    {
      "epoch": 9.6502685546875e-05,
      "step": 15811,
      "training_step_time": 0.4668741226196289
    },
    {
      "epoch": 9.65087890625e-05,
      "model_forward_time": 0.11459183692932129,
      "step": 15812
    },
    {
      "epoch": 9.65087890625e-05,
      "step": 15812,
      "training_step_time": 0.40445470809936523
    },
    {
      "epoch": 9.6514892578125e-05,
      "model_forward_time": 0.11449170112609863,
      "step": 15813
    },
    {
      "epoch": 9.6514892578125e-05,
      "step": 15813,
      "training_step_time": 0.6635479927062988
    },
    {
      "epoch": 9.652099609375e-05,
      "model_forward_time": 0.11392760276794434,
      "step": 15814
    },
    {
      "epoch": 9.652099609375e-05,
      "step": 15814,
      "training_step_time": 0.40826964378356934
    },
    {
      "epoch": 9.6527099609375e-05,
      "model_forward_time": 0.1144108772277832,
      "step": 15815
    },
    {
      "epoch": 9.6527099609375e-05,
      "step": 15815,
      "training_step_time": 0.40257692337036133
    },
    {
      "epoch": 9.6533203125e-05,
      "model_forward_time": 0.11396169662475586,
      "step": 15816
    },
    {
      "epoch": 9.6533203125e-05,
      "step": 15816,
      "training_step_time": 0.39324188232421875
    },
    {
      "epoch": 9.6539306640625e-05,
      "model_forward_time": 0.11476349830627441,
      "step": 15817
    },
    {
      "epoch": 9.6539306640625e-05,
      "step": 15817,
      "training_step_time": 0.392467737197876
    },
    {
      "epoch": 9.654541015625e-05,
      "model_forward_time": 0.1148369312286377,
      "step": 15818
    },
    {
      "epoch": 9.654541015625e-05,
      "step": 15818,
      "training_step_time": 0.40347838401794434
    },
    {
      "epoch": 9.6551513671875e-05,
      "model_forward_time": 0.1145167350769043,
      "step": 15819
    },
    {
      "epoch": 9.6551513671875e-05,
      "step": 15819,
      "training_step_time": 0.7284743785858154
    },
    {
      "epoch": 9.65576171875e-05,
      "grad_norm": 0.20326761901378632,
      "learning_rate": 8.802924552574345e-05,
      "loss": 0.0563,
      "step": 15820
    },
    {
      "epoch": 9.65576171875e-05,
      "model_forward_time": 0.1154630184173584,
      "step": 15820
    },
    {
      "epoch": 9.65576171875e-05,
      "step": 15820,
      "training_step_time": 0.3910067081451416
    },
    {
      "epoch": 9.6563720703125e-05,
      "model_forward_time": 0.11438703536987305,
      "step": 15821
    },
    {
      "epoch": 9.6563720703125e-05,
      "step": 15821,
      "training_step_time": 0.39585065841674805
    },
    {
      "epoch": 9.656982421875e-05,
      "model_forward_time": 0.11476945877075195,
      "step": 15822
    },
    {
      "epoch": 9.656982421875e-05,
      "step": 15822,
      "training_step_time": 0.44338202476501465
    },
    {
      "epoch": 9.6575927734375e-05,
      "model_forward_time": 0.11392354965209961,
      "step": 15823
    },
    {
      "epoch": 9.6575927734375e-05,
      "step": 15823,
      "training_step_time": 0.42424559593200684
    },
    {
      "epoch": 9.658203125e-05,
      "model_forward_time": 0.11639261245727539,
      "step": 15824
    },
    {
      "epoch": 9.658203125e-05,
      "step": 15824,
      "training_step_time": 0.5013344287872314
    },
    {
      "epoch": 9.6588134765625e-05,
      "model_forward_time": 0.11539459228515625,
      "step": 15825
    },
    {
      "epoch": 9.6588134765625e-05,
      "step": 15825,
      "training_step_time": 0.43987178802490234
    },
    {
      "epoch": 9.659423828125e-05,
      "model_forward_time": 0.11509537696838379,
      "step": 15826
    },
    {
      "epoch": 9.659423828125e-05,
      "step": 15826,
      "training_step_time": 0.47568321228027344
    },
    {
      "epoch": 9.6600341796875e-05,
      "model_forward_time": 0.1148674488067627,
      "step": 15827
    },
    {
      "epoch": 9.6600341796875e-05,
      "step": 15827,
      "training_step_time": 0.4318661689758301
    },
    {
      "epoch": 9.66064453125e-05,
      "model_forward_time": 0.11471772193908691,
      "step": 15828
    },
    {
      "epoch": 9.66064453125e-05,
      "step": 15828,
      "training_step_time": 0.44038891792297363
    },
    {
      "epoch": 9.6612548828125e-05,
      "model_forward_time": 0.11512017250061035,
      "step": 15829
    },
    {
      "epoch": 9.6612548828125e-05,
      "step": 15829,
      "training_step_time": 0.3948078155517578
    },
    {
      "epoch": 9.661865234375e-05,
      "grad_norm": 0.15039850771427155,
      "learning_rate": 8.801134814686891e-05,
      "loss": 0.0528,
      "step": 15830
    },
    {
      "epoch": 9.661865234375e-05,
      "model_forward_time": 0.11521482467651367,
      "step": 15830
    },
    {
      "epoch": 9.661865234375e-05,
      "step": 15830,
      "training_step_time": 0.39429402351379395
    },
    {
      "epoch": 9.6624755859375e-05,
      "model_forward_time": 0.11477899551391602,
      "step": 15831
    },
    {
      "epoch": 9.6624755859375e-05,
      "step": 15831,
      "training_step_time": 0.4158627986907959
    },
    {
      "epoch": 9.6630859375e-05,
      "model_forward_time": 0.11510038375854492,
      "step": 15832
    },
    {
      "epoch": 9.6630859375e-05,
      "step": 15832,
      "training_step_time": 0.5692203044891357
    },
    {
      "epoch": 9.6636962890625e-05,
      "model_forward_time": 0.1144113540649414,
      "step": 15833
    },
    {
      "epoch": 9.6636962890625e-05,
      "step": 15833,
      "training_step_time": 0.49980711936950684
    },
    {
      "epoch": 9.664306640625e-05,
      "model_forward_time": 0.11463642120361328,
      "step": 15834
    },
    {
      "epoch": 9.664306640625e-05,
      "step": 15834,
      "training_step_time": 0.39684009552001953
    },
    {
      "epoch": 9.6649169921875e-05,
      "model_forward_time": 0.11528348922729492,
      "step": 15835
    },
    {
      "epoch": 9.6649169921875e-05,
      "step": 15835,
      "training_step_time": 0.4266223907470703
    },
    {
      "epoch": 9.66552734375e-05,
      "model_forward_time": 0.11393857002258301,
      "step": 15836
    },
    {
      "epoch": 9.66552734375e-05,
      "step": 15836,
      "training_step_time": 0.41151928901672363
    },
    {
      "epoch": 9.6661376953125e-05,
      "model_forward_time": 0.11444282531738281,
      "step": 15837
    },
    {
      "epoch": 9.6661376953125e-05,
      "step": 15837,
      "training_step_time": 0.40515947341918945
    },
    {
      "epoch": 9.666748046875e-05,
      "model_forward_time": 0.11543941497802734,
      "step": 15838
    },
    {
      "epoch": 9.666748046875e-05,
      "step": 15838,
      "training_step_time": 0.45774078369140625
    },
    {
      "epoch": 9.6673583984375e-05,
      "model_forward_time": 0.11557435989379883,
      "step": 15839
    },
    {
      "epoch": 9.6673583984375e-05,
      "step": 15839,
      "training_step_time": 0.45302534103393555
    },
    {
      "epoch": 9.66796875e-05,
      "grad_norm": 0.11761152744293213,
      "learning_rate": 8.799343922115044e-05,
      "loss": 0.0471,
      "step": 15840
    },
    {
      "epoch": 9.66796875e-05,
      "model_forward_time": 0.11551594734191895,
      "step": 15840
    },
    {
      "epoch": 9.66796875e-05,
      "step": 15840,
      "training_step_time": 0.47440505027770996
    },
    {
      "epoch": 9.6685791015625e-05,
      "model_forward_time": 0.11493301391601562,
      "step": 15841
    },
    {
      "epoch": 9.6685791015625e-05,
      "step": 15841,
      "training_step_time": 0.45453834533691406
    },
    {
      "epoch": 9.669189453125e-05,
      "model_forward_time": 0.11518383026123047,
      "step": 15842
    },
    {
      "epoch": 9.669189453125e-05,
      "step": 15842,
      "training_step_time": 0.3851304054260254
    },
    {
      "epoch": 9.6697998046875e-05,
      "model_forward_time": 0.11474466323852539,
      "step": 15843
    },
    {
      "epoch": 9.6697998046875e-05,
      "step": 15843,
      "training_step_time": 0.5303881168365479
    },
    {
      "epoch": 9.67041015625e-05,
      "model_forward_time": 0.11571979522705078,
      "step": 15844
    },
    {
      "epoch": 9.67041015625e-05,
      "step": 15844,
      "training_step_time": 0.41866254806518555
    },
    {
      "epoch": 9.6710205078125e-05,
      "model_forward_time": 0.11476635932922363,
      "step": 15845
    },
    {
      "epoch": 9.6710205078125e-05,
      "step": 15845,
      "training_step_time": 0.4861774444580078
    },
    {
      "epoch": 9.671630859375e-05,
      "model_forward_time": 0.11473441123962402,
      "step": 15846
    },
    {
      "epoch": 9.671630859375e-05,
      "step": 15846,
      "training_step_time": 0.44169044494628906
    },
    {
      "epoch": 9.6722412109375e-05,
      "model_forward_time": 0.1148231029510498,
      "step": 15847
    },
    {
      "epoch": 9.6722412109375e-05,
      "step": 15847,
      "training_step_time": 0.45420169830322266
    },
    {
      "epoch": 9.6728515625e-05,
      "model_forward_time": 0.11443758010864258,
      "step": 15848
    },
    {
      "epoch": 9.6728515625e-05,
      "step": 15848,
      "training_step_time": 0.396716833114624
    },
    {
      "epoch": 9.6734619140625e-05,
      "model_forward_time": 0.11416745185852051,
      "step": 15849
    },
    {
      "epoch": 9.6734619140625e-05,
      "step": 15849,
      "training_step_time": 0.39187097549438477
    },
    {
      "epoch": 9.674072265625e-05,
      "grad_norm": 0.14185266196727753,
      "learning_rate": 8.797551875402827e-05,
      "loss": 0.0531,
      "step": 15850
    },
    {
      "epoch": 9.674072265625e-05,
      "model_forward_time": 0.11565399169921875,
      "step": 15850
    },
    {
      "epoch": 9.674072265625e-05,
      "step": 15850,
      "training_step_time": 0.45096373558044434
    },
    {
      "epoch": 9.6746826171875e-05,
      "model_forward_time": 0.11486482620239258,
      "step": 15851
    },
    {
      "epoch": 9.6746826171875e-05,
      "step": 15851,
      "training_step_time": 0.400676965713501
    },
    {
      "epoch": 9.67529296875e-05,
      "model_forward_time": 0.11636829376220703,
      "step": 15852
    },
    {
      "epoch": 9.67529296875e-05,
      "step": 15852,
      "training_step_time": 0.5438418388366699
    },
    {
      "epoch": 9.6759033203125e-05,
      "model_forward_time": 0.11499428749084473,
      "step": 15853
    },
    {
      "epoch": 9.6759033203125e-05,
      "step": 15853,
      "training_step_time": 0.4211442470550537
    },
    {
      "epoch": 9.676513671875e-05,
      "model_forward_time": 0.11554265022277832,
      "step": 15854
    },
    {
      "epoch": 9.676513671875e-05,
      "step": 15854,
      "training_step_time": 0.408581018447876
    },
    {
      "epoch": 9.6771240234375e-05,
      "model_forward_time": 0.11473345756530762,
      "step": 15855
    },
    {
      "epoch": 9.6771240234375e-05,
      "step": 15855,
      "training_step_time": 0.5105054378509521
    },
    {
      "epoch": 9.677734375e-05,
      "model_forward_time": 0.11421918869018555,
      "step": 15856
    },
    {
      "epoch": 9.677734375e-05,
      "step": 15856,
      "training_step_time": 0.38512706756591797
    },
    {
      "epoch": 9.6783447265625e-05,
      "model_forward_time": 0.11468982696533203,
      "step": 15857
    },
    {
      "epoch": 9.6783447265625e-05,
      "step": 15857,
      "training_step_time": 0.38846325874328613
    },
    {
      "epoch": 9.678955078125e-05,
      "model_forward_time": 0.11550307273864746,
      "step": 15858
    },
    {
      "epoch": 9.678955078125e-05,
      "step": 15858,
      "training_step_time": 0.39785218238830566
    },
    {
      "epoch": 9.6795654296875e-05,
      "model_forward_time": 0.1146855354309082,
      "step": 15859
    },
    {
      "epoch": 9.6795654296875e-05,
      "step": 15859,
      "training_step_time": 0.39545726776123047
    },
    {
      "epoch": 9.68017578125e-05,
      "grad_norm": 0.1689329296350479,
      "learning_rate": 8.795758675094621e-05,
      "loss": 0.0482,
      "step": 15860
    },
    {
      "epoch": 9.68017578125e-05,
      "model_forward_time": 0.11557555198669434,
      "step": 15860
    },
    {
      "epoch": 9.68017578125e-05,
      "step": 15860,
      "training_step_time": 0.43158721923828125
    },
    {
      "epoch": 9.6807861328125e-05,
      "model_forward_time": 0.11540818214416504,
      "step": 15861
    },
    {
      "epoch": 9.6807861328125e-05,
      "step": 15861,
      "training_step_time": 0.6471936702728271
    },
    {
      "epoch": 9.681396484375e-05,
      "model_forward_time": 0.11476707458496094,
      "step": 15862
    },
    {
      "epoch": 9.681396484375e-05,
      "step": 15862,
      "training_step_time": 0.3843231201171875
    },
    {
      "epoch": 9.6820068359375e-05,
      "model_forward_time": 0.11757278442382812,
      "step": 15863
    },
    {
      "epoch": 9.6820068359375e-05,
      "step": 15863,
      "training_step_time": 0.39764857292175293
    },
    {
      "epoch": 9.6826171875e-05,
      "model_forward_time": 0.11488509178161621,
      "step": 15864
    },
    {
      "epoch": 9.6826171875e-05,
      "step": 15864,
      "training_step_time": 0.46746397018432617
    },
    {
      "epoch": 9.6832275390625e-05,
      "model_forward_time": 0.11460185050964355,
      "step": 15865
    },
    {
      "epoch": 9.6832275390625e-05,
      "step": 15865,
      "training_step_time": 0.3657662868499756
    },
    {
      "epoch": 9.683837890625e-05,
      "model_forward_time": 0.1145944595336914,
      "step": 15866
    },
    {
      "epoch": 9.683837890625e-05,
      "step": 15866,
      "training_step_time": 0.45992588996887207
    },
    {
      "epoch": 9.6844482421875e-05,
      "model_forward_time": 0.11516571044921875,
      "step": 15867
    },
    {
      "epoch": 9.6844482421875e-05,
      "step": 15867,
      "training_step_time": 0.4975166320800781
    },
    {
      "epoch": 9.68505859375e-05,
      "model_forward_time": 0.11553192138671875,
      "step": 15868
    },
    {
      "epoch": 9.68505859375e-05,
      "step": 15868,
      "training_step_time": 0.3931283950805664
    },
    {
      "epoch": 9.6856689453125e-05,
      "model_forward_time": 0.11504983901977539,
      "step": 15869
    },
    {
      "epoch": 9.6856689453125e-05,
      "step": 15869,
      "training_step_time": 0.3881356716156006
    },
    {
      "epoch": 9.686279296875e-05,
      "grad_norm": 0.17971128225326538,
      "learning_rate": 8.79396432173515e-05,
      "loss": 0.0508,
      "step": 15870
    },
    {
      "epoch": 9.686279296875e-05,
      "model_forward_time": 0.1147620677947998,
      "step": 15870
    },
    {
      "epoch": 9.686279296875e-05,
      "step": 15870,
      "training_step_time": 0.3918583393096924
    },
    {
      "epoch": 9.6868896484375e-05,
      "model_forward_time": 0.11533904075622559,
      "step": 15871
    },
    {
      "epoch": 9.6868896484375e-05,
      "step": 15871,
      "training_step_time": 0.3869779109954834
    },
    {
      "epoch": 9.6875e-05,
      "model_forward_time": 0.11509108543395996,
      "step": 15872
    },
    {
      "epoch": 9.6875e-05,
      "step": 15872,
      "training_step_time": 0.39656972885131836
    },
    {
      "epoch": 9.6881103515625e-05,
      "model_forward_time": 0.11478900909423828,
      "step": 15873
    },
    {
      "epoch": 9.6881103515625e-05,
      "step": 15873,
      "training_step_time": 0.7686688899993896
    },
    {
      "epoch": 9.688720703125e-05,
      "model_forward_time": 0.11499691009521484,
      "step": 15874
    },
    {
      "epoch": 9.688720703125e-05,
      "step": 15874,
      "training_step_time": 0.427126407623291
    },
    {
      "epoch": 9.6893310546875e-05,
      "model_forward_time": 0.11437416076660156,
      "step": 15875
    },
    {
      "epoch": 9.6893310546875e-05,
      "step": 15875,
      "training_step_time": 0.4228377342224121
    },
    {
      "epoch": 9.68994140625e-05,
      "model_forward_time": 0.11440157890319824,
      "step": 15876
    },
    {
      "epoch": 9.68994140625e-05,
      "step": 15876,
      "training_step_time": 0.39862990379333496
    },
    {
      "epoch": 9.6905517578125e-05,
      "model_forward_time": 0.11396169662475586,
      "step": 15877
    },
    {
      "epoch": 9.6905517578125e-05,
      "step": 15877,
      "training_step_time": 0.3950643539428711
    },
    {
      "epoch": 9.691162109375e-05,
      "model_forward_time": 0.11452937126159668,
      "step": 15878
    },
    {
      "epoch": 9.691162109375e-05,
      "step": 15878,
      "training_step_time": 0.46529078483581543
    },
    {
      "epoch": 9.6917724609375e-05,
      "model_forward_time": 0.11791872978210449,
      "step": 15879
    },
    {
      "epoch": 9.6917724609375e-05,
      "step": 15879,
      "training_step_time": 0.40526819229125977
    },
    {
      "epoch": 9.6923828125e-05,
      "grad_norm": 0.17813031375408173,
      "learning_rate": 8.792168815869493e-05,
      "loss": 0.0597,
      "step": 15880
    },
    {
      "epoch": 9.6923828125e-05,
      "model_forward_time": 0.11504030227661133,
      "step": 15880
    },
    {
      "epoch": 9.6923828125e-05,
      "step": 15880,
      "training_step_time": 0.4191470146179199
    },
    {
      "epoch": 9.6929931640625e-05,
      "model_forward_time": 0.11475563049316406,
      "step": 15881
    },
    {
      "epoch": 9.6929931640625e-05,
      "step": 15881,
      "training_step_time": 0.4492502212524414
    },
    {
      "epoch": 9.693603515625e-05,
      "model_forward_time": 0.11479353904724121,
      "step": 15882
    },
    {
      "epoch": 9.693603515625e-05,
      "step": 15882,
      "training_step_time": 0.39376378059387207
    },
    {
      "epoch": 9.6942138671875e-05,
      "model_forward_time": 0.11543822288513184,
      "step": 15883
    },
    {
      "epoch": 9.6942138671875e-05,
      "step": 15883,
      "training_step_time": 0.39543938636779785
    },
    {
      "epoch": 9.69482421875e-05,
      "model_forward_time": 0.11525440216064453,
      "step": 15884
    },
    {
      "epoch": 9.69482421875e-05,
      "step": 15884,
      "training_step_time": 0.3921048641204834
    },
    {
      "epoch": 9.6954345703125e-05,
      "model_forward_time": 0.11493372917175293,
      "step": 15885
    },
    {
      "epoch": 9.6954345703125e-05,
      "step": 15885,
      "training_step_time": 0.6502745151519775
    },
    {
      "epoch": 9.696044921875e-05,
      "model_forward_time": 0.11449575424194336,
      "step": 15886
    },
    {
      "epoch": 9.696044921875e-05,
      "step": 15886,
      "training_step_time": 0.3955800533294678
    },
    {
      "epoch": 9.6966552734375e-05,
      "model_forward_time": 0.11490368843078613,
      "step": 15887
    },
    {
      "epoch": 9.6966552734375e-05,
      "step": 15887,
      "training_step_time": 0.38674283027648926
    },
    {
      "epoch": 9.697265625e-05,
      "model_forward_time": 0.11459040641784668,
      "step": 15888
    },
    {
      "epoch": 9.697265625e-05,
      "step": 15888,
      "training_step_time": 0.43370509147644043
    },
    {
      "epoch": 9.6978759765625e-05,
      "model_forward_time": 0.11549782752990723,
      "step": 15889
    },
    {
      "epoch": 9.6978759765625e-05,
      "step": 15889,
      "training_step_time": 0.47228527069091797
    },
    {
      "epoch": 9.698486328125e-05,
      "grad_norm": 0.16810421645641327,
      "learning_rate": 8.790372158043074e-05,
      "loss": 0.0494,
      "step": 15890
    },
    {
      "epoch": 9.698486328125e-05,
      "model_forward_time": 0.11451220512390137,
      "step": 15890
    },
    {
      "epoch": 9.698486328125e-05,
      "step": 15890,
      "training_step_time": 0.3904111385345459
    },
    {
      "epoch": 9.6990966796875e-05,
      "model_forward_time": 0.11606240272521973,
      "step": 15891
    },
    {
      "epoch": 9.6990966796875e-05,
      "step": 15891,
      "training_step_time": 0.6218171119689941
    },
    {
      "epoch": 9.69970703125e-05,
      "model_forward_time": 0.11518001556396484,
      "step": 15892
    },
    {
      "epoch": 9.69970703125e-05,
      "step": 15892,
      "training_step_time": 0.3956718444824219
    },
    {
      "epoch": 9.7003173828125e-05,
      "model_forward_time": 0.11530709266662598,
      "step": 15893
    },
    {
      "epoch": 9.7003173828125e-05,
      "step": 15893,
      "training_step_time": 0.5133373737335205
    },
    {
      "epoch": 9.700927734375e-05,
      "model_forward_time": 0.11481356620788574,
      "step": 15894
    },
    {
      "epoch": 9.700927734375e-05,
      "step": 15894,
      "training_step_time": 0.43923449516296387
    },
    {
      "epoch": 9.7015380859375e-05,
      "model_forward_time": 0.11500239372253418,
      "step": 15895
    },
    {
      "epoch": 9.7015380859375e-05,
      "step": 15895,
      "training_step_time": 0.46204423904418945
    },
    {
      "epoch": 9.7021484375e-05,
      "model_forward_time": 0.11359071731567383,
      "step": 15896
    },
    {
      "epoch": 9.7021484375e-05,
      "step": 15896,
      "training_step_time": 0.38691091537475586
    },
    {
      "epoch": 9.7027587890625e-05,
      "model_forward_time": 0.11525416374206543,
      "step": 15897
    },
    {
      "epoch": 9.7027587890625e-05,
      "step": 15897,
      "training_step_time": 0.45685434341430664
    },
    {
      "epoch": 9.703369140625e-05,
      "model_forward_time": 0.11527323722839355,
      "step": 15898
    },
    {
      "epoch": 9.703369140625e-05,
      "step": 15898,
      "training_step_time": 0.4032561779022217
    },
    {
      "epoch": 9.7039794921875e-05,
      "model_forward_time": 0.11422514915466309,
      "step": 15899
    },
    {
      "epoch": 9.7039794921875e-05,
      "step": 15899,
      "training_step_time": 0.39695262908935547
    },
    {
      "epoch": 9.70458984375e-05,
      "grad_norm": 0.15702082216739655,
      "learning_rate": 8.788574348801675e-05,
      "loss": 0.0526,
      "step": 15900
    },
    {
      "epoch": 9.70458984375e-05,
      "model_forward_time": 0.11492705345153809,
      "step": 15900
    },
    {
      "epoch": 9.70458984375e-05,
      "step": 15900,
      "training_step_time": 0.41248536109924316
    },
    {
      "epoch": 9.7052001953125e-05,
      "model_forward_time": 0.11566448211669922,
      "step": 15901
    },
    {
      "epoch": 9.7052001953125e-05,
      "step": 15901,
      "training_step_time": 0.44939661026000977
    },
    {
      "epoch": 9.705810546875e-05,
      "model_forward_time": 0.1148221492767334,
      "step": 15902
    },
    {
      "epoch": 9.705810546875e-05,
      "step": 15902,
      "training_step_time": 0.42670273780822754
    },
    {
      "epoch": 9.7064208984375e-05,
      "model_forward_time": 0.1147470474243164,
      "step": 15903
    },
    {
      "epoch": 9.7064208984375e-05,
      "step": 15903,
      "training_step_time": 0.6093177795410156
    },
    {
      "epoch": 9.70703125e-05,
      "model_forward_time": 0.11635398864746094,
      "step": 15904
    },
    {
      "epoch": 9.70703125e-05,
      "step": 15904,
      "training_step_time": 0.3927583694458008
    },
    {
      "epoch": 9.7076416015625e-05,
      "model_forward_time": 0.11486172676086426,
      "step": 15905
    },
    {
      "epoch": 9.7076416015625e-05,
      "step": 15905,
      "training_step_time": 0.4181492328643799
    },
    {
      "epoch": 9.708251953125e-05,
      "model_forward_time": 0.11511659622192383,
      "step": 15906
    },
    {
      "epoch": 9.708251953125e-05,
      "step": 15906,
      "training_step_time": 0.4238409996032715
    },
    {
      "epoch": 9.7088623046875e-05,
      "model_forward_time": 0.11445069313049316,
      "step": 15907
    },
    {
      "epoch": 9.7088623046875e-05,
      "step": 15907,
      "training_step_time": 0.4602489471435547
    },
    {
      "epoch": 9.70947265625e-05,
      "model_forward_time": 0.1149146556854248,
      "step": 15908
    },
    {
      "epoch": 9.70947265625e-05,
      "step": 15908,
      "training_step_time": 0.4495081901550293
    },
    {
      "epoch": 9.7100830078125e-05,
      "model_forward_time": 0.11507749557495117,
      "step": 15909
    },
    {
      "epoch": 9.7100830078125e-05,
      "step": 15909,
      "training_step_time": 0.6104476451873779
    },
    {
      "epoch": 9.710693359375e-05,
      "grad_norm": 0.15409013628959656,
      "learning_rate": 8.786775388691418e-05,
      "loss": 0.0581,
      "step": 15910
    },
    {
      "epoch": 9.710693359375e-05,
      "model_forward_time": 0.11512613296508789,
      "step": 15910
    },
    {
      "epoch": 9.710693359375e-05,
      "step": 15910,
      "training_step_time": 0.4000110626220703
    },
    {
      "epoch": 9.7113037109375e-05,
      "model_forward_time": 0.11512279510498047,
      "step": 15911
    },
    {
      "epoch": 9.7113037109375e-05,
      "step": 15911,
      "training_step_time": 0.3984251022338867
    },
    {
      "epoch": 9.7119140625e-05,
      "model_forward_time": 0.1153419017791748,
      "step": 15912
    },
    {
      "epoch": 9.7119140625e-05,
      "step": 15912,
      "training_step_time": 0.38277745246887207
    },
    {
      "epoch": 9.7125244140625e-05,
      "model_forward_time": 0.11527466773986816,
      "step": 15913
    },
    {
      "epoch": 9.7125244140625e-05,
      "step": 15913,
      "training_step_time": 0.3926575183868408
    },
    {
      "epoch": 9.713134765625e-05,
      "model_forward_time": 0.11529946327209473,
      "step": 15914
    },
    {
      "epoch": 9.713134765625e-05,
      "step": 15914,
      "training_step_time": 0.38756251335144043
    },
    {
      "epoch": 9.7137451171875e-05,
      "model_forward_time": 0.1155998706817627,
      "step": 15915
    },
    {
      "epoch": 9.7137451171875e-05,
      "step": 15915,
      "training_step_time": 0.5536665916442871
    },
    {
      "epoch": 9.71435546875e-05,
      "model_forward_time": 0.11492609977722168,
      "step": 15916
    },
    {
      "epoch": 9.71435546875e-05,
      "step": 15916,
      "training_step_time": 0.3886983394622803
    },
    {
      "epoch": 9.7149658203125e-05,
      "model_forward_time": 0.1148226261138916,
      "step": 15917
    },
    {
      "epoch": 9.7149658203125e-05,
      "step": 15917,
      "training_step_time": 0.38427305221557617
    },
    {
      "epoch": 9.715576171875e-05,
      "model_forward_time": 0.11603331565856934,
      "step": 15918
    },
    {
      "epoch": 9.715576171875e-05,
      "step": 15918,
      "training_step_time": 0.42343854904174805
    },
    {
      "epoch": 9.7161865234375e-05,
      "model_forward_time": 0.11584997177124023,
      "step": 15919
    },
    {
      "epoch": 9.7161865234375e-05,
      "step": 15919,
      "training_step_time": 0.42923974990844727
    },
    {
      "epoch": 9.716796875e-05,
      "grad_norm": 0.12372096627950668,
      "learning_rate": 8.784975278258783e-05,
      "loss": 0.0523,
      "step": 15920
    },
    {
      "epoch": 9.716796875e-05,
      "model_forward_time": 0.11438918113708496,
      "step": 15920
    },
    {
      "epoch": 9.716796875e-05,
      "step": 15920,
      "training_step_time": 0.3879127502441406
    },
    {
      "epoch": 9.7174072265625e-05,
      "model_forward_time": 0.1154017448425293,
      "step": 15921
    },
    {
      "epoch": 9.7174072265625e-05,
      "step": 15921,
      "training_step_time": 0.4742245674133301
    },
    {
      "epoch": 9.718017578125e-05,
      "model_forward_time": 0.1152639389038086,
      "step": 15922
    },
    {
      "epoch": 9.718017578125e-05,
      "step": 15922,
      "training_step_time": 0.4130096435546875
    },
    {
      "epoch": 9.7186279296875e-05,
      "model_forward_time": 0.11466193199157715,
      "step": 15923
    },
    {
      "epoch": 9.7186279296875e-05,
      "step": 15923,
      "training_step_time": 0.399611234664917
    },
    {
      "epoch": 9.71923828125e-05,
      "model_forward_time": 0.11446404457092285,
      "step": 15924
    },
    {
      "epoch": 9.71923828125e-05,
      "step": 15924,
      "training_step_time": 0.41701769828796387
    },
    {
      "epoch": 9.7198486328125e-05,
      "model_forward_time": 0.11601805686950684,
      "step": 15925
    },
    {
      "epoch": 9.7198486328125e-05,
      "step": 15925,
      "training_step_time": 0.398972749710083
    },
    {
      "epoch": 9.720458984375e-05,
      "model_forward_time": 0.11637735366821289,
      "step": 15926
    },
    {
      "epoch": 9.720458984375e-05,
      "step": 15926,
      "training_step_time": 0.4224576950073242
    },
    {
      "epoch": 9.7210693359375e-05,
      "model_forward_time": 0.11664414405822754,
      "step": 15927
    },
    {
      "epoch": 9.7210693359375e-05,
      "step": 15927,
      "training_step_time": 0.6283376216888428
    },
    {
      "epoch": 9.7216796875e-05,
      "model_forward_time": 0.11623048782348633,
      "step": 15928
    },
    {
      "epoch": 9.7216796875e-05,
      "step": 15928,
      "training_step_time": 0.6324677467346191
    },
    {
      "epoch": 9.7222900390625e-05,
      "model_forward_time": 0.12008380889892578,
      "step": 15929
    },
    {
      "epoch": 9.7222900390625e-05,
      "step": 15929,
      "training_step_time": 0.725029706954956
    },
    {
      "epoch": 9.722900390625e-05,
      "grad_norm": 0.14808984100818634,
      "learning_rate": 8.783174018050594e-05,
      "loss": 0.062,
      "step": 15930
    },
    {
      "epoch": 9.722900390625e-05,
      "model_forward_time": 0.12163901329040527,
      "step": 15930
    },
    {
      "epoch": 9.722900390625e-05,
      "step": 15930,
      "training_step_time": 0.7442941665649414
    },
    {
      "epoch": 9.7235107421875e-05,
      "model_forward_time": 0.12244248390197754,
      "step": 15931
    },
    {
      "epoch": 9.7235107421875e-05,
      "step": 15931,
      "training_step_time": 0.6905291080474854
    },
    {
      "epoch": 9.72412109375e-05,
      "model_forward_time": 0.11723041534423828,
      "step": 15932
    },
    {
      "epoch": 9.72412109375e-05,
      "step": 15932,
      "training_step_time": 0.6434569358825684
    },
    {
      "epoch": 9.7247314453125e-05,
      "model_forward_time": 0.12002205848693848,
      "step": 15933
    },
    {
      "epoch": 9.7247314453125e-05,
      "step": 15933,
      "training_step_time": 0.7197237014770508
    },
    {
      "epoch": 9.725341796875e-05,
      "model_forward_time": 0.11998844146728516,
      "step": 15934
    },
    {
      "epoch": 9.725341796875e-05,
      "step": 15934,
      "training_step_time": 0.7087361812591553
    },
    {
      "epoch": 9.7259521484375e-05,
      "model_forward_time": 0.1178431510925293,
      "step": 15935
    },
    {
      "epoch": 9.7259521484375e-05,
      "step": 15935,
      "training_step_time": 0.5813217163085938
    },
    {
      "epoch": 9.7265625e-05,
      "model_forward_time": 0.11923623085021973,
      "step": 15936
    },
    {
      "epoch": 9.7265625e-05,
      "step": 15936,
      "training_step_time": 0.6746537685394287
    },
    {
      "epoch": 9.7271728515625e-05,
      "model_forward_time": 0.11934137344360352,
      "step": 15937
    },
    {
      "epoch": 9.7271728515625e-05,
      "step": 15937,
      "training_step_time": 0.6814560890197754
    },
    {
      "epoch": 9.727783203125e-05,
      "model_forward_time": 0.11676144599914551,
      "step": 15938
    },
    {
      "epoch": 9.727783203125e-05,
      "step": 15938,
      "training_step_time": 0.7036783695220947
    },
    {
      "epoch": 9.7283935546875e-05,
      "model_forward_time": 0.11857390403747559,
      "step": 15939
    },
    {
      "epoch": 9.7283935546875e-05,
      "step": 15939,
      "training_step_time": 0.7512929439544678
    },
    {
      "epoch": 9.72900390625e-05,
      "grad_norm": 0.132648304104805,
      "learning_rate": 8.781371608614029e-05,
      "loss": 0.0529,
      "step": 15940
    },
    {
      "epoch": 9.72900390625e-05,
      "model_forward_time": 0.1242666244506836,
      "step": 15940
    },
    {
      "epoch": 9.72900390625e-05,
      "step": 15940,
      "training_step_time": 0.6635298728942871
    },
    {
      "epoch": 9.7296142578125e-05,
      "model_forward_time": 0.11678695678710938,
      "step": 15941
    },
    {
      "epoch": 9.7296142578125e-05,
      "step": 15941,
      "training_step_time": 0.6355130672454834
    },
    {
      "epoch": 9.730224609375e-05,
      "model_forward_time": 0.11848163604736328,
      "step": 15942
    },
    {
      "epoch": 9.730224609375e-05,
      "step": 15942,
      "training_step_time": 0.7172446250915527
    },
    {
      "epoch": 9.7308349609375e-05,
      "model_forward_time": 0.1244657039642334,
      "step": 15943
    },
    {
      "epoch": 9.7308349609375e-05,
      "step": 15943,
      "training_step_time": 0.6476023197174072
    },
    {
      "epoch": 9.7314453125e-05,
      "model_forward_time": 0.11937236785888672,
      "step": 15944
    },
    {
      "epoch": 9.7314453125e-05,
      "step": 15944,
      "training_step_time": 0.6804804801940918
    },
    {
      "epoch": 9.7320556640625e-05,
      "model_forward_time": 0.11994171142578125,
      "step": 15945
    },
    {
      "epoch": 9.7320556640625e-05,
      "step": 15945,
      "training_step_time": 0.6892023086547852
    },
    {
      "epoch": 9.732666015625e-05,
      "model_forward_time": 0.12378454208374023,
      "step": 15946
    },
    {
      "epoch": 9.732666015625e-05,
      "step": 15946,
      "training_step_time": 0.6635611057281494
    },
    {
      "epoch": 9.7332763671875e-05,
      "model_forward_time": 0.11861419677734375,
      "step": 15947
    },
    {
      "epoch": 9.7332763671875e-05,
      "step": 15947,
      "training_step_time": 0.6815173625946045
    },
    {
      "epoch": 9.73388671875e-05,
      "model_forward_time": 0.12804079055786133,
      "step": 15948
    },
    {
      "epoch": 9.73388671875e-05,
      "step": 15948,
      "training_step_time": 0.6926662921905518
    },
    {
      "epoch": 9.7344970703125e-05,
      "model_forward_time": 0.1267714500427246,
      "step": 15949
    },
    {
      "epoch": 9.7344970703125e-05,
      "step": 15949,
      "training_step_time": 0.7140836715698242
    },
    {
      "epoch": 9.735107421875e-05,
      "grad_norm": 0.16749198734760284,
      "learning_rate": 8.77956805049661e-05,
      "loss": 0.0638,
      "step": 15950
    },
    {
      "epoch": 9.735107421875e-05,
      "model_forward_time": 0.11664462089538574,
      "step": 15950
    },
    {
      "epoch": 9.735107421875e-05,
      "step": 15950,
      "training_step_time": 0.6960833072662354
    },
    {
      "epoch": 9.7357177734375e-05,
      "model_forward_time": 0.12372946739196777,
      "step": 15951
    },
    {
      "epoch": 9.7357177734375e-05,
      "step": 15951,
      "training_step_time": 0.6263620853424072
    },
    {
      "epoch": 9.736328125e-05,
      "model_forward_time": 0.11881089210510254,
      "step": 15952
    },
    {
      "epoch": 9.736328125e-05,
      "step": 15952,
      "training_step_time": 0.6642203330993652
    },
    {
      "epoch": 9.7369384765625e-05,
      "model_forward_time": 0.11980319023132324,
      "step": 15953
    },
    {
      "epoch": 9.7369384765625e-05,
      "step": 15953,
      "training_step_time": 0.6430697441101074
    },
    {
      "epoch": 9.737548828125e-05,
      "model_forward_time": 0.11779999732971191,
      "step": 15954
    },
    {
      "epoch": 9.737548828125e-05,
      "step": 15954,
      "training_step_time": 0.6488037109375
    },
    {
      "epoch": 9.7381591796875e-05,
      "model_forward_time": 0.11703896522521973,
      "step": 15955
    },
    {
      "epoch": 9.7381591796875e-05,
      "step": 15955,
      "training_step_time": 0.6662256717681885
    },
    {
      "epoch": 9.73876953125e-05,
      "model_forward_time": 0.13603925704956055,
      "step": 15956
    },
    {
      "epoch": 9.73876953125e-05,
      "step": 15956,
      "training_step_time": 0.7030045986175537
    },
    {
      "epoch": 9.7393798828125e-05,
      "model_forward_time": 0.11597681045532227,
      "step": 15957
    },
    {
      "epoch": 9.7393798828125e-05,
      "step": 15957,
      "training_step_time": 0.6842594146728516
    },
    {
      "epoch": 9.739990234375e-05,
      "model_forward_time": 0.11924529075622559,
      "step": 15958
    },
    {
      "epoch": 9.739990234375e-05,
      "step": 15958,
      "training_step_time": 0.7586719989776611
    },
    {
      "epoch": 9.7406005859375e-05,
      "model_forward_time": 0.12153100967407227,
      "step": 15959
    },
    {
      "epoch": 9.7406005859375e-05,
      "step": 15959,
      "training_step_time": 0.7204225063323975
    },
    {
      "epoch": 9.7412109375e-05,
      "grad_norm": 0.1472148597240448,
      "learning_rate": 8.77776334424621e-05,
      "loss": 0.0659,
      "step": 15960
    },
    {
      "epoch": 9.7412109375e-05,
      "model_forward_time": 0.11845088005065918,
      "step": 15960
    },
    {
      "epoch": 9.7412109375e-05,
      "step": 15960,
      "training_step_time": 0.6371681690216064
    },
    {
      "epoch": 9.7418212890625e-05,
      "model_forward_time": 0.11623764038085938,
      "step": 15961
    },
    {
      "epoch": 9.7418212890625e-05,
      "step": 15961,
      "training_step_time": 0.7435309886932373
    },
    {
      "epoch": 9.742431640625e-05,
      "model_forward_time": 0.12221384048461914,
      "step": 15962
    },
    {
      "epoch": 9.742431640625e-05,
      "step": 15962,
      "training_step_time": 0.7202916145324707
    },
    {
      "epoch": 9.7430419921875e-05,
      "model_forward_time": 0.11922883987426758,
      "step": 15963
    },
    {
      "epoch": 9.7430419921875e-05,
      "step": 15963,
      "training_step_time": 0.621906042098999
    },
    {
      "epoch": 9.74365234375e-05,
      "model_forward_time": 0.1186070442199707,
      "step": 15964
    },
    {
      "epoch": 9.74365234375e-05,
      "step": 15964,
      "training_step_time": 0.6705472469329834
    },
    {
      "epoch": 9.7442626953125e-05,
      "model_forward_time": 0.12133216857910156,
      "step": 15965
    },
    {
      "epoch": 9.7442626953125e-05,
      "step": 15965,
      "training_step_time": 0.673560380935669
    },
    {
      "epoch": 9.744873046875e-05,
      "model_forward_time": 0.12051248550415039,
      "step": 15966
    },
    {
      "epoch": 9.744873046875e-05,
      "step": 15966,
      "training_step_time": 0.6867880821228027
    },
    {
      "epoch": 9.7454833984375e-05,
      "model_forward_time": 0.12103009223937988,
      "step": 15967
    },
    {
      "epoch": 9.7454833984375e-05,
      "step": 15967,
      "training_step_time": 0.6275157928466797
    },
    {
      "epoch": 9.74609375e-05,
      "model_forward_time": 0.1188957691192627,
      "step": 15968
    },
    {
      "epoch": 9.74609375e-05,
      "step": 15968,
      "training_step_time": 0.6708691120147705
    },
    {
      "epoch": 9.7467041015625e-05,
      "model_forward_time": 0.12197136878967285,
      "step": 15969
    },
    {
      "epoch": 9.7467041015625e-05,
      "step": 15969,
      "training_step_time": 0.6701924800872803
    },
    {
      "epoch": 9.747314453125e-05,
      "grad_norm": 0.19220319390296936,
      "learning_rate": 8.775957490411053e-05,
      "loss": 0.0631,
      "step": 15970
    },
    {
      "epoch": 9.747314453125e-05,
      "model_forward_time": 0.11776566505432129,
      "step": 15970
    },
    {
      "epoch": 9.747314453125e-05,
      "step": 15970,
      "training_step_time": 0.6468808650970459
    },
    {
      "epoch": 9.7479248046875e-05,
      "model_forward_time": 0.11812472343444824,
      "step": 15971
    },
    {
      "epoch": 9.7479248046875e-05,
      "step": 15971,
      "training_step_time": 0.6567449569702148
    },
    {
      "epoch": 9.74853515625e-05,
      "model_forward_time": 0.1272425651550293,
      "step": 15972
    },
    {
      "epoch": 9.74853515625e-05,
      "step": 15972,
      "training_step_time": 0.6691005229949951
    },
    {
      "epoch": 9.7491455078125e-05,
      "model_forward_time": 0.11997199058532715,
      "step": 15973
    },
    {
      "epoch": 9.7491455078125e-05,
      "step": 15973,
      "training_step_time": 0.6378903388977051
    },
    {
      "epoch": 9.749755859375e-05,
      "model_forward_time": 0.12146830558776855,
      "step": 15974
    },
    {
      "epoch": 9.749755859375e-05,
      "step": 15974,
      "training_step_time": 0.6437313556671143
    },
    {
      "epoch": 9.7503662109375e-05,
      "model_forward_time": 0.11896538734436035,
      "step": 15975
    },
    {
      "epoch": 9.7503662109375e-05,
      "step": 15975,
      "training_step_time": 0.6732451915740967
    },
    {
      "epoch": 9.7509765625e-05,
      "model_forward_time": 0.12642431259155273,
      "step": 15976
    },
    {
      "epoch": 9.7509765625e-05,
      "step": 15976,
      "training_step_time": 0.6558308601379395
    },
    {
      "epoch": 9.7515869140625e-05,
      "model_forward_time": 0.12312793731689453,
      "step": 15977
    },
    {
      "epoch": 9.7515869140625e-05,
      "step": 15977,
      "training_step_time": 0.7425355911254883
    },
    {
      "epoch": 9.752197265625e-05,
      "model_forward_time": 0.11673831939697266,
      "step": 15978
    },
    {
      "epoch": 9.752197265625e-05,
      "step": 15978,
      "training_step_time": 0.6422417163848877
    },
    {
      "epoch": 9.7528076171875e-05,
      "model_forward_time": 0.11927628517150879,
      "step": 15979
    },
    {
      "epoch": 9.7528076171875e-05,
      "step": 15979,
      "training_step_time": 0.721198558807373
    },
    {
      "epoch": 9.75341796875e-05,
      "grad_norm": 0.16740655899047852,
      "learning_rate": 8.774150489539707e-05,
      "loss": 0.0608,
      "step": 15980
    },
    {
      "epoch": 9.75341796875e-05,
      "model_forward_time": 0.11628985404968262,
      "step": 15980
    },
    {
      "epoch": 9.75341796875e-05,
      "step": 15980,
      "training_step_time": 0.6259489059448242
    },
    {
      "epoch": 9.7540283203125e-05,
      "model_forward_time": 0.12097978591918945,
      "step": 15981
    },
    {
      "epoch": 9.7540283203125e-05,
      "step": 15981,
      "training_step_time": 0.671820878982544
    },
    {
      "epoch": 9.754638671875e-05,
      "model_forward_time": 0.11517143249511719,
      "step": 15982
    },
    {
      "epoch": 9.754638671875e-05,
      "step": 15982,
      "training_step_time": 0.6646215915679932
    },
    {
      "epoch": 9.7552490234375e-05,
      "model_forward_time": 0.11925888061523438,
      "step": 15983
    },
    {
      "epoch": 9.7552490234375e-05,
      "step": 15983,
      "training_step_time": 0.6660587787628174
    },
    {
      "epoch": 9.755859375e-05,
      "model_forward_time": 0.12952542304992676,
      "step": 15984
    },
    {
      "epoch": 9.755859375e-05,
      "step": 15984,
      "training_step_time": 0.7380940914154053
    },
    {
      "epoch": 9.7564697265625e-05,
      "model_forward_time": 0.11960005760192871,
      "step": 15985
    },
    {
      "epoch": 9.7564697265625e-05,
      "step": 15985,
      "training_step_time": 0.6650209426879883
    },
    {
      "epoch": 9.757080078125e-05,
      "model_forward_time": 0.12021183967590332,
      "step": 15986
    },
    {
      "epoch": 9.757080078125e-05,
      "step": 15986,
      "training_step_time": 0.6982254981994629
    },
    {
      "epoch": 9.7576904296875e-05,
      "model_forward_time": 0.11974191665649414,
      "step": 15987
    },
    {
      "epoch": 9.7576904296875e-05,
      "step": 15987,
      "training_step_time": 0.6280264854431152
    },
    {
      "epoch": 9.75830078125e-05,
      "model_forward_time": 0.14542603492736816,
      "step": 15988
    },
    {
      "epoch": 9.75830078125e-05,
      "step": 15988,
      "training_step_time": 0.6224396228790283
    },
    {
      "epoch": 9.7589111328125e-05,
      "model_forward_time": 0.11879658699035645,
      "step": 15989
    },
    {
      "epoch": 9.7589111328125e-05,
      "step": 15989,
      "training_step_time": 0.6742725372314453
    },
    {
      "epoch": 9.759521484375e-05,
      "grad_norm": 0.14806261658668518,
      "learning_rate": 8.772342342181095e-05,
      "loss": 0.0574,
      "step": 15990
    },
    {
      "epoch": 9.759521484375e-05,
      "model_forward_time": 0.12127375602722168,
      "step": 15990
    },
    {
      "epoch": 9.759521484375e-05,
      "step": 15990,
      "training_step_time": 0.6946384906768799
    },
    {
      "epoch": 9.7601318359375e-05,
      "model_forward_time": 0.11734771728515625,
      "step": 15991
    },
    {
      "epoch": 9.7601318359375e-05,
      "step": 15991,
      "training_step_time": 0.6582129001617432
    },
    {
      "epoch": 9.7607421875e-05,
      "model_forward_time": 0.11641049385070801,
      "step": 15992
    },
    {
      "epoch": 9.7607421875e-05,
      "step": 15992,
      "training_step_time": 0.600147008895874
    },
    {
      "epoch": 9.7613525390625e-05,
      "model_forward_time": 0.11985278129577637,
      "step": 15993
    },
    {
      "epoch": 9.7613525390625e-05,
      "step": 15993,
      "training_step_time": 0.6248011589050293
    },
    {
      "epoch": 9.761962890625e-05,
      "model_forward_time": 0.12168312072753906,
      "step": 15994
    },
    {
      "epoch": 9.761962890625e-05,
      "step": 15994,
      "training_step_time": 0.5667455196380615
    },
    {
      "epoch": 9.7625732421875e-05,
      "model_forward_time": 0.12116408348083496,
      "step": 15995
    },
    {
      "epoch": 9.7625732421875e-05,
      "step": 15995,
      "training_step_time": 0.6849820613861084
    },
    {
      "epoch": 9.76318359375e-05,
      "model_forward_time": 0.11965560913085938,
      "step": 15996
    },
    {
      "epoch": 9.76318359375e-05,
      "step": 15996,
      "training_step_time": 0.5955348014831543
    },
    {
      "epoch": 9.7637939453125e-05,
      "model_forward_time": 0.12116026878356934,
      "step": 15997
    },
    {
      "epoch": 9.7637939453125e-05,
      "step": 15997,
      "training_step_time": 0.6118254661560059
    },
    {
      "epoch": 9.764404296875e-05,
      "model_forward_time": 0.11939573287963867,
      "step": 15998
    },
    {
      "epoch": 9.764404296875e-05,
      "step": 15998,
      "training_step_time": 0.5374810695648193
    },
    {
      "epoch": 9.7650146484375e-05,
      "model_forward_time": 0.12428832054138184,
      "step": 15999
    },
    {
      "epoch": 9.7650146484375e-05,
      "step": 15999,
      "training_step_time": 0.5353469848632812
    },
    {
      "epoch": 9.765625e-05,
      "grad_norm": 0.2017529010772705,
      "learning_rate": 8.770533048884482e-05,
      "loss": 0.0593,
      "step": 16000
    },
    {
      "epoch": 9.765625e-05,
      "model_forward_time": 0.11264753341674805,
      "step": 16000
    },
    {
      "epoch": 9.765625e-05,
      "step": 16000,
      "training_step_time": 0.3563120365142822
    },
    {
      "epoch": 9.7662353515625e-05,
      "model_forward_time": 0.11192584037780762,
      "step": 16001
    },
    {
      "epoch": 9.7662353515625e-05,
      "step": 16001,
      "training_step_time": 0.38945889472961426
    },
    {
      "epoch": 9.766845703125e-05,
      "model_forward_time": 0.11337876319885254,
      "step": 16002
    },
    {
      "epoch": 9.766845703125e-05,
      "step": 16002,
      "training_step_time": 0.39968061447143555
    },
    {
      "epoch": 9.7674560546875e-05,
      "model_forward_time": 0.11377906799316406,
      "step": 16003
    },
    {
      "epoch": 9.7674560546875e-05,
      "step": 16003,
      "training_step_time": 0.41087794303894043
    },
    {
      "epoch": 9.76806640625e-05,
      "model_forward_time": 0.11486983299255371,
      "step": 16004
    },
    {
      "epoch": 9.76806640625e-05,
      "step": 16004,
      "training_step_time": 0.3793632984161377
    },
    {
      "epoch": 9.7686767578125e-05,
      "model_forward_time": 0.113739013671875,
      "step": 16005
    },
    {
      "epoch": 9.7686767578125e-05,
      "step": 16005,
      "training_step_time": 0.4044015407562256
    },
    {
      "epoch": 9.769287109375e-05,
      "model_forward_time": 0.11454224586486816,
      "step": 16006
    },
    {
      "epoch": 9.769287109375e-05,
      "step": 16006,
      "training_step_time": 0.38661670684814453
    },
    {
      "epoch": 9.7698974609375e-05,
      "model_forward_time": 0.11539268493652344,
      "step": 16007
    },
    {
      "epoch": 9.7698974609375e-05,
      "step": 16007,
      "training_step_time": 0.39757251739501953
    },
    {
      "epoch": 9.7705078125e-05,
      "model_forward_time": 0.11438250541687012,
      "step": 16008
    },
    {
      "epoch": 9.7705078125e-05,
      "step": 16008,
      "training_step_time": 0.39593958854675293
    },
    {
      "epoch": 9.7711181640625e-05,
      "model_forward_time": 0.11461615562438965,
      "step": 16009
    },
    {
      "epoch": 9.7711181640625e-05,
      "step": 16009,
      "training_step_time": 0.447629451751709
    },
    {
      "epoch": 9.771728515625e-05,
      "grad_norm": 0.14827941358089447,
      "learning_rate": 8.768722610199484e-05,
      "loss": 0.0588,
      "step": 16010
    },
    {
      "epoch": 9.771728515625e-05,
      "model_forward_time": 0.11529016494750977,
      "step": 16010
    },
    {
      "epoch": 9.771728515625e-05,
      "step": 16010,
      "training_step_time": 0.4228048324584961
    },
    {
      "epoch": 9.7723388671875e-05,
      "model_forward_time": 0.11641073226928711,
      "step": 16011
    },
    {
      "epoch": 9.7723388671875e-05,
      "step": 16011,
      "training_step_time": 0.49359607696533203
    },
    {
      "epoch": 9.77294921875e-05,
      "model_forward_time": 0.11560344696044922,
      "step": 16012
    },
    {
      "epoch": 9.77294921875e-05,
      "step": 16012,
      "training_step_time": 0.3945167064666748
    },
    {
      "epoch": 9.7735595703125e-05,
      "model_forward_time": 0.11596441268920898,
      "step": 16013
    },
    {
      "epoch": 9.7735595703125e-05,
      "step": 16013,
      "training_step_time": 0.43411970138549805
    },
    {
      "epoch": 9.774169921875e-05,
      "model_forward_time": 0.11493206024169922,
      "step": 16014
    },
    {
      "epoch": 9.774169921875e-05,
      "step": 16014,
      "training_step_time": 0.40851378440856934
    },
    {
      "epoch": 9.7747802734375e-05,
      "model_forward_time": 0.11487507820129395,
      "step": 16015
    },
    {
      "epoch": 9.7747802734375e-05,
      "step": 16015,
      "training_step_time": 0.43650245666503906
    },
    {
      "epoch": 9.775390625e-05,
      "model_forward_time": 0.11536765098571777,
      "step": 16016
    },
    {
      "epoch": 9.775390625e-05,
      "step": 16016,
      "training_step_time": 0.40073132514953613
    },
    {
      "epoch": 9.7760009765625e-05,
      "model_forward_time": 0.11573672294616699,
      "step": 16017
    },
    {
      "epoch": 9.7760009765625e-05,
      "step": 16017,
      "training_step_time": 0.405102014541626
    },
    {
      "epoch": 9.776611328125e-05,
      "model_forward_time": 0.11520099639892578,
      "step": 16018
    },
    {
      "epoch": 9.776611328125e-05,
      "step": 16018,
      "training_step_time": 0.4923741817474365
    },
    {
      "epoch": 9.7772216796875e-05,
      "model_forward_time": 0.11472654342651367,
      "step": 16019
    },
    {
      "epoch": 9.7772216796875e-05,
      "step": 16019,
      "training_step_time": 0.39489197731018066
    },
    {
      "epoch": 9.77783203125e-05,
      "grad_norm": 0.15695956349372864,
      "learning_rate": 8.766911026676064e-05,
      "loss": 0.0532,
      "step": 16020
    },
    {
      "epoch": 9.77783203125e-05,
      "model_forward_time": 0.11497306823730469,
      "step": 16020
    },
    {
      "epoch": 9.77783203125e-05,
      "step": 16020,
      "training_step_time": 0.39850282669067383
    },
    {
      "epoch": 9.7784423828125e-05,
      "model_forward_time": 0.11519265174865723,
      "step": 16021
    },
    {
      "epoch": 9.7784423828125e-05,
      "step": 16021,
      "training_step_time": 0.39699673652648926
    },
    {
      "epoch": 9.779052734375e-05,
      "model_forward_time": 0.1166234016418457,
      "step": 16022
    },
    {
      "epoch": 9.779052734375e-05,
      "step": 16022,
      "training_step_time": 0.39194726943969727
    },
    {
      "epoch": 9.7796630859375e-05,
      "model_forward_time": 0.11503028869628906,
      "step": 16023
    },
    {
      "epoch": 9.7796630859375e-05,
      "step": 16023,
      "training_step_time": 0.39338111877441406
    },
    {
      "epoch": 9.7802734375e-05,
      "model_forward_time": 0.11626029014587402,
      "step": 16024
    },
    {
      "epoch": 9.7802734375e-05,
      "step": 16024,
      "training_step_time": 0.3988823890686035
    },
    {
      "epoch": 9.7808837890625e-05,
      "model_forward_time": 0.11589336395263672,
      "step": 16025
    },
    {
      "epoch": 9.7808837890625e-05,
      "step": 16025,
      "training_step_time": 0.43445920944213867
    },
    {
      "epoch": 9.781494140625e-05,
      "model_forward_time": 0.11517500877380371,
      "step": 16026
    },
    {
      "epoch": 9.781494140625e-05,
      "step": 16026,
      "training_step_time": 0.4352717399597168
    },
    {
      "epoch": 9.7821044921875e-05,
      "model_forward_time": 0.11547517776489258,
      "step": 16027
    },
    {
      "epoch": 9.7821044921875e-05,
      "step": 16027,
      "training_step_time": 0.3927733898162842
    },
    {
      "epoch": 9.78271484375e-05,
      "model_forward_time": 0.115020751953125,
      "step": 16028
    },
    {
      "epoch": 9.78271484375e-05,
      "step": 16028,
      "training_step_time": 0.49793410301208496
    },
    {
      "epoch": 9.7833251953125e-05,
      "model_forward_time": 0.1157224178314209,
      "step": 16029
    },
    {
      "epoch": 9.7833251953125e-05,
      "step": 16029,
      "training_step_time": 0.4285290241241455
    },
    {
      "epoch": 9.783935546875e-05,
      "grad_norm": 0.14227300882339478,
      "learning_rate": 8.765098298864533e-05,
      "loss": 0.0574,
      "step": 16030
    },
    {
      "epoch": 9.783935546875e-05,
      "model_forward_time": 0.11711907386779785,
      "step": 16030
    },
    {
      "epoch": 9.783935546875e-05,
      "step": 16030,
      "training_step_time": 0.43093085289001465
    },
    {
      "epoch": 9.7845458984375e-05,
      "model_forward_time": 0.11670637130737305,
      "step": 16031
    },
    {
      "epoch": 9.7845458984375e-05,
      "step": 16031,
      "training_step_time": 0.3901047706604004
    },
    {
      "epoch": 9.78515625e-05,
      "model_forward_time": 0.11601400375366211,
      "step": 16032
    },
    {
      "epoch": 9.78515625e-05,
      "step": 16032,
      "training_step_time": 0.4496142864227295
    },
    {
      "epoch": 9.7857666015625e-05,
      "model_forward_time": 0.11612176895141602,
      "step": 16033
    },
    {
      "epoch": 9.7857666015625e-05,
      "step": 16033,
      "training_step_time": 0.4874124526977539
    },
    {
      "epoch": 9.786376953125e-05,
      "model_forward_time": 0.11583137512207031,
      "step": 16034
    },
    {
      "epoch": 9.786376953125e-05,
      "step": 16034,
      "training_step_time": 0.38996291160583496
    },
    {
      "epoch": 9.7869873046875e-05,
      "model_forward_time": 0.1151878833770752,
      "step": 16035
    },
    {
      "epoch": 9.7869873046875e-05,
      "step": 16035,
      "training_step_time": 0.39072561264038086
    },
    {
      "epoch": 9.78759765625e-05,
      "model_forward_time": 0.11589574813842773,
      "step": 16036
    },
    {
      "epoch": 9.78759765625e-05,
      "step": 16036,
      "training_step_time": 0.40437984466552734
    },
    {
      "epoch": 9.7882080078125e-05,
      "model_forward_time": 0.11575937271118164,
      "step": 16037
    },
    {
      "epoch": 9.7882080078125e-05,
      "step": 16037,
      "training_step_time": 0.39617204666137695
    },
    {
      "epoch": 9.788818359375e-05,
      "model_forward_time": 0.11538267135620117,
      "step": 16038
    },
    {
      "epoch": 9.788818359375e-05,
      "step": 16038,
      "training_step_time": 0.423412561416626
    },
    {
      "epoch": 9.7894287109375e-05,
      "model_forward_time": 0.11501288414001465,
      "step": 16039
    },
    {
      "epoch": 9.7894287109375e-05,
      "step": 16039,
      "training_step_time": 0.39117908477783203
    },
    {
      "epoch": 9.7900390625e-05,
      "grad_norm": 0.2730836570262909,
      "learning_rate": 8.763284427315551e-05,
      "loss": 0.0606,
      "step": 16040
    },
    {
      "epoch": 9.7900390625e-05,
      "model_forward_time": 0.1147007942199707,
      "step": 16040
    },
    {
      "epoch": 9.7900390625e-05,
      "step": 16040,
      "training_step_time": 0.44179463386535645
    },
    {
      "epoch": 9.7906494140625e-05,
      "model_forward_time": 0.11497616767883301,
      "step": 16041
    },
    {
      "epoch": 9.7906494140625e-05,
      "step": 16041,
      "training_step_time": 0.40201473236083984
    },
    {
      "epoch": 9.791259765625e-05,
      "model_forward_time": 0.11636185646057129,
      "step": 16042
    },
    {
      "epoch": 9.791259765625e-05,
      "step": 16042,
      "training_step_time": 0.4638204574584961
    },
    {
      "epoch": 9.7918701171875e-05,
      "model_forward_time": 0.11629414558410645,
      "step": 16043
    },
    {
      "epoch": 9.7918701171875e-05,
      "step": 16043,
      "training_step_time": 0.49484872817993164
    },
    {
      "epoch": 9.79248046875e-05,
      "model_forward_time": 0.11519837379455566,
      "step": 16044
    },
    {
      "epoch": 9.79248046875e-05,
      "step": 16044,
      "training_step_time": 0.464613676071167
    },
    {
      "epoch": 9.7930908203125e-05,
      "model_forward_time": 0.11660909652709961,
      "step": 16045
    },
    {
      "epoch": 9.7930908203125e-05,
      "step": 16045,
      "training_step_time": 0.4271082878112793
    },
    {
      "epoch": 9.793701171875e-05,
      "model_forward_time": 0.11558985710144043,
      "step": 16046
    },
    {
      "epoch": 9.793701171875e-05,
      "step": 16046,
      "training_step_time": 0.3800489902496338
    },
    {
      "epoch": 9.7943115234375e-05,
      "model_forward_time": 0.11530947685241699,
      "step": 16047
    },
    {
      "epoch": 9.7943115234375e-05,
      "step": 16047,
      "training_step_time": 0.46379590034484863
    },
    {
      "epoch": 9.794921875e-05,
      "model_forward_time": 0.11550569534301758,
      "step": 16048
    },
    {
      "epoch": 9.794921875e-05,
      "step": 16048,
      "training_step_time": 0.3959805965423584
    },
    {
      "epoch": 9.7955322265625e-05,
      "model_forward_time": 0.11631441116333008,
      "step": 16049
    },
    {
      "epoch": 9.7955322265625e-05,
      "step": 16049,
      "training_step_time": 0.38438987731933594
    },
    {
      "epoch": 9.796142578125e-05,
      "grad_norm": 0.10433229058980942,
      "learning_rate": 8.761469412580125e-05,
      "loss": 0.0598,
      "step": 16050
    },
    {
      "epoch": 9.796142578125e-05,
      "model_forward_time": 0.11533403396606445,
      "step": 16050
    },
    {
      "epoch": 9.796142578125e-05,
      "step": 16050,
      "training_step_time": 0.3956766128540039
    },
    {
      "epoch": 9.7967529296875e-05,
      "model_forward_time": 0.11490988731384277,
      "step": 16051
    },
    {
      "epoch": 9.7967529296875e-05,
      "step": 16051,
      "training_step_time": 0.4168272018432617
    },
    {
      "epoch": 9.79736328125e-05,
      "model_forward_time": 0.11543464660644531,
      "step": 16052
    },
    {
      "epoch": 9.79736328125e-05,
      "step": 16052,
      "training_step_time": 0.3748621940612793
    },
    {
      "epoch": 9.7979736328125e-05,
      "model_forward_time": 0.11470341682434082,
      "step": 16053
    },
    {
      "epoch": 9.7979736328125e-05,
      "step": 16053,
      "training_step_time": 0.6856415271759033
    },
    {
      "epoch": 9.798583984375e-05,
      "model_forward_time": 0.11554741859436035,
      "step": 16054
    },
    {
      "epoch": 9.798583984375e-05,
      "step": 16054,
      "training_step_time": 0.41105222702026367
    },
    {
      "epoch": 9.7991943359375e-05,
      "model_forward_time": 0.11586451530456543,
      "step": 16055
    },
    {
      "epoch": 9.7991943359375e-05,
      "step": 16055,
      "training_step_time": 0.4576728343963623
    },
    {
      "epoch": 9.7998046875e-05,
      "model_forward_time": 0.11557149887084961,
      "step": 16056
    },
    {
      "epoch": 9.7998046875e-05,
      "step": 16056,
      "training_step_time": 0.45433521270751953
    },
    {
      "epoch": 9.8004150390625e-05,
      "model_forward_time": 0.11496806144714355,
      "step": 16057
    },
    {
      "epoch": 9.8004150390625e-05,
      "step": 16057,
      "training_step_time": 0.3998434543609619
    },
    {
      "epoch": 9.801025390625e-05,
      "model_forward_time": 0.1145620346069336,
      "step": 16058
    },
    {
      "epoch": 9.801025390625e-05,
      "step": 16058,
      "training_step_time": 0.3897590637207031
    },
    {
      "epoch": 9.8016357421875e-05,
      "model_forward_time": 0.11478710174560547,
      "step": 16059
    },
    {
      "epoch": 9.8016357421875e-05,
      "step": 16059,
      "training_step_time": 0.5479109287261963
    },
    {
      "epoch": 9.80224609375e-05,
      "grad_norm": 0.10990314185619354,
      "learning_rate": 8.759653255209606e-05,
      "loss": 0.0551,
      "step": 16060
    },
    {
      "epoch": 9.80224609375e-05,
      "model_forward_time": 0.1143338680267334,
      "step": 16060
    },
    {
      "epoch": 9.80224609375e-05,
      "step": 16060,
      "training_step_time": 0.3873264789581299
    },
    {
      "epoch": 9.8028564453125e-05,
      "model_forward_time": 0.11522412300109863,
      "step": 16061
    },
    {
      "epoch": 9.8028564453125e-05,
      "step": 16061,
      "training_step_time": 0.40665316581726074
    },
    {
      "epoch": 9.803466796875e-05,
      "model_forward_time": 0.11536955833435059,
      "step": 16062
    },
    {
      "epoch": 9.803466796875e-05,
      "step": 16062,
      "training_step_time": 0.39028167724609375
    },
    {
      "epoch": 9.8040771484375e-05,
      "model_forward_time": 0.11538529396057129,
      "step": 16063
    },
    {
      "epoch": 9.8040771484375e-05,
      "step": 16063,
      "training_step_time": 0.3920445442199707
    },
    {
      "epoch": 9.8046875e-05,
      "model_forward_time": 0.11536812782287598,
      "step": 16064
    },
    {
      "epoch": 9.8046875e-05,
      "step": 16064,
      "training_step_time": 0.37058234214782715
    },
    {
      "epoch": 9.8052978515625e-05,
      "model_forward_time": 0.1159977912902832,
      "step": 16065
    },
    {
      "epoch": 9.8052978515625e-05,
      "step": 16065,
      "training_step_time": 0.7796618938446045
    },
    {
      "epoch": 9.805908203125e-05,
      "model_forward_time": 0.11464715003967285,
      "step": 16066
    },
    {
      "epoch": 9.805908203125e-05,
      "step": 16066,
      "training_step_time": 0.3924243450164795
    },
    {
      "epoch": 9.8065185546875e-05,
      "model_forward_time": 0.11473965644836426,
      "step": 16067
    },
    {
      "epoch": 9.8065185546875e-05,
      "step": 16067,
      "training_step_time": 0.42113780975341797
    },
    {
      "epoch": 9.80712890625e-05,
      "model_forward_time": 0.11450958251953125,
      "step": 16068
    },
    {
      "epoch": 9.80712890625e-05,
      "step": 16068,
      "training_step_time": 0.4747178554534912
    },
    {
      "epoch": 9.8077392578125e-05,
      "model_forward_time": 0.11496686935424805,
      "step": 16069
    },
    {
      "epoch": 9.8077392578125e-05,
      "step": 16069,
      "training_step_time": 0.40997815132141113
    },
    {
      "epoch": 9.808349609375e-05,
      "grad_norm": 0.1983831524848938,
      "learning_rate": 8.757835955755695e-05,
      "loss": 0.0572,
      "step": 16070
    },
    {
      "epoch": 9.808349609375e-05,
      "model_forward_time": 0.11440753936767578,
      "step": 16070
    },
    {
      "epoch": 9.808349609375e-05,
      "step": 16070,
      "training_step_time": 0.467165470123291
    },
    {
      "epoch": 9.8089599609375e-05,
      "model_forward_time": 0.11486053466796875,
      "step": 16071
    },
    {
      "epoch": 9.8089599609375e-05,
      "step": 16071,
      "training_step_time": 0.7526381015777588
    },
    {
      "epoch": 9.8095703125e-05,
      "model_forward_time": 0.11406350135803223,
      "step": 16072
    },
    {
      "epoch": 9.8095703125e-05,
      "step": 16072,
      "training_step_time": 0.38887691497802734
    },
    {
      "epoch": 9.8101806640625e-05,
      "model_forward_time": 0.11462116241455078,
      "step": 16073
    },
    {
      "epoch": 9.8101806640625e-05,
      "step": 16073,
      "training_step_time": 0.45789647102355957
    },
    {
      "epoch": 9.810791015625e-05,
      "model_forward_time": 0.11463451385498047,
      "step": 16074
    },
    {
      "epoch": 9.810791015625e-05,
      "step": 16074,
      "training_step_time": 0.4264819622039795
    },
    {
      "epoch": 9.8114013671875e-05,
      "model_forward_time": 0.11439657211303711,
      "step": 16075
    },
    {
      "epoch": 9.8114013671875e-05,
      "step": 16075,
      "training_step_time": 0.4049515724182129
    },
    {
      "epoch": 9.81201171875e-05,
      "model_forward_time": 0.11444520950317383,
      "step": 16076
    },
    {
      "epoch": 9.81201171875e-05,
      "step": 16076,
      "training_step_time": 0.3832402229309082
    },
    {
      "epoch": 9.8126220703125e-05,
      "model_forward_time": 0.11544489860534668,
      "step": 16077
    },
    {
      "epoch": 9.8126220703125e-05,
      "step": 16077,
      "training_step_time": 0.5287623405456543
    },
    {
      "epoch": 9.813232421875e-05,
      "model_forward_time": 0.1149747371673584,
      "step": 16078
    },
    {
      "epoch": 9.813232421875e-05,
      "step": 16078,
      "training_step_time": 0.3933444023132324
    },
    {
      "epoch": 9.8138427734375e-05,
      "model_forward_time": 0.1149301528930664,
      "step": 16079
    },
    {
      "epoch": 9.8138427734375e-05,
      "step": 16079,
      "training_step_time": 0.3896467685699463
    },
    {
      "epoch": 9.814453125e-05,
      "grad_norm": 0.2289319634437561,
      "learning_rate": 8.756017514770443e-05,
      "loss": 0.0619,
      "step": 16080
    },
    {
      "epoch": 9.814453125e-05,
      "model_forward_time": 0.11440038681030273,
      "step": 16080
    },
    {
      "epoch": 9.814453125e-05,
      "step": 16080,
      "training_step_time": 0.40658116340637207
    },
    {
      "epoch": 9.8150634765625e-05,
      "model_forward_time": 0.11529541015625,
      "step": 16081
    },
    {
      "epoch": 9.8150634765625e-05,
      "step": 16081,
      "training_step_time": 0.42069196701049805
    },
    {
      "epoch": 9.815673828125e-05,
      "model_forward_time": 0.11580586433410645,
      "step": 16082
    },
    {
      "epoch": 9.815673828125e-05,
      "step": 16082,
      "training_step_time": 0.4678919315338135
    },
    {
      "epoch": 9.8162841796875e-05,
      "model_forward_time": 0.11494994163513184,
      "step": 16083
    },
    {
      "epoch": 9.8162841796875e-05,
      "step": 16083,
      "training_step_time": 0.5333962440490723
    },
    {
      "epoch": 9.81689453125e-05,
      "model_forward_time": 0.11519718170166016,
      "step": 16084
    },
    {
      "epoch": 9.81689453125e-05,
      "step": 16084,
      "training_step_time": 0.4250011444091797
    },
    {
      "epoch": 9.8175048828125e-05,
      "model_forward_time": 0.11495542526245117,
      "step": 16085
    },
    {
      "epoch": 9.8175048828125e-05,
      "step": 16085,
      "training_step_time": 0.4832942485809326
    },
    {
      "epoch": 9.818115234375e-05,
      "model_forward_time": 0.11600518226623535,
      "step": 16086
    },
    {
      "epoch": 9.818115234375e-05,
      "step": 16086,
      "training_step_time": 0.3896467685699463
    },
    {
      "epoch": 9.8187255859375e-05,
      "model_forward_time": 0.11493110656738281,
      "step": 16087
    },
    {
      "epoch": 9.8187255859375e-05,
      "step": 16087,
      "training_step_time": 0.4637644290924072
    },
    {
      "epoch": 9.8193359375e-05,
      "model_forward_time": 0.11459112167358398,
      "step": 16088
    },
    {
      "epoch": 9.8193359375e-05,
      "step": 16088,
      "training_step_time": 0.4508998394012451
    },
    {
      "epoch": 9.8199462890625e-05,
      "model_forward_time": 0.11505556106567383,
      "step": 16089
    },
    {
      "epoch": 9.8199462890625e-05,
      "step": 16089,
      "training_step_time": 1.0642640590667725
    },
    {
      "epoch": 9.820556640625e-05,
      "grad_norm": 0.14020834863185883,
      "learning_rate": 8.75419793280624e-05,
      "loss": 0.0581,
      "step": 16090
    },
    {
      "epoch": 9.820556640625e-05,
      "model_forward_time": 0.11377644538879395,
      "step": 16090
    },
    {
      "epoch": 9.820556640625e-05,
      "step": 16090,
      "training_step_time": 0.41324901580810547
    },
    {
      "epoch": 9.8211669921875e-05,
      "model_forward_time": 0.11423110961914062,
      "step": 16091
    },
    {
      "epoch": 9.8211669921875e-05,
      "step": 16091,
      "training_step_time": 0.37964320182800293
    },
    {
      "epoch": 9.82177734375e-05,
      "model_forward_time": 0.1139688491821289,
      "step": 16092
    },
    {
      "epoch": 9.82177734375e-05,
      "step": 16092,
      "training_step_time": 0.38556981086730957
    },
    {
      "epoch": 9.8223876953125e-05,
      "model_forward_time": 0.11382555961608887,
      "step": 16093
    },
    {
      "epoch": 9.8223876953125e-05,
      "step": 16093,
      "training_step_time": 0.4313771724700928
    },
    {
      "epoch": 9.822998046875e-05,
      "model_forward_time": 0.1143031120300293,
      "step": 16094
    },
    {
      "epoch": 9.822998046875e-05,
      "step": 16094,
      "training_step_time": 0.419783353805542
    },
    {
      "epoch": 9.8236083984375e-05,
      "model_forward_time": 0.1146998405456543,
      "step": 16095
    },
    {
      "epoch": 9.8236083984375e-05,
      "step": 16095,
      "training_step_time": 0.8537828922271729
    },
    {
      "epoch": 9.82421875e-05,
      "model_forward_time": 0.11351871490478516,
      "step": 16096
    },
    {
      "epoch": 9.82421875e-05,
      "step": 16096,
      "training_step_time": 0.39566755294799805
    },
    {
      "epoch": 9.8248291015625e-05,
      "model_forward_time": 0.11425662040710449,
      "step": 16097
    },
    {
      "epoch": 9.8248291015625e-05,
      "step": 16097,
      "training_step_time": 0.40860939025878906
    },
    {
      "epoch": 9.825439453125e-05,
      "model_forward_time": 0.11446237564086914,
      "step": 16098
    },
    {
      "epoch": 9.825439453125e-05,
      "step": 16098,
      "training_step_time": 0.3882300853729248
    },
    {
      "epoch": 9.8260498046875e-05,
      "model_forward_time": 0.1139991283416748,
      "step": 16099
    },
    {
      "epoch": 9.8260498046875e-05,
      "step": 16099,
      "training_step_time": 0.4736757278442383
    },
    {
      "epoch": 9.82666015625e-05,
      "grad_norm": 0.21437053382396698,
      "learning_rate": 8.75237721041583e-05,
      "loss": 0.058,
      "step": 16100
    },
    {
      "epoch": 9.82666015625e-05,
      "model_forward_time": 0.11463141441345215,
      "step": 16100
    },
    {
      "epoch": 9.82666015625e-05,
      "step": 16100,
      "training_step_time": 0.4944114685058594
    },
    {
      "epoch": 9.8272705078125e-05,
      "model_forward_time": 0.11548566818237305,
      "step": 16101
    },
    {
      "epoch": 9.8272705078125e-05,
      "step": 16101,
      "training_step_time": 0.7850475311279297
    },
    {
      "epoch": 9.827880859375e-05,
      "model_forward_time": 0.11508297920227051,
      "step": 16102
    },
    {
      "epoch": 9.827880859375e-05,
      "step": 16102,
      "training_step_time": 0.38142895698547363
    },
    {
      "epoch": 9.8284912109375e-05,
      "model_forward_time": 0.1145780086517334,
      "step": 16103
    },
    {
      "epoch": 9.8284912109375e-05,
      "step": 16103,
      "training_step_time": 0.3777012825012207
    },
    {
      "epoch": 9.8291015625e-05,
      "model_forward_time": 0.1146242618560791,
      "step": 16104
    },
    {
      "epoch": 9.8291015625e-05,
      "step": 16104,
      "training_step_time": 0.3769338130950928
    },
    {
      "epoch": 9.8297119140625e-05,
      "model_forward_time": 0.11399078369140625,
      "step": 16105
    },
    {
      "epoch": 9.8297119140625e-05,
      "step": 16105,
      "training_step_time": 0.4102919101715088
    },
    {
      "epoch": 9.830322265625e-05,
      "model_forward_time": 0.11458778381347656,
      "step": 16106
    },
    {
      "epoch": 9.830322265625e-05,
      "step": 16106,
      "training_step_time": 0.39022159576416016
    },
    {
      "epoch": 9.8309326171875e-05,
      "model_forward_time": 0.11558127403259277,
      "step": 16107
    },
    {
      "epoch": 9.8309326171875e-05,
      "step": 16107,
      "training_step_time": 1.0325713157653809
    },
    {
      "epoch": 9.83154296875e-05,
      "model_forward_time": 0.11397171020507812,
      "step": 16108
    },
    {
      "epoch": 9.83154296875e-05,
      "step": 16108,
      "training_step_time": 0.4410414695739746
    },
    {
      "epoch": 9.8321533203125e-05,
      "model_forward_time": 0.11398792266845703,
      "step": 16109
    },
    {
      "epoch": 9.8321533203125e-05,
      "step": 16109,
      "training_step_time": 0.40238165855407715
    },
    {
      "epoch": 9.832763671875e-05,
      "grad_norm": 0.1880820244550705,
      "learning_rate": 8.750555348152298e-05,
      "loss": 0.0551,
      "step": 16110
    },
    {
      "epoch": 9.832763671875e-05,
      "model_forward_time": 0.11438250541687012,
      "step": 16110
    },
    {
      "epoch": 9.832763671875e-05,
      "step": 16110,
      "training_step_time": 0.38914036750793457
    },
    {
      "epoch": 9.8333740234375e-05,
      "model_forward_time": 0.1140131950378418,
      "step": 16111
    },
    {
      "epoch": 9.8333740234375e-05,
      "step": 16111,
      "training_step_time": 0.3831906318664551
    },
    {
      "epoch": 9.833984375e-05,
      "model_forward_time": 0.1140737533569336,
      "step": 16112
    },
    {
      "epoch": 9.833984375e-05,
      "step": 16112,
      "training_step_time": 0.45166492462158203
    },
    {
      "epoch": 9.8345947265625e-05,
      "model_forward_time": 0.11474323272705078,
      "step": 16113
    },
    {
      "epoch": 9.8345947265625e-05,
      "step": 16113,
      "training_step_time": 0.905238151550293
    },
    {
      "epoch": 9.835205078125e-05,
      "model_forward_time": 0.11475110054016113,
      "step": 16114
    },
    {
      "epoch": 9.835205078125e-05,
      "step": 16114,
      "training_step_time": 0.3774588108062744
    },
    {
      "epoch": 9.8358154296875e-05,
      "model_forward_time": 0.11487960815429688,
      "step": 16115
    },
    {
      "epoch": 9.8358154296875e-05,
      "step": 16115,
      "training_step_time": 0.38665318489074707
    },
    {
      "epoch": 9.83642578125e-05,
      "model_forward_time": 0.11436891555786133,
      "step": 16116
    },
    {
      "epoch": 9.83642578125e-05,
      "step": 16116,
      "training_step_time": 0.40002989768981934
    },
    {
      "epoch": 9.8370361328125e-05,
      "model_forward_time": 0.11523604393005371,
      "step": 16117
    },
    {
      "epoch": 9.8370361328125e-05,
      "step": 16117,
      "training_step_time": 0.4090399742126465
    },
    {
      "epoch": 9.837646484375e-05,
      "model_forward_time": 0.11441326141357422,
      "step": 16118
    },
    {
      "epoch": 9.837646484375e-05,
      "step": 16118,
      "training_step_time": 0.4103353023529053
    },
    {
      "epoch": 9.8382568359375e-05,
      "model_forward_time": 0.1144554615020752,
      "step": 16119
    },
    {
      "epoch": 9.8382568359375e-05,
      "step": 16119,
      "training_step_time": 0.8718554973602295
    },
    {
      "epoch": 9.8388671875e-05,
      "grad_norm": 0.19824077188968658,
      "learning_rate": 8.74873234656908e-05,
      "loss": 0.059,
      "step": 16120
    },
    {
      "epoch": 9.8388671875e-05,
      "model_forward_time": 0.11414742469787598,
      "step": 16120
    },
    {
      "epoch": 9.8388671875e-05,
      "step": 16120,
      "training_step_time": 0.4746572971343994
    },
    {
      "epoch": 9.8394775390625e-05,
      "model_forward_time": 0.11452817916870117,
      "step": 16121
    },
    {
      "epoch": 9.8394775390625e-05,
      "step": 16121,
      "training_step_time": 0.4778914451599121
    },
    {
      "epoch": 9.840087890625e-05,
      "model_forward_time": 0.11343646049499512,
      "step": 16122
    },
    {
      "epoch": 9.840087890625e-05,
      "step": 16122,
      "training_step_time": 0.43093442916870117
    },
    {
      "epoch": 9.8406982421875e-05,
      "model_forward_time": 0.11405301094055176,
      "step": 16123
    },
    {
      "epoch": 9.8406982421875e-05,
      "step": 16123,
      "training_step_time": 0.39287877082824707
    },
    {
      "epoch": 9.84130859375e-05,
      "model_forward_time": 0.11667799949645996,
      "step": 16124
    },
    {
      "epoch": 9.84130859375e-05,
      "step": 16124,
      "training_step_time": 0.43300890922546387
    },
    {
      "epoch": 9.8419189453125e-05,
      "model_forward_time": 0.11507439613342285,
      "step": 16125
    },
    {
      "epoch": 9.8419189453125e-05,
      "step": 16125,
      "training_step_time": 0.6997721195220947
    },
    {
      "epoch": 9.842529296875e-05,
      "model_forward_time": 0.11503386497497559,
      "step": 16126
    },
    {
      "epoch": 9.842529296875e-05,
      "step": 16126,
      "training_step_time": 0.3854844570159912
    },
    {
      "epoch": 9.8431396484375e-05,
      "model_forward_time": 0.11353826522827148,
      "step": 16127
    },
    {
      "epoch": 9.8431396484375e-05,
      "step": 16127,
      "training_step_time": 0.38162875175476074
    },
    {
      "epoch": 9.84375e-05,
      "model_forward_time": 0.11422848701477051,
      "step": 16128
    },
    {
      "epoch": 9.84375e-05,
      "step": 16128,
      "training_step_time": 0.42046380043029785
    },
    {
      "epoch": 9.8443603515625e-05,
      "model_forward_time": 0.11447501182556152,
      "step": 16129
    },
    {
      "epoch": 9.8443603515625e-05,
      "step": 16129,
      "training_step_time": 0.41253209114074707
    },
    {
      "epoch": 9.844970703125e-05,
      "grad_norm": 0.2028522789478302,
      "learning_rate": 8.746908206219955e-05,
      "loss": 0.0552,
      "step": 16130
    },
    {
      "epoch": 9.844970703125e-05,
      "model_forward_time": 0.11485457420349121,
      "step": 16130
    },
    {
      "epoch": 9.844970703125e-05,
      "step": 16130,
      "training_step_time": 0.42299580574035645
    },
    {
      "epoch": 9.8455810546875e-05,
      "model_forward_time": 0.1152639389038086,
      "step": 16131
    },
    {
      "epoch": 9.8455810546875e-05,
      "step": 16131,
      "training_step_time": 0.6663556098937988
    },
    {
      "epoch": 9.84619140625e-05,
      "model_forward_time": 0.11424398422241211,
      "step": 16132
    },
    {
      "epoch": 9.84619140625e-05,
      "step": 16132,
      "training_step_time": 0.43191003799438477
    },
    {
      "epoch": 9.8468017578125e-05,
      "model_forward_time": 0.11429405212402344,
      "step": 16133
    },
    {
      "epoch": 9.8468017578125e-05,
      "step": 16133,
      "training_step_time": 0.38379907608032227
    },
    {
      "epoch": 9.847412109375e-05,
      "model_forward_time": 0.11451268196105957,
      "step": 16134
    },
    {
      "epoch": 9.847412109375e-05,
      "step": 16134,
      "training_step_time": 0.4761693477630615
    },
    {
      "epoch": 9.8480224609375e-05,
      "model_forward_time": 0.11469268798828125,
      "step": 16135
    },
    {
      "epoch": 9.8480224609375e-05,
      "step": 16135,
      "training_step_time": 0.42014217376708984
    },
    {
      "epoch": 9.8486328125e-05,
      "model_forward_time": 0.11437821388244629,
      "step": 16136
    },
    {
      "epoch": 9.8486328125e-05,
      "step": 16136,
      "training_step_time": 0.38683080673217773
    },
    {
      "epoch": 9.8492431640625e-05,
      "model_forward_time": 0.11519861221313477,
      "step": 16137
    },
    {
      "epoch": 9.8492431640625e-05,
      "step": 16137,
      "training_step_time": 0.5030832290649414
    },
    {
      "epoch": 9.849853515625e-05,
      "model_forward_time": 0.11529660224914551,
      "step": 16138
    },
    {
      "epoch": 9.849853515625e-05,
      "step": 16138,
      "training_step_time": 0.48462462425231934
    },
    {
      "epoch": 9.8504638671875e-05,
      "model_forward_time": 0.1146230697631836,
      "step": 16139
    },
    {
      "epoch": 9.8504638671875e-05,
      "step": 16139,
      "training_step_time": 0.4684412479400635
    },
    {
      "epoch": 9.85107421875e-05,
      "grad_norm": 0.17246420681476593,
      "learning_rate": 8.745082927659047e-05,
      "loss": 0.0598,
      "step": 16140
    },
    {
      "epoch": 9.85107421875e-05,
      "model_forward_time": 0.11557626724243164,
      "step": 16140
    },
    {
      "epoch": 9.85107421875e-05,
      "step": 16140,
      "training_step_time": 0.3780691623687744
    },
    {
      "epoch": 9.8516845703125e-05,
      "model_forward_time": 0.11523985862731934,
      "step": 16141
    },
    {
      "epoch": 9.8516845703125e-05,
      "step": 16141,
      "training_step_time": 0.39211177825927734
    },
    {
      "epoch": 9.852294921875e-05,
      "model_forward_time": 0.11475753784179688,
      "step": 16142
    },
    {
      "epoch": 9.852294921875e-05,
      "step": 16142,
      "training_step_time": 0.4185678958892822
    },
    {
      "epoch": 9.8529052734375e-05,
      "model_forward_time": 0.11581540107727051,
      "step": 16143
    },
    {
      "epoch": 9.8529052734375e-05,
      "step": 16143,
      "training_step_time": 0.5088119506835938
    },
    {
      "epoch": 9.853515625e-05,
      "model_forward_time": 0.11529707908630371,
      "step": 16144
    },
    {
      "epoch": 9.853515625e-05,
      "step": 16144,
      "training_step_time": 0.45917558670043945
    },
    {
      "epoch": 9.8541259765625e-05,
      "model_forward_time": 0.11504840850830078,
      "step": 16145
    },
    {
      "epoch": 9.8541259765625e-05,
      "step": 16145,
      "training_step_time": 0.39020681381225586
    },
    {
      "epoch": 9.854736328125e-05,
      "model_forward_time": 0.1152353286743164,
      "step": 16146
    },
    {
      "epoch": 9.854736328125e-05,
      "step": 16146,
      "training_step_time": 0.430513858795166
    },
    {
      "epoch": 9.8553466796875e-05,
      "model_forward_time": 0.11546802520751953,
      "step": 16147
    },
    {
      "epoch": 9.8553466796875e-05,
      "step": 16147,
      "training_step_time": 0.4493217468261719
    },
    {
      "epoch": 9.85595703125e-05,
      "model_forward_time": 0.11554408073425293,
      "step": 16148
    },
    {
      "epoch": 9.85595703125e-05,
      "step": 16148,
      "training_step_time": 0.4801807403564453
    },
    {
      "epoch": 9.8565673828125e-05,
      "model_forward_time": 0.11478137969970703,
      "step": 16149
    },
    {
      "epoch": 9.8565673828125e-05,
      "step": 16149,
      "training_step_time": 0.42825913429260254
    },
    {
      "epoch": 9.857177734375e-05,
      "grad_norm": 0.17698773741722107,
      "learning_rate": 8.74325651144083e-05,
      "loss": 0.0563,
      "step": 16150
    },
    {
      "epoch": 9.857177734375e-05,
      "model_forward_time": 0.1167607307434082,
      "step": 16150
    },
    {
      "epoch": 9.857177734375e-05,
      "step": 16150,
      "training_step_time": 0.39243364334106445
    },
    {
      "epoch": 9.8577880859375e-05,
      "model_forward_time": 0.11592316627502441,
      "step": 16151
    },
    {
      "epoch": 9.8577880859375e-05,
      "step": 16151,
      "training_step_time": 0.4798250198364258
    },
    {
      "epoch": 9.8583984375e-05,
      "model_forward_time": 0.11532044410705566,
      "step": 16152
    },
    {
      "epoch": 9.8583984375e-05,
      "step": 16152,
      "training_step_time": 0.43689703941345215
    },
    {
      "epoch": 9.8590087890625e-05,
      "model_forward_time": 0.11506867408752441,
      "step": 16153
    },
    {
      "epoch": 9.8590087890625e-05,
      "step": 16153,
      "training_step_time": 0.49270129203796387
    },
    {
      "epoch": 9.859619140625e-05,
      "model_forward_time": 0.11475396156311035,
      "step": 16154
    },
    {
      "epoch": 9.859619140625e-05,
      "step": 16154,
      "training_step_time": 0.38123464584350586
    },
    {
      "epoch": 9.8602294921875e-05,
      "model_forward_time": 0.11490797996520996,
      "step": 16155
    },
    {
      "epoch": 9.8602294921875e-05,
      "step": 16155,
      "training_step_time": 0.5948305130004883
    },
    {
      "epoch": 9.86083984375e-05,
      "model_forward_time": 0.11483097076416016,
      "step": 16156
    },
    {
      "epoch": 9.86083984375e-05,
      "step": 16156,
      "training_step_time": 0.37979698181152344
    },
    {
      "epoch": 9.8614501953125e-05,
      "model_forward_time": 0.11487174034118652,
      "step": 16157
    },
    {
      "epoch": 9.8614501953125e-05,
      "step": 16157,
      "training_step_time": 0.38931941986083984
    },
    {
      "epoch": 9.862060546875e-05,
      "model_forward_time": 0.11481308937072754,
      "step": 16158
    },
    {
      "epoch": 9.862060546875e-05,
      "step": 16158,
      "training_step_time": 0.38892221450805664
    },
    {
      "epoch": 9.8626708984375e-05,
      "model_forward_time": 0.11491656303405762,
      "step": 16159
    },
    {
      "epoch": 9.8626708984375e-05,
      "step": 16159,
      "training_step_time": 0.39731311798095703
    },
    {
      "epoch": 9.86328125e-05,
      "grad_norm": 0.13470667600631714,
      "learning_rate": 8.741428958120118e-05,
      "loss": 0.0598,
      "step": 16160
    },
    {
      "epoch": 9.86328125e-05,
      "model_forward_time": 0.11510515213012695,
      "step": 16160
    },
    {
      "epoch": 9.86328125e-05,
      "step": 16160,
      "training_step_time": 0.4632430076599121
    },
    {
      "epoch": 9.8638916015625e-05,
      "model_forward_time": 0.1151421070098877,
      "step": 16161
    },
    {
      "epoch": 9.8638916015625e-05,
      "step": 16161,
      "training_step_time": 0.7943727970123291
    },
    {
      "epoch": 9.864501953125e-05,
      "model_forward_time": 0.11374521255493164,
      "step": 16162
    },
    {
      "epoch": 9.864501953125e-05,
      "step": 16162,
      "training_step_time": 0.4318218231201172
    },
    {
      "epoch": 9.8651123046875e-05,
      "model_forward_time": 0.1140892505645752,
      "step": 16163
    },
    {
      "epoch": 9.8651123046875e-05,
      "step": 16163,
      "training_step_time": 0.3822956085205078
    },
    {
      "epoch": 9.86572265625e-05,
      "model_forward_time": 0.11658310890197754,
      "step": 16164
    },
    {
      "epoch": 9.86572265625e-05,
      "step": 16164,
      "training_step_time": 0.3648948669433594
    },
    {
      "epoch": 9.8663330078125e-05,
      "model_forward_time": 0.11504387855529785,
      "step": 16165
    },
    {
      "epoch": 9.8663330078125e-05,
      "step": 16165,
      "training_step_time": 0.42021846771240234
    },
    {
      "epoch": 9.866943359375e-05,
      "model_forward_time": 0.11659598350524902,
      "step": 16166
    },
    {
      "epoch": 9.866943359375e-05,
      "step": 16166,
      "training_step_time": 0.46347665786743164
    },
    {
      "epoch": 9.8675537109375e-05,
      "model_forward_time": 0.11562466621398926,
      "step": 16167
    },
    {
      "epoch": 9.8675537109375e-05,
      "step": 16167,
      "training_step_time": 0.5991594791412354
    },
    {
      "epoch": 9.8681640625e-05,
      "model_forward_time": 0.11517715454101562,
      "step": 16168
    },
    {
      "epoch": 9.8681640625e-05,
      "step": 16168,
      "training_step_time": 0.40786194801330566
    },
    {
      "epoch": 9.8687744140625e-05,
      "model_forward_time": 0.11457347869873047,
      "step": 16169
    },
    {
      "epoch": 9.8687744140625e-05,
      "step": 16169,
      "training_step_time": 0.3844583034515381
    },
    {
      "epoch": 9.869384765625e-05,
      "grad_norm": 0.18601536750793457,
      "learning_rate": 8.739600268252078e-05,
      "loss": 0.0531,
      "step": 16170
    },
    {
      "epoch": 9.869384765625e-05,
      "model_forward_time": 0.1147615909576416,
      "step": 16170
    },
    {
      "epoch": 9.869384765625e-05,
      "step": 16170,
      "training_step_time": 0.3929295539855957
    },
    {
      "epoch": 9.8699951171875e-05,
      "model_forward_time": 0.11493515968322754,
      "step": 16171
    },
    {
      "epoch": 9.8699951171875e-05,
      "step": 16171,
      "training_step_time": 0.42691993713378906
    },
    {
      "epoch": 9.87060546875e-05,
      "model_forward_time": 0.11513328552246094,
      "step": 16172
    },
    {
      "epoch": 9.87060546875e-05,
      "step": 16172,
      "training_step_time": 0.43083834648132324
    },
    {
      "epoch": 9.8712158203125e-05,
      "model_forward_time": 0.11591315269470215,
      "step": 16173
    },
    {
      "epoch": 9.8712158203125e-05,
      "step": 16173,
      "training_step_time": 0.4887197017669678
    },
    {
      "epoch": 9.871826171875e-05,
      "model_forward_time": 0.11477279663085938,
      "step": 16174
    },
    {
      "epoch": 9.871826171875e-05,
      "step": 16174,
      "training_step_time": 0.3992156982421875
    },
    {
      "epoch": 9.8724365234375e-05,
      "model_forward_time": 0.11485099792480469,
      "step": 16175
    },
    {
      "epoch": 9.8724365234375e-05,
      "step": 16175,
      "training_step_time": 0.47538256645202637
    },
    {
      "epoch": 9.873046875e-05,
      "model_forward_time": 0.11481904983520508,
      "step": 16176
    },
    {
      "epoch": 9.873046875e-05,
      "step": 16176,
      "training_step_time": 0.4578969478607178
    },
    {
      "epoch": 9.8736572265625e-05,
      "model_forward_time": 0.11595034599304199,
      "step": 16177
    },
    {
      "epoch": 9.8736572265625e-05,
      "step": 16177,
      "training_step_time": 0.3923070430755615
    },
    {
      "epoch": 9.874267578125e-05,
      "model_forward_time": 0.11521267890930176,
      "step": 16178
    },
    {
      "epoch": 9.874267578125e-05,
      "step": 16178,
      "training_step_time": 0.3708343505859375
    },
    {
      "epoch": 9.8748779296875e-05,
      "model_forward_time": 0.11538028717041016,
      "step": 16179
    },
    {
      "epoch": 9.8748779296875e-05,
      "step": 16179,
      "training_step_time": 0.4366881847381592
    },
    {
      "epoch": 9.87548828125e-05,
      "grad_norm": 0.13481521606445312,
      "learning_rate": 8.737770442392212e-05,
      "loss": 0.0618,
      "step": 16180
    },
    {
      "epoch": 9.87548828125e-05,
      "model_forward_time": 0.11482524871826172,
      "step": 16180
    },
    {
      "epoch": 9.87548828125e-05,
      "step": 16180,
      "training_step_time": 0.4687657356262207
    },
    {
      "epoch": 9.8760986328125e-05,
      "model_forward_time": 0.11628484725952148,
      "step": 16181
    },
    {
      "epoch": 9.8760986328125e-05,
      "step": 16181,
      "training_step_time": 0.4266357421875
    },
    {
      "epoch": 9.876708984375e-05,
      "model_forward_time": 0.1150655746459961,
      "step": 16182
    },
    {
      "epoch": 9.876708984375e-05,
      "step": 16182,
      "training_step_time": 0.3980560302734375
    },
    {
      "epoch": 9.8773193359375e-05,
      "model_forward_time": 0.11564826965332031,
      "step": 16183
    },
    {
      "epoch": 9.8773193359375e-05,
      "step": 16183,
      "training_step_time": 0.40566253662109375
    },
    {
      "epoch": 9.8779296875e-05,
      "model_forward_time": 0.11582136154174805,
      "step": 16184
    },
    {
      "epoch": 9.8779296875e-05,
      "step": 16184,
      "training_step_time": 0.3914937973022461
    },
    {
      "epoch": 9.8785400390625e-05,
      "model_forward_time": 0.11539244651794434,
      "step": 16185
    },
    {
      "epoch": 9.8785400390625e-05,
      "step": 16185,
      "training_step_time": 0.53190016746521
    },
    {
      "epoch": 9.879150390625e-05,
      "model_forward_time": 0.11481237411499023,
      "step": 16186
    },
    {
      "epoch": 9.879150390625e-05,
      "step": 16186,
      "training_step_time": 0.4491443634033203
    },
    {
      "epoch": 9.8797607421875e-05,
      "model_forward_time": 0.11488103866577148,
      "step": 16187
    },
    {
      "epoch": 9.8797607421875e-05,
      "step": 16187,
      "training_step_time": 0.493710994720459
    },
    {
      "epoch": 9.88037109375e-05,
      "model_forward_time": 0.11449384689331055,
      "step": 16188
    },
    {
      "epoch": 9.88037109375e-05,
      "step": 16188,
      "training_step_time": 0.40795469284057617
    },
    {
      "epoch": 9.8809814453125e-05,
      "model_forward_time": 0.11487531661987305,
      "step": 16189
    },
    {
      "epoch": 9.8809814453125e-05,
      "step": 16189,
      "training_step_time": 0.40289878845214844
    },
    {
      "epoch": 9.881591796875e-05,
      "grad_norm": 0.15739141404628754,
      "learning_rate": 8.735939481096378e-05,
      "loss": 0.0571,
      "step": 16190
    },
    {
      "epoch": 9.881591796875e-05,
      "model_forward_time": 0.11401724815368652,
      "step": 16190
    },
    {
      "epoch": 9.881591796875e-05,
      "step": 16190,
      "training_step_time": 0.41375279426574707
    },
    {
      "epoch": 9.8822021484375e-05,
      "model_forward_time": 0.11487436294555664,
      "step": 16191
    },
    {
      "epoch": 9.8822021484375e-05,
      "step": 16191,
      "training_step_time": 0.4918034076690674
    },
    {
      "epoch": 9.8828125e-05,
      "model_forward_time": 0.11402034759521484,
      "step": 16192
    },
    {
      "epoch": 9.8828125e-05,
      "step": 16192,
      "training_step_time": 0.3909785747528076
    },
    {
      "epoch": 9.8834228515625e-05,
      "model_forward_time": 0.11496281623840332,
      "step": 16193
    },
    {
      "epoch": 9.8834228515625e-05,
      "step": 16193,
      "training_step_time": 0.44519972801208496
    },
    {
      "epoch": 9.884033203125e-05,
      "model_forward_time": 0.11472749710083008,
      "step": 16194
    },
    {
      "epoch": 9.884033203125e-05,
      "step": 16194,
      "training_step_time": 0.45739054679870605
    },
    {
      "epoch": 9.8846435546875e-05,
      "model_forward_time": 0.1157538890838623,
      "step": 16195
    },
    {
      "epoch": 9.8846435546875e-05,
      "step": 16195,
      "training_step_time": 0.4887077808380127
    },
    {
      "epoch": 9.88525390625e-05,
      "model_forward_time": 0.11481642723083496,
      "step": 16196
    },
    {
      "epoch": 9.88525390625e-05,
      "step": 16196,
      "training_step_time": 0.3864583969116211
    },
    {
      "epoch": 9.8858642578125e-05,
      "model_forward_time": 0.11554646492004395,
      "step": 16197
    },
    {
      "epoch": 9.8858642578125e-05,
      "step": 16197,
      "training_step_time": 0.39857029914855957
    },
    {
      "epoch": 9.886474609375e-05,
      "model_forward_time": 0.11494755744934082,
      "step": 16198
    },
    {
      "epoch": 9.886474609375e-05,
      "step": 16198,
      "training_step_time": 0.39046812057495117
    },
    {
      "epoch": 9.8870849609375e-05,
      "model_forward_time": 0.11500668525695801,
      "step": 16199
    },
    {
      "epoch": 9.8870849609375e-05,
      "step": 16199,
      "training_step_time": 0.3981168270111084
    },
    {
      "epoch": 9.8876953125e-05,
      "grad_norm": 0.16148126125335693,
      "learning_rate": 8.73410738492077e-05,
      "loss": 0.0598,
      "step": 16200
    },
    {
      "epoch": 9.8876953125e-05,
      "model_forward_time": 0.11643195152282715,
      "step": 16200
    },
    {
      "epoch": 9.8876953125e-05,
      "step": 16200,
      "training_step_time": 0.39043259620666504
    },
    {
      "epoch": 9.8883056640625e-05,
      "model_forward_time": 0.11567497253417969,
      "step": 16201
    },
    {
      "epoch": 9.8883056640625e-05,
      "step": 16201,
      "training_step_time": 0.4464845657348633
    },
    {
      "epoch": 9.888916015625e-05,
      "model_forward_time": 0.11539864540100098,
      "step": 16202
    },
    {
      "epoch": 9.888916015625e-05,
      "step": 16202,
      "training_step_time": 0.4459869861602783
    },
    {
      "epoch": 9.8895263671875e-05,
      "model_forward_time": 0.11579322814941406,
      "step": 16203
    },
    {
      "epoch": 9.8895263671875e-05,
      "step": 16203,
      "training_step_time": 0.5545437335968018
    },
    {
      "epoch": 9.89013671875e-05,
      "model_forward_time": 0.1152200698852539,
      "step": 16204
    },
    {
      "epoch": 9.89013671875e-05,
      "step": 16204,
      "training_step_time": 0.44337892532348633
    },
    {
      "epoch": 9.8907470703125e-05,
      "model_forward_time": 0.11541461944580078,
      "step": 16205
    },
    {
      "epoch": 9.8907470703125e-05,
      "step": 16205,
      "training_step_time": 0.484372615814209
    },
    {
      "epoch": 9.891357421875e-05,
      "model_forward_time": 0.11519575119018555,
      "step": 16206
    },
    {
      "epoch": 9.891357421875e-05,
      "step": 16206,
      "training_step_time": 0.3892064094543457
    },
    {
      "epoch": 9.8919677734375e-05,
      "model_forward_time": 0.11536645889282227,
      "step": 16207
    },
    {
      "epoch": 9.8919677734375e-05,
      "step": 16207,
      "training_step_time": 0.46616697311401367
    },
    {
      "epoch": 9.892578125e-05,
      "model_forward_time": 0.11435890197753906,
      "step": 16208
    },
    {
      "epoch": 9.892578125e-05,
      "step": 16208,
      "training_step_time": 0.47818541526794434
    },
    {
      "epoch": 9.8931884765625e-05,
      "model_forward_time": 0.11490225791931152,
      "step": 16209
    },
    {
      "epoch": 9.8931884765625e-05,
      "step": 16209,
      "training_step_time": 0.4384489059448242
    },
    {
      "epoch": 9.893798828125e-05,
      "grad_norm": 0.1306891143321991,
      "learning_rate": 8.732274154421933e-05,
      "loss": 0.0487,
      "step": 16210
    },
    {
      "epoch": 9.893798828125e-05,
      "model_forward_time": 0.1145174503326416,
      "step": 16210
    },
    {
      "epoch": 9.893798828125e-05,
      "step": 16210,
      "training_step_time": 0.39919424057006836
    },
    {
      "epoch": 9.8944091796875e-05,
      "model_forward_time": 0.11491870880126953,
      "step": 16211
    },
    {
      "epoch": 9.8944091796875e-05,
      "step": 16211,
      "training_step_time": 0.3908998966217041
    },
    {
      "epoch": 9.89501953125e-05,
      "model_forward_time": 0.11555051803588867,
      "step": 16212
    },
    {
      "epoch": 9.89501953125e-05,
      "step": 16212,
      "training_step_time": 0.4072916507720947
    },
    {
      "epoch": 9.8956298828125e-05,
      "model_forward_time": 0.11505484580993652,
      "step": 16213
    },
    {
      "epoch": 9.8956298828125e-05,
      "step": 16213,
      "training_step_time": 0.3991208076477051
    },
    {
      "epoch": 9.896240234375e-05,
      "model_forward_time": 0.11562228202819824,
      "step": 16214
    },
    {
      "epoch": 9.896240234375e-05,
      "step": 16214,
      "training_step_time": 0.3923909664154053
    },
    {
      "epoch": 9.8968505859375e-05,
      "model_forward_time": 0.11462879180908203,
      "step": 16215
    },
    {
      "epoch": 9.8968505859375e-05,
      "step": 16215,
      "training_step_time": 0.6376473903656006
    },
    {
      "epoch": 9.8974609375e-05,
      "model_forward_time": 0.11525988578796387,
      "step": 16216
    },
    {
      "epoch": 9.8974609375e-05,
      "step": 16216,
      "training_step_time": 0.4035663604736328
    },
    {
      "epoch": 9.8980712890625e-05,
      "model_forward_time": 0.11500716209411621,
      "step": 16217
    },
    {
      "epoch": 9.8980712890625e-05,
      "step": 16217,
      "training_step_time": 0.44202470779418945
    },
    {
      "epoch": 9.898681640625e-05,
      "model_forward_time": 0.11514925956726074,
      "step": 16218
    },
    {
      "epoch": 9.898681640625e-05,
      "step": 16218,
      "training_step_time": 0.438417911529541
    },
    {
      "epoch": 9.8992919921875e-05,
      "model_forward_time": 0.11512422561645508,
      "step": 16219
    },
    {
      "epoch": 9.8992919921875e-05,
      "step": 16219,
      "training_step_time": 0.39539194107055664
    },
    {
      "epoch": 9.89990234375e-05,
      "grad_norm": 0.18447929620742798,
      "learning_rate": 8.730439790156752e-05,
      "loss": 0.0568,
      "step": 16220
    },
    {
      "epoch": 9.89990234375e-05,
      "model_forward_time": 0.1144869327545166,
      "step": 16220
    },
    {
      "epoch": 9.89990234375e-05,
      "step": 16220,
      "training_step_time": 0.3856320381164551
    },
    {
      "epoch": 9.9005126953125e-05,
      "model_forward_time": 0.11439228057861328,
      "step": 16221
    },
    {
      "epoch": 9.9005126953125e-05,
      "step": 16221,
      "training_step_time": 0.4121735095977783
    },
    {
      "epoch": 9.901123046875e-05,
      "model_forward_time": 0.11579585075378418,
      "step": 16222
    },
    {
      "epoch": 9.901123046875e-05,
      "step": 16222,
      "training_step_time": 0.38980817794799805
    },
    {
      "epoch": 9.9017333984375e-05,
      "model_forward_time": 0.1149587631225586,
      "step": 16223
    },
    {
      "epoch": 9.9017333984375e-05,
      "step": 16223,
      "training_step_time": 0.4896421432495117
    },
    {
      "epoch": 9.90234375e-05,
      "model_forward_time": 0.11498618125915527,
      "step": 16224
    },
    {
      "epoch": 9.90234375e-05,
      "step": 16224,
      "training_step_time": 0.38602733612060547
    },
    {
      "epoch": 9.9029541015625e-05,
      "model_forward_time": 0.11519861221313477,
      "step": 16225
    },
    {
      "epoch": 9.9029541015625e-05,
      "step": 16225,
      "training_step_time": 0.39698290824890137
    },
    {
      "epoch": 9.903564453125e-05,
      "model_forward_time": 0.11542439460754395,
      "step": 16226
    },
    {
      "epoch": 9.903564453125e-05,
      "step": 16226,
      "training_step_time": 0.3797736167907715
    },
    {
      "epoch": 9.9041748046875e-05,
      "model_forward_time": 0.11536884307861328,
      "step": 16227
    },
    {
      "epoch": 9.9041748046875e-05,
      "step": 16227,
      "training_step_time": 0.644512414932251
    },
    {
      "epoch": 9.90478515625e-05,
      "model_forward_time": 0.1144556999206543,
      "step": 16228
    },
    {
      "epoch": 9.90478515625e-05,
      "step": 16228,
      "training_step_time": 0.4297976493835449
    },
    {
      "epoch": 9.9053955078125e-05,
      "model_forward_time": 0.11512637138366699,
      "step": 16229
    },
    {
      "epoch": 9.9053955078125e-05,
      "step": 16229,
      "training_step_time": 0.3869760036468506
    },
    {
      "epoch": 9.906005859375e-05,
      "grad_norm": 0.22502394020557404,
      "learning_rate": 8.728604292682459e-05,
      "loss": 0.0573,
      "step": 16230
    },
    {
      "epoch": 9.906005859375e-05,
      "model_forward_time": 0.11561155319213867,
      "step": 16230
    },
    {
      "epoch": 9.906005859375e-05,
      "step": 16230,
      "training_step_time": 0.3864781856536865
    },
    {
      "epoch": 9.9066162109375e-05,
      "model_forward_time": 0.11588335037231445,
      "step": 16231
    },
    {
      "epoch": 9.9066162109375e-05,
      "step": 16231,
      "training_step_time": 0.4720492362976074
    },
    {
      "epoch": 9.9072265625e-05,
      "model_forward_time": 0.11491155624389648,
      "step": 16232
    },
    {
      "epoch": 9.9072265625e-05,
      "step": 16232,
      "training_step_time": 0.43175339698791504
    },
    {
      "epoch": 9.9078369140625e-05,
      "model_forward_time": 0.11501622200012207,
      "step": 16233
    },
    {
      "epoch": 9.9078369140625e-05,
      "step": 16233,
      "training_step_time": 0.47716236114501953
    },
    {
      "epoch": 9.908447265625e-05,
      "model_forward_time": 0.11521482467651367,
      "step": 16234
    },
    {
      "epoch": 9.908447265625e-05,
      "step": 16234,
      "training_step_time": 0.39255285263061523
    },
    {
      "epoch": 9.9090576171875e-05,
      "model_forward_time": 0.11463356018066406,
      "step": 16235
    },
    {
      "epoch": 9.9090576171875e-05,
      "step": 16235,
      "training_step_time": 0.3639242649078369
    },
    {
      "epoch": 9.90966796875e-05,
      "model_forward_time": 0.11526656150817871,
      "step": 16236
    },
    {
      "epoch": 9.90966796875e-05,
      "step": 16236,
      "training_step_time": 0.47818946838378906
    },
    {
      "epoch": 9.9102783203125e-05,
      "model_forward_time": 0.11487364768981934,
      "step": 16237
    },
    {
      "epoch": 9.9102783203125e-05,
      "step": 16237,
      "training_step_time": 0.4643585681915283
    },
    {
      "epoch": 9.910888671875e-05,
      "model_forward_time": 0.11507987976074219,
      "step": 16238
    },
    {
      "epoch": 9.910888671875e-05,
      "step": 16238,
      "training_step_time": 0.3935565948486328
    },
    {
      "epoch": 9.9114990234375e-05,
      "model_forward_time": 0.1148529052734375,
      "step": 16239
    },
    {
      "epoch": 9.9114990234375e-05,
      "step": 16239,
      "training_step_time": 0.39362335205078125
    },
    {
      "epoch": 9.912109375e-05,
      "grad_norm": 0.1580689549446106,
      "learning_rate": 8.72676766255663e-05,
      "loss": 0.0583,
      "step": 16240
    },
    {
      "epoch": 9.912109375e-05,
      "model_forward_time": 0.1146402359008789,
      "step": 16240
    },
    {
      "epoch": 9.912109375e-05,
      "step": 16240,
      "training_step_time": 0.4113309383392334
    },
    {
      "epoch": 9.9127197265625e-05,
      "model_forward_time": 0.1147918701171875,
      "step": 16241
    },
    {
      "epoch": 9.9127197265625e-05,
      "step": 16241,
      "training_step_time": 0.3938159942626953
    },
    {
      "epoch": 9.913330078125e-05,
      "model_forward_time": 0.11484360694885254,
      "step": 16242
    },
    {
      "epoch": 9.913330078125e-05,
      "step": 16242,
      "training_step_time": 0.42905187606811523
    },
    {
      "epoch": 9.9139404296875e-05,
      "model_forward_time": 0.11482453346252441,
      "step": 16243
    },
    {
      "epoch": 9.9139404296875e-05,
      "step": 16243,
      "training_step_time": 0.4921727180480957
    },
    {
      "epoch": 9.91455078125e-05,
      "model_forward_time": 0.11562037467956543,
      "step": 16244
    },
    {
      "epoch": 9.91455078125e-05,
      "step": 16244,
      "training_step_time": 0.40265321731567383
    },
    {
      "epoch": 9.9151611328125e-05,
      "model_forward_time": 0.11490201950073242,
      "step": 16245
    },
    {
      "epoch": 9.9151611328125e-05,
      "step": 16245,
      "training_step_time": 0.45263004302978516
    },
    {
      "epoch": 9.915771484375e-05,
      "model_forward_time": 0.11635851860046387,
      "step": 16246
    },
    {
      "epoch": 9.915771484375e-05,
      "step": 16246,
      "training_step_time": 0.6727495193481445
    },
    {
      "epoch": 9.9163818359375e-05,
      "model_forward_time": 0.11510848999023438,
      "step": 16247
    },
    {
      "epoch": 9.9163818359375e-05,
      "step": 16247,
      "training_step_time": 0.4101438522338867
    },
    {
      "epoch": 9.9169921875e-05,
      "model_forward_time": 0.11435437202453613,
      "step": 16248
    },
    {
      "epoch": 9.9169921875e-05,
      "step": 16248,
      "training_step_time": 0.38449597358703613
    },
    {
      "epoch": 9.9176025390625e-05,
      "model_forward_time": 0.11465764045715332,
      "step": 16249
    },
    {
      "epoch": 9.9176025390625e-05,
      "step": 16249,
      "training_step_time": 0.3695650100708008
    },
    {
      "epoch": 9.918212890625e-05,
      "grad_norm": 0.18624155223369598,
      "learning_rate": 8.724929900337186e-05,
      "loss": 0.0548,
      "step": 16250
    },
    {
      "epoch": 9.918212890625e-05,
      "model_forward_time": 0.11409664154052734,
      "step": 16250
    },
    {
      "epoch": 9.918212890625e-05,
      "step": 16250,
      "training_step_time": 0.42009925842285156
    },
    {
      "epoch": 9.9188232421875e-05,
      "model_forward_time": 0.11588048934936523,
      "step": 16251
    },
    {
      "epoch": 9.9188232421875e-05,
      "step": 16251,
      "training_step_time": 0.46378254890441895
    },
    {
      "epoch": 9.91943359375e-05,
      "model_forward_time": 0.11541414260864258,
      "step": 16252
    },
    {
      "epoch": 9.91943359375e-05,
      "step": 16252,
      "training_step_time": 0.9940042495727539
    },
    {
      "epoch": 9.9200439453125e-05,
      "model_forward_time": 0.11443734169006348,
      "step": 16253
    },
    {
      "epoch": 9.9200439453125e-05,
      "step": 16253,
      "training_step_time": 0.4016265869140625
    },
    {
      "epoch": 9.920654296875e-05,
      "model_forward_time": 0.1138465404510498,
      "step": 16254
    },
    {
      "epoch": 9.920654296875e-05,
      "step": 16254,
      "training_step_time": 0.3949098587036133
    },
    {
      "epoch": 9.9212646484375e-05,
      "model_forward_time": 0.1136026382446289,
      "step": 16255
    },
    {
      "epoch": 9.9212646484375e-05,
      "step": 16255,
      "training_step_time": 0.3897428512573242
    },
    {
      "epoch": 9.921875e-05,
      "model_forward_time": 0.11368584632873535,
      "step": 16256
    },
    {
      "epoch": 9.921875e-05,
      "step": 16256,
      "training_step_time": 0.45107007026672363
    },
    {
      "epoch": 9.9224853515625e-05,
      "model_forward_time": 0.11423087120056152,
      "step": 16257
    },
    {
      "epoch": 9.9224853515625e-05,
      "step": 16257,
      "training_step_time": 0.39873433113098145
    },
    {
      "epoch": 9.923095703125e-05,
      "model_forward_time": 0.11488819122314453,
      "step": 16258
    },
    {
      "epoch": 9.923095703125e-05,
      "step": 16258,
      "training_step_time": 0.4619712829589844
    },
    {
      "epoch": 9.9237060546875e-05,
      "model_forward_time": 0.11487388610839844,
      "step": 16259
    },
    {
      "epoch": 9.9237060546875e-05,
      "step": 16259,
      "training_step_time": 0.4339141845703125
    },
    {
      "epoch": 9.92431640625e-05,
      "grad_norm": 0.11143413186073303,
      "learning_rate": 8.723091006582389e-05,
      "loss": 0.0594,
      "step": 16260
    },
    {
      "epoch": 9.92431640625e-05,
      "model_forward_time": 0.11486482620239258,
      "step": 16260
    },
    {
      "epoch": 9.92431640625e-05,
      "step": 16260,
      "training_step_time": 0.41327691078186035
    },
    {
      "epoch": 9.9249267578125e-05,
      "model_forward_time": 0.11486029624938965,
      "step": 16261
    },
    {
      "epoch": 9.9249267578125e-05,
      "step": 16261,
      "training_step_time": 0.3880040645599365
    },
    {
      "epoch": 9.925537109375e-05,
      "model_forward_time": 0.11495137214660645,
      "step": 16262
    },
    {
      "epoch": 9.925537109375e-05,
      "step": 16262,
      "training_step_time": 0.38605570793151855
    },
    {
      "epoch": 9.9261474609375e-05,
      "model_forward_time": 0.11560177803039551,
      "step": 16263
    },
    {
      "epoch": 9.9261474609375e-05,
      "step": 16263,
      "training_step_time": 0.3817868232727051
    },
    {
      "epoch": 9.9267578125e-05,
      "model_forward_time": 0.11523318290710449,
      "step": 16264
    },
    {
      "epoch": 9.9267578125e-05,
      "step": 16264,
      "training_step_time": 0.9361982345581055
    },
    {
      "epoch": 9.9273681640625e-05,
      "model_forward_time": 0.11419463157653809,
      "step": 16265
    },
    {
      "epoch": 9.9273681640625e-05,
      "step": 16265,
      "training_step_time": 0.383364200592041
    },
    {
      "epoch": 9.927978515625e-05,
      "model_forward_time": 0.1147918701171875,
      "step": 16266
    },
    {
      "epoch": 9.927978515625e-05,
      "step": 16266,
      "training_step_time": 0.3788571357727051
    },
    {
      "epoch": 9.9285888671875e-05,
      "model_forward_time": 0.11387395858764648,
      "step": 16267
    },
    {
      "epoch": 9.9285888671875e-05,
      "step": 16267,
      "training_step_time": 0.3814053535461426
    },
    {
      "epoch": 9.92919921875e-05,
      "model_forward_time": 0.11366677284240723,
      "step": 16268
    },
    {
      "epoch": 9.92919921875e-05,
      "step": 16268,
      "training_step_time": 0.4433767795562744
    },
    {
      "epoch": 9.9298095703125e-05,
      "model_forward_time": 0.11416506767272949,
      "step": 16269
    },
    {
      "epoch": 9.9298095703125e-05,
      "step": 16269,
      "training_step_time": 0.4205009937286377
    },
    {
      "epoch": 9.930419921875e-05,
      "grad_norm": 0.1534370481967926,
      "learning_rate": 8.721250981850846e-05,
      "loss": 0.0566,
      "step": 16270
    },
    {
      "epoch": 9.930419921875e-05,
      "model_forward_time": 0.11508035659790039,
      "step": 16270
    },
    {
      "epoch": 9.930419921875e-05,
      "step": 16270,
      "training_step_time": 0.645714521408081
    },
    {
      "epoch": 9.9310302734375e-05,
      "model_forward_time": 0.11385369300842285,
      "step": 16271
    },
    {
      "epoch": 9.9310302734375e-05,
      "step": 16271,
      "training_step_time": 0.42657041549682617
    },
    {
      "epoch": 9.931640625e-05,
      "model_forward_time": 0.11441540718078613,
      "step": 16272
    },
    {
      "epoch": 9.931640625e-05,
      "step": 16272,
      "training_step_time": 0.3998684883117676
    },
    {
      "epoch": 9.9322509765625e-05,
      "model_forward_time": 0.11447381973266602,
      "step": 16273
    },
    {
      "epoch": 9.9322509765625e-05,
      "step": 16273,
      "training_step_time": 0.4006466865539551
    },
    {
      "epoch": 9.932861328125e-05,
      "model_forward_time": 0.11474466323852539,
      "step": 16274
    },
    {
      "epoch": 9.932861328125e-05,
      "step": 16274,
      "training_step_time": 0.3851439952850342
    },
    {
      "epoch": 9.9334716796875e-05,
      "model_forward_time": 0.11478781700134277,
      "step": 16275
    },
    {
      "epoch": 9.9334716796875e-05,
      "step": 16275,
      "training_step_time": 0.3926730155944824
    },
    {
      "epoch": 9.93408203125e-05,
      "model_forward_time": 0.11541390419006348,
      "step": 16276
    },
    {
      "epoch": 9.93408203125e-05,
      "step": 16276,
      "training_step_time": 0.7329192161560059
    },
    {
      "epoch": 9.9346923828125e-05,
      "model_forward_time": 0.11439132690429688,
      "step": 16277
    },
    {
      "epoch": 9.9346923828125e-05,
      "step": 16277,
      "training_step_time": 0.3900761604309082
    },
    {
      "epoch": 9.935302734375e-05,
      "model_forward_time": 0.11486434936523438,
      "step": 16278
    },
    {
      "epoch": 9.935302734375e-05,
      "step": 16278,
      "training_step_time": 0.39110875129699707
    },
    {
      "epoch": 9.9359130859375e-05,
      "model_forward_time": 0.11515140533447266,
      "step": 16279
    },
    {
      "epoch": 9.9359130859375e-05,
      "step": 16279,
      "training_step_time": 0.3911616802215576
    },
    {
      "epoch": 9.9365234375e-05,
      "grad_norm": 0.15099255740642548,
      "learning_rate": 8.719409826701508e-05,
      "loss": 0.0527,
      "step": 16280
    },
    {
      "epoch": 9.9365234375e-05,
      "model_forward_time": 0.11438965797424316,
      "step": 16280
    },
    {
      "epoch": 9.9365234375e-05,
      "step": 16280,
      "training_step_time": 0.39228391647338867
    },
    {
      "epoch": 9.9371337890625e-05,
      "model_forward_time": 0.11430072784423828,
      "step": 16281
    },
    {
      "epoch": 9.9371337890625e-05,
      "step": 16281,
      "training_step_time": 0.39009761810302734
    },
    {
      "epoch": 9.937744140625e-05,
      "model_forward_time": 0.11557579040527344,
      "step": 16282
    },
    {
      "epoch": 9.937744140625e-05,
      "step": 16282,
      "training_step_time": 0.9045789241790771
    },
    {
      "epoch": 9.9383544921875e-05,
      "model_forward_time": 0.1152498722076416,
      "step": 16283
    },
    {
      "epoch": 9.9383544921875e-05,
      "step": 16283,
      "training_step_time": 0.4151625633239746
    },
    {
      "epoch": 9.93896484375e-05,
      "model_forward_time": 0.11470818519592285,
      "step": 16284
    },
    {
      "epoch": 9.93896484375e-05,
      "step": 16284,
      "training_step_time": 0.39550352096557617
    },
    {
      "epoch": 9.9395751953125e-05,
      "model_forward_time": 0.1142435073852539,
      "step": 16285
    },
    {
      "epoch": 9.9395751953125e-05,
      "step": 16285,
      "training_step_time": 0.46036410331726074
    },
    {
      "epoch": 9.940185546875e-05,
      "model_forward_time": 0.11380195617675781,
      "step": 16286
    },
    {
      "epoch": 9.940185546875e-05,
      "step": 16286,
      "training_step_time": 0.40638136863708496
    },
    {
      "epoch": 9.9407958984375e-05,
      "model_forward_time": 0.11439037322998047,
      "step": 16287
    },
    {
      "epoch": 9.9407958984375e-05,
      "step": 16287,
      "training_step_time": 0.3801281452178955
    },
    {
      "epoch": 9.94140625e-05,
      "model_forward_time": 0.11452937126159668,
      "step": 16288
    },
    {
      "epoch": 9.94140625e-05,
      "step": 16288,
      "training_step_time": 0.388918399810791
    },
    {
      "epoch": 9.9420166015625e-05,
      "model_forward_time": 0.11449313163757324,
      "step": 16289
    },
    {
      "epoch": 9.9420166015625e-05,
      "step": 16289,
      "training_step_time": 0.376971960067749
    },
    {
      "epoch": 9.942626953125e-05,
      "grad_norm": 0.18685917556285858,
      "learning_rate": 8.717567541693673e-05,
      "loss": 0.058,
      "step": 16290
    },
    {
      "epoch": 9.942626953125e-05,
      "model_forward_time": 0.11501646041870117,
      "step": 16290
    },
    {
      "epoch": 9.942626953125e-05,
      "step": 16290,
      "training_step_time": 0.4357030391693115
    },
    {
      "epoch": 9.9432373046875e-05,
      "model_forward_time": 0.11504673957824707,
      "step": 16291
    },
    {
      "epoch": 9.9432373046875e-05,
      "step": 16291,
      "training_step_time": 0.39875292778015137
    },
    {
      "epoch": 9.94384765625e-05,
      "model_forward_time": 0.11617255210876465,
      "step": 16292
    },
    {
      "epoch": 9.94384765625e-05,
      "step": 16292,
      "training_step_time": 0.38298869132995605
    },
    {
      "epoch": 9.9444580078125e-05,
      "model_forward_time": 0.11515641212463379,
      "step": 16293
    },
    {
      "epoch": 9.9444580078125e-05,
      "step": 16293,
      "training_step_time": 0.4000382423400879
    },
    {
      "epoch": 9.945068359375e-05,
      "model_forward_time": 0.1153266429901123,
      "step": 16294
    },
    {
      "epoch": 9.945068359375e-05,
      "step": 16294,
      "training_step_time": 0.4109938144683838
    },
    {
      "epoch": 9.9456787109375e-05,
      "model_forward_time": 0.1150197982788086,
      "step": 16295
    },
    {
      "epoch": 9.9456787109375e-05,
      "step": 16295,
      "training_step_time": 0.3999137878417969
    },
    {
      "epoch": 9.9462890625e-05,
      "model_forward_time": 0.1148061752319336,
      "step": 16296
    },
    {
      "epoch": 9.9462890625e-05,
      "step": 16296,
      "training_step_time": 0.4051225185394287
    },
    {
      "epoch": 9.9468994140625e-05,
      "model_forward_time": 0.11455202102661133,
      "step": 16297
    },
    {
      "epoch": 9.9468994140625e-05,
      "step": 16297,
      "training_step_time": 0.43094706535339355
    },
    {
      "epoch": 9.947509765625e-05,
      "model_forward_time": 0.11503410339355469,
      "step": 16298
    },
    {
      "epoch": 9.947509765625e-05,
      "step": 16298,
      "training_step_time": 0.48445868492126465
    },
    {
      "epoch": 9.9481201171875e-05,
      "model_forward_time": 0.11533093452453613,
      "step": 16299
    },
    {
      "epoch": 9.9481201171875e-05,
      "step": 16299,
      "training_step_time": 0.38702893257141113
    },
    {
      "epoch": 9.94873046875e-05,
      "grad_norm": 0.16122202575206757,
      "learning_rate": 8.715724127386972e-05,
      "loss": 0.0555,
      "step": 16300
    },
    {
      "epoch": 9.94873046875e-05,
      "model_forward_time": 0.11490488052368164,
      "step": 16300
    },
    {
      "epoch": 9.94873046875e-05,
      "step": 16300,
      "training_step_time": 0.41457033157348633
    },
    {
      "epoch": 9.9493408203125e-05,
      "model_forward_time": 0.1148383617401123,
      "step": 16301
    },
    {
      "epoch": 9.9493408203125e-05,
      "step": 16301,
      "training_step_time": 0.4922652244567871
    },
    {
      "epoch": 9.949951171875e-05,
      "model_forward_time": 0.11527395248413086,
      "step": 16302
    },
    {
      "epoch": 9.949951171875e-05,
      "step": 16302,
      "training_step_time": 0.42594194412231445
    },
    {
      "epoch": 9.9505615234375e-05,
      "model_forward_time": 0.11509895324707031,
      "step": 16303
    },
    {
      "epoch": 9.9505615234375e-05,
      "step": 16303,
      "training_step_time": 0.39208078384399414
    },
    {
      "epoch": 9.951171875e-05,
      "model_forward_time": 0.11531448364257812,
      "step": 16304
    },
    {
      "epoch": 9.951171875e-05,
      "step": 16304,
      "training_step_time": 0.45975232124328613
    },
    {
      "epoch": 9.9517822265625e-05,
      "model_forward_time": 0.1145467758178711,
      "step": 16305
    },
    {
      "epoch": 9.9517822265625e-05,
      "step": 16305,
      "training_step_time": 0.46356749534606934
    },
    {
      "epoch": 9.952392578125e-05,
      "model_forward_time": 0.11600780487060547,
      "step": 16306
    },
    {
      "epoch": 9.952392578125e-05,
      "step": 16306,
      "training_step_time": 0.4893646240234375
    },
    {
      "epoch": 9.9530029296875e-05,
      "model_forward_time": 0.11461877822875977,
      "step": 16307
    },
    {
      "epoch": 9.9530029296875e-05,
      "step": 16307,
      "training_step_time": 0.38323235511779785
    },
    {
      "epoch": 9.95361328125e-05,
      "model_forward_time": 0.11653256416320801,
      "step": 16308
    },
    {
      "epoch": 9.95361328125e-05,
      "step": 16308,
      "training_step_time": 0.3908705711364746
    },
    {
      "epoch": 9.9542236328125e-05,
      "model_forward_time": 0.11550712585449219,
      "step": 16309
    },
    {
      "epoch": 9.9542236328125e-05,
      "step": 16309,
      "training_step_time": 0.3927731513977051
    },
    {
      "epoch": 9.954833984375e-05,
      "grad_norm": 0.1467830091714859,
      "learning_rate": 8.713879584341391e-05,
      "loss": 0.0564,
      "step": 16310
    },
    {
      "epoch": 9.954833984375e-05,
      "model_forward_time": 0.11494994163513184,
      "step": 16310
    },
    {
      "epoch": 9.954833984375e-05,
      "step": 16310,
      "training_step_time": 0.3964874744415283
    },
    {
      "epoch": 9.9554443359375e-05,
      "model_forward_time": 0.11581707000732422,
      "step": 16311
    },
    {
      "epoch": 9.9554443359375e-05,
      "step": 16311,
      "training_step_time": 0.3913309574127197
    },
    {
      "epoch": 9.9560546875e-05,
      "model_forward_time": 0.11586403846740723,
      "step": 16312
    },
    {
      "epoch": 9.9560546875e-05,
      "step": 16312,
      "training_step_time": 0.48503851890563965
    },
    {
      "epoch": 9.9566650390625e-05,
      "model_forward_time": 0.11532402038574219,
      "step": 16313
    },
    {
      "epoch": 9.9566650390625e-05,
      "step": 16313,
      "training_step_time": 0.4583771228790283
    },
    {
      "epoch": 9.957275390625e-05,
      "model_forward_time": 0.11557841300964355,
      "step": 16314
    },
    {
      "epoch": 9.957275390625e-05,
      "step": 16314,
      "training_step_time": 0.3912529945373535
    },
    {
      "epoch": 9.9578857421875e-05,
      "model_forward_time": 0.11657118797302246,
      "step": 16315
    },
    {
      "epoch": 9.9578857421875e-05,
      "step": 16315,
      "training_step_time": 0.40734410285949707
    },
    {
      "epoch": 9.95849609375e-05,
      "model_forward_time": 0.11541318893432617,
      "step": 16316
    },
    {
      "epoch": 9.95849609375e-05,
      "step": 16316,
      "training_step_time": 0.38470005989074707
    },
    {
      "epoch": 9.9591064453125e-05,
      "model_forward_time": 0.11519026756286621,
      "step": 16317
    },
    {
      "epoch": 9.9591064453125e-05,
      "step": 16317,
      "training_step_time": 0.3945596218109131
    },
    {
      "epoch": 9.959716796875e-05,
      "model_forward_time": 0.11496233940124512,
      "step": 16318
    },
    {
      "epoch": 9.959716796875e-05,
      "step": 16318,
      "training_step_time": 0.36942172050476074
    },
    {
      "epoch": 9.9603271484375e-05,
      "model_forward_time": 0.11549782752990723,
      "step": 16319
    },
    {
      "epoch": 9.9603271484375e-05,
      "step": 16319,
      "training_step_time": 0.4424855709075928
    },
    {
      "epoch": 9.9609375e-05,
      "grad_norm": 0.13729679584503174,
      "learning_rate": 8.71203391311725e-05,
      "loss": 0.0529,
      "step": 16320
    },
    {
      "epoch": 9.9609375e-05,
      "model_forward_time": 0.11507129669189453,
      "step": 16320
    },
    {
      "epoch": 9.9609375e-05,
      "step": 16320,
      "training_step_time": 0.4847743511199951
    },
    {
      "epoch": 9.9615478515625e-05,
      "model_forward_time": 0.11532449722290039,
      "step": 16321
    },
    {
      "epoch": 9.9615478515625e-05,
      "step": 16321,
      "training_step_time": 0.39693737030029297
    },
    {
      "epoch": 9.962158203125e-05,
      "model_forward_time": 0.11574006080627441,
      "step": 16322
    },
    {
      "epoch": 9.962158203125e-05,
      "step": 16322,
      "training_step_time": 0.3845500946044922
    },
    {
      "epoch": 9.9627685546875e-05,
      "model_forward_time": 0.11542153358459473,
      "step": 16323
    },
    {
      "epoch": 9.9627685546875e-05,
      "step": 16323,
      "training_step_time": 0.4174785614013672
    },
    {
      "epoch": 9.96337890625e-05,
      "model_forward_time": 0.11487102508544922,
      "step": 16324
    },
    {
      "epoch": 9.96337890625e-05,
      "step": 16324,
      "training_step_time": 0.6800687313079834
    },
    {
      "epoch": 9.9639892578125e-05,
      "model_forward_time": 0.11449933052062988,
      "step": 16325
    },
    {
      "epoch": 9.9639892578125e-05,
      "step": 16325,
      "training_step_time": 0.4227480888366699
    },
    {
      "epoch": 9.964599609375e-05,
      "model_forward_time": 0.11504721641540527,
      "step": 16326
    },
    {
      "epoch": 9.964599609375e-05,
      "step": 16326,
      "training_step_time": 0.41926097869873047
    },
    {
      "epoch": 9.9652099609375e-05,
      "model_forward_time": 0.11477255821228027,
      "step": 16327
    },
    {
      "epoch": 9.9652099609375e-05,
      "step": 16327,
      "training_step_time": 0.3964264392852783
    },
    {
      "epoch": 9.9658203125e-05,
      "model_forward_time": 0.11493563652038574,
      "step": 16328
    },
    {
      "epoch": 9.9658203125e-05,
      "step": 16328,
      "training_step_time": 0.42778563499450684
    },
    {
      "epoch": 9.9664306640625e-05,
      "model_forward_time": 0.11406469345092773,
      "step": 16329
    },
    {
      "epoch": 9.9664306640625e-05,
      "step": 16329,
      "training_step_time": 0.4779658317565918
    },
    {
      "epoch": 9.967041015625e-05,
      "grad_norm": 0.1651085466146469,
      "learning_rate": 8.710187114275219e-05,
      "loss": 0.0567,
      "step": 16330
    },
    {
      "epoch": 9.967041015625e-05,
      "model_forward_time": 0.1155385971069336,
      "step": 16330
    },
    {
      "epoch": 9.967041015625e-05,
      "step": 16330,
      "training_step_time": 0.46667957305908203
    },
    {
      "epoch": 9.9676513671875e-05,
      "model_forward_time": 0.11468887329101562,
      "step": 16331
    },
    {
      "epoch": 9.9676513671875e-05,
      "step": 16331,
      "training_step_time": 0.38695621490478516
    },
    {
      "epoch": 9.96826171875e-05,
      "model_forward_time": 0.11427640914916992,
      "step": 16332
    },
    {
      "epoch": 9.96826171875e-05,
      "step": 16332,
      "training_step_time": 0.3677704334259033
    },
    {
      "epoch": 9.9688720703125e-05,
      "model_forward_time": 0.11443471908569336,
      "step": 16333
    },
    {
      "epoch": 9.9688720703125e-05,
      "step": 16333,
      "training_step_time": 0.45122528076171875
    },
    {
      "epoch": 9.969482421875e-05,
      "model_forward_time": 0.11441588401794434,
      "step": 16334
    },
    {
      "epoch": 9.969482421875e-05,
      "step": 16334,
      "training_step_time": 0.4693105220794678
    },
    {
      "epoch": 9.9700927734375e-05,
      "model_forward_time": 0.11542868614196777,
      "step": 16335
    },
    {
      "epoch": 9.9700927734375e-05,
      "step": 16335,
      "training_step_time": 0.38843417167663574
    },
    {
      "epoch": 9.970703125e-05,
      "model_forward_time": 0.11504149436950684,
      "step": 16336
    },
    {
      "epoch": 9.970703125e-05,
      "step": 16336,
      "training_step_time": 0.41766953468322754
    },
    {
      "epoch": 9.9713134765625e-05,
      "model_forward_time": 0.11545896530151367,
      "step": 16337
    },
    {
      "epoch": 9.9713134765625e-05,
      "step": 16337,
      "training_step_time": 0.45241355895996094
    },
    {
      "epoch": 9.971923828125e-05,
      "model_forward_time": 0.11488199234008789,
      "step": 16338
    },
    {
      "epoch": 9.971923828125e-05,
      "step": 16338,
      "training_step_time": 0.485642671585083
    },
    {
      "epoch": 9.9725341796875e-05,
      "model_forward_time": 0.11510753631591797,
      "step": 16339
    },
    {
      "epoch": 9.9725341796875e-05,
      "step": 16339,
      "training_step_time": 0.3909339904785156
    },
    {
      "epoch": 9.97314453125e-05,
      "grad_norm": 0.17095230519771576,
      "learning_rate": 8.708339188376302e-05,
      "loss": 0.0543,
      "step": 16340
    },
    {
      "epoch": 9.97314453125e-05,
      "model_forward_time": 0.11544370651245117,
      "step": 16340
    },
    {
      "epoch": 9.97314453125e-05,
      "step": 16340,
      "training_step_time": 0.4073305130004883
    },
    {
      "epoch": 9.9737548828125e-05,
      "model_forward_time": 0.1154787540435791,
      "step": 16341
    },
    {
      "epoch": 9.9737548828125e-05,
      "step": 16341,
      "training_step_time": 0.43640613555908203
    },
    {
      "epoch": 9.974365234375e-05,
      "model_forward_time": 0.1151120662689209,
      "step": 16342
    },
    {
      "epoch": 9.974365234375e-05,
      "step": 16342,
      "training_step_time": 0.4009518623352051
    },
    {
      "epoch": 9.9749755859375e-05,
      "model_forward_time": 0.11456537246704102,
      "step": 16343
    },
    {
      "epoch": 9.9749755859375e-05,
      "step": 16343,
      "training_step_time": 0.45882511138916016
    },
    {
      "epoch": 9.9755859375e-05,
      "model_forward_time": 0.11428046226501465,
      "step": 16344
    },
    {
      "epoch": 9.9755859375e-05,
      "step": 16344,
      "training_step_time": 0.3958094120025635
    },
    {
      "epoch": 9.9761962890625e-05,
      "model_forward_time": 0.11547565460205078,
      "step": 16345
    },
    {
      "epoch": 9.9761962890625e-05,
      "step": 16345,
      "training_step_time": 0.39527440071105957
    },
    {
      "epoch": 9.976806640625e-05,
      "model_forward_time": 0.11460328102111816,
      "step": 16346
    },
    {
      "epoch": 9.976806640625e-05,
      "step": 16346,
      "training_step_time": 0.3852875232696533
    },
    {
      "epoch": 9.9774169921875e-05,
      "model_forward_time": 0.1149439811706543,
      "step": 16347
    },
    {
      "epoch": 9.9774169921875e-05,
      "step": 16347,
      "training_step_time": 0.423079252243042
    },
    {
      "epoch": 9.97802734375e-05,
      "model_forward_time": 0.11470150947570801,
      "step": 16348
    },
    {
      "epoch": 9.97802734375e-05,
      "step": 16348,
      "training_step_time": 0.4623749256134033
    },
    {
      "epoch": 9.9786376953125e-05,
      "model_forward_time": 0.11549973487854004,
      "step": 16349
    },
    {
      "epoch": 9.9786376953125e-05,
      "step": 16349,
      "training_step_time": 0.4255094528198242
    },
    {
      "epoch": 9.979248046875e-05,
      "grad_norm": 0.14101199805736542,
      "learning_rate": 8.706490135981855e-05,
      "loss": 0.0513,
      "step": 16350
    },
    {
      "epoch": 9.979248046875e-05,
      "model_forward_time": 0.11520600318908691,
      "step": 16350
    },
    {
      "epoch": 9.979248046875e-05,
      "step": 16350,
      "training_step_time": 0.395723819732666
    },
    {
      "epoch": 9.9798583984375e-05,
      "model_forward_time": 0.1147775650024414,
      "step": 16351
    },
    {
      "epoch": 9.9798583984375e-05,
      "step": 16351,
      "training_step_time": 0.45182299613952637
    },
    {
      "epoch": 9.98046875e-05,
      "model_forward_time": 0.11526870727539062,
      "step": 16352
    },
    {
      "epoch": 9.98046875e-05,
      "step": 16352,
      "training_step_time": 0.4003720283508301
    },
    {
      "epoch": 9.9810791015625e-05,
      "model_forward_time": 0.11540651321411133,
      "step": 16353
    },
    {
      "epoch": 9.9810791015625e-05,
      "step": 16353,
      "training_step_time": 0.3983123302459717
    },
    {
      "epoch": 9.981689453125e-05,
      "model_forward_time": 0.11470603942871094,
      "step": 16354
    },
    {
      "epoch": 9.981689453125e-05,
      "step": 16354,
      "training_step_time": 0.4320833683013916
    },
    {
      "epoch": 9.9822998046875e-05,
      "model_forward_time": 0.11475992202758789,
      "step": 16355
    },
    {
      "epoch": 9.9822998046875e-05,
      "step": 16355,
      "training_step_time": 0.4737858772277832
    },
    {
      "epoch": 9.98291015625e-05,
      "model_forward_time": 0.11549234390258789,
      "step": 16356
    },
    {
      "epoch": 9.98291015625e-05,
      "step": 16356,
      "training_step_time": 0.46999120712280273
    },
    {
      "epoch": 9.9835205078125e-05,
      "model_forward_time": 0.11501240730285645,
      "step": 16357
    },
    {
      "epoch": 9.9835205078125e-05,
      "step": 16357,
      "training_step_time": 0.435335636138916
    },
    {
      "epoch": 9.984130859375e-05,
      "model_forward_time": 0.1154470443725586,
      "step": 16358
    },
    {
      "epoch": 9.984130859375e-05,
      "step": 16358,
      "training_step_time": 0.3907291889190674
    },
    {
      "epoch": 9.9847412109375e-05,
      "model_forward_time": 0.11478400230407715,
      "step": 16359
    },
    {
      "epoch": 9.9847412109375e-05,
      "step": 16359,
      "training_step_time": 0.39336442947387695
    },
    {
      "epoch": 9.9853515625e-05,
      "grad_norm": 0.15297618508338928,
      "learning_rate": 8.704639957653567e-05,
      "loss": 0.0559,
      "step": 16360
    },
    {
      "epoch": 9.9853515625e-05,
      "model_forward_time": 0.1143045425415039,
      "step": 16360
    },
    {
      "epoch": 9.9853515625e-05,
      "step": 16360,
      "training_step_time": 0.3918612003326416
    },
    {
      "epoch": 9.9859619140625e-05,
      "model_forward_time": 0.11542820930480957,
      "step": 16361
    },
    {
      "epoch": 9.9859619140625e-05,
      "step": 16361,
      "training_step_time": 0.3856379985809326
    },
    {
      "epoch": 9.986572265625e-05,
      "model_forward_time": 0.11508035659790039,
      "step": 16362
    },
    {
      "epoch": 9.986572265625e-05,
      "step": 16362,
      "training_step_time": 0.39082932472229004
    },
    {
      "epoch": 9.9871826171875e-05,
      "model_forward_time": 0.11520528793334961,
      "step": 16363
    },
    {
      "epoch": 9.9871826171875e-05,
      "step": 16363,
      "training_step_time": 0.4468417167663574
    },
    {
      "epoch": 9.98779296875e-05,
      "model_forward_time": 0.11564421653747559,
      "step": 16364
    },
    {
      "epoch": 9.98779296875e-05,
      "step": 16364,
      "training_step_time": 0.5012881755828857
    },
    {
      "epoch": 9.9884033203125e-05,
      "model_forward_time": 0.11502337455749512,
      "step": 16365
    },
    {
      "epoch": 9.9884033203125e-05,
      "step": 16365,
      "training_step_time": 0.4484841823577881
    },
    {
      "epoch": 9.989013671875e-05,
      "model_forward_time": 0.11463570594787598,
      "step": 16366
    },
    {
      "epoch": 9.989013671875e-05,
      "step": 16366,
      "training_step_time": 0.4319901466369629
    },
    {
      "epoch": 9.9896240234375e-05,
      "model_forward_time": 0.11569547653198242,
      "step": 16367
    },
    {
      "epoch": 9.9896240234375e-05,
      "step": 16367,
      "training_step_time": 0.4074227809906006
    },
    {
      "epoch": 9.990234375e-05,
      "model_forward_time": 0.11482071876525879,
      "step": 16368
    },
    {
      "epoch": 9.990234375e-05,
      "step": 16368,
      "training_step_time": 0.42096972465515137
    },
    {
      "epoch": 9.9908447265625e-05,
      "model_forward_time": 0.11471056938171387,
      "step": 16369
    },
    {
      "epoch": 9.9908447265625e-05,
      "step": 16369,
      "training_step_time": 0.47382569313049316
    },
    {
      "epoch": 9.991455078125e-05,
      "grad_norm": 0.17832519114017487,
      "learning_rate": 8.702788653953477e-05,
      "loss": 0.0559,
      "step": 16370
    },
    {
      "epoch": 9.991455078125e-05,
      "model_forward_time": 0.1148369312286377,
      "step": 16370
    },
    {
      "epoch": 9.991455078125e-05,
      "step": 16370,
      "training_step_time": 0.41364502906799316
    },
    {
      "epoch": 9.9920654296875e-05,
      "model_forward_time": 0.11491560935974121,
      "step": 16371
    },
    {
      "epoch": 9.9920654296875e-05,
      "step": 16371,
      "training_step_time": 0.39110803604125977
    },
    {
      "epoch": 9.99267578125e-05,
      "model_forward_time": 0.11525082588195801,
      "step": 16372
    },
    {
      "epoch": 9.99267578125e-05,
      "step": 16372,
      "training_step_time": 0.3967432975769043
    },
    {
      "epoch": 9.9932861328125e-05,
      "model_forward_time": 0.11503171920776367,
      "step": 16373
    },
    {
      "epoch": 9.9932861328125e-05,
      "step": 16373,
      "training_step_time": 0.402940034866333
    },
    {
      "epoch": 9.993896484375e-05,
      "model_forward_time": 0.11563563346862793,
      "step": 16374
    },
    {
      "epoch": 9.993896484375e-05,
      "step": 16374,
      "training_step_time": 0.3908040523529053
    },
    {
      "epoch": 9.9945068359375e-05,
      "model_forward_time": 0.1148371696472168,
      "step": 16375
    },
    {
      "epoch": 9.9945068359375e-05,
      "step": 16375,
      "training_step_time": 0.38961076736450195
    },
    {
      "epoch": 9.9951171875e-05,
      "model_forward_time": 0.11509180068969727,
      "step": 16376
    },
    {
      "epoch": 9.9951171875e-05,
      "step": 16376,
      "training_step_time": 0.3705158233642578
    },
    {
      "epoch": 9.9957275390625e-05,
      "model_forward_time": 0.1155250072479248,
      "step": 16377
    },
    {
      "epoch": 9.9957275390625e-05,
      "step": 16377,
      "training_step_time": 0.4575927257537842
    },
    {
      "epoch": 9.996337890625e-05,
      "model_forward_time": 0.11467623710632324,
      "step": 16378
    },
    {
      "epoch": 9.996337890625e-05,
      "step": 16378,
      "training_step_time": 0.7588262557983398
    },
    {
      "epoch": 9.9969482421875e-05,
      "model_forward_time": 0.11452794075012207,
      "step": 16379
    },
    {
      "epoch": 9.9969482421875e-05,
      "step": 16379,
      "training_step_time": 0.4022226333618164
    },
    {
      "epoch": 9.99755859375e-05,
      "grad_norm": 0.1954023391008377,
      "learning_rate": 8.700936225443959e-05,
      "loss": 0.0589,
      "step": 16380
    },
    {
      "epoch": 9.99755859375e-05,
      "model_forward_time": 0.1143040657043457,
      "step": 16380
    },
    {
      "epoch": 9.99755859375e-05,
      "step": 16380,
      "training_step_time": 0.39379334449768066
    },
    {
      "epoch": 9.9981689453125e-05,
      "model_forward_time": 0.11455249786376953,
      "step": 16381
    },
    {
      "epoch": 9.9981689453125e-05,
      "step": 16381,
      "training_step_time": 0.5042479038238525
    },
    {
      "epoch": 9.998779296875e-05,
      "model_forward_time": 0.11395120620727539,
      "step": 16382
    },
    {
      "epoch": 9.998779296875e-05,
      "step": 16382,
      "training_step_time": 0.41336560249328613
    },
    {
      "epoch": 9.9993896484375e-05,
      "model_forward_time": 0.11390328407287598,
      "step": 16383
    },
    {
      "epoch": 9.9993896484375e-05,
      "step": 16383,
      "training_step_time": 0.4686775207519531
    },
    {
      "epoch": 0.0001,
      "model_forward_time": 0.11465096473693848,
      "step": 16384
    },
    {
      "epoch": 0.0001,
      "step": 16384,
      "training_step_time": 0.407977819442749
    },
    {
      "epoch": 0.000100006103515625,
      "model_forward_time": 0.1150364875793457,
      "step": 16385
    },
    {
      "epoch": 0.000100006103515625,
      "step": 16385,
      "training_step_time": 0.39789772033691406
    },
    {
      "epoch": 0.00010001220703125,
      "model_forward_time": 0.11508798599243164,
      "step": 16386
    },
    {
      "epoch": 0.00010001220703125,
      "step": 16386,
      "training_step_time": 0.4831366539001465
    },
    {
      "epoch": 0.000100018310546875,
      "model_forward_time": 0.11479401588439941,
      "step": 16387
    },
    {
      "epoch": 0.000100018310546875,
      "step": 16387,
      "training_step_time": 0.40247082710266113
    },
    {
      "epoch": 0.0001000244140625,
      "model_forward_time": 0.11483216285705566,
      "step": 16388
    },
    {
      "epoch": 0.0001000244140625,
      "step": 16388,
      "training_step_time": 0.3856058120727539
    },
    {
      "epoch": 0.000100030517578125,
      "model_forward_time": 0.11482453346252441,
      "step": 16389
    },
    {
      "epoch": 0.000100030517578125,
      "step": 16389,
      "training_step_time": 0.3908541202545166
    },
    {
      "epoch": 0.00010003662109375,
      "grad_norm": 0.13955844938755035,
      "learning_rate": 8.699082672687734e-05,
      "loss": 0.0525,
      "step": 16390
    },
    {
      "epoch": 0.00010003662109375,
      "model_forward_time": 0.1154932975769043,
      "step": 16390
    },
    {
      "epoch": 0.00010003662109375,
      "step": 16390,
      "training_step_time": 0.5709555149078369
    },
    {
      "epoch": 0.000100042724609375,
      "model_forward_time": 0.11460280418395996,
      "step": 16391
    },
    {
      "epoch": 0.000100042724609375,
      "step": 16391,
      "training_step_time": 0.44625353813171387
    },
    {
      "epoch": 0.000100048828125,
      "model_forward_time": 0.11552166938781738,
      "step": 16392
    },
    {
      "epoch": 0.000100048828125,
      "step": 16392,
      "training_step_time": 0.40617823600769043
    },
    {
      "epoch": 0.000100054931640625,
      "model_forward_time": 0.11438751220703125,
      "step": 16393
    },
    {
      "epoch": 0.000100054931640625,
      "step": 16393,
      "training_step_time": 0.3945801258087158
    },
    {
      "epoch": 0.00010006103515625,
      "model_forward_time": 0.11545896530151367,
      "step": 16394
    },
    {
      "epoch": 0.00010006103515625,
      "step": 16394,
      "training_step_time": 0.4016585350036621
    },
    {
      "epoch": 0.000100067138671875,
      "model_forward_time": 0.11471223831176758,
      "step": 16395
    },
    {
      "epoch": 0.000100067138671875,
      "step": 16395,
      "training_step_time": 0.39476752281188965
    },
    {
      "epoch": 0.0001000732421875,
      "model_forward_time": 0.11520242691040039,
      "step": 16396
    },
    {
      "epoch": 0.0001000732421875,
      "step": 16396,
      "training_step_time": 0.4812185764312744
    },
    {
      "epoch": 0.000100079345703125,
      "model_forward_time": 0.11503338813781738,
      "step": 16397
    },
    {
      "epoch": 0.000100079345703125,
      "step": 16397,
      "training_step_time": 0.6889488697052002
    },
    {
      "epoch": 0.00010008544921875,
      "model_forward_time": 0.11438846588134766,
      "step": 16398
    },
    {
      "epoch": 0.00010008544921875,
      "step": 16398,
      "training_step_time": 0.45311546325683594
    },
    {
      "epoch": 0.000100091552734375,
      "model_forward_time": 0.11469244956970215,
      "step": 16399
    },
    {
      "epoch": 0.000100091552734375,
      "step": 16399,
      "training_step_time": 0.3987579345703125
    },
    {
      "epoch": 0.00010009765625,
      "grad_norm": 0.20877128839492798,
      "learning_rate": 8.697227996247861e-05,
      "loss": 0.0549,
      "step": 16400
    },
    {
      "epoch": 0.00010009765625,
      "model_forward_time": 0.11407923698425293,
      "step": 16400
    },
    {
      "epoch": 0.00010009765625,
      "step": 16400,
      "training_step_time": 0.38972973823547363
    },
    {
      "epoch": 0.000100103759765625,
      "model_forward_time": 0.1152944564819336,
      "step": 16401
    },
    {
      "epoch": 0.000100103759765625,
      "step": 16401,
      "training_step_time": 0.38971424102783203
    },
    {
      "epoch": 0.00010010986328125,
      "model_forward_time": 0.11372828483581543,
      "step": 16402
    },
    {
      "epoch": 0.00010010986328125,
      "step": 16402,
      "training_step_time": 0.3855407238006592
    },
    {
      "epoch": 0.000100115966796875,
      "model_forward_time": 0.11484813690185547,
      "step": 16403
    },
    {
      "epoch": 0.000100115966796875,
      "step": 16403,
      "training_step_time": 0.7130100727081299
    },
    {
      "epoch": 0.0001001220703125,
      "model_forward_time": 0.11495637893676758,
      "step": 16404
    },
    {
      "epoch": 0.0001001220703125,
      "step": 16404,
      "training_step_time": 0.4236598014831543
    },
    {
      "epoch": 0.000100128173828125,
      "model_forward_time": 0.11467647552490234,
      "step": 16405
    },
    {
      "epoch": 0.000100128173828125,
      "step": 16405,
      "training_step_time": 0.48073601722717285
    },
    {
      "epoch": 0.00010013427734375,
      "model_forward_time": 0.1140751838684082,
      "step": 16406
    },
    {
      "epoch": 0.00010013427734375,
      "step": 16406,
      "training_step_time": 0.3858959674835205
    },
    {
      "epoch": 0.000100140380859375,
      "model_forward_time": 0.11464142799377441,
      "step": 16407
    },
    {
      "epoch": 0.000100140380859375,
      "step": 16407,
      "training_step_time": 0.3825347423553467
    },
    {
      "epoch": 0.000100146484375,
      "model_forward_time": 0.11452722549438477,
      "step": 16408
    },
    {
      "epoch": 0.000100146484375,
      "step": 16408,
      "training_step_time": 0.38742947578430176
    },
    {
      "epoch": 0.000100152587890625,
      "model_forward_time": 0.11603116989135742,
      "step": 16409
    },
    {
      "epoch": 0.000100152587890625,
      "step": 16409,
      "training_step_time": 0.397937536239624
    },
    {
      "epoch": 0.00010015869140625,
      "grad_norm": 0.15960586071014404,
      "learning_rate": 8.695372196687743e-05,
      "loss": 0.0541,
      "step": 16410
    },
    {
      "epoch": 0.00010015869140625,
      "model_forward_time": 0.11515402793884277,
      "step": 16410
    },
    {
      "epoch": 0.00010015869140625,
      "step": 16410,
      "training_step_time": 0.47611284255981445
    },
    {
      "epoch": 0.000100164794921875,
      "model_forward_time": 0.11442732810974121,
      "step": 16411
    },
    {
      "epoch": 0.000100164794921875,
      "step": 16411,
      "training_step_time": 0.4176924228668213
    },
    {
      "epoch": 0.0001001708984375,
      "model_forward_time": 0.11482763290405273,
      "step": 16412
    },
    {
      "epoch": 0.0001001708984375,
      "step": 16412,
      "training_step_time": 0.4697422981262207
    },
    {
      "epoch": 0.000100177001953125,
      "model_forward_time": 0.11426472663879395,
      "step": 16413
    },
    {
      "epoch": 0.000100177001953125,
      "step": 16413,
      "training_step_time": 0.44153523445129395
    },
    {
      "epoch": 0.00010018310546875,
      "model_forward_time": 0.11499428749084473,
      "step": 16414
    },
    {
      "epoch": 0.00010018310546875,
      "step": 16414,
      "training_step_time": 0.3961455821990967
    },
    {
      "epoch": 0.000100189208984375,
      "model_forward_time": 0.1142885684967041,
      "step": 16415
    },
    {
      "epoch": 0.000100189208984375,
      "step": 16415,
      "training_step_time": 0.3913912773132324
    },
    {
      "epoch": 0.0001001953125,
      "model_forward_time": 0.11521697044372559,
      "step": 16416
    },
    {
      "epoch": 0.0001001953125,
      "step": 16416,
      "training_step_time": 0.4079165458679199
    },
    {
      "epoch": 0.000100201416015625,
      "model_forward_time": 0.11510825157165527,
      "step": 16417
    },
    {
      "epoch": 0.000100201416015625,
      "step": 16417,
      "training_step_time": 0.41991591453552246
    },
    {
      "epoch": 0.00010020751953125,
      "model_forward_time": 0.11500954627990723,
      "step": 16418
    },
    {
      "epoch": 0.00010020751953125,
      "step": 16418,
      "training_step_time": 0.40423083305358887
    },
    {
      "epoch": 0.000100213623046875,
      "model_forward_time": 0.11519670486450195,
      "step": 16419
    },
    {
      "epoch": 0.000100213623046875,
      "step": 16419,
      "training_step_time": 0.4436929225921631
    },
    {
      "epoch": 0.0001002197265625,
      "grad_norm": 0.14867199957370758,
      "learning_rate": 8.693515274571123e-05,
      "loss": 0.0498,
      "step": 16420
    },
    {
      "epoch": 0.0001002197265625,
      "model_forward_time": 0.11563968658447266,
      "step": 16420
    },
    {
      "epoch": 0.0001002197265625,
      "step": 16420,
      "training_step_time": 0.4859800338745117
    },
    {
      "epoch": 0.000100225830078125,
      "model_forward_time": 0.11481738090515137,
      "step": 16421
    },
    {
      "epoch": 0.000100225830078125,
      "step": 16421,
      "training_step_time": 0.4123380184173584
    },
    {
      "epoch": 0.00010023193359375,
      "model_forward_time": 0.11498165130615234,
      "step": 16422
    },
    {
      "epoch": 0.00010023193359375,
      "step": 16422,
      "training_step_time": 0.39009571075439453
    },
    {
      "epoch": 0.000100238037109375,
      "model_forward_time": 0.11495542526245117,
      "step": 16423
    },
    {
      "epoch": 0.000100238037109375,
      "step": 16423,
      "training_step_time": 0.38947176933288574
    },
    {
      "epoch": 0.000100244140625,
      "model_forward_time": 0.11512112617492676,
      "step": 16424
    },
    {
      "epoch": 0.000100244140625,
      "step": 16424,
      "training_step_time": 0.38666629791259766
    },
    {
      "epoch": 0.000100250244140625,
      "model_forward_time": 0.11532449722290039,
      "step": 16425
    },
    {
      "epoch": 0.000100250244140625,
      "step": 16425,
      "training_step_time": 0.5103859901428223
    },
    {
      "epoch": 0.00010025634765625,
      "model_forward_time": 0.11421942710876465,
      "step": 16426
    },
    {
      "epoch": 0.00010025634765625,
      "step": 16426,
      "training_step_time": 0.445178747177124
    },
    {
      "epoch": 0.000100262451171875,
      "model_forward_time": 0.11487317085266113,
      "step": 16427
    },
    {
      "epoch": 0.000100262451171875,
      "step": 16427,
      "training_step_time": 0.3988487720489502
    },
    {
      "epoch": 0.0001002685546875,
      "model_forward_time": 0.11484074592590332,
      "step": 16428
    },
    {
      "epoch": 0.0001002685546875,
      "step": 16428,
      "training_step_time": 0.4220762252807617
    },
    {
      "epoch": 0.000100274658203125,
      "model_forward_time": 0.11490941047668457,
      "step": 16429
    },
    {
      "epoch": 0.000100274658203125,
      "step": 16429,
      "training_step_time": 0.3986091613769531
    },
    {
      "epoch": 0.00010028076171875,
      "grad_norm": 0.11173024028539658,
      "learning_rate": 8.691657230462083e-05,
      "loss": 0.0492,
      "step": 16430
    },
    {
      "epoch": 0.00010028076171875,
      "model_forward_time": 0.11547541618347168,
      "step": 16430
    },
    {
      "epoch": 0.00010028076171875,
      "step": 16430,
      "training_step_time": 0.4040985107421875
    },
    {
      "epoch": 0.000100286865234375,
      "model_forward_time": 0.11485171318054199,
      "step": 16431
    },
    {
      "epoch": 0.000100286865234375,
      "step": 16431,
      "training_step_time": 0.42584848403930664
    },
    {
      "epoch": 0.00010029296875,
      "model_forward_time": 0.11482834815979004,
      "step": 16432
    },
    {
      "epoch": 0.00010029296875,
      "step": 16432,
      "training_step_time": 0.3665659427642822
    },
    {
      "epoch": 0.000100299072265625,
      "model_forward_time": 0.11478400230407715,
      "step": 16433
    },
    {
      "epoch": 0.000100299072265625,
      "step": 16433,
      "training_step_time": 0.441180944442749
    },
    {
      "epoch": 0.00010030517578125,
      "model_forward_time": 0.11522245407104492,
      "step": 16434
    },
    {
      "epoch": 0.00010030517578125,
      "step": 16434,
      "training_step_time": 0.48668527603149414
    },
    {
      "epoch": 0.000100311279296875,
      "model_forward_time": 0.11535811424255371,
      "step": 16435
    },
    {
      "epoch": 0.000100311279296875,
      "step": 16435,
      "training_step_time": 0.3915848731994629
    },
    {
      "epoch": 0.0001003173828125,
      "model_forward_time": 0.11490845680236816,
      "step": 16436
    },
    {
      "epoch": 0.0001003173828125,
      "step": 16436,
      "training_step_time": 0.3950002193450928
    },
    {
      "epoch": 0.000100323486328125,
      "model_forward_time": 0.11491799354553223,
      "step": 16437
    },
    {
      "epoch": 0.000100323486328125,
      "step": 16437,
      "training_step_time": 0.4292917251586914
    },
    {
      "epoch": 0.00010032958984375,
      "model_forward_time": 0.11446952819824219,
      "step": 16438
    },
    {
      "epoch": 0.00010032958984375,
      "step": 16438,
      "training_step_time": 0.39552783966064453
    },
    {
      "epoch": 0.000100335693359375,
      "model_forward_time": 0.11579656600952148,
      "step": 16439
    },
    {
      "epoch": 0.000100335693359375,
      "step": 16439,
      "training_step_time": 0.3984215259552002
    },
    {
      "epoch": 0.000100341796875,
      "grad_norm": 0.1573537439107895,
      "learning_rate": 8.689798064925049e-05,
      "loss": 0.0559,
      "step": 16440
    },
    {
      "epoch": 0.000100341796875,
      "model_forward_time": 0.11478948593139648,
      "step": 16440
    },
    {
      "epoch": 0.000100341796875,
      "step": 16440,
      "training_step_time": 0.4184257984161377
    },
    {
      "epoch": 0.000100347900390625,
      "model_forward_time": 0.11517834663391113,
      "step": 16441
    },
    {
      "epoch": 0.000100347900390625,
      "step": 16441,
      "training_step_time": 0.48409533500671387
    },
    {
      "epoch": 0.00010035400390625,
      "model_forward_time": 0.11546635627746582,
      "step": 16442
    },
    {
      "epoch": 0.00010035400390625,
      "step": 16442,
      "training_step_time": 0.38484644889831543
    },
    {
      "epoch": 0.000100360107421875,
      "model_forward_time": 0.11560392379760742,
      "step": 16443
    },
    {
      "epoch": 0.000100360107421875,
      "step": 16443,
      "training_step_time": 0.3932664394378662
    },
    {
      "epoch": 0.0001003662109375,
      "model_forward_time": 0.11456799507141113,
      "step": 16444
    },
    {
      "epoch": 0.0001003662109375,
      "step": 16444,
      "training_step_time": 0.41516685485839844
    },
    {
      "epoch": 0.000100372314453125,
      "model_forward_time": 0.11475205421447754,
      "step": 16445
    },
    {
      "epoch": 0.000100372314453125,
      "step": 16445,
      "training_step_time": 0.40001583099365234
    },
    {
      "epoch": 0.00010037841796875,
      "model_forward_time": 0.11488103866577148,
      "step": 16446
    },
    {
      "epoch": 0.00010037841796875,
      "step": 16446,
      "training_step_time": 0.7264792919158936
    },
    {
      "epoch": 0.000100384521484375,
      "model_forward_time": 0.11539912223815918,
      "step": 16447
    },
    {
      "epoch": 0.000100384521484375,
      "step": 16447,
      "training_step_time": 0.42078137397766113
    },
    {
      "epoch": 0.000100390625,
      "model_forward_time": 0.11423921585083008,
      "step": 16448
    },
    {
      "epoch": 0.000100390625,
      "step": 16448,
      "training_step_time": 0.46956872940063477
    },
    {
      "epoch": 0.000100396728515625,
      "model_forward_time": 0.11406707763671875,
      "step": 16449
    },
    {
      "epoch": 0.000100396728515625,
      "step": 16449,
      "training_step_time": 0.38667988777160645
    },
    {
      "epoch": 0.00010040283203125,
      "grad_norm": 0.13138064742088318,
      "learning_rate": 8.687937778524786e-05,
      "loss": 0.0505,
      "step": 16450
    },
    {
      "epoch": 0.00010040283203125,
      "model_forward_time": 0.11449146270751953,
      "step": 16450
    },
    {
      "epoch": 0.00010040283203125,
      "step": 16450,
      "training_step_time": 0.38615846633911133
    },
    {
      "epoch": 0.000100408935546875,
      "model_forward_time": 0.1146388053894043,
      "step": 16451
    },
    {
      "epoch": 0.000100408935546875,
      "step": 16451,
      "training_step_time": 0.3894643783569336
    },
    {
      "epoch": 0.0001004150390625,
      "model_forward_time": 0.11542057991027832,
      "step": 16452
    },
    {
      "epoch": 0.0001004150390625,
      "step": 16452,
      "training_step_time": 0.9004654884338379
    },
    {
      "epoch": 0.000100421142578125,
      "model_forward_time": 0.11430239677429199,
      "step": 16453
    },
    {
      "epoch": 0.000100421142578125,
      "step": 16453,
      "training_step_time": 2.870959758758545
    },
    {
      "epoch": 0.00010042724609375,
      "model_forward_time": 0.11200714111328125,
      "step": 16454
    },
    {
      "epoch": 0.00010042724609375,
      "step": 16454,
      "training_step_time": 0.36510705947875977
    },
    {
      "epoch": 0.000100433349609375,
      "model_forward_time": 0.11248421669006348,
      "step": 16455
    },
    {
      "epoch": 0.000100433349609375,
      "step": 16455,
      "training_step_time": 0.4264099597930908
    },
    {
      "epoch": 0.000100439453125,
      "model_forward_time": 0.11347436904907227,
      "step": 16456
    },
    {
      "epoch": 0.000100439453125,
      "step": 16456,
      "training_step_time": 0.4044790267944336
    },
    {
      "epoch": 0.000100445556640625,
      "model_forward_time": 0.11362457275390625,
      "step": 16457
    },
    {
      "epoch": 0.000100445556640625,
      "step": 16457,
      "training_step_time": 0.3848106861114502
    },
    {
      "epoch": 0.00010045166015625,
      "model_forward_time": 0.1143651008605957,
      "step": 16458
    },
    {
      "epoch": 0.00010045166015625,
      "step": 16458,
      "training_step_time": 0.43455004692077637
    },
    {
      "epoch": 0.000100457763671875,
      "model_forward_time": 0.1143195629119873,
      "step": 16459
    },
    {
      "epoch": 0.000100457763671875,
      "step": 16459,
      "training_step_time": 0.38471555709838867
    },
    {
      "epoch": 0.0001004638671875,
      "grad_norm": 0.2577396631240845,
      "learning_rate": 8.686076371826401e-05,
      "loss": 0.0559,
      "step": 16460
    },
    {
      "epoch": 0.0001004638671875,
      "model_forward_time": 0.11504220962524414,
      "step": 16460
    },
    {
      "epoch": 0.0001004638671875,
      "step": 16460,
      "training_step_time": 0.3843839168548584
    },
    {
      "epoch": 0.000100469970703125,
      "model_forward_time": 0.11573433876037598,
      "step": 16461
    },
    {
      "epoch": 0.000100469970703125,
      "step": 16461,
      "training_step_time": 0.41376662254333496
    },
    {
      "epoch": 0.00010047607421875,
      "model_forward_time": 0.11543083190917969,
      "step": 16462
    },
    {
      "epoch": 0.00010047607421875,
      "step": 16462,
      "training_step_time": 0.42948198318481445
    },
    {
      "epoch": 0.000100482177734375,
      "model_forward_time": 0.11493325233459473,
      "step": 16463
    },
    {
      "epoch": 0.000100482177734375,
      "step": 16463,
      "training_step_time": 0.41364383697509766
    },
    {
      "epoch": 0.00010048828125,
      "model_forward_time": 0.1147310733795166,
      "step": 16464
    },
    {
      "epoch": 0.00010048828125,
      "step": 16464,
      "training_step_time": 0.4259617328643799
    },
    {
      "epoch": 0.000100494384765625,
      "model_forward_time": 0.11521387100219727,
      "step": 16465
    },
    {
      "epoch": 0.000100494384765625,
      "step": 16465,
      "training_step_time": 0.4101715087890625
    },
    {
      "epoch": 0.00010050048828125,
      "model_forward_time": 0.11467957496643066,
      "step": 16466
    },
    {
      "epoch": 0.00010050048828125,
      "step": 16466,
      "training_step_time": 0.3918585777282715
    },
    {
      "epoch": 0.000100506591796875,
      "model_forward_time": 0.11509394645690918,
      "step": 16467
    },
    {
      "epoch": 0.000100506591796875,
      "step": 16467,
      "training_step_time": 0.38980627059936523
    },
    {
      "epoch": 0.0001005126953125,
      "model_forward_time": 0.11555218696594238,
      "step": 16468
    },
    {
      "epoch": 0.0001005126953125,
      "step": 16468,
      "training_step_time": 0.3976740837097168
    },
    {
      "epoch": 0.000100518798828125,
      "model_forward_time": 0.1153569221496582,
      "step": 16469
    },
    {
      "epoch": 0.000100518798828125,
      "step": 16469,
      "training_step_time": 0.3957500457763672
    },
    {
      "epoch": 0.00010052490234375,
      "grad_norm": 0.13795146346092224,
      "learning_rate": 8.684213845395339e-05,
      "loss": 0.0521,
      "step": 16470
    },
    {
      "epoch": 0.00010052490234375,
      "model_forward_time": 0.11557745933532715,
      "step": 16470
    },
    {
      "epoch": 0.00010052490234375,
      "step": 16470,
      "training_step_time": 0.4792964458465576
    },
    {
      "epoch": 0.000100531005859375,
      "model_forward_time": 0.11443591117858887,
      "step": 16471
    },
    {
      "epoch": 0.000100531005859375,
      "step": 16471,
      "training_step_time": 0.46538734436035156
    },
    {
      "epoch": 0.000100537109375,
      "model_forward_time": 0.11462593078613281,
      "step": 16472
    },
    {
      "epoch": 0.000100537109375,
      "step": 16472,
      "training_step_time": 0.4199981689453125
    },
    {
      "epoch": 0.000100543212890625,
      "model_forward_time": 0.11527824401855469,
      "step": 16473
    },
    {
      "epoch": 0.000100543212890625,
      "step": 16473,
      "training_step_time": 0.39474964141845703
    },
    {
      "epoch": 0.00010054931640625,
      "model_forward_time": 0.11546826362609863,
      "step": 16474
    },
    {
      "epoch": 0.00010054931640625,
      "step": 16474,
      "training_step_time": 0.39702939987182617
    },
    {
      "epoch": 0.000100555419921875,
      "model_forward_time": 0.11501717567443848,
      "step": 16475
    },
    {
      "epoch": 0.000100555419921875,
      "step": 16475,
      "training_step_time": 0.3991544246673584
    },
    {
      "epoch": 0.0001005615234375,
      "model_forward_time": 0.11534667015075684,
      "step": 16476
    },
    {
      "epoch": 0.0001005615234375,
      "step": 16476,
      "training_step_time": 0.4123353958129883
    },
    {
      "epoch": 0.000100567626953125,
      "model_forward_time": 0.11531758308410645,
      "step": 16477
    },
    {
      "epoch": 0.000100567626953125,
      "step": 16477,
      "training_step_time": 0.42577600479125977
    },
    {
      "epoch": 0.00010057373046875,
      "model_forward_time": 0.1152944564819336,
      "step": 16478
    },
    {
      "epoch": 0.00010057373046875,
      "step": 16478,
      "training_step_time": 0.48851871490478516
    },
    {
      "epoch": 0.000100579833984375,
      "model_forward_time": 0.11443948745727539,
      "step": 16479
    },
    {
      "epoch": 0.000100579833984375,
      "step": 16479,
      "training_step_time": 0.44437694549560547
    },
    {
      "epoch": 0.0001005859375,
      "grad_norm": 0.10034002363681793,
      "learning_rate": 8.682350199797388e-05,
      "loss": 0.0544,
      "step": 16480
    },
    {
      "epoch": 0.0001005859375,
      "model_forward_time": 0.11483597755432129,
      "step": 16480
    },
    {
      "epoch": 0.0001005859375,
      "step": 16480,
      "training_step_time": 0.39060282707214355
    },
    {
      "epoch": 0.000100592041015625,
      "model_forward_time": 0.11455488204956055,
      "step": 16481
    },
    {
      "epoch": 0.000100592041015625,
      "step": 16481,
      "training_step_time": 0.39178037643432617
    },
    {
      "epoch": 0.00010059814453125,
      "model_forward_time": 0.1151118278503418,
      "step": 16482
    },
    {
      "epoch": 0.00010059814453125,
      "step": 16482,
      "training_step_time": 0.39316296577453613
    },
    {
      "epoch": 0.000100604248046875,
      "model_forward_time": 0.11524438858032227,
      "step": 16483
    },
    {
      "epoch": 0.000100604248046875,
      "step": 16483,
      "training_step_time": 0.39086270332336426
    },
    {
      "epoch": 0.0001006103515625,
      "model_forward_time": 0.11468625068664551,
      "step": 16484
    },
    {
      "epoch": 0.0001006103515625,
      "step": 16484,
      "training_step_time": 0.3661344051361084
    },
    {
      "epoch": 0.000100616455078125,
      "model_forward_time": 0.11497163772583008,
      "step": 16485
    },
    {
      "epoch": 0.000100616455078125,
      "step": 16485,
      "training_step_time": 0.41512298583984375
    },
    {
      "epoch": 0.00010062255859375,
      "model_forward_time": 0.11524319648742676,
      "step": 16486
    },
    {
      "epoch": 0.00010062255859375,
      "step": 16486,
      "training_step_time": 0.48786234855651855
    },
    {
      "epoch": 0.000100628662109375,
      "model_forward_time": 0.11480021476745605,
      "step": 16487
    },
    {
      "epoch": 0.000100628662109375,
      "step": 16487,
      "training_step_time": 0.4261012077331543
    },
    {
      "epoch": 0.000100634765625,
      "model_forward_time": 0.11419153213500977,
      "step": 16488
    },
    {
      "epoch": 0.000100634765625,
      "step": 16488,
      "training_step_time": 0.3956320285797119
    },
    {
      "epoch": 0.000100640869140625,
      "model_forward_time": 0.11577987670898438,
      "step": 16489
    },
    {
      "epoch": 0.000100640869140625,
      "step": 16489,
      "training_step_time": 0.39051294326782227
    },
    {
      "epoch": 0.00010064697265625,
      "grad_norm": 0.13213123381137848,
      "learning_rate": 8.680485435598673e-05,
      "loss": 0.0574,
      "step": 16490
    },
    {
      "epoch": 0.00010064697265625,
      "model_forward_time": 0.11494064331054688,
      "step": 16490
    },
    {
      "epoch": 0.00010064697265625,
      "step": 16490,
      "training_step_time": 0.38813304901123047
    },
    {
      "epoch": 0.000100653076171875,
      "model_forward_time": 0.11509990692138672,
      "step": 16491
    },
    {
      "epoch": 0.000100653076171875,
      "step": 16491,
      "training_step_time": 0.47641420364379883
    },
    {
      "epoch": 0.0001006591796875,
      "model_forward_time": 0.11515426635742188,
      "step": 16492
    },
    {
      "epoch": 0.0001006591796875,
      "step": 16492,
      "training_step_time": 0.40723729133605957
    },
    {
      "epoch": 0.000100665283203125,
      "model_forward_time": 0.11465334892272949,
      "step": 16493
    },
    {
      "epoch": 0.000100665283203125,
      "step": 16493,
      "training_step_time": 0.45189332962036133
    },
    {
      "epoch": 0.00010067138671875,
      "model_forward_time": 0.11539649963378906,
      "step": 16494
    },
    {
      "epoch": 0.00010067138671875,
      "step": 16494,
      "training_step_time": 0.4808540344238281
    },
    {
      "epoch": 0.000100677490234375,
      "model_forward_time": 0.11510848999023438,
      "step": 16495
    },
    {
      "epoch": 0.000100677490234375,
      "step": 16495,
      "training_step_time": 0.3939633369445801
    },
    {
      "epoch": 0.00010068359375,
      "model_forward_time": 0.11467123031616211,
      "step": 16496
    },
    {
      "epoch": 0.00010068359375,
      "step": 16496,
      "training_step_time": 0.38806939125061035
    },
    {
      "epoch": 0.000100689697265625,
      "model_forward_time": 0.1148827075958252,
      "step": 16497
    },
    {
      "epoch": 0.000100689697265625,
      "step": 16497,
      "training_step_time": 0.3932619094848633
    },
    {
      "epoch": 0.00010069580078125,
      "model_forward_time": 0.11509227752685547,
      "step": 16498
    },
    {
      "epoch": 0.00010069580078125,
      "step": 16498,
      "training_step_time": 0.396742582321167
    },
    {
      "epoch": 0.000100701904296875,
      "model_forward_time": 0.11494779586791992,
      "step": 16499
    },
    {
      "epoch": 0.000100701904296875,
      "step": 16499,
      "training_step_time": 0.43427324295043945
    },
    {
      "epoch": 0.0001007080078125,
      "grad_norm": 0.1420712023973465,
      "learning_rate": 8.678619553365659e-05,
      "loss": 0.0523,
      "step": 16500
    },
    {
      "epoch": 0.0001007080078125,
      "model_forward_time": 0.11501288414001465,
      "step": 16500
    },
    {
      "epoch": 0.0001007080078125,
      "step": 16500,
      "training_step_time": 0.934520959854126
    },
    {
      "epoch": 0.000100714111328125,
      "model_forward_time": 0.11408281326293945,
      "step": 16501
    },
    {
      "epoch": 0.000100714111328125,
      "step": 16501,
      "training_step_time": 0.38255834579467773
    },
    {
      "epoch": 0.00010072021484375,
      "model_forward_time": 0.11386275291442871,
      "step": 16502
    },
    {
      "epoch": 0.00010072021484375,
      "step": 16502,
      "training_step_time": 0.38364720344543457
    },
    {
      "epoch": 0.000100726318359375,
      "model_forward_time": 0.11451959609985352,
      "step": 16503
    },
    {
      "epoch": 0.000100726318359375,
      "step": 16503,
      "training_step_time": 0.3874208927154541
    },
    {
      "epoch": 0.000100732421875,
      "model_forward_time": 0.11451387405395508,
      "step": 16504
    },
    {
      "epoch": 0.000100732421875,
      "step": 16504,
      "training_step_time": 0.4330580234527588
    },
    {
      "epoch": 0.000100738525390625,
      "model_forward_time": 0.11412405967712402,
      "step": 16505
    },
    {
      "epoch": 0.000100738525390625,
      "step": 16505,
      "training_step_time": 0.44252824783325195
    },
    {
      "epoch": 0.00010074462890625,
      "model_forward_time": 0.11466407775878906,
      "step": 16506
    },
    {
      "epoch": 0.00010074462890625,
      "step": 16506,
      "training_step_time": 0.7315988540649414
    },
    {
      "epoch": 0.000100750732421875,
      "model_forward_time": 0.11421942710876465,
      "step": 16507
    },
    {
      "epoch": 0.000100750732421875,
      "step": 16507,
      "training_step_time": 0.3978419303894043
    },
    {
      "epoch": 0.0001007568359375,
      "model_forward_time": 0.1140291690826416,
      "step": 16508
    },
    {
      "epoch": 0.0001007568359375,
      "step": 16508,
      "training_step_time": 0.38399744033813477
    },
    {
      "epoch": 0.000100762939453125,
      "model_forward_time": 0.11421036720275879,
      "step": 16509
    },
    {
      "epoch": 0.000100762939453125,
      "step": 16509,
      "training_step_time": 0.39167356491088867
    },
    {
      "epoch": 0.00010076904296875,
      "grad_norm": 0.17323696613311768,
      "learning_rate": 8.676752553665153e-05,
      "loss": 0.0505,
      "step": 16510
    },
    {
      "epoch": 0.00010076904296875,
      "model_forward_time": 0.11437058448791504,
      "step": 16510
    },
    {
      "epoch": 0.00010076904296875,
      "step": 16510,
      "training_step_time": 0.4014906883239746
    },
    {
      "epoch": 0.000100775146484375,
      "model_forward_time": 0.11446261405944824,
      "step": 16511
    },
    {
      "epoch": 0.000100775146484375,
      "step": 16511,
      "training_step_time": 0.39888477325439453
    },
    {
      "epoch": 0.00010078125,
      "model_forward_time": 0.11513113975524902,
      "step": 16512
    },
    {
      "epoch": 0.00010078125,
      "step": 16512,
      "training_step_time": 0.9257731437683105
    },
    {
      "epoch": 0.000100787353515625,
      "model_forward_time": 0.1139230728149414,
      "step": 16513
    },
    {
      "epoch": 0.000100787353515625,
      "step": 16513,
      "training_step_time": 0.45901918411254883
    },
    {
      "epoch": 0.00010079345703125,
      "model_forward_time": 0.11374187469482422,
      "step": 16514
    },
    {
      "epoch": 0.00010079345703125,
      "step": 16514,
      "training_step_time": 0.3960127830505371
    },
    {
      "epoch": 0.000100799560546875,
      "model_forward_time": 0.11372685432434082,
      "step": 16515
    },
    {
      "epoch": 0.000100799560546875,
      "step": 16515,
      "training_step_time": 0.40493249893188477
    },
    {
      "epoch": 0.0001008056640625,
      "model_forward_time": 0.11474227905273438,
      "step": 16516
    },
    {
      "epoch": 0.0001008056640625,
      "step": 16516,
      "training_step_time": 0.405397891998291
    },
    {
      "epoch": 0.000100811767578125,
      "model_forward_time": 0.1142578125,
      "step": 16517
    },
    {
      "epoch": 0.000100811767578125,
      "step": 16517,
      "training_step_time": 0.39098167419433594
    },
    {
      "epoch": 0.00010081787109375,
      "model_forward_time": 0.11461281776428223,
      "step": 16518
    },
    {
      "epoch": 0.00010081787109375,
      "step": 16518,
      "training_step_time": 0.7258577346801758
    },
    {
      "epoch": 0.000100823974609375,
      "model_forward_time": 0.1146392822265625,
      "step": 16519
    },
    {
      "epoch": 0.000100823974609375,
      "step": 16519,
      "training_step_time": 0.4131777286529541
    },
    {
      "epoch": 0.000100830078125,
      "grad_norm": 0.12258311361074448,
      "learning_rate": 8.674884437064302e-05,
      "loss": 0.0518,
      "step": 16520
    },
    {
      "epoch": 0.000100830078125,
      "model_forward_time": 0.11431741714477539,
      "step": 16520
    },
    {
      "epoch": 0.000100830078125,
      "step": 16520,
      "training_step_time": 0.3854851722717285
    },
    {
      "epoch": 0.000100836181640625,
      "model_forward_time": 0.11444520950317383,
      "step": 16521
    },
    {
      "epoch": 0.000100836181640625,
      "step": 16521,
      "training_step_time": 0.38953733444213867
    },
    {
      "epoch": 0.00010084228515625,
      "model_forward_time": 0.1141350269317627,
      "step": 16522
    },
    {
      "epoch": 0.00010084228515625,
      "step": 16522,
      "training_step_time": 0.40601539611816406
    },
    {
      "epoch": 0.000100848388671875,
      "model_forward_time": 0.11487364768981934,
      "step": 16523
    },
    {
      "epoch": 0.000100848388671875,
      "step": 16523,
      "training_step_time": 0.3807716369628906
    },
    {
      "epoch": 0.0001008544921875,
      "model_forward_time": 0.11499977111816406,
      "step": 16524
    },
    {
      "epoch": 0.0001008544921875,
      "step": 16524,
      "training_step_time": 0.7175862789154053
    },
    {
      "epoch": 0.000100860595703125,
      "model_forward_time": 0.11587333679199219,
      "step": 16525
    },
    {
      "epoch": 0.000100860595703125,
      "step": 16525,
      "training_step_time": 0.4003734588623047
    },
    {
      "epoch": 0.00010086669921875,
      "model_forward_time": 0.11455154418945312,
      "step": 16526
    },
    {
      "epoch": 0.00010086669921875,
      "step": 16526,
      "training_step_time": 0.4383523464202881
    },
    {
      "epoch": 0.000100872802734375,
      "model_forward_time": 0.11476731300354004,
      "step": 16527
    },
    {
      "epoch": 0.000100872802734375,
      "step": 16527,
      "training_step_time": 0.4736640453338623
    },
    {
      "epoch": 0.00010087890625,
      "model_forward_time": 0.11424469947814941,
      "step": 16528
    },
    {
      "epoch": 0.00010087890625,
      "step": 16528,
      "training_step_time": 0.41515398025512695
    },
    {
      "epoch": 0.000100885009765625,
      "model_forward_time": 0.11406731605529785,
      "step": 16529
    },
    {
      "epoch": 0.000100885009765625,
      "step": 16529,
      "training_step_time": 0.42873406410217285
    },
    {
      "epoch": 0.00010089111328125,
      "grad_norm": 0.17615514993667603,
      "learning_rate": 8.673015204130586e-05,
      "loss": 0.0523,
      "step": 16530
    },
    {
      "epoch": 0.00010089111328125,
      "model_forward_time": 0.11455416679382324,
      "step": 16530
    },
    {
      "epoch": 0.00010089111328125,
      "step": 16530,
      "training_step_time": 0.421419620513916
    },
    {
      "epoch": 0.000100897216796875,
      "model_forward_time": 0.11396384239196777,
      "step": 16531
    },
    {
      "epoch": 0.000100897216796875,
      "step": 16531,
      "training_step_time": 0.4211709499359131
    },
    {
      "epoch": 0.0001009033203125,
      "model_forward_time": 0.11489367485046387,
      "step": 16532
    },
    {
      "epoch": 0.0001009033203125,
      "step": 16532,
      "training_step_time": 0.40152835845947266
    },
    {
      "epoch": 0.000100909423828125,
      "model_forward_time": 0.11464571952819824,
      "step": 16533
    },
    {
      "epoch": 0.000100909423828125,
      "step": 16533,
      "training_step_time": 0.48119044303894043
    },
    {
      "epoch": 0.00010091552734375,
      "model_forward_time": 0.11547541618347168,
      "step": 16534
    },
    {
      "epoch": 0.00010091552734375,
      "step": 16534,
      "training_step_time": 0.4055771827697754
    },
    {
      "epoch": 0.000100921630859375,
      "model_forward_time": 0.11478447914123535,
      "step": 16535
    },
    {
      "epoch": 0.000100921630859375,
      "step": 16535,
      "training_step_time": 0.3964879512786865
    },
    {
      "epoch": 0.000100927734375,
      "model_forward_time": 0.11479711532592773,
      "step": 16536
    },
    {
      "epoch": 0.000100927734375,
      "step": 16536,
      "training_step_time": 0.40134096145629883
    },
    {
      "epoch": 0.000100933837890625,
      "model_forward_time": 0.11533856391906738,
      "step": 16537
    },
    {
      "epoch": 0.000100933837890625,
      "step": 16537,
      "training_step_time": 0.3891932964324951
    },
    {
      "epoch": 0.00010093994140625,
      "model_forward_time": 0.11495208740234375,
      "step": 16538
    },
    {
      "epoch": 0.00010093994140625,
      "step": 16538,
      "training_step_time": 0.392322301864624
    },
    {
      "epoch": 0.000100946044921875,
      "model_forward_time": 0.1151742935180664,
      "step": 16539
    },
    {
      "epoch": 0.000100946044921875,
      "step": 16539,
      "training_step_time": 0.3930246829986572
    },
    {
      "epoch": 0.0001009521484375,
      "grad_norm": 0.17316173017024994,
      "learning_rate": 8.671144855431833e-05,
      "loss": 0.0523,
      "step": 16540
    },
    {
      "epoch": 0.0001009521484375,
      "model_forward_time": 0.11605978012084961,
      "step": 16540
    },
    {
      "epoch": 0.0001009521484375,
      "step": 16540,
      "training_step_time": 0.4541594982147217
    },
    {
      "epoch": 0.000100958251953125,
      "model_forward_time": 0.11507821083068848,
      "step": 16541
    },
    {
      "epoch": 0.000100958251953125,
      "step": 16541,
      "training_step_time": 0.39690327644348145
    },
    {
      "epoch": 0.00010096435546875,
      "model_forward_time": 0.1156764030456543,
      "step": 16542
    },
    {
      "epoch": 0.00010096435546875,
      "step": 16542,
      "training_step_time": 0.5362632274627686
    },
    {
      "epoch": 0.000100970458984375,
      "model_forward_time": 0.11548662185668945,
      "step": 16543
    },
    {
      "epoch": 0.000100970458984375,
      "step": 16543,
      "training_step_time": 0.3966093063354492
    },
    {
      "epoch": 0.0001009765625,
      "model_forward_time": 0.11469411849975586,
      "step": 16544
    },
    {
      "epoch": 0.0001009765625,
      "step": 16544,
      "training_step_time": 0.39780330657958984
    },
    {
      "epoch": 0.000100982666015625,
      "model_forward_time": 0.11512541770935059,
      "step": 16545
    },
    {
      "epoch": 0.000100982666015625,
      "step": 16545,
      "training_step_time": 0.3885633945465088
    },
    {
      "epoch": 0.00010098876953125,
      "model_forward_time": 0.11556887626647949,
      "step": 16546
    },
    {
      "epoch": 0.00010098876953125,
      "step": 16546,
      "training_step_time": 0.4750964641571045
    },
    {
      "epoch": 0.000100994873046875,
      "model_forward_time": 0.1147756576538086,
      "step": 16547
    },
    {
      "epoch": 0.000100994873046875,
      "step": 16547,
      "training_step_time": 0.47078943252563477
    },
    {
      "epoch": 0.0001010009765625,
      "model_forward_time": 0.11455249786376953,
      "step": 16548
    },
    {
      "epoch": 0.0001010009765625,
      "step": 16548,
      "training_step_time": 0.5689826011657715
    },
    {
      "epoch": 0.000101007080078125,
      "model_forward_time": 0.11551237106323242,
      "step": 16549
    },
    {
      "epoch": 0.000101007080078125,
      "step": 16549,
      "training_step_time": 0.3867466449737549
    },
    {
      "epoch": 0.00010101318359375,
      "grad_norm": 0.17484568059444427,
      "learning_rate": 8.669273391536204e-05,
      "loss": 0.0564,
      "step": 16550
    },
    {
      "epoch": 0.00010101318359375,
      "model_forward_time": 0.11438822746276855,
      "step": 16550
    },
    {
      "epoch": 0.00010101318359375,
      "step": 16550,
      "training_step_time": 0.38793015480041504
    },
    {
      "epoch": 0.000101019287109375,
      "model_forward_time": 0.1149284839630127,
      "step": 16551
    },
    {
      "epoch": 0.000101019287109375,
      "step": 16551,
      "training_step_time": 0.3899857997894287
    },
    {
      "epoch": 0.000101025390625,
      "model_forward_time": 0.1148836612701416,
      "step": 16552
    },
    {
      "epoch": 0.000101025390625,
      "step": 16552,
      "training_step_time": 0.38778042793273926
    },
    {
      "epoch": 0.000101031494140625,
      "model_forward_time": 0.11546206474304199,
      "step": 16553
    },
    {
      "epoch": 0.000101031494140625,
      "step": 16553,
      "training_step_time": 0.3885629177093506
    },
    {
      "epoch": 0.00010103759765625,
      "model_forward_time": 0.11496949195861816,
      "step": 16554
    },
    {
      "epoch": 0.00010103759765625,
      "step": 16554,
      "training_step_time": 0.4326660633087158
    },
    {
      "epoch": 0.000101043701171875,
      "model_forward_time": 0.11552119255065918,
      "step": 16555
    },
    {
      "epoch": 0.000101043701171875,
      "step": 16555,
      "training_step_time": 0.45685720443725586
    },
    {
      "epoch": 0.0001010498046875,
      "model_forward_time": 0.11572122573852539,
      "step": 16556
    },
    {
      "epoch": 0.0001010498046875,
      "step": 16556,
      "training_step_time": 0.49554014205932617
    },
    {
      "epoch": 0.000101055908203125,
      "model_forward_time": 0.11486124992370605,
      "step": 16557
    },
    {
      "epoch": 0.000101055908203125,
      "step": 16557,
      "training_step_time": 0.40994882583618164
    },
    {
      "epoch": 0.00010106201171875,
      "model_forward_time": 0.11562204360961914,
      "step": 16558
    },
    {
      "epoch": 0.00010106201171875,
      "step": 16558,
      "training_step_time": 0.3920862674713135
    },
    {
      "epoch": 0.000101068115234375,
      "model_forward_time": 0.11537718772888184,
      "step": 16559
    },
    {
      "epoch": 0.000101068115234375,
      "step": 16559,
      "training_step_time": 0.40086865425109863
    },
    {
      "epoch": 0.00010107421875,
      "grad_norm": 0.13723227381706238,
      "learning_rate": 8.6674008130122e-05,
      "loss": 0.0517,
      "step": 16560
    },
    {
      "epoch": 0.00010107421875,
      "model_forward_time": 0.11471295356750488,
      "step": 16560
    },
    {
      "epoch": 0.00010107421875,
      "step": 16560,
      "training_step_time": 0.4247429370880127
    },
    {
      "epoch": 0.000101080322265625,
      "model_forward_time": 0.11520528793334961,
      "step": 16561
    },
    {
      "epoch": 0.000101080322265625,
      "step": 16561,
      "training_step_time": 0.4801657199859619
    },
    {
      "epoch": 0.00010108642578125,
      "model_forward_time": 0.11556053161621094,
      "step": 16562
    },
    {
      "epoch": 0.00010108642578125,
      "step": 16562,
      "training_step_time": 0.4402737617492676
    },
    {
      "epoch": 0.000101092529296875,
      "model_forward_time": 0.11529326438903809,
      "step": 16563
    },
    {
      "epoch": 0.000101092529296875,
      "step": 16563,
      "training_step_time": 0.3921537399291992
    },
    {
      "epoch": 0.0001010986328125,
      "model_forward_time": 0.11505985260009766,
      "step": 16564
    },
    {
      "epoch": 0.0001010986328125,
      "step": 16564,
      "training_step_time": 0.39284634590148926
    },
    {
      "epoch": 0.000101104736328125,
      "model_forward_time": 0.11493873596191406,
      "step": 16565
    },
    {
      "epoch": 0.000101104736328125,
      "step": 16565,
      "training_step_time": 0.3950843811035156
    },
    {
      "epoch": 0.00010111083984375,
      "model_forward_time": 0.11499428749084473,
      "step": 16566
    },
    {
      "epoch": 0.00010111083984375,
      "step": 16566,
      "training_step_time": 0.4015657901763916
    },
    {
      "epoch": 0.000101116943359375,
      "model_forward_time": 0.1149740219116211,
      "step": 16567
    },
    {
      "epoch": 0.000101116943359375,
      "step": 16567,
      "training_step_time": 0.40920329093933105
    },
    {
      "epoch": 0.000101123046875,
      "model_forward_time": 0.11522507667541504,
      "step": 16568
    },
    {
      "epoch": 0.000101123046875,
      "step": 16568,
      "training_step_time": 0.3694770336151123
    },
    {
      "epoch": 0.000101129150390625,
      "model_forward_time": 0.1148838996887207,
      "step": 16569
    },
    {
      "epoch": 0.000101129150390625,
      "step": 16569,
      "training_step_time": 0.4155271053314209
    },
    {
      "epoch": 0.00010113525390625,
      "grad_norm": 0.1134418472647667,
      "learning_rate": 8.66552712042866e-05,
      "loss": 0.0523,
      "step": 16570
    },
    {
      "epoch": 0.00010113525390625,
      "model_forward_time": 0.1151437759399414,
      "step": 16570
    },
    {
      "epoch": 0.00010113525390625,
      "step": 16570,
      "training_step_time": 0.4604320526123047
    },
    {
      "epoch": 0.000101141357421875,
      "model_forward_time": 0.11484169960021973,
      "step": 16571
    },
    {
      "epoch": 0.000101141357421875,
      "step": 16571,
      "training_step_time": 0.39904069900512695
    },
    {
      "epoch": 0.0001011474609375,
      "model_forward_time": 0.1146550178527832,
      "step": 16572
    },
    {
      "epoch": 0.0001011474609375,
      "step": 16572,
      "training_step_time": 0.4011189937591553
    },
    {
      "epoch": 0.000101153564453125,
      "model_forward_time": 0.1148831844329834,
      "step": 16573
    },
    {
      "epoch": 0.000101153564453125,
      "step": 16573,
      "training_step_time": 0.3896520137786865
    },
    {
      "epoch": 0.00010115966796875,
      "model_forward_time": 0.1145787239074707,
      "step": 16574
    },
    {
      "epoch": 0.00010115966796875,
      "step": 16574,
      "training_step_time": 0.3911299705505371
    },
    {
      "epoch": 0.000101165771484375,
      "model_forward_time": 0.11487746238708496,
      "step": 16575
    },
    {
      "epoch": 0.000101165771484375,
      "step": 16575,
      "training_step_time": 0.43692779541015625
    },
    {
      "epoch": 0.000101171875,
      "model_forward_time": 0.11506080627441406,
      "step": 16576
    },
    {
      "epoch": 0.000101171875,
      "step": 16576,
      "training_step_time": 0.4330782890319824
    },
    {
      "epoch": 0.000101177978515625,
      "model_forward_time": 0.11537432670593262,
      "step": 16577
    },
    {
      "epoch": 0.000101177978515625,
      "step": 16577,
      "training_step_time": 0.466078519821167
    },
    {
      "epoch": 0.00010118408203125,
      "model_forward_time": 0.11526632308959961,
      "step": 16578
    },
    {
      "epoch": 0.00010118408203125,
      "step": 16578,
      "training_step_time": 0.5933451652526855
    },
    {
      "epoch": 0.000101190185546875,
      "model_forward_time": 0.11435937881469727,
      "step": 16579
    },
    {
      "epoch": 0.000101190185546875,
      "step": 16579,
      "training_step_time": 0.38857173919677734
    },
    {
      "epoch": 0.0001011962890625,
      "grad_norm": 0.1925314962863922,
      "learning_rate": 8.663652314354765e-05,
      "loss": 0.057,
      "step": 16580
    },
    {
      "epoch": 0.0001011962890625,
      "model_forward_time": 0.11477327346801758,
      "step": 16580
    },
    {
      "epoch": 0.0001011962890625,
      "step": 16580,
      "training_step_time": 0.3827080726623535
    },
    {
      "epoch": 0.000101202392578125,
      "model_forward_time": 0.11598062515258789,
      "step": 16581
    },
    {
      "epoch": 0.000101202392578125,
      "step": 16581,
      "training_step_time": 0.391803503036499
    },
    {
      "epoch": 0.00010120849609375,
      "model_forward_time": 0.11489582061767578,
      "step": 16582
    },
    {
      "epoch": 0.00010120849609375,
      "step": 16582,
      "training_step_time": 0.3862452507019043
    },
    {
      "epoch": 0.000101214599609375,
      "model_forward_time": 0.11511659622192383,
      "step": 16583
    },
    {
      "epoch": 0.000101214599609375,
      "step": 16583,
      "training_step_time": 0.44027018547058105
    },
    {
      "epoch": 0.000101220703125,
      "model_forward_time": 0.11443901062011719,
      "step": 16584
    },
    {
      "epoch": 0.000101220703125,
      "step": 16584,
      "training_step_time": 0.6515743732452393
    },
    {
      "epoch": 0.000101226806640625,
      "model_forward_time": 0.11463451385498047,
      "step": 16585
    },
    {
      "epoch": 0.000101226806640625,
      "step": 16585,
      "training_step_time": 0.3992125988006592
    },
    {
      "epoch": 0.00010123291015625,
      "model_forward_time": 0.11456584930419922,
      "step": 16586
    },
    {
      "epoch": 0.00010123291015625,
      "step": 16586,
      "training_step_time": 0.38938355445861816
    },
    {
      "epoch": 0.000101239013671875,
      "model_forward_time": 0.11489009857177734,
      "step": 16587
    },
    {
      "epoch": 0.000101239013671875,
      "step": 16587,
      "training_step_time": 0.39084601402282715
    },
    {
      "epoch": 0.0001012451171875,
      "model_forward_time": 0.1153867244720459,
      "step": 16588
    },
    {
      "epoch": 0.0001012451171875,
      "step": 16588,
      "training_step_time": 0.3886375427246094
    },
    {
      "epoch": 0.000101251220703125,
      "model_forward_time": 0.1150507926940918,
      "step": 16589
    },
    {
      "epoch": 0.000101251220703125,
      "step": 16589,
      "training_step_time": 0.42847228050231934
    },
    {
      "epoch": 0.00010125732421875,
      "grad_norm": 0.14925746619701385,
      "learning_rate": 8.661776395360029e-05,
      "loss": 0.0514,
      "step": 16590
    },
    {
      "epoch": 0.00010125732421875,
      "model_forward_time": 0.11472272872924805,
      "step": 16590
    },
    {
      "epoch": 0.00010125732421875,
      "step": 16590,
      "training_step_time": 0.7698273658752441
    },
    {
      "epoch": 0.000101263427734375,
      "model_forward_time": 0.11409783363342285,
      "step": 16591
    },
    {
      "epoch": 0.000101263427734375,
      "step": 16591,
      "training_step_time": 0.38924098014831543
    },
    {
      "epoch": 0.00010126953125,
      "model_forward_time": 0.11457180976867676,
      "step": 16592
    },
    {
      "epoch": 0.00010126953125,
      "step": 16592,
      "training_step_time": 0.38403797149658203
    },
    {
      "epoch": 0.000101275634765625,
      "model_forward_time": 0.1145017147064209,
      "step": 16593
    },
    {
      "epoch": 0.000101275634765625,
      "step": 16593,
      "training_step_time": 0.382871150970459
    },
    {
      "epoch": 0.00010128173828125,
      "model_forward_time": 0.11486673355102539,
      "step": 16594
    },
    {
      "epoch": 0.00010128173828125,
      "step": 16594,
      "training_step_time": 0.3833014965057373
    },
    {
      "epoch": 0.000101287841796875,
      "model_forward_time": 0.1143038272857666,
      "step": 16595
    },
    {
      "epoch": 0.000101287841796875,
      "step": 16595,
      "training_step_time": 0.38109588623046875
    },
    {
      "epoch": 0.0001012939453125,
      "model_forward_time": 0.11549568176269531,
      "step": 16596
    },
    {
      "epoch": 0.0001012939453125,
      "step": 16596,
      "training_step_time": 0.6770853996276855
    },
    {
      "epoch": 0.000101300048828125,
      "model_forward_time": 0.11496376991271973,
      "step": 16597
    },
    {
      "epoch": 0.000101300048828125,
      "step": 16597,
      "training_step_time": 0.41657423973083496
    },
    {
      "epoch": 0.00010130615234375,
      "model_forward_time": 0.11542820930480957,
      "step": 16598
    },
    {
      "epoch": 0.00010130615234375,
      "step": 16598,
      "training_step_time": 0.38547849655151367
    },
    {
      "epoch": 0.000101312255859375,
      "model_forward_time": 0.11514115333557129,
      "step": 16599
    },
    {
      "epoch": 0.000101312255859375,
      "step": 16599,
      "training_step_time": 0.3886139392852783
    },
    {
      "epoch": 0.000101318359375,
      "grad_norm": 0.139535054564476,
      "learning_rate": 8.659899364014309e-05,
      "loss": 0.0531,
      "step": 16600
    },
    {
      "epoch": 0.000101318359375,
      "model_forward_time": 0.11462616920471191,
      "step": 16600
    },
    {
      "epoch": 0.000101318359375,
      "step": 16600,
      "training_step_time": 0.3944237232208252
    },
    {
      "epoch": 0.000101324462890625,
      "model_forward_time": 0.11513948440551758,
      "step": 16601
    },
    {
      "epoch": 0.000101324462890625,
      "step": 16601,
      "training_step_time": 0.3909115791320801
    },
    {
      "epoch": 0.00010133056640625,
      "model_forward_time": 0.11527132987976074,
      "step": 16602
    },
    {
      "epoch": 0.00010133056640625,
      "step": 16602,
      "training_step_time": 0.41857337951660156
    },
    {
      "epoch": 0.000101336669921875,
      "model_forward_time": 0.11555147171020508,
      "step": 16603
    },
    {
      "epoch": 0.000101336669921875,
      "step": 16603,
      "training_step_time": 0.43771815299987793
    },
    {
      "epoch": 0.0001013427734375,
      "model_forward_time": 0.11600685119628906,
      "step": 16604
    },
    {
      "epoch": 0.0001013427734375,
      "step": 16604,
      "training_step_time": 0.4589204788208008
    },
    {
      "epoch": 0.000101348876953125,
      "model_forward_time": 0.11533045768737793,
      "step": 16605
    },
    {
      "epoch": 0.000101348876953125,
      "step": 16605,
      "training_step_time": 0.47871875762939453
    },
    {
      "epoch": 0.00010135498046875,
      "model_forward_time": 0.11538124084472656,
      "step": 16606
    },
    {
      "epoch": 0.00010135498046875,
      "step": 16606,
      "training_step_time": 0.39424633979797363
    },
    {
      "epoch": 0.000101361083984375,
      "model_forward_time": 0.11533427238464355,
      "step": 16607
    },
    {
      "epoch": 0.000101361083984375,
      "step": 16607,
      "training_step_time": 0.3839702606201172
    },
    {
      "epoch": 0.0001013671875,
      "model_forward_time": 0.1149747371673584,
      "step": 16608
    },
    {
      "epoch": 0.0001013671875,
      "step": 16608,
      "training_step_time": 0.545391321182251
    },
    {
      "epoch": 0.000101373291015625,
      "model_forward_time": 0.11468338966369629,
      "step": 16609
    },
    {
      "epoch": 0.000101373291015625,
      "step": 16609,
      "training_step_time": 0.3990440368652344
    },
    {
      "epoch": 0.00010137939453125,
      "grad_norm": 0.2268413007259369,
      "learning_rate": 8.658021220887795e-05,
      "loss": 0.0531,
      "step": 16610
    },
    {
      "epoch": 0.00010137939453125,
      "model_forward_time": 0.11551976203918457,
      "step": 16610
    },
    {
      "epoch": 0.00010137939453125,
      "step": 16610,
      "training_step_time": 0.3960542678833008
    },
    {
      "epoch": 0.000101385498046875,
      "model_forward_time": 0.11587643623352051,
      "step": 16611
    },
    {
      "epoch": 0.000101385498046875,
      "step": 16611,
      "training_step_time": 0.4272339344024658
    },
    {
      "epoch": 0.0001013916015625,
      "model_forward_time": 0.11489653587341309,
      "step": 16612
    },
    {
      "epoch": 0.0001013916015625,
      "step": 16612,
      "training_step_time": 0.43892765045166016
    },
    {
      "epoch": 0.000101397705078125,
      "model_forward_time": 0.11521482467651367,
      "step": 16613
    },
    {
      "epoch": 0.000101397705078125,
      "step": 16613,
      "training_step_time": 0.4880363941192627
    },
    {
      "epoch": 0.00010140380859375,
      "model_forward_time": 0.11491751670837402,
      "step": 16614
    },
    {
      "epoch": 0.00010140380859375,
      "step": 16614,
      "training_step_time": 0.4389791488647461
    },
    {
      "epoch": 0.000101409912109375,
      "model_forward_time": 0.11556577682495117,
      "step": 16615
    },
    {
      "epoch": 0.000101409912109375,
      "step": 16615,
      "training_step_time": 0.39014530181884766
    },
    {
      "epoch": 0.000101416015625,
      "model_forward_time": 0.11433720588684082,
      "step": 16616
    },
    {
      "epoch": 0.000101416015625,
      "step": 16616,
      "training_step_time": 0.38906145095825195
    },
    {
      "epoch": 0.000101422119140625,
      "model_forward_time": 0.11680078506469727,
      "step": 16617
    },
    {
      "epoch": 0.000101422119140625,
      "step": 16617,
      "training_step_time": 0.4291496276855469
    },
    {
      "epoch": 0.00010142822265625,
      "model_forward_time": 0.11513686180114746,
      "step": 16618
    },
    {
      "epoch": 0.00010142822265625,
      "step": 16618,
      "training_step_time": 0.3948934078216553
    },
    {
      "epoch": 0.000101434326171875,
      "model_forward_time": 0.11535501480102539,
      "step": 16619
    },
    {
      "epoch": 0.000101434326171875,
      "step": 16619,
      "training_step_time": 0.5184412002563477
    },
    {
      "epoch": 0.0001014404296875,
      "grad_norm": 0.15016469359397888,
      "learning_rate": 8.656141966551019e-05,
      "loss": 0.0565,
      "step": 16620
    },
    {
      "epoch": 0.0001014404296875,
      "model_forward_time": 0.11468911170959473,
      "step": 16620
    },
    {
      "epoch": 0.0001014404296875,
      "step": 16620,
      "training_step_time": 0.3936614990234375
    },
    {
      "epoch": 0.000101446533203125,
      "model_forward_time": 0.11587691307067871,
      "step": 16621
    },
    {
      "epoch": 0.000101446533203125,
      "step": 16621,
      "training_step_time": 0.4033675193786621
    },
    {
      "epoch": 0.00010145263671875,
      "model_forward_time": 0.1150519847869873,
      "step": 16622
    },
    {
      "epoch": 0.00010145263671875,
      "step": 16622,
      "training_step_time": 0.3844592571258545
    },
    {
      "epoch": 0.000101458740234375,
      "model_forward_time": 0.11502480506896973,
      "step": 16623
    },
    {
      "epoch": 0.000101458740234375,
      "step": 16623,
      "training_step_time": 0.40284204483032227
    },
    {
      "epoch": 0.00010146484375,
      "model_forward_time": 0.11545205116271973,
      "step": 16624
    },
    {
      "epoch": 0.00010146484375,
      "step": 16624,
      "training_step_time": 0.4205944538116455
    },
    {
      "epoch": 0.000101470947265625,
      "model_forward_time": 0.11588621139526367,
      "step": 16625
    },
    {
      "epoch": 0.000101470947265625,
      "step": 16625,
      "training_step_time": 0.36852145195007324
    },
    {
      "epoch": 0.00010147705078125,
      "model_forward_time": 0.11555266380310059,
      "step": 16626
    },
    {
      "epoch": 0.00010147705078125,
      "step": 16626,
      "training_step_time": 0.5593588352203369
    },
    {
      "epoch": 0.000101483154296875,
      "model_forward_time": 0.11529421806335449,
      "step": 16627
    },
    {
      "epoch": 0.000101483154296875,
      "step": 16627,
      "training_step_time": 0.39449024200439453
    },
    {
      "epoch": 0.0001014892578125,
      "model_forward_time": 0.11509227752685547,
      "step": 16628
    },
    {
      "epoch": 0.0001014892578125,
      "step": 16628,
      "training_step_time": 0.38243603706359863
    },
    {
      "epoch": 0.000101495361328125,
      "model_forward_time": 0.11510848999023438,
      "step": 16629
    },
    {
      "epoch": 0.000101495361328125,
      "step": 16629,
      "training_step_time": 0.451526403427124
    },
    {
      "epoch": 0.00010150146484375,
      "grad_norm": 0.16238895058631897,
      "learning_rate": 8.654261601574849e-05,
      "loss": 0.0521,
      "step": 16630
    },
    {
      "epoch": 0.00010150146484375,
      "model_forward_time": 0.11545157432556152,
      "step": 16630
    },
    {
      "epoch": 0.00010150146484375,
      "step": 16630,
      "training_step_time": 0.421825647354126
    },
    {
      "epoch": 0.000101507568359375,
      "model_forward_time": 0.11478400230407715,
      "step": 16631
    },
    {
      "epoch": 0.000101507568359375,
      "step": 16631,
      "training_step_time": 0.394498348236084
    },
    {
      "epoch": 0.000101513671875,
      "model_forward_time": 0.11611056327819824,
      "step": 16632
    },
    {
      "epoch": 0.000101513671875,
      "step": 16632,
      "training_step_time": 0.4679539203643799
    },
    {
      "epoch": 0.000101519775390625,
      "model_forward_time": 0.11487483978271484,
      "step": 16633
    },
    {
      "epoch": 0.000101519775390625,
      "step": 16633,
      "training_step_time": 0.4179503917694092
    },
    {
      "epoch": 0.00010152587890625,
      "model_forward_time": 0.11504578590393066,
      "step": 16634
    },
    {
      "epoch": 0.00010152587890625,
      "step": 16634,
      "training_step_time": 0.3890852928161621
    },
    {
      "epoch": 0.000101531982421875,
      "model_forward_time": 0.11511588096618652,
      "step": 16635
    },
    {
      "epoch": 0.000101531982421875,
      "step": 16635,
      "training_step_time": 0.5364353656768799
    },
    {
      "epoch": 0.0001015380859375,
      "model_forward_time": 0.1154317855834961,
      "step": 16636
    },
    {
      "epoch": 0.0001015380859375,
      "step": 16636,
      "training_step_time": 0.3863067626953125
    },
    {
      "epoch": 0.000101544189453125,
      "model_forward_time": 0.11474084854125977,
      "step": 16637
    },
    {
      "epoch": 0.000101544189453125,
      "step": 16637,
      "training_step_time": 0.40941381454467773
    },
    {
      "epoch": 0.00010155029296875,
      "model_forward_time": 0.11539721488952637,
      "step": 16638
    },
    {
      "epoch": 0.00010155029296875,
      "step": 16638,
      "training_step_time": 0.4023299217224121
    },
    {
      "epoch": 0.000101556396484375,
      "model_forward_time": 0.11545801162719727,
      "step": 16639
    },
    {
      "epoch": 0.000101556396484375,
      "step": 16639,
      "training_step_time": 0.39345741271972656
    },
    {
      "epoch": 0.0001015625,
      "grad_norm": 0.13628782331943512,
      "learning_rate": 8.652380126530488e-05,
      "loss": 0.0524,
      "step": 16640
    },
    {
      "epoch": 0.0001015625,
      "model_forward_time": 0.11514878273010254,
      "step": 16640
    },
    {
      "epoch": 0.0001015625,
      "step": 16640,
      "training_step_time": 0.4187450408935547
    },
    {
      "epoch": 0.000101568603515625,
      "model_forward_time": 0.11476731300354004,
      "step": 16641
    },
    {
      "epoch": 0.000101568603515625,
      "step": 16641,
      "training_step_time": 0.6500027179718018
    },
    {
      "epoch": 0.00010157470703125,
      "model_forward_time": 0.1148979663848877,
      "step": 16642
    },
    {
      "epoch": 0.00010157470703125,
      "step": 16642,
      "training_step_time": 0.3881402015686035
    },
    {
      "epoch": 0.000101580810546875,
      "model_forward_time": 0.1151731014251709,
      "step": 16643
    },
    {
      "epoch": 0.000101580810546875,
      "step": 16643,
      "training_step_time": 0.3822488784790039
    },
    {
      "epoch": 0.0001015869140625,
      "model_forward_time": 0.1154329776763916,
      "step": 16644
    },
    {
      "epoch": 0.0001015869140625,
      "step": 16644,
      "training_step_time": 0.38733530044555664
    },
    {
      "epoch": 0.000101593017578125,
      "model_forward_time": 0.1151130199432373,
      "step": 16645
    },
    {
      "epoch": 0.000101593017578125,
      "step": 16645,
      "training_step_time": 0.38938021659851074
    },
    {
      "epoch": 0.00010159912109375,
      "model_forward_time": 0.11533856391906738,
      "step": 16646
    },
    {
      "epoch": 0.00010159912109375,
      "step": 16646,
      "training_step_time": 0.4202444553375244
    },
    {
      "epoch": 0.000101605224609375,
      "model_forward_time": 0.11566472053527832,
      "step": 16647
    },
    {
      "epoch": 0.000101605224609375,
      "step": 16647,
      "training_step_time": 0.620995283126831
    },
    {
      "epoch": 0.000101611328125,
      "model_forward_time": 0.11542153358459473,
      "step": 16648
    },
    {
      "epoch": 0.000101611328125,
      "step": 16648,
      "training_step_time": 0.3947768211364746
    },
    {
      "epoch": 0.000101617431640625,
      "model_forward_time": 0.11501884460449219,
      "step": 16649
    },
    {
      "epoch": 0.000101617431640625,
      "step": 16649,
      "training_step_time": 0.3886594772338867
    },
    {
      "epoch": 0.00010162353515625,
      "grad_norm": 0.15170586109161377,
      "learning_rate": 8.650497541989482e-05,
      "loss": 0.0545,
      "step": 16650
    },
    {
      "epoch": 0.00010162353515625,
      "model_forward_time": 0.11535406112670898,
      "step": 16650
    },
    {
      "epoch": 0.00010162353515625,
      "step": 16650,
      "training_step_time": 0.41112422943115234
    },
    {
      "epoch": 0.000101629638671875,
      "model_forward_time": 0.11450076103210449,
      "step": 16651
    },
    {
      "epoch": 0.000101629638671875,
      "step": 16651,
      "training_step_time": 0.39291977882385254
    },
    {
      "epoch": 0.0001016357421875,
      "model_forward_time": 0.11512446403503418,
      "step": 16652
    },
    {
      "epoch": 0.0001016357421875,
      "step": 16652,
      "training_step_time": 0.3797433376312256
    },
    {
      "epoch": 0.000101641845703125,
      "model_forward_time": 0.11497735977172852,
      "step": 16653
    },
    {
      "epoch": 0.000101641845703125,
      "step": 16653,
      "training_step_time": 0.6854257583618164
    },
    {
      "epoch": 0.00010164794921875,
      "model_forward_time": 0.11488032341003418,
      "step": 16654
    },
    {
      "epoch": 0.00010164794921875,
      "step": 16654,
      "training_step_time": 0.45314502716064453
    },
    {
      "epoch": 0.000101654052734375,
      "model_forward_time": 0.11414718627929688,
      "step": 16655
    },
    {
      "epoch": 0.000101654052734375,
      "step": 16655,
      "training_step_time": 0.46862339973449707
    },
    {
      "epoch": 0.00010166015625,
      "model_forward_time": 0.11441874504089355,
      "step": 16656
    },
    {
      "epoch": 0.00010166015625,
      "step": 16656,
      "training_step_time": 0.38840770721435547
    },
    {
      "epoch": 0.000101666259765625,
      "model_forward_time": 0.1148831844329834,
      "step": 16657
    },
    {
      "epoch": 0.000101666259765625,
      "step": 16657,
      "training_step_time": 0.3871638774871826
    },
    {
      "epoch": 0.00010167236328125,
      "model_forward_time": 0.11431264877319336,
      "step": 16658
    },
    {
      "epoch": 0.00010167236328125,
      "step": 16658,
      "training_step_time": 0.38556790351867676
    },
    {
      "epoch": 0.000101678466796875,
      "model_forward_time": 0.11470675468444824,
      "step": 16659
    },
    {
      "epoch": 0.000101678466796875,
      "step": 16659,
      "training_step_time": 0.4972662925720215
    },
    {
      "epoch": 0.0001016845703125,
      "grad_norm": 0.14897920191287994,
      "learning_rate": 8.648613848523707e-05,
      "loss": 0.0492,
      "step": 16660
    },
    {
      "epoch": 0.0001016845703125,
      "model_forward_time": 0.11493539810180664,
      "step": 16660
    },
    {
      "epoch": 0.0001016845703125,
      "step": 16660,
      "training_step_time": 0.4873647689819336
    },
    {
      "epoch": 0.000101690673828125,
      "model_forward_time": 0.11577916145324707,
      "step": 16661
    },
    {
      "epoch": 0.000101690673828125,
      "step": 16661,
      "training_step_time": 0.44222450256347656
    },
    {
      "epoch": 0.00010169677734375,
      "model_forward_time": 0.11496281623840332,
      "step": 16662
    },
    {
      "epoch": 0.00010169677734375,
      "step": 16662,
      "training_step_time": 0.3955078125
    },
    {
      "epoch": 0.000101702880859375,
      "model_forward_time": 0.11476588249206543,
      "step": 16663
    },
    {
      "epoch": 0.000101702880859375,
      "step": 16663,
      "training_step_time": 0.3907465934753418
    },
    {
      "epoch": 0.000101708984375,
      "model_forward_time": 0.1147916316986084,
      "step": 16664
    },
    {
      "epoch": 0.000101708984375,
      "step": 16664,
      "training_step_time": 0.38896965980529785
    },
    {
      "epoch": 0.000101715087890625,
      "model_forward_time": 0.11534237861633301,
      "step": 16665
    },
    {
      "epoch": 0.000101715087890625,
      "step": 16665,
      "training_step_time": 0.5998833179473877
    },
    {
      "epoch": 0.00010172119140625,
      "model_forward_time": 0.11517858505249023,
      "step": 16666
    },
    {
      "epoch": 0.00010172119140625,
      "step": 16666,
      "training_step_time": 0.38409876823425293
    },
    {
      "epoch": 0.000101727294921875,
      "model_forward_time": 0.1152646541595459,
      "step": 16667
    },
    {
      "epoch": 0.000101727294921875,
      "step": 16667,
      "training_step_time": 0.3668649196624756
    },
    {
      "epoch": 0.0001017333984375,
      "model_forward_time": 0.11492323875427246,
      "step": 16668
    },
    {
      "epoch": 0.0001017333984375,
      "step": 16668,
      "training_step_time": 0.4401545524597168
    },
    {
      "epoch": 0.000101739501953125,
      "model_forward_time": 0.11541318893432617,
      "step": 16669
    },
    {
      "epoch": 0.000101739501953125,
      "step": 16669,
      "training_step_time": 0.48343873023986816
    },
    {
      "epoch": 0.00010174560546875,
      "grad_norm": 0.133546382188797,
      "learning_rate": 8.646729046705382e-05,
      "loss": 0.0521,
      "step": 16670
    },
    {
      "epoch": 0.00010174560546875,
      "model_forward_time": 0.11417341232299805,
      "step": 16670
    },
    {
      "epoch": 0.00010174560546875,
      "step": 16670,
      "training_step_time": 0.3819582462310791
    },
    {
      "epoch": 0.000101751708984375,
      "model_forward_time": 0.11551403999328613,
      "step": 16671
    },
    {
      "epoch": 0.000101751708984375,
      "step": 16671,
      "training_step_time": 0.5059258937835693
    },
    {
      "epoch": 0.0001017578125,
      "model_forward_time": 0.11466574668884277,
      "step": 16672
    },
    {
      "epoch": 0.0001017578125,
      "step": 16672,
      "training_step_time": 0.3921363353729248
    },
    {
      "epoch": 0.000101763916015625,
      "model_forward_time": 0.11454272270202637,
      "step": 16673
    },
    {
      "epoch": 0.000101763916015625,
      "step": 16673,
      "training_step_time": 0.5052480697631836
    },
    {
      "epoch": 0.00010177001953125,
      "model_forward_time": 0.1150052547454834,
      "step": 16674
    },
    {
      "epoch": 0.00010177001953125,
      "step": 16674,
      "training_step_time": 0.44393372535705566
    },
    {
      "epoch": 0.000101776123046875,
      "model_forward_time": 0.1148374080657959,
      "step": 16675
    },
    {
      "epoch": 0.000101776123046875,
      "step": 16675,
      "training_step_time": 0.46178317070007324
    },
    {
      "epoch": 0.0001017822265625,
      "model_forward_time": 0.11427116394042969,
      "step": 16676
    },
    {
      "epoch": 0.0001017822265625,
      "step": 16676,
      "training_step_time": 0.4272146224975586
    },
    {
      "epoch": 0.000101788330078125,
      "model_forward_time": 0.11502361297607422,
      "step": 16677
    },
    {
      "epoch": 0.000101788330078125,
      "step": 16677,
      "training_step_time": 0.45718836784362793
    },
    {
      "epoch": 0.00010179443359375,
      "model_forward_time": 0.11544990539550781,
      "step": 16678
    },
    {
      "epoch": 0.00010179443359375,
      "step": 16678,
      "training_step_time": 0.3886721134185791
    },
    {
      "epoch": 0.000101800537109375,
      "model_forward_time": 0.1146705150604248,
      "step": 16679
    },
    {
      "epoch": 0.000101800537109375,
      "step": 16679,
      "training_step_time": 0.41967248916625977
    },
    {
      "epoch": 0.000101806640625,
      "grad_norm": 0.14962278306484222,
      "learning_rate": 8.644843137107059e-05,
      "loss": 0.0498,
      "step": 16680
    },
    {
      "epoch": 0.000101806640625,
      "model_forward_time": 0.11501264572143555,
      "step": 16680
    },
    {
      "epoch": 0.000101806640625,
      "step": 16680,
      "training_step_time": 0.3907740116119385
    },
    {
      "epoch": 0.000101812744140625,
      "model_forward_time": 0.11446857452392578,
      "step": 16681
    },
    {
      "epoch": 0.000101812744140625,
      "step": 16681,
      "training_step_time": 0.3645138740539551
    },
    {
      "epoch": 0.00010181884765625,
      "model_forward_time": 0.11490821838378906,
      "step": 16682
    },
    {
      "epoch": 0.00010181884765625,
      "step": 16682,
      "training_step_time": 0.4514293670654297
    },
    {
      "epoch": 0.000101824951171875,
      "model_forward_time": 0.11469769477844238,
      "step": 16683
    },
    {
      "epoch": 0.000101824951171875,
      "step": 16683,
      "training_step_time": 0.4972245693206787
    },
    {
      "epoch": 0.0001018310546875,
      "model_forward_time": 0.11496233940124512,
      "step": 16684
    },
    {
      "epoch": 0.0001018310546875,
      "step": 16684,
      "training_step_time": 0.3933122158050537
    },
    {
      "epoch": 0.000101837158203125,
      "model_forward_time": 0.11489534378051758,
      "step": 16685
    },
    {
      "epoch": 0.000101837158203125,
      "step": 16685,
      "training_step_time": 0.38568973541259766
    },
    {
      "epoch": 0.00010184326171875,
      "model_forward_time": 0.1150960922241211,
      "step": 16686
    },
    {
      "epoch": 0.00010184326171875,
      "step": 16686,
      "training_step_time": 0.3910839557647705
    },
    {
      "epoch": 0.000101849365234375,
      "model_forward_time": 0.11503171920776367,
      "step": 16687
    },
    {
      "epoch": 0.000101849365234375,
      "step": 16687,
      "training_step_time": 0.3886909484863281
    },
    {
      "epoch": 0.00010185546875,
      "model_forward_time": 0.11546683311462402,
      "step": 16688
    },
    {
      "epoch": 0.00010185546875,
      "step": 16688,
      "training_step_time": 0.3881723880767822
    },
    {
      "epoch": 0.000101861572265625,
      "model_forward_time": 0.1152493953704834,
      "step": 16689
    },
    {
      "epoch": 0.000101861572265625,
      "step": 16689,
      "training_step_time": 0.508547306060791
    },
    {
      "epoch": 0.00010186767578125,
      "grad_norm": 0.1470479667186737,
      "learning_rate": 8.642956120301626e-05,
      "loss": 0.0504,
      "step": 16690
    },
    {
      "epoch": 0.00010186767578125,
      "model_forward_time": 0.11450004577636719,
      "step": 16690
    },
    {
      "epoch": 0.00010186767578125,
      "step": 16690,
      "training_step_time": 0.4720780849456787
    },
    {
      "epoch": 0.000101873779296875,
      "model_forward_time": 0.11469054222106934,
      "step": 16691
    },
    {
      "epoch": 0.000101873779296875,
      "step": 16691,
      "training_step_time": 0.38953542709350586
    },
    {
      "epoch": 0.0001018798828125,
      "model_forward_time": 0.1158297061920166,
      "step": 16692
    },
    {
      "epoch": 0.0001018798828125,
      "step": 16692,
      "training_step_time": 0.3926715850830078
    },
    {
      "epoch": 0.000101885986328125,
      "model_forward_time": 0.11540102958679199,
      "step": 16693
    },
    {
      "epoch": 0.000101885986328125,
      "step": 16693,
      "training_step_time": 0.4021146297454834
    },
    {
      "epoch": 0.00010189208984375,
      "model_forward_time": 0.11501049995422363,
      "step": 16694
    },
    {
      "epoch": 0.00010189208984375,
      "step": 16694,
      "training_step_time": 0.3845937252044678
    },
    {
      "epoch": 0.000101898193359375,
      "model_forward_time": 0.1148214340209961,
      "step": 16695
    },
    {
      "epoch": 0.000101898193359375,
      "step": 16695,
      "training_step_time": 0.6008179187774658
    },
    {
      "epoch": 0.000101904296875,
      "model_forward_time": 0.1153571605682373,
      "step": 16696
    },
    {
      "epoch": 0.000101904296875,
      "step": 16696,
      "training_step_time": 0.4451019763946533
    },
    {
      "epoch": 0.000101910400390625,
      "model_forward_time": 0.11478710174560547,
      "step": 16697
    },
    {
      "epoch": 0.000101910400390625,
      "step": 16697,
      "training_step_time": 0.45760607719421387
    },
    {
      "epoch": 0.00010191650390625,
      "model_forward_time": 0.11511015892028809,
      "step": 16698
    },
    {
      "epoch": 0.00010191650390625,
      "step": 16698,
      "training_step_time": 0.39203500747680664
    },
    {
      "epoch": 0.000101922607421875,
      "model_forward_time": 0.1147458553314209,
      "step": 16699
    },
    {
      "epoch": 0.000101922607421875,
      "step": 16699,
      "training_step_time": 0.39618515968322754
    },
    {
      "epoch": 0.0001019287109375,
      "grad_norm": 0.17459285259246826,
      "learning_rate": 8.641067996862311e-05,
      "loss": 0.0559,
      "step": 16700
    },
    {
      "epoch": 0.0001019287109375,
      "model_forward_time": 0.11460232734680176,
      "step": 16700
    },
    {
      "epoch": 0.0001019287109375,
      "step": 16700,
      "training_step_time": 0.40067124366760254
    },
    {
      "epoch": 0.000101934814453125,
      "model_forward_time": 0.1149289608001709,
      "step": 16701
    },
    {
      "epoch": 0.000101934814453125,
      "step": 16701,
      "training_step_time": 0.5185577869415283
    },
    {
      "epoch": 0.00010194091796875,
      "model_forward_time": 0.11496734619140625,
      "step": 16702
    },
    {
      "epoch": 0.00010194091796875,
      "step": 16702,
      "training_step_time": 0.48168253898620605
    },
    {
      "epoch": 0.000101947021484375,
      "model_forward_time": 0.11489653587341309,
      "step": 16703
    },
    {
      "epoch": 0.000101947021484375,
      "step": 16703,
      "training_step_time": 0.46654653549194336
    },
    {
      "epoch": 0.000101953125,
      "model_forward_time": 0.11443305015563965,
      "step": 16704
    },
    {
      "epoch": 0.000101953125,
      "step": 16704,
      "training_step_time": 0.39392948150634766
    },
    {
      "epoch": 0.000101959228515625,
      "model_forward_time": 0.11480927467346191,
      "step": 16705
    },
    {
      "epoch": 0.000101959228515625,
      "step": 16705,
      "training_step_time": 0.3896164894104004
    },
    {
      "epoch": 0.00010196533203125,
      "model_forward_time": 0.11495614051818848,
      "step": 16706
    },
    {
      "epoch": 0.00010196533203125,
      "step": 16706,
      "training_step_time": 0.3908257484436035
    },
    {
      "epoch": 0.000101971435546875,
      "model_forward_time": 0.11537909507751465,
      "step": 16707
    },
    {
      "epoch": 0.000101971435546875,
      "step": 16707,
      "training_step_time": 0.5685238838195801
    },
    {
      "epoch": 0.0001019775390625,
      "model_forward_time": 0.1149909496307373,
      "step": 16708
    },
    {
      "epoch": 0.0001019775390625,
      "step": 16708,
      "training_step_time": 0.39020252227783203
    },
    {
      "epoch": 0.000101983642578125,
      "model_forward_time": 0.11552286148071289,
      "step": 16709
    },
    {
      "epoch": 0.000101983642578125,
      "step": 16709,
      "training_step_time": 0.3930046558380127
    },
    {
      "epoch": 0.00010198974609375,
      "grad_norm": 0.1471719592809677,
      "learning_rate": 8.639178767362676e-05,
      "loss": 0.0469,
      "step": 16710
    },
    {
      "epoch": 0.00010198974609375,
      "model_forward_time": 0.11538267135620117,
      "step": 16710
    },
    {
      "epoch": 0.00010198974609375,
      "step": 16710,
      "training_step_time": 0.44031691551208496
    },
    {
      "epoch": 0.000101995849609375,
      "model_forward_time": 0.1159970760345459,
      "step": 16711
    },
    {
      "epoch": 0.000101995849609375,
      "step": 16711,
      "training_step_time": 0.4916672706604004
    },
    {
      "epoch": 0.000102001953125,
      "model_forward_time": 0.11480093002319336,
      "step": 16712
    },
    {
      "epoch": 0.000102001953125,
      "step": 16712,
      "training_step_time": 0.38863205909729004
    },
    {
      "epoch": 0.000102008056640625,
      "model_forward_time": 0.11542296409606934,
      "step": 16713
    },
    {
      "epoch": 0.000102008056640625,
      "step": 16713,
      "training_step_time": 0.5622310638427734
    },
    {
      "epoch": 0.00010201416015625,
      "model_forward_time": 0.11447453498840332,
      "step": 16714
    },
    {
      "epoch": 0.00010201416015625,
      "step": 16714,
      "training_step_time": 0.3863799571990967
    },
    {
      "epoch": 0.000102020263671875,
      "model_forward_time": 0.1146395206451416,
      "step": 16715
    },
    {
      "epoch": 0.000102020263671875,
      "step": 16715,
      "training_step_time": 0.39688658714294434
    },
    {
      "epoch": 0.0001020263671875,
      "model_forward_time": 0.11478519439697266,
      "step": 16716
    },
    {
      "epoch": 0.0001020263671875,
      "step": 16716,
      "training_step_time": 0.4687042236328125
    },
    {
      "epoch": 0.000102032470703125,
      "model_forward_time": 0.11590981483459473,
      "step": 16717
    },
    {
      "epoch": 0.000102032470703125,
      "step": 16717,
      "training_step_time": 0.4692671298980713
    },
    {
      "epoch": 0.00010203857421875,
      "model_forward_time": 0.11435818672180176,
      "step": 16718
    },
    {
      "epoch": 0.00010203857421875,
      "step": 16718,
      "training_step_time": 0.38579797744750977
    },
    {
      "epoch": 0.000102044677734375,
      "model_forward_time": 0.11461687088012695,
      "step": 16719
    },
    {
      "epoch": 0.000102044677734375,
      "step": 16719,
      "training_step_time": 0.5490946769714355
    },
    {
      "epoch": 0.00010205078125,
      "grad_norm": 0.17613086104393005,
      "learning_rate": 8.637288432376618e-05,
      "loss": 0.0537,
      "step": 16720
    },
    {
      "epoch": 0.00010205078125,
      "model_forward_time": 0.11522483825683594,
      "step": 16720
    },
    {
      "epoch": 0.00010205078125,
      "step": 16720,
      "training_step_time": 0.4013020992279053
    },
    {
      "epoch": 0.000102056884765625,
      "model_forward_time": 0.11475276947021484,
      "step": 16721
    },
    {
      "epoch": 0.000102056884765625,
      "step": 16721,
      "training_step_time": 0.39690637588500977
    },
    {
      "epoch": 0.00010206298828125,
      "model_forward_time": 0.1151895523071289,
      "step": 16722
    },
    {
      "epoch": 0.00010206298828125,
      "step": 16722,
      "training_step_time": 0.3978288173675537
    },
    {
      "epoch": 0.000102069091796875,
      "model_forward_time": 0.11499810218811035,
      "step": 16723
    },
    {
      "epoch": 0.000102069091796875,
      "step": 16723,
      "training_step_time": 0.39377355575561523
    },
    {
      "epoch": 0.0001020751953125,
      "model_forward_time": 0.1154794692993164,
      "step": 16724
    },
    {
      "epoch": 0.0001020751953125,
      "step": 16724,
      "training_step_time": 0.4256293773651123
    },
    {
      "epoch": 0.000102081298828125,
      "model_forward_time": 0.1152198314666748,
      "step": 16725
    },
    {
      "epoch": 0.000102081298828125,
      "step": 16725,
      "training_step_time": 0.6494834423065186
    },
    {
      "epoch": 0.00010208740234375,
      "model_forward_time": 0.11469531059265137,
      "step": 16726
    },
    {
      "epoch": 0.00010208740234375,
      "step": 16726,
      "training_step_time": 0.3821690082550049
    },
    {
      "epoch": 0.000102093505859375,
      "model_forward_time": 0.11485147476196289,
      "step": 16727
    },
    {
      "epoch": 0.000102093505859375,
      "step": 16727,
      "training_step_time": 0.39252233505249023
    },
    {
      "epoch": 0.000102099609375,
      "model_forward_time": 0.1151876449584961,
      "step": 16728
    },
    {
      "epoch": 0.000102099609375,
      "step": 16728,
      "training_step_time": 0.3965482711791992
    },
    {
      "epoch": 0.000102105712890625,
      "model_forward_time": 0.11464262008666992,
      "step": 16729
    },
    {
      "epoch": 0.000102105712890625,
      "step": 16729,
      "training_step_time": 0.4693765640258789
    },
    {
      "epoch": 0.00010211181640625,
      "grad_norm": 0.13394437730312347,
      "learning_rate": 8.635396992478371e-05,
      "loss": 0.046,
      "step": 16730
    },
    {
      "epoch": 0.00010211181640625,
      "model_forward_time": 0.1147470474243164,
      "step": 16730
    },
    {
      "epoch": 0.00010211181640625,
      "step": 16730,
      "training_step_time": 0.4579885005950928
    },
    {
      "epoch": 0.000102117919921875,
      "model_forward_time": 0.11551809310913086,
      "step": 16731
    },
    {
      "epoch": 0.000102117919921875,
      "step": 16731,
      "training_step_time": 0.4670982360839844
    },
    {
      "epoch": 0.0001021240234375,
      "model_forward_time": 0.11467957496643066,
      "step": 16732
    },
    {
      "epoch": 0.0001021240234375,
      "step": 16732,
      "training_step_time": 0.38840198516845703
    },
    {
      "epoch": 0.000102130126953125,
      "model_forward_time": 0.11463427543640137,
      "step": 16733
    },
    {
      "epoch": 0.000102130126953125,
      "step": 16733,
      "training_step_time": 0.3927185535430908
    },
    {
      "epoch": 0.00010213623046875,
      "model_forward_time": 0.11512374877929688,
      "step": 16734
    },
    {
      "epoch": 0.00010213623046875,
      "step": 16734,
      "training_step_time": 0.3990137577056885
    },
    {
      "epoch": 0.000102142333984375,
      "model_forward_time": 0.1143031120300293,
      "step": 16735
    },
    {
      "epoch": 0.000102142333984375,
      "step": 16735,
      "training_step_time": 0.3929100036621094
    },
    {
      "epoch": 0.0001021484375,
      "model_forward_time": 0.11530184745788574,
      "step": 16736
    },
    {
      "epoch": 0.0001021484375,
      "step": 16736,
      "training_step_time": 0.39005041122436523
    },
    {
      "epoch": 0.000102154541015625,
      "model_forward_time": 0.1155238151550293,
      "step": 16737
    },
    {
      "epoch": 0.000102154541015625,
      "step": 16737,
      "training_step_time": 0.6595139503479004
    },
    {
      "epoch": 0.00010216064453125,
      "model_forward_time": 0.11505389213562012,
      "step": 16738
    },
    {
      "epoch": 0.00010216064453125,
      "step": 16738,
      "training_step_time": 0.4519026279449463
    },
    {
      "epoch": 0.000102166748046875,
      "model_forward_time": 0.1166379451751709,
      "step": 16739
    },
    {
      "epoch": 0.000102166748046875,
      "step": 16739,
      "training_step_time": 0.45423340797424316
    },
    {
      "epoch": 0.0001021728515625,
      "grad_norm": 0.1554337441921234,
      "learning_rate": 8.633504448242505e-05,
      "loss": 0.0468,
      "step": 16740
    },
    {
      "epoch": 0.0001021728515625,
      "model_forward_time": 0.11506938934326172,
      "step": 16740
    },
    {
      "epoch": 0.0001021728515625,
      "step": 16740,
      "training_step_time": 0.3798856735229492
    },
    {
      "epoch": 0.000102178955078125,
      "model_forward_time": 0.11476016044616699,
      "step": 16741
    },
    {
      "epoch": 0.000102178955078125,
      "step": 16741,
      "training_step_time": 0.38933348655700684
    },
    {
      "epoch": 0.00010218505859375,
      "model_forward_time": 0.11457180976867676,
      "step": 16742
    },
    {
      "epoch": 0.00010218505859375,
      "step": 16742,
      "training_step_time": 0.4043447971343994
    },
    {
      "epoch": 0.000102191162109375,
      "model_forward_time": 0.11505007743835449,
      "step": 16743
    },
    {
      "epoch": 0.000102191162109375,
      "step": 16743,
      "training_step_time": 0.4854466915130615
    },
    {
      "epoch": 0.000102197265625,
      "model_forward_time": 0.11475563049316406,
      "step": 16744
    },
    {
      "epoch": 0.000102197265625,
      "step": 16744,
      "training_step_time": 0.4836289882659912
    },
    {
      "epoch": 0.000102203369140625,
      "model_forward_time": 0.11444258689880371,
      "step": 16745
    },
    {
      "epoch": 0.000102203369140625,
      "step": 16745,
      "training_step_time": 0.41294169425964355
    },
    {
      "epoch": 0.00010220947265625,
      "model_forward_time": 0.11478519439697266,
      "step": 16746
    },
    {
      "epoch": 0.00010220947265625,
      "step": 16746,
      "training_step_time": 0.4024624824523926
    },
    {
      "epoch": 0.000102215576171875,
      "model_forward_time": 0.11484670639038086,
      "step": 16747
    },
    {
      "epoch": 0.000102215576171875,
      "step": 16747,
      "training_step_time": 0.39757251739501953
    },
    {
      "epoch": 0.0001022216796875,
      "model_forward_time": 0.11458086967468262,
      "step": 16748
    },
    {
      "epoch": 0.0001022216796875,
      "step": 16748,
      "training_step_time": 0.3966960906982422
    },
    {
      "epoch": 0.000102227783203125,
      "model_forward_time": 0.1156306266784668,
      "step": 16749
    },
    {
      "epoch": 0.000102227783203125,
      "step": 16749,
      "training_step_time": 0.4863147735595703
    },
    {
      "epoch": 0.00010223388671875,
      "grad_norm": 0.18090574443340302,
      "learning_rate": 8.631610800243926e-05,
      "loss": 0.0516,
      "step": 16750
    },
    {
      "epoch": 0.00010223388671875,
      "model_forward_time": 0.11498093605041504,
      "step": 16750
    },
    {
      "epoch": 0.00010223388671875,
      "step": 16750,
      "training_step_time": 0.4053008556365967
    },
    {
      "epoch": 0.000102239990234375,
      "model_forward_time": 0.11475396156311035,
      "step": 16751
    },
    {
      "epoch": 0.000102239990234375,
      "step": 16751,
      "training_step_time": 0.4027249813079834
    },
    {
      "epoch": 0.00010224609375,
      "model_forward_time": 0.11543583869934082,
      "step": 16752
    },
    {
      "epoch": 0.00010224609375,
      "step": 16752,
      "training_step_time": 0.5071001052856445
    },
    {
      "epoch": 0.000102252197265625,
      "model_forward_time": 0.11524176597595215,
      "step": 16753
    },
    {
      "epoch": 0.000102252197265625,
      "step": 16753,
      "training_step_time": 0.43268847465515137
    },
    {
      "epoch": 0.00010225830078125,
      "model_forward_time": 0.11524033546447754,
      "step": 16754
    },
    {
      "epoch": 0.00010225830078125,
      "step": 16754,
      "training_step_time": 0.509361743927002
    },
    {
      "epoch": 0.000102264404296875,
      "model_forward_time": 0.11434578895568848,
      "step": 16755
    },
    {
      "epoch": 0.000102264404296875,
      "step": 16755,
      "training_step_time": 0.46266651153564453
    },
    {
      "epoch": 0.0001022705078125,
      "model_forward_time": 0.11530065536499023,
      "step": 16756
    },
    {
      "epoch": 0.0001022705078125,
      "step": 16756,
      "training_step_time": 0.46054673194885254
    },
    {
      "epoch": 0.000102276611328125,
      "model_forward_time": 0.11456775665283203,
      "step": 16757
    },
    {
      "epoch": 0.000102276611328125,
      "step": 16757,
      "training_step_time": 0.507124662399292
    },
    {
      "epoch": 0.00010228271484375,
      "model_forward_time": 0.11416244506835938,
      "step": 16758
    },
    {
      "epoch": 0.00010228271484375,
      "step": 16758,
      "training_step_time": 0.4465517997741699
    },
    {
      "epoch": 0.000102288818359375,
      "model_forward_time": 0.11440134048461914,
      "step": 16759
    },
    {
      "epoch": 0.000102288818359375,
      "step": 16759,
      "training_step_time": 0.43943214416503906
    },
    {
      "epoch": 0.000102294921875,
      "grad_norm": 0.19779980182647705,
      "learning_rate": 8.629716049057872e-05,
      "loss": 0.0536,
      "step": 16760
    },
    {
      "epoch": 0.000102294921875,
      "model_forward_time": 0.11437678337097168,
      "step": 16760
    },
    {
      "epoch": 0.000102294921875,
      "step": 16760,
      "training_step_time": 0.3896365165710449
    },
    {
      "epoch": 0.000102301025390625,
      "model_forward_time": 0.11458301544189453,
      "step": 16761
    },
    {
      "epoch": 0.000102301025390625,
      "step": 16761,
      "training_step_time": 0.38175106048583984
    },
    {
      "epoch": 0.00010230712890625,
      "model_forward_time": 0.11548852920532227,
      "step": 16762
    },
    {
      "epoch": 0.00010230712890625,
      "step": 16762,
      "training_step_time": 0.39397764205932617
    },
    {
      "epoch": 0.000102313232421875,
      "model_forward_time": 0.11487269401550293,
      "step": 16763
    },
    {
      "epoch": 0.000102313232421875,
      "step": 16763,
      "training_step_time": 0.7068874835968018
    },
    {
      "epoch": 0.0001023193359375,
      "model_forward_time": 0.11494755744934082,
      "step": 16764
    },
    {
      "epoch": 0.0001023193359375,
      "step": 16764,
      "training_step_time": 0.38507890701293945
    },
    {
      "epoch": 0.000102325439453125,
      "model_forward_time": 0.11435198783874512,
      "step": 16765
    },
    {
      "epoch": 0.000102325439453125,
      "step": 16765,
      "training_step_time": 0.36415648460388184
    },
    {
      "epoch": 0.00010233154296875,
      "model_forward_time": 0.11433625221252441,
      "step": 16766
    },
    {
      "epoch": 0.00010233154296875,
      "step": 16766,
      "training_step_time": 0.44123101234436035
    },
    {
      "epoch": 0.000102337646484375,
      "model_forward_time": 0.1146385669708252,
      "step": 16767
    },
    {
      "epoch": 0.000102337646484375,
      "step": 16767,
      "training_step_time": 0.47342443466186523
    },
    {
      "epoch": 0.00010234375,
      "model_forward_time": 0.1144099235534668,
      "step": 16768
    },
    {
      "epoch": 0.00010234375,
      "step": 16768,
      "training_step_time": 0.4189467430114746
    },
    {
      "epoch": 0.000102349853515625,
      "model_forward_time": 0.11455798149108887,
      "step": 16769
    },
    {
      "epoch": 0.000102349853515625,
      "step": 16769,
      "training_step_time": 0.38885951042175293
    },
    {
      "epoch": 0.00010235595703125,
      "grad_norm": 0.19090187549591064,
      "learning_rate": 8.627820195259918e-05,
      "loss": 0.056,
      "step": 16770
    },
    {
      "epoch": 0.00010235595703125,
      "model_forward_time": 0.11571335792541504,
      "step": 16770
    },
    {
      "epoch": 0.00010235595703125,
      "step": 16770,
      "training_step_time": 0.464648962020874
    },
    {
      "epoch": 0.000102362060546875,
      "model_forward_time": 0.11524295806884766,
      "step": 16771
    },
    {
      "epoch": 0.000102362060546875,
      "step": 16771,
      "training_step_time": 0.4709746837615967
    },
    {
      "epoch": 0.0001023681640625,
      "model_forward_time": 0.1150672435760498,
      "step": 16772
    },
    {
      "epoch": 0.0001023681640625,
      "step": 16772,
      "training_step_time": 0.4036221504211426
    },
    {
      "epoch": 0.000102374267578125,
      "model_forward_time": 0.11467719078063965,
      "step": 16773
    },
    {
      "epoch": 0.000102374267578125,
      "step": 16773,
      "training_step_time": 0.41820478439331055
    },
    {
      "epoch": 0.00010238037109375,
      "model_forward_time": 0.11510086059570312,
      "step": 16774
    },
    {
      "epoch": 0.00010238037109375,
      "step": 16774,
      "training_step_time": 0.39235472679138184
    },
    {
      "epoch": 0.000102386474609375,
      "model_forward_time": 0.11448240280151367,
      "step": 16775
    },
    {
      "epoch": 0.000102386474609375,
      "step": 16775,
      "training_step_time": 0.4028325080871582
    },
    {
      "epoch": 0.000102392578125,
      "model_forward_time": 0.11607623100280762,
      "step": 16776
    },
    {
      "epoch": 0.000102392578125,
      "step": 16776,
      "training_step_time": 0.3955729007720947
    },
    {
      "epoch": 0.000102398681640625,
      "model_forward_time": 0.11507010459899902,
      "step": 16777
    },
    {
      "epoch": 0.000102398681640625,
      "step": 16777,
      "training_step_time": 0.4075746536254883
    },
    {
      "epoch": 0.00010240478515625,
      "model_forward_time": 0.11516880989074707,
      "step": 16778
    },
    {
      "epoch": 0.00010240478515625,
      "step": 16778,
      "training_step_time": 0.39150071144104004
    },
    {
      "epoch": 0.000102410888671875,
      "model_forward_time": 0.11565685272216797,
      "step": 16779
    },
    {
      "epoch": 0.000102410888671875,
      "step": 16779,
      "training_step_time": 0.39060020446777344
    },
    {
      "epoch": 0.0001024169921875,
      "grad_norm": 0.1606638878583908,
      "learning_rate": 8.625923239425978e-05,
      "loss": 0.0508,
      "step": 16780
    },
    {
      "epoch": 0.0001024169921875,
      "model_forward_time": 0.1151113510131836,
      "step": 16780
    },
    {
      "epoch": 0.0001024169921875,
      "step": 16780,
      "training_step_time": 0.48159360885620117
    },
    {
      "epoch": 0.000102423095703125,
      "model_forward_time": 0.11505675315856934,
      "step": 16781
    },
    {
      "epoch": 0.000102423095703125,
      "step": 16781,
      "training_step_time": 0.6439230442047119
    },
    {
      "epoch": 0.00010242919921875,
      "model_forward_time": 0.11458063125610352,
      "step": 16782
    },
    {
      "epoch": 0.00010242919921875,
      "step": 16782,
      "training_step_time": 0.4072277545928955
    },
    {
      "epoch": 0.000102435302734375,
      "model_forward_time": 0.1155247688293457,
      "step": 16783
    },
    {
      "epoch": 0.000102435302734375,
      "step": 16783,
      "training_step_time": 0.3924870491027832
    },
    {
      "epoch": 0.00010244140625,
      "model_forward_time": 0.11465239524841309,
      "step": 16784
    },
    {
      "epoch": 0.00010244140625,
      "step": 16784,
      "training_step_time": 0.4805448055267334
    },
    {
      "epoch": 0.000102447509765625,
      "model_forward_time": 0.11404848098754883,
      "step": 16785
    },
    {
      "epoch": 0.000102447509765625,
      "step": 16785,
      "training_step_time": 0.4266993999481201
    },
    {
      "epoch": 0.00010245361328125,
      "model_forward_time": 0.11486339569091797,
      "step": 16786
    },
    {
      "epoch": 0.00010245361328125,
      "step": 16786,
      "training_step_time": 0.40722179412841797
    },
    {
      "epoch": 0.000102459716796875,
      "model_forward_time": 0.11551880836486816,
      "step": 16787
    },
    {
      "epoch": 0.000102459716796875,
      "step": 16787,
      "training_step_time": 0.49066162109375
    },
    {
      "epoch": 0.0001024658203125,
      "model_forward_time": 0.11495494842529297,
      "step": 16788
    },
    {
      "epoch": 0.0001024658203125,
      "step": 16788,
      "training_step_time": 0.3912620544433594
    },
    {
      "epoch": 0.000102471923828125,
      "model_forward_time": 0.11516666412353516,
      "step": 16789
    },
    {
      "epoch": 0.000102471923828125,
      "step": 16789,
      "training_step_time": 0.46126413345336914
    },
    {
      "epoch": 0.00010247802734375,
      "grad_norm": 0.2519115209579468,
      "learning_rate": 8.624025182132292e-05,
      "loss": 0.0528,
      "step": 16790
    },
    {
      "epoch": 0.00010247802734375,
      "model_forward_time": 0.11502528190612793,
      "step": 16790
    },
    {
      "epoch": 0.00010247802734375,
      "step": 16790,
      "training_step_time": 0.39414215087890625
    },
    {
      "epoch": 0.000102484130859375,
      "model_forward_time": 0.11484217643737793,
      "step": 16791
    },
    {
      "epoch": 0.000102484130859375,
      "step": 16791,
      "training_step_time": 0.4322826862335205
    },
    {
      "epoch": 0.000102490234375,
      "model_forward_time": 0.11500835418701172,
      "step": 16792
    },
    {
      "epoch": 0.000102490234375,
      "step": 16792,
      "training_step_time": 0.4788234233856201
    },
    {
      "epoch": 0.000102496337890625,
      "model_forward_time": 0.11453485488891602,
      "step": 16793
    },
    {
      "epoch": 0.000102496337890625,
      "step": 16793,
      "training_step_time": 0.36690235137939453
    },
    {
      "epoch": 0.00010250244140625,
      "model_forward_time": 0.11477112770080566,
      "step": 16794
    },
    {
      "epoch": 0.00010250244140625,
      "step": 16794,
      "training_step_time": 0.4371829032897949
    },
    {
      "epoch": 0.000102508544921875,
      "model_forward_time": 0.11447834968566895,
      "step": 16795
    },
    {
      "epoch": 0.000102508544921875,
      "step": 16795,
      "training_step_time": 0.4116251468658447
    },
    {
      "epoch": 0.0001025146484375,
      "model_forward_time": 0.11386537551879883,
      "step": 16796
    },
    {
      "epoch": 0.0001025146484375,
      "step": 16796,
      "training_step_time": 0.37910962104797363
    },
    {
      "epoch": 0.000102520751953125,
      "model_forward_time": 0.11553382873535156,
      "step": 16797
    },
    {
      "epoch": 0.000102520751953125,
      "step": 16797,
      "training_step_time": 0.3864452838897705
    },
    {
      "epoch": 0.00010252685546875,
      "model_forward_time": 0.11527800559997559,
      "step": 16798
    },
    {
      "epoch": 0.00010252685546875,
      "step": 16798,
      "training_step_time": 0.47087621688842773
    },
    {
      "epoch": 0.000102532958984375,
      "model_forward_time": 0.11496233940124512,
      "step": 16799
    },
    {
      "epoch": 0.000102532958984375,
      "step": 16799,
      "training_step_time": 0.5081391334533691
    },
    {
      "epoch": 0.0001025390625,
      "grad_norm": 0.13031607866287231,
      "learning_rate": 8.622126023955446e-05,
      "loss": 0.05,
      "step": 16800
    },
    {
      "epoch": 0.0001025390625,
      "model_forward_time": 0.11479997634887695,
      "step": 16800
    },
    {
      "epoch": 0.0001025390625,
      "step": 16800,
      "training_step_time": 0.4009523391723633
    },
    {
      "epoch": 0.000102545166015625,
      "model_forward_time": 0.11567211151123047,
      "step": 16801
    },
    {
      "epoch": 0.000102545166015625,
      "step": 16801,
      "training_step_time": 0.44735121726989746
    },
    {
      "epoch": 0.00010255126953125,
      "model_forward_time": 0.11499404907226562,
      "step": 16802
    },
    {
      "epoch": 0.00010255126953125,
      "step": 16802,
      "training_step_time": 0.385894775390625
    },
    {
      "epoch": 0.000102557373046875,
      "model_forward_time": 0.11489462852478027,
      "step": 16803
    },
    {
      "epoch": 0.000102557373046875,
      "step": 16803,
      "training_step_time": 0.3888125419616699
    },
    {
      "epoch": 0.0001025634765625,
      "model_forward_time": 0.11514806747436523,
      "step": 16804
    },
    {
      "epoch": 0.0001025634765625,
      "step": 16804,
      "training_step_time": 0.3892781734466553
    },
    {
      "epoch": 0.000102569580078125,
      "model_forward_time": 0.1152489185333252,
      "step": 16805
    },
    {
      "epoch": 0.000102569580078125,
      "step": 16805,
      "training_step_time": 0.8526835441589355
    },
    {
      "epoch": 0.00010257568359375,
      "model_forward_time": 0.11438751220703125,
      "step": 16806
    },
    {
      "epoch": 0.00010257568359375,
      "step": 16806,
      "training_step_time": 0.3877418041229248
    },
    {
      "epoch": 0.000102581787109375,
      "model_forward_time": 0.11419963836669922,
      "step": 16807
    },
    {
      "epoch": 0.000102581787109375,
      "step": 16807,
      "training_step_time": 0.46753430366516113
    },
    {
      "epoch": 0.000102587890625,
      "model_forward_time": 0.1142432689666748,
      "step": 16808
    },
    {
      "epoch": 0.000102587890625,
      "step": 16808,
      "training_step_time": 0.46083998680114746
    },
    {
      "epoch": 0.000102593994140625,
      "model_forward_time": 0.11434149742126465,
      "step": 16809
    },
    {
      "epoch": 0.000102593994140625,
      "step": 16809,
      "training_step_time": 0.4677889347076416
    },
    {
      "epoch": 0.00010260009765625,
      "grad_norm": 0.1760137528181076,
      "learning_rate": 8.620225765472348e-05,
      "loss": 0.056,
      "step": 16810
    },
    {
      "epoch": 0.00010260009765625,
      "model_forward_time": 0.11381053924560547,
      "step": 16810
    },
    {
      "epoch": 0.00010260009765625,
      "step": 16810,
      "training_step_time": 0.3928091526031494
    },
    {
      "epoch": 0.000102606201171875,
      "model_forward_time": 0.11499810218811035,
      "step": 16811
    },
    {
      "epoch": 0.000102606201171875,
      "step": 16811,
      "training_step_time": 0.47492432594299316
    },
    {
      "epoch": 0.0001026123046875,
      "model_forward_time": 0.11561417579650879,
      "step": 16812
    },
    {
      "epoch": 0.0001026123046875,
      "step": 16812,
      "training_step_time": 0.5045123100280762
    },
    {
      "epoch": 0.000102618408203125,
      "model_forward_time": 0.1148371696472168,
      "step": 16813
    },
    {
      "epoch": 0.000102618408203125,
      "step": 16813,
      "training_step_time": 0.44807004928588867
    },
    {
      "epoch": 0.00010262451171875,
      "model_forward_time": 0.11472535133361816,
      "step": 16814
    },
    {
      "epoch": 0.00010262451171875,
      "step": 16814,
      "training_step_time": 0.4459073543548584
    },
    {
      "epoch": 0.000102630615234375,
      "model_forward_time": 0.11458468437194824,
      "step": 16815
    },
    {
      "epoch": 0.000102630615234375,
      "step": 16815,
      "training_step_time": 0.4362621307373047
    },
    {
      "epoch": 0.00010263671875,
      "model_forward_time": 0.11465692520141602,
      "step": 16816
    },
    {
      "epoch": 0.00010263671875,
      "step": 16816,
      "training_step_time": 0.3880455493927002
    },
    {
      "epoch": 0.000102642822265625,
      "model_forward_time": 0.11473703384399414,
      "step": 16817
    },
    {
      "epoch": 0.000102642822265625,
      "step": 16817,
      "training_step_time": 0.3824150562286377
    },
    {
      "epoch": 0.00010264892578125,
      "model_forward_time": 0.11486053466796875,
      "step": 16818
    },
    {
      "epoch": 0.00010264892578125,
      "step": 16818,
      "training_step_time": 0.40073490142822266
    },
    {
      "epoch": 0.000102655029296875,
      "model_forward_time": 0.11497759819030762,
      "step": 16819
    },
    {
      "epoch": 0.000102655029296875,
      "step": 16819,
      "training_step_time": 0.38919615745544434
    },
    {
      "epoch": 0.0001026611328125,
      "grad_norm": 0.17522001266479492,
      "learning_rate": 8.61832440726025e-05,
      "loss": 0.053,
      "step": 16820
    },
    {
      "epoch": 0.0001026611328125,
      "model_forward_time": 0.11528754234313965,
      "step": 16820
    },
    {
      "epoch": 0.0001026611328125,
      "step": 16820,
      "training_step_time": 0.39728546142578125
    },
    {
      "epoch": 0.000102667236328125,
      "model_forward_time": 0.11551594734191895,
      "step": 16821
    },
    {
      "epoch": 0.000102667236328125,
      "step": 16821,
      "training_step_time": 0.47837066650390625
    },
    {
      "epoch": 0.00010267333984375,
      "model_forward_time": 0.1157217025756836,
      "step": 16822
    },
    {
      "epoch": 0.00010267333984375,
      "step": 16822,
      "training_step_time": 0.47701144218444824
    },
    {
      "epoch": 0.000102679443359375,
      "model_forward_time": 0.11579442024230957,
      "step": 16823
    },
    {
      "epoch": 0.000102679443359375,
      "step": 16823,
      "training_step_time": 0.485015869140625
    },
    {
      "epoch": 0.000102685546875,
      "model_forward_time": 0.11518096923828125,
      "step": 16824
    },
    {
      "epoch": 0.000102685546875,
      "step": 16824,
      "training_step_time": 0.4453392028808594
    },
    {
      "epoch": 0.000102691650390625,
      "model_forward_time": 0.11545109748840332,
      "step": 16825
    },
    {
      "epoch": 0.000102691650390625,
      "step": 16825,
      "training_step_time": 0.39020776748657227
    },
    {
      "epoch": 0.00010269775390625,
      "model_forward_time": 0.11538434028625488,
      "step": 16826
    },
    {
      "epoch": 0.00010269775390625,
      "step": 16826,
      "training_step_time": 0.41222524642944336
    },
    {
      "epoch": 0.000102703857421875,
      "model_forward_time": 0.11504387855529785,
      "step": 16827
    },
    {
      "epoch": 0.000102703857421875,
      "step": 16827,
      "training_step_time": 0.48993778228759766
    },
    {
      "epoch": 0.0001027099609375,
      "model_forward_time": 0.11538910865783691,
      "step": 16828
    },
    {
      "epoch": 0.0001027099609375,
      "step": 16828,
      "training_step_time": 0.44231724739074707
    },
    {
      "epoch": 0.000102716064453125,
      "model_forward_time": 0.11677193641662598,
      "step": 16829
    },
    {
      "epoch": 0.000102716064453125,
      "step": 16829,
      "training_step_time": 0.4539492130279541
    },
    {
      "epoch": 0.00010272216796875,
      "grad_norm": 0.16528216004371643,
      "learning_rate": 8.616421949896734e-05,
      "loss": 0.0496,
      "step": 16830
    },
    {
      "epoch": 0.00010272216796875,
      "model_forward_time": 0.11552786827087402,
      "step": 16830
    },
    {
      "epoch": 0.00010272216796875,
      "step": 16830,
      "training_step_time": 0.5179104804992676
    },
    {
      "epoch": 0.000102728271484375,
      "model_forward_time": 0.11510372161865234,
      "step": 16831
    },
    {
      "epoch": 0.000102728271484375,
      "step": 16831,
      "training_step_time": 0.4173552989959717
    },
    {
      "epoch": 0.000102734375,
      "model_forward_time": 0.11448001861572266,
      "step": 16832
    },
    {
      "epoch": 0.000102734375,
      "step": 16832,
      "training_step_time": 0.3741750717163086
    },
    {
      "epoch": 0.000102740478515625,
      "model_forward_time": 0.11461353302001953,
      "step": 16833
    },
    {
      "epoch": 0.000102740478515625,
      "step": 16833,
      "training_step_time": 0.42635488510131836
    },
    {
      "epoch": 0.00010274658203125,
      "model_forward_time": 0.11507797241210938,
      "step": 16834
    },
    {
      "epoch": 0.00010274658203125,
      "step": 16834,
      "training_step_time": 0.40316295623779297
    },
    {
      "epoch": 0.000102752685546875,
      "model_forward_time": 0.11539244651794434,
      "step": 16835
    },
    {
      "epoch": 0.000102752685546875,
      "step": 16835,
      "training_step_time": 0.4218912124633789
    },
    {
      "epoch": 0.0001027587890625,
      "model_forward_time": 0.11515951156616211,
      "step": 16836
    },
    {
      "epoch": 0.0001027587890625,
      "step": 16836,
      "training_step_time": 0.4567148685455322
    },
    {
      "epoch": 0.000102764892578125,
      "model_forward_time": 0.11575031280517578,
      "step": 16837
    },
    {
      "epoch": 0.000102764892578125,
      "step": 16837,
      "training_step_time": 0.48792123794555664
    },
    {
      "epoch": 0.00010277099609375,
      "model_forward_time": 0.1154625415802002,
      "step": 16838
    },
    {
      "epoch": 0.00010277099609375,
      "step": 16838,
      "training_step_time": 0.45702171325683594
    },
    {
      "epoch": 0.000102777099609375,
      "model_forward_time": 0.11529970169067383,
      "step": 16839
    },
    {
      "epoch": 0.000102777099609375,
      "step": 16839,
      "training_step_time": 0.477003812789917
    },
    {
      "epoch": 0.000102783203125,
      "grad_norm": 0.18024252355098724,
      "learning_rate": 8.614518393959714e-05,
      "loss": 0.0545,
      "step": 16840
    },
    {
      "epoch": 0.000102783203125,
      "model_forward_time": 0.11568355560302734,
      "step": 16840
    },
    {
      "epoch": 0.000102783203125,
      "step": 16840,
      "training_step_time": 0.400648832321167
    },
    {
      "epoch": 0.000102789306640625,
      "model_forward_time": 0.11467790603637695,
      "step": 16841
    },
    {
      "epoch": 0.000102789306640625,
      "step": 16841,
      "training_step_time": 0.3871572017669678
    },
    {
      "epoch": 0.00010279541015625,
      "model_forward_time": 0.1155095100402832,
      "step": 16842
    },
    {
      "epoch": 0.00010279541015625,
      "step": 16842,
      "training_step_time": 0.41668272018432617
    },
    {
      "epoch": 0.000102801513671875,
      "model_forward_time": 0.11616277694702148,
      "step": 16843
    },
    {
      "epoch": 0.000102801513671875,
      "step": 16843,
      "training_step_time": 0.41979169845581055
    },
    {
      "epoch": 0.0001028076171875,
      "model_forward_time": 0.11578583717346191,
      "step": 16844
    },
    {
      "epoch": 0.0001028076171875,
      "step": 16844,
      "training_step_time": 0.40323805809020996
    },
    {
      "epoch": 0.000102813720703125,
      "model_forward_time": 0.11527490615844727,
      "step": 16845
    },
    {
      "epoch": 0.000102813720703125,
      "step": 16845,
      "training_step_time": 0.44554686546325684
    },
    {
      "epoch": 0.00010281982421875,
      "model_forward_time": 0.11534237861633301,
      "step": 16846
    },
    {
      "epoch": 0.00010281982421875,
      "step": 16846,
      "training_step_time": 0.41594910621643066
    },
    {
      "epoch": 0.000102825927734375,
      "model_forward_time": 0.11503767967224121,
      "step": 16847
    },
    {
      "epoch": 0.000102825927734375,
      "step": 16847,
      "training_step_time": 0.4200465679168701
    },
    {
      "epoch": 0.00010283203125,
      "model_forward_time": 0.11525321006774902,
      "step": 16848
    },
    {
      "epoch": 0.00010283203125,
      "step": 16848,
      "training_step_time": 0.4057152271270752
    },
    {
      "epoch": 0.000102838134765625,
      "model_forward_time": 0.11496376991271973,
      "step": 16849
    },
    {
      "epoch": 0.000102838134765625,
      "step": 16849,
      "training_step_time": 0.36792588233947754
    },
    {
      "epoch": 0.00010284423828125,
      "grad_norm": 0.17181460559368134,
      "learning_rate": 8.612613740027443e-05,
      "loss": 0.051,
      "step": 16850
    },
    {
      "epoch": 0.00010284423828125,
      "model_forward_time": 0.11502790451049805,
      "step": 16850
    },
    {
      "epoch": 0.00010284423828125,
      "step": 16850,
      "training_step_time": 0.46588778495788574
    },
    {
      "epoch": 0.000102850341796875,
      "model_forward_time": 0.11538028717041016,
      "step": 16851
    },
    {
      "epoch": 0.000102850341796875,
      "step": 16851,
      "training_step_time": 0.5255763530731201
    },
    {
      "epoch": 0.0001028564453125,
      "model_forward_time": 0.11530733108520508,
      "step": 16852
    },
    {
      "epoch": 0.0001028564453125,
      "step": 16852,
      "training_step_time": 0.41883158683776855
    },
    {
      "epoch": 0.000102862548828125,
      "model_forward_time": 0.1144871711730957,
      "step": 16853
    },
    {
      "epoch": 0.000102862548828125,
      "step": 16853,
      "training_step_time": 0.45610785484313965
    },
    {
      "epoch": 0.00010286865234375,
      "model_forward_time": 0.1147150993347168,
      "step": 16854
    },
    {
      "epoch": 0.00010286865234375,
      "step": 16854,
      "training_step_time": 0.46742939949035645
    },
    {
      "epoch": 0.000102874755859375,
      "model_forward_time": 0.11440515518188477,
      "step": 16855
    },
    {
      "epoch": 0.000102874755859375,
      "step": 16855,
      "training_step_time": 0.38979506492614746
    },
    {
      "epoch": 0.000102880859375,
      "model_forward_time": 0.11488914489746094,
      "step": 16856
    },
    {
      "epoch": 0.000102880859375,
      "step": 16856,
      "training_step_time": 0.39032411575317383
    },
    {
      "epoch": 0.000102886962890625,
      "model_forward_time": 0.11518049240112305,
      "step": 16857
    },
    {
      "epoch": 0.000102886962890625,
      "step": 16857,
      "training_step_time": 0.4042983055114746
    },
    {
      "epoch": 0.00010289306640625,
      "model_forward_time": 0.11545348167419434,
      "step": 16858
    },
    {
      "epoch": 0.00010289306640625,
      "step": 16858,
      "training_step_time": 0.3884549140930176
    },
    {
      "epoch": 0.000102899169921875,
      "model_forward_time": 0.11538481712341309,
      "step": 16859
    },
    {
      "epoch": 0.000102899169921875,
      "step": 16859,
      "training_step_time": 0.40550732612609863
    },
    {
      "epoch": 0.0001029052734375,
      "grad_norm": 0.14294755458831787,
      "learning_rate": 8.610707988678503e-05,
      "loss": 0.05,
      "step": 16860
    },
    {
      "epoch": 0.0001029052734375,
      "model_forward_time": 0.11492586135864258,
      "step": 16860
    },
    {
      "epoch": 0.0001029052734375,
      "step": 16860,
      "training_step_time": 0.40171170234680176
    },
    {
      "epoch": 0.000102911376953125,
      "model_forward_time": 0.11566781997680664,
      "step": 16861
    },
    {
      "epoch": 0.000102911376953125,
      "step": 16861,
      "training_step_time": 0.38956689834594727
    },
    {
      "epoch": 0.00010291748046875,
      "model_forward_time": 0.11571431159973145,
      "step": 16862
    },
    {
      "epoch": 0.00010291748046875,
      "step": 16862,
      "training_step_time": 0.4005250930786133
    },
    {
      "epoch": 0.000102923583984375,
      "model_forward_time": 0.11662602424621582,
      "step": 16863
    },
    {
      "epoch": 0.000102923583984375,
      "step": 16863,
      "training_step_time": 0.4209420680999756
    },
    {
      "epoch": 0.0001029296875,
      "model_forward_time": 0.11849284172058105,
      "step": 16864
    },
    {
      "epoch": 0.0001029296875,
      "step": 16864,
      "training_step_time": 0.48779964447021484
    },
    {
      "epoch": 0.000102935791015625,
      "model_forward_time": 0.11783957481384277,
      "step": 16865
    },
    {
      "epoch": 0.000102935791015625,
      "step": 16865,
      "training_step_time": 0.5667078495025635
    },
    {
      "epoch": 0.00010294189453125,
      "model_forward_time": 0.11926579475402832,
      "step": 16866
    },
    {
      "epoch": 0.00010294189453125,
      "step": 16866,
      "training_step_time": 0.7551321983337402
    },
    {
      "epoch": 0.000102947998046875,
      "model_forward_time": 0.11980247497558594,
      "step": 16867
    },
    {
      "epoch": 0.000102947998046875,
      "step": 16867,
      "training_step_time": 0.7059919834136963
    },
    {
      "epoch": 0.0001029541015625,
      "model_forward_time": 0.1170802116394043,
      "step": 16868
    },
    {
      "epoch": 0.0001029541015625,
      "step": 16868,
      "training_step_time": 0.7044463157653809
    },
    {
      "epoch": 0.000102960205078125,
      "model_forward_time": 0.11605525016784668,
      "step": 16869
    },
    {
      "epoch": 0.000102960205078125,
      "step": 16869,
      "training_step_time": 0.6401023864746094
    },
    {
      "epoch": 0.00010296630859375,
      "grad_norm": 0.12548324465751648,
      "learning_rate": 8.608801140491811e-05,
      "loss": 0.0505,
      "step": 16870
    },
    {
      "epoch": 0.00010296630859375,
      "model_forward_time": 0.12235474586486816,
      "step": 16870
    },
    {
      "epoch": 0.00010296630859375,
      "step": 16870,
      "training_step_time": 0.7052767276763916
    },
    {
      "epoch": 0.000102972412109375,
      "model_forward_time": 0.11708474159240723,
      "step": 16871
    },
    {
      "epoch": 0.000102972412109375,
      "step": 16871,
      "training_step_time": 0.6538596153259277
    },
    {
      "epoch": 0.000102978515625,
      "model_forward_time": 0.1190180778503418,
      "step": 16872
    },
    {
      "epoch": 0.000102978515625,
      "step": 16872,
      "training_step_time": 0.6462335586547852
    },
    {
      "epoch": 0.000102984619140625,
      "model_forward_time": 0.1312093734741211,
      "step": 16873
    },
    {
      "epoch": 0.000102984619140625,
      "step": 16873,
      "training_step_time": 0.7079122066497803
    },
    {
      "epoch": 0.00010299072265625,
      "model_forward_time": 0.11721992492675781,
      "step": 16874
    },
    {
      "epoch": 0.00010299072265625,
      "step": 16874,
      "training_step_time": 0.6831910610198975
    },
    {
      "epoch": 0.000102996826171875,
      "model_forward_time": 0.11743426322937012,
      "step": 16875
    },
    {
      "epoch": 0.000102996826171875,
      "step": 16875,
      "training_step_time": 0.6019084453582764
    },
    {
      "epoch": 0.0001030029296875,
      "model_forward_time": 0.1222691535949707,
      "step": 16876
    },
    {
      "epoch": 0.0001030029296875,
      "step": 16876,
      "training_step_time": 0.6872639656066895
    },
    {
      "epoch": 0.000103009033203125,
      "model_forward_time": 0.12397956848144531,
      "step": 16877
    },
    {
      "epoch": 0.000103009033203125,
      "step": 16877,
      "training_step_time": 0.6845037937164307
    },
    {
      "epoch": 0.00010301513671875,
      "model_forward_time": 0.12258505821228027,
      "step": 16878
    },
    {
      "epoch": 0.00010301513671875,
      "step": 16878,
      "training_step_time": 0.6986105442047119
    },
    {
      "epoch": 0.000103021240234375,
      "model_forward_time": 0.12030291557312012,
      "step": 16879
    },
    {
      "epoch": 0.000103021240234375,
      "step": 16879,
      "training_step_time": 0.7088837623596191
    },
    {
      "epoch": 0.00010302734375,
      "grad_norm": 0.15557287633419037,
      "learning_rate": 8.606893196046619e-05,
      "loss": 0.0628,
      "step": 16880
    },
    {
      "epoch": 0.00010302734375,
      "model_forward_time": 0.11866474151611328,
      "step": 16880
    },
    {
      "epoch": 0.00010302734375,
      "step": 16880,
      "training_step_time": 0.6905159950256348
    },
    {
      "epoch": 0.000103033447265625,
      "model_forward_time": 0.11840271949768066,
      "step": 16881
    },
    {
      "epoch": 0.000103033447265625,
      "step": 16881,
      "training_step_time": 0.6803169250488281
    },
    {
      "epoch": 0.00010303955078125,
      "model_forward_time": 0.12002086639404297,
      "step": 16882
    },
    {
      "epoch": 0.00010303955078125,
      "step": 16882,
      "training_step_time": 0.6910114288330078
    },
    {
      "epoch": 0.000103045654296875,
      "model_forward_time": 0.11526870727539062,
      "step": 16883
    },
    {
      "epoch": 0.000103045654296875,
      "step": 16883,
      "training_step_time": 0.6281406879425049
    },
    {
      "epoch": 0.0001030517578125,
      "model_forward_time": 0.12014508247375488,
      "step": 16884
    },
    {
      "epoch": 0.0001030517578125,
      "step": 16884,
      "training_step_time": 0.6696267127990723
    },
    {
      "epoch": 0.000103057861328125,
      "model_forward_time": 0.12252092361450195,
      "step": 16885
    },
    {
      "epoch": 0.000103057861328125,
      "step": 16885,
      "training_step_time": 0.7827258110046387
    },
    {
      "epoch": 0.00010306396484375,
      "model_forward_time": 0.11695337295532227,
      "step": 16886
    },
    {
      "epoch": 0.00010306396484375,
      "step": 16886,
      "training_step_time": 0.6450693607330322
    },
    {
      "epoch": 0.000103070068359375,
      "model_forward_time": 0.12671828269958496,
      "step": 16887
    },
    {
      "epoch": 0.000103070068359375,
      "step": 16887,
      "training_step_time": 0.6760425567626953
    },
    {
      "epoch": 0.000103076171875,
      "model_forward_time": 0.14105939865112305,
      "step": 16888
    },
    {
      "epoch": 0.000103076171875,
      "step": 16888,
      "training_step_time": 0.7253499031066895
    },
    {
      "epoch": 0.000103082275390625,
      "model_forward_time": 0.11772918701171875,
      "step": 16889
    },
    {
      "epoch": 0.000103082275390625,
      "step": 16889,
      "training_step_time": 0.7514820098876953
    },
    {
      "epoch": 0.00010308837890625,
      "grad_norm": 0.17373761534690857,
      "learning_rate": 8.604984155922506e-05,
      "loss": 0.0653,
      "step": 16890
    },
    {
      "epoch": 0.00010308837890625,
      "model_forward_time": 0.11721491813659668,
      "step": 16890
    },
    {
      "epoch": 0.00010308837890625,
      "step": 16890,
      "training_step_time": 0.619208812713623
    },
    {
      "epoch": 0.000103094482421875,
      "model_forward_time": 0.12648606300354004,
      "step": 16891
    },
    {
      "epoch": 0.000103094482421875,
      "step": 16891,
      "training_step_time": 0.6462757587432861
    },
    {
      "epoch": 0.0001031005859375,
      "model_forward_time": 0.11842060089111328,
      "step": 16892
    },
    {
      "epoch": 0.0001031005859375,
      "step": 16892,
      "training_step_time": 0.6519370079040527
    },
    {
      "epoch": 0.000103106689453125,
      "model_forward_time": 0.1157083511352539,
      "step": 16893
    },
    {
      "epoch": 0.000103106689453125,
      "step": 16893,
      "training_step_time": 0.6442677974700928
    },
    {
      "epoch": 0.00010311279296875,
      "model_forward_time": 0.12163782119750977,
      "step": 16894
    },
    {
      "epoch": 0.00010311279296875,
      "step": 16894,
      "training_step_time": 0.6776115894317627
    },
    {
      "epoch": 0.000103118896484375,
      "model_forward_time": 0.12694025039672852,
      "step": 16895
    },
    {
      "epoch": 0.000103118896484375,
      "step": 16895,
      "training_step_time": 0.6107337474822998
    },
    {
      "epoch": 0.000103125,
      "model_forward_time": 0.11963367462158203,
      "step": 16896
    },
    {
      "epoch": 0.000103125,
      "step": 16896,
      "training_step_time": 0.6913211345672607
    },
    {
      "epoch": 0.000103131103515625,
      "model_forward_time": 0.11922121047973633,
      "step": 16897
    },
    {
      "epoch": 0.000103131103515625,
      "step": 16897,
      "training_step_time": 0.7019262313842773
    },
    {
      "epoch": 0.00010313720703125,
      "model_forward_time": 0.11703896522521973,
      "step": 16898
    },
    {
      "epoch": 0.00010313720703125,
      "step": 16898,
      "training_step_time": 0.6560862064361572
    },
    {
      "epoch": 0.000103143310546875,
      "model_forward_time": 0.1213369369506836,
      "step": 16899
    },
    {
      "epoch": 0.000103143310546875,
      "step": 16899,
      "training_step_time": 0.6428506374359131
    },
    {
      "epoch": 0.0001031494140625,
      "grad_norm": 0.15088550746440887,
      "learning_rate": 8.603074020699393e-05,
      "loss": 0.0697,
      "step": 16900
    },
    {
      "epoch": 0.0001031494140625,
      "model_forward_time": 0.1163625717163086,
      "step": 16900
    },
    {
      "epoch": 0.0001031494140625,
      "step": 16900,
      "training_step_time": 0.6550209522247314
    },
    {
      "epoch": 0.000103155517578125,
      "model_forward_time": 0.12424874305725098,
      "step": 16901
    },
    {
      "epoch": 0.000103155517578125,
      "step": 16901,
      "training_step_time": 0.6587526798248291
    },
    {
      "epoch": 0.00010316162109375,
      "model_forward_time": 0.12177658081054688,
      "step": 16902
    },
    {
      "epoch": 0.00010316162109375,
      "step": 16902,
      "training_step_time": 0.6234633922576904
    },
    {
      "epoch": 0.000103167724609375,
      "model_forward_time": 0.1257333755493164,
      "step": 16903
    },
    {
      "epoch": 0.000103167724609375,
      "step": 16903,
      "training_step_time": 0.7088255882263184
    },
    {
      "epoch": 0.000103173828125,
      "model_forward_time": 0.12339353561401367,
      "step": 16904
    },
    {
      "epoch": 0.000103173828125,
      "step": 16904,
      "training_step_time": 0.6378464698791504
    },
    {
      "epoch": 0.000103179931640625,
      "model_forward_time": 0.11880302429199219,
      "step": 16905
    },
    {
      "epoch": 0.000103179931640625,
      "step": 16905,
      "training_step_time": 0.6794588565826416
    },
    {
      "epoch": 0.00010318603515625,
      "model_forward_time": 0.118133544921875,
      "step": 16906
    },
    {
      "epoch": 0.00010318603515625,
      "step": 16906,
      "training_step_time": 0.7315325736999512
    },
    {
      "epoch": 0.000103192138671875,
      "model_forward_time": 0.11605453491210938,
      "step": 16907
    },
    {
      "epoch": 0.000103192138671875,
      "step": 16907,
      "training_step_time": 0.7103672027587891
    },
    {
      "epoch": 0.0001031982421875,
      "model_forward_time": 0.11622214317321777,
      "step": 16908
    },
    {
      "epoch": 0.0001031982421875,
      "step": 16908,
      "training_step_time": 0.6295619010925293
    },
    {
      "epoch": 0.000103204345703125,
      "model_forward_time": 0.11623978614807129,
      "step": 16909
    },
    {
      "epoch": 0.000103204345703125,
      "step": 16909,
      "training_step_time": 0.6523463726043701
    },
    {
      "epoch": 0.00010321044921875,
      "grad_norm": 0.15286217629909515,
      "learning_rate": 8.601162790957525e-05,
      "loss": 0.0608,
      "step": 16910
    },
    {
      "epoch": 0.00010321044921875,
      "model_forward_time": 0.1190791130065918,
      "step": 16910
    },
    {
      "epoch": 0.00010321044921875,
      "step": 16910,
      "training_step_time": 0.6882703304290771
    },
    {
      "epoch": 0.000103216552734375,
      "model_forward_time": 0.1183931827545166,
      "step": 16911
    },
    {
      "epoch": 0.000103216552734375,
      "step": 16911,
      "training_step_time": 0.670346736907959
    },
    {
      "epoch": 0.00010322265625,
      "model_forward_time": 0.11780357360839844,
      "step": 16912
    },
    {
      "epoch": 0.00010322265625,
      "step": 16912,
      "training_step_time": 0.6517581939697266
    },
    {
      "epoch": 0.000103228759765625,
      "model_forward_time": 0.12038636207580566,
      "step": 16913
    },
    {
      "epoch": 0.000103228759765625,
      "step": 16913,
      "training_step_time": 0.6644120216369629
    },
    {
      "epoch": 0.00010323486328125,
      "model_forward_time": 0.11684989929199219,
      "step": 16914
    },
    {
      "epoch": 0.00010323486328125,
      "step": 16914,
      "training_step_time": 0.7460675239562988
    },
    {
      "epoch": 0.000103240966796875,
      "model_forward_time": 0.12126946449279785,
      "step": 16915
    },
    {
      "epoch": 0.000103240966796875,
      "step": 16915,
      "training_step_time": 0.6748659610748291
    },
    {
      "epoch": 0.0001032470703125,
      "model_forward_time": 0.11907601356506348,
      "step": 16916
    },
    {
      "epoch": 0.0001032470703125,
      "step": 16916,
      "training_step_time": 0.6242334842681885
    },
    {
      "epoch": 0.000103253173828125,
      "model_forward_time": 0.11873888969421387,
      "step": 16917
    },
    {
      "epoch": 0.000103253173828125,
      "step": 16917,
      "training_step_time": 0.6418752670288086
    },
    {
      "epoch": 0.00010325927734375,
      "model_forward_time": 0.1180872917175293,
      "step": 16918
    },
    {
      "epoch": 0.00010325927734375,
      "step": 16918,
      "training_step_time": 0.6687667369842529
    },
    {
      "epoch": 0.000103265380859375,
      "model_forward_time": 0.12022185325622559,
      "step": 16919
    },
    {
      "epoch": 0.000103265380859375,
      "step": 16919,
      "training_step_time": 0.6601755619049072
    },
    {
      "epoch": 0.000103271484375,
      "grad_norm": 0.16996654868125916,
      "learning_rate": 8.599250467277483e-05,
      "loss": 0.0578,
      "step": 16920
    },
    {
      "epoch": 0.000103271484375,
      "model_forward_time": 0.12131571769714355,
      "step": 16920
    },
    {
      "epoch": 0.000103271484375,
      "step": 16920,
      "training_step_time": 0.6486263275146484
    },
    {
      "epoch": 0.000103277587890625,
      "model_forward_time": 0.11900544166564941,
      "step": 16921
    },
    {
      "epoch": 0.000103277587890625,
      "step": 16921,
      "training_step_time": 0.626366376876831
    },
    {
      "epoch": 0.00010328369140625,
      "model_forward_time": 0.12391066551208496,
      "step": 16922
    },
    {
      "epoch": 0.00010328369140625,
      "step": 16922,
      "training_step_time": 0.7029626369476318
    },
    {
      "epoch": 0.000103289794921875,
      "model_forward_time": 0.14265060424804688,
      "step": 16923
    },
    {
      "epoch": 0.000103289794921875,
      "step": 16923,
      "training_step_time": 0.7500131130218506
    },
    {
      "epoch": 0.0001032958984375,
      "model_forward_time": 0.11840152740478516,
      "step": 16924
    },
    {
      "epoch": 0.0001032958984375,
      "step": 16924,
      "training_step_time": 0.723750114440918
    },
    {
      "epoch": 0.000103302001953125,
      "model_forward_time": 0.1226189136505127,
      "step": 16925
    },
    {
      "epoch": 0.000103302001953125,
      "step": 16925,
      "training_step_time": 0.6756069660186768
    },
    {
      "epoch": 0.00010330810546875,
      "model_forward_time": 0.11778378486633301,
      "step": 16926
    },
    {
      "epoch": 0.00010330810546875,
      "step": 16926,
      "training_step_time": 0.6706697940826416
    },
    {
      "epoch": 0.000103314208984375,
      "model_forward_time": 0.1180875301361084,
      "step": 16927
    },
    {
      "epoch": 0.000103314208984375,
      "step": 16927,
      "training_step_time": 0.6512489318847656
    },
    {
      "epoch": 0.0001033203125,
      "model_forward_time": 0.12003612518310547,
      "step": 16928
    },
    {
      "epoch": 0.0001033203125,
      "step": 16928,
      "training_step_time": 0.6010255813598633
    },
    {
      "epoch": 0.000103326416015625,
      "model_forward_time": 0.12093448638916016,
      "step": 16929
    },
    {
      "epoch": 0.000103326416015625,
      "step": 16929,
      "training_step_time": 0.6357245445251465
    },
    {
      "epoch": 0.00010333251953125,
      "grad_norm": 0.20918430387973785,
      "learning_rate": 8.597337050240184e-05,
      "loss": 0.057,
      "step": 16930
    },
    {
      "epoch": 0.00010333251953125,
      "model_forward_time": 0.11971044540405273,
      "step": 16930
    },
    {
      "epoch": 0.00010333251953125,
      "step": 16930,
      "training_step_time": 0.634221076965332
    },
    {
      "epoch": 0.000103338623046875,
      "model_forward_time": 0.12975430488586426,
      "step": 16931
    },
    {
      "epoch": 0.000103338623046875,
      "step": 16931,
      "training_step_time": 0.6449644565582275
    },
    {
      "epoch": 0.0001033447265625,
      "model_forward_time": 0.1198427677154541,
      "step": 16932
    },
    {
      "epoch": 0.0001033447265625,
      "step": 16932,
      "training_step_time": 0.734095573425293
    },
    {
      "epoch": 0.000103350830078125,
      "model_forward_time": 0.12108707427978516,
      "step": 16933
    },
    {
      "epoch": 0.000103350830078125,
      "step": 16933,
      "training_step_time": 0.6329586505889893
    },
    {
      "epoch": 0.00010335693359375,
      "model_forward_time": 0.11893868446350098,
      "step": 16934
    },
    {
      "epoch": 0.00010335693359375,
      "step": 16934,
      "training_step_time": 0.7396113872528076
    },
    {
      "epoch": 0.000103363037109375,
      "model_forward_time": 0.11788511276245117,
      "step": 16935
    },
    {
      "epoch": 0.000103363037109375,
      "step": 16935,
      "training_step_time": 0.5490739345550537
    },
    {
      "epoch": 0.000103369140625,
      "model_forward_time": 0.11764097213745117,
      "step": 16936
    },
    {
      "epoch": 0.000103369140625,
      "step": 16936,
      "training_step_time": 0.5313997268676758
    },
    {
      "epoch": 0.000103375244140625,
      "model_forward_time": 0.11618638038635254,
      "step": 16937
    },
    {
      "epoch": 0.000103375244140625,
      "step": 16937,
      "training_step_time": 0.49529218673706055
    },
    {
      "epoch": 0.00010338134765625,
      "model_forward_time": 0.11588573455810547,
      "step": 16938
    },
    {
      "epoch": 0.00010338134765625,
      "step": 16938,
      "training_step_time": 0.43971824645996094
    },
    {
      "epoch": 0.000103387451171875,
      "model_forward_time": 0.11625051498413086,
      "step": 16939
    },
    {
      "epoch": 0.000103387451171875,
      "step": 16939,
      "training_step_time": 0.42864370346069336
    },
    {
      "epoch": 0.0001033935546875,
      "grad_norm": 0.18405361473560333,
      "learning_rate": 8.595422540426869e-05,
      "loss": 0.0582,
      "step": 16940
    },
    {
      "epoch": 0.0001033935546875,
      "model_forward_time": 0.11510825157165527,
      "step": 16940
    },
    {
      "epoch": 0.0001033935546875,
      "step": 16940,
      "training_step_time": 0.4418039321899414
    },
    {
      "epoch": 0.000103399658203125,
      "model_forward_time": 0.11609053611755371,
      "step": 16941
    },
    {
      "epoch": 0.000103399658203125,
      "step": 16941,
      "training_step_time": 0.4118683338165283
    },
    {
      "epoch": 0.00010340576171875,
      "model_forward_time": 0.11547303199768066,
      "step": 16942
    },
    {
      "epoch": 0.00010340576171875,
      "step": 16942,
      "training_step_time": 0.41458749771118164
    },
    {
      "epoch": 0.000103411865234375,
      "model_forward_time": 0.11507201194763184,
      "step": 16943
    },
    {
      "epoch": 0.000103411865234375,
      "step": 16943,
      "training_step_time": 0.4237997531890869
    },
    {
      "epoch": 0.00010341796875,
      "model_forward_time": 0.11525774002075195,
      "step": 16944
    },
    {
      "epoch": 0.00010341796875,
      "step": 16944,
      "training_step_time": 0.4407813549041748
    },
    {
      "epoch": 0.000103424072265625,
      "model_forward_time": 0.11497926712036133,
      "step": 16945
    },
    {
      "epoch": 0.000103424072265625,
      "step": 16945,
      "training_step_time": 0.4493522644042969
    },
    {
      "epoch": 0.00010343017578125,
      "model_forward_time": 0.11588788032531738,
      "step": 16946
    },
    {
      "epoch": 0.00010343017578125,
      "step": 16946,
      "training_step_time": 0.4457073211669922
    },
    {
      "epoch": 0.000103436279296875,
      "model_forward_time": 0.11869263648986816,
      "step": 16947
    },
    {
      "epoch": 0.000103436279296875,
      "step": 16947,
      "training_step_time": 0.5010132789611816
    },
    {
      "epoch": 0.0001034423828125,
      "model_forward_time": 0.1160893440246582,
      "step": 16948
    },
    {
      "epoch": 0.0001034423828125,
      "step": 16948,
      "training_step_time": 0.3909037113189697
    },
    {
      "epoch": 0.000103448486328125,
      "model_forward_time": 0.11675548553466797,
      "step": 16949
    },
    {
      "epoch": 0.000103448486328125,
      "step": 16949,
      "training_step_time": 0.38442540168762207
    },
    {
      "epoch": 0.00010345458984375,
      "grad_norm": 0.16969037055969238,
      "learning_rate": 8.59350693841912e-05,
      "loss": 0.0551,
      "step": 16950
    },
    {
      "epoch": 0.00010345458984375,
      "model_forward_time": 0.11498594284057617,
      "step": 16950
    },
    {
      "epoch": 0.00010345458984375,
      "step": 16950,
      "training_step_time": 0.43082165718078613
    },
    {
      "epoch": 0.000103460693359375,
      "model_forward_time": 0.11486124992370605,
      "step": 16951
    },
    {
      "epoch": 0.000103460693359375,
      "step": 16951,
      "training_step_time": 0.46692490577697754
    },
    {
      "epoch": 0.000103466796875,
      "model_forward_time": 0.11510014533996582,
      "step": 16952
    },
    {
      "epoch": 0.000103466796875,
      "step": 16952,
      "training_step_time": 0.3930373191833496
    },
    {
      "epoch": 0.000103472900390625,
      "model_forward_time": 0.11614680290222168,
      "step": 16953
    },
    {
      "epoch": 0.000103472900390625,
      "step": 16953,
      "training_step_time": 0.3955397605895996
    },
    {
      "epoch": 0.00010347900390625,
      "model_forward_time": 0.11565899848937988,
      "step": 16954
    },
    {
      "epoch": 0.00010347900390625,
      "step": 16954,
      "training_step_time": 0.39339518547058105
    },
    {
      "epoch": 0.000103485107421875,
      "model_forward_time": 0.11469054222106934,
      "step": 16955
    },
    {
      "epoch": 0.000103485107421875,
      "step": 16955,
      "training_step_time": 0.37615156173706055
    },
    {
      "epoch": 0.0001034912109375,
      "model_forward_time": 0.1147317886352539,
      "step": 16956
    },
    {
      "epoch": 0.0001034912109375,
      "step": 16956,
      "training_step_time": 0.3929479122161865
    },
    {
      "epoch": 0.000103497314453125,
      "model_forward_time": 0.11479806900024414,
      "step": 16957
    },
    {
      "epoch": 0.000103497314453125,
      "step": 16957,
      "training_step_time": 0.40978574752807617
    },
    {
      "epoch": 0.00010350341796875,
      "model_forward_time": 0.1154634952545166,
      "step": 16958
    },
    {
      "epoch": 0.00010350341796875,
      "step": 16958,
      "training_step_time": 0.4330892562866211
    },
    {
      "epoch": 0.000103509521484375,
      "model_forward_time": 0.11514925956726074,
      "step": 16959
    },
    {
      "epoch": 0.000103509521484375,
      "step": 16959,
      "training_step_time": 0.3660757541656494
    },
    {
      "epoch": 0.000103515625,
      "grad_norm": 0.12205861508846283,
      "learning_rate": 8.591590244798844e-05,
      "loss": 0.0615,
      "step": 16960
    },
    {
      "epoch": 0.000103515625,
      "model_forward_time": 0.11511993408203125,
      "step": 16960
    },
    {
      "epoch": 0.000103515625,
      "step": 16960,
      "training_step_time": 0.44537997245788574
    },
    {
      "epoch": 0.000103521728515625,
      "model_forward_time": 0.11530566215515137,
      "step": 16961
    },
    {
      "epoch": 0.000103521728515625,
      "step": 16961,
      "training_step_time": 0.43961143493652344
    },
    {
      "epoch": 0.00010352783203125,
      "model_forward_time": 0.11503434181213379,
      "step": 16962
    },
    {
      "epoch": 0.00010352783203125,
      "step": 16962,
      "training_step_time": 0.39836788177490234
    },
    {
      "epoch": 0.000103533935546875,
      "model_forward_time": 0.11557292938232422,
      "step": 16963
    },
    {
      "epoch": 0.000103533935546875,
      "step": 16963,
      "training_step_time": 0.40809059143066406
    },
    {
      "epoch": 0.0001035400390625,
      "model_forward_time": 0.11517715454101562,
      "step": 16964
    },
    {
      "epoch": 0.0001035400390625,
      "step": 16964,
      "training_step_time": 0.4407010078430176
    },
    {
      "epoch": 0.000103546142578125,
      "model_forward_time": 0.11513948440551758,
      "step": 16965
    },
    {
      "epoch": 0.000103546142578125,
      "step": 16965,
      "training_step_time": 0.47280168533325195
    },
    {
      "epoch": 0.00010355224609375,
      "model_forward_time": 0.11503219604492188,
      "step": 16966
    },
    {
      "epoch": 0.00010355224609375,
      "step": 16966,
      "training_step_time": 0.38453149795532227
    },
    {
      "epoch": 0.000103558349609375,
      "model_forward_time": 0.11493849754333496,
      "step": 16967
    },
    {
      "epoch": 0.000103558349609375,
      "step": 16967,
      "training_step_time": 0.38419342041015625
    },
    {
      "epoch": 0.000103564453125,
      "model_forward_time": 0.11453962326049805,
      "step": 16968
    },
    {
      "epoch": 0.000103564453125,
      "step": 16968,
      "training_step_time": 0.38831353187561035
    },
    {
      "epoch": 0.000103570556640625,
      "model_forward_time": 0.1148529052734375,
      "step": 16969
    },
    {
      "epoch": 0.000103570556640625,
      "step": 16969,
      "training_step_time": 0.39705538749694824
    },
    {
      "epoch": 0.00010357666015625,
      "grad_norm": 0.1343534290790558,
      "learning_rate": 8.589672460148281e-05,
      "loss": 0.0606,
      "step": 16970
    },
    {
      "epoch": 0.00010357666015625,
      "model_forward_time": 0.11470460891723633,
      "step": 16970
    },
    {
      "epoch": 0.00010357666015625,
      "step": 16970,
      "training_step_time": 0.38877201080322266
    },
    {
      "epoch": 0.000103582763671875,
      "model_forward_time": 0.11516499519348145,
      "step": 16971
    },
    {
      "epoch": 0.000103582763671875,
      "step": 16971,
      "training_step_time": 0.4440746307373047
    },
    {
      "epoch": 0.0001035888671875,
      "model_forward_time": 0.11564302444458008,
      "step": 16972
    },
    {
      "epoch": 0.0001035888671875,
      "step": 16972,
      "training_step_time": 0.4071049690246582
    },
    {
      "epoch": 0.000103594970703125,
      "model_forward_time": 0.11505842208862305,
      "step": 16973
    },
    {
      "epoch": 0.000103594970703125,
      "step": 16973,
      "training_step_time": 0.39397478103637695
    },
    {
      "epoch": 0.00010360107421875,
      "model_forward_time": 0.11575007438659668,
      "step": 16974
    },
    {
      "epoch": 0.00010360107421875,
      "step": 16974,
      "training_step_time": 0.4918973445892334
    },
    {
      "epoch": 0.000103607177734375,
      "model_forward_time": 0.11484551429748535,
      "step": 16975
    },
    {
      "epoch": 0.000103607177734375,
      "step": 16975,
      "training_step_time": 0.46245431900024414
    },
    {
      "epoch": 0.00010361328125,
      "model_forward_time": 0.11574029922485352,
      "step": 16976
    },
    {
      "epoch": 0.00010361328125,
      "step": 16976,
      "training_step_time": 0.4956185817718506
    },
    {
      "epoch": 0.000103619384765625,
      "model_forward_time": 0.11485910415649414,
      "step": 16977
    },
    {
      "epoch": 0.000103619384765625,
      "step": 16977,
      "training_step_time": 0.4122483730316162
    },
    {
      "epoch": 0.00010362548828125,
      "model_forward_time": 0.11532878875732422,
      "step": 16978
    },
    {
      "epoch": 0.00010362548828125,
      "step": 16978,
      "training_step_time": 0.47312331199645996
    },
    {
      "epoch": 0.000103631591796875,
      "model_forward_time": 0.11474180221557617,
      "step": 16979
    },
    {
      "epoch": 0.000103631591796875,
      "step": 16979,
      "training_step_time": 0.4097135066986084
    },
    {
      "epoch": 0.0001036376953125,
      "grad_norm": 0.17460136115550995,
      "learning_rate": 8.587753585050004e-05,
      "loss": 0.0547,
      "step": 16980
    },
    {
      "epoch": 0.0001036376953125,
      "model_forward_time": 0.11489319801330566,
      "step": 16980
    },
    {
      "epoch": 0.0001036376953125,
      "step": 16980,
      "training_step_time": 0.4119911193847656
    },
    {
      "epoch": 0.000103643798828125,
      "model_forward_time": 0.11472892761230469,
      "step": 16981
    },
    {
      "epoch": 0.000103643798828125,
      "step": 16981,
      "training_step_time": 0.39319467544555664
    },
    {
      "epoch": 0.00010364990234375,
      "model_forward_time": 0.11626648902893066,
      "step": 16982
    },
    {
      "epoch": 0.00010364990234375,
      "step": 16982,
      "training_step_time": 0.39720773696899414
    },
    {
      "epoch": 0.000103656005859375,
      "model_forward_time": 0.11492657661437988,
      "step": 16983
    },
    {
      "epoch": 0.000103656005859375,
      "step": 16983,
      "training_step_time": 0.3911263942718506
    },
    {
      "epoch": 0.000103662109375,
      "model_forward_time": 0.11536431312561035,
      "step": 16984
    },
    {
      "epoch": 0.000103662109375,
      "step": 16984,
      "training_step_time": 0.3848271369934082
    },
    {
      "epoch": 0.000103668212890625,
      "model_forward_time": 0.1152184009552002,
      "step": 16985
    },
    {
      "epoch": 0.000103668212890625,
      "step": 16985,
      "training_step_time": 1.127408504486084
    },
    {
      "epoch": 0.00010367431640625,
      "model_forward_time": 0.11375260353088379,
      "step": 16986
    },
    {
      "epoch": 0.00010367431640625,
      "step": 16986,
      "training_step_time": 0.3881556987762451
    },
    {
      "epoch": 0.000103680419921875,
      "model_forward_time": 0.11401176452636719,
      "step": 16987
    },
    {
      "epoch": 0.000103680419921875,
      "step": 16987,
      "training_step_time": 0.3994295597076416
    },
    {
      "epoch": 0.0001036865234375,
      "model_forward_time": 0.11392545700073242,
      "step": 16988
    },
    {
      "epoch": 0.0001036865234375,
      "step": 16988,
      "training_step_time": 0.3975944519042969
    },
    {
      "epoch": 0.000103692626953125,
      "model_forward_time": 0.11377143859863281,
      "step": 16989
    },
    {
      "epoch": 0.000103692626953125,
      "step": 16989,
      "training_step_time": 0.44927382469177246
    },
    {
      "epoch": 0.00010369873046875,
      "grad_norm": 0.2729155421257019,
      "learning_rate": 8.585833620086918e-05,
      "loss": 0.0611,
      "step": 16990
    },
    {
      "epoch": 0.00010369873046875,
      "model_forward_time": 0.11401891708374023,
      "step": 16990
    },
    {
      "epoch": 0.00010369873046875,
      "step": 16990,
      "training_step_time": 0.3846869468688965
    },
    {
      "epoch": 0.000103704833984375,
      "model_forward_time": 0.11435961723327637,
      "step": 16991
    },
    {
      "epoch": 0.000103704833984375,
      "step": 16991,
      "training_step_time": 0.6306149959564209
    },
    {
      "epoch": 0.0001037109375,
      "model_forward_time": 0.11463642120361328,
      "step": 16992
    },
    {
      "epoch": 0.0001037109375,
      "step": 16992,
      "training_step_time": 0.40921783447265625
    },
    {
      "epoch": 0.000103717041015625,
      "model_forward_time": 0.11428999900817871,
      "step": 16993
    },
    {
      "epoch": 0.000103717041015625,
      "step": 16993,
      "training_step_time": 0.4100823402404785
    },
    {
      "epoch": 0.00010372314453125,
      "model_forward_time": 0.11461019515991211,
      "step": 16994
    },
    {
      "epoch": 0.00010372314453125,
      "step": 16994,
      "training_step_time": 0.38211679458618164
    },
    {
      "epoch": 0.000103729248046875,
      "model_forward_time": 0.11508417129516602,
      "step": 16995
    },
    {
      "epoch": 0.000103729248046875,
      "step": 16995,
      "training_step_time": 0.3874845504760742
    },
    {
      "epoch": 0.0001037353515625,
      "model_forward_time": 0.11476421356201172,
      "step": 16996
    },
    {
      "epoch": 0.0001037353515625,
      "step": 16996,
      "training_step_time": 0.3783226013183594
    },
    {
      "epoch": 0.000103741455078125,
      "model_forward_time": 0.11428189277648926,
      "step": 16997
    },
    {
      "epoch": 0.000103741455078125,
      "step": 16997,
      "training_step_time": 0.8579912185668945
    },
    {
      "epoch": 0.00010374755859375,
      "model_forward_time": 0.11424827575683594,
      "step": 16998
    },
    {
      "epoch": 0.00010374755859375,
      "step": 16998,
      "training_step_time": 0.4007883071899414
    },
    {
      "epoch": 0.000103753662109375,
      "model_forward_time": 0.11459922790527344,
      "step": 16999
    },
    {
      "epoch": 0.000103753662109375,
      "step": 16999,
      "training_step_time": 0.39845824241638184
    },
    {
      "epoch": 0.000103759765625,
      "grad_norm": 0.17497150599956512,
      "learning_rate": 8.583912565842257e-05,
      "loss": 0.0656,
      "step": 17000
    },
    {
      "epoch": 0.000103759765625,
      "model_forward_time": 0.11226963996887207,
      "step": 17000
    },
    {
      "epoch": 0.000103759765625,
      "step": 17000,
      "training_step_time": 0.35123682022094727
    },
    {
      "epoch": 0.000103765869140625,
      "model_forward_time": 0.11248517036437988,
      "step": 17001
    },
    {
      "epoch": 0.000103765869140625,
      "step": 17001,
      "training_step_time": 0.3849935531616211
    },
    {
      "epoch": 0.00010377197265625,
      "model_forward_time": 0.1128692626953125,
      "step": 17002
    },
    {
      "epoch": 0.00010377197265625,
      "step": 17002,
      "training_step_time": 0.37250685691833496
    },
    {
      "epoch": 0.000103778076171875,
      "model_forward_time": 0.1130990982055664,
      "step": 17003
    },
    {
      "epoch": 0.000103778076171875,
      "step": 17003,
      "training_step_time": 0.38677358627319336
    },
    {
      "epoch": 0.0001037841796875,
      "model_forward_time": 0.11333441734313965,
      "step": 17004
    },
    {
      "epoch": 0.0001037841796875,
      "step": 17004,
      "training_step_time": 0.40265798568725586
    },
    {
      "epoch": 0.000103790283203125,
      "model_forward_time": 0.11434507369995117,
      "step": 17005
    },
    {
      "epoch": 0.000103790283203125,
      "step": 17005,
      "training_step_time": 0.4507002830505371
    },
    {
      "epoch": 0.00010379638671875,
      "model_forward_time": 0.11521458625793457,
      "step": 17006
    },
    {
      "epoch": 0.00010379638671875,
      "step": 17006,
      "training_step_time": 0.39615559577941895
    },
    {
      "epoch": 0.000103802490234375,
      "model_forward_time": 0.11421895027160645,
      "step": 17007
    },
    {
      "epoch": 0.000103802490234375,
      "step": 17007,
      "training_step_time": 0.3932766914367676
    },
    {
      "epoch": 0.00010380859375,
      "model_forward_time": 0.11488151550292969,
      "step": 17008
    },
    {
      "epoch": 0.00010380859375,
      "step": 17008,
      "training_step_time": 0.38650989532470703
    },
    {
      "epoch": 0.000103814697265625,
      "model_forward_time": 0.11467194557189941,
      "step": 17009
    },
    {
      "epoch": 0.000103814697265625,
      "step": 17009,
      "training_step_time": 0.3900294303894043
    },
    {
      "epoch": 0.00010382080078125,
      "grad_norm": 0.16230648756027222,
      "learning_rate": 8.581990422899585e-05,
      "loss": 0.0577,
      "step": 17010
    },
    {
      "epoch": 0.00010382080078125,
      "model_forward_time": 0.1152191162109375,
      "step": 17010
    },
    {
      "epoch": 0.00010382080078125,
      "step": 17010,
      "training_step_time": 0.41615772247314453
    },
    {
      "epoch": 0.000103826904296875,
      "model_forward_time": 0.11483120918273926,
      "step": 17011
    },
    {
      "epoch": 0.000103826904296875,
      "step": 17011,
      "training_step_time": 0.3934934139251709
    },
    {
      "epoch": 0.0001038330078125,
      "model_forward_time": 0.11541628837585449,
      "step": 17012
    },
    {
      "epoch": 0.0001038330078125,
      "step": 17012,
      "training_step_time": 0.4544975757598877
    },
    {
      "epoch": 0.000103839111328125,
      "model_forward_time": 0.11475753784179688,
      "step": 17013
    },
    {
      "epoch": 0.000103839111328125,
      "step": 17013,
      "training_step_time": 0.4246852397918701
    },
    {
      "epoch": 0.00010384521484375,
      "model_forward_time": 0.11493778228759766,
      "step": 17014
    },
    {
      "epoch": 0.00010384521484375,
      "step": 17014,
      "training_step_time": 0.40613245964050293
    },
    {
      "epoch": 0.000103851318359375,
      "model_forward_time": 0.1149909496307373,
      "step": 17015
    },
    {
      "epoch": 0.000103851318359375,
      "step": 17015,
      "training_step_time": 0.4030020236968994
    },
    {
      "epoch": 0.000103857421875,
      "model_forward_time": 0.11496233940124512,
      "step": 17016
    },
    {
      "epoch": 0.000103857421875,
      "step": 17016,
      "training_step_time": 0.40105247497558594
    },
    {
      "epoch": 0.000103863525390625,
      "model_forward_time": 0.11492347717285156,
      "step": 17017
    },
    {
      "epoch": 0.000103863525390625,
      "step": 17017,
      "training_step_time": 0.3867015838623047
    },
    {
      "epoch": 0.00010386962890625,
      "model_forward_time": 0.1150825023651123,
      "step": 17018
    },
    {
      "epoch": 0.00010386962890625,
      "step": 17018,
      "training_step_time": 0.4516787528991699
    },
    {
      "epoch": 0.000103875732421875,
      "model_forward_time": 0.11527156829833984,
      "step": 17019
    },
    {
      "epoch": 0.000103875732421875,
      "step": 17019,
      "training_step_time": 0.4275553226470947
    },
    {
      "epoch": 0.0001038818359375,
      "grad_norm": 0.21527042984962463,
      "learning_rate": 8.5800671918428e-05,
      "loss": 0.0603,
      "step": 17020
    },
    {
      "epoch": 0.0001038818359375,
      "model_forward_time": 0.11513042449951172,
      "step": 17020
    },
    {
      "epoch": 0.0001038818359375,
      "step": 17020,
      "training_step_time": 0.4747951030731201
    },
    {
      "epoch": 0.000103887939453125,
      "model_forward_time": 0.11546826362609863,
      "step": 17021
    },
    {
      "epoch": 0.000103887939453125,
      "step": 17021,
      "training_step_time": 0.39341163635253906
    },
    {
      "epoch": 0.00010389404296875,
      "model_forward_time": 0.11495399475097656,
      "step": 17022
    },
    {
      "epoch": 0.00010389404296875,
      "step": 17022,
      "training_step_time": 0.4036386013031006
    },
    {
      "epoch": 0.000103900146484375,
      "model_forward_time": 0.11527204513549805,
      "step": 17023
    },
    {
      "epoch": 0.000103900146484375,
      "step": 17023,
      "training_step_time": 0.43465471267700195
    },
    {
      "epoch": 0.00010390625,
      "model_forward_time": 0.11486077308654785,
      "step": 17024
    },
    {
      "epoch": 0.00010390625,
      "step": 17024,
      "training_step_time": 0.45398497581481934
    },
    {
      "epoch": 0.000103912353515625,
      "model_forward_time": 0.11470770835876465,
      "step": 17025
    },
    {
      "epoch": 0.000103912353515625,
      "step": 17025,
      "training_step_time": 0.39728498458862305
    },
    {
      "epoch": 0.00010391845703125,
      "model_forward_time": 0.11463522911071777,
      "step": 17026
    },
    {
      "epoch": 0.00010391845703125,
      "step": 17026,
      "training_step_time": 0.44878339767456055
    },
    {
      "epoch": 0.000103924560546875,
      "model_forward_time": 0.11493062973022461,
      "step": 17027
    },
    {
      "epoch": 0.000103924560546875,
      "step": 17027,
      "training_step_time": 0.3944230079650879
    },
    {
      "epoch": 0.0001039306640625,
      "model_forward_time": 0.11540365219116211,
      "step": 17028
    },
    {
      "epoch": 0.0001039306640625,
      "step": 17028,
      "training_step_time": 0.39134979248046875
    },
    {
      "epoch": 0.000103936767578125,
      "model_forward_time": 0.11508059501647949,
      "step": 17029
    },
    {
      "epoch": 0.000103936767578125,
      "step": 17029,
      "training_step_time": 0.3944218158721924
    },
    {
      "epoch": 0.00010394287109375,
      "grad_norm": 0.16669675707817078,
      "learning_rate": 8.578142873256129e-05,
      "loss": 0.0579,
      "step": 17030
    },
    {
      "epoch": 0.00010394287109375,
      "model_forward_time": 0.1154017448425293,
      "step": 17030
    },
    {
      "epoch": 0.00010394287109375,
      "step": 17030,
      "training_step_time": 0.3935673236846924
    },
    {
      "epoch": 0.000103948974609375,
      "model_forward_time": 0.11637639999389648,
      "step": 17031
    },
    {
      "epoch": 0.000103948974609375,
      "step": 17031,
      "training_step_time": 0.388303279876709
    },
    {
      "epoch": 0.000103955078125,
      "model_forward_time": 0.1151583194732666,
      "step": 17032
    },
    {
      "epoch": 0.000103955078125,
      "step": 17032,
      "training_step_time": 0.3904712200164795
    },
    {
      "epoch": 0.000103961181640625,
      "model_forward_time": 0.11598682403564453,
      "step": 17033
    },
    {
      "epoch": 0.000103961181640625,
      "step": 17033,
      "training_step_time": 0.4656944274902344
    },
    {
      "epoch": 0.00010396728515625,
      "model_forward_time": 0.11534380912780762,
      "step": 17034
    },
    {
      "epoch": 0.00010396728515625,
      "step": 17034,
      "training_step_time": 0.42984509468078613
    },
    {
      "epoch": 0.000103973388671875,
      "model_forward_time": 0.11472773551940918,
      "step": 17035
    },
    {
      "epoch": 0.000103973388671875,
      "step": 17035,
      "training_step_time": 0.4775834083557129
    },
    {
      "epoch": 0.0001039794921875,
      "model_forward_time": 0.1159825325012207,
      "step": 17036
    },
    {
      "epoch": 0.0001039794921875,
      "step": 17036,
      "training_step_time": 0.38976144790649414
    },
    {
      "epoch": 0.000103985595703125,
      "model_forward_time": 0.11557507514953613,
      "step": 17037
    },
    {
      "epoch": 0.000103985595703125,
      "step": 17037,
      "training_step_time": 0.42491841316223145
    },
    {
      "epoch": 0.00010399169921875,
      "model_forward_time": 0.11589336395263672,
      "step": 17038
    },
    {
      "epoch": 0.00010399169921875,
      "step": 17038,
      "training_step_time": 0.3969235420227051
    },
    {
      "epoch": 0.000103997802734375,
      "model_forward_time": 0.11501288414001465,
      "step": 17039
    },
    {
      "epoch": 0.000103997802734375,
      "step": 17039,
      "training_step_time": 0.49395251274108887
    },
    {
      "epoch": 0.00010400390625,
      "grad_norm": 0.1125858873128891,
      "learning_rate": 8.576217467724128e-05,
      "loss": 0.0526,
      "step": 17040
    },
    {
      "epoch": 0.00010400390625,
      "model_forward_time": 0.1144709587097168,
      "step": 17040
    },
    {
      "epoch": 0.00010400390625,
      "step": 17040,
      "training_step_time": 0.42740392684936523
    },
    {
      "epoch": 0.000104010009765625,
      "model_forward_time": 0.11554408073425293,
      "step": 17041
    },
    {
      "epoch": 0.000104010009765625,
      "step": 17041,
      "training_step_time": 0.3909306526184082
    },
    {
      "epoch": 0.00010401611328125,
      "model_forward_time": 0.11461234092712402,
      "step": 17042
    },
    {
      "epoch": 0.00010401611328125,
      "step": 17042,
      "training_step_time": 0.40157628059387207
    },
    {
      "epoch": 0.000104022216796875,
      "model_forward_time": 0.11496090888977051,
      "step": 17043
    },
    {
      "epoch": 0.000104022216796875,
      "step": 17043,
      "training_step_time": 0.4118673801422119
    },
    {
      "epoch": 0.0001040283203125,
      "model_forward_time": 0.1150972843170166,
      "step": 17044
    },
    {
      "epoch": 0.0001040283203125,
      "step": 17044,
      "training_step_time": 0.4225795269012451
    },
    {
      "epoch": 0.000104034423828125,
      "model_forward_time": 0.11473464965820312,
      "step": 17045
    },
    {
      "epoch": 0.000104034423828125,
      "step": 17045,
      "training_step_time": 0.3885626792907715
    },
    {
      "epoch": 0.00010404052734375,
      "model_forward_time": 0.11641073226928711,
      "step": 17046
    },
    {
      "epoch": 0.00010404052734375,
      "step": 17046,
      "training_step_time": 0.4195075035095215
    },
    {
      "epoch": 0.000104046630859375,
      "model_forward_time": 0.11474299430847168,
      "step": 17047
    },
    {
      "epoch": 0.000104046630859375,
      "step": 17047,
      "training_step_time": 0.4425671100616455
    },
    {
      "epoch": 0.000104052734375,
      "model_forward_time": 0.11531424522399902,
      "step": 17048
    },
    {
      "epoch": 0.000104052734375,
      "step": 17048,
      "training_step_time": 0.3651542663574219
    },
    {
      "epoch": 0.000104058837890625,
      "model_forward_time": 0.11487841606140137,
      "step": 17049
    },
    {
      "epoch": 0.000104058837890625,
      "step": 17049,
      "training_step_time": 0.45948028564453125
    },
    {
      "epoch": 0.00010406494140625,
      "grad_norm": 0.12237082421779633,
      "learning_rate": 8.574290975831685e-05,
      "loss": 0.0548,
      "step": 17050
    },
    {
      "epoch": 0.00010406494140625,
      "model_forward_time": 0.11432790756225586,
      "step": 17050
    },
    {
      "epoch": 0.00010406494140625,
      "step": 17050,
      "training_step_time": 0.4722123146057129
    },
    {
      "epoch": 0.000104071044921875,
      "model_forward_time": 0.11425471305847168,
      "step": 17051
    },
    {
      "epoch": 0.000104071044921875,
      "step": 17051,
      "training_step_time": 0.3876156806945801
    },
    {
      "epoch": 0.0001040771484375,
      "model_forward_time": 0.1149134635925293,
      "step": 17052
    },
    {
      "epoch": 0.0001040771484375,
      "step": 17052,
      "training_step_time": 0.44537830352783203
    },
    {
      "epoch": 0.000104083251953125,
      "model_forward_time": 0.11476254463195801,
      "step": 17053
    },
    {
      "epoch": 0.000104083251953125,
      "step": 17053,
      "training_step_time": 0.45372748374938965
    },
    {
      "epoch": 0.00010408935546875,
      "model_forward_time": 0.11459803581237793,
      "step": 17054
    },
    {
      "epoch": 0.00010408935546875,
      "step": 17054,
      "training_step_time": 0.3935818672180176
    },
    {
      "epoch": 0.000104095458984375,
      "model_forward_time": 0.11422491073608398,
      "step": 17055
    },
    {
      "epoch": 0.000104095458984375,
      "step": 17055,
      "training_step_time": 0.3885223865509033
    },
    {
      "epoch": 0.0001041015625,
      "model_forward_time": 0.1153111457824707,
      "step": 17056
    },
    {
      "epoch": 0.0001041015625,
      "step": 17056,
      "training_step_time": 0.4114115238189697
    },
    {
      "epoch": 0.000104107666015625,
      "model_forward_time": 0.1148064136505127,
      "step": 17057
    },
    {
      "epoch": 0.000104107666015625,
      "step": 17057,
      "training_step_time": 0.3944084644317627
    },
    {
      "epoch": 0.00010411376953125,
      "model_forward_time": 0.11493301391601562,
      "step": 17058
    },
    {
      "epoch": 0.00010411376953125,
      "step": 17058,
      "training_step_time": 0.4022371768951416
    },
    {
      "epoch": 0.000104119873046875,
      "model_forward_time": 0.1145944595336914,
      "step": 17059
    },
    {
      "epoch": 0.000104119873046875,
      "step": 17059,
      "training_step_time": 0.6794302463531494
    },
    {
      "epoch": 0.0001041259765625,
      "grad_norm": 0.17162570357322693,
      "learning_rate": 8.572363398164017e-05,
      "loss": 0.0549,
      "step": 17060
    },
    {
      "epoch": 0.0001041259765625,
      "model_forward_time": 0.11494040489196777,
      "step": 17060
    },
    {
      "epoch": 0.0001041259765625,
      "step": 17060,
      "training_step_time": 0.38482189178466797
    },
    {
      "epoch": 0.000104132080078125,
      "model_forward_time": 0.11432242393493652,
      "step": 17061
    },
    {
      "epoch": 0.000104132080078125,
      "step": 17061,
      "training_step_time": 0.39385294914245605
    },
    {
      "epoch": 0.00010413818359375,
      "model_forward_time": 0.11451077461242676,
      "step": 17062
    },
    {
      "epoch": 0.00010413818359375,
      "step": 17062,
      "training_step_time": 0.4282855987548828
    },
    {
      "epoch": 0.000104144287109375,
      "model_forward_time": 0.11490440368652344,
      "step": 17063
    },
    {
      "epoch": 0.000104144287109375,
      "step": 17063,
      "training_step_time": 0.41863083839416504
    },
    {
      "epoch": 0.000104150390625,
      "model_forward_time": 0.11440014839172363,
      "step": 17064
    },
    {
      "epoch": 0.000104150390625,
      "step": 17064,
      "training_step_time": 0.4198455810546875
    },
    {
      "epoch": 0.000104156494140625,
      "model_forward_time": 0.11577749252319336,
      "step": 17065
    },
    {
      "epoch": 0.000104156494140625,
      "step": 17065,
      "training_step_time": 0.4521505832672119
    },
    {
      "epoch": 0.00010416259765625,
      "model_forward_time": 0.11495327949523926,
      "step": 17066
    },
    {
      "epoch": 0.00010416259765625,
      "step": 17066,
      "training_step_time": 0.44632720947265625
    },
    {
      "epoch": 0.000104168701171875,
      "model_forward_time": 0.1153709888458252,
      "step": 17067
    },
    {
      "epoch": 0.000104168701171875,
      "step": 17067,
      "training_step_time": 0.46855616569519043
    },
    {
      "epoch": 0.0001041748046875,
      "model_forward_time": 0.1149294376373291,
      "step": 17068
    },
    {
      "epoch": 0.0001041748046875,
      "step": 17068,
      "training_step_time": 0.38837647438049316
    },
    {
      "epoch": 0.000104180908203125,
      "model_forward_time": 0.11485505104064941,
      "step": 17069
    },
    {
      "epoch": 0.000104180908203125,
      "step": 17069,
      "training_step_time": 0.387176513671875
    },
    {
      "epoch": 0.00010418701171875,
      "grad_norm": 0.15217918157577515,
      "learning_rate": 8.570434735306671e-05,
      "loss": 0.0612,
      "step": 17070
    },
    {
      "epoch": 0.00010418701171875,
      "model_forward_time": 0.11471772193908691,
      "step": 17070
    },
    {
      "epoch": 0.00010418701171875,
      "step": 17070,
      "training_step_time": 0.39975929260253906
    },
    {
      "epoch": 0.000104193115234375,
      "model_forward_time": 0.11493945121765137,
      "step": 17071
    },
    {
      "epoch": 0.000104193115234375,
      "step": 17071,
      "training_step_time": 0.727196216583252
    },
    {
      "epoch": 0.00010419921875,
      "model_forward_time": 0.11509513854980469,
      "step": 17072
    },
    {
      "epoch": 0.00010419921875,
      "step": 17072,
      "training_step_time": 0.38971471786499023
    },
    {
      "epoch": 0.000104205322265625,
      "model_forward_time": 0.11487555503845215,
      "step": 17073
    },
    {
      "epoch": 0.000104205322265625,
      "step": 17073,
      "training_step_time": 0.3864893913269043
    },
    {
      "epoch": 0.00010421142578125,
      "model_forward_time": 0.11580872535705566,
      "step": 17074
    },
    {
      "epoch": 0.00010421142578125,
      "step": 17074,
      "training_step_time": 0.3840301036834717
    },
    {
      "epoch": 0.000104217529296875,
      "model_forward_time": 0.11423993110656738,
      "step": 17075
    },
    {
      "epoch": 0.000104217529296875,
      "step": 17075,
      "training_step_time": 0.3826906681060791
    },
    {
      "epoch": 0.0001042236328125,
      "model_forward_time": 0.11498236656188965,
      "step": 17076
    },
    {
      "epoch": 0.0001042236328125,
      "step": 17076,
      "training_step_time": 0.4648756980895996
    },
    {
      "epoch": 0.000104229736328125,
      "model_forward_time": 0.11463379859924316,
      "step": 17077
    },
    {
      "epoch": 0.000104229736328125,
      "step": 17077,
      "training_step_time": 0.7502622604370117
    },
    {
      "epoch": 0.00010423583984375,
      "model_forward_time": 0.11510968208312988,
      "step": 17078
    },
    {
      "epoch": 0.00010423583984375,
      "step": 17078,
      "training_step_time": 0.40353870391845703
    },
    {
      "epoch": 0.000104241943359375,
      "model_forward_time": 0.11464190483093262,
      "step": 17079
    },
    {
      "epoch": 0.000104241943359375,
      "step": 17079,
      "training_step_time": 0.4476888179779053
    },
    {
      "epoch": 0.000104248046875,
      "grad_norm": 0.1358596235513687,
      "learning_rate": 8.568504987845525e-05,
      "loss": 0.0607,
      "step": 17080
    },
    {
      "epoch": 0.000104248046875,
      "model_forward_time": 0.11598515510559082,
      "step": 17080
    },
    {
      "epoch": 0.000104248046875,
      "step": 17080,
      "training_step_time": 0.40841031074523926
    },
    {
      "epoch": 0.000104254150390625,
      "model_forward_time": 0.11436963081359863,
      "step": 17081
    },
    {
      "epoch": 0.000104254150390625,
      "step": 17081,
      "training_step_time": 0.4745974540710449
    },
    {
      "epoch": 0.00010426025390625,
      "model_forward_time": 0.11465048789978027,
      "step": 17082
    },
    {
      "epoch": 0.00010426025390625,
      "step": 17082,
      "training_step_time": 0.39011096954345703
    },
    {
      "epoch": 0.000104266357421875,
      "model_forward_time": 0.11457991600036621,
      "step": 17083
    },
    {
      "epoch": 0.000104266357421875,
      "step": 17083,
      "training_step_time": 0.3901042938232422
    },
    {
      "epoch": 0.0001042724609375,
      "model_forward_time": 0.11539816856384277,
      "step": 17084
    },
    {
      "epoch": 0.0001042724609375,
      "step": 17084,
      "training_step_time": 0.3828258514404297
    },
    {
      "epoch": 0.000104278564453125,
      "model_forward_time": 0.11471080780029297,
      "step": 17085
    },
    {
      "epoch": 0.000104278564453125,
      "step": 17085,
      "training_step_time": 0.38967084884643555
    },
    {
      "epoch": 0.00010428466796875,
      "model_forward_time": 0.11542940139770508,
      "step": 17086
    },
    {
      "epoch": 0.00010428466796875,
      "step": 17086,
      "training_step_time": 0.38911938667297363
    },
    {
      "epoch": 0.000104290771484375,
      "model_forward_time": 0.11540842056274414,
      "step": 17087
    },
    {
      "epoch": 0.000104290771484375,
      "step": 17087,
      "training_step_time": 0.3938899040222168
    },
    {
      "epoch": 0.000104296875,
      "model_forward_time": 0.11495804786682129,
      "step": 17088
    },
    {
      "epoch": 0.000104296875,
      "step": 17088,
      "training_step_time": 0.3979167938232422
    },
    {
      "epoch": 0.000104302978515625,
      "model_forward_time": 0.11578488349914551,
      "step": 17089
    },
    {
      "epoch": 0.000104302978515625,
      "step": 17089,
      "training_step_time": 0.6464121341705322
    },
    {
      "epoch": 0.00010430908203125,
      "grad_norm": 0.13490723073482513,
      "learning_rate": 8.566574156366784e-05,
      "loss": 0.0511,
      "step": 17090
    },
    {
      "epoch": 0.00010430908203125,
      "model_forward_time": 0.11460256576538086,
      "step": 17090
    },
    {
      "epoch": 0.00010430908203125,
      "step": 17090,
      "training_step_time": 0.3852043151855469
    },
    {
      "epoch": 0.000104315185546875,
      "model_forward_time": 0.11460757255554199,
      "step": 17091
    },
    {
      "epoch": 0.000104315185546875,
      "step": 17091,
      "training_step_time": 0.4709036350250244
    },
    {
      "epoch": 0.0001043212890625,
      "model_forward_time": 0.11492133140563965,
      "step": 17092
    },
    {
      "epoch": 0.0001043212890625,
      "step": 17092,
      "training_step_time": 0.4590158462524414
    },
    {
      "epoch": 0.000104327392578125,
      "model_forward_time": 0.1145169734954834,
      "step": 17093
    },
    {
      "epoch": 0.000104327392578125,
      "step": 17093,
      "training_step_time": 0.4878225326538086
    },
    {
      "epoch": 0.00010433349609375,
      "model_forward_time": 0.11465191841125488,
      "step": 17094
    },
    {
      "epoch": 0.00010433349609375,
      "step": 17094,
      "training_step_time": 0.3920722007751465
    },
    {
      "epoch": 0.000104339599609375,
      "model_forward_time": 0.11468219757080078,
      "step": 17095
    },
    {
      "epoch": 0.000104339599609375,
      "step": 17095,
      "training_step_time": 0.564000129699707
    },
    {
      "epoch": 0.000104345703125,
      "model_forward_time": 0.11490845680236816,
      "step": 17096
    },
    {
      "epoch": 0.000104345703125,
      "step": 17096,
      "training_step_time": 0.3817746639251709
    },
    {
      "epoch": 0.000104351806640625,
      "model_forward_time": 0.11505508422851562,
      "step": 17097
    },
    {
      "epoch": 0.000104351806640625,
      "step": 17097,
      "training_step_time": 0.38008570671081543
    },
    {
      "epoch": 0.00010435791015625,
      "model_forward_time": 0.11507058143615723,
      "step": 17098
    },
    {
      "epoch": 0.00010435791015625,
      "step": 17098,
      "training_step_time": 0.38210558891296387
    },
    {
      "epoch": 0.000104364013671875,
      "model_forward_time": 0.11510276794433594,
      "step": 17099
    },
    {
      "epoch": 0.000104364013671875,
      "step": 17099,
      "training_step_time": 0.380871057510376
    },
    {
      "epoch": 0.0001043701171875,
      "grad_norm": 0.1496427208185196,
      "learning_rate": 8.564642241456986e-05,
      "loss": 0.0564,
      "step": 17100
    },
    {
      "epoch": 0.0001043701171875,
      "model_forward_time": 0.1151285171508789,
      "step": 17100
    },
    {
      "epoch": 0.0001043701171875,
      "step": 17100,
      "training_step_time": 0.40093135833740234
    },
    {
      "epoch": 0.000104376220703125,
      "model_forward_time": 0.1152346134185791,
      "step": 17101
    },
    {
      "epoch": 0.000104376220703125,
      "step": 17101,
      "training_step_time": 0.702049970626831
    },
    {
      "epoch": 0.00010438232421875,
      "model_forward_time": 0.11479854583740234,
      "step": 17102
    },
    {
      "epoch": 0.00010438232421875,
      "step": 17102,
      "training_step_time": 0.39263176918029785
    },
    {
      "epoch": 0.000104388427734375,
      "model_forward_time": 0.11475801467895508,
      "step": 17103
    },
    {
      "epoch": 0.000104388427734375,
      "step": 17103,
      "training_step_time": 0.4014451503753662
    },
    {
      "epoch": 0.00010439453125,
      "model_forward_time": 0.11461305618286133,
      "step": 17104
    },
    {
      "epoch": 0.00010439453125,
      "step": 17104,
      "training_step_time": 0.3880348205566406
    },
    {
      "epoch": 0.000104400634765625,
      "model_forward_time": 0.11484384536743164,
      "step": 17105
    },
    {
      "epoch": 0.000104400634765625,
      "step": 17105,
      "training_step_time": 0.4388613700866699
    },
    {
      "epoch": 0.00010440673828125,
      "model_forward_time": 0.11568450927734375,
      "step": 17106
    },
    {
      "epoch": 0.00010440673828125,
      "step": 17106,
      "training_step_time": 0.4407334327697754
    },
    {
      "epoch": 0.000104412841796875,
      "model_forward_time": 0.1151583194732666,
      "step": 17107
    },
    {
      "epoch": 0.000104412841796875,
      "step": 17107,
      "training_step_time": 0.4751901626586914
    },
    {
      "epoch": 0.0001044189453125,
      "model_forward_time": 0.1150352954864502,
      "step": 17108
    },
    {
      "epoch": 0.0001044189453125,
      "step": 17108,
      "training_step_time": 0.4131934642791748
    },
    {
      "epoch": 0.000104425048828125,
      "model_forward_time": 0.11465144157409668,
      "step": 17109
    },
    {
      "epoch": 0.000104425048828125,
      "step": 17109,
      "training_step_time": 0.42264723777770996
    },
    {
      "epoch": 0.00010443115234375,
      "grad_norm": 0.2302144318819046,
      "learning_rate": 8.562709243702993e-05,
      "loss": 0.0573,
      "step": 17110
    },
    {
      "epoch": 0.00010443115234375,
      "model_forward_time": 0.11542320251464844,
      "step": 17110
    },
    {
      "epoch": 0.00010443115234375,
      "step": 17110,
      "training_step_time": 0.38586950302124023
    },
    {
      "epoch": 0.000104437255859375,
      "model_forward_time": 0.1157078742980957,
      "step": 17111
    },
    {
      "epoch": 0.000104437255859375,
      "step": 17111,
      "training_step_time": 0.3861813545227051
    },
    {
      "epoch": 0.000104443359375,
      "model_forward_time": 0.11499476432800293,
      "step": 17112
    },
    {
      "epoch": 0.000104443359375,
      "step": 17112,
      "training_step_time": 0.4014286994934082
    },
    {
      "epoch": 0.000104449462890625,
      "model_forward_time": 0.11489987373352051,
      "step": 17113
    },
    {
      "epoch": 0.000104449462890625,
      "step": 17113,
      "training_step_time": 0.4896833896636963
    },
    {
      "epoch": 0.00010445556640625,
      "model_forward_time": 0.11581897735595703,
      "step": 17114
    },
    {
      "epoch": 0.00010445556640625,
      "step": 17114,
      "training_step_time": 0.38957977294921875
    },
    {
      "epoch": 0.000104461669921875,
      "model_forward_time": 0.1323833465576172,
      "step": 17115
    },
    {
      "epoch": 0.000104461669921875,
      "step": 17115,
      "training_step_time": 0.3969287872314453
    },
    {
      "epoch": 0.0001044677734375,
      "model_forward_time": 0.11606359481811523,
      "step": 17116
    },
    {
      "epoch": 0.0001044677734375,
      "step": 17116,
      "training_step_time": 0.3941826820373535
    },
    {
      "epoch": 0.000104473876953125,
      "model_forward_time": 0.11608123779296875,
      "step": 17117
    },
    {
      "epoch": 0.000104473876953125,
      "step": 17117,
      "training_step_time": 0.41959667205810547
    },
    {
      "epoch": 0.00010447998046875,
      "model_forward_time": 0.11637520790100098,
      "step": 17118
    },
    {
      "epoch": 0.00010447998046875,
      "step": 17118,
      "training_step_time": 0.3850555419921875
    },
    {
      "epoch": 0.000104486083984375,
      "model_forward_time": 0.11438751220703125,
      "step": 17119
    },
    {
      "epoch": 0.000104486083984375,
      "step": 17119,
      "training_step_time": 0.7101237773895264
    },
    {
      "epoch": 0.0001044921875,
      "grad_norm": 0.20538471639156342,
      "learning_rate": 8.560775163691999e-05,
      "loss": 0.0606,
      "step": 17120
    },
    {
      "epoch": 0.0001044921875,
      "model_forward_time": 0.1157536506652832,
      "step": 17120
    },
    {
      "epoch": 0.0001044921875,
      "step": 17120,
      "training_step_time": 0.5081775188446045
    },
    {
      "epoch": 0.000104498291015625,
      "model_forward_time": 0.11512255668640137,
      "step": 17121
    },
    {
      "epoch": 0.000104498291015625,
      "step": 17121,
      "training_step_time": 0.4693930149078369
    },
    {
      "epoch": 0.00010450439453125,
      "model_forward_time": 0.11386632919311523,
      "step": 17122
    },
    {
      "epoch": 0.00010450439453125,
      "step": 17122,
      "training_step_time": 0.39853882789611816
    },
    {
      "epoch": 0.000104510498046875,
      "model_forward_time": 0.11389875411987305,
      "step": 17123
    },
    {
      "epoch": 0.000104510498046875,
      "step": 17123,
      "training_step_time": 0.41098546981811523
    },
    {
      "epoch": 0.0001045166015625,
      "model_forward_time": 0.11429548263549805,
      "step": 17124
    },
    {
      "epoch": 0.0001045166015625,
      "step": 17124,
      "training_step_time": 0.3839600086212158
    },
    {
      "epoch": 0.000104522705078125,
      "model_forward_time": 0.11505675315856934,
      "step": 17125
    },
    {
      "epoch": 0.000104522705078125,
      "step": 17125,
      "training_step_time": 0.3880128860473633
    },
    {
      "epoch": 0.00010452880859375,
      "model_forward_time": 0.1149144172668457,
      "step": 17126
    },
    {
      "epoch": 0.00010452880859375,
      "step": 17126,
      "training_step_time": 0.39513611793518066
    },
    {
      "epoch": 0.000104534912109375,
      "model_forward_time": 0.11499857902526855,
      "step": 17127
    },
    {
      "epoch": 0.000104534912109375,
      "step": 17127,
      "training_step_time": 0.4015185832977295
    },
    {
      "epoch": 0.000104541015625,
      "model_forward_time": 0.11562752723693848,
      "step": 17128
    },
    {
      "epoch": 0.000104541015625,
      "step": 17128,
      "training_step_time": 0.3824021816253662
    },
    {
      "epoch": 0.000104547119140625,
      "model_forward_time": 0.11531686782836914,
      "step": 17129
    },
    {
      "epoch": 0.000104547119140625,
      "step": 17129,
      "training_step_time": 0.3944852352142334
    },
    {
      "epoch": 0.00010455322265625,
      "grad_norm": 0.20503516495227814,
      "learning_rate": 8.558840002011528e-05,
      "loss": 0.0643,
      "step": 17130
    },
    {
      "epoch": 0.00010455322265625,
      "model_forward_time": 0.11511349678039551,
      "step": 17130
    },
    {
      "epoch": 0.00010455322265625,
      "step": 17130,
      "training_step_time": 0.4057760238647461
    },
    {
      "epoch": 0.000104559326171875,
      "model_forward_time": 0.11506128311157227,
      "step": 17131
    },
    {
      "epoch": 0.000104559326171875,
      "step": 17131,
      "training_step_time": 0.4194774627685547
    },
    {
      "epoch": 0.0001045654296875,
      "model_forward_time": 0.11591935157775879,
      "step": 17132
    },
    {
      "epoch": 0.0001045654296875,
      "step": 17132,
      "training_step_time": 0.407947301864624
    },
    {
      "epoch": 0.000104571533203125,
      "model_forward_time": 0.11517047882080078,
      "step": 17133
    },
    {
      "epoch": 0.000104571533203125,
      "step": 17133,
      "training_step_time": 0.4557666778564453
    },
    {
      "epoch": 0.00010457763671875,
      "model_forward_time": 0.11532759666442871,
      "step": 17134
    },
    {
      "epoch": 0.00010457763671875,
      "step": 17134,
      "training_step_time": 0.4292302131652832
    },
    {
      "epoch": 0.000104583740234375,
      "model_forward_time": 0.11501574516296387,
      "step": 17135
    },
    {
      "epoch": 0.000104583740234375,
      "step": 17135,
      "training_step_time": 0.46541595458984375
    },
    {
      "epoch": 0.00010458984375,
      "model_forward_time": 0.11619114875793457,
      "step": 17136
    },
    {
      "epoch": 0.00010458984375,
      "step": 17136,
      "training_step_time": 0.4924192428588867
    },
    {
      "epoch": 0.000104595947265625,
      "model_forward_time": 0.13330554962158203,
      "step": 17137
    },
    {
      "epoch": 0.000104595947265625,
      "step": 17137,
      "training_step_time": 0.5242154598236084
    },
    {
      "epoch": 0.00010460205078125,
      "model_forward_time": 0.1155092716217041,
      "step": 17138
    },
    {
      "epoch": 0.00010460205078125,
      "step": 17138,
      "training_step_time": 0.4006495475769043
    },
    {
      "epoch": 0.000104608154296875,
      "model_forward_time": 0.11485075950622559,
      "step": 17139
    },
    {
      "epoch": 0.000104608154296875,
      "step": 17139,
      "training_step_time": 0.3983578681945801
    },
    {
      "epoch": 0.0001046142578125,
      "grad_norm": 0.1754632443189621,
      "learning_rate": 8.556903759249428e-05,
      "loss": 0.0577,
      "step": 17140
    },
    {
      "epoch": 0.0001046142578125,
      "model_forward_time": 0.11567234992980957,
      "step": 17140
    },
    {
      "epoch": 0.0001046142578125,
      "step": 17140,
      "training_step_time": 0.40869832038879395
    },
    {
      "epoch": 0.000104620361328125,
      "model_forward_time": 0.1150808334350586,
      "step": 17141
    },
    {
      "epoch": 0.000104620361328125,
      "step": 17141,
      "training_step_time": 0.38532304763793945
    },
    {
      "epoch": 0.00010462646484375,
      "model_forward_time": 0.1155080795288086,
      "step": 17142
    },
    {
      "epoch": 0.00010462646484375,
      "step": 17142,
      "training_step_time": 0.39664578437805176
    },
    {
      "epoch": 0.000104632568359375,
      "model_forward_time": 0.11516618728637695,
      "step": 17143
    },
    {
      "epoch": 0.000104632568359375,
      "step": 17143,
      "training_step_time": 0.39856791496276855
    },
    {
      "epoch": 0.000104638671875,
      "model_forward_time": 0.11497807502746582,
      "step": 17144
    },
    {
      "epoch": 0.000104638671875,
      "step": 17144,
      "training_step_time": 0.3895683288574219
    },
    {
      "epoch": 0.000104644775390625,
      "model_forward_time": 0.11544513702392578,
      "step": 17145
    },
    {
      "epoch": 0.000104644775390625,
      "step": 17145,
      "training_step_time": 0.39550065994262695
    },
    {
      "epoch": 0.00010465087890625,
      "model_forward_time": 0.11538529396057129,
      "step": 17146
    },
    {
      "epoch": 0.00010465087890625,
      "step": 17146,
      "training_step_time": 0.5041749477386475
    },
    {
      "epoch": 0.000104656982421875,
      "model_forward_time": 0.11519670486450195,
      "step": 17147
    },
    {
      "epoch": 0.000104656982421875,
      "step": 17147,
      "training_step_time": 0.4438199996948242
    },
    {
      "epoch": 0.0001046630859375,
      "model_forward_time": 0.11527776718139648,
      "step": 17148
    },
    {
      "epoch": 0.0001046630859375,
      "step": 17148,
      "training_step_time": 0.3631408214569092
    },
    {
      "epoch": 0.000104669189453125,
      "model_forward_time": 0.11553215980529785,
      "step": 17149
    },
    {
      "epoch": 0.000104669189453125,
      "step": 17149,
      "training_step_time": 0.4683375358581543
    },
    {
      "epoch": 0.00010467529296875,
      "grad_norm": 0.17154991626739502,
      "learning_rate": 8.554966435993882e-05,
      "loss": 0.0575,
      "step": 17150
    },
    {
      "epoch": 0.00010467529296875,
      "model_forward_time": 0.1143343448638916,
      "step": 17150
    },
    {
      "epoch": 0.00010467529296875,
      "step": 17150,
      "training_step_time": 0.4744565486907959
    },
    {
      "epoch": 0.000104681396484375,
      "model_forward_time": 0.11481308937072754,
      "step": 17151
    },
    {
      "epoch": 0.000104681396484375,
      "step": 17151,
      "training_step_time": 0.40595293045043945
    },
    {
      "epoch": 0.0001046875,
      "model_forward_time": 0.1147153377532959,
      "step": 17152
    },
    {
      "epoch": 0.0001046875,
      "step": 17152,
      "training_step_time": 0.4540135860443115
    },
    {
      "epoch": 0.000104693603515625,
      "model_forward_time": 0.11500167846679688,
      "step": 17153
    },
    {
      "epoch": 0.000104693603515625,
      "step": 17153,
      "training_step_time": 0.39234185218811035
    },
    {
      "epoch": 0.00010469970703125,
      "model_forward_time": 0.11536002159118652,
      "step": 17154
    },
    {
      "epoch": 0.00010469970703125,
      "step": 17154,
      "training_step_time": 0.38522839546203613
    },
    {
      "epoch": 0.000104705810546875,
      "model_forward_time": 0.11515474319458008,
      "step": 17155
    },
    {
      "epoch": 0.000104705810546875,
      "step": 17155,
      "training_step_time": 0.39102911949157715
    },
    {
      "epoch": 0.0001047119140625,
      "model_forward_time": 0.11586999893188477,
      "step": 17156
    },
    {
      "epoch": 0.0001047119140625,
      "step": 17156,
      "training_step_time": 0.4542675018310547
    },
    {
      "epoch": 0.000104718017578125,
      "model_forward_time": 0.11542749404907227,
      "step": 17157
    },
    {
      "epoch": 0.000104718017578125,
      "step": 17157,
      "training_step_time": 0.3921639919281006
    },
    {
      "epoch": 0.00010472412109375,
      "model_forward_time": 0.1154165267944336,
      "step": 17158
    },
    {
      "epoch": 0.00010472412109375,
      "step": 17158,
      "training_step_time": 0.3927123546600342
    },
    {
      "epoch": 0.000104730224609375,
      "model_forward_time": 0.11515927314758301,
      "step": 17159
    },
    {
      "epoch": 0.000104730224609375,
      "step": 17159,
      "training_step_time": 0.38609981536865234
    },
    {
      "epoch": 0.000104736328125,
      "grad_norm": 0.15167413651943207,
      "learning_rate": 8.553028032833397e-05,
      "loss": 0.0559,
      "step": 17160
    },
    {
      "epoch": 0.000104736328125,
      "model_forward_time": 0.11521673202514648,
      "step": 17160
    },
    {
      "epoch": 0.000104736328125,
      "step": 17160,
      "training_step_time": 0.4236934185028076
    },
    {
      "epoch": 0.000104742431640625,
      "model_forward_time": 0.11508440971374512,
      "step": 17161
    },
    {
      "epoch": 0.000104742431640625,
      "step": 17161,
      "training_step_time": 0.40524864196777344
    },
    {
      "epoch": 0.00010474853515625,
      "model_forward_time": 0.11526799201965332,
      "step": 17162
    },
    {
      "epoch": 0.00010474853515625,
      "step": 17162,
      "training_step_time": 0.5663912296295166
    },
    {
      "epoch": 0.000104754638671875,
      "model_forward_time": 0.11604857444763184,
      "step": 17163
    },
    {
      "epoch": 0.000104754638671875,
      "step": 17163,
      "training_step_time": 0.4620692729949951
    },
    {
      "epoch": 0.0001047607421875,
      "model_forward_time": 0.11496615409851074,
      "step": 17164
    },
    {
      "epoch": 0.0001047607421875,
      "step": 17164,
      "training_step_time": 0.4689147472381592
    },
    {
      "epoch": 0.000104766845703125,
      "model_forward_time": 0.11475062370300293,
      "step": 17165
    },
    {
      "epoch": 0.000104766845703125,
      "step": 17165,
      "training_step_time": 0.3954899311065674
    },
    {
      "epoch": 0.00010477294921875,
      "model_forward_time": 0.11470603942871094,
      "step": 17166
    },
    {
      "epoch": 0.00010477294921875,
      "step": 17166,
      "training_step_time": 0.49962353706359863
    },
    {
      "epoch": 0.000104779052734375,
      "model_forward_time": 0.11434698104858398,
      "step": 17167
    },
    {
      "epoch": 0.000104779052734375,
      "step": 17167,
      "training_step_time": 0.38224363327026367
    },
    {
      "epoch": 0.00010478515625,
      "model_forward_time": 0.11490154266357422,
      "step": 17168
    },
    {
      "epoch": 0.00010478515625,
      "step": 17168,
      "training_step_time": 0.43176841735839844
    },
    {
      "epoch": 0.000104791259765625,
      "model_forward_time": 0.11425089836120605,
      "step": 17169
    },
    {
      "epoch": 0.000104791259765625,
      "step": 17169,
      "training_step_time": 0.3985862731933594
    },
    {
      "epoch": 0.00010479736328125,
      "grad_norm": 0.21018844842910767,
      "learning_rate": 8.551088550356807e-05,
      "loss": 0.0508,
      "step": 17170
    },
    {
      "epoch": 0.00010479736328125,
      "model_forward_time": 0.11545968055725098,
      "step": 17170
    },
    {
      "epoch": 0.00010479736328125,
      "step": 17170,
      "training_step_time": 0.382610559463501
    },
    {
      "epoch": 0.000104803466796875,
      "model_forward_time": 0.1156158447265625,
      "step": 17171
    },
    {
      "epoch": 0.000104803466796875,
      "step": 17171,
      "training_step_time": 0.3937666416168213
    },
    {
      "epoch": 0.0001048095703125,
      "model_forward_time": 0.11534309387207031,
      "step": 17172
    },
    {
      "epoch": 0.0001048095703125,
      "step": 17172,
      "training_step_time": 0.39642977714538574
    },
    {
      "epoch": 0.000104815673828125,
      "model_forward_time": 0.1157841682434082,
      "step": 17173
    },
    {
      "epoch": 0.000104815673828125,
      "step": 17173,
      "training_step_time": 0.4042088985443115
    },
    {
      "epoch": 0.00010482177734375,
      "model_forward_time": 0.11526823043823242,
      "step": 17174
    },
    {
      "epoch": 0.00010482177734375,
      "step": 17174,
      "training_step_time": 0.7745473384857178
    },
    {
      "epoch": 0.000104827880859375,
      "model_forward_time": 0.11433100700378418,
      "step": 17175
    },
    {
      "epoch": 0.000104827880859375,
      "step": 17175,
      "training_step_time": 0.44413065910339355
    },
    {
      "epoch": 0.000104833984375,
      "model_forward_time": 0.11493039131164551,
      "step": 17176
    },
    {
      "epoch": 0.000104833984375,
      "step": 17176,
      "training_step_time": 0.4123423099517822
    },
    {
      "epoch": 0.000104840087890625,
      "model_forward_time": 0.1144413948059082,
      "step": 17177
    },
    {
      "epoch": 0.000104840087890625,
      "step": 17177,
      "training_step_time": 0.4322340488433838
    },
    {
      "epoch": 0.00010484619140625,
      "model_forward_time": 0.11508893966674805,
      "step": 17178
    },
    {
      "epoch": 0.00010484619140625,
      "step": 17178,
      "training_step_time": 0.40889763832092285
    },
    {
      "epoch": 0.000104852294921875,
      "model_forward_time": 0.11463046073913574,
      "step": 17179
    },
    {
      "epoch": 0.000104852294921875,
      "step": 17179,
      "training_step_time": 0.4118335247039795
    },
    {
      "epoch": 0.0001048583984375,
      "grad_norm": 0.1832515448331833,
      "learning_rate": 8.549147989153276e-05,
      "loss": 0.0572,
      "step": 17180
    },
    {
      "epoch": 0.0001048583984375,
      "model_forward_time": 0.11539769172668457,
      "step": 17180
    },
    {
      "epoch": 0.0001048583984375,
      "step": 17180,
      "training_step_time": 0.6566827297210693
    },
    {
      "epoch": 0.000104864501953125,
      "model_forward_time": 0.11415505409240723,
      "step": 17181
    },
    {
      "epoch": 0.000104864501953125,
      "step": 17181,
      "training_step_time": 0.39315247535705566
    },
    {
      "epoch": 0.00010487060546875,
      "model_forward_time": 0.11447548866271973,
      "step": 17182
    },
    {
      "epoch": 0.00010487060546875,
      "step": 17182,
      "training_step_time": 0.3892090320587158
    },
    {
      "epoch": 0.000104876708984375,
      "model_forward_time": 0.11564970016479492,
      "step": 17183
    },
    {
      "epoch": 0.000104876708984375,
      "step": 17183,
      "training_step_time": 0.3852698802947998
    },
    {
      "epoch": 0.0001048828125,
      "model_forward_time": 0.1149139404296875,
      "step": 17184
    },
    {
      "epoch": 0.0001048828125,
      "step": 17184,
      "training_step_time": 0.38921618461608887
    },
    {
      "epoch": 0.000104888916015625,
      "model_forward_time": 0.11422133445739746,
      "step": 17185
    },
    {
      "epoch": 0.000104888916015625,
      "step": 17185,
      "training_step_time": 0.39485716819763184
    },
    {
      "epoch": 0.00010489501953125,
      "model_forward_time": 0.1148836612701416,
      "step": 17186
    },
    {
      "epoch": 0.00010489501953125,
      "step": 17186,
      "training_step_time": 0.660132646560669
    },
    {
      "epoch": 0.000104901123046875,
      "model_forward_time": 0.11478543281555176,
      "step": 17187
    },
    {
      "epoch": 0.000104901123046875,
      "step": 17187,
      "training_step_time": 0.41866445541381836
    },
    {
      "epoch": 0.0001049072265625,
      "model_forward_time": 0.11478853225708008,
      "step": 17188
    },
    {
      "epoch": 0.0001049072265625,
      "step": 17188,
      "training_step_time": 0.43593311309814453
    },
    {
      "epoch": 0.000104913330078125,
      "model_forward_time": 0.11477851867675781,
      "step": 17189
    },
    {
      "epoch": 0.000104913330078125,
      "step": 17189,
      "training_step_time": 0.4384021759033203
    },
    {
      "epoch": 0.00010491943359375,
      "grad_norm": 0.16713447868824005,
      "learning_rate": 8.547206349812298e-05,
      "loss": 0.0521,
      "step": 17190
    },
    {
      "epoch": 0.00010491943359375,
      "model_forward_time": 0.11499834060668945,
      "step": 17190
    },
    {
      "epoch": 0.00010491943359375,
      "step": 17190,
      "training_step_time": 0.39403653144836426
    },
    {
      "epoch": 0.000104925537109375,
      "model_forward_time": 0.11620593070983887,
      "step": 17191
    },
    {
      "epoch": 0.000104925537109375,
      "step": 17191,
      "training_step_time": 0.4172937870025635
    },
    {
      "epoch": 0.000104931640625,
      "model_forward_time": 0.11572003364562988,
      "step": 17192
    },
    {
      "epoch": 0.000104931640625,
      "step": 17192,
      "training_step_time": 0.6001377105712891
    },
    {
      "epoch": 0.000104937744140625,
      "model_forward_time": 0.11541914939880371,
      "step": 17193
    },
    {
      "epoch": 0.000104937744140625,
      "step": 17193,
      "training_step_time": 0.4380500316619873
    },
    {
      "epoch": 0.00010494384765625,
      "model_forward_time": 0.11496376991271973,
      "step": 17194
    },
    {
      "epoch": 0.00010494384765625,
      "step": 17194,
      "training_step_time": 0.38658928871154785
    },
    {
      "epoch": 0.000104949951171875,
      "model_forward_time": 0.11464309692382812,
      "step": 17195
    },
    {
      "epoch": 0.000104949951171875,
      "step": 17195,
      "training_step_time": 0.3921542167663574
    },
    {
      "epoch": 0.0001049560546875,
      "model_forward_time": 0.1147315502166748,
      "step": 17196
    },
    {
      "epoch": 0.0001049560546875,
      "step": 17196,
      "training_step_time": 0.40332674980163574
    },
    {
      "epoch": 0.000104962158203125,
      "model_forward_time": 0.11427760124206543,
      "step": 17197
    },
    {
      "epoch": 0.000104962158203125,
      "step": 17197,
      "training_step_time": 0.3960096836090088
    },
    {
      "epoch": 0.00010496826171875,
      "model_forward_time": 0.11545586585998535,
      "step": 17198
    },
    {
      "epoch": 0.00010496826171875,
      "step": 17198,
      "training_step_time": 0.5530977249145508
    },
    {
      "epoch": 0.000104974365234375,
      "model_forward_time": 0.11484169960021973,
      "step": 17199
    },
    {
      "epoch": 0.000104974365234375,
      "step": 17199,
      "training_step_time": 0.4491856098175049
    },
    {
      "epoch": 0.00010498046875,
      "grad_norm": 0.1891174614429474,
      "learning_rate": 8.545263632923687e-05,
      "loss": 0.0551,
      "step": 17200
    },
    {
      "epoch": 0.00010498046875,
      "model_forward_time": 0.11496424674987793,
      "step": 17200
    },
    {
      "epoch": 0.00010498046875,
      "step": 17200,
      "training_step_time": 0.3882434368133545
    },
    {
      "epoch": 0.000104986572265625,
      "model_forward_time": 0.11491680145263672,
      "step": 17201
    },
    {
      "epoch": 0.000104986572265625,
      "step": 17201,
      "training_step_time": 0.39676356315612793
    },
    {
      "epoch": 0.00010499267578125,
      "model_forward_time": 0.11531281471252441,
      "step": 17202
    },
    {
      "epoch": 0.00010499267578125,
      "step": 17202,
      "training_step_time": 0.4727649688720703
    },
    {
      "epoch": 0.000104998779296875,
      "model_forward_time": 0.11435246467590332,
      "step": 17203
    },
    {
      "epoch": 0.000104998779296875,
      "step": 17203,
      "training_step_time": 0.4380066394805908
    },
    {
      "epoch": 0.0001050048828125,
      "model_forward_time": 0.1151580810546875,
      "step": 17204
    },
    {
      "epoch": 0.0001050048828125,
      "step": 17204,
      "training_step_time": 0.630042314529419
    },
    {
      "epoch": 0.000105010986328125,
      "model_forward_time": 0.11466217041015625,
      "step": 17205
    },
    {
      "epoch": 0.000105010986328125,
      "step": 17205,
      "training_step_time": 0.4401404857635498
    },
    {
      "epoch": 0.00010501708984375,
      "model_forward_time": 0.11474204063415527,
      "step": 17206
    },
    {
      "epoch": 0.00010501708984375,
      "step": 17206,
      "training_step_time": 0.4451587200164795
    },
    {
      "epoch": 0.000105023193359375,
      "model_forward_time": 0.11419296264648438,
      "step": 17207
    },
    {
      "epoch": 0.000105023193359375,
      "step": 17207,
      "training_step_time": 0.41573309898376465
    },
    {
      "epoch": 0.000105029296875,
      "model_forward_time": 0.11481595039367676,
      "step": 17208
    },
    {
      "epoch": 0.000105029296875,
      "step": 17208,
      "training_step_time": 0.3889048099517822
    },
    {
      "epoch": 0.000105035400390625,
      "model_forward_time": 0.114593505859375,
      "step": 17209
    },
    {
      "epoch": 0.000105035400390625,
      "step": 17209,
      "training_step_time": 0.39043354988098145
    },
    {
      "epoch": 0.00010504150390625,
      "grad_norm": 0.11770551651716232,
      "learning_rate": 8.543319839077593e-05,
      "loss": 0.0574,
      "step": 17210
    },
    {
      "epoch": 0.00010504150390625,
      "model_forward_time": 0.11517572402954102,
      "step": 17210
    },
    {
      "epoch": 0.00010504150390625,
      "step": 17210,
      "training_step_time": 0.592233419418335
    },
    {
      "epoch": 0.000105047607421875,
      "model_forward_time": 0.11449265480041504,
      "step": 17211
    },
    {
      "epoch": 0.000105047607421875,
      "step": 17211,
      "training_step_time": 0.4115722179412842
    },
    {
      "epoch": 0.0001050537109375,
      "model_forward_time": 0.11450028419494629,
      "step": 17212
    },
    {
      "epoch": 0.0001050537109375,
      "step": 17212,
      "training_step_time": 0.4172811508178711
    },
    {
      "epoch": 0.000105059814453125,
      "model_forward_time": 0.11442708969116211,
      "step": 17213
    },
    {
      "epoch": 0.000105059814453125,
      "step": 17213,
      "training_step_time": 0.39363956451416016
    },
    {
      "epoch": 0.00010506591796875,
      "model_forward_time": 0.11609721183776855,
      "step": 17214
    },
    {
      "epoch": 0.00010506591796875,
      "step": 17214,
      "training_step_time": 0.4116952419281006
    },
    {
      "epoch": 0.000105072021484375,
      "model_forward_time": 0.1149911880493164,
      "step": 17215
    },
    {
      "epoch": 0.000105072021484375,
      "step": 17215,
      "training_step_time": 0.40712594985961914
    },
    {
      "epoch": 0.000105078125,
      "model_forward_time": 0.11475801467895508,
      "step": 17216
    },
    {
      "epoch": 0.000105078125,
      "step": 17216,
      "training_step_time": 0.5843155384063721
    },
    {
      "epoch": 0.000105084228515625,
      "model_forward_time": 0.11453580856323242,
      "step": 17217
    },
    {
      "epoch": 0.000105084228515625,
      "step": 17217,
      "training_step_time": 0.3960909843444824
    },
    {
      "epoch": 0.00010509033203125,
      "model_forward_time": 0.11511087417602539,
      "step": 17218
    },
    {
      "epoch": 0.00010509033203125,
      "step": 17218,
      "training_step_time": 0.3863348960876465
    },
    {
      "epoch": 0.000105096435546875,
      "model_forward_time": 0.11498737335205078,
      "step": 17219
    },
    {
      "epoch": 0.000105096435546875,
      "step": 17219,
      "training_step_time": 0.41103219985961914
    },
    {
      "epoch": 0.0001051025390625,
      "grad_norm": 0.16357654333114624,
      "learning_rate": 8.541374968864487e-05,
      "loss": 0.0544,
      "step": 17220
    },
    {
      "epoch": 0.0001051025390625,
      "model_forward_time": 0.11536002159118652,
      "step": 17220
    },
    {
      "epoch": 0.0001051025390625,
      "step": 17220,
      "training_step_time": 0.4361233711242676
    },
    {
      "epoch": 0.000105108642578125,
      "model_forward_time": 0.11506772041320801,
      "step": 17221
    },
    {
      "epoch": 0.000105108642578125,
      "step": 17221,
      "training_step_time": 0.49068284034729004
    },
    {
      "epoch": 0.00010511474609375,
      "model_forward_time": 0.11525321006774902,
      "step": 17222
    },
    {
      "epoch": 0.00010511474609375,
      "step": 17222,
      "training_step_time": 0.5130572319030762
    },
    {
      "epoch": 0.000105120849609375,
      "model_forward_time": 0.11451864242553711,
      "step": 17223
    },
    {
      "epoch": 0.000105120849609375,
      "step": 17223,
      "training_step_time": 0.39418458938598633
    },
    {
      "epoch": 0.000105126953125,
      "model_forward_time": 0.11483359336853027,
      "step": 17224
    },
    {
      "epoch": 0.000105126953125,
      "step": 17224,
      "training_step_time": 0.41322946548461914
    },
    {
      "epoch": 0.000105133056640625,
      "model_forward_time": 0.11485743522644043,
      "step": 17225
    },
    {
      "epoch": 0.000105133056640625,
      "step": 17225,
      "training_step_time": 0.4345371723175049
    },
    {
      "epoch": 0.00010513916015625,
      "model_forward_time": 0.11491012573242188,
      "step": 17226
    },
    {
      "epoch": 0.00010513916015625,
      "step": 17226,
      "training_step_time": 0.40369367599487305
    },
    {
      "epoch": 0.000105145263671875,
      "model_forward_time": 0.11465764045715332,
      "step": 17227
    },
    {
      "epoch": 0.000105145263671875,
      "step": 17227,
      "training_step_time": 0.38954973220825195
    },
    {
      "epoch": 0.0001051513671875,
      "model_forward_time": 0.11526608467102051,
      "step": 17228
    },
    {
      "epoch": 0.0001051513671875,
      "step": 17228,
      "training_step_time": 0.6579768657684326
    },
    {
      "epoch": 0.000105157470703125,
      "model_forward_time": 0.11546492576599121,
      "step": 17229
    },
    {
      "epoch": 0.000105157470703125,
      "step": 17229,
      "training_step_time": 0.4774324893951416
    },
    {
      "epoch": 0.00010516357421875,
      "grad_norm": 0.13932181894779205,
      "learning_rate": 8.539429022875169e-05,
      "loss": 0.0526,
      "step": 17230
    },
    {
      "epoch": 0.00010516357421875,
      "model_forward_time": 0.11455631256103516,
      "step": 17230
    },
    {
      "epoch": 0.00010516357421875,
      "step": 17230,
      "training_step_time": 0.41060733795166016
    },
    {
      "epoch": 0.000105169677734375,
      "model_forward_time": 0.11424112319946289,
      "step": 17231
    },
    {
      "epoch": 0.000105169677734375,
      "step": 17231,
      "training_step_time": 0.386521577835083
    },
    {
      "epoch": 0.00010517578125,
      "model_forward_time": 0.11493492126464844,
      "step": 17232
    },
    {
      "epoch": 0.00010517578125,
      "step": 17232,
      "training_step_time": 0.36504101753234863
    },
    {
      "epoch": 0.000105181884765625,
      "model_forward_time": 0.11568665504455566,
      "step": 17233
    },
    {
      "epoch": 0.000105181884765625,
      "step": 17233,
      "training_step_time": 0.44326114654541016
    },
    {
      "epoch": 0.00010518798828125,
      "model_forward_time": 0.11507153511047363,
      "step": 17234
    },
    {
      "epoch": 0.00010518798828125,
      "step": 17234,
      "training_step_time": 0.47162342071533203
    },
    {
      "epoch": 0.000105194091796875,
      "model_forward_time": 0.11458849906921387,
      "step": 17235
    },
    {
      "epoch": 0.000105194091796875,
      "step": 17235,
      "training_step_time": 0.4373281002044678
    },
    {
      "epoch": 0.0001052001953125,
      "model_forward_time": 0.11472463607788086,
      "step": 17236
    },
    {
      "epoch": 0.0001052001953125,
      "step": 17236,
      "training_step_time": 0.38457393646240234
    },
    {
      "epoch": 0.000105206298828125,
      "model_forward_time": 0.11494898796081543,
      "step": 17237
    },
    {
      "epoch": 0.000105206298828125,
      "step": 17237,
      "training_step_time": 0.41638922691345215
    },
    {
      "epoch": 0.00010521240234375,
      "model_forward_time": 0.11566042900085449,
      "step": 17238
    },
    {
      "epoch": 0.00010521240234375,
      "step": 17238,
      "training_step_time": 0.38900017738342285
    },
    {
      "epoch": 0.000105218505859375,
      "model_forward_time": 0.1153416633605957,
      "step": 17239
    },
    {
      "epoch": 0.000105218505859375,
      "step": 17239,
      "training_step_time": 0.3931155204772949
    },
    {
      "epoch": 0.000105224609375,
      "grad_norm": 0.1741252839565277,
      "learning_rate": 8.537482001700769e-05,
      "loss": 0.0537,
      "step": 17240
    },
    {
      "epoch": 0.000105224609375,
      "model_forward_time": 0.11476469039916992,
      "step": 17240
    },
    {
      "epoch": 0.000105224609375,
      "step": 17240,
      "training_step_time": 0.40236997604370117
    },
    {
      "epoch": 0.000105230712890625,
      "model_forward_time": 0.1149601936340332,
      "step": 17241
    },
    {
      "epoch": 0.000105230712890625,
      "step": 17241,
      "training_step_time": 0.39911961555480957
    },
    {
      "epoch": 0.00010523681640625,
      "model_forward_time": 0.11596441268920898,
      "step": 17242
    },
    {
      "epoch": 0.00010523681640625,
      "step": 17242,
      "training_step_time": 0.39814019203186035
    },
    {
      "epoch": 0.000105242919921875,
      "model_forward_time": 0.11524009704589844,
      "step": 17243
    },
    {
      "epoch": 0.000105242919921875,
      "step": 17243,
      "training_step_time": 0.49969053268432617
    },
    {
      "epoch": 0.0001052490234375,
      "model_forward_time": 0.11495161056518555,
      "step": 17244
    },
    {
      "epoch": 0.0001052490234375,
      "step": 17244,
      "training_step_time": 0.4529843330383301
    },
    {
      "epoch": 0.000105255126953125,
      "model_forward_time": 0.11494755744934082,
      "step": 17245
    },
    {
      "epoch": 0.000105255126953125,
      "step": 17245,
      "training_step_time": 0.4151747226715088
    },
    {
      "epoch": 0.00010526123046875,
      "model_forward_time": 0.11543512344360352,
      "step": 17246
    },
    {
      "epoch": 0.00010526123046875,
      "step": 17246,
      "training_step_time": 0.5187528133392334
    },
    {
      "epoch": 0.000105267333984375,
      "model_forward_time": 0.11539411544799805,
      "step": 17247
    },
    {
      "epoch": 0.000105267333984375,
      "step": 17247,
      "training_step_time": 0.41764378547668457
    },
    {
      "epoch": 0.0001052734375,
      "model_forward_time": 0.11497807502746582,
      "step": 17248
    },
    {
      "epoch": 0.0001052734375,
      "step": 17248,
      "training_step_time": 0.4657325744628906
    },
    {
      "epoch": 0.000105279541015625,
      "model_forward_time": 0.11506867408752441,
      "step": 17249
    },
    {
      "epoch": 0.000105279541015625,
      "step": 17249,
      "training_step_time": 0.4015803337097168
    },
    {
      "epoch": 0.00010528564453125,
      "grad_norm": 0.1609690934419632,
      "learning_rate": 8.535533905932738e-05,
      "loss": 0.0556,
      "step": 17250
    },
    {
      "epoch": 0.00010528564453125,
      "model_forward_time": 0.11498737335205078,
      "step": 17250
    },
    {
      "epoch": 0.00010528564453125,
      "step": 17250,
      "training_step_time": 0.41828131675720215
    },
    {
      "epoch": 0.000105291748046875,
      "model_forward_time": 0.11434197425842285,
      "step": 17251
    },
    {
      "epoch": 0.000105291748046875,
      "step": 17251,
      "training_step_time": 0.3988504409790039
    },
    {
      "epoch": 0.0001052978515625,
      "model_forward_time": 0.11622071266174316,
      "step": 17252
    },
    {
      "epoch": 0.0001052978515625,
      "step": 17252,
      "training_step_time": 0.5264706611633301
    },
    {
      "epoch": 0.000105303955078125,
      "model_forward_time": 0.1148824691772461,
      "step": 17253
    },
    {
      "epoch": 0.000105303955078125,
      "step": 17253,
      "training_step_time": 0.3973426818847656
    },
    {
      "epoch": 0.00010531005859375,
      "model_forward_time": 0.11469697952270508,
      "step": 17254
    },
    {
      "epoch": 0.00010531005859375,
      "step": 17254,
      "training_step_time": 0.39164209365844727
    },
    {
      "epoch": 0.000105316162109375,
      "model_forward_time": 0.11498904228210449,
      "step": 17255
    },
    {
      "epoch": 0.000105316162109375,
      "step": 17255,
      "training_step_time": 0.39374566078186035
    },
    {
      "epoch": 0.000105322265625,
      "model_forward_time": 0.11570620536804199,
      "step": 17256
    },
    {
      "epoch": 0.000105322265625,
      "step": 17256,
      "training_step_time": 0.395538330078125
    },
    {
      "epoch": 0.000105328369140625,
      "model_forward_time": 0.11492180824279785,
      "step": 17257
    },
    {
      "epoch": 0.000105328369140625,
      "step": 17257,
      "training_step_time": 0.4616072177886963
    },
    {
      "epoch": 0.00010533447265625,
      "model_forward_time": 0.11514759063720703,
      "step": 17258
    },
    {
      "epoch": 0.00010533447265625,
      "step": 17258,
      "training_step_time": 0.5467629432678223
    },
    {
      "epoch": 0.000105340576171875,
      "model_forward_time": 0.11483407020568848,
      "step": 17259
    },
    {
      "epoch": 0.000105340576171875,
      "step": 17259,
      "training_step_time": 0.41846370697021484
    },
    {
      "epoch": 0.0001053466796875,
      "grad_norm": 0.1679307222366333,
      "learning_rate": 8.533584736162857e-05,
      "loss": 0.056,
      "step": 17260
    },
    {
      "epoch": 0.0001053466796875,
      "model_forward_time": 0.11449050903320312,
      "step": 17260
    },
    {
      "epoch": 0.0001053466796875,
      "step": 17260,
      "training_step_time": 0.4556405544281006
    },
    {
      "epoch": 0.000105352783203125,
      "model_forward_time": 0.11475014686584473,
      "step": 17261
    },
    {
      "epoch": 0.000105352783203125,
      "step": 17261,
      "training_step_time": 0.4509396553039551
    },
    {
      "epoch": 0.00010535888671875,
      "model_forward_time": 0.11498498916625977,
      "step": 17262
    },
    {
      "epoch": 0.00010535888671875,
      "step": 17262,
      "training_step_time": 0.4602208137512207
    },
    {
      "epoch": 0.000105364990234375,
      "model_forward_time": 0.11458086967468262,
      "step": 17263
    },
    {
      "epoch": 0.000105364990234375,
      "step": 17263,
      "training_step_time": 0.4537372589111328
    },
    {
      "epoch": 0.00010537109375,
      "model_forward_time": 0.11467194557189941,
      "step": 17264
    },
    {
      "epoch": 0.00010537109375,
      "step": 17264,
      "training_step_time": 0.45120906829833984
    },
    {
      "epoch": 0.000105377197265625,
      "model_forward_time": 0.11611628532409668,
      "step": 17265
    },
    {
      "epoch": 0.000105377197265625,
      "step": 17265,
      "training_step_time": 0.3894233703613281
    },
    {
      "epoch": 0.00010538330078125,
      "model_forward_time": 0.1146240234375,
      "step": 17266
    },
    {
      "epoch": 0.00010538330078125,
      "step": 17266,
      "training_step_time": 0.3830685615539551
    },
    {
      "epoch": 0.000105389404296875,
      "model_forward_time": 0.11507034301757812,
      "step": 17267
    },
    {
      "epoch": 0.000105389404296875,
      "step": 17267,
      "training_step_time": 0.39638328552246094
    },
    {
      "epoch": 0.0001053955078125,
      "model_forward_time": 0.1153104305267334,
      "step": 17268
    },
    {
      "epoch": 0.0001053955078125,
      "step": 17268,
      "training_step_time": 0.3968849182128906
    },
    {
      "epoch": 0.000105401611328125,
      "model_forward_time": 0.11506867408752441,
      "step": 17269
    },
    {
      "epoch": 0.000105401611328125,
      "step": 17269,
      "training_step_time": 0.3985257148742676
    },
    {
      "epoch": 0.00010540771484375,
      "grad_norm": 0.21940462291240692,
      "learning_rate": 8.531634492983232e-05,
      "loss": 0.0571,
      "step": 17270
    },
    {
      "epoch": 0.00010540771484375,
      "model_forward_time": 0.11666321754455566,
      "step": 17270
    },
    {
      "epoch": 0.00010540771484375,
      "step": 17270,
      "training_step_time": 0.4897646903991699
    },
    {
      "epoch": 0.000105413818359375,
      "model_forward_time": 0.11460256576538086,
      "step": 17271
    },
    {
      "epoch": 0.000105413818359375,
      "step": 17271,
      "training_step_time": 0.43898510932922363
    },
    {
      "epoch": 0.000105419921875,
      "model_forward_time": 0.11490416526794434,
      "step": 17272
    },
    {
      "epoch": 0.000105419921875,
      "step": 17272,
      "training_step_time": 0.40083980560302734
    },
    {
      "epoch": 0.000105426025390625,
      "model_forward_time": 0.11463618278503418,
      "step": 17273
    },
    {
      "epoch": 0.000105426025390625,
      "step": 17273,
      "training_step_time": 0.3980522155761719
    },
    {
      "epoch": 0.00010543212890625,
      "model_forward_time": 0.11551070213317871,
      "step": 17274
    },
    {
      "epoch": 0.00010543212890625,
      "step": 17274,
      "training_step_time": 0.46869921684265137
    },
    {
      "epoch": 0.000105438232421875,
      "model_forward_time": 0.11594510078430176,
      "step": 17275
    },
    {
      "epoch": 0.000105438232421875,
      "step": 17275,
      "training_step_time": 0.4020540714263916
    },
    {
      "epoch": 0.0001054443359375,
      "model_forward_time": 0.11509275436401367,
      "step": 17276
    },
    {
      "epoch": 0.0001054443359375,
      "step": 17276,
      "training_step_time": 0.5382318496704102
    },
    {
      "epoch": 0.000105450439453125,
      "model_forward_time": 0.11456775665283203,
      "step": 17277
    },
    {
      "epoch": 0.000105450439453125,
      "step": 17277,
      "training_step_time": 0.46625208854675293
    },
    {
      "epoch": 0.00010545654296875,
      "model_forward_time": 0.11511945724487305,
      "step": 17278
    },
    {
      "epoch": 0.00010545654296875,
      "step": 17278,
      "training_step_time": 0.38941192626953125
    },
    {
      "epoch": 0.000105462646484375,
      "model_forward_time": 0.1141660213470459,
      "step": 17279
    },
    {
      "epoch": 0.000105462646484375,
      "step": 17279,
      "training_step_time": 0.3910682201385498
    },
    {
      "epoch": 0.00010546875,
      "grad_norm": 0.20658761262893677,
      "learning_rate": 8.529683176986295e-05,
      "loss": 0.0581,
      "step": 17280
    },
    {
      "epoch": 0.00010546875,
      "model_forward_time": 0.11468148231506348,
      "step": 17280
    },
    {
      "epoch": 0.00010546875,
      "step": 17280,
      "training_step_time": 0.3903210163116455
    },
    {
      "epoch": 0.000105474853515625,
      "model_forward_time": 0.11539983749389648,
      "step": 17281
    },
    {
      "epoch": 0.000105474853515625,
      "step": 17281,
      "training_step_time": 0.3939182758331299
    },
    {
      "epoch": 0.00010548095703125,
      "model_forward_time": 0.11443948745727539,
      "step": 17282
    },
    {
      "epoch": 0.00010548095703125,
      "step": 17282,
      "training_step_time": 0.5534822940826416
    },
    {
      "epoch": 0.000105487060546875,
      "model_forward_time": 0.11512231826782227,
      "step": 17283
    },
    {
      "epoch": 0.000105487060546875,
      "step": 17283,
      "training_step_time": 0.38838982582092285
    },
    {
      "epoch": 0.0001054931640625,
      "model_forward_time": 0.11477899551391602,
      "step": 17284
    },
    {
      "epoch": 0.0001054931640625,
      "step": 17284,
      "training_step_time": 0.3893728256225586
    },
    {
      "epoch": 0.000105499267578125,
      "model_forward_time": 0.11417555809020996,
      "step": 17285
    },
    {
      "epoch": 0.000105499267578125,
      "step": 17285,
      "training_step_time": 0.39124345779418945
    },
    {
      "epoch": 0.00010550537109375,
      "model_forward_time": 0.11513495445251465,
      "step": 17286
    },
    {
      "epoch": 0.00010550537109375,
      "step": 17286,
      "training_step_time": 0.49412012100219727
    },
    {
      "epoch": 0.000105511474609375,
      "model_forward_time": 0.11511111259460449,
      "step": 17287
    },
    {
      "epoch": 0.000105511474609375,
      "step": 17287,
      "training_step_time": 0.4447019100189209
    },
    {
      "epoch": 0.000105517578125,
      "model_forward_time": 0.11420726776123047,
      "step": 17288
    },
    {
      "epoch": 0.000105517578125,
      "step": 17288,
      "training_step_time": 0.4807775020599365
    },
    {
      "epoch": 0.000105523681640625,
      "model_forward_time": 0.11443805694580078,
      "step": 17289
    },
    {
      "epoch": 0.000105523681640625,
      "step": 17289,
      "training_step_time": 0.3981635570526123
    },
    {
      "epoch": 0.00010552978515625,
      "grad_norm": 0.16646552085876465,
      "learning_rate": 8.527730788764805e-05,
      "loss": 0.0533,
      "step": 17290
    },
    {
      "epoch": 0.00010552978515625,
      "model_forward_time": 0.11453723907470703,
      "step": 17290
    },
    {
      "epoch": 0.00010552978515625,
      "step": 17290,
      "training_step_time": 0.4141867160797119
    },
    {
      "epoch": 0.000105535888671875,
      "model_forward_time": 0.1142275333404541,
      "step": 17291
    },
    {
      "epoch": 0.000105535888671875,
      "step": 17291,
      "training_step_time": 0.4581422805786133
    },
    {
      "epoch": 0.0001055419921875,
      "model_forward_time": 0.11498165130615234,
      "step": 17292
    },
    {
      "epoch": 0.0001055419921875,
      "step": 17292,
      "training_step_time": 0.399275541305542
    },
    {
      "epoch": 0.000105548095703125,
      "model_forward_time": 0.11420917510986328,
      "step": 17293
    },
    {
      "epoch": 0.000105548095703125,
      "step": 17293,
      "training_step_time": 0.42867493629455566
    },
    {
      "epoch": 0.00010555419921875,
      "model_forward_time": 0.11491560935974121,
      "step": 17294
    },
    {
      "epoch": 0.00010555419921875,
      "step": 17294,
      "training_step_time": 0.5355448722839355
    },
    {
      "epoch": 0.000105560302734375,
      "model_forward_time": 0.11534476280212402,
      "step": 17295
    },
    {
      "epoch": 0.000105560302734375,
      "step": 17295,
      "training_step_time": 0.399350643157959
    },
    {
      "epoch": 0.00010556640625,
      "model_forward_time": 0.11479544639587402,
      "step": 17296
    },
    {
      "epoch": 0.00010556640625,
      "step": 17296,
      "training_step_time": 0.3917961120605469
    },
    {
      "epoch": 0.000105572509765625,
      "model_forward_time": 0.11505508422851562,
      "step": 17297
    },
    {
      "epoch": 0.000105572509765625,
      "step": 17297,
      "training_step_time": 0.38597989082336426
    },
    {
      "epoch": 0.00010557861328125,
      "model_forward_time": 0.11518478393554688,
      "step": 17298
    },
    {
      "epoch": 0.00010557861328125,
      "step": 17298,
      "training_step_time": 0.3908243179321289
    },
    {
      "epoch": 0.000105584716796875,
      "model_forward_time": 0.11560797691345215,
      "step": 17299
    },
    {
      "epoch": 0.000105584716796875,
      "step": 17299,
      "training_step_time": 0.3953373432159424
    },
    {
      "epoch": 0.0001055908203125,
      "grad_norm": 0.13711941242218018,
      "learning_rate": 8.525777328911846e-05,
      "loss": 0.0603,
      "step": 17300
    },
    {
      "epoch": 0.0001055908203125,
      "model_forward_time": 0.11532425880432129,
      "step": 17300
    },
    {
      "epoch": 0.0001055908203125,
      "step": 17300,
      "training_step_time": 0.7049248218536377
    },
    {
      "epoch": 0.000105596923828125,
      "model_forward_time": 0.11559939384460449,
      "step": 17301
    },
    {
      "epoch": 0.000105596923828125,
      "step": 17301,
      "training_step_time": 0.43842291831970215
    },
    {
      "epoch": 0.00010560302734375,
      "model_forward_time": 0.11511373519897461,
      "step": 17302
    },
    {
      "epoch": 0.00010560302734375,
      "step": 17302,
      "training_step_time": 0.4975290298461914
    },
    {
      "epoch": 0.000105609130859375,
      "model_forward_time": 0.11416983604431152,
      "step": 17303
    },
    {
      "epoch": 0.000105609130859375,
      "step": 17303,
      "training_step_time": 0.44821739196777344
    },
    {
      "epoch": 0.000105615234375,
      "model_forward_time": 0.1156005859375,
      "step": 17304
    },
    {
      "epoch": 0.000105615234375,
      "step": 17304,
      "training_step_time": 0.4827861785888672
    },
    {
      "epoch": 0.000105621337890625,
      "model_forward_time": 0.11433839797973633,
      "step": 17305
    },
    {
      "epoch": 0.000105621337890625,
      "step": 17305,
      "training_step_time": 0.4292480945587158
    },
    {
      "epoch": 0.00010562744140625,
      "model_forward_time": 0.11451339721679688,
      "step": 17306
    },
    {
      "epoch": 0.00010562744140625,
      "step": 17306,
      "training_step_time": 0.47120165824890137
    },
    {
      "epoch": 0.000105633544921875,
      "model_forward_time": 0.11452507972717285,
      "step": 17307
    },
    {
      "epoch": 0.000105633544921875,
      "step": 17307,
      "training_step_time": 0.40206432342529297
    },
    {
      "epoch": 0.0001056396484375,
      "model_forward_time": 0.11438393592834473,
      "step": 17308
    },
    {
      "epoch": 0.0001056396484375,
      "step": 17308,
      "training_step_time": 0.38205504417419434
    },
    {
      "epoch": 0.000105645751953125,
      "model_forward_time": 0.11496472358703613,
      "step": 17309
    },
    {
      "epoch": 0.000105645751953125,
      "step": 17309,
      "training_step_time": 0.38674163818359375
    },
    {
      "epoch": 0.00010565185546875,
      "grad_norm": 0.18912342190742493,
      "learning_rate": 8.523822798020827e-05,
      "loss": 0.0564,
      "step": 17310
    },
    {
      "epoch": 0.00010565185546875,
      "model_forward_time": 0.11510205268859863,
      "step": 17310
    },
    {
      "epoch": 0.00010565185546875,
      "step": 17310,
      "training_step_time": 0.39155054092407227
    },
    {
      "epoch": 0.000105657958984375,
      "model_forward_time": 0.11581087112426758,
      "step": 17311
    },
    {
      "epoch": 0.000105657958984375,
      "step": 17311,
      "training_step_time": 0.39660000801086426
    },
    {
      "epoch": 0.0001056640625,
      "model_forward_time": 0.11502671241760254,
      "step": 17312
    },
    {
      "epoch": 0.0001056640625,
      "step": 17312,
      "training_step_time": 0.4710087776184082
    },
    {
      "epoch": 0.000105670166015625,
      "model_forward_time": 0.11458778381347656,
      "step": 17313
    },
    {
      "epoch": 0.000105670166015625,
      "step": 17313,
      "training_step_time": 0.4271533489227295
    },
    {
      "epoch": 0.00010567626953125,
      "model_forward_time": 0.11564373970031738,
      "step": 17314
    },
    {
      "epoch": 0.00010567626953125,
      "step": 17314,
      "training_step_time": 0.409928560256958
    },
    {
      "epoch": 0.000105682373046875,
      "model_forward_time": 0.1181037425994873,
      "step": 17315
    },
    {
      "epoch": 0.000105682373046875,
      "step": 17315,
      "training_step_time": 0.4673140048980713
    },
    {
      "epoch": 0.0001056884765625,
      "model_forward_time": 0.1151587963104248,
      "step": 17316
    },
    {
      "epoch": 0.0001056884765625,
      "step": 17316,
      "training_step_time": 0.3774251937866211
    },
    {
      "epoch": 0.000105694580078125,
      "model_forward_time": 0.1149747371673584,
      "step": 17317
    },
    {
      "epoch": 0.000105694580078125,
      "step": 17317,
      "training_step_time": 0.4592750072479248
    },
    {
      "epoch": 0.00010570068359375,
      "model_forward_time": 0.11556673049926758,
      "step": 17318
    },
    {
      "epoch": 0.00010570068359375,
      "step": 17318,
      "training_step_time": 0.4521801471710205
    },
    {
      "epoch": 0.000105706787109375,
      "model_forward_time": 0.1145780086517334,
      "step": 17319
    },
    {
      "epoch": 0.000105706787109375,
      "step": 17319,
      "training_step_time": 0.4317500591278076
    },
    {
      "epoch": 0.000105712890625,
      "grad_norm": 0.1755313277244568,
      "learning_rate": 8.521867196685482e-05,
      "loss": 0.0507,
      "step": 17320
    },
    {
      "epoch": 0.000105712890625,
      "model_forward_time": 0.11450886726379395,
      "step": 17320
    },
    {
      "epoch": 0.000105712890625,
      "step": 17320,
      "training_step_time": 0.446702241897583
    },
    {
      "epoch": 0.000105718994140625,
      "model_forward_time": 0.11483883857727051,
      "step": 17321
    },
    {
      "epoch": 0.000105718994140625,
      "step": 17321,
      "training_step_time": 0.39124178886413574
    },
    {
      "epoch": 0.00010572509765625,
      "model_forward_time": 0.11439228057861328,
      "step": 17322
    },
    {
      "epoch": 0.00010572509765625,
      "step": 17322,
      "training_step_time": 0.38796281814575195
    },
    {
      "epoch": 0.000105731201171875,
      "model_forward_time": 0.11514425277709961,
      "step": 17323
    },
    {
      "epoch": 0.000105731201171875,
      "step": 17323,
      "training_step_time": 0.38973569869995117
    },
    {
      "epoch": 0.0001057373046875,
      "model_forward_time": 0.11506843566894531,
      "step": 17324
    },
    {
      "epoch": 0.0001057373046875,
      "step": 17324,
      "training_step_time": 0.5667424201965332
    },
    {
      "epoch": 0.000105743408203125,
      "model_forward_time": 0.11650538444519043,
      "step": 17325
    },
    {
      "epoch": 0.000105743408203125,
      "step": 17325,
      "training_step_time": 0.39437222480773926
    },
    {
      "epoch": 0.00010574951171875,
      "model_forward_time": 0.1146688461303711,
      "step": 17326
    },
    {
      "epoch": 0.00010574951171875,
      "step": 17326,
      "training_step_time": 0.3935234546661377
    },
    {
      "epoch": 0.000105755615234375,
      "model_forward_time": 0.11508631706237793,
      "step": 17327
    },
    {
      "epoch": 0.000105755615234375,
      "step": 17327,
      "training_step_time": 0.43850064277648926
    },
    {
      "epoch": 0.00010576171875,
      "model_forward_time": 0.11483454704284668,
      "step": 17328
    },
    {
      "epoch": 0.00010576171875,
      "step": 17328,
      "training_step_time": 0.404979944229126
    },
    {
      "epoch": 0.000105767822265625,
      "model_forward_time": 0.11447429656982422,
      "step": 17329
    },
    {
      "epoch": 0.000105767822265625,
      "step": 17329,
      "training_step_time": 0.42342519760131836
    },
    {
      "epoch": 0.00010577392578125,
      "grad_norm": 0.16890966892242432,
      "learning_rate": 8.519910525499874e-05,
      "loss": 0.0579,
      "step": 17330
    },
    {
      "epoch": 0.00010577392578125,
      "model_forward_time": 0.11519122123718262,
      "step": 17330
    },
    {
      "epoch": 0.00010577392578125,
      "step": 17330,
      "training_step_time": 0.5711021423339844
    },
    {
      "epoch": 0.000105780029296875,
      "model_forward_time": 0.11485910415649414,
      "step": 17331
    },
    {
      "epoch": 0.000105780029296875,
      "step": 17331,
      "training_step_time": 0.46414828300476074
    },
    {
      "epoch": 0.0001057861328125,
      "model_forward_time": 0.11482548713684082,
      "step": 17332
    },
    {
      "epoch": 0.0001057861328125,
      "step": 17332,
      "training_step_time": 0.45101094245910645
    },
    {
      "epoch": 0.000105792236328125,
      "model_forward_time": 0.11438226699829102,
      "step": 17333
    },
    {
      "epoch": 0.000105792236328125,
      "step": 17333,
      "training_step_time": 0.39401936531066895
    },
    {
      "epoch": 0.00010579833984375,
      "model_forward_time": 0.11425113677978516,
      "step": 17334
    },
    {
      "epoch": 0.00010579833984375,
      "step": 17334,
      "training_step_time": 0.3986237049102783
    },
    {
      "epoch": 0.000105804443359375,
      "model_forward_time": 0.1138758659362793,
      "step": 17335
    },
    {
      "epoch": 0.000105804443359375,
      "step": 17335,
      "training_step_time": 0.39710330963134766
    },
    {
      "epoch": 0.000105810546875,
      "model_forward_time": 0.11467242240905762,
      "step": 17336
    },
    {
      "epoch": 0.000105810546875,
      "step": 17336,
      "training_step_time": 0.3994729518890381
    },
    {
      "epoch": 0.000105816650390625,
      "model_forward_time": 0.1149146556854248,
      "step": 17337
    },
    {
      "epoch": 0.000105816650390625,
      "step": 17337,
      "training_step_time": 0.3969004154205322
    },
    {
      "epoch": 0.00010582275390625,
      "model_forward_time": 0.11645007133483887,
      "step": 17338
    },
    {
      "epoch": 0.00010582275390625,
      "step": 17338,
      "training_step_time": 0.38683629035949707
    },
    {
      "epoch": 0.000105828857421875,
      "model_forward_time": 0.11528420448303223,
      "step": 17339
    },
    {
      "epoch": 0.000105828857421875,
      "step": 17339,
      "training_step_time": 0.4025862216949463
    },
    {
      "epoch": 0.0001058349609375,
      "grad_norm": 0.1482996791601181,
      "learning_rate": 8.517952785058385e-05,
      "loss": 0.0562,
      "step": 17340
    },
    {
      "epoch": 0.0001058349609375,
      "model_forward_time": 0.1144113540649414,
      "step": 17340
    },
    {
      "epoch": 0.0001058349609375,
      "step": 17340,
      "training_step_time": 0.39914488792419434
    },
    {
      "epoch": 0.000105841064453125,
      "model_forward_time": 0.11551833152770996,
      "step": 17341
    },
    {
      "epoch": 0.000105841064453125,
      "step": 17341,
      "training_step_time": 0.4258744716644287
    },
    {
      "epoch": 0.00010584716796875,
      "model_forward_time": 0.11531686782836914,
      "step": 17342
    },
    {
      "epoch": 0.00010584716796875,
      "step": 17342,
      "training_step_time": 0.693488359451294
    },
    {
      "epoch": 0.000105853271484375,
      "model_forward_time": 0.11460423469543457,
      "step": 17343
    },
    {
      "epoch": 0.000105853271484375,
      "step": 17343,
      "training_step_time": 0.458280086517334
    },
    {
      "epoch": 0.000105859375,
      "model_forward_time": 0.11438989639282227,
      "step": 17344
    },
    {
      "epoch": 0.000105859375,
      "step": 17344,
      "training_step_time": 0.39765381813049316
    },
    {
      "epoch": 0.000105865478515625,
      "model_forward_time": 0.11456489562988281,
      "step": 17345
    },
    {
      "epoch": 0.000105865478515625,
      "step": 17345,
      "training_step_time": 0.3986036777496338
    },
    {
      "epoch": 0.00010587158203125,
      "model_forward_time": 0.11451315879821777,
      "step": 17346
    },
    {
      "epoch": 0.00010587158203125,
      "step": 17346,
      "training_step_time": 0.42997312545776367
    },
    {
      "epoch": 0.000105877685546875,
      "model_forward_time": 0.11454272270202637,
      "step": 17347
    },
    {
      "epoch": 0.000105877685546875,
      "step": 17347,
      "training_step_time": 0.4648096561431885
    },
    {
      "epoch": 0.0001058837890625,
      "model_forward_time": 0.11502480506896973,
      "step": 17348
    },
    {
      "epoch": 0.0001058837890625,
      "step": 17348,
      "training_step_time": 0.4343264102935791
    },
    {
      "epoch": 0.000105889892578125,
      "model_forward_time": 0.11479997634887695,
      "step": 17349
    },
    {
      "epoch": 0.000105889892578125,
      "step": 17349,
      "training_step_time": 0.3978285789489746
    },
    {
      "epoch": 0.00010589599609375,
      "grad_norm": 0.2612176835536957,
      "learning_rate": 8.515993975955727e-05,
      "loss": 0.0575,
      "step": 17350
    },
    {
      "epoch": 0.00010589599609375,
      "model_forward_time": 0.11463093757629395,
      "step": 17350
    },
    {
      "epoch": 0.00010589599609375,
      "step": 17350,
      "training_step_time": 0.39168524742126465
    },
    {
      "epoch": 0.000105902099609375,
      "model_forward_time": 0.11563587188720703,
      "step": 17351
    },
    {
      "epoch": 0.000105902099609375,
      "step": 17351,
      "training_step_time": 0.3872218132019043
    },
    {
      "epoch": 0.000105908203125,
      "model_forward_time": 0.11511850357055664,
      "step": 17352
    },
    {
      "epoch": 0.000105908203125,
      "step": 17352,
      "training_step_time": 0.3896462917327881
    },
    {
      "epoch": 0.000105914306640625,
      "model_forward_time": 0.11520814895629883,
      "step": 17353
    },
    {
      "epoch": 0.000105914306640625,
      "step": 17353,
      "training_step_time": 0.39539599418640137
    },
    {
      "epoch": 0.00010592041015625,
      "model_forward_time": 0.11493372917175293,
      "step": 17354
    },
    {
      "epoch": 0.00010592041015625,
      "step": 17354,
      "training_step_time": 0.6405055522918701
    },
    {
      "epoch": 0.000105926513671875,
      "model_forward_time": 0.11450600624084473,
      "step": 17355
    },
    {
      "epoch": 0.000105926513671875,
      "step": 17355,
      "training_step_time": 0.4154679775238037
    },
    {
      "epoch": 0.0001059326171875,
      "model_forward_time": 0.11483192443847656,
      "step": 17356
    },
    {
      "epoch": 0.0001059326171875,
      "step": 17356,
      "training_step_time": 0.40670037269592285
    },
    {
      "epoch": 0.000105938720703125,
      "model_forward_time": 0.11464285850524902,
      "step": 17357
    },
    {
      "epoch": 0.000105938720703125,
      "step": 17357,
      "training_step_time": 0.44999217987060547
    },
    {
      "epoch": 0.00010594482421875,
      "model_forward_time": 0.1153404712677002,
      "step": 17358
    },
    {
      "epoch": 0.00010594482421875,
      "step": 17358,
      "training_step_time": 0.4008817672729492
    },
    {
      "epoch": 0.000105950927734375,
      "model_forward_time": 0.11469101905822754,
      "step": 17359
    },
    {
      "epoch": 0.000105950927734375,
      "step": 17359,
      "training_step_time": 0.5065865516662598
    },
    {
      "epoch": 0.00010595703125,
      "grad_norm": 0.198889821767807,
      "learning_rate": 8.514034098786933e-05,
      "loss": 0.0568,
      "step": 17360
    },
    {
      "epoch": 0.00010595703125,
      "model_forward_time": 0.11429524421691895,
      "step": 17360
    },
    {
      "epoch": 0.00010595703125,
      "step": 17360,
      "training_step_time": 0.4406917095184326
    },
    {
      "epoch": 0.000105963134765625,
      "model_forward_time": 0.1154935359954834,
      "step": 17361
    },
    {
      "epoch": 0.000105963134765625,
      "step": 17361,
      "training_step_time": 0.4852752685546875
    },
    {
      "epoch": 0.00010596923828125,
      "model_forward_time": 0.11454439163208008,
      "step": 17362
    },
    {
      "epoch": 0.00010596923828125,
      "step": 17362,
      "training_step_time": 0.4776194095611572
    },
    {
      "epoch": 0.000105975341796875,
      "model_forward_time": 0.11671948432922363,
      "step": 17363
    },
    {
      "epoch": 0.000105975341796875,
      "step": 17363,
      "training_step_time": 0.4046213626861572
    },
    {
      "epoch": 0.0001059814453125,
      "model_forward_time": 0.11442732810974121,
      "step": 17364
    },
    {
      "epoch": 0.0001059814453125,
      "step": 17364,
      "training_step_time": 0.39388561248779297
    },
    {
      "epoch": 0.000105987548828125,
      "model_forward_time": 0.11452603340148926,
      "step": 17365
    },
    {
      "epoch": 0.000105987548828125,
      "step": 17365,
      "training_step_time": 0.3962361812591553
    },
    {
      "epoch": 0.00010599365234375,
      "model_forward_time": 0.1148824691772461,
      "step": 17366
    },
    {
      "epoch": 0.00010599365234375,
      "step": 17366,
      "training_step_time": 0.5393273830413818
    },
    {
      "epoch": 0.000105999755859375,
      "model_forward_time": 0.11472010612487793,
      "step": 17367
    },
    {
      "epoch": 0.000105999755859375,
      "step": 17367,
      "training_step_time": 0.38773107528686523
    },
    {
      "epoch": 0.000106005859375,
      "model_forward_time": 0.11455130577087402,
      "step": 17368
    },
    {
      "epoch": 0.000106005859375,
      "step": 17368,
      "training_step_time": 0.40378642082214355
    },
    {
      "epoch": 0.000106011962890625,
      "model_forward_time": 0.11458349227905273,
      "step": 17369
    },
    {
      "epoch": 0.000106011962890625,
      "step": 17369,
      "training_step_time": 0.42376160621643066
    },
    {
      "epoch": 0.00010601806640625,
      "grad_norm": 0.14230535924434662,
      "learning_rate": 8.512073154147362e-05,
      "loss": 0.0498,
      "step": 17370
    },
    {
      "epoch": 0.00010601806640625,
      "model_forward_time": 0.11483240127563477,
      "step": 17370
    },
    {
      "epoch": 0.00010601806640625,
      "step": 17370,
      "training_step_time": 0.42012524604797363
    },
    {
      "epoch": 0.000106024169921875,
      "model_forward_time": 0.1144263744354248,
      "step": 17371
    },
    {
      "epoch": 0.000106024169921875,
      "step": 17371,
      "training_step_time": 0.46264028549194336
    },
    {
      "epoch": 0.0001060302734375,
      "model_forward_time": 0.11551022529602051,
      "step": 17372
    },
    {
      "epoch": 0.0001060302734375,
      "step": 17372,
      "training_step_time": 0.49226999282836914
    },
    {
      "epoch": 0.000106036376953125,
      "model_forward_time": 0.11516976356506348,
      "step": 17373
    },
    {
      "epoch": 0.000106036376953125,
      "step": 17373,
      "training_step_time": 0.39362597465515137
    },
    {
      "epoch": 0.00010604248046875,
      "model_forward_time": 0.1146090030670166,
      "step": 17374
    },
    {
      "epoch": 0.00010604248046875,
      "step": 17374,
      "training_step_time": 0.39846324920654297
    },
    {
      "epoch": 0.000106048583984375,
      "model_forward_time": 0.11447405815124512,
      "step": 17375
    },
    {
      "epoch": 0.000106048583984375,
      "step": 17375,
      "training_step_time": 0.4595608711242676
    },
    {
      "epoch": 0.0001060546875,
      "model_forward_time": 0.11502909660339355,
      "step": 17376
    },
    {
      "epoch": 0.0001060546875,
      "step": 17376,
      "training_step_time": 0.4118690490722656
    },
    {
      "epoch": 0.000106060791015625,
      "model_forward_time": 0.11501002311706543,
      "step": 17377
    },
    {
      "epoch": 0.000106060791015625,
      "step": 17377,
      "training_step_time": 0.4125099182128906
    },
    {
      "epoch": 0.00010606689453125,
      "model_forward_time": 0.11479616165161133,
      "step": 17378
    },
    {
      "epoch": 0.00010606689453125,
      "step": 17378,
      "training_step_time": 0.45125484466552734
    },
    {
      "epoch": 0.000106072998046875,
      "model_forward_time": 0.1150968074798584,
      "step": 17379
    },
    {
      "epoch": 0.000106072998046875,
      "step": 17379,
      "training_step_time": 0.3895692825317383
    },
    {
      "epoch": 0.0001060791015625,
      "grad_norm": 0.13627685606479645,
      "learning_rate": 8.510111142632698e-05,
      "loss": 0.0482,
      "step": 17380
    },
    {
      "epoch": 0.0001060791015625,
      "model_forward_time": 0.11527276039123535,
      "step": 17380
    },
    {
      "epoch": 0.0001060791015625,
      "step": 17380,
      "training_step_time": 0.39201927185058594
    },
    {
      "epoch": 0.000106085205078125,
      "model_forward_time": 0.11477518081665039,
      "step": 17381
    },
    {
      "epoch": 0.000106085205078125,
      "step": 17381,
      "training_step_time": 0.40222907066345215
    },
    {
      "epoch": 0.00010609130859375,
      "model_forward_time": 0.1149294376373291,
      "step": 17382
    },
    {
      "epoch": 0.00010609130859375,
      "step": 17382,
      "training_step_time": 0.38888025283813477
    },
    {
      "epoch": 0.000106097412109375,
      "model_forward_time": 0.11487293243408203,
      "step": 17383
    },
    {
      "epoch": 0.000106097412109375,
      "step": 17383,
      "training_step_time": 0.41614341735839844
    },
    {
      "epoch": 0.000106103515625,
      "model_forward_time": 0.11512279510498047,
      "step": 17384
    },
    {
      "epoch": 0.000106103515625,
      "step": 17384,
      "training_step_time": 0.5470943450927734
    },
    {
      "epoch": 0.000106109619140625,
      "model_forward_time": 0.11503195762634277,
      "step": 17385
    },
    {
      "epoch": 0.000106109619140625,
      "step": 17385,
      "training_step_time": 0.3932509422302246
    },
    {
      "epoch": 0.00010611572265625,
      "model_forward_time": 0.11493444442749023,
      "step": 17386
    },
    {
      "epoch": 0.00010611572265625,
      "step": 17386,
      "training_step_time": 0.39649391174316406
    },
    {
      "epoch": 0.000106121826171875,
      "model_forward_time": 0.11520147323608398,
      "step": 17387
    },
    {
      "epoch": 0.000106121826171875,
      "step": 17387,
      "training_step_time": 0.3979482650756836
    },
    {
      "epoch": 0.0001061279296875,
      "model_forward_time": 0.11501502990722656,
      "step": 17388
    },
    {
      "epoch": 0.0001061279296875,
      "step": 17388,
      "training_step_time": 0.4357147216796875
    },
    {
      "epoch": 0.000106134033203125,
      "model_forward_time": 0.11498641967773438,
      "step": 17389
    },
    {
      "epoch": 0.000106134033203125,
      "step": 17389,
      "training_step_time": 0.49245643615722656
    },
    {
      "epoch": 0.00010614013671875,
      "grad_norm": 0.1377515196800232,
      "learning_rate": 8.508148064838948e-05,
      "loss": 0.0495,
      "step": 17390
    },
    {
      "epoch": 0.00010614013671875,
      "model_forward_time": 0.11451935768127441,
      "step": 17390
    },
    {
      "epoch": 0.00010614013671875,
      "step": 17390,
      "training_step_time": 0.5727605819702148
    },
    {
      "epoch": 0.000106146240234375,
      "model_forward_time": 0.11434364318847656,
      "step": 17391
    },
    {
      "epoch": 0.000106146240234375,
      "step": 17391,
      "training_step_time": 0.40589165687561035
    },
    {
      "epoch": 0.00010615234375,
      "model_forward_time": 0.11455035209655762,
      "step": 17392
    },
    {
      "epoch": 0.00010615234375,
      "step": 17392,
      "training_step_time": 0.3862597942352295
    },
    {
      "epoch": 0.000106158447265625,
      "model_forward_time": 0.11470532417297363,
      "step": 17393
    },
    {
      "epoch": 0.000106158447265625,
      "step": 17393,
      "training_step_time": 0.39521026611328125
    },
    {
      "epoch": 0.00010616455078125,
      "model_forward_time": 0.11503386497497559,
      "step": 17394
    },
    {
      "epoch": 0.00010616455078125,
      "step": 17394,
      "training_step_time": 0.3882784843444824
    },
    {
      "epoch": 0.000106170654296875,
      "model_forward_time": 0.11484313011169434,
      "step": 17395
    },
    {
      "epoch": 0.000106170654296875,
      "step": 17395,
      "training_step_time": 0.4006774425506592
    },
    {
      "epoch": 0.0001061767578125,
      "model_forward_time": 0.11516690254211426,
      "step": 17396
    },
    {
      "epoch": 0.0001061767578125,
      "step": 17396,
      "training_step_time": 0.6425409317016602
    },
    {
      "epoch": 0.000106182861328125,
      "model_forward_time": 0.11504721641540527,
      "step": 17397
    },
    {
      "epoch": 0.000106182861328125,
      "step": 17397,
      "training_step_time": 0.39302968978881836
    },
    {
      "epoch": 0.00010618896484375,
      "model_forward_time": 0.11467456817626953,
      "step": 17398
    },
    {
      "epoch": 0.00010618896484375,
      "step": 17398,
      "training_step_time": 0.3987424373626709
    },
    {
      "epoch": 0.000106195068359375,
      "model_forward_time": 0.11472344398498535,
      "step": 17399
    },
    {
      "epoch": 0.000106195068359375,
      "step": 17399,
      "training_step_time": 0.4206404685974121
    },
    {
      "epoch": 0.000106201171875,
      "grad_norm": 0.1617133915424347,
      "learning_rate": 8.506183921362443e-05,
      "loss": 0.047,
      "step": 17400
    },
    {
      "epoch": 0.000106201171875,
      "model_forward_time": 0.11487770080566406,
      "step": 17400
    },
    {
      "epoch": 0.000106201171875,
      "step": 17400,
      "training_step_time": 0.37831544876098633
    },
    {
      "epoch": 0.000106207275390625,
      "model_forward_time": 0.11517977714538574,
      "step": 17401
    },
    {
      "epoch": 0.000106207275390625,
      "step": 17401,
      "training_step_time": 0.3935537338256836
    },
    {
      "epoch": 0.00010621337890625,
      "model_forward_time": 0.11464571952819824,
      "step": 17402
    },
    {
      "epoch": 0.00010621337890625,
      "step": 17402,
      "training_step_time": 0.6570615768432617
    },
    {
      "epoch": 0.000106219482421875,
      "model_forward_time": 0.11445856094360352,
      "step": 17403
    },
    {
      "epoch": 0.000106219482421875,
      "step": 17403,
      "training_step_time": 0.44573354721069336
    },
    {
      "epoch": 0.0001062255859375,
      "model_forward_time": 0.11482048034667969,
      "step": 17404
    },
    {
      "epoch": 0.0001062255859375,
      "step": 17404,
      "training_step_time": 0.3989217281341553
    },
    {
      "epoch": 0.000106231689453125,
      "model_forward_time": 0.11500430107116699,
      "step": 17405
    },
    {
      "epoch": 0.000106231689453125,
      "step": 17405,
      "training_step_time": 0.4234602451324463
    },
    {
      "epoch": 0.00010623779296875,
      "model_forward_time": 0.11450004577636719,
      "step": 17406
    },
    {
      "epoch": 0.00010623779296875,
      "step": 17406,
      "training_step_time": 0.38857460021972656
    },
    {
      "epoch": 0.000106243896484375,
      "model_forward_time": 0.1147618293762207,
      "step": 17407
    },
    {
      "epoch": 0.000106243896484375,
      "step": 17407,
      "training_step_time": 0.39899301528930664
    },
    {
      "epoch": 0.00010625,
      "model_forward_time": 0.11527371406555176,
      "step": 17408
    },
    {
      "epoch": 0.00010625,
      "step": 17408,
      "training_step_time": 0.5321319103240967
    },
    {
      "epoch": 0.000106256103515625,
      "model_forward_time": 0.11499905586242676,
      "step": 17409
    },
    {
      "epoch": 0.000106256103515625,
      "step": 17409,
      "training_step_time": 0.44806480407714844
    },
    {
      "epoch": 0.00010626220703125,
      "grad_norm": 0.14448192715644836,
      "learning_rate": 8.504218712799839e-05,
      "loss": 0.0512,
      "step": 17410
    },
    {
      "epoch": 0.00010626220703125,
      "model_forward_time": 0.11458420753479004,
      "step": 17410
    },
    {
      "epoch": 0.00010626220703125,
      "step": 17410,
      "training_step_time": 0.45028042793273926
    },
    {
      "epoch": 0.000106268310546875,
      "model_forward_time": 0.11430859565734863,
      "step": 17411
    },
    {
      "epoch": 0.000106268310546875,
      "step": 17411,
      "training_step_time": 0.41854119300842285
    },
    {
      "epoch": 0.0001062744140625,
      "model_forward_time": 0.11530518531799316,
      "step": 17412
    },
    {
      "epoch": 0.0001062744140625,
      "step": 17412,
      "training_step_time": 0.3973240852355957
    },
    {
      "epoch": 0.000106280517578125,
      "model_forward_time": 0.11442089080810547,
      "step": 17413
    },
    {
      "epoch": 0.000106280517578125,
      "step": 17413,
      "training_step_time": 0.42739439010620117
    },
    {
      "epoch": 0.00010628662109375,
      "model_forward_time": 0.11506247520446777,
      "step": 17414
    },
    {
      "epoch": 0.00010628662109375,
      "step": 17414,
      "training_step_time": 0.463367223739624
    },
    {
      "epoch": 0.000106292724609375,
      "model_forward_time": 0.11523747444152832,
      "step": 17415
    },
    {
      "epoch": 0.000106292724609375,
      "step": 17415,
      "training_step_time": 0.38952112197875977
    },
    {
      "epoch": 0.000106298828125,
      "model_forward_time": 0.11485028266906738,
      "step": 17416
    },
    {
      "epoch": 0.000106298828125,
      "step": 17416,
      "training_step_time": 0.3906230926513672
    },
    {
      "epoch": 0.000106304931640625,
      "model_forward_time": 0.11503934860229492,
      "step": 17417
    },
    {
      "epoch": 0.000106304931640625,
      "step": 17417,
      "training_step_time": 0.4951341152191162
    },
    {
      "epoch": 0.00010631103515625,
      "model_forward_time": 0.11546540260314941,
      "step": 17418
    },
    {
      "epoch": 0.00010631103515625,
      "step": 17418,
      "training_step_time": 0.4418618679046631
    },
    {
      "epoch": 0.000106317138671875,
      "model_forward_time": 0.11468100547790527,
      "step": 17419
    },
    {
      "epoch": 0.000106317138671875,
      "step": 17419,
      "training_step_time": 0.47342443466186523
    },
    {
      "epoch": 0.0001063232421875,
      "grad_norm": 0.258371502161026,
      "learning_rate": 8.502252439748113e-05,
      "loss": 0.0554,
      "step": 17420
    },
    {
      "epoch": 0.0001063232421875,
      "model_forward_time": 0.11456441879272461,
      "step": 17420
    },
    {
      "epoch": 0.0001063232421875,
      "step": 17420,
      "training_step_time": 0.5137186050415039
    },
    {
      "epoch": 0.000106329345703125,
      "model_forward_time": 0.11489129066467285,
      "step": 17421
    },
    {
      "epoch": 0.000106329345703125,
      "step": 17421,
      "training_step_time": 0.40840911865234375
    },
    {
      "epoch": 0.00010633544921875,
      "model_forward_time": 0.1141660213470459,
      "step": 17422
    },
    {
      "epoch": 0.00010633544921875,
      "step": 17422,
      "training_step_time": 0.39556264877319336
    },
    {
      "epoch": 0.000106341552734375,
      "model_forward_time": 0.11532378196716309,
      "step": 17423
    },
    {
      "epoch": 0.000106341552734375,
      "step": 17423,
      "training_step_time": 0.4954109191894531
    },
    {
      "epoch": 0.00010634765625,
      "model_forward_time": 0.11484813690185547,
      "step": 17424
    },
    {
      "epoch": 0.00010634765625,
      "step": 17424,
      "training_step_time": 0.3915596008300781
    },
    {
      "epoch": 0.000106353759765625,
      "model_forward_time": 0.11446642875671387,
      "step": 17425
    },
    {
      "epoch": 0.000106353759765625,
      "step": 17425,
      "training_step_time": 0.44883131980895996
    },
    {
      "epoch": 0.00010635986328125,
      "model_forward_time": 0.11565423011779785,
      "step": 17426
    },
    {
      "epoch": 0.00010635986328125,
      "step": 17426,
      "training_step_time": 0.5538210868835449
    },
    {
      "epoch": 0.000106365966796875,
      "model_forward_time": 0.1144263744354248,
      "step": 17427
    },
    {
      "epoch": 0.000106365966796875,
      "step": 17427,
      "training_step_time": 0.45810794830322266
    },
    {
      "epoch": 0.0001063720703125,
      "model_forward_time": 0.11501908302307129,
      "step": 17428
    },
    {
      "epoch": 0.0001063720703125,
      "step": 17428,
      "training_step_time": 0.3774232864379883
    },
    {
      "epoch": 0.000106378173828125,
      "model_forward_time": 0.11431694030761719,
      "step": 17429
    },
    {
      "epoch": 0.000106378173828125,
      "step": 17429,
      "training_step_time": 0.8241562843322754
    },
    {
      "epoch": 0.00010638427734375,
      "grad_norm": 0.14068707823753357,
      "learning_rate": 8.500285102804568e-05,
      "loss": 0.057,
      "step": 17430
    },
    {
      "epoch": 0.00010638427734375,
      "model_forward_time": 0.11374378204345703,
      "step": 17430
    },
    {
      "epoch": 0.00010638427734375,
      "step": 17430,
      "training_step_time": 0.4664463996887207
    },
    {
      "epoch": 0.000106390380859375,
      "model_forward_time": 0.11446189880371094,
      "step": 17431
    },
    {
      "epoch": 0.000106390380859375,
      "step": 17431,
      "training_step_time": 0.452437162399292
    },
    {
      "epoch": 0.000106396484375,
      "model_forward_time": 0.11409854888916016,
      "step": 17432
    },
    {
      "epoch": 0.000106396484375,
      "step": 17432,
      "training_step_time": 0.41235852241516113
    },
    {
      "epoch": 0.000106402587890625,
      "model_forward_time": 0.11387515068054199,
      "step": 17433
    },
    {
      "epoch": 0.000106402587890625,
      "step": 17433,
      "training_step_time": 0.39055633544921875
    },
    {
      "epoch": 0.00010640869140625,
      "model_forward_time": 0.11392450332641602,
      "step": 17434
    },
    {
      "epoch": 0.00010640869140625,
      "step": 17434,
      "training_step_time": 0.3783550262451172
    },
    {
      "epoch": 0.000106414794921875,
      "model_forward_time": 0.1146700382232666,
      "step": 17435
    },
    {
      "epoch": 0.000106414794921875,
      "step": 17435,
      "training_step_time": 0.5609414577484131
    },
    {
      "epoch": 0.0001064208984375,
      "model_forward_time": 0.11439180374145508,
      "step": 17436
    },
    {
      "epoch": 0.0001064208984375,
      "step": 17436,
      "training_step_time": 0.4755885601043701
    },
    {
      "epoch": 0.000106427001953125,
      "model_forward_time": 0.11422157287597656,
      "step": 17437
    },
    {
      "epoch": 0.000106427001953125,
      "step": 17437,
      "training_step_time": 0.39047718048095703
    },
    {
      "epoch": 0.00010643310546875,
      "model_forward_time": 0.11515283584594727,
      "step": 17438
    },
    {
      "epoch": 0.00010643310546875,
      "step": 17438,
      "training_step_time": 0.5116803646087646
    },
    {
      "epoch": 0.000106439208984375,
      "model_forward_time": 0.11513614654541016,
      "step": 17439
    },
    {
      "epoch": 0.000106439208984375,
      "step": 17439,
      "training_step_time": 0.426255464553833
    },
    {
      "epoch": 0.0001064453125,
      "grad_norm": 0.17142461240291595,
      "learning_rate": 8.498316702566828e-05,
      "loss": 0.0502,
      "step": 17440
    },
    {
      "epoch": 0.0001064453125,
      "model_forward_time": 0.113922119140625,
      "step": 17440
    },
    {
      "epoch": 0.0001064453125,
      "step": 17440,
      "training_step_time": 0.4147312641143799
    },
    {
      "epoch": 0.000106451416015625,
      "model_forward_time": 0.11440801620483398,
      "step": 17441
    },
    {
      "epoch": 0.000106451416015625,
      "step": 17441,
      "training_step_time": 0.8630166053771973
    },
    {
      "epoch": 0.00010645751953125,
      "model_forward_time": 0.11453485488891602,
      "step": 17442
    },
    {
      "epoch": 0.00010645751953125,
      "step": 17442,
      "training_step_time": 0.43160557746887207
    },
    {
      "epoch": 0.000106463623046875,
      "model_forward_time": 0.11476325988769531,
      "step": 17443
    },
    {
      "epoch": 0.000106463623046875,
      "step": 17443,
      "training_step_time": 0.43242812156677246
    },
    {
      "epoch": 0.0001064697265625,
      "model_forward_time": 0.11440110206604004,
      "step": 17444
    },
    {
      "epoch": 0.0001064697265625,
      "step": 17444,
      "training_step_time": 0.4537365436553955
    },
    {
      "epoch": 0.000106475830078125,
      "model_forward_time": 0.11433982849121094,
      "step": 17445
    },
    {
      "epoch": 0.000106475830078125,
      "step": 17445,
      "training_step_time": 0.40575623512268066
    },
    {
      "epoch": 0.00010648193359375,
      "model_forward_time": 0.11576509475708008,
      "step": 17446
    },
    {
      "epoch": 0.00010648193359375,
      "step": 17446,
      "training_step_time": 0.3755314350128174
    },
    {
      "epoch": 0.000106488037109375,
      "model_forward_time": 0.11536073684692383,
      "step": 17447
    },
    {
      "epoch": 0.000106488037109375,
      "step": 17447,
      "training_step_time": 0.8951249122619629
    },
    {
      "epoch": 0.000106494140625,
      "model_forward_time": 0.11349177360534668,
      "step": 17448
    },
    {
      "epoch": 0.000106494140625,
      "step": 17448,
      "training_step_time": 0.38501405715942383
    },
    {
      "epoch": 0.000106500244140625,
      "model_forward_time": 0.11400651931762695,
      "step": 17449
    },
    {
      "epoch": 0.000106500244140625,
      "step": 17449,
      "training_step_time": 0.3962893486022949
    },
    {
      "epoch": 0.00010650634765625,
      "grad_norm": 0.24083290994167328,
      "learning_rate": 8.49634723963284e-05,
      "loss": 0.0513,
      "step": 17450
    },
    {
      "epoch": 0.00010650634765625,
      "model_forward_time": 0.114501953125,
      "step": 17450
    },
    {
      "epoch": 0.00010650634765625,
      "step": 17450,
      "training_step_time": 0.38109683990478516
    },
    {
      "epoch": 0.000106512451171875,
      "model_forward_time": 0.11468935012817383,
      "step": 17451
    },
    {
      "epoch": 0.000106512451171875,
      "step": 17451,
      "training_step_time": 0.43042778968811035
    },
    {
      "epoch": 0.0001065185546875,
      "model_forward_time": 0.1143951416015625,
      "step": 17452
    },
    {
      "epoch": 0.0001065185546875,
      "step": 17452,
      "training_step_time": 0.3872058391571045
    },
    {
      "epoch": 0.000106524658203125,
      "model_forward_time": 0.11452078819274902,
      "step": 17453
    },
    {
      "epoch": 0.000106524658203125,
      "step": 17453,
      "training_step_time": 0.7266874313354492
    },
    {
      "epoch": 0.00010653076171875,
      "model_forward_time": 0.11494874954223633,
      "step": 17454
    },
    {
      "epoch": 0.00010653076171875,
      "step": 17454,
      "training_step_time": 0.39440226554870605
    },
    {
      "epoch": 0.000106536865234375,
      "model_forward_time": 0.11440134048461914,
      "step": 17455
    },
    {
      "epoch": 0.000106536865234375,
      "step": 17455,
      "training_step_time": 0.4532477855682373
    },
    {
      "epoch": 0.00010654296875,
      "model_forward_time": 0.11497974395751953,
      "step": 17456
    },
    {
      "epoch": 0.00010654296875,
      "step": 17456,
      "training_step_time": 0.43631696701049805
    },
    {
      "epoch": 0.000106549072265625,
      "model_forward_time": 0.11576485633850098,
      "step": 17457
    },
    {
      "epoch": 0.000106549072265625,
      "step": 17457,
      "training_step_time": 0.48599696159362793
    },
    {
      "epoch": 0.00010655517578125,
      "model_forward_time": 0.11402201652526855,
      "step": 17458
    },
    {
      "epoch": 0.00010655517578125,
      "step": 17458,
      "training_step_time": 0.3965418338775635
    },
    {
      "epoch": 0.000106561279296875,
      "model_forward_time": 0.11435294151306152,
      "step": 17459
    },
    {
      "epoch": 0.000106561279296875,
      "step": 17459,
      "training_step_time": 0.7640194892883301
    },
    {
      "epoch": 0.0001065673828125,
      "grad_norm": 0.14725986123085022,
      "learning_rate": 8.494376714600878e-05,
      "loss": 0.0527,
      "step": 17460
    },
    {
      "epoch": 0.0001065673828125,
      "model_forward_time": 0.1139822006225586,
      "step": 17460
    },
    {
      "epoch": 0.0001065673828125,
      "step": 17460,
      "training_step_time": 0.38475704193115234
    },
    {
      "epoch": 0.000106573486328125,
      "model_forward_time": 0.11417269706726074,
      "step": 17461
    },
    {
      "epoch": 0.000106573486328125,
      "step": 17461,
      "training_step_time": 0.3928947448730469
    },
    {
      "epoch": 0.00010657958984375,
      "model_forward_time": 0.11444401741027832,
      "step": 17462
    },
    {
      "epoch": 0.00010657958984375,
      "step": 17462,
      "training_step_time": 0.3854336738586426
    },
    {
      "epoch": 0.000106585693359375,
      "model_forward_time": 0.11408329010009766,
      "step": 17463
    },
    {
      "epoch": 0.000106585693359375,
      "step": 17463,
      "training_step_time": 0.3843080997467041
    },
    {
      "epoch": 0.000106591796875,
      "model_forward_time": 0.1139535903930664,
      "step": 17464
    },
    {
      "epoch": 0.000106591796875,
      "step": 17464,
      "training_step_time": 0.43289852142333984
    },
    {
      "epoch": 0.000106597900390625,
      "model_forward_time": 0.11469507217407227,
      "step": 17465
    },
    {
      "epoch": 0.000106597900390625,
      "step": 17465,
      "training_step_time": 0.7148592472076416
    },
    {
      "epoch": 0.00010660400390625,
      "model_forward_time": 0.11432313919067383,
      "step": 17466
    },
    {
      "epoch": 0.00010660400390625,
      "step": 17466,
      "training_step_time": 0.3831501007080078
    },
    {
      "epoch": 0.000106610107421875,
      "model_forward_time": 0.11443853378295898,
      "step": 17467
    },
    {
      "epoch": 0.000106610107421875,
      "step": 17467,
      "training_step_time": 0.38311171531677246
    },
    {
      "epoch": 0.0001066162109375,
      "model_forward_time": 0.11443209648132324,
      "step": 17468
    },
    {
      "epoch": 0.0001066162109375,
      "step": 17468,
      "training_step_time": 0.37407708168029785
    },
    {
      "epoch": 0.000106622314453125,
      "model_forward_time": 0.11457109451293945,
      "step": 17469
    },
    {
      "epoch": 0.000106622314453125,
      "step": 17469,
      "training_step_time": 0.4797990322113037
    },
    {
      "epoch": 0.00010662841796875,
      "grad_norm": 0.13549795746803284,
      "learning_rate": 8.492405128069534e-05,
      "loss": 0.052,
      "step": 17470
    },
    {
      "epoch": 0.00010662841796875,
      "model_forward_time": 0.11405563354492188,
      "step": 17470
    },
    {
      "epoch": 0.00010662841796875,
      "step": 17470,
      "training_step_time": 0.40968847274780273
    },
    {
      "epoch": 0.000106634521484375,
      "model_forward_time": 0.11451125144958496,
      "step": 17471
    },
    {
      "epoch": 0.000106634521484375,
      "step": 17471,
      "training_step_time": 0.7868990898132324
    },
    {
      "epoch": 0.000106640625,
      "model_forward_time": 0.11378192901611328,
      "step": 17472
    },
    {
      "epoch": 0.000106640625,
      "step": 17472,
      "training_step_time": 0.40931081771850586
    },
    {
      "epoch": 0.000106646728515625,
      "model_forward_time": 0.11411905288696289,
      "step": 17473
    },
    {
      "epoch": 0.000106646728515625,
      "step": 17473,
      "training_step_time": 0.39619874954223633
    },
    {
      "epoch": 0.00010665283203125,
      "model_forward_time": 0.11435127258300781,
      "step": 17474
    },
    {
      "epoch": 0.00010665283203125,
      "step": 17474,
      "training_step_time": 0.3840339183807373
    },
    {
      "epoch": 0.000106658935546875,
      "model_forward_time": 0.11429262161254883,
      "step": 17475
    },
    {
      "epoch": 0.000106658935546875,
      "step": 17475,
      "training_step_time": 0.3790464401245117
    },
    {
      "epoch": 0.0001066650390625,
      "model_forward_time": 0.11426949501037598,
      "step": 17476
    },
    {
      "epoch": 0.0001066650390625,
      "step": 17476,
      "training_step_time": 0.37987685203552246
    },
    {
      "epoch": 0.000106671142578125,
      "model_forward_time": 0.11521124839782715,
      "step": 17477
    },
    {
      "epoch": 0.000106671142578125,
      "step": 17477,
      "training_step_time": 0.41118550300598145
    },
    {
      "epoch": 0.00010667724609375,
      "model_forward_time": 0.11480450630187988,
      "step": 17478
    },
    {
      "epoch": 0.00010667724609375,
      "step": 17478,
      "training_step_time": 0.41389966011047363
    },
    {
      "epoch": 0.000106683349609375,
      "model_forward_time": 0.1156308650970459,
      "step": 17479
    },
    {
      "epoch": 0.000106683349609375,
      "step": 17479,
      "training_step_time": 0.41802096366882324
    },
    {
      "epoch": 0.000106689453125,
      "grad_norm": 0.21887469291687012,
      "learning_rate": 8.490432480637723e-05,
      "loss": 0.055,
      "step": 17480
    },
    {
      "epoch": 0.000106689453125,
      "model_forward_time": 0.11482429504394531,
      "step": 17480
    },
    {
      "epoch": 0.000106689453125,
      "step": 17480,
      "training_step_time": 0.39072227478027344
    },
    {
      "epoch": 0.000106695556640625,
      "model_forward_time": 0.11554312705993652,
      "step": 17481
    },
    {
      "epoch": 0.000106695556640625,
      "step": 17481,
      "training_step_time": 0.3891639709472656
    },
    {
      "epoch": 0.00010670166015625,
      "model_forward_time": 0.11458563804626465,
      "step": 17482
    },
    {
      "epoch": 0.00010670166015625,
      "step": 17482,
      "training_step_time": 0.36839771270751953
    },
    {
      "epoch": 0.000106707763671875,
      "model_forward_time": 0.11538076400756836,
      "step": 17483
    },
    {
      "epoch": 0.000106707763671875,
      "step": 17483,
      "training_step_time": 0.4464256763458252
    },
    {
      "epoch": 0.0001067138671875,
      "model_forward_time": 0.11600351333618164,
      "step": 17484
    },
    {
      "epoch": 0.0001067138671875,
      "step": 17484,
      "training_step_time": 0.4794118404388428
    },
    {
      "epoch": 0.000106719970703125,
      "model_forward_time": 0.11507868766784668,
      "step": 17485
    },
    {
      "epoch": 0.000106719970703125,
      "step": 17485,
      "training_step_time": 0.4117443561553955
    },
    {
      "epoch": 0.00010672607421875,
      "model_forward_time": 0.11510181427001953,
      "step": 17486
    },
    {
      "epoch": 0.00010672607421875,
      "step": 17486,
      "training_step_time": 0.3898932933807373
    },
    {
      "epoch": 0.000106732177734375,
      "model_forward_time": 0.11560177803039551,
      "step": 17487
    },
    {
      "epoch": 0.000106732177734375,
      "step": 17487,
      "training_step_time": 0.39099717140197754
    },
    {
      "epoch": 0.00010673828125,
      "model_forward_time": 0.11594271659851074,
      "step": 17488
    },
    {
      "epoch": 0.00010673828125,
      "step": 17488,
      "training_step_time": 0.39504551887512207
    },
    {
      "epoch": 0.000106744384765625,
      "model_forward_time": 0.11606431007385254,
      "step": 17489
    },
    {
      "epoch": 0.000106744384765625,
      "step": 17489,
      "training_step_time": 0.4878966808319092
    },
    {
      "epoch": 0.00010675048828125,
      "grad_norm": 0.12650896608829498,
      "learning_rate": 8.488458772904684e-05,
      "loss": 0.0536,
      "step": 17490
    },
    {
      "epoch": 0.00010675048828125,
      "model_forward_time": 0.11486983299255371,
      "step": 17490
    },
    {
      "epoch": 0.00010675048828125,
      "step": 17490,
      "training_step_time": 0.4097297191619873
    },
    {
      "epoch": 0.000106756591796875,
      "model_forward_time": 0.11572527885437012,
      "step": 17491
    },
    {
      "epoch": 0.000106756591796875,
      "step": 17491,
      "training_step_time": 0.40380382537841797
    },
    {
      "epoch": 0.0001067626953125,
      "model_forward_time": 0.11548542976379395,
      "step": 17492
    },
    {
      "epoch": 0.0001067626953125,
      "step": 17492,
      "training_step_time": 0.3964419364929199
    },
    {
      "epoch": 0.000106768798828125,
      "model_forward_time": 0.11554837226867676,
      "step": 17493
    },
    {
      "epoch": 0.000106768798828125,
      "step": 17493,
      "training_step_time": 0.48809242248535156
    },
    {
      "epoch": 0.00010677490234375,
      "model_forward_time": 0.11513853073120117,
      "step": 17494
    },
    {
      "epoch": 0.00010677490234375,
      "step": 17494,
      "training_step_time": 0.43943357467651367
    },
    {
      "epoch": 0.000106781005859375,
      "model_forward_time": 0.11596536636352539,
      "step": 17495
    },
    {
      "epoch": 0.000106781005859375,
      "step": 17495,
      "training_step_time": 0.9057338237762451
    },
    {
      "epoch": 0.000106787109375,
      "model_forward_time": 0.11448788642883301,
      "step": 17496
    },
    {
      "epoch": 0.000106787109375,
      "step": 17496,
      "training_step_time": 0.40227675437927246
    },
    {
      "epoch": 0.000106793212890625,
      "model_forward_time": 0.11424064636230469,
      "step": 17497
    },
    {
      "epoch": 0.000106793212890625,
      "step": 17497,
      "training_step_time": 0.4394838809967041
    },
    {
      "epoch": 0.00010679931640625,
      "model_forward_time": 0.1147146224975586,
      "step": 17498
    },
    {
      "epoch": 0.00010679931640625,
      "step": 17498,
      "training_step_time": 0.3817596435546875
    },
    {
      "epoch": 0.000106805419921875,
      "model_forward_time": 0.11462664604187012,
      "step": 17499
    },
    {
      "epoch": 0.000106805419921875,
      "step": 17499,
      "training_step_time": 0.4017460346221924
    },
    {
      "epoch": 0.0001068115234375,
      "grad_norm": 0.17175264656543732,
      "learning_rate": 8.486484005469977e-05,
      "loss": 0.0449,
      "step": 17500
    },
    {
      "epoch": 0.0001068115234375,
      "model_forward_time": 0.11403226852416992,
      "step": 17500
    },
    {
      "epoch": 0.0001068115234375,
      "step": 17500,
      "training_step_time": 0.38231539726257324
    },
    {
      "epoch": 0.000106817626953125,
      "model_forward_time": 0.11445355415344238,
      "step": 17501
    },
    {
      "epoch": 0.000106817626953125,
      "step": 17501,
      "training_step_time": 0.7425069808959961
    },
    {
      "epoch": 0.00010682373046875,
      "model_forward_time": 0.11469149589538574,
      "step": 17502
    },
    {
      "epoch": 0.00010682373046875,
      "step": 17502,
      "training_step_time": 0.38767099380493164
    },
    {
      "epoch": 0.000106829833984375,
      "model_forward_time": 0.11435699462890625,
      "step": 17503
    },
    {
      "epoch": 0.000106829833984375,
      "step": 17503,
      "training_step_time": 0.38478851318359375
    },
    {
      "epoch": 0.0001068359375,
      "model_forward_time": 0.11479520797729492,
      "step": 17504
    },
    {
      "epoch": 0.0001068359375,
      "step": 17504,
      "training_step_time": 0.38358449935913086
    },
    {
      "epoch": 0.000106842041015625,
      "model_forward_time": 0.11623239517211914,
      "step": 17505
    },
    {
      "epoch": 0.000106842041015625,
      "step": 17505,
      "training_step_time": 0.4000990390777588
    },
    {
      "epoch": 0.00010684814453125,
      "model_forward_time": 0.11397075653076172,
      "step": 17506
    },
    {
      "epoch": 0.00010684814453125,
      "step": 17506,
      "training_step_time": 0.5015280246734619
    },
    {
      "epoch": 0.000106854248046875,
      "model_forward_time": 0.11468958854675293,
      "step": 17507
    },
    {
      "epoch": 0.000106854248046875,
      "step": 17507,
      "training_step_time": 0.5429120063781738
    },
    {
      "epoch": 0.0001068603515625,
      "model_forward_time": 0.11484837532043457,
      "step": 17508
    },
    {
      "epoch": 0.0001068603515625,
      "step": 17508,
      "training_step_time": 0.39989638328552246
    },
    {
      "epoch": 0.000106866455078125,
      "model_forward_time": 0.11495494842529297,
      "step": 17509
    },
    {
      "epoch": 0.000106866455078125,
      "step": 17509,
      "training_step_time": 0.3998985290527344
    },
    {
      "epoch": 0.00010687255859375,
      "grad_norm": 0.13688978552818298,
      "learning_rate": 8.484508178933486e-05,
      "loss": 0.0484,
      "step": 17510
    },
    {
      "epoch": 0.00010687255859375,
      "model_forward_time": 0.11511969566345215,
      "step": 17510
    },
    {
      "epoch": 0.00010687255859375,
      "step": 17510,
      "training_step_time": 0.45423245429992676
    },
    {
      "epoch": 0.000106878662109375,
      "model_forward_time": 0.11402726173400879,
      "step": 17511
    },
    {
      "epoch": 0.000106878662109375,
      "step": 17511,
      "training_step_time": 0.46286821365356445
    },
    {
      "epoch": 0.000106884765625,
      "model_forward_time": 0.11436653137207031,
      "step": 17512
    },
    {
      "epoch": 0.000106884765625,
      "step": 17512,
      "training_step_time": 0.432586669921875
    },
    {
      "epoch": 0.000106890869140625,
      "model_forward_time": 0.11454892158508301,
      "step": 17513
    },
    {
      "epoch": 0.000106890869140625,
      "step": 17513,
      "training_step_time": 0.7450835704803467
    },
    {
      "epoch": 0.00010689697265625,
      "model_forward_time": 0.11508822441101074,
      "step": 17514
    },
    {
      "epoch": 0.00010689697265625,
      "step": 17514,
      "training_step_time": 0.3835599422454834
    },
    {
      "epoch": 0.000106903076171875,
      "model_forward_time": 0.11398911476135254,
      "step": 17515
    },
    {
      "epoch": 0.000106903076171875,
      "step": 17515,
      "training_step_time": 0.3914828300476074
    },
    {
      "epoch": 0.0001069091796875,
      "model_forward_time": 0.1143653392791748,
      "step": 17516
    },
    {
      "epoch": 0.0001069091796875,
      "step": 17516,
      "training_step_time": 0.38545680046081543
    },
    {
      "epoch": 0.000106915283203125,
      "model_forward_time": 0.11495804786682129,
      "step": 17517
    },
    {
      "epoch": 0.000106915283203125,
      "step": 17517,
      "training_step_time": 0.3954315185546875
    },
    {
      "epoch": 0.00010692138671875,
      "model_forward_time": 0.11401486396789551,
      "step": 17518
    },
    {
      "epoch": 0.00010692138671875,
      "step": 17518,
      "training_step_time": 0.4052870273590088
    },
    {
      "epoch": 0.000106927490234375,
      "model_forward_time": 0.11514401435852051,
      "step": 17519
    },
    {
      "epoch": 0.000106927490234375,
      "step": 17519,
      "training_step_time": 0.8068687915802002
    },
    {
      "epoch": 0.00010693359375,
      "grad_norm": 0.12055148929357529,
      "learning_rate": 8.482531293895412e-05,
      "loss": 0.0537,
      "step": 17520
    },
    {
      "epoch": 0.00010693359375,
      "model_forward_time": 0.11358761787414551,
      "step": 17520
    },
    {
      "epoch": 0.00010693359375,
      "step": 17520,
      "training_step_time": 0.4700052738189697
    },
    {
      "epoch": 0.000106939697265625,
      "model_forward_time": 0.11388325691223145,
      "step": 17521
    },
    {
      "epoch": 0.000106939697265625,
      "step": 17521,
      "training_step_time": 0.3861513137817383
    },
    {
      "epoch": 0.00010694580078125,
      "model_forward_time": 0.11429977416992188,
      "step": 17522
    },
    {
      "epoch": 0.00010694580078125,
      "step": 17522,
      "training_step_time": 0.3874204158782959
    },
    {
      "epoch": 0.000106951904296875,
      "model_forward_time": 0.11443090438842773,
      "step": 17523
    },
    {
      "epoch": 0.000106951904296875,
      "step": 17523,
      "training_step_time": 0.41828060150146484
    },
    {
      "epoch": 0.0001069580078125,
      "model_forward_time": 0.11485028266906738,
      "step": 17524
    },
    {
      "epoch": 0.0001069580078125,
      "step": 17524,
      "training_step_time": 0.46632957458496094
    },
    {
      "epoch": 0.000106964111328125,
      "model_forward_time": 0.11535120010375977,
      "step": 17525
    },
    {
      "epoch": 0.000106964111328125,
      "step": 17525,
      "training_step_time": 0.7011632919311523
    },
    {
      "epoch": 0.00010697021484375,
      "model_forward_time": 0.11447358131408691,
      "step": 17526
    },
    {
      "epoch": 0.00010697021484375,
      "step": 17526,
      "training_step_time": 0.3938872814178467
    },
    {
      "epoch": 0.000106976318359375,
      "model_forward_time": 0.11414384841918945,
      "step": 17527
    },
    {
      "epoch": 0.000106976318359375,
      "step": 17527,
      "training_step_time": 0.3867056369781494
    },
    {
      "epoch": 0.000106982421875,
      "model_forward_time": 0.11406207084655762,
      "step": 17528
    },
    {
      "epoch": 0.000106982421875,
      "step": 17528,
      "training_step_time": 0.38182616233825684
    },
    {
      "epoch": 0.000106988525390625,
      "model_forward_time": 0.11476373672485352,
      "step": 17529
    },
    {
      "epoch": 0.000106988525390625,
      "step": 17529,
      "training_step_time": 0.3886380195617676
    },
    {
      "epoch": 0.00010699462890625,
      "grad_norm": 0.1836402863264084,
      "learning_rate": 8.480553350956282e-05,
      "loss": 0.0539,
      "step": 17530
    },
    {
      "epoch": 0.00010699462890625,
      "model_forward_time": 0.11428141593933105,
      "step": 17530
    },
    {
      "epoch": 0.00010699462890625,
      "step": 17530,
      "training_step_time": 0.40427350997924805
    },
    {
      "epoch": 0.000107000732421875,
      "model_forward_time": 0.11534643173217773,
      "step": 17531
    },
    {
      "epoch": 0.000107000732421875,
      "step": 17531,
      "training_step_time": 0.7805490493774414
    },
    {
      "epoch": 0.0001070068359375,
      "model_forward_time": 0.11504316329956055,
      "step": 17532
    },
    {
      "epoch": 0.0001070068359375,
      "step": 17532,
      "training_step_time": 0.4192509651184082
    },
    {
      "epoch": 0.000107012939453125,
      "model_forward_time": 0.11449742317199707,
      "step": 17533
    },
    {
      "epoch": 0.000107012939453125,
      "step": 17533,
      "training_step_time": 0.41693544387817383
    },
    {
      "epoch": 0.00010701904296875,
      "model_forward_time": 0.1141047477722168,
      "step": 17534
    },
    {
      "epoch": 0.00010701904296875,
      "step": 17534,
      "training_step_time": 0.3858323097229004
    },
    {
      "epoch": 0.000107025146484375,
      "model_forward_time": 0.11458611488342285,
      "step": 17535
    },
    {
      "epoch": 0.000107025146484375,
      "step": 17535,
      "training_step_time": 0.4364464282989502
    },
    {
      "epoch": 0.00010703125,
      "model_forward_time": 0.1147160530090332,
      "step": 17536
    },
    {
      "epoch": 0.00010703125,
      "step": 17536,
      "training_step_time": 0.3630518913269043
    },
    {
      "epoch": 0.000107037353515625,
      "model_forward_time": 0.11518430709838867,
      "step": 17537
    },
    {
      "epoch": 0.000107037353515625,
      "step": 17537,
      "training_step_time": 0.5943448543548584
    },
    {
      "epoch": 0.00010704345703125,
      "model_forward_time": 0.11516380310058594,
      "step": 17538
    },
    {
      "epoch": 0.00010704345703125,
      "step": 17538,
      "training_step_time": 0.5219535827636719
    },
    {
      "epoch": 0.000107049560546875,
      "model_forward_time": 0.11401820182800293,
      "step": 17539
    },
    {
      "epoch": 0.000107049560546875,
      "step": 17539,
      "training_step_time": 0.4836905002593994
    },
    {
      "epoch": 0.0001070556640625,
      "grad_norm": 0.16359613835811615,
      "learning_rate": 8.478574350716941e-05,
      "loss": 0.0573,
      "step": 17540
    },
    {
      "epoch": 0.0001070556640625,
      "model_forward_time": 0.11414885520935059,
      "step": 17540
    },
    {
      "epoch": 0.0001070556640625,
      "step": 17540,
      "training_step_time": 0.38053202629089355
    },
    {
      "epoch": 0.000107061767578125,
      "model_forward_time": 0.1146547794342041,
      "step": 17541
    },
    {
      "epoch": 0.000107061767578125,
      "step": 17541,
      "training_step_time": 0.381483793258667
    },
    {
      "epoch": 0.00010706787109375,
      "model_forward_time": 0.1142890453338623,
      "step": 17542
    },
    {
      "epoch": 0.00010706787109375,
      "step": 17542,
      "training_step_time": 0.38628172874450684
    },
    {
      "epoch": 0.000107073974609375,
      "model_forward_time": 0.11472702026367188,
      "step": 17543
    },
    {
      "epoch": 0.000107073974609375,
      "step": 17543,
      "training_step_time": 0.9062399864196777
    },
    {
      "epoch": 0.000107080078125,
      "model_forward_time": 0.11423683166503906,
      "step": 17544
    },
    {
      "epoch": 0.000107080078125,
      "step": 17544,
      "training_step_time": 0.38474202156066895
    },
    {
      "epoch": 0.000107086181640625,
      "model_forward_time": 0.1136634349822998,
      "step": 17545
    },
    {
      "epoch": 0.000107086181640625,
      "step": 17545,
      "training_step_time": 0.4712390899658203
    },
    {
      "epoch": 0.00010709228515625,
      "model_forward_time": 0.1138143539428711,
      "step": 17546
    },
    {
      "epoch": 0.00010709228515625,
      "step": 17546,
      "training_step_time": 0.4199378490447998
    },
    {
      "epoch": 0.000107098388671875,
      "model_forward_time": 0.11499261856079102,
      "step": 17547
    },
    {
      "epoch": 0.000107098388671875,
      "step": 17547,
      "training_step_time": 0.38977909088134766
    },
    {
      "epoch": 0.0001071044921875,
      "model_forward_time": 0.11522316932678223,
      "step": 17548
    },
    {
      "epoch": 0.0001071044921875,
      "step": 17548,
      "training_step_time": 0.3861348628997803
    },
    {
      "epoch": 0.000107110595703125,
      "model_forward_time": 0.11474084854125977,
      "step": 17549
    },
    {
      "epoch": 0.000107110595703125,
      "step": 17549,
      "training_step_time": 0.4561648368835449
    },
    {
      "epoch": 0.00010711669921875,
      "grad_norm": 0.13678854703903198,
      "learning_rate": 8.476594293778561e-05,
      "loss": 0.0545,
      "step": 17550
    },
    {
      "epoch": 0.00010711669921875,
      "model_forward_time": 0.11467194557189941,
      "step": 17550
    },
    {
      "epoch": 0.00010711669921875,
      "step": 17550,
      "training_step_time": 0.48615527153015137
    },
    {
      "epoch": 0.000107122802734375,
      "model_forward_time": 0.114715576171875,
      "step": 17551
    },
    {
      "epoch": 0.000107122802734375,
      "step": 17551,
      "training_step_time": 0.4358537197113037
    },
    {
      "epoch": 0.00010712890625,
      "model_forward_time": 0.11591696739196777,
      "step": 17552
    },
    {
      "epoch": 0.00010712890625,
      "step": 17552,
      "training_step_time": 0.4014711380004883
    },
    {
      "epoch": 0.000107135009765625,
      "model_forward_time": 0.11440634727478027,
      "step": 17553
    },
    {
      "epoch": 0.000107135009765625,
      "step": 17553,
      "training_step_time": 0.39395904541015625
    },
    {
      "epoch": 0.00010714111328125,
      "model_forward_time": 0.11512970924377441,
      "step": 17554
    },
    {
      "epoch": 0.00010714111328125,
      "step": 17554,
      "training_step_time": 0.3922760486602783
    },
    {
      "epoch": 0.000107147216796875,
      "model_forward_time": 0.11461901664733887,
      "step": 17555
    },
    {
      "epoch": 0.000107147216796875,
      "step": 17555,
      "training_step_time": 0.9670746326446533
    },
    {
      "epoch": 0.0001071533203125,
      "model_forward_time": 0.11459898948669434,
      "step": 17556
    },
    {
      "epoch": 0.0001071533203125,
      "step": 17556,
      "training_step_time": 0.38385844230651855
    },
    {
      "epoch": 0.000107159423828125,
      "model_forward_time": 0.11426043510437012,
      "step": 17557
    },
    {
      "epoch": 0.000107159423828125,
      "step": 17557,
      "training_step_time": 0.38646650314331055
    },
    {
      "epoch": 0.00010716552734375,
      "model_forward_time": 0.11409139633178711,
      "step": 17558
    },
    {
      "epoch": 0.00010716552734375,
      "step": 17558,
      "training_step_time": 0.3822021484375
    },
    {
      "epoch": 0.000107171630859375,
      "model_forward_time": 0.11391735076904297,
      "step": 17559
    },
    {
      "epoch": 0.000107171630859375,
      "step": 17559,
      "training_step_time": 0.42998743057250977
    },
    {
      "epoch": 0.000107177734375,
      "grad_norm": 0.14385582506656647,
      "learning_rate": 8.474613180742628e-05,
      "loss": 0.0556,
      "step": 17560
    },
    {
      "epoch": 0.000107177734375,
      "model_forward_time": 0.11393976211547852,
      "step": 17560
    },
    {
      "epoch": 0.000107177734375,
      "step": 17560,
      "training_step_time": 0.44919419288635254
    },
    {
      "epoch": 0.000107183837890625,
      "model_forward_time": 0.11442685127258301,
      "step": 17561
    },
    {
      "epoch": 0.000107183837890625,
      "step": 17561,
      "training_step_time": 0.8371858596801758
    },
    {
      "epoch": 0.00010718994140625,
      "model_forward_time": 0.11483621597290039,
      "step": 17562
    },
    {
      "epoch": 0.00010718994140625,
      "step": 17562,
      "training_step_time": 0.48833799362182617
    },
    {
      "epoch": 0.000107196044921875,
      "model_forward_time": 0.1143488883972168,
      "step": 17563
    },
    {
      "epoch": 0.000107196044921875,
      "step": 17563,
      "training_step_time": 0.43993210792541504
    },
    {
      "epoch": 0.0001072021484375,
      "model_forward_time": 0.11403369903564453,
      "step": 17564
    },
    {
      "epoch": 0.0001072021484375,
      "step": 17564,
      "training_step_time": 0.4575629234313965
    },
    {
      "epoch": 0.000107208251953125,
      "model_forward_time": 0.11404752731323242,
      "step": 17565
    },
    {
      "epoch": 0.000107208251953125,
      "step": 17565,
      "training_step_time": 0.3871142864227295
    },
    {
      "epoch": 0.00010721435546875,
      "model_forward_time": 0.11404561996459961,
      "step": 17566
    },
    {
      "epoch": 0.00010721435546875,
      "step": 17566,
      "training_step_time": 0.382753849029541
    },
    {
      "epoch": 0.000107220458984375,
      "model_forward_time": 0.11417651176452637,
      "step": 17567
    },
    {
      "epoch": 0.000107220458984375,
      "step": 17567,
      "training_step_time": 0.5519673824310303
    },
    {
      "epoch": 0.0001072265625,
      "model_forward_time": 0.11430835723876953,
      "step": 17568
    },
    {
      "epoch": 0.0001072265625,
      "step": 17568,
      "training_step_time": 0.38751983642578125
    },
    {
      "epoch": 0.000107232666015625,
      "model_forward_time": 0.11389970779418945,
      "step": 17569
    },
    {
      "epoch": 0.000107232666015625,
      "step": 17569,
      "training_step_time": 0.3949744701385498
    },
    {
      "epoch": 0.00010723876953125,
      "grad_norm": 0.1527251899242401,
      "learning_rate": 8.472631012210953e-05,
      "loss": 0.0506,
      "step": 17570
    },
    {
      "epoch": 0.00010723876953125,
      "model_forward_time": 0.11482930183410645,
      "step": 17570
    },
    {
      "epoch": 0.00010723876953125,
      "step": 17570,
      "training_step_time": 0.3784945011138916
    },
    {
      "epoch": 0.000107244873046875,
      "model_forward_time": 0.1151430606842041,
      "step": 17571
    },
    {
      "epoch": 0.000107244873046875,
      "step": 17571,
      "training_step_time": 0.4413328170776367
    },
    {
      "epoch": 0.0001072509765625,
      "model_forward_time": 0.11512923240661621,
      "step": 17572
    },
    {
      "epoch": 0.0001072509765625,
      "step": 17572,
      "training_step_time": 0.39491987228393555
    },
    {
      "epoch": 0.000107257080078125,
      "model_forward_time": 0.11526775360107422,
      "step": 17573
    },
    {
      "epoch": 0.000107257080078125,
      "step": 17573,
      "training_step_time": 0.7703039646148682
    },
    {
      "epoch": 0.00010726318359375,
      "model_forward_time": 0.11490058898925781,
      "step": 17574
    },
    {
      "epoch": 0.00010726318359375,
      "step": 17574,
      "training_step_time": 0.40187525749206543
    },
    {
      "epoch": 0.000107269287109375,
      "model_forward_time": 0.11432003974914551,
      "step": 17575
    },
    {
      "epoch": 0.000107269287109375,
      "step": 17575,
      "training_step_time": 0.36261534690856934
    },
    {
      "epoch": 0.000107275390625,
      "model_forward_time": 0.11457371711730957,
      "step": 17576
    },
    {
      "epoch": 0.000107275390625,
      "step": 17576,
      "training_step_time": 0.45157408714294434
    },
    {
      "epoch": 0.000107281494140625,
      "model_forward_time": 0.11402750015258789,
      "step": 17577
    },
    {
      "epoch": 0.000107281494140625,
      "step": 17577,
      "training_step_time": 0.4581797122955322
    },
    {
      "epoch": 0.00010728759765625,
      "model_forward_time": 0.1137242317199707,
      "step": 17578
    },
    {
      "epoch": 0.00010728759765625,
      "step": 17578,
      "training_step_time": 0.3879702091217041
    },
    {
      "epoch": 0.000107293701171875,
      "model_forward_time": 0.11493802070617676,
      "step": 17579
    },
    {
      "epoch": 0.000107293701171875,
      "step": 17579,
      "training_step_time": 0.5273475646972656
    },
    {
      "epoch": 0.0001072998046875,
      "grad_norm": 0.16078628599643707,
      "learning_rate": 8.470647788785665e-05,
      "loss": 0.0538,
      "step": 17580
    },
    {
      "epoch": 0.0001072998046875,
      "model_forward_time": 0.11457586288452148,
      "step": 17580
    },
    {
      "epoch": 0.0001072998046875,
      "step": 17580,
      "training_step_time": 0.38840627670288086
    },
    {
      "epoch": 0.000107305908203125,
      "model_forward_time": 0.11496329307556152,
      "step": 17581
    },
    {
      "epoch": 0.000107305908203125,
      "step": 17581,
      "training_step_time": 0.39652347564697266
    },
    {
      "epoch": 0.00010731201171875,
      "model_forward_time": 0.11484432220458984,
      "step": 17582
    },
    {
      "epoch": 0.00010731201171875,
      "step": 17582,
      "training_step_time": 0.4015641212463379
    },
    {
      "epoch": 0.000107318115234375,
      "model_forward_time": 0.11478376388549805,
      "step": 17583
    },
    {
      "epoch": 0.000107318115234375,
      "step": 17583,
      "training_step_time": 0.38985443115234375
    },
    {
      "epoch": 0.00010732421875,
      "model_forward_time": 0.11504769325256348,
      "step": 17584
    },
    {
      "epoch": 0.00010732421875,
      "step": 17584,
      "training_step_time": 0.42462873458862305
    },
    {
      "epoch": 0.000107330322265625,
      "model_forward_time": 0.11578583717346191,
      "step": 17585
    },
    {
      "epoch": 0.000107330322265625,
      "step": 17585,
      "training_step_time": 0.8514139652252197
    },
    {
      "epoch": 0.00010733642578125,
      "model_forward_time": 0.11477136611938477,
      "step": 17586
    },
    {
      "epoch": 0.00010733642578125,
      "step": 17586,
      "training_step_time": 0.39002132415771484
    },
    {
      "epoch": 0.000107342529296875,
      "model_forward_time": 0.11397528648376465,
      "step": 17587
    },
    {
      "epoch": 0.000107342529296875,
      "step": 17587,
      "training_step_time": 0.39347052574157715
    },
    {
      "epoch": 0.0001073486328125,
      "model_forward_time": 0.11422514915466309,
      "step": 17588
    },
    {
      "epoch": 0.0001073486328125,
      "step": 17588,
      "training_step_time": 0.38863229751586914
    },
    {
      "epoch": 0.000107354736328125,
      "model_forward_time": 0.1148538589477539,
      "step": 17589
    },
    {
      "epoch": 0.000107354736328125,
      "step": 17589,
      "training_step_time": 0.4388437271118164
    },
    {
      "epoch": 0.00010736083984375,
      "grad_norm": 0.1805490106344223,
      "learning_rate": 8.468663511069217e-05,
      "loss": 0.047,
      "step": 17590
    },
    {
      "epoch": 0.00010736083984375,
      "model_forward_time": 0.11445927619934082,
      "step": 17590
    },
    {
      "epoch": 0.00010736083984375,
      "step": 17590,
      "training_step_time": 0.4282827377319336
    },
    {
      "epoch": 0.000107366943359375,
      "model_forward_time": 0.11564397811889648,
      "step": 17591
    },
    {
      "epoch": 0.000107366943359375,
      "step": 17591,
      "training_step_time": 0.5527729988098145
    },
    {
      "epoch": 0.000107373046875,
      "model_forward_time": 0.11574578285217285,
      "step": 17592
    },
    {
      "epoch": 0.000107373046875,
      "step": 17592,
      "training_step_time": 0.38254308700561523
    },
    {
      "epoch": 0.000107379150390625,
      "model_forward_time": 0.11469697952270508,
      "step": 17593
    },
    {
      "epoch": 0.000107379150390625,
      "step": 17593,
      "training_step_time": 0.3875463008880615
    },
    {
      "epoch": 0.00010738525390625,
      "model_forward_time": 0.1149439811706543,
      "step": 17594
    },
    {
      "epoch": 0.00010738525390625,
      "step": 17594,
      "training_step_time": 0.39173364639282227
    },
    {
      "epoch": 0.000107391357421875,
      "model_forward_time": 0.11560773849487305,
      "step": 17595
    },
    {
      "epoch": 0.000107391357421875,
      "step": 17595,
      "training_step_time": 0.39125943183898926
    },
    {
      "epoch": 0.0001073974609375,
      "model_forward_time": 0.11537432670593262,
      "step": 17596
    },
    {
      "epoch": 0.0001073974609375,
      "step": 17596,
      "training_step_time": 0.3880441188812256
    },
    {
      "epoch": 0.000107403564453125,
      "model_forward_time": 0.11488676071166992,
      "step": 17597
    },
    {
      "epoch": 0.000107403564453125,
      "step": 17597,
      "training_step_time": 0.8186028003692627
    },
    {
      "epoch": 0.00010740966796875,
      "model_forward_time": 0.11498594284057617,
      "step": 17598
    },
    {
      "epoch": 0.00010740966796875,
      "step": 17598,
      "training_step_time": 0.46344804763793945
    },
    {
      "epoch": 0.000107415771484375,
      "model_forward_time": 0.11542797088623047,
      "step": 17599
    },
    {
      "epoch": 0.000107415771484375,
      "step": 17599,
      "training_step_time": 0.4636986255645752
    },
    {
      "epoch": 0.000107421875,
      "grad_norm": 0.1535293161869049,
      "learning_rate": 8.466678179664379e-05,
      "loss": 0.0476,
      "step": 17600
    },
    {
      "epoch": 0.000107421875,
      "model_forward_time": 0.11482501029968262,
      "step": 17600
    },
    {
      "epoch": 0.000107421875,
      "step": 17600,
      "training_step_time": 0.37618088722229004
    },
    {
      "epoch": 0.000107427978515625,
      "model_forward_time": 0.11454939842224121,
      "step": 17601
    },
    {
      "epoch": 0.000107427978515625,
      "step": 17601,
      "training_step_time": 0.3804469108581543
    },
    {
      "epoch": 0.00010743408203125,
      "model_forward_time": 0.11491203308105469,
      "step": 17602
    },
    {
      "epoch": 0.00010743408203125,
      "step": 17602,
      "training_step_time": 0.3663821220397949
    },
    {
      "epoch": 0.000107440185546875,
      "model_forward_time": 0.11582589149475098,
      "step": 17603
    },
    {
      "epoch": 0.000107440185546875,
      "step": 17603,
      "training_step_time": 0.471301794052124
    },
    {
      "epoch": 0.0001074462890625,
      "model_forward_time": 0.11486673355102539,
      "step": 17604
    },
    {
      "epoch": 0.0001074462890625,
      "step": 17604,
      "training_step_time": 0.42113208770751953
    },
    {
      "epoch": 0.000107452392578125,
      "model_forward_time": 0.1146237850189209,
      "step": 17605
    },
    {
      "epoch": 0.000107452392578125,
      "step": 17605,
      "training_step_time": 0.48062849044799805
    },
    {
      "epoch": 0.00010745849609375,
      "model_forward_time": 0.11408400535583496,
      "step": 17606
    },
    {
      "epoch": 0.00010745849609375,
      "step": 17606,
      "training_step_time": 0.38141393661499023
    },
    {
      "epoch": 0.000107464599609375,
      "model_forward_time": 0.11469101905822754,
      "step": 17607
    },
    {
      "epoch": 0.000107464599609375,
      "step": 17607,
      "training_step_time": 0.38457679748535156
    },
    {
      "epoch": 0.000107470703125,
      "model_forward_time": 0.11517119407653809,
      "step": 17608
    },
    {
      "epoch": 0.000107470703125,
      "step": 17608,
      "training_step_time": 0.3796420097351074
    },
    {
      "epoch": 0.000107476806640625,
      "model_forward_time": 0.11537909507751465,
      "step": 17609
    },
    {
      "epoch": 0.000107476806640625,
      "step": 17609,
      "training_step_time": 0.4389212131500244
    },
    {
      "epoch": 0.00010748291015625,
      "grad_norm": 0.1710605025291443,
      "learning_rate": 8.46469179517424e-05,
      "loss": 0.0442,
      "step": 17610
    },
    {
      "epoch": 0.00010748291015625,
      "model_forward_time": 0.11621522903442383,
      "step": 17610
    },
    {
      "epoch": 0.00010748291015625,
      "step": 17610,
      "training_step_time": 0.46237611770629883
    },
    {
      "epoch": 0.000107489013671875,
      "model_forward_time": 0.11477899551391602,
      "step": 17611
    },
    {
      "epoch": 0.000107489013671875,
      "step": 17611,
      "training_step_time": 0.43499255180358887
    },
    {
      "epoch": 0.0001074951171875,
      "model_forward_time": 0.11516475677490234,
      "step": 17612
    },
    {
      "epoch": 0.0001074951171875,
      "step": 17612,
      "training_step_time": 0.4607117176055908
    },
    {
      "epoch": 0.000107501220703125,
      "model_forward_time": 0.11533284187316895,
      "step": 17613
    },
    {
      "epoch": 0.000107501220703125,
      "step": 17613,
      "training_step_time": 0.4401888847351074
    },
    {
      "epoch": 0.00010750732421875,
      "model_forward_time": 0.11482596397399902,
      "step": 17614
    },
    {
      "epoch": 0.00010750732421875,
      "step": 17614,
      "training_step_time": 0.3831923007965088
    },
    {
      "epoch": 0.000107513427734375,
      "model_forward_time": 0.11515426635742188,
      "step": 17615
    },
    {
      "epoch": 0.000107513427734375,
      "step": 17615,
      "training_step_time": 0.3954763412475586
    },
    {
      "epoch": 0.00010751953125,
      "model_forward_time": 0.1147308349609375,
      "step": 17616
    },
    {
      "epoch": 0.00010751953125,
      "step": 17616,
      "training_step_time": 0.36576390266418457
    },
    {
      "epoch": 0.000107525634765625,
      "model_forward_time": 0.11465096473693848,
      "step": 17617
    },
    {
      "epoch": 0.000107525634765625,
      "step": 17617,
      "training_step_time": 0.46924877166748047
    },
    {
      "epoch": 0.00010753173828125,
      "model_forward_time": 0.11508822441101074,
      "step": 17618
    },
    {
      "epoch": 0.00010753173828125,
      "step": 17618,
      "training_step_time": 0.48378610610961914
    },
    {
      "epoch": 0.000107537841796875,
      "model_forward_time": 0.11440610885620117,
      "step": 17619
    },
    {
      "epoch": 0.000107537841796875,
      "step": 17619,
      "training_step_time": 0.4803144931793213
    },
    {
      "epoch": 0.0001075439453125,
      "grad_norm": 0.17945647239685059,
      "learning_rate": 8.462704358202216e-05,
      "loss": 0.0502,
      "step": 17620
    },
    {
      "epoch": 0.0001075439453125,
      "model_forward_time": 0.11388707160949707,
      "step": 17620
    },
    {
      "epoch": 0.0001075439453125,
      "step": 17620,
      "training_step_time": 0.38140249252319336
    },
    {
      "epoch": 0.000107550048828125,
      "model_forward_time": 0.11460518836975098,
      "step": 17621
    },
    {
      "epoch": 0.000107550048828125,
      "step": 17621,
      "training_step_time": 0.38885951042175293
    },
    {
      "epoch": 0.00010755615234375,
      "model_forward_time": 0.11479759216308594,
      "step": 17622
    },
    {
      "epoch": 0.00010755615234375,
      "step": 17622,
      "training_step_time": 0.43689894676208496
    },
    {
      "epoch": 0.000107562255859375,
      "model_forward_time": 0.11497616767883301,
      "step": 17623
    },
    {
      "epoch": 0.000107562255859375,
      "step": 17623,
      "training_step_time": 0.4091300964355469
    },
    {
      "epoch": 0.000107568359375,
      "model_forward_time": 0.11539840698242188,
      "step": 17624
    },
    {
      "epoch": 0.000107568359375,
      "step": 17624,
      "training_step_time": 0.3856546878814697
    },
    {
      "epoch": 0.000107574462890625,
      "model_forward_time": 0.11454391479492188,
      "step": 17625
    },
    {
      "epoch": 0.000107574462890625,
      "step": 17625,
      "training_step_time": 0.383192777633667
    },
    {
      "epoch": 0.00010758056640625,
      "model_forward_time": 0.1149752140045166,
      "step": 17626
    },
    {
      "epoch": 0.00010758056640625,
      "step": 17626,
      "training_step_time": 0.39917898178100586
    },
    {
      "epoch": 0.000107586669921875,
      "model_forward_time": 0.11487388610839844,
      "step": 17627
    },
    {
      "epoch": 0.000107586669921875,
      "step": 17627,
      "training_step_time": 0.3862309455871582
    },
    {
      "epoch": 0.0001075927734375,
      "model_forward_time": 0.1153874397277832,
      "step": 17628
    },
    {
      "epoch": 0.0001075927734375,
      "step": 17628,
      "training_step_time": 0.4107635021209717
    },
    {
      "epoch": 0.000107598876953125,
      "model_forward_time": 0.11506152153015137,
      "step": 17629
    },
    {
      "epoch": 0.000107598876953125,
      "step": 17629,
      "training_step_time": 0.3979940414428711
    },
    {
      "epoch": 0.00010760498046875,
      "grad_norm": 0.13815858960151672,
      "learning_rate": 8.460715869352035e-05,
      "loss": 0.0518,
      "step": 17630
    },
    {
      "epoch": 0.00010760498046875,
      "model_forward_time": 0.11697649955749512,
      "step": 17630
    },
    {
      "epoch": 0.00010760498046875,
      "step": 17630,
      "training_step_time": 0.39028334617614746
    },
    {
      "epoch": 0.000107611083984375,
      "model_forward_time": 0.11473345756530762,
      "step": 17631
    },
    {
      "epoch": 0.000107611083984375,
      "step": 17631,
      "training_step_time": 0.42419958114624023
    },
    {
      "epoch": 0.0001076171875,
      "model_forward_time": 0.11546015739440918,
      "step": 17632
    },
    {
      "epoch": 0.0001076171875,
      "step": 17632,
      "training_step_time": 0.3961465358734131
    },
    {
      "epoch": 0.000107623291015625,
      "model_forward_time": 0.11527228355407715,
      "step": 17633
    },
    {
      "epoch": 0.000107623291015625,
      "step": 17633,
      "training_step_time": 0.47241926193237305
    },
    {
      "epoch": 0.00010762939453125,
      "model_forward_time": 0.11454916000366211,
      "step": 17634
    },
    {
      "epoch": 0.00010762939453125,
      "step": 17634,
      "training_step_time": 0.4358694553375244
    },
    {
      "epoch": 0.000107635498046875,
      "model_forward_time": 0.11546158790588379,
      "step": 17635
    },
    {
      "epoch": 0.000107635498046875,
      "step": 17635,
      "training_step_time": 0.39489078521728516
    },
    {
      "epoch": 0.0001076416015625,
      "model_forward_time": 0.11488199234008789,
      "step": 17636
    },
    {
      "epoch": 0.0001076416015625,
      "step": 17636,
      "training_step_time": 0.4438650608062744
    },
    {
      "epoch": 0.000107647705078125,
      "model_forward_time": 0.11560416221618652,
      "step": 17637
    },
    {
      "epoch": 0.000107647705078125,
      "step": 17637,
      "training_step_time": 0.3984110355377197
    },
    {
      "epoch": 0.00010765380859375,
      "model_forward_time": 0.11505007743835449,
      "step": 17638
    },
    {
      "epoch": 0.00010765380859375,
      "step": 17638,
      "training_step_time": 0.4117095470428467
    },
    {
      "epoch": 0.000107659912109375,
      "model_forward_time": 0.11504268646240234,
      "step": 17639
    },
    {
      "epoch": 0.000107659912109375,
      "step": 17639,
      "training_step_time": 0.40212273597717285
    },
    {
      "epoch": 0.000107666015625,
      "grad_norm": 0.1247388944029808,
      "learning_rate": 8.458726329227747e-05,
      "loss": 0.0545,
      "step": 17640
    },
    {
      "epoch": 0.000107666015625,
      "model_forward_time": 0.11513066291809082,
      "step": 17640
    },
    {
      "epoch": 0.000107666015625,
      "step": 17640,
      "training_step_time": 0.4298057556152344
    },
    {
      "epoch": 0.000107672119140625,
      "model_forward_time": 0.11455225944519043,
      "step": 17641
    },
    {
      "epoch": 0.000107672119140625,
      "step": 17641,
      "training_step_time": 0.4077785015106201
    },
    {
      "epoch": 0.00010767822265625,
      "model_forward_time": 0.11453008651733398,
      "step": 17642
    },
    {
      "epoch": 0.00010767822265625,
      "step": 17642,
      "training_step_time": 0.4373745918273926
    },
    {
      "epoch": 0.000107684326171875,
      "model_forward_time": 0.11475086212158203,
      "step": 17643
    },
    {
      "epoch": 0.000107684326171875,
      "step": 17643,
      "training_step_time": 0.3889603614807129
    },
    {
      "epoch": 0.0001076904296875,
      "model_forward_time": 0.1153261661529541,
      "step": 17644
    },
    {
      "epoch": 0.0001076904296875,
      "step": 17644,
      "training_step_time": 0.39110660552978516
    },
    {
      "epoch": 0.000107696533203125,
      "model_forward_time": 0.11499500274658203,
      "step": 17645
    },
    {
      "epoch": 0.000107696533203125,
      "step": 17645,
      "training_step_time": 0.38860607147216797
    },
    {
      "epoch": 0.00010770263671875,
      "model_forward_time": 0.11564350128173828,
      "step": 17646
    },
    {
      "epoch": 0.00010770263671875,
      "step": 17646,
      "training_step_time": 0.4859805107116699
    },
    {
      "epoch": 0.000107708740234375,
      "model_forward_time": 0.11493277549743652,
      "step": 17647
    },
    {
      "epoch": 0.000107708740234375,
      "step": 17647,
      "training_step_time": 0.4687352180480957
    },
    {
      "epoch": 0.00010771484375,
      "model_forward_time": 0.11570501327514648,
      "step": 17648
    },
    {
      "epoch": 0.00010771484375,
      "step": 17648,
      "training_step_time": 0.4978811740875244
    },
    {
      "epoch": 0.000107720947265625,
      "model_forward_time": 0.11474204063415527,
      "step": 17649
    },
    {
      "epoch": 0.000107720947265625,
      "step": 17649,
      "training_step_time": 0.42935752868652344
    },
    {
      "epoch": 0.00010772705078125,
      "grad_norm": 0.13789650797843933,
      "learning_rate": 8.456735738433723e-05,
      "loss": 0.052,
      "step": 17650
    },
    {
      "epoch": 0.00010772705078125,
      "model_forward_time": 0.11452102661132812,
      "step": 17650
    },
    {
      "epoch": 0.00010772705078125,
      "step": 17650,
      "training_step_time": 0.47310924530029297
    },
    {
      "epoch": 0.000107733154296875,
      "model_forward_time": 0.11395645141601562,
      "step": 17651
    },
    {
      "epoch": 0.000107733154296875,
      "step": 17651,
      "training_step_time": 0.37691259384155273
    },
    {
      "epoch": 0.0001077392578125,
      "model_forward_time": 0.11491012573242188,
      "step": 17652
    },
    {
      "epoch": 0.0001077392578125,
      "step": 17652,
      "training_step_time": 0.39713168144226074
    },
    {
      "epoch": 0.000107745361328125,
      "model_forward_time": 0.11581969261169434,
      "step": 17653
    },
    {
      "epoch": 0.000107745361328125,
      "step": 17653,
      "training_step_time": 0.39384007453918457
    },
    {
      "epoch": 0.00010775146484375,
      "model_forward_time": 0.11525535583496094,
      "step": 17654
    },
    {
      "epoch": 0.00010775146484375,
      "step": 17654,
      "training_step_time": 0.3868081569671631
    },
    {
      "epoch": 0.000107757568359375,
      "model_forward_time": 0.11494565010070801,
      "step": 17655
    },
    {
      "epoch": 0.000107757568359375,
      "step": 17655,
      "training_step_time": 0.41861844062805176
    },
    {
      "epoch": 0.000107763671875,
      "model_forward_time": 0.11507749557495117,
      "step": 17656
    },
    {
      "epoch": 0.000107763671875,
      "step": 17656,
      "training_step_time": 0.3880045413970947
    },
    {
      "epoch": 0.000107769775390625,
      "model_forward_time": 0.11532378196716309,
      "step": 17657
    },
    {
      "epoch": 0.000107769775390625,
      "step": 17657,
      "training_step_time": 0.4111495018005371
    },
    {
      "epoch": 0.00010777587890625,
      "model_forward_time": 0.11577129364013672,
      "step": 17658
    },
    {
      "epoch": 0.00010777587890625,
      "step": 17658,
      "training_step_time": 0.3884005546569824
    },
    {
      "epoch": 0.000107781982421875,
      "model_forward_time": 0.11555242538452148,
      "step": 17659
    },
    {
      "epoch": 0.000107781982421875,
      "step": 17659,
      "training_step_time": 0.4052743911743164
    },
    {
      "epoch": 0.0001077880859375,
      "grad_norm": 0.20179864764213562,
      "learning_rate": 8.454744097574652e-05,
      "loss": 0.0526,
      "step": 17660
    },
    {
      "epoch": 0.0001077880859375,
      "model_forward_time": 0.11461949348449707,
      "step": 17660
    },
    {
      "epoch": 0.0001077880859375,
      "step": 17660,
      "training_step_time": 0.40474534034729004
    },
    {
      "epoch": 0.000107794189453125,
      "model_forward_time": 0.11486291885375977,
      "step": 17661
    },
    {
      "epoch": 0.000107794189453125,
      "step": 17661,
      "training_step_time": 0.48574328422546387
    },
    {
      "epoch": 0.00010780029296875,
      "model_forward_time": 0.11544466018676758,
      "step": 17662
    },
    {
      "epoch": 0.00010780029296875,
      "step": 17662,
      "training_step_time": 0.44052767753601074
    },
    {
      "epoch": 0.000107806396484375,
      "model_forward_time": 0.11589860916137695,
      "step": 17663
    },
    {
      "epoch": 0.000107806396484375,
      "step": 17663,
      "training_step_time": 0.4742269515991211
    },
    {
      "epoch": 0.0001078125,
      "model_forward_time": 0.11559057235717773,
      "step": 17664
    },
    {
      "epoch": 0.0001078125,
      "step": 17664,
      "training_step_time": 0.4314570426940918
    },
    {
      "epoch": 0.000107818603515625,
      "model_forward_time": 0.1150660514831543,
      "step": 17665
    },
    {
      "epoch": 0.000107818603515625,
      "step": 17665,
      "training_step_time": 0.3934357166290283
    },
    {
      "epoch": 0.00010782470703125,
      "model_forward_time": 0.1144552230834961,
      "step": 17666
    },
    {
      "epoch": 0.00010782470703125,
      "step": 17666,
      "training_step_time": 0.4005122184753418
    },
    {
      "epoch": 0.000107830810546875,
      "model_forward_time": 0.11529111862182617,
      "step": 17667
    },
    {
      "epoch": 0.000107830810546875,
      "step": 17667,
      "training_step_time": 0.388277530670166
    },
    {
      "epoch": 0.0001078369140625,
      "model_forward_time": 0.11473250389099121,
      "step": 17668
    },
    {
      "epoch": 0.0001078369140625,
      "step": 17668,
      "training_step_time": 0.38416290283203125
    },
    {
      "epoch": 0.000107843017578125,
      "model_forward_time": 0.11493039131164551,
      "step": 17669
    },
    {
      "epoch": 0.000107843017578125,
      "step": 17669,
      "training_step_time": 0.3949282169342041
    },
    {
      "epoch": 0.00010784912109375,
      "grad_norm": 0.15112674236297607,
      "learning_rate": 8.452751407255541e-05,
      "loss": 0.0584,
      "step": 17670
    },
    {
      "epoch": 0.00010784912109375,
      "model_forward_time": 0.11542344093322754,
      "step": 17670
    },
    {
      "epoch": 0.00010784912109375,
      "step": 17670,
      "training_step_time": 0.3926541805267334
    },
    {
      "epoch": 0.000107855224609375,
      "model_forward_time": 0.11545228958129883,
      "step": 17671
    },
    {
      "epoch": 0.000107855224609375,
      "step": 17671,
      "training_step_time": 0.4099886417388916
    },
    {
      "epoch": 0.000107861328125,
      "model_forward_time": 0.11547994613647461,
      "step": 17672
    },
    {
      "epoch": 0.000107861328125,
      "step": 17672,
      "training_step_time": 0.4777994155883789
    },
    {
      "epoch": 0.000107867431640625,
      "model_forward_time": 0.11448216438293457,
      "step": 17673
    },
    {
      "epoch": 0.000107867431640625,
      "step": 17673,
      "training_step_time": 0.38623476028442383
    },
    {
      "epoch": 0.00010787353515625,
      "model_forward_time": 0.11510467529296875,
      "step": 17674
    },
    {
      "epoch": 0.00010787353515625,
      "step": 17674,
      "training_step_time": 0.3936631679534912
    },
    {
      "epoch": 0.000107879638671875,
      "model_forward_time": 0.11492156982421875,
      "step": 17675
    },
    {
      "epoch": 0.000107879638671875,
      "step": 17675,
      "training_step_time": 0.3671736717224121
    },
    {
      "epoch": 0.0001078857421875,
      "model_forward_time": 0.11579680442810059,
      "step": 17676
    },
    {
      "epoch": 0.0001078857421875,
      "step": 17676,
      "training_step_time": 0.41022443771362305
    },
    {
      "epoch": 0.000107891845703125,
      "model_forward_time": 0.11544942855834961,
      "step": 17677
    },
    {
      "epoch": 0.000107891845703125,
      "step": 17677,
      "training_step_time": 0.47231316566467285
    },
    {
      "epoch": 0.00010789794921875,
      "model_forward_time": 0.11487483978271484,
      "step": 17678
    },
    {
      "epoch": 0.00010789794921875,
      "step": 17678,
      "training_step_time": 0.3944823741912842
    },
    {
      "epoch": 0.000107904052734375,
      "model_forward_time": 0.11574435234069824,
      "step": 17679
    },
    {
      "epoch": 0.000107904052734375,
      "step": 17679,
      "training_step_time": 0.3915061950683594
    },
    {
      "epoch": 0.00010791015625,
      "grad_norm": 0.1631401628255844,
      "learning_rate": 8.450757668081716e-05,
      "loss": 0.0499,
      "step": 17680
    },
    {
      "epoch": 0.00010791015625,
      "model_forward_time": 0.11493206024169922,
      "step": 17680
    },
    {
      "epoch": 0.00010791015625,
      "step": 17680,
      "training_step_time": 0.3915591239929199
    },
    {
      "epoch": 0.000107916259765625,
      "model_forward_time": 0.11562132835388184,
      "step": 17681
    },
    {
      "epoch": 0.000107916259765625,
      "step": 17681,
      "training_step_time": 0.38356518745422363
    },
    {
      "epoch": 0.00010792236328125,
      "model_forward_time": 0.11523151397705078,
      "step": 17682
    },
    {
      "epoch": 0.00010792236328125,
      "step": 17682,
      "training_step_time": 0.38367128372192383
    },
    {
      "epoch": 0.000107928466796875,
      "model_forward_time": 0.1147921085357666,
      "step": 17683
    },
    {
      "epoch": 0.000107928466796875,
      "step": 17683,
      "training_step_time": 0.3948538303375244
    },
    {
      "epoch": 0.0001079345703125,
      "model_forward_time": 0.11544656753540039,
      "step": 17684
    },
    {
      "epoch": 0.0001079345703125,
      "step": 17684,
      "training_step_time": 0.4625101089477539
    },
    {
      "epoch": 0.000107940673828125,
      "model_forward_time": 0.11526131629943848,
      "step": 17685
    },
    {
      "epoch": 0.000107940673828125,
      "step": 17685,
      "training_step_time": 0.4633355140686035
    },
    {
      "epoch": 0.00010794677734375,
      "model_forward_time": 0.1147153377532959,
      "step": 17686
    },
    {
      "epoch": 0.00010794677734375,
      "step": 17686,
      "training_step_time": 0.4567070007324219
    },
    {
      "epoch": 0.000107952880859375,
      "model_forward_time": 0.11551165580749512,
      "step": 17687
    },
    {
      "epoch": 0.000107952880859375,
      "step": 17687,
      "training_step_time": 0.39428234100341797
    },
    {
      "epoch": 0.000107958984375,
      "model_forward_time": 0.11511421203613281,
      "step": 17688
    },
    {
      "epoch": 0.000107958984375,
      "step": 17688,
      "training_step_time": 0.3911740779876709
    },
    {
      "epoch": 0.000107965087890625,
      "model_forward_time": 0.11472606658935547,
      "step": 17689
    },
    {
      "epoch": 0.000107965087890625,
      "step": 17689,
      "training_step_time": 0.41165852546691895
    },
    {
      "epoch": 0.00010797119140625,
      "grad_norm": 0.20576678216457367,
      "learning_rate": 8.448762880658825e-05,
      "loss": 0.0478,
      "step": 17690
    },
    {
      "epoch": 0.00010797119140625,
      "model_forward_time": 0.11603569984436035,
      "step": 17690
    },
    {
      "epoch": 0.00010797119140625,
      "step": 17690,
      "training_step_time": 0.4943246841430664
    },
    {
      "epoch": 0.000107977294921875,
      "model_forward_time": 0.11509084701538086,
      "step": 17691
    },
    {
      "epoch": 0.000107977294921875,
      "step": 17691,
      "training_step_time": 0.451556921005249
    },
    {
      "epoch": 0.0001079833984375,
      "model_forward_time": 0.11659026145935059,
      "step": 17692
    },
    {
      "epoch": 0.0001079833984375,
      "step": 17692,
      "training_step_time": 0.5119748115539551
    },
    {
      "epoch": 0.000107989501953125,
      "model_forward_time": 0.11477899551391602,
      "step": 17693
    },
    {
      "epoch": 0.000107989501953125,
      "step": 17693,
      "training_step_time": 0.3899657726287842
    },
    {
      "epoch": 0.00010799560546875,
      "model_forward_time": 0.11468195915222168,
      "step": 17694
    },
    {
      "epoch": 0.00010799560546875,
      "step": 17694,
      "training_step_time": 0.3950057029724121
    },
    {
      "epoch": 0.000108001708984375,
      "model_forward_time": 0.11470293998718262,
      "step": 17695
    },
    {
      "epoch": 0.000108001708984375,
      "step": 17695,
      "training_step_time": 0.3951137065887451
    },
    {
      "epoch": 0.0001080078125,
      "model_forward_time": 0.11519074440002441,
      "step": 17696
    },
    {
      "epoch": 0.0001080078125,
      "step": 17696,
      "training_step_time": 0.39136576652526855
    },
    {
      "epoch": 0.000108013916015625,
      "model_forward_time": 0.11488580703735352,
      "step": 17697
    },
    {
      "epoch": 0.000108013916015625,
      "step": 17697,
      "training_step_time": 0.38593387603759766
    },
    {
      "epoch": 0.00010802001953125,
      "model_forward_time": 0.11498188972473145,
      "step": 17698
    },
    {
      "epoch": 0.00010802001953125,
      "step": 17698,
      "training_step_time": 0.39246129989624023
    },
    {
      "epoch": 0.000108026123046875,
      "model_forward_time": 0.11547732353210449,
      "step": 17699
    },
    {
      "epoch": 0.000108026123046875,
      "step": 17699,
      "training_step_time": 0.41010117530822754
    },
    {
      "epoch": 0.0001080322265625,
      "grad_norm": 0.21008938550949097,
      "learning_rate": 8.44676704559283e-05,
      "loss": 0.0622,
      "step": 17700
    },
    {
      "epoch": 0.0001080322265625,
      "model_forward_time": 0.11523222923278809,
      "step": 17700
    },
    {
      "epoch": 0.0001080322265625,
      "step": 17700,
      "training_step_time": 0.415679931640625
    },
    {
      "epoch": 0.000108038330078125,
      "model_forward_time": 0.11489176750183105,
      "step": 17701
    },
    {
      "epoch": 0.000108038330078125,
      "step": 17701,
      "training_step_time": 0.45134735107421875
    },
    {
      "epoch": 0.00010804443359375,
      "model_forward_time": 0.11569595336914062,
      "step": 17702
    },
    {
      "epoch": 0.00010804443359375,
      "step": 17702,
      "training_step_time": 0.39437007904052734
    },
    {
      "epoch": 0.000108050537109375,
      "model_forward_time": 0.11581635475158691,
      "step": 17703
    },
    {
      "epoch": 0.000108050537109375,
      "step": 17703,
      "training_step_time": 0.3953268527984619
    },
    {
      "epoch": 0.000108056640625,
      "model_forward_time": 0.1145620346069336,
      "step": 17704
    },
    {
      "epoch": 0.000108056640625,
      "step": 17704,
      "training_step_time": 0.39072608947753906
    },
    {
      "epoch": 0.000108062744140625,
      "model_forward_time": 0.11484217643737793,
      "step": 17705
    },
    {
      "epoch": 0.000108062744140625,
      "step": 17705,
      "training_step_time": 0.4669671058654785
    },
    {
      "epoch": 0.00010806884765625,
      "model_forward_time": 0.11490297317504883,
      "step": 17706
    },
    {
      "epoch": 0.00010806884765625,
      "step": 17706,
      "training_step_time": 0.4704611301422119
    },
    {
      "epoch": 0.000108074951171875,
      "model_forward_time": 0.1153252124786377,
      "step": 17707
    },
    {
      "epoch": 0.000108074951171875,
      "step": 17707,
      "training_step_time": 0.49577808380126953
    },
    {
      "epoch": 0.0001080810546875,
      "model_forward_time": 0.1155250072479248,
      "step": 17708
    },
    {
      "epoch": 0.0001080810546875,
      "step": 17708,
      "training_step_time": 0.394331693649292
    },
    {
      "epoch": 0.000108087158203125,
      "model_forward_time": 0.11498141288757324,
      "step": 17709
    },
    {
      "epoch": 0.000108087158203125,
      "step": 17709,
      "training_step_time": 0.3918631076812744
    },
    {
      "epoch": 0.00010809326171875,
      "grad_norm": 0.144412562251091,
      "learning_rate": 8.444770163490012e-05,
      "loss": 0.0498,
      "step": 17710
    },
    {
      "epoch": 0.00010809326171875,
      "model_forward_time": 0.11490631103515625,
      "step": 17710
    },
    {
      "epoch": 0.00010809326171875,
      "step": 17710,
      "training_step_time": 0.38016200065612793
    },
    {
      "epoch": 0.000108099365234375,
      "model_forward_time": 0.11489367485046387,
      "step": 17711
    },
    {
      "epoch": 0.000108099365234375,
      "step": 17711,
      "training_step_time": 0.3909595012664795
    },
    {
      "epoch": 0.00010810546875,
      "model_forward_time": 0.11515998840332031,
      "step": 17712
    },
    {
      "epoch": 0.00010810546875,
      "step": 17712,
      "training_step_time": 0.3940308094024658
    },
    {
      "epoch": 0.000108111572265625,
      "model_forward_time": 0.11490774154663086,
      "step": 17713
    },
    {
      "epoch": 0.000108111572265625,
      "step": 17713,
      "training_step_time": 0.39488887786865234
    },
    {
      "epoch": 0.00010811767578125,
      "model_forward_time": 0.11622881889343262,
      "step": 17714
    },
    {
      "epoch": 0.00010811767578125,
      "step": 17714,
      "training_step_time": 0.47442126274108887
    },
    {
      "epoch": 0.000108123779296875,
      "model_forward_time": 0.11572265625,
      "step": 17715
    },
    {
      "epoch": 0.000108123779296875,
      "step": 17715,
      "training_step_time": 0.47251176834106445
    },
    {
      "epoch": 0.0001081298828125,
      "model_forward_time": 0.11591696739196777,
      "step": 17716
    },
    {
      "epoch": 0.0001081298828125,
      "step": 17716,
      "training_step_time": 0.39074015617370605
    },
    {
      "epoch": 0.000108135986328125,
      "model_forward_time": 0.11566662788391113,
      "step": 17717
    },
    {
      "epoch": 0.000108135986328125,
      "step": 17717,
      "training_step_time": 0.3881509304046631
    },
    {
      "epoch": 0.00010814208984375,
      "model_forward_time": 0.1159665584564209,
      "step": 17718
    },
    {
      "epoch": 0.00010814208984375,
      "step": 17718,
      "training_step_time": 0.43841099739074707
    },
    {
      "epoch": 0.000108148193359375,
      "model_forward_time": 0.11515927314758301,
      "step": 17719
    },
    {
      "epoch": 0.000108148193359375,
      "step": 17719,
      "training_step_time": 0.4092092514038086
    },
    {
      "epoch": 0.000108154296875,
      "grad_norm": 0.23542620241641998,
      "learning_rate": 8.442772234956972e-05,
      "loss": 0.0538,
      "step": 17720
    },
    {
      "epoch": 0.000108154296875,
      "model_forward_time": 0.11518239974975586,
      "step": 17720
    },
    {
      "epoch": 0.000108154296875,
      "step": 17720,
      "training_step_time": 0.46234750747680664
    },
    {
      "epoch": 0.000108160400390625,
      "model_forward_time": 0.11476492881774902,
      "step": 17721
    },
    {
      "epoch": 0.000108160400390625,
      "step": 17721,
      "training_step_time": 0.42420506477355957
    },
    {
      "epoch": 0.00010816650390625,
      "model_forward_time": 0.11493563652038574,
      "step": 17722
    },
    {
      "epoch": 0.00010816650390625,
      "step": 17722,
      "training_step_time": 0.41259145736694336
    },
    {
      "epoch": 0.000108172607421875,
      "model_forward_time": 0.11439275741577148,
      "step": 17723
    },
    {
      "epoch": 0.000108172607421875,
      "step": 17723,
      "training_step_time": 0.38521671295166016
    },
    {
      "epoch": 0.0001081787109375,
      "model_forward_time": 0.1145474910736084,
      "step": 17724
    },
    {
      "epoch": 0.0001081787109375,
      "step": 17724,
      "training_step_time": 0.3923683166503906
    },
    {
      "epoch": 0.000108184814453125,
      "model_forward_time": 0.11506819725036621,
      "step": 17725
    },
    {
      "epoch": 0.000108184814453125,
      "step": 17725,
      "training_step_time": 0.39658403396606445
    },
    {
      "epoch": 0.00010819091796875,
      "model_forward_time": 0.11493611335754395,
      "step": 17726
    },
    {
      "epoch": 0.00010819091796875,
      "step": 17726,
      "training_step_time": 0.3995780944824219
    },
    {
      "epoch": 0.000108197021484375,
      "model_forward_time": 0.11603355407714844,
      "step": 17727
    },
    {
      "epoch": 0.000108197021484375,
      "step": 17727,
      "training_step_time": 0.3899686336517334
    },
    {
      "epoch": 0.000108203125,
      "model_forward_time": 0.11439800262451172,
      "step": 17728
    },
    {
      "epoch": 0.000108203125,
      "step": 17728,
      "training_step_time": 0.39548182487487793
    },
    {
      "epoch": 0.000108209228515625,
      "model_forward_time": 0.11554312705993652,
      "step": 17729
    },
    {
      "epoch": 0.000108209228515625,
      "step": 17729,
      "training_step_time": 0.48639726638793945
    },
    {
      "epoch": 0.00010821533203125,
      "grad_norm": 0.20700906217098236,
      "learning_rate": 8.44077326060063e-05,
      "loss": 0.0546,
      "step": 17730
    },
    {
      "epoch": 0.00010821533203125,
      "model_forward_time": 0.11511087417602539,
      "step": 17730
    },
    {
      "epoch": 0.00010821533203125,
      "step": 17730,
      "training_step_time": 0.45399999618530273
    },
    {
      "epoch": 0.000108221435546875,
      "model_forward_time": 0.11422443389892578,
      "step": 17731
    },
    {
      "epoch": 0.000108221435546875,
      "step": 17731,
      "training_step_time": 0.3965578079223633
    },
    {
      "epoch": 0.0001082275390625,
      "model_forward_time": 0.11546516418457031,
      "step": 17732
    },
    {
      "epoch": 0.0001082275390625,
      "step": 17732,
      "training_step_time": 0.6290791034698486
    },
    {
      "epoch": 0.000108233642578125,
      "model_forward_time": 0.11477851867675781,
      "step": 17733
    },
    {
      "epoch": 0.000108233642578125,
      "step": 17733,
      "training_step_time": 0.4212629795074463
    },
    {
      "epoch": 0.00010823974609375,
      "model_forward_time": 0.11424541473388672,
      "step": 17734
    },
    {
      "epoch": 0.00010823974609375,
      "step": 17734,
      "training_step_time": 0.46066975593566895
    },
    {
      "epoch": 0.000108245849609375,
      "model_forward_time": 0.11421728134155273,
      "step": 17735
    },
    {
      "epoch": 0.000108245849609375,
      "step": 17735,
      "training_step_time": 0.43128490447998047
    },
    {
      "epoch": 0.000108251953125,
      "model_forward_time": 0.11457610130310059,
      "step": 17736
    },
    {
      "epoch": 0.000108251953125,
      "step": 17736,
      "training_step_time": 0.4037308692932129
    },
    {
      "epoch": 0.000108258056640625,
      "model_forward_time": 0.1143486499786377,
      "step": 17737
    },
    {
      "epoch": 0.000108258056640625,
      "step": 17737,
      "training_step_time": 0.40651535987854004
    },
    {
      "epoch": 0.00010826416015625,
      "model_forward_time": 0.11534857749938965,
      "step": 17738
    },
    {
      "epoch": 0.00010826416015625,
      "step": 17738,
      "training_step_time": 0.40596580505371094
    },
    {
      "epoch": 0.000108270263671875,
      "model_forward_time": 0.1149606704711914,
      "step": 17739
    },
    {
      "epoch": 0.000108270263671875,
      "step": 17739,
      "training_step_time": 0.3883020877838135
    },
    {
      "epoch": 0.0001082763671875,
      "grad_norm": 0.14464151859283447,
      "learning_rate": 8.438773241028219e-05,
      "loss": 0.0534,
      "step": 17740
    },
    {
      "epoch": 0.0001082763671875,
      "model_forward_time": 0.1144258975982666,
      "step": 17740
    },
    {
      "epoch": 0.0001082763671875,
      "step": 17740,
      "training_step_time": 0.3897402286529541
    },
    {
      "epoch": 0.000108282470703125,
      "model_forward_time": 0.11547970771789551,
      "step": 17741
    },
    {
      "epoch": 0.000108282470703125,
      "step": 17741,
      "training_step_time": 0.38588404655456543
    },
    {
      "epoch": 0.00010828857421875,
      "model_forward_time": 0.11548829078674316,
      "step": 17742
    },
    {
      "epoch": 0.00010828857421875,
      "step": 17742,
      "training_step_time": 0.3983194828033447
    },
    {
      "epoch": 0.000108294677734375,
      "model_forward_time": 0.1152961254119873,
      "step": 17743
    },
    {
      "epoch": 0.000108294677734375,
      "step": 17743,
      "training_step_time": 0.4516441822052002
    },
    {
      "epoch": 0.00010830078125,
      "model_forward_time": 0.11527180671691895,
      "step": 17744
    },
    {
      "epoch": 0.00010830078125,
      "step": 17744,
      "training_step_time": 0.661513090133667
    },
    {
      "epoch": 0.000108306884765625,
      "model_forward_time": 0.11498308181762695,
      "step": 17745
    },
    {
      "epoch": 0.000108306884765625,
      "step": 17745,
      "training_step_time": 0.4170265197753906
    },
    {
      "epoch": 0.00010831298828125,
      "model_forward_time": 0.11447024345397949,
      "step": 17746
    },
    {
      "epoch": 0.00010831298828125,
      "step": 17746,
      "training_step_time": 0.40282750129699707
    },
    {
      "epoch": 0.000108319091796875,
      "model_forward_time": 0.11474180221557617,
      "step": 17747
    },
    {
      "epoch": 0.000108319091796875,
      "step": 17747,
      "training_step_time": 0.3861734867095947
    },
    {
      "epoch": 0.0001083251953125,
      "model_forward_time": 0.11466717720031738,
      "step": 17748
    },
    {
      "epoch": 0.0001083251953125,
      "step": 17748,
      "training_step_time": 0.46062564849853516
    },
    {
      "epoch": 0.000108331298828125,
      "model_forward_time": 0.11473202705383301,
      "step": 17749
    },
    {
      "epoch": 0.000108331298828125,
      "step": 17749,
      "training_step_time": 0.4520101547241211
    },
    {
      "epoch": 0.00010833740234375,
      "grad_norm": 0.13870131969451904,
      "learning_rate": 8.436772176847294e-05,
      "loss": 0.0466,
      "step": 17750
    },
    {
      "epoch": 0.00010833740234375,
      "model_forward_time": 0.11548328399658203,
      "step": 17750
    },
    {
      "epoch": 0.00010833740234375,
      "step": 17750,
      "training_step_time": 0.5634360313415527
    },
    {
      "epoch": 0.000108343505859375,
      "model_forward_time": 0.11486697196960449,
      "step": 17751
    },
    {
      "epoch": 0.000108343505859375,
      "step": 17751,
      "training_step_time": 0.39315009117126465
    },
    {
      "epoch": 0.000108349609375,
      "model_forward_time": 0.11390829086303711,
      "step": 17752
    },
    {
      "epoch": 0.000108349609375,
      "step": 17752,
      "training_step_time": 0.38494229316711426
    },
    {
      "epoch": 0.000108355712890625,
      "model_forward_time": 0.11431574821472168,
      "step": 17753
    },
    {
      "epoch": 0.000108355712890625,
      "step": 17753,
      "training_step_time": 0.39161038398742676
    },
    {
      "epoch": 0.00010836181640625,
      "model_forward_time": 0.11516833305358887,
      "step": 17754
    },
    {
      "epoch": 0.00010836181640625,
      "step": 17754,
      "training_step_time": 0.38899779319763184
    },
    {
      "epoch": 0.000108367919921875,
      "model_forward_time": 0.11476373672485352,
      "step": 17755
    },
    {
      "epoch": 0.000108367919921875,
      "step": 17755,
      "training_step_time": 0.37854480743408203
    },
    {
      "epoch": 0.0001083740234375,
      "model_forward_time": 0.1148076057434082,
      "step": 17756
    },
    {
      "epoch": 0.0001083740234375,
      "step": 17756,
      "training_step_time": 0.7804129123687744
    },
    {
      "epoch": 0.000108380126953125,
      "model_forward_time": 0.1146249771118164,
      "step": 17757
    },
    {
      "epoch": 0.000108380126953125,
      "step": 17757,
      "training_step_time": 0.4804866313934326
    },
    {
      "epoch": 0.00010838623046875,
      "model_forward_time": 0.11490082740783691,
      "step": 17758
    },
    {
      "epoch": 0.00010838623046875,
      "step": 17758,
      "training_step_time": 0.41435766220092773
    },
    {
      "epoch": 0.000108392333984375,
      "model_forward_time": 0.11383724212646484,
      "step": 17759
    },
    {
      "epoch": 0.000108392333984375,
      "step": 17759,
      "training_step_time": 0.42138075828552246
    },
    {
      "epoch": 0.0001083984375,
      "grad_norm": 0.1326276808977127,
      "learning_rate": 8.434770068665723e-05,
      "loss": 0.0526,
      "step": 17760
    },
    {
      "epoch": 0.0001083984375,
      "model_forward_time": 0.11543798446655273,
      "step": 17760
    },
    {
      "epoch": 0.0001083984375,
      "step": 17760,
      "training_step_time": 0.377094030380249
    },
    {
      "epoch": 0.000108404541015625,
      "model_forward_time": 0.11397600173950195,
      "step": 17761
    },
    {
      "epoch": 0.000108404541015625,
      "step": 17761,
      "training_step_time": 0.3897080421447754
    },
    {
      "epoch": 0.00010841064453125,
      "model_forward_time": 0.11476325988769531,
      "step": 17762
    },
    {
      "epoch": 0.00010841064453125,
      "step": 17762,
      "training_step_time": 0.5380454063415527
    },
    {
      "epoch": 0.000108416748046875,
      "model_forward_time": 0.11504912376403809,
      "step": 17763
    },
    {
      "epoch": 0.000108416748046875,
      "step": 17763,
      "training_step_time": 0.44742846488952637
    },
    {
      "epoch": 0.0001084228515625,
      "model_forward_time": 0.11524009704589844,
      "step": 17764
    },
    {
      "epoch": 0.0001084228515625,
      "step": 17764,
      "training_step_time": 0.42339038848876953
    },
    {
      "epoch": 0.000108428955078125,
      "model_forward_time": 0.11478519439697266,
      "step": 17765
    },
    {
      "epoch": 0.000108428955078125,
      "step": 17765,
      "training_step_time": 0.39046669006347656
    },
    {
      "epoch": 0.00010843505859375,
      "model_forward_time": 0.11499524116516113,
      "step": 17766
    },
    {
      "epoch": 0.00010843505859375,
      "step": 17766,
      "training_step_time": 0.3928563594818115
    },
    {
      "epoch": 0.000108441162109375,
      "model_forward_time": 0.1149287223815918,
      "step": 17767
    },
    {
      "epoch": 0.000108441162109375,
      "step": 17767,
      "training_step_time": 0.3912007808685303
    },
    {
      "epoch": 0.000108447265625,
      "model_forward_time": 0.11532092094421387,
      "step": 17768
    },
    {
      "epoch": 0.000108447265625,
      "step": 17768,
      "training_step_time": 0.734438419342041
    },
    {
      "epoch": 0.000108453369140625,
      "model_forward_time": 0.11484694480895996,
      "step": 17769
    },
    {
      "epoch": 0.000108453369140625,
      "step": 17769,
      "training_step_time": 0.39348840713500977
    },
    {
      "epoch": 0.00010845947265625,
      "grad_norm": 0.1181492879986763,
      "learning_rate": 8.432766917091694e-05,
      "loss": 0.054,
      "step": 17770
    },
    {
      "epoch": 0.00010845947265625,
      "model_forward_time": 0.1148214340209961,
      "step": 17770
    },
    {
      "epoch": 0.00010845947265625,
      "step": 17770,
      "training_step_time": 0.4148585796356201
    },
    {
      "epoch": 0.000108465576171875,
      "model_forward_time": 0.11446475982666016,
      "step": 17771
    },
    {
      "epoch": 0.000108465576171875,
      "step": 17771,
      "training_step_time": 0.4190647602081299
    },
    {
      "epoch": 0.0001084716796875,
      "model_forward_time": 0.11528635025024414,
      "step": 17772
    },
    {
      "epoch": 0.0001084716796875,
      "step": 17772,
      "training_step_time": 0.4647197723388672
    },
    {
      "epoch": 0.000108477783203125,
      "model_forward_time": 0.11415386199951172,
      "step": 17773
    },
    {
      "epoch": 0.000108477783203125,
      "step": 17773,
      "training_step_time": 0.39846158027648926
    },
    {
      "epoch": 0.00010848388671875,
      "model_forward_time": 0.11474323272705078,
      "step": 17774
    },
    {
      "epoch": 0.00010848388671875,
      "step": 17774,
      "training_step_time": 0.45246458053588867
    },
    {
      "epoch": 0.000108489990234375,
      "model_forward_time": 0.11514115333557129,
      "step": 17775
    },
    {
      "epoch": 0.000108489990234375,
      "step": 17775,
      "training_step_time": 0.3938429355621338
    },
    {
      "epoch": 0.00010849609375,
      "model_forward_time": 0.11578941345214844,
      "step": 17776
    },
    {
      "epoch": 0.00010849609375,
      "step": 17776,
      "training_step_time": 0.42691850662231445
    },
    {
      "epoch": 0.000108502197265625,
      "model_forward_time": 0.11398148536682129,
      "step": 17777
    },
    {
      "epoch": 0.000108502197265625,
      "step": 17777,
      "training_step_time": 0.4841270446777344
    },
    {
      "epoch": 0.00010850830078125,
      "model_forward_time": 0.11488032341003418,
      "step": 17778
    },
    {
      "epoch": 0.00010850830078125,
      "step": 17778,
      "training_step_time": 0.5723600387573242
    },
    {
      "epoch": 0.000108514404296875,
      "model_forward_time": 0.1141347885131836,
      "step": 17779
    },
    {
      "epoch": 0.000108514404296875,
      "step": 17779,
      "training_step_time": 0.394519567489624
    },
    {
      "epoch": 0.0001085205078125,
      "grad_norm": 0.14560946822166443,
      "learning_rate": 8.430762722733714e-05,
      "loss": 0.0516,
      "step": 17780
    },
    {
      "epoch": 0.0001085205078125,
      "model_forward_time": 0.11500096321105957,
      "step": 17780
    },
    {
      "epoch": 0.0001085205078125,
      "step": 17780,
      "training_step_time": 0.39165568351745605
    },
    {
      "epoch": 0.000108526611328125,
      "model_forward_time": 0.11487150192260742,
      "step": 17781
    },
    {
      "epoch": 0.000108526611328125,
      "step": 17781,
      "training_step_time": 0.3860127925872803
    },
    {
      "epoch": 0.00010853271484375,
      "model_forward_time": 0.11452746391296387,
      "step": 17782
    },
    {
      "epoch": 0.00010853271484375,
      "step": 17782,
      "training_step_time": 0.3928370475769043
    },
    {
      "epoch": 0.000108538818359375,
      "model_forward_time": 0.1145479679107666,
      "step": 17783
    },
    {
      "epoch": 0.000108538818359375,
      "step": 17783,
      "training_step_time": 0.3985116481781006
    },
    {
      "epoch": 0.000108544921875,
      "model_forward_time": 0.11496758460998535,
      "step": 17784
    },
    {
      "epoch": 0.000108544921875,
      "step": 17784,
      "training_step_time": 0.44606971740722656
    },
    {
      "epoch": 0.000108551025390625,
      "model_forward_time": 0.11551952362060547,
      "step": 17785
    },
    {
      "epoch": 0.000108551025390625,
      "step": 17785,
      "training_step_time": 0.46188902854919434
    },
    {
      "epoch": 0.00010855712890625,
      "model_forward_time": 0.1148374080657959,
      "step": 17786
    },
    {
      "epoch": 0.00010855712890625,
      "step": 17786,
      "training_step_time": 0.4522702693939209
    },
    {
      "epoch": 0.000108563232421875,
      "model_forward_time": 0.11430001258850098,
      "step": 17787
    },
    {
      "epoch": 0.000108563232421875,
      "step": 17787,
      "training_step_time": 0.3865025043487549
    },
    {
      "epoch": 0.0001085693359375,
      "model_forward_time": 0.11557722091674805,
      "step": 17788
    },
    {
      "epoch": 0.0001085693359375,
      "step": 17788,
      "training_step_time": 0.39099574089050293
    },
    {
      "epoch": 0.000108575439453125,
      "model_forward_time": 0.1145637035369873,
      "step": 17789
    },
    {
      "epoch": 0.000108575439453125,
      "step": 17789,
      "training_step_time": 0.387836217880249
    },
    {
      "epoch": 0.00010858154296875,
      "grad_norm": 0.1506950557231903,
      "learning_rate": 8.428757486200603e-05,
      "loss": 0.0499,
      "step": 17790
    },
    {
      "epoch": 0.00010858154296875,
      "model_forward_time": 0.11487388610839844,
      "step": 17790
    },
    {
      "epoch": 0.00010858154296875,
      "step": 17790,
      "training_step_time": 0.5037479400634766
    },
    {
      "epoch": 0.000108587646484375,
      "model_forward_time": 0.11500787734985352,
      "step": 17791
    },
    {
      "epoch": 0.000108587646484375,
      "step": 17791,
      "training_step_time": 0.4426558017730713
    },
    {
      "epoch": 0.00010859375,
      "model_forward_time": 0.11484885215759277,
      "step": 17792
    },
    {
      "epoch": 0.00010859375,
      "step": 17792,
      "training_step_time": 0.602959394454956
    },
    {
      "epoch": 0.000108599853515625,
      "model_forward_time": 0.11378836631774902,
      "step": 17793
    },
    {
      "epoch": 0.000108599853515625,
      "step": 17793,
      "training_step_time": 0.39720749855041504
    },
    {
      "epoch": 0.00010860595703125,
      "model_forward_time": 0.11492657661437988,
      "step": 17794
    },
    {
      "epoch": 0.00010860595703125,
      "step": 17794,
      "training_step_time": 0.38623952865600586
    },
    {
      "epoch": 0.000108612060546875,
      "model_forward_time": 0.11454081535339355,
      "step": 17795
    },
    {
      "epoch": 0.000108612060546875,
      "step": 17795,
      "training_step_time": 0.3925642967224121
    },
    {
      "epoch": 0.0001086181640625,
      "model_forward_time": 0.11534953117370605,
      "step": 17796
    },
    {
      "epoch": 0.0001086181640625,
      "step": 17796,
      "training_step_time": 0.39059019088745117
    },
    {
      "epoch": 0.000108624267578125,
      "model_forward_time": 0.11504292488098145,
      "step": 17797
    },
    {
      "epoch": 0.000108624267578125,
      "step": 17797,
      "training_step_time": 0.3982851505279541
    },
    {
      "epoch": 0.00010863037109375,
      "model_forward_time": 0.11484789848327637,
      "step": 17798
    },
    {
      "epoch": 0.00010863037109375,
      "step": 17798,
      "training_step_time": 0.6516525745391846
    },
    {
      "epoch": 0.000108636474609375,
      "model_forward_time": 0.11459589004516602,
      "step": 17799
    },
    {
      "epoch": 0.000108636474609375,
      "step": 17799,
      "training_step_time": 0.5698843002319336
    },
    {
      "epoch": 0.000108642578125,
      "grad_norm": 0.16010920703411102,
      "learning_rate": 8.4267512081015e-05,
      "loss": 0.0517,
      "step": 17800
    },
    {
      "epoch": 0.000108642578125,
      "model_forward_time": 0.11480903625488281,
      "step": 17800
    },
    {
      "epoch": 0.000108642578125,
      "step": 17800,
      "training_step_time": 0.5487182140350342
    },
    {
      "epoch": 0.000108648681640625,
      "model_forward_time": 0.11745882034301758,
      "step": 17801
    },
    {
      "epoch": 0.000108648681640625,
      "step": 17801,
      "training_step_time": 0.563624382019043
    },
    {
      "epoch": 0.00010865478515625,
      "model_forward_time": 0.11825084686279297,
      "step": 17802
    },
    {
      "epoch": 0.00010865478515625,
      "step": 17802,
      "training_step_time": 0.5765528678894043
    },
    {
      "epoch": 0.000108660888671875,
      "model_forward_time": 0.11537051200866699,
      "step": 17803
    },
    {
      "epoch": 0.000108660888671875,
      "step": 17803,
      "training_step_time": 0.7296960353851318
    },
    {
      "epoch": 0.0001086669921875,
      "model_forward_time": 0.13247370719909668,
      "step": 17804
    },
    {
      "epoch": 0.0001086669921875,
      "step": 17804,
      "training_step_time": 0.7755141258239746
    },
    {
      "epoch": 0.000108673095703125,
      "model_forward_time": 0.12424039840698242,
      "step": 17805
    },
    {
      "epoch": 0.000108673095703125,
      "step": 17805,
      "training_step_time": 0.7348346710205078
    },
    {
      "epoch": 0.00010867919921875,
      "model_forward_time": 0.1278684139251709,
      "step": 17806
    },
    {
      "epoch": 0.00010867919921875,
      "step": 17806,
      "training_step_time": 0.6907854080200195
    },
    {
      "epoch": 0.000108685302734375,
      "model_forward_time": 0.11919355392456055,
      "step": 17807
    },
    {
      "epoch": 0.000108685302734375,
      "step": 17807,
      "training_step_time": 0.7441792488098145
    },
    {
      "epoch": 0.00010869140625,
      "model_forward_time": 0.11597394943237305,
      "step": 17808
    },
    {
      "epoch": 0.00010869140625,
      "step": 17808,
      "training_step_time": 0.6709139347076416
    },
    {
      "epoch": 0.000108697509765625,
      "model_forward_time": 0.11906123161315918,
      "step": 17809
    },
    {
      "epoch": 0.000108697509765625,
      "step": 17809,
      "training_step_time": 0.6863219738006592
    },
    {
      "epoch": 0.00010870361328125,
      "grad_norm": 0.12583617866039276,
      "learning_rate": 8.424743889045856e-05,
      "loss": 0.0525,
      "step": 17810
    },
    {
      "epoch": 0.00010870361328125,
      "model_forward_time": 0.11908650398254395,
      "step": 17810
    },
    {
      "epoch": 0.00010870361328125,
      "step": 17810,
      "training_step_time": 0.6977081298828125
    },
    {
      "epoch": 0.000108709716796875,
      "model_forward_time": 0.12053394317626953,
      "step": 17811
    },
    {
      "epoch": 0.000108709716796875,
      "step": 17811,
      "training_step_time": 0.6595683097839355
    },
    {
      "epoch": 0.0001087158203125,
      "model_forward_time": 0.1188967227935791,
      "step": 17812
    },
    {
      "epoch": 0.0001087158203125,
      "step": 17812,
      "training_step_time": 0.5829067230224609
    },
    {
      "epoch": 0.000108721923828125,
      "model_forward_time": 0.1194303035736084,
      "step": 17813
    },
    {
      "epoch": 0.000108721923828125,
      "step": 17813,
      "training_step_time": 0.6734869480133057
    },
    {
      "epoch": 0.00010872802734375,
      "model_forward_time": 0.11982560157775879,
      "step": 17814
    },
    {
      "epoch": 0.00010872802734375,
      "step": 17814,
      "training_step_time": 0.6736083030700684
    },
    {
      "epoch": 0.000108734130859375,
      "model_forward_time": 0.11926651000976562,
      "step": 17815
    },
    {
      "epoch": 0.000108734130859375,
      "step": 17815,
      "training_step_time": 0.6603043079376221
    },
    {
      "epoch": 0.000108740234375,
      "model_forward_time": 0.11651253700256348,
      "step": 17816
    },
    {
      "epoch": 0.000108740234375,
      "step": 17816,
      "training_step_time": 0.6715662479400635
    },
    {
      "epoch": 0.000108746337890625,
      "model_forward_time": 0.12863826751708984,
      "step": 17817
    },
    {
      "epoch": 0.000108746337890625,
      "step": 17817,
      "training_step_time": 0.6622521877288818
    },
    {
      "epoch": 0.00010875244140625,
      "model_forward_time": 0.12075090408325195,
      "step": 17818
    },
    {
      "epoch": 0.00010875244140625,
      "step": 17818,
      "training_step_time": 0.6705679893493652
    },
    {
      "epoch": 0.000108758544921875,
      "model_forward_time": 0.12468218803405762,
      "step": 17819
    },
    {
      "epoch": 0.000108758544921875,
      "step": 17819,
      "training_step_time": 0.6728830337524414
    },
    {
      "epoch": 0.0001087646484375,
      "grad_norm": 0.17189165949821472,
      "learning_rate": 8.422735529643444e-05,
      "loss": 0.0567,
      "step": 17820
    },
    {
      "epoch": 0.0001087646484375,
      "model_forward_time": 0.11906647682189941,
      "step": 17820
    },
    {
      "epoch": 0.0001087646484375,
      "step": 17820,
      "training_step_time": 0.6813397407531738
    },
    {
      "epoch": 0.000108770751953125,
      "model_forward_time": 0.11773991584777832,
      "step": 17821
    },
    {
      "epoch": 0.000108770751953125,
      "step": 17821,
      "training_step_time": 0.6490564346313477
    },
    {
      "epoch": 0.00010877685546875,
      "model_forward_time": 0.11990690231323242,
      "step": 17822
    },
    {
      "epoch": 0.00010877685546875,
      "step": 17822,
      "training_step_time": 0.7450497150421143
    },
    {
      "epoch": 0.000108782958984375,
      "model_forward_time": 0.11967229843139648,
      "step": 17823
    },
    {
      "epoch": 0.000108782958984375,
      "step": 17823,
      "training_step_time": 0.7706239223480225
    },
    {
      "epoch": 0.0001087890625,
      "model_forward_time": 0.12478399276733398,
      "step": 17824
    },
    {
      "epoch": 0.0001087890625,
      "step": 17824,
      "training_step_time": 0.6687874794006348
    },
    {
      "epoch": 0.000108795166015625,
      "model_forward_time": 0.11591601371765137,
      "step": 17825
    },
    {
      "epoch": 0.000108795166015625,
      "step": 17825,
      "training_step_time": 0.6810867786407471
    },
    {
      "epoch": 0.00010880126953125,
      "model_forward_time": 0.12009310722351074,
      "step": 17826
    },
    {
      "epoch": 0.00010880126953125,
      "step": 17826,
      "training_step_time": 0.6566252708435059
    },
    {
      "epoch": 0.000108807373046875,
      "model_forward_time": 0.12166070938110352,
      "step": 17827
    },
    {
      "epoch": 0.000108807373046875,
      "step": 17827,
      "training_step_time": 0.6386890411376953
    },
    {
      "epoch": 0.0001088134765625,
      "model_forward_time": 0.11922001838684082,
      "step": 17828
    },
    {
      "epoch": 0.0001088134765625,
      "step": 17828,
      "training_step_time": 0.6289863586425781
    },
    {
      "epoch": 0.000108819580078125,
      "model_forward_time": 0.1308612823486328,
      "step": 17829
    },
    {
      "epoch": 0.000108819580078125,
      "step": 17829,
      "training_step_time": 0.6687908172607422
    },
    {
      "epoch": 0.00010882568359375,
      "grad_norm": 0.14892075955867767,
      "learning_rate": 8.420726130504351e-05,
      "loss": 0.0594,
      "step": 17830
    },
    {
      "epoch": 0.00010882568359375,
      "model_forward_time": 0.11574411392211914,
      "step": 17830
    },
    {
      "epoch": 0.00010882568359375,
      "step": 17830,
      "training_step_time": 0.6371669769287109
    },
    {
      "epoch": 0.000108831787109375,
      "model_forward_time": 0.12317347526550293,
      "step": 17831
    },
    {
      "epoch": 0.000108831787109375,
      "step": 17831,
      "training_step_time": 0.6778273582458496
    },
    {
      "epoch": 0.000108837890625,
      "model_forward_time": 0.11797022819519043,
      "step": 17832
    },
    {
      "epoch": 0.000108837890625,
      "step": 17832,
      "training_step_time": 0.6474533081054688
    },
    {
      "epoch": 0.000108843994140625,
      "model_forward_time": 0.1248924732208252,
      "step": 17833
    },
    {
      "epoch": 0.000108843994140625,
      "step": 17833,
      "training_step_time": 0.8192963600158691
    },
    {
      "epoch": 0.00010885009765625,
      "model_forward_time": 0.11805939674377441,
      "step": 17834
    },
    {
      "epoch": 0.00010885009765625,
      "step": 17834,
      "training_step_time": 0.6546711921691895
    },
    {
      "epoch": 0.000108856201171875,
      "model_forward_time": 0.11539030075073242,
      "step": 17835
    },
    {
      "epoch": 0.000108856201171875,
      "step": 17835,
      "training_step_time": 0.6186940670013428
    },
    {
      "epoch": 0.0001088623046875,
      "model_forward_time": 0.11965036392211914,
      "step": 17836
    },
    {
      "epoch": 0.0001088623046875,
      "step": 17836,
      "training_step_time": 0.6762895584106445
    },
    {
      "epoch": 0.000108868408203125,
      "model_forward_time": 0.11779260635375977,
      "step": 17837
    },
    {
      "epoch": 0.000108868408203125,
      "step": 17837,
      "training_step_time": 0.6785891056060791
    },
    {
      "epoch": 0.00010887451171875,
      "model_forward_time": 0.1228935718536377,
      "step": 17838
    },
    {
      "epoch": 0.00010887451171875,
      "step": 17838,
      "training_step_time": 0.6687049865722656
    },
    {
      "epoch": 0.000108880615234375,
      "model_forward_time": 0.12129569053649902,
      "step": 17839
    },
    {
      "epoch": 0.000108880615234375,
      "step": 17839,
      "training_step_time": 0.6701874732971191
    },
    {
      "epoch": 0.00010888671875,
      "grad_norm": 0.16621799767017365,
      "learning_rate": 8.418715692238978e-05,
      "loss": 0.0629,
      "step": 17840
    },
    {
      "epoch": 0.00010888671875,
      "model_forward_time": 0.12005615234375,
      "step": 17840
    },
    {
      "epoch": 0.00010888671875,
      "step": 17840,
      "training_step_time": 0.6785092353820801
    },
    {
      "epoch": 0.000108892822265625,
      "model_forward_time": 0.1212763786315918,
      "step": 17841
    },
    {
      "epoch": 0.000108892822265625,
      "step": 17841,
      "training_step_time": 0.6545746326446533
    },
    {
      "epoch": 0.00010889892578125,
      "model_forward_time": 0.12137389183044434,
      "step": 17842
    },
    {
      "epoch": 0.00010889892578125,
      "step": 17842,
      "training_step_time": 0.8003876209259033
    },
    {
      "epoch": 0.000108905029296875,
      "model_forward_time": 0.1271052360534668,
      "step": 17843
    },
    {
      "epoch": 0.000108905029296875,
      "step": 17843,
      "training_step_time": 0.641455888748169
    },
    {
      "epoch": 0.0001089111328125,
      "model_forward_time": 0.12702035903930664,
      "step": 17844
    },
    {
      "epoch": 0.0001089111328125,
      "step": 17844,
      "training_step_time": 0.6222562789916992
    },
    {
      "epoch": 0.000108917236328125,
      "model_forward_time": 0.11887621879577637,
      "step": 17845
    },
    {
      "epoch": 0.000108917236328125,
      "step": 17845,
      "training_step_time": 0.7244918346405029
    },
    {
      "epoch": 0.00010892333984375,
      "model_forward_time": 0.1163170337677002,
      "step": 17846
    },
    {
      "epoch": 0.00010892333984375,
      "step": 17846,
      "training_step_time": 0.6614425182342529
    },
    {
      "epoch": 0.000108929443359375,
      "model_forward_time": 0.11817359924316406,
      "step": 17847
    },
    {
      "epoch": 0.000108929443359375,
      "step": 17847,
      "training_step_time": 0.6265420913696289
    },
    {
      "epoch": 0.000108935546875,
      "model_forward_time": 0.12422537803649902,
      "step": 17848
    },
    {
      "epoch": 0.000108935546875,
      "step": 17848,
      "training_step_time": 0.6474781036376953
    },
    {
      "epoch": 0.000108941650390625,
      "model_forward_time": 0.12200117111206055,
      "step": 17849
    },
    {
      "epoch": 0.000108941650390625,
      "step": 17849,
      "training_step_time": 0.638939380645752
    },
    {
      "epoch": 0.00010894775390625,
      "grad_norm": 0.16795714199543,
      "learning_rate": 8.416704215458043e-05,
      "loss": 0.0705,
      "step": 17850
    },
    {
      "epoch": 0.00010894775390625,
      "model_forward_time": 0.12701106071472168,
      "step": 17850
    },
    {
      "epoch": 0.00010894775390625,
      "step": 17850,
      "training_step_time": 0.5876634120941162
    },
    {
      "epoch": 0.000108953857421875,
      "model_forward_time": 0.13269543647766113,
      "step": 17851
    },
    {
      "epoch": 0.000108953857421875,
      "step": 17851,
      "training_step_time": 0.6520302295684814
    },
    {
      "epoch": 0.0001089599609375,
      "model_forward_time": 0.12042951583862305,
      "step": 17852
    },
    {
      "epoch": 0.0001089599609375,
      "step": 17852,
      "training_step_time": 0.7042527198791504
    },
    {
      "epoch": 0.000108966064453125,
      "model_forward_time": 0.12246584892272949,
      "step": 17853
    },
    {
      "epoch": 0.000108966064453125,
      "step": 17853,
      "training_step_time": 0.635392427444458
    },
    {
      "epoch": 0.00010897216796875,
      "model_forward_time": 0.1176602840423584,
      "step": 17854
    },
    {
      "epoch": 0.00010897216796875,
      "step": 17854,
      "training_step_time": 0.7253329753875732
    },
    {
      "epoch": 0.000108978271484375,
      "model_forward_time": 0.11948513984680176,
      "step": 17855
    },
    {
      "epoch": 0.000108978271484375,
      "step": 17855,
      "training_step_time": 0.7547821998596191
    },
    {
      "epoch": 0.000108984375,
      "model_forward_time": 0.11744332313537598,
      "step": 17856
    },
    {
      "epoch": 0.000108984375,
      "step": 17856,
      "training_step_time": 0.6817858219146729
    },
    {
      "epoch": 0.000108990478515625,
      "model_forward_time": 0.1174466609954834,
      "step": 17857
    },
    {
      "epoch": 0.000108990478515625,
      "step": 17857,
      "training_step_time": 0.6208786964416504
    },
    {
      "epoch": 0.00010899658203125,
      "model_forward_time": 0.11983513832092285,
      "step": 17858
    },
    {
      "epoch": 0.00010899658203125,
      "step": 17858,
      "training_step_time": 0.6467158794403076
    },
    {
      "epoch": 0.000109002685546875,
      "model_forward_time": 0.12266325950622559,
      "step": 17859
    },
    {
      "epoch": 0.000109002685546875,
      "step": 17859,
      "training_step_time": 0.614882230758667
    },
    {
      "epoch": 0.0001090087890625,
      "grad_norm": 0.13472937047481537,
      "learning_rate": 8.41469170077258e-05,
      "loss": 0.0532,
      "step": 17860
    },
    {
      "epoch": 0.0001090087890625,
      "model_forward_time": 0.1182413101196289,
      "step": 17860
    },
    {
      "epoch": 0.0001090087890625,
      "step": 17860,
      "training_step_time": 0.7098040580749512
    },
    {
      "epoch": 0.000109014892578125,
      "model_forward_time": 0.12424850463867188,
      "step": 17861
    },
    {
      "epoch": 0.000109014892578125,
      "step": 17861,
      "training_step_time": 0.7748985290527344
    },
    {
      "epoch": 0.00010902099609375,
      "model_forward_time": 0.1302015781402588,
      "step": 17862
    },
    {
      "epoch": 0.00010902099609375,
      "step": 17862,
      "training_step_time": 0.6032054424285889
    },
    {
      "epoch": 0.000109027099609375,
      "model_forward_time": 0.12195825576782227,
      "step": 17863
    },
    {
      "epoch": 0.000109027099609375,
      "step": 17863,
      "training_step_time": 0.5993497371673584
    },
    {
      "epoch": 0.000109033203125,
      "model_forward_time": 0.1166529655456543,
      "step": 17864
    },
    {
      "epoch": 0.000109033203125,
      "step": 17864,
      "training_step_time": 0.7143445014953613
    },
    {
      "epoch": 0.000109039306640625,
      "model_forward_time": 0.12133550643920898,
      "step": 17865
    },
    {
      "epoch": 0.000109039306640625,
      "step": 17865,
      "training_step_time": 0.6227586269378662
    },
    {
      "epoch": 0.00010904541015625,
      "model_forward_time": 0.1170344352722168,
      "step": 17866
    },
    {
      "epoch": 0.00010904541015625,
      "step": 17866,
      "training_step_time": 0.629117488861084
    },
    {
      "epoch": 0.000109051513671875,
      "model_forward_time": 0.11915826797485352,
      "step": 17867
    },
    {
      "epoch": 0.000109051513671875,
      "step": 17867,
      "training_step_time": 0.5829160213470459
    },
    {
      "epoch": 0.0001090576171875,
      "model_forward_time": 0.11915969848632812,
      "step": 17868
    },
    {
      "epoch": 0.0001090576171875,
      "step": 17868,
      "training_step_time": 0.5616030693054199
    },
    {
      "epoch": 0.000109063720703125,
      "model_forward_time": 0.12447428703308105,
      "step": 17869
    },
    {
      "epoch": 0.000109063720703125,
      "step": 17869,
      "training_step_time": 0.6022682189941406
    },
    {
      "epoch": 0.00010906982421875,
      "grad_norm": 0.1747923642396927,
      "learning_rate": 8.41267814879394e-05,
      "loss": 0.058,
      "step": 17870
    },
    {
      "epoch": 0.00010906982421875,
      "model_forward_time": 0.12009692192077637,
      "step": 17870
    },
    {
      "epoch": 0.00010906982421875,
      "step": 17870,
      "training_step_time": 0.6124699115753174
    },
    {
      "epoch": 0.000109075927734375,
      "model_forward_time": 0.12817597389221191,
      "step": 17871
    },
    {
      "epoch": 0.000109075927734375,
      "step": 17871,
      "training_step_time": 0.577106237411499
    },
    {
      "epoch": 0.00010908203125,
      "model_forward_time": 0.11708593368530273,
      "step": 17872
    },
    {
      "epoch": 0.00010908203125,
      "step": 17872,
      "training_step_time": 0.6067421436309814
    },
    {
      "epoch": 0.000109088134765625,
      "model_forward_time": 0.1206519603729248,
      "step": 17873
    },
    {
      "epoch": 0.000109088134765625,
      "step": 17873,
      "training_step_time": 0.45693254470825195
    },
    {
      "epoch": 0.00010909423828125,
      "model_forward_time": 0.11622118949890137,
      "step": 17874
    },
    {
      "epoch": 0.00010909423828125,
      "step": 17874,
      "training_step_time": 0.43450307846069336
    },
    {
      "epoch": 0.000109100341796875,
      "model_forward_time": 0.11669588088989258,
      "step": 17875
    },
    {
      "epoch": 0.000109100341796875,
      "step": 17875,
      "training_step_time": 0.46408867835998535
    },
    {
      "epoch": 0.0001091064453125,
      "model_forward_time": 0.11667442321777344,
      "step": 17876
    },
    {
      "epoch": 0.0001091064453125,
      "step": 17876,
      "training_step_time": 0.43010568618774414
    },
    {
      "epoch": 0.000109112548828125,
      "model_forward_time": 0.11670947074890137,
      "step": 17877
    },
    {
      "epoch": 0.000109112548828125,
      "step": 17877,
      "training_step_time": 0.41280102729797363
    },
    {
      "epoch": 0.00010911865234375,
      "model_forward_time": 0.11588501930236816,
      "step": 17878
    },
    {
      "epoch": 0.00010911865234375,
      "step": 17878,
      "training_step_time": 0.39952802658081055
    },
    {
      "epoch": 0.000109124755859375,
      "model_forward_time": 0.11693167686462402,
      "step": 17879
    },
    {
      "epoch": 0.000109124755859375,
      "step": 17879,
      "training_step_time": 0.38314008712768555
    },
    {
      "epoch": 0.000109130859375,
      "grad_norm": 0.16935281455516815,
      "learning_rate": 8.410663560133784e-05,
      "loss": 0.0641,
      "step": 17880
    },
    {
      "epoch": 0.000109130859375,
      "model_forward_time": 0.11562442779541016,
      "step": 17880
    },
    {
      "epoch": 0.000109130859375,
      "step": 17880,
      "training_step_time": 0.38857078552246094
    },
    {
      "epoch": 0.000109136962890625,
      "model_forward_time": 0.11518025398254395,
      "step": 17881
    },
    {
      "epoch": 0.000109136962890625,
      "step": 17881,
      "training_step_time": 0.3876798152923584
    },
    {
      "epoch": 0.00010914306640625,
      "model_forward_time": 0.11650705337524414,
      "step": 17882
    },
    {
      "epoch": 0.00010914306640625,
      "step": 17882,
      "training_step_time": 0.4195716381072998
    },
    {
      "epoch": 0.000109149169921875,
      "model_forward_time": 0.11535859107971191,
      "step": 17883
    },
    {
      "epoch": 0.000109149169921875,
      "step": 17883,
      "training_step_time": 0.44417500495910645
    },
    {
      "epoch": 0.0001091552734375,
      "model_forward_time": 0.11642980575561523,
      "step": 17884
    },
    {
      "epoch": 0.0001091552734375,
      "step": 17884,
      "training_step_time": 0.4601893424987793
    },
    {
      "epoch": 0.000109161376953125,
      "model_forward_time": 0.11545467376708984,
      "step": 17885
    },
    {
      "epoch": 0.000109161376953125,
      "step": 17885,
      "training_step_time": 0.38944196701049805
    },
    {
      "epoch": 0.00010916748046875,
      "model_forward_time": 0.11478543281555176,
      "step": 17886
    },
    {
      "epoch": 0.00010916748046875,
      "step": 17886,
      "training_step_time": 0.4183542728424072
    },
    {
      "epoch": 0.000109173583984375,
      "model_forward_time": 0.11560726165771484,
      "step": 17887
    },
    {
      "epoch": 0.000109173583984375,
      "step": 17887,
      "training_step_time": 0.509171724319458
    },
    {
      "epoch": 0.0001091796875,
      "model_forward_time": 0.11562466621398926,
      "step": 17888
    },
    {
      "epoch": 0.0001091796875,
      "step": 17888,
      "training_step_time": 0.3923814296722412
    },
    {
      "epoch": 0.000109185791015625,
      "model_forward_time": 0.11476445198059082,
      "step": 17889
    },
    {
      "epoch": 0.000109185791015625,
      "step": 17889,
      "training_step_time": 0.38785243034362793
    },
    {
      "epoch": 0.00010919189453125,
      "grad_norm": 0.14205226302146912,
      "learning_rate": 8.40864793540409e-05,
      "loss": 0.0563,
      "step": 17890
    },
    {
      "epoch": 0.00010919189453125,
      "model_forward_time": 0.11536407470703125,
      "step": 17890
    },
    {
      "epoch": 0.00010919189453125,
      "step": 17890,
      "training_step_time": 0.4154386520385742
    },
    {
      "epoch": 0.000109197998046875,
      "model_forward_time": 0.11542654037475586,
      "step": 17891
    },
    {
      "epoch": 0.000109197998046875,
      "step": 17891,
      "training_step_time": 0.3990201950073242
    },
    {
      "epoch": 0.0001092041015625,
      "model_forward_time": 0.11539101600646973,
      "step": 17892
    },
    {
      "epoch": 0.0001092041015625,
      "step": 17892,
      "training_step_time": 0.4349510669708252
    },
    {
      "epoch": 0.000109210205078125,
      "model_forward_time": 0.11564421653747559,
      "step": 17893
    },
    {
      "epoch": 0.000109210205078125,
      "step": 17893,
      "training_step_time": 0.3904099464416504
    },
    {
      "epoch": 0.00010921630859375,
      "model_forward_time": 0.11456918716430664,
      "step": 17894
    },
    {
      "epoch": 0.00010921630859375,
      "step": 17894,
      "training_step_time": 0.3852860927581787
    },
    {
      "epoch": 0.000109222412109375,
      "model_forward_time": 0.11567354202270508,
      "step": 17895
    },
    {
      "epoch": 0.000109222412109375,
      "step": 17895,
      "training_step_time": 0.39444613456726074
    },
    {
      "epoch": 0.000109228515625,
      "model_forward_time": 0.11536192893981934,
      "step": 17896
    },
    {
      "epoch": 0.000109228515625,
      "step": 17896,
      "training_step_time": 0.4509549140930176
    },
    {
      "epoch": 0.000109234619140625,
      "model_forward_time": 0.11530613899230957,
      "step": 17897
    },
    {
      "epoch": 0.000109234619140625,
      "step": 17897,
      "training_step_time": 0.410297155380249
    },
    {
      "epoch": 0.00010924072265625,
      "model_forward_time": 0.11531519889831543,
      "step": 17898
    },
    {
      "epoch": 0.00010924072265625,
      "step": 17898,
      "training_step_time": 0.41632509231567383
    },
    {
      "epoch": 0.000109246826171875,
      "model_forward_time": 0.11536765098571777,
      "step": 17899
    },
    {
      "epoch": 0.000109246826171875,
      "step": 17899,
      "training_step_time": 0.43219590187072754
    },
    {
      "epoch": 0.0001092529296875,
      "grad_norm": 0.1440986692905426,
      "learning_rate": 8.406631275217156e-05,
      "loss": 0.0574,
      "step": 17900
    },
    {
      "epoch": 0.0001092529296875,
      "model_forward_time": 0.11461901664733887,
      "step": 17900
    },
    {
      "epoch": 0.0001092529296875,
      "step": 17900,
      "training_step_time": 0.46566128730773926
    },
    {
      "epoch": 0.000109259033203125,
      "model_forward_time": 0.11529326438903809,
      "step": 17901
    },
    {
      "epoch": 0.000109259033203125,
      "step": 17901,
      "training_step_time": 0.46687793731689453
    },
    {
      "epoch": 0.00010926513671875,
      "model_forward_time": 0.11596131324768066,
      "step": 17902
    },
    {
      "epoch": 0.00010926513671875,
      "step": 17902,
      "training_step_time": 0.3894791603088379
    },
    {
      "epoch": 0.000109271240234375,
      "model_forward_time": 0.1144571304321289,
      "step": 17903
    },
    {
      "epoch": 0.000109271240234375,
      "step": 17903,
      "training_step_time": 0.3867604732513428
    },
    {
      "epoch": 0.00010927734375,
      "model_forward_time": 0.11542415618896484,
      "step": 17904
    },
    {
      "epoch": 0.00010927734375,
      "step": 17904,
      "training_step_time": 0.3911874294281006
    },
    {
      "epoch": 0.000109283447265625,
      "model_forward_time": 0.11516976356506348,
      "step": 17905
    },
    {
      "epoch": 0.000109283447265625,
      "step": 17905,
      "training_step_time": 0.398038387298584
    },
    {
      "epoch": 0.00010928955078125,
      "model_forward_time": 0.11574840545654297,
      "step": 17906
    },
    {
      "epoch": 0.00010928955078125,
      "step": 17906,
      "training_step_time": 0.4261627197265625
    },
    {
      "epoch": 0.000109295654296875,
      "model_forward_time": 0.11512446403503418,
      "step": 17907
    },
    {
      "epoch": 0.000109295654296875,
      "step": 17907,
      "training_step_time": 0.44057536125183105
    },
    {
      "epoch": 0.0001093017578125,
      "model_forward_time": 0.11559081077575684,
      "step": 17908
    },
    {
      "epoch": 0.0001093017578125,
      "step": 17908,
      "training_step_time": 0.39398193359375
    },
    {
      "epoch": 0.000109307861328125,
      "model_forward_time": 0.11548614501953125,
      "step": 17909
    },
    {
      "epoch": 0.000109307861328125,
      "step": 17909,
      "training_step_time": 0.39293789863586426
    },
    {
      "epoch": 0.00010931396484375,
      "grad_norm": 0.23790013790130615,
      "learning_rate": 8.404613580185585e-05,
      "loss": 0.0558,
      "step": 17910
    },
    {
      "epoch": 0.00010931396484375,
      "model_forward_time": 0.11514830589294434,
      "step": 17910
    },
    {
      "epoch": 0.00010931396484375,
      "step": 17910,
      "training_step_time": 0.41571974754333496
    },
    {
      "epoch": 0.000109320068359375,
      "model_forward_time": 0.11545848846435547,
      "step": 17911
    },
    {
      "epoch": 0.000109320068359375,
      "step": 17911,
      "training_step_time": 0.4379146099090576
    },
    {
      "epoch": 0.000109326171875,
      "model_forward_time": 0.11528563499450684,
      "step": 17912
    },
    {
      "epoch": 0.000109326171875,
      "step": 17912,
      "training_step_time": 0.4302952289581299
    },
    {
      "epoch": 0.000109332275390625,
      "model_forward_time": 0.11507797241210938,
      "step": 17913
    },
    {
      "epoch": 0.000109332275390625,
      "step": 17913,
      "training_step_time": 0.5258626937866211
    },
    {
      "epoch": 0.00010933837890625,
      "model_forward_time": 0.11552143096923828,
      "step": 17914
    },
    {
      "epoch": 0.00010933837890625,
      "step": 17914,
      "training_step_time": 0.42181944847106934
    },
    {
      "epoch": 0.000109344482421875,
      "model_forward_time": 0.11794829368591309,
      "step": 17915
    },
    {
      "epoch": 0.000109344482421875,
      "step": 17915,
      "training_step_time": 0.3865814208984375
    },
    {
      "epoch": 0.0001093505859375,
      "model_forward_time": 0.11519932746887207,
      "step": 17916
    },
    {
      "epoch": 0.0001093505859375,
      "step": 17916,
      "training_step_time": 0.47491455078125
    },
    {
      "epoch": 0.000109356689453125,
      "model_forward_time": 0.11470389366149902,
      "step": 17917
    },
    {
      "epoch": 0.000109356689453125,
      "step": 17917,
      "training_step_time": 0.38059210777282715
    },
    {
      "epoch": 0.00010936279296875,
      "model_forward_time": 0.11524581909179688,
      "step": 17918
    },
    {
      "epoch": 0.00010936279296875,
      "step": 17918,
      "training_step_time": 0.39217066764831543
    },
    {
      "epoch": 0.000109368896484375,
      "model_forward_time": 0.11501502990722656,
      "step": 17919
    },
    {
      "epoch": 0.000109368896484375,
      "step": 17919,
      "training_step_time": 0.38846802711486816
    },
    {
      "epoch": 0.000109375,
      "grad_norm": 0.204807847738266,
      "learning_rate": 8.402594850922305e-05,
      "loss": 0.059,
      "step": 17920
    },
    {
      "epoch": 0.000109375,
      "model_forward_time": 0.11448383331298828,
      "step": 17920
    },
    {
      "epoch": 0.000109375,
      "step": 17920,
      "training_step_time": 0.4527106285095215
    },
    {
      "epoch": 0.000109381103515625,
      "model_forward_time": 0.11507868766784668,
      "step": 17921
    },
    {
      "epoch": 0.000109381103515625,
      "step": 17921,
      "training_step_time": 0.39201784133911133
    },
    {
      "epoch": 0.00010938720703125,
      "model_forward_time": 0.11609792709350586,
      "step": 17922
    },
    {
      "epoch": 0.00010938720703125,
      "step": 17922,
      "training_step_time": 0.4034433364868164
    },
    {
      "epoch": 0.000109393310546875,
      "model_forward_time": 0.11500144004821777,
      "step": 17923
    },
    {
      "epoch": 0.000109393310546875,
      "step": 17923,
      "training_step_time": 0.39518165588378906
    },
    {
      "epoch": 0.0001093994140625,
      "model_forward_time": 0.11521339416503906,
      "step": 17924
    },
    {
      "epoch": 0.0001093994140625,
      "step": 17924,
      "training_step_time": 0.39772891998291016
    },
    {
      "epoch": 0.000109405517578125,
      "model_forward_time": 0.11488580703735352,
      "step": 17925
    },
    {
      "epoch": 0.000109405517578125,
      "step": 17925,
      "training_step_time": 0.5185120105743408
    },
    {
      "epoch": 0.00010941162109375,
      "model_forward_time": 0.11523151397705078,
      "step": 17926
    },
    {
      "epoch": 0.00010941162109375,
      "step": 17926,
      "training_step_time": 0.4650290012359619
    },
    {
      "epoch": 0.000109417724609375,
      "model_forward_time": 0.1143503189086914,
      "step": 17927
    },
    {
      "epoch": 0.000109417724609375,
      "step": 17927,
      "training_step_time": 0.39481687545776367
    },
    {
      "epoch": 0.000109423828125,
      "model_forward_time": 0.1151270866394043,
      "step": 17928
    },
    {
      "epoch": 0.000109423828125,
      "step": 17928,
      "training_step_time": 0.48138427734375
    },
    {
      "epoch": 0.000109429931640625,
      "model_forward_time": 0.11519145965576172,
      "step": 17929
    },
    {
      "epoch": 0.000109429931640625,
      "step": 17929,
      "training_step_time": 0.4522264003753662
    },
    {
      "epoch": 0.00010943603515625,
      "grad_norm": 0.1690540909767151,
      "learning_rate": 8.400575088040548e-05,
      "loss": 0.0564,
      "step": 17930
    },
    {
      "epoch": 0.00010943603515625,
      "model_forward_time": 0.1146857738494873,
      "step": 17930
    },
    {
      "epoch": 0.00010943603515625,
      "step": 17930,
      "training_step_time": 0.40763092041015625
    },
    {
      "epoch": 0.000109442138671875,
      "model_forward_time": 0.11609005928039551,
      "step": 17931
    },
    {
      "epoch": 0.000109442138671875,
      "step": 17931,
      "training_step_time": 0.4799942970275879
    },
    {
      "epoch": 0.0001094482421875,
      "model_forward_time": 0.1150352954864502,
      "step": 17932
    },
    {
      "epoch": 0.0001094482421875,
      "step": 17932,
      "training_step_time": 0.39449024200439453
    },
    {
      "epoch": 0.000109454345703125,
      "model_forward_time": 0.1149287223815918,
      "step": 17933
    },
    {
      "epoch": 0.000109454345703125,
      "step": 17933,
      "training_step_time": 0.38979387283325195
    },
    {
      "epoch": 0.00010946044921875,
      "model_forward_time": 0.11463379859924316,
      "step": 17934
    },
    {
      "epoch": 0.00010946044921875,
      "step": 17934,
      "training_step_time": 0.45312952995300293
    },
    {
      "epoch": 0.000109466552734375,
      "model_forward_time": 0.11488103866577148,
      "step": 17935
    },
    {
      "epoch": 0.000109466552734375,
      "step": 17935,
      "training_step_time": 0.4000124931335449
    },
    {
      "epoch": 0.00010947265625,
      "model_forward_time": 0.11560273170471191,
      "step": 17936
    },
    {
      "epoch": 0.00010947265625,
      "step": 17936,
      "training_step_time": 0.4202854633331299
    },
    {
      "epoch": 0.000109478759765625,
      "model_forward_time": 0.11545419692993164,
      "step": 17937
    },
    {
      "epoch": 0.000109478759765625,
      "step": 17937,
      "training_step_time": 0.3951594829559326
    },
    {
      "epoch": 0.00010948486328125,
      "model_forward_time": 0.11533045768737793,
      "step": 17938
    },
    {
      "epoch": 0.00010948486328125,
      "step": 17938,
      "training_step_time": 0.4152028560638428
    },
    {
      "epoch": 0.000109490966796875,
      "model_forward_time": 0.11450982093811035,
      "step": 17939
    },
    {
      "epoch": 0.000109490966796875,
      "step": 17939,
      "training_step_time": 0.38485193252563477
    },
    {
      "epoch": 0.0001094970703125,
      "grad_norm": 0.11542842537164688,
      "learning_rate": 8.398554292153866e-05,
      "loss": 0.0583,
      "step": 17940
    },
    {
      "epoch": 0.0001094970703125,
      "model_forward_time": 0.11546707153320312,
      "step": 17940
    },
    {
      "epoch": 0.0001094970703125,
      "step": 17940,
      "training_step_time": 0.3968470096588135
    },
    {
      "epoch": 0.000109503173828125,
      "model_forward_time": 0.11606621742248535,
      "step": 17941
    },
    {
      "epoch": 0.000109503173828125,
      "step": 17941,
      "training_step_time": 0.39281249046325684
    },
    {
      "epoch": 0.00010950927734375,
      "model_forward_time": 0.11498117446899414,
      "step": 17942
    },
    {
      "epoch": 0.00010950927734375,
      "step": 17942,
      "training_step_time": 0.43256449699401855
    },
    {
      "epoch": 0.000109515380859375,
      "model_forward_time": 0.11499953269958496,
      "step": 17943
    },
    {
      "epoch": 0.000109515380859375,
      "step": 17943,
      "training_step_time": 0.4792189598083496
    },
    {
      "epoch": 0.000109521484375,
      "model_forward_time": 0.11513566970825195,
      "step": 17944
    },
    {
      "epoch": 0.000109521484375,
      "step": 17944,
      "training_step_time": 0.41286277770996094
    },
    {
      "epoch": 0.000109527587890625,
      "model_forward_time": 0.11541581153869629,
      "step": 17945
    },
    {
      "epoch": 0.000109527587890625,
      "step": 17945,
      "training_step_time": 0.4170231819152832
    },
    {
      "epoch": 0.00010953369140625,
      "model_forward_time": 0.11473464965820312,
      "step": 17946
    },
    {
      "epoch": 0.00010953369140625,
      "step": 17946,
      "training_step_time": 0.39840221405029297
    },
    {
      "epoch": 0.000109539794921875,
      "model_forward_time": 0.11520123481750488,
      "step": 17947
    },
    {
      "epoch": 0.000109539794921875,
      "step": 17947,
      "training_step_time": 0.38956546783447266
    },
    {
      "epoch": 0.0001095458984375,
      "model_forward_time": 0.11465716361999512,
      "step": 17948
    },
    {
      "epoch": 0.0001095458984375,
      "step": 17948,
      "training_step_time": 0.3927421569824219
    },
    {
      "epoch": 0.000109552001953125,
      "model_forward_time": 0.11557126045227051,
      "step": 17949
    },
    {
      "epoch": 0.000109552001953125,
      "step": 17949,
      "training_step_time": 0.665510892868042
    },
    {
      "epoch": 0.00010955810546875,
      "grad_norm": 0.12197358906269073,
      "learning_rate": 8.396532463876124e-05,
      "loss": 0.0559,
      "step": 17950
    },
    {
      "epoch": 0.00010955810546875,
      "model_forward_time": 0.11540579795837402,
      "step": 17950
    },
    {
      "epoch": 0.00010955810546875,
      "step": 17950,
      "training_step_time": 0.4116997718811035
    },
    {
      "epoch": 0.000109564208984375,
      "model_forward_time": 0.11588907241821289,
      "step": 17951
    },
    {
      "epoch": 0.000109564208984375,
      "step": 17951,
      "training_step_time": 0.428652286529541
    },
    {
      "epoch": 0.0001095703125,
      "model_forward_time": 0.11513948440551758,
      "step": 17952
    },
    {
      "epoch": 0.0001095703125,
      "step": 17952,
      "training_step_time": 0.4309349060058594
    },
    {
      "epoch": 0.000109576416015625,
      "model_forward_time": 0.11402297019958496,
      "step": 17953
    },
    {
      "epoch": 0.000109576416015625,
      "step": 17953,
      "training_step_time": 0.39623332023620605
    },
    {
      "epoch": 0.00010958251953125,
      "model_forward_time": 0.11485981941223145,
      "step": 17954
    },
    {
      "epoch": 0.00010958251953125,
      "step": 17954,
      "training_step_time": 0.3801605701446533
    },
    {
      "epoch": 0.000109588623046875,
      "model_forward_time": 0.11536192893981934,
      "step": 17955
    },
    {
      "epoch": 0.000109588623046875,
      "step": 17955,
      "training_step_time": 0.5550658702850342
    },
    {
      "epoch": 0.0001095947265625,
      "model_forward_time": 0.11543798446655273,
      "step": 17956
    },
    {
      "epoch": 0.0001095947265625,
      "step": 17956,
      "training_step_time": 0.46507835388183594
    },
    {
      "epoch": 0.000109600830078125,
      "model_forward_time": 0.11568021774291992,
      "step": 17957
    },
    {
      "epoch": 0.000109600830078125,
      "step": 17957,
      "training_step_time": 0.37099552154541016
    },
    {
      "epoch": 0.00010960693359375,
      "model_forward_time": 0.11473512649536133,
      "step": 17958
    },
    {
      "epoch": 0.00010960693359375,
      "step": 17958,
      "training_step_time": 0.4116218090057373
    },
    {
      "epoch": 0.000109613037109375,
      "model_forward_time": 0.11515164375305176,
      "step": 17959
    },
    {
      "epoch": 0.000109613037109375,
      "step": 17959,
      "training_step_time": 0.48384952545166016
    },
    {
      "epoch": 0.000109619140625,
      "grad_norm": 0.1360204517841339,
      "learning_rate": 8.394509603821499e-05,
      "loss": 0.0594,
      "step": 17960
    },
    {
      "epoch": 0.000109619140625,
      "model_forward_time": 0.11470818519592285,
      "step": 17960
    },
    {
      "epoch": 0.000109619140625,
      "step": 17960,
      "training_step_time": 0.38189101219177246
    },
    {
      "epoch": 0.000109625244140625,
      "model_forward_time": 0.1160271167755127,
      "step": 17961
    },
    {
      "epoch": 0.000109625244140625,
      "step": 17961,
      "training_step_time": 0.40371084213256836
    },
    {
      "epoch": 0.00010963134765625,
      "model_forward_time": 0.11535453796386719,
      "step": 17962
    },
    {
      "epoch": 0.00010963134765625,
      "step": 17962,
      "training_step_time": 0.4061884880065918
    },
    {
      "epoch": 0.000109637451171875,
      "model_forward_time": 0.11549520492553711,
      "step": 17963
    },
    {
      "epoch": 0.000109637451171875,
      "step": 17963,
      "training_step_time": 0.45288777351379395
    },
    {
      "epoch": 0.0001096435546875,
      "model_forward_time": 0.11471676826477051,
      "step": 17964
    },
    {
      "epoch": 0.0001096435546875,
      "step": 17964,
      "training_step_time": 0.42353391647338867
    },
    {
      "epoch": 0.000109649658203125,
      "model_forward_time": 0.1156156063079834,
      "step": 17965
    },
    {
      "epoch": 0.000109649658203125,
      "step": 17965,
      "training_step_time": 0.39214205741882324
    },
    {
      "epoch": 0.00010965576171875,
      "model_forward_time": 0.11518287658691406,
      "step": 17966
    },
    {
      "epoch": 0.00010965576171875,
      "step": 17966,
      "training_step_time": 0.45090222358703613
    },
    {
      "epoch": 0.000109661865234375,
      "model_forward_time": 0.11512637138366699,
      "step": 17967
    },
    {
      "epoch": 0.000109661865234375,
      "step": 17967,
      "training_step_time": 0.4917943477630615
    },
    {
      "epoch": 0.00010966796875,
      "model_forward_time": 0.11567020416259766,
      "step": 17968
    },
    {
      "epoch": 0.00010966796875,
      "step": 17968,
      "training_step_time": 0.3990480899810791
    },
    {
      "epoch": 0.000109674072265625,
      "model_forward_time": 0.11682343482971191,
      "step": 17969
    },
    {
      "epoch": 0.000109674072265625,
      "step": 17969,
      "training_step_time": 0.43958282470703125
    },
    {
      "epoch": 0.00010968017578125,
      "grad_norm": 0.20598483085632324,
      "learning_rate": 8.392485712604483e-05,
      "loss": 0.0598,
      "step": 17970
    },
    {
      "epoch": 0.00010968017578125,
      "model_forward_time": 0.11536455154418945,
      "step": 17970
    },
    {
      "epoch": 0.00010968017578125,
      "step": 17970,
      "training_step_time": 0.4100637435913086
    },
    {
      "epoch": 0.000109686279296875,
      "model_forward_time": 0.11506938934326172,
      "step": 17971
    },
    {
      "epoch": 0.000109686279296875,
      "step": 17971,
      "training_step_time": 0.4700472354888916
    },
    {
      "epoch": 0.0001096923828125,
      "model_forward_time": 0.11597633361816406,
      "step": 17972
    },
    {
      "epoch": 0.0001096923828125,
      "step": 17972,
      "training_step_time": 0.3698742389678955
    },
    {
      "epoch": 0.000109698486328125,
      "model_forward_time": 0.11629247665405273,
      "step": 17973
    },
    {
      "epoch": 0.000109698486328125,
      "step": 17973,
      "training_step_time": 0.42525601387023926
    },
    {
      "epoch": 0.00010970458984375,
      "model_forward_time": 0.11519408226013184,
      "step": 17974
    },
    {
      "epoch": 0.00010970458984375,
      "step": 17974,
      "training_step_time": 0.4359703063964844
    },
    {
      "epoch": 0.000109710693359375,
      "model_forward_time": 0.11561083793640137,
      "step": 17975
    },
    {
      "epoch": 0.000109710693359375,
      "step": 17975,
      "training_step_time": 0.3893735408782959
    },
    {
      "epoch": 0.000109716796875,
      "model_forward_time": 0.11492180824279785,
      "step": 17976
    },
    {
      "epoch": 0.000109716796875,
      "step": 17976,
      "training_step_time": 0.4484553337097168
    },
    {
      "epoch": 0.000109722900390625,
      "model_forward_time": 0.11464571952819824,
      "step": 17977
    },
    {
      "epoch": 0.000109722900390625,
      "step": 17977,
      "training_step_time": 0.482820987701416
    },
    {
      "epoch": 0.00010972900390625,
      "model_forward_time": 0.11808252334594727,
      "step": 17978
    },
    {
      "epoch": 0.00010972900390625,
      "step": 17978,
      "training_step_time": 0.44950175285339355
    },
    {
      "epoch": 0.000109735107421875,
      "model_forward_time": 0.11503434181213379,
      "step": 17979
    },
    {
      "epoch": 0.000109735107421875,
      "step": 17979,
      "training_step_time": 0.3974437713623047
    },
    {
      "epoch": 0.0001097412109375,
      "grad_norm": 0.17364327609539032,
      "learning_rate": 8.390460790839882e-05,
      "loss": 0.0542,
      "step": 17980
    },
    {
      "epoch": 0.0001097412109375,
      "model_forward_time": 0.1150057315826416,
      "step": 17980
    },
    {
      "epoch": 0.0001097412109375,
      "step": 17980,
      "training_step_time": 0.3888530731201172
    },
    {
      "epoch": 0.000109747314453125,
      "model_forward_time": 0.11522936820983887,
      "step": 17981
    },
    {
      "epoch": 0.000109747314453125,
      "step": 17981,
      "training_step_time": 0.3962838649749756
    },
    {
      "epoch": 0.00010975341796875,
      "model_forward_time": 0.11523199081420898,
      "step": 17982
    },
    {
      "epoch": 0.00010975341796875,
      "step": 17982,
      "training_step_time": 0.39716053009033203
    },
    {
      "epoch": 0.000109759521484375,
      "model_forward_time": 0.11515235900878906,
      "step": 17983
    },
    {
      "epoch": 0.000109759521484375,
      "step": 17983,
      "training_step_time": 0.3924229145050049
    },
    {
      "epoch": 0.000109765625,
      "model_forward_time": 0.11619448661804199,
      "step": 17984
    },
    {
      "epoch": 0.000109765625,
      "step": 17984,
      "training_step_time": 0.40578627586364746
    },
    {
      "epoch": 0.000109771728515625,
      "model_forward_time": 0.11517119407653809,
      "step": 17985
    },
    {
      "epoch": 0.000109771728515625,
      "step": 17985,
      "training_step_time": 0.4493076801300049
    },
    {
      "epoch": 0.00010977783203125,
      "model_forward_time": 0.11508488655090332,
      "step": 17986
    },
    {
      "epoch": 0.00010977783203125,
      "step": 17986,
      "training_step_time": 0.3898439407348633
    },
    {
      "epoch": 0.000109783935546875,
      "model_forward_time": 0.11557626724243164,
      "step": 17987
    },
    {
      "epoch": 0.000109783935546875,
      "step": 17987,
      "training_step_time": 0.42705798149108887
    },
    {
      "epoch": 0.0001097900390625,
      "model_forward_time": 0.11572146415710449,
      "step": 17988
    },
    {
      "epoch": 0.0001097900390625,
      "step": 17988,
      "training_step_time": 0.39254331588745117
    },
    {
      "epoch": 0.000109796142578125,
      "model_forward_time": 0.11522054672241211,
      "step": 17989
    },
    {
      "epoch": 0.000109796142578125,
      "step": 17989,
      "training_step_time": 0.4757709503173828
    },
    {
      "epoch": 0.00010980224609375,
      "grad_norm": 0.18185056746006012,
      "learning_rate": 8.388434839142813e-05,
      "loss": 0.0581,
      "step": 17990
    },
    {
      "epoch": 0.00010980224609375,
      "model_forward_time": 0.11610960960388184,
      "step": 17990
    },
    {
      "epoch": 0.00010980224609375,
      "step": 17990,
      "training_step_time": 0.39621734619140625
    },
    {
      "epoch": 0.000109808349609375,
      "model_forward_time": 0.11500883102416992,
      "step": 17991
    },
    {
      "epoch": 0.000109808349609375,
      "step": 17991,
      "training_step_time": 0.46688365936279297
    },
    {
      "epoch": 0.000109814453125,
      "model_forward_time": 0.11516761779785156,
      "step": 17992
    },
    {
      "epoch": 0.000109814453125,
      "step": 17992,
      "training_step_time": 0.45191454887390137
    },
    {
      "epoch": 0.000109820556640625,
      "model_forward_time": 0.11536884307861328,
      "step": 17993
    },
    {
      "epoch": 0.000109820556640625,
      "step": 17993,
      "training_step_time": 0.4230046272277832
    },
    {
      "epoch": 0.00010982666015625,
      "model_forward_time": 0.11527228355407715,
      "step": 17994
    },
    {
      "epoch": 0.00010982666015625,
      "step": 17994,
      "training_step_time": 0.4021177291870117
    },
    {
      "epoch": 0.000109832763671875,
      "model_forward_time": 0.11476469039916992,
      "step": 17995
    },
    {
      "epoch": 0.000109832763671875,
      "step": 17995,
      "training_step_time": 0.3956732749938965
    },
    {
      "epoch": 0.0001098388671875,
      "model_forward_time": 0.1150960922241211,
      "step": 17996
    },
    {
      "epoch": 0.0001098388671875,
      "step": 17996,
      "training_step_time": 0.3854484558105469
    },
    {
      "epoch": 0.000109844970703125,
      "model_forward_time": 0.11537861824035645,
      "step": 17997
    },
    {
      "epoch": 0.000109844970703125,
      "step": 17997,
      "training_step_time": 0.444033145904541
    },
    {
      "epoch": 0.00010985107421875,
      "model_forward_time": 0.11564397811889648,
      "step": 17998
    },
    {
      "epoch": 0.00010985107421875,
      "step": 17998,
      "training_step_time": 0.45640087127685547
    },
    {
      "epoch": 0.000109857177734375,
      "model_forward_time": 0.11501049995422363,
      "step": 17999
    },
    {
      "epoch": 0.000109857177734375,
      "step": 17999,
      "training_step_time": 0.4213557243347168
    },
    {
      "epoch": 0.00010986328125,
      "grad_norm": 0.15351475775241852,
      "learning_rate": 8.386407858128706e-05,
      "loss": 0.0595,
      "step": 18000
    },
    {
      "epoch": 0.00010986328125,
      "model_forward_time": 0.11248564720153809,
      "step": 18000
    },
    {
      "epoch": 0.00010986328125,
      "step": 18000,
      "training_step_time": 0.35366296768188477
    },
    {
      "epoch": 0.000109869384765625,
      "model_forward_time": 0.11229419708251953,
      "step": 18001
    },
    {
      "epoch": 0.000109869384765625,
      "step": 18001,
      "training_step_time": 0.37810730934143066
    },
    {
      "epoch": 0.00010987548828125,
      "model_forward_time": 0.11285710334777832,
      "step": 18002
    },
    {
      "epoch": 0.00010987548828125,
      "step": 18002,
      "training_step_time": 0.4061436653137207
    },
    {
      "epoch": 0.000109881591796875,
      "model_forward_time": 0.11415910720825195,
      "step": 18003
    },
    {
      "epoch": 0.000109881591796875,
      "step": 18003,
      "training_step_time": 0.39588236808776855
    },
    {
      "epoch": 0.0001098876953125,
      "model_forward_time": 0.11367964744567871,
      "step": 18004
    },
    {
      "epoch": 0.0001098876953125,
      "step": 18004,
      "training_step_time": 0.3927311897277832
    },
    {
      "epoch": 0.000109893798828125,
      "model_forward_time": 0.11403775215148926,
      "step": 18005
    },
    {
      "epoch": 0.000109893798828125,
      "step": 18005,
      "training_step_time": 0.38442158699035645
    },
    {
      "epoch": 0.00010989990234375,
      "model_forward_time": 0.11437582969665527,
      "step": 18006
    },
    {
      "epoch": 0.00010989990234375,
      "step": 18006,
      "training_step_time": 0.4197273254394531
    },
    {
      "epoch": 0.000109906005859375,
      "model_forward_time": 0.11452174186706543,
      "step": 18007
    },
    {
      "epoch": 0.000109906005859375,
      "step": 18007,
      "training_step_time": 0.444141149520874
    },
    {
      "epoch": 0.000109912109375,
      "model_forward_time": 0.11514568328857422,
      "step": 18008
    },
    {
      "epoch": 0.000109912109375,
      "step": 18008,
      "training_step_time": 0.43245458602905273
    },
    {
      "epoch": 0.000109918212890625,
      "model_forward_time": 0.11501193046569824,
      "step": 18009
    },
    {
      "epoch": 0.000109918212890625,
      "step": 18009,
      "training_step_time": 0.4145815372467041
    },
    {
      "epoch": 0.00010992431640625,
      "grad_norm": 0.13341355323791504,
      "learning_rate": 8.384379848413304e-05,
      "loss": 0.0561,
      "step": 18010
    },
    {
      "epoch": 0.00010992431640625,
      "model_forward_time": 0.11525106430053711,
      "step": 18010
    },
    {
      "epoch": 0.00010992431640625,
      "step": 18010,
      "training_step_time": 0.37216997146606445
    },
    {
      "epoch": 0.000109930419921875,
      "model_forward_time": 0.11510276794433594,
      "step": 18011
    },
    {
      "epoch": 0.000109930419921875,
      "step": 18011,
      "training_step_time": 0.3923373222351074
    },
    {
      "epoch": 0.0001099365234375,
      "model_forward_time": 0.1152791976928711,
      "step": 18012
    },
    {
      "epoch": 0.0001099365234375,
      "step": 18012,
      "training_step_time": 0.4026334285736084
    },
    {
      "epoch": 0.000109942626953125,
      "model_forward_time": 0.11494970321655273,
      "step": 18013
    },
    {
      "epoch": 0.000109942626953125,
      "step": 18013,
      "training_step_time": 0.3941671848297119
    },
    {
      "epoch": 0.00010994873046875,
      "model_forward_time": 0.11486673355102539,
      "step": 18014
    },
    {
      "epoch": 0.00010994873046875,
      "step": 18014,
      "training_step_time": 0.40503478050231934
    },
    {
      "epoch": 0.000109954833984375,
      "model_forward_time": 0.11512446403503418,
      "step": 18015
    },
    {
      "epoch": 0.000109954833984375,
      "step": 18015,
      "training_step_time": 0.4686543941497803
    },
    {
      "epoch": 0.0001099609375,
      "model_forward_time": 0.1151423454284668,
      "step": 18016
    },
    {
      "epoch": 0.0001099609375,
      "step": 18016,
      "training_step_time": 0.42260098457336426
    },
    {
      "epoch": 0.000109967041015625,
      "model_forward_time": 0.11484169960021973,
      "step": 18017
    },
    {
      "epoch": 0.000109967041015625,
      "step": 18017,
      "training_step_time": 0.4706149101257324
    },
    {
      "epoch": 0.00010997314453125,
      "model_forward_time": 0.11454510688781738,
      "step": 18018
    },
    {
      "epoch": 0.00010997314453125,
      "step": 18018,
      "training_step_time": 0.45119619369506836
    },
    {
      "epoch": 0.000109979248046875,
      "model_forward_time": 0.1161198616027832,
      "step": 18019
    },
    {
      "epoch": 0.000109979248046875,
      "step": 18019,
      "training_step_time": 0.4282979965209961
    },
    {
      "epoch": 0.0001099853515625,
      "grad_norm": 0.19077323377132416,
      "learning_rate": 8.382350810612663e-05,
      "loss": 0.0581,
      "step": 18020
    },
    {
      "epoch": 0.0001099853515625,
      "model_forward_time": 0.11519908905029297,
      "step": 18020
    },
    {
      "epoch": 0.0001099853515625,
      "step": 18020,
      "training_step_time": 0.4580881595611572
    },
    {
      "epoch": 0.000109991455078125,
      "model_forward_time": 0.11484694480895996,
      "step": 18021
    },
    {
      "epoch": 0.000109991455078125,
      "step": 18021,
      "training_step_time": 0.4011847972869873
    },
    {
      "epoch": 0.00010999755859375,
      "model_forward_time": 0.11529421806335449,
      "step": 18022
    },
    {
      "epoch": 0.00010999755859375,
      "step": 18022,
      "training_step_time": 0.3858664035797119
    },
    {
      "epoch": 0.000110003662109375,
      "model_forward_time": 0.11535811424255371,
      "step": 18023
    },
    {
      "epoch": 0.000110003662109375,
      "step": 18023,
      "training_step_time": 0.3845195770263672
    },
    {
      "epoch": 0.000110009765625,
      "model_forward_time": 0.11493659019470215,
      "step": 18024
    },
    {
      "epoch": 0.000110009765625,
      "step": 18024,
      "training_step_time": 0.3879811763763428
    },
    {
      "epoch": 0.000110015869140625,
      "model_forward_time": 0.11577296257019043,
      "step": 18025
    },
    {
      "epoch": 0.000110015869140625,
      "step": 18025,
      "training_step_time": 0.3929171562194824
    },
    {
      "epoch": 0.00011002197265625,
      "model_forward_time": 0.11536788940429688,
      "step": 18026
    },
    {
      "epoch": 0.00011002197265625,
      "step": 18026,
      "training_step_time": 0.3960227966308594
    },
    {
      "epoch": 0.000110028076171875,
      "model_forward_time": 0.11525869369506836,
      "step": 18027
    },
    {
      "epoch": 0.000110028076171875,
      "step": 18027,
      "training_step_time": 0.399658203125
    },
    {
      "epoch": 0.0001100341796875,
      "model_forward_time": 0.1151876449584961,
      "step": 18028
    },
    {
      "epoch": 0.0001100341796875,
      "step": 18028,
      "training_step_time": 0.388380765914917
    },
    {
      "epoch": 0.000110040283203125,
      "model_forward_time": 0.11504101753234863,
      "step": 18029
    },
    {
      "epoch": 0.000110040283203125,
      "step": 18029,
      "training_step_time": 0.3916192054748535
    },
    {
      "epoch": 0.00011004638671875,
      "grad_norm": 0.1379629224538803,
      "learning_rate": 8.380320745343153e-05,
      "loss": 0.0597,
      "step": 18030
    },
    {
      "epoch": 0.00011004638671875,
      "model_forward_time": 0.11512613296508789,
      "step": 18030
    },
    {
      "epoch": 0.00011004638671875,
      "step": 18030,
      "training_step_time": 0.39678215980529785
    },
    {
      "epoch": 0.000110052490234375,
      "model_forward_time": 0.11547350883483887,
      "step": 18031
    },
    {
      "epoch": 0.000110052490234375,
      "step": 18031,
      "training_step_time": 0.3930933475494385
    },
    {
      "epoch": 0.00011005859375,
      "model_forward_time": 0.11514759063720703,
      "step": 18032
    },
    {
      "epoch": 0.00011005859375,
      "step": 18032,
      "training_step_time": 0.4396979808807373
    },
    {
      "epoch": 0.000110064697265625,
      "model_forward_time": 0.11469340324401855,
      "step": 18033
    },
    {
      "epoch": 0.000110064697265625,
      "step": 18033,
      "training_step_time": 0.455092191696167
    },
    {
      "epoch": 0.00011007080078125,
      "model_forward_time": 0.11506199836730957,
      "step": 18034
    },
    {
      "epoch": 0.00011007080078125,
      "step": 18034,
      "training_step_time": 0.3948652744293213
    },
    {
      "epoch": 0.000110076904296875,
      "model_forward_time": 0.11545729637145996,
      "step": 18035
    },
    {
      "epoch": 0.000110076904296875,
      "step": 18035,
      "training_step_time": 0.47423672676086426
    },
    {
      "epoch": 0.0001100830078125,
      "model_forward_time": 0.11567521095275879,
      "step": 18036
    },
    {
      "epoch": 0.0001100830078125,
      "step": 18036,
      "training_step_time": 0.47339296340942383
    },
    {
      "epoch": 0.000110089111328125,
      "model_forward_time": 0.11455416679382324,
      "step": 18037
    },
    {
      "epoch": 0.000110089111328125,
      "step": 18037,
      "training_step_time": 0.47017645835876465
    },
    {
      "epoch": 0.00011009521484375,
      "model_forward_time": 0.11500978469848633,
      "step": 18038
    },
    {
      "epoch": 0.00011009521484375,
      "step": 18038,
      "training_step_time": 0.4359903335571289
    },
    {
      "epoch": 0.000110101318359375,
      "model_forward_time": 0.11403131484985352,
      "step": 18039
    },
    {
      "epoch": 0.000110101318359375,
      "step": 18039,
      "training_step_time": 0.49785375595092773
    },
    {
      "epoch": 0.000110107421875,
      "grad_norm": 0.10984742641448975,
      "learning_rate": 8.378289653221452e-05,
      "loss": 0.0533,
      "step": 18040
    },
    {
      "epoch": 0.000110107421875,
      "model_forward_time": 0.11431074142456055,
      "step": 18040
    },
    {
      "epoch": 0.000110107421875,
      "step": 18040,
      "training_step_time": 0.3906538486480713
    },
    {
      "epoch": 0.000110113525390625,
      "model_forward_time": 0.11515593528747559,
      "step": 18041
    },
    {
      "epoch": 0.000110113525390625,
      "step": 18041,
      "training_step_time": 0.38945984840393066
    },
    {
      "epoch": 0.00011011962890625,
      "model_forward_time": 0.11540985107421875,
      "step": 18042
    },
    {
      "epoch": 0.00011011962890625,
      "step": 18042,
      "training_step_time": 0.38880372047424316
    },
    {
      "epoch": 0.000110125732421875,
      "model_forward_time": 0.11473655700683594,
      "step": 18043
    },
    {
      "epoch": 0.000110125732421875,
      "step": 18043,
      "training_step_time": 0.3972890377044678
    },
    {
      "epoch": 0.0001101318359375,
      "model_forward_time": 0.11494040489196777,
      "step": 18044
    },
    {
      "epoch": 0.0001101318359375,
      "step": 18044,
      "training_step_time": 0.41926074028015137
    },
    {
      "epoch": 0.000110137939453125,
      "model_forward_time": 0.114532470703125,
      "step": 18045
    },
    {
      "epoch": 0.000110137939453125,
      "step": 18045,
      "training_step_time": 0.8647983074188232
    },
    {
      "epoch": 0.00011014404296875,
      "model_forward_time": 0.11482882499694824,
      "step": 18046
    },
    {
      "epoch": 0.00011014404296875,
      "step": 18046,
      "training_step_time": 0.3975522518157959
    },
    {
      "epoch": 0.000110150146484375,
      "model_forward_time": 0.11441540718078613,
      "step": 18047
    },
    {
      "epoch": 0.000110150146484375,
      "step": 18047,
      "training_step_time": 0.5926303863525391
    },
    {
      "epoch": 0.00011015625,
      "model_forward_time": 0.11388564109802246,
      "step": 18048
    },
    {
      "epoch": 0.00011015625,
      "step": 18048,
      "training_step_time": 0.42193126678466797
    },
    {
      "epoch": 0.000110162353515625,
      "model_forward_time": 0.11423206329345703,
      "step": 18049
    },
    {
      "epoch": 0.000110162353515625,
      "step": 18049,
      "training_step_time": 0.3858675956726074
    },
    {
      "epoch": 0.00011016845703125,
      "grad_norm": 0.15928800404071808,
      "learning_rate": 8.376257534864553e-05,
      "loss": 0.0569,
      "step": 18050
    },
    {
      "epoch": 0.00011016845703125,
      "model_forward_time": 0.11421084403991699,
      "step": 18050
    },
    {
      "epoch": 0.00011016845703125,
      "step": 18050,
      "training_step_time": 0.44414472579956055
    },
    {
      "epoch": 0.000110174560546875,
      "model_forward_time": 0.11475825309753418,
      "step": 18051
    },
    {
      "epoch": 0.000110174560546875,
      "step": 18051,
      "training_step_time": 0.4977693557739258
    },
    {
      "epoch": 0.0001101806640625,
      "model_forward_time": 0.1151881217956543,
      "step": 18052
    },
    {
      "epoch": 0.0001101806640625,
      "step": 18052,
      "training_step_time": 0.3878898620605469
    },
    {
      "epoch": 0.000110186767578125,
      "model_forward_time": 0.1144707202911377,
      "step": 18053
    },
    {
      "epoch": 0.000110186767578125,
      "step": 18053,
      "training_step_time": 0.38591694831848145
    },
    {
      "epoch": 0.00011019287109375,
      "model_forward_time": 0.11512589454650879,
      "step": 18054
    },
    {
      "epoch": 0.00011019287109375,
      "step": 18054,
      "training_step_time": 0.3926656246185303
    },
    {
      "epoch": 0.000110198974609375,
      "model_forward_time": 0.11533498764038086,
      "step": 18055
    },
    {
      "epoch": 0.000110198974609375,
      "step": 18055,
      "training_step_time": 0.39522576332092285
    },
    {
      "epoch": 0.000110205078125,
      "model_forward_time": 0.11496424674987793,
      "step": 18056
    },
    {
      "epoch": 0.000110205078125,
      "step": 18056,
      "training_step_time": 0.38992953300476074
    },
    {
      "epoch": 0.000110211181640625,
      "model_forward_time": 0.1150364875793457,
      "step": 18057
    },
    {
      "epoch": 0.000110211181640625,
      "step": 18057,
      "training_step_time": 0.8005683422088623
    },
    {
      "epoch": 0.00011021728515625,
      "model_forward_time": 0.11455464363098145,
      "step": 18058
    },
    {
      "epoch": 0.00011021728515625,
      "step": 18058,
      "training_step_time": 0.5014083385467529
    },
    {
      "epoch": 0.000110223388671875,
      "model_forward_time": 0.11433076858520508,
      "step": 18059
    },
    {
      "epoch": 0.000110223388671875,
      "step": 18059,
      "training_step_time": 0.3614799976348877
    },
    {
      "epoch": 0.0001102294921875,
      "grad_norm": 0.15120236575603485,
      "learning_rate": 8.37422439088976e-05,
      "loss": 0.0541,
      "step": 18060
    },
    {
      "epoch": 0.0001102294921875,
      "model_forward_time": 0.11426520347595215,
      "step": 18060
    },
    {
      "epoch": 0.0001102294921875,
      "step": 18060,
      "training_step_time": 0.4623265266418457
    },
    {
      "epoch": 0.000110235595703125,
      "model_forward_time": 0.11420536041259766,
      "step": 18061
    },
    {
      "epoch": 0.000110235595703125,
      "step": 18061,
      "training_step_time": 0.41640496253967285
    },
    {
      "epoch": 0.00011024169921875,
      "model_forward_time": 0.11362242698669434,
      "step": 18062
    },
    {
      "epoch": 0.00011024169921875,
      "step": 18062,
      "training_step_time": 0.37729907035827637
    },
    {
      "epoch": 0.000110247802734375,
      "model_forward_time": 0.11466169357299805,
      "step": 18063
    },
    {
      "epoch": 0.000110247802734375,
      "step": 18063,
      "training_step_time": 0.4979074001312256
    },
    {
      "epoch": 0.00011025390625,
      "model_forward_time": 0.11494684219360352,
      "step": 18064
    },
    {
      "epoch": 0.00011025390625,
      "step": 18064,
      "training_step_time": 0.4256443977355957
    },
    {
      "epoch": 0.000110260009765625,
      "model_forward_time": 0.11490035057067871,
      "step": 18065
    },
    {
      "epoch": 0.000110260009765625,
      "step": 18065,
      "training_step_time": 0.4795520305633545
    },
    {
      "epoch": 0.00011026611328125,
      "model_forward_time": 0.11490178108215332,
      "step": 18066
    },
    {
      "epoch": 0.00011026611328125,
      "step": 18066,
      "training_step_time": 0.3920249938964844
    },
    {
      "epoch": 0.000110272216796875,
      "model_forward_time": 0.11474895477294922,
      "step": 18067
    },
    {
      "epoch": 0.000110272216796875,
      "step": 18067,
      "training_step_time": 0.3876214027404785
    },
    {
      "epoch": 0.0001102783203125,
      "model_forward_time": 0.11485481262207031,
      "step": 18068
    },
    {
      "epoch": 0.0001102783203125,
      "step": 18068,
      "training_step_time": 0.40570735931396484
    },
    {
      "epoch": 0.000110284423828125,
      "model_forward_time": 0.11505627632141113,
      "step": 18069
    },
    {
      "epoch": 0.000110284423828125,
      "step": 18069,
      "training_step_time": 0.6943960189819336
    },
    {
      "epoch": 0.00011029052734375,
      "grad_norm": 0.21124710142612457,
      "learning_rate": 8.372190221914689e-05,
      "loss": 0.0535,
      "step": 18070
    },
    {
      "epoch": 0.00011029052734375,
      "model_forward_time": 0.11486148834228516,
      "step": 18070
    },
    {
      "epoch": 0.00011029052734375,
      "step": 18070,
      "training_step_time": 0.4039490222930908
    },
    {
      "epoch": 0.000110296630859375,
      "model_forward_time": 0.1147615909576416,
      "step": 18071
    },
    {
      "epoch": 0.000110296630859375,
      "step": 18071,
      "training_step_time": 0.38231635093688965
    },
    {
      "epoch": 0.000110302734375,
      "model_forward_time": 0.11444783210754395,
      "step": 18072
    },
    {
      "epoch": 0.000110302734375,
      "step": 18072,
      "training_step_time": 0.4946773052215576
    },
    {
      "epoch": 0.000110308837890625,
      "model_forward_time": 0.11406993865966797,
      "step": 18073
    },
    {
      "epoch": 0.000110308837890625,
      "step": 18073,
      "training_step_time": 0.4070467948913574
    },
    {
      "epoch": 0.00011031494140625,
      "model_forward_time": 0.11446452140808105,
      "step": 18074
    },
    {
      "epoch": 0.00011031494140625,
      "step": 18074,
      "training_step_time": 0.5063340663909912
    },
    {
      "epoch": 0.000110321044921875,
      "model_forward_time": 0.1146078109741211,
      "step": 18075
    },
    {
      "epoch": 0.000110321044921875,
      "step": 18075,
      "training_step_time": 0.5389447212219238
    },
    {
      "epoch": 0.0001103271484375,
      "model_forward_time": 0.11423707008361816,
      "step": 18076
    },
    {
      "epoch": 0.0001103271484375,
      "step": 18076,
      "training_step_time": 0.4168062210083008
    },
    {
      "epoch": 0.000110333251953125,
      "model_forward_time": 0.11399102210998535,
      "step": 18077
    },
    {
      "epoch": 0.000110333251953125,
      "step": 18077,
      "training_step_time": 0.37787294387817383
    },
    {
      "epoch": 0.00011033935546875,
      "model_forward_time": 0.11580944061279297,
      "step": 18078
    },
    {
      "epoch": 0.00011033935546875,
      "step": 18078,
      "training_step_time": 0.43042826652526855
    },
    {
      "epoch": 0.000110345458984375,
      "model_forward_time": 0.11522126197814941,
      "step": 18079
    },
    {
      "epoch": 0.000110345458984375,
      "step": 18079,
      "training_step_time": 0.3932616710662842
    },
    {
      "epoch": 0.0001103515625,
      "grad_norm": 0.24076420068740845,
      "learning_rate": 8.370155028557265e-05,
      "loss": 0.0559,
      "step": 18080
    },
    {
      "epoch": 0.0001103515625,
      "model_forward_time": 0.11460208892822266,
      "step": 18080
    },
    {
      "epoch": 0.0001103515625,
      "step": 18080,
      "training_step_time": 0.4089033603668213
    },
    {
      "epoch": 0.000110357666015625,
      "model_forward_time": 0.11459660530090332,
      "step": 18081
    },
    {
      "epoch": 0.000110357666015625,
      "step": 18081,
      "training_step_time": 0.6740086078643799
    },
    {
      "epoch": 0.00011036376953125,
      "model_forward_time": 0.1156148910522461,
      "step": 18082
    },
    {
      "epoch": 0.00011036376953125,
      "step": 18082,
      "training_step_time": 0.37925124168395996
    },
    {
      "epoch": 0.000110369873046875,
      "model_forward_time": 0.11428642272949219,
      "step": 18083
    },
    {
      "epoch": 0.000110369873046875,
      "step": 18083,
      "training_step_time": 0.3900935649871826
    },
    {
      "epoch": 0.0001103759765625,
      "model_forward_time": 0.11517691612243652,
      "step": 18084
    },
    {
      "epoch": 0.0001103759765625,
      "step": 18084,
      "training_step_time": 0.41849470138549805
    },
    {
      "epoch": 0.000110382080078125,
      "model_forward_time": 0.11434435844421387,
      "step": 18085
    },
    {
      "epoch": 0.000110382080078125,
      "step": 18085,
      "training_step_time": 0.44665980339050293
    },
    {
      "epoch": 0.00011038818359375,
      "model_forward_time": 0.1142115592956543,
      "step": 18086
    },
    {
      "epoch": 0.00011038818359375,
      "step": 18086,
      "training_step_time": 0.40534472465515137
    },
    {
      "epoch": 0.000110394287109375,
      "model_forward_time": 0.11489629745483398,
      "step": 18087
    },
    {
      "epoch": 0.000110394287109375,
      "step": 18087,
      "training_step_time": 0.6942558288574219
    },
    {
      "epoch": 0.000110400390625,
      "model_forward_time": 0.1143491268157959,
      "step": 18088
    },
    {
      "epoch": 0.000110400390625,
      "step": 18088,
      "training_step_time": 0.4573178291320801
    },
    {
      "epoch": 0.000110406494140625,
      "model_forward_time": 0.11480283737182617,
      "step": 18089
    },
    {
      "epoch": 0.000110406494140625,
      "step": 18089,
      "training_step_time": 0.3932223320007324
    },
    {
      "epoch": 0.00011041259765625,
      "grad_norm": 0.16161483526229858,
      "learning_rate": 8.368118811435726e-05,
      "loss": 0.051,
      "step": 18090
    },
    {
      "epoch": 0.00011041259765625,
      "model_forward_time": 0.11466693878173828,
      "step": 18090
    },
    {
      "epoch": 0.00011041259765625,
      "step": 18090,
      "training_step_time": 0.4004335403442383
    },
    {
      "epoch": 0.000110418701171875,
      "model_forward_time": 0.11422061920166016,
      "step": 18091
    },
    {
      "epoch": 0.000110418701171875,
      "step": 18091,
      "training_step_time": 0.38925647735595703
    },
    {
      "epoch": 0.0001104248046875,
      "model_forward_time": 0.1154184341430664,
      "step": 18092
    },
    {
      "epoch": 0.0001104248046875,
      "step": 18092,
      "training_step_time": 0.40426182746887207
    },
    {
      "epoch": 0.000110430908203125,
      "model_forward_time": 0.1149892807006836,
      "step": 18093
    },
    {
      "epoch": 0.000110430908203125,
      "step": 18093,
      "training_step_time": 0.5719764232635498
    },
    {
      "epoch": 0.00011043701171875,
      "model_forward_time": 0.11537885665893555,
      "step": 18094
    },
    {
      "epoch": 0.00011043701171875,
      "step": 18094,
      "training_step_time": 0.38512706756591797
    },
    {
      "epoch": 0.000110443115234375,
      "model_forward_time": 0.11503171920776367,
      "step": 18095
    },
    {
      "epoch": 0.000110443115234375,
      "step": 18095,
      "training_step_time": 0.38611721992492676
    },
    {
      "epoch": 0.00011044921875,
      "model_forward_time": 0.11517548561096191,
      "step": 18096
    },
    {
      "epoch": 0.00011044921875,
      "step": 18096,
      "training_step_time": 0.38542723655700684
    },
    {
      "epoch": 0.000110455322265625,
      "model_forward_time": 0.11533594131469727,
      "step": 18097
    },
    {
      "epoch": 0.000110455322265625,
      "step": 18097,
      "training_step_time": 0.40234994888305664
    },
    {
      "epoch": 0.00011046142578125,
      "model_forward_time": 0.114654541015625,
      "step": 18098
    },
    {
      "epoch": 0.00011046142578125,
      "step": 18098,
      "training_step_time": 0.43212294578552246
    },
    {
      "epoch": 0.000110467529296875,
      "model_forward_time": 0.11494946479797363,
      "step": 18099
    },
    {
      "epoch": 0.000110467529296875,
      "step": 18099,
      "training_step_time": 0.7084658145904541
    },
    {
      "epoch": 0.0001104736328125,
      "grad_norm": 0.18848444521427155,
      "learning_rate": 8.366081571168625e-05,
      "loss": 0.0533,
      "step": 18100
    },
    {
      "epoch": 0.0001104736328125,
      "model_forward_time": 0.11544537544250488,
      "step": 18100
    },
    {
      "epoch": 0.0001104736328125,
      "step": 18100,
      "training_step_time": 0.36443066596984863
    },
    {
      "epoch": 0.000110479736328125,
      "model_forward_time": 0.11491799354553223,
      "step": 18101
    },
    {
      "epoch": 0.000110479736328125,
      "step": 18101,
      "training_step_time": 0.462982177734375
    },
    {
      "epoch": 0.00011048583984375,
      "model_forward_time": 0.11461353302001953,
      "step": 18102
    },
    {
      "epoch": 0.00011048583984375,
      "step": 18102,
      "training_step_time": 0.40384507179260254
    },
    {
      "epoch": 0.000110491943359375,
      "model_forward_time": 0.11521649360656738,
      "step": 18103
    },
    {
      "epoch": 0.000110491943359375,
      "step": 18103,
      "training_step_time": 0.3867976665496826
    },
    {
      "epoch": 0.000110498046875,
      "model_forward_time": 0.1137092113494873,
      "step": 18104
    },
    {
      "epoch": 0.000110498046875,
      "step": 18104,
      "training_step_time": 0.45525622367858887
    },
    {
      "epoch": 0.000110504150390625,
      "model_forward_time": 0.11475801467895508,
      "step": 18105
    },
    {
      "epoch": 0.000110504150390625,
      "step": 18105,
      "training_step_time": 0.5413060188293457
    },
    {
      "epoch": 0.00011051025390625,
      "model_forward_time": 0.11484670639038086,
      "step": 18106
    },
    {
      "epoch": 0.00011051025390625,
      "step": 18106,
      "training_step_time": 0.40206360816955566
    },
    {
      "epoch": 0.000110516357421875,
      "model_forward_time": 0.11450743675231934,
      "step": 18107
    },
    {
      "epoch": 0.000110516357421875,
      "step": 18107,
      "training_step_time": 0.3801710605621338
    },
    {
      "epoch": 0.0001105224609375,
      "model_forward_time": 0.11491823196411133,
      "step": 18108
    },
    {
      "epoch": 0.0001105224609375,
      "step": 18108,
      "training_step_time": 0.39798951148986816
    },
    {
      "epoch": 0.000110528564453125,
      "model_forward_time": 0.1148977279663086,
      "step": 18109
    },
    {
      "epoch": 0.000110528564453125,
      "step": 18109,
      "training_step_time": 0.3897714614868164
    },
    {
      "epoch": 0.00011053466796875,
      "grad_norm": 0.12961445748806,
      "learning_rate": 8.364043308374816e-05,
      "loss": 0.059,
      "step": 18110
    },
    {
      "epoch": 0.00011053466796875,
      "model_forward_time": 0.11624932289123535,
      "step": 18110
    },
    {
      "epoch": 0.00011053466796875,
      "step": 18110,
      "training_step_time": 0.455031156539917
    },
    {
      "epoch": 0.000110540771484375,
      "model_forward_time": 0.11486959457397461,
      "step": 18111
    },
    {
      "epoch": 0.000110540771484375,
      "step": 18111,
      "training_step_time": 0.6754252910614014
    },
    {
      "epoch": 0.000110546875,
      "model_forward_time": 0.11458635330200195,
      "step": 18112
    },
    {
      "epoch": 0.000110546875,
      "step": 18112,
      "training_step_time": 0.4199552536010742
    },
    {
      "epoch": 0.000110552978515625,
      "model_forward_time": 0.11429572105407715,
      "step": 18113
    },
    {
      "epoch": 0.000110552978515625,
      "step": 18113,
      "training_step_time": 0.48322129249572754
    },
    {
      "epoch": 0.00011055908203125,
      "model_forward_time": 0.11490535736083984,
      "step": 18114
    },
    {
      "epoch": 0.00011055908203125,
      "step": 18114,
      "training_step_time": 0.4452781677246094
    },
    {
      "epoch": 0.000110565185546875,
      "model_forward_time": 0.11504817008972168,
      "step": 18115
    },
    {
      "epoch": 0.000110565185546875,
      "step": 18115,
      "training_step_time": 0.4890766143798828
    },
    {
      "epoch": 0.0001105712890625,
      "model_forward_time": 0.11567211151123047,
      "step": 18116
    },
    {
      "epoch": 0.0001105712890625,
      "step": 18116,
      "training_step_time": 0.4064760208129883
    },
    {
      "epoch": 0.000110577392578125,
      "model_forward_time": 0.11508893966674805,
      "step": 18117
    },
    {
      "epoch": 0.000110577392578125,
      "step": 18117,
      "training_step_time": 0.42948031425476074
    },
    {
      "epoch": 0.00011058349609375,
      "model_forward_time": 0.11551451683044434,
      "step": 18118
    },
    {
      "epoch": 0.00011058349609375,
      "step": 18118,
      "training_step_time": 0.4786539077758789
    },
    {
      "epoch": 0.000110589599609375,
      "model_forward_time": 0.11477017402648926,
      "step": 18119
    },
    {
      "epoch": 0.000110589599609375,
      "step": 18119,
      "training_step_time": 0.42105603218078613
    },
    {
      "epoch": 0.000110595703125,
      "grad_norm": 0.15178252756595612,
      "learning_rate": 8.362004023673474e-05,
      "loss": 0.056,
      "step": 18120
    },
    {
      "epoch": 0.000110595703125,
      "model_forward_time": 0.1144716739654541,
      "step": 18120
    },
    {
      "epoch": 0.000110595703125,
      "step": 18120,
      "training_step_time": 0.3911867141723633
    },
    {
      "epoch": 0.000110601806640625,
      "model_forward_time": 0.11518168449401855,
      "step": 18121
    },
    {
      "epoch": 0.000110601806640625,
      "step": 18121,
      "training_step_time": 0.390028715133667
    },
    {
      "epoch": 0.00011060791015625,
      "model_forward_time": 0.11525487899780273,
      "step": 18122
    },
    {
      "epoch": 0.00011060791015625,
      "step": 18122,
      "training_step_time": 0.38550758361816406
    },
    {
      "epoch": 0.000110614013671875,
      "model_forward_time": 0.11523556709289551,
      "step": 18123
    },
    {
      "epoch": 0.000110614013671875,
      "step": 18123,
      "training_step_time": 0.43770623207092285
    },
    {
      "epoch": 0.0001106201171875,
      "model_forward_time": 0.11496472358703613,
      "step": 18124
    },
    {
      "epoch": 0.0001106201171875,
      "step": 18124,
      "training_step_time": 0.43332815170288086
    },
    {
      "epoch": 0.000110626220703125,
      "model_forward_time": 0.11580586433410645,
      "step": 18125
    },
    {
      "epoch": 0.000110626220703125,
      "step": 18125,
      "training_step_time": 0.48914647102355957
    },
    {
      "epoch": 0.00011063232421875,
      "model_forward_time": 0.11474990844726562,
      "step": 18126
    },
    {
      "epoch": 0.00011063232421875,
      "step": 18126,
      "training_step_time": 0.44414663314819336
    },
    {
      "epoch": 0.000110638427734375,
      "model_forward_time": 0.11563825607299805,
      "step": 18127
    },
    {
      "epoch": 0.000110638427734375,
      "step": 18127,
      "training_step_time": 0.4810795783996582
    },
    {
      "epoch": 0.00011064453125,
      "model_forward_time": 0.11438202857971191,
      "step": 18128
    },
    {
      "epoch": 0.00011064453125,
      "step": 18128,
      "training_step_time": 0.45851588249206543
    },
    {
      "epoch": 0.000110650634765625,
      "model_forward_time": 0.11487960815429688,
      "step": 18129
    },
    {
      "epoch": 0.000110650634765625,
      "step": 18129,
      "training_step_time": 0.48990464210510254
    },
    {
      "epoch": 0.00011065673828125,
      "grad_norm": 0.1764887571334839,
      "learning_rate": 8.359963717684077e-05,
      "loss": 0.0487,
      "step": 18130
    },
    {
      "epoch": 0.00011065673828125,
      "model_forward_time": 0.11507725715637207,
      "step": 18130
    },
    {
      "epoch": 0.00011065673828125,
      "step": 18130,
      "training_step_time": 0.4067723751068115
    },
    {
      "epoch": 0.000110662841796875,
      "model_forward_time": 0.11421537399291992,
      "step": 18131
    },
    {
      "epoch": 0.000110662841796875,
      "step": 18131,
      "training_step_time": 0.41892147064208984
    },
    {
      "epoch": 0.0001106689453125,
      "model_forward_time": 0.11513543128967285,
      "step": 18132
    },
    {
      "epoch": 0.0001106689453125,
      "step": 18132,
      "training_step_time": 0.3948671817779541
    },
    {
      "epoch": 0.000110675048828125,
      "model_forward_time": 0.11502385139465332,
      "step": 18133
    },
    {
      "epoch": 0.000110675048828125,
      "step": 18133,
      "training_step_time": 0.41843175888061523
    },
    {
      "epoch": 0.00011068115234375,
      "model_forward_time": 0.11493945121765137,
      "step": 18134
    },
    {
      "epoch": 0.00011068115234375,
      "step": 18134,
      "training_step_time": 0.3840298652648926
    },
    {
      "epoch": 0.000110687255859375,
      "model_forward_time": 0.11487174034118652,
      "step": 18135
    },
    {
      "epoch": 0.000110687255859375,
      "step": 18135,
      "training_step_time": 0.39195919036865234
    },
    {
      "epoch": 0.000110693359375,
      "model_forward_time": 0.1150674819946289,
      "step": 18136
    },
    {
      "epoch": 0.000110693359375,
      "step": 18136,
      "training_step_time": 0.39598989486694336
    },
    {
      "epoch": 0.000110699462890625,
      "model_forward_time": 0.11594915390014648,
      "step": 18137
    },
    {
      "epoch": 0.000110699462890625,
      "step": 18137,
      "training_step_time": 0.3992924690246582
    },
    {
      "epoch": 0.00011070556640625,
      "model_forward_time": 0.1153559684753418,
      "step": 18138
    },
    {
      "epoch": 0.00011070556640625,
      "step": 18138,
      "training_step_time": 0.3927724361419678
    },
    {
      "epoch": 0.000110711669921875,
      "model_forward_time": 0.1153409481048584,
      "step": 18139
    },
    {
      "epoch": 0.000110711669921875,
      "step": 18139,
      "training_step_time": 0.38766956329345703
    },
    {
      "epoch": 0.0001107177734375,
      "grad_norm": 0.16534683108329773,
      "learning_rate": 8.357922391026418e-05,
      "loss": 0.0598,
      "step": 18140
    },
    {
      "epoch": 0.0001107177734375,
      "model_forward_time": 0.11534380912780762,
      "step": 18140
    },
    {
      "epoch": 0.0001107177734375,
      "step": 18140,
      "training_step_time": 0.3999500274658203
    },
    {
      "epoch": 0.000110723876953125,
      "model_forward_time": 0.1158602237701416,
      "step": 18141
    },
    {
      "epoch": 0.000110723876953125,
      "step": 18141,
      "training_step_time": 0.39547157287597656
    },
    {
      "epoch": 0.00011072998046875,
      "model_forward_time": 0.11516809463500977,
      "step": 18142
    },
    {
      "epoch": 0.00011072998046875,
      "step": 18142,
      "training_step_time": 0.4072093963623047
    },
    {
      "epoch": 0.000110736083984375,
      "model_forward_time": 0.1153116226196289,
      "step": 18143
    },
    {
      "epoch": 0.000110736083984375,
      "step": 18143,
      "training_step_time": 0.4678189754486084
    },
    {
      "epoch": 0.0001107421875,
      "model_forward_time": 0.11526250839233398,
      "step": 18144
    },
    {
      "epoch": 0.0001107421875,
      "step": 18144,
      "training_step_time": 0.5015716552734375
    },
    {
      "epoch": 0.000110748291015625,
      "model_forward_time": 0.11489462852478027,
      "step": 18145
    },
    {
      "epoch": 0.000110748291015625,
      "step": 18145,
      "training_step_time": 0.45366764068603516
    },
    {
      "epoch": 0.00011075439453125,
      "model_forward_time": 0.11538290977478027,
      "step": 18146
    },
    {
      "epoch": 0.00011075439453125,
      "step": 18146,
      "training_step_time": 0.4568195343017578
    },
    {
      "epoch": 0.000110760498046875,
      "model_forward_time": 0.11483478546142578,
      "step": 18147
    },
    {
      "epoch": 0.000110760498046875,
      "step": 18147,
      "training_step_time": 0.43192291259765625
    },
    {
      "epoch": 0.0001107666015625,
      "model_forward_time": 0.11590409278869629,
      "step": 18148
    },
    {
      "epoch": 0.0001107666015625,
      "step": 18148,
      "training_step_time": 0.4187753200531006
    },
    {
      "epoch": 0.000110772705078125,
      "model_forward_time": 0.11523604393005371,
      "step": 18149
    },
    {
      "epoch": 0.000110772705078125,
      "step": 18149,
      "training_step_time": 0.41318464279174805
    },
    {
      "epoch": 0.00011077880859375,
      "grad_norm": 0.12809507548809052,
      "learning_rate": 8.355880044320598e-05,
      "loss": 0.0492,
      "step": 18150
    },
    {
      "epoch": 0.00011077880859375,
      "model_forward_time": 0.11621952056884766,
      "step": 18150
    },
    {
      "epoch": 0.00011077880859375,
      "step": 18150,
      "training_step_time": 0.3865070343017578
    },
    {
      "epoch": 0.000110784912109375,
      "model_forward_time": 0.11495351791381836,
      "step": 18151
    },
    {
      "epoch": 0.000110784912109375,
      "step": 18151,
      "training_step_time": 0.4221060276031494
    },
    {
      "epoch": 0.000110791015625,
      "model_forward_time": 0.11605668067932129,
      "step": 18152
    },
    {
      "epoch": 0.000110791015625,
      "step": 18152,
      "training_step_time": 0.3864579200744629
    },
    {
      "epoch": 0.000110797119140625,
      "model_forward_time": 0.11513829231262207,
      "step": 18153
    },
    {
      "epoch": 0.000110797119140625,
      "step": 18153,
      "training_step_time": 0.38510870933532715
    },
    {
      "epoch": 0.00011080322265625,
      "model_forward_time": 0.11489391326904297,
      "step": 18154
    },
    {
      "epoch": 0.00011080322265625,
      "step": 18154,
      "training_step_time": 0.4844496250152588
    },
    {
      "epoch": 0.000110809326171875,
      "model_forward_time": 0.11435270309448242,
      "step": 18155
    },
    {
      "epoch": 0.000110809326171875,
      "step": 18155,
      "training_step_time": 0.40718841552734375
    },
    {
      "epoch": 0.0001108154296875,
      "model_forward_time": 0.11486554145812988,
      "step": 18156
    },
    {
      "epoch": 0.0001108154296875,
      "step": 18156,
      "training_step_time": 0.48303771018981934
    },
    {
      "epoch": 0.000110821533203125,
      "model_forward_time": 0.11581158638000488,
      "step": 18157
    },
    {
      "epoch": 0.000110821533203125,
      "step": 18157,
      "training_step_time": 0.3695411682128906
    },
    {
      "epoch": 0.00011082763671875,
      "model_forward_time": 0.11645627021789551,
      "step": 18158
    },
    {
      "epoch": 0.00011082763671875,
      "step": 18158,
      "training_step_time": 0.44059085845947266
    },
    {
      "epoch": 0.000110833740234375,
      "model_forward_time": 0.11615896224975586,
      "step": 18159
    },
    {
      "epoch": 0.000110833740234375,
      "step": 18159,
      "training_step_time": 0.4004940986633301
    },
    {
      "epoch": 0.00011083984375,
      "grad_norm": 0.14404752850532532,
      "learning_rate": 8.353836678187027e-05,
      "loss": 0.0573,
      "step": 18160
    },
    {
      "epoch": 0.00011083984375,
      "model_forward_time": 0.11571741104125977,
      "step": 18160
    },
    {
      "epoch": 0.00011083984375,
      "step": 18160,
      "training_step_time": 0.3967154026031494
    },
    {
      "epoch": 0.000110845947265625,
      "model_forward_time": 0.11465716361999512,
      "step": 18161
    },
    {
      "epoch": 0.000110845947265625,
      "step": 18161,
      "training_step_time": 0.39383578300476074
    },
    {
      "epoch": 0.00011085205078125,
      "model_forward_time": 0.1164693832397461,
      "step": 18162
    },
    {
      "epoch": 0.00011085205078125,
      "step": 18162,
      "training_step_time": 0.44061994552612305
    },
    {
      "epoch": 0.000110858154296875,
      "model_forward_time": 0.11471891403198242,
      "step": 18163
    },
    {
      "epoch": 0.000110858154296875,
      "step": 18163,
      "training_step_time": 0.40781378746032715
    },
    {
      "epoch": 0.0001108642578125,
      "model_forward_time": 0.11513590812683105,
      "step": 18164
    },
    {
      "epoch": 0.0001108642578125,
      "step": 18164,
      "training_step_time": 0.3943948745727539
    },
    {
      "epoch": 0.000110870361328125,
      "model_forward_time": 0.11544132232666016,
      "step": 18165
    },
    {
      "epoch": 0.000110870361328125,
      "step": 18165,
      "training_step_time": 0.3846418857574463
    },
    {
      "epoch": 0.00011087646484375,
      "model_forward_time": 0.11465334892272949,
      "step": 18166
    },
    {
      "epoch": 0.00011087646484375,
      "step": 18166,
      "training_step_time": 0.39290499687194824
    },
    {
      "epoch": 0.000110882568359375,
      "model_forward_time": 0.1146852970123291,
      "step": 18167
    },
    {
      "epoch": 0.000110882568359375,
      "step": 18167,
      "training_step_time": 0.3962595462799072
    },
    {
      "epoch": 0.000110888671875,
      "model_forward_time": 0.11558413505554199,
      "step": 18168
    },
    {
      "epoch": 0.000110888671875,
      "step": 18168,
      "training_step_time": 0.3992881774902344
    },
    {
      "epoch": 0.000110894775390625,
      "model_forward_time": 0.11548638343811035,
      "step": 18169
    },
    {
      "epoch": 0.000110894775390625,
      "step": 18169,
      "training_step_time": 0.4182772636413574
    },
    {
      "epoch": 0.00011090087890625,
      "grad_norm": 0.14022278785705566,
      "learning_rate": 8.351792293246427e-05,
      "loss": 0.0483,
      "step": 18170
    },
    {
      "epoch": 0.00011090087890625,
      "model_forward_time": 0.1157691478729248,
      "step": 18170
    },
    {
      "epoch": 0.00011090087890625,
      "step": 18170,
      "training_step_time": 0.398362398147583
    },
    {
      "epoch": 0.000110906982421875,
      "model_forward_time": 0.11492919921875,
      "step": 18171
    },
    {
      "epoch": 0.000110906982421875,
      "step": 18171,
      "training_step_time": 0.5490052700042725
    },
    {
      "epoch": 0.0001109130859375,
      "model_forward_time": 0.11474490165710449,
      "step": 18172
    },
    {
      "epoch": 0.0001109130859375,
      "step": 18172,
      "training_step_time": 0.4601421356201172
    },
    {
      "epoch": 0.000110919189453125,
      "model_forward_time": 0.11481738090515137,
      "step": 18173
    },
    {
      "epoch": 0.000110919189453125,
      "step": 18173,
      "training_step_time": 0.3978900909423828
    },
    {
      "epoch": 0.00011092529296875,
      "model_forward_time": 0.11530709266662598,
      "step": 18174
    },
    {
      "epoch": 0.00011092529296875,
      "step": 18174,
      "training_step_time": 0.45300745964050293
    },
    {
      "epoch": 0.000110931396484375,
      "model_forward_time": 0.11495661735534668,
      "step": 18175
    },
    {
      "epoch": 0.000110931396484375,
      "step": 18175,
      "training_step_time": 0.41272854804992676
    },
    {
      "epoch": 0.0001109375,
      "model_forward_time": 0.11522412300109863,
      "step": 18176
    },
    {
      "epoch": 0.0001109375,
      "step": 18176,
      "training_step_time": 0.3839609622955322
    },
    {
      "epoch": 0.000110943603515625,
      "model_forward_time": 0.11548495292663574,
      "step": 18177
    },
    {
      "epoch": 0.000110943603515625,
      "step": 18177,
      "training_step_time": 0.3978404998779297
    },
    {
      "epoch": 0.00011094970703125,
      "model_forward_time": 0.11490726470947266,
      "step": 18178
    },
    {
      "epoch": 0.00011094970703125,
      "step": 18178,
      "training_step_time": 0.3871419429779053
    },
    {
      "epoch": 0.000110955810546875,
      "model_forward_time": 0.11506032943725586,
      "step": 18179
    },
    {
      "epoch": 0.000110955810546875,
      "step": 18179,
      "training_step_time": 0.4117751121520996
    },
    {
      "epoch": 0.0001109619140625,
      "grad_norm": 0.1247142031788826,
      "learning_rate": 8.349746890119826e-05,
      "loss": 0.0544,
      "step": 18180
    },
    {
      "epoch": 0.0001109619140625,
      "model_forward_time": 0.11502742767333984,
      "step": 18180
    },
    {
      "epoch": 0.0001109619140625,
      "step": 18180,
      "training_step_time": 0.3955092430114746
    },
    {
      "epoch": 0.000110968017578125,
      "model_forward_time": 0.11559629440307617,
      "step": 18181
    },
    {
      "epoch": 0.000110968017578125,
      "step": 18181,
      "training_step_time": 0.4024174213409424
    },
    {
      "epoch": 0.00011097412109375,
      "model_forward_time": 0.11543869972229004,
      "step": 18182
    },
    {
      "epoch": 0.00011097412109375,
      "step": 18182,
      "training_step_time": 0.40044713020324707
    },
    {
      "epoch": 0.000110980224609375,
      "model_forward_time": 0.11493968963623047,
      "step": 18183
    },
    {
      "epoch": 0.000110980224609375,
      "step": 18183,
      "training_step_time": 0.5808451175689697
    },
    {
      "epoch": 0.000110986328125,
      "model_forward_time": 0.1152048110961914,
      "step": 18184
    },
    {
      "epoch": 0.000110986328125,
      "step": 18184,
      "training_step_time": 0.4264490604400635
    },
    {
      "epoch": 0.000110992431640625,
      "model_forward_time": 0.11491751670837402,
      "step": 18185
    },
    {
      "epoch": 0.000110992431640625,
      "step": 18185,
      "training_step_time": 0.4276871681213379
    },
    {
      "epoch": 0.00011099853515625,
      "model_forward_time": 0.11450433731079102,
      "step": 18186
    },
    {
      "epoch": 0.00011099853515625,
      "step": 18186,
      "training_step_time": 0.36582422256469727
    },
    {
      "epoch": 0.000111004638671875,
      "model_forward_time": 0.11465811729431152,
      "step": 18187
    },
    {
      "epoch": 0.000111004638671875,
      "step": 18187,
      "training_step_time": 0.4602997303009033
    },
    {
      "epoch": 0.0001110107421875,
      "model_forward_time": 0.11469268798828125,
      "step": 18188
    },
    {
      "epoch": 0.0001110107421875,
      "step": 18188,
      "training_step_time": 0.4591968059539795
    },
    {
      "epoch": 0.000111016845703125,
      "model_forward_time": 0.11496424674987793,
      "step": 18189
    },
    {
      "epoch": 0.000111016845703125,
      "step": 18189,
      "training_step_time": 0.4401540756225586
    },
    {
      "epoch": 0.00011102294921875,
      "grad_norm": 0.2001354992389679,
      "learning_rate": 8.347700469428564e-05,
      "loss": 0.0551,
      "step": 18190
    },
    {
      "epoch": 0.00011102294921875,
      "model_forward_time": 0.11526608467102051,
      "step": 18190
    },
    {
      "epoch": 0.00011102294921875,
      "step": 18190,
      "training_step_time": 0.4256591796875
    },
    {
      "epoch": 0.000111029052734375,
      "model_forward_time": 0.1144404411315918,
      "step": 18191
    },
    {
      "epoch": 0.000111029052734375,
      "step": 18191,
      "training_step_time": 0.41303443908691406
    },
    {
      "epoch": 0.00011103515625,
      "model_forward_time": 0.11442804336547852,
      "step": 18192
    },
    {
      "epoch": 0.00011103515625,
      "step": 18192,
      "training_step_time": 0.39315176010131836
    },
    {
      "epoch": 0.000111041259765625,
      "model_forward_time": 0.11448311805725098,
      "step": 18193
    },
    {
      "epoch": 0.000111041259765625,
      "step": 18193,
      "training_step_time": 0.3904879093170166
    },
    {
      "epoch": 0.00011104736328125,
      "model_forward_time": 0.11609363555908203,
      "step": 18194
    },
    {
      "epoch": 0.00011104736328125,
      "step": 18194,
      "training_step_time": 0.38719844818115234
    },
    {
      "epoch": 0.000111053466796875,
      "model_forward_time": 0.11504173278808594,
      "step": 18195
    },
    {
      "epoch": 0.000111053466796875,
      "step": 18195,
      "training_step_time": 0.40538501739501953
    },
    {
      "epoch": 0.0001110595703125,
      "model_forward_time": 0.11501932144165039,
      "step": 18196
    },
    {
      "epoch": 0.0001110595703125,
      "step": 18196,
      "training_step_time": 0.39606523513793945
    },
    {
      "epoch": 0.000111065673828125,
      "model_forward_time": 0.11685419082641602,
      "step": 18197
    },
    {
      "epoch": 0.000111065673828125,
      "step": 18197,
      "training_step_time": 0.4158782958984375
    },
    {
      "epoch": 0.00011107177734375,
      "model_forward_time": 0.11591124534606934,
      "step": 18198
    },
    {
      "epoch": 0.00011107177734375,
      "step": 18198,
      "training_step_time": 0.4434072971343994
    },
    {
      "epoch": 0.000111077880859375,
      "model_forward_time": 0.11474823951721191,
      "step": 18199
    },
    {
      "epoch": 0.000111077880859375,
      "step": 18199,
      "training_step_time": 0.40982651710510254
    },
    {
      "epoch": 0.000111083984375,
      "grad_norm": 0.13925676047801971,
      "learning_rate": 8.345653031794292e-05,
      "loss": 0.0468,
      "step": 18200
    },
    {
      "epoch": 0.000111083984375,
      "model_forward_time": 0.11538529396057129,
      "step": 18200
    },
    {
      "epoch": 0.000111083984375,
      "step": 18200,
      "training_step_time": 0.39310383796691895
    },
    {
      "epoch": 0.000111090087890625,
      "model_forward_time": 0.11506104469299316,
      "step": 18201
    },
    {
      "epoch": 0.000111090087890625,
      "step": 18201,
      "training_step_time": 0.4789700508117676
    },
    {
      "epoch": 0.00011109619140625,
      "model_forward_time": 0.11456298828125,
      "step": 18202
    },
    {
      "epoch": 0.00011109619140625,
      "step": 18202,
      "training_step_time": 0.4028482437133789
    },
    {
      "epoch": 0.000111102294921875,
      "model_forward_time": 0.11457014083862305,
      "step": 18203
    },
    {
      "epoch": 0.000111102294921875,
      "step": 18203,
      "training_step_time": 0.4945535659790039
    },
    {
      "epoch": 0.0001111083984375,
      "model_forward_time": 0.11546754837036133,
      "step": 18204
    },
    {
      "epoch": 0.0001111083984375,
      "step": 18204,
      "training_step_time": 0.40375757217407227
    },
    {
      "epoch": 0.000111114501953125,
      "model_forward_time": 0.11472463607788086,
      "step": 18205
    },
    {
      "epoch": 0.000111114501953125,
      "step": 18205,
      "training_step_time": 0.46804213523864746
    },
    {
      "epoch": 0.00011112060546875,
      "model_forward_time": 0.11427164077758789,
      "step": 18206
    },
    {
      "epoch": 0.00011112060546875,
      "step": 18206,
      "training_step_time": 0.38446784019470215
    },
    {
      "epoch": 0.000111126708984375,
      "model_forward_time": 0.11584353446960449,
      "step": 18207
    },
    {
      "epoch": 0.000111126708984375,
      "step": 18207,
      "training_step_time": 0.3810276985168457
    },
    {
      "epoch": 0.0001111328125,
      "model_forward_time": 0.11512565612792969,
      "step": 18208
    },
    {
      "epoch": 0.0001111328125,
      "step": 18208,
      "training_step_time": 0.3873708248138428
    },
    {
      "epoch": 0.000111138916015625,
      "model_forward_time": 0.11467742919921875,
      "step": 18209
    },
    {
      "epoch": 0.000111138916015625,
      "step": 18209,
      "training_step_time": 0.386976957321167
    },
    {
      "epoch": 0.00011114501953125,
      "grad_norm": 0.1906217485666275,
      "learning_rate": 8.343604577838964e-05,
      "loss": 0.0505,
      "step": 18210
    },
    {
      "epoch": 0.00011114501953125,
      "model_forward_time": 0.11494970321655273,
      "step": 18210
    },
    {
      "epoch": 0.00011114501953125,
      "step": 18210,
      "training_step_time": 0.39584922790527344
    },
    {
      "epoch": 0.000111151123046875,
      "model_forward_time": 0.11490774154663086,
      "step": 18211
    },
    {
      "epoch": 0.000111151123046875,
      "step": 18211,
      "training_step_time": 0.39466238021850586
    },
    {
      "epoch": 0.0001111572265625,
      "model_forward_time": 0.11552739143371582,
      "step": 18212
    },
    {
      "epoch": 0.0001111572265625,
      "step": 18212,
      "training_step_time": 0.469501256942749
    },
    {
      "epoch": 0.000111163330078125,
      "model_forward_time": 0.11657333374023438,
      "step": 18213
    },
    {
      "epoch": 0.000111163330078125,
      "step": 18213,
      "training_step_time": 0.49356937408447266
    },
    {
      "epoch": 0.00011116943359375,
      "model_forward_time": 0.11515927314758301,
      "step": 18214
    },
    {
      "epoch": 0.00011116943359375,
      "step": 18214,
      "training_step_time": 0.4220695495605469
    },
    {
      "epoch": 0.000111175537109375,
      "model_forward_time": 0.1149909496307373,
      "step": 18215
    },
    {
      "epoch": 0.000111175537109375,
      "step": 18215,
      "training_step_time": 0.40779852867126465
    },
    {
      "epoch": 0.000111181640625,
      "model_forward_time": 0.11448526382446289,
      "step": 18216
    },
    {
      "epoch": 0.000111181640625,
      "step": 18216,
      "training_step_time": 0.47454309463500977
    },
    {
      "epoch": 0.000111187744140625,
      "model_forward_time": 0.11473608016967773,
      "step": 18217
    },
    {
      "epoch": 0.000111187744140625,
      "step": 18217,
      "training_step_time": 0.4992241859436035
    },
    {
      "epoch": 0.00011119384765625,
      "model_forward_time": 0.11468100547790527,
      "step": 18218
    },
    {
      "epoch": 0.00011119384765625,
      "step": 18218,
      "training_step_time": 0.43012428283691406
    },
    {
      "epoch": 0.000111199951171875,
      "model_forward_time": 0.11452460289001465,
      "step": 18219
    },
    {
      "epoch": 0.000111199951171875,
      "step": 18219,
      "training_step_time": 0.4998946189880371
    },
    {
      "epoch": 0.0001112060546875,
      "grad_norm": 0.20100873708724976,
      "learning_rate": 8.34155510818485e-05,
      "loss": 0.0536,
      "step": 18220
    },
    {
      "epoch": 0.0001112060546875,
      "model_forward_time": 0.1149592399597168,
      "step": 18220
    },
    {
      "epoch": 0.0001112060546875,
      "step": 18220,
      "training_step_time": 0.3857004642486572
    },
    {
      "epoch": 0.000111212158203125,
      "model_forward_time": 0.11565446853637695,
      "step": 18221
    },
    {
      "epoch": 0.000111212158203125,
      "step": 18221,
      "training_step_time": 0.39037108421325684
    },
    {
      "epoch": 0.00011121826171875,
      "model_forward_time": 0.1155710220336914,
      "step": 18222
    },
    {
      "epoch": 0.00011121826171875,
      "step": 18222,
      "training_step_time": 0.39380431175231934
    },
    {
      "epoch": 0.000111224365234375,
      "model_forward_time": 0.11533069610595703,
      "step": 18223
    },
    {
      "epoch": 0.000111224365234375,
      "step": 18223,
      "training_step_time": 0.4044833183288574
    },
    {
      "epoch": 0.00011123046875,
      "model_forward_time": 0.11560845375061035,
      "step": 18224
    },
    {
      "epoch": 0.00011123046875,
      "step": 18224,
      "training_step_time": 0.3925514221191406
    },
    {
      "epoch": 0.000111236572265625,
      "model_forward_time": 0.11507797241210938,
      "step": 18225
    },
    {
      "epoch": 0.000111236572265625,
      "step": 18225,
      "training_step_time": 0.4962191581726074
    },
    {
      "epoch": 0.00011124267578125,
      "model_forward_time": 0.11485767364501953,
      "step": 18226
    },
    {
      "epoch": 0.00011124267578125,
      "step": 18226,
      "training_step_time": 0.4111909866333008
    },
    {
      "epoch": 0.000111248779296875,
      "model_forward_time": 0.1148676872253418,
      "step": 18227
    },
    {
      "epoch": 0.000111248779296875,
      "step": 18227,
      "training_step_time": 0.40130066871643066
    },
    {
      "epoch": 0.0001112548828125,
      "model_forward_time": 0.11431622505187988,
      "step": 18228
    },
    {
      "epoch": 0.0001112548828125,
      "step": 18228,
      "training_step_time": 0.45191192626953125
    },
    {
      "epoch": 0.000111260986328125,
      "model_forward_time": 0.11430597305297852,
      "step": 18229
    },
    {
      "epoch": 0.000111260986328125,
      "step": 18229,
      "training_step_time": 0.39676356315612793
    },
    {
      "epoch": 0.00011126708984375,
      "grad_norm": 0.15947532653808594,
      "learning_rate": 8.339504623454521e-05,
      "loss": 0.0541,
      "step": 18230
    },
    {
      "epoch": 0.00011126708984375,
      "model_forward_time": 0.1149904727935791,
      "step": 18230
    },
    {
      "epoch": 0.00011126708984375,
      "step": 18230,
      "training_step_time": 0.40886807441711426
    },
    {
      "epoch": 0.000111273193359375,
      "model_forward_time": 0.1151425838470459,
      "step": 18231
    },
    {
      "epoch": 0.000111273193359375,
      "step": 18231,
      "training_step_time": 0.44337964057922363
    },
    {
      "epoch": 0.000111279296875,
      "model_forward_time": 0.11556077003479004,
      "step": 18232
    },
    {
      "epoch": 0.000111279296875,
      "step": 18232,
      "training_step_time": 0.5088999271392822
    },
    {
      "epoch": 0.000111285400390625,
      "model_forward_time": 0.11519789695739746,
      "step": 18233
    },
    {
      "epoch": 0.000111285400390625,
      "step": 18233,
      "training_step_time": 0.4328920841217041
    },
    {
      "epoch": 0.00011129150390625,
      "model_forward_time": 0.1153097152709961,
      "step": 18234
    },
    {
      "epoch": 0.00011129150390625,
      "step": 18234,
      "training_step_time": 0.3990206718444824
    },
    {
      "epoch": 0.000111297607421875,
      "model_forward_time": 0.11425638198852539,
      "step": 18235
    },
    {
      "epoch": 0.000111297607421875,
      "step": 18235,
      "training_step_time": 0.39214587211608887
    },
    {
      "epoch": 0.0001113037109375,
      "model_forward_time": 0.11676836013793945,
      "step": 18236
    },
    {
      "epoch": 0.0001113037109375,
      "step": 18236,
      "training_step_time": 0.3966670036315918
    },
    {
      "epoch": 0.000111309814453125,
      "model_forward_time": 0.11523771286010742,
      "step": 18237
    },
    {
      "epoch": 0.000111309814453125,
      "step": 18237,
      "training_step_time": 0.6254703998565674
    },
    {
      "epoch": 0.00011131591796875,
      "model_forward_time": 0.11517882347106934,
      "step": 18238
    },
    {
      "epoch": 0.00011131591796875,
      "step": 18238,
      "training_step_time": 0.38661718368530273
    },
    {
      "epoch": 0.000111322021484375,
      "model_forward_time": 0.11537575721740723,
      "step": 18239
    },
    {
      "epoch": 0.000111322021484375,
      "step": 18239,
      "training_step_time": 0.39139533042907715
    },
    {
      "epoch": 0.000111328125,
      "grad_norm": 0.14965391159057617,
      "learning_rate": 8.337453124270863e-05,
      "loss": 0.0526,
      "step": 18240
    },
    {
      "epoch": 0.000111328125,
      "model_forward_time": 0.11512613296508789,
      "step": 18240
    },
    {
      "epoch": 0.000111328125,
      "step": 18240,
      "training_step_time": 0.48950791358947754
    },
    {
      "epoch": 0.000111334228515625,
      "model_forward_time": 0.11470341682434082,
      "step": 18241
    },
    {
      "epoch": 0.000111334228515625,
      "step": 18241,
      "training_step_time": 0.4015681743621826
    },
    {
      "epoch": 0.00011134033203125,
      "model_forward_time": 0.11512899398803711,
      "step": 18242
    },
    {
      "epoch": 0.00011134033203125,
      "step": 18242,
      "training_step_time": 0.4960319995880127
    },
    {
      "epoch": 0.000111346435546875,
      "model_forward_time": 0.11473441123962402,
      "step": 18243
    },
    {
      "epoch": 0.000111346435546875,
      "step": 18243,
      "training_step_time": 0.5001387596130371
    },
    {
      "epoch": 0.0001113525390625,
      "model_forward_time": 0.1145029067993164,
      "step": 18244
    },
    {
      "epoch": 0.0001113525390625,
      "step": 18244,
      "training_step_time": 0.3665003776550293
    },
    {
      "epoch": 0.000111358642578125,
      "model_forward_time": 0.11456990242004395,
      "step": 18245
    },
    {
      "epoch": 0.000111358642578125,
      "step": 18245,
      "training_step_time": 0.4315640926361084
    },
    {
      "epoch": 0.00011136474609375,
      "model_forward_time": 0.1144564151763916,
      "step": 18246
    },
    {
      "epoch": 0.00011136474609375,
      "step": 18246,
      "training_step_time": 0.46091151237487793
    },
    {
      "epoch": 0.000111370849609375,
      "model_forward_time": 0.11483955383300781,
      "step": 18247
    },
    {
      "epoch": 0.000111370849609375,
      "step": 18247,
      "training_step_time": 0.41854214668273926
    },
    {
      "epoch": 0.000111376953125,
      "model_forward_time": 0.11508011817932129,
      "step": 18248
    },
    {
      "epoch": 0.000111376953125,
      "step": 18248,
      "training_step_time": 0.3884611129760742
    },
    {
      "epoch": 0.000111383056640625,
      "model_forward_time": 0.11510539054870605,
      "step": 18249
    },
    {
      "epoch": 0.000111383056640625,
      "step": 18249,
      "training_step_time": 0.5927550792694092
    },
    {
      "epoch": 0.00011138916015625,
      "grad_norm": 0.17970559000968933,
      "learning_rate": 8.335400611257067e-05,
      "loss": 0.0586,
      "step": 18250
    },
    {
      "epoch": 0.00011138916015625,
      "model_forward_time": 0.11517333984375,
      "step": 18250
    },
    {
      "epoch": 0.00011138916015625,
      "step": 18250,
      "training_step_time": 0.3892486095428467
    },
    {
      "epoch": 0.000111395263671875,
      "model_forward_time": 0.11523079872131348,
      "step": 18251
    },
    {
      "epoch": 0.000111395263671875,
      "step": 18251,
      "training_step_time": 0.3851957321166992
    },
    {
      "epoch": 0.0001114013671875,
      "model_forward_time": 0.11520576477050781,
      "step": 18252
    },
    {
      "epoch": 0.0001114013671875,
      "step": 18252,
      "training_step_time": 0.3889045715332031
    },
    {
      "epoch": 0.000111407470703125,
      "model_forward_time": 0.11461305618286133,
      "step": 18253
    },
    {
      "epoch": 0.000111407470703125,
      "step": 18253,
      "training_step_time": 0.39658284187316895
    },
    {
      "epoch": 0.00011141357421875,
      "model_forward_time": 0.11584353446960449,
      "step": 18254
    },
    {
      "epoch": 0.00011141357421875,
      "step": 18254,
      "training_step_time": 0.4837992191314697
    },
    {
      "epoch": 0.000111419677734375,
      "model_forward_time": 0.11581921577453613,
      "step": 18255
    },
    {
      "epoch": 0.000111419677734375,
      "step": 18255,
      "training_step_time": 0.7622950077056885
    },
    {
      "epoch": 0.00011142578125,
      "model_forward_time": 0.11461138725280762,
      "step": 18256
    },
    {
      "epoch": 0.00011142578125,
      "step": 18256,
      "training_step_time": 0.3972909450531006
    },
    {
      "epoch": 0.000111431884765625,
      "model_forward_time": 0.11460065841674805,
      "step": 18257
    },
    {
      "epoch": 0.000111431884765625,
      "step": 18257,
      "training_step_time": 0.3944363594055176
    },
    {
      "epoch": 0.00011143798828125,
      "model_forward_time": 0.11505484580993652,
      "step": 18258
    },
    {
      "epoch": 0.00011143798828125,
      "step": 18258,
      "training_step_time": 0.3677196502685547
    },
    {
      "epoch": 0.000111444091796875,
      "model_forward_time": 0.11415290832519531,
      "step": 18259
    },
    {
      "epoch": 0.000111444091796875,
      "step": 18259,
      "training_step_time": 0.43462467193603516
    },
    {
      "epoch": 0.0001114501953125,
      "grad_norm": 0.14474178850650787,
      "learning_rate": 8.33334708503663e-05,
      "loss": 0.0493,
      "step": 18260
    },
    {
      "epoch": 0.0001114501953125,
      "model_forward_time": 0.11442446708679199,
      "step": 18260
    },
    {
      "epoch": 0.0001114501953125,
      "step": 18260,
      "training_step_time": 0.49335169792175293
    },
    {
      "epoch": 0.000111456298828125,
      "model_forward_time": 0.1150503158569336,
      "step": 18261
    },
    {
      "epoch": 0.000111456298828125,
      "step": 18261,
      "training_step_time": 0.6363301277160645
    },
    {
      "epoch": 0.00011146240234375,
      "model_forward_time": 0.11477375030517578,
      "step": 18262
    },
    {
      "epoch": 0.00011146240234375,
      "step": 18262,
      "training_step_time": 0.3892481327056885
    },
    {
      "epoch": 0.000111468505859375,
      "model_forward_time": 0.11437034606933594,
      "step": 18263
    },
    {
      "epoch": 0.000111468505859375,
      "step": 18263,
      "training_step_time": 0.38216471672058105
    },
    {
      "epoch": 0.000111474609375,
      "model_forward_time": 0.1144099235534668,
      "step": 18264
    },
    {
      "epoch": 0.000111474609375,
      "step": 18264,
      "training_step_time": 0.393115758895874
    },
    {
      "epoch": 0.000111480712890625,
      "model_forward_time": 0.11479616165161133,
      "step": 18265
    },
    {
      "epoch": 0.000111480712890625,
      "step": 18265,
      "training_step_time": 0.39684462547302246
    },
    {
      "epoch": 0.00011148681640625,
      "model_forward_time": 0.1142888069152832,
      "step": 18266
    },
    {
      "epoch": 0.00011148681640625,
      "step": 18266,
      "training_step_time": 0.389371395111084
    },
    {
      "epoch": 0.000111492919921875,
      "model_forward_time": 0.11507129669189453,
      "step": 18267
    },
    {
      "epoch": 0.000111492919921875,
      "step": 18267,
      "training_step_time": 0.6441242694854736
    },
    {
      "epoch": 0.0001114990234375,
      "model_forward_time": 0.11432099342346191,
      "step": 18268
    },
    {
      "epoch": 0.0001114990234375,
      "step": 18268,
      "training_step_time": 0.47135138511657715
    },
    {
      "epoch": 0.000111505126953125,
      "model_forward_time": 0.11460304260253906,
      "step": 18269
    },
    {
      "epoch": 0.000111505126953125,
      "step": 18269,
      "training_step_time": 0.49385809898376465
    },
    {
      "epoch": 0.00011151123046875,
      "grad_norm": 0.1525072455406189,
      "learning_rate": 8.331292546233362e-05,
      "loss": 0.0543,
      "step": 18270
    },
    {
      "epoch": 0.00011151123046875,
      "model_forward_time": 0.11462807655334473,
      "step": 18270
    },
    {
      "epoch": 0.00011151123046875,
      "step": 18270,
      "training_step_time": 0.4157276153564453
    },
    {
      "epoch": 0.000111517333984375,
      "model_forward_time": 0.11384892463684082,
      "step": 18271
    },
    {
      "epoch": 0.000111517333984375,
      "step": 18271,
      "training_step_time": 0.3905825614929199
    },
    {
      "epoch": 0.0001115234375,
      "model_forward_time": 0.11403250694274902,
      "step": 18272
    },
    {
      "epoch": 0.0001115234375,
      "step": 18272,
      "training_step_time": 0.4389512538909912
    },
    {
      "epoch": 0.000111529541015625,
      "model_forward_time": 0.11476325988769531,
      "step": 18273
    },
    {
      "epoch": 0.000111529541015625,
      "step": 18273,
      "training_step_time": 0.48454856872558594
    },
    {
      "epoch": 0.00011153564453125,
      "model_forward_time": 0.11480855941772461,
      "step": 18274
    },
    {
      "epoch": 0.00011153564453125,
      "step": 18274,
      "training_step_time": 0.4245309829711914
    },
    {
      "epoch": 0.000111541748046875,
      "model_forward_time": 0.11530065536499023,
      "step": 18275
    },
    {
      "epoch": 0.000111541748046875,
      "step": 18275,
      "training_step_time": 0.3916141986846924
    },
    {
      "epoch": 0.0001115478515625,
      "model_forward_time": 0.11515474319458008,
      "step": 18276
    },
    {
      "epoch": 0.0001115478515625,
      "step": 18276,
      "training_step_time": 0.4287431240081787
    },
    {
      "epoch": 0.000111553955078125,
      "model_forward_time": 0.11464548110961914,
      "step": 18277
    },
    {
      "epoch": 0.000111553955078125,
      "step": 18277,
      "training_step_time": 0.40752291679382324
    },
    {
      "epoch": 0.00011156005859375,
      "model_forward_time": 0.11539912223815918,
      "step": 18278
    },
    {
      "epoch": 0.00011156005859375,
      "step": 18278,
      "training_step_time": 0.39367127418518066
    },
    {
      "epoch": 0.000111566162109375,
      "model_forward_time": 0.1152503490447998,
      "step": 18279
    },
    {
      "epoch": 0.000111566162109375,
      "step": 18279,
      "training_step_time": 0.7021796703338623
    },
    {
      "epoch": 0.000111572265625,
      "grad_norm": 0.17937543988227844,
      "learning_rate": 8.329236995471373e-05,
      "loss": 0.0461,
      "step": 18280
    },
    {
      "epoch": 0.000111572265625,
      "model_forward_time": 0.11455702781677246,
      "step": 18280
    },
    {
      "epoch": 0.000111572265625,
      "step": 18280,
      "training_step_time": 0.38571953773498535
    },
    {
      "epoch": 0.000111578369140625,
      "model_forward_time": 0.11407971382141113,
      "step": 18281
    },
    {
      "epoch": 0.000111578369140625,
      "step": 18281,
      "training_step_time": 0.47472620010375977
    },
    {
      "epoch": 0.00011158447265625,
      "model_forward_time": 0.11499381065368652,
      "step": 18282
    },
    {
      "epoch": 0.00011158447265625,
      "step": 18282,
      "training_step_time": 0.41948914527893066
    },
    {
      "epoch": 0.000111590576171875,
      "model_forward_time": 0.11485791206359863,
      "step": 18283
    },
    {
      "epoch": 0.000111590576171875,
      "step": 18283,
      "training_step_time": 0.41494202613830566
    },
    {
      "epoch": 0.0001115966796875,
      "model_forward_time": 0.11400294303894043,
      "step": 18284
    },
    {
      "epoch": 0.0001115966796875,
      "step": 18284,
      "training_step_time": 0.3899412155151367
    },
    {
      "epoch": 0.000111602783203125,
      "model_forward_time": 0.11530375480651855,
      "step": 18285
    },
    {
      "epoch": 0.000111602783203125,
      "step": 18285,
      "training_step_time": 0.5866858959197998
    },
    {
      "epoch": 0.00011160888671875,
      "model_forward_time": 0.1145780086517334,
      "step": 18286
    },
    {
      "epoch": 0.00011160888671875,
      "step": 18286,
      "training_step_time": 0.3647136688232422
    },
    {
      "epoch": 0.000111614990234375,
      "model_forward_time": 0.11499309539794922,
      "step": 18287
    },
    {
      "epoch": 0.000111614990234375,
      "step": 18287,
      "training_step_time": 0.4651374816894531
    },
    {
      "epoch": 0.00011162109375,
      "model_forward_time": 0.1142420768737793,
      "step": 18288
    },
    {
      "epoch": 0.00011162109375,
      "step": 18288,
      "training_step_time": 0.45526671409606934
    },
    {
      "epoch": 0.000111627197265625,
      "model_forward_time": 0.11450910568237305,
      "step": 18289
    },
    {
      "epoch": 0.000111627197265625,
      "step": 18289,
      "training_step_time": 0.38889217376708984
    },
    {
      "epoch": 0.00011163330078125,
      "grad_norm": 0.15393662452697754,
      "learning_rate": 8.327180433375091e-05,
      "loss": 0.0559,
      "step": 18290
    },
    {
      "epoch": 0.00011163330078125,
      "model_forward_time": 0.11461400985717773,
      "step": 18290
    },
    {
      "epoch": 0.00011163330078125,
      "step": 18290,
      "training_step_time": 0.3906586170196533
    },
    {
      "epoch": 0.000111639404296875,
      "model_forward_time": 0.11485624313354492,
      "step": 18291
    },
    {
      "epoch": 0.000111639404296875,
      "step": 18291,
      "training_step_time": 0.44022202491760254
    },
    {
      "epoch": 0.0001116455078125,
      "model_forward_time": 0.11490845680236816,
      "step": 18292
    },
    {
      "epoch": 0.0001116455078125,
      "step": 18292,
      "training_step_time": 0.4010450839996338
    },
    {
      "epoch": 0.000111651611328125,
      "model_forward_time": 0.1144568920135498,
      "step": 18293
    },
    {
      "epoch": 0.000111651611328125,
      "step": 18293,
      "training_step_time": 0.4008498191833496
    },
    {
      "epoch": 0.00011165771484375,
      "model_forward_time": 0.11439347267150879,
      "step": 18294
    },
    {
      "epoch": 0.00011165771484375,
      "step": 18294,
      "training_step_time": 0.3878469467163086
    },
    {
      "epoch": 0.000111663818359375,
      "model_forward_time": 0.11471700668334961,
      "step": 18295
    },
    {
      "epoch": 0.000111663818359375,
      "step": 18295,
      "training_step_time": 0.4459719657897949
    },
    {
      "epoch": 0.000111669921875,
      "model_forward_time": 0.11561393737792969,
      "step": 18296
    },
    {
      "epoch": 0.000111669921875,
      "step": 18296,
      "training_step_time": 0.43377089500427246
    },
    {
      "epoch": 0.000111676025390625,
      "model_forward_time": 0.11513924598693848,
      "step": 18297
    },
    {
      "epoch": 0.000111676025390625,
      "step": 18297,
      "training_step_time": 0.6918702125549316
    },
    {
      "epoch": 0.00011168212890625,
      "model_forward_time": 0.11505270004272461,
      "step": 18298
    },
    {
      "epoch": 0.00011168212890625,
      "step": 18298,
      "training_step_time": 0.3920431137084961
    },
    {
      "epoch": 0.000111688232421875,
      "model_forward_time": 0.11522603034973145,
      "step": 18299
    },
    {
      "epoch": 0.000111688232421875,
      "step": 18299,
      "training_step_time": 0.3933851718902588
    },
    {
      "epoch": 0.0001116943359375,
      "grad_norm": 0.12938237190246582,
      "learning_rate": 8.32512286056924e-05,
      "loss": 0.0564,
      "step": 18300
    },
    {
      "epoch": 0.0001116943359375,
      "model_forward_time": 0.11501193046569824,
      "step": 18300
    },
    {
      "epoch": 0.0001116943359375,
      "step": 18300,
      "training_step_time": 0.3856081962585449
    },
    {
      "epoch": 0.000111700439453125,
      "model_forward_time": 0.11423015594482422,
      "step": 18301
    },
    {
      "epoch": 0.000111700439453125,
      "step": 18301,
      "training_step_time": 0.4207115173339844
    },
    {
      "epoch": 0.00011170654296875,
      "model_forward_time": 0.11517786979675293,
      "step": 18302
    },
    {
      "epoch": 0.00011170654296875,
      "step": 18302,
      "training_step_time": 0.3957035541534424
    },
    {
      "epoch": 0.000111712646484375,
      "model_forward_time": 0.1154637336730957,
      "step": 18303
    },
    {
      "epoch": 0.000111712646484375,
      "step": 18303,
      "training_step_time": 0.6015779972076416
    },
    {
      "epoch": 0.00011171875,
      "model_forward_time": 0.11561727523803711,
      "step": 18304
    },
    {
      "epoch": 0.00011171875,
      "step": 18304,
      "training_step_time": 0.3883545398712158
    },
    {
      "epoch": 0.000111724853515625,
      "model_forward_time": 0.11444568634033203,
      "step": 18305
    },
    {
      "epoch": 0.000111724853515625,
      "step": 18305,
      "training_step_time": 0.3842179775238037
    },
    {
      "epoch": 0.00011173095703125,
      "model_forward_time": 0.11473703384399414,
      "step": 18306
    },
    {
      "epoch": 0.00011173095703125,
      "step": 18306,
      "training_step_time": 0.39464855194091797
    },
    {
      "epoch": 0.000111737060546875,
      "model_forward_time": 0.11521339416503906,
      "step": 18307
    },
    {
      "epoch": 0.000111737060546875,
      "step": 18307,
      "training_step_time": 0.3878777027130127
    },
    {
      "epoch": 0.0001117431640625,
      "model_forward_time": 0.11506175994873047,
      "step": 18308
    },
    {
      "epoch": 0.0001117431640625,
      "step": 18308,
      "training_step_time": 0.3949296474456787
    },
    {
      "epoch": 0.000111749267578125,
      "model_forward_time": 0.11424136161804199,
      "step": 18309
    },
    {
      "epoch": 0.000111749267578125,
      "step": 18309,
      "training_step_time": 0.8072073459625244
    },
    {
      "epoch": 0.00011175537109375,
      "grad_norm": 0.11103856563568115,
      "learning_rate": 8.323064277678862e-05,
      "loss": 0.0509,
      "step": 18310
    },
    {
      "epoch": 0.00011175537109375,
      "model_forward_time": 0.11462092399597168,
      "step": 18310
    },
    {
      "epoch": 0.00011175537109375,
      "step": 18310,
      "training_step_time": 0.46157217025756836
    },
    {
      "epoch": 0.000111761474609375,
      "model_forward_time": 0.11425638198852539,
      "step": 18311
    },
    {
      "epoch": 0.000111761474609375,
      "step": 18311,
      "training_step_time": 0.37719225883483887
    },
    {
      "epoch": 0.000111767578125,
      "model_forward_time": 0.1142280101776123,
      "step": 18312
    },
    {
      "epoch": 0.000111767578125,
      "step": 18312,
      "training_step_time": 0.3785703182220459
    },
    {
      "epoch": 0.000111773681640625,
      "model_forward_time": 0.11387801170349121,
      "step": 18313
    },
    {
      "epoch": 0.000111773681640625,
      "step": 18313,
      "training_step_time": 0.40467023849487305
    },
    {
      "epoch": 0.00011177978515625,
      "model_forward_time": 0.11526942253112793,
      "step": 18314
    },
    {
      "epoch": 0.00011177978515625,
      "step": 18314,
      "training_step_time": 0.4612123966217041
    },
    {
      "epoch": 0.000111785888671875,
      "model_forward_time": 0.11525678634643555,
      "step": 18315
    },
    {
      "epoch": 0.000111785888671875,
      "step": 18315,
      "training_step_time": 0.6303696632385254
    },
    {
      "epoch": 0.0001117919921875,
      "model_forward_time": 0.11512160301208496,
      "step": 18316
    },
    {
      "epoch": 0.0001117919921875,
      "step": 18316,
      "training_step_time": 0.3871898651123047
    },
    {
      "epoch": 0.000111798095703125,
      "model_forward_time": 0.11461615562438965,
      "step": 18317
    },
    {
      "epoch": 0.000111798095703125,
      "step": 18317,
      "training_step_time": 0.3891286849975586
    },
    {
      "epoch": 0.00011180419921875,
      "model_forward_time": 0.11464309692382812,
      "step": 18318
    },
    {
      "epoch": 0.00011180419921875,
      "step": 18318,
      "training_step_time": 0.38168978691101074
    },
    {
      "epoch": 0.000111810302734375,
      "model_forward_time": 0.11461925506591797,
      "step": 18319
    },
    {
      "epoch": 0.000111810302734375,
      "step": 18319,
      "training_step_time": 0.39801859855651855
    },
    {
      "epoch": 0.00011181640625,
      "grad_norm": 0.18138012290000916,
      "learning_rate": 8.321004685329296e-05,
      "loss": 0.0456,
      "step": 18320
    },
    {
      "epoch": 0.00011181640625,
      "model_forward_time": 0.11456155776977539,
      "step": 18320
    },
    {
      "epoch": 0.00011181640625,
      "step": 18320,
      "training_step_time": 0.38994264602661133
    },
    {
      "epoch": 0.000111822509765625,
      "model_forward_time": 0.1145639419555664,
      "step": 18321
    },
    {
      "epoch": 0.000111822509765625,
      "step": 18321,
      "training_step_time": 0.9239237308502197
    },
    {
      "epoch": 0.00011182861328125,
      "model_forward_time": 0.11465144157409668,
      "step": 18322
    },
    {
      "epoch": 0.00011182861328125,
      "step": 18322,
      "training_step_time": 0.41398000717163086
    },
    {
      "epoch": 0.000111834716796875,
      "model_forward_time": 0.1146841049194336,
      "step": 18323
    },
    {
      "epoch": 0.000111834716796875,
      "step": 18323,
      "training_step_time": 0.47260618209838867
    },
    {
      "epoch": 0.0001118408203125,
      "model_forward_time": 0.11375260353088379,
      "step": 18324
    },
    {
      "epoch": 0.0001118408203125,
      "step": 18324,
      "training_step_time": 0.3819742202758789
    },
    {
      "epoch": 0.000111846923828125,
      "model_forward_time": 0.11413717269897461,
      "step": 18325
    },
    {
      "epoch": 0.000111846923828125,
      "step": 18325,
      "training_step_time": 0.39192914962768555
    },
    {
      "epoch": 0.00011185302734375,
      "model_forward_time": 0.1139681339263916,
      "step": 18326
    },
    {
      "epoch": 0.00011185302734375,
      "step": 18326,
      "training_step_time": 0.4546797275543213
    },
    {
      "epoch": 0.000111859130859375,
      "model_forward_time": 0.1147615909576416,
      "step": 18327
    },
    {
      "epoch": 0.000111859130859375,
      "step": 18327,
      "training_step_time": 0.5428807735443115
    },
    {
      "epoch": 0.000111865234375,
      "model_forward_time": 0.11426472663879395,
      "step": 18328
    },
    {
      "epoch": 0.000111865234375,
      "step": 18328,
      "training_step_time": 0.47459983825683594
    },
    {
      "epoch": 0.000111871337890625,
      "model_forward_time": 0.11424565315246582,
      "step": 18329
    },
    {
      "epoch": 0.000111871337890625,
      "step": 18329,
      "training_step_time": 0.4664003849029541
    },
    {
      "epoch": 0.00011187744140625,
      "grad_norm": 0.202727809548378,
      "learning_rate": 8.318944084146192e-05,
      "loss": 0.0553,
      "step": 18330
    },
    {
      "epoch": 0.00011187744140625,
      "model_forward_time": 0.11438989639282227,
      "step": 18330
    },
    {
      "epoch": 0.00011187744140625,
      "step": 18330,
      "training_step_time": 0.3943607807159424
    },
    {
      "epoch": 0.000111883544921875,
      "model_forward_time": 0.11428642272949219,
      "step": 18331
    },
    {
      "epoch": 0.000111883544921875,
      "step": 18331,
      "training_step_time": 0.39374208450317383
    },
    {
      "epoch": 0.0001118896484375,
      "model_forward_time": 0.11515021324157715,
      "step": 18332
    },
    {
      "epoch": 0.0001118896484375,
      "step": 18332,
      "training_step_time": 0.40104079246520996
    },
    {
      "epoch": 0.000111895751953125,
      "model_forward_time": 0.11408472061157227,
      "step": 18333
    },
    {
      "epoch": 0.000111895751953125,
      "step": 18333,
      "training_step_time": 0.45202159881591797
    },
    {
      "epoch": 0.00011190185546875,
      "model_forward_time": 0.11489033699035645,
      "step": 18334
    },
    {
      "epoch": 0.00011190185546875,
      "step": 18334,
      "training_step_time": 0.3910689353942871
    },
    {
      "epoch": 0.000111907958984375,
      "model_forward_time": 0.11546039581298828,
      "step": 18335
    },
    {
      "epoch": 0.000111907958984375,
      "step": 18335,
      "training_step_time": 0.3919062614440918
    },
    {
      "epoch": 0.0001119140625,
      "model_forward_time": 0.11569666862487793,
      "step": 18336
    },
    {
      "epoch": 0.0001119140625,
      "step": 18336,
      "training_step_time": 0.399486780166626
    },
    {
      "epoch": 0.000111920166015625,
      "model_forward_time": 0.11506175994873047,
      "step": 18337
    },
    {
      "epoch": 0.000111920166015625,
      "step": 18337,
      "training_step_time": 0.41800475120544434
    },
    {
      "epoch": 0.00011192626953125,
      "model_forward_time": 0.11489439010620117,
      "step": 18338
    },
    {
      "epoch": 0.00011192626953125,
      "step": 18338,
      "training_step_time": 0.4308605194091797
    },
    {
      "epoch": 0.000111932373046875,
      "model_forward_time": 0.11515045166015625,
      "step": 18339
    },
    {
      "epoch": 0.000111932373046875,
      "step": 18339,
      "training_step_time": 0.7937192916870117
    },
    {
      "epoch": 0.0001119384765625,
      "grad_norm": 0.17136013507843018,
      "learning_rate": 8.316882474755507e-05,
      "loss": 0.0508,
      "step": 18340
    },
    {
      "epoch": 0.0001119384765625,
      "model_forward_time": 0.1144108772277832,
      "step": 18340
    },
    {
      "epoch": 0.0001119384765625,
      "step": 18340,
      "training_step_time": 0.38884425163269043
    },
    {
      "epoch": 0.000111944580078125,
      "model_forward_time": 0.1142265796661377,
      "step": 18341
    },
    {
      "epoch": 0.000111944580078125,
      "step": 18341,
      "training_step_time": 0.4922938346862793
    },
    {
      "epoch": 0.00011195068359375,
      "model_forward_time": 0.11490750312805176,
      "step": 18342
    },
    {
      "epoch": 0.00011195068359375,
      "step": 18342,
      "training_step_time": 0.49062633514404297
    },
    {
      "epoch": 0.000111956787109375,
      "model_forward_time": 0.11439919471740723,
      "step": 18343
    },
    {
      "epoch": 0.000111956787109375,
      "step": 18343,
      "training_step_time": 0.4092271327972412
    },
    {
      "epoch": 0.000111962890625,
      "model_forward_time": 0.11348700523376465,
      "step": 18344
    },
    {
      "epoch": 0.000111962890625,
      "step": 18344,
      "training_step_time": 0.39066576957702637
    },
    {
      "epoch": 0.000111968994140625,
      "model_forward_time": 0.11493730545043945,
      "step": 18345
    },
    {
      "epoch": 0.000111968994140625,
      "step": 18345,
      "training_step_time": 0.4674546718597412
    },
    {
      "epoch": 0.00011197509765625,
      "model_forward_time": 0.11444258689880371,
      "step": 18346
    },
    {
      "epoch": 0.00011197509765625,
      "step": 18346,
      "training_step_time": 0.4021296501159668
    },
    {
      "epoch": 0.000111981201171875,
      "model_forward_time": 0.11551117897033691,
      "step": 18347
    },
    {
      "epoch": 0.000111981201171875,
      "step": 18347,
      "training_step_time": 0.3875901699066162
    },
    {
      "epoch": 0.0001119873046875,
      "model_forward_time": 0.11495518684387207,
      "step": 18348
    },
    {
      "epoch": 0.0001119873046875,
      "step": 18348,
      "training_step_time": 0.39202070236206055
    },
    {
      "epoch": 0.000111993408203125,
      "model_forward_time": 0.11561250686645508,
      "step": 18349
    },
    {
      "epoch": 0.000111993408203125,
      "step": 18349,
      "training_step_time": 0.38901376724243164
    },
    {
      "epoch": 0.00011199951171875,
      "grad_norm": 0.20425470173358917,
      "learning_rate": 8.314819857783503e-05,
      "loss": 0.0485,
      "step": 18350
    },
    {
      "epoch": 0.00011199951171875,
      "model_forward_time": 0.11491823196411133,
      "step": 18350
    },
    {
      "epoch": 0.00011199951171875,
      "step": 18350,
      "training_step_time": 0.45162463188171387
    },
    {
      "epoch": 0.000112005615234375,
      "model_forward_time": 0.11452841758728027,
      "step": 18351
    },
    {
      "epoch": 0.000112005615234375,
      "step": 18351,
      "training_step_time": 0.6783995628356934
    },
    {
      "epoch": 0.00011201171875,
      "model_forward_time": 0.11493825912475586,
      "step": 18352
    },
    {
      "epoch": 0.00011201171875,
      "step": 18352,
      "training_step_time": 0.3814995288848877
    },
    {
      "epoch": 0.000112017822265625,
      "model_forward_time": 0.1144254207611084,
      "step": 18353
    },
    {
      "epoch": 0.000112017822265625,
      "step": 18353,
      "training_step_time": 0.38185548782348633
    },
    {
      "epoch": 0.00011202392578125,
      "model_forward_time": 0.11486601829528809,
      "step": 18354
    },
    {
      "epoch": 0.00011202392578125,
      "step": 18354,
      "training_step_time": 0.4224371910095215
    },
    {
      "epoch": 0.000112030029296875,
      "model_forward_time": 0.11451935768127441,
      "step": 18355
    },
    {
      "epoch": 0.000112030029296875,
      "step": 18355,
      "training_step_time": 0.5091404914855957
    },
    {
      "epoch": 0.0001120361328125,
      "model_forward_time": 0.11446595191955566,
      "step": 18356
    },
    {
      "epoch": 0.0001120361328125,
      "step": 18356,
      "training_step_time": 0.4827842712402344
    },
    {
      "epoch": 0.000112042236328125,
      "model_forward_time": 0.11499619483947754,
      "step": 18357
    },
    {
      "epoch": 0.000112042236328125,
      "step": 18357,
      "training_step_time": 0.5146245956420898
    },
    {
      "epoch": 0.00011204833984375,
      "model_forward_time": 0.11453723907470703,
      "step": 18358
    },
    {
      "epoch": 0.00011204833984375,
      "step": 18358,
      "training_step_time": 0.42810750007629395
    },
    {
      "epoch": 0.000112054443359375,
      "model_forward_time": 0.11457157135009766,
      "step": 18359
    },
    {
      "epoch": 0.000112054443359375,
      "step": 18359,
      "training_step_time": 0.3947288990020752
    },
    {
      "epoch": 0.000112060546875,
      "grad_norm": 0.27270781993865967,
      "learning_rate": 8.31275623385675e-05,
      "loss": 0.0546,
      "step": 18360
    },
    {
      "epoch": 0.000112060546875,
      "model_forward_time": 0.11481475830078125,
      "step": 18360
    },
    {
      "epoch": 0.000112060546875,
      "step": 18360,
      "training_step_time": 0.3841230869293213
    },
    {
      "epoch": 0.000112066650390625,
      "model_forward_time": 0.1146852970123291,
      "step": 18361
    },
    {
      "epoch": 0.000112066650390625,
      "step": 18361,
      "training_step_time": 0.38885998725891113
    },
    {
      "epoch": 0.00011207275390625,
      "model_forward_time": 0.11513590812683105,
      "step": 18362
    },
    {
      "epoch": 0.00011207275390625,
      "step": 18362,
      "training_step_time": 0.38773393630981445
    },
    {
      "epoch": 0.000112078857421875,
      "model_forward_time": 0.11480879783630371,
      "step": 18363
    },
    {
      "epoch": 0.000112078857421875,
      "step": 18363,
      "training_step_time": 0.7359740734100342
    },
    {
      "epoch": 0.0001120849609375,
      "model_forward_time": 0.11482000350952148,
      "step": 18364
    },
    {
      "epoch": 0.0001120849609375,
      "step": 18364,
      "training_step_time": 0.4922161102294922
    },
    {
      "epoch": 0.000112091064453125,
      "model_forward_time": 0.11416959762573242,
      "step": 18365
    },
    {
      "epoch": 0.000112091064453125,
      "step": 18365,
      "training_step_time": 0.36746692657470703
    },
    {
      "epoch": 0.00011209716796875,
      "model_forward_time": 0.11443734169006348,
      "step": 18366
    },
    {
      "epoch": 0.00011209716796875,
      "step": 18366,
      "training_step_time": 0.3876941204071045
    },
    {
      "epoch": 0.000112103271484375,
      "model_forward_time": 0.11461544036865234,
      "step": 18367
    },
    {
      "epoch": 0.000112103271484375,
      "step": 18367,
      "training_step_time": 0.38400840759277344
    },
    {
      "epoch": 0.000112109375,
      "model_forward_time": 0.11511778831481934,
      "step": 18368
    },
    {
      "epoch": 0.000112109375,
      "step": 18368,
      "training_step_time": 0.4363224506378174
    },
    {
      "epoch": 0.000112115478515625,
      "model_forward_time": 0.11499595642089844,
      "step": 18369
    },
    {
      "epoch": 0.000112115478515625,
      "step": 18369,
      "training_step_time": 0.5509176254272461
    },
    {
      "epoch": 0.00011212158203125,
      "grad_norm": 0.23803642392158508,
      "learning_rate": 8.31069160360212e-05,
      "loss": 0.0584,
      "step": 18370
    },
    {
      "epoch": 0.00011212158203125,
      "model_forward_time": 0.11568260192871094,
      "step": 18370
    },
    {
      "epoch": 0.00011212158203125,
      "step": 18370,
      "training_step_time": 0.40415477752685547
    },
    {
      "epoch": 0.000112127685546875,
      "model_forward_time": 0.11519718170166016,
      "step": 18371
    },
    {
      "epoch": 0.000112127685546875,
      "step": 18371,
      "training_step_time": 0.39272165298461914
    },
    {
      "epoch": 0.0001121337890625,
      "model_forward_time": 0.11524105072021484,
      "step": 18372
    },
    {
      "epoch": 0.0001121337890625,
      "step": 18372,
      "training_step_time": 0.3910398483276367
    },
    {
      "epoch": 0.000112139892578125,
      "model_forward_time": 0.11442351341247559,
      "step": 18373
    },
    {
      "epoch": 0.000112139892578125,
      "step": 18373,
      "training_step_time": 0.40442824363708496
    },
    {
      "epoch": 0.00011214599609375,
      "model_forward_time": 0.11485409736633301,
      "step": 18374
    },
    {
      "epoch": 0.00011214599609375,
      "step": 18374,
      "training_step_time": 0.38083338737487793
    },
    {
      "epoch": 0.000112152099609375,
      "model_forward_time": 0.11496281623840332,
      "step": 18375
    },
    {
      "epoch": 0.000112152099609375,
      "step": 18375,
      "training_step_time": 0.7722814083099365
    },
    {
      "epoch": 0.000112158203125,
      "model_forward_time": 0.1151580810546875,
      "step": 18376
    },
    {
      "epoch": 0.000112158203125,
      "step": 18376,
      "training_step_time": 0.3895761966705322
    },
    {
      "epoch": 0.000112164306640625,
      "model_forward_time": 0.11404609680175781,
      "step": 18377
    },
    {
      "epoch": 0.000112164306640625,
      "step": 18377,
      "training_step_time": 0.41438817977905273
    },
    {
      "epoch": 0.00011217041015625,
      "model_forward_time": 0.11497735977172852,
      "step": 18378
    },
    {
      "epoch": 0.00011217041015625,
      "step": 18378,
      "training_step_time": 0.39223766326904297
    },
    {
      "epoch": 0.000112176513671875,
      "model_forward_time": 0.11452102661132812,
      "step": 18379
    },
    {
      "epoch": 0.000112176513671875,
      "step": 18379,
      "training_step_time": 0.40636277198791504
    },
    {
      "epoch": 0.0001121826171875,
      "grad_norm": 0.21327704191207886,
      "learning_rate": 8.308625967646795e-05,
      "loss": 0.054,
      "step": 18380
    },
    {
      "epoch": 0.0001121826171875,
      "model_forward_time": 0.11406087875366211,
      "step": 18380
    },
    {
      "epoch": 0.0001121826171875,
      "step": 18380,
      "training_step_time": 0.3878343105316162
    },
    {
      "epoch": 0.000112188720703125,
      "model_forward_time": 0.11513543128967285,
      "step": 18381
    },
    {
      "epoch": 0.000112188720703125,
      "step": 18381,
      "training_step_time": 0.7292964458465576
    },
    {
      "epoch": 0.00011219482421875,
      "model_forward_time": 0.1150052547454834,
      "step": 18382
    },
    {
      "epoch": 0.00011219482421875,
      "step": 18382,
      "training_step_time": 0.3891878128051758
    },
    {
      "epoch": 0.000112200927734375,
      "model_forward_time": 0.11470317840576172,
      "step": 18383
    },
    {
      "epoch": 0.000112200927734375,
      "step": 18383,
      "training_step_time": 0.4766368865966797
    },
    {
      "epoch": 0.00011220703125,
      "model_forward_time": 0.11408233642578125,
      "step": 18384
    },
    {
      "epoch": 0.00011220703125,
      "step": 18384,
      "training_step_time": 0.4058799743652344
    },
    {
      "epoch": 0.000112213134765625,
      "model_forward_time": 0.11446332931518555,
      "step": 18385
    },
    {
      "epoch": 0.000112213134765625,
      "step": 18385,
      "training_step_time": 0.39060258865356445
    },
    {
      "epoch": 0.00011221923828125,
      "model_forward_time": 0.11466431617736816,
      "step": 18386
    },
    {
      "epoch": 0.00011221923828125,
      "step": 18386,
      "training_step_time": 0.3861088752746582
    },
    {
      "epoch": 0.000112225341796875,
      "model_forward_time": 0.11617445945739746,
      "step": 18387
    },
    {
      "epoch": 0.000112225341796875,
      "step": 18387,
      "training_step_time": 0.5943899154663086
    },
    {
      "epoch": 0.0001122314453125,
      "model_forward_time": 0.1145944595336914,
      "step": 18388
    },
    {
      "epoch": 0.0001122314453125,
      "step": 18388,
      "training_step_time": 0.38559889793395996
    },
    {
      "epoch": 0.000112237548828125,
      "model_forward_time": 0.11529827117919922,
      "step": 18389
    },
    {
      "epoch": 0.000112237548828125,
      "step": 18389,
      "training_step_time": 0.38111209869384766
    },
    {
      "epoch": 0.00011224365234375,
      "grad_norm": 0.11867893487215042,
      "learning_rate": 8.306559326618259e-05,
      "loss": 0.0494,
      "step": 18390
    },
    {
      "epoch": 0.00011224365234375,
      "model_forward_time": 0.11545586585998535,
      "step": 18390
    },
    {
      "epoch": 0.00011224365234375,
      "step": 18390,
      "training_step_time": 0.3872957229614258
    },
    {
      "epoch": 0.000112249755859375,
      "model_forward_time": 0.11496138572692871,
      "step": 18391
    },
    {
      "epoch": 0.000112249755859375,
      "step": 18391,
      "training_step_time": 0.4251883029937744
    },
    {
      "epoch": 0.000112255859375,
      "model_forward_time": 0.11445069313049316,
      "step": 18392
    },
    {
      "epoch": 0.000112255859375,
      "step": 18392,
      "training_step_time": 0.38898777961730957
    },
    {
      "epoch": 0.000112261962890625,
      "model_forward_time": 0.11463117599487305,
      "step": 18393
    },
    {
      "epoch": 0.000112261962890625,
      "step": 18393,
      "training_step_time": 0.7980003356933594
    },
    {
      "epoch": 0.00011226806640625,
      "model_forward_time": 0.1142892837524414,
      "step": 18394
    },
    {
      "epoch": 0.00011226806640625,
      "step": 18394,
      "training_step_time": 0.3799457550048828
    },
    {
      "epoch": 0.000112274169921875,
      "model_forward_time": 0.11437630653381348,
      "step": 18395
    },
    {
      "epoch": 0.000112274169921875,
      "step": 18395,
      "training_step_time": 0.4056072235107422
    },
    {
      "epoch": 0.0001122802734375,
      "model_forward_time": 0.11481642723083496,
      "step": 18396
    },
    {
      "epoch": 0.0001122802734375,
      "step": 18396,
      "training_step_time": 0.4444131851196289
    },
    {
      "epoch": 0.000112286376953125,
      "model_forward_time": 0.11479640007019043,
      "step": 18397
    },
    {
      "epoch": 0.000112286376953125,
      "step": 18397,
      "training_step_time": 0.478118896484375
    },
    {
      "epoch": 0.00011229248046875,
      "model_forward_time": 0.1144413948059082,
      "step": 18398
    },
    {
      "epoch": 0.00011229248046875,
      "step": 18398,
      "training_step_time": 0.38192009925842285
    },
    {
      "epoch": 0.000112298583984375,
      "model_forward_time": 0.11496567726135254,
      "step": 18399
    },
    {
      "epoch": 0.000112298583984375,
      "step": 18399,
      "training_step_time": 0.5013699531555176
    },
    {
      "epoch": 0.0001123046875,
      "grad_norm": 0.1376318484544754,
      "learning_rate": 8.304491681144306e-05,
      "loss": 0.057,
      "step": 18400
    },
    {
      "epoch": 0.0001123046875,
      "model_forward_time": 0.11474132537841797,
      "step": 18400
    },
    {
      "epoch": 0.0001123046875,
      "step": 18400,
      "training_step_time": 0.38355135917663574
    },
    {
      "epoch": 0.000112310791015625,
      "model_forward_time": 0.1158602237701416,
      "step": 18401
    },
    {
      "epoch": 0.000112310791015625,
      "step": 18401,
      "training_step_time": 0.3825089931488037
    },
    {
      "epoch": 0.00011231689453125,
      "model_forward_time": 0.11448407173156738,
      "step": 18402
    },
    {
      "epoch": 0.00011231689453125,
      "step": 18402,
      "training_step_time": 0.4018135070800781
    },
    {
      "epoch": 0.000112322998046875,
      "model_forward_time": 0.11510252952575684,
      "step": 18403
    },
    {
      "epoch": 0.000112322998046875,
      "step": 18403,
      "training_step_time": 0.3915741443634033
    },
    {
      "epoch": 0.0001123291015625,
      "model_forward_time": 0.11532759666442871,
      "step": 18404
    },
    {
      "epoch": 0.0001123291015625,
      "step": 18404,
      "training_step_time": 0.4709010124206543
    },
    {
      "epoch": 0.000112335205078125,
      "model_forward_time": 0.11470150947570801,
      "step": 18405
    },
    {
      "epoch": 0.000112335205078125,
      "step": 18405,
      "training_step_time": 0.6419050693511963
    },
    {
      "epoch": 0.00011234130859375,
      "model_forward_time": 0.11475276947021484,
      "step": 18406
    },
    {
      "epoch": 0.00011234130859375,
      "step": 18406,
      "training_step_time": 0.41414475440979004
    },
    {
      "epoch": 0.000112347412109375,
      "model_forward_time": 0.11474323272705078,
      "step": 18407
    },
    {
      "epoch": 0.000112347412109375,
      "step": 18407,
      "training_step_time": 0.38892340660095215
    },
    {
      "epoch": 0.000112353515625,
      "model_forward_time": 0.11499214172363281,
      "step": 18408
    },
    {
      "epoch": 0.000112353515625,
      "step": 18408,
      "training_step_time": 0.44811582565307617
    },
    {
      "epoch": 0.000112359619140625,
      "model_forward_time": 0.1144859790802002,
      "step": 18409
    },
    {
      "epoch": 0.000112359619140625,
      "step": 18409,
      "training_step_time": 0.46589112281799316
    },
    {
      "epoch": 0.00011236572265625,
      "grad_norm": 0.17058774828910828,
      "learning_rate": 8.30242303185303e-05,
      "loss": 0.0545,
      "step": 18410
    },
    {
      "epoch": 0.00011236572265625,
      "model_forward_time": 0.1153709888458252,
      "step": 18410
    },
    {
      "epoch": 0.00011236572265625,
      "step": 18410,
      "training_step_time": 0.4606955051422119
    },
    {
      "epoch": 0.000112371826171875,
      "model_forward_time": 0.11444687843322754,
      "step": 18411
    },
    {
      "epoch": 0.000112371826171875,
      "step": 18411,
      "training_step_time": 0.49959444999694824
    },
    {
      "epoch": 0.0001123779296875,
      "model_forward_time": 0.11518120765686035,
      "step": 18412
    },
    {
      "epoch": 0.0001123779296875,
      "step": 18412,
      "training_step_time": 0.38045406341552734
    },
    {
      "epoch": 0.000112384033203125,
      "model_forward_time": 0.11475062370300293,
      "step": 18413
    },
    {
      "epoch": 0.000112384033203125,
      "step": 18413,
      "training_step_time": 0.387376070022583
    },
    {
      "epoch": 0.00011239013671875,
      "model_forward_time": 0.11461615562438965,
      "step": 18414
    },
    {
      "epoch": 0.00011239013671875,
      "step": 18414,
      "training_step_time": 0.4038705825805664
    },
    {
      "epoch": 0.000112396240234375,
      "model_forward_time": 0.11477804183959961,
      "step": 18415
    },
    {
      "epoch": 0.000112396240234375,
      "step": 18415,
      "training_step_time": 0.3880743980407715
    },
    {
      "epoch": 0.00011240234375,
      "model_forward_time": 0.11478424072265625,
      "step": 18416
    },
    {
      "epoch": 0.00011240234375,
      "step": 18416,
      "training_step_time": 0.3903357982635498
    },
    {
      "epoch": 0.000112408447265625,
      "model_forward_time": 0.11601114273071289,
      "step": 18417
    },
    {
      "epoch": 0.000112408447265625,
      "step": 18417,
      "training_step_time": 0.7499740123748779
    },
    {
      "epoch": 0.00011241455078125,
      "model_forward_time": 0.11529016494750977,
      "step": 18418
    },
    {
      "epoch": 0.00011241455078125,
      "step": 18418,
      "training_step_time": 0.4457097053527832
    },
    {
      "epoch": 0.000112420654296875,
      "model_forward_time": 0.1141352653503418,
      "step": 18419
    },
    {
      "epoch": 0.000112420654296875,
      "step": 18419,
      "training_step_time": 0.4478907585144043
    },
    {
      "epoch": 0.0001124267578125,
      "grad_norm": 0.17472508549690247,
      "learning_rate": 8.300353379372834e-05,
      "loss": 0.0513,
      "step": 18420
    },
    {
      "epoch": 0.0001124267578125,
      "model_forward_time": 0.11388492584228516,
      "step": 18420
    },
    {
      "epoch": 0.0001124267578125,
      "step": 18420,
      "training_step_time": 0.3962562084197998
    },
    {
      "epoch": 0.000112432861328125,
      "model_forward_time": 0.11409831047058105,
      "step": 18421
    },
    {
      "epoch": 0.000112432861328125,
      "step": 18421,
      "training_step_time": 0.38315820693969727
    },
    {
      "epoch": 0.00011243896484375,
      "model_forward_time": 0.11458802223205566,
      "step": 18422
    },
    {
      "epoch": 0.00011243896484375,
      "step": 18422,
      "training_step_time": 0.40703248977661133
    },
    {
      "epoch": 0.000112445068359375,
      "model_forward_time": 0.11574053764343262,
      "step": 18423
    },
    {
      "epoch": 0.000112445068359375,
      "step": 18423,
      "training_step_time": 0.6250457763671875
    },
    {
      "epoch": 0.000112451171875,
      "model_forward_time": 0.11544442176818848,
      "step": 18424
    },
    {
      "epoch": 0.000112451171875,
      "step": 18424,
      "training_step_time": 0.4769268035888672
    },
    {
      "epoch": 0.000112457275390625,
      "model_forward_time": 0.11493420600891113,
      "step": 18425
    },
    {
      "epoch": 0.000112457275390625,
      "step": 18425,
      "training_step_time": 0.4075009822845459
    },
    {
      "epoch": 0.00011246337890625,
      "model_forward_time": 0.11438965797424316,
      "step": 18426
    },
    {
      "epoch": 0.00011246337890625,
      "step": 18426,
      "training_step_time": 0.4011201858520508
    },
    {
      "epoch": 0.000112469482421875,
      "model_forward_time": 0.11456966400146484,
      "step": 18427
    },
    {
      "epoch": 0.000112469482421875,
      "step": 18427,
      "training_step_time": 0.3861501216888428
    },
    {
      "epoch": 0.0001124755859375,
      "model_forward_time": 0.11429047584533691,
      "step": 18428
    },
    {
      "epoch": 0.0001124755859375,
      "step": 18428,
      "training_step_time": 0.3878180980682373
    },
    {
      "epoch": 0.000112481689453125,
      "model_forward_time": 0.11486339569091797,
      "step": 18429
    },
    {
      "epoch": 0.000112481689453125,
      "step": 18429,
      "training_step_time": 0.6556849479675293
    },
    {
      "epoch": 0.00011248779296875,
      "grad_norm": 0.13303805887699127,
      "learning_rate": 8.298282724332419e-05,
      "loss": 0.0528,
      "step": 18430
    },
    {
      "epoch": 0.00011248779296875,
      "model_forward_time": 0.1155402660369873,
      "step": 18430
    },
    {
      "epoch": 0.00011248779296875,
      "step": 18430,
      "training_step_time": 0.38036203384399414
    },
    {
      "epoch": 0.000112493896484375,
      "model_forward_time": 0.1144247055053711,
      "step": 18431
    },
    {
      "epoch": 0.000112493896484375,
      "step": 18431,
      "training_step_time": 0.44651365280151367
    },
    {
      "epoch": 0.0001125,
      "model_forward_time": 0.11475467681884766,
      "step": 18432
    },
    {
      "epoch": 0.0001125,
      "step": 18432,
      "training_step_time": 0.42035913467407227
    },
    {
      "epoch": 0.000112506103515625,
      "model_forward_time": 0.11551737785339355,
      "step": 18433
    },
    {
      "epoch": 0.000112506103515625,
      "step": 18433,
      "training_step_time": 0.4700157642364502
    },
    {
      "epoch": 0.00011251220703125,
      "model_forward_time": 0.1153402328491211,
      "step": 18434
    },
    {
      "epoch": 0.00011251220703125,
      "step": 18434,
      "training_step_time": 0.38982653617858887
    },
    {
      "epoch": 0.000112518310546875,
      "model_forward_time": 0.11490893363952637,
      "step": 18435
    },
    {
      "epoch": 0.000112518310546875,
      "step": 18435,
      "training_step_time": 0.6348466873168945
    },
    {
      "epoch": 0.0001125244140625,
      "model_forward_time": 0.11529064178466797,
      "step": 18436
    },
    {
      "epoch": 0.0001125244140625,
      "step": 18436,
      "training_step_time": 0.38417577743530273
    },
    {
      "epoch": 0.000112530517578125,
      "model_forward_time": 0.11450815200805664,
      "step": 18437
    },
    {
      "epoch": 0.000112530517578125,
      "step": 18437,
      "training_step_time": 0.4230384826660156
    },
    {
      "epoch": 0.00011253662109375,
      "model_forward_time": 0.11465191841125488,
      "step": 18438
    },
    {
      "epoch": 0.00011253662109375,
      "step": 18438,
      "training_step_time": 0.4642024040222168
    },
    {
      "epoch": 0.000112542724609375,
      "model_forward_time": 0.11437630653381348,
      "step": 18439
    },
    {
      "epoch": 0.000112542724609375,
      "step": 18439,
      "training_step_time": 0.3901858329772949
    },
    {
      "epoch": 0.000112548828125,
      "grad_norm": 0.18063724040985107,
      "learning_rate": 8.2962110673608e-05,
      "loss": 0.054,
      "step": 18440
    },
    {
      "epoch": 0.000112548828125,
      "model_forward_time": 0.11433267593383789,
      "step": 18440
    },
    {
      "epoch": 0.000112548828125,
      "step": 18440,
      "training_step_time": 0.38039445877075195
    },
    {
      "epoch": 0.000112554931640625,
      "model_forward_time": 0.11512231826782227,
      "step": 18441
    },
    {
      "epoch": 0.000112554931640625,
      "step": 18441,
      "training_step_time": 0.6101980209350586
    },
    {
      "epoch": 0.00011256103515625,
      "model_forward_time": 0.11464309692382812,
      "step": 18442
    },
    {
      "epoch": 0.00011256103515625,
      "step": 18442,
      "training_step_time": 0.3835740089416504
    },
    {
      "epoch": 0.000112567138671875,
      "model_forward_time": 0.11531472206115723,
      "step": 18443
    },
    {
      "epoch": 0.000112567138671875,
      "step": 18443,
      "training_step_time": 0.38023853302001953
    },
    {
      "epoch": 0.0001125732421875,
      "model_forward_time": 0.11557745933532715,
      "step": 18444
    },
    {
      "epoch": 0.0001125732421875,
      "step": 18444,
      "training_step_time": 0.3979458808898926
    },
    {
      "epoch": 0.000112579345703125,
      "model_forward_time": 0.11529803276062012,
      "step": 18445
    },
    {
      "epoch": 0.000112579345703125,
      "step": 18445,
      "training_step_time": 0.3877840042114258
    },
    {
      "epoch": 0.00011258544921875,
      "model_forward_time": 0.11492919921875,
      "step": 18446
    },
    {
      "epoch": 0.00011258544921875,
      "step": 18446,
      "training_step_time": 0.3995027542114258
    },
    {
      "epoch": 0.000112591552734375,
      "model_forward_time": 0.11536622047424316,
      "step": 18447
    },
    {
      "epoch": 0.000112591552734375,
      "step": 18447,
      "training_step_time": 0.7913954257965088
    },
    {
      "epoch": 0.00011259765625,
      "model_forward_time": 0.1145162582397461,
      "step": 18448
    },
    {
      "epoch": 0.00011259765625,
      "step": 18448,
      "training_step_time": 0.3797121047973633
    },
    {
      "epoch": 0.000112603759765625,
      "model_forward_time": 0.11450386047363281,
      "step": 18449
    },
    {
      "epoch": 0.000112603759765625,
      "step": 18449,
      "training_step_time": 0.38678407669067383
    },
    {
      "epoch": 0.00011260986328125,
      "grad_norm": 0.20908454060554504,
      "learning_rate": 8.29413840908729e-05,
      "loss": 0.0515,
      "step": 18450
    },
    {
      "epoch": 0.00011260986328125,
      "model_forward_time": 0.11475229263305664,
      "step": 18450
    },
    {
      "epoch": 0.00011260986328125,
      "step": 18450,
      "training_step_time": 0.3927767276763916
    },
    {
      "epoch": 0.000112615966796875,
      "model_forward_time": 0.11620020866394043,
      "step": 18451
    },
    {
      "epoch": 0.000112615966796875,
      "step": 18451,
      "training_step_time": 0.4115879535675049
    },
    {
      "epoch": 0.0001126220703125,
      "model_forward_time": 0.11490774154663086,
      "step": 18452
    },
    {
      "epoch": 0.0001126220703125,
      "step": 18452,
      "training_step_time": 0.3889122009277344
    },
    {
      "epoch": 0.000112628173828125,
      "model_forward_time": 0.11466002464294434,
      "step": 18453
    },
    {
      "epoch": 0.000112628173828125,
      "step": 18453,
      "training_step_time": 0.7852094173431396
    },
    {
      "epoch": 0.00011263427734375,
      "model_forward_time": 0.11438274383544922,
      "step": 18454
    },
    {
      "epoch": 0.00011263427734375,
      "step": 18454,
      "training_step_time": 0.3858804702758789
    },
    {
      "epoch": 0.000112640380859375,
      "model_forward_time": 0.11426377296447754,
      "step": 18455
    },
    {
      "epoch": 0.000112640380859375,
      "step": 18455,
      "training_step_time": 0.3872795104980469
    },
    {
      "epoch": 0.000112646484375,
      "model_forward_time": 0.11389398574829102,
      "step": 18456
    },
    {
      "epoch": 0.000112646484375,
      "step": 18456,
      "training_step_time": 0.38944411277770996
    },
    {
      "epoch": 0.000112652587890625,
      "model_forward_time": 0.11401557922363281,
      "step": 18457
    },
    {
      "epoch": 0.000112652587890625,
      "step": 18457,
      "training_step_time": 0.39963364601135254
    },
    {
      "epoch": 0.00011265869140625,
      "model_forward_time": 0.11454963684082031,
      "step": 18458
    },
    {
      "epoch": 0.00011265869140625,
      "step": 18458,
      "training_step_time": 0.395719051361084
    },
    {
      "epoch": 0.000112664794921875,
      "model_forward_time": 0.11491632461547852,
      "step": 18459
    },
    {
      "epoch": 0.000112664794921875,
      "step": 18459,
      "training_step_time": 0.838778018951416
    },
    {
      "epoch": 0.0001126708984375,
      "grad_norm": 0.21379174292087555,
      "learning_rate": 8.292064750141509e-05,
      "loss": 0.053,
      "step": 18460
    },
    {
      "epoch": 0.0001126708984375,
      "model_forward_time": 0.11456942558288574,
      "step": 18460
    },
    {
      "epoch": 0.0001126708984375,
      "step": 18460,
      "training_step_time": 0.3920118808746338
    },
    {
      "epoch": 0.000112677001953125,
      "model_forward_time": 0.11504507064819336,
      "step": 18461
    },
    {
      "epoch": 0.000112677001953125,
      "step": 18461,
      "training_step_time": 0.41060900688171387
    },
    {
      "epoch": 0.00011268310546875,
      "model_forward_time": 0.11429738998413086,
      "step": 18462
    },
    {
      "epoch": 0.00011268310546875,
      "step": 18462,
      "training_step_time": 0.43771910667419434
    },
    {
      "epoch": 0.000112689208984375,
      "model_forward_time": 0.1145317554473877,
      "step": 18463
    },
    {
      "epoch": 0.000112689208984375,
      "step": 18463,
      "training_step_time": 0.4714322090148926
    },
    {
      "epoch": 0.0001126953125,
      "model_forward_time": 0.11512422561645508,
      "step": 18464
    },
    {
      "epoch": 0.0001126953125,
      "step": 18464,
      "training_step_time": 0.40894651412963867
    },
    {
      "epoch": 0.000112701416015625,
      "model_forward_time": 0.11495184898376465,
      "step": 18465
    },
    {
      "epoch": 0.000112701416015625,
      "step": 18465,
      "training_step_time": 0.6076164245605469
    },
    {
      "epoch": 0.00011270751953125,
      "model_forward_time": 0.11491894721984863,
      "step": 18466
    },
    {
      "epoch": 0.00011270751953125,
      "step": 18466,
      "training_step_time": 0.3887183666229248
    },
    {
      "epoch": 0.000112713623046875,
      "model_forward_time": 0.11434364318847656,
      "step": 18467
    },
    {
      "epoch": 0.000112713623046875,
      "step": 18467,
      "training_step_time": 0.37793684005737305
    },
    {
      "epoch": 0.0001127197265625,
      "model_forward_time": 0.11427903175354004,
      "step": 18468
    },
    {
      "epoch": 0.0001127197265625,
      "step": 18468,
      "training_step_time": 0.3825857639312744
    },
    {
      "epoch": 0.000112725830078125,
      "model_forward_time": 0.11567831039428711,
      "step": 18469
    },
    {
      "epoch": 0.000112725830078125,
      "step": 18469,
      "training_step_time": 0.3930203914642334
    },
    {
      "epoch": 0.00011273193359375,
      "grad_norm": 0.14653486013412476,
      "learning_rate": 8.289990091153376e-05,
      "loss": 0.05,
      "step": 18470
    },
    {
      "epoch": 0.00011273193359375,
      "model_forward_time": 0.11531710624694824,
      "step": 18470
    },
    {
      "epoch": 0.00011273193359375,
      "step": 18470,
      "training_step_time": 0.3889334201812744
    },
    {
      "epoch": 0.000112738037109375,
      "model_forward_time": 0.11486244201660156,
      "step": 18471
    },
    {
      "epoch": 0.000112738037109375,
      "step": 18471,
      "training_step_time": 0.8790061473846436
    },
    {
      "epoch": 0.000112744140625,
      "model_forward_time": 0.11507248878479004,
      "step": 18472
    },
    {
      "epoch": 0.000112744140625,
      "step": 18472,
      "training_step_time": 0.4427459239959717
    },
    {
      "epoch": 0.000112750244140625,
      "model_forward_time": 0.11405181884765625,
      "step": 18473
    },
    {
      "epoch": 0.000112750244140625,
      "step": 18473,
      "training_step_time": 0.43353748321533203
    },
    {
      "epoch": 0.00011275634765625,
      "model_forward_time": 0.11420392990112305,
      "step": 18474
    },
    {
      "epoch": 0.00011275634765625,
      "step": 18474,
      "training_step_time": 0.4024512767791748
    },
    {
      "epoch": 0.000112762451171875,
      "model_forward_time": 0.11441516876220703,
      "step": 18475
    },
    {
      "epoch": 0.000112762451171875,
      "step": 18475,
      "training_step_time": 0.384448766708374
    },
    {
      "epoch": 0.0001127685546875,
      "model_forward_time": 0.11501765251159668,
      "step": 18476
    },
    {
      "epoch": 0.0001127685546875,
      "step": 18476,
      "training_step_time": 0.3894162178039551
    },
    {
      "epoch": 0.000112774658203125,
      "model_forward_time": 0.1155238151550293,
      "step": 18477
    },
    {
      "epoch": 0.000112774658203125,
      "step": 18477,
      "training_step_time": 0.5520596504211426
    },
    {
      "epoch": 0.00011278076171875,
      "model_forward_time": 0.11482477188110352,
      "step": 18478
    },
    {
      "epoch": 0.00011278076171875,
      "step": 18478,
      "training_step_time": 0.45584869384765625
    },
    {
      "epoch": 0.000112786865234375,
      "model_forward_time": 0.11508870124816895,
      "step": 18479
    },
    {
      "epoch": 0.000112786865234375,
      "step": 18479,
      "training_step_time": 0.3922567367553711
    },
    {
      "epoch": 0.00011279296875,
      "grad_norm": 0.1427326202392578,
      "learning_rate": 8.287914432753123e-05,
      "loss": 0.0465,
      "step": 18480
    },
    {
      "epoch": 0.00011279296875,
      "model_forward_time": 0.11466169357299805,
      "step": 18480
    },
    {
      "epoch": 0.00011279296875,
      "step": 18480,
      "training_step_time": 0.39897847175598145
    },
    {
      "epoch": 0.000112799072265625,
      "model_forward_time": 0.11484694480895996,
      "step": 18481
    },
    {
      "epoch": 0.000112799072265625,
      "step": 18481,
      "training_step_time": 0.39249706268310547
    },
    {
      "epoch": 0.00011280517578125,
      "model_forward_time": 0.11523008346557617,
      "step": 18482
    },
    {
      "epoch": 0.00011280517578125,
      "step": 18482,
      "training_step_time": 0.39670705795288086
    },
    {
      "epoch": 0.000112811279296875,
      "model_forward_time": 0.11536359786987305,
      "step": 18483
    },
    {
      "epoch": 0.000112811279296875,
      "step": 18483,
      "training_step_time": 0.6553230285644531
    },
    {
      "epoch": 0.0001128173828125,
      "model_forward_time": 0.11406779289245605,
      "step": 18484
    },
    {
      "epoch": 0.0001128173828125,
      "step": 18484,
      "training_step_time": 0.40880465507507324
    },
    {
      "epoch": 0.000112823486328125,
      "model_forward_time": 0.1143331527709961,
      "step": 18485
    },
    {
      "epoch": 0.000112823486328125,
      "step": 18485,
      "training_step_time": 0.402498722076416
    },
    {
      "epoch": 0.00011282958984375,
      "model_forward_time": 0.11498093605041504,
      "step": 18486
    },
    {
      "epoch": 0.00011282958984375,
      "step": 18486,
      "training_step_time": 0.42908573150634766
    },
    {
      "epoch": 0.000112835693359375,
      "model_forward_time": 0.1147458553314209,
      "step": 18487
    },
    {
      "epoch": 0.000112835693359375,
      "step": 18487,
      "training_step_time": 0.46712613105773926
    },
    {
      "epoch": 0.000112841796875,
      "model_forward_time": 0.11439967155456543,
      "step": 18488
    },
    {
      "epoch": 0.000112841796875,
      "step": 18488,
      "training_step_time": 0.3983292579650879
    },
    {
      "epoch": 0.000112847900390625,
      "model_forward_time": 0.11511826515197754,
      "step": 18489
    },
    {
      "epoch": 0.000112847900390625,
      "step": 18489,
      "training_step_time": 0.6022281646728516
    },
    {
      "epoch": 0.00011285400390625,
      "grad_norm": 0.20413023233413696,
      "learning_rate": 8.285837775571276e-05,
      "loss": 0.0508,
      "step": 18490
    },
    {
      "epoch": 0.00011285400390625,
      "model_forward_time": 0.11497163772583008,
      "step": 18490
    },
    {
      "epoch": 0.00011285400390625,
      "step": 18490,
      "training_step_time": 0.36874961853027344
    },
    {
      "epoch": 0.000112860107421875,
      "model_forward_time": 0.11424112319946289,
      "step": 18491
    },
    {
      "epoch": 0.000112860107421875,
      "step": 18491,
      "training_step_time": 0.4408571720123291
    },
    {
      "epoch": 0.0001128662109375,
      "model_forward_time": 0.11403632164001465,
      "step": 18492
    },
    {
      "epoch": 0.0001128662109375,
      "step": 18492,
      "training_step_time": 0.44220781326293945
    },
    {
      "epoch": 0.000112872314453125,
      "model_forward_time": 0.11514496803283691,
      "step": 18493
    },
    {
      "epoch": 0.000112872314453125,
      "step": 18493,
      "training_step_time": 0.39156413078308105
    },
    {
      "epoch": 0.00011287841796875,
      "model_forward_time": 0.11454987525939941,
      "step": 18494
    },
    {
      "epoch": 0.00011287841796875,
      "step": 18494,
      "training_step_time": 0.39008021354675293
    },
    {
      "epoch": 0.000112884521484375,
      "model_forward_time": 0.11504554748535156,
      "step": 18495
    },
    {
      "epoch": 0.000112884521484375,
      "step": 18495,
      "training_step_time": 0.6683614253997803
    },
    {
      "epoch": 0.000112890625,
      "model_forward_time": 0.11440372467041016,
      "step": 18496
    },
    {
      "epoch": 0.000112890625,
      "step": 18496,
      "training_step_time": 0.3872227668762207
    },
    {
      "epoch": 0.000112896728515625,
      "model_forward_time": 0.11456465721130371,
      "step": 18497
    },
    {
      "epoch": 0.000112896728515625,
      "step": 18497,
      "training_step_time": 0.38686418533325195
    },
    {
      "epoch": 0.00011290283203125,
      "model_forward_time": 0.11539411544799805,
      "step": 18498
    },
    {
      "epoch": 0.00011290283203125,
      "step": 18498,
      "training_step_time": 0.48370981216430664
    },
    {
      "epoch": 0.000112908935546875,
      "model_forward_time": 0.11390519142150879,
      "step": 18499
    },
    {
      "epoch": 0.000112908935546875,
      "step": 18499,
      "training_step_time": 0.40383005142211914
    },
    {
      "epoch": 0.0001129150390625,
      "grad_norm": 0.1312006562948227,
      "learning_rate": 8.283760120238672e-05,
      "loss": 0.0497,
      "step": 18500
    },
    {
      "epoch": 0.0001129150390625,
      "model_forward_time": 0.11490249633789062,
      "step": 18500
    },
    {
      "epoch": 0.0001129150390625,
      "step": 18500,
      "training_step_time": 0.46693849563598633
    },
    {
      "epoch": 0.000112921142578125,
      "model_forward_time": 0.11437678337097168,
      "step": 18501
    },
    {
      "epoch": 0.000112921142578125,
      "step": 18501,
      "training_step_time": 0.5526976585388184
    },
    {
      "epoch": 0.00011292724609375,
      "model_forward_time": 0.11492013931274414,
      "step": 18502
    },
    {
      "epoch": 0.00011292724609375,
      "step": 18502,
      "training_step_time": 0.4784393310546875
    },
    {
      "epoch": 0.000112933349609375,
      "model_forward_time": 0.1148526668548584,
      "step": 18503
    },
    {
      "epoch": 0.000112933349609375,
      "step": 18503,
      "training_step_time": 0.4657156467437744
    },
    {
      "epoch": 0.000112939453125,
      "model_forward_time": 0.11465239524841309,
      "step": 18504
    },
    {
      "epoch": 0.000112939453125,
      "step": 18504,
      "training_step_time": 0.43161654472351074
    },
    {
      "epoch": 0.000112945556640625,
      "model_forward_time": 0.11504459381103516,
      "step": 18505
    },
    {
      "epoch": 0.000112945556640625,
      "step": 18505,
      "training_step_time": 0.5124022960662842
    },
    {
      "epoch": 0.00011295166015625,
      "model_forward_time": 0.11446714401245117,
      "step": 18506
    },
    {
      "epoch": 0.00011295166015625,
      "step": 18506,
      "training_step_time": 0.4271862506866455
    },
    {
      "epoch": 0.000112957763671875,
      "model_forward_time": 0.11461925506591797,
      "step": 18507
    },
    {
      "epoch": 0.000112957763671875,
      "step": 18507,
      "training_step_time": 0.5051655769348145
    },
    {
      "epoch": 0.0001129638671875,
      "model_forward_time": 0.11476564407348633,
      "step": 18508
    },
    {
      "epoch": 0.0001129638671875,
      "step": 18508,
      "training_step_time": 0.40122509002685547
    },
    {
      "epoch": 0.000112969970703125,
      "model_forward_time": 0.11420297622680664,
      "step": 18509
    },
    {
      "epoch": 0.000112969970703125,
      "step": 18509,
      "training_step_time": 0.3824470043182373
    },
    {
      "epoch": 0.00011297607421875,
      "grad_norm": 0.18361066281795502,
      "learning_rate": 8.281681467386446e-05,
      "loss": 0.0531,
      "step": 18510
    },
    {
      "epoch": 0.00011297607421875,
      "model_forward_time": 0.11445999145507812,
      "step": 18510
    },
    {
      "epoch": 0.00011297607421875,
      "step": 18510,
      "training_step_time": 0.39557671546936035
    },
    {
      "epoch": 0.000112982177734375,
      "model_forward_time": 0.11596846580505371,
      "step": 18511
    },
    {
      "epoch": 0.000112982177734375,
      "step": 18511,
      "training_step_time": 0.41672277450561523
    },
    {
      "epoch": 0.00011298828125,
      "model_forward_time": 0.11479043960571289,
      "step": 18512
    },
    {
      "epoch": 0.00011298828125,
      "step": 18512,
      "training_step_time": 0.4090087413787842
    },
    {
      "epoch": 0.000112994384765625,
      "model_forward_time": 0.11491918563842773,
      "step": 18513
    },
    {
      "epoch": 0.000112994384765625,
      "step": 18513,
      "training_step_time": 0.7526400089263916
    },
    {
      "epoch": 0.00011300048828125,
      "model_forward_time": 0.11502718925476074,
      "step": 18514
    },
    {
      "epoch": 0.00011300048828125,
      "step": 18514,
      "training_step_time": 0.3953518867492676
    },
    {
      "epoch": 0.000113006591796875,
      "model_forward_time": 0.11473798751831055,
      "step": 18515
    },
    {
      "epoch": 0.000113006591796875,
      "step": 18515,
      "training_step_time": 0.3917965888977051
    },
    {
      "epoch": 0.0001130126953125,
      "model_forward_time": 0.11499571800231934,
      "step": 18516
    },
    {
      "epoch": 0.0001130126953125,
      "step": 18516,
      "training_step_time": 0.39536619186401367
    },
    {
      "epoch": 0.000113018798828125,
      "model_forward_time": 0.11448431015014648,
      "step": 18517
    },
    {
      "epoch": 0.000113018798828125,
      "step": 18517,
      "training_step_time": 0.42838096618652344
    },
    {
      "epoch": 0.00011302490234375,
      "model_forward_time": 0.11398696899414062,
      "step": 18518
    },
    {
      "epoch": 0.00011302490234375,
      "step": 18518,
      "training_step_time": 0.36541748046875
    },
    {
      "epoch": 0.000113031005859375,
      "model_forward_time": 0.11456418037414551,
      "step": 18519
    },
    {
      "epoch": 0.000113031005859375,
      "step": 18519,
      "training_step_time": 0.7124905586242676
    },
    {
      "epoch": 0.000113037109375,
      "grad_norm": 0.18378794193267822,
      "learning_rate": 8.279601817646036e-05,
      "loss": 0.0485,
      "step": 18520
    },
    {
      "epoch": 0.000113037109375,
      "model_forward_time": 0.11430811882019043,
      "step": 18520
    },
    {
      "epoch": 0.000113037109375,
      "step": 18520,
      "training_step_time": 0.4610164165496826
    },
    {
      "epoch": 0.000113043212890625,
      "model_forward_time": 0.11409401893615723,
      "step": 18521
    },
    {
      "epoch": 0.000113043212890625,
      "step": 18521,
      "training_step_time": 0.4017674922943115
    },
    {
      "epoch": 0.00011304931640625,
      "model_forward_time": 0.11477398872375488,
      "step": 18522
    },
    {
      "epoch": 0.00011304931640625,
      "step": 18522,
      "training_step_time": 0.38759303092956543
    },
    {
      "epoch": 0.000113055419921875,
      "model_forward_time": 0.11475419998168945,
      "step": 18523
    },
    {
      "epoch": 0.000113055419921875,
      "step": 18523,
      "training_step_time": 0.3965132236480713
    },
    {
      "epoch": 0.0001130615234375,
      "model_forward_time": 0.11389350891113281,
      "step": 18524
    },
    {
      "epoch": 0.0001130615234375,
      "step": 18524,
      "training_step_time": 0.4480011463165283
    },
    {
      "epoch": 0.000113067626953125,
      "model_forward_time": 0.11442112922668457,
      "step": 18525
    },
    {
      "epoch": 0.000113067626953125,
      "step": 18525,
      "training_step_time": 0.4062631130218506
    },
    {
      "epoch": 0.00011307373046875,
      "model_forward_time": 0.11505579948425293,
      "step": 18526
    },
    {
      "epoch": 0.00011307373046875,
      "step": 18526,
      "training_step_time": 0.4000537395477295
    },
    {
      "epoch": 0.000113079833984375,
      "model_forward_time": 0.11486124992370605,
      "step": 18527
    },
    {
      "epoch": 0.000113079833984375,
      "step": 18527,
      "training_step_time": 0.4739518165588379
    },
    {
      "epoch": 0.0001130859375,
      "model_forward_time": 0.11432504653930664,
      "step": 18528
    },
    {
      "epoch": 0.0001130859375,
      "step": 18528,
      "training_step_time": 0.40234899520874023
    },
    {
      "epoch": 0.000113092041015625,
      "model_forward_time": 0.11507153511047363,
      "step": 18529
    },
    {
      "epoch": 0.000113092041015625,
      "step": 18529,
      "training_step_time": 0.3997688293457031
    },
    {
      "epoch": 0.00011309814453125,
      "grad_norm": 0.18125228583812714,
      "learning_rate": 8.277521171649189e-05,
      "loss": 0.0519,
      "step": 18530
    },
    {
      "epoch": 0.00011309814453125,
      "model_forward_time": 0.11501264572143555,
      "step": 18530
    },
    {
      "epoch": 0.00011309814453125,
      "step": 18530,
      "training_step_time": 0.4786686897277832
    },
    {
      "epoch": 0.000113104248046875,
      "model_forward_time": 0.11447000503540039,
      "step": 18531
    },
    {
      "epoch": 0.000113104248046875,
      "step": 18531,
      "training_step_time": 0.467423677444458
    },
    {
      "epoch": 0.0001131103515625,
      "model_forward_time": 0.11449193954467773,
      "step": 18532
    },
    {
      "epoch": 0.0001131103515625,
      "step": 18532,
      "training_step_time": 0.3660311698913574
    },
    {
      "epoch": 0.000113116455078125,
      "model_forward_time": 0.11447906494140625,
      "step": 18533
    },
    {
      "epoch": 0.000113116455078125,
      "step": 18533,
      "training_step_time": 0.4529294967651367
    },
    {
      "epoch": 0.00011312255859375,
      "model_forward_time": 0.11513733863830566,
      "step": 18534
    },
    {
      "epoch": 0.00011312255859375,
      "step": 18534,
      "training_step_time": 0.4065067768096924
    },
    {
      "epoch": 0.000113128662109375,
      "model_forward_time": 0.11405467987060547,
      "step": 18535
    },
    {
      "epoch": 0.000113128662109375,
      "step": 18535,
      "training_step_time": 0.3930540084838867
    },
    {
      "epoch": 0.000113134765625,
      "model_forward_time": 0.11575937271118164,
      "step": 18536
    },
    {
      "epoch": 0.000113134765625,
      "step": 18536,
      "training_step_time": 0.4023158550262451
    },
    {
      "epoch": 0.000113140869140625,
      "model_forward_time": 0.11457324028015137,
      "step": 18537
    },
    {
      "epoch": 0.000113140869140625,
      "step": 18537,
      "training_step_time": 0.4493584632873535
    },
    {
      "epoch": 0.00011314697265625,
      "model_forward_time": 0.1152486801147461,
      "step": 18538
    },
    {
      "epoch": 0.00011314697265625,
      "step": 18538,
      "training_step_time": 0.3924601078033447
    },
    {
      "epoch": 0.000113153076171875,
      "model_forward_time": 0.11475038528442383,
      "step": 18539
    },
    {
      "epoch": 0.000113153076171875,
      "step": 18539,
      "training_step_time": 0.48189520835876465
    },
    {
      "epoch": 0.0001131591796875,
      "grad_norm": 0.13571201264858246,
      "learning_rate": 8.275439530027948e-05,
      "loss": 0.0581,
      "step": 18540
    },
    {
      "epoch": 0.0001131591796875,
      "model_forward_time": 0.11542916297912598,
      "step": 18540
    },
    {
      "epoch": 0.0001131591796875,
      "step": 18540,
      "training_step_time": 0.45552563667297363
    },
    {
      "epoch": 0.000113165283203125,
      "model_forward_time": 0.1156163215637207,
      "step": 18541
    },
    {
      "epoch": 0.000113165283203125,
      "step": 18541,
      "training_step_time": 0.46039390563964844
    },
    {
      "epoch": 0.00011317138671875,
      "model_forward_time": 0.11530375480651855,
      "step": 18542
    },
    {
      "epoch": 0.00011317138671875,
      "step": 18542,
      "training_step_time": 0.39775824546813965
    },
    {
      "epoch": 0.000113177490234375,
      "model_forward_time": 0.1148216724395752,
      "step": 18543
    },
    {
      "epoch": 0.000113177490234375,
      "step": 18543,
      "training_step_time": 0.43825531005859375
    },
    {
      "epoch": 0.00011318359375,
      "model_forward_time": 0.1148996353149414,
      "step": 18544
    },
    {
      "epoch": 0.00011318359375,
      "step": 18544,
      "training_step_time": 0.3965444564819336
    },
    {
      "epoch": 0.000113189697265625,
      "model_forward_time": 0.11549973487854004,
      "step": 18545
    },
    {
      "epoch": 0.000113189697265625,
      "step": 18545,
      "training_step_time": 0.4302680492401123
    },
    {
      "epoch": 0.00011319580078125,
      "model_forward_time": 0.1151728630065918,
      "step": 18546
    },
    {
      "epoch": 0.00011319580078125,
      "step": 18546,
      "training_step_time": 0.3965895175933838
    },
    {
      "epoch": 0.000113201904296875,
      "model_forward_time": 0.11486077308654785,
      "step": 18547
    },
    {
      "epoch": 0.000113201904296875,
      "step": 18547,
      "training_step_time": 0.4806699752807617
    },
    {
      "epoch": 0.0001132080078125,
      "model_forward_time": 0.115142822265625,
      "step": 18548
    },
    {
      "epoch": 0.0001132080078125,
      "step": 18548,
      "training_step_time": 0.5009429454803467
    },
    {
      "epoch": 0.000113214111328125,
      "model_forward_time": 0.11469674110412598,
      "step": 18549
    },
    {
      "epoch": 0.000113214111328125,
      "step": 18549,
      "training_step_time": 0.4394969940185547
    },
    {
      "epoch": 0.00011322021484375,
      "grad_norm": 0.13161160051822662,
      "learning_rate": 8.273356893414659e-05,
      "loss": 0.0473,
      "step": 18550
    },
    {
      "epoch": 0.00011322021484375,
      "model_forward_time": 0.11519837379455566,
      "step": 18550
    },
    {
      "epoch": 0.00011322021484375,
      "step": 18550,
      "training_step_time": 0.4300072193145752
    },
    {
      "epoch": 0.000113226318359375,
      "model_forward_time": 0.11472272872924805,
      "step": 18551
    },
    {
      "epoch": 0.000113226318359375,
      "step": 18551,
      "training_step_time": 0.3904533386230469
    },
    {
      "epoch": 0.000113232421875,
      "model_forward_time": 0.1152029037475586,
      "step": 18552
    },
    {
      "epoch": 0.000113232421875,
      "step": 18552,
      "training_step_time": 0.4035623073577881
    },
    {
      "epoch": 0.000113238525390625,
      "model_forward_time": 0.11476850509643555,
      "step": 18553
    },
    {
      "epoch": 0.000113238525390625,
      "step": 18553,
      "training_step_time": 0.48632073402404785
    },
    {
      "epoch": 0.00011324462890625,
      "model_forward_time": 0.11550235748291016,
      "step": 18554
    },
    {
      "epoch": 0.00011324462890625,
      "step": 18554,
      "training_step_time": 0.3949263095855713
    },
    {
      "epoch": 0.000113250732421875,
      "model_forward_time": 0.11494112014770508,
      "step": 18555
    },
    {
      "epoch": 0.000113250732421875,
      "step": 18555,
      "training_step_time": 0.39835476875305176
    },
    {
      "epoch": 0.0001132568359375,
      "model_forward_time": 0.11607742309570312,
      "step": 18556
    },
    {
      "epoch": 0.0001132568359375,
      "step": 18556,
      "training_step_time": 0.3886423110961914
    },
    {
      "epoch": 0.000113262939453125,
      "model_forward_time": 0.11420035362243652,
      "step": 18557
    },
    {
      "epoch": 0.000113262939453125,
      "step": 18557,
      "training_step_time": 0.3879678249359131
    },
    {
      "epoch": 0.00011326904296875,
      "model_forward_time": 0.11477851867675781,
      "step": 18558
    },
    {
      "epoch": 0.00011326904296875,
      "step": 18558,
      "training_step_time": 0.4147305488586426
    },
    {
      "epoch": 0.000113275146484375,
      "model_forward_time": 0.1142117977142334,
      "step": 18559
    },
    {
      "epoch": 0.000113275146484375,
      "step": 18559,
      "training_step_time": 0.41409969329833984
    },
    {
      "epoch": 0.00011328125,
      "grad_norm": 0.15236863493919373,
      "learning_rate": 8.271273262441975e-05,
      "loss": 0.0559,
      "step": 18560
    },
    {
      "epoch": 0.00011328125,
      "model_forward_time": 0.11493730545043945,
      "step": 18560
    },
    {
      "epoch": 0.00011328125,
      "step": 18560,
      "training_step_time": 0.43744730949401855
    },
    {
      "epoch": 0.000113287353515625,
      "model_forward_time": 0.11536264419555664,
      "step": 18561
    },
    {
      "epoch": 0.000113287353515625,
      "step": 18561,
      "training_step_time": 0.40575265884399414
    },
    {
      "epoch": 0.00011329345703125,
      "model_forward_time": 0.11573481559753418,
      "step": 18562
    },
    {
      "epoch": 0.00011329345703125,
      "step": 18562,
      "training_step_time": 0.5112528800964355
    },
    {
      "epoch": 0.000113299560546875,
      "model_forward_time": 0.11537837982177734,
      "step": 18563
    },
    {
      "epoch": 0.000113299560546875,
      "step": 18563,
      "training_step_time": 0.4391164779663086
    },
    {
      "epoch": 0.0001133056640625,
      "model_forward_time": 0.11490678787231445,
      "step": 18564
    },
    {
      "epoch": 0.0001133056640625,
      "step": 18564,
      "training_step_time": 0.39589405059814453
    },
    {
      "epoch": 0.000113311767578125,
      "model_forward_time": 0.1147150993347168,
      "step": 18565
    },
    {
      "epoch": 0.000113311767578125,
      "step": 18565,
      "training_step_time": 0.39096641540527344
    },
    {
      "epoch": 0.00011331787109375,
      "model_forward_time": 0.11549830436706543,
      "step": 18566
    },
    {
      "epoch": 0.00011331787109375,
      "step": 18566,
      "training_step_time": 0.3844461441040039
    },
    {
      "epoch": 0.000113323974609375,
      "model_forward_time": 0.11568427085876465,
      "step": 18567
    },
    {
      "epoch": 0.000113323974609375,
      "step": 18567,
      "training_step_time": 0.3904907703399658
    },
    {
      "epoch": 0.000113330078125,
      "model_forward_time": 0.1150655746459961,
      "step": 18568
    },
    {
      "epoch": 0.000113330078125,
      "step": 18568,
      "training_step_time": 0.41591405868530273
    },
    {
      "epoch": 0.000113336181640625,
      "model_forward_time": 0.11458301544189453,
      "step": 18569
    },
    {
      "epoch": 0.000113336181640625,
      "step": 18569,
      "training_step_time": 0.4262392520904541
    },
    {
      "epoch": 0.00011334228515625,
      "grad_norm": 0.12617100775241852,
      "learning_rate": 8.269188637742846e-05,
      "loss": 0.0507,
      "step": 18570
    },
    {
      "epoch": 0.00011334228515625,
      "model_forward_time": 0.11523985862731934,
      "step": 18570
    },
    {
      "epoch": 0.00011334228515625,
      "step": 18570,
      "training_step_time": 0.4793076515197754
    },
    {
      "epoch": 0.000113348388671875,
      "model_forward_time": 0.11543655395507812,
      "step": 18571
    },
    {
      "epoch": 0.000113348388671875,
      "step": 18571,
      "training_step_time": 0.4243171215057373
    },
    {
      "epoch": 0.0001133544921875,
      "model_forward_time": 0.11578607559204102,
      "step": 18572
    },
    {
      "epoch": 0.0001133544921875,
      "step": 18572,
      "training_step_time": 0.38887858390808105
    },
    {
      "epoch": 0.000113360595703125,
      "model_forward_time": 0.11477828025817871,
      "step": 18573
    },
    {
      "epoch": 0.000113360595703125,
      "step": 18573,
      "training_step_time": 0.4085094928741455
    },
    {
      "epoch": 0.00011336669921875,
      "model_forward_time": 0.11556649208068848,
      "step": 18574
    },
    {
      "epoch": 0.00011336669921875,
      "step": 18574,
      "training_step_time": 0.44980835914611816
    },
    {
      "epoch": 0.000113372802734375,
      "model_forward_time": 0.11501193046569824,
      "step": 18575
    },
    {
      "epoch": 0.000113372802734375,
      "step": 18575,
      "training_step_time": 0.47646665573120117
    },
    {
      "epoch": 0.00011337890625,
      "model_forward_time": 0.11481380462646484,
      "step": 18576
    },
    {
      "epoch": 0.00011337890625,
      "step": 18576,
      "training_step_time": 0.45690202713012695
    },
    {
      "epoch": 0.000113385009765625,
      "model_forward_time": 0.11493778228759766,
      "step": 18577
    },
    {
      "epoch": 0.000113385009765625,
      "step": 18577,
      "training_step_time": 0.4550590515136719
    },
    {
      "epoch": 0.00011339111328125,
      "model_forward_time": 0.11455082893371582,
      "step": 18578
    },
    {
      "epoch": 0.00011339111328125,
      "step": 18578,
      "training_step_time": 0.3861987590789795
    },
    {
      "epoch": 0.000113397216796875,
      "model_forward_time": 0.11455750465393066,
      "step": 18579
    },
    {
      "epoch": 0.000113397216796875,
      "step": 18579,
      "training_step_time": 0.38922715187072754
    },
    {
      "epoch": 0.0001134033203125,
      "grad_norm": 0.15672467648983002,
      "learning_rate": 8.267103019950529e-05,
      "loss": 0.0472,
      "step": 18580
    },
    {
      "epoch": 0.0001134033203125,
      "model_forward_time": 0.11524367332458496,
      "step": 18580
    },
    {
      "epoch": 0.0001134033203125,
      "step": 18580,
      "training_step_time": 0.41142892837524414
    },
    {
      "epoch": 0.000113409423828125,
      "model_forward_time": 0.1151876449584961,
      "step": 18581
    },
    {
      "epoch": 0.000113409423828125,
      "step": 18581,
      "training_step_time": 0.859647274017334
    },
    {
      "epoch": 0.00011341552734375,
      "model_forward_time": 0.11439347267150879,
      "step": 18582
    },
    {
      "epoch": 0.00011341552734375,
      "step": 18582,
      "training_step_time": 0.4582176208496094
    },
    {
      "epoch": 0.000113421630859375,
      "model_forward_time": 0.11444592475891113,
      "step": 18583
    },
    {
      "epoch": 0.000113421630859375,
      "step": 18583,
      "training_step_time": 0.4381437301635742
    },
    {
      "epoch": 0.000113427734375,
      "model_forward_time": 0.11399149894714355,
      "step": 18584
    },
    {
      "epoch": 0.000113427734375,
      "step": 18584,
      "training_step_time": 0.393451452255249
    },
    {
      "epoch": 0.000113433837890625,
      "model_forward_time": 0.11426711082458496,
      "step": 18585
    },
    {
      "epoch": 0.000113433837890625,
      "step": 18585,
      "training_step_time": 0.3794879913330078
    },
    {
      "epoch": 0.00011343994140625,
      "model_forward_time": 0.11382102966308594,
      "step": 18586
    },
    {
      "epoch": 0.00011343994140625,
      "step": 18586,
      "training_step_time": 0.3857142925262451
    },
    {
      "epoch": 0.000113446044921875,
      "model_forward_time": 0.11461377143859863,
      "step": 18587
    },
    {
      "epoch": 0.000113446044921875,
      "step": 18587,
      "training_step_time": 0.9234452247619629
    },
    {
      "epoch": 0.0001134521484375,
      "model_forward_time": 0.11415886878967285,
      "step": 18588
    },
    {
      "epoch": 0.0001134521484375,
      "step": 18588,
      "training_step_time": 0.39686059951782227
    },
    {
      "epoch": 0.000113458251953125,
      "model_forward_time": 0.1143031120300293,
      "step": 18589
    },
    {
      "epoch": 0.000113458251953125,
      "step": 18589,
      "training_step_time": 0.4230811595916748
    },
    {
      "epoch": 0.00011346435546875,
      "grad_norm": 0.17042118310928345,
      "learning_rate": 8.265016409698573e-05,
      "loss": 0.0528,
      "step": 18590
    },
    {
      "epoch": 0.00011346435546875,
      "model_forward_time": 0.11549615859985352,
      "step": 18590
    },
    {
      "epoch": 0.00011346435546875,
      "step": 18590,
      "training_step_time": 0.39430928230285645
    },
    {
      "epoch": 0.000113470458984375,
      "model_forward_time": 0.11464428901672363,
      "step": 18591
    },
    {
      "epoch": 0.000113470458984375,
      "step": 18591,
      "training_step_time": 0.41862034797668457
    },
    {
      "epoch": 0.0001134765625,
      "model_forward_time": 0.11439085006713867,
      "step": 18592
    },
    {
      "epoch": 0.0001134765625,
      "step": 18592,
      "training_step_time": 0.3897409439086914
    },
    {
      "epoch": 0.000113482666015625,
      "model_forward_time": 0.11456894874572754,
      "step": 18593
    },
    {
      "epoch": 0.000113482666015625,
      "step": 18593,
      "training_step_time": 0.7180900573730469
    },
    {
      "epoch": 0.00011348876953125,
      "model_forward_time": 0.11426448822021484,
      "step": 18594
    },
    {
      "epoch": 0.00011348876953125,
      "step": 18594,
      "training_step_time": 0.39325618743896484
    },
    {
      "epoch": 0.000113494873046875,
      "model_forward_time": 0.11480593681335449,
      "step": 18595
    },
    {
      "epoch": 0.000113494873046875,
      "step": 18595,
      "training_step_time": 0.3959798812866211
    },
    {
      "epoch": 0.0001135009765625,
      "model_forward_time": 0.11481308937072754,
      "step": 18596
    },
    {
      "epoch": 0.0001135009765625,
      "step": 18596,
      "training_step_time": 0.4120466709136963
    },
    {
      "epoch": 0.000113507080078125,
      "model_forward_time": 0.11516666412353516,
      "step": 18597
    },
    {
      "epoch": 0.000113507080078125,
      "step": 18597,
      "training_step_time": 0.4825022220611572
    },
    {
      "epoch": 0.00011351318359375,
      "model_forward_time": 0.1146538257598877,
      "step": 18598
    },
    {
      "epoch": 0.00011351318359375,
      "step": 18598,
      "training_step_time": 0.39041948318481445
    },
    {
      "epoch": 0.000113519287109375,
      "model_forward_time": 0.11468815803527832,
      "step": 18599
    },
    {
      "epoch": 0.000113519287109375,
      "step": 18599,
      "training_step_time": 0.4047091007232666
    },
    {
      "epoch": 0.000113525390625,
      "grad_norm": 0.18210825324058533,
      "learning_rate": 8.262928807620843e-05,
      "loss": 0.0538,
      "step": 18600
    },
    {
      "epoch": 0.000113525390625,
      "model_forward_time": 0.11474823951721191,
      "step": 18600
    },
    {
      "epoch": 0.000113525390625,
      "step": 18600,
      "training_step_time": 0.3991377353668213
    },
    {
      "epoch": 0.000113531494140625,
      "model_forward_time": 0.11515140533447266,
      "step": 18601
    },
    {
      "epoch": 0.000113531494140625,
      "step": 18601,
      "training_step_time": 0.46497035026550293
    },
    {
      "epoch": 0.00011353759765625,
      "model_forward_time": 0.11476016044616699,
      "step": 18602
    },
    {
      "epoch": 0.00011353759765625,
      "step": 18602,
      "training_step_time": 0.4056103229522705
    },
    {
      "epoch": 0.000113543701171875,
      "model_forward_time": 0.11508297920227051,
      "step": 18603
    },
    {
      "epoch": 0.000113543701171875,
      "step": 18603,
      "training_step_time": 0.4016835689544678
    },
    {
      "epoch": 0.0001135498046875,
      "model_forward_time": 0.11427116394042969,
      "step": 18604
    },
    {
      "epoch": 0.0001135498046875,
      "step": 18604,
      "training_step_time": 0.4407203197479248
    },
    {
      "epoch": 0.000113555908203125,
      "model_forward_time": 0.11539125442504883,
      "step": 18605
    },
    {
      "epoch": 0.000113555908203125,
      "step": 18605,
      "training_step_time": 0.5658516883850098
    },
    {
      "epoch": 0.00011356201171875,
      "model_forward_time": 0.11443567276000977,
      "step": 18606
    },
    {
      "epoch": 0.00011356201171875,
      "step": 18606,
      "training_step_time": 0.3956611156463623
    },
    {
      "epoch": 0.000113568115234375,
      "model_forward_time": 0.11563301086425781,
      "step": 18607
    },
    {
      "epoch": 0.000113568115234375,
      "step": 18607,
      "training_step_time": 0.3969130516052246
    },
    {
      "epoch": 0.00011357421875,
      "model_forward_time": 0.11512160301208496,
      "step": 18608
    },
    {
      "epoch": 0.00011357421875,
      "step": 18608,
      "training_step_time": 0.3847482204437256
    },
    {
      "epoch": 0.000113580322265625,
      "model_forward_time": 0.11503243446350098,
      "step": 18609
    },
    {
      "epoch": 0.000113580322265625,
      "step": 18609,
      "training_step_time": 0.42029380798339844
    },
    {
      "epoch": 0.00011358642578125,
      "grad_norm": 0.18494102358818054,
      "learning_rate": 8.260840214351493e-05,
      "loss": 0.0511,
      "step": 18610
    },
    {
      "epoch": 0.00011358642578125,
      "model_forward_time": 0.11550307273864746,
      "step": 18610
    },
    {
      "epoch": 0.00011358642578125,
      "step": 18610,
      "training_step_time": 0.4580416679382324
    },
    {
      "epoch": 0.000113592529296875,
      "model_forward_time": 0.11486196517944336,
      "step": 18611
    },
    {
      "epoch": 0.000113592529296875,
      "step": 18611,
      "training_step_time": 1.003807783126831
    },
    {
      "epoch": 0.0001135986328125,
      "model_forward_time": 0.11390471458435059,
      "step": 18612
    },
    {
      "epoch": 0.0001135986328125,
      "step": 18612,
      "training_step_time": 0.3822824954986572
    },
    {
      "epoch": 0.000113604736328125,
      "model_forward_time": 0.11385464668273926,
      "step": 18613
    },
    {
      "epoch": 0.000113604736328125,
      "step": 18613,
      "training_step_time": 0.39801692962646484
    },
    {
      "epoch": 0.00011361083984375,
      "model_forward_time": 0.11465144157409668,
      "step": 18614
    },
    {
      "epoch": 0.00011361083984375,
      "step": 18614,
      "training_step_time": 0.42018866539001465
    },
    {
      "epoch": 0.000113616943359375,
      "model_forward_time": 0.11403274536132812,
      "step": 18615
    },
    {
      "epoch": 0.000113616943359375,
      "step": 18615,
      "training_step_time": 0.41364097595214844
    },
    {
      "epoch": 0.000113623046875,
      "model_forward_time": 0.11404228210449219,
      "step": 18616
    },
    {
      "epoch": 0.000113623046875,
      "step": 18616,
      "training_step_time": 0.3821582794189453
    },
    {
      "epoch": 0.000113629150390625,
      "model_forward_time": 0.11511826515197754,
      "step": 18617
    },
    {
      "epoch": 0.000113629150390625,
      "step": 18617,
      "training_step_time": 0.730907678604126
    },
    {
      "epoch": 0.00011363525390625,
      "model_forward_time": 0.11410140991210938,
      "step": 18618
    },
    {
      "epoch": 0.00011363525390625,
      "step": 18618,
      "training_step_time": 0.39220595359802246
    },
    {
      "epoch": 0.000113641357421875,
      "model_forward_time": 0.11407232284545898,
      "step": 18619
    },
    {
      "epoch": 0.000113641357421875,
      "step": 18619,
      "training_step_time": 0.3879847526550293
    },
    {
      "epoch": 0.0001136474609375,
      "grad_norm": 0.1687328964471817,
      "learning_rate": 8.258750630524984e-05,
      "loss": 0.045,
      "step": 18620
    },
    {
      "epoch": 0.0001136474609375,
      "model_forward_time": 0.11603903770446777,
      "step": 18620
    },
    {
      "epoch": 0.0001136474609375,
      "step": 18620,
      "training_step_time": 0.38919711112976074
    },
    {
      "epoch": 0.000113653564453125,
      "model_forward_time": 0.11513566970825195,
      "step": 18621
    },
    {
      "epoch": 0.000113653564453125,
      "step": 18621,
      "training_step_time": 0.3818778991699219
    },
    {
      "epoch": 0.00011365966796875,
      "model_forward_time": 0.11524248123168945,
      "step": 18622
    },
    {
      "epoch": 0.00011365966796875,
      "step": 18622,
      "training_step_time": 0.4436037540435791
    },
    {
      "epoch": 0.000113665771484375,
      "model_forward_time": 0.11506223678588867,
      "step": 18623
    },
    {
      "epoch": 0.000113665771484375,
      "step": 18623,
      "training_step_time": 1.0570507049560547
    },
    {
      "epoch": 0.000113671875,
      "model_forward_time": 0.1136777400970459,
      "step": 18624
    },
    {
      "epoch": 0.000113671875,
      "step": 18624,
      "training_step_time": 0.38730311393737793
    },
    {
      "epoch": 0.000113677978515625,
      "model_forward_time": 0.11392426490783691,
      "step": 18625
    },
    {
      "epoch": 0.000113677978515625,
      "step": 18625,
      "training_step_time": 0.46755361557006836
    },
    {
      "epoch": 0.00011368408203125,
      "model_forward_time": 0.11379003524780273,
      "step": 18626
    },
    {
      "epoch": 0.00011368408203125,
      "step": 18626,
      "training_step_time": 0.45168352127075195
    },
    {
      "epoch": 0.000113690185546875,
      "model_forward_time": 0.11428284645080566,
      "step": 18627
    },
    {
      "epoch": 0.000113690185546875,
      "step": 18627,
      "training_step_time": 0.41814565658569336
    },
    {
      "epoch": 0.0001136962890625,
      "model_forward_time": 0.11429929733276367,
      "step": 18628
    },
    {
      "epoch": 0.0001136962890625,
      "step": 18628,
      "training_step_time": 0.38063645362854004
    },
    {
      "epoch": 0.000113702392578125,
      "model_forward_time": 0.11471319198608398,
      "step": 18629
    },
    {
      "epoch": 0.000113702392578125,
      "step": 18629,
      "training_step_time": 0.3644545078277588
    },
    {
      "epoch": 0.00011370849609375,
      "grad_norm": 0.163381427526474,
      "learning_rate": 8.256660056776076e-05,
      "loss": 0.0507,
      "step": 18630
    },
    {
      "epoch": 0.00011370849609375,
      "model_forward_time": 0.11525940895080566,
      "step": 18630
    },
    {
      "epoch": 0.00011370849609375,
      "step": 18630,
      "training_step_time": 0.4183518886566162
    },
    {
      "epoch": 0.000113714599609375,
      "model_forward_time": 0.1148827075958252,
      "step": 18631
    },
    {
      "epoch": 0.000113714599609375,
      "step": 18631,
      "training_step_time": 0.42200207710266113
    },
    {
      "epoch": 0.000113720703125,
      "model_forward_time": 0.11483073234558105,
      "step": 18632
    },
    {
      "epoch": 0.000113720703125,
      "step": 18632,
      "training_step_time": 0.38451480865478516
    },
    {
      "epoch": 0.000113726806640625,
      "model_forward_time": 0.11483120918273926,
      "step": 18633
    },
    {
      "epoch": 0.000113726806640625,
      "step": 18633,
      "training_step_time": 0.3937370777130127
    },
    {
      "epoch": 0.00011373291015625,
      "model_forward_time": 0.1145780086517334,
      "step": 18634
    },
    {
      "epoch": 0.00011373291015625,
      "step": 18634,
      "training_step_time": 0.3862771987915039
    },
    {
      "epoch": 0.000113739013671875,
      "model_forward_time": 0.11536407470703125,
      "step": 18635
    },
    {
      "epoch": 0.000113739013671875,
      "step": 18635,
      "training_step_time": 0.49338603019714355
    },
    {
      "epoch": 0.0001137451171875,
      "model_forward_time": 0.11507749557495117,
      "step": 18636
    },
    {
      "epoch": 0.0001137451171875,
      "step": 18636,
      "training_step_time": 0.4489471912384033
    },
    {
      "epoch": 0.000113751220703125,
      "model_forward_time": 0.1153562068939209,
      "step": 18637
    },
    {
      "epoch": 0.000113751220703125,
      "step": 18637,
      "training_step_time": 0.4653432369232178
    },
    {
      "epoch": 0.00011375732421875,
      "model_forward_time": 0.11478400230407715,
      "step": 18638
    },
    {
      "epoch": 0.00011375732421875,
      "step": 18638,
      "training_step_time": 0.38373351097106934
    },
    {
      "epoch": 0.000113763427734375,
      "model_forward_time": 0.11503314971923828,
      "step": 18639
    },
    {
      "epoch": 0.000113763427734375,
      "step": 18639,
      "training_step_time": 0.4210541248321533
    },
    {
      "epoch": 0.00011376953125,
      "grad_norm": 0.11692143231630325,
      "learning_rate": 8.254568493739828e-05,
      "loss": 0.0471,
      "step": 18640
    },
    {
      "epoch": 0.00011376953125,
      "model_forward_time": 0.11555981636047363,
      "step": 18640
    },
    {
      "epoch": 0.00011376953125,
      "step": 18640,
      "training_step_time": 0.44408512115478516
    },
    {
      "epoch": 0.000113775634765625,
      "model_forward_time": 0.11470532417297363,
      "step": 18641
    },
    {
      "epoch": 0.000113775634765625,
      "step": 18641,
      "training_step_time": 0.48911142349243164
    },
    {
      "epoch": 0.00011378173828125,
      "model_forward_time": 0.11507010459899902,
      "step": 18642
    },
    {
      "epoch": 0.00011378173828125,
      "step": 18642,
      "training_step_time": 0.393099308013916
    },
    {
      "epoch": 0.000113787841796875,
      "model_forward_time": 0.1154775619506836,
      "step": 18643
    },
    {
      "epoch": 0.000113787841796875,
      "step": 18643,
      "training_step_time": 0.4011051654815674
    },
    {
      "epoch": 0.0001137939453125,
      "model_forward_time": 0.11544060707092285,
      "step": 18644
    },
    {
      "epoch": 0.0001137939453125,
      "step": 18644,
      "training_step_time": 0.40813446044921875
    },
    {
      "epoch": 0.000113800048828125,
      "model_forward_time": 0.1150827407836914,
      "step": 18645
    },
    {
      "epoch": 0.000113800048828125,
      "step": 18645,
      "training_step_time": 0.47534632682800293
    },
    {
      "epoch": 0.00011380615234375,
      "model_forward_time": 0.11505436897277832,
      "step": 18646
    },
    {
      "epoch": 0.00011380615234375,
      "step": 18646,
      "training_step_time": 0.42755937576293945
    },
    {
      "epoch": 0.000113812255859375,
      "model_forward_time": 0.11537551879882812,
      "step": 18647
    },
    {
      "epoch": 0.000113812255859375,
      "step": 18647,
      "training_step_time": 0.4056727886199951
    },
    {
      "epoch": 0.000113818359375,
      "model_forward_time": 0.11529183387756348,
      "step": 18648
    },
    {
      "epoch": 0.000113818359375,
      "step": 18648,
      "training_step_time": 0.40181946754455566
    },
    {
      "epoch": 0.000113824462890625,
      "model_forward_time": 0.11538243293762207,
      "step": 18649
    },
    {
      "epoch": 0.000113824462890625,
      "step": 18649,
      "training_step_time": 0.3958301544189453
    },
    {
      "epoch": 0.00011383056640625,
      "grad_norm": 0.18410326540470123,
      "learning_rate": 8.252475942051605e-05,
      "loss": 0.049,
      "step": 18650
    },
    {
      "epoch": 0.00011383056640625,
      "model_forward_time": 0.11471104621887207,
      "step": 18650
    },
    {
      "epoch": 0.00011383056640625,
      "step": 18650,
      "training_step_time": 0.39156389236450195
    },
    {
      "epoch": 0.000113836669921875,
      "model_forward_time": 0.11526179313659668,
      "step": 18651
    },
    {
      "epoch": 0.000113836669921875,
      "step": 18651,
      "training_step_time": 0.4442627429962158
    },
    {
      "epoch": 0.0001138427734375,
      "model_forward_time": 0.11458015441894531,
      "step": 18652
    },
    {
      "epoch": 0.0001138427734375,
      "step": 18652,
      "training_step_time": 0.4195272922515869
    },
    {
      "epoch": 0.000113848876953125,
      "model_forward_time": 0.11493778228759766,
      "step": 18653
    },
    {
      "epoch": 0.000113848876953125,
      "step": 18653,
      "training_step_time": 0.7007970809936523
    },
    {
      "epoch": 0.00011385498046875,
      "model_forward_time": 0.11532139778137207,
      "step": 18654
    },
    {
      "epoch": 0.00011385498046875,
      "step": 18654,
      "training_step_time": 0.4857792854309082
    },
    {
      "epoch": 0.000113861083984375,
      "model_forward_time": 0.11436653137207031,
      "step": 18655
    },
    {
      "epoch": 0.000113861083984375,
      "step": 18655,
      "training_step_time": 0.4457814693450928
    },
    {
      "epoch": 0.0001138671875,
      "model_forward_time": 0.11441493034362793,
      "step": 18656
    },
    {
      "epoch": 0.0001138671875,
      "step": 18656,
      "training_step_time": 0.38539695739746094
    },
    {
      "epoch": 0.000113873291015625,
      "model_forward_time": 0.11421561241149902,
      "step": 18657
    },
    {
      "epoch": 0.000113873291015625,
      "step": 18657,
      "training_step_time": 0.38971400260925293
    },
    {
      "epoch": 0.00011387939453125,
      "model_forward_time": 0.11446762084960938,
      "step": 18658
    },
    {
      "epoch": 0.00011387939453125,
      "step": 18658,
      "training_step_time": 0.4434547424316406
    },
    {
      "epoch": 0.000113885498046875,
      "model_forward_time": 0.11540985107421875,
      "step": 18659
    },
    {
      "epoch": 0.000113885498046875,
      "step": 18659,
      "training_step_time": 0.5414519309997559
    },
    {
      "epoch": 0.0001138916015625,
      "grad_norm": 0.17312100529670715,
      "learning_rate": 8.250382402347065e-05,
      "loss": 0.0487,
      "step": 18660
    },
    {
      "epoch": 0.0001138916015625,
      "model_forward_time": 0.11520576477050781,
      "step": 18660
    },
    {
      "epoch": 0.0001138916015625,
      "step": 18660,
      "training_step_time": 0.4253995418548584
    },
    {
      "epoch": 0.000113897705078125,
      "model_forward_time": 0.11461997032165527,
      "step": 18661
    },
    {
      "epoch": 0.000113897705078125,
      "step": 18661,
      "training_step_time": 0.3944671154022217
    },
    {
      "epoch": 0.00011390380859375,
      "model_forward_time": 0.11552810668945312,
      "step": 18662
    },
    {
      "epoch": 0.00011390380859375,
      "step": 18662,
      "training_step_time": 0.38469529151916504
    },
    {
      "epoch": 0.000113909912109375,
      "model_forward_time": 0.11517524719238281,
      "step": 18663
    },
    {
      "epoch": 0.000113909912109375,
      "step": 18663,
      "training_step_time": 0.43344926834106445
    },
    {
      "epoch": 0.000113916015625,
      "model_forward_time": 0.11503243446350098,
      "step": 18664
    },
    {
      "epoch": 0.000113916015625,
      "step": 18664,
      "training_step_time": 0.43839478492736816
    },
    {
      "epoch": 0.000113922119140625,
      "model_forward_time": 0.11526370048522949,
      "step": 18665
    },
    {
      "epoch": 0.000113922119140625,
      "step": 18665,
      "training_step_time": 0.525510311126709
    },
    {
      "epoch": 0.00011392822265625,
      "model_forward_time": 0.11508893966674805,
      "step": 18666
    },
    {
      "epoch": 0.00011392822265625,
      "step": 18666,
      "training_step_time": 0.3959026336669922
    },
    {
      "epoch": 0.000113934326171875,
      "model_forward_time": 0.11491727828979492,
      "step": 18667
    },
    {
      "epoch": 0.000113934326171875,
      "step": 18667,
      "training_step_time": 0.43225884437561035
    },
    {
      "epoch": 0.0001139404296875,
      "model_forward_time": 0.11501526832580566,
      "step": 18668
    },
    {
      "epoch": 0.0001139404296875,
      "step": 18668,
      "training_step_time": 0.41210293769836426
    },
    {
      "epoch": 0.000113946533203125,
      "model_forward_time": 0.11424016952514648,
      "step": 18669
    },
    {
      "epoch": 0.000113946533203125,
      "step": 18669,
      "training_step_time": 0.39414143562316895
    },
    {
      "epoch": 0.00011395263671875,
      "grad_norm": 0.14157359302043915,
      "learning_rate": 8.248287875262176e-05,
      "loss": 0.0601,
      "step": 18670
    },
    {
      "epoch": 0.00011395263671875,
      "model_forward_time": 0.11562013626098633,
      "step": 18670
    },
    {
      "epoch": 0.00011395263671875,
      "step": 18670,
      "training_step_time": 0.3888509273529053
    },
    {
      "epoch": 0.000113958740234375,
      "model_forward_time": 0.11536693572998047,
      "step": 18671
    },
    {
      "epoch": 0.000113958740234375,
      "step": 18671,
      "training_step_time": 0.9087727069854736
    },
    {
      "epoch": 0.00011396484375,
      "model_forward_time": 0.11539673805236816,
      "step": 18672
    },
    {
      "epoch": 0.00011396484375,
      "step": 18672,
      "training_step_time": 0.46822452545166016
    },
    {
      "epoch": 0.000113970947265625,
      "model_forward_time": 0.11417174339294434,
      "step": 18673
    },
    {
      "epoch": 0.000113970947265625,
      "step": 18673,
      "training_step_time": 0.47976064682006836
    },
    {
      "epoch": 0.00011397705078125,
      "model_forward_time": 0.11444854736328125,
      "step": 18674
    },
    {
      "epoch": 0.00011397705078125,
      "step": 18674,
      "training_step_time": 0.3852958679199219
    },
    {
      "epoch": 0.000113983154296875,
      "model_forward_time": 0.11409544944763184,
      "step": 18675
    },
    {
      "epoch": 0.000113983154296875,
      "step": 18675,
      "training_step_time": 0.3811025619506836
    },
    {
      "epoch": 0.0001139892578125,
      "model_forward_time": 0.11451435089111328,
      "step": 18676
    },
    {
      "epoch": 0.0001139892578125,
      "step": 18676,
      "training_step_time": 0.3823251724243164
    },
    {
      "epoch": 0.000113995361328125,
      "model_forward_time": 0.11505293846130371,
      "step": 18677
    },
    {
      "epoch": 0.000113995361328125,
      "step": 18677,
      "training_step_time": 0.43883490562438965
    },
    {
      "epoch": 0.00011400146484375,
      "model_forward_time": 0.1150045394897461,
      "step": 18678
    },
    {
      "epoch": 0.00011400146484375,
      "step": 18678,
      "training_step_time": 0.39811062812805176
    },
    {
      "epoch": 0.000114007568359375,
      "model_forward_time": 0.11484384536743164,
      "step": 18679
    },
    {
      "epoch": 0.000114007568359375,
      "step": 18679,
      "training_step_time": 0.4268198013305664
    },
    {
      "epoch": 0.000114013671875,
      "grad_norm": 0.1206355094909668,
      "learning_rate": 8.246192361433196e-05,
      "loss": 0.0486,
      "step": 18680
    },
    {
      "epoch": 0.000114013671875,
      "model_forward_time": 0.11485075950622559,
      "step": 18680
    },
    {
      "epoch": 0.000114013671875,
      "step": 18680,
      "training_step_time": 0.4053316116333008
    },
    {
      "epoch": 0.000114019775390625,
      "model_forward_time": 0.11553502082824707,
      "step": 18681
    },
    {
      "epoch": 0.000114019775390625,
      "step": 18681,
      "training_step_time": 0.4308445453643799
    },
    {
      "epoch": 0.00011402587890625,
      "model_forward_time": 0.11521410942077637,
      "step": 18682
    },
    {
      "epoch": 0.00011402587890625,
      "step": 18682,
      "training_step_time": 0.4206511974334717
    },
    {
      "epoch": 0.000114031982421875,
      "model_forward_time": 0.11508941650390625,
      "step": 18683
    },
    {
      "epoch": 0.000114031982421875,
      "step": 18683,
      "training_step_time": 0.45852160453796387
    },
    {
      "epoch": 0.0001140380859375,
      "model_forward_time": 0.114776611328125,
      "step": 18684
    },
    {
      "epoch": 0.0001140380859375,
      "step": 18684,
      "training_step_time": 0.40253710746765137
    },
    {
      "epoch": 0.000114044189453125,
      "model_forward_time": 0.1147305965423584,
      "step": 18685
    },
    {
      "epoch": 0.000114044189453125,
      "step": 18685,
      "training_step_time": 0.39366793632507324
    },
    {
      "epoch": 0.00011405029296875,
      "model_forward_time": 0.11593818664550781,
      "step": 18686
    },
    {
      "epoch": 0.00011405029296875,
      "step": 18686,
      "training_step_time": 0.4128241539001465
    },
    {
      "epoch": 0.000114056396484375,
      "model_forward_time": 0.11621928215026855,
      "step": 18687
    },
    {
      "epoch": 0.000114056396484375,
      "step": 18687,
      "training_step_time": 0.3959317207336426
    },
    {
      "epoch": 0.0001140625,
      "model_forward_time": 0.1179811954498291,
      "step": 18688
    },
    {
      "epoch": 0.0001140625,
      "step": 18688,
      "training_step_time": 0.39072108268737793
    },
    {
      "epoch": 0.000114068603515625,
      "model_forward_time": 0.11871051788330078,
      "step": 18689
    },
    {
      "epoch": 0.000114068603515625,
      "step": 18689,
      "training_step_time": 0.47130250930786133
    },
    {
      "epoch": 0.00011407470703125,
      "grad_norm": 0.11642362177371979,
      "learning_rate": 8.244095861496686e-05,
      "loss": 0.0518,
      "step": 18690
    },
    {
      "epoch": 0.00011407470703125,
      "model_forward_time": 0.1158449649810791,
      "step": 18690
    },
    {
      "epoch": 0.00011407470703125,
      "step": 18690,
      "training_step_time": 0.39684438705444336
    },
    {
      "epoch": 0.000114080810546875,
      "model_forward_time": 0.11536669731140137,
      "step": 18691
    },
    {
      "epoch": 0.000114080810546875,
      "step": 18691,
      "training_step_time": 0.39646244049072266
    },
    {
      "epoch": 0.0001140869140625,
      "model_forward_time": 0.1152498722076416,
      "step": 18692
    },
    {
      "epoch": 0.0001140869140625,
      "step": 18692,
      "training_step_time": 0.4455416202545166
    },
    {
      "epoch": 0.000114093017578125,
      "model_forward_time": 0.11534357070922852,
      "step": 18693
    },
    {
      "epoch": 0.000114093017578125,
      "step": 18693,
      "training_step_time": 0.4598352909088135
    },
    {
      "epoch": 0.00011409912109375,
      "model_forward_time": 0.11507844924926758,
      "step": 18694
    },
    {
      "epoch": 0.00011409912109375,
      "step": 18694,
      "training_step_time": 0.3986361026763916
    },
    {
      "epoch": 0.000114105224609375,
      "model_forward_time": 0.11549544334411621,
      "step": 18695
    },
    {
      "epoch": 0.000114105224609375,
      "step": 18695,
      "training_step_time": 0.45943164825439453
    },
    {
      "epoch": 0.000114111328125,
      "model_forward_time": 0.11514163017272949,
      "step": 18696
    },
    {
      "epoch": 0.000114111328125,
      "step": 18696,
      "training_step_time": 0.4224231243133545
    },
    {
      "epoch": 0.000114117431640625,
      "model_forward_time": 0.11492037773132324,
      "step": 18697
    },
    {
      "epoch": 0.000114117431640625,
      "step": 18697,
      "training_step_time": 0.4225761890411377
    },
    {
      "epoch": 0.00011412353515625,
      "model_forward_time": 0.11490249633789062,
      "step": 18698
    },
    {
      "epoch": 0.00011412353515625,
      "step": 18698,
      "training_step_time": 0.39923715591430664
    },
    {
      "epoch": 0.000114129638671875,
      "model_forward_time": 0.11531567573547363,
      "step": 18699
    },
    {
      "epoch": 0.000114129638671875,
      "step": 18699,
      "training_step_time": 0.3959531784057617
    },
    {
      "epoch": 0.0001141357421875,
      "grad_norm": 0.1556544005870819,
      "learning_rate": 8.241998376089508e-05,
      "loss": 0.0503,
      "step": 18700
    },
    {
      "epoch": 0.0001141357421875,
      "model_forward_time": 0.11481499671936035,
      "step": 18700
    },
    {
      "epoch": 0.0001141357421875,
      "step": 18700,
      "training_step_time": 0.366253137588501
    },
    {
      "epoch": 0.000114141845703125,
      "model_forward_time": 0.11501622200012207,
      "step": 18701
    },
    {
      "epoch": 0.000114141845703125,
      "step": 18701,
      "training_step_time": 0.8123977184295654
    },
    {
      "epoch": 0.00011414794921875,
      "model_forward_time": 0.11500954627990723,
      "step": 18702
    },
    {
      "epoch": 0.00011414794921875,
      "step": 18702,
      "training_step_time": 0.4150395393371582
    },
    {
      "epoch": 0.000114154052734375,
      "model_forward_time": 0.11479020118713379,
      "step": 18703
    },
    {
      "epoch": 0.000114154052734375,
      "step": 18703,
      "training_step_time": 0.3931422233581543
    },
    {
      "epoch": 0.00011416015625,
      "model_forward_time": 0.11396408081054688,
      "step": 18704
    },
    {
      "epoch": 0.00011416015625,
      "step": 18704,
      "training_step_time": 0.38237428665161133
    },
    {
      "epoch": 0.000114166259765625,
      "model_forward_time": 0.11409831047058105,
      "step": 18705
    },
    {
      "epoch": 0.000114166259765625,
      "step": 18705,
      "training_step_time": 0.382504940032959
    },
    {
      "epoch": 0.00011417236328125,
      "model_forward_time": 0.11459732055664062,
      "step": 18706
    },
    {
      "epoch": 0.00011417236328125,
      "step": 18706,
      "training_step_time": 0.39655423164367676
    },
    {
      "epoch": 0.000114178466796875,
      "model_forward_time": 0.1156003475189209,
      "step": 18707
    },
    {
      "epoch": 0.000114178466796875,
      "step": 18707,
      "training_step_time": 0.6495072841644287
    },
    {
      "epoch": 0.0001141845703125,
      "model_forward_time": 0.11463022232055664,
      "step": 18708
    },
    {
      "epoch": 0.0001141845703125,
      "step": 18708,
      "training_step_time": 0.3885538578033447
    },
    {
      "epoch": 0.000114190673828125,
      "model_forward_time": 0.11443257331848145,
      "step": 18709
    },
    {
      "epoch": 0.000114190673828125,
      "step": 18709,
      "training_step_time": 0.40294432640075684
    },
    {
      "epoch": 0.00011419677734375,
      "grad_norm": 0.16609808802604675,
      "learning_rate": 8.239899905848825e-05,
      "loss": 0.0507,
      "step": 18710
    },
    {
      "epoch": 0.00011419677734375,
      "model_forward_time": 0.11501193046569824,
      "step": 18710
    },
    {
      "epoch": 0.00011419677734375,
      "step": 18710,
      "training_step_time": 0.3954596519470215
    },
    {
      "epoch": 0.000114202880859375,
      "model_forward_time": 0.11592841148376465,
      "step": 18711
    },
    {
      "epoch": 0.000114202880859375,
      "step": 18711,
      "training_step_time": 0.4856252670288086
    },
    {
      "epoch": 0.000114208984375,
      "model_forward_time": 0.1155085563659668,
      "step": 18712
    },
    {
      "epoch": 0.000114208984375,
      "step": 18712,
      "training_step_time": 0.38202953338623047
    },
    {
      "epoch": 0.000114215087890625,
      "model_forward_time": 0.11511731147766113,
      "step": 18713
    },
    {
      "epoch": 0.000114215087890625,
      "step": 18713,
      "training_step_time": 0.6048593521118164
    },
    {
      "epoch": 0.00011422119140625,
      "model_forward_time": 0.11473965644836426,
      "step": 18714
    },
    {
      "epoch": 0.00011422119140625,
      "step": 18714,
      "training_step_time": 0.5112888813018799
    },
    {
      "epoch": 0.000114227294921875,
      "model_forward_time": 0.11465573310852051,
      "step": 18715
    },
    {
      "epoch": 0.000114227294921875,
      "step": 18715,
      "training_step_time": 0.39928722381591797
    },
    {
      "epoch": 0.0001142333984375,
      "model_forward_time": 0.1150820255279541,
      "step": 18716
    },
    {
      "epoch": 0.0001142333984375,
      "step": 18716,
      "training_step_time": 0.4174537658691406
    },
    {
      "epoch": 0.000114239501953125,
      "model_forward_time": 0.11500000953674316,
      "step": 18717
    },
    {
      "epoch": 0.000114239501953125,
      "step": 18717,
      "training_step_time": 0.39304471015930176
    },
    {
      "epoch": 0.00011424560546875,
      "model_forward_time": 0.11462092399597168,
      "step": 18718
    },
    {
      "epoch": 0.00011424560546875,
      "step": 18718,
      "training_step_time": 0.39735984802246094
    },
    {
      "epoch": 0.000114251708984375,
      "model_forward_time": 0.11510396003723145,
      "step": 18719
    },
    {
      "epoch": 0.000114251708984375,
      "step": 18719,
      "training_step_time": 0.4387693405151367
    },
    {
      "epoch": 0.0001142578125,
      "grad_norm": 0.18531498312950134,
      "learning_rate": 8.237800451412095e-05,
      "loss": 0.0488,
      "step": 18720
    },
    {
      "epoch": 0.0001142578125,
      "model_forward_time": 0.115570068359375,
      "step": 18720
    },
    {
      "epoch": 0.0001142578125,
      "step": 18720,
      "training_step_time": 0.4887979030609131
    },
    {
      "epoch": 0.000114263916015625,
      "model_forward_time": 0.11553597450256348,
      "step": 18721
    },
    {
      "epoch": 0.000114263916015625,
      "step": 18721,
      "training_step_time": 0.3858463764190674
    },
    {
      "epoch": 0.00011427001953125,
      "model_forward_time": 0.11577773094177246,
      "step": 18722
    },
    {
      "epoch": 0.00011427001953125,
      "step": 18722,
      "training_step_time": 0.38399505615234375
    },
    {
      "epoch": 0.000114276123046875,
      "model_forward_time": 0.11597275733947754,
      "step": 18723
    },
    {
      "epoch": 0.000114276123046875,
      "step": 18723,
      "training_step_time": 0.39759373664855957
    },
    {
      "epoch": 0.0001142822265625,
      "model_forward_time": 0.11577582359313965,
      "step": 18724
    },
    {
      "epoch": 0.0001142822265625,
      "step": 18724,
      "training_step_time": 0.4532468318939209
    },
    {
      "epoch": 0.000114288330078125,
      "model_forward_time": 0.11512017250061035,
      "step": 18725
    },
    {
      "epoch": 0.000114288330078125,
      "step": 18725,
      "training_step_time": 0.4049344062805176
    },
    {
      "epoch": 0.00011429443359375,
      "model_forward_time": 0.11526179313659668,
      "step": 18726
    },
    {
      "epoch": 0.00011429443359375,
      "step": 18726,
      "training_step_time": 0.4139828681945801
    },
    {
      "epoch": 0.000114300537109375,
      "model_forward_time": 0.11530947685241699,
      "step": 18727
    },
    {
      "epoch": 0.000114300537109375,
      "step": 18727,
      "training_step_time": 0.394298791885376
    },
    {
      "epoch": 0.000114306640625,
      "model_forward_time": 0.11506128311157227,
      "step": 18728
    },
    {
      "epoch": 0.000114306640625,
      "step": 18728,
      "training_step_time": 0.43572115898132324
    },
    {
      "epoch": 0.000114312744140625,
      "model_forward_time": 0.11600518226623535,
      "step": 18729
    },
    {
      "epoch": 0.000114312744140625,
      "step": 18729,
      "training_step_time": 0.43222594261169434
    },
    {
      "epoch": 0.00011431884765625,
      "grad_norm": 0.1666710376739502,
      "learning_rate": 8.235700013417076e-05,
      "loss": 0.0464,
      "step": 18730
    },
    {
      "epoch": 0.00011431884765625,
      "model_forward_time": 0.11495804786682129,
      "step": 18730
    },
    {
      "epoch": 0.00011431884765625,
      "step": 18730,
      "training_step_time": 0.4456906318664551
    },
    {
      "epoch": 0.000114324951171875,
      "model_forward_time": 0.11529254913330078,
      "step": 18731
    },
    {
      "epoch": 0.000114324951171875,
      "step": 18731,
      "training_step_time": 0.9854691028594971
    },
    {
      "epoch": 0.0001143310546875,
      "model_forward_time": 0.11392402648925781,
      "step": 18732
    },
    {
      "epoch": 0.0001143310546875,
      "step": 18732,
      "training_step_time": 0.3878481388092041
    },
    {
      "epoch": 0.000114337158203125,
      "model_forward_time": 0.11427855491638184,
      "step": 18733
    },
    {
      "epoch": 0.000114337158203125,
      "step": 18733,
      "training_step_time": 0.42390918731689453
    },
    {
      "epoch": 0.00011434326171875,
      "model_forward_time": 0.11463546752929688,
      "step": 18734
    },
    {
      "epoch": 0.00011434326171875,
      "step": 18734,
      "training_step_time": 0.455493688583374
    },
    {
      "epoch": 0.000114349365234375,
      "model_forward_time": 0.11407470703125,
      "step": 18735
    },
    {
      "epoch": 0.000114349365234375,
      "step": 18735,
      "training_step_time": 0.379558801651001
    },
    {
      "epoch": 0.00011435546875,
      "model_forward_time": 0.11472702026367188,
      "step": 18736
    },
    {
      "epoch": 0.00011435546875,
      "step": 18736,
      "training_step_time": 0.4121274948120117
    },
    {
      "epoch": 0.000114361572265625,
      "model_forward_time": 0.11463594436645508,
      "step": 18737
    },
    {
      "epoch": 0.000114361572265625,
      "step": 18737,
      "training_step_time": 0.4255208969116211
    },
    {
      "epoch": 0.00011436767578125,
      "model_forward_time": 0.11609029769897461,
      "step": 18738
    },
    {
      "epoch": 0.00011436767578125,
      "step": 18738,
      "training_step_time": 0.4232931137084961
    },
    {
      "epoch": 0.000114373779296875,
      "model_forward_time": 0.11932015419006348,
      "step": 18739
    },
    {
      "epoch": 0.000114373779296875,
      "step": 18739,
      "training_step_time": 0.4625396728515625
    },
    {
      "epoch": 0.0001143798828125,
      "grad_norm": 0.11314091831445694,
      "learning_rate": 8.233598592501828e-05,
      "loss": 0.048,
      "step": 18740
    },
    {
      "epoch": 0.0001143798828125,
      "model_forward_time": 0.11846590042114258,
      "step": 18740
    },
    {
      "epoch": 0.0001143798828125,
      "step": 18740,
      "training_step_time": 0.5685617923736572
    },
    {
      "epoch": 0.000114385986328125,
      "model_forward_time": 0.12313127517700195,
      "step": 18741
    },
    {
      "epoch": 0.000114385986328125,
      "step": 18741,
      "training_step_time": 0.5744926929473877
    },
    {
      "epoch": 0.00011439208984375,
      "model_forward_time": 0.12233376502990723,
      "step": 18742
    },
    {
      "epoch": 0.00011439208984375,
      "step": 18742,
      "training_step_time": 0.6755280494689941
    },
    {
      "epoch": 0.000114398193359375,
      "model_forward_time": 0.11710619926452637,
      "step": 18743
    },
    {
      "epoch": 0.000114398193359375,
      "step": 18743,
      "training_step_time": 0.7162096500396729
    },
    {
      "epoch": 0.000114404296875,
      "model_forward_time": 0.1211237907409668,
      "step": 18744
    },
    {
      "epoch": 0.000114404296875,
      "step": 18744,
      "training_step_time": 0.6523919105529785
    },
    {
      "epoch": 0.000114410400390625,
      "model_forward_time": 0.11653494834899902,
      "step": 18745
    },
    {
      "epoch": 0.000114410400390625,
      "step": 18745,
      "training_step_time": 0.6679017543792725
    },
    {
      "epoch": 0.00011441650390625,
      "model_forward_time": 0.12019705772399902,
      "step": 18746
    },
    {
      "epoch": 0.00011441650390625,
      "step": 18746,
      "training_step_time": 0.7366223335266113
    },
    {
      "epoch": 0.000114422607421875,
      "model_forward_time": 0.11901140213012695,
      "step": 18747
    },
    {
      "epoch": 0.000114422607421875,
      "step": 18747,
      "training_step_time": 0.6557989120483398
    },
    {
      "epoch": 0.0001144287109375,
      "model_forward_time": 0.11857962608337402,
      "step": 18748
    },
    {
      "epoch": 0.0001144287109375,
      "step": 18748,
      "training_step_time": 0.6866307258605957
    },
    {
      "epoch": 0.000114434814453125,
      "model_forward_time": 0.11631035804748535,
      "step": 18749
    },
    {
      "epoch": 0.000114434814453125,
      "step": 18749,
      "training_step_time": 0.6562066078186035
    },
    {
      "epoch": 0.00011444091796875,
      "grad_norm": 0.21572011709213257,
      "learning_rate": 8.231496189304704e-05,
      "loss": 0.0446,
      "step": 18750
    },
    {
      "epoch": 0.00011444091796875,
      "model_forward_time": 0.14493012428283691,
      "step": 18750
    },
    {
      "epoch": 0.00011444091796875,
      "step": 18750,
      "training_step_time": 0.7198386192321777
    },
    {
      "epoch": 0.000114447021484375,
      "model_forward_time": 0.12538862228393555,
      "step": 18751
    },
    {
      "epoch": 0.000114447021484375,
      "step": 18751,
      "training_step_time": 0.7588620185852051
    },
    {
      "epoch": 0.000114453125,
      "model_forward_time": 0.11910271644592285,
      "step": 18752
    },
    {
      "epoch": 0.000114453125,
      "step": 18752,
      "training_step_time": 0.7162261009216309
    },
    {
      "epoch": 0.000114459228515625,
      "model_forward_time": 0.11752152442932129,
      "step": 18753
    },
    {
      "epoch": 0.000114459228515625,
      "step": 18753,
      "training_step_time": 0.775456428527832
    },
    {
      "epoch": 0.00011446533203125,
      "model_forward_time": 0.11810922622680664,
      "step": 18754
    },
    {
      "epoch": 0.00011446533203125,
      "step": 18754,
      "training_step_time": 0.7367300987243652
    },
    {
      "epoch": 0.000114471435546875,
      "model_forward_time": 0.12104225158691406,
      "step": 18755
    },
    {
      "epoch": 0.000114471435546875,
      "step": 18755,
      "training_step_time": 0.7302756309509277
    },
    {
      "epoch": 0.0001144775390625,
      "model_forward_time": 0.11877870559692383,
      "step": 18756
    },
    {
      "epoch": 0.0001144775390625,
      "step": 18756,
      "training_step_time": 0.6789054870605469
    },
    {
      "epoch": 0.000114483642578125,
      "model_forward_time": 0.13577628135681152,
      "step": 18757
    },
    {
      "epoch": 0.000114483642578125,
      "step": 18757,
      "training_step_time": 0.7025835514068604
    },
    {
      "epoch": 0.00011448974609375,
      "model_forward_time": 0.11534953117370605,
      "step": 18758
    },
    {
      "epoch": 0.00011448974609375,
      "step": 18758,
      "training_step_time": 0.6364786624908447
    },
    {
      "epoch": 0.000114495849609375,
      "model_forward_time": 0.11876606941223145,
      "step": 18759
    },
    {
      "epoch": 0.000114495849609375,
      "step": 18759,
      "training_step_time": 0.680307149887085
    },
    {
      "epoch": 0.000114501953125,
      "grad_norm": 0.20953646302223206,
      "learning_rate": 8.229392804464362e-05,
      "loss": 0.0566,
      "step": 18760
    },
    {
      "epoch": 0.000114501953125,
      "model_forward_time": 0.1215054988861084,
      "step": 18760
    },
    {
      "epoch": 0.000114501953125,
      "step": 18760,
      "training_step_time": 0.7124428749084473
    },
    {
      "epoch": 0.000114508056640625,
      "model_forward_time": 0.11934733390808105,
      "step": 18761
    },
    {
      "epoch": 0.000114508056640625,
      "step": 18761,
      "training_step_time": 0.6657507419586182
    },
    {
      "epoch": 0.00011451416015625,
      "model_forward_time": 0.12581610679626465,
      "step": 18762
    },
    {
      "epoch": 0.00011451416015625,
      "step": 18762,
      "training_step_time": 0.749222993850708
    },
    {
      "epoch": 0.000114520263671875,
      "model_forward_time": 0.11880159378051758,
      "step": 18763
    },
    {
      "epoch": 0.000114520263671875,
      "step": 18763,
      "training_step_time": 0.6466355323791504
    },
    {
      "epoch": 0.0001145263671875,
      "model_forward_time": 0.11969351768493652,
      "step": 18764
    },
    {
      "epoch": 0.0001145263671875,
      "step": 18764,
      "training_step_time": 0.5979959964752197
    },
    {
      "epoch": 0.000114532470703125,
      "model_forward_time": 0.12415313720703125,
      "step": 18765
    },
    {
      "epoch": 0.000114532470703125,
      "step": 18765,
      "training_step_time": 0.686211109161377
    },
    {
      "epoch": 0.00011453857421875,
      "model_forward_time": 0.11744141578674316,
      "step": 18766
    },
    {
      "epoch": 0.00011453857421875,
      "step": 18766,
      "training_step_time": 0.7056353092193604
    },
    {
      "epoch": 0.000114544677734375,
      "model_forward_time": 0.11655235290527344,
      "step": 18767
    },
    {
      "epoch": 0.000114544677734375,
      "step": 18767,
      "training_step_time": 0.6904501914978027
    },
    {
      "epoch": 0.00011455078125,
      "model_forward_time": 0.11599206924438477,
      "step": 18768
    },
    {
      "epoch": 0.00011455078125,
      "step": 18768,
      "training_step_time": 0.6415295600891113
    },
    {
      "epoch": 0.000114556884765625,
      "model_forward_time": 0.11932539939880371,
      "step": 18769
    },
    {
      "epoch": 0.000114556884765625,
      "step": 18769,
      "training_step_time": 0.6707570552825928
    },
    {
      "epoch": 0.00011456298828125,
      "grad_norm": 0.15508036315441132,
      "learning_rate": 8.227288438619754e-05,
      "loss": 0.0592,
      "step": 18770
    },
    {
      "epoch": 0.00011456298828125,
      "model_forward_time": 0.11629366874694824,
      "step": 18770
    },
    {
      "epoch": 0.00011456298828125,
      "step": 18770,
      "training_step_time": 0.686215877532959
    },
    {
      "epoch": 0.000114569091796875,
      "model_forward_time": 0.11879634857177734,
      "step": 18771
    },
    {
      "epoch": 0.000114569091796875,
      "step": 18771,
      "training_step_time": 0.6463663578033447
    },
    {
      "epoch": 0.0001145751953125,
      "model_forward_time": 0.11710190773010254,
      "step": 18772
    },
    {
      "epoch": 0.0001145751953125,
      "step": 18772,
      "training_step_time": 0.6894626617431641
    },
    {
      "epoch": 0.000114581298828125,
      "model_forward_time": 0.11874556541442871,
      "step": 18773
    },
    {
      "epoch": 0.000114581298828125,
      "step": 18773,
      "training_step_time": 0.7095093727111816
    },
    {
      "epoch": 0.00011458740234375,
      "model_forward_time": 0.11911916732788086,
      "step": 18774
    },
    {
      "epoch": 0.00011458740234375,
      "step": 18774,
      "training_step_time": 0.6557283401489258
    },
    {
      "epoch": 0.000114593505859375,
      "model_forward_time": 0.11837220191955566,
      "step": 18775
    },
    {
      "epoch": 0.000114593505859375,
      "step": 18775,
      "training_step_time": 0.7417664527893066
    },
    {
      "epoch": 0.000114599609375,
      "model_forward_time": 0.11654782295227051,
      "step": 18776
    },
    {
      "epoch": 0.000114599609375,
      "step": 18776,
      "training_step_time": 0.6857438087463379
    },
    {
      "epoch": 0.000114605712890625,
      "model_forward_time": 0.12006807327270508,
      "step": 18777
    },
    {
      "epoch": 0.000114605712890625,
      "step": 18777,
      "training_step_time": 0.6220438480377197
    },
    {
      "epoch": 0.00011461181640625,
      "model_forward_time": 0.11563777923583984,
      "step": 18778
    },
    {
      "epoch": 0.00011461181640625,
      "step": 18778,
      "training_step_time": 0.5248005390167236
    },
    {
      "epoch": 0.000114617919921875,
      "model_forward_time": 0.12388443946838379,
      "step": 18779
    },
    {
      "epoch": 0.000114617919921875,
      "step": 18779,
      "training_step_time": 0.7389285564422607
    },
    {
      "epoch": 0.0001146240234375,
      "grad_norm": 0.16198092699050903,
      "learning_rate": 8.225183092410128e-05,
      "loss": 0.0577,
      "step": 18780
    },
    {
      "epoch": 0.0001146240234375,
      "model_forward_time": 0.12271857261657715,
      "step": 18780
    },
    {
      "epoch": 0.0001146240234375,
      "step": 18780,
      "training_step_time": 0.6930625438690186
    },
    {
      "epoch": 0.000114630126953125,
      "model_forward_time": 0.11881566047668457,
      "step": 18781
    },
    {
      "epoch": 0.000114630126953125,
      "step": 18781,
      "training_step_time": 0.7114644050598145
    },
    {
      "epoch": 0.00011463623046875,
      "model_forward_time": 0.11786627769470215,
      "step": 18782
    },
    {
      "epoch": 0.00011463623046875,
      "step": 18782,
      "training_step_time": 0.6006746292114258
    },
    {
      "epoch": 0.000114642333984375,
      "model_forward_time": 0.12018728256225586,
      "step": 18783
    },
    {
      "epoch": 0.000114642333984375,
      "step": 18783,
      "training_step_time": 0.6783339977264404
    },
    {
      "epoch": 0.0001146484375,
      "model_forward_time": 0.13085508346557617,
      "step": 18784
    },
    {
      "epoch": 0.0001146484375,
      "step": 18784,
      "training_step_time": 0.6927981376647949
    },
    {
      "epoch": 0.000114654541015625,
      "model_forward_time": 0.12188911437988281,
      "step": 18785
    },
    {
      "epoch": 0.000114654541015625,
      "step": 18785,
      "training_step_time": 0.6895818710327148
    },
    {
      "epoch": 0.00011466064453125,
      "model_forward_time": 0.12027120590209961,
      "step": 18786
    },
    {
      "epoch": 0.00011466064453125,
      "step": 18786,
      "training_step_time": 0.7382256984710693
    },
    {
      "epoch": 0.000114666748046875,
      "model_forward_time": 0.12203669548034668,
      "step": 18787
    },
    {
      "epoch": 0.000114666748046875,
      "step": 18787,
      "training_step_time": 0.6307685375213623
    },
    {
      "epoch": 0.0001146728515625,
      "model_forward_time": 0.1206057071685791,
      "step": 18788
    },
    {
      "epoch": 0.0001146728515625,
      "step": 18788,
      "training_step_time": 0.6908230781555176
    },
    {
      "epoch": 0.000114678955078125,
      "model_forward_time": 0.11910223960876465,
      "step": 18789
    },
    {
      "epoch": 0.000114678955078125,
      "step": 18789,
      "training_step_time": 0.7070479393005371
    },
    {
      "epoch": 0.00011468505859375,
      "grad_norm": 0.19444960355758667,
      "learning_rate": 8.223076766475035e-05,
      "loss": 0.0589,
      "step": 18790
    },
    {
      "epoch": 0.00011468505859375,
      "model_forward_time": 0.11985325813293457,
      "step": 18790
    },
    {
      "epoch": 0.00011468505859375,
      "step": 18790,
      "training_step_time": 0.696753740310669
    },
    {
      "epoch": 0.000114691162109375,
      "model_forward_time": 0.11877751350402832,
      "step": 18791
    },
    {
      "epoch": 0.000114691162109375,
      "step": 18791,
      "training_step_time": 0.7026021480560303
    },
    {
      "epoch": 0.000114697265625,
      "model_forward_time": 0.11671638488769531,
      "step": 18792
    },
    {
      "epoch": 0.000114697265625,
      "step": 18792,
      "training_step_time": 0.7397756576538086
    },
    {
      "epoch": 0.000114703369140625,
      "model_forward_time": 0.11773490905761719,
      "step": 18793
    },
    {
      "epoch": 0.000114703369140625,
      "step": 18793,
      "training_step_time": 0.711961030960083
    },
    {
      "epoch": 0.00011470947265625,
      "model_forward_time": 0.11787176132202148,
      "step": 18794
    },
    {
      "epoch": 0.00011470947265625,
      "step": 18794,
      "training_step_time": 0.6697092056274414
    },
    {
      "epoch": 0.000114715576171875,
      "model_forward_time": 0.11908221244812012,
      "step": 18795
    },
    {
      "epoch": 0.000114715576171875,
      "step": 18795,
      "training_step_time": 0.6985347270965576
    },
    {
      "epoch": 0.0001147216796875,
      "model_forward_time": 0.11668086051940918,
      "step": 18796
    },
    {
      "epoch": 0.0001147216796875,
      "step": 18796,
      "training_step_time": 0.6625142097473145
    },
    {
      "epoch": 0.000114727783203125,
      "model_forward_time": 0.11722016334533691,
      "step": 18797
    },
    {
      "epoch": 0.000114727783203125,
      "step": 18797,
      "training_step_time": 0.6923525333404541
    },
    {
      "epoch": 0.00011473388671875,
      "model_forward_time": 0.12529850006103516,
      "step": 18798
    },
    {
      "epoch": 0.00011473388671875,
      "step": 18798,
      "training_step_time": 0.6209449768066406
    },
    {
      "epoch": 0.000114739990234375,
      "model_forward_time": 0.11858201026916504,
      "step": 18799
    },
    {
      "epoch": 0.000114739990234375,
      "step": 18799,
      "training_step_time": 0.6808960437774658
    },
    {
      "epoch": 0.00011474609375,
      "grad_norm": 0.1652189940214157,
      "learning_rate": 8.220969461454322e-05,
      "loss": 0.0575,
      "step": 18800
    },
    {
      "epoch": 0.00011474609375,
      "model_forward_time": 0.11972570419311523,
      "step": 18800
    },
    {
      "epoch": 0.00011474609375,
      "step": 18800,
      "training_step_time": 0.6699862480163574
    },
    {
      "epoch": 0.000114752197265625,
      "model_forward_time": 0.13344025611877441,
      "step": 18801
    },
    {
      "epoch": 0.000114752197265625,
      "step": 18801,
      "training_step_time": 0.6549606323242188
    },
    {
      "epoch": 0.00011475830078125,
      "model_forward_time": 0.11926937103271484,
      "step": 18802
    },
    {
      "epoch": 0.00011475830078125,
      "step": 18802,
      "training_step_time": 0.6612570285797119
    },
    {
      "epoch": 0.000114764404296875,
      "model_forward_time": 0.11823534965515137,
      "step": 18803
    },
    {
      "epoch": 0.000114764404296875,
      "step": 18803,
      "training_step_time": 0.5928759574890137
    },
    {
      "epoch": 0.0001147705078125,
      "model_forward_time": 0.11973071098327637,
      "step": 18804
    },
    {
      "epoch": 0.0001147705078125,
      "step": 18804,
      "training_step_time": 0.6384940147399902
    },
    {
      "epoch": 0.000114776611328125,
      "model_forward_time": 0.11852312088012695,
      "step": 18805
    },
    {
      "epoch": 0.000114776611328125,
      "step": 18805,
      "training_step_time": 0.65362548828125
    },
    {
      "epoch": 0.00011478271484375,
      "model_forward_time": 0.11896228790283203,
      "step": 18806
    },
    {
      "epoch": 0.00011478271484375,
      "step": 18806,
      "training_step_time": 0.5640289783477783
    },
    {
      "epoch": 0.000114788818359375,
      "model_forward_time": 0.11846256256103516,
      "step": 18807
    },
    {
      "epoch": 0.000114788818359375,
      "step": 18807,
      "training_step_time": 0.5940592288970947
    },
    {
      "epoch": 0.000114794921875,
      "model_forward_time": 0.11633801460266113,
      "step": 18808
    },
    {
      "epoch": 0.000114794921875,
      "step": 18808,
      "training_step_time": 0.6597442626953125
    },
    {
      "epoch": 0.000114801025390625,
      "model_forward_time": 0.1161947250366211,
      "step": 18809
    },
    {
      "epoch": 0.000114801025390625,
      "step": 18809,
      "training_step_time": 0.5395932197570801
    },
    {
      "epoch": 0.00011480712890625,
      "grad_norm": 0.20910127460956573,
      "learning_rate": 8.218861177988129e-05,
      "loss": 0.0602,
      "step": 18810
    },
    {
      "epoch": 0.00011480712890625,
      "model_forward_time": 0.11642670631408691,
      "step": 18810
    },
    {
      "epoch": 0.00011480712890625,
      "step": 18810,
      "training_step_time": 0.6740028858184814
    },
    {
      "epoch": 0.000114813232421875,
      "model_forward_time": 0.11387252807617188,
      "step": 18811
    },
    {
      "epoch": 0.000114813232421875,
      "step": 18811,
      "training_step_time": 0.4729278087615967
    },
    {
      "epoch": 0.0001148193359375,
      "model_forward_time": 0.11430597305297852,
      "step": 18812
    },
    {
      "epoch": 0.0001148193359375,
      "step": 18812,
      "training_step_time": 0.6875636577606201
    },
    {
      "epoch": 0.000114825439453125,
      "model_forward_time": 0.1140434741973877,
      "step": 18813
    },
    {
      "epoch": 0.000114825439453125,
      "step": 18813,
      "training_step_time": 0.5246028900146484
    },
    {
      "epoch": 0.00011483154296875,
      "model_forward_time": 0.11403965950012207,
      "step": 18814
    },
    {
      "epoch": 0.00011483154296875,
      "step": 18814,
      "training_step_time": 0.45072364807128906
    },
    {
      "epoch": 0.000114837646484375,
      "model_forward_time": 0.1168355941772461,
      "step": 18815
    },
    {
      "epoch": 0.000114837646484375,
      "step": 18815,
      "training_step_time": 0.4397404193878174
    },
    {
      "epoch": 0.00011484375,
      "model_forward_time": 0.11755752563476562,
      "step": 18816
    },
    {
      "epoch": 0.00011484375,
      "step": 18816,
      "training_step_time": 0.40535736083984375
    },
    {
      "epoch": 0.000114849853515625,
      "model_forward_time": 0.11692285537719727,
      "step": 18817
    },
    {
      "epoch": 0.000114849853515625,
      "step": 18817,
      "training_step_time": 0.4019739627838135
    },
    {
      "epoch": 0.00011485595703125,
      "model_forward_time": 0.11786556243896484,
      "step": 18818
    },
    {
      "epoch": 0.00011485595703125,
      "step": 18818,
      "training_step_time": 0.38028979301452637
    },
    {
      "epoch": 0.000114862060546875,
      "model_forward_time": 0.11676526069641113,
      "step": 18819
    },
    {
      "epoch": 0.000114862060546875,
      "step": 18819,
      "training_step_time": 0.3746027946472168
    },
    {
      "epoch": 0.0001148681640625,
      "grad_norm": 0.20305761694908142,
      "learning_rate": 8.2167519167169e-05,
      "loss": 0.0573,
      "step": 18820
    },
    {
      "epoch": 0.0001148681640625,
      "model_forward_time": 0.11821651458740234,
      "step": 18820
    },
    {
      "epoch": 0.0001148681640625,
      "step": 18820,
      "training_step_time": 0.4491002559661865
    },
    {
      "epoch": 0.000114874267578125,
      "model_forward_time": 0.11814236640930176,
      "step": 18821
    },
    {
      "epoch": 0.000114874267578125,
      "step": 18821,
      "training_step_time": 0.3937807083129883
    },
    {
      "epoch": 0.00011488037109375,
      "model_forward_time": 0.11799812316894531,
      "step": 18822
    },
    {
      "epoch": 0.00011488037109375,
      "step": 18822,
      "training_step_time": 0.386380672454834
    },
    {
      "epoch": 0.000114886474609375,
      "model_forward_time": 0.11752581596374512,
      "step": 18823
    },
    {
      "epoch": 0.000114886474609375,
      "step": 18823,
      "training_step_time": 0.38929319381713867
    },
    {
      "epoch": 0.000114892578125,
      "model_forward_time": 0.11512160301208496,
      "step": 18824
    },
    {
      "epoch": 0.000114892578125,
      "step": 18824,
      "training_step_time": 0.39186882972717285
    },
    {
      "epoch": 0.000114898681640625,
      "model_forward_time": 0.11504220962524414,
      "step": 18825
    },
    {
      "epoch": 0.000114898681640625,
      "step": 18825,
      "training_step_time": 0.40480589866638184
    },
    {
      "epoch": 0.00011490478515625,
      "model_forward_time": 0.115020751953125,
      "step": 18826
    },
    {
      "epoch": 0.00011490478515625,
      "step": 18826,
      "training_step_time": 0.4041931629180908
    },
    {
      "epoch": 0.000114910888671875,
      "model_forward_time": 0.11544013023376465,
      "step": 18827
    },
    {
      "epoch": 0.000114910888671875,
      "step": 18827,
      "training_step_time": 0.43590736389160156
    },
    {
      "epoch": 0.0001149169921875,
      "model_forward_time": 0.11472177505493164,
      "step": 18828
    },
    {
      "epoch": 0.0001149169921875,
      "step": 18828,
      "training_step_time": 0.44677209854125977
    },
    {
      "epoch": 0.000114923095703125,
      "model_forward_time": 0.11558246612548828,
      "step": 18829
    },
    {
      "epoch": 0.000114923095703125,
      "step": 18829,
      "training_step_time": 0.42827820777893066
    },
    {
      "epoch": 0.00011492919921875,
      "grad_norm": 0.1441527009010315,
      "learning_rate": 8.214641678281374e-05,
      "loss": 0.0566,
      "step": 18830
    },
    {
      "epoch": 0.00011492919921875,
      "model_forward_time": 0.11426639556884766,
      "step": 18830
    },
    {
      "epoch": 0.00011492919921875,
      "step": 18830,
      "training_step_time": 0.40416741371154785
    },
    {
      "epoch": 0.000114935302734375,
      "model_forward_time": 0.11667943000793457,
      "step": 18831
    },
    {
      "epoch": 0.000114935302734375,
      "step": 18831,
      "training_step_time": 0.38864970207214355
    },
    {
      "epoch": 0.00011494140625,
      "model_forward_time": 0.1154475212097168,
      "step": 18832
    },
    {
      "epoch": 0.00011494140625,
      "step": 18832,
      "training_step_time": 0.397510290145874
    },
    {
      "epoch": 0.000114947509765625,
      "model_forward_time": 0.1154022216796875,
      "step": 18833
    },
    {
      "epoch": 0.000114947509765625,
      "step": 18833,
      "training_step_time": 0.40190863609313965
    },
    {
      "epoch": 0.00011495361328125,
      "model_forward_time": 0.11556601524353027,
      "step": 18834
    },
    {
      "epoch": 0.00011495361328125,
      "step": 18834,
      "training_step_time": 0.4029810428619385
    },
    {
      "epoch": 0.000114959716796875,
      "model_forward_time": 0.1154630184173584,
      "step": 18835
    },
    {
      "epoch": 0.000114959716796875,
      "step": 18835,
      "training_step_time": 0.4428434371948242
    },
    {
      "epoch": 0.0001149658203125,
      "model_forward_time": 0.11510491371154785,
      "step": 18836
    },
    {
      "epoch": 0.0001149658203125,
      "step": 18836,
      "training_step_time": 0.4322969913482666
    },
    {
      "epoch": 0.000114971923828125,
      "model_forward_time": 0.11488676071166992,
      "step": 18837
    },
    {
      "epoch": 0.000114971923828125,
      "step": 18837,
      "training_step_time": 0.3812103271484375
    },
    {
      "epoch": 0.00011497802734375,
      "model_forward_time": 0.11505746841430664,
      "step": 18838
    },
    {
      "epoch": 0.00011497802734375,
      "step": 18838,
      "training_step_time": 0.3867363929748535
    },
    {
      "epoch": 0.000114984130859375,
      "model_forward_time": 0.11562800407409668,
      "step": 18839
    },
    {
      "epoch": 0.000114984130859375,
      "step": 18839,
      "training_step_time": 0.3903963565826416
    },
    {
      "epoch": 0.000114990234375,
      "grad_norm": 0.13398884236812592,
      "learning_rate": 8.212530463322583e-05,
      "loss": 0.0524,
      "step": 18840
    },
    {
      "epoch": 0.000114990234375,
      "model_forward_time": 0.11467289924621582,
      "step": 18840
    },
    {
      "epoch": 0.000114990234375,
      "step": 18840,
      "training_step_time": 0.4143826961517334
    },
    {
      "epoch": 0.000114996337890625,
      "model_forward_time": 0.11507534980773926,
      "step": 18841
    },
    {
      "epoch": 0.000114996337890625,
      "step": 18841,
      "training_step_time": 0.4025888442993164
    },
    {
      "epoch": 0.00011500244140625,
      "model_forward_time": 0.11578536033630371,
      "step": 18842
    },
    {
      "epoch": 0.00011500244140625,
      "step": 18842,
      "training_step_time": 0.48575592041015625
    },
    {
      "epoch": 0.000115008544921875,
      "model_forward_time": 0.11510157585144043,
      "step": 18843
    },
    {
      "epoch": 0.000115008544921875,
      "step": 18843,
      "training_step_time": 0.5015933513641357
    },
    {
      "epoch": 0.0001150146484375,
      "model_forward_time": 0.11455202102661133,
      "step": 18844
    },
    {
      "epoch": 0.0001150146484375,
      "step": 18844,
      "training_step_time": 0.4392678737640381
    },
    {
      "epoch": 0.000115020751953125,
      "model_forward_time": 0.11465001106262207,
      "step": 18845
    },
    {
      "epoch": 0.000115020751953125,
      "step": 18845,
      "training_step_time": 0.38829517364501953
    },
    {
      "epoch": 0.00011502685546875,
      "model_forward_time": 0.11515378952026367,
      "step": 18846
    },
    {
      "epoch": 0.00011502685546875,
      "step": 18846,
      "training_step_time": 0.37256336212158203
    },
    {
      "epoch": 0.000115032958984375,
      "model_forward_time": 0.1150658130645752,
      "step": 18847
    },
    {
      "epoch": 0.000115032958984375,
      "step": 18847,
      "training_step_time": 0.40021324157714844
    },
    {
      "epoch": 0.0001150390625,
      "model_forward_time": 0.11560463905334473,
      "step": 18848
    },
    {
      "epoch": 0.0001150390625,
      "step": 18848,
      "training_step_time": 0.3893740177154541
    },
    {
      "epoch": 0.000115045166015625,
      "model_forward_time": 0.11538338661193848,
      "step": 18849
    },
    {
      "epoch": 0.000115045166015625,
      "step": 18849,
      "training_step_time": 0.4589571952819824
    },
    {
      "epoch": 0.00011505126953125,
      "grad_norm": 0.21998478472232819,
      "learning_rate": 8.210418272481859e-05,
      "loss": 0.0561,
      "step": 18850
    },
    {
      "epoch": 0.00011505126953125,
      "model_forward_time": 0.1165306568145752,
      "step": 18850
    },
    {
      "epoch": 0.00011505126953125,
      "step": 18850,
      "training_step_time": 0.5149557590484619
    },
    {
      "epoch": 0.000115057373046875,
      "model_forward_time": 0.11536550521850586,
      "step": 18851
    },
    {
      "epoch": 0.000115057373046875,
      "step": 18851,
      "training_step_time": 0.4369010925292969
    },
    {
      "epoch": 0.0001150634765625,
      "model_forward_time": 0.11587667465209961,
      "step": 18852
    },
    {
      "epoch": 0.0001150634765625,
      "step": 18852,
      "training_step_time": 0.37845349311828613
    },
    {
      "epoch": 0.000115069580078125,
      "model_forward_time": 0.1162559986114502,
      "step": 18853
    },
    {
      "epoch": 0.000115069580078125,
      "step": 18853,
      "training_step_time": 0.39768481254577637
    },
    {
      "epoch": 0.00011507568359375,
      "model_forward_time": 0.11510324478149414,
      "step": 18854
    },
    {
      "epoch": 0.00011507568359375,
      "step": 18854,
      "training_step_time": 0.4094984531402588
    },
    {
      "epoch": 0.000115081787109375,
      "model_forward_time": 0.11452674865722656,
      "step": 18855
    },
    {
      "epoch": 0.000115081787109375,
      "step": 18855,
      "training_step_time": 0.4132559299468994
    },
    {
      "epoch": 0.000115087890625,
      "model_forward_time": 0.11477828025817871,
      "step": 18856
    },
    {
      "epoch": 0.000115087890625,
      "step": 18856,
      "training_step_time": 0.3975341320037842
    },
    {
      "epoch": 0.000115093994140625,
      "model_forward_time": 0.11593770980834961,
      "step": 18857
    },
    {
      "epoch": 0.000115093994140625,
      "step": 18857,
      "training_step_time": 0.4019339084625244
    },
    {
      "epoch": 0.00011510009765625,
      "model_forward_time": 0.11443948745727539,
      "step": 18858
    },
    {
      "epoch": 0.00011510009765625,
      "step": 18858,
      "training_step_time": 0.3799605369567871
    },
    {
      "epoch": 0.000115106201171875,
      "model_forward_time": 0.11531758308410645,
      "step": 18859
    },
    {
      "epoch": 0.000115106201171875,
      "step": 18859,
      "training_step_time": 0.41294288635253906
    },
    {
      "epoch": 0.0001151123046875,
      "grad_norm": 0.20863133668899536,
      "learning_rate": 8.20830510640083e-05,
      "loss": 0.055,
      "step": 18860
    },
    {
      "epoch": 0.0001151123046875,
      "model_forward_time": 0.11501812934875488,
      "step": 18860
    },
    {
      "epoch": 0.0001151123046875,
      "step": 18860,
      "training_step_time": 0.3923659324645996
    },
    {
      "epoch": 0.000115118408203125,
      "model_forward_time": 0.11495065689086914,
      "step": 18861
    },
    {
      "epoch": 0.000115118408203125,
      "step": 18861,
      "training_step_time": 0.3948638439178467
    },
    {
      "epoch": 0.00011512451171875,
      "model_forward_time": 0.11477541923522949,
      "step": 18862
    },
    {
      "epoch": 0.00011512451171875,
      "step": 18862,
      "training_step_time": 0.3872365951538086
    },
    {
      "epoch": 0.000115130615234375,
      "model_forward_time": 0.11506485939025879,
      "step": 18863
    },
    {
      "epoch": 0.000115130615234375,
      "step": 18863,
      "training_step_time": 0.38434481620788574
    },
    {
      "epoch": 0.00011513671875,
      "model_forward_time": 0.1150054931640625,
      "step": 18864
    },
    {
      "epoch": 0.00011513671875,
      "step": 18864,
      "training_step_time": 0.36723804473876953
    },
    {
      "epoch": 0.000115142822265625,
      "model_forward_time": 0.11505579948425293,
      "step": 18865
    },
    {
      "epoch": 0.000115142822265625,
      "step": 18865,
      "training_step_time": 0.4798879623413086
    },
    {
      "epoch": 0.00011514892578125,
      "model_forward_time": 0.11961698532104492,
      "step": 18866
    },
    {
      "epoch": 0.00011514892578125,
      "step": 18866,
      "training_step_time": 0.3945786952972412
    },
    {
      "epoch": 0.000115155029296875,
      "model_forward_time": 0.11821460723876953,
      "step": 18867
    },
    {
      "epoch": 0.000115155029296875,
      "step": 18867,
      "training_step_time": 0.3833749294281006
    },
    {
      "epoch": 0.0001151611328125,
      "model_forward_time": 0.11788630485534668,
      "step": 18868
    },
    {
      "epoch": 0.0001151611328125,
      "step": 18868,
      "training_step_time": 0.39410924911499023
    },
    {
      "epoch": 0.000115167236328125,
      "model_forward_time": 0.11575126647949219,
      "step": 18869
    },
    {
      "epoch": 0.000115167236328125,
      "step": 18869,
      "training_step_time": 0.4629807472229004
    },
    {
      "epoch": 0.00011517333984375,
      "grad_norm": 0.15310925245285034,
      "learning_rate": 8.206190965721419e-05,
      "loss": 0.0532,
      "step": 18870
    },
    {
      "epoch": 0.00011517333984375,
      "model_forward_time": 0.11476969718933105,
      "step": 18870
    },
    {
      "epoch": 0.00011517333984375,
      "step": 18870,
      "training_step_time": 0.4093818664550781
    },
    {
      "epoch": 0.000115179443359375,
      "model_forward_time": 0.11500668525695801,
      "step": 18871
    },
    {
      "epoch": 0.000115179443359375,
      "step": 18871,
      "training_step_time": 0.42937397956848145
    },
    {
      "epoch": 0.000115185546875,
      "model_forward_time": 0.11519789695739746,
      "step": 18872
    },
    {
      "epoch": 0.000115185546875,
      "step": 18872,
      "training_step_time": 0.3814687728881836
    },
    {
      "epoch": 0.000115191650390625,
      "model_forward_time": 0.11495375633239746,
      "step": 18873
    },
    {
      "epoch": 0.000115191650390625,
      "step": 18873,
      "training_step_time": 0.4503512382507324
    },
    {
      "epoch": 0.00011519775390625,
      "model_forward_time": 0.11493396759033203,
      "step": 18874
    },
    {
      "epoch": 0.00011519775390625,
      "step": 18874,
      "training_step_time": 0.40769147872924805
    },
    {
      "epoch": 0.000115203857421875,
      "model_forward_time": 0.11531496047973633,
      "step": 18875
    },
    {
      "epoch": 0.000115203857421875,
      "step": 18875,
      "training_step_time": 0.3900580406188965
    },
    {
      "epoch": 0.0001152099609375,
      "model_forward_time": 0.11471700668334961,
      "step": 18876
    },
    {
      "epoch": 0.0001152099609375,
      "step": 18876,
      "training_step_time": 0.4003770351409912
    },
    {
      "epoch": 0.000115216064453125,
      "model_forward_time": 0.11455678939819336,
      "step": 18877
    },
    {
      "epoch": 0.000115216064453125,
      "step": 18877,
      "training_step_time": 0.3943312168121338
    },
    {
      "epoch": 0.00011522216796875,
      "model_forward_time": 0.11503434181213379,
      "step": 18878
    },
    {
      "epoch": 0.00011522216796875,
      "step": 18878,
      "training_step_time": 0.41091442108154297
    },
    {
      "epoch": 0.000115228271484375,
      "model_forward_time": 0.11540079116821289,
      "step": 18879
    },
    {
      "epoch": 0.000115228271484375,
      "step": 18879,
      "training_step_time": 0.40672826766967773
    },
    {
      "epoch": 0.000115234375,
      "grad_norm": 0.12725377082824707,
      "learning_rate": 8.204075851085849e-05,
      "loss": 0.0575,
      "step": 18880
    },
    {
      "epoch": 0.000115234375,
      "model_forward_time": 0.11483621597290039,
      "step": 18880
    },
    {
      "epoch": 0.000115234375,
      "step": 18880,
      "training_step_time": 0.38864564895629883
    },
    {
      "epoch": 0.000115240478515625,
      "model_forward_time": 0.11500215530395508,
      "step": 18881
    },
    {
      "epoch": 0.000115240478515625,
      "step": 18881,
      "training_step_time": 0.4324479103088379
    },
    {
      "epoch": 0.00011524658203125,
      "model_forward_time": 0.11535024642944336,
      "step": 18882
    },
    {
      "epoch": 0.00011524658203125,
      "step": 18882,
      "training_step_time": 0.38900017738342285
    },
    {
      "epoch": 0.000115252685546875,
      "model_forward_time": 0.11607837677001953,
      "step": 18883
    },
    {
      "epoch": 0.000115252685546875,
      "step": 18883,
      "training_step_time": 0.3945810794830322
    },
    {
      "epoch": 0.0001152587890625,
      "model_forward_time": 0.11484026908874512,
      "step": 18884
    },
    {
      "epoch": 0.0001152587890625,
      "step": 18884,
      "training_step_time": 0.4015684127807617
    },
    {
      "epoch": 0.000115264892578125,
      "model_forward_time": 0.1159055233001709,
      "step": 18885
    },
    {
      "epoch": 0.000115264892578125,
      "step": 18885,
      "training_step_time": 0.4444580078125
    },
    {
      "epoch": 0.00011527099609375,
      "model_forward_time": 0.11560416221618652,
      "step": 18886
    },
    {
      "epoch": 0.00011527099609375,
      "step": 18886,
      "training_step_time": 0.43538808822631836
    },
    {
      "epoch": 0.000115277099609375,
      "model_forward_time": 0.11519646644592285,
      "step": 18887
    },
    {
      "epoch": 0.000115277099609375,
      "step": 18887,
      "training_step_time": 0.4369387626647949
    },
    {
      "epoch": 0.000115283203125,
      "model_forward_time": 0.11460018157958984,
      "step": 18888
    },
    {
      "epoch": 0.000115283203125,
      "step": 18888,
      "training_step_time": 0.40727710723876953
    },
    {
      "epoch": 0.000115289306640625,
      "model_forward_time": 0.11554193496704102,
      "step": 18889
    },
    {
      "epoch": 0.000115289306640625,
      "step": 18889,
      "training_step_time": 0.4696378707885742
    },
    {
      "epoch": 0.00011529541015625,
      "grad_norm": 0.15314538776874542,
      "learning_rate": 8.201959763136633e-05,
      "loss": 0.0536,
      "step": 18890
    },
    {
      "epoch": 0.00011529541015625,
      "model_forward_time": 0.11520695686340332,
      "step": 18890
    },
    {
      "epoch": 0.00011529541015625,
      "step": 18890,
      "training_step_time": 0.39501094818115234
    },
    {
      "epoch": 0.000115301513671875,
      "model_forward_time": 0.11522459983825684,
      "step": 18891
    },
    {
      "epoch": 0.000115301513671875,
      "step": 18891,
      "training_step_time": 0.39412736892700195
    },
    {
      "epoch": 0.0001153076171875,
      "model_forward_time": 0.11557865142822266,
      "step": 18892
    },
    {
      "epoch": 0.0001153076171875,
      "step": 18892,
      "training_step_time": 0.3908970355987549
    },
    {
      "epoch": 0.000115313720703125,
      "model_forward_time": 0.11524653434753418,
      "step": 18893
    },
    {
      "epoch": 0.000115313720703125,
      "step": 18893,
      "training_step_time": 0.3970909118652344
    },
    {
      "epoch": 0.00011531982421875,
      "model_forward_time": 0.11544466018676758,
      "step": 18894
    },
    {
      "epoch": 0.00011531982421875,
      "step": 18894,
      "training_step_time": 0.4160647392272949
    },
    {
      "epoch": 0.000115325927734375,
      "model_forward_time": 0.11822152137756348,
      "step": 18895
    },
    {
      "epoch": 0.000115325927734375,
      "step": 18895,
      "training_step_time": 0.5814590454101562
    },
    {
      "epoch": 0.00011533203125,
      "model_forward_time": 0.11826157569885254,
      "step": 18896
    },
    {
      "epoch": 0.00011533203125,
      "step": 18896,
      "training_step_time": 0.38362884521484375
    },
    {
      "epoch": 0.000115338134765625,
      "model_forward_time": 0.11911535263061523,
      "step": 18897
    },
    {
      "epoch": 0.000115338134765625,
      "step": 18897,
      "training_step_time": 0.4172208309173584
    },
    {
      "epoch": 0.00011534423828125,
      "model_forward_time": 0.1189568042755127,
      "step": 18898
    },
    {
      "epoch": 0.00011534423828125,
      "step": 18898,
      "training_step_time": 0.38773250579833984
    },
    {
      "epoch": 0.000115350341796875,
      "model_forward_time": 0.11800432205200195,
      "step": 18899
    },
    {
      "epoch": 0.000115350341796875,
      "step": 18899,
      "training_step_time": 0.4086489677429199
    },
    {
      "epoch": 0.0001153564453125,
      "grad_norm": 0.15258798003196716,
      "learning_rate": 8.199842702516583e-05,
      "loss": 0.0558,
      "step": 18900
    },
    {
      "epoch": 0.0001153564453125,
      "model_forward_time": 0.11807847023010254,
      "step": 18900
    },
    {
      "epoch": 0.0001153564453125,
      "step": 18900,
      "training_step_time": 0.5022623538970947
    },
    {
      "epoch": 0.000115362548828125,
      "model_forward_time": 0.12107563018798828,
      "step": 18901
    },
    {
      "epoch": 0.000115362548828125,
      "step": 18901,
      "training_step_time": 0.5185563564300537
    },
    {
      "epoch": 0.00011536865234375,
      "model_forward_time": 0.1158604621887207,
      "step": 18902
    },
    {
      "epoch": 0.00011536865234375,
      "step": 18902,
      "training_step_time": 0.4333205223083496
    },
    {
      "epoch": 0.000115374755859375,
      "model_forward_time": 0.1147000789642334,
      "step": 18903
    },
    {
      "epoch": 0.000115374755859375,
      "step": 18903,
      "training_step_time": 0.39179301261901855
    },
    {
      "epoch": 0.000115380859375,
      "model_forward_time": 0.11419200897216797,
      "step": 18904
    },
    {
      "epoch": 0.000115380859375,
      "step": 18904,
      "training_step_time": 0.3879368305206299
    },
    {
      "epoch": 0.000115386962890625,
      "model_forward_time": 0.11442947387695312,
      "step": 18905
    },
    {
      "epoch": 0.000115386962890625,
      "step": 18905,
      "training_step_time": 0.3909943103790283
    },
    {
      "epoch": 0.00011539306640625,
      "model_forward_time": 0.11554718017578125,
      "step": 18906
    },
    {
      "epoch": 0.00011539306640625,
      "step": 18906,
      "training_step_time": 0.3893423080444336
    },
    {
      "epoch": 0.000115399169921875,
      "model_forward_time": 0.11536741256713867,
      "step": 18907
    },
    {
      "epoch": 0.000115399169921875,
      "step": 18907,
      "training_step_time": 0.6113994121551514
    },
    {
      "epoch": 0.0001154052734375,
      "model_forward_time": 0.1145925521850586,
      "step": 18908
    },
    {
      "epoch": 0.0001154052734375,
      "step": 18908,
      "training_step_time": 0.4292466640472412
    },
    {
      "epoch": 0.000115411376953125,
      "model_forward_time": 0.11470723152160645,
      "step": 18909
    },
    {
      "epoch": 0.000115411376953125,
      "step": 18909,
      "training_step_time": 0.4909074306488037
    },
    {
      "epoch": 0.00011541748046875,
      "grad_norm": 0.13998039066791534,
      "learning_rate": 8.197724669868807e-05,
      "loss": 0.0528,
      "step": 18910
    },
    {
      "epoch": 0.00011541748046875,
      "model_forward_time": 0.11478352546691895,
      "step": 18910
    },
    {
      "epoch": 0.00011541748046875,
      "step": 18910,
      "training_step_time": 0.4167940616607666
    },
    {
      "epoch": 0.000115423583984375,
      "model_forward_time": 0.11460614204406738,
      "step": 18911
    },
    {
      "epoch": 0.000115423583984375,
      "step": 18911,
      "training_step_time": 0.3892991542816162
    },
    {
      "epoch": 0.0001154296875,
      "model_forward_time": 0.11412930488586426,
      "step": 18912
    },
    {
      "epoch": 0.0001154296875,
      "step": 18912,
      "training_step_time": 0.39739418029785156
    },
    {
      "epoch": 0.000115435791015625,
      "model_forward_time": 0.11587285995483398,
      "step": 18913
    },
    {
      "epoch": 0.000115435791015625,
      "step": 18913,
      "training_step_time": 0.49935412406921387
    },
    {
      "epoch": 0.00011544189453125,
      "model_forward_time": 0.11502814292907715,
      "step": 18914
    },
    {
      "epoch": 0.00011544189453125,
      "step": 18914,
      "training_step_time": 0.41103053092956543
    },
    {
      "epoch": 0.000115447998046875,
      "model_forward_time": 0.11500692367553711,
      "step": 18915
    },
    {
      "epoch": 0.000115447998046875,
      "step": 18915,
      "training_step_time": 0.48241686820983887
    },
    {
      "epoch": 0.0001154541015625,
      "model_forward_time": 0.11485004425048828,
      "step": 18916
    },
    {
      "epoch": 0.0001154541015625,
      "step": 18916,
      "training_step_time": 0.472409725189209
    },
    {
      "epoch": 0.000115460205078125,
      "model_forward_time": 0.11447954177856445,
      "step": 18917
    },
    {
      "epoch": 0.000115460205078125,
      "step": 18917,
      "training_step_time": 0.3996131420135498
    },
    {
      "epoch": 0.00011546630859375,
      "model_forward_time": 0.11664438247680664,
      "step": 18918
    },
    {
      "epoch": 0.00011546630859375,
      "step": 18918,
      "training_step_time": 0.3978903293609619
    },
    {
      "epoch": 0.000115472412109375,
      "model_forward_time": 0.11517047882080078,
      "step": 18919
    },
    {
      "epoch": 0.000115472412109375,
      "step": 18919,
      "training_step_time": 0.495650053024292
    },
    {
      "epoch": 0.000115478515625,
      "grad_norm": 0.16155503690242767,
      "learning_rate": 8.19560566583671e-05,
      "loss": 0.0603,
      "step": 18920
    },
    {
      "epoch": 0.000115478515625,
      "model_forward_time": 0.11456441879272461,
      "step": 18920
    },
    {
      "epoch": 0.000115478515625,
      "step": 18920,
      "training_step_time": 0.39338088035583496
    },
    {
      "epoch": 0.000115484619140625,
      "model_forward_time": 0.11467385292053223,
      "step": 18921
    },
    {
      "epoch": 0.000115484619140625,
      "step": 18921,
      "training_step_time": 0.3999009132385254
    },
    {
      "epoch": 0.00011549072265625,
      "model_forward_time": 0.11581087112426758,
      "step": 18922
    },
    {
      "epoch": 0.00011549072265625,
      "step": 18922,
      "training_step_time": 0.4256618022918701
    },
    {
      "epoch": 0.000115496826171875,
      "model_forward_time": 0.11599445343017578,
      "step": 18923
    },
    {
      "epoch": 0.000115496826171875,
      "step": 18923,
      "training_step_time": 0.48733973503112793
    },
    {
      "epoch": 0.0001155029296875,
      "model_forward_time": 0.11566972732543945,
      "step": 18924
    },
    {
      "epoch": 0.0001155029296875,
      "step": 18924,
      "training_step_time": 0.45589756965637207
    },
    {
      "epoch": 0.000115509033203125,
      "model_forward_time": 0.11507153511047363,
      "step": 18925
    },
    {
      "epoch": 0.000115509033203125,
      "step": 18925,
      "training_step_time": 0.5247542858123779
    },
    {
      "epoch": 0.00011551513671875,
      "model_forward_time": 0.11478400230407715,
      "step": 18926
    },
    {
      "epoch": 0.00011551513671875,
      "step": 18926,
      "training_step_time": 0.38583946228027344
    },
    {
      "epoch": 0.000115521240234375,
      "model_forward_time": 0.11458992958068848,
      "step": 18927
    },
    {
      "epoch": 0.000115521240234375,
      "step": 18927,
      "training_step_time": 0.38484835624694824
    },
    {
      "epoch": 0.00011552734375,
      "model_forward_time": 0.11457324028015137,
      "step": 18928
    },
    {
      "epoch": 0.00011552734375,
      "step": 18928,
      "training_step_time": 0.4359588623046875
    },
    {
      "epoch": 0.000115533447265625,
      "model_forward_time": 0.11450433731079102,
      "step": 18929
    },
    {
      "epoch": 0.000115533447265625,
      "step": 18929,
      "training_step_time": 0.5284593105316162
    },
    {
      "epoch": 0.00011553955078125,
      "grad_norm": 0.1309194415807724,
      "learning_rate": 8.193485691063985e-05,
      "loss": 0.0548,
      "step": 18930
    },
    {
      "epoch": 0.00011553955078125,
      "model_forward_time": 0.11509990692138672,
      "step": 18930
    },
    {
      "epoch": 0.00011553955078125,
      "step": 18930,
      "training_step_time": 0.40268850326538086
    },
    {
      "epoch": 0.000115545654296875,
      "model_forward_time": 0.11510705947875977,
      "step": 18931
    },
    {
      "epoch": 0.000115545654296875,
      "step": 18931,
      "training_step_time": 0.4310789108276367
    },
    {
      "epoch": 0.0001155517578125,
      "model_forward_time": 0.1152040958404541,
      "step": 18932
    },
    {
      "epoch": 0.0001155517578125,
      "step": 18932,
      "training_step_time": 0.38126611709594727
    },
    {
      "epoch": 0.000115557861328125,
      "model_forward_time": 0.11606884002685547,
      "step": 18933
    },
    {
      "epoch": 0.000115557861328125,
      "step": 18933,
      "training_step_time": 0.3894631862640381
    },
    {
      "epoch": 0.00011556396484375,
      "model_forward_time": 0.11464262008666992,
      "step": 18934
    },
    {
      "epoch": 0.00011556396484375,
      "step": 18934,
      "training_step_time": 0.38924336433410645
    },
    {
      "epoch": 0.000115570068359375,
      "model_forward_time": 0.11722469329833984,
      "step": 18935
    },
    {
      "epoch": 0.000115570068359375,
      "step": 18935,
      "training_step_time": 0.39322495460510254
    },
    {
      "epoch": 0.000115576171875,
      "model_forward_time": 0.11531186103820801,
      "step": 18936
    },
    {
      "epoch": 0.000115576171875,
      "step": 18936,
      "training_step_time": 0.37974071502685547
    },
    {
      "epoch": 0.000115582275390625,
      "model_forward_time": 0.11488056182861328,
      "step": 18937
    },
    {
      "epoch": 0.000115582275390625,
      "step": 18937,
      "training_step_time": 0.7899720668792725
    },
    {
      "epoch": 0.00011558837890625,
      "model_forward_time": 0.11502361297607422,
      "step": 18938
    },
    {
      "epoch": 0.00011558837890625,
      "step": 18938,
      "training_step_time": 0.45406532287597656
    },
    {
      "epoch": 0.000115594482421875,
      "model_forward_time": 0.1140141487121582,
      "step": 18939
    },
    {
      "epoch": 0.000115594482421875,
      "step": 18939,
      "training_step_time": 0.3802924156188965
    },
    {
      "epoch": 0.0001156005859375,
      "grad_norm": 0.16715241968631744,
      "learning_rate": 8.191364746194625e-05,
      "loss": 0.0563,
      "step": 18940
    },
    {
      "epoch": 0.0001156005859375,
      "model_forward_time": 0.11421012878417969,
      "step": 18940
    },
    {
      "epoch": 0.0001156005859375,
      "step": 18940,
      "training_step_time": 0.3959038257598877
    },
    {
      "epoch": 0.000115606689453125,
      "model_forward_time": 0.11406946182250977,
      "step": 18941
    },
    {
      "epoch": 0.000115606689453125,
      "step": 18941,
      "training_step_time": 0.40278172492980957
    },
    {
      "epoch": 0.00011561279296875,
      "model_forward_time": 0.11458158493041992,
      "step": 18942
    },
    {
      "epoch": 0.00011561279296875,
      "step": 18942,
      "training_step_time": 0.5092549324035645
    },
    {
      "epoch": 0.000115618896484375,
      "model_forward_time": 0.1154935359954834,
      "step": 18943
    },
    {
      "epoch": 0.000115618896484375,
      "step": 18943,
      "training_step_time": 0.4965839385986328
    },
    {
      "epoch": 0.000115625,
      "model_forward_time": 0.11477494239807129,
      "step": 18944
    },
    {
      "epoch": 0.000115625,
      "step": 18944,
      "training_step_time": 0.45168614387512207
    },
    {
      "epoch": 0.000115631103515625,
      "model_forward_time": 0.11537647247314453,
      "step": 18945
    },
    {
      "epoch": 0.000115631103515625,
      "step": 18945,
      "training_step_time": 0.38460469245910645
    },
    {
      "epoch": 0.00011563720703125,
      "model_forward_time": 0.11475825309753418,
      "step": 18946
    },
    {
      "epoch": 0.00011563720703125,
      "step": 18946,
      "training_step_time": 0.389925479888916
    },
    {
      "epoch": 0.000115643310546875,
      "model_forward_time": 0.11477112770080566,
      "step": 18947
    },
    {
      "epoch": 0.000115643310546875,
      "step": 18947,
      "training_step_time": 0.3828146457672119
    },
    {
      "epoch": 0.0001156494140625,
      "model_forward_time": 0.11513304710388184,
      "step": 18948
    },
    {
      "epoch": 0.0001156494140625,
      "step": 18948,
      "training_step_time": 0.39017176628112793
    },
    {
      "epoch": 0.000115655517578125,
      "model_forward_time": 0.11656928062438965,
      "step": 18949
    },
    {
      "epoch": 0.000115655517578125,
      "step": 18949,
      "training_step_time": 0.5863285064697266
    },
    {
      "epoch": 0.00011566162109375,
      "grad_norm": 0.14783252775669098,
      "learning_rate": 8.18924283187292e-05,
      "loss": 0.0523,
      "step": 18950
    },
    {
      "epoch": 0.00011566162109375,
      "model_forward_time": 0.11501646041870117,
      "step": 18950
    },
    {
      "epoch": 0.00011566162109375,
      "step": 18950,
      "training_step_time": 0.424760103225708
    },
    {
      "epoch": 0.000115667724609375,
      "model_forward_time": 0.11553335189819336,
      "step": 18951
    },
    {
      "epoch": 0.000115667724609375,
      "step": 18951,
      "training_step_time": 0.39382123947143555
    },
    {
      "epoch": 0.000115673828125,
      "model_forward_time": 0.11581230163574219,
      "step": 18952
    },
    {
      "epoch": 0.000115673828125,
      "step": 18952,
      "training_step_time": 0.4227597713470459
    },
    {
      "epoch": 0.000115679931640625,
      "model_forward_time": 0.11464595794677734,
      "step": 18953
    },
    {
      "epoch": 0.000115679931640625,
      "step": 18953,
      "training_step_time": 0.39714860916137695
    },
    {
      "epoch": 0.00011568603515625,
      "model_forward_time": 0.11500811576843262,
      "step": 18954
    },
    {
      "epoch": 0.00011568603515625,
      "step": 18954,
      "training_step_time": 0.39476633071899414
    },
    {
      "epoch": 0.000115692138671875,
      "model_forward_time": 0.11504912376403809,
      "step": 18955
    },
    {
      "epoch": 0.000115692138671875,
      "step": 18955,
      "training_step_time": 0.5229048728942871
    },
    {
      "epoch": 0.0001156982421875,
      "model_forward_time": 0.11492753028869629,
      "step": 18956
    },
    {
      "epoch": 0.0001156982421875,
      "step": 18956,
      "training_step_time": 0.48716306686401367
    },
    {
      "epoch": 0.000115704345703125,
      "model_forward_time": 0.1149907112121582,
      "step": 18957
    },
    {
      "epoch": 0.000115704345703125,
      "step": 18957,
      "training_step_time": 0.4017305374145508
    },
    {
      "epoch": 0.00011571044921875,
      "model_forward_time": 0.11518216133117676,
      "step": 18958
    },
    {
      "epoch": 0.00011571044921875,
      "step": 18958,
      "training_step_time": 0.4700140953063965
    },
    {
      "epoch": 0.000115716552734375,
      "model_forward_time": 0.11442708969116211,
      "step": 18959
    },
    {
      "epoch": 0.000115716552734375,
      "step": 18959,
      "training_step_time": 0.3880941867828369
    },
    {
      "epoch": 0.00011572265625,
      "grad_norm": 0.15808501839637756,
      "learning_rate": 8.18711994874345e-05,
      "loss": 0.0595,
      "step": 18960
    },
    {
      "epoch": 0.00011572265625,
      "model_forward_time": 0.1143045425415039,
      "step": 18960
    },
    {
      "epoch": 0.00011572265625,
      "step": 18960,
      "training_step_time": 0.3844268321990967
    },
    {
      "epoch": 0.000115728759765625,
      "model_forward_time": 0.11476778984069824,
      "step": 18961
    },
    {
      "epoch": 0.000115728759765625,
      "step": 18961,
      "training_step_time": 0.48408031463623047
    },
    {
      "epoch": 0.00011573486328125,
      "model_forward_time": 0.11472058296203613,
      "step": 18962
    },
    {
      "epoch": 0.00011573486328125,
      "step": 18962,
      "training_step_time": 0.39075326919555664
    },
    {
      "epoch": 0.000115740966796875,
      "model_forward_time": 0.11497879028320312,
      "step": 18963
    },
    {
      "epoch": 0.000115740966796875,
      "step": 18963,
      "training_step_time": 0.4205949306488037
    },
    {
      "epoch": 0.0001157470703125,
      "model_forward_time": 0.11453700065612793,
      "step": 18964
    },
    {
      "epoch": 0.0001157470703125,
      "step": 18964,
      "training_step_time": 0.36514902114868164
    },
    {
      "epoch": 0.000115753173828125,
      "model_forward_time": 0.11410903930664062,
      "step": 18965
    },
    {
      "epoch": 0.000115753173828125,
      "step": 18965,
      "training_step_time": 0.41950201988220215
    },
    {
      "epoch": 0.00011575927734375,
      "model_forward_time": 0.11479640007019043,
      "step": 18966
    },
    {
      "epoch": 0.00011575927734375,
      "step": 18966,
      "training_step_time": 0.38741207122802734
    },
    {
      "epoch": 0.000115765380859375,
      "model_forward_time": 0.11514973640441895,
      "step": 18967
    },
    {
      "epoch": 0.000115765380859375,
      "step": 18967,
      "training_step_time": 0.4897747039794922
    },
    {
      "epoch": 0.000115771484375,
      "model_forward_time": 0.11474370956420898,
      "step": 18968
    },
    {
      "epoch": 0.000115771484375,
      "step": 18968,
      "training_step_time": 0.38260912895202637
    },
    {
      "epoch": 0.000115777587890625,
      "model_forward_time": 0.1143331527709961,
      "step": 18969
    },
    {
      "epoch": 0.000115777587890625,
      "step": 18969,
      "training_step_time": 0.4272630214691162
    },
    {
      "epoch": 0.00011578369140625,
      "grad_norm": 0.14004135131835938,
      "learning_rate": 8.184996097451089e-05,
      "loss": 0.0514,
      "step": 18970
    },
    {
      "epoch": 0.00011578369140625,
      "model_forward_time": 0.1138460636138916,
      "step": 18970
    },
    {
      "epoch": 0.00011578369140625,
      "step": 18970,
      "training_step_time": 0.45513916015625
    },
    {
      "epoch": 0.000115789794921875,
      "model_forward_time": 0.11566638946533203,
      "step": 18971
    },
    {
      "epoch": 0.000115789794921875,
      "step": 18971,
      "training_step_time": 0.38750791549682617
    },
    {
      "epoch": 0.0001157958984375,
      "model_forward_time": 0.11567950248718262,
      "step": 18972
    },
    {
      "epoch": 0.0001157958984375,
      "step": 18972,
      "training_step_time": 0.3951900005340576
    },
    {
      "epoch": 0.000115802001953125,
      "model_forward_time": 0.11495065689086914,
      "step": 18973
    },
    {
      "epoch": 0.000115802001953125,
      "step": 18973,
      "training_step_time": 0.5676836967468262
    },
    {
      "epoch": 0.00011580810546875,
      "model_forward_time": 0.1172337532043457,
      "step": 18974
    },
    {
      "epoch": 0.00011580810546875,
      "step": 18974,
      "training_step_time": 0.38712382316589355
    },
    {
      "epoch": 0.000115814208984375,
      "model_forward_time": 0.11528539657592773,
      "step": 18975
    },
    {
      "epoch": 0.000115814208984375,
      "step": 18975,
      "training_step_time": 0.386613130569458
    },
    {
      "epoch": 0.0001158203125,
      "model_forward_time": 0.11564302444458008,
      "step": 18976
    },
    {
      "epoch": 0.0001158203125,
      "step": 18976,
      "training_step_time": 0.44637274742126465
    },
    {
      "epoch": 0.000115826416015625,
      "model_forward_time": 0.11457657814025879,
      "step": 18977
    },
    {
      "epoch": 0.000115826416015625,
      "step": 18977,
      "training_step_time": 0.4072103500366211
    },
    {
      "epoch": 0.00011583251953125,
      "model_forward_time": 0.11468291282653809,
      "step": 18978
    },
    {
      "epoch": 0.00011583251953125,
      "step": 18978,
      "training_step_time": 0.39609384536743164
    },
    {
      "epoch": 0.000115838623046875,
      "model_forward_time": 0.11572384834289551,
      "step": 18979
    },
    {
      "epoch": 0.000115838623046875,
      "step": 18979,
      "training_step_time": 0.5659935474395752
    },
    {
      "epoch": 0.0001158447265625,
      "grad_norm": 0.1757759302854538,
      "learning_rate": 8.182871278641009e-05,
      "loss": 0.0535,
      "step": 18980
    },
    {
      "epoch": 0.0001158447265625,
      "model_forward_time": 0.11446809768676758,
      "step": 18980
    },
    {
      "epoch": 0.0001158447265625,
      "step": 18980,
      "training_step_time": 0.4212930202484131
    },
    {
      "epoch": 0.000115850830078125,
      "model_forward_time": 0.11502218246459961,
      "step": 18981
    },
    {
      "epoch": 0.000115850830078125,
      "step": 18981,
      "training_step_time": 0.38027524948120117
    },
    {
      "epoch": 0.00011585693359375,
      "model_forward_time": 0.11479830741882324,
      "step": 18982
    },
    {
      "epoch": 0.00011585693359375,
      "step": 18982,
      "training_step_time": 0.3878040313720703
    },
    {
      "epoch": 0.000115863037109375,
      "model_forward_time": 0.11497282981872559,
      "step": 18983
    },
    {
      "epoch": 0.000115863037109375,
      "step": 18983,
      "training_step_time": 0.4690399169921875
    },
    {
      "epoch": 0.000115869140625,
      "model_forward_time": 0.11507439613342285,
      "step": 18984
    },
    {
      "epoch": 0.000115869140625,
      "step": 18984,
      "training_step_time": 0.4001739025115967
    },
    {
      "epoch": 0.000115875244140625,
      "model_forward_time": 0.11413359642028809,
      "step": 18985
    },
    {
      "epoch": 0.000115875244140625,
      "step": 18985,
      "training_step_time": 0.5459229946136475
    },
    {
      "epoch": 0.00011588134765625,
      "model_forward_time": 0.11498808860778809,
      "step": 18986
    },
    {
      "epoch": 0.00011588134765625,
      "step": 18986,
      "training_step_time": 0.4165627956390381
    },
    {
      "epoch": 0.000115887451171875,
      "model_forward_time": 0.1156158447265625,
      "step": 18987
    },
    {
      "epoch": 0.000115887451171875,
      "step": 18987,
      "training_step_time": 0.49213337898254395
    },
    {
      "epoch": 0.0001158935546875,
      "model_forward_time": 0.11400246620178223,
      "step": 18988
    },
    {
      "epoch": 0.0001158935546875,
      "step": 18988,
      "training_step_time": 0.3932044506072998
    },
    {
      "epoch": 0.000115899658203125,
      "model_forward_time": 0.11452007293701172,
      "step": 18989
    },
    {
      "epoch": 0.000115899658203125,
      "step": 18989,
      "training_step_time": 0.4210050106048584
    },
    {
      "epoch": 0.00011590576171875,
      "grad_norm": 0.12101232260465622,
      "learning_rate": 8.180745492958674e-05,
      "loss": 0.0482,
      "step": 18990
    },
    {
      "epoch": 0.00011590576171875,
      "model_forward_time": 0.1152799129486084,
      "step": 18990
    },
    {
      "epoch": 0.00011590576171875,
      "step": 18990,
      "training_step_time": 0.4012911319732666
    },
    {
      "epoch": 0.000115911865234375,
      "model_forward_time": 0.11430644989013672,
      "step": 18991
    },
    {
      "epoch": 0.000115911865234375,
      "step": 18991,
      "training_step_time": 0.5554163455963135
    },
    {
      "epoch": 0.00011591796875,
      "model_forward_time": 0.11458539962768555,
      "step": 18992
    },
    {
      "epoch": 0.00011591796875,
      "step": 18992,
      "training_step_time": 0.360703706741333
    },
    {
      "epoch": 0.000115924072265625,
      "model_forward_time": 0.1147317886352539,
      "step": 18993
    },
    {
      "epoch": 0.000115924072265625,
      "step": 18993,
      "training_step_time": 0.4913806915283203
    },
    {
      "epoch": 0.00011593017578125,
      "model_forward_time": 0.11444091796875,
      "step": 18994
    },
    {
      "epoch": 0.00011593017578125,
      "step": 18994,
      "training_step_time": 0.4813215732574463
    },
    {
      "epoch": 0.000115936279296875,
      "model_forward_time": 0.1143791675567627,
      "step": 18995
    },
    {
      "epoch": 0.000115936279296875,
      "step": 18995,
      "training_step_time": 0.3916032314300537
    },
    {
      "epoch": 0.0001159423828125,
      "model_forward_time": 0.11467266082763672,
      "step": 18996
    },
    {
      "epoch": 0.0001159423828125,
      "step": 18996,
      "training_step_time": 0.3896613121032715
    },
    {
      "epoch": 0.000115948486328125,
      "model_forward_time": 0.1142430305480957,
      "step": 18997
    },
    {
      "epoch": 0.000115948486328125,
      "step": 18997,
      "training_step_time": 0.41997337341308594
    },
    {
      "epoch": 0.00011595458984375,
      "model_forward_time": 0.11496949195861816,
      "step": 18998
    },
    {
      "epoch": 0.00011595458984375,
      "step": 18998,
      "training_step_time": 0.44136905670166016
    },
    {
      "epoch": 0.000115960693359375,
      "model_forward_time": 0.11436939239501953,
      "step": 18999
    },
    {
      "epoch": 0.000115960693359375,
      "step": 18999,
      "training_step_time": 0.4582672119140625
    },
    {
      "epoch": 0.000115966796875,
      "grad_norm": 0.14201945066452026,
      "learning_rate": 8.178618741049842e-05,
      "loss": 0.0492,
      "step": 19000
    },
    {
      "epoch": 0.000115966796875,
      "model_forward_time": 0.11339855194091797,
      "step": 19000
    },
    {
      "epoch": 0.000115966796875,
      "step": 19000,
      "training_step_time": 0.3545675277709961
    },
    {
      "epoch": 0.000115972900390625,
      "model_forward_time": 0.11267709732055664,
      "step": 19001
    },
    {
      "epoch": 0.000115972900390625,
      "step": 19001,
      "training_step_time": 0.406466007232666
    },
    {
      "epoch": 0.00011597900390625,
      "model_forward_time": 0.11273813247680664,
      "step": 19002
    },
    {
      "epoch": 0.00011597900390625,
      "step": 19002,
      "training_step_time": 0.46189212799072266
    },
    {
      "epoch": 0.000115985107421875,
      "model_forward_time": 0.1138753890991211,
      "step": 19003
    },
    {
      "epoch": 0.000115985107421875,
      "step": 19003,
      "training_step_time": 0.4041562080383301
    },
    {
      "epoch": 0.0001159912109375,
      "model_forward_time": 0.11396527290344238,
      "step": 19004
    },
    {
      "epoch": 0.0001159912109375,
      "step": 19004,
      "training_step_time": 0.39370274543762207
    },
    {
      "epoch": 0.000115997314453125,
      "model_forward_time": 0.11429882049560547,
      "step": 19005
    },
    {
      "epoch": 0.000115997314453125,
      "step": 19005,
      "training_step_time": 0.3964707851409912
    },
    {
      "epoch": 0.00011600341796875,
      "model_forward_time": 0.11438155174255371,
      "step": 19006
    },
    {
      "epoch": 0.00011600341796875,
      "step": 19006,
      "training_step_time": 0.3897707462310791
    },
    {
      "epoch": 0.000116009521484375,
      "model_forward_time": 0.11435151100158691,
      "step": 19007
    },
    {
      "epoch": 0.000116009521484375,
      "step": 19007,
      "training_step_time": 0.3840525150299072
    },
    {
      "epoch": 0.000116015625,
      "model_forward_time": 0.11482644081115723,
      "step": 19008
    },
    {
      "epoch": 0.000116015625,
      "step": 19008,
      "training_step_time": 0.3876988887786865
    },
    {
      "epoch": 0.000116021728515625,
      "model_forward_time": 0.11446094512939453,
      "step": 19009
    },
    {
      "epoch": 0.000116021728515625,
      "step": 19009,
      "training_step_time": 0.3881800174713135
    },
    {
      "epoch": 0.00011602783203125,
      "grad_norm": 0.1792784035205841,
      "learning_rate": 8.17649102356056e-05,
      "loss": 0.0519,
      "step": 19010
    },
    {
      "epoch": 0.00011602783203125,
      "model_forward_time": 0.1146540641784668,
      "step": 19010
    },
    {
      "epoch": 0.00011602783203125,
      "step": 19010,
      "training_step_time": 0.46320104598999023
    },
    {
      "epoch": 0.000116033935546875,
      "model_forward_time": 0.11460661888122559,
      "step": 19011
    },
    {
      "epoch": 0.000116033935546875,
      "step": 19011,
      "training_step_time": 0.40714263916015625
    },
    {
      "epoch": 0.0001160400390625,
      "model_forward_time": 0.11606907844543457,
      "step": 19012
    },
    {
      "epoch": 0.0001160400390625,
      "step": 19012,
      "training_step_time": 0.49875521659851074
    },
    {
      "epoch": 0.000116046142578125,
      "model_forward_time": 0.11462974548339844,
      "step": 19013
    },
    {
      "epoch": 0.000116046142578125,
      "step": 19013,
      "training_step_time": 0.40181851387023926
    },
    {
      "epoch": 0.00011605224609375,
      "model_forward_time": 0.11474418640136719,
      "step": 19014
    },
    {
      "epoch": 0.00011605224609375,
      "step": 19014,
      "training_step_time": 0.4046132564544678
    },
    {
      "epoch": 0.000116058349609375,
      "model_forward_time": 0.1146395206451416,
      "step": 19015
    },
    {
      "epoch": 0.000116058349609375,
      "step": 19015,
      "training_step_time": 0.44023942947387695
    },
    {
      "epoch": 0.000116064453125,
      "model_forward_time": 0.11647295951843262,
      "step": 19016
    },
    {
      "epoch": 0.000116064453125,
      "step": 19016,
      "training_step_time": 0.4047887325286865
    },
    {
      "epoch": 0.000116070556640625,
      "model_forward_time": 0.11493659019470215,
      "step": 19017
    },
    {
      "epoch": 0.000116070556640625,
      "step": 19017,
      "training_step_time": 0.48569250106811523
    },
    {
      "epoch": 0.00011607666015625,
      "model_forward_time": 0.1145162582397461,
      "step": 19018
    },
    {
      "epoch": 0.00011607666015625,
      "step": 19018,
      "training_step_time": 0.4391772747039795
    },
    {
      "epoch": 0.000116082763671875,
      "model_forward_time": 0.11524724960327148,
      "step": 19019
    },
    {
      "epoch": 0.000116082763671875,
      "step": 19019,
      "training_step_time": 0.40122532844543457
    },
    {
      "epoch": 0.0001160888671875,
      "grad_norm": 0.1547696888446808,
      "learning_rate": 8.174362341137177e-05,
      "loss": 0.0551,
      "step": 19020
    },
    {
      "epoch": 0.0001160888671875,
      "model_forward_time": 0.11509919166564941,
      "step": 19020
    },
    {
      "epoch": 0.0001160888671875,
      "step": 19020,
      "training_step_time": 0.3909618854522705
    },
    {
      "epoch": 0.000116094970703125,
      "model_forward_time": 0.11582565307617188,
      "step": 19021
    },
    {
      "epoch": 0.000116094970703125,
      "step": 19021,
      "training_step_time": 0.39069581031799316
    },
    {
      "epoch": 0.00011610107421875,
      "model_forward_time": 0.11491870880126953,
      "step": 19022
    },
    {
      "epoch": 0.00011610107421875,
      "step": 19022,
      "training_step_time": 0.4056868553161621
    },
    {
      "epoch": 0.000116107177734375,
      "model_forward_time": 0.11467504501342773,
      "step": 19023
    },
    {
      "epoch": 0.000116107177734375,
      "step": 19023,
      "training_step_time": 0.39693498611450195
    },
    {
      "epoch": 0.00011611328125,
      "model_forward_time": 0.11572980880737305,
      "step": 19024
    },
    {
      "epoch": 0.00011611328125,
      "step": 19024,
      "training_step_time": 0.40072011947631836
    },
    {
      "epoch": 0.000116119384765625,
      "model_forward_time": 0.11536407470703125,
      "step": 19025
    },
    {
      "epoch": 0.000116119384765625,
      "step": 19025,
      "training_step_time": 0.40340590476989746
    },
    {
      "epoch": 0.00011612548828125,
      "model_forward_time": 0.11542344093322754,
      "step": 19026
    },
    {
      "epoch": 0.00011612548828125,
      "step": 19026,
      "training_step_time": 0.5116631984710693
    },
    {
      "epoch": 0.000116131591796875,
      "model_forward_time": 0.11458039283752441,
      "step": 19027
    },
    {
      "epoch": 0.000116131591796875,
      "step": 19027,
      "training_step_time": 0.4080641269683838
    },
    {
      "epoch": 0.0001161376953125,
      "model_forward_time": 0.11504817008972168,
      "step": 19028
    },
    {
      "epoch": 0.0001161376953125,
      "step": 19028,
      "training_step_time": 0.4140932559967041
    },
    {
      "epoch": 0.000116143798828125,
      "model_forward_time": 0.11490154266357422,
      "step": 19029
    },
    {
      "epoch": 0.000116143798828125,
      "step": 19029,
      "training_step_time": 0.3988935947418213
    },
    {
      "epoch": 0.00011614990234375,
      "grad_norm": 0.1293909251689911,
      "learning_rate": 8.172232694426329e-05,
      "loss": 0.0567,
      "step": 19030
    },
    {
      "epoch": 0.00011614990234375,
      "model_forward_time": 0.11481499671936035,
      "step": 19030
    },
    {
      "epoch": 0.00011614990234375,
      "step": 19030,
      "training_step_time": 0.4290955066680908
    },
    {
      "epoch": 0.000116156005859375,
      "model_forward_time": 0.11439347267150879,
      "step": 19031
    },
    {
      "epoch": 0.000116156005859375,
      "step": 19031,
      "training_step_time": 0.5226762294769287
    },
    {
      "epoch": 0.000116162109375,
      "model_forward_time": 0.11438918113708496,
      "step": 19032
    },
    {
      "epoch": 0.000116162109375,
      "step": 19032,
      "training_step_time": 0.4442331790924072
    },
    {
      "epoch": 0.000116168212890625,
      "model_forward_time": 0.11496853828430176,
      "step": 19033
    },
    {
      "epoch": 0.000116168212890625,
      "step": 19033,
      "training_step_time": 0.3926970958709717
    },
    {
      "epoch": 0.00011617431640625,
      "model_forward_time": 0.11466026306152344,
      "step": 19034
    },
    {
      "epoch": 0.00011617431640625,
      "step": 19034,
      "training_step_time": 0.3909893035888672
    },
    {
      "epoch": 0.000116180419921875,
      "model_forward_time": 0.11507511138916016,
      "step": 19035
    },
    {
      "epoch": 0.000116180419921875,
      "step": 19035,
      "training_step_time": 0.3790004253387451
    },
    {
      "epoch": 0.0001161865234375,
      "model_forward_time": 0.1149744987487793,
      "step": 19036
    },
    {
      "epoch": 0.0001161865234375,
      "step": 19036,
      "training_step_time": 0.38918590545654297
    },
    {
      "epoch": 0.000116192626953125,
      "model_forward_time": 0.11512565612792969,
      "step": 19037
    },
    {
      "epoch": 0.000116192626953125,
      "step": 19037,
      "training_step_time": 0.40723586082458496
    },
    {
      "epoch": 0.00011619873046875,
      "model_forward_time": 0.11503124237060547,
      "step": 19038
    },
    {
      "epoch": 0.00011619873046875,
      "step": 19038,
      "training_step_time": 0.3959496021270752
    },
    {
      "epoch": 0.000116204833984375,
      "model_forward_time": 0.11528944969177246,
      "step": 19039
    },
    {
      "epoch": 0.000116204833984375,
      "step": 19039,
      "training_step_time": 0.39418983459472656
    },
    {
      "epoch": 0.0001162109375,
      "grad_norm": 0.1373336911201477,
      "learning_rate": 8.170102084074946e-05,
      "loss": 0.0532,
      "step": 19040
    },
    {
      "epoch": 0.0001162109375,
      "model_forward_time": 0.11546897888183594,
      "step": 19040
    },
    {
      "epoch": 0.0001162109375,
      "step": 19040,
      "training_step_time": 0.498180627822876
    },
    {
      "epoch": 0.000116217041015625,
      "model_forward_time": 0.11484050750732422,
      "step": 19041
    },
    {
      "epoch": 0.000116217041015625,
      "step": 19041,
      "training_step_time": 0.48224568367004395
    },
    {
      "epoch": 0.00011622314453125,
      "model_forward_time": 0.1159210205078125,
      "step": 19042
    },
    {
      "epoch": 0.00011622314453125,
      "step": 19042,
      "training_step_time": 0.38524436950683594
    },
    {
      "epoch": 0.000116229248046875,
      "model_forward_time": 0.11515522003173828,
      "step": 19043
    },
    {
      "epoch": 0.000116229248046875,
      "step": 19043,
      "training_step_time": 0.39327478408813477
    },
    {
      "epoch": 0.0001162353515625,
      "model_forward_time": 0.11575698852539062,
      "step": 19044
    },
    {
      "epoch": 0.0001162353515625,
      "step": 19044,
      "training_step_time": 0.4165232181549072
    },
    {
      "epoch": 0.000116241455078125,
      "model_forward_time": 0.11512374877929688,
      "step": 19045
    },
    {
      "epoch": 0.000116241455078125,
      "step": 19045,
      "training_step_time": 0.47080087661743164
    },
    {
      "epoch": 0.00011624755859375,
      "model_forward_time": 0.11546111106872559,
      "step": 19046
    },
    {
      "epoch": 0.00011624755859375,
      "step": 19046,
      "training_step_time": 0.4528670310974121
    },
    {
      "epoch": 0.000116253662109375,
      "model_forward_time": 0.1155233383178711,
      "step": 19047
    },
    {
      "epoch": 0.000116253662109375,
      "step": 19047,
      "training_step_time": 0.4076685905456543
    },
    {
      "epoch": 0.000116259765625,
      "model_forward_time": 0.11462616920471191,
      "step": 19048
    },
    {
      "epoch": 0.000116259765625,
      "step": 19048,
      "training_step_time": 0.38309645652770996
    },
    {
      "epoch": 0.000116265869140625,
      "model_forward_time": 0.11568164825439453,
      "step": 19049
    },
    {
      "epoch": 0.000116265869140625,
      "step": 19049,
      "training_step_time": 0.3868236541748047
    },
    {
      "epoch": 0.00011627197265625,
      "grad_norm": 0.12045358866453171,
      "learning_rate": 8.167970510730253e-05,
      "loss": 0.0489,
      "step": 19050
    },
    {
      "epoch": 0.00011627197265625,
      "model_forward_time": 0.1147620677947998,
      "step": 19050
    },
    {
      "epoch": 0.00011627197265625,
      "step": 19050,
      "training_step_time": 0.392772912979126
    },
    {
      "epoch": 0.000116278076171875,
      "model_forward_time": 0.11594939231872559,
      "step": 19051
    },
    {
      "epoch": 0.000116278076171875,
      "step": 19051,
      "training_step_time": 0.4095335006713867
    },
    {
      "epoch": 0.0001162841796875,
      "model_forward_time": 0.11472845077514648,
      "step": 19052
    },
    {
      "epoch": 0.0001162841796875,
      "step": 19052,
      "training_step_time": 0.3867363929748535
    },
    {
      "epoch": 0.000116290283203125,
      "model_forward_time": 0.11516070365905762,
      "step": 19053
    },
    {
      "epoch": 0.000116290283203125,
      "step": 19053,
      "training_step_time": 0.4026618003845215
    },
    {
      "epoch": 0.00011629638671875,
      "model_forward_time": 0.11503171920776367,
      "step": 19054
    },
    {
      "epoch": 0.00011629638671875,
      "step": 19054,
      "training_step_time": 0.3672327995300293
    },
    {
      "epoch": 0.000116302490234375,
      "model_forward_time": 0.11527729034423828,
      "step": 19055
    },
    {
      "epoch": 0.000116302490234375,
      "step": 19055,
      "training_step_time": 0.41898441314697266
    },
    {
      "epoch": 0.00011630859375,
      "model_forward_time": 0.11501622200012207,
      "step": 19056
    },
    {
      "epoch": 0.00011630859375,
      "step": 19056,
      "training_step_time": 0.39115405082702637
    },
    {
      "epoch": 0.000116314697265625,
      "model_forward_time": 0.11528730392456055,
      "step": 19057
    },
    {
      "epoch": 0.000116314697265625,
      "step": 19057,
      "training_step_time": 0.39377832412719727
    },
    {
      "epoch": 0.00011632080078125,
      "model_forward_time": 0.11528730392456055,
      "step": 19058
    },
    {
      "epoch": 0.00011632080078125,
      "step": 19058,
      "training_step_time": 0.39620113372802734
    },
    {
      "epoch": 0.000116326904296875,
      "model_forward_time": 0.11518645286560059,
      "step": 19059
    },
    {
      "epoch": 0.000116326904296875,
      "step": 19059,
      "training_step_time": 0.4915034770965576
    },
    {
      "epoch": 0.0001163330078125,
      "grad_norm": 0.11515098065137863,
      "learning_rate": 8.165837975039763e-05,
      "loss": 0.0494,
      "step": 19060
    },
    {
      "epoch": 0.0001163330078125,
      "model_forward_time": 0.11513495445251465,
      "step": 19060
    },
    {
      "epoch": 0.0001163330078125,
      "step": 19060,
      "training_step_time": 0.4938685894012451
    },
    {
      "epoch": 0.000116339111328125,
      "model_forward_time": 0.11506366729736328,
      "step": 19061
    },
    {
      "epoch": 0.000116339111328125,
      "step": 19061,
      "training_step_time": 0.4364926815032959
    },
    {
      "epoch": 0.00011634521484375,
      "model_forward_time": 0.11513090133666992,
      "step": 19062
    },
    {
      "epoch": 0.00011634521484375,
      "step": 19062,
      "training_step_time": 0.3991367816925049
    },
    {
      "epoch": 0.000116351318359375,
      "model_forward_time": 0.11466717720031738,
      "step": 19063
    },
    {
      "epoch": 0.000116351318359375,
      "step": 19063,
      "training_step_time": 0.38596105575561523
    },
    {
      "epoch": 0.000116357421875,
      "model_forward_time": 0.1147012710571289,
      "step": 19064
    },
    {
      "epoch": 0.000116357421875,
      "step": 19064,
      "training_step_time": 0.3834095001220703
    },
    {
      "epoch": 0.000116363525390625,
      "model_forward_time": 0.11533832550048828,
      "step": 19065
    },
    {
      "epoch": 0.000116363525390625,
      "step": 19065,
      "training_step_time": 0.396512508392334
    },
    {
      "epoch": 0.00011636962890625,
      "model_forward_time": 0.11504960060119629,
      "step": 19066
    },
    {
      "epoch": 0.00011636962890625,
      "step": 19066,
      "training_step_time": 0.38889098167419434
    },
    {
      "epoch": 0.000116375732421875,
      "model_forward_time": 0.11575818061828613,
      "step": 19067
    },
    {
      "epoch": 0.000116375732421875,
      "step": 19067,
      "training_step_time": 0.3863494396209717
    },
    {
      "epoch": 0.0001163818359375,
      "model_forward_time": 0.11503839492797852,
      "step": 19068
    },
    {
      "epoch": 0.0001163818359375,
      "step": 19068,
      "training_step_time": 0.3986189365386963
    },
    {
      "epoch": 0.000116387939453125,
      "model_forward_time": 0.11610245704650879,
      "step": 19069
    },
    {
      "epoch": 0.000116387939453125,
      "step": 19069,
      "training_step_time": 0.43070530891418457
    },
    {
      "epoch": 0.00011639404296875,
      "grad_norm": 0.14411099255084991,
      "learning_rate": 8.163704477651287e-05,
      "loss": 0.053,
      "step": 19070
    },
    {
      "epoch": 0.00011639404296875,
      "model_forward_time": 0.11506795883178711,
      "step": 19070
    },
    {
      "epoch": 0.00011639404296875,
      "step": 19070,
      "training_step_time": 0.4753153324127197
    },
    {
      "epoch": 0.000116400146484375,
      "model_forward_time": 0.11509513854980469,
      "step": 19071
    },
    {
      "epoch": 0.000116400146484375,
      "step": 19071,
      "training_step_time": 0.3798685073852539
    },
    {
      "epoch": 0.00011640625,
      "model_forward_time": 0.11538267135620117,
      "step": 19072
    },
    {
      "epoch": 0.00011640625,
      "step": 19072,
      "training_step_time": 0.3819105625152588
    },
    {
      "epoch": 0.000116412353515625,
      "model_forward_time": 0.11514043807983398,
      "step": 19073
    },
    {
      "epoch": 0.000116412353515625,
      "step": 19073,
      "training_step_time": 0.38858914375305176
    },
    {
      "epoch": 0.00011641845703125,
      "model_forward_time": 0.1151125431060791,
      "step": 19074
    },
    {
      "epoch": 0.00011641845703125,
      "step": 19074,
      "training_step_time": 0.44939351081848145
    },
    {
      "epoch": 0.000116424560546875,
      "model_forward_time": 0.11532068252563477,
      "step": 19075
    },
    {
      "epoch": 0.000116424560546875,
      "step": 19075,
      "training_step_time": 0.44377779960632324
    },
    {
      "epoch": 0.0001164306640625,
      "model_forward_time": 0.11615276336669922,
      "step": 19076
    },
    {
      "epoch": 0.0001164306640625,
      "step": 19076,
      "training_step_time": 0.40656375885009766
    },
    {
      "epoch": 0.000116436767578125,
      "model_forward_time": 0.11508488655090332,
      "step": 19077
    },
    {
      "epoch": 0.000116436767578125,
      "step": 19077,
      "training_step_time": 0.39254117012023926
    },
    {
      "epoch": 0.00011644287109375,
      "model_forward_time": 0.11454296112060547,
      "step": 19078
    },
    {
      "epoch": 0.00011644287109375,
      "step": 19078,
      "training_step_time": 0.4017448425292969
    },
    {
      "epoch": 0.000116448974609375,
      "model_forward_time": 0.11502695083618164,
      "step": 19079
    },
    {
      "epoch": 0.000116448974609375,
      "step": 19079,
      "training_step_time": 0.3983142375946045
    },
    {
      "epoch": 0.000116455078125,
      "grad_norm": 0.22913874685764313,
      "learning_rate": 8.161570019212921e-05,
      "loss": 0.0499,
      "step": 19080
    },
    {
      "epoch": 0.000116455078125,
      "model_forward_time": 0.11417555809020996,
      "step": 19080
    },
    {
      "epoch": 0.000116455078125,
      "step": 19080,
      "training_step_time": 0.3939483165740967
    },
    {
      "epoch": 0.000116461181640625,
      "model_forward_time": 0.11514139175415039,
      "step": 19081
    },
    {
      "epoch": 0.000116461181640625,
      "step": 19081,
      "training_step_time": 0.39487743377685547
    },
    {
      "epoch": 0.00011646728515625,
      "model_forward_time": 0.11490845680236816,
      "step": 19082
    },
    {
      "epoch": 0.00011646728515625,
      "step": 19082,
      "training_step_time": 0.39502906799316406
    },
    {
      "epoch": 0.000116473388671875,
      "model_forward_time": 0.11544609069824219,
      "step": 19083
    },
    {
      "epoch": 0.000116473388671875,
      "step": 19083,
      "training_step_time": 0.3846290111541748
    },
    {
      "epoch": 0.0001164794921875,
      "model_forward_time": 0.11544919013977051,
      "step": 19084
    },
    {
      "epoch": 0.0001164794921875,
      "step": 19084,
      "training_step_time": 0.4792747497558594
    },
    {
      "epoch": 0.000116485595703125,
      "model_forward_time": 0.11522173881530762,
      "step": 19085
    },
    {
      "epoch": 0.000116485595703125,
      "step": 19085,
      "training_step_time": 0.39112281799316406
    },
    {
      "epoch": 0.00011649169921875,
      "model_forward_time": 0.11509108543395996,
      "step": 19086
    },
    {
      "epoch": 0.00011649169921875,
      "step": 19086,
      "training_step_time": 0.41751551628112793
    },
    {
      "epoch": 0.000116497802734375,
      "model_forward_time": 0.11505627632141113,
      "step": 19087
    },
    {
      "epoch": 0.000116497802734375,
      "step": 19087,
      "training_step_time": 0.42020297050476074
    },
    {
      "epoch": 0.00011650390625,
      "model_forward_time": 0.11527276039123535,
      "step": 19088
    },
    {
      "epoch": 0.00011650390625,
      "step": 19088,
      "training_step_time": 0.38987231254577637
    },
    {
      "epoch": 0.000116510009765625,
      "model_forward_time": 0.11547017097473145,
      "step": 19089
    },
    {
      "epoch": 0.000116510009765625,
      "step": 19089,
      "training_step_time": 0.49573612213134766
    },
    {
      "epoch": 0.00011651611328125,
      "grad_norm": 0.2152308076620102,
      "learning_rate": 8.159434600373061e-05,
      "loss": 0.0513,
      "step": 19090
    },
    {
      "epoch": 0.00011651611328125,
      "model_forward_time": 0.11466288566589355,
      "step": 19090
    },
    {
      "epoch": 0.00011651611328125,
      "step": 19090,
      "training_step_time": 0.39510083198547363
    },
    {
      "epoch": 0.000116522216796875,
      "model_forward_time": 0.11473274230957031,
      "step": 19091
    },
    {
      "epoch": 0.000116522216796875,
      "step": 19091,
      "training_step_time": 0.5149843692779541
    },
    {
      "epoch": 0.0001165283203125,
      "model_forward_time": 0.11532902717590332,
      "step": 19092
    },
    {
      "epoch": 0.0001165283203125,
      "step": 19092,
      "training_step_time": 0.39532470703125
    },
    {
      "epoch": 0.000116534423828125,
      "model_forward_time": 0.11460065841674805,
      "step": 19093
    },
    {
      "epoch": 0.000116534423828125,
      "step": 19093,
      "training_step_time": 0.4357001781463623
    },
    {
      "epoch": 0.00011654052734375,
      "model_forward_time": 0.11525201797485352,
      "step": 19094
    },
    {
      "epoch": 0.00011654052734375,
      "step": 19094,
      "training_step_time": 0.39401721954345703
    },
    {
      "epoch": 0.000116546630859375,
      "model_forward_time": 0.11507129669189453,
      "step": 19095
    },
    {
      "epoch": 0.000116546630859375,
      "step": 19095,
      "training_step_time": 0.38726186752319336
    },
    {
      "epoch": 0.000116552734375,
      "model_forward_time": 0.11488628387451172,
      "step": 19096
    },
    {
      "epoch": 0.000116552734375,
      "step": 19096,
      "training_step_time": 0.3860466480255127
    },
    {
      "epoch": 0.000116558837890625,
      "model_forward_time": 0.1150662899017334,
      "step": 19097
    },
    {
      "epoch": 0.000116558837890625,
      "step": 19097,
      "training_step_time": 0.39739561080932617
    },
    {
      "epoch": 0.00011656494140625,
      "model_forward_time": 0.11436986923217773,
      "step": 19098
    },
    {
      "epoch": 0.00011656494140625,
      "step": 19098,
      "training_step_time": 0.3643507957458496
    },
    {
      "epoch": 0.000116571044921875,
      "model_forward_time": 0.11564803123474121,
      "step": 19099
    },
    {
      "epoch": 0.000116571044921875,
      "step": 19099,
      "training_step_time": 0.6142852306365967
    },
    {
      "epoch": 0.0001165771484375,
      "grad_norm": 0.14570537209510803,
      "learning_rate": 8.157298221780389e-05,
      "loss": 0.0538,
      "step": 19100
    },
    {
      "epoch": 0.0001165771484375,
      "model_forward_time": 0.11579537391662598,
      "step": 19100
    },
    {
      "epoch": 0.0001165771484375,
      "step": 19100,
      "training_step_time": 0.41866183280944824
    },
    {
      "epoch": 0.000116583251953125,
      "model_forward_time": 0.1145472526550293,
      "step": 19101
    },
    {
      "epoch": 0.000116583251953125,
      "step": 19101,
      "training_step_time": 0.4124767780303955
    },
    {
      "epoch": 0.00011658935546875,
      "model_forward_time": 0.11440563201904297,
      "step": 19102
    },
    {
      "epoch": 0.00011658935546875,
      "step": 19102,
      "training_step_time": 0.38690662384033203
    },
    {
      "epoch": 0.000116595458984375,
      "model_forward_time": 0.11451148986816406,
      "step": 19103
    },
    {
      "epoch": 0.000116595458984375,
      "step": 19103,
      "training_step_time": 0.41151928901672363
    },
    {
      "epoch": 0.0001166015625,
      "model_forward_time": 0.11475181579589844,
      "step": 19104
    },
    {
      "epoch": 0.0001166015625,
      "step": 19104,
      "training_step_time": 0.4288609027862549
    },
    {
      "epoch": 0.000116607666015625,
      "model_forward_time": 0.11473941802978516,
      "step": 19105
    },
    {
      "epoch": 0.000116607666015625,
      "step": 19105,
      "training_step_time": 0.48511219024658203
    },
    {
      "epoch": 0.00011661376953125,
      "model_forward_time": 0.11470580101013184,
      "step": 19106
    },
    {
      "epoch": 0.00011661376953125,
      "step": 19106,
      "training_step_time": 0.3830678462982178
    },
    {
      "epoch": 0.000116619873046875,
      "model_forward_time": 0.1147768497467041,
      "step": 19107
    },
    {
      "epoch": 0.000116619873046875,
      "step": 19107,
      "training_step_time": 0.3877723217010498
    },
    {
      "epoch": 0.0001166259765625,
      "model_forward_time": 0.11536788940429688,
      "step": 19108
    },
    {
      "epoch": 0.0001166259765625,
      "step": 19108,
      "training_step_time": 0.38469457626342773
    },
    {
      "epoch": 0.000116632080078125,
      "model_forward_time": 0.1151893138885498,
      "step": 19109
    },
    {
      "epoch": 0.000116632080078125,
      "step": 19109,
      "training_step_time": 0.3882930278778076
    },
    {
      "epoch": 0.00011663818359375,
      "grad_norm": 0.11041810363531113,
      "learning_rate": 8.155160884083881e-05,
      "loss": 0.0503,
      "step": 19110
    },
    {
      "epoch": 0.00011663818359375,
      "model_forward_time": 0.1152794361114502,
      "step": 19110
    },
    {
      "epoch": 0.00011663818359375,
      "step": 19110,
      "training_step_time": 0.3955955505371094
    },
    {
      "epoch": 0.000116644287109375,
      "model_forward_time": 0.11533117294311523,
      "step": 19111
    },
    {
      "epoch": 0.000116644287109375,
      "step": 19111,
      "training_step_time": 0.7863128185272217
    },
    {
      "epoch": 0.000116650390625,
      "model_forward_time": 0.11498332023620605,
      "step": 19112
    },
    {
      "epoch": 0.000116650390625,
      "step": 19112,
      "training_step_time": 0.4559495449066162
    },
    {
      "epoch": 0.000116656494140625,
      "model_forward_time": 0.11460423469543457,
      "step": 19113
    },
    {
      "epoch": 0.000116656494140625,
      "step": 19113,
      "training_step_time": 0.4956474304199219
    },
    {
      "epoch": 0.00011666259765625,
      "model_forward_time": 0.11471366882324219,
      "step": 19114
    },
    {
      "epoch": 0.00011666259765625,
      "step": 19114,
      "training_step_time": 0.42791175842285156
    },
    {
      "epoch": 0.000116668701171875,
      "model_forward_time": 0.11405634880065918,
      "step": 19115
    },
    {
      "epoch": 0.000116668701171875,
      "step": 19115,
      "training_step_time": 0.3835444450378418
    },
    {
      "epoch": 0.0001166748046875,
      "model_forward_time": 0.11442065238952637,
      "step": 19116
    },
    {
      "epoch": 0.0001166748046875,
      "step": 19116,
      "training_step_time": 0.386807918548584
    },
    {
      "epoch": 0.000116680908203125,
      "model_forward_time": 0.11463260650634766,
      "step": 19117
    },
    {
      "epoch": 0.000116680908203125,
      "step": 19117,
      "training_step_time": 0.46731138229370117
    },
    {
      "epoch": 0.00011668701171875,
      "model_forward_time": 0.11533999443054199,
      "step": 19118
    },
    {
      "epoch": 0.00011668701171875,
      "step": 19118,
      "training_step_time": 0.41930294036865234
    },
    {
      "epoch": 0.000116693115234375,
      "model_forward_time": 0.11469864845275879,
      "step": 19119
    },
    {
      "epoch": 0.000116693115234375,
      "step": 19119,
      "training_step_time": 0.5042126178741455
    },
    {
      "epoch": 0.00011669921875,
      "grad_norm": 0.13637712597846985,
      "learning_rate": 8.153022587932803e-05,
      "loss": 0.0496,
      "step": 19120
    },
    {
      "epoch": 0.00011669921875,
      "model_forward_time": 0.11516284942626953,
      "step": 19120
    },
    {
      "epoch": 0.00011669921875,
      "step": 19120,
      "training_step_time": 0.38820600509643555
    },
    {
      "epoch": 0.000116705322265625,
      "model_forward_time": 0.11524581909179688,
      "step": 19121
    },
    {
      "epoch": 0.000116705322265625,
      "step": 19121,
      "training_step_time": 0.3951117992401123
    },
    {
      "epoch": 0.00011671142578125,
      "model_forward_time": 0.11514782905578613,
      "step": 19122
    },
    {
      "epoch": 0.00011671142578125,
      "step": 19122,
      "training_step_time": 0.38654160499572754
    },
    {
      "epoch": 0.000116717529296875,
      "model_forward_time": 0.11546683311462402,
      "step": 19123
    },
    {
      "epoch": 0.000116717529296875,
      "step": 19123,
      "training_step_time": 0.6487956047058105
    },
    {
      "epoch": 0.0001167236328125,
      "model_forward_time": 0.11569428443908691,
      "step": 19124
    },
    {
      "epoch": 0.0001167236328125,
      "step": 19124,
      "training_step_time": 0.37915611267089844
    },
    {
      "epoch": 0.000116729736328125,
      "model_forward_time": 0.11474180221557617,
      "step": 19125
    },
    {
      "epoch": 0.000116729736328125,
      "step": 19125,
      "training_step_time": 0.38456034660339355
    },
    {
      "epoch": 0.00011673583984375,
      "model_forward_time": 0.11482691764831543,
      "step": 19126
    },
    {
      "epoch": 0.00011673583984375,
      "step": 19126,
      "training_step_time": 0.3753623962402344
    },
    {
      "epoch": 0.000116741943359375,
      "model_forward_time": 0.11504554748535156,
      "step": 19127
    },
    {
      "epoch": 0.000116741943359375,
      "step": 19127,
      "training_step_time": 0.41475939750671387
    },
    {
      "epoch": 0.000116748046875,
      "model_forward_time": 0.11586189270019531,
      "step": 19128
    },
    {
      "epoch": 0.000116748046875,
      "step": 19128,
      "training_step_time": 0.40200090408325195
    },
    {
      "epoch": 0.000116754150390625,
      "model_forward_time": 0.115936279296875,
      "step": 19129
    },
    {
      "epoch": 0.000116754150390625,
      "step": 19129,
      "training_step_time": 0.7167365550994873
    },
    {
      "epoch": 0.00011676025390625,
      "grad_norm": 0.13415147364139557,
      "learning_rate": 8.150883333976713e-05,
      "loss": 0.054,
      "step": 19130
    },
    {
      "epoch": 0.00011676025390625,
      "model_forward_time": 0.11509108543395996,
      "step": 19130
    },
    {
      "epoch": 0.00011676025390625,
      "step": 19130,
      "training_step_time": 0.4008188247680664
    },
    {
      "epoch": 0.000116766357421875,
      "model_forward_time": 0.11430573463439941,
      "step": 19131
    },
    {
      "epoch": 0.000116766357421875,
      "step": 19131,
      "training_step_time": 0.4683518409729004
    },
    {
      "epoch": 0.0001167724609375,
      "model_forward_time": 0.11435747146606445,
      "step": 19132
    },
    {
      "epoch": 0.0001167724609375,
      "step": 19132,
      "training_step_time": 0.48828983306884766
    },
    {
      "epoch": 0.000116778564453125,
      "model_forward_time": 0.11448383331298828,
      "step": 19133
    },
    {
      "epoch": 0.000116778564453125,
      "step": 19133,
      "training_step_time": 0.43532824516296387
    },
    {
      "epoch": 0.00011678466796875,
      "model_forward_time": 0.1152200698852539,
      "step": 19134
    },
    {
      "epoch": 0.00011678466796875,
      "step": 19134,
      "training_step_time": 0.3737483024597168
    },
    {
      "epoch": 0.000116790771484375,
      "model_forward_time": 0.11615848541259766,
      "step": 19135
    },
    {
      "epoch": 0.000116790771484375,
      "step": 19135,
      "training_step_time": 0.46791744232177734
    },
    {
      "epoch": 0.000116796875,
      "model_forward_time": 0.1149139404296875,
      "step": 19136
    },
    {
      "epoch": 0.000116796875,
      "step": 19136,
      "training_step_time": 0.387951135635376
    },
    {
      "epoch": 0.000116802978515625,
      "model_forward_time": 0.11438751220703125,
      "step": 19137
    },
    {
      "epoch": 0.000116802978515625,
      "step": 19137,
      "training_step_time": 0.3869802951812744
    },
    {
      "epoch": 0.00011680908203125,
      "model_forward_time": 0.11527371406555176,
      "step": 19138
    },
    {
      "epoch": 0.00011680908203125,
      "step": 19138,
      "training_step_time": 0.3927140235900879
    },
    {
      "epoch": 0.000116815185546875,
      "model_forward_time": 0.1159200668334961,
      "step": 19139
    },
    {
      "epoch": 0.000116815185546875,
      "step": 19139,
      "training_step_time": 0.401273250579834
    },
    {
      "epoch": 0.0001168212890625,
      "grad_norm": 0.1279241442680359,
      "learning_rate": 8.148743122865463e-05,
      "loss": 0.048,
      "step": 19140
    },
    {
      "epoch": 0.0001168212890625,
      "model_forward_time": 0.11493444442749023,
      "step": 19140
    },
    {
      "epoch": 0.0001168212890625,
      "step": 19140,
      "training_step_time": 0.43740248680114746
    },
    {
      "epoch": 0.000116827392578125,
      "model_forward_time": 0.11499881744384766,
      "step": 19141
    },
    {
      "epoch": 0.000116827392578125,
      "step": 19141,
      "training_step_time": 0.5557024478912354
    },
    {
      "epoch": 0.00011683349609375,
      "model_forward_time": 0.11508488655090332,
      "step": 19142
    },
    {
      "epoch": 0.00011683349609375,
      "step": 19142,
      "training_step_time": 0.44165658950805664
    },
    {
      "epoch": 0.000116839599609375,
      "model_forward_time": 0.11445283889770508,
      "step": 19143
    },
    {
      "epoch": 0.000116839599609375,
      "step": 19143,
      "training_step_time": 0.3842051029205322
    },
    {
      "epoch": 0.000116845703125,
      "model_forward_time": 0.11483478546142578,
      "step": 19144
    },
    {
      "epoch": 0.000116845703125,
      "step": 19144,
      "training_step_time": 0.4180936813354492
    },
    {
      "epoch": 0.000116851806640625,
      "model_forward_time": 0.11486530303955078,
      "step": 19145
    },
    {
      "epoch": 0.000116851806640625,
      "step": 19145,
      "training_step_time": 0.5048909187316895
    },
    {
      "epoch": 0.00011685791015625,
      "model_forward_time": 0.11461877822875977,
      "step": 19146
    },
    {
      "epoch": 0.00011685791015625,
      "step": 19146,
      "training_step_time": 0.5127661228179932
    },
    {
      "epoch": 0.000116864013671875,
      "model_forward_time": 0.11434698104858398,
      "step": 19147
    },
    {
      "epoch": 0.000116864013671875,
      "step": 19147,
      "training_step_time": 0.39615488052368164
    },
    {
      "epoch": 0.0001168701171875,
      "model_forward_time": 0.11458134651184082,
      "step": 19148
    },
    {
      "epoch": 0.0001168701171875,
      "step": 19148,
      "training_step_time": 0.4013211727142334
    },
    {
      "epoch": 0.000116876220703125,
      "model_forward_time": 0.11499261856079102,
      "step": 19149
    },
    {
      "epoch": 0.000116876220703125,
      "step": 19149,
      "training_step_time": 0.39359498023986816
    },
    {
      "epoch": 0.00011688232421875,
      "grad_norm": 0.16391225159168243,
      "learning_rate": 8.146601955249188e-05,
      "loss": 0.0552,
      "step": 19150
    },
    {
      "epoch": 0.00011688232421875,
      "model_forward_time": 0.11484956741333008,
      "step": 19150
    },
    {
      "epoch": 0.00011688232421875,
      "step": 19150,
      "training_step_time": 0.38778042793273926
    },
    {
      "epoch": 0.000116888427734375,
      "model_forward_time": 0.11455965042114258,
      "step": 19151
    },
    {
      "epoch": 0.000116888427734375,
      "step": 19151,
      "training_step_time": 0.39772534370422363
    },
    {
      "epoch": 0.00011689453125,
      "model_forward_time": 0.11469388008117676,
      "step": 19152
    },
    {
      "epoch": 0.00011689453125,
      "step": 19152,
      "training_step_time": 0.39073920249938965
    },
    {
      "epoch": 0.000116900634765625,
      "model_forward_time": 0.11544060707092285,
      "step": 19153
    },
    {
      "epoch": 0.000116900634765625,
      "step": 19153,
      "training_step_time": 0.6679093837738037
    },
    {
      "epoch": 0.00011690673828125,
      "model_forward_time": 0.11466550827026367,
      "step": 19154
    },
    {
      "epoch": 0.00011690673828125,
      "step": 19154,
      "training_step_time": 0.4728410243988037
    },
    {
      "epoch": 0.000116912841796875,
      "model_forward_time": 0.11521267890930176,
      "step": 19155
    },
    {
      "epoch": 0.000116912841796875,
      "step": 19155,
      "training_step_time": 0.49109768867492676
    },
    {
      "epoch": 0.0001169189453125,
      "model_forward_time": 0.11455011367797852,
      "step": 19156
    },
    {
      "epoch": 0.0001169189453125,
      "step": 19156,
      "training_step_time": 0.3826618194580078
    },
    {
      "epoch": 0.000116925048828125,
      "model_forward_time": 0.1142435073852539,
      "step": 19157
    },
    {
      "epoch": 0.000116925048828125,
      "step": 19157,
      "training_step_time": 0.3837404251098633
    },
    {
      "epoch": 0.00011693115234375,
      "model_forward_time": 0.11383247375488281,
      "step": 19158
    },
    {
      "epoch": 0.00011693115234375,
      "step": 19158,
      "training_step_time": 0.47173500061035156
    },
    {
      "epoch": 0.000116937255859375,
      "model_forward_time": 0.11433243751525879,
      "step": 19159
    },
    {
      "epoch": 0.000116937255859375,
      "step": 19159,
      "training_step_time": 0.49182772636413574
    },
    {
      "epoch": 0.000116943359375,
      "grad_norm": 0.13524852693080902,
      "learning_rate": 8.14445983177832e-05,
      "loss": 0.0555,
      "step": 19160
    },
    {
      "epoch": 0.000116943359375,
      "model_forward_time": 0.1143496036529541,
      "step": 19160
    },
    {
      "epoch": 0.000116943359375,
      "step": 19160,
      "training_step_time": 0.4086017608642578
    },
    {
      "epoch": 0.000116949462890625,
      "model_forward_time": 0.11464548110961914,
      "step": 19161
    },
    {
      "epoch": 0.000116949462890625,
      "step": 19161,
      "training_step_time": 0.40860414505004883
    },
    {
      "epoch": 0.00011695556640625,
      "model_forward_time": 0.11503076553344727,
      "step": 19162
    },
    {
      "epoch": 0.00011695556640625,
      "step": 19162,
      "training_step_time": 0.3805856704711914
    },
    {
      "epoch": 0.000116961669921875,
      "model_forward_time": 0.11478614807128906,
      "step": 19163
    },
    {
      "epoch": 0.000116961669921875,
      "step": 19163,
      "training_step_time": 0.3936774730682373
    },
    {
      "epoch": 0.0001169677734375,
      "model_forward_time": 0.1144704818725586,
      "step": 19164
    },
    {
      "epoch": 0.0001169677734375,
      "step": 19164,
      "training_step_time": 0.39510607719421387
    },
    {
      "epoch": 0.000116973876953125,
      "model_forward_time": 0.11573171615600586,
      "step": 19165
    },
    {
      "epoch": 0.000116973876953125,
      "step": 19165,
      "training_step_time": 0.4968383312225342
    },
    {
      "epoch": 0.00011697998046875,
      "model_forward_time": 0.11544966697692871,
      "step": 19166
    },
    {
      "epoch": 0.00011697998046875,
      "step": 19166,
      "training_step_time": 0.442840576171875
    },
    {
      "epoch": 0.000116986083984375,
      "model_forward_time": 0.11512207984924316,
      "step": 19167
    },
    {
      "epoch": 0.000116986083984375,
      "step": 19167,
      "training_step_time": 0.39403653144836426
    },
    {
      "epoch": 0.0001169921875,
      "model_forward_time": 0.11456298828125,
      "step": 19168
    },
    {
      "epoch": 0.0001169921875,
      "step": 19168,
      "training_step_time": 0.3891265392303467
    },
    {
      "epoch": 0.000116998291015625,
      "model_forward_time": 0.11663269996643066,
      "step": 19169
    },
    {
      "epoch": 0.000116998291015625,
      "step": 19169,
      "training_step_time": 0.3923008441925049
    },
    {
      "epoch": 0.00011700439453125,
      "grad_norm": 0.10165727883577347,
      "learning_rate": 8.14231675310358e-05,
      "loss": 0.0521,
      "step": 19170
    },
    {
      "epoch": 0.00011700439453125,
      "model_forward_time": 0.11592626571655273,
      "step": 19170
    },
    {
      "epoch": 0.00011700439453125,
      "step": 19170,
      "training_step_time": 0.47010135650634766
    },
    {
      "epoch": 0.000117010498046875,
      "model_forward_time": 0.11627721786499023,
      "step": 19171
    },
    {
      "epoch": 0.000117010498046875,
      "step": 19171,
      "training_step_time": 0.5204460620880127
    },
    {
      "epoch": 0.0001170166015625,
      "model_forward_time": 0.11629080772399902,
      "step": 19172
    },
    {
      "epoch": 0.0001170166015625,
      "step": 19172,
      "training_step_time": 0.38218045234680176
    },
    {
      "epoch": 0.000117022705078125,
      "model_forward_time": 0.11498069763183594,
      "step": 19173
    },
    {
      "epoch": 0.000117022705078125,
      "step": 19173,
      "training_step_time": 0.43901586532592773
    },
    {
      "epoch": 0.00011702880859375,
      "model_forward_time": 0.11480402946472168,
      "step": 19174
    },
    {
      "epoch": 0.00011702880859375,
      "step": 19174,
      "training_step_time": 0.4290742874145508
    },
    {
      "epoch": 0.000117034912109375,
      "model_forward_time": 0.11487960815429688,
      "step": 19175
    },
    {
      "epoch": 0.000117034912109375,
      "step": 19175,
      "training_step_time": 0.44347286224365234
    },
    {
      "epoch": 0.000117041015625,
      "model_forward_time": 0.11481618881225586,
      "step": 19176
    },
    {
      "epoch": 0.000117041015625,
      "step": 19176,
      "training_step_time": 0.3909108638763428
    },
    {
      "epoch": 0.000117047119140625,
      "model_forward_time": 0.1150054931640625,
      "step": 19177
    },
    {
      "epoch": 0.000117047119140625,
      "step": 19177,
      "training_step_time": 0.5636529922485352
    },
    {
      "epoch": 0.00011705322265625,
      "model_forward_time": 0.11412620544433594,
      "step": 19178
    },
    {
      "epoch": 0.00011705322265625,
      "step": 19178,
      "training_step_time": 0.3892679214477539
    },
    {
      "epoch": 0.000117059326171875,
      "model_forward_time": 0.11455702781677246,
      "step": 19179
    },
    {
      "epoch": 0.000117059326171875,
      "step": 19179,
      "training_step_time": 0.4242897033691406
    },
    {
      "epoch": 0.0001170654296875,
      "grad_norm": 0.1182728186249733,
      "learning_rate": 8.140172719875979e-05,
      "loss": 0.0506,
      "step": 19180
    },
    {
      "epoch": 0.0001170654296875,
      "model_forward_time": 0.11495351791381836,
      "step": 19180
    },
    {
      "epoch": 0.0001170654296875,
      "step": 19180,
      "training_step_time": 0.4187910556793213
    },
    {
      "epoch": 0.000117071533203125,
      "model_forward_time": 0.11578154563903809,
      "step": 19181
    },
    {
      "epoch": 0.000117071533203125,
      "step": 19181,
      "training_step_time": 0.3944103717803955
    },
    {
      "epoch": 0.00011707763671875,
      "model_forward_time": 0.11536097526550293,
      "step": 19182
    },
    {
      "epoch": 0.00011707763671875,
      "step": 19182,
      "training_step_time": 0.3893163204193115
    },
    {
      "epoch": 0.000117083740234375,
      "model_forward_time": 0.1157083511352539,
      "step": 19183
    },
    {
      "epoch": 0.000117083740234375,
      "step": 19183,
      "training_step_time": 0.5102481842041016
    },
    {
      "epoch": 0.00011708984375,
      "model_forward_time": 0.11460304260253906,
      "step": 19184
    },
    {
      "epoch": 0.00011708984375,
      "step": 19184,
      "training_step_time": 0.3872964382171631
    },
    {
      "epoch": 0.000117095947265625,
      "model_forward_time": 0.11528372764587402,
      "step": 19185
    },
    {
      "epoch": 0.000117095947265625,
      "step": 19185,
      "training_step_time": 0.4103066921234131
    },
    {
      "epoch": 0.00011710205078125,
      "model_forward_time": 0.1148536205291748,
      "step": 19186
    },
    {
      "epoch": 0.00011710205078125,
      "step": 19186,
      "training_step_time": 0.388094425201416
    },
    {
      "epoch": 0.000117108154296875,
      "model_forward_time": 0.11491966247558594,
      "step": 19187
    },
    {
      "epoch": 0.000117108154296875,
      "step": 19187,
      "training_step_time": 0.401353120803833
    },
    {
      "epoch": 0.0001171142578125,
      "model_forward_time": 0.11514830589294434,
      "step": 19188
    },
    {
      "epoch": 0.0001171142578125,
      "step": 19188,
      "training_step_time": 0.40210700035095215
    },
    {
      "epoch": 0.000117120361328125,
      "model_forward_time": 0.11477303504943848,
      "step": 19189
    },
    {
      "epoch": 0.000117120361328125,
      "step": 19189,
      "training_step_time": 0.660935640335083
    },
    {
      "epoch": 0.00011712646484375,
      "grad_norm": 0.11646521091461182,
      "learning_rate": 8.138027732746818e-05,
      "loss": 0.0535,
      "step": 19190
    },
    {
      "epoch": 0.00011712646484375,
      "model_forward_time": 0.11502861976623535,
      "step": 19190
    },
    {
      "epoch": 0.00011712646484375,
      "step": 19190,
      "training_step_time": 0.4223940372467041
    },
    {
      "epoch": 0.000117132568359375,
      "model_forward_time": 0.11533761024475098,
      "step": 19191
    },
    {
      "epoch": 0.000117132568359375,
      "step": 19191,
      "training_step_time": 0.3986184597015381
    },
    {
      "epoch": 0.000117138671875,
      "model_forward_time": 0.11449384689331055,
      "step": 19192
    },
    {
      "epoch": 0.000117138671875,
      "step": 19192,
      "training_step_time": 0.4057168960571289
    },
    {
      "epoch": 0.000117144775390625,
      "model_forward_time": 0.11496567726135254,
      "step": 19193
    },
    {
      "epoch": 0.000117144775390625,
      "step": 19193,
      "training_step_time": 0.44095373153686523
    },
    {
      "epoch": 0.00011715087890625,
      "model_forward_time": 0.11435246467590332,
      "step": 19194
    },
    {
      "epoch": 0.00011715087890625,
      "step": 19194,
      "training_step_time": 0.3945920467376709
    },
    {
      "epoch": 0.000117156982421875,
      "model_forward_time": 0.11519646644592285,
      "step": 19195
    },
    {
      "epoch": 0.000117156982421875,
      "step": 19195,
      "training_step_time": 0.5926470756530762
    },
    {
      "epoch": 0.0001171630859375,
      "model_forward_time": 0.11417603492736816,
      "step": 19196
    },
    {
      "epoch": 0.0001171630859375,
      "step": 19196,
      "training_step_time": 0.3894650936126709
    },
    {
      "epoch": 0.000117169189453125,
      "model_forward_time": 0.11490178108215332,
      "step": 19197
    },
    {
      "epoch": 0.000117169189453125,
      "step": 19197,
      "training_step_time": 0.4243659973144531
    },
    {
      "epoch": 0.00011717529296875,
      "model_forward_time": 0.1146993637084961,
      "step": 19198
    },
    {
      "epoch": 0.00011717529296875,
      "step": 19198,
      "training_step_time": 0.4292151927947998
    },
    {
      "epoch": 0.000117181396484375,
      "model_forward_time": 0.11512589454650879,
      "step": 19199
    },
    {
      "epoch": 0.000117181396484375,
      "step": 19199,
      "training_step_time": 0.41813039779663086
    },
    {
      "epoch": 0.0001171875,
      "grad_norm": 0.1663629114627838,
      "learning_rate": 8.135881792367686e-05,
      "loss": 0.0508,
      "step": 19200
    },
    {
      "epoch": 0.0001171875,
      "model_forward_time": 0.11507153511047363,
      "step": 19200
    },
    {
      "epoch": 0.0001171875,
      "step": 19200,
      "training_step_time": 0.39405107498168945
    },
    {
      "epoch": 0.000117193603515625,
      "model_forward_time": 0.11433935165405273,
      "step": 19201
    },
    {
      "epoch": 0.000117193603515625,
      "step": 19201,
      "training_step_time": 0.5450232028961182
    },
    {
      "epoch": 0.00011719970703125,
      "model_forward_time": 0.11478137969970703,
      "step": 19202
    },
    {
      "epoch": 0.00011719970703125,
      "step": 19202,
      "training_step_time": 0.4218292236328125
    },
    {
      "epoch": 0.000117205810546875,
      "model_forward_time": 0.1142888069152832,
      "step": 19203
    },
    {
      "epoch": 0.000117205810546875,
      "step": 19203,
      "training_step_time": 0.3963792324066162
    },
    {
      "epoch": 0.0001172119140625,
      "model_forward_time": 0.11559367179870605,
      "step": 19204
    },
    {
      "epoch": 0.0001172119140625,
      "step": 19204,
      "training_step_time": 0.45926785469055176
    },
    {
      "epoch": 0.000117218017578125,
      "model_forward_time": 0.11460256576538086,
      "step": 19205
    },
    {
      "epoch": 0.000117218017578125,
      "step": 19205,
      "training_step_time": 0.3816795349121094
    },
    {
      "epoch": 0.00011722412109375,
      "model_forward_time": 0.11521029472351074,
      "step": 19206
    },
    {
      "epoch": 0.00011722412109375,
      "step": 19206,
      "training_step_time": 0.4121358394622803
    },
    {
      "epoch": 0.000117230224609375,
      "model_forward_time": 0.11475515365600586,
      "step": 19207
    },
    {
      "epoch": 0.000117230224609375,
      "step": 19207,
      "training_step_time": 0.5347940921783447
    },
    {
      "epoch": 0.000117236328125,
      "model_forward_time": 0.11500215530395508,
      "step": 19208
    },
    {
      "epoch": 0.000117236328125,
      "step": 19208,
      "training_step_time": 0.3921651840209961
    },
    {
      "epoch": 0.000117242431640625,
      "model_forward_time": 0.11475658416748047,
      "step": 19209
    },
    {
      "epoch": 0.000117242431640625,
      "step": 19209,
      "training_step_time": 0.38483357429504395
    },
    {
      "epoch": 0.00011724853515625,
      "grad_norm": 0.1960224211215973,
      "learning_rate": 8.133734899390464e-05,
      "loss": 0.0491,
      "step": 19210
    },
    {
      "epoch": 0.00011724853515625,
      "model_forward_time": 0.11613965034484863,
      "step": 19210
    },
    {
      "epoch": 0.00011724853515625,
      "step": 19210,
      "training_step_time": 0.38799428939819336
    },
    {
      "epoch": 0.000117254638671875,
      "model_forward_time": 0.11487817764282227,
      "step": 19211
    },
    {
      "epoch": 0.000117254638671875,
      "step": 19211,
      "training_step_time": 0.3648402690887451
    },
    {
      "epoch": 0.0001172607421875,
      "model_forward_time": 0.11539435386657715,
      "step": 19212
    },
    {
      "epoch": 0.0001172607421875,
      "step": 19212,
      "training_step_time": 0.4553954601287842
    },
    {
      "epoch": 0.000117266845703125,
      "model_forward_time": 0.11575198173522949,
      "step": 19213
    },
    {
      "epoch": 0.000117266845703125,
      "step": 19213,
      "training_step_time": 0.5122179985046387
    },
    {
      "epoch": 0.00011727294921875,
      "model_forward_time": 0.11482381820678711,
      "step": 19214
    },
    {
      "epoch": 0.00011727294921875,
      "step": 19214,
      "training_step_time": 0.4201064109802246
    },
    {
      "epoch": 0.000117279052734375,
      "model_forward_time": 0.11490297317504883,
      "step": 19215
    },
    {
      "epoch": 0.000117279052734375,
      "step": 19215,
      "training_step_time": 0.4810984134674072
    },
    {
      "epoch": 0.00011728515625,
      "model_forward_time": 0.1150505542755127,
      "step": 19216
    },
    {
      "epoch": 0.00011728515625,
      "step": 19216,
      "training_step_time": 0.40781474113464355
    },
    {
      "epoch": 0.000117291259765625,
      "model_forward_time": 0.11458659172058105,
      "step": 19217
    },
    {
      "epoch": 0.000117291259765625,
      "step": 19217,
      "training_step_time": 0.38504481315612793
    },
    {
      "epoch": 0.00011729736328125,
      "model_forward_time": 0.11528182029724121,
      "step": 19218
    },
    {
      "epoch": 0.00011729736328125,
      "step": 19218,
      "training_step_time": 0.49555158615112305
    },
    {
      "epoch": 0.000117303466796875,
      "model_forward_time": 0.11422467231750488,
      "step": 19219
    },
    {
      "epoch": 0.000117303466796875,
      "step": 19219,
      "training_step_time": 0.47669076919555664
    },
    {
      "epoch": 0.0001173095703125,
      "grad_norm": 0.17939841747283936,
      "learning_rate": 8.13158705446732e-05,
      "loss": 0.0524,
      "step": 19220
    },
    {
      "epoch": 0.0001173095703125,
      "model_forward_time": 0.11571979522705078,
      "step": 19220
    },
    {
      "epoch": 0.0001173095703125,
      "step": 19220,
      "training_step_time": 0.39531922340393066
    },
    {
      "epoch": 0.000117315673828125,
      "model_forward_time": 0.11426401138305664,
      "step": 19221
    },
    {
      "epoch": 0.000117315673828125,
      "step": 19221,
      "training_step_time": 0.3916046619415283
    },
    {
      "epoch": 0.00011732177734375,
      "model_forward_time": 0.11579632759094238,
      "step": 19222
    },
    {
      "epoch": 0.00011732177734375,
      "step": 19222,
      "training_step_time": 0.39395570755004883
    },
    {
      "epoch": 0.000117327880859375,
      "model_forward_time": 0.11481332778930664,
      "step": 19223
    },
    {
      "epoch": 0.000117327880859375,
      "step": 19223,
      "training_step_time": 0.3982429504394531
    },
    {
      "epoch": 0.000117333984375,
      "model_forward_time": 0.11463618278503418,
      "step": 19224
    },
    {
      "epoch": 0.000117333984375,
      "step": 19224,
      "training_step_time": 0.3963472843170166
    },
    {
      "epoch": 0.000117340087890625,
      "model_forward_time": 0.11562538146972656,
      "step": 19225
    },
    {
      "epoch": 0.000117340087890625,
      "step": 19225,
      "training_step_time": 0.5169508457183838
    },
    {
      "epoch": 0.00011734619140625,
      "model_forward_time": 0.11529326438903809,
      "step": 19226
    },
    {
      "epoch": 0.00011734619140625,
      "step": 19226,
      "training_step_time": 0.4258754253387451
    },
    {
      "epoch": 0.000117352294921875,
      "model_forward_time": 0.11511635780334473,
      "step": 19227
    },
    {
      "epoch": 0.000117352294921875,
      "step": 19227,
      "training_step_time": 0.4021773338317871
    },
    {
      "epoch": 0.0001173583984375,
      "model_forward_time": 0.11454558372497559,
      "step": 19228
    },
    {
      "epoch": 0.0001173583984375,
      "step": 19228,
      "training_step_time": 0.45863771438598633
    },
    {
      "epoch": 0.000117364501953125,
      "model_forward_time": 0.11492443084716797,
      "step": 19229
    },
    {
      "epoch": 0.000117364501953125,
      "step": 19229,
      "training_step_time": 0.486081600189209
    },
    {
      "epoch": 0.00011737060546875,
      "grad_norm": 0.2339634746313095,
      "learning_rate": 8.129438258250712e-05,
      "loss": 0.0576,
      "step": 19230
    },
    {
      "epoch": 0.00011737060546875,
      "model_forward_time": 0.1154329776763916,
      "step": 19230
    },
    {
      "epoch": 0.00011737060546875,
      "step": 19230,
      "training_step_time": 0.46930480003356934
    },
    {
      "epoch": 0.000117376708984375,
      "model_forward_time": 0.11517119407653809,
      "step": 19231
    },
    {
      "epoch": 0.000117376708984375,
      "step": 19231,
      "training_step_time": 0.4348609447479248
    },
    {
      "epoch": 0.0001173828125,
      "model_forward_time": 0.11439323425292969,
      "step": 19232
    },
    {
      "epoch": 0.0001173828125,
      "step": 19232,
      "training_step_time": 0.42362427711486816
    },
    {
      "epoch": 0.000117388916015625,
      "model_forward_time": 0.11527204513549805,
      "step": 19233
    },
    {
      "epoch": 0.000117388916015625,
      "step": 19233,
      "training_step_time": 0.40556955337524414
    },
    {
      "epoch": 0.00011739501953125,
      "model_forward_time": 0.11472034454345703,
      "step": 19234
    },
    {
      "epoch": 0.00011739501953125,
      "step": 19234,
      "training_step_time": 0.3915069103240967
    },
    {
      "epoch": 0.000117401123046875,
      "model_forward_time": 0.11534380912780762,
      "step": 19235
    },
    {
      "epoch": 0.000117401123046875,
      "step": 19235,
      "training_step_time": 0.38521385192871094
    },
    {
      "epoch": 0.0001174072265625,
      "model_forward_time": 0.11522889137268066,
      "step": 19236
    },
    {
      "epoch": 0.0001174072265625,
      "step": 19236,
      "training_step_time": 0.40154552459716797
    },
    {
      "epoch": 0.000117413330078125,
      "model_forward_time": 0.11519646644592285,
      "step": 19237
    },
    {
      "epoch": 0.000117413330078125,
      "step": 19237,
      "training_step_time": 0.5730173587799072
    },
    {
      "epoch": 0.00011741943359375,
      "model_forward_time": 0.11558985710144043,
      "step": 19238
    },
    {
      "epoch": 0.00011741943359375,
      "step": 19238,
      "training_step_time": 0.38416337966918945
    },
    {
      "epoch": 0.000117425537109375,
      "model_forward_time": 0.11476731300354004,
      "step": 19239
    },
    {
      "epoch": 0.000117425537109375,
      "step": 19239,
      "training_step_time": 0.3668532371520996
    },
    {
      "epoch": 0.000117431640625,
      "grad_norm": 0.15616793930530548,
      "learning_rate": 8.127288511393392e-05,
      "loss": 0.0508,
      "step": 19240
    },
    {
      "epoch": 0.000117431640625,
      "model_forward_time": 0.11476850509643555,
      "step": 19240
    },
    {
      "epoch": 0.000117431640625,
      "step": 19240,
      "training_step_time": 0.4262220859527588
    },
    {
      "epoch": 0.000117437744140625,
      "model_forward_time": 0.1145939826965332,
      "step": 19241
    },
    {
      "epoch": 0.000117437744140625,
      "step": 19241,
      "training_step_time": 0.3851337432861328
    },
    {
      "epoch": 0.00011744384765625,
      "model_forward_time": 0.11533379554748535,
      "step": 19242
    },
    {
      "epoch": 0.00011744384765625,
      "step": 19242,
      "training_step_time": 0.5232748985290527
    },
    {
      "epoch": 0.000117449951171875,
      "model_forward_time": 0.11502313613891602,
      "step": 19243
    },
    {
      "epoch": 0.000117449951171875,
      "step": 19243,
      "training_step_time": 0.41442155838012695
    },
    {
      "epoch": 0.0001174560546875,
      "model_forward_time": 0.11470937728881836,
      "step": 19244
    },
    {
      "epoch": 0.0001174560546875,
      "step": 19244,
      "training_step_time": 0.38565707206726074
    },
    {
      "epoch": 0.000117462158203125,
      "model_forward_time": 0.11511445045471191,
      "step": 19245
    },
    {
      "epoch": 0.000117462158203125,
      "step": 19245,
      "training_step_time": 0.40862154960632324
    },
    {
      "epoch": 0.00011746826171875,
      "model_forward_time": 0.11516761779785156,
      "step": 19246
    },
    {
      "epoch": 0.00011746826171875,
      "step": 19246,
      "training_step_time": 0.42615413665771484
    },
    {
      "epoch": 0.000117474365234375,
      "model_forward_time": 0.1152803897857666,
      "step": 19247
    },
    {
      "epoch": 0.000117474365234375,
      "step": 19247,
      "training_step_time": 0.46538686752319336
    },
    {
      "epoch": 0.00011748046875,
      "model_forward_time": 0.11531472206115723,
      "step": 19248
    },
    {
      "epoch": 0.00011748046875,
      "step": 19248,
      "training_step_time": 0.723517656326294
    },
    {
      "epoch": 0.000117486572265625,
      "model_forward_time": 0.1148843765258789,
      "step": 19249
    },
    {
      "epoch": 0.000117486572265625,
      "step": 19249,
      "training_step_time": 0.38725876808166504
    },
    {
      "epoch": 0.00011749267578125,
      "grad_norm": 0.15925557911396027,
      "learning_rate": 8.125137814548393e-05,
      "loss": 0.0541,
      "step": 19250
    },
    {
      "epoch": 0.00011749267578125,
      "model_forward_time": 0.11408400535583496,
      "step": 19250
    },
    {
      "epoch": 0.00011749267578125,
      "step": 19250,
      "training_step_time": 0.3899843692779541
    },
    {
      "epoch": 0.000117498779296875,
      "model_forward_time": 0.1144399642944336,
      "step": 19251
    },
    {
      "epoch": 0.000117498779296875,
      "step": 19251,
      "training_step_time": 0.37853455543518066
    },
    {
      "epoch": 0.0001175048828125,
      "model_forward_time": 0.11489009857177734,
      "step": 19252
    },
    {
      "epoch": 0.0001175048828125,
      "step": 19252,
      "training_step_time": 0.38848304748535156
    },
    {
      "epoch": 0.000117510986328125,
      "model_forward_time": 0.11470174789428711,
      "step": 19253
    },
    {
      "epoch": 0.000117510986328125,
      "step": 19253,
      "training_step_time": 0.36269664764404297
    },
    {
      "epoch": 0.00011751708984375,
      "model_forward_time": 0.11519718170166016,
      "step": 19254
    },
    {
      "epoch": 0.00011751708984375,
      "step": 19254,
      "training_step_time": 0.9234521389007568
    },
    {
      "epoch": 0.000117523193359375,
      "model_forward_time": 0.1148519515991211,
      "step": 19255
    },
    {
      "epoch": 0.000117523193359375,
      "step": 19255,
      "training_step_time": 0.3996562957763672
    },
    {
      "epoch": 0.000117529296875,
      "model_forward_time": 0.11431670188903809,
      "step": 19256
    },
    {
      "epoch": 0.000117529296875,
      "step": 19256,
      "training_step_time": 0.3963942527770996
    },
    {
      "epoch": 0.000117535400390625,
      "model_forward_time": 0.1138768196105957,
      "step": 19257
    },
    {
      "epoch": 0.000117535400390625,
      "step": 19257,
      "training_step_time": 0.41237664222717285
    },
    {
      "epoch": 0.00011754150390625,
      "model_forward_time": 0.11481881141662598,
      "step": 19258
    },
    {
      "epoch": 0.00011754150390625,
      "step": 19258,
      "training_step_time": 0.4320998191833496
    },
    {
      "epoch": 0.000117547607421875,
      "model_forward_time": 0.11425447463989258,
      "step": 19259
    },
    {
      "epoch": 0.000117547607421875,
      "step": 19259,
      "training_step_time": 0.43607234954833984
    },
    {
      "epoch": 0.0001175537109375,
      "grad_norm": 0.13292506337165833,
      "learning_rate": 8.12298616836904e-05,
      "loss": 0.0514,
      "step": 19260
    },
    {
      "epoch": 0.0001175537109375,
      "model_forward_time": 0.11465930938720703,
      "step": 19260
    },
    {
      "epoch": 0.0001175537109375,
      "step": 19260,
      "training_step_time": 0.7042636871337891
    },
    {
      "epoch": 0.000117559814453125,
      "model_forward_time": 0.11415410041809082,
      "step": 19261
    },
    {
      "epoch": 0.000117559814453125,
      "step": 19261,
      "training_step_time": 0.3894939422607422
    },
    {
      "epoch": 0.00011756591796875,
      "model_forward_time": 0.11446094512939453,
      "step": 19262
    },
    {
      "epoch": 0.00011756591796875,
      "step": 19262,
      "training_step_time": 0.3868696689605713
    },
    {
      "epoch": 0.000117572021484375,
      "model_forward_time": 0.11435294151306152,
      "step": 19263
    },
    {
      "epoch": 0.000117572021484375,
      "step": 19263,
      "training_step_time": 0.390134334564209
    },
    {
      "epoch": 0.000117578125,
      "model_forward_time": 0.11461877822875977,
      "step": 19264
    },
    {
      "epoch": 0.000117578125,
      "step": 19264,
      "training_step_time": 0.39125967025756836
    },
    {
      "epoch": 0.000117584228515625,
      "model_forward_time": 0.11429119110107422,
      "step": 19265
    },
    {
      "epoch": 0.000117584228515625,
      "step": 19265,
      "training_step_time": 0.38907599449157715
    },
    {
      "epoch": 0.00011759033203125,
      "model_forward_time": 0.11474156379699707,
      "step": 19266
    },
    {
      "epoch": 0.00011759033203125,
      "step": 19266,
      "training_step_time": 0.8056962490081787
    },
    {
      "epoch": 0.000117596435546875,
      "model_forward_time": 0.11471295356750488,
      "step": 19267
    },
    {
      "epoch": 0.000117596435546875,
      "step": 19267,
      "training_step_time": 0.45291590690612793
    },
    {
      "epoch": 0.0001176025390625,
      "model_forward_time": 0.11405611038208008,
      "step": 19268
    },
    {
      "epoch": 0.0001176025390625,
      "step": 19268,
      "training_step_time": 0.40792369842529297
    },
    {
      "epoch": 0.000117608642578125,
      "model_forward_time": 0.11405801773071289,
      "step": 19269
    },
    {
      "epoch": 0.000117608642578125,
      "step": 19269,
      "training_step_time": 0.4204540252685547
    },
    {
      "epoch": 0.00011761474609375,
      "grad_norm": 0.18840451538562775,
      "learning_rate": 8.120833573508948e-05,
      "loss": 0.0521,
      "step": 19270
    },
    {
      "epoch": 0.00011761474609375,
      "model_forward_time": 0.1144411563873291,
      "step": 19270
    },
    {
      "epoch": 0.00011761474609375,
      "step": 19270,
      "training_step_time": 0.41240477561950684
    },
    {
      "epoch": 0.000117620849609375,
      "model_forward_time": 0.11414074897766113,
      "step": 19271
    },
    {
      "epoch": 0.000117620849609375,
      "step": 19271,
      "training_step_time": 0.3757333755493164
    },
    {
      "epoch": 0.000117626953125,
      "model_forward_time": 0.11465716361999512,
      "step": 19272
    },
    {
      "epoch": 0.000117626953125,
      "step": 19272,
      "training_step_time": 0.7061316967010498
    },
    {
      "epoch": 0.000117633056640625,
      "model_forward_time": 0.11434316635131836,
      "step": 19273
    },
    {
      "epoch": 0.000117633056640625,
      "step": 19273,
      "training_step_time": 0.38591527938842773
    },
    {
      "epoch": 0.00011763916015625,
      "model_forward_time": 0.11488556861877441,
      "step": 19274
    },
    {
      "epoch": 0.00011763916015625,
      "step": 19274,
      "training_step_time": 0.3762028217315674
    },
    {
      "epoch": 0.000117645263671875,
      "model_forward_time": 0.11429023742675781,
      "step": 19275
    },
    {
      "epoch": 0.000117645263671875,
      "step": 19275,
      "training_step_time": 0.38457179069519043
    },
    {
      "epoch": 0.0001176513671875,
      "model_forward_time": 0.11479401588439941,
      "step": 19276
    },
    {
      "epoch": 0.0001176513671875,
      "step": 19276,
      "training_step_time": 0.3807218074798584
    },
    {
      "epoch": 0.000117657470703125,
      "model_forward_time": 0.11525893211364746,
      "step": 19277
    },
    {
      "epoch": 0.000117657470703125,
      "step": 19277,
      "training_step_time": 0.38581275939941406
    },
    {
      "epoch": 0.00011766357421875,
      "model_forward_time": 0.11453056335449219,
      "step": 19278
    },
    {
      "epoch": 0.00011766357421875,
      "step": 19278,
      "training_step_time": 0.9661712646484375
    },
    {
      "epoch": 0.000117669677734375,
      "model_forward_time": 0.11486196517944336,
      "step": 19279
    },
    {
      "epoch": 0.000117669677734375,
      "step": 19279,
      "training_step_time": 0.389920711517334
    },
    {
      "epoch": 0.00011767578125,
      "grad_norm": 0.22829663753509521,
      "learning_rate": 8.118680030622014e-05,
      "loss": 0.0556,
      "step": 19280
    },
    {
      "epoch": 0.00011767578125,
      "model_forward_time": 0.11453485488891602,
      "step": 19280
    },
    {
      "epoch": 0.00011767578125,
      "step": 19280,
      "training_step_time": 0.4613950252532959
    },
    {
      "epoch": 0.000117681884765625,
      "model_forward_time": 0.11464309692382812,
      "step": 19281
    },
    {
      "epoch": 0.000117681884765625,
      "step": 19281,
      "training_step_time": 0.4010791778564453
    },
    {
      "epoch": 0.00011768798828125,
      "model_forward_time": 0.11389589309692383,
      "step": 19282
    },
    {
      "epoch": 0.00011768798828125,
      "step": 19282,
      "training_step_time": 0.4215545654296875
    },
    {
      "epoch": 0.000117694091796875,
      "model_forward_time": 0.11410164833068848,
      "step": 19283
    },
    {
      "epoch": 0.000117694091796875,
      "step": 19283,
      "training_step_time": 0.4048759937286377
    },
    {
      "epoch": 0.0001177001953125,
      "model_forward_time": 0.1145777702331543,
      "step": 19284
    },
    {
      "epoch": 0.0001177001953125,
      "step": 19284,
      "training_step_time": 0.7028164863586426
    },
    {
      "epoch": 0.000117706298828125,
      "model_forward_time": 0.11411309242248535,
      "step": 19285
    },
    {
      "epoch": 0.000117706298828125,
      "step": 19285,
      "training_step_time": 0.49889087677001953
    },
    {
      "epoch": 0.00011771240234375,
      "model_forward_time": 0.11419987678527832,
      "step": 19286
    },
    {
      "epoch": 0.00011771240234375,
      "step": 19286,
      "training_step_time": 0.3929157257080078
    },
    {
      "epoch": 0.000117718505859375,
      "model_forward_time": 0.11380434036254883,
      "step": 19287
    },
    {
      "epoch": 0.000117718505859375,
      "step": 19287,
      "training_step_time": 0.383683443069458
    },
    {
      "epoch": 0.000117724609375,
      "model_forward_time": 0.11447262763977051,
      "step": 19288
    },
    {
      "epoch": 0.000117724609375,
      "step": 19288,
      "training_step_time": 0.3833773136138916
    },
    {
      "epoch": 0.000117730712890625,
      "model_forward_time": 0.11441969871520996,
      "step": 19289
    },
    {
      "epoch": 0.000117730712890625,
      "step": 19289,
      "training_step_time": 0.3851306438446045
    },
    {
      "epoch": 0.00011773681640625,
      "grad_norm": 0.26427239179611206,
      "learning_rate": 8.116525540362434e-05,
      "loss": 0.0569,
      "step": 19290
    },
    {
      "epoch": 0.00011773681640625,
      "model_forward_time": 0.11534357070922852,
      "step": 19290
    },
    {
      "epoch": 0.00011773681640625,
      "step": 19290,
      "training_step_time": 0.7121610641479492
    },
    {
      "epoch": 0.000117742919921875,
      "model_forward_time": 0.11452221870422363,
      "step": 19291
    },
    {
      "epoch": 0.000117742919921875,
      "step": 19291,
      "training_step_time": 0.3919234275817871
    },
    {
      "epoch": 0.0001177490234375,
      "model_forward_time": 0.11465215682983398,
      "step": 19292
    },
    {
      "epoch": 0.0001177490234375,
      "step": 19292,
      "training_step_time": 0.4595811367034912
    },
    {
      "epoch": 0.000117755126953125,
      "model_forward_time": 0.11450386047363281,
      "step": 19293
    },
    {
      "epoch": 0.000117755126953125,
      "step": 19293,
      "training_step_time": 0.46718311309814453
    },
    {
      "epoch": 0.00011776123046875,
      "model_forward_time": 0.11423134803771973,
      "step": 19294
    },
    {
      "epoch": 0.00011776123046875,
      "step": 19294,
      "training_step_time": 0.41861939430236816
    },
    {
      "epoch": 0.000117767333984375,
      "model_forward_time": 0.1147150993347168,
      "step": 19295
    },
    {
      "epoch": 0.000117767333984375,
      "step": 19295,
      "training_step_time": 0.39861559867858887
    },
    {
      "epoch": 0.0001177734375,
      "model_forward_time": 0.11491751670837402,
      "step": 19296
    },
    {
      "epoch": 0.0001177734375,
      "step": 19296,
      "training_step_time": 0.5130035877227783
    },
    {
      "epoch": 0.000117779541015625,
      "model_forward_time": 0.1141510009765625,
      "step": 19297
    },
    {
      "epoch": 0.000117779541015625,
      "step": 19297,
      "training_step_time": 0.40735626220703125
    },
    {
      "epoch": 0.00011778564453125,
      "model_forward_time": 0.11440849304199219,
      "step": 19298
    },
    {
      "epoch": 0.00011778564453125,
      "step": 19298,
      "training_step_time": 0.42208409309387207
    },
    {
      "epoch": 0.000117791748046875,
      "model_forward_time": 0.11444306373596191,
      "step": 19299
    },
    {
      "epoch": 0.000117791748046875,
      "step": 19299,
      "training_step_time": 0.39095139503479004
    },
    {
      "epoch": 0.0001177978515625,
      "grad_norm": 0.2018076628446579,
      "learning_rate": 8.114370103384681e-05,
      "loss": 0.0509,
      "step": 19300
    },
    {
      "epoch": 0.0001177978515625,
      "model_forward_time": 0.11486601829528809,
      "step": 19300
    },
    {
      "epoch": 0.0001177978515625,
      "step": 19300,
      "training_step_time": 0.3898346424102783
    },
    {
      "epoch": 0.000117803955078125,
      "model_forward_time": 0.11468267440795898,
      "step": 19301
    },
    {
      "epoch": 0.000117803955078125,
      "step": 19301,
      "training_step_time": 0.41308021545410156
    },
    {
      "epoch": 0.00011781005859375,
      "model_forward_time": 0.11498904228210449,
      "step": 19302
    },
    {
      "epoch": 0.00011781005859375,
      "step": 19302,
      "training_step_time": 0.8294317722320557
    },
    {
      "epoch": 0.000117816162109375,
      "model_forward_time": 0.11446523666381836,
      "step": 19303
    },
    {
      "epoch": 0.000117816162109375,
      "step": 19303,
      "training_step_time": 0.3803684711456299
    },
    {
      "epoch": 0.000117822265625,
      "model_forward_time": 0.1147463321685791,
      "step": 19304
    },
    {
      "epoch": 0.000117822265625,
      "step": 19304,
      "training_step_time": 0.3969554901123047
    },
    {
      "epoch": 0.000117828369140625,
      "model_forward_time": 0.11445903778076172,
      "step": 19305
    },
    {
      "epoch": 0.000117828369140625,
      "step": 19305,
      "training_step_time": 0.3637514114379883
    },
    {
      "epoch": 0.00011783447265625,
      "model_forward_time": 0.11401510238647461,
      "step": 19306
    },
    {
      "epoch": 0.00011783447265625,
      "step": 19306,
      "training_step_time": 0.43435192108154297
    },
    {
      "epoch": 0.000117840576171875,
      "model_forward_time": 0.11454486846923828,
      "step": 19307
    },
    {
      "epoch": 0.000117840576171875,
      "step": 19307,
      "training_step_time": 0.4142894744873047
    },
    {
      "epoch": 0.0001178466796875,
      "model_forward_time": 0.11499762535095215,
      "step": 19308
    },
    {
      "epoch": 0.0001178466796875,
      "step": 19308,
      "training_step_time": 0.583059549331665
    },
    {
      "epoch": 0.000117852783203125,
      "model_forward_time": 0.11504173278808594,
      "step": 19309
    },
    {
      "epoch": 0.000117852783203125,
      "step": 19309,
      "training_step_time": 0.41600966453552246
    },
    {
      "epoch": 0.00011785888671875,
      "grad_norm": 0.21718156337738037,
      "learning_rate": 8.11221372034352e-05,
      "loss": 0.0514,
      "step": 19310
    },
    {
      "epoch": 0.00011785888671875,
      "model_forward_time": 0.11459493637084961,
      "step": 19310
    },
    {
      "epoch": 0.00011785888671875,
      "step": 19310,
      "training_step_time": 0.3952202796936035
    },
    {
      "epoch": 0.000117864990234375,
      "model_forward_time": 0.11512970924377441,
      "step": 19311
    },
    {
      "epoch": 0.000117864990234375,
      "step": 19311,
      "training_step_time": 0.46807360649108887
    },
    {
      "epoch": 0.00011787109375,
      "model_forward_time": 0.11483073234558105,
      "step": 19312
    },
    {
      "epoch": 0.00011787109375,
      "step": 19312,
      "training_step_time": 0.4045255184173584
    },
    {
      "epoch": 0.000117877197265625,
      "model_forward_time": 0.11500930786132812,
      "step": 19313
    },
    {
      "epoch": 0.000117877197265625,
      "step": 19313,
      "training_step_time": 0.43015050888061523
    },
    {
      "epoch": 0.00011788330078125,
      "model_forward_time": 0.11469459533691406,
      "step": 19314
    },
    {
      "epoch": 0.00011788330078125,
      "step": 19314,
      "training_step_time": 0.7357819080352783
    },
    {
      "epoch": 0.000117889404296875,
      "model_forward_time": 0.11427474021911621,
      "step": 19315
    },
    {
      "epoch": 0.000117889404296875,
      "step": 19315,
      "training_step_time": 0.3831613063812256
    },
    {
      "epoch": 0.0001178955078125,
      "model_forward_time": 0.11421680450439453,
      "step": 19316
    },
    {
      "epoch": 0.0001178955078125,
      "step": 19316,
      "training_step_time": 0.38245582580566406
    },
    {
      "epoch": 0.000117901611328125,
      "model_forward_time": 0.11387419700622559,
      "step": 19317
    },
    {
      "epoch": 0.000117901611328125,
      "step": 19317,
      "training_step_time": 0.3898317813873291
    },
    {
      "epoch": 0.00011790771484375,
      "model_forward_time": 0.11425280570983887,
      "step": 19318
    },
    {
      "epoch": 0.00011790771484375,
      "step": 19318,
      "training_step_time": 0.39893555641174316
    },
    {
      "epoch": 0.000117913818359375,
      "model_forward_time": 0.11424756050109863,
      "step": 19319
    },
    {
      "epoch": 0.000117913818359375,
      "step": 19319,
      "training_step_time": 0.36806297302246094
    },
    {
      "epoch": 0.000117919921875,
      "grad_norm": 0.21499748528003693,
      "learning_rate": 8.110056391894005e-05,
      "loss": 0.0481,
      "step": 19320
    },
    {
      "epoch": 0.000117919921875,
      "model_forward_time": 0.11500120162963867,
      "step": 19320
    },
    {
      "epoch": 0.000117919921875,
      "step": 19320,
      "training_step_time": 0.5810990333557129
    },
    {
      "epoch": 0.000117926025390625,
      "model_forward_time": 0.11464047431945801,
      "step": 19321
    },
    {
      "epoch": 0.000117926025390625,
      "step": 19321,
      "training_step_time": 0.4591834545135498
    },
    {
      "epoch": 0.00011793212890625,
      "model_forward_time": 0.11523818969726562,
      "step": 19322
    },
    {
      "epoch": 0.00011793212890625,
      "step": 19322,
      "training_step_time": 0.4005610942840576
    },
    {
      "epoch": 0.000117938232421875,
      "model_forward_time": 0.11472964286804199,
      "step": 19323
    },
    {
      "epoch": 0.000117938232421875,
      "step": 19323,
      "training_step_time": 0.41452646255493164
    },
    {
      "epoch": 0.0001179443359375,
      "model_forward_time": 0.11439156532287598,
      "step": 19324
    },
    {
      "epoch": 0.0001179443359375,
      "step": 19324,
      "training_step_time": 0.388690710067749
    },
    {
      "epoch": 0.000117950439453125,
      "model_forward_time": 0.11466383934020996,
      "step": 19325
    },
    {
      "epoch": 0.000117950439453125,
      "step": 19325,
      "training_step_time": 0.423107385635376
    },
    {
      "epoch": 0.00011795654296875,
      "model_forward_time": 0.11532115936279297,
      "step": 19326
    },
    {
      "epoch": 0.00011795654296875,
      "step": 19326,
      "training_step_time": 0.3999817371368408
    },
    {
      "epoch": 0.000117962646484375,
      "model_forward_time": 0.11500787734985352,
      "step": 19327
    },
    {
      "epoch": 0.000117962646484375,
      "step": 19327,
      "training_step_time": 0.4367177486419678
    },
    {
      "epoch": 0.00011796875,
      "model_forward_time": 0.11532449722290039,
      "step": 19328
    },
    {
      "epoch": 0.00011796875,
      "step": 19328,
      "training_step_time": 0.3893733024597168
    },
    {
      "epoch": 0.000117974853515625,
      "model_forward_time": 0.1150350570678711,
      "step": 19329
    },
    {
      "epoch": 0.000117974853515625,
      "step": 19329,
      "training_step_time": 0.38705015182495117
    },
    {
      "epoch": 0.00011798095703125,
      "grad_norm": 0.17063665390014648,
      "learning_rate": 8.107898118691473e-05,
      "loss": 0.049,
      "step": 19330
    },
    {
      "epoch": 0.00011798095703125,
      "model_forward_time": 0.11534881591796875,
      "step": 19330
    },
    {
      "epoch": 0.00011798095703125,
      "step": 19330,
      "training_step_time": 0.3859386444091797
    },
    {
      "epoch": 0.000117987060546875,
      "model_forward_time": 0.11546802520751953,
      "step": 19331
    },
    {
      "epoch": 0.000117987060546875,
      "step": 19331,
      "training_step_time": 0.39339613914489746
    },
    {
      "epoch": 0.0001179931640625,
      "model_forward_time": 0.11513829231262207,
      "step": 19332
    },
    {
      "epoch": 0.0001179931640625,
      "step": 19332,
      "training_step_time": 0.8900752067565918
    },
    {
      "epoch": 0.000117999267578125,
      "model_forward_time": 0.11522412300109863,
      "step": 19333
    },
    {
      "epoch": 0.000117999267578125,
      "step": 19333,
      "training_step_time": 0.40857648849487305
    },
    {
      "epoch": 0.00011800537109375,
      "model_forward_time": 0.11406159400939941,
      "step": 19334
    },
    {
      "epoch": 0.00011800537109375,
      "step": 19334,
      "training_step_time": 0.4418184757232666
    },
    {
      "epoch": 0.000118011474609375,
      "model_forward_time": 0.11474013328552246,
      "step": 19335
    },
    {
      "epoch": 0.000118011474609375,
      "step": 19335,
      "training_step_time": 0.40883636474609375
    },
    {
      "epoch": 0.000118017578125,
      "model_forward_time": 0.11427903175354004,
      "step": 19336
    },
    {
      "epoch": 0.000118017578125,
      "step": 19336,
      "training_step_time": 0.4027395248413086
    },
    {
      "epoch": 0.000118023681640625,
      "model_forward_time": 0.11442399024963379,
      "step": 19337
    },
    {
      "epoch": 0.000118023681640625,
      "step": 19337,
      "training_step_time": 0.4534153938293457
    },
    {
      "epoch": 0.00011802978515625,
      "model_forward_time": 0.1147468090057373,
      "step": 19338
    },
    {
      "epoch": 0.00011802978515625,
      "step": 19338,
      "training_step_time": 0.3953366279602051
    },
    {
      "epoch": 0.000118035888671875,
      "model_forward_time": 0.11408114433288574,
      "step": 19339
    },
    {
      "epoch": 0.000118035888671875,
      "step": 19339,
      "training_step_time": 0.3953969478607178
    },
    {
      "epoch": 0.0001180419921875,
      "grad_norm": 0.15280063450336456,
      "learning_rate": 8.105738901391552e-05,
      "loss": 0.0512,
      "step": 19340
    },
    {
      "epoch": 0.0001180419921875,
      "model_forward_time": 0.11576128005981445,
      "step": 19340
    },
    {
      "epoch": 0.0001180419921875,
      "step": 19340,
      "training_step_time": 0.39162707328796387
    },
    {
      "epoch": 0.000118048095703125,
      "model_forward_time": 0.11476683616638184,
      "step": 19341
    },
    {
      "epoch": 0.000118048095703125,
      "step": 19341,
      "training_step_time": 0.4355933666229248
    },
    {
      "epoch": 0.00011805419921875,
      "model_forward_time": 0.11467123031616211,
      "step": 19342
    },
    {
      "epoch": 0.00011805419921875,
      "step": 19342,
      "training_step_time": 0.38260817527770996
    },
    {
      "epoch": 0.000118060302734375,
      "model_forward_time": 0.11426949501037598,
      "step": 19343
    },
    {
      "epoch": 0.000118060302734375,
      "step": 19343,
      "training_step_time": 0.4105391502380371
    },
    {
      "epoch": 0.00011806640625,
      "model_forward_time": 0.11559057235717773,
      "step": 19344
    },
    {
      "epoch": 0.00011806640625,
      "step": 19344,
      "training_step_time": 0.46457648277282715
    },
    {
      "epoch": 0.000118072509765625,
      "model_forward_time": 0.1153862476348877,
      "step": 19345
    },
    {
      "epoch": 0.000118072509765625,
      "step": 19345,
      "training_step_time": 0.3895292282104492
    },
    {
      "epoch": 0.00011807861328125,
      "model_forward_time": 0.11527562141418457,
      "step": 19346
    },
    {
      "epoch": 0.00011807861328125,
      "step": 19346,
      "training_step_time": 0.3960611820220947
    },
    {
      "epoch": 0.000118084716796875,
      "model_forward_time": 0.11582136154174805,
      "step": 19347
    },
    {
      "epoch": 0.000118084716796875,
      "step": 19347,
      "training_step_time": 0.39690113067626953
    },
    {
      "epoch": 0.0001180908203125,
      "model_forward_time": 0.11566758155822754,
      "step": 19348
    },
    {
      "epoch": 0.0001180908203125,
      "step": 19348,
      "training_step_time": 0.40368175506591797
    },
    {
      "epoch": 0.000118096923828125,
      "model_forward_time": 0.11533427238464355,
      "step": 19349
    },
    {
      "epoch": 0.000118096923828125,
      "step": 19349,
      "training_step_time": 0.496734619140625
    },
    {
      "epoch": 0.00011810302734375,
      "grad_norm": 0.17787253856658936,
      "learning_rate": 8.103578740650156e-05,
      "loss": 0.0508,
      "step": 19350
    },
    {
      "epoch": 0.00011810302734375,
      "model_forward_time": 0.11599421501159668,
      "step": 19350
    },
    {
      "epoch": 0.00011810302734375,
      "step": 19350,
      "training_step_time": 1.0531947612762451
    },
    {
      "epoch": 0.000118109130859375,
      "model_forward_time": 0.11367392539978027,
      "step": 19351
    },
    {
      "epoch": 0.000118109130859375,
      "step": 19351,
      "training_step_time": 0.3781280517578125
    },
    {
      "epoch": 0.000118115234375,
      "model_forward_time": 0.11345219612121582,
      "step": 19352
    },
    {
      "epoch": 0.000118115234375,
      "step": 19352,
      "training_step_time": 0.40952205657958984
    },
    {
      "epoch": 0.000118121337890625,
      "model_forward_time": 0.1138312816619873,
      "step": 19353
    },
    {
      "epoch": 0.000118121337890625,
      "step": 19353,
      "training_step_time": 0.39287400245666504
    },
    {
      "epoch": 0.00011812744140625,
      "model_forward_time": 0.1138458251953125,
      "step": 19354
    },
    {
      "epoch": 0.00011812744140625,
      "step": 19354,
      "training_step_time": 0.4632530212402344
    },
    {
      "epoch": 0.000118133544921875,
      "model_forward_time": 0.11451172828674316,
      "step": 19355
    },
    {
      "epoch": 0.000118133544921875,
      "step": 19355,
      "training_step_time": 0.385439395904541
    },
    {
      "epoch": 0.0001181396484375,
      "model_forward_time": 0.11512947082519531,
      "step": 19356
    },
    {
      "epoch": 0.0001181396484375,
      "step": 19356,
      "training_step_time": 0.612908124923706
    },
    {
      "epoch": 0.000118145751953125,
      "model_forward_time": 0.11443591117858887,
      "step": 19357
    },
    {
      "epoch": 0.000118145751953125,
      "step": 19357,
      "training_step_time": 0.3972666263580322
    },
    {
      "epoch": 0.00011815185546875,
      "model_forward_time": 0.11436343193054199,
      "step": 19358
    },
    {
      "epoch": 0.00011815185546875,
      "step": 19358,
      "training_step_time": 0.43174242973327637
    },
    {
      "epoch": 0.000118157958984375,
      "model_forward_time": 0.11468219757080078,
      "step": 19359
    },
    {
      "epoch": 0.000118157958984375,
      "step": 19359,
      "training_step_time": 0.40363287925720215
    },
    {
      "epoch": 0.0001181640625,
      "grad_norm": 0.1079459860920906,
      "learning_rate": 8.101417637123484e-05,
      "loss": 0.0461,
      "step": 19360
    },
    {
      "epoch": 0.0001181640625,
      "model_forward_time": 0.1147301197052002,
      "step": 19360
    },
    {
      "epoch": 0.0001181640625,
      "step": 19360,
      "training_step_time": 0.4002559185028076
    },
    {
      "epoch": 0.000118170166015625,
      "model_forward_time": 0.1150205135345459,
      "step": 19361
    },
    {
      "epoch": 0.000118170166015625,
      "step": 19361,
      "training_step_time": 0.46510815620422363
    },
    {
      "epoch": 0.00011817626953125,
      "model_forward_time": 0.11478209495544434,
      "step": 19362
    },
    {
      "epoch": 0.00011817626953125,
      "step": 19362,
      "training_step_time": 0.6968202590942383
    },
    {
      "epoch": 0.000118182373046875,
      "model_forward_time": 0.11439156532287598,
      "step": 19363
    },
    {
      "epoch": 0.000118182373046875,
      "step": 19363,
      "training_step_time": 0.4248819351196289
    },
    {
      "epoch": 0.0001181884765625,
      "model_forward_time": 0.11448121070861816,
      "step": 19364
    },
    {
      "epoch": 0.0001181884765625,
      "step": 19364,
      "training_step_time": 0.37993478775024414
    },
    {
      "epoch": 0.000118194580078125,
      "model_forward_time": 0.11421799659729004,
      "step": 19365
    },
    {
      "epoch": 0.000118194580078125,
      "step": 19365,
      "training_step_time": 0.3923819065093994
    },
    {
      "epoch": 0.00011820068359375,
      "model_forward_time": 0.1143956184387207,
      "step": 19366
    },
    {
      "epoch": 0.00011820068359375,
      "step": 19366,
      "training_step_time": 0.38927173614501953
    },
    {
      "epoch": 0.000118206787109375,
      "model_forward_time": 0.11443638801574707,
      "step": 19367
    },
    {
      "epoch": 0.000118206787109375,
      "step": 19367,
      "training_step_time": 0.41629576683044434
    },
    {
      "epoch": 0.000118212890625,
      "model_forward_time": 0.1149139404296875,
      "step": 19368
    },
    {
      "epoch": 0.000118212890625,
      "step": 19368,
      "training_step_time": 0.8097090721130371
    },
    {
      "epoch": 0.000118218994140625,
      "model_forward_time": 0.11446428298950195,
      "step": 19369
    },
    {
      "epoch": 0.000118218994140625,
      "step": 19369,
      "training_step_time": 0.398026704788208
    },
    {
      "epoch": 0.00011822509765625,
      "grad_norm": 0.1459958255290985,
      "learning_rate": 8.099255591468022e-05,
      "loss": 0.05,
      "step": 19370
    },
    {
      "epoch": 0.00011822509765625,
      "model_forward_time": 0.11415767669677734,
      "step": 19370
    },
    {
      "epoch": 0.00011822509765625,
      "step": 19370,
      "training_step_time": 0.42864465713500977
    },
    {
      "epoch": 0.000118231201171875,
      "model_forward_time": 0.11392474174499512,
      "step": 19371
    },
    {
      "epoch": 0.000118231201171875,
      "step": 19371,
      "training_step_time": 0.43204259872436523
    },
    {
      "epoch": 0.0001182373046875,
      "model_forward_time": 0.11413216590881348,
      "step": 19372
    },
    {
      "epoch": 0.0001182373046875,
      "step": 19372,
      "training_step_time": 0.3988649845123291
    },
    {
      "epoch": 0.000118243408203125,
      "model_forward_time": 0.11414361000061035,
      "step": 19373
    },
    {
      "epoch": 0.000118243408203125,
      "step": 19373,
      "training_step_time": 0.3881564140319824
    },
    {
      "epoch": 0.00011824951171875,
      "model_forward_time": 0.11524701118469238,
      "step": 19374
    },
    {
      "epoch": 0.00011824951171875,
      "step": 19374,
      "training_step_time": 0.4336111545562744
    },
    {
      "epoch": 0.000118255615234375,
      "model_forward_time": 0.11458277702331543,
      "step": 19375
    },
    {
      "epoch": 0.000118255615234375,
      "step": 19375,
      "training_step_time": 0.41445398330688477
    },
    {
      "epoch": 0.00011826171875,
      "model_forward_time": 0.11529827117919922,
      "step": 19376
    },
    {
      "epoch": 0.00011826171875,
      "step": 19376,
      "training_step_time": 0.4933474063873291
    },
    {
      "epoch": 0.000118267822265625,
      "model_forward_time": 0.11493611335754395,
      "step": 19377
    },
    {
      "epoch": 0.000118267822265625,
      "step": 19377,
      "training_step_time": 0.4588277339935303
    },
    {
      "epoch": 0.00011827392578125,
      "model_forward_time": 0.11545085906982422,
      "step": 19378
    },
    {
      "epoch": 0.00011827392578125,
      "step": 19378,
      "training_step_time": 0.39146971702575684
    },
    {
      "epoch": 0.000118280029296875,
      "model_forward_time": 0.1139521598815918,
      "step": 19379
    },
    {
      "epoch": 0.000118280029296875,
      "step": 19379,
      "training_step_time": 0.3953359127044678
    },
    {
      "epoch": 0.0001182861328125,
      "grad_norm": 0.1399063915014267,
      "learning_rate": 8.097092604340542e-05,
      "loss": 0.0496,
      "step": 19380
    },
    {
      "epoch": 0.0001182861328125,
      "model_forward_time": 0.11486482620239258,
      "step": 19380
    },
    {
      "epoch": 0.0001182861328125,
      "step": 19380,
      "training_step_time": 0.4103541374206543
    },
    {
      "epoch": 0.000118292236328125,
      "model_forward_time": 0.11577248573303223,
      "step": 19381
    },
    {
      "epoch": 0.000118292236328125,
      "step": 19381,
      "training_step_time": 0.39412546157836914
    },
    {
      "epoch": 0.00011829833984375,
      "model_forward_time": 0.11511874198913574,
      "step": 19382
    },
    {
      "epoch": 0.00011829833984375,
      "step": 19382,
      "training_step_time": 0.4107174873352051
    },
    {
      "epoch": 0.000118304443359375,
      "model_forward_time": 0.11460995674133301,
      "step": 19383
    },
    {
      "epoch": 0.000118304443359375,
      "step": 19383,
      "training_step_time": 0.39771413803100586
    },
    {
      "epoch": 0.000118310546875,
      "model_forward_time": 0.1149148941040039,
      "step": 19384
    },
    {
      "epoch": 0.000118310546875,
      "step": 19384,
      "training_step_time": 0.4229402542114258
    },
    {
      "epoch": 0.000118316650390625,
      "model_forward_time": 0.11480522155761719,
      "step": 19385
    },
    {
      "epoch": 0.000118316650390625,
      "step": 19385,
      "training_step_time": 0.4537193775177002
    },
    {
      "epoch": 0.00011832275390625,
      "model_forward_time": 0.11512160301208496,
      "step": 19386
    },
    {
      "epoch": 0.00011832275390625,
      "step": 19386,
      "training_step_time": 0.44203829765319824
    },
    {
      "epoch": 0.000118328857421875,
      "model_forward_time": 0.11513590812683105,
      "step": 19387
    },
    {
      "epoch": 0.000118328857421875,
      "step": 19387,
      "training_step_time": 0.392362117767334
    },
    {
      "epoch": 0.0001183349609375,
      "model_forward_time": 0.11563348770141602,
      "step": 19388
    },
    {
      "epoch": 0.0001183349609375,
      "step": 19388,
      "training_step_time": 0.3927440643310547
    },
    {
      "epoch": 0.000118341064453125,
      "model_forward_time": 0.114501953125,
      "step": 19389
    },
    {
      "epoch": 0.000118341064453125,
      "step": 19389,
      "training_step_time": 0.4461231231689453
    },
    {
      "epoch": 0.00011834716796875,
      "grad_norm": 0.1540597528219223,
      "learning_rate": 8.094928676398101e-05,
      "loss": 0.055,
      "step": 19390
    },
    {
      "epoch": 0.00011834716796875,
      "model_forward_time": 0.11484789848327637,
      "step": 19390
    },
    {
      "epoch": 0.00011834716796875,
      "step": 19390,
      "training_step_time": 0.40511131286621094
    },
    {
      "epoch": 0.000118353271484375,
      "model_forward_time": 0.11696553230285645,
      "step": 19391
    },
    {
      "epoch": 0.000118353271484375,
      "step": 19391,
      "training_step_time": 0.4723057746887207
    },
    {
      "epoch": 0.000118359375,
      "model_forward_time": 0.11612367630004883,
      "step": 19392
    },
    {
      "epoch": 0.000118359375,
      "step": 19392,
      "training_step_time": 0.514798641204834
    },
    {
      "epoch": 0.000118365478515625,
      "model_forward_time": 0.1148066520690918,
      "step": 19393
    },
    {
      "epoch": 0.000118365478515625,
      "step": 19393,
      "training_step_time": 0.38546133041381836
    },
    {
      "epoch": 0.00011837158203125,
      "model_forward_time": 0.11642193794250488,
      "step": 19394
    },
    {
      "epoch": 0.00011837158203125,
      "step": 19394,
      "training_step_time": 0.44691920280456543
    },
    {
      "epoch": 0.000118377685546875,
      "model_forward_time": 0.11515998840332031,
      "step": 19395
    },
    {
      "epoch": 0.000118377685546875,
      "step": 19395,
      "training_step_time": 0.3953063488006592
    },
    {
      "epoch": 0.0001183837890625,
      "model_forward_time": 0.11510968208312988,
      "step": 19396
    },
    {
      "epoch": 0.0001183837890625,
      "step": 19396,
      "training_step_time": 0.4503779411315918
    },
    {
      "epoch": 0.000118389892578125,
      "model_forward_time": 0.1151731014251709,
      "step": 19397
    },
    {
      "epoch": 0.000118389892578125,
      "step": 19397,
      "training_step_time": 0.43766140937805176
    },
    {
      "epoch": 0.00011839599609375,
      "model_forward_time": 0.11487913131713867,
      "step": 19398
    },
    {
      "epoch": 0.00011839599609375,
      "step": 19398,
      "training_step_time": 0.6372268199920654
    },
    {
      "epoch": 0.000118402099609375,
      "model_forward_time": 0.11469888687133789,
      "step": 19399
    },
    {
      "epoch": 0.000118402099609375,
      "step": 19399,
      "training_step_time": 0.3798673152923584
    },
    {
      "epoch": 0.000118408203125,
      "grad_norm": 0.17292064428329468,
      "learning_rate": 8.092763808298048e-05,
      "loss": 0.0516,
      "step": 19400
    },
    {
      "epoch": 0.000118408203125,
      "model_forward_time": 0.11549663543701172,
      "step": 19400
    },
    {
      "epoch": 0.000118408203125,
      "step": 19400,
      "training_step_time": 0.38077807426452637
    },
    {
      "epoch": 0.000118414306640625,
      "model_forward_time": 0.11444544792175293,
      "step": 19401
    },
    {
      "epoch": 0.000118414306640625,
      "step": 19401,
      "training_step_time": 0.38625025749206543
    },
    {
      "epoch": 0.00011842041015625,
      "model_forward_time": 0.11521553993225098,
      "step": 19402
    },
    {
      "epoch": 0.00011842041015625,
      "step": 19402,
      "training_step_time": 0.3891110420227051
    },
    {
      "epoch": 0.000118426513671875,
      "model_forward_time": 0.11541008949279785,
      "step": 19403
    },
    {
      "epoch": 0.000118426513671875,
      "step": 19403,
      "training_step_time": 0.43665504455566406
    },
    {
      "epoch": 0.0001184326171875,
      "model_forward_time": 0.11555266380310059,
      "step": 19404
    },
    {
      "epoch": 0.0001184326171875,
      "step": 19404,
      "training_step_time": 0.9263720512390137
    },
    {
      "epoch": 0.000118438720703125,
      "model_forward_time": 0.11472725868225098,
      "step": 19405
    },
    {
      "epoch": 0.000118438720703125,
      "step": 19405,
      "training_step_time": 0.3784799575805664
    },
    {
      "epoch": 0.00011844482421875,
      "model_forward_time": 0.1141049861907959,
      "step": 19406
    },
    {
      "epoch": 0.00011844482421875,
      "step": 19406,
      "training_step_time": 0.3791236877441406
    },
    {
      "epoch": 0.000118450927734375,
      "model_forward_time": 0.11452722549438477,
      "step": 19407
    },
    {
      "epoch": 0.000118450927734375,
      "step": 19407,
      "training_step_time": 0.3726921081542969
    },
    {
      "epoch": 0.00011845703125,
      "model_forward_time": 0.11440181732177734,
      "step": 19408
    },
    {
      "epoch": 0.00011845703125,
      "step": 19408,
      "training_step_time": 0.382584810256958
    },
    {
      "epoch": 0.000118463134765625,
      "model_forward_time": 0.11456298828125,
      "step": 19409
    },
    {
      "epoch": 0.000118463134765625,
      "step": 19409,
      "training_step_time": 0.3839724063873291
    },
    {
      "epoch": 0.00011846923828125,
      "grad_norm": 0.22134573757648468,
      "learning_rate": 8.090598000698009e-05,
      "loss": 0.0549,
      "step": 19410
    },
    {
      "epoch": 0.00011846923828125,
      "model_forward_time": 0.1147770881652832,
      "step": 19410
    },
    {
      "epoch": 0.00011846923828125,
      "step": 19410,
      "training_step_time": 0.6973471641540527
    },
    {
      "epoch": 0.000118475341796875,
      "model_forward_time": 0.11520791053771973,
      "step": 19411
    },
    {
      "epoch": 0.000118475341796875,
      "step": 19411,
      "training_step_time": 0.3833274841308594
    },
    {
      "epoch": 0.0001184814453125,
      "model_forward_time": 0.11495280265808105,
      "step": 19412
    },
    {
      "epoch": 0.0001184814453125,
      "step": 19412,
      "training_step_time": 0.3760874271392822
    },
    {
      "epoch": 0.000118487548828125,
      "model_forward_time": 0.11463356018066406,
      "step": 19413
    },
    {
      "epoch": 0.000118487548828125,
      "step": 19413,
      "training_step_time": 0.38246870040893555
    },
    {
      "epoch": 0.00011849365234375,
      "model_forward_time": 0.11515545845031738,
      "step": 19414
    },
    {
      "epoch": 0.00011849365234375,
      "step": 19414,
      "training_step_time": 0.39219117164611816
    },
    {
      "epoch": 0.000118499755859375,
      "model_forward_time": 0.1145334243774414,
      "step": 19415
    },
    {
      "epoch": 0.000118499755859375,
      "step": 19415,
      "training_step_time": 0.38166236877441406
    },
    {
      "epoch": 0.000118505859375,
      "model_forward_time": 0.11522555351257324,
      "step": 19416
    },
    {
      "epoch": 0.000118505859375,
      "step": 19416,
      "training_step_time": 0.9264929294586182
    },
    {
      "epoch": 0.000118511962890625,
      "model_forward_time": 0.11403465270996094,
      "step": 19417
    },
    {
      "epoch": 0.000118511962890625,
      "step": 19417,
      "training_step_time": 0.3963472843170166
    },
    {
      "epoch": 0.00011851806640625,
      "model_forward_time": 0.11446142196655273,
      "step": 19418
    },
    {
      "epoch": 0.00011851806640625,
      "step": 19418,
      "training_step_time": 0.415966272354126
    },
    {
      "epoch": 0.000118524169921875,
      "model_forward_time": 0.11426091194152832,
      "step": 19419
    },
    {
      "epoch": 0.000118524169921875,
      "step": 19419,
      "training_step_time": 0.37931060791015625
    },
    {
      "epoch": 0.0001185302734375,
      "grad_norm": 0.15867561101913452,
      "learning_rate": 8.088431254255899e-05,
      "loss": 0.0477,
      "step": 19420
    },
    {
      "epoch": 0.0001185302734375,
      "model_forward_time": 0.11419034004211426,
      "step": 19420
    },
    {
      "epoch": 0.0001185302734375,
      "step": 19420,
      "training_step_time": 0.3807344436645508
    },
    {
      "epoch": 0.000118536376953125,
      "model_forward_time": 0.11484599113464355,
      "step": 19421
    },
    {
      "epoch": 0.000118536376953125,
      "step": 19421,
      "training_step_time": 0.4372398853302002
    },
    {
      "epoch": 0.00011854248046875,
      "model_forward_time": 0.11482810974121094,
      "step": 19422
    },
    {
      "epoch": 0.00011854248046875,
      "step": 19422,
      "training_step_time": 0.5187785625457764
    },
    {
      "epoch": 0.000118548583984375,
      "model_forward_time": 0.11523222923278809,
      "step": 19423
    },
    {
      "epoch": 0.000118548583984375,
      "step": 19423,
      "training_step_time": 0.42237401008605957
    },
    {
      "epoch": 0.0001185546875,
      "model_forward_time": 0.11522102355957031,
      "step": 19424
    },
    {
      "epoch": 0.0001185546875,
      "step": 19424,
      "training_step_time": 0.3920624256134033
    },
    {
      "epoch": 0.000118560791015625,
      "model_forward_time": 0.11504101753234863,
      "step": 19425
    },
    {
      "epoch": 0.000118560791015625,
      "step": 19425,
      "training_step_time": 0.41138267517089844
    },
    {
      "epoch": 0.00011856689453125,
      "model_forward_time": 0.11470174789428711,
      "step": 19426
    },
    {
      "epoch": 0.00011856689453125,
      "step": 19426,
      "training_step_time": 0.39046287536621094
    },
    {
      "epoch": 0.000118572998046875,
      "model_forward_time": 0.11512517929077148,
      "step": 19427
    },
    {
      "epoch": 0.000118572998046875,
      "step": 19427,
      "training_step_time": 0.3968675136566162
    },
    {
      "epoch": 0.0001185791015625,
      "model_forward_time": 0.11457991600036621,
      "step": 19428
    },
    {
      "epoch": 0.0001185791015625,
      "step": 19428,
      "training_step_time": 0.653634786605835
    },
    {
      "epoch": 0.000118585205078125,
      "model_forward_time": 0.11432576179504395,
      "step": 19429
    },
    {
      "epoch": 0.000118585205078125,
      "step": 19429,
      "training_step_time": 0.39003491401672363
    },
    {
      "epoch": 0.00011859130859375,
      "grad_norm": 0.11018432676792145,
      "learning_rate": 8.086263569629919e-05,
      "loss": 0.0492,
      "step": 19430
    },
    {
      "epoch": 0.00011859130859375,
      "model_forward_time": 0.11497068405151367,
      "step": 19430
    },
    {
      "epoch": 0.00011859130859375,
      "step": 19430,
      "training_step_time": 0.46535468101501465
    },
    {
      "epoch": 0.000118597412109375,
      "model_forward_time": 0.11517572402954102,
      "step": 19431
    },
    {
      "epoch": 0.000118597412109375,
      "step": 19431,
      "training_step_time": 0.49228715896606445
    },
    {
      "epoch": 0.000118603515625,
      "model_forward_time": 0.11576652526855469,
      "step": 19432
    },
    {
      "epoch": 0.000118603515625,
      "step": 19432,
      "training_step_time": 0.4211733341217041
    },
    {
      "epoch": 0.000118609619140625,
      "model_forward_time": 0.11458158493041992,
      "step": 19433
    },
    {
      "epoch": 0.000118609619140625,
      "step": 19433,
      "training_step_time": 0.38768863677978516
    },
    {
      "epoch": 0.00011861572265625,
      "model_forward_time": 0.11511468887329102,
      "step": 19434
    },
    {
      "epoch": 0.00011861572265625,
      "step": 19434,
      "training_step_time": 0.5626893043518066
    },
    {
      "epoch": 0.000118621826171875,
      "model_forward_time": 0.11426639556884766,
      "step": 19435
    },
    {
      "epoch": 0.000118621826171875,
      "step": 19435,
      "training_step_time": 0.46539950370788574
    },
    {
      "epoch": 0.0001186279296875,
      "model_forward_time": 0.11480855941772461,
      "step": 19436
    },
    {
      "epoch": 0.0001186279296875,
      "step": 19436,
      "training_step_time": 0.3875420093536377
    },
    {
      "epoch": 0.000118634033203125,
      "model_forward_time": 0.1144857406616211,
      "step": 19437
    },
    {
      "epoch": 0.000118634033203125,
      "step": 19437,
      "training_step_time": 0.41074490547180176
    },
    {
      "epoch": 0.00011864013671875,
      "model_forward_time": 0.11455345153808594,
      "step": 19438
    },
    {
      "epoch": 0.00011864013671875,
      "step": 19438,
      "training_step_time": 0.38451504707336426
    },
    {
      "epoch": 0.000118646240234375,
      "model_forward_time": 0.11421418190002441,
      "step": 19439
    },
    {
      "epoch": 0.000118646240234375,
      "step": 19439,
      "training_step_time": 0.39304256439208984
    },
    {
      "epoch": 0.00011865234375,
      "grad_norm": 0.1713828295469284,
      "learning_rate": 8.084094947478556e-05,
      "loss": 0.0523,
      "step": 19440
    },
    {
      "epoch": 0.00011865234375,
      "model_forward_time": 0.11531829833984375,
      "step": 19440
    },
    {
      "epoch": 0.00011865234375,
      "step": 19440,
      "training_step_time": 0.43929195404052734
    },
    {
      "epoch": 0.000118658447265625,
      "model_forward_time": 0.11542510986328125,
      "step": 19441
    },
    {
      "epoch": 0.000118658447265625,
      "step": 19441,
      "training_step_time": 0.384552001953125
    },
    {
      "epoch": 0.00011866455078125,
      "model_forward_time": 0.1148068904876709,
      "step": 19442
    },
    {
      "epoch": 0.00011866455078125,
      "step": 19442,
      "training_step_time": 0.39428257942199707
    },
    {
      "epoch": 0.000118670654296875,
      "model_forward_time": 0.11528420448303223,
      "step": 19443
    },
    {
      "epoch": 0.000118670654296875,
      "step": 19443,
      "training_step_time": 0.3907930850982666
    },
    {
      "epoch": 0.0001186767578125,
      "model_forward_time": 0.11537456512451172,
      "step": 19444
    },
    {
      "epoch": 0.0001186767578125,
      "step": 19444,
      "training_step_time": 0.3668360710144043
    },
    {
      "epoch": 0.000118682861328125,
      "model_forward_time": 0.11498618125915527,
      "step": 19445
    },
    {
      "epoch": 0.000118682861328125,
      "step": 19445,
      "training_step_time": 0.4175407886505127
    },
    {
      "epoch": 0.00011868896484375,
      "model_forward_time": 0.11541199684143066,
      "step": 19446
    },
    {
      "epoch": 0.00011868896484375,
      "step": 19446,
      "training_step_time": 0.617497444152832
    },
    {
      "epoch": 0.000118695068359375,
      "model_forward_time": 0.11460161209106445,
      "step": 19447
    },
    {
      "epoch": 0.000118695068359375,
      "step": 19447,
      "training_step_time": 0.3891618251800537
    },
    {
      "epoch": 0.000118701171875,
      "model_forward_time": 0.11550140380859375,
      "step": 19448
    },
    {
      "epoch": 0.000118701171875,
      "step": 19448,
      "training_step_time": 0.40764379501342773
    },
    {
      "epoch": 0.000118707275390625,
      "model_forward_time": 0.11446452140808105,
      "step": 19449
    },
    {
      "epoch": 0.000118707275390625,
      "step": 19449,
      "training_step_time": 0.43294286727905273
    },
    {
      "epoch": 0.00011871337890625,
      "grad_norm": 0.1124848946928978,
      "learning_rate": 8.081925388460578e-05,
      "loss": 0.0489,
      "step": 19450
    },
    {
      "epoch": 0.00011871337890625,
      "model_forward_time": 0.11562919616699219,
      "step": 19450
    },
    {
      "epoch": 0.00011871337890625,
      "step": 19450,
      "training_step_time": 0.3995378017425537
    },
    {
      "epoch": 0.000118719482421875,
      "model_forward_time": 0.11421775817871094,
      "step": 19451
    },
    {
      "epoch": 0.000118719482421875,
      "step": 19451,
      "training_step_time": 0.49838924407958984
    },
    {
      "epoch": 0.0001187255859375,
      "model_forward_time": 0.11540341377258301,
      "step": 19452
    },
    {
      "epoch": 0.0001187255859375,
      "step": 19452,
      "training_step_time": 0.5657353401184082
    },
    {
      "epoch": 0.000118731689453125,
      "model_forward_time": 0.11422395706176758,
      "step": 19453
    },
    {
      "epoch": 0.000118731689453125,
      "step": 19453,
      "training_step_time": 0.38549065589904785
    },
    {
      "epoch": 0.00011873779296875,
      "model_forward_time": 0.11503815650939941,
      "step": 19454
    },
    {
      "epoch": 0.00011873779296875,
      "step": 19454,
      "training_step_time": 0.37897491455078125
    },
    {
      "epoch": 0.000118743896484375,
      "model_forward_time": 0.1141054630279541,
      "step": 19455
    },
    {
      "epoch": 0.000118743896484375,
      "step": 19455,
      "training_step_time": 0.379563570022583
    },
    {
      "epoch": 0.00011875,
      "model_forward_time": 0.11514592170715332,
      "step": 19456
    },
    {
      "epoch": 0.00011875,
      "step": 19456,
      "training_step_time": 0.3781294822692871
    },
    {
      "epoch": 0.000118756103515625,
      "model_forward_time": 0.11432242393493652,
      "step": 19457
    },
    {
      "epoch": 0.000118756103515625,
      "step": 19457,
      "training_step_time": 0.37825465202331543
    },
    {
      "epoch": 0.00011876220703125,
      "model_forward_time": 0.11514830589294434,
      "step": 19458
    },
    {
      "epoch": 0.00011876220703125,
      "step": 19458,
      "training_step_time": 0.6687891483306885
    },
    {
      "epoch": 0.000118768310546875,
      "model_forward_time": 0.11553716659545898,
      "step": 19459
    },
    {
      "epoch": 0.000118768310546875,
      "step": 19459,
      "training_step_time": 0.42134571075439453
    },
    {
      "epoch": 0.0001187744140625,
      "grad_norm": 0.12091437727212906,
      "learning_rate": 8.07975489323504e-05,
      "loss": 0.0453,
      "step": 19460
    },
    {
      "epoch": 0.0001187744140625,
      "model_forward_time": 0.11448049545288086,
      "step": 19460
    },
    {
      "epoch": 0.0001187744140625,
      "step": 19460,
      "training_step_time": 0.48771190643310547
    },
    {
      "epoch": 0.000118780517578125,
      "model_forward_time": 0.11434626579284668,
      "step": 19461
    },
    {
      "epoch": 0.000118780517578125,
      "step": 19461,
      "training_step_time": 0.4167337417602539
    },
    {
      "epoch": 0.00011878662109375,
      "model_forward_time": 0.11480188369750977,
      "step": 19462
    },
    {
      "epoch": 0.00011878662109375,
      "step": 19462,
      "training_step_time": 0.40883421897888184
    },
    {
      "epoch": 0.000118792724609375,
      "model_forward_time": 0.11447644233703613,
      "step": 19463
    },
    {
      "epoch": 0.000118792724609375,
      "step": 19463,
      "training_step_time": 0.45774030685424805
    },
    {
      "epoch": 0.000118798828125,
      "model_forward_time": 0.11466455459594727,
      "step": 19464
    },
    {
      "epoch": 0.000118798828125,
      "step": 19464,
      "training_step_time": 0.5872073173522949
    },
    {
      "epoch": 0.000118804931640625,
      "model_forward_time": 0.11457633972167969,
      "step": 19465
    },
    {
      "epoch": 0.000118804931640625,
      "step": 19465,
      "training_step_time": 0.3830442428588867
    },
    {
      "epoch": 0.00011881103515625,
      "model_forward_time": 0.11455678939819336,
      "step": 19466
    },
    {
      "epoch": 0.00011881103515625,
      "step": 19466,
      "training_step_time": 0.382474422454834
    },
    {
      "epoch": 0.000118817138671875,
      "model_forward_time": 0.11506891250610352,
      "step": 19467
    },
    {
      "epoch": 0.000118817138671875,
      "step": 19467,
      "training_step_time": 0.3785524368286133
    },
    {
      "epoch": 0.0001188232421875,
      "model_forward_time": 0.11476540565490723,
      "step": 19468
    },
    {
      "epoch": 0.0001188232421875,
      "step": 19468,
      "training_step_time": 0.3937802314758301
    },
    {
      "epoch": 0.000118829345703125,
      "model_forward_time": 0.1155860424041748,
      "step": 19469
    },
    {
      "epoch": 0.000118829345703125,
      "step": 19469,
      "training_step_time": 0.3849787712097168
    },
    {
      "epoch": 0.00011883544921875,
      "grad_norm": 0.15773235261440277,
      "learning_rate": 8.077583462461283e-05,
      "loss": 0.0519,
      "step": 19470
    },
    {
      "epoch": 0.00011883544921875,
      "model_forward_time": 0.11630487442016602,
      "step": 19470
    },
    {
      "epoch": 0.00011883544921875,
      "step": 19470,
      "training_step_time": 0.5174863338470459
    },
    {
      "epoch": 0.000118841552734375,
      "model_forward_time": 0.11510682106018066,
      "step": 19471
    },
    {
      "epoch": 0.000118841552734375,
      "step": 19471,
      "training_step_time": 0.388458251953125
    },
    {
      "epoch": 0.00011884765625,
      "model_forward_time": 0.11531496047973633,
      "step": 19472
    },
    {
      "epoch": 0.00011884765625,
      "step": 19472,
      "training_step_time": 0.38391566276550293
    },
    {
      "epoch": 0.000118853759765625,
      "model_forward_time": 0.11549091339111328,
      "step": 19473
    },
    {
      "epoch": 0.000118853759765625,
      "step": 19473,
      "training_step_time": 0.47283244132995605
    },
    {
      "epoch": 0.00011885986328125,
      "model_forward_time": 0.11476469039916992,
      "step": 19474
    },
    {
      "epoch": 0.00011885986328125,
      "step": 19474,
      "training_step_time": 0.44796109199523926
    },
    {
      "epoch": 0.000118865966796875,
      "model_forward_time": 0.11531734466552734,
      "step": 19475
    },
    {
      "epoch": 0.000118865966796875,
      "step": 19475,
      "training_step_time": 0.413865327835083
    },
    {
      "epoch": 0.0001188720703125,
      "model_forward_time": 0.11612629890441895,
      "step": 19476
    },
    {
      "epoch": 0.0001188720703125,
      "step": 19476,
      "training_step_time": 0.4063551425933838
    },
    {
      "epoch": 0.000118878173828125,
      "model_forward_time": 0.11508846282958984,
      "step": 19477
    },
    {
      "epoch": 0.000118878173828125,
      "step": 19477,
      "training_step_time": 0.3942091464996338
    },
    {
      "epoch": 0.00011888427734375,
      "model_forward_time": 0.11496901512145996,
      "step": 19478
    },
    {
      "epoch": 0.00011888427734375,
      "step": 19478,
      "training_step_time": 0.4146232604980469
    },
    {
      "epoch": 0.000118890380859375,
      "model_forward_time": 0.11419057846069336,
      "step": 19479
    },
    {
      "epoch": 0.000118890380859375,
      "step": 19479,
      "training_step_time": 0.4446682929992676
    },
    {
      "epoch": 0.000118896484375,
      "grad_norm": 0.15301862359046936,
      "learning_rate": 8.075411096798928e-05,
      "loss": 0.0555,
      "step": 19480
    },
    {
      "epoch": 0.000118896484375,
      "model_forward_time": 0.11478853225708008,
      "step": 19480
    },
    {
      "epoch": 0.000118896484375,
      "step": 19480,
      "training_step_time": 0.3914783000946045
    },
    {
      "epoch": 0.000118902587890625,
      "model_forward_time": 0.11508059501647949,
      "step": 19481
    },
    {
      "epoch": 0.000118902587890625,
      "step": 19481,
      "training_step_time": 0.3969721794128418
    },
    {
      "epoch": 0.00011890869140625,
      "model_forward_time": 0.11489987373352051,
      "step": 19482
    },
    {
      "epoch": 0.00011890869140625,
      "step": 19482,
      "training_step_time": 0.5971012115478516
    },
    {
      "epoch": 0.000118914794921875,
      "model_forward_time": 0.11464905738830566,
      "step": 19483
    },
    {
      "epoch": 0.000118914794921875,
      "step": 19483,
      "training_step_time": 0.394467830657959
    },
    {
      "epoch": 0.0001189208984375,
      "model_forward_time": 0.11510586738586426,
      "step": 19484
    },
    {
      "epoch": 0.0001189208984375,
      "step": 19484,
      "training_step_time": 0.3849930763244629
    },
    {
      "epoch": 0.000118927001953125,
      "model_forward_time": 0.11480116844177246,
      "step": 19485
    },
    {
      "epoch": 0.000118927001953125,
      "step": 19485,
      "training_step_time": 0.3909797668457031
    },
    {
      "epoch": 0.00011893310546875,
      "model_forward_time": 0.11417770385742188,
      "step": 19486
    },
    {
      "epoch": 0.00011893310546875,
      "step": 19486,
      "training_step_time": 0.4000437259674072
    },
    {
      "epoch": 0.000118939208984375,
      "model_forward_time": 0.11473488807678223,
      "step": 19487
    },
    {
      "epoch": 0.000118939208984375,
      "step": 19487,
      "training_step_time": 0.38062405586242676
    },
    {
      "epoch": 0.0001189453125,
      "model_forward_time": 0.11522436141967773,
      "step": 19488
    },
    {
      "epoch": 0.0001189453125,
      "step": 19488,
      "training_step_time": 0.693270206451416
    },
    {
      "epoch": 0.000118951416015625,
      "model_forward_time": 0.11517810821533203,
      "step": 19489
    },
    {
      "epoch": 0.000118951416015625,
      "step": 19489,
      "training_step_time": 0.402158260345459
    },
    {
      "epoch": 0.00011895751953125,
      "grad_norm": 0.12034787237644196,
      "learning_rate": 8.073237796907882e-05,
      "loss": 0.0577,
      "step": 19490
    },
    {
      "epoch": 0.00011895751953125,
      "model_forward_time": 0.11453485488891602,
      "step": 19490
    },
    {
      "epoch": 0.00011895751953125,
      "step": 19490,
      "training_step_time": 0.4050486087799072
    },
    {
      "epoch": 0.000118963623046875,
      "model_forward_time": 0.11447334289550781,
      "step": 19491
    },
    {
      "epoch": 0.000118963623046875,
      "step": 19491,
      "training_step_time": 0.4217414855957031
    },
    {
      "epoch": 0.0001189697265625,
      "model_forward_time": 0.1141195297241211,
      "step": 19492
    },
    {
      "epoch": 0.0001189697265625,
      "step": 19492,
      "training_step_time": 0.42183780670166016
    },
    {
      "epoch": 0.000118975830078125,
      "model_forward_time": 0.11443233489990234,
      "step": 19493
    },
    {
      "epoch": 0.000118975830078125,
      "step": 19493,
      "training_step_time": 0.3870973587036133
    },
    {
      "epoch": 0.00011898193359375,
      "model_forward_time": 0.1149301528930664,
      "step": 19494
    },
    {
      "epoch": 0.00011898193359375,
      "step": 19494,
      "training_step_time": 0.5120542049407959
    },
    {
      "epoch": 0.000118988037109375,
      "model_forward_time": 0.11468124389648438,
      "step": 19495
    },
    {
      "epoch": 0.000118988037109375,
      "step": 19495,
      "training_step_time": 0.38823819160461426
    },
    {
      "epoch": 0.000118994140625,
      "model_forward_time": 0.11496829986572266,
      "step": 19496
    },
    {
      "epoch": 0.000118994140625,
      "step": 19496,
      "training_step_time": 0.39601850509643555
    },
    {
      "epoch": 0.000119000244140625,
      "model_forward_time": 0.11539530754089355,
      "step": 19497
    },
    {
      "epoch": 0.000119000244140625,
      "step": 19497,
      "training_step_time": 0.39287304878234863
    },
    {
      "epoch": 0.00011900634765625,
      "model_forward_time": 0.11453723907470703,
      "step": 19498
    },
    {
      "epoch": 0.00011900634765625,
      "step": 19498,
      "training_step_time": 0.39879560470581055
    },
    {
      "epoch": 0.000119012451171875,
      "model_forward_time": 0.11489009857177734,
      "step": 19499
    },
    {
      "epoch": 0.000119012451171875,
      "step": 19499,
      "training_step_time": 0.3919565677642822
    },
    {
      "epoch": 0.0001190185546875,
      "grad_norm": 0.1332988440990448,
      "learning_rate": 8.07106356344834e-05,
      "loss": 0.0475,
      "step": 19500
    },
    {
      "epoch": 0.0001190185546875,
      "model_forward_time": 0.11522293090820312,
      "step": 19500
    },
    {
      "epoch": 0.0001190185546875,
      "step": 19500,
      "training_step_time": 0.7224700450897217
    },
    {
      "epoch": 0.000119024658203125,
      "model_forward_time": 0.11469841003417969,
      "step": 19501
    },
    {
      "epoch": 0.000119024658203125,
      "step": 19501,
      "training_step_time": 0.43070435523986816
    },
    {
      "epoch": 0.00011903076171875,
      "model_forward_time": 0.11423063278198242,
      "step": 19502
    },
    {
      "epoch": 0.00011903076171875,
      "step": 19502,
      "training_step_time": 0.4599485397338867
    },
    {
      "epoch": 0.000119036865234375,
      "model_forward_time": 0.11418747901916504,
      "step": 19503
    },
    {
      "epoch": 0.000119036865234375,
      "step": 19503,
      "training_step_time": 0.40230393409729004
    },
    {
      "epoch": 0.00011904296875,
      "model_forward_time": 0.11468815803527832,
      "step": 19504
    },
    {
      "epoch": 0.00011904296875,
      "step": 19504,
      "training_step_time": 0.3905942440032959
    },
    {
      "epoch": 0.000119049072265625,
      "model_forward_time": 0.11418581008911133,
      "step": 19505
    },
    {
      "epoch": 0.000119049072265625,
      "step": 19505,
      "training_step_time": 0.3880012035369873
    },
    {
      "epoch": 0.00011905517578125,
      "model_forward_time": 0.11478996276855469,
      "step": 19506
    },
    {
      "epoch": 0.00011905517578125,
      "step": 19506,
      "training_step_time": 0.42795276641845703
    },
    {
      "epoch": 0.000119061279296875,
      "model_forward_time": 0.11434292793273926,
      "step": 19507
    },
    {
      "epoch": 0.000119061279296875,
      "step": 19507,
      "training_step_time": 0.4783914089202881
    },
    {
      "epoch": 0.0001190673828125,
      "model_forward_time": 0.11502361297607422,
      "step": 19508
    },
    {
      "epoch": 0.0001190673828125,
      "step": 19508,
      "training_step_time": 0.3953135013580322
    },
    {
      "epoch": 0.000119073486328125,
      "model_forward_time": 0.11460638046264648,
      "step": 19509
    },
    {
      "epoch": 0.000119073486328125,
      "step": 19509,
      "training_step_time": 0.3896145820617676
    },
    {
      "epoch": 0.00011907958984375,
      "grad_norm": 0.09561439603567123,
      "learning_rate": 8.068888397080772e-05,
      "loss": 0.0485,
      "step": 19510
    },
    {
      "epoch": 0.00011907958984375,
      "model_forward_time": 0.11554861068725586,
      "step": 19510
    },
    {
      "epoch": 0.00011907958984375,
      "step": 19510,
      "training_step_time": 0.3960423469543457
    },
    {
      "epoch": 0.000119085693359375,
      "model_forward_time": 0.11501741409301758,
      "step": 19511
    },
    {
      "epoch": 0.000119085693359375,
      "step": 19511,
      "training_step_time": 0.3906369209289551
    },
    {
      "epoch": 0.000119091796875,
      "model_forward_time": 0.11560964584350586,
      "step": 19512
    },
    {
      "epoch": 0.000119091796875,
      "step": 19512,
      "training_step_time": 0.5473175048828125
    },
    {
      "epoch": 0.000119097900390625,
      "model_forward_time": 0.11486983299255371,
      "step": 19513
    },
    {
      "epoch": 0.000119097900390625,
      "step": 19513,
      "training_step_time": 0.39809179306030273
    },
    {
      "epoch": 0.00011910400390625,
      "model_forward_time": 0.11454963684082031,
      "step": 19514
    },
    {
      "epoch": 0.00011910400390625,
      "step": 19514,
      "training_step_time": 0.42297792434692383
    },
    {
      "epoch": 0.000119110107421875,
      "model_forward_time": 0.11476516723632812,
      "step": 19515
    },
    {
      "epoch": 0.000119110107421875,
      "step": 19515,
      "training_step_time": 0.36315083503723145
    },
    {
      "epoch": 0.0001191162109375,
      "model_forward_time": 0.1154026985168457,
      "step": 19516
    },
    {
      "epoch": 0.0001191162109375,
      "step": 19516,
      "training_step_time": 0.42411255836486816
    },
    {
      "epoch": 0.000119122314453125,
      "model_forward_time": 0.11457204818725586,
      "step": 19517
    },
    {
      "epoch": 0.000119122314453125,
      "step": 19517,
      "training_step_time": 0.48258042335510254
    },
    {
      "epoch": 0.00011912841796875,
      "model_forward_time": 0.1157693862915039,
      "step": 19518
    },
    {
      "epoch": 0.00011912841796875,
      "step": 19518,
      "training_step_time": 0.7016417980194092
    },
    {
      "epoch": 0.000119134521484375,
      "model_forward_time": 0.11540961265563965,
      "step": 19519
    },
    {
      "epoch": 0.000119134521484375,
      "step": 19519,
      "training_step_time": 0.41338610649108887
    },
    {
      "epoch": 0.000119140625,
      "grad_norm": 0.13894984126091003,
      "learning_rate": 8.06671229846594e-05,
      "loss": 0.0478,
      "step": 19520
    },
    {
      "epoch": 0.000119140625,
      "model_forward_time": 0.11481332778930664,
      "step": 19520
    },
    {
      "epoch": 0.000119140625,
      "step": 19520,
      "training_step_time": 0.46361732482910156
    },
    {
      "epoch": 0.000119146728515625,
      "model_forward_time": 0.11439132690429688,
      "step": 19521
    },
    {
      "epoch": 0.000119146728515625,
      "step": 19521,
      "training_step_time": 0.4765150547027588
    },
    {
      "epoch": 0.00011915283203125,
      "model_forward_time": 0.11364364624023438,
      "step": 19522
    },
    {
      "epoch": 0.00011915283203125,
      "step": 19522,
      "training_step_time": 0.37684106826782227
    },
    {
      "epoch": 0.000119158935546875,
      "model_forward_time": 0.11420130729675293,
      "step": 19523
    },
    {
      "epoch": 0.000119158935546875,
      "step": 19523,
      "training_step_time": 0.39971923828125
    },
    {
      "epoch": 0.0001191650390625,
      "model_forward_time": 0.11485552787780762,
      "step": 19524
    },
    {
      "epoch": 0.0001191650390625,
      "step": 19524,
      "training_step_time": 0.815833330154419
    },
    {
      "epoch": 0.000119171142578125,
      "model_forward_time": 0.11445331573486328,
      "step": 19525
    },
    {
      "epoch": 0.000119171142578125,
      "step": 19525,
      "training_step_time": 0.4404170513153076
    },
    {
      "epoch": 0.00011917724609375,
      "model_forward_time": 0.11445403099060059,
      "step": 19526
    },
    {
      "epoch": 0.00011917724609375,
      "step": 19526,
      "training_step_time": 0.3818695545196533
    },
    {
      "epoch": 0.000119183349609375,
      "model_forward_time": 0.11413455009460449,
      "step": 19527
    },
    {
      "epoch": 0.000119183349609375,
      "step": 19527,
      "training_step_time": 0.3899691104888916
    },
    {
      "epoch": 0.000119189453125,
      "model_forward_time": 0.11421442031860352,
      "step": 19528
    },
    {
      "epoch": 0.000119189453125,
      "step": 19528,
      "training_step_time": 0.38329362869262695
    },
    {
      "epoch": 0.000119195556640625,
      "model_forward_time": 0.1141824722290039,
      "step": 19529
    },
    {
      "epoch": 0.000119195556640625,
      "step": 19529,
      "training_step_time": 0.4614100456237793
    },
    {
      "epoch": 0.00011920166015625,
      "grad_norm": 0.1560257077217102,
      "learning_rate": 8.064535268264883e-05,
      "loss": 0.0485,
      "step": 19530
    },
    {
      "epoch": 0.00011920166015625,
      "model_forward_time": 0.11539125442504883,
      "step": 19530
    },
    {
      "epoch": 0.00011920166015625,
      "step": 19530,
      "training_step_time": 0.7172770500183105
    },
    {
      "epoch": 0.000119207763671875,
      "model_forward_time": 0.11402249336242676,
      "step": 19531
    },
    {
      "epoch": 0.000119207763671875,
      "step": 19531,
      "training_step_time": 0.392376184463501
    },
    {
      "epoch": 0.0001192138671875,
      "model_forward_time": 0.11430096626281738,
      "step": 19532
    },
    {
      "epoch": 0.0001192138671875,
      "step": 19532,
      "training_step_time": 0.40815186500549316
    },
    {
      "epoch": 0.000119219970703125,
      "model_forward_time": 0.11386823654174805,
      "step": 19533
    },
    {
      "epoch": 0.000119219970703125,
      "step": 19533,
      "training_step_time": 0.38316845893859863
    },
    {
      "epoch": 0.00011922607421875,
      "model_forward_time": 0.11429190635681152,
      "step": 19534
    },
    {
      "epoch": 0.00011922607421875,
      "step": 19534,
      "training_step_time": 0.4777836799621582
    },
    {
      "epoch": 0.000119232177734375,
      "model_forward_time": 0.11424660682678223,
      "step": 19535
    },
    {
      "epoch": 0.000119232177734375,
      "step": 19535,
      "training_step_time": 0.4012415409088135
    },
    {
      "epoch": 0.00011923828125,
      "model_forward_time": 0.11518144607543945,
      "step": 19536
    },
    {
      "epoch": 0.00011923828125,
      "step": 19536,
      "training_step_time": 0.8733046054840088
    },
    {
      "epoch": 0.000119244384765625,
      "model_forward_time": 0.11411595344543457,
      "step": 19537
    },
    {
      "epoch": 0.000119244384765625,
      "step": 19537,
      "training_step_time": 0.40944933891296387
    },
    {
      "epoch": 0.00011925048828125,
      "model_forward_time": 0.11365652084350586,
      "step": 19538
    },
    {
      "epoch": 0.00011925048828125,
      "step": 19538,
      "training_step_time": 0.38996267318725586
    },
    {
      "epoch": 0.000119256591796875,
      "model_forward_time": 0.11442112922668457,
      "step": 19539
    },
    {
      "epoch": 0.000119256591796875,
      "step": 19539,
      "training_step_time": 0.3777585029602051
    },
    {
      "epoch": 0.0001192626953125,
      "grad_norm": 0.16399027407169342,
      "learning_rate": 8.062357307138926e-05,
      "loss": 0.0475,
      "step": 19540
    },
    {
      "epoch": 0.0001192626953125,
      "model_forward_time": 0.11447739601135254,
      "step": 19540
    },
    {
      "epoch": 0.0001192626953125,
      "step": 19540,
      "training_step_time": 0.38271069526672363
    },
    {
      "epoch": 0.000119268798828125,
      "model_forward_time": 0.11506772041320801,
      "step": 19541
    },
    {
      "epoch": 0.000119268798828125,
      "step": 19541,
      "training_step_time": 0.40396761894226074
    },
    {
      "epoch": 0.00011927490234375,
      "model_forward_time": 0.11495232582092285,
      "step": 19542
    },
    {
      "epoch": 0.00011927490234375,
      "step": 19542,
      "training_step_time": 0.6217501163482666
    },
    {
      "epoch": 0.000119281005859375,
      "model_forward_time": 0.11511611938476562,
      "step": 19543
    },
    {
      "epoch": 0.000119281005859375,
      "step": 19543,
      "training_step_time": 0.43004894256591797
    },
    {
      "epoch": 0.000119287109375,
      "model_forward_time": 0.11470866203308105,
      "step": 19544
    },
    {
      "epoch": 0.000119287109375,
      "step": 19544,
      "training_step_time": 0.41073060035705566
    },
    {
      "epoch": 0.000119293212890625,
      "model_forward_time": 0.11471390724182129,
      "step": 19545
    },
    {
      "epoch": 0.000119293212890625,
      "step": 19545,
      "training_step_time": 0.38318371772766113
    },
    {
      "epoch": 0.00011929931640625,
      "model_forward_time": 0.11491656303405762,
      "step": 19546
    },
    {
      "epoch": 0.00011929931640625,
      "step": 19546,
      "training_step_time": 0.43642735481262207
    },
    {
      "epoch": 0.000119305419921875,
      "model_forward_time": 0.11581993103027344,
      "step": 19547
    },
    {
      "epoch": 0.000119305419921875,
      "step": 19547,
      "training_step_time": 0.4275531768798828
    },
    {
      "epoch": 0.0001193115234375,
      "model_forward_time": 0.11557888984680176,
      "step": 19548
    },
    {
      "epoch": 0.0001193115234375,
      "step": 19548,
      "training_step_time": 0.8202028274536133
    },
    {
      "epoch": 0.000119317626953125,
      "model_forward_time": 0.1145322322845459,
      "step": 19549
    },
    {
      "epoch": 0.000119317626953125,
      "step": 19549,
      "training_step_time": 0.38378190994262695
    },
    {
      "epoch": 0.00011932373046875,
      "grad_norm": 0.2169187366962433,
      "learning_rate": 8.060178415749674e-05,
      "loss": 0.053,
      "step": 19550
    },
    {
      "epoch": 0.00011932373046875,
      "model_forward_time": 0.11568593978881836,
      "step": 19550
    },
    {
      "epoch": 0.00011932373046875,
      "step": 19550,
      "training_step_time": 0.45921897888183594
    },
    {
      "epoch": 0.000119329833984375,
      "model_forward_time": 0.11546182632446289,
      "step": 19551
    },
    {
      "epoch": 0.000119329833984375,
      "step": 19551,
      "training_step_time": 0.38892221450805664
    },
    {
      "epoch": 0.0001193359375,
      "model_forward_time": 0.11461758613586426,
      "step": 19552
    },
    {
      "epoch": 0.0001193359375,
      "step": 19552,
      "training_step_time": 0.38298606872558594
    },
    {
      "epoch": 0.000119342041015625,
      "model_forward_time": 0.11469364166259766,
      "step": 19553
    },
    {
      "epoch": 0.000119342041015625,
      "step": 19553,
      "training_step_time": 0.37943315505981445
    },
    {
      "epoch": 0.00011934814453125,
      "model_forward_time": 0.11555624008178711,
      "step": 19554
    },
    {
      "epoch": 0.00011934814453125,
      "step": 19554,
      "training_step_time": 0.8896405696868896
    },
    {
      "epoch": 0.000119354248046875,
      "model_forward_time": 0.1148688793182373,
      "step": 19555
    },
    {
      "epoch": 0.000119354248046875,
      "step": 19555,
      "training_step_time": 0.41812705993652344
    },
    {
      "epoch": 0.0001193603515625,
      "model_forward_time": 0.11527776718139648,
      "step": 19556
    },
    {
      "epoch": 0.0001193603515625,
      "step": 19556,
      "training_step_time": 0.42920613288879395
    },
    {
      "epoch": 0.000119366455078125,
      "model_forward_time": 0.11478781700134277,
      "step": 19557
    },
    {
      "epoch": 0.000119366455078125,
      "step": 19557,
      "training_step_time": 0.40404462814331055
    },
    {
      "epoch": 0.00011937255859375,
      "model_forward_time": 0.11412739753723145,
      "step": 19558
    },
    {
      "epoch": 0.00011937255859375,
      "step": 19558,
      "training_step_time": 0.381833553314209
    },
    {
      "epoch": 0.000119378662109375,
      "model_forward_time": 0.1140439510345459,
      "step": 19559
    },
    {
      "epoch": 0.000119378662109375,
      "step": 19559,
      "training_step_time": 0.38991475105285645
    },
    {
      "epoch": 0.000119384765625,
      "grad_norm": 0.2105383425951004,
      "learning_rate": 8.057998594759022e-05,
      "loss": 0.05,
      "step": 19560
    },
    {
      "epoch": 0.000119384765625,
      "model_forward_time": 0.115997314453125,
      "step": 19560
    },
    {
      "epoch": 0.000119384765625,
      "step": 19560,
      "training_step_time": 0.6276540756225586
    },
    {
      "epoch": 0.000119390869140625,
      "model_forward_time": 0.11498522758483887,
      "step": 19561
    },
    {
      "epoch": 0.000119390869140625,
      "step": 19561,
      "training_step_time": 0.3915717601776123
    },
    {
      "epoch": 0.00011939697265625,
      "model_forward_time": 0.11435937881469727,
      "step": 19562
    },
    {
      "epoch": 0.00011939697265625,
      "step": 19562,
      "training_step_time": 0.3844263553619385
    },
    {
      "epoch": 0.000119403076171875,
      "model_forward_time": 0.11496758460998535,
      "step": 19563
    },
    {
      "epoch": 0.000119403076171875,
      "step": 19563,
      "training_step_time": 0.39558839797973633
    },
    {
      "epoch": 0.0001194091796875,
      "model_forward_time": 0.114837646484375,
      "step": 19564
    },
    {
      "epoch": 0.0001194091796875,
      "step": 19564,
      "training_step_time": 0.397463321685791
    },
    {
      "epoch": 0.000119415283203125,
      "model_forward_time": 0.11431145668029785,
      "step": 19565
    },
    {
      "epoch": 0.000119415283203125,
      "step": 19565,
      "training_step_time": 0.39119863510131836
    },
    {
      "epoch": 0.00011942138671875,
      "model_forward_time": 0.11534810066223145,
      "step": 19566
    },
    {
      "epoch": 0.00011942138671875,
      "step": 19566,
      "training_step_time": 0.7647733688354492
    },
    {
      "epoch": 0.000119427490234375,
      "model_forward_time": 0.1142892837524414,
      "step": 19567
    },
    {
      "epoch": 0.000119427490234375,
      "step": 19567,
      "training_step_time": 0.44257378578186035
    },
    {
      "epoch": 0.00011943359375,
      "model_forward_time": 0.11493802070617676,
      "step": 19568
    },
    {
      "epoch": 0.00011943359375,
      "step": 19568,
      "training_step_time": 0.46328043937683105
    },
    {
      "epoch": 0.000119439697265625,
      "model_forward_time": 0.11461925506591797,
      "step": 19569
    },
    {
      "epoch": 0.000119439697265625,
      "step": 19569,
      "training_step_time": 0.43511056900024414
    },
    {
      "epoch": 0.00011944580078125,
      "grad_norm": 0.20396696031093597,
      "learning_rate": 8.055817844829136e-05,
      "loss": 0.0552,
      "step": 19570
    },
    {
      "epoch": 0.00011944580078125,
      "model_forward_time": 0.11426424980163574,
      "step": 19570
    },
    {
      "epoch": 0.00011944580078125,
      "step": 19570,
      "training_step_time": 0.46913743019104004
    },
    {
      "epoch": 0.000119451904296875,
      "model_forward_time": 0.11444306373596191,
      "step": 19571
    },
    {
      "epoch": 0.000119451904296875,
      "step": 19571,
      "training_step_time": 0.38382744789123535
    },
    {
      "epoch": 0.0001194580078125,
      "model_forward_time": 0.11463665962219238,
      "step": 19572
    },
    {
      "epoch": 0.0001194580078125,
      "step": 19572,
      "training_step_time": 0.6323368549346924
    },
    {
      "epoch": 0.000119464111328125,
      "model_forward_time": 0.11464166641235352,
      "step": 19573
    },
    {
      "epoch": 0.000119464111328125,
      "step": 19573,
      "training_step_time": 0.43709254264831543
    },
    {
      "epoch": 0.00011947021484375,
      "model_forward_time": 0.11479735374450684,
      "step": 19574
    },
    {
      "epoch": 0.00011947021484375,
      "step": 19574,
      "training_step_time": 0.41256046295166016
    },
    {
      "epoch": 0.000119476318359375,
      "model_forward_time": 0.11472082138061523,
      "step": 19575
    },
    {
      "epoch": 0.000119476318359375,
      "step": 19575,
      "training_step_time": 0.38687729835510254
    },
    {
      "epoch": 0.000119482421875,
      "model_forward_time": 0.1150813102722168,
      "step": 19576
    },
    {
      "epoch": 0.000119482421875,
      "step": 19576,
      "training_step_time": 0.3865628242492676
    },
    {
      "epoch": 0.000119488525390625,
      "model_forward_time": 0.11436104774475098,
      "step": 19577
    },
    {
      "epoch": 0.000119488525390625,
      "step": 19577,
      "training_step_time": 0.38535499572753906
    },
    {
      "epoch": 0.00011949462890625,
      "model_forward_time": 0.11536526679992676,
      "step": 19578
    },
    {
      "epoch": 0.00011949462890625,
      "step": 19578,
      "training_step_time": 0.5524330139160156
    },
    {
      "epoch": 0.000119500732421875,
      "model_forward_time": 0.11493587493896484,
      "step": 19579
    },
    {
      "epoch": 0.000119500732421875,
      "step": 19579,
      "training_step_time": 0.39458751678466797
    },
    {
      "epoch": 0.0001195068359375,
      "grad_norm": 0.15907011926174164,
      "learning_rate": 8.053636166622476e-05,
      "loss": 0.0514,
      "step": 19580
    },
    {
      "epoch": 0.0001195068359375,
      "model_forward_time": 0.11524510383605957,
      "step": 19580
    },
    {
      "epoch": 0.0001195068359375,
      "step": 19580,
      "training_step_time": 0.3895730972290039
    },
    {
      "epoch": 0.000119512939453125,
      "model_forward_time": 0.11491036415100098,
      "step": 19581
    },
    {
      "epoch": 0.000119512939453125,
      "step": 19581,
      "training_step_time": 0.452913761138916
    },
    {
      "epoch": 0.00011951904296875,
      "model_forward_time": 0.11496210098266602,
      "step": 19582
    },
    {
      "epoch": 0.00011951904296875,
      "step": 19582,
      "training_step_time": 0.39646077156066895
    },
    {
      "epoch": 0.000119525146484375,
      "model_forward_time": 0.11526656150817871,
      "step": 19583
    },
    {
      "epoch": 0.000119525146484375,
      "step": 19583,
      "training_step_time": 0.4701721668243408
    },
    {
      "epoch": 0.00011953125,
      "model_forward_time": 0.11525726318359375,
      "step": 19584
    },
    {
      "epoch": 0.00011953125,
      "step": 19584,
      "training_step_time": 0.7654786109924316
    },
    {
      "epoch": 0.000119537353515625,
      "model_forward_time": 0.11426186561584473,
      "step": 19585
    },
    {
      "epoch": 0.000119537353515625,
      "step": 19585,
      "training_step_time": 0.38991212844848633
    },
    {
      "epoch": 0.00011954345703125,
      "model_forward_time": 0.11471915245056152,
      "step": 19586
    },
    {
      "epoch": 0.00011954345703125,
      "step": 19586,
      "training_step_time": 0.40382957458496094
    },
    {
      "epoch": 0.000119549560546875,
      "model_forward_time": 0.11475110054016113,
      "step": 19587
    },
    {
      "epoch": 0.000119549560546875,
      "step": 19587,
      "training_step_time": 0.5188431739807129
    },
    {
      "epoch": 0.0001195556640625,
      "model_forward_time": 0.11408424377441406,
      "step": 19588
    },
    {
      "epoch": 0.0001195556640625,
      "step": 19588,
      "training_step_time": 0.3965740203857422
    },
    {
      "epoch": 0.000119561767578125,
      "model_forward_time": 0.1153860092163086,
      "step": 19589
    },
    {
      "epoch": 0.000119561767578125,
      "step": 19589,
      "training_step_time": 0.3941078186035156
    },
    {
      "epoch": 0.00011956787109375,
      "grad_norm": 0.14805062115192413,
      "learning_rate": 8.051453560801772e-05,
      "loss": 0.0453,
      "step": 19590
    },
    {
      "epoch": 0.00011956787109375,
      "model_forward_time": 0.11448287963867188,
      "step": 19590
    },
    {
      "epoch": 0.00011956787109375,
      "step": 19590,
      "training_step_time": 0.7032783031463623
    },
    {
      "epoch": 0.000119573974609375,
      "model_forward_time": 0.1143496036529541,
      "step": 19591
    },
    {
      "epoch": 0.000119573974609375,
      "step": 19591,
      "training_step_time": 0.3787870407104492
    },
    {
      "epoch": 0.000119580078125,
      "model_forward_time": 0.11467766761779785,
      "step": 19592
    },
    {
      "epoch": 0.000119580078125,
      "step": 19592,
      "training_step_time": 0.4051828384399414
    },
    {
      "epoch": 0.000119586181640625,
      "model_forward_time": 0.1145639419555664,
      "step": 19593
    },
    {
      "epoch": 0.000119586181640625,
      "step": 19593,
      "training_step_time": 0.39966416358947754
    },
    {
      "epoch": 0.00011959228515625,
      "model_forward_time": 0.1149592399597168,
      "step": 19594
    },
    {
      "epoch": 0.00011959228515625,
      "step": 19594,
      "training_step_time": 0.4375491142272949
    },
    {
      "epoch": 0.000119598388671875,
      "model_forward_time": 0.11502957344055176,
      "step": 19595
    },
    {
      "epoch": 0.000119598388671875,
      "step": 19595,
      "training_step_time": 0.4573497772216797
    },
    {
      "epoch": 0.0001196044921875,
      "model_forward_time": 0.1151118278503418,
      "step": 19596
    },
    {
      "epoch": 0.0001196044921875,
      "step": 19596,
      "training_step_time": 0.47695207595825195
    },
    {
      "epoch": 0.000119610595703125,
      "model_forward_time": 0.11479663848876953,
      "step": 19597
    },
    {
      "epoch": 0.000119610595703125,
      "step": 19597,
      "training_step_time": 0.48918914794921875
    },
    {
      "epoch": 0.00011961669921875,
      "model_forward_time": 0.11474967002868652,
      "step": 19598
    },
    {
      "epoch": 0.00011961669921875,
      "step": 19598,
      "training_step_time": 0.3844301700592041
    },
    {
      "epoch": 0.000119622802734375,
      "model_forward_time": 0.11458230018615723,
      "step": 19599
    },
    {
      "epoch": 0.000119622802734375,
      "step": 19599,
      "training_step_time": 0.4106016159057617
    },
    {
      "epoch": 0.00011962890625,
      "grad_norm": 0.13554714620113373,
      "learning_rate": 8.049270028030046e-05,
      "loss": 0.0459,
      "step": 19600
    },
    {
      "epoch": 0.00011962890625,
      "model_forward_time": 0.11425423622131348,
      "step": 19600
    },
    {
      "epoch": 0.00011962890625,
      "step": 19600,
      "training_step_time": 0.4361400604248047
    },
    {
      "epoch": 0.000119635009765625,
      "model_forward_time": 0.11526751518249512,
      "step": 19601
    },
    {
      "epoch": 0.000119635009765625,
      "step": 19601,
      "training_step_time": 0.39600205421447754
    },
    {
      "epoch": 0.00011964111328125,
      "model_forward_time": 0.11506247520446777,
      "step": 19602
    },
    {
      "epoch": 0.00011964111328125,
      "step": 19602,
      "training_step_time": 0.39606809616088867
    },
    {
      "epoch": 0.000119647216796875,
      "model_forward_time": 0.11526298522949219,
      "step": 19603
    },
    {
      "epoch": 0.000119647216796875,
      "step": 19603,
      "training_step_time": 0.3851609230041504
    },
    {
      "epoch": 0.0001196533203125,
      "model_forward_time": 0.11518144607543945,
      "step": 19604
    },
    {
      "epoch": 0.0001196533203125,
      "step": 19604,
      "training_step_time": 0.38655900955200195
    },
    {
      "epoch": 0.000119659423828125,
      "model_forward_time": 0.11574029922485352,
      "step": 19605
    },
    {
      "epoch": 0.000119659423828125,
      "step": 19605,
      "training_step_time": 0.3937067985534668
    },
    {
      "epoch": 0.00011966552734375,
      "model_forward_time": 0.1153712272644043,
      "step": 19606
    },
    {
      "epoch": 0.00011966552734375,
      "step": 19606,
      "training_step_time": 0.384519100189209
    },
    {
      "epoch": 0.000119671630859375,
      "model_forward_time": 0.11519527435302734,
      "step": 19607
    },
    {
      "epoch": 0.000119671630859375,
      "step": 19607,
      "training_step_time": 0.3947775363922119
    },
    {
      "epoch": 0.000119677734375,
      "model_forward_time": 0.11555218696594238,
      "step": 19608
    },
    {
      "epoch": 0.000119677734375,
      "step": 19608,
      "training_step_time": 1.071930170059204
    },
    {
      "epoch": 0.000119683837890625,
      "model_forward_time": 0.11528801918029785,
      "step": 19609
    },
    {
      "epoch": 0.000119683837890625,
      "step": 19609,
      "training_step_time": 0.43411993980407715
    },
    {
      "epoch": 0.00011968994140625,
      "grad_norm": 0.18672092258930206,
      "learning_rate": 8.047085568970598e-05,
      "loss": 0.0435,
      "step": 19610
    },
    {
      "epoch": 0.00011968994140625,
      "model_forward_time": 0.11463260650634766,
      "step": 19610
    },
    {
      "epoch": 0.00011968994140625,
      "step": 19610,
      "training_step_time": 0.4683091640472412
    },
    {
      "epoch": 0.000119696044921875,
      "model_forward_time": 0.1143648624420166,
      "step": 19611
    },
    {
      "epoch": 0.000119696044921875,
      "step": 19611,
      "training_step_time": 0.4567580223083496
    },
    {
      "epoch": 0.0001197021484375,
      "model_forward_time": 0.11369156837463379,
      "step": 19612
    },
    {
      "epoch": 0.0001197021484375,
      "step": 19612,
      "training_step_time": 0.39337611198425293
    },
    {
      "epoch": 0.000119708251953125,
      "model_forward_time": 0.11443972587585449,
      "step": 19613
    },
    {
      "epoch": 0.000119708251953125,
      "step": 19613,
      "training_step_time": 0.39441943168640137
    },
    {
      "epoch": 0.00011971435546875,
      "model_forward_time": 0.11490368843078613,
      "step": 19614
    },
    {
      "epoch": 0.00011971435546875,
      "step": 19614,
      "training_step_time": 0.396604061126709
    },
    {
      "epoch": 0.000119720458984375,
      "model_forward_time": 0.1151881217956543,
      "step": 19615
    },
    {
      "epoch": 0.000119720458984375,
      "step": 19615,
      "training_step_time": 0.4687361717224121
    },
    {
      "epoch": 0.0001197265625,
      "model_forward_time": 0.11544299125671387,
      "step": 19616
    },
    {
      "epoch": 0.0001197265625,
      "step": 19616,
      "training_step_time": 0.3811788558959961
    },
    {
      "epoch": 0.000119732666015625,
      "model_forward_time": 0.11481595039367676,
      "step": 19617
    },
    {
      "epoch": 0.000119732666015625,
      "step": 19617,
      "training_step_time": 0.3885955810546875
    },
    {
      "epoch": 0.00011973876953125,
      "model_forward_time": 0.11530184745788574,
      "step": 19618
    },
    {
      "epoch": 0.00011973876953125,
      "step": 19618,
      "training_step_time": 0.4031856060028076
    },
    {
      "epoch": 0.000119744873046875,
      "model_forward_time": 0.1156163215637207,
      "step": 19619
    },
    {
      "epoch": 0.000119744873046875,
      "step": 19619,
      "training_step_time": 0.38907289505004883
    },
    {
      "epoch": 0.0001197509765625,
      "grad_norm": 0.14184428751468658,
      "learning_rate": 8.044900184287007e-05,
      "loss": 0.049,
      "step": 19620
    },
    {
      "epoch": 0.0001197509765625,
      "model_forward_time": 0.11502671241760254,
      "step": 19620
    },
    {
      "epoch": 0.0001197509765625,
      "step": 19620,
      "training_step_time": 0.9477658271789551
    },
    {
      "epoch": 0.000119757080078125,
      "model_forward_time": 0.11460018157958984,
      "step": 19621
    },
    {
      "epoch": 0.000119757080078125,
      "step": 19621,
      "training_step_time": 0.38426828384399414
    },
    {
      "epoch": 0.00011976318359375,
      "model_forward_time": 0.1142878532409668,
      "step": 19622
    },
    {
      "epoch": 0.00011976318359375,
      "step": 19622,
      "training_step_time": 0.394392728805542
    },
    {
      "epoch": 0.000119769287109375,
      "model_forward_time": 0.11518406867980957,
      "step": 19623
    },
    {
      "epoch": 0.000119769287109375,
      "step": 19623,
      "training_step_time": 0.39476490020751953
    },
    {
      "epoch": 0.000119775390625,
      "model_forward_time": 0.11409425735473633,
      "step": 19624
    },
    {
      "epoch": 0.000119775390625,
      "step": 19624,
      "training_step_time": 0.4322805404663086
    },
    {
      "epoch": 0.000119781494140625,
      "model_forward_time": 0.11486148834228516,
      "step": 19625
    },
    {
      "epoch": 0.000119781494140625,
      "step": 19625,
      "training_step_time": 0.40480852127075195
    },
    {
      "epoch": 0.00011978759765625,
      "model_forward_time": 0.1152963638305664,
      "step": 19626
    },
    {
      "epoch": 0.00011978759765625,
      "step": 19626,
      "training_step_time": 0.6126823425292969
    },
    {
      "epoch": 0.000119793701171875,
      "model_forward_time": 0.11465191841125488,
      "step": 19627
    },
    {
      "epoch": 0.000119793701171875,
      "step": 19627,
      "training_step_time": 0.40901803970336914
    },
    {
      "epoch": 0.0001197998046875,
      "model_forward_time": 0.11480832099914551,
      "step": 19628
    },
    {
      "epoch": 0.0001197998046875,
      "step": 19628,
      "training_step_time": 0.4402179718017578
    },
    {
      "epoch": 0.000119805908203125,
      "model_forward_time": 0.11427426338195801,
      "step": 19629
    },
    {
      "epoch": 0.000119805908203125,
      "step": 19629,
      "training_step_time": 0.3986942768096924
    },
    {
      "epoch": 0.00011981201171875,
      "grad_norm": 0.09169413149356842,
      "learning_rate": 8.042713874643136e-05,
      "loss": 0.0473,
      "step": 19630
    },
    {
      "epoch": 0.00011981201171875,
      "model_forward_time": 0.11436915397644043,
      "step": 19630
    },
    {
      "epoch": 0.00011981201171875,
      "step": 19630,
      "training_step_time": 0.38943052291870117
    },
    {
      "epoch": 0.000119818115234375,
      "model_forward_time": 0.11492538452148438,
      "step": 19631
    },
    {
      "epoch": 0.000119818115234375,
      "step": 19631,
      "training_step_time": 0.4086313247680664
    },
    {
      "epoch": 0.00011982421875,
      "model_forward_time": 0.1150670051574707,
      "step": 19632
    },
    {
      "epoch": 0.00011982421875,
      "step": 19632,
      "training_step_time": 0.5963642597198486
    },
    {
      "epoch": 0.000119830322265625,
      "model_forward_time": 0.11499309539794922,
      "step": 19633
    },
    {
      "epoch": 0.000119830322265625,
      "step": 19633,
      "training_step_time": 0.38515758514404297
    },
    {
      "epoch": 0.00011983642578125,
      "model_forward_time": 0.11471414566040039,
      "step": 19634
    },
    {
      "epoch": 0.00011983642578125,
      "step": 19634,
      "training_step_time": 0.42210841178894043
    },
    {
      "epoch": 0.000119842529296875,
      "model_forward_time": 0.11525130271911621,
      "step": 19635
    },
    {
      "epoch": 0.000119842529296875,
      "step": 19635,
      "training_step_time": 0.47669219970703125
    },
    {
      "epoch": 0.0001198486328125,
      "model_forward_time": 0.11481595039367676,
      "step": 19636
    },
    {
      "epoch": 0.0001198486328125,
      "step": 19636,
      "training_step_time": 0.3640279769897461
    },
    {
      "epoch": 0.000119854736328125,
      "model_forward_time": 0.11459541320800781,
      "step": 19637
    },
    {
      "epoch": 0.000119854736328125,
      "step": 19637,
      "training_step_time": 0.4392271041870117
    },
    {
      "epoch": 0.00011986083984375,
      "model_forward_time": 0.11476898193359375,
      "step": 19638
    },
    {
      "epoch": 0.00011986083984375,
      "step": 19638,
      "training_step_time": 0.4254896640777588
    },
    {
      "epoch": 0.000119866943359375,
      "model_forward_time": 0.11566758155822754,
      "step": 19639
    },
    {
      "epoch": 0.000119866943359375,
      "step": 19639,
      "training_step_time": 0.38164854049682617
    },
    {
      "epoch": 0.000119873046875,
      "grad_norm": 0.14268282055854797,
      "learning_rate": 8.040526640703128e-05,
      "loss": 0.0457,
      "step": 19640
    },
    {
      "epoch": 0.000119873046875,
      "model_forward_time": 0.11492037773132324,
      "step": 19640
    },
    {
      "epoch": 0.000119873046875,
      "step": 19640,
      "training_step_time": 0.37953925132751465
    },
    {
      "epoch": 0.000119879150390625,
      "model_forward_time": 0.11502718925476074,
      "step": 19641
    },
    {
      "epoch": 0.000119879150390625,
      "step": 19641,
      "training_step_time": 0.4394795894622803
    },
    {
      "epoch": 0.00011988525390625,
      "model_forward_time": 0.11500191688537598,
      "step": 19642
    },
    {
      "epoch": 0.00011988525390625,
      "step": 19642,
      "training_step_time": 0.4102354049682617
    },
    {
      "epoch": 0.000119891357421875,
      "model_forward_time": 0.11463403701782227,
      "step": 19643
    },
    {
      "epoch": 0.000119891357421875,
      "step": 19643,
      "training_step_time": 0.42586493492126465
    },
    {
      "epoch": 0.0001198974609375,
      "model_forward_time": 0.11492609977722168,
      "step": 19644
    },
    {
      "epoch": 0.0001198974609375,
      "step": 19644,
      "training_step_time": 0.4065682888031006
    },
    {
      "epoch": 0.000119903564453125,
      "model_forward_time": 0.11571598052978516,
      "step": 19645
    },
    {
      "epoch": 0.000119903564453125,
      "step": 19645,
      "training_step_time": 0.38475871086120605
    },
    {
      "epoch": 0.00011990966796875,
      "model_forward_time": 0.11556768417358398,
      "step": 19646
    },
    {
      "epoch": 0.00011990966796875,
      "step": 19646,
      "training_step_time": 0.3884146213531494
    },
    {
      "epoch": 0.000119915771484375,
      "model_forward_time": 0.11517500877380371,
      "step": 19647
    },
    {
      "epoch": 0.000119915771484375,
      "step": 19647,
      "training_step_time": 0.38585662841796875
    },
    {
      "epoch": 0.000119921875,
      "model_forward_time": 0.1156609058380127,
      "step": 19648
    },
    {
      "epoch": 0.000119921875,
      "step": 19648,
      "training_step_time": 0.3849949836730957
    },
    {
      "epoch": 0.000119927978515625,
      "model_forward_time": 0.1153116226196289,
      "step": 19649
    },
    {
      "epoch": 0.000119927978515625,
      "step": 19649,
      "training_step_time": 0.3995027542114258
    },
    {
      "epoch": 0.00011993408203125,
      "grad_norm": 0.16322316229343414,
      "learning_rate": 8.038338483131407e-05,
      "loss": 0.0447,
      "step": 19650
    },
    {
      "epoch": 0.00011993408203125,
      "model_forward_time": 0.1155252456665039,
      "step": 19650
    },
    {
      "epoch": 0.00011993408203125,
      "step": 19650,
      "training_step_time": 0.6004531383514404
    },
    {
      "epoch": 0.000119940185546875,
      "model_forward_time": 0.11536788940429688,
      "step": 19651
    },
    {
      "epoch": 0.000119940185546875,
      "step": 19651,
      "training_step_time": 0.4915142059326172
    },
    {
      "epoch": 0.0001199462890625,
      "model_forward_time": 0.1151726245880127,
      "step": 19652
    },
    {
      "epoch": 0.0001199462890625,
      "step": 19652,
      "training_step_time": 0.4955124855041504
    },
    {
      "epoch": 0.000119952392578125,
      "model_forward_time": 0.11465573310852051,
      "step": 19653
    },
    {
      "epoch": 0.000119952392578125,
      "step": 19653,
      "training_step_time": 0.3895149230957031
    },
    {
      "epoch": 0.00011995849609375,
      "model_forward_time": 0.11552047729492188,
      "step": 19654
    },
    {
      "epoch": 0.00011995849609375,
      "step": 19654,
      "training_step_time": 0.3836841583251953
    },
    {
      "epoch": 0.000119964599609375,
      "model_forward_time": 0.11510825157165527,
      "step": 19655
    },
    {
      "epoch": 0.000119964599609375,
      "step": 19655,
      "training_step_time": 0.44463467597961426
    },
    {
      "epoch": 0.000119970703125,
      "model_forward_time": 0.11546897888183594,
      "step": 19656
    },
    {
      "epoch": 0.000119970703125,
      "step": 19656,
      "training_step_time": 0.596315860748291
    },
    {
      "epoch": 0.000119976806640625,
      "model_forward_time": 0.11478447914123535,
      "step": 19657
    },
    {
      "epoch": 0.000119976806640625,
      "step": 19657,
      "training_step_time": 0.4838137626647949
    },
    {
      "epoch": 0.00011998291015625,
      "model_forward_time": 0.11475801467895508,
      "step": 19658
    },
    {
      "epoch": 0.00011998291015625,
      "step": 19658,
      "training_step_time": 0.3826639652252197
    },
    {
      "epoch": 0.000119989013671875,
      "model_forward_time": 0.11463451385498047,
      "step": 19659
    },
    {
      "epoch": 0.000119989013671875,
      "step": 19659,
      "training_step_time": 0.380734920501709
    },
    {
      "epoch": 0.0001199951171875,
      "grad_norm": 0.1513390988111496,
      "learning_rate": 8.036149402592676e-05,
      "loss": 0.0557,
      "step": 19660
    },
    {
      "epoch": 0.0001199951171875,
      "model_forward_time": 0.11425304412841797,
      "step": 19660
    },
    {
      "epoch": 0.0001199951171875,
      "step": 19660,
      "training_step_time": 0.38219547271728516
    },
    {
      "epoch": 0.000120001220703125,
      "model_forward_time": 0.1140894889831543,
      "step": 19661
    },
    {
      "epoch": 0.000120001220703125,
      "step": 19661,
      "training_step_time": 0.3962132930755615
    },
    {
      "epoch": 0.00012000732421875,
      "model_forward_time": 0.11561083793640137,
      "step": 19662
    },
    {
      "epoch": 0.00012000732421875,
      "step": 19662,
      "training_step_time": 0.6206321716308594
    },
    {
      "epoch": 0.000120013427734375,
      "model_forward_time": 0.11484599113464355,
      "step": 19663
    },
    {
      "epoch": 0.000120013427734375,
      "step": 19663,
      "training_step_time": 0.48619771003723145
    },
    {
      "epoch": 0.00012001953125,
      "model_forward_time": 0.11455273628234863,
      "step": 19664
    },
    {
      "epoch": 0.00012001953125,
      "step": 19664,
      "training_step_time": 0.40721702575683594
    },
    {
      "epoch": 0.000120025634765625,
      "model_forward_time": 0.11531233787536621,
      "step": 19665
    },
    {
      "epoch": 0.000120025634765625,
      "step": 19665,
      "training_step_time": 0.4607832431793213
    },
    {
      "epoch": 0.00012003173828125,
      "model_forward_time": 0.11482071876525879,
      "step": 19666
    },
    {
      "epoch": 0.00012003173828125,
      "step": 19666,
      "training_step_time": 0.48282742500305176
    },
    {
      "epoch": 0.000120037841796875,
      "model_forward_time": 0.11449623107910156,
      "step": 19667
    },
    {
      "epoch": 0.000120037841796875,
      "step": 19667,
      "training_step_time": 0.3940877914428711
    },
    {
      "epoch": 0.0001200439453125,
      "model_forward_time": 0.11512589454650879,
      "step": 19668
    },
    {
      "epoch": 0.0001200439453125,
      "step": 19668,
      "training_step_time": 0.3922407627105713
    },
    {
      "epoch": 0.000120050048828125,
      "model_forward_time": 0.11522936820983887,
      "step": 19669
    },
    {
      "epoch": 0.000120050048828125,
      "step": 19669,
      "training_step_time": 0.40607595443725586
    },
    {
      "epoch": 0.00012005615234375,
      "grad_norm": 0.18316015601158142,
      "learning_rate": 8.033959399751924e-05,
      "loss": 0.0539,
      "step": 19670
    },
    {
      "epoch": 0.00012005615234375,
      "model_forward_time": 0.11569428443908691,
      "step": 19670
    },
    {
      "epoch": 0.00012005615234375,
      "step": 19670,
      "training_step_time": 0.39609193801879883
    },
    {
      "epoch": 0.000120062255859375,
      "model_forward_time": 0.11531567573547363,
      "step": 19671
    },
    {
      "epoch": 0.000120062255859375,
      "step": 19671,
      "training_step_time": 0.4596073627471924
    },
    {
      "epoch": 0.000120068359375,
      "model_forward_time": 0.11593413352966309,
      "step": 19672
    },
    {
      "epoch": 0.000120068359375,
      "step": 19672,
      "training_step_time": 0.38245415687561035
    },
    {
      "epoch": 0.000120074462890625,
      "model_forward_time": 0.11632227897644043,
      "step": 19673
    },
    {
      "epoch": 0.000120074462890625,
      "step": 19673,
      "training_step_time": 0.39156198501586914
    },
    {
      "epoch": 0.00012008056640625,
      "model_forward_time": 0.11492204666137695,
      "step": 19674
    },
    {
      "epoch": 0.00012008056640625,
      "step": 19674,
      "training_step_time": 0.7742061614990234
    },
    {
      "epoch": 0.000120086669921875,
      "model_forward_time": 0.11602401733398438,
      "step": 19675
    },
    {
      "epoch": 0.000120086669921875,
      "step": 19675,
      "training_step_time": 0.4087700843811035
    },
    {
      "epoch": 0.0001200927734375,
      "model_forward_time": 0.11467218399047852,
      "step": 19676
    },
    {
      "epoch": 0.0001200927734375,
      "step": 19676,
      "training_step_time": 0.41683006286621094
    },
    {
      "epoch": 0.000120098876953125,
      "model_forward_time": 0.11852169036865234,
      "step": 19677
    },
    {
      "epoch": 0.000120098876953125,
      "step": 19677,
      "training_step_time": 0.567920446395874
    },
    {
      "epoch": 0.00012010498046875,
      "model_forward_time": 0.11931872367858887,
      "step": 19678
    },
    {
      "epoch": 0.00012010498046875,
      "step": 19678,
      "training_step_time": 0.5554232597351074
    },
    {
      "epoch": 0.000120111083984375,
      "model_forward_time": 0.12081003189086914,
      "step": 19679
    },
    {
      "epoch": 0.000120111083984375,
      "step": 19679,
      "training_step_time": 0.7050478458404541
    },
    {
      "epoch": 0.0001201171875,
      "grad_norm": 0.17660638689994812,
      "learning_rate": 8.031768475274413e-05,
      "loss": 0.0466,
      "step": 19680
    },
    {
      "epoch": 0.0001201171875,
      "model_forward_time": 0.12106108665466309,
      "step": 19680
    },
    {
      "epoch": 0.0001201171875,
      "step": 19680,
      "training_step_time": 0.7536931037902832
    },
    {
      "epoch": 0.000120123291015625,
      "model_forward_time": 0.12220335006713867,
      "step": 19681
    },
    {
      "epoch": 0.000120123291015625,
      "step": 19681,
      "training_step_time": 0.6616578102111816
    },
    {
      "epoch": 0.00012012939453125,
      "model_forward_time": 0.126617431640625,
      "step": 19682
    },
    {
      "epoch": 0.00012012939453125,
      "step": 19682,
      "training_step_time": 0.7213008403778076
    },
    {
      "epoch": 0.000120135498046875,
      "model_forward_time": 0.12386393547058105,
      "step": 19683
    },
    {
      "epoch": 0.000120135498046875,
      "step": 19683,
      "training_step_time": 0.6827008724212646
    },
    {
      "epoch": 0.0001201416015625,
      "model_forward_time": 0.11619353294372559,
      "step": 19684
    },
    {
      "epoch": 0.0001201416015625,
      "step": 19684,
      "training_step_time": 0.6992068290710449
    },
    {
      "epoch": 0.000120147705078125,
      "model_forward_time": 0.12632131576538086,
      "step": 19685
    },
    {
      "epoch": 0.000120147705078125,
      "step": 19685,
      "training_step_time": 0.7691419124603271
    },
    {
      "epoch": 0.00012015380859375,
      "model_forward_time": 0.11784887313842773,
      "step": 19686
    },
    {
      "epoch": 0.00012015380859375,
      "step": 19686,
      "training_step_time": 0.6697158813476562
    },
    {
      "epoch": 0.000120159912109375,
      "model_forward_time": 0.11592555046081543,
      "step": 19687
    },
    {
      "epoch": 0.000120159912109375,
      "step": 19687,
      "training_step_time": 0.6531403064727783
    },
    {
      "epoch": 0.000120166015625,
      "model_forward_time": 0.11887621879577637,
      "step": 19688
    },
    {
      "epoch": 0.000120166015625,
      "step": 19688,
      "training_step_time": 0.6851346492767334
    },
    {
      "epoch": 0.000120172119140625,
      "model_forward_time": 0.1186678409576416,
      "step": 19689
    },
    {
      "epoch": 0.000120172119140625,
      "step": 19689,
      "training_step_time": 0.6604347229003906
    },
    {
      "epoch": 0.00012017822265625,
      "grad_norm": 0.1315726339817047,
      "learning_rate": 8.029576629825687e-05,
      "loss": 0.0503,
      "step": 19690
    },
    {
      "epoch": 0.00012017822265625,
      "model_forward_time": 0.12006163597106934,
      "step": 19690
    },
    {
      "epoch": 0.00012017822265625,
      "step": 19690,
      "training_step_time": 0.6932394504547119
    },
    {
      "epoch": 0.000120184326171875,
      "model_forward_time": 0.11651802062988281,
      "step": 19691
    },
    {
      "epoch": 0.000120184326171875,
      "step": 19691,
      "training_step_time": 0.6523520946502686
    },
    {
      "epoch": 0.0001201904296875,
      "model_forward_time": 0.12154459953308105,
      "step": 19692
    },
    {
      "epoch": 0.0001201904296875,
      "step": 19692,
      "training_step_time": 0.6664087772369385
    },
    {
      "epoch": 0.000120196533203125,
      "model_forward_time": 0.12032556533813477,
      "step": 19693
    },
    {
      "epoch": 0.000120196533203125,
      "step": 19693,
      "training_step_time": 0.6417942047119141
    },
    {
      "epoch": 0.00012020263671875,
      "model_forward_time": 0.11679315567016602,
      "step": 19694
    },
    {
      "epoch": 0.00012020263671875,
      "step": 19694,
      "training_step_time": 0.6844203472137451
    },
    {
      "epoch": 0.000120208740234375,
      "model_forward_time": 0.11880064010620117,
      "step": 19695
    },
    {
      "epoch": 0.000120208740234375,
      "step": 19695,
      "training_step_time": 0.6636347770690918
    },
    {
      "epoch": 0.00012021484375,
      "model_forward_time": 0.11871337890625,
      "step": 19696
    },
    {
      "epoch": 0.00012021484375,
      "step": 19696,
      "training_step_time": 0.726517915725708
    },
    {
      "epoch": 0.000120220947265625,
      "model_forward_time": 0.11657404899597168,
      "step": 19697
    },
    {
      "epoch": 0.000120220947265625,
      "step": 19697,
      "training_step_time": 0.726264238357544
    },
    {
      "epoch": 0.00012022705078125,
      "model_forward_time": 0.1166071891784668,
      "step": 19698
    },
    {
      "epoch": 0.00012022705078125,
      "step": 19698,
      "training_step_time": 0.7449378967285156
    },
    {
      "epoch": 0.000120233154296875,
      "model_forward_time": 0.11788034439086914,
      "step": 19699
    },
    {
      "epoch": 0.000120233154296875,
      "step": 19699,
      "training_step_time": 0.700054407119751
    },
    {
      "epoch": 0.0001202392578125,
      "grad_norm": 0.15337391197681427,
      "learning_rate": 8.027383864071573e-05,
      "loss": 0.0628,
      "step": 19700
    },
    {
      "epoch": 0.0001202392578125,
      "model_forward_time": 0.12140464782714844,
      "step": 19700
    },
    {
      "epoch": 0.0001202392578125,
      "step": 19700,
      "training_step_time": 0.7295331954956055
    },
    {
      "epoch": 0.000120245361328125,
      "model_forward_time": 0.11858749389648438,
      "step": 19701
    },
    {
      "epoch": 0.000120245361328125,
      "step": 19701,
      "training_step_time": 0.6175820827484131
    },
    {
      "epoch": 0.00012025146484375,
      "model_forward_time": 0.11623120307922363,
      "step": 19702
    },
    {
      "epoch": 0.00012025146484375,
      "step": 19702,
      "training_step_time": 0.6877217292785645
    },
    {
      "epoch": 0.000120257568359375,
      "model_forward_time": 0.12044358253479004,
      "step": 19703
    },
    {
      "epoch": 0.000120257568359375,
      "step": 19703,
      "training_step_time": 0.6655724048614502
    },
    {
      "epoch": 0.000120263671875,
      "model_forward_time": 0.1165463924407959,
      "step": 19704
    },
    {
      "epoch": 0.000120263671875,
      "step": 19704,
      "training_step_time": 0.7200484275817871
    },
    {
      "epoch": 0.000120269775390625,
      "model_forward_time": 0.11698150634765625,
      "step": 19705
    },
    {
      "epoch": 0.000120269775390625,
      "step": 19705,
      "training_step_time": 0.6173226833343506
    },
    {
      "epoch": 0.00012027587890625,
      "model_forward_time": 0.1169888973236084,
      "step": 19706
    },
    {
      "epoch": 0.00012027587890625,
      "step": 19706,
      "training_step_time": 0.6655013561248779
    },
    {
      "epoch": 0.000120281982421875,
      "model_forward_time": 0.11985564231872559,
      "step": 19707
    },
    {
      "epoch": 0.000120281982421875,
      "step": 19707,
      "training_step_time": 0.6225090026855469
    },
    {
      "epoch": 0.0001202880859375,
      "model_forward_time": 0.12670207023620605,
      "step": 19708
    },
    {
      "epoch": 0.0001202880859375,
      "step": 19708,
      "training_step_time": 0.7102375030517578
    },
    {
      "epoch": 0.000120294189453125,
      "model_forward_time": 0.12073588371276855,
      "step": 19709
    },
    {
      "epoch": 0.000120294189453125,
      "step": 19709,
      "training_step_time": 0.6744861602783203
    },
    {
      "epoch": 0.00012030029296875,
      "grad_norm": 0.18926243484020233,
      "learning_rate": 8.025190178678175e-05,
      "loss": 0.0565,
      "step": 19710
    },
    {
      "epoch": 0.00012030029296875,
      "model_forward_time": 0.12761807441711426,
      "step": 19710
    },
    {
      "epoch": 0.00012030029296875,
      "step": 19710,
      "training_step_time": 0.6546530723571777
    },
    {
      "epoch": 0.000120306396484375,
      "model_forward_time": 0.11982131004333496,
      "step": 19711
    },
    {
      "epoch": 0.000120306396484375,
      "step": 19711,
      "training_step_time": 0.6701970100402832
    },
    {
      "epoch": 0.0001203125,
      "model_forward_time": 0.12070536613464355,
      "step": 19712
    },
    {
      "epoch": 0.0001203125,
      "step": 19712,
      "training_step_time": 0.6671075820922852
    },
    {
      "epoch": 0.000120318603515625,
      "model_forward_time": 0.1182246208190918,
      "step": 19713
    },
    {
      "epoch": 0.000120318603515625,
      "step": 19713,
      "training_step_time": 0.6218221187591553
    },
    {
      "epoch": 0.00012032470703125,
      "model_forward_time": 0.11954617500305176,
      "step": 19714
    },
    {
      "epoch": 0.00012032470703125,
      "step": 19714,
      "training_step_time": 0.7044928073883057
    },
    {
      "epoch": 0.000120330810546875,
      "model_forward_time": 0.14986157417297363,
      "step": 19715
    },
    {
      "epoch": 0.000120330810546875,
      "step": 19715,
      "training_step_time": 0.6748652458190918
    },
    {
      "epoch": 0.0001203369140625,
      "model_forward_time": 0.1212465763092041,
      "step": 19716
    },
    {
      "epoch": 0.0001203369140625,
      "step": 19716,
      "training_step_time": 0.7400970458984375
    },
    {
      "epoch": 0.000120343017578125,
      "model_forward_time": 0.12079286575317383,
      "step": 19717
    },
    {
      "epoch": 0.000120343017578125,
      "step": 19717,
      "training_step_time": 0.7116708755493164
    },
    {
      "epoch": 0.00012034912109375,
      "model_forward_time": 0.12287092208862305,
      "step": 19718
    },
    {
      "epoch": 0.00012034912109375,
      "step": 19718,
      "training_step_time": 0.703157901763916
    },
    {
      "epoch": 0.000120355224609375,
      "model_forward_time": 0.11752986907958984,
      "step": 19719
    },
    {
      "epoch": 0.000120355224609375,
      "step": 19719,
      "training_step_time": 0.6835811138153076
    },
    {
      "epoch": 0.000120361328125,
      "grad_norm": 0.17240601778030396,
      "learning_rate": 8.022995574311876e-05,
      "loss": 0.0585,
      "step": 19720
    },
    {
      "epoch": 0.000120361328125,
      "model_forward_time": 0.11724114418029785,
      "step": 19720
    },
    {
      "epoch": 0.000120361328125,
      "step": 19720,
      "training_step_time": 0.7779805660247803
    },
    {
      "epoch": 0.000120367431640625,
      "model_forward_time": 0.11625432968139648,
      "step": 19721
    },
    {
      "epoch": 0.000120367431640625,
      "step": 19721,
      "training_step_time": 0.6766223907470703
    },
    {
      "epoch": 0.00012037353515625,
      "model_forward_time": 0.12200284004211426,
      "step": 19722
    },
    {
      "epoch": 0.00012037353515625,
      "step": 19722,
      "training_step_time": 0.8502364158630371
    },
    {
      "epoch": 0.000120379638671875,
      "model_forward_time": 0.1243581771850586,
      "step": 19723
    },
    {
      "epoch": 0.000120379638671875,
      "step": 19723,
      "training_step_time": 0.6607739925384521
    },
    {
      "epoch": 0.0001203857421875,
      "model_forward_time": 0.11607003211975098,
      "step": 19724
    },
    {
      "epoch": 0.0001203857421875,
      "step": 19724,
      "training_step_time": 0.7188260555267334
    },
    {
      "epoch": 0.000120391845703125,
      "model_forward_time": 0.11803030967712402,
      "step": 19725
    },
    {
      "epoch": 0.000120391845703125,
      "step": 19725,
      "training_step_time": 0.6964941024780273
    },
    {
      "epoch": 0.00012039794921875,
      "model_forward_time": 0.11874508857727051,
      "step": 19726
    },
    {
      "epoch": 0.00012039794921875,
      "step": 19726,
      "training_step_time": 0.6944994926452637
    },
    {
      "epoch": 0.000120404052734375,
      "model_forward_time": 0.11816215515136719,
      "step": 19727
    },
    {
      "epoch": 0.000120404052734375,
      "step": 19727,
      "training_step_time": 0.6922016143798828
    },
    {
      "epoch": 0.00012041015625,
      "model_forward_time": 0.11841058731079102,
      "step": 19728
    },
    {
      "epoch": 0.00012041015625,
      "step": 19728,
      "training_step_time": 0.6882507801055908
    },
    {
      "epoch": 0.000120416259765625,
      "model_forward_time": 0.11658406257629395,
      "step": 19729
    },
    {
      "epoch": 0.000120416259765625,
      "step": 19729,
      "training_step_time": 0.6669259071350098
    },
    {
      "epoch": 0.00012042236328125,
      "grad_norm": 0.167119100689888,
      "learning_rate": 8.020800051639337e-05,
      "loss": 0.0585,
      "step": 19730
    },
    {
      "epoch": 0.00012042236328125,
      "model_forward_time": 0.12441730499267578,
      "step": 19730
    },
    {
      "epoch": 0.00012042236328125,
      "step": 19730,
      "training_step_time": 0.6784708499908447
    },
    {
      "epoch": 0.000120428466796875,
      "model_forward_time": 0.13153600692749023,
      "step": 19731
    },
    {
      "epoch": 0.000120428466796875,
      "step": 19731,
      "training_step_time": 0.6641230583190918
    },
    {
      "epoch": 0.0001204345703125,
      "model_forward_time": 0.12052798271179199,
      "step": 19732
    },
    {
      "epoch": 0.0001204345703125,
      "step": 19732,
      "training_step_time": 0.6847059726715088
    },
    {
      "epoch": 0.000120440673828125,
      "model_forward_time": 0.11623167991638184,
      "step": 19733
    },
    {
      "epoch": 0.000120440673828125,
      "step": 19733,
      "training_step_time": 0.7101359367370605
    },
    {
      "epoch": 0.00012044677734375,
      "model_forward_time": 0.11636233329772949,
      "step": 19734
    },
    {
      "epoch": 0.00012044677734375,
      "step": 19734,
      "training_step_time": 0.7597308158874512
    },
    {
      "epoch": 0.000120452880859375,
      "model_forward_time": 0.12285590171813965,
      "step": 19735
    },
    {
      "epoch": 0.000120452880859375,
      "step": 19735,
      "training_step_time": 0.6667096614837646
    },
    {
      "epoch": 0.000120458984375,
      "model_forward_time": 0.13029217720031738,
      "step": 19736
    },
    {
      "epoch": 0.000120458984375,
      "step": 19736,
      "training_step_time": 0.6369338035583496
    },
    {
      "epoch": 0.000120465087890625,
      "model_forward_time": 0.11595010757446289,
      "step": 19737
    },
    {
      "epoch": 0.000120465087890625,
      "step": 19737,
      "training_step_time": 0.6803956031799316
    },
    {
      "epoch": 0.00012047119140625,
      "model_forward_time": 0.11654424667358398,
      "step": 19738
    },
    {
      "epoch": 0.00012047119140625,
      "step": 19738,
      "training_step_time": 0.6421065330505371
    },
    {
      "epoch": 0.000120477294921875,
      "model_forward_time": 0.12138867378234863,
      "step": 19739
    },
    {
      "epoch": 0.000120477294921875,
      "step": 19739,
      "training_step_time": 0.6712191104888916
    },
    {
      "epoch": 0.0001204833984375,
      "grad_norm": 0.14873678982257843,
      "learning_rate": 8.018603611327504e-05,
      "loss": 0.0593,
      "step": 19740
    },
    {
      "epoch": 0.0001204833984375,
      "model_forward_time": 0.1270310878753662,
      "step": 19740
    },
    {
      "epoch": 0.0001204833984375,
      "step": 19740,
      "training_step_time": 0.6459484100341797
    },
    {
      "epoch": 0.000120489501953125,
      "model_forward_time": 0.12998175621032715,
      "step": 19741
    },
    {
      "epoch": 0.000120489501953125,
      "step": 19741,
      "training_step_time": 0.6148018836975098
    },
    {
      "epoch": 0.00012049560546875,
      "model_forward_time": 0.12525629997253418,
      "step": 19742
    },
    {
      "epoch": 0.00012049560546875,
      "step": 19742,
      "training_step_time": 0.6569890975952148
    },
    {
      "epoch": 0.000120501708984375,
      "model_forward_time": 0.12401509284973145,
      "step": 19743
    },
    {
      "epoch": 0.000120501708984375,
      "step": 19743,
      "training_step_time": 0.6233229637145996
    },
    {
      "epoch": 0.0001205078125,
      "model_forward_time": 0.12581515312194824,
      "step": 19744
    },
    {
      "epoch": 0.0001205078125,
      "step": 19744,
      "training_step_time": 0.6444094181060791
    },
    {
      "epoch": 0.000120513916015625,
      "model_forward_time": 0.12034010887145996,
      "step": 19745
    },
    {
      "epoch": 0.000120513916015625,
      "step": 19745,
      "training_step_time": 0.6104557514190674
    },
    {
      "epoch": 0.00012052001953125,
      "model_forward_time": 0.11813235282897949,
      "step": 19746
    },
    {
      "epoch": 0.00012052001953125,
      "step": 19746,
      "training_step_time": 0.6291799545288086
    },
    {
      "epoch": 0.000120526123046875,
      "model_forward_time": 0.11920952796936035,
      "step": 19747
    },
    {
      "epoch": 0.000120526123046875,
      "step": 19747,
      "training_step_time": 0.514458417892456
    },
    {
      "epoch": 0.0001205322265625,
      "model_forward_time": 0.11975860595703125,
      "step": 19748
    },
    {
      "epoch": 0.0001205322265625,
      "step": 19748,
      "training_step_time": 0.5337750911712646
    },
    {
      "epoch": 0.000120538330078125,
      "model_forward_time": 0.11683893203735352,
      "step": 19749
    },
    {
      "epoch": 0.000120538330078125,
      "step": 19749,
      "training_step_time": 0.49559783935546875
    },
    {
      "epoch": 0.00012054443359375,
      "grad_norm": 0.16469882428646088,
      "learning_rate": 8.016406254043595e-05,
      "loss": 0.061,
      "step": 19750
    },
    {
      "epoch": 0.00012054443359375,
      "model_forward_time": 0.11698246002197266,
      "step": 19750
    },
    {
      "epoch": 0.00012054443359375,
      "step": 19750,
      "training_step_time": 0.4211850166320801
    },
    {
      "epoch": 0.000120550537109375,
      "model_forward_time": 0.11576366424560547,
      "step": 19751
    },
    {
      "epoch": 0.000120550537109375,
      "step": 19751,
      "training_step_time": 0.44146728515625
    },
    {
      "epoch": 0.000120556640625,
      "model_forward_time": 0.11594057083129883,
      "step": 19752
    },
    {
      "epoch": 0.000120556640625,
      "step": 19752,
      "training_step_time": 0.4177260398864746
    },
    {
      "epoch": 0.000120562744140625,
      "model_forward_time": 0.11563420295715332,
      "step": 19753
    },
    {
      "epoch": 0.000120562744140625,
      "step": 19753,
      "training_step_time": 0.40647125244140625
    },
    {
      "epoch": 0.00012056884765625,
      "model_forward_time": 0.11519360542297363,
      "step": 19754
    },
    {
      "epoch": 0.00012056884765625,
      "step": 19754,
      "training_step_time": 0.40045976638793945
    },
    {
      "epoch": 0.000120574951171875,
      "model_forward_time": 0.1155240535736084,
      "step": 19755
    },
    {
      "epoch": 0.000120574951171875,
      "step": 19755,
      "training_step_time": 0.45642852783203125
    },
    {
      "epoch": 0.0001205810546875,
      "model_forward_time": 0.11519336700439453,
      "step": 19756
    },
    {
      "epoch": 0.0001205810546875,
      "step": 19756,
      "training_step_time": 0.46234965324401855
    },
    {
      "epoch": 0.000120587158203125,
      "model_forward_time": 0.11493611335754395,
      "step": 19757
    },
    {
      "epoch": 0.000120587158203125,
      "step": 19757,
      "training_step_time": 0.38750457763671875
    },
    {
      "epoch": 0.00012059326171875,
      "model_forward_time": 0.11554121971130371,
      "step": 19758
    },
    {
      "epoch": 0.00012059326171875,
      "step": 19758,
      "training_step_time": 0.3887510299682617
    },
    {
      "epoch": 0.000120599365234375,
      "model_forward_time": 0.11506891250610352,
      "step": 19759
    },
    {
      "epoch": 0.000120599365234375,
      "step": 19759,
      "training_step_time": 0.5069103240966797
    },
    {
      "epoch": 0.00012060546875,
      "grad_norm": 0.14338240027427673,
      "learning_rate": 8.01420798045511e-05,
      "loss": 0.0556,
      "step": 19760
    },
    {
      "epoch": 0.00012060546875,
      "model_forward_time": 0.11483335494995117,
      "step": 19760
    },
    {
      "epoch": 0.00012060546875,
      "step": 19760,
      "training_step_time": 0.4617586135864258
    },
    {
      "epoch": 0.000120611572265625,
      "model_forward_time": 0.11584854125976562,
      "step": 19761
    },
    {
      "epoch": 0.000120611572265625,
      "step": 19761,
      "training_step_time": 0.4953310489654541
    },
    {
      "epoch": 0.00012061767578125,
      "model_forward_time": 0.11500048637390137,
      "step": 19762
    },
    {
      "epoch": 0.00012061767578125,
      "step": 19762,
      "training_step_time": 0.43291282653808594
    },
    {
      "epoch": 0.000120623779296875,
      "model_forward_time": 0.11460566520690918,
      "step": 19763
    },
    {
      "epoch": 0.000120623779296875,
      "step": 19763,
      "training_step_time": 0.47404909133911133
    },
    {
      "epoch": 0.0001206298828125,
      "model_forward_time": 0.11504411697387695,
      "step": 19764
    },
    {
      "epoch": 0.0001206298828125,
      "step": 19764,
      "training_step_time": 0.3965156078338623
    },
    {
      "epoch": 0.000120635986328125,
      "model_forward_time": 0.11409759521484375,
      "step": 19765
    },
    {
      "epoch": 0.000120635986328125,
      "step": 19765,
      "training_step_time": 0.3925600051879883
    },
    {
      "epoch": 0.00012064208984375,
      "model_forward_time": 0.11510562896728516,
      "step": 19766
    },
    {
      "epoch": 0.00012064208984375,
      "step": 19766,
      "training_step_time": 0.3890259265899658
    },
    {
      "epoch": 0.000120648193359375,
      "model_forward_time": 0.11491751670837402,
      "step": 19767
    },
    {
      "epoch": 0.000120648193359375,
      "step": 19767,
      "training_step_time": 0.3939952850341797
    },
    {
      "epoch": 0.000120654296875,
      "model_forward_time": 0.11484456062316895,
      "step": 19768
    },
    {
      "epoch": 0.000120654296875,
      "step": 19768,
      "training_step_time": 0.41445088386535645
    },
    {
      "epoch": 0.000120660400390625,
      "model_forward_time": 0.11429905891418457,
      "step": 19769
    },
    {
      "epoch": 0.000120660400390625,
      "step": 19769,
      "training_step_time": 0.4109978675842285
    },
    {
      "epoch": 0.00012066650390625,
      "grad_norm": 0.19550248980522156,
      "learning_rate": 8.012008791229826e-05,
      "loss": 0.0573,
      "step": 19770
    },
    {
      "epoch": 0.00012066650390625,
      "model_forward_time": 0.11520171165466309,
      "step": 19770
    },
    {
      "epoch": 0.00012066650390625,
      "step": 19770,
      "training_step_time": 0.46340227127075195
    },
    {
      "epoch": 0.000120672607421875,
      "model_forward_time": 0.11486649513244629,
      "step": 19771
    },
    {
      "epoch": 0.000120672607421875,
      "step": 19771,
      "training_step_time": 0.451657772064209
    },
    {
      "epoch": 0.0001206787109375,
      "model_forward_time": 0.11551928520202637,
      "step": 19772
    },
    {
      "epoch": 0.0001206787109375,
      "step": 19772,
      "training_step_time": 0.45562005043029785
    },
    {
      "epoch": 0.000120684814453125,
      "model_forward_time": 0.11674284934997559,
      "step": 19773
    },
    {
      "epoch": 0.000120684814453125,
      "step": 19773,
      "training_step_time": 0.499941349029541
    },
    {
      "epoch": 0.00012069091796875,
      "model_forward_time": 0.11619162559509277,
      "step": 19774
    },
    {
      "epoch": 0.00012069091796875,
      "step": 19774,
      "training_step_time": 0.4242849349975586
    },
    {
      "epoch": 0.000120697021484375,
      "model_forward_time": 0.11496186256408691,
      "step": 19775
    },
    {
      "epoch": 0.000120697021484375,
      "step": 19775,
      "training_step_time": 0.45087671279907227
    },
    {
      "epoch": 0.000120703125,
      "model_forward_time": 0.1156148910522461,
      "step": 19776
    },
    {
      "epoch": 0.000120703125,
      "step": 19776,
      "training_step_time": 0.4492456912994385
    },
    {
      "epoch": 0.000120709228515625,
      "model_forward_time": 0.11529374122619629,
      "step": 19777
    },
    {
      "epoch": 0.000120709228515625,
      "step": 19777,
      "training_step_time": 0.4173140525817871
    },
    {
      "epoch": 0.00012071533203125,
      "model_forward_time": 0.11503243446350098,
      "step": 19778
    },
    {
      "epoch": 0.00012071533203125,
      "step": 19778,
      "training_step_time": 0.390622615814209
    },
    {
      "epoch": 0.000120721435546875,
      "model_forward_time": 0.11506104469299316,
      "step": 19779
    },
    {
      "epoch": 0.000120721435546875,
      "step": 19779,
      "training_step_time": 0.39905548095703125
    },
    {
      "epoch": 0.0001207275390625,
      "grad_norm": 0.12440119683742523,
      "learning_rate": 8.009808687035798e-05,
      "loss": 0.0547,
      "step": 19780
    },
    {
      "epoch": 0.0001207275390625,
      "model_forward_time": 0.1155385971069336,
      "step": 19780
    },
    {
      "epoch": 0.0001207275390625,
      "step": 19780,
      "training_step_time": 0.3825984001159668
    },
    {
      "epoch": 0.000120733642578125,
      "model_forward_time": 0.1152653694152832,
      "step": 19781
    },
    {
      "epoch": 0.000120733642578125,
      "step": 19781,
      "training_step_time": 0.4008607864379883
    },
    {
      "epoch": 0.00012073974609375,
      "model_forward_time": 0.11511540412902832,
      "step": 19782
    },
    {
      "epoch": 0.00012073974609375,
      "step": 19782,
      "training_step_time": 0.38466739654541016
    },
    {
      "epoch": 0.000120745849609375,
      "model_forward_time": 0.11591291427612305,
      "step": 19783
    },
    {
      "epoch": 0.000120745849609375,
      "step": 19783,
      "training_step_time": 0.4018683433532715
    },
    {
      "epoch": 0.000120751953125,
      "model_forward_time": 0.11505770683288574,
      "step": 19784
    },
    {
      "epoch": 0.000120751953125,
      "step": 19784,
      "training_step_time": 0.4522252082824707
    },
    {
      "epoch": 0.000120758056640625,
      "model_forward_time": 0.11494064331054688,
      "step": 19785
    },
    {
      "epoch": 0.000120758056640625,
      "step": 19785,
      "training_step_time": 0.48370957374572754
    },
    {
      "epoch": 0.00012076416015625,
      "model_forward_time": 0.11492800712585449,
      "step": 19786
    },
    {
      "epoch": 0.00012076416015625,
      "step": 19786,
      "training_step_time": 0.3816196918487549
    },
    {
      "epoch": 0.000120770263671875,
      "model_forward_time": 0.11511683464050293,
      "step": 19787
    },
    {
      "epoch": 0.000120770263671875,
      "step": 19787,
      "training_step_time": 0.39631032943725586
    },
    {
      "epoch": 0.0001207763671875,
      "model_forward_time": 0.11541295051574707,
      "step": 19788
    },
    {
      "epoch": 0.0001207763671875,
      "step": 19788,
      "training_step_time": 0.48731255531311035
    },
    {
      "epoch": 0.000120782470703125,
      "model_forward_time": 0.11698341369628906,
      "step": 19789
    },
    {
      "epoch": 0.000120782470703125,
      "step": 19789,
      "training_step_time": 0.43445634841918945
    },
    {
      "epoch": 0.00012078857421875,
      "grad_norm": 0.18399542570114136,
      "learning_rate": 8.007607668541362e-05,
      "loss": 0.0569,
      "step": 19790
    },
    {
      "epoch": 0.00012078857421875,
      "model_forward_time": 0.1149756908416748,
      "step": 19790
    },
    {
      "epoch": 0.00012078857421875,
      "step": 19790,
      "training_step_time": 0.46224212646484375
    },
    {
      "epoch": 0.000120794677734375,
      "model_forward_time": 0.11511564254760742,
      "step": 19791
    },
    {
      "epoch": 0.000120794677734375,
      "step": 19791,
      "training_step_time": 0.42804455757141113
    },
    {
      "epoch": 0.00012080078125,
      "model_forward_time": 0.11510419845581055,
      "step": 19792
    },
    {
      "epoch": 0.00012080078125,
      "step": 19792,
      "training_step_time": 0.45832228660583496
    },
    {
      "epoch": 0.000120806884765625,
      "model_forward_time": 0.11430239677429199,
      "step": 19793
    },
    {
      "epoch": 0.000120806884765625,
      "step": 19793,
      "training_step_time": 0.38524627685546875
    },
    {
      "epoch": 0.00012081298828125,
      "model_forward_time": 0.11505532264709473,
      "step": 19794
    },
    {
      "epoch": 0.00012081298828125,
      "step": 19794,
      "training_step_time": 0.40648984909057617
    },
    {
      "epoch": 0.000120819091796875,
      "model_forward_time": 0.11574196815490723,
      "step": 19795
    },
    {
      "epoch": 0.000120819091796875,
      "step": 19795,
      "training_step_time": 0.4216482639312744
    },
    {
      "epoch": 0.0001208251953125,
      "model_forward_time": 0.11553740501403809,
      "step": 19796
    },
    {
      "epoch": 0.0001208251953125,
      "step": 19796,
      "training_step_time": 0.3934776782989502
    },
    {
      "epoch": 0.000120831298828125,
      "model_forward_time": 0.11523890495300293,
      "step": 19797
    },
    {
      "epoch": 0.000120831298828125,
      "step": 19797,
      "training_step_time": 0.38512468338012695
    },
    {
      "epoch": 0.00012083740234375,
      "model_forward_time": 0.11551213264465332,
      "step": 19798
    },
    {
      "epoch": 0.00012083740234375,
      "step": 19798,
      "training_step_time": 0.4246537685394287
    },
    {
      "epoch": 0.000120843505859375,
      "model_forward_time": 0.11546778678894043,
      "step": 19799
    },
    {
      "epoch": 0.000120843505859375,
      "step": 19799,
      "training_step_time": 0.3963816165924072
    },
    {
      "epoch": 0.000120849609375,
      "grad_norm": 0.16417881846427917,
      "learning_rate": 8.005405736415126e-05,
      "loss": 0.0528,
      "step": 19800
    },
    {
      "epoch": 0.000120849609375,
      "model_forward_time": 0.11494660377502441,
      "step": 19800
    },
    {
      "epoch": 0.000120849609375,
      "step": 19800,
      "training_step_time": 0.3952755928039551
    },
    {
      "epoch": 0.000120855712890625,
      "model_forward_time": 0.1154177188873291,
      "step": 19801
    },
    {
      "epoch": 0.000120855712890625,
      "step": 19801,
      "training_step_time": 0.3677947521209717
    },
    {
      "epoch": 0.00012086181640625,
      "model_forward_time": 0.11651158332824707,
      "step": 19802
    },
    {
      "epoch": 0.00012086181640625,
      "step": 19802,
      "training_step_time": 0.4625895023345947
    },
    {
      "epoch": 0.000120867919921875,
      "model_forward_time": 0.11758661270141602,
      "step": 19803
    },
    {
      "epoch": 0.000120867919921875,
      "step": 19803,
      "training_step_time": 0.45076465606689453
    },
    {
      "epoch": 0.0001208740234375,
      "model_forward_time": 0.11834239959716797,
      "step": 19804
    },
    {
      "epoch": 0.0001208740234375,
      "step": 19804,
      "training_step_time": 0.4102606773376465
    },
    {
      "epoch": 0.000120880126953125,
      "model_forward_time": 0.11536049842834473,
      "step": 19805
    },
    {
      "epoch": 0.000120880126953125,
      "step": 19805,
      "training_step_time": 0.4213423728942871
    },
    {
      "epoch": 0.00012088623046875,
      "model_forward_time": 0.11485052108764648,
      "step": 19806
    },
    {
      "epoch": 0.00012088623046875,
      "step": 19806,
      "training_step_time": 0.48561525344848633
    },
    {
      "epoch": 0.000120892333984375,
      "model_forward_time": 0.11493730545043945,
      "step": 19807
    },
    {
      "epoch": 0.000120892333984375,
      "step": 19807,
      "training_step_time": 0.4011037349700928
    },
    {
      "epoch": 0.0001208984375,
      "model_forward_time": 0.11441969871520996,
      "step": 19808
    },
    {
      "epoch": 0.0001208984375,
      "step": 19808,
      "training_step_time": 0.38192319869995117
    },
    {
      "epoch": 0.000120904541015625,
      "model_forward_time": 0.1154022216796875,
      "step": 19809
    },
    {
      "epoch": 0.000120904541015625,
      "step": 19809,
      "training_step_time": 0.38808441162109375
    },
    {
      "epoch": 0.00012091064453125,
      "grad_norm": 0.152639240026474,
      "learning_rate": 8.00320289132598e-05,
      "loss": 0.0578,
      "step": 19810
    },
    {
      "epoch": 0.00012091064453125,
      "model_forward_time": 0.11517047882080078,
      "step": 19810
    },
    {
      "epoch": 0.00012091064453125,
      "step": 19810,
      "training_step_time": 0.4023451805114746
    },
    {
      "epoch": 0.000120916748046875,
      "model_forward_time": 0.11459875106811523,
      "step": 19811
    },
    {
      "epoch": 0.000120916748046875,
      "step": 19811,
      "training_step_time": 0.39382410049438477
    },
    {
      "epoch": 0.0001209228515625,
      "model_forward_time": 0.1154928207397461,
      "step": 19812
    },
    {
      "epoch": 0.0001209228515625,
      "step": 19812,
      "training_step_time": 0.4248058795928955
    },
    {
      "epoch": 0.000120928955078125,
      "model_forward_time": 0.11483335494995117,
      "step": 19813
    },
    {
      "epoch": 0.000120928955078125,
      "step": 19813,
      "training_step_time": 0.3989431858062744
    },
    {
      "epoch": 0.00012093505859375,
      "model_forward_time": 0.11538100242614746,
      "step": 19814
    },
    {
      "epoch": 0.00012093505859375,
      "step": 19814,
      "training_step_time": 0.42616748809814453
    },
    {
      "epoch": 0.000120941162109375,
      "model_forward_time": 0.11550474166870117,
      "step": 19815
    },
    {
      "epoch": 0.000120941162109375,
      "step": 19815,
      "training_step_time": 0.3887941837310791
    },
    {
      "epoch": 0.000120947265625,
      "model_forward_time": 0.11616182327270508,
      "step": 19816
    },
    {
      "epoch": 0.000120947265625,
      "step": 19816,
      "training_step_time": 0.42476320266723633
    },
    {
      "epoch": 0.000120953369140625,
      "model_forward_time": 0.11528420448303223,
      "step": 19817
    },
    {
      "epoch": 0.000120953369140625,
      "step": 19817,
      "training_step_time": 0.38689422607421875
    },
    {
      "epoch": 0.00012095947265625,
      "model_forward_time": 0.11538434028625488,
      "step": 19818
    },
    {
      "epoch": 0.00012095947265625,
      "step": 19818,
      "training_step_time": 0.41500401496887207
    },
    {
      "epoch": 0.000120965576171875,
      "model_forward_time": 0.11475682258605957,
      "step": 19819
    },
    {
      "epoch": 0.000120965576171875,
      "step": 19819,
      "training_step_time": 0.392535924911499
    },
    {
      "epoch": 0.0001209716796875,
      "grad_norm": 0.2277587354183197,
      "learning_rate": 8.000999133943093e-05,
      "loss": 0.0522,
      "step": 19820
    },
    {
      "epoch": 0.0001209716796875,
      "model_forward_time": 0.11511754989624023,
      "step": 19820
    },
    {
      "epoch": 0.0001209716796875,
      "step": 19820,
      "training_step_time": 0.4654722213745117
    },
    {
      "epoch": 0.000120977783203125,
      "model_forward_time": 0.11478328704833984,
      "step": 19821
    },
    {
      "epoch": 0.000120977783203125,
      "step": 19821,
      "training_step_time": 0.481278657913208
    },
    {
      "epoch": 0.00012098388671875,
      "model_forward_time": 0.1157693862915039,
      "step": 19822
    },
    {
      "epoch": 0.00012098388671875,
      "step": 19822,
      "training_step_time": 0.38803720474243164
    },
    {
      "epoch": 0.000120989990234375,
      "model_forward_time": 0.11541748046875,
      "step": 19823
    },
    {
      "epoch": 0.000120989990234375,
      "step": 19823,
      "training_step_time": 0.3775792121887207
    },
    {
      "epoch": 0.00012099609375,
      "model_forward_time": 0.11524796485900879,
      "step": 19824
    },
    {
      "epoch": 0.00012099609375,
      "step": 19824,
      "training_step_time": 0.395932674407959
    },
    {
      "epoch": 0.000121002197265625,
      "model_forward_time": 0.11519265174865723,
      "step": 19825
    },
    {
      "epoch": 0.000121002197265625,
      "step": 19825,
      "training_step_time": 0.3985605239868164
    },
    {
      "epoch": 0.00012100830078125,
      "model_forward_time": 0.11539673805236816,
      "step": 19826
    },
    {
      "epoch": 0.00012100830078125,
      "step": 19826,
      "training_step_time": 0.39156436920166016
    },
    {
      "epoch": 0.000121014404296875,
      "model_forward_time": 0.11533260345458984,
      "step": 19827
    },
    {
      "epoch": 0.000121014404296875,
      "step": 19827,
      "training_step_time": 0.47826647758483887
    },
    {
      "epoch": 0.0001210205078125,
      "model_forward_time": 0.11466193199157715,
      "step": 19828
    },
    {
      "epoch": 0.0001210205078125,
      "step": 19828,
      "training_step_time": 0.4619565010070801
    },
    {
      "epoch": 0.000121026611328125,
      "model_forward_time": 0.11557412147521973,
      "step": 19829
    },
    {
      "epoch": 0.000121026611328125,
      "step": 19829,
      "training_step_time": 0.42777490615844727
    },
    {
      "epoch": 0.00012103271484375,
      "grad_norm": 0.15086039900779724,
      "learning_rate": 7.998794464935904e-05,
      "loss": 0.0558,
      "step": 19830
    },
    {
      "epoch": 0.00012103271484375,
      "model_forward_time": 0.11511468887329102,
      "step": 19830
    },
    {
      "epoch": 0.00012103271484375,
      "step": 19830,
      "training_step_time": 0.40496277809143066
    },
    {
      "epoch": 0.000121038818359375,
      "model_forward_time": 0.11456608772277832,
      "step": 19831
    },
    {
      "epoch": 0.000121038818359375,
      "step": 19831,
      "training_step_time": 0.47220921516418457
    },
    {
      "epoch": 0.000121044921875,
      "model_forward_time": 0.11548900604248047,
      "step": 19832
    },
    {
      "epoch": 0.000121044921875,
      "step": 19832,
      "training_step_time": 0.4815993309020996
    },
    {
      "epoch": 0.000121051025390625,
      "model_forward_time": 0.11554622650146484,
      "step": 19833
    },
    {
      "epoch": 0.000121051025390625,
      "step": 19833,
      "training_step_time": 0.3833796977996826
    },
    {
      "epoch": 0.00012105712890625,
      "model_forward_time": 0.1146538257598877,
      "step": 19834
    },
    {
      "epoch": 0.00012105712890625,
      "step": 19834,
      "training_step_time": 0.4297525882720947
    },
    {
      "epoch": 0.000121063232421875,
      "model_forward_time": 0.11430859565734863,
      "step": 19835
    },
    {
      "epoch": 0.000121063232421875,
      "step": 19835,
      "training_step_time": 0.3777792453765869
    },
    {
      "epoch": 0.0001210693359375,
      "model_forward_time": 0.11501836776733398,
      "step": 19836
    },
    {
      "epoch": 0.0001210693359375,
      "step": 19836,
      "training_step_time": 0.4293942451477051
    },
    {
      "epoch": 0.000121075439453125,
      "model_forward_time": 0.11446118354797363,
      "step": 19837
    },
    {
      "epoch": 0.000121075439453125,
      "step": 19837,
      "training_step_time": 0.38759732246398926
    },
    {
      "epoch": 0.00012108154296875,
      "model_forward_time": 0.11574482917785645,
      "step": 19838
    },
    {
      "epoch": 0.00012108154296875,
      "step": 19838,
      "training_step_time": 0.40317511558532715
    },
    {
      "epoch": 0.000121087646484375,
      "model_forward_time": 0.11491131782531738,
      "step": 19839
    },
    {
      "epoch": 0.000121087646484375,
      "step": 19839,
      "training_step_time": 0.3994131088256836
    },
    {
      "epoch": 0.00012109375,
      "grad_norm": 0.1690087616443634,
      "learning_rate": 7.996588884974135e-05,
      "loss": 0.0592,
      "step": 19840
    },
    {
      "epoch": 0.00012109375,
      "model_forward_time": 0.11526966094970703,
      "step": 19840
    },
    {
      "epoch": 0.00012109375,
      "step": 19840,
      "training_step_time": 0.3901240825653076
    },
    {
      "epoch": 0.000121099853515625,
      "model_forward_time": 0.11659693717956543,
      "step": 19841
    },
    {
      "epoch": 0.000121099853515625,
      "step": 19841,
      "training_step_time": 0.3882608413696289
    },
    {
      "epoch": 0.00012110595703125,
      "model_forward_time": 0.11533427238464355,
      "step": 19842
    },
    {
      "epoch": 0.00012110595703125,
      "step": 19842,
      "training_step_time": 0.3917086124420166
    },
    {
      "epoch": 0.000121112060546875,
      "model_forward_time": 0.11548352241516113,
      "step": 19843
    },
    {
      "epoch": 0.000121112060546875,
      "step": 19843,
      "training_step_time": 0.40639257431030273
    },
    {
      "epoch": 0.0001211181640625,
      "model_forward_time": 0.11550545692443848,
      "step": 19844
    },
    {
      "epoch": 0.0001211181640625,
      "step": 19844,
      "training_step_time": 0.5184230804443359
    },
    {
      "epoch": 0.000121124267578125,
      "model_forward_time": 0.11514472961425781,
      "step": 19845
    },
    {
      "epoch": 0.000121124267578125,
      "step": 19845,
      "training_step_time": 0.36753106117248535
    },
    {
      "epoch": 0.00012113037109375,
      "model_forward_time": 0.11477971076965332,
      "step": 19846
    },
    {
      "epoch": 0.00012113037109375,
      "step": 19846,
      "training_step_time": 0.4524543285369873
    },
    {
      "epoch": 0.000121136474609375,
      "model_forward_time": 0.11487340927124023,
      "step": 19847
    },
    {
      "epoch": 0.000121136474609375,
      "step": 19847,
      "training_step_time": 0.389650821685791
    },
    {
      "epoch": 0.000121142578125,
      "model_forward_time": 0.11515212059020996,
      "step": 19848
    },
    {
      "epoch": 0.000121142578125,
      "step": 19848,
      "training_step_time": 0.5086548328399658
    },
    {
      "epoch": 0.000121148681640625,
      "model_forward_time": 0.1160280704498291,
      "step": 19849
    },
    {
      "epoch": 0.000121148681640625,
      "step": 19849,
      "training_step_time": 0.4295620918273926
    },
    {
      "epoch": 0.00012115478515625,
      "grad_norm": 0.14924462139606476,
      "learning_rate": 7.994382394727784e-05,
      "loss": 0.0607,
      "step": 19850
    },
    {
      "epoch": 0.00012115478515625,
      "model_forward_time": 0.11514544486999512,
      "step": 19850
    },
    {
      "epoch": 0.00012115478515625,
      "step": 19850,
      "training_step_time": 0.4668600559234619
    },
    {
      "epoch": 0.000121160888671875,
      "model_forward_time": 0.11435294151306152,
      "step": 19851
    },
    {
      "epoch": 0.000121160888671875,
      "step": 19851,
      "training_step_time": 0.3850400447845459
    },
    {
      "epoch": 0.0001211669921875,
      "model_forward_time": 0.11488127708435059,
      "step": 19852
    },
    {
      "epoch": 0.0001211669921875,
      "step": 19852,
      "training_step_time": 0.38974738121032715
    },
    {
      "epoch": 0.000121173095703125,
      "model_forward_time": 0.11466097831726074,
      "step": 19853
    },
    {
      "epoch": 0.000121173095703125,
      "step": 19853,
      "training_step_time": 0.36937904357910156
    },
    {
      "epoch": 0.00012117919921875,
      "model_forward_time": 0.11558866500854492,
      "step": 19854
    },
    {
      "epoch": 0.00012117919921875,
      "step": 19854,
      "training_step_time": 0.39130616188049316
    },
    {
      "epoch": 0.000121185302734375,
      "model_forward_time": 0.11458754539489746,
      "step": 19855
    },
    {
      "epoch": 0.000121185302734375,
      "step": 19855,
      "training_step_time": 0.4056410789489746
    },
    {
      "epoch": 0.00012119140625,
      "model_forward_time": 0.11734676361083984,
      "step": 19856
    },
    {
      "epoch": 0.00012119140625,
      "step": 19856,
      "training_step_time": 0.6423351764678955
    },
    {
      "epoch": 0.000121197509765625,
      "model_forward_time": 0.11545419692993164,
      "step": 19857
    },
    {
      "epoch": 0.000121197509765625,
      "step": 19857,
      "training_step_time": 0.47208619117736816
    },
    {
      "epoch": 0.00012120361328125,
      "model_forward_time": 0.11490726470947266,
      "step": 19858
    },
    {
      "epoch": 0.00012120361328125,
      "step": 19858,
      "training_step_time": 0.39370203018188477
    },
    {
      "epoch": 0.000121209716796875,
      "model_forward_time": 0.1148824691772461,
      "step": 19859
    },
    {
      "epoch": 0.000121209716796875,
      "step": 19859,
      "training_step_time": 0.36497926712036133
    },
    {
      "epoch": 0.0001212158203125,
      "grad_norm": 0.17115047574043274,
      "learning_rate": 7.992174994867123e-05,
      "loss": 0.0595,
      "step": 19860
    },
    {
      "epoch": 0.0001212158203125,
      "model_forward_time": 0.11448812484741211,
      "step": 19860
    },
    {
      "epoch": 0.0001212158203125,
      "step": 19860,
      "training_step_time": 0.4342470169067383
    },
    {
      "epoch": 0.000121221923828125,
      "model_forward_time": 0.11445879936218262,
      "step": 19861
    },
    {
      "epoch": 0.000121221923828125,
      "step": 19861,
      "training_step_time": 0.3969838619232178
    },
    {
      "epoch": 0.00012122802734375,
      "model_forward_time": 0.11451148986816406,
      "step": 19862
    },
    {
      "epoch": 0.00012122802734375,
      "step": 19862,
      "training_step_time": 0.507939338684082
    },
    {
      "epoch": 0.000121234130859375,
      "model_forward_time": 0.11469006538391113,
      "step": 19863
    },
    {
      "epoch": 0.000121234130859375,
      "step": 19863,
      "training_step_time": 0.397951602935791
    },
    {
      "epoch": 0.000121240234375,
      "model_forward_time": 0.11462783813476562,
      "step": 19864
    },
    {
      "epoch": 0.000121240234375,
      "step": 19864,
      "training_step_time": 0.5022830963134766
    },
    {
      "epoch": 0.000121246337890625,
      "model_forward_time": 0.11443352699279785,
      "step": 19865
    },
    {
      "epoch": 0.000121246337890625,
      "step": 19865,
      "training_step_time": 0.37169790267944336
    },
    {
      "epoch": 0.00012125244140625,
      "model_forward_time": 0.11526012420654297,
      "step": 19866
    },
    {
      "epoch": 0.00012125244140625,
      "step": 19866,
      "training_step_time": 0.3850109577178955
    },
    {
      "epoch": 0.000121258544921875,
      "model_forward_time": 0.11438965797424316,
      "step": 19867
    },
    {
      "epoch": 0.000121258544921875,
      "step": 19867,
      "training_step_time": 0.3889899253845215
    },
    {
      "epoch": 0.0001212646484375,
      "model_forward_time": 0.11522626876831055,
      "step": 19868
    },
    {
      "epoch": 0.0001212646484375,
      "step": 19868,
      "training_step_time": 0.5421931743621826
    },
    {
      "epoch": 0.000121270751953125,
      "model_forward_time": 0.11516284942626953,
      "step": 19869
    },
    {
      "epoch": 0.000121270751953125,
      "step": 19869,
      "training_step_time": 0.5266425609588623
    },
    {
      "epoch": 0.00012127685546875,
      "grad_norm": 0.17201243340969086,
      "learning_rate": 7.989966686062702e-05,
      "loss": 0.0556,
      "step": 19870
    },
    {
      "epoch": 0.00012127685546875,
      "model_forward_time": 0.11535191535949707,
      "step": 19870
    },
    {
      "epoch": 0.00012127685546875,
      "step": 19870,
      "training_step_time": 0.43611764907836914
    },
    {
      "epoch": 0.000121282958984375,
      "model_forward_time": 0.11426877975463867,
      "step": 19871
    },
    {
      "epoch": 0.000121282958984375,
      "step": 19871,
      "training_step_time": 0.45468926429748535
    },
    {
      "epoch": 0.0001212890625,
      "model_forward_time": 0.11475300788879395,
      "step": 19872
    },
    {
      "epoch": 0.0001212890625,
      "step": 19872,
      "training_step_time": 0.4128532409667969
    },
    {
      "epoch": 0.000121295166015625,
      "model_forward_time": 0.11421513557434082,
      "step": 19873
    },
    {
      "epoch": 0.000121295166015625,
      "step": 19873,
      "training_step_time": 0.4170799255371094
    },
    {
      "epoch": 0.00012130126953125,
      "model_forward_time": 0.1145176887512207,
      "step": 19874
    },
    {
      "epoch": 0.00012130126953125,
      "step": 19874,
      "training_step_time": 0.4687223434448242
    },
    {
      "epoch": 0.000121307373046875,
      "model_forward_time": 0.11458158493041992,
      "step": 19875
    },
    {
      "epoch": 0.000121307373046875,
      "step": 19875,
      "training_step_time": 0.4195098876953125
    },
    {
      "epoch": 0.0001213134765625,
      "model_forward_time": 0.11475539207458496,
      "step": 19876
    },
    {
      "epoch": 0.0001213134765625,
      "step": 19876,
      "training_step_time": 0.42228174209594727
    },
    {
      "epoch": 0.000121319580078125,
      "model_forward_time": 0.11473274230957031,
      "step": 19877
    },
    {
      "epoch": 0.000121319580078125,
      "step": 19877,
      "training_step_time": 0.387561559677124
    },
    {
      "epoch": 0.00012132568359375,
      "model_forward_time": 0.11508727073669434,
      "step": 19878
    },
    {
      "epoch": 0.00012132568359375,
      "step": 19878,
      "training_step_time": 0.44081878662109375
    },
    {
      "epoch": 0.000121331787109375,
      "model_forward_time": 0.11513829231262207,
      "step": 19879
    },
    {
      "epoch": 0.000121331787109375,
      "step": 19879,
      "training_step_time": 0.3913135528564453
    },
    {
      "epoch": 0.000121337890625,
      "grad_norm": 0.14579342305660248,
      "learning_rate": 7.987757468985348e-05,
      "loss": 0.0526,
      "step": 19880
    },
    {
      "epoch": 0.000121337890625,
      "model_forward_time": 0.11500787734985352,
      "step": 19880
    },
    {
      "epoch": 0.000121337890625,
      "step": 19880,
      "training_step_time": 0.39646196365356445
    },
    {
      "epoch": 0.000121343994140625,
      "model_forward_time": 0.11461973190307617,
      "step": 19881
    },
    {
      "epoch": 0.000121343994140625,
      "step": 19881,
      "training_step_time": 0.3987445831298828
    },
    {
      "epoch": 0.00012135009765625,
      "model_forward_time": 0.11445832252502441,
      "step": 19882
    },
    {
      "epoch": 0.00012135009765625,
      "step": 19882,
      "training_step_time": 0.3946511745452881
    },
    {
      "epoch": 0.000121356201171875,
      "model_forward_time": 0.1147923469543457,
      "step": 19883
    },
    {
      "epoch": 0.000121356201171875,
      "step": 19883,
      "training_step_time": 0.40863633155822754
    },
    {
      "epoch": 0.0001213623046875,
      "model_forward_time": 0.11671328544616699,
      "step": 19884
    },
    {
      "epoch": 0.0001213623046875,
      "step": 19884,
      "training_step_time": 0.5094802379608154
    },
    {
      "epoch": 0.000121368408203125,
      "model_forward_time": 0.11572885513305664,
      "step": 19885
    },
    {
      "epoch": 0.000121368408203125,
      "step": 19885,
      "training_step_time": 0.4400348663330078
    },
    {
      "epoch": 0.00012137451171875,
      "model_forward_time": 0.11489152908325195,
      "step": 19886
    },
    {
      "epoch": 0.00012137451171875,
      "step": 19886,
      "training_step_time": 0.47461581230163574
    },
    {
      "epoch": 0.000121380615234375,
      "model_forward_time": 0.11512422561645508,
      "step": 19887
    },
    {
      "epoch": 0.000121380615234375,
      "step": 19887,
      "training_step_time": 0.3653419017791748
    },
    {
      "epoch": 0.00012138671875,
      "model_forward_time": 0.11580801010131836,
      "step": 19888
    },
    {
      "epoch": 0.00012138671875,
      "step": 19888,
      "training_step_time": 0.41635751724243164
    },
    {
      "epoch": 0.000121392822265625,
      "model_forward_time": 0.11450409889221191,
      "step": 19889
    },
    {
      "epoch": 0.000121392822265625,
      "step": 19889,
      "training_step_time": 0.40090203285217285
    },
    {
      "epoch": 0.00012139892578125,
      "grad_norm": 0.1932019293308258,
      "learning_rate": 7.985547344306161e-05,
      "loss": 0.0557,
      "step": 19890
    },
    {
      "epoch": 0.00012139892578125,
      "model_forward_time": 0.11531424522399902,
      "step": 19890
    },
    {
      "epoch": 0.00012139892578125,
      "step": 19890,
      "training_step_time": 0.39202070236206055
    },
    {
      "epoch": 0.000121405029296875,
      "model_forward_time": 0.11467385292053223,
      "step": 19891
    },
    {
      "epoch": 0.000121405029296875,
      "step": 19891,
      "training_step_time": 0.40024828910827637
    },
    {
      "epoch": 0.0001214111328125,
      "model_forward_time": 0.11552906036376953,
      "step": 19892
    },
    {
      "epoch": 0.0001214111328125,
      "step": 19892,
      "training_step_time": 0.5587544441223145
    },
    {
      "epoch": 0.000121417236328125,
      "model_forward_time": 0.11447477340698242,
      "step": 19893
    },
    {
      "epoch": 0.000121417236328125,
      "step": 19893,
      "training_step_time": 0.3943164348602295
    },
    {
      "epoch": 0.00012142333984375,
      "model_forward_time": 0.11545419692993164,
      "step": 19894
    },
    {
      "epoch": 0.00012142333984375,
      "step": 19894,
      "training_step_time": 0.39319753646850586
    },
    {
      "epoch": 0.000121429443359375,
      "model_forward_time": 0.11542606353759766,
      "step": 19895
    },
    {
      "epoch": 0.000121429443359375,
      "step": 19895,
      "training_step_time": 0.38774585723876953
    },
    {
      "epoch": 0.000121435546875,
      "model_forward_time": 0.11597299575805664,
      "step": 19896
    },
    {
      "epoch": 0.000121435546875,
      "step": 19896,
      "training_step_time": 0.38683295249938965
    },
    {
      "epoch": 0.000121441650390625,
      "model_forward_time": 0.11470603942871094,
      "step": 19897
    },
    {
      "epoch": 0.000121441650390625,
      "step": 19897,
      "training_step_time": 0.4273536205291748
    },
    {
      "epoch": 0.00012144775390625,
      "model_forward_time": 0.1148684024810791,
      "step": 19898
    },
    {
      "epoch": 0.00012144775390625,
      "step": 19898,
      "training_step_time": 0.7693836688995361
    },
    {
      "epoch": 0.000121453857421875,
      "model_forward_time": 0.11420536041259766,
      "step": 19899
    },
    {
      "epoch": 0.000121453857421875,
      "step": 19899,
      "training_step_time": 0.41723203659057617
    },
    {
      "epoch": 0.0001214599609375,
      "grad_norm": 0.16293445229530334,
      "learning_rate": 7.983336312696522e-05,
      "loss": 0.0548,
      "step": 19900
    },
    {
      "epoch": 0.0001214599609375,
      "model_forward_time": 0.11565375328063965,
      "step": 19900
    },
    {
      "epoch": 0.0001214599609375,
      "step": 19900,
      "training_step_time": 0.4028325080871582
    },
    {
      "epoch": 0.000121466064453125,
      "model_forward_time": 0.11446762084960938,
      "step": 19901
    },
    {
      "epoch": 0.000121466064453125,
      "step": 19901,
      "training_step_time": 0.4082620143890381
    },
    {
      "epoch": 0.00012147216796875,
      "model_forward_time": 0.11485075950622559,
      "step": 19902
    },
    {
      "epoch": 0.00012147216796875,
      "step": 19902,
      "training_step_time": 0.419710636138916
    },
    {
      "epoch": 0.000121478271484375,
      "model_forward_time": 0.11403560638427734,
      "step": 19903
    },
    {
      "epoch": 0.000121478271484375,
      "step": 19903,
      "training_step_time": 0.4610438346862793
    },
    {
      "epoch": 0.000121484375,
      "model_forward_time": 0.11478734016418457,
      "step": 19904
    },
    {
      "epoch": 0.000121484375,
      "step": 19904,
      "training_step_time": 0.5369844436645508
    },
    {
      "epoch": 0.000121490478515625,
      "model_forward_time": 0.11426043510437012,
      "step": 19905
    },
    {
      "epoch": 0.000121490478515625,
      "step": 19905,
      "training_step_time": 0.4357461929321289
    },
    {
      "epoch": 0.00012149658203125,
      "model_forward_time": 0.1148366928100586,
      "step": 19906
    },
    {
      "epoch": 0.00012149658203125,
      "step": 19906,
      "training_step_time": 0.4292454719543457
    },
    {
      "epoch": 0.000121502685546875,
      "model_forward_time": 0.1153254508972168,
      "step": 19907
    },
    {
      "epoch": 0.000121502685546875,
      "step": 19907,
      "training_step_time": 0.39334893226623535
    },
    {
      "epoch": 0.0001215087890625,
      "model_forward_time": 0.11473989486694336,
      "step": 19908
    },
    {
      "epoch": 0.0001215087890625,
      "step": 19908,
      "training_step_time": 0.3954498767852783
    },
    {
      "epoch": 0.000121514892578125,
      "model_forward_time": 0.11466264724731445,
      "step": 19909
    },
    {
      "epoch": 0.000121514892578125,
      "step": 19909,
      "training_step_time": 0.39652204513549805
    },
    {
      "epoch": 0.00012152099609375,
      "grad_norm": 0.1875544935464859,
      "learning_rate": 7.98112437482808e-05,
      "loss": 0.0507,
      "step": 19910
    },
    {
      "epoch": 0.00012152099609375,
      "model_forward_time": 0.11487483978271484,
      "step": 19910
    },
    {
      "epoch": 0.00012152099609375,
      "step": 19910,
      "training_step_time": 0.49274492263793945
    },
    {
      "epoch": 0.000121527099609375,
      "model_forward_time": 0.11458611488342285,
      "step": 19911
    },
    {
      "epoch": 0.000121527099609375,
      "step": 19911,
      "training_step_time": 0.39324307441711426
    },
    {
      "epoch": 0.000121533203125,
      "model_forward_time": 0.11484313011169434,
      "step": 19912
    },
    {
      "epoch": 0.000121533203125,
      "step": 19912,
      "training_step_time": 0.4169948101043701
    },
    {
      "epoch": 0.000121539306640625,
      "model_forward_time": 0.11474967002868652,
      "step": 19913
    },
    {
      "epoch": 0.000121539306640625,
      "step": 19913,
      "training_step_time": 0.39495253562927246
    },
    {
      "epoch": 0.00012154541015625,
      "model_forward_time": 0.1153113842010498,
      "step": 19914
    },
    {
      "epoch": 0.00012154541015625,
      "step": 19914,
      "training_step_time": 0.40558362007141113
    },
    {
      "epoch": 0.000121551513671875,
      "model_forward_time": 0.11510562896728516,
      "step": 19915
    },
    {
      "epoch": 0.000121551513671875,
      "step": 19915,
      "training_step_time": 0.4005577564239502
    },
    {
      "epoch": 0.0001215576171875,
      "model_forward_time": 0.11598801612854004,
      "step": 19916
    },
    {
      "epoch": 0.0001215576171875,
      "step": 19916,
      "training_step_time": 0.5374302864074707
    },
    {
      "epoch": 0.000121563720703125,
      "model_forward_time": 0.11563730239868164,
      "step": 19917
    },
    {
      "epoch": 0.000121563720703125,
      "step": 19917,
      "training_step_time": 0.4573681354522705
    },
    {
      "epoch": 0.00012156982421875,
      "model_forward_time": 0.11515307426452637,
      "step": 19918
    },
    {
      "epoch": 0.00012156982421875,
      "step": 19918,
      "training_step_time": 0.4154667854309082
    },
    {
      "epoch": 0.000121575927734375,
      "model_forward_time": 0.11471700668334961,
      "step": 19919
    },
    {
      "epoch": 0.000121575927734375,
      "step": 19919,
      "training_step_time": 0.39208412170410156
    },
    {
      "epoch": 0.00012158203125,
      "grad_norm": 0.24361109733581543,
      "learning_rate": 7.978911531372765e-05,
      "loss": 0.0525,
      "step": 19920
    },
    {
      "epoch": 0.00012158203125,
      "model_forward_time": 0.11498570442199707,
      "step": 19920
    },
    {
      "epoch": 0.00012158203125,
      "step": 19920,
      "training_step_time": 0.5229079723358154
    },
    {
      "epoch": 0.000121588134765625,
      "model_forward_time": 0.1142423152923584,
      "step": 19921
    },
    {
      "epoch": 0.000121588134765625,
      "step": 19921,
      "training_step_time": 0.39279890060424805
    },
    {
      "epoch": 0.00012159423828125,
      "model_forward_time": 0.1152801513671875,
      "step": 19922
    },
    {
      "epoch": 0.00012159423828125,
      "step": 19922,
      "training_step_time": 0.3932759761810303
    },
    {
      "epoch": 0.000121600341796875,
      "model_forward_time": 0.11572027206420898,
      "step": 19923
    },
    {
      "epoch": 0.000121600341796875,
      "step": 19923,
      "training_step_time": 0.4672539234161377
    },
    {
      "epoch": 0.0001216064453125,
      "model_forward_time": 0.11476707458496094,
      "step": 19924
    },
    {
      "epoch": 0.0001216064453125,
      "step": 19924,
      "training_step_time": 0.4037172794342041
    },
    {
      "epoch": 0.000121612548828125,
      "model_forward_time": 0.11438894271850586,
      "step": 19925
    },
    {
      "epoch": 0.000121612548828125,
      "step": 19925,
      "training_step_time": 0.38301706314086914
    },
    {
      "epoch": 0.00012161865234375,
      "model_forward_time": 0.11506462097167969,
      "step": 19926
    },
    {
      "epoch": 0.00012161865234375,
      "step": 19926,
      "training_step_time": 0.41001462936401367
    },
    {
      "epoch": 0.000121624755859375,
      "model_forward_time": 0.11510419845581055,
      "step": 19927
    },
    {
      "epoch": 0.000121624755859375,
      "step": 19927,
      "training_step_time": 0.4030139446258545
    },
    {
      "epoch": 0.000121630859375,
      "model_forward_time": 0.11450862884521484,
      "step": 19928
    },
    {
      "epoch": 0.000121630859375,
      "step": 19928,
      "training_step_time": 0.464357852935791
    },
    {
      "epoch": 0.000121636962890625,
      "model_forward_time": 0.11580872535705566,
      "step": 19929
    },
    {
      "epoch": 0.000121636962890625,
      "step": 19929,
      "training_step_time": 0.36822962760925293
    },
    {
      "epoch": 0.00012164306640625,
      "grad_norm": 0.1467059850692749,
      "learning_rate": 7.97669778300278e-05,
      "loss": 0.0484,
      "step": 19930
    },
    {
      "epoch": 0.00012164306640625,
      "model_forward_time": 0.11582303047180176,
      "step": 19930
    },
    {
      "epoch": 0.00012164306640625,
      "step": 19930,
      "training_step_time": 0.3943972587585449
    },
    {
      "epoch": 0.000121649169921875,
      "model_forward_time": 0.1152963638305664,
      "step": 19931
    },
    {
      "epoch": 0.000121649169921875,
      "step": 19931,
      "training_step_time": 0.45662546157836914
    },
    {
      "epoch": 0.0001216552734375,
      "model_forward_time": 0.11570382118225098,
      "step": 19932
    },
    {
      "epoch": 0.0001216552734375,
      "step": 19932,
      "training_step_time": 0.38966870307922363
    },
    {
      "epoch": 0.000121661376953125,
      "model_forward_time": 0.11416912078857422,
      "step": 19933
    },
    {
      "epoch": 0.000121661376953125,
      "step": 19933,
      "training_step_time": 0.4201653003692627
    },
    {
      "epoch": 0.00012166748046875,
      "model_forward_time": 0.11469841003417969,
      "step": 19934
    },
    {
      "epoch": 0.00012166748046875,
      "step": 19934,
      "training_step_time": 0.4756298065185547
    },
    {
      "epoch": 0.000121673583984375,
      "model_forward_time": 0.1144556999206543,
      "step": 19935
    },
    {
      "epoch": 0.000121673583984375,
      "step": 19935,
      "training_step_time": 0.4593467712402344
    },
    {
      "epoch": 0.0001216796875,
      "model_forward_time": 0.11561775207519531,
      "step": 19936
    },
    {
      "epoch": 0.0001216796875,
      "step": 19936,
      "training_step_time": 0.3885314464569092
    },
    {
      "epoch": 0.000121685791015625,
      "model_forward_time": 0.11471271514892578,
      "step": 19937
    },
    {
      "epoch": 0.000121685791015625,
      "step": 19937,
      "training_step_time": 0.4630129337310791
    },
    {
      "epoch": 0.00012169189453125,
      "model_forward_time": 0.1146702766418457,
      "step": 19938
    },
    {
      "epoch": 0.00012169189453125,
      "step": 19938,
      "training_step_time": 0.4079256057739258
    },
    {
      "epoch": 0.000121697998046875,
      "model_forward_time": 0.1141655445098877,
      "step": 19939
    },
    {
      "epoch": 0.000121697998046875,
      "step": 19939,
      "training_step_time": 0.3850290775299072
    },
    {
      "epoch": 0.0001217041015625,
      "grad_norm": 0.14928708970546722,
      "learning_rate": 7.974483130390604e-05,
      "loss": 0.0524,
      "step": 19940
    },
    {
      "epoch": 0.0001217041015625,
      "model_forward_time": 0.1154947280883789,
      "step": 19940
    },
    {
      "epoch": 0.0001217041015625,
      "step": 19940,
      "training_step_time": 0.4590590000152588
    },
    {
      "epoch": 0.000121710205078125,
      "model_forward_time": 0.11533975601196289,
      "step": 19941
    },
    {
      "epoch": 0.000121710205078125,
      "step": 19941,
      "training_step_time": 0.39835667610168457
    },
    {
      "epoch": 0.00012171630859375,
      "model_forward_time": 0.12479114532470703,
      "step": 19942
    },
    {
      "epoch": 0.00012171630859375,
      "step": 19942,
      "training_step_time": 0.3971219062805176
    },
    {
      "epoch": 0.000121722412109375,
      "model_forward_time": 0.11451411247253418,
      "step": 19943
    },
    {
      "epoch": 0.000121722412109375,
      "step": 19943,
      "training_step_time": 0.4048879146575928
    },
    {
      "epoch": 0.000121728515625,
      "model_forward_time": 0.11493635177612305,
      "step": 19944
    },
    {
      "epoch": 0.000121728515625,
      "step": 19944,
      "training_step_time": 0.40130114555358887
    },
    {
      "epoch": 0.000121734619140625,
      "model_forward_time": 0.11571645736694336,
      "step": 19945
    },
    {
      "epoch": 0.000121734619140625,
      "step": 19945,
      "training_step_time": 0.44189953804016113
    },
    {
      "epoch": 0.00012174072265625,
      "model_forward_time": 0.11665821075439453,
      "step": 19946
    },
    {
      "epoch": 0.00012174072265625,
      "step": 19946,
      "training_step_time": 0.6884627342224121
    },
    {
      "epoch": 0.000121746826171875,
      "model_forward_time": 0.11437058448791504,
      "step": 19947
    },
    {
      "epoch": 0.000121746826171875,
      "step": 19947,
      "training_step_time": 0.445981502532959
    },
    {
      "epoch": 0.0001217529296875,
      "model_forward_time": 0.11469388008117676,
      "step": 19948
    },
    {
      "epoch": 0.0001217529296875,
      "step": 19948,
      "training_step_time": 0.3886678218841553
    },
    {
      "epoch": 0.000121759033203125,
      "model_forward_time": 0.11452746391296387,
      "step": 19949
    },
    {
      "epoch": 0.000121759033203125,
      "step": 19949,
      "training_step_time": 0.41317200660705566
    },
    {
      "epoch": 0.00012176513671875,
      "grad_norm": 0.17174288630485535,
      "learning_rate": 7.972267574208991e-05,
      "loss": 0.0554,
      "step": 19950
    },
    {
      "epoch": 0.00012176513671875,
      "model_forward_time": 0.11421489715576172,
      "step": 19950
    },
    {
      "epoch": 0.00012176513671875,
      "step": 19950,
      "training_step_time": 0.4050452709197998
    },
    {
      "epoch": 0.000121771240234375,
      "model_forward_time": 0.11458325386047363,
      "step": 19951
    },
    {
      "epoch": 0.000121771240234375,
      "step": 19951,
      "training_step_time": 0.42175889015197754
    },
    {
      "epoch": 0.00012177734375,
      "model_forward_time": 0.11465215682983398,
      "step": 19952
    },
    {
      "epoch": 0.00012177734375,
      "step": 19952,
      "training_step_time": 0.45569872856140137
    },
    {
      "epoch": 0.000121783447265625,
      "model_forward_time": 0.11530828475952148,
      "step": 19953
    },
    {
      "epoch": 0.000121783447265625,
      "step": 19953,
      "training_step_time": 0.3863711357116699
    },
    {
      "epoch": 0.00012178955078125,
      "model_forward_time": 0.11632871627807617,
      "step": 19954
    },
    {
      "epoch": 0.00012178955078125,
      "step": 19954,
      "training_step_time": 0.39128971099853516
    },
    {
      "epoch": 0.000121795654296875,
      "model_forward_time": 0.11437058448791504,
      "step": 19955
    },
    {
      "epoch": 0.000121795654296875,
      "step": 19955,
      "training_step_time": 0.4033064842224121
    },
    {
      "epoch": 0.0001218017578125,
      "model_forward_time": 0.11523056030273438,
      "step": 19956
    },
    {
      "epoch": 0.0001218017578125,
      "step": 19956,
      "training_step_time": 0.5104670524597168
    },
    {
      "epoch": 0.000121807861328125,
      "model_forward_time": 0.11513900756835938,
      "step": 19957
    },
    {
      "epoch": 0.000121807861328125,
      "step": 19957,
      "training_step_time": 0.44820690155029297
    },
    {
      "epoch": 0.00012181396484375,
      "model_forward_time": 0.1154787540435791,
      "step": 19958
    },
    {
      "epoch": 0.00012181396484375,
      "step": 19958,
      "training_step_time": 0.5850682258605957
    },
    {
      "epoch": 0.000121820068359375,
      "model_forward_time": 0.11458182334899902,
      "step": 19959
    },
    {
      "epoch": 0.000121820068359375,
      "step": 19959,
      "training_step_time": 0.4397275447845459
    },
    {
      "epoch": 0.000121826171875,
      "grad_norm": 0.14629703760147095,
      "learning_rate": 7.970051115130966e-05,
      "loss": 0.0486,
      "step": 19960
    },
    {
      "epoch": 0.000121826171875,
      "model_forward_time": 0.11470460891723633,
      "step": 19960
    },
    {
      "epoch": 0.000121826171875,
      "step": 19960,
      "training_step_time": 0.39241623878479004
    },
    {
      "epoch": 0.000121832275390625,
      "model_forward_time": 0.11408233642578125,
      "step": 19961
    },
    {
      "epoch": 0.000121832275390625,
      "step": 19961,
      "training_step_time": 0.39208340644836426
    },
    {
      "epoch": 0.00012183837890625,
      "model_forward_time": 0.11483454704284668,
      "step": 19962
    },
    {
      "epoch": 0.00012183837890625,
      "step": 19962,
      "training_step_time": 0.3985905647277832
    },
    {
      "epoch": 0.000121844482421875,
      "model_forward_time": 0.1154172420501709,
      "step": 19963
    },
    {
      "epoch": 0.000121844482421875,
      "step": 19963,
      "training_step_time": 0.4442317485809326
    },
    {
      "epoch": 0.0001218505859375,
      "model_forward_time": 0.11493325233459473,
      "step": 19964
    },
    {
      "epoch": 0.0001218505859375,
      "step": 19964,
      "training_step_time": 0.4678938388824463
    },
    {
      "epoch": 0.000121856689453125,
      "model_forward_time": 0.11492061614990234,
      "step": 19965
    },
    {
      "epoch": 0.000121856689453125,
      "step": 19965,
      "training_step_time": 0.4033665657043457
    },
    {
      "epoch": 0.00012186279296875,
      "model_forward_time": 0.11555814743041992,
      "step": 19966
    },
    {
      "epoch": 0.00012186279296875,
      "step": 19966,
      "training_step_time": 0.38614845275878906
    },
    {
      "epoch": 0.000121868896484375,
      "model_forward_time": 0.11454653739929199,
      "step": 19967
    },
    {
      "epoch": 0.000121868896484375,
      "step": 19967,
      "training_step_time": 0.39305830001831055
    },
    {
      "epoch": 0.000121875,
      "model_forward_time": 0.11536550521850586,
      "step": 19968
    },
    {
      "epoch": 0.000121875,
      "step": 19968,
      "training_step_time": 0.39512014389038086
    },
    {
      "epoch": 0.000121881103515625,
      "model_forward_time": 0.11523318290710449,
      "step": 19969
    },
    {
      "epoch": 0.000121881103515625,
      "step": 19969,
      "training_step_time": 0.41693687438964844
    },
    {
      "epoch": 0.00012188720703125,
      "grad_norm": 0.14025931060314178,
      "learning_rate": 7.96783375382983e-05,
      "loss": 0.0526,
      "step": 19970
    },
    {
      "epoch": 0.00012188720703125,
      "model_forward_time": 0.11526322364807129,
      "step": 19970
    },
    {
      "epoch": 0.00012188720703125,
      "step": 19970,
      "training_step_time": 0.7848312854766846
    },
    {
      "epoch": 0.000121893310546875,
      "model_forward_time": 0.11496376991271973,
      "step": 19971
    },
    {
      "epoch": 0.000121893310546875,
      "step": 19971,
      "training_step_time": 0.3747260570526123
    },
    {
      "epoch": 0.0001218994140625,
      "model_forward_time": 0.11470675468444824,
      "step": 19972
    },
    {
      "epoch": 0.0001218994140625,
      "step": 19972,
      "training_step_time": 0.3919811248779297
    },
    {
      "epoch": 0.000121905517578125,
      "model_forward_time": 0.11450767517089844,
      "step": 19973
    },
    {
      "epoch": 0.000121905517578125,
      "step": 19973,
      "training_step_time": 0.4797835350036621
    },
    {
      "epoch": 0.00012191162109375,
      "model_forward_time": 0.11408233642578125,
      "step": 19974
    },
    {
      "epoch": 0.00012191162109375,
      "step": 19974,
      "training_step_time": 0.4122312068939209
    },
    {
      "epoch": 0.000121917724609375,
      "model_forward_time": 0.11449503898620605,
      "step": 19975
    },
    {
      "epoch": 0.000121917724609375,
      "step": 19975,
      "training_step_time": 0.3877394199371338
    },
    {
      "epoch": 0.000121923828125,
      "model_forward_time": 0.11510515213012695,
      "step": 19976
    },
    {
      "epoch": 0.000121923828125,
      "step": 19976,
      "training_step_time": 0.5094287395477295
    },
    {
      "epoch": 0.000121929931640625,
      "model_forward_time": 0.11459827423095703,
      "step": 19977
    },
    {
      "epoch": 0.000121929931640625,
      "step": 19977,
      "training_step_time": 0.4575047492980957
    },
    {
      "epoch": 0.00012193603515625,
      "model_forward_time": 0.1145484447479248,
      "step": 19978
    },
    {
      "epoch": 0.00012193603515625,
      "step": 19978,
      "training_step_time": 0.3815453052520752
    },
    {
      "epoch": 0.000121942138671875,
      "model_forward_time": 0.11513614654541016,
      "step": 19979
    },
    {
      "epoch": 0.000121942138671875,
      "step": 19979,
      "training_step_time": 0.3846120834350586
    },
    {
      "epoch": 0.0001219482421875,
      "grad_norm": 0.1432732790708542,
      "learning_rate": 7.965615490979163e-05,
      "loss": 0.0486,
      "step": 19980
    },
    {
      "epoch": 0.0001219482421875,
      "model_forward_time": 0.1152656078338623,
      "step": 19980
    },
    {
      "epoch": 0.0001219482421875,
      "step": 19980,
      "training_step_time": 0.3888232707977295
    },
    {
      "epoch": 0.000121954345703125,
      "model_forward_time": 0.11480951309204102,
      "step": 19981
    },
    {
      "epoch": 0.000121954345703125,
      "step": 19981,
      "training_step_time": 0.3831174373626709
    },
    {
      "epoch": 0.00012196044921875,
      "model_forward_time": 0.11509513854980469,
      "step": 19982
    },
    {
      "epoch": 0.00012196044921875,
      "step": 19982,
      "training_step_time": 0.5304250717163086
    },
    {
      "epoch": 0.000121966552734375,
      "model_forward_time": 0.11523175239562988,
      "step": 19983
    },
    {
      "epoch": 0.000121966552734375,
      "step": 19983,
      "training_step_time": 0.46769285202026367
    },
    {
      "epoch": 0.00012197265625,
      "model_forward_time": 0.1154637336730957,
      "step": 19984
    },
    {
      "epoch": 0.00012197265625,
      "step": 19984,
      "training_step_time": 0.4528944492340088
    },
    {
      "epoch": 0.000121978759765625,
      "model_forward_time": 0.11511707305908203,
      "step": 19985
    },
    {
      "epoch": 0.000121978759765625,
      "step": 19985,
      "training_step_time": 0.4334676265716553
    },
    {
      "epoch": 0.00012198486328125,
      "model_forward_time": 0.1141815185546875,
      "step": 19986
    },
    {
      "epoch": 0.00012198486328125,
      "step": 19986,
      "training_step_time": 0.46398258209228516
    },
    {
      "epoch": 0.000121990966796875,
      "model_forward_time": 0.11497950553894043,
      "step": 19987
    },
    {
      "epoch": 0.000121990966796875,
      "step": 19987,
      "training_step_time": 0.4419093132019043
    },
    {
      "epoch": 0.0001219970703125,
      "model_forward_time": 0.1154487133026123,
      "step": 19988
    },
    {
      "epoch": 0.0001219970703125,
      "step": 19988,
      "training_step_time": 0.4067983627319336
    },
    {
      "epoch": 0.000122003173828125,
      "model_forward_time": 0.1145789623260498,
      "step": 19989
    },
    {
      "epoch": 0.000122003173828125,
      "step": 19989,
      "training_step_time": 0.4584696292877197
    },
    {
      "epoch": 0.00012200927734375,
      "grad_norm": 0.19706639647483826,
      "learning_rate": 7.963396327252812e-05,
      "loss": 0.0553,
      "step": 19990
    },
    {
      "epoch": 0.00012200927734375,
      "model_forward_time": 0.11497664451599121,
      "step": 19990
    },
    {
      "epoch": 0.00012200927734375,
      "step": 19990,
      "training_step_time": 0.3895752429962158
    },
    {
      "epoch": 0.000122015380859375,
      "model_forward_time": 0.11557602882385254,
      "step": 19991
    },
    {
      "epoch": 0.000122015380859375,
      "step": 19991,
      "training_step_time": 0.4965224266052246
    },
    {
      "epoch": 0.000122021484375,
      "model_forward_time": 0.11503362655639648,
      "step": 19992
    },
    {
      "epoch": 0.000122021484375,
      "step": 19992,
      "training_step_time": 0.3976914882659912
    },
    {
      "epoch": 0.000122027587890625,
      "model_forward_time": 0.1148233413696289,
      "step": 19993
    },
    {
      "epoch": 0.000122027587890625,
      "step": 19993,
      "training_step_time": 0.38426661491394043
    },
    {
      "epoch": 0.00012203369140625,
      "model_forward_time": 0.11497211456298828,
      "step": 19994
    },
    {
      "epoch": 0.00012203369140625,
      "step": 19994,
      "training_step_time": 0.38878870010375977
    },
    {
      "epoch": 0.000122039794921875,
      "model_forward_time": 0.11409950256347656,
      "step": 19995
    },
    {
      "epoch": 0.000122039794921875,
      "step": 19995,
      "training_step_time": 0.39939045906066895
    },
    {
      "epoch": 0.0001220458984375,
      "model_forward_time": 0.11547684669494629,
      "step": 19996
    },
    {
      "epoch": 0.0001220458984375,
      "step": 19996,
      "training_step_time": 0.3868134021759033
    },
    {
      "epoch": 0.000122052001953125,
      "model_forward_time": 0.1142737865447998,
      "step": 19997
    },
    {
      "epoch": 0.000122052001953125,
      "step": 19997,
      "training_step_time": 0.3850743770599365
    },
    {
      "epoch": 0.00012205810546875,
      "model_forward_time": 0.11541318893432617,
      "step": 19998
    },
    {
      "epoch": 0.00012205810546875,
      "step": 19998,
      "training_step_time": 0.47043704986572266
    },
    {
      "epoch": 0.000122064208984375,
      "model_forward_time": 0.1148829460144043,
      "step": 19999
    },
    {
      "epoch": 0.000122064208984375,
      "step": 19999,
      "training_step_time": 0.4438657760620117
    },
    {
      "epoch": 0.0001220703125,
      "grad_norm": 0.13774451613426208,
      "learning_rate": 7.961176263324901e-05,
      "loss": 0.0487,
      "step": 20000
    },
    {
      "epoch": 0.0001220703125,
      "model_forward_time": 0.11206388473510742,
      "step": 20000
    },
    {
      "epoch": 0.0001220703125,
      "step": 20000,
      "training_step_time": 0.3781394958496094
    },
    {
      "epoch": 0.000122076416015625,
      "model_forward_time": 0.11323976516723633,
      "step": 20001
    },
    {
      "epoch": 0.000122076416015625,
      "step": 20001,
      "training_step_time": 0.4287858009338379
    },
    {
      "epoch": 0.00012208251953125,
      "model_forward_time": 0.11307501792907715,
      "step": 20002
    },
    {
      "epoch": 0.00012208251953125,
      "step": 20002,
      "training_step_time": 0.41600608825683594
    },
    {
      "epoch": 0.000122088623046875,
      "model_forward_time": 0.11363530158996582,
      "step": 20003
    },
    {
      "epoch": 0.000122088623046875,
      "step": 20003,
      "training_step_time": 0.44443511962890625
    },
    {
      "epoch": 0.0001220947265625,
      "model_forward_time": 0.11450862884521484,
      "step": 20004
    },
    {
      "epoch": 0.0001220947265625,
      "step": 20004,
      "training_step_time": 0.3977205753326416
    },
    {
      "epoch": 0.000122100830078125,
      "model_forward_time": 0.1142582893371582,
      "step": 20005
    },
    {
      "epoch": 0.000122100830078125,
      "step": 20005,
      "training_step_time": 0.4687643051147461
    },
    {
      "epoch": 0.00012210693359375,
      "model_forward_time": 0.11426258087158203,
      "step": 20006
    },
    {
      "epoch": 0.00012210693359375,
      "step": 20006,
      "training_step_time": 0.45810794830322266
    },
    {
      "epoch": 0.000122113037109375,
      "model_forward_time": 0.11471104621887207,
      "step": 20007
    },
    {
      "epoch": 0.000122113037109375,
      "step": 20007,
      "training_step_time": 0.38324880599975586
    },
    {
      "epoch": 0.000122119140625,
      "model_forward_time": 0.1145620346069336,
      "step": 20008
    },
    {
      "epoch": 0.000122119140625,
      "step": 20008,
      "training_step_time": 0.4930717945098877
    },
    {
      "epoch": 0.000122125244140625,
      "model_forward_time": 0.11524629592895508,
      "step": 20009
    },
    {
      "epoch": 0.000122125244140625,
      "step": 20009,
      "training_step_time": 0.36960768699645996
    },
    {
      "epoch": 0.00012213134765625,
      "grad_norm": 0.1357692927122116,
      "learning_rate": 7.958955299869825e-05,
      "loss": 0.0503,
      "step": 20010
    },
    {
      "epoch": 0.00012213134765625,
      "model_forward_time": 0.11440896987915039,
      "step": 20010
    },
    {
      "epoch": 0.00012213134765625,
      "step": 20010,
      "training_step_time": 0.38334178924560547
    },
    {
      "epoch": 0.000122137451171875,
      "model_forward_time": 0.11509370803833008,
      "step": 20011
    },
    {
      "epoch": 0.000122137451171875,
      "step": 20011,
      "training_step_time": 0.385221004486084
    },
    {
      "epoch": 0.0001221435546875,
      "model_forward_time": 0.11533927917480469,
      "step": 20012
    },
    {
      "epoch": 0.0001221435546875,
      "step": 20012,
      "training_step_time": 0.39440298080444336
    },
    {
      "epoch": 0.000122149658203125,
      "model_forward_time": 0.11460137367248535,
      "step": 20013
    },
    {
      "epoch": 0.000122149658203125,
      "step": 20013,
      "training_step_time": 0.3912656307220459
    },
    {
      "epoch": 0.00012215576171875,
      "model_forward_time": 0.11547303199768066,
      "step": 20014
    },
    {
      "epoch": 0.00012215576171875,
      "step": 20014,
      "training_step_time": 0.39379143714904785
    },
    {
      "epoch": 0.000122161865234375,
      "model_forward_time": 0.11510777473449707,
      "step": 20015
    },
    {
      "epoch": 0.000122161865234375,
      "step": 20015,
      "training_step_time": 0.394298791885376
    },
    {
      "epoch": 0.00012216796875,
      "model_forward_time": 0.11481499671936035,
      "step": 20016
    },
    {
      "epoch": 0.00012216796875,
      "step": 20016,
      "training_step_time": 0.39167261123657227
    },
    {
      "epoch": 0.000122174072265625,
      "model_forward_time": 0.11552000045776367,
      "step": 20017
    },
    {
      "epoch": 0.000122174072265625,
      "step": 20017,
      "training_step_time": 0.41634535789489746
    },
    {
      "epoch": 0.00012218017578125,
      "model_forward_time": 0.11485433578491211,
      "step": 20018
    },
    {
      "epoch": 0.00012218017578125,
      "step": 20018,
      "training_step_time": 0.36744260787963867
    },
    {
      "epoch": 0.000122186279296875,
      "model_forward_time": 0.1151115894317627,
      "step": 20019
    },
    {
      "epoch": 0.000122186279296875,
      "step": 20019,
      "training_step_time": 0.45624351501464844
    },
    {
      "epoch": 0.0001221923828125,
      "grad_norm": 0.12480585277080536,
      "learning_rate": 7.956733437562259e-05,
      "loss": 0.0496,
      "step": 20020
    },
    {
      "epoch": 0.0001221923828125,
      "model_forward_time": 0.1156456470489502,
      "step": 20020
    },
    {
      "epoch": 0.0001221923828125,
      "step": 20020,
      "training_step_time": 0.44976329803466797
    },
    {
      "epoch": 0.000122198486328125,
      "model_forward_time": 0.11553144454956055,
      "step": 20021
    },
    {
      "epoch": 0.000122198486328125,
      "step": 20021,
      "training_step_time": 0.4073202610015869
    },
    {
      "epoch": 0.00012220458984375,
      "model_forward_time": 0.11505651473999023,
      "step": 20022
    },
    {
      "epoch": 0.00012220458984375,
      "step": 20022,
      "training_step_time": 0.3995535373687744
    },
    {
      "epoch": 0.000122210693359375,
      "model_forward_time": 0.11569643020629883,
      "step": 20023
    },
    {
      "epoch": 0.000122210693359375,
      "step": 20023,
      "training_step_time": 0.42850399017333984
    },
    {
      "epoch": 0.000122216796875,
      "model_forward_time": 0.11600041389465332,
      "step": 20024
    },
    {
      "epoch": 0.000122216796875,
      "step": 20024,
      "training_step_time": 0.3929717540740967
    },
    {
      "epoch": 0.000122222900390625,
      "model_forward_time": 0.11475515365600586,
      "step": 20025
    },
    {
      "epoch": 0.000122222900390625,
      "step": 20025,
      "training_step_time": 0.3954806327819824
    },
    {
      "epoch": 0.00012222900390625,
      "model_forward_time": 0.11570405960083008,
      "step": 20026
    },
    {
      "epoch": 0.00012222900390625,
      "step": 20026,
      "training_step_time": 0.38854050636291504
    },
    {
      "epoch": 0.000122235107421875,
      "model_forward_time": 0.1154012680053711,
      "step": 20027
    },
    {
      "epoch": 0.000122235107421875,
      "step": 20027,
      "training_step_time": 0.402728796005249
    },
    {
      "epoch": 0.0001222412109375,
      "model_forward_time": 0.11493539810180664,
      "step": 20028
    },
    {
      "epoch": 0.0001222412109375,
      "step": 20028,
      "training_step_time": 0.40167689323425293
    },
    {
      "epoch": 0.000122247314453125,
      "model_forward_time": 0.1152198314666748,
      "step": 20029
    },
    {
      "epoch": 0.000122247314453125,
      "step": 20029,
      "training_step_time": 0.3944511413574219
    },
    {
      "epoch": 0.00012225341796875,
      "grad_norm": 0.1568082720041275,
      "learning_rate": 7.954510677077138e-05,
      "loss": 0.0508,
      "step": 20030
    },
    {
      "epoch": 0.00012225341796875,
      "model_forward_time": 0.11511015892028809,
      "step": 20030
    },
    {
      "epoch": 0.00012225341796875,
      "step": 20030,
      "training_step_time": 0.45791149139404297
    },
    {
      "epoch": 0.000122259521484375,
      "model_forward_time": 0.11490774154663086,
      "step": 20031
    },
    {
      "epoch": 0.000122259521484375,
      "step": 20031,
      "training_step_time": 0.40683555603027344
    },
    {
      "epoch": 0.000122265625,
      "model_forward_time": 0.11504364013671875,
      "step": 20032
    },
    {
      "epoch": 0.000122265625,
      "step": 20032,
      "training_step_time": 0.4378321170806885
    },
    {
      "epoch": 0.000122271728515625,
      "model_forward_time": 0.11448264122009277,
      "step": 20033
    },
    {
      "epoch": 0.000122271728515625,
      "step": 20033,
      "training_step_time": 0.36566996574401855
    },
    {
      "epoch": 0.00012227783203125,
      "model_forward_time": 0.11813950538635254,
      "step": 20034
    },
    {
      "epoch": 0.00012227783203125,
      "step": 20034,
      "training_step_time": 0.45880961418151855
    },
    {
      "epoch": 0.000122283935546875,
      "model_forward_time": 0.11438655853271484,
      "step": 20035
    },
    {
      "epoch": 0.000122283935546875,
      "step": 20035,
      "training_step_time": 0.40700435638427734
    },
    {
      "epoch": 0.0001222900390625,
      "model_forward_time": 0.11455321311950684,
      "step": 20036
    },
    {
      "epoch": 0.0001222900390625,
      "step": 20036,
      "training_step_time": 0.43691468238830566
    },
    {
      "epoch": 0.000122296142578125,
      "model_forward_time": 0.1150517463684082,
      "step": 20037
    },
    {
      "epoch": 0.000122296142578125,
      "step": 20037,
      "training_step_time": 0.3879549503326416
    },
    {
      "epoch": 0.00012230224609375,
      "model_forward_time": 0.11416244506835938,
      "step": 20038
    },
    {
      "epoch": 0.00012230224609375,
      "step": 20038,
      "training_step_time": 0.3876655101776123
    },
    {
      "epoch": 0.000122308349609375,
      "model_forward_time": 0.1148226261138916,
      "step": 20039
    },
    {
      "epoch": 0.000122308349609375,
      "step": 20039,
      "training_step_time": 0.39346981048583984
    },
    {
      "epoch": 0.000122314453125,
      "grad_norm": 0.17172595858573914,
      "learning_rate": 7.952287019089685e-05,
      "loss": 0.0535,
      "step": 20040
    },
    {
      "epoch": 0.000122314453125,
      "model_forward_time": 0.11521673202514648,
      "step": 20040
    },
    {
      "epoch": 0.000122314453125,
      "step": 20040,
      "training_step_time": 0.3976423740386963
    },
    {
      "epoch": 0.000122320556640625,
      "model_forward_time": 0.11500716209411621,
      "step": 20041
    },
    {
      "epoch": 0.000122320556640625,
      "step": 20041,
      "training_step_time": 0.3942251205444336
    },
    {
      "epoch": 0.00012232666015625,
      "model_forward_time": 0.11536884307861328,
      "step": 20042
    },
    {
      "epoch": 0.00012232666015625,
      "step": 20042,
      "training_step_time": 0.39571404457092285
    },
    {
      "epoch": 0.000122332763671875,
      "model_forward_time": 0.11578726768493652,
      "step": 20043
    },
    {
      "epoch": 0.000122332763671875,
      "step": 20043,
      "training_step_time": 0.3988606929779053
    },
    {
      "epoch": 0.0001223388671875,
      "model_forward_time": 0.1155235767364502,
      "step": 20044
    },
    {
      "epoch": 0.0001223388671875,
      "step": 20044,
      "training_step_time": 0.3962564468383789
    },
    {
      "epoch": 0.000122344970703125,
      "model_forward_time": 0.11502194404602051,
      "step": 20045
    },
    {
      "epoch": 0.000122344970703125,
      "step": 20045,
      "training_step_time": 0.40730786323547363
    },
    {
      "epoch": 0.00012235107421875,
      "model_forward_time": 0.11508440971374512,
      "step": 20046
    },
    {
      "epoch": 0.00012235107421875,
      "step": 20046,
      "training_step_time": 0.43807387351989746
    },
    {
      "epoch": 0.000122357177734375,
      "model_forward_time": 0.11491847038269043,
      "step": 20047
    },
    {
      "epoch": 0.000122357177734375,
      "step": 20047,
      "training_step_time": 0.39989376068115234
    },
    {
      "epoch": 0.00012236328125,
      "model_forward_time": 0.11544442176818848,
      "step": 20048
    },
    {
      "epoch": 0.00012236328125,
      "step": 20048,
      "training_step_time": 0.4233877658843994
    },
    {
      "epoch": 0.000122369384765625,
      "model_forward_time": 0.11514472961425781,
      "step": 20049
    },
    {
      "epoch": 0.000122369384765625,
      "step": 20049,
      "training_step_time": 0.39326906204223633
    },
    {
      "epoch": 0.00012237548828125,
      "grad_norm": 0.12527675926685333,
      "learning_rate": 7.950062464275387e-05,
      "loss": 0.0486,
      "step": 20050
    },
    {
      "epoch": 0.00012237548828125,
      "model_forward_time": 0.11483478546142578,
      "step": 20050
    },
    {
      "epoch": 0.00012237548828125,
      "step": 20050,
      "training_step_time": 0.4124257564544678
    },
    {
      "epoch": 0.000122381591796875,
      "model_forward_time": 0.11520576477050781,
      "step": 20051
    },
    {
      "epoch": 0.000122381591796875,
      "step": 20051,
      "training_step_time": 0.4326298236846924
    },
    {
      "epoch": 0.0001223876953125,
      "model_forward_time": 0.11449980735778809,
      "step": 20052
    },
    {
      "epoch": 0.0001223876953125,
      "step": 20052,
      "training_step_time": 0.3942880630493164
    },
    {
      "epoch": 0.000122393798828125,
      "model_forward_time": 0.1155703067779541,
      "step": 20053
    },
    {
      "epoch": 0.000122393798828125,
      "step": 20053,
      "training_step_time": 0.3955268859863281
    },
    {
      "epoch": 0.00012239990234375,
      "model_forward_time": 0.1156468391418457,
      "step": 20054
    },
    {
      "epoch": 0.00012239990234375,
      "step": 20054,
      "training_step_time": 0.39446401596069336
    },
    {
      "epoch": 0.000122406005859375,
      "model_forward_time": 0.11523580551147461,
      "step": 20055
    },
    {
      "epoch": 0.000122406005859375,
      "step": 20055,
      "training_step_time": 0.39741063117980957
    },
    {
      "epoch": 0.000122412109375,
      "model_forward_time": 0.11498546600341797,
      "step": 20056
    },
    {
      "epoch": 0.000122412109375,
      "step": 20056,
      "training_step_time": 0.3855311870574951
    },
    {
      "epoch": 0.000122418212890625,
      "model_forward_time": 0.11526799201965332,
      "step": 20057
    },
    {
      "epoch": 0.000122418212890625,
      "step": 20057,
      "training_step_time": 0.40190577507019043
    },
    {
      "epoch": 0.00012242431640625,
      "model_forward_time": 0.11586666107177734,
      "step": 20058
    },
    {
      "epoch": 0.00012242431640625,
      "step": 20058,
      "training_step_time": 0.4006993770599365
    },
    {
      "epoch": 0.000122430419921875,
      "model_forward_time": 0.11472249031066895,
      "step": 20059
    },
    {
      "epoch": 0.000122430419921875,
      "step": 20059,
      "training_step_time": 0.4065241813659668
    },
    {
      "epoch": 0.0001224365234375,
      "grad_norm": 0.12533442676067352,
      "learning_rate": 7.947837013310005e-05,
      "loss": 0.0533,
      "step": 20060
    },
    {
      "epoch": 0.0001224365234375,
      "model_forward_time": 0.11516165733337402,
      "step": 20060
    },
    {
      "epoch": 0.0001224365234375,
      "step": 20060,
      "training_step_time": 0.45900893211364746
    },
    {
      "epoch": 0.000122442626953125,
      "model_forward_time": 0.1155843734741211,
      "step": 20061
    },
    {
      "epoch": 0.000122442626953125,
      "step": 20061,
      "training_step_time": 0.4034736156463623
    },
    {
      "epoch": 0.00012244873046875,
      "model_forward_time": 0.11513042449951172,
      "step": 20062
    },
    {
      "epoch": 0.00012244873046875,
      "step": 20062,
      "training_step_time": 0.39031553268432617
    },
    {
      "epoch": 0.000122454833984375,
      "model_forward_time": 0.1150977611541748,
      "step": 20063
    },
    {
      "epoch": 0.000122454833984375,
      "step": 20063,
      "training_step_time": 0.43187975883483887
    },
    {
      "epoch": 0.0001224609375,
      "model_forward_time": 0.11560463905334473,
      "step": 20064
    },
    {
      "epoch": 0.0001224609375,
      "step": 20064,
      "training_step_time": 0.49714207649230957
    },
    {
      "epoch": 0.000122467041015625,
      "model_forward_time": 0.11531352996826172,
      "step": 20065
    },
    {
      "epoch": 0.000122467041015625,
      "step": 20065,
      "training_step_time": 0.4252750873565674
    },
    {
      "epoch": 0.00012247314453125,
      "model_forward_time": 0.11474609375,
      "step": 20066
    },
    {
      "epoch": 0.00012247314453125,
      "step": 20066,
      "training_step_time": 0.44618964195251465
    },
    {
      "epoch": 0.000122479248046875,
      "model_forward_time": 0.1148979663848877,
      "step": 20067
    },
    {
      "epoch": 0.000122479248046875,
      "step": 20067,
      "training_step_time": 0.4610757827758789
    },
    {
      "epoch": 0.0001224853515625,
      "model_forward_time": 0.1144258975982666,
      "step": 20068
    },
    {
      "epoch": 0.0001224853515625,
      "step": 20068,
      "training_step_time": 0.39528679847717285
    },
    {
      "epoch": 0.000122491455078125,
      "model_forward_time": 0.1143941879272461,
      "step": 20069
    },
    {
      "epoch": 0.000122491455078125,
      "step": 20069,
      "training_step_time": 0.39508605003356934
    },
    {
      "epoch": 0.00012249755859375,
      "grad_norm": 0.12867651879787445,
      "learning_rate": 7.945610666869568e-05,
      "loss": 0.0532,
      "step": 20070
    },
    {
      "epoch": 0.00012249755859375,
      "model_forward_time": 0.11527442932128906,
      "step": 20070
    },
    {
      "epoch": 0.00012249755859375,
      "step": 20070,
      "training_step_time": 0.38121628761291504
    },
    {
      "epoch": 0.000122503662109375,
      "model_forward_time": 0.11434364318847656,
      "step": 20071
    },
    {
      "epoch": 0.000122503662109375,
      "step": 20071,
      "training_step_time": 0.3982579708099365
    },
    {
      "epoch": 0.000122509765625,
      "model_forward_time": 0.11498045921325684,
      "step": 20072
    },
    {
      "epoch": 0.000122509765625,
      "step": 20072,
      "training_step_time": 0.393491268157959
    },
    {
      "epoch": 0.000122515869140625,
      "model_forward_time": 0.11602210998535156,
      "step": 20073
    },
    {
      "epoch": 0.000122515869140625,
      "step": 20073,
      "training_step_time": 0.3934473991394043
    },
    {
      "epoch": 0.00012252197265625,
      "model_forward_time": 0.11499381065368652,
      "step": 20074
    },
    {
      "epoch": 0.00012252197265625,
      "step": 20074,
      "training_step_time": 0.4502899646759033
    },
    {
      "epoch": 0.000122528076171875,
      "model_forward_time": 0.11421489715576172,
      "step": 20075
    },
    {
      "epoch": 0.000122528076171875,
      "step": 20075,
      "training_step_time": 0.4273073673248291
    },
    {
      "epoch": 0.0001225341796875,
      "model_forward_time": 0.11548733711242676,
      "step": 20076
    },
    {
      "epoch": 0.0001225341796875,
      "step": 20076,
      "training_step_time": 0.3880910873413086
    },
    {
      "epoch": 0.000122540283203125,
      "model_forward_time": 0.11505270004272461,
      "step": 20077
    },
    {
      "epoch": 0.000122540283203125,
      "step": 20077,
      "training_step_time": 0.3893122673034668
    },
    {
      "epoch": 0.00012254638671875,
      "model_forward_time": 0.1158444881439209,
      "step": 20078
    },
    {
      "epoch": 0.00012254638671875,
      "step": 20078,
      "training_step_time": 0.4340238571166992
    },
    {
      "epoch": 0.000122552490234375,
      "model_forward_time": 0.11503815650939941,
      "step": 20079
    },
    {
      "epoch": 0.000122552490234375,
      "step": 20079,
      "training_step_time": 0.38701915740966797
    },
    {
      "epoch": 0.00012255859375,
      "grad_norm": 0.1470266729593277,
      "learning_rate": 7.943383425630387e-05,
      "loss": 0.0478,
      "step": 20080
    },
    {
      "epoch": 0.00012255859375,
      "model_forward_time": 0.1154780387878418,
      "step": 20080
    },
    {
      "epoch": 0.00012255859375,
      "step": 20080,
      "training_step_time": 0.4286313056945801
    },
    {
      "epoch": 0.000122564697265625,
      "model_forward_time": 0.11681532859802246,
      "step": 20081
    },
    {
      "epoch": 0.000122564697265625,
      "step": 20081,
      "training_step_time": 0.4023129940032959
    },
    {
      "epoch": 0.00012257080078125,
      "model_forward_time": 0.11631488800048828,
      "step": 20082
    },
    {
      "epoch": 0.00012257080078125,
      "step": 20082,
      "training_step_time": 0.4103271961212158
    },
    {
      "epoch": 0.000122576904296875,
      "model_forward_time": 0.11474275588989258,
      "step": 20083
    },
    {
      "epoch": 0.000122576904296875,
      "step": 20083,
      "training_step_time": 0.3930792808532715
    },
    {
      "epoch": 0.0001225830078125,
      "model_forward_time": 0.11531901359558105,
      "step": 20084
    },
    {
      "epoch": 0.0001225830078125,
      "step": 20084,
      "training_step_time": 0.38697195053100586
    },
    {
      "epoch": 0.000122589111328125,
      "model_forward_time": 0.11533665657043457,
      "step": 20085
    },
    {
      "epoch": 0.000122589111328125,
      "step": 20085,
      "training_step_time": 0.40076351165771484
    },
    {
      "epoch": 0.00012259521484375,
      "model_forward_time": 0.11485123634338379,
      "step": 20086
    },
    {
      "epoch": 0.00012259521484375,
      "step": 20086,
      "training_step_time": 0.396594762802124
    },
    {
      "epoch": 0.000122601318359375,
      "model_forward_time": 0.11535811424255371,
      "step": 20087
    },
    {
      "epoch": 0.000122601318359375,
      "step": 20087,
      "training_step_time": 0.3917224407196045
    },
    {
      "epoch": 0.000122607421875,
      "model_forward_time": 0.11644887924194336,
      "step": 20088
    },
    {
      "epoch": 0.000122607421875,
      "step": 20088,
      "training_step_time": 0.42237019538879395
    },
    {
      "epoch": 0.000122613525390625,
      "model_forward_time": 0.11538124084472656,
      "step": 20089
    },
    {
      "epoch": 0.000122613525390625,
      "step": 20089,
      "training_step_time": 0.4320657253265381
    },
    {
      "epoch": 0.00012261962890625,
      "grad_norm": 0.15706072747707367,
      "learning_rate": 7.941155290269038e-05,
      "loss": 0.0512,
      "step": 20090
    },
    {
      "epoch": 0.00012261962890625,
      "model_forward_time": 0.11503839492797852,
      "step": 20090
    },
    {
      "epoch": 0.00012261962890625,
      "step": 20090,
      "training_step_time": 0.38980698585510254
    },
    {
      "epoch": 0.000122625732421875,
      "model_forward_time": 0.11525130271911621,
      "step": 20091
    },
    {
      "epoch": 0.000122625732421875,
      "step": 20091,
      "training_step_time": 0.47620368003845215
    },
    {
      "epoch": 0.0001226318359375,
      "model_forward_time": 0.11485767364501953,
      "step": 20092
    },
    {
      "epoch": 0.0001226318359375,
      "step": 20092,
      "training_step_time": 0.3920867443084717
    },
    {
      "epoch": 0.000122637939453125,
      "model_forward_time": 0.11546635627746582,
      "step": 20093
    },
    {
      "epoch": 0.000122637939453125,
      "step": 20093,
      "training_step_time": 0.4009406566619873
    },
    {
      "epoch": 0.00012264404296875,
      "model_forward_time": 0.11535406112670898,
      "step": 20094
    },
    {
      "epoch": 0.00012264404296875,
      "step": 20094,
      "training_step_time": 0.433988094329834
    },
    {
      "epoch": 0.000122650146484375,
      "model_forward_time": 0.11519289016723633,
      "step": 20095
    },
    {
      "epoch": 0.000122650146484375,
      "step": 20095,
      "training_step_time": 0.4145236015319824
    },
    {
      "epoch": 0.00012265625,
      "model_forward_time": 0.11521649360656738,
      "step": 20096
    },
    {
      "epoch": 0.00012265625,
      "step": 20096,
      "training_step_time": 0.47255992889404297
    },
    {
      "epoch": 0.000122662353515625,
      "model_forward_time": 0.11528778076171875,
      "step": 20097
    },
    {
      "epoch": 0.000122662353515625,
      "step": 20097,
      "training_step_time": 0.4934091567993164
    },
    {
      "epoch": 0.00012266845703125,
      "model_forward_time": 0.1147775650024414,
      "step": 20098
    },
    {
      "epoch": 0.00012266845703125,
      "step": 20098,
      "training_step_time": 0.38779139518737793
    },
    {
      "epoch": 0.000122674560546875,
      "model_forward_time": 0.11486244201660156,
      "step": 20099
    },
    {
      "epoch": 0.000122674560546875,
      "step": 20099,
      "training_step_time": 0.39490222930908203
    },
    {
      "epoch": 0.0001226806640625,
      "grad_norm": 0.1396983414888382,
      "learning_rate": 7.938926261462366e-05,
      "loss": 0.05,
      "step": 20100
    },
    {
      "epoch": 0.0001226806640625,
      "model_forward_time": 0.1150052547454834,
      "step": 20100
    },
    {
      "epoch": 0.0001226806640625,
      "step": 20100,
      "training_step_time": 0.3959391117095947
    },
    {
      "epoch": 0.000122686767578125,
      "model_forward_time": 0.11617708206176758,
      "step": 20101
    },
    {
      "epoch": 0.000122686767578125,
      "step": 20101,
      "training_step_time": 0.39458584785461426
    },
    {
      "epoch": 0.00012269287109375,
      "model_forward_time": 0.11547565460205078,
      "step": 20102
    },
    {
      "epoch": 0.00012269287109375,
      "step": 20102,
      "training_step_time": 0.3919816017150879
    },
    {
      "epoch": 0.000122698974609375,
      "model_forward_time": 0.11502599716186523,
      "step": 20103
    },
    {
      "epoch": 0.000122698974609375,
      "step": 20103,
      "training_step_time": 0.5701620578765869
    },
    {
      "epoch": 0.000122705078125,
      "model_forward_time": 0.11563515663146973,
      "step": 20104
    },
    {
      "epoch": 0.000122705078125,
      "step": 20104,
      "training_step_time": 0.39119935035705566
    },
    {
      "epoch": 0.000122711181640625,
      "model_forward_time": 0.1151876449584961,
      "step": 20105
    },
    {
      "epoch": 0.000122711181640625,
      "step": 20105,
      "training_step_time": 0.39447569847106934
    },
    {
      "epoch": 0.00012271728515625,
      "model_forward_time": 0.11522078514099121,
      "step": 20106
    },
    {
      "epoch": 0.00012271728515625,
      "step": 20106,
      "training_step_time": 0.39355039596557617
    },
    {
      "epoch": 0.000122723388671875,
      "model_forward_time": 0.11547017097473145,
      "step": 20107
    },
    {
      "epoch": 0.000122723388671875,
      "step": 20107,
      "training_step_time": 0.4011952877044678
    },
    {
      "epoch": 0.0001227294921875,
      "model_forward_time": 0.11570906639099121,
      "step": 20108
    },
    {
      "epoch": 0.0001227294921875,
      "step": 20108,
      "training_step_time": 0.4694054126739502
    },
    {
      "epoch": 0.000122735595703125,
      "model_forward_time": 0.11634349822998047,
      "step": 20109
    },
    {
      "epoch": 0.000122735595703125,
      "step": 20109,
      "training_step_time": 0.6077029705047607
    },
    {
      "epoch": 0.00012274169921875,
      "grad_norm": 0.14565573632717133,
      "learning_rate": 7.936696339887494e-05,
      "loss": 0.0467,
      "step": 20110
    },
    {
      "epoch": 0.00012274169921875,
      "model_forward_time": 0.11468935012817383,
      "step": 20110
    },
    {
      "epoch": 0.00012274169921875,
      "step": 20110,
      "training_step_time": 0.40296244621276855
    },
    {
      "epoch": 0.000122747802734375,
      "model_forward_time": 0.11453437805175781,
      "step": 20111
    },
    {
      "epoch": 0.000122747802734375,
      "step": 20111,
      "training_step_time": 0.38991475105285645
    },
    {
      "epoch": 0.00012275390625,
      "model_forward_time": 0.11562728881835938,
      "step": 20112
    },
    {
      "epoch": 0.00012275390625,
      "step": 20112,
      "training_step_time": 0.42784833908081055
    },
    {
      "epoch": 0.000122760009765625,
      "model_forward_time": 0.11432552337646484,
      "step": 20113
    },
    {
      "epoch": 0.000122760009765625,
      "step": 20113,
      "training_step_time": 0.39207887649536133
    },
    {
      "epoch": 0.00012276611328125,
      "model_forward_time": 0.11497163772583008,
      "step": 20114
    },
    {
      "epoch": 0.00012276611328125,
      "step": 20114,
      "training_step_time": 0.39557409286499023
    },
    {
      "epoch": 0.000122772216796875,
      "model_forward_time": 0.11508750915527344,
      "step": 20115
    },
    {
      "epoch": 0.000122772216796875,
      "step": 20115,
      "training_step_time": 0.5867049694061279
    },
    {
      "epoch": 0.0001227783203125,
      "model_forward_time": 0.1146693229675293,
      "step": 20116
    },
    {
      "epoch": 0.0001227783203125,
      "step": 20116,
      "training_step_time": 0.3903007507324219
    },
    {
      "epoch": 0.000122784423828125,
      "model_forward_time": 0.11577415466308594,
      "step": 20117
    },
    {
      "epoch": 0.000122784423828125,
      "step": 20117,
      "training_step_time": 0.43005895614624023
    },
    {
      "epoch": 0.00012279052734375,
      "model_forward_time": 0.11475515365600586,
      "step": 20118
    },
    {
      "epoch": 0.00012279052734375,
      "step": 20118,
      "training_step_time": 0.40166735649108887
    },
    {
      "epoch": 0.000122796630859375,
      "model_forward_time": 0.11530804634094238,
      "step": 20119
    },
    {
      "epoch": 0.000122796630859375,
      "step": 20119,
      "training_step_time": 0.420398473739624
    },
    {
      "epoch": 0.000122802734375,
      "grad_norm": 0.15468530356884003,
      "learning_rate": 7.934465526221815e-05,
      "loss": 0.0487,
      "step": 20120
    },
    {
      "epoch": 0.000122802734375,
      "model_forward_time": 0.11477780342102051,
      "step": 20120
    },
    {
      "epoch": 0.000122802734375,
      "step": 20120,
      "training_step_time": 0.3832070827484131
    },
    {
      "epoch": 0.000122808837890625,
      "model_forward_time": 0.11546564102172852,
      "step": 20121
    },
    {
      "epoch": 0.000122808837890625,
      "step": 20121,
      "training_step_time": 0.5828938484191895
    },
    {
      "epoch": 0.00012281494140625,
      "model_forward_time": 0.11525106430053711,
      "step": 20122
    },
    {
      "epoch": 0.00012281494140625,
      "step": 20122,
      "training_step_time": 0.4032413959503174
    },
    {
      "epoch": 0.000122821044921875,
      "model_forward_time": 0.11477136611938477,
      "step": 20123
    },
    {
      "epoch": 0.000122821044921875,
      "step": 20123,
      "training_step_time": 0.5050160884857178
    },
    {
      "epoch": 0.0001228271484375,
      "model_forward_time": 0.11476945877075195,
      "step": 20124
    },
    {
      "epoch": 0.0001228271484375,
      "step": 20124,
      "training_step_time": 0.40310239791870117
    },
    {
      "epoch": 0.000122833251953125,
      "model_forward_time": 0.11440777778625488,
      "step": 20125
    },
    {
      "epoch": 0.000122833251953125,
      "step": 20125,
      "training_step_time": 0.3917369842529297
    },
    {
      "epoch": 0.00012283935546875,
      "model_forward_time": 0.11462903022766113,
      "step": 20126
    },
    {
      "epoch": 0.00012283935546875,
      "step": 20126,
      "training_step_time": 0.41678500175476074
    },
    {
      "epoch": 0.000122845458984375,
      "model_forward_time": 0.11523699760437012,
      "step": 20127
    },
    {
      "epoch": 0.000122845458984375,
      "step": 20127,
      "training_step_time": 0.48614954948425293
    },
    {
      "epoch": 0.0001228515625,
      "model_forward_time": 0.11459136009216309,
      "step": 20128
    },
    {
      "epoch": 0.0001228515625,
      "step": 20128,
      "training_step_time": 0.4156160354614258
    },
    {
      "epoch": 0.000122857666015625,
      "model_forward_time": 0.11493229866027832,
      "step": 20129
    },
    {
      "epoch": 0.000122857666015625,
      "step": 20129,
      "training_step_time": 0.43913841247558594
    },
    {
      "epoch": 0.00012286376953125,
      "grad_norm": 0.13763736188411713,
      "learning_rate": 7.932233821142987e-05,
      "loss": 0.0477,
      "step": 20130
    },
    {
      "epoch": 0.00012286376953125,
      "model_forward_time": 0.11464095115661621,
      "step": 20130
    },
    {
      "epoch": 0.00012286376953125,
      "step": 20130,
      "training_step_time": 0.4151637554168701
    },
    {
      "epoch": 0.000122869873046875,
      "model_forward_time": 0.11444520950317383,
      "step": 20131
    },
    {
      "epoch": 0.000122869873046875,
      "step": 20131,
      "training_step_time": 0.41717028617858887
    },
    {
      "epoch": 0.0001228759765625,
      "model_forward_time": 0.11548376083374023,
      "step": 20132
    },
    {
      "epoch": 0.0001228759765625,
      "step": 20132,
      "training_step_time": 0.43319010734558105
    },
    {
      "epoch": 0.000122882080078125,
      "model_forward_time": 0.11495351791381836,
      "step": 20133
    },
    {
      "epoch": 0.000122882080078125,
      "step": 20133,
      "training_step_time": 0.4728844165802002
    },
    {
      "epoch": 0.00012288818359375,
      "model_forward_time": 0.11500358581542969,
      "step": 20134
    },
    {
      "epoch": 0.00012288818359375,
      "step": 20134,
      "training_step_time": 0.3824591636657715
    },
    {
      "epoch": 0.000122894287109375,
      "model_forward_time": 0.11501145362854004,
      "step": 20135
    },
    {
      "epoch": 0.000122894287109375,
      "step": 20135,
      "training_step_time": 0.3881673812866211
    },
    {
      "epoch": 0.000122900390625,
      "model_forward_time": 0.11552095413208008,
      "step": 20136
    },
    {
      "epoch": 0.000122900390625,
      "step": 20136,
      "training_step_time": 0.4924967288970947
    },
    {
      "epoch": 0.000122906494140625,
      "model_forward_time": 0.11897850036621094,
      "step": 20137
    },
    {
      "epoch": 0.000122906494140625,
      "step": 20137,
      "training_step_time": 0.5134310722351074
    },
    {
      "epoch": 0.00012291259765625,
      "model_forward_time": 0.11430859565734863,
      "step": 20138
    },
    {
      "epoch": 0.00012291259765625,
      "step": 20138,
      "training_step_time": 0.44618988037109375
    },
    {
      "epoch": 0.000122918701171875,
      "model_forward_time": 0.11429858207702637,
      "step": 20139
    },
    {
      "epoch": 0.000122918701171875,
      "step": 20139,
      "training_step_time": 0.4798111915588379
    },
    {
      "epoch": 0.0001229248046875,
      "grad_norm": 0.18372568488121033,
      "learning_rate": 7.930001225328946e-05,
      "loss": 0.0543,
      "step": 20140
    },
    {
      "epoch": 0.0001229248046875,
      "model_forward_time": 0.1144101619720459,
      "step": 20140
    },
    {
      "epoch": 0.0001229248046875,
      "step": 20140,
      "training_step_time": 0.40122294425964355
    },
    {
      "epoch": 0.000122930908203125,
      "model_forward_time": 0.11454939842224121,
      "step": 20141
    },
    {
      "epoch": 0.000122930908203125,
      "step": 20141,
      "training_step_time": 0.39256858825683594
    },
    {
      "epoch": 0.00012293701171875,
      "model_forward_time": 0.11449503898620605,
      "step": 20142
    },
    {
      "epoch": 0.00012293701171875,
      "step": 20142,
      "training_step_time": 0.43453478813171387
    },
    {
      "epoch": 0.000122943115234375,
      "model_forward_time": 0.11541342735290527,
      "step": 20143
    },
    {
      "epoch": 0.000122943115234375,
      "step": 20143,
      "training_step_time": 0.38800764083862305
    },
    {
      "epoch": 0.00012294921875,
      "model_forward_time": 0.11485838890075684,
      "step": 20144
    },
    {
      "epoch": 0.00012294921875,
      "step": 20144,
      "training_step_time": 0.3975839614868164
    },
    {
      "epoch": 0.000122955322265625,
      "model_forward_time": 0.11581611633300781,
      "step": 20145
    },
    {
      "epoch": 0.000122955322265625,
      "step": 20145,
      "training_step_time": 0.5559015274047852
    },
    {
      "epoch": 0.00012296142578125,
      "model_forward_time": 0.11601543426513672,
      "step": 20146
    },
    {
      "epoch": 0.00012296142578125,
      "step": 20146,
      "training_step_time": 0.38800477981567383
    },
    {
      "epoch": 0.000122967529296875,
      "model_forward_time": 0.1150054931640625,
      "step": 20147
    },
    {
      "epoch": 0.000122967529296875,
      "step": 20147,
      "training_step_time": 0.4294319152832031
    },
    {
      "epoch": 0.0001229736328125,
      "model_forward_time": 0.11488080024719238,
      "step": 20148
    },
    {
      "epoch": 0.0001229736328125,
      "step": 20148,
      "training_step_time": 0.3977665901184082
    },
    {
      "epoch": 0.000122979736328125,
      "model_forward_time": 0.11613965034484863,
      "step": 20149
    },
    {
      "epoch": 0.000122979736328125,
      "step": 20149,
      "training_step_time": 0.39281582832336426
    },
    {
      "epoch": 0.00012298583984375,
      "grad_norm": 0.1641000211238861,
      "learning_rate": 7.927767739457897e-05,
      "loss": 0.0525,
      "step": 20150
    },
    {
      "epoch": 0.00012298583984375,
      "model_forward_time": 0.1156456470489502,
      "step": 20150
    },
    {
      "epoch": 0.00012298583984375,
      "step": 20150,
      "training_step_time": 0.3896923065185547
    },
    {
      "epoch": 0.000122991943359375,
      "model_forward_time": 0.11555314064025879,
      "step": 20151
    },
    {
      "epoch": 0.000122991943359375,
      "step": 20151,
      "training_step_time": 0.5904519557952881
    },
    {
      "epoch": 0.000122998046875,
      "model_forward_time": 0.11543130874633789,
      "step": 20152
    },
    {
      "epoch": 0.000122998046875,
      "step": 20152,
      "training_step_time": 0.44941139221191406
    },
    {
      "epoch": 0.000123004150390625,
      "model_forward_time": 0.11615157127380371,
      "step": 20153
    },
    {
      "epoch": 0.000123004150390625,
      "step": 20153,
      "training_step_time": 0.39539122581481934
    },
    {
      "epoch": 0.00012301025390625,
      "model_forward_time": 0.11592435836791992,
      "step": 20154
    },
    {
      "epoch": 0.00012301025390625,
      "step": 20154,
      "training_step_time": 0.5262012481689453
    },
    {
      "epoch": 0.000123016357421875,
      "model_forward_time": 0.11623716354370117,
      "step": 20155
    },
    {
      "epoch": 0.000123016357421875,
      "step": 20155,
      "training_step_time": 0.410142183303833
    },
    {
      "epoch": 0.0001230224609375,
      "model_forward_time": 0.11543726921081543,
      "step": 20156
    },
    {
      "epoch": 0.0001230224609375,
      "step": 20156,
      "training_step_time": 0.39124464988708496
    },
    {
      "epoch": 0.000123028564453125,
      "model_forward_time": 0.11454653739929199,
      "step": 20157
    },
    {
      "epoch": 0.000123028564453125,
      "step": 20157,
      "training_step_time": 0.48656463623046875
    },
    {
      "epoch": 0.00012303466796875,
      "model_forward_time": 0.11627626419067383,
      "step": 20158
    },
    {
      "epoch": 0.00012303466796875,
      "step": 20158,
      "training_step_time": 0.3901684284210205
    },
    {
      "epoch": 0.000123040771484375,
      "model_forward_time": 0.11471724510192871,
      "step": 20159
    },
    {
      "epoch": 0.000123040771484375,
      "step": 20159,
      "training_step_time": 0.4328336715698242
    },
    {
      "epoch": 0.000123046875,
      "grad_norm": 0.15876054763793945,
      "learning_rate": 7.925533364208309e-05,
      "loss": 0.0548,
      "step": 20160
    },
    {
      "epoch": 0.000123046875,
      "model_forward_time": 0.11538290977478027,
      "step": 20160
    },
    {
      "epoch": 0.000123046875,
      "step": 20160,
      "training_step_time": 0.4076385498046875
    },
    {
      "epoch": 0.000123052978515625,
      "model_forward_time": 0.11533665657043457,
      "step": 20161
    },
    {
      "epoch": 0.000123052978515625,
      "step": 20161,
      "training_step_time": 0.41083359718322754
    },
    {
      "epoch": 0.00012305908203125,
      "model_forward_time": 0.11586737632751465,
      "step": 20162
    },
    {
      "epoch": 0.00012305908203125,
      "step": 20162,
      "training_step_time": 0.3891947269439697
    },
    {
      "epoch": 0.000123065185546875,
      "model_forward_time": 0.11521005630493164,
      "step": 20163
    },
    {
      "epoch": 0.000123065185546875,
      "step": 20163,
      "training_step_time": 0.5404894351959229
    },
    {
      "epoch": 0.0001230712890625,
      "model_forward_time": 0.1149587631225586,
      "step": 20164
    },
    {
      "epoch": 0.0001230712890625,
      "step": 20164,
      "training_step_time": 0.43463134765625
    },
    {
      "epoch": 0.000123077392578125,
      "model_forward_time": 0.11508560180664062,
      "step": 20165
    },
    {
      "epoch": 0.000123077392578125,
      "step": 20165,
      "training_step_time": 0.42327213287353516
    },
    {
      "epoch": 0.00012308349609375,
      "model_forward_time": 0.11547327041625977,
      "step": 20166
    },
    {
      "epoch": 0.00012308349609375,
      "step": 20166,
      "training_step_time": 0.48794054985046387
    },
    {
      "epoch": 0.000123089599609375,
      "model_forward_time": 0.11488533020019531,
      "step": 20167
    },
    {
      "epoch": 0.000123089599609375,
      "step": 20167,
      "training_step_time": 0.4667017459869385
    },
    {
      "epoch": 0.000123095703125,
      "model_forward_time": 0.1144108772277832,
      "step": 20168
    },
    {
      "epoch": 0.000123095703125,
      "step": 20168,
      "training_step_time": 0.4654524326324463
    },
    {
      "epoch": 0.000123101806640625,
      "model_forward_time": 0.1147465705871582,
      "step": 20169
    },
    {
      "epoch": 0.000123101806640625,
      "step": 20169,
      "training_step_time": 0.3984367847442627
    },
    {
      "epoch": 0.00012310791015625,
      "grad_norm": 0.1534871906042099,
      "learning_rate": 7.923298100258929e-05,
      "loss": 0.0475,
      "step": 20170
    },
    {
      "epoch": 0.00012310791015625,
      "model_forward_time": 0.11478328704833984,
      "step": 20170
    },
    {
      "epoch": 0.00012310791015625,
      "step": 20170,
      "training_step_time": 0.38821983337402344
    },
    {
      "epoch": 0.000123114013671875,
      "model_forward_time": 0.11487770080566406,
      "step": 20171
    },
    {
      "epoch": 0.000123114013671875,
      "step": 20171,
      "training_step_time": 0.3949124813079834
    },
    {
      "epoch": 0.0001231201171875,
      "model_forward_time": 0.11516356468200684,
      "step": 20172
    },
    {
      "epoch": 0.0001231201171875,
      "step": 20172,
      "training_step_time": 0.3973731994628906
    },
    {
      "epoch": 0.000123126220703125,
      "model_forward_time": 0.11525130271911621,
      "step": 20173
    },
    {
      "epoch": 0.000123126220703125,
      "step": 20173,
      "training_step_time": 0.39451098442077637
    },
    {
      "epoch": 0.00012313232421875,
      "model_forward_time": 0.11540961265563965,
      "step": 20174
    },
    {
      "epoch": 0.00012313232421875,
      "step": 20174,
      "training_step_time": 0.48589444160461426
    },
    {
      "epoch": 0.000123138427734375,
      "model_forward_time": 0.11546063423156738,
      "step": 20175
    },
    {
      "epoch": 0.000123138427734375,
      "step": 20175,
      "training_step_time": 0.46302342414855957
    },
    {
      "epoch": 0.00012314453125,
      "model_forward_time": 0.11502361297607422,
      "step": 20176
    },
    {
      "epoch": 0.00012314453125,
      "step": 20176,
      "training_step_time": 0.4005093574523926
    },
    {
      "epoch": 0.000123150634765625,
      "model_forward_time": 0.11557149887084961,
      "step": 20177
    },
    {
      "epoch": 0.000123150634765625,
      "step": 20177,
      "training_step_time": 0.39577221870422363
    },
    {
      "epoch": 0.00012315673828125,
      "model_forward_time": 0.11577296257019043,
      "step": 20178
    },
    {
      "epoch": 0.00012315673828125,
      "step": 20178,
      "training_step_time": 0.4653048515319824
    },
    {
      "epoch": 0.000123162841796875,
      "model_forward_time": 0.11596846580505371,
      "step": 20179
    },
    {
      "epoch": 0.000123162841796875,
      "step": 20179,
      "training_step_time": 0.5127246379852295
    },
    {
      "epoch": 0.0001231689453125,
      "grad_norm": 0.15766042470932007,
      "learning_rate": 7.921061948288773e-05,
      "loss": 0.0529,
      "step": 20180
    },
    {
      "epoch": 0.0001231689453125,
      "model_forward_time": 0.11488723754882812,
      "step": 20180
    },
    {
      "epoch": 0.0001231689453125,
      "step": 20180,
      "training_step_time": 0.44825053215026855
    },
    {
      "epoch": 0.000123175048828125,
      "model_forward_time": 0.11537718772888184,
      "step": 20181
    },
    {
      "epoch": 0.000123175048828125,
      "step": 20181,
      "training_step_time": 0.41938281059265137
    },
    {
      "epoch": 0.00012318115234375,
      "model_forward_time": 0.11957550048828125,
      "step": 20182
    },
    {
      "epoch": 0.00012318115234375,
      "step": 20182,
      "training_step_time": 0.4040088653564453
    },
    {
      "epoch": 0.000123187255859375,
      "model_forward_time": 0.11571455001831055,
      "step": 20183
    },
    {
      "epoch": 0.000123187255859375,
      "step": 20183,
      "training_step_time": 0.38071656227111816
    },
    {
      "epoch": 0.000123193359375,
      "model_forward_time": 0.1159665584564209,
      "step": 20184
    },
    {
      "epoch": 0.000123193359375,
      "step": 20184,
      "training_step_time": 0.3897247314453125
    },
    {
      "epoch": 0.000123199462890625,
      "model_forward_time": 0.11494064331054688,
      "step": 20185
    },
    {
      "epoch": 0.000123199462890625,
      "step": 20185,
      "training_step_time": 0.3973398208618164
    },
    {
      "epoch": 0.00012320556640625,
      "model_forward_time": 0.11551833152770996,
      "step": 20186
    },
    {
      "epoch": 0.00012320556640625,
      "step": 20186,
      "training_step_time": 0.396679162979126
    },
    {
      "epoch": 0.000123211669921875,
      "model_forward_time": 0.11566615104675293,
      "step": 20187
    },
    {
      "epoch": 0.000123211669921875,
      "step": 20187,
      "training_step_time": 0.7273855209350586
    },
    {
      "epoch": 0.0001232177734375,
      "model_forward_time": 0.11482834815979004,
      "step": 20188
    },
    {
      "epoch": 0.0001232177734375,
      "step": 20188,
      "training_step_time": 0.4215977191925049
    },
    {
      "epoch": 0.000123223876953125,
      "model_forward_time": 0.11486458778381348,
      "step": 20189
    },
    {
      "epoch": 0.000123223876953125,
      "step": 20189,
      "training_step_time": 0.4352385997772217
    },
    {
      "epoch": 0.00012322998046875,
      "grad_norm": 0.14710219204425812,
      "learning_rate": 7.918824908977123e-05,
      "loss": 0.051,
      "step": 20190
    },
    {
      "epoch": 0.00012322998046875,
      "model_forward_time": 0.11464548110961914,
      "step": 20190
    },
    {
      "epoch": 0.00012322998046875,
      "step": 20190,
      "training_step_time": 0.38678646087646484
    },
    {
      "epoch": 0.000123236083984375,
      "model_forward_time": 0.11465311050415039,
      "step": 20191
    },
    {
      "epoch": 0.000123236083984375,
      "step": 20191,
      "training_step_time": 0.3924448490142822
    },
    {
      "epoch": 0.0001232421875,
      "model_forward_time": 0.1148064136505127,
      "step": 20192
    },
    {
      "epoch": 0.0001232421875,
      "step": 20192,
      "training_step_time": 0.3642578125
    },
    {
      "epoch": 0.000123248291015625,
      "model_forward_time": 0.1155707836151123,
      "step": 20193
    },
    {
      "epoch": 0.000123248291015625,
      "step": 20193,
      "training_step_time": 0.4875812530517578
    },
    {
      "epoch": 0.00012325439453125,
      "model_forward_time": 0.11529660224914551,
      "step": 20194
    },
    {
      "epoch": 0.00012325439453125,
      "step": 20194,
      "training_step_time": 0.4909811019897461
    },
    {
      "epoch": 0.000123260498046875,
      "model_forward_time": 0.11512422561645508,
      "step": 20195
    },
    {
      "epoch": 0.000123260498046875,
      "step": 20195,
      "training_step_time": 0.39830875396728516
    },
    {
      "epoch": 0.0001232666015625,
      "model_forward_time": 0.11545038223266602,
      "step": 20196
    },
    {
      "epoch": 0.0001232666015625,
      "step": 20196,
      "training_step_time": 0.3891429901123047
    },
    {
      "epoch": 0.000123272705078125,
      "model_forward_time": 0.11474823951721191,
      "step": 20197
    },
    {
      "epoch": 0.000123272705078125,
      "step": 20197,
      "training_step_time": 0.4477698802947998
    },
    {
      "epoch": 0.00012327880859375,
      "model_forward_time": 0.11545372009277344,
      "step": 20198
    },
    {
      "epoch": 0.00012327880859375,
      "step": 20198,
      "training_step_time": 0.38747715950012207
    },
    {
      "epoch": 0.000123284912109375,
      "model_forward_time": 0.11441421508789062,
      "step": 20199
    },
    {
      "epoch": 0.000123284912109375,
      "step": 20199,
      "training_step_time": 0.5337803363800049
    },
    {
      "epoch": 0.000123291015625,
      "grad_norm": 0.10611991584300995,
      "learning_rate": 7.916586983003533e-05,
      "loss": 0.0566,
      "step": 20200
    },
    {
      "epoch": 0.000123291015625,
      "model_forward_time": 0.11519384384155273,
      "step": 20200
    },
    {
      "epoch": 0.000123291015625,
      "step": 20200,
      "training_step_time": 0.3881247043609619
    },
    {
      "epoch": 0.000123297119140625,
      "model_forward_time": 0.11531591415405273,
      "step": 20201
    },
    {
      "epoch": 0.000123297119140625,
      "step": 20201,
      "training_step_time": 0.4056370258331299
    },
    {
      "epoch": 0.00012330322265625,
      "model_forward_time": 0.1159200668334961,
      "step": 20202
    },
    {
      "epoch": 0.00012330322265625,
      "step": 20202,
      "training_step_time": 0.39378905296325684
    },
    {
      "epoch": 0.000123309326171875,
      "model_forward_time": 0.11524391174316406,
      "step": 20203
    },
    {
      "epoch": 0.000123309326171875,
      "step": 20203,
      "training_step_time": 0.415053129196167
    },
    {
      "epoch": 0.0001233154296875,
      "model_forward_time": 0.11588144302368164,
      "step": 20204
    },
    {
      "epoch": 0.0001233154296875,
      "step": 20204,
      "training_step_time": 0.4463956356048584
    },
    {
      "epoch": 0.000123321533203125,
      "model_forward_time": 0.1156916618347168,
      "step": 20205
    },
    {
      "epoch": 0.000123321533203125,
      "step": 20205,
      "training_step_time": 0.5691535472869873
    },
    {
      "epoch": 0.00012332763671875,
      "model_forward_time": 0.11446332931518555,
      "step": 20206
    },
    {
      "epoch": 0.00012332763671875,
      "step": 20206,
      "training_step_time": 0.36522984504699707
    },
    {
      "epoch": 0.000123333740234375,
      "model_forward_time": 0.11487078666687012,
      "step": 20207
    },
    {
      "epoch": 0.000123333740234375,
      "step": 20207,
      "training_step_time": 0.44324564933776855
    },
    {
      "epoch": 0.00012333984375,
      "model_forward_time": 0.11558294296264648,
      "step": 20208
    },
    {
      "epoch": 0.00012333984375,
      "step": 20208,
      "training_step_time": 0.4384944438934326
    },
    {
      "epoch": 0.000123345947265625,
      "model_forward_time": 0.11499142646789551,
      "step": 20209
    },
    {
      "epoch": 0.000123345947265625,
      "step": 20209,
      "training_step_time": 0.4033186435699463
    },
    {
      "epoch": 0.00012335205078125,
      "grad_norm": 0.16465555131435394,
      "learning_rate": 7.914348171047825e-05,
      "loss": 0.0521,
      "step": 20210
    },
    {
      "epoch": 0.00012335205078125,
      "model_forward_time": 0.11475300788879395,
      "step": 20210
    },
    {
      "epoch": 0.00012335205078125,
      "step": 20210,
      "training_step_time": 0.41271424293518066
    },
    {
      "epoch": 0.000123358154296875,
      "model_forward_time": 0.1151578426361084,
      "step": 20211
    },
    {
      "epoch": 0.000123358154296875,
      "step": 20211,
      "training_step_time": 0.5088624954223633
    },
    {
      "epoch": 0.0001233642578125,
      "model_forward_time": 0.11514639854431152,
      "step": 20212
    },
    {
      "epoch": 0.0001233642578125,
      "step": 20212,
      "training_step_time": 0.38425183296203613
    },
    {
      "epoch": 0.000123370361328125,
      "model_forward_time": 0.11483097076416016,
      "step": 20213
    },
    {
      "epoch": 0.000123370361328125,
      "step": 20213,
      "training_step_time": 0.38886451721191406
    },
    {
      "epoch": 0.00012337646484375,
      "model_forward_time": 0.11598324775695801,
      "step": 20214
    },
    {
      "epoch": 0.00012337646484375,
      "step": 20214,
      "training_step_time": 0.390941858291626
    },
    {
      "epoch": 0.000123382568359375,
      "model_forward_time": 0.11441278457641602,
      "step": 20215
    },
    {
      "epoch": 0.000123382568359375,
      "step": 20215,
      "training_step_time": 0.3951132297515869
    },
    {
      "epoch": 0.000123388671875,
      "model_forward_time": 0.11552023887634277,
      "step": 20216
    },
    {
      "epoch": 0.000123388671875,
      "step": 20216,
      "training_step_time": 0.3943965435028076
    },
    {
      "epoch": 0.000123394775390625,
      "model_forward_time": 0.11511826515197754,
      "step": 20217
    },
    {
      "epoch": 0.000123394775390625,
      "step": 20217,
      "training_step_time": 0.5075504779815674
    },
    {
      "epoch": 0.00012340087890625,
      "model_forward_time": 0.11489462852478027,
      "step": 20218
    },
    {
      "epoch": 0.00012340087890625,
      "step": 20218,
      "training_step_time": 0.3948535919189453
    },
    {
      "epoch": 0.000123406982421875,
      "model_forward_time": 0.11568617820739746,
      "step": 20219
    },
    {
      "epoch": 0.000123406982421875,
      "step": 20219,
      "training_step_time": 0.3957710266113281
    },
    {
      "epoch": 0.0001234130859375,
      "grad_norm": 0.1562623828649521,
      "learning_rate": 7.912108473790092e-05,
      "loss": 0.0523,
      "step": 20220
    },
    {
      "epoch": 0.0001234130859375,
      "model_forward_time": 0.11518144607543945,
      "step": 20220
    },
    {
      "epoch": 0.0001234130859375,
      "step": 20220,
      "training_step_time": 0.3934905529022217
    },
    {
      "epoch": 0.000123419189453125,
      "model_forward_time": 0.11542248725891113,
      "step": 20221
    },
    {
      "epoch": 0.000123419189453125,
      "step": 20221,
      "training_step_time": 0.42087793350219727
    },
    {
      "epoch": 0.00012342529296875,
      "model_forward_time": 0.11608529090881348,
      "step": 20222
    },
    {
      "epoch": 0.00012342529296875,
      "step": 20222,
      "training_step_time": 0.49062395095825195
    },
    {
      "epoch": 0.000123431396484375,
      "model_forward_time": 0.11561751365661621,
      "step": 20223
    },
    {
      "epoch": 0.000123431396484375,
      "step": 20223,
      "training_step_time": 0.5774469375610352
    },
    {
      "epoch": 0.0001234375,
      "model_forward_time": 0.11484503746032715,
      "step": 20224
    },
    {
      "epoch": 0.0001234375,
      "step": 20224,
      "training_step_time": 0.4001131057739258
    },
    {
      "epoch": 0.000123443603515625,
      "model_forward_time": 0.1149282455444336,
      "step": 20225
    },
    {
      "epoch": 0.000123443603515625,
      "step": 20225,
      "training_step_time": 0.4322340488433838
    },
    {
      "epoch": 0.00012344970703125,
      "model_forward_time": 0.11453771591186523,
      "step": 20226
    },
    {
      "epoch": 0.00012344970703125,
      "step": 20226,
      "training_step_time": 0.395402193069458
    },
    {
      "epoch": 0.000123455810546875,
      "model_forward_time": 0.11496114730834961,
      "step": 20227
    },
    {
      "epoch": 0.000123455810546875,
      "step": 20227,
      "training_step_time": 0.3902933597564697
    },
    {
      "epoch": 0.0001234619140625,
      "model_forward_time": 0.11476492881774902,
      "step": 20228
    },
    {
      "epoch": 0.0001234619140625,
      "step": 20228,
      "training_step_time": 0.39931344985961914
    },
    {
      "epoch": 0.000123468017578125,
      "model_forward_time": 0.11495041847229004,
      "step": 20229
    },
    {
      "epoch": 0.000123468017578125,
      "step": 20229,
      "training_step_time": 0.5689361095428467
    },
    {
      "epoch": 0.00012347412109375,
      "grad_norm": 0.12090758234262466,
      "learning_rate": 7.909867891910694e-05,
      "loss": 0.0438,
      "step": 20230
    },
    {
      "epoch": 0.00012347412109375,
      "model_forward_time": 0.11488580703735352,
      "step": 20230
    },
    {
      "epoch": 0.00012347412109375,
      "step": 20230,
      "training_step_time": 0.4541003704071045
    },
    {
      "epoch": 0.000123480224609375,
      "model_forward_time": 0.11523103713989258,
      "step": 20231
    },
    {
      "epoch": 0.000123480224609375,
      "step": 20231,
      "training_step_time": 0.43943095207214355
    },
    {
      "epoch": 0.000123486328125,
      "model_forward_time": 0.1150820255279541,
      "step": 20232
    },
    {
      "epoch": 0.000123486328125,
      "step": 20232,
      "training_step_time": 0.3923022747039795
    },
    {
      "epoch": 0.000123492431640625,
      "model_forward_time": 0.11501741409301758,
      "step": 20233
    },
    {
      "epoch": 0.000123492431640625,
      "step": 20233,
      "training_step_time": 0.38861775398254395
    },
    {
      "epoch": 0.00012349853515625,
      "model_forward_time": 0.11498641967773438,
      "step": 20234
    },
    {
      "epoch": 0.00012349853515625,
      "step": 20234,
      "training_step_time": 0.39156436920166016
    },
    {
      "epoch": 0.000123504638671875,
      "model_forward_time": 0.11550164222717285,
      "step": 20235
    },
    {
      "epoch": 0.000123504638671875,
      "step": 20235,
      "training_step_time": 0.5213282108306885
    },
    {
      "epoch": 0.0001235107421875,
      "model_forward_time": 0.1152184009552002,
      "step": 20236
    },
    {
      "epoch": 0.0001235107421875,
      "step": 20236,
      "training_step_time": 0.4243199825286865
    },
    {
      "epoch": 0.000123516845703125,
      "model_forward_time": 0.11494612693786621,
      "step": 20237
    },
    {
      "epoch": 0.000123516845703125,
      "step": 20237,
      "training_step_time": 0.41922855377197266
    },
    {
      "epoch": 0.00012352294921875,
      "model_forward_time": 0.11484766006469727,
      "step": 20238
    },
    {
      "epoch": 0.00012352294921875,
      "step": 20238,
      "training_step_time": 0.3901185989379883
    },
    {
      "epoch": 0.000123529052734375,
      "model_forward_time": 0.11526656150817871,
      "step": 20239
    },
    {
      "epoch": 0.000123529052734375,
      "step": 20239,
      "training_step_time": 0.4307692050933838
    },
    {
      "epoch": 0.00012353515625,
      "grad_norm": 0.1286974996328354,
      "learning_rate": 7.907626426090262e-05,
      "loss": 0.0539,
      "step": 20240
    },
    {
      "epoch": 0.00012353515625,
      "model_forward_time": 0.11457180976867676,
      "step": 20240
    },
    {
      "epoch": 0.00012353515625,
      "step": 20240,
      "training_step_time": 0.4667050838470459
    },
    {
      "epoch": 0.000123541259765625,
      "model_forward_time": 0.11508655548095703,
      "step": 20241
    },
    {
      "epoch": 0.000123541259765625,
      "step": 20241,
      "training_step_time": 0.4911036491394043
    },
    {
      "epoch": 0.00012354736328125,
      "model_forward_time": 0.11491799354553223,
      "step": 20242
    },
    {
      "epoch": 0.00012354736328125,
      "step": 20242,
      "training_step_time": 0.38607215881347656
    },
    {
      "epoch": 0.000123553466796875,
      "model_forward_time": 0.11507201194763184,
      "step": 20243
    },
    {
      "epoch": 0.000123553466796875,
      "step": 20243,
      "training_step_time": 0.3878328800201416
    },
    {
      "epoch": 0.0001235595703125,
      "model_forward_time": 0.11512613296508789,
      "step": 20244
    },
    {
      "epoch": 0.0001235595703125,
      "step": 20244,
      "training_step_time": 0.40480709075927734
    },
    {
      "epoch": 0.000123565673828125,
      "model_forward_time": 0.11553001403808594,
      "step": 20245
    },
    {
      "epoch": 0.000123565673828125,
      "step": 20245,
      "training_step_time": 0.40884900093078613
    },
    {
      "epoch": 0.00012357177734375,
      "model_forward_time": 0.11593151092529297,
      "step": 20246
    },
    {
      "epoch": 0.00012357177734375,
      "step": 20246,
      "training_step_time": 0.42746829986572266
    },
    {
      "epoch": 0.000123577880859375,
      "model_forward_time": 0.11533689498901367,
      "step": 20247
    },
    {
      "epoch": 0.000123577880859375,
      "step": 20247,
      "training_step_time": 0.6303720474243164
    },
    {
      "epoch": 0.000123583984375,
      "model_forward_time": 0.1154937744140625,
      "step": 20248
    },
    {
      "epoch": 0.000123583984375,
      "step": 20248,
      "training_step_time": 0.41613149642944336
    },
    {
      "epoch": 0.000123590087890625,
      "model_forward_time": 0.11437511444091797,
      "step": 20249
    },
    {
      "epoch": 0.000123590087890625,
      "step": 20249,
      "training_step_time": 0.44863247871398926
    },
    {
      "epoch": 0.00012359619140625,
      "grad_norm": 0.12017786502838135,
      "learning_rate": 7.905384077009693e-05,
      "loss": 0.0436,
      "step": 20250
    },
    {
      "epoch": 0.00012359619140625,
      "model_forward_time": 0.11506462097167969,
      "step": 20250
    },
    {
      "epoch": 0.00012359619140625,
      "step": 20250,
      "training_step_time": 0.4706282615661621
    },
    {
      "epoch": 0.000123602294921875,
      "model_forward_time": 0.11571741104125977,
      "step": 20251
    },
    {
      "epoch": 0.000123602294921875,
      "step": 20251,
      "training_step_time": 0.41550707817077637
    },
    {
      "epoch": 0.0001236083984375,
      "model_forward_time": 0.11480236053466797,
      "step": 20252
    },
    {
      "epoch": 0.0001236083984375,
      "step": 20252,
      "training_step_time": 0.3885040283203125
    },
    {
      "epoch": 0.000123614501953125,
      "model_forward_time": 0.11461901664733887,
      "step": 20253
    },
    {
      "epoch": 0.000123614501953125,
      "step": 20253,
      "training_step_time": 0.458301305770874
    },
    {
      "epoch": 0.00012362060546875,
      "model_forward_time": 0.11482691764831543,
      "step": 20254
    },
    {
      "epoch": 0.00012362060546875,
      "step": 20254,
      "training_step_time": 0.44390368461608887
    },
    {
      "epoch": 0.000123626708984375,
      "model_forward_time": 0.11477303504943848,
      "step": 20255
    },
    {
      "epoch": 0.000123626708984375,
      "step": 20255,
      "training_step_time": 0.3903849124908447
    },
    {
      "epoch": 0.0001236328125,
      "model_forward_time": 0.11526989936828613,
      "step": 20256
    },
    {
      "epoch": 0.0001236328125,
      "step": 20256,
      "training_step_time": 0.39603376388549805
    },
    {
      "epoch": 0.000123638916015625,
      "model_forward_time": 0.11446309089660645,
      "step": 20257
    },
    {
      "epoch": 0.000123638916015625,
      "step": 20257,
      "training_step_time": 0.39397764205932617
    },
    {
      "epoch": 0.00012364501953125,
      "model_forward_time": 0.11570286750793457,
      "step": 20258
    },
    {
      "epoch": 0.00012364501953125,
      "step": 20258,
      "training_step_time": 0.4536569118499756
    },
    {
      "epoch": 0.000123651123046875,
      "model_forward_time": 0.1154778003692627,
      "step": 20259
    },
    {
      "epoch": 0.000123651123046875,
      "step": 20259,
      "training_step_time": 0.5844588279724121
    },
    {
      "epoch": 0.0001236572265625,
      "grad_norm": 0.14604291319847107,
      "learning_rate": 7.903140845350153e-05,
      "loss": 0.049,
      "step": 20260
    },
    {
      "epoch": 0.0001236572265625,
      "model_forward_time": 0.11532998085021973,
      "step": 20260
    },
    {
      "epoch": 0.0001236572265625,
      "step": 20260,
      "training_step_time": 0.3909425735473633
    },
    {
      "epoch": 0.000123663330078125,
      "model_forward_time": 0.11474299430847168,
      "step": 20261
    },
    {
      "epoch": 0.000123663330078125,
      "step": 20261,
      "training_step_time": 0.3941068649291992
    },
    {
      "epoch": 0.00012366943359375,
      "model_forward_time": 0.1171255111694336,
      "step": 20262
    },
    {
      "epoch": 0.00012366943359375,
      "step": 20262,
      "training_step_time": 0.42293286323547363
    },
    {
      "epoch": 0.000123675537109375,
      "model_forward_time": 0.1141197681427002,
      "step": 20263
    },
    {
      "epoch": 0.000123675537109375,
      "step": 20263,
      "training_step_time": 0.3608839511871338
    },
    {
      "epoch": 0.000123681640625,
      "model_forward_time": 0.1145467758178711,
      "step": 20264
    },
    {
      "epoch": 0.000123681640625,
      "step": 20264,
      "training_step_time": 0.44617319107055664
    },
    {
      "epoch": 0.000123687744140625,
      "model_forward_time": 0.11550664901733398,
      "step": 20265
    },
    {
      "epoch": 0.000123687744140625,
      "step": 20265,
      "training_step_time": 0.4596133232116699
    },
    {
      "epoch": 0.00012369384765625,
      "model_forward_time": 0.11520576477050781,
      "step": 20266
    },
    {
      "epoch": 0.00012369384765625,
      "step": 20266,
      "training_step_time": 0.39182496070861816
    },
    {
      "epoch": 0.000123699951171875,
      "model_forward_time": 0.11481881141662598,
      "step": 20267
    },
    {
      "epoch": 0.000123699951171875,
      "step": 20267,
      "training_step_time": 0.3851931095123291
    },
    {
      "epoch": 0.0001237060546875,
      "model_forward_time": 0.11483430862426758,
      "step": 20268
    },
    {
      "epoch": 0.0001237060546875,
      "step": 20268,
      "training_step_time": 0.42577457427978516
    },
    {
      "epoch": 0.000123712158203125,
      "model_forward_time": 0.11468839645385742,
      "step": 20269
    },
    {
      "epoch": 0.000123712158203125,
      "step": 20269,
      "training_step_time": 0.39563727378845215
    },
    {
      "epoch": 0.00012371826171875,
      "grad_norm": 0.17352740466594696,
      "learning_rate": 7.900896731793077e-05,
      "loss": 0.0509,
      "step": 20270
    },
    {
      "epoch": 0.00012371826171875,
      "model_forward_time": 0.1144707202911377,
      "step": 20270
    },
    {
      "epoch": 0.00012371826171875,
      "step": 20270,
      "training_step_time": 0.3900797367095947
    },
    {
      "epoch": 0.000123724365234375,
      "model_forward_time": 0.1147770881652832,
      "step": 20271
    },
    {
      "epoch": 0.000123724365234375,
      "step": 20271,
      "training_step_time": 0.551889181137085
    },
    {
      "epoch": 0.00012373046875,
      "model_forward_time": 0.11510753631591797,
      "step": 20272
    },
    {
      "epoch": 0.00012373046875,
      "step": 20272,
      "training_step_time": 0.4699687957763672
    },
    {
      "epoch": 0.000123736572265625,
      "model_forward_time": 0.11435484886169434,
      "step": 20273
    },
    {
      "epoch": 0.000123736572265625,
      "step": 20273,
      "training_step_time": 0.4640939235687256
    },
    {
      "epoch": 0.00012374267578125,
      "model_forward_time": 0.11495184898376465,
      "step": 20274
    },
    {
      "epoch": 0.00012374267578125,
      "step": 20274,
      "training_step_time": 0.39806699752807617
    },
    {
      "epoch": 0.000123748779296875,
      "model_forward_time": 0.11436271667480469,
      "step": 20275
    },
    {
      "epoch": 0.000123748779296875,
      "step": 20275,
      "training_step_time": 0.41046142578125
    },
    {
      "epoch": 0.0001237548828125,
      "model_forward_time": 0.1145944595336914,
      "step": 20276
    },
    {
      "epoch": 0.0001237548828125,
      "step": 20276,
      "training_step_time": 0.38920092582702637
    },
    {
      "epoch": 0.000123760986328125,
      "model_forward_time": 0.11475491523742676,
      "step": 20277
    },
    {
      "epoch": 0.000123760986328125,
      "step": 20277,
      "training_step_time": 0.37720608711242676
    },
    {
      "epoch": 0.00012376708984375,
      "model_forward_time": 0.11432218551635742,
      "step": 20278
    },
    {
      "epoch": 0.00012376708984375,
      "step": 20278,
      "training_step_time": 0.40354347229003906
    },
    {
      "epoch": 0.000123773193359375,
      "model_forward_time": 0.11508965492248535,
      "step": 20279
    },
    {
      "epoch": 0.000123773193359375,
      "step": 20279,
      "training_step_time": 0.49660801887512207
    },
    {
      "epoch": 0.000123779296875,
      "grad_norm": 0.1616278737783432,
      "learning_rate": 7.898651737020166e-05,
      "loss": 0.0483,
      "step": 20280
    },
    {
      "epoch": 0.000123779296875,
      "model_forward_time": 0.11479783058166504,
      "step": 20280
    },
    {
      "epoch": 0.000123779296875,
      "step": 20280,
      "training_step_time": 0.42049551010131836
    },
    {
      "epoch": 0.000123785400390625,
      "model_forward_time": 0.11410045623779297,
      "step": 20281
    },
    {
      "epoch": 0.000123785400390625,
      "step": 20281,
      "training_step_time": 0.40737247467041016
    },
    {
      "epoch": 0.00012379150390625,
      "model_forward_time": 0.1146697998046875,
      "step": 20282
    },
    {
      "epoch": 0.00012379150390625,
      "step": 20282,
      "training_step_time": 0.4283936023712158
    },
    {
      "epoch": 0.000123797607421875,
      "model_forward_time": 0.11538171768188477,
      "step": 20283
    },
    {
      "epoch": 0.000123797607421875,
      "step": 20283,
      "training_step_time": 0.4871976375579834
    },
    {
      "epoch": 0.0001238037109375,
      "model_forward_time": 0.1140279769897461,
      "step": 20284
    },
    {
      "epoch": 0.0001238037109375,
      "step": 20284,
      "training_step_time": 0.3888704776763916
    },
    {
      "epoch": 0.000123809814453125,
      "model_forward_time": 0.1150963306427002,
      "step": 20285
    },
    {
      "epoch": 0.000123809814453125,
      "step": 20285,
      "training_step_time": 0.38757991790771484
    },
    {
      "epoch": 0.00012381591796875,
      "model_forward_time": 0.11552906036376953,
      "step": 20286
    },
    {
      "epoch": 0.00012381591796875,
      "step": 20286,
      "training_step_time": 0.44479894638061523
    },
    {
      "epoch": 0.000123822021484375,
      "model_forward_time": 0.11518621444702148,
      "step": 20287
    },
    {
      "epoch": 0.000123822021484375,
      "step": 20287,
      "training_step_time": 0.40757131576538086
    },
    {
      "epoch": 0.000123828125,
      "model_forward_time": 0.11597681045532227,
      "step": 20288
    },
    {
      "epoch": 0.000123828125,
      "step": 20288,
      "training_step_time": 0.39278674125671387
    },
    {
      "epoch": 0.000123834228515625,
      "model_forward_time": 0.11552619934082031,
      "step": 20289
    },
    {
      "epoch": 0.000123834228515625,
      "step": 20289,
      "training_step_time": 0.6236996650695801
    },
    {
      "epoch": 0.00012384033203125,
      "grad_norm": 0.17808014154434204,
      "learning_rate": 7.896405861713394e-05,
      "loss": 0.0451,
      "step": 20290
    },
    {
      "epoch": 0.00012384033203125,
      "model_forward_time": 0.11488676071166992,
      "step": 20290
    },
    {
      "epoch": 0.00012384033203125,
      "step": 20290,
      "training_step_time": 0.3836801052093506
    },
    {
      "epoch": 0.000123846435546875,
      "model_forward_time": 0.11519432067871094,
      "step": 20291
    },
    {
      "epoch": 0.000123846435546875,
      "step": 20291,
      "training_step_time": 0.3962225914001465
    },
    {
      "epoch": 0.0001238525390625,
      "model_forward_time": 0.11636757850646973,
      "step": 20292
    },
    {
      "epoch": 0.0001238525390625,
      "step": 20292,
      "training_step_time": 0.3949394226074219
    },
    {
      "epoch": 0.000123858642578125,
      "model_forward_time": 0.11526179313659668,
      "step": 20293
    },
    {
      "epoch": 0.000123858642578125,
      "step": 20293,
      "training_step_time": 0.4906270503997803
    },
    {
      "epoch": 0.00012386474609375,
      "model_forward_time": 0.11491107940673828,
      "step": 20294
    },
    {
      "epoch": 0.00012386474609375,
      "step": 20294,
      "training_step_time": 0.4961116313934326
    },
    {
      "epoch": 0.000123870849609375,
      "model_forward_time": 0.11529231071472168,
      "step": 20295
    },
    {
      "epoch": 0.000123870849609375,
      "step": 20295,
      "training_step_time": 0.39613962173461914
    },
    {
      "epoch": 0.000123876953125,
      "model_forward_time": 0.11470818519592285,
      "step": 20296
    },
    {
      "epoch": 0.000123876953125,
      "step": 20296,
      "training_step_time": 0.48391294479370117
    },
    {
      "epoch": 0.000123883056640625,
      "model_forward_time": 0.11524033546447754,
      "step": 20297
    },
    {
      "epoch": 0.000123883056640625,
      "step": 20297,
      "training_step_time": 0.38940882682800293
    },
    {
      "epoch": 0.00012388916015625,
      "model_forward_time": 0.11556077003479004,
      "step": 20298
    },
    {
      "epoch": 0.00012388916015625,
      "step": 20298,
      "training_step_time": 0.39171528816223145
    },
    {
      "epoch": 0.000123895263671875,
      "model_forward_time": 0.11466717720031738,
      "step": 20299
    },
    {
      "epoch": 0.000123895263671875,
      "step": 20299,
      "training_step_time": 0.4099149703979492
    },
    {
      "epoch": 0.0001239013671875,
      "grad_norm": 0.11249715834856033,
      "learning_rate": 7.894159106554997e-05,
      "loss": 0.0465,
      "step": 20300
    },
    {
      "epoch": 0.0001239013671875,
      "model_forward_time": 0.11481595039367676,
      "step": 20300
    },
    {
      "epoch": 0.0001239013671875,
      "step": 20300,
      "training_step_time": 0.407015323638916
    },
    {
      "epoch": 0.000123907470703125,
      "model_forward_time": 0.11475372314453125,
      "step": 20301
    },
    {
      "epoch": 0.000123907470703125,
      "step": 20301,
      "training_step_time": 0.5257387161254883
    },
    {
      "epoch": 0.00012391357421875,
      "model_forward_time": 0.1153109073638916,
      "step": 20302
    },
    {
      "epoch": 0.00012391357421875,
      "step": 20302,
      "training_step_time": 0.4009437561035156
    },
    {
      "epoch": 0.000123919677734375,
      "model_forward_time": 0.11483311653137207,
      "step": 20303
    },
    {
      "epoch": 0.000123919677734375,
      "step": 20303,
      "training_step_time": 0.404782772064209
    },
    {
      "epoch": 0.00012392578125,
      "model_forward_time": 0.11464238166809082,
      "step": 20304
    },
    {
      "epoch": 0.00012392578125,
      "step": 20304,
      "training_step_time": 0.39870381355285645
    },
    {
      "epoch": 0.000123931884765625,
      "model_forward_time": 0.1153562068939209,
      "step": 20305
    },
    {
      "epoch": 0.000123931884765625,
      "step": 20305,
      "training_step_time": 0.4046967029571533
    },
    {
      "epoch": 0.00012393798828125,
      "model_forward_time": 0.11521577835083008,
      "step": 20306
    },
    {
      "epoch": 0.00012393798828125,
      "step": 20306,
      "training_step_time": 0.43695068359375
    },
    {
      "epoch": 0.000123944091796875,
      "model_forward_time": 0.1155390739440918,
      "step": 20307
    },
    {
      "epoch": 0.000123944091796875,
      "step": 20307,
      "training_step_time": 0.6134493350982666
    },
    {
      "epoch": 0.0001239501953125,
      "model_forward_time": 0.1149299144744873,
      "step": 20308
    },
    {
      "epoch": 0.0001239501953125,
      "step": 20308,
      "training_step_time": 0.4456663131713867
    },
    {
      "epoch": 0.000123956298828125,
      "model_forward_time": 0.11466026306152344,
      "step": 20309
    },
    {
      "epoch": 0.000123956298828125,
      "step": 20309,
      "training_step_time": 0.39791321754455566
    },
    {
      "epoch": 0.00012396240234375,
      "grad_norm": 0.13312087953090668,
      "learning_rate": 7.891911472227478e-05,
      "loss": 0.049,
      "step": 20310
    },
    {
      "epoch": 0.00012396240234375,
      "model_forward_time": 0.1147317886352539,
      "step": 20310
    },
    {
      "epoch": 0.00012396240234375,
      "step": 20310,
      "training_step_time": 0.4891948699951172
    },
    {
      "epoch": 0.000123968505859375,
      "model_forward_time": 0.11463308334350586,
      "step": 20311
    },
    {
      "epoch": 0.000123968505859375,
      "step": 20311,
      "training_step_time": 0.38753557205200195
    },
    {
      "epoch": 0.000123974609375,
      "model_forward_time": 0.11499285697937012,
      "step": 20312
    },
    {
      "epoch": 0.000123974609375,
      "step": 20312,
      "training_step_time": 0.3979215621948242
    },
    {
      "epoch": 0.000123980712890625,
      "model_forward_time": 0.11492419242858887,
      "step": 20313
    },
    {
      "epoch": 0.000123980712890625,
      "step": 20313,
      "training_step_time": 0.5025973320007324
    },
    {
      "epoch": 0.00012398681640625,
      "model_forward_time": 0.11506772041320801,
      "step": 20314
    },
    {
      "epoch": 0.00012398681640625,
      "step": 20314,
      "training_step_time": 0.42482709884643555
    },
    {
      "epoch": 0.000123992919921875,
      "model_forward_time": 0.11412453651428223,
      "step": 20315
    },
    {
      "epoch": 0.000123992919921875,
      "step": 20315,
      "training_step_time": 0.41599440574645996
    },
    {
      "epoch": 0.0001239990234375,
      "model_forward_time": 0.11468505859375,
      "step": 20316
    },
    {
      "epoch": 0.0001239990234375,
      "step": 20316,
      "training_step_time": 0.5176401138305664
    },
    {
      "epoch": 0.000124005126953125,
      "model_forward_time": 0.11411452293395996,
      "step": 20317
    },
    {
      "epoch": 0.000124005126953125,
      "step": 20317,
      "training_step_time": 0.3923003673553467
    },
    {
      "epoch": 0.00012401123046875,
      "model_forward_time": 0.11446452140808105,
      "step": 20318
    },
    {
      "epoch": 0.00012401123046875,
      "step": 20318,
      "training_step_time": 0.39385056495666504
    },
    {
      "epoch": 0.000124017333984375,
      "model_forward_time": 0.11439847946166992,
      "step": 20319
    },
    {
      "epoch": 0.000124017333984375,
      "step": 20319,
      "training_step_time": 0.5174558162689209
    },
    {
      "epoch": 0.0001240234375,
      "grad_norm": 0.132886603474617,
      "learning_rate": 7.88966295941361e-05,
      "loss": 0.0465,
      "step": 20320
    },
    {
      "epoch": 0.0001240234375,
      "model_forward_time": 0.11424636840820312,
      "step": 20320
    },
    {
      "epoch": 0.0001240234375,
      "step": 20320,
      "training_step_time": 0.7383720874786377
    },
    {
      "epoch": 0.000124029541015625,
      "model_forward_time": 0.11449027061462402,
      "step": 20321
    },
    {
      "epoch": 0.000124029541015625,
      "step": 20321,
      "training_step_time": 0.4382796287536621
    },
    {
      "epoch": 0.00012403564453125,
      "model_forward_time": 0.11463475227355957,
      "step": 20322
    },
    {
      "epoch": 0.00012403564453125,
      "step": 20322,
      "training_step_time": 0.42711353302001953
    },
    {
      "epoch": 0.000124041748046875,
      "model_forward_time": 0.11453533172607422,
      "step": 20323
    },
    {
      "epoch": 0.000124041748046875,
      "step": 20323,
      "training_step_time": 0.41909289360046387
    },
    {
      "epoch": 0.0001240478515625,
      "model_forward_time": 0.11410903930664062,
      "step": 20324
    },
    {
      "epoch": 0.0001240478515625,
      "step": 20324,
      "training_step_time": 0.38239097595214844
    },
    {
      "epoch": 0.000124053955078125,
      "model_forward_time": 0.11401915550231934,
      "step": 20325
    },
    {
      "epoch": 0.000124053955078125,
      "step": 20325,
      "training_step_time": 0.38953638076782227
    },
    {
      "epoch": 0.00012406005859375,
      "model_forward_time": 0.11527824401855469,
      "step": 20326
    },
    {
      "epoch": 0.00012406005859375,
      "step": 20326,
      "training_step_time": 0.5107929706573486
    },
    {
      "epoch": 0.000124066162109375,
      "model_forward_time": 0.11483073234558105,
      "step": 20327
    },
    {
      "epoch": 0.000124066162109375,
      "step": 20327,
      "training_step_time": 0.41313838958740234
    },
    {
      "epoch": 0.000124072265625,
      "model_forward_time": 0.11435246467590332,
      "step": 20328
    },
    {
      "epoch": 0.000124072265625,
      "step": 20328,
      "training_step_time": 0.4133429527282715
    },
    {
      "epoch": 0.000124078369140625,
      "model_forward_time": 0.11473989486694336,
      "step": 20329
    },
    {
      "epoch": 0.000124078369140625,
      "step": 20329,
      "training_step_time": 0.4548337459564209
    },
    {
      "epoch": 0.00012408447265625,
      "grad_norm": 0.14806699752807617,
      "learning_rate": 7.887413568796433e-05,
      "loss": 0.0483,
      "step": 20330
    },
    {
      "epoch": 0.00012408447265625,
      "model_forward_time": 0.11528372764587402,
      "step": 20330
    },
    {
      "epoch": 0.00012408447265625,
      "step": 20330,
      "training_step_time": 0.39042234420776367
    },
    {
      "epoch": 0.000124090576171875,
      "model_forward_time": 0.11470603942871094,
      "step": 20331
    },
    {
      "epoch": 0.000124090576171875,
      "step": 20331,
      "training_step_time": 0.3903956413269043
    },
    {
      "epoch": 0.0001240966796875,
      "model_forward_time": 0.11562895774841309,
      "step": 20332
    },
    {
      "epoch": 0.0001240966796875,
      "step": 20332,
      "training_step_time": 0.9151790142059326
    },
    {
      "epoch": 0.000124102783203125,
      "model_forward_time": 0.11575508117675781,
      "step": 20333
    },
    {
      "epoch": 0.000124102783203125,
      "step": 20333,
      "training_step_time": 0.448169469833374
    },
    {
      "epoch": 0.00012410888671875,
      "model_forward_time": 0.11489033699035645,
      "step": 20334
    },
    {
      "epoch": 0.00012410888671875,
      "step": 20334,
      "training_step_time": 0.45842742919921875
    },
    {
      "epoch": 0.000124114990234375,
      "model_forward_time": 0.11384868621826172,
      "step": 20335
    },
    {
      "epoch": 0.000124114990234375,
      "step": 20335,
      "training_step_time": 0.42516350746154785
    },
    {
      "epoch": 0.00012412109375,
      "model_forward_time": 0.1143941879272461,
      "step": 20336
    },
    {
      "epoch": 0.00012412109375,
      "step": 20336,
      "training_step_time": 0.4407968521118164
    },
    {
      "epoch": 0.000124127197265625,
      "model_forward_time": 0.11426568031311035,
      "step": 20337
    },
    {
      "epoch": 0.000124127197265625,
      "step": 20337,
      "training_step_time": 0.41942381858825684
    },
    {
      "epoch": 0.00012413330078125,
      "model_forward_time": 0.11431646347045898,
      "step": 20338
    },
    {
      "epoch": 0.00012413330078125,
      "step": 20338,
      "training_step_time": 0.6151988506317139
    },
    {
      "epoch": 0.000124139404296875,
      "model_forward_time": 0.1146235466003418,
      "step": 20339
    },
    {
      "epoch": 0.000124139404296875,
      "step": 20339,
      "training_step_time": 0.393801212310791
    },
    {
      "epoch": 0.0001241455078125,
      "grad_norm": 0.20961681008338928,
      "learning_rate": 7.88516330105925e-05,
      "loss": 0.0571,
      "step": 20340
    },
    {
      "epoch": 0.0001241455078125,
      "model_forward_time": 0.1139223575592041,
      "step": 20340
    },
    {
      "epoch": 0.0001241455078125,
      "step": 20340,
      "training_step_time": 0.4198880195617676
    },
    {
      "epoch": 0.000124151611328125,
      "model_forward_time": 0.1143503189086914,
      "step": 20341
    },
    {
      "epoch": 0.000124151611328125,
      "step": 20341,
      "training_step_time": 0.3948509693145752
    },
    {
      "epoch": 0.00012415771484375,
      "model_forward_time": 0.11499547958374023,
      "step": 20342
    },
    {
      "epoch": 0.00012415771484375,
      "step": 20342,
      "training_step_time": 0.4292335510253906
    },
    {
      "epoch": 0.000124163818359375,
      "model_forward_time": 0.11474919319152832,
      "step": 20343
    },
    {
      "epoch": 0.000124163818359375,
      "step": 20343,
      "training_step_time": 0.38657212257385254
    },
    {
      "epoch": 0.000124169921875,
      "model_forward_time": 0.1155853271484375,
      "step": 20344
    },
    {
      "epoch": 0.000124169921875,
      "step": 20344,
      "training_step_time": 0.3908500671386719
    },
    {
      "epoch": 0.000124176025390625,
      "model_forward_time": 0.11541509628295898,
      "step": 20345
    },
    {
      "epoch": 0.000124176025390625,
      "step": 20345,
      "training_step_time": 0.37739038467407227
    },
    {
      "epoch": 0.00012418212890625,
      "model_forward_time": 0.1152184009552002,
      "step": 20346
    },
    {
      "epoch": 0.00012418212890625,
      "step": 20346,
      "training_step_time": 0.3949759006500244
    },
    {
      "epoch": 0.000124188232421875,
      "model_forward_time": 0.11518573760986328,
      "step": 20347
    },
    {
      "epoch": 0.000124188232421875,
      "step": 20347,
      "training_step_time": 0.5158791542053223
    },
    {
      "epoch": 0.0001241943359375,
      "model_forward_time": 0.11576104164123535,
      "step": 20348
    },
    {
      "epoch": 0.0001241943359375,
      "step": 20348,
      "training_step_time": 0.4926624298095703
    },
    {
      "epoch": 0.000124200439453125,
      "model_forward_time": 0.11436700820922852,
      "step": 20349
    },
    {
      "epoch": 0.000124200439453125,
      "step": 20349,
      "training_step_time": 0.42213940620422363
    },
    {
      "epoch": 0.00012420654296875,
      "grad_norm": 0.12769243121147156,
      "learning_rate": 7.882912156885637e-05,
      "loss": 0.0481,
      "step": 20350
    },
    {
      "epoch": 0.00012420654296875,
      "model_forward_time": 0.11621952056884766,
      "step": 20350
    },
    {
      "epoch": 0.00012420654296875,
      "step": 20350,
      "training_step_time": 0.3905520439147949
    },
    {
      "epoch": 0.000124212646484375,
      "model_forward_time": 0.11438632011413574,
      "step": 20351
    },
    {
      "epoch": 0.000124212646484375,
      "step": 20351,
      "training_step_time": 0.5023791790008545
    },
    {
      "epoch": 0.00012421875,
      "model_forward_time": 0.11478948593139648,
      "step": 20352
    },
    {
      "epoch": 0.00012421875,
      "step": 20352,
      "training_step_time": 0.4269728660583496
    },
    {
      "epoch": 0.000124224853515625,
      "model_forward_time": 0.11498713493347168,
      "step": 20353
    },
    {
      "epoch": 0.000124224853515625,
      "step": 20353,
      "training_step_time": 0.42078518867492676
    },
    {
      "epoch": 0.00012423095703125,
      "model_forward_time": 0.11424088478088379,
      "step": 20354
    },
    {
      "epoch": 0.00012423095703125,
      "step": 20354,
      "training_step_time": 0.43358540534973145
    },
    {
      "epoch": 0.000124237060546875,
      "model_forward_time": 0.1145930290222168,
      "step": 20355
    },
    {
      "epoch": 0.000124237060546875,
      "step": 20355,
      "training_step_time": 0.4780151844024658
    },
    {
      "epoch": 0.0001242431640625,
      "model_forward_time": 0.11472082138061523,
      "step": 20356
    },
    {
      "epoch": 0.0001242431640625,
      "step": 20356,
      "training_step_time": 0.431842565536499
    },
    {
      "epoch": 0.000124249267578125,
      "model_forward_time": 0.1154177188873291,
      "step": 20357
    },
    {
      "epoch": 0.000124249267578125,
      "step": 20357,
      "training_step_time": 0.3822002410888672
    },
    {
      "epoch": 0.00012425537109375,
      "model_forward_time": 0.11565947532653809,
      "step": 20358
    },
    {
      "epoch": 0.00012425537109375,
      "step": 20358,
      "training_step_time": 0.3958101272583008
    },
    {
      "epoch": 0.000124261474609375,
      "model_forward_time": 0.11469268798828125,
      "step": 20359
    },
    {
      "epoch": 0.000124261474609375,
      "step": 20359,
      "training_step_time": 0.40008044242858887
    },
    {
      "epoch": 0.000124267578125,
      "grad_norm": 0.157504141330719,
      "learning_rate": 7.880660136959428e-05,
      "loss": 0.0511,
      "step": 20360
    },
    {
      "epoch": 0.000124267578125,
      "model_forward_time": 0.11477041244506836,
      "step": 20360
    },
    {
      "epoch": 0.000124267578125,
      "step": 20360,
      "training_step_time": 0.39445066452026367
    },
    {
      "epoch": 0.000124273681640625,
      "model_forward_time": 0.1153874397277832,
      "step": 20361
    },
    {
      "epoch": 0.000124273681640625,
      "step": 20361,
      "training_step_time": 0.3983118534088135
    },
    {
      "epoch": 0.00012427978515625,
      "model_forward_time": 0.11593246459960938,
      "step": 20362
    },
    {
      "epoch": 0.00012427978515625,
      "step": 20362,
      "training_step_time": 0.5063533782958984
    },
    {
      "epoch": 0.000124285888671875,
      "model_forward_time": 0.1154634952545166,
      "step": 20363
    },
    {
      "epoch": 0.000124285888671875,
      "step": 20363,
      "training_step_time": 0.4461674690246582
    },
    {
      "epoch": 0.0001242919921875,
      "model_forward_time": 0.1153862476348877,
      "step": 20364
    },
    {
      "epoch": 0.0001242919921875,
      "step": 20364,
      "training_step_time": 0.5037937164306641
    },
    {
      "epoch": 0.000124298095703125,
      "model_forward_time": 0.11525726318359375,
      "step": 20365
    },
    {
      "epoch": 0.000124298095703125,
      "step": 20365,
      "training_step_time": 0.41628146171569824
    },
    {
      "epoch": 0.00012430419921875,
      "model_forward_time": 0.1143653392791748,
      "step": 20366
    },
    {
      "epoch": 0.00012430419921875,
      "step": 20366,
      "training_step_time": 0.3988316059112549
    },
    {
      "epoch": 0.000124310302734375,
      "model_forward_time": 0.115478515625,
      "step": 20367
    },
    {
      "epoch": 0.000124310302734375,
      "step": 20367,
      "training_step_time": 0.3964545726776123
    },
    {
      "epoch": 0.00012431640625,
      "model_forward_time": 0.11473536491394043,
      "step": 20368
    },
    {
      "epoch": 0.00012431640625,
      "step": 20368,
      "training_step_time": 0.3931145668029785
    },
    {
      "epoch": 0.000124322509765625,
      "model_forward_time": 0.11471724510192871,
      "step": 20369
    },
    {
      "epoch": 0.000124322509765625,
      "step": 20369,
      "training_step_time": 0.46126556396484375
    },
    {
      "epoch": 0.00012432861328125,
      "grad_norm": 0.16813082993030548,
      "learning_rate": 7.878407241964729e-05,
      "loss": 0.0461,
      "step": 20370
    },
    {
      "epoch": 0.00012432861328125,
      "model_forward_time": 0.11510396003723145,
      "step": 20370
    },
    {
      "epoch": 0.00012432861328125,
      "step": 20370,
      "training_step_time": 0.43883323669433594
    },
    {
      "epoch": 0.000124334716796875,
      "model_forward_time": 0.11484766006469727,
      "step": 20371
    },
    {
      "epoch": 0.000124334716796875,
      "step": 20371,
      "training_step_time": 0.39638733863830566
    },
    {
      "epoch": 0.0001243408203125,
      "model_forward_time": 0.11543464660644531,
      "step": 20372
    },
    {
      "epoch": 0.0001243408203125,
      "step": 20372,
      "training_step_time": 0.40126919746398926
    },
    {
      "epoch": 0.000124346923828125,
      "model_forward_time": 0.11469030380249023,
      "step": 20373
    },
    {
      "epoch": 0.000124346923828125,
      "step": 20373,
      "training_step_time": 0.40135955810546875
    },
    {
      "epoch": 0.00012435302734375,
      "model_forward_time": 0.11503124237060547,
      "step": 20374
    },
    {
      "epoch": 0.00012435302734375,
      "step": 20374,
      "training_step_time": 0.4060227870941162
    },
    {
      "epoch": 0.000124359130859375,
      "model_forward_time": 0.12604761123657227,
      "step": 20375
    },
    {
      "epoch": 0.000124359130859375,
      "step": 20375,
      "training_step_time": 0.39329028129577637
    },
    {
      "epoch": 0.000124365234375,
      "model_forward_time": 0.11552214622497559,
      "step": 20376
    },
    {
      "epoch": 0.000124365234375,
      "step": 20376,
      "training_step_time": 0.43501949310302734
    },
    {
      "epoch": 0.000124371337890625,
      "model_forward_time": 0.11532402038574219,
      "step": 20377
    },
    {
      "epoch": 0.000124371337890625,
      "step": 20377,
      "training_step_time": 0.4059727191925049
    },
    {
      "epoch": 0.00012437744140625,
      "model_forward_time": 0.1153104305267334,
      "step": 20378
    },
    {
      "epoch": 0.00012437744140625,
      "step": 20378,
      "training_step_time": 0.46455883979797363
    },
    {
      "epoch": 0.000124383544921875,
      "model_forward_time": 0.11510801315307617,
      "step": 20379
    },
    {
      "epoch": 0.000124383544921875,
      "step": 20379,
      "training_step_time": 0.40465259552001953
    },
    {
      "epoch": 0.0001243896484375,
      "grad_norm": 0.14515326917171478,
      "learning_rate": 7.87615347258591e-05,
      "loss": 0.0461,
      "step": 20380
    },
    {
      "epoch": 0.0001243896484375,
      "model_forward_time": 0.11468076705932617,
      "step": 20380
    },
    {
      "epoch": 0.0001243896484375,
      "step": 20380,
      "training_step_time": 0.4140589237213135
    },
    {
      "epoch": 0.000124395751953125,
      "model_forward_time": 0.11506056785583496,
      "step": 20381
    },
    {
      "epoch": 0.000124395751953125,
      "step": 20381,
      "training_step_time": 0.39037489891052246
    },
    {
      "epoch": 0.00012440185546875,
      "model_forward_time": 0.11517548561096191,
      "step": 20382
    },
    {
      "epoch": 0.00012440185546875,
      "step": 20382,
      "training_step_time": 0.39694690704345703
    },
    {
      "epoch": 0.000124407958984375,
      "model_forward_time": 0.11543679237365723,
      "step": 20383
    },
    {
      "epoch": 0.000124407958984375,
      "step": 20383,
      "training_step_time": 0.39640331268310547
    },
    {
      "epoch": 0.0001244140625,
      "model_forward_time": 0.11529207229614258,
      "step": 20384
    },
    {
      "epoch": 0.0001244140625,
      "step": 20384,
      "training_step_time": 0.48342132568359375
    },
    {
      "epoch": 0.000124420166015625,
      "model_forward_time": 0.11642932891845703,
      "step": 20385
    },
    {
      "epoch": 0.000124420166015625,
      "step": 20385,
      "training_step_time": 0.47298407554626465
    },
    {
      "epoch": 0.00012442626953125,
      "model_forward_time": 0.1145927906036377,
      "step": 20386
    },
    {
      "epoch": 0.00012442626953125,
      "step": 20386,
      "training_step_time": 0.38540148735046387
    },
    {
      "epoch": 0.000124432373046875,
      "model_forward_time": 0.11539244651794434,
      "step": 20387
    },
    {
      "epoch": 0.000124432373046875,
      "step": 20387,
      "training_step_time": 0.3973500728607178
    },
    {
      "epoch": 0.0001244384765625,
      "model_forward_time": 0.11469864845275879,
      "step": 20388
    },
    {
      "epoch": 0.0001244384765625,
      "step": 20388,
      "training_step_time": 0.3984558582305908
    },
    {
      "epoch": 0.000124444580078125,
      "model_forward_time": 0.11584115028381348,
      "step": 20389
    },
    {
      "epoch": 0.000124444580078125,
      "step": 20389,
      "training_step_time": 0.39884257316589355
    },
    {
      "epoch": 0.00012445068359375,
      "grad_norm": 0.10665438324213028,
      "learning_rate": 7.873898829507606e-05,
      "loss": 0.0498,
      "step": 20390
    },
    {
      "epoch": 0.00012445068359375,
      "model_forward_time": 0.11597108840942383,
      "step": 20390
    },
    {
      "epoch": 0.00012445068359375,
      "step": 20390,
      "training_step_time": 0.38158178329467773
    },
    {
      "epoch": 0.000124456787109375,
      "model_forward_time": 0.11502337455749512,
      "step": 20391
    },
    {
      "epoch": 0.000124456787109375,
      "step": 20391,
      "training_step_time": 0.43711113929748535
    },
    {
      "epoch": 0.000124462890625,
      "model_forward_time": 0.11546826362609863,
      "step": 20392
    },
    {
      "epoch": 0.000124462890625,
      "step": 20392,
      "training_step_time": 0.4156458377838135
    },
    {
      "epoch": 0.000124468994140625,
      "model_forward_time": 0.11476659774780273,
      "step": 20393
    },
    {
      "epoch": 0.000124468994140625,
      "step": 20393,
      "training_step_time": 0.3854024410247803
    },
    {
      "epoch": 0.00012447509765625,
      "model_forward_time": 0.11591744422912598,
      "step": 20394
    },
    {
      "epoch": 0.00012447509765625,
      "step": 20394,
      "training_step_time": 0.4319648742675781
    },
    {
      "epoch": 0.000124481201171875,
      "model_forward_time": 0.11578059196472168,
      "step": 20395
    },
    {
      "epoch": 0.000124481201171875,
      "step": 20395,
      "training_step_time": 0.41026735305786133
    },
    {
      "epoch": 0.0001244873046875,
      "model_forward_time": 0.11518406867980957,
      "step": 20396
    },
    {
      "epoch": 0.0001244873046875,
      "step": 20396,
      "training_step_time": 0.3998525142669678
    },
    {
      "epoch": 0.000124493408203125,
      "model_forward_time": 0.11485028266906738,
      "step": 20397
    },
    {
      "epoch": 0.000124493408203125,
      "step": 20397,
      "training_step_time": 0.4601624011993408
    },
    {
      "epoch": 0.00012449951171875,
      "model_forward_time": 0.11664199829101562,
      "step": 20398
    },
    {
      "epoch": 0.00012449951171875,
      "step": 20398,
      "training_step_time": 0.41782450675964355
    },
    {
      "epoch": 0.000124505615234375,
      "model_forward_time": 0.11505270004272461,
      "step": 20399
    },
    {
      "epoch": 0.000124505615234375,
      "step": 20399,
      "training_step_time": 0.4303884506225586
    },
    {
      "epoch": 0.00012451171875,
      "grad_norm": 0.20856013894081116,
      "learning_rate": 7.871643313414718e-05,
      "loss": 0.0437,
      "step": 20400
    },
    {
      "epoch": 0.00012451171875,
      "model_forward_time": 0.1147158145904541,
      "step": 20400
    },
    {
      "epoch": 0.00012451171875,
      "step": 20400,
      "training_step_time": 0.4917721748352051
    },
    {
      "epoch": 0.000124517822265625,
      "model_forward_time": 0.11470746994018555,
      "step": 20401
    },
    {
      "epoch": 0.000124517822265625,
      "step": 20401,
      "training_step_time": 0.40366220474243164
    },
    {
      "epoch": 0.00012452392578125,
      "model_forward_time": 0.1153111457824707,
      "step": 20402
    },
    {
      "epoch": 0.00012452392578125,
      "step": 20402,
      "training_step_time": 0.4141819477081299
    },
    {
      "epoch": 0.000124530029296875,
      "model_forward_time": 0.11455249786376953,
      "step": 20403
    },
    {
      "epoch": 0.000124530029296875,
      "step": 20403,
      "training_step_time": 0.5396115779876709
    },
    {
      "epoch": 0.0001245361328125,
      "model_forward_time": 0.11471700668334961,
      "step": 20404
    },
    {
      "epoch": 0.0001245361328125,
      "step": 20404,
      "training_step_time": 0.3636903762817383
    },
    {
      "epoch": 0.000124542236328125,
      "model_forward_time": 0.11440825462341309,
      "step": 20405
    },
    {
      "epoch": 0.000124542236328125,
      "step": 20405,
      "training_step_time": 0.44177675247192383
    },
    {
      "epoch": 0.00012454833984375,
      "model_forward_time": 0.11502361297607422,
      "step": 20406
    },
    {
      "epoch": 0.00012454833984375,
      "step": 20406,
      "training_step_time": 0.42256808280944824
    },
    {
      "epoch": 0.000124554443359375,
      "model_forward_time": 0.11523699760437012,
      "step": 20407
    },
    {
      "epoch": 0.000124554443359375,
      "step": 20407,
      "training_step_time": 0.41098666191101074
    },
    {
      "epoch": 0.000124560546875,
      "model_forward_time": 0.11490392684936523,
      "step": 20408
    },
    {
      "epoch": 0.000124560546875,
      "step": 20408,
      "training_step_time": 0.3991060256958008
    },
    {
      "epoch": 0.000124566650390625,
      "model_forward_time": 0.11519885063171387,
      "step": 20409
    },
    {
      "epoch": 0.000124566650390625,
      "step": 20409,
      "training_step_time": 0.5963389873504639
    },
    {
      "epoch": 0.00012457275390625,
      "grad_norm": 0.20376218855381012,
      "learning_rate": 7.869386924992414e-05,
      "loss": 0.05,
      "step": 20410
    },
    {
      "epoch": 0.00012457275390625,
      "model_forward_time": 0.11426043510437012,
      "step": 20410
    },
    {
      "epoch": 0.00012457275390625,
      "step": 20410,
      "training_step_time": 0.3831779956817627
    },
    {
      "epoch": 0.000124578857421875,
      "model_forward_time": 0.11453461647033691,
      "step": 20411
    },
    {
      "epoch": 0.000124578857421875,
      "step": 20411,
      "training_step_time": 0.39285993576049805
    },
    {
      "epoch": 0.0001245849609375,
      "model_forward_time": 0.11499524116516113,
      "step": 20412
    },
    {
      "epoch": 0.0001245849609375,
      "step": 20412,
      "training_step_time": 0.43277597427368164
    },
    {
      "epoch": 0.000124591064453125,
      "model_forward_time": 0.11478328704833984,
      "step": 20413
    },
    {
      "epoch": 0.000124591064453125,
      "step": 20413,
      "training_step_time": 0.40737247467041016
    },
    {
      "epoch": 0.00012459716796875,
      "model_forward_time": 0.11494255065917969,
      "step": 20414
    },
    {
      "epoch": 0.00012459716796875,
      "step": 20414,
      "training_step_time": 0.4983065128326416
    },
    {
      "epoch": 0.000124603271484375,
      "model_forward_time": 0.11490631103515625,
      "step": 20415
    },
    {
      "epoch": 0.000124603271484375,
      "step": 20415,
      "training_step_time": 0.46998143196105957
    },
    {
      "epoch": 0.000124609375,
      "model_forward_time": 0.11513423919677734,
      "step": 20416
    },
    {
      "epoch": 0.000124609375,
      "step": 20416,
      "training_step_time": 0.3876035213470459
    },
    {
      "epoch": 0.000124615478515625,
      "model_forward_time": 0.11466550827026367,
      "step": 20417
    },
    {
      "epoch": 0.000124615478515625,
      "step": 20417,
      "training_step_time": 0.38877081871032715
    },
    {
      "epoch": 0.00012462158203125,
      "model_forward_time": 0.1145174503326416,
      "step": 20418
    },
    {
      "epoch": 0.00012462158203125,
      "step": 20418,
      "training_step_time": 0.36606812477111816
    },
    {
      "epoch": 0.000124627685546875,
      "model_forward_time": 0.11516928672790527,
      "step": 20419
    },
    {
      "epoch": 0.000124627685546875,
      "step": 20419,
      "training_step_time": 0.44361400604248047
    },
    {
      "epoch": 0.0001246337890625,
      "grad_norm": 0.16333797574043274,
      "learning_rate": 7.867129664926123e-05,
      "loss": 0.0456,
      "step": 20420
    },
    {
      "epoch": 0.0001246337890625,
      "model_forward_time": 0.11520814895629883,
      "step": 20420
    },
    {
      "epoch": 0.0001246337890625,
      "step": 20420,
      "training_step_time": 0.39963364601135254
    },
    {
      "epoch": 0.000124639892578125,
      "model_forward_time": 0.11512494087219238,
      "step": 20421
    },
    {
      "epoch": 0.000124639892578125,
      "step": 20421,
      "training_step_time": 0.4109959602355957
    },
    {
      "epoch": 0.00012464599609375,
      "model_forward_time": 0.11498713493347168,
      "step": 20422
    },
    {
      "epoch": 0.00012464599609375,
      "step": 20422,
      "training_step_time": 0.443436861038208
    },
    {
      "epoch": 0.000124652099609375,
      "model_forward_time": 0.11550688743591309,
      "step": 20423
    },
    {
      "epoch": 0.000124652099609375,
      "step": 20423,
      "training_step_time": 0.4738175868988037
    },
    {
      "epoch": 0.000124658203125,
      "model_forward_time": 0.11595582962036133,
      "step": 20424
    },
    {
      "epoch": 0.000124658203125,
      "step": 20424,
      "training_step_time": 0.3901357650756836
    },
    {
      "epoch": 0.000124664306640625,
      "model_forward_time": 0.1146397590637207,
      "step": 20425
    },
    {
      "epoch": 0.000124664306640625,
      "step": 20425,
      "training_step_time": 0.3824021816253662
    },
    {
      "epoch": 0.00012467041015625,
      "model_forward_time": 0.11541318893432617,
      "step": 20426
    },
    {
      "epoch": 0.00012467041015625,
      "step": 20426,
      "training_step_time": 0.44558215141296387
    },
    {
      "epoch": 0.000124676513671875,
      "model_forward_time": 0.11558794975280762,
      "step": 20427
    },
    {
      "epoch": 0.000124676513671875,
      "step": 20427,
      "training_step_time": 0.4115290641784668
    },
    {
      "epoch": 0.0001246826171875,
      "model_forward_time": 0.11520934104919434,
      "step": 20428
    },
    {
      "epoch": 0.0001246826171875,
      "step": 20428,
      "training_step_time": 0.3991694450378418
    },
    {
      "epoch": 0.000124688720703125,
      "model_forward_time": 0.11533379554748535,
      "step": 20429
    },
    {
      "epoch": 0.000124688720703125,
      "step": 20429,
      "training_step_time": 0.38849878311157227
    },
    {
      "epoch": 0.00012469482421875,
      "grad_norm": 0.15390445291996002,
      "learning_rate": 7.864871533901544e-05,
      "loss": 0.0518,
      "step": 20430
    },
    {
      "epoch": 0.00012469482421875,
      "model_forward_time": 0.11513614654541016,
      "step": 20430
    },
    {
      "epoch": 0.00012469482421875,
      "step": 20430,
      "training_step_time": 0.4006009101867676
    },
    {
      "epoch": 0.000124700927734375,
      "model_forward_time": 0.1144871711730957,
      "step": 20431
    },
    {
      "epoch": 0.000124700927734375,
      "step": 20431,
      "training_step_time": 0.38789939880371094
    },
    {
      "epoch": 0.00012470703125,
      "model_forward_time": 0.11558890342712402,
      "step": 20432
    },
    {
      "epoch": 0.00012470703125,
      "step": 20432,
      "training_step_time": 0.39177417755126953
    },
    {
      "epoch": 0.000124713134765625,
      "model_forward_time": 0.11518096923828125,
      "step": 20433
    },
    {
      "epoch": 0.000124713134765625,
      "step": 20433,
      "training_step_time": 0.541999340057373
    },
    {
      "epoch": 0.00012471923828125,
      "model_forward_time": 0.11466145515441895,
      "step": 20434
    },
    {
      "epoch": 0.00012471923828125,
      "step": 20434,
      "training_step_time": 0.4552175998687744
    },
    {
      "epoch": 0.000124725341796875,
      "model_forward_time": 0.1154336929321289,
      "step": 20435
    },
    {
      "epoch": 0.000124725341796875,
      "step": 20435,
      "training_step_time": 0.38454222679138184
    },
    {
      "epoch": 0.0001247314453125,
      "model_forward_time": 0.11551308631896973,
      "step": 20436
    },
    {
      "epoch": 0.0001247314453125,
      "step": 20436,
      "training_step_time": 0.41405153274536133
    },
    {
      "epoch": 0.000124737548828125,
      "model_forward_time": 0.11522245407104492,
      "step": 20437
    },
    {
      "epoch": 0.000124737548828125,
      "step": 20437,
      "training_step_time": 0.49305272102355957
    },
    {
      "epoch": 0.00012474365234375,
      "model_forward_time": 0.11425185203552246,
      "step": 20438
    },
    {
      "epoch": 0.00012474365234375,
      "step": 20438,
      "training_step_time": 0.3884599208831787
    },
    {
      "epoch": 0.000124749755859375,
      "model_forward_time": 0.1151881217956543,
      "step": 20439
    },
    {
      "epoch": 0.000124749755859375,
      "step": 20439,
      "training_step_time": 0.4855172634124756
    },
    {
      "epoch": 0.000124755859375,
      "grad_norm": 0.15385088324546814,
      "learning_rate": 7.862612532604632e-05,
      "loss": 0.0518,
      "step": 20440
    },
    {
      "epoch": 0.000124755859375,
      "model_forward_time": 0.11455321311950684,
      "step": 20440
    },
    {
      "epoch": 0.000124755859375,
      "step": 20440,
      "training_step_time": 0.40622663497924805
    },
    {
      "epoch": 0.000124761962890625,
      "model_forward_time": 0.11483216285705566,
      "step": 20441
    },
    {
      "epoch": 0.000124761962890625,
      "step": 20441,
      "training_step_time": 0.4057803153991699
    },
    {
      "epoch": 0.00012476806640625,
      "model_forward_time": 0.11536526679992676,
      "step": 20442
    },
    {
      "epoch": 0.00012476806640625,
      "step": 20442,
      "training_step_time": 0.4429497718811035
    },
    {
      "epoch": 0.000124774169921875,
      "model_forward_time": 0.11523199081420898,
      "step": 20443
    },
    {
      "epoch": 0.000124774169921875,
      "step": 20443,
      "training_step_time": 0.39641499519348145
    },
    {
      "epoch": 0.0001247802734375,
      "model_forward_time": 0.1148684024810791,
      "step": 20444
    },
    {
      "epoch": 0.0001247802734375,
      "step": 20444,
      "training_step_time": 0.40561676025390625
    },
    {
      "epoch": 0.000124786376953125,
      "model_forward_time": 0.1153712272644043,
      "step": 20445
    },
    {
      "epoch": 0.000124786376953125,
      "step": 20445,
      "training_step_time": 0.733640193939209
    },
    {
      "epoch": 0.00012479248046875,
      "model_forward_time": 0.11568903923034668,
      "step": 20446
    },
    {
      "epoch": 0.00012479248046875,
      "step": 20446,
      "training_step_time": 0.4075510501861572
    },
    {
      "epoch": 0.000124798583984375,
      "model_forward_time": 0.11431002616882324,
      "step": 20447
    },
    {
      "epoch": 0.000124798583984375,
      "step": 20447,
      "training_step_time": 0.46829938888549805
    },
    {
      "epoch": 0.0001248046875,
      "model_forward_time": 0.11453962326049805,
      "step": 20448
    },
    {
      "epoch": 0.0001248046875,
      "step": 20448,
      "training_step_time": 0.4017033576965332
    },
    {
      "epoch": 0.000124810791015625,
      "model_forward_time": 0.11443257331848145,
      "step": 20449
    },
    {
      "epoch": 0.000124810791015625,
      "step": 20449,
      "training_step_time": 0.4491560459136963
    },
    {
      "epoch": 0.00012481689453125,
      "grad_norm": 0.13836956024169922,
      "learning_rate": 7.860352661721619e-05,
      "loss": 0.0442,
      "step": 20450
    },
    {
      "epoch": 0.00012481689453125,
      "model_forward_time": 0.11442327499389648,
      "step": 20450
    },
    {
      "epoch": 0.00012481689453125,
      "step": 20450,
      "training_step_time": 0.38664865493774414
    },
    {
      "epoch": 0.000124822998046875,
      "model_forward_time": 0.11463689804077148,
      "step": 20451
    },
    {
      "epoch": 0.000124822998046875,
      "step": 20451,
      "training_step_time": 0.40477848052978516
    },
    {
      "epoch": 0.0001248291015625,
      "model_forward_time": 0.1168670654296875,
      "step": 20452
    },
    {
      "epoch": 0.0001248291015625,
      "step": 20452,
      "training_step_time": 0.3843724727630615
    },
    {
      "epoch": 0.000124835205078125,
      "model_forward_time": 0.11487722396850586,
      "step": 20453
    },
    {
      "epoch": 0.000124835205078125,
      "step": 20453,
      "training_step_time": 0.38840246200561523
    },
    {
      "epoch": 0.00012484130859375,
      "model_forward_time": 0.11515069007873535,
      "step": 20454
    },
    {
      "epoch": 0.00012484130859375,
      "step": 20454,
      "training_step_time": 0.40503668785095215
    },
    {
      "epoch": 0.000124847412109375,
      "model_forward_time": 0.11464786529541016,
      "step": 20455
    },
    {
      "epoch": 0.000124847412109375,
      "step": 20455,
      "training_step_time": 0.45977020263671875
    },
    {
      "epoch": 0.000124853515625,
      "model_forward_time": 0.11505460739135742,
      "step": 20456
    },
    {
      "epoch": 0.000124853515625,
      "step": 20456,
      "training_step_time": 0.3945176601409912
    },
    {
      "epoch": 0.000124859619140625,
      "model_forward_time": 0.11510419845581055,
      "step": 20457
    },
    {
      "epoch": 0.000124859619140625,
      "step": 20457,
      "training_step_time": 0.40102529525756836
    },
    {
      "epoch": 0.00012486572265625,
      "model_forward_time": 0.1154181957244873,
      "step": 20458
    },
    {
      "epoch": 0.00012486572265625,
      "step": 20458,
      "training_step_time": 0.3894782066345215
    },
    {
      "epoch": 0.000124871826171875,
      "model_forward_time": 0.11519670486450195,
      "step": 20459
    },
    {
      "epoch": 0.000124871826171875,
      "step": 20459,
      "training_step_time": 0.40801429748535156
    },
    {
      "epoch": 0.0001248779296875,
      "grad_norm": 0.15459701418876648,
      "learning_rate": 7.858091921938988e-05,
      "loss": 0.0469,
      "step": 20460
    },
    {
      "epoch": 0.0001248779296875,
      "model_forward_time": 0.1151728630065918,
      "step": 20460
    },
    {
      "epoch": 0.0001248779296875,
      "step": 20460,
      "training_step_time": 0.402956485748291
    },
    {
      "epoch": 0.000124884033203125,
      "model_forward_time": 0.11549758911132812,
      "step": 20461
    },
    {
      "epoch": 0.000124884033203125,
      "step": 20461,
      "training_step_time": 0.4460008144378662
    },
    {
      "epoch": 0.00012489013671875,
      "model_forward_time": 0.11510515213012695,
      "step": 20462
    },
    {
      "epoch": 0.00012489013671875,
      "step": 20462,
      "training_step_time": 0.40385913848876953
    },
    {
      "epoch": 0.000124896240234375,
      "model_forward_time": 0.11589574813842773,
      "step": 20463
    },
    {
      "epoch": 0.000124896240234375,
      "step": 20463,
      "training_step_time": 0.41265296936035156
    },
    {
      "epoch": 0.00012490234375,
      "model_forward_time": 0.11449837684631348,
      "step": 20464
    },
    {
      "epoch": 0.00012490234375,
      "step": 20464,
      "training_step_time": 0.45122385025024414
    },
    {
      "epoch": 0.000124908447265625,
      "model_forward_time": 0.1147453784942627,
      "step": 20465
    },
    {
      "epoch": 0.000124908447265625,
      "step": 20465,
      "training_step_time": 0.4137406349182129
    },
    {
      "epoch": 0.00012491455078125,
      "model_forward_time": 0.11470317840576172,
      "step": 20466
    },
    {
      "epoch": 0.00012491455078125,
      "step": 20466,
      "training_step_time": 0.42145252227783203
    },
    {
      "epoch": 0.000124920654296875,
      "model_forward_time": 0.1142418384552002,
      "step": 20467
    },
    {
      "epoch": 0.000124920654296875,
      "step": 20467,
      "training_step_time": 0.39098477363586426
    },
    {
      "epoch": 0.0001249267578125,
      "model_forward_time": 0.11495065689086914,
      "step": 20468
    },
    {
      "epoch": 0.0001249267578125,
      "step": 20468,
      "training_step_time": 0.39047741889953613
    },
    {
      "epoch": 0.000124932861328125,
      "model_forward_time": 0.11526226997375488,
      "step": 20469
    },
    {
      "epoch": 0.000124932861328125,
      "step": 20469,
      "training_step_time": 0.3849475383758545
    },
    {
      "epoch": 0.00012493896484375,
      "grad_norm": 0.16816546022891998,
      "learning_rate": 7.855830313943497e-05,
      "loss": 0.0479,
      "step": 20470
    },
    {
      "epoch": 0.00012493896484375,
      "model_forward_time": 0.1147310733795166,
      "step": 20470
    },
    {
      "epoch": 0.00012493896484375,
      "step": 20470,
      "training_step_time": 0.43183159828186035
    },
    {
      "epoch": 0.000124945068359375,
      "model_forward_time": 0.11538815498352051,
      "step": 20471
    },
    {
      "epoch": 0.000124945068359375,
      "step": 20471,
      "training_step_time": 0.4115030765533447
    },
    {
      "epoch": 0.000124951171875,
      "model_forward_time": 0.11518526077270508,
      "step": 20472
    },
    {
      "epoch": 0.000124951171875,
      "step": 20472,
      "training_step_time": 0.4944941997528076
    },
    {
      "epoch": 0.000124957275390625,
      "model_forward_time": 0.1154778003692627,
      "step": 20473
    },
    {
      "epoch": 0.000124957275390625,
      "step": 20473,
      "training_step_time": 0.3950474262237549
    },
    {
      "epoch": 0.00012496337890625,
      "model_forward_time": 0.11449003219604492,
      "step": 20474
    },
    {
      "epoch": 0.00012496337890625,
      "step": 20474,
      "training_step_time": 0.43136167526245117
    },
    {
      "epoch": 0.000124969482421875,
      "model_forward_time": 0.11463189125061035,
      "step": 20475
    },
    {
      "epoch": 0.000124969482421875,
      "step": 20475,
      "training_step_time": 0.36499524116516113
    },
    {
      "epoch": 0.0001249755859375,
      "model_forward_time": 0.11532115936279297,
      "step": 20476
    },
    {
      "epoch": 0.0001249755859375,
      "step": 20476,
      "training_step_time": 0.41406869888305664
    },
    {
      "epoch": 0.000124981689453125,
      "model_forward_time": 0.11418366432189941,
      "step": 20477
    },
    {
      "epoch": 0.000124981689453125,
      "step": 20477,
      "training_step_time": 0.41551947593688965
    },
    {
      "epoch": 0.00012498779296875,
      "model_forward_time": 0.11461400985717773,
      "step": 20478
    },
    {
      "epoch": 0.00012498779296875,
      "step": 20478,
      "training_step_time": 0.4470973014831543
    },
    {
      "epoch": 0.000124993896484375,
      "model_forward_time": 0.11415600776672363,
      "step": 20479
    },
    {
      "epoch": 0.000124993896484375,
      "step": 20479,
      "training_step_time": 0.3948945999145508
    },
    {
      "epoch": 0.000125,
      "grad_norm": 0.17210900783538818,
      "learning_rate": 7.85356783842216e-05,
      "loss": 0.0457,
      "step": 20480
    },
    {
      "epoch": 0.000125,
      "model_forward_time": 0.11434602737426758,
      "step": 20480
    },
    {
      "epoch": 0.000125,
      "step": 20480,
      "training_step_time": 0.44496941566467285
    },
    {
      "epoch": 0.000125006103515625,
      "model_forward_time": 0.11420845985412598,
      "step": 20481
    },
    {
      "epoch": 0.000125006103515625,
      "step": 20481,
      "training_step_time": 0.40192580223083496
    },
    {
      "epoch": 0.00012501220703125,
      "model_forward_time": 0.11430120468139648,
      "step": 20482
    },
    {
      "epoch": 0.00012501220703125,
      "step": 20482,
      "training_step_time": 0.3940284252166748
    },
    {
      "epoch": 0.000125018310546875,
      "model_forward_time": 0.11496925354003906,
      "step": 20483
    },
    {
      "epoch": 0.000125018310546875,
      "step": 20483,
      "training_step_time": 0.4040791988372803
    },
    {
      "epoch": 0.0001250244140625,
      "model_forward_time": 0.11492013931274414,
      "step": 20484
    },
    {
      "epoch": 0.0001250244140625,
      "step": 20484,
      "training_step_time": 0.4648773670196533
    },
    {
      "epoch": 0.000125030517578125,
      "model_forward_time": 0.11531186103820801,
      "step": 20485
    },
    {
      "epoch": 0.000125030517578125,
      "step": 20485,
      "training_step_time": 0.4109530448913574
    },
    {
      "epoch": 0.00012503662109375,
      "model_forward_time": 0.11513376235961914,
      "step": 20486
    },
    {
      "epoch": 0.00012503662109375,
      "step": 20486,
      "training_step_time": 0.41831350326538086
    },
    {
      "epoch": 0.000125042724609375,
      "model_forward_time": 0.11658811569213867,
      "step": 20487
    },
    {
      "epoch": 0.000125042724609375,
      "step": 20487,
      "training_step_time": 0.397904634475708
    },
    {
      "epoch": 0.000125048828125,
      "model_forward_time": 0.11579465866088867,
      "step": 20488
    },
    {
      "epoch": 0.000125048828125,
      "step": 20488,
      "training_step_time": 0.4179956912994385
    },
    {
      "epoch": 0.000125054931640625,
      "model_forward_time": 0.1152808666229248,
      "step": 20489
    },
    {
      "epoch": 0.000125054931640625,
      "step": 20489,
      "training_step_time": 0.3991668224334717
    },
    {
      "epoch": 0.00012506103515625,
      "grad_norm": 0.12591317296028137,
      "learning_rate": 7.851304496062254e-05,
      "loss": 0.0469,
      "step": 20490
    },
    {
      "epoch": 0.00012506103515625,
      "model_forward_time": 0.11527204513549805,
      "step": 20490
    },
    {
      "epoch": 0.00012506103515625,
      "step": 20490,
      "training_step_time": 0.36532068252563477
    },
    {
      "epoch": 0.000125067138671875,
      "model_forward_time": 0.11504435539245605,
      "step": 20491
    },
    {
      "epoch": 0.000125067138671875,
      "step": 20491,
      "training_step_time": 0.38936281204223633
    },
    {
      "epoch": 0.0001250732421875,
      "model_forward_time": 0.11533069610595703,
      "step": 20492
    },
    {
      "epoch": 0.0001250732421875,
      "step": 20492,
      "training_step_time": 0.48623156547546387
    },
    {
      "epoch": 0.000125079345703125,
      "model_forward_time": 0.11448144912719727,
      "step": 20493
    },
    {
      "epoch": 0.000125079345703125,
      "step": 20493,
      "training_step_time": 0.45559000968933105
    },
    {
      "epoch": 0.00012508544921875,
      "model_forward_time": 0.11526727676391602,
      "step": 20494
    },
    {
      "epoch": 0.00012508544921875,
      "step": 20494,
      "training_step_time": 0.3965756893157959
    },
    {
      "epoch": 0.000125091552734375,
      "model_forward_time": 0.11437845230102539,
      "step": 20495
    },
    {
      "epoch": 0.000125091552734375,
      "step": 20495,
      "training_step_time": 0.41826486587524414
    },
    {
      "epoch": 0.00012509765625,
      "model_forward_time": 0.1150979995727539,
      "step": 20496
    },
    {
      "epoch": 0.00012509765625,
      "step": 20496,
      "training_step_time": 0.39412879943847656
    },
    {
      "epoch": 0.000125103759765625,
      "model_forward_time": 0.1147005558013916,
      "step": 20497
    },
    {
      "epoch": 0.000125103759765625,
      "step": 20497,
      "training_step_time": 0.3880312442779541
    },
    {
      "epoch": 0.00012510986328125,
      "model_forward_time": 0.11547517776489258,
      "step": 20498
    },
    {
      "epoch": 0.00012510986328125,
      "step": 20498,
      "training_step_time": 0.42321228981018066
    },
    {
      "epoch": 0.000125115966796875,
      "model_forward_time": 0.11445975303649902,
      "step": 20499
    },
    {
      "epoch": 0.000125115966796875,
      "step": 20499,
      "training_step_time": 0.3931901454925537
    },
    {
      "epoch": 0.0001251220703125,
      "grad_norm": 0.16451051831245422,
      "learning_rate": 7.849040287551331e-05,
      "loss": 0.0456,
      "step": 20500
    },
    {
      "epoch": 0.0001251220703125,
      "model_forward_time": 0.11527872085571289,
      "step": 20500
    },
    {
      "epoch": 0.0001251220703125,
      "step": 20500,
      "training_step_time": 0.45131611824035645
    },
    {
      "epoch": 0.000125128173828125,
      "model_forward_time": 0.11540436744689941,
      "step": 20501
    },
    {
      "epoch": 0.000125128173828125,
      "step": 20501,
      "training_step_time": 0.4377298355102539
    },
    {
      "epoch": 0.00012513427734375,
      "model_forward_time": 0.11550498008728027,
      "step": 20502
    },
    {
      "epoch": 0.00012513427734375,
      "step": 20502,
      "training_step_time": 0.41205763816833496
    },
    {
      "epoch": 0.000125140380859375,
      "model_forward_time": 0.1154026985168457,
      "step": 20503
    },
    {
      "epoch": 0.000125140380859375,
      "step": 20503,
      "training_step_time": 0.39281272888183594
    },
    {
      "epoch": 0.000125146484375,
      "model_forward_time": 0.11594033241271973,
      "step": 20504
    },
    {
      "epoch": 0.000125146484375,
      "step": 20504,
      "training_step_time": 0.3916664123535156
    },
    {
      "epoch": 0.000125152587890625,
      "model_forward_time": 0.11579251289367676,
      "step": 20505
    },
    {
      "epoch": 0.000125152587890625,
      "step": 20505,
      "training_step_time": 0.43909192085266113
    },
    {
      "epoch": 0.00012515869140625,
      "model_forward_time": 0.11537933349609375,
      "step": 20506
    },
    {
      "epoch": 0.00012515869140625,
      "step": 20506,
      "training_step_time": 0.3877878189086914
    },
    {
      "epoch": 0.000125164794921875,
      "model_forward_time": 0.11545133590698242,
      "step": 20507
    },
    {
      "epoch": 0.000125164794921875,
      "step": 20507,
      "training_step_time": 0.3959627151489258
    },
    {
      "epoch": 0.0001251708984375,
      "model_forward_time": 0.11487030982971191,
      "step": 20508
    },
    {
      "epoch": 0.0001251708984375,
      "step": 20508,
      "training_step_time": 0.4058713912963867
    },
    {
      "epoch": 0.000125177001953125,
      "model_forward_time": 0.11458373069763184,
      "step": 20509
    },
    {
      "epoch": 0.000125177001953125,
      "step": 20509,
      "training_step_time": 0.4332275390625
    },
    {
      "epoch": 0.00012518310546875,
      "grad_norm": 0.13872727751731873,
      "learning_rate": 7.846775213577192e-05,
      "loss": 0.0528,
      "step": 20510
    },
    {
      "epoch": 0.00012518310546875,
      "model_forward_time": 0.11419677734375,
      "step": 20510
    },
    {
      "epoch": 0.00012518310546875,
      "step": 20510,
      "training_step_time": 0.45081067085266113
    },
    {
      "epoch": 0.000125189208984375,
      "model_forward_time": 0.11458659172058105,
      "step": 20511
    },
    {
      "epoch": 0.000125189208984375,
      "step": 20511,
      "training_step_time": 0.3756134510040283
    },
    {
      "epoch": 0.0001251953125,
      "model_forward_time": 0.11474013328552246,
      "step": 20512
    },
    {
      "epoch": 0.0001251953125,
      "step": 20512,
      "training_step_time": 0.3944816589355469
    },
    {
      "epoch": 0.000125201416015625,
      "model_forward_time": 0.11487984657287598,
      "step": 20513
    },
    {
      "epoch": 0.000125201416015625,
      "step": 20513,
      "training_step_time": 0.3933703899383545
    },
    {
      "epoch": 0.00012520751953125,
      "model_forward_time": 0.1150064468383789,
      "step": 20514
    },
    {
      "epoch": 0.00012520751953125,
      "step": 20514,
      "training_step_time": 0.4861025810241699
    },
    {
      "epoch": 0.000125213623046875,
      "model_forward_time": 0.1147310733795166,
      "step": 20515
    },
    {
      "epoch": 0.000125213623046875,
      "step": 20515,
      "training_step_time": 0.46462225914001465
    },
    {
      "epoch": 0.0001252197265625,
      "model_forward_time": 0.11532044410705566,
      "step": 20516
    },
    {
      "epoch": 0.0001252197265625,
      "step": 20516,
      "training_step_time": 0.38799262046813965
    },
    {
      "epoch": 0.000125225830078125,
      "model_forward_time": 0.11556839942932129,
      "step": 20517
    },
    {
      "epoch": 0.000125225830078125,
      "step": 20517,
      "training_step_time": 0.3813185691833496
    },
    {
      "epoch": 0.00012523193359375,
      "model_forward_time": 0.11567878723144531,
      "step": 20518
    },
    {
      "epoch": 0.00012523193359375,
      "step": 20518,
      "training_step_time": 0.3860771656036377
    },
    {
      "epoch": 0.000125238037109375,
      "model_forward_time": 0.11474418640136719,
      "step": 20519
    },
    {
      "epoch": 0.000125238037109375,
      "step": 20519,
      "training_step_time": 0.38518643379211426
    },
    {
      "epoch": 0.000125244140625,
      "grad_norm": 0.1840657740831375,
      "learning_rate": 7.844509274827907e-05,
      "loss": 0.0489,
      "step": 20520
    },
    {
      "epoch": 0.000125244140625,
      "model_forward_time": 0.11468362808227539,
      "step": 20520
    },
    {
      "epoch": 0.000125244140625,
      "step": 20520,
      "training_step_time": 0.39876770973205566
    },
    {
      "epoch": 0.000125250244140625,
      "model_forward_time": 0.1159217357635498,
      "step": 20521
    },
    {
      "epoch": 0.000125250244140625,
      "step": 20521,
      "training_step_time": 0.49403929710388184
    },
    {
      "epoch": 0.00012525634765625,
      "model_forward_time": 0.1151890754699707,
      "step": 20522
    },
    {
      "epoch": 0.00012525634765625,
      "step": 20522,
      "training_step_time": 0.48169445991516113
    },
    {
      "epoch": 0.000125262451171875,
      "model_forward_time": 0.11473751068115234,
      "step": 20523
    },
    {
      "epoch": 0.000125262451171875,
      "step": 20523,
      "training_step_time": 0.38541507720947266
    },
    {
      "epoch": 0.0001252685546875,
      "model_forward_time": 0.11495041847229004,
      "step": 20524
    },
    {
      "epoch": 0.0001252685546875,
      "step": 20524,
      "training_step_time": 0.3866119384765625
    },
    {
      "epoch": 0.000125274658203125,
      "model_forward_time": 0.11483120918273926,
      "step": 20525
    },
    {
      "epoch": 0.000125274658203125,
      "step": 20525,
      "training_step_time": 0.38608479499816895
    },
    {
      "epoch": 0.00012528076171875,
      "model_forward_time": 0.11507129669189453,
      "step": 20526
    },
    {
      "epoch": 0.00012528076171875,
      "step": 20526,
      "training_step_time": 0.4008479118347168
    },
    {
      "epoch": 0.000125286865234375,
      "model_forward_time": 0.11498570442199707,
      "step": 20527
    },
    {
      "epoch": 0.000125286865234375,
      "step": 20527,
      "training_step_time": 0.39066410064697266
    },
    {
      "epoch": 0.00012529296875,
      "model_forward_time": 0.11516284942626953,
      "step": 20528
    },
    {
      "epoch": 0.00012529296875,
      "step": 20528,
      "training_step_time": 0.4313526153564453
    },
    {
      "epoch": 0.000125299072265625,
      "model_forward_time": 0.1155693531036377,
      "step": 20529
    },
    {
      "epoch": 0.000125299072265625,
      "step": 20529,
      "training_step_time": 0.41948628425598145
    },
    {
      "epoch": 0.00012530517578125,
      "grad_norm": 0.13179396092891693,
      "learning_rate": 7.842242471991809e-05,
      "loss": 0.0471,
      "step": 20530
    },
    {
      "epoch": 0.00012530517578125,
      "model_forward_time": 0.11527752876281738,
      "step": 20530
    },
    {
      "epoch": 0.00012530517578125,
      "step": 20530,
      "training_step_time": 0.44727420806884766
    },
    {
      "epoch": 0.000125311279296875,
      "model_forward_time": 0.11533164978027344,
      "step": 20531
    },
    {
      "epoch": 0.000125311279296875,
      "step": 20531,
      "training_step_time": 0.3891885280609131
    },
    {
      "epoch": 0.0001253173828125,
      "model_forward_time": 0.11545920372009277,
      "step": 20532
    },
    {
      "epoch": 0.0001253173828125,
      "step": 20532,
      "training_step_time": 0.39119839668273926
    },
    {
      "epoch": 0.000125323486328125,
      "model_forward_time": 0.11455035209655762,
      "step": 20533
    },
    {
      "epoch": 0.000125323486328125,
      "step": 20533,
      "training_step_time": 0.399219274520874
    },
    {
      "epoch": 0.00012532958984375,
      "model_forward_time": 0.11488509178161621,
      "step": 20534
    },
    {
      "epoch": 0.00012532958984375,
      "step": 20534,
      "training_step_time": 0.36441588401794434
    },
    {
      "epoch": 0.000125335693359375,
      "model_forward_time": 0.11519598960876465,
      "step": 20535
    },
    {
      "epoch": 0.000125335693359375,
      "step": 20535,
      "training_step_time": 0.43117499351501465
    },
    {
      "epoch": 0.000125341796875,
      "model_forward_time": 0.1147458553314209,
      "step": 20536
    },
    {
      "epoch": 0.000125341796875,
      "step": 20536,
      "training_step_time": 0.499788761138916
    },
    {
      "epoch": 0.000125347900390625,
      "model_forward_time": 0.11474204063415527,
      "step": 20537
    },
    {
      "epoch": 0.000125347900390625,
      "step": 20537,
      "training_step_time": 0.3914368152618408
    },
    {
      "epoch": 0.00012535400390625,
      "model_forward_time": 0.11529159545898438,
      "step": 20538
    },
    {
      "epoch": 0.00012535400390625,
      "step": 20538,
      "training_step_time": 0.4035160541534424
    },
    {
      "epoch": 0.000125360107421875,
      "model_forward_time": 0.11456131935119629,
      "step": 20539
    },
    {
      "epoch": 0.000125360107421875,
      "step": 20539,
      "training_step_time": 0.5158569812774658
    },
    {
      "epoch": 0.0001253662109375,
      "grad_norm": 0.12202507257461548,
      "learning_rate": 7.839974805757496e-05,
      "loss": 0.051,
      "step": 20540
    },
    {
      "epoch": 0.0001253662109375,
      "model_forward_time": 0.11476540565490723,
      "step": 20540
    },
    {
      "epoch": 0.0001253662109375,
      "step": 20540,
      "training_step_time": 0.39000821113586426
    },
    {
      "epoch": 0.000125372314453125,
      "model_forward_time": 0.11546015739440918,
      "step": 20541
    },
    {
      "epoch": 0.000125372314453125,
      "step": 20541,
      "training_step_time": 0.3819866180419922
    },
    {
      "epoch": 0.00012537841796875,
      "model_forward_time": 0.11475896835327148,
      "step": 20542
    },
    {
      "epoch": 0.00012537841796875,
      "step": 20542,
      "training_step_time": 0.39080238342285156
    },
    {
      "epoch": 0.000125384521484375,
      "model_forward_time": 0.11539721488952637,
      "step": 20543
    },
    {
      "epoch": 0.000125384521484375,
      "step": 20543,
      "training_step_time": 0.4592266082763672
    },
    {
      "epoch": 0.000125390625,
      "model_forward_time": 0.11430072784423828,
      "step": 20544
    },
    {
      "epoch": 0.000125390625,
      "step": 20544,
      "training_step_time": 0.44144201278686523
    },
    {
      "epoch": 0.000125396728515625,
      "model_forward_time": 0.11484026908874512,
      "step": 20545
    },
    {
      "epoch": 0.000125396728515625,
      "step": 20545,
      "training_step_time": 0.3947257995605469
    },
    {
      "epoch": 0.00012540283203125,
      "model_forward_time": 0.11443662643432617,
      "step": 20546
    },
    {
      "epoch": 0.00012540283203125,
      "step": 20546,
      "training_step_time": 0.46357107162475586
    },
    {
      "epoch": 0.000125408935546875,
      "model_forward_time": 0.11465764045715332,
      "step": 20547
    },
    {
      "epoch": 0.000125408935546875,
      "step": 20547,
      "training_step_time": 0.38094472885131836
    },
    {
      "epoch": 0.0001254150390625,
      "model_forward_time": 0.11509251594543457,
      "step": 20548
    },
    {
      "epoch": 0.0001254150390625,
      "step": 20548,
      "training_step_time": 0.3982973098754883
    },
    {
      "epoch": 0.000125421142578125,
      "model_forward_time": 0.11454343795776367,
      "step": 20549
    },
    {
      "epoch": 0.000125421142578125,
      "step": 20549,
      "training_step_time": 0.3917250633239746
    },
    {
      "epoch": 0.00012542724609375,
      "grad_norm": 0.11601780354976654,
      "learning_rate": 7.837706276813819e-05,
      "loss": 0.0455,
      "step": 20550
    },
    {
      "epoch": 0.00012542724609375,
      "model_forward_time": 0.11475372314453125,
      "step": 20550
    },
    {
      "epoch": 0.00012542724609375,
      "step": 20550,
      "training_step_time": 0.4286808967590332
    },
    {
      "epoch": 0.000125433349609375,
      "model_forward_time": 0.11444473266601562,
      "step": 20551
    },
    {
      "epoch": 0.000125433349609375,
      "step": 20551,
      "training_step_time": 0.44667887687683105
    },
    {
      "epoch": 0.000125439453125,
      "model_forward_time": 0.11502408981323242,
      "step": 20552
    },
    {
      "epoch": 0.000125439453125,
      "step": 20552,
      "training_step_time": 0.5959875583648682
    },
    {
      "epoch": 0.000125445556640625,
      "model_forward_time": 0.11412692070007324,
      "step": 20553
    },
    {
      "epoch": 0.000125445556640625,
      "step": 20553,
      "training_step_time": 0.46330738067626953
    },
    {
      "epoch": 0.00012545166015625,
      "model_forward_time": 0.11424493789672852,
      "step": 20554
    },
    {
      "epoch": 0.00012545166015625,
      "step": 20554,
      "training_step_time": 0.3807051181793213
    },
    {
      "epoch": 0.000125457763671875,
      "model_forward_time": 0.1149137020111084,
      "step": 20555
    },
    {
      "epoch": 0.000125457763671875,
      "step": 20555,
      "training_step_time": 0.3929567337036133
    },
    {
      "epoch": 0.0001254638671875,
      "model_forward_time": 0.11503458023071289,
      "step": 20556
    },
    {
      "epoch": 0.0001254638671875,
      "step": 20556,
      "training_step_time": 0.44464898109436035
    },
    {
      "epoch": 0.000125469970703125,
      "model_forward_time": 0.11464548110961914,
      "step": 20557
    },
    {
      "epoch": 0.000125469970703125,
      "step": 20557,
      "training_step_time": 0.4161808490753174
    },
    {
      "epoch": 0.00012547607421875,
      "model_forward_time": 0.11441206932067871,
      "step": 20558
    },
    {
      "epoch": 0.00012547607421875,
      "step": 20558,
      "training_step_time": 0.4180283546447754
    },
    {
      "epoch": 0.000125482177734375,
      "model_forward_time": 0.11473298072814941,
      "step": 20559
    },
    {
      "epoch": 0.000125482177734375,
      "step": 20559,
      "training_step_time": 0.39042043685913086
    },
    {
      "epoch": 0.00012548828125,
      "grad_norm": 0.13654305040836334,
      "learning_rate": 7.835436885849902e-05,
      "loss": 0.049,
      "step": 20560
    },
    {
      "epoch": 0.00012548828125,
      "model_forward_time": 0.11521220207214355,
      "step": 20560
    },
    {
      "epoch": 0.00012548828125,
      "step": 20560,
      "training_step_time": 0.395404577255249
    },
    {
      "epoch": 0.000125494384765625,
      "model_forward_time": 0.1147773265838623,
      "step": 20561
    },
    {
      "epoch": 0.000125494384765625,
      "step": 20561,
      "training_step_time": 0.3933753967285156
    },
    {
      "epoch": 0.00012550048828125,
      "model_forward_time": 0.11534476280212402,
      "step": 20562
    },
    {
      "epoch": 0.00012550048828125,
      "step": 20562,
      "training_step_time": 0.3913745880126953
    },
    {
      "epoch": 0.000125506591796875,
      "model_forward_time": 0.11477017402648926,
      "step": 20563
    },
    {
      "epoch": 0.000125506591796875,
      "step": 20563,
      "training_step_time": 0.3648490905761719
    },
    {
      "epoch": 0.0001255126953125,
      "model_forward_time": 0.11581087112426758,
      "step": 20564
    },
    {
      "epoch": 0.0001255126953125,
      "step": 20564,
      "training_step_time": 0.6365993022918701
    },
    {
      "epoch": 0.000125518798828125,
      "model_forward_time": 0.11417245864868164,
      "step": 20565
    },
    {
      "epoch": 0.000125518798828125,
      "step": 20565,
      "training_step_time": 0.5082206726074219
    },
    {
      "epoch": 0.00012552490234375,
      "model_forward_time": 0.11458373069763184,
      "step": 20566
    },
    {
      "epoch": 0.00012552490234375,
      "step": 20566,
      "training_step_time": 0.39761853218078613
    },
    {
      "epoch": 0.000125531005859375,
      "model_forward_time": 0.11444735527038574,
      "step": 20567
    },
    {
      "epoch": 0.000125531005859375,
      "step": 20567,
      "training_step_time": 0.47914934158325195
    },
    {
      "epoch": 0.000125537109375,
      "model_forward_time": 0.11409211158752441,
      "step": 20568
    },
    {
      "epoch": 0.000125537109375,
      "step": 20568,
      "training_step_time": 0.3884117603302002
    },
    {
      "epoch": 0.000125543212890625,
      "model_forward_time": 0.11441993713378906,
      "step": 20569
    },
    {
      "epoch": 0.000125543212890625,
      "step": 20569,
      "training_step_time": 0.4029414653778076
    },
    {
      "epoch": 0.00012554931640625,
      "grad_norm": 0.16780585050582886,
      "learning_rate": 7.833166633555124e-05,
      "loss": 0.0473,
      "step": 20570
    },
    {
      "epoch": 0.00012554931640625,
      "model_forward_time": 0.11463356018066406,
      "step": 20570
    },
    {
      "epoch": 0.00012554931640625,
      "step": 20570,
      "training_step_time": 0.4254601001739502
    },
    {
      "epoch": 0.000125555419921875,
      "model_forward_time": 0.115264892578125,
      "step": 20571
    },
    {
      "epoch": 0.000125555419921875,
      "step": 20571,
      "training_step_time": 0.43589091300964355
    },
    {
      "epoch": 0.0001255615234375,
      "model_forward_time": 0.11544156074523926,
      "step": 20572
    },
    {
      "epoch": 0.0001255615234375,
      "step": 20572,
      "training_step_time": 0.40810155868530273
    },
    {
      "epoch": 0.000125567626953125,
      "model_forward_time": 0.11483144760131836,
      "step": 20573
    },
    {
      "epoch": 0.000125567626953125,
      "step": 20573,
      "training_step_time": 0.41069865226745605
    },
    {
      "epoch": 0.00012557373046875,
      "model_forward_time": 0.11513829231262207,
      "step": 20574
    },
    {
      "epoch": 0.00012557373046875,
      "step": 20574,
      "training_step_time": 0.39833617210388184
    },
    {
      "epoch": 0.000125579833984375,
      "model_forward_time": 0.11522102355957031,
      "step": 20575
    },
    {
      "epoch": 0.000125579833984375,
      "step": 20575,
      "training_step_time": 0.3994903564453125
    },
    {
      "epoch": 0.0001255859375,
      "model_forward_time": 0.11504197120666504,
      "step": 20576
    },
    {
      "epoch": 0.0001255859375,
      "step": 20576,
      "training_step_time": 0.6283817291259766
    },
    {
      "epoch": 0.000125592041015625,
      "model_forward_time": 0.11488842964172363,
      "step": 20577
    },
    {
      "epoch": 0.000125592041015625,
      "step": 20577,
      "training_step_time": 0.366718053817749
    },
    {
      "epoch": 0.00012559814453125,
      "model_forward_time": 0.11509823799133301,
      "step": 20578
    },
    {
      "epoch": 0.00012559814453125,
      "step": 20578,
      "training_step_time": 0.44875645637512207
    },
    {
      "epoch": 0.000125604248046875,
      "model_forward_time": 0.11538982391357422,
      "step": 20579
    },
    {
      "epoch": 0.000125604248046875,
      "step": 20579,
      "training_step_time": 0.4725525379180908
    },
    {
      "epoch": 0.0001256103515625,
      "grad_norm": 0.16556674242019653,
      "learning_rate": 7.830895520619128e-05,
      "loss": 0.052,
      "step": 20580
    },
    {
      "epoch": 0.0001256103515625,
      "model_forward_time": 0.11446976661682129,
      "step": 20580
    },
    {
      "epoch": 0.0001256103515625,
      "step": 20580,
      "training_step_time": 0.39174342155456543
    },
    {
      "epoch": 0.000125616455078125,
      "model_forward_time": 0.11442852020263672,
      "step": 20581
    },
    {
      "epoch": 0.000125616455078125,
      "step": 20581,
      "training_step_time": 0.49304795265197754
    },
    {
      "epoch": 0.00012562255859375,
      "model_forward_time": 0.11539888381958008,
      "step": 20582
    },
    {
      "epoch": 0.00012562255859375,
      "step": 20582,
      "training_step_time": 0.4533722400665283
    },
    {
      "epoch": 0.000125628662109375,
      "model_forward_time": 0.11535334587097168,
      "step": 20583
    },
    {
      "epoch": 0.000125628662109375,
      "step": 20583,
      "training_step_time": 0.4212982654571533
    },
    {
      "epoch": 0.000125634765625,
      "model_forward_time": 0.11448812484741211,
      "step": 20584
    },
    {
      "epoch": 0.000125634765625,
      "step": 20584,
      "training_step_time": 0.38777661323547363
    },
    {
      "epoch": 0.000125640869140625,
      "model_forward_time": 0.1158287525177002,
      "step": 20585
    },
    {
      "epoch": 0.000125640869140625,
      "step": 20585,
      "training_step_time": 0.42937397956848145
    },
    {
      "epoch": 0.00012564697265625,
      "model_forward_time": 0.11470866203308105,
      "step": 20586
    },
    {
      "epoch": 0.00012564697265625,
      "step": 20586,
      "training_step_time": 0.43721652030944824
    },
    {
      "epoch": 0.000125653076171875,
      "model_forward_time": 0.1324763298034668,
      "step": 20587
    },
    {
      "epoch": 0.000125653076171875,
      "step": 20587,
      "training_step_time": 0.3905928134918213
    },
    {
      "epoch": 0.0001256591796875,
      "model_forward_time": 0.11518383026123047,
      "step": 20588
    },
    {
      "epoch": 0.0001256591796875,
      "step": 20588,
      "training_step_time": 0.5849463939666748
    },
    {
      "epoch": 0.000125665283203125,
      "model_forward_time": 0.11487865447998047,
      "step": 20589
    },
    {
      "epoch": 0.000125665283203125,
      "step": 20589,
      "training_step_time": 0.38909006118774414
    },
    {
      "epoch": 0.00012567138671875,
      "grad_norm": 0.19044658541679382,
      "learning_rate": 7.828623547731818e-05,
      "loss": 0.0448,
      "step": 20590
    },
    {
      "epoch": 0.00012567138671875,
      "model_forward_time": 0.11537837982177734,
      "step": 20590
    },
    {
      "epoch": 0.00012567138671875,
      "step": 20590,
      "training_step_time": 0.38855934143066406
    },
    {
      "epoch": 0.000125677490234375,
      "model_forward_time": 0.11472463607788086,
      "step": 20591
    },
    {
      "epoch": 0.000125677490234375,
      "step": 20591,
      "training_step_time": 0.36490774154663086
    },
    {
      "epoch": 0.00012568359375,
      "model_forward_time": 0.11456155776977539,
      "step": 20592
    },
    {
      "epoch": 0.00012568359375,
      "step": 20592,
      "training_step_time": 0.4490511417388916
    },
    {
      "epoch": 0.000125689697265625,
      "model_forward_time": 0.11477231979370117,
      "step": 20593
    },
    {
      "epoch": 0.000125689697265625,
      "step": 20593,
      "training_step_time": 0.41715455055236816
    },
    {
      "epoch": 0.00012569580078125,
      "model_forward_time": 0.11429858207702637,
      "step": 20594
    },
    {
      "epoch": 0.00012569580078125,
      "step": 20594,
      "training_step_time": 0.5239753723144531
    },
    {
      "epoch": 0.000125701904296875,
      "model_forward_time": 0.11458539962768555,
      "step": 20595
    },
    {
      "epoch": 0.000125701904296875,
      "step": 20595,
      "training_step_time": 0.4397890567779541
    },
    {
      "epoch": 0.0001257080078125,
      "model_forward_time": 0.11499381065368652,
      "step": 20596
    },
    {
      "epoch": 0.0001257080078125,
      "step": 20596,
      "training_step_time": 0.38369131088256836
    },
    {
      "epoch": 0.000125714111328125,
      "model_forward_time": 0.11471867561340332,
      "step": 20597
    },
    {
      "epoch": 0.000125714111328125,
      "step": 20597,
      "training_step_time": 0.3927628993988037
    },
    {
      "epoch": 0.00012572021484375,
      "model_forward_time": 0.11496329307556152,
      "step": 20598
    },
    {
      "epoch": 0.00012572021484375,
      "step": 20598,
      "training_step_time": 0.39246654510498047
    },
    {
      "epoch": 0.000125726318359375,
      "model_forward_time": 0.11512088775634766,
      "step": 20599
    },
    {
      "epoch": 0.000125726318359375,
      "step": 20599,
      "training_step_time": 0.39861345291137695
    },
    {
      "epoch": 0.000125732421875,
      "grad_norm": 0.20551389455795288,
      "learning_rate": 7.82635071558336e-05,
      "loss": 0.0528,
      "step": 20600
    },
    {
      "epoch": 0.000125732421875,
      "model_forward_time": 0.11489176750183105,
      "step": 20600
    },
    {
      "epoch": 0.000125732421875,
      "step": 20600,
      "training_step_time": 0.627934455871582
    },
    {
      "epoch": 0.000125738525390625,
      "model_forward_time": 0.11515450477600098,
      "step": 20601
    },
    {
      "epoch": 0.000125738525390625,
      "step": 20601,
      "training_step_time": 0.4047200679779053
    },
    {
      "epoch": 0.00012574462890625,
      "model_forward_time": 0.11455011367797852,
      "step": 20602
    },
    {
      "epoch": 0.00012574462890625,
      "step": 20602,
      "training_step_time": 0.3869657516479492
    },
    {
      "epoch": 0.000125750732421875,
      "model_forward_time": 0.11477160453796387,
      "step": 20603
    },
    {
      "epoch": 0.000125750732421875,
      "step": 20603,
      "training_step_time": 0.39566516876220703
    },
    {
      "epoch": 0.0001257568359375,
      "model_forward_time": 0.11485171318054199,
      "step": 20604
    },
    {
      "epoch": 0.0001257568359375,
      "step": 20604,
      "training_step_time": 0.3968958854675293
    },
    {
      "epoch": 0.000125762939453125,
      "model_forward_time": 0.11402583122253418,
      "step": 20605
    },
    {
      "epoch": 0.000125762939453125,
      "step": 20605,
      "training_step_time": 0.36377382278442383
    },
    {
      "epoch": 0.00012576904296875,
      "model_forward_time": 0.11605644226074219,
      "step": 20606
    },
    {
      "epoch": 0.00012576904296875,
      "step": 20606,
      "training_step_time": 0.5411224365234375
    },
    {
      "epoch": 0.000125775146484375,
      "model_forward_time": 0.11605358123779297,
      "step": 20607
    },
    {
      "epoch": 0.000125775146484375,
      "step": 20607,
      "training_step_time": 0.4438612461090088
    },
    {
      "epoch": 0.00012578125,
      "model_forward_time": 0.11413455009460449,
      "step": 20608
    },
    {
      "epoch": 0.00012578125,
      "step": 20608,
      "training_step_time": 0.41300106048583984
    },
    {
      "epoch": 0.000125787353515625,
      "model_forward_time": 0.11391663551330566,
      "step": 20609
    },
    {
      "epoch": 0.000125787353515625,
      "step": 20609,
      "training_step_time": 0.4177718162536621
    },
    {
      "epoch": 0.00012579345703125,
      "grad_norm": 0.16808287799358368,
      "learning_rate": 7.824077024864179e-05,
      "loss": 0.0513,
      "step": 20610
    },
    {
      "epoch": 0.00012579345703125,
      "model_forward_time": 0.11442732810974121,
      "step": 20610
    },
    {
      "epoch": 0.00012579345703125,
      "step": 20610,
      "training_step_time": 0.39327335357666016
    },
    {
      "epoch": 0.000125799560546875,
      "model_forward_time": 0.11384081840515137,
      "step": 20611
    },
    {
      "epoch": 0.000125799560546875,
      "step": 20611,
      "training_step_time": 0.38930249214172363
    },
    {
      "epoch": 0.0001258056640625,
      "model_forward_time": 0.11453628540039062,
      "step": 20612
    },
    {
      "epoch": 0.0001258056640625,
      "step": 20612,
      "training_step_time": 0.5691215991973877
    },
    {
      "epoch": 0.000125811767578125,
      "model_forward_time": 0.11545395851135254,
      "step": 20613
    },
    {
      "epoch": 0.000125811767578125,
      "step": 20613,
      "training_step_time": 0.417417049407959
    },
    {
      "epoch": 0.00012581787109375,
      "model_forward_time": 0.11873459815979004,
      "step": 20614
    },
    {
      "epoch": 0.00012581787109375,
      "step": 20614,
      "training_step_time": 0.5639746189117432
    },
    {
      "epoch": 0.000125823974609375,
      "model_forward_time": 0.11641788482666016,
      "step": 20615
    },
    {
      "epoch": 0.000125823974609375,
      "step": 20615,
      "training_step_time": 0.5680513381958008
    },
    {
      "epoch": 0.000125830078125,
      "model_forward_time": 0.12013745307922363,
      "step": 20616
    },
    {
      "epoch": 0.000125830078125,
      "step": 20616,
      "training_step_time": 0.6417925357818604
    },
    {
      "epoch": 0.000125836181640625,
      "model_forward_time": 0.11608147621154785,
      "step": 20617
    },
    {
      "epoch": 0.000125836181640625,
      "step": 20617,
      "training_step_time": 0.7487621307373047
    },
    {
      "epoch": 0.00012584228515625,
      "model_forward_time": 0.11819314956665039,
      "step": 20618
    },
    {
      "epoch": 0.00012584228515625,
      "step": 20618,
      "training_step_time": 0.7576947212219238
    },
    {
      "epoch": 0.000125848388671875,
      "model_forward_time": 0.1367664337158203,
      "step": 20619
    },
    {
      "epoch": 0.000125848388671875,
      "step": 20619,
      "training_step_time": 0.7450301647186279
    },
    {
      "epoch": 0.0001258544921875,
      "grad_norm": 0.1264304667711258,
      "learning_rate": 7.821802476264966e-05,
      "loss": 0.0461,
      "step": 20620
    },
    {
      "epoch": 0.0001258544921875,
      "model_forward_time": 0.11754846572875977,
      "step": 20620
    },
    {
      "epoch": 0.0001258544921875,
      "step": 20620,
      "training_step_time": 0.7398450374603271
    },
    {
      "epoch": 0.000125860595703125,
      "model_forward_time": 0.11873626708984375,
      "step": 20621
    },
    {
      "epoch": 0.000125860595703125,
      "step": 20621,
      "training_step_time": 0.7250833511352539
    },
    {
      "epoch": 0.00012586669921875,
      "model_forward_time": 0.11963891983032227,
      "step": 20622
    },
    {
      "epoch": 0.00012586669921875,
      "step": 20622,
      "training_step_time": 0.7663478851318359
    },
    {
      "epoch": 0.000125872802734375,
      "model_forward_time": 0.11862707138061523,
      "step": 20623
    },
    {
      "epoch": 0.000125872802734375,
      "step": 20623,
      "training_step_time": 0.7665457725524902
    },
    {
      "epoch": 0.00012587890625,
      "model_forward_time": 0.11802029609680176,
      "step": 20624
    },
    {
      "epoch": 0.00012587890625,
      "step": 20624,
      "training_step_time": 0.6744077205657959
    },
    {
      "epoch": 0.000125885009765625,
      "model_forward_time": 0.12161111831665039,
      "step": 20625
    },
    {
      "epoch": 0.000125885009765625,
      "step": 20625,
      "training_step_time": 0.7005043029785156
    },
    {
      "epoch": 0.00012589111328125,
      "model_forward_time": 0.13188958168029785,
      "step": 20626
    },
    {
      "epoch": 0.00012589111328125,
      "step": 20626,
      "training_step_time": 0.7336091995239258
    },
    {
      "epoch": 0.000125897216796875,
      "model_forward_time": 0.11906242370605469,
      "step": 20627
    },
    {
      "epoch": 0.000125897216796875,
      "step": 20627,
      "training_step_time": 0.6499977111816406
    },
    {
      "epoch": 0.0001259033203125,
      "model_forward_time": 0.11875414848327637,
      "step": 20628
    },
    {
      "epoch": 0.0001259033203125,
      "step": 20628,
      "training_step_time": 0.685255765914917
    },
    {
      "epoch": 0.000125909423828125,
      "model_forward_time": 0.11804676055908203,
      "step": 20629
    },
    {
      "epoch": 0.000125909423828125,
      "step": 20629,
      "training_step_time": 0.7019665241241455
    },
    {
      "epoch": 0.00012591552734375,
      "grad_norm": 0.14919210970401764,
      "learning_rate": 7.819527070476665e-05,
      "loss": 0.0489,
      "step": 20630
    },
    {
      "epoch": 0.00012591552734375,
      "model_forward_time": 0.11722898483276367,
      "step": 20630
    },
    {
      "epoch": 0.00012591552734375,
      "step": 20630,
      "training_step_time": 0.649747371673584
    },
    {
      "epoch": 0.000125921630859375,
      "model_forward_time": 0.11770033836364746,
      "step": 20631
    },
    {
      "epoch": 0.000125921630859375,
      "step": 20631,
      "training_step_time": 0.7238528728485107
    },
    {
      "epoch": 0.000125927734375,
      "model_forward_time": 0.11897706985473633,
      "step": 20632
    },
    {
      "epoch": 0.000125927734375,
      "step": 20632,
      "training_step_time": 0.6137056350708008
    },
    {
      "epoch": 0.000125933837890625,
      "model_forward_time": 0.11712336540222168,
      "step": 20633
    },
    {
      "epoch": 0.000125933837890625,
      "step": 20633,
      "training_step_time": 0.6663720607757568
    },
    {
      "epoch": 0.00012593994140625,
      "model_forward_time": 0.12423563003540039,
      "step": 20634
    },
    {
      "epoch": 0.00012593994140625,
      "step": 20634,
      "training_step_time": 0.6843552589416504
    },
    {
      "epoch": 0.000125946044921875,
      "model_forward_time": 0.12082123756408691,
      "step": 20635
    },
    {
      "epoch": 0.000125946044921875,
      "step": 20635,
      "training_step_time": 0.7369201183319092
    },
    {
      "epoch": 0.0001259521484375,
      "model_forward_time": 0.12599706649780273,
      "step": 20636
    },
    {
      "epoch": 0.0001259521484375,
      "step": 20636,
      "training_step_time": 0.6205625534057617
    },
    {
      "epoch": 0.000125958251953125,
      "model_forward_time": 0.11700582504272461,
      "step": 20637
    },
    {
      "epoch": 0.000125958251953125,
      "step": 20637,
      "training_step_time": 0.6971514225006104
    },
    {
      "epoch": 0.00012596435546875,
      "model_forward_time": 0.11930203437805176,
      "step": 20638
    },
    {
      "epoch": 0.00012596435546875,
      "step": 20638,
      "training_step_time": 0.7883048057556152
    },
    {
      "epoch": 0.000125970458984375,
      "model_forward_time": 0.12185359001159668,
      "step": 20639
    },
    {
      "epoch": 0.000125970458984375,
      "step": 20639,
      "training_step_time": 0.5927994251251221
    },
    {
      "epoch": 0.0001259765625,
      "grad_norm": 0.13557317852973938,
      "learning_rate": 7.817250808190483e-05,
      "loss": 0.0543,
      "step": 20640
    },
    {
      "epoch": 0.0001259765625,
      "model_forward_time": 0.11750197410583496,
      "step": 20640
    },
    {
      "epoch": 0.0001259765625,
      "step": 20640,
      "training_step_time": 0.6713988780975342
    },
    {
      "epoch": 0.000125982666015625,
      "model_forward_time": 0.12368249893188477,
      "step": 20641
    },
    {
      "epoch": 0.000125982666015625,
      "step": 20641,
      "training_step_time": 0.6984179019927979
    },
    {
      "epoch": 0.00012598876953125,
      "model_forward_time": 0.11920905113220215,
      "step": 20642
    },
    {
      "epoch": 0.00012598876953125,
      "step": 20642,
      "training_step_time": 0.7334344387054443
    },
    {
      "epoch": 0.000125994873046875,
      "model_forward_time": 0.12857842445373535,
      "step": 20643
    },
    {
      "epoch": 0.000125994873046875,
      "step": 20643,
      "training_step_time": 0.638988733291626
    },
    {
      "epoch": 0.0001260009765625,
      "model_forward_time": 0.1285560131072998,
      "step": 20644
    },
    {
      "epoch": 0.0001260009765625,
      "step": 20644,
      "training_step_time": 0.6216144561767578
    },
    {
      "epoch": 0.000126007080078125,
      "model_forward_time": 0.11679410934448242,
      "step": 20645
    },
    {
      "epoch": 0.000126007080078125,
      "step": 20645,
      "training_step_time": 0.7134273052215576
    },
    {
      "epoch": 0.00012601318359375,
      "model_forward_time": 0.12252116203308105,
      "step": 20646
    },
    {
      "epoch": 0.00012601318359375,
      "step": 20646,
      "training_step_time": 0.6304423809051514
    },
    {
      "epoch": 0.000126019287109375,
      "model_forward_time": 0.11920690536499023,
      "step": 20647
    },
    {
      "epoch": 0.000126019287109375,
      "step": 20647,
      "training_step_time": 0.7175991535186768
    },
    {
      "epoch": 0.000126025390625,
      "model_forward_time": 0.12285041809082031,
      "step": 20648
    },
    {
      "epoch": 0.000126025390625,
      "step": 20648,
      "training_step_time": 0.6322214603424072
    },
    {
      "epoch": 0.000126031494140625,
      "model_forward_time": 0.12033534049987793,
      "step": 20649
    },
    {
      "epoch": 0.000126031494140625,
      "step": 20649,
      "training_step_time": 0.6260039806365967
    },
    {
      "epoch": 0.00012603759765625,
      "grad_norm": 0.1993919461965561,
      "learning_rate": 7.814973690097893e-05,
      "loss": 0.0568,
      "step": 20650
    },
    {
      "epoch": 0.00012603759765625,
      "model_forward_time": 0.11975240707397461,
      "step": 20650
    },
    {
      "epoch": 0.00012603759765625,
      "step": 20650,
      "training_step_time": 0.7595517635345459
    },
    {
      "epoch": 0.000126043701171875,
      "model_forward_time": 0.12212753295898438,
      "step": 20651
    },
    {
      "epoch": 0.000126043701171875,
      "step": 20651,
      "training_step_time": 0.6295595169067383
    },
    {
      "epoch": 0.0001260498046875,
      "model_forward_time": 0.1174924373626709,
      "step": 20652
    },
    {
      "epoch": 0.0001260498046875,
      "step": 20652,
      "training_step_time": 0.7088186740875244
    },
    {
      "epoch": 0.000126055908203125,
      "model_forward_time": 0.11796927452087402,
      "step": 20653
    },
    {
      "epoch": 0.000126055908203125,
      "step": 20653,
      "training_step_time": 0.6489524841308594
    },
    {
      "epoch": 0.00012606201171875,
      "model_forward_time": 0.117279052734375,
      "step": 20654
    },
    {
      "epoch": 0.00012606201171875,
      "step": 20654,
      "training_step_time": 0.7219638824462891
    },
    {
      "epoch": 0.000126068115234375,
      "model_forward_time": 0.11674618721008301,
      "step": 20655
    },
    {
      "epoch": 0.000126068115234375,
      "step": 20655,
      "training_step_time": 0.6850800514221191
    },
    {
      "epoch": 0.00012607421875,
      "model_forward_time": 0.11930036544799805,
      "step": 20656
    },
    {
      "epoch": 0.00012607421875,
      "step": 20656,
      "training_step_time": 0.7037549018859863
    },
    {
      "epoch": 0.000126080322265625,
      "model_forward_time": 0.11652112007141113,
      "step": 20657
    },
    {
      "epoch": 0.000126080322265625,
      "step": 20657,
      "training_step_time": 0.7097201347351074
    },
    {
      "epoch": 0.00012608642578125,
      "model_forward_time": 0.11995577812194824,
      "step": 20658
    },
    {
      "epoch": 0.00012608642578125,
      "step": 20658,
      "training_step_time": 0.6643028259277344
    },
    {
      "epoch": 0.000126092529296875,
      "model_forward_time": 0.11893892288208008,
      "step": 20659
    },
    {
      "epoch": 0.000126092529296875,
      "step": 20659,
      "training_step_time": 0.6409640312194824
    },
    {
      "epoch": 0.0001260986328125,
      "grad_norm": 0.15995541214942932,
      "learning_rate": 7.81269571689062e-05,
      "loss": 0.0587,
      "step": 20660
    },
    {
      "epoch": 0.0001260986328125,
      "model_forward_time": 0.12186002731323242,
      "step": 20660
    },
    {
      "epoch": 0.0001260986328125,
      "step": 20660,
      "training_step_time": 0.6524844169616699
    },
    {
      "epoch": 0.000126104736328125,
      "model_forward_time": 0.12043237686157227,
      "step": 20661
    },
    {
      "epoch": 0.000126104736328125,
      "step": 20661,
      "training_step_time": 0.663275957107544
    },
    {
      "epoch": 0.00012611083984375,
      "model_forward_time": 0.12072300910949707,
      "step": 20662
    },
    {
      "epoch": 0.00012611083984375,
      "step": 20662,
      "training_step_time": 0.6141517162322998
    },
    {
      "epoch": 0.000126116943359375,
      "model_forward_time": 0.12244629859924316,
      "step": 20663
    },
    {
      "epoch": 0.000126116943359375,
      "step": 20663,
      "training_step_time": 0.6255700588226318
    },
    {
      "epoch": 0.000126123046875,
      "model_forward_time": 0.11587238311767578,
      "step": 20664
    },
    {
      "epoch": 0.000126123046875,
      "step": 20664,
      "training_step_time": 0.6846663951873779
    },
    {
      "epoch": 0.000126129150390625,
      "model_forward_time": 0.12543225288391113,
      "step": 20665
    },
    {
      "epoch": 0.000126129150390625,
      "step": 20665,
      "training_step_time": 0.6037638187408447
    },
    {
      "epoch": 0.00012613525390625,
      "model_forward_time": 0.12339496612548828,
      "step": 20666
    },
    {
      "epoch": 0.00012613525390625,
      "step": 20666,
      "training_step_time": 0.6497364044189453
    },
    {
      "epoch": 0.000126141357421875,
      "model_forward_time": 0.11723661422729492,
      "step": 20667
    },
    {
      "epoch": 0.000126141357421875,
      "step": 20667,
      "training_step_time": 0.6415104866027832
    },
    {
      "epoch": 0.0001261474609375,
      "model_forward_time": 0.12103581428527832,
      "step": 20668
    },
    {
      "epoch": 0.0001261474609375,
      "step": 20668,
      "training_step_time": 0.6279938220977783
    },
    {
      "epoch": 0.000126153564453125,
      "model_forward_time": 0.11817216873168945,
      "step": 20669
    },
    {
      "epoch": 0.000126153564453125,
      "step": 20669,
      "training_step_time": 0.7113378047943115
    },
    {
      "epoch": 0.00012615966796875,
      "grad_norm": 0.14738932251930237,
      "learning_rate": 7.810416889260653e-05,
      "loss": 0.0548,
      "step": 20670
    },
    {
      "epoch": 0.00012615966796875,
      "model_forward_time": 0.12734580039978027,
      "step": 20670
    },
    {
      "epoch": 0.00012615966796875,
      "step": 20670,
      "training_step_time": 0.6839187145233154
    },
    {
      "epoch": 0.000126165771484375,
      "model_forward_time": 0.12253189086914062,
      "step": 20671
    },
    {
      "epoch": 0.000126165771484375,
      "step": 20671,
      "training_step_time": 0.7412846088409424
    },
    {
      "epoch": 0.000126171875,
      "model_forward_time": 0.11764407157897949,
      "step": 20672
    },
    {
      "epoch": 0.000126171875,
      "step": 20672,
      "training_step_time": 0.647514820098877
    },
    {
      "epoch": 0.000126177978515625,
      "model_forward_time": 0.12807106971740723,
      "step": 20673
    },
    {
      "epoch": 0.000126177978515625,
      "step": 20673,
      "training_step_time": 0.6317088603973389
    },
    {
      "epoch": 0.00012618408203125,
      "model_forward_time": 0.12786173820495605,
      "step": 20674
    },
    {
      "epoch": 0.00012618408203125,
      "step": 20674,
      "training_step_time": 0.6534929275512695
    },
    {
      "epoch": 0.000126190185546875,
      "model_forward_time": 0.1170189380645752,
      "step": 20675
    },
    {
      "epoch": 0.000126190185546875,
      "step": 20675,
      "training_step_time": 0.7016055583953857
    },
    {
      "epoch": 0.0001261962890625,
      "model_forward_time": 0.11938714981079102,
      "step": 20676
    },
    {
      "epoch": 0.0001261962890625,
      "step": 20676,
      "training_step_time": 0.6737196445465088
    },
    {
      "epoch": 0.000126202392578125,
      "model_forward_time": 0.12304902076721191,
      "step": 20677
    },
    {
      "epoch": 0.000126202392578125,
      "step": 20677,
      "training_step_time": 0.6207275390625
    },
    {
      "epoch": 0.00012620849609375,
      "model_forward_time": 0.1176753044128418,
      "step": 20678
    },
    {
      "epoch": 0.00012620849609375,
      "step": 20678,
      "training_step_time": 0.6358857154846191
    },
    {
      "epoch": 0.000126214599609375,
      "model_forward_time": 0.1233978271484375,
      "step": 20679
    },
    {
      "epoch": 0.000126214599609375,
      "step": 20679,
      "training_step_time": 0.6431407928466797
    },
    {
      "epoch": 0.000126220703125,
      "grad_norm": 0.09415281563997269,
      "learning_rate": 7.808137207900241e-05,
      "loss": 0.0536,
      "step": 20680
    },
    {
      "epoch": 0.000126220703125,
      "model_forward_time": 0.11968016624450684,
      "step": 20680
    },
    {
      "epoch": 0.000126220703125,
      "step": 20680,
      "training_step_time": 0.6585488319396973
    },
    {
      "epoch": 0.000126226806640625,
      "model_forward_time": 0.11961865425109863,
      "step": 20681
    },
    {
      "epoch": 0.000126226806640625,
      "step": 20681,
      "training_step_time": 0.7077920436859131
    },
    {
      "epoch": 0.00012623291015625,
      "model_forward_time": 0.12021160125732422,
      "step": 20682
    },
    {
      "epoch": 0.00012623291015625,
      "step": 20682,
      "training_step_time": 0.5795094966888428
    },
    {
      "epoch": 0.000126239013671875,
      "model_forward_time": 0.11884641647338867,
      "step": 20683
    },
    {
      "epoch": 0.000126239013671875,
      "step": 20683,
      "training_step_time": 0.5593783855438232
    },
    {
      "epoch": 0.0001262451171875,
      "model_forward_time": 0.12358832359313965,
      "step": 20684
    },
    {
      "epoch": 0.0001262451171875,
      "step": 20684,
      "training_step_time": 0.5267646312713623
    },
    {
      "epoch": 0.000126251220703125,
      "model_forward_time": 0.11881136894226074,
      "step": 20685
    },
    {
      "epoch": 0.000126251220703125,
      "step": 20685,
      "training_step_time": 0.4635121822357178
    },
    {
      "epoch": 0.00012625732421875,
      "model_forward_time": 0.11790156364440918,
      "step": 20686
    },
    {
      "epoch": 0.00012625732421875,
      "step": 20686,
      "training_step_time": 0.5637810230255127
    },
    {
      "epoch": 0.000126263427734375,
      "model_forward_time": 0.11680841445922852,
      "step": 20687
    },
    {
      "epoch": 0.000126263427734375,
      "step": 20687,
      "training_step_time": 0.5273189544677734
    },
    {
      "epoch": 0.00012626953125,
      "model_forward_time": 0.11600828170776367,
      "step": 20688
    },
    {
      "epoch": 0.00012626953125,
      "step": 20688,
      "training_step_time": 0.42811131477355957
    },
    {
      "epoch": 0.000126275634765625,
      "model_forward_time": 0.11521244049072266,
      "step": 20689
    },
    {
      "epoch": 0.000126275634765625,
      "step": 20689,
      "training_step_time": 0.4416215419769287
    },
    {
      "epoch": 0.00012628173828125,
      "grad_norm": 0.15752491354942322,
      "learning_rate": 7.80585667350189e-05,
      "loss": 0.0563,
      "step": 20690
    },
    {
      "epoch": 0.00012628173828125,
      "model_forward_time": 0.11827921867370605,
      "step": 20690
    },
    {
      "epoch": 0.00012628173828125,
      "step": 20690,
      "training_step_time": 0.42543792724609375
    },
    {
      "epoch": 0.000126287841796875,
      "model_forward_time": 0.11483073234558105,
      "step": 20691
    },
    {
      "epoch": 0.000126287841796875,
      "step": 20691,
      "training_step_time": 0.432539701461792
    },
    {
      "epoch": 0.0001262939453125,
      "model_forward_time": 0.11530637741088867,
      "step": 20692
    },
    {
      "epoch": 0.0001262939453125,
      "step": 20692,
      "training_step_time": 0.4615445137023926
    },
    {
      "epoch": 0.000126300048828125,
      "model_forward_time": 0.11564898490905762,
      "step": 20693
    },
    {
      "epoch": 0.000126300048828125,
      "step": 20693,
      "training_step_time": 0.4005396366119385
    },
    {
      "epoch": 0.00012630615234375,
      "model_forward_time": 0.11505579948425293,
      "step": 20694
    },
    {
      "epoch": 0.00012630615234375,
      "step": 20694,
      "training_step_time": 0.46634459495544434
    },
    {
      "epoch": 0.000126312255859375,
      "model_forward_time": 0.11517596244812012,
      "step": 20695
    },
    {
      "epoch": 0.000126312255859375,
      "step": 20695,
      "training_step_time": 0.4182603359222412
    },
    {
      "epoch": 0.000126318359375,
      "model_forward_time": 0.1153416633605957,
      "step": 20696
    },
    {
      "epoch": 0.000126318359375,
      "step": 20696,
      "training_step_time": 0.4000051021575928
    },
    {
      "epoch": 0.000126324462890625,
      "model_forward_time": 0.11484694480895996,
      "step": 20697
    },
    {
      "epoch": 0.000126324462890625,
      "step": 20697,
      "training_step_time": 0.43311452865600586
    },
    {
      "epoch": 0.00012633056640625,
      "model_forward_time": 0.11562418937683105,
      "step": 20698
    },
    {
      "epoch": 0.00012633056640625,
      "step": 20698,
      "training_step_time": 0.403167724609375
    },
    {
      "epoch": 0.000126336669921875,
      "model_forward_time": 0.11446619033813477,
      "step": 20699
    },
    {
      "epoch": 0.000126336669921875,
      "step": 20699,
      "training_step_time": 0.39060449600219727
    },
    {
      "epoch": 0.0001263427734375,
      "grad_norm": 0.166922926902771,
      "learning_rate": 7.803575286758364e-05,
      "loss": 0.054,
      "step": 20700
    },
    {
      "epoch": 0.0001263427734375,
      "model_forward_time": 0.11560249328613281,
      "step": 20700
    },
    {
      "epoch": 0.0001263427734375,
      "step": 20700,
      "training_step_time": 0.45442962646484375
    },
    {
      "epoch": 0.000126348876953125,
      "model_forward_time": 0.11500048637390137,
      "step": 20701
    },
    {
      "epoch": 0.000126348876953125,
      "step": 20701,
      "training_step_time": 0.445375919342041
    },
    {
      "epoch": 0.00012635498046875,
      "model_forward_time": 0.11536192893981934,
      "step": 20702
    },
    {
      "epoch": 0.00012635498046875,
      "step": 20702,
      "training_step_time": 0.42366552352905273
    },
    {
      "epoch": 0.000126361083984375,
      "model_forward_time": 0.11469125747680664,
      "step": 20703
    },
    {
      "epoch": 0.000126361083984375,
      "step": 20703,
      "training_step_time": 0.4200596809387207
    },
    {
      "epoch": 0.0001263671875,
      "model_forward_time": 0.1149129867553711,
      "step": 20704
    },
    {
      "epoch": 0.0001263671875,
      "step": 20704,
      "training_step_time": 0.3923063278198242
    },
    {
      "epoch": 0.000126373291015625,
      "model_forward_time": 0.11464786529541016,
      "step": 20705
    },
    {
      "epoch": 0.000126373291015625,
      "step": 20705,
      "training_step_time": 0.4323084354400635
    },
    {
      "epoch": 0.00012637939453125,
      "model_forward_time": 0.11492395401000977,
      "step": 20706
    },
    {
      "epoch": 0.00012637939453125,
      "step": 20706,
      "training_step_time": 0.49210500717163086
    },
    {
      "epoch": 0.000126385498046875,
      "model_forward_time": 0.1156473159790039,
      "step": 20707
    },
    {
      "epoch": 0.000126385498046875,
      "step": 20707,
      "training_step_time": 0.4026343822479248
    },
    {
      "epoch": 0.0001263916015625,
      "model_forward_time": 0.1153719425201416,
      "step": 20708
    },
    {
      "epoch": 0.0001263916015625,
      "step": 20708,
      "training_step_time": 0.4145951271057129
    },
    {
      "epoch": 0.000126397705078125,
      "model_forward_time": 0.11427760124206543,
      "step": 20709
    },
    {
      "epoch": 0.000126397705078125,
      "step": 20709,
      "training_step_time": 0.39464592933654785
    },
    {
      "epoch": 0.00012640380859375,
      "grad_norm": 0.1087845116853714,
      "learning_rate": 7.801293048362691e-05,
      "loss": 0.0534,
      "step": 20710
    },
    {
      "epoch": 0.00012640380859375,
      "model_forward_time": 0.11474180221557617,
      "step": 20710
    },
    {
      "epoch": 0.00012640380859375,
      "step": 20710,
      "training_step_time": 0.4069030284881592
    },
    {
      "epoch": 0.000126409912109375,
      "model_forward_time": 0.11490345001220703,
      "step": 20711
    },
    {
      "epoch": 0.000126409912109375,
      "step": 20711,
      "training_step_time": 0.41142892837524414
    },
    {
      "epoch": 0.000126416015625,
      "model_forward_time": 0.11409211158752441,
      "step": 20712
    },
    {
      "epoch": 0.000126416015625,
      "step": 20712,
      "training_step_time": 0.4001173973083496
    },
    {
      "epoch": 0.000126422119140625,
      "model_forward_time": 0.11509227752685547,
      "step": 20713
    },
    {
      "epoch": 0.000126422119140625,
      "step": 20713,
      "training_step_time": 0.3940126895904541
    },
    {
      "epoch": 0.00012642822265625,
      "model_forward_time": 0.11494946479797363,
      "step": 20714
    },
    {
      "epoch": 0.00012642822265625,
      "step": 20714,
      "training_step_time": 0.4146239757537842
    },
    {
      "epoch": 0.000126434326171875,
      "model_forward_time": 0.11521053314208984,
      "step": 20715
    },
    {
      "epoch": 0.000126434326171875,
      "step": 20715,
      "training_step_time": 0.4248933792114258
    },
    {
      "epoch": 0.0001264404296875,
      "model_forward_time": 0.11595630645751953,
      "step": 20716
    },
    {
      "epoch": 0.0001264404296875,
      "step": 20716,
      "training_step_time": 0.5045480728149414
    },
    {
      "epoch": 0.000126446533203125,
      "model_forward_time": 0.1145789623260498,
      "step": 20717
    },
    {
      "epoch": 0.000126446533203125,
      "step": 20717,
      "training_step_time": 0.3840022087097168
    },
    {
      "epoch": 0.00012645263671875,
      "model_forward_time": 0.11506009101867676,
      "step": 20718
    },
    {
      "epoch": 0.00012645263671875,
      "step": 20718,
      "training_step_time": 0.392254114151001
    },
    {
      "epoch": 0.000126458740234375,
      "model_forward_time": 0.11461806297302246,
      "step": 20719
    },
    {
      "epoch": 0.000126458740234375,
      "step": 20719,
      "training_step_time": 0.4424867630004883
    },
    {
      "epoch": 0.00012646484375,
      "grad_norm": 0.18018768727779388,
      "learning_rate": 7.799009959008155e-05,
      "loss": 0.0545,
      "step": 20720
    },
    {
      "epoch": 0.00012646484375,
      "model_forward_time": 0.11487770080566406,
      "step": 20720
    },
    {
      "epoch": 0.00012646484375,
      "step": 20720,
      "training_step_time": 0.3854358196258545
    },
    {
      "epoch": 0.000126470947265625,
      "model_forward_time": 0.11615252494812012,
      "step": 20721
    },
    {
      "epoch": 0.000126470947265625,
      "step": 20721,
      "training_step_time": 0.4157392978668213
    },
    {
      "epoch": 0.00012647705078125,
      "model_forward_time": 0.11460733413696289,
      "step": 20722
    },
    {
      "epoch": 0.00012647705078125,
      "step": 20722,
      "training_step_time": 0.46341753005981445
    },
    {
      "epoch": 0.000126483154296875,
      "model_forward_time": 0.1156930923461914,
      "step": 20723
    },
    {
      "epoch": 0.000126483154296875,
      "step": 20723,
      "training_step_time": 0.40996384620666504
    },
    {
      "epoch": 0.0001264892578125,
      "model_forward_time": 0.11476898193359375,
      "step": 20724
    },
    {
      "epoch": 0.0001264892578125,
      "step": 20724,
      "training_step_time": 0.38819050788879395
    },
    {
      "epoch": 0.000126495361328125,
      "model_forward_time": 0.11509156227111816,
      "step": 20725
    },
    {
      "epoch": 0.000126495361328125,
      "step": 20725,
      "training_step_time": 0.3960745334625244
    },
    {
      "epoch": 0.00012650146484375,
      "model_forward_time": 0.11540770530700684,
      "step": 20726
    },
    {
      "epoch": 0.00012650146484375,
      "step": 20726,
      "training_step_time": 0.3858630657196045
    },
    {
      "epoch": 0.000126507568359375,
      "model_forward_time": 0.11526060104370117,
      "step": 20727
    },
    {
      "epoch": 0.000126507568359375,
      "step": 20727,
      "training_step_time": 0.40122079849243164
    },
    {
      "epoch": 0.000126513671875,
      "model_forward_time": 0.11527538299560547,
      "step": 20728
    },
    {
      "epoch": 0.000126513671875,
      "step": 20728,
      "training_step_time": 0.36759066581726074
    },
    {
      "epoch": 0.000126519775390625,
      "model_forward_time": 0.11522912979125977,
      "step": 20729
    },
    {
      "epoch": 0.000126519775390625,
      "step": 20729,
      "training_step_time": 0.49561405181884766
    },
    {
      "epoch": 0.00012652587890625,
      "grad_norm": 0.15680576860904694,
      "learning_rate": 7.796726019388295e-05,
      "loss": 0.057,
      "step": 20730
    },
    {
      "epoch": 0.00012652587890625,
      "model_forward_time": 0.11466145515441895,
      "step": 20730
    },
    {
      "epoch": 0.00012652587890625,
      "step": 20730,
      "training_step_time": 0.4988517761230469
    },
    {
      "epoch": 0.000126531982421875,
      "model_forward_time": 0.1181633472442627,
      "step": 20731
    },
    {
      "epoch": 0.000126531982421875,
      "step": 20731,
      "training_step_time": 0.37935638427734375
    },
    {
      "epoch": 0.0001265380859375,
      "model_forward_time": 0.1179189682006836,
      "step": 20732
    },
    {
      "epoch": 0.0001265380859375,
      "step": 20732,
      "training_step_time": 0.3866744041442871
    },
    {
      "epoch": 0.000126544189453125,
      "model_forward_time": 0.11513495445251465,
      "step": 20733
    },
    {
      "epoch": 0.000126544189453125,
      "step": 20733,
      "training_step_time": 0.44762372970581055
    },
    {
      "epoch": 0.00012655029296875,
      "model_forward_time": 0.11512160301208496,
      "step": 20734
    },
    {
      "epoch": 0.00012655029296875,
      "step": 20734,
      "training_step_time": 0.38912248611450195
    },
    {
      "epoch": 0.000126556396484375,
      "model_forward_time": 0.1154634952545166,
      "step": 20735
    },
    {
      "epoch": 0.000126556396484375,
      "step": 20735,
      "training_step_time": 0.4279022216796875
    },
    {
      "epoch": 0.0001265625,
      "model_forward_time": 0.11484026908874512,
      "step": 20736
    },
    {
      "epoch": 0.0001265625,
      "step": 20736,
      "training_step_time": 0.3955543041229248
    },
    {
      "epoch": 0.000126568603515625,
      "model_forward_time": 0.11495828628540039,
      "step": 20737
    },
    {
      "epoch": 0.000126568603515625,
      "step": 20737,
      "training_step_time": 0.4319169521331787
    },
    {
      "epoch": 0.00012657470703125,
      "model_forward_time": 0.11533594131469727,
      "step": 20738
    },
    {
      "epoch": 0.00012657470703125,
      "step": 20738,
      "training_step_time": 0.38533878326416016
    },
    {
      "epoch": 0.000126580810546875,
      "model_forward_time": 0.11502623558044434,
      "step": 20739
    },
    {
      "epoch": 0.000126580810546875,
      "step": 20739,
      "training_step_time": 0.38918495178222656
    },
    {
      "epoch": 0.0001265869140625,
      "grad_norm": 0.12859632074832916,
      "learning_rate": 7.794441230196913e-05,
      "loss": 0.0508,
      "step": 20740
    },
    {
      "epoch": 0.0001265869140625,
      "model_forward_time": 0.11551570892333984,
      "step": 20740
    },
    {
      "epoch": 0.0001265869140625,
      "step": 20740,
      "training_step_time": 0.3992645740509033
    },
    {
      "epoch": 0.000126593017578125,
      "model_forward_time": 0.11565113067626953,
      "step": 20741
    },
    {
      "epoch": 0.000126593017578125,
      "step": 20741,
      "training_step_time": 0.6617071628570557
    },
    {
      "epoch": 0.00012659912109375,
      "model_forward_time": 0.11573386192321777,
      "step": 20742
    },
    {
      "epoch": 0.00012659912109375,
      "step": 20742,
      "training_step_time": 0.3745701313018799
    },
    {
      "epoch": 0.000126605224609375,
      "model_forward_time": 0.11539459228515625,
      "step": 20743
    },
    {
      "epoch": 0.000126605224609375,
      "step": 20743,
      "training_step_time": 0.4633193016052246
    },
    {
      "epoch": 0.000126611328125,
      "model_forward_time": 0.11432981491088867,
      "step": 20744
    },
    {
      "epoch": 0.000126611328125,
      "step": 20744,
      "training_step_time": 0.4518575668334961
    },
    {
      "epoch": 0.000126617431640625,
      "model_forward_time": 0.11486983299255371,
      "step": 20745
    },
    {
      "epoch": 0.000126617431640625,
      "step": 20745,
      "training_step_time": 0.4129149913787842
    },
    {
      "epoch": 0.00012662353515625,
      "model_forward_time": 0.11432242393493652,
      "step": 20746
    },
    {
      "epoch": 0.00012662353515625,
      "step": 20746,
      "training_step_time": 0.3864288330078125
    },
    {
      "epoch": 0.000126629638671875,
      "model_forward_time": 0.11515092849731445,
      "step": 20747
    },
    {
      "epoch": 0.000126629638671875,
      "step": 20747,
      "training_step_time": 0.5964174270629883
    },
    {
      "epoch": 0.0001266357421875,
      "model_forward_time": 0.11515927314758301,
      "step": 20748
    },
    {
      "epoch": 0.0001266357421875,
      "step": 20748,
      "training_step_time": 0.39218783378601074
    },
    {
      "epoch": 0.000126641845703125,
      "model_forward_time": 0.11539053916931152,
      "step": 20749
    },
    {
      "epoch": 0.000126641845703125,
      "step": 20749,
      "training_step_time": 0.390092134475708
    },
    {
      "epoch": 0.00012664794921875,
      "grad_norm": 0.13644292950630188,
      "learning_rate": 7.79215559212807e-05,
      "loss": 0.051,
      "step": 20750
    },
    {
      "epoch": 0.00012664794921875,
      "model_forward_time": 0.11522698402404785,
      "step": 20750
    },
    {
      "epoch": 0.00012664794921875,
      "step": 20750,
      "training_step_time": 0.4053342342376709
    },
    {
      "epoch": 0.000126654052734375,
      "model_forward_time": 0.11497759819030762,
      "step": 20751
    },
    {
      "epoch": 0.000126654052734375,
      "step": 20751,
      "training_step_time": 0.40567541122436523
    },
    {
      "epoch": 0.00012666015625,
      "model_forward_time": 0.11517453193664551,
      "step": 20752
    },
    {
      "epoch": 0.00012666015625,
      "step": 20752,
      "training_step_time": 0.3973264694213867
    },
    {
      "epoch": 0.000126666259765625,
      "model_forward_time": 0.11511802673339844,
      "step": 20753
    },
    {
      "epoch": 0.000126666259765625,
      "step": 20753,
      "training_step_time": 0.711092472076416
    },
    {
      "epoch": 0.00012667236328125,
      "model_forward_time": 0.11513018608093262,
      "step": 20754
    },
    {
      "epoch": 0.00012667236328125,
      "step": 20754,
      "training_step_time": 0.39762401580810547
    },
    {
      "epoch": 0.000126678466796875,
      "model_forward_time": 0.11463332176208496,
      "step": 20755
    },
    {
      "epoch": 0.000126678466796875,
      "step": 20755,
      "training_step_time": 0.39250755310058594
    },
    {
      "epoch": 0.0001266845703125,
      "model_forward_time": 0.11449003219604492,
      "step": 20756
    },
    {
      "epoch": 0.0001266845703125,
      "step": 20756,
      "training_step_time": 0.36368346214294434
    },
    {
      "epoch": 0.000126690673828125,
      "model_forward_time": 0.11456990242004395,
      "step": 20757
    },
    {
      "epoch": 0.000126690673828125,
      "step": 20757,
      "training_step_time": 0.4387400150299072
    },
    {
      "epoch": 0.00012669677734375,
      "model_forward_time": 0.1150364875793457,
      "step": 20758
    },
    {
      "epoch": 0.00012669677734375,
      "step": 20758,
      "training_step_time": 0.3972475528717041
    },
    {
      "epoch": 0.000126702880859375,
      "model_forward_time": 0.11495113372802734,
      "step": 20759
    },
    {
      "epoch": 0.000126702880859375,
      "step": 20759,
      "training_step_time": 0.4831662178039551
    },
    {
      "epoch": 0.000126708984375,
      "grad_norm": 0.1453302800655365,
      "learning_rate": 7.789869105876083e-05,
      "loss": 0.0512,
      "step": 20760
    },
    {
      "epoch": 0.000126708984375,
      "model_forward_time": 0.11548304557800293,
      "step": 20760
    },
    {
      "epoch": 0.000126708984375,
      "step": 20760,
      "training_step_time": 0.38102006912231445
    },
    {
      "epoch": 0.000126715087890625,
      "model_forward_time": 0.11477184295654297,
      "step": 20761
    },
    {
      "epoch": 0.000126715087890625,
      "step": 20761,
      "training_step_time": 0.399472713470459
    },
    {
      "epoch": 0.00012672119140625,
      "model_forward_time": 0.11521744728088379,
      "step": 20762
    },
    {
      "epoch": 0.00012672119140625,
      "step": 20762,
      "training_step_time": 0.379840612411499
    },
    {
      "epoch": 0.000126727294921875,
      "model_forward_time": 0.11568260192871094,
      "step": 20763
    },
    {
      "epoch": 0.000126727294921875,
      "step": 20763,
      "training_step_time": 0.40457940101623535
    },
    {
      "epoch": 0.0001267333984375,
      "model_forward_time": 0.11484813690185547,
      "step": 20764
    },
    {
      "epoch": 0.0001267333984375,
      "step": 20764,
      "training_step_time": 0.4015829563140869
    },
    {
      "epoch": 0.000126739501953125,
      "model_forward_time": 0.11545968055725098,
      "step": 20765
    },
    {
      "epoch": 0.000126739501953125,
      "step": 20765,
      "training_step_time": 0.8229751586914062
    },
    {
      "epoch": 0.00012674560546875,
      "model_forward_time": 0.11456608772277832,
      "step": 20766
    },
    {
      "epoch": 0.00012674560546875,
      "step": 20766,
      "training_step_time": 0.3933873176574707
    },
    {
      "epoch": 0.000126751708984375,
      "model_forward_time": 0.11447358131408691,
      "step": 20767
    },
    {
      "epoch": 0.000126751708984375,
      "step": 20767,
      "training_step_time": 0.3855135440826416
    },
    {
      "epoch": 0.0001267578125,
      "model_forward_time": 0.11477017402648926,
      "step": 20768
    },
    {
      "epoch": 0.0001267578125,
      "step": 20768,
      "training_step_time": 0.37648487091064453
    },
    {
      "epoch": 0.000126763916015625,
      "model_forward_time": 0.11413264274597168,
      "step": 20769
    },
    {
      "epoch": 0.000126763916015625,
      "step": 20769,
      "training_step_time": 0.3842427730560303
    },
    {
      "epoch": 0.00012677001953125,
      "grad_norm": 0.12056361883878708,
      "learning_rate": 7.78758177213552e-05,
      "loss": 0.0517,
      "step": 20770
    },
    {
      "epoch": 0.00012677001953125,
      "model_forward_time": 0.11470937728881836,
      "step": 20770
    },
    {
      "epoch": 0.00012677001953125,
      "step": 20770,
      "training_step_time": 0.46669983863830566
    },
    {
      "epoch": 0.000126776123046875,
      "model_forward_time": 0.1153266429901123,
      "step": 20771
    },
    {
      "epoch": 0.000126776123046875,
      "step": 20771,
      "training_step_time": 0.4931364059448242
    },
    {
      "epoch": 0.0001267822265625,
      "model_forward_time": 0.11575102806091309,
      "step": 20772
    },
    {
      "epoch": 0.0001267822265625,
      "step": 20772,
      "training_step_time": 0.48203182220458984
    },
    {
      "epoch": 0.000126788330078125,
      "model_forward_time": 0.11474847793579102,
      "step": 20773
    },
    {
      "epoch": 0.000126788330078125,
      "step": 20773,
      "training_step_time": 0.3845181465148926
    },
    {
      "epoch": 0.00012679443359375,
      "model_forward_time": 0.11466598510742188,
      "step": 20774
    },
    {
      "epoch": 0.00012679443359375,
      "step": 20774,
      "training_step_time": 0.38880085945129395
    },
    {
      "epoch": 0.000126800537109375,
      "model_forward_time": 0.11463737487792969,
      "step": 20775
    },
    {
      "epoch": 0.000126800537109375,
      "step": 20775,
      "training_step_time": 0.39603090286254883
    },
    {
      "epoch": 0.000126806640625,
      "model_forward_time": 0.11471176147460938,
      "step": 20776
    },
    {
      "epoch": 0.000126806640625,
      "step": 20776,
      "training_step_time": 0.39874720573425293
    },
    {
      "epoch": 0.000126812744140625,
      "model_forward_time": 0.11509060859680176,
      "step": 20777
    },
    {
      "epoch": 0.000126812744140625,
      "step": 20777,
      "training_step_time": 0.5524718761444092
    },
    {
      "epoch": 0.00012681884765625,
      "model_forward_time": 0.11519217491149902,
      "step": 20778
    },
    {
      "epoch": 0.00012681884765625,
      "step": 20778,
      "training_step_time": 0.38760995864868164
    },
    {
      "epoch": 0.000126824951171875,
      "model_forward_time": 0.11802458763122559,
      "step": 20779
    },
    {
      "epoch": 0.000126824951171875,
      "step": 20779,
      "training_step_time": 0.40230727195739746
    },
    {
      "epoch": 0.0001268310546875,
      "grad_norm": 0.17782431840896606,
      "learning_rate": 7.785293591601217e-05,
      "loss": 0.0573,
      "step": 20780
    },
    {
      "epoch": 0.0001268310546875,
      "model_forward_time": 0.11503815650939941,
      "step": 20780
    },
    {
      "epoch": 0.0001268310546875,
      "step": 20780,
      "training_step_time": 0.3898942470550537
    },
    {
      "epoch": 0.000126837158203125,
      "model_forward_time": 0.11454224586486816,
      "step": 20781
    },
    {
      "epoch": 0.000126837158203125,
      "step": 20781,
      "training_step_time": 0.3925457000732422
    },
    {
      "epoch": 0.00012684326171875,
      "model_forward_time": 0.11576175689697266,
      "step": 20782
    },
    {
      "epoch": 0.00012684326171875,
      "step": 20782,
      "training_step_time": 0.3844766616821289
    },
    {
      "epoch": 0.000126849365234375,
      "model_forward_time": 0.11480140686035156,
      "step": 20783
    },
    {
      "epoch": 0.000126849365234375,
      "step": 20783,
      "training_step_time": 0.7280106544494629
    },
    {
      "epoch": 0.00012685546875,
      "model_forward_time": 0.11534976959228516,
      "step": 20784
    },
    {
      "epoch": 0.00012685546875,
      "step": 20784,
      "training_step_time": 0.41570019721984863
    },
    {
      "epoch": 0.000126861572265625,
      "model_forward_time": 0.1149744987487793,
      "step": 20785
    },
    {
      "epoch": 0.000126861572265625,
      "step": 20785,
      "training_step_time": 0.48070669174194336
    },
    {
      "epoch": 0.00012686767578125,
      "model_forward_time": 0.11398935317993164,
      "step": 20786
    },
    {
      "epoch": 0.00012686767578125,
      "step": 20786,
      "training_step_time": 0.40291786193847656
    },
    {
      "epoch": 0.000126873779296875,
      "model_forward_time": 0.11471080780029297,
      "step": 20787
    },
    {
      "epoch": 0.000126873779296875,
      "step": 20787,
      "training_step_time": 0.37188053131103516
    },
    {
      "epoch": 0.0001268798828125,
      "model_forward_time": 0.11400985717773438,
      "step": 20788
    },
    {
      "epoch": 0.0001268798828125,
      "step": 20788,
      "training_step_time": 0.3841838836669922
    },
    {
      "epoch": 0.000126885986328125,
      "model_forward_time": 0.11523032188415527,
      "step": 20789
    },
    {
      "epoch": 0.000126885986328125,
      "step": 20789,
      "training_step_time": 0.5065646171569824
    },
    {
      "epoch": 0.00012689208984375,
      "grad_norm": 0.19048285484313965,
      "learning_rate": 7.783004564968263e-05,
      "loss": 0.0554,
      "step": 20790
    },
    {
      "epoch": 0.00012689208984375,
      "model_forward_time": 0.11493754386901855,
      "step": 20790
    },
    {
      "epoch": 0.00012689208984375,
      "step": 20790,
      "training_step_time": 0.42824864387512207
    },
    {
      "epoch": 0.000126898193359375,
      "model_forward_time": 0.11422109603881836,
      "step": 20791
    },
    {
      "epoch": 0.000126898193359375,
      "step": 20791,
      "training_step_time": 0.41072845458984375
    },
    {
      "epoch": 0.000126904296875,
      "model_forward_time": 0.11522412300109863,
      "step": 20792
    },
    {
      "epoch": 0.000126904296875,
      "step": 20792,
      "training_step_time": 0.42942070960998535
    },
    {
      "epoch": 0.000126910400390625,
      "model_forward_time": 0.11484956741333008,
      "step": 20793
    },
    {
      "epoch": 0.000126910400390625,
      "step": 20793,
      "training_step_time": 0.37935733795166016
    },
    {
      "epoch": 0.00012691650390625,
      "model_forward_time": 0.11553740501403809,
      "step": 20794
    },
    {
      "epoch": 0.00012691650390625,
      "step": 20794,
      "training_step_time": 0.3850398063659668
    },
    {
      "epoch": 0.000126922607421875,
      "model_forward_time": 0.11576247215270996,
      "step": 20795
    },
    {
      "epoch": 0.000126922607421875,
      "step": 20795,
      "training_step_time": 0.581946611404419
    },
    {
      "epoch": 0.0001269287109375,
      "model_forward_time": 0.11525392532348633,
      "step": 20796
    },
    {
      "epoch": 0.0001269287109375,
      "step": 20796,
      "training_step_time": 0.41102147102355957
    },
    {
      "epoch": 0.000126934814453125,
      "model_forward_time": 0.11455821990966797,
      "step": 20797
    },
    {
      "epoch": 0.000126934814453125,
      "step": 20797,
      "training_step_time": 0.40050530433654785
    },
    {
      "epoch": 0.00012694091796875,
      "model_forward_time": 0.11797785758972168,
      "step": 20798
    },
    {
      "epoch": 0.00012694091796875,
      "step": 20798,
      "training_step_time": 0.3803548812866211
    },
    {
      "epoch": 0.000126947021484375,
      "model_forward_time": 0.11770892143249512,
      "step": 20799
    },
    {
      "epoch": 0.000126947021484375,
      "step": 20799,
      "training_step_time": 0.4425983428955078
    },
    {
      "epoch": 0.000126953125,
      "grad_norm": 0.12755486369132996,
      "learning_rate": 7.780714692932002e-05,
      "loss": 0.0547,
      "step": 20800
    },
    {
      "epoch": 0.000126953125,
      "model_forward_time": 0.1174919605255127,
      "step": 20800
    },
    {
      "epoch": 0.000126953125,
      "step": 20800,
      "training_step_time": 0.4890739917755127
    },
    {
      "epoch": 0.000126959228515625,
      "model_forward_time": 0.11815547943115234,
      "step": 20801
    },
    {
      "epoch": 0.000126959228515625,
      "step": 20801,
      "training_step_time": 0.38068103790283203
    },
    {
      "epoch": 0.00012696533203125,
      "model_forward_time": 0.11835217475891113,
      "step": 20802
    },
    {
      "epoch": 0.00012696533203125,
      "step": 20802,
      "training_step_time": 0.38333988189697266
    },
    {
      "epoch": 0.000126971435546875,
      "model_forward_time": 0.11551809310913086,
      "step": 20803
    },
    {
      "epoch": 0.000126971435546875,
      "step": 20803,
      "training_step_time": 0.38462185859680176
    },
    {
      "epoch": 0.0001269775390625,
      "model_forward_time": 0.11523771286010742,
      "step": 20804
    },
    {
      "epoch": 0.0001269775390625,
      "step": 20804,
      "training_step_time": 0.39292407035827637
    },
    {
      "epoch": 0.000126983642578125,
      "model_forward_time": 0.11511731147766113,
      "step": 20805
    },
    {
      "epoch": 0.000126983642578125,
      "step": 20805,
      "training_step_time": 0.42050671577453613
    },
    {
      "epoch": 0.00012698974609375,
      "model_forward_time": 0.1146550178527832,
      "step": 20806
    },
    {
      "epoch": 0.00012698974609375,
      "step": 20806,
      "training_step_time": 0.48456430435180664
    },
    {
      "epoch": 0.000126995849609375,
      "model_forward_time": 0.11476421356201172,
      "step": 20807
    },
    {
      "epoch": 0.000126995849609375,
      "step": 20807,
      "training_step_time": 0.4739706516265869
    },
    {
      "epoch": 0.000127001953125,
      "model_forward_time": 0.11568117141723633,
      "step": 20808
    },
    {
      "epoch": 0.000127001953125,
      "step": 20808,
      "training_step_time": 0.3967936038970947
    },
    {
      "epoch": 0.000127008056640625,
      "model_forward_time": 0.11493182182312012,
      "step": 20809
    },
    {
      "epoch": 0.000127008056640625,
      "step": 20809,
      "training_step_time": 0.4076869487762451
    },
    {
      "epoch": 0.00012701416015625,
      "grad_norm": 0.16159415245056152,
      "learning_rate": 7.77842397618804e-05,
      "loss": 0.0523,
      "step": 20810
    },
    {
      "epoch": 0.00012701416015625,
      "model_forward_time": 0.1152796745300293,
      "step": 20810
    },
    {
      "epoch": 0.00012701416015625,
      "step": 20810,
      "training_step_time": 0.39917850494384766
    },
    {
      "epoch": 0.000127020263671875,
      "model_forward_time": 0.11537289619445801,
      "step": 20811
    },
    {
      "epoch": 0.000127020263671875,
      "step": 20811,
      "training_step_time": 0.4052760601043701
    },
    {
      "epoch": 0.0001270263671875,
      "model_forward_time": 0.11525845527648926,
      "step": 20812
    },
    {
      "epoch": 0.0001270263671875,
      "step": 20812,
      "training_step_time": 0.4111807346343994
    },
    {
      "epoch": 0.000127032470703125,
      "model_forward_time": 0.11578607559204102,
      "step": 20813
    },
    {
      "epoch": 0.000127032470703125,
      "step": 20813,
      "training_step_time": 0.5690150260925293
    },
    {
      "epoch": 0.00012703857421875,
      "model_forward_time": 0.11800217628479004,
      "step": 20814
    },
    {
      "epoch": 0.00012703857421875,
      "step": 20814,
      "training_step_time": 0.46976470947265625
    },
    {
      "epoch": 0.000127044677734375,
      "model_forward_time": 0.11535310745239258,
      "step": 20815
    },
    {
      "epoch": 0.000127044677734375,
      "step": 20815,
      "training_step_time": 0.4774742126464844
    },
    {
      "epoch": 0.00012705078125,
      "model_forward_time": 0.11421012878417969,
      "step": 20816
    },
    {
      "epoch": 0.00012705078125,
      "step": 20816,
      "training_step_time": 0.37494564056396484
    },
    {
      "epoch": 0.000127056884765625,
      "model_forward_time": 0.11510705947875977,
      "step": 20817
    },
    {
      "epoch": 0.000127056884765625,
      "step": 20817,
      "training_step_time": 0.4015333652496338
    },
    {
      "epoch": 0.00012706298828125,
      "model_forward_time": 0.11449694633483887,
      "step": 20818
    },
    {
      "epoch": 0.00012706298828125,
      "step": 20818,
      "training_step_time": 0.38625025749206543
    },
    {
      "epoch": 0.000127069091796875,
      "model_forward_time": 0.11537623405456543,
      "step": 20819
    },
    {
      "epoch": 0.000127069091796875,
      "step": 20819,
      "training_step_time": 0.4932243824005127
    },
    {
      "epoch": 0.0001270751953125,
      "grad_norm": 0.18371497094631195,
      "learning_rate": 7.776132415432234e-05,
      "loss": 0.0518,
      "step": 20820
    },
    {
      "epoch": 0.0001270751953125,
      "model_forward_time": 0.11514520645141602,
      "step": 20820
    },
    {
      "epoch": 0.0001270751953125,
      "step": 20820,
      "training_step_time": 0.39513635635375977
    },
    {
      "epoch": 0.000127081298828125,
      "model_forward_time": 0.11560511589050293,
      "step": 20821
    },
    {
      "epoch": 0.000127081298828125,
      "step": 20821,
      "training_step_time": 0.5084717273712158
    },
    {
      "epoch": 0.00012708740234375,
      "model_forward_time": 0.11465215682983398,
      "step": 20822
    },
    {
      "epoch": 0.00012708740234375,
      "step": 20822,
      "training_step_time": 0.3789074420928955
    },
    {
      "epoch": 0.000127093505859375,
      "model_forward_time": 0.11517214775085449,
      "step": 20823
    },
    {
      "epoch": 0.000127093505859375,
      "step": 20823,
      "training_step_time": 0.3747978210449219
    },
    {
      "epoch": 0.000127099609375,
      "model_forward_time": 0.11541962623596191,
      "step": 20824
    },
    {
      "epoch": 0.000127099609375,
      "step": 20824,
      "training_step_time": 0.3838012218475342
    },
    {
      "epoch": 0.000127105712890625,
      "model_forward_time": 0.11473846435546875,
      "step": 20825
    },
    {
      "epoch": 0.000127105712890625,
      "step": 20825,
      "training_step_time": 0.5766556262969971
    },
    {
      "epoch": 0.00012711181640625,
      "model_forward_time": 0.11537718772888184,
      "step": 20826
    },
    {
      "epoch": 0.00012711181640625,
      "step": 20826,
      "training_step_time": 0.37363243103027344
    },
    {
      "epoch": 0.000127117919921875,
      "model_forward_time": 0.1150975227355957,
      "step": 20827
    },
    {
      "epoch": 0.000127117919921875,
      "step": 20827,
      "training_step_time": 0.4041590690612793
    },
    {
      "epoch": 0.0001271240234375,
      "model_forward_time": 0.11825728416442871,
      "step": 20828
    },
    {
      "epoch": 0.0001271240234375,
      "step": 20828,
      "training_step_time": 0.5136692523956299
    },
    {
      "epoch": 0.000127130126953125,
      "model_forward_time": 0.11590266227722168,
      "step": 20829
    },
    {
      "epoch": 0.000127130126953125,
      "step": 20829,
      "training_step_time": 0.4155855178833008
    },
    {
      "epoch": 0.00012713623046875,
      "grad_norm": 0.15794780850410461,
      "learning_rate": 7.773840011360698e-05,
      "loss": 0.0505,
      "step": 20830
    },
    {
      "epoch": 0.00012713623046875,
      "model_forward_time": 0.11505913734436035,
      "step": 20830
    },
    {
      "epoch": 0.00012713623046875,
      "step": 20830,
      "training_step_time": 0.4442424774169922
    },
    {
      "epoch": 0.000127142333984375,
      "model_forward_time": 0.11492609977722168,
      "step": 20831
    },
    {
      "epoch": 0.000127142333984375,
      "step": 20831,
      "training_step_time": 0.4374523162841797
    },
    {
      "epoch": 0.0001271484375,
      "model_forward_time": 0.1155538558959961,
      "step": 20832
    },
    {
      "epoch": 0.0001271484375,
      "step": 20832,
      "training_step_time": 0.3993558883666992
    },
    {
      "epoch": 0.000127154541015625,
      "model_forward_time": 0.11552977561950684,
      "step": 20833
    },
    {
      "epoch": 0.000127154541015625,
      "step": 20833,
      "training_step_time": 0.4034271240234375
    },
    {
      "epoch": 0.00012716064453125,
      "model_forward_time": 0.114837646484375,
      "step": 20834
    },
    {
      "epoch": 0.00012716064453125,
      "step": 20834,
      "training_step_time": 0.4397251605987549
    },
    {
      "epoch": 0.000127166748046875,
      "model_forward_time": 0.11503362655639648,
      "step": 20835
    },
    {
      "epoch": 0.000127166748046875,
      "step": 20835,
      "training_step_time": 0.5082333087921143
    },
    {
      "epoch": 0.0001271728515625,
      "model_forward_time": 0.11549186706542969,
      "step": 20836
    },
    {
      "epoch": 0.0001271728515625,
      "step": 20836,
      "training_step_time": 0.3904883861541748
    },
    {
      "epoch": 0.000127178955078125,
      "model_forward_time": 0.11513161659240723,
      "step": 20837
    },
    {
      "epoch": 0.000127178955078125,
      "step": 20837,
      "training_step_time": 0.5585939884185791
    },
    {
      "epoch": 0.00012718505859375,
      "model_forward_time": 0.11564183235168457,
      "step": 20838
    },
    {
      "epoch": 0.00012718505859375,
      "step": 20838,
      "training_step_time": 0.39772915840148926
    },
    {
      "epoch": 0.000127191162109375,
      "model_forward_time": 0.11564111709594727,
      "step": 20839
    },
    {
      "epoch": 0.000127191162109375,
      "step": 20839,
      "training_step_time": 0.4040663242340088
    },
    {
      "epoch": 0.000127197265625,
      "grad_norm": 0.18927223980426788,
      "learning_rate": 7.771546764669807e-05,
      "loss": 0.0517,
      "step": 20840
    },
    {
      "epoch": 0.000127197265625,
      "model_forward_time": 0.1150658130645752,
      "step": 20840
    },
    {
      "epoch": 0.000127197265625,
      "step": 20840,
      "training_step_time": 0.3629171848297119
    },
    {
      "epoch": 0.000127203369140625,
      "model_forward_time": 0.11534523963928223,
      "step": 20841
    },
    {
      "epoch": 0.000127203369140625,
      "step": 20841,
      "training_step_time": 0.4146840572357178
    },
    {
      "epoch": 0.00012720947265625,
      "model_forward_time": 0.11544346809387207,
      "step": 20842
    },
    {
      "epoch": 0.00012720947265625,
      "step": 20842,
      "training_step_time": 0.5059573650360107
    },
    {
      "epoch": 0.000127215576171875,
      "model_forward_time": 0.11529374122619629,
      "step": 20843
    },
    {
      "epoch": 0.000127215576171875,
      "step": 20843,
      "training_step_time": 0.7096049785614014
    },
    {
      "epoch": 0.0001272216796875,
      "model_forward_time": 0.11474037170410156,
      "step": 20844
    },
    {
      "epoch": 0.0001272216796875,
      "step": 20844,
      "training_step_time": 0.38368701934814453
    },
    {
      "epoch": 0.000127227783203125,
      "model_forward_time": 0.1150979995727539,
      "step": 20845
    },
    {
      "epoch": 0.000127227783203125,
      "step": 20845,
      "training_step_time": 0.3938295841217041
    },
    {
      "epoch": 0.00012723388671875,
      "model_forward_time": 0.11460518836975098,
      "step": 20846
    },
    {
      "epoch": 0.00012723388671875,
      "step": 20846,
      "training_step_time": 0.3791654109954834
    },
    {
      "epoch": 0.000127239990234375,
      "model_forward_time": 0.11660146713256836,
      "step": 20847
    },
    {
      "epoch": 0.000127239990234375,
      "step": 20847,
      "training_step_time": 0.42871594429016113
    },
    {
      "epoch": 0.00012724609375,
      "model_forward_time": 0.11496353149414062,
      "step": 20848
    },
    {
      "epoch": 0.00012724609375,
      "step": 20848,
      "training_step_time": 0.44814181327819824
    },
    {
      "epoch": 0.000127252197265625,
      "model_forward_time": 0.1152184009552002,
      "step": 20849
    },
    {
      "epoch": 0.000127252197265625,
      "step": 20849,
      "training_step_time": 0.6158545017242432
    },
    {
      "epoch": 0.00012725830078125,
      "grad_norm": 0.10850992798805237,
      "learning_rate": 7.769252676056187e-05,
      "loss": 0.0507,
      "step": 20850
    },
    {
      "epoch": 0.00012725830078125,
      "model_forward_time": 0.1146996021270752,
      "step": 20850
    },
    {
      "epoch": 0.00012725830078125,
      "step": 20850,
      "training_step_time": 0.4245128631591797
    },
    {
      "epoch": 0.000127264404296875,
      "model_forward_time": 0.11488652229309082,
      "step": 20851
    },
    {
      "epoch": 0.000127264404296875,
      "step": 20851,
      "training_step_time": 0.466933012008667
    },
    {
      "epoch": 0.0001272705078125,
      "model_forward_time": 0.11506843566894531,
      "step": 20852
    },
    {
      "epoch": 0.0001272705078125,
      "step": 20852,
      "training_step_time": 0.3879406452178955
    },
    {
      "epoch": 0.000127276611328125,
      "model_forward_time": 0.11474180221557617,
      "step": 20853
    },
    {
      "epoch": 0.000127276611328125,
      "step": 20853,
      "training_step_time": 0.3842785358428955
    },
    {
      "epoch": 0.00012728271484375,
      "model_forward_time": 0.11490297317504883,
      "step": 20854
    },
    {
      "epoch": 0.00012728271484375,
      "step": 20854,
      "training_step_time": 0.4248178005218506
    },
    {
      "epoch": 0.000127288818359375,
      "model_forward_time": 0.11499762535095215,
      "step": 20855
    },
    {
      "epoch": 0.000127288818359375,
      "step": 20855,
      "training_step_time": 0.5610260963439941
    },
    {
      "epoch": 0.000127294921875,
      "model_forward_time": 0.11586570739746094,
      "step": 20856
    },
    {
      "epoch": 0.000127294921875,
      "step": 20856,
      "training_step_time": 0.444896936416626
    },
    {
      "epoch": 0.000127301025390625,
      "model_forward_time": 0.11487770080566406,
      "step": 20857
    },
    {
      "epoch": 0.000127301025390625,
      "step": 20857,
      "training_step_time": 0.44270920753479004
    },
    {
      "epoch": 0.00012730712890625,
      "model_forward_time": 0.1149907112121582,
      "step": 20858
    },
    {
      "epoch": 0.00012730712890625,
      "step": 20858,
      "training_step_time": 0.3930830955505371
    },
    {
      "epoch": 0.000127313232421875,
      "model_forward_time": 0.11513209342956543,
      "step": 20859
    },
    {
      "epoch": 0.000127313232421875,
      "step": 20859,
      "training_step_time": 0.39961671829223633
    },
    {
      "epoch": 0.0001273193359375,
      "grad_norm": 0.1400117129087448,
      "learning_rate": 7.766957746216721e-05,
      "loss": 0.0457,
      "step": 20860
    },
    {
      "epoch": 0.0001273193359375,
      "model_forward_time": 0.11506152153015137,
      "step": 20860
    },
    {
      "epoch": 0.0001273193359375,
      "step": 20860,
      "training_step_time": 0.4004354476928711
    },
    {
      "epoch": 0.000127325439453125,
      "model_forward_time": 0.11524319648742676,
      "step": 20861
    },
    {
      "epoch": 0.000127325439453125,
      "step": 20861,
      "training_step_time": 0.6784303188323975
    },
    {
      "epoch": 0.00012733154296875,
      "model_forward_time": 0.11490130424499512,
      "step": 20862
    },
    {
      "epoch": 0.00012733154296875,
      "step": 20862,
      "training_step_time": 0.47382140159606934
    },
    {
      "epoch": 0.000127337646484375,
      "model_forward_time": 0.1155853271484375,
      "step": 20863
    },
    {
      "epoch": 0.000127337646484375,
      "step": 20863,
      "training_step_time": 0.4265458583831787
    },
    {
      "epoch": 0.00012734375,
      "model_forward_time": 0.11500763893127441,
      "step": 20864
    },
    {
      "epoch": 0.00012734375,
      "step": 20864,
      "training_step_time": 0.4478750228881836
    },
    {
      "epoch": 0.000127349853515625,
      "model_forward_time": 0.11555337905883789,
      "step": 20865
    },
    {
      "epoch": 0.000127349853515625,
      "step": 20865,
      "training_step_time": 0.3890998363494873
    },
    {
      "epoch": 0.00012735595703125,
      "model_forward_time": 0.11446189880371094,
      "step": 20866
    },
    {
      "epoch": 0.00012735595703125,
      "step": 20866,
      "training_step_time": 0.3814873695373535
    },
    {
      "epoch": 0.000127362060546875,
      "model_forward_time": 0.11511731147766113,
      "step": 20867
    },
    {
      "epoch": 0.000127362060546875,
      "step": 20867,
      "training_step_time": 0.39168381690979004
    },
    {
      "epoch": 0.0001273681640625,
      "model_forward_time": 0.11612486839294434,
      "step": 20868
    },
    {
      "epoch": 0.0001273681640625,
      "step": 20868,
      "training_step_time": 0.39153289794921875
    },
    {
      "epoch": 0.000127374267578125,
      "model_forward_time": 0.11575174331665039,
      "step": 20869
    },
    {
      "epoch": 0.000127374267578125,
      "step": 20869,
      "training_step_time": 0.5108957290649414
    },
    {
      "epoch": 0.00012738037109375,
      "grad_norm": 0.1682995706796646,
      "learning_rate": 7.76466197584855e-05,
      "loss": 0.0487,
      "step": 20870
    },
    {
      "epoch": 0.00012738037109375,
      "model_forward_time": 0.1150515079498291,
      "step": 20870
    },
    {
      "epoch": 0.00012738037109375,
      "step": 20870,
      "training_step_time": 0.4559516906738281
    },
    {
      "epoch": 0.000127386474609375,
      "model_forward_time": 0.1155087947845459,
      "step": 20871
    },
    {
      "epoch": 0.000127386474609375,
      "step": 20871,
      "training_step_time": 0.4642925262451172
    },
    {
      "epoch": 0.000127392578125,
      "model_forward_time": 0.11572480201721191,
      "step": 20872
    },
    {
      "epoch": 0.000127392578125,
      "step": 20872,
      "training_step_time": 0.3950493335723877
    },
    {
      "epoch": 0.000127398681640625,
      "model_forward_time": 0.11495661735534668,
      "step": 20873
    },
    {
      "epoch": 0.000127398681640625,
      "step": 20873,
      "training_step_time": 0.40814948081970215
    },
    {
      "epoch": 0.00012740478515625,
      "model_forward_time": 0.1154794692993164,
      "step": 20874
    },
    {
      "epoch": 0.00012740478515625,
      "step": 20874,
      "training_step_time": 0.4569358825683594
    },
    {
      "epoch": 0.000127410888671875,
      "model_forward_time": 0.11504626274108887,
      "step": 20875
    },
    {
      "epoch": 0.000127410888671875,
      "step": 20875,
      "training_step_time": 0.4972100257873535
    },
    {
      "epoch": 0.0001274169921875,
      "model_forward_time": 0.11492538452148438,
      "step": 20876
    },
    {
      "epoch": 0.0001274169921875,
      "step": 20876,
      "training_step_time": 0.4855802059173584
    },
    {
      "epoch": 0.000127423095703125,
      "model_forward_time": 0.1146402359008789,
      "step": 20877
    },
    {
      "epoch": 0.000127423095703125,
      "step": 20877,
      "training_step_time": 0.40296101570129395
    },
    {
      "epoch": 0.00012742919921875,
      "model_forward_time": 0.11512160301208496,
      "step": 20878
    },
    {
      "epoch": 0.00012742919921875,
      "step": 20878,
      "training_step_time": 0.38890790939331055
    },
    {
      "epoch": 0.000127435302734375,
      "model_forward_time": 0.1150062084197998,
      "step": 20879
    },
    {
      "epoch": 0.000127435302734375,
      "step": 20879,
      "training_step_time": 0.38772130012512207
    },
    {
      "epoch": 0.00012744140625,
      "grad_norm": 0.13967783749103546,
      "learning_rate": 7.762365365649067e-05,
      "loss": 0.0502,
      "step": 20880
    },
    {
      "epoch": 0.00012744140625,
      "model_forward_time": 0.11547255516052246,
      "step": 20880
    },
    {
      "epoch": 0.00012744140625,
      "step": 20880,
      "training_step_time": 0.3978557586669922
    },
    {
      "epoch": 0.000127447509765625,
      "model_forward_time": 0.11569905281066895,
      "step": 20881
    },
    {
      "epoch": 0.000127447509765625,
      "step": 20881,
      "training_step_time": 0.38799405097961426
    },
    {
      "epoch": 0.00012745361328125,
      "model_forward_time": 0.11492514610290527,
      "step": 20882
    },
    {
      "epoch": 0.00012745361328125,
      "step": 20882,
      "training_step_time": 0.36670374870300293
    },
    {
      "epoch": 0.000127459716796875,
      "model_forward_time": 0.11519312858581543,
      "step": 20883
    },
    {
      "epoch": 0.000127459716796875,
      "step": 20883,
      "training_step_time": 0.4699714183807373
    },
    {
      "epoch": 0.0001274658203125,
      "model_forward_time": 0.11677408218383789,
      "step": 20884
    },
    {
      "epoch": 0.0001274658203125,
      "step": 20884,
      "training_step_time": 0.4195733070373535
    },
    {
      "epoch": 0.000127471923828125,
      "model_forward_time": 0.11542701721191406,
      "step": 20885
    },
    {
      "epoch": 0.000127471923828125,
      "step": 20885,
      "training_step_time": 0.43842005729675293
    },
    {
      "epoch": 0.00012747802734375,
      "model_forward_time": 0.1147301197052002,
      "step": 20886
    },
    {
      "epoch": 0.00012747802734375,
      "step": 20886,
      "training_step_time": 0.4009087085723877
    },
    {
      "epoch": 0.000127484130859375,
      "model_forward_time": 0.11496877670288086,
      "step": 20887
    },
    {
      "epoch": 0.000127484130859375,
      "step": 20887,
      "training_step_time": 0.3995695114135742
    },
    {
      "epoch": 0.000127490234375,
      "model_forward_time": 0.11550259590148926,
      "step": 20888
    },
    {
      "epoch": 0.000127490234375,
      "step": 20888,
      "training_step_time": 0.38784098625183105
    },
    {
      "epoch": 0.000127496337890625,
      "model_forward_time": 0.11592936515808105,
      "step": 20889
    },
    {
      "epoch": 0.000127496337890625,
      "step": 20889,
      "training_step_time": 0.4382662773132324
    },
    {
      "epoch": 0.00012750244140625,
      "grad_norm": 0.23966139554977417,
      "learning_rate": 7.760067916315921e-05,
      "loss": 0.0469,
      "step": 20890
    },
    {
      "epoch": 0.00012750244140625,
      "model_forward_time": 0.11542797088623047,
      "step": 20890
    },
    {
      "epoch": 0.00012750244140625,
      "step": 20890,
      "training_step_time": 0.45221948623657227
    },
    {
      "epoch": 0.000127508544921875,
      "model_forward_time": 0.11561179161071777,
      "step": 20891
    },
    {
      "epoch": 0.000127508544921875,
      "step": 20891,
      "training_step_time": 0.41184115409851074
    },
    {
      "epoch": 0.0001275146484375,
      "model_forward_time": 0.11457562446594238,
      "step": 20892
    },
    {
      "epoch": 0.0001275146484375,
      "step": 20892,
      "training_step_time": 0.4771444797515869
    },
    {
      "epoch": 0.000127520751953125,
      "model_forward_time": 0.11559391021728516,
      "step": 20893
    },
    {
      "epoch": 0.000127520751953125,
      "step": 20893,
      "training_step_time": 0.3973119258880615
    },
    {
      "epoch": 0.00012752685546875,
      "model_forward_time": 0.11697554588317871,
      "step": 20894
    },
    {
      "epoch": 0.00012752685546875,
      "step": 20894,
      "training_step_time": 0.3878631591796875
    },
    {
      "epoch": 0.000127532958984375,
      "model_forward_time": 0.11576557159423828,
      "step": 20895
    },
    {
      "epoch": 0.000127532958984375,
      "step": 20895,
      "training_step_time": 0.3863387107849121
    },
    {
      "epoch": 0.0001275390625,
      "model_forward_time": 0.11544275283813477,
      "step": 20896
    },
    {
      "epoch": 0.0001275390625,
      "step": 20896,
      "training_step_time": 0.39665937423706055
    },
    {
      "epoch": 0.000127545166015625,
      "model_forward_time": 0.11544466018676758,
      "step": 20897
    },
    {
      "epoch": 0.000127545166015625,
      "step": 20897,
      "training_step_time": 0.40662384033203125
    },
    {
      "epoch": 0.00012755126953125,
      "model_forward_time": 0.11501932144165039,
      "step": 20898
    },
    {
      "epoch": 0.00012755126953125,
      "step": 20898,
      "training_step_time": 0.41129469871520996
    },
    {
      "epoch": 0.000127557373046875,
      "model_forward_time": 0.11699891090393066,
      "step": 20899
    },
    {
      "epoch": 0.000127557373046875,
      "step": 20899,
      "training_step_time": 0.4995689392089844
    },
    {
      "epoch": 0.0001275634765625,
      "grad_norm": 0.13477212190628052,
      "learning_rate": 7.757769628547018e-05,
      "loss": 0.0523,
      "step": 20900
    },
    {
      "epoch": 0.0001275634765625,
      "model_forward_time": 0.11524248123168945,
      "step": 20900
    },
    {
      "epoch": 0.0001275634765625,
      "step": 20900,
      "training_step_time": 0.42848896980285645
    },
    {
      "epoch": 0.000127569580078125,
      "model_forward_time": 0.11581826210021973,
      "step": 20901
    },
    {
      "epoch": 0.000127569580078125,
      "step": 20901,
      "training_step_time": 0.385540246963501
    },
    {
      "epoch": 0.00012757568359375,
      "model_forward_time": 0.11508774757385254,
      "step": 20902
    },
    {
      "epoch": 0.00012757568359375,
      "step": 20902,
      "training_step_time": 0.391920804977417
    },
    {
      "epoch": 0.000127581787109375,
      "model_forward_time": 0.11539268493652344,
      "step": 20903
    },
    {
      "epoch": 0.000127581787109375,
      "step": 20903,
      "training_step_time": 0.475816011428833
    },
    {
      "epoch": 0.000127587890625,
      "model_forward_time": 0.1149294376373291,
      "step": 20904
    },
    {
      "epoch": 0.000127587890625,
      "step": 20904,
      "training_step_time": 0.45792531967163086
    },
    {
      "epoch": 0.000127593994140625,
      "model_forward_time": 0.11516547203063965,
      "step": 20905
    },
    {
      "epoch": 0.000127593994140625,
      "step": 20905,
      "training_step_time": 0.40532398223876953
    },
    {
      "epoch": 0.00012760009765625,
      "model_forward_time": 0.1157686710357666,
      "step": 20906
    },
    {
      "epoch": 0.00012760009765625,
      "step": 20906,
      "training_step_time": 0.3824348449707031
    },
    {
      "epoch": 0.000127606201171875,
      "model_forward_time": 0.11538338661193848,
      "step": 20907
    },
    {
      "epoch": 0.000127606201171875,
      "step": 20907,
      "training_step_time": 0.3798863887786865
    },
    {
      "epoch": 0.0001276123046875,
      "model_forward_time": 0.11544609069824219,
      "step": 20908
    },
    {
      "epoch": 0.0001276123046875,
      "step": 20908,
      "training_step_time": 0.38593006134033203
    },
    {
      "epoch": 0.000127618408203125,
      "model_forward_time": 0.11637735366821289,
      "step": 20909
    },
    {
      "epoch": 0.000127618408203125,
      "step": 20909,
      "training_step_time": 0.6831910610198975
    },
    {
      "epoch": 0.00012762451171875,
      "grad_norm": 0.21737748384475708,
      "learning_rate": 7.755470503040516e-05,
      "loss": 0.0502,
      "step": 20910
    },
    {
      "epoch": 0.00012762451171875,
      "model_forward_time": 0.11551237106323242,
      "step": 20910
    },
    {
      "epoch": 0.00012762451171875,
      "step": 20910,
      "training_step_time": 0.40054869651794434
    },
    {
      "epoch": 0.000127630615234375,
      "model_forward_time": 0.11492228507995605,
      "step": 20911
    },
    {
      "epoch": 0.000127630615234375,
      "step": 20911,
      "training_step_time": 0.3652794361114502
    },
    {
      "epoch": 0.00012763671875,
      "model_forward_time": 0.1151132583618164,
      "step": 20912
    },
    {
      "epoch": 0.00012763671875,
      "step": 20912,
      "training_step_time": 0.44144392013549805
    },
    {
      "epoch": 0.000127642822265625,
      "model_forward_time": 0.11474847793579102,
      "step": 20913
    },
    {
      "epoch": 0.000127642822265625,
      "step": 20913,
      "training_step_time": 0.4442415237426758
    },
    {
      "epoch": 0.00012764892578125,
      "model_forward_time": 0.11471176147460938,
      "step": 20914
    },
    {
      "epoch": 0.00012764892578125,
      "step": 20914,
      "training_step_time": 0.40224575996398926
    },
    {
      "epoch": 0.000127655029296875,
      "model_forward_time": 0.11546516418457031,
      "step": 20915
    },
    {
      "epoch": 0.000127655029296875,
      "step": 20915,
      "training_step_time": 0.3996009826660156
    },
    {
      "epoch": 0.0001276611328125,
      "model_forward_time": 0.11507916450500488,
      "step": 20916
    },
    {
      "epoch": 0.0001276611328125,
      "step": 20916,
      "training_step_time": 0.3955802917480469
    },
    {
      "epoch": 0.000127667236328125,
      "model_forward_time": 0.11582493782043457,
      "step": 20917
    },
    {
      "epoch": 0.000127667236328125,
      "step": 20917,
      "training_step_time": 0.489682674407959
    },
    {
      "epoch": 0.00012767333984375,
      "model_forward_time": 0.11476397514343262,
      "step": 20918
    },
    {
      "epoch": 0.00012767333984375,
      "step": 20918,
      "training_step_time": 0.38608860969543457
    },
    {
      "epoch": 0.000127679443359375,
      "model_forward_time": 0.11523938179016113,
      "step": 20919
    },
    {
      "epoch": 0.000127679443359375,
      "step": 20919,
      "training_step_time": 0.464078426361084
    },
    {
      "epoch": 0.000127685546875,
      "grad_norm": 0.12154275923967361,
      "learning_rate": 7.753170540494832e-05,
      "loss": 0.0533,
      "step": 20920
    },
    {
      "epoch": 0.000127685546875,
      "model_forward_time": 0.11557817459106445,
      "step": 20920
    },
    {
      "epoch": 0.000127685546875,
      "step": 20920,
      "training_step_time": 0.39962244033813477
    },
    {
      "epoch": 0.000127691650390625,
      "model_forward_time": 0.11534738540649414,
      "step": 20921
    },
    {
      "epoch": 0.000127691650390625,
      "step": 20921,
      "training_step_time": 0.39212846755981445
    },
    {
      "epoch": 0.00012769775390625,
      "model_forward_time": 0.1148982048034668,
      "step": 20922
    },
    {
      "epoch": 0.00012769775390625,
      "step": 20922,
      "training_step_time": 0.3924558162689209
    },
    {
      "epoch": 0.000127703857421875,
      "model_forward_time": 0.11556601524353027,
      "step": 20923
    },
    {
      "epoch": 0.000127703857421875,
      "step": 20923,
      "training_step_time": 0.38883399963378906
    },
    {
      "epoch": 0.0001277099609375,
      "model_forward_time": 0.11543679237365723,
      "step": 20924
    },
    {
      "epoch": 0.0001277099609375,
      "step": 20924,
      "training_step_time": 0.3912932872772217
    },
    {
      "epoch": 0.000127716064453125,
      "model_forward_time": 0.11592817306518555,
      "step": 20925
    },
    {
      "epoch": 0.000127716064453125,
      "step": 20925,
      "training_step_time": 0.38721537590026855
    },
    {
      "epoch": 0.00012772216796875,
      "model_forward_time": 0.11520695686340332,
      "step": 20926
    },
    {
      "epoch": 0.00012772216796875,
      "step": 20926,
      "training_step_time": 0.389324426651001
    },
    {
      "epoch": 0.000127728271484375,
      "model_forward_time": 0.11538410186767578,
      "step": 20927
    },
    {
      "epoch": 0.000127728271484375,
      "step": 20927,
      "training_step_time": 0.6926765441894531
    },
    {
      "epoch": 0.000127734375,
      "model_forward_time": 0.11930346488952637,
      "step": 20928
    },
    {
      "epoch": 0.000127734375,
      "step": 20928,
      "training_step_time": 0.38463902473449707
    },
    {
      "epoch": 0.000127740478515625,
      "model_forward_time": 0.1174471378326416,
      "step": 20929
    },
    {
      "epoch": 0.000127740478515625,
      "step": 20929,
      "training_step_time": 0.3833925724029541
    },
    {
      "epoch": 0.00012774658203125,
      "grad_norm": 0.24098235368728638,
      "learning_rate": 7.750869741608628e-05,
      "loss": 0.0481,
      "step": 20930
    },
    {
      "epoch": 0.00012774658203125,
      "model_forward_time": 0.115325927734375,
      "step": 20930
    },
    {
      "epoch": 0.00012774658203125,
      "step": 20930,
      "training_step_time": 0.3935532569885254
    },
    {
      "epoch": 0.000127752685546875,
      "model_forward_time": 0.11806941032409668,
      "step": 20931
    },
    {
      "epoch": 0.000127752685546875,
      "step": 20931,
      "training_step_time": 0.4120495319366455
    },
    {
      "epoch": 0.0001277587890625,
      "model_forward_time": 0.11765313148498535,
      "step": 20932
    },
    {
      "epoch": 0.0001277587890625,
      "step": 20932,
      "training_step_time": 0.4077134132385254
    },
    {
      "epoch": 0.000127764892578125,
      "model_forward_time": 0.11896419525146484,
      "step": 20933
    },
    {
      "epoch": 0.000127764892578125,
      "step": 20933,
      "training_step_time": 0.5327954292297363
    },
    {
      "epoch": 0.00012777099609375,
      "model_forward_time": 0.11797213554382324,
      "step": 20934
    },
    {
      "epoch": 0.00012777099609375,
      "step": 20934,
      "training_step_time": 0.38510966300964355
    },
    {
      "epoch": 0.000127777099609375,
      "model_forward_time": 0.11579275131225586,
      "step": 20935
    },
    {
      "epoch": 0.000127777099609375,
      "step": 20935,
      "training_step_time": 0.3908956050872803
    },
    {
      "epoch": 0.000127783203125,
      "model_forward_time": 0.1153862476348877,
      "step": 20936
    },
    {
      "epoch": 0.000127783203125,
      "step": 20936,
      "training_step_time": 0.41127634048461914
    },
    {
      "epoch": 0.000127789306640625,
      "model_forward_time": 0.11537408828735352,
      "step": 20937
    },
    {
      "epoch": 0.000127789306640625,
      "step": 20937,
      "training_step_time": 0.39812612533569336
    },
    {
      "epoch": 0.00012779541015625,
      "model_forward_time": 0.11534333229064941,
      "step": 20938
    },
    {
      "epoch": 0.00012779541015625,
      "step": 20938,
      "training_step_time": 0.38217949867248535
    },
    {
      "epoch": 0.000127801513671875,
      "model_forward_time": 0.11480140686035156,
      "step": 20939
    },
    {
      "epoch": 0.000127801513671875,
      "step": 20939,
      "training_step_time": 0.7185719013214111
    },
    {
      "epoch": 0.0001278076171875,
      "grad_norm": 0.16372790932655334,
      "learning_rate": 7.748568107080832e-05,
      "loss": 0.0519,
      "step": 20940
    },
    {
      "epoch": 0.0001278076171875,
      "model_forward_time": 0.1150057315826416,
      "step": 20940
    },
    {
      "epoch": 0.0001278076171875,
      "step": 20940,
      "training_step_time": 0.3620436191558838
    },
    {
      "epoch": 0.000127813720703125,
      "model_forward_time": 0.11530399322509766,
      "step": 20941
    },
    {
      "epoch": 0.000127813720703125,
      "step": 20941,
      "training_step_time": 0.45908522605895996
    },
    {
      "epoch": 0.00012781982421875,
      "model_forward_time": 0.11582040786743164,
      "step": 20942
    },
    {
      "epoch": 0.00012781982421875,
      "step": 20942,
      "training_step_time": 0.4630272388458252
    },
    {
      "epoch": 0.000127825927734375,
      "model_forward_time": 0.1144719123840332,
      "step": 20943
    },
    {
      "epoch": 0.000127825927734375,
      "step": 20943,
      "training_step_time": 0.43979907035827637
    },
    {
      "epoch": 0.00012783203125,
      "model_forward_time": 0.11452412605285645,
      "step": 20944
    },
    {
      "epoch": 0.00012783203125,
      "step": 20944,
      "training_step_time": 0.41675591468811035
    },
    {
      "epoch": 0.000127838134765625,
      "model_forward_time": 0.1149592399597168,
      "step": 20945
    },
    {
      "epoch": 0.000127838134765625,
      "step": 20945,
      "training_step_time": 0.4109036922454834
    },
    {
      "epoch": 0.00012784423828125,
      "model_forward_time": 0.11539936065673828,
      "step": 20946
    },
    {
      "epoch": 0.00012784423828125,
      "step": 20946,
      "training_step_time": 0.41248464584350586
    },
    {
      "epoch": 0.000127850341796875,
      "model_forward_time": 0.11502766609191895,
      "step": 20947
    },
    {
      "epoch": 0.000127850341796875,
      "step": 20947,
      "training_step_time": 0.400179386138916
    },
    {
      "epoch": 0.0001278564453125,
      "model_forward_time": 0.11504650115966797,
      "step": 20948
    },
    {
      "epoch": 0.0001278564453125,
      "step": 20948,
      "training_step_time": 0.4066143035888672
    },
    {
      "epoch": 0.000127862548828125,
      "model_forward_time": 0.1150515079498291,
      "step": 20949
    },
    {
      "epoch": 0.000127862548828125,
      "step": 20949,
      "training_step_time": 0.39722156524658203
    },
    {
      "epoch": 0.00012786865234375,
      "grad_norm": 0.11937615275382996,
      "learning_rate": 7.746265637610613e-05,
      "loss": 0.0476,
      "step": 20950
    },
    {
      "epoch": 0.00012786865234375,
      "model_forward_time": 0.11496305465698242,
      "step": 20950
    },
    {
      "epoch": 0.00012786865234375,
      "step": 20950,
      "training_step_time": 0.3948230743408203
    },
    {
      "epoch": 0.000127874755859375,
      "model_forward_time": 0.11545276641845703,
      "step": 20951
    },
    {
      "epoch": 0.000127874755859375,
      "step": 20951,
      "training_step_time": 0.5134594440460205
    },
    {
      "epoch": 0.000127880859375,
      "model_forward_time": 0.11585164070129395,
      "step": 20952
    },
    {
      "epoch": 0.000127880859375,
      "step": 20952,
      "training_step_time": 0.3908274173736572
    },
    {
      "epoch": 0.000127886962890625,
      "model_forward_time": 0.11515927314758301,
      "step": 20953
    },
    {
      "epoch": 0.000127886962890625,
      "step": 20953,
      "training_step_time": 0.3997647762298584
    },
    {
      "epoch": 0.00012789306640625,
      "model_forward_time": 0.11547279357910156,
      "step": 20954
    },
    {
      "epoch": 0.00012789306640625,
      "step": 20954,
      "training_step_time": 0.38393092155456543
    },
    {
      "epoch": 0.000127899169921875,
      "model_forward_time": 0.11616683006286621,
      "step": 20955
    },
    {
      "epoch": 0.000127899169921875,
      "step": 20955,
      "training_step_time": 0.42618322372436523
    },
    {
      "epoch": 0.0001279052734375,
      "model_forward_time": 0.11572265625,
      "step": 20956
    },
    {
      "epoch": 0.0001279052734375,
      "step": 20956,
      "training_step_time": 0.4851233959197998
    },
    {
      "epoch": 0.000127911376953125,
      "model_forward_time": 0.11569857597351074,
      "step": 20957
    },
    {
      "epoch": 0.000127911376953125,
      "step": 20957,
      "training_step_time": 0.5452289581298828
    },
    {
      "epoch": 0.00012791748046875,
      "model_forward_time": 0.11562705039978027,
      "step": 20958
    },
    {
      "epoch": 0.00012791748046875,
      "step": 20958,
      "training_step_time": 0.43419599533081055
    },
    {
      "epoch": 0.000127923583984375,
      "model_forward_time": 0.1156301498413086,
      "step": 20959
    },
    {
      "epoch": 0.000127923583984375,
      "step": 20959,
      "training_step_time": 0.40262365341186523
    },
    {
      "epoch": 0.0001279296875,
      "grad_norm": 0.1340685337781906,
      "learning_rate": 7.743962333897405e-05,
      "loss": 0.0513,
      "step": 20960
    },
    {
      "epoch": 0.0001279296875,
      "model_forward_time": 0.11499142646789551,
      "step": 20960
    },
    {
      "epoch": 0.0001279296875,
      "step": 20960,
      "training_step_time": 0.41817569732666016
    },
    {
      "epoch": 0.000127935791015625,
      "model_forward_time": 0.11677908897399902,
      "step": 20961
    },
    {
      "epoch": 0.000127935791015625,
      "step": 20961,
      "training_step_time": 0.3952207565307617
    },
    {
      "epoch": 0.00012794189453125,
      "model_forward_time": 0.11512374877929688,
      "step": 20962
    },
    {
      "epoch": 0.00012794189453125,
      "step": 20962,
      "training_step_time": 0.3928701877593994
    },
    {
      "epoch": 0.000127947998046875,
      "model_forward_time": 0.11577939987182617,
      "step": 20963
    },
    {
      "epoch": 0.000127947998046875,
      "step": 20963,
      "training_step_time": 0.5679898262023926
    },
    {
      "epoch": 0.0001279541015625,
      "model_forward_time": 0.1154325008392334,
      "step": 20964
    },
    {
      "epoch": 0.0001279541015625,
      "step": 20964,
      "training_step_time": 0.3876335620880127
    },
    {
      "epoch": 0.000127960205078125,
      "model_forward_time": 0.11569857597351074,
      "step": 20965
    },
    {
      "epoch": 0.000127960205078125,
      "step": 20965,
      "training_step_time": 0.3921635150909424
    },
    {
      "epoch": 0.00012796630859375,
      "model_forward_time": 0.11494255065917969,
      "step": 20966
    },
    {
      "epoch": 0.00012796630859375,
      "step": 20966,
      "training_step_time": 0.38640522956848145
    },
    {
      "epoch": 0.000127972412109375,
      "model_forward_time": 0.11597466468811035,
      "step": 20967
    },
    {
      "epoch": 0.000127972412109375,
      "step": 20967,
      "training_step_time": 0.3810896873474121
    },
    {
      "epoch": 0.000127978515625,
      "model_forward_time": 0.11553525924682617,
      "step": 20968
    },
    {
      "epoch": 0.000127978515625,
      "step": 20968,
      "training_step_time": 0.3989527225494385
    },
    {
      "epoch": 0.000127984619140625,
      "model_forward_time": 0.11641979217529297,
      "step": 20969
    },
    {
      "epoch": 0.000127984619140625,
      "step": 20969,
      "training_step_time": 0.5734357833862305
    },
    {
      "epoch": 0.00012799072265625,
      "grad_norm": 0.1159597784280777,
      "learning_rate": 7.741658196640892e-05,
      "loss": 0.0545,
      "step": 20970
    },
    {
      "epoch": 0.00012799072265625,
      "model_forward_time": 0.11494946479797363,
      "step": 20970
    },
    {
      "epoch": 0.00012799072265625,
      "step": 20970,
      "training_step_time": 0.44153857231140137
    },
    {
      "epoch": 0.000127996826171875,
      "model_forward_time": 0.1152353286743164,
      "step": 20971
    },
    {
      "epoch": 0.000127996826171875,
      "step": 20971,
      "training_step_time": 0.4939846992492676
    },
    {
      "epoch": 0.0001280029296875,
      "model_forward_time": 0.11466026306152344,
      "step": 20972
    },
    {
      "epoch": 0.0001280029296875,
      "step": 20972,
      "training_step_time": 0.47536683082580566
    },
    {
      "epoch": 0.000128009033203125,
      "model_forward_time": 0.1143951416015625,
      "step": 20973
    },
    {
      "epoch": 0.000128009033203125,
      "step": 20973,
      "training_step_time": 0.40976786613464355
    },
    {
      "epoch": 0.00012801513671875,
      "model_forward_time": 0.11477327346801758,
      "step": 20974
    },
    {
      "epoch": 0.00012801513671875,
      "step": 20974,
      "training_step_time": 0.4498908519744873
    },
    {
      "epoch": 0.000128021240234375,
      "model_forward_time": 0.11503982543945312,
      "step": 20975
    },
    {
      "epoch": 0.000128021240234375,
      "step": 20975,
      "training_step_time": 0.37863826751708984
    },
    {
      "epoch": 0.00012802734375,
      "model_forward_time": 0.11502504348754883,
      "step": 20976
    },
    {
      "epoch": 0.00012802734375,
      "step": 20976,
      "training_step_time": 0.39719343185424805
    },
    {
      "epoch": 0.000128033447265625,
      "model_forward_time": 0.11530756950378418,
      "step": 20977
    },
    {
      "epoch": 0.000128033447265625,
      "step": 20977,
      "training_step_time": 0.3873262405395508
    },
    {
      "epoch": 0.00012803955078125,
      "model_forward_time": 0.11493968963623047,
      "step": 20978
    },
    {
      "epoch": 0.00012803955078125,
      "step": 20978,
      "training_step_time": 0.3907287120819092
    },
    {
      "epoch": 0.000128045654296875,
      "model_forward_time": 0.11590170860290527,
      "step": 20979
    },
    {
      "epoch": 0.000128045654296875,
      "step": 20979,
      "training_step_time": 0.3900337219238281
    },
    {
      "epoch": 0.0001280517578125,
      "grad_norm": 0.15766023099422455,
      "learning_rate": 7.739353226541009e-05,
      "loss": 0.0537,
      "step": 20980
    },
    {
      "epoch": 0.0001280517578125,
      "model_forward_time": 0.11695146560668945,
      "step": 20980
    },
    {
      "epoch": 0.0001280517578125,
      "step": 20980,
      "training_step_time": 0.3886227607727051
    },
    {
      "epoch": 0.000128057861328125,
      "model_forward_time": 0.11576056480407715,
      "step": 20981
    },
    {
      "epoch": 0.000128057861328125,
      "step": 20981,
      "training_step_time": 0.3998904228210449
    },
    {
      "epoch": 0.00012806396484375,
      "model_forward_time": 0.11536049842834473,
      "step": 20982
    },
    {
      "epoch": 0.00012806396484375,
      "step": 20982,
      "training_step_time": 0.396930456161499
    },
    {
      "epoch": 0.000128070068359375,
      "model_forward_time": 0.11522483825683594,
      "step": 20983
    },
    {
      "epoch": 0.000128070068359375,
      "step": 20983,
      "training_step_time": 0.4601478576660156
    },
    {
      "epoch": 0.000128076171875,
      "model_forward_time": 0.11707711219787598,
      "step": 20984
    },
    {
      "epoch": 0.000128076171875,
      "step": 20984,
      "training_step_time": 0.5059731006622314
    },
    {
      "epoch": 0.000128082275390625,
      "model_forward_time": 0.11551904678344727,
      "step": 20985
    },
    {
      "epoch": 0.000128082275390625,
      "step": 20985,
      "training_step_time": 0.4333822727203369
    },
    {
      "epoch": 0.00012808837890625,
      "model_forward_time": 0.11556434631347656,
      "step": 20986
    },
    {
      "epoch": 0.00012808837890625,
      "step": 20986,
      "training_step_time": 0.4182765483856201
    },
    {
      "epoch": 0.000128094482421875,
      "model_forward_time": 0.1159062385559082,
      "step": 20987
    },
    {
      "epoch": 0.000128094482421875,
      "step": 20987,
      "training_step_time": 0.5946509838104248
    },
    {
      "epoch": 0.0001281005859375,
      "model_forward_time": 0.11509227752685547,
      "step": 20988
    },
    {
      "epoch": 0.0001281005859375,
      "step": 20988,
      "training_step_time": 0.48090028762817383
    },
    {
      "epoch": 0.000128106689453125,
      "model_forward_time": 0.11490011215209961,
      "step": 20989
    },
    {
      "epoch": 0.000128106689453125,
      "step": 20989,
      "training_step_time": 0.4098234176635742
    },
    {
      "epoch": 0.00012811279296875,
      "grad_norm": 0.1577233523130417,
      "learning_rate": 7.737047424297941e-05,
      "loss": 0.0546,
      "step": 20990
    },
    {
      "epoch": 0.00012811279296875,
      "model_forward_time": 0.11478352546691895,
      "step": 20990
    },
    {
      "epoch": 0.00012811279296875,
      "step": 20990,
      "training_step_time": 0.38983941078186035
    },
    {
      "epoch": 0.000128118896484375,
      "model_forward_time": 0.11476922035217285,
      "step": 20991
    },
    {
      "epoch": 0.000128118896484375,
      "step": 20991,
      "training_step_time": 0.3836548328399658
    },
    {
      "epoch": 0.000128125,
      "model_forward_time": 0.1143946647644043,
      "step": 20992
    },
    {
      "epoch": 0.000128125,
      "step": 20992,
      "training_step_time": 0.39605045318603516
    },
    {
      "epoch": 0.000128131103515625,
      "model_forward_time": 0.11543011665344238,
      "step": 20993
    },
    {
      "epoch": 0.000128131103515625,
      "step": 20993,
      "training_step_time": 0.4826960563659668
    },
    {
      "epoch": 0.00012813720703125,
      "model_forward_time": 0.11531424522399902,
      "step": 20994
    },
    {
      "epoch": 0.00012813720703125,
      "step": 20994,
      "training_step_time": 0.3873329162597656
    },
    {
      "epoch": 0.000128143310546875,
      "model_forward_time": 0.11525774002075195,
      "step": 20995
    },
    {
      "epoch": 0.000128143310546875,
      "step": 20995,
      "training_step_time": 0.39697742462158203
    },
    {
      "epoch": 0.0001281494140625,
      "model_forward_time": 0.11509895324707031,
      "step": 20996
    },
    {
      "epoch": 0.0001281494140625,
      "step": 20996,
      "training_step_time": 0.3841431140899658
    },
    {
      "epoch": 0.000128155517578125,
      "model_forward_time": 0.11832141876220703,
      "step": 20997
    },
    {
      "epoch": 0.000128155517578125,
      "step": 20997,
      "training_step_time": 0.42868900299072266
    },
    {
      "epoch": 0.00012816162109375,
      "model_forward_time": 0.11712861061096191,
      "step": 20998
    },
    {
      "epoch": 0.00012816162109375,
      "step": 20998,
      "training_step_time": 0.40273427963256836
    },
    {
      "epoch": 0.000128167724609375,
      "model_forward_time": 0.11779332160949707,
      "step": 20999
    },
    {
      "epoch": 0.000128167724609375,
      "step": 20999,
      "training_step_time": 0.6978416442871094
    },
    {
      "epoch": 0.000128173828125,
      "grad_norm": 0.11594480276107788,
      "learning_rate": 7.734740790612136e-05,
      "loss": 0.0568,
      "step": 21000
    },
    {
      "epoch": 0.000128173828125,
      "model_forward_time": 0.11633729934692383,
      "step": 21000
    },
    {
      "epoch": 0.000128173828125,
      "step": 21000,
      "training_step_time": 0.3629417419433594
    },
    {
      "epoch": 0.000128179931640625,
      "model_forward_time": 0.11378717422485352,
      "step": 21001
    },
    {
      "epoch": 0.000128179931640625,
      "step": 21001,
      "training_step_time": 0.38505983352661133
    },
    {
      "epoch": 0.00012818603515625,
      "model_forward_time": 0.11362910270690918,
      "step": 21002
    },
    {
      "epoch": 0.00012818603515625,
      "step": 21002,
      "training_step_time": 0.3704416751861572
    },
    {
      "epoch": 0.000128192138671875,
      "model_forward_time": 0.11407947540283203,
      "step": 21003
    },
    {
      "epoch": 0.000128192138671875,
      "step": 21003,
      "training_step_time": 0.40496373176574707
    },
    {
      "epoch": 0.0001281982421875,
      "model_forward_time": 0.11451125144958496,
      "step": 21004
    },
    {
      "epoch": 0.0001281982421875,
      "step": 21004,
      "training_step_time": 0.45729804039001465
    },
    {
      "epoch": 0.000128204345703125,
      "model_forward_time": 0.11442708969116211,
      "step": 21005
    },
    {
      "epoch": 0.000128204345703125,
      "step": 21005,
      "training_step_time": 0.38700389862060547
    },
    {
      "epoch": 0.00012821044921875,
      "model_forward_time": 0.11504459381103516,
      "step": 21006
    },
    {
      "epoch": 0.00012821044921875,
      "step": 21006,
      "training_step_time": 0.38741278648376465
    },
    {
      "epoch": 0.000128216552734375,
      "model_forward_time": 0.11504244804382324,
      "step": 21007
    },
    {
      "epoch": 0.000128216552734375,
      "step": 21007,
      "training_step_time": 0.39418983459472656
    },
    {
      "epoch": 0.00012822265625,
      "model_forward_time": 0.11538100242614746,
      "step": 21008
    },
    {
      "epoch": 0.00012822265625,
      "step": 21008,
      "training_step_time": 0.3886263370513916
    },
    {
      "epoch": 0.000128228759765625,
      "model_forward_time": 0.11558103561401367,
      "step": 21009
    },
    {
      "epoch": 0.000128228759765625,
      "step": 21009,
      "training_step_time": 0.3980128765106201
    },
    {
      "epoch": 0.00012823486328125,
      "grad_norm": 0.15138742327690125,
      "learning_rate": 7.732433326184283e-05,
      "loss": 0.0469,
      "step": 21010
    },
    {
      "epoch": 0.00012823486328125,
      "model_forward_time": 0.11515688896179199,
      "step": 21010
    },
    {
      "epoch": 0.00012823486328125,
      "step": 21010,
      "training_step_time": 0.39824414253234863
    },
    {
      "epoch": 0.000128240966796875,
      "model_forward_time": 0.11532735824584961,
      "step": 21011
    },
    {
      "epoch": 0.000128240966796875,
      "step": 21011,
      "training_step_time": 0.40878748893737793
    },
    {
      "epoch": 0.0001282470703125,
      "model_forward_time": 0.11613678932189941,
      "step": 21012
    },
    {
      "epoch": 0.0001282470703125,
      "step": 21012,
      "training_step_time": 0.3949275016784668
    },
    {
      "epoch": 0.000128253173828125,
      "model_forward_time": 0.11505794525146484,
      "step": 21013
    },
    {
      "epoch": 0.000128253173828125,
      "step": 21013,
      "training_step_time": 0.3683788776397705
    },
    {
      "epoch": 0.00012825927734375,
      "model_forward_time": 0.11543536186218262,
      "step": 21014
    },
    {
      "epoch": 0.00012825927734375,
      "step": 21014,
      "training_step_time": 0.4635176658630371
    },
    {
      "epoch": 0.000128265380859375,
      "model_forward_time": 0.11548399925231934,
      "step": 21015
    },
    {
      "epoch": 0.000128265380859375,
      "step": 21015,
      "training_step_time": 0.4062347412109375
    },
    {
      "epoch": 0.000128271484375,
      "model_forward_time": 0.11580634117126465,
      "step": 21016
    },
    {
      "epoch": 0.000128271484375,
      "step": 21016,
      "training_step_time": 0.4553089141845703
    },
    {
      "epoch": 0.000128277587890625,
      "model_forward_time": 0.11537551879882812,
      "step": 21017
    },
    {
      "epoch": 0.000128277587890625,
      "step": 21017,
      "training_step_time": 0.4871065616607666
    },
    {
      "epoch": 0.00012828369140625,
      "model_forward_time": 0.11522936820983887,
      "step": 21018
    },
    {
      "epoch": 0.00012828369140625,
      "step": 21018,
      "training_step_time": 0.4784965515136719
    },
    {
      "epoch": 0.000128289794921875,
      "model_forward_time": 0.1155855655670166,
      "step": 21019
    },
    {
      "epoch": 0.000128289794921875,
      "step": 21019,
      "training_step_time": 0.3910229206085205
    },
    {
      "epoch": 0.0001282958984375,
      "grad_norm": 0.1909695565700531,
      "learning_rate": 7.730125031715331e-05,
      "loss": 0.0481,
      "step": 21020
    },
    {
      "epoch": 0.0001282958984375,
      "model_forward_time": 0.11552929878234863,
      "step": 21020
    },
    {
      "epoch": 0.0001282958984375,
      "step": 21020,
      "training_step_time": 0.3899838924407959
    },
    {
      "epoch": 0.000128302001953125,
      "model_forward_time": 0.11535930633544922,
      "step": 21021
    },
    {
      "epoch": 0.000128302001953125,
      "step": 21021,
      "training_step_time": 0.39025139808654785
    },
    {
      "epoch": 0.00012830810546875,
      "model_forward_time": 0.11546778678894043,
      "step": 21022
    },
    {
      "epoch": 0.00012830810546875,
      "step": 21022,
      "training_step_time": 0.3899402618408203
    },
    {
      "epoch": 0.000128314208984375,
      "model_forward_time": 0.1150062084197998,
      "step": 21023
    },
    {
      "epoch": 0.000128314208984375,
      "step": 21023,
      "training_step_time": 0.3832664489746094
    },
    {
      "epoch": 0.0001283203125,
      "model_forward_time": 0.11554718017578125,
      "step": 21024
    },
    {
      "epoch": 0.0001283203125,
      "step": 21024,
      "training_step_time": 0.3975837230682373
    },
    {
      "epoch": 0.000128326416015625,
      "model_forward_time": 0.11567068099975586,
      "step": 21025
    },
    {
      "epoch": 0.000128326416015625,
      "step": 21025,
      "training_step_time": 0.4078176021575928
    },
    {
      "epoch": 0.00012833251953125,
      "model_forward_time": 0.11639952659606934,
      "step": 21026
    },
    {
      "epoch": 0.00012833251953125,
      "step": 21026,
      "training_step_time": 0.38162970542907715
    },
    {
      "epoch": 0.000128338623046875,
      "model_forward_time": 0.11602401733398438,
      "step": 21027
    },
    {
      "epoch": 0.000128338623046875,
      "step": 21027,
      "training_step_time": 0.3871030807495117
    },
    {
      "epoch": 0.0001283447265625,
      "model_forward_time": 0.11694025993347168,
      "step": 21028
    },
    {
      "epoch": 0.0001283447265625,
      "step": 21028,
      "training_step_time": 0.39936089515686035
    },
    {
      "epoch": 0.000128350830078125,
      "model_forward_time": 0.11616206169128418,
      "step": 21029
    },
    {
      "epoch": 0.000128350830078125,
      "step": 21029,
      "training_step_time": 0.5026538372039795
    },
    {
      "epoch": 0.00012835693359375,
      "grad_norm": 0.1292628049850464,
      "learning_rate": 7.727815907906481e-05,
      "loss": 0.0493,
      "step": 21030
    },
    {
      "epoch": 0.00012835693359375,
      "model_forward_time": 0.11753630638122559,
      "step": 21030
    },
    {
      "epoch": 0.00012835693359375,
      "step": 21030,
      "training_step_time": 0.4317433834075928
    },
    {
      "epoch": 0.000128363037109375,
      "model_forward_time": 0.11552214622497559,
      "step": 21031
    },
    {
      "epoch": 0.000128363037109375,
      "step": 21031,
      "training_step_time": 0.4393651485443115
    },
    {
      "epoch": 0.000128369140625,
      "model_forward_time": 0.11627340316772461,
      "step": 21032
    },
    {
      "epoch": 0.000128369140625,
      "step": 21032,
      "training_step_time": 0.40024328231811523
    },
    {
      "epoch": 0.000128375244140625,
      "model_forward_time": 0.1158301830291748,
      "step": 21033
    },
    {
      "epoch": 0.000128375244140625,
      "step": 21033,
      "training_step_time": 0.4579305648803711
    },
    {
      "epoch": 0.00012838134765625,
      "model_forward_time": 0.11539077758789062,
      "step": 21034
    },
    {
      "epoch": 0.00012838134765625,
      "step": 21034,
      "training_step_time": 0.40210890769958496
    },
    {
      "epoch": 0.000128387451171875,
      "model_forward_time": 0.11576724052429199,
      "step": 21035
    },
    {
      "epoch": 0.000128387451171875,
      "step": 21035,
      "training_step_time": 0.39954590797424316
    },
    {
      "epoch": 0.0001283935546875,
      "model_forward_time": 0.11658716201782227,
      "step": 21036
    },
    {
      "epoch": 0.0001283935546875,
      "step": 21036,
      "training_step_time": 0.4000692367553711
    },
    {
      "epoch": 0.000128399658203125,
      "model_forward_time": 0.11554932594299316,
      "step": 21037
    },
    {
      "epoch": 0.000128399658203125,
      "step": 21037,
      "training_step_time": 0.39189720153808594
    },
    {
      "epoch": 0.00012840576171875,
      "model_forward_time": 0.11584663391113281,
      "step": 21038
    },
    {
      "epoch": 0.00012840576171875,
      "step": 21038,
      "training_step_time": 0.4049978256225586
    },
    {
      "epoch": 0.000128411865234375,
      "model_forward_time": 0.11519241333007812,
      "step": 21039
    },
    {
      "epoch": 0.000128411865234375,
      "step": 21039,
      "training_step_time": 0.45964503288269043
    },
    {
      "epoch": 0.00012841796875,
      "grad_norm": 0.1475236415863037,
      "learning_rate": 7.725505955459183e-05,
      "loss": 0.0465,
      "step": 21040
    },
    {
      "epoch": 0.00012841796875,
      "model_forward_time": 0.11698317527770996,
      "step": 21040
    },
    {
      "epoch": 0.00012841796875,
      "step": 21040,
      "training_step_time": 0.3961780071258545
    },
    {
      "epoch": 0.000128424072265625,
      "model_forward_time": 0.11607694625854492,
      "step": 21041
    },
    {
      "epoch": 0.000128424072265625,
      "step": 21041,
      "training_step_time": 0.3902590274810791
    },
    {
      "epoch": 0.00012843017578125,
      "model_forward_time": 0.11528253555297852,
      "step": 21042
    },
    {
      "epoch": 0.00012843017578125,
      "step": 21042,
      "training_step_time": 0.3820803165435791
    },
    {
      "epoch": 0.000128436279296875,
      "model_forward_time": 0.11611676216125488,
      "step": 21043
    },
    {
      "epoch": 0.000128436279296875,
      "step": 21043,
      "training_step_time": 0.4117777347564697
    },
    {
      "epoch": 0.0001284423828125,
      "model_forward_time": 0.11507916450500488,
      "step": 21044
    },
    {
      "epoch": 0.0001284423828125,
      "step": 21044,
      "training_step_time": 0.4277310371398926
    },
    {
      "epoch": 0.000128448486328125,
      "model_forward_time": 0.11764717102050781,
      "step": 21045
    },
    {
      "epoch": 0.000128448486328125,
      "step": 21045,
      "training_step_time": 0.42478299140930176
    },
    {
      "epoch": 0.00012845458984375,
      "model_forward_time": 0.11606502532958984,
      "step": 21046
    },
    {
      "epoch": 0.00012845458984375,
      "step": 21046,
      "training_step_time": 0.4930605888366699
    },
    {
      "epoch": 0.000128460693359375,
      "model_forward_time": 0.11527013778686523,
      "step": 21047
    },
    {
      "epoch": 0.000128460693359375,
      "step": 21047,
      "training_step_time": 0.4062514305114746
    },
    {
      "epoch": 0.000128466796875,
      "model_forward_time": 0.11585187911987305,
      "step": 21048
    },
    {
      "epoch": 0.000128466796875,
      "step": 21048,
      "training_step_time": 0.47586703300476074
    },
    {
      "epoch": 0.000128472900390625,
      "model_forward_time": 0.116119384765625,
      "step": 21049
    },
    {
      "epoch": 0.000128472900390625,
      "step": 21049,
      "training_step_time": 0.4078047275543213
    },
    {
      "epoch": 0.00012847900390625,
      "grad_norm": 0.13154812157154083,
      "learning_rate": 7.723195175075136e-05,
      "loss": 0.0503,
      "step": 21050
    },
    {
      "epoch": 0.00012847900390625,
      "model_forward_time": 0.1153104305267334,
      "step": 21050
    },
    {
      "epoch": 0.00012847900390625,
      "step": 21050,
      "training_step_time": 0.39116334915161133
    },
    {
      "epoch": 0.000128485107421875,
      "model_forward_time": 0.11565136909484863,
      "step": 21051
    },
    {
      "epoch": 0.000128485107421875,
      "step": 21051,
      "training_step_time": 0.4010894298553467
    },
    {
      "epoch": 0.0001284912109375,
      "model_forward_time": 0.11564207077026367,
      "step": 21052
    },
    {
      "epoch": 0.0001284912109375,
      "step": 21052,
      "training_step_time": 0.39337849617004395
    },
    {
      "epoch": 0.000128497314453125,
      "model_forward_time": 0.1159827709197998,
      "step": 21053
    },
    {
      "epoch": 0.000128497314453125,
      "step": 21053,
      "training_step_time": 0.4012722969055176
    },
    {
      "epoch": 0.00012850341796875,
      "model_forward_time": 0.11549758911132812,
      "step": 21054
    },
    {
      "epoch": 0.00012850341796875,
      "step": 21054,
      "training_step_time": 0.3894767761230469
    },
    {
      "epoch": 0.000128509521484375,
      "model_forward_time": 0.11487436294555664,
      "step": 21055
    },
    {
      "epoch": 0.000128509521484375,
      "step": 21055,
      "training_step_time": 0.40975356101989746
    },
    {
      "epoch": 0.000128515625,
      "model_forward_time": 0.11607861518859863,
      "step": 21056
    },
    {
      "epoch": 0.000128515625,
      "step": 21056,
      "training_step_time": 0.3930084705352783
    },
    {
      "epoch": 0.000128521728515625,
      "model_forward_time": 0.11609792709350586,
      "step": 21057
    },
    {
      "epoch": 0.000128521728515625,
      "step": 21057,
      "training_step_time": 0.39008641242980957
    },
    {
      "epoch": 0.00012852783203125,
      "model_forward_time": 0.1165773868560791,
      "step": 21058
    },
    {
      "epoch": 0.00012852783203125,
      "step": 21058,
      "training_step_time": 0.4483649730682373
    },
    {
      "epoch": 0.000128533935546875,
      "model_forward_time": 0.11607956886291504,
      "step": 21059
    },
    {
      "epoch": 0.000128533935546875,
      "step": 21059,
      "training_step_time": 0.49970364570617676
    },
    {
      "epoch": 0.0001285400390625,
      "grad_norm": 0.14534232020378113,
      "learning_rate": 7.720883567456298e-05,
      "loss": 0.0509,
      "step": 21060
    },
    {
      "epoch": 0.0001285400390625,
      "model_forward_time": 0.1161501407623291,
      "step": 21060
    },
    {
      "epoch": 0.0001285400390625,
      "step": 21060,
      "training_step_time": 0.487180233001709
    },
    {
      "epoch": 0.000128546142578125,
      "model_forward_time": 0.11565232276916504,
      "step": 21061
    },
    {
      "epoch": 0.000128546142578125,
      "step": 21061,
      "training_step_time": 0.41848063468933105
    },
    {
      "epoch": 0.00012855224609375,
      "model_forward_time": 0.11482977867126465,
      "step": 21062
    },
    {
      "epoch": 0.00012855224609375,
      "step": 21062,
      "training_step_time": 0.3943674564361572
    },
    {
      "epoch": 0.000128558349609375,
      "model_forward_time": 0.1159515380859375,
      "step": 21063
    },
    {
      "epoch": 0.000128558349609375,
      "step": 21063,
      "training_step_time": 0.3883354663848877
    },
    {
      "epoch": 0.000128564453125,
      "model_forward_time": 0.1156156063079834,
      "step": 21064
    },
    {
      "epoch": 0.000128564453125,
      "step": 21064,
      "training_step_time": 0.38980793952941895
    },
    {
      "epoch": 0.000128570556640625,
      "model_forward_time": 0.1158437728881836,
      "step": 21065
    },
    {
      "epoch": 0.000128570556640625,
      "step": 21065,
      "training_step_time": 0.41042208671569824
    },
    {
      "epoch": 0.00012857666015625,
      "model_forward_time": 0.11492228507995605,
      "step": 21066
    },
    {
      "epoch": 0.00012857666015625,
      "step": 21066,
      "training_step_time": 0.40799951553344727
    },
    {
      "epoch": 0.000128582763671875,
      "model_forward_time": 0.11553406715393066,
      "step": 21067
    },
    {
      "epoch": 0.000128582763671875,
      "step": 21067,
      "training_step_time": 0.3971593379974365
    },
    {
      "epoch": 0.0001285888671875,
      "model_forward_time": 0.11579680442810059,
      "step": 21068
    },
    {
      "epoch": 0.0001285888671875,
      "step": 21068,
      "training_step_time": 0.38382530212402344
    },
    {
      "epoch": 0.000128594970703125,
      "model_forward_time": 0.11554479598999023,
      "step": 21069
    },
    {
      "epoch": 0.000128594970703125,
      "step": 21069,
      "training_step_time": 0.39227890968322754
    },
    {
      "epoch": 0.00012860107421875,
      "grad_norm": 0.134473979473114,
      "learning_rate": 7.718571133304871e-05,
      "loss": 0.048,
      "step": 21070
    },
    {
      "epoch": 0.00012860107421875,
      "model_forward_time": 0.11618876457214355,
      "step": 21070
    },
    {
      "epoch": 0.00012860107421875,
      "step": 21070,
      "training_step_time": 0.39279675483703613
    },
    {
      "epoch": 0.000128607177734375,
      "model_forward_time": 0.11577749252319336,
      "step": 21071
    },
    {
      "epoch": 0.000128607177734375,
      "step": 21071,
      "training_step_time": 0.3894953727722168
    },
    {
      "epoch": 0.00012861328125,
      "model_forward_time": 0.11618328094482422,
      "step": 21072
    },
    {
      "epoch": 0.00012861328125,
      "step": 21072,
      "training_step_time": 0.40457582473754883
    },
    {
      "epoch": 0.000128619384765625,
      "model_forward_time": 0.11600041389465332,
      "step": 21073
    },
    {
      "epoch": 0.000128619384765625,
      "step": 21073,
      "training_step_time": 0.4044301509857178
    },
    {
      "epoch": 0.00012862548828125,
      "model_forward_time": 0.11621403694152832,
      "step": 21074
    },
    {
      "epoch": 0.00012862548828125,
      "step": 21074,
      "training_step_time": 0.43004465103149414
    },
    {
      "epoch": 0.000128631591796875,
      "model_forward_time": 0.11594557762145996,
      "step": 21075
    },
    {
      "epoch": 0.000128631591796875,
      "step": 21075,
      "training_step_time": 0.5071628093719482
    },
    {
      "epoch": 0.0001286376953125,
      "model_forward_time": 0.1163032054901123,
      "step": 21076
    },
    {
      "epoch": 0.0001286376953125,
      "step": 21076,
      "training_step_time": 0.40931248664855957
    },
    {
      "epoch": 0.000128643798828125,
      "model_forward_time": 0.11598491668701172,
      "step": 21077
    },
    {
      "epoch": 0.000128643798828125,
      "step": 21077,
      "training_step_time": 0.3971536159515381
    },
    {
      "epoch": 0.00012864990234375,
      "model_forward_time": 0.11638307571411133,
      "step": 21078
    },
    {
      "epoch": 0.00012864990234375,
      "step": 21078,
      "training_step_time": 0.4197368621826172
    },
    {
      "epoch": 0.000128656005859375,
      "model_forward_time": 0.11538505554199219,
      "step": 21079
    },
    {
      "epoch": 0.000128656005859375,
      "step": 21079,
      "training_step_time": 0.41876935958862305
    },
    {
      "epoch": 0.000128662109375,
      "grad_norm": 0.18974237143993378,
      "learning_rate": 7.716257873323316e-05,
      "loss": 0.0554,
      "step": 21080
    },
    {
      "epoch": 0.000128662109375,
      "model_forward_time": 0.11536026000976562,
      "step": 21080
    },
    {
      "epoch": 0.000128662109375,
      "step": 21080,
      "training_step_time": 0.4073166847229004
    },
    {
      "epoch": 0.000128668212890625,
      "model_forward_time": 0.11592841148376465,
      "step": 21081
    },
    {
      "epoch": 0.000128668212890625,
      "step": 21081,
      "training_step_time": 0.3869781494140625
    },
    {
      "epoch": 0.00012867431640625,
      "model_forward_time": 0.11590886116027832,
      "step": 21082
    },
    {
      "epoch": 0.00012867431640625,
      "step": 21082,
      "training_step_time": 0.4363415241241455
    },
    {
      "epoch": 0.000128680419921875,
      "model_forward_time": 0.1158137321472168,
      "step": 21083
    },
    {
      "epoch": 0.000128680419921875,
      "step": 21083,
      "training_step_time": 0.42728233337402344
    },
    {
      "epoch": 0.0001286865234375,
      "model_forward_time": 0.1159522533416748,
      "step": 21084
    },
    {
      "epoch": 0.0001286865234375,
      "step": 21084,
      "training_step_time": 0.3866591453552246
    },
    {
      "epoch": 0.000128692626953125,
      "model_forward_time": 0.1159505844116211,
      "step": 21085
    },
    {
      "epoch": 0.000128692626953125,
      "step": 21085,
      "training_step_time": 0.39061856269836426
    },
    {
      "epoch": 0.00012869873046875,
      "model_forward_time": 0.11696553230285645,
      "step": 21086
    },
    {
      "epoch": 0.00012869873046875,
      "step": 21086,
      "training_step_time": 0.3822801113128662
    },
    {
      "epoch": 0.000128704833984375,
      "model_forward_time": 0.11570525169372559,
      "step": 21087
    },
    {
      "epoch": 0.000128704833984375,
      "step": 21087,
      "training_step_time": 0.366971492767334
    },
    {
      "epoch": 0.0001287109375,
      "model_forward_time": 0.14010024070739746,
      "step": 21088
    },
    {
      "epoch": 0.0001287109375,
      "step": 21088,
      "training_step_time": 0.5854763984680176
    },
    {
      "epoch": 0.000128717041015625,
      "model_forward_time": 0.1156625747680664,
      "step": 21089
    },
    {
      "epoch": 0.000128717041015625,
      "step": 21089,
      "training_step_time": 0.5088636875152588
    },
    {
      "epoch": 0.00012872314453125,
      "grad_norm": 0.17754709720611572,
      "learning_rate": 7.713943788214337e-05,
      "loss": 0.047,
      "step": 21090
    },
    {
      "epoch": 0.00012872314453125,
      "model_forward_time": 0.11431884765625,
      "step": 21090
    },
    {
      "epoch": 0.00012872314453125,
      "step": 21090,
      "training_step_time": 0.4562039375305176
    },
    {
      "epoch": 0.000128729248046875,
      "model_forward_time": 0.1146554946899414,
      "step": 21091
    },
    {
      "epoch": 0.000128729248046875,
      "step": 21091,
      "training_step_time": 0.3894665241241455
    },
    {
      "epoch": 0.0001287353515625,
      "model_forward_time": 0.11523604393005371,
      "step": 21092
    },
    {
      "epoch": 0.0001287353515625,
      "step": 21092,
      "training_step_time": 0.43808627128601074
    },
    {
      "epoch": 0.000128741455078125,
      "model_forward_time": 0.11593246459960938,
      "step": 21093
    },
    {
      "epoch": 0.000128741455078125,
      "step": 21093,
      "training_step_time": 0.41043758392333984
    },
    {
      "epoch": 0.00012874755859375,
      "model_forward_time": 0.11501812934875488,
      "step": 21094
    },
    {
      "epoch": 0.00012874755859375,
      "step": 21094,
      "training_step_time": 0.3828597068786621
    },
    {
      "epoch": 0.000128753662109375,
      "model_forward_time": 0.1155092716217041,
      "step": 21095
    },
    {
      "epoch": 0.000128753662109375,
      "step": 21095,
      "training_step_time": 0.38100433349609375
    },
    {
      "epoch": 0.000128759765625,
      "model_forward_time": 0.11494851112365723,
      "step": 21096
    },
    {
      "epoch": 0.000128759765625,
      "step": 21096,
      "training_step_time": 0.3857860565185547
    },
    {
      "epoch": 0.000128765869140625,
      "model_forward_time": 0.11521220207214355,
      "step": 21097
    },
    {
      "epoch": 0.000128765869140625,
      "step": 21097,
      "training_step_time": 0.39299488067626953
    },
    {
      "epoch": 0.00012877197265625,
      "model_forward_time": 0.11539697647094727,
      "step": 21098
    },
    {
      "epoch": 0.00012877197265625,
      "step": 21098,
      "training_step_time": 0.38529181480407715
    },
    {
      "epoch": 0.000128778076171875,
      "model_forward_time": 0.11556839942932129,
      "step": 21099
    },
    {
      "epoch": 0.000128778076171875,
      "step": 21099,
      "training_step_time": 0.3775146007537842
    },
    {
      "epoch": 0.0001287841796875,
      "grad_norm": 0.19891180098056793,
      "learning_rate": 7.711628878680892e-05,
      "loss": 0.055,
      "step": 21100
    },
    {
      "epoch": 0.0001287841796875,
      "model_forward_time": 0.1164243221282959,
      "step": 21100
    },
    {
      "epoch": 0.0001287841796875,
      "step": 21100,
      "training_step_time": 0.5649542808532715
    },
    {
      "epoch": 0.000128790283203125,
      "model_forward_time": 0.11689019203186035,
      "step": 21101
    },
    {
      "epoch": 0.000128790283203125,
      "step": 21101,
      "training_step_time": 0.39191150665283203
    },
    {
      "epoch": 0.00012879638671875,
      "model_forward_time": 0.11634612083435059,
      "step": 21102
    },
    {
      "epoch": 0.00012879638671875,
      "step": 21102,
      "training_step_time": 0.41019606590270996
    },
    {
      "epoch": 0.000128802490234375,
      "model_forward_time": 0.11646723747253418,
      "step": 21103
    },
    {
      "epoch": 0.000128802490234375,
      "step": 21103,
      "training_step_time": 0.48400020599365234
    },
    {
      "epoch": 0.00012880859375,
      "model_forward_time": 0.11600542068481445,
      "step": 21104
    },
    {
      "epoch": 0.00012880859375,
      "step": 21104,
      "training_step_time": 0.41742467880249023
    },
    {
      "epoch": 0.000128814697265625,
      "model_forward_time": 0.11929440498352051,
      "step": 21105
    },
    {
      "epoch": 0.000128814697265625,
      "step": 21105,
      "training_step_time": 0.4050295352935791
    },
    {
      "epoch": 0.00012882080078125,
      "model_forward_time": 0.11513662338256836,
      "step": 21106
    },
    {
      "epoch": 0.00012882080078125,
      "step": 21106,
      "training_step_time": 0.4504663944244385
    },
    {
      "epoch": 0.000128826904296875,
      "model_forward_time": 0.11511802673339844,
      "step": 21107
    },
    {
      "epoch": 0.000128826904296875,
      "step": 21107,
      "training_step_time": 0.497647762298584
    },
    {
      "epoch": 0.0001288330078125,
      "model_forward_time": 0.11601686477661133,
      "step": 21108
    },
    {
      "epoch": 0.0001288330078125,
      "step": 21108,
      "training_step_time": 0.3902618885040283
    },
    {
      "epoch": 0.000128839111328125,
      "model_forward_time": 0.11527013778686523,
      "step": 21109
    },
    {
      "epoch": 0.000128839111328125,
      "step": 21109,
      "training_step_time": 0.39842653274536133
    },
    {
      "epoch": 0.00012884521484375,
      "grad_norm": 0.16848903894424438,
      "learning_rate": 7.70931314542619e-05,
      "loss": 0.0481,
      "step": 21110
    },
    {
      "epoch": 0.00012884521484375,
      "model_forward_time": 0.11501574516296387,
      "step": 21110
    },
    {
      "epoch": 0.00012884521484375,
      "step": 21110,
      "training_step_time": 0.3715958595275879
    },
    {
      "epoch": 0.000128851318359375,
      "model_forward_time": 0.11437582969665527,
      "step": 21111
    },
    {
      "epoch": 0.000128851318359375,
      "step": 21111,
      "training_step_time": 0.3898453712463379
    },
    {
      "epoch": 0.000128857421875,
      "model_forward_time": 0.11546778678894043,
      "step": 21112
    },
    {
      "epoch": 0.000128857421875,
      "step": 21112,
      "training_step_time": 0.6072709560394287
    },
    {
      "epoch": 0.000128863525390625,
      "model_forward_time": 0.11560392379760742,
      "step": 21113
    },
    {
      "epoch": 0.000128863525390625,
      "step": 21113,
      "training_step_time": 0.39425182342529297
    },
    {
      "epoch": 0.00012886962890625,
      "model_forward_time": 0.11510133743286133,
      "step": 21114
    },
    {
      "epoch": 0.00012886962890625,
      "step": 21114,
      "training_step_time": 0.3924121856689453
    },
    {
      "epoch": 0.000128875732421875,
      "model_forward_time": 0.11513042449951172,
      "step": 21115
    },
    {
      "epoch": 0.000128875732421875,
      "step": 21115,
      "training_step_time": 0.3936893939971924
    },
    {
      "epoch": 0.0001288818359375,
      "model_forward_time": 0.1208655834197998,
      "step": 21116
    },
    {
      "epoch": 0.0001288818359375,
      "step": 21116,
      "training_step_time": 0.39942121505737305
    },
    {
      "epoch": 0.000128887939453125,
      "model_forward_time": 0.11843347549438477,
      "step": 21117
    },
    {
      "epoch": 0.000128887939453125,
      "step": 21117,
      "training_step_time": 0.4049050807952881
    },
    {
      "epoch": 0.00012889404296875,
      "model_forward_time": 0.12176966667175293,
      "step": 21118
    },
    {
      "epoch": 0.00012889404296875,
      "step": 21118,
      "training_step_time": 0.47029662132263184
    },
    {
      "epoch": 0.000128900146484375,
      "model_forward_time": 0.1208200454711914,
      "step": 21119
    },
    {
      "epoch": 0.000128900146484375,
      "step": 21119,
      "training_step_time": 0.441103458404541
    },
    {
      "epoch": 0.00012890625,
      "grad_norm": 0.09782162308692932,
      "learning_rate": 7.70699658915369e-05,
      "loss": 0.0489,
      "step": 21120
    },
    {
      "epoch": 0.00012890625,
      "model_forward_time": 0.1174931526184082,
      "step": 21120
    },
    {
      "epoch": 0.00012890625,
      "step": 21120,
      "training_step_time": 0.40665483474731445
    },
    {
      "epoch": 0.000128912353515625,
      "model_forward_time": 0.11656427383422852,
      "step": 21121
    },
    {
      "epoch": 0.000128912353515625,
      "step": 21121,
      "training_step_time": 0.41167664527893066
    },
    {
      "epoch": 0.00012891845703125,
      "model_forward_time": 0.11564779281616211,
      "step": 21122
    },
    {
      "epoch": 0.00012891845703125,
      "step": 21122,
      "training_step_time": 0.38118410110473633
    },
    {
      "epoch": 0.000128924560546875,
      "model_forward_time": 0.11627435684204102,
      "step": 21123
    },
    {
      "epoch": 0.000128924560546875,
      "step": 21123,
      "training_step_time": 0.3911600112915039
    },
    {
      "epoch": 0.0001289306640625,
      "model_forward_time": 0.11578130722045898,
      "step": 21124
    },
    {
      "epoch": 0.0001289306640625,
      "step": 21124,
      "training_step_time": 0.4681081771850586
    },
    {
      "epoch": 0.000128936767578125,
      "model_forward_time": 0.11517024040222168,
      "step": 21125
    },
    {
      "epoch": 0.000128936767578125,
      "step": 21125,
      "training_step_time": 0.38802194595336914
    },
    {
      "epoch": 0.00012894287109375,
      "model_forward_time": 0.11635422706604004,
      "step": 21126
    },
    {
      "epoch": 0.00012894287109375,
      "step": 21126,
      "training_step_time": 0.40238332748413086
    },
    {
      "epoch": 0.000128948974609375,
      "model_forward_time": 0.11578154563903809,
      "step": 21127
    },
    {
      "epoch": 0.000128948974609375,
      "step": 21127,
      "training_step_time": 0.39306044578552246
    },
    {
      "epoch": 0.000128955078125,
      "model_forward_time": 0.11593174934387207,
      "step": 21128
    },
    {
      "epoch": 0.000128955078125,
      "step": 21128,
      "training_step_time": 0.38373661041259766
    },
    {
      "epoch": 0.000128961181640625,
      "model_forward_time": 0.11575722694396973,
      "step": 21129
    },
    {
      "epoch": 0.000128961181640625,
      "step": 21129,
      "training_step_time": 0.3922770023345947
    },
    {
      "epoch": 0.00012896728515625,
      "grad_norm": 0.10057903081178665,
      "learning_rate": 7.7046792105671e-05,
      "loss": 0.0469,
      "step": 21130
    },
    {
      "epoch": 0.00012896728515625,
      "model_forward_time": 0.11526799201965332,
      "step": 21130
    },
    {
      "epoch": 0.00012896728515625,
      "step": 21130,
      "training_step_time": 0.6844255924224854
    },
    {
      "epoch": 0.000128973388671875,
      "model_forward_time": 0.11530947685241699,
      "step": 21131
    },
    {
      "epoch": 0.000128973388671875,
      "step": 21131,
      "training_step_time": 0.4675328731536865
    },
    {
      "epoch": 0.0001289794921875,
      "model_forward_time": 0.11562609672546387,
      "step": 21132
    },
    {
      "epoch": 0.0001289794921875,
      "step": 21132,
      "training_step_time": 0.5026636123657227
    },
    {
      "epoch": 0.000128985595703125,
      "model_forward_time": 0.11508870124816895,
      "step": 21133
    },
    {
      "epoch": 0.000128985595703125,
      "step": 21133,
      "training_step_time": 0.4857807159423828
    },
    {
      "epoch": 0.00012899169921875,
      "model_forward_time": 0.11493825912475586,
      "step": 21134
    },
    {
      "epoch": 0.00012899169921875,
      "step": 21134,
      "training_step_time": 0.3921504020690918
    },
    {
      "epoch": 0.000128997802734375,
      "model_forward_time": 0.1151132583618164,
      "step": 21135
    },
    {
      "epoch": 0.000128997802734375,
      "step": 21135,
      "training_step_time": 0.4239020347595215
    },
    {
      "epoch": 0.00012900390625,
      "model_forward_time": 0.1153256893157959,
      "step": 21136
    },
    {
      "epoch": 0.00012900390625,
      "step": 21136,
      "training_step_time": 0.3939194679260254
    },
    {
      "epoch": 0.000129010009765625,
      "model_forward_time": 0.1154794692993164,
      "step": 21137
    },
    {
      "epoch": 0.000129010009765625,
      "step": 21137,
      "training_step_time": 0.3892674446105957
    },
    {
      "epoch": 0.00012901611328125,
      "model_forward_time": 0.11602926254272461,
      "step": 21138
    },
    {
      "epoch": 0.00012901611328125,
      "step": 21138,
      "training_step_time": 0.3832554817199707
    },
    {
      "epoch": 0.000129022216796875,
      "model_forward_time": 0.1154181957244873,
      "step": 21139
    },
    {
      "epoch": 0.000129022216796875,
      "step": 21139,
      "training_step_time": 0.39047813415527344
    },
    {
      "epoch": 0.0001290283203125,
      "grad_norm": 0.15492308139801025,
      "learning_rate": 7.70236101037038e-05,
      "loss": 0.0523,
      "step": 21140
    },
    {
      "epoch": 0.0001290283203125,
      "model_forward_time": 0.11623120307922363,
      "step": 21140
    },
    {
      "epoch": 0.0001290283203125,
      "step": 21140,
      "training_step_time": 0.38036370277404785
    },
    {
      "epoch": 0.000129034423828125,
      "model_forward_time": 0.11551451683044434,
      "step": 21141
    },
    {
      "epoch": 0.000129034423828125,
      "step": 21141,
      "training_step_time": 0.3872675895690918
    },
    {
      "epoch": 0.00012904052734375,
      "model_forward_time": 0.11667227745056152,
      "step": 21142
    },
    {
      "epoch": 0.00012904052734375,
      "step": 21142,
      "training_step_time": 0.7175757884979248
    },
    {
      "epoch": 0.000129046630859375,
      "model_forward_time": 0.1156167984008789,
      "step": 21143
    },
    {
      "epoch": 0.000129046630859375,
      "step": 21143,
      "training_step_time": 0.38695216178894043
    },
    {
      "epoch": 0.000129052734375,
      "model_forward_time": 0.1154932975769043,
      "step": 21144
    },
    {
      "epoch": 0.000129052734375,
      "step": 21144,
      "training_step_time": 0.36618566513061523
    },
    {
      "epoch": 0.000129058837890625,
      "model_forward_time": 0.11620807647705078,
      "step": 21145
    },
    {
      "epoch": 0.000129058837890625,
      "step": 21145,
      "training_step_time": 0.5143117904663086
    },
    {
      "epoch": 0.00012906494140625,
      "model_forward_time": 0.11519074440002441,
      "step": 21146
    },
    {
      "epoch": 0.00012906494140625,
      "step": 21146,
      "training_step_time": 0.450176477432251
    },
    {
      "epoch": 0.000129071044921875,
      "model_forward_time": 0.1151590347290039,
      "step": 21147
    },
    {
      "epoch": 0.000129071044921875,
      "step": 21147,
      "training_step_time": 0.4114830493927002
    },
    {
      "epoch": 0.0001290771484375,
      "model_forward_time": 0.11511754989624023,
      "step": 21148
    },
    {
      "epoch": 0.0001290771484375,
      "step": 21148,
      "training_step_time": 0.4532327651977539
    },
    {
      "epoch": 0.000129083251953125,
      "model_forward_time": 0.11711740493774414,
      "step": 21149
    },
    {
      "epoch": 0.000129083251953125,
      "step": 21149,
      "training_step_time": 0.4080488681793213
    },
    {
      "epoch": 0.00012908935546875,
      "grad_norm": 0.12600712478160858,
      "learning_rate": 7.700041989267736e-05,
      "loss": 0.0504,
      "step": 21150
    },
    {
      "epoch": 0.00012908935546875,
      "model_forward_time": 0.11477994918823242,
      "step": 21150
    },
    {
      "epoch": 0.00012908935546875,
      "step": 21150,
      "training_step_time": 0.39437031745910645
    },
    {
      "epoch": 0.000129095458984375,
      "model_forward_time": 0.11576008796691895,
      "step": 21151
    },
    {
      "epoch": 0.000129095458984375,
      "step": 21151,
      "training_step_time": 0.3890345096588135
    },
    {
      "epoch": 0.0001291015625,
      "model_forward_time": 0.11562252044677734,
      "step": 21152
    },
    {
      "epoch": 0.0001291015625,
      "step": 21152,
      "training_step_time": 0.3804652690887451
    },
    {
      "epoch": 0.000129107666015625,
      "model_forward_time": 0.11504316329956055,
      "step": 21153
    },
    {
      "epoch": 0.000129107666015625,
      "step": 21153,
      "training_step_time": 0.39514851570129395
    },
    {
      "epoch": 0.00012911376953125,
      "model_forward_time": 0.1152501106262207,
      "step": 21154
    },
    {
      "epoch": 0.00012911376953125,
      "step": 21154,
      "training_step_time": 0.6863467693328857
    },
    {
      "epoch": 0.000129119873046875,
      "model_forward_time": 0.11605525016784668,
      "step": 21155
    },
    {
      "epoch": 0.000129119873046875,
      "step": 21155,
      "training_step_time": 0.3956332206726074
    },
    {
      "epoch": 0.0001291259765625,
      "model_forward_time": 0.11604809761047363,
      "step": 21156
    },
    {
      "epoch": 0.0001291259765625,
      "step": 21156,
      "training_step_time": 0.3870384693145752
    },
    {
      "epoch": 0.000129132080078125,
      "model_forward_time": 0.11570334434509277,
      "step": 21157
    },
    {
      "epoch": 0.000129132080078125,
      "step": 21157,
      "training_step_time": 0.3921191692352295
    },
    {
      "epoch": 0.00012913818359375,
      "model_forward_time": 0.11519408226013184,
      "step": 21158
    },
    {
      "epoch": 0.00012913818359375,
      "step": 21158,
      "training_step_time": 0.4451565742492676
    },
    {
      "epoch": 0.000129144287109375,
      "model_forward_time": 0.1146552562713623,
      "step": 21159
    },
    {
      "epoch": 0.000129144287109375,
      "step": 21159,
      "training_step_time": 0.4775660037994385
    },
    {
      "epoch": 0.000129150390625,
      "grad_norm": 0.13406017422676086,
      "learning_rate": 7.697722147963626e-05,
      "loss": 0.0454,
      "step": 21160
    },
    {
      "epoch": 0.000129150390625,
      "model_forward_time": 0.11535811424255371,
      "step": 21160
    },
    {
      "epoch": 0.000129150390625,
      "step": 21160,
      "training_step_time": 0.4762117862701416
    },
    {
      "epoch": 0.000129156494140625,
      "model_forward_time": 0.11471128463745117,
      "step": 21161
    },
    {
      "epoch": 0.000129156494140625,
      "step": 21161,
      "training_step_time": 0.42362141609191895
    },
    {
      "epoch": 0.00012916259765625,
      "model_forward_time": 0.11541342735290527,
      "step": 21162
    },
    {
      "epoch": 0.00012916259765625,
      "step": 21162,
      "training_step_time": 0.3869626522064209
    },
    {
      "epoch": 0.000129168701171875,
      "model_forward_time": 0.11532378196716309,
      "step": 21163
    },
    {
      "epoch": 0.000129168701171875,
      "step": 21163,
      "training_step_time": 0.39560437202453613
    },
    {
      "epoch": 0.0001291748046875,
      "model_forward_time": 0.1149892807006836,
      "step": 21164
    },
    {
      "epoch": 0.0001291748046875,
      "step": 21164,
      "training_step_time": 0.39710140228271484
    },
    {
      "epoch": 0.000129180908203125,
      "model_forward_time": 0.11559939384460449,
      "step": 21165
    },
    {
      "epoch": 0.000129180908203125,
      "step": 21165,
      "training_step_time": 0.3857605457305908
    },
    {
      "epoch": 0.00012918701171875,
      "model_forward_time": 0.11536073684692383,
      "step": 21166
    },
    {
      "epoch": 0.00012918701171875,
      "step": 21166,
      "training_step_time": 0.4451415538787842
    },
    {
      "epoch": 0.000129193115234375,
      "model_forward_time": 0.1159968376159668,
      "step": 21167
    },
    {
      "epoch": 0.000129193115234375,
      "step": 21167,
      "training_step_time": 0.4093914031982422
    },
    {
      "epoch": 0.00012919921875,
      "model_forward_time": 0.1157374382019043,
      "step": 21168
    },
    {
      "epoch": 0.00012919921875,
      "step": 21168,
      "training_step_time": 0.3873405456542969
    },
    {
      "epoch": 0.000129205322265625,
      "model_forward_time": 0.11595892906188965,
      "step": 21169
    },
    {
      "epoch": 0.000129205322265625,
      "step": 21169,
      "training_step_time": 0.3817627429962158
    },
    {
      "epoch": 0.00012921142578125,
      "grad_norm": 0.12993395328521729,
      "learning_rate": 7.695401487162757e-05,
      "loss": 0.0513,
      "step": 21170
    },
    {
      "epoch": 0.00012921142578125,
      "model_forward_time": 0.11615538597106934,
      "step": 21170
    },
    {
      "epoch": 0.00012921142578125,
      "step": 21170,
      "training_step_time": 0.3979067802429199
    },
    {
      "epoch": 0.000129217529296875,
      "model_forward_time": 0.11632442474365234,
      "step": 21171
    },
    {
      "epoch": 0.000129217529296875,
      "step": 21171,
      "training_step_time": 0.45235157012939453
    },
    {
      "epoch": 0.0001292236328125,
      "model_forward_time": 0.11519432067871094,
      "step": 21172
    },
    {
      "epoch": 0.0001292236328125,
      "step": 21172,
      "training_step_time": 0.4382176399230957
    },
    {
      "epoch": 0.000129229736328125,
      "model_forward_time": 0.1155085563659668,
      "step": 21173
    },
    {
      "epoch": 0.000129229736328125,
      "step": 21173,
      "training_step_time": 0.466019868850708
    },
    {
      "epoch": 0.00012923583984375,
      "model_forward_time": 0.11554765701293945,
      "step": 21174
    },
    {
      "epoch": 0.00012923583984375,
      "step": 21174,
      "training_step_time": 0.49592137336730957
    },
    {
      "epoch": 0.000129241943359375,
      "model_forward_time": 0.11558032035827637,
      "step": 21175
    },
    {
      "epoch": 0.000129241943359375,
      "step": 21175,
      "training_step_time": 0.4183964729309082
    },
    {
      "epoch": 0.000129248046875,
      "model_forward_time": 0.11510086059570312,
      "step": 21176
    },
    {
      "epoch": 0.000129248046875,
      "step": 21176,
      "training_step_time": 0.47531747817993164
    },
    {
      "epoch": 0.000129254150390625,
      "model_forward_time": 0.11516308784484863,
      "step": 21177
    },
    {
      "epoch": 0.000129254150390625,
      "step": 21177,
      "training_step_time": 0.3828299045562744
    },
    {
      "epoch": 0.00012926025390625,
      "model_forward_time": 0.1160426139831543,
      "step": 21178
    },
    {
      "epoch": 0.00012926025390625,
      "step": 21178,
      "training_step_time": 0.3940105438232422
    },
    {
      "epoch": 0.000129266357421875,
      "model_forward_time": 0.11507868766784668,
      "step": 21179
    },
    {
      "epoch": 0.000129266357421875,
      "step": 21179,
      "training_step_time": 0.3878474235534668
    },
    {
      "epoch": 0.0001292724609375,
      "grad_norm": 0.11035045236349106,
      "learning_rate": 7.693080007570084e-05,
      "loss": 0.0427,
      "step": 21180
    },
    {
      "epoch": 0.0001292724609375,
      "model_forward_time": 0.11584210395812988,
      "step": 21180
    },
    {
      "epoch": 0.0001292724609375,
      "step": 21180,
      "training_step_time": 0.40356016159057617
    },
    {
      "epoch": 0.000129278564453125,
      "model_forward_time": 0.11695003509521484,
      "step": 21181
    },
    {
      "epoch": 0.000129278564453125,
      "step": 21181,
      "training_step_time": 0.38486790657043457
    },
    {
      "epoch": 0.00012928466796875,
      "model_forward_time": 0.11560225486755371,
      "step": 21182
    },
    {
      "epoch": 0.00012928466796875,
      "step": 21182,
      "training_step_time": 0.3829312324523926
    },
    {
      "epoch": 0.000129290771484375,
      "model_forward_time": 0.11581707000732422,
      "step": 21183
    },
    {
      "epoch": 0.000129290771484375,
      "step": 21183,
      "training_step_time": 0.3997385501861572
    },
    {
      "epoch": 0.000129296875,
      "model_forward_time": 0.11568617820739746,
      "step": 21184
    },
    {
      "epoch": 0.000129296875,
      "step": 21184,
      "training_step_time": 0.45099806785583496
    },
    {
      "epoch": 0.000129302978515625,
      "model_forward_time": 0.11570024490356445,
      "step": 21185
    },
    {
      "epoch": 0.000129302978515625,
      "step": 21185,
      "training_step_time": 0.3865315914154053
    },
    {
      "epoch": 0.00012930908203125,
      "model_forward_time": 0.1157383918762207,
      "step": 21186
    },
    {
      "epoch": 0.00012930908203125,
      "step": 21186,
      "training_step_time": 0.3687398433685303
    },
    {
      "epoch": 0.000129315185546875,
      "model_forward_time": 0.11604809761047363,
      "step": 21187
    },
    {
      "epoch": 0.000129315185546875,
      "step": 21187,
      "training_step_time": 0.3937649726867676
    },
    {
      "epoch": 0.0001293212890625,
      "model_forward_time": 0.11607170104980469,
      "step": 21188
    },
    {
      "epoch": 0.0001293212890625,
      "step": 21188,
      "training_step_time": 0.45955538749694824
    },
    {
      "epoch": 0.000129327392578125,
      "model_forward_time": 0.11627316474914551,
      "step": 21189
    },
    {
      "epoch": 0.000129327392578125,
      "step": 21189,
      "training_step_time": 0.42257094383239746
    },
    {
      "epoch": 0.00012933349609375,
      "grad_norm": 0.1071220263838768,
      "learning_rate": 7.690757709890812e-05,
      "loss": 0.0456,
      "step": 21190
    },
    {
      "epoch": 0.00012933349609375,
      "model_forward_time": 0.11547327041625977,
      "step": 21190
    },
    {
      "epoch": 0.00012933349609375,
      "step": 21190,
      "training_step_time": 0.5452303886413574
    },
    {
      "epoch": 0.000129339599609375,
      "model_forward_time": 0.11529874801635742,
      "step": 21191
    },
    {
      "epoch": 0.000129339599609375,
      "step": 21191,
      "training_step_time": 0.4610707759857178
    },
    {
      "epoch": 0.000129345703125,
      "model_forward_time": 0.11593198776245117,
      "step": 21192
    },
    {
      "epoch": 0.000129345703125,
      "step": 21192,
      "training_step_time": 0.4059009552001953
    },
    {
      "epoch": 0.000129351806640625,
      "model_forward_time": 0.11510014533996582,
      "step": 21193
    },
    {
      "epoch": 0.000129351806640625,
      "step": 21193,
      "training_step_time": 0.3919210433959961
    },
    {
      "epoch": 0.00012935791015625,
      "model_forward_time": 0.11525321006774902,
      "step": 21194
    },
    {
      "epoch": 0.00012935791015625,
      "step": 21194,
      "training_step_time": 0.38776564598083496
    },
    {
      "epoch": 0.000129364013671875,
      "model_forward_time": 0.1156759262084961,
      "step": 21195
    },
    {
      "epoch": 0.000129364013671875,
      "step": 21195,
      "training_step_time": 0.3807671070098877
    },
    {
      "epoch": 0.0001293701171875,
      "model_forward_time": 0.11543798446655273,
      "step": 21196
    },
    {
      "epoch": 0.0001293701171875,
      "step": 21196,
      "training_step_time": 0.5184764862060547
    },
    {
      "epoch": 0.000129376220703125,
      "model_forward_time": 0.11512160301208496,
      "step": 21197
    },
    {
      "epoch": 0.000129376220703125,
      "step": 21197,
      "training_step_time": 0.3942379951477051
    },
    {
      "epoch": 0.00012938232421875,
      "model_forward_time": 0.11524438858032227,
      "step": 21198
    },
    {
      "epoch": 0.00012938232421875,
      "step": 21198,
      "training_step_time": 0.38748908042907715
    },
    {
      "epoch": 0.000129388427734375,
      "model_forward_time": 0.11508917808532715,
      "step": 21199
    },
    {
      "epoch": 0.000129388427734375,
      "step": 21199,
      "training_step_time": 0.42528438568115234
    },
    {
      "epoch": 0.00012939453125,
      "grad_norm": 0.1720055192708969,
      "learning_rate": 7.688434594830392e-05,
      "loss": 0.0476,
      "step": 21200
    },
    {
      "epoch": 0.00012939453125,
      "model_forward_time": 0.11520576477050781,
      "step": 21200
    },
    {
      "epoch": 0.00012939453125,
      "step": 21200,
      "training_step_time": 0.3650014400482178
    },
    {
      "epoch": 0.000129400634765625,
      "model_forward_time": 0.1155543327331543,
      "step": 21201
    },
    {
      "epoch": 0.000129400634765625,
      "step": 21201,
      "training_step_time": 0.38961005210876465
    },
    {
      "epoch": 0.00012940673828125,
      "model_forward_time": 0.11517095565795898,
      "step": 21202
    },
    {
      "epoch": 0.00012940673828125,
      "step": 21202,
      "training_step_time": 0.6121494770050049
    },
    {
      "epoch": 0.000129412841796875,
      "model_forward_time": 0.11854100227355957,
      "step": 21203
    },
    {
      "epoch": 0.000129412841796875,
      "step": 21203,
      "training_step_time": 0.47515439987182617
    },
    {
      "epoch": 0.0001294189453125,
      "model_forward_time": 0.11470246315002441,
      "step": 21204
    },
    {
      "epoch": 0.0001294189453125,
      "step": 21204,
      "training_step_time": 0.458019495010376
    },
    {
      "epoch": 0.000129425048828125,
      "model_forward_time": 0.11458849906921387,
      "step": 21205
    },
    {
      "epoch": 0.000129425048828125,
      "step": 21205,
      "training_step_time": 0.3980438709259033
    },
    {
      "epoch": 0.00012943115234375,
      "model_forward_time": 0.11439871788024902,
      "step": 21206
    },
    {
      "epoch": 0.00012943115234375,
      "step": 21206,
      "training_step_time": 0.3796830177307129
    },
    {
      "epoch": 0.000129437255859375,
      "model_forward_time": 0.1146845817565918,
      "step": 21207
    },
    {
      "epoch": 0.000129437255859375,
      "step": 21207,
      "training_step_time": 0.37534260749816895
    },
    {
      "epoch": 0.000129443359375,
      "model_forward_time": 0.11534547805786133,
      "step": 21208
    },
    {
      "epoch": 0.000129443359375,
      "step": 21208,
      "training_step_time": 0.5231626033782959
    },
    {
      "epoch": 0.000129449462890625,
      "model_forward_time": 0.11513352394104004,
      "step": 21209
    },
    {
      "epoch": 0.000129449462890625,
      "step": 21209,
      "training_step_time": 0.39577794075012207
    },
    {
      "epoch": 0.00012945556640625,
      "grad_norm": 0.14452925324440002,
      "learning_rate": 7.686110663094525e-05,
      "loss": 0.0472,
      "step": 21210
    },
    {
      "epoch": 0.00012945556640625,
      "model_forward_time": 0.11558151245117188,
      "step": 21210
    },
    {
      "epoch": 0.00012945556640625,
      "step": 21210,
      "training_step_time": 0.3884549140930176
    },
    {
      "epoch": 0.000129461669921875,
      "model_forward_time": 0.11551690101623535,
      "step": 21211
    },
    {
      "epoch": 0.000129461669921875,
      "step": 21211,
      "training_step_time": 0.3973119258880615
    },
    {
      "epoch": 0.0001294677734375,
      "model_forward_time": 0.11597847938537598,
      "step": 21212
    },
    {
      "epoch": 0.0001294677734375,
      "step": 21212,
      "training_step_time": 0.4344465732574463
    },
    {
      "epoch": 0.000129473876953125,
      "model_forward_time": 0.11590313911437988,
      "step": 21213
    },
    {
      "epoch": 0.000129473876953125,
      "step": 21213,
      "training_step_time": 0.38800477981567383
    },
    {
      "epoch": 0.00012947998046875,
      "model_forward_time": 0.12005758285522461,
      "step": 21214
    },
    {
      "epoch": 0.00012947998046875,
      "step": 21214,
      "training_step_time": 0.5645427703857422
    },
    {
      "epoch": 0.000129486083984375,
      "model_forward_time": 0.12679672241210938,
      "step": 21215
    },
    {
      "epoch": 0.000129486083984375,
      "step": 21215,
      "training_step_time": 0.39255833625793457
    },
    {
      "epoch": 0.0001294921875,
      "model_forward_time": 0.12058067321777344,
      "step": 21216
    },
    {
      "epoch": 0.0001294921875,
      "step": 21216,
      "training_step_time": 0.47095179557800293
    },
    {
      "epoch": 0.000129498291015625,
      "model_forward_time": 0.11812877655029297,
      "step": 21217
    },
    {
      "epoch": 0.000129498291015625,
      "step": 21217,
      "training_step_time": 0.49536776542663574
    },
    {
      "epoch": 0.00012950439453125,
      "model_forward_time": 0.11592817306518555,
      "step": 21218
    },
    {
      "epoch": 0.00012950439453125,
      "step": 21218,
      "training_step_time": 0.41496944427490234
    },
    {
      "epoch": 0.000129510498046875,
      "model_forward_time": 0.11478495597839355,
      "step": 21219
    },
    {
      "epoch": 0.000129510498046875,
      "step": 21219,
      "training_step_time": 0.47356557846069336
    },
    {
      "epoch": 0.0001295166015625,
      "grad_norm": 0.1317303329706192,
      "learning_rate": 7.683785915389164e-05,
      "loss": 0.0479,
      "step": 21220
    },
    {
      "epoch": 0.0001295166015625,
      "model_forward_time": 0.11508846282958984,
      "step": 21220
    },
    {
      "epoch": 0.0001295166015625,
      "step": 21220,
      "training_step_time": 0.3930025100708008
    },
    {
      "epoch": 0.000129522705078125,
      "model_forward_time": 0.11586833000183105,
      "step": 21221
    },
    {
      "epoch": 0.000129522705078125,
      "step": 21221,
      "training_step_time": 0.3949258327484131
    },
    {
      "epoch": 0.00012952880859375,
      "model_forward_time": 0.11546587944030762,
      "step": 21222
    },
    {
      "epoch": 0.00012952880859375,
      "step": 21222,
      "training_step_time": 0.3991551399230957
    },
    {
      "epoch": 0.000129534912109375,
      "model_forward_time": 0.11554169654846191,
      "step": 21223
    },
    {
      "epoch": 0.000129534912109375,
      "step": 21223,
      "training_step_time": 0.3984038829803467
    },
    {
      "epoch": 0.000129541015625,
      "model_forward_time": 0.11609411239624023,
      "step": 21224
    },
    {
      "epoch": 0.000129541015625,
      "step": 21224,
      "training_step_time": 0.3922109603881836
    },
    {
      "epoch": 0.000129547119140625,
      "model_forward_time": 0.11521744728088379,
      "step": 21225
    },
    {
      "epoch": 0.000129547119140625,
      "step": 21225,
      "training_step_time": 0.41044044494628906
    },
    {
      "epoch": 0.00012955322265625,
      "model_forward_time": 0.11592555046081543,
      "step": 21226
    },
    {
      "epoch": 0.00012955322265625,
      "step": 21226,
      "training_step_time": 0.5381290912628174
    },
    {
      "epoch": 0.000129559326171875,
      "model_forward_time": 0.11575794219970703,
      "step": 21227
    },
    {
      "epoch": 0.000129559326171875,
      "step": 21227,
      "training_step_time": 0.38494038581848145
    },
    {
      "epoch": 0.0001295654296875,
      "model_forward_time": 0.11520147323608398,
      "step": 21228
    },
    {
      "epoch": 0.0001295654296875,
      "step": 21228,
      "training_step_time": 0.3672981262207031
    },
    {
      "epoch": 0.000129571533203125,
      "model_forward_time": 0.1154022216796875,
      "step": 21229
    },
    {
      "epoch": 0.000129571533203125,
      "step": 21229,
      "training_step_time": 0.4538595676422119
    },
    {
      "epoch": 0.00012957763671875,
      "grad_norm": 0.13910657167434692,
      "learning_rate": 7.6814603524205e-05,
      "loss": 0.0495,
      "step": 21230
    },
    {
      "epoch": 0.00012957763671875,
      "model_forward_time": 0.1155548095703125,
      "step": 21230
    },
    {
      "epoch": 0.00012957763671875,
      "step": 21230,
      "training_step_time": 0.41066455841064453
    },
    {
      "epoch": 0.000129583740234375,
      "model_forward_time": 0.11490130424499512,
      "step": 21231
    },
    {
      "epoch": 0.000129583740234375,
      "step": 21231,
      "training_step_time": 0.4415764808654785
    },
    {
      "epoch": 0.00012958984375,
      "model_forward_time": 0.11540651321411133,
      "step": 21232
    },
    {
      "epoch": 0.00012958984375,
      "step": 21232,
      "training_step_time": 0.5029942989349365
    },
    {
      "epoch": 0.000129595947265625,
      "model_forward_time": 0.11529397964477539,
      "step": 21233
    },
    {
      "epoch": 0.000129595947265625,
      "step": 21233,
      "training_step_time": 0.40576696395874023
    },
    {
      "epoch": 0.00012960205078125,
      "model_forward_time": 0.11553096771240234,
      "step": 21234
    },
    {
      "epoch": 0.00012960205078125,
      "step": 21234,
      "training_step_time": 0.3892502784729004
    },
    {
      "epoch": 0.000129608154296875,
      "model_forward_time": 0.11497712135314941,
      "step": 21235
    },
    {
      "epoch": 0.000129608154296875,
      "step": 21235,
      "training_step_time": 0.4066314697265625
    },
    {
      "epoch": 0.0001296142578125,
      "model_forward_time": 0.11540007591247559,
      "step": 21236
    },
    {
      "epoch": 0.0001296142578125,
      "step": 21236,
      "training_step_time": 0.38317084312438965
    },
    {
      "epoch": 0.000129620361328125,
      "model_forward_time": 0.11523175239562988,
      "step": 21237
    },
    {
      "epoch": 0.000129620361328125,
      "step": 21237,
      "training_step_time": 0.39915037155151367
    },
    {
      "epoch": 0.00012962646484375,
      "model_forward_time": 0.1154942512512207,
      "step": 21238
    },
    {
      "epoch": 0.00012962646484375,
      "step": 21238,
      "training_step_time": 0.6242568492889404
    },
    {
      "epoch": 0.000129632568359375,
      "model_forward_time": 0.1153569221496582,
      "step": 21239
    },
    {
      "epoch": 0.000129632568359375,
      "step": 21239,
      "training_step_time": 0.38802146911621094
    },
    {
      "epoch": 0.000129638671875,
      "grad_norm": 0.19083939492702484,
      "learning_rate": 7.679133974894983e-05,
      "loss": 0.0516,
      "step": 21240
    },
    {
      "epoch": 0.000129638671875,
      "model_forward_time": 0.11544156074523926,
      "step": 21240
    },
    {
      "epoch": 0.000129638671875,
      "step": 21240,
      "training_step_time": 0.39873623847961426
    },
    {
      "epoch": 0.000129644775390625,
      "model_forward_time": 0.11496639251708984,
      "step": 21241
    },
    {
      "epoch": 0.000129644775390625,
      "step": 21241,
      "training_step_time": 0.39787912368774414
    },
    {
      "epoch": 0.00012965087890625,
      "model_forward_time": 0.11573219299316406,
      "step": 21242
    },
    {
      "epoch": 0.00012965087890625,
      "step": 21242,
      "training_step_time": 0.38831448554992676
    },
    {
      "epoch": 0.000129656982421875,
      "model_forward_time": 0.11524176597595215,
      "step": 21243
    },
    {
      "epoch": 0.000129656982421875,
      "step": 21243,
      "training_step_time": 0.4911386966705322
    },
    {
      "epoch": 0.0001296630859375,
      "model_forward_time": 0.11522889137268066,
      "step": 21244
    },
    {
      "epoch": 0.0001296630859375,
      "step": 21244,
      "training_step_time": 0.6908478736877441
    },
    {
      "epoch": 0.000129669189453125,
      "model_forward_time": 0.11542010307312012,
      "step": 21245
    },
    {
      "epoch": 0.000129669189453125,
      "step": 21245,
      "training_step_time": 0.4744589328765869
    },
    {
      "epoch": 0.00012967529296875,
      "model_forward_time": 0.11487579345703125,
      "step": 21246
    },
    {
      "epoch": 0.00012967529296875,
      "step": 21246,
      "training_step_time": 0.4840829372406006
    },
    {
      "epoch": 0.000129681396484375,
      "model_forward_time": 0.11475253105163574,
      "step": 21247
    },
    {
      "epoch": 0.000129681396484375,
      "step": 21247,
      "training_step_time": 0.3936624526977539
    },
    {
      "epoch": 0.0001296875,
      "model_forward_time": 0.11475372314453125,
      "step": 21248
    },
    {
      "epoch": 0.0001296875,
      "step": 21248,
      "training_step_time": 0.38361120223999023
    },
    {
      "epoch": 0.000129693603515625,
      "model_forward_time": 0.12210226058959961,
      "step": 21249
    },
    {
      "epoch": 0.000129693603515625,
      "step": 21249,
      "training_step_time": 0.38892436027526855
    },
    {
      "epoch": 0.00012969970703125,
      "grad_norm": 0.19710105657577515,
      "learning_rate": 7.676806783519304e-05,
      "loss": 0.0505,
      "step": 21250
    },
    {
      "epoch": 0.00012969970703125,
      "model_forward_time": 0.11701440811157227,
      "step": 21250
    },
    {
      "epoch": 0.00012969970703125,
      "step": 21250,
      "training_step_time": 0.45483851432800293
    },
    {
      "epoch": 0.000129705810546875,
      "model_forward_time": 0.11575174331665039,
      "step": 21251
    },
    {
      "epoch": 0.000129705810546875,
      "step": 21251,
      "training_step_time": 0.44609999656677246
    },
    {
      "epoch": 0.0001297119140625,
      "model_forward_time": 0.11565876007080078,
      "step": 21252
    },
    {
      "epoch": 0.0001297119140625,
      "step": 21252,
      "training_step_time": 0.44681334495544434
    },
    {
      "epoch": 0.000129718017578125,
      "model_forward_time": 0.11577987670898438,
      "step": 21253
    },
    {
      "epoch": 0.000129718017578125,
      "step": 21253,
      "training_step_time": 0.39353370666503906
    },
    {
      "epoch": 0.00012972412109375,
      "model_forward_time": 0.11615610122680664,
      "step": 21254
    },
    {
      "epoch": 0.00012972412109375,
      "step": 21254,
      "training_step_time": 0.3874044418334961
    },
    {
      "epoch": 0.000129730224609375,
      "model_forward_time": 0.11536312103271484,
      "step": 21255
    },
    {
      "epoch": 0.000129730224609375,
      "step": 21255,
      "training_step_time": 0.3867943286895752
    },
    {
      "epoch": 0.000129736328125,
      "model_forward_time": 0.1158287525177002,
      "step": 21256
    },
    {
      "epoch": 0.000129736328125,
      "step": 21256,
      "training_step_time": 0.5546493530273438
    },
    {
      "epoch": 0.000129742431640625,
      "model_forward_time": 0.11499953269958496,
      "step": 21257
    },
    {
      "epoch": 0.000129742431640625,
      "step": 21257,
      "training_step_time": 0.4183659553527832
    },
    {
      "epoch": 0.00012974853515625,
      "model_forward_time": 0.11504864692687988,
      "step": 21258
    },
    {
      "epoch": 0.00012974853515625,
      "step": 21258,
      "training_step_time": 0.48745226860046387
    },
    {
      "epoch": 0.000129754638671875,
      "model_forward_time": 0.11532068252563477,
      "step": 21259
    },
    {
      "epoch": 0.000129754638671875,
      "step": 21259,
      "training_step_time": 0.48974180221557617
    },
    {
      "epoch": 0.0001297607421875,
      "grad_norm": 0.10772999376058578,
      "learning_rate": 7.674478779000398e-05,
      "loss": 0.053,
      "step": 21260
    },
    {
      "epoch": 0.0001297607421875,
      "model_forward_time": 0.11546039581298828,
      "step": 21260
    },
    {
      "epoch": 0.0001297607421875,
      "step": 21260,
      "training_step_time": 0.48670434951782227
    },
    {
      "epoch": 0.000129766845703125,
      "model_forward_time": 0.11484360694885254,
      "step": 21261
    },
    {
      "epoch": 0.000129766845703125,
      "step": 21261,
      "training_step_time": 0.3883371353149414
    },
    {
      "epoch": 0.00012977294921875,
      "model_forward_time": 0.11443495750427246,
      "step": 21262
    },
    {
      "epoch": 0.00012977294921875,
      "step": 21262,
      "training_step_time": 0.39530086517333984
    },
    {
      "epoch": 0.000129779052734375,
      "model_forward_time": 0.11418986320495605,
      "step": 21263
    },
    {
      "epoch": 0.000129779052734375,
      "step": 21263,
      "training_step_time": 0.3932514190673828
    },
    {
      "epoch": 0.00012978515625,
      "model_forward_time": 0.11714434623718262,
      "step": 21264
    },
    {
      "epoch": 0.00012978515625,
      "step": 21264,
      "training_step_time": 0.40987563133239746
    },
    {
      "epoch": 0.000129791259765625,
      "model_forward_time": 0.11525249481201172,
      "step": 21265
    },
    {
      "epoch": 0.000129791259765625,
      "step": 21265,
      "training_step_time": 0.3990790843963623
    },
    {
      "epoch": 0.00012979736328125,
      "model_forward_time": 0.11479043960571289,
      "step": 21266
    },
    {
      "epoch": 0.00012979736328125,
      "step": 21266,
      "training_step_time": 0.42425966262817383
    },
    {
      "epoch": 0.000129803466796875,
      "model_forward_time": 0.11569619178771973,
      "step": 21267
    },
    {
      "epoch": 0.000129803466796875,
      "step": 21267,
      "training_step_time": 0.3904874324798584
    },
    {
      "epoch": 0.0001298095703125,
      "model_forward_time": 0.11464595794677734,
      "step": 21268
    },
    {
      "epoch": 0.0001298095703125,
      "step": 21268,
      "training_step_time": 0.6615478992462158
    },
    {
      "epoch": 0.000129815673828125,
      "model_forward_time": 0.11486411094665527,
      "step": 21269
    },
    {
      "epoch": 0.000129815673828125,
      "step": 21269,
      "training_step_time": 0.39538145065307617
    },
    {
      "epoch": 0.00012982177734375,
      "grad_norm": 0.11725031584501266,
      "learning_rate": 7.672149962045457e-05,
      "loss": 0.0547,
      "step": 21270
    },
    {
      "epoch": 0.00012982177734375,
      "model_forward_time": 0.11513137817382812,
      "step": 21270
    },
    {
      "epoch": 0.00012982177734375,
      "step": 21270,
      "training_step_time": 0.365264892578125
    },
    {
      "epoch": 0.000129827880859375,
      "model_forward_time": 0.11537599563598633,
      "step": 21271
    },
    {
      "epoch": 0.000129827880859375,
      "step": 21271,
      "training_step_time": 0.43416357040405273
    },
    {
      "epoch": 0.000129833984375,
      "model_forward_time": 0.11519670486450195,
      "step": 21272
    },
    {
      "epoch": 0.000129833984375,
      "step": 21272,
      "training_step_time": 0.41667675971984863
    },
    {
      "epoch": 0.000129840087890625,
      "model_forward_time": 0.11494016647338867,
      "step": 21273
    },
    {
      "epoch": 0.000129840087890625,
      "step": 21273,
      "training_step_time": 0.46430444717407227
    },
    {
      "epoch": 0.00012984619140625,
      "model_forward_time": 0.11578106880187988,
      "step": 21274
    },
    {
      "epoch": 0.00012984619140625,
      "step": 21274,
      "training_step_time": 0.4893918037414551
    },
    {
      "epoch": 0.000129852294921875,
      "model_forward_time": 0.11476874351501465,
      "step": 21275
    },
    {
      "epoch": 0.000129852294921875,
      "step": 21275,
      "training_step_time": 0.3863224983215332
    },
    {
      "epoch": 0.0001298583984375,
      "model_forward_time": 0.11499905586242676,
      "step": 21276
    },
    {
      "epoch": 0.0001298583984375,
      "step": 21276,
      "training_step_time": 0.39632368087768555
    },
    {
      "epoch": 0.000129864501953125,
      "model_forward_time": 0.11515688896179199,
      "step": 21277
    },
    {
      "epoch": 0.000129864501953125,
      "step": 21277,
      "training_step_time": 0.38852810859680176
    },
    {
      "epoch": 0.00012987060546875,
      "model_forward_time": 0.11488771438598633,
      "step": 21278
    },
    {
      "epoch": 0.00012987060546875,
      "step": 21278,
      "training_step_time": 0.42022275924682617
    },
    {
      "epoch": 0.000129876708984375,
      "model_forward_time": 0.11545777320861816,
      "step": 21279
    },
    {
      "epoch": 0.000129876708984375,
      "step": 21279,
      "training_step_time": 0.39327168464660645
    },
    {
      "epoch": 0.0001298828125,
      "grad_norm": 0.1275855302810669,
      "learning_rate": 7.66982033336191e-05,
      "loss": 0.0512,
      "step": 21280
    },
    {
      "epoch": 0.0001298828125,
      "model_forward_time": 0.1152803897857666,
      "step": 21280
    },
    {
      "epoch": 0.0001298828125,
      "step": 21280,
      "training_step_time": 0.6062617301940918
    },
    {
      "epoch": 0.000129888916015625,
      "model_forward_time": 0.11495518684387207,
      "step": 21281
    },
    {
      "epoch": 0.000129888916015625,
      "step": 21281,
      "training_step_time": 0.4026336669921875
    },
    {
      "epoch": 0.00012989501953125,
      "model_forward_time": 0.11482095718383789,
      "step": 21282
    },
    {
      "epoch": 0.00012989501953125,
      "step": 21282,
      "training_step_time": 0.38607311248779297
    },
    {
      "epoch": 0.000129901123046875,
      "model_forward_time": 0.11453127861022949,
      "step": 21283
    },
    {
      "epoch": 0.000129901123046875,
      "step": 21283,
      "training_step_time": 0.38622403144836426
    },
    {
      "epoch": 0.0001299072265625,
      "model_forward_time": 0.11574673652648926,
      "step": 21284
    },
    {
      "epoch": 0.0001299072265625,
      "step": 21284,
      "training_step_time": 0.3662083148956299
    },
    {
      "epoch": 0.000129913330078125,
      "model_forward_time": 0.11505270004272461,
      "step": 21285
    },
    {
      "epoch": 0.000129913330078125,
      "step": 21285,
      "training_step_time": 0.44327259063720703
    },
    {
      "epoch": 0.00012991943359375,
      "model_forward_time": 0.11566781997680664,
      "step": 21286
    },
    {
      "epoch": 0.00012991943359375,
      "step": 21286,
      "training_step_time": 0.47197771072387695
    },
    {
      "epoch": 0.000129925537109375,
      "model_forward_time": 0.11548709869384766,
      "step": 21287
    },
    {
      "epoch": 0.000129925537109375,
      "step": 21287,
      "training_step_time": 0.6443703174591064
    },
    {
      "epoch": 0.000129931640625,
      "model_forward_time": 0.11533999443054199,
      "step": 21288
    },
    {
      "epoch": 0.000129931640625,
      "step": 21288,
      "training_step_time": 0.4380500316619873
    },
    {
      "epoch": 0.000129937744140625,
      "model_forward_time": 0.11410999298095703,
      "step": 21289
    },
    {
      "epoch": 0.000129937744140625,
      "step": 21289,
      "training_step_time": 0.3852086067199707
    },
    {
      "epoch": 0.00012994384765625,
      "grad_norm": 0.18527744710445404,
      "learning_rate": 7.66748989365744e-05,
      "loss": 0.0493,
      "step": 21290
    },
    {
      "epoch": 0.00012994384765625,
      "model_forward_time": 0.11484456062316895,
      "step": 21290
    },
    {
      "epoch": 0.00012994384765625,
      "step": 21290,
      "training_step_time": 0.38976502418518066
    },
    {
      "epoch": 0.000129949951171875,
      "model_forward_time": 0.11462163925170898,
      "step": 21291
    },
    {
      "epoch": 0.000129949951171875,
      "step": 21291,
      "training_step_time": 0.446946382522583
    },
    {
      "epoch": 0.0001299560546875,
      "model_forward_time": 0.11508059501647949,
      "step": 21292
    },
    {
      "epoch": 0.0001299560546875,
      "step": 21292,
      "training_step_time": 0.4116971492767334
    },
    {
      "epoch": 0.000129962158203125,
      "model_forward_time": 0.11485552787780762,
      "step": 21293
    },
    {
      "epoch": 0.000129962158203125,
      "step": 21293,
      "training_step_time": 0.8791074752807617
    },
    {
      "epoch": 0.00012996826171875,
      "model_forward_time": 0.1145620346069336,
      "step": 21294
    },
    {
      "epoch": 0.00012996826171875,
      "step": 21294,
      "training_step_time": 0.3883087635040283
    },
    {
      "epoch": 0.000129974365234375,
      "model_forward_time": 0.11402297019958496,
      "step": 21295
    },
    {
      "epoch": 0.000129974365234375,
      "step": 21295,
      "training_step_time": 0.39105844497680664
    },
    {
      "epoch": 0.00012998046875,
      "model_forward_time": 0.11447834968566895,
      "step": 21296
    },
    {
      "epoch": 0.00012998046875,
      "step": 21296,
      "training_step_time": 0.3840298652648926
    },
    {
      "epoch": 0.000129986572265625,
      "model_forward_time": 0.11484098434448242,
      "step": 21297
    },
    {
      "epoch": 0.000129986572265625,
      "step": 21297,
      "training_step_time": 0.3985016345977783
    },
    {
      "epoch": 0.00012999267578125,
      "model_forward_time": 0.11487483978271484,
      "step": 21298
    },
    {
      "epoch": 0.00012999267578125,
      "step": 21298,
      "training_step_time": 0.45717859268188477
    },
    {
      "epoch": 0.000129998779296875,
      "model_forward_time": 0.11477494239807129,
      "step": 21299
    },
    {
      "epoch": 0.000129998779296875,
      "step": 21299,
      "training_step_time": 0.5924167633056641
    },
    {
      "epoch": 0.0001300048828125,
      "grad_norm": 0.12020794302225113,
      "learning_rate": 7.66515864363997e-05,
      "loss": 0.0525,
      "step": 21300
    },
    {
      "epoch": 0.0001300048828125,
      "model_forward_time": 0.11483526229858398,
      "step": 21300
    },
    {
      "epoch": 0.0001300048828125,
      "step": 21300,
      "training_step_time": 0.45630645751953125
    },
    {
      "epoch": 0.000130010986328125,
      "model_forward_time": 0.11489105224609375,
      "step": 21301
    },
    {
      "epoch": 0.000130010986328125,
      "step": 21301,
      "training_step_time": 0.3979959487915039
    },
    {
      "epoch": 0.00013001708984375,
      "model_forward_time": 0.11759567260742188,
      "step": 21302
    },
    {
      "epoch": 0.00013001708984375,
      "step": 21302,
      "training_step_time": 0.3894033432006836
    },
    {
      "epoch": 0.000130023193359375,
      "model_forward_time": 0.11457586288452148,
      "step": 21303
    },
    {
      "epoch": 0.000130023193359375,
      "step": 21303,
      "training_step_time": 0.4587700366973877
    },
    {
      "epoch": 0.000130029296875,
      "model_forward_time": 0.11564016342163086,
      "step": 21304
    },
    {
      "epoch": 0.000130029296875,
      "step": 21304,
      "training_step_time": 0.3939692974090576
    },
    {
      "epoch": 0.000130035400390625,
      "model_forward_time": 0.11523628234863281,
      "step": 21305
    },
    {
      "epoch": 0.000130035400390625,
      "step": 21305,
      "training_step_time": 0.5607354640960693
    },
    {
      "epoch": 0.00013004150390625,
      "model_forward_time": 0.11484599113464355,
      "step": 21306
    },
    {
      "epoch": 0.00013004150390625,
      "step": 21306,
      "training_step_time": 0.403519868850708
    },
    {
      "epoch": 0.000130047607421875,
      "model_forward_time": 0.11489200592041016,
      "step": 21307
    },
    {
      "epoch": 0.000130047607421875,
      "step": 21307,
      "training_step_time": 0.3892378807067871
    },
    {
      "epoch": 0.0001300537109375,
      "model_forward_time": 0.11457610130310059,
      "step": 21308
    },
    {
      "epoch": 0.0001300537109375,
      "step": 21308,
      "training_step_time": 0.40723156929016113
    },
    {
      "epoch": 0.000130059814453125,
      "model_forward_time": 0.11543679237365723,
      "step": 21309
    },
    {
      "epoch": 0.000130059814453125,
      "step": 21309,
      "training_step_time": 0.3892247676849365
    },
    {
      "epoch": 0.00013006591796875,
      "grad_norm": 0.13828304409980774,
      "learning_rate": 7.662826584017672e-05,
      "loss": 0.0408,
      "step": 21310
    },
    {
      "epoch": 0.00013006591796875,
      "model_forward_time": 0.11492419242858887,
      "step": 21310
    },
    {
      "epoch": 0.00013006591796875,
      "step": 21310,
      "training_step_time": 0.3987247943878174
    },
    {
      "epoch": 0.000130072021484375,
      "model_forward_time": 0.11577391624450684,
      "step": 21311
    },
    {
      "epoch": 0.000130072021484375,
      "step": 21311,
      "training_step_time": 0.6156055927276611
    },
    {
      "epoch": 0.000130078125,
      "model_forward_time": 0.11485791206359863,
      "step": 21312
    },
    {
      "epoch": 0.000130078125,
      "step": 21312,
      "training_step_time": 0.43999576568603516
    },
    {
      "epoch": 0.000130084228515625,
      "model_forward_time": 0.11501002311706543,
      "step": 21313
    },
    {
      "epoch": 0.000130084228515625,
      "step": 21313,
      "training_step_time": 0.49465513229370117
    },
    {
      "epoch": 0.00013009033203125,
      "model_forward_time": 0.11412906646728516,
      "step": 21314
    },
    {
      "epoch": 0.00013009033203125,
      "step": 21314,
      "training_step_time": 0.4189112186431885
    },
    {
      "epoch": 0.000130096435546875,
      "model_forward_time": 0.1146550178527832,
      "step": 21315
    },
    {
      "epoch": 0.000130096435546875,
      "step": 21315,
      "training_step_time": 0.38987278938293457
    },
    {
      "epoch": 0.0001301025390625,
      "model_forward_time": 0.11448526382446289,
      "step": 21316
    },
    {
      "epoch": 0.0001301025390625,
      "step": 21316,
      "training_step_time": 0.3953275680541992
    },
    {
      "epoch": 0.000130108642578125,
      "model_forward_time": 0.11430168151855469,
      "step": 21317
    },
    {
      "epoch": 0.000130108642578125,
      "step": 21317,
      "training_step_time": 0.39567112922668457
    },
    {
      "epoch": 0.00013011474609375,
      "model_forward_time": 0.11505937576293945,
      "step": 21318
    },
    {
      "epoch": 0.00013011474609375,
      "step": 21318,
      "training_step_time": 0.39415764808654785
    },
    {
      "epoch": 0.000130120849609375,
      "model_forward_time": 0.11507320404052734,
      "step": 21319
    },
    {
      "epoch": 0.000130120849609375,
      "step": 21319,
      "training_step_time": 0.39696192741394043
    },
    {
      "epoch": 0.000130126953125,
      "grad_norm": 0.1217249184846878,
      "learning_rate": 7.660493715498969e-05,
      "loss": 0.0448,
      "step": 21320
    },
    {
      "epoch": 0.000130126953125,
      "model_forward_time": 0.11523008346557617,
      "step": 21320
    },
    {
      "epoch": 0.000130126953125,
      "step": 21320,
      "training_step_time": 0.3830571174621582
    },
    {
      "epoch": 0.000130133056640625,
      "model_forward_time": 0.11587285995483398,
      "step": 21321
    },
    {
      "epoch": 0.000130133056640625,
      "step": 21321,
      "training_step_time": 0.3853323459625244
    },
    {
      "epoch": 0.00013013916015625,
      "model_forward_time": 0.1155092716217041,
      "step": 21322
    },
    {
      "epoch": 0.00013013916015625,
      "step": 21322,
      "training_step_time": 0.39372873306274414
    },
    {
      "epoch": 0.000130145263671875,
      "model_forward_time": 0.11500859260559082,
      "step": 21323
    },
    {
      "epoch": 0.000130145263671875,
      "step": 21323,
      "training_step_time": 0.5819010734558105
    },
    {
      "epoch": 0.0001301513671875,
      "model_forward_time": 0.1147010326385498,
      "step": 21324
    },
    {
      "epoch": 0.0001301513671875,
      "step": 21324,
      "training_step_time": 0.38927745819091797
    },
    {
      "epoch": 0.000130157470703125,
      "model_forward_time": 0.11512088775634766,
      "step": 21325
    },
    {
      "epoch": 0.000130157470703125,
      "step": 21325,
      "training_step_time": 0.36705517768859863
    },
    {
      "epoch": 0.00013016357421875,
      "model_forward_time": 0.11517715454101562,
      "step": 21326
    },
    {
      "epoch": 0.00013016357421875,
      "step": 21326,
      "training_step_time": 0.41571474075317383
    },
    {
      "epoch": 0.000130169677734375,
      "model_forward_time": 0.11487030982971191,
      "step": 21327
    },
    {
      "epoch": 0.000130169677734375,
      "step": 21327,
      "training_step_time": 0.4978902339935303
    },
    {
      "epoch": 0.00013017578125,
      "model_forward_time": 0.11435127258300781,
      "step": 21328
    },
    {
      "epoch": 0.00013017578125,
      "step": 21328,
      "training_step_time": 0.4901440143585205
    },
    {
      "epoch": 0.000130181884765625,
      "model_forward_time": 0.11478590965270996,
      "step": 21329
    },
    {
      "epoch": 0.000130181884765625,
      "step": 21329,
      "training_step_time": 0.44432663917541504
    },
    {
      "epoch": 0.00013018798828125,
      "grad_norm": 0.14846469461917877,
      "learning_rate": 7.658160038792518e-05,
      "loss": 0.0446,
      "step": 21330
    },
    {
      "epoch": 0.00013018798828125,
      "model_forward_time": 0.1150200366973877,
      "step": 21330
    },
    {
      "epoch": 0.00013018798828125,
      "step": 21330,
      "training_step_time": 0.40418267250061035
    },
    {
      "epoch": 0.000130194091796875,
      "model_forward_time": 0.11534285545349121,
      "step": 21331
    },
    {
      "epoch": 0.000130194091796875,
      "step": 21331,
      "training_step_time": 0.4019777774810791
    },
    {
      "epoch": 0.0001302001953125,
      "model_forward_time": 0.11478638648986816,
      "step": 21332
    },
    {
      "epoch": 0.0001302001953125,
      "step": 21332,
      "training_step_time": 0.3832247257232666
    },
    {
      "epoch": 0.000130206298828125,
      "model_forward_time": 0.11478662490844727,
      "step": 21333
    },
    {
      "epoch": 0.000130206298828125,
      "step": 21333,
      "training_step_time": 0.38832569122314453
    },
    {
      "epoch": 0.00013021240234375,
      "model_forward_time": 0.11534237861633301,
      "step": 21334
    },
    {
      "epoch": 0.00013021240234375,
      "step": 21334,
      "training_step_time": 0.39231061935424805
    },
    {
      "epoch": 0.000130218505859375,
      "model_forward_time": 0.11483955383300781,
      "step": 21335
    },
    {
      "epoch": 0.000130218505859375,
      "step": 21335,
      "training_step_time": 0.4849205017089844
    },
    {
      "epoch": 0.000130224609375,
      "model_forward_time": 0.1150367259979248,
      "step": 21336
    },
    {
      "epoch": 0.000130224609375,
      "step": 21336,
      "training_step_time": 0.3993968963623047
    },
    {
      "epoch": 0.000130230712890625,
      "model_forward_time": 0.11514902114868164,
      "step": 21337
    },
    {
      "epoch": 0.000130230712890625,
      "step": 21337,
      "training_step_time": 0.39923572540283203
    },
    {
      "epoch": 0.00013023681640625,
      "model_forward_time": 0.11534404754638672,
      "step": 21338
    },
    {
      "epoch": 0.00013023681640625,
      "step": 21338,
      "training_step_time": 0.3856940269470215
    },
    {
      "epoch": 0.000130242919921875,
      "model_forward_time": 0.11457228660583496,
      "step": 21339
    },
    {
      "epoch": 0.000130242919921875,
      "step": 21339,
      "training_step_time": 0.3841722011566162
    },
    {
      "epoch": 0.0001302490234375,
      "grad_norm": 0.15889640152454376,
      "learning_rate": 7.655825554607235e-05,
      "loss": 0.0505,
      "step": 21340
    },
    {
      "epoch": 0.0001302490234375,
      "model_forward_time": 0.11545467376708984,
      "step": 21340
    },
    {
      "epoch": 0.0001302490234375,
      "step": 21340,
      "training_step_time": 0.46977996826171875
    },
    {
      "epoch": 0.000130255126953125,
      "model_forward_time": 0.1153261661529541,
      "step": 21341
    },
    {
      "epoch": 0.000130255126953125,
      "step": 21341,
      "training_step_time": 0.5282535552978516
    },
    {
      "epoch": 0.00013026123046875,
      "model_forward_time": 0.11504030227661133,
      "step": 21342
    },
    {
      "epoch": 0.00013026123046875,
      "step": 21342,
      "training_step_time": 0.4725668430328369
    },
    {
      "epoch": 0.000130267333984375,
      "model_forward_time": 0.11467862129211426,
      "step": 21343
    },
    {
      "epoch": 0.000130267333984375,
      "step": 21343,
      "training_step_time": 0.4369027614593506
    },
    {
      "epoch": 0.0001302734375,
      "model_forward_time": 0.11505675315856934,
      "step": 21344
    },
    {
      "epoch": 0.0001302734375,
      "step": 21344,
      "training_step_time": 0.3973960876464844
    },
    {
      "epoch": 0.000130279541015625,
      "model_forward_time": 0.11508035659790039,
      "step": 21345
    },
    {
      "epoch": 0.000130279541015625,
      "step": 21345,
      "training_step_time": 0.37850284576416016
    },
    {
      "epoch": 0.00013028564453125,
      "model_forward_time": 0.1157686710357666,
      "step": 21346
    },
    {
      "epoch": 0.00013028564453125,
      "step": 21346,
      "training_step_time": 0.3864116668701172
    },
    {
      "epoch": 0.000130291748046875,
      "model_forward_time": 0.11572790145874023,
      "step": 21347
    },
    {
      "epoch": 0.000130291748046875,
      "step": 21347,
      "training_step_time": 0.47405481338500977
    },
    {
      "epoch": 0.0001302978515625,
      "model_forward_time": 0.11477065086364746,
      "step": 21348
    },
    {
      "epoch": 0.0001302978515625,
      "step": 21348,
      "training_step_time": 0.3932201862335205
    },
    {
      "epoch": 0.000130303955078125,
      "model_forward_time": 0.11514592170715332,
      "step": 21349
    },
    {
      "epoch": 0.000130303955078125,
      "step": 21349,
      "training_step_time": 0.4000680446624756
    },
    {
      "epoch": 0.00013031005859375,
      "grad_norm": 0.14148423075675964,
      "learning_rate": 7.653490263652269e-05,
      "loss": 0.0454,
      "step": 21350
    },
    {
      "epoch": 0.00013031005859375,
      "model_forward_time": 0.11602377891540527,
      "step": 21350
    },
    {
      "epoch": 0.00013031005859375,
      "step": 21350,
      "training_step_time": 0.3870551586151123
    },
    {
      "epoch": 0.000130316162109375,
      "model_forward_time": 0.11568689346313477,
      "step": 21351
    },
    {
      "epoch": 0.000130316162109375,
      "step": 21351,
      "training_step_time": 0.39401984214782715
    },
    {
      "epoch": 0.000130322265625,
      "model_forward_time": 0.11603236198425293,
      "step": 21352
    },
    {
      "epoch": 0.000130322265625,
      "step": 21352,
      "training_step_time": 0.39595460891723633
    },
    {
      "epoch": 0.000130328369140625,
      "model_forward_time": 0.11477088928222656,
      "step": 21353
    },
    {
      "epoch": 0.000130328369140625,
      "step": 21353,
      "training_step_time": 0.5494194030761719
    },
    {
      "epoch": 0.00013033447265625,
      "model_forward_time": 0.11590838432312012,
      "step": 21354
    },
    {
      "epoch": 0.00013033447265625,
      "step": 21354,
      "training_step_time": 0.4046957492828369
    },
    {
      "epoch": 0.000130340576171875,
      "model_forward_time": 0.11491107940673828,
      "step": 21355
    },
    {
      "epoch": 0.000130340576171875,
      "step": 21355,
      "training_step_time": 0.49468088150024414
    },
    {
      "epoch": 0.0001303466796875,
      "model_forward_time": 0.11467432975769043,
      "step": 21356
    },
    {
      "epoch": 0.0001303466796875,
      "step": 21356,
      "training_step_time": 0.5035145282745361
    },
    {
      "epoch": 0.000130352783203125,
      "model_forward_time": 0.11449050903320312,
      "step": 21357
    },
    {
      "epoch": 0.000130352783203125,
      "step": 21357,
      "training_step_time": 0.41738128662109375
    },
    {
      "epoch": 0.00013035888671875,
      "model_forward_time": 0.11451363563537598,
      "step": 21358
    },
    {
      "epoch": 0.00013035888671875,
      "step": 21358,
      "training_step_time": 0.3904256820678711
    },
    {
      "epoch": 0.000130364990234375,
      "model_forward_time": 0.11433577537536621,
      "step": 21359
    },
    {
      "epoch": 0.000130364990234375,
      "step": 21359,
      "training_step_time": 0.39626336097717285
    },
    {
      "epoch": 0.00013037109375,
      "grad_norm": 0.17137062549591064,
      "learning_rate": 7.651154166637025e-05,
      "loss": 0.0535,
      "step": 21360
    },
    {
      "epoch": 0.00013037109375,
      "model_forward_time": 0.11464405059814453,
      "step": 21360
    },
    {
      "epoch": 0.00013037109375,
      "step": 21360,
      "training_step_time": 0.38928723335266113
    },
    {
      "epoch": 0.000130377197265625,
      "model_forward_time": 0.11539435386657715,
      "step": 21361
    },
    {
      "epoch": 0.000130377197265625,
      "step": 21361,
      "training_step_time": 0.3885514736175537
    },
    {
      "epoch": 0.00013038330078125,
      "model_forward_time": 0.11482906341552734,
      "step": 21362
    },
    {
      "epoch": 0.00013038330078125,
      "step": 21362,
      "training_step_time": 0.37694525718688965
    },
    {
      "epoch": 0.000130389404296875,
      "model_forward_time": 0.11557579040527344,
      "step": 21363
    },
    {
      "epoch": 0.000130389404296875,
      "step": 21363,
      "training_step_time": 0.3858065605163574
    },
    {
      "epoch": 0.0001303955078125,
      "model_forward_time": 0.11546874046325684,
      "step": 21364
    },
    {
      "epoch": 0.0001303955078125,
      "step": 21364,
      "training_step_time": 0.385530948638916
    },
    {
      "epoch": 0.000130401611328125,
      "model_forward_time": 0.11603808403015137,
      "step": 21365
    },
    {
      "epoch": 0.000130401611328125,
      "step": 21365,
      "training_step_time": 0.526278018951416
    },
    {
      "epoch": 0.00013040771484375,
      "model_forward_time": 0.1151723861694336,
      "step": 21366
    },
    {
      "epoch": 0.00013040771484375,
      "step": 21366,
      "training_step_time": 0.38840794563293457
    },
    {
      "epoch": 0.000130413818359375,
      "model_forward_time": 0.11539149284362793,
      "step": 21367
    },
    {
      "epoch": 0.000130413818359375,
      "step": 21367,
      "training_step_time": 0.39054393768310547
    },
    {
      "epoch": 0.000130419921875,
      "model_forward_time": 0.1148381233215332,
      "step": 21368
    },
    {
      "epoch": 0.000130419921875,
      "step": 21368,
      "training_step_time": 0.3840029239654541
    },
    {
      "epoch": 0.000130426025390625,
      "model_forward_time": 0.11530804634094238,
      "step": 21369
    },
    {
      "epoch": 0.000130426025390625,
      "step": 21369,
      "training_step_time": 0.3889048099517822
    },
    {
      "epoch": 0.00013043212890625,
      "grad_norm": 0.11664433032274246,
      "learning_rate": 7.648817264271142e-05,
      "loss": 0.0491,
      "step": 21370
    },
    {
      "epoch": 0.00013043212890625,
      "model_forward_time": 0.1152493953704834,
      "step": 21370
    },
    {
      "epoch": 0.00013043212890625,
      "step": 21370,
      "training_step_time": 0.47974538803100586
    },
    {
      "epoch": 0.000130438232421875,
      "model_forward_time": 0.11504912376403809,
      "step": 21371
    },
    {
      "epoch": 0.000130438232421875,
      "step": 21371,
      "training_step_time": 0.6543259620666504
    },
    {
      "epoch": 0.0001304443359375,
      "model_forward_time": 0.11499667167663574,
      "step": 21372
    },
    {
      "epoch": 0.0001304443359375,
      "step": 21372,
      "training_step_time": 0.3852417469024658
    },
    {
      "epoch": 0.000130450439453125,
      "model_forward_time": 0.11452531814575195,
      "step": 21373
    },
    {
      "epoch": 0.000130450439453125,
      "step": 21373,
      "training_step_time": 0.3955068588256836
    },
    {
      "epoch": 0.00013045654296875,
      "model_forward_time": 0.1146707534790039,
      "step": 21374
    },
    {
      "epoch": 0.00013045654296875,
      "step": 21374,
      "training_step_time": 0.3896329402923584
    },
    {
      "epoch": 0.000130462646484375,
      "model_forward_time": 0.1147623062133789,
      "step": 21375
    },
    {
      "epoch": 0.000130462646484375,
      "step": 21375,
      "training_step_time": 0.38260579109191895
    },
    {
      "epoch": 0.00013046875,
      "model_forward_time": 0.11489272117614746,
      "step": 21376
    },
    {
      "epoch": 0.00013046875,
      "step": 21376,
      "training_step_time": 0.39528393745422363
    },
    {
      "epoch": 0.000130474853515625,
      "model_forward_time": 0.11419367790222168,
      "step": 21377
    },
    {
      "epoch": 0.000130474853515625,
      "step": 21377,
      "training_step_time": 0.49515461921691895
    },
    {
      "epoch": 0.00013048095703125,
      "model_forward_time": 0.1145925521850586,
      "step": 21378
    },
    {
      "epoch": 0.00013048095703125,
      "step": 21378,
      "training_step_time": 0.3934497833251953
    },
    {
      "epoch": 0.000130487060546875,
      "model_forward_time": 0.11538243293762207,
      "step": 21379
    },
    {
      "epoch": 0.000130487060546875,
      "step": 21379,
      "training_step_time": 0.3929762840270996
    },
    {
      "epoch": 0.0001304931640625,
      "grad_norm": 0.15234766900539398,
      "learning_rate": 7.646479557264513e-05,
      "loss": 0.0489,
      "step": 21380
    },
    {
      "epoch": 0.0001304931640625,
      "model_forward_time": 0.11474299430847168,
      "step": 21380
    },
    {
      "epoch": 0.0001304931640625,
      "step": 21380,
      "training_step_time": 0.3858504295349121
    },
    {
      "epoch": 0.000130499267578125,
      "model_forward_time": 0.11537909507751465,
      "step": 21381
    },
    {
      "epoch": 0.000130499267578125,
      "step": 21381,
      "training_step_time": 0.38356947898864746
    },
    {
      "epoch": 0.00013050537109375,
      "model_forward_time": 0.1159660816192627,
      "step": 21382
    },
    {
      "epoch": 0.00013050537109375,
      "step": 21382,
      "training_step_time": 0.5295913219451904
    },
    {
      "epoch": 0.000130511474609375,
      "model_forward_time": 0.11517763137817383,
      "step": 21383
    },
    {
      "epoch": 0.000130511474609375,
      "step": 21383,
      "training_step_time": 0.38875341415405273
    },
    {
      "epoch": 0.000130517578125,
      "model_forward_time": 0.11755824089050293,
      "step": 21384
    },
    {
      "epoch": 0.000130517578125,
      "step": 21384,
      "training_step_time": 0.4846177101135254
    },
    {
      "epoch": 0.000130523681640625,
      "model_forward_time": 0.11783289909362793,
      "step": 21385
    },
    {
      "epoch": 0.000130523681640625,
      "step": 21385,
      "training_step_time": 0.4579298496246338
    },
    {
      "epoch": 0.00013052978515625,
      "model_forward_time": 0.11786532402038574,
      "step": 21386
    },
    {
      "epoch": 0.00013052978515625,
      "step": 21386,
      "training_step_time": 0.3868420124053955
    },
    {
      "epoch": 0.000130535888671875,
      "model_forward_time": 0.11781907081604004,
      "step": 21387
    },
    {
      "epoch": 0.000130535888671875,
      "step": 21387,
      "training_step_time": 0.3830902576446533
    },
    {
      "epoch": 0.0001305419921875,
      "model_forward_time": 0.1197202205657959,
      "step": 21388
    },
    {
      "epoch": 0.0001305419921875,
      "step": 21388,
      "training_step_time": 0.4691026210784912
    },
    {
      "epoch": 0.000130548095703125,
      "model_forward_time": 0.11611294746398926,
      "step": 21389
    },
    {
      "epoch": 0.000130548095703125,
      "step": 21389,
      "training_step_time": 0.37557172775268555
    },
    {
      "epoch": 0.00013055419921875,
      "grad_norm": 0.23319830000400543,
      "learning_rate": 7.644141046327271e-05,
      "loss": 0.0464,
      "step": 21390
    },
    {
      "epoch": 0.00013055419921875,
      "model_forward_time": 0.11506199836730957,
      "step": 21390
    },
    {
      "epoch": 0.00013055419921875,
      "step": 21390,
      "training_step_time": 0.3845503330230713
    },
    {
      "epoch": 0.000130560302734375,
      "model_forward_time": 0.11530947685241699,
      "step": 21391
    },
    {
      "epoch": 0.000130560302734375,
      "step": 21391,
      "training_step_time": 0.39238953590393066
    },
    {
      "epoch": 0.00013056640625,
      "model_forward_time": 0.11609578132629395,
      "step": 21392
    },
    {
      "epoch": 0.00013056640625,
      "step": 21392,
      "training_step_time": 0.3785073757171631
    },
    {
      "epoch": 0.000130572509765625,
      "model_forward_time": 0.11540031433105469,
      "step": 21393
    },
    {
      "epoch": 0.000130572509765625,
      "step": 21393,
      "training_step_time": 0.3900794982910156
    },
    {
      "epoch": 0.00013057861328125,
      "model_forward_time": 0.11552739143371582,
      "step": 21394
    },
    {
      "epoch": 0.00013057861328125,
      "step": 21394,
      "training_step_time": 0.7900998592376709
    },
    {
      "epoch": 0.000130584716796875,
      "model_forward_time": 0.11507010459899902,
      "step": 21395
    },
    {
      "epoch": 0.000130584716796875,
      "step": 21395,
      "training_step_time": 0.38652873039245605
    },
    {
      "epoch": 0.0001305908203125,
      "model_forward_time": 0.1152496337890625,
      "step": 21396
    },
    {
      "epoch": 0.0001305908203125,
      "step": 21396,
      "training_step_time": 0.4044778347015381
    },
    {
      "epoch": 0.000130596923828125,
      "model_forward_time": 0.11538529396057129,
      "step": 21397
    },
    {
      "epoch": 0.000130596923828125,
      "step": 21397,
      "training_step_time": 0.36266160011291504
    },
    {
      "epoch": 0.00013060302734375,
      "model_forward_time": 0.11455202102661133,
      "step": 21398
    },
    {
      "epoch": 0.00013060302734375,
      "step": 21398,
      "training_step_time": 0.4215874671936035
    },
    {
      "epoch": 0.000130609130859375,
      "model_forward_time": 0.11608076095581055,
      "step": 21399
    },
    {
      "epoch": 0.000130609130859375,
      "step": 21399,
      "training_step_time": 0.47469353675842285
    },
    {
      "epoch": 0.000130615234375,
      "grad_norm": 0.1576130986213684,
      "learning_rate": 7.641801732169795e-05,
      "loss": 0.0513,
      "step": 21400
    },
    {
      "epoch": 0.000130615234375,
      "model_forward_time": 0.11510133743286133,
      "step": 21400
    },
    {
      "epoch": 0.000130615234375,
      "step": 21400,
      "training_step_time": 0.5416522026062012
    },
    {
      "epoch": 0.000130621337890625,
      "model_forward_time": 0.11448240280151367,
      "step": 21401
    },
    {
      "epoch": 0.000130621337890625,
      "step": 21401,
      "training_step_time": 0.3895435333251953
    },
    {
      "epoch": 0.00013062744140625,
      "model_forward_time": 0.1143639087677002,
      "step": 21402
    },
    {
      "epoch": 0.00013062744140625,
      "step": 21402,
      "training_step_time": 0.386122465133667
    },
    {
      "epoch": 0.000130633544921875,
      "model_forward_time": 0.1150813102722168,
      "step": 21403
    },
    {
      "epoch": 0.000130633544921875,
      "step": 21403,
      "training_step_time": 0.3849670886993408
    },
    {
      "epoch": 0.0001306396484375,
      "model_forward_time": 0.11496853828430176,
      "step": 21404
    },
    {
      "epoch": 0.0001306396484375,
      "step": 21404,
      "training_step_time": 0.3799247741699219
    },
    {
      "epoch": 0.000130645751953125,
      "model_forward_time": 0.11620759963989258,
      "step": 21405
    },
    {
      "epoch": 0.000130645751953125,
      "step": 21405,
      "training_step_time": 0.3671729564666748
    },
    {
      "epoch": 0.00013065185546875,
      "model_forward_time": 0.11513352394104004,
      "step": 21406
    },
    {
      "epoch": 0.00013065185546875,
      "step": 21406,
      "training_step_time": 0.7089309692382812
    },
    {
      "epoch": 0.000130657958984375,
      "model_forward_time": 0.11558723449707031,
      "step": 21407
    },
    {
      "epoch": 0.000130657958984375,
      "step": 21407,
      "training_step_time": 0.387606143951416
    },
    {
      "epoch": 0.0001306640625,
      "model_forward_time": 0.11512517929077148,
      "step": 21408
    },
    {
      "epoch": 0.0001306640625,
      "step": 21408,
      "training_step_time": 0.39051055908203125
    },
    {
      "epoch": 0.000130670166015625,
      "model_forward_time": 0.11540079116821289,
      "step": 21409
    },
    {
      "epoch": 0.000130670166015625,
      "step": 21409,
      "training_step_time": 0.4278984069824219
    },
    {
      "epoch": 0.00013067626953125,
      "grad_norm": 0.2443644404411316,
      "learning_rate": 7.639461615502704e-05,
      "loss": 0.0484,
      "step": 21410
    },
    {
      "epoch": 0.00013067626953125,
      "model_forward_time": 0.11423015594482422,
      "step": 21410
    },
    {
      "epoch": 0.00013067626953125,
      "step": 21410,
      "training_step_time": 0.40273022651672363
    },
    {
      "epoch": 0.000130682373046875,
      "model_forward_time": 0.11450028419494629,
      "step": 21411
    },
    {
      "epoch": 0.000130682373046875,
      "step": 21411,
      "training_step_time": 0.36401867866516113
    },
    {
      "epoch": 0.0001306884765625,
      "model_forward_time": 0.11556363105773926,
      "step": 21412
    },
    {
      "epoch": 0.0001306884765625,
      "step": 21412,
      "training_step_time": 0.574202299118042
    },
    {
      "epoch": 0.000130694580078125,
      "model_forward_time": 0.11562442779541016,
      "step": 21413
    },
    {
      "epoch": 0.000130694580078125,
      "step": 21413,
      "training_step_time": 0.45409679412841797
    },
    {
      "epoch": 0.00013070068359375,
      "model_forward_time": 0.11498355865478516,
      "step": 21414
    },
    {
      "epoch": 0.00013070068359375,
      "step": 21414,
      "training_step_time": 0.37720608711242676
    },
    {
      "epoch": 0.000130706787109375,
      "model_forward_time": 0.11546516418457031,
      "step": 21415
    },
    {
      "epoch": 0.000130706787109375,
      "step": 21415,
      "training_step_time": 0.3763265609741211
    },
    {
      "epoch": 0.000130712890625,
      "model_forward_time": 0.11539626121520996,
      "step": 21416
    },
    {
      "epoch": 0.000130712890625,
      "step": 21416,
      "training_step_time": 0.3796522617340088
    },
    {
      "epoch": 0.000130718994140625,
      "model_forward_time": 0.11493062973022461,
      "step": 21417
    },
    {
      "epoch": 0.000130718994140625,
      "step": 21417,
      "training_step_time": 0.37676358222961426
    },
    {
      "epoch": 0.00013072509765625,
      "model_forward_time": 0.11557483673095703,
      "step": 21418
    },
    {
      "epoch": 0.00013072509765625,
      "step": 21418,
      "training_step_time": 0.5861070156097412
    },
    {
      "epoch": 0.000130731201171875,
      "model_forward_time": 0.11641144752502441,
      "step": 21419
    },
    {
      "epoch": 0.000130731201171875,
      "step": 21419,
      "training_step_time": 0.38603878021240234
    },
    {
      "epoch": 0.0001307373046875,
      "grad_norm": 0.15759196877479553,
      "learning_rate": 7.637120697036866e-05,
      "loss": 0.0469,
      "step": 21420
    },
    {
      "epoch": 0.0001307373046875,
      "model_forward_time": 0.11478781700134277,
      "step": 21420
    },
    {
      "epoch": 0.0001307373046875,
      "step": 21420,
      "training_step_time": 0.38631415367126465
    },
    {
      "epoch": 0.000130743408203125,
      "model_forward_time": 0.11526036262512207,
      "step": 21421
    },
    {
      "epoch": 0.000130743408203125,
      "step": 21421,
      "training_step_time": 0.3897097110748291
    },
    {
      "epoch": 0.00013074951171875,
      "model_forward_time": 0.11515307426452637,
      "step": 21422
    },
    {
      "epoch": 0.00013074951171875,
      "step": 21422,
      "training_step_time": 0.40134596824645996
    },
    {
      "epoch": 0.000130755615234375,
      "model_forward_time": 0.11535024642944336,
      "step": 21423
    },
    {
      "epoch": 0.000130755615234375,
      "step": 21423,
      "training_step_time": 0.45227718353271484
    },
    {
      "epoch": 0.00013076171875,
      "model_forward_time": 0.11502647399902344,
      "step": 21424
    },
    {
      "epoch": 0.00013076171875,
      "step": 21424,
      "training_step_time": 0.7008850574493408
    },
    {
      "epoch": 0.000130767822265625,
      "model_forward_time": 0.11527895927429199,
      "step": 21425
    },
    {
      "epoch": 0.000130767822265625,
      "step": 21425,
      "training_step_time": 0.4055290222167969
    },
    {
      "epoch": 0.00013077392578125,
      "model_forward_time": 0.11505603790283203,
      "step": 21426
    },
    {
      "epoch": 0.00013077392578125,
      "step": 21426,
      "training_step_time": 0.4641697406768799
    },
    {
      "epoch": 0.000130780029296875,
      "model_forward_time": 0.11481332778930664,
      "step": 21427
    },
    {
      "epoch": 0.000130780029296875,
      "step": 21427,
      "training_step_time": 0.41152167320251465
    },
    {
      "epoch": 0.0001307861328125,
      "model_forward_time": 0.11438512802124023,
      "step": 21428
    },
    {
      "epoch": 0.0001307861328125,
      "step": 21428,
      "training_step_time": 0.4141397476196289
    },
    {
      "epoch": 0.000130792236328125,
      "model_forward_time": 0.11442852020263672,
      "step": 21429
    },
    {
      "epoch": 0.000130792236328125,
      "step": 21429,
      "training_step_time": 0.3854539394378662
    },
    {
      "epoch": 0.00013079833984375,
      "grad_norm": 0.17607377469539642,
      "learning_rate": 7.634778977483389e-05,
      "loss": 0.05,
      "step": 21430
    },
    {
      "epoch": 0.00013079833984375,
      "model_forward_time": 0.11480712890625,
      "step": 21430
    },
    {
      "epoch": 0.00013079833984375,
      "step": 21430,
      "training_step_time": 0.43662118911743164
    },
    {
      "epoch": 0.000130804443359375,
      "model_forward_time": 0.11519813537597656,
      "step": 21431
    },
    {
      "epoch": 0.000130804443359375,
      "step": 21431,
      "training_step_time": 0.3809356689453125
    },
    {
      "epoch": 0.000130810546875,
      "model_forward_time": 0.1153724193572998,
      "step": 21432
    },
    {
      "epoch": 0.000130810546875,
      "step": 21432,
      "training_step_time": 0.400848388671875
    },
    {
      "epoch": 0.000130816650390625,
      "model_forward_time": 0.11508417129516602,
      "step": 21433
    },
    {
      "epoch": 0.000130816650390625,
      "step": 21433,
      "training_step_time": 0.40299415588378906
    },
    {
      "epoch": 0.00013082275390625,
      "model_forward_time": 0.11506319046020508,
      "step": 21434
    },
    {
      "epoch": 0.00013082275390625,
      "step": 21434,
      "training_step_time": 0.39484477043151855
    },
    {
      "epoch": 0.000130828857421875,
      "model_forward_time": 0.11515593528747559,
      "step": 21435
    },
    {
      "epoch": 0.000130828857421875,
      "step": 21435,
      "training_step_time": 0.39304566383361816
    },
    {
      "epoch": 0.0001308349609375,
      "model_forward_time": 0.11629986763000488,
      "step": 21436
    },
    {
      "epoch": 0.0001308349609375,
      "step": 21436,
      "training_step_time": 0.7374694347381592
    },
    {
      "epoch": 0.000130841064453125,
      "model_forward_time": 0.11571073532104492,
      "step": 21437
    },
    {
      "epoch": 0.000130841064453125,
      "step": 21437,
      "training_step_time": 0.3868598937988281
    },
    {
      "epoch": 0.00013084716796875,
      "model_forward_time": 0.11514687538146973,
      "step": 21438
    },
    {
      "epoch": 0.00013084716796875,
      "step": 21438,
      "training_step_time": 0.39820027351379395
    },
    {
      "epoch": 0.000130853271484375,
      "model_forward_time": 0.1149299144744873,
      "step": 21439
    },
    {
      "epoch": 0.000130853271484375,
      "step": 21439,
      "training_step_time": 0.36475324630737305
    },
    {
      "epoch": 0.000130859375,
      "grad_norm": 0.23914484679698944,
      "learning_rate": 7.632436457553625e-05,
      "loss": 0.05,
      "step": 21440
    },
    {
      "epoch": 0.000130859375,
      "model_forward_time": 0.11458945274353027,
      "step": 21440
    },
    {
      "epoch": 0.000130859375,
      "step": 21440,
      "training_step_time": 0.43760156631469727
    },
    {
      "epoch": 0.000130865478515625,
      "model_forward_time": 0.11499953269958496,
      "step": 21441
    },
    {
      "epoch": 0.000130865478515625,
      "step": 21441,
      "training_step_time": 0.4789252281188965
    },
    {
      "epoch": 0.00013087158203125,
      "model_forward_time": 0.11540603637695312,
      "step": 21442
    },
    {
      "epoch": 0.00013087158203125,
      "step": 21442,
      "training_step_time": 0.5422420501708984
    },
    {
      "epoch": 0.000130877685546875,
      "model_forward_time": 0.11505818367004395,
      "step": 21443
    },
    {
      "epoch": 0.000130877685546875,
      "step": 21443,
      "training_step_time": 0.38427257537841797
    },
    {
      "epoch": 0.0001308837890625,
      "model_forward_time": 0.11516976356506348,
      "step": 21444
    },
    {
      "epoch": 0.0001308837890625,
      "step": 21444,
      "training_step_time": 0.3934628963470459
    },
    {
      "epoch": 0.000130889892578125,
      "model_forward_time": 0.11504220962524414,
      "step": 21445
    },
    {
      "epoch": 0.000130889892578125,
      "step": 21445,
      "training_step_time": 0.38729190826416016
    },
    {
      "epoch": 0.00013089599609375,
      "model_forward_time": 0.11512994766235352,
      "step": 21446
    },
    {
      "epoch": 0.00013089599609375,
      "step": 21446,
      "training_step_time": 0.3794827461242676
    },
    {
      "epoch": 0.000130902099609375,
      "model_forward_time": 0.11603593826293945,
      "step": 21447
    },
    {
      "epoch": 0.000130902099609375,
      "step": 21447,
      "training_step_time": 0.39006805419921875
    },
    {
      "epoch": 0.000130908203125,
      "model_forward_time": 0.11603951454162598,
      "step": 21448
    },
    {
      "epoch": 0.000130908203125,
      "step": 21448,
      "training_step_time": 0.7202470302581787
    },
    {
      "epoch": 0.000130914306640625,
      "model_forward_time": 0.11572909355163574,
      "step": 21449
    },
    {
      "epoch": 0.000130914306640625,
      "step": 21449,
      "training_step_time": 0.3810257911682129
    },
    {
      "epoch": 0.00013092041015625,
      "grad_norm": 0.17368406057357788,
      "learning_rate": 7.630093137959171e-05,
      "loss": 0.0506,
      "step": 21450
    },
    {
      "epoch": 0.00013092041015625,
      "model_forward_time": 0.1148219108581543,
      "step": 21450
    },
    {
      "epoch": 0.00013092041015625,
      "step": 21450,
      "training_step_time": 0.39594244956970215
    },
    {
      "epoch": 0.000130926513671875,
      "model_forward_time": 0.11477303504943848,
      "step": 21451
    },
    {
      "epoch": 0.000130926513671875,
      "step": 21451,
      "training_step_time": 0.3875732421875
    },
    {
      "epoch": 0.0001309326171875,
      "model_forward_time": 0.11507058143615723,
      "step": 21452
    },
    {
      "epoch": 0.0001309326171875,
      "step": 21452,
      "training_step_time": 0.37958741188049316
    },
    {
      "epoch": 0.000130938720703125,
      "model_forward_time": 0.11418652534484863,
      "step": 21453
    },
    {
      "epoch": 0.000130938720703125,
      "step": 21453,
      "training_step_time": 0.36402177810668945
    },
    {
      "epoch": 0.00013094482421875,
      "model_forward_time": 0.11541914939880371,
      "step": 21454
    },
    {
      "epoch": 0.00013094482421875,
      "step": 21454,
      "training_step_time": 0.6426873207092285
    },
    {
      "epoch": 0.000130950927734375,
      "model_forward_time": 0.11494112014770508,
      "step": 21455
    },
    {
      "epoch": 0.000130950927734375,
      "step": 21455,
      "training_step_time": 0.4737882614135742
    },
    {
      "epoch": 0.00013095703125,
      "model_forward_time": 0.11855125427246094,
      "step": 21456
    },
    {
      "epoch": 0.00013095703125,
      "step": 21456,
      "training_step_time": 0.3887302875518799
    },
    {
      "epoch": 0.000130963134765625,
      "model_forward_time": 0.11479043960571289,
      "step": 21457
    },
    {
      "epoch": 0.000130963134765625,
      "step": 21457,
      "training_step_time": 0.39894604682922363
    },
    {
      "epoch": 0.00013096923828125,
      "model_forward_time": 0.11473536491394043,
      "step": 21458
    },
    {
      "epoch": 0.00013096923828125,
      "step": 21458,
      "training_step_time": 0.3900296688079834
    },
    {
      "epoch": 0.000130975341796875,
      "model_forward_time": 0.1150212287902832,
      "step": 21459
    },
    {
      "epoch": 0.000130975341796875,
      "step": 21459,
      "training_step_time": 0.3885812759399414
    },
    {
      "epoch": 0.0001309814453125,
      "grad_norm": 0.22410179674625397,
      "learning_rate": 7.627749019411866e-05,
      "loss": 0.0468,
      "step": 21460
    },
    {
      "epoch": 0.0001309814453125,
      "model_forward_time": 0.11537432670593262,
      "step": 21460
    },
    {
      "epoch": 0.0001309814453125,
      "step": 21460,
      "training_step_time": 0.6165156364440918
    },
    {
      "epoch": 0.000130987548828125,
      "model_forward_time": 0.11512613296508789,
      "step": 21461
    },
    {
      "epoch": 0.000130987548828125,
      "step": 21461,
      "training_step_time": 0.38190627098083496
    },
    {
      "epoch": 0.00013099365234375,
      "model_forward_time": 0.11514449119567871,
      "step": 21462
    },
    {
      "epoch": 0.00013099365234375,
      "step": 21462,
      "training_step_time": 0.39624810218811035
    },
    {
      "epoch": 0.000130999755859375,
      "model_forward_time": 0.11583781242370605,
      "step": 21463
    },
    {
      "epoch": 0.000130999755859375,
      "step": 21463,
      "training_step_time": 0.39649534225463867
    },
    {
      "epoch": 0.000131005859375,
      "model_forward_time": 0.11518359184265137,
      "step": 21464
    },
    {
      "epoch": 0.000131005859375,
      "step": 21464,
      "training_step_time": 0.38131189346313477
    },
    {
      "epoch": 0.000131011962890625,
      "model_forward_time": 0.1150813102722168,
      "step": 21465
    },
    {
      "epoch": 0.000131011962890625,
      "step": 21465,
      "training_step_time": 0.3810865879058838
    },
    {
      "epoch": 0.00013101806640625,
      "model_forward_time": 0.1153254508972168,
      "step": 21466
    },
    {
      "epoch": 0.00013101806640625,
      "step": 21466,
      "training_step_time": 0.7482960224151611
    },
    {
      "epoch": 0.000131024169921875,
      "model_forward_time": 0.11673259735107422,
      "step": 21467
    },
    {
      "epoch": 0.000131024169921875,
      "step": 21467,
      "training_step_time": 0.4936821460723877
    },
    {
      "epoch": 0.0001310302734375,
      "model_forward_time": 0.11473941802978516,
      "step": 21468
    },
    {
      "epoch": 0.0001310302734375,
      "step": 21468,
      "training_step_time": 0.4836583137512207
    },
    {
      "epoch": 0.000131036376953125,
      "model_forward_time": 0.11417126655578613,
      "step": 21469
    },
    {
      "epoch": 0.000131036376953125,
      "step": 21469,
      "training_step_time": 0.3984975814819336
    },
    {
      "epoch": 0.00013104248046875,
      "grad_norm": 0.16760559380054474,
      "learning_rate": 7.625404102623791e-05,
      "loss": 0.0491,
      "step": 21470
    },
    {
      "epoch": 0.00013104248046875,
      "model_forward_time": 0.1148836612701416,
      "step": 21470
    },
    {
      "epoch": 0.00013104248046875,
      "step": 21470,
      "training_step_time": 0.3875710964202881
    },
    {
      "epoch": 0.000131048583984375,
      "model_forward_time": 0.11522459983825684,
      "step": 21471
    },
    {
      "epoch": 0.000131048583984375,
      "step": 21471,
      "training_step_time": 0.38809823989868164
    },
    {
      "epoch": 0.0001310546875,
      "model_forward_time": 0.11428260803222656,
      "step": 21472
    },
    {
      "epoch": 0.0001310546875,
      "step": 21472,
      "training_step_time": 0.39188504219055176
    },
    {
      "epoch": 0.000131060791015625,
      "model_forward_time": 0.11514544486999512,
      "step": 21473
    },
    {
      "epoch": 0.000131060791015625,
      "step": 21473,
      "training_step_time": 0.39093613624572754
    },
    {
      "epoch": 0.00013106689453125,
      "model_forward_time": 0.11557221412658691,
      "step": 21474
    },
    {
      "epoch": 0.00013106689453125,
      "step": 21474,
      "training_step_time": 0.3945426940917969
    },
    {
      "epoch": 0.000131072998046875,
      "model_forward_time": 0.11491513252258301,
      "step": 21475
    },
    {
      "epoch": 0.000131072998046875,
      "step": 21475,
      "training_step_time": 0.4380652904510498
    },
    {
      "epoch": 0.0001310791015625,
      "model_forward_time": 0.11531543731689453,
      "step": 21476
    },
    {
      "epoch": 0.0001310791015625,
      "step": 21476,
      "training_step_time": 0.3942229747772217
    },
    {
      "epoch": 0.000131085205078125,
      "model_forward_time": 0.11529111862182617,
      "step": 21477
    },
    {
      "epoch": 0.000131085205078125,
      "step": 21477,
      "training_step_time": 0.38823986053466797
    },
    {
      "epoch": 0.00013109130859375,
      "model_forward_time": 0.11823558807373047,
      "step": 21478
    },
    {
      "epoch": 0.00013109130859375,
      "step": 21478,
      "training_step_time": 0.5631134510040283
    },
    {
      "epoch": 0.000131097412109375,
      "model_forward_time": 0.11509966850280762,
      "step": 21479
    },
    {
      "epoch": 0.000131097412109375,
      "step": 21479,
      "training_step_time": 0.39510011672973633
    },
    {
      "epoch": 0.000131103515625,
      "grad_norm": 0.1382352113723755,
      "learning_rate": 7.623058388307269e-05,
      "loss": 0.0465,
      "step": 21480
    },
    {
      "epoch": 0.000131103515625,
      "model_forward_time": 0.11540007591247559,
      "step": 21480
    },
    {
      "epoch": 0.000131103515625,
      "step": 21480,
      "training_step_time": 0.4124436378479004
    },
    {
      "epoch": 0.000131109619140625,
      "model_forward_time": 0.11508655548095703,
      "step": 21481
    },
    {
      "epoch": 0.000131109619140625,
      "step": 21481,
      "training_step_time": 0.3673522472381592
    },
    {
      "epoch": 0.00013111572265625,
      "model_forward_time": 0.11548495292663574,
      "step": 21482
    },
    {
      "epoch": 0.00013111572265625,
      "step": 21482,
      "training_step_time": 0.4576599597930908
    },
    {
      "epoch": 0.000131121826171875,
      "model_forward_time": 0.11497330665588379,
      "step": 21483
    },
    {
      "epoch": 0.000131121826171875,
      "step": 21483,
      "training_step_time": 0.4672422409057617
    },
    {
      "epoch": 0.0001311279296875,
      "model_forward_time": 0.1152195930480957,
      "step": 21484
    },
    {
      "epoch": 0.0001311279296875,
      "step": 21484,
      "training_step_time": 0.46041226387023926
    },
    {
      "epoch": 0.000131134033203125,
      "model_forward_time": 0.11504173278808594,
      "step": 21485
    },
    {
      "epoch": 0.000131134033203125,
      "step": 21485,
      "training_step_time": 0.3898751735687256
    },
    {
      "epoch": 0.00013114013671875,
      "model_forward_time": 0.11523222923278809,
      "step": 21486
    },
    {
      "epoch": 0.00013114013671875,
      "step": 21486,
      "training_step_time": 0.3915214538574219
    },
    {
      "epoch": 0.000131146240234375,
      "model_forward_time": 0.11540961265563965,
      "step": 21487
    },
    {
      "epoch": 0.000131146240234375,
      "step": 21487,
      "training_step_time": 0.3896660804748535
    },
    {
      "epoch": 0.00013115234375,
      "model_forward_time": 0.11518383026123047,
      "step": 21488
    },
    {
      "epoch": 0.00013115234375,
      "step": 21488,
      "training_step_time": 0.42468881607055664
    },
    {
      "epoch": 0.000131158447265625,
      "model_forward_time": 0.11473417282104492,
      "step": 21489
    },
    {
      "epoch": 0.000131158447265625,
      "step": 21489,
      "training_step_time": 0.45818400382995605
    },
    {
      "epoch": 0.00013116455078125,
      "grad_norm": 0.18096493184566498,
      "learning_rate": 7.620711877174866e-05,
      "loss": 0.049,
      "step": 21490
    },
    {
      "epoch": 0.00013116455078125,
      "model_forward_time": 0.11525487899780273,
      "step": 21490
    },
    {
      "epoch": 0.00013116455078125,
      "step": 21490,
      "training_step_time": 0.615471363067627
    },
    {
      "epoch": 0.000131170654296875,
      "model_forward_time": 0.11518287658691406,
      "step": 21491
    },
    {
      "epoch": 0.000131170654296875,
      "step": 21491,
      "training_step_time": 0.39225244522094727
    },
    {
      "epoch": 0.0001311767578125,
      "model_forward_time": 0.11490941047668457,
      "step": 21492
    },
    {
      "epoch": 0.0001311767578125,
      "step": 21492,
      "training_step_time": 0.39351344108581543
    },
    {
      "epoch": 0.000131182861328125,
      "model_forward_time": 0.11518144607543945,
      "step": 21493
    },
    {
      "epoch": 0.000131182861328125,
      "step": 21493,
      "training_step_time": 0.3947465419769287
    },
    {
      "epoch": 0.00013118896484375,
      "model_forward_time": 0.11481738090515137,
      "step": 21494
    },
    {
      "epoch": 0.00013118896484375,
      "step": 21494,
      "training_step_time": 0.3876516819000244
    },
    {
      "epoch": 0.000131195068359375,
      "model_forward_time": 0.11528563499450684,
      "step": 21495
    },
    {
      "epoch": 0.000131195068359375,
      "step": 21495,
      "training_step_time": 0.4757862091064453
    },
    {
      "epoch": 0.000131201171875,
      "model_forward_time": 0.11620402336120605,
      "step": 21496
    },
    {
      "epoch": 0.000131201171875,
      "step": 21496,
      "training_step_time": 0.5274076461791992
    },
    {
      "epoch": 0.000131207275390625,
      "model_forward_time": 0.11497211456298828,
      "step": 21497
    },
    {
      "epoch": 0.000131207275390625,
      "step": 21497,
      "training_step_time": 0.4887058734893799
    },
    {
      "epoch": 0.00013121337890625,
      "model_forward_time": 0.1151740550994873,
      "step": 21498
    },
    {
      "epoch": 0.00013121337890625,
      "step": 21498,
      "training_step_time": 0.3904531002044678
    },
    {
      "epoch": 0.000131219482421875,
      "model_forward_time": 0.11538147926330566,
      "step": 21499
    },
    {
      "epoch": 0.000131219482421875,
      "step": 21499,
      "training_step_time": 0.39624595642089844
    },
    {
      "epoch": 0.0001312255859375,
      "grad_norm": 0.17762422561645508,
      "learning_rate": 7.618364569939391e-05,
      "loss": 0.0478,
      "step": 21500
    },
    {
      "epoch": 0.0001312255859375,
      "model_forward_time": 0.1146855354309082,
      "step": 21500
    },
    {
      "epoch": 0.0001312255859375,
      "step": 21500,
      "training_step_time": 0.3837006092071533
    },
    {
      "epoch": 0.000131231689453125,
      "model_forward_time": 0.11496734619140625,
      "step": 21501
    },
    {
      "epoch": 0.000131231689453125,
      "step": 21501,
      "training_step_time": 0.4417839050292969
    },
    {
      "epoch": 0.00013123779296875,
      "model_forward_time": 0.11550045013427734,
      "step": 21502
    },
    {
      "epoch": 0.00013123779296875,
      "step": 21502,
      "training_step_time": 0.5207760334014893
    },
    {
      "epoch": 0.000131243896484375,
      "model_forward_time": 0.11543798446655273,
      "step": 21503
    },
    {
      "epoch": 0.000131243896484375,
      "step": 21503,
      "training_step_time": 0.388232946395874
    },
    {
      "epoch": 0.00013125,
      "model_forward_time": 0.1157071590423584,
      "step": 21504
    },
    {
      "epoch": 0.00013125,
      "step": 21504,
      "training_step_time": 0.4070420265197754
    },
    {
      "epoch": 0.000131256103515625,
      "model_forward_time": 0.11561346054077148,
      "step": 21505
    },
    {
      "epoch": 0.000131256103515625,
      "step": 21505,
      "training_step_time": 0.394733190536499
    },
    {
      "epoch": 0.00013126220703125,
      "model_forward_time": 0.11499929428100586,
      "step": 21506
    },
    {
      "epoch": 0.00013126220703125,
      "step": 21506,
      "training_step_time": 0.3816847801208496
    },
    {
      "epoch": 0.000131268310546875,
      "model_forward_time": 0.1153416633605957,
      "step": 21507
    },
    {
      "epoch": 0.000131268310546875,
      "step": 21507,
      "training_step_time": 0.3847799301147461
    },
    {
      "epoch": 0.0001312744140625,
      "model_forward_time": 0.11728644371032715,
      "step": 21508
    },
    {
      "epoch": 0.0001312744140625,
      "step": 21508,
      "training_step_time": 0.6580886840820312
    },
    {
      "epoch": 0.000131280517578125,
      "model_forward_time": 0.11544966697692871,
      "step": 21509
    },
    {
      "epoch": 0.000131280517578125,
      "step": 21509,
      "training_step_time": 0.5034053325653076
    },
    {
      "epoch": 0.00013128662109375,
      "grad_norm": 0.15949749946594238,
      "learning_rate": 7.616016467313891e-05,
      "loss": 0.0462,
      "step": 21510
    },
    {
      "epoch": 0.00013128662109375,
      "model_forward_time": 0.11526870727539062,
      "step": 21510
    },
    {
      "epoch": 0.00013128662109375,
      "step": 21510,
      "training_step_time": 0.4187502861022949
    },
    {
      "epoch": 0.000131292724609375,
      "model_forward_time": 0.11766576766967773,
      "step": 21511
    },
    {
      "epoch": 0.000131292724609375,
      "step": 21511,
      "training_step_time": 0.4559445381164551
    },
    {
      "epoch": 0.000131298828125,
      "model_forward_time": 0.11766624450683594,
      "step": 21512
    },
    {
      "epoch": 0.000131298828125,
      "step": 21512,
      "training_step_time": 0.3754448890686035
    },
    {
      "epoch": 0.000131304931640625,
      "model_forward_time": 0.11425089836120605,
      "step": 21513
    },
    {
      "epoch": 0.000131304931640625,
      "step": 21513,
      "training_step_time": 0.3795480728149414
    },
    {
      "epoch": 0.00013131103515625,
      "model_forward_time": 0.11467194557189941,
      "step": 21514
    },
    {
      "epoch": 0.00013131103515625,
      "step": 21514,
      "training_step_time": 0.4430506229400635
    },
    {
      "epoch": 0.000131317138671875,
      "model_forward_time": 0.11484289169311523,
      "step": 21515
    },
    {
      "epoch": 0.000131317138671875,
      "step": 21515,
      "training_step_time": 0.4044616222381592
    },
    {
      "epoch": 0.0001313232421875,
      "model_forward_time": 0.11584162712097168,
      "step": 21516
    },
    {
      "epoch": 0.0001313232421875,
      "step": 21516,
      "training_step_time": 0.40238237380981445
    },
    {
      "epoch": 0.000131329345703125,
      "model_forward_time": 0.11603403091430664,
      "step": 21517
    },
    {
      "epoch": 0.000131329345703125,
      "step": 21517,
      "training_step_time": 0.39691996574401855
    },
    {
      "epoch": 0.00013133544921875,
      "model_forward_time": 0.11533689498901367,
      "step": 21518
    },
    {
      "epoch": 0.00013133544921875,
      "step": 21518,
      "training_step_time": 0.38613009452819824
    },
    {
      "epoch": 0.000131341552734375,
      "model_forward_time": 0.11521291732788086,
      "step": 21519
    },
    {
      "epoch": 0.000131341552734375,
      "step": 21519,
      "training_step_time": 0.3917820453643799
    },
    {
      "epoch": 0.00013134765625,
      "grad_norm": 0.11278640478849411,
      "learning_rate": 7.613667570011663e-05,
      "loss": 0.0459,
      "step": 21520
    },
    {
      "epoch": 0.00013134765625,
      "model_forward_time": 0.11533212661743164,
      "step": 21520
    },
    {
      "epoch": 0.00013134765625,
      "step": 21520,
      "training_step_time": 0.5315036773681641
    },
    {
      "epoch": 0.000131353759765625,
      "model_forward_time": 0.1157534122467041,
      "step": 21521
    },
    {
      "epoch": 0.000131353759765625,
      "step": 21521,
      "training_step_time": 0.4050912857055664
    },
    {
      "epoch": 0.00013135986328125,
      "model_forward_time": 0.1152193546295166,
      "step": 21522
    },
    {
      "epoch": 0.00013135986328125,
      "step": 21522,
      "training_step_time": 0.39417409896850586
    },
    {
      "epoch": 0.000131365966796875,
      "model_forward_time": 0.11506104469299316,
      "step": 21523
    },
    {
      "epoch": 0.000131365966796875,
      "step": 21523,
      "training_step_time": 0.4506862163543701
    },
    {
      "epoch": 0.0001313720703125,
      "model_forward_time": 0.11521673202514648,
      "step": 21524
    },
    {
      "epoch": 0.0001313720703125,
      "step": 21524,
      "training_step_time": 0.4190516471862793
    },
    {
      "epoch": 0.000131378173828125,
      "model_forward_time": 0.11532998085021973,
      "step": 21525
    },
    {
      "epoch": 0.000131378173828125,
      "step": 21525,
      "training_step_time": 0.5009925365447998
    },
    {
      "epoch": 0.00013138427734375,
      "model_forward_time": 0.11621594429016113,
      "step": 21526
    },
    {
      "epoch": 0.00013138427734375,
      "step": 21526,
      "training_step_time": 0.6254684925079346
    },
    {
      "epoch": 0.000131390380859375,
      "model_forward_time": 0.11452388763427734,
      "step": 21527
    },
    {
      "epoch": 0.000131390380859375,
      "step": 21527,
      "training_step_time": 0.46506428718566895
    },
    {
      "epoch": 0.000131396484375,
      "model_forward_time": 0.11492133140563965,
      "step": 21528
    },
    {
      "epoch": 0.000131396484375,
      "step": 21528,
      "training_step_time": 0.3887355327606201
    },
    {
      "epoch": 0.000131402587890625,
      "model_forward_time": 0.11473202705383301,
      "step": 21529
    },
    {
      "epoch": 0.000131402587890625,
      "step": 21529,
      "training_step_time": 0.39511537551879883
    },
    {
      "epoch": 0.00013140869140625,
      "grad_norm": 0.1569407880306244,
      "learning_rate": 7.611317878746238e-05,
      "loss": 0.0416,
      "step": 21530
    },
    {
      "epoch": 0.00013140869140625,
      "model_forward_time": 0.11405348777770996,
      "step": 21530
    },
    {
      "epoch": 0.00013140869140625,
      "step": 21530,
      "training_step_time": 0.3880348205566406
    },
    {
      "epoch": 0.000131414794921875,
      "model_forward_time": 0.11483097076416016,
      "step": 21531
    },
    {
      "epoch": 0.000131414794921875,
      "step": 21531,
      "training_step_time": 0.3880488872528076
    },
    {
      "epoch": 0.0001314208984375,
      "model_forward_time": 0.11493253707885742,
      "step": 21532
    },
    {
      "epoch": 0.0001314208984375,
      "step": 21532,
      "training_step_time": 0.6096155643463135
    },
    {
      "epoch": 0.000131427001953125,
      "model_forward_time": 0.11474347114562988,
      "step": 21533
    },
    {
      "epoch": 0.000131427001953125,
      "step": 21533,
      "training_step_time": 0.3927757740020752
    },
    {
      "epoch": 0.00013143310546875,
      "model_forward_time": 0.11468386650085449,
      "step": 21534
    },
    {
      "epoch": 0.00013143310546875,
      "step": 21534,
      "training_step_time": 0.39389586448669434
    },
    {
      "epoch": 0.000131439208984375,
      "model_forward_time": 0.11725330352783203,
      "step": 21535
    },
    {
      "epoch": 0.000131439208984375,
      "step": 21535,
      "training_step_time": 0.38741159439086914
    },
    {
      "epoch": 0.0001314453125,
      "model_forward_time": 0.11530399322509766,
      "step": 21536
    },
    {
      "epoch": 0.0001314453125,
      "step": 21536,
      "training_step_time": 0.40019869804382324
    },
    {
      "epoch": 0.000131451416015625,
      "model_forward_time": 0.11481022834777832,
      "step": 21537
    },
    {
      "epoch": 0.000131451416015625,
      "step": 21537,
      "training_step_time": 0.4776320457458496
    },
    {
      "epoch": 0.00013145751953125,
      "model_forward_time": 0.11566734313964844,
      "step": 21538
    },
    {
      "epoch": 0.00013145751953125,
      "step": 21538,
      "training_step_time": 0.5588839054107666
    },
    {
      "epoch": 0.000131463623046875,
      "model_forward_time": 0.11492252349853516,
      "step": 21539
    },
    {
      "epoch": 0.000131463623046875,
      "step": 21539,
      "training_step_time": 0.47620272636413574
    },
    {
      "epoch": 0.0001314697265625,
      "grad_norm": 0.1133832186460495,
      "learning_rate": 7.608967394231387e-05,
      "loss": 0.0444,
      "step": 21540
    },
    {
      "epoch": 0.0001314697265625,
      "model_forward_time": 0.11509442329406738,
      "step": 21540
    },
    {
      "epoch": 0.0001314697265625,
      "step": 21540,
      "training_step_time": 0.44953203201293945
    },
    {
      "epoch": 0.000131475830078125,
      "model_forward_time": 0.1154778003692627,
      "step": 21541
    },
    {
      "epoch": 0.000131475830078125,
      "step": 21541,
      "training_step_time": 0.409121036529541
    },
    {
      "epoch": 0.00013148193359375,
      "model_forward_time": 0.1147611141204834,
      "step": 21542
    },
    {
      "epoch": 0.00013148193359375,
      "step": 21542,
      "training_step_time": 0.3926093578338623
    },
    {
      "epoch": 0.000131488037109375,
      "model_forward_time": 0.11492514610290527,
      "step": 21543
    },
    {
      "epoch": 0.000131488037109375,
      "step": 21543,
      "training_step_time": 0.3886375427246094
    },
    {
      "epoch": 0.000131494140625,
      "model_forward_time": 0.11453676223754883,
      "step": 21544
    },
    {
      "epoch": 0.000131494140625,
      "step": 21544,
      "training_step_time": 0.44022154808044434
    },
    {
      "epoch": 0.000131500244140625,
      "model_forward_time": 0.11502313613891602,
      "step": 21545
    },
    {
      "epoch": 0.000131500244140625,
      "step": 21545,
      "training_step_time": 0.38941025733947754
    },
    {
      "epoch": 0.00013150634765625,
      "model_forward_time": 0.11490511894226074,
      "step": 21546
    },
    {
      "epoch": 0.00013150634765625,
      "step": 21546,
      "training_step_time": 0.39173388481140137
    },
    {
      "epoch": 0.000131512451171875,
      "model_forward_time": 0.11546635627746582,
      "step": 21547
    },
    {
      "epoch": 0.000131512451171875,
      "step": 21547,
      "training_step_time": 0.42551612854003906
    },
    {
      "epoch": 0.0001315185546875,
      "model_forward_time": 0.11482763290405273,
      "step": 21548
    },
    {
      "epoch": 0.0001315185546875,
      "step": 21548,
      "training_step_time": 0.38391804695129395
    },
    {
      "epoch": 0.000131524658203125,
      "model_forward_time": 0.11505699157714844,
      "step": 21549
    },
    {
      "epoch": 0.000131524658203125,
      "step": 21549,
      "training_step_time": 0.389162540435791
    },
    {
      "epoch": 0.00013153076171875,
      "grad_norm": 0.11665131151676178,
      "learning_rate": 7.606616117181128e-05,
      "loss": 0.0404,
      "step": 21550
    },
    {
      "epoch": 0.00013153076171875,
      "model_forward_time": 0.11576509475708008,
      "step": 21550
    },
    {
      "epoch": 0.00013153076171875,
      "step": 21550,
      "training_step_time": 0.7378649711608887
    },
    {
      "epoch": 0.000131536865234375,
      "model_forward_time": 0.11611008644104004,
      "step": 21551
    },
    {
      "epoch": 0.000131536865234375,
      "step": 21551,
      "training_step_time": 0.4663064479827881
    },
    {
      "epoch": 0.00013154296875,
      "model_forward_time": 0.11786103248596191,
      "step": 21552
    },
    {
      "epoch": 0.00013154296875,
      "step": 21552,
      "training_step_time": 0.6966333389282227
    },
    {
      "epoch": 0.000131549072265625,
      "model_forward_time": 0.11681222915649414,
      "step": 21553
    },
    {
      "epoch": 0.000131549072265625,
      "step": 21553,
      "training_step_time": 0.6925547122955322
    },
    {
      "epoch": 0.00013155517578125,
      "model_forward_time": 0.11720609664916992,
      "step": 21554
    },
    {
      "epoch": 0.00013155517578125,
      "step": 21554,
      "training_step_time": 0.6691062450408936
    },
    {
      "epoch": 0.000131561279296875,
      "model_forward_time": 0.132002592086792,
      "step": 21555
    },
    {
      "epoch": 0.000131561279296875,
      "step": 21555,
      "training_step_time": 0.745274543762207
    },
    {
      "epoch": 0.0001315673828125,
      "model_forward_time": 0.11925458908081055,
      "step": 21556
    },
    {
      "epoch": 0.0001315673828125,
      "step": 21556,
      "training_step_time": 0.7725727558135986
    },
    {
      "epoch": 0.000131573486328125,
      "model_forward_time": 0.11611008644104004,
      "step": 21557
    },
    {
      "epoch": 0.000131573486328125,
      "step": 21557,
      "training_step_time": 0.7110145092010498
    },
    {
      "epoch": 0.00013157958984375,
      "model_forward_time": 0.11914348602294922,
      "step": 21558
    },
    {
      "epoch": 0.00013157958984375,
      "step": 21558,
      "training_step_time": 0.6748850345611572
    },
    {
      "epoch": 0.000131585693359375,
      "model_forward_time": 0.11592793464660645,
      "step": 21559
    },
    {
      "epoch": 0.000131585693359375,
      "step": 21559,
      "training_step_time": 0.6638181209564209
    },
    {
      "epoch": 0.000131591796875,
      "grad_norm": 0.18326090276241302,
      "learning_rate": 7.604264048309717e-05,
      "loss": 0.0431,
      "step": 21560
    },
    {
      "epoch": 0.000131591796875,
      "model_forward_time": 0.12878203392028809,
      "step": 21560
    },
    {
      "epoch": 0.000131591796875,
      "step": 21560,
      "training_step_time": 0.6384952068328857
    },
    {
      "epoch": 0.000131597900390625,
      "model_forward_time": 0.1324312686920166,
      "step": 21561
    },
    {
      "epoch": 0.000131597900390625,
      "step": 21561,
      "training_step_time": 0.7417423725128174
    },
    {
      "epoch": 0.00013160400390625,
      "model_forward_time": 0.11696720123291016,
      "step": 21562
    },
    {
      "epoch": 0.00013160400390625,
      "step": 21562,
      "training_step_time": 0.722684383392334
    },
    {
      "epoch": 0.000131610107421875,
      "model_forward_time": 0.12261366844177246,
      "step": 21563
    },
    {
      "epoch": 0.000131610107421875,
      "step": 21563,
      "training_step_time": 0.6685163974761963
    },
    {
      "epoch": 0.0001316162109375,
      "model_forward_time": 0.12123680114746094,
      "step": 21564
    },
    {
      "epoch": 0.0001316162109375,
      "step": 21564,
      "training_step_time": 0.7488584518432617
    },
    {
      "epoch": 0.000131622314453125,
      "model_forward_time": 0.12077546119689941,
      "step": 21565
    },
    {
      "epoch": 0.000131622314453125,
      "step": 21565,
      "training_step_time": 0.669156551361084
    },
    {
      "epoch": 0.00013162841796875,
      "model_forward_time": 0.11705183982849121,
      "step": 21566
    },
    {
      "epoch": 0.00013162841796875,
      "step": 21566,
      "training_step_time": 0.6314651966094971
    },
    {
      "epoch": 0.000131634521484375,
      "model_forward_time": 0.12084794044494629,
      "step": 21567
    },
    {
      "epoch": 0.000131634521484375,
      "step": 21567,
      "training_step_time": 0.6399204730987549
    },
    {
      "epoch": 0.000131640625,
      "model_forward_time": 0.12159538269042969,
      "step": 21568
    },
    {
      "epoch": 0.000131640625,
      "step": 21568,
      "training_step_time": 0.6164002418518066
    },
    {
      "epoch": 0.000131646728515625,
      "model_forward_time": 0.12291789054870605,
      "step": 21569
    },
    {
      "epoch": 0.000131646728515625,
      "step": 21569,
      "training_step_time": 0.7122809886932373
    },
    {
      "epoch": 0.00013165283203125,
      "grad_norm": 0.13420818746089935,
      "learning_rate": 7.60191118833165e-05,
      "loss": 0.0516,
      "step": 21570
    },
    {
      "epoch": 0.00013165283203125,
      "model_forward_time": 0.12322473526000977,
      "step": 21570
    },
    {
      "epoch": 0.00013165283203125,
      "step": 21570,
      "training_step_time": 0.6098227500915527
    },
    {
      "epoch": 0.000131658935546875,
      "model_forward_time": 0.12337970733642578,
      "step": 21571
    },
    {
      "epoch": 0.000131658935546875,
      "step": 21571,
      "training_step_time": 0.702141284942627
    },
    {
      "epoch": 0.0001316650390625,
      "model_forward_time": 0.11688661575317383,
      "step": 21572
    },
    {
      "epoch": 0.0001316650390625,
      "step": 21572,
      "training_step_time": 0.6170797348022461
    },
    {
      "epoch": 0.000131671142578125,
      "model_forward_time": 0.12350726127624512,
      "step": 21573
    },
    {
      "epoch": 0.000131671142578125,
      "step": 21573,
      "training_step_time": 0.6620512008666992
    },
    {
      "epoch": 0.00013167724609375,
      "model_forward_time": 0.12600493431091309,
      "step": 21574
    },
    {
      "epoch": 0.00013167724609375,
      "step": 21574,
      "training_step_time": 0.6495940685272217
    },
    {
      "epoch": 0.000131683349609375,
      "model_forward_time": 0.1202852725982666,
      "step": 21575
    },
    {
      "epoch": 0.000131683349609375,
      "step": 21575,
      "training_step_time": 0.6886484622955322
    },
    {
      "epoch": 0.000131689453125,
      "model_forward_time": 0.11944890022277832,
      "step": 21576
    },
    {
      "epoch": 0.000131689453125,
      "step": 21576,
      "training_step_time": 0.7048919200897217
    },
    {
      "epoch": 0.000131695556640625,
      "model_forward_time": 0.12715506553649902,
      "step": 21577
    },
    {
      "epoch": 0.000131695556640625,
      "step": 21577,
      "training_step_time": 0.6546270847320557
    },
    {
      "epoch": 0.00013170166015625,
      "model_forward_time": 0.11951708793640137,
      "step": 21578
    },
    {
      "epoch": 0.00013170166015625,
      "step": 21578,
      "training_step_time": 0.6212594509124756
    },
    {
      "epoch": 0.000131707763671875,
      "model_forward_time": 0.11987090110778809,
      "step": 21579
    },
    {
      "epoch": 0.000131707763671875,
      "step": 21579,
      "training_step_time": 0.718076229095459
    },
    {
      "epoch": 0.0001317138671875,
      "grad_norm": 0.13770869374275208,
      "learning_rate": 7.599557537961663e-05,
      "loss": 0.0538,
      "step": 21580
    },
    {
      "epoch": 0.0001317138671875,
      "model_forward_time": 0.14157509803771973,
      "step": 21580
    },
    {
      "epoch": 0.0001317138671875,
      "step": 21580,
      "training_step_time": 0.685823917388916
    },
    {
      "epoch": 0.000131719970703125,
      "model_forward_time": 0.12534666061401367,
      "step": 21581
    },
    {
      "epoch": 0.000131719970703125,
      "step": 21581,
      "training_step_time": 0.6451480388641357
    },
    {
      "epoch": 0.00013172607421875,
      "model_forward_time": 0.12125658988952637,
      "step": 21582
    },
    {
      "epoch": 0.00013172607421875,
      "step": 21582,
      "training_step_time": 0.6546339988708496
    },
    {
      "epoch": 0.000131732177734375,
      "model_forward_time": 0.11542034149169922,
      "step": 21583
    },
    {
      "epoch": 0.000131732177734375,
      "step": 21583,
      "training_step_time": 0.6576457023620605
    },
    {
      "epoch": 0.00013173828125,
      "model_forward_time": 0.11794400215148926,
      "step": 21584
    },
    {
      "epoch": 0.00013173828125,
      "step": 21584,
      "training_step_time": 0.694878101348877
    },
    {
      "epoch": 0.000131744384765625,
      "model_forward_time": 0.1242976188659668,
      "step": 21585
    },
    {
      "epoch": 0.000131744384765625,
      "step": 21585,
      "training_step_time": 0.7016525268554688
    },
    {
      "epoch": 0.00013175048828125,
      "model_forward_time": 0.12245917320251465,
      "step": 21586
    },
    {
      "epoch": 0.00013175048828125,
      "step": 21586,
      "training_step_time": 0.6580288410186768
    },
    {
      "epoch": 0.000131756591796875,
      "model_forward_time": 0.12612509727478027,
      "step": 21587
    },
    {
      "epoch": 0.000131756591796875,
      "step": 21587,
      "training_step_time": 0.6937212944030762
    },
    {
      "epoch": 0.0001317626953125,
      "model_forward_time": 0.1202385425567627,
      "step": 21588
    },
    {
      "epoch": 0.0001317626953125,
      "step": 21588,
      "training_step_time": 0.7837040424346924
    },
    {
      "epoch": 0.000131768798828125,
      "model_forward_time": 0.11967134475708008,
      "step": 21589
    },
    {
      "epoch": 0.000131768798828125,
      "step": 21589,
      "training_step_time": 0.7328214645385742
    },
    {
      "epoch": 0.00013177490234375,
      "grad_norm": 0.10939133167266846,
      "learning_rate": 7.597203097914732e-05,
      "loss": 0.0621,
      "step": 21590
    },
    {
      "epoch": 0.00013177490234375,
      "model_forward_time": 0.12013030052185059,
      "step": 21590
    },
    {
      "epoch": 0.00013177490234375,
      "step": 21590,
      "training_step_time": 0.6878559589385986
    },
    {
      "epoch": 0.000131781005859375,
      "model_forward_time": 0.12746667861938477,
      "step": 21591
    },
    {
      "epoch": 0.000131781005859375,
      "step": 21591,
      "training_step_time": 0.6820724010467529
    },
    {
      "epoch": 0.000131787109375,
      "model_forward_time": 0.11600351333618164,
      "step": 21592
    },
    {
      "epoch": 0.000131787109375,
      "step": 21592,
      "training_step_time": 0.6268563270568848
    },
    {
      "epoch": 0.000131793212890625,
      "model_forward_time": 0.12978720664978027,
      "step": 21593
    },
    {
      "epoch": 0.000131793212890625,
      "step": 21593,
      "training_step_time": 0.6837732791900635
    },
    {
      "epoch": 0.00013179931640625,
      "model_forward_time": 0.11972808837890625,
      "step": 21594
    },
    {
      "epoch": 0.00013179931640625,
      "step": 21594,
      "training_step_time": 0.7859034538269043
    },
    {
      "epoch": 0.000131805419921875,
      "model_forward_time": 0.12038993835449219,
      "step": 21595
    },
    {
      "epoch": 0.000131805419921875,
      "step": 21595,
      "training_step_time": 0.6535730361938477
    },
    {
      "epoch": 0.0001318115234375,
      "model_forward_time": 0.11915826797485352,
      "step": 21596
    },
    {
      "epoch": 0.0001318115234375,
      "step": 21596,
      "training_step_time": 0.6167318820953369
    },
    {
      "epoch": 0.000131817626953125,
      "model_forward_time": 0.12035751342773438,
      "step": 21597
    },
    {
      "epoch": 0.000131817626953125,
      "step": 21597,
      "training_step_time": 0.5922455787658691
    },
    {
      "epoch": 0.00013182373046875,
      "model_forward_time": 0.12212181091308594,
      "step": 21598
    },
    {
      "epoch": 0.00013182373046875,
      "step": 21598,
      "training_step_time": 0.7027666568756104
    },
    {
      "epoch": 0.000131829833984375,
      "model_forward_time": 0.12298870086669922,
      "step": 21599
    },
    {
      "epoch": 0.000131829833984375,
      "step": 21599,
      "training_step_time": 0.7241418361663818
    },
    {
      "epoch": 0.0001318359375,
      "grad_norm": 0.1756313294172287,
      "learning_rate": 7.594847868906076e-05,
      "loss": 0.0521,
      "step": 21600
    },
    {
      "epoch": 0.0001318359375,
      "model_forward_time": 0.1250743865966797,
      "step": 21600
    },
    {
      "epoch": 0.0001318359375,
      "step": 21600,
      "training_step_time": 0.7106330394744873
    },
    {
      "epoch": 0.000131842041015625,
      "model_forward_time": 0.12135672569274902,
      "step": 21601
    },
    {
      "epoch": 0.000131842041015625,
      "step": 21601,
      "training_step_time": 0.6191189289093018
    },
    {
      "epoch": 0.00013184814453125,
      "model_forward_time": 0.11782479286193848,
      "step": 21602
    },
    {
      "epoch": 0.00013184814453125,
      "step": 21602,
      "training_step_time": 0.6753041744232178
    },
    {
      "epoch": 0.000131854248046875,
      "model_forward_time": 0.11982417106628418,
      "step": 21603
    },
    {
      "epoch": 0.000131854248046875,
      "step": 21603,
      "training_step_time": 0.6812527179718018
    },
    {
      "epoch": 0.0001318603515625,
      "model_forward_time": 0.12033438682556152,
      "step": 21604
    },
    {
      "epoch": 0.0001318603515625,
      "step": 21604,
      "training_step_time": 0.6409916877746582
    },
    {
      "epoch": 0.000131866455078125,
      "model_forward_time": 0.12458372116088867,
      "step": 21605
    },
    {
      "epoch": 0.000131866455078125,
      "step": 21605,
      "training_step_time": 0.6474664211273193
    },
    {
      "epoch": 0.00013187255859375,
      "model_forward_time": 0.1447901725769043,
      "step": 21606
    },
    {
      "epoch": 0.00013187255859375,
      "step": 21606,
      "training_step_time": 0.7144110202789307
    },
    {
      "epoch": 0.000131878662109375,
      "model_forward_time": 0.11596107482910156,
      "step": 21607
    },
    {
      "epoch": 0.000131878662109375,
      "step": 21607,
      "training_step_time": 0.7541289329528809
    },
    {
      "epoch": 0.000131884765625,
      "model_forward_time": 0.12326598167419434,
      "step": 21608
    },
    {
      "epoch": 0.000131884765625,
      "step": 21608,
      "training_step_time": 0.6900627613067627
    },
    {
      "epoch": 0.000131890869140625,
      "model_forward_time": 0.12675976753234863,
      "step": 21609
    },
    {
      "epoch": 0.000131890869140625,
      "step": 21609,
      "training_step_time": 0.6901910305023193
    },
    {
      "epoch": 0.00013189697265625,
      "grad_norm": 0.16442327201366425,
      "learning_rate": 7.592491851651151e-05,
      "loss": 0.0576,
      "step": 21610
    },
    {
      "epoch": 0.00013189697265625,
      "model_forward_time": 0.12129926681518555,
      "step": 21610
    },
    {
      "epoch": 0.00013189697265625,
      "step": 21610,
      "training_step_time": 0.6775023937225342
    },
    {
      "epoch": 0.000131903076171875,
      "model_forward_time": 0.12323117256164551,
      "step": 21611
    },
    {
      "epoch": 0.000131903076171875,
      "step": 21611,
      "training_step_time": 0.6478409767150879
    },
    {
      "epoch": 0.0001319091796875,
      "model_forward_time": 0.11741137504577637,
      "step": 21612
    },
    {
      "epoch": 0.0001319091796875,
      "step": 21612,
      "training_step_time": 0.6495623588562012
    },
    {
      "epoch": 0.000131915283203125,
      "model_forward_time": 0.11920714378356934,
      "step": 21613
    },
    {
      "epoch": 0.000131915283203125,
      "step": 21613,
      "training_step_time": 0.6702075004577637
    },
    {
      "epoch": 0.00013192138671875,
      "model_forward_time": 0.12109947204589844,
      "step": 21614
    },
    {
      "epoch": 0.00013192138671875,
      "step": 21614,
      "training_step_time": 0.6527223587036133
    },
    {
      "epoch": 0.000131927490234375,
      "model_forward_time": 0.1251840591430664,
      "step": 21615
    },
    {
      "epoch": 0.000131927490234375,
      "step": 21615,
      "training_step_time": 0.6055135726928711
    },
    {
      "epoch": 0.00013193359375,
      "model_forward_time": 0.12148475646972656,
      "step": 21616
    },
    {
      "epoch": 0.00013193359375,
      "step": 21616,
      "training_step_time": 0.6611645221710205
    },
    {
      "epoch": 0.000131939697265625,
      "model_forward_time": 0.12791705131530762,
      "step": 21617
    },
    {
      "epoch": 0.000131939697265625,
      "step": 21617,
      "training_step_time": 0.6661689281463623
    },
    {
      "epoch": 0.00013194580078125,
      "model_forward_time": 0.12536883354187012,
      "step": 21618
    },
    {
      "epoch": 0.00013194580078125,
      "step": 21618,
      "training_step_time": 0.5659234523773193
    },
    {
      "epoch": 0.000131951904296875,
      "model_forward_time": 0.1216728687286377,
      "step": 21619
    },
    {
      "epoch": 0.000131951904296875,
      "step": 21619,
      "training_step_time": 0.6457033157348633
    },
    {
      "epoch": 0.0001319580078125,
      "grad_norm": 0.16761191189289093,
      "learning_rate": 7.590135046865651e-05,
      "loss": 0.0536,
      "step": 21620
    },
    {
      "epoch": 0.0001319580078125,
      "model_forward_time": 0.11952066421508789,
      "step": 21620
    },
    {
      "epoch": 0.0001319580078125,
      "step": 21620,
      "training_step_time": 0.6371653079986572
    },
    {
      "epoch": 0.000131964111328125,
      "model_forward_time": 0.11858534812927246,
      "step": 21621
    },
    {
      "epoch": 0.000131964111328125,
      "step": 21621,
      "training_step_time": 0.4963185787200928
    },
    {
      "epoch": 0.00013197021484375,
      "model_forward_time": 0.12016868591308594,
      "step": 21622
    },
    {
      "epoch": 0.00013197021484375,
      "step": 21622,
      "training_step_time": 0.49052953720092773
    },
    {
      "epoch": 0.000131976318359375,
      "model_forward_time": 0.11801815032958984,
      "step": 21623
    },
    {
      "epoch": 0.000131976318359375,
      "step": 21623,
      "training_step_time": 0.4280269145965576
    },
    {
      "epoch": 0.000131982421875,
      "model_forward_time": 0.1171116828918457,
      "step": 21624
    },
    {
      "epoch": 0.000131982421875,
      "step": 21624,
      "training_step_time": 0.43708086013793945
    },
    {
      "epoch": 0.000131988525390625,
      "model_forward_time": 0.1170356273651123,
      "step": 21625
    },
    {
      "epoch": 0.000131988525390625,
      "step": 21625,
      "training_step_time": 0.44808173179626465
    },
    {
      "epoch": 0.00013199462890625,
      "model_forward_time": 0.1166989803314209,
      "step": 21626
    },
    {
      "epoch": 0.00013199462890625,
      "step": 21626,
      "training_step_time": 0.44258761405944824
    },
    {
      "epoch": 0.000132000732421875,
      "model_forward_time": 0.11779212951660156,
      "step": 21627
    },
    {
      "epoch": 0.000132000732421875,
      "step": 21627,
      "training_step_time": 0.3759651184082031
    },
    {
      "epoch": 0.0001320068359375,
      "model_forward_time": 0.11551904678344727,
      "step": 21628
    },
    {
      "epoch": 0.0001320068359375,
      "step": 21628,
      "training_step_time": 0.42267560958862305
    },
    {
      "epoch": 0.000132012939453125,
      "model_forward_time": 0.1167905330657959,
      "step": 21629
    },
    {
      "epoch": 0.000132012939453125,
      "step": 21629,
      "training_step_time": 0.46814846992492676
    },
    {
      "epoch": 0.00013201904296875,
      "grad_norm": 0.1720273196697235,
      "learning_rate": 7.587777455265515e-05,
      "loss": 0.0504,
      "step": 21630
    },
    {
      "epoch": 0.00013201904296875,
      "model_forward_time": 0.1152808666229248,
      "step": 21630
    },
    {
      "epoch": 0.00013201904296875,
      "step": 21630,
      "training_step_time": 0.46166229248046875
    },
    {
      "epoch": 0.000132025146484375,
      "model_forward_time": 0.11510014533996582,
      "step": 21631
    },
    {
      "epoch": 0.000132025146484375,
      "step": 21631,
      "training_step_time": 0.39885878562927246
    },
    {
      "epoch": 0.00013203125,
      "model_forward_time": 0.11580276489257812,
      "step": 21632
    },
    {
      "epoch": 0.00013203125,
      "step": 21632,
      "training_step_time": 0.45044922828674316
    },
    {
      "epoch": 0.000132037353515625,
      "model_forward_time": 0.11574149131774902,
      "step": 21633
    },
    {
      "epoch": 0.000132037353515625,
      "step": 21633,
      "training_step_time": 0.5126445293426514
    },
    {
      "epoch": 0.00013204345703125,
      "model_forward_time": 0.11560344696044922,
      "step": 21634
    },
    {
      "epoch": 0.00013204345703125,
      "step": 21634,
      "training_step_time": 0.4397554397583008
    },
    {
      "epoch": 0.000132049560546875,
      "model_forward_time": 0.11494660377502441,
      "step": 21635
    },
    {
      "epoch": 0.000132049560546875,
      "step": 21635,
      "training_step_time": 0.382204532623291
    },
    {
      "epoch": 0.0001320556640625,
      "model_forward_time": 0.11600470542907715,
      "step": 21636
    },
    {
      "epoch": 0.0001320556640625,
      "step": 21636,
      "training_step_time": 0.39513301849365234
    },
    {
      "epoch": 0.000132061767578125,
      "model_forward_time": 0.11510992050170898,
      "step": 21637
    },
    {
      "epoch": 0.000132061767578125,
      "step": 21637,
      "training_step_time": 0.3832740783691406
    },
    {
      "epoch": 0.00013206787109375,
      "model_forward_time": 0.11527848243713379,
      "step": 21638
    },
    {
      "epoch": 0.00013206787109375,
      "step": 21638,
      "training_step_time": 0.402238130569458
    },
    {
      "epoch": 0.000132073974609375,
      "model_forward_time": 0.11504173278808594,
      "step": 21639
    },
    {
      "epoch": 0.000132073974609375,
      "step": 21639,
      "training_step_time": 0.4036412239074707
    },
    {
      "epoch": 0.000132080078125,
      "grad_norm": 0.1521274745464325,
      "learning_rate": 7.585419077566912e-05,
      "loss": 0.0555,
      "step": 21640
    },
    {
      "epoch": 0.000132080078125,
      "model_forward_time": 0.11562800407409668,
      "step": 21640
    },
    {
      "epoch": 0.000132080078125,
      "step": 21640,
      "training_step_time": 0.46251773834228516
    },
    {
      "epoch": 0.000132086181640625,
      "model_forward_time": 0.11627483367919922,
      "step": 21641
    },
    {
      "epoch": 0.000132086181640625,
      "step": 21641,
      "training_step_time": 0.3912525177001953
    },
    {
      "epoch": 0.00013209228515625,
      "model_forward_time": 0.11566376686096191,
      "step": 21642
    },
    {
      "epoch": 0.00013209228515625,
      "step": 21642,
      "training_step_time": 0.39562439918518066
    },
    {
      "epoch": 0.000132098388671875,
      "model_forward_time": 0.11529827117919922,
      "step": 21643
    },
    {
      "epoch": 0.000132098388671875,
      "step": 21643,
      "training_step_time": 0.4001948833465576
    },
    {
      "epoch": 0.0001321044921875,
      "model_forward_time": 0.11470794677734375,
      "step": 21644
    },
    {
      "epoch": 0.0001321044921875,
      "step": 21644,
      "training_step_time": 0.5161726474761963
    },
    {
      "epoch": 0.000132110595703125,
      "model_forward_time": 0.11488056182861328,
      "step": 21645
    },
    {
      "epoch": 0.000132110595703125,
      "step": 21645,
      "training_step_time": 0.40467214584350586
    },
    {
      "epoch": 0.00013211669921875,
      "model_forward_time": 0.11452198028564453,
      "step": 21646
    },
    {
      "epoch": 0.00013211669921875,
      "step": 21646,
      "training_step_time": 0.4237215518951416
    },
    {
      "epoch": 0.000132122802734375,
      "model_forward_time": 0.11478805541992188,
      "step": 21647
    },
    {
      "epoch": 0.000132122802734375,
      "step": 21647,
      "training_step_time": 0.47021937370300293
    },
    {
      "epoch": 0.00013212890625,
      "model_forward_time": 0.11555051803588867,
      "step": 21648
    },
    {
      "epoch": 0.00013212890625,
      "step": 21648,
      "training_step_time": 0.4916553497314453
    },
    {
      "epoch": 0.000132135009765625,
      "model_forward_time": 0.11490821838378906,
      "step": 21649
    },
    {
      "epoch": 0.000132135009765625,
      "step": 21649,
      "training_step_time": 0.398052453994751
    },
    {
      "epoch": 0.00013214111328125,
      "grad_norm": 0.1243329793214798,
      "learning_rate": 7.583059914486257e-05,
      "loss": 0.0509,
      "step": 21650
    },
    {
      "epoch": 0.00013214111328125,
      "model_forward_time": 0.11462640762329102,
      "step": 21650
    },
    {
      "epoch": 0.00013214111328125,
      "step": 21650,
      "training_step_time": 0.3983643054962158
    },
    {
      "epoch": 0.000132147216796875,
      "model_forward_time": 0.11450719833374023,
      "step": 21651
    },
    {
      "epoch": 0.000132147216796875,
      "step": 21651,
      "training_step_time": 0.3975362777709961
    },
    {
      "epoch": 0.0001321533203125,
      "model_forward_time": 0.11505794525146484,
      "step": 21652
    },
    {
      "epoch": 0.0001321533203125,
      "step": 21652,
      "training_step_time": 0.4091789722442627
    },
    {
      "epoch": 0.000132159423828125,
      "model_forward_time": 0.11517596244812012,
      "step": 21653
    },
    {
      "epoch": 0.000132159423828125,
      "step": 21653,
      "training_step_time": 0.3927581310272217
    },
    {
      "epoch": 0.00013216552734375,
      "model_forward_time": 0.11623501777648926,
      "step": 21654
    },
    {
      "epoch": 0.00013216552734375,
      "step": 21654,
      "training_step_time": 0.40270161628723145
    },
    {
      "epoch": 0.000132171630859375,
      "model_forward_time": 0.11463046073913574,
      "step": 21655
    },
    {
      "epoch": 0.000132171630859375,
      "step": 21655,
      "training_step_time": 0.39899110794067383
    },
    {
      "epoch": 0.000132177734375,
      "model_forward_time": 0.11527848243713379,
      "step": 21656
    },
    {
      "epoch": 0.000132177734375,
      "step": 21656,
      "training_step_time": 0.40956950187683105
    },
    {
      "epoch": 0.000132183837890625,
      "model_forward_time": 0.11501908302307129,
      "step": 21657
    },
    {
      "epoch": 0.000132183837890625,
      "step": 21657,
      "training_step_time": 0.47277379035949707
    },
    {
      "epoch": 0.00013218994140625,
      "model_forward_time": 0.11555051803588867,
      "step": 21658
    },
    {
      "epoch": 0.00013218994140625,
      "step": 21658,
      "training_step_time": 0.42174291610717773
    },
    {
      "epoch": 0.000132196044921875,
      "model_forward_time": 0.11547517776489258,
      "step": 21659
    },
    {
      "epoch": 0.000132196044921875,
      "step": 21659,
      "training_step_time": 0.4627389907836914
    },
    {
      "epoch": 0.0001322021484375,
      "grad_norm": 0.13981309533119202,
      "learning_rate": 7.580699966740201e-05,
      "loss": 0.0516,
      "step": 21660
    },
    {
      "epoch": 0.0001322021484375,
      "model_forward_time": 0.1150202751159668,
      "step": 21660
    },
    {
      "epoch": 0.0001322021484375,
      "step": 21660,
      "training_step_time": 0.40932202339172363
    },
    {
      "epoch": 0.000132208251953125,
      "model_forward_time": 0.11469221115112305,
      "step": 21661
    },
    {
      "epoch": 0.000132208251953125,
      "step": 21661,
      "training_step_time": 0.3665642738342285
    },
    {
      "epoch": 0.00013221435546875,
      "model_forward_time": 0.11478781700134277,
      "step": 21662
    },
    {
      "epoch": 0.00013221435546875,
      "step": 21662,
      "training_step_time": 0.4648418426513672
    },
    {
      "epoch": 0.000132220458984375,
      "model_forward_time": 0.11532711982727051,
      "step": 21663
    },
    {
      "epoch": 0.000132220458984375,
      "step": 21663,
      "training_step_time": 0.4034459590911865
    },
    {
      "epoch": 0.0001322265625,
      "model_forward_time": 0.1147918701171875,
      "step": 21664
    },
    {
      "epoch": 0.0001322265625,
      "step": 21664,
      "training_step_time": 0.39517760276794434
    },
    {
      "epoch": 0.000132232666015625,
      "model_forward_time": 0.11593365669250488,
      "step": 21665
    },
    {
      "epoch": 0.000132232666015625,
      "step": 21665,
      "training_step_time": 0.38491034507751465
    },
    {
      "epoch": 0.00013223876953125,
      "model_forward_time": 0.11485552787780762,
      "step": 21666
    },
    {
      "epoch": 0.00013223876953125,
      "step": 21666,
      "training_step_time": 0.4114186763763428
    },
    {
      "epoch": 0.000132244873046875,
      "model_forward_time": 0.11500787734985352,
      "step": 21667
    },
    {
      "epoch": 0.000132244873046875,
      "step": 21667,
      "training_step_time": 0.3971219062805176
    },
    {
      "epoch": 0.0001322509765625,
      "model_forward_time": 0.11480975151062012,
      "step": 21668
    },
    {
      "epoch": 0.0001322509765625,
      "step": 21668,
      "training_step_time": 0.39797544479370117
    },
    {
      "epoch": 0.000132257080078125,
      "model_forward_time": 0.11539530754089355,
      "step": 21669
    },
    {
      "epoch": 0.000132257080078125,
      "step": 21669,
      "training_step_time": 0.3909311294555664
    },
    {
      "epoch": 0.00013226318359375,
      "grad_norm": 0.14131982624530792,
      "learning_rate": 7.578339235045637e-05,
      "loss": 0.0553,
      "step": 21670
    },
    {
      "epoch": 0.00013226318359375,
      "model_forward_time": 0.11713671684265137,
      "step": 21670
    },
    {
      "epoch": 0.00013226318359375,
      "step": 21670,
      "training_step_time": 0.40167832374572754
    },
    {
      "epoch": 0.000132269287109375,
      "model_forward_time": 0.11533498764038086,
      "step": 21671
    },
    {
      "epoch": 0.000132269287109375,
      "step": 21671,
      "training_step_time": 0.3858935832977295
    },
    {
      "epoch": 0.000132275390625,
      "model_forward_time": 0.11607909202575684,
      "step": 21672
    },
    {
      "epoch": 0.000132275390625,
      "step": 21672,
      "training_step_time": 0.3832974433898926
    },
    {
      "epoch": 0.000132281494140625,
      "model_forward_time": 0.11557316780090332,
      "step": 21673
    },
    {
      "epoch": 0.000132281494140625,
      "step": 21673,
      "training_step_time": 0.4545431137084961
    },
    {
      "epoch": 0.00013228759765625,
      "model_forward_time": 0.1150660514831543,
      "step": 21674
    },
    {
      "epoch": 0.00013228759765625,
      "step": 21674,
      "training_step_time": 0.3964664936065674
    },
    {
      "epoch": 0.000132293701171875,
      "model_forward_time": 0.11536359786987305,
      "step": 21675
    },
    {
      "epoch": 0.000132293701171875,
      "step": 21675,
      "training_step_time": 0.3985865116119385
    },
    {
      "epoch": 0.0001322998046875,
      "model_forward_time": 0.1158437728881836,
      "step": 21676
    },
    {
      "epoch": 0.0001322998046875,
      "step": 21676,
      "training_step_time": 0.4175083637237549
    },
    {
      "epoch": 0.000132305908203125,
      "model_forward_time": 0.11571335792541504,
      "step": 21677
    },
    {
      "epoch": 0.000132305908203125,
      "step": 21677,
      "training_step_time": 0.41309547424316406
    },
    {
      "epoch": 0.00013231201171875,
      "model_forward_time": 0.115386962890625,
      "step": 21678
    },
    {
      "epoch": 0.00013231201171875,
      "step": 21678,
      "training_step_time": 0.5048134326934814
    },
    {
      "epoch": 0.000132318115234375,
      "model_forward_time": 0.11518120765686035,
      "step": 21679
    },
    {
      "epoch": 0.000132318115234375,
      "step": 21679,
      "training_step_time": 0.3951849937438965
    },
    {
      "epoch": 0.00013232421875,
      "grad_norm": 0.14310167729854584,
      "learning_rate": 7.57597772011969e-05,
      "loss": 0.0533,
      "step": 21680
    },
    {
      "epoch": 0.00013232421875,
      "model_forward_time": 0.11532020568847656,
      "step": 21680
    },
    {
      "epoch": 0.00013232421875,
      "step": 21680,
      "training_step_time": 0.4043123722076416
    },
    {
      "epoch": 0.000132330322265625,
      "model_forward_time": 0.11493325233459473,
      "step": 21681
    },
    {
      "epoch": 0.000132330322265625,
      "step": 21681,
      "training_step_time": 0.39470720291137695
    },
    {
      "epoch": 0.00013233642578125,
      "model_forward_time": 0.1154472827911377,
      "step": 21682
    },
    {
      "epoch": 0.00013233642578125,
      "step": 21682,
      "training_step_time": 0.4040665626525879
    },
    {
      "epoch": 0.000132342529296875,
      "model_forward_time": 0.11467385292053223,
      "step": 21683
    },
    {
      "epoch": 0.000132342529296875,
      "step": 21683,
      "training_step_time": 0.3982250690460205
    },
    {
      "epoch": 0.0001323486328125,
      "model_forward_time": 0.11589837074279785,
      "step": 21684
    },
    {
      "epoch": 0.0001323486328125,
      "step": 21684,
      "training_step_time": 0.6729915142059326
    },
    {
      "epoch": 0.000132354736328125,
      "model_forward_time": 0.11490249633789062,
      "step": 21685
    },
    {
      "epoch": 0.000132354736328125,
      "step": 21685,
      "training_step_time": 0.3908052444458008
    },
    {
      "epoch": 0.00013236083984375,
      "model_forward_time": 0.11475753784179688,
      "step": 21686
    },
    {
      "epoch": 0.00013236083984375,
      "step": 21686,
      "training_step_time": 0.4530456066131592
    },
    {
      "epoch": 0.000132366943359375,
      "model_forward_time": 0.11554884910583496,
      "step": 21687
    },
    {
      "epoch": 0.000132366943359375,
      "step": 21687,
      "training_step_time": 0.4000225067138672
    },
    {
      "epoch": 0.000132373046875,
      "model_forward_time": 0.11482524871826172,
      "step": 21688
    },
    {
      "epoch": 0.000132373046875,
      "step": 21688,
      "training_step_time": 0.41489243507385254
    },
    {
      "epoch": 0.000132379150390625,
      "model_forward_time": 0.11511063575744629,
      "step": 21689
    },
    {
      "epoch": 0.000132379150390625,
      "step": 21689,
      "training_step_time": 0.38895678520202637
    },
    {
      "epoch": 0.00013238525390625,
      "grad_norm": 0.15149728953838348,
      "learning_rate": 7.573615422679726e-05,
      "loss": 0.0556,
      "step": 21690
    },
    {
      "epoch": 0.00013238525390625,
      "model_forward_time": 0.11509895324707031,
      "step": 21690
    },
    {
      "epoch": 0.00013238525390625,
      "step": 21690,
      "training_step_time": 0.6792325973510742
    },
    {
      "epoch": 0.000132391357421875,
      "model_forward_time": 0.11455869674682617,
      "step": 21691
    },
    {
      "epoch": 0.000132391357421875,
      "step": 21691,
      "training_step_time": 0.4432864189147949
    },
    {
      "epoch": 0.0001323974609375,
      "model_forward_time": 0.11458730697631836,
      "step": 21692
    },
    {
      "epoch": 0.0001323974609375,
      "step": 21692,
      "training_step_time": 0.39496445655822754
    },
    {
      "epoch": 0.000132403564453125,
      "model_forward_time": 0.11463117599487305,
      "step": 21693
    },
    {
      "epoch": 0.000132403564453125,
      "step": 21693,
      "training_step_time": 0.4182155132293701
    },
    {
      "epoch": 0.00013240966796875,
      "model_forward_time": 0.11433792114257812,
      "step": 21694
    },
    {
      "epoch": 0.00013240966796875,
      "step": 21694,
      "training_step_time": 0.4538455009460449
    },
    {
      "epoch": 0.000132415771484375,
      "model_forward_time": 0.11416506767272949,
      "step": 21695
    },
    {
      "epoch": 0.000132415771484375,
      "step": 21695,
      "training_step_time": 0.3942549228668213
    },
    {
      "epoch": 0.000132421875,
      "model_forward_time": 0.11484384536743164,
      "step": 21696
    },
    {
      "epoch": 0.000132421875,
      "step": 21696,
      "training_step_time": 0.5888853073120117
    },
    {
      "epoch": 0.000132427978515625,
      "model_forward_time": 0.11429548263549805,
      "step": 21697
    },
    {
      "epoch": 0.000132427978515625,
      "step": 21697,
      "training_step_time": 0.39286088943481445
    },
    {
      "epoch": 0.00013243408203125,
      "model_forward_time": 0.11509251594543457,
      "step": 21698
    },
    {
      "epoch": 0.00013243408203125,
      "step": 21698,
      "training_step_time": 0.38884973526000977
    },
    {
      "epoch": 0.000132440185546875,
      "model_forward_time": 0.11564445495605469,
      "step": 21699
    },
    {
      "epoch": 0.000132440185546875,
      "step": 21699,
      "training_step_time": 0.3986659049987793
    },
    {
      "epoch": 0.0001324462890625,
      "grad_norm": 0.14339855313301086,
      "learning_rate": 7.571252343443349e-05,
      "loss": 0.0568,
      "step": 21700
    },
    {
      "epoch": 0.0001324462890625,
      "model_forward_time": 0.11414766311645508,
      "step": 21700
    },
    {
      "epoch": 0.0001324462890625,
      "step": 21700,
      "training_step_time": 0.4929313659667969
    },
    {
      "epoch": 0.000132452392578125,
      "model_forward_time": 0.11495304107666016,
      "step": 21701
    },
    {
      "epoch": 0.000132452392578125,
      "step": 21701,
      "training_step_time": 0.4871971607208252
    },
    {
      "epoch": 0.00013245849609375,
      "model_forward_time": 0.1154317855834961,
      "step": 21702
    },
    {
      "epoch": 0.00013245849609375,
      "step": 21702,
      "training_step_time": 0.5420501232147217
    },
    {
      "epoch": 0.000132464599609375,
      "model_forward_time": 0.11577367782592773,
      "step": 21703
    },
    {
      "epoch": 0.000132464599609375,
      "step": 21703,
      "training_step_time": 0.40785813331604004
    },
    {
      "epoch": 0.000132470703125,
      "model_forward_time": 0.1142892837524414,
      "step": 21704
    },
    {
      "epoch": 0.000132470703125,
      "step": 21704,
      "training_step_time": 0.5015068054199219
    },
    {
      "epoch": 0.000132476806640625,
      "model_forward_time": 0.11450767517089844,
      "step": 21705
    },
    {
      "epoch": 0.000132476806640625,
      "step": 21705,
      "training_step_time": 0.44648265838623047
    },
    {
      "epoch": 0.00013248291015625,
      "model_forward_time": 0.11440801620483398,
      "step": 21706
    },
    {
      "epoch": 0.00013248291015625,
      "step": 21706,
      "training_step_time": 0.3982701301574707
    },
    {
      "epoch": 0.000132489013671875,
      "model_forward_time": 0.11542296409606934,
      "step": 21707
    },
    {
      "epoch": 0.000132489013671875,
      "step": 21707,
      "training_step_time": 0.41239285469055176
    },
    {
      "epoch": 0.0001324951171875,
      "model_forward_time": 0.11472344398498535,
      "step": 21708
    },
    {
      "epoch": 0.0001324951171875,
      "step": 21708,
      "training_step_time": 0.5915036201477051
    },
    {
      "epoch": 0.000132501220703125,
      "model_forward_time": 0.1147000789642334,
      "step": 21709
    },
    {
      "epoch": 0.000132501220703125,
      "step": 21709,
      "training_step_time": 0.3942732810974121
    },
    {
      "epoch": 0.00013250732421875,
      "grad_norm": 0.14159028232097626,
      "learning_rate": 7.5688884831284e-05,
      "loss": 0.0508,
      "step": 21710
    },
    {
      "epoch": 0.00013250732421875,
      "model_forward_time": 0.11407136917114258,
      "step": 21710
    },
    {
      "epoch": 0.00013250732421875,
      "step": 21710,
      "training_step_time": 0.387082576751709
    },
    {
      "epoch": 0.000132513427734375,
      "model_forward_time": 0.1150519847869873,
      "step": 21711
    },
    {
      "epoch": 0.000132513427734375,
      "step": 21711,
      "training_step_time": 0.38370299339294434
    },
    {
      "epoch": 0.00013251953125,
      "model_forward_time": 0.11432862281799316,
      "step": 21712
    },
    {
      "epoch": 0.00013251953125,
      "step": 21712,
      "training_step_time": 0.38968658447265625
    },
    {
      "epoch": 0.000132525634765625,
      "model_forward_time": 0.11481642723083496,
      "step": 21713
    },
    {
      "epoch": 0.000132525634765625,
      "step": 21713,
      "training_step_time": 0.3978450298309326
    },
    {
      "epoch": 0.00013253173828125,
      "model_forward_time": 0.11552143096923828,
      "step": 21714
    },
    {
      "epoch": 0.00013253173828125,
      "step": 21714,
      "training_step_time": 0.8103535175323486
    },
    {
      "epoch": 0.000132537841796875,
      "model_forward_time": 0.1148526668548584,
      "step": 21715
    },
    {
      "epoch": 0.000132537841796875,
      "step": 21715,
      "training_step_time": 0.4448575973510742
    },
    {
      "epoch": 0.0001325439453125,
      "model_forward_time": 0.1146080493927002,
      "step": 21716
    },
    {
      "epoch": 0.0001325439453125,
      "step": 21716,
      "training_step_time": 0.4607264995574951
    },
    {
      "epoch": 0.000132550048828125,
      "model_forward_time": 0.11459755897521973,
      "step": 21717
    },
    {
      "epoch": 0.000132550048828125,
      "step": 21717,
      "training_step_time": 0.4002559185028076
    },
    {
      "epoch": 0.00013255615234375,
      "model_forward_time": 0.11453723907470703,
      "step": 21718
    },
    {
      "epoch": 0.00013255615234375,
      "step": 21718,
      "training_step_time": 0.4785327911376953
    },
    {
      "epoch": 0.000132562255859375,
      "model_forward_time": 0.11481881141662598,
      "step": 21719
    },
    {
      "epoch": 0.000132562255859375,
      "step": 21719,
      "training_step_time": 0.40114855766296387
    },
    {
      "epoch": 0.000132568359375,
      "grad_norm": 0.16465862095355988,
      "learning_rate": 7.566523842452958e-05,
      "loss": 0.0539,
      "step": 21720
    },
    {
      "epoch": 0.000132568359375,
      "model_forward_time": 0.1150507926940918,
      "step": 21720
    },
    {
      "epoch": 0.000132568359375,
      "step": 21720,
      "training_step_time": 0.5258698463439941
    },
    {
      "epoch": 0.000132574462890625,
      "model_forward_time": 0.11472630500793457,
      "step": 21721
    },
    {
      "epoch": 0.000132574462890625,
      "step": 21721,
      "training_step_time": 0.3914508819580078
    },
    {
      "epoch": 0.00013258056640625,
      "model_forward_time": 0.11427712440490723,
      "step": 21722
    },
    {
      "epoch": 0.00013258056640625,
      "step": 21722,
      "training_step_time": 0.3960709571838379
    },
    {
      "epoch": 0.000132586669921875,
      "model_forward_time": 0.11499714851379395,
      "step": 21723
    },
    {
      "epoch": 0.000132586669921875,
      "step": 21723,
      "training_step_time": 0.38727307319641113
    },
    {
      "epoch": 0.0001325927734375,
      "model_forward_time": 0.11500716209411621,
      "step": 21724
    },
    {
      "epoch": 0.0001325927734375,
      "step": 21724,
      "training_step_time": 0.4136466979980469
    },
    {
      "epoch": 0.000132598876953125,
      "model_forward_time": 0.11498498916625977,
      "step": 21725
    },
    {
      "epoch": 0.000132598876953125,
      "step": 21725,
      "training_step_time": 0.38632655143737793
    },
    {
      "epoch": 0.00013260498046875,
      "model_forward_time": 0.11495256423950195,
      "step": 21726
    },
    {
      "epoch": 0.00013260498046875,
      "step": 21726,
      "training_step_time": 0.8182523250579834
    },
    {
      "epoch": 0.000132611083984375,
      "model_forward_time": 0.11444425582885742,
      "step": 21727
    },
    {
      "epoch": 0.000132611083984375,
      "step": 21727,
      "training_step_time": 0.4057343006134033
    },
    {
      "epoch": 0.0001326171875,
      "model_forward_time": 0.11409235000610352,
      "step": 21728
    },
    {
      "epoch": 0.0001326171875,
      "step": 21728,
      "training_step_time": 0.3859293460845947
    },
    {
      "epoch": 0.000132623291015625,
      "model_forward_time": 0.11428952217102051,
      "step": 21729
    },
    {
      "epoch": 0.000132623291015625,
      "step": 21729,
      "training_step_time": 0.40506601333618164
    },
    {
      "epoch": 0.00013262939453125,
      "grad_norm": 0.13048703968524933,
      "learning_rate": 7.564158422135337e-05,
      "loss": 0.0535,
      "step": 21730
    },
    {
      "epoch": 0.00013262939453125,
      "model_forward_time": 0.1151127815246582,
      "step": 21730
    },
    {
      "epoch": 0.00013262939453125,
      "step": 21730,
      "training_step_time": 0.42318129539489746
    },
    {
      "epoch": 0.000132635498046875,
      "model_forward_time": 0.11424422264099121,
      "step": 21731
    },
    {
      "epoch": 0.000132635498046875,
      "step": 21731,
      "training_step_time": 0.4427971839904785
    },
    {
      "epoch": 0.0001326416015625,
      "model_forward_time": 0.11512351036071777,
      "step": 21732
    },
    {
      "epoch": 0.0001326416015625,
      "step": 21732,
      "training_step_time": 0.5462741851806641
    },
    {
      "epoch": 0.000132647705078125,
      "model_forward_time": 0.11477780342102051,
      "step": 21733
    },
    {
      "epoch": 0.000132647705078125,
      "step": 21733,
      "training_step_time": 0.3906824588775635
    },
    {
      "epoch": 0.00013265380859375,
      "model_forward_time": 0.11439156532287598,
      "step": 21734
    },
    {
      "epoch": 0.00013265380859375,
      "step": 21734,
      "training_step_time": 0.38248157501220703
    },
    {
      "epoch": 0.000132659912109375,
      "model_forward_time": 0.11475992202758789,
      "step": 21735
    },
    {
      "epoch": 0.000132659912109375,
      "step": 21735,
      "training_step_time": 0.38254356384277344
    },
    {
      "epoch": 0.000132666015625,
      "model_forward_time": 0.11481666564941406,
      "step": 21736
    },
    {
      "epoch": 0.000132666015625,
      "step": 21736,
      "training_step_time": 0.3844261169433594
    },
    {
      "epoch": 0.000132672119140625,
      "model_forward_time": 0.11492228507995605,
      "step": 21737
    },
    {
      "epoch": 0.000132672119140625,
      "step": 21737,
      "training_step_time": 0.3891725540161133
    },
    {
      "epoch": 0.00013267822265625,
      "model_forward_time": 0.11495327949523926,
      "step": 21738
    },
    {
      "epoch": 0.00013267822265625,
      "step": 21738,
      "training_step_time": 0.8170759677886963
    },
    {
      "epoch": 0.000132684326171875,
      "model_forward_time": 0.11490440368652344,
      "step": 21739
    },
    {
      "epoch": 0.000132684326171875,
      "step": 21739,
      "training_step_time": 0.3951568603515625
    },
    {
      "epoch": 0.0001326904296875,
      "grad_norm": 0.11729031801223755,
      "learning_rate": 7.561792222894091e-05,
      "loss": 0.0482,
      "step": 21740
    },
    {
      "epoch": 0.0001326904296875,
      "model_forward_time": 0.11499166488647461,
      "step": 21740
    },
    {
      "epoch": 0.0001326904296875,
      "step": 21740,
      "training_step_time": 0.47907161712646484
    },
    {
      "epoch": 0.000132696533203125,
      "model_forward_time": 0.11455893516540527,
      "step": 21741
    },
    {
      "epoch": 0.000132696533203125,
      "step": 21741,
      "training_step_time": 0.38079261779785156
    },
    {
      "epoch": 0.00013270263671875,
      "model_forward_time": 0.1143500804901123,
      "step": 21742
    },
    {
      "epoch": 0.00013270263671875,
      "step": 21742,
      "training_step_time": 0.4435391426086426
    },
    {
      "epoch": 0.000132708740234375,
      "model_forward_time": 0.11434292793273926,
      "step": 21743
    },
    {
      "epoch": 0.000132708740234375,
      "step": 21743,
      "training_step_time": 0.395688533782959
    },
    {
      "epoch": 0.00013271484375,
      "model_forward_time": 0.11546444892883301,
      "step": 21744
    },
    {
      "epoch": 0.00013271484375,
      "step": 21744,
      "training_step_time": 0.5420897006988525
    },
    {
      "epoch": 0.000132720947265625,
      "model_forward_time": 0.11497020721435547,
      "step": 21745
    },
    {
      "epoch": 0.000132720947265625,
      "step": 21745,
      "training_step_time": 0.4744408130645752
    },
    {
      "epoch": 0.00013272705078125,
      "model_forward_time": 0.1150200366973877,
      "step": 21746
    },
    {
      "epoch": 0.00013272705078125,
      "step": 21746,
      "training_step_time": 0.38170671463012695
    },
    {
      "epoch": 0.000132733154296875,
      "model_forward_time": 0.11658382415771484,
      "step": 21747
    },
    {
      "epoch": 0.000132733154296875,
      "step": 21747,
      "training_step_time": 0.3853490352630615
    },
    {
      "epoch": 0.0001327392578125,
      "model_forward_time": 0.11454296112060547,
      "step": 21748
    },
    {
      "epoch": 0.0001327392578125,
      "step": 21748,
      "training_step_time": 0.38278889656066895
    },
    {
      "epoch": 0.000132745361328125,
      "model_forward_time": 0.11544585227966309,
      "step": 21749
    },
    {
      "epoch": 0.000132745361328125,
      "step": 21749,
      "training_step_time": 0.3877129554748535
    },
    {
      "epoch": 0.00013275146484375,
      "grad_norm": 0.13403208553791046,
      "learning_rate": 7.559425245448006e-05,
      "loss": 0.0509,
      "step": 21750
    },
    {
      "epoch": 0.00013275146484375,
      "model_forward_time": 0.1152791976928711,
      "step": 21750
    },
    {
      "epoch": 0.00013275146484375,
      "step": 21750,
      "training_step_time": 0.6494925022125244
    },
    {
      "epoch": 0.000132757568359375,
      "model_forward_time": 0.1144411563873291,
      "step": 21751
    },
    {
      "epoch": 0.000132757568359375,
      "step": 21751,
      "training_step_time": 0.39083123207092285
    },
    {
      "epoch": 0.000132763671875,
      "model_forward_time": 0.11429381370544434,
      "step": 21752
    },
    {
      "epoch": 0.000132763671875,
      "step": 21752,
      "training_step_time": 0.37833333015441895
    },
    {
      "epoch": 0.000132769775390625,
      "model_forward_time": 0.11492490768432617,
      "step": 21753
    },
    {
      "epoch": 0.000132769775390625,
      "step": 21753,
      "training_step_time": 0.3945028781890869
    },
    {
      "epoch": 0.00013277587890625,
      "model_forward_time": 0.11488604545593262,
      "step": 21754
    },
    {
      "epoch": 0.00013277587890625,
      "step": 21754,
      "training_step_time": 0.4911530017852783
    },
    {
      "epoch": 0.000132781982421875,
      "model_forward_time": 0.11429119110107422,
      "step": 21755
    },
    {
      "epoch": 0.000132781982421875,
      "step": 21755,
      "training_step_time": 0.4843170642852783
    },
    {
      "epoch": 0.0001327880859375,
      "model_forward_time": 0.1153266429901123,
      "step": 21756
    },
    {
      "epoch": 0.0001327880859375,
      "step": 21756,
      "training_step_time": 0.48170995712280273
    },
    {
      "epoch": 0.000132794189453125,
      "model_forward_time": 0.11471128463745117,
      "step": 21757
    },
    {
      "epoch": 0.000132794189453125,
      "step": 21757,
      "training_step_time": 0.43831372261047363
    },
    {
      "epoch": 0.00013280029296875,
      "model_forward_time": 0.11434650421142578,
      "step": 21758
    },
    {
      "epoch": 0.00013280029296875,
      "step": 21758,
      "training_step_time": 0.3641819953918457
    },
    {
      "epoch": 0.000132806396484375,
      "model_forward_time": 0.11452984809875488,
      "step": 21759
    },
    {
      "epoch": 0.000132806396484375,
      "step": 21759,
      "training_step_time": 0.40924835205078125
    },
    {
      "epoch": 0.0001328125,
      "grad_norm": 0.1384132355451584,
      "learning_rate": 7.557057490516111e-05,
      "loss": 0.0544,
      "step": 21760
    },
    {
      "epoch": 0.0001328125,
      "model_forward_time": 0.11452102661132812,
      "step": 21760
    },
    {
      "epoch": 0.0001328125,
      "step": 21760,
      "training_step_time": 0.4667973518371582
    },
    {
      "epoch": 0.000132818603515625,
      "model_forward_time": 0.11492395401000977,
      "step": 21761
    },
    {
      "epoch": 0.000132818603515625,
      "step": 21761,
      "training_step_time": 0.38434648513793945
    },
    {
      "epoch": 0.00013282470703125,
      "model_forward_time": 0.11472082138061523,
      "step": 21762
    },
    {
      "epoch": 0.00013282470703125,
      "step": 21762,
      "training_step_time": 0.5951416492462158
    },
    {
      "epoch": 0.000132830810546875,
      "model_forward_time": 0.1152336597442627,
      "step": 21763
    },
    {
      "epoch": 0.000132830810546875,
      "step": 21763,
      "training_step_time": 0.39951038360595703
    },
    {
      "epoch": 0.0001328369140625,
      "model_forward_time": 0.11424374580383301,
      "step": 21764
    },
    {
      "epoch": 0.0001328369140625,
      "step": 21764,
      "training_step_time": 0.3861525058746338
    },
    {
      "epoch": 0.000132843017578125,
      "model_forward_time": 0.1149590015411377,
      "step": 21765
    },
    {
      "epoch": 0.000132843017578125,
      "step": 21765,
      "training_step_time": 0.39536404609680176
    },
    {
      "epoch": 0.00013284912109375,
      "model_forward_time": 0.11435866355895996,
      "step": 21766
    },
    {
      "epoch": 0.00013284912109375,
      "step": 21766,
      "training_step_time": 0.39197564125061035
    },
    {
      "epoch": 0.000132855224609375,
      "model_forward_time": 0.11514830589294434,
      "step": 21767
    },
    {
      "epoch": 0.000132855224609375,
      "step": 21767,
      "training_step_time": 0.38687729835510254
    },
    {
      "epoch": 0.000132861328125,
      "model_forward_time": 0.11502385139465332,
      "step": 21768
    },
    {
      "epoch": 0.000132861328125,
      "step": 21768,
      "training_step_time": 0.7478013038635254
    },
    {
      "epoch": 0.000132867431640625,
      "model_forward_time": 0.11409687995910645,
      "step": 21769
    },
    {
      "epoch": 0.000132867431640625,
      "step": 21769,
      "training_step_time": 0.4167196750640869
    },
    {
      "epoch": 0.00013287353515625,
      "grad_norm": 0.15684543550014496,
      "learning_rate": 7.554688958817664e-05,
      "loss": 0.0475,
      "step": 21770
    },
    {
      "epoch": 0.00013287353515625,
      "model_forward_time": 0.11478018760681152,
      "step": 21770
    },
    {
      "epoch": 0.00013287353515625,
      "step": 21770,
      "training_step_time": 0.4579579830169678
    },
    {
      "epoch": 0.000132879638671875,
      "model_forward_time": 0.11454463005065918,
      "step": 21771
    },
    {
      "epoch": 0.000132879638671875,
      "step": 21771,
      "training_step_time": 0.4688427448272705
    },
    {
      "epoch": 0.0001328857421875,
      "model_forward_time": 0.11443495750427246,
      "step": 21772
    },
    {
      "epoch": 0.0001328857421875,
      "step": 21772,
      "training_step_time": 0.5088198184967041
    },
    {
      "epoch": 0.000132891845703125,
      "model_forward_time": 0.11464786529541016,
      "step": 21773
    },
    {
      "epoch": 0.000132891845703125,
      "step": 21773,
      "training_step_time": 0.4731142520904541
    },
    {
      "epoch": 0.00013289794921875,
      "model_forward_time": 0.11410951614379883,
      "step": 21774
    },
    {
      "epoch": 0.00013289794921875,
      "step": 21774,
      "training_step_time": 0.40024757385253906
    },
    {
      "epoch": 0.000132904052734375,
      "model_forward_time": 0.1145637035369873,
      "step": 21775
    },
    {
      "epoch": 0.000132904052734375,
      "step": 21775,
      "training_step_time": 0.39623212814331055
    },
    {
      "epoch": 0.00013291015625,
      "model_forward_time": 0.11524128913879395,
      "step": 21776
    },
    {
      "epoch": 0.00013291015625,
      "step": 21776,
      "training_step_time": 0.3860747814178467
    },
    {
      "epoch": 0.000132916259765625,
      "model_forward_time": 0.1151885986328125,
      "step": 21777
    },
    {
      "epoch": 0.000132916259765625,
      "step": 21777,
      "training_step_time": 0.4022328853607178
    },
    {
      "epoch": 0.00013292236328125,
      "model_forward_time": 0.11543750762939453,
      "step": 21778
    },
    {
      "epoch": 0.00013292236328125,
      "step": 21778,
      "training_step_time": 0.39350247383117676
    },
    {
      "epoch": 0.000132928466796875,
      "model_forward_time": 0.11435937881469727,
      "step": 21779
    },
    {
      "epoch": 0.000132928466796875,
      "step": 21779,
      "training_step_time": 0.3917222023010254
    },
    {
      "epoch": 0.0001329345703125,
      "grad_norm": 0.14341293275356293,
      "learning_rate": 7.552319651072164e-05,
      "loss": 0.0491,
      "step": 21780
    },
    {
      "epoch": 0.0001329345703125,
      "model_forward_time": 0.11477017402648926,
      "step": 21780
    },
    {
      "epoch": 0.0001329345703125,
      "step": 21780,
      "training_step_time": 0.5558652877807617
    },
    {
      "epoch": 0.000132940673828125,
      "model_forward_time": 0.11474800109863281,
      "step": 21781
    },
    {
      "epoch": 0.000132940673828125,
      "step": 21781,
      "training_step_time": 0.44980335235595703
    },
    {
      "epoch": 0.00013294677734375,
      "model_forward_time": 0.1143503189086914,
      "step": 21782
    },
    {
      "epoch": 0.00013294677734375,
      "step": 21782,
      "training_step_time": 0.43404316902160645
    },
    {
      "epoch": 0.000132952880859375,
      "model_forward_time": 0.11560654640197754,
      "step": 21783
    },
    {
      "epoch": 0.000132952880859375,
      "step": 21783,
      "training_step_time": 0.40473508834838867
    },
    {
      "epoch": 0.000132958984375,
      "model_forward_time": 0.11499285697937012,
      "step": 21784
    },
    {
      "epoch": 0.000132958984375,
      "step": 21784,
      "training_step_time": 0.40003490447998047
    },
    {
      "epoch": 0.000132965087890625,
      "model_forward_time": 0.11480498313903809,
      "step": 21785
    },
    {
      "epoch": 0.000132965087890625,
      "step": 21785,
      "training_step_time": 0.39427971839904785
    },
    {
      "epoch": 0.00013297119140625,
      "model_forward_time": 0.11573266983032227,
      "step": 21786
    },
    {
      "epoch": 0.00013297119140625,
      "step": 21786,
      "training_step_time": 0.717524528503418
    },
    {
      "epoch": 0.000132977294921875,
      "model_forward_time": 0.11464262008666992,
      "step": 21787
    },
    {
      "epoch": 0.000132977294921875,
      "step": 21787,
      "training_step_time": 0.3921043872833252
    },
    {
      "epoch": 0.0001329833984375,
      "model_forward_time": 0.11483883857727051,
      "step": 21788
    },
    {
      "epoch": 0.0001329833984375,
      "step": 21788,
      "training_step_time": 0.3937358856201172
    },
    {
      "epoch": 0.000132989501953125,
      "model_forward_time": 0.11455392837524414,
      "step": 21789
    },
    {
      "epoch": 0.000132989501953125,
      "step": 21789,
      "training_step_time": 0.3868072032928467
    },
    {
      "epoch": 0.00013299560546875,
      "grad_norm": 0.18528766930103302,
      "learning_rate": 7.549949567999345e-05,
      "loss": 0.053,
      "step": 21790
    },
    {
      "epoch": 0.00013299560546875,
      "model_forward_time": 0.1146240234375,
      "step": 21790
    },
    {
      "epoch": 0.00013299560546875,
      "step": 21790,
      "training_step_time": 0.39195966720581055
    },
    {
      "epoch": 0.000133001708984375,
      "model_forward_time": 0.11445975303649902,
      "step": 21791
    },
    {
      "epoch": 0.000133001708984375,
      "step": 21791,
      "training_step_time": 0.3806493282318115
    },
    {
      "epoch": 0.0001330078125,
      "model_forward_time": 0.11510896682739258,
      "step": 21792
    },
    {
      "epoch": 0.0001330078125,
      "step": 21792,
      "training_step_time": 0.8311989307403564
    },
    {
      "epoch": 0.000133013916015625,
      "model_forward_time": 0.11457967758178711,
      "step": 21793
    },
    {
      "epoch": 0.000133013916015625,
      "step": 21793,
      "training_step_time": 0.3903076648712158
    },
    {
      "epoch": 0.00013302001953125,
      "model_forward_time": 0.11436843872070312,
      "step": 21794
    },
    {
      "epoch": 0.00013302001953125,
      "step": 21794,
      "training_step_time": 0.4018127918243408
    },
    {
      "epoch": 0.000133026123046875,
      "model_forward_time": 0.11480474472045898,
      "step": 21795
    },
    {
      "epoch": 0.000133026123046875,
      "step": 21795,
      "training_step_time": 0.42951321601867676
    },
    {
      "epoch": 0.0001330322265625,
      "model_forward_time": 0.1149752140045166,
      "step": 21796
    },
    {
      "epoch": 0.0001330322265625,
      "step": 21796,
      "training_step_time": 0.413557767868042
    },
    {
      "epoch": 0.000133038330078125,
      "model_forward_time": 0.1145792007446289,
      "step": 21797
    },
    {
      "epoch": 0.000133038330078125,
      "step": 21797,
      "training_step_time": 0.3907043933868408
    },
    {
      "epoch": 0.00013304443359375,
      "model_forward_time": 0.11498618125915527,
      "step": 21798
    },
    {
      "epoch": 0.00013304443359375,
      "step": 21798,
      "training_step_time": 0.6099421977996826
    },
    {
      "epoch": 0.000133050537109375,
      "model_forward_time": 0.11490488052368164,
      "step": 21799
    },
    {
      "epoch": 0.000133050537109375,
      "step": 21799,
      "training_step_time": 0.4253709316253662
    },
    {
      "epoch": 0.000133056640625,
      "grad_norm": 0.16138534247875214,
      "learning_rate": 7.547578710319174e-05,
      "loss": 0.0482,
      "step": 21800
    },
    {
      "epoch": 0.000133056640625,
      "model_forward_time": 0.11431455612182617,
      "step": 21800
    },
    {
      "epoch": 0.000133056640625,
      "step": 21800,
      "training_step_time": 0.46519994735717773
    },
    {
      "epoch": 0.000133062744140625,
      "model_forward_time": 0.11606502532958984,
      "step": 21801
    },
    {
      "epoch": 0.000133062744140625,
      "step": 21801,
      "training_step_time": 0.40586161613464355
    },
    {
      "epoch": 0.00013306884765625,
      "model_forward_time": 0.1155998706817627,
      "step": 21802
    },
    {
      "epoch": 0.00013306884765625,
      "step": 21802,
      "training_step_time": 0.38086509704589844
    },
    {
      "epoch": 0.000133074951171875,
      "model_forward_time": 0.11433124542236328,
      "step": 21803
    },
    {
      "epoch": 0.000133074951171875,
      "step": 21803,
      "training_step_time": 0.3767223358154297
    },
    {
      "epoch": 0.0001330810546875,
      "model_forward_time": 0.11472797393798828,
      "step": 21804
    },
    {
      "epoch": 0.0001330810546875,
      "step": 21804,
      "training_step_time": 0.5950911045074463
    },
    {
      "epoch": 0.000133087158203125,
      "model_forward_time": 0.11391592025756836,
      "step": 21805
    },
    {
      "epoch": 0.000133087158203125,
      "step": 21805,
      "training_step_time": 0.3967626094818115
    },
    {
      "epoch": 0.00013309326171875,
      "model_forward_time": 0.114471435546875,
      "step": 21806
    },
    {
      "epoch": 0.00013309326171875,
      "step": 21806,
      "training_step_time": 0.38759350776672363
    },
    {
      "epoch": 0.000133099365234375,
      "model_forward_time": 0.11532807350158691,
      "step": 21807
    },
    {
      "epoch": 0.000133099365234375,
      "step": 21807,
      "training_step_time": 0.3956618309020996
    },
    {
      "epoch": 0.00013310546875,
      "model_forward_time": 0.11515617370605469,
      "step": 21808
    },
    {
      "epoch": 0.00013310546875,
      "step": 21808,
      "training_step_time": 0.3981759548187256
    },
    {
      "epoch": 0.000133111572265625,
      "model_forward_time": 0.11511516571044922,
      "step": 21809
    },
    {
      "epoch": 0.000133111572265625,
      "step": 21809,
      "training_step_time": 0.46384572982788086
    },
    {
      "epoch": 0.00013311767578125,
      "grad_norm": 0.20114418864250183,
      "learning_rate": 7.545207078751857e-05,
      "loss": 0.0531,
      "step": 21810
    },
    {
      "epoch": 0.00013311767578125,
      "model_forward_time": 0.11488938331604004,
      "step": 21810
    },
    {
      "epoch": 0.00013311767578125,
      "step": 21810,
      "training_step_time": 0.6783661842346191
    },
    {
      "epoch": 0.000133123779296875,
      "model_forward_time": 0.11486124992370605,
      "step": 21811
    },
    {
      "epoch": 0.000133123779296875,
      "step": 21811,
      "training_step_time": 0.5032036304473877
    },
    {
      "epoch": 0.0001331298828125,
      "model_forward_time": 0.11414241790771484,
      "step": 21812
    },
    {
      "epoch": 0.0001331298828125,
      "step": 21812,
      "training_step_time": 0.3971743583679199
    },
    {
      "epoch": 0.000133135986328125,
      "model_forward_time": 0.11382031440734863,
      "step": 21813
    },
    {
      "epoch": 0.000133135986328125,
      "step": 21813,
      "training_step_time": 0.49187421798706055
    },
    {
      "epoch": 0.00013314208984375,
      "model_forward_time": 0.11493992805480957,
      "step": 21814
    },
    {
      "epoch": 0.00013314208984375,
      "step": 21814,
      "training_step_time": 0.48004603385925293
    },
    {
      "epoch": 0.000133148193359375,
      "model_forward_time": 0.1147909164428711,
      "step": 21815
    },
    {
      "epoch": 0.000133148193359375,
      "step": 21815,
      "training_step_time": 0.3753542900085449
    },
    {
      "epoch": 0.000133154296875,
      "model_forward_time": 0.11506962776184082,
      "step": 21816
    },
    {
      "epoch": 0.000133154296875,
      "step": 21816,
      "training_step_time": 0.42812252044677734
    },
    {
      "epoch": 0.000133160400390625,
      "model_forward_time": 0.11454486846923828,
      "step": 21817
    },
    {
      "epoch": 0.000133160400390625,
      "step": 21817,
      "training_step_time": 0.39607930183410645
    },
    {
      "epoch": 0.00013316650390625,
      "model_forward_time": 0.1150665283203125,
      "step": 21818
    },
    {
      "epoch": 0.00013316650390625,
      "step": 21818,
      "training_step_time": 0.3835105895996094
    },
    {
      "epoch": 0.000133172607421875,
      "model_forward_time": 0.11478137969970703,
      "step": 21819
    },
    {
      "epoch": 0.000133172607421875,
      "step": 21819,
      "training_step_time": 0.3967721462249756
    },
    {
      "epoch": 0.0001331787109375,
      "grad_norm": 0.14105090498924255,
      "learning_rate": 7.542834674017831e-05,
      "loss": 0.046,
      "step": 21820
    },
    {
      "epoch": 0.0001331787109375,
      "model_forward_time": 0.1153104305267334,
      "step": 21820
    },
    {
      "epoch": 0.0001331787109375,
      "step": 21820,
      "training_step_time": 0.3870687484741211
    },
    {
      "epoch": 0.000133184814453125,
      "model_forward_time": 0.11484885215759277,
      "step": 21821
    },
    {
      "epoch": 0.000133184814453125,
      "step": 21821,
      "training_step_time": 0.43633103370666504
    },
    {
      "epoch": 0.00013319091796875,
      "model_forward_time": 0.11501097679138184,
      "step": 21822
    },
    {
      "epoch": 0.00013319091796875,
      "step": 21822,
      "training_step_time": 0.7306227684020996
    },
    {
      "epoch": 0.000133197021484375,
      "model_forward_time": 0.11491870880126953,
      "step": 21823
    },
    {
      "epoch": 0.000133197021484375,
      "step": 21823,
      "training_step_time": 0.4525139331817627
    },
    {
      "epoch": 0.000133203125,
      "model_forward_time": 0.1140742301940918,
      "step": 21824
    },
    {
      "epoch": 0.000133203125,
      "step": 21824,
      "training_step_time": 0.38173532485961914
    },
    {
      "epoch": 0.000133209228515625,
      "model_forward_time": 0.11442184448242188,
      "step": 21825
    },
    {
      "epoch": 0.000133209228515625,
      "step": 21825,
      "training_step_time": 0.4519078731536865
    },
    {
      "epoch": 0.00013321533203125,
      "model_forward_time": 0.11433649063110352,
      "step": 21826
    },
    {
      "epoch": 0.00013321533203125,
      "step": 21826,
      "training_step_time": 0.364912748336792
    },
    {
      "epoch": 0.000133221435546875,
      "model_forward_time": 0.11399269104003906,
      "step": 21827
    },
    {
      "epoch": 0.000133221435546875,
      "step": 21827,
      "training_step_time": 0.45655369758605957
    },
    {
      "epoch": 0.0001332275390625,
      "model_forward_time": 0.11487674713134766,
      "step": 21828
    },
    {
      "epoch": 0.0001332275390625,
      "step": 21828,
      "training_step_time": 0.43614959716796875
    },
    {
      "epoch": 0.000133233642578125,
      "model_forward_time": 0.11511588096618652,
      "step": 21829
    },
    {
      "epoch": 0.000133233642578125,
      "step": 21829,
      "training_step_time": 0.38942575454711914
    },
    {
      "epoch": 0.00013323974609375,
      "grad_norm": 0.12732914090156555,
      "learning_rate": 7.54046149683777e-05,
      "loss": 0.0528,
      "step": 21830
    },
    {
      "epoch": 0.00013323974609375,
      "model_forward_time": 0.11478376388549805,
      "step": 21830
    },
    {
      "epoch": 0.00013323974609375,
      "step": 21830,
      "training_step_time": 0.3772776126861572
    },
    {
      "epoch": 0.000133245849609375,
      "model_forward_time": 0.11494588851928711,
      "step": 21831
    },
    {
      "epoch": 0.000133245849609375,
      "step": 21831,
      "training_step_time": 0.3995645046234131
    },
    {
      "epoch": 0.000133251953125,
      "model_forward_time": 0.11518716812133789,
      "step": 21832
    },
    {
      "epoch": 0.000133251953125,
      "step": 21832,
      "training_step_time": 0.39276719093322754
    },
    {
      "epoch": 0.000133258056640625,
      "model_forward_time": 0.11523222923278809,
      "step": 21833
    },
    {
      "epoch": 0.000133258056640625,
      "step": 21833,
      "training_step_time": 0.40018558502197266
    },
    {
      "epoch": 0.00013326416015625,
      "model_forward_time": 0.11499547958374023,
      "step": 21834
    },
    {
      "epoch": 0.00013326416015625,
      "step": 21834,
      "training_step_time": 0.697744607925415
    },
    {
      "epoch": 0.000133270263671875,
      "model_forward_time": 0.11467885971069336,
      "step": 21835
    },
    {
      "epoch": 0.000133270263671875,
      "step": 21835,
      "training_step_time": 0.446089506149292
    },
    {
      "epoch": 0.0001332763671875,
      "model_forward_time": 0.11482381820678711,
      "step": 21836
    },
    {
      "epoch": 0.0001332763671875,
      "step": 21836,
      "training_step_time": 0.4031860828399658
    },
    {
      "epoch": 0.000133282470703125,
      "model_forward_time": 0.1151282787322998,
      "step": 21837
    },
    {
      "epoch": 0.000133282470703125,
      "step": 21837,
      "training_step_time": 0.529106855392456
    },
    {
      "epoch": 0.00013328857421875,
      "model_forward_time": 0.11484646797180176,
      "step": 21838
    },
    {
      "epoch": 0.00013328857421875,
      "step": 21838,
      "training_step_time": 0.40163493156433105
    },
    {
      "epoch": 0.000133294677734375,
      "model_forward_time": 0.1143043041229248,
      "step": 21839
    },
    {
      "epoch": 0.000133294677734375,
      "step": 21839,
      "training_step_time": 0.4465830326080322
    },
    {
      "epoch": 0.00013330078125,
      "grad_norm": 0.12372080236673355,
      "learning_rate": 7.538087547932585e-05,
      "loss": 0.0478,
      "step": 21840
    },
    {
      "epoch": 0.00013330078125,
      "model_forward_time": 0.11473894119262695,
      "step": 21840
    },
    {
      "epoch": 0.00013330078125,
      "step": 21840,
      "training_step_time": 0.5015833377838135
    },
    {
      "epoch": 0.000133306884765625,
      "model_forward_time": 0.11454272270202637,
      "step": 21841
    },
    {
      "epoch": 0.000133306884765625,
      "step": 21841,
      "training_step_time": 0.4588429927825928
    },
    {
      "epoch": 0.00013331298828125,
      "model_forward_time": 0.11447978019714355,
      "step": 21842
    },
    {
      "epoch": 0.00013331298828125,
      "step": 21842,
      "training_step_time": 0.3874335289001465
    },
    {
      "epoch": 0.000133319091796875,
      "model_forward_time": 0.11498451232910156,
      "step": 21843
    },
    {
      "epoch": 0.000133319091796875,
      "step": 21843,
      "training_step_time": 0.39047789573669434
    },
    {
      "epoch": 0.0001333251953125,
      "model_forward_time": 0.11535453796386719,
      "step": 21844
    },
    {
      "epoch": 0.0001333251953125,
      "step": 21844,
      "training_step_time": 0.38705968856811523
    },
    {
      "epoch": 0.000133331298828125,
      "model_forward_time": 0.11501932144165039,
      "step": 21845
    },
    {
      "epoch": 0.000133331298828125,
      "step": 21845,
      "training_step_time": 0.3847620487213135
    },
    {
      "epoch": 0.00013333740234375,
      "model_forward_time": 0.11516475677490234,
      "step": 21846
    },
    {
      "epoch": 0.00013333740234375,
      "step": 21846,
      "training_step_time": 0.6731758117675781
    },
    {
      "epoch": 0.000133343505859375,
      "model_forward_time": 0.11508750915527344,
      "step": 21847
    },
    {
      "epoch": 0.000133343505859375,
      "step": 21847,
      "training_step_time": 0.4248192310333252
    },
    {
      "epoch": 0.000133349609375,
      "model_forward_time": 0.11376237869262695,
      "step": 21848
    },
    {
      "epoch": 0.000133349609375,
      "step": 21848,
      "training_step_time": 0.3989217281341553
    },
    {
      "epoch": 0.000133355712890625,
      "model_forward_time": 0.11436057090759277,
      "step": 21849
    },
    {
      "epoch": 0.000133355712890625,
      "step": 21849,
      "training_step_time": 0.5070679187774658
    },
    {
      "epoch": 0.00013336181640625,
      "grad_norm": 0.13471482694149017,
      "learning_rate": 7.535712828023416e-05,
      "loss": 0.0536,
      "step": 21850
    },
    {
      "epoch": 0.00013336181640625,
      "model_forward_time": 0.11386775970458984,
      "step": 21850
    },
    {
      "epoch": 0.00013336181640625,
      "step": 21850,
      "training_step_time": 0.41495323181152344
    },
    {
      "epoch": 0.000133367919921875,
      "model_forward_time": 0.11376452445983887,
      "step": 21851
    },
    {
      "epoch": 0.000133367919921875,
      "step": 21851,
      "training_step_time": 0.46208810806274414
    },
    {
      "epoch": 0.0001333740234375,
      "model_forward_time": 0.11484813690185547,
      "step": 21852
    },
    {
      "epoch": 0.0001333740234375,
      "step": 21852,
      "training_step_time": 0.6607646942138672
    },
    {
      "epoch": 0.000133380126953125,
      "model_forward_time": 0.11464476585388184,
      "step": 21853
    },
    {
      "epoch": 0.000133380126953125,
      "step": 21853,
      "training_step_time": 0.37480902671813965
    },
    {
      "epoch": 0.00013338623046875,
      "model_forward_time": 0.11431360244750977,
      "step": 21854
    },
    {
      "epoch": 0.00013338623046875,
      "step": 21854,
      "training_step_time": 0.4679758548736572
    },
    {
      "epoch": 0.000133392333984375,
      "model_forward_time": 0.1135861873626709,
      "step": 21855
    },
    {
      "epoch": 0.000133392333984375,
      "step": 21855,
      "training_step_time": 0.4738757610321045
    },
    {
      "epoch": 0.0001333984375,
      "model_forward_time": 0.11437511444091797,
      "step": 21856
    },
    {
      "epoch": 0.0001333984375,
      "step": 21856,
      "training_step_time": 0.37708163261413574
    },
    {
      "epoch": 0.000133404541015625,
      "model_forward_time": 0.11447334289550781,
      "step": 21857
    },
    {
      "epoch": 0.000133404541015625,
      "step": 21857,
      "training_step_time": 0.3824498653411865
    },
    {
      "epoch": 0.00013341064453125,
      "model_forward_time": 0.11468338966369629,
      "step": 21858
    },
    {
      "epoch": 0.00013341064453125,
      "step": 21858,
      "training_step_time": 0.4110572338104248
    },
    {
      "epoch": 0.000133416748046875,
      "model_forward_time": 0.11440730094909668,
      "step": 21859
    },
    {
      "epoch": 0.000133416748046875,
      "step": 21859,
      "training_step_time": 0.4042332172393799
    },
    {
      "epoch": 0.0001334228515625,
      "grad_norm": 0.11991109699010849,
      "learning_rate": 7.533337337831642e-05,
      "loss": 0.0495,
      "step": 21860
    },
    {
      "epoch": 0.0001334228515625,
      "model_forward_time": 0.11448979377746582,
      "step": 21860
    },
    {
      "epoch": 0.0001334228515625,
      "step": 21860,
      "training_step_time": 0.39553284645080566
    },
    {
      "epoch": 0.000133428955078125,
      "model_forward_time": 0.11539196968078613,
      "step": 21861
    },
    {
      "epoch": 0.000133428955078125,
      "step": 21861,
      "training_step_time": 0.4195699691772461
    },
    {
      "epoch": 0.00013343505859375,
      "model_forward_time": 0.11529254913330078,
      "step": 21862
    },
    {
      "epoch": 0.00013343505859375,
      "step": 21862,
      "training_step_time": 0.38938045501708984
    },
    {
      "epoch": 0.000133441162109375,
      "model_forward_time": 0.11492538452148438,
      "step": 21863
    },
    {
      "epoch": 0.000133441162109375,
      "step": 21863,
      "training_step_time": 0.4186532497406006
    },
    {
      "epoch": 0.000133447265625,
      "model_forward_time": 0.11494874954223633,
      "step": 21864
    },
    {
      "epoch": 0.000133447265625,
      "step": 21864,
      "training_step_time": 0.6670749187469482
    },
    {
      "epoch": 0.000133453369140625,
      "model_forward_time": 0.11444091796875,
      "step": 21865
    },
    {
      "epoch": 0.000133453369140625,
      "step": 21865,
      "training_step_time": 0.397200345993042
    },
    {
      "epoch": 0.00013345947265625,
      "model_forward_time": 0.11519980430603027,
      "step": 21866
    },
    {
      "epoch": 0.00013345947265625,
      "step": 21866,
      "training_step_time": 0.4057924747467041
    },
    {
      "epoch": 0.000133465576171875,
      "model_forward_time": 0.11459779739379883,
      "step": 21867
    },
    {
      "epoch": 0.000133465576171875,
      "step": 21867,
      "training_step_time": 0.3654642105102539
    },
    {
      "epoch": 0.0001334716796875,
      "model_forward_time": 0.11528348922729492,
      "step": 21868
    },
    {
      "epoch": 0.0001334716796875,
      "step": 21868,
      "training_step_time": 0.4517996311187744
    },
    {
      "epoch": 0.000133477783203125,
      "model_forward_time": 0.11462283134460449,
      "step": 21869
    },
    {
      "epoch": 0.000133477783203125,
      "step": 21869,
      "training_step_time": 0.41773509979248047
    },
    {
      "epoch": 0.00013348388671875,
      "grad_norm": 0.16173270344734192,
      "learning_rate": 7.530961078078873e-05,
      "loss": 0.0531,
      "step": 21870
    },
    {
      "epoch": 0.00013348388671875,
      "model_forward_time": 0.11440658569335938,
      "step": 21870
    },
    {
      "epoch": 0.00013348388671875,
      "step": 21870,
      "training_step_time": 0.45775437355041504
    },
    {
      "epoch": 0.000133489990234375,
      "model_forward_time": 0.11493444442749023,
      "step": 21871
    },
    {
      "epoch": 0.000133489990234375,
      "step": 21871,
      "training_step_time": 0.39228034019470215
    },
    {
      "epoch": 0.00013349609375,
      "model_forward_time": 0.11484503746032715,
      "step": 21872
    },
    {
      "epoch": 0.00013349609375,
      "step": 21872,
      "training_step_time": 0.38701868057250977
    },
    {
      "epoch": 0.000133502197265625,
      "model_forward_time": 0.11507892608642578,
      "step": 21873
    },
    {
      "epoch": 0.000133502197265625,
      "step": 21873,
      "training_step_time": 0.4066438674926758
    },
    {
      "epoch": 0.00013350830078125,
      "model_forward_time": 0.11539506912231445,
      "step": 21874
    },
    {
      "epoch": 0.00013350830078125,
      "step": 21874,
      "training_step_time": 0.38472867012023926
    },
    {
      "epoch": 0.000133514404296875,
      "model_forward_time": 0.11558270454406738,
      "step": 21875
    },
    {
      "epoch": 0.000133514404296875,
      "step": 21875,
      "training_step_time": 0.3958752155303955
    },
    {
      "epoch": 0.0001335205078125,
      "model_forward_time": 0.11568880081176758,
      "step": 21876
    },
    {
      "epoch": 0.0001335205078125,
      "step": 21876,
      "training_step_time": 0.6980457305908203
    },
    {
      "epoch": 0.000133526611328125,
      "model_forward_time": 0.11524677276611328,
      "step": 21877
    },
    {
      "epoch": 0.000133526611328125,
      "step": 21877,
      "training_step_time": 0.3893144130706787
    },
    {
      "epoch": 0.00013353271484375,
      "model_forward_time": 0.1145176887512207,
      "step": 21878
    },
    {
      "epoch": 0.00013353271484375,
      "step": 21878,
      "training_step_time": 0.4334089756011963
    },
    {
      "epoch": 0.000133538818359375,
      "model_forward_time": 0.11460590362548828,
      "step": 21879
    },
    {
      "epoch": 0.000133538818359375,
      "step": 21879,
      "training_step_time": 0.39326000213623047
    },
    {
      "epoch": 0.000133544921875,
      "grad_norm": 0.15405642986297607,
      "learning_rate": 7.528584049486955e-05,
      "loss": 0.0516,
      "step": 21880
    },
    {
      "epoch": 0.000133544921875,
      "model_forward_time": 0.11495280265808105,
      "step": 21880
    },
    {
      "epoch": 0.000133544921875,
      "step": 21880,
      "training_step_time": 0.4994082450866699
    },
    {
      "epoch": 0.000133551025390625,
      "model_forward_time": 0.11544156074523926,
      "step": 21881
    },
    {
      "epoch": 0.000133551025390625,
      "step": 21881,
      "training_step_time": 0.3826732635498047
    },
    {
      "epoch": 0.00013355712890625,
      "model_forward_time": 0.11544632911682129,
      "step": 21882
    },
    {
      "epoch": 0.00013355712890625,
      "step": 21882,
      "training_step_time": 0.523536205291748
    },
    {
      "epoch": 0.000133563232421875,
      "model_forward_time": 0.11507940292358398,
      "step": 21883
    },
    {
      "epoch": 0.000133563232421875,
      "step": 21883,
      "training_step_time": 0.4225320816040039
    },
    {
      "epoch": 0.0001335693359375,
      "model_forward_time": 0.11507654190063477,
      "step": 21884
    },
    {
      "epoch": 0.0001335693359375,
      "step": 21884,
      "training_step_time": 0.39627647399902344
    },
    {
      "epoch": 0.000133575439453125,
      "model_forward_time": 0.11493134498596191,
      "step": 21885
    },
    {
      "epoch": 0.000133575439453125,
      "step": 21885,
      "training_step_time": 0.38844919204711914
    },
    {
      "epoch": 0.00013358154296875,
      "model_forward_time": 0.1161813735961914,
      "step": 21886
    },
    {
      "epoch": 0.00013358154296875,
      "step": 21886,
      "training_step_time": 0.40086889266967773
    },
    {
      "epoch": 0.000133587646484375,
      "model_forward_time": 0.1151590347290039,
      "step": 21887
    },
    {
      "epoch": 0.000133587646484375,
      "step": 21887,
      "training_step_time": 0.3924281597137451
    },
    {
      "epoch": 0.00013359375,
      "model_forward_time": 0.11499643325805664,
      "step": 21888
    },
    {
      "epoch": 0.00013359375,
      "step": 21888,
      "training_step_time": 0.6891114711761475
    },
    {
      "epoch": 0.000133599853515625,
      "model_forward_time": 0.11582779884338379,
      "step": 21889
    },
    {
      "epoch": 0.000133599853515625,
      "step": 21889,
      "training_step_time": 0.37438488006591797
    },
    {
      "epoch": 0.00013360595703125,
      "grad_norm": 0.16071420907974243,
      "learning_rate": 7.526206252777968e-05,
      "loss": 0.0477,
      "step": 21890
    },
    {
      "epoch": 0.00013360595703125,
      "model_forward_time": 0.11462640762329102,
      "step": 21890
    },
    {
      "epoch": 0.00013360595703125,
      "step": 21890,
      "training_step_time": 0.4181849956512451
    },
    {
      "epoch": 0.000133612060546875,
      "model_forward_time": 0.1147603988647461,
      "step": 21891
    },
    {
      "epoch": 0.000133612060546875,
      "step": 21891,
      "training_step_time": 0.3884110450744629
    },
    {
      "epoch": 0.0001336181640625,
      "model_forward_time": 0.11522674560546875,
      "step": 21892
    },
    {
      "epoch": 0.0001336181640625,
      "step": 21892,
      "training_step_time": 0.3965294361114502
    },
    {
      "epoch": 0.000133624267578125,
      "model_forward_time": 0.1148078441619873,
      "step": 21893
    },
    {
      "epoch": 0.000133624267578125,
      "step": 21893,
      "training_step_time": 0.39121532440185547
    },
    {
      "epoch": 0.00013363037109375,
      "model_forward_time": 0.11551666259765625,
      "step": 21894
    },
    {
      "epoch": 0.00013363037109375,
      "step": 21894,
      "training_step_time": 0.6445579528808594
    },
    {
      "epoch": 0.000133636474609375,
      "model_forward_time": 0.11533617973327637,
      "step": 21895
    },
    {
      "epoch": 0.000133636474609375,
      "step": 21895,
      "training_step_time": 0.3937833309173584
    },
    {
      "epoch": 0.000133642578125,
      "model_forward_time": 0.11632609367370605,
      "step": 21896
    },
    {
      "epoch": 0.000133642578125,
      "step": 21896,
      "training_step_time": 0.4078793525695801
    },
    {
      "epoch": 0.000133648681640625,
      "model_forward_time": 0.11554646492004395,
      "step": 21897
    },
    {
      "epoch": 0.000133648681640625,
      "step": 21897,
      "training_step_time": 0.4875671863555908
    },
    {
      "epoch": 0.00013365478515625,
      "model_forward_time": 0.1151115894317627,
      "step": 21898
    },
    {
      "epoch": 0.00013365478515625,
      "step": 21898,
      "training_step_time": 0.388791561126709
    },
    {
      "epoch": 0.000133660888671875,
      "model_forward_time": 0.11485576629638672,
      "step": 21899
    },
    {
      "epoch": 0.000133660888671875,
      "step": 21899,
      "training_step_time": 0.3949875831604004
    },
    {
      "epoch": 0.0001336669921875,
      "grad_norm": 0.25101640820503235,
      "learning_rate": 7.52382768867422e-05,
      "loss": 0.0498,
      "step": 21900
    },
    {
      "epoch": 0.0001336669921875,
      "model_forward_time": 0.11508417129516602,
      "step": 21900
    },
    {
      "epoch": 0.0001336669921875,
      "step": 21900,
      "training_step_time": 0.6822609901428223
    },
    {
      "epoch": 0.000133673095703125,
      "model_forward_time": 0.11448526382446289,
      "step": 21901
    },
    {
      "epoch": 0.000133673095703125,
      "step": 21901,
      "training_step_time": 0.3815464973449707
    },
    {
      "epoch": 0.00013367919921875,
      "model_forward_time": 0.11478900909423828,
      "step": 21902
    },
    {
      "epoch": 0.00013367919921875,
      "step": 21902,
      "training_step_time": 0.3922710418701172
    },
    {
      "epoch": 0.000133685302734375,
      "model_forward_time": 0.11509537696838379,
      "step": 21903
    },
    {
      "epoch": 0.000133685302734375,
      "step": 21903,
      "training_step_time": 0.3910853862762451
    },
    {
      "epoch": 0.00013369140625,
      "model_forward_time": 0.1149747371673584,
      "step": 21904
    },
    {
      "epoch": 0.00013369140625,
      "step": 21904,
      "training_step_time": 0.475888729095459
    },
    {
      "epoch": 0.000133697509765625,
      "model_forward_time": 0.11494207382202148,
      "step": 21905
    },
    {
      "epoch": 0.000133697509765625,
      "step": 21905,
      "training_step_time": 0.47655415534973145
    },
    {
      "epoch": 0.00013370361328125,
      "model_forward_time": 0.11429929733276367,
      "step": 21906
    },
    {
      "epoch": 0.00013370361328125,
      "step": 21906,
      "training_step_time": 0.5833160877227783
    },
    {
      "epoch": 0.000133709716796875,
      "model_forward_time": 0.11469411849975586,
      "step": 21907
    },
    {
      "epoch": 0.000133709716796875,
      "step": 21907,
      "training_step_time": 0.413074254989624
    },
    {
      "epoch": 0.0001337158203125,
      "model_forward_time": 0.11458587646484375,
      "step": 21908
    },
    {
      "epoch": 0.0001337158203125,
      "step": 21908,
      "training_step_time": 0.361797571182251
    },
    {
      "epoch": 0.000133721923828125,
      "model_forward_time": 0.11462783813476562,
      "step": 21909
    },
    {
      "epoch": 0.000133721923828125,
      "step": 21909,
      "training_step_time": 0.4217801094055176
    },
    {
      "epoch": 0.00013372802734375,
      "grad_norm": 0.17376987636089325,
      "learning_rate": 7.521448357898258e-05,
      "loss": 0.0517,
      "step": 21910
    },
    {
      "epoch": 0.00013372802734375,
      "model_forward_time": 0.1143653392791748,
      "step": 21910
    },
    {
      "epoch": 0.00013372802734375,
      "step": 21910,
      "training_step_time": 0.4513092041015625
    },
    {
      "epoch": 0.000133734130859375,
      "model_forward_time": 0.11442947387695312,
      "step": 21911
    },
    {
      "epoch": 0.000133734130859375,
      "step": 21911,
      "training_step_time": 0.40859293937683105
    },
    {
      "epoch": 0.000133740234375,
      "model_forward_time": 0.11512613296508789,
      "step": 21912
    },
    {
      "epoch": 0.000133740234375,
      "step": 21912,
      "training_step_time": 0.5047407150268555
    },
    {
      "epoch": 0.000133746337890625,
      "model_forward_time": 0.11493706703186035,
      "step": 21913
    },
    {
      "epoch": 0.000133746337890625,
      "step": 21913,
      "training_step_time": 0.38126325607299805
    },
    {
      "epoch": 0.00013375244140625,
      "model_forward_time": 0.11493897438049316,
      "step": 21914
    },
    {
      "epoch": 0.00013375244140625,
      "step": 21914,
      "training_step_time": 0.39423656463623047
    },
    {
      "epoch": 0.000133758544921875,
      "model_forward_time": 0.11497759819030762,
      "step": 21915
    },
    {
      "epoch": 0.000133758544921875,
      "step": 21915,
      "training_step_time": 0.3957245349884033
    },
    {
      "epoch": 0.0001337646484375,
      "model_forward_time": 0.11539673805236816,
      "step": 21916
    },
    {
      "epoch": 0.0001337646484375,
      "step": 21916,
      "training_step_time": 0.38442397117614746
    },
    {
      "epoch": 0.000133770751953125,
      "model_forward_time": 0.11463260650634766,
      "step": 21917
    },
    {
      "epoch": 0.000133770751953125,
      "step": 21917,
      "training_step_time": 0.39771580696105957
    },
    {
      "epoch": 0.00013377685546875,
      "model_forward_time": 0.11559891700744629,
      "step": 21918
    },
    {
      "epoch": 0.00013377685546875,
      "step": 21918,
      "training_step_time": 0.9187064170837402
    },
    {
      "epoch": 0.000133782958984375,
      "model_forward_time": 0.1144711971282959,
      "step": 21919
    },
    {
      "epoch": 0.000133782958984375,
      "step": 21919,
      "training_step_time": 0.4047238826751709
    },
    {
      "epoch": 0.0001337890625,
      "grad_norm": 0.16428157687187195,
      "learning_rate": 7.519068261172859e-05,
      "loss": 0.0448,
      "step": 21920
    },
    {
      "epoch": 0.0001337890625,
      "model_forward_time": 0.1148533821105957,
      "step": 21920
    },
    {
      "epoch": 0.0001337890625,
      "step": 21920,
      "training_step_time": 0.38551998138427734
    },
    {
      "epoch": 0.000133795166015625,
      "model_forward_time": 0.11402344703674316,
      "step": 21921
    },
    {
      "epoch": 0.000133795166015625,
      "step": 21921,
      "training_step_time": 0.4008004665374756
    },
    {
      "epoch": 0.00013380126953125,
      "model_forward_time": 0.11446547508239746,
      "step": 21922
    },
    {
      "epoch": 0.00013380126953125,
      "step": 21922,
      "training_step_time": 0.3610062599182129
    },
    {
      "epoch": 0.000133807373046875,
      "model_forward_time": 0.11519742012023926,
      "step": 21923
    },
    {
      "epoch": 0.000133807373046875,
      "step": 21923,
      "training_step_time": 0.43588995933532715
    },
    {
      "epoch": 0.0001338134765625,
      "model_forward_time": 0.11520004272460938,
      "step": 21924
    },
    {
      "epoch": 0.0001338134765625,
      "step": 21924,
      "training_step_time": 0.5089004039764404
    },
    {
      "epoch": 0.000133819580078125,
      "model_forward_time": 0.11556410789489746,
      "step": 21925
    },
    {
      "epoch": 0.000133819580078125,
      "step": 21925,
      "training_step_time": 0.38631463050842285
    },
    {
      "epoch": 0.00013382568359375,
      "model_forward_time": 0.11469197273254395,
      "step": 21926
    },
    {
      "epoch": 0.00013382568359375,
      "step": 21926,
      "training_step_time": 0.3799459934234619
    },
    {
      "epoch": 0.000133831787109375,
      "model_forward_time": 0.11526846885681152,
      "step": 21927
    },
    {
      "epoch": 0.000133831787109375,
      "step": 21927,
      "training_step_time": 0.38483119010925293
    },
    {
      "epoch": 0.000133837890625,
      "model_forward_time": 0.11491656303405762,
      "step": 21928
    },
    {
      "epoch": 0.000133837890625,
      "step": 21928,
      "training_step_time": 0.3790750503540039
    },
    {
      "epoch": 0.000133843994140625,
      "model_forward_time": 0.11533880233764648,
      "step": 21929
    },
    {
      "epoch": 0.000133843994140625,
      "step": 21929,
      "training_step_time": 0.3964502811431885
    },
    {
      "epoch": 0.00013385009765625,
      "grad_norm": 0.12408223748207092,
      "learning_rate": 7.516687399221037e-05,
      "loss": 0.0555,
      "step": 21930
    },
    {
      "epoch": 0.00013385009765625,
      "model_forward_time": 0.11513614654541016,
      "step": 21930
    },
    {
      "epoch": 0.00013385009765625,
      "step": 21930,
      "training_step_time": 0.7113254070281982
    },
    {
      "epoch": 0.000133856201171875,
      "model_forward_time": 0.11462259292602539,
      "step": 21931
    },
    {
      "epoch": 0.000133856201171875,
      "step": 21931,
      "training_step_time": 0.389833927154541
    },
    {
      "epoch": 0.0001338623046875,
      "model_forward_time": 0.11508989334106445,
      "step": 21932
    },
    {
      "epoch": 0.0001338623046875,
      "step": 21932,
      "training_step_time": 0.4082808494567871
    },
    {
      "epoch": 0.000133868408203125,
      "model_forward_time": 0.11425065994262695,
      "step": 21933
    },
    {
      "epoch": 0.000133868408203125,
      "step": 21933,
      "training_step_time": 0.469357967376709
    },
    {
      "epoch": 0.00013387451171875,
      "model_forward_time": 0.1143190860748291,
      "step": 21934
    },
    {
      "epoch": 0.00013387451171875,
      "step": 21934,
      "training_step_time": 0.3825960159301758
    },
    {
      "epoch": 0.000133880615234375,
      "model_forward_time": 0.11474728584289551,
      "step": 21935
    },
    {
      "epoch": 0.000133880615234375,
      "step": 21935,
      "training_step_time": 0.40313124656677246
    },
    {
      "epoch": 0.00013388671875,
      "model_forward_time": 0.11523842811584473,
      "step": 21936
    },
    {
      "epoch": 0.00013388671875,
      "step": 21936,
      "training_step_time": 0.4324207305908203
    },
    {
      "epoch": 0.000133892822265625,
      "model_forward_time": 0.11523938179016113,
      "step": 21937
    },
    {
      "epoch": 0.000133892822265625,
      "step": 21937,
      "training_step_time": 0.4047372341156006
    },
    {
      "epoch": 0.00013389892578125,
      "model_forward_time": 0.11491036415100098,
      "step": 21938
    },
    {
      "epoch": 0.00013389892578125,
      "step": 21938,
      "training_step_time": 0.4278903007507324
    },
    {
      "epoch": 0.000133905029296875,
      "model_forward_time": 0.11446070671081543,
      "step": 21939
    },
    {
      "epoch": 0.000133905029296875,
      "step": 21939,
      "training_step_time": 0.38942575454711914
    },
    {
      "epoch": 0.0001339111328125,
      "grad_norm": 0.2041081041097641,
      "learning_rate": 7.514305772766031e-05,
      "loss": 0.055,
      "step": 21940
    },
    {
      "epoch": 0.0001339111328125,
      "model_forward_time": 0.11558985710144043,
      "step": 21940
    },
    {
      "epoch": 0.0001339111328125,
      "step": 21940,
      "training_step_time": 0.3935360908508301
    },
    {
      "epoch": 0.000133917236328125,
      "model_forward_time": 0.11510419845581055,
      "step": 21941
    },
    {
      "epoch": 0.000133917236328125,
      "step": 21941,
      "training_step_time": 0.39798855781555176
    },
    {
      "epoch": 0.00013392333984375,
      "model_forward_time": 0.11474800109863281,
      "step": 21942
    },
    {
      "epoch": 0.00013392333984375,
      "step": 21942,
      "training_step_time": 0.7296366691589355
    },
    {
      "epoch": 0.000133929443359375,
      "model_forward_time": 0.11491560935974121,
      "step": 21943
    },
    {
      "epoch": 0.000133929443359375,
      "step": 21943,
      "training_step_time": 0.3818092346191406
    },
    {
      "epoch": 0.000133935546875,
      "model_forward_time": 0.11440706253051758,
      "step": 21944
    },
    {
      "epoch": 0.000133935546875,
      "step": 21944,
      "training_step_time": 0.401641845703125
    },
    {
      "epoch": 0.000133941650390625,
      "model_forward_time": 0.11448311805725098,
      "step": 21945
    },
    {
      "epoch": 0.000133941650390625,
      "step": 21945,
      "training_step_time": 0.4772036075592041
    },
    {
      "epoch": 0.00013394775390625,
      "model_forward_time": 0.11423206329345703,
      "step": 21946
    },
    {
      "epoch": 0.00013394775390625,
      "step": 21946,
      "training_step_time": 0.4656517505645752
    },
    {
      "epoch": 0.000133953857421875,
      "model_forward_time": 0.11441922187805176,
      "step": 21947
    },
    {
      "epoch": 0.000133953857421875,
      "step": 21947,
      "training_step_time": 0.3827505111694336
    },
    {
      "epoch": 0.0001339599609375,
      "model_forward_time": 0.11468148231506348,
      "step": 21948
    },
    {
      "epoch": 0.0001339599609375,
      "step": 21948,
      "training_step_time": 0.5151791572570801
    },
    {
      "epoch": 0.000133966064453125,
      "model_forward_time": 0.11475586891174316,
      "step": 21949
    },
    {
      "epoch": 0.000133966064453125,
      "step": 21949,
      "training_step_time": 0.4020082950592041
    },
    {
      "epoch": 0.00013397216796875,
      "grad_norm": 0.14145420491695404,
      "learning_rate": 7.511923382531317e-05,
      "loss": 0.0471,
      "step": 21950
    },
    {
      "epoch": 0.00013397216796875,
      "model_forward_time": 0.11489415168762207,
      "step": 21950
    },
    {
      "epoch": 0.00013397216796875,
      "step": 21950,
      "training_step_time": 0.42914247512817383
    },
    {
      "epoch": 0.000133978271484375,
      "model_forward_time": 0.11611676216125488,
      "step": 21951
    },
    {
      "epoch": 0.000133978271484375,
      "step": 21951,
      "training_step_time": 0.39530110359191895
    },
    {
      "epoch": 0.000133984375,
      "model_forward_time": 0.11655855178833008,
      "step": 21952
    },
    {
      "epoch": 0.000133984375,
      "step": 21952,
      "training_step_time": 0.3871643543243408
    },
    {
      "epoch": 0.000133990478515625,
      "model_forward_time": 0.11527824401855469,
      "step": 21953
    },
    {
      "epoch": 0.000133990478515625,
      "step": 21953,
      "training_step_time": 0.39017629623413086
    },
    {
      "epoch": 0.00013399658203125,
      "model_forward_time": 0.11458730697631836,
      "step": 21954
    },
    {
      "epoch": 0.00013399658203125,
      "step": 21954,
      "training_step_time": 0.7607955932617188
    },
    {
      "epoch": 0.000134002685546875,
      "model_forward_time": 0.11482429504394531,
      "step": 21955
    },
    {
      "epoch": 0.000134002685546875,
      "step": 21955,
      "training_step_time": 0.37763071060180664
    },
    {
      "epoch": 0.0001340087890625,
      "model_forward_time": 0.11493897438049316,
      "step": 21956
    },
    {
      "epoch": 0.0001340087890625,
      "step": 21956,
      "training_step_time": 0.38260841369628906
    },
    {
      "epoch": 0.000134014892578125,
      "model_forward_time": 0.11518549919128418,
      "step": 21957
    },
    {
      "epoch": 0.000134014892578125,
      "step": 21957,
      "training_step_time": 0.3839578628540039
    },
    {
      "epoch": 0.00013402099609375,
      "model_forward_time": 0.11512517929077148,
      "step": 21958
    },
    {
      "epoch": 0.00013402099609375,
      "step": 21958,
      "training_step_time": 0.392592191696167
    },
    {
      "epoch": 0.000134027099609375,
      "model_forward_time": 0.11494755744934082,
      "step": 21959
    },
    {
      "epoch": 0.000134027099609375,
      "step": 21959,
      "training_step_time": 0.4490666389465332
    },
    {
      "epoch": 0.000134033203125,
      "grad_norm": 0.13924452662467957,
      "learning_rate": 7.509540229240601e-05,
      "loss": 0.0464,
      "step": 21960
    },
    {
      "epoch": 0.000134033203125,
      "model_forward_time": 0.11509990692138672,
      "step": 21960
    },
    {
      "epoch": 0.000134033203125,
      "step": 21960,
      "training_step_time": 0.5757303237915039
    },
    {
      "epoch": 0.000134039306640625,
      "model_forward_time": 0.11488151550292969,
      "step": 21961
    },
    {
      "epoch": 0.000134039306640625,
      "step": 21961,
      "training_step_time": 0.3762776851654053
    },
    {
      "epoch": 0.00013404541015625,
      "model_forward_time": 0.11484432220458984,
      "step": 21962
    },
    {
      "epoch": 0.00013404541015625,
      "step": 21962,
      "training_step_time": 0.3944675922393799
    },
    {
      "epoch": 0.000134051513671875,
      "model_forward_time": 0.11536574363708496,
      "step": 21963
    },
    {
      "epoch": 0.000134051513671875,
      "step": 21963,
      "training_step_time": 0.45781540870666504
    },
    {
      "epoch": 0.0001340576171875,
      "model_forward_time": 0.11476898193359375,
      "step": 21964
    },
    {
      "epoch": 0.0001340576171875,
      "step": 21964,
      "training_step_time": 0.484757661819458
    },
    {
      "epoch": 0.000134063720703125,
      "model_forward_time": 0.11479377746582031,
      "step": 21965
    },
    {
      "epoch": 0.000134063720703125,
      "step": 21965,
      "training_step_time": 0.46578526496887207
    },
    {
      "epoch": 0.00013406982421875,
      "model_forward_time": 0.11495828628540039,
      "step": 21966
    },
    {
      "epoch": 0.00013406982421875,
      "step": 21966,
      "training_step_time": 0.552619457244873
    },
    {
      "epoch": 0.000134075927734375,
      "model_forward_time": 0.11494588851928711,
      "step": 21967
    },
    {
      "epoch": 0.000134075927734375,
      "step": 21967,
      "training_step_time": 0.38221025466918945
    },
    {
      "epoch": 0.00013408203125,
      "model_forward_time": 0.11461210250854492,
      "step": 21968
    },
    {
      "epoch": 0.00013408203125,
      "step": 21968,
      "training_step_time": 0.3917531967163086
    },
    {
      "epoch": 0.000134088134765625,
      "model_forward_time": 0.11503148078918457,
      "step": 21969
    },
    {
      "epoch": 0.000134088134765625,
      "step": 21969,
      "training_step_time": 0.3898890018463135
    },
    {
      "epoch": 0.00013409423828125,
      "grad_norm": 0.18895204365253448,
      "learning_rate": 7.507156313617827e-05,
      "loss": 0.044,
      "step": 21970
    },
    {
      "epoch": 0.00013409423828125,
      "model_forward_time": 0.115966796875,
      "step": 21970
    },
    {
      "epoch": 0.00013409423828125,
      "step": 21970,
      "training_step_time": 0.39043092727661133
    },
    {
      "epoch": 0.000134100341796875,
      "model_forward_time": 0.11479854583740234,
      "step": 21971
    },
    {
      "epoch": 0.000134100341796875,
      "step": 21971,
      "training_step_time": 0.39742445945739746
    },
    {
      "epoch": 0.0001341064453125,
      "model_forward_time": 0.11510992050170898,
      "step": 21972
    },
    {
      "epoch": 0.0001341064453125,
      "step": 21972,
      "training_step_time": 0.757648229598999
    },
    {
      "epoch": 0.000134112548828125,
      "model_forward_time": 0.11446142196655273,
      "step": 21973
    },
    {
      "epoch": 0.000134112548828125,
      "step": 21973,
      "training_step_time": 0.47916245460510254
    },
    {
      "epoch": 0.00013411865234375,
      "model_forward_time": 0.11423063278198242,
      "step": 21974
    },
    {
      "epoch": 0.00013411865234375,
      "step": 21974,
      "training_step_time": 0.38913893699645996
    },
    {
      "epoch": 0.000134124755859375,
      "model_forward_time": 0.11539173126220703,
      "step": 21975
    },
    {
      "epoch": 0.000134124755859375,
      "step": 21975,
      "training_step_time": 0.44710421562194824
    },
    {
      "epoch": 0.000134130859375,
      "model_forward_time": 0.11517024040222168,
      "step": 21976
    },
    {
      "epoch": 0.000134130859375,
      "step": 21976,
      "training_step_time": 0.44423651695251465
    },
    {
      "epoch": 0.000134136962890625,
      "model_forward_time": 0.11490511894226074,
      "step": 21977
    },
    {
      "epoch": 0.000134136962890625,
      "step": 21977,
      "training_step_time": 0.396514892578125
    },
    {
      "epoch": 0.00013414306640625,
      "model_forward_time": 0.11498785018920898,
      "step": 21978
    },
    {
      "epoch": 0.00013414306640625,
      "step": 21978,
      "training_step_time": 0.4944286346435547
    },
    {
      "epoch": 0.000134149169921875,
      "model_forward_time": 0.11535096168518066,
      "step": 21979
    },
    {
      "epoch": 0.000134149169921875,
      "step": 21979,
      "training_step_time": 0.44993162155151367
    },
    {
      "epoch": 0.0001341552734375,
      "grad_norm": 0.1869424730539322,
      "learning_rate": 7.504771636387163e-05,
      "loss": 0.0606,
      "step": 21980
    },
    {
      "epoch": 0.0001341552734375,
      "model_forward_time": 0.11467313766479492,
      "step": 21980
    },
    {
      "epoch": 0.0001341552734375,
      "step": 21980,
      "training_step_time": 0.3846547603607178
    },
    {
      "epoch": 0.000134161376953125,
      "model_forward_time": 0.1157684326171875,
      "step": 21981
    },
    {
      "epoch": 0.000134161376953125,
      "step": 21981,
      "training_step_time": 0.39017677307128906
    },
    {
      "epoch": 0.00013416748046875,
      "model_forward_time": 0.11527228355407715,
      "step": 21982
    },
    {
      "epoch": 0.00013416748046875,
      "step": 21982,
      "training_step_time": 0.38680601119995117
    },
    {
      "epoch": 0.000134173583984375,
      "model_forward_time": 0.11485910415649414,
      "step": 21983
    },
    {
      "epoch": 0.000134173583984375,
      "step": 21983,
      "training_step_time": 0.39366769790649414
    },
    {
      "epoch": 0.0001341796875,
      "model_forward_time": 0.11496210098266602,
      "step": 21984
    },
    {
      "epoch": 0.0001341796875,
      "step": 21984,
      "training_step_time": 0.657118558883667
    },
    {
      "epoch": 0.000134185791015625,
      "model_forward_time": 0.11495208740234375,
      "step": 21985
    },
    {
      "epoch": 0.000134185791015625,
      "step": 21985,
      "training_step_time": 0.38682055473327637
    },
    {
      "epoch": 0.00013419189453125,
      "model_forward_time": 0.11468291282653809,
      "step": 21986
    },
    {
      "epoch": 0.00013419189453125,
      "step": 21986,
      "training_step_time": 0.3994295597076416
    },
    {
      "epoch": 0.000134197998046875,
      "model_forward_time": 0.11659073829650879,
      "step": 21987
    },
    {
      "epoch": 0.000134197998046875,
      "step": 21987,
      "training_step_time": 0.4906454086303711
    },
    {
      "epoch": 0.0001342041015625,
      "model_forward_time": 0.11488461494445801,
      "step": 21988
    },
    {
      "epoch": 0.0001342041015625,
      "step": 21988,
      "training_step_time": 0.3968801498413086
    },
    {
      "epoch": 0.000134210205078125,
      "model_forward_time": 0.11490464210510254,
      "step": 21989
    },
    {
      "epoch": 0.000134210205078125,
      "step": 21989,
      "training_step_time": 0.421001672744751
    },
    {
      "epoch": 0.00013421630859375,
      "grad_norm": 0.2474219799041748,
      "learning_rate": 7.50238619827301e-05,
      "loss": 0.0475,
      "step": 21990
    },
    {
      "epoch": 0.00013421630859375,
      "model_forward_time": 0.1148221492767334,
      "step": 21990
    },
    {
      "epoch": 0.00013421630859375,
      "step": 21990,
      "training_step_time": 0.5289008617401123
    },
    {
      "epoch": 0.000134222412109375,
      "model_forward_time": 0.11438441276550293,
      "step": 21991
    },
    {
      "epoch": 0.000134222412109375,
      "step": 21991,
      "training_step_time": 0.3632638454437256
    },
    {
      "epoch": 0.000134228515625,
      "model_forward_time": 0.1149594783782959,
      "step": 21992
    },
    {
      "epoch": 0.000134228515625,
      "step": 21992,
      "training_step_time": 0.45482850074768066
    },
    {
      "epoch": 0.000134234619140625,
      "model_forward_time": 0.11441826820373535,
      "step": 21993
    },
    {
      "epoch": 0.000134234619140625,
      "step": 21993,
      "training_step_time": 0.47775936126708984
    },
    {
      "epoch": 0.00013424072265625,
      "model_forward_time": 0.1147451400756836,
      "step": 21994
    },
    {
      "epoch": 0.00013424072265625,
      "step": 21994,
      "training_step_time": 0.38494062423706055
    },
    {
      "epoch": 0.000134246826171875,
      "model_forward_time": 0.1140739917755127,
      "step": 21995
    },
    {
      "epoch": 0.000134246826171875,
      "step": 21995,
      "training_step_time": 0.3819568157196045
    },
    {
      "epoch": 0.0001342529296875,
      "model_forward_time": 0.11475586891174316,
      "step": 21996
    },
    {
      "epoch": 0.0001342529296875,
      "step": 21996,
      "training_step_time": 0.43678832054138184
    },
    {
      "epoch": 0.000134259033203125,
      "model_forward_time": 0.1154642105102539,
      "step": 21997
    },
    {
      "epoch": 0.000134259033203125,
      "step": 21997,
      "training_step_time": 0.3746602535247803
    },
    {
      "epoch": 0.00013426513671875,
      "model_forward_time": 0.11491990089416504,
      "step": 21998
    },
    {
      "epoch": 0.00013426513671875,
      "step": 21998,
      "training_step_time": 0.4075496196746826
    },
    {
      "epoch": 0.000134271240234375,
      "model_forward_time": 0.11515021324157715,
      "step": 21999
    },
    {
      "epoch": 0.000134271240234375,
      "step": 21999,
      "training_step_time": 0.38934922218322754
    },
    {
      "epoch": 0.00013427734375,
      "grad_norm": 0.17434878647327423,
      "learning_rate": 7.500000000000001e-05,
      "loss": 0.0493,
      "step": 22000
    },
    {
      "epoch": 0.00013427734375,
      "model_forward_time": 0.11249995231628418,
      "step": 22000
    },
    {
      "epoch": 0.00013427734375,
      "step": 22000,
      "training_step_time": 0.35316991806030273
    },
    {
      "epoch": 0.000134283447265625,
      "model_forward_time": 0.11227297782897949,
      "step": 22001
    },
    {
      "epoch": 0.000134283447265625,
      "step": 22001,
      "training_step_time": 0.3811302185058594
    },
    {
      "epoch": 0.00013428955078125,
      "model_forward_time": 0.1132957935333252,
      "step": 22002
    },
    {
      "epoch": 0.00013428955078125,
      "step": 22002,
      "training_step_time": 0.39280200004577637
    },
    {
      "epoch": 0.000134295654296875,
      "model_forward_time": 0.1133430004119873,
      "step": 22003
    },
    {
      "epoch": 0.000134295654296875,
      "step": 22003,
      "training_step_time": 0.4212777614593506
    },
    {
      "epoch": 0.0001343017578125,
      "model_forward_time": 0.11392331123352051,
      "step": 22004
    },
    {
      "epoch": 0.0001343017578125,
      "step": 22004,
      "training_step_time": 0.40015459060668945
    },
    {
      "epoch": 0.000134307861328125,
      "model_forward_time": 0.11471748352050781,
      "step": 22005
    },
    {
      "epoch": 0.000134307861328125,
      "step": 22005,
      "training_step_time": 0.3855400085449219
    },
    {
      "epoch": 0.00013431396484375,
      "model_forward_time": 0.11463022232055664,
      "step": 22006
    },
    {
      "epoch": 0.00013431396484375,
      "step": 22006,
      "training_step_time": 0.4164609909057617
    },
    {
      "epoch": 0.000134320068359375,
      "model_forward_time": 0.11464715003967285,
      "step": 22007
    },
    {
      "epoch": 0.000134320068359375,
      "step": 22007,
      "training_step_time": 0.4620211124420166
    },
    {
      "epoch": 0.000134326171875,
      "model_forward_time": 0.11479020118713379,
      "step": 22008
    },
    {
      "epoch": 0.000134326171875,
      "step": 22008,
      "training_step_time": 0.4488537311553955
    },
    {
      "epoch": 0.000134332275390625,
      "model_forward_time": 0.11430573463439941,
      "step": 22009
    },
    {
      "epoch": 0.000134332275390625,
      "step": 22009,
      "training_step_time": 0.46675825119018555
    },
    {
      "epoch": 0.00013433837890625,
      "grad_norm": 0.1315019130706787,
      "learning_rate": 7.497613042293e-05,
      "loss": 0.0507,
      "step": 22010
    },
    {
      "epoch": 0.00013433837890625,
      "model_forward_time": 0.11511969566345215,
      "step": 22010
    },
    {
      "epoch": 0.00013433837890625,
      "step": 22010,
      "training_step_time": 0.38567185401916504
    },
    {
      "epoch": 0.000134344482421875,
      "model_forward_time": 0.1150047779083252,
      "step": 22011
    },
    {
      "epoch": 0.000134344482421875,
      "step": 22011,
      "training_step_time": 0.3912472724914551
    },
    {
      "epoch": 0.0001343505859375,
      "model_forward_time": 0.11527180671691895,
      "step": 22012
    },
    {
      "epoch": 0.0001343505859375,
      "step": 22012,
      "training_step_time": 0.3915550708770752
    },
    {
      "epoch": 0.000134356689453125,
      "model_forward_time": 0.1147918701171875,
      "step": 22013
    },
    {
      "epoch": 0.000134356689453125,
      "step": 22013,
      "training_step_time": 0.3925037384033203
    },
    {
      "epoch": 0.00013436279296875,
      "model_forward_time": 0.11533975601196289,
      "step": 22014
    },
    {
      "epoch": 0.00013436279296875,
      "step": 22014,
      "training_step_time": 0.39429497718811035
    },
    {
      "epoch": 0.000134368896484375,
      "model_forward_time": 0.11451411247253418,
      "step": 22015
    },
    {
      "epoch": 0.000134368896484375,
      "step": 22015,
      "training_step_time": 0.39719128608703613
    },
    {
      "epoch": 0.000134375,
      "model_forward_time": 0.11523604393005371,
      "step": 22016
    },
    {
      "epoch": 0.000134375,
      "step": 22016,
      "training_step_time": 0.4473557472229004
    },
    {
      "epoch": 0.000134381103515625,
      "model_forward_time": 0.11577439308166504,
      "step": 22017
    },
    {
      "epoch": 0.000134381103515625,
      "step": 22017,
      "training_step_time": 0.40471434593200684
    },
    {
      "epoch": 0.00013438720703125,
      "model_forward_time": 0.11508297920227051,
      "step": 22018
    },
    {
      "epoch": 0.00013438720703125,
      "step": 22018,
      "training_step_time": 0.4159567356109619
    },
    {
      "epoch": 0.000134393310546875,
      "model_forward_time": 0.11498069763183594,
      "step": 22019
    },
    {
      "epoch": 0.000134393310546875,
      "step": 22019,
      "training_step_time": 0.3841276168823242
    },
    {
      "epoch": 0.0001343994140625,
      "grad_norm": 0.15437649190425873,
      "learning_rate": 7.495225325877103e-05,
      "loss": 0.0503,
      "step": 22020
    },
    {
      "epoch": 0.0001343994140625,
      "model_forward_time": 0.11513233184814453,
      "step": 22020
    },
    {
      "epoch": 0.0001343994140625,
      "step": 22020,
      "training_step_time": 0.43564629554748535
    },
    {
      "epoch": 0.000134405517578125,
      "model_forward_time": 0.11499381065368652,
      "step": 22021
    },
    {
      "epoch": 0.000134405517578125,
      "step": 22021,
      "training_step_time": 0.39230847358703613
    },
    {
      "epoch": 0.00013441162109375,
      "model_forward_time": 0.11459589004516602,
      "step": 22022
    },
    {
      "epoch": 0.00013441162109375,
      "step": 22022,
      "training_step_time": 0.41500306129455566
    },
    {
      "epoch": 0.000134417724609375,
      "model_forward_time": 0.11486172676086426,
      "step": 22023
    },
    {
      "epoch": 0.000134417724609375,
      "step": 22023,
      "training_step_time": 0.42254066467285156
    },
    {
      "epoch": 0.000134423828125,
      "model_forward_time": 0.11475157737731934,
      "step": 22024
    },
    {
      "epoch": 0.000134423828125,
      "step": 22024,
      "training_step_time": 0.4067385196685791
    },
    {
      "epoch": 0.000134429931640625,
      "model_forward_time": 0.11508679389953613,
      "step": 22025
    },
    {
      "epoch": 0.000134429931640625,
      "step": 22025,
      "training_step_time": 0.3896751403808594
    },
    {
      "epoch": 0.00013443603515625,
      "model_forward_time": 0.11570525169372559,
      "step": 22026
    },
    {
      "epoch": 0.00013443603515625,
      "step": 22026,
      "training_step_time": 0.3904895782470703
    },
    {
      "epoch": 0.000134442138671875,
      "model_forward_time": 0.11478900909423828,
      "step": 22027
    },
    {
      "epoch": 0.000134442138671875,
      "step": 22027,
      "training_step_time": 0.39401817321777344
    },
    {
      "epoch": 0.0001344482421875,
      "model_forward_time": 0.11540651321411133,
      "step": 22028
    },
    {
      "epoch": 0.0001344482421875,
      "step": 22028,
      "training_step_time": 0.389965295791626
    },
    {
      "epoch": 0.000134454345703125,
      "model_forward_time": 0.11566162109375,
      "step": 22029
    },
    {
      "epoch": 0.000134454345703125,
      "step": 22029,
      "training_step_time": 0.39411449432373047
    },
    {
      "epoch": 0.00013446044921875,
      "grad_norm": 0.1804131269454956,
      "learning_rate": 7.492836851477636e-05,
      "loss": 0.0505,
      "step": 22030
    },
    {
      "epoch": 0.00013446044921875,
      "model_forward_time": 0.11868762969970703,
      "step": 22030
    },
    {
      "epoch": 0.00013446044921875,
      "step": 22030,
      "training_step_time": 0.3883826732635498
    },
    {
      "epoch": 0.000134466552734375,
      "model_forward_time": 0.11572909355163574,
      "step": 22031
    },
    {
      "epoch": 0.000134466552734375,
      "step": 22031,
      "training_step_time": 0.5122120380401611
    },
    {
      "epoch": 0.00013447265625,
      "model_forward_time": 0.11508321762084961,
      "step": 22032
    },
    {
      "epoch": 0.00013447265625,
      "step": 22032,
      "training_step_time": 0.3974180221557617
    },
    {
      "epoch": 0.000134478759765625,
      "model_forward_time": 0.11534690856933594,
      "step": 22033
    },
    {
      "epoch": 0.000134478759765625,
      "step": 22033,
      "training_step_time": 0.43915343284606934
    },
    {
      "epoch": 0.00013448486328125,
      "model_forward_time": 0.11500668525695801,
      "step": 22034
    },
    {
      "epoch": 0.00013448486328125,
      "step": 22034,
      "training_step_time": 0.3874387741088867
    },
    {
      "epoch": 0.000134490966796875,
      "model_forward_time": 0.11558818817138672,
      "step": 22035
    },
    {
      "epoch": 0.000134490966796875,
      "step": 22035,
      "training_step_time": 0.44599151611328125
    },
    {
      "epoch": 0.0001344970703125,
      "model_forward_time": 0.11529731750488281,
      "step": 22036
    },
    {
      "epoch": 0.0001344970703125,
      "step": 22036,
      "training_step_time": 0.44521522521972656
    },
    {
      "epoch": 0.000134503173828125,
      "model_forward_time": 0.11493921279907227,
      "step": 22037
    },
    {
      "epoch": 0.000134503173828125,
      "step": 22037,
      "training_step_time": 0.5693624019622803
    },
    {
      "epoch": 0.00013450927734375,
      "model_forward_time": 0.11467742919921875,
      "step": 22038
    },
    {
      "epoch": 0.00013450927734375,
      "step": 22038,
      "training_step_time": 0.48515939712524414
    },
    {
      "epoch": 0.000134515380859375,
      "model_forward_time": 0.11482596397399902,
      "step": 22039
    },
    {
      "epoch": 0.000134515380859375,
      "step": 22039,
      "training_step_time": 0.3900017738342285
    },
    {
      "epoch": 0.000134521484375,
      "grad_norm": 0.2007872313261032,
      "learning_rate": 7.490447619820152e-05,
      "loss": 0.0498,
      "step": 22040
    },
    {
      "epoch": 0.000134521484375,
      "model_forward_time": 0.11486339569091797,
      "step": 22040
    },
    {
      "epoch": 0.000134521484375,
      "step": 22040,
      "training_step_time": 0.3932158946990967
    },
    {
      "epoch": 0.000134527587890625,
      "model_forward_time": 0.11462736129760742,
      "step": 22041
    },
    {
      "epoch": 0.000134527587890625,
      "step": 22041,
      "training_step_time": 0.3889122009277344
    },
    {
      "epoch": 0.00013453369140625,
      "model_forward_time": 0.11478924751281738,
      "step": 22042
    },
    {
      "epoch": 0.00013453369140625,
      "step": 22042,
      "training_step_time": 0.388582706451416
    },
    {
      "epoch": 0.000134539794921875,
      "model_forward_time": 0.11489152908325195,
      "step": 22043
    },
    {
      "epoch": 0.000134539794921875,
      "step": 22043,
      "training_step_time": 0.40068817138671875
    },
    {
      "epoch": 0.0001345458984375,
      "model_forward_time": 0.11553740501403809,
      "step": 22044
    },
    {
      "epoch": 0.0001345458984375,
      "step": 22044,
      "training_step_time": 0.41066884994506836
    },
    {
      "epoch": 0.000134552001953125,
      "model_forward_time": 0.1151275634765625,
      "step": 22045
    },
    {
      "epoch": 0.000134552001953125,
      "step": 22045,
      "training_step_time": 0.44159460067749023
    },
    {
      "epoch": 0.00013455810546875,
      "model_forward_time": 0.1148829460144043,
      "step": 22046
    },
    {
      "epoch": 0.00013455810546875,
      "step": 22046,
      "training_step_time": 0.3990907669067383
    },
    {
      "epoch": 0.000134564208984375,
      "model_forward_time": 0.11533164978027344,
      "step": 22047
    },
    {
      "epoch": 0.000134564208984375,
      "step": 22047,
      "training_step_time": 0.46814632415771484
    },
    {
      "epoch": 0.0001345703125,
      "model_forward_time": 0.11424660682678223,
      "step": 22048
    },
    {
      "epoch": 0.0001345703125,
      "step": 22048,
      "training_step_time": 0.3881702423095703
    },
    {
      "epoch": 0.000134576416015625,
      "model_forward_time": 0.11588597297668457,
      "step": 22049
    },
    {
      "epoch": 0.000134576416015625,
      "step": 22049,
      "training_step_time": 0.42889881134033203
    },
    {
      "epoch": 0.00013458251953125,
      "grad_norm": 0.1219274029135704,
      "learning_rate": 7.488057631630437e-05,
      "loss": 0.0479,
      "step": 22050
    },
    {
      "epoch": 0.00013458251953125,
      "model_forward_time": 0.11574077606201172,
      "step": 22050
    },
    {
      "epoch": 0.00013458251953125,
      "step": 22050,
      "training_step_time": 0.44352173805236816
    },
    {
      "epoch": 0.000134588623046875,
      "model_forward_time": 0.11496210098266602,
      "step": 22051
    },
    {
      "epoch": 0.000134588623046875,
      "step": 22051,
      "training_step_time": 0.3668680191040039
    },
    {
      "epoch": 0.0001345947265625,
      "model_forward_time": 0.11480712890625,
      "step": 22052
    },
    {
      "epoch": 0.0001345947265625,
      "step": 22052,
      "training_step_time": 0.46410036087036133
    },
    {
      "epoch": 0.000134600830078125,
      "model_forward_time": 0.11488580703735352,
      "step": 22053
    },
    {
      "epoch": 0.000134600830078125,
      "step": 22053,
      "training_step_time": 0.4052436351776123
    },
    {
      "epoch": 0.00013460693359375,
      "model_forward_time": 0.11559224128723145,
      "step": 22054
    },
    {
      "epoch": 0.00013460693359375,
      "step": 22054,
      "training_step_time": 0.39113402366638184
    },
    {
      "epoch": 0.000134613037109375,
      "model_forward_time": 0.11482429504394531,
      "step": 22055
    },
    {
      "epoch": 0.000134613037109375,
      "step": 22055,
      "training_step_time": 0.39665651321411133
    },
    {
      "epoch": 0.000134619140625,
      "model_forward_time": 0.11463284492492676,
      "step": 22056
    },
    {
      "epoch": 0.000134619140625,
      "step": 22056,
      "training_step_time": 0.3943154811859131
    },
    {
      "epoch": 0.000134625244140625,
      "model_forward_time": 0.11471128463745117,
      "step": 22057
    },
    {
      "epoch": 0.000134625244140625,
      "step": 22057,
      "training_step_time": 0.4020218849182129
    },
    {
      "epoch": 0.00013463134765625,
      "model_forward_time": 0.1151127815246582,
      "step": 22058
    },
    {
      "epoch": 0.00013463134765625,
      "step": 22058,
      "training_step_time": 0.4346742630004883
    },
    {
      "epoch": 0.000134637451171875,
      "model_forward_time": 0.11489486694335938,
      "step": 22059
    },
    {
      "epoch": 0.000134637451171875,
      "step": 22059,
      "training_step_time": 0.39597535133361816
    },
    {
      "epoch": 0.0001346435546875,
      "grad_norm": 0.15091539919376373,
      "learning_rate": 7.485666887634506e-05,
      "loss": 0.0456,
      "step": 22060
    },
    {
      "epoch": 0.0001346435546875,
      "model_forward_time": 0.11553263664245605,
      "step": 22060
    },
    {
      "epoch": 0.0001346435546875,
      "step": 22060,
      "training_step_time": 0.4418928623199463
    },
    {
      "epoch": 0.000134649658203125,
      "model_forward_time": 0.11536812782287598,
      "step": 22061
    },
    {
      "epoch": 0.000134649658203125,
      "step": 22061,
      "training_step_time": 0.43430495262145996
    },
    {
      "epoch": 0.00013465576171875,
      "model_forward_time": 0.11501264572143555,
      "step": 22062
    },
    {
      "epoch": 0.00013465576171875,
      "step": 22062,
      "training_step_time": 0.3956637382507324
    },
    {
      "epoch": 0.000134661865234375,
      "model_forward_time": 0.11494922637939453,
      "step": 22063
    },
    {
      "epoch": 0.000134661865234375,
      "step": 22063,
      "training_step_time": 0.3932013511657715
    },
    {
      "epoch": 0.00013466796875,
      "model_forward_time": 0.1150054931640625,
      "step": 22064
    },
    {
      "epoch": 0.00013466796875,
      "step": 22064,
      "training_step_time": 0.3923308849334717
    },
    {
      "epoch": 0.000134674072265625,
      "model_forward_time": 0.11540722846984863,
      "step": 22065
    },
    {
      "epoch": 0.000134674072265625,
      "step": 22065,
      "training_step_time": 0.49378061294555664
    },
    {
      "epoch": 0.00013468017578125,
      "model_forward_time": 0.11620068550109863,
      "step": 22066
    },
    {
      "epoch": 0.00013468017578125,
      "step": 22066,
      "training_step_time": 0.3992021083831787
    },
    {
      "epoch": 0.000134686279296875,
      "model_forward_time": 0.11486959457397461,
      "step": 22067
    },
    {
      "epoch": 0.000134686279296875,
      "step": 22067,
      "training_step_time": 0.4017500877380371
    },
    {
      "epoch": 0.0001346923828125,
      "model_forward_time": 0.11513733863830566,
      "step": 22068
    },
    {
      "epoch": 0.0001346923828125,
      "step": 22068,
      "training_step_time": 0.42047691345214844
    },
    {
      "epoch": 0.000134698486328125,
      "model_forward_time": 0.1157369613647461,
      "step": 22069
    },
    {
      "epoch": 0.000134698486328125,
      "step": 22069,
      "training_step_time": 0.3861827850341797
    },
    {
      "epoch": 0.00013470458984375,
      "grad_norm": 0.12285124510526657,
      "learning_rate": 7.483275388558607e-05,
      "loss": 0.0459,
      "step": 22070
    },
    {
      "epoch": 0.00013470458984375,
      "model_forward_time": 0.11607766151428223,
      "step": 22070
    },
    {
      "epoch": 0.00013470458984375,
      "step": 22070,
      "training_step_time": 0.3660461902618408
    },
    {
      "epoch": 0.000134710693359375,
      "model_forward_time": 0.11489653587341309,
      "step": 22071
    },
    {
      "epoch": 0.000134710693359375,
      "step": 22071,
      "training_step_time": 0.3862478733062744
    },
    {
      "epoch": 0.000134716796875,
      "model_forward_time": 0.11522197723388672,
      "step": 22072
    },
    {
      "epoch": 0.000134716796875,
      "step": 22072,
      "training_step_time": 0.38481807708740234
    },
    {
      "epoch": 0.000134722900390625,
      "model_forward_time": 0.11508965492248535,
      "step": 22073
    },
    {
      "epoch": 0.000134722900390625,
      "step": 22073,
      "training_step_time": 0.42171263694763184
    },
    {
      "epoch": 0.00013472900390625,
      "model_forward_time": 0.1153876781463623,
      "step": 22074
    },
    {
      "epoch": 0.00013472900390625,
      "step": 22074,
      "training_step_time": 0.5332343578338623
    },
    {
      "epoch": 0.000134735107421875,
      "model_forward_time": 0.11454510688781738,
      "step": 22075
    },
    {
      "epoch": 0.000134735107421875,
      "step": 22075,
      "training_step_time": 0.404461145401001
    },
    {
      "epoch": 0.0001347412109375,
      "model_forward_time": 0.1152796745300293,
      "step": 22076
    },
    {
      "epoch": 0.0001347412109375,
      "step": 22076,
      "training_step_time": 0.40105390548706055
    },
    {
      "epoch": 0.000134747314453125,
      "model_forward_time": 0.11526894569396973,
      "step": 22077
    },
    {
      "epoch": 0.000134747314453125,
      "step": 22077,
      "training_step_time": 0.39774632453918457
    },
    {
      "epoch": 0.00013475341796875,
      "model_forward_time": 0.11498475074768066,
      "step": 22078
    },
    {
      "epoch": 0.00013475341796875,
      "step": 22078,
      "training_step_time": 0.38872218132019043
    },
    {
      "epoch": 0.000134759521484375,
      "model_forward_time": 0.1148076057434082,
      "step": 22079
    },
    {
      "epoch": 0.000134759521484375,
      "step": 22079,
      "training_step_time": 0.40966343879699707
    },
    {
      "epoch": 0.000134765625,
      "grad_norm": 0.13604868948459625,
      "learning_rate": 7.480883135129211e-05,
      "loss": 0.0508,
      "step": 22080
    },
    {
      "epoch": 0.000134765625,
      "model_forward_time": 0.1149287223815918,
      "step": 22080
    },
    {
      "epoch": 0.000134765625,
      "step": 22080,
      "training_step_time": 0.7018928527832031
    },
    {
      "epoch": 0.000134771728515625,
      "model_forward_time": 0.11470484733581543,
      "step": 22081
    },
    {
      "epoch": 0.000134771728515625,
      "step": 22081,
      "training_step_time": 0.4527871608734131
    },
    {
      "epoch": 0.00013477783203125,
      "model_forward_time": 0.11484742164611816,
      "step": 22082
    },
    {
      "epoch": 0.00013477783203125,
      "step": 22082,
      "training_step_time": 0.4030160903930664
    },
    {
      "epoch": 0.000134783935546875,
      "model_forward_time": 0.11455059051513672,
      "step": 22083
    },
    {
      "epoch": 0.000134783935546875,
      "step": 22083,
      "training_step_time": 0.385373592376709
    },
    {
      "epoch": 0.0001347900390625,
      "model_forward_time": 0.1141960620880127,
      "step": 22084
    },
    {
      "epoch": 0.0001347900390625,
      "step": 22084,
      "training_step_time": 0.38356709480285645
    },
    {
      "epoch": 0.000134796142578125,
      "model_forward_time": 0.11444258689880371,
      "step": 22085
    },
    {
      "epoch": 0.000134796142578125,
      "step": 22085,
      "training_step_time": 0.38345980644226074
    },
    {
      "epoch": 0.00013480224609375,
      "model_forward_time": 0.11505889892578125,
      "step": 22086
    },
    {
      "epoch": 0.00013480224609375,
      "step": 22086,
      "training_step_time": 0.6999366283416748
    },
    {
      "epoch": 0.000134808349609375,
      "model_forward_time": 0.11468172073364258,
      "step": 22087
    },
    {
      "epoch": 0.000134808349609375,
      "step": 22087,
      "training_step_time": 0.38846898078918457
    },
    {
      "epoch": 0.000134814453125,
      "model_forward_time": 0.1148519515991211,
      "step": 22088
    },
    {
      "epoch": 0.000134814453125,
      "step": 22088,
      "training_step_time": 0.38683319091796875
    },
    {
      "epoch": 0.000134820556640625,
      "model_forward_time": 0.11433649063110352,
      "step": 22089
    },
    {
      "epoch": 0.000134820556640625,
      "step": 22089,
      "training_step_time": 0.5584840774536133
    },
    {
      "epoch": 0.00013482666015625,
      "grad_norm": 0.15311585366725922,
      "learning_rate": 7.478490128073022e-05,
      "loss": 0.0503,
      "step": 22090
    },
    {
      "epoch": 0.00013482666015625,
      "model_forward_time": 0.11377120018005371,
      "step": 22090
    },
    {
      "epoch": 0.00013482666015625,
      "step": 22090,
      "training_step_time": 0.3922693729400635
    },
    {
      "epoch": 0.000134832763671875,
      "model_forward_time": 0.11534738540649414,
      "step": 22091
    },
    {
      "epoch": 0.000134832763671875,
      "step": 22091,
      "training_step_time": 0.39461588859558105
    },
    {
      "epoch": 0.0001348388671875,
      "model_forward_time": 0.11576557159423828,
      "step": 22092
    },
    {
      "epoch": 0.0001348388671875,
      "step": 22092,
      "training_step_time": 0.504967451095581
    },
    {
      "epoch": 0.000134844970703125,
      "model_forward_time": 0.11445283889770508,
      "step": 22093
    },
    {
      "epoch": 0.000134844970703125,
      "step": 22093,
      "training_step_time": 0.3880951404571533
    },
    {
      "epoch": 0.00013485107421875,
      "model_forward_time": 0.11460280418395996,
      "step": 22094
    },
    {
      "epoch": 0.00013485107421875,
      "step": 22094,
      "training_step_time": 0.36847710609436035
    },
    {
      "epoch": 0.000134857177734375,
      "model_forward_time": 0.11478257179260254,
      "step": 22095
    },
    {
      "epoch": 0.000134857177734375,
      "step": 22095,
      "training_step_time": 0.44947385787963867
    },
    {
      "epoch": 0.00013486328125,
      "model_forward_time": 0.11491560935974121,
      "step": 22096
    },
    {
      "epoch": 0.00013486328125,
      "step": 22096,
      "training_step_time": 0.4308969974517822
    },
    {
      "epoch": 0.000134869384765625,
      "model_forward_time": 0.11457180976867676,
      "step": 22097
    },
    {
      "epoch": 0.000134869384765625,
      "step": 22097,
      "training_step_time": 0.3932771682739258
    },
    {
      "epoch": 0.00013487548828125,
      "model_forward_time": 0.11542463302612305,
      "step": 22098
    },
    {
      "epoch": 0.00013487548828125,
      "step": 22098,
      "training_step_time": 0.6180949211120605
    },
    {
      "epoch": 0.000134881591796875,
      "model_forward_time": 0.11470580101013184,
      "step": 22099
    },
    {
      "epoch": 0.000134881591796875,
      "step": 22099,
      "training_step_time": 0.4020524024963379
    },
    {
      "epoch": 0.0001348876953125,
      "grad_norm": 0.1447518765926361,
      "learning_rate": 7.476096368116974e-05,
      "loss": 0.0478,
      "step": 22100
    },
    {
      "epoch": 0.0001348876953125,
      "model_forward_time": 0.11423873901367188,
      "step": 22100
    },
    {
      "epoch": 0.0001348876953125,
      "step": 22100,
      "training_step_time": 0.45591115951538086
    },
    {
      "epoch": 0.000134893798828125,
      "model_forward_time": 0.11530637741088867,
      "step": 22101
    },
    {
      "epoch": 0.000134893798828125,
      "step": 22101,
      "training_step_time": 0.41922664642333984
    },
    {
      "epoch": 0.00013489990234375,
      "model_forward_time": 0.11460208892822266,
      "step": 22102
    },
    {
      "epoch": 0.00013489990234375,
      "step": 22102,
      "training_step_time": 0.40485525131225586
    },
    {
      "epoch": 0.000134906005859375,
      "model_forward_time": 0.1148233413696289,
      "step": 22103
    },
    {
      "epoch": 0.000134906005859375,
      "step": 22103,
      "training_step_time": 0.4661867618560791
    },
    {
      "epoch": 0.000134912109375,
      "model_forward_time": 0.11490011215209961,
      "step": 22104
    },
    {
      "epoch": 0.000134912109375,
      "step": 22104,
      "training_step_time": 0.48259592056274414
    },
    {
      "epoch": 0.000134918212890625,
      "model_forward_time": 0.11428475379943848,
      "step": 22105
    },
    {
      "epoch": 0.000134918212890625,
      "step": 22105,
      "training_step_time": 0.4003610610961914
    },
    {
      "epoch": 0.00013492431640625,
      "model_forward_time": 0.1147148609161377,
      "step": 22106
    },
    {
      "epoch": 0.00013492431640625,
      "step": 22106,
      "training_step_time": 0.3830440044403076
    },
    {
      "epoch": 0.000134930419921875,
      "model_forward_time": 0.11494970321655273,
      "step": 22107
    },
    {
      "epoch": 0.000134930419921875,
      "step": 22107,
      "training_step_time": 0.40286779403686523
    },
    {
      "epoch": 0.0001349365234375,
      "model_forward_time": 0.11434412002563477,
      "step": 22108
    },
    {
      "epoch": 0.0001349365234375,
      "step": 22108,
      "training_step_time": 0.4543297290802002
    },
    {
      "epoch": 0.000134942626953125,
      "model_forward_time": 0.11494135856628418,
      "step": 22109
    },
    {
      "epoch": 0.000134942626953125,
      "step": 22109,
      "training_step_time": 0.43528103828430176
    },
    {
      "epoch": 0.00013494873046875,
      "grad_norm": 0.14439503848552704,
      "learning_rate": 7.473701855988227e-05,
      "loss": 0.0526,
      "step": 22110
    },
    {
      "epoch": 0.00013494873046875,
      "model_forward_time": 0.11573529243469238,
      "step": 22110
    },
    {
      "epoch": 0.00013494873046875,
      "step": 22110,
      "training_step_time": 0.6172842979431152
    },
    {
      "epoch": 0.000134954833984375,
      "model_forward_time": 0.11546850204467773,
      "step": 22111
    },
    {
      "epoch": 0.000134954833984375,
      "step": 22111,
      "training_step_time": 0.4302713871002197
    },
    {
      "epoch": 0.0001349609375,
      "model_forward_time": 0.11471676826477051,
      "step": 22112
    },
    {
      "epoch": 0.0001349609375,
      "step": 22112,
      "training_step_time": 0.4429056644439697
    },
    {
      "epoch": 0.000134967041015625,
      "model_forward_time": 0.11410951614379883,
      "step": 22113
    },
    {
      "epoch": 0.000134967041015625,
      "step": 22113,
      "training_step_time": 0.3805816173553467
    },
    {
      "epoch": 0.00013497314453125,
      "model_forward_time": 0.11486387252807617,
      "step": 22114
    },
    {
      "epoch": 0.00013497314453125,
      "step": 22114,
      "training_step_time": 0.38627123832702637
    },
    {
      "epoch": 0.000134979248046875,
      "model_forward_time": 0.11435794830322266,
      "step": 22115
    },
    {
      "epoch": 0.000134979248046875,
      "step": 22115,
      "training_step_time": 0.42737627029418945
    },
    {
      "epoch": 0.0001349853515625,
      "model_forward_time": 0.11477184295654297,
      "step": 22116
    },
    {
      "epoch": 0.0001349853515625,
      "step": 22116,
      "training_step_time": 0.4958348274230957
    },
    {
      "epoch": 0.000134991455078125,
      "model_forward_time": 0.11432576179504395,
      "step": 22117
    },
    {
      "epoch": 0.000134991455078125,
      "step": 22117,
      "training_step_time": 0.39468908309936523
    },
    {
      "epoch": 0.00013499755859375,
      "model_forward_time": 0.11509227752685547,
      "step": 22118
    },
    {
      "epoch": 0.00013499755859375,
      "step": 22118,
      "training_step_time": 0.39634108543395996
    },
    {
      "epoch": 0.000135003662109375,
      "model_forward_time": 0.11515402793884277,
      "step": 22119
    },
    {
      "epoch": 0.000135003662109375,
      "step": 22119,
      "training_step_time": 0.39512205123901367
    },
    {
      "epoch": 0.000135009765625,
      "grad_norm": 0.16399478912353516,
      "learning_rate": 7.471306592414168e-05,
      "loss": 0.0458,
      "step": 22120
    },
    {
      "epoch": 0.000135009765625,
      "model_forward_time": 0.11559867858886719,
      "step": 22120
    },
    {
      "epoch": 0.000135009765625,
      "step": 22120,
      "training_step_time": 0.3794436454772949
    },
    {
      "epoch": 0.000135015869140625,
      "model_forward_time": 0.11560511589050293,
      "step": 22121
    },
    {
      "epoch": 0.000135015869140625,
      "step": 22121,
      "training_step_time": 0.3789384365081787
    },
    {
      "epoch": 0.00013502197265625,
      "model_forward_time": 0.11531949043273926,
      "step": 22122
    },
    {
      "epoch": 0.00013502197265625,
      "step": 22122,
      "training_step_time": 0.6375300884246826
    },
    {
      "epoch": 0.000135028076171875,
      "model_forward_time": 0.11469054222106934,
      "step": 22123
    },
    {
      "epoch": 0.000135028076171875,
      "step": 22123,
      "training_step_time": 0.39426445960998535
    },
    {
      "epoch": 0.0001350341796875,
      "model_forward_time": 0.11429572105407715,
      "step": 22124
    },
    {
      "epoch": 0.0001350341796875,
      "step": 22124,
      "training_step_time": 0.40425658226013184
    },
    {
      "epoch": 0.000135040283203125,
      "model_forward_time": 0.11508536338806152,
      "step": 22125
    },
    {
      "epoch": 0.000135040283203125,
      "step": 22125,
      "training_step_time": 0.3951904773712158
    },
    {
      "epoch": 0.00013504638671875,
      "model_forward_time": 0.11457967758178711,
      "step": 22126
    },
    {
      "epoch": 0.00013504638671875,
      "step": 22126,
      "training_step_time": 0.39972758293151855
    },
    {
      "epoch": 0.000135052490234375,
      "model_forward_time": 0.1138157844543457,
      "step": 22127
    },
    {
      "epoch": 0.000135052490234375,
      "step": 22127,
      "training_step_time": 0.39252424240112305
    },
    {
      "epoch": 0.00013505859375,
      "model_forward_time": 0.11499166488647461,
      "step": 22128
    },
    {
      "epoch": 0.00013505859375,
      "step": 22128,
      "training_step_time": 0.6388900279998779
    },
    {
      "epoch": 0.000135064697265625,
      "model_forward_time": 0.11504840850830078,
      "step": 22129
    },
    {
      "epoch": 0.000135064697265625,
      "step": 22129,
      "training_step_time": 0.42056727409362793
    },
    {
      "epoch": 0.00013507080078125,
      "grad_norm": 0.10844124853610992,
      "learning_rate": 7.468910578122418e-05,
      "loss": 0.0453,
      "step": 22130
    },
    {
      "epoch": 0.00013507080078125,
      "model_forward_time": 0.11481857299804688,
      "step": 22130
    },
    {
      "epoch": 0.00013507080078125,
      "step": 22130,
      "training_step_time": 0.4278285503387451
    },
    {
      "epoch": 0.000135076904296875,
      "model_forward_time": 0.11529946327209473,
      "step": 22131
    },
    {
      "epoch": 0.000135076904296875,
      "step": 22131,
      "training_step_time": 0.38947367668151855
    },
    {
      "epoch": 0.0001350830078125,
      "model_forward_time": 0.11491537094116211,
      "step": 22132
    },
    {
      "epoch": 0.0001350830078125,
      "step": 22132,
      "training_step_time": 0.39881110191345215
    },
    {
      "epoch": 0.000135089111328125,
      "model_forward_time": 0.1146697998046875,
      "step": 22133
    },
    {
      "epoch": 0.000135089111328125,
      "step": 22133,
      "training_step_time": 0.3965778350830078
    },
    {
      "epoch": 0.00013509521484375,
      "model_forward_time": 0.11413931846618652,
      "step": 22134
    },
    {
      "epoch": 0.00013509521484375,
      "step": 22134,
      "training_step_time": 0.610541820526123
    },
    {
      "epoch": 0.000135101318359375,
      "model_forward_time": 0.11456441879272461,
      "step": 22135
    },
    {
      "epoch": 0.000135101318359375,
      "step": 22135,
      "training_step_time": 0.4456751346588135
    },
    {
      "epoch": 0.000135107421875,
      "model_forward_time": 0.11533284187316895,
      "step": 22136
    },
    {
      "epoch": 0.000135107421875,
      "step": 22136,
      "training_step_time": 0.4271359443664551
    },
    {
      "epoch": 0.000135113525390625,
      "model_forward_time": 0.11559891700744629,
      "step": 22137
    },
    {
      "epoch": 0.000135113525390625,
      "step": 22137,
      "training_step_time": 0.41303062438964844
    },
    {
      "epoch": 0.00013511962890625,
      "model_forward_time": 0.1150522232055664,
      "step": 22138
    },
    {
      "epoch": 0.00013511962890625,
      "step": 22138,
      "training_step_time": 0.4187917709350586
    },
    {
      "epoch": 0.000135125732421875,
      "model_forward_time": 0.11379551887512207,
      "step": 22139
    },
    {
      "epoch": 0.000135125732421875,
      "step": 22139,
      "training_step_time": 0.3859267234802246
    },
    {
      "epoch": 0.0001351318359375,
      "grad_norm": 0.10213814675807953,
      "learning_rate": 7.466513813840825e-05,
      "loss": 0.048,
      "step": 22140
    },
    {
      "epoch": 0.0001351318359375,
      "model_forward_time": 0.11497378349304199,
      "step": 22140
    },
    {
      "epoch": 0.0001351318359375,
      "step": 22140,
      "training_step_time": 0.5815918445587158
    },
    {
      "epoch": 0.000135137939453125,
      "model_forward_time": 0.11478137969970703,
      "step": 22141
    },
    {
      "epoch": 0.000135137939453125,
      "step": 22141,
      "training_step_time": 0.4076955318450928
    },
    {
      "epoch": 0.00013514404296875,
      "model_forward_time": 0.11420154571533203,
      "step": 22142
    },
    {
      "epoch": 0.00013514404296875,
      "step": 22142,
      "training_step_time": 0.43489789962768555
    },
    {
      "epoch": 0.000135150146484375,
      "model_forward_time": 0.1143500804901123,
      "step": 22143
    },
    {
      "epoch": 0.000135150146484375,
      "step": 22143,
      "training_step_time": 0.4157736301422119
    },
    {
      "epoch": 0.00013515625,
      "model_forward_time": 0.11425924301147461,
      "step": 22144
    },
    {
      "epoch": 0.00013515625,
      "step": 22144,
      "training_step_time": 0.5052502155303955
    },
    {
      "epoch": 0.000135162353515625,
      "model_forward_time": 0.11432671546936035,
      "step": 22145
    },
    {
      "epoch": 0.000135162353515625,
      "step": 22145,
      "training_step_time": 0.388763427734375
    },
    {
      "epoch": 0.00013516845703125,
      "model_forward_time": 0.11461615562438965,
      "step": 22146
    },
    {
      "epoch": 0.00013516845703125,
      "step": 22146,
      "training_step_time": 0.3978078365325928
    },
    {
      "epoch": 0.000135174560546875,
      "model_forward_time": 0.11580443382263184,
      "step": 22147
    },
    {
      "epoch": 0.000135174560546875,
      "step": 22147,
      "training_step_time": 0.3897109031677246
    },
    {
      "epoch": 0.0001351806640625,
      "model_forward_time": 0.11544156074523926,
      "step": 22148
    },
    {
      "epoch": 0.0001351806640625,
      "step": 22148,
      "training_step_time": 0.3877065181732178
    },
    {
      "epoch": 0.000135186767578125,
      "model_forward_time": 0.12245821952819824,
      "step": 22149
    },
    {
      "epoch": 0.000135186767578125,
      "step": 22149,
      "training_step_time": 0.39272260665893555
    },
    {
      "epoch": 0.00013519287109375,
      "grad_norm": 0.13950549066066742,
      "learning_rate": 7.464116300297458e-05,
      "loss": 0.0492,
      "step": 22150
    },
    {
      "epoch": 0.00013519287109375,
      "model_forward_time": 0.11507463455200195,
      "step": 22150
    },
    {
      "epoch": 0.00013519287109375,
      "step": 22150,
      "training_step_time": 0.3988661766052246
    },
    {
      "epoch": 0.000135198974609375,
      "model_forward_time": 0.11529994010925293,
      "step": 22151
    },
    {
      "epoch": 0.000135198974609375,
      "step": 22151,
      "training_step_time": 0.4478917121887207
    },
    {
      "epoch": 0.000135205078125,
      "model_forward_time": 0.11528539657592773,
      "step": 22152
    },
    {
      "epoch": 0.000135205078125,
      "step": 22152,
      "training_step_time": 0.5357398986816406
    },
    {
      "epoch": 0.000135211181640625,
      "model_forward_time": 0.11500358581542969,
      "step": 22153
    },
    {
      "epoch": 0.000135211181640625,
      "step": 22153,
      "training_step_time": 0.46184825897216797
    },
    {
      "epoch": 0.00013521728515625,
      "model_forward_time": 0.11487555503845215,
      "step": 22154
    },
    {
      "epoch": 0.00013521728515625,
      "step": 22154,
      "training_step_time": 0.3925139904022217
    },
    {
      "epoch": 0.000135223388671875,
      "model_forward_time": 0.11472129821777344,
      "step": 22155
    },
    {
      "epoch": 0.000135223388671875,
      "step": 22155,
      "training_step_time": 0.3891475200653076
    },
    {
      "epoch": 0.0001352294921875,
      "model_forward_time": 0.11581850051879883,
      "step": 22156
    },
    {
      "epoch": 0.0001352294921875,
      "step": 22156,
      "training_step_time": 0.39002442359924316
    },
    {
      "epoch": 0.000135235595703125,
      "model_forward_time": 0.11488127708435059,
      "step": 22157
    },
    {
      "epoch": 0.000135235595703125,
      "step": 22157,
      "training_step_time": 0.4034600257873535
    },
    {
      "epoch": 0.00013524169921875,
      "model_forward_time": 0.11571049690246582,
      "step": 22158
    },
    {
      "epoch": 0.00013524169921875,
      "step": 22158,
      "training_step_time": 0.5176689624786377
    },
    {
      "epoch": 0.000135247802734375,
      "model_forward_time": 0.11517333984375,
      "step": 22159
    },
    {
      "epoch": 0.000135247802734375,
      "step": 22159,
      "training_step_time": 0.39462900161743164
    },
    {
      "epoch": 0.00013525390625,
      "grad_norm": 0.13313333690166473,
      "learning_rate": 7.461718038220621e-05,
      "loss": 0.051,
      "step": 22160
    },
    {
      "epoch": 0.00013525390625,
      "model_forward_time": 0.11502361297607422,
      "step": 22160
    },
    {
      "epoch": 0.00013525390625,
      "step": 22160,
      "training_step_time": 0.3823213577270508
    },
    {
      "epoch": 0.000135260009765625,
      "model_forward_time": 0.1153872013092041,
      "step": 22161
    },
    {
      "epoch": 0.000135260009765625,
      "step": 22161,
      "training_step_time": 0.38966965675354004
    },
    {
      "epoch": 0.00013526611328125,
      "model_forward_time": 0.11446738243103027,
      "step": 22162
    },
    {
      "epoch": 0.00013526611328125,
      "step": 22162,
      "training_step_time": 0.390700101852417
    },
    {
      "epoch": 0.000135272216796875,
      "model_forward_time": 0.11492562294006348,
      "step": 22163
    },
    {
      "epoch": 0.000135272216796875,
      "step": 22163,
      "training_step_time": 0.47701215744018555
    },
    {
      "epoch": 0.0001352783203125,
      "model_forward_time": 0.11533570289611816,
      "step": 22164
    },
    {
      "epoch": 0.0001352783203125,
      "step": 22164,
      "training_step_time": 0.461728572845459
    },
    {
      "epoch": 0.000135284423828125,
      "model_forward_time": 0.115753173828125,
      "step": 22165
    },
    {
      "epoch": 0.000135284423828125,
      "step": 22165,
      "training_step_time": 0.4631171226501465
    },
    {
      "epoch": 0.00013529052734375,
      "model_forward_time": 0.11553645133972168,
      "step": 22166
    },
    {
      "epoch": 0.00013529052734375,
      "step": 22166,
      "training_step_time": 0.4714522361755371
    },
    {
      "epoch": 0.000135296630859375,
      "model_forward_time": 0.11812019348144531,
      "step": 22167
    },
    {
      "epoch": 0.000135296630859375,
      "step": 22167,
      "training_step_time": 0.504072904586792
    },
    {
      "epoch": 0.000135302734375,
      "model_forward_time": 0.1148836612701416,
      "step": 22168
    },
    {
      "epoch": 0.000135302734375,
      "step": 22168,
      "training_step_time": 0.39518141746520996
    },
    {
      "epoch": 0.000135308837890625,
      "model_forward_time": 0.11472153663635254,
      "step": 22169
    },
    {
      "epoch": 0.000135308837890625,
      "step": 22169,
      "training_step_time": 0.3828239440917969
    },
    {
      "epoch": 0.00013531494140625,
      "grad_norm": 0.1537587195634842,
      "learning_rate": 7.45931902833884e-05,
      "loss": 0.0437,
      "step": 22170
    },
    {
      "epoch": 0.00013531494140625,
      "model_forward_time": 0.11504793167114258,
      "step": 22170
    },
    {
      "epoch": 0.00013531494140625,
      "step": 22170,
      "training_step_time": 0.38823461532592773
    },
    {
      "epoch": 0.000135321044921875,
      "model_forward_time": 0.11508035659790039,
      "step": 22171
    },
    {
      "epoch": 0.000135321044921875,
      "step": 22171,
      "training_step_time": 0.43868350982666016
    },
    {
      "epoch": 0.0001353271484375,
      "model_forward_time": 0.11561751365661621,
      "step": 22172
    },
    {
      "epoch": 0.0001353271484375,
      "step": 22172,
      "training_step_time": 0.3848710060119629
    },
    {
      "epoch": 0.000135333251953125,
      "model_forward_time": 0.11507034301757812,
      "step": 22173
    },
    {
      "epoch": 0.000135333251953125,
      "step": 22173,
      "training_step_time": 0.3860769271850586
    },
    {
      "epoch": 0.00013533935546875,
      "model_forward_time": 0.1170809268951416,
      "step": 22174
    },
    {
      "epoch": 0.00013533935546875,
      "step": 22174,
      "training_step_time": 0.39389681816101074
    },
    {
      "epoch": 0.000135345458984375,
      "model_forward_time": 0.11580061912536621,
      "step": 22175
    },
    {
      "epoch": 0.000135345458984375,
      "step": 22175,
      "training_step_time": 0.38608694076538086
    },
    {
      "epoch": 0.0001353515625,
      "model_forward_time": 0.11528491973876953,
      "step": 22176
    },
    {
      "epoch": 0.0001353515625,
      "step": 22176,
      "training_step_time": 0.38355588912963867
    },
    {
      "epoch": 0.000135357666015625,
      "model_forward_time": 0.11638951301574707,
      "step": 22177
    },
    {
      "epoch": 0.000135357666015625,
      "step": 22177,
      "training_step_time": 0.39303088188171387
    },
    {
      "epoch": 0.00013536376953125,
      "model_forward_time": 0.11510896682739258,
      "step": 22178
    },
    {
      "epoch": 0.00013536376953125,
      "step": 22178,
      "training_step_time": 0.4410233497619629
    },
    {
      "epoch": 0.000135369873046875,
      "model_forward_time": 0.11507987976074219,
      "step": 22179
    },
    {
      "epoch": 0.000135369873046875,
      "step": 22179,
      "training_step_time": 0.4030575752258301
    },
    {
      "epoch": 0.0001353759765625,
      "grad_norm": 0.16291643679141998,
      "learning_rate": 7.456919271380875e-05,
      "loss": 0.0508,
      "step": 22180
    },
    {
      "epoch": 0.0001353759765625,
      "model_forward_time": 0.11542367935180664,
      "step": 22180
    },
    {
      "epoch": 0.0001353759765625,
      "step": 22180,
      "training_step_time": 0.36843371391296387
    },
    {
      "epoch": 0.000135382080078125,
      "model_forward_time": 0.11537432670593262,
      "step": 22181
    },
    {
      "epoch": 0.000135382080078125,
      "step": 22181,
      "training_step_time": 0.44209766387939453
    },
    {
      "epoch": 0.00013538818359375,
      "model_forward_time": 0.11649608612060547,
      "step": 22182
    },
    {
      "epoch": 0.00013538818359375,
      "step": 22182,
      "training_step_time": 0.48747777938842773
    },
    {
      "epoch": 0.000135394287109375,
      "model_forward_time": 0.11501693725585938,
      "step": 22183
    },
    {
      "epoch": 0.000135394287109375,
      "step": 22183,
      "training_step_time": 0.4297966957092285
    },
    {
      "epoch": 0.000135400390625,
      "model_forward_time": 0.11511921882629395,
      "step": 22184
    },
    {
      "epoch": 0.000135400390625,
      "step": 22184,
      "training_step_time": 0.38898754119873047
    },
    {
      "epoch": 0.000135406494140625,
      "model_forward_time": 0.11544418334960938,
      "step": 22185
    },
    {
      "epoch": 0.000135406494140625,
      "step": 22185,
      "training_step_time": 0.4075148105621338
    },
    {
      "epoch": 0.00013541259765625,
      "model_forward_time": 0.11511874198913574,
      "step": 22186
    },
    {
      "epoch": 0.00013541259765625,
      "step": 22186,
      "training_step_time": 0.40764784812927246
    },
    {
      "epoch": 0.000135418701171875,
      "model_forward_time": 0.11471152305603027,
      "step": 22187
    },
    {
      "epoch": 0.000135418701171875,
      "step": 22187,
      "training_step_time": 0.43685197830200195
    },
    {
      "epoch": 0.0001354248046875,
      "model_forward_time": 0.11490821838378906,
      "step": 22188
    },
    {
      "epoch": 0.0001354248046875,
      "step": 22188,
      "training_step_time": 0.40044283866882324
    },
    {
      "epoch": 0.000135430908203125,
      "model_forward_time": 0.11588215827941895,
      "step": 22189
    },
    {
      "epoch": 0.000135430908203125,
      "step": 22189,
      "training_step_time": 0.39698195457458496
    },
    {
      "epoch": 0.00013543701171875,
      "grad_norm": 0.1087411418557167,
      "learning_rate": 7.454518768075704e-05,
      "loss": 0.0457,
      "step": 22190
    },
    {
      "epoch": 0.00013543701171875,
      "model_forward_time": 0.11478447914123535,
      "step": 22190
    },
    {
      "epoch": 0.00013543701171875,
      "step": 22190,
      "training_step_time": 0.3930530548095703
    },
    {
      "epoch": 0.000135443115234375,
      "model_forward_time": 0.11500668525695801,
      "step": 22191
    },
    {
      "epoch": 0.000135443115234375,
      "step": 22191,
      "training_step_time": 0.4032926559448242
    },
    {
      "epoch": 0.00013544921875,
      "model_forward_time": 0.11635971069335938,
      "step": 22192
    },
    {
      "epoch": 0.00013544921875,
      "step": 22192,
      "training_step_time": 0.4166102409362793
    },
    {
      "epoch": 0.000135455322265625,
      "model_forward_time": 0.11539101600646973,
      "step": 22193
    },
    {
      "epoch": 0.000135455322265625,
      "step": 22193,
      "training_step_time": 0.4051082134246826
    },
    {
      "epoch": 0.00013546142578125,
      "model_forward_time": 0.11504793167114258,
      "step": 22194
    },
    {
      "epoch": 0.00013546142578125,
      "step": 22194,
      "training_step_time": 0.5293838977813721
    },
    {
      "epoch": 0.000135467529296875,
      "model_forward_time": 0.1155557632446289,
      "step": 22195
    },
    {
      "epoch": 0.000135467529296875,
      "step": 22195,
      "training_step_time": 0.46701478958129883
    },
    {
      "epoch": 0.0001354736328125,
      "model_forward_time": 0.11565613746643066,
      "step": 22196
    },
    {
      "epoch": 0.0001354736328125,
      "step": 22196,
      "training_step_time": 0.4385802745819092
    },
    {
      "epoch": 0.000135479736328125,
      "model_forward_time": 0.115692138671875,
      "step": 22197
    },
    {
      "epoch": 0.000135479736328125,
      "step": 22197,
      "training_step_time": 0.404494047164917
    },
    {
      "epoch": 0.00013548583984375,
      "model_forward_time": 0.11681556701660156,
      "step": 22198
    },
    {
      "epoch": 0.00013548583984375,
      "step": 22198,
      "training_step_time": 0.38985204696655273
    },
    {
      "epoch": 0.000135491943359375,
      "model_forward_time": 0.11515092849731445,
      "step": 22199
    },
    {
      "epoch": 0.000135491943359375,
      "step": 22199,
      "training_step_time": 0.4031548500061035
    },
    {
      "epoch": 0.000135498046875,
      "grad_norm": 0.08320341259241104,
      "learning_rate": 7.452117519152542e-05,
      "loss": 0.0473,
      "step": 22200
    },
    {
      "epoch": 0.000135498046875,
      "model_forward_time": 0.11435937881469727,
      "step": 22200
    },
    {
      "epoch": 0.000135498046875,
      "step": 22200,
      "training_step_time": 0.4177591800689697
    },
    {
      "epoch": 0.000135504150390625,
      "model_forward_time": 0.11627912521362305,
      "step": 22201
    },
    {
      "epoch": 0.000135504150390625,
      "step": 22201,
      "training_step_time": 0.5066239833831787
    },
    {
      "epoch": 0.00013551025390625,
      "model_forward_time": 0.11578774452209473,
      "step": 22202
    },
    {
      "epoch": 0.00013551025390625,
      "step": 22202,
      "training_step_time": 0.39937376976013184
    },
    {
      "epoch": 0.000135516357421875,
      "model_forward_time": 0.11494088172912598,
      "step": 22203
    },
    {
      "epoch": 0.000135516357421875,
      "step": 22203,
      "training_step_time": 0.39165449142456055
    },
    {
      "epoch": 0.0001355224609375,
      "model_forward_time": 0.11543941497802734,
      "step": 22204
    },
    {
      "epoch": 0.0001355224609375,
      "step": 22204,
      "training_step_time": 0.3967292308807373
    },
    {
      "epoch": 0.000135528564453125,
      "model_forward_time": 0.1151735782623291,
      "step": 22205
    },
    {
      "epoch": 0.000135528564453125,
      "step": 22205,
      "training_step_time": 0.39348602294921875
    },
    {
      "epoch": 0.00013553466796875,
      "model_forward_time": 0.1164708137512207,
      "step": 22206
    },
    {
      "epoch": 0.00013553466796875,
      "step": 22206,
      "training_step_time": 0.420459508895874
    },
    {
      "epoch": 0.000135540771484375,
      "model_forward_time": 0.11539983749389648,
      "step": 22207
    },
    {
      "epoch": 0.000135540771484375,
      "step": 22207,
      "training_step_time": 0.501610279083252
    },
    {
      "epoch": 0.000135546875,
      "model_forward_time": 0.11545348167419434,
      "step": 22208
    },
    {
      "epoch": 0.000135546875,
      "step": 22208,
      "training_step_time": 0.4091455936431885
    },
    {
      "epoch": 0.000135552978515625,
      "model_forward_time": 0.11647462844848633,
      "step": 22209
    },
    {
      "epoch": 0.000135552978515625,
      "step": 22209,
      "training_step_time": 0.37899160385131836
    },
    {
      "epoch": 0.00013555908203125,
      "grad_norm": 0.12234555929899216,
      "learning_rate": 7.44971552534082e-05,
      "loss": 0.0425,
      "step": 22210
    },
    {
      "epoch": 0.00013555908203125,
      "model_forward_time": 0.11518263816833496,
      "step": 22210
    },
    {
      "epoch": 0.00013555908203125,
      "step": 22210,
      "training_step_time": 0.469851016998291
    },
    {
      "epoch": 0.000135565185546875,
      "model_forward_time": 0.11528897285461426,
      "step": 22211
    },
    {
      "epoch": 0.000135565185546875,
      "step": 22211,
      "training_step_time": 0.487687349319458
    },
    {
      "epoch": 0.0001355712890625,
      "model_forward_time": 0.11474752426147461,
      "step": 22212
    },
    {
      "epoch": 0.0001355712890625,
      "step": 22212,
      "training_step_time": 0.39874792098999023
    },
    {
      "epoch": 0.000135577392578125,
      "model_forward_time": 0.11515092849731445,
      "step": 22213
    },
    {
      "epoch": 0.000135577392578125,
      "step": 22213,
      "training_step_time": 0.4159126281738281
    },
    {
      "epoch": 0.00013558349609375,
      "model_forward_time": 0.1152796745300293,
      "step": 22214
    },
    {
      "epoch": 0.00013558349609375,
      "step": 22214,
      "training_step_time": 0.4261636734008789
    },
    {
      "epoch": 0.000135589599609375,
      "model_forward_time": 0.11573648452758789,
      "step": 22215
    },
    {
      "epoch": 0.000135589599609375,
      "step": 22215,
      "training_step_time": 0.4331786632537842
    },
    {
      "epoch": 0.000135595703125,
      "model_forward_time": 0.11470818519592285,
      "step": 22216
    },
    {
      "epoch": 0.000135595703125,
      "step": 22216,
      "training_step_time": 0.3992331027984619
    },
    {
      "epoch": 0.000135601806640625,
      "model_forward_time": 0.11509013175964355,
      "step": 22217
    },
    {
      "epoch": 0.000135601806640625,
      "step": 22217,
      "training_step_time": 0.38927459716796875
    },
    {
      "epoch": 0.00013560791015625,
      "model_forward_time": 0.1156766414642334,
      "step": 22218
    },
    {
      "epoch": 0.00013560791015625,
      "step": 22218,
      "training_step_time": 0.3936605453491211
    },
    {
      "epoch": 0.000135614013671875,
      "model_forward_time": 0.11490416526794434,
      "step": 22219
    },
    {
      "epoch": 0.000135614013671875,
      "step": 22219,
      "training_step_time": 0.42074131965637207
    },
    {
      "epoch": 0.0001356201171875,
      "grad_norm": 0.16518570482730865,
      "learning_rate": 7.447312787370203e-05,
      "loss": 0.0527,
      "step": 22220
    },
    {
      "epoch": 0.0001356201171875,
      "model_forward_time": 0.11464905738830566,
      "step": 22220
    },
    {
      "epoch": 0.0001356201171875,
      "step": 22220,
      "training_step_time": 0.39131784439086914
    },
    {
      "epoch": 0.000135626220703125,
      "model_forward_time": 0.11533045768737793,
      "step": 22221
    },
    {
      "epoch": 0.000135626220703125,
      "step": 22221,
      "training_step_time": 0.39602065086364746
    },
    {
      "epoch": 0.00013563232421875,
      "model_forward_time": 0.11574625968933105,
      "step": 22222
    },
    {
      "epoch": 0.00013563232421875,
      "step": 22222,
      "training_step_time": 0.38759374618530273
    },
    {
      "epoch": 0.000135638427734375,
      "model_forward_time": 0.11507034301757812,
      "step": 22223
    },
    {
      "epoch": 0.000135638427734375,
      "step": 22223,
      "training_step_time": 0.43546223640441895
    },
    {
      "epoch": 0.00013564453125,
      "model_forward_time": 0.11774492263793945,
      "step": 22224
    },
    {
      "epoch": 0.00013564453125,
      "step": 22224,
      "training_step_time": 0.4684023857116699
    },
    {
      "epoch": 0.000135650634765625,
      "model_forward_time": 0.11497807502746582,
      "step": 22225
    },
    {
      "epoch": 0.000135650634765625,
      "step": 22225,
      "training_step_time": 0.4215734004974365
    },
    {
      "epoch": 0.00013565673828125,
      "model_forward_time": 0.11487197875976562,
      "step": 22226
    },
    {
      "epoch": 0.00013565673828125,
      "step": 22226,
      "training_step_time": 0.42487621307373047
    },
    {
      "epoch": 0.000135662841796875,
      "model_forward_time": 0.11562156677246094,
      "step": 22227
    },
    {
      "epoch": 0.000135662841796875,
      "step": 22227,
      "training_step_time": 0.3917992115020752
    },
    {
      "epoch": 0.0001356689453125,
      "model_forward_time": 0.1154940128326416,
      "step": 22228
    },
    {
      "epoch": 0.0001356689453125,
      "step": 22228,
      "training_step_time": 0.47005319595336914
    },
    {
      "epoch": 0.000135675048828125,
      "model_forward_time": 0.11501073837280273,
      "step": 22229
    },
    {
      "epoch": 0.000135675048828125,
      "step": 22229,
      "training_step_time": 0.4833698272705078
    },
    {
      "epoch": 0.00013568115234375,
      "grad_norm": 0.14984586834907532,
      "learning_rate": 7.444909305970578e-05,
      "loss": 0.0472,
      "step": 22230
    },
    {
      "epoch": 0.00013568115234375,
      "model_forward_time": 0.11986446380615234,
      "step": 22230
    },
    {
      "epoch": 0.00013568115234375,
      "step": 22230,
      "training_step_time": 0.398500919342041
    },
    {
      "epoch": 0.000135687255859375,
      "model_forward_time": 0.11483001708984375,
      "step": 22231
    },
    {
      "epoch": 0.000135687255859375,
      "step": 22231,
      "training_step_time": 0.39254260063171387
    },
    {
      "epoch": 0.000135693359375,
      "model_forward_time": 0.11540699005126953,
      "step": 22232
    },
    {
      "epoch": 0.000135693359375,
      "step": 22232,
      "training_step_time": 0.4022691249847412
    },
    {
      "epoch": 0.000135699462890625,
      "model_forward_time": 0.11598825454711914,
      "step": 22233
    },
    {
      "epoch": 0.000135699462890625,
      "step": 22233,
      "training_step_time": 0.45116209983825684
    },
    {
      "epoch": 0.00013570556640625,
      "model_forward_time": 0.11511468887329102,
      "step": 22234
    },
    {
      "epoch": 0.00013570556640625,
      "step": 22234,
      "training_step_time": 0.3977479934692383
    },
    {
      "epoch": 0.000135711669921875,
      "model_forward_time": 0.11496806144714355,
      "step": 22235
    },
    {
      "epoch": 0.000135711669921875,
      "step": 22235,
      "training_step_time": 0.4010136127471924
    },
    {
      "epoch": 0.0001357177734375,
      "model_forward_time": 0.1155087947845459,
      "step": 22236
    },
    {
      "epoch": 0.0001357177734375,
      "step": 22236,
      "training_step_time": 0.4107069969177246
    },
    {
      "epoch": 0.000135723876953125,
      "model_forward_time": 0.11482357978820801,
      "step": 22237
    },
    {
      "epoch": 0.000135723876953125,
      "step": 22237,
      "training_step_time": 0.3943750858306885
    },
    {
      "epoch": 0.00013572998046875,
      "model_forward_time": 0.11611819267272949,
      "step": 22238
    },
    {
      "epoch": 0.00013572998046875,
      "step": 22238,
      "training_step_time": 0.4001331329345703
    },
    {
      "epoch": 0.000135736083984375,
      "model_forward_time": 0.11494183540344238,
      "step": 22239
    },
    {
      "epoch": 0.000135736083984375,
      "step": 22239,
      "training_step_time": 0.47863173484802246
    },
    {
      "epoch": 0.0001357421875,
      "grad_norm": 0.15640617907047272,
      "learning_rate": 7.44250508187206e-05,
      "loss": 0.0461,
      "step": 22240
    },
    {
      "epoch": 0.0001357421875,
      "model_forward_time": 0.11524844169616699,
      "step": 22240
    },
    {
      "epoch": 0.0001357421875,
      "step": 22240,
      "training_step_time": 0.4936835765838623
    },
    {
      "epoch": 0.000135748291015625,
      "model_forward_time": 0.11516904830932617,
      "step": 22241
    },
    {
      "epoch": 0.000135748291015625,
      "step": 22241,
      "training_step_time": 0.3894951343536377
    },
    {
      "epoch": 0.00013575439453125,
      "model_forward_time": 0.11546826362609863,
      "step": 22242
    },
    {
      "epoch": 0.00013575439453125,
      "step": 22242,
      "training_step_time": 0.45406413078308105
    },
    {
      "epoch": 0.000135760498046875,
      "model_forward_time": 0.11527037620544434,
      "step": 22243
    },
    {
      "epoch": 0.000135760498046875,
      "step": 22243,
      "training_step_time": 0.43868350982666016
    },
    {
      "epoch": 0.0001357666015625,
      "model_forward_time": 0.1144554615020752,
      "step": 22244
    },
    {
      "epoch": 0.0001357666015625,
      "step": 22244,
      "training_step_time": 0.39014101028442383
    },
    {
      "epoch": 0.000135772705078125,
      "model_forward_time": 0.11530852317810059,
      "step": 22245
    },
    {
      "epoch": 0.000135772705078125,
      "step": 22245,
      "training_step_time": 0.3846766948699951
    },
    {
      "epoch": 0.00013577880859375,
      "model_forward_time": 0.1154327392578125,
      "step": 22246
    },
    {
      "epoch": 0.00013577880859375,
      "step": 22246,
      "training_step_time": 0.4211561679840088
    },
    {
      "epoch": 0.000135784912109375,
      "model_forward_time": 0.11507105827331543,
      "step": 22247
    },
    {
      "epoch": 0.000135784912109375,
      "step": 22247,
      "training_step_time": 0.39826130867004395
    },
    {
      "epoch": 0.000135791015625,
      "model_forward_time": 0.11562561988830566,
      "step": 22248
    },
    {
      "epoch": 0.000135791015625,
      "step": 22248,
      "training_step_time": 0.392733097076416
    },
    {
      "epoch": 0.000135797119140625,
      "model_forward_time": 0.11588501930236816,
      "step": 22249
    },
    {
      "epoch": 0.000135797119140625,
      "step": 22249,
      "training_step_time": 0.3856635093688965
    },
    {
      "epoch": 0.00013580322265625,
      "grad_norm": 0.18253982067108154,
      "learning_rate": 7.440100115804991e-05,
      "loss": 0.0501,
      "step": 22250
    },
    {
      "epoch": 0.00013580322265625,
      "model_forward_time": 0.11572456359863281,
      "step": 22250
    },
    {
      "epoch": 0.00013580322265625,
      "step": 22250,
      "training_step_time": 0.39165496826171875
    },
    {
      "epoch": 0.000135809326171875,
      "model_forward_time": 0.11565113067626953,
      "step": 22251
    },
    {
      "epoch": 0.000135809326171875,
      "step": 22251,
      "training_step_time": 0.4268796443939209
    },
    {
      "epoch": 0.0001358154296875,
      "model_forward_time": 0.11673736572265625,
      "step": 22252
    },
    {
      "epoch": 0.0001358154296875,
      "step": 22252,
      "training_step_time": 0.39525389671325684
    },
    {
      "epoch": 0.000135821533203125,
      "model_forward_time": 0.11565160751342773,
      "step": 22253
    },
    {
      "epoch": 0.000135821533203125,
      "step": 22253,
      "training_step_time": 0.41484737396240234
    },
    {
      "epoch": 0.00013582763671875,
      "model_forward_time": 0.11543917655944824,
      "step": 22254
    },
    {
      "epoch": 0.00013582763671875,
      "step": 22254,
      "training_step_time": 0.4685955047607422
    },
    {
      "epoch": 0.000135833740234375,
      "model_forward_time": 0.11504411697387695,
      "step": 22255
    },
    {
      "epoch": 0.000135833740234375,
      "step": 22255,
      "training_step_time": 0.4830808639526367
    },
    {
      "epoch": 0.00013583984375,
      "model_forward_time": 0.11519289016723633,
      "step": 22256
    },
    {
      "epoch": 0.00013583984375,
      "step": 22256,
      "training_step_time": 0.40964579582214355
    },
    {
      "epoch": 0.000135845947265625,
      "model_forward_time": 0.11549854278564453,
      "step": 22257
    },
    {
      "epoch": 0.000135845947265625,
      "step": 22257,
      "training_step_time": 0.39045143127441406
    },
    {
      "epoch": 0.00013585205078125,
      "model_forward_time": 0.11593079566955566,
      "step": 22258
    },
    {
      "epoch": 0.00013585205078125,
      "step": 22258,
      "training_step_time": 0.4429478645324707
    },
    {
      "epoch": 0.000135858154296875,
      "model_forward_time": 0.11478924751281738,
      "step": 22259
    },
    {
      "epoch": 0.000135858154296875,
      "step": 22259,
      "training_step_time": 0.4102044105529785
    },
    {
      "epoch": 0.0001358642578125,
      "grad_norm": 0.12098889797925949,
      "learning_rate": 7.437694408499933e-05,
      "loss": 0.0503,
      "step": 22260
    },
    {
      "epoch": 0.0001358642578125,
      "model_forward_time": 0.11493515968322754,
      "step": 22260
    },
    {
      "epoch": 0.0001358642578125,
      "step": 22260,
      "training_step_time": 0.397899866104126
    },
    {
      "epoch": 0.000135870361328125,
      "model_forward_time": 0.1155853271484375,
      "step": 22261
    },
    {
      "epoch": 0.000135870361328125,
      "step": 22261,
      "training_step_time": 0.40793561935424805
    },
    {
      "epoch": 0.00013587646484375,
      "model_forward_time": 0.11624336242675781,
      "step": 22262
    },
    {
      "epoch": 0.00013587646484375,
      "step": 22262,
      "training_step_time": 0.37571287155151367
    },
    {
      "epoch": 0.000135882568359375,
      "model_forward_time": 0.11469912528991699,
      "step": 22263
    },
    {
      "epoch": 0.000135882568359375,
      "step": 22263,
      "training_step_time": 0.3890104293823242
    },
    {
      "epoch": 0.000135888671875,
      "model_forward_time": 0.11676883697509766,
      "step": 22264
    },
    {
      "epoch": 0.000135888671875,
      "step": 22264,
      "training_step_time": 0.38929128646850586
    },
    {
      "epoch": 0.000135894775390625,
      "model_forward_time": 0.11563825607299805,
      "step": 22265
    },
    {
      "epoch": 0.000135894775390625,
      "step": 22265,
      "training_step_time": 0.38969969749450684
    },
    {
      "epoch": 0.00013590087890625,
      "model_forward_time": 0.11535859107971191,
      "step": 22266
    },
    {
      "epoch": 0.00013590087890625,
      "step": 22266,
      "training_step_time": 0.6059670448303223
    },
    {
      "epoch": 0.000135906982421875,
      "model_forward_time": 0.11741447448730469,
      "step": 22267
    },
    {
      "epoch": 0.000135906982421875,
      "step": 22267,
      "training_step_time": 0.43336939811706543
    },
    {
      "epoch": 0.0001359130859375,
      "model_forward_time": 0.11473965644836426,
      "step": 22268
    },
    {
      "epoch": 0.0001359130859375,
      "step": 22268,
      "training_step_time": 0.5114929676055908
    },
    {
      "epoch": 0.000135919189453125,
      "model_forward_time": 0.11452317237854004,
      "step": 22269
    },
    {
      "epoch": 0.000135919189453125,
      "step": 22269,
      "training_step_time": 0.48238372802734375
    },
    {
      "epoch": 0.00013592529296875,
      "grad_norm": 0.1842634677886963,
      "learning_rate": 7.435287960687675e-05,
      "loss": 0.0461,
      "step": 22270
    },
    {
      "epoch": 0.00013592529296875,
      "model_forward_time": 0.1149146556854248,
      "step": 22270
    },
    {
      "epoch": 0.00013592529296875,
      "step": 22270,
      "training_step_time": 0.4019331932067871
    },
    {
      "epoch": 0.000135931396484375,
      "model_forward_time": 0.11448216438293457,
      "step": 22271
    },
    {
      "epoch": 0.000135931396484375,
      "step": 22271,
      "training_step_time": 0.37870192527770996
    },
    {
      "epoch": 0.0001359375,
      "model_forward_time": 0.11489725112915039,
      "step": 22272
    },
    {
      "epoch": 0.0001359375,
      "step": 22272,
      "training_step_time": 0.4339416027069092
    },
    {
      "epoch": 0.000135943603515625,
      "model_forward_time": 0.11500716209411621,
      "step": 22273
    },
    {
      "epoch": 0.000135943603515625,
      "step": 22273,
      "training_step_time": 0.4007434844970703
    },
    {
      "epoch": 0.00013594970703125,
      "model_forward_time": 0.11551141738891602,
      "step": 22274
    },
    {
      "epoch": 0.00013594970703125,
      "step": 22274,
      "training_step_time": 0.38747715950012207
    },
    {
      "epoch": 0.000135955810546875,
      "model_forward_time": 0.1157827377319336,
      "step": 22275
    },
    {
      "epoch": 0.000135955810546875,
      "step": 22275,
      "training_step_time": 0.4220755100250244
    },
    {
      "epoch": 0.0001359619140625,
      "model_forward_time": 0.11485815048217773,
      "step": 22276
    },
    {
      "epoch": 0.0001359619140625,
      "step": 22276,
      "training_step_time": 0.38681864738464355
    },
    {
      "epoch": 0.000135968017578125,
      "model_forward_time": 0.11601996421813965,
      "step": 22277
    },
    {
      "epoch": 0.000135968017578125,
      "step": 22277,
      "training_step_time": 0.39783763885498047
    },
    {
      "epoch": 0.00013597412109375,
      "model_forward_time": 0.1157236099243164,
      "step": 22278
    },
    {
      "epoch": 0.00013597412109375,
      "step": 22278,
      "training_step_time": 0.39917778968811035
    },
    {
      "epoch": 0.000135980224609375,
      "model_forward_time": 0.11505532264709473,
      "step": 22279
    },
    {
      "epoch": 0.000135980224609375,
      "step": 22279,
      "training_step_time": 0.39509081840515137
    },
    {
      "epoch": 0.000135986328125,
      "grad_norm": 0.1597026288509369,
      "learning_rate": 7.432880773099237e-05,
      "loss": 0.046,
      "step": 22280
    },
    {
      "epoch": 0.000135986328125,
      "model_forward_time": 0.11565661430358887,
      "step": 22280
    },
    {
      "epoch": 0.000135986328125,
      "step": 22280,
      "training_step_time": 0.39718103408813477
    },
    {
      "epoch": 0.000135992431640625,
      "model_forward_time": 0.11565947532653809,
      "step": 22281
    },
    {
      "epoch": 0.000135992431640625,
      "step": 22281,
      "training_step_time": 0.4572722911834717
    },
    {
      "epoch": 0.00013599853515625,
      "model_forward_time": 0.11517024040222168,
      "step": 22282
    },
    {
      "epoch": 0.00013599853515625,
      "step": 22282,
      "training_step_time": 0.45359182357788086
    },
    {
      "epoch": 0.000136004638671875,
      "model_forward_time": 0.1157834529876709,
      "step": 22283
    },
    {
      "epoch": 0.000136004638671875,
      "step": 22283,
      "training_step_time": 0.3995819091796875
    },
    {
      "epoch": 0.0001360107421875,
      "model_forward_time": 0.11965274810791016,
      "step": 22284
    },
    {
      "epoch": 0.0001360107421875,
      "step": 22284,
      "training_step_time": 0.48595094680786133
    },
    {
      "epoch": 0.000136016845703125,
      "model_forward_time": 0.11822915077209473,
      "step": 22285
    },
    {
      "epoch": 0.000136016845703125,
      "step": 22285,
      "training_step_time": 0.3971061706542969
    },
    {
      "epoch": 0.00013602294921875,
      "model_forward_time": 0.11618232727050781,
      "step": 22286
    },
    {
      "epoch": 0.00013602294921875,
      "step": 22286,
      "training_step_time": 0.38639402389526367
    },
    {
      "epoch": 0.000136029052734375,
      "model_forward_time": 0.11584949493408203,
      "step": 22287
    },
    {
      "epoch": 0.000136029052734375,
      "step": 22287,
      "training_step_time": 0.42067670822143555
    },
    {
      "epoch": 0.00013603515625,
      "model_forward_time": 0.11547064781188965,
      "step": 22288
    },
    {
      "epoch": 0.00013603515625,
      "step": 22288,
      "training_step_time": 0.4582483768463135
    },
    {
      "epoch": 0.000136041259765625,
      "model_forward_time": 0.11531710624694824,
      "step": 22289
    },
    {
      "epoch": 0.000136041259765625,
      "step": 22289,
      "training_step_time": 0.39787793159484863
    },
    {
      "epoch": 0.00013604736328125,
      "grad_norm": 0.10871325433254242,
      "learning_rate": 7.430472846465856e-05,
      "loss": 0.0415,
      "step": 22290
    },
    {
      "epoch": 0.00013604736328125,
      "model_forward_time": 0.11603927612304688,
      "step": 22290
    },
    {
      "epoch": 0.00013604736328125,
      "step": 22290,
      "training_step_time": 0.542949914932251
    },
    {
      "epoch": 0.000136053466796875,
      "model_forward_time": 0.11496376991271973,
      "step": 22291
    },
    {
      "epoch": 0.000136053466796875,
      "step": 22291,
      "training_step_time": 0.38904690742492676
    },
    {
      "epoch": 0.0001360595703125,
      "model_forward_time": 0.11489701271057129,
      "step": 22292
    },
    {
      "epoch": 0.0001360595703125,
      "step": 22292,
      "training_step_time": 0.3784205913543701
    },
    {
      "epoch": 0.000136065673828125,
      "model_forward_time": 0.11584043502807617,
      "step": 22293
    },
    {
      "epoch": 0.000136065673828125,
      "step": 22293,
      "training_step_time": 0.3960106372833252
    },
    {
      "epoch": 0.00013607177734375,
      "model_forward_time": 0.11508369445800781,
      "step": 22294
    },
    {
      "epoch": 0.00013607177734375,
      "step": 22294,
      "training_step_time": 0.5774233341217041
    },
    {
      "epoch": 0.000136077880859375,
      "model_forward_time": 0.1145331859588623,
      "step": 22295
    },
    {
      "epoch": 0.000136077880859375,
      "step": 22295,
      "training_step_time": 0.4141864776611328
    },
    {
      "epoch": 0.000136083984375,
      "model_forward_time": 0.11541032791137695,
      "step": 22296
    },
    {
      "epoch": 0.000136083984375,
      "step": 22296,
      "training_step_time": 0.45891666412353516
    },
    {
      "epoch": 0.000136090087890625,
      "model_forward_time": 0.11480951309204102,
      "step": 22297
    },
    {
      "epoch": 0.000136090087890625,
      "step": 22297,
      "training_step_time": 0.39986300468444824
    },
    {
      "epoch": 0.00013609619140625,
      "model_forward_time": 0.11511731147766113,
      "step": 22298
    },
    {
      "epoch": 0.00013609619140625,
      "step": 22298,
      "training_step_time": 0.41820549964904785
    },
    {
      "epoch": 0.000136102294921875,
      "model_forward_time": 0.1150820255279541,
      "step": 22299
    },
    {
      "epoch": 0.000136102294921875,
      "step": 22299,
      "training_step_time": 0.41083526611328125
    },
    {
      "epoch": 0.0001361083984375,
      "grad_norm": 0.16504740715026855,
      "learning_rate": 7.428064181518997e-05,
      "loss": 0.0442,
      "step": 22300
    },
    {
      "epoch": 0.0001361083984375,
      "model_forward_time": 0.1152501106262207,
      "step": 22300
    },
    {
      "epoch": 0.0001361083984375,
      "step": 22300,
      "training_step_time": 0.40647268295288086
    },
    {
      "epoch": 0.000136114501953125,
      "model_forward_time": 0.11539673805236816,
      "step": 22301
    },
    {
      "epoch": 0.000136114501953125,
      "step": 22301,
      "training_step_time": 0.4564194679260254
    },
    {
      "epoch": 0.00013612060546875,
      "model_forward_time": 0.11495733261108398,
      "step": 22302
    },
    {
      "epoch": 0.00013612060546875,
      "step": 22302,
      "training_step_time": 0.4219233989715576
    },
    {
      "epoch": 0.000136126708984375,
      "model_forward_time": 0.11500763893127441,
      "step": 22303
    },
    {
      "epoch": 0.000136126708984375,
      "step": 22303,
      "training_step_time": 0.39308714866638184
    },
    {
      "epoch": 0.0001361328125,
      "model_forward_time": 0.11518239974975586,
      "step": 22304
    },
    {
      "epoch": 0.0001361328125,
      "step": 22304,
      "training_step_time": 0.38936924934387207
    },
    {
      "epoch": 0.000136138916015625,
      "model_forward_time": 0.11530566215515137,
      "step": 22305
    },
    {
      "epoch": 0.000136138916015625,
      "step": 22305,
      "training_step_time": 0.3880622386932373
    },
    {
      "epoch": 0.00013614501953125,
      "model_forward_time": 0.11444807052612305,
      "step": 22306
    },
    {
      "epoch": 0.00013614501953125,
      "step": 22306,
      "training_step_time": 0.5549321174621582
    },
    {
      "epoch": 0.000136151123046875,
      "model_forward_time": 0.11493563652038574,
      "step": 22307
    },
    {
      "epoch": 0.000136151123046875,
      "step": 22307,
      "training_step_time": 0.3922421932220459
    },
    {
      "epoch": 0.0001361572265625,
      "model_forward_time": 0.11516308784484863,
      "step": 22308
    },
    {
      "epoch": 0.0001361572265625,
      "step": 22308,
      "training_step_time": 0.4076871871948242
    },
    {
      "epoch": 0.000136163330078125,
      "model_forward_time": 0.11546969413757324,
      "step": 22309
    },
    {
      "epoch": 0.000136163330078125,
      "step": 22309,
      "training_step_time": 0.4432239532470703
    },
    {
      "epoch": 0.00013616943359375,
      "grad_norm": 0.14952990412712097,
      "learning_rate": 7.42565477899035e-05,
      "loss": 0.0468,
      "step": 22310
    },
    {
      "epoch": 0.00013616943359375,
      "model_forward_time": 0.1158895492553711,
      "step": 22310
    },
    {
      "epoch": 0.00013616943359375,
      "step": 22310,
      "training_step_time": 0.38826465606689453
    },
    {
      "epoch": 0.000136175537109375,
      "model_forward_time": 0.11481952667236328,
      "step": 22311
    },
    {
      "epoch": 0.000136175537109375,
      "step": 22311,
      "training_step_time": 0.4950273036956787
    },
    {
      "epoch": 0.000136181640625,
      "model_forward_time": 0.11447000503540039,
      "step": 22312
    },
    {
      "epoch": 0.000136181640625,
      "step": 22312,
      "training_step_time": 0.5601000785827637
    },
    {
      "epoch": 0.000136187744140625,
      "model_forward_time": 0.11474919319152832,
      "step": 22313
    },
    {
      "epoch": 0.000136187744140625,
      "step": 22313,
      "training_step_time": 0.4016599655151367
    },
    {
      "epoch": 0.00013619384765625,
      "model_forward_time": 0.11469817161560059,
      "step": 22314
    },
    {
      "epoch": 0.00013619384765625,
      "step": 22314,
      "training_step_time": 0.4395334720611572
    },
    {
      "epoch": 0.000136199951171875,
      "model_forward_time": 0.11454010009765625,
      "step": 22315
    },
    {
      "epoch": 0.000136199951171875,
      "step": 22315,
      "training_step_time": 0.390636682510376
    },
    {
      "epoch": 0.0001362060546875,
      "model_forward_time": 0.11459207534790039,
      "step": 22316
    },
    {
      "epoch": 0.0001362060546875,
      "step": 22316,
      "training_step_time": 0.39466261863708496
    },
    {
      "epoch": 0.000136212158203125,
      "model_forward_time": 0.11514759063720703,
      "step": 22317
    },
    {
      "epoch": 0.000136212158203125,
      "step": 22317,
      "training_step_time": 0.3875586986541748
    },
    {
      "epoch": 0.00013621826171875,
      "model_forward_time": 0.11517477035522461,
      "step": 22318
    },
    {
      "epoch": 0.00013621826171875,
      "step": 22318,
      "training_step_time": 0.6049635410308838
    },
    {
      "epoch": 0.000136224365234375,
      "model_forward_time": 0.11502218246459961,
      "step": 22319
    },
    {
      "epoch": 0.000136224365234375,
      "step": 22319,
      "training_step_time": 0.38608527183532715
    },
    {
      "epoch": 0.00013623046875,
      "grad_norm": 0.13525298237800598,
      "learning_rate": 7.423244639611826e-05,
      "loss": 0.0503,
      "step": 22320
    },
    {
      "epoch": 0.00013623046875,
      "model_forward_time": 0.11512064933776855,
      "step": 22320
    },
    {
      "epoch": 0.00013623046875,
      "step": 22320,
      "training_step_time": 0.39186763763427734
    },
    {
      "epoch": 0.000136236572265625,
      "model_forward_time": 0.11514163017272949,
      "step": 22321
    },
    {
      "epoch": 0.000136236572265625,
      "step": 22321,
      "training_step_time": 0.4073307514190674
    },
    {
      "epoch": 0.00013624267578125,
      "model_forward_time": 0.1160421371459961,
      "step": 22322
    },
    {
      "epoch": 0.00013624267578125,
      "step": 22322,
      "training_step_time": 0.38660311698913574
    },
    {
      "epoch": 0.000136248779296875,
      "model_forward_time": 0.11502814292907715,
      "step": 22323
    },
    {
      "epoch": 0.000136248779296875,
      "step": 22323,
      "training_step_time": 0.4918787479400635
    },
    {
      "epoch": 0.0001362548828125,
      "model_forward_time": 0.11510610580444336,
      "step": 22324
    },
    {
      "epoch": 0.0001362548828125,
      "step": 22324,
      "training_step_time": 0.3940620422363281
    },
    {
      "epoch": 0.000136260986328125,
      "model_forward_time": 0.11497187614440918,
      "step": 22325
    },
    {
      "epoch": 0.000136260986328125,
      "step": 22325,
      "training_step_time": 0.4340991973876953
    },
    {
      "epoch": 0.00013626708984375,
      "model_forward_time": 0.11506962776184082,
      "step": 22326
    },
    {
      "epoch": 0.00013626708984375,
      "step": 22326,
      "training_step_time": 0.48711657524108887
    },
    {
      "epoch": 0.000136273193359375,
      "model_forward_time": 0.1150665283203125,
      "step": 22327
    },
    {
      "epoch": 0.000136273193359375,
      "step": 22327,
      "training_step_time": 0.4419710636138916
    },
    {
      "epoch": 0.000136279296875,
      "model_forward_time": 0.11436319351196289,
      "step": 22328
    },
    {
      "epoch": 0.000136279296875,
      "step": 22328,
      "training_step_time": 0.457653284072876
    },
    {
      "epoch": 0.000136285400390625,
      "model_forward_time": 0.11509323120117188,
      "step": 22329
    },
    {
      "epoch": 0.000136285400390625,
      "step": 22329,
      "training_step_time": 0.3754448890686035
    },
    {
      "epoch": 0.00013629150390625,
      "grad_norm": 0.17334477603435516,
      "learning_rate": 7.420833764115561e-05,
      "loss": 0.0481,
      "step": 22330
    },
    {
      "epoch": 0.00013629150390625,
      "model_forward_time": 0.11435651779174805,
      "step": 22330
    },
    {
      "epoch": 0.00013629150390625,
      "step": 22330,
      "training_step_time": 0.3907043933868408
    },
    {
      "epoch": 0.000136297607421875,
      "model_forward_time": 0.1151278018951416,
      "step": 22331
    },
    {
      "epoch": 0.000136297607421875,
      "step": 22331,
      "training_step_time": 0.38245725631713867
    },
    {
      "epoch": 0.0001363037109375,
      "model_forward_time": 0.1155092716217041,
      "step": 22332
    },
    {
      "epoch": 0.0001363037109375,
      "step": 22332,
      "training_step_time": 0.39104413986206055
    },
    {
      "epoch": 0.000136309814453125,
      "model_forward_time": 0.11482548713684082,
      "step": 22333
    },
    {
      "epoch": 0.000136309814453125,
      "step": 22333,
      "training_step_time": 0.4014253616333008
    },
    {
      "epoch": 0.00013631591796875,
      "model_forward_time": 0.1156301498413086,
      "step": 22334
    },
    {
      "epoch": 0.00013631591796875,
      "step": 22334,
      "training_step_time": 0.3858518600463867
    },
    {
      "epoch": 0.000136322021484375,
      "model_forward_time": 0.1157693862915039,
      "step": 22335
    },
    {
      "epoch": 0.000136322021484375,
      "step": 22335,
      "training_step_time": 0.387176513671875
    },
    {
      "epoch": 0.000136328125,
      "model_forward_time": 0.11583614349365234,
      "step": 22336
    },
    {
      "epoch": 0.000136328125,
      "step": 22336,
      "training_step_time": 0.6856174468994141
    },
    {
      "epoch": 0.000136334228515625,
      "model_forward_time": 0.11497926712036133,
      "step": 22337
    },
    {
      "epoch": 0.000136334228515625,
      "step": 22337,
      "training_step_time": 0.3968384265899658
    },
    {
      "epoch": 0.00013634033203125,
      "model_forward_time": 0.11510872840881348,
      "step": 22338
    },
    {
      "epoch": 0.00013634033203125,
      "step": 22338,
      "training_step_time": 0.3909604549407959
    },
    {
      "epoch": 0.000136346435546875,
      "model_forward_time": 0.11497211456298828,
      "step": 22339
    },
    {
      "epoch": 0.000136346435546875,
      "step": 22339,
      "training_step_time": 0.44803500175476074
    },
    {
      "epoch": 0.0001363525390625,
      "grad_norm": 0.1895289272069931,
      "learning_rate": 7.418422153233919e-05,
      "loss": 0.0493,
      "step": 22340
    },
    {
      "epoch": 0.0001363525390625,
      "model_forward_time": 0.1157374382019043,
      "step": 22340
    },
    {
      "epoch": 0.0001363525390625,
      "step": 22340,
      "training_step_time": 0.4658071994781494
    },
    {
      "epoch": 0.000136358642578125,
      "model_forward_time": 0.11423993110656738,
      "step": 22341
    },
    {
      "epoch": 0.000136358642578125,
      "step": 22341,
      "training_step_time": 0.4234933853149414
    },
    {
      "epoch": 0.00013636474609375,
      "model_forward_time": 0.1156454086303711,
      "step": 22342
    },
    {
      "epoch": 0.00013636474609375,
      "step": 22342,
      "training_step_time": 0.46669983863830566
    },
    {
      "epoch": 0.000136370849609375,
      "model_forward_time": 0.11471414566040039,
      "step": 22343
    },
    {
      "epoch": 0.000136370849609375,
      "step": 22343,
      "training_step_time": 0.38928723335266113
    },
    {
      "epoch": 0.000136376953125,
      "model_forward_time": 0.11563873291015625,
      "step": 22344
    },
    {
      "epoch": 0.000136376953125,
      "step": 22344,
      "training_step_time": 0.4094409942626953
    },
    {
      "epoch": 0.000136383056640625,
      "model_forward_time": 0.1263718605041504,
      "step": 22345
    },
    {
      "epoch": 0.000136383056640625,
      "step": 22345,
      "training_step_time": 0.3996860980987549
    },
    {
      "epoch": 0.00013638916015625,
      "model_forward_time": 0.11551451683044434,
      "step": 22346
    },
    {
      "epoch": 0.00013638916015625,
      "step": 22346,
      "training_step_time": 0.38642144203186035
    },
    {
      "epoch": 0.000136395263671875,
      "model_forward_time": 0.11534786224365234,
      "step": 22347
    },
    {
      "epoch": 0.000136395263671875,
      "step": 22347,
      "training_step_time": 0.4027984142303467
    },
    {
      "epoch": 0.0001364013671875,
      "model_forward_time": 0.11485958099365234,
      "step": 22348
    },
    {
      "epoch": 0.0001364013671875,
      "step": 22348,
      "training_step_time": 0.5368945598602295
    },
    {
      "epoch": 0.000136407470703125,
      "model_forward_time": 0.11612820625305176,
      "step": 22349
    },
    {
      "epoch": 0.000136407470703125,
      "step": 22349,
      "training_step_time": 0.41447925567626953
    },
    {
      "epoch": 0.00013641357421875,
      "grad_norm": 0.12689456343650818,
      "learning_rate": 7.416009807699482e-05,
      "loss": 0.0457,
      "step": 22350
    },
    {
      "epoch": 0.00013641357421875,
      "model_forward_time": 0.11526203155517578,
      "step": 22350
    },
    {
      "epoch": 0.00013641357421875,
      "step": 22350,
      "training_step_time": 0.39760899543762207
    },
    {
      "epoch": 0.000136419677734375,
      "model_forward_time": 0.11564970016479492,
      "step": 22351
    },
    {
      "epoch": 0.000136419677734375,
      "step": 22351,
      "training_step_time": 0.4276304244995117
    },
    {
      "epoch": 0.00013642578125,
      "model_forward_time": 0.11516427993774414,
      "step": 22352
    },
    {
      "epoch": 0.00013642578125,
      "step": 22352,
      "training_step_time": 0.3969745635986328
    },
    {
      "epoch": 0.000136431884765625,
      "model_forward_time": 0.11589455604553223,
      "step": 22353
    },
    {
      "epoch": 0.000136431884765625,
      "step": 22353,
      "training_step_time": 0.46971917152404785
    },
    {
      "epoch": 0.00013643798828125,
      "model_forward_time": 0.11562776565551758,
      "step": 22354
    },
    {
      "epoch": 0.00013643798828125,
      "step": 22354,
      "training_step_time": 0.4931516647338867
    },
    {
      "epoch": 0.000136444091796875,
      "model_forward_time": 0.11524462699890137,
      "step": 22355
    },
    {
      "epoch": 0.000136444091796875,
      "step": 22355,
      "training_step_time": 0.4252760410308838
    },
    {
      "epoch": 0.0001364501953125,
      "model_forward_time": 0.1150522232055664,
      "step": 22356
    },
    {
      "epoch": 0.0001364501953125,
      "step": 22356,
      "training_step_time": 0.38956642150878906
    },
    {
      "epoch": 0.000136456298828125,
      "model_forward_time": 0.11616730690002441,
      "step": 22357
    },
    {
      "epoch": 0.000136456298828125,
      "step": 22357,
      "training_step_time": 0.39573121070861816
    },
    {
      "epoch": 0.00013646240234375,
      "model_forward_time": 0.11538267135620117,
      "step": 22358
    },
    {
      "epoch": 0.00013646240234375,
      "step": 22358,
      "training_step_time": 0.40027880668640137
    },
    {
      "epoch": 0.000136468505859375,
      "model_forward_time": 0.11615157127380371,
      "step": 22359
    },
    {
      "epoch": 0.000136468505859375,
      "step": 22359,
      "training_step_time": 0.3927426338195801
    },
    {
      "epoch": 0.000136474609375,
      "grad_norm": 0.15400876104831696,
      "learning_rate": 7.413596728245054e-05,
      "loss": 0.043,
      "step": 22360
    },
    {
      "epoch": 0.000136474609375,
      "model_forward_time": 0.1154325008392334,
      "step": 22360
    },
    {
      "epoch": 0.000136474609375,
      "step": 22360,
      "training_step_time": 0.6514179706573486
    },
    {
      "epoch": 0.000136480712890625,
      "model_forward_time": 0.11516427993774414,
      "step": 22361
    },
    {
      "epoch": 0.000136480712890625,
      "step": 22361,
      "training_step_time": 0.3840756416320801
    },
    {
      "epoch": 0.00013648681640625,
      "model_forward_time": 0.11546826362609863,
      "step": 22362
    },
    {
      "epoch": 0.00013648681640625,
      "step": 22362,
      "training_step_time": 0.39386749267578125
    },
    {
      "epoch": 0.000136492919921875,
      "model_forward_time": 0.11544013023376465,
      "step": 22363
    },
    {
      "epoch": 0.000136492919921875,
      "step": 22363,
      "training_step_time": 0.39574766159057617
    },
    {
      "epoch": 0.0001364990234375,
      "model_forward_time": 0.11518263816833496,
      "step": 22364
    },
    {
      "epoch": 0.0001364990234375,
      "step": 22364,
      "training_step_time": 0.39981865882873535
    },
    {
      "epoch": 0.000136505126953125,
      "model_forward_time": 0.1146993637084961,
      "step": 22365
    },
    {
      "epoch": 0.000136505126953125,
      "step": 22365,
      "training_step_time": 0.3993797302246094
    },
    {
      "epoch": 0.00013651123046875,
      "model_forward_time": 0.11583995819091797,
      "step": 22366
    },
    {
      "epoch": 0.00013651123046875,
      "step": 22366,
      "training_step_time": 0.5867416858673096
    },
    {
      "epoch": 0.000136517333984375,
      "model_forward_time": 0.11538481712341309,
      "step": 22367
    },
    {
      "epoch": 0.000136517333984375,
      "step": 22367,
      "training_step_time": 0.4963517189025879
    },
    {
      "epoch": 0.0001365234375,
      "model_forward_time": 0.11460995674133301,
      "step": 22368
    },
    {
      "epoch": 0.0001365234375,
      "step": 22368,
      "training_step_time": 0.46916961669921875
    },
    {
      "epoch": 0.000136529541015625,
      "model_forward_time": 0.11469292640686035,
      "step": 22369
    },
    {
      "epoch": 0.000136529541015625,
      "step": 22369,
      "training_step_time": 0.4212827682495117
    },
    {
      "epoch": 0.00013653564453125,
      "grad_norm": 0.18006539344787598,
      "learning_rate": 7.411182915603669e-05,
      "loss": 0.0507,
      "step": 22370
    },
    {
      "epoch": 0.00013653564453125,
      "model_forward_time": 0.11516499519348145,
      "step": 22370
    },
    {
      "epoch": 0.00013653564453125,
      "step": 22370,
      "training_step_time": 0.39282727241516113
    },
    {
      "epoch": 0.000136541748046875,
      "model_forward_time": 0.11438465118408203,
      "step": 22371
    },
    {
      "epoch": 0.000136541748046875,
      "step": 22371,
      "training_step_time": 0.38057398796081543
    },
    {
      "epoch": 0.0001365478515625,
      "model_forward_time": 0.11537289619445801,
      "step": 22372
    },
    {
      "epoch": 0.0001365478515625,
      "step": 22372,
      "training_step_time": 0.45452308654785156
    },
    {
      "epoch": 0.000136553955078125,
      "model_forward_time": 0.11492514610290527,
      "step": 22373
    },
    {
      "epoch": 0.000136553955078125,
      "step": 22373,
      "training_step_time": 0.38398003578186035
    },
    {
      "epoch": 0.00013656005859375,
      "model_forward_time": 0.11744141578674316,
      "step": 22374
    },
    {
      "epoch": 0.00013656005859375,
      "step": 22374,
      "training_step_time": 0.3920145034790039
    },
    {
      "epoch": 0.000136566162109375,
      "model_forward_time": 0.11591029167175293,
      "step": 22375
    },
    {
      "epoch": 0.000136566162109375,
      "step": 22375,
      "training_step_time": 0.3927881717681885
    },
    {
      "epoch": 0.000136572265625,
      "model_forward_time": 0.11569404602050781,
      "step": 22376
    },
    {
      "epoch": 0.000136572265625,
      "step": 22376,
      "training_step_time": 0.3954496383666992
    },
    {
      "epoch": 0.000136578369140625,
      "model_forward_time": 0.1152944564819336,
      "step": 22377
    },
    {
      "epoch": 0.000136578369140625,
      "step": 22377,
      "training_step_time": 0.39224910736083984
    },
    {
      "epoch": 0.00013658447265625,
      "model_forward_time": 0.11574649810791016,
      "step": 22378
    },
    {
      "epoch": 0.00013658447265625,
      "step": 22378,
      "training_step_time": 0.6451363563537598
    },
    {
      "epoch": 0.000136590576171875,
      "model_forward_time": 0.11518096923828125,
      "step": 22379
    },
    {
      "epoch": 0.000136590576171875,
      "step": 22379,
      "training_step_time": 0.40179920196533203
    },
    {
      "epoch": 0.0001365966796875,
      "grad_norm": 0.13670387864112854,
      "learning_rate": 7.408768370508576e-05,
      "loss": 0.05,
      "step": 22380
    },
    {
      "epoch": 0.0001365966796875,
      "model_forward_time": 0.1151270866394043,
      "step": 22380
    },
    {
      "epoch": 0.0001365966796875,
      "step": 22380,
      "training_step_time": 0.45784473419189453
    },
    {
      "epoch": 0.000136602783203125,
      "model_forward_time": 0.11584258079528809,
      "step": 22381
    },
    {
      "epoch": 0.000136602783203125,
      "step": 22381,
      "training_step_time": 0.4440882205963135
    },
    {
      "epoch": 0.00013660888671875,
      "model_forward_time": 0.11544299125671387,
      "step": 22382
    },
    {
      "epoch": 0.00013660888671875,
      "step": 22382,
      "training_step_time": 0.47594380378723145
    },
    {
      "epoch": 0.000136614990234375,
      "model_forward_time": 0.11511874198913574,
      "step": 22383
    },
    {
      "epoch": 0.000136614990234375,
      "step": 22383,
      "training_step_time": 0.42061352729797363
    },
    {
      "epoch": 0.00013662109375,
      "model_forward_time": 0.11494040489196777,
      "step": 22384
    },
    {
      "epoch": 0.00013662109375,
      "step": 22384,
      "training_step_time": 0.4538426399230957
    },
    {
      "epoch": 0.000136627197265625,
      "model_forward_time": 0.11562371253967285,
      "step": 22385
    },
    {
      "epoch": 0.000136627197265625,
      "step": 22385,
      "training_step_time": 0.3805668354034424
    },
    {
      "epoch": 0.00013663330078125,
      "model_forward_time": 0.11517572402954102,
      "step": 22386
    },
    {
      "epoch": 0.00013663330078125,
      "step": 22386,
      "training_step_time": 0.38878393173217773
    },
    {
      "epoch": 0.000136639404296875,
      "model_forward_time": 0.1151120662689209,
      "step": 22387
    },
    {
      "epoch": 0.000136639404296875,
      "step": 22387,
      "training_step_time": 0.39457058906555176
    },
    {
      "epoch": 0.0001366455078125,
      "model_forward_time": 0.1150062084197998,
      "step": 22388
    },
    {
      "epoch": 0.0001366455078125,
      "step": 22388,
      "training_step_time": 0.38414978981018066
    },
    {
      "epoch": 0.000136651611328125,
      "model_forward_time": 0.11580586433410645,
      "step": 22389
    },
    {
      "epoch": 0.000136651611328125,
      "step": 22389,
      "training_step_time": 0.3948934078216553
    },
    {
      "epoch": 0.00013665771484375,
      "grad_norm": 0.1746133267879486,
      "learning_rate": 7.406353093693253e-05,
      "loss": 0.0436,
      "step": 22390
    },
    {
      "epoch": 0.00013665771484375,
      "model_forward_time": 0.11536145210266113,
      "step": 22390
    },
    {
      "epoch": 0.00013665771484375,
      "step": 22390,
      "training_step_time": 0.587122917175293
    },
    {
      "epoch": 0.000136663818359375,
      "model_forward_time": 0.1153562068939209,
      "step": 22391
    },
    {
      "epoch": 0.000136663818359375,
      "step": 22391,
      "training_step_time": 0.3877387046813965
    },
    {
      "epoch": 0.000136669921875,
      "model_forward_time": 0.11560559272766113,
      "step": 22392
    },
    {
      "epoch": 0.000136669921875,
      "step": 22392,
      "training_step_time": 0.3930816650390625
    },
    {
      "epoch": 0.000136676025390625,
      "model_forward_time": 0.11566734313964844,
      "step": 22393
    },
    {
      "epoch": 0.000136676025390625,
      "step": 22393,
      "training_step_time": 0.39766740798950195
    },
    {
      "epoch": 0.00013668212890625,
      "model_forward_time": 0.11527800559997559,
      "step": 22394
    },
    {
      "epoch": 0.00013668212890625,
      "step": 22394,
      "training_step_time": 0.44909214973449707
    },
    {
      "epoch": 0.000136688232421875,
      "model_forward_time": 0.11483120918273926,
      "step": 22395
    },
    {
      "epoch": 0.000136688232421875,
      "step": 22395,
      "training_step_time": 0.40557026863098145
    },
    {
      "epoch": 0.0001366943359375,
      "model_forward_time": 0.11534357070922852,
      "step": 22396
    },
    {
      "epoch": 0.0001366943359375,
      "step": 22396,
      "training_step_time": 0.6390440464019775
    },
    {
      "epoch": 0.000136700439453125,
      "model_forward_time": 0.11484146118164062,
      "step": 22397
    },
    {
      "epoch": 0.000136700439453125,
      "step": 22397,
      "training_step_time": 0.39576292037963867
    },
    {
      "epoch": 0.00013670654296875,
      "model_forward_time": 0.11470270156860352,
      "step": 22398
    },
    {
      "epoch": 0.00013670654296875,
      "step": 22398,
      "training_step_time": 0.4716513156890869
    },
    {
      "epoch": 0.000136712646484375,
      "model_forward_time": 0.11509346961975098,
      "step": 22399
    },
    {
      "epoch": 0.000136712646484375,
      "step": 22399,
      "training_step_time": 0.3954811096191406
    },
    {
      "epoch": 0.00013671875,
      "grad_norm": 0.17454934120178223,
      "learning_rate": 7.403937085891397e-05,
      "loss": 0.0494,
      "step": 22400
    },
    {
      "epoch": 0.00013671875,
      "model_forward_time": 0.11391711235046387,
      "step": 22400
    },
    {
      "epoch": 0.00013671875,
      "step": 22400,
      "training_step_time": 0.396991491317749
    },
    {
      "epoch": 0.000136724853515625,
      "model_forward_time": 0.11485052108764648,
      "step": 22401
    },
    {
      "epoch": 0.000136724853515625,
      "step": 22401,
      "training_step_time": 0.3837282657623291
    },
    {
      "epoch": 0.00013673095703125,
      "model_forward_time": 0.11519742012023926,
      "step": 22402
    },
    {
      "epoch": 0.00013673095703125,
      "step": 22402,
      "training_step_time": 0.48050785064697266
    },
    {
      "epoch": 0.000136737060546875,
      "model_forward_time": 0.1151576042175293,
      "step": 22403
    },
    {
      "epoch": 0.000136737060546875,
      "step": 22403,
      "training_step_time": 0.3932688236236572
    },
    {
      "epoch": 0.0001367431640625,
      "model_forward_time": 0.11519002914428711,
      "step": 22404
    },
    {
      "epoch": 0.0001367431640625,
      "step": 22404,
      "training_step_time": 0.39574122428894043
    },
    {
      "epoch": 0.000136749267578125,
      "model_forward_time": 0.11490082740783691,
      "step": 22405
    },
    {
      "epoch": 0.000136749267578125,
      "step": 22405,
      "training_step_time": 0.3958709239959717
    },
    {
      "epoch": 0.00013675537109375,
      "model_forward_time": 0.11495590209960938,
      "step": 22406
    },
    {
      "epoch": 0.00013675537109375,
      "step": 22406,
      "training_step_time": 0.40272974967956543
    },
    {
      "epoch": 0.000136761474609375,
      "model_forward_time": 0.11591005325317383,
      "step": 22407
    },
    {
      "epoch": 0.000136761474609375,
      "step": 22407,
      "training_step_time": 0.41089439392089844
    },
    {
      "epoch": 0.000136767578125,
      "model_forward_time": 0.11605381965637207,
      "step": 22408
    },
    {
      "epoch": 0.000136767578125,
      "step": 22408,
      "training_step_time": 0.5823299884796143
    },
    {
      "epoch": 0.000136773681640625,
      "model_forward_time": 0.11462736129760742,
      "step": 22409
    },
    {
      "epoch": 0.000136773681640625,
      "step": 22409,
      "training_step_time": 0.36609768867492676
    },
    {
      "epoch": 0.00013677978515625,
      "grad_norm": 0.1515836864709854,
      "learning_rate": 7.401520347836926e-05,
      "loss": 0.0525,
      "step": 22410
    },
    {
      "epoch": 0.00013677978515625,
      "model_forward_time": 0.11503982543945312,
      "step": 22410
    },
    {
      "epoch": 0.00013677978515625,
      "step": 22410,
      "training_step_time": 0.47386622428894043
    },
    {
      "epoch": 0.000136785888671875,
      "model_forward_time": 0.11510133743286133,
      "step": 22411
    },
    {
      "epoch": 0.000136785888671875,
      "step": 22411,
      "training_step_time": 0.4147794246673584
    },
    {
      "epoch": 0.0001367919921875,
      "model_forward_time": 0.1152505874633789,
      "step": 22412
    },
    {
      "epoch": 0.0001367919921875,
      "step": 22412,
      "training_step_time": 0.4679746627807617
    },
    {
      "epoch": 0.000136798095703125,
      "model_forward_time": 0.11436176300048828,
      "step": 22413
    },
    {
      "epoch": 0.000136798095703125,
      "step": 22413,
      "training_step_time": 0.39206671714782715
    },
    {
      "epoch": 0.00013680419921875,
      "model_forward_time": 0.1147773265838623,
      "step": 22414
    },
    {
      "epoch": 0.00013680419921875,
      "step": 22414,
      "training_step_time": 0.39490461349487305
    },
    {
      "epoch": 0.000136810302734375,
      "model_forward_time": 0.11464905738830566,
      "step": 22415
    },
    {
      "epoch": 0.000136810302734375,
      "step": 22415,
      "training_step_time": 0.39541196823120117
    },
    {
      "epoch": 0.00013681640625,
      "model_forward_time": 0.11644864082336426,
      "step": 22416
    },
    {
      "epoch": 0.00013681640625,
      "step": 22416,
      "training_step_time": 0.3853950500488281
    },
    {
      "epoch": 0.000136822509765625,
      "model_forward_time": 0.11529731750488281,
      "step": 22417
    },
    {
      "epoch": 0.000136822509765625,
      "step": 22417,
      "training_step_time": 0.41965341567993164
    },
    {
      "epoch": 0.00013682861328125,
      "model_forward_time": 0.11462974548339844,
      "step": 22418
    },
    {
      "epoch": 0.00013682861328125,
      "step": 22418,
      "training_step_time": 0.39886951446533203
    },
    {
      "epoch": 0.000136834716796875,
      "model_forward_time": 0.11593985557556152,
      "step": 22419
    },
    {
      "epoch": 0.000136834716796875,
      "step": 22419,
      "training_step_time": 0.3902151584625244
    },
    {
      "epoch": 0.0001368408203125,
      "grad_norm": 0.10941260308027267,
      "learning_rate": 7.399102880263983e-05,
      "loss": 0.0435,
      "step": 22420
    },
    {
      "epoch": 0.0001368408203125,
      "model_forward_time": 0.1154928207397461,
      "step": 22420
    },
    {
      "epoch": 0.0001368408203125,
      "step": 22420,
      "training_step_time": 0.56638503074646
    },
    {
      "epoch": 0.000136846923828125,
      "model_forward_time": 0.11603617668151855,
      "step": 22421
    },
    {
      "epoch": 0.000136846923828125,
      "step": 22421,
      "training_step_time": 0.45400071144104004
    },
    {
      "epoch": 0.00013685302734375,
      "model_forward_time": 0.11581230163574219,
      "step": 22422
    },
    {
      "epoch": 0.00013685302734375,
      "step": 22422,
      "training_step_time": 0.3926990032196045
    },
    {
      "epoch": 0.000136859130859375,
      "model_forward_time": 0.11529731750488281,
      "step": 22423
    },
    {
      "epoch": 0.000136859130859375,
      "step": 22423,
      "training_step_time": 0.45566487312316895
    },
    {
      "epoch": 0.000136865234375,
      "model_forward_time": 0.11522126197814941,
      "step": 22424
    },
    {
      "epoch": 0.000136865234375,
      "step": 22424,
      "training_step_time": 0.4629364013671875
    },
    {
      "epoch": 0.000136871337890625,
      "model_forward_time": 0.11540436744689941,
      "step": 22425
    },
    {
      "epoch": 0.000136871337890625,
      "step": 22425,
      "training_step_time": 0.45629215240478516
    },
    {
      "epoch": 0.00013687744140625,
      "model_forward_time": 0.11594629287719727,
      "step": 22426
    },
    {
      "epoch": 0.00013687744140625,
      "step": 22426,
      "training_step_time": 0.46032094955444336
    },
    {
      "epoch": 0.000136883544921875,
      "model_forward_time": 0.11464214324951172,
      "step": 22427
    },
    {
      "epoch": 0.000136883544921875,
      "step": 22427,
      "training_step_time": 0.3869774341583252
    },
    {
      "epoch": 0.0001368896484375,
      "model_forward_time": 0.11488652229309082,
      "step": 22428
    },
    {
      "epoch": 0.0001368896484375,
      "step": 22428,
      "training_step_time": 0.3886911869049072
    },
    {
      "epoch": 0.000136895751953125,
      "model_forward_time": 0.11530685424804688,
      "step": 22429
    },
    {
      "epoch": 0.000136895751953125,
      "step": 22429,
      "training_step_time": 0.3948800563812256
    },
    {
      "epoch": 0.00013690185546875,
      "grad_norm": 0.09594539552927017,
      "learning_rate": 7.396684683906928e-05,
      "loss": 0.0466,
      "step": 22430
    },
    {
      "epoch": 0.00013690185546875,
      "model_forward_time": 0.11538338661193848,
      "step": 22430
    },
    {
      "epoch": 0.00013690185546875,
      "step": 22430,
      "training_step_time": 0.3924586772918701
    },
    {
      "epoch": 0.000136907958984375,
      "model_forward_time": 0.11538004875183105,
      "step": 22431
    },
    {
      "epoch": 0.000136907958984375,
      "step": 22431,
      "training_step_time": 0.38837122917175293
    },
    {
      "epoch": 0.0001369140625,
      "model_forward_time": 0.11586427688598633,
      "step": 22432
    },
    {
      "epoch": 0.0001369140625,
      "step": 22432,
      "training_step_time": 0.7805061340332031
    },
    {
      "epoch": 0.000136920166015625,
      "model_forward_time": 0.11472272872924805,
      "step": 22433
    },
    {
      "epoch": 0.000136920166015625,
      "step": 22433,
      "training_step_time": 0.4069030284881592
    },
    {
      "epoch": 0.00013692626953125,
      "model_forward_time": 0.11458373069763184,
      "step": 22434
    },
    {
      "epoch": 0.00013692626953125,
      "step": 22434,
      "training_step_time": 0.41545748710632324
    },
    {
      "epoch": 0.000136932373046875,
      "model_forward_time": 0.11536979675292969,
      "step": 22435
    },
    {
      "epoch": 0.000136932373046875,
      "step": 22435,
      "training_step_time": 0.4132986068725586
    },
    {
      "epoch": 0.0001369384765625,
      "model_forward_time": 0.11434125900268555,
      "step": 22436
    },
    {
      "epoch": 0.0001369384765625,
      "step": 22436,
      "training_step_time": 0.44792628288269043
    },
    {
      "epoch": 0.000136944580078125,
      "model_forward_time": 0.1144094467163086,
      "step": 22437
    },
    {
      "epoch": 0.000136944580078125,
      "step": 22437,
      "training_step_time": 0.4269280433654785
    },
    {
      "epoch": 0.00013695068359375,
      "model_forward_time": 0.11533308029174805,
      "step": 22438
    },
    {
      "epoch": 0.00013695068359375,
      "step": 22438,
      "training_step_time": 0.466935396194458
    },
    {
      "epoch": 0.000136956787109375,
      "model_forward_time": 0.11798453330993652,
      "step": 22439
    },
    {
      "epoch": 0.000136956787109375,
      "step": 22439,
      "training_step_time": 0.4618680477142334
    },
    {
      "epoch": 0.000136962890625,
      "grad_norm": 0.1821156144142151,
      "learning_rate": 7.394265759500348e-05,
      "loss": 0.0412,
      "step": 22440
    },
    {
      "epoch": 0.000136962890625,
      "model_forward_time": 0.11769843101501465,
      "step": 22440
    },
    {
      "epoch": 0.000136962890625,
      "step": 22440,
      "training_step_time": 0.41850709915161133
    },
    {
      "epoch": 0.000136968994140625,
      "model_forward_time": 0.11611557006835938,
      "step": 22441
    },
    {
      "epoch": 0.000136968994140625,
      "step": 22441,
      "training_step_time": 0.3971712589263916
    },
    {
      "epoch": 0.00013697509765625,
      "model_forward_time": 0.11496567726135254,
      "step": 22442
    },
    {
      "epoch": 0.00013697509765625,
      "step": 22442,
      "training_step_time": 0.3878481388092041
    },
    {
      "epoch": 0.000136981201171875,
      "model_forward_time": 0.11563372611999512,
      "step": 22443
    },
    {
      "epoch": 0.000136981201171875,
      "step": 22443,
      "training_step_time": 0.386807918548584
    },
    {
      "epoch": 0.0001369873046875,
      "model_forward_time": 0.1151123046875,
      "step": 22444
    },
    {
      "epoch": 0.0001369873046875,
      "step": 22444,
      "training_step_time": 0.4785292148590088
    },
    {
      "epoch": 0.000136993408203125,
      "model_forward_time": 0.11573123931884766,
      "step": 22445
    },
    {
      "epoch": 0.000136993408203125,
      "step": 22445,
      "training_step_time": 0.39529943466186523
    },
    {
      "epoch": 0.00013699951171875,
      "model_forward_time": 0.11499643325805664,
      "step": 22446
    },
    {
      "epoch": 0.00013699951171875,
      "step": 22446,
      "training_step_time": 0.403836727142334
    },
    {
      "epoch": 0.000137005615234375,
      "model_forward_time": 0.11514925956726074,
      "step": 22447
    },
    {
      "epoch": 0.000137005615234375,
      "step": 22447,
      "training_step_time": 0.42014431953430176
    },
    {
      "epoch": 0.00013701171875,
      "model_forward_time": 0.11581540107727051,
      "step": 22448
    },
    {
      "epoch": 0.00013701171875,
      "step": 22448,
      "training_step_time": 0.4340932369232178
    },
    {
      "epoch": 0.000137017822265625,
      "model_forward_time": 0.1149740219116211,
      "step": 22449
    },
    {
      "epoch": 0.000137017822265625,
      "step": 22449,
      "training_step_time": 0.40408778190612793
    },
    {
      "epoch": 0.00013702392578125,
      "grad_norm": 0.2044476568698883,
      "learning_rate": 7.391846107779047e-05,
      "loss": 0.0559,
      "step": 22450
    },
    {
      "epoch": 0.00013702392578125,
      "model_forward_time": 0.11571526527404785,
      "step": 22450
    },
    {
      "epoch": 0.00013702392578125,
      "step": 22450,
      "training_step_time": 0.5276553630828857
    },
    {
      "epoch": 0.000137030029296875,
      "model_forward_time": 0.11606907844543457,
      "step": 22451
    },
    {
      "epoch": 0.000137030029296875,
      "step": 22451,
      "training_step_time": 0.4585888385772705
    },
    {
      "epoch": 0.0001370361328125,
      "model_forward_time": 0.11498904228210449,
      "step": 22452
    },
    {
      "epoch": 0.0001370361328125,
      "step": 22452,
      "training_step_time": 0.47122907638549805
    },
    {
      "epoch": 0.000137042236328125,
      "model_forward_time": 0.11462044715881348,
      "step": 22453
    },
    {
      "epoch": 0.000137042236328125,
      "step": 22453,
      "training_step_time": 0.4120311737060547
    },
    {
      "epoch": 0.00013704833984375,
      "model_forward_time": 0.11453843116760254,
      "step": 22454
    },
    {
      "epoch": 0.00013704833984375,
      "step": 22454,
      "training_step_time": 0.47768282890319824
    },
    {
      "epoch": 0.000137054443359375,
      "model_forward_time": 0.1143794059753418,
      "step": 22455
    },
    {
      "epoch": 0.000137054443359375,
      "step": 22455,
      "training_step_time": 0.3861701488494873
    },
    {
      "epoch": 0.000137060546875,
      "model_forward_time": 0.11568379402160645,
      "step": 22456
    },
    {
      "epoch": 0.000137060546875,
      "step": 22456,
      "training_step_time": 0.3950037956237793
    },
    {
      "epoch": 0.000137066650390625,
      "model_forward_time": 0.11498737335205078,
      "step": 22457
    },
    {
      "epoch": 0.000137066650390625,
      "step": 22457,
      "training_step_time": 0.3909616470336914
    },
    {
      "epoch": 0.00013707275390625,
      "model_forward_time": 0.1154322624206543,
      "step": 22458
    },
    {
      "epoch": 0.00013707275390625,
      "step": 22458,
      "training_step_time": 0.4038841724395752
    },
    {
      "epoch": 0.000137078857421875,
      "model_forward_time": 0.11486339569091797,
      "step": 22459
    },
    {
      "epoch": 0.000137078857421875,
      "step": 22459,
      "training_step_time": 0.3973517417907715
    },
    {
      "epoch": 0.0001370849609375,
      "grad_norm": 0.15494178235530853,
      "learning_rate": 7.389425729478051e-05,
      "loss": 0.0503,
      "step": 22460
    },
    {
      "epoch": 0.0001370849609375,
      "model_forward_time": 0.11499595642089844,
      "step": 22460
    },
    {
      "epoch": 0.0001370849609375,
      "step": 22460,
      "training_step_time": 0.40714383125305176
    },
    {
      "epoch": 0.000137091064453125,
      "model_forward_time": 0.11817526817321777,
      "step": 22461
    },
    {
      "epoch": 0.000137091064453125,
      "step": 22461,
      "training_step_time": 0.38205838203430176
    },
    {
      "epoch": 0.00013709716796875,
      "model_forward_time": 0.11803483963012695,
      "step": 22462
    },
    {
      "epoch": 0.00013709716796875,
      "step": 22462,
      "training_step_time": 0.438495397567749
    },
    {
      "epoch": 0.000137103271484375,
      "model_forward_time": 0.1193232536315918,
      "step": 22463
    },
    {
      "epoch": 0.000137103271484375,
      "step": 22463,
      "training_step_time": 0.3810760974884033
    },
    {
      "epoch": 0.000137109375,
      "model_forward_time": 0.11842870712280273,
      "step": 22464
    },
    {
      "epoch": 0.000137109375,
      "step": 22464,
      "training_step_time": 0.380878210067749
    },
    {
      "epoch": 0.000137115478515625,
      "model_forward_time": 0.11909651756286621,
      "step": 22465
    },
    {
      "epoch": 0.000137115478515625,
      "step": 22465,
      "training_step_time": 0.4022789001464844
    },
    {
      "epoch": 0.00013712158203125,
      "model_forward_time": 0.11496424674987793,
      "step": 22466
    },
    {
      "epoch": 0.00013712158203125,
      "step": 22466,
      "training_step_time": 0.40833187103271484
    },
    {
      "epoch": 0.000137127685546875,
      "model_forward_time": 0.1153416633605957,
      "step": 22467
    },
    {
      "epoch": 0.000137127685546875,
      "step": 22467,
      "training_step_time": 0.4407808780670166
    },
    {
      "epoch": 0.0001371337890625,
      "model_forward_time": 0.11484885215759277,
      "step": 22468
    },
    {
      "epoch": 0.0001371337890625,
      "step": 22468,
      "training_step_time": 0.6023082733154297
    },
    {
      "epoch": 0.000137139892578125,
      "model_forward_time": 0.11496925354003906,
      "step": 22469
    },
    {
      "epoch": 0.000137139892578125,
      "step": 22469,
      "training_step_time": 0.45766353607177734
    },
    {
      "epoch": 0.00013714599609375,
      "grad_norm": 0.1108643114566803,
      "learning_rate": 7.387004625332608e-05,
      "loss": 0.0445,
      "step": 22470
    },
    {
      "epoch": 0.00013714599609375,
      "model_forward_time": 0.11474871635437012,
      "step": 22470
    },
    {
      "epoch": 0.00013714599609375,
      "step": 22470,
      "training_step_time": 0.39717769622802734
    },
    {
      "epoch": 0.000137152099609375,
      "model_forward_time": 0.11498594284057617,
      "step": 22471
    },
    {
      "epoch": 0.000137152099609375,
      "step": 22471,
      "training_step_time": 0.396547794342041
    },
    {
      "epoch": 0.000137158203125,
      "model_forward_time": 0.11504745483398438,
      "step": 22472
    },
    {
      "epoch": 0.000137158203125,
      "step": 22472,
      "training_step_time": 0.3827211856842041
    },
    {
      "epoch": 0.000137164306640625,
      "model_forward_time": 0.1145486831665039,
      "step": 22473
    },
    {
      "epoch": 0.000137164306640625,
      "step": 22473,
      "training_step_time": 0.4270908832550049
    },
    {
      "epoch": 0.00013717041015625,
      "model_forward_time": 0.1156003475189209,
      "step": 22474
    },
    {
      "epoch": 0.00013717041015625,
      "step": 22474,
      "training_step_time": 0.4936673641204834
    },
    {
      "epoch": 0.000137176513671875,
      "model_forward_time": 0.11499881744384766,
      "step": 22475
    },
    {
      "epoch": 0.000137176513671875,
      "step": 22475,
      "training_step_time": 0.3941066265106201
    },
    {
      "epoch": 0.0001371826171875,
      "model_forward_time": 0.11472487449645996,
      "step": 22476
    },
    {
      "epoch": 0.0001371826171875,
      "step": 22476,
      "training_step_time": 0.39440107345581055
    },
    {
      "epoch": 0.000137188720703125,
      "model_forward_time": 0.11496925354003906,
      "step": 22477
    },
    {
      "epoch": 0.000137188720703125,
      "step": 22477,
      "training_step_time": 0.39364027976989746
    },
    {
      "epoch": 0.00013719482421875,
      "model_forward_time": 0.11519908905029297,
      "step": 22478
    },
    {
      "epoch": 0.00013719482421875,
      "step": 22478,
      "training_step_time": 0.3834218978881836
    },
    {
      "epoch": 0.000137200927734375,
      "model_forward_time": 0.11492633819580078,
      "step": 22479
    },
    {
      "epoch": 0.000137200927734375,
      "step": 22479,
      "training_step_time": 0.4848926067352295
    },
    {
      "epoch": 0.00013720703125,
      "grad_norm": 0.13690152764320374,
      "learning_rate": 7.384582796078184e-05,
      "loss": 0.046,
      "step": 22480
    },
    {
      "epoch": 0.00013720703125,
      "model_forward_time": 0.11462974548339844,
      "step": 22480
    },
    {
      "epoch": 0.00013720703125,
      "step": 22480,
      "training_step_time": 0.4973173141479492
    },
    {
      "epoch": 0.000137213134765625,
      "model_forward_time": 0.11558127403259277,
      "step": 22481
    },
    {
      "epoch": 0.000137213134765625,
      "step": 22481,
      "training_step_time": 0.4378237724304199
    },
    {
      "epoch": 0.00013721923828125,
      "model_forward_time": 0.11536002159118652,
      "step": 22482
    },
    {
      "epoch": 0.00013721923828125,
      "step": 22482,
      "training_step_time": 0.4759223461151123
    },
    {
      "epoch": 0.000137225341796875,
      "model_forward_time": 0.11526012420654297,
      "step": 22483
    },
    {
      "epoch": 0.000137225341796875,
      "step": 22483,
      "training_step_time": 0.4836113452911377
    },
    {
      "epoch": 0.0001372314453125,
      "model_forward_time": 0.11461687088012695,
      "step": 22484
    },
    {
      "epoch": 0.0001372314453125,
      "step": 22484,
      "training_step_time": 0.3867967128753662
    },
    {
      "epoch": 0.000137237548828125,
      "model_forward_time": 0.11476325988769531,
      "step": 22485
    },
    {
      "epoch": 0.000137237548828125,
      "step": 22485,
      "training_step_time": 0.3802788257598877
    },
    {
      "epoch": 0.00013724365234375,
      "model_forward_time": 0.11446285247802734,
      "step": 22486
    },
    {
      "epoch": 0.00013724365234375,
      "step": 22486,
      "training_step_time": 0.47316694259643555
    },
    {
      "epoch": 0.000137249755859375,
      "model_forward_time": 0.11519026756286621,
      "step": 22487
    },
    {
      "epoch": 0.000137249755859375,
      "step": 22487,
      "training_step_time": 0.39820265769958496
    },
    {
      "epoch": 0.000137255859375,
      "model_forward_time": 0.11537384986877441,
      "step": 22488
    },
    {
      "epoch": 0.000137255859375,
      "step": 22488,
      "training_step_time": 0.4398021697998047
    },
    {
      "epoch": 0.000137261962890625,
      "model_forward_time": 0.12077665328979492,
      "step": 22489
    },
    {
      "epoch": 0.000137261962890625,
      "step": 22489,
      "training_step_time": 0.47591614723205566
    },
    {
      "epoch": 0.00013726806640625,
      "grad_norm": 0.1436770260334015,
      "learning_rate": 7.382160242450469e-05,
      "loss": 0.0501,
      "step": 22490
    },
    {
      "epoch": 0.00013726806640625,
      "model_forward_time": 0.11965465545654297,
      "step": 22490
    },
    {
      "epoch": 0.00013726806640625,
      "step": 22490,
      "training_step_time": 0.5482020378112793
    },
    {
      "epoch": 0.000137274169921875,
      "model_forward_time": 0.11879754066467285,
      "step": 22491
    },
    {
      "epoch": 0.000137274169921875,
      "step": 22491,
      "training_step_time": 0.6073706150054932
    },
    {
      "epoch": 0.0001372802734375,
      "model_forward_time": 0.11828947067260742,
      "step": 22492
    },
    {
      "epoch": 0.0001372802734375,
      "step": 22492,
      "training_step_time": 0.8325183391571045
    },
    {
      "epoch": 0.000137286376953125,
      "model_forward_time": 0.12071800231933594,
      "step": 22493
    },
    {
      "epoch": 0.000137286376953125,
      "step": 22493,
      "training_step_time": 0.7769298553466797
    },
    {
      "epoch": 0.00013729248046875,
      "model_forward_time": 0.1157684326171875,
      "step": 22494
    },
    {
      "epoch": 0.00013729248046875,
      "step": 22494,
      "training_step_time": 0.8326315879821777
    },
    {
      "epoch": 0.000137298583984375,
      "model_forward_time": 0.12032651901245117,
      "step": 22495
    },
    {
      "epoch": 0.000137298583984375,
      "step": 22495,
      "training_step_time": 0.7246794700622559
    },
    {
      "epoch": 0.0001373046875,
      "model_forward_time": 0.11966300010681152,
      "step": 22496
    },
    {
      "epoch": 0.0001373046875,
      "step": 22496,
      "training_step_time": 0.7496507167816162
    },
    {
      "epoch": 0.000137310791015625,
      "model_forward_time": 0.1183171272277832,
      "step": 22497
    },
    {
      "epoch": 0.000137310791015625,
      "step": 22497,
      "training_step_time": 0.6721155643463135
    },
    {
      "epoch": 0.00013731689453125,
      "model_forward_time": 0.11883878707885742,
      "step": 22498
    },
    {
      "epoch": 0.00013731689453125,
      "step": 22498,
      "training_step_time": 0.7661948204040527
    },
    {
      "epoch": 0.000137322998046875,
      "model_forward_time": 0.1222372055053711,
      "step": 22499
    },
    {
      "epoch": 0.000137322998046875,
      "step": 22499,
      "training_step_time": 0.6297883987426758
    },
    {
      "epoch": 0.0001373291015625,
      "grad_norm": 0.15875622630119324,
      "learning_rate": 7.379736965185368e-05,
      "loss": 0.0436,
      "step": 22500
    },
    {
      "epoch": 0.0001373291015625,
      "model_forward_time": 0.12113618850708008,
      "step": 22500
    },
    {
      "epoch": 0.0001373291015625,
      "step": 22500,
      "training_step_time": 0.6615824699401855
    },
    {
      "epoch": 0.000137335205078125,
      "model_forward_time": 0.11780357360839844,
      "step": 22501
    },
    {
      "epoch": 0.000137335205078125,
      "step": 22501,
      "training_step_time": 0.7008242607116699
    },
    {
      "epoch": 0.00013734130859375,
      "model_forward_time": 0.11654996871948242,
      "step": 22502
    },
    {
      "epoch": 0.00013734130859375,
      "step": 22502,
      "training_step_time": 0.6799886226654053
    },
    {
      "epoch": 0.000137347412109375,
      "model_forward_time": 0.12008213996887207,
      "step": 22503
    },
    {
      "epoch": 0.000137347412109375,
      "step": 22503,
      "training_step_time": 0.7241578102111816
    },
    {
      "epoch": 0.000137353515625,
      "model_forward_time": 0.12107110023498535,
      "step": 22504
    },
    {
      "epoch": 0.000137353515625,
      "step": 22504,
      "training_step_time": 0.6661207675933838
    },
    {
      "epoch": 0.000137359619140625,
      "model_forward_time": 0.12128424644470215,
      "step": 22505
    },
    {
      "epoch": 0.000137359619140625,
      "step": 22505,
      "training_step_time": 0.6778120994567871
    },
    {
      "epoch": 0.00013736572265625,
      "model_forward_time": 0.11919617652893066,
      "step": 22506
    },
    {
      "epoch": 0.00013736572265625,
      "step": 22506,
      "training_step_time": 0.6971480846405029
    },
    {
      "epoch": 0.000137371826171875,
      "model_forward_time": 0.11756348609924316,
      "step": 22507
    },
    {
      "epoch": 0.000137371826171875,
      "step": 22507,
      "training_step_time": 0.6986019611358643
    },
    {
      "epoch": 0.0001373779296875,
      "model_forward_time": 0.12481904029846191,
      "step": 22508
    },
    {
      "epoch": 0.0001373779296875,
      "step": 22508,
      "training_step_time": 0.6248002052307129
    },
    {
      "epoch": 0.000137384033203125,
      "model_forward_time": 0.12627840042114258,
      "step": 22509
    },
    {
      "epoch": 0.000137384033203125,
      "step": 22509,
      "training_step_time": 0.6682071685791016
    },
    {
      "epoch": 0.00013739013671875,
      "grad_norm": 0.16903255879878998,
      "learning_rate": 7.37731296501901e-05,
      "loss": 0.0527,
      "step": 22510
    },
    {
      "epoch": 0.00013739013671875,
      "model_forward_time": 0.11849021911621094,
      "step": 22510
    },
    {
      "epoch": 0.00013739013671875,
      "step": 22510,
      "training_step_time": 0.7011568546295166
    },
    {
      "epoch": 0.000137396240234375,
      "model_forward_time": 0.11757302284240723,
      "step": 22511
    },
    {
      "epoch": 0.000137396240234375,
      "step": 22511,
      "training_step_time": 0.7096536159515381
    },
    {
      "epoch": 0.00013740234375,
      "model_forward_time": 0.11910867691040039,
      "step": 22512
    },
    {
      "epoch": 0.00013740234375,
      "step": 22512,
      "training_step_time": 0.6666550636291504
    },
    {
      "epoch": 0.000137408447265625,
      "model_forward_time": 0.1166067123413086,
      "step": 22513
    },
    {
      "epoch": 0.000137408447265625,
      "step": 22513,
      "training_step_time": 0.7035338878631592
    },
    {
      "epoch": 0.00013741455078125,
      "model_forward_time": 0.12001276016235352,
      "step": 22514
    },
    {
      "epoch": 0.00013741455078125,
      "step": 22514,
      "training_step_time": 0.6459400653839111
    },
    {
      "epoch": 0.000137420654296875,
      "model_forward_time": 0.11635327339172363,
      "step": 22515
    },
    {
      "epoch": 0.000137420654296875,
      "step": 22515,
      "training_step_time": 0.6109144687652588
    },
    {
      "epoch": 0.0001374267578125,
      "model_forward_time": 0.1220865249633789,
      "step": 22516
    },
    {
      "epoch": 0.0001374267578125,
      "step": 22516,
      "training_step_time": 0.6842467784881592
    },
    {
      "epoch": 0.000137432861328125,
      "model_forward_time": 0.12207913398742676,
      "step": 22517
    },
    {
      "epoch": 0.000137432861328125,
      "step": 22517,
      "training_step_time": 0.6474471092224121
    },
    {
      "epoch": 0.00013743896484375,
      "model_forward_time": 0.12834620475769043,
      "step": 22518
    },
    {
      "epoch": 0.00013743896484375,
      "step": 22518,
      "training_step_time": 0.6360461711883545
    },
    {
      "epoch": 0.000137445068359375,
      "model_forward_time": 0.11981487274169922,
      "step": 22519
    },
    {
      "epoch": 0.000137445068359375,
      "step": 22519,
      "training_step_time": 0.7209422588348389
    },
    {
      "epoch": 0.000137451171875,
      "grad_norm": 0.2110680192708969,
      "learning_rate": 7.374888242687746e-05,
      "loss": 0.055,
      "step": 22520
    },
    {
      "epoch": 0.000137451171875,
      "model_forward_time": 0.11730742454528809,
      "step": 22520
    },
    {
      "epoch": 0.000137451171875,
      "step": 22520,
      "training_step_time": 0.6927545070648193
    },
    {
      "epoch": 0.000137457275390625,
      "model_forward_time": 0.1187129020690918,
      "step": 22521
    },
    {
      "epoch": 0.000137457275390625,
      "step": 22521,
      "training_step_time": 0.7134873867034912
    },
    {
      "epoch": 0.00013746337890625,
      "model_forward_time": 0.1204385757446289,
      "step": 22522
    },
    {
      "epoch": 0.00013746337890625,
      "step": 22522,
      "training_step_time": 0.5680623054504395
    },
    {
      "epoch": 0.000137469482421875,
      "model_forward_time": 0.12163162231445312,
      "step": 22523
    },
    {
      "epoch": 0.000137469482421875,
      "step": 22523,
      "training_step_time": 0.8435869216918945
    },
    {
      "epoch": 0.0001374755859375,
      "model_forward_time": 0.12103033065795898,
      "step": 22524
    },
    {
      "epoch": 0.0001374755859375,
      "step": 22524,
      "training_step_time": 0.719367504119873
    },
    {
      "epoch": 0.000137481689453125,
      "model_forward_time": 0.11937975883483887,
      "step": 22525
    },
    {
      "epoch": 0.000137481689453125,
      "step": 22525,
      "training_step_time": 0.7502005100250244
    },
    {
      "epoch": 0.00013748779296875,
      "model_forward_time": 0.11719584465026855,
      "step": 22526
    },
    {
      "epoch": 0.00013748779296875,
      "step": 22526,
      "training_step_time": 0.6748719215393066
    },
    {
      "epoch": 0.000137493896484375,
      "model_forward_time": 0.11858916282653809,
      "step": 22527
    },
    {
      "epoch": 0.000137493896484375,
      "step": 22527,
      "training_step_time": 0.6079807281494141
    },
    {
      "epoch": 0.0001375,
      "model_forward_time": 0.11931157112121582,
      "step": 22528
    },
    {
      "epoch": 0.0001375,
      "step": 22528,
      "training_step_time": 0.7217073440551758
    },
    {
      "epoch": 0.000137506103515625,
      "model_forward_time": 0.12355589866638184,
      "step": 22529
    },
    {
      "epoch": 0.000137506103515625,
      "step": 22529,
      "training_step_time": 0.6915643215179443
    },
    {
      "epoch": 0.00013751220703125,
      "grad_norm": 0.14077119529247284,
      "learning_rate": 7.372462798928137e-05,
      "loss": 0.0571,
      "step": 22530
    },
    {
      "epoch": 0.00013751220703125,
      "model_forward_time": 0.11893248558044434,
      "step": 22530
    },
    {
      "epoch": 0.00013751220703125,
      "step": 22530,
      "training_step_time": 0.7179009914398193
    },
    {
      "epoch": 0.000137518310546875,
      "model_forward_time": 0.13256549835205078,
      "step": 22531
    },
    {
      "epoch": 0.000137518310546875,
      "step": 22531,
      "training_step_time": 0.5306475162506104
    },
    {
      "epoch": 0.0001375244140625,
      "model_forward_time": 0.12251830101013184,
      "step": 22532
    },
    {
      "epoch": 0.0001375244140625,
      "step": 22532,
      "training_step_time": 0.6688196659088135
    },
    {
      "epoch": 0.000137530517578125,
      "model_forward_time": 0.1157541275024414,
      "step": 22533
    },
    {
      "epoch": 0.000137530517578125,
      "step": 22533,
      "training_step_time": 0.6902017593383789
    },
    {
      "epoch": 0.00013753662109375,
      "model_forward_time": 0.11858415603637695,
      "step": 22534
    },
    {
      "epoch": 0.00013753662109375,
      "step": 22534,
      "training_step_time": 0.638474702835083
    },
    {
      "epoch": 0.000137542724609375,
      "model_forward_time": 0.12370562553405762,
      "step": 22535
    },
    {
      "epoch": 0.000137542724609375,
      "step": 22535,
      "training_step_time": 0.6778805255889893
    },
    {
      "epoch": 0.000137548828125,
      "model_forward_time": 0.11864686012268066,
      "step": 22536
    },
    {
      "epoch": 0.000137548828125,
      "step": 22536,
      "training_step_time": 0.66168212890625
    },
    {
      "epoch": 0.000137554931640625,
      "model_forward_time": 0.1234273910522461,
      "step": 22537
    },
    {
      "epoch": 0.000137554931640625,
      "step": 22537,
      "training_step_time": 0.6728975772857666
    },
    {
      "epoch": 0.00013756103515625,
      "model_forward_time": 0.11728501319885254,
      "step": 22538
    },
    {
      "epoch": 0.00013756103515625,
      "step": 22538,
      "training_step_time": 0.775254487991333
    },
    {
      "epoch": 0.000137567138671875,
      "model_forward_time": 0.12332296371459961,
      "step": 22539
    },
    {
      "epoch": 0.000137567138671875,
      "step": 22539,
      "training_step_time": 0.7214314937591553
    },
    {
      "epoch": 0.0001375732421875,
      "grad_norm": 0.1463848054409027,
      "learning_rate": 7.37003663447697e-05,
      "loss": 0.0509,
      "step": 22540
    },
    {
      "epoch": 0.0001375732421875,
      "model_forward_time": 0.1233057975769043,
      "step": 22540
    },
    {
      "epoch": 0.0001375732421875,
      "step": 22540,
      "training_step_time": 0.7224836349487305
    },
    {
      "epoch": 0.000137579345703125,
      "model_forward_time": 0.12990880012512207,
      "step": 22541
    },
    {
      "epoch": 0.000137579345703125,
      "step": 22541,
      "training_step_time": 0.6525454521179199
    },
    {
      "epoch": 0.00013758544921875,
      "model_forward_time": 0.12235569953918457,
      "step": 22542
    },
    {
      "epoch": 0.00013758544921875,
      "step": 22542,
      "training_step_time": 0.6987080574035645
    },
    {
      "epoch": 0.000137591552734375,
      "model_forward_time": 0.11793041229248047,
      "step": 22543
    },
    {
      "epoch": 0.000137591552734375,
      "step": 22543,
      "training_step_time": 0.6641733646392822
    },
    {
      "epoch": 0.00013759765625,
      "model_forward_time": 0.1200265884399414,
      "step": 22544
    },
    {
      "epoch": 0.00013759765625,
      "step": 22544,
      "training_step_time": 0.6375203132629395
    },
    {
      "epoch": 0.000137603759765625,
      "model_forward_time": 0.12099242210388184,
      "step": 22545
    },
    {
      "epoch": 0.000137603759765625,
      "step": 22545,
      "training_step_time": 0.6360063552856445
    },
    {
      "epoch": 0.00013760986328125,
      "model_forward_time": 0.12381386756896973,
      "step": 22546
    },
    {
      "epoch": 0.00013760986328125,
      "step": 22546,
      "training_step_time": 0.641524076461792
    },
    {
      "epoch": 0.000137615966796875,
      "model_forward_time": 0.11722803115844727,
      "step": 22547
    },
    {
      "epoch": 0.000137615966796875,
      "step": 22547,
      "training_step_time": 0.6179072856903076
    },
    {
      "epoch": 0.0001376220703125,
      "model_forward_time": 0.11968374252319336,
      "step": 22548
    },
    {
      "epoch": 0.0001376220703125,
      "step": 22548,
      "training_step_time": 0.6757717132568359
    },
    {
      "epoch": 0.000137628173828125,
      "model_forward_time": 0.1168057918548584,
      "step": 22549
    },
    {
      "epoch": 0.000137628173828125,
      "step": 22549,
      "training_step_time": 0.7763645648956299
    },
    {
      "epoch": 0.00013763427734375,
      "grad_norm": 0.1969442516565323,
      "learning_rate": 7.367609750071252e-05,
      "loss": 0.0521,
      "step": 22550
    },
    {
      "epoch": 0.00013763427734375,
      "model_forward_time": 0.11903190612792969,
      "step": 22550
    },
    {
      "epoch": 0.00013763427734375,
      "step": 22550,
      "training_step_time": 0.6166915893554688
    },
    {
      "epoch": 0.000137640380859375,
      "model_forward_time": 0.11995220184326172,
      "step": 22551
    },
    {
      "epoch": 0.000137640380859375,
      "step": 22551,
      "training_step_time": 0.6794817447662354
    },
    {
      "epoch": 0.000137646484375,
      "model_forward_time": 0.12464189529418945,
      "step": 22552
    },
    {
      "epoch": 0.000137646484375,
      "step": 22552,
      "training_step_time": 0.5910422801971436
    },
    {
      "epoch": 0.000137652587890625,
      "model_forward_time": 0.11602473258972168,
      "step": 22553
    },
    {
      "epoch": 0.000137652587890625,
      "step": 22553,
      "training_step_time": 0.641749382019043
    },
    {
      "epoch": 0.00013765869140625,
      "model_forward_time": 0.12468934059143066,
      "step": 22554
    },
    {
      "epoch": 0.00013765869140625,
      "step": 22554,
      "training_step_time": 0.6199760437011719
    },
    {
      "epoch": 0.000137664794921875,
      "model_forward_time": 0.12573981285095215,
      "step": 22555
    },
    {
      "epoch": 0.000137664794921875,
      "step": 22555,
      "training_step_time": 0.6117415428161621
    },
    {
      "epoch": 0.0001376708984375,
      "model_forward_time": 0.1200873851776123,
      "step": 22556
    },
    {
      "epoch": 0.0001376708984375,
      "step": 22556,
      "training_step_time": 0.5924301147460938
    },
    {
      "epoch": 0.000137677001953125,
      "model_forward_time": 0.12357807159423828,
      "step": 22557
    },
    {
      "epoch": 0.000137677001953125,
      "step": 22557,
      "training_step_time": 0.5753641128540039
    },
    {
      "epoch": 0.00013768310546875,
      "model_forward_time": 0.11829090118408203,
      "step": 22558
    },
    {
      "epoch": 0.00013768310546875,
      "step": 22558,
      "training_step_time": 0.6284308433532715
    },
    {
      "epoch": 0.000137689208984375,
      "model_forward_time": 0.11801671981811523,
      "step": 22559
    },
    {
      "epoch": 0.000137689208984375,
      "step": 22559,
      "training_step_time": 0.6260087490081787
    },
    {
      "epoch": 0.0001376953125,
      "grad_norm": 0.15178829431533813,
      "learning_rate": 7.365182146448205e-05,
      "loss": 0.0519,
      "step": 22560
    },
    {
      "epoch": 0.0001376953125,
      "model_forward_time": 0.11914610862731934,
      "step": 22560
    },
    {
      "epoch": 0.0001376953125,
      "step": 22560,
      "training_step_time": 0.5555942058563232
    },
    {
      "epoch": 0.000137701416015625,
      "model_forward_time": 0.11915230751037598,
      "step": 22561
    },
    {
      "epoch": 0.000137701416015625,
      "step": 22561,
      "training_step_time": 0.4871025085449219
    },
    {
      "epoch": 0.00013770751953125,
      "model_forward_time": 0.11788010597229004,
      "step": 22562
    },
    {
      "epoch": 0.00013770751953125,
      "step": 22562,
      "training_step_time": 0.46956396102905273
    },
    {
      "epoch": 0.000137713623046875,
      "model_forward_time": 0.11591291427612305,
      "step": 22563
    },
    {
      "epoch": 0.000137713623046875,
      "step": 22563,
      "training_step_time": 0.45179176330566406
    },
    {
      "epoch": 0.0001377197265625,
      "model_forward_time": 0.11597514152526855,
      "step": 22564
    },
    {
      "epoch": 0.0001377197265625,
      "step": 22564,
      "training_step_time": 0.41248154640197754
    },
    {
      "epoch": 0.000137725830078125,
      "model_forward_time": 0.11532187461853027,
      "step": 22565
    },
    {
      "epoch": 0.000137725830078125,
      "step": 22565,
      "training_step_time": 0.409987211227417
    },
    {
      "epoch": 0.00013773193359375,
      "model_forward_time": 0.11575818061828613,
      "step": 22566
    },
    {
      "epoch": 0.00013773193359375,
      "step": 22566,
      "training_step_time": 0.41205596923828125
    },
    {
      "epoch": 0.000137738037109375,
      "model_forward_time": 0.11539602279663086,
      "step": 22567
    },
    {
      "epoch": 0.000137738037109375,
      "step": 22567,
      "training_step_time": 0.39013099670410156
    },
    {
      "epoch": 0.000137744140625,
      "model_forward_time": 0.11542463302612305,
      "step": 22568
    },
    {
      "epoch": 0.000137744140625,
      "step": 22568,
      "training_step_time": 0.3867661952972412
    },
    {
      "epoch": 0.000137750244140625,
      "model_forward_time": 0.1149287223815918,
      "step": 22569
    },
    {
      "epoch": 0.000137750244140625,
      "step": 22569,
      "training_step_time": 0.39028334617614746
    },
    {
      "epoch": 0.00013775634765625,
      "grad_norm": 0.13018544018268585,
      "learning_rate": 7.362753824345272e-05,
      "loss": 0.0539,
      "step": 22570
    },
    {
      "epoch": 0.00013775634765625,
      "model_forward_time": 0.11462855339050293,
      "step": 22570
    },
    {
      "epoch": 0.00013775634765625,
      "step": 22570,
      "training_step_time": 0.377713680267334
    },
    {
      "epoch": 0.000137762451171875,
      "model_forward_time": 0.11496639251708984,
      "step": 22571
    },
    {
      "epoch": 0.000137762451171875,
      "step": 22571,
      "training_step_time": 0.4190254211425781
    },
    {
      "epoch": 0.0001377685546875,
      "model_forward_time": 0.11554670333862305,
      "step": 22572
    },
    {
      "epoch": 0.0001377685546875,
      "step": 22572,
      "training_step_time": 0.47318172454833984
    },
    {
      "epoch": 0.000137774658203125,
      "model_forward_time": 0.11578822135925293,
      "step": 22573
    },
    {
      "epoch": 0.000137774658203125,
      "step": 22573,
      "training_step_time": 0.4703657627105713
    },
    {
      "epoch": 0.00013778076171875,
      "model_forward_time": 0.11527538299560547,
      "step": 22574
    },
    {
      "epoch": 0.00013778076171875,
      "step": 22574,
      "training_step_time": 0.4200174808502197
    },
    {
      "epoch": 0.000137786865234375,
      "model_forward_time": 0.11567521095275879,
      "step": 22575
    },
    {
      "epoch": 0.000137786865234375,
      "step": 22575,
      "training_step_time": 0.39635610580444336
    },
    {
      "epoch": 0.00013779296875,
      "model_forward_time": 0.11461305618286133,
      "step": 22576
    },
    {
      "epoch": 0.00013779296875,
      "step": 22576,
      "training_step_time": 0.36673545837402344
    },
    {
      "epoch": 0.000137799072265625,
      "model_forward_time": 0.11681818962097168,
      "step": 22577
    },
    {
      "epoch": 0.000137799072265625,
      "step": 22577,
      "training_step_time": 0.44617319107055664
    },
    {
      "epoch": 0.00013780517578125,
      "model_forward_time": 0.11536288261413574,
      "step": 22578
    },
    {
      "epoch": 0.00013780517578125,
      "step": 22578,
      "training_step_time": 0.4058551788330078
    },
    {
      "epoch": 0.000137811279296875,
      "model_forward_time": 0.11587762832641602,
      "step": 22579
    },
    {
      "epoch": 0.000137811279296875,
      "step": 22579,
      "training_step_time": 0.39187121391296387
    },
    {
      "epoch": 0.0001378173828125,
      "grad_norm": 0.13048771023750305,
      "learning_rate": 7.36032478450011e-05,
      "loss": 0.0484,
      "step": 22580
    },
    {
      "epoch": 0.0001378173828125,
      "model_forward_time": 0.1148383617401123,
      "step": 22580
    },
    {
      "epoch": 0.0001378173828125,
      "step": 22580,
      "training_step_time": 0.39173197746276855
    },
    {
      "epoch": 0.000137823486328125,
      "model_forward_time": 0.1148369312286377,
      "step": 22581
    },
    {
      "epoch": 0.000137823486328125,
      "step": 22581,
      "training_step_time": 0.3957836627960205
    },
    {
      "epoch": 0.00013782958984375,
      "model_forward_time": 0.11496305465698242,
      "step": 22582
    },
    {
      "epoch": 0.00013782958984375,
      "step": 22582,
      "training_step_time": 0.37808895111083984
    },
    {
      "epoch": 0.000137835693359375,
      "model_forward_time": 0.11663174629211426,
      "step": 22583
    },
    {
      "epoch": 0.000137835693359375,
      "step": 22583,
      "training_step_time": 0.3951566219329834
    },
    {
      "epoch": 0.000137841796875,
      "model_forward_time": 0.11473703384399414,
      "step": 22584
    },
    {
      "epoch": 0.000137841796875,
      "step": 22584,
      "training_step_time": 0.3882284164428711
    },
    {
      "epoch": 0.000137847900390625,
      "model_forward_time": 0.11506533622741699,
      "step": 22585
    },
    {
      "epoch": 0.000137847900390625,
      "step": 22585,
      "training_step_time": 0.44466304779052734
    },
    {
      "epoch": 0.00013785400390625,
      "model_forward_time": 0.11458230018615723,
      "step": 22586
    },
    {
      "epoch": 0.00013785400390625,
      "step": 22586,
      "training_step_time": 0.4068615436553955
    },
    {
      "epoch": 0.000137860107421875,
      "model_forward_time": 0.11487078666687012,
      "step": 22587
    },
    {
      "epoch": 0.000137860107421875,
      "step": 22587,
      "training_step_time": 0.47565412521362305
    },
    {
      "epoch": 0.0001378662109375,
      "model_forward_time": 0.11612248420715332,
      "step": 22588
    },
    {
      "epoch": 0.0001378662109375,
      "step": 22588,
      "training_step_time": 0.39105963706970215
    },
    {
      "epoch": 0.000137872314453125,
      "model_forward_time": 0.11523079872131348,
      "step": 22589
    },
    {
      "epoch": 0.000137872314453125,
      "step": 22589,
      "training_step_time": 0.4181075096130371
    },
    {
      "epoch": 0.00013787841796875,
      "grad_norm": 0.1576095074415207,
      "learning_rate": 7.357895027650598e-05,
      "loss": 0.0465,
      "step": 22590
    },
    {
      "epoch": 0.00013787841796875,
      "model_forward_time": 0.11448311805725098,
      "step": 22590
    },
    {
      "epoch": 0.00013787841796875,
      "step": 22590,
      "training_step_time": 0.3942396640777588
    },
    {
      "epoch": 0.000137884521484375,
      "model_forward_time": 0.11519360542297363,
      "step": 22591
    },
    {
      "epoch": 0.000137884521484375,
      "step": 22591,
      "training_step_time": 0.4550330638885498
    },
    {
      "epoch": 0.000137890625,
      "model_forward_time": 0.11503338813781738,
      "step": 22592
    },
    {
      "epoch": 0.000137890625,
      "step": 22592,
      "training_step_time": 0.552863597869873
    },
    {
      "epoch": 0.000137896728515625,
      "model_forward_time": 0.11544036865234375,
      "step": 22593
    },
    {
      "epoch": 0.000137896728515625,
      "step": 22593,
      "training_step_time": 0.4436607360839844
    },
    {
      "epoch": 0.00013790283203125,
      "model_forward_time": 0.11528944969177246,
      "step": 22594
    },
    {
      "epoch": 0.00013790283203125,
      "step": 22594,
      "training_step_time": 0.3714327812194824
    },
    {
      "epoch": 0.000137908935546875,
      "model_forward_time": 0.11566305160522461,
      "step": 22595
    },
    {
      "epoch": 0.000137908935546875,
      "step": 22595,
      "training_step_time": 0.3923342227935791
    },
    {
      "epoch": 0.0001379150390625,
      "model_forward_time": 0.11498332023620605,
      "step": 22596
    },
    {
      "epoch": 0.0001379150390625,
      "step": 22596,
      "training_step_time": 0.3906898498535156
    },
    {
      "epoch": 0.000137921142578125,
      "model_forward_time": 0.11580514907836914,
      "step": 22597
    },
    {
      "epoch": 0.000137921142578125,
      "step": 22597,
      "training_step_time": 0.39104604721069336
    },
    {
      "epoch": 0.00013792724609375,
      "model_forward_time": 0.11649656295776367,
      "step": 22598
    },
    {
      "epoch": 0.00013792724609375,
      "step": 22598,
      "training_step_time": 1.1118652820587158
    },
    {
      "epoch": 0.000137933349609375,
      "model_forward_time": 0.11437606811523438,
      "step": 22599
    },
    {
      "epoch": 0.000137933349609375,
      "step": 22599,
      "training_step_time": 0.42919182777404785
    },
    {
      "epoch": 0.000137939453125,
      "grad_norm": 0.13258780539035797,
      "learning_rate": 7.355464554534837e-05,
      "loss": 0.0513,
      "step": 22600
    },
    {
      "epoch": 0.000137939453125,
      "model_forward_time": 0.11365556716918945,
      "step": 22600
    },
    {
      "epoch": 0.000137939453125,
      "step": 22600,
      "training_step_time": 0.42066097259521484
    },
    {
      "epoch": 0.000137945556640625,
      "model_forward_time": 0.1140131950378418,
      "step": 22601
    },
    {
      "epoch": 0.000137945556640625,
      "step": 22601,
      "training_step_time": 0.49209022521972656
    },
    {
      "epoch": 0.00013795166015625,
      "model_forward_time": 0.11380982398986816,
      "step": 22602
    },
    {
      "epoch": 0.00013795166015625,
      "step": 22602,
      "training_step_time": 0.3885016441345215
    },
    {
      "epoch": 0.000137957763671875,
      "model_forward_time": 0.11429357528686523,
      "step": 22603
    },
    {
      "epoch": 0.000137957763671875,
      "step": 22603,
      "training_step_time": 0.3920881748199463
    },
    {
      "epoch": 0.0001379638671875,
      "model_forward_time": 0.11492204666137695,
      "step": 22604
    },
    {
      "epoch": 0.0001379638671875,
      "step": 22604,
      "training_step_time": 0.36334800720214844
    },
    {
      "epoch": 0.000137969970703125,
      "model_forward_time": 0.11514544486999512,
      "step": 22605
    },
    {
      "epoch": 0.000137969970703125,
      "step": 22605,
      "training_step_time": 0.43912220001220703
    },
    {
      "epoch": 0.00013797607421875,
      "model_forward_time": 0.11544132232666016,
      "step": 22606
    },
    {
      "epoch": 0.00013797607421875,
      "step": 22606,
      "training_step_time": 0.42178845405578613
    },
    {
      "epoch": 0.000137982177734375,
      "model_forward_time": 0.11485147476196289,
      "step": 22607
    },
    {
      "epoch": 0.000137982177734375,
      "step": 22607,
      "training_step_time": 0.38515615463256836
    },
    {
      "epoch": 0.00013798828125,
      "model_forward_time": 0.11474061012268066,
      "step": 22608
    },
    {
      "epoch": 0.00013798828125,
      "step": 22608,
      "training_step_time": 0.3833920955657959
    },
    {
      "epoch": 0.000137994384765625,
      "model_forward_time": 0.1161191463470459,
      "step": 22609
    },
    {
      "epoch": 0.000137994384765625,
      "step": 22609,
      "training_step_time": 0.4212656021118164
    },
    {
      "epoch": 0.00013800048828125,
      "grad_norm": 0.15641511976718903,
      "learning_rate": 7.353033365891134e-05,
      "loss": 0.0531,
      "step": 22610
    },
    {
      "epoch": 0.00013800048828125,
      "model_forward_time": 0.11526226997375488,
      "step": 22610
    },
    {
      "epoch": 0.00013800048828125,
      "step": 22610,
      "training_step_time": 0.39667391777038574
    },
    {
      "epoch": 0.000138006591796875,
      "model_forward_time": 0.11498737335205078,
      "step": 22611
    },
    {
      "epoch": 0.000138006591796875,
      "step": 22611,
      "training_step_time": 0.4237947463989258
    },
    {
      "epoch": 0.0001380126953125,
      "model_forward_time": 0.11581134796142578,
      "step": 22612
    },
    {
      "epoch": 0.0001380126953125,
      "step": 22612,
      "training_step_time": 0.3966190814971924
    },
    {
      "epoch": 0.000138018798828125,
      "model_forward_time": 0.11497282981872559,
      "step": 22613
    },
    {
      "epoch": 0.000138018798828125,
      "step": 22613,
      "training_step_time": 0.39739203453063965
    },
    {
      "epoch": 0.00013802490234375,
      "model_forward_time": 0.11533379554748535,
      "step": 22614
    },
    {
      "epoch": 0.00013802490234375,
      "step": 22614,
      "training_step_time": 0.4251065254211426
    },
    {
      "epoch": 0.000138031005859375,
      "model_forward_time": 0.11520576477050781,
      "step": 22615
    },
    {
      "epoch": 0.000138031005859375,
      "step": 22615,
      "training_step_time": 0.41732120513916016
    },
    {
      "epoch": 0.000138037109375,
      "model_forward_time": 0.11563897132873535,
      "step": 22616
    },
    {
      "epoch": 0.000138037109375,
      "step": 22616,
      "training_step_time": 0.43660855293273926
    },
    {
      "epoch": 0.000138043212890625,
      "model_forward_time": 0.11471247673034668,
      "step": 22617
    },
    {
      "epoch": 0.000138043212890625,
      "step": 22617,
      "training_step_time": 0.390460729598999
    },
    {
      "epoch": 0.00013804931640625,
      "model_forward_time": 0.11516499519348145,
      "step": 22618
    },
    {
      "epoch": 0.00013804931640625,
      "step": 22618,
      "training_step_time": 0.374164342880249
    },
    {
      "epoch": 0.000138055419921875,
      "model_forward_time": 0.11543679237365723,
      "step": 22619
    },
    {
      "epoch": 0.000138055419921875,
      "step": 22619,
      "training_step_time": 0.41315150260925293
    },
    {
      "epoch": 0.0001380615234375,
      "grad_norm": 0.14837117493152618,
      "learning_rate": 7.350601462458024e-05,
      "loss": 0.0504,
      "step": 22620
    },
    {
      "epoch": 0.0001380615234375,
      "model_forward_time": 0.1153266429901123,
      "step": 22620
    },
    {
      "epoch": 0.0001380615234375,
      "step": 22620,
      "training_step_time": 0.39778733253479004
    },
    {
      "epoch": 0.000138067626953125,
      "model_forward_time": 0.11602449417114258,
      "step": 22621
    },
    {
      "epoch": 0.000138067626953125,
      "step": 22621,
      "training_step_time": 0.3962118625640869
    },
    {
      "epoch": 0.00013807373046875,
      "model_forward_time": 0.11557507514953613,
      "step": 22622
    },
    {
      "epoch": 0.00013807373046875,
      "step": 22622,
      "training_step_time": 0.397855281829834
    },
    {
      "epoch": 0.000138079833984375,
      "model_forward_time": 0.11597490310668945,
      "step": 22623
    },
    {
      "epoch": 0.000138079833984375,
      "step": 22623,
      "training_step_time": 0.3906865119934082
    },
    {
      "epoch": 0.0001380859375,
      "model_forward_time": 0.11555743217468262,
      "step": 22624
    },
    {
      "epoch": 0.0001380859375,
      "step": 22624,
      "training_step_time": 0.37442755699157715
    },
    {
      "epoch": 0.000138092041015625,
      "model_forward_time": 0.11573004722595215,
      "step": 22625
    },
    {
      "epoch": 0.000138092041015625,
      "step": 22625,
      "training_step_time": 0.4248030185699463
    },
    {
      "epoch": 0.00013809814453125,
      "model_forward_time": 0.11477518081665039,
      "step": 22626
    },
    {
      "epoch": 0.00013809814453125,
      "step": 22626,
      "training_step_time": 0.43351316452026367
    },
    {
      "epoch": 0.000138104248046875,
      "model_forward_time": 0.11552143096923828,
      "step": 22627
    },
    {
      "epoch": 0.000138104248046875,
      "step": 22627,
      "training_step_time": 0.45477986335754395
    },
    {
      "epoch": 0.0001381103515625,
      "model_forward_time": 0.11525368690490723,
      "step": 22628
    },
    {
      "epoch": 0.0001381103515625,
      "step": 22628,
      "training_step_time": 0.5521578788757324
    },
    {
      "epoch": 0.000138116455078125,
      "model_forward_time": 0.11777806282043457,
      "step": 22629
    },
    {
      "epoch": 0.000138116455078125,
      "step": 22629,
      "training_step_time": 0.4339480400085449
    },
    {
      "epoch": 0.00013812255859375,
      "grad_norm": 0.17355935275554657,
      "learning_rate": 7.348168844974254e-05,
      "loss": 0.047,
      "step": 22630
    },
    {
      "epoch": 0.00013812255859375,
      "model_forward_time": 0.11453843116760254,
      "step": 22630
    },
    {
      "epoch": 0.00013812255859375,
      "step": 22630,
      "training_step_time": 0.3789234161376953
    },
    {
      "epoch": 0.000138128662109375,
      "model_forward_time": 0.11435580253601074,
      "step": 22631
    },
    {
      "epoch": 0.000138128662109375,
      "step": 22631,
      "training_step_time": 0.40352964401245117
    },
    {
      "epoch": 0.000138134765625,
      "model_forward_time": 0.11523556709289551,
      "step": 22632
    },
    {
      "epoch": 0.000138134765625,
      "step": 22632,
      "training_step_time": 0.3955061435699463
    },
    {
      "epoch": 0.000138140869140625,
      "model_forward_time": 0.11487984657287598,
      "step": 22633
    },
    {
      "epoch": 0.000138140869140625,
      "step": 22633,
      "training_step_time": 0.3895719051361084
    },
    {
      "epoch": 0.00013814697265625,
      "model_forward_time": 0.11561369895935059,
      "step": 22634
    },
    {
      "epoch": 0.00013814697265625,
      "step": 22634,
      "training_step_time": 0.5182619094848633
    },
    {
      "epoch": 0.000138153076171875,
      "model_forward_time": 0.11507654190063477,
      "step": 22635
    },
    {
      "epoch": 0.000138153076171875,
      "step": 22635,
      "training_step_time": 0.5190234184265137
    },
    {
      "epoch": 0.0001381591796875,
      "model_forward_time": 0.11488914489746094,
      "step": 22636
    },
    {
      "epoch": 0.0001381591796875,
      "step": 22636,
      "training_step_time": 0.37868475914001465
    },
    {
      "epoch": 0.000138165283203125,
      "model_forward_time": 0.11946272850036621,
      "step": 22637
    },
    {
      "epoch": 0.000138165283203125,
      "step": 22637,
      "training_step_time": 0.38454222679138184
    },
    {
      "epoch": 0.00013817138671875,
      "model_forward_time": 0.11568307876586914,
      "step": 22638
    },
    {
      "epoch": 0.00013817138671875,
      "step": 22638,
      "training_step_time": 0.417003870010376
    },
    {
      "epoch": 0.000138177490234375,
      "model_forward_time": 0.11818051338195801,
      "step": 22639
    },
    {
      "epoch": 0.000138177490234375,
      "step": 22639,
      "training_step_time": 0.43145155906677246
    },
    {
      "epoch": 0.00013818359375,
      "grad_norm": 0.14001747965812683,
      "learning_rate": 7.345735514178787e-05,
      "loss": 0.0529,
      "step": 22640
    },
    {
      "epoch": 0.00013818359375,
      "model_forward_time": 0.1182699203491211,
      "step": 22640
    },
    {
      "epoch": 0.00013818359375,
      "step": 22640,
      "training_step_time": 0.38779425621032715
    },
    {
      "epoch": 0.000138189697265625,
      "model_forward_time": 0.11810755729675293,
      "step": 22641
    },
    {
      "epoch": 0.000138189697265625,
      "step": 22641,
      "training_step_time": 0.3879988193511963
    },
    {
      "epoch": 0.00013819580078125,
      "model_forward_time": 0.11909246444702148,
      "step": 22642
    },
    {
      "epoch": 0.00013819580078125,
      "step": 22642,
      "training_step_time": 0.39229297637939453
    },
    {
      "epoch": 0.000138201904296875,
      "model_forward_time": 0.11698341369628906,
      "step": 22643
    },
    {
      "epoch": 0.000138201904296875,
      "step": 22643,
      "training_step_time": 0.46579837799072266
    },
    {
      "epoch": 0.0001382080078125,
      "model_forward_time": 0.11497879028320312,
      "step": 22644
    },
    {
      "epoch": 0.0001382080078125,
      "step": 22644,
      "training_step_time": 0.415149450302124
    },
    {
      "epoch": 0.000138214111328125,
      "model_forward_time": 0.11504054069519043,
      "step": 22645
    },
    {
      "epoch": 0.000138214111328125,
      "step": 22645,
      "training_step_time": 0.3993673324584961
    },
    {
      "epoch": 0.00013822021484375,
      "model_forward_time": 0.11482548713684082,
      "step": 22646
    },
    {
      "epoch": 0.00013822021484375,
      "step": 22646,
      "training_step_time": 0.4771876335144043
    },
    {
      "epoch": 0.000138226318359375,
      "model_forward_time": 0.11515998840332031,
      "step": 22647
    },
    {
      "epoch": 0.000138226318359375,
      "step": 22647,
      "training_step_time": 0.4045908451080322
    },
    {
      "epoch": 0.000138232421875,
      "model_forward_time": 0.11571717262268066,
      "step": 22648
    },
    {
      "epoch": 0.000138232421875,
      "step": 22648,
      "training_step_time": 0.37657976150512695
    },
    {
      "epoch": 0.000138238525390625,
      "model_forward_time": 0.11549806594848633,
      "step": 22649
    },
    {
      "epoch": 0.000138238525390625,
      "step": 22649,
      "training_step_time": 0.49398136138916016
    },
    {
      "epoch": 0.00013824462890625,
      "grad_norm": 0.14925862848758698,
      "learning_rate": 7.343301470810808e-05,
      "loss": 0.0509,
      "step": 22650
    },
    {
      "epoch": 0.00013824462890625,
      "model_forward_time": 0.11564517021179199,
      "step": 22650
    },
    {
      "epoch": 0.00013824462890625,
      "step": 22650,
      "training_step_time": 0.42071986198425293
    },
    {
      "epoch": 0.000138250732421875,
      "model_forward_time": 0.11511754989624023,
      "step": 22651
    },
    {
      "epoch": 0.000138250732421875,
      "step": 22651,
      "training_step_time": 0.4080169200897217
    },
    {
      "epoch": 0.0001382568359375,
      "model_forward_time": 0.1156003475189209,
      "step": 22652
    },
    {
      "epoch": 0.0001382568359375,
      "step": 22652,
      "training_step_time": 0.4116363525390625
    },
    {
      "epoch": 0.000138262939453125,
      "model_forward_time": 0.11486554145812988,
      "step": 22653
    },
    {
      "epoch": 0.000138262939453125,
      "step": 22653,
      "training_step_time": 0.3945577144622803
    },
    {
      "epoch": 0.00013826904296875,
      "model_forward_time": 0.11515951156616211,
      "step": 22654
    },
    {
      "epoch": 0.00013826904296875,
      "step": 22654,
      "training_step_time": 0.6491866111755371
    },
    {
      "epoch": 0.000138275146484375,
      "model_forward_time": 0.11454272270202637,
      "step": 22655
    },
    {
      "epoch": 0.000138275146484375,
      "step": 22655,
      "training_step_time": 0.43402814865112305
    },
    {
      "epoch": 0.00013828125,
      "model_forward_time": 0.11455869674682617,
      "step": 22656
    },
    {
      "epoch": 0.00013828125,
      "step": 22656,
      "training_step_time": 0.4379277229309082
    },
    {
      "epoch": 0.000138287353515625,
      "model_forward_time": 0.11453866958618164,
      "step": 22657
    },
    {
      "epoch": 0.000138287353515625,
      "step": 22657,
      "training_step_time": 0.4031643867492676
    },
    {
      "epoch": 0.00013829345703125,
      "model_forward_time": 0.11491823196411133,
      "step": 22658
    },
    {
      "epoch": 0.00013829345703125,
      "step": 22658,
      "training_step_time": 0.3938884735107422
    },
    {
      "epoch": 0.000138299560546875,
      "model_forward_time": 0.11506152153015137,
      "step": 22659
    },
    {
      "epoch": 0.000138299560546875,
      "step": 22659,
      "training_step_time": 0.413238525390625
    },
    {
      "epoch": 0.0001383056640625,
      "grad_norm": 0.139759823679924,
      "learning_rate": 7.340866715609712e-05,
      "loss": 0.053,
      "step": 22660
    },
    {
      "epoch": 0.0001383056640625,
      "model_forward_time": 0.11482858657836914,
      "step": 22660
    },
    {
      "epoch": 0.0001383056640625,
      "step": 22660,
      "training_step_time": 0.46822023391723633
    },
    {
      "epoch": 0.000138311767578125,
      "model_forward_time": 0.11558866500854492,
      "step": 22661
    },
    {
      "epoch": 0.000138311767578125,
      "step": 22661,
      "training_step_time": 0.3782503604888916
    },
    {
      "epoch": 0.00013831787109375,
      "model_forward_time": 0.11560463905334473,
      "step": 22662
    },
    {
      "epoch": 0.00013831787109375,
      "step": 22662,
      "training_step_time": 0.4498765468597412
    },
    {
      "epoch": 0.000138323974609375,
      "model_forward_time": 0.11466121673583984,
      "step": 22663
    },
    {
      "epoch": 0.000138323974609375,
      "step": 22663,
      "training_step_time": 0.4755103588104248
    },
    {
      "epoch": 0.000138330078125,
      "model_forward_time": 0.11530780792236328,
      "step": 22664
    },
    {
      "epoch": 0.000138330078125,
      "step": 22664,
      "training_step_time": 0.44659423828125
    },
    {
      "epoch": 0.000138336181640625,
      "model_forward_time": 0.11591577529907227,
      "step": 22665
    },
    {
      "epoch": 0.000138336181640625,
      "step": 22665,
      "training_step_time": 0.3952634334564209
    },
    {
      "epoch": 0.00013834228515625,
      "model_forward_time": 0.11516165733337402,
      "step": 22666
    },
    {
      "epoch": 0.00013834228515625,
      "step": 22666,
      "training_step_time": 0.38370251655578613
    },
    {
      "epoch": 0.000138348388671875,
      "model_forward_time": 0.12320804595947266,
      "step": 22667
    },
    {
      "epoch": 0.000138348388671875,
      "step": 22667,
      "training_step_time": 0.3925967216491699
    },
    {
      "epoch": 0.0001383544921875,
      "model_forward_time": 0.11511468887329102,
      "step": 22668
    },
    {
      "epoch": 0.0001383544921875,
      "step": 22668,
      "training_step_time": 0.39098191261291504
    },
    {
      "epoch": 0.000138360595703125,
      "model_forward_time": 0.11481523513793945,
      "step": 22669
    },
    {
      "epoch": 0.000138360595703125,
      "step": 22669,
      "training_step_time": 0.43563032150268555
    },
    {
      "epoch": 0.00013836669921875,
      "grad_norm": 0.12760886549949646,
      "learning_rate": 7.338431249315115e-05,
      "loss": 0.0493,
      "step": 22670
    },
    {
      "epoch": 0.00013836669921875,
      "model_forward_time": 0.11548995971679688,
      "step": 22670
    },
    {
      "epoch": 0.00013836669921875,
      "step": 22670,
      "training_step_time": 0.4153599739074707
    },
    {
      "epoch": 0.000138372802734375,
      "model_forward_time": 0.11496901512145996,
      "step": 22671
    },
    {
      "epoch": 0.000138372802734375,
      "step": 22671,
      "training_step_time": 0.48224449157714844
    },
    {
      "epoch": 0.00013837890625,
      "model_forward_time": 0.11567473411560059,
      "step": 22672
    },
    {
      "epoch": 0.00013837890625,
      "step": 22672,
      "training_step_time": 0.3914296627044678
    },
    {
      "epoch": 0.000138385009765625,
      "model_forward_time": 0.11488986015319824,
      "step": 22673
    },
    {
      "epoch": 0.000138385009765625,
      "step": 22673,
      "training_step_time": 0.46093130111694336
    },
    {
      "epoch": 0.00013839111328125,
      "model_forward_time": 0.11467576026916504,
      "step": 22674
    },
    {
      "epoch": 0.00013839111328125,
      "step": 22674,
      "training_step_time": 0.4247283935546875
    },
    {
      "epoch": 0.000138397216796875,
      "model_forward_time": 0.11530923843383789,
      "step": 22675
    },
    {
      "epoch": 0.000138397216796875,
      "step": 22675,
      "training_step_time": 0.4006350040435791
    },
    {
      "epoch": 0.0001384033203125,
      "model_forward_time": 0.11566734313964844,
      "step": 22676
    },
    {
      "epoch": 0.0001384033203125,
      "step": 22676,
      "training_step_time": 0.3948540687561035
    },
    {
      "epoch": 0.000138409423828125,
      "model_forward_time": 0.11593294143676758,
      "step": 22677
    },
    {
      "epoch": 0.000138409423828125,
      "step": 22677,
      "training_step_time": 0.4167797565460205
    },
    {
      "epoch": 0.00013841552734375,
      "model_forward_time": 0.11535525321960449,
      "step": 22678
    },
    {
      "epoch": 0.00013841552734375,
      "step": 22678,
      "training_step_time": 0.4853522777557373
    },
    {
      "epoch": 0.000138421630859375,
      "model_forward_time": 0.1155238151550293,
      "step": 22679
    },
    {
      "epoch": 0.000138421630859375,
      "step": 22679,
      "training_step_time": 0.4183826446533203
    },
    {
      "epoch": 0.000138427734375,
      "grad_norm": 0.15569789707660675,
      "learning_rate": 7.335995072666848e-05,
      "loss": 0.0524,
      "step": 22680
    },
    {
      "epoch": 0.000138427734375,
      "model_forward_time": 0.11489629745483398,
      "step": 22680
    },
    {
      "epoch": 0.000138427734375,
      "step": 22680,
      "training_step_time": 0.3924288749694824
    },
    {
      "epoch": 0.000138433837890625,
      "model_forward_time": 0.11548924446105957,
      "step": 22681
    },
    {
      "epoch": 0.000138433837890625,
      "step": 22681,
      "training_step_time": 0.39345502853393555
    },
    {
      "epoch": 0.00013843994140625,
      "model_forward_time": 0.1158134937286377,
      "step": 22682
    },
    {
      "epoch": 0.00013843994140625,
      "step": 22682,
      "training_step_time": 0.39372944831848145
    },
    {
      "epoch": 0.000138446044921875,
      "model_forward_time": 0.11592626571655273,
      "step": 22683
    },
    {
      "epoch": 0.000138446044921875,
      "step": 22683,
      "training_step_time": 0.3951730728149414
    },
    {
      "epoch": 0.0001384521484375,
      "model_forward_time": 0.11582803726196289,
      "step": 22684
    },
    {
      "epoch": 0.0001384521484375,
      "step": 22684,
      "training_step_time": 0.4051203727722168
    },
    {
      "epoch": 0.000138458251953125,
      "model_forward_time": 0.11475276947021484,
      "step": 22685
    },
    {
      "epoch": 0.000138458251953125,
      "step": 22685,
      "training_step_time": 0.5016908645629883
    },
    {
      "epoch": 0.00013846435546875,
      "model_forward_time": 0.11478686332702637,
      "step": 22686
    },
    {
      "epoch": 0.00013846435546875,
      "step": 22686,
      "training_step_time": 0.3825874328613281
    },
    {
      "epoch": 0.000138470458984375,
      "model_forward_time": 0.11495065689086914,
      "step": 22687
    },
    {
      "epoch": 0.000138470458984375,
      "step": 22687,
      "training_step_time": 0.39850854873657227
    },
    {
      "epoch": 0.0001384765625,
      "model_forward_time": 0.11481332778930664,
      "step": 22688
    },
    {
      "epoch": 0.0001384765625,
      "step": 22688,
      "training_step_time": 0.4473702907562256
    },
    {
      "epoch": 0.000138482666015625,
      "model_forward_time": 0.11504173278808594,
      "step": 22689
    },
    {
      "epoch": 0.000138482666015625,
      "step": 22689,
      "training_step_time": 0.3945598602294922
    },
    {
      "epoch": 0.00013848876953125,
      "grad_norm": 0.10811921209096909,
      "learning_rate": 7.333558186404958e-05,
      "loss": 0.0583,
      "step": 22690
    },
    {
      "epoch": 0.00013848876953125,
      "model_forward_time": 0.11564445495605469,
      "step": 22690
    },
    {
      "epoch": 0.00013848876953125,
      "step": 22690,
      "training_step_time": 0.3915371894836426
    },
    {
      "epoch": 0.000138494873046875,
      "model_forward_time": 0.11576461791992188,
      "step": 22691
    },
    {
      "epoch": 0.000138494873046875,
      "step": 22691,
      "training_step_time": 0.4501001834869385
    },
    {
      "epoch": 0.0001385009765625,
      "model_forward_time": 0.11630487442016602,
      "step": 22692
    },
    {
      "epoch": 0.0001385009765625,
      "step": 22692,
      "training_step_time": 0.405728816986084
    },
    {
      "epoch": 0.000138507080078125,
      "model_forward_time": 0.11550116539001465,
      "step": 22693
    },
    {
      "epoch": 0.000138507080078125,
      "step": 22693,
      "training_step_time": 0.4181790351867676
    },
    {
      "epoch": 0.00013851318359375,
      "model_forward_time": 0.11506223678588867,
      "step": 22694
    },
    {
      "epoch": 0.00013851318359375,
      "step": 22694,
      "training_step_time": 0.4431936740875244
    },
    {
      "epoch": 0.000138519287109375,
      "model_forward_time": 0.11605715751647949,
      "step": 22695
    },
    {
      "epoch": 0.000138519287109375,
      "step": 22695,
      "training_step_time": 0.38480663299560547
    },
    {
      "epoch": 0.000138525390625,
      "model_forward_time": 0.11550068855285645,
      "step": 22696
    },
    {
      "epoch": 0.000138525390625,
      "step": 22696,
      "training_step_time": 0.39012980461120605
    },
    {
      "epoch": 0.000138531494140625,
      "model_forward_time": 0.11479949951171875,
      "step": 22697
    },
    {
      "epoch": 0.000138531494140625,
      "step": 22697,
      "training_step_time": 0.39661550521850586
    },
    {
      "epoch": 0.00013853759765625,
      "model_forward_time": 0.11481809616088867,
      "step": 22698
    },
    {
      "epoch": 0.00013853759765625,
      "step": 22698,
      "training_step_time": 0.45230627059936523
    },
    {
      "epoch": 0.000138543701171875,
      "model_forward_time": 0.1159520149230957,
      "step": 22699
    },
    {
      "epoch": 0.000138543701171875,
      "step": 22699,
      "training_step_time": 0.43640995025634766
    },
    {
      "epoch": 0.0001385498046875,
      "grad_norm": 0.17567178606987,
      "learning_rate": 7.331120591269701e-05,
      "loss": 0.0544,
      "step": 22700
    },
    {
      "epoch": 0.0001385498046875,
      "model_forward_time": 0.11515092849731445,
      "step": 22700
    },
    {
      "epoch": 0.0001385498046875,
      "step": 22700,
      "training_step_time": 0.3936467170715332
    },
    {
      "epoch": 0.000138555908203125,
      "model_forward_time": 0.11490416526794434,
      "step": 22701
    },
    {
      "epoch": 0.000138555908203125,
      "step": 22701,
      "training_step_time": 0.4168875217437744
    },
    {
      "epoch": 0.00013856201171875,
      "model_forward_time": 0.11539554595947266,
      "step": 22702
    },
    {
      "epoch": 0.00013856201171875,
      "step": 22702,
      "training_step_time": 0.3931903839111328
    },
    {
      "epoch": 0.000138568115234375,
      "model_forward_time": 0.11463332176208496,
      "step": 22703
    },
    {
      "epoch": 0.000138568115234375,
      "step": 22703,
      "training_step_time": 0.4446902275085449
    },
    {
      "epoch": 0.00013857421875,
      "model_forward_time": 0.1149752140045166,
      "step": 22704
    },
    {
      "epoch": 0.00013857421875,
      "step": 22704,
      "training_step_time": 0.39402055740356445
    },
    {
      "epoch": 0.000138580322265625,
      "model_forward_time": 0.11563611030578613,
      "step": 22705
    },
    {
      "epoch": 0.000138580322265625,
      "step": 22705,
      "training_step_time": 0.4323897361755371
    },
    {
      "epoch": 0.00013858642578125,
      "model_forward_time": 0.11565351486206055,
      "step": 22706
    },
    {
      "epoch": 0.00013858642578125,
      "step": 22706,
      "training_step_time": 0.3983776569366455
    },
    {
      "epoch": 0.000138592529296875,
      "model_forward_time": 0.11480879783630371,
      "step": 22707
    },
    {
      "epoch": 0.000138592529296875,
      "step": 22707,
      "training_step_time": 0.4257063865661621
    },
    {
      "epoch": 0.0001385986328125,
      "model_forward_time": 0.11537003517150879,
      "step": 22708
    },
    {
      "epoch": 0.0001385986328125,
      "step": 22708,
      "training_step_time": 0.40494489669799805
    },
    {
      "epoch": 0.000138604736328125,
      "model_forward_time": 0.11536812782287598,
      "step": 22709
    },
    {
      "epoch": 0.000138604736328125,
      "step": 22709,
      "training_step_time": 0.39936280250549316
    },
    {
      "epoch": 0.00013861083984375,
      "grad_norm": 0.13165061175823212,
      "learning_rate": 7.328682288001561e-05,
      "loss": 0.0494,
      "step": 22710
    },
    {
      "epoch": 0.00013861083984375,
      "model_forward_time": 0.1177978515625,
      "step": 22710
    },
    {
      "epoch": 0.00013861083984375,
      "step": 22710,
      "training_step_time": 0.3927278518676758
    },
    {
      "epoch": 0.000138616943359375,
      "model_forward_time": 0.11505913734436035,
      "step": 22711
    },
    {
      "epoch": 0.000138616943359375,
      "step": 22711,
      "training_step_time": 0.3953378200531006
    },
    {
      "epoch": 0.000138623046875,
      "model_forward_time": 0.1154639720916748,
      "step": 22712
    },
    {
      "epoch": 0.000138623046875,
      "step": 22712,
      "training_step_time": 0.39179372787475586
    },
    {
      "epoch": 0.000138629150390625,
      "model_forward_time": 0.1151273250579834,
      "step": 22713
    },
    {
      "epoch": 0.000138629150390625,
      "step": 22713,
      "training_step_time": 0.42391347885131836
    },
    {
      "epoch": 0.00013863525390625,
      "model_forward_time": 0.1145482063293457,
      "step": 22714
    },
    {
      "epoch": 0.00013863525390625,
      "step": 22714,
      "training_step_time": 0.4080374240875244
    },
    {
      "epoch": 0.000138641357421875,
      "model_forward_time": 0.11486124992370605,
      "step": 22715
    },
    {
      "epoch": 0.000138641357421875,
      "step": 22715,
      "training_step_time": 0.3915987014770508
    },
    {
      "epoch": 0.0001386474609375,
      "model_forward_time": 0.11546182632446289,
      "step": 22716
    },
    {
      "epoch": 0.0001386474609375,
      "step": 22716,
      "training_step_time": 0.4150221347808838
    },
    {
      "epoch": 0.000138653564453125,
      "model_forward_time": 0.11554074287414551,
      "step": 22717
    },
    {
      "epoch": 0.000138653564453125,
      "step": 22717,
      "training_step_time": 0.4014444351196289
    },
    {
      "epoch": 0.00013865966796875,
      "model_forward_time": 0.11520195007324219,
      "step": 22718
    },
    {
      "epoch": 0.00013865966796875,
      "step": 22718,
      "training_step_time": 0.4613533020019531
    },
    {
      "epoch": 0.000138665771484375,
      "model_forward_time": 0.11521315574645996,
      "step": 22719
    },
    {
      "epoch": 0.000138665771484375,
      "step": 22719,
      "training_step_time": 0.46520113945007324
    },
    {
      "epoch": 0.000138671875,
      "grad_norm": 0.17176607251167297,
      "learning_rate": 7.326243277341227e-05,
      "loss": 0.0527,
      "step": 22720
    },
    {
      "epoch": 0.000138671875,
      "model_forward_time": 0.1149909496307373,
      "step": 22720
    },
    {
      "epoch": 0.000138671875,
      "step": 22720,
      "training_step_time": 0.39003849029541016
    },
    {
      "epoch": 0.000138677978515625,
      "model_forward_time": 0.11577105522155762,
      "step": 22721
    },
    {
      "epoch": 0.000138677978515625,
      "step": 22721,
      "training_step_time": 0.3827497959136963
    },
    {
      "epoch": 0.00013868408203125,
      "model_forward_time": 0.11559581756591797,
      "step": 22722
    },
    {
      "epoch": 0.00013868408203125,
      "step": 22722,
      "training_step_time": 0.5040338039398193
    },
    {
      "epoch": 0.000138690185546875,
      "model_forward_time": 0.1156301498413086,
      "step": 22723
    },
    {
      "epoch": 0.000138690185546875,
      "step": 22723,
      "training_step_time": 0.41718316078186035
    },
    {
      "epoch": 0.0001386962890625,
      "model_forward_time": 0.11490201950073242,
      "step": 22724
    },
    {
      "epoch": 0.0001386962890625,
      "step": 22724,
      "training_step_time": 0.3867807388305664
    },
    {
      "epoch": 0.000138702392578125,
      "model_forward_time": 0.11535787582397461,
      "step": 22725
    },
    {
      "epoch": 0.000138702392578125,
      "step": 22725,
      "training_step_time": 0.3869643211364746
    },
    {
      "epoch": 0.00013870849609375,
      "model_forward_time": 0.11549711227416992,
      "step": 22726
    },
    {
      "epoch": 0.00013870849609375,
      "step": 22726,
      "training_step_time": 0.382976770401001
    },
    {
      "epoch": 0.000138714599609375,
      "model_forward_time": 0.11499977111816406,
      "step": 22727
    },
    {
      "epoch": 0.000138714599609375,
      "step": 22727,
      "training_step_time": 0.44280004501342773
    },
    {
      "epoch": 0.000138720703125,
      "model_forward_time": 0.11515998840332031,
      "step": 22728
    },
    {
      "epoch": 0.000138720703125,
      "step": 22728,
      "training_step_time": 0.43972134590148926
    },
    {
      "epoch": 0.000138726806640625,
      "model_forward_time": 0.11509585380554199,
      "step": 22729
    },
    {
      "epoch": 0.000138726806640625,
      "step": 22729,
      "training_step_time": 0.4096105098724365
    },
    {
      "epoch": 0.00013873291015625,
      "grad_norm": 0.12616302073001862,
      "learning_rate": 7.323803560029605e-05,
      "loss": 0.0539,
      "step": 22730
    },
    {
      "epoch": 0.00013873291015625,
      "model_forward_time": 0.11494588851928711,
      "step": 22730
    },
    {
      "epoch": 0.00013873291015625,
      "step": 22730,
      "training_step_time": 0.39420557022094727
    },
    {
      "epoch": 0.000138739013671875,
      "model_forward_time": 0.11572885513305664,
      "step": 22731
    },
    {
      "epoch": 0.000138739013671875,
      "step": 22731,
      "training_step_time": 0.3951690196990967
    },
    {
      "epoch": 0.0001387451171875,
      "model_forward_time": 0.11532354354858398,
      "step": 22732
    },
    {
      "epoch": 0.0001387451171875,
      "step": 22732,
      "training_step_time": 0.3888375759124756
    },
    {
      "epoch": 0.000138751220703125,
      "model_forward_time": 0.11550021171569824,
      "step": 22733
    },
    {
      "epoch": 0.000138751220703125,
      "step": 22733,
      "training_step_time": 0.4157724380493164
    },
    {
      "epoch": 0.00013875732421875,
      "model_forward_time": 0.11453914642333984,
      "step": 22734
    },
    {
      "epoch": 0.00013875732421875,
      "step": 22734,
      "training_step_time": 0.421694278717041
    },
    {
      "epoch": 0.000138763427734375,
      "model_forward_time": 0.13213276863098145,
      "step": 22735
    },
    {
      "epoch": 0.000138763427734375,
      "step": 22735,
      "training_step_time": 0.4300210475921631
    },
    {
      "epoch": 0.00013876953125,
      "model_forward_time": 0.1153557300567627,
      "step": 22736
    },
    {
      "epoch": 0.00013876953125,
      "step": 22736,
      "training_step_time": 0.3948543071746826
    },
    {
      "epoch": 0.000138775634765625,
      "model_forward_time": 0.11567354202270508,
      "step": 22737
    },
    {
      "epoch": 0.000138775634765625,
      "step": 22737,
      "training_step_time": 0.4054291248321533
    },
    {
      "epoch": 0.00013878173828125,
      "model_forward_time": 0.11587405204772949,
      "step": 22738
    },
    {
      "epoch": 0.00013878173828125,
      "step": 22738,
      "training_step_time": 0.410214900970459
    },
    {
      "epoch": 0.000138787841796875,
      "model_forward_time": 0.11605596542358398,
      "step": 22739
    },
    {
      "epoch": 0.000138787841796875,
      "step": 22739,
      "training_step_time": 0.4116957187652588
    },
    {
      "epoch": 0.0001387939453125,
      "grad_norm": 0.1787623018026352,
      "learning_rate": 7.32136313680782e-05,
      "loss": 0.0521,
      "step": 22740
    },
    {
      "epoch": 0.0001387939453125,
      "model_forward_time": 0.11506247520446777,
      "step": 22740
    },
    {
      "epoch": 0.0001387939453125,
      "step": 22740,
      "training_step_time": 0.3820178508758545
    },
    {
      "epoch": 0.000138800048828125,
      "model_forward_time": 0.11701369285583496,
      "step": 22741
    },
    {
      "epoch": 0.000138800048828125,
      "step": 22741,
      "training_step_time": 0.39519691467285156
    },
    {
      "epoch": 0.00013880615234375,
      "model_forward_time": 0.11505007743835449,
      "step": 22742
    },
    {
      "epoch": 0.00013880615234375,
      "step": 22742,
      "training_step_time": 0.48383331298828125
    },
    {
      "epoch": 0.000138812255859375,
      "model_forward_time": 0.11573529243469238,
      "step": 22743
    },
    {
      "epoch": 0.000138812255859375,
      "step": 22743,
      "training_step_time": 0.3928253650665283
    },
    {
      "epoch": 0.000138818359375,
      "model_forward_time": 0.11504316329956055,
      "step": 22744
    },
    {
      "epoch": 0.000138818359375,
      "step": 22744,
      "training_step_time": 0.45642781257629395
    },
    {
      "epoch": 0.000138824462890625,
      "model_forward_time": 0.11578083038330078,
      "step": 22745
    },
    {
      "epoch": 0.000138824462890625,
      "step": 22745,
      "training_step_time": 0.38662147521972656
    },
    {
      "epoch": 0.00013883056640625,
      "model_forward_time": 0.11606049537658691,
      "step": 22746
    },
    {
      "epoch": 0.00013883056640625,
      "step": 22746,
      "training_step_time": 0.3879866600036621
    },
    {
      "epoch": 0.000138836669921875,
      "model_forward_time": 0.1151432991027832,
      "step": 22747
    },
    {
      "epoch": 0.000138836669921875,
      "step": 22747,
      "training_step_time": 0.48322486877441406
    },
    {
      "epoch": 0.0001388427734375,
      "model_forward_time": 0.11481189727783203,
      "step": 22748
    },
    {
      "epoch": 0.0001388427734375,
      "step": 22748,
      "training_step_time": 0.4304475784301758
    },
    {
      "epoch": 0.000138848876953125,
      "model_forward_time": 0.11517834663391113,
      "step": 22749
    },
    {
      "epoch": 0.000138848876953125,
      "step": 22749,
      "training_step_time": 0.49543142318725586
    },
    {
      "epoch": 0.00013885498046875,
      "grad_norm": 0.18146389722824097,
      "learning_rate": 7.318922008417203e-05,
      "loss": 0.0517,
      "step": 22750
    },
    {
      "epoch": 0.00013885498046875,
      "model_forward_time": 0.11525344848632812,
      "step": 22750
    },
    {
      "epoch": 0.00013885498046875,
      "step": 22750,
      "training_step_time": 0.3748948574066162
    },
    {
      "epoch": 0.000138861083984375,
      "model_forward_time": 0.11567234992980957,
      "step": 22751
    },
    {
      "epoch": 0.000138861083984375,
      "step": 22751,
      "training_step_time": 0.36672544479370117
    },
    {
      "epoch": 0.0001388671875,
      "model_forward_time": 0.11487221717834473,
      "step": 22752
    },
    {
      "epoch": 0.0001388671875,
      "step": 22752,
      "training_step_time": 0.42153263092041016
    },
    {
      "epoch": 0.000138873291015625,
      "model_forward_time": 0.11479306221008301,
      "step": 22753
    },
    {
      "epoch": 0.000138873291015625,
      "step": 22753,
      "training_step_time": 0.40825390815734863
    },
    {
      "epoch": 0.00013887939453125,
      "model_forward_time": 0.1160125732421875,
      "step": 22754
    },
    {
      "epoch": 0.00013887939453125,
      "step": 22754,
      "training_step_time": 0.39053893089294434
    },
    {
      "epoch": 0.000138885498046875,
      "model_forward_time": 0.11565709114074707,
      "step": 22755
    },
    {
      "epoch": 0.000138885498046875,
      "step": 22755,
      "training_step_time": 0.38656044006347656
    },
    {
      "epoch": 0.0001388916015625,
      "model_forward_time": 0.11530685424804688,
      "step": 22756
    },
    {
      "epoch": 0.0001388916015625,
      "step": 22756,
      "training_step_time": 0.4261901378631592
    },
    {
      "epoch": 0.000138897705078125,
      "model_forward_time": 0.11496114730834961,
      "step": 22757
    },
    {
      "epoch": 0.000138897705078125,
      "step": 22757,
      "training_step_time": 0.43190598487854004
    },
    {
      "epoch": 0.00013890380859375,
      "model_forward_time": 0.1144871711730957,
      "step": 22758
    },
    {
      "epoch": 0.00013890380859375,
      "step": 22758,
      "training_step_time": 0.4982454776763916
    },
    {
      "epoch": 0.000138909912109375,
      "model_forward_time": 0.11510419845581055,
      "step": 22759
    },
    {
      "epoch": 0.000138909912109375,
      "step": 22759,
      "training_step_time": 0.4013824462890625
    },
    {
      "epoch": 0.000138916015625,
      "grad_norm": 0.13991838693618774,
      "learning_rate": 7.316480175599309e-05,
      "loss": 0.0517,
      "step": 22760
    },
    {
      "epoch": 0.000138916015625,
      "model_forward_time": 0.11524724960327148,
      "step": 22760
    },
    {
      "epoch": 0.000138916015625,
      "step": 22760,
      "training_step_time": 0.41026997566223145
    },
    {
      "epoch": 0.000138922119140625,
      "model_forward_time": 0.11466217041015625,
      "step": 22761
    },
    {
      "epoch": 0.000138922119140625,
      "step": 22761,
      "training_step_time": 0.41129446029663086
    },
    {
      "epoch": 0.00013892822265625,
      "model_forward_time": 0.11519789695739746,
      "step": 22762
    },
    {
      "epoch": 0.00013892822265625,
      "step": 22762,
      "training_step_time": 0.4041919708251953
    },
    {
      "epoch": 0.000138934326171875,
      "model_forward_time": 0.11541962623596191,
      "step": 22763
    },
    {
      "epoch": 0.000138934326171875,
      "step": 22763,
      "training_step_time": 0.492978572845459
    },
    {
      "epoch": 0.0001389404296875,
      "model_forward_time": 0.1148672103881836,
      "step": 22764
    },
    {
      "epoch": 0.0001389404296875,
      "step": 22764,
      "training_step_time": 0.42409253120422363
    },
    {
      "epoch": 0.000138946533203125,
      "model_forward_time": 0.11528730392456055,
      "step": 22765
    },
    {
      "epoch": 0.000138946533203125,
      "step": 22765,
      "training_step_time": 0.3986670970916748
    },
    {
      "epoch": 0.00013895263671875,
      "model_forward_time": 0.11580681800842285,
      "step": 22766
    },
    {
      "epoch": 0.00013895263671875,
      "step": 22766,
      "training_step_time": 0.38187599182128906
    },
    {
      "epoch": 0.000138958740234375,
      "model_forward_time": 0.11505317687988281,
      "step": 22767
    },
    {
      "epoch": 0.000138958740234375,
      "step": 22767,
      "training_step_time": 0.4164705276489258
    },
    {
      "epoch": 0.00013896484375,
      "model_forward_time": 0.14379024505615234,
      "step": 22768
    },
    {
      "epoch": 0.00013896484375,
      "step": 22768,
      "training_step_time": 0.4077284336090088
    },
    {
      "epoch": 0.000138970947265625,
      "model_forward_time": 0.11983084678649902,
      "step": 22769
    },
    {
      "epoch": 0.000138970947265625,
      "step": 22769,
      "training_step_time": 0.3814239501953125
    },
    {
      "epoch": 0.00013897705078125,
      "grad_norm": 0.16896219551563263,
      "learning_rate": 7.3140376390959e-05,
      "loss": 0.0504,
      "step": 22770
    },
    {
      "epoch": 0.00013897705078125,
      "model_forward_time": 0.11777448654174805,
      "step": 22770
    },
    {
      "epoch": 0.00013897705078125,
      "step": 22770,
      "training_step_time": 0.46167564392089844
    },
    {
      "epoch": 0.000138983154296875,
      "model_forward_time": 0.11563658714294434,
      "step": 22771
    },
    {
      "epoch": 0.000138983154296875,
      "step": 22771,
      "training_step_time": 0.4024512767791748
    },
    {
      "epoch": 0.0001389892578125,
      "model_forward_time": 0.11511826515197754,
      "step": 22772
    },
    {
      "epoch": 0.0001389892578125,
      "step": 22772,
      "training_step_time": 0.3869640827178955
    },
    {
      "epoch": 0.000138995361328125,
      "model_forward_time": 0.11581587791442871,
      "step": 22773
    },
    {
      "epoch": 0.000138995361328125,
      "step": 22773,
      "training_step_time": 0.386430025100708
    },
    {
      "epoch": 0.00013900146484375,
      "model_forward_time": 0.1156768798828125,
      "step": 22774
    },
    {
      "epoch": 0.00013900146484375,
      "step": 22774,
      "training_step_time": 0.3987267017364502
    },
    {
      "epoch": 0.000139007568359375,
      "model_forward_time": 0.11542534828186035,
      "step": 22775
    },
    {
      "epoch": 0.000139007568359375,
      "step": 22775,
      "training_step_time": 0.4493708610534668
    },
    {
      "epoch": 0.000139013671875,
      "model_forward_time": 0.11677050590515137,
      "step": 22776
    },
    {
      "epoch": 0.000139013671875,
      "step": 22776,
      "training_step_time": 0.4027259349822998
    },
    {
      "epoch": 0.000139019775390625,
      "model_forward_time": 0.11500310897827148,
      "step": 22777
    },
    {
      "epoch": 0.000139019775390625,
      "step": 22777,
      "training_step_time": 0.43703341484069824
    },
    {
      "epoch": 0.00013902587890625,
      "model_forward_time": 0.1152501106262207,
      "step": 22778
    },
    {
      "epoch": 0.00013902587890625,
      "step": 22778,
      "training_step_time": 0.4115605354309082
    },
    {
      "epoch": 0.000139031982421875,
      "model_forward_time": 0.11539816856384277,
      "step": 22779
    },
    {
      "epoch": 0.000139031982421875,
      "step": 22779,
      "training_step_time": 0.38570070266723633
    },
    {
      "epoch": 0.0001390380859375,
      "grad_norm": 0.1369701772928238,
      "learning_rate": 7.311594399648957e-05,
      "loss": 0.0509,
      "step": 22780
    },
    {
      "epoch": 0.0001390380859375,
      "model_forward_time": 0.11553406715393066,
      "step": 22780
    },
    {
      "epoch": 0.0001390380859375,
      "step": 22780,
      "training_step_time": 0.3948090076446533
    },
    {
      "epoch": 0.000139044189453125,
      "model_forward_time": 0.11652994155883789,
      "step": 22781
    },
    {
      "epoch": 0.000139044189453125,
      "step": 22781,
      "training_step_time": 0.3876383304595947
    },
    {
      "epoch": 0.00013905029296875,
      "model_forward_time": 0.11548829078674316,
      "step": 22782
    },
    {
      "epoch": 0.00013905029296875,
      "step": 22782,
      "training_step_time": 0.4369657039642334
    },
    {
      "epoch": 0.000139056396484375,
      "model_forward_time": 0.11513400077819824,
      "step": 22783
    },
    {
      "epoch": 0.000139056396484375,
      "step": 22783,
      "training_step_time": 0.4425678253173828
    },
    {
      "epoch": 0.0001390625,
      "model_forward_time": 0.11549496650695801,
      "step": 22784
    },
    {
      "epoch": 0.0001390625,
      "step": 22784,
      "training_step_time": 0.3946807384490967
    },
    {
      "epoch": 0.000139068603515625,
      "model_forward_time": 0.11554884910583496,
      "step": 22785
    },
    {
      "epoch": 0.000139068603515625,
      "step": 22785,
      "training_step_time": 0.4393017292022705
    },
    {
      "epoch": 0.00013907470703125,
      "model_forward_time": 0.11678719520568848,
      "step": 22786
    },
    {
      "epoch": 0.00013907470703125,
      "step": 22786,
      "training_step_time": 0.4861729145050049
    },
    {
      "epoch": 0.000139080810546875,
      "model_forward_time": 0.11588692665100098,
      "step": 22787
    },
    {
      "epoch": 0.000139080810546875,
      "step": 22787,
      "training_step_time": 0.39957475662231445
    },
    {
      "epoch": 0.0001390869140625,
      "model_forward_time": 0.11583328247070312,
      "step": 22788
    },
    {
      "epoch": 0.0001390869140625,
      "step": 22788,
      "training_step_time": 0.45168566703796387
    },
    {
      "epoch": 0.000139093017578125,
      "model_forward_time": 0.11507582664489746,
      "step": 22789
    },
    {
      "epoch": 0.000139093017578125,
      "step": 22789,
      "training_step_time": 0.41195034980773926
    },
    {
      "epoch": 0.00013909912109375,
      "grad_norm": 0.12199736386537552,
      "learning_rate": 7.309150458000668e-05,
      "loss": 0.0525,
      "step": 22790
    },
    {
      "epoch": 0.00013909912109375,
      "model_forward_time": 0.11512923240661621,
      "step": 22790
    },
    {
      "epoch": 0.00013909912109375,
      "step": 22790,
      "training_step_time": 0.39357924461364746
    },
    {
      "epoch": 0.000139105224609375,
      "model_forward_time": 0.11568713188171387,
      "step": 22791
    },
    {
      "epoch": 0.000139105224609375,
      "step": 22791,
      "training_step_time": 0.43445301055908203
    },
    {
      "epoch": 0.000139111328125,
      "model_forward_time": 0.11609196662902832,
      "step": 22792
    },
    {
      "epoch": 0.000139111328125,
      "step": 22792,
      "training_step_time": 0.44288063049316406
    },
    {
      "epoch": 0.000139117431640625,
      "model_forward_time": 0.1151890754699707,
      "step": 22793
    },
    {
      "epoch": 0.000139117431640625,
      "step": 22793,
      "training_step_time": 0.4755280017852783
    },
    {
      "epoch": 0.00013912353515625,
      "model_forward_time": 0.11557960510253906,
      "step": 22794
    },
    {
      "epoch": 0.00013912353515625,
      "step": 22794,
      "training_step_time": 0.390824556350708
    },
    {
      "epoch": 0.000139129638671875,
      "model_forward_time": 0.11523604393005371,
      "step": 22795
    },
    {
      "epoch": 0.000139129638671875,
      "step": 22795,
      "training_step_time": 0.3662126064300537
    },
    {
      "epoch": 0.0001391357421875,
      "model_forward_time": 0.1159505844116211,
      "step": 22796
    },
    {
      "epoch": 0.0001391357421875,
      "step": 22796,
      "training_step_time": 0.4375326633453369
    },
    {
      "epoch": 0.000139141845703125,
      "model_forward_time": 0.11591053009033203,
      "step": 22797
    },
    {
      "epoch": 0.000139141845703125,
      "step": 22797,
      "training_step_time": 0.45487308502197266
    },
    {
      "epoch": 0.00013914794921875,
      "model_forward_time": 0.11551904678344727,
      "step": 22798
    },
    {
      "epoch": 0.00013914794921875,
      "step": 22798,
      "training_step_time": 0.3793330192565918
    },
    {
      "epoch": 0.000139154052734375,
      "model_forward_time": 0.11549973487854004,
      "step": 22799
    },
    {
      "epoch": 0.000139154052734375,
      "step": 22799,
      "training_step_time": 0.39408302307128906
    },
    {
      "epoch": 0.00013916015625,
      "grad_norm": 0.1263430118560791,
      "learning_rate": 7.30670581489344e-05,
      "loss": 0.0513,
      "step": 22800
    },
    {
      "epoch": 0.00013916015625,
      "model_forward_time": 0.11594438552856445,
      "step": 22800
    },
    {
      "epoch": 0.00013916015625,
      "step": 22800,
      "training_step_time": 0.48460984230041504
    },
    {
      "epoch": 0.000139166259765625,
      "model_forward_time": 0.11564421653747559,
      "step": 22801
    },
    {
      "epoch": 0.000139166259765625,
      "step": 22801,
      "training_step_time": 0.43123579025268555
    },
    {
      "epoch": 0.00013917236328125,
      "model_forward_time": 0.11473989486694336,
      "step": 22802
    },
    {
      "epoch": 0.00013917236328125,
      "step": 22802,
      "training_step_time": 0.3962986469268799
    },
    {
      "epoch": 0.000139178466796875,
      "model_forward_time": 0.11580562591552734,
      "step": 22803
    },
    {
      "epoch": 0.000139178466796875,
      "step": 22803,
      "training_step_time": 0.38666749000549316
    },
    {
      "epoch": 0.0001391845703125,
      "model_forward_time": 0.11558747291564941,
      "step": 22804
    },
    {
      "epoch": 0.0001391845703125,
      "step": 22804,
      "training_step_time": 0.396923303604126
    },
    {
      "epoch": 0.000139190673828125,
      "model_forward_time": 0.1156148910522461,
      "step": 22805
    },
    {
      "epoch": 0.000139190673828125,
      "step": 22805,
      "training_step_time": 0.4215872287750244
    },
    {
      "epoch": 0.00013919677734375,
      "model_forward_time": 0.1154639720916748,
      "step": 22806
    },
    {
      "epoch": 0.00013919677734375,
      "step": 22806,
      "training_step_time": 0.43291521072387695
    },
    {
      "epoch": 0.000139202880859375,
      "model_forward_time": 0.11581826210021973,
      "step": 22807
    },
    {
      "epoch": 0.000139202880859375,
      "step": 22807,
      "training_step_time": 0.4939150810241699
    },
    {
      "epoch": 0.000139208984375,
      "model_forward_time": 0.11568951606750488,
      "step": 22808
    },
    {
      "epoch": 0.000139208984375,
      "step": 22808,
      "training_step_time": 0.39925169944763184
    },
    {
      "epoch": 0.000139215087890625,
      "model_forward_time": 0.1156468391418457,
      "step": 22809
    },
    {
      "epoch": 0.000139215087890625,
      "step": 22809,
      "training_step_time": 0.38703417778015137
    },
    {
      "epoch": 0.00013922119140625,
      "grad_norm": 0.14429830014705658,
      "learning_rate": 7.30426047106989e-05,
      "loss": 0.052,
      "step": 22810
    },
    {
      "epoch": 0.00013922119140625,
      "model_forward_time": 0.11634278297424316,
      "step": 22810
    },
    {
      "epoch": 0.00013922119140625,
      "step": 22810,
      "training_step_time": 0.3834998607635498
    },
    {
      "epoch": 0.000139227294921875,
      "model_forward_time": 0.11534643173217773,
      "step": 22811
    },
    {
      "epoch": 0.000139227294921875,
      "step": 22811,
      "training_step_time": 0.43302440643310547
    },
    {
      "epoch": 0.0001392333984375,
      "model_forward_time": 0.11570549011230469,
      "step": 22812
    },
    {
      "epoch": 0.0001392333984375,
      "step": 22812,
      "training_step_time": 0.4055778980255127
    },
    {
      "epoch": 0.000139239501953125,
      "model_forward_time": 0.11634087562561035,
      "step": 22813
    },
    {
      "epoch": 0.000139239501953125,
      "step": 22813,
      "training_step_time": 0.4643843173980713
    },
    {
      "epoch": 0.00013924560546875,
      "model_forward_time": 0.11553430557250977,
      "step": 22814
    },
    {
      "epoch": 0.00013924560546875,
      "step": 22814,
      "training_step_time": 0.4277029037475586
    },
    {
      "epoch": 0.000139251708984375,
      "model_forward_time": 0.11598539352416992,
      "step": 22815
    },
    {
      "epoch": 0.000139251708984375,
      "step": 22815,
      "training_step_time": 0.44704484939575195
    },
    {
      "epoch": 0.0001392578125,
      "model_forward_time": 0.11570501327514648,
      "step": 22816
    },
    {
      "epoch": 0.0001392578125,
      "step": 22816,
      "training_step_time": 0.4706385135650635
    },
    {
      "epoch": 0.000139263916015625,
      "model_forward_time": 0.11508917808532715,
      "step": 22817
    },
    {
      "epoch": 0.000139263916015625,
      "step": 22817,
      "training_step_time": 0.3924560546875
    },
    {
      "epoch": 0.00013927001953125,
      "model_forward_time": 0.11577773094177246,
      "step": 22818
    },
    {
      "epoch": 0.00013927001953125,
      "step": 22818,
      "training_step_time": 0.38954615592956543
    },
    {
      "epoch": 0.000139276123046875,
      "model_forward_time": 0.11586499214172363,
      "step": 22819
    },
    {
      "epoch": 0.000139276123046875,
      "step": 22819,
      "training_step_time": 0.396099328994751
    },
    {
      "epoch": 0.0001392822265625,
      "grad_norm": 0.10297390818595886,
      "learning_rate": 7.301814427272849e-05,
      "loss": 0.049,
      "step": 22820
    },
    {
      "epoch": 0.0001392822265625,
      "model_forward_time": 0.11728191375732422,
      "step": 22820
    },
    {
      "epoch": 0.0001392822265625,
      "step": 22820,
      "training_step_time": 0.4822390079498291
    },
    {
      "epoch": 0.000139288330078125,
      "model_forward_time": 0.1157386302947998,
      "step": 22821
    },
    {
      "epoch": 0.000139288330078125,
      "step": 22821,
      "training_step_time": 0.44144487380981445
    },
    {
      "epoch": 0.00013929443359375,
      "model_forward_time": 0.11591386795043945,
      "step": 22822
    },
    {
      "epoch": 0.00013929443359375,
      "step": 22822,
      "training_step_time": 0.3777749538421631
    },
    {
      "epoch": 0.000139300537109375,
      "model_forward_time": 0.11529254913330078,
      "step": 22823
    },
    {
      "epoch": 0.000139300537109375,
      "step": 22823,
      "training_step_time": 0.39393186569213867
    },
    {
      "epoch": 0.000139306640625,
      "model_forward_time": 0.11633920669555664,
      "step": 22824
    },
    {
      "epoch": 0.000139306640625,
      "step": 22824,
      "training_step_time": 0.36856794357299805
    },
    {
      "epoch": 0.000139312744140625,
      "model_forward_time": 0.11490774154663086,
      "step": 22825
    },
    {
      "epoch": 0.000139312744140625,
      "step": 22825,
      "training_step_time": 0.40340352058410645
    },
    {
      "epoch": 0.00013931884765625,
      "model_forward_time": 0.11641907691955566,
      "step": 22826
    },
    {
      "epoch": 0.00013931884765625,
      "step": 22826,
      "training_step_time": 0.3992950916290283
    },
    {
      "epoch": 0.000139324951171875,
      "model_forward_time": 0.11569595336914062,
      "step": 22827
    },
    {
      "epoch": 0.000139324951171875,
      "step": 22827,
      "training_step_time": 0.3980410099029541
    },
    {
      "epoch": 0.0001393310546875,
      "model_forward_time": 0.11551141738891602,
      "step": 22828
    },
    {
      "epoch": 0.0001393310546875,
      "step": 22828,
      "training_step_time": 0.4125828742980957
    },
    {
      "epoch": 0.000139337158203125,
      "model_forward_time": 0.11530804634094238,
      "step": 22829
    },
    {
      "epoch": 0.000139337158203125,
      "step": 22829,
      "training_step_time": 0.4577062129974365
    },
    {
      "epoch": 0.00013934326171875,
      "grad_norm": 0.12265604734420776,
      "learning_rate": 7.299367684245362e-05,
      "loss": 0.045,
      "step": 22830
    },
    {
      "epoch": 0.00013934326171875,
      "model_forward_time": 0.11638712882995605,
      "step": 22830
    },
    {
      "epoch": 0.00013934326171875,
      "step": 22830,
      "training_step_time": 0.4802074432373047
    },
    {
      "epoch": 0.000139349365234375,
      "model_forward_time": 0.11510562896728516,
      "step": 22831
    },
    {
      "epoch": 0.000139349365234375,
      "step": 22831,
      "training_step_time": 0.39153504371643066
    },
    {
      "epoch": 0.00013935546875,
      "model_forward_time": 0.11809015274047852,
      "step": 22832
    },
    {
      "epoch": 0.00013935546875,
      "step": 22832,
      "training_step_time": 0.4045281410217285
    },
    {
      "epoch": 0.000139361572265625,
      "model_forward_time": 0.11553668975830078,
      "step": 22833
    },
    {
      "epoch": 0.000139361572265625,
      "step": 22833,
      "training_step_time": 0.3919045925140381
    },
    {
      "epoch": 0.00013936767578125,
      "model_forward_time": 0.11583232879638672,
      "step": 22834
    },
    {
      "epoch": 0.00013936767578125,
      "step": 22834,
      "training_step_time": 0.4666252136230469
    },
    {
      "epoch": 0.000139373779296875,
      "model_forward_time": 0.11525273323059082,
      "step": 22835
    },
    {
      "epoch": 0.000139373779296875,
      "step": 22835,
      "training_step_time": 0.43488025665283203
    },
    {
      "epoch": 0.0001393798828125,
      "model_forward_time": 0.1155235767364502,
      "step": 22836
    },
    {
      "epoch": 0.0001393798828125,
      "step": 22836,
      "training_step_time": 0.4909646511077881
    },
    {
      "epoch": 0.000139385986328125,
      "model_forward_time": 0.1152188777923584,
      "step": 22837
    },
    {
      "epoch": 0.000139385986328125,
      "step": 22837,
      "training_step_time": 0.39441704750061035
    },
    {
      "epoch": 0.00013939208984375,
      "model_forward_time": 0.11620950698852539,
      "step": 22838
    },
    {
      "epoch": 0.00013939208984375,
      "step": 22838,
      "training_step_time": 0.4033832550048828
    },
    {
      "epoch": 0.000139398193359375,
      "model_forward_time": 0.11564803123474121,
      "step": 22839
    },
    {
      "epoch": 0.000139398193359375,
      "step": 22839,
      "training_step_time": 0.4210796356201172
    },
    {
      "epoch": 0.000139404296875,
      "grad_norm": 0.14435920119285583,
      "learning_rate": 7.296920242730682e-05,
      "loss": 0.0481,
      "step": 22840
    },
    {
      "epoch": 0.000139404296875,
      "model_forward_time": 0.11515927314758301,
      "step": 22840
    },
    {
      "epoch": 0.000139404296875,
      "step": 22840,
      "training_step_time": 0.3814520835876465
    },
    {
      "epoch": 0.000139410400390625,
      "model_forward_time": 0.11612772941589355,
      "step": 22841
    },
    {
      "epoch": 0.000139410400390625,
      "step": 22841,
      "training_step_time": 0.3965134620666504
    },
    {
      "epoch": 0.00013941650390625,
      "model_forward_time": 0.11638021469116211,
      "step": 22842
    },
    {
      "epoch": 0.00013941650390625,
      "step": 22842,
      "training_step_time": 0.45157551765441895
    },
    {
      "epoch": 0.000139422607421875,
      "model_forward_time": 0.11609172821044922,
      "step": 22843
    },
    {
      "epoch": 0.000139422607421875,
      "step": 22843,
      "training_step_time": 0.43654608726501465
    },
    {
      "epoch": 0.0001394287109375,
      "model_forward_time": 0.11519622802734375,
      "step": 22844
    },
    {
      "epoch": 0.0001394287109375,
      "step": 22844,
      "training_step_time": 0.3991842269897461
    },
    {
      "epoch": 0.000139434814453125,
      "model_forward_time": 0.11584758758544922,
      "step": 22845
    },
    {
      "epoch": 0.000139434814453125,
      "step": 22845,
      "training_step_time": 0.3868675231933594
    },
    {
      "epoch": 0.00013944091796875,
      "model_forward_time": 0.11608409881591797,
      "step": 22846
    },
    {
      "epoch": 0.00013944091796875,
      "step": 22846,
      "training_step_time": 0.4015171527862549
    },
    {
      "epoch": 0.000139447021484375,
      "model_forward_time": 0.1152043342590332,
      "step": 22847
    },
    {
      "epoch": 0.000139447021484375,
      "step": 22847,
      "training_step_time": 0.5761866569519043
    },
    {
      "epoch": 0.000139453125,
      "model_forward_time": 0.11519289016723633,
      "step": 22848
    },
    {
      "epoch": 0.000139453125,
      "step": 22848,
      "training_step_time": 0.46076083183288574
    },
    {
      "epoch": 0.000139459228515625,
      "model_forward_time": 0.11717081069946289,
      "step": 22849
    },
    {
      "epoch": 0.000139459228515625,
      "step": 22849,
      "training_step_time": 0.49944019317626953
    },
    {
      "epoch": 0.00013946533203125,
      "grad_norm": 0.125179260969162,
      "learning_rate": 7.294472103472281e-05,
      "loss": 0.0464,
      "step": 22850
    },
    {
      "epoch": 0.00013946533203125,
      "model_forward_time": 0.11519789695739746,
      "step": 22850
    },
    {
      "epoch": 0.00013946533203125,
      "step": 22850,
      "training_step_time": 0.47354888916015625
    },
    {
      "epoch": 0.000139471435546875,
      "model_forward_time": 0.11497378349304199,
      "step": 22851
    },
    {
      "epoch": 0.000139471435546875,
      "step": 22851,
      "training_step_time": 0.38285279273986816
    },
    {
      "epoch": 0.0001394775390625,
      "model_forward_time": 0.11484074592590332,
      "step": 22852
    },
    {
      "epoch": 0.0001394775390625,
      "step": 22852,
      "training_step_time": 0.3793056011199951
    },
    {
      "epoch": 0.000139483642578125,
      "model_forward_time": 0.11484813690185547,
      "step": 22853
    },
    {
      "epoch": 0.000139483642578125,
      "step": 22853,
      "training_step_time": 0.39786195755004883
    },
    {
      "epoch": 0.00013948974609375,
      "model_forward_time": 0.11587309837341309,
      "step": 22854
    },
    {
      "epoch": 0.00013948974609375,
      "step": 22854,
      "training_step_time": 0.4228384494781494
    },
    {
      "epoch": 0.000139495849609375,
      "model_forward_time": 0.11603784561157227,
      "step": 22855
    },
    {
      "epoch": 0.000139495849609375,
      "step": 22855,
      "training_step_time": 0.49327731132507324
    },
    {
      "epoch": 0.000139501953125,
      "model_forward_time": 0.11561155319213867,
      "step": 22856
    },
    {
      "epoch": 0.000139501953125,
      "step": 22856,
      "training_step_time": 0.42241883277893066
    },
    {
      "epoch": 0.000139508056640625,
      "model_forward_time": 0.11653399467468262,
      "step": 22857
    },
    {
      "epoch": 0.000139508056640625,
      "step": 22857,
      "training_step_time": 0.42549824714660645
    },
    {
      "epoch": 0.00013951416015625,
      "model_forward_time": 0.11588454246520996,
      "step": 22858
    },
    {
      "epoch": 0.00013951416015625,
      "step": 22858,
      "training_step_time": 0.4942324161529541
    },
    {
      "epoch": 0.000139520263671875,
      "model_forward_time": 0.11551380157470703,
      "step": 22859
    },
    {
      "epoch": 0.000139520263671875,
      "step": 22859,
      "training_step_time": 0.3950343132019043
    },
    {
      "epoch": 0.0001395263671875,
      "grad_norm": 0.14117431640625,
      "learning_rate": 7.292023267213835e-05,
      "loss": 0.0526,
      "step": 22860
    },
    {
      "epoch": 0.0001395263671875,
      "model_forward_time": 0.11562490463256836,
      "step": 22860
    },
    {
      "epoch": 0.0001395263671875,
      "step": 22860,
      "training_step_time": 0.39024806022644043
    },
    {
      "epoch": 0.000139532470703125,
      "model_forward_time": 0.11548471450805664,
      "step": 22861
    },
    {
      "epoch": 0.000139532470703125,
      "step": 22861,
      "training_step_time": 0.38876891136169434
    },
    {
      "epoch": 0.00013953857421875,
      "model_forward_time": 0.11568093299865723,
      "step": 22862
    },
    {
      "epoch": 0.00013953857421875,
      "step": 22862,
      "training_step_time": 0.4036979675292969
    },
    {
      "epoch": 0.000139544677734375,
      "model_forward_time": 0.11595940589904785,
      "step": 22863
    },
    {
      "epoch": 0.000139544677734375,
      "step": 22863,
      "training_step_time": 0.3891606330871582
    },
    {
      "epoch": 0.00013955078125,
      "model_forward_time": 0.11579060554504395,
      "step": 22864
    },
    {
      "epoch": 0.00013955078125,
      "step": 22864,
      "training_step_time": 0.40073537826538086
    },
    {
      "epoch": 0.000139556884765625,
      "model_forward_time": 0.11648011207580566,
      "step": 22865
    },
    {
      "epoch": 0.000139556884765625,
      "step": 22865,
      "training_step_time": 0.41173219680786133
    },
    {
      "epoch": 0.00013956298828125,
      "model_forward_time": 0.11565947532653809,
      "step": 22866
    },
    {
      "epoch": 0.00013956298828125,
      "step": 22866,
      "training_step_time": 0.3907279968261719
    },
    {
      "epoch": 0.000139569091796875,
      "model_forward_time": 0.11666560173034668,
      "step": 22867
    },
    {
      "epoch": 0.000139569091796875,
      "step": 22867,
      "training_step_time": 0.395719051361084
    },
    {
      "epoch": 0.0001395751953125,
      "model_forward_time": 0.11544585227966309,
      "step": 22868
    },
    {
      "epoch": 0.0001395751953125,
      "step": 22868,
      "training_step_time": 0.3976929187774658
    },
    {
      "epoch": 0.000139581298828125,
      "model_forward_time": 0.11585426330566406,
      "step": 22869
    },
    {
      "epoch": 0.000139581298828125,
      "step": 22869,
      "training_step_time": 0.4437687397003174
    },
    {
      "epoch": 0.00013958740234375,
      "grad_norm": 0.10898037254810333,
      "learning_rate": 7.28957373469924e-05,
      "loss": 0.0448,
      "step": 22870
    },
    {
      "epoch": 0.00013958740234375,
      "model_forward_time": 0.11578559875488281,
      "step": 22870
    },
    {
      "epoch": 0.00013958740234375,
      "step": 22870,
      "training_step_time": 0.4526236057281494
    },
    {
      "epoch": 0.000139593505859375,
      "model_forward_time": 0.11619687080383301,
      "step": 22871
    },
    {
      "epoch": 0.000139593505859375,
      "step": 22871,
      "training_step_time": 0.46056413650512695
    },
    {
      "epoch": 0.000139599609375,
      "model_forward_time": 0.1156461238861084,
      "step": 22872
    },
    {
      "epoch": 0.000139599609375,
      "step": 22872,
      "training_step_time": 0.4791374206542969
    },
    {
      "epoch": 0.000139605712890625,
      "model_forward_time": 0.11529684066772461,
      "step": 22873
    },
    {
      "epoch": 0.000139605712890625,
      "step": 22873,
      "training_step_time": 0.405670166015625
    },
    {
      "epoch": 0.00013961181640625,
      "model_forward_time": 0.11561918258666992,
      "step": 22874
    },
    {
      "epoch": 0.00013961181640625,
      "step": 22874,
      "training_step_time": 0.4203817844390869
    },
    {
      "epoch": 0.000139617919921875,
      "model_forward_time": 0.11500358581542969,
      "step": 22875
    },
    {
      "epoch": 0.000139617919921875,
      "step": 22875,
      "training_step_time": 0.3901402950286865
    },
    {
      "epoch": 0.0001396240234375,
      "model_forward_time": 0.11527705192565918,
      "step": 22876
    },
    {
      "epoch": 0.0001396240234375,
      "step": 22876,
      "training_step_time": 0.384077787399292
    },
    {
      "epoch": 0.000139630126953125,
      "model_forward_time": 0.11525917053222656,
      "step": 22877
    },
    {
      "epoch": 0.000139630126953125,
      "step": 22877,
      "training_step_time": 0.4544355869293213
    },
    {
      "epoch": 0.00013963623046875,
      "model_forward_time": 0.11881899833679199,
      "step": 22878
    },
    {
      "epoch": 0.00013963623046875,
      "step": 22878,
      "training_step_time": 0.38661718368530273
    },
    {
      "epoch": 0.000139642333984375,
      "model_forward_time": 0.12054586410522461,
      "step": 22879
    },
    {
      "epoch": 0.000139642333984375,
      "step": 22879,
      "training_step_time": 0.38630175590515137
    },
    {
      "epoch": 0.0001396484375,
      "grad_norm": 0.09643398225307465,
      "learning_rate": 7.287123506672595e-05,
      "loss": 0.0495,
      "step": 22880
    },
    {
      "epoch": 0.0001396484375,
      "model_forward_time": 0.11752772331237793,
      "step": 22880
    },
    {
      "epoch": 0.0001396484375,
      "step": 22880,
      "training_step_time": 0.4084007740020752
    },
    {
      "epoch": 0.000139654541015625,
      "model_forward_time": 0.11827850341796875,
      "step": 22881
    },
    {
      "epoch": 0.000139654541015625,
      "step": 22881,
      "training_step_time": 0.38107848167419434
    },
    {
      "epoch": 0.00013966064453125,
      "model_forward_time": 0.11711001396179199,
      "step": 22882
    },
    {
      "epoch": 0.00013966064453125,
      "step": 22882,
      "training_step_time": 0.39545536041259766
    },
    {
      "epoch": 0.000139666748046875,
      "model_forward_time": 0.11603569984436035,
      "step": 22883
    },
    {
      "epoch": 0.000139666748046875,
      "step": 22883,
      "training_step_time": 0.3787577152252197
    },
    {
      "epoch": 0.0001396728515625,
      "model_forward_time": 0.11865639686584473,
      "step": 22884
    },
    {
      "epoch": 0.0001396728515625,
      "step": 22884,
      "training_step_time": 0.43428635597229004
    },
    {
      "epoch": 0.000139678955078125,
      "model_forward_time": 0.11832571029663086,
      "step": 22885
    },
    {
      "epoch": 0.000139678955078125,
      "step": 22885,
      "training_step_time": 0.44974517822265625
    },
    {
      "epoch": 0.00013968505859375,
      "model_forward_time": 0.11678171157836914,
      "step": 22886
    },
    {
      "epoch": 0.00013968505859375,
      "step": 22886,
      "training_step_time": 0.43267273902893066
    },
    {
      "epoch": 0.000139691162109375,
      "model_forward_time": 0.11541366577148438,
      "step": 22887
    },
    {
      "epoch": 0.000139691162109375,
      "step": 22887,
      "training_step_time": 0.4343607425689697
    },
    {
      "epoch": 0.000139697265625,
      "model_forward_time": 0.11574840545654297,
      "step": 22888
    },
    {
      "epoch": 0.000139697265625,
      "step": 22888,
      "training_step_time": 0.4038581848144531
    },
    {
      "epoch": 0.000139703369140625,
      "model_forward_time": 0.11548614501953125,
      "step": 22889
    },
    {
      "epoch": 0.000139703369140625,
      "step": 22889,
      "training_step_time": 0.39193153381347656
    },
    {
      "epoch": 0.00013970947265625,
      "grad_norm": 0.14701691269874573,
      "learning_rate": 7.284672583878219e-05,
      "loss": 0.0515,
      "step": 22890
    },
    {
      "epoch": 0.00013970947265625,
      "model_forward_time": 0.11542367935180664,
      "step": 22890
    },
    {
      "epoch": 0.00013970947265625,
      "step": 22890,
      "training_step_time": 0.4031345844268799
    },
    {
      "epoch": 0.000139715576171875,
      "model_forward_time": 0.1160593032836914,
      "step": 22891
    },
    {
      "epoch": 0.000139715576171875,
      "step": 22891,
      "training_step_time": 0.4440627098083496
    },
    {
      "epoch": 0.0001397216796875,
      "model_forward_time": 0.11529040336608887,
      "step": 22892
    },
    {
      "epoch": 0.0001397216796875,
      "step": 22892,
      "training_step_time": 0.4114818572998047
    },
    {
      "epoch": 0.000139727783203125,
      "model_forward_time": 0.11570382118225098,
      "step": 22893
    },
    {
      "epoch": 0.000139727783203125,
      "step": 22893,
      "training_step_time": 0.39102792739868164
    },
    {
      "epoch": 0.00013973388671875,
      "model_forward_time": 0.1170341968536377,
      "step": 22894
    },
    {
      "epoch": 0.00013973388671875,
      "step": 22894,
      "training_step_time": 0.3825247287750244
    },
    {
      "epoch": 0.000139739990234375,
      "model_forward_time": 0.11545825004577637,
      "step": 22895
    },
    {
      "epoch": 0.000139739990234375,
      "step": 22895,
      "training_step_time": 0.38553833961486816
    },
    {
      "epoch": 0.00013974609375,
      "model_forward_time": 0.11527323722839355,
      "step": 22896
    },
    {
      "epoch": 0.00013974609375,
      "step": 22896,
      "training_step_time": 0.6440825462341309
    },
    {
      "epoch": 0.000139752197265625,
      "model_forward_time": 0.11532759666442871,
      "step": 22897
    },
    {
      "epoch": 0.000139752197265625,
      "step": 22897,
      "training_step_time": 0.4461050033569336
    },
    {
      "epoch": 0.00013975830078125,
      "model_forward_time": 0.1156764030456543,
      "step": 22898
    },
    {
      "epoch": 0.00013975830078125,
      "step": 22898,
      "training_step_time": 0.42350196838378906
    },
    {
      "epoch": 0.000139764404296875,
      "model_forward_time": 0.11555075645446777,
      "step": 22899
    },
    {
      "epoch": 0.000139764404296875,
      "step": 22899,
      "training_step_time": 0.47139525413513184
    },
    {
      "epoch": 0.0001397705078125,
      "grad_norm": 0.17704331874847412,
      "learning_rate": 7.282220967060633e-05,
      "loss": 0.0477,
      "step": 22900
    },
    {
      "epoch": 0.0001397705078125,
      "model_forward_time": 0.11473655700683594,
      "step": 22900
    },
    {
      "epoch": 0.0001397705078125,
      "step": 22900,
      "training_step_time": 0.4388580322265625
    },
    {
      "epoch": 0.000139776611328125,
      "model_forward_time": 0.11470460891723633,
      "step": 22901
    },
    {
      "epoch": 0.000139776611328125,
      "step": 22901,
      "training_step_time": 0.4449467658996582
    },
    {
      "epoch": 0.00013978271484375,
      "model_forward_time": 0.11525130271911621,
      "step": 22902
    },
    {
      "epoch": 0.00013978271484375,
      "step": 22902,
      "training_step_time": 0.39490318298339844
    },
    {
      "epoch": 0.000139788818359375,
      "model_forward_time": 0.1151726245880127,
      "step": 22903
    },
    {
      "epoch": 0.000139788818359375,
      "step": 22903,
      "training_step_time": 0.39077067375183105
    },
    {
      "epoch": 0.000139794921875,
      "model_forward_time": 0.11538958549499512,
      "step": 22904
    },
    {
      "epoch": 0.000139794921875,
      "step": 22904,
      "training_step_time": 0.4003870487213135
    },
    {
      "epoch": 0.000139801025390625,
      "model_forward_time": 0.11572051048278809,
      "step": 22905
    },
    {
      "epoch": 0.000139801025390625,
      "step": 22905,
      "training_step_time": 0.39139437675476074
    },
    {
      "epoch": 0.00013980712890625,
      "model_forward_time": 0.11570215225219727,
      "step": 22906
    },
    {
      "epoch": 0.00013980712890625,
      "step": 22906,
      "training_step_time": 0.42847180366516113
    },
    {
      "epoch": 0.000139813232421875,
      "model_forward_time": 0.1157541275024414,
      "step": 22907
    },
    {
      "epoch": 0.000139813232421875,
      "step": 22907,
      "training_step_time": 0.44277095794677734
    },
    {
      "epoch": 0.0001398193359375,
      "model_forward_time": 0.1153714656829834,
      "step": 22908
    },
    {
      "epoch": 0.0001398193359375,
      "step": 22908,
      "training_step_time": 0.4786388874053955
    },
    {
      "epoch": 0.000139825439453125,
      "model_forward_time": 0.11522197723388672,
      "step": 22909
    },
    {
      "epoch": 0.000139825439453125,
      "step": 22909,
      "training_step_time": 0.39206600189208984
    },
    {
      "epoch": 0.00013983154296875,
      "grad_norm": 0.13117431104183197,
      "learning_rate": 7.279768656964577e-05,
      "loss": 0.0491,
      "step": 22910
    },
    {
      "epoch": 0.00013983154296875,
      "model_forward_time": 0.1154019832611084,
      "step": 22910
    },
    {
      "epoch": 0.00013983154296875,
      "step": 22910,
      "training_step_time": 0.4038243293762207
    },
    {
      "epoch": 0.000139837646484375,
      "model_forward_time": 0.11550092697143555,
      "step": 22911
    },
    {
      "epoch": 0.000139837646484375,
      "step": 22911,
      "training_step_time": 0.4055616855621338
    },
    {
      "epoch": 0.00013984375,
      "model_forward_time": 0.11511778831481934,
      "step": 22912
    },
    {
      "epoch": 0.00013984375,
      "step": 22912,
      "training_step_time": 0.36574792861938477
    },
    {
      "epoch": 0.000139849853515625,
      "model_forward_time": 0.11576271057128906,
      "step": 22913
    },
    {
      "epoch": 0.000139849853515625,
      "step": 22913,
      "training_step_time": 0.45598793029785156
    },
    {
      "epoch": 0.00013985595703125,
      "model_forward_time": 0.11570978164672852,
      "step": 22914
    },
    {
      "epoch": 0.00013985595703125,
      "step": 22914,
      "training_step_time": 0.40140771865844727
    },
    {
      "epoch": 0.000139862060546875,
      "model_forward_time": 0.11535334587097168,
      "step": 22915
    },
    {
      "epoch": 0.000139862060546875,
      "step": 22915,
      "training_step_time": 0.4759988784790039
    },
    {
      "epoch": 0.0001398681640625,
      "model_forward_time": 0.11565375328063965,
      "step": 22916
    },
    {
      "epoch": 0.0001398681640625,
      "step": 22916,
      "training_step_time": 0.39872169494628906
    },
    {
      "epoch": 0.000139874267578125,
      "model_forward_time": 0.12224650382995605,
      "step": 22917
    },
    {
      "epoch": 0.000139874267578125,
      "step": 22917,
      "training_step_time": 0.38655996322631836
    },
    {
      "epoch": 0.00013988037109375,
      "model_forward_time": 0.11518502235412598,
      "step": 22918
    },
    {
      "epoch": 0.00013988037109375,
      "step": 22918,
      "training_step_time": 0.37391042709350586
    },
    {
      "epoch": 0.000139886474609375,
      "model_forward_time": 0.11451983451843262,
      "step": 22919
    },
    {
      "epoch": 0.000139886474609375,
      "step": 22919,
      "training_step_time": 0.4016764163970947
    },
    {
      "epoch": 0.000139892578125,
      "grad_norm": 0.17239844799041748,
      "learning_rate": 7.277315654334997e-05,
      "loss": 0.05,
      "step": 22920
    },
    {
      "epoch": 0.000139892578125,
      "model_forward_time": 0.11527109146118164,
      "step": 22920
    },
    {
      "epoch": 0.000139892578125,
      "step": 22920,
      "training_step_time": 0.4023263454437256
    },
    {
      "epoch": 0.000139898681640625,
      "model_forward_time": 0.11503124237060547,
      "step": 22921
    },
    {
      "epoch": 0.000139898681640625,
      "step": 22921,
      "training_step_time": 0.40184473991394043
    },
    {
      "epoch": 0.00013990478515625,
      "model_forward_time": 0.11521506309509277,
      "step": 22922
    },
    {
      "epoch": 0.00013990478515625,
      "step": 22922,
      "training_step_time": 0.4034996032714844
    },
    {
      "epoch": 0.000139910888671875,
      "model_forward_time": 0.11600899696350098,
      "step": 22923
    },
    {
      "epoch": 0.000139910888671875,
      "step": 22923,
      "training_step_time": 0.4050257205963135
    },
    {
      "epoch": 0.0001399169921875,
      "model_forward_time": 0.11541056632995605,
      "step": 22924
    },
    {
      "epoch": 0.0001399169921875,
      "step": 22924,
      "training_step_time": 0.4057784080505371
    },
    {
      "epoch": 0.000139923095703125,
      "model_forward_time": 0.11537671089172363,
      "step": 22925
    },
    {
      "epoch": 0.000139923095703125,
      "step": 22925,
      "training_step_time": 0.4436960220336914
    },
    {
      "epoch": 0.00013992919921875,
      "model_forward_time": 0.11503100395202637,
      "step": 22926
    },
    {
      "epoch": 0.00013992919921875,
      "step": 22926,
      "training_step_time": 0.5149507522583008
    },
    {
      "epoch": 0.000139935302734375,
      "model_forward_time": 0.11556220054626465,
      "step": 22927
    },
    {
      "epoch": 0.000139935302734375,
      "step": 22927,
      "training_step_time": 0.4418504238128662
    },
    {
      "epoch": 0.00013994140625,
      "model_forward_time": 0.11456871032714844,
      "step": 22928
    },
    {
      "epoch": 0.00013994140625,
      "step": 22928,
      "training_step_time": 0.39519500732421875
    },
    {
      "epoch": 0.000139947509765625,
      "model_forward_time": 0.11544346809387207,
      "step": 22929
    },
    {
      "epoch": 0.000139947509765625,
      "step": 22929,
      "training_step_time": 0.4734382629394531
    },
    {
      "epoch": 0.00013995361328125,
      "grad_norm": 0.1633392721414566,
      "learning_rate": 7.27486195991705e-05,
      "loss": 0.0488,
      "step": 22930
    },
    {
      "epoch": 0.00013995361328125,
      "model_forward_time": 0.11512589454650879,
      "step": 22930
    },
    {
      "epoch": 0.00013995361328125,
      "step": 22930,
      "training_step_time": 0.38790059089660645
    },
    {
      "epoch": 0.000139959716796875,
      "model_forward_time": 0.11523294448852539,
      "step": 22931
    },
    {
      "epoch": 0.000139959716796875,
      "step": 22931,
      "training_step_time": 0.3879854679107666
    },
    {
      "epoch": 0.0001399658203125,
      "model_forward_time": 0.11510944366455078,
      "step": 22932
    },
    {
      "epoch": 0.0001399658203125,
      "step": 22932,
      "training_step_time": 0.47606372833251953
    },
    {
      "epoch": 0.000139971923828125,
      "model_forward_time": 0.1154470443725586,
      "step": 22933
    },
    {
      "epoch": 0.000139971923828125,
      "step": 22933,
      "training_step_time": 0.3881852626800537
    },
    {
      "epoch": 0.00013997802734375,
      "model_forward_time": 0.11524295806884766,
      "step": 22934
    },
    {
      "epoch": 0.00013997802734375,
      "step": 22934,
      "training_step_time": 0.39041876792907715
    },
    {
      "epoch": 0.000139984130859375,
      "model_forward_time": 0.11521458625793457,
      "step": 22935
    },
    {
      "epoch": 0.000139984130859375,
      "step": 22935,
      "training_step_time": 0.3973734378814697
    },
    {
      "epoch": 0.000139990234375,
      "model_forward_time": 0.11564421653747559,
      "step": 22936
    },
    {
      "epoch": 0.000139990234375,
      "step": 22936,
      "training_step_time": 0.3971996307373047
    },
    {
      "epoch": 0.000139996337890625,
      "model_forward_time": 0.11517024040222168,
      "step": 22937
    },
    {
      "epoch": 0.000139996337890625,
      "step": 22937,
      "training_step_time": 0.493114709854126
    },
    {
      "epoch": 0.00014000244140625,
      "model_forward_time": 0.11561727523803711,
      "step": 22938
    },
    {
      "epoch": 0.00014000244140625,
      "step": 22938,
      "training_step_time": 0.5016336441040039
    },
    {
      "epoch": 0.000140008544921875,
      "model_forward_time": 0.11467480659484863,
      "step": 22939
    },
    {
      "epoch": 0.000140008544921875,
      "step": 22939,
      "training_step_time": 0.4473073482513428
    },
    {
      "epoch": 0.0001400146484375,
      "grad_norm": 0.12180894613265991,
      "learning_rate": 7.272407574456103e-05,
      "loss": 0.0435,
      "step": 22940
    },
    {
      "epoch": 0.0001400146484375,
      "model_forward_time": 0.11498808860778809,
      "step": 22940
    },
    {
      "epoch": 0.0001400146484375,
      "step": 22940,
      "training_step_time": 0.4022223949432373
    },
    {
      "epoch": 0.000140020751953125,
      "model_forward_time": 0.1148533821105957,
      "step": 22941
    },
    {
      "epoch": 0.000140020751953125,
      "step": 22941,
      "training_step_time": 0.36273980140686035
    },
    {
      "epoch": 0.00014002685546875,
      "model_forward_time": 0.11513662338256836,
      "step": 22942
    },
    {
      "epoch": 0.00014002685546875,
      "step": 22942,
      "training_step_time": 0.47557854652404785
    },
    {
      "epoch": 0.000140032958984375,
      "model_forward_time": 0.1147761344909668,
      "step": 22943
    },
    {
      "epoch": 0.000140032958984375,
      "step": 22943,
      "training_step_time": 0.4919281005859375
    },
    {
      "epoch": 0.0001400390625,
      "model_forward_time": 0.11468100547790527,
      "step": 22944
    },
    {
      "epoch": 0.0001400390625,
      "step": 22944,
      "training_step_time": 0.4119231700897217
    },
    {
      "epoch": 0.000140045166015625,
      "model_forward_time": 0.11493325233459473,
      "step": 22945
    },
    {
      "epoch": 0.000140045166015625,
      "step": 22945,
      "training_step_time": 0.39436769485473633
    },
    {
      "epoch": 0.00014005126953125,
      "model_forward_time": 0.1145176887512207,
      "step": 22946
    },
    {
      "epoch": 0.00014005126953125,
      "step": 22946,
      "training_step_time": 0.417827844619751
    },
    {
      "epoch": 0.000140057373046875,
      "model_forward_time": 0.11494684219360352,
      "step": 22947
    },
    {
      "epoch": 0.000140057373046875,
      "step": 22947,
      "training_step_time": 0.3825817108154297
    },
    {
      "epoch": 0.0001400634765625,
      "model_forward_time": 0.1153099536895752,
      "step": 22948
    },
    {
      "epoch": 0.0001400634765625,
      "step": 22948,
      "training_step_time": 0.39377689361572266
    },
    {
      "epoch": 0.000140069580078125,
      "model_forward_time": 0.1144874095916748,
      "step": 22949
    },
    {
      "epoch": 0.000140069580078125,
      "step": 22949,
      "training_step_time": 0.40681004524230957
    },
    {
      "epoch": 0.00014007568359375,
      "grad_norm": 0.17462792992591858,
      "learning_rate": 7.269952498697734e-05,
      "loss": 0.0509,
      "step": 22950
    },
    {
      "epoch": 0.00014007568359375,
      "model_forward_time": 0.11517739295959473,
      "step": 22950
    },
    {
      "epoch": 0.00014007568359375,
      "step": 22950,
      "training_step_time": 0.40337038040161133
    },
    {
      "epoch": 0.000140081787109375,
      "model_forward_time": 0.11582660675048828,
      "step": 22951
    },
    {
      "epoch": 0.000140081787109375,
      "step": 22951,
      "training_step_time": 0.3969261646270752
    },
    {
      "epoch": 0.000140087890625,
      "model_forward_time": 0.11546611785888672,
      "step": 22952
    },
    {
      "epoch": 0.000140087890625,
      "step": 22952,
      "training_step_time": 0.44205689430236816
    },
    {
      "epoch": 0.000140093994140625,
      "model_forward_time": 0.11557769775390625,
      "step": 22953
    },
    {
      "epoch": 0.000140093994140625,
      "step": 22953,
      "training_step_time": 0.46992993354797363
    },
    {
      "epoch": 0.00014010009765625,
      "model_forward_time": 0.11586499214172363,
      "step": 22954
    },
    {
      "epoch": 0.00014010009765625,
      "step": 22954,
      "training_step_time": 0.3803720474243164
    },
    {
      "epoch": 0.000140106201171875,
      "model_forward_time": 0.11485981941223145,
      "step": 22955
    },
    {
      "epoch": 0.000140106201171875,
      "step": 22955,
      "training_step_time": 0.3957083225250244
    },
    {
      "epoch": 0.0001401123046875,
      "model_forward_time": 0.11573195457458496,
      "step": 22956
    },
    {
      "epoch": 0.0001401123046875,
      "step": 22956,
      "training_step_time": 0.4425933361053467
    },
    {
      "epoch": 0.000140118408203125,
      "model_forward_time": 0.11486482620239258,
      "step": 22957
    },
    {
      "epoch": 0.000140118408203125,
      "step": 22957,
      "training_step_time": 0.4201674461364746
    },
    {
      "epoch": 0.00014012451171875,
      "model_forward_time": 0.11491584777832031,
      "step": 22958
    },
    {
      "epoch": 0.00014012451171875,
      "step": 22958,
      "training_step_time": 0.4607839584350586
    },
    {
      "epoch": 0.000140130615234375,
      "model_forward_time": 0.1149742603302002,
      "step": 22959
    },
    {
      "epoch": 0.000140130615234375,
      "step": 22959,
      "training_step_time": 0.40564417839050293
    },
    {
      "epoch": 0.00014013671875,
      "grad_norm": 0.16259150207042694,
      "learning_rate": 7.267496733387731e-05,
      "loss": 0.0486,
      "step": 22960
    },
    {
      "epoch": 0.00014013671875,
      "model_forward_time": 0.11520099639892578,
      "step": 22960
    },
    {
      "epoch": 0.00014013671875,
      "step": 22960,
      "training_step_time": 0.403764009475708
    },
    {
      "epoch": 0.000140142822265625,
      "model_forward_time": 0.11500668525695801,
      "step": 22961
    },
    {
      "epoch": 0.000140142822265625,
      "step": 22961,
      "training_step_time": 0.39263033866882324
    },
    {
      "epoch": 0.00014014892578125,
      "model_forward_time": 0.11567473411560059,
      "step": 22962
    },
    {
      "epoch": 0.00014014892578125,
      "step": 22962,
      "training_step_time": 0.5079386234283447
    },
    {
      "epoch": 0.000140155029296875,
      "model_forward_time": 0.11481094360351562,
      "step": 22963
    },
    {
      "epoch": 0.000140155029296875,
      "step": 22963,
      "training_step_time": 0.39914393424987793
    },
    {
      "epoch": 0.0001401611328125,
      "model_forward_time": 0.11487293243408203,
      "step": 22964
    },
    {
      "epoch": 0.0001401611328125,
      "step": 22964,
      "training_step_time": 0.43296074867248535
    },
    {
      "epoch": 0.000140167236328125,
      "model_forward_time": 0.11536002159118652,
      "step": 22965
    },
    {
      "epoch": 0.000140167236328125,
      "step": 22965,
      "training_step_time": 0.40596532821655273
    },
    {
      "epoch": 0.00014017333984375,
      "model_forward_time": 0.11470770835876465,
      "step": 22966
    },
    {
      "epoch": 0.00014017333984375,
      "step": 22966,
      "training_step_time": 0.4418926239013672
    },
    {
      "epoch": 0.000140179443359375,
      "model_forward_time": 0.11482977867126465,
      "step": 22967
    },
    {
      "epoch": 0.000140179443359375,
      "step": 22967,
      "training_step_time": 0.3998122215270996
    },
    {
      "epoch": 0.000140185546875,
      "model_forward_time": 0.1153714656829834,
      "step": 22968
    },
    {
      "epoch": 0.000140185546875,
      "step": 22968,
      "training_step_time": 0.511103630065918
    },
    {
      "epoch": 0.000140191650390625,
      "model_forward_time": 0.11512947082519531,
      "step": 22969
    },
    {
      "epoch": 0.000140191650390625,
      "step": 22969,
      "training_step_time": 0.3935832977294922
    },
    {
      "epoch": 0.00014019775390625,
      "grad_norm": 0.12668560445308685,
      "learning_rate": 7.265040279272091e-05,
      "loss": 0.043,
      "step": 22970
    },
    {
      "epoch": 0.00014019775390625,
      "model_forward_time": 0.1150670051574707,
      "step": 22970
    },
    {
      "epoch": 0.00014019775390625,
      "step": 22970,
      "training_step_time": 0.36852169036865234
    },
    {
      "epoch": 0.000140203857421875,
      "model_forward_time": 0.1156315803527832,
      "step": 22971
    },
    {
      "epoch": 0.000140203857421875,
      "step": 22971,
      "training_step_time": 0.46376872062683105
    },
    {
      "epoch": 0.0001402099609375,
      "model_forward_time": 0.11884236335754395,
      "step": 22972
    },
    {
      "epoch": 0.0001402099609375,
      "step": 22972,
      "training_step_time": 0.47811031341552734
    },
    {
      "epoch": 0.000140216064453125,
      "model_forward_time": 0.11759829521179199,
      "step": 22973
    },
    {
      "epoch": 0.000140216064453125,
      "step": 22973,
      "training_step_time": 0.4007883071899414
    },
    {
      "epoch": 0.00014022216796875,
      "model_forward_time": 0.11838126182556152,
      "step": 22974
    },
    {
      "epoch": 0.00014022216796875,
      "step": 22974,
      "training_step_time": 0.38700008392333984
    },
    {
      "epoch": 0.000140228271484375,
      "model_forward_time": 0.11798667907714844,
      "step": 22975
    },
    {
      "epoch": 0.000140228271484375,
      "step": 22975,
      "training_step_time": 0.3890035152435303
    },
    {
      "epoch": 0.000140234375,
      "model_forward_time": 0.11905503273010254,
      "step": 22976
    },
    {
      "epoch": 0.000140234375,
      "step": 22976,
      "training_step_time": 0.40288281440734863
    },
    {
      "epoch": 0.000140240478515625,
      "model_forward_time": 0.11518001556396484,
      "step": 22977
    },
    {
      "epoch": 0.000140240478515625,
      "step": 22977,
      "training_step_time": 0.3836853504180908
    },
    {
      "epoch": 0.00014024658203125,
      "model_forward_time": 0.11579227447509766,
      "step": 22978
    },
    {
      "epoch": 0.00014024658203125,
      "step": 22978,
      "training_step_time": 0.39546704292297363
    },
    {
      "epoch": 0.000140252685546875,
      "model_forward_time": 0.11454105377197266,
      "step": 22979
    },
    {
      "epoch": 0.000140252685546875,
      "step": 22979,
      "training_step_time": 0.45977783203125
    },
    {
      "epoch": 0.0001402587890625,
      "grad_norm": 0.20096002519130707,
      "learning_rate": 7.262583137097018e-05,
      "loss": 0.052,
      "step": 22980
    },
    {
      "epoch": 0.0001402587890625,
      "model_forward_time": 0.11497712135314941,
      "step": 22980
    },
    {
      "epoch": 0.0001402587890625,
      "step": 22980,
      "training_step_time": 0.44591784477233887
    },
    {
      "epoch": 0.000140264892578125,
      "model_forward_time": 0.11516547203063965,
      "step": 22981
    },
    {
      "epoch": 0.000140264892578125,
      "step": 22981,
      "training_step_time": 0.4272630214691162
    },
    {
      "epoch": 0.00014027099609375,
      "model_forward_time": 0.11481857299804688,
      "step": 22982
    },
    {
      "epoch": 0.00014027099609375,
      "step": 22982,
      "training_step_time": 0.4018418788909912
    },
    {
      "epoch": 0.000140277099609375,
      "model_forward_time": 0.11518454551696777,
      "step": 22983
    },
    {
      "epoch": 0.000140277099609375,
      "step": 22983,
      "training_step_time": 0.40306735038757324
    },
    {
      "epoch": 0.000140283203125,
      "model_forward_time": 0.11589956283569336,
      "step": 22984
    },
    {
      "epoch": 0.000140283203125,
      "step": 22984,
      "training_step_time": 0.3960249423980713
    },
    {
      "epoch": 0.000140289306640625,
      "model_forward_time": 0.11531972885131836,
      "step": 22985
    },
    {
      "epoch": 0.000140289306640625,
      "step": 22985,
      "training_step_time": 0.4941873550415039
    },
    {
      "epoch": 0.00014029541015625,
      "model_forward_time": 0.11519837379455566,
      "step": 22986
    },
    {
      "epoch": 0.00014029541015625,
      "step": 22986,
      "training_step_time": 0.6407275199890137
    },
    {
      "epoch": 0.000140301513671875,
      "model_forward_time": 0.11496400833129883,
      "step": 22987
    },
    {
      "epoch": 0.000140301513671875,
      "step": 22987,
      "training_step_time": 0.3991703987121582
    },
    {
      "epoch": 0.0001403076171875,
      "model_forward_time": 0.11531734466552734,
      "step": 22988
    },
    {
      "epoch": 0.0001403076171875,
      "step": 22988,
      "training_step_time": 0.389293909072876
    },
    {
      "epoch": 0.000140313720703125,
      "model_forward_time": 0.11527228355407715,
      "step": 22989
    },
    {
      "epoch": 0.000140313720703125,
      "step": 22989,
      "training_step_time": 0.3803398609161377
    },
    {
      "epoch": 0.00014031982421875,
      "grad_norm": 0.16476595401763916,
      "learning_rate": 7.260125307608929e-05,
      "loss": 0.0457,
      "step": 22990
    },
    {
      "epoch": 0.00014031982421875,
      "model_forward_time": 0.11460018157958984,
      "step": 22990
    },
    {
      "epoch": 0.00014031982421875,
      "step": 22990,
      "training_step_time": 0.38616132736206055
    },
    {
      "epoch": 0.000140325927734375,
      "model_forward_time": 0.1150815486907959,
      "step": 22991
    },
    {
      "epoch": 0.000140325927734375,
      "step": 22991,
      "training_step_time": 0.39299464225769043
    },
    {
      "epoch": 0.00014033203125,
      "model_forward_time": 0.11545109748840332,
      "step": 22992
    },
    {
      "epoch": 0.00014033203125,
      "step": 22992,
      "training_step_time": 0.39307379722595215
    },
    {
      "epoch": 0.000140338134765625,
      "model_forward_time": 0.11448216438293457,
      "step": 22993
    },
    {
      "epoch": 0.000140338134765625,
      "step": 22993,
      "training_step_time": 0.4725940227508545
    },
    {
      "epoch": 0.00014034423828125,
      "model_forward_time": 0.11508727073669434,
      "step": 22994
    },
    {
      "epoch": 0.00014034423828125,
      "step": 22994,
      "training_step_time": 0.4068601131439209
    },
    {
      "epoch": 0.000140350341796875,
      "model_forward_time": 0.11480498313903809,
      "step": 22995
    },
    {
      "epoch": 0.000140350341796875,
      "step": 22995,
      "training_step_time": 0.48809075355529785
    },
    {
      "epoch": 0.0001403564453125,
      "model_forward_time": 0.11623716354370117,
      "step": 22996
    },
    {
      "epoch": 0.0001403564453125,
      "step": 22996,
      "training_step_time": 0.38801050186157227
    },
    {
      "epoch": 0.000140362548828125,
      "model_forward_time": 0.11539340019226074,
      "step": 22997
    },
    {
      "epoch": 0.000140362548828125,
      "step": 22997,
      "training_step_time": 0.3916795253753662
    },
    {
      "epoch": 0.00014036865234375,
      "model_forward_time": 0.11578178405761719,
      "step": 22998
    },
    {
      "epoch": 0.00014036865234375,
      "step": 22998,
      "training_step_time": 0.4794645309448242
    },
    {
      "epoch": 0.000140374755859375,
      "model_forward_time": 0.11536026000976562,
      "step": 22999
    },
    {
      "epoch": 0.000140374755859375,
      "step": 22999,
      "training_step_time": 0.41592884063720703
    },
    {
      "epoch": 0.000140380859375,
      "grad_norm": 0.140009343624115,
      "learning_rate": 7.257666791554448e-05,
      "loss": 0.0471,
      "step": 23000
    },
    {
      "epoch": 0.000140380859375,
      "model_forward_time": 0.11370468139648438,
      "step": 23000
    },
    {
      "epoch": 0.000140380859375,
      "step": 23000,
      "training_step_time": 0.35481882095336914
    },
    {
      "epoch": 0.000140386962890625,
      "model_forward_time": 0.11262106895446777,
      "step": 23001
    },
    {
      "epoch": 0.000140386962890625,
      "step": 23001,
      "training_step_time": 0.3635976314544678
    },
    {
      "epoch": 0.00014039306640625,
      "model_forward_time": 0.1129300594329834,
      "step": 23002
    },
    {
      "epoch": 0.00014039306640625,
      "step": 23002,
      "training_step_time": 0.3856019973754883
    },
    {
      "epoch": 0.000140399169921875,
      "model_forward_time": 0.1137089729309082,
      "step": 23003
    },
    {
      "epoch": 0.000140399169921875,
      "step": 23003,
      "training_step_time": 0.4827265739440918
    },
    {
      "epoch": 0.0001404052734375,
      "model_forward_time": 0.11329293251037598,
      "step": 23004
    },
    {
      "epoch": 0.0001404052734375,
      "step": 23004,
      "training_step_time": 0.378521203994751
    },
    {
      "epoch": 0.000140411376953125,
      "model_forward_time": 0.11406302452087402,
      "step": 23005
    },
    {
      "epoch": 0.000140411376953125,
      "step": 23005,
      "training_step_time": 0.39437198638916016
    },
    {
      "epoch": 0.00014041748046875,
      "model_forward_time": 0.11429333686828613,
      "step": 23006
    },
    {
      "epoch": 0.00014041748046875,
      "step": 23006,
      "training_step_time": 0.39526915550231934
    },
    {
      "epoch": 0.000140423583984375,
      "model_forward_time": 0.11519360542297363,
      "step": 23007
    },
    {
      "epoch": 0.000140423583984375,
      "step": 23007,
      "training_step_time": 0.42391467094421387
    },
    {
      "epoch": 0.0001404296875,
      "model_forward_time": 0.11522984504699707,
      "step": 23008
    },
    {
      "epoch": 0.0001404296875,
      "step": 23008,
      "training_step_time": 0.3893473148345947
    },
    {
      "epoch": 0.000140435791015625,
      "model_forward_time": 0.11449527740478516,
      "step": 23009
    },
    {
      "epoch": 0.000140435791015625,
      "step": 23009,
      "training_step_time": 0.4852628707885742
    },
    {
      "epoch": 0.00014044189453125,
      "grad_norm": 0.14500826597213745,
      "learning_rate": 7.255207589680402e-05,
      "loss": 0.0512,
      "step": 23010
    },
    {
      "epoch": 0.00014044189453125,
      "model_forward_time": 0.11489367485046387,
      "step": 23010
    },
    {
      "epoch": 0.00014044189453125,
      "step": 23010,
      "training_step_time": 0.4431767463684082
    },
    {
      "epoch": 0.000140447998046875,
      "model_forward_time": 0.11451911926269531,
      "step": 23011
    },
    {
      "epoch": 0.000140447998046875,
      "step": 23011,
      "training_step_time": 0.47370076179504395
    },
    {
      "epoch": 0.0001404541015625,
      "model_forward_time": 0.11533665657043457,
      "step": 23012
    },
    {
      "epoch": 0.0001404541015625,
      "step": 23012,
      "training_step_time": 0.3938484191894531
    },
    {
      "epoch": 0.000140460205078125,
      "model_forward_time": 0.1146543025970459,
      "step": 23013
    },
    {
      "epoch": 0.000140460205078125,
      "step": 23013,
      "training_step_time": 0.39369726181030273
    },
    {
      "epoch": 0.00014046630859375,
      "model_forward_time": 0.11538076400756836,
      "step": 23014
    },
    {
      "epoch": 0.00014046630859375,
      "step": 23014,
      "training_step_time": 0.405367374420166
    },
    {
      "epoch": 0.000140472412109375,
      "model_forward_time": 0.11530780792236328,
      "step": 23015
    },
    {
      "epoch": 0.000140472412109375,
      "step": 23015,
      "training_step_time": 0.5037622451782227
    },
    {
      "epoch": 0.000140478515625,
      "model_forward_time": 0.11525750160217285,
      "step": 23016
    },
    {
      "epoch": 0.000140478515625,
      "step": 23016,
      "training_step_time": 0.44835400581359863
    },
    {
      "epoch": 0.000140484619140625,
      "model_forward_time": 0.11515355110168457,
      "step": 23017
    },
    {
      "epoch": 0.000140484619140625,
      "step": 23017,
      "training_step_time": 0.47612810134887695
    },
    {
      "epoch": 0.00014049072265625,
      "model_forward_time": 0.11523294448852539,
      "step": 23018
    },
    {
      "epoch": 0.00014049072265625,
      "step": 23018,
      "training_step_time": 0.4519670009613037
    },
    {
      "epoch": 0.000140496826171875,
      "model_forward_time": 0.11508440971374512,
      "step": 23019
    },
    {
      "epoch": 0.000140496826171875,
      "step": 23019,
      "training_step_time": 0.38878345489501953
    },
    {
      "epoch": 0.0001405029296875,
      "grad_norm": 0.13300667703151703,
      "learning_rate": 7.25274770273384e-05,
      "loss": 0.0479,
      "step": 23020
    },
    {
      "epoch": 0.0001405029296875,
      "model_forward_time": 0.11450600624084473,
      "step": 23020
    },
    {
      "epoch": 0.0001405029296875,
      "step": 23020,
      "training_step_time": 0.39411377906799316
    },
    {
      "epoch": 0.000140509033203125,
      "model_forward_time": 0.11479687690734863,
      "step": 23021
    },
    {
      "epoch": 0.000140509033203125,
      "step": 23021,
      "training_step_time": 0.4093301296234131
    },
    {
      "epoch": 0.00014051513671875,
      "model_forward_time": 0.11527371406555176,
      "step": 23022
    },
    {
      "epoch": 0.00014051513671875,
      "step": 23022,
      "training_step_time": 0.39429306983947754
    },
    {
      "epoch": 0.000140521240234375,
      "model_forward_time": 0.11543464660644531,
      "step": 23023
    },
    {
      "epoch": 0.000140521240234375,
      "step": 23023,
      "training_step_time": 0.3966403007507324
    },
    {
      "epoch": 0.00014052734375,
      "model_forward_time": 0.11474728584289551,
      "step": 23024
    },
    {
      "epoch": 0.00014052734375,
      "step": 23024,
      "training_step_time": 0.39498376846313477
    },
    {
      "epoch": 0.000140533447265625,
      "model_forward_time": 0.11549949645996094,
      "step": 23025
    },
    {
      "epoch": 0.000140533447265625,
      "step": 23025,
      "training_step_time": 0.4497709274291992
    },
    {
      "epoch": 0.00014053955078125,
      "model_forward_time": 0.11594247817993164,
      "step": 23026
    },
    {
      "epoch": 0.00014053955078125,
      "step": 23026,
      "training_step_time": 0.3854336738586426
    },
    {
      "epoch": 0.000140545654296875,
      "model_forward_time": 0.11661005020141602,
      "step": 23027
    },
    {
      "epoch": 0.000140545654296875,
      "step": 23027,
      "training_step_time": 0.399202823638916
    },
    {
      "epoch": 0.0001405517578125,
      "model_forward_time": 0.11513543128967285,
      "step": 23028
    },
    {
      "epoch": 0.0001405517578125,
      "step": 23028,
      "training_step_time": 0.39504575729370117
    },
    {
      "epoch": 0.000140557861328125,
      "model_forward_time": 0.11510896682739258,
      "step": 23029
    },
    {
      "epoch": 0.000140557861328125,
      "step": 23029,
      "training_step_time": 0.4015235900878906
    },
    {
      "epoch": 0.00014056396484375,
      "grad_norm": 0.11002175509929657,
      "learning_rate": 7.250287131462004e-05,
      "loss": 0.0524,
      "step": 23030
    },
    {
      "epoch": 0.00014056396484375,
      "model_forward_time": 0.11522102355957031,
      "step": 23030
    },
    {
      "epoch": 0.00014056396484375,
      "step": 23030,
      "training_step_time": 0.4387524127960205
    },
    {
      "epoch": 0.000140570068359375,
      "model_forward_time": 0.1152658462524414,
      "step": 23031
    },
    {
      "epoch": 0.000140570068359375,
      "step": 23031,
      "training_step_time": 0.48108768463134766
    },
    {
      "epoch": 0.000140576171875,
      "model_forward_time": 0.11533665657043457,
      "step": 23032
    },
    {
      "epoch": 0.000140576171875,
      "step": 23032,
      "training_step_time": 0.50374436378479
    },
    {
      "epoch": 0.000140582275390625,
      "model_forward_time": 0.11518096923828125,
      "step": 23033
    },
    {
      "epoch": 0.000140582275390625,
      "step": 23033,
      "training_step_time": 0.38924717903137207
    },
    {
      "epoch": 0.00014058837890625,
      "model_forward_time": 0.11528229713439941,
      "step": 23034
    },
    {
      "epoch": 0.00014058837890625,
      "step": 23034,
      "training_step_time": 0.3865385055541992
    },
    {
      "epoch": 0.000140594482421875,
      "model_forward_time": 0.11539077758789062,
      "step": 23035
    },
    {
      "epoch": 0.000140594482421875,
      "step": 23035,
      "training_step_time": 0.40440869331359863
    },
    {
      "epoch": 0.0001406005859375,
      "model_forward_time": 0.11558818817138672,
      "step": 23036
    },
    {
      "epoch": 0.0001406005859375,
      "step": 23036,
      "training_step_time": 0.4093027114868164
    },
    {
      "epoch": 0.000140606689453125,
      "model_forward_time": 0.11556720733642578,
      "step": 23037
    },
    {
      "epoch": 0.000140606689453125,
      "step": 23037,
      "training_step_time": 0.38442444801330566
    },
    {
      "epoch": 0.00014061279296875,
      "model_forward_time": 0.11565566062927246,
      "step": 23038
    },
    {
      "epoch": 0.00014061279296875,
      "step": 23038,
      "training_step_time": 0.4469146728515625
    },
    {
      "epoch": 0.000140618896484375,
      "model_forward_time": 0.11530041694641113,
      "step": 23039
    },
    {
      "epoch": 0.000140618896484375,
      "step": 23039,
      "training_step_time": 0.409761905670166
    },
    {
      "epoch": 0.000140625,
      "grad_norm": 0.11322540789842606,
      "learning_rate": 7.247825876612353e-05,
      "loss": 0.0508,
      "step": 23040
    },
    {
      "epoch": 0.000140625,
      "model_forward_time": 0.11507296562194824,
      "step": 23040
    },
    {
      "epoch": 0.000140625,
      "step": 23040,
      "training_step_time": 0.5144038200378418
    },
    {
      "epoch": 0.000140631103515625,
      "model_forward_time": 0.11459851264953613,
      "step": 23041
    },
    {
      "epoch": 0.000140631103515625,
      "step": 23041,
      "training_step_time": 0.39121556282043457
    },
    {
      "epoch": 0.00014063720703125,
      "model_forward_time": 0.11444473266601562,
      "step": 23042
    },
    {
      "epoch": 0.00014063720703125,
      "step": 23042,
      "training_step_time": 0.3887298107147217
    },
    {
      "epoch": 0.000140643310546875,
      "model_forward_time": 0.11504364013671875,
      "step": 23043
    },
    {
      "epoch": 0.000140643310546875,
      "step": 23043,
      "training_step_time": 0.3922731876373291
    },
    {
      "epoch": 0.0001406494140625,
      "model_forward_time": 0.1146848201751709,
      "step": 23044
    },
    {
      "epoch": 0.0001406494140625,
      "step": 23044,
      "training_step_time": 0.38094615936279297
    },
    {
      "epoch": 0.000140655517578125,
      "model_forward_time": 0.1150820255279541,
      "step": 23045
    },
    {
      "epoch": 0.000140655517578125,
      "step": 23045,
      "training_step_time": 0.36707139015197754
    },
    {
      "epoch": 0.00014066162109375,
      "model_forward_time": 0.11535835266113281,
      "step": 23046
    },
    {
      "epoch": 0.00014066162109375,
      "step": 23046,
      "training_step_time": 0.5168344974517822
    },
    {
      "epoch": 0.000140667724609375,
      "model_forward_time": 0.11493849754333496,
      "step": 23047
    },
    {
      "epoch": 0.000140667724609375,
      "step": 23047,
      "training_step_time": 0.47447776794433594
    },
    {
      "epoch": 0.000140673828125,
      "model_forward_time": 0.11548233032226562,
      "step": 23048
    },
    {
      "epoch": 0.000140673828125,
      "step": 23048,
      "training_step_time": 0.39203572273254395
    },
    {
      "epoch": 0.000140679931640625,
      "model_forward_time": 0.1149287223815918,
      "step": 23049
    },
    {
      "epoch": 0.000140679931640625,
      "step": 23049,
      "training_step_time": 0.40321826934814453
    },
    {
      "epoch": 0.00014068603515625,
      "grad_norm": 0.1674901843070984,
      "learning_rate": 7.245363938932551e-05,
      "loss": 0.0569,
      "step": 23050
    },
    {
      "epoch": 0.00014068603515625,
      "model_forward_time": 0.11537790298461914,
      "step": 23050
    },
    {
      "epoch": 0.00014068603515625,
      "step": 23050,
      "training_step_time": 0.3888092041015625
    },
    {
      "epoch": 0.000140692138671875,
      "model_forward_time": 0.11792421340942383,
      "step": 23051
    },
    {
      "epoch": 0.000140692138671875,
      "step": 23051,
      "training_step_time": 0.3922691345214844
    },
    {
      "epoch": 0.0001406982421875,
      "model_forward_time": 0.11505770683288574,
      "step": 23052
    },
    {
      "epoch": 0.0001406982421875,
      "step": 23052,
      "training_step_time": 0.4039006233215332
    },
    {
      "epoch": 0.000140704345703125,
      "model_forward_time": 0.11525344848632812,
      "step": 23053
    },
    {
      "epoch": 0.000140704345703125,
      "step": 23053,
      "training_step_time": 0.43564796447753906
    },
    {
      "epoch": 0.00014071044921875,
      "model_forward_time": 0.11491203308105469,
      "step": 23054
    },
    {
      "epoch": 0.00014071044921875,
      "step": 23054,
      "training_step_time": 0.4097888469696045
    },
    {
      "epoch": 0.000140716552734375,
      "model_forward_time": 0.11507511138916016,
      "step": 23055
    },
    {
      "epoch": 0.000140716552734375,
      "step": 23055,
      "training_step_time": 0.6469817161560059
    },
    {
      "epoch": 0.00014072265625,
      "model_forward_time": 0.1146538257598877,
      "step": 23056
    },
    {
      "epoch": 0.00014072265625,
      "step": 23056,
      "training_step_time": 0.3819563388824463
    },
    {
      "epoch": 0.000140728759765625,
      "model_forward_time": 0.11511802673339844,
      "step": 23057
    },
    {
      "epoch": 0.000140728759765625,
      "step": 23057,
      "training_step_time": 0.3915867805480957
    },
    {
      "epoch": 0.00014073486328125,
      "model_forward_time": 0.11532068252563477,
      "step": 23058
    },
    {
      "epoch": 0.00014073486328125,
      "step": 23058,
      "training_step_time": 0.39171934127807617
    },
    {
      "epoch": 0.000140740966796875,
      "model_forward_time": 0.11432790756225586,
      "step": 23059
    },
    {
      "epoch": 0.000140740966796875,
      "step": 23059,
      "training_step_time": 0.3636493682861328
    },
    {
      "epoch": 0.0001407470703125,
      "grad_norm": 0.10104452818632126,
      "learning_rate": 7.242901319170471e-05,
      "loss": 0.048,
      "step": 23060
    },
    {
      "epoch": 0.0001407470703125,
      "model_forward_time": 0.11451029777526855,
      "step": 23060
    },
    {
      "epoch": 0.0001407470703125,
      "step": 23060,
      "training_step_time": 0.48981738090515137
    },
    {
      "epoch": 0.000140753173828125,
      "model_forward_time": 0.11481118202209473,
      "step": 23061
    },
    {
      "epoch": 0.000140753173828125,
      "step": 23061,
      "training_step_time": 0.57861328125
    },
    {
      "epoch": 0.00014075927734375,
      "model_forward_time": 0.11510324478149414,
      "step": 23062
    },
    {
      "epoch": 0.00014075927734375,
      "step": 23062,
      "training_step_time": 0.41886186599731445
    },
    {
      "epoch": 0.000140765380859375,
      "model_forward_time": 0.11439919471740723,
      "step": 23063
    },
    {
      "epoch": 0.000140765380859375,
      "step": 23063,
      "training_step_time": 0.38986802101135254
    },
    {
      "epoch": 0.000140771484375,
      "model_forward_time": 0.1147763729095459,
      "step": 23064
    },
    {
      "epoch": 0.000140771484375,
      "step": 23064,
      "training_step_time": 0.3865647315979004
    },
    {
      "epoch": 0.000140777587890625,
      "model_forward_time": 0.11462688446044922,
      "step": 23065
    },
    {
      "epoch": 0.000140777587890625,
      "step": 23065,
      "training_step_time": 0.39398622512817383
    },
    {
      "epoch": 0.00014078369140625,
      "model_forward_time": 0.11468768119812012,
      "step": 23066
    },
    {
      "epoch": 0.00014078369140625,
      "step": 23066,
      "training_step_time": 0.3906381130218506
    },
    {
      "epoch": 0.000140789794921875,
      "model_forward_time": 0.11520171165466309,
      "step": 23067
    },
    {
      "epoch": 0.000140789794921875,
      "step": 23067,
      "training_step_time": 0.5307843685150146
    },
    {
      "epoch": 0.0001407958984375,
      "model_forward_time": 0.11495637893676758,
      "step": 23068
    },
    {
      "epoch": 0.0001407958984375,
      "step": 23068,
      "training_step_time": 0.45911121368408203
    },
    {
      "epoch": 0.000140802001953125,
      "model_forward_time": 0.11519217491149902,
      "step": 23069
    },
    {
      "epoch": 0.000140802001953125,
      "step": 23069,
      "training_step_time": 0.3978383541107178
    },
    {
      "epoch": 0.00014080810546875,
      "grad_norm": 0.21300072968006134,
      "learning_rate": 7.240438018074189e-05,
      "loss": 0.0528,
      "step": 23070
    },
    {
      "epoch": 0.00014080810546875,
      "model_forward_time": 0.1151893138885498,
      "step": 23070
    },
    {
      "epoch": 0.00014080810546875,
      "step": 23070,
      "training_step_time": 0.38558459281921387
    },
    {
      "epoch": 0.000140814208984375,
      "model_forward_time": 0.11503219604492188,
      "step": 23071
    },
    {
      "epoch": 0.000140814208984375,
      "step": 23071,
      "training_step_time": 0.39203906059265137
    },
    {
      "epoch": 0.0001408203125,
      "model_forward_time": 0.1149139404296875,
      "step": 23072
    },
    {
      "epoch": 0.0001408203125,
      "step": 23072,
      "training_step_time": 0.4045743942260742
    },
    {
      "epoch": 0.000140826416015625,
      "model_forward_time": 0.11527776718139648,
      "step": 23073
    },
    {
      "epoch": 0.000140826416015625,
      "step": 23073,
      "training_step_time": 0.5302720069885254
    },
    {
      "epoch": 0.00014083251953125,
      "model_forward_time": 0.11521339416503906,
      "step": 23074
    },
    {
      "epoch": 0.00014083251953125,
      "step": 23074,
      "training_step_time": 0.44405364990234375
    },
    {
      "epoch": 0.000140838623046875,
      "model_forward_time": 0.1147620677947998,
      "step": 23075
    },
    {
      "epoch": 0.000140838623046875,
      "step": 23075,
      "training_step_time": 0.4435861110687256
    },
    {
      "epoch": 0.0001408447265625,
      "model_forward_time": 0.11484742164611816,
      "step": 23076
    },
    {
      "epoch": 0.0001408447265625,
      "step": 23076,
      "training_step_time": 0.43765735626220703
    },
    {
      "epoch": 0.000140850830078125,
      "model_forward_time": 0.11513924598693848,
      "step": 23077
    },
    {
      "epoch": 0.000140850830078125,
      "step": 23077,
      "training_step_time": 0.38789916038513184
    },
    {
      "epoch": 0.00014085693359375,
      "model_forward_time": 0.11550235748291016,
      "step": 23078
    },
    {
      "epoch": 0.00014085693359375,
      "step": 23078,
      "training_step_time": 0.38718533515930176
    },
    {
      "epoch": 0.000140863037109375,
      "model_forward_time": 0.11690545082092285,
      "step": 23079
    },
    {
      "epoch": 0.000140863037109375,
      "step": 23079,
      "training_step_time": 0.4594738483428955
    },
    {
      "epoch": 0.000140869140625,
      "grad_norm": 0.14589537680149078,
      "learning_rate": 7.237974036391992e-05,
      "loss": 0.051,
      "step": 23080
    },
    {
      "epoch": 0.000140869140625,
      "model_forward_time": 0.11528515815734863,
      "step": 23080
    },
    {
      "epoch": 0.000140869140625,
      "step": 23080,
      "training_step_time": 0.3927938938140869
    },
    {
      "epoch": 0.000140875244140625,
      "model_forward_time": 0.11542391777038574,
      "step": 23081
    },
    {
      "epoch": 0.000140875244140625,
      "step": 23081,
      "training_step_time": 0.44089651107788086
    },
    {
      "epoch": 0.00014088134765625,
      "model_forward_time": 0.11584854125976562,
      "step": 23082
    },
    {
      "epoch": 0.00014088134765625,
      "step": 23082,
      "training_step_time": 0.3964524269104004
    },
    {
      "epoch": 0.000140887451171875,
      "model_forward_time": 0.11484122276306152,
      "step": 23083
    },
    {
      "epoch": 0.000140887451171875,
      "step": 23083,
      "training_step_time": 0.4344973564147949
    },
    {
      "epoch": 0.0001408935546875,
      "model_forward_time": 0.11505508422851562,
      "step": 23084
    },
    {
      "epoch": 0.0001408935546875,
      "step": 23084,
      "training_step_time": 0.38666844367980957
    },
    {
      "epoch": 0.000140899658203125,
      "model_forward_time": 0.11529183387756348,
      "step": 23085
    },
    {
      "epoch": 0.000140899658203125,
      "step": 23085,
      "training_step_time": 0.385761022567749
    },
    {
      "epoch": 0.00014090576171875,
      "model_forward_time": 0.1150064468383789,
      "step": 23086
    },
    {
      "epoch": 0.00014090576171875,
      "step": 23086,
      "training_step_time": 0.4003267288208008
    },
    {
      "epoch": 0.000140911865234375,
      "model_forward_time": 0.11488723754882812,
      "step": 23087
    },
    {
      "epoch": 0.000140911865234375,
      "step": 23087,
      "training_step_time": 0.457827091217041
    },
    {
      "epoch": 0.00014091796875,
      "model_forward_time": 0.11473560333251953,
      "step": 23088
    },
    {
      "epoch": 0.00014091796875,
      "step": 23088,
      "training_step_time": 0.4728357791900635
    },
    {
      "epoch": 0.000140924072265625,
      "model_forward_time": 0.11564993858337402,
      "step": 23089
    },
    {
      "epoch": 0.000140924072265625,
      "step": 23089,
      "training_step_time": 0.43603086471557617
    },
    {
      "epoch": 0.00014093017578125,
      "grad_norm": 0.19512246549129486,
      "learning_rate": 7.235509374872373e-05,
      "loss": 0.0485,
      "step": 23090
    },
    {
      "epoch": 0.00014093017578125,
      "model_forward_time": 0.11478471755981445,
      "step": 23090
    },
    {
      "epoch": 0.00014093017578125,
      "step": 23090,
      "training_step_time": 0.5052692890167236
    },
    {
      "epoch": 0.000140936279296875,
      "model_forward_time": 0.11509156227111816,
      "step": 23091
    },
    {
      "epoch": 0.000140936279296875,
      "step": 23091,
      "training_step_time": 0.40360021591186523
    },
    {
      "epoch": 0.0001409423828125,
      "model_forward_time": 0.11503958702087402,
      "step": 23092
    },
    {
      "epoch": 0.0001409423828125,
      "step": 23092,
      "training_step_time": 0.4000837802886963
    },
    {
      "epoch": 0.000140948486328125,
      "model_forward_time": 0.11499214172363281,
      "step": 23093
    },
    {
      "epoch": 0.000140948486328125,
      "step": 23093,
      "training_step_time": 0.3854820728302002
    },
    {
      "epoch": 0.00014095458984375,
      "model_forward_time": 0.11522769927978516,
      "step": 23094
    },
    {
      "epoch": 0.00014095458984375,
      "step": 23094,
      "training_step_time": 0.4051096439361572
    },
    {
      "epoch": 0.000140960693359375,
      "model_forward_time": 0.11595320701599121,
      "step": 23095
    },
    {
      "epoch": 0.000140960693359375,
      "step": 23095,
      "training_step_time": 0.3966023921966553
    },
    {
      "epoch": 0.000140966796875,
      "model_forward_time": 0.11504673957824707,
      "step": 23096
    },
    {
      "epoch": 0.000140966796875,
      "step": 23096,
      "training_step_time": 0.4050407409667969
    },
    {
      "epoch": 0.000140972900390625,
      "model_forward_time": 0.11524796485900879,
      "step": 23097
    },
    {
      "epoch": 0.000140972900390625,
      "step": 23097,
      "training_step_time": 0.4456143379211426
    },
    {
      "epoch": 0.00014097900390625,
      "model_forward_time": 0.11543989181518555,
      "step": 23098
    },
    {
      "epoch": 0.00014097900390625,
      "step": 23098,
      "training_step_time": 0.3904097080230713
    },
    {
      "epoch": 0.000140985107421875,
      "model_forward_time": 0.1146235466003418,
      "step": 23099
    },
    {
      "epoch": 0.000140985107421875,
      "step": 23099,
      "training_step_time": 0.388843297958374
    },
    {
      "epoch": 0.0001409912109375,
      "grad_norm": 0.16234342753887177,
      "learning_rate": 7.233044034264034e-05,
      "loss": 0.0509,
      "step": 23100
    },
    {
      "epoch": 0.0001409912109375,
      "model_forward_time": 0.1146993637084961,
      "step": 23100
    },
    {
      "epoch": 0.0001409912109375,
      "step": 23100,
      "training_step_time": 0.390545129776001
    },
    {
      "epoch": 0.000140997314453125,
      "model_forward_time": 0.11501812934875488,
      "step": 23101
    },
    {
      "epoch": 0.000140997314453125,
      "step": 23101,
      "training_step_time": 0.40790653228759766
    },
    {
      "epoch": 0.00014100341796875,
      "model_forward_time": 0.11574101448059082,
      "step": 23102
    },
    {
      "epoch": 0.00014100341796875,
      "step": 23102,
      "training_step_time": 0.42426013946533203
    },
    {
      "epoch": 0.000141009521484375,
      "model_forward_time": 0.1151125431060791,
      "step": 23103
    },
    {
      "epoch": 0.000141009521484375,
      "step": 23103,
      "training_step_time": 0.5159938335418701
    },
    {
      "epoch": 0.000141015625,
      "model_forward_time": 0.11571550369262695,
      "step": 23104
    },
    {
      "epoch": 0.000141015625,
      "step": 23104,
      "training_step_time": 0.43404197692871094
    },
    {
      "epoch": 0.000141021728515625,
      "model_forward_time": 0.11502671241760254,
      "step": 23105
    },
    {
      "epoch": 0.000141021728515625,
      "step": 23105,
      "training_step_time": 0.4090604782104492
    },
    {
      "epoch": 0.00014102783203125,
      "model_forward_time": 0.11506867408752441,
      "step": 23106
    },
    {
      "epoch": 0.00014102783203125,
      "step": 23106,
      "training_step_time": 0.41335225105285645
    },
    {
      "epoch": 0.000141033935546875,
      "model_forward_time": 0.11547660827636719,
      "step": 23107
    },
    {
      "epoch": 0.000141033935546875,
      "step": 23107,
      "training_step_time": 0.3894956111907959
    },
    {
      "epoch": 0.0001410400390625,
      "model_forward_time": 0.11542534828186035,
      "step": 23108
    },
    {
      "epoch": 0.0001410400390625,
      "step": 23108,
      "training_step_time": 0.3873276710510254
    },
    {
      "epoch": 0.000141046142578125,
      "model_forward_time": 0.11553215980529785,
      "step": 23109
    },
    {
      "epoch": 0.000141046142578125,
      "step": 23109,
      "training_step_time": 0.39574503898620605
    },
    {
      "epoch": 0.00014105224609375,
      "grad_norm": 0.12025989592075348,
      "learning_rate": 7.230578015315876e-05,
      "loss": 0.0468,
      "step": 23110
    },
    {
      "epoch": 0.00014105224609375,
      "model_forward_time": 0.1153721809387207,
      "step": 23110
    },
    {
      "epoch": 0.00014105224609375,
      "step": 23110,
      "training_step_time": 0.3857154846191406
    },
    {
      "epoch": 0.000141058349609375,
      "model_forward_time": 0.11484360694885254,
      "step": 23111
    },
    {
      "epoch": 0.000141058349609375,
      "step": 23111,
      "training_step_time": 0.45704102516174316
    },
    {
      "epoch": 0.000141064453125,
      "model_forward_time": 0.11547732353210449,
      "step": 23112
    },
    {
      "epoch": 0.000141064453125,
      "step": 23112,
      "training_step_time": 0.4646463394165039
    },
    {
      "epoch": 0.000141070556640625,
      "model_forward_time": 0.11475205421447754,
      "step": 23113
    },
    {
      "epoch": 0.000141070556640625,
      "step": 23113,
      "training_step_time": 0.39223527908325195
    },
    {
      "epoch": 0.00014107666015625,
      "model_forward_time": 0.11575770378112793,
      "step": 23114
    },
    {
      "epoch": 0.00014107666015625,
      "step": 23114,
      "training_step_time": 0.41254615783691406
    },
    {
      "epoch": 0.000141082763671875,
      "model_forward_time": 0.11443328857421875,
      "step": 23115
    },
    {
      "epoch": 0.000141082763671875,
      "step": 23115,
      "training_step_time": 0.41815948486328125
    },
    {
      "epoch": 0.0001410888671875,
      "model_forward_time": 0.1145780086517334,
      "step": 23116
    },
    {
      "epoch": 0.0001410888671875,
      "step": 23116,
      "training_step_time": 0.4277470111846924
    },
    {
      "epoch": 0.000141094970703125,
      "model_forward_time": 0.11457538604736328,
      "step": 23117
    },
    {
      "epoch": 0.000141094970703125,
      "step": 23117,
      "training_step_time": 0.400407075881958
    },
    {
      "epoch": 0.00014110107421875,
      "model_forward_time": 0.11518526077270508,
      "step": 23118
    },
    {
      "epoch": 0.00014110107421875,
      "step": 23118,
      "training_step_time": 0.3980834484100342
    },
    {
      "epoch": 0.000141107177734375,
      "model_forward_time": 0.11538887023925781,
      "step": 23119
    },
    {
      "epoch": 0.000141107177734375,
      "step": 23119,
      "training_step_time": 0.41904735565185547
    },
    {
      "epoch": 0.00014111328125,
      "grad_norm": 0.17597636580467224,
      "learning_rate": 7.22811131877701e-05,
      "loss": 0.05,
      "step": 23120
    },
    {
      "epoch": 0.00014111328125,
      "model_forward_time": 0.11555099487304688,
      "step": 23120
    },
    {
      "epoch": 0.00014111328125,
      "step": 23120,
      "training_step_time": 0.4862227439880371
    },
    {
      "epoch": 0.000141119384765625,
      "model_forward_time": 0.11539387702941895,
      "step": 23121
    },
    {
      "epoch": 0.000141119384765625,
      "step": 23121,
      "training_step_time": 0.3950169086456299
    },
    {
      "epoch": 0.00014112548828125,
      "model_forward_time": 0.11533045768737793,
      "step": 23122
    },
    {
      "epoch": 0.00014112548828125,
      "step": 23122,
      "training_step_time": 0.3731679916381836
    },
    {
      "epoch": 0.000141131591796875,
      "model_forward_time": 0.11475777626037598,
      "step": 23123
    },
    {
      "epoch": 0.000141131591796875,
      "step": 23123,
      "training_step_time": 0.39275121688842773
    },
    {
      "epoch": 0.0001411376953125,
      "model_forward_time": 0.11520147323608398,
      "step": 23124
    },
    {
      "epoch": 0.0001411376953125,
      "step": 23124,
      "training_step_time": 0.3884928226470947
    },
    {
      "epoch": 0.000141143798828125,
      "model_forward_time": 0.11484861373901367,
      "step": 23125
    },
    {
      "epoch": 0.000141143798828125,
      "step": 23125,
      "training_step_time": 0.4796786308288574
    },
    {
      "epoch": 0.00014114990234375,
      "model_forward_time": 0.11551356315612793,
      "step": 23126
    },
    {
      "epoch": 0.00014114990234375,
      "step": 23126,
      "training_step_time": 0.3986184597015381
    },
    {
      "epoch": 0.000141156005859375,
      "model_forward_time": 0.11528801918029785,
      "step": 23127
    },
    {
      "epoch": 0.000141156005859375,
      "step": 23127,
      "training_step_time": 0.3835313320159912
    },
    {
      "epoch": 0.000141162109375,
      "model_forward_time": 0.11509108543395996,
      "step": 23128
    },
    {
      "epoch": 0.000141162109375,
      "step": 23128,
      "training_step_time": 0.4310801029205322
    },
    {
      "epoch": 0.000141168212890625,
      "model_forward_time": 0.11480951309204102,
      "step": 23129
    },
    {
      "epoch": 0.000141168212890625,
      "step": 23129,
      "training_step_time": 0.40378642082214355
    },
    {
      "epoch": 0.00014117431640625,
      "grad_norm": 0.20420600473880768,
      "learning_rate": 7.225643945396757e-05,
      "loss": 0.0467,
      "step": 23130
    },
    {
      "epoch": 0.00014117431640625,
      "model_forward_time": 0.11507415771484375,
      "step": 23130
    },
    {
      "epoch": 0.00014117431640625,
      "step": 23130,
      "training_step_time": 0.40650248527526855
    },
    {
      "epoch": 0.000141180419921875,
      "model_forward_time": 0.11477279663085938,
      "step": 23131
    },
    {
      "epoch": 0.000141180419921875,
      "step": 23131,
      "training_step_time": 0.44683146476745605
    },
    {
      "epoch": 0.0001411865234375,
      "model_forward_time": 0.11568021774291992,
      "step": 23132
    },
    {
      "epoch": 0.0001411865234375,
      "step": 23132,
      "training_step_time": 0.39838266372680664
    },
    {
      "epoch": 0.000141192626953125,
      "model_forward_time": 0.11514902114868164,
      "step": 23133
    },
    {
      "epoch": 0.000141192626953125,
      "step": 23133,
      "training_step_time": 0.3818237781524658
    },
    {
      "epoch": 0.00014119873046875,
      "model_forward_time": 0.11522841453552246,
      "step": 23134
    },
    {
      "epoch": 0.00014119873046875,
      "step": 23134,
      "training_step_time": 0.4839012622833252
    },
    {
      "epoch": 0.000141204833984375,
      "model_forward_time": 0.11486673355102539,
      "step": 23135
    },
    {
      "epoch": 0.000141204833984375,
      "step": 23135,
      "training_step_time": 0.5012598037719727
    },
    {
      "epoch": 0.0001412109375,
      "model_forward_time": 0.11563324928283691,
      "step": 23136
    },
    {
      "epoch": 0.0001412109375,
      "step": 23136,
      "training_step_time": 0.3933999538421631
    },
    {
      "epoch": 0.000141217041015625,
      "model_forward_time": 0.11580991744995117,
      "step": 23137
    },
    {
      "epoch": 0.000141217041015625,
      "step": 23137,
      "training_step_time": 0.3838634490966797
    },
    {
      "epoch": 0.00014122314453125,
      "model_forward_time": 0.11561918258666992,
      "step": 23138
    },
    {
      "epoch": 0.00014122314453125,
      "step": 23138,
      "training_step_time": 0.38608837127685547
    },
    {
      "epoch": 0.000141229248046875,
      "model_forward_time": 0.11493468284606934,
      "step": 23139
    },
    {
      "epoch": 0.000141229248046875,
      "step": 23139,
      "training_step_time": 0.3916900157928467
    },
    {
      "epoch": 0.0001412353515625,
      "grad_norm": 0.1351895034313202,
      "learning_rate": 7.223175895924638e-05,
      "loss": 0.0476,
      "step": 23140
    },
    {
      "epoch": 0.0001412353515625,
      "model_forward_time": 0.11536169052124023,
      "step": 23140
    },
    {
      "epoch": 0.0001412353515625,
      "step": 23140,
      "training_step_time": 0.40395069122314453
    },
    {
      "epoch": 0.000141241455078125,
      "model_forward_time": 0.11485862731933594,
      "step": 23141
    },
    {
      "epoch": 0.000141241455078125,
      "step": 23141,
      "training_step_time": 0.39351654052734375
    },
    {
      "epoch": 0.00014124755859375,
      "model_forward_time": 0.11538934707641602,
      "step": 23142
    },
    {
      "epoch": 0.00014124755859375,
      "step": 23142,
      "training_step_time": 0.4625396728515625
    },
    {
      "epoch": 0.000141253662109375,
      "model_forward_time": 0.1153266429901123,
      "step": 23143
    },
    {
      "epoch": 0.000141253662109375,
      "step": 23143,
      "training_step_time": 0.4204072952270508
    },
    {
      "epoch": 0.000141259765625,
      "model_forward_time": 0.11522507667541504,
      "step": 23144
    },
    {
      "epoch": 0.000141259765625,
      "step": 23144,
      "training_step_time": 0.449709415435791
    },
    {
      "epoch": 0.000141265869140625,
      "model_forward_time": 0.11545419692993164,
      "step": 23145
    },
    {
      "epoch": 0.000141265869140625,
      "step": 23145,
      "training_step_time": 0.3922615051269531
    },
    {
      "epoch": 0.00014127197265625,
      "model_forward_time": 0.11606884002685547,
      "step": 23146
    },
    {
      "epoch": 0.00014127197265625,
      "step": 23146,
      "training_step_time": 0.3943476676940918
    },
    {
      "epoch": 0.000141278076171875,
      "model_forward_time": 0.11516904830932617,
      "step": 23147
    },
    {
      "epoch": 0.000141278076171875,
      "step": 23147,
      "training_step_time": 0.392287015914917
    },
    {
      "epoch": 0.0001412841796875,
      "model_forward_time": 0.1153876781463623,
      "step": 23148
    },
    {
      "epoch": 0.0001412841796875,
      "step": 23148,
      "training_step_time": 0.36386990547180176
    },
    {
      "epoch": 0.000141290283203125,
      "model_forward_time": 0.1155557632446289,
      "step": 23149
    },
    {
      "epoch": 0.000141290283203125,
      "step": 23149,
      "training_step_time": 0.5205926895141602
    },
    {
      "epoch": 0.00014129638671875,
      "grad_norm": 0.1350976824760437,
      "learning_rate": 7.220707171110382e-05,
      "loss": 0.0486,
      "step": 23150
    },
    {
      "epoch": 0.00014129638671875,
      "model_forward_time": 0.11556053161621094,
      "step": 23150
    },
    {
      "epoch": 0.00014129638671875,
      "step": 23150,
      "training_step_time": 0.46193742752075195
    },
    {
      "epoch": 0.000141302490234375,
      "model_forward_time": 0.11548137664794922,
      "step": 23151
    },
    {
      "epoch": 0.000141302490234375,
      "step": 23151,
      "training_step_time": 0.3873629570007324
    },
    {
      "epoch": 0.00014130859375,
      "model_forward_time": 0.11532425880432129,
      "step": 23152
    },
    {
      "epoch": 0.00014130859375,
      "step": 23152,
      "training_step_time": 0.3846619129180908
    },
    {
      "epoch": 0.000141314697265625,
      "model_forward_time": 0.1143941879272461,
      "step": 23153
    },
    {
      "epoch": 0.000141314697265625,
      "step": 23153,
      "training_step_time": 0.38568830490112305
    },
    {
      "epoch": 0.00014132080078125,
      "model_forward_time": 0.11516618728637695,
      "step": 23154
    },
    {
      "epoch": 0.00014132080078125,
      "step": 23154,
      "training_step_time": 0.4034707546234131
    },
    {
      "epoch": 0.000141326904296875,
      "model_forward_time": 0.11627960205078125,
      "step": 23155
    },
    {
      "epoch": 0.000141326904296875,
      "step": 23155,
      "training_step_time": 0.46061015129089355
    },
    {
      "epoch": 0.0001413330078125,
      "model_forward_time": 0.11512207984924316,
      "step": 23156
    },
    {
      "epoch": 0.0001413330078125,
      "step": 23156,
      "training_step_time": 0.4065730571746826
    },
    {
      "epoch": 0.000141339111328125,
      "model_forward_time": 0.11461472511291504,
      "step": 23157
    },
    {
      "epoch": 0.000141339111328125,
      "step": 23157,
      "training_step_time": 0.45250535011291504
    },
    {
      "epoch": 0.00014134521484375,
      "model_forward_time": 0.11525559425354004,
      "step": 23158
    },
    {
      "epoch": 0.00014134521484375,
      "step": 23158,
      "training_step_time": 0.48894762992858887
    },
    {
      "epoch": 0.000141351318359375,
      "model_forward_time": 0.1145472526550293,
      "step": 23159
    },
    {
      "epoch": 0.000141351318359375,
      "step": 23159,
      "training_step_time": 0.49466753005981445
    },
    {
      "epoch": 0.000141357421875,
      "grad_norm": 0.09704049676656723,
      "learning_rate": 7.218237771703921e-05,
      "loss": 0.0533,
      "step": 23160
    },
    {
      "epoch": 0.000141357421875,
      "model_forward_time": 0.11394166946411133,
      "step": 23160
    },
    {
      "epoch": 0.000141357421875,
      "step": 23160,
      "training_step_time": 0.38829541206359863
    },
    {
      "epoch": 0.000141363525390625,
      "model_forward_time": 0.1146090030670166,
      "step": 23161
    },
    {
      "epoch": 0.000141363525390625,
      "step": 23161,
      "training_step_time": 0.39280223846435547
    },
    {
      "epoch": 0.00014136962890625,
      "model_forward_time": 0.11415743827819824,
      "step": 23162
    },
    {
      "epoch": 0.00014136962890625,
      "step": 23162,
      "training_step_time": 0.36539435386657715
    },
    {
      "epoch": 0.000141375732421875,
      "model_forward_time": 0.11455893516540527,
      "step": 23163
    },
    {
      "epoch": 0.000141375732421875,
      "step": 23163,
      "training_step_time": 0.4259610176086426
    },
    {
      "epoch": 0.0001413818359375,
      "model_forward_time": 0.11534714698791504,
      "step": 23164
    },
    {
      "epoch": 0.0001413818359375,
      "step": 23164,
      "training_step_time": 0.42713165283203125
    },
    {
      "epoch": 0.000141387939453125,
      "model_forward_time": 0.11599206924438477,
      "step": 23165
    },
    {
      "epoch": 0.000141387939453125,
      "step": 23165,
      "training_step_time": 0.39215755462646484
    },
    {
      "epoch": 0.00014139404296875,
      "model_forward_time": 0.11460614204406738,
      "step": 23166
    },
    {
      "epoch": 0.00014139404296875,
      "step": 23166,
      "training_step_time": 0.40244412422180176
    },
    {
      "epoch": 0.000141400146484375,
      "model_forward_time": 0.1152195930480957,
      "step": 23167
    },
    {
      "epoch": 0.000141400146484375,
      "step": 23167,
      "training_step_time": 0.39763784408569336
    },
    {
      "epoch": 0.00014140625,
      "model_forward_time": 0.11535072326660156,
      "step": 23168
    },
    {
      "epoch": 0.00014140625,
      "step": 23168,
      "training_step_time": 0.39565110206604004
    },
    {
      "epoch": 0.000141412353515625,
      "model_forward_time": 0.11464548110961914,
      "step": 23169
    },
    {
      "epoch": 0.000141412353515625,
      "step": 23169,
      "training_step_time": 0.39966297149658203
    },
    {
      "epoch": 0.00014141845703125,
      "grad_norm": 0.11502549797296524,
      "learning_rate": 7.215767698455394e-05,
      "loss": 0.0455,
      "step": 23170
    },
    {
      "epoch": 0.00014141845703125,
      "model_forward_time": 0.1146385669708252,
      "step": 23170
    },
    {
      "epoch": 0.00014141845703125,
      "step": 23170,
      "training_step_time": 0.40623998641967773
    },
    {
      "epoch": 0.000141424560546875,
      "model_forward_time": 0.11565518379211426,
      "step": 23171
    },
    {
      "epoch": 0.000141424560546875,
      "step": 23171,
      "training_step_time": 0.5230379104614258
    },
    {
      "epoch": 0.0001414306640625,
      "model_forward_time": 0.11639022827148438,
      "step": 23172
    },
    {
      "epoch": 0.0001414306640625,
      "step": 23172,
      "training_step_time": 0.4640059471130371
    },
    {
      "epoch": 0.000141436767578125,
      "model_forward_time": 0.11442947387695312,
      "step": 23173
    },
    {
      "epoch": 0.000141436767578125,
      "step": 23173,
      "training_step_time": 0.4440121650695801
    },
    {
      "epoch": 0.00014144287109375,
      "model_forward_time": 0.11517095565795898,
      "step": 23174
    },
    {
      "epoch": 0.00014144287109375,
      "step": 23174,
      "training_step_time": 0.3909327983856201
    },
    {
      "epoch": 0.000141448974609375,
      "model_forward_time": 0.11495709419250488,
      "step": 23175
    },
    {
      "epoch": 0.000141448974609375,
      "step": 23175,
      "training_step_time": 0.39557337760925293
    },
    {
      "epoch": 0.000141455078125,
      "model_forward_time": 0.11466455459594727,
      "step": 23176
    },
    {
      "epoch": 0.000141455078125,
      "step": 23176,
      "training_step_time": 0.3872337341308594
    },
    {
      "epoch": 0.000141461181640625,
      "model_forward_time": 0.1144404411315918,
      "step": 23177
    },
    {
      "epoch": 0.000141461181640625,
      "step": 23177,
      "training_step_time": 0.45038771629333496
    },
    {
      "epoch": 0.00014146728515625,
      "model_forward_time": 0.11506319046020508,
      "step": 23178
    },
    {
      "epoch": 0.00014146728515625,
      "step": 23178,
      "training_step_time": 0.46028685569763184
    },
    {
      "epoch": 0.000141473388671875,
      "model_forward_time": 0.11548852920532227,
      "step": 23179
    },
    {
      "epoch": 0.000141473388671875,
      "step": 23179,
      "training_step_time": 0.41872668266296387
    },
    {
      "epoch": 0.0001414794921875,
      "grad_norm": 0.14295822381973267,
      "learning_rate": 7.213296952115144e-05,
      "loss": 0.0489,
      "step": 23180
    },
    {
      "epoch": 0.0001414794921875,
      "model_forward_time": 0.11537384986877441,
      "step": 23180
    },
    {
      "epoch": 0.0001414794921875,
      "step": 23180,
      "training_step_time": 0.3843519687652588
    },
    {
      "epoch": 0.000141485595703125,
      "model_forward_time": 0.11468195915222168,
      "step": 23181
    },
    {
      "epoch": 0.000141485595703125,
      "step": 23181,
      "training_step_time": 0.3804471492767334
    },
    {
      "epoch": 0.00014149169921875,
      "model_forward_time": 0.11561965942382812,
      "step": 23182
    },
    {
      "epoch": 0.00014149169921875,
      "step": 23182,
      "training_step_time": 0.4038212299346924
    },
    {
      "epoch": 0.000141497802734375,
      "model_forward_time": 0.11571192741394043,
      "step": 23183
    },
    {
      "epoch": 0.000141497802734375,
      "step": 23183,
      "training_step_time": 0.3921642303466797
    },
    {
      "epoch": 0.00014150390625,
      "model_forward_time": 0.11588335037231445,
      "step": 23184
    },
    {
      "epoch": 0.00014150390625,
      "step": 23184,
      "training_step_time": 0.4475536346435547
    },
    {
      "epoch": 0.000141510009765625,
      "model_forward_time": 0.11521387100219727,
      "step": 23185
    },
    {
      "epoch": 0.000141510009765625,
      "step": 23185,
      "training_step_time": 0.3994879722595215
    },
    {
      "epoch": 0.00014151611328125,
      "model_forward_time": 0.11519718170166016,
      "step": 23186
    },
    {
      "epoch": 0.00014151611328125,
      "step": 23186,
      "training_step_time": 0.4598517417907715
    },
    {
      "epoch": 0.000141522216796875,
      "model_forward_time": 0.11520028114318848,
      "step": 23187
    },
    {
      "epoch": 0.000141522216796875,
      "step": 23187,
      "training_step_time": 0.3927493095397949
    },
    {
      "epoch": 0.0001415283203125,
      "model_forward_time": 0.11552691459655762,
      "step": 23188
    },
    {
      "epoch": 0.0001415283203125,
      "step": 23188,
      "training_step_time": 0.4450645446777344
    },
    {
      "epoch": 0.000141534423828125,
      "model_forward_time": 0.1150209903717041,
      "step": 23189
    },
    {
      "epoch": 0.000141534423828125,
      "step": 23189,
      "training_step_time": 0.3939554691314697
    },
    {
      "epoch": 0.00014154052734375,
      "grad_norm": 0.15042872726917267,
      "learning_rate": 7.210825533433719e-05,
      "loss": 0.0417,
      "step": 23190
    },
    {
      "epoch": 0.00014154052734375,
      "model_forward_time": 0.11540722846984863,
      "step": 23190
    },
    {
      "epoch": 0.00014154052734375,
      "step": 23190,
      "training_step_time": 0.3953258991241455
    },
    {
      "epoch": 0.000141546630859375,
      "model_forward_time": 0.11461400985717773,
      "step": 23191
    },
    {
      "epoch": 0.000141546630859375,
      "step": 23191,
      "training_step_time": 0.4489445686340332
    },
    {
      "epoch": 0.000141552734375,
      "model_forward_time": 0.11520099639892578,
      "step": 23192
    },
    {
      "epoch": 0.000141552734375,
      "step": 23192,
      "training_step_time": 0.4660789966583252
    },
    {
      "epoch": 0.000141558837890625,
      "model_forward_time": 0.114990234375,
      "step": 23193
    },
    {
      "epoch": 0.000141558837890625,
      "step": 23193,
      "training_step_time": 0.5005450248718262
    },
    {
      "epoch": 0.00014156494140625,
      "model_forward_time": 0.11544299125671387,
      "step": 23194
    },
    {
      "epoch": 0.00014156494140625,
      "step": 23194,
      "training_step_time": 0.3792908191680908
    },
    {
      "epoch": 0.000141571044921875,
      "model_forward_time": 0.11442828178405762,
      "step": 23195
    },
    {
      "epoch": 0.000141571044921875,
      "step": 23195,
      "training_step_time": 0.3933281898498535
    },
    {
      "epoch": 0.0001415771484375,
      "model_forward_time": 0.11440372467041016,
      "step": 23196
    },
    {
      "epoch": 0.0001415771484375,
      "step": 23196,
      "training_step_time": 0.4064323902130127
    },
    {
      "epoch": 0.000141583251953125,
      "model_forward_time": 0.1156613826751709,
      "step": 23197
    },
    {
      "epoch": 0.000141583251953125,
      "step": 23197,
      "training_step_time": 0.4432528018951416
    },
    {
      "epoch": 0.00014158935546875,
      "model_forward_time": 0.1144258975982666,
      "step": 23198
    },
    {
      "epoch": 0.00014158935546875,
      "step": 23198,
      "training_step_time": 0.45020484924316406
    },
    {
      "epoch": 0.000141595458984375,
      "model_forward_time": 0.1151268482208252,
      "step": 23199
    },
    {
      "epoch": 0.000141595458984375,
      "step": 23199,
      "training_step_time": 0.404099702835083
    },
    {
      "epoch": 0.0001416015625,
      "grad_norm": 0.19184476137161255,
      "learning_rate": 7.20835344316187e-05,
      "loss": 0.0501,
      "step": 23200
    },
    {
      "epoch": 0.0001416015625,
      "model_forward_time": 0.11542320251464844,
      "step": 23200
    },
    {
      "epoch": 0.0001416015625,
      "step": 23200,
      "training_step_time": 0.5116550922393799
    },
    {
      "epoch": 0.000141607666015625,
      "model_forward_time": 0.11547732353210449,
      "step": 23201
    },
    {
      "epoch": 0.000141607666015625,
      "step": 23201,
      "training_step_time": 0.39587998390197754
    },
    {
      "epoch": 0.00014161376953125,
      "model_forward_time": 0.11454939842224121,
      "step": 23202
    },
    {
      "epoch": 0.00014161376953125,
      "step": 23202,
      "training_step_time": 0.4328129291534424
    },
    {
      "epoch": 0.000141619873046875,
      "model_forward_time": 0.1158592700958252,
      "step": 23203
    },
    {
      "epoch": 0.000141619873046875,
      "step": 23203,
      "training_step_time": 0.3908274173736572
    },
    {
      "epoch": 0.0001416259765625,
      "model_forward_time": 0.11508989334106445,
      "step": 23204
    },
    {
      "epoch": 0.0001416259765625,
      "step": 23204,
      "training_step_time": 0.40524792671203613
    },
    {
      "epoch": 0.000141632080078125,
      "model_forward_time": 0.11524391174316406,
      "step": 23205
    },
    {
      "epoch": 0.000141632080078125,
      "step": 23205,
      "training_step_time": 0.3916158676147461
    },
    {
      "epoch": 0.00014163818359375,
      "model_forward_time": 0.11578226089477539,
      "step": 23206
    },
    {
      "epoch": 0.00014163818359375,
      "step": 23206,
      "training_step_time": 0.42072582244873047
    },
    {
      "epoch": 0.000141644287109375,
      "model_forward_time": 0.11546659469604492,
      "step": 23207
    },
    {
      "epoch": 0.000141644287109375,
      "step": 23207,
      "training_step_time": 0.39104151725769043
    },
    {
      "epoch": 0.000141650390625,
      "model_forward_time": 0.1144263744354248,
      "step": 23208
    },
    {
      "epoch": 0.000141650390625,
      "step": 23208,
      "training_step_time": 0.4331960678100586
    },
    {
      "epoch": 0.000141656494140625,
      "model_forward_time": 0.11614847183227539,
      "step": 23209
    },
    {
      "epoch": 0.000141656494140625,
      "step": 23209,
      "training_step_time": 0.4011499881744385
    },
    {
      "epoch": 0.00014166259765625,
      "grad_norm": 0.1389080137014389,
      "learning_rate": 7.205880682050554e-05,
      "loss": 0.0542,
      "step": 23210
    },
    {
      "epoch": 0.00014166259765625,
      "model_forward_time": 0.11468338966369629,
      "step": 23210
    },
    {
      "epoch": 0.00014166259765625,
      "step": 23210,
      "training_step_time": 0.3951394557952881
    },
    {
      "epoch": 0.000141668701171875,
      "model_forward_time": 0.11479401588439941,
      "step": 23211
    },
    {
      "epoch": 0.000141668701171875,
      "step": 23211,
      "training_step_time": 0.39888954162597656
    },
    {
      "epoch": 0.0001416748046875,
      "model_forward_time": 0.11539959907531738,
      "step": 23212
    },
    {
      "epoch": 0.0001416748046875,
      "step": 23212,
      "training_step_time": 0.39316868782043457
    },
    {
      "epoch": 0.000141680908203125,
      "model_forward_time": 0.11557197570800781,
      "step": 23213
    },
    {
      "epoch": 0.000141680908203125,
      "step": 23213,
      "training_step_time": 0.42481470108032227
    },
    {
      "epoch": 0.00014168701171875,
      "model_forward_time": 0.11479640007019043,
      "step": 23214
    },
    {
      "epoch": 0.00014168701171875,
      "step": 23214,
      "training_step_time": 0.403609037399292
    },
    {
      "epoch": 0.000141693115234375,
      "model_forward_time": 0.115570068359375,
      "step": 23215
    },
    {
      "epoch": 0.000141693115234375,
      "step": 23215,
      "training_step_time": 0.49739885330200195
    },
    {
      "epoch": 0.00014169921875,
      "model_forward_time": 0.11569666862487793,
      "step": 23216
    },
    {
      "epoch": 0.00014169921875,
      "step": 23216,
      "training_step_time": 0.4917628765106201
    },
    {
      "epoch": 0.000141705322265625,
      "model_forward_time": 0.1147763729095459,
      "step": 23217
    },
    {
      "epoch": 0.000141705322265625,
      "step": 23217,
      "training_step_time": 0.3884594440460205
    },
    {
      "epoch": 0.00014171142578125,
      "model_forward_time": 0.11571407318115234,
      "step": 23218
    },
    {
      "epoch": 0.00014171142578125,
      "step": 23218,
      "training_step_time": 0.3799729347229004
    },
    {
      "epoch": 0.000141717529296875,
      "model_forward_time": 0.11486434936523438,
      "step": 23219
    },
    {
      "epoch": 0.000141717529296875,
      "step": 23219,
      "training_step_time": 0.3830690383911133
    },
    {
      "epoch": 0.0001417236328125,
      "grad_norm": 0.16776400804519653,
      "learning_rate": 7.203407250850928e-05,
      "loss": 0.0473,
      "step": 23220
    },
    {
      "epoch": 0.0001417236328125,
      "model_forward_time": 0.1150503158569336,
      "step": 23220
    },
    {
      "epoch": 0.0001417236328125,
      "step": 23220,
      "training_step_time": 0.4006974697113037
    },
    {
      "epoch": 0.000141729736328125,
      "model_forward_time": 0.11573290824890137,
      "step": 23221
    },
    {
      "epoch": 0.000141729736328125,
      "step": 23221,
      "training_step_time": 0.47681617736816406
    },
    {
      "epoch": 0.00014173583984375,
      "model_forward_time": 0.11540746688842773,
      "step": 23222
    },
    {
      "epoch": 0.00014173583984375,
      "step": 23222,
      "training_step_time": 0.477832555770874
    },
    {
      "epoch": 0.000141741943359375,
      "model_forward_time": 0.11498880386352539,
      "step": 23223
    },
    {
      "epoch": 0.000141741943359375,
      "step": 23223,
      "training_step_time": 0.4455299377441406
    },
    {
      "epoch": 0.000141748046875,
      "model_forward_time": 0.11519193649291992,
      "step": 23224
    },
    {
      "epoch": 0.000141748046875,
      "step": 23224,
      "training_step_time": 0.38738131523132324
    },
    {
      "epoch": 0.000141754150390625,
      "model_forward_time": 0.11496520042419434,
      "step": 23225
    },
    {
      "epoch": 0.000141754150390625,
      "step": 23225,
      "training_step_time": 0.4470794200897217
    },
    {
      "epoch": 0.00014176025390625,
      "model_forward_time": 0.11511015892028809,
      "step": 23226
    },
    {
      "epoch": 0.00014176025390625,
      "step": 23226,
      "training_step_time": 0.42395758628845215
    },
    {
      "epoch": 0.000141766357421875,
      "model_forward_time": 0.11446571350097656,
      "step": 23227
    },
    {
      "epoch": 0.000141766357421875,
      "step": 23227,
      "training_step_time": 0.4122917652130127
    },
    {
      "epoch": 0.0001417724609375,
      "model_forward_time": 0.11558341979980469,
      "step": 23228
    },
    {
      "epoch": 0.0001417724609375,
      "step": 23228,
      "training_step_time": 0.41614651679992676
    },
    {
      "epoch": 0.000141778564453125,
      "model_forward_time": 0.11538362503051758,
      "step": 23229
    },
    {
      "epoch": 0.000141778564453125,
      "step": 23229,
      "training_step_time": 0.4879441261291504
    },
    {
      "epoch": 0.00014178466796875,
      "grad_norm": 0.16489703953266144,
      "learning_rate": 7.20093315031436e-05,
      "loss": 0.0542,
      "step": 23230
    },
    {
      "epoch": 0.00014178466796875,
      "model_forward_time": 0.11505985260009766,
      "step": 23230
    },
    {
      "epoch": 0.00014178466796875,
      "step": 23230,
      "training_step_time": 0.4372391700744629
    },
    {
      "epoch": 0.000141790771484375,
      "model_forward_time": 0.1147165298461914,
      "step": 23231
    },
    {
      "epoch": 0.000141790771484375,
      "step": 23231,
      "training_step_time": 0.39305734634399414
    },
    {
      "epoch": 0.000141796875,
      "model_forward_time": 0.11552214622497559,
      "step": 23232
    },
    {
      "epoch": 0.000141796875,
      "step": 23232,
      "training_step_time": 0.40891003608703613
    },
    {
      "epoch": 0.000141802978515625,
      "model_forward_time": 0.1148681640625,
      "step": 23233
    },
    {
      "epoch": 0.000141802978515625,
      "step": 23233,
      "training_step_time": 0.39582157135009766
    },
    {
      "epoch": 0.00014180908203125,
      "model_forward_time": 0.11496782302856445,
      "step": 23234
    },
    {
      "epoch": 0.00014180908203125,
      "step": 23234,
      "training_step_time": 0.3925666809082031
    },
    {
      "epoch": 0.000141815185546875,
      "model_forward_time": 0.11496376991271973,
      "step": 23235
    },
    {
      "epoch": 0.000141815185546875,
      "step": 23235,
      "training_step_time": 0.38527846336364746
    },
    {
      "epoch": 0.0001418212890625,
      "model_forward_time": 0.1151726245880127,
      "step": 23236
    },
    {
      "epoch": 0.0001418212890625,
      "step": 23236,
      "training_step_time": 0.4327540397644043
    },
    {
      "epoch": 0.000141827392578125,
      "model_forward_time": 0.11515116691589355,
      "step": 23237
    },
    {
      "epoch": 0.000141827392578125,
      "step": 23237,
      "training_step_time": 0.3983128070831299
    },
    {
      "epoch": 0.00014183349609375,
      "model_forward_time": 0.11515283584594727,
      "step": 23238
    },
    {
      "epoch": 0.00014183349609375,
      "step": 23238,
      "training_step_time": 0.41384100914001465
    },
    {
      "epoch": 0.000141839599609375,
      "model_forward_time": 0.11520886421203613,
      "step": 23239
    },
    {
      "epoch": 0.000141839599609375,
      "step": 23239,
      "training_step_time": 0.4732480049133301
    },
    {
      "epoch": 0.000141845703125,
      "grad_norm": 0.13688071072101593,
      "learning_rate": 7.19845838119241e-05,
      "loss": 0.0461,
      "step": 23240
    },
    {
      "epoch": 0.000141845703125,
      "model_forward_time": 0.11478757858276367,
      "step": 23240
    },
    {
      "epoch": 0.000141845703125,
      "step": 23240,
      "training_step_time": 0.3865795135498047
    },
    {
      "epoch": 0.000141851806640625,
      "model_forward_time": 0.11513233184814453,
      "step": 23241
    },
    {
      "epoch": 0.000141851806640625,
      "step": 23241,
      "training_step_time": 0.39209651947021484
    },
    {
      "epoch": 0.00014185791015625,
      "model_forward_time": 0.11484479904174805,
      "step": 23242
    },
    {
      "epoch": 0.00014185791015625,
      "step": 23242,
      "training_step_time": 0.4016876220703125
    },
    {
      "epoch": 0.000141864013671875,
      "model_forward_time": 0.11484694480895996,
      "step": 23243
    },
    {
      "epoch": 0.000141864013671875,
      "step": 23243,
      "training_step_time": 0.39856767654418945
    },
    {
      "epoch": 0.0001418701171875,
      "model_forward_time": 0.11571073532104492,
      "step": 23244
    },
    {
      "epoch": 0.0001418701171875,
      "step": 23244,
      "training_step_time": 0.48039674758911133
    },
    {
      "epoch": 0.000141876220703125,
      "model_forward_time": 0.11557292938232422,
      "step": 23245
    },
    {
      "epoch": 0.000141876220703125,
      "step": 23245,
      "training_step_time": 0.42611002922058105
    },
    {
      "epoch": 0.00014188232421875,
      "model_forward_time": 0.11481189727783203,
      "step": 23246
    },
    {
      "epoch": 0.00014188232421875,
      "step": 23246,
      "training_step_time": 0.39171266555786133
    },
    {
      "epoch": 0.000141888427734375,
      "model_forward_time": 0.1153099536895752,
      "step": 23247
    },
    {
      "epoch": 0.000141888427734375,
      "step": 23247,
      "training_step_time": 0.3855173587799072
    },
    {
      "epoch": 0.00014189453125,
      "model_forward_time": 0.11603951454162598,
      "step": 23248
    },
    {
      "epoch": 0.00014189453125,
      "step": 23248,
      "training_step_time": 0.39838290214538574
    },
    {
      "epoch": 0.000141900634765625,
      "model_forward_time": 0.11479926109313965,
      "step": 23249
    },
    {
      "epoch": 0.000141900634765625,
      "step": 23249,
      "training_step_time": 0.39963483810424805
    },
    {
      "epoch": 0.00014190673828125,
      "grad_norm": 0.16559065878391266,
      "learning_rate": 7.195982944236851e-05,
      "loss": 0.0556,
      "step": 23250
    },
    {
      "epoch": 0.00014190673828125,
      "model_forward_time": 0.1154780387878418,
      "step": 23250
    },
    {
      "epoch": 0.00014190673828125,
      "step": 23250,
      "training_step_time": 0.40937376022338867
    },
    {
      "epoch": 0.000141912841796875,
      "model_forward_time": 0.1149742603302002,
      "step": 23251
    },
    {
      "epoch": 0.000141912841796875,
      "step": 23251,
      "training_step_time": 0.48956727981567383
    },
    {
      "epoch": 0.0001419189453125,
      "model_forward_time": 0.11483097076416016,
      "step": 23252
    },
    {
      "epoch": 0.0001419189453125,
      "step": 23252,
      "training_step_time": 0.501183032989502
    },
    {
      "epoch": 0.000141925048828125,
      "model_forward_time": 0.1153104305267334,
      "step": 23253
    },
    {
      "epoch": 0.000141925048828125,
      "step": 23253,
      "training_step_time": 0.41544079780578613
    },
    {
      "epoch": 0.00014193115234375,
      "model_forward_time": 0.11532306671142578,
      "step": 23254
    },
    {
      "epoch": 0.00014193115234375,
      "step": 23254,
      "training_step_time": 0.39566969871520996
    },
    {
      "epoch": 0.000141937255859375,
      "model_forward_time": 0.11570882797241211,
      "step": 23255
    },
    {
      "epoch": 0.000141937255859375,
      "step": 23255,
      "training_step_time": 0.39960789680480957
    },
    {
      "epoch": 0.000141943359375,
      "model_forward_time": 0.11508679389953613,
      "step": 23256
    },
    {
      "epoch": 0.000141943359375,
      "step": 23256,
      "training_step_time": 0.40099382400512695
    },
    {
      "epoch": 0.000141949462890625,
      "model_forward_time": 0.1152338981628418,
      "step": 23257
    },
    {
      "epoch": 0.000141949462890625,
      "step": 23257,
      "training_step_time": 0.40813779830932617
    },
    {
      "epoch": 0.00014195556640625,
      "model_forward_time": 0.1152195930480957,
      "step": 23258
    },
    {
      "epoch": 0.00014195556640625,
      "step": 23258,
      "training_step_time": 0.5375409126281738
    },
    {
      "epoch": 0.000141961669921875,
      "model_forward_time": 0.11602663993835449,
      "step": 23259
    },
    {
      "epoch": 0.000141961669921875,
      "step": 23259,
      "training_step_time": 0.4363408088684082
    },
    {
      "epoch": 0.0001419677734375,
      "grad_norm": 0.15935857594013214,
      "learning_rate": 7.193506840199657e-05,
      "loss": 0.0537,
      "step": 23260
    },
    {
      "epoch": 0.0001419677734375,
      "model_forward_time": 0.11514115333557129,
      "step": 23260
    },
    {
      "epoch": 0.0001419677734375,
      "step": 23260,
      "training_step_time": 0.47498154640197754
    },
    {
      "epoch": 0.000141973876953125,
      "model_forward_time": 0.11489057540893555,
      "step": 23261
    },
    {
      "epoch": 0.000141973876953125,
      "step": 23261,
      "training_step_time": 0.3948853015899658
    },
    {
      "epoch": 0.00014197998046875,
      "model_forward_time": 0.11644768714904785,
      "step": 23262
    },
    {
      "epoch": 0.00014197998046875,
      "step": 23262,
      "training_step_time": 0.3924121856689453
    },
    {
      "epoch": 0.000141986083984375,
      "model_forward_time": 0.11460685729980469,
      "step": 23263
    },
    {
      "epoch": 0.000141986083984375,
      "step": 23263,
      "training_step_time": 0.39195704460144043
    },
    {
      "epoch": 0.0001419921875,
      "model_forward_time": 0.11534309387207031,
      "step": 23264
    },
    {
      "epoch": 0.0001419921875,
      "step": 23264,
      "training_step_time": 0.3836939334869385
    },
    {
      "epoch": 0.000141998291015625,
      "model_forward_time": 0.11564874649047852,
      "step": 23265
    },
    {
      "epoch": 0.000141998291015625,
      "step": 23265,
      "training_step_time": 0.3826727867126465
    },
    {
      "epoch": 0.00014200439453125,
      "model_forward_time": 0.11639237403869629,
      "step": 23266
    },
    {
      "epoch": 0.00014200439453125,
      "step": 23266,
      "training_step_time": 0.40636301040649414
    },
    {
      "epoch": 0.000142010498046875,
      "model_forward_time": 0.1162254810333252,
      "step": 23267
    },
    {
      "epoch": 0.000142010498046875,
      "step": 23267,
      "training_step_time": 0.4286470413208008
    },
    {
      "epoch": 0.0001420166015625,
      "model_forward_time": 0.11519002914428711,
      "step": 23268
    },
    {
      "epoch": 0.0001420166015625,
      "step": 23268,
      "training_step_time": 0.4547853469848633
    },
    {
      "epoch": 0.000142022705078125,
      "model_forward_time": 0.11635160446166992,
      "step": 23269
    },
    {
      "epoch": 0.000142022705078125,
      "step": 23269,
      "training_step_time": 0.39113616943359375
    },
    {
      "epoch": 0.00014202880859375,
      "grad_norm": 0.17614996433258057,
      "learning_rate": 7.191030069833001e-05,
      "loss": 0.047,
      "step": 23270
    },
    {
      "epoch": 0.00014202880859375,
      "model_forward_time": 0.11494255065917969,
      "step": 23270
    },
    {
      "epoch": 0.00014202880859375,
      "step": 23270,
      "training_step_time": 0.39178466796875
    },
    {
      "epoch": 0.000142034912109375,
      "model_forward_time": 0.11475634574890137,
      "step": 23271
    },
    {
      "epoch": 0.000142034912109375,
      "step": 23271,
      "training_step_time": 0.40840744972229004
    },
    {
      "epoch": 0.000142041015625,
      "model_forward_time": 0.11474776268005371,
      "step": 23272
    },
    {
      "epoch": 0.000142041015625,
      "step": 23272,
      "training_step_time": 0.3905832767486572
    },
    {
      "epoch": 0.000142047119140625,
      "model_forward_time": 0.11516118049621582,
      "step": 23273
    },
    {
      "epoch": 0.000142047119140625,
      "step": 23273,
      "training_step_time": 0.44704127311706543
    },
    {
      "epoch": 0.00014205322265625,
      "model_forward_time": 0.11568808555603027,
      "step": 23274
    },
    {
      "epoch": 0.00014205322265625,
      "step": 23274,
      "training_step_time": 0.501854658126831
    },
    {
      "epoch": 0.000142059326171875,
      "model_forward_time": 0.11563491821289062,
      "step": 23275
    },
    {
      "epoch": 0.000142059326171875,
      "step": 23275,
      "training_step_time": 0.3964526653289795
    },
    {
      "epoch": 0.0001420654296875,
      "model_forward_time": 0.11530685424804688,
      "step": 23276
    },
    {
      "epoch": 0.0001420654296875,
      "step": 23276,
      "training_step_time": 0.39003586769104004
    },
    {
      "epoch": 0.000142071533203125,
      "model_forward_time": 0.11490893363952637,
      "step": 23277
    },
    {
      "epoch": 0.000142071533203125,
      "step": 23277,
      "training_step_time": 0.38557887077331543
    },
    {
      "epoch": 0.00014207763671875,
      "model_forward_time": 0.11605024337768555,
      "step": 23278
    },
    {
      "epoch": 0.00014207763671875,
      "step": 23278,
      "training_step_time": 0.37227439880371094
    },
    {
      "epoch": 0.000142083740234375,
      "model_forward_time": 0.11464834213256836,
      "step": 23279
    },
    {
      "epoch": 0.000142083740234375,
      "step": 23279,
      "training_step_time": 0.39460110664367676
    },
    {
      "epoch": 0.00014208984375,
      "grad_norm": 0.12663774192333221,
      "learning_rate": 7.188552633889259e-05,
      "loss": 0.0471,
      "step": 23280
    },
    {
      "epoch": 0.00014208984375,
      "model_forward_time": 0.11525082588195801,
      "step": 23280
    },
    {
      "epoch": 0.00014208984375,
      "step": 23280,
      "training_step_time": 0.4530000686645508
    },
    {
      "epoch": 0.000142095947265625,
      "model_forward_time": 0.11559057235717773,
      "step": 23281
    },
    {
      "epoch": 0.000142095947265625,
      "step": 23281,
      "training_step_time": 0.3994755744934082
    },
    {
      "epoch": 0.00014210205078125,
      "model_forward_time": 0.1158592700958252,
      "step": 23282
    },
    {
      "epoch": 0.00014210205078125,
      "step": 23282,
      "training_step_time": 0.5137956142425537
    },
    {
      "epoch": 0.000142108154296875,
      "model_forward_time": 0.11571621894836426,
      "step": 23283
    },
    {
      "epoch": 0.000142108154296875,
      "step": 23283,
      "training_step_time": 0.4113750457763672
    },
    {
      "epoch": 0.0001421142578125,
      "model_forward_time": 0.11555695533752441,
      "step": 23284
    },
    {
      "epoch": 0.0001421142578125,
      "step": 23284,
      "training_step_time": 0.3884880542755127
    },
    {
      "epoch": 0.000142120361328125,
      "model_forward_time": 0.11555099487304688,
      "step": 23285
    },
    {
      "epoch": 0.000142120361328125,
      "step": 23285,
      "training_step_time": 0.39786577224731445
    },
    {
      "epoch": 0.00014212646484375,
      "model_forward_time": 0.11551856994628906,
      "step": 23286
    },
    {
      "epoch": 0.00014212646484375,
      "step": 23286,
      "training_step_time": 0.38349008560180664
    },
    {
      "epoch": 0.000142132568359375,
      "model_forward_time": 0.1154789924621582,
      "step": 23287
    },
    {
      "epoch": 0.000142132568359375,
      "step": 23287,
      "training_step_time": 0.42400264739990234
    },
    {
      "epoch": 0.000142138671875,
      "model_forward_time": 0.11690306663513184,
      "step": 23288
    },
    {
      "epoch": 0.000142138671875,
      "step": 23288,
      "training_step_time": 0.40581321716308594
    },
    {
      "epoch": 0.000142144775390625,
      "model_forward_time": 0.1151437759399414,
      "step": 23289
    },
    {
      "epoch": 0.000142144775390625,
      "step": 23289,
      "training_step_time": 0.4324352741241455
    },
    {
      "epoch": 0.00014215087890625,
      "grad_norm": 0.14593107998371124,
      "learning_rate": 7.186074533121013e-05,
      "loss": 0.0478,
      "step": 23290
    },
    {
      "epoch": 0.00014215087890625,
      "model_forward_time": 0.11487817764282227,
      "step": 23290
    },
    {
      "epoch": 0.00014215087890625,
      "step": 23290,
      "training_step_time": 0.3829491138458252
    },
    {
      "epoch": 0.000142156982421875,
      "model_forward_time": 0.11600947380065918,
      "step": 23291
    },
    {
      "epoch": 0.000142156982421875,
      "step": 23291,
      "training_step_time": 0.388258695602417
    },
    {
      "epoch": 0.0001421630859375,
      "model_forward_time": 0.11541104316711426,
      "step": 23292
    },
    {
      "epoch": 0.0001421630859375,
      "step": 23292,
      "training_step_time": 0.4035768508911133
    },
    {
      "epoch": 0.000142169189453125,
      "model_forward_time": 0.11454534530639648,
      "step": 23293
    },
    {
      "epoch": 0.000142169189453125,
      "step": 23293,
      "training_step_time": 0.3943021297454834
    },
    {
      "epoch": 0.00014217529296875,
      "model_forward_time": 0.11544013023376465,
      "step": 23294
    },
    {
      "epoch": 0.00014217529296875,
      "step": 23294,
      "training_step_time": 0.42249131202697754
    },
    {
      "epoch": 0.000142181396484375,
      "model_forward_time": 0.11505937576293945,
      "step": 23295
    },
    {
      "epoch": 0.000142181396484375,
      "step": 23295,
      "training_step_time": 0.40981602668762207
    },
    {
      "epoch": 0.0001421875,
      "model_forward_time": 0.11621928215026855,
      "step": 23296
    },
    {
      "epoch": 0.0001421875,
      "step": 23296,
      "training_step_time": 0.4136676788330078
    },
    {
      "epoch": 0.000142193603515625,
      "model_forward_time": 0.11616635322570801,
      "step": 23297
    },
    {
      "epoch": 0.000142193603515625,
      "step": 23297,
      "training_step_time": 0.4579789638519287
    },
    {
      "epoch": 0.00014219970703125,
      "model_forward_time": 0.11583256721496582,
      "step": 23298
    },
    {
      "epoch": 0.00014219970703125,
      "step": 23298,
      "training_step_time": 0.48270082473754883
    },
    {
      "epoch": 0.000142205810546875,
      "model_forward_time": 0.11561226844787598,
      "step": 23299
    },
    {
      "epoch": 0.000142205810546875,
      "step": 23299,
      "training_step_time": 0.3993558883666992
    },
    {
      "epoch": 0.0001422119140625,
      "grad_norm": 0.09837280958890915,
      "learning_rate": 7.183595768281043e-05,
      "loss": 0.0467,
      "step": 23300
    },
    {
      "epoch": 0.0001422119140625,
      "model_forward_time": 0.11481738090515137,
      "step": 23300
    },
    {
      "epoch": 0.0001422119140625,
      "step": 23300,
      "training_step_time": 0.38733720779418945
    },
    {
      "epoch": 0.000142218017578125,
      "model_forward_time": 0.11763119697570801,
      "step": 23301
    },
    {
      "epoch": 0.000142218017578125,
      "step": 23301,
      "training_step_time": 0.3925797939300537
    },
    {
      "epoch": 0.00014222412109375,
      "model_forward_time": 0.11545348167419434,
      "step": 23302
    },
    {
      "epoch": 0.00014222412109375,
      "step": 23302,
      "training_step_time": 0.48944091796875
    },
    {
      "epoch": 0.000142230224609375,
      "model_forward_time": 0.11525082588195801,
      "step": 23303
    },
    {
      "epoch": 0.000142230224609375,
      "step": 23303,
      "training_step_time": 0.4329650402069092
    },
    {
      "epoch": 0.000142236328125,
      "model_forward_time": 0.1152350902557373,
      "step": 23304
    },
    {
      "epoch": 0.000142236328125,
      "step": 23304,
      "training_step_time": 0.5273556709289551
    },
    {
      "epoch": 0.000142242431640625,
      "model_forward_time": 0.11381340026855469,
      "step": 23305
    },
    {
      "epoch": 0.000142242431640625,
      "step": 23305,
      "training_step_time": 0.3972797393798828
    },
    {
      "epoch": 0.00014224853515625,
      "model_forward_time": 0.11494731903076172,
      "step": 23306
    },
    {
      "epoch": 0.00014224853515625,
      "step": 23306,
      "training_step_time": 0.39156174659729004
    },
    {
      "epoch": 0.000142254638671875,
      "model_forward_time": 0.11506891250610352,
      "step": 23307
    },
    {
      "epoch": 0.000142254638671875,
      "step": 23307,
      "training_step_time": 0.3876771926879883
    },
    {
      "epoch": 0.0001422607421875,
      "model_forward_time": 0.11479496955871582,
      "step": 23308
    },
    {
      "epoch": 0.0001422607421875,
      "step": 23308,
      "training_step_time": 0.3967435359954834
    },
    {
      "epoch": 0.000142266845703125,
      "model_forward_time": 0.11504793167114258,
      "step": 23309
    },
    {
      "epoch": 0.000142266845703125,
      "step": 23309,
      "training_step_time": 0.39343714714050293
    },
    {
      "epoch": 0.00014227294921875,
      "grad_norm": 0.09561000764369965,
      "learning_rate": 7.181116340122336e-05,
      "loss": 0.044,
      "step": 23310
    },
    {
      "epoch": 0.00014227294921875,
      "model_forward_time": 0.11518073081970215,
      "step": 23310
    },
    {
      "epoch": 0.00014227294921875,
      "step": 23310,
      "training_step_time": 0.4907851219177246
    },
    {
      "epoch": 0.000142279052734375,
      "model_forward_time": 0.11494231224060059,
      "step": 23311
    },
    {
      "epoch": 0.000142279052734375,
      "step": 23311,
      "training_step_time": 0.4665834903717041
    },
    {
      "epoch": 0.00014228515625,
      "model_forward_time": 0.11525607109069824,
      "step": 23312
    },
    {
      "epoch": 0.00014228515625,
      "step": 23312,
      "training_step_time": 0.4244227409362793
    },
    {
      "epoch": 0.000142291259765625,
      "model_forward_time": 0.11544919013977051,
      "step": 23313
    },
    {
      "epoch": 0.000142291259765625,
      "step": 23313,
      "training_step_time": 0.3793365955352783
    },
    {
      "epoch": 0.00014229736328125,
      "model_forward_time": 0.11594033241271973,
      "step": 23314
    },
    {
      "epoch": 0.00014229736328125,
      "step": 23314,
      "training_step_time": 0.377699613571167
    },
    {
      "epoch": 0.000142303466796875,
      "model_forward_time": 0.11421418190002441,
      "step": 23315
    },
    {
      "epoch": 0.000142303466796875,
      "step": 23315,
      "training_step_time": 0.46199822425842285
    },
    {
      "epoch": 0.0001423095703125,
      "model_forward_time": 0.11643481254577637,
      "step": 23316
    },
    {
      "epoch": 0.0001423095703125,
      "step": 23316,
      "training_step_time": 0.5409581661224365
    },
    {
      "epoch": 0.000142315673828125,
      "model_forward_time": 0.11585307121276855,
      "step": 23317
    },
    {
      "epoch": 0.000142315673828125,
      "step": 23317,
      "training_step_time": 0.46381354331970215
    },
    {
      "epoch": 0.00014232177734375,
      "model_forward_time": 0.11489129066467285,
      "step": 23318
    },
    {
      "epoch": 0.00014232177734375,
      "step": 23318,
      "training_step_time": 0.3886723518371582
    },
    {
      "epoch": 0.000142327880859375,
      "model_forward_time": 0.11513018608093262,
      "step": 23319
    },
    {
      "epoch": 0.000142327880859375,
      "step": 23319,
      "training_step_time": 0.38457703590393066
    },
    {
      "epoch": 0.000142333984375,
      "grad_norm": 0.19278094172477722,
      "learning_rate": 7.178636249398072e-05,
      "loss": 0.0473,
      "step": 23320
    },
    {
      "epoch": 0.000142333984375,
      "model_forward_time": 0.11497688293457031,
      "step": 23320
    },
    {
      "epoch": 0.000142333984375,
      "step": 23320,
      "training_step_time": 0.44002366065979004
    },
    {
      "epoch": 0.000142340087890625,
      "model_forward_time": 0.11469602584838867,
      "step": 23321
    },
    {
      "epoch": 0.000142340087890625,
      "step": 23321,
      "training_step_time": 0.44734644889831543
    },
    {
      "epoch": 0.00014234619140625,
      "model_forward_time": 0.11473202705383301,
      "step": 23322
    },
    {
      "epoch": 0.00014234619140625,
      "step": 23322,
      "training_step_time": 0.3957490921020508
    },
    {
      "epoch": 0.000142352294921875,
      "model_forward_time": 0.11598801612854004,
      "step": 23323
    },
    {
      "epoch": 0.000142352294921875,
      "step": 23323,
      "training_step_time": 0.40445876121520996
    },
    {
      "epoch": 0.0001423583984375,
      "model_forward_time": 0.11519575119018555,
      "step": 23324
    },
    {
      "epoch": 0.0001423583984375,
      "step": 23324,
      "training_step_time": 0.39685750007629395
    },
    {
      "epoch": 0.000142364501953125,
      "model_forward_time": 0.11506247520446777,
      "step": 23325
    },
    {
      "epoch": 0.000142364501953125,
      "step": 23325,
      "training_step_time": 0.46393322944641113
    },
    {
      "epoch": 0.00014237060546875,
      "model_forward_time": 0.11517453193664551,
      "step": 23326
    },
    {
      "epoch": 0.00014237060546875,
      "step": 23326,
      "training_step_time": 0.3959314823150635
    },
    {
      "epoch": 0.000142376708984375,
      "model_forward_time": 0.11534595489501953,
      "step": 23327
    },
    {
      "epoch": 0.000142376708984375,
      "step": 23327,
      "training_step_time": 0.48325037956237793
    },
    {
      "epoch": 0.0001423828125,
      "model_forward_time": 0.11507964134216309,
      "step": 23328
    },
    {
      "epoch": 0.0001423828125,
      "step": 23328,
      "training_step_time": 0.39693284034729004
    },
    {
      "epoch": 0.000142388916015625,
      "model_forward_time": 0.11469173431396484,
      "step": 23329
    },
    {
      "epoch": 0.000142388916015625,
      "step": 23329,
      "training_step_time": 0.494171142578125
    },
    {
      "epoch": 0.00014239501953125,
      "grad_norm": 0.15395106375217438,
      "learning_rate": 7.176155496861638e-05,
      "loss": 0.0489,
      "step": 23330
    },
    {
      "epoch": 0.00014239501953125,
      "model_forward_time": 0.11470580101013184,
      "step": 23330
    },
    {
      "epoch": 0.00014239501953125,
      "step": 23330,
      "training_step_time": 0.41943955421447754
    },
    {
      "epoch": 0.000142401123046875,
      "model_forward_time": 0.11531829833984375,
      "step": 23331
    },
    {
      "epoch": 0.000142401123046875,
      "step": 23331,
      "training_step_time": 0.45221757888793945
    },
    {
      "epoch": 0.0001424072265625,
      "model_forward_time": 0.11499667167663574,
      "step": 23332
    },
    {
      "epoch": 0.0001424072265625,
      "step": 23332,
      "training_step_time": 0.39130377769470215
    },
    {
      "epoch": 0.000142413330078125,
      "model_forward_time": 0.11533975601196289,
      "step": 23333
    },
    {
      "epoch": 0.000142413330078125,
      "step": 23333,
      "training_step_time": 0.39289402961730957
    },
    {
      "epoch": 0.00014241943359375,
      "model_forward_time": 0.11419391632080078,
      "step": 23334
    },
    {
      "epoch": 0.00014241943359375,
      "step": 23334,
      "training_step_time": 0.42264771461486816
    },
    {
      "epoch": 0.000142425537109375,
      "model_forward_time": 0.11473441123962402,
      "step": 23335
    },
    {
      "epoch": 0.000142425537109375,
      "step": 23335,
      "training_step_time": 0.39272189140319824
    },
    {
      "epoch": 0.000142431640625,
      "model_forward_time": 0.11504030227661133,
      "step": 23336
    },
    {
      "epoch": 0.000142431640625,
      "step": 23336,
      "training_step_time": 0.38750243186950684
    },
    {
      "epoch": 0.000142437744140625,
      "model_forward_time": 0.11504411697387695,
      "step": 23337
    },
    {
      "epoch": 0.000142437744140625,
      "step": 23337,
      "training_step_time": 0.3878664970397949
    },
    {
      "epoch": 0.00014244384765625,
      "model_forward_time": 0.11548972129821777,
      "step": 23338
    },
    {
      "epoch": 0.00014244384765625,
      "step": 23338,
      "training_step_time": 0.3854079246520996
    },
    {
      "epoch": 0.000142449951171875,
      "model_forward_time": 0.11533617973327637,
      "step": 23339
    },
    {
      "epoch": 0.000142449951171875,
      "step": 23339,
      "training_step_time": 0.39425063133239746
    },
    {
      "epoch": 0.0001424560546875,
      "grad_norm": 0.19053372740745544,
      "learning_rate": 7.173674083266624e-05,
      "loss": 0.047,
      "step": 23340
    },
    {
      "epoch": 0.0001424560546875,
      "model_forward_time": 0.11488795280456543,
      "step": 23340
    },
    {
      "epoch": 0.0001424560546875,
      "step": 23340,
      "training_step_time": 0.6234502792358398
    },
    {
      "epoch": 0.000142462158203125,
      "model_forward_time": 0.11457705497741699,
      "step": 23341
    },
    {
      "epoch": 0.000142462158203125,
      "step": 23341,
      "training_step_time": 0.46689748764038086
    },
    {
      "epoch": 0.00014246826171875,
      "model_forward_time": 0.11492109298706055,
      "step": 23342
    },
    {
      "epoch": 0.00014246826171875,
      "step": 23342,
      "training_step_time": 0.45461344718933105
    },
    {
      "epoch": 0.000142474365234375,
      "model_forward_time": 0.1150670051574707,
      "step": 23343
    },
    {
      "epoch": 0.000142474365234375,
      "step": 23343,
      "training_step_time": 0.5055515766143799
    },
    {
      "epoch": 0.00014248046875,
      "model_forward_time": 0.11462521553039551,
      "step": 23344
    },
    {
      "epoch": 0.00014248046875,
      "step": 23344,
      "training_step_time": 0.4129199981689453
    },
    {
      "epoch": 0.000142486572265625,
      "model_forward_time": 0.11461520195007324,
      "step": 23345
    },
    {
      "epoch": 0.000142486572265625,
      "step": 23345,
      "training_step_time": 0.4333376884460449
    },
    {
      "epoch": 0.00014249267578125,
      "model_forward_time": 0.11479449272155762,
      "step": 23346
    },
    {
      "epoch": 0.00014249267578125,
      "step": 23346,
      "training_step_time": 0.3880767822265625
    },
    {
      "epoch": 0.000142498779296875,
      "model_forward_time": 0.11501383781433105,
      "step": 23347
    },
    {
      "epoch": 0.000142498779296875,
      "step": 23347,
      "training_step_time": 0.40810608863830566
    },
    {
      "epoch": 0.0001425048828125,
      "model_forward_time": 0.11541557312011719,
      "step": 23348
    },
    {
      "epoch": 0.0001425048828125,
      "step": 23348,
      "training_step_time": 0.392101526260376
    },
    {
      "epoch": 0.000142510986328125,
      "model_forward_time": 0.11520075798034668,
      "step": 23349
    },
    {
      "epoch": 0.000142510986328125,
      "step": 23349,
      "training_step_time": 0.3980221748352051
    },
    {
      "epoch": 0.00014251708984375,
      "grad_norm": 0.1916208565235138,
      "learning_rate": 7.171192009366814e-05,
      "loss": 0.0486,
      "step": 23350
    },
    {
      "epoch": 0.00014251708984375,
      "model_forward_time": 0.11571049690246582,
      "step": 23350
    },
    {
      "epoch": 0.00014251708984375,
      "step": 23350,
      "training_step_time": 0.39363956451416016
    },
    {
      "epoch": 0.000142523193359375,
      "model_forward_time": 0.115081787109375,
      "step": 23351
    },
    {
      "epoch": 0.000142523193359375,
      "step": 23351,
      "training_step_time": 0.394132137298584
    },
    {
      "epoch": 0.000142529296875,
      "model_forward_time": 0.11545419692993164,
      "step": 23352
    },
    {
      "epoch": 0.000142529296875,
      "step": 23352,
      "training_step_time": 0.5703198909759521
    },
    {
      "epoch": 0.000142535400390625,
      "model_forward_time": 0.11495280265808105,
      "step": 23353
    },
    {
      "epoch": 0.000142535400390625,
      "step": 23353,
      "training_step_time": 0.3965024948120117
    },
    {
      "epoch": 0.00014254150390625,
      "model_forward_time": 0.11597537994384766,
      "step": 23354
    },
    {
      "epoch": 0.00014254150390625,
      "step": 23354,
      "training_step_time": 0.4393343925476074
    },
    {
      "epoch": 0.000142547607421875,
      "model_forward_time": 0.11496734619140625,
      "step": 23355
    },
    {
      "epoch": 0.000142547607421875,
      "step": 23355,
      "training_step_time": 0.39698076248168945
    },
    {
      "epoch": 0.0001425537109375,
      "model_forward_time": 0.11449170112609863,
      "step": 23356
    },
    {
      "epoch": 0.0001425537109375,
      "step": 23356,
      "training_step_time": 0.48456501960754395
    },
    {
      "epoch": 0.000142559814453125,
      "model_forward_time": 0.11428999900817871,
      "step": 23357
    },
    {
      "epoch": 0.000142559814453125,
      "step": 23357,
      "training_step_time": 0.4136083126068115
    },
    {
      "epoch": 0.00014256591796875,
      "model_forward_time": 0.11545515060424805,
      "step": 23358
    },
    {
      "epoch": 0.00014256591796875,
      "step": 23358,
      "training_step_time": 0.4569530487060547
    },
    {
      "epoch": 0.000142572021484375,
      "model_forward_time": 0.1152658462524414,
      "step": 23359
    },
    {
      "epoch": 0.000142572021484375,
      "step": 23359,
      "training_step_time": 0.4231414794921875
    },
    {
      "epoch": 0.000142578125,
      "grad_norm": 0.212049201130867,
      "learning_rate": 7.1687092759162e-05,
      "loss": 0.0378,
      "step": 23360
    },
    {
      "epoch": 0.000142578125,
      "model_forward_time": 0.11487579345703125,
      "step": 23360
    },
    {
      "epoch": 0.000142578125,
      "step": 23360,
      "training_step_time": 0.44760775566101074
    },
    {
      "epoch": 0.000142584228515625,
      "model_forward_time": 0.11529994010925293,
      "step": 23361
    },
    {
      "epoch": 0.000142584228515625,
      "step": 23361,
      "training_step_time": 0.4216592311859131
    },
    {
      "epoch": 0.00014259033203125,
      "model_forward_time": 0.11531448364257812,
      "step": 23362
    },
    {
      "epoch": 0.00014259033203125,
      "step": 23362,
      "training_step_time": 0.3850421905517578
    },
    {
      "epoch": 0.000142596435546875,
      "model_forward_time": 0.11524796485900879,
      "step": 23363
    },
    {
      "epoch": 0.000142596435546875,
      "step": 23363,
      "training_step_time": 0.38550329208374023
    },
    {
      "epoch": 0.0001426025390625,
      "model_forward_time": 0.11530947685241699,
      "step": 23364
    },
    {
      "epoch": 0.0001426025390625,
      "step": 23364,
      "training_step_time": 0.5533959865570068
    },
    {
      "epoch": 0.000142608642578125,
      "model_forward_time": 0.11436009407043457,
      "step": 23365
    },
    {
      "epoch": 0.000142608642578125,
      "step": 23365,
      "training_step_time": 0.393524169921875
    },
    {
      "epoch": 0.00014261474609375,
      "model_forward_time": 0.11618399620056152,
      "step": 23366
    },
    {
      "epoch": 0.00014261474609375,
      "step": 23366,
      "training_step_time": 0.39028024673461914
    },
    {
      "epoch": 0.000142620849609375,
      "model_forward_time": 0.11579370498657227,
      "step": 23367
    },
    {
      "epoch": 0.000142620849609375,
      "step": 23367,
      "training_step_time": 0.38024353981018066
    },
    {
      "epoch": 0.000142626953125,
      "model_forward_time": 0.11521339416503906,
      "step": 23368
    },
    {
      "epoch": 0.000142626953125,
      "step": 23368,
      "training_step_time": 0.3979218006134033
    },
    {
      "epoch": 0.000142633056640625,
      "model_forward_time": 0.11450386047363281,
      "step": 23369
    },
    {
      "epoch": 0.000142633056640625,
      "step": 23369,
      "training_step_time": 0.440584659576416
    },
    {
      "epoch": 0.00014263916015625,
      "grad_norm": 0.16310285031795502,
      "learning_rate": 7.166225883668969e-05,
      "loss": 0.045,
      "step": 23370
    },
    {
      "epoch": 0.00014263916015625,
      "model_forward_time": 0.11488676071166992,
      "step": 23370
    },
    {
      "epoch": 0.00014263916015625,
      "step": 23370,
      "training_step_time": 0.6047036647796631
    },
    {
      "epoch": 0.000142645263671875,
      "model_forward_time": 0.11502790451049805,
      "step": 23371
    },
    {
      "epoch": 0.000142645263671875,
      "step": 23371,
      "training_step_time": 0.3880774974822998
    },
    {
      "epoch": 0.0001426513671875,
      "model_forward_time": 0.1150209903717041,
      "step": 23372
    },
    {
      "epoch": 0.0001426513671875,
      "step": 23372,
      "training_step_time": 0.5334172248840332
    },
    {
      "epoch": 0.000142657470703125,
      "model_forward_time": 0.1145625114440918,
      "step": 23373
    },
    {
      "epoch": 0.000142657470703125,
      "step": 23373,
      "training_step_time": 0.4577600955963135
    },
    {
      "epoch": 0.00014266357421875,
      "model_forward_time": 0.11447882652282715,
      "step": 23374
    },
    {
      "epoch": 0.00014266357421875,
      "step": 23374,
      "training_step_time": 0.42568087577819824
    },
    {
      "epoch": 0.000142669677734375,
      "model_forward_time": 0.11435294151306152,
      "step": 23375
    },
    {
      "epoch": 0.000142669677734375,
      "step": 23375,
      "training_step_time": 0.39061784744262695
    },
    {
      "epoch": 0.00014267578125,
      "model_forward_time": 0.1150367259979248,
      "step": 23376
    },
    {
      "epoch": 0.00014267578125,
      "step": 23376,
      "training_step_time": 0.3921165466308594
    },
    {
      "epoch": 0.000142681884765625,
      "model_forward_time": 0.11465597152709961,
      "step": 23377
    },
    {
      "epoch": 0.000142681884765625,
      "step": 23377,
      "training_step_time": 0.3986625671386719
    },
    {
      "epoch": 0.00014268798828125,
      "model_forward_time": 0.11550116539001465,
      "step": 23378
    },
    {
      "epoch": 0.00014268798828125,
      "step": 23378,
      "training_step_time": 0.38933634757995605
    },
    {
      "epoch": 0.000142694091796875,
      "model_forward_time": 0.1156308650970459,
      "step": 23379
    },
    {
      "epoch": 0.000142694091796875,
      "step": 23379,
      "training_step_time": 0.3806936740875244
    },
    {
      "epoch": 0.0001427001953125,
      "grad_norm": 0.13707906007766724,
      "learning_rate": 7.16374183337951e-05,
      "loss": 0.0475,
      "step": 23380
    },
    {
      "epoch": 0.0001427001953125,
      "model_forward_time": 0.11515188217163086,
      "step": 23380
    },
    {
      "epoch": 0.0001427001953125,
      "step": 23380,
      "training_step_time": 0.3949882984161377
    },
    {
      "epoch": 0.000142706298828125,
      "model_forward_time": 0.11568880081176758,
      "step": 23381
    },
    {
      "epoch": 0.000142706298828125,
      "step": 23381,
      "training_step_time": 0.3821072578430176
    },
    {
      "epoch": 0.00014271240234375,
      "model_forward_time": 0.11509251594543457,
      "step": 23382
    },
    {
      "epoch": 0.00014271240234375,
      "step": 23382,
      "training_step_time": 0.6109662055969238
    },
    {
      "epoch": 0.000142718505859375,
      "model_forward_time": 0.11523056030273438,
      "step": 23383
    },
    {
      "epoch": 0.000142718505859375,
      "step": 23383,
      "training_step_time": 0.3665194511413574
    },
    {
      "epoch": 0.000142724609375,
      "model_forward_time": 0.11497807502746582,
      "step": 23384
    },
    {
      "epoch": 0.000142724609375,
      "step": 23384,
      "training_step_time": 0.41544079780578613
    },
    {
      "epoch": 0.000142730712890625,
      "model_forward_time": 0.11462521553039551,
      "step": 23385
    },
    {
      "epoch": 0.000142730712890625,
      "step": 23385,
      "training_step_time": 0.4118483066558838
    },
    {
      "epoch": 0.00014273681640625,
      "model_forward_time": 0.11476707458496094,
      "step": 23386
    },
    {
      "epoch": 0.00014273681640625,
      "step": 23386,
      "training_step_time": 0.3960728645324707
    },
    {
      "epoch": 0.000142742919921875,
      "model_forward_time": 0.11465120315551758,
      "step": 23387
    },
    {
      "epoch": 0.000142742919921875,
      "step": 23387,
      "training_step_time": 0.45349931716918945
    },
    {
      "epoch": 0.0001427490234375,
      "model_forward_time": 0.11544299125671387,
      "step": 23388
    },
    {
      "epoch": 0.0001427490234375,
      "step": 23388,
      "training_step_time": 0.5041849613189697
    },
    {
      "epoch": 0.000142755126953125,
      "model_forward_time": 0.11509156227111816,
      "step": 23389
    },
    {
      "epoch": 0.000142755126953125,
      "step": 23389,
      "training_step_time": 0.3941161632537842
    },
    {
      "epoch": 0.00014276123046875,
      "grad_norm": 0.1312418133020401,
      "learning_rate": 7.161257125802413e-05,
      "loss": 0.0444,
      "step": 23390
    },
    {
      "epoch": 0.00014276123046875,
      "model_forward_time": 0.115020751953125,
      "step": 23390
    },
    {
      "epoch": 0.00014276123046875,
      "step": 23390,
      "training_step_time": 0.39807987213134766
    },
    {
      "epoch": 0.000142767333984375,
      "model_forward_time": 0.11536979675292969,
      "step": 23391
    },
    {
      "epoch": 0.000142767333984375,
      "step": 23391,
      "training_step_time": 0.39137721061706543
    },
    {
      "epoch": 0.0001427734375,
      "model_forward_time": 0.11548018455505371,
      "step": 23392
    },
    {
      "epoch": 0.0001427734375,
      "step": 23392,
      "training_step_time": 0.38501739501953125
    },
    {
      "epoch": 0.000142779541015625,
      "model_forward_time": 0.11540389060974121,
      "step": 23393
    },
    {
      "epoch": 0.000142779541015625,
      "step": 23393,
      "training_step_time": 0.3927271366119385
    },
    {
      "epoch": 0.00014278564453125,
      "model_forward_time": 0.11512255668640137,
      "step": 23394
    },
    {
      "epoch": 0.00014278564453125,
      "step": 23394,
      "training_step_time": 0.6654531955718994
    },
    {
      "epoch": 0.000142791748046875,
      "model_forward_time": 0.11507987976074219,
      "step": 23395
    },
    {
      "epoch": 0.000142791748046875,
      "step": 23395,
      "training_step_time": 0.3966553211212158
    },
    {
      "epoch": 0.0001427978515625,
      "model_forward_time": 0.11543989181518555,
      "step": 23396
    },
    {
      "epoch": 0.0001427978515625,
      "step": 23396,
      "training_step_time": 0.38031697273254395
    },
    {
      "epoch": 0.000142803955078125,
      "model_forward_time": 0.11537861824035645,
      "step": 23397
    },
    {
      "epoch": 0.000142803955078125,
      "step": 23397,
      "training_step_time": 0.3868887424468994
    },
    {
      "epoch": 0.00014281005859375,
      "model_forward_time": 0.11522030830383301,
      "step": 23398
    },
    {
      "epoch": 0.00014281005859375,
      "step": 23398,
      "training_step_time": 0.4315948486328125
    },
    {
      "epoch": 0.000142816162109375,
      "model_forward_time": 0.11537861824035645,
      "step": 23399
    },
    {
      "epoch": 0.000142816162109375,
      "step": 23399,
      "training_step_time": 0.3890111446380615
    },
    {
      "epoch": 0.000142822265625,
      "grad_norm": 0.09775767475366592,
      "learning_rate": 7.158771761692464e-05,
      "loss": 0.0452,
      "step": 23400
    },
    {
      "epoch": 0.000142822265625,
      "model_forward_time": 0.1159064769744873,
      "step": 23400
    },
    {
      "epoch": 0.000142822265625,
      "step": 23400,
      "training_step_time": 0.5133075714111328
    },
    {
      "epoch": 0.000142828369140625,
      "model_forward_time": 0.11559128761291504,
      "step": 23401
    },
    {
      "epoch": 0.000142828369140625,
      "step": 23401,
      "training_step_time": 0.44707489013671875
    },
    {
      "epoch": 0.00014283447265625,
      "model_forward_time": 0.11527395248413086,
      "step": 23402
    },
    {
      "epoch": 0.00014283447265625,
      "step": 23402,
      "training_step_time": 0.43907642364501953
    },
    {
      "epoch": 0.000142840576171875,
      "model_forward_time": 0.11569714546203613,
      "step": 23403
    },
    {
      "epoch": 0.000142840576171875,
      "step": 23403,
      "training_step_time": 0.3787500858306885
    },
    {
      "epoch": 0.0001428466796875,
      "model_forward_time": 0.11483168601989746,
      "step": 23404
    },
    {
      "epoch": 0.0001428466796875,
      "step": 23404,
      "training_step_time": 0.3885533809661865
    },
    {
      "epoch": 0.000142852783203125,
      "model_forward_time": 0.11644124984741211,
      "step": 23405
    },
    {
      "epoch": 0.000142852783203125,
      "step": 23405,
      "training_step_time": 0.3975832462310791
    },
    {
      "epoch": 0.00014285888671875,
      "model_forward_time": 0.11536645889282227,
      "step": 23406
    },
    {
      "epoch": 0.00014285888671875,
      "step": 23406,
      "training_step_time": 0.5650970935821533
    },
    {
      "epoch": 0.000142864990234375,
      "model_forward_time": 0.11583781242370605,
      "step": 23407
    },
    {
      "epoch": 0.000142864990234375,
      "step": 23407,
      "training_step_time": 0.39791059494018555
    },
    {
      "epoch": 0.00014287109375,
      "model_forward_time": 0.11474442481994629,
      "step": 23408
    },
    {
      "epoch": 0.00014287109375,
      "step": 23408,
      "training_step_time": 0.3884398937225342
    },
    {
      "epoch": 0.000142877197265625,
      "model_forward_time": 0.11486458778381348,
      "step": 23409
    },
    {
      "epoch": 0.000142877197265625,
      "step": 23409,
      "training_step_time": 0.38804149627685547
    },
    {
      "epoch": 0.00014288330078125,
      "grad_norm": 0.11715058982372284,
      "learning_rate": 7.156285741804656e-05,
      "loss": 0.0439,
      "step": 23410
    },
    {
      "epoch": 0.00014288330078125,
      "model_forward_time": 0.11470293998718262,
      "step": 23410
    },
    {
      "epoch": 0.00014288330078125,
      "step": 23410,
      "training_step_time": 0.39302754402160645
    },
    {
      "epoch": 0.000142889404296875,
      "model_forward_time": 0.11531400680541992,
      "step": 23411
    },
    {
      "epoch": 0.000142889404296875,
      "step": 23411,
      "training_step_time": 0.3968057632446289
    },
    {
      "epoch": 0.0001428955078125,
      "model_forward_time": 0.11509227752685547,
      "step": 23412
    },
    {
      "epoch": 0.0001428955078125,
      "step": 23412,
      "training_step_time": 0.6057004928588867
    },
    {
      "epoch": 0.000142901611328125,
      "model_forward_time": 0.11466336250305176,
      "step": 23413
    },
    {
      "epoch": 0.000142901611328125,
      "step": 23413,
      "training_step_time": 0.4362502098083496
    },
    {
      "epoch": 0.00014290771484375,
      "model_forward_time": 0.11504220962524414,
      "step": 23414
    },
    {
      "epoch": 0.00014290771484375,
      "step": 23414,
      "training_step_time": 0.45450782775878906
    },
    {
      "epoch": 0.000142913818359375,
      "model_forward_time": 0.11490464210510254,
      "step": 23415
    },
    {
      "epoch": 0.000142913818359375,
      "step": 23415,
      "training_step_time": 0.4152519702911377
    },
    {
      "epoch": 0.000142919921875,
      "model_forward_time": 0.11469292640686035,
      "step": 23416
    },
    {
      "epoch": 0.000142919921875,
      "step": 23416,
      "training_step_time": 0.452040433883667
    },
    {
      "epoch": 0.000142926025390625,
      "model_forward_time": 0.11410307884216309,
      "step": 23417
    },
    {
      "epoch": 0.000142926025390625,
      "step": 23417,
      "training_step_time": 0.3983008861541748
    },
    {
      "epoch": 0.00014293212890625,
      "model_forward_time": 0.1145772933959961,
      "step": 23418
    },
    {
      "epoch": 0.00014293212890625,
      "step": 23418,
      "training_step_time": 0.42142391204833984
    },
    {
      "epoch": 0.000142938232421875,
      "model_forward_time": 0.11494278907775879,
      "step": 23419
    },
    {
      "epoch": 0.000142938232421875,
      "step": 23419,
      "training_step_time": 0.39084696769714355
    },
    {
      "epoch": 0.0001429443359375,
      "grad_norm": 0.1342240422964096,
      "learning_rate": 7.153799066894171e-05,
      "loss": 0.0464,
      "step": 23420
    },
    {
      "epoch": 0.0001429443359375,
      "model_forward_time": 0.11472511291503906,
      "step": 23420
    },
    {
      "epoch": 0.0001429443359375,
      "step": 23420,
      "training_step_time": 0.3854835033416748
    },
    {
      "epoch": 0.000142950439453125,
      "model_forward_time": 0.11503219604492188,
      "step": 23421
    },
    {
      "epoch": 0.000142950439453125,
      "step": 23421,
      "training_step_time": 0.3888871669769287
    },
    {
      "epoch": 0.00014295654296875,
      "model_forward_time": 0.11510205268859863,
      "step": 23422
    },
    {
      "epoch": 0.00014295654296875,
      "step": 23422,
      "training_step_time": 0.39325380325317383
    },
    {
      "epoch": 0.000142962646484375,
      "model_forward_time": 0.1155846118927002,
      "step": 23423
    },
    {
      "epoch": 0.000142962646484375,
      "step": 23423,
      "training_step_time": 0.3862149715423584
    },
    {
      "epoch": 0.00014296875,
      "model_forward_time": 0.1148529052734375,
      "step": 23424
    },
    {
      "epoch": 0.00014296875,
      "step": 23424,
      "training_step_time": 0.6469316482543945
    },
    {
      "epoch": 0.000142974853515625,
      "model_forward_time": 0.11563467979431152,
      "step": 23425
    },
    {
      "epoch": 0.000142974853515625,
      "step": 23425,
      "training_step_time": 0.3997974395751953
    },
    {
      "epoch": 0.00014298095703125,
      "model_forward_time": 0.115875244140625,
      "step": 23426
    },
    {
      "epoch": 0.00014298095703125,
      "step": 23426,
      "training_step_time": 0.41465306282043457
    },
    {
      "epoch": 0.000142987060546875,
      "model_forward_time": 0.11673474311828613,
      "step": 23427
    },
    {
      "epoch": 0.000142987060546875,
      "step": 23427,
      "training_step_time": 0.6222484111785889
    },
    {
      "epoch": 0.0001429931640625,
      "model_forward_time": 0.1205434799194336,
      "step": 23428
    },
    {
      "epoch": 0.0001429931640625,
      "step": 23428,
      "training_step_time": 0.7110044956207275
    },
    {
      "epoch": 0.000142999267578125,
      "model_forward_time": 0.11941862106323242,
      "step": 23429
    },
    {
      "epoch": 0.000142999267578125,
      "step": 23429,
      "training_step_time": 0.6937272548675537
    },
    {
      "epoch": 0.00014300537109375,
      "grad_norm": 0.11939956247806549,
      "learning_rate": 7.151311737716397e-05,
      "loss": 0.0455,
      "step": 23430
    },
    {
      "epoch": 0.00014300537109375,
      "model_forward_time": 0.11732816696166992,
      "step": 23430
    },
    {
      "epoch": 0.00014300537109375,
      "step": 23430,
      "training_step_time": 0.6976418495178223
    },
    {
      "epoch": 0.000143011474609375,
      "model_forward_time": 0.11760330200195312,
      "step": 23431
    },
    {
      "epoch": 0.000143011474609375,
      "step": 23431,
      "training_step_time": 0.6604983806610107
    },
    {
      "epoch": 0.000143017578125,
      "model_forward_time": 0.12141823768615723,
      "step": 23432
    },
    {
      "epoch": 0.000143017578125,
      "step": 23432,
      "training_step_time": 0.6456303596496582
    },
    {
      "epoch": 0.000143023681640625,
      "model_forward_time": 0.11821365356445312,
      "step": 23433
    },
    {
      "epoch": 0.000143023681640625,
      "step": 23433,
      "training_step_time": 0.613518238067627
    },
    {
      "epoch": 0.00014302978515625,
      "model_forward_time": 0.11755657196044922,
      "step": 23434
    },
    {
      "epoch": 0.00014302978515625,
      "step": 23434,
      "training_step_time": 0.685966968536377
    },
    {
      "epoch": 0.000143035888671875,
      "model_forward_time": 0.12277364730834961,
      "step": 23435
    },
    {
      "epoch": 0.000143035888671875,
      "step": 23435,
      "training_step_time": 0.6632096767425537
    },
    {
      "epoch": 0.0001430419921875,
      "model_forward_time": 0.13161373138427734,
      "step": 23436
    },
    {
      "epoch": 0.0001430419921875,
      "step": 23436,
      "training_step_time": 0.6528840065002441
    },
    {
      "epoch": 0.000143048095703125,
      "model_forward_time": 0.11652874946594238,
      "step": 23437
    },
    {
      "epoch": 0.000143048095703125,
      "step": 23437,
      "training_step_time": 0.7238571643829346
    },
    {
      "epoch": 0.00014305419921875,
      "model_forward_time": 0.12060189247131348,
      "step": 23438
    },
    {
      "epoch": 0.00014305419921875,
      "step": 23438,
      "training_step_time": 0.7486555576324463
    },
    {
      "epoch": 0.000143060302734375,
      "model_forward_time": 0.12384939193725586,
      "step": 23439
    },
    {
      "epoch": 0.000143060302734375,
      "step": 23439,
      "training_step_time": 0.6584100723266602
    },
    {
      "epoch": 0.00014306640625,
      "grad_norm": 0.1547684222459793,
      "learning_rate": 7.148823755026921e-05,
      "loss": 0.0475,
      "step": 23440
    },
    {
      "epoch": 0.00014306640625,
      "model_forward_time": 0.11841464042663574,
      "step": 23440
    },
    {
      "epoch": 0.00014306640625,
      "step": 23440,
      "training_step_time": 0.639195442199707
    },
    {
      "epoch": 0.000143072509765625,
      "model_forward_time": 0.12647771835327148,
      "step": 23441
    },
    {
      "epoch": 0.000143072509765625,
      "step": 23441,
      "training_step_time": 0.6593635082244873
    },
    {
      "epoch": 0.00014307861328125,
      "model_forward_time": 0.1190333366394043,
      "step": 23442
    },
    {
      "epoch": 0.00014307861328125,
      "step": 23442,
      "training_step_time": 0.6618571281433105
    },
    {
      "epoch": 0.000143084716796875,
      "model_forward_time": 0.11808919906616211,
      "step": 23443
    },
    {
      "epoch": 0.000143084716796875,
      "step": 23443,
      "training_step_time": 0.6855828762054443
    },
    {
      "epoch": 0.0001430908203125,
      "model_forward_time": 0.11857223510742188,
      "step": 23444
    },
    {
      "epoch": 0.0001430908203125,
      "step": 23444,
      "training_step_time": 0.7034997940063477
    },
    {
      "epoch": 0.000143096923828125,
      "model_forward_time": 0.11749649047851562,
      "step": 23445
    },
    {
      "epoch": 0.000143096923828125,
      "step": 23445,
      "training_step_time": 0.6829748153686523
    },
    {
      "epoch": 0.00014310302734375,
      "model_forward_time": 0.12089180946350098,
      "step": 23446
    },
    {
      "epoch": 0.00014310302734375,
      "step": 23446,
      "training_step_time": 0.6681349277496338
    },
    {
      "epoch": 0.000143109130859375,
      "model_forward_time": 0.12278914451599121,
      "step": 23447
    },
    {
      "epoch": 0.000143109130859375,
      "step": 23447,
      "training_step_time": 0.7009468078613281
    },
    {
      "epoch": 0.000143115234375,
      "model_forward_time": 0.12066149711608887,
      "step": 23448
    },
    {
      "epoch": 0.000143115234375,
      "step": 23448,
      "training_step_time": 0.7402772903442383
    },
    {
      "epoch": 0.000143121337890625,
      "model_forward_time": 0.12005972862243652,
      "step": 23449
    },
    {
      "epoch": 0.000143121337890625,
      "step": 23449,
      "training_step_time": 0.7418105602264404
    },
    {
      "epoch": 0.00014312744140625,
      "grad_norm": 0.11061780154705048,
      "learning_rate": 7.146335119581523e-05,
      "loss": 0.0542,
      "step": 23450
    },
    {
      "epoch": 0.00014312744140625,
      "model_forward_time": 0.1318044662475586,
      "step": 23450
    },
    {
      "epoch": 0.00014312744140625,
      "step": 23450,
      "training_step_time": 0.6295228004455566
    },
    {
      "epoch": 0.000143133544921875,
      "model_forward_time": 0.11793351173400879,
      "step": 23451
    },
    {
      "epoch": 0.000143133544921875,
      "step": 23451,
      "training_step_time": 0.6313192844390869
    },
    {
      "epoch": 0.0001431396484375,
      "model_forward_time": 0.1238853931427002,
      "step": 23452
    },
    {
      "epoch": 0.0001431396484375,
      "step": 23452,
      "training_step_time": 0.6772866249084473
    },
    {
      "epoch": 0.000143145751953125,
      "model_forward_time": 0.11994433403015137,
      "step": 23453
    },
    {
      "epoch": 0.000143145751953125,
      "step": 23453,
      "training_step_time": 0.6737525463104248
    },
    {
      "epoch": 0.00014315185546875,
      "model_forward_time": 0.1188819408416748,
      "step": 23454
    },
    {
      "epoch": 0.00014315185546875,
      "step": 23454,
      "training_step_time": 0.6802458763122559
    },
    {
      "epoch": 0.000143157958984375,
      "model_forward_time": 0.12025809288024902,
      "step": 23455
    },
    {
      "epoch": 0.000143157958984375,
      "step": 23455,
      "training_step_time": 0.730994701385498
    },
    {
      "epoch": 0.0001431640625,
      "model_forward_time": 0.11899948120117188,
      "step": 23456
    },
    {
      "epoch": 0.0001431640625,
      "step": 23456,
      "training_step_time": 0.7168278694152832
    },
    {
      "epoch": 0.000143170166015625,
      "model_forward_time": 0.12345671653747559,
      "step": 23457
    },
    {
      "epoch": 0.000143170166015625,
      "step": 23457,
      "training_step_time": 0.7533676624298096
    },
    {
      "epoch": 0.00014317626953125,
      "model_forward_time": 0.1231241226196289,
      "step": 23458
    },
    {
      "epoch": 0.00014317626953125,
      "step": 23458,
      "training_step_time": 0.6844754219055176
    },
    {
      "epoch": 0.000143182373046875,
      "model_forward_time": 0.12149286270141602,
      "step": 23459
    },
    {
      "epoch": 0.000143182373046875,
      "step": 23459,
      "training_step_time": 0.6248888969421387
    },
    {
      "epoch": 0.0001431884765625,
      "grad_norm": 0.1266474574804306,
      "learning_rate": 7.143845832136188e-05,
      "loss": 0.0561,
      "step": 23460
    },
    {
      "epoch": 0.0001431884765625,
      "model_forward_time": 0.11738252639770508,
      "step": 23460
    },
    {
      "epoch": 0.0001431884765625,
      "step": 23460,
      "training_step_time": 0.6853775978088379
    },
    {
      "epoch": 0.000143194580078125,
      "model_forward_time": 0.12026286125183105,
      "step": 23461
    },
    {
      "epoch": 0.000143194580078125,
      "step": 23461,
      "training_step_time": 0.6654820442199707
    },
    {
      "epoch": 0.00014320068359375,
      "model_forward_time": 0.11597466468811035,
      "step": 23462
    },
    {
      "epoch": 0.00014320068359375,
      "step": 23462,
      "training_step_time": 0.614154577255249
    },
    {
      "epoch": 0.000143206787109375,
      "model_forward_time": 0.12054324150085449,
      "step": 23463
    },
    {
      "epoch": 0.000143206787109375,
      "step": 23463,
      "training_step_time": 0.694190263748169
    },
    {
      "epoch": 0.000143212890625,
      "model_forward_time": 0.11767697334289551,
      "step": 23464
    },
    {
      "epoch": 0.000143212890625,
      "step": 23464,
      "training_step_time": 0.5978803634643555
    },
    {
      "epoch": 0.000143218994140625,
      "model_forward_time": 0.11574769020080566,
      "step": 23465
    },
    {
      "epoch": 0.000143218994140625,
      "step": 23465,
      "training_step_time": 0.6822292804718018
    },
    {
      "epoch": 0.00014322509765625,
      "model_forward_time": 0.13420343399047852,
      "step": 23466
    },
    {
      "epoch": 0.00014322509765625,
      "step": 23466,
      "training_step_time": 0.6877424716949463
    },
    {
      "epoch": 0.000143231201171875,
      "model_forward_time": 0.11720013618469238,
      "step": 23467
    },
    {
      "epoch": 0.000143231201171875,
      "step": 23467,
      "training_step_time": 0.7267489433288574
    },
    {
      "epoch": 0.0001432373046875,
      "model_forward_time": 0.1213991641998291,
      "step": 23468
    },
    {
      "epoch": 0.0001432373046875,
      "step": 23468,
      "training_step_time": 0.77651047706604
    },
    {
      "epoch": 0.000143243408203125,
      "model_forward_time": 0.12207865715026855,
      "step": 23469
    },
    {
      "epoch": 0.000143243408203125,
      "step": 23469,
      "training_step_time": 0.6357436180114746
    },
    {
      "epoch": 0.00014324951171875,
      "grad_norm": 0.16499657928943634,
      "learning_rate": 7.141355893447092e-05,
      "loss": 0.0541,
      "step": 23470
    },
    {
      "epoch": 0.00014324951171875,
      "model_forward_time": 0.1204369068145752,
      "step": 23470
    },
    {
      "epoch": 0.00014324951171875,
      "step": 23470,
      "training_step_time": 0.6777117252349854
    },
    {
      "epoch": 0.000143255615234375,
      "model_forward_time": 0.11994266510009766,
      "step": 23471
    },
    {
      "epoch": 0.000143255615234375,
      "step": 23471,
      "training_step_time": 0.6241621971130371
    },
    {
      "epoch": 0.00014326171875,
      "model_forward_time": 0.11788201332092285,
      "step": 23472
    },
    {
      "epoch": 0.00014326171875,
      "step": 23472,
      "training_step_time": 0.6841309070587158
    },
    {
      "epoch": 0.000143267822265625,
      "model_forward_time": 0.11533522605895996,
      "step": 23473
    },
    {
      "epoch": 0.000143267822265625,
      "step": 23473,
      "training_step_time": 0.7366681098937988
    },
    {
      "epoch": 0.00014327392578125,
      "model_forward_time": 0.1220705509185791,
      "step": 23474
    },
    {
      "epoch": 0.00014327392578125,
      "step": 23474,
      "training_step_time": 0.5927698612213135
    },
    {
      "epoch": 0.000143280029296875,
      "model_forward_time": 0.11846351623535156,
      "step": 23475
    },
    {
      "epoch": 0.000143280029296875,
      "step": 23475,
      "training_step_time": 0.6631748676300049
    },
    {
      "epoch": 0.0001432861328125,
      "model_forward_time": 0.11893391609191895,
      "step": 23476
    },
    {
      "epoch": 0.0001432861328125,
      "step": 23476,
      "training_step_time": 0.7613368034362793
    },
    {
      "epoch": 0.000143292236328125,
      "model_forward_time": 0.11912870407104492,
      "step": 23477
    },
    {
      "epoch": 0.000143292236328125,
      "step": 23477,
      "training_step_time": 0.6076233386993408
    },
    {
      "epoch": 0.00014329833984375,
      "model_forward_time": 0.11839914321899414,
      "step": 23478
    },
    {
      "epoch": 0.00014329833984375,
      "step": 23478,
      "training_step_time": 0.6748971939086914
    },
    {
      "epoch": 0.000143304443359375,
      "model_forward_time": 0.11923050880432129,
      "step": 23479
    },
    {
      "epoch": 0.000143304443359375,
      "step": 23479,
      "training_step_time": 0.6620264053344727
    },
    {
      "epoch": 0.000143310546875,
      "grad_norm": 0.20505572855472565,
      "learning_rate": 7.138865304270616e-05,
      "loss": 0.0549,
      "step": 23480
    },
    {
      "epoch": 0.000143310546875,
      "model_forward_time": 0.11994457244873047,
      "step": 23480
    },
    {
      "epoch": 0.000143310546875,
      "step": 23480,
      "training_step_time": 0.655987024307251
    },
    {
      "epoch": 0.000143316650390625,
      "model_forward_time": 0.1193540096282959,
      "step": 23481
    },
    {
      "epoch": 0.000143316650390625,
      "step": 23481,
      "training_step_time": 0.6577963829040527
    },
    {
      "epoch": 0.00014332275390625,
      "model_forward_time": 0.11724662780761719,
      "step": 23482
    },
    {
      "epoch": 0.00014332275390625,
      "step": 23482,
      "training_step_time": 0.6448349952697754
    },
    {
      "epoch": 0.000143328857421875,
      "model_forward_time": 0.13704538345336914,
      "step": 23483
    },
    {
      "epoch": 0.000143328857421875,
      "step": 23483,
      "training_step_time": 0.587315559387207
    },
    {
      "epoch": 0.0001433349609375,
      "model_forward_time": 0.11724495887756348,
      "step": 23484
    },
    {
      "epoch": 0.0001433349609375,
      "step": 23484,
      "training_step_time": 0.6855378150939941
    },
    {
      "epoch": 0.000143341064453125,
      "model_forward_time": 0.11862802505493164,
      "step": 23485
    },
    {
      "epoch": 0.000143341064453125,
      "step": 23485,
      "training_step_time": 0.6812610626220703
    },
    {
      "epoch": 0.00014334716796875,
      "model_forward_time": 0.11885738372802734,
      "step": 23486
    },
    {
      "epoch": 0.00014334716796875,
      "step": 23486,
      "training_step_time": 0.6662225723266602
    },
    {
      "epoch": 0.000143353271484375,
      "model_forward_time": 0.11772775650024414,
      "step": 23487
    },
    {
      "epoch": 0.000143353271484375,
      "step": 23487,
      "training_step_time": 0.6235096454620361
    },
    {
      "epoch": 0.000143359375,
      "model_forward_time": 0.11689376831054688,
      "step": 23488
    },
    {
      "epoch": 0.000143359375,
      "step": 23488,
      "training_step_time": 0.6858165264129639
    },
    {
      "epoch": 0.000143365478515625,
      "model_forward_time": 0.11869621276855469,
      "step": 23489
    },
    {
      "epoch": 0.000143365478515625,
      "step": 23489,
      "training_step_time": 0.7390880584716797
    },
    {
      "epoch": 0.00014337158203125,
      "grad_norm": 0.19531920552253723,
      "learning_rate": 7.136374065363334e-05,
      "loss": 0.0552,
      "step": 23490
    },
    {
      "epoch": 0.00014337158203125,
      "model_forward_time": 0.12689518928527832,
      "step": 23490
    },
    {
      "epoch": 0.00014337158203125,
      "step": 23490,
      "training_step_time": 0.6275308132171631
    },
    {
      "epoch": 0.000143377685546875,
      "model_forward_time": 0.12142467498779297,
      "step": 23491
    },
    {
      "epoch": 0.000143377685546875,
      "step": 23491,
      "training_step_time": 0.6220231056213379
    },
    {
      "epoch": 0.0001433837890625,
      "model_forward_time": 0.11987972259521484,
      "step": 23492
    },
    {
      "epoch": 0.0001433837890625,
      "step": 23492,
      "training_step_time": 0.6221485137939453
    },
    {
      "epoch": 0.000143389892578125,
      "model_forward_time": 0.13751482963562012,
      "step": 23493
    },
    {
      "epoch": 0.000143389892578125,
      "step": 23493,
      "training_step_time": 0.5606629848480225
    },
    {
      "epoch": 0.00014339599609375,
      "model_forward_time": 0.12705683708190918,
      "step": 23494
    },
    {
      "epoch": 0.00014339599609375,
      "step": 23494,
      "training_step_time": 0.641871452331543
    },
    {
      "epoch": 0.000143402099609375,
      "model_forward_time": 0.1220550537109375,
      "step": 23495
    },
    {
      "epoch": 0.000143402099609375,
      "step": 23495,
      "training_step_time": 0.6815478801727295
    },
    {
      "epoch": 0.000143408203125,
      "model_forward_time": 0.11897826194763184,
      "step": 23496
    },
    {
      "epoch": 0.000143408203125,
      "step": 23496,
      "training_step_time": 0.6602902412414551
    },
    {
      "epoch": 0.000143414306640625,
      "model_forward_time": 0.11707592010498047,
      "step": 23497
    },
    {
      "epoch": 0.000143414306640625,
      "step": 23497,
      "training_step_time": 0.5396251678466797
    },
    {
      "epoch": 0.00014342041015625,
      "model_forward_time": 0.11913180351257324,
      "step": 23498
    },
    {
      "epoch": 0.00014342041015625,
      "step": 23498,
      "training_step_time": 0.46473121643066406
    },
    {
      "epoch": 0.000143426513671875,
      "model_forward_time": 0.11866021156311035,
      "step": 23499
    },
    {
      "epoch": 0.000143426513671875,
      "step": 23499,
      "training_step_time": 0.4592926502227783
    },
    {
      "epoch": 0.0001434326171875,
      "grad_norm": 0.1424846053123474,
      "learning_rate": 7.133882177482019e-05,
      "loss": 0.0543,
      "step": 23500
    },
    {
      "epoch": 0.0001434326171875,
      "model_forward_time": 0.11761736869812012,
      "step": 23500
    },
    {
      "epoch": 0.0001434326171875,
      "step": 23500,
      "training_step_time": 0.4394655227661133
    },
    {
      "epoch": 0.000143438720703125,
      "model_forward_time": 0.11794734001159668,
      "step": 23501
    },
    {
      "epoch": 0.000143438720703125,
      "step": 23501,
      "training_step_time": 0.4361855983734131
    },
    {
      "epoch": 0.00014344482421875,
      "model_forward_time": 0.1159508228302002,
      "step": 23502
    },
    {
      "epoch": 0.00014344482421875,
      "step": 23502,
      "training_step_time": 0.4155611991882324
    },
    {
      "epoch": 0.000143450927734375,
      "model_forward_time": 0.11587834358215332,
      "step": 23503
    },
    {
      "epoch": 0.000143450927734375,
      "step": 23503,
      "training_step_time": 0.41450023651123047
    },
    {
      "epoch": 0.00014345703125,
      "model_forward_time": 0.11654853820800781,
      "step": 23504
    },
    {
      "epoch": 0.00014345703125,
      "step": 23504,
      "training_step_time": 0.40558886528015137
    },
    {
      "epoch": 0.000143463134765625,
      "model_forward_time": 0.11601877212524414,
      "step": 23505
    },
    {
      "epoch": 0.000143463134765625,
      "step": 23505,
      "training_step_time": 0.40468859672546387
    },
    {
      "epoch": 0.00014346923828125,
      "model_forward_time": 0.11587929725646973,
      "step": 23506
    },
    {
      "epoch": 0.00014346923828125,
      "step": 23506,
      "training_step_time": 0.39528632164001465
    },
    {
      "epoch": 0.000143475341796875,
      "model_forward_time": 0.11603617668151855,
      "step": 23507
    },
    {
      "epoch": 0.000143475341796875,
      "step": 23507,
      "training_step_time": 0.4350268840789795
    },
    {
      "epoch": 0.0001434814453125,
      "model_forward_time": 0.11591553688049316,
      "step": 23508
    },
    {
      "epoch": 0.0001434814453125,
      "step": 23508,
      "training_step_time": 0.5109589099884033
    },
    {
      "epoch": 0.000143487548828125,
      "model_forward_time": 0.11591291427612305,
      "step": 23509
    },
    {
      "epoch": 0.000143487548828125,
      "step": 23509,
      "training_step_time": 0.40650010108947754
    },
    {
      "epoch": 0.00014349365234375,
      "grad_norm": 0.15051014721393585,
      "learning_rate": 7.131389641383639e-05,
      "loss": 0.0499,
      "step": 23510
    },
    {
      "epoch": 0.00014349365234375,
      "model_forward_time": 0.11548733711242676,
      "step": 23510
    },
    {
      "epoch": 0.00014349365234375,
      "step": 23510,
      "training_step_time": 0.43509459495544434
    },
    {
      "epoch": 0.000143499755859375,
      "model_forward_time": 0.11512970924377441,
      "step": 23511
    },
    {
      "epoch": 0.000143499755859375,
      "step": 23511,
      "training_step_time": 0.43010663986206055
    },
    {
      "epoch": 0.000143505859375,
      "model_forward_time": 0.1154017448425293,
      "step": 23512
    },
    {
      "epoch": 0.000143505859375,
      "step": 23512,
      "training_step_time": 0.3924422264099121
    },
    {
      "epoch": 0.000143511962890625,
      "model_forward_time": 0.11536359786987305,
      "step": 23513
    },
    {
      "epoch": 0.000143511962890625,
      "step": 23513,
      "training_step_time": 0.39154839515686035
    },
    {
      "epoch": 0.00014351806640625,
      "model_forward_time": 0.11530804634094238,
      "step": 23514
    },
    {
      "epoch": 0.00014351806640625,
      "step": 23514,
      "training_step_time": 0.3899517059326172
    },
    {
      "epoch": 0.000143524169921875,
      "model_forward_time": 0.11465120315551758,
      "step": 23515
    },
    {
      "epoch": 0.000143524169921875,
      "step": 23515,
      "training_step_time": 0.39083147048950195
    },
    {
      "epoch": 0.0001435302734375,
      "model_forward_time": 0.11588740348815918,
      "step": 23516
    },
    {
      "epoch": 0.0001435302734375,
      "step": 23516,
      "training_step_time": 0.39478564262390137
    },
    {
      "epoch": 0.000143536376953125,
      "model_forward_time": 0.11533403396606445,
      "step": 23517
    },
    {
      "epoch": 0.000143536376953125,
      "step": 23517,
      "training_step_time": 0.3924543857574463
    },
    {
      "epoch": 0.00014354248046875,
      "model_forward_time": 0.11534953117370605,
      "step": 23518
    },
    {
      "epoch": 0.00014354248046875,
      "step": 23518,
      "training_step_time": 0.41124701499938965
    },
    {
      "epoch": 0.000143548583984375,
      "model_forward_time": 0.1152799129486084,
      "step": 23519
    },
    {
      "epoch": 0.000143548583984375,
      "step": 23519,
      "training_step_time": 0.4061570167541504
    },
    {
      "epoch": 0.0001435546875,
      "grad_norm": 0.12290427088737488,
      "learning_rate": 7.128896457825364e-05,
      "loss": 0.0558,
      "step": 23520
    },
    {
      "epoch": 0.0001435546875,
      "model_forward_time": 0.11576008796691895,
      "step": 23520
    },
    {
      "epoch": 0.0001435546875,
      "step": 23520,
      "training_step_time": 0.40470457077026367
    },
    {
      "epoch": 0.000143560791015625,
      "model_forward_time": 0.11527466773986816,
      "step": 23521
    },
    {
      "epoch": 0.000143560791015625,
      "step": 23521,
      "training_step_time": 0.5031147003173828
    },
    {
      "epoch": 0.00014356689453125,
      "model_forward_time": 0.11494731903076172,
      "step": 23522
    },
    {
      "epoch": 0.00014356689453125,
      "step": 23522,
      "training_step_time": 0.4300825595855713
    },
    {
      "epoch": 0.000143572998046875,
      "model_forward_time": 0.11523795127868652,
      "step": 23523
    },
    {
      "epoch": 0.000143572998046875,
      "step": 23523,
      "training_step_time": 0.48780083656311035
    },
    {
      "epoch": 0.0001435791015625,
      "model_forward_time": 0.11447572708129883,
      "step": 23524
    },
    {
      "epoch": 0.0001435791015625,
      "step": 23524,
      "training_step_time": 0.45102930068969727
    },
    {
      "epoch": 0.000143585205078125,
      "model_forward_time": 0.11451435089111328,
      "step": 23525
    },
    {
      "epoch": 0.000143585205078125,
      "step": 23525,
      "training_step_time": 0.3945732116699219
    },
    {
      "epoch": 0.00014359130859375,
      "model_forward_time": 0.1153254508972168,
      "step": 23526
    },
    {
      "epoch": 0.00014359130859375,
      "step": 23526,
      "training_step_time": 0.4028816223144531
    },
    {
      "epoch": 0.000143597412109375,
      "model_forward_time": 0.11500120162963867,
      "step": 23527
    },
    {
      "epoch": 0.000143597412109375,
      "step": 23527,
      "training_step_time": 0.39719152450561523
    },
    {
      "epoch": 0.000143603515625,
      "model_forward_time": 0.11544919013977051,
      "step": 23528
    },
    {
      "epoch": 0.000143603515625,
      "step": 23528,
      "training_step_time": 0.394700288772583
    },
    {
      "epoch": 0.000143609619140625,
      "model_forward_time": 0.1157522201538086,
      "step": 23529
    },
    {
      "epoch": 0.000143609619140625,
      "step": 23529,
      "training_step_time": 0.4058823585510254
    },
    {
      "epoch": 0.00014361572265625,
      "grad_norm": 0.15598934888839722,
      "learning_rate": 7.126402627564555e-05,
      "loss": 0.0481,
      "step": 23530
    },
    {
      "epoch": 0.00014361572265625,
      "model_forward_time": 0.11559557914733887,
      "step": 23530
    },
    {
      "epoch": 0.00014361572265625,
      "step": 23530,
      "training_step_time": 0.3930375576019287
    },
    {
      "epoch": 0.000143621826171875,
      "model_forward_time": 0.11583209037780762,
      "step": 23531
    },
    {
      "epoch": 0.000143621826171875,
      "step": 23531,
      "training_step_time": 0.40546703338623047
    },
    {
      "epoch": 0.0001436279296875,
      "model_forward_time": 0.11541318893432617,
      "step": 23532
    },
    {
      "epoch": 0.0001436279296875,
      "step": 23532,
      "training_step_time": 0.3962416648864746
    },
    {
      "epoch": 0.000143634033203125,
      "model_forward_time": 0.11524677276611328,
      "step": 23533
    },
    {
      "epoch": 0.000143634033203125,
      "step": 23533,
      "training_step_time": 0.409496545791626
    },
    {
      "epoch": 0.00014364013671875,
      "model_forward_time": 0.11591768264770508,
      "step": 23534
    },
    {
      "epoch": 0.00014364013671875,
      "step": 23534,
      "training_step_time": 0.38773345947265625
    },
    {
      "epoch": 0.000143646240234375,
      "model_forward_time": 0.11650514602661133,
      "step": 23535
    },
    {
      "epoch": 0.000143646240234375,
      "step": 23535,
      "training_step_time": 0.3939487934112549
    },
    {
      "epoch": 0.00014365234375,
      "model_forward_time": 0.11545658111572266,
      "step": 23536
    },
    {
      "epoch": 0.00014365234375,
      "step": 23536,
      "training_step_time": 0.4356093406677246
    },
    {
      "epoch": 0.000143658447265625,
      "model_forward_time": 0.11481118202209473,
      "step": 23537
    },
    {
      "epoch": 0.000143658447265625,
      "step": 23537,
      "training_step_time": 0.4277913570404053
    },
    {
      "epoch": 0.00014366455078125,
      "model_forward_time": 0.1147298812866211,
      "step": 23538
    },
    {
      "epoch": 0.00014366455078125,
      "step": 23538,
      "training_step_time": 0.4653322696685791
    },
    {
      "epoch": 0.000143670654296875,
      "model_forward_time": 0.11611795425415039,
      "step": 23539
    },
    {
      "epoch": 0.000143670654296875,
      "step": 23539,
      "training_step_time": 0.4439513683319092
    },
    {
      "epoch": 0.0001436767578125,
      "grad_norm": 0.16390152275562286,
      "learning_rate": 7.12390815135877e-05,
      "loss": 0.0536,
      "step": 23540
    },
    {
      "epoch": 0.0001436767578125,
      "model_forward_time": 0.11506795883178711,
      "step": 23540
    },
    {
      "epoch": 0.0001436767578125,
      "step": 23540,
      "training_step_time": 0.42565417289733887
    },
    {
      "epoch": 0.000143682861328125,
      "model_forward_time": 0.11518025398254395,
      "step": 23541
    },
    {
      "epoch": 0.000143682861328125,
      "step": 23541,
      "training_step_time": 0.38256049156188965
    },
    {
      "epoch": 0.00014368896484375,
      "model_forward_time": 0.11536908149719238,
      "step": 23542
    },
    {
      "epoch": 0.00014368896484375,
      "step": 23542,
      "training_step_time": 0.392132043838501
    },
    {
      "epoch": 0.000143695068359375,
      "model_forward_time": 0.11775803565979004,
      "step": 23543
    },
    {
      "epoch": 0.000143695068359375,
      "step": 23543,
      "training_step_time": 0.3938100337982178
    },
    {
      "epoch": 0.000143701171875,
      "model_forward_time": 0.11482572555541992,
      "step": 23544
    },
    {
      "epoch": 0.000143701171875,
      "step": 23544,
      "training_step_time": 0.39775872230529785
    },
    {
      "epoch": 0.000143707275390625,
      "model_forward_time": 0.11564040184020996,
      "step": 23545
    },
    {
      "epoch": 0.000143707275390625,
      "step": 23545,
      "training_step_time": 0.39519762992858887
    },
    {
      "epoch": 0.00014371337890625,
      "model_forward_time": 0.11529231071472168,
      "step": 23546
    },
    {
      "epoch": 0.00014371337890625,
      "step": 23546,
      "training_step_time": 0.39642834663391113
    },
    {
      "epoch": 0.000143719482421875,
      "model_forward_time": 0.11593151092529297,
      "step": 23547
    },
    {
      "epoch": 0.000143719482421875,
      "step": 23547,
      "training_step_time": 0.4106118679046631
    },
    {
      "epoch": 0.0001437255859375,
      "model_forward_time": 0.11529040336608887,
      "step": 23548
    },
    {
      "epoch": 0.0001437255859375,
      "step": 23548,
      "training_step_time": 0.3920609951019287
    },
    {
      "epoch": 0.000143731689453125,
      "model_forward_time": 0.11574482917785645,
      "step": 23549
    },
    {
      "epoch": 0.000143731689453125,
      "step": 23549,
      "training_step_time": 0.387636661529541
    },
    {
      "epoch": 0.00014373779296875,
      "grad_norm": 0.1557324081659317,
      "learning_rate": 7.121413029965769e-05,
      "loss": 0.0443,
      "step": 23550
    },
    {
      "epoch": 0.00014373779296875,
      "model_forward_time": 0.11556029319763184,
      "step": 23550
    },
    {
      "epoch": 0.00014373779296875,
      "step": 23550,
      "training_step_time": 0.4088277816772461
    },
    {
      "epoch": 0.000143743896484375,
      "model_forward_time": 0.11619806289672852,
      "step": 23551
    },
    {
      "epoch": 0.000143743896484375,
      "step": 23551,
      "training_step_time": 0.38967108726501465
    },
    {
      "epoch": 0.00014375,
      "model_forward_time": 0.11552572250366211,
      "step": 23552
    },
    {
      "epoch": 0.00014375,
      "step": 23552,
      "training_step_time": 0.45225071907043457
    },
    {
      "epoch": 0.000143756103515625,
      "model_forward_time": 0.114959716796875,
      "step": 23553
    },
    {
      "epoch": 0.000143756103515625,
      "step": 23553,
      "training_step_time": 0.4510493278503418
    },
    {
      "epoch": 0.00014376220703125,
      "model_forward_time": 0.11476874351501465,
      "step": 23554
    },
    {
      "epoch": 0.00014376220703125,
      "step": 23554,
      "training_step_time": 0.39980387687683105
    },
    {
      "epoch": 0.000143768310546875,
      "model_forward_time": 0.11450529098510742,
      "step": 23555
    },
    {
      "epoch": 0.000143768310546875,
      "step": 23555,
      "training_step_time": 0.402606725692749
    },
    {
      "epoch": 0.0001437744140625,
      "model_forward_time": 0.11491894721984863,
      "step": 23556
    },
    {
      "epoch": 0.0001437744140625,
      "step": 23556,
      "training_step_time": 0.40008974075317383
    },
    {
      "epoch": 0.000143780517578125,
      "model_forward_time": 0.11582612991333008,
      "step": 23557
    },
    {
      "epoch": 0.000143780517578125,
      "step": 23557,
      "training_step_time": 0.39526963233947754
    },
    {
      "epoch": 0.00014378662109375,
      "model_forward_time": 0.11536431312561035,
      "step": 23558
    },
    {
      "epoch": 0.00014378662109375,
      "step": 23558,
      "training_step_time": 0.3870816230773926
    },
    {
      "epoch": 0.000143792724609375,
      "model_forward_time": 0.11512064933776855,
      "step": 23559
    },
    {
      "epoch": 0.000143792724609375,
      "step": 23559,
      "training_step_time": 0.37555909156799316
    },
    {
      "epoch": 0.000143798828125,
      "grad_norm": 0.14504064619541168,
      "learning_rate": 7.118917264143501e-05,
      "loss": 0.0482,
      "step": 23560
    },
    {
      "epoch": 0.000143798828125,
      "model_forward_time": 0.11522936820983887,
      "step": 23560
    },
    {
      "epoch": 0.000143798828125,
      "step": 23560,
      "training_step_time": 0.3926270008087158
    },
    {
      "epoch": 0.000143804931640625,
      "model_forward_time": 0.11562633514404297,
      "step": 23561
    },
    {
      "epoch": 0.000143804931640625,
      "step": 23561,
      "training_step_time": 0.40215158462524414
    },
    {
      "epoch": 0.00014381103515625,
      "model_forward_time": 0.11536979675292969,
      "step": 23562
    },
    {
      "epoch": 0.00014381103515625,
      "step": 23562,
      "training_step_time": 0.396533727645874
    },
    {
      "epoch": 0.000143817138671875,
      "model_forward_time": 0.11549139022827148,
      "step": 23563
    },
    {
      "epoch": 0.000143817138671875,
      "step": 23563,
      "training_step_time": 0.4007439613342285
    },
    {
      "epoch": 0.0001438232421875,
      "model_forward_time": 0.1154487133026123,
      "step": 23564
    },
    {
      "epoch": 0.0001438232421875,
      "step": 23564,
      "training_step_time": 0.38749051094055176
    },
    {
      "epoch": 0.000143829345703125,
      "model_forward_time": 0.11821389198303223,
      "step": 23565
    },
    {
      "epoch": 0.000143829345703125,
      "step": 23565,
      "training_step_time": 0.3936331272125244
    },
    {
      "epoch": 0.00014383544921875,
      "model_forward_time": 0.11536836624145508,
      "step": 23566
    },
    {
      "epoch": 0.00014383544921875,
      "step": 23566,
      "training_step_time": 0.369931697845459
    },
    {
      "epoch": 0.000143841552734375,
      "model_forward_time": 0.11548042297363281,
      "step": 23567
    },
    {
      "epoch": 0.000143841552734375,
      "step": 23567,
      "training_step_time": 0.5087714195251465
    },
    {
      "epoch": 0.00014384765625,
      "model_forward_time": 0.11536979675292969,
      "step": 23568
    },
    {
      "epoch": 0.00014384765625,
      "step": 23568,
      "training_step_time": 0.48599863052368164
    },
    {
      "epoch": 0.000143853759765625,
      "model_forward_time": 0.11570501327514648,
      "step": 23569
    },
    {
      "epoch": 0.000143853759765625,
      "step": 23569,
      "training_step_time": 0.4040970802307129
    },
    {
      "epoch": 0.00014385986328125,
      "grad_norm": 0.1652332991361618,
      "learning_rate": 7.116420854650117e-05,
      "loss": 0.0508,
      "step": 23570
    },
    {
      "epoch": 0.00014385986328125,
      "model_forward_time": 0.1144721508026123,
      "step": 23570
    },
    {
      "epoch": 0.00014385986328125,
      "step": 23570,
      "training_step_time": 0.45218873023986816
    },
    {
      "epoch": 0.000143865966796875,
      "model_forward_time": 0.11523103713989258,
      "step": 23571
    },
    {
      "epoch": 0.000143865966796875,
      "step": 23571,
      "training_step_time": 0.3844335079193115
    },
    {
      "epoch": 0.0001438720703125,
      "model_forward_time": 0.11584973335266113,
      "step": 23572
    },
    {
      "epoch": 0.0001438720703125,
      "step": 23572,
      "training_step_time": 0.4031190872192383
    },
    {
      "epoch": 0.000143878173828125,
      "model_forward_time": 0.11564755439758301,
      "step": 23573
    },
    {
      "epoch": 0.000143878173828125,
      "step": 23573,
      "training_step_time": 0.38795018196105957
    },
    {
      "epoch": 0.00014388427734375,
      "model_forward_time": 0.11513495445251465,
      "step": 23574
    },
    {
      "epoch": 0.00014388427734375,
      "step": 23574,
      "training_step_time": 0.39225053787231445
    },
    {
      "epoch": 0.000143890380859375,
      "model_forward_time": 0.11549043655395508,
      "step": 23575
    },
    {
      "epoch": 0.000143890380859375,
      "step": 23575,
      "training_step_time": 0.3967912197113037
    },
    {
      "epoch": 0.000143896484375,
      "model_forward_time": 0.11526870727539062,
      "step": 23576
    },
    {
      "epoch": 0.000143896484375,
      "step": 23576,
      "training_step_time": 0.3902153968811035
    },
    {
      "epoch": 0.000143902587890625,
      "model_forward_time": 0.1151275634765625,
      "step": 23577
    },
    {
      "epoch": 0.000143902587890625,
      "step": 23577,
      "training_step_time": 0.38922953605651855
    },
    {
      "epoch": 0.00014390869140625,
      "model_forward_time": 0.11572909355163574,
      "step": 23578
    },
    {
      "epoch": 0.00014390869140625,
      "step": 23578,
      "training_step_time": 0.39220476150512695
    },
    {
      "epoch": 0.000143914794921875,
      "model_forward_time": 0.11495113372802734,
      "step": 23579
    },
    {
      "epoch": 0.000143914794921875,
      "step": 23579,
      "training_step_time": 0.6123363971710205
    },
    {
      "epoch": 0.0001439208984375,
      "grad_norm": 0.1715744286775589,
      "learning_rate": 7.113923802243957e-05,
      "loss": 0.0499,
      "step": 23580
    },
    {
      "epoch": 0.0001439208984375,
      "model_forward_time": 0.11442399024963379,
      "step": 23580
    },
    {
      "epoch": 0.0001439208984375,
      "step": 23580,
      "training_step_time": 0.3658173084259033
    },
    {
      "epoch": 0.000143927001953125,
      "model_forward_time": 0.11495685577392578,
      "step": 23581
    },
    {
      "epoch": 0.000143927001953125,
      "step": 23581,
      "training_step_time": 0.406536340713501
    },
    {
      "epoch": 0.00014393310546875,
      "model_forward_time": 0.11631989479064941,
      "step": 23582
    },
    {
      "epoch": 0.00014393310546875,
      "step": 23582,
      "training_step_time": 0.4959995746612549
    },
    {
      "epoch": 0.000143939208984375,
      "model_forward_time": 0.11521720886230469,
      "step": 23583
    },
    {
      "epoch": 0.000143939208984375,
      "step": 23583,
      "training_step_time": 0.4044458866119385
    },
    {
      "epoch": 0.0001439453125,
      "model_forward_time": 0.11468005180358887,
      "step": 23584
    },
    {
      "epoch": 0.0001439453125,
      "step": 23584,
      "training_step_time": 0.5215580463409424
    },
    {
      "epoch": 0.000143951416015625,
      "model_forward_time": 0.11456918716430664,
      "step": 23585
    },
    {
      "epoch": 0.000143951416015625,
      "step": 23585,
      "training_step_time": 0.3889493942260742
    },
    {
      "epoch": 0.00014395751953125,
      "model_forward_time": 0.11454653739929199,
      "step": 23586
    },
    {
      "epoch": 0.00014395751953125,
      "step": 23586,
      "training_step_time": 0.39464426040649414
    },
    {
      "epoch": 0.000143963623046875,
      "model_forward_time": 0.11699342727661133,
      "step": 23587
    },
    {
      "epoch": 0.000143963623046875,
      "step": 23587,
      "training_step_time": 0.39646410942077637
    },
    {
      "epoch": 0.0001439697265625,
      "model_forward_time": 0.11486101150512695,
      "step": 23588
    },
    {
      "epoch": 0.0001439697265625,
      "step": 23588,
      "training_step_time": 0.41039013862609863
    },
    {
      "epoch": 0.000143975830078125,
      "model_forward_time": 0.11615300178527832,
      "step": 23589
    },
    {
      "epoch": 0.000143975830078125,
      "step": 23589,
      "training_step_time": 0.3830082416534424
    },
    {
      "epoch": 0.00014398193359375,
      "grad_norm": 0.16794709861278534,
      "learning_rate": 7.11142610768356e-05,
      "loss": 0.0532,
      "step": 23590
    },
    {
      "epoch": 0.00014398193359375,
      "model_forward_time": 0.11506915092468262,
      "step": 23590
    },
    {
      "epoch": 0.00014398193359375,
      "step": 23590,
      "training_step_time": 0.3936581611633301
    },
    {
      "epoch": 0.000143988037109375,
      "model_forward_time": 0.1157371997833252,
      "step": 23591
    },
    {
      "epoch": 0.000143988037109375,
      "step": 23591,
      "training_step_time": 0.4783322811126709
    },
    {
      "epoch": 0.000143994140625,
      "model_forward_time": 0.11519575119018555,
      "step": 23592
    },
    {
      "epoch": 0.000143994140625,
      "step": 23592,
      "training_step_time": 0.3985605239868164
    },
    {
      "epoch": 0.000144000244140625,
      "model_forward_time": 0.1146695613861084,
      "step": 23593
    },
    {
      "epoch": 0.000144000244140625,
      "step": 23593,
      "training_step_time": 0.3960275650024414
    },
    {
      "epoch": 0.00014400634765625,
      "model_forward_time": 0.11546158790588379,
      "step": 23594
    },
    {
      "epoch": 0.00014400634765625,
      "step": 23594,
      "training_step_time": 0.4633350372314453
    },
    {
      "epoch": 0.000144012451171875,
      "model_forward_time": 0.11516952514648438,
      "step": 23595
    },
    {
      "epoch": 0.000144012451171875,
      "step": 23595,
      "training_step_time": 0.504755973815918
    },
    {
      "epoch": 0.0001440185546875,
      "model_forward_time": 0.11491966247558594,
      "step": 23596
    },
    {
      "epoch": 0.0001440185546875,
      "step": 23596,
      "training_step_time": 0.4442439079284668
    },
    {
      "epoch": 0.000144024658203125,
      "model_forward_time": 0.11440563201904297,
      "step": 23597
    },
    {
      "epoch": 0.000144024658203125,
      "step": 23597,
      "training_step_time": 0.48776817321777344
    },
    {
      "epoch": 0.00014403076171875,
      "model_forward_time": 0.11440801620483398,
      "step": 23598
    },
    {
      "epoch": 0.00014403076171875,
      "step": 23598,
      "training_step_time": 0.4551529884338379
    },
    {
      "epoch": 0.000144036865234375,
      "model_forward_time": 0.11438536643981934,
      "step": 23599
    },
    {
      "epoch": 0.000144036865234375,
      "step": 23599,
      "training_step_time": 0.4320666790008545
    },
    {
      "epoch": 0.00014404296875,
      "grad_norm": 0.16240638494491577,
      "learning_rate": 7.108927771727661e-05,
      "loss": 0.0511,
      "step": 23600
    },
    {
      "epoch": 0.00014404296875,
      "model_forward_time": 0.11374664306640625,
      "step": 23600
    },
    {
      "epoch": 0.00014404296875,
      "step": 23600,
      "training_step_time": 0.3978292942047119
    },
    {
      "epoch": 0.000144049072265625,
      "model_forward_time": 0.11463665962219238,
      "step": 23601
    },
    {
      "epoch": 0.000144049072265625,
      "step": 23601,
      "training_step_time": 0.38875889778137207
    },
    {
      "epoch": 0.00014405517578125,
      "model_forward_time": 0.11509585380554199,
      "step": 23602
    },
    {
      "epoch": 0.00014405517578125,
      "step": 23602,
      "training_step_time": 0.40692663192749023
    },
    {
      "epoch": 0.000144061279296875,
      "model_forward_time": 0.11527848243713379,
      "step": 23603
    },
    {
      "epoch": 0.000144061279296875,
      "step": 23603,
      "training_step_time": 0.39441442489624023
    },
    {
      "epoch": 0.0001440673828125,
      "model_forward_time": 0.11580467224121094,
      "step": 23604
    },
    {
      "epoch": 0.0001440673828125,
      "step": 23604,
      "training_step_time": 0.3952908515930176
    },
    {
      "epoch": 0.000144073486328125,
      "model_forward_time": 0.11496663093566895,
      "step": 23605
    },
    {
      "epoch": 0.000144073486328125,
      "step": 23605,
      "training_step_time": 0.39273881912231445
    },
    {
      "epoch": 0.00014407958984375,
      "model_forward_time": 0.11543583869934082,
      "step": 23606
    },
    {
      "epoch": 0.00014407958984375,
      "step": 23606,
      "training_step_time": 0.5162715911865234
    },
    {
      "epoch": 0.000144085693359375,
      "model_forward_time": 0.11570549011230469,
      "step": 23607
    },
    {
      "epoch": 0.000144085693359375,
      "step": 23607,
      "training_step_time": 0.42299532890319824
    },
    {
      "epoch": 0.000144091796875,
      "model_forward_time": 0.11530470848083496,
      "step": 23608
    },
    {
      "epoch": 0.000144091796875,
      "step": 23608,
      "training_step_time": 0.4439713954925537
    },
    {
      "epoch": 0.000144097900390625,
      "model_forward_time": 0.11462092399597168,
      "step": 23609
    },
    {
      "epoch": 0.000144097900390625,
      "step": 23609,
      "training_step_time": 0.4937913417816162
    },
    {
      "epoch": 0.00014410400390625,
      "grad_norm": 0.14980119466781616,
      "learning_rate": 7.10642879513519e-05,
      "loss": 0.0526,
      "step": 23610
    },
    {
      "epoch": 0.00014410400390625,
      "model_forward_time": 0.1156611442565918,
      "step": 23610
    },
    {
      "epoch": 0.00014410400390625,
      "step": 23610,
      "training_step_time": 0.4830589294433594
    },
    {
      "epoch": 0.000144110107421875,
      "model_forward_time": 0.1149132251739502,
      "step": 23611
    },
    {
      "epoch": 0.000144110107421875,
      "step": 23611,
      "training_step_time": 0.4551424980163574
    },
    {
      "epoch": 0.0001441162109375,
      "model_forward_time": 0.11474061012268066,
      "step": 23612
    },
    {
      "epoch": 0.0001441162109375,
      "step": 23612,
      "training_step_time": 0.5331904888153076
    },
    {
      "epoch": 0.000144122314453125,
      "model_forward_time": 0.11409902572631836,
      "step": 23613
    },
    {
      "epoch": 0.000144122314453125,
      "step": 23613,
      "training_step_time": 0.42658519744873047
    },
    {
      "epoch": 0.00014412841796875,
      "model_forward_time": 0.11516284942626953,
      "step": 23614
    },
    {
      "epoch": 0.00014412841796875,
      "step": 23614,
      "training_step_time": 0.38738012313842773
    },
    {
      "epoch": 0.000144134521484375,
      "model_forward_time": 0.1151885986328125,
      "step": 23615
    },
    {
      "epoch": 0.000144134521484375,
      "step": 23615,
      "training_step_time": 0.3935091495513916
    },
    {
      "epoch": 0.000144140625,
      "model_forward_time": 0.11440086364746094,
      "step": 23616
    },
    {
      "epoch": 0.000144140625,
      "step": 23616,
      "training_step_time": 0.390047550201416
    },
    {
      "epoch": 0.000144146728515625,
      "model_forward_time": 0.11561107635498047,
      "step": 23617
    },
    {
      "epoch": 0.000144146728515625,
      "step": 23617,
      "training_step_time": 0.3924696445465088
    },
    {
      "epoch": 0.00014415283203125,
      "model_forward_time": 0.11525487899780273,
      "step": 23618
    },
    {
      "epoch": 0.00014415283203125,
      "step": 23618,
      "training_step_time": 0.5907948017120361
    },
    {
      "epoch": 0.000144158935546875,
      "model_forward_time": 0.1146237850189209,
      "step": 23619
    },
    {
      "epoch": 0.000144158935546875,
      "step": 23619,
      "training_step_time": 0.38690662384033203
    },
    {
      "epoch": 0.0001441650390625,
      "grad_norm": 0.11344710737466812,
      "learning_rate": 7.103929178665266e-05,
      "loss": 0.0529,
      "step": 23620
    },
    {
      "epoch": 0.0001441650390625,
      "model_forward_time": 0.11508440971374512,
      "step": 23620
    },
    {
      "epoch": 0.0001441650390625,
      "step": 23620,
      "training_step_time": 0.39656782150268555
    },
    {
      "epoch": 0.000144171142578125,
      "model_forward_time": 0.11525940895080566,
      "step": 23621
    },
    {
      "epoch": 0.000144171142578125,
      "step": 23621,
      "training_step_time": 0.42331814765930176
    },
    {
      "epoch": 0.00014417724609375,
      "model_forward_time": 0.11501574516296387,
      "step": 23622
    },
    {
      "epoch": 0.00014417724609375,
      "step": 23622,
      "training_step_time": 0.47843170166015625
    },
    {
      "epoch": 0.000144183349609375,
      "model_forward_time": 0.1148381233215332,
      "step": 23623
    },
    {
      "epoch": 0.000144183349609375,
      "step": 23623,
      "training_step_time": 0.4806675910949707
    },
    {
      "epoch": 0.000144189453125,
      "model_forward_time": 0.11492729187011719,
      "step": 23624
    },
    {
      "epoch": 0.000144189453125,
      "step": 23624,
      "training_step_time": 0.5878686904907227
    },
    {
      "epoch": 0.000144195556640625,
      "model_forward_time": 0.11493468284606934,
      "step": 23625
    },
    {
      "epoch": 0.000144195556640625,
      "step": 23625,
      "training_step_time": 0.46785449981689453
    },
    {
      "epoch": 0.00014420166015625,
      "model_forward_time": 0.11438941955566406,
      "step": 23626
    },
    {
      "epoch": 0.00014420166015625,
      "step": 23626,
      "training_step_time": 0.46410655975341797
    },
    {
      "epoch": 0.000144207763671875,
      "model_forward_time": 0.1143043041229248,
      "step": 23627
    },
    {
      "epoch": 0.000144207763671875,
      "step": 23627,
      "training_step_time": 0.5047729015350342
    },
    {
      "epoch": 0.0001442138671875,
      "model_forward_time": 0.11458778381347656,
      "step": 23628
    },
    {
      "epoch": 0.0001442138671875,
      "step": 23628,
      "training_step_time": 0.37932896614074707
    },
    {
      "epoch": 0.000144219970703125,
      "model_forward_time": 0.11479520797729492,
      "step": 23629
    },
    {
      "epoch": 0.000144219970703125,
      "step": 23629,
      "training_step_time": 0.38904738426208496
    },
    {
      "epoch": 0.00014422607421875,
      "grad_norm": 0.11296574771404266,
      "learning_rate": 7.101428923077209e-05,
      "loss": 0.048,
      "step": 23630
    },
    {
      "epoch": 0.00014422607421875,
      "model_forward_time": 0.11479401588439941,
      "step": 23630
    },
    {
      "epoch": 0.00014422607421875,
      "step": 23630,
      "training_step_time": 0.5298023223876953
    },
    {
      "epoch": 0.000144232177734375,
      "model_forward_time": 0.11505579948425293,
      "step": 23631
    },
    {
      "epoch": 0.000144232177734375,
      "step": 23631,
      "training_step_time": 0.3954625129699707
    },
    {
      "epoch": 0.00014423828125,
      "model_forward_time": 0.11509490013122559,
      "step": 23632
    },
    {
      "epoch": 0.00014423828125,
      "step": 23632,
      "training_step_time": 0.4041299819946289
    },
    {
      "epoch": 0.000144244384765625,
      "model_forward_time": 0.11527228355407715,
      "step": 23633
    },
    {
      "epoch": 0.000144244384765625,
      "step": 23633,
      "training_step_time": 0.43445348739624023
    },
    {
      "epoch": 0.00014425048828125,
      "model_forward_time": 0.11538505554199219,
      "step": 23634
    },
    {
      "epoch": 0.00014425048828125,
      "step": 23634,
      "training_step_time": 0.4143030643463135
    },
    {
      "epoch": 0.000144256591796875,
      "model_forward_time": 0.1151115894317627,
      "step": 23635
    },
    {
      "epoch": 0.000144256591796875,
      "step": 23635,
      "training_step_time": 0.4028463363647461
    },
    {
      "epoch": 0.0001442626953125,
      "model_forward_time": 0.11519622802734375,
      "step": 23636
    },
    {
      "epoch": 0.0001442626953125,
      "step": 23636,
      "training_step_time": 0.49121546745300293
    },
    {
      "epoch": 0.000144268798828125,
      "model_forward_time": 0.11522459983825684,
      "step": 23637
    },
    {
      "epoch": 0.000144268798828125,
      "step": 23637,
      "training_step_time": 0.4107365608215332
    },
    {
      "epoch": 0.00014427490234375,
      "model_forward_time": 0.11527395248413086,
      "step": 23638
    },
    {
      "epoch": 0.00014427490234375,
      "step": 23638,
      "training_step_time": 0.4011087417602539
    },
    {
      "epoch": 0.000144281005859375,
      "model_forward_time": 0.11504578590393066,
      "step": 23639
    },
    {
      "epoch": 0.000144281005859375,
      "step": 23639,
      "training_step_time": 0.4412379264831543
    },
    {
      "epoch": 0.000144287109375,
      "grad_norm": 0.13104547560214996,
      "learning_rate": 7.09892802913053e-05,
      "loss": 0.0567,
      "step": 23640
    },
    {
      "epoch": 0.000144287109375,
      "model_forward_time": 0.11498475074768066,
      "step": 23640
    },
    {
      "epoch": 0.000144287109375,
      "step": 23640,
      "training_step_time": 0.45484042167663574
    },
    {
      "epoch": 0.000144293212890625,
      "model_forward_time": 0.11489534378051758,
      "step": 23641
    },
    {
      "epoch": 0.000144293212890625,
      "step": 23641,
      "training_step_time": 0.4074568748474121
    },
    {
      "epoch": 0.00014429931640625,
      "model_forward_time": 0.11466336250305176,
      "step": 23642
    },
    {
      "epoch": 0.00014429931640625,
      "step": 23642,
      "training_step_time": 0.38680219650268555
    },
    {
      "epoch": 0.000144305419921875,
      "model_forward_time": 0.11545205116271973,
      "step": 23643
    },
    {
      "epoch": 0.000144305419921875,
      "step": 23643,
      "training_step_time": 0.3846104145050049
    },
    {
      "epoch": 0.0001443115234375,
      "model_forward_time": 0.11458516120910645,
      "step": 23644
    },
    {
      "epoch": 0.0001443115234375,
      "step": 23644,
      "training_step_time": 0.38939952850341797
    },
    {
      "epoch": 0.000144317626953125,
      "model_forward_time": 0.1154627799987793,
      "step": 23645
    },
    {
      "epoch": 0.000144317626953125,
      "step": 23645,
      "training_step_time": 0.399738073348999
    },
    {
      "epoch": 0.00014432373046875,
      "model_forward_time": 0.11471772193908691,
      "step": 23646
    },
    {
      "epoch": 0.00014432373046875,
      "step": 23646,
      "training_step_time": 0.41669487953186035
    },
    {
      "epoch": 0.000144329833984375,
      "model_forward_time": 0.11538004875183105,
      "step": 23647
    },
    {
      "epoch": 0.000144329833984375,
      "step": 23647,
      "training_step_time": 0.42529726028442383
    },
    {
      "epoch": 0.0001443359375,
      "model_forward_time": 0.1157073974609375,
      "step": 23648
    },
    {
      "epoch": 0.0001443359375,
      "step": 23648,
      "training_step_time": 0.3905754089355469
    },
    {
      "epoch": 0.000144342041015625,
      "model_forward_time": 0.11523628234863281,
      "step": 23649
    },
    {
      "epoch": 0.000144342041015625,
      "step": 23649,
      "training_step_time": 0.3932645320892334
    },
    {
      "epoch": 0.00014434814453125,
      "grad_norm": 0.170416459441185,
      "learning_rate": 7.096426497584933e-05,
      "loss": 0.0483,
      "step": 23650
    },
    {
      "epoch": 0.00014434814453125,
      "model_forward_time": 0.11492323875427246,
      "step": 23650
    },
    {
      "epoch": 0.00014434814453125,
      "step": 23650,
      "training_step_time": 0.4077303409576416
    },
    {
      "epoch": 0.000144354248046875,
      "model_forward_time": 0.11499524116516113,
      "step": 23651
    },
    {
      "epoch": 0.000144354248046875,
      "step": 23651,
      "training_step_time": 0.44943714141845703
    },
    {
      "epoch": 0.0001443603515625,
      "model_forward_time": 0.11565899848937988,
      "step": 23652
    },
    {
      "epoch": 0.0001443603515625,
      "step": 23652,
      "training_step_time": 0.3985323905944824
    },
    {
      "epoch": 0.000144366455078125,
      "model_forward_time": 0.11475491523742676,
      "step": 23653
    },
    {
      "epoch": 0.000144366455078125,
      "step": 23653,
      "training_step_time": 0.48633360862731934
    },
    {
      "epoch": 0.00014437255859375,
      "model_forward_time": 0.11531686782836914,
      "step": 23654
    },
    {
      "epoch": 0.00014437255859375,
      "step": 23654,
      "training_step_time": 0.4901769161224365
    },
    {
      "epoch": 0.000144378662109375,
      "model_forward_time": 0.11496543884277344,
      "step": 23655
    },
    {
      "epoch": 0.000144378662109375,
      "step": 23655,
      "training_step_time": 0.39397430419921875
    },
    {
      "epoch": 0.000144384765625,
      "model_forward_time": 0.11550354957580566,
      "step": 23656
    },
    {
      "epoch": 0.000144384765625,
      "step": 23656,
      "training_step_time": 0.48482775688171387
    },
    {
      "epoch": 0.000144390869140625,
      "model_forward_time": 0.1150217056274414,
      "step": 23657
    },
    {
      "epoch": 0.000144390869140625,
      "step": 23657,
      "training_step_time": 0.3857688903808594
    },
    {
      "epoch": 0.00014439697265625,
      "model_forward_time": 0.1150054931640625,
      "step": 23658
    },
    {
      "epoch": 0.00014439697265625,
      "step": 23658,
      "training_step_time": 0.38580751419067383
    },
    {
      "epoch": 0.000144403076171875,
      "model_forward_time": 0.11539912223815918,
      "step": 23659
    },
    {
      "epoch": 0.000144403076171875,
      "step": 23659,
      "training_step_time": 0.3904426097869873
    },
    {
      "epoch": 0.0001444091796875,
      "grad_norm": 0.12270268052816391,
      "learning_rate": 7.093924329200321e-05,
      "loss": 0.0488,
      "step": 23660
    },
    {
      "epoch": 0.0001444091796875,
      "model_forward_time": 0.11486601829528809,
      "step": 23660
    },
    {
      "epoch": 0.0001444091796875,
      "step": 23660,
      "training_step_time": 0.41156554222106934
    },
    {
      "epoch": 0.000144415283203125,
      "model_forward_time": 0.115325927734375,
      "step": 23661
    },
    {
      "epoch": 0.000144415283203125,
      "step": 23661,
      "training_step_time": 0.4105043411254883
    },
    {
      "epoch": 0.00014442138671875,
      "model_forward_time": 0.1159060001373291,
      "step": 23662
    },
    {
      "epoch": 0.00014442138671875,
      "step": 23662,
      "training_step_time": 0.38718461990356445
    },
    {
      "epoch": 0.000144427490234375,
      "model_forward_time": 0.11532139778137207,
      "step": 23663
    },
    {
      "epoch": 0.000144427490234375,
      "step": 23663,
      "training_step_time": 0.41486096382141113
    },
    {
      "epoch": 0.00014443359375,
      "model_forward_time": 0.11506390571594238,
      "step": 23664
    },
    {
      "epoch": 0.00014443359375,
      "step": 23664,
      "training_step_time": 0.5045826435089111
    },
    {
      "epoch": 0.000144439697265625,
      "model_forward_time": 0.11572670936584473,
      "step": 23665
    },
    {
      "epoch": 0.000144439697265625,
      "step": 23665,
      "training_step_time": 0.4903731346130371
    },
    {
      "epoch": 0.00014444580078125,
      "model_forward_time": 0.11500310897827148,
      "step": 23666
    },
    {
      "epoch": 0.00014444580078125,
      "step": 23666,
      "training_step_time": 0.47212696075439453
    },
    {
      "epoch": 0.000144451904296875,
      "model_forward_time": 0.11589956283569336,
      "step": 23667
    },
    {
      "epoch": 0.000144451904296875,
      "step": 23667,
      "training_step_time": 0.4257645606994629
    },
    {
      "epoch": 0.0001444580078125,
      "model_forward_time": 0.11533546447753906,
      "step": 23668
    },
    {
      "epoch": 0.0001444580078125,
      "step": 23668,
      "training_step_time": 0.417971134185791
    },
    {
      "epoch": 0.000144464111328125,
      "model_forward_time": 0.11487007141113281,
      "step": 23669
    },
    {
      "epoch": 0.000144464111328125,
      "step": 23669,
      "training_step_time": 0.39960598945617676
    },
    {
      "epoch": 0.00014447021484375,
      "grad_norm": 0.149044930934906,
      "learning_rate": 7.091421524736784e-05,
      "loss": 0.0527,
      "step": 23670
    },
    {
      "epoch": 0.00014447021484375,
      "model_forward_time": 0.11468911170959473,
      "step": 23670
    },
    {
      "epoch": 0.00014447021484375,
      "step": 23670,
      "training_step_time": 0.43944478034973145
    },
    {
      "epoch": 0.000144476318359375,
      "model_forward_time": 0.11505532264709473,
      "step": 23671
    },
    {
      "epoch": 0.000144476318359375,
      "step": 23671,
      "training_step_time": 0.3868434429168701
    },
    {
      "epoch": 0.000144482421875,
      "model_forward_time": 0.1150507926940918,
      "step": 23672
    },
    {
      "epoch": 0.000144482421875,
      "step": 23672,
      "training_step_time": 0.3860495090484619
    },
    {
      "epoch": 0.000144488525390625,
      "model_forward_time": 0.1150369644165039,
      "step": 23673
    },
    {
      "epoch": 0.000144488525390625,
      "step": 23673,
      "training_step_time": 0.4264955520629883
    },
    {
      "epoch": 0.00014449462890625,
      "model_forward_time": 0.1150515079498291,
      "step": 23674
    },
    {
      "epoch": 0.00014449462890625,
      "step": 23674,
      "training_step_time": 0.400618314743042
    },
    {
      "epoch": 0.000144500732421875,
      "model_forward_time": 0.11544060707092285,
      "step": 23675
    },
    {
      "epoch": 0.000144500732421875,
      "step": 23675,
      "training_step_time": 0.44904065132141113
    },
    {
      "epoch": 0.0001445068359375,
      "model_forward_time": 0.11501622200012207,
      "step": 23676
    },
    {
      "epoch": 0.0001445068359375,
      "step": 23676,
      "training_step_time": 0.3881711959838867
    },
    {
      "epoch": 0.000144512939453125,
      "model_forward_time": 0.11536502838134766,
      "step": 23677
    },
    {
      "epoch": 0.000144512939453125,
      "step": 23677,
      "training_step_time": 0.3916292190551758
    },
    {
      "epoch": 0.00014451904296875,
      "model_forward_time": 0.11498165130615234,
      "step": 23678
    },
    {
      "epoch": 0.00014451904296875,
      "step": 23678,
      "training_step_time": 0.3874239921569824
    },
    {
      "epoch": 0.000144525146484375,
      "model_forward_time": 0.11516141891479492,
      "step": 23679
    },
    {
      "epoch": 0.000144525146484375,
      "step": 23679,
      "training_step_time": 0.4243903160095215
    },
    {
      "epoch": 0.00014453125,
      "grad_norm": 0.16993139684200287,
      "learning_rate": 7.08891808495461e-05,
      "loss": 0.0526,
      "step": 23680
    },
    {
      "epoch": 0.00014453125,
      "model_forward_time": 0.11506271362304688,
      "step": 23680
    },
    {
      "epoch": 0.00014453125,
      "step": 23680,
      "training_step_time": 0.3826932907104492
    },
    {
      "epoch": 0.000144537353515625,
      "model_forward_time": 0.11528158187866211,
      "step": 23681
    },
    {
      "epoch": 0.000144537353515625,
      "step": 23681,
      "training_step_time": 0.44956183433532715
    },
    {
      "epoch": 0.00014454345703125,
      "model_forward_time": 0.11499667167663574,
      "step": 23682
    },
    {
      "epoch": 0.00014454345703125,
      "step": 23682,
      "training_step_time": 0.3959827423095703
    },
    {
      "epoch": 0.000144549560546875,
      "model_forward_time": 0.1153254508972168,
      "step": 23683
    },
    {
      "epoch": 0.000144549560546875,
      "step": 23683,
      "training_step_time": 0.4138755798339844
    },
    {
      "epoch": 0.0001445556640625,
      "model_forward_time": 0.11401534080505371,
      "step": 23684
    },
    {
      "epoch": 0.0001445556640625,
      "step": 23684,
      "training_step_time": 0.39575815200805664
    },
    {
      "epoch": 0.000144561767578125,
      "model_forward_time": 0.11534667015075684,
      "step": 23685
    },
    {
      "epoch": 0.000144561767578125,
      "step": 23685,
      "training_step_time": 0.4417917728424072
    },
    {
      "epoch": 0.00014456787109375,
      "model_forward_time": 0.11528277397155762,
      "step": 23686
    },
    {
      "epoch": 0.00014456787109375,
      "step": 23686,
      "training_step_time": 0.3834249973297119
    },
    {
      "epoch": 0.000144573974609375,
      "model_forward_time": 0.11506938934326172,
      "step": 23687
    },
    {
      "epoch": 0.000144573974609375,
      "step": 23687,
      "training_step_time": 0.5631692409515381
    },
    {
      "epoch": 0.000144580078125,
      "model_forward_time": 0.11470246315002441,
      "step": 23688
    },
    {
      "epoch": 0.000144580078125,
      "step": 23688,
      "training_step_time": 0.3852405548095703
    },
    {
      "epoch": 0.000144586181640625,
      "model_forward_time": 0.11498641967773438,
      "step": 23689
    },
    {
      "epoch": 0.000144586181640625,
      "step": 23689,
      "training_step_time": 0.39255833625793457
    },
    {
      "epoch": 0.00014459228515625,
      "grad_norm": 0.11271026730537415,
      "learning_rate": 7.086414010614276e-05,
      "loss": 0.0521,
      "step": 23690
    },
    {
      "epoch": 0.00014459228515625,
      "model_forward_time": 0.11530137062072754,
      "step": 23690
    },
    {
      "epoch": 0.00014459228515625,
      "step": 23690,
      "training_step_time": 0.39101338386535645
    },
    {
      "epoch": 0.000144598388671875,
      "model_forward_time": 0.11504006385803223,
      "step": 23691
    },
    {
      "epoch": 0.000144598388671875,
      "step": 23691,
      "training_step_time": 0.38479042053222656
    },
    {
      "epoch": 0.0001446044921875,
      "model_forward_time": 0.11536622047424316,
      "step": 23692
    },
    {
      "epoch": 0.0001446044921875,
      "step": 23692,
      "training_step_time": 0.4004173278808594
    },
    {
      "epoch": 0.000144610595703125,
      "model_forward_time": 0.11518454551696777,
      "step": 23693
    },
    {
      "epoch": 0.000144610595703125,
      "step": 23693,
      "training_step_time": 0.5720791816711426
    },
    {
      "epoch": 0.00014461669921875,
      "model_forward_time": 0.11553764343261719,
      "step": 23694
    },
    {
      "epoch": 0.00014461669921875,
      "step": 23694,
      "training_step_time": 0.44705772399902344
    },
    {
      "epoch": 0.000144622802734375,
      "model_forward_time": 0.11577320098876953,
      "step": 23695
    },
    {
      "epoch": 0.000144622802734375,
      "step": 23695,
      "training_step_time": 0.36934995651245117
    },
    {
      "epoch": 0.00014462890625,
      "model_forward_time": 0.1154031753540039,
      "step": 23696
    },
    {
      "epoch": 0.00014462890625,
      "step": 23696,
      "training_step_time": 0.44394588470458984
    },
    {
      "epoch": 0.000144635009765625,
      "model_forward_time": 0.11474847793579102,
      "step": 23697
    },
    {
      "epoch": 0.000144635009765625,
      "step": 23697,
      "training_step_time": 0.44069623947143555
    },
    {
      "epoch": 0.00014464111328125,
      "model_forward_time": 0.11474323272705078,
      "step": 23698
    },
    {
      "epoch": 0.00014464111328125,
      "step": 23698,
      "training_step_time": 0.43088674545288086
    },
    {
      "epoch": 0.000144647216796875,
      "model_forward_time": 0.11538028717041016,
      "step": 23699
    },
    {
      "epoch": 0.000144647216796875,
      "step": 23699,
      "training_step_time": 0.39003729820251465
    },
    {
      "epoch": 0.0001446533203125,
      "grad_norm": 0.1691327542066574,
      "learning_rate": 7.083909302476453e-05,
      "loss": 0.0486,
      "step": 23700
    },
    {
      "epoch": 0.0001446533203125,
      "model_forward_time": 0.11519122123718262,
      "step": 23700
    },
    {
      "epoch": 0.0001446533203125,
      "step": 23700,
      "training_step_time": 0.46936774253845215
    },
    {
      "epoch": 0.000144659423828125,
      "model_forward_time": 0.11505746841430664,
      "step": 23701
    },
    {
      "epoch": 0.000144659423828125,
      "step": 23701,
      "training_step_time": 0.3934500217437744
    },
    {
      "epoch": 0.00014466552734375,
      "model_forward_time": 0.11550164222717285,
      "step": 23702
    },
    {
      "epoch": 0.00014466552734375,
      "step": 23702,
      "training_step_time": 0.39959144592285156
    },
    {
      "epoch": 0.000144671630859375,
      "model_forward_time": 0.11495137214660645,
      "step": 23703
    },
    {
      "epoch": 0.000144671630859375,
      "step": 23703,
      "training_step_time": 0.3891112804412842
    },
    {
      "epoch": 0.000144677734375,
      "model_forward_time": 0.11481761932373047,
      "step": 23704
    },
    {
      "epoch": 0.000144677734375,
      "step": 23704,
      "training_step_time": 0.38660430908203125
    },
    {
      "epoch": 0.000144683837890625,
      "model_forward_time": 0.11549878120422363,
      "step": 23705
    },
    {
      "epoch": 0.000144683837890625,
      "step": 23705,
      "training_step_time": 0.49724793434143066
    },
    {
      "epoch": 0.00014468994140625,
      "model_forward_time": 0.11578917503356934,
      "step": 23706
    },
    {
      "epoch": 0.00014468994140625,
      "step": 23706,
      "training_step_time": 0.45369791984558105
    },
    {
      "epoch": 0.000144696044921875,
      "model_forward_time": 0.11589694023132324,
      "step": 23707
    },
    {
      "epoch": 0.000144696044921875,
      "step": 23707,
      "training_step_time": 0.4330024719238281
    },
    {
      "epoch": 0.0001447021484375,
      "model_forward_time": 0.11605501174926758,
      "step": 23708
    },
    {
      "epoch": 0.0001447021484375,
      "step": 23708,
      "training_step_time": 0.45023250579833984
    },
    {
      "epoch": 0.000144708251953125,
      "model_forward_time": 0.11472511291503906,
      "step": 23709
    },
    {
      "epoch": 0.000144708251953125,
      "step": 23709,
      "training_step_time": 0.36682963371276855
    },
    {
      "epoch": 0.00014471435546875,
      "grad_norm": 0.13313519954681396,
      "learning_rate": 7.081403961302006e-05,
      "loss": 0.0475,
      "step": 23710
    },
    {
      "epoch": 0.00014471435546875,
      "model_forward_time": 0.11535859107971191,
      "step": 23710
    },
    {
      "epoch": 0.00014471435546875,
      "step": 23710,
      "training_step_time": 0.38930773735046387
    },
    {
      "epoch": 0.000144720458984375,
      "model_forward_time": 0.11468362808227539,
      "step": 23711
    },
    {
      "epoch": 0.000144720458984375,
      "step": 23711,
      "training_step_time": 0.47946667671203613
    },
    {
      "epoch": 0.0001447265625,
      "model_forward_time": 0.11536312103271484,
      "step": 23712
    },
    {
      "epoch": 0.0001447265625,
      "step": 23712,
      "training_step_time": 0.46997857093811035
    },
    {
      "epoch": 0.000144732666015625,
      "model_forward_time": 0.11450552940368652,
      "step": 23713
    },
    {
      "epoch": 0.000144732666015625,
      "step": 23713,
      "training_step_time": 0.4534275531768799
    },
    {
      "epoch": 0.00014473876953125,
      "model_forward_time": 0.11581134796142578,
      "step": 23714
    },
    {
      "epoch": 0.00014473876953125,
      "step": 23714,
      "training_step_time": 0.3949472904205322
    },
    {
      "epoch": 0.000144744873046875,
      "model_forward_time": 0.11558222770690918,
      "step": 23715
    },
    {
      "epoch": 0.000144744873046875,
      "step": 23715,
      "training_step_time": 0.3870503902435303
    },
    {
      "epoch": 0.0001447509765625,
      "model_forward_time": 0.1149897575378418,
      "step": 23716
    },
    {
      "epoch": 0.0001447509765625,
      "step": 23716,
      "training_step_time": 0.3891634941101074
    },
    {
      "epoch": 0.000144757080078125,
      "model_forward_time": 0.11513209342956543,
      "step": 23717
    },
    {
      "epoch": 0.000144757080078125,
      "step": 23717,
      "training_step_time": 0.47246384620666504
    },
    {
      "epoch": 0.00014476318359375,
      "model_forward_time": 0.11522793769836426,
      "step": 23718
    },
    {
      "epoch": 0.00014476318359375,
      "step": 23718,
      "training_step_time": 0.38766908645629883
    },
    {
      "epoch": 0.000144769287109375,
      "model_forward_time": 0.1149134635925293,
      "step": 23719
    },
    {
      "epoch": 0.000144769287109375,
      "step": 23719,
      "training_step_time": 0.39174699783325195
    },
    {
      "epoch": 0.000144775390625,
      "grad_norm": 0.17308209836483002,
      "learning_rate": 7.078897987851993e-05,
      "loss": 0.0501,
      "step": 23720
    },
    {
      "epoch": 0.000144775390625,
      "model_forward_time": 0.11507749557495117,
      "step": 23720
    },
    {
      "epoch": 0.000144775390625,
      "step": 23720,
      "training_step_time": 0.38948535919189453
    },
    {
      "epoch": 0.000144781494140625,
      "model_forward_time": 0.11507582664489746,
      "step": 23721
    },
    {
      "epoch": 0.000144781494140625,
      "step": 23721,
      "training_step_time": 0.3992886543273926
    },
    {
      "epoch": 0.00014478759765625,
      "model_forward_time": 0.11531329154968262,
      "step": 23722
    },
    {
      "epoch": 0.00014478759765625,
      "step": 23722,
      "training_step_time": 0.44841551780700684
    },
    {
      "epoch": 0.000144793701171875,
      "model_forward_time": 0.11493396759033203,
      "step": 23723
    },
    {
      "epoch": 0.000144793701171875,
      "step": 23723,
      "training_step_time": 0.6140329837799072
    },
    {
      "epoch": 0.0001447998046875,
      "model_forward_time": 0.11487746238708496,
      "step": 23724
    },
    {
      "epoch": 0.0001447998046875,
      "step": 23724,
      "training_step_time": 0.4082920551300049
    },
    {
      "epoch": 0.000144805908203125,
      "model_forward_time": 0.11516594886779785,
      "step": 23725
    },
    {
      "epoch": 0.000144805908203125,
      "step": 23725,
      "training_step_time": 0.42244958877563477
    },
    {
      "epoch": 0.00014481201171875,
      "model_forward_time": 0.1149587631225586,
      "step": 23726
    },
    {
      "epoch": 0.00014481201171875,
      "step": 23726,
      "training_step_time": 0.44379520416259766
    },
    {
      "epoch": 0.000144818115234375,
      "model_forward_time": 0.11555314064025879,
      "step": 23727
    },
    {
      "epoch": 0.000144818115234375,
      "step": 23727,
      "training_step_time": 0.434009313583374
    },
    {
      "epoch": 0.00014482421875,
      "model_forward_time": 0.11467552185058594,
      "step": 23728
    },
    {
      "epoch": 0.00014482421875,
      "step": 23728,
      "training_step_time": 0.4036686420440674
    },
    {
      "epoch": 0.000144830322265625,
      "model_forward_time": 0.11454391479492188,
      "step": 23729
    },
    {
      "epoch": 0.000144830322265625,
      "step": 23729,
      "training_step_time": 0.6257350444793701
    },
    {
      "epoch": 0.00014483642578125,
      "grad_norm": 0.12172149866819382,
      "learning_rate": 7.076391382887661e-05,
      "loss": 0.0516,
      "step": 23730
    },
    {
      "epoch": 0.00014483642578125,
      "model_forward_time": 0.11458897590637207,
      "step": 23730
    },
    {
      "epoch": 0.00014483642578125,
      "step": 23730,
      "training_step_time": 0.39712095260620117
    },
    {
      "epoch": 0.000144842529296875,
      "model_forward_time": 0.11503362655639648,
      "step": 23731
    },
    {
      "epoch": 0.000144842529296875,
      "step": 23731,
      "training_step_time": 0.3889455795288086
    },
    {
      "epoch": 0.0001448486328125,
      "model_forward_time": 0.11451172828674316,
      "step": 23732
    },
    {
      "epoch": 0.0001448486328125,
      "step": 23732,
      "training_step_time": 0.38886451721191406
    },
    {
      "epoch": 0.000144854736328125,
      "model_forward_time": 0.11487460136413574,
      "step": 23733
    },
    {
      "epoch": 0.000144854736328125,
      "step": 23733,
      "training_step_time": 0.3926208019256592
    },
    {
      "epoch": 0.00014486083984375,
      "model_forward_time": 0.11461043357849121,
      "step": 23734
    },
    {
      "epoch": 0.00014486083984375,
      "step": 23734,
      "training_step_time": 0.45361804962158203
    },
    {
      "epoch": 0.000144866943359375,
      "model_forward_time": 0.11529779434204102,
      "step": 23735
    },
    {
      "epoch": 0.000144866943359375,
      "step": 23735,
      "training_step_time": 0.6608843803405762
    },
    {
      "epoch": 0.000144873046875,
      "model_forward_time": 0.11500930786132812,
      "step": 23736
    },
    {
      "epoch": 0.000144873046875,
      "step": 23736,
      "training_step_time": 0.38997364044189453
    },
    {
      "epoch": 0.000144879150390625,
      "model_forward_time": 0.11509943008422852,
      "step": 23737
    },
    {
      "epoch": 0.000144879150390625,
      "step": 23737,
      "training_step_time": 0.41605615615844727
    },
    {
      "epoch": 0.00014488525390625,
      "model_forward_time": 0.11464405059814453,
      "step": 23738
    },
    {
      "epoch": 0.00014488525390625,
      "step": 23738,
      "training_step_time": 0.4704010486602783
    },
    {
      "epoch": 0.000144891357421875,
      "model_forward_time": 0.11391472816467285,
      "step": 23739
    },
    {
      "epoch": 0.000144891357421875,
      "step": 23739,
      "training_step_time": 0.43976831436157227
    },
    {
      "epoch": 0.0001448974609375,
      "grad_norm": 0.13382016122341156,
      "learning_rate": 7.073884147170452e-05,
      "loss": 0.0455,
      "step": 23740
    },
    {
      "epoch": 0.0001448974609375,
      "model_forward_time": 0.11433839797973633,
      "step": 23740
    },
    {
      "epoch": 0.0001448974609375,
      "step": 23740,
      "training_step_time": 0.4196617603302002
    },
    {
      "epoch": 0.000144903564453125,
      "model_forward_time": 0.11497974395751953,
      "step": 23741
    },
    {
      "epoch": 0.000144903564453125,
      "step": 23741,
      "training_step_time": 0.4931061267852783
    },
    {
      "epoch": 0.00014490966796875,
      "model_forward_time": 0.11513829231262207,
      "step": 23742
    },
    {
      "epoch": 0.00014490966796875,
      "step": 23742,
      "training_step_time": 0.39993858337402344
    },
    {
      "epoch": 0.000144915771484375,
      "model_forward_time": 0.11422252655029297,
      "step": 23743
    },
    {
      "epoch": 0.000144915771484375,
      "step": 23743,
      "training_step_time": 0.4002211093902588
    },
    {
      "epoch": 0.000144921875,
      "model_forward_time": 0.11578893661499023,
      "step": 23744
    },
    {
      "epoch": 0.000144921875,
      "step": 23744,
      "training_step_time": 0.39571237564086914
    },
    {
      "epoch": 0.000144927978515625,
      "model_forward_time": 0.11481881141662598,
      "step": 23745
    },
    {
      "epoch": 0.000144927978515625,
      "step": 23745,
      "training_step_time": 0.38350987434387207
    },
    {
      "epoch": 0.00014493408203125,
      "model_forward_time": 0.11594104766845703,
      "step": 23746
    },
    {
      "epoch": 0.00014493408203125,
      "step": 23746,
      "training_step_time": 0.38866686820983887
    },
    {
      "epoch": 0.000144940185546875,
      "model_forward_time": 0.11570954322814941,
      "step": 23747
    },
    {
      "epoch": 0.000144940185546875,
      "step": 23747,
      "training_step_time": 0.3897111415863037
    },
    {
      "epoch": 0.0001449462890625,
      "model_forward_time": 0.11487197875976562,
      "step": 23748
    },
    {
      "epoch": 0.0001449462890625,
      "step": 23748,
      "training_step_time": 0.4765183925628662
    },
    {
      "epoch": 0.000144952392578125,
      "model_forward_time": 0.11530399322509766,
      "step": 23749
    },
    {
      "epoch": 0.000144952392578125,
      "step": 23749,
      "training_step_time": 0.42592883110046387
    },
    {
      "epoch": 0.00014495849609375,
      "grad_norm": 0.0979052409529686,
      "learning_rate": 7.071376281461994e-05,
      "loss": 0.0503,
      "step": 23750
    },
    {
      "epoch": 0.00014495849609375,
      "model_forward_time": 0.11511850357055664,
      "step": 23750
    },
    {
      "epoch": 0.00014495849609375,
      "step": 23750,
      "training_step_time": 0.48062729835510254
    },
    {
      "epoch": 0.000144964599609375,
      "model_forward_time": 0.11496734619140625,
      "step": 23751
    },
    {
      "epoch": 0.000144964599609375,
      "step": 23751,
      "training_step_time": 0.3889033794403076
    },
    {
      "epoch": 0.000144970703125,
      "model_forward_time": 0.11548733711242676,
      "step": 23752
    },
    {
      "epoch": 0.000144970703125,
      "step": 23752,
      "training_step_time": 0.40612101554870605
    },
    {
      "epoch": 0.000144976806640625,
      "model_forward_time": 0.11509013175964355,
      "step": 23753
    },
    {
      "epoch": 0.000144976806640625,
      "step": 23753,
      "training_step_time": 0.4426097869873047
    },
    {
      "epoch": 0.00014498291015625,
      "model_forward_time": 0.11532950401306152,
      "step": 23754
    },
    {
      "epoch": 0.00014498291015625,
      "step": 23754,
      "training_step_time": 0.48233580589294434
    },
    {
      "epoch": 0.000144989013671875,
      "model_forward_time": 0.11487102508544922,
      "step": 23755
    },
    {
      "epoch": 0.000144989013671875,
      "step": 23755,
      "training_step_time": 0.44710540771484375
    },
    {
      "epoch": 0.0001449951171875,
      "model_forward_time": 0.1152801513671875,
      "step": 23756
    },
    {
      "epoch": 0.0001449951171875,
      "step": 23756,
      "training_step_time": 0.39525532722473145
    },
    {
      "epoch": 0.000145001220703125,
      "model_forward_time": 0.11544585227966309,
      "step": 23757
    },
    {
      "epoch": 0.000145001220703125,
      "step": 23757,
      "training_step_time": 0.3883662223815918
    },
    {
      "epoch": 0.00014500732421875,
      "model_forward_time": 0.11587977409362793,
      "step": 23758
    },
    {
      "epoch": 0.00014500732421875,
      "step": 23758,
      "training_step_time": 0.3863837718963623
    },
    {
      "epoch": 0.000145013427734375,
      "model_forward_time": 0.11520910263061523,
      "step": 23759
    },
    {
      "epoch": 0.000145013427734375,
      "step": 23759,
      "training_step_time": 0.40325260162353516
    },
    {
      "epoch": 0.00014501953125,
      "grad_norm": 0.23248139023780823,
      "learning_rate": 7.068867786524116e-05,
      "loss": 0.0499,
      "step": 23760
    },
    {
      "epoch": 0.00014501953125,
      "model_forward_time": 0.11493086814880371,
      "step": 23760
    },
    {
      "epoch": 0.00014501953125,
      "step": 23760,
      "training_step_time": 0.3892683982849121
    },
    {
      "epoch": 0.000145025634765625,
      "model_forward_time": 0.1154489517211914,
      "step": 23761
    },
    {
      "epoch": 0.000145025634765625,
      "step": 23761,
      "training_step_time": 0.3955833911895752
    },
    {
      "epoch": 0.00014503173828125,
      "model_forward_time": 0.11495399475097656,
      "step": 23762
    },
    {
      "epoch": 0.00014503173828125,
      "step": 23762,
      "training_step_time": 0.4053187370300293
    },
    {
      "epoch": 0.000145037841796875,
      "model_forward_time": 0.11506271362304688,
      "step": 23763
    },
    {
      "epoch": 0.000145037841796875,
      "step": 23763,
      "training_step_time": 0.40569472312927246
    },
    {
      "epoch": 0.0001450439453125,
      "model_forward_time": 0.1150200366973877,
      "step": 23764
    },
    {
      "epoch": 0.0001450439453125,
      "step": 23764,
      "training_step_time": 0.45385003089904785
    },
    {
      "epoch": 0.000145050048828125,
      "model_forward_time": 0.11521673202514648,
      "step": 23765
    },
    {
      "epoch": 0.000145050048828125,
      "step": 23765,
      "training_step_time": 0.4950528144836426
    },
    {
      "epoch": 0.00014505615234375,
      "model_forward_time": 0.11493754386901855,
      "step": 23766
    },
    {
      "epoch": 0.00014505615234375,
      "step": 23766,
      "training_step_time": 0.36609983444213867
    },
    {
      "epoch": 0.000145062255859375,
      "model_forward_time": 0.11573076248168945,
      "step": 23767
    },
    {
      "epoch": 0.000145062255859375,
      "step": 23767,
      "training_step_time": 0.45466089248657227
    },
    {
      "epoch": 0.000145068359375,
      "model_forward_time": 0.11527848243713379,
      "step": 23768
    },
    {
      "epoch": 0.000145068359375,
      "step": 23768,
      "training_step_time": 0.4503464698791504
    },
    {
      "epoch": 0.000145074462890625,
      "model_forward_time": 0.11495208740234375,
      "step": 23769
    },
    {
      "epoch": 0.000145074462890625,
      "step": 23769,
      "training_step_time": 0.44882631301879883
    },
    {
      "epoch": 0.00014508056640625,
      "grad_norm": 0.13984541594982147,
      "learning_rate": 7.06635866311883e-05,
      "loss": 0.0466,
      "step": 23770
    },
    {
      "epoch": 0.00014508056640625,
      "model_forward_time": 0.11509871482849121,
      "step": 23770
    },
    {
      "epoch": 0.00014508056640625,
      "step": 23770,
      "training_step_time": 0.38806819915771484
    },
    {
      "epoch": 0.000145086669921875,
      "model_forward_time": 0.1156623363494873,
      "step": 23771
    },
    {
      "epoch": 0.000145086669921875,
      "step": 23771,
      "training_step_time": 0.39505553245544434
    },
    {
      "epoch": 0.0001450927734375,
      "model_forward_time": 0.1154172420501709,
      "step": 23772
    },
    {
      "epoch": 0.0001450927734375,
      "step": 23772,
      "training_step_time": 0.3886733055114746
    },
    {
      "epoch": 0.000145098876953125,
      "model_forward_time": 0.11492443084716797,
      "step": 23773
    },
    {
      "epoch": 0.000145098876953125,
      "step": 23773,
      "training_step_time": 0.37562131881713867
    },
    {
      "epoch": 0.00014510498046875,
      "model_forward_time": 0.11562085151672363,
      "step": 23774
    },
    {
      "epoch": 0.00014510498046875,
      "step": 23774,
      "training_step_time": 0.392117977142334
    },
    {
      "epoch": 0.000145111083984375,
      "model_forward_time": 0.11539220809936523,
      "step": 23775
    },
    {
      "epoch": 0.000145111083984375,
      "step": 23775,
      "training_step_time": 0.38923096656799316
    },
    {
      "epoch": 0.0001451171875,
      "model_forward_time": 0.11732745170593262,
      "step": 23776
    },
    {
      "epoch": 0.0001451171875,
      "step": 23776,
      "training_step_time": 0.39664769172668457
    },
    {
      "epoch": 0.000145123291015625,
      "model_forward_time": 0.11553263664245605,
      "step": 23777
    },
    {
      "epoch": 0.000145123291015625,
      "step": 23777,
      "training_step_time": 0.6862707138061523
    },
    {
      "epoch": 0.00014512939453125,
      "model_forward_time": 0.11574172973632812,
      "step": 23778
    },
    {
      "epoch": 0.00014512939453125,
      "step": 23778,
      "training_step_time": 0.46346402168273926
    },
    {
      "epoch": 0.000145135498046875,
      "model_forward_time": 0.11452889442443848,
      "step": 23779
    },
    {
      "epoch": 0.000145135498046875,
      "step": 23779,
      "training_step_time": 0.39634203910827637
    },
    {
      "epoch": 0.0001451416015625,
      "grad_norm": 0.1293359100818634,
      "learning_rate": 7.06384891200834e-05,
      "loss": 0.0485,
      "step": 23780
    },
    {
      "epoch": 0.0001451416015625,
      "model_forward_time": 0.11440753936767578,
      "step": 23780
    },
    {
      "epoch": 0.0001451416015625,
      "step": 23780,
      "training_step_time": 0.3635110855102539
    },
    {
      "epoch": 0.000145147705078125,
      "model_forward_time": 0.11955499649047852,
      "step": 23781
    },
    {
      "epoch": 0.000145147705078125,
      "step": 23781,
      "training_step_time": 0.4304215908050537
    },
    {
      "epoch": 0.00014515380859375,
      "model_forward_time": 0.11912322044372559,
      "step": 23782
    },
    {
      "epoch": 0.00014515380859375,
      "step": 23782,
      "training_step_time": 0.38375163078308105
    },
    {
      "epoch": 0.000145159912109375,
      "model_forward_time": 0.11833477020263672,
      "step": 23783
    },
    {
      "epoch": 0.000145159912109375,
      "step": 23783,
      "training_step_time": 0.4940667152404785
    },
    {
      "epoch": 0.000145166015625,
      "model_forward_time": 0.11817669868469238,
      "step": 23784
    },
    {
      "epoch": 0.000145166015625,
      "step": 23784,
      "training_step_time": 0.37978076934814453
    },
    {
      "epoch": 0.000145172119140625,
      "model_forward_time": 0.11815929412841797,
      "step": 23785
    },
    {
      "epoch": 0.000145172119140625,
      "step": 23785,
      "training_step_time": 0.38692617416381836
    },
    {
      "epoch": 0.00014517822265625,
      "model_forward_time": 0.11808443069458008,
      "step": 23786
    },
    {
      "epoch": 0.00014517822265625,
      "step": 23786,
      "training_step_time": 0.38353705406188965
    },
    {
      "epoch": 0.000145184326171875,
      "model_forward_time": 0.11834573745727539,
      "step": 23787
    },
    {
      "epoch": 0.000145184326171875,
      "step": 23787,
      "training_step_time": 0.3810853958129883
    },
    {
      "epoch": 0.0001451904296875,
      "model_forward_time": 0.11811971664428711,
      "step": 23788
    },
    {
      "epoch": 0.0001451904296875,
      "step": 23788,
      "training_step_time": 0.3816845417022705
    },
    {
      "epoch": 0.000145196533203125,
      "model_forward_time": 0.11627745628356934,
      "step": 23789
    },
    {
      "epoch": 0.000145196533203125,
      "step": 23789,
      "training_step_time": 0.6394152641296387
    },
    {
      "epoch": 0.00014520263671875,
      "grad_norm": 0.16955843567848206,
      "learning_rate": 7.061338533955043e-05,
      "loss": 0.0529,
      "step": 23790
    },
    {
      "epoch": 0.00014520263671875,
      "model_forward_time": 0.11464810371398926,
      "step": 23790
    },
    {
      "epoch": 0.00014520263671875,
      "step": 23790,
      "training_step_time": 0.492816686630249
    },
    {
      "epoch": 0.000145208740234375,
      "model_forward_time": 0.11451148986816406,
      "step": 23791
    },
    {
      "epoch": 0.000145208740234375,
      "step": 23791,
      "training_step_time": 0.408130407333374
    },
    {
      "epoch": 0.00014521484375,
      "model_forward_time": 0.1147146224975586,
      "step": 23792
    },
    {
      "epoch": 0.00014521484375,
      "step": 23792,
      "training_step_time": 0.47092700004577637
    },
    {
      "epoch": 0.000145220947265625,
      "model_forward_time": 0.11460375785827637,
      "step": 23793
    },
    {
      "epoch": 0.000145220947265625,
      "step": 23793,
      "training_step_time": 0.3911759853363037
    },
    {
      "epoch": 0.00014522705078125,
      "model_forward_time": 0.11525774002075195,
      "step": 23794
    },
    {
      "epoch": 0.00014522705078125,
      "step": 23794,
      "training_step_time": 0.38195204734802246
    },
    {
      "epoch": 0.000145233154296875,
      "model_forward_time": 0.1144561767578125,
      "step": 23795
    },
    {
      "epoch": 0.000145233154296875,
      "step": 23795,
      "training_step_time": 0.448319673538208
    },
    {
      "epoch": 0.0001452392578125,
      "model_forward_time": 0.11521172523498535,
      "step": 23796
    },
    {
      "epoch": 0.0001452392578125,
      "step": 23796,
      "training_step_time": 0.48733973503112793
    },
    {
      "epoch": 0.000145245361328125,
      "model_forward_time": 0.11465907096862793,
      "step": 23797
    },
    {
      "epoch": 0.000145245361328125,
      "step": 23797,
      "training_step_time": 0.4289076328277588
    },
    {
      "epoch": 0.00014525146484375,
      "model_forward_time": 0.11551499366760254,
      "step": 23798
    },
    {
      "epoch": 0.00014525146484375,
      "step": 23798,
      "training_step_time": 0.39122819900512695
    },
    {
      "epoch": 0.000145257568359375,
      "model_forward_time": 0.11501145362854004,
      "step": 23799
    },
    {
      "epoch": 0.000145257568359375,
      "step": 23799,
      "training_step_time": 0.38669252395629883
    },
    {
      "epoch": 0.000145263671875,
      "grad_norm": 0.09156502783298492,
      "learning_rate": 7.058827529721525e-05,
      "loss": 0.0468,
      "step": 23800
    },
    {
      "epoch": 0.000145263671875,
      "model_forward_time": 0.11570382118225098,
      "step": 23800
    },
    {
      "epoch": 0.000145263671875,
      "step": 23800,
      "training_step_time": 0.3943459987640381
    },
    {
      "epoch": 0.000145269775390625,
      "model_forward_time": 0.11516189575195312,
      "step": 23801
    },
    {
      "epoch": 0.000145269775390625,
      "step": 23801,
      "training_step_time": 0.545121431350708
    },
    {
      "epoch": 0.00014527587890625,
      "model_forward_time": 0.11480116844177246,
      "step": 23802
    },
    {
      "epoch": 0.00014527587890625,
      "step": 23802,
      "training_step_time": 0.3923466205596924
    },
    {
      "epoch": 0.000145281982421875,
      "model_forward_time": 0.11574959754943848,
      "step": 23803
    },
    {
      "epoch": 0.000145281982421875,
      "step": 23803,
      "training_step_time": 0.39254188537597656
    },
    {
      "epoch": 0.0001452880859375,
      "model_forward_time": 0.11473774909973145,
      "step": 23804
    },
    {
      "epoch": 0.0001452880859375,
      "step": 23804,
      "training_step_time": 0.5040664672851562
    },
    {
      "epoch": 0.000145294189453125,
      "model_forward_time": 0.11534309387207031,
      "step": 23805
    },
    {
      "epoch": 0.000145294189453125,
      "step": 23805,
      "training_step_time": 0.4249227046966553
    },
    {
      "epoch": 0.00014530029296875,
      "model_forward_time": 0.11497902870178223,
      "step": 23806
    },
    {
      "epoch": 0.00014530029296875,
      "step": 23806,
      "training_step_time": 0.4711635112762451
    },
    {
      "epoch": 0.000145306396484375,
      "model_forward_time": 0.11509084701538086,
      "step": 23807
    },
    {
      "epoch": 0.000145306396484375,
      "step": 23807,
      "training_step_time": 0.5449931621551514
    },
    {
      "epoch": 0.0001453125,
      "model_forward_time": 0.11433935165405273,
      "step": 23808
    },
    {
      "epoch": 0.0001453125,
      "step": 23808,
      "training_step_time": 0.40636348724365234
    },
    {
      "epoch": 0.000145318603515625,
      "model_forward_time": 0.1155405044555664,
      "step": 23809
    },
    {
      "epoch": 0.000145318603515625,
      "step": 23809,
      "training_step_time": 0.4868321418762207
    },
    {
      "epoch": 0.00014532470703125,
      "grad_norm": 0.2701468765735626,
      "learning_rate": 7.056315900070565e-05,
      "loss": 0.05,
      "step": 23810
    },
    {
      "epoch": 0.00014532470703125,
      "model_forward_time": 0.11432552337646484,
      "step": 23810
    },
    {
      "epoch": 0.00014532470703125,
      "step": 23810,
      "training_step_time": 0.4186854362487793
    },
    {
      "epoch": 0.000145330810546875,
      "model_forward_time": 0.11411547660827637,
      "step": 23811
    },
    {
      "epoch": 0.000145330810546875,
      "step": 23811,
      "training_step_time": 0.4642601013183594
    },
    {
      "epoch": 0.0001453369140625,
      "model_forward_time": 0.11500000953674316,
      "step": 23812
    },
    {
      "epoch": 0.0001453369140625,
      "step": 23812,
      "training_step_time": 0.38059115409851074
    },
    {
      "epoch": 0.000145343017578125,
      "model_forward_time": 0.11544322967529297,
      "step": 23813
    },
    {
      "epoch": 0.000145343017578125,
      "step": 23813,
      "training_step_time": 0.3874216079711914
    },
    {
      "epoch": 0.00014534912109375,
      "model_forward_time": 0.11533284187316895,
      "step": 23814
    },
    {
      "epoch": 0.00014534912109375,
      "step": 23814,
      "training_step_time": 0.3944394588470459
    },
    {
      "epoch": 0.000145355224609375,
      "model_forward_time": 0.1143190860748291,
      "step": 23815
    },
    {
      "epoch": 0.000145355224609375,
      "step": 23815,
      "training_step_time": 0.39629077911376953
    },
    {
      "epoch": 0.000145361328125,
      "model_forward_time": 0.11558699607849121,
      "step": 23816
    },
    {
      "epoch": 0.000145361328125,
      "step": 23816,
      "training_step_time": 0.3956458568572998
    },
    {
      "epoch": 0.000145367431640625,
      "model_forward_time": 0.11491680145263672,
      "step": 23817
    },
    {
      "epoch": 0.000145367431640625,
      "step": 23817,
      "training_step_time": 0.3820199966430664
    },
    {
      "epoch": 0.00014537353515625,
      "model_forward_time": 0.11495423316955566,
      "step": 23818
    },
    {
      "epoch": 0.00014537353515625,
      "step": 23818,
      "training_step_time": 0.5057246685028076
    },
    {
      "epoch": 0.000145379638671875,
      "model_forward_time": 0.11560702323913574,
      "step": 23819
    },
    {
      "epoch": 0.000145379638671875,
      "step": 23819,
      "training_step_time": 0.5134685039520264
    },
    {
      "epoch": 0.0001453857421875,
      "grad_norm": 0.12306518852710724,
      "learning_rate": 7.053803645765128e-05,
      "loss": 0.0442,
      "step": 23820
    },
    {
      "epoch": 0.0001453857421875,
      "model_forward_time": 0.11451482772827148,
      "step": 23820
    },
    {
      "epoch": 0.0001453857421875,
      "step": 23820,
      "training_step_time": 0.41297459602355957
    },
    {
      "epoch": 0.000145391845703125,
      "model_forward_time": 0.11481714248657227,
      "step": 23821
    },
    {
      "epoch": 0.000145391845703125,
      "step": 23821,
      "training_step_time": 0.39644551277160645
    },
    {
      "epoch": 0.00014539794921875,
      "model_forward_time": 0.11450362205505371,
      "step": 23822
    },
    {
      "epoch": 0.00014539794921875,
      "step": 23822,
      "training_step_time": 0.3656919002532959
    },
    {
      "epoch": 0.000145404052734375,
      "model_forward_time": 0.11848831176757812,
      "step": 23823
    },
    {
      "epoch": 0.000145404052734375,
      "step": 23823,
      "training_step_time": 0.432403564453125
    },
    {
      "epoch": 0.00014541015625,
      "model_forward_time": 0.11827278137207031,
      "step": 23824
    },
    {
      "epoch": 0.00014541015625,
      "step": 23824,
      "training_step_time": 0.4237642288208008
    },
    {
      "epoch": 0.000145416259765625,
      "model_forward_time": 0.11793017387390137,
      "step": 23825
    },
    {
      "epoch": 0.000145416259765625,
      "step": 23825,
      "training_step_time": 0.5646753311157227
    },
    {
      "epoch": 0.00014542236328125,
      "model_forward_time": 0.11801815032958984,
      "step": 23826
    },
    {
      "epoch": 0.00014542236328125,
      "step": 23826,
      "training_step_time": 0.3826444149017334
    },
    {
      "epoch": 0.000145428466796875,
      "model_forward_time": 0.11835527420043945,
      "step": 23827
    },
    {
      "epoch": 0.000145428466796875,
      "step": 23827,
      "training_step_time": 0.3789093494415283
    },
    {
      "epoch": 0.0001454345703125,
      "model_forward_time": 0.1148984432220459,
      "step": 23828
    },
    {
      "epoch": 0.0001454345703125,
      "step": 23828,
      "training_step_time": 0.379194974899292
    },
    {
      "epoch": 0.000145440673828125,
      "model_forward_time": 0.11503052711486816,
      "step": 23829
    },
    {
      "epoch": 0.000145440673828125,
      "step": 23829,
      "training_step_time": 0.38888120651245117
    },
    {
      "epoch": 0.00014544677734375,
      "grad_norm": 0.0962221771478653,
      "learning_rate": 7.051290767568371e-05,
      "loss": 0.0523,
      "step": 23830
    },
    {
      "epoch": 0.00014544677734375,
      "model_forward_time": 0.11477470397949219,
      "step": 23830
    },
    {
      "epoch": 0.00014544677734375,
      "step": 23830,
      "training_step_time": 0.38617873191833496
    },
    {
      "epoch": 0.000145452880859375,
      "model_forward_time": 0.11550474166870117,
      "step": 23831
    },
    {
      "epoch": 0.000145452880859375,
      "step": 23831,
      "training_step_time": 0.6254837512969971
    },
    {
      "epoch": 0.000145458984375,
      "model_forward_time": 0.11523652076721191,
      "step": 23832
    },
    {
      "epoch": 0.000145458984375,
      "step": 23832,
      "training_step_time": 0.4169905185699463
    },
    {
      "epoch": 0.000145465087890625,
      "model_forward_time": 0.1153554916381836,
      "step": 23833
    },
    {
      "epoch": 0.000145465087890625,
      "step": 23833,
      "training_step_time": 0.4893016815185547
    },
    {
      "epoch": 0.00014547119140625,
      "model_forward_time": 0.11459016799926758,
      "step": 23834
    },
    {
      "epoch": 0.00014547119140625,
      "step": 23834,
      "training_step_time": 0.39017724990844727
    },
    {
      "epoch": 0.000145477294921875,
      "model_forward_time": 0.11461353302001953,
      "step": 23835
    },
    {
      "epoch": 0.000145477294921875,
      "step": 23835,
      "training_step_time": 0.3845484256744385
    },
    {
      "epoch": 0.0001454833984375,
      "model_forward_time": 0.11433672904968262,
      "step": 23836
    },
    {
      "epoch": 0.0001454833984375,
      "step": 23836,
      "training_step_time": 0.3666229248046875
    },
    {
      "epoch": 0.000145489501953125,
      "model_forward_time": 0.11538577079772949,
      "step": 23837
    },
    {
      "epoch": 0.000145489501953125,
      "step": 23837,
      "training_step_time": 0.46065831184387207
    },
    {
      "epoch": 0.00014549560546875,
      "model_forward_time": 0.1151120662689209,
      "step": 23838
    },
    {
      "epoch": 0.00014549560546875,
      "step": 23838,
      "training_step_time": 0.4200620651245117
    },
    {
      "epoch": 0.000145501708984375,
      "model_forward_time": 0.11516094207763672,
      "step": 23839
    },
    {
      "epoch": 0.000145501708984375,
      "step": 23839,
      "training_step_time": 0.3909883499145508
    },
    {
      "epoch": 0.0001455078125,
      "grad_norm": 0.11536983400583267,
      "learning_rate": 7.04877726624364e-05,
      "loss": 0.0464,
      "step": 23840
    },
    {
      "epoch": 0.0001455078125,
      "model_forward_time": 0.1147298812866211,
      "step": 23840
    },
    {
      "epoch": 0.0001455078125,
      "step": 23840,
      "training_step_time": 0.3919684886932373
    },
    {
      "epoch": 0.000145513916015625,
      "model_forward_time": 0.11519742012023926,
      "step": 23841
    },
    {
      "epoch": 0.000145513916015625,
      "step": 23841,
      "training_step_time": 0.38199639320373535
    },
    {
      "epoch": 0.00014552001953125,
      "model_forward_time": 0.11478447914123535,
      "step": 23842
    },
    {
      "epoch": 0.00014552001953125,
      "step": 23842,
      "training_step_time": 0.3906543254852295
    },
    {
      "epoch": 0.000145526123046875,
      "model_forward_time": 0.11544227600097656,
      "step": 23843
    },
    {
      "epoch": 0.000145526123046875,
      "step": 23843,
      "training_step_time": 0.614525318145752
    },
    {
      "epoch": 0.0001455322265625,
      "model_forward_time": 0.1148843765258789,
      "step": 23844
    },
    {
      "epoch": 0.0001455322265625,
      "step": 23844,
      "training_step_time": 0.38762784004211426
    },
    {
      "epoch": 0.000145538330078125,
      "model_forward_time": 0.1150662899017334,
      "step": 23845
    },
    {
      "epoch": 0.000145538330078125,
      "step": 23845,
      "training_step_time": 0.424985408782959
    },
    {
      "epoch": 0.00014554443359375,
      "model_forward_time": 0.11519241333007812,
      "step": 23846
    },
    {
      "epoch": 0.00014554443359375,
      "step": 23846,
      "training_step_time": 0.4037020206451416
    },
    {
      "epoch": 0.000145550537109375,
      "model_forward_time": 0.11526155471801758,
      "step": 23847
    },
    {
      "epoch": 0.000145550537109375,
      "step": 23847,
      "training_step_time": 0.43056511878967285
    },
    {
      "epoch": 0.000145556640625,
      "model_forward_time": 0.11513924598693848,
      "step": 23848
    },
    {
      "epoch": 0.000145556640625,
      "step": 23848,
      "training_step_time": 0.399369478225708
    },
    {
      "epoch": 0.000145562744140625,
      "model_forward_time": 0.11507868766784668,
      "step": 23849
    },
    {
      "epoch": 0.000145562744140625,
      "step": 23849,
      "training_step_time": 0.5318489074707031
    },
    {
      "epoch": 0.00014556884765625,
      "grad_norm": 0.11634897440671921,
      "learning_rate": 7.04626314255447e-05,
      "loss": 0.0564,
      "step": 23850
    },
    {
      "epoch": 0.00014556884765625,
      "model_forward_time": 0.11548161506652832,
      "step": 23850
    },
    {
      "epoch": 0.00014556884765625,
      "step": 23850,
      "training_step_time": 0.3979618549346924
    },
    {
      "epoch": 0.000145574951171875,
      "model_forward_time": 0.11518573760986328,
      "step": 23851
    },
    {
      "epoch": 0.000145574951171875,
      "step": 23851,
      "training_step_time": 0.41019105911254883
    },
    {
      "epoch": 0.0001455810546875,
      "model_forward_time": 0.1152801513671875,
      "step": 23852
    },
    {
      "epoch": 0.0001455810546875,
      "step": 23852,
      "training_step_time": 0.4326136112213135
    },
    {
      "epoch": 0.000145587158203125,
      "model_forward_time": 0.11488819122314453,
      "step": 23853
    },
    {
      "epoch": 0.000145587158203125,
      "step": 23853,
      "training_step_time": 0.45891690254211426
    },
    {
      "epoch": 0.00014559326171875,
      "model_forward_time": 0.11558103561401367,
      "step": 23854
    },
    {
      "epoch": 0.00014559326171875,
      "step": 23854,
      "training_step_time": 0.39983296394348145
    },
    {
      "epoch": 0.000145599365234375,
      "model_forward_time": 0.11524677276611328,
      "step": 23855
    },
    {
      "epoch": 0.000145599365234375,
      "step": 23855,
      "training_step_time": 0.5115678310394287
    },
    {
      "epoch": 0.00014560546875,
      "model_forward_time": 0.1150672435760498,
      "step": 23856
    },
    {
      "epoch": 0.00014560546875,
      "step": 23856,
      "training_step_time": 0.3913700580596924
    },
    {
      "epoch": 0.000145611572265625,
      "model_forward_time": 0.11499357223510742,
      "step": 23857
    },
    {
      "epoch": 0.000145611572265625,
      "step": 23857,
      "training_step_time": 0.38425588607788086
    },
    {
      "epoch": 0.00014561767578125,
      "model_forward_time": 0.115203857421875,
      "step": 23858
    },
    {
      "epoch": 0.00014561767578125,
      "step": 23858,
      "training_step_time": 0.4024834632873535
    },
    {
      "epoch": 0.000145623779296875,
      "model_forward_time": 0.11462140083312988,
      "step": 23859
    },
    {
      "epoch": 0.000145623779296875,
      "step": 23859,
      "training_step_time": 0.39833641052246094
    },
    {
      "epoch": 0.0001456298828125,
      "grad_norm": 0.18547232449054718,
      "learning_rate": 7.043748397264587e-05,
      "loss": 0.0518,
      "step": 23860
    },
    {
      "epoch": 0.0001456298828125,
      "model_forward_time": 0.11465001106262207,
      "step": 23860
    },
    {
      "epoch": 0.0001456298828125,
      "step": 23860,
      "training_step_time": 0.40167665481567383
    },
    {
      "epoch": 0.000145635986328125,
      "model_forward_time": 0.11562752723693848,
      "step": 23861
    },
    {
      "epoch": 0.000145635986328125,
      "step": 23861,
      "training_step_time": 0.6396996974945068
    },
    {
      "epoch": 0.00014564208984375,
      "model_forward_time": 0.11454010009765625,
      "step": 23862
    },
    {
      "epoch": 0.00014564208984375,
      "step": 23862,
      "training_step_time": 0.3907475471496582
    },
    {
      "epoch": 0.000145648193359375,
      "model_forward_time": 0.11495566368103027,
      "step": 23863
    },
    {
      "epoch": 0.000145648193359375,
      "step": 23863,
      "training_step_time": 0.38742637634277344
    },
    {
      "epoch": 0.000145654296875,
      "model_forward_time": 0.11568713188171387,
      "step": 23864
    },
    {
      "epoch": 0.000145654296875,
      "step": 23864,
      "training_step_time": 0.3849325180053711
    },
    {
      "epoch": 0.000145660400390625,
      "model_forward_time": 0.11793279647827148,
      "step": 23865
    },
    {
      "epoch": 0.000145660400390625,
      "step": 23865,
      "training_step_time": 0.4193272590637207
    },
    {
      "epoch": 0.00014566650390625,
      "model_forward_time": 0.11791110038757324,
      "step": 23866
    },
    {
      "epoch": 0.00014566650390625,
      "step": 23866,
      "training_step_time": 0.46697258949279785
    },
    {
      "epoch": 0.000145672607421875,
      "model_forward_time": 0.12044382095336914,
      "step": 23867
    },
    {
      "epoch": 0.000145672607421875,
      "step": 23867,
      "training_step_time": 0.6011419296264648
    },
    {
      "epoch": 0.0001456787109375,
      "model_forward_time": 0.11536812782287598,
      "step": 23868
    },
    {
      "epoch": 0.0001456787109375,
      "step": 23868,
      "training_step_time": 0.4146888256072998
    },
    {
      "epoch": 0.000145684814453125,
      "model_forward_time": 0.11438131332397461,
      "step": 23869
    },
    {
      "epoch": 0.000145684814453125,
      "step": 23869,
      "training_step_time": 0.40967607498168945
    },
    {
      "epoch": 0.00014569091796875,
      "grad_norm": 0.16698738932609558,
      "learning_rate": 7.041233031137903e-05,
      "loss": 0.0516,
      "step": 23870
    },
    {
      "epoch": 0.00014569091796875,
      "model_forward_time": 0.11497926712036133,
      "step": 23870
    },
    {
      "epoch": 0.00014569091796875,
      "step": 23870,
      "training_step_time": 0.38692569732666016
    },
    {
      "epoch": 0.000145697021484375,
      "model_forward_time": 0.11455774307250977,
      "step": 23871
    },
    {
      "epoch": 0.000145697021484375,
      "step": 23871,
      "training_step_time": 0.39857029914855957
    },
    {
      "epoch": 0.000145703125,
      "model_forward_time": 0.11571741104125977,
      "step": 23872
    },
    {
      "epoch": 0.000145703125,
      "step": 23872,
      "training_step_time": 0.3833732604980469
    },
    {
      "epoch": 0.000145709228515625,
      "model_forward_time": 0.11642932891845703,
      "step": 23873
    },
    {
      "epoch": 0.000145709228515625,
      "step": 23873,
      "training_step_time": 0.5928912162780762
    },
    {
      "epoch": 0.00014571533203125,
      "model_forward_time": 0.11469602584838867,
      "step": 23874
    },
    {
      "epoch": 0.00014571533203125,
      "step": 23874,
      "training_step_time": 0.39067530632019043
    },
    {
      "epoch": 0.000145721435546875,
      "model_forward_time": 0.11584067344665527,
      "step": 23875
    },
    {
      "epoch": 0.000145721435546875,
      "step": 23875,
      "training_step_time": 0.4669673442840576
    },
    {
      "epoch": 0.0001457275390625,
      "model_forward_time": 0.11469125747680664,
      "step": 23876
    },
    {
      "epoch": 0.0001457275390625,
      "step": 23876,
      "training_step_time": 0.38636136054992676
    },
    {
      "epoch": 0.000145733642578125,
      "model_forward_time": 0.11458563804626465,
      "step": 23877
    },
    {
      "epoch": 0.000145733642578125,
      "step": 23877,
      "training_step_time": 0.38517069816589355
    },
    {
      "epoch": 0.00014573974609375,
      "model_forward_time": 0.11525559425354004,
      "step": 23878
    },
    {
      "epoch": 0.00014573974609375,
      "step": 23878,
      "training_step_time": 0.3830554485321045
    },
    {
      "epoch": 0.000145745849609375,
      "model_forward_time": 0.1150510311126709,
      "step": 23879
    },
    {
      "epoch": 0.000145745849609375,
      "step": 23879,
      "training_step_time": 0.5592823028564453
    },
    {
      "epoch": 0.000145751953125,
      "grad_norm": 0.16490702331066132,
      "learning_rate": 7.038717044938519e-05,
      "loss": 0.0582,
      "step": 23880
    },
    {
      "epoch": 0.000145751953125,
      "model_forward_time": 0.11502647399902344,
      "step": 23880
    },
    {
      "epoch": 0.000145751953125,
      "step": 23880,
      "training_step_time": 0.43299317359924316
    },
    {
      "epoch": 0.000145758056640625,
      "model_forward_time": 0.11560630798339844,
      "step": 23881
    },
    {
      "epoch": 0.000145758056640625,
      "step": 23881,
      "training_step_time": 0.48527956008911133
    },
    {
      "epoch": 0.00014576416015625,
      "model_forward_time": 0.11607694625854492,
      "step": 23882
    },
    {
      "epoch": 0.00014576416015625,
      "step": 23882,
      "training_step_time": 0.3788321018218994
    },
    {
      "epoch": 0.000145770263671875,
      "model_forward_time": 0.11462855339050293,
      "step": 23883
    },
    {
      "epoch": 0.000145770263671875,
      "step": 23883,
      "training_step_time": 0.3804161548614502
    },
    {
      "epoch": 0.0001457763671875,
      "model_forward_time": 0.11510753631591797,
      "step": 23884
    },
    {
      "epoch": 0.0001457763671875,
      "step": 23884,
      "training_step_time": 0.3826637268066406
    },
    {
      "epoch": 0.000145782470703125,
      "model_forward_time": 0.11529684066772461,
      "step": 23885
    },
    {
      "epoch": 0.000145782470703125,
      "step": 23885,
      "training_step_time": 0.5265729427337646
    },
    {
      "epoch": 0.00014578857421875,
      "model_forward_time": 0.1147916316986084,
      "step": 23886
    },
    {
      "epoch": 0.00014578857421875,
      "step": 23886,
      "training_step_time": 0.47485995292663574
    },
    {
      "epoch": 0.000145794677734375,
      "model_forward_time": 0.11488819122314453,
      "step": 23887
    },
    {
      "epoch": 0.000145794677734375,
      "step": 23887,
      "training_step_time": 0.4100475311279297
    },
    {
      "epoch": 0.00014580078125,
      "model_forward_time": 0.11490750312805176,
      "step": 23888
    },
    {
      "epoch": 0.00014580078125,
      "step": 23888,
      "training_step_time": 0.47498512268066406
    },
    {
      "epoch": 0.000145806884765625,
      "model_forward_time": 0.11493730545043945,
      "step": 23889
    },
    {
      "epoch": 0.000145806884765625,
      "step": 23889,
      "training_step_time": 0.38968801498413086
    },
    {
      "epoch": 0.00014581298828125,
      "grad_norm": 0.15940701961517334,
      "learning_rate": 7.036200439430725e-05,
      "loss": 0.0491,
      "step": 23890
    },
    {
      "epoch": 0.00014581298828125,
      "model_forward_time": 0.11455297470092773,
      "step": 23890
    },
    {
      "epoch": 0.00014581298828125,
      "step": 23890,
      "training_step_time": 0.4487769603729248
    },
    {
      "epoch": 0.000145819091796875,
      "model_forward_time": 0.115386962890625,
      "step": 23891
    },
    {
      "epoch": 0.000145819091796875,
      "step": 23891,
      "training_step_time": 0.4303576946258545
    },
    {
      "epoch": 0.0001458251953125,
      "model_forward_time": 0.11598491668701172,
      "step": 23892
    },
    {
      "epoch": 0.0001458251953125,
      "step": 23892,
      "training_step_time": 0.3966403007507324
    },
    {
      "epoch": 0.000145831298828125,
      "model_forward_time": 0.11508989334106445,
      "step": 23893
    },
    {
      "epoch": 0.000145831298828125,
      "step": 23893,
      "training_step_time": 0.4443650245666504
    },
    {
      "epoch": 0.00014583740234375,
      "model_forward_time": 0.11601805686950684,
      "step": 23894
    },
    {
      "epoch": 0.00014583740234375,
      "step": 23894,
      "training_step_time": 0.4953577518463135
    },
    {
      "epoch": 0.000145843505859375,
      "model_forward_time": 0.11510396003723145,
      "step": 23895
    },
    {
      "epoch": 0.000145843505859375,
      "step": 23895,
      "training_step_time": 0.43050169944763184
    },
    {
      "epoch": 0.000145849609375,
      "model_forward_time": 0.11474609375,
      "step": 23896
    },
    {
      "epoch": 0.000145849609375,
      "step": 23896,
      "training_step_time": 0.384260892868042
    },
    {
      "epoch": 0.000145855712890625,
      "model_forward_time": 0.11559367179870605,
      "step": 23897
    },
    {
      "epoch": 0.000145855712890625,
      "step": 23897,
      "training_step_time": 0.512986421585083
    },
    {
      "epoch": 0.00014586181640625,
      "model_forward_time": 0.11544609069824219,
      "step": 23898
    },
    {
      "epoch": 0.00014586181640625,
      "step": 23898,
      "training_step_time": 0.407001256942749
    },
    {
      "epoch": 0.000145867919921875,
      "model_forward_time": 0.11507296562194824,
      "step": 23899
    },
    {
      "epoch": 0.000145867919921875,
      "step": 23899,
      "training_step_time": 0.4093208312988281
    },
    {
      "epoch": 0.0001458740234375,
      "grad_norm": 0.12496775388717651,
      "learning_rate": 7.033683215379002e-05,
      "loss": 0.0439,
      "step": 23900
    },
    {
      "epoch": 0.0001458740234375,
      "model_forward_time": 0.11454653739929199,
      "step": 23900
    },
    {
      "epoch": 0.0001458740234375,
      "step": 23900,
      "training_step_time": 0.38557863235473633
    },
    {
      "epoch": 0.000145880126953125,
      "model_forward_time": 0.11579728126525879,
      "step": 23901
    },
    {
      "epoch": 0.000145880126953125,
      "step": 23901,
      "training_step_time": 0.3911144733428955
    },
    {
      "epoch": 0.00014588623046875,
      "model_forward_time": 0.11526632308959961,
      "step": 23902
    },
    {
      "epoch": 0.00014588623046875,
      "step": 23902,
      "training_step_time": 0.49666500091552734
    },
    {
      "epoch": 0.000145892333984375,
      "model_forward_time": 0.11667704582214355,
      "step": 23903
    },
    {
      "epoch": 0.000145892333984375,
      "step": 23903,
      "training_step_time": 0.6206045150756836
    },
    {
      "epoch": 0.0001458984375,
      "model_forward_time": 0.11455750465393066,
      "step": 23904
    },
    {
      "epoch": 0.0001458984375,
      "step": 23904,
      "training_step_time": 0.3914682865142822
    },
    {
      "epoch": 0.000145904541015625,
      "model_forward_time": 0.11534905433654785,
      "step": 23905
    },
    {
      "epoch": 0.000145904541015625,
      "step": 23905,
      "training_step_time": 0.390791654586792
    },
    {
      "epoch": 0.00014591064453125,
      "model_forward_time": 0.11464309692382812,
      "step": 23906
    },
    {
      "epoch": 0.00014591064453125,
      "step": 23906,
      "training_step_time": 0.38812947273254395
    },
    {
      "epoch": 0.000145916748046875,
      "model_forward_time": 0.1145780086517334,
      "step": 23907
    },
    {
      "epoch": 0.000145916748046875,
      "step": 23907,
      "training_step_time": 0.36470770835876465
    },
    {
      "epoch": 0.0001459228515625,
      "model_forward_time": 0.1150810718536377,
      "step": 23908
    },
    {
      "epoch": 0.0001459228515625,
      "step": 23908,
      "training_step_time": 0.4364030361175537
    },
    {
      "epoch": 0.000145928955078125,
      "model_forward_time": 0.1151123046875,
      "step": 23909
    },
    {
      "epoch": 0.000145928955078125,
      "step": 23909,
      "training_step_time": 0.4471254348754883
    },
    {
      "epoch": 0.00014593505859375,
      "grad_norm": 0.1399502456188202,
      "learning_rate": 7.031165373548014e-05,
      "loss": 0.0502,
      "step": 23910
    },
    {
      "epoch": 0.00014593505859375,
      "model_forward_time": 0.11536908149719238,
      "step": 23910
    },
    {
      "epoch": 0.00014593505859375,
      "step": 23910,
      "training_step_time": 0.3912975788116455
    },
    {
      "epoch": 0.000145941162109375,
      "model_forward_time": 0.11530470848083496,
      "step": 23911
    },
    {
      "epoch": 0.000145941162109375,
      "step": 23911,
      "training_step_time": 0.41477370262145996
    },
    {
      "epoch": 0.000145947265625,
      "model_forward_time": 0.11546993255615234,
      "step": 23912
    },
    {
      "epoch": 0.000145947265625,
      "step": 23912,
      "training_step_time": 0.4052557945251465
    },
    {
      "epoch": 0.000145953369140625,
      "model_forward_time": 0.11490416526794434,
      "step": 23913
    },
    {
      "epoch": 0.000145953369140625,
      "step": 23913,
      "training_step_time": 0.3910684585571289
    },
    {
      "epoch": 0.00014595947265625,
      "model_forward_time": 0.1154017448425293,
      "step": 23914
    },
    {
      "epoch": 0.00014595947265625,
      "step": 23914,
      "training_step_time": 0.3943800926208496
    },
    {
      "epoch": 0.000145965576171875,
      "model_forward_time": 0.11556029319763184,
      "step": 23915
    },
    {
      "epoch": 0.000145965576171875,
      "step": 23915,
      "training_step_time": 0.5554201602935791
    },
    {
      "epoch": 0.0001459716796875,
      "model_forward_time": 0.11559367179870605,
      "step": 23916
    },
    {
      "epoch": 0.0001459716796875,
      "step": 23916,
      "training_step_time": 0.46279239654541016
    },
    {
      "epoch": 0.000145977783203125,
      "model_forward_time": 0.11459565162658691,
      "step": 23917
    },
    {
      "epoch": 0.000145977783203125,
      "step": 23917,
      "training_step_time": 0.39477062225341797
    },
    {
      "epoch": 0.00014598388671875,
      "model_forward_time": 0.11498403549194336,
      "step": 23918
    },
    {
      "epoch": 0.00014598388671875,
      "step": 23918,
      "training_step_time": 0.38772130012512207
    },
    {
      "epoch": 0.000145989990234375,
      "model_forward_time": 0.1152641773223877,
      "step": 23919
    },
    {
      "epoch": 0.000145989990234375,
      "step": 23919,
      "training_step_time": 0.39891576766967773
    },
    {
      "epoch": 0.00014599609375,
      "grad_norm": 0.17305248975753784,
      "learning_rate": 7.028646914702614e-05,
      "loss": 0.0539,
      "step": 23920
    },
    {
      "epoch": 0.00014599609375,
      "model_forward_time": 0.11509442329406738,
      "step": 23920
    },
    {
      "epoch": 0.00014599609375,
      "step": 23920,
      "training_step_time": 0.38815975189208984
    },
    {
      "epoch": 0.000146002197265625,
      "model_forward_time": 0.11535811424255371,
      "step": 23921
    },
    {
      "epoch": 0.000146002197265625,
      "step": 23921,
      "training_step_time": 0.6685290336608887
    },
    {
      "epoch": 0.00014600830078125,
      "model_forward_time": 0.1145780086517334,
      "step": 23922
    },
    {
      "epoch": 0.00014600830078125,
      "step": 23922,
      "training_step_time": 0.43970179557800293
    },
    {
      "epoch": 0.000146014404296875,
      "model_forward_time": 0.11788725852966309,
      "step": 23923
    },
    {
      "epoch": 0.000146014404296875,
      "step": 23923,
      "training_step_time": 0.43471622467041016
    },
    {
      "epoch": 0.0001460205078125,
      "model_forward_time": 0.11740517616271973,
      "step": 23924
    },
    {
      "epoch": 0.0001460205078125,
      "step": 23924,
      "training_step_time": 0.412722110748291
    },
    {
      "epoch": 0.000146026611328125,
      "model_forward_time": 0.11739802360534668,
      "step": 23925
    },
    {
      "epoch": 0.000146026611328125,
      "step": 23925,
      "training_step_time": 0.38399624824523926
    },
    {
      "epoch": 0.00014603271484375,
      "model_forward_time": 0.11684346199035645,
      "step": 23926
    },
    {
      "epoch": 0.00014603271484375,
      "step": 23926,
      "training_step_time": 0.4082980155944824
    },
    {
      "epoch": 0.000146038818359375,
      "model_forward_time": 0.11516284942626953,
      "step": 23927
    },
    {
      "epoch": 0.000146038818359375,
      "step": 23927,
      "training_step_time": 0.4949615001678467
    },
    {
      "epoch": 0.000146044921875,
      "model_forward_time": 0.11468005180358887,
      "step": 23928
    },
    {
      "epoch": 0.000146044921875,
      "step": 23928,
      "training_step_time": 0.39281678199768066
    },
    {
      "epoch": 0.000146051025390625,
      "model_forward_time": 0.11472582817077637,
      "step": 23929
    },
    {
      "epoch": 0.000146051025390625,
      "step": 23929,
      "training_step_time": 0.48472070693969727
    },
    {
      "epoch": 0.00014605712890625,
      "grad_norm": 0.1387455314397812,
      "learning_rate": 7.026127839607847e-05,
      "loss": 0.0465,
      "step": 23930
    },
    {
      "epoch": 0.00014605712890625,
      "model_forward_time": 0.11536884307861328,
      "step": 23930
    },
    {
      "epoch": 0.00014605712890625,
      "step": 23930,
      "training_step_time": 0.41312193870544434
    },
    {
      "epoch": 0.000146063232421875,
      "model_forward_time": 0.11516118049621582,
      "step": 23931
    },
    {
      "epoch": 0.000146063232421875,
      "step": 23931,
      "training_step_time": 0.4708251953125
    },
    {
      "epoch": 0.0001460693359375,
      "model_forward_time": 0.11480951309204102,
      "step": 23932
    },
    {
      "epoch": 0.0001460693359375,
      "step": 23932,
      "training_step_time": 0.38695669174194336
    },
    {
      "epoch": 0.000146075439453125,
      "model_forward_time": 0.11499643325805664,
      "step": 23933
    },
    {
      "epoch": 0.000146075439453125,
      "step": 23933,
      "training_step_time": 0.41864752769470215
    },
    {
      "epoch": 0.00014608154296875,
      "model_forward_time": 0.11459851264953613,
      "step": 23934
    },
    {
      "epoch": 0.00014608154296875,
      "step": 23934,
      "training_step_time": 0.3902428150177002
    },
    {
      "epoch": 0.000146087646484375,
      "model_forward_time": 0.11529755592346191,
      "step": 23935
    },
    {
      "epoch": 0.000146087646484375,
      "step": 23935,
      "training_step_time": 0.37363696098327637
    },
    {
      "epoch": 0.00014609375,
      "model_forward_time": 0.1192328929901123,
      "step": 23936
    },
    {
      "epoch": 0.00014609375,
      "step": 23936,
      "training_step_time": 0.45971250534057617
    },
    {
      "epoch": 0.000146099853515625,
      "model_forward_time": 0.11825251579284668,
      "step": 23937
    },
    {
      "epoch": 0.000146099853515625,
      "step": 23937,
      "training_step_time": 0.4163022041320801
    },
    {
      "epoch": 0.00014610595703125,
      "model_forward_time": 0.11865758895874023,
      "step": 23938
    },
    {
      "epoch": 0.00014610595703125,
      "step": 23938,
      "training_step_time": 0.41825413703918457
    },
    {
      "epoch": 0.000146112060546875,
      "model_forward_time": 0.11892032623291016,
      "step": 23939
    },
    {
      "epoch": 0.000146112060546875,
      "step": 23939,
      "training_step_time": 0.4661128520965576
    },
    {
      "epoch": 0.0001461181640625,
      "grad_norm": 0.13242727518081665,
      "learning_rate": 7.023608149028937e-05,
      "loss": 0.0472,
      "step": 23940
    },
    {
      "epoch": 0.0001461181640625,
      "model_forward_time": 0.11736321449279785,
      "step": 23940
    },
    {
      "epoch": 0.0001461181640625,
      "step": 23940,
      "training_step_time": 0.3846290111541748
    },
    {
      "epoch": 0.000146124267578125,
      "model_forward_time": 0.11798810958862305,
      "step": 23941
    },
    {
      "epoch": 0.000146124267578125,
      "step": 23941,
      "training_step_time": 0.37908458709716797
    },
    {
      "epoch": 0.00014613037109375,
      "model_forward_time": 0.11769485473632812,
      "step": 23942
    },
    {
      "epoch": 0.00014613037109375,
      "step": 23942,
      "training_step_time": 0.38587474822998047
    },
    {
      "epoch": 0.000146136474609375,
      "model_forward_time": 0.11530113220214844,
      "step": 23943
    },
    {
      "epoch": 0.000146136474609375,
      "step": 23943,
      "training_step_time": 0.45854878425598145
    },
    {
      "epoch": 0.000146142578125,
      "model_forward_time": 0.1152486801147461,
      "step": 23944
    },
    {
      "epoch": 0.000146142578125,
      "step": 23944,
      "training_step_time": 0.4286201000213623
    },
    {
      "epoch": 0.000146148681640625,
      "model_forward_time": 0.11559057235717773,
      "step": 23945
    },
    {
      "epoch": 0.000146148681640625,
      "step": 23945,
      "training_step_time": 0.46976208686828613
    },
    {
      "epoch": 0.00014615478515625,
      "model_forward_time": 0.11564326286315918,
      "step": 23946
    },
    {
      "epoch": 0.00014615478515625,
      "step": 23946,
      "training_step_time": 0.40270543098449707
    },
    {
      "epoch": 0.000146160888671875,
      "model_forward_time": 0.11568045616149902,
      "step": 23947
    },
    {
      "epoch": 0.000146160888671875,
      "step": 23947,
      "training_step_time": 0.38987088203430176
    },
    {
      "epoch": 0.0001461669921875,
      "model_forward_time": 0.11539649963378906,
      "step": 23948
    },
    {
      "epoch": 0.0001461669921875,
      "step": 23948,
      "training_step_time": 0.3889045715332031
    },
    {
      "epoch": 0.000146173095703125,
      "model_forward_time": 0.11541008949279785,
      "step": 23949
    },
    {
      "epoch": 0.000146173095703125,
      "step": 23949,
      "training_step_time": 0.38820743560791016
    },
    {
      "epoch": 0.00014617919921875,
      "grad_norm": 0.16454152762889862,
      "learning_rate": 7.021087843731302e-05,
      "loss": 0.0496,
      "step": 23950
    },
    {
      "epoch": 0.00014617919921875,
      "model_forward_time": 0.11559939384460449,
      "step": 23950
    },
    {
      "epoch": 0.00014617919921875,
      "step": 23950,
      "training_step_time": 0.5096137523651123
    },
    {
      "epoch": 0.000146185302734375,
      "model_forward_time": 0.11791181564331055,
      "step": 23951
    },
    {
      "epoch": 0.000146185302734375,
      "step": 23951,
      "training_step_time": 0.5053770542144775
    },
    {
      "epoch": 0.00014619140625,
      "model_forward_time": 0.11637139320373535,
      "step": 23952
    },
    {
      "epoch": 0.00014619140625,
      "step": 23952,
      "training_step_time": 0.4208197593688965
    },
    {
      "epoch": 0.000146197509765625,
      "model_forward_time": 0.11514687538146973,
      "step": 23953
    },
    {
      "epoch": 0.000146197509765625,
      "step": 23953,
      "training_step_time": 0.39051151275634766
    },
    {
      "epoch": 0.00014620361328125,
      "model_forward_time": 0.1156003475189209,
      "step": 23954
    },
    {
      "epoch": 0.00014620361328125,
      "step": 23954,
      "training_step_time": 0.3968546390533447
    },
    {
      "epoch": 0.000146209716796875,
      "model_forward_time": 0.1151895523071289,
      "step": 23955
    },
    {
      "epoch": 0.000146209716796875,
      "step": 23955,
      "training_step_time": 0.3860914707183838
    },
    {
      "epoch": 0.0001462158203125,
      "model_forward_time": 0.11558890342712402,
      "step": 23956
    },
    {
      "epoch": 0.0001462158203125,
      "step": 23956,
      "training_step_time": 0.3821420669555664
    },
    {
      "epoch": 0.000146221923828125,
      "model_forward_time": 0.11554145812988281,
      "step": 23957
    },
    {
      "epoch": 0.000146221923828125,
      "step": 23957,
      "training_step_time": 0.6905319690704346
    },
    {
      "epoch": 0.00014622802734375,
      "model_forward_time": 0.11509895324707031,
      "step": 23958
    },
    {
      "epoch": 0.00014622802734375,
      "step": 23958,
      "training_step_time": 0.3972322940826416
    },
    {
      "epoch": 0.000146234130859375,
      "model_forward_time": 0.11512207984924316,
      "step": 23959
    },
    {
      "epoch": 0.000146234130859375,
      "step": 23959,
      "training_step_time": 0.4069027900695801
    },
    {
      "epoch": 0.000146240234375,
      "grad_norm": 0.13434197008609772,
      "learning_rate": 7.018566924480543e-05,
      "loss": 0.0474,
      "step": 23960
    },
    {
      "epoch": 0.000146240234375,
      "model_forward_time": 0.11441373825073242,
      "step": 23960
    },
    {
      "epoch": 0.000146240234375,
      "step": 23960,
      "training_step_time": 0.3831331729888916
    },
    {
      "epoch": 0.000146246337890625,
      "model_forward_time": 0.11513018608093262,
      "step": 23961
    },
    {
      "epoch": 0.000146246337890625,
      "step": 23961,
      "training_step_time": 0.3920719623565674
    },
    {
      "epoch": 0.00014625244140625,
      "model_forward_time": 0.11437773704528809,
      "step": 23962
    },
    {
      "epoch": 0.00014625244140625,
      "step": 23962,
      "training_step_time": 0.3913843631744385
    },
    {
      "epoch": 0.000146258544921875,
      "model_forward_time": 0.11584639549255371,
      "step": 23963
    },
    {
      "epoch": 0.000146258544921875,
      "step": 23963,
      "training_step_time": 0.6220791339874268
    },
    {
      "epoch": 0.0001462646484375,
      "model_forward_time": 0.11496591567993164,
      "step": 23964
    },
    {
      "epoch": 0.0001462646484375,
      "step": 23964,
      "training_step_time": 0.4644763469696045
    },
    {
      "epoch": 0.000146270751953125,
      "model_forward_time": 0.11527037620544434,
      "step": 23965
    },
    {
      "epoch": 0.000146270751953125,
      "step": 23965,
      "training_step_time": 0.403393030166626
    },
    {
      "epoch": 0.00014627685546875,
      "model_forward_time": 0.11419177055358887,
      "step": 23966
    },
    {
      "epoch": 0.00014627685546875,
      "step": 23966,
      "training_step_time": 0.4374992847442627
    },
    {
      "epoch": 0.000146282958984375,
      "model_forward_time": 0.11509537696838379,
      "step": 23967
    },
    {
      "epoch": 0.000146282958984375,
      "step": 23967,
      "training_step_time": 0.3945293426513672
    },
    {
      "epoch": 0.0001462890625,
      "model_forward_time": 0.11456537246704102,
      "step": 23968
    },
    {
      "epoch": 0.0001462890625,
      "step": 23968,
      "training_step_time": 0.3897099494934082
    },
    {
      "epoch": 0.000146295166015625,
      "model_forward_time": 0.11504125595092773,
      "step": 23969
    },
    {
      "epoch": 0.000146295166015625,
      "step": 23969,
      "training_step_time": 0.5269021987915039
    },
    {
      "epoch": 0.00014630126953125,
      "grad_norm": 0.1364661008119583,
      "learning_rate": 7.016045392042452e-05,
      "loss": 0.0437,
      "step": 23970
    },
    {
      "epoch": 0.00014630126953125,
      "model_forward_time": 0.11467742919921875,
      "step": 23970
    },
    {
      "epoch": 0.00014630126953125,
      "step": 23970,
      "training_step_time": 0.393221378326416
    },
    {
      "epoch": 0.000146307373046875,
      "model_forward_time": 0.11458730697631836,
      "step": 23971
    },
    {
      "epoch": 0.000146307373046875,
      "step": 23971,
      "training_step_time": 0.4660835266113281
    },
    {
      "epoch": 0.0001463134765625,
      "model_forward_time": 0.11438584327697754,
      "step": 23972
    },
    {
      "epoch": 0.0001463134765625,
      "step": 23972,
      "training_step_time": 0.44301867485046387
    },
    {
      "epoch": 0.000146319580078125,
      "model_forward_time": 0.11440062522888184,
      "step": 23973
    },
    {
      "epoch": 0.000146319580078125,
      "step": 23973,
      "training_step_time": 0.39099931716918945
    },
    {
      "epoch": 0.00014632568359375,
      "model_forward_time": 0.11525917053222656,
      "step": 23974
    },
    {
      "epoch": 0.00014632568359375,
      "step": 23974,
      "training_step_time": 0.39477086067199707
    },
    {
      "epoch": 0.000146331787109375,
      "model_forward_time": 0.11503481864929199,
      "step": 23975
    },
    {
      "epoch": 0.000146331787109375,
      "step": 23975,
      "training_step_time": 0.6715645790100098
    },
    {
      "epoch": 0.000146337890625,
      "model_forward_time": 0.11475515365600586,
      "step": 23976
    },
    {
      "epoch": 0.000146337890625,
      "step": 23976,
      "training_step_time": 0.3885993957519531
    },
    {
      "epoch": 0.000146343994140625,
      "model_forward_time": 0.11548686027526855,
      "step": 23977
    },
    {
      "epoch": 0.000146343994140625,
      "step": 23977,
      "training_step_time": 0.3961520195007324
    },
    {
      "epoch": 0.00014635009765625,
      "model_forward_time": 0.11459994316101074,
      "step": 23978
    },
    {
      "epoch": 0.00014635009765625,
      "step": 23978,
      "training_step_time": 0.4191887378692627
    },
    {
      "epoch": 0.000146356201171875,
      "model_forward_time": 0.11478161811828613,
      "step": 23979
    },
    {
      "epoch": 0.000146356201171875,
      "step": 23979,
      "training_step_time": 0.4258730411529541
    },
    {
      "epoch": 0.0001463623046875,
      "grad_norm": 0.13990341126918793,
      "learning_rate": 7.013523247183e-05,
      "loss": 0.0453,
      "step": 23980
    },
    {
      "epoch": 0.0001463623046875,
      "model_forward_time": 0.11430716514587402,
      "step": 23980
    },
    {
      "epoch": 0.0001463623046875,
      "step": 23980,
      "training_step_time": 0.3983776569366455
    },
    {
      "epoch": 0.000146368408203125,
      "model_forward_time": 0.11437702178955078,
      "step": 23981
    },
    {
      "epoch": 0.000146368408203125,
      "step": 23981,
      "training_step_time": 0.5818195343017578
    },
    {
      "epoch": 0.00014637451171875,
      "model_forward_time": 0.11394476890563965,
      "step": 23982
    },
    {
      "epoch": 0.00014637451171875,
      "step": 23982,
      "training_step_time": 0.39420628547668457
    },
    {
      "epoch": 0.000146380615234375,
      "model_forward_time": 0.11446118354797363,
      "step": 23983
    },
    {
      "epoch": 0.000146380615234375,
      "step": 23983,
      "training_step_time": 0.39188098907470703
    },
    {
      "epoch": 0.00014638671875,
      "model_forward_time": 0.11604881286621094,
      "step": 23984
    },
    {
      "epoch": 0.00014638671875,
      "step": 23984,
      "training_step_time": 0.40259575843811035
    },
    {
      "epoch": 0.000146392822265625,
      "model_forward_time": 0.11578750610351562,
      "step": 23985
    },
    {
      "epoch": 0.000146392822265625,
      "step": 23985,
      "training_step_time": 0.4588921070098877
    },
    {
      "epoch": 0.00014639892578125,
      "model_forward_time": 0.1151883602142334,
      "step": 23986
    },
    {
      "epoch": 0.00014639892578125,
      "step": 23986,
      "training_step_time": 0.38933825492858887
    },
    {
      "epoch": 0.000146405029296875,
      "model_forward_time": 0.11590170860290527,
      "step": 23987
    },
    {
      "epoch": 0.000146405029296875,
      "step": 23987,
      "training_step_time": 0.6862123012542725
    },
    {
      "epoch": 0.0001464111328125,
      "model_forward_time": 0.11459898948669434,
      "step": 23988
    },
    {
      "epoch": 0.0001464111328125,
      "step": 23988,
      "training_step_time": 0.39116621017456055
    },
    {
      "epoch": 0.000146417236328125,
      "model_forward_time": 0.11399126052856445,
      "step": 23989
    },
    {
      "epoch": 0.000146417236328125,
      "step": 23989,
      "training_step_time": 0.3869473934173584
    },
    {
      "epoch": 0.00014642333984375,
      "grad_norm": 0.1857733130455017,
      "learning_rate": 7.01100049066835e-05,
      "loss": 0.0501,
      "step": 23990
    },
    {
      "epoch": 0.00014642333984375,
      "model_forward_time": 0.11465287208557129,
      "step": 23990
    },
    {
      "epoch": 0.00014642333984375,
      "step": 23990,
      "training_step_time": 0.4332771301269531
    },
    {
      "epoch": 0.000146429443359375,
      "model_forward_time": 0.11501073837280273,
      "step": 23991
    },
    {
      "epoch": 0.000146429443359375,
      "step": 23991,
      "training_step_time": 0.4098501205444336
    },
    {
      "epoch": 0.000146435546875,
      "model_forward_time": 0.11507558822631836,
      "step": 23992
    },
    {
      "epoch": 0.000146435546875,
      "step": 23992,
      "training_step_time": 0.48056840896606445
    },
    {
      "epoch": 0.000146441650390625,
      "model_forward_time": 0.11481118202209473,
      "step": 23993
    },
    {
      "epoch": 0.000146441650390625,
      "step": 23993,
      "training_step_time": 0.509488582611084
    },
    {
      "epoch": 0.00014644775390625,
      "model_forward_time": 0.1160740852355957,
      "step": 23994
    },
    {
      "epoch": 0.00014644775390625,
      "step": 23994,
      "training_step_time": 0.39569997787475586
    },
    {
      "epoch": 0.000146453857421875,
      "model_forward_time": 0.11495804786682129,
      "step": 23995
    },
    {
      "epoch": 0.000146453857421875,
      "step": 23995,
      "training_step_time": 0.38737940788269043
    },
    {
      "epoch": 0.0001464599609375,
      "model_forward_time": 0.11480855941772461,
      "step": 23996
    },
    {
      "epoch": 0.0001464599609375,
      "step": 23996,
      "training_step_time": 0.3858296871185303
    },
    {
      "epoch": 0.000146466064453125,
      "model_forward_time": 0.11488842964172363,
      "step": 23997
    },
    {
      "epoch": 0.000146466064453125,
      "step": 23997,
      "training_step_time": 0.3850669860839844
    },
    {
      "epoch": 0.00014647216796875,
      "model_forward_time": 0.1150364875793457,
      "step": 23998
    },
    {
      "epoch": 0.00014647216796875,
      "step": 23998,
      "training_step_time": 0.3915557861328125
    },
    {
      "epoch": 0.000146478271484375,
      "model_forward_time": 0.11580896377563477,
      "step": 23999
    },
    {
      "epoch": 0.000146478271484375,
      "step": 23999,
      "training_step_time": 0.6455600261688232
    },
    {
      "epoch": 0.000146484375,
      "grad_norm": 0.1553080826997757,
      "learning_rate": 7.008477123264848e-05,
      "loss": 0.05,
      "step": 24000
    },
    {
      "epoch": 0.000146484375,
      "model_forward_time": 0.11240339279174805,
      "step": 24000
    },
    {
      "epoch": 0.000146484375,
      "step": 24000,
      "training_step_time": 0.356478214263916
    },
    {
      "epoch": 0.000146490478515625,
      "model_forward_time": 0.11238288879394531,
      "step": 24001
    },
    {
      "epoch": 0.000146490478515625,
      "step": 24001,
      "training_step_time": 0.37383031845092773
    },
    {
      "epoch": 0.00014649658203125,
      "model_forward_time": 0.11261296272277832,
      "step": 24002
    },
    {
      "epoch": 0.00014649658203125,
      "step": 24002,
      "training_step_time": 0.3830220699310303
    },
    {
      "epoch": 0.000146502685546875,
      "model_forward_time": 0.11461663246154785,
      "step": 24003
    },
    {
      "epoch": 0.000146502685546875,
      "step": 24003,
      "training_step_time": 0.37540435791015625
    },
    {
      "epoch": 0.0001465087890625,
      "model_forward_time": 0.11388945579528809,
      "step": 24004
    },
    {
      "epoch": 0.0001465087890625,
      "step": 24004,
      "training_step_time": 0.4108154773712158
    },
    {
      "epoch": 0.000146514892578125,
      "model_forward_time": 0.11481428146362305,
      "step": 24005
    },
    {
      "epoch": 0.000146514892578125,
      "step": 24005,
      "training_step_time": 0.4208054542541504
    },
    {
      "epoch": 0.00014652099609375,
      "model_forward_time": 0.11487340927124023,
      "step": 24006
    },
    {
      "epoch": 0.00014652099609375,
      "step": 24006,
      "training_step_time": 0.42032814025878906
    },
    {
      "epoch": 0.000146527099609375,
      "model_forward_time": 0.11543464660644531,
      "step": 24007
    },
    {
      "epoch": 0.000146527099609375,
      "step": 24007,
      "training_step_time": 0.44643545150756836
    },
    {
      "epoch": 0.000146533203125,
      "model_forward_time": 0.11550378799438477,
      "step": 24008
    },
    {
      "epoch": 0.000146533203125,
      "step": 24008,
      "training_step_time": 0.4877448081970215
    },
    {
      "epoch": 0.000146539306640625,
      "model_forward_time": 0.11457037925720215,
      "step": 24009
    },
    {
      "epoch": 0.000146539306640625,
      "step": 24009,
      "training_step_time": 0.44640612602233887
    },
    {
      "epoch": 0.00014654541015625,
      "grad_norm": 0.12787586450576782,
      "learning_rate": 7.005953145739026e-05,
      "loss": 0.0485,
      "step": 24010
    },
    {
      "epoch": 0.00014654541015625,
      "model_forward_time": 0.1150972843170166,
      "step": 24010
    },
    {
      "epoch": 0.00014654541015625,
      "step": 24010,
      "training_step_time": 0.4601931571960449
    },
    {
      "epoch": 0.000146551513671875,
      "model_forward_time": 0.11439085006713867,
      "step": 24011
    },
    {
      "epoch": 0.000146551513671875,
      "step": 24011,
      "training_step_time": 0.3786346912384033
    },
    {
      "epoch": 0.0001465576171875,
      "model_forward_time": 0.11515498161315918,
      "step": 24012
    },
    {
      "epoch": 0.0001465576171875,
      "step": 24012,
      "training_step_time": 0.3999366760253906
    },
    {
      "epoch": 0.000146563720703125,
      "model_forward_time": 0.11480402946472168,
      "step": 24013
    },
    {
      "epoch": 0.000146563720703125,
      "step": 24013,
      "training_step_time": 0.39992642402648926
    },
    {
      "epoch": 0.00014656982421875,
      "model_forward_time": 0.11443328857421875,
      "step": 24014
    },
    {
      "epoch": 0.00014656982421875,
      "step": 24014,
      "training_step_time": 0.391308069229126
    },
    {
      "epoch": 0.000146575927734375,
      "model_forward_time": 0.11594104766845703,
      "step": 24015
    },
    {
      "epoch": 0.000146575927734375,
      "step": 24015,
      "training_step_time": 0.39496755599975586
    },
    {
      "epoch": 0.00014658203125,
      "model_forward_time": 0.11533761024475098,
      "step": 24016
    },
    {
      "epoch": 0.00014658203125,
      "step": 24016,
      "training_step_time": 0.44637608528137207
    },
    {
      "epoch": 0.000146588134765625,
      "model_forward_time": 0.11489152908325195,
      "step": 24017
    },
    {
      "epoch": 0.000146588134765625,
      "step": 24017,
      "training_step_time": 0.3996608257293701
    },
    {
      "epoch": 0.00014659423828125,
      "model_forward_time": 0.11525082588195801,
      "step": 24018
    },
    {
      "epoch": 0.00014659423828125,
      "step": 24018,
      "training_step_time": 0.4185354709625244
    },
    {
      "epoch": 0.000146600341796875,
      "model_forward_time": 0.11452460289001465,
      "step": 24019
    },
    {
      "epoch": 0.000146600341796875,
      "step": 24019,
      "training_step_time": 0.41089439392089844
    },
    {
      "epoch": 0.0001466064453125,
      "grad_norm": 0.19925150275230408,
      "learning_rate": 7.003428558857604e-05,
      "loss": 0.052,
      "step": 24020
    },
    {
      "epoch": 0.0001466064453125,
      "model_forward_time": 0.11536693572998047,
      "step": 24020
    },
    {
      "epoch": 0.0001466064453125,
      "step": 24020,
      "training_step_time": 0.39307212829589844
    },
    {
      "epoch": 0.000146612548828125,
      "model_forward_time": 0.11550426483154297,
      "step": 24021
    },
    {
      "epoch": 0.000146612548828125,
      "step": 24021,
      "training_step_time": 0.39254021644592285
    },
    {
      "epoch": 0.00014661865234375,
      "model_forward_time": 0.11513352394104004,
      "step": 24022
    },
    {
      "epoch": 0.00014661865234375,
      "step": 24022,
      "training_step_time": 0.49718499183654785
    },
    {
      "epoch": 0.000146624755859375,
      "model_forward_time": 0.11516022682189941,
      "step": 24023
    },
    {
      "epoch": 0.000146624755859375,
      "step": 24023,
      "training_step_time": 0.5064990520477295
    },
    {
      "epoch": 0.000146630859375,
      "model_forward_time": 0.11486577987670898,
      "step": 24024
    },
    {
      "epoch": 0.000146630859375,
      "step": 24024,
      "training_step_time": 0.45753908157348633
    },
    {
      "epoch": 0.000146636962890625,
      "model_forward_time": 0.11525607109069824,
      "step": 24025
    },
    {
      "epoch": 0.000146636962890625,
      "step": 24025,
      "training_step_time": 0.3995842933654785
    },
    {
      "epoch": 0.00014664306640625,
      "model_forward_time": 0.11531901359558105,
      "step": 24026
    },
    {
      "epoch": 0.00014664306640625,
      "step": 24026,
      "training_step_time": 0.397491455078125
    },
    {
      "epoch": 0.000146649169921875,
      "model_forward_time": 0.11511802673339844,
      "step": 24027
    },
    {
      "epoch": 0.000146649169921875,
      "step": 24027,
      "training_step_time": 0.38547372817993164
    },
    {
      "epoch": 0.0001466552734375,
      "model_forward_time": 0.11641860008239746,
      "step": 24028
    },
    {
      "epoch": 0.0001466552734375,
      "step": 24028,
      "training_step_time": 0.40134119987487793
    },
    {
      "epoch": 0.000146661376953125,
      "model_forward_time": 0.11480164527893066,
      "step": 24029
    },
    {
      "epoch": 0.000146661376953125,
      "step": 24029,
      "training_step_time": 0.4439563751220703
    },
    {
      "epoch": 0.00014666748046875,
      "grad_norm": 0.13953417539596558,
      "learning_rate": 7.000903363387482e-05,
      "loss": 0.0418,
      "step": 24030
    },
    {
      "epoch": 0.00014666748046875,
      "model_forward_time": 0.11466574668884277,
      "step": 24030
    },
    {
      "epoch": 0.00014666748046875,
      "step": 24030,
      "training_step_time": 0.4265785217285156
    },
    {
      "epoch": 0.000146673583984375,
      "model_forward_time": 0.11524772644042969,
      "step": 24031
    },
    {
      "epoch": 0.000146673583984375,
      "step": 24031,
      "training_step_time": 0.42439794540405273
    },
    {
      "epoch": 0.0001466796875,
      "model_forward_time": 0.11504960060119629,
      "step": 24032
    },
    {
      "epoch": 0.0001466796875,
      "step": 24032,
      "training_step_time": 0.4625661373138428
    },
    {
      "epoch": 0.000146685791015625,
      "model_forward_time": 0.1158299446105957,
      "step": 24033
    },
    {
      "epoch": 0.000146685791015625,
      "step": 24033,
      "training_step_time": 0.38869524002075195
    },
    {
      "epoch": 0.00014669189453125,
      "model_forward_time": 0.11553192138671875,
      "step": 24034
    },
    {
      "epoch": 0.00014669189453125,
      "step": 24034,
      "training_step_time": 0.3889479637145996
    },
    {
      "epoch": 0.000146697998046875,
      "model_forward_time": 0.11563253402709961,
      "step": 24035
    },
    {
      "epoch": 0.000146697998046875,
      "step": 24035,
      "training_step_time": 0.4061565399169922
    },
    {
      "epoch": 0.0001467041015625,
      "model_forward_time": 0.1155996322631836,
      "step": 24036
    },
    {
      "epoch": 0.0001467041015625,
      "step": 24036,
      "training_step_time": 0.366863489151001
    },
    {
      "epoch": 0.000146710205078125,
      "model_forward_time": 0.11500930786132812,
      "step": 24037
    },
    {
      "epoch": 0.000146710205078125,
      "step": 24037,
      "training_step_time": 0.4607865810394287
    },
    {
      "epoch": 0.00014671630859375,
      "model_forward_time": 0.11593747138977051,
      "step": 24038
    },
    {
      "epoch": 0.00014671630859375,
      "step": 24038,
      "training_step_time": 0.43679380416870117
    },
    {
      "epoch": 0.000146722412109375,
      "model_forward_time": 0.11509943008422852,
      "step": 24039
    },
    {
      "epoch": 0.000146722412109375,
      "step": 24039,
      "training_step_time": 0.39844584465026855
    },
    {
      "epoch": 0.000146728515625,
      "grad_norm": 0.14962713420391083,
      "learning_rate": 6.99837756009575e-05,
      "loss": 0.0467,
      "step": 24040
    },
    {
      "epoch": 0.000146728515625,
      "model_forward_time": 0.11475300788879395,
      "step": 24040
    },
    {
      "epoch": 0.000146728515625,
      "step": 24040,
      "training_step_time": 0.3899514675140381
    },
    {
      "epoch": 0.000146734619140625,
      "model_forward_time": 0.11525893211364746,
      "step": 24041
    },
    {
      "epoch": 0.000146734619140625,
      "step": 24041,
      "training_step_time": 0.3928642272949219
    },
    {
      "epoch": 0.00014674072265625,
      "model_forward_time": 0.11526036262512207,
      "step": 24042
    },
    {
      "epoch": 0.00014674072265625,
      "step": 24042,
      "training_step_time": 0.3872108459472656
    },
    {
      "epoch": 0.000146746826171875,
      "model_forward_time": 0.11556601524353027,
      "step": 24043
    },
    {
      "epoch": 0.000146746826171875,
      "step": 24043,
      "training_step_time": 0.3981034755706787
    },
    {
      "epoch": 0.0001467529296875,
      "model_forward_time": 0.11575627326965332,
      "step": 24044
    },
    {
      "epoch": 0.0001467529296875,
      "step": 24044,
      "training_step_time": 0.40715646743774414
    },
    {
      "epoch": 0.000146759033203125,
      "model_forward_time": 0.11569499969482422,
      "step": 24045
    },
    {
      "epoch": 0.000146759033203125,
      "step": 24045,
      "training_step_time": 0.4241182804107666
    },
    {
      "epoch": 0.00014676513671875,
      "model_forward_time": 0.1147923469543457,
      "step": 24046
    },
    {
      "epoch": 0.00014676513671875,
      "step": 24046,
      "training_step_time": 0.4334261417388916
    },
    {
      "epoch": 0.000146771240234375,
      "model_forward_time": 0.1145474910736084,
      "step": 24047
    },
    {
      "epoch": 0.000146771240234375,
      "step": 24047,
      "training_step_time": 0.398517370223999
    },
    {
      "epoch": 0.00014677734375,
      "model_forward_time": 0.1150057315826416,
      "step": 24048
    },
    {
      "epoch": 0.00014677734375,
      "step": 24048,
      "training_step_time": 0.39459943771362305
    },
    {
      "epoch": 0.000146783447265625,
      "model_forward_time": 0.11489295959472656,
      "step": 24049
    },
    {
      "epoch": 0.000146783447265625,
      "step": 24049,
      "training_step_time": 0.40447521209716797
    },
    {
      "epoch": 0.00014678955078125,
      "grad_norm": 0.1459878385066986,
      "learning_rate": 6.99585114974968e-05,
      "loss": 0.0461,
      "step": 24050
    },
    {
      "epoch": 0.00014678955078125,
      "model_forward_time": 0.11509227752685547,
      "step": 24050
    },
    {
      "epoch": 0.00014678955078125,
      "step": 24050,
      "training_step_time": 0.391498327255249
    },
    {
      "epoch": 0.000146795654296875,
      "model_forward_time": 0.11504769325256348,
      "step": 24051
    },
    {
      "epoch": 0.000146795654296875,
      "step": 24051,
      "training_step_time": 0.36786842346191406
    },
    {
      "epoch": 0.0001468017578125,
      "model_forward_time": 0.11541390419006348,
      "step": 24052
    },
    {
      "epoch": 0.0001468017578125,
      "step": 24052,
      "training_step_time": 0.4511857032775879
    },
    {
      "epoch": 0.000146807861328125,
      "model_forward_time": 0.11483192443847656,
      "step": 24053
    },
    {
      "epoch": 0.000146807861328125,
      "step": 24053,
      "training_step_time": 0.4261620044708252
    },
    {
      "epoch": 0.00014681396484375,
      "model_forward_time": 0.1150054931640625,
      "step": 24054
    },
    {
      "epoch": 0.00014681396484375,
      "step": 24054,
      "training_step_time": 0.3941802978515625
    },
    {
      "epoch": 0.000146820068359375,
      "model_forward_time": 0.1153254508972168,
      "step": 24055
    },
    {
      "epoch": 0.000146820068359375,
      "step": 24055,
      "training_step_time": 0.3997058868408203
    },
    {
      "epoch": 0.000146826171875,
      "model_forward_time": 0.1149148941040039,
      "step": 24056
    },
    {
      "epoch": 0.000146826171875,
      "step": 24056,
      "training_step_time": 0.39850449562072754
    },
    {
      "epoch": 0.000146832275390625,
      "model_forward_time": 0.11515378952026367,
      "step": 24057
    },
    {
      "epoch": 0.000146832275390625,
      "step": 24057,
      "training_step_time": 0.3900272846221924
    },
    {
      "epoch": 0.00014683837890625,
      "model_forward_time": 0.11539077758789062,
      "step": 24058
    },
    {
      "epoch": 0.00014683837890625,
      "step": 24058,
      "training_step_time": 0.40975308418273926
    },
    {
      "epoch": 0.000146844482421875,
      "model_forward_time": 0.1152803897857666,
      "step": 24059
    },
    {
      "epoch": 0.000146844482421875,
      "step": 24059,
      "training_step_time": 0.4144430160522461
    },
    {
      "epoch": 0.0001468505859375,
      "grad_norm": 0.11261525750160217,
      "learning_rate": 6.993324133116726e-05,
      "loss": 0.0458,
      "step": 24060
    },
    {
      "epoch": 0.0001468505859375,
      "model_forward_time": 0.11505913734436035,
      "step": 24060
    },
    {
      "epoch": 0.0001468505859375,
      "step": 24060,
      "training_step_time": 0.44878482818603516
    },
    {
      "epoch": 0.000146856689453125,
      "model_forward_time": 0.11506772041320801,
      "step": 24061
    },
    {
      "epoch": 0.000146856689453125,
      "step": 24061,
      "training_step_time": 0.39899730682373047
    },
    {
      "epoch": 0.00014686279296875,
      "model_forward_time": 0.11532974243164062,
      "step": 24062
    },
    {
      "epoch": 0.00014686279296875,
      "step": 24062,
      "training_step_time": 0.38715624809265137
    },
    {
      "epoch": 0.000146868896484375,
      "model_forward_time": 0.11521196365356445,
      "step": 24063
    },
    {
      "epoch": 0.000146868896484375,
      "step": 24063,
      "training_step_time": 0.38518762588500977
    },
    {
      "epoch": 0.000146875,
      "model_forward_time": 0.1159522533416748,
      "step": 24064
    },
    {
      "epoch": 0.000146875,
      "step": 24064,
      "training_step_time": 0.3946809768676758
    },
    {
      "epoch": 0.000146881103515625,
      "model_forward_time": 0.1151423454284668,
      "step": 24065
    },
    {
      "epoch": 0.000146881103515625,
      "step": 24065,
      "training_step_time": 0.3983778953552246
    },
    {
      "epoch": 0.00014688720703125,
      "model_forward_time": 0.11470270156860352,
      "step": 24066
    },
    {
      "epoch": 0.00014688720703125,
      "step": 24066,
      "training_step_time": 0.3882889747619629
    },
    {
      "epoch": 0.000146893310546875,
      "model_forward_time": 0.11547279357910156,
      "step": 24067
    },
    {
      "epoch": 0.000146893310546875,
      "step": 24067,
      "training_step_time": 0.44234800338745117
    },
    {
      "epoch": 0.0001468994140625,
      "model_forward_time": 0.11589884757995605,
      "step": 24068
    },
    {
      "epoch": 0.0001468994140625,
      "step": 24068,
      "training_step_time": 0.444638729095459
    },
    {
      "epoch": 0.000146905517578125,
      "model_forward_time": 0.11543083190917969,
      "step": 24069
    },
    {
      "epoch": 0.000146905517578125,
      "step": 24069,
      "training_step_time": 0.3828697204589844
    },
    {
      "epoch": 0.00014691162109375,
      "grad_norm": 0.10519090294837952,
      "learning_rate": 6.990796510964531e-05,
      "loss": 0.0459,
      "step": 24070
    },
    {
      "epoch": 0.00014691162109375,
      "model_forward_time": 0.1154167652130127,
      "step": 24070
    },
    {
      "epoch": 0.00014691162109375,
      "step": 24070,
      "training_step_time": 0.38924098014831543
    },
    {
      "epoch": 0.000146917724609375,
      "model_forward_time": 0.11476492881774902,
      "step": 24071
    },
    {
      "epoch": 0.000146917724609375,
      "step": 24071,
      "training_step_time": 0.3898966312408447
    },
    {
      "epoch": 0.000146923828125,
      "model_forward_time": 0.11505007743835449,
      "step": 24072
    },
    {
      "epoch": 0.000146923828125,
      "step": 24072,
      "training_step_time": 0.3984675407409668
    },
    {
      "epoch": 0.000146929931640625,
      "model_forward_time": 0.11494922637939453,
      "step": 24073
    },
    {
      "epoch": 0.000146929931640625,
      "step": 24073,
      "training_step_time": 0.4485037326812744
    },
    {
      "epoch": 0.00014693603515625,
      "model_forward_time": 0.11553382873535156,
      "step": 24074
    },
    {
      "epoch": 0.00014693603515625,
      "step": 24074,
      "training_step_time": 0.4231741428375244
    },
    {
      "epoch": 0.000146942138671875,
      "model_forward_time": 0.11640119552612305,
      "step": 24075
    },
    {
      "epoch": 0.000146942138671875,
      "step": 24075,
      "training_step_time": 0.47371411323547363
    },
    {
      "epoch": 0.0001469482421875,
      "model_forward_time": 0.11443305015563965,
      "step": 24076
    },
    {
      "epoch": 0.0001469482421875,
      "step": 24076,
      "training_step_time": 0.40279531478881836
    },
    {
      "epoch": 0.000146954345703125,
      "model_forward_time": 0.11490702629089355,
      "step": 24077
    },
    {
      "epoch": 0.000146954345703125,
      "step": 24077,
      "training_step_time": 0.38582444190979004
    },
    {
      "epoch": 0.00014696044921875,
      "model_forward_time": 0.11492037773132324,
      "step": 24078
    },
    {
      "epoch": 0.00014696044921875,
      "step": 24078,
      "training_step_time": 0.39122438430786133
    },
    {
      "epoch": 0.000146966552734375,
      "model_forward_time": 0.11564326286315918,
      "step": 24079
    },
    {
      "epoch": 0.000146966552734375,
      "step": 24079,
      "training_step_time": 0.39232826232910156
    },
    {
      "epoch": 0.00014697265625,
      "grad_norm": 0.13820186257362366,
      "learning_rate": 6.988268284060922e-05,
      "loss": 0.0459,
      "step": 24080
    },
    {
      "epoch": 0.00014697265625,
      "model_forward_time": 0.11459970474243164,
      "step": 24080
    },
    {
      "epoch": 0.00014697265625,
      "step": 24080,
      "training_step_time": 0.39370155334472656
    },
    {
      "epoch": 0.000146978759765625,
      "model_forward_time": 0.11581873893737793,
      "step": 24081
    },
    {
      "epoch": 0.000146978759765625,
      "step": 24081,
      "training_step_time": 0.38918209075927734
    },
    {
      "epoch": 0.00014698486328125,
      "model_forward_time": 0.11534285545349121,
      "step": 24082
    },
    {
      "epoch": 0.00014698486328125,
      "step": 24082,
      "training_step_time": 0.45268988609313965
    },
    {
      "epoch": 0.000146990966796875,
      "model_forward_time": 0.11577653884887695,
      "step": 24083
    },
    {
      "epoch": 0.000146990966796875,
      "step": 24083,
      "training_step_time": 0.4228837490081787
    },
    {
      "epoch": 0.0001469970703125,
      "model_forward_time": 0.11469650268554688,
      "step": 24084
    },
    {
      "epoch": 0.0001469970703125,
      "step": 24084,
      "training_step_time": 0.3926718235015869
    },
    {
      "epoch": 0.000147003173828125,
      "model_forward_time": 0.11561107635498047,
      "step": 24085
    },
    {
      "epoch": 0.000147003173828125,
      "step": 24085,
      "training_step_time": 0.41387319564819336
    },
    {
      "epoch": 0.00014700927734375,
      "model_forward_time": 0.11473298072814941,
      "step": 24086
    },
    {
      "epoch": 0.00014700927734375,
      "step": 24086,
      "training_step_time": 0.39447808265686035
    },
    {
      "epoch": 0.000147015380859375,
      "model_forward_time": 0.11530280113220215,
      "step": 24087
    },
    {
      "epoch": 0.000147015380859375,
      "step": 24087,
      "training_step_time": 0.41532421112060547
    },
    {
      "epoch": 0.000147021484375,
      "model_forward_time": 0.11540842056274414,
      "step": 24088
    },
    {
      "epoch": 0.000147021484375,
      "step": 24088,
      "training_step_time": 0.40505361557006836
    },
    {
      "epoch": 0.000147027587890625,
      "model_forward_time": 0.11563777923583984,
      "step": 24089
    },
    {
      "epoch": 0.000147027587890625,
      "step": 24089,
      "training_step_time": 0.44507551193237305
    },
    {
      "epoch": 0.00014703369140625,
      "grad_norm": 0.18296092748641968,
      "learning_rate": 6.985739453173903e-05,
      "loss": 0.0517,
      "step": 24090
    },
    {
      "epoch": 0.00014703369140625,
      "model_forward_time": 0.11560630798339844,
      "step": 24090
    },
    {
      "epoch": 0.00014703369140625,
      "step": 24090,
      "training_step_time": 0.4554784297943115
    },
    {
      "epoch": 0.000147039794921875,
      "model_forward_time": 0.11666512489318848,
      "step": 24091
    },
    {
      "epoch": 0.000147039794921875,
      "step": 24091,
      "training_step_time": 0.40485525131225586
    },
    {
      "epoch": 0.0001470458984375,
      "model_forward_time": 0.1324927806854248,
      "step": 24092
    },
    {
      "epoch": 0.0001470458984375,
      "step": 24092,
      "training_step_time": 0.42883825302124023
    },
    {
      "epoch": 0.000147052001953125,
      "model_forward_time": 0.11470365524291992,
      "step": 24093
    },
    {
      "epoch": 0.000147052001953125,
      "step": 24093,
      "training_step_time": 0.3984682559967041
    },
    {
      "epoch": 0.00014705810546875,
      "model_forward_time": 0.1151432991027832,
      "step": 24094
    },
    {
      "epoch": 0.00014705810546875,
      "step": 24094,
      "training_step_time": 0.391679048538208
    },
    {
      "epoch": 0.000147064208984375,
      "model_forward_time": 0.11520075798034668,
      "step": 24095
    },
    {
      "epoch": 0.000147064208984375,
      "step": 24095,
      "training_step_time": 0.47928953170776367
    },
    {
      "epoch": 0.0001470703125,
      "model_forward_time": 0.11958861351013184,
      "step": 24096
    },
    {
      "epoch": 0.0001470703125,
      "step": 24096,
      "training_step_time": 0.4869570732116699
    },
    {
      "epoch": 0.000147076416015625,
      "model_forward_time": 0.11447954177856445,
      "step": 24097
    },
    {
      "epoch": 0.000147076416015625,
      "step": 24097,
      "training_step_time": 0.5043747425079346
    },
    {
      "epoch": 0.00014708251953125,
      "model_forward_time": 0.11503791809082031,
      "step": 24098
    },
    {
      "epoch": 0.00014708251953125,
      "step": 24098,
      "training_step_time": 0.3911151885986328
    },
    {
      "epoch": 0.000147088623046875,
      "model_forward_time": 0.11883425712585449,
      "step": 24099
    },
    {
      "epoch": 0.000147088623046875,
      "step": 24099,
      "training_step_time": 0.3883838653564453
    },
    {
      "epoch": 0.0001470947265625,
      "grad_norm": 0.12952832877635956,
      "learning_rate": 6.98321001907167e-05,
      "loss": 0.0426,
      "step": 24100
    },
    {
      "epoch": 0.0001470947265625,
      "model_forward_time": 0.11464595794677734,
      "step": 24100
    },
    {
      "epoch": 0.0001470947265625,
      "step": 24100,
      "training_step_time": 0.3833298683166504
    },
    {
      "epoch": 0.000147100830078125,
      "model_forward_time": 0.11411094665527344,
      "step": 24101
    },
    {
      "epoch": 0.000147100830078125,
      "step": 24101,
      "training_step_time": 0.433549165725708
    },
    {
      "epoch": 0.00014710693359375,
      "model_forward_time": 0.11551141738891602,
      "step": 24102
    },
    {
      "epoch": 0.00014710693359375,
      "step": 24102,
      "training_step_time": 0.42501235008239746
    },
    {
      "epoch": 0.000147113037109375,
      "model_forward_time": 0.11499142646789551,
      "step": 24103
    },
    {
      "epoch": 0.000147113037109375,
      "step": 24103,
      "training_step_time": 0.40945982933044434
    },
    {
      "epoch": 0.000147119140625,
      "model_forward_time": 0.1149590015411377,
      "step": 24104
    },
    {
      "epoch": 0.000147119140625,
      "step": 24104,
      "training_step_time": 0.4851343631744385
    },
    {
      "epoch": 0.000147125244140625,
      "model_forward_time": 0.11537337303161621,
      "step": 24105
    },
    {
      "epoch": 0.000147125244140625,
      "step": 24105,
      "training_step_time": 0.39298176765441895
    },
    {
      "epoch": 0.00014713134765625,
      "model_forward_time": 0.11492538452148438,
      "step": 24106
    },
    {
      "epoch": 0.00014713134765625,
      "step": 24106,
      "training_step_time": 0.38614344596862793
    },
    {
      "epoch": 0.000147137451171875,
      "model_forward_time": 0.11476945877075195,
      "step": 24107
    },
    {
      "epoch": 0.000147137451171875,
      "step": 24107,
      "training_step_time": 0.6043844223022461
    },
    {
      "epoch": 0.0001471435546875,
      "model_forward_time": 0.11445403099060059,
      "step": 24108
    },
    {
      "epoch": 0.0001471435546875,
      "step": 24108,
      "training_step_time": 0.38355350494384766
    },
    {
      "epoch": 0.000147149658203125,
      "model_forward_time": 0.11512041091918945,
      "step": 24109
    },
    {
      "epoch": 0.000147149658203125,
      "step": 24109,
      "training_step_time": 0.3971109390258789
    },
    {
      "epoch": 0.00014715576171875,
      "grad_norm": 0.12564407289028168,
      "learning_rate": 6.980679982522598e-05,
      "loss": 0.0487,
      "step": 24110
    },
    {
      "epoch": 0.00014715576171875,
      "model_forward_time": 0.11484193801879883,
      "step": 24110
    },
    {
      "epoch": 0.00014715576171875,
      "step": 24110,
      "training_step_time": 0.4329962730407715
    },
    {
      "epoch": 0.000147161865234375,
      "model_forward_time": 0.11560583114624023,
      "step": 24111
    },
    {
      "epoch": 0.000147161865234375,
      "step": 24111,
      "training_step_time": 0.4706268310546875
    },
    {
      "epoch": 0.00014716796875,
      "model_forward_time": 0.11548185348510742,
      "step": 24112
    },
    {
      "epoch": 0.00014716796875,
      "step": 24112,
      "training_step_time": 0.4164600372314453
    },
    {
      "epoch": 0.000147174072265625,
      "model_forward_time": 0.11478233337402344,
      "step": 24113
    },
    {
      "epoch": 0.000147174072265625,
      "step": 24113,
      "training_step_time": 0.5786676406860352
    },
    {
      "epoch": 0.00014718017578125,
      "model_forward_time": 0.11419367790222168,
      "step": 24114
    },
    {
      "epoch": 0.00014718017578125,
      "step": 24114,
      "training_step_time": 0.38726234436035156
    },
    {
      "epoch": 0.000147186279296875,
      "model_forward_time": 0.11575126647949219,
      "step": 24115
    },
    {
      "epoch": 0.000147186279296875,
      "step": 24115,
      "training_step_time": 0.42221617698669434
    },
    {
      "epoch": 0.0001471923828125,
      "model_forward_time": 0.11481523513793945,
      "step": 24116
    },
    {
      "epoch": 0.0001471923828125,
      "step": 24116,
      "training_step_time": 0.39730310440063477
    },
    {
      "epoch": 0.000147198486328125,
      "model_forward_time": 0.11516427993774414,
      "step": 24117
    },
    {
      "epoch": 0.000147198486328125,
      "step": 24117,
      "training_step_time": 0.4021463394165039
    },
    {
      "epoch": 0.00014720458984375,
      "model_forward_time": 0.11433911323547363,
      "step": 24118
    },
    {
      "epoch": 0.00014720458984375,
      "step": 24118,
      "training_step_time": 0.4552028179168701
    },
    {
      "epoch": 0.000147210693359375,
      "model_forward_time": 0.11484956741333008,
      "step": 24119
    },
    {
      "epoch": 0.000147210693359375,
      "step": 24119,
      "training_step_time": 0.5540347099304199
    },
    {
      "epoch": 0.000147216796875,
      "grad_norm": 0.15210354328155518,
      "learning_rate": 6.978149344295242e-05,
      "loss": 0.0462,
      "step": 24120
    },
    {
      "epoch": 0.000147216796875,
      "model_forward_time": 0.11445999145507812,
      "step": 24120
    },
    {
      "epoch": 0.000147216796875,
      "step": 24120,
      "training_step_time": 0.3887760639190674
    },
    {
      "epoch": 0.000147222900390625,
      "model_forward_time": 0.11464786529541016,
      "step": 24121
    },
    {
      "epoch": 0.000147222900390625,
      "step": 24121,
      "training_step_time": 0.39112138748168945
    },
    {
      "epoch": 0.00014722900390625,
      "model_forward_time": 0.1151268482208252,
      "step": 24122
    },
    {
      "epoch": 0.00014722900390625,
      "step": 24122,
      "training_step_time": 0.3821103572845459
    },
    {
      "epoch": 0.000147235107421875,
      "model_forward_time": 0.11616110801696777,
      "step": 24123
    },
    {
      "epoch": 0.000147235107421875,
      "step": 24123,
      "training_step_time": 0.44265317916870117
    },
    {
      "epoch": 0.0001472412109375,
      "model_forward_time": 0.11535406112670898,
      "step": 24124
    },
    {
      "epoch": 0.0001472412109375,
      "step": 24124,
      "training_step_time": 0.3921318054199219
    },
    {
      "epoch": 0.000147247314453125,
      "model_forward_time": 0.11525082588195801,
      "step": 24125
    },
    {
      "epoch": 0.000147247314453125,
      "step": 24125,
      "training_step_time": 0.6898751258850098
    },
    {
      "epoch": 0.00014725341796875,
      "model_forward_time": 0.11478662490844727,
      "step": 24126
    },
    {
      "epoch": 0.00014725341796875,
      "step": 24126,
      "training_step_time": 0.4539604187011719
    },
    {
      "epoch": 0.000147259521484375,
      "model_forward_time": 0.11481070518493652,
      "step": 24127
    },
    {
      "epoch": 0.000147259521484375,
      "step": 24127,
      "training_step_time": 0.38776540756225586
    },
    {
      "epoch": 0.000147265625,
      "model_forward_time": 0.11442422866821289,
      "step": 24128
    },
    {
      "epoch": 0.000147265625,
      "step": 24128,
      "training_step_time": 0.389171838760376
    },
    {
      "epoch": 0.000147271728515625,
      "model_forward_time": 0.11477899551391602,
      "step": 24129
    },
    {
      "epoch": 0.000147271728515625,
      "step": 24129,
      "training_step_time": 0.41846704483032227
    },
    {
      "epoch": 0.00014727783203125,
      "grad_norm": 0.16337436437606812,
      "learning_rate": 6.975618105158346e-05,
      "loss": 0.0433,
      "step": 24130
    },
    {
      "epoch": 0.00014727783203125,
      "model_forward_time": 0.11449575424194336,
      "step": 24130
    },
    {
      "epoch": 0.00014727783203125,
      "step": 24130,
      "training_step_time": 0.42234039306640625
    },
    {
      "epoch": 0.000147283935546875,
      "model_forward_time": 0.11472153663635254,
      "step": 24131
    },
    {
      "epoch": 0.000147283935546875,
      "step": 24131,
      "training_step_time": 0.5982904434204102
    },
    {
      "epoch": 0.0001472900390625,
      "model_forward_time": 0.11514067649841309,
      "step": 24132
    },
    {
      "epoch": 0.0001472900390625,
      "step": 24132,
      "training_step_time": 0.3989145755767822
    },
    {
      "epoch": 0.000147296142578125,
      "model_forward_time": 0.11507892608642578,
      "step": 24133
    },
    {
      "epoch": 0.000147296142578125,
      "step": 24133,
      "training_step_time": 0.3913264274597168
    },
    {
      "epoch": 0.00014730224609375,
      "model_forward_time": 0.11457157135009766,
      "step": 24134
    },
    {
      "epoch": 0.00014730224609375,
      "step": 24134,
      "training_step_time": 0.3924846649169922
    },
    {
      "epoch": 0.000147308349609375,
      "model_forward_time": 0.11442685127258301,
      "step": 24135
    },
    {
      "epoch": 0.000147308349609375,
      "step": 24135,
      "training_step_time": 0.391054630279541
    },
    {
      "epoch": 0.000147314453125,
      "model_forward_time": 0.1148686408996582,
      "step": 24136
    },
    {
      "epoch": 0.000147314453125,
      "step": 24136,
      "training_step_time": 0.3889739513397217
    },
    {
      "epoch": 0.000147320556640625,
      "model_forward_time": 0.11510729789733887,
      "step": 24137
    },
    {
      "epoch": 0.000147320556640625,
      "step": 24137,
      "training_step_time": 0.5799469947814941
    },
    {
      "epoch": 0.00014732666015625,
      "model_forward_time": 0.11476588249206543,
      "step": 24138
    },
    {
      "epoch": 0.00014732666015625,
      "step": 24138,
      "training_step_time": 0.36536288261413574
    },
    {
      "epoch": 0.000147332763671875,
      "model_forward_time": 0.11481523513793945,
      "step": 24139
    },
    {
      "epoch": 0.000147332763671875,
      "step": 24139,
      "training_step_time": 0.455230712890625
    },
    {
      "epoch": 0.0001473388671875,
      "grad_norm": 0.14218534529209137,
      "learning_rate": 6.973086265880833e-05,
      "loss": 0.0462,
      "step": 24140
    },
    {
      "epoch": 0.0001473388671875,
      "model_forward_time": 0.11499762535095215,
      "step": 24140
    },
    {
      "epoch": 0.0001473388671875,
      "step": 24140,
      "training_step_time": 0.41111326217651367
    },
    {
      "epoch": 0.000147344970703125,
      "model_forward_time": 0.11545133590698242,
      "step": 24141
    },
    {
      "epoch": 0.000147344970703125,
      "step": 24141,
      "training_step_time": 0.4404568672180176
    },
    {
      "epoch": 0.00014735107421875,
      "model_forward_time": 0.1141805648803711,
      "step": 24142
    },
    {
      "epoch": 0.00014735107421875,
      "step": 24142,
      "training_step_time": 0.3822059631347656
    },
    {
      "epoch": 0.000147357177734375,
      "model_forward_time": 0.11568355560302734,
      "step": 24143
    },
    {
      "epoch": 0.000147357177734375,
      "step": 24143,
      "training_step_time": 0.5151979923248291
    },
    {
      "epoch": 0.00014736328125,
      "model_forward_time": 0.11556267738342285,
      "step": 24144
    },
    {
      "epoch": 0.00014736328125,
      "step": 24144,
      "training_step_time": 0.40643310546875
    },
    {
      "epoch": 0.000147369384765625,
      "model_forward_time": 0.11519670486450195,
      "step": 24145
    },
    {
      "epoch": 0.000147369384765625,
      "step": 24145,
      "training_step_time": 0.39347076416015625
    },
    {
      "epoch": 0.00014737548828125,
      "model_forward_time": 0.11486697196960449,
      "step": 24146
    },
    {
      "epoch": 0.00014737548828125,
      "step": 24146,
      "training_step_time": 0.386538028717041
    },
    {
      "epoch": 0.000147381591796875,
      "model_forward_time": 0.11511564254760742,
      "step": 24147
    },
    {
      "epoch": 0.000147381591796875,
      "step": 24147,
      "training_step_time": 0.40012359619140625
    },
    {
      "epoch": 0.0001473876953125,
      "model_forward_time": 0.11523985862731934,
      "step": 24148
    },
    {
      "epoch": 0.0001473876953125,
      "step": 24148,
      "training_step_time": 0.3946807384490967
    },
    {
      "epoch": 0.000147393798828125,
      "model_forward_time": 0.11542272567749023,
      "step": 24149
    },
    {
      "epoch": 0.000147393798828125,
      "step": 24149,
      "training_step_time": 0.5992379188537598
    },
    {
      "epoch": 0.00014739990234375,
      "grad_norm": 0.12781693041324615,
      "learning_rate": 6.97055382723181e-05,
      "loss": 0.0435,
      "step": 24150
    },
    {
      "epoch": 0.00014739990234375,
      "model_forward_time": 0.11463618278503418,
      "step": 24150
    },
    {
      "epoch": 0.00014739990234375,
      "step": 24150,
      "training_step_time": 0.3912210464477539
    },
    {
      "epoch": 0.000147406005859375,
      "model_forward_time": 0.11516904830932617,
      "step": 24151
    },
    {
      "epoch": 0.000147406005859375,
      "step": 24151,
      "training_step_time": 0.40154147148132324
    },
    {
      "epoch": 0.000147412109375,
      "model_forward_time": 0.11555147171020508,
      "step": 24152
    },
    {
      "epoch": 0.000147412109375,
      "step": 24152,
      "training_step_time": 0.36518049240112305
    },
    {
      "epoch": 0.000147418212890625,
      "model_forward_time": 0.11486077308654785,
      "step": 24153
    },
    {
      "epoch": 0.000147418212890625,
      "step": 24153,
      "training_step_time": 0.503856897354126
    },
    {
      "epoch": 0.00014742431640625,
      "model_forward_time": 0.1148378849029541,
      "step": 24154
    },
    {
      "epoch": 0.00014742431640625,
      "step": 24154,
      "training_step_time": 0.40895795822143555
    },
    {
      "epoch": 0.000147430419921875,
      "model_forward_time": 0.11544585227966309,
      "step": 24155
    },
    {
      "epoch": 0.000147430419921875,
      "step": 24155,
      "training_step_time": 0.6094865798950195
    },
    {
      "epoch": 0.0001474365234375,
      "model_forward_time": 0.11479687690734863,
      "step": 24156
    },
    {
      "epoch": 0.0001474365234375,
      "step": 24156,
      "training_step_time": 0.3830251693725586
    },
    {
      "epoch": 0.000147442626953125,
      "model_forward_time": 0.11531257629394531,
      "step": 24157
    },
    {
      "epoch": 0.000147442626953125,
      "step": 24157,
      "training_step_time": 0.5018658638000488
    },
    {
      "epoch": 0.00014744873046875,
      "model_forward_time": 0.11585450172424316,
      "step": 24158
    },
    {
      "epoch": 0.00014744873046875,
      "step": 24158,
      "training_step_time": 0.39041900634765625
    },
    {
      "epoch": 0.000147454833984375,
      "model_forward_time": 0.1146092414855957,
      "step": 24159
    },
    {
      "epoch": 0.000147454833984375,
      "step": 24159,
      "training_step_time": 0.46112895011901855
    },
    {
      "epoch": 0.0001474609375,
      "grad_norm": 0.11236337572336197,
      "learning_rate": 6.968020789980562e-05,
      "loss": 0.0442,
      "step": 24160
    },
    {
      "epoch": 0.0001474609375,
      "model_forward_time": 0.11415433883666992,
      "step": 24160
    },
    {
      "epoch": 0.0001474609375,
      "step": 24160,
      "training_step_time": 0.40167903900146484
    },
    {
      "epoch": 0.000147467041015625,
      "model_forward_time": 0.11472320556640625,
      "step": 24161
    },
    {
      "epoch": 0.000147467041015625,
      "step": 24161,
      "training_step_time": 0.4719827175140381
    },
    {
      "epoch": 0.00014747314453125,
      "model_forward_time": 0.11458778381347656,
      "step": 24162
    },
    {
      "epoch": 0.00014747314453125,
      "step": 24162,
      "training_step_time": 0.4002993106842041
    },
    {
      "epoch": 0.000147479248046875,
      "model_forward_time": 0.11518216133117676,
      "step": 24163
    },
    {
      "epoch": 0.000147479248046875,
      "step": 24163,
      "training_step_time": 0.3921360969543457
    },
    {
      "epoch": 0.0001474853515625,
      "model_forward_time": 0.11475253105163574,
      "step": 24164
    },
    {
      "epoch": 0.0001474853515625,
      "step": 24164,
      "training_step_time": 0.38958024978637695
    },
    {
      "epoch": 0.000147491455078125,
      "model_forward_time": 0.11497759819030762,
      "step": 24165
    },
    {
      "epoch": 0.000147491455078125,
      "step": 24165,
      "training_step_time": 0.41074037551879883
    },
    {
      "epoch": 0.00014749755859375,
      "model_forward_time": 0.11481285095214844,
      "step": 24166
    },
    {
      "epoch": 0.00014749755859375,
      "step": 24166,
      "training_step_time": 0.38369107246398926
    },
    {
      "epoch": 0.000147503662109375,
      "model_forward_time": 0.11499285697937012,
      "step": 24167
    },
    {
      "epoch": 0.000147503662109375,
      "step": 24167,
      "training_step_time": 0.6330809593200684
    },
    {
      "epoch": 0.000147509765625,
      "model_forward_time": 0.11480450630187988,
      "step": 24168
    },
    {
      "epoch": 0.000147509765625,
      "step": 24168,
      "training_step_time": 0.4525277614593506
    },
    {
      "epoch": 0.000147515869140625,
      "model_forward_time": 0.11514496803283691,
      "step": 24169
    },
    {
      "epoch": 0.000147515869140625,
      "step": 24169,
      "training_step_time": 0.38428187370300293
    },
    {
      "epoch": 0.00014752197265625,
      "grad_norm": 0.141607403755188,
      "learning_rate": 6.96548715489656e-05,
      "loss": 0.0458,
      "step": 24170
    },
    {
      "epoch": 0.00014752197265625,
      "model_forward_time": 0.11472606658935547,
      "step": 24170
    },
    {
      "epoch": 0.00014752197265625,
      "step": 24170,
      "training_step_time": 0.3852870464324951
    },
    {
      "epoch": 0.000147528076171875,
      "model_forward_time": 0.11469244956970215,
      "step": 24171
    },
    {
      "epoch": 0.000147528076171875,
      "step": 24171,
      "training_step_time": 0.4476945400238037
    },
    {
      "epoch": 0.0001475341796875,
      "model_forward_time": 0.11461091041564941,
      "step": 24172
    },
    {
      "epoch": 0.0001475341796875,
      "step": 24172,
      "training_step_time": 0.4062168598175049
    },
    {
      "epoch": 0.000147540283203125,
      "model_forward_time": 0.11494207382202148,
      "step": 24173
    },
    {
      "epoch": 0.000147540283203125,
      "step": 24173,
      "training_step_time": 0.5996930599212646
    },
    {
      "epoch": 0.00014754638671875,
      "model_forward_time": 0.11449718475341797,
      "step": 24174
    },
    {
      "epoch": 0.00014754638671875,
      "step": 24174,
      "training_step_time": 0.3875613212585449
    },
    {
      "epoch": 0.000147552490234375,
      "model_forward_time": 0.11467909812927246,
      "step": 24175
    },
    {
      "epoch": 0.000147552490234375,
      "step": 24175,
      "training_step_time": 0.3853588104248047
    },
    {
      "epoch": 0.00014755859375,
      "model_forward_time": 0.11473894119262695,
      "step": 24176
    },
    {
      "epoch": 0.00014755859375,
      "step": 24176,
      "training_step_time": 0.3864929676055908
    },
    {
      "epoch": 0.000147564697265625,
      "model_forward_time": 0.11530375480651855,
      "step": 24177
    },
    {
      "epoch": 0.000147564697265625,
      "step": 24177,
      "training_step_time": 0.38422226905822754
    },
    {
      "epoch": 0.00014757080078125,
      "model_forward_time": 0.11513304710388184,
      "step": 24178
    },
    {
      "epoch": 0.00014757080078125,
      "step": 24178,
      "training_step_time": 0.3778870105743408
    },
    {
      "epoch": 0.000147576904296875,
      "model_forward_time": 0.11475229263305664,
      "step": 24179
    },
    {
      "epoch": 0.000147576904296875,
      "step": 24179,
      "training_step_time": 0.6965157985687256
    },
    {
      "epoch": 0.0001475830078125,
      "grad_norm": 0.18386012315750122,
      "learning_rate": 6.962952922749457e-05,
      "loss": 0.045,
      "step": 24180
    },
    {
      "epoch": 0.0001475830078125,
      "model_forward_time": 0.11467266082763672,
      "step": 24180
    },
    {
      "epoch": 0.0001475830078125,
      "step": 24180,
      "training_step_time": 0.4484736919403076
    },
    {
      "epoch": 0.000147589111328125,
      "model_forward_time": 0.11486458778381348,
      "step": 24181
    },
    {
      "epoch": 0.000147589111328125,
      "step": 24181,
      "training_step_time": 0.4056551456451416
    },
    {
      "epoch": 0.00014759521484375,
      "model_forward_time": 0.11443519592285156,
      "step": 24182
    },
    {
      "epoch": 0.00014759521484375,
      "step": 24182,
      "training_step_time": 0.4742414951324463
    },
    {
      "epoch": 0.000147601318359375,
      "model_forward_time": 0.11428236961364746,
      "step": 24183
    },
    {
      "epoch": 0.000147601318359375,
      "step": 24183,
      "training_step_time": 0.3836498260498047
    },
    {
      "epoch": 0.000147607421875,
      "model_forward_time": 0.11423230171203613,
      "step": 24184
    },
    {
      "epoch": 0.000147607421875,
      "step": 24184,
      "training_step_time": 0.38549232482910156
    },
    {
      "epoch": 0.000147613525390625,
      "model_forward_time": 0.11445283889770508,
      "step": 24185
    },
    {
      "epoch": 0.000147613525390625,
      "step": 24185,
      "training_step_time": 0.561690092086792
    },
    {
      "epoch": 0.00014761962890625,
      "model_forward_time": 0.11411499977111816,
      "step": 24186
    },
    {
      "epoch": 0.00014761962890625,
      "step": 24186,
      "training_step_time": 0.441849946975708
    },
    {
      "epoch": 0.000147625732421875,
      "model_forward_time": 0.1144874095916748,
      "step": 24187
    },
    {
      "epoch": 0.000147625732421875,
      "step": 24187,
      "training_step_time": 0.39253997802734375
    },
    {
      "epoch": 0.0001476318359375,
      "model_forward_time": 0.11437678337097168,
      "step": 24188
    },
    {
      "epoch": 0.0001476318359375,
      "step": 24188,
      "training_step_time": 0.3837757110595703
    },
    {
      "epoch": 0.000147637939453125,
      "model_forward_time": 0.11451339721679688,
      "step": 24189
    },
    {
      "epoch": 0.000147637939453125,
      "step": 24189,
      "training_step_time": 0.38985347747802734
    },
    {
      "epoch": 0.00014764404296875,
      "grad_norm": 0.13442452251911163,
      "learning_rate": 6.960418094309085e-05,
      "loss": 0.052,
      "step": 24190
    },
    {
      "epoch": 0.00014764404296875,
      "model_forward_time": 0.11712431907653809,
      "step": 24190
    },
    {
      "epoch": 0.00014764404296875,
      "step": 24190,
      "training_step_time": 0.38694214820861816
    },
    {
      "epoch": 0.000147650146484375,
      "model_forward_time": 0.11477971076965332,
      "step": 24191
    },
    {
      "epoch": 0.000147650146484375,
      "step": 24191,
      "training_step_time": 0.6484057903289795
    },
    {
      "epoch": 0.00014765625,
      "model_forward_time": 0.11450839042663574,
      "step": 24192
    },
    {
      "epoch": 0.00014765625,
      "step": 24192,
      "training_step_time": 0.4019479751586914
    },
    {
      "epoch": 0.000147662353515625,
      "model_forward_time": 0.1145162582397461,
      "step": 24193
    },
    {
      "epoch": 0.000147662353515625,
      "step": 24193,
      "training_step_time": 0.4448728561401367
    },
    {
      "epoch": 0.00014766845703125,
      "model_forward_time": 0.11514520645141602,
      "step": 24194
    },
    {
      "epoch": 0.00014766845703125,
      "step": 24194,
      "training_step_time": 0.40864038467407227
    },
    {
      "epoch": 0.000147674560546875,
      "model_forward_time": 0.11504054069519043,
      "step": 24195
    },
    {
      "epoch": 0.000147674560546875,
      "step": 24195,
      "training_step_time": 0.3888864517211914
    },
    {
      "epoch": 0.0001476806640625,
      "model_forward_time": 0.11492443084716797,
      "step": 24196
    },
    {
      "epoch": 0.0001476806640625,
      "step": 24196,
      "training_step_time": 0.4996366500854492
    },
    {
      "epoch": 0.000147686767578125,
      "model_forward_time": 0.11457252502441406,
      "step": 24197
    },
    {
      "epoch": 0.000147686767578125,
      "step": 24197,
      "training_step_time": 0.4753284454345703
    },
    {
      "epoch": 0.00014769287109375,
      "model_forward_time": 0.11434626579284668,
      "step": 24198
    },
    {
      "epoch": 0.00014769287109375,
      "step": 24198,
      "training_step_time": 0.4343383312225342
    },
    {
      "epoch": 0.000147698974609375,
      "model_forward_time": 0.11483359336853027,
      "step": 24199
    },
    {
      "epoch": 0.000147698974609375,
      "step": 24199,
      "training_step_time": 0.4082677364349365
    },
    {
      "epoch": 0.000147705078125,
      "grad_norm": 0.16692586243152618,
      "learning_rate": 6.957882670345458e-05,
      "loss": 0.0442,
      "step": 24200
    },
    {
      "epoch": 0.000147705078125,
      "model_forward_time": 0.1145474910736084,
      "step": 24200
    },
    {
      "epoch": 0.000147705078125,
      "step": 24200,
      "training_step_time": 0.47069811820983887
    },
    {
      "epoch": 0.000147711181640625,
      "model_forward_time": 0.11428642272949219,
      "step": 24201
    },
    {
      "epoch": 0.000147711181640625,
      "step": 24201,
      "training_step_time": 0.38642334938049316
    },
    {
      "epoch": 0.00014771728515625,
      "model_forward_time": 0.11467313766479492,
      "step": 24202
    },
    {
      "epoch": 0.00014771728515625,
      "step": 24202,
      "training_step_time": 0.3865211009979248
    },
    {
      "epoch": 0.000147723388671875,
      "model_forward_time": 0.11484098434448242,
      "step": 24203
    },
    {
      "epoch": 0.000147723388671875,
      "step": 24203,
      "training_step_time": 0.5663650035858154
    },
    {
      "epoch": 0.0001477294921875,
      "model_forward_time": 0.11555027961730957,
      "step": 24204
    },
    {
      "epoch": 0.0001477294921875,
      "step": 24204,
      "training_step_time": 0.3953547477722168
    },
    {
      "epoch": 0.000147735595703125,
      "model_forward_time": 0.1151425838470459,
      "step": 24205
    },
    {
      "epoch": 0.000147735595703125,
      "step": 24205,
      "training_step_time": 0.39577436447143555
    },
    {
      "epoch": 0.00014774169921875,
      "model_forward_time": 0.11468935012817383,
      "step": 24206
    },
    {
      "epoch": 0.00014774169921875,
      "step": 24206,
      "training_step_time": 0.3917403221130371
    },
    {
      "epoch": 0.000147747802734375,
      "model_forward_time": 0.11571311950683594,
      "step": 24207
    },
    {
      "epoch": 0.000147747802734375,
      "step": 24207,
      "training_step_time": 0.40604114532470703
    },
    {
      "epoch": 0.00014775390625,
      "model_forward_time": 0.11541914939880371,
      "step": 24208
    },
    {
      "epoch": 0.00014775390625,
      "step": 24208,
      "training_step_time": 0.48893284797668457
    },
    {
      "epoch": 0.000147760009765625,
      "model_forward_time": 0.1150674819946289,
      "step": 24209
    },
    {
      "epoch": 0.000147760009765625,
      "step": 24209,
      "training_step_time": 0.5176165103912354
    },
    {
      "epoch": 0.00014776611328125,
      "grad_norm": 0.16208453476428986,
      "learning_rate": 6.955346651628771e-05,
      "loss": 0.0472,
      "step": 24210
    },
    {
      "epoch": 0.00014776611328125,
      "model_forward_time": 0.1146097183227539,
      "step": 24210
    },
    {
      "epoch": 0.00014776611328125,
      "step": 24210,
      "training_step_time": 0.400026798248291
    },
    {
      "epoch": 0.000147772216796875,
      "model_forward_time": 0.11474442481994629,
      "step": 24211
    },
    {
      "epoch": 0.000147772216796875,
      "step": 24211,
      "training_step_time": 0.39043211936950684
    },
    {
      "epoch": 0.0001477783203125,
      "model_forward_time": 0.11553359031677246,
      "step": 24212
    },
    {
      "epoch": 0.0001477783203125,
      "step": 24212,
      "training_step_time": 0.3946802616119385
    },
    {
      "epoch": 0.000147784423828125,
      "model_forward_time": 0.11468911170959473,
      "step": 24213
    },
    {
      "epoch": 0.000147784423828125,
      "step": 24213,
      "training_step_time": 0.39501452445983887
    },
    {
      "epoch": 0.00014779052734375,
      "model_forward_time": 0.11574721336364746,
      "step": 24214
    },
    {
      "epoch": 0.00014779052734375,
      "step": 24214,
      "training_step_time": 0.4761812686920166
    },
    {
      "epoch": 0.000147796630859375,
      "model_forward_time": 0.11448955535888672,
      "step": 24215
    },
    {
      "epoch": 0.000147796630859375,
      "step": 24215,
      "training_step_time": 0.5700347423553467
    },
    {
      "epoch": 0.000147802734375,
      "model_forward_time": 0.11467337608337402,
      "step": 24216
    },
    {
      "epoch": 0.000147802734375,
      "step": 24216,
      "training_step_time": 0.40630030632019043
    },
    {
      "epoch": 0.000147808837890625,
      "model_forward_time": 0.11509966850280762,
      "step": 24217
    },
    {
      "epoch": 0.000147808837890625,
      "step": 24217,
      "training_step_time": 0.3984827995300293
    },
    {
      "epoch": 0.00014781494140625,
      "model_forward_time": 0.11474800109863281,
      "step": 24218
    },
    {
      "epoch": 0.00014781494140625,
      "step": 24218,
      "training_step_time": 0.3956642150878906
    },
    {
      "epoch": 0.000147821044921875,
      "model_forward_time": 0.11472415924072266,
      "step": 24219
    },
    {
      "epoch": 0.000147821044921875,
      "step": 24219,
      "training_step_time": 0.3972768783569336
    },
    {
      "epoch": 0.0001478271484375,
      "grad_norm": 0.1245088130235672,
      "learning_rate": 6.952810038929397e-05,
      "loss": 0.0474,
      "step": 24220
    },
    {
      "epoch": 0.0001478271484375,
      "model_forward_time": 0.11462020874023438,
      "step": 24220
    },
    {
      "epoch": 0.0001478271484375,
      "step": 24220,
      "training_step_time": 0.4127225875854492
    },
    {
      "epoch": 0.000147833251953125,
      "model_forward_time": 0.11498665809631348,
      "step": 24221
    },
    {
      "epoch": 0.000147833251953125,
      "step": 24221,
      "training_step_time": 0.5938971042633057
    },
    {
      "epoch": 0.00014783935546875,
      "model_forward_time": 0.11482429504394531,
      "step": 24222
    },
    {
      "epoch": 0.00014783935546875,
      "step": 24222,
      "training_step_time": 0.37619709968566895
    },
    {
      "epoch": 0.000147845458984375,
      "model_forward_time": 0.11505389213562012,
      "step": 24223
    },
    {
      "epoch": 0.000147845458984375,
      "step": 24223,
      "training_step_time": 0.45919203758239746
    },
    {
      "epoch": 0.0001478515625,
      "model_forward_time": 0.11473894119262695,
      "step": 24224
    },
    {
      "epoch": 0.0001478515625,
      "step": 24224,
      "training_step_time": 0.40549659729003906
    },
    {
      "epoch": 0.000147857666015625,
      "model_forward_time": 0.11472272872924805,
      "step": 24225
    },
    {
      "epoch": 0.000147857666015625,
      "step": 24225,
      "training_step_time": 0.37949323654174805
    },
    {
      "epoch": 0.00014786376953125,
      "model_forward_time": 0.11513972282409668,
      "step": 24226
    },
    {
      "epoch": 0.00014786376953125,
      "step": 24226,
      "training_step_time": 0.45855021476745605
    },
    {
      "epoch": 0.000147869873046875,
      "model_forward_time": 0.11482667922973633,
      "step": 24227
    },
    {
      "epoch": 0.000147869873046875,
      "step": 24227,
      "training_step_time": 0.556563138961792
    },
    {
      "epoch": 0.0001478759765625,
      "model_forward_time": 0.11521220207214355,
      "step": 24228
    },
    {
      "epoch": 0.0001478759765625,
      "step": 24228,
      "training_step_time": 0.39045286178588867
    },
    {
      "epoch": 0.000147882080078125,
      "model_forward_time": 0.1152794361114502,
      "step": 24229
    },
    {
      "epoch": 0.000147882080078125,
      "step": 24229,
      "training_step_time": 0.37548279762268066
    },
    {
      "epoch": 0.00014788818359375,
      "grad_norm": 0.1662716120481491,
      "learning_rate": 6.950272833017896e-05,
      "loss": 0.0452,
      "step": 24230
    },
    {
      "epoch": 0.00014788818359375,
      "model_forward_time": 0.11531352996826172,
      "step": 24230
    },
    {
      "epoch": 0.00014788818359375,
      "step": 24230,
      "training_step_time": 0.3772578239440918
    },
    {
      "epoch": 0.000147894287109375,
      "model_forward_time": 0.11539959907531738,
      "step": 24231
    },
    {
      "epoch": 0.000147894287109375,
      "step": 24231,
      "training_step_time": 0.3804800510406494
    },
    {
      "epoch": 0.000147900390625,
      "model_forward_time": 0.11553049087524414,
      "step": 24232
    },
    {
      "epoch": 0.000147900390625,
      "step": 24232,
      "training_step_time": 0.37595605850219727
    },
    {
      "epoch": 0.000147906494140625,
      "model_forward_time": 0.11525750160217285,
      "step": 24233
    },
    {
      "epoch": 0.000147906494140625,
      "step": 24233,
      "training_step_time": 0.6261396408081055
    },
    {
      "epoch": 0.00014791259765625,
      "model_forward_time": 0.11575627326965332,
      "step": 24234
    },
    {
      "epoch": 0.00014791259765625,
      "step": 24234,
      "training_step_time": 0.39076948165893555
    },
    {
      "epoch": 0.000147918701171875,
      "model_forward_time": 0.1150047779083252,
      "step": 24235
    },
    {
      "epoch": 0.000147918701171875,
      "step": 24235,
      "training_step_time": 0.3841371536254883
    },
    {
      "epoch": 0.0001479248046875,
      "model_forward_time": 0.11449980735778809,
      "step": 24236
    },
    {
      "epoch": 0.0001479248046875,
      "step": 24236,
      "training_step_time": 0.3654952049255371
    },
    {
      "epoch": 0.000147930908203125,
      "model_forward_time": 0.1152033805847168,
      "step": 24237
    },
    {
      "epoch": 0.000147930908203125,
      "step": 24237,
      "training_step_time": 0.468555212020874
    },
    {
      "epoch": 0.00014793701171875,
      "model_forward_time": 0.11474943161010742,
      "step": 24238
    },
    {
      "epoch": 0.00014793701171875,
      "step": 24238,
      "training_step_time": 0.47619175910949707
    },
    {
      "epoch": 0.000147943115234375,
      "model_forward_time": 0.11572718620300293,
      "step": 24239
    },
    {
      "epoch": 0.000147943115234375,
      "step": 24239,
      "training_step_time": 0.6336815357208252
    },
    {
      "epoch": 0.00014794921875,
      "grad_norm": 0.1620815396308899,
      "learning_rate": 6.947735034665002e-05,
      "loss": 0.0475,
      "step": 24240
    },
    {
      "epoch": 0.00014794921875,
      "model_forward_time": 0.11502695083618164,
      "step": 24240
    },
    {
      "epoch": 0.00014794921875,
      "step": 24240,
      "training_step_time": 0.40099120140075684
    },
    {
      "epoch": 0.000147955322265625,
      "model_forward_time": 0.11469268798828125,
      "step": 24241
    },
    {
      "epoch": 0.000147955322265625,
      "step": 24241,
      "training_step_time": 0.42182350158691406
    },
    {
      "epoch": 0.00014796142578125,
      "model_forward_time": 0.11366152763366699,
      "step": 24242
    },
    {
      "epoch": 0.00014796142578125,
      "step": 24242,
      "training_step_time": 0.38306570053100586
    },
    {
      "epoch": 0.000147967529296875,
      "model_forward_time": 0.11425566673278809,
      "step": 24243
    },
    {
      "epoch": 0.000147967529296875,
      "step": 24243,
      "training_step_time": 0.3936448097229004
    },
    {
      "epoch": 0.0001479736328125,
      "model_forward_time": 0.11523628234863281,
      "step": 24244
    },
    {
      "epoch": 0.0001479736328125,
      "step": 24244,
      "training_step_time": 0.3892345428466797
    },
    {
      "epoch": 0.000147979736328125,
      "model_forward_time": 0.11503338813781738,
      "step": 24245
    },
    {
      "epoch": 0.000147979736328125,
      "step": 24245,
      "training_step_time": 0.6141641139984131
    },
    {
      "epoch": 0.00014798583984375,
      "model_forward_time": 0.11502718925476074,
      "step": 24246
    },
    {
      "epoch": 0.00014798583984375,
      "step": 24246,
      "training_step_time": 0.4087531566619873
    },
    {
      "epoch": 0.000147991943359375,
      "model_forward_time": 0.11586165428161621,
      "step": 24247
    },
    {
      "epoch": 0.000147991943359375,
      "step": 24247,
      "training_step_time": 0.4126169681549072
    },
    {
      "epoch": 0.000147998046875,
      "model_forward_time": 0.11481618881225586,
      "step": 24248
    },
    {
      "epoch": 0.000147998046875,
      "step": 24248,
      "training_step_time": 0.38712167739868164
    },
    {
      "epoch": 0.000148004150390625,
      "model_forward_time": 0.11446976661682129,
      "step": 24249
    },
    {
      "epoch": 0.000148004150390625,
      "step": 24249,
      "training_step_time": 0.38785314559936523
    },
    {
      "epoch": 0.00014801025390625,
      "grad_norm": 0.1403183788061142,
      "learning_rate": 6.94519664464163e-05,
      "loss": 0.042,
      "step": 24250
    },
    {
      "epoch": 0.00014801025390625,
      "model_forward_time": 0.11503148078918457,
      "step": 24250
    },
    {
      "epoch": 0.00014801025390625,
      "step": 24250,
      "training_step_time": 0.45522356033325195
    },
    {
      "epoch": 0.000148016357421875,
      "model_forward_time": 0.11732888221740723,
      "step": 24251
    },
    {
      "epoch": 0.000148016357421875,
      "step": 24251,
      "training_step_time": 0.5097787380218506
    },
    {
      "epoch": 0.0001480224609375,
      "model_forward_time": 0.11493611335754395,
      "step": 24252
    },
    {
      "epoch": 0.0001480224609375,
      "step": 24252,
      "training_step_time": 0.41667747497558594
    },
    {
      "epoch": 0.000148028564453125,
      "model_forward_time": 0.1151583194732666,
      "step": 24253
    },
    {
      "epoch": 0.000148028564453125,
      "step": 24253,
      "training_step_time": 0.3969137668609619
    },
    {
      "epoch": 0.00014803466796875,
      "model_forward_time": 0.11548995971679688,
      "step": 24254
    },
    {
      "epoch": 0.00014803466796875,
      "step": 24254,
      "training_step_time": 0.4201343059539795
    },
    {
      "epoch": 0.000148040771484375,
      "model_forward_time": 0.1154789924621582,
      "step": 24255
    },
    {
      "epoch": 0.000148040771484375,
      "step": 24255,
      "training_step_time": 0.4685196876525879
    },
    {
      "epoch": 0.000148046875,
      "model_forward_time": 0.11527776718139648,
      "step": 24256
    },
    {
      "epoch": 0.000148046875,
      "step": 24256,
      "training_step_time": 0.4011039733886719
    },
    {
      "epoch": 0.000148052978515625,
      "model_forward_time": 0.1159977912902832,
      "step": 24257
    },
    {
      "epoch": 0.000148052978515625,
      "step": 24257,
      "training_step_time": 0.4476175308227539
    },
    {
      "epoch": 0.00014805908203125,
      "model_forward_time": 0.11533927917480469,
      "step": 24258
    },
    {
      "epoch": 0.00014805908203125,
      "step": 24258,
      "training_step_time": 0.39713311195373535
    },
    {
      "epoch": 0.000148065185546875,
      "model_forward_time": 0.11594438552856445,
      "step": 24259
    },
    {
      "epoch": 0.000148065185546875,
      "step": 24259,
      "training_step_time": 0.3932828903198242
    },
    {
      "epoch": 0.0001480712890625,
      "grad_norm": 0.1157677173614502,
      "learning_rate": 6.942657663718879e-05,
      "loss": 0.048,
      "step": 24260
    },
    {
      "epoch": 0.0001480712890625,
      "model_forward_time": 0.11473727226257324,
      "step": 24260
    },
    {
      "epoch": 0.0001480712890625,
      "step": 24260,
      "training_step_time": 0.4114108085632324
    },
    {
      "epoch": 0.000148077392578125,
      "model_forward_time": 0.11473822593688965,
      "step": 24261
    },
    {
      "epoch": 0.000148077392578125,
      "step": 24261,
      "training_step_time": 0.3853168487548828
    },
    {
      "epoch": 0.00014808349609375,
      "model_forward_time": 0.11543917655944824,
      "step": 24262
    },
    {
      "epoch": 0.00014808349609375,
      "step": 24262,
      "training_step_time": 0.3915598392486572
    },
    {
      "epoch": 0.000148089599609375,
      "model_forward_time": 0.11545705795288086,
      "step": 24263
    },
    {
      "epoch": 0.000148089599609375,
      "step": 24263,
      "training_step_time": 0.8146898746490479
    },
    {
      "epoch": 0.000148095703125,
      "model_forward_time": 0.11408138275146484,
      "step": 24264
    },
    {
      "epoch": 0.000148095703125,
      "step": 24264,
      "training_step_time": 0.4891958236694336
    },
    {
      "epoch": 0.000148101806640625,
      "model_forward_time": 0.11488938331604004,
      "step": 24265
    },
    {
      "epoch": 0.000148101806640625,
      "step": 24265,
      "training_step_time": 0.5055122375488281
    },
    {
      "epoch": 0.00014810791015625,
      "model_forward_time": 0.11459183692932129,
      "step": 24266
    },
    {
      "epoch": 0.00014810791015625,
      "step": 24266,
      "training_step_time": 0.3804655075073242
    },
    {
      "epoch": 0.000148114013671875,
      "model_forward_time": 0.11371445655822754,
      "step": 24267
    },
    {
      "epoch": 0.000148114013671875,
      "step": 24267,
      "training_step_time": 0.38284897804260254
    },
    {
      "epoch": 0.0001481201171875,
      "model_forward_time": 0.11392712593078613,
      "step": 24268
    },
    {
      "epoch": 0.0001481201171875,
      "step": 24268,
      "training_step_time": 0.40513062477111816
    },
    {
      "epoch": 0.000148126220703125,
      "model_forward_time": 0.11514401435852051,
      "step": 24269
    },
    {
      "epoch": 0.000148126220703125,
      "step": 24269,
      "training_step_time": 0.3922438621520996
    },
    {
      "epoch": 0.00014813232421875,
      "grad_norm": 0.17336519062519073,
      "learning_rate": 6.940118092668022e-05,
      "loss": 0.0418,
      "step": 24270
    },
    {
      "epoch": 0.00014813232421875,
      "model_forward_time": 0.11510610580444336,
      "step": 24270
    },
    {
      "epoch": 0.00014813232421875,
      "step": 24270,
      "training_step_time": 0.39519381523132324
    },
    {
      "epoch": 0.000148138427734375,
      "model_forward_time": 0.1160891056060791,
      "step": 24271
    },
    {
      "epoch": 0.000148138427734375,
      "step": 24271,
      "training_step_time": 0.38831257820129395
    },
    {
      "epoch": 0.00014814453125,
      "model_forward_time": 0.11549139022827148,
      "step": 24272
    },
    {
      "epoch": 0.00014814453125,
      "step": 24272,
      "training_step_time": 0.3948328495025635
    },
    {
      "epoch": 0.000148150634765625,
      "model_forward_time": 0.1146230697631836,
      "step": 24273
    },
    {
      "epoch": 0.000148150634765625,
      "step": 24273,
      "training_step_time": 0.38765454292297363
    },
    {
      "epoch": 0.00014815673828125,
      "model_forward_time": 0.1158456802368164,
      "step": 24274
    },
    {
      "epoch": 0.00014815673828125,
      "step": 24274,
      "training_step_time": 0.4370598793029785
    },
    {
      "epoch": 0.000148162841796875,
      "model_forward_time": 0.11575984954833984,
      "step": 24275
    },
    {
      "epoch": 0.000148162841796875,
      "step": 24275,
      "training_step_time": 0.5206897258758545
    },
    {
      "epoch": 0.0001481689453125,
      "model_forward_time": 0.11584305763244629,
      "step": 24276
    },
    {
      "epoch": 0.0001481689453125,
      "step": 24276,
      "training_step_time": 0.38915443420410156
    },
    {
      "epoch": 0.000148175048828125,
      "model_forward_time": 0.11557626724243164,
      "step": 24277
    },
    {
      "epoch": 0.000148175048828125,
      "step": 24277,
      "training_step_time": 0.40021252632141113
    },
    {
      "epoch": 0.00014818115234375,
      "model_forward_time": 0.11514019966125488,
      "step": 24278
    },
    {
      "epoch": 0.00014818115234375,
      "step": 24278,
      "training_step_time": 0.38289809226989746
    },
    {
      "epoch": 0.000148187255859375,
      "model_forward_time": 0.11480450630187988,
      "step": 24279
    },
    {
      "epoch": 0.000148187255859375,
      "step": 24279,
      "training_step_time": 0.41887474060058594
    },
    {
      "epoch": 0.000148193359375,
      "grad_norm": 0.13153775036334991,
      "learning_rate": 6.937577932260515e-05,
      "loss": 0.0417,
      "step": 24280
    },
    {
      "epoch": 0.000148193359375,
      "model_forward_time": 0.11542534828186035,
      "step": 24280
    },
    {
      "epoch": 0.000148193359375,
      "step": 24280,
      "training_step_time": 0.4176063537597656
    },
    {
      "epoch": 0.000148199462890625,
      "model_forward_time": 0.11506128311157227,
      "step": 24281
    },
    {
      "epoch": 0.000148199462890625,
      "step": 24281,
      "training_step_time": 0.5013246536254883
    },
    {
      "epoch": 0.00014820556640625,
      "model_forward_time": 0.11445808410644531,
      "step": 24282
    },
    {
      "epoch": 0.00014820556640625,
      "step": 24282,
      "training_step_time": 0.4540884494781494
    },
    {
      "epoch": 0.000148211669921875,
      "model_forward_time": 0.11483049392700195,
      "step": 24283
    },
    {
      "epoch": 0.000148211669921875,
      "step": 24283,
      "training_step_time": 0.39624619483947754
    },
    {
      "epoch": 0.0001482177734375,
      "model_forward_time": 0.11424517631530762,
      "step": 24284
    },
    {
      "epoch": 0.0001482177734375,
      "step": 24284,
      "training_step_time": 0.39554786682128906
    },
    {
      "epoch": 0.000148223876953125,
      "model_forward_time": 0.11545181274414062,
      "step": 24285
    },
    {
      "epoch": 0.000148223876953125,
      "step": 24285,
      "training_step_time": 0.38941431045532227
    },
    {
      "epoch": 0.00014822998046875,
      "model_forward_time": 0.11553359031677246,
      "step": 24286
    },
    {
      "epoch": 0.00014822998046875,
      "step": 24286,
      "training_step_time": 0.46325039863586426
    },
    {
      "epoch": 0.000148236083984375,
      "model_forward_time": 0.11495184898376465,
      "step": 24287
    },
    {
      "epoch": 0.000148236083984375,
      "step": 24287,
      "training_step_time": 0.4558830261230469
    },
    {
      "epoch": 0.0001482421875,
      "model_forward_time": 0.11536741256713867,
      "step": 24288
    },
    {
      "epoch": 0.0001482421875,
      "step": 24288,
      "training_step_time": 0.396498441696167
    },
    {
      "epoch": 0.000148248291015625,
      "model_forward_time": 0.11524510383605957,
      "step": 24289
    },
    {
      "epoch": 0.000148248291015625,
      "step": 24289,
      "training_step_time": 0.3916294574737549
    },
    {
      "epoch": 0.00014825439453125,
      "grad_norm": 0.11336513608694077,
      "learning_rate": 6.93503718326799e-05,
      "loss": 0.0417,
      "step": 24290
    },
    {
      "epoch": 0.00014825439453125,
      "model_forward_time": 0.11493349075317383,
      "step": 24290
    },
    {
      "epoch": 0.00014825439453125,
      "step": 24290,
      "training_step_time": 0.3893907070159912
    },
    {
      "epoch": 0.000148260498046875,
      "model_forward_time": 0.11501669883728027,
      "step": 24291
    },
    {
      "epoch": 0.000148260498046875,
      "step": 24291,
      "training_step_time": 0.3902711868286133
    },
    {
      "epoch": 0.0001482666015625,
      "model_forward_time": 0.11612558364868164,
      "step": 24292
    },
    {
      "epoch": 0.0001482666015625,
      "step": 24292,
      "training_step_time": 0.39159512519836426
    },
    {
      "epoch": 0.000148272705078125,
      "model_forward_time": 0.11530089378356934,
      "step": 24293
    },
    {
      "epoch": 0.000148272705078125,
      "step": 24293,
      "training_step_time": 0.6321344375610352
    },
    {
      "epoch": 0.00014827880859375,
      "model_forward_time": 0.11562323570251465,
      "step": 24294
    },
    {
      "epoch": 0.00014827880859375,
      "step": 24294,
      "training_step_time": 0.4236881732940674
    },
    {
      "epoch": 0.000148284912109375,
      "model_forward_time": 0.11750555038452148,
      "step": 24295
    },
    {
      "epoch": 0.000148284912109375,
      "step": 24295,
      "training_step_time": 0.4272916316986084
    },
    {
      "epoch": 0.000148291015625,
      "model_forward_time": 0.11527657508850098,
      "step": 24296
    },
    {
      "epoch": 0.000148291015625,
      "step": 24296,
      "training_step_time": 0.40325117111206055
    },
    {
      "epoch": 0.000148297119140625,
      "model_forward_time": 0.11497616767883301,
      "step": 24297
    },
    {
      "epoch": 0.000148297119140625,
      "step": 24297,
      "training_step_time": 0.4690864086151123
    },
    {
      "epoch": 0.00014830322265625,
      "model_forward_time": 0.11440610885620117,
      "step": 24298
    },
    {
      "epoch": 0.00014830322265625,
      "step": 24298,
      "training_step_time": 0.39191174507141113
    },
    {
      "epoch": 0.000148309326171875,
      "model_forward_time": 0.11507654190063477,
      "step": 24299
    },
    {
      "epoch": 0.000148309326171875,
      "step": 24299,
      "training_step_time": 0.501072883605957
    },
    {
      "epoch": 0.0001483154296875,
      "grad_norm": 0.08982454240322113,
      "learning_rate": 6.932495846462261e-05,
      "loss": 0.0452,
      "step": 24300
    },
    {
      "epoch": 0.0001483154296875,
      "model_forward_time": 0.11446285247802734,
      "step": 24300
    },
    {
      "epoch": 0.0001483154296875,
      "step": 24300,
      "training_step_time": 0.4025692939758301
    },
    {
      "epoch": 0.000148321533203125,
      "model_forward_time": 0.11455273628234863,
      "step": 24301
    },
    {
      "epoch": 0.000148321533203125,
      "step": 24301,
      "training_step_time": 0.397489070892334
    },
    {
      "epoch": 0.00014832763671875,
      "model_forward_time": 0.11505722999572754,
      "step": 24302
    },
    {
      "epoch": 0.00014832763671875,
      "step": 24302,
      "training_step_time": 0.3819096088409424
    },
    {
      "epoch": 0.000148333740234375,
      "model_forward_time": 0.11483001708984375,
      "step": 24303
    },
    {
      "epoch": 0.000148333740234375,
      "step": 24303,
      "training_step_time": 0.3990616798400879
    },
    {
      "epoch": 0.00014833984375,
      "model_forward_time": 0.11510992050170898,
      "step": 24304
    },
    {
      "epoch": 0.00014833984375,
      "step": 24304,
      "training_step_time": 0.40808677673339844
    },
    {
      "epoch": 0.000148345947265625,
      "model_forward_time": 0.1149139404296875,
      "step": 24305
    },
    {
      "epoch": 0.000148345947265625,
      "step": 24305,
      "training_step_time": 0.6235489845275879
    },
    {
      "epoch": 0.00014835205078125,
      "model_forward_time": 0.11440443992614746,
      "step": 24306
    },
    {
      "epoch": 0.00014835205078125,
      "step": 24306,
      "training_step_time": 0.367290735244751
    },
    {
      "epoch": 0.000148358154296875,
      "model_forward_time": 0.11501646041870117,
      "step": 24307
    },
    {
      "epoch": 0.000148358154296875,
      "step": 24307,
      "training_step_time": 0.4594242572784424
    },
    {
      "epoch": 0.0001483642578125,
      "model_forward_time": 0.11462974548339844,
      "step": 24308
    },
    {
      "epoch": 0.0001483642578125,
      "step": 24308,
      "training_step_time": 0.4026172161102295
    },
    {
      "epoch": 0.000148370361328125,
      "model_forward_time": 0.11463546752929688,
      "step": 24309
    },
    {
      "epoch": 0.000148370361328125,
      "step": 24309,
      "training_step_time": 0.45101237297058105
    },
    {
      "epoch": 0.00014837646484375,
      "grad_norm": 0.143073171377182,
      "learning_rate": 6.929953922615319e-05,
      "loss": 0.0425,
      "step": 24310
    },
    {
      "epoch": 0.00014837646484375,
      "model_forward_time": 0.1144258975982666,
      "step": 24310
    },
    {
      "epoch": 0.00014837646484375,
      "step": 24310,
      "training_step_time": 0.4330756664276123
    },
    {
      "epoch": 0.000148382568359375,
      "model_forward_time": 0.11508750915527344,
      "step": 24311
    },
    {
      "epoch": 0.000148382568359375,
      "step": 24311,
      "training_step_time": 0.4294252395629883
    },
    {
      "epoch": 0.000148388671875,
      "model_forward_time": 0.11525344848632812,
      "step": 24312
    },
    {
      "epoch": 0.000148388671875,
      "step": 24312,
      "training_step_time": 0.3925912380218506
    },
    {
      "epoch": 0.000148394775390625,
      "model_forward_time": 0.11508655548095703,
      "step": 24313
    },
    {
      "epoch": 0.000148394775390625,
      "step": 24313,
      "training_step_time": 0.4441404342651367
    },
    {
      "epoch": 0.00014840087890625,
      "model_forward_time": 0.11557269096374512,
      "step": 24314
    },
    {
      "epoch": 0.00014840087890625,
      "step": 24314,
      "training_step_time": 0.4236435890197754
    },
    {
      "epoch": 0.000148406982421875,
      "model_forward_time": 0.11508679389953613,
      "step": 24315
    },
    {
      "epoch": 0.000148406982421875,
      "step": 24315,
      "training_step_time": 0.3862020969390869
    },
    {
      "epoch": 0.0001484130859375,
      "model_forward_time": 0.11595630645751953,
      "step": 24316
    },
    {
      "epoch": 0.0001484130859375,
      "step": 24316,
      "training_step_time": 0.3913078308105469
    },
    {
      "epoch": 0.000148419189453125,
      "model_forward_time": 0.11487030982971191,
      "step": 24317
    },
    {
      "epoch": 0.000148419189453125,
      "step": 24317,
      "training_step_time": 0.5732958316802979
    },
    {
      "epoch": 0.00014842529296875,
      "model_forward_time": 0.11474156379699707,
      "step": 24318
    },
    {
      "epoch": 0.00014842529296875,
      "step": 24318,
      "training_step_time": 0.3984806537628174
    },
    {
      "epoch": 0.000148431396484375,
      "model_forward_time": 0.11498713493347168,
      "step": 24319
    },
    {
      "epoch": 0.000148431396484375,
      "step": 24319,
      "training_step_time": 0.3889956474304199
    },
    {
      "epoch": 0.0001484375,
      "grad_norm": 0.16611483693122864,
      "learning_rate": 6.927411412499332e-05,
      "loss": 0.049,
      "step": 24320
    },
    {
      "epoch": 0.0001484375,
      "model_forward_time": 0.11545133590698242,
      "step": 24320
    },
    {
      "epoch": 0.0001484375,
      "step": 24320,
      "training_step_time": 0.4399092197418213
    },
    {
      "epoch": 0.000148443603515625,
      "model_forward_time": 0.11480545997619629,
      "step": 24321
    },
    {
      "epoch": 0.000148443603515625,
      "step": 24321,
      "training_step_time": 0.42256832122802734
    },
    {
      "epoch": 0.00014844970703125,
      "model_forward_time": 0.1147310733795166,
      "step": 24322
    },
    {
      "epoch": 0.00014844970703125,
      "step": 24322,
      "training_step_time": 0.4878416061401367
    },
    {
      "epoch": 0.000148455810546875,
      "model_forward_time": 0.11461591720581055,
      "step": 24323
    },
    {
      "epoch": 0.000148455810546875,
      "step": 24323,
      "training_step_time": 0.47133421897888184
    },
    {
      "epoch": 0.0001484619140625,
      "model_forward_time": 0.11499142646789551,
      "step": 24324
    },
    {
      "epoch": 0.0001484619140625,
      "step": 24324,
      "training_step_time": 0.49040937423706055
    },
    {
      "epoch": 0.000148468017578125,
      "model_forward_time": 0.11394023895263672,
      "step": 24325
    },
    {
      "epoch": 0.000148468017578125,
      "step": 24325,
      "training_step_time": 0.3989131450653076
    },
    {
      "epoch": 0.00014847412109375,
      "model_forward_time": 0.1141669750213623,
      "step": 24326
    },
    {
      "epoch": 0.00014847412109375,
      "step": 24326,
      "training_step_time": 0.3978731632232666
    },
    {
      "epoch": 0.000148480224609375,
      "model_forward_time": 0.11469864845275879,
      "step": 24327
    },
    {
      "epoch": 0.000148480224609375,
      "step": 24327,
      "training_step_time": 0.39176154136657715
    },
    {
      "epoch": 0.000148486328125,
      "model_forward_time": 0.11530828475952148,
      "step": 24328
    },
    {
      "epoch": 0.000148486328125,
      "step": 24328,
      "training_step_time": 0.38596415519714355
    },
    {
      "epoch": 0.000148492431640625,
      "model_forward_time": 0.11566853523254395,
      "step": 24329
    },
    {
      "epoch": 0.000148492431640625,
      "step": 24329,
      "training_step_time": 0.4113645553588867
    },
    {
      "epoch": 0.00014849853515625,
      "grad_norm": 0.12387126684188843,
      "learning_rate": 6.924868316886649e-05,
      "loss": 0.0462,
      "step": 24330
    },
    {
      "epoch": 0.00014849853515625,
      "model_forward_time": 0.11510562896728516,
      "step": 24330
    },
    {
      "epoch": 0.00014849853515625,
      "step": 24330,
      "training_step_time": 0.402571439743042
    },
    {
      "epoch": 0.000148504638671875,
      "model_forward_time": 0.1151425838470459,
      "step": 24331
    },
    {
      "epoch": 0.000148504638671875,
      "step": 24331,
      "training_step_time": 0.3939833641052246
    },
    {
      "epoch": 0.0001485107421875,
      "model_forward_time": 0.11559820175170898,
      "step": 24332
    },
    {
      "epoch": 0.0001485107421875,
      "step": 24332,
      "training_step_time": 0.3975663185119629
    },
    {
      "epoch": 0.000148516845703125,
      "model_forward_time": 0.11500000953674316,
      "step": 24333
    },
    {
      "epoch": 0.000148516845703125,
      "step": 24333,
      "training_step_time": 0.39036035537719727
    },
    {
      "epoch": 0.00014852294921875,
      "model_forward_time": 0.11569905281066895,
      "step": 24334
    },
    {
      "epoch": 0.00014852294921875,
      "step": 24334,
      "training_step_time": 0.4404106140136719
    },
    {
      "epoch": 0.000148529052734375,
      "model_forward_time": 0.11568593978881836,
      "step": 24335
    },
    {
      "epoch": 0.000148529052734375,
      "step": 24335,
      "training_step_time": 0.5513801574707031
    },
    {
      "epoch": 0.00014853515625,
      "model_forward_time": 0.11564517021179199,
      "step": 24336
    },
    {
      "epoch": 0.00014853515625,
      "step": 24336,
      "training_step_time": 0.4350700378417969
    },
    {
      "epoch": 0.000148541259765625,
      "model_forward_time": 0.11494660377502441,
      "step": 24337
    },
    {
      "epoch": 0.000148541259765625,
      "step": 24337,
      "training_step_time": 0.45312929153442383
    },
    {
      "epoch": 0.00014854736328125,
      "model_forward_time": 0.11481785774230957,
      "step": 24338
    },
    {
      "epoch": 0.00014854736328125,
      "step": 24338,
      "training_step_time": 0.4082956314086914
    },
    {
      "epoch": 0.000148553466796875,
      "model_forward_time": 0.11442995071411133,
      "step": 24339
    },
    {
      "epoch": 0.000148553466796875,
      "step": 24339,
      "training_step_time": 0.46918535232543945
    },
    {
      "epoch": 0.0001485595703125,
      "grad_norm": 0.10356718301773071,
      "learning_rate": 6.922324636549795e-05,
      "loss": 0.0445,
      "step": 24340
    },
    {
      "epoch": 0.0001485595703125,
      "model_forward_time": 0.11452984809875488,
      "step": 24340
    },
    {
      "epoch": 0.0001485595703125,
      "step": 24340,
      "training_step_time": 0.41260790824890137
    },
    {
      "epoch": 0.000148565673828125,
      "model_forward_time": 0.11471962928771973,
      "step": 24341
    },
    {
      "epoch": 0.000148565673828125,
      "step": 24341,
      "training_step_time": 0.4653656482696533
    },
    {
      "epoch": 0.00014857177734375,
      "model_forward_time": 0.11455821990966797,
      "step": 24342
    },
    {
      "epoch": 0.00014857177734375,
      "step": 24342,
      "training_step_time": 0.4071924686431885
    },
    {
      "epoch": 0.000148577880859375,
      "model_forward_time": 0.11473226547241211,
      "step": 24343
    },
    {
      "epoch": 0.000148577880859375,
      "step": 24343,
      "training_step_time": 0.3959205150604248
    },
    {
      "epoch": 0.000148583984375,
      "model_forward_time": 0.11482095718383789,
      "step": 24344
    },
    {
      "epoch": 0.000148583984375,
      "step": 24344,
      "training_step_time": 0.40160083770751953
    },
    {
      "epoch": 0.000148590087890625,
      "model_forward_time": 0.11500072479248047,
      "step": 24345
    },
    {
      "epoch": 0.000148590087890625,
      "step": 24345,
      "training_step_time": 0.3893468379974365
    },
    {
      "epoch": 0.00014859619140625,
      "model_forward_time": 0.11470961570739746,
      "step": 24346
    },
    {
      "epoch": 0.00014859619140625,
      "step": 24346,
      "training_step_time": 0.3859419822692871
    },
    {
      "epoch": 0.000148602294921875,
      "model_forward_time": 0.11584353446960449,
      "step": 24347
    },
    {
      "epoch": 0.000148602294921875,
      "step": 24347,
      "training_step_time": 0.6766617298126221
    },
    {
      "epoch": 0.0001486083984375,
      "model_forward_time": 0.11522555351257324,
      "step": 24348
    },
    {
      "epoch": 0.0001486083984375,
      "step": 24348,
      "training_step_time": 0.45978450775146484
    },
    {
      "epoch": 0.000148614501953125,
      "model_forward_time": 0.11553645133972168,
      "step": 24349
    },
    {
      "epoch": 0.000148614501953125,
      "step": 24349,
      "training_step_time": 0.48456907272338867
    },
    {
      "epoch": 0.00014862060546875,
      "grad_norm": 0.16324283182621002,
      "learning_rate": 6.91978037226147e-05,
      "loss": 0.0428,
      "step": 24350
    },
    {
      "epoch": 0.00014862060546875,
      "model_forward_time": 0.11784029006958008,
      "step": 24350
    },
    {
      "epoch": 0.00014862060546875,
      "step": 24350,
      "training_step_time": 0.39775991439819336
    },
    {
      "epoch": 0.000148626708984375,
      "model_forward_time": 0.11449027061462402,
      "step": 24351
    },
    {
      "epoch": 0.000148626708984375,
      "step": 24351,
      "training_step_time": 0.4409351348876953
    },
    {
      "epoch": 0.0001486328125,
      "model_forward_time": 0.11592221260070801,
      "step": 24352
    },
    {
      "epoch": 0.0001486328125,
      "step": 24352,
      "training_step_time": 0.435133695602417
    },
    {
      "epoch": 0.000148638916015625,
      "model_forward_time": 0.11475181579589844,
      "step": 24353
    },
    {
      "epoch": 0.000148638916015625,
      "step": 24353,
      "training_step_time": 0.44916439056396484
    },
    {
      "epoch": 0.00014864501953125,
      "model_forward_time": 0.1151893138885498,
      "step": 24354
    },
    {
      "epoch": 0.00014864501953125,
      "step": 24354,
      "training_step_time": 0.3890259265899658
    },
    {
      "epoch": 0.000148651123046875,
      "model_forward_time": 0.1148989200592041,
      "step": 24355
    },
    {
      "epoch": 0.000148651123046875,
      "step": 24355,
      "training_step_time": 0.3913993835449219
    },
    {
      "epoch": 0.0001486572265625,
      "model_forward_time": 0.11542630195617676,
      "step": 24356
    },
    {
      "epoch": 0.0001486572265625,
      "step": 24356,
      "training_step_time": 0.384293794631958
    },
    {
      "epoch": 0.000148663330078125,
      "model_forward_time": 0.1147775650024414,
      "step": 24357
    },
    {
      "epoch": 0.000148663330078125,
      "step": 24357,
      "training_step_time": 0.3796374797821045
    },
    {
      "epoch": 0.00014866943359375,
      "model_forward_time": 0.1146707534790039,
      "step": 24358
    },
    {
      "epoch": 0.00014866943359375,
      "step": 24358,
      "training_step_time": 0.38527369499206543
    },
    {
      "epoch": 0.000148675537109375,
      "model_forward_time": 0.11512184143066406,
      "step": 24359
    },
    {
      "epoch": 0.000148675537109375,
      "step": 24359,
      "training_step_time": 0.7475361824035645
    },
    {
      "epoch": 0.000148681640625,
      "grad_norm": 0.14324301481246948,
      "learning_rate": 6.917235524794558e-05,
      "loss": 0.0498,
      "step": 24360
    },
    {
      "epoch": 0.000148681640625,
      "model_forward_time": 0.11444640159606934,
      "step": 24360
    },
    {
      "epoch": 0.000148681640625,
      "step": 24360,
      "training_step_time": 0.3890550136566162
    },
    {
      "epoch": 0.000148687744140625,
      "model_forward_time": 0.11548614501953125,
      "step": 24361
    },
    {
      "epoch": 0.000148687744140625,
      "step": 24361,
      "training_step_time": 0.3925745487213135
    },
    {
      "epoch": 0.00014869384765625,
      "model_forward_time": 0.11434578895568848,
      "step": 24362
    },
    {
      "epoch": 0.00014869384765625,
      "step": 24362,
      "training_step_time": 0.48670482635498047
    },
    {
      "epoch": 0.000148699951171875,
      "model_forward_time": 0.11541628837585449,
      "step": 24363
    },
    {
      "epoch": 0.000148699951171875,
      "step": 24363,
      "training_step_time": 0.4823915958404541
    },
    {
      "epoch": 0.0001487060546875,
      "model_forward_time": 0.11608624458312988,
      "step": 24364
    },
    {
      "epoch": 0.0001487060546875,
      "step": 24364,
      "training_step_time": 0.6392595767974854
    },
    {
      "epoch": 0.000148712158203125,
      "model_forward_time": 0.12155294418334961,
      "step": 24365
    },
    {
      "epoch": 0.000148712158203125,
      "step": 24365,
      "training_step_time": 0.6607420444488525
    },
    {
      "epoch": 0.00014871826171875,
      "model_forward_time": 0.11711788177490234,
      "step": 24366
    },
    {
      "epoch": 0.00014871826171875,
      "step": 24366,
      "training_step_time": 0.6559300422668457
    },
    {
      "epoch": 0.000148724365234375,
      "model_forward_time": 0.12077164649963379,
      "step": 24367
    },
    {
      "epoch": 0.000148724365234375,
      "step": 24367,
      "training_step_time": 0.7135922908782959
    },
    {
      "epoch": 0.00014873046875,
      "model_forward_time": 0.12436747550964355,
      "step": 24368
    },
    {
      "epoch": 0.00014873046875,
      "step": 24368,
      "training_step_time": 0.6490085124969482
    },
    {
      "epoch": 0.000148736572265625,
      "model_forward_time": 0.11742997169494629,
      "step": 24369
    },
    {
      "epoch": 0.000148736572265625,
      "step": 24369,
      "training_step_time": 0.6529755592346191
    },
    {
      "epoch": 0.00014874267578125,
      "grad_norm": 0.197037473320961,
      "learning_rate": 6.914690094922116e-05,
      "loss": 0.0424,
      "step": 24370
    },
    {
      "epoch": 0.00014874267578125,
      "model_forward_time": 0.11766576766967773,
      "step": 24370
    },
    {
      "epoch": 0.00014874267578125,
      "step": 24370,
      "training_step_time": 0.6610431671142578
    },
    {
      "epoch": 0.000148748779296875,
      "model_forward_time": 0.12114262580871582,
      "step": 24371
    },
    {
      "epoch": 0.000148748779296875,
      "step": 24371,
      "training_step_time": 0.7334215641021729
    },
    {
      "epoch": 0.0001487548828125,
      "model_forward_time": 0.12042498588562012,
      "step": 24372
    },
    {
      "epoch": 0.0001487548828125,
      "step": 24372,
      "training_step_time": 0.7363693714141846
    },
    {
      "epoch": 0.000148760986328125,
      "model_forward_time": 0.11785602569580078,
      "step": 24373
    },
    {
      "epoch": 0.000148760986328125,
      "step": 24373,
      "training_step_time": 0.6732730865478516
    },
    {
      "epoch": 0.00014876708984375,
      "model_forward_time": 0.13202381134033203,
      "step": 24374
    },
    {
      "epoch": 0.00014876708984375,
      "step": 24374,
      "training_step_time": 0.7445216178894043
    },
    {
      "epoch": 0.000148773193359375,
      "model_forward_time": 0.12277626991271973,
      "step": 24375
    },
    {
      "epoch": 0.000148773193359375,
      "step": 24375,
      "training_step_time": 0.6855525970458984
    },
    {
      "epoch": 0.000148779296875,
      "model_forward_time": 0.11860966682434082,
      "step": 24376
    },
    {
      "epoch": 0.000148779296875,
      "step": 24376,
      "training_step_time": 0.6486823558807373
    },
    {
      "epoch": 0.000148785400390625,
      "model_forward_time": 0.11720156669616699,
      "step": 24377
    },
    {
      "epoch": 0.000148785400390625,
      "step": 24377,
      "training_step_time": 0.6434760093688965
    },
    {
      "epoch": 0.00014879150390625,
      "model_forward_time": 0.12402105331420898,
      "step": 24378
    },
    {
      "epoch": 0.00014879150390625,
      "step": 24378,
      "training_step_time": 0.6462891101837158
    },
    {
      "epoch": 0.000148797607421875,
      "model_forward_time": 0.12266206741333008,
      "step": 24379
    },
    {
      "epoch": 0.000148797607421875,
      "step": 24379,
      "training_step_time": 0.7295441627502441
    },
    {
      "epoch": 0.0001488037109375,
      "grad_norm": 0.1966131031513214,
      "learning_rate": 6.912144083417376e-05,
      "loss": 0.0488,
      "step": 24380
    },
    {
      "epoch": 0.0001488037109375,
      "model_forward_time": 0.11959719657897949,
      "step": 24380
    },
    {
      "epoch": 0.0001488037109375,
      "step": 24380,
      "training_step_time": 0.6254642009735107
    },
    {
      "epoch": 0.000148809814453125,
      "model_forward_time": 0.12528777122497559,
      "step": 24381
    },
    {
      "epoch": 0.000148809814453125,
      "step": 24381,
      "training_step_time": 0.660315752029419
    },
    {
      "epoch": 0.00014881591796875,
      "model_forward_time": 0.1255350112915039,
      "step": 24382
    },
    {
      "epoch": 0.00014881591796875,
      "step": 24382,
      "training_step_time": 0.6066062450408936
    },
    {
      "epoch": 0.000148822021484375,
      "model_forward_time": 0.1226491928100586,
      "step": 24383
    },
    {
      "epoch": 0.000148822021484375,
      "step": 24383,
      "training_step_time": 0.7919785976409912
    },
    {
      "epoch": 0.000148828125,
      "model_forward_time": 0.12076210975646973,
      "step": 24384
    },
    {
      "epoch": 0.000148828125,
      "step": 24384,
      "training_step_time": 0.671705961227417
    },
    {
      "epoch": 0.000148834228515625,
      "model_forward_time": 0.11736702919006348,
      "step": 24385
    },
    {
      "epoch": 0.000148834228515625,
      "step": 24385,
      "training_step_time": 0.6633210182189941
    },
    {
      "epoch": 0.00014884033203125,
      "model_forward_time": 0.11664247512817383,
      "step": 24386
    },
    {
      "epoch": 0.00014884033203125,
      "step": 24386,
      "training_step_time": 0.6259679794311523
    },
    {
      "epoch": 0.000148846435546875,
      "model_forward_time": 0.11977791786193848,
      "step": 24387
    },
    {
      "epoch": 0.000148846435546875,
      "step": 24387,
      "training_step_time": 0.6733925342559814
    },
    {
      "epoch": 0.0001488525390625,
      "model_forward_time": 0.12160491943359375,
      "step": 24388
    },
    {
      "epoch": 0.0001488525390625,
      "step": 24388,
      "training_step_time": 0.6345481872558594
    },
    {
      "epoch": 0.000148858642578125,
      "model_forward_time": 0.1192317008972168,
      "step": 24389
    },
    {
      "epoch": 0.000148858642578125,
      "step": 24389,
      "training_step_time": 0.6950325965881348
    },
    {
      "epoch": 0.00014886474609375,
      "grad_norm": 0.139910027384758,
      "learning_rate": 6.909597491053751e-05,
      "loss": 0.0601,
      "step": 24390
    },
    {
      "epoch": 0.00014886474609375,
      "model_forward_time": 0.12638282775878906,
      "step": 24390
    },
    {
      "epoch": 0.00014886474609375,
      "step": 24390,
      "training_step_time": 0.6455645561218262
    },
    {
      "epoch": 0.000148870849609375,
      "model_forward_time": 0.11738848686218262,
      "step": 24391
    },
    {
      "epoch": 0.000148870849609375,
      "step": 24391,
      "training_step_time": 0.7060630321502686
    },
    {
      "epoch": 0.000148876953125,
      "model_forward_time": 0.12399768829345703,
      "step": 24392
    },
    {
      "epoch": 0.000148876953125,
      "step": 24392,
      "training_step_time": 0.7081649303436279
    },
    {
      "epoch": 0.000148883056640625,
      "model_forward_time": 0.11544418334960938,
      "step": 24393
    },
    {
      "epoch": 0.000148883056640625,
      "step": 24393,
      "training_step_time": 0.7408816814422607
    },
    {
      "epoch": 0.00014888916015625,
      "model_forward_time": 0.12266826629638672,
      "step": 24394
    },
    {
      "epoch": 0.00014888916015625,
      "step": 24394,
      "training_step_time": 0.5542352199554443
    },
    {
      "epoch": 0.000148895263671875,
      "model_forward_time": 0.1209101676940918,
      "step": 24395
    },
    {
      "epoch": 0.000148895263671875,
      "step": 24395,
      "training_step_time": 0.6135687828063965
    },
    {
      "epoch": 0.0001489013671875,
      "model_forward_time": 0.11978316307067871,
      "step": 24396
    },
    {
      "epoch": 0.0001489013671875,
      "step": 24396,
      "training_step_time": 0.7069976329803467
    },
    {
      "epoch": 0.000148907470703125,
      "model_forward_time": 0.1192166805267334,
      "step": 24397
    },
    {
      "epoch": 0.000148907470703125,
      "step": 24397,
      "training_step_time": 0.7266573905944824
    },
    {
      "epoch": 0.00014891357421875,
      "model_forward_time": 0.12073206901550293,
      "step": 24398
    },
    {
      "epoch": 0.00014891357421875,
      "step": 24398,
      "training_step_time": 0.6396689414978027
    },
    {
      "epoch": 0.000148919677734375,
      "model_forward_time": 0.11860179901123047,
      "step": 24399
    },
    {
      "epoch": 0.000148919677734375,
      "step": 24399,
      "training_step_time": 0.6825547218322754
    },
    {
      "epoch": 0.00014892578125,
      "grad_norm": 0.15830321609973907,
      "learning_rate": 6.90705031860483e-05,
      "loss": 0.0538,
      "step": 24400
    },
    {
      "epoch": 0.00014892578125,
      "model_forward_time": 0.12080574035644531,
      "step": 24400
    },
    {
      "epoch": 0.00014892578125,
      "step": 24400,
      "training_step_time": 0.6083567142486572
    },
    {
      "epoch": 0.000148931884765625,
      "model_forward_time": 0.11692547798156738,
      "step": 24401
    },
    {
      "epoch": 0.000148931884765625,
      "step": 24401,
      "training_step_time": 0.6127955913543701
    },
    {
      "epoch": 0.00014893798828125,
      "model_forward_time": 0.1211845874786377,
      "step": 24402
    },
    {
      "epoch": 0.00014893798828125,
      "step": 24402,
      "training_step_time": 0.7576229572296143
    },
    {
      "epoch": 0.000148944091796875,
      "model_forward_time": 0.11748290061950684,
      "step": 24403
    },
    {
      "epoch": 0.000148944091796875,
      "step": 24403,
      "training_step_time": 0.7400350570678711
    },
    {
      "epoch": 0.0001489501953125,
      "model_forward_time": 0.12386393547058105,
      "step": 24404
    },
    {
      "epoch": 0.0001489501953125,
      "step": 24404,
      "training_step_time": 0.6550989151000977
    },
    {
      "epoch": 0.000148956298828125,
      "model_forward_time": 0.11555004119873047,
      "step": 24405
    },
    {
      "epoch": 0.000148956298828125,
      "step": 24405,
      "training_step_time": 0.6763906478881836
    },
    {
      "epoch": 0.00014896240234375,
      "model_forward_time": 0.11912059783935547,
      "step": 24406
    },
    {
      "epoch": 0.00014896240234375,
      "step": 24406,
      "training_step_time": 0.6686406135559082
    },
    {
      "epoch": 0.000148968505859375,
      "model_forward_time": 0.11874079704284668,
      "step": 24407
    },
    {
      "epoch": 0.000148968505859375,
      "step": 24407,
      "training_step_time": 0.6596519947052002
    },
    {
      "epoch": 0.000148974609375,
      "model_forward_time": 0.1191251277923584,
      "step": 24408
    },
    {
      "epoch": 0.000148974609375,
      "step": 24408,
      "training_step_time": 0.6693332195281982
    },
    {
      "epoch": 0.000148980712890625,
      "model_forward_time": 0.12113785743713379,
      "step": 24409
    },
    {
      "epoch": 0.000148980712890625,
      "step": 24409,
      "training_step_time": 0.7034189701080322
    },
    {
      "epoch": 0.00014898681640625,
      "grad_norm": 0.20646394789218903,
      "learning_rate": 6.904502566844374e-05,
      "loss": 0.0548,
      "step": 24410
    },
    {
      "epoch": 0.00014898681640625,
      "model_forward_time": 0.11815237998962402,
      "step": 24410
    },
    {
      "epoch": 0.00014898681640625,
      "step": 24410,
      "training_step_time": 0.6836748123168945
    },
    {
      "epoch": 0.000148992919921875,
      "model_forward_time": 0.12188172340393066,
      "step": 24411
    },
    {
      "epoch": 0.000148992919921875,
      "step": 24411,
      "training_step_time": 0.7196559906005859
    },
    {
      "epoch": 0.0001489990234375,
      "model_forward_time": 0.12070441246032715,
      "step": 24412
    },
    {
      "epoch": 0.0001489990234375,
      "step": 24412,
      "training_step_time": 0.6816558837890625
    },
    {
      "epoch": 0.000149005126953125,
      "model_forward_time": 0.1184537410736084,
      "step": 24413
    },
    {
      "epoch": 0.000149005126953125,
      "step": 24413,
      "training_step_time": 0.6395659446716309
    },
    {
      "epoch": 0.00014901123046875,
      "model_forward_time": 0.12332725524902344,
      "step": 24414
    },
    {
      "epoch": 0.00014901123046875,
      "step": 24414,
      "training_step_time": 0.645097017288208
    },
    {
      "epoch": 0.000149017333984375,
      "model_forward_time": 0.11712479591369629,
      "step": 24415
    },
    {
      "epoch": 0.000149017333984375,
      "step": 24415,
      "training_step_time": 0.7525568008422852
    },
    {
      "epoch": 0.0001490234375,
      "model_forward_time": 0.12393522262573242,
      "step": 24416
    },
    {
      "epoch": 0.0001490234375,
      "step": 24416,
      "training_step_time": 0.6296126842498779
    },
    {
      "epoch": 0.000149029541015625,
      "model_forward_time": 0.11673665046691895,
      "step": 24417
    },
    {
      "epoch": 0.000149029541015625,
      "step": 24417,
      "training_step_time": 0.631864070892334
    },
    {
      "epoch": 0.00014903564453125,
      "model_forward_time": 0.12006711959838867,
      "step": 24418
    },
    {
      "epoch": 0.00014903564453125,
      "step": 24418,
      "training_step_time": 0.7067897319793701
    },
    {
      "epoch": 0.000149041748046875,
      "model_forward_time": 0.12163233757019043,
      "step": 24419
    },
    {
      "epoch": 0.000149041748046875,
      "step": 24419,
      "training_step_time": 0.6548733711242676
    },
    {
      "epoch": 0.0001490478515625,
      "grad_norm": 0.21260966360569,
      "learning_rate": 6.901954236546323e-05,
      "loss": 0.0594,
      "step": 24420
    },
    {
      "epoch": 0.0001490478515625,
      "model_forward_time": 0.12248682975769043,
      "step": 24420
    },
    {
      "epoch": 0.0001490478515625,
      "step": 24420,
      "training_step_time": 0.711674690246582
    },
    {
      "epoch": 0.000149053955078125,
      "model_forward_time": 0.11919760704040527,
      "step": 24421
    },
    {
      "epoch": 0.000149053955078125,
      "step": 24421,
      "training_step_time": 0.7087256908416748
    },
    {
      "epoch": 0.00014906005859375,
      "model_forward_time": 0.11979269981384277,
      "step": 24422
    },
    {
      "epoch": 0.00014906005859375,
      "step": 24422,
      "training_step_time": 0.7338948249816895
    },
    {
      "epoch": 0.000149066162109375,
      "model_forward_time": 0.11687564849853516,
      "step": 24423
    },
    {
      "epoch": 0.000149066162109375,
      "step": 24423,
      "training_step_time": 0.6889357566833496
    },
    {
      "epoch": 0.000149072265625,
      "model_forward_time": 0.11800909042358398,
      "step": 24424
    },
    {
      "epoch": 0.000149072265625,
      "step": 24424,
      "training_step_time": 0.653630256652832
    },
    {
      "epoch": 0.000149078369140625,
      "model_forward_time": 0.12326574325561523,
      "step": 24425
    },
    {
      "epoch": 0.000149078369140625,
      "step": 24425,
      "training_step_time": 0.5977280139923096
    },
    {
      "epoch": 0.00014908447265625,
      "model_forward_time": 0.11608743667602539,
      "step": 24426
    },
    {
      "epoch": 0.00014908447265625,
      "step": 24426,
      "training_step_time": 0.6507854461669922
    },
    {
      "epoch": 0.000149090576171875,
      "model_forward_time": 0.12182855606079102,
      "step": 24427
    },
    {
      "epoch": 0.000149090576171875,
      "step": 24427,
      "training_step_time": 0.6611590385437012
    },
    {
      "epoch": 0.0001490966796875,
      "model_forward_time": 0.11973047256469727,
      "step": 24428
    },
    {
      "epoch": 0.0001490966796875,
      "step": 24428,
      "training_step_time": 0.6111376285552979
    },
    {
      "epoch": 0.000149102783203125,
      "model_forward_time": 0.11815905570983887,
      "step": 24429
    },
    {
      "epoch": 0.000149102783203125,
      "step": 24429,
      "training_step_time": 0.687903642654419
    },
    {
      "epoch": 0.00014910888671875,
      "grad_norm": 0.2486969381570816,
      "learning_rate": 6.899405328484794e-05,
      "loss": 0.0501,
      "step": 24430
    },
    {
      "epoch": 0.00014910888671875,
      "model_forward_time": 0.12896132469177246,
      "step": 24430
    },
    {
      "epoch": 0.00014910888671875,
      "step": 24430,
      "training_step_time": 0.5928354263305664
    },
    {
      "epoch": 0.000149114990234375,
      "model_forward_time": 0.11853599548339844,
      "step": 24431
    },
    {
      "epoch": 0.000149114990234375,
      "step": 24431,
      "training_step_time": 0.6687297821044922
    },
    {
      "epoch": 0.00014912109375,
      "model_forward_time": 0.11779356002807617,
      "step": 24432
    },
    {
      "epoch": 0.00014912109375,
      "step": 24432,
      "training_step_time": 0.5931611061096191
    },
    {
      "epoch": 0.000149127197265625,
      "model_forward_time": 0.12102580070495605,
      "step": 24433
    },
    {
      "epoch": 0.000149127197265625,
      "step": 24433,
      "training_step_time": 0.5037713050842285
    },
    {
      "epoch": 0.00014913330078125,
      "model_forward_time": 0.11890912055969238,
      "step": 24434
    },
    {
      "epoch": 0.00014913330078125,
      "step": 24434,
      "training_step_time": 0.5053379535675049
    },
    {
      "epoch": 0.000149139404296875,
      "model_forward_time": 0.1179654598236084,
      "step": 24435
    },
    {
      "epoch": 0.000149139404296875,
      "step": 24435,
      "training_step_time": 0.5262901782989502
    },
    {
      "epoch": 0.0001491455078125,
      "model_forward_time": 0.11709308624267578,
      "step": 24436
    },
    {
      "epoch": 0.0001491455078125,
      "step": 24436,
      "training_step_time": 0.49018383026123047
    },
    {
      "epoch": 0.000149151611328125,
      "model_forward_time": 0.11688780784606934,
      "step": 24437
    },
    {
      "epoch": 0.000149151611328125,
      "step": 24437,
      "training_step_time": 0.45171523094177246
    },
    {
      "epoch": 0.00014915771484375,
      "model_forward_time": 0.12085938453674316,
      "step": 24438
    },
    {
      "epoch": 0.00014915771484375,
      "step": 24438,
      "training_step_time": 0.4361236095428467
    },
    {
      "epoch": 0.000149163818359375,
      "model_forward_time": 0.1166074275970459,
      "step": 24439
    },
    {
      "epoch": 0.000149163818359375,
      "step": 24439,
      "training_step_time": 0.42003655433654785
    },
    {
      "epoch": 0.000149169921875,
      "grad_norm": 0.19816811382770538,
      "learning_rate": 6.896855843434078e-05,
      "loss": 0.0501,
      "step": 24440
    },
    {
      "epoch": 0.000149169921875,
      "model_forward_time": 0.11632513999938965,
      "step": 24440
    },
    {
      "epoch": 0.000149169921875,
      "step": 24440,
      "training_step_time": 0.408353328704834
    },
    {
      "epoch": 0.000149176025390625,
      "model_forward_time": 0.11606001853942871,
      "step": 24441
    },
    {
      "epoch": 0.000149176025390625,
      "step": 24441,
      "training_step_time": 0.4743504524230957
    },
    {
      "epoch": 0.00014918212890625,
      "model_forward_time": 0.11515116691589355,
      "step": 24442
    },
    {
      "epoch": 0.00014918212890625,
      "step": 24442,
      "training_step_time": 0.473879337310791
    },
    {
      "epoch": 0.000149188232421875,
      "model_forward_time": 0.11526274681091309,
      "step": 24443
    },
    {
      "epoch": 0.000149188232421875,
      "step": 24443,
      "training_step_time": 0.4585988521575928
    },
    {
      "epoch": 0.0001491943359375,
      "model_forward_time": 0.11580276489257812,
      "step": 24444
    },
    {
      "epoch": 0.0001491943359375,
      "step": 24444,
      "training_step_time": 0.4885702133178711
    },
    {
      "epoch": 0.000149200439453125,
      "model_forward_time": 0.11768794059753418,
      "step": 24445
    },
    {
      "epoch": 0.000149200439453125,
      "step": 24445,
      "training_step_time": 0.4290482997894287
    },
    {
      "epoch": 0.00014920654296875,
      "model_forward_time": 0.11654067039489746,
      "step": 24446
    },
    {
      "epoch": 0.00014920654296875,
      "step": 24446,
      "training_step_time": 0.3794364929199219
    },
    {
      "epoch": 0.000149212646484375,
      "model_forward_time": 0.11533880233764648,
      "step": 24447
    },
    {
      "epoch": 0.000149212646484375,
      "step": 24447,
      "training_step_time": 0.38587403297424316
    },
    {
      "epoch": 0.00014921875,
      "model_forward_time": 0.11488151550292969,
      "step": 24448
    },
    {
      "epoch": 0.00014921875,
      "step": 24448,
      "training_step_time": 0.3827979564666748
    },
    {
      "epoch": 0.000149224853515625,
      "model_forward_time": 0.1158895492553711,
      "step": 24449
    },
    {
      "epoch": 0.000149224853515625,
      "step": 24449,
      "training_step_time": 0.3936495780944824
    },
    {
      "epoch": 0.00014923095703125,
      "grad_norm": 0.181956484913826,
      "learning_rate": 6.894305782168638e-05,
      "loss": 0.0565,
      "step": 24450
    },
    {
      "epoch": 0.00014923095703125,
      "model_forward_time": 0.11542582511901855,
      "step": 24450
    },
    {
      "epoch": 0.00014923095703125,
      "step": 24450,
      "training_step_time": 0.4189720153808594
    },
    {
      "epoch": 0.000149237060546875,
      "model_forward_time": 0.11529541015625,
      "step": 24451
    },
    {
      "epoch": 0.000149237060546875,
      "step": 24451,
      "training_step_time": 0.4119377136230469
    },
    {
      "epoch": 0.0001492431640625,
      "model_forward_time": 0.11545062065124512,
      "step": 24452
    },
    {
      "epoch": 0.0001492431640625,
      "step": 24452,
      "training_step_time": 0.39161157608032227
    },
    {
      "epoch": 0.000149249267578125,
      "model_forward_time": 0.11533975601196289,
      "step": 24453
    },
    {
      "epoch": 0.000149249267578125,
      "step": 24453,
      "training_step_time": 0.3959946632385254
    },
    {
      "epoch": 0.00014925537109375,
      "model_forward_time": 0.11550235748291016,
      "step": 24454
    },
    {
      "epoch": 0.00014925537109375,
      "step": 24454,
      "training_step_time": 0.39496684074401855
    },
    {
      "epoch": 0.000149261474609375,
      "model_forward_time": 0.11787128448486328,
      "step": 24455
    },
    {
      "epoch": 0.000149261474609375,
      "step": 24455,
      "training_step_time": 0.3771393299102783
    },
    {
      "epoch": 0.000149267578125,
      "model_forward_time": 0.11623144149780273,
      "step": 24456
    },
    {
      "epoch": 0.000149267578125,
      "step": 24456,
      "training_step_time": 0.4873778820037842
    },
    {
      "epoch": 0.000149273681640625,
      "model_forward_time": 0.11585354804992676,
      "step": 24457
    },
    {
      "epoch": 0.000149273681640625,
      "step": 24457,
      "training_step_time": 0.4782595634460449
    },
    {
      "epoch": 0.00014927978515625,
      "model_forward_time": 0.11536908149719238,
      "step": 24458
    },
    {
      "epoch": 0.00014927978515625,
      "step": 24458,
      "training_step_time": 0.4940803050994873
    },
    {
      "epoch": 0.000149285888671875,
      "model_forward_time": 0.11549568176269531,
      "step": 24459
    },
    {
      "epoch": 0.000149285888671875,
      "step": 24459,
      "training_step_time": 0.4860508441925049
    },
    {
      "epoch": 0.0001492919921875,
      "grad_norm": 0.14828254282474518,
      "learning_rate": 6.89175514546312e-05,
      "loss": 0.0495,
      "step": 24460
    },
    {
      "epoch": 0.0001492919921875,
      "model_forward_time": 0.11520218849182129,
      "step": 24460
    },
    {
      "epoch": 0.0001492919921875,
      "step": 24460,
      "training_step_time": 0.41028618812561035
    },
    {
      "epoch": 0.000149298095703125,
      "model_forward_time": 0.1149740219116211,
      "step": 24461
    },
    {
      "epoch": 0.000149298095703125,
      "step": 24461,
      "training_step_time": 0.3836710453033447
    },
    {
      "epoch": 0.00014930419921875,
      "model_forward_time": 0.11509180068969727,
      "step": 24462
    },
    {
      "epoch": 0.00014930419921875,
      "step": 24462,
      "training_step_time": 0.4021875858306885
    },
    {
      "epoch": 0.000149310302734375,
      "model_forward_time": 0.11454653739929199,
      "step": 24463
    },
    {
      "epoch": 0.000149310302734375,
      "step": 24463,
      "training_step_time": 0.39713358879089355
    },
    {
      "epoch": 0.00014931640625,
      "model_forward_time": 0.1145164966583252,
      "step": 24464
    },
    {
      "epoch": 0.00014931640625,
      "step": 24464,
      "training_step_time": 0.4493277072906494
    },
    {
      "epoch": 0.000149322509765625,
      "model_forward_time": 0.11848616600036621,
      "step": 24465
    },
    {
      "epoch": 0.000149322509765625,
      "step": 24465,
      "training_step_time": 0.4036438465118408
    },
    {
      "epoch": 0.00014932861328125,
      "model_forward_time": 0.11540579795837402,
      "step": 24466
    },
    {
      "epoch": 0.00014932861328125,
      "step": 24466,
      "training_step_time": 0.414717435836792
    },
    {
      "epoch": 0.000149334716796875,
      "model_forward_time": 0.1153256893157959,
      "step": 24467
    },
    {
      "epoch": 0.000149334716796875,
      "step": 24467,
      "training_step_time": 0.4038655757904053
    },
    {
      "epoch": 0.0001493408203125,
      "model_forward_time": 0.11580967903137207,
      "step": 24468
    },
    {
      "epoch": 0.0001493408203125,
      "step": 24468,
      "training_step_time": 0.3667914867401123
    },
    {
      "epoch": 0.000149346923828125,
      "model_forward_time": 0.11545419692993164,
      "step": 24469
    },
    {
      "epoch": 0.000149346923828125,
      "step": 24469,
      "training_step_time": 0.3974776268005371
    },
    {
      "epoch": 0.00014935302734375,
      "grad_norm": 0.1632888913154602,
      "learning_rate": 6.889203934092336e-05,
      "loss": 0.0574,
      "step": 24470
    },
    {
      "epoch": 0.00014935302734375,
      "model_forward_time": 0.11532831192016602,
      "step": 24470
    },
    {
      "epoch": 0.00014935302734375,
      "step": 24470,
      "training_step_time": 0.4609086513519287
    },
    {
      "epoch": 0.000149359130859375,
      "model_forward_time": 0.11566925048828125,
      "step": 24471
    },
    {
      "epoch": 0.000149359130859375,
      "step": 24471,
      "training_step_time": 0.4642906188964844
    },
    {
      "epoch": 0.000149365234375,
      "model_forward_time": 0.11931514739990234,
      "step": 24472
    },
    {
      "epoch": 0.000149365234375,
      "step": 24472,
      "training_step_time": 0.4945504665374756
    },
    {
      "epoch": 0.000149371337890625,
      "model_forward_time": 0.1154179573059082,
      "step": 24473
    },
    {
      "epoch": 0.000149371337890625,
      "step": 24473,
      "training_step_time": 0.48087120056152344
    },
    {
      "epoch": 0.00014937744140625,
      "model_forward_time": 0.11489009857177734,
      "step": 24474
    },
    {
      "epoch": 0.00014937744140625,
      "step": 24474,
      "training_step_time": 0.41993141174316406
    },
    {
      "epoch": 0.000149383544921875,
      "model_forward_time": 0.11423945426940918,
      "step": 24475
    },
    {
      "epoch": 0.000149383544921875,
      "step": 24475,
      "training_step_time": 0.38510847091674805
    },
    {
      "epoch": 0.0001493896484375,
      "model_forward_time": 0.11538410186767578,
      "step": 24476
    },
    {
      "epoch": 0.0001493896484375,
      "step": 24476,
      "training_step_time": 0.38201189041137695
    },
    {
      "epoch": 0.000149395751953125,
      "model_forward_time": 0.11533641815185547,
      "step": 24477
    },
    {
      "epoch": 0.000149395751953125,
      "step": 24477,
      "training_step_time": 0.4074249267578125
    },
    {
      "epoch": 0.00014940185546875,
      "model_forward_time": 0.11491942405700684,
      "step": 24478
    },
    {
      "epoch": 0.00014940185546875,
      "step": 24478,
      "training_step_time": 0.4168515205383301
    },
    {
      "epoch": 0.000149407958984375,
      "model_forward_time": 0.11533308029174805,
      "step": 24479
    },
    {
      "epoch": 0.000149407958984375,
      "step": 24479,
      "training_step_time": 0.3899996280670166
    },
    {
      "epoch": 0.0001494140625,
      "grad_norm": 0.14596350491046906,
      "learning_rate": 6.886652148831279e-05,
      "loss": 0.052,
      "step": 24480
    },
    {
      "epoch": 0.0001494140625,
      "model_forward_time": 0.11520886421203613,
      "step": 24480
    },
    {
      "epoch": 0.0001494140625,
      "step": 24480,
      "training_step_time": 0.4010796546936035
    },
    {
      "epoch": 0.000149420166015625,
      "model_forward_time": 0.11554646492004395,
      "step": 24481
    },
    {
      "epoch": 0.000149420166015625,
      "step": 24481,
      "training_step_time": 0.3868074417114258
    },
    {
      "epoch": 0.00014942626953125,
      "model_forward_time": 0.11571645736694336,
      "step": 24482
    },
    {
      "epoch": 0.00014942626953125,
      "step": 24482,
      "training_step_time": 0.38873934745788574
    },
    {
      "epoch": 0.000149432373046875,
      "model_forward_time": 0.11534881591796875,
      "step": 24483
    },
    {
      "epoch": 0.000149432373046875,
      "step": 24483,
      "training_step_time": 0.38570451736450195
    },
    {
      "epoch": 0.0001494384765625,
      "model_forward_time": 0.11520624160766602,
      "step": 24484
    },
    {
      "epoch": 0.0001494384765625,
      "step": 24484,
      "training_step_time": 0.39081478118896484
    },
    {
      "epoch": 0.000149444580078125,
      "model_forward_time": 0.11562490463256836,
      "step": 24485
    },
    {
      "epoch": 0.000149444580078125,
      "step": 24485,
      "training_step_time": 0.4582526683807373
    },
    {
      "epoch": 0.00014945068359375,
      "model_forward_time": 0.11545610427856445,
      "step": 24486
    },
    {
      "epoch": 0.00014945068359375,
      "step": 24486,
      "training_step_time": 0.49827146530151367
    },
    {
      "epoch": 0.000149456787109375,
      "model_forward_time": 0.11545443534851074,
      "step": 24487
    },
    {
      "epoch": 0.000149456787109375,
      "step": 24487,
      "training_step_time": 0.42502713203430176
    },
    {
      "epoch": 0.000149462890625,
      "model_forward_time": 0.11598849296569824,
      "step": 24488
    },
    {
      "epoch": 0.000149462890625,
      "step": 24488,
      "training_step_time": 0.38625192642211914
    },
    {
      "epoch": 0.000149468994140625,
      "model_forward_time": 0.11552667617797852,
      "step": 24489
    },
    {
      "epoch": 0.000149468994140625,
      "step": 24489,
      "training_step_time": 0.38669419288635254
    },
    {
      "epoch": 0.00014947509765625,
      "grad_norm": 0.16015960276126862,
      "learning_rate": 6.884099790455113e-05,
      "loss": 0.0495,
      "step": 24490
    },
    {
      "epoch": 0.00014947509765625,
      "model_forward_time": 0.1146535873413086,
      "step": 24490
    },
    {
      "epoch": 0.00014947509765625,
      "step": 24490,
      "training_step_time": 0.39728808403015137
    },
    {
      "epoch": 0.000149481201171875,
      "model_forward_time": 0.11506795883178711,
      "step": 24491
    },
    {
      "epoch": 0.000149481201171875,
      "step": 24491,
      "training_step_time": 0.39151477813720703
    },
    {
      "epoch": 0.0001494873046875,
      "model_forward_time": 0.11524438858032227,
      "step": 24492
    },
    {
      "epoch": 0.0001494873046875,
      "step": 24492,
      "training_step_time": 0.4374542236328125
    },
    {
      "epoch": 0.000149493408203125,
      "model_forward_time": 0.11714863777160645,
      "step": 24493
    },
    {
      "epoch": 0.000149493408203125,
      "step": 24493,
      "training_step_time": 0.3993642330169678
    },
    {
      "epoch": 0.00014949951171875,
      "model_forward_time": 0.11477899551391602,
      "step": 24494
    },
    {
      "epoch": 0.00014949951171875,
      "step": 24494,
      "training_step_time": 0.4998180866241455
    },
    {
      "epoch": 0.000149505615234375,
      "model_forward_time": 0.11479496955871582,
      "step": 24495
    },
    {
      "epoch": 0.000149505615234375,
      "step": 24495,
      "training_step_time": 0.38930201530456543
    },
    {
      "epoch": 0.00014951171875,
      "model_forward_time": 0.11505913734436035,
      "step": 24496
    },
    {
      "epoch": 0.00014951171875,
      "step": 24496,
      "training_step_time": 0.3917582035064697
    },
    {
      "epoch": 0.000149517822265625,
      "model_forward_time": 0.1151423454284668,
      "step": 24497
    },
    {
      "epoch": 0.000149517822265625,
      "step": 24497,
      "training_step_time": 0.39649248123168945
    },
    {
      "epoch": 0.00014952392578125,
      "model_forward_time": 0.11540675163269043,
      "step": 24498
    },
    {
      "epoch": 0.00014952392578125,
      "step": 24498,
      "training_step_time": 0.3978769779205322
    },
    {
      "epoch": 0.000149530029296875,
      "model_forward_time": 0.11507916450500488,
      "step": 24499
    },
    {
      "epoch": 0.000149530029296875,
      "step": 24499,
      "training_step_time": 0.4096815586090088
    },
    {
      "epoch": 0.0001495361328125,
      "grad_norm": 0.11254945397377014,
      "learning_rate": 6.881546859739179e-05,
      "loss": 0.0476,
      "step": 24500
    },
    {
      "epoch": 0.0001495361328125,
      "model_forward_time": 0.11511421203613281,
      "step": 24500
    },
    {
      "epoch": 0.0001495361328125,
      "step": 24500,
      "training_step_time": 0.8308024406433105
    },
    {
      "epoch": 0.000149542236328125,
      "model_forward_time": 0.1146085262298584,
      "step": 24501
    },
    {
      "epoch": 0.000149542236328125,
      "step": 24501,
      "training_step_time": 0.460019588470459
    },
    {
      "epoch": 0.00014954833984375,
      "model_forward_time": 0.11528611183166504,
      "step": 24502
    },
    {
      "epoch": 0.00014954833984375,
      "step": 24502,
      "training_step_time": 0.38851022720336914
    },
    {
      "epoch": 0.000149554443359375,
      "model_forward_time": 0.11468935012817383,
      "step": 24503
    },
    {
      "epoch": 0.000149554443359375,
      "step": 24503,
      "training_step_time": 0.46660733222961426
    },
    {
      "epoch": 0.000149560546875,
      "model_forward_time": 0.11400222778320312,
      "step": 24504
    },
    {
      "epoch": 0.000149560546875,
      "step": 24504,
      "training_step_time": 0.4078669548034668
    },
    {
      "epoch": 0.000149566650390625,
      "model_forward_time": 0.11458563804626465,
      "step": 24505
    },
    {
      "epoch": 0.000149566650390625,
      "step": 24505,
      "training_step_time": 0.40173816680908203
    },
    {
      "epoch": 0.00014957275390625,
      "model_forward_time": 0.1148536205291748,
      "step": 24506
    },
    {
      "epoch": 0.00014957275390625,
      "step": 24506,
      "training_step_time": 0.5930948257446289
    },
    {
      "epoch": 0.000149578857421875,
      "model_forward_time": 0.11496090888977051,
      "step": 24507
    },
    {
      "epoch": 0.000149578857421875,
      "step": 24507,
      "training_step_time": 0.3926553726196289
    },
    {
      "epoch": 0.0001495849609375,
      "model_forward_time": 0.11501622200012207,
      "step": 24508
    },
    {
      "epoch": 0.0001495849609375,
      "step": 24508,
      "training_step_time": 0.38918566703796387
    },
    {
      "epoch": 0.000149591064453125,
      "model_forward_time": 0.11486482620239258,
      "step": 24509
    },
    {
      "epoch": 0.000149591064453125,
      "step": 24509,
      "training_step_time": 0.39565253257751465
    },
    {
      "epoch": 0.00014959716796875,
      "grad_norm": 0.16874556243419647,
      "learning_rate": 6.878993357458986e-05,
      "loss": 0.0485,
      "step": 24510
    },
    {
      "epoch": 0.00014959716796875,
      "model_forward_time": 0.11453008651733398,
      "step": 24510
    },
    {
      "epoch": 0.00014959716796875,
      "step": 24510,
      "training_step_time": 0.3966357707977295
    },
    {
      "epoch": 0.000149603271484375,
      "model_forward_time": 0.11520814895629883,
      "step": 24511
    },
    {
      "epoch": 0.000149603271484375,
      "step": 24511,
      "training_step_time": 0.39856553077697754
    },
    {
      "epoch": 0.000149609375,
      "model_forward_time": 0.1149892807006836,
      "step": 24512
    },
    {
      "epoch": 0.000149609375,
      "step": 24512,
      "training_step_time": 0.5603148937225342
    },
    {
      "epoch": 0.000149615478515625,
      "model_forward_time": 0.11490130424499512,
      "step": 24513
    },
    {
      "epoch": 0.000149615478515625,
      "step": 24513,
      "training_step_time": 0.45056700706481934
    },
    {
      "epoch": 0.00014962158203125,
      "model_forward_time": 0.11455059051513672,
      "step": 24514
    },
    {
      "epoch": 0.00014962158203125,
      "step": 24514,
      "training_step_time": 0.4534027576446533
    },
    {
      "epoch": 0.000149627685546875,
      "model_forward_time": 0.11521244049072266,
      "step": 24515
    },
    {
      "epoch": 0.000149627685546875,
      "step": 24515,
      "training_step_time": 0.4566068649291992
    },
    {
      "epoch": 0.0001496337890625,
      "model_forward_time": 0.11793398857116699,
      "step": 24516
    },
    {
      "epoch": 0.0001496337890625,
      "step": 24516,
      "training_step_time": 0.48632287979125977
    },
    {
      "epoch": 0.000149639892578125,
      "model_forward_time": 0.11774253845214844,
      "step": 24517
    },
    {
      "epoch": 0.000149639892578125,
      "step": 24517,
      "training_step_time": 0.45850062370300293
    },
    {
      "epoch": 0.00014964599609375,
      "model_forward_time": 0.12464213371276855,
      "step": 24518
    },
    {
      "epoch": 0.00014964599609375,
      "step": 24518,
      "training_step_time": 0.40334081649780273
    },
    {
      "epoch": 0.000149652099609375,
      "model_forward_time": 0.1177968978881836,
      "step": 24519
    },
    {
      "epoch": 0.000149652099609375,
      "step": 24519,
      "training_step_time": 0.38565516471862793
    },
    {
      "epoch": 0.000149658203125,
      "grad_norm": 0.11506611108779907,
      "learning_rate": 6.876439284390223e-05,
      "loss": 0.0522,
      "step": 24520
    },
    {
      "epoch": 0.000149658203125,
      "model_forward_time": 0.11780357360839844,
      "step": 24520
    },
    {
      "epoch": 0.000149658203125,
      "step": 24520,
      "training_step_time": 0.38176631927490234
    },
    {
      "epoch": 0.000149664306640625,
      "model_forward_time": 0.11915373802185059,
      "step": 24521
    },
    {
      "epoch": 0.000149664306640625,
      "step": 24521,
      "training_step_time": 0.37962770462036133
    },
    {
      "epoch": 0.00014967041015625,
      "model_forward_time": 0.11852908134460449,
      "step": 24522
    },
    {
      "epoch": 0.00014967041015625,
      "step": 24522,
      "training_step_time": 0.3945138454437256
    },
    {
      "epoch": 0.000149676513671875,
      "model_forward_time": 0.11576199531555176,
      "step": 24523
    },
    {
      "epoch": 0.000149676513671875,
      "step": 24523,
      "training_step_time": 0.38849854469299316
    },
    {
      "epoch": 0.0001496826171875,
      "model_forward_time": 0.11575436592102051,
      "step": 24524
    },
    {
      "epoch": 0.0001496826171875,
      "step": 24524,
      "training_step_time": 0.3962383270263672
    },
    {
      "epoch": 0.000149688720703125,
      "model_forward_time": 0.1153724193572998,
      "step": 24525
    },
    {
      "epoch": 0.000149688720703125,
      "step": 24525,
      "training_step_time": 0.3894362449645996
    },
    {
      "epoch": 0.00014969482421875,
      "model_forward_time": 0.11640524864196777,
      "step": 24526
    },
    {
      "epoch": 0.00014969482421875,
      "step": 24526,
      "training_step_time": 0.3928511142730713
    },
    {
      "epoch": 0.000149700927734375,
      "model_forward_time": 0.11577868461608887,
      "step": 24527
    },
    {
      "epoch": 0.000149700927734375,
      "step": 24527,
      "training_step_time": 0.42961859703063965
    },
    {
      "epoch": 0.00014970703125,
      "model_forward_time": 0.11538290977478027,
      "step": 24528
    },
    {
      "epoch": 0.00014970703125,
      "step": 24528,
      "training_step_time": 0.5011675357818604
    },
    {
      "epoch": 0.000149713134765625,
      "model_forward_time": 0.11678791046142578,
      "step": 24529
    },
    {
      "epoch": 0.000149713134765625,
      "step": 24529,
      "training_step_time": 0.4099924564361572
    },
    {
      "epoch": 0.00014971923828125,
      "grad_norm": 0.18125134706497192,
      "learning_rate": 6.873884641308752e-05,
      "loss": 0.0572,
      "step": 24530
    },
    {
      "epoch": 0.00014971923828125,
      "model_forward_time": 0.11605095863342285,
      "step": 24530
    },
    {
      "epoch": 0.00014971923828125,
      "step": 24530,
      "training_step_time": 0.40616297721862793
    },
    {
      "epoch": 0.000149725341796875,
      "model_forward_time": 0.11561203002929688,
      "step": 24531
    },
    {
      "epoch": 0.000149725341796875,
      "step": 24531,
      "training_step_time": 0.40825700759887695
    },
    {
      "epoch": 0.0001497314453125,
      "model_forward_time": 0.11524677276611328,
      "step": 24532
    },
    {
      "epoch": 0.0001497314453125,
      "step": 24532,
      "training_step_time": 0.4597477912902832
    },
    {
      "epoch": 0.000149737548828125,
      "model_forward_time": 0.11511659622192383,
      "step": 24533
    },
    {
      "epoch": 0.000149737548828125,
      "step": 24533,
      "training_step_time": 0.4014706611633301
    },
    {
      "epoch": 0.00014974365234375,
      "model_forward_time": 0.11568808555603027,
      "step": 24534
    },
    {
      "epoch": 0.00014974365234375,
      "step": 24534,
      "training_step_time": 0.40410876274108887
    },
    {
      "epoch": 0.000149749755859375,
      "model_forward_time": 0.11550641059875488,
      "step": 24535
    },
    {
      "epoch": 0.000149749755859375,
      "step": 24535,
      "training_step_time": 0.39407992362976074
    },
    {
      "epoch": 0.000149755859375,
      "model_forward_time": 0.11568903923034668,
      "step": 24536
    },
    {
      "epoch": 0.000149755859375,
      "step": 24536,
      "training_step_time": 0.389620304107666
    },
    {
      "epoch": 0.000149761962890625,
      "model_forward_time": 0.11555695533752441,
      "step": 24537
    },
    {
      "epoch": 0.000149761962890625,
      "step": 24537,
      "training_step_time": 0.3917829990386963
    },
    {
      "epoch": 0.00014976806640625,
      "model_forward_time": 0.11569929122924805,
      "step": 24538
    },
    {
      "epoch": 0.00014976806640625,
      "step": 24538,
      "training_step_time": 0.3944830894470215
    },
    {
      "epoch": 0.000149774169921875,
      "model_forward_time": 0.11536192893981934,
      "step": 24539
    },
    {
      "epoch": 0.000149774169921875,
      "step": 24539,
      "training_step_time": 0.39090752601623535
    },
    {
      "epoch": 0.0001497802734375,
      "grad_norm": 0.12380406260490417,
      "learning_rate": 6.871329428990602e-05,
      "loss": 0.0538,
      "step": 24540
    },
    {
      "epoch": 0.0001497802734375,
      "model_forward_time": 0.11552858352661133,
      "step": 24540
    },
    {
      "epoch": 0.0001497802734375,
      "step": 24540,
      "training_step_time": 0.39901256561279297
    },
    {
      "epoch": 0.000149786376953125,
      "model_forward_time": 0.11583638191223145,
      "step": 24541
    },
    {
      "epoch": 0.000149786376953125,
      "step": 24541,
      "training_step_time": 0.4157266616821289
    },
    {
      "epoch": 0.00014979248046875,
      "model_forward_time": 0.1144251823425293,
      "step": 24542
    },
    {
      "epoch": 0.00014979248046875,
      "step": 24542,
      "training_step_time": 0.5151605606079102
    },
    {
      "epoch": 0.000149798583984375,
      "model_forward_time": 0.11681079864501953,
      "step": 24543
    },
    {
      "epoch": 0.000149798583984375,
      "step": 24543,
      "training_step_time": 0.4603290557861328
    },
    {
      "epoch": 0.0001498046875,
      "model_forward_time": 0.11492443084716797,
      "step": 24544
    },
    {
      "epoch": 0.0001498046875,
      "step": 24544,
      "training_step_time": 0.5133552551269531
    },
    {
      "epoch": 0.000149810791015625,
      "model_forward_time": 0.11518979072570801,
      "step": 24545
    },
    {
      "epoch": 0.000149810791015625,
      "step": 24545,
      "training_step_time": 0.4947521686553955
    },
    {
      "epoch": 0.00014981689453125,
      "model_forward_time": 0.11463475227355957,
      "step": 24546
    },
    {
      "epoch": 0.00014981689453125,
      "step": 24546,
      "training_step_time": 0.393582820892334
    },
    {
      "epoch": 0.000149822998046875,
      "model_forward_time": 0.11503434181213379,
      "step": 24547
    },
    {
      "epoch": 0.000149822998046875,
      "step": 24547,
      "training_step_time": 0.3898465633392334
    },
    {
      "epoch": 0.0001498291015625,
      "model_forward_time": 0.11432123184204102,
      "step": 24548
    },
    {
      "epoch": 0.0001498291015625,
      "step": 24548,
      "training_step_time": 0.3867661952972412
    },
    {
      "epoch": 0.000149835205078125,
      "model_forward_time": 0.11618542671203613,
      "step": 24549
    },
    {
      "epoch": 0.000149835205078125,
      "step": 24549,
      "training_step_time": 0.384476900100708
    },
    {
      "epoch": 0.00014984130859375,
      "grad_norm": 0.12154129892587662,
      "learning_rate": 6.868773648211983e-05,
      "loss": 0.0505,
      "step": 24550
    },
    {
      "epoch": 0.00014984130859375,
      "model_forward_time": 0.11536121368408203,
      "step": 24550
    },
    {
      "epoch": 0.00014984130859375,
      "step": 24550,
      "training_step_time": 0.392071008682251
    },
    {
      "epoch": 0.000149847412109375,
      "model_forward_time": 0.11553001403808594,
      "step": 24551
    },
    {
      "epoch": 0.000149847412109375,
      "step": 24551,
      "training_step_time": 0.3968329429626465
    },
    {
      "epoch": 0.000149853515625,
      "model_forward_time": 0.1154930591583252,
      "step": 24552
    },
    {
      "epoch": 0.000149853515625,
      "step": 24552,
      "training_step_time": 0.4003565311431885
    },
    {
      "epoch": 0.000149859619140625,
      "model_forward_time": 0.11638784408569336,
      "step": 24553
    },
    {
      "epoch": 0.000149859619140625,
      "step": 24553,
      "training_step_time": 0.39540958404541016
    },
    {
      "epoch": 0.00014986572265625,
      "model_forward_time": 0.11548471450805664,
      "step": 24554
    },
    {
      "epoch": 0.00014986572265625,
      "step": 24554,
      "training_step_time": 0.40912914276123047
    },
    {
      "epoch": 0.000149871826171875,
      "model_forward_time": 0.11687779426574707,
      "step": 24555
    },
    {
      "epoch": 0.000149871826171875,
      "step": 24555,
      "training_step_time": 0.3905186653137207
    },
    {
      "epoch": 0.0001498779296875,
      "model_forward_time": 0.1156008243560791,
      "step": 24556
    },
    {
      "epoch": 0.0001498779296875,
      "step": 24556,
      "training_step_time": 0.42406630516052246
    },
    {
      "epoch": 0.000149884033203125,
      "model_forward_time": 0.11584091186523438,
      "step": 24557
    },
    {
      "epoch": 0.000149884033203125,
      "step": 24557,
      "training_step_time": 0.40074920654296875
    },
    {
      "epoch": 0.00014989013671875,
      "model_forward_time": 0.11536979675292969,
      "step": 24558
    },
    {
      "epoch": 0.00014989013671875,
      "step": 24558,
      "training_step_time": 0.45912718772888184
    },
    {
      "epoch": 0.000149896240234375,
      "model_forward_time": 0.11600637435913086,
      "step": 24559
    },
    {
      "epoch": 0.000149896240234375,
      "step": 24559,
      "training_step_time": 0.4023253917694092
    },
    {
      "epoch": 0.00014990234375,
      "grad_norm": 0.14858119189739227,
      "learning_rate": 6.86621729974927e-05,
      "loss": 0.0448,
      "step": 24560
    },
    {
      "epoch": 0.00014990234375,
      "model_forward_time": 0.11701536178588867,
      "step": 24560
    },
    {
      "epoch": 0.00014990234375,
      "step": 24560,
      "training_step_time": 0.4606766700744629
    },
    {
      "epoch": 0.000149908447265625,
      "model_forward_time": 0.11501717567443848,
      "step": 24561
    },
    {
      "epoch": 0.000149908447265625,
      "step": 24561,
      "training_step_time": 0.3886716365814209
    },
    {
      "epoch": 0.00014991455078125,
      "model_forward_time": 0.11510443687438965,
      "step": 24562
    },
    {
      "epoch": 0.00014991455078125,
      "step": 24562,
      "training_step_time": 0.3924872875213623
    },
    {
      "epoch": 0.000149920654296875,
      "model_forward_time": 0.11530256271362305,
      "step": 24563
    },
    {
      "epoch": 0.000149920654296875,
      "step": 24563,
      "training_step_time": 0.39353418350219727
    },
    {
      "epoch": 0.0001499267578125,
      "model_forward_time": 0.11519670486450195,
      "step": 24564
    },
    {
      "epoch": 0.0001499267578125,
      "step": 24564,
      "training_step_time": 0.40074753761291504
    },
    {
      "epoch": 0.000149932861328125,
      "model_forward_time": 0.11675548553466797,
      "step": 24565
    },
    {
      "epoch": 0.000149932861328125,
      "step": 24565,
      "training_step_time": 0.5612633228302002
    },
    {
      "epoch": 0.00014993896484375,
      "model_forward_time": 0.11569857597351074,
      "step": 24566
    },
    {
      "epoch": 0.00014993896484375,
      "step": 24566,
      "training_step_time": 0.4099435806274414
    },
    {
      "epoch": 0.000149945068359375,
      "model_forward_time": 0.11577200889587402,
      "step": 24567
    },
    {
      "epoch": 0.000149945068359375,
      "step": 24567,
      "training_step_time": 0.3905973434448242
    },
    {
      "epoch": 0.000149951171875,
      "model_forward_time": 0.11518692970275879,
      "step": 24568
    },
    {
      "epoch": 0.000149951171875,
      "step": 24568,
      "training_step_time": 0.3958892822265625
    },
    {
      "epoch": 0.000149957275390625,
      "model_forward_time": 0.11542487144470215,
      "step": 24569
    },
    {
      "epoch": 0.000149957275390625,
      "step": 24569,
      "training_step_time": 0.4002342224121094
    },
    {
      "epoch": 0.00014996337890625,
      "grad_norm": 0.1515740603208542,
      "learning_rate": 6.863660384379017e-05,
      "loss": 0.0455,
      "step": 24570
    },
    {
      "epoch": 0.00014996337890625,
      "model_forward_time": 0.11562967300415039,
      "step": 24570
    },
    {
      "epoch": 0.00014996337890625,
      "step": 24570,
      "training_step_time": 0.4421830177307129
    },
    {
      "epoch": 0.000149969482421875,
      "model_forward_time": 0.11515164375305176,
      "step": 24571
    },
    {
      "epoch": 0.000149969482421875,
      "step": 24571,
      "training_step_time": 0.4950246810913086
    },
    {
      "epoch": 0.0001499755859375,
      "model_forward_time": 0.11512494087219238,
      "step": 24572
    },
    {
      "epoch": 0.0001499755859375,
      "step": 24572,
      "training_step_time": 0.419696569442749
    },
    {
      "epoch": 0.000149981689453125,
      "model_forward_time": 0.11517715454101562,
      "step": 24573
    },
    {
      "epoch": 0.000149981689453125,
      "step": 24573,
      "training_step_time": 0.44713282585144043
    },
    {
      "epoch": 0.00014998779296875,
      "model_forward_time": 0.11589765548706055,
      "step": 24574
    },
    {
      "epoch": 0.00014998779296875,
      "step": 24574,
      "training_step_time": 0.4342341423034668
    },
    {
      "epoch": 0.000149993896484375,
      "model_forward_time": 0.11501288414001465,
      "step": 24575
    },
    {
      "epoch": 0.000149993896484375,
      "step": 24575,
      "training_step_time": 0.44011473655700684
    },
    {
      "epoch": 0.00015,
      "model_forward_time": 0.11539673805236816,
      "step": 24576
    },
    {
      "epoch": 0.00015,
      "step": 24576,
      "training_step_time": 0.3889448642730713
    },
    {
      "epoch": 0.000150006103515625,
      "model_forward_time": 0.11523628234863281,
      "step": 24577
    },
    {
      "epoch": 0.000150006103515625,
      "step": 24577,
      "training_step_time": 0.4054276943206787
    },
    {
      "epoch": 0.00015001220703125,
      "model_forward_time": 0.1157073974609375,
      "step": 24578
    },
    {
      "epoch": 0.00015001220703125,
      "step": 24578,
      "training_step_time": 0.38974833488464355
    },
    {
      "epoch": 0.000150018310546875,
      "model_forward_time": 0.11616897583007812,
      "step": 24579
    },
    {
      "epoch": 0.000150018310546875,
      "step": 24579,
      "training_step_time": 0.39211249351501465
    },
    {
      "epoch": 0.0001500244140625,
      "grad_norm": 0.07585003226995468,
      "learning_rate": 6.861102902877946e-05,
      "loss": 0.0436,
      "step": 24580
    },
    {
      "epoch": 0.0001500244140625,
      "model_forward_time": 0.11523127555847168,
      "step": 24580
    },
    {
      "epoch": 0.0001500244140625,
      "step": 24580,
      "training_step_time": 0.39324188232421875
    },
    {
      "epoch": 0.000150030517578125,
      "model_forward_time": 0.11521625518798828,
      "step": 24581
    },
    {
      "epoch": 0.000150030517578125,
      "step": 24581,
      "training_step_time": 0.4078679084777832
    },
    {
      "epoch": 0.00015003662109375,
      "model_forward_time": 0.1152336597442627,
      "step": 24582
    },
    {
      "epoch": 0.00015003662109375,
      "step": 24582,
      "training_step_time": 0.39138174057006836
    },
    {
      "epoch": 0.000150042724609375,
      "model_forward_time": 0.11624908447265625,
      "step": 24583
    },
    {
      "epoch": 0.000150042724609375,
      "step": 24583,
      "training_step_time": 0.6465766429901123
    },
    {
      "epoch": 0.000150048828125,
      "model_forward_time": 0.11565947532653809,
      "step": 24584
    },
    {
      "epoch": 0.000150048828125,
      "step": 24584,
      "training_step_time": 0.44516658782958984
    },
    {
      "epoch": 0.000150054931640625,
      "model_forward_time": 0.11497640609741211,
      "step": 24585
    },
    {
      "epoch": 0.000150054931640625,
      "step": 24585,
      "training_step_time": 0.4010186195373535
    },
    {
      "epoch": 0.00015006103515625,
      "model_forward_time": 0.11506509780883789,
      "step": 24586
    },
    {
      "epoch": 0.00015006103515625,
      "step": 24586,
      "training_step_time": 0.43583250045776367
    },
    {
      "epoch": 0.000150067138671875,
      "model_forward_time": 0.1151740550994873,
      "step": 24587
    },
    {
      "epoch": 0.000150067138671875,
      "step": 24587,
      "training_step_time": 0.3888669013977051
    },
    {
      "epoch": 0.0001500732421875,
      "model_forward_time": 0.11760973930358887,
      "step": 24588
    },
    {
      "epoch": 0.0001500732421875,
      "step": 24588,
      "training_step_time": 0.4714524745941162
    },
    {
      "epoch": 0.000150079345703125,
      "model_forward_time": 0.11576390266418457,
      "step": 24589
    },
    {
      "epoch": 0.000150079345703125,
      "step": 24589,
      "training_step_time": 0.4991123676300049
    },
    {
      "epoch": 0.00015008544921875,
      "grad_norm": 0.1737104207277298,
      "learning_rate": 6.858544856022952e-05,
      "loss": 0.0566,
      "step": 24590
    },
    {
      "epoch": 0.00015008544921875,
      "model_forward_time": 0.11556196212768555,
      "step": 24590
    },
    {
      "epoch": 0.00015008544921875,
      "step": 24590,
      "training_step_time": 0.4087097644805908
    },
    {
      "epoch": 0.000150091552734375,
      "model_forward_time": 0.11467242240905762,
      "step": 24591
    },
    {
      "epoch": 0.000150091552734375,
      "step": 24591,
      "training_step_time": 0.3854539394378662
    },
    {
      "epoch": 0.00015009765625,
      "model_forward_time": 0.11558675765991211,
      "step": 24592
    },
    {
      "epoch": 0.00015009765625,
      "step": 24592,
      "training_step_time": 0.3913736343383789
    },
    {
      "epoch": 0.000150103759765625,
      "model_forward_time": 0.1148691177368164,
      "step": 24593
    },
    {
      "epoch": 0.000150103759765625,
      "step": 24593,
      "training_step_time": 0.3918747901916504
    },
    {
      "epoch": 0.00015010986328125,
      "model_forward_time": 0.11568140983581543,
      "step": 24594
    },
    {
      "epoch": 0.00015010986328125,
      "step": 24594,
      "training_step_time": 0.40576887130737305
    },
    {
      "epoch": 0.000150115966796875,
      "model_forward_time": 0.1156005859375,
      "step": 24595
    },
    {
      "epoch": 0.000150115966796875,
      "step": 24595,
      "training_step_time": 0.4810473918914795
    },
    {
      "epoch": 0.0001501220703125,
      "model_forward_time": 0.11616063117980957,
      "step": 24596
    },
    {
      "epoch": 0.0001501220703125,
      "step": 24596,
      "training_step_time": 0.3870658874511719
    },
    {
      "epoch": 0.000150128173828125,
      "model_forward_time": 0.11564207077026367,
      "step": 24597
    },
    {
      "epoch": 0.000150128173828125,
      "step": 24597,
      "training_step_time": 0.427426815032959
    },
    {
      "epoch": 0.00015013427734375,
      "model_forward_time": 0.11559057235717773,
      "step": 24598
    },
    {
      "epoch": 0.00015013427734375,
      "step": 24598,
      "training_step_time": 0.4539668560028076
    },
    {
      "epoch": 0.000150140380859375,
      "model_forward_time": 0.11516427993774414,
      "step": 24599
    },
    {
      "epoch": 0.000150140380859375,
      "step": 24599,
      "training_step_time": 0.437347412109375
    },
    {
      "epoch": 0.000150146484375,
      "grad_norm": 0.14278727769851685,
      "learning_rate": 6.855986244591104e-05,
      "loss": 0.0545,
      "step": 24600
    },
    {
      "epoch": 0.000150146484375,
      "model_forward_time": 0.11528348922729492,
      "step": 24600
    },
    {
      "epoch": 0.000150146484375,
      "step": 24600,
      "training_step_time": 0.45900964736938477
    },
    {
      "epoch": 0.000150152587890625,
      "model_forward_time": 0.11544513702392578,
      "step": 24601
    },
    {
      "epoch": 0.000150152587890625,
      "step": 24601,
      "training_step_time": 0.39785242080688477
    },
    {
      "epoch": 0.00015015869140625,
      "model_forward_time": 0.11549830436706543,
      "step": 24602
    },
    {
      "epoch": 0.00015015869140625,
      "step": 24602,
      "training_step_time": 0.39891481399536133
    },
    {
      "epoch": 0.000150164794921875,
      "model_forward_time": 0.11777639389038086,
      "step": 24603
    },
    {
      "epoch": 0.000150164794921875,
      "step": 24603,
      "training_step_time": 0.4177696704864502
    },
    {
      "epoch": 0.0001501708984375,
      "model_forward_time": 0.12015271186828613,
      "step": 24604
    },
    {
      "epoch": 0.0001501708984375,
      "step": 24604,
      "training_step_time": 0.40137529373168945
    },
    {
      "epoch": 0.000150177001953125,
      "model_forward_time": 0.11802840232849121,
      "step": 24605
    },
    {
      "epoch": 0.000150177001953125,
      "step": 24605,
      "training_step_time": 0.3811793327331543
    },
    {
      "epoch": 0.00015018310546875,
      "model_forward_time": 0.11786580085754395,
      "step": 24606
    },
    {
      "epoch": 0.00015018310546875,
      "step": 24606,
      "training_step_time": 0.3915677070617676
    },
    {
      "epoch": 0.000150189208984375,
      "model_forward_time": 0.11805605888366699,
      "step": 24607
    },
    {
      "epoch": 0.000150189208984375,
      "step": 24607,
      "training_step_time": 0.5426814556121826
    },
    {
      "epoch": 0.0001501953125,
      "model_forward_time": 0.1158895492553711,
      "step": 24608
    },
    {
      "epoch": 0.0001501953125,
      "step": 24608,
      "training_step_time": 0.3893435001373291
    },
    {
      "epoch": 0.000150201416015625,
      "model_forward_time": 0.11513018608093262,
      "step": 24609
    },
    {
      "epoch": 0.000150201416015625,
      "step": 24609,
      "training_step_time": 0.3841698169708252
    },
    {
      "epoch": 0.00015020751953125,
      "grad_norm": 0.21218526363372803,
      "learning_rate": 6.85342706935964e-05,
      "loss": 0.0487,
      "step": 24610
    },
    {
      "epoch": 0.00015020751953125,
      "model_forward_time": 0.11591482162475586,
      "step": 24610
    },
    {
      "epoch": 0.00015020751953125,
      "step": 24610,
      "training_step_time": 0.39029741287231445
    },
    {
      "epoch": 0.000150213623046875,
      "model_forward_time": 0.11525917053222656,
      "step": 24611
    },
    {
      "epoch": 0.000150213623046875,
      "step": 24611,
      "training_step_time": 0.4041273593902588
    },
    {
      "epoch": 0.0001502197265625,
      "model_forward_time": 0.11517786979675293,
      "step": 24612
    },
    {
      "epoch": 0.0001502197265625,
      "step": 24612,
      "training_step_time": 0.45462489128112793
    },
    {
      "epoch": 0.000150225830078125,
      "model_forward_time": 0.11557483673095703,
      "step": 24613
    },
    {
      "epoch": 0.000150225830078125,
      "step": 24613,
      "training_step_time": 0.48300862312316895
    },
    {
      "epoch": 0.00015023193359375,
      "model_forward_time": 0.11569666862487793,
      "step": 24614
    },
    {
      "epoch": 0.00015023193359375,
      "step": 24614,
      "training_step_time": 0.437544584274292
    },
    {
      "epoch": 0.000150238037109375,
      "model_forward_time": 0.1147618293762207,
      "step": 24615
    },
    {
      "epoch": 0.000150238037109375,
      "step": 24615,
      "training_step_time": 0.3968629837036133
    },
    {
      "epoch": 0.000150244140625,
      "model_forward_time": 0.1148674488067627,
      "step": 24616
    },
    {
      "epoch": 0.000150244140625,
      "step": 24616,
      "training_step_time": 0.3715193271636963
    },
    {
      "epoch": 0.000150250244140625,
      "model_forward_time": 0.13202929496765137,
      "step": 24617
    },
    {
      "epoch": 0.000150250244140625,
      "step": 24617,
      "training_step_time": 0.41405391693115234
    },
    {
      "epoch": 0.00015025634765625,
      "model_forward_time": 0.11679601669311523,
      "step": 24618
    },
    {
      "epoch": 0.00015025634765625,
      "step": 24618,
      "training_step_time": 0.48647594451904297
    },
    {
      "epoch": 0.000150262451171875,
      "model_forward_time": 0.11511588096618652,
      "step": 24619
    },
    {
      "epoch": 0.000150262451171875,
      "step": 24619,
      "training_step_time": 0.43365907669067383
    },
    {
      "epoch": 0.0001502685546875,
      "grad_norm": 0.13488024473190308,
      "learning_rate": 6.850867331105967e-05,
      "loss": 0.0538,
      "step": 24620
    },
    {
      "epoch": 0.0001502685546875,
      "model_forward_time": 0.11530709266662598,
      "step": 24620
    },
    {
      "epoch": 0.0001502685546875,
      "step": 24620,
      "training_step_time": 0.38276243209838867
    },
    {
      "epoch": 0.000150274658203125,
      "model_forward_time": 0.11565899848937988,
      "step": 24621
    },
    {
      "epoch": 0.000150274658203125,
      "step": 24621,
      "training_step_time": 0.394972562789917
    },
    {
      "epoch": 0.00015028076171875,
      "model_forward_time": 0.11621618270874023,
      "step": 24622
    },
    {
      "epoch": 0.00015028076171875,
      "step": 24622,
      "training_step_time": 0.39872217178344727
    },
    {
      "epoch": 0.000150286865234375,
      "model_forward_time": 0.11610889434814453,
      "step": 24623
    },
    {
      "epoch": 0.000150286865234375,
      "step": 24623,
      "training_step_time": 0.4003787040710449
    },
    {
      "epoch": 0.00015029296875,
      "model_forward_time": 0.1149294376373291,
      "step": 24624
    },
    {
      "epoch": 0.00015029296875,
      "step": 24624,
      "training_step_time": 0.3964524269104004
    },
    {
      "epoch": 0.000150299072265625,
      "model_forward_time": 0.11566591262817383,
      "step": 24625
    },
    {
      "epoch": 0.000150299072265625,
      "step": 24625,
      "training_step_time": 0.6430315971374512
    },
    {
      "epoch": 0.00015030517578125,
      "model_forward_time": 0.1156461238861084,
      "step": 24626
    },
    {
      "epoch": 0.00015030517578125,
      "step": 24626,
      "training_step_time": 0.42800402641296387
    },
    {
      "epoch": 0.000150311279296875,
      "model_forward_time": 0.11501812934875488,
      "step": 24627
    },
    {
      "epoch": 0.000150311279296875,
      "step": 24627,
      "training_step_time": 0.4248185157775879
    },
    {
      "epoch": 0.0001503173828125,
      "model_forward_time": 0.11461329460144043,
      "step": 24628
    },
    {
      "epoch": 0.0001503173828125,
      "step": 24628,
      "training_step_time": 0.4068334102630615
    },
    {
      "epoch": 0.000150323486328125,
      "model_forward_time": 0.11486124992370605,
      "step": 24629
    },
    {
      "epoch": 0.000150323486328125,
      "step": 24629,
      "training_step_time": 0.3776090145111084
    },
    {
      "epoch": 0.00015032958984375,
      "grad_norm": 0.16353099048137665,
      "learning_rate": 6.84830703060767e-05,
      "loss": 0.0499,
      "step": 24630
    },
    {
      "epoch": 0.00015032958984375,
      "model_forward_time": 0.1154022216796875,
      "step": 24630
    },
    {
      "epoch": 0.00015032958984375,
      "step": 24630,
      "training_step_time": 0.387805700302124
    },
    {
      "epoch": 0.000150335693359375,
      "model_forward_time": 0.1157076358795166,
      "step": 24631
    },
    {
      "epoch": 0.000150335693359375,
      "step": 24631,
      "training_step_time": 0.6422014236450195
    },
    {
      "epoch": 0.000150341796875,
      "model_forward_time": 0.11465716361999512,
      "step": 24632
    },
    {
      "epoch": 0.000150341796875,
      "step": 24632,
      "training_step_time": 0.39286017417907715
    },
    {
      "epoch": 0.000150347900390625,
      "model_forward_time": 0.11955785751342773,
      "step": 24633
    },
    {
      "epoch": 0.000150347900390625,
      "step": 24633,
      "training_step_time": 0.38500118255615234
    },
    {
      "epoch": 0.00015035400390625,
      "model_forward_time": 0.11512064933776855,
      "step": 24634
    },
    {
      "epoch": 0.00015035400390625,
      "step": 24634,
      "training_step_time": 0.3961760997772217
    },
    {
      "epoch": 0.000150360107421875,
      "model_forward_time": 0.11544251441955566,
      "step": 24635
    },
    {
      "epoch": 0.000150360107421875,
      "step": 24635,
      "training_step_time": 0.38330078125
    },
    {
      "epoch": 0.0001503662109375,
      "model_forward_time": 0.11557984352111816,
      "step": 24636
    },
    {
      "epoch": 0.0001503662109375,
      "step": 24636,
      "training_step_time": 0.39584875106811523
    },
    {
      "epoch": 0.000150372314453125,
      "model_forward_time": 0.11507844924926758,
      "step": 24637
    },
    {
      "epoch": 0.000150372314453125,
      "step": 24637,
      "training_step_time": 0.543248176574707
    },
    {
      "epoch": 0.00015037841796875,
      "model_forward_time": 0.11485624313354492,
      "step": 24638
    },
    {
      "epoch": 0.00015037841796875,
      "step": 24638,
      "training_step_time": 0.38636231422424316
    },
    {
      "epoch": 0.000150384521484375,
      "model_forward_time": 0.11501240730285645,
      "step": 24639
    },
    {
      "epoch": 0.000150384521484375,
      "step": 24639,
      "training_step_time": 0.40296077728271484
    },
    {
      "epoch": 0.000150390625,
      "grad_norm": 0.15456907451152802,
      "learning_rate": 6.845746168642497e-05,
      "loss": 0.044,
      "step": 24640
    },
    {
      "epoch": 0.000150390625,
      "model_forward_time": 0.11621594429016113,
      "step": 24640
    },
    {
      "epoch": 0.000150390625,
      "step": 24640,
      "training_step_time": 0.462721586227417
    },
    {
      "epoch": 0.000150396728515625,
      "model_forward_time": 0.11521267890930176,
      "step": 24641
    },
    {
      "epoch": 0.000150396728515625,
      "step": 24641,
      "training_step_time": 0.4432661533355713
    },
    {
      "epoch": 0.00015040283203125,
      "model_forward_time": 0.11468315124511719,
      "step": 24642
    },
    {
      "epoch": 0.00015040283203125,
      "step": 24642,
      "training_step_time": 0.46375370025634766
    },
    {
      "epoch": 0.000150408935546875,
      "model_forward_time": 0.11642122268676758,
      "step": 24643
    },
    {
      "epoch": 0.000150408935546875,
      "step": 24643,
      "training_step_time": 0.42665672302246094
    },
    {
      "epoch": 0.0001504150390625,
      "model_forward_time": 0.11609005928039551,
      "step": 24644
    },
    {
      "epoch": 0.0001504150390625,
      "step": 24644,
      "training_step_time": 0.3936798572540283
    },
    {
      "epoch": 0.000150421142578125,
      "model_forward_time": 0.1156463623046875,
      "step": 24645
    },
    {
      "epoch": 0.000150421142578125,
      "step": 24645,
      "training_step_time": 0.49918222427368164
    },
    {
      "epoch": 0.00015042724609375,
      "model_forward_time": 0.11530876159667969,
      "step": 24646
    },
    {
      "epoch": 0.00015042724609375,
      "step": 24646,
      "training_step_time": 0.4685044288635254
    },
    {
      "epoch": 0.000150433349609375,
      "model_forward_time": 0.11638903617858887,
      "step": 24647
    },
    {
      "epoch": 0.000150433349609375,
      "step": 24647,
      "training_step_time": 0.3971364498138428
    },
    {
      "epoch": 0.000150439453125,
      "model_forward_time": 0.11591863632202148,
      "step": 24648
    },
    {
      "epoch": 0.000150439453125,
      "step": 24648,
      "training_step_time": 0.404130220413208
    },
    {
      "epoch": 0.000150445556640625,
      "model_forward_time": 0.1152958869934082,
      "step": 24649
    },
    {
      "epoch": 0.000150445556640625,
      "step": 24649,
      "training_step_time": 0.39467859268188477
    },
    {
      "epoch": 0.00015045166015625,
      "grad_norm": 0.11281415075063705,
      "learning_rate": 6.843184745988373e-05,
      "loss": 0.0477,
      "step": 24650
    },
    {
      "epoch": 0.00015045166015625,
      "model_forward_time": 0.11554121971130371,
      "step": 24650
    },
    {
      "epoch": 0.00015045166015625,
      "step": 24650,
      "training_step_time": 0.39113283157348633
    },
    {
      "epoch": 0.000150457763671875,
      "model_forward_time": 0.11513876914978027,
      "step": 24651
    },
    {
      "epoch": 0.000150457763671875,
      "step": 24651,
      "training_step_time": 0.41128993034362793
    },
    {
      "epoch": 0.0001504638671875,
      "model_forward_time": 0.11550045013427734,
      "step": 24652
    },
    {
      "epoch": 0.0001504638671875,
      "step": 24652,
      "training_step_time": 0.4009418487548828
    },
    {
      "epoch": 0.000150469970703125,
      "model_forward_time": 0.1156008243560791,
      "step": 24653
    },
    {
      "epoch": 0.000150469970703125,
      "step": 24653,
      "training_step_time": 0.4024791717529297
    },
    {
      "epoch": 0.00015047607421875,
      "model_forward_time": 0.11507296562194824,
      "step": 24654
    },
    {
      "epoch": 0.00015047607421875,
      "step": 24654,
      "training_step_time": 0.4498894214630127
    },
    {
      "epoch": 0.000150482177734375,
      "model_forward_time": 0.11604118347167969,
      "step": 24655
    },
    {
      "epoch": 0.000150482177734375,
      "step": 24655,
      "training_step_time": 0.4996764659881592
    },
    {
      "epoch": 0.00015048828125,
      "model_forward_time": 0.11585307121276855,
      "step": 24656
    },
    {
      "epoch": 0.00015048828125,
      "step": 24656,
      "training_step_time": 0.5002598762512207
    },
    {
      "epoch": 0.000150494384765625,
      "model_forward_time": 0.1149754524230957,
      "step": 24657
    },
    {
      "epoch": 0.000150494384765625,
      "step": 24657,
      "training_step_time": 0.3912215232849121
    },
    {
      "epoch": 0.00015050048828125,
      "model_forward_time": 0.11688780784606934,
      "step": 24658
    },
    {
      "epoch": 0.00015050048828125,
      "step": 24658,
      "training_step_time": 0.3833956718444824
    },
    {
      "epoch": 0.000150506591796875,
      "model_forward_time": 0.11755752563476562,
      "step": 24659
    },
    {
      "epoch": 0.000150506591796875,
      "step": 24659,
      "training_step_time": 0.4738335609436035
    },
    {
      "epoch": 0.0001505126953125,
      "grad_norm": 0.2061200737953186,
      "learning_rate": 6.840622763423391e-05,
      "loss": 0.0458,
      "step": 24660
    },
    {
      "epoch": 0.0001505126953125,
      "model_forward_time": 0.11737442016601562,
      "step": 24660
    },
    {
      "epoch": 0.0001505126953125,
      "step": 24660,
      "training_step_time": 0.40213727951049805
    },
    {
      "epoch": 0.000150518798828125,
      "model_forward_time": 0.11803221702575684,
      "step": 24661
    },
    {
      "epoch": 0.000150518798828125,
      "step": 24661,
      "training_step_time": 0.40071868896484375
    },
    {
      "epoch": 0.00015052490234375,
      "model_forward_time": 0.11763405799865723,
      "step": 24662
    },
    {
      "epoch": 0.00015052490234375,
      "step": 24662,
      "training_step_time": 0.39120006561279297
    },
    {
      "epoch": 0.000150531005859375,
      "model_forward_time": 0.11862707138061523,
      "step": 24663
    },
    {
      "epoch": 0.000150531005859375,
      "step": 24663,
      "training_step_time": 0.37720632553100586
    },
    {
      "epoch": 0.000150537109375,
      "model_forward_time": 0.11608338356018066,
      "step": 24664
    },
    {
      "epoch": 0.000150537109375,
      "step": 24664,
      "training_step_time": 0.3863065242767334
    },
    {
      "epoch": 0.000150543212890625,
      "model_forward_time": 0.11504220962524414,
      "step": 24665
    },
    {
      "epoch": 0.000150543212890625,
      "step": 24665,
      "training_step_time": 0.3804447650909424
    },
    {
      "epoch": 0.00015054931640625,
      "model_forward_time": 0.11519432067871094,
      "step": 24666
    },
    {
      "epoch": 0.00015054931640625,
      "step": 24666,
      "training_step_time": 0.4022247791290283
    },
    {
      "epoch": 0.000150555419921875,
      "model_forward_time": 0.11617255210876465,
      "step": 24667
    },
    {
      "epoch": 0.000150555419921875,
      "step": 24667,
      "training_step_time": 0.639373779296875
    },
    {
      "epoch": 0.0001505615234375,
      "model_forward_time": 0.11519694328308105,
      "step": 24668
    },
    {
      "epoch": 0.0001505615234375,
      "step": 24668,
      "training_step_time": 0.38412928581237793
    },
    {
      "epoch": 0.000150567626953125,
      "model_forward_time": 0.11583757400512695,
      "step": 24669
    },
    {
      "epoch": 0.000150567626953125,
      "step": 24669,
      "training_step_time": 0.42986631393432617
    },
    {
      "epoch": 0.00015057373046875,
      "grad_norm": 0.12870752811431885,
      "learning_rate": 6.838060221725811e-05,
      "loss": 0.0516,
      "step": 24670
    },
    {
      "epoch": 0.00015057373046875,
      "model_forward_time": 0.11572432518005371,
      "step": 24670
    },
    {
      "epoch": 0.00015057373046875,
      "step": 24670,
      "training_step_time": 0.47406816482543945
    },
    {
      "epoch": 0.000150579833984375,
      "model_forward_time": 0.11450648307800293,
      "step": 24671
    },
    {
      "epoch": 0.000150579833984375,
      "step": 24671,
      "training_step_time": 0.3947150707244873
    },
    {
      "epoch": 0.0001505859375,
      "model_forward_time": 0.11492037773132324,
      "step": 24672
    },
    {
      "epoch": 0.0001505859375,
      "step": 24672,
      "training_step_time": 0.3904554843902588
    },
    {
      "epoch": 0.000150592041015625,
      "model_forward_time": 0.11563491821289062,
      "step": 24673
    },
    {
      "epoch": 0.000150592041015625,
      "step": 24673,
      "training_step_time": 0.38529372215270996
    },
    {
      "epoch": 0.00015059814453125,
      "model_forward_time": 0.11542749404907227,
      "step": 24674
    },
    {
      "epoch": 0.00015059814453125,
      "step": 24674,
      "training_step_time": 0.4334757328033447
    },
    {
      "epoch": 0.000150604248046875,
      "model_forward_time": 0.11573386192321777,
      "step": 24675
    },
    {
      "epoch": 0.000150604248046875,
      "step": 24675,
      "training_step_time": 0.4770371913909912
    },
    {
      "epoch": 0.0001506103515625,
      "model_forward_time": 0.11494636535644531,
      "step": 24676
    },
    {
      "epoch": 0.0001506103515625,
      "step": 24676,
      "training_step_time": 0.4022068977355957
    },
    {
      "epoch": 0.000150616455078125,
      "model_forward_time": 0.11506152153015137,
      "step": 24677
    },
    {
      "epoch": 0.000150616455078125,
      "step": 24677,
      "training_step_time": 0.38805437088012695
    },
    {
      "epoch": 0.00015062255859375,
      "model_forward_time": 0.11557221412658691,
      "step": 24678
    },
    {
      "epoch": 0.00015062255859375,
      "step": 24678,
      "training_step_time": 0.3852365016937256
    },
    {
      "epoch": 0.000150628662109375,
      "model_forward_time": 0.1157538890838623,
      "step": 24679
    },
    {
      "epoch": 0.000150628662109375,
      "step": 24679,
      "training_step_time": 0.5539500713348389
    },
    {
      "epoch": 0.000150634765625,
      "grad_norm": 0.11760920286178589,
      "learning_rate": 6.835497121674066e-05,
      "loss": 0.0478,
      "step": 24680
    },
    {
      "epoch": 0.000150634765625,
      "model_forward_time": 0.11551666259765625,
      "step": 24680
    },
    {
      "epoch": 0.000150634765625,
      "step": 24680,
      "training_step_time": 0.3879892826080322
    },
    {
      "epoch": 0.000150640869140625,
      "model_forward_time": 0.11548495292663574,
      "step": 24681
    },
    {
      "epoch": 0.000150640869140625,
      "step": 24681,
      "training_step_time": 0.3981208801269531
    },
    {
      "epoch": 0.00015064697265625,
      "model_forward_time": 0.11582350730895996,
      "step": 24682
    },
    {
      "epoch": 0.00015064697265625,
      "step": 24682,
      "training_step_time": 0.398327112197876
    },
    {
      "epoch": 0.000150653076171875,
      "model_forward_time": 0.11577534675598145,
      "step": 24683
    },
    {
      "epoch": 0.000150653076171875,
      "step": 24683,
      "training_step_time": 0.3977510929107666
    },
    {
      "epoch": 0.0001506591796875,
      "model_forward_time": 0.11532282829284668,
      "step": 24684
    },
    {
      "epoch": 0.0001506591796875,
      "step": 24684,
      "training_step_time": 0.4403846263885498
    },
    {
      "epoch": 0.000150665283203125,
      "model_forward_time": 0.13329553604125977,
      "step": 24685
    },
    {
      "epoch": 0.000150665283203125,
      "step": 24685,
      "training_step_time": 0.6807312965393066
    },
    {
      "epoch": 0.00015067138671875,
      "model_forward_time": 0.11600112915039062,
      "step": 24686
    },
    {
      "epoch": 0.00015067138671875,
      "step": 24686,
      "training_step_time": 0.40318918228149414
    },
    {
      "epoch": 0.000150677490234375,
      "model_forward_time": 0.11486434936523438,
      "step": 24687
    },
    {
      "epoch": 0.000150677490234375,
      "step": 24687,
      "training_step_time": 0.3686199188232422
    },
    {
      "epoch": 0.00015068359375,
      "model_forward_time": 0.11580872535705566,
      "step": 24688
    },
    {
      "epoch": 0.00015068359375,
      "step": 24688,
      "training_step_time": 0.43709707260131836
    },
    {
      "epoch": 0.000150689697265625,
      "model_forward_time": 0.11517667770385742,
      "step": 24689
    },
    {
      "epoch": 0.000150689697265625,
      "step": 24689,
      "training_step_time": 0.39424777030944824
    },
    {
      "epoch": 0.00015069580078125,
      "grad_norm": 0.15615606307983398,
      "learning_rate": 6.83293346404676e-05,
      "loss": 0.0539,
      "step": 24690
    },
    {
      "epoch": 0.00015069580078125,
      "model_forward_time": 0.11549210548400879,
      "step": 24690
    },
    {
      "epoch": 0.00015069580078125,
      "step": 24690,
      "training_step_time": 0.3857102394104004
    },
    {
      "epoch": 0.000150701904296875,
      "model_forward_time": 0.11579179763793945,
      "step": 24691
    },
    {
      "epoch": 0.000150701904296875,
      "step": 24691,
      "training_step_time": 0.4658081531524658
    },
    {
      "epoch": 0.0001507080078125,
      "model_forward_time": 0.11591219902038574,
      "step": 24692
    },
    {
      "epoch": 0.0001507080078125,
      "step": 24692,
      "training_step_time": 0.3877990245819092
    },
    {
      "epoch": 0.000150714111328125,
      "model_forward_time": 0.1152040958404541,
      "step": 24693
    },
    {
      "epoch": 0.000150714111328125,
      "step": 24693,
      "training_step_time": 0.4183688163757324
    },
    {
      "epoch": 0.00015072021484375,
      "model_forward_time": 0.1154031753540039,
      "step": 24694
    },
    {
      "epoch": 0.00015072021484375,
      "step": 24694,
      "training_step_time": 0.4206271171569824
    },
    {
      "epoch": 0.000150726318359375,
      "model_forward_time": 0.11563801765441895,
      "step": 24695
    },
    {
      "epoch": 0.000150726318359375,
      "step": 24695,
      "training_step_time": 0.40033459663391113
    },
    {
      "epoch": 0.000150732421875,
      "model_forward_time": 0.11582279205322266,
      "step": 24696
    },
    {
      "epoch": 0.000150732421875,
      "step": 24696,
      "training_step_time": 0.3938157558441162
    },
    {
      "epoch": 0.000150738525390625,
      "model_forward_time": 0.11571621894836426,
      "step": 24697
    },
    {
      "epoch": 0.000150738525390625,
      "step": 24697,
      "training_step_time": 0.7239112854003906
    },
    {
      "epoch": 0.00015074462890625,
      "model_forward_time": 0.11487841606140137,
      "step": 24698
    },
    {
      "epoch": 0.00015074462890625,
      "step": 24698,
      "training_step_time": 0.4488813877105713
    },
    {
      "epoch": 0.000150750732421875,
      "model_forward_time": 0.11512517929077148,
      "step": 24699
    },
    {
      "epoch": 0.000150750732421875,
      "step": 24699,
      "training_step_time": 0.38747620582580566
    },
    {
      "epoch": 0.0001507568359375,
      "grad_norm": 0.18069884181022644,
      "learning_rate": 6.830369249622662e-05,
      "loss": 0.0494,
      "step": 24700
    },
    {
      "epoch": 0.0001507568359375,
      "model_forward_time": 0.11452746391296387,
      "step": 24700
    },
    {
      "epoch": 0.0001507568359375,
      "step": 24700,
      "training_step_time": 0.4022541046142578
    },
    {
      "epoch": 0.000150762939453125,
      "model_forward_time": 0.11447858810424805,
      "step": 24701
    },
    {
      "epoch": 0.000150762939453125,
      "step": 24701,
      "training_step_time": 0.36611270904541016
    },
    {
      "epoch": 0.00015076904296875,
      "model_forward_time": 0.11480116844177246,
      "step": 24702
    },
    {
      "epoch": 0.00015076904296875,
      "step": 24702,
      "training_step_time": 0.43338680267333984
    },
    {
      "epoch": 0.000150775146484375,
      "model_forward_time": 0.1158905029296875,
      "step": 24703
    },
    {
      "epoch": 0.000150775146484375,
      "step": 24703,
      "training_step_time": 0.43100547790527344
    },
    {
      "epoch": 0.00015078125,
      "model_forward_time": 0.1212928295135498,
      "step": 24704
    },
    {
      "epoch": 0.00015078125,
      "step": 24704,
      "training_step_time": 0.3789517879486084
    },
    {
      "epoch": 0.000150787353515625,
      "model_forward_time": 0.11591315269470215,
      "step": 24705
    },
    {
      "epoch": 0.000150787353515625,
      "step": 24705,
      "training_step_time": 0.3885791301727295
    },
    {
      "epoch": 0.00015079345703125,
      "model_forward_time": 0.1155252456665039,
      "step": 24706
    },
    {
      "epoch": 0.00015079345703125,
      "step": 24706,
      "training_step_time": 0.40985751152038574
    },
    {
      "epoch": 0.000150799560546875,
      "model_forward_time": 0.11554813385009766,
      "step": 24707
    },
    {
      "epoch": 0.000150799560546875,
      "step": 24707,
      "training_step_time": 0.4119575023651123
    },
    {
      "epoch": 0.0001508056640625,
      "model_forward_time": 0.1152486801147461,
      "step": 24708
    },
    {
      "epoch": 0.0001508056640625,
      "step": 24708,
      "training_step_time": 0.3942854404449463
    },
    {
      "epoch": 0.000150811767578125,
      "model_forward_time": 0.1162722110748291,
      "step": 24709
    },
    {
      "epoch": 0.000150811767578125,
      "step": 24709,
      "training_step_time": 0.5377504825592041
    },
    {
      "epoch": 0.00015081787109375,
      "grad_norm": 0.1129809245467186,
      "learning_rate": 6.827804479180716e-05,
      "loss": 0.043,
      "step": 24710
    },
    {
      "epoch": 0.00015081787109375,
      "model_forward_time": 0.12187314033508301,
      "step": 24710
    },
    {
      "epoch": 0.00015081787109375,
      "step": 24710,
      "training_step_time": 0.39070606231689453
    },
    {
      "epoch": 0.000150823974609375,
      "model_forward_time": 0.11534953117370605,
      "step": 24711
    },
    {
      "epoch": 0.000150823974609375,
      "step": 24711,
      "training_step_time": 0.48883628845214844
    },
    {
      "epoch": 0.000150830078125,
      "model_forward_time": 0.11502671241760254,
      "step": 24712
    },
    {
      "epoch": 0.000150830078125,
      "step": 24712,
      "training_step_time": 0.4189608097076416
    },
    {
      "epoch": 0.000150836181640625,
      "model_forward_time": 0.11571669578552246,
      "step": 24713
    },
    {
      "epoch": 0.000150836181640625,
      "step": 24713,
      "training_step_time": 0.5068759918212891
    },
    {
      "epoch": 0.00015084228515625,
      "model_forward_time": 0.11504530906677246,
      "step": 24714
    },
    {
      "epoch": 0.00015084228515625,
      "step": 24714,
      "training_step_time": 0.3943471908569336
    },
    {
      "epoch": 0.000150848388671875,
      "model_forward_time": 0.11473536491394043,
      "step": 24715
    },
    {
      "epoch": 0.000150848388671875,
      "step": 24715,
      "training_step_time": 0.36940598487854004
    },
    {
      "epoch": 0.0001508544921875,
      "model_forward_time": 0.11798954010009766,
      "step": 24716
    },
    {
      "epoch": 0.0001508544921875,
      "step": 24716,
      "training_step_time": 0.44846010208129883
    },
    {
      "epoch": 0.000150860595703125,
      "model_forward_time": 0.11818909645080566,
      "step": 24717
    },
    {
      "epoch": 0.000150860595703125,
      "step": 24717,
      "training_step_time": 0.38114309310913086
    },
    {
      "epoch": 0.00015086669921875,
      "model_forward_time": 0.11801743507385254,
      "step": 24718
    },
    {
      "epoch": 0.00015086669921875,
      "step": 24718,
      "training_step_time": 0.3886229991912842
    },
    {
      "epoch": 0.000150872802734375,
      "model_forward_time": 0.11787176132202148,
      "step": 24719
    },
    {
      "epoch": 0.000150872802734375,
      "step": 24719,
      "training_step_time": 0.37750816345214844
    },
    {
      "epoch": 0.00015087890625,
      "grad_norm": 0.17170913517475128,
      "learning_rate": 6.825239153500029e-05,
      "loss": 0.0498,
      "step": 24720
    },
    {
      "epoch": 0.00015087890625,
      "model_forward_time": 0.11754417419433594,
      "step": 24720
    },
    {
      "epoch": 0.00015087890625,
      "step": 24720,
      "training_step_time": 0.4136006832122803
    },
    {
      "epoch": 0.000150885009765625,
      "model_forward_time": 0.11867094039916992,
      "step": 24721
    },
    {
      "epoch": 0.000150885009765625,
      "step": 24721,
      "training_step_time": 0.3920938968658447
    },
    {
      "epoch": 0.00015089111328125,
      "model_forward_time": 0.11781835556030273,
      "step": 24722
    },
    {
      "epoch": 0.00015089111328125,
      "step": 24722,
      "training_step_time": 0.39016008377075195
    },
    {
      "epoch": 0.000150897216796875,
      "model_forward_time": 0.11775374412536621,
      "step": 24723
    },
    {
      "epoch": 0.000150897216796875,
      "step": 24723,
      "training_step_time": 0.3866763114929199
    },
    {
      "epoch": 0.0001509033203125,
      "model_forward_time": 0.11833906173706055,
      "step": 24724
    },
    {
      "epoch": 0.0001509033203125,
      "step": 24724,
      "training_step_time": 0.3855741024017334
    },
    {
      "epoch": 0.000150909423828125,
      "model_forward_time": 0.11771011352539062,
      "step": 24725
    },
    {
      "epoch": 0.000150909423828125,
      "step": 24725,
      "training_step_time": 0.3953876495361328
    },
    {
      "epoch": 0.00015091552734375,
      "model_forward_time": 0.11894369125366211,
      "step": 24726
    },
    {
      "epoch": 0.00015091552734375,
      "step": 24726,
      "training_step_time": 0.4503612518310547
    },
    {
      "epoch": 0.000150921630859375,
      "model_forward_time": 0.1173257827758789,
      "step": 24727
    },
    {
      "epoch": 0.000150921630859375,
      "step": 24727,
      "training_step_time": 0.5157594680786133
    },
    {
      "epoch": 0.000150927734375,
      "model_forward_time": 0.11788630485534668,
      "step": 24728
    },
    {
      "epoch": 0.000150927734375,
      "step": 24728,
      "training_step_time": 0.3924126625061035
    },
    {
      "epoch": 0.000150933837890625,
      "model_forward_time": 0.11731600761413574,
      "step": 24729
    },
    {
      "epoch": 0.000150933837890625,
      "step": 24729,
      "training_step_time": 0.42365312576293945
    },
    {
      "epoch": 0.00015093994140625,
      "grad_norm": 0.15669047832489014,
      "learning_rate": 6.82267327335988e-05,
      "loss": 0.0483,
      "step": 24730
    },
    {
      "epoch": 0.00015093994140625,
      "model_forward_time": 0.11753058433532715,
      "step": 24730
    },
    {
      "epoch": 0.00015093994140625,
      "step": 24730,
      "training_step_time": 0.4504215717315674
    },
    {
      "epoch": 0.000150946044921875,
      "model_forward_time": 0.11788630485534668,
      "step": 24731
    },
    {
      "epoch": 0.000150946044921875,
      "step": 24731,
      "training_step_time": 0.47866177558898926
    },
    {
      "epoch": 0.0001509521484375,
      "model_forward_time": 0.11786746978759766,
      "step": 24732
    },
    {
      "epoch": 0.0001509521484375,
      "step": 24732,
      "training_step_time": 0.3834853172302246
    },
    {
      "epoch": 0.000150958251953125,
      "model_forward_time": 0.11716628074645996,
      "step": 24733
    },
    {
      "epoch": 0.000150958251953125,
      "step": 24733,
      "training_step_time": 0.37787771224975586
    },
    {
      "epoch": 0.00015096435546875,
      "model_forward_time": 0.11612081527709961,
      "step": 24734
    },
    {
      "epoch": 0.00015096435546875,
      "step": 24734,
      "training_step_time": 0.4171016216278076
    },
    {
      "epoch": 0.000150970458984375,
      "model_forward_time": 0.11562705039978027,
      "step": 24735
    },
    {
      "epoch": 0.000150970458984375,
      "step": 24735,
      "training_step_time": 0.39067769050598145
    },
    {
      "epoch": 0.0001509765625,
      "model_forward_time": 0.1151118278503418,
      "step": 24736
    },
    {
      "epoch": 0.0001509765625,
      "step": 24736,
      "training_step_time": 0.39711594581604004
    },
    {
      "epoch": 0.000150982666015625,
      "model_forward_time": 0.11556506156921387,
      "step": 24737
    },
    {
      "epoch": 0.000150982666015625,
      "step": 24737,
      "training_step_time": 0.3945326805114746
    },
    {
      "epoch": 0.00015098876953125,
      "model_forward_time": 0.1160423755645752,
      "step": 24738
    },
    {
      "epoch": 0.00015098876953125,
      "step": 24738,
      "training_step_time": 0.40164780616760254
    },
    {
      "epoch": 0.000150994873046875,
      "model_forward_time": 0.13286924362182617,
      "step": 24739
    },
    {
      "epoch": 0.000150994873046875,
      "step": 24739,
      "training_step_time": 0.6055333614349365
    },
    {
      "epoch": 0.0001510009765625,
      "grad_norm": 0.16174732148647308,
      "learning_rate": 6.820106839539715e-05,
      "loss": 0.0431,
      "step": 24740
    },
    {
      "epoch": 0.0001510009765625,
      "model_forward_time": 0.1156778335571289,
      "step": 24740
    },
    {
      "epoch": 0.0001510009765625,
      "step": 24740,
      "training_step_time": 0.4319920539855957
    },
    {
      "epoch": 0.000151007080078125,
      "model_forward_time": 0.11557388305664062,
      "step": 24741
    },
    {
      "epoch": 0.000151007080078125,
      "step": 24741,
      "training_step_time": 0.5023050308227539
    },
    {
      "epoch": 0.00015101318359375,
      "model_forward_time": 0.1153264045715332,
      "step": 24742
    },
    {
      "epoch": 0.00015101318359375,
      "step": 24742,
      "training_step_time": 0.3952333927154541
    },
    {
      "epoch": 0.000151019287109375,
      "model_forward_time": 0.11540842056274414,
      "step": 24743
    },
    {
      "epoch": 0.000151019287109375,
      "step": 24743,
      "training_step_time": 0.389237642288208
    },
    {
      "epoch": 0.000151025390625,
      "model_forward_time": 0.1156764030456543,
      "step": 24744
    },
    {
      "epoch": 0.000151025390625,
      "step": 24744,
      "training_step_time": 0.3936007022857666
    },
    {
      "epoch": 0.000151031494140625,
      "model_forward_time": 0.11847829818725586,
      "step": 24745
    },
    {
      "epoch": 0.000151031494140625,
      "step": 24745,
      "training_step_time": 0.5011997222900391
    },
    {
      "epoch": 0.00015103759765625,
      "model_forward_time": 0.11705279350280762,
      "step": 24746
    },
    {
      "epoch": 0.00015103759765625,
      "step": 24746,
      "training_step_time": 0.4422299861907959
    },
    {
      "epoch": 0.000151043701171875,
      "model_forward_time": 0.11805534362792969,
      "step": 24747
    },
    {
      "epoch": 0.000151043701171875,
      "step": 24747,
      "training_step_time": 0.40680956840515137
    },
    {
      "epoch": 0.0001510498046875,
      "model_forward_time": 0.11621546745300293,
      "step": 24748
    },
    {
      "epoch": 0.0001510498046875,
      "step": 24748,
      "training_step_time": 0.3832969665527344
    },
    {
      "epoch": 0.000151055908203125,
      "model_forward_time": 0.11607956886291504,
      "step": 24749
    },
    {
      "epoch": 0.000151055908203125,
      "step": 24749,
      "training_step_time": 0.39570116996765137
    },
    {
      "epoch": 0.00015106201171875,
      "grad_norm": 0.11467674374580383,
      "learning_rate": 6.817539852819149e-05,
      "loss": 0.0484,
      "step": 24750
    },
    {
      "epoch": 0.00015106201171875,
      "model_forward_time": 0.11514973640441895,
      "step": 24750
    },
    {
      "epoch": 0.00015106201171875,
      "step": 24750,
      "training_step_time": 0.4027864933013916
    },
    {
      "epoch": 0.000151068115234375,
      "model_forward_time": 0.11601114273071289,
      "step": 24751
    },
    {
      "epoch": 0.000151068115234375,
      "step": 24751,
      "training_step_time": 0.4703366756439209
    },
    {
      "epoch": 0.00015107421875,
      "model_forward_time": 0.11571383476257324,
      "step": 24752
    },
    {
      "epoch": 0.00015107421875,
      "step": 24752,
      "training_step_time": 0.3803274631500244
    },
    {
      "epoch": 0.000151080322265625,
      "model_forward_time": 0.11544466018676758,
      "step": 24753
    },
    {
      "epoch": 0.000151080322265625,
      "step": 24753,
      "training_step_time": 0.4595317840576172
    },
    {
      "epoch": 0.00015108642578125,
      "model_forward_time": 0.11682391166687012,
      "step": 24754
    },
    {
      "epoch": 0.00015108642578125,
      "step": 24754,
      "training_step_time": 0.47104883193969727
    },
    {
      "epoch": 0.000151092529296875,
      "model_forward_time": 0.1162266731262207,
      "step": 24755
    },
    {
      "epoch": 0.000151092529296875,
      "step": 24755,
      "training_step_time": 0.3865222930908203
    },
    {
      "epoch": 0.0001510986328125,
      "model_forward_time": 0.1152641773223877,
      "step": 24756
    },
    {
      "epoch": 0.0001510986328125,
      "step": 24756,
      "training_step_time": 0.4055466651916504
    },
    {
      "epoch": 0.000151104736328125,
      "model_forward_time": 0.11608123779296875,
      "step": 24757
    },
    {
      "epoch": 0.000151104736328125,
      "step": 24757,
      "training_step_time": 0.5966310501098633
    },
    {
      "epoch": 0.00015111083984375,
      "model_forward_time": 0.11596059799194336,
      "step": 24758
    },
    {
      "epoch": 0.00015111083984375,
      "step": 24758,
      "training_step_time": 0.367595911026001
    },
    {
      "epoch": 0.000151116943359375,
      "model_forward_time": 0.11511111259460449,
      "step": 24759
    },
    {
      "epoch": 0.000151116943359375,
      "step": 24759,
      "training_step_time": 0.43195343017578125
    },
    {
      "epoch": 0.000151123046875,
      "grad_norm": 0.11829128861427307,
      "learning_rate": 6.814972313977967e-05,
      "loss": 0.0413,
      "step": 24760
    },
    {
      "epoch": 0.000151123046875,
      "model_forward_time": 0.1154024600982666,
      "step": 24760
    },
    {
      "epoch": 0.000151123046875,
      "step": 24760,
      "training_step_time": 0.40258026123046875
    },
    {
      "epoch": 0.000151129150390625,
      "model_forward_time": 0.11506056785583496,
      "step": 24761
    },
    {
      "epoch": 0.000151129150390625,
      "step": 24761,
      "training_step_time": 0.41440892219543457
    },
    {
      "epoch": 0.00015113525390625,
      "model_forward_time": 0.11500334739685059,
      "step": 24762
    },
    {
      "epoch": 0.00015113525390625,
      "step": 24762,
      "training_step_time": 0.393024206161499
    },
    {
      "epoch": 0.000151141357421875,
      "model_forward_time": 0.11570906639099121,
      "step": 24763
    },
    {
      "epoch": 0.000151141357421875,
      "step": 24763,
      "training_step_time": 0.5424442291259766
    },
    {
      "epoch": 0.0001511474609375,
      "model_forward_time": 0.11564970016479492,
      "step": 24764
    },
    {
      "epoch": 0.0001511474609375,
      "step": 24764,
      "training_step_time": 0.3797616958618164
    },
    {
      "epoch": 0.000151153564453125,
      "model_forward_time": 0.11611723899841309,
      "step": 24765
    },
    {
      "epoch": 0.000151153564453125,
      "step": 24765,
      "training_step_time": 0.4004368782043457
    },
    {
      "epoch": 0.00015115966796875,
      "model_forward_time": 0.11504912376403809,
      "step": 24766
    },
    {
      "epoch": 0.00015115966796875,
      "step": 24766,
      "training_step_time": 0.45031070709228516
    },
    {
      "epoch": 0.000151165771484375,
      "model_forward_time": 0.11571073532104492,
      "step": 24767
    },
    {
      "epoch": 0.000151165771484375,
      "step": 24767,
      "training_step_time": 0.4279804229736328
    },
    {
      "epoch": 0.000151171875,
      "model_forward_time": 0.11794447898864746,
      "step": 24768
    },
    {
      "epoch": 0.000151171875,
      "step": 24768,
      "training_step_time": 0.46580958366394043
    },
    {
      "epoch": 0.000151177978515625,
      "model_forward_time": 0.1171867847442627,
      "step": 24769
    },
    {
      "epoch": 0.000151177978515625,
      "step": 24769,
      "training_step_time": 0.44198155403137207
    },
    {
      "epoch": 0.00015118408203125,
      "grad_norm": 0.11028904467821121,
      "learning_rate": 6.812404223796116e-05,
      "loss": 0.0438,
      "step": 24770
    },
    {
      "epoch": 0.00015118408203125,
      "model_forward_time": 0.11803889274597168,
      "step": 24770
    },
    {
      "epoch": 0.00015118408203125,
      "step": 24770,
      "training_step_time": 0.3795146942138672
    },
    {
      "epoch": 0.000151190185546875,
      "model_forward_time": 0.11801695823669434,
      "step": 24771
    },
    {
      "epoch": 0.000151190185546875,
      "step": 24771,
      "training_step_time": 0.38770437240600586
    },
    {
      "epoch": 0.0001511962890625,
      "model_forward_time": 0.11790680885314941,
      "step": 24772
    },
    {
      "epoch": 0.0001511962890625,
      "step": 24772,
      "training_step_time": 0.37151598930358887
    },
    {
      "epoch": 0.000151202392578125,
      "model_forward_time": 0.11809754371643066,
      "step": 24773
    },
    {
      "epoch": 0.000151202392578125,
      "step": 24773,
      "training_step_time": 0.44853663444519043
    },
    {
      "epoch": 0.00015120849609375,
      "model_forward_time": 0.1179203987121582,
      "step": 24774
    },
    {
      "epoch": 0.00015120849609375,
      "step": 24774,
      "training_step_time": 0.4405789375305176
    },
    {
      "epoch": 0.000151214599609375,
      "model_forward_time": 0.11797356605529785,
      "step": 24775
    },
    {
      "epoch": 0.000151214599609375,
      "step": 24775,
      "training_step_time": 0.42984771728515625
    },
    {
      "epoch": 0.000151220703125,
      "model_forward_time": 0.12087631225585938,
      "step": 24776
    },
    {
      "epoch": 0.000151220703125,
      "step": 24776,
      "training_step_time": 0.37627434730529785
    },
    {
      "epoch": 0.000151226806640625,
      "model_forward_time": 0.1178286075592041,
      "step": 24777
    },
    {
      "epoch": 0.000151226806640625,
      "step": 24777,
      "training_step_time": 0.3806462287902832
    },
    {
      "epoch": 0.00015123291015625,
      "model_forward_time": 0.11731100082397461,
      "step": 24778
    },
    {
      "epoch": 0.00015123291015625,
      "step": 24778,
      "training_step_time": 0.376359224319458
    },
    {
      "epoch": 0.000151239013671875,
      "model_forward_time": 0.1155552864074707,
      "step": 24779
    },
    {
      "epoch": 0.000151239013671875,
      "step": 24779,
      "training_step_time": 0.39740633964538574
    },
    {
      "epoch": 0.0001512451171875,
      "grad_norm": 0.13224615156650543,
      "learning_rate": 6.809835583053715e-05,
      "loss": 0.0474,
      "step": 24780
    },
    {
      "epoch": 0.0001512451171875,
      "model_forward_time": 0.11553692817687988,
      "step": 24780
    },
    {
      "epoch": 0.0001512451171875,
      "step": 24780,
      "training_step_time": 0.400712251663208
    },
    {
      "epoch": 0.000151251220703125,
      "model_forward_time": 0.11586380004882812,
      "step": 24781
    },
    {
      "epoch": 0.000151251220703125,
      "step": 24781,
      "training_step_time": 0.5110523700714111
    },
    {
      "epoch": 0.00015125732421875,
      "model_forward_time": 0.11577939987182617,
      "step": 24782
    },
    {
      "epoch": 0.00015125732421875,
      "step": 24782,
      "training_step_time": 0.418795108795166
    },
    {
      "epoch": 0.000151263427734375,
      "model_forward_time": 0.11551165580749512,
      "step": 24783
    },
    {
      "epoch": 0.000151263427734375,
      "step": 24783,
      "training_step_time": 0.43483495712280273
    },
    {
      "epoch": 0.00015126953125,
      "model_forward_time": 0.11571907997131348,
      "step": 24784
    },
    {
      "epoch": 0.00015126953125,
      "step": 24784,
      "training_step_time": 0.39456605911254883
    },
    {
      "epoch": 0.000151275634765625,
      "model_forward_time": 0.11621713638305664,
      "step": 24785
    },
    {
      "epoch": 0.000151275634765625,
      "step": 24785,
      "training_step_time": 0.43297672271728516
    },
    {
      "epoch": 0.00015128173828125,
      "model_forward_time": 0.11590290069580078,
      "step": 24786
    },
    {
      "epoch": 0.00015128173828125,
      "step": 24786,
      "training_step_time": 0.3921360969543457
    },
    {
      "epoch": 0.000151287841796875,
      "model_forward_time": 0.11695647239685059,
      "step": 24787
    },
    {
      "epoch": 0.000151287841796875,
      "step": 24787,
      "training_step_time": 0.6559367179870605
    },
    {
      "epoch": 0.0001512939453125,
      "model_forward_time": 0.11548662185668945,
      "step": 24788
    },
    {
      "epoch": 0.0001512939453125,
      "step": 24788,
      "training_step_time": 0.3821883201599121
    },
    {
      "epoch": 0.000151300048828125,
      "model_forward_time": 0.11496686935424805,
      "step": 24789
    },
    {
      "epoch": 0.000151300048828125,
      "step": 24789,
      "training_step_time": 0.41126084327697754
    },
    {
      "epoch": 0.00015130615234375,
      "grad_norm": 0.1347363293170929,
      "learning_rate": 6.80726639253105e-05,
      "loss": 0.0479,
      "step": 24790
    },
    {
      "epoch": 0.00015130615234375,
      "model_forward_time": 0.11556243896484375,
      "step": 24790
    },
    {
      "epoch": 0.00015130615234375,
      "step": 24790,
      "training_step_time": 0.38818979263305664
    },
    {
      "epoch": 0.000151312255859375,
      "model_forward_time": 0.11506104469299316,
      "step": 24791
    },
    {
      "epoch": 0.000151312255859375,
      "step": 24791,
      "training_step_time": 0.3835115432739258
    },
    {
      "epoch": 0.000151318359375,
      "model_forward_time": 0.11636734008789062,
      "step": 24792
    },
    {
      "epoch": 0.000151318359375,
      "step": 24792,
      "training_step_time": 0.3916025161743164
    },
    {
      "epoch": 0.000151324462890625,
      "model_forward_time": 0.11556434631347656,
      "step": 24793
    },
    {
      "epoch": 0.000151324462890625,
      "step": 24793,
      "training_step_time": 0.664679765701294
    },
    {
      "epoch": 0.00015133056640625,
      "model_forward_time": 0.11502504348754883,
      "step": 24794
    },
    {
      "epoch": 0.00015133056640625,
      "step": 24794,
      "training_step_time": 0.45182371139526367
    },
    {
      "epoch": 0.000151336669921875,
      "model_forward_time": 0.11537671089172363,
      "step": 24795
    },
    {
      "epoch": 0.000151336669921875,
      "step": 24795,
      "training_step_time": 0.44674015045166016
    },
    {
      "epoch": 0.0001513427734375,
      "model_forward_time": 0.11478018760681152,
      "step": 24796
    },
    {
      "epoch": 0.0001513427734375,
      "step": 24796,
      "training_step_time": 0.3996608257293701
    },
    {
      "epoch": 0.000151348876953125,
      "model_forward_time": 0.11451959609985352,
      "step": 24797
    },
    {
      "epoch": 0.000151348876953125,
      "step": 24797,
      "training_step_time": 0.41507983207702637
    },
    {
      "epoch": 0.00015135498046875,
      "model_forward_time": 0.11536407470703125,
      "step": 24798
    },
    {
      "epoch": 0.00015135498046875,
      "step": 24798,
      "training_step_time": 0.39234042167663574
    },
    {
      "epoch": 0.000151361083984375,
      "model_forward_time": 0.1153116226196289,
      "step": 24799
    },
    {
      "epoch": 0.000151361083984375,
      "step": 24799,
      "training_step_time": 0.573493480682373
    },
    {
      "epoch": 0.0001513671875,
      "grad_norm": 0.17199084162712097,
      "learning_rate": 6.804696653008575e-05,
      "loss": 0.0468,
      "step": 24800
    },
    {
      "epoch": 0.0001513671875,
      "model_forward_time": 0.11508846282958984,
      "step": 24800
    },
    {
      "epoch": 0.0001513671875,
      "step": 24800,
      "training_step_time": 0.3904430866241455
    },
    {
      "epoch": 0.000151373291015625,
      "model_forward_time": 0.1157979965209961,
      "step": 24801
    },
    {
      "epoch": 0.000151373291015625,
      "step": 24801,
      "training_step_time": 0.3953857421875
    },
    {
      "epoch": 0.00015137939453125,
      "model_forward_time": 0.11560440063476562,
      "step": 24802
    },
    {
      "epoch": 0.00015137939453125,
      "step": 24802,
      "training_step_time": 0.4473564624786377
    },
    {
      "epoch": 0.000151385498046875,
      "model_forward_time": 0.11526370048522949,
      "step": 24803
    },
    {
      "epoch": 0.000151385498046875,
      "step": 24803,
      "training_step_time": 0.39574146270751953
    },
    {
      "epoch": 0.0001513916015625,
      "model_forward_time": 0.11528539657592773,
      "step": 24804
    },
    {
      "epoch": 0.0001513916015625,
      "step": 24804,
      "training_step_time": 0.4049842357635498
    },
    {
      "epoch": 0.000151397705078125,
      "model_forward_time": 0.11584305763244629,
      "step": 24805
    },
    {
      "epoch": 0.000151397705078125,
      "step": 24805,
      "training_step_time": 0.5347952842712402
    },
    {
      "epoch": 0.00015140380859375,
      "model_forward_time": 0.11513447761535645,
      "step": 24806
    },
    {
      "epoch": 0.00015140380859375,
      "step": 24806,
      "training_step_time": 0.39595723152160645
    },
    {
      "epoch": 0.000151409912109375,
      "model_forward_time": 0.1154317855834961,
      "step": 24807
    },
    {
      "epoch": 0.000151409912109375,
      "step": 24807,
      "training_step_time": 0.3888866901397705
    },
    {
      "epoch": 0.000151416015625,
      "model_forward_time": 0.11631917953491211,
      "step": 24808
    },
    {
      "epoch": 0.000151416015625,
      "step": 24808,
      "training_step_time": 0.39424848556518555
    },
    {
      "epoch": 0.000151422119140625,
      "model_forward_time": 0.1153876781463623,
      "step": 24809
    },
    {
      "epoch": 0.000151422119140625,
      "step": 24809,
      "training_step_time": 0.43010616302490234
    },
    {
      "epoch": 0.00015142822265625,
      "grad_norm": 0.12438739836215973,
      "learning_rate": 6.802126365266905e-05,
      "loss": 0.0461,
      "step": 24810
    },
    {
      "epoch": 0.00015142822265625,
      "model_forward_time": 0.11627554893493652,
      "step": 24810
    },
    {
      "epoch": 0.00015142822265625,
      "step": 24810,
      "training_step_time": 0.477200984954834
    },
    {
      "epoch": 0.000151434326171875,
      "model_forward_time": 0.11583757400512695,
      "step": 24811
    },
    {
      "epoch": 0.000151434326171875,
      "step": 24811,
      "training_step_time": 0.567166805267334
    },
    {
      "epoch": 0.0001514404296875,
      "model_forward_time": 0.11620306968688965,
      "step": 24812
    },
    {
      "epoch": 0.0001514404296875,
      "step": 24812,
      "training_step_time": 0.486255407333374
    },
    {
      "epoch": 0.000151446533203125,
      "model_forward_time": 0.11451077461242676,
      "step": 24813
    },
    {
      "epoch": 0.000151446533203125,
      "step": 24813,
      "training_step_time": 0.3876049518585205
    },
    {
      "epoch": 0.00015145263671875,
      "model_forward_time": 0.11463284492492676,
      "step": 24814
    },
    {
      "epoch": 0.00015145263671875,
      "step": 24814,
      "training_step_time": 0.3928353786468506
    },
    {
      "epoch": 0.000151458740234375,
      "model_forward_time": 0.1173543930053711,
      "step": 24815
    },
    {
      "epoch": 0.000151458740234375,
      "step": 24815,
      "training_step_time": 0.46097230911254883
    },
    {
      "epoch": 0.00015146484375,
      "model_forward_time": 0.11626482009887695,
      "step": 24816
    },
    {
      "epoch": 0.00015146484375,
      "step": 24816,
      "training_step_time": 0.43395447731018066
    },
    {
      "epoch": 0.000151470947265625,
      "model_forward_time": 0.11560249328613281,
      "step": 24817
    },
    {
      "epoch": 0.000151470947265625,
      "step": 24817,
      "training_step_time": 0.4167020320892334
    },
    {
      "epoch": 0.00015147705078125,
      "model_forward_time": 0.11559581756591797,
      "step": 24818
    },
    {
      "epoch": 0.00015147705078125,
      "step": 24818,
      "training_step_time": 0.3846397399902344
    },
    {
      "epoch": 0.000151483154296875,
      "model_forward_time": 0.11526107788085938,
      "step": 24819
    },
    {
      "epoch": 0.000151483154296875,
      "step": 24819,
      "training_step_time": 0.3877217769622803
    },
    {
      "epoch": 0.0001514892578125,
      "grad_norm": 0.15444552898406982,
      "learning_rate": 6.799555530086828e-05,
      "loss": 0.0414,
      "step": 24820
    },
    {
      "epoch": 0.0001514892578125,
      "model_forward_time": 0.11529016494750977,
      "step": 24820
    },
    {
      "epoch": 0.0001514892578125,
      "step": 24820,
      "training_step_time": 0.3905367851257324
    },
    {
      "epoch": 0.000151495361328125,
      "model_forward_time": 0.11506915092468262,
      "step": 24821
    },
    {
      "epoch": 0.000151495361328125,
      "step": 24821,
      "training_step_time": 0.3967263698577881
    },
    {
      "epoch": 0.00015150146484375,
      "model_forward_time": 0.1166234016418457,
      "step": 24822
    },
    {
      "epoch": 0.00015150146484375,
      "step": 24822,
      "training_step_time": 0.43449974060058594
    },
    {
      "epoch": 0.000151507568359375,
      "model_forward_time": 0.11552977561950684,
      "step": 24823
    },
    {
      "epoch": 0.000151507568359375,
      "step": 24823,
      "training_step_time": 0.800696611404419
    },
    {
      "epoch": 0.000151513671875,
      "model_forward_time": 0.11548280715942383,
      "step": 24824
    },
    {
      "epoch": 0.000151513671875,
      "step": 24824,
      "training_step_time": 0.4992485046386719
    },
    {
      "epoch": 0.000151519775390625,
      "model_forward_time": 0.11495566368103027,
      "step": 24825
    },
    {
      "epoch": 0.000151519775390625,
      "step": 24825,
      "training_step_time": 0.4534144401550293
    },
    {
      "epoch": 0.00015152587890625,
      "model_forward_time": 0.11486315727233887,
      "step": 24826
    },
    {
      "epoch": 0.00015152587890625,
      "step": 24826,
      "training_step_time": 0.42113661766052246
    },
    {
      "epoch": 0.000151531982421875,
      "model_forward_time": 0.11453914642333984,
      "step": 24827
    },
    {
      "epoch": 0.000151531982421875,
      "step": 24827,
      "training_step_time": 0.40058016777038574
    },
    {
      "epoch": 0.0001515380859375,
      "model_forward_time": 0.11405324935913086,
      "step": 24828
    },
    {
      "epoch": 0.0001515380859375,
      "step": 24828,
      "training_step_time": 0.3843801021575928
    },
    {
      "epoch": 0.000151544189453125,
      "model_forward_time": 0.11565232276916504,
      "step": 24829
    },
    {
      "epoch": 0.000151544189453125,
      "step": 24829,
      "training_step_time": 0.4335968494415283
    },
    {
      "epoch": 0.00015155029296875,
      "grad_norm": 0.15251652896404266,
      "learning_rate": 6.796984148249295e-05,
      "loss": 0.0442,
      "step": 24830
    },
    {
      "epoch": 0.00015155029296875,
      "model_forward_time": 0.11529135704040527,
      "step": 24830
    },
    {
      "epoch": 0.00015155029296875,
      "step": 24830,
      "training_step_time": 0.4457826614379883
    },
    {
      "epoch": 0.000151556396484375,
      "model_forward_time": 0.11568331718444824,
      "step": 24831
    },
    {
      "epoch": 0.000151556396484375,
      "step": 24831,
      "training_step_time": 0.40323901176452637
    },
    {
      "epoch": 0.0001515625,
      "model_forward_time": 0.11904072761535645,
      "step": 24832
    },
    {
      "epoch": 0.0001515625,
      "step": 24832,
      "training_step_time": 0.3998904228210449
    },
    {
      "epoch": 0.000151568603515625,
      "model_forward_time": 0.11591672897338867,
      "step": 24833
    },
    {
      "epoch": 0.000151568603515625,
      "step": 24833,
      "training_step_time": 0.4027221202850342
    },
    {
      "epoch": 0.00015157470703125,
      "model_forward_time": 0.11504006385803223,
      "step": 24834
    },
    {
      "epoch": 0.00015157470703125,
      "step": 24834,
      "training_step_time": 0.388211727142334
    },
    {
      "epoch": 0.000151580810546875,
      "model_forward_time": 0.1151578426361084,
      "step": 24835
    },
    {
      "epoch": 0.000151580810546875,
      "step": 24835,
      "training_step_time": 0.47971463203430176
    },
    {
      "epoch": 0.0001515869140625,
      "model_forward_time": 0.11538434028625488,
      "step": 24836
    },
    {
      "epoch": 0.0001515869140625,
      "step": 24836,
      "training_step_time": 0.44379591941833496
    },
    {
      "epoch": 0.000151593017578125,
      "model_forward_time": 0.1148993968963623,
      "step": 24837
    },
    {
      "epoch": 0.000151593017578125,
      "step": 24837,
      "training_step_time": 0.40828418731689453
    },
    {
      "epoch": 0.00015159912109375,
      "model_forward_time": 0.11549496650695801,
      "step": 24838
    },
    {
      "epoch": 0.00015159912109375,
      "step": 24838,
      "training_step_time": 0.4447023868560791
    },
    {
      "epoch": 0.000151605224609375,
      "model_forward_time": 0.11556315422058105,
      "step": 24839
    },
    {
      "epoch": 0.000151605224609375,
      "step": 24839,
      "training_step_time": 0.42629265785217285
    },
    {
      "epoch": 0.000151611328125,
      "grad_norm": 0.18596304953098297,
      "learning_rate": 6.794412220535426e-05,
      "loss": 0.0473,
      "step": 24840
    },
    {
      "epoch": 0.000151611328125,
      "model_forward_time": 0.1148366928100586,
      "step": 24840
    },
    {
      "epoch": 0.000151611328125,
      "step": 24840,
      "training_step_time": 0.4633181095123291
    },
    {
      "epoch": 0.000151617431640625,
      "model_forward_time": 0.11584663391113281,
      "step": 24841
    },
    {
      "epoch": 0.000151617431640625,
      "step": 24841,
      "training_step_time": 0.39468836784362793
    },
    {
      "epoch": 0.00015162353515625,
      "model_forward_time": 0.11418914794921875,
      "step": 24842
    },
    {
      "epoch": 0.00015162353515625,
      "step": 24842,
      "training_step_time": 0.3877837657928467
    },
    {
      "epoch": 0.000151629638671875,
      "model_forward_time": 0.11542844772338867,
      "step": 24843
    },
    {
      "epoch": 0.000151629638671875,
      "step": 24843,
      "training_step_time": 0.40412020683288574
    },
    {
      "epoch": 0.0001516357421875,
      "model_forward_time": 0.11505603790283203,
      "step": 24844
    },
    {
      "epoch": 0.0001516357421875,
      "step": 24844,
      "training_step_time": 0.4853808879852295
    },
    {
      "epoch": 0.000151641845703125,
      "model_forward_time": 0.1159200668334961,
      "step": 24845
    },
    {
      "epoch": 0.000151641845703125,
      "step": 24845,
      "training_step_time": 0.49382758140563965
    },
    {
      "epoch": 0.00015164794921875,
      "model_forward_time": 0.11546111106872559,
      "step": 24846
    },
    {
      "epoch": 0.00015164794921875,
      "step": 24846,
      "training_step_time": 0.38939785957336426
    },
    {
      "epoch": 0.000151654052734375,
      "model_forward_time": 0.11487221717834473,
      "step": 24847
    },
    {
      "epoch": 0.000151654052734375,
      "step": 24847,
      "training_step_time": 0.47310948371887207
    },
    {
      "epoch": 0.00015166015625,
      "model_forward_time": 0.11466813087463379,
      "step": 24848
    },
    {
      "epoch": 0.00015166015625,
      "step": 24848,
      "training_step_time": 0.38491082191467285
    },
    {
      "epoch": 0.000151666259765625,
      "model_forward_time": 0.11436343193054199,
      "step": 24849
    },
    {
      "epoch": 0.000151666259765625,
      "step": 24849,
      "training_step_time": 0.38903260231018066
    },
    {
      "epoch": 0.00015167236328125,
      "grad_norm": 0.16052323579788208,
      "learning_rate": 6.7918397477265e-05,
      "loss": 0.0525,
      "step": 24850
    },
    {
      "epoch": 0.00015167236328125,
      "model_forward_time": 0.11494922637939453,
      "step": 24850
    },
    {
      "epoch": 0.00015167236328125,
      "step": 24850,
      "training_step_time": 0.47654271125793457
    },
    {
      "epoch": 0.000151678466796875,
      "model_forward_time": 0.11497092247009277,
      "step": 24851
    },
    {
      "epoch": 0.000151678466796875,
      "step": 24851,
      "training_step_time": 0.4422004222869873
    },
    {
      "epoch": 0.0001516845703125,
      "model_forward_time": 0.11463332176208496,
      "step": 24852
    },
    {
      "epoch": 0.0001516845703125,
      "step": 24852,
      "training_step_time": 0.47685861587524414
    },
    {
      "epoch": 0.000151690673828125,
      "model_forward_time": 0.11443305015563965,
      "step": 24853
    },
    {
      "epoch": 0.000151690673828125,
      "step": 24853,
      "training_step_time": 0.4097161293029785
    },
    {
      "epoch": 0.00015169677734375,
      "model_forward_time": 0.11532115936279297,
      "step": 24854
    },
    {
      "epoch": 0.00015169677734375,
      "step": 24854,
      "training_step_time": 0.5207610130310059
    },
    {
      "epoch": 0.000151702880859375,
      "model_forward_time": 0.11458039283752441,
      "step": 24855
    },
    {
      "epoch": 0.000151702880859375,
      "step": 24855,
      "training_step_time": 0.3877711296081543
    },
    {
      "epoch": 0.000151708984375,
      "model_forward_time": 0.11495423316955566,
      "step": 24856
    },
    {
      "epoch": 0.000151708984375,
      "step": 24856,
      "training_step_time": 0.3848459720611572
    },
    {
      "epoch": 0.000151715087890625,
      "model_forward_time": 0.11516094207763672,
      "step": 24857
    },
    {
      "epoch": 0.000151715087890625,
      "step": 24857,
      "training_step_time": 0.3851335048675537
    },
    {
      "epoch": 0.00015172119140625,
      "model_forward_time": 0.11516189575195312,
      "step": 24858
    },
    {
      "epoch": 0.00015172119140625,
      "step": 24858,
      "training_step_time": 0.39899730682373047
    },
    {
      "epoch": 0.000151727294921875,
      "model_forward_time": 0.11528825759887695,
      "step": 24859
    },
    {
      "epoch": 0.000151727294921875,
      "step": 24859,
      "training_step_time": 0.5063762664794922
    },
    {
      "epoch": 0.0001517333984375,
      "grad_norm": 0.15088394284248352,
      "learning_rate": 6.789266730603974e-05,
      "loss": 0.0495,
      "step": 24860
    },
    {
      "epoch": 0.0001517333984375,
      "model_forward_time": 0.11605715751647949,
      "step": 24860
    },
    {
      "epoch": 0.0001517333984375,
      "step": 24860,
      "training_step_time": 0.41957569122314453
    },
    {
      "epoch": 0.000151739501953125,
      "model_forward_time": 0.11458897590637207,
      "step": 24861
    },
    {
      "epoch": 0.000151739501953125,
      "step": 24861,
      "training_step_time": 0.39075779914855957
    },
    {
      "epoch": 0.00015174560546875,
      "model_forward_time": 0.11549949645996094,
      "step": 24862
    },
    {
      "epoch": 0.00015174560546875,
      "step": 24862,
      "training_step_time": 0.3905179500579834
    },
    {
      "epoch": 0.000151751708984375,
      "model_forward_time": 0.11525440216064453,
      "step": 24863
    },
    {
      "epoch": 0.000151751708984375,
      "step": 24863,
      "training_step_time": 0.38808512687683105
    },
    {
      "epoch": 0.0001517578125,
      "model_forward_time": 0.11472654342651367,
      "step": 24864
    },
    {
      "epoch": 0.0001517578125,
      "step": 24864,
      "training_step_time": 0.39894723892211914
    },
    {
      "epoch": 0.000151763916015625,
      "model_forward_time": 0.11593127250671387,
      "step": 24865
    },
    {
      "epoch": 0.000151763916015625,
      "step": 24865,
      "training_step_time": 0.6121401786804199
    },
    {
      "epoch": 0.00015177001953125,
      "model_forward_time": 0.11504793167114258,
      "step": 24866
    },
    {
      "epoch": 0.00015177001953125,
      "step": 24866,
      "training_step_time": 0.48818540573120117
    },
    {
      "epoch": 0.000151776123046875,
      "model_forward_time": 0.11486506462097168,
      "step": 24867
    },
    {
      "epoch": 0.000151776123046875,
      "step": 24867,
      "training_step_time": 0.39658403396606445
    },
    {
      "epoch": 0.0001517822265625,
      "model_forward_time": 0.11452436447143555,
      "step": 24868
    },
    {
      "epoch": 0.0001517822265625,
      "step": 24868,
      "training_step_time": 0.41222310066223145
    },
    {
      "epoch": 0.000151788330078125,
      "model_forward_time": 0.11453485488891602,
      "step": 24869
    },
    {
      "epoch": 0.000151788330078125,
      "step": 24869,
      "training_step_time": 0.45997190475463867
    },
    {
      "epoch": 0.00015179443359375,
      "grad_norm": 0.11635014414787292,
      "learning_rate": 6.786693169949455e-05,
      "loss": 0.0397,
      "step": 24870
    },
    {
      "epoch": 0.00015179443359375,
      "model_forward_time": 0.11520719528198242,
      "step": 24870
    },
    {
      "epoch": 0.00015179443359375,
      "step": 24870,
      "training_step_time": 0.39334630966186523
    },
    {
      "epoch": 0.000151800537109375,
      "model_forward_time": 0.11545109748840332,
      "step": 24871
    },
    {
      "epoch": 0.000151800537109375,
      "step": 24871,
      "training_step_time": 0.4602632522583008
    },
    {
      "epoch": 0.000151806640625,
      "model_forward_time": 0.11502265930175781,
      "step": 24872
    },
    {
      "epoch": 0.000151806640625,
      "step": 24872,
      "training_step_time": 0.3908717632293701
    },
    {
      "epoch": 0.000151812744140625,
      "model_forward_time": 0.11490011215209961,
      "step": 24873
    },
    {
      "epoch": 0.000151812744140625,
      "step": 24873,
      "training_step_time": 0.4884490966796875
    },
    {
      "epoch": 0.00015181884765625,
      "model_forward_time": 0.11491131782531738,
      "step": 24874
    },
    {
      "epoch": 0.00015181884765625,
      "step": 24874,
      "training_step_time": 0.49547600746154785
    },
    {
      "epoch": 0.000151824951171875,
      "model_forward_time": 0.11467885971069336,
      "step": 24875
    },
    {
      "epoch": 0.000151824951171875,
      "step": 24875,
      "training_step_time": 0.3927946090698242
    },
    {
      "epoch": 0.0001518310546875,
      "model_forward_time": 0.1150517463684082,
      "step": 24876
    },
    {
      "epoch": 0.0001518310546875,
      "step": 24876,
      "training_step_time": 0.39029455184936523
    },
    {
      "epoch": 0.000151837158203125,
      "model_forward_time": 0.11598825454711914,
      "step": 24877
    },
    {
      "epoch": 0.000151837158203125,
      "step": 24877,
      "training_step_time": 0.39450693130493164
    },
    {
      "epoch": 0.00015184326171875,
      "model_forward_time": 0.11564207077026367,
      "step": 24878
    },
    {
      "epoch": 0.00015184326171875,
      "step": 24878,
      "training_step_time": 0.3946983814239502
    },
    {
      "epoch": 0.000151849365234375,
      "model_forward_time": 0.11513757705688477,
      "step": 24879
    },
    {
      "epoch": 0.000151849365234375,
      "step": 24879,
      "training_step_time": 0.4329853057861328
    },
    {
      "epoch": 0.00015185546875,
      "grad_norm": 0.0760016068816185,
      "learning_rate": 6.784119066544727e-05,
      "loss": 0.048,
      "step": 24880
    },
    {
      "epoch": 0.00015185546875,
      "model_forward_time": 0.11512255668640137,
      "step": 24880
    },
    {
      "epoch": 0.00015185546875,
      "step": 24880,
      "training_step_time": 0.47984933853149414
    },
    {
      "epoch": 0.000151861572265625,
      "model_forward_time": 0.11529040336608887,
      "step": 24881
    },
    {
      "epoch": 0.000151861572265625,
      "step": 24881,
      "training_step_time": 0.4900493621826172
    },
    {
      "epoch": 0.00015186767578125,
      "model_forward_time": 0.11464047431945801,
      "step": 24882
    },
    {
      "epoch": 0.00015186767578125,
      "step": 24882,
      "training_step_time": 0.4083836078643799
    },
    {
      "epoch": 0.000151873779296875,
      "model_forward_time": 0.11517715454101562,
      "step": 24883
    },
    {
      "epoch": 0.000151873779296875,
      "step": 24883,
      "training_step_time": 0.5180137157440186
    },
    {
      "epoch": 0.0001518798828125,
      "model_forward_time": 0.11463022232055664,
      "step": 24884
    },
    {
      "epoch": 0.0001518798828125,
      "step": 24884,
      "training_step_time": 0.3917360305786133
    },
    {
      "epoch": 0.000151885986328125,
      "model_forward_time": 0.11478877067565918,
      "step": 24885
    },
    {
      "epoch": 0.000151885986328125,
      "step": 24885,
      "training_step_time": 0.3923478126525879
    },
    {
      "epoch": 0.00015189208984375,
      "model_forward_time": 0.11546587944030762,
      "step": 24886
    },
    {
      "epoch": 0.00015189208984375,
      "step": 24886,
      "training_step_time": 0.390216588973999
    },
    {
      "epoch": 0.000151898193359375,
      "model_forward_time": 0.11861753463745117,
      "step": 24887
    },
    {
      "epoch": 0.000151898193359375,
      "step": 24887,
      "training_step_time": 0.48474693298339844
    },
    {
      "epoch": 0.000151904296875,
      "model_forward_time": 0.11960124969482422,
      "step": 24888
    },
    {
      "epoch": 0.000151904296875,
      "step": 24888,
      "training_step_time": 0.46678876876831055
    },
    {
      "epoch": 0.000151910400390625,
      "model_forward_time": 0.11862611770629883,
      "step": 24889
    },
    {
      "epoch": 0.000151910400390625,
      "step": 24889,
      "training_step_time": 0.4783661365509033
    },
    {
      "epoch": 0.00015191650390625,
      "grad_norm": 0.1251990795135498,
      "learning_rate": 6.781544421171732e-05,
      "loss": 0.0462,
      "step": 24890
    },
    {
      "epoch": 0.00015191650390625,
      "model_forward_time": 0.11797547340393066,
      "step": 24890
    },
    {
      "epoch": 0.00015191650390625,
      "step": 24890,
      "training_step_time": 0.3789021968841553
    },
    {
      "epoch": 0.000151922607421875,
      "model_forward_time": 0.11769819259643555,
      "step": 24891
    },
    {
      "epoch": 0.000151922607421875,
      "step": 24891,
      "training_step_time": 0.41225314140319824
    },
    {
      "epoch": 0.0001519287109375,
      "model_forward_time": 0.11533927917480469,
      "step": 24892
    },
    {
      "epoch": 0.0001519287109375,
      "step": 24892,
      "training_step_time": 0.5013291835784912
    },
    {
      "epoch": 0.000151934814453125,
      "model_forward_time": 0.11461448669433594,
      "step": 24893
    },
    {
      "epoch": 0.000151934814453125,
      "step": 24893,
      "training_step_time": 0.4055144786834717
    },
    {
      "epoch": 0.00015194091796875,
      "model_forward_time": 0.11565208435058594,
      "step": 24894
    },
    {
      "epoch": 0.00015194091796875,
      "step": 24894,
      "training_step_time": 0.4657933712005615
    },
    {
      "epoch": 0.000151947021484375,
      "model_forward_time": 0.11607098579406738,
      "step": 24895
    },
    {
      "epoch": 0.000151947021484375,
      "step": 24895,
      "training_step_time": 0.4552726745605469
    },
    {
      "epoch": 0.000151953125,
      "model_forward_time": 0.11559200286865234,
      "step": 24896
    },
    {
      "epoch": 0.000151953125,
      "step": 24896,
      "training_step_time": 0.4021286964416504
    },
    {
      "epoch": 0.000151959228515625,
      "model_forward_time": 0.11461544036865234,
      "step": 24897
    },
    {
      "epoch": 0.000151959228515625,
      "step": 24897,
      "training_step_time": 0.4499356746673584
    },
    {
      "epoch": 0.00015196533203125,
      "model_forward_time": 0.11547064781188965,
      "step": 24898
    },
    {
      "epoch": 0.00015196533203125,
      "step": 24898,
      "training_step_time": 0.39750027656555176
    },
    {
      "epoch": 0.000151971435546875,
      "model_forward_time": 0.1151740550994873,
      "step": 24899
    },
    {
      "epoch": 0.000151971435546875,
      "step": 24899,
      "training_step_time": 0.3911104202270508
    },
    {
      "epoch": 0.0001519775390625,
      "grad_norm": 0.21816124022006989,
      "learning_rate": 6.778969234612584e-05,
      "loss": 0.0459,
      "step": 24900
    },
    {
      "epoch": 0.0001519775390625,
      "model_forward_time": 0.11482620239257812,
      "step": 24900
    },
    {
      "epoch": 0.0001519775390625,
      "step": 24900,
      "training_step_time": 0.366260290145874
    },
    {
      "epoch": 0.000151983642578125,
      "model_forward_time": 0.11555075645446777,
      "step": 24901
    },
    {
      "epoch": 0.000151983642578125,
      "step": 24901,
      "training_step_time": 0.477400541305542
    },
    {
      "epoch": 0.00015198974609375,
      "model_forward_time": 0.11544346809387207,
      "step": 24902
    },
    {
      "epoch": 0.00015198974609375,
      "step": 24902,
      "training_step_time": 0.39464330673217773
    },
    {
      "epoch": 0.000151995849609375,
      "model_forward_time": 0.1166529655456543,
      "step": 24903
    },
    {
      "epoch": 0.000151995849609375,
      "step": 24903,
      "training_step_time": 0.391615629196167
    },
    {
      "epoch": 0.000152001953125,
      "model_forward_time": 0.11524319648742676,
      "step": 24904
    },
    {
      "epoch": 0.000152001953125,
      "step": 24904,
      "training_step_time": 0.41623783111572266
    },
    {
      "epoch": 0.000152008056640625,
      "model_forward_time": 0.11515212059020996,
      "step": 24905
    },
    {
      "epoch": 0.000152008056640625,
      "step": 24905,
      "training_step_time": 0.44844841957092285
    },
    {
      "epoch": 0.00015201416015625,
      "model_forward_time": 0.11586928367614746,
      "step": 24906
    },
    {
      "epoch": 0.00015201416015625,
      "step": 24906,
      "training_step_time": 0.451357364654541
    },
    {
      "epoch": 0.000152020263671875,
      "model_forward_time": 0.11579632759094238,
      "step": 24907
    },
    {
      "epoch": 0.000152020263671875,
      "step": 24907,
      "training_step_time": 0.4434244632720947
    },
    {
      "epoch": 0.0001520263671875,
      "model_forward_time": 0.11534738540649414,
      "step": 24908
    },
    {
      "epoch": 0.0001520263671875,
      "step": 24908,
      "training_step_time": 0.4454348087310791
    },
    {
      "epoch": 0.000152032470703125,
      "model_forward_time": 0.11553645133972168,
      "step": 24909
    },
    {
      "epoch": 0.000152032470703125,
      "step": 24909,
      "training_step_time": 0.39559364318847656
    },
    {
      "epoch": 0.00015203857421875,
      "grad_norm": 0.12891434133052826,
      "learning_rate": 6.77639350764955e-05,
      "loss": 0.0441,
      "step": 24910
    },
    {
      "epoch": 0.00015203857421875,
      "model_forward_time": 0.11480021476745605,
      "step": 24910
    },
    {
      "epoch": 0.00015203857421875,
      "step": 24910,
      "training_step_time": 0.40620994567871094
    },
    {
      "epoch": 0.000152044677734375,
      "model_forward_time": 0.11505866050720215,
      "step": 24911
    },
    {
      "epoch": 0.000152044677734375,
      "step": 24911,
      "training_step_time": 0.3965322971343994
    },
    {
      "epoch": 0.00015205078125,
      "model_forward_time": 0.11589527130126953,
      "step": 24912
    },
    {
      "epoch": 0.00015205078125,
      "step": 24912,
      "training_step_time": 0.49868273735046387
    },
    {
      "epoch": 0.000152056884765625,
      "model_forward_time": 0.1149141788482666,
      "step": 24913
    },
    {
      "epoch": 0.000152056884765625,
      "step": 24913,
      "training_step_time": 0.42911815643310547
    },
    {
      "epoch": 0.00015206298828125,
      "model_forward_time": 0.11530303955078125,
      "step": 24914
    },
    {
      "epoch": 0.00015206298828125,
      "step": 24914,
      "training_step_time": 0.36870408058166504
    },
    {
      "epoch": 0.000152069091796875,
      "model_forward_time": 0.11533236503601074,
      "step": 24915
    },
    {
      "epoch": 0.000152069091796875,
      "step": 24915,
      "training_step_time": 0.41632914543151855
    },
    {
      "epoch": 0.0001520751953125,
      "model_forward_time": 0.11545300483703613,
      "step": 24916
    },
    {
      "epoch": 0.0001520751953125,
      "step": 24916,
      "training_step_time": 0.49075794219970703
    },
    {
      "epoch": 0.000152081298828125,
      "model_forward_time": 0.11537623405456543,
      "step": 24917
    },
    {
      "epoch": 0.000152081298828125,
      "step": 24917,
      "training_step_time": 0.393247127532959
    },
    {
      "epoch": 0.00015208740234375,
      "model_forward_time": 0.11571907997131348,
      "step": 24918
    },
    {
      "epoch": 0.00015208740234375,
      "step": 24918,
      "training_step_time": 0.4781973361968994
    },
    {
      "epoch": 0.000152093505859375,
      "model_forward_time": 0.11570191383361816,
      "step": 24919
    },
    {
      "epoch": 0.000152093505859375,
      "step": 24919,
      "training_step_time": 0.3825066089630127
    },
    {
      "epoch": 0.000152099609375,
      "grad_norm": 0.11185479164123535,
      "learning_rate": 6.773817241065072e-05,
      "loss": 0.0424,
      "step": 24920
    },
    {
      "epoch": 0.000152099609375,
      "model_forward_time": 0.11530113220214844,
      "step": 24920
    },
    {
      "epoch": 0.000152099609375,
      "step": 24920,
      "training_step_time": 0.43276047706604004
    },
    {
      "epoch": 0.000152105712890625,
      "model_forward_time": 0.11549496650695801,
      "step": 24921
    },
    {
      "epoch": 0.000152105712890625,
      "step": 24921,
      "training_step_time": 0.42066407203674316
    },
    {
      "epoch": 0.00015211181640625,
      "model_forward_time": 0.11547541618347168,
      "step": 24922
    },
    {
      "epoch": 0.00015211181640625,
      "step": 24922,
      "training_step_time": 0.47928810119628906
    },
    {
      "epoch": 0.000152117919921875,
      "model_forward_time": 0.11514449119567871,
      "step": 24923
    },
    {
      "epoch": 0.000152117919921875,
      "step": 24923,
      "training_step_time": 0.3951423168182373
    },
    {
      "epoch": 0.0001521240234375,
      "model_forward_time": 0.11604571342468262,
      "step": 24924
    },
    {
      "epoch": 0.0001521240234375,
      "step": 24924,
      "training_step_time": 0.4239532947540283
    },
    {
      "epoch": 0.000152130126953125,
      "model_forward_time": 0.11533617973327637,
      "step": 24925
    },
    {
      "epoch": 0.000152130126953125,
      "step": 24925,
      "training_step_time": 0.40605950355529785
    },
    {
      "epoch": 0.00015213623046875,
      "model_forward_time": 0.11966657638549805,
      "step": 24926
    },
    {
      "epoch": 0.00015213623046875,
      "step": 24926,
      "training_step_time": 0.49503302574157715
    },
    {
      "epoch": 0.000152142333984375,
      "model_forward_time": 0.11507630348205566,
      "step": 24927
    },
    {
      "epoch": 0.000152142333984375,
      "step": 24927,
      "training_step_time": 0.3894462585449219
    },
    {
      "epoch": 0.0001521484375,
      "model_forward_time": 0.11506295204162598,
      "step": 24928
    },
    {
      "epoch": 0.0001521484375,
      "step": 24928,
      "training_step_time": 0.388092041015625
    },
    {
      "epoch": 0.000152154541015625,
      "model_forward_time": 0.11560177803039551,
      "step": 24929
    },
    {
      "epoch": 0.000152154541015625,
      "step": 24929,
      "training_step_time": 0.4251062870025635
    },
    {
      "epoch": 0.00015216064453125,
      "grad_norm": 0.124942347407341,
      "learning_rate": 6.771240435641754e-05,
      "loss": 0.0464,
      "step": 24930
    },
    {
      "epoch": 0.00015216064453125,
      "model_forward_time": 0.11554622650146484,
      "step": 24930
    },
    {
      "epoch": 0.00015216064453125,
      "step": 24930,
      "training_step_time": 0.42594337463378906
    },
    {
      "epoch": 0.000152166748046875,
      "model_forward_time": 0.1152803897857666,
      "step": 24931
    },
    {
      "epoch": 0.000152166748046875,
      "step": 24931,
      "training_step_time": 0.41014671325683594
    },
    {
      "epoch": 0.0001521728515625,
      "model_forward_time": 0.11786270141601562,
      "step": 24932
    },
    {
      "epoch": 0.0001521728515625,
      "step": 24932,
      "training_step_time": 0.44586920738220215
    },
    {
      "epoch": 0.000152178955078125,
      "model_forward_time": 0.11862802505493164,
      "step": 24933
    },
    {
      "epoch": 0.000152178955078125,
      "step": 24933,
      "training_step_time": 0.40804028511047363
    },
    {
      "epoch": 0.00015218505859375,
      "model_forward_time": 0.11890721321105957,
      "step": 24934
    },
    {
      "epoch": 0.00015218505859375,
      "step": 24934,
      "training_step_time": 0.43618226051330566
    },
    {
      "epoch": 0.000152191162109375,
      "model_forward_time": 0.1170358657836914,
      "step": 24935
    },
    {
      "epoch": 0.000152191162109375,
      "step": 24935,
      "training_step_time": 0.3768329620361328
    },
    {
      "epoch": 0.000152197265625,
      "model_forward_time": 0.11652398109436035,
      "step": 24936
    },
    {
      "epoch": 0.000152197265625,
      "step": 24936,
      "training_step_time": 0.4462893009185791
    },
    {
      "epoch": 0.000152203369140625,
      "model_forward_time": 0.1153867244720459,
      "step": 24937
    },
    {
      "epoch": 0.000152203369140625,
      "step": 24937,
      "training_step_time": 0.39804768562316895
    },
    {
      "epoch": 0.00015220947265625,
      "model_forward_time": 0.11597061157226562,
      "step": 24938
    },
    {
      "epoch": 0.00015220947265625,
      "step": 24938,
      "training_step_time": 0.39063048362731934
    },
    {
      "epoch": 0.000152215576171875,
      "model_forward_time": 0.11538505554199219,
      "step": 24939
    },
    {
      "epoch": 0.000152215576171875,
      "step": 24939,
      "training_step_time": 0.39701271057128906
    },
    {
      "epoch": 0.0001522216796875,
      "grad_norm": 0.12429454922676086,
      "learning_rate": 6.768663092162356e-05,
      "loss": 0.0531,
      "step": 24940
    },
    {
      "epoch": 0.0001522216796875,
      "model_forward_time": 0.11526775360107422,
      "step": 24940
    },
    {
      "epoch": 0.0001522216796875,
      "step": 24940,
      "training_step_time": 0.4014410972595215
    },
    {
      "epoch": 0.000152227783203125,
      "model_forward_time": 0.11577701568603516,
      "step": 24941
    },
    {
      "epoch": 0.000152227783203125,
      "step": 24941,
      "training_step_time": 0.42835235595703125
    },
    {
      "epoch": 0.00015223388671875,
      "model_forward_time": 0.11483001708984375,
      "step": 24942
    },
    {
      "epoch": 0.00015223388671875,
      "step": 24942,
      "training_step_time": 0.40002965927124023
    },
    {
      "epoch": 0.000152239990234375,
      "model_forward_time": 0.11638760566711426,
      "step": 24943
    },
    {
      "epoch": 0.000152239990234375,
      "step": 24943,
      "training_step_time": 0.6154429912567139
    },
    {
      "epoch": 0.00015224609375,
      "model_forward_time": 0.11549115180969238,
      "step": 24944
    },
    {
      "epoch": 0.00015224609375,
      "step": 24944,
      "training_step_time": 0.4354074001312256
    },
    {
      "epoch": 0.000152252197265625,
      "model_forward_time": 0.1154928207397461,
      "step": 24945
    },
    {
      "epoch": 0.000152252197265625,
      "step": 24945,
      "training_step_time": 0.4210789203643799
    },
    {
      "epoch": 0.00015225830078125,
      "model_forward_time": 0.11504411697387695,
      "step": 24946
    },
    {
      "epoch": 0.00015225830078125,
      "step": 24946,
      "training_step_time": 0.4532015323638916
    },
    {
      "epoch": 0.000152264404296875,
      "model_forward_time": 0.11540579795837402,
      "step": 24947
    },
    {
      "epoch": 0.000152264404296875,
      "step": 24947,
      "training_step_time": 0.397540807723999
    },
    {
      "epoch": 0.0001522705078125,
      "model_forward_time": 0.11479473114013672,
      "step": 24948
    },
    {
      "epoch": 0.0001522705078125,
      "step": 24948,
      "training_step_time": 0.3968172073364258
    },
    {
      "epoch": 0.000152276611328125,
      "model_forward_time": 0.11603736877441406,
      "step": 24949
    },
    {
      "epoch": 0.000152276611328125,
      "step": 24949,
      "training_step_time": 0.4419519901275635
    },
    {
      "epoch": 0.00015228271484375,
      "grad_norm": 0.16982698440551758,
      "learning_rate": 6.76608521140981e-05,
      "loss": 0.0447,
      "step": 24950
    },
    {
      "epoch": 0.00015228271484375,
      "model_forward_time": 0.11556148529052734,
      "step": 24950
    },
    {
      "epoch": 0.00015228271484375,
      "step": 24950,
      "training_step_time": 0.4830300807952881
    },
    {
      "epoch": 0.000152288818359375,
      "model_forward_time": 0.11518478393554688,
      "step": 24951
    },
    {
      "epoch": 0.000152288818359375,
      "step": 24951,
      "training_step_time": 0.38483405113220215
    },
    {
      "epoch": 0.000152294921875,
      "model_forward_time": 0.11575484275817871,
      "step": 24952
    },
    {
      "epoch": 0.000152294921875,
      "step": 24952,
      "training_step_time": 0.38805627822875977
    },
    {
      "epoch": 0.000152301025390625,
      "model_forward_time": 0.11555624008178711,
      "step": 24953
    },
    {
      "epoch": 0.000152301025390625,
      "step": 24953,
      "training_step_time": 0.3985097408294678
    },
    {
      "epoch": 0.00015230712890625,
      "model_forward_time": 0.11559820175170898,
      "step": 24954
    },
    {
      "epoch": 0.00015230712890625,
      "step": 24954,
      "training_step_time": 0.41922593116760254
    },
    {
      "epoch": 0.000152313232421875,
      "model_forward_time": 0.11514902114868164,
      "step": 24955
    },
    {
      "epoch": 0.000152313232421875,
      "step": 24955,
      "training_step_time": 0.4914259910583496
    },
    {
      "epoch": 0.0001523193359375,
      "model_forward_time": 0.11507153511047363,
      "step": 24956
    },
    {
      "epoch": 0.0001523193359375,
      "step": 24956,
      "training_step_time": 0.41010451316833496
    },
    {
      "epoch": 0.000152325439453125,
      "model_forward_time": 0.11560344696044922,
      "step": 24957
    },
    {
      "epoch": 0.000152325439453125,
      "step": 24957,
      "training_step_time": 0.3902573585510254
    },
    {
      "epoch": 0.00015233154296875,
      "model_forward_time": 0.11531877517700195,
      "step": 24958
    },
    {
      "epoch": 0.00015233154296875,
      "step": 24958,
      "training_step_time": 0.4674818515777588
    },
    {
      "epoch": 0.000152337646484375,
      "model_forward_time": 0.11610794067382812,
      "step": 24959
    },
    {
      "epoch": 0.000152337646484375,
      "step": 24959,
      "training_step_time": 0.41386938095092773
    },
    {
      "epoch": 0.00015234375,
      "grad_norm": 0.1706763654947281,
      "learning_rate": 6.763506794167208e-05,
      "loss": 0.0447,
      "step": 24960
    },
    {
      "epoch": 0.00015234375,
      "model_forward_time": 0.11515283584594727,
      "step": 24960
    },
    {
      "epoch": 0.00015234375,
      "step": 24960,
      "training_step_time": 0.4741358757019043
    },
    {
      "epoch": 0.000152349853515625,
      "model_forward_time": 0.11558127403259277,
      "step": 24961
    },
    {
      "epoch": 0.000152349853515625,
      "step": 24961,
      "training_step_time": 0.47356390953063965
    },
    {
      "epoch": 0.00015235595703125,
      "model_forward_time": 0.11483621597290039,
      "step": 24962
    },
    {
      "epoch": 0.00015235595703125,
      "step": 24962,
      "training_step_time": 0.3875081539154053
    },
    {
      "epoch": 0.000152362060546875,
      "model_forward_time": 0.1155385971069336,
      "step": 24963
    },
    {
      "epoch": 0.000152362060546875,
      "step": 24963,
      "training_step_time": 0.4297153949737549
    },
    {
      "epoch": 0.0001523681640625,
      "model_forward_time": 0.1147298812866211,
      "step": 24964
    },
    {
      "epoch": 0.0001523681640625,
      "step": 24964,
      "training_step_time": 0.4048116207122803
    },
    {
      "epoch": 0.000152374267578125,
      "model_forward_time": 0.1156611442565918,
      "step": 24965
    },
    {
      "epoch": 0.000152374267578125,
      "step": 24965,
      "training_step_time": 0.41234707832336426
    },
    {
      "epoch": 0.00015238037109375,
      "model_forward_time": 0.11504530906677246,
      "step": 24966
    },
    {
      "epoch": 0.00015238037109375,
      "step": 24966,
      "training_step_time": 0.4076981544494629
    },
    {
      "epoch": 0.000152386474609375,
      "model_forward_time": 0.11544990539550781,
      "step": 24967
    },
    {
      "epoch": 0.000152386474609375,
      "step": 24967,
      "training_step_time": 0.5032253265380859
    },
    {
      "epoch": 0.000152392578125,
      "model_forward_time": 0.11595630645751953,
      "step": 24968
    },
    {
      "epoch": 0.000152392578125,
      "step": 24968,
      "training_step_time": 0.4210398197174072
    },
    {
      "epoch": 0.000152398681640625,
      "model_forward_time": 0.11521387100219727,
      "step": 24969
    },
    {
      "epoch": 0.000152398681640625,
      "step": 24969,
      "training_step_time": 0.4461796283721924
    },
    {
      "epoch": 0.00015240478515625,
      "grad_norm": 0.2146972119808197,
      "learning_rate": 6.7609278412178e-05,
      "loss": 0.05,
      "step": 24970
    },
    {
      "epoch": 0.00015240478515625,
      "model_forward_time": 0.1157081127166748,
      "step": 24970
    },
    {
      "epoch": 0.00015240478515625,
      "step": 24970,
      "training_step_time": 0.39569664001464844
    },
    {
      "epoch": 0.000152410888671875,
      "model_forward_time": 0.11524176597595215,
      "step": 24971
    },
    {
      "epoch": 0.000152410888671875,
      "step": 24971,
      "training_step_time": 0.4116826057434082
    },
    {
      "epoch": 0.0001524169921875,
      "model_forward_time": 0.1151120662689209,
      "step": 24972
    },
    {
      "epoch": 0.0001524169921875,
      "step": 24972,
      "training_step_time": 0.3663055896759033
    },
    {
      "epoch": 0.000152423095703125,
      "model_forward_time": 0.11577701568603516,
      "step": 24973
    },
    {
      "epoch": 0.000152423095703125,
      "step": 24973,
      "training_step_time": 0.4509248733520508
    },
    {
      "epoch": 0.00015242919921875,
      "model_forward_time": 0.11652255058288574,
      "step": 24974
    },
    {
      "epoch": 0.00015242919921875,
      "step": 24974,
      "training_step_time": 0.4168717861175537
    },
    {
      "epoch": 0.000152435302734375,
      "model_forward_time": 0.11487317085266113,
      "step": 24975
    },
    {
      "epoch": 0.000152435302734375,
      "step": 24975,
      "training_step_time": 0.531212568283081
    },
    {
      "epoch": 0.00015244140625,
      "model_forward_time": 0.11548662185668945,
      "step": 24976
    },
    {
      "epoch": 0.00015244140625,
      "step": 24976,
      "training_step_time": 0.3997373580932617
    },
    {
      "epoch": 0.000152447509765625,
      "model_forward_time": 0.11476540565490723,
      "step": 24977
    },
    {
      "epoch": 0.000152447509765625,
      "step": 24977,
      "training_step_time": 0.4533679485321045
    },
    {
      "epoch": 0.00015245361328125,
      "model_forward_time": 0.11562681198120117,
      "step": 24978
    },
    {
      "epoch": 0.00015245361328125,
      "step": 24978,
      "training_step_time": 0.4431321620941162
    },
    {
      "epoch": 0.000152459716796875,
      "model_forward_time": 0.1152958869934082,
      "step": 24979
    },
    {
      "epoch": 0.000152459716796875,
      "step": 24979,
      "training_step_time": 0.4192521572113037
    },
    {
      "epoch": 0.0001524658203125,
      "grad_norm": 0.11258827894926071,
      "learning_rate": 6.758348353345014e-05,
      "loss": 0.0461,
      "step": 24980
    },
    {
      "epoch": 0.0001524658203125,
      "model_forward_time": 0.11443567276000977,
      "step": 24980
    },
    {
      "epoch": 0.0001524658203125,
      "step": 24980,
      "training_step_time": 0.38777923583984375
    },
    {
      "epoch": 0.000152471923828125,
      "model_forward_time": 0.11615896224975586,
      "step": 24981
    },
    {
      "epoch": 0.000152471923828125,
      "step": 24981,
      "training_step_time": 0.4928452968597412
    },
    {
      "epoch": 0.00015247802734375,
      "model_forward_time": 0.1150057315826416,
      "step": 24982
    },
    {
      "epoch": 0.00015247802734375,
      "step": 24982,
      "training_step_time": 0.39092254638671875
    },
    {
      "epoch": 0.000152484130859375,
      "model_forward_time": 0.11498618125915527,
      "step": 24983
    },
    {
      "epoch": 0.000152484130859375,
      "step": 24983,
      "training_step_time": 0.40262556076049805
    },
    {
      "epoch": 0.000152490234375,
      "model_forward_time": 0.11584162712097168,
      "step": 24984
    },
    {
      "epoch": 0.000152490234375,
      "step": 24984,
      "training_step_time": 0.391404390335083
    },
    {
      "epoch": 0.000152496337890625,
      "model_forward_time": 0.11504530906677246,
      "step": 24985
    },
    {
      "epoch": 0.000152496337890625,
      "step": 24985,
      "training_step_time": 0.39412832260131836
    },
    {
      "epoch": 0.00015250244140625,
      "model_forward_time": 0.11447978019714355,
      "step": 24986
    },
    {
      "epoch": 0.00015250244140625,
      "step": 24986,
      "training_step_time": 0.3655531406402588
    },
    {
      "epoch": 0.000152508544921875,
      "model_forward_time": 0.11601376533508301,
      "step": 24987
    },
    {
      "epoch": 0.000152508544921875,
      "step": 24987,
      "training_step_time": 0.49377012252807617
    },
    {
      "epoch": 0.0001525146484375,
      "model_forward_time": 0.11500191688537598,
      "step": 24988
    },
    {
      "epoch": 0.0001525146484375,
      "step": 24988,
      "training_step_time": 0.4616823196411133
    },
    {
      "epoch": 0.000152520751953125,
      "model_forward_time": 0.11494612693786621,
      "step": 24989
    },
    {
      "epoch": 0.000152520751953125,
      "step": 24989,
      "training_step_time": 0.3856227397918701
    },
    {
      "epoch": 0.00015252685546875,
      "grad_norm": 0.17122618854045868,
      "learning_rate": 6.755768331332424e-05,
      "loss": 0.0464,
      "step": 24990
    },
    {
      "epoch": 0.00015252685546875,
      "model_forward_time": 0.11506843566894531,
      "step": 24990
    },
    {
      "epoch": 0.00015252685546875,
      "step": 24990,
      "training_step_time": 0.3988063335418701
    },
    {
      "epoch": 0.000152532958984375,
      "model_forward_time": 0.11505293846130371,
      "step": 24991
    },
    {
      "epoch": 0.000152532958984375,
      "step": 24991,
      "training_step_time": 0.49826979637145996
    },
    {
      "epoch": 0.0001525390625,
      "model_forward_time": 0.11538982391357422,
      "step": 24992
    },
    {
      "epoch": 0.0001525390625,
      "step": 24992,
      "training_step_time": 0.38530397415161133
    },
    {
      "epoch": 0.000152545166015625,
      "model_forward_time": 0.11503314971923828,
      "step": 24993
    },
    {
      "epoch": 0.000152545166015625,
      "step": 24993,
      "training_step_time": 0.4488973617553711
    },
    {
      "epoch": 0.00015255126953125,
      "model_forward_time": 0.11515974998474121,
      "step": 24994
    },
    {
      "epoch": 0.00015255126953125,
      "step": 24994,
      "training_step_time": 0.4014859199523926
    },
    {
      "epoch": 0.000152557373046875,
      "model_forward_time": 0.11760902404785156,
      "step": 24995
    },
    {
      "epoch": 0.000152557373046875,
      "step": 24995,
      "training_step_time": 0.40334081649780273
    },
    {
      "epoch": 0.0001525634765625,
      "model_forward_time": 0.11580204963684082,
      "step": 24996
    },
    {
      "epoch": 0.0001525634765625,
      "step": 24996,
      "training_step_time": 0.4307122230529785
    },
    {
      "epoch": 0.000152569580078125,
      "model_forward_time": 0.11612343788146973,
      "step": 24997
    },
    {
      "epoch": 0.000152569580078125,
      "step": 24997,
      "training_step_time": 0.4017608165740967
    },
    {
      "epoch": 0.00015257568359375,
      "model_forward_time": 0.11492657661437988,
      "step": 24998
    },
    {
      "epoch": 0.00015257568359375,
      "step": 24998,
      "training_step_time": 0.4593536853790283
    },
    {
      "epoch": 0.000152581787109375,
      "model_forward_time": 0.11549687385559082,
      "step": 24999
    },
    {
      "epoch": 0.000152581787109375,
      "step": 24999,
      "training_step_time": 0.48150205612182617
    },
    {
      "epoch": 0.000152587890625,
      "grad_norm": 0.12286578863859177,
      "learning_rate": 6.753187775963773e-05,
      "loss": 0.0438,
      "step": 25000
    },
    {
      "epoch": 0.000152587890625,
      "model_forward_time": 0.11496257781982422,
      "step": 25000
    },
    {
      "epoch": 0.000152587890625,
      "step": 25000,
      "training_step_time": 0.35879993438720703
    },
    {
      "epoch": 0.000152593994140625,
      "model_forward_time": 0.11279606819152832,
      "step": 25001
    },
    {
      "epoch": 0.000152593994140625,
      "step": 25001,
      "training_step_time": 0.37602853775024414
    },
    {
      "epoch": 0.00015260009765625,
      "model_forward_time": 0.11421895027160645,
      "step": 25002
    },
    {
      "epoch": 0.00015260009765625,
      "step": 25002,
      "training_step_time": 0.3744540214538574
    },
    {
      "epoch": 0.000152606201171875,
      "model_forward_time": 0.11462664604187012,
      "step": 25003
    },
    {
      "epoch": 0.000152606201171875,
      "step": 25003,
      "training_step_time": 0.47686290740966797
    },
    {
      "epoch": 0.0001526123046875,
      "model_forward_time": 0.11438202857971191,
      "step": 25004
    },
    {
      "epoch": 0.0001526123046875,
      "step": 25004,
      "training_step_time": 0.43295812606811523
    },
    {
      "epoch": 0.000152618408203125,
      "model_forward_time": 0.1148383617401123,
      "step": 25005
    },
    {
      "epoch": 0.000152618408203125,
      "step": 25005,
      "training_step_time": 0.3769516944885254
    },
    {
      "epoch": 0.00015262451171875,
      "model_forward_time": 0.1150975227355957,
      "step": 25006
    },
    {
      "epoch": 0.00015262451171875,
      "step": 25006,
      "training_step_time": 0.4631943702697754
    },
    {
      "epoch": 0.000152630615234375,
      "model_forward_time": 0.11439657211303711,
      "step": 25007
    },
    {
      "epoch": 0.000152630615234375,
      "step": 25007,
      "training_step_time": 0.46697473526000977
    },
    {
      "epoch": 0.00015263671875,
      "model_forward_time": 0.11519384384155273,
      "step": 25008
    },
    {
      "epoch": 0.00015263671875,
      "step": 25008,
      "training_step_time": 0.3835573196411133
    },
    {
      "epoch": 0.000152642822265625,
      "model_forward_time": 0.11446452140808105,
      "step": 25009
    },
    {
      "epoch": 0.000152642822265625,
      "step": 25009,
      "training_step_time": 0.38797521591186523
    },
    {
      "epoch": 0.00015264892578125,
      "grad_norm": 0.16147032380104065,
      "learning_rate": 6.750606688022964e-05,
      "loss": 0.0489,
      "step": 25010
    },
    {
      "epoch": 0.00015264892578125,
      "model_forward_time": 0.11527347564697266,
      "step": 25010
    },
    {
      "epoch": 0.00015264892578125,
      "step": 25010,
      "training_step_time": 0.3855104446411133
    },
    {
      "epoch": 0.000152655029296875,
      "model_forward_time": 0.11514544486999512,
      "step": 25011
    },
    {
      "epoch": 0.000152655029296875,
      "step": 25011,
      "training_step_time": 0.3778655529022217
    },
    {
      "epoch": 0.0001526611328125,
      "model_forward_time": 0.11478376388549805,
      "step": 25012
    },
    {
      "epoch": 0.0001526611328125,
      "step": 25012,
      "training_step_time": 0.40560030937194824
    },
    {
      "epoch": 0.000152667236328125,
      "model_forward_time": 0.11564946174621582,
      "step": 25013
    },
    {
      "epoch": 0.000152667236328125,
      "step": 25013,
      "training_step_time": 0.41915106773376465
    },
    {
      "epoch": 0.00015267333984375,
      "model_forward_time": 0.11594796180725098,
      "step": 25014
    },
    {
      "epoch": 0.00015267333984375,
      "step": 25014,
      "training_step_time": 0.4298222064971924
    },
    {
      "epoch": 0.000152679443359375,
      "model_forward_time": 0.11562180519104004,
      "step": 25015
    },
    {
      "epoch": 0.000152679443359375,
      "step": 25015,
      "training_step_time": 0.3977670669555664
    },
    {
      "epoch": 0.000152685546875,
      "model_forward_time": 0.11583137512207031,
      "step": 25016
    },
    {
      "epoch": 0.000152685546875,
      "step": 25016,
      "training_step_time": 0.39904141426086426
    },
    {
      "epoch": 0.000152691650390625,
      "model_forward_time": 0.11480927467346191,
      "step": 25017
    },
    {
      "epoch": 0.000152691650390625,
      "step": 25017,
      "training_step_time": 0.5117650032043457
    },
    {
      "epoch": 0.00015269775390625,
      "model_forward_time": 0.11552143096923828,
      "step": 25018
    },
    {
      "epoch": 0.00015269775390625,
      "step": 25018,
      "training_step_time": 0.5011556148529053
    },
    {
      "epoch": 0.000152703857421875,
      "model_forward_time": 0.11803102493286133,
      "step": 25019
    },
    {
      "epoch": 0.000152703857421875,
      "step": 25019,
      "training_step_time": 0.395723819732666
    },
    {
      "epoch": 0.0001527099609375,
      "grad_norm": 0.14581525325775146,
      "learning_rate": 6.748025068294067e-05,
      "loss": 0.0479,
      "step": 25020
    },
    {
      "epoch": 0.0001527099609375,
      "model_forward_time": 0.11501669883728027,
      "step": 25020
    },
    {
      "epoch": 0.0001527099609375,
      "step": 25020,
      "training_step_time": 0.44838738441467285
    },
    {
      "epoch": 0.000152716064453125,
      "model_forward_time": 0.1150822639465332,
      "step": 25021
    },
    {
      "epoch": 0.000152716064453125,
      "step": 25021,
      "training_step_time": 0.40368175506591797
    },
    {
      "epoch": 0.00015272216796875,
      "model_forward_time": 0.11483454704284668,
      "step": 25022
    },
    {
      "epoch": 0.00015272216796875,
      "step": 25022,
      "training_step_time": 0.40279388427734375
    },
    {
      "epoch": 0.000152728271484375,
      "model_forward_time": 0.11648249626159668,
      "step": 25023
    },
    {
      "epoch": 0.000152728271484375,
      "step": 25023,
      "training_step_time": 0.39662790298461914
    },
    {
      "epoch": 0.000152734375,
      "model_forward_time": 0.11489152908325195,
      "step": 25024
    },
    {
      "epoch": 0.000152734375,
      "step": 25024,
      "training_step_time": 0.39657068252563477
    },
    {
      "epoch": 0.000152740478515625,
      "model_forward_time": 0.11739158630371094,
      "step": 25025
    },
    {
      "epoch": 0.000152740478515625,
      "step": 25025,
      "training_step_time": 0.3991992473602295
    },
    {
      "epoch": 0.00015274658203125,
      "model_forward_time": 0.1154625415802002,
      "step": 25026
    },
    {
      "epoch": 0.00015274658203125,
      "step": 25026,
      "training_step_time": 0.3945286273956299
    },
    {
      "epoch": 0.000152752685546875,
      "model_forward_time": 0.11580014228820801,
      "step": 25027
    },
    {
      "epoch": 0.000152752685546875,
      "step": 25027,
      "training_step_time": 0.43969178199768066
    },
    {
      "epoch": 0.0001527587890625,
      "model_forward_time": 0.11598873138427734,
      "step": 25028
    },
    {
      "epoch": 0.0001527587890625,
      "step": 25028,
      "training_step_time": 0.4016988277435303
    },
    {
      "epoch": 0.000152764892578125,
      "model_forward_time": 0.11522841453552246,
      "step": 25029
    },
    {
      "epoch": 0.000152764892578125,
      "step": 25029,
      "training_step_time": 0.39971423149108887
    },
    {
      "epoch": 0.00015277099609375,
      "grad_norm": 0.2103794664144516,
      "learning_rate": 6.745442917561309e-05,
      "loss": 0.0434,
      "step": 25030
    },
    {
      "epoch": 0.00015277099609375,
      "model_forward_time": 0.11555123329162598,
      "step": 25030
    },
    {
      "epoch": 0.00015277099609375,
      "step": 25030,
      "training_step_time": 0.39739346504211426
    },
    {
      "epoch": 0.000152777099609375,
      "model_forward_time": 0.11572504043579102,
      "step": 25031
    },
    {
      "epoch": 0.000152777099609375,
      "step": 25031,
      "training_step_time": 0.40844297409057617
    },
    {
      "epoch": 0.000152783203125,
      "model_forward_time": 0.1172337532043457,
      "step": 25032
    },
    {
      "epoch": 0.000152783203125,
      "step": 25032,
      "training_step_time": 0.40675973892211914
    },
    {
      "epoch": 0.000152789306640625,
      "model_forward_time": 0.1175379753112793,
      "step": 25033
    },
    {
      "epoch": 0.000152789306640625,
      "step": 25033,
      "training_step_time": 0.4902217388153076
    },
    {
      "epoch": 0.00015279541015625,
      "model_forward_time": 0.11942648887634277,
      "step": 25034
    },
    {
      "epoch": 0.00015279541015625,
      "step": 25034,
      "training_step_time": 0.40279626846313477
    },
    {
      "epoch": 0.000152801513671875,
      "model_forward_time": 0.11745381355285645,
      "step": 25035
    },
    {
      "epoch": 0.000152801513671875,
      "step": 25035,
      "training_step_time": 0.44504523277282715
    },
    {
      "epoch": 0.0001528076171875,
      "model_forward_time": 0.11726951599121094,
      "step": 25036
    },
    {
      "epoch": 0.0001528076171875,
      "step": 25036,
      "training_step_time": 0.4306182861328125
    },
    {
      "epoch": 0.000152813720703125,
      "model_forward_time": 0.11793971061706543,
      "step": 25037
    },
    {
      "epoch": 0.000152813720703125,
      "step": 25037,
      "training_step_time": 0.39025259017944336
    },
    {
      "epoch": 0.00015281982421875,
      "model_forward_time": 0.11703014373779297,
      "step": 25038
    },
    {
      "epoch": 0.00015281982421875,
      "step": 25038,
      "training_step_time": 0.3870563507080078
    },
    {
      "epoch": 0.000152825927734375,
      "model_forward_time": 0.11931824684143066,
      "step": 25039
    },
    {
      "epoch": 0.000152825927734375,
      "step": 25039,
      "training_step_time": 0.38473081588745117
    },
    {
      "epoch": 0.00015283203125,
      "grad_norm": 0.09563416987657547,
      "learning_rate": 6.742860236609077e-05,
      "loss": 0.0413,
      "step": 25040
    },
    {
      "epoch": 0.00015283203125,
      "model_forward_time": 0.11691021919250488,
      "step": 25040
    },
    {
      "epoch": 0.00015283203125,
      "step": 25040,
      "training_step_time": 0.37958359718322754
    },
    {
      "epoch": 0.000152838134765625,
      "model_forward_time": 0.11545944213867188,
      "step": 25041
    },
    {
      "epoch": 0.000152838134765625,
      "step": 25041,
      "training_step_time": 0.512470006942749
    },
    {
      "epoch": 0.00015284423828125,
      "model_forward_time": 0.11587023735046387,
      "step": 25042
    },
    {
      "epoch": 0.00015284423828125,
      "step": 25042,
      "training_step_time": 0.4382913112640381
    },
    {
      "epoch": 0.000152850341796875,
      "model_forward_time": 0.11557626724243164,
      "step": 25043
    },
    {
      "epoch": 0.000152850341796875,
      "step": 25043,
      "training_step_time": 0.4869117736816406
    },
    {
      "epoch": 0.0001528564453125,
      "model_forward_time": 0.11452794075012207,
      "step": 25044
    },
    {
      "epoch": 0.0001528564453125,
      "step": 25044,
      "training_step_time": 0.3920302391052246
    },
    {
      "epoch": 0.000152862548828125,
      "model_forward_time": 0.11525130271911621,
      "step": 25045
    },
    {
      "epoch": 0.000152862548828125,
      "step": 25045,
      "training_step_time": 0.3910815715789795
    },
    {
      "epoch": 0.00015286865234375,
      "model_forward_time": 0.11483049392700195,
      "step": 25046
    },
    {
      "epoch": 0.00015286865234375,
      "step": 25046,
      "training_step_time": 0.4713327884674072
    },
    {
      "epoch": 0.000152874755859375,
      "model_forward_time": 0.11547231674194336,
      "step": 25047
    },
    {
      "epoch": 0.000152874755859375,
      "step": 25047,
      "training_step_time": 0.49076128005981445
    },
    {
      "epoch": 0.000152880859375,
      "model_forward_time": 0.11537885665893555,
      "step": 25048
    },
    {
      "epoch": 0.000152880859375,
      "step": 25048,
      "training_step_time": 0.39508938789367676
    },
    {
      "epoch": 0.000152886962890625,
      "model_forward_time": 0.11554598808288574,
      "step": 25049
    },
    {
      "epoch": 0.000152886962890625,
      "step": 25049,
      "training_step_time": 0.4631943702697754
    },
    {
      "epoch": 0.00015289306640625,
      "grad_norm": 0.10866637527942657,
      "learning_rate": 6.740277026221923e-05,
      "loss": 0.0444,
      "step": 25050
    },
    {
      "epoch": 0.00015289306640625,
      "model_forward_time": 0.11593031883239746,
      "step": 25050
    },
    {
      "epoch": 0.00015289306640625,
      "step": 25050,
      "training_step_time": 0.4067056179046631
    },
    {
      "epoch": 0.000152899169921875,
      "model_forward_time": 0.11492657661437988,
      "step": 25051
    },
    {
      "epoch": 0.000152899169921875,
      "step": 25051,
      "training_step_time": 0.48409056663513184
    },
    {
      "epoch": 0.0001529052734375,
      "model_forward_time": 0.11482691764831543,
      "step": 25052
    },
    {
      "epoch": 0.0001529052734375,
      "step": 25052,
      "training_step_time": 0.3882174491882324
    },
    {
      "epoch": 0.000152911376953125,
      "model_forward_time": 0.11513638496398926,
      "step": 25053
    },
    {
      "epoch": 0.000152911376953125,
      "step": 25053,
      "training_step_time": 0.38834118843078613
    },
    {
      "epoch": 0.00015291748046875,
      "model_forward_time": 0.11557340621948242,
      "step": 25054
    },
    {
      "epoch": 0.00015291748046875,
      "step": 25054,
      "training_step_time": 0.41586947441101074
    },
    {
      "epoch": 0.000152923583984375,
      "model_forward_time": 0.11559581756591797,
      "step": 25055
    },
    {
      "epoch": 0.000152923583984375,
      "step": 25055,
      "training_step_time": 0.458632230758667
    },
    {
      "epoch": 0.0001529296875,
      "model_forward_time": 0.11554956436157227,
      "step": 25056
    },
    {
      "epoch": 0.0001529296875,
      "step": 25056,
      "training_step_time": 0.4108436107635498
    },
    {
      "epoch": 0.000152935791015625,
      "model_forward_time": 0.11540627479553223,
      "step": 25057
    },
    {
      "epoch": 0.000152935791015625,
      "step": 25057,
      "training_step_time": 0.39867377281188965
    },
    {
      "epoch": 0.00015294189453125,
      "model_forward_time": 0.11539506912231445,
      "step": 25058
    },
    {
      "epoch": 0.00015294189453125,
      "step": 25058,
      "training_step_time": 0.3930809497833252
    },
    {
      "epoch": 0.000152947998046875,
      "model_forward_time": 0.1164093017578125,
      "step": 25059
    },
    {
      "epoch": 0.000152947998046875,
      "step": 25059,
      "training_step_time": 0.3961925506591797
    },
    {
      "epoch": 0.0001529541015625,
      "grad_norm": 0.14373134076595306,
      "learning_rate": 6.737693287184557e-05,
      "loss": 0.0482,
      "step": 25060
    },
    {
      "epoch": 0.0001529541015625,
      "model_forward_time": 0.11603355407714844,
      "step": 25060
    },
    {
      "epoch": 0.0001529541015625,
      "step": 25060,
      "training_step_time": 0.37212276458740234
    },
    {
      "epoch": 0.000152960205078125,
      "model_forward_time": 0.11590981483459473,
      "step": 25061
    },
    {
      "epoch": 0.000152960205078125,
      "step": 25061,
      "training_step_time": 0.45754575729370117
    },
    {
      "epoch": 0.00015296630859375,
      "model_forward_time": 0.11603879928588867,
      "step": 25062
    },
    {
      "epoch": 0.00015296630859375,
      "step": 25062,
      "training_step_time": 0.4563162326812744
    },
    {
      "epoch": 0.000152972412109375,
      "model_forward_time": 0.11594295501708984,
      "step": 25063
    },
    {
      "epoch": 0.000152972412109375,
      "step": 25063,
      "training_step_time": 0.4535660743713379
    },
    {
      "epoch": 0.000152978515625,
      "model_forward_time": 0.11534333229064941,
      "step": 25064
    },
    {
      "epoch": 0.000152978515625,
      "step": 25064,
      "training_step_time": 0.385498046875
    },
    {
      "epoch": 0.000152984619140625,
      "model_forward_time": 0.11531686782836914,
      "step": 25065
    },
    {
      "epoch": 0.000152984619140625,
      "step": 25065,
      "training_step_time": 0.4439816474914551
    },
    {
      "epoch": 0.00015299072265625,
      "model_forward_time": 0.11770462989807129,
      "step": 25066
    },
    {
      "epoch": 0.00015299072265625,
      "step": 25066,
      "training_step_time": 0.4974806308746338
    },
    {
      "epoch": 0.000152996826171875,
      "model_forward_time": 0.11809873580932617,
      "step": 25067
    },
    {
      "epoch": 0.000152996826171875,
      "step": 25067,
      "training_step_time": 0.43698620796203613
    },
    {
      "epoch": 0.0001530029296875,
      "model_forward_time": 0.11808061599731445,
      "step": 25068
    },
    {
      "epoch": 0.0001530029296875,
      "step": 25068,
      "training_step_time": 0.414827823638916
    },
    {
      "epoch": 0.000153009033203125,
      "model_forward_time": 0.12460637092590332,
      "step": 25069
    },
    {
      "epoch": 0.000153009033203125,
      "step": 25069,
      "training_step_time": 0.44154810905456543
    },
    {
      "epoch": 0.00015301513671875,
      "grad_norm": 0.14566828310489655,
      "learning_rate": 6.735109020281852e-05,
      "loss": 0.044,
      "step": 25070
    },
    {
      "epoch": 0.00015301513671875,
      "model_forward_time": 0.1187753677368164,
      "step": 25070
    },
    {
      "epoch": 0.00015301513671875,
      "step": 25070,
      "training_step_time": 0.38283228874206543
    },
    {
      "epoch": 0.000153021240234375,
      "model_forward_time": 0.1161203384399414,
      "step": 25071
    },
    {
      "epoch": 0.000153021240234375,
      "step": 25071,
      "training_step_time": 0.4960148334503174
    },
    {
      "epoch": 0.00015302734375,
      "model_forward_time": 0.11542201042175293,
      "step": 25072
    },
    {
      "epoch": 0.00015302734375,
      "step": 25072,
      "training_step_time": 0.3972792625427246
    },
    {
      "epoch": 0.000153033447265625,
      "model_forward_time": 0.11563897132873535,
      "step": 25073
    },
    {
      "epoch": 0.000153033447265625,
      "step": 25073,
      "training_step_time": 0.40181875228881836
    },
    {
      "epoch": 0.00015303955078125,
      "model_forward_time": 0.11600780487060547,
      "step": 25074
    },
    {
      "epoch": 0.00015303955078125,
      "step": 25074,
      "training_step_time": 0.36853742599487305
    },
    {
      "epoch": 0.000153045654296875,
      "model_forward_time": 0.11566758155822754,
      "step": 25075
    },
    {
      "epoch": 0.000153045654296875,
      "step": 25075,
      "training_step_time": 0.4530608654022217
    },
    {
      "epoch": 0.0001530517578125,
      "model_forward_time": 0.11536788940429688,
      "step": 25076
    },
    {
      "epoch": 0.0001530517578125,
      "step": 25076,
      "training_step_time": 0.4027843475341797
    },
    {
      "epoch": 0.000153057861328125,
      "model_forward_time": 0.11560821533203125,
      "step": 25077
    },
    {
      "epoch": 0.000153057861328125,
      "step": 25077,
      "training_step_time": 0.4205608367919922
    },
    {
      "epoch": 0.00015306396484375,
      "model_forward_time": 0.11518335342407227,
      "step": 25078
    },
    {
      "epoch": 0.00015306396484375,
      "step": 25078,
      "training_step_time": 0.4249753952026367
    },
    {
      "epoch": 0.000153070068359375,
      "model_forward_time": 0.11549592018127441,
      "step": 25079
    },
    {
      "epoch": 0.000153070068359375,
      "step": 25079,
      "training_step_time": 0.4694175720214844
    },
    {
      "epoch": 0.000153076171875,
      "grad_norm": 0.11690228432416916,
      "learning_rate": 6.732524226298841e-05,
      "loss": 0.0422,
      "step": 25080
    },
    {
      "epoch": 0.000153076171875,
      "model_forward_time": 0.1177361011505127,
      "step": 25080
    },
    {
      "epoch": 0.000153076171875,
      "step": 25080,
      "training_step_time": 0.3949880599975586
    },
    {
      "epoch": 0.000153082275390625,
      "model_forward_time": 0.11572027206420898,
      "step": 25081
    },
    {
      "epoch": 0.000153082275390625,
      "step": 25081,
      "training_step_time": 0.4040510654449463
    },
    {
      "epoch": 0.00015308837890625,
      "model_forward_time": 0.1164548397064209,
      "step": 25082
    },
    {
      "epoch": 0.00015308837890625,
      "step": 25082,
      "training_step_time": 0.46231913566589355
    },
    {
      "epoch": 0.000153094482421875,
      "model_forward_time": 0.11548471450805664,
      "step": 25083
    },
    {
      "epoch": 0.000153094482421875,
      "step": 25083,
      "training_step_time": 0.4080009460449219
    },
    {
      "epoch": 0.0001531005859375,
      "model_forward_time": 0.11558222770690918,
      "step": 25084
    },
    {
      "epoch": 0.0001531005859375,
      "step": 25084,
      "training_step_time": 0.3959028720855713
    },
    {
      "epoch": 0.000153106689453125,
      "model_forward_time": 0.1150820255279541,
      "step": 25085
    },
    {
      "epoch": 0.000153106689453125,
      "step": 25085,
      "training_step_time": 0.4850146770477295
    },
    {
      "epoch": 0.00015311279296875,
      "model_forward_time": 0.11543869972229004,
      "step": 25086
    },
    {
      "epoch": 0.00015311279296875,
      "step": 25086,
      "training_step_time": 0.39504313468933105
    },
    {
      "epoch": 0.000153118896484375,
      "model_forward_time": 0.11555814743041992,
      "step": 25087
    },
    {
      "epoch": 0.000153118896484375,
      "step": 25087,
      "training_step_time": 0.3928194046020508
    },
    {
      "epoch": 0.000153125,
      "model_forward_time": 0.11551570892333984,
      "step": 25088
    },
    {
      "epoch": 0.000153125,
      "step": 25088,
      "training_step_time": 0.3906717300415039
    },
    {
      "epoch": 0.000153131103515625,
      "model_forward_time": 0.11643528938293457,
      "step": 25089
    },
    {
      "epoch": 0.000153131103515625,
      "step": 25089,
      "training_step_time": 0.49988436698913574
    },
    {
      "epoch": 0.00015313720703125,
      "grad_norm": 0.1778389811515808,
      "learning_rate": 6.729938906020713e-05,
      "loss": 0.0422,
      "step": 25090
    },
    {
      "epoch": 0.00015313720703125,
      "model_forward_time": 0.11538505554199219,
      "step": 25090
    },
    {
      "epoch": 0.00015313720703125,
      "step": 25090,
      "training_step_time": 0.5026123523712158
    },
    {
      "epoch": 0.000153143310546875,
      "model_forward_time": 0.11569046974182129,
      "step": 25091
    },
    {
      "epoch": 0.000153143310546875,
      "step": 25091,
      "training_step_time": 0.40917134284973145
    },
    {
      "epoch": 0.0001531494140625,
      "model_forward_time": 0.11527585983276367,
      "step": 25092
    },
    {
      "epoch": 0.0001531494140625,
      "step": 25092,
      "training_step_time": 0.42450666427612305
    },
    {
      "epoch": 0.000153155517578125,
      "model_forward_time": 0.11519622802734375,
      "step": 25093
    },
    {
      "epoch": 0.000153155517578125,
      "step": 25093,
      "training_step_time": 0.4745817184448242
    },
    {
      "epoch": 0.00015316162109375,
      "model_forward_time": 0.11556196212768555,
      "step": 25094
    },
    {
      "epoch": 0.00015316162109375,
      "step": 25094,
      "training_step_time": 0.38683485984802246
    },
    {
      "epoch": 0.000153167724609375,
      "model_forward_time": 0.11558413505554199,
      "step": 25095
    },
    {
      "epoch": 0.000153167724609375,
      "step": 25095,
      "training_step_time": 0.43979549407958984
    },
    {
      "epoch": 0.000153173828125,
      "model_forward_time": 0.11565208435058594,
      "step": 25096
    },
    {
      "epoch": 0.000153173828125,
      "step": 25096,
      "training_step_time": 0.38897228240966797
    },
    {
      "epoch": 0.000153179931640625,
      "model_forward_time": 0.1155242919921875,
      "step": 25097
    },
    {
      "epoch": 0.000153179931640625,
      "step": 25097,
      "training_step_time": 0.39436864852905273
    },
    {
      "epoch": 0.00015318603515625,
      "model_forward_time": 0.1152803897857666,
      "step": 25098
    },
    {
      "epoch": 0.00015318603515625,
      "step": 25098,
      "training_step_time": 0.42194366455078125
    },
    {
      "epoch": 0.000153192138671875,
      "model_forward_time": 0.11699700355529785,
      "step": 25099
    },
    {
      "epoch": 0.000153192138671875,
      "step": 25099,
      "training_step_time": 0.40378570556640625
    },
    {
      "epoch": 0.0001531982421875,
      "grad_norm": 0.19651754200458527,
      "learning_rate": 6.727353060232822e-05,
      "loss": 0.0462,
      "step": 25100
    },
    {
      "epoch": 0.0001531982421875,
      "model_forward_time": 0.11614727973937988,
      "step": 25100
    },
    {
      "epoch": 0.0001531982421875,
      "step": 25100,
      "training_step_time": 0.49254584312438965
    },
    {
      "epoch": 0.000153204345703125,
      "model_forward_time": 0.11756443977355957,
      "step": 25101
    },
    {
      "epoch": 0.000153204345703125,
      "step": 25101,
      "training_step_time": 0.3884308338165283
    },
    {
      "epoch": 0.00015321044921875,
      "model_forward_time": 0.11503720283508301,
      "step": 25102
    },
    {
      "epoch": 0.00015321044921875,
      "step": 25102,
      "training_step_time": 0.40494370460510254
    },
    {
      "epoch": 0.000153216552734375,
      "model_forward_time": 0.11645913124084473,
      "step": 25103
    },
    {
      "epoch": 0.000153216552734375,
      "step": 25103,
      "training_step_time": 0.4252779483795166
    },
    {
      "epoch": 0.00015322265625,
      "model_forward_time": 0.11604976654052734,
      "step": 25104
    },
    {
      "epoch": 0.00015322265625,
      "step": 25104,
      "training_step_time": 0.512622594833374
    },
    {
      "epoch": 0.000153228759765625,
      "model_forward_time": 0.11602282524108887,
      "step": 25105
    },
    {
      "epoch": 0.000153228759765625,
      "step": 25105,
      "training_step_time": 0.47238945960998535
    },
    {
      "epoch": 0.00015323486328125,
      "model_forward_time": 0.1155405044555664,
      "step": 25106
    },
    {
      "epoch": 0.00015323486328125,
      "step": 25106,
      "training_step_time": 0.5263752937316895
    },
    {
      "epoch": 0.000153240966796875,
      "model_forward_time": 0.11561870574951172,
      "step": 25107
    },
    {
      "epoch": 0.000153240966796875,
      "step": 25107,
      "training_step_time": 0.3961372375488281
    },
    {
      "epoch": 0.0001532470703125,
      "model_forward_time": 0.11661601066589355,
      "step": 25108
    },
    {
      "epoch": 0.0001532470703125,
      "step": 25108,
      "training_step_time": 0.4166874885559082
    },
    {
      "epoch": 0.000153253173828125,
      "model_forward_time": 0.11547064781188965,
      "step": 25109
    },
    {
      "epoch": 0.000153253173828125,
      "step": 25109,
      "training_step_time": 0.4613511562347412
    },
    {
      "epoch": 0.00015325927734375,
      "grad_norm": 0.16642045974731445,
      "learning_rate": 6.72476668972068e-05,
      "loss": 0.0503,
      "step": 25110
    },
    {
      "epoch": 0.00015325927734375,
      "model_forward_time": 0.11746478080749512,
      "step": 25110
    },
    {
      "epoch": 0.00015325927734375,
      "step": 25110,
      "training_step_time": 0.4090132713317871
    },
    {
      "epoch": 0.000153265380859375,
      "model_forward_time": 0.1153719425201416,
      "step": 25111
    },
    {
      "epoch": 0.000153265380859375,
      "step": 25111,
      "training_step_time": 0.3922154903411865
    },
    {
      "epoch": 0.000153271484375,
      "model_forward_time": 0.11613249778747559,
      "step": 25112
    },
    {
      "epoch": 0.000153271484375,
      "step": 25112,
      "training_step_time": 0.6072368621826172
    },
    {
      "epoch": 0.000153277587890625,
      "model_forward_time": 0.11556100845336914,
      "step": 25113
    },
    {
      "epoch": 0.000153277587890625,
      "step": 25113,
      "training_step_time": 0.4297642707824707
    },
    {
      "epoch": 0.00015328369140625,
      "model_forward_time": 0.11551094055175781,
      "step": 25114
    },
    {
      "epoch": 0.00015328369140625,
      "step": 25114,
      "training_step_time": 0.38676023483276367
    },
    {
      "epoch": 0.000153289794921875,
      "model_forward_time": 0.11611580848693848,
      "step": 25115
    },
    {
      "epoch": 0.000153289794921875,
      "step": 25115,
      "training_step_time": 0.39730119705200195
    },
    {
      "epoch": 0.0001532958984375,
      "model_forward_time": 0.11526823043823242,
      "step": 25116
    },
    {
      "epoch": 0.0001532958984375,
      "step": 25116,
      "training_step_time": 0.39994096755981445
    },
    {
      "epoch": 0.000153302001953125,
      "model_forward_time": 0.11589479446411133,
      "step": 25117
    },
    {
      "epoch": 0.000153302001953125,
      "step": 25117,
      "training_step_time": 0.501262903213501
    },
    {
      "epoch": 0.00015330810546875,
      "model_forward_time": 0.11585092544555664,
      "step": 25118
    },
    {
      "epoch": 0.00015330810546875,
      "step": 25118,
      "training_step_time": 0.6267764568328857
    },
    {
      "epoch": 0.000153314208984375,
      "model_forward_time": 0.11525988578796387,
      "step": 25119
    },
    {
      "epoch": 0.000153314208984375,
      "step": 25119,
      "training_step_time": 0.4114999771118164
    },
    {
      "epoch": 0.0001533203125,
      "grad_norm": 0.19608525931835175,
      "learning_rate": 6.722179795269956e-05,
      "loss": 0.049,
      "step": 25120
    },
    {
      "epoch": 0.0001533203125,
      "model_forward_time": 0.11516833305358887,
      "step": 25120
    },
    {
      "epoch": 0.0001533203125,
      "step": 25120,
      "training_step_time": 0.48659515380859375
    },
    {
      "epoch": 0.000153326416015625,
      "model_forward_time": 0.11532354354858398,
      "step": 25121
    },
    {
      "epoch": 0.000153326416015625,
      "step": 25121,
      "training_step_time": 0.3959338665008545
    },
    {
      "epoch": 0.00015333251953125,
      "model_forward_time": 0.11449790000915527,
      "step": 25122
    },
    {
      "epoch": 0.00015333251953125,
      "step": 25122,
      "training_step_time": 0.39905214309692383
    },
    {
      "epoch": 0.000153338623046875,
      "model_forward_time": 0.11545372009277344,
      "step": 25123
    },
    {
      "epoch": 0.000153338623046875,
      "step": 25123,
      "training_step_time": 0.3891463279724121
    },
    {
      "epoch": 0.0001533447265625,
      "model_forward_time": 0.11587190628051758,
      "step": 25124
    },
    {
      "epoch": 0.0001533447265625,
      "step": 25124,
      "training_step_time": 0.5562748908996582
    },
    {
      "epoch": 0.000153350830078125,
      "model_forward_time": 0.11541557312011719,
      "step": 25125
    },
    {
      "epoch": 0.000153350830078125,
      "step": 25125,
      "training_step_time": 0.47086572647094727
    },
    {
      "epoch": 0.00015335693359375,
      "model_forward_time": 0.11552834510803223,
      "step": 25126
    },
    {
      "epoch": 0.00015335693359375,
      "step": 25126,
      "training_step_time": 0.38983988761901855
    },
    {
      "epoch": 0.000153363037109375,
      "model_forward_time": 0.11507391929626465,
      "step": 25127
    },
    {
      "epoch": 0.000153363037109375,
      "step": 25127,
      "training_step_time": 0.4198324680328369
    },
    {
      "epoch": 0.000153369140625,
      "model_forward_time": 0.11490011215209961,
      "step": 25128
    },
    {
      "epoch": 0.000153369140625,
      "step": 25128,
      "training_step_time": 0.40363502502441406
    },
    {
      "epoch": 0.000153375244140625,
      "model_forward_time": 0.1157841682434082,
      "step": 25129
    },
    {
      "epoch": 0.000153375244140625,
      "step": 25129,
      "training_step_time": 0.3955872058868408
    },
    {
      "epoch": 0.00015338134765625,
      "grad_norm": 0.1518237441778183,
      "learning_rate": 6.719592377666483e-05,
      "loss": 0.0424,
      "step": 25130
    },
    {
      "epoch": 0.00015338134765625,
      "model_forward_time": 0.1153252124786377,
      "step": 25130
    },
    {
      "epoch": 0.00015338134765625,
      "step": 25130,
      "training_step_time": 0.5576746463775635
    },
    {
      "epoch": 0.000153387451171875,
      "model_forward_time": 0.11555123329162598,
      "step": 25131
    },
    {
      "epoch": 0.000153387451171875,
      "step": 25131,
      "training_step_time": 0.43977999687194824
    },
    {
      "epoch": 0.0001533935546875,
      "model_forward_time": 0.11918282508850098,
      "step": 25132
    },
    {
      "epoch": 0.0001533935546875,
      "step": 25132,
      "training_step_time": 0.47119712829589844
    },
    {
      "epoch": 0.000153399658203125,
      "model_forward_time": 0.11745834350585938,
      "step": 25133
    },
    {
      "epoch": 0.000153399658203125,
      "step": 25133,
      "training_step_time": 0.42467570304870605
    },
    {
      "epoch": 0.00015340576171875,
      "model_forward_time": 0.11679267883300781,
      "step": 25134
    },
    {
      "epoch": 0.00015340576171875,
      "step": 25134,
      "training_step_time": 0.43416523933410645
    },
    {
      "epoch": 0.000153411865234375,
      "model_forward_time": 0.1168067455291748,
      "step": 25135
    },
    {
      "epoch": 0.000153411865234375,
      "step": 25135,
      "training_step_time": 0.3771045207977295
    },
    {
      "epoch": 0.00015341796875,
      "model_forward_time": 0.11853313446044922,
      "step": 25136
    },
    {
      "epoch": 0.00015341796875,
      "step": 25136,
      "training_step_time": 0.588716983795166
    },
    {
      "epoch": 0.000153424072265625,
      "model_forward_time": 0.11784577369689941,
      "step": 25137
    },
    {
      "epoch": 0.000153424072265625,
      "step": 25137,
      "training_step_time": 0.3749048709869385
    },
    {
      "epoch": 0.00015343017578125,
      "model_forward_time": 0.1164865493774414,
      "step": 25138
    },
    {
      "epoch": 0.00015343017578125,
      "step": 25138,
      "training_step_time": 0.3779618740081787
    },
    {
      "epoch": 0.000153436279296875,
      "model_forward_time": 0.11848735809326172,
      "step": 25139
    },
    {
      "epoch": 0.000153436279296875,
      "step": 25139,
      "training_step_time": 0.38509654998779297
    },
    {
      "epoch": 0.0001534423828125,
      "grad_norm": 0.1704850047826767,
      "learning_rate": 6.71700443769625e-05,
      "loss": 0.0451,
      "step": 25140
    },
    {
      "epoch": 0.0001534423828125,
      "model_forward_time": 0.11598634719848633,
      "step": 25140
    },
    {
      "epoch": 0.0001534423828125,
      "step": 25140,
      "training_step_time": 0.4454364776611328
    },
    {
      "epoch": 0.000153448486328125,
      "model_forward_time": 0.11506009101867676,
      "step": 25141
    },
    {
      "epoch": 0.000153448486328125,
      "step": 25141,
      "training_step_time": 0.41619229316711426
    },
    {
      "epoch": 0.00015345458984375,
      "model_forward_time": 0.11584711074829102,
      "step": 25142
    },
    {
      "epoch": 0.00015345458984375,
      "step": 25142,
      "training_step_time": 0.6338069438934326
    },
    {
      "epoch": 0.000153460693359375,
      "model_forward_time": 0.11516308784484863,
      "step": 25143
    },
    {
      "epoch": 0.000153460693359375,
      "step": 25143,
      "training_step_time": 0.3628873825073242
    },
    {
      "epoch": 0.000153466796875,
      "model_forward_time": 0.11542415618896484,
      "step": 25144
    },
    {
      "epoch": 0.000153466796875,
      "step": 25144,
      "training_step_time": 0.434739351272583
    },
    {
      "epoch": 0.000153472900390625,
      "model_forward_time": 0.11503195762634277,
      "step": 25145
    },
    {
      "epoch": 0.000153472900390625,
      "step": 25145,
      "training_step_time": 0.3968045711517334
    },
    {
      "epoch": 0.00015347900390625,
      "model_forward_time": 0.11568450927734375,
      "step": 25146
    },
    {
      "epoch": 0.00015347900390625,
      "step": 25146,
      "training_step_time": 0.4374687671661377
    },
    {
      "epoch": 0.000153485107421875,
      "model_forward_time": 0.11485981941223145,
      "step": 25147
    },
    {
      "epoch": 0.000153485107421875,
      "step": 25147,
      "training_step_time": 0.4412879943847656
    },
    {
      "epoch": 0.0001534912109375,
      "model_forward_time": 0.11550045013427734,
      "step": 25148
    },
    {
      "epoch": 0.0001534912109375,
      "step": 25148,
      "training_step_time": 0.38513946533203125
    },
    {
      "epoch": 0.000153497314453125,
      "model_forward_time": 0.11645865440368652,
      "step": 25149
    },
    {
      "epoch": 0.000153497314453125,
      "step": 25149,
      "training_step_time": 0.38675379753112793
    },
    {
      "epoch": 0.00015350341796875,
      "grad_norm": 0.1242009699344635,
      "learning_rate": 6.714415976145402e-05,
      "loss": 0.0513,
      "step": 25150
    },
    {
      "epoch": 0.00015350341796875,
      "model_forward_time": 0.11619138717651367,
      "step": 25150
    },
    {
      "epoch": 0.00015350341796875,
      "step": 25150,
      "training_step_time": 0.39052462577819824
    },
    {
      "epoch": 0.000153509521484375,
      "model_forward_time": 0.11614298820495605,
      "step": 25151
    },
    {
      "epoch": 0.000153509521484375,
      "step": 25151,
      "training_step_time": 0.4125242233276367
    },
    {
      "epoch": 0.000153515625,
      "model_forward_time": 0.1158604621887207,
      "step": 25152
    },
    {
      "epoch": 0.000153515625,
      "step": 25152,
      "training_step_time": 0.3945343494415283
    },
    {
      "epoch": 0.000153521728515625,
      "model_forward_time": 0.11749553680419922,
      "step": 25153
    },
    {
      "epoch": 0.000153521728515625,
      "step": 25153,
      "training_step_time": 0.45951318740844727
    },
    {
      "epoch": 0.00015352783203125,
      "model_forward_time": 0.1161031723022461,
      "step": 25154
    },
    {
      "epoch": 0.00015352783203125,
      "step": 25154,
      "training_step_time": 0.5938534736633301
    },
    {
      "epoch": 0.000153533935546875,
      "model_forward_time": 0.11568689346313477,
      "step": 25155
    },
    {
      "epoch": 0.000153533935546875,
      "step": 25155,
      "training_step_time": 0.39105725288391113
    },
    {
      "epoch": 0.0001535400390625,
      "model_forward_time": 0.11643648147583008,
      "step": 25156
    },
    {
      "epoch": 0.0001535400390625,
      "step": 25156,
      "training_step_time": 0.39313173294067383
    },
    {
      "epoch": 0.000153546142578125,
      "model_forward_time": 0.11525201797485352,
      "step": 25157
    },
    {
      "epoch": 0.000153546142578125,
      "step": 25157,
      "training_step_time": 0.36606383323669434
    },
    {
      "epoch": 0.00015355224609375,
      "model_forward_time": 0.11593508720397949,
      "step": 25158
    },
    {
      "epoch": 0.00015355224609375,
      "step": 25158,
      "training_step_time": 0.3913435935974121
    },
    {
      "epoch": 0.000153558349609375,
      "model_forward_time": 0.11523127555847168,
      "step": 25159
    },
    {
      "epoch": 0.000153558349609375,
      "step": 25159,
      "training_step_time": 0.4537813663482666
    },
    {
      "epoch": 0.000153564453125,
      "grad_norm": 0.2343466430902481,
      "learning_rate": 6.711826993800248e-05,
      "loss": 0.0452,
      "step": 25160
    },
    {
      "epoch": 0.000153564453125,
      "model_forward_time": 0.11614203453063965,
      "step": 25160
    },
    {
      "epoch": 0.000153564453125,
      "step": 25160,
      "training_step_time": 0.49797654151916504
    },
    {
      "epoch": 0.000153570556640625,
      "model_forward_time": 0.11490488052368164,
      "step": 25161
    },
    {
      "epoch": 0.000153570556640625,
      "step": 25161,
      "training_step_time": 0.42838025093078613
    },
    {
      "epoch": 0.00015357666015625,
      "model_forward_time": 0.11505508422851562,
      "step": 25162
    },
    {
      "epoch": 0.00015357666015625,
      "step": 25162,
      "training_step_time": 0.38683247566223145
    },
    {
      "epoch": 0.000153582763671875,
      "model_forward_time": 0.11559605598449707,
      "step": 25163
    },
    {
      "epoch": 0.000153582763671875,
      "step": 25163,
      "training_step_time": 0.38292574882507324
    },
    {
      "epoch": 0.0001535888671875,
      "model_forward_time": 0.11511707305908203,
      "step": 25164
    },
    {
      "epoch": 0.0001535888671875,
      "step": 25164,
      "training_step_time": 0.40869903564453125
    },
    {
      "epoch": 0.000153594970703125,
      "model_forward_time": 0.11550307273864746,
      "step": 25165
    },
    {
      "epoch": 0.000153594970703125,
      "step": 25165,
      "training_step_time": 0.399094820022583
    },
    {
      "epoch": 0.00015360107421875,
      "model_forward_time": 0.11559224128723145,
      "step": 25166
    },
    {
      "epoch": 0.00015360107421875,
      "step": 25166,
      "training_step_time": 0.5989933013916016
    },
    {
      "epoch": 0.000153607177734375,
      "model_forward_time": 0.11549496650695801,
      "step": 25167
    },
    {
      "epoch": 0.000153607177734375,
      "step": 25167,
      "training_step_time": 0.4517369270324707
    },
    {
      "epoch": 0.00015361328125,
      "model_forward_time": 0.11544203758239746,
      "step": 25168
    },
    {
      "epoch": 0.00015361328125,
      "step": 25168,
      "training_step_time": 0.39937472343444824
    },
    {
      "epoch": 0.000153619384765625,
      "model_forward_time": 0.11568975448608398,
      "step": 25169
    },
    {
      "epoch": 0.000153619384765625,
      "step": 25169,
      "training_step_time": 0.41723012924194336
    },
    {
      "epoch": 0.00015362548828125,
      "grad_norm": 0.19888684153556824,
      "learning_rate": 6.709237491447249e-05,
      "loss": 0.0449,
      "step": 25170
    },
    {
      "epoch": 0.00015362548828125,
      "model_forward_time": 0.11601972579956055,
      "step": 25170
    },
    {
      "epoch": 0.00015362548828125,
      "step": 25170,
      "training_step_time": 0.39667677879333496
    },
    {
      "epoch": 0.000153631591796875,
      "model_forward_time": 0.11540436744689941,
      "step": 25171
    },
    {
      "epoch": 0.000153631591796875,
      "step": 25171,
      "training_step_time": 0.39920520782470703
    },
    {
      "epoch": 0.0001536376953125,
      "model_forward_time": 0.11583995819091797,
      "step": 25172
    },
    {
      "epoch": 0.0001536376953125,
      "step": 25172,
      "training_step_time": 0.5790386199951172
    },
    {
      "epoch": 0.000153643798828125,
      "model_forward_time": 0.11610627174377441,
      "step": 25173
    },
    {
      "epoch": 0.000153643798828125,
      "step": 25173,
      "training_step_time": 0.45543551445007324
    },
    {
      "epoch": 0.00015364990234375,
      "model_forward_time": 0.1155097484588623,
      "step": 25174
    },
    {
      "epoch": 0.00015364990234375,
      "step": 25174,
      "training_step_time": 0.4198164939880371
    },
    {
      "epoch": 0.000153656005859375,
      "model_forward_time": 0.11521458625793457,
      "step": 25175
    },
    {
      "epoch": 0.000153656005859375,
      "step": 25175,
      "training_step_time": 0.46811819076538086
    },
    {
      "epoch": 0.000153662109375,
      "model_forward_time": 0.11529421806335449,
      "step": 25176
    },
    {
      "epoch": 0.000153662109375,
      "step": 25176,
      "training_step_time": 0.4040496349334717
    },
    {
      "epoch": 0.000153668212890625,
      "model_forward_time": 0.12139654159545898,
      "step": 25177
    },
    {
      "epoch": 0.000153668212890625,
      "step": 25177,
      "training_step_time": 0.3881256580352783
    },
    {
      "epoch": 0.00015367431640625,
      "model_forward_time": 0.1167757511138916,
      "step": 25178
    },
    {
      "epoch": 0.00015367431640625,
      "step": 25178,
      "training_step_time": 0.5191121101379395
    },
    {
      "epoch": 0.000153680419921875,
      "model_forward_time": 0.11536073684692383,
      "step": 25179
    },
    {
      "epoch": 0.000153680419921875,
      "step": 25179,
      "training_step_time": 0.39688825607299805
    },
    {
      "epoch": 0.0001536865234375,
      "grad_norm": 0.1549924910068512,
      "learning_rate": 6.706647469873031e-05,
      "loss": 0.0432,
      "step": 25180
    },
    {
      "epoch": 0.0001536865234375,
      "model_forward_time": 0.1151115894317627,
      "step": 25180
    },
    {
      "epoch": 0.0001536865234375,
      "step": 25180,
      "training_step_time": 0.3982987403869629
    },
    {
      "epoch": 0.000153692626953125,
      "model_forward_time": 0.1155695915222168,
      "step": 25181
    },
    {
      "epoch": 0.000153692626953125,
      "step": 25181,
      "training_step_time": 0.4313075542449951
    },
    {
      "epoch": 0.00015369873046875,
      "model_forward_time": 0.11553597450256348,
      "step": 25182
    },
    {
      "epoch": 0.00015369873046875,
      "step": 25182,
      "training_step_time": 0.4311354160308838
    },
    {
      "epoch": 0.000153704833984375,
      "model_forward_time": 0.11527013778686523,
      "step": 25183
    },
    {
      "epoch": 0.000153704833984375,
      "step": 25183,
      "training_step_time": 0.39786219596862793
    },
    {
      "epoch": 0.0001537109375,
      "model_forward_time": 0.11584925651550293,
      "step": 25184
    },
    {
      "epoch": 0.0001537109375,
      "step": 25184,
      "training_step_time": 0.5836203098297119
    },
    {
      "epoch": 0.000153717041015625,
      "model_forward_time": 0.11462044715881348,
      "step": 25185
    },
    {
      "epoch": 0.000153717041015625,
      "step": 25185,
      "training_step_time": 0.3681192398071289
    },
    {
      "epoch": 0.00015372314453125,
      "model_forward_time": 0.11498069763183594,
      "step": 25186
    },
    {
      "epoch": 0.00015372314453125,
      "step": 25186,
      "training_step_time": 0.44048190116882324
    },
    {
      "epoch": 0.000153729248046875,
      "model_forward_time": 0.11477112770080566,
      "step": 25187
    },
    {
      "epoch": 0.000153729248046875,
      "step": 25187,
      "training_step_time": 0.4028036594390869
    },
    {
      "epoch": 0.0001537353515625,
      "model_forward_time": 0.11493587493896484,
      "step": 25188
    },
    {
      "epoch": 0.0001537353515625,
      "step": 25188,
      "training_step_time": 0.41806936264038086
    },
    {
      "epoch": 0.000153741455078125,
      "model_forward_time": 0.1153104305267334,
      "step": 25189
    },
    {
      "epoch": 0.000153741455078125,
      "step": 25189,
      "training_step_time": 0.48921942710876465
    },
    {
      "epoch": 0.00015374755859375,
      "grad_norm": 0.1221795305609703,
      "learning_rate": 6.704056929864376e-05,
      "loss": 0.0418,
      "step": 25190
    },
    {
      "epoch": 0.00015374755859375,
      "model_forward_time": 0.11541891098022461,
      "step": 25190
    },
    {
      "epoch": 0.00015374755859375,
      "step": 25190,
      "training_step_time": 0.45160627365112305
    },
    {
      "epoch": 0.000153753662109375,
      "model_forward_time": 0.11551189422607422,
      "step": 25191
    },
    {
      "epoch": 0.000153753662109375,
      "step": 25191,
      "training_step_time": 0.3837122917175293
    },
    {
      "epoch": 0.000153759765625,
      "model_forward_time": 0.1154937744140625,
      "step": 25192
    },
    {
      "epoch": 0.000153759765625,
      "step": 25192,
      "training_step_time": 0.3943154811859131
    },
    {
      "epoch": 0.000153765869140625,
      "model_forward_time": 0.11525821685791016,
      "step": 25193
    },
    {
      "epoch": 0.000153765869140625,
      "step": 25193,
      "training_step_time": 0.393604040145874
    },
    {
      "epoch": 0.00015377197265625,
      "model_forward_time": 0.11605358123779297,
      "step": 25194
    },
    {
      "epoch": 0.00015377197265625,
      "step": 25194,
      "training_step_time": 0.39366745948791504
    },
    {
      "epoch": 0.000153778076171875,
      "model_forward_time": 0.11460399627685547,
      "step": 25195
    },
    {
      "epoch": 0.000153778076171875,
      "step": 25195,
      "training_step_time": 0.39835357666015625
    },
    {
      "epoch": 0.0001537841796875,
      "model_forward_time": 0.11527276039123535,
      "step": 25196
    },
    {
      "epoch": 0.0001537841796875,
      "step": 25196,
      "training_step_time": 0.9079222679138184
    },
    {
      "epoch": 0.000153790283203125,
      "model_forward_time": 0.11507964134216309,
      "step": 25197
    },
    {
      "epoch": 0.000153790283203125,
      "step": 25197,
      "training_step_time": 0.38252806663513184
    },
    {
      "epoch": 0.00015379638671875,
      "model_forward_time": 0.11441159248352051,
      "step": 25198
    },
    {
      "epoch": 0.00015379638671875,
      "step": 25198,
      "training_step_time": 0.3931248188018799
    },
    {
      "epoch": 0.000153802490234375,
      "model_forward_time": 0.11438703536987305,
      "step": 25199
    },
    {
      "epoch": 0.000153802490234375,
      "step": 25199,
      "training_step_time": 0.49451208114624023
    },
    {
      "epoch": 0.00015380859375,
      "grad_norm": 0.11061792075634003,
      "learning_rate": 6.701465872208216e-05,
      "loss": 0.0451,
      "step": 25200
    },
    {
      "epoch": 0.00015380859375,
      "model_forward_time": 0.1147770881652832,
      "step": 25200
    },
    {
      "epoch": 0.00015380859375,
      "step": 25200,
      "training_step_time": 0.4652426242828369
    },
    {
      "epoch": 0.000153814697265625,
      "model_forward_time": 0.11752963066101074,
      "step": 25201
    },
    {
      "epoch": 0.000153814697265625,
      "step": 25201,
      "training_step_time": 0.37754392623901367
    },
    {
      "epoch": 0.00015382080078125,
      "model_forward_time": 0.11771750450134277,
      "step": 25202
    },
    {
      "epoch": 0.00015382080078125,
      "step": 25202,
      "training_step_time": 0.5264794826507568
    },
    {
      "epoch": 0.000153826904296875,
      "model_forward_time": 0.11819052696228027,
      "step": 25203
    },
    {
      "epoch": 0.000153826904296875,
      "step": 25203,
      "training_step_time": 0.37871336936950684
    },
    {
      "epoch": 0.0001538330078125,
      "model_forward_time": 0.11777591705322266,
      "step": 25204
    },
    {
      "epoch": 0.0001538330078125,
      "step": 25204,
      "training_step_time": 0.3840160369873047
    },
    {
      "epoch": 0.000153839111328125,
      "model_forward_time": 0.11801910400390625,
      "step": 25205
    },
    {
      "epoch": 0.000153839111328125,
      "step": 25205,
      "training_step_time": 0.3807401657104492
    },
    {
      "epoch": 0.00015384521484375,
      "model_forward_time": 0.11490893363952637,
      "step": 25206
    },
    {
      "epoch": 0.00015384521484375,
      "step": 25206,
      "training_step_time": 0.39721179008483887
    },
    {
      "epoch": 0.000153851318359375,
      "model_forward_time": 0.11521267890930176,
      "step": 25207
    },
    {
      "epoch": 0.000153851318359375,
      "step": 25207,
      "training_step_time": 0.39472293853759766
    },
    {
      "epoch": 0.000153857421875,
      "model_forward_time": 0.1151273250579834,
      "step": 25208
    },
    {
      "epoch": 0.000153857421875,
      "step": 25208,
      "training_step_time": 0.7125084400177002
    },
    {
      "epoch": 0.000153863525390625,
      "model_forward_time": 0.11519765853881836,
      "step": 25209
    },
    {
      "epoch": 0.000153863525390625,
      "step": 25209,
      "training_step_time": 0.4071164131164551
    },
    {
      "epoch": 0.00015386962890625,
      "grad_norm": 0.14810538291931152,
      "learning_rate": 6.69887429769165e-05,
      "loss": 0.0392,
      "step": 25210
    },
    {
      "epoch": 0.00015386962890625,
      "model_forward_time": 0.11423540115356445,
      "step": 25210
    },
    {
      "epoch": 0.00015386962890625,
      "step": 25210,
      "training_step_time": 0.41326236724853516
    },
    {
      "epoch": 0.000153875732421875,
      "model_forward_time": 0.11430883407592773,
      "step": 25211
    },
    {
      "epoch": 0.000153875732421875,
      "step": 25211,
      "training_step_time": 0.4029502868652344
    },
    {
      "epoch": 0.0001538818359375,
      "model_forward_time": 0.11469459533691406,
      "step": 25212
    },
    {
      "epoch": 0.0001538818359375,
      "step": 25212,
      "training_step_time": 0.36313462257385254
    },
    {
      "epoch": 0.000153887939453125,
      "model_forward_time": 0.11447429656982422,
      "step": 25213
    },
    {
      "epoch": 0.000153887939453125,
      "step": 25213,
      "training_step_time": 0.4390437602996826
    },
    {
      "epoch": 0.00015389404296875,
      "model_forward_time": 0.11581778526306152,
      "step": 25214
    },
    {
      "epoch": 0.00015389404296875,
      "step": 25214,
      "training_step_time": 0.5509626865386963
    },
    {
      "epoch": 0.000153900146484375,
      "model_forward_time": 0.11487126350402832,
      "step": 25215
    },
    {
      "epoch": 0.000153900146484375,
      "step": 25215,
      "training_step_time": 0.446117639541626
    },
    {
      "epoch": 0.00015390625,
      "model_forward_time": 0.1160895824432373,
      "step": 25216
    },
    {
      "epoch": 0.00015390625,
      "step": 25216,
      "training_step_time": 0.40776824951171875
    },
    {
      "epoch": 0.000153912353515625,
      "model_forward_time": 0.11458206176757812,
      "step": 25217
    },
    {
      "epoch": 0.000153912353515625,
      "step": 25217,
      "training_step_time": 0.3849191665649414
    },
    {
      "epoch": 0.00015391845703125,
      "model_forward_time": 0.11786699295043945,
      "step": 25218
    },
    {
      "epoch": 0.00015391845703125,
      "step": 25218,
      "training_step_time": 0.40134644508361816
    },
    {
      "epoch": 0.000153924560546875,
      "model_forward_time": 0.11422991752624512,
      "step": 25219
    },
    {
      "epoch": 0.000153924560546875,
      "step": 25219,
      "training_step_time": 0.39845943450927734
    },
    {
      "epoch": 0.0001539306640625,
      "grad_norm": 0.10765696316957474,
      "learning_rate": 6.696282207101928e-05,
      "loss": 0.0432,
      "step": 25220
    },
    {
      "epoch": 0.0001539306640625,
      "model_forward_time": 0.11503052711486816,
      "step": 25220
    },
    {
      "epoch": 0.0001539306640625,
      "step": 25220,
      "training_step_time": 0.5736889839172363
    },
    {
      "epoch": 0.000153936767578125,
      "model_forward_time": 0.11564183235168457,
      "step": 25221
    },
    {
      "epoch": 0.000153936767578125,
      "step": 25221,
      "training_step_time": 0.3870372772216797
    },
    {
      "epoch": 0.00015394287109375,
      "model_forward_time": 0.11615109443664551,
      "step": 25222
    },
    {
      "epoch": 0.00015394287109375,
      "step": 25222,
      "training_step_time": 0.3889932632446289
    },
    {
      "epoch": 0.000153948974609375,
      "model_forward_time": 0.11469531059265137,
      "step": 25223
    },
    {
      "epoch": 0.000153948974609375,
      "step": 25223,
      "training_step_time": 0.4256455898284912
    },
    {
      "epoch": 0.000153955078125,
      "model_forward_time": 0.11684989929199219,
      "step": 25224
    },
    {
      "epoch": 0.000153955078125,
      "step": 25224,
      "training_step_time": 0.4827537536621094
    },
    {
      "epoch": 0.000153961181640625,
      "model_forward_time": 0.11439824104309082,
      "step": 25225
    },
    {
      "epoch": 0.000153961181640625,
      "step": 25225,
      "training_step_time": 0.4179093837738037
    },
    {
      "epoch": 0.00015396728515625,
      "model_forward_time": 0.11485695838928223,
      "step": 25226
    },
    {
      "epoch": 0.00015396728515625,
      "step": 25226,
      "training_step_time": 0.5985734462738037
    },
    {
      "epoch": 0.000153973388671875,
      "model_forward_time": 0.11465048789978027,
      "step": 25227
    },
    {
      "epoch": 0.000153973388671875,
      "step": 25227,
      "training_step_time": 0.439769983291626
    },
    {
      "epoch": 0.0001539794921875,
      "model_forward_time": 0.11477375030517578,
      "step": 25228
    },
    {
      "epoch": 0.0001539794921875,
      "step": 25228,
      "training_step_time": 0.41494107246398926
    },
    {
      "epoch": 0.000153985595703125,
      "model_forward_time": 0.11486649513244629,
      "step": 25229
    },
    {
      "epoch": 0.000153985595703125,
      "step": 25229,
      "training_step_time": 0.3918039798736572
    },
    {
      "epoch": 0.00015399169921875,
      "grad_norm": 0.14947816729545593,
      "learning_rate": 6.693689601226458e-05,
      "loss": 0.0446,
      "step": 25230
    },
    {
      "epoch": 0.00015399169921875,
      "model_forward_time": 0.1146845817565918,
      "step": 25230
    },
    {
      "epoch": 0.00015399169921875,
      "step": 25230,
      "training_step_time": 0.3937232494354248
    },
    {
      "epoch": 0.000153997802734375,
      "model_forward_time": 0.11479568481445312,
      "step": 25231
    },
    {
      "epoch": 0.000153997802734375,
      "step": 25231,
      "training_step_time": 0.391416072845459
    },
    {
      "epoch": 0.00015400390625,
      "model_forward_time": 0.11479854583740234,
      "step": 25232
    },
    {
      "epoch": 0.00015400390625,
      "step": 25232,
      "training_step_time": 0.5817794799804688
    },
    {
      "epoch": 0.000154010009765625,
      "model_forward_time": 0.11467266082763672,
      "step": 25233
    },
    {
      "epoch": 0.000154010009765625,
      "step": 25233,
      "training_step_time": 0.3838787078857422
    },
    {
      "epoch": 0.00015401611328125,
      "model_forward_time": 0.11511611938476562,
      "step": 25234
    },
    {
      "epoch": 0.00015401611328125,
      "step": 25234,
      "training_step_time": 0.3866705894470215
    },
    {
      "epoch": 0.000154022216796875,
      "model_forward_time": 0.11511707305908203,
      "step": 25235
    },
    {
      "epoch": 0.000154022216796875,
      "step": 25235,
      "training_step_time": 0.39333009719848633
    },
    {
      "epoch": 0.0001540283203125,
      "model_forward_time": 0.11466670036315918,
      "step": 25236
    },
    {
      "epoch": 0.0001540283203125,
      "step": 25236,
      "training_step_time": 0.3951759338378906
    },
    {
      "epoch": 0.000154034423828125,
      "model_forward_time": 0.11483216285705566,
      "step": 25237
    },
    {
      "epoch": 0.000154034423828125,
      "step": 25237,
      "training_step_time": 0.3919992446899414
    },
    {
      "epoch": 0.00015404052734375,
      "model_forward_time": 0.11504101753234863,
      "step": 25238
    },
    {
      "epoch": 0.00015404052734375,
      "step": 25238,
      "training_step_time": 0.6989531517028809
    },
    {
      "epoch": 0.000154046630859375,
      "model_forward_time": 0.11430668830871582,
      "step": 25239
    },
    {
      "epoch": 0.000154046630859375,
      "step": 25239,
      "training_step_time": 0.38393640518188477
    },
    {
      "epoch": 0.000154052734375,
      "grad_norm": 0.11528000235557556,
      "learning_rate": 6.691096480852808e-05,
      "loss": 0.0405,
      "step": 25240
    },
    {
      "epoch": 0.000154052734375,
      "model_forward_time": 0.1148672103881836,
      "step": 25240
    },
    {
      "epoch": 0.000154052734375,
      "step": 25240,
      "training_step_time": 0.3656589984893799
    },
    {
      "epoch": 0.000154058837890625,
      "model_forward_time": 0.11446309089660645,
      "step": 25241
    },
    {
      "epoch": 0.000154058837890625,
      "step": 25241,
      "training_step_time": 0.44840574264526367
    },
    {
      "epoch": 0.00015406494140625,
      "model_forward_time": 0.11596798896789551,
      "step": 25242
    },
    {
      "epoch": 0.00015406494140625,
      "step": 25242,
      "training_step_time": 0.40462732315063477
    },
    {
      "epoch": 0.000154071044921875,
      "model_forward_time": 0.11503720283508301,
      "step": 25243
    },
    {
      "epoch": 0.000154071044921875,
      "step": 25243,
      "training_step_time": 0.41015052795410156
    },
    {
      "epoch": 0.0001540771484375,
      "model_forward_time": 0.11434626579284668,
      "step": 25244
    },
    {
      "epoch": 0.0001540771484375,
      "step": 25244,
      "training_step_time": 0.6270689964294434
    },
    {
      "epoch": 0.000154083251953125,
      "model_forward_time": 0.11501765251159668,
      "step": 25245
    },
    {
      "epoch": 0.000154083251953125,
      "step": 25245,
      "training_step_time": 0.39174795150756836
    },
    {
      "epoch": 0.00015408935546875,
      "model_forward_time": 0.11677384376525879,
      "step": 25246
    },
    {
      "epoch": 0.00015408935546875,
      "step": 25246,
      "training_step_time": 0.394603967666626
    },
    {
      "epoch": 0.000154095458984375,
      "model_forward_time": 0.1145315170288086,
      "step": 25247
    },
    {
      "epoch": 0.000154095458984375,
      "step": 25247,
      "training_step_time": 0.398301362991333
    },
    {
      "epoch": 0.0001541015625,
      "model_forward_time": 0.11477065086364746,
      "step": 25248
    },
    {
      "epoch": 0.0001541015625,
      "step": 25248,
      "training_step_time": 0.3942725658416748
    },
    {
      "epoch": 0.000154107666015625,
      "model_forward_time": 0.11541128158569336,
      "step": 25249
    },
    {
      "epoch": 0.000154107666015625,
      "step": 25249,
      "training_step_time": 0.3888125419616699
    },
    {
      "epoch": 0.00015411376953125,
      "grad_norm": 0.1147659495472908,
      "learning_rate": 6.688502846768696e-05,
      "loss": 0.0476,
      "step": 25250
    },
    {
      "epoch": 0.00015411376953125,
      "model_forward_time": 0.11557912826538086,
      "step": 25250
    },
    {
      "epoch": 0.00015411376953125,
      "step": 25250,
      "training_step_time": 0.6314406394958496
    },
    {
      "epoch": 0.000154119873046875,
      "model_forward_time": 0.1148679256439209,
      "step": 25251
    },
    {
      "epoch": 0.000154119873046875,
      "step": 25251,
      "training_step_time": 0.5019097328186035
    },
    {
      "epoch": 0.0001541259765625,
      "model_forward_time": 0.1145470142364502,
      "step": 25252
    },
    {
      "epoch": 0.0001541259765625,
      "step": 25252,
      "training_step_time": 0.392636775970459
    },
    {
      "epoch": 0.000154132080078125,
      "model_forward_time": 0.11532115936279297,
      "step": 25253
    },
    {
      "epoch": 0.000154132080078125,
      "step": 25253,
      "training_step_time": 0.38924288749694824
    },
    {
      "epoch": 0.00015413818359375,
      "model_forward_time": 0.1149756908416748,
      "step": 25254
    },
    {
      "epoch": 0.00015413818359375,
      "step": 25254,
      "training_step_time": 0.40934133529663086
    },
    {
      "epoch": 0.000154144287109375,
      "model_forward_time": 0.11507177352905273,
      "step": 25255
    },
    {
      "epoch": 0.000154144287109375,
      "step": 25255,
      "training_step_time": 0.43666744232177734
    },
    {
      "epoch": 0.000154150390625,
      "model_forward_time": 0.11512398719787598,
      "step": 25256
    },
    {
      "epoch": 0.000154150390625,
      "step": 25256,
      "training_step_time": 0.5971448421478271
    },
    {
      "epoch": 0.000154156494140625,
      "model_forward_time": 0.11539793014526367,
      "step": 25257
    },
    {
      "epoch": 0.000154156494140625,
      "step": 25257,
      "training_step_time": 0.41817212104797363
    },
    {
      "epoch": 0.00015416259765625,
      "model_forward_time": 0.11530637741088867,
      "step": 25258
    },
    {
      "epoch": 0.00015416259765625,
      "step": 25258,
      "training_step_time": 0.39714503288269043
    },
    {
      "epoch": 0.000154168701171875,
      "model_forward_time": 0.11458849906921387,
      "step": 25259
    },
    {
      "epoch": 0.000154168701171875,
      "step": 25259,
      "training_step_time": 0.39830756187438965
    },
    {
      "epoch": 0.0001541748046875,
      "grad_norm": 0.09649385511875153,
      "learning_rate": 6.685908699762002e-05,
      "loss": 0.0423,
      "step": 25260
    },
    {
      "epoch": 0.0001541748046875,
      "model_forward_time": 0.11439132690429688,
      "step": 25260
    },
    {
      "epoch": 0.0001541748046875,
      "step": 25260,
      "training_step_time": 0.3980741500854492
    },
    {
      "epoch": 0.000154180908203125,
      "model_forward_time": 0.11513543128967285,
      "step": 25261
    },
    {
      "epoch": 0.000154180908203125,
      "step": 25261,
      "training_step_time": 0.3986353874206543
    },
    {
      "epoch": 0.00015418701171875,
      "model_forward_time": 0.11467957496643066,
      "step": 25262
    },
    {
      "epoch": 0.00015418701171875,
      "step": 25262,
      "training_step_time": 0.7350914478302002
    },
    {
      "epoch": 0.000154193115234375,
      "model_forward_time": 0.11411881446838379,
      "step": 25263
    },
    {
      "epoch": 0.000154193115234375,
      "step": 25263,
      "training_step_time": 0.40628790855407715
    },
    {
      "epoch": 0.00015419921875,
      "model_forward_time": 0.11491274833679199,
      "step": 25264
    },
    {
      "epoch": 0.00015419921875,
      "step": 25264,
      "training_step_time": 0.3973090648651123
    },
    {
      "epoch": 0.000154205322265625,
      "model_forward_time": 0.11423707008361816,
      "step": 25265
    },
    {
      "epoch": 0.000154205322265625,
      "step": 25265,
      "training_step_time": 0.4558732509613037
    },
    {
      "epoch": 0.00015421142578125,
      "model_forward_time": 0.11447691917419434,
      "step": 25266
    },
    {
      "epoch": 0.00015421142578125,
      "step": 25266,
      "training_step_time": 0.39237380027770996
    },
    {
      "epoch": 0.000154217529296875,
      "model_forward_time": 0.11414408683776855,
      "step": 25267
    },
    {
      "epoch": 0.000154217529296875,
      "step": 25267,
      "training_step_time": 0.38769054412841797
    },
    {
      "epoch": 0.0001542236328125,
      "model_forward_time": 0.11566829681396484,
      "step": 25268
    },
    {
      "epoch": 0.0001542236328125,
      "step": 25268,
      "training_step_time": 0.5046148300170898
    },
    {
      "epoch": 0.000154229736328125,
      "model_forward_time": 0.11525225639343262,
      "step": 25269
    },
    {
      "epoch": 0.000154229736328125,
      "step": 25269,
      "training_step_time": 0.4944310188293457
    },
    {
      "epoch": 0.00015423583984375,
      "grad_norm": 0.1880837082862854,
      "learning_rate": 6.683314040620759e-05,
      "loss": 0.0469,
      "step": 25270
    },
    {
      "epoch": 0.00015423583984375,
      "model_forward_time": 0.11561870574951172,
      "step": 25270
    },
    {
      "epoch": 0.00015423583984375,
      "step": 25270,
      "training_step_time": 0.4130725860595703
    },
    {
      "epoch": 0.000154241943359375,
      "model_forward_time": 0.11470556259155273,
      "step": 25271
    },
    {
      "epoch": 0.000154241943359375,
      "step": 25271,
      "training_step_time": 0.3824901580810547
    },
    {
      "epoch": 0.000154248046875,
      "model_forward_time": 0.11511802673339844,
      "step": 25272
    },
    {
      "epoch": 0.000154248046875,
      "step": 25272,
      "training_step_time": 0.405289888381958
    },
    {
      "epoch": 0.000154254150390625,
      "model_forward_time": 0.11490917205810547,
      "step": 25273
    },
    {
      "epoch": 0.000154254150390625,
      "step": 25273,
      "training_step_time": 0.3889501094818115
    },
    {
      "epoch": 0.00015426025390625,
      "model_forward_time": 0.11516284942626953,
      "step": 25274
    },
    {
      "epoch": 0.00015426025390625,
      "step": 25274,
      "training_step_time": 0.47142457962036133
    },
    {
      "epoch": 0.000154266357421875,
      "model_forward_time": 0.11553573608398438,
      "step": 25275
    },
    {
      "epoch": 0.000154266357421875,
      "step": 25275,
      "training_step_time": 0.3862640857696533
    },
    {
      "epoch": 0.0001542724609375,
      "model_forward_time": 0.11508560180664062,
      "step": 25276
    },
    {
      "epoch": 0.0001542724609375,
      "step": 25276,
      "training_step_time": 0.3984713554382324
    },
    {
      "epoch": 0.000154278564453125,
      "model_forward_time": 0.11496376991271973,
      "step": 25277
    },
    {
      "epoch": 0.000154278564453125,
      "step": 25277,
      "training_step_time": 0.399090051651001
    },
    {
      "epoch": 0.00015428466796875,
      "model_forward_time": 0.11596274375915527,
      "step": 25278
    },
    {
      "epoch": 0.00015428466796875,
      "step": 25278,
      "training_step_time": 0.43413400650024414
    },
    {
      "epoch": 0.000154290771484375,
      "model_forward_time": 0.1148831844329834,
      "step": 25279
    },
    {
      "epoch": 0.000154290771484375,
      "step": 25279,
      "training_step_time": 0.3991713523864746
    },
    {
      "epoch": 0.000154296875,
      "grad_norm": 0.1741582453250885,
      "learning_rate": 6.680718870133156e-05,
      "loss": 0.0458,
      "step": 25280
    },
    {
      "epoch": 0.000154296875,
      "model_forward_time": 0.11491060256958008,
      "step": 25280
    },
    {
      "epoch": 0.000154296875,
      "step": 25280,
      "training_step_time": 0.6300671100616455
    },
    {
      "epoch": 0.000154302978515625,
      "model_forward_time": 0.11531901359558105,
      "step": 25281
    },
    {
      "epoch": 0.000154302978515625,
      "step": 25281,
      "training_step_time": 0.3887331485748291
    },
    {
      "epoch": 0.00015430908203125,
      "model_forward_time": 0.11505794525146484,
      "step": 25282
    },
    {
      "epoch": 0.00015430908203125,
      "step": 25282,
      "training_step_time": 0.4716031551361084
    },
    {
      "epoch": 0.000154315185546875,
      "model_forward_time": 0.11466455459594727,
      "step": 25283
    },
    {
      "epoch": 0.000154315185546875,
      "step": 25283,
      "training_step_time": 0.44944214820861816
    },
    {
      "epoch": 0.0001543212890625,
      "model_forward_time": 0.11460709571838379,
      "step": 25284
    },
    {
      "epoch": 0.0001543212890625,
      "step": 25284,
      "training_step_time": 0.44405269622802734
    },
    {
      "epoch": 0.000154327392578125,
      "model_forward_time": 0.11425971984863281,
      "step": 25285
    },
    {
      "epoch": 0.000154327392578125,
      "step": 25285,
      "training_step_time": 0.42073988914489746
    },
    {
      "epoch": 0.00015433349609375,
      "model_forward_time": 0.11497879028320312,
      "step": 25286
    },
    {
      "epoch": 0.00015433349609375,
      "step": 25286,
      "training_step_time": 0.404740571975708
    },
    {
      "epoch": 0.000154339599609375,
      "model_forward_time": 0.11518096923828125,
      "step": 25287
    },
    {
      "epoch": 0.000154339599609375,
      "step": 25287,
      "training_step_time": 0.38837504386901855
    },
    {
      "epoch": 0.000154345703125,
      "model_forward_time": 0.11523151397705078,
      "step": 25288
    },
    {
      "epoch": 0.000154345703125,
      "step": 25288,
      "training_step_time": 0.38423848152160645
    },
    {
      "epoch": 0.000154351806640625,
      "model_forward_time": 0.11468362808227539,
      "step": 25289
    },
    {
      "epoch": 0.000154351806640625,
      "step": 25289,
      "training_step_time": 0.3818497657775879
    },
    {
      "epoch": 0.00015435791015625,
      "grad_norm": 0.15673725306987762,
      "learning_rate": 6.67812318908754e-05,
      "loss": 0.0397,
      "step": 25290
    },
    {
      "epoch": 0.00015435791015625,
      "model_forward_time": 0.11596083641052246,
      "step": 25290
    },
    {
      "epoch": 0.00015435791015625,
      "step": 25290,
      "training_step_time": 0.456087589263916
    },
    {
      "epoch": 0.000154364013671875,
      "model_forward_time": 0.11593508720397949,
      "step": 25291
    },
    {
      "epoch": 0.000154364013671875,
      "step": 25291,
      "training_step_time": 0.39607930183410645
    },
    {
      "epoch": 0.0001543701171875,
      "model_forward_time": 0.11523056030273438,
      "step": 25292
    },
    {
      "epoch": 0.0001543701171875,
      "step": 25292,
      "training_step_time": 0.6545381546020508
    },
    {
      "epoch": 0.000154376220703125,
      "model_forward_time": 0.11501002311706543,
      "step": 25293
    },
    {
      "epoch": 0.000154376220703125,
      "step": 25293,
      "training_step_time": 0.41000866889953613
    },
    {
      "epoch": 0.00015438232421875,
      "model_forward_time": 0.1150665283203125,
      "step": 25294
    },
    {
      "epoch": 0.00015438232421875,
      "step": 25294,
      "training_step_time": 0.3868260383605957
    },
    {
      "epoch": 0.000154388427734375,
      "model_forward_time": 0.11464715003967285,
      "step": 25295
    },
    {
      "epoch": 0.000154388427734375,
      "step": 25295,
      "training_step_time": 0.38455963134765625
    },
    {
      "epoch": 0.00015439453125,
      "model_forward_time": 0.11483240127563477,
      "step": 25296
    },
    {
      "epoch": 0.00015439453125,
      "step": 25296,
      "training_step_time": 0.4140446186065674
    },
    {
      "epoch": 0.000154400634765625,
      "model_forward_time": 0.11562681198120117,
      "step": 25297
    },
    {
      "epoch": 0.000154400634765625,
      "step": 25297,
      "training_step_time": 0.5100414752960205
    },
    {
      "epoch": 0.00015440673828125,
      "model_forward_time": 0.11532855033874512,
      "step": 25298
    },
    {
      "epoch": 0.00015440673828125,
      "step": 25298,
      "training_step_time": 0.5808610916137695
    },
    {
      "epoch": 0.000154412841796875,
      "model_forward_time": 0.11480307579040527,
      "step": 25299
    },
    {
      "epoch": 0.000154412841796875,
      "step": 25299,
      "training_step_time": 0.4564387798309326
    },
    {
      "epoch": 0.0001544189453125,
      "grad_norm": 0.22171276807785034,
      "learning_rate": 6.675526998272405e-05,
      "loss": 0.0425,
      "step": 25300
    },
    {
      "epoch": 0.0001544189453125,
      "model_forward_time": 0.11505722999572754,
      "step": 25300
    },
    {
      "epoch": 0.0001544189453125,
      "step": 25300,
      "training_step_time": 0.40926098823547363
    },
    {
      "epoch": 0.000154425048828125,
      "model_forward_time": 0.1170961856842041,
      "step": 25301
    },
    {
      "epoch": 0.000154425048828125,
      "step": 25301,
      "training_step_time": 0.4604487419128418
    },
    {
      "epoch": 0.00015443115234375,
      "model_forward_time": 0.12699413299560547,
      "step": 25302
    },
    {
      "epoch": 0.00015443115234375,
      "step": 25302,
      "training_step_time": 0.5677878856658936
    },
    {
      "epoch": 0.000154437255859375,
      "model_forward_time": 0.11774682998657227,
      "step": 25303
    },
    {
      "epoch": 0.000154437255859375,
      "step": 25303,
      "training_step_time": 0.6238038539886475
    },
    {
      "epoch": 0.000154443359375,
      "model_forward_time": 0.11841726303100586,
      "step": 25304
    },
    {
      "epoch": 0.000154443359375,
      "step": 25304,
      "training_step_time": 0.8132364749908447
    },
    {
      "epoch": 0.000154449462890625,
      "model_forward_time": 0.11721611022949219,
      "step": 25305
    },
    {
      "epoch": 0.000154449462890625,
      "step": 25305,
      "training_step_time": 0.6928205490112305
    },
    {
      "epoch": 0.00015445556640625,
      "model_forward_time": 0.11948490142822266,
      "step": 25306
    },
    {
      "epoch": 0.00015445556640625,
      "step": 25306,
      "training_step_time": 0.7625021934509277
    },
    {
      "epoch": 0.000154461669921875,
      "model_forward_time": 0.1190328598022461,
      "step": 25307
    },
    {
      "epoch": 0.000154461669921875,
      "step": 25307,
      "training_step_time": 0.648291826248169
    },
    {
      "epoch": 0.0001544677734375,
      "model_forward_time": 0.11928558349609375,
      "step": 25308
    },
    {
      "epoch": 0.0001544677734375,
      "step": 25308,
      "training_step_time": 0.7896699905395508
    },
    {
      "epoch": 0.000154473876953125,
      "model_forward_time": 0.11785316467285156,
      "step": 25309
    },
    {
      "epoch": 0.000154473876953125,
      "step": 25309,
      "training_step_time": 0.7487435340881348
    },
    {
      "epoch": 0.00015447998046875,
      "grad_norm": 0.14479194581508636,
      "learning_rate": 6.672930298476407e-05,
      "loss": 0.0457,
      "step": 25310
    },
    {
      "epoch": 0.00015447998046875,
      "model_forward_time": 0.12575483322143555,
      "step": 25310
    },
    {
      "epoch": 0.00015447998046875,
      "step": 25310,
      "training_step_time": 0.6969401836395264
    },
    {
      "epoch": 0.000154486083984375,
      "model_forward_time": 0.12314724922180176,
      "step": 25311
    },
    {
      "epoch": 0.000154486083984375,
      "step": 25311,
      "training_step_time": 0.6949512958526611
    },
    {
      "epoch": 0.0001544921875,
      "model_forward_time": 0.12083077430725098,
      "step": 25312
    },
    {
      "epoch": 0.0001544921875,
      "step": 25312,
      "training_step_time": 0.6906273365020752
    },
    {
      "epoch": 0.000154498291015625,
      "model_forward_time": 0.11991620063781738,
      "step": 25313
    },
    {
      "epoch": 0.000154498291015625,
      "step": 25313,
      "training_step_time": 0.6528654098510742
    },
    {
      "epoch": 0.00015450439453125,
      "model_forward_time": 0.11684894561767578,
      "step": 25314
    },
    {
      "epoch": 0.00015450439453125,
      "step": 25314,
      "training_step_time": 0.7434659004211426
    },
    {
      "epoch": 0.000154510498046875,
      "model_forward_time": 0.12147402763366699,
      "step": 25315
    },
    {
      "epoch": 0.000154510498046875,
      "step": 25315,
      "training_step_time": 0.6571316719055176
    },
    {
      "epoch": 0.0001545166015625,
      "model_forward_time": 0.11784768104553223,
      "step": 25316
    },
    {
      "epoch": 0.0001545166015625,
      "step": 25316,
      "training_step_time": 0.6743981838226318
    },
    {
      "epoch": 0.000154522705078125,
      "model_forward_time": 0.11788773536682129,
      "step": 25317
    },
    {
      "epoch": 0.000154522705078125,
      "step": 25317,
      "training_step_time": 0.6747739315032959
    },
    {
      "epoch": 0.00015452880859375,
      "model_forward_time": 0.12191390991210938,
      "step": 25318
    },
    {
      "epoch": 0.00015452880859375,
      "step": 25318,
      "training_step_time": 0.6981449127197266
    },
    {
      "epoch": 0.000154534912109375,
      "model_forward_time": 0.12156486511230469,
      "step": 25319
    },
    {
      "epoch": 0.000154534912109375,
      "step": 25319,
      "training_step_time": 0.6265826225280762
    },
    {
      "epoch": 0.000154541015625,
      "grad_norm": 0.12863415479660034,
      "learning_rate": 6.670333090488356e-05,
      "loss": 0.0496,
      "step": 25320
    },
    {
      "epoch": 0.000154541015625,
      "model_forward_time": 0.1192164421081543,
      "step": 25320
    },
    {
      "epoch": 0.000154541015625,
      "step": 25320,
      "training_step_time": 0.627784013748169
    },
    {
      "epoch": 0.000154547119140625,
      "model_forward_time": 0.11880016326904297,
      "step": 25321
    },
    {
      "epoch": 0.000154547119140625,
      "step": 25321,
      "training_step_time": 0.6615433692932129
    },
    {
      "epoch": 0.00015455322265625,
      "model_forward_time": 0.12299704551696777,
      "step": 25322
    },
    {
      "epoch": 0.00015455322265625,
      "step": 25322,
      "training_step_time": 0.798539400100708
    },
    {
      "epoch": 0.000154559326171875,
      "model_forward_time": 0.1179499626159668,
      "step": 25323
    },
    {
      "epoch": 0.000154559326171875,
      "step": 25323,
      "training_step_time": 0.7480969429016113
    },
    {
      "epoch": 0.0001545654296875,
      "model_forward_time": 0.12192964553833008,
      "step": 25324
    },
    {
      "epoch": 0.0001545654296875,
      "step": 25324,
      "training_step_time": 0.7217845916748047
    },
    {
      "epoch": 0.000154571533203125,
      "model_forward_time": 0.11983013153076172,
      "step": 25325
    },
    {
      "epoch": 0.000154571533203125,
      "step": 25325,
      "training_step_time": 0.6045036315917969
    },
    {
      "epoch": 0.00015457763671875,
      "model_forward_time": 0.11662554740905762,
      "step": 25326
    },
    {
      "epoch": 0.00015457763671875,
      "step": 25326,
      "training_step_time": 0.7398321628570557
    },
    {
      "epoch": 0.000154583740234375,
      "model_forward_time": 0.1262035369873047,
      "step": 25327
    },
    {
      "epoch": 0.000154583740234375,
      "step": 25327,
      "training_step_time": 0.7206807136535645
    },
    {
      "epoch": 0.00015458984375,
      "model_forward_time": 0.11844038963317871,
      "step": 25328
    },
    {
      "epoch": 0.00015458984375,
      "step": 25328,
      "training_step_time": 0.6726760864257812
    },
    {
      "epoch": 0.000154595947265625,
      "model_forward_time": 0.11916851997375488,
      "step": 25329
    },
    {
      "epoch": 0.000154595947265625,
      "step": 25329,
      "training_step_time": 0.7153172492980957
    },
    {
      "epoch": 0.00015460205078125,
      "grad_norm": 0.1746147871017456,
      "learning_rate": 6.667735375097214e-05,
      "loss": 0.0553,
      "step": 25330
    },
    {
      "epoch": 0.00015460205078125,
      "model_forward_time": 0.11756277084350586,
      "step": 25330
    },
    {
      "epoch": 0.00015460205078125,
      "step": 25330,
      "training_step_time": 0.6958572864532471
    },
    {
      "epoch": 0.000154608154296875,
      "model_forward_time": 0.11611199378967285,
      "step": 25331
    },
    {
      "epoch": 0.000154608154296875,
      "step": 25331,
      "training_step_time": 0.6690371036529541
    },
    {
      "epoch": 0.0001546142578125,
      "model_forward_time": 0.11596202850341797,
      "step": 25332
    },
    {
      "epoch": 0.0001546142578125,
      "step": 25332,
      "training_step_time": 0.8172063827514648
    },
    {
      "epoch": 0.000154620361328125,
      "model_forward_time": 0.12134814262390137,
      "step": 25333
    },
    {
      "epoch": 0.000154620361328125,
      "step": 25333,
      "training_step_time": 0.658841609954834
    },
    {
      "epoch": 0.00015462646484375,
      "model_forward_time": 0.13919758796691895,
      "step": 25334
    },
    {
      "epoch": 0.00015462646484375,
      "step": 25334,
      "training_step_time": 0.6255228519439697
    },
    {
      "epoch": 0.000154632568359375,
      "model_forward_time": 0.11918139457702637,
      "step": 25335
    },
    {
      "epoch": 0.000154632568359375,
      "step": 25335,
      "training_step_time": 0.8105871677398682
    },
    {
      "epoch": 0.000154638671875,
      "model_forward_time": 0.11600685119628906,
      "step": 25336
    },
    {
      "epoch": 0.000154638671875,
      "step": 25336,
      "training_step_time": 0.7107353210449219
    },
    {
      "epoch": 0.000154644775390625,
      "model_forward_time": 0.11913800239562988,
      "step": 25337
    },
    {
      "epoch": 0.000154644775390625,
      "step": 25337,
      "training_step_time": 0.6498267650604248
    },
    {
      "epoch": 0.00015465087890625,
      "model_forward_time": 0.11728715896606445,
      "step": 25338
    },
    {
      "epoch": 0.00015465087890625,
      "step": 25338,
      "training_step_time": 0.660959005355835
    },
    {
      "epoch": 0.000154656982421875,
      "model_forward_time": 0.12006258964538574,
      "step": 25339
    },
    {
      "epoch": 0.000154656982421875,
      "step": 25339,
      "training_step_time": 0.6482629776000977
    },
    {
      "epoch": 0.0001546630859375,
      "grad_norm": 0.10623626410961151,
      "learning_rate": 6.6651371530921e-05,
      "loss": 0.0497,
      "step": 25340
    },
    {
      "epoch": 0.0001546630859375,
      "model_forward_time": 0.1195368766784668,
      "step": 25340
    },
    {
      "epoch": 0.0001546630859375,
      "step": 25340,
      "training_step_time": 0.743436336517334
    },
    {
      "epoch": 0.000154669189453125,
      "model_forward_time": 0.11964988708496094,
      "step": 25341
    },
    {
      "epoch": 0.000154669189453125,
      "step": 25341,
      "training_step_time": 0.6726319789886475
    },
    {
      "epoch": 0.00015467529296875,
      "model_forward_time": 0.12130475044250488,
      "step": 25342
    },
    {
      "epoch": 0.00015467529296875,
      "step": 25342,
      "training_step_time": 0.6221833229064941
    },
    {
      "epoch": 0.000154681396484375,
      "model_forward_time": 0.12109684944152832,
      "step": 25343
    },
    {
      "epoch": 0.000154681396484375,
      "step": 25343,
      "training_step_time": 0.6378488540649414
    },
    {
      "epoch": 0.0001546875,
      "model_forward_time": 0.11917972564697266,
      "step": 25344
    },
    {
      "epoch": 0.0001546875,
      "step": 25344,
      "training_step_time": 0.6948738098144531
    },
    {
      "epoch": 0.000154693603515625,
      "model_forward_time": 0.12059307098388672,
      "step": 25345
    },
    {
      "epoch": 0.000154693603515625,
      "step": 25345,
      "training_step_time": 0.6232495307922363
    },
    {
      "epoch": 0.00015469970703125,
      "model_forward_time": 0.11708283424377441,
      "step": 25346
    },
    {
      "epoch": 0.00015469970703125,
      "step": 25346,
      "training_step_time": 0.7024455070495605
    },
    {
      "epoch": 0.000154705810546875,
      "model_forward_time": 0.11960363388061523,
      "step": 25347
    },
    {
      "epoch": 0.000154705810546875,
      "step": 25347,
      "training_step_time": 0.7184536457061768
    },
    {
      "epoch": 0.0001547119140625,
      "model_forward_time": 0.11796140670776367,
      "step": 25348
    },
    {
      "epoch": 0.0001547119140625,
      "step": 25348,
      "training_step_time": 0.6665575504302979
    },
    {
      "epoch": 0.000154718017578125,
      "model_forward_time": 0.11751937866210938,
      "step": 25349
    },
    {
      "epoch": 0.000154718017578125,
      "step": 25349,
      "training_step_time": 0.6106536388397217
    },
    {
      "epoch": 0.00015472412109375,
      "grad_norm": 0.1298104226589203,
      "learning_rate": 6.662538425262285e-05,
      "loss": 0.055,
      "step": 25350
    },
    {
      "epoch": 0.00015472412109375,
      "model_forward_time": 0.11701107025146484,
      "step": 25350
    },
    {
      "epoch": 0.00015472412109375,
      "step": 25350,
      "training_step_time": 0.6679971218109131
    },
    {
      "epoch": 0.000154730224609375,
      "model_forward_time": 0.11924314498901367,
      "step": 25351
    },
    {
      "epoch": 0.000154730224609375,
      "step": 25351,
      "training_step_time": 0.7221682071685791
    },
    {
      "epoch": 0.000154736328125,
      "model_forward_time": 0.14493107795715332,
      "step": 25352
    },
    {
      "epoch": 0.000154736328125,
      "step": 25352,
      "training_step_time": 0.7471685409545898
    },
    {
      "epoch": 0.000154742431640625,
      "model_forward_time": 0.12015199661254883,
      "step": 25353
    },
    {
      "epoch": 0.000154742431640625,
      "step": 25353,
      "training_step_time": 0.5402390956878662
    },
    {
      "epoch": 0.00015474853515625,
      "model_forward_time": 0.12073516845703125,
      "step": 25354
    },
    {
      "epoch": 0.00015474853515625,
      "step": 25354,
      "training_step_time": 0.7443134784698486
    },
    {
      "epoch": 0.000154754638671875,
      "model_forward_time": 0.11849427223205566,
      "step": 25355
    },
    {
      "epoch": 0.000154754638671875,
      "step": 25355,
      "training_step_time": 0.6788439750671387
    },
    {
      "epoch": 0.0001547607421875,
      "model_forward_time": 0.116943359375,
      "step": 25356
    },
    {
      "epoch": 0.0001547607421875,
      "step": 25356,
      "training_step_time": 0.6899781227111816
    },
    {
      "epoch": 0.000154766845703125,
      "model_forward_time": 0.11933350563049316,
      "step": 25357
    },
    {
      "epoch": 0.000154766845703125,
      "step": 25357,
      "training_step_time": 0.6111056804656982
    },
    {
      "epoch": 0.00015477294921875,
      "model_forward_time": 0.12250947952270508,
      "step": 25358
    },
    {
      "epoch": 0.00015477294921875,
      "step": 25358,
      "training_step_time": 0.6475703716278076
    },
    {
      "epoch": 0.000154779052734375,
      "model_forward_time": 0.12203383445739746,
      "step": 25359
    },
    {
      "epoch": 0.000154779052734375,
      "step": 25359,
      "training_step_time": 0.6411356925964355
    },
    {
      "epoch": 0.00015478515625,
      "grad_norm": 0.1755296289920807,
      "learning_rate": 6.659939192397192e-05,
      "loss": 0.05,
      "step": 25360
    },
    {
      "epoch": 0.00015478515625,
      "model_forward_time": 0.12311482429504395,
      "step": 25360
    },
    {
      "epoch": 0.00015478515625,
      "step": 25360,
      "training_step_time": 0.6985476016998291
    },
    {
      "epoch": 0.000154791259765625,
      "model_forward_time": 0.11894488334655762,
      "step": 25361
    },
    {
      "epoch": 0.000154791259765625,
      "step": 25361,
      "training_step_time": 0.6460349559783936
    },
    {
      "epoch": 0.00015479736328125,
      "model_forward_time": 0.1301417350769043,
      "step": 25362
    },
    {
      "epoch": 0.00015479736328125,
      "step": 25362,
      "training_step_time": 0.6421732902526855
    },
    {
      "epoch": 0.000154803466796875,
      "model_forward_time": 0.11816811561584473,
      "step": 25363
    },
    {
      "epoch": 0.000154803466796875,
      "step": 25363,
      "training_step_time": 0.73602294921875
    },
    {
      "epoch": 0.0001548095703125,
      "model_forward_time": 0.12006330490112305,
      "step": 25364
    },
    {
      "epoch": 0.0001548095703125,
      "step": 25364,
      "training_step_time": 0.7398216724395752
    },
    {
      "epoch": 0.000154815673828125,
      "model_forward_time": 0.12026262283325195,
      "step": 25365
    },
    {
      "epoch": 0.000154815673828125,
      "step": 25365,
      "training_step_time": 0.6123349666595459
    },
    {
      "epoch": 0.00015482177734375,
      "model_forward_time": 0.12077999114990234,
      "step": 25366
    },
    {
      "epoch": 0.00015482177734375,
      "step": 25366,
      "training_step_time": 0.5773940086364746
    },
    {
      "epoch": 0.000154827880859375,
      "model_forward_time": 0.14114785194396973,
      "step": 25367
    },
    {
      "epoch": 0.000154827880859375,
      "step": 25367,
      "training_step_time": 0.5802414417266846
    },
    {
      "epoch": 0.000154833984375,
      "model_forward_time": 0.12214779853820801,
      "step": 25368
    },
    {
      "epoch": 0.000154833984375,
      "step": 25368,
      "training_step_time": 0.5779199600219727
    },
    {
      "epoch": 0.000154840087890625,
      "model_forward_time": 0.11958527565002441,
      "step": 25369
    },
    {
      "epoch": 0.000154840087890625,
      "step": 25369,
      "training_step_time": 0.6346309185028076
    },
    {
      "epoch": 0.00015484619140625,
      "grad_norm": 0.1410997062921524,
      "learning_rate": 6.6573394552864e-05,
      "loss": 0.0597,
      "step": 25370
    },
    {
      "epoch": 0.00015484619140625,
      "model_forward_time": 0.12004351615905762,
      "step": 25370
    },
    {
      "epoch": 0.00015484619140625,
      "step": 25370,
      "training_step_time": 0.7324817180633545
    },
    {
      "epoch": 0.000154852294921875,
      "model_forward_time": 0.11739921569824219,
      "step": 25371
    },
    {
      "epoch": 0.000154852294921875,
      "step": 25371,
      "training_step_time": 0.5830614566802979
    },
    {
      "epoch": 0.0001548583984375,
      "model_forward_time": 0.11811947822570801,
      "step": 25372
    },
    {
      "epoch": 0.0001548583984375,
      "step": 25372,
      "training_step_time": 0.5163569450378418
    },
    {
      "epoch": 0.000154864501953125,
      "model_forward_time": 0.11691832542419434,
      "step": 25373
    },
    {
      "epoch": 0.000154864501953125,
      "step": 25373,
      "training_step_time": 0.43622469902038574
    },
    {
      "epoch": 0.00015487060546875,
      "model_forward_time": 0.11775970458984375,
      "step": 25374
    },
    {
      "epoch": 0.00015487060546875,
      "step": 25374,
      "training_step_time": 0.5712523460388184
    },
    {
      "epoch": 0.000154876708984375,
      "model_forward_time": 0.11862468719482422,
      "step": 25375
    },
    {
      "epoch": 0.000154876708984375,
      "step": 25375,
      "training_step_time": 0.5372107028961182
    },
    {
      "epoch": 0.0001548828125,
      "model_forward_time": 0.11570906639099121,
      "step": 25376
    },
    {
      "epoch": 0.0001548828125,
      "step": 25376,
      "training_step_time": 0.4448821544647217
    },
    {
      "epoch": 0.000154888916015625,
      "model_forward_time": 0.11857008934020996,
      "step": 25377
    },
    {
      "epoch": 0.000154888916015625,
      "step": 25377,
      "training_step_time": 0.38886189460754395
    },
    {
      "epoch": 0.00015489501953125,
      "model_forward_time": 0.11870789527893066,
      "step": 25378
    },
    {
      "epoch": 0.00015489501953125,
      "step": 25378,
      "training_step_time": 0.39238905906677246
    },
    {
      "epoch": 0.000154901123046875,
      "model_forward_time": 0.11938810348510742,
      "step": 25379
    },
    {
      "epoch": 0.000154901123046875,
      "step": 25379,
      "training_step_time": 0.38089609146118164
    },
    {
      "epoch": 0.0001549072265625,
      "grad_norm": 0.18894051015377045,
      "learning_rate": 6.654739214719641e-05,
      "loss": 0.0506,
      "step": 25380
    },
    {
      "epoch": 0.0001549072265625,
      "model_forward_time": 0.11844182014465332,
      "step": 25380
    },
    {
      "epoch": 0.0001549072265625,
      "step": 25380,
      "training_step_time": 0.3978421688079834
    },
    {
      "epoch": 0.000154913330078125,
      "model_forward_time": 0.1162559986114502,
      "step": 25381
    },
    {
      "epoch": 0.000154913330078125,
      "step": 25381,
      "training_step_time": 0.38207197189331055
    },
    {
      "epoch": 0.00015491943359375,
      "model_forward_time": 0.11534571647644043,
      "step": 25382
    },
    {
      "epoch": 0.00015491943359375,
      "step": 25382,
      "training_step_time": 0.4286220073699951
    },
    {
      "epoch": 0.000154925537109375,
      "model_forward_time": 0.1154015064239502,
      "step": 25383
    },
    {
      "epoch": 0.000154925537109375,
      "step": 25383,
      "training_step_time": 0.430206298828125
    },
    {
      "epoch": 0.000154931640625,
      "model_forward_time": 0.11582517623901367,
      "step": 25384
    },
    {
      "epoch": 0.000154931640625,
      "step": 25384,
      "training_step_time": 0.5061957836151123
    },
    {
      "epoch": 0.000154937744140625,
      "model_forward_time": 0.11507463455200195,
      "step": 25385
    },
    {
      "epoch": 0.000154937744140625,
      "step": 25385,
      "training_step_time": 0.39246177673339844
    },
    {
      "epoch": 0.00015494384765625,
      "model_forward_time": 0.11516332626342773,
      "step": 25386
    },
    {
      "epoch": 0.00015494384765625,
      "step": 25386,
      "training_step_time": 0.394683837890625
    },
    {
      "epoch": 0.000154949951171875,
      "model_forward_time": 0.1150968074798584,
      "step": 25387
    },
    {
      "epoch": 0.000154949951171875,
      "step": 25387,
      "training_step_time": 0.3939633369445801
    },
    {
      "epoch": 0.0001549560546875,
      "model_forward_time": 0.11547565460205078,
      "step": 25388
    },
    {
      "epoch": 0.0001549560546875,
      "step": 25388,
      "training_step_time": 0.36824774742126465
    },
    {
      "epoch": 0.000154962158203125,
      "model_forward_time": 0.1146388053894043,
      "step": 25389
    },
    {
      "epoch": 0.000154962158203125,
      "step": 25389,
      "training_step_time": 0.4357035160064697
    },
    {
      "epoch": 0.00015496826171875,
      "grad_norm": 0.13417460024356842,
      "learning_rate": 6.6521384714868e-05,
      "loss": 0.0487,
      "step": 25390
    },
    {
      "epoch": 0.00015496826171875,
      "model_forward_time": 0.11849045753479004,
      "step": 25390
    },
    {
      "epoch": 0.00015496826171875,
      "step": 25390,
      "training_step_time": 0.46004676818847656
    },
    {
      "epoch": 0.000154974365234375,
      "model_forward_time": 0.11487436294555664,
      "step": 25391
    },
    {
      "epoch": 0.000154974365234375,
      "step": 25391,
      "training_step_time": 0.40709376335144043
    },
    {
      "epoch": 0.00015498046875,
      "model_forward_time": 0.11546087265014648,
      "step": 25392
    },
    {
      "epoch": 0.00015498046875,
      "step": 25392,
      "training_step_time": 0.39026951789855957
    },
    {
      "epoch": 0.000154986572265625,
      "model_forward_time": 0.1147150993347168,
      "step": 25393
    },
    {
      "epoch": 0.000154986572265625,
      "step": 25393,
      "training_step_time": 0.3804311752319336
    },
    {
      "epoch": 0.00015499267578125,
      "model_forward_time": 0.11598038673400879,
      "step": 25394
    },
    {
      "epoch": 0.00015499267578125,
      "step": 25394,
      "training_step_time": 0.394665002822876
    },
    {
      "epoch": 0.000154998779296875,
      "model_forward_time": 0.11494851112365723,
      "step": 25395
    },
    {
      "epoch": 0.000154998779296875,
      "step": 25395,
      "training_step_time": 0.39515042304992676
    },
    {
      "epoch": 0.0001550048828125,
      "model_forward_time": 0.11515426635742188,
      "step": 25396
    },
    {
      "epoch": 0.0001550048828125,
      "step": 25396,
      "training_step_time": 0.39984869956970215
    },
    {
      "epoch": 0.000155010986328125,
      "model_forward_time": 0.1154634952545166,
      "step": 25397
    },
    {
      "epoch": 0.000155010986328125,
      "step": 25397,
      "training_step_time": 0.41268038749694824
    },
    {
      "epoch": 0.00015501708984375,
      "model_forward_time": 0.11610245704650879,
      "step": 25398
    },
    {
      "epoch": 0.00015501708984375,
      "step": 25398,
      "training_step_time": 0.43314242362976074
    },
    {
      "epoch": 0.000155023193359375,
      "model_forward_time": 0.11545515060424805,
      "step": 25399
    },
    {
      "epoch": 0.000155023193359375,
      "step": 25399,
      "training_step_time": 0.45583128929138184
    },
    {
      "epoch": 0.000155029296875,
      "grad_norm": 0.1225440576672554,
      "learning_rate": 6.649537226377915e-05,
      "loss": 0.0474,
      "step": 25400
    },
    {
      "epoch": 0.000155029296875,
      "model_forward_time": 0.11569833755493164,
      "step": 25400
    },
    {
      "epoch": 0.000155029296875,
      "step": 25400,
      "training_step_time": 0.43885278701782227
    },
    {
      "epoch": 0.000155035400390625,
      "model_forward_time": 0.1161651611328125,
      "step": 25401
    },
    {
      "epoch": 0.000155035400390625,
      "step": 25401,
      "training_step_time": 0.39560961723327637
    },
    {
      "epoch": 0.00015504150390625,
      "model_forward_time": 0.11471748352050781,
      "step": 25402
    },
    {
      "epoch": 0.00015504150390625,
      "step": 25402,
      "training_step_time": 0.39048337936401367
    },
    {
      "epoch": 0.000155047607421875,
      "model_forward_time": 0.11516046524047852,
      "step": 25403
    },
    {
      "epoch": 0.000155047607421875,
      "step": 25403,
      "training_step_time": 0.43509769439697266
    },
    {
      "epoch": 0.0001550537109375,
      "model_forward_time": 0.11487770080566406,
      "step": 25404
    },
    {
      "epoch": 0.0001550537109375,
      "step": 25404,
      "training_step_time": 0.4942021369934082
    },
    {
      "epoch": 0.000155059814453125,
      "model_forward_time": 0.115325927734375,
      "step": 25405
    },
    {
      "epoch": 0.000155059814453125,
      "step": 25405,
      "training_step_time": 0.3929288387298584
    },
    {
      "epoch": 0.00015506591796875,
      "model_forward_time": 0.11556029319763184,
      "step": 25406
    },
    {
      "epoch": 0.00015506591796875,
      "step": 25406,
      "training_step_time": 0.40082359313964844
    },
    {
      "epoch": 0.000155072021484375,
      "model_forward_time": 0.11490941047668457,
      "step": 25407
    },
    {
      "epoch": 0.000155072021484375,
      "step": 25407,
      "training_step_time": 0.39585018157958984
    },
    {
      "epoch": 0.000155078125,
      "model_forward_time": 0.1152193546295166,
      "step": 25408
    },
    {
      "epoch": 0.000155078125,
      "step": 25408,
      "training_step_time": 0.42119932174682617
    },
    {
      "epoch": 0.000155084228515625,
      "model_forward_time": 0.11503887176513672,
      "step": 25409
    },
    {
      "epoch": 0.000155084228515625,
      "step": 25409,
      "training_step_time": 0.3932681083679199
    },
    {
      "epoch": 0.00015509033203125,
      "grad_norm": 0.1450709104537964,
      "learning_rate": 6.646935480183173e-05,
      "loss": 0.0487,
      "step": 25410
    },
    {
      "epoch": 0.00015509033203125,
      "model_forward_time": 0.11543607711791992,
      "step": 25410
    },
    {
      "epoch": 0.00015509033203125,
      "step": 25410,
      "training_step_time": 0.3933227062225342
    },
    {
      "epoch": 0.000155096435546875,
      "model_forward_time": 0.11499309539794922,
      "step": 25411
    },
    {
      "epoch": 0.000155096435546875,
      "step": 25411,
      "training_step_time": 0.45232486724853516
    },
    {
      "epoch": 0.0001551025390625,
      "model_forward_time": 0.11506819725036621,
      "step": 25412
    },
    {
      "epoch": 0.0001551025390625,
      "step": 25412,
      "training_step_time": 0.3941659927368164
    },
    {
      "epoch": 0.000155108642578125,
      "model_forward_time": 0.11449742317199707,
      "step": 25413
    },
    {
      "epoch": 0.000155108642578125,
      "step": 25413,
      "training_step_time": 0.4731104373931885
    },
    {
      "epoch": 0.00015511474609375,
      "model_forward_time": 0.11525130271911621,
      "step": 25414
    },
    {
      "epoch": 0.00015511474609375,
      "step": 25414,
      "training_step_time": 0.38962340354919434
    },
    {
      "epoch": 0.000155120849609375,
      "model_forward_time": 0.11449432373046875,
      "step": 25415
    },
    {
      "epoch": 0.000155120849609375,
      "step": 25415,
      "training_step_time": 0.43717098236083984
    },
    {
      "epoch": 0.000155126953125,
      "model_forward_time": 0.1147768497467041,
      "step": 25416
    },
    {
      "epoch": 0.000155126953125,
      "step": 25416,
      "training_step_time": 0.41242480278015137
    },
    {
      "epoch": 0.000155133056640625,
      "model_forward_time": 0.11509823799133301,
      "step": 25417
    },
    {
      "epoch": 0.000155133056640625,
      "step": 25417,
      "training_step_time": 0.41470885276794434
    },
    {
      "epoch": 0.00015513916015625,
      "model_forward_time": 0.11480307579040527,
      "step": 25418
    },
    {
      "epoch": 0.00015513916015625,
      "step": 25418,
      "training_step_time": 0.428133487701416
    },
    {
      "epoch": 0.000155145263671875,
      "model_forward_time": 0.11507153511047363,
      "step": 25419
    },
    {
      "epoch": 0.000155145263671875,
      "step": 25419,
      "training_step_time": 0.494107723236084
    },
    {
      "epoch": 0.0001551513671875,
      "grad_norm": 0.1550774723291397,
      "learning_rate": 6.644333233692916e-05,
      "loss": 0.0548,
      "step": 25420
    },
    {
      "epoch": 0.0001551513671875,
      "model_forward_time": 0.11503863334655762,
      "step": 25420
    },
    {
      "epoch": 0.0001551513671875,
      "step": 25420,
      "training_step_time": 0.3909482955932617
    },
    {
      "epoch": 0.000155157470703125,
      "model_forward_time": 0.11503982543945312,
      "step": 25421
    },
    {
      "epoch": 0.000155157470703125,
      "step": 25421,
      "training_step_time": 0.38942861557006836
    },
    {
      "epoch": 0.00015516357421875,
      "model_forward_time": 0.11494755744934082,
      "step": 25422
    },
    {
      "epoch": 0.00015516357421875,
      "step": 25422,
      "training_step_time": 0.3966789245605469
    },
    {
      "epoch": 0.000155169677734375,
      "model_forward_time": 0.11446571350097656,
      "step": 25423
    },
    {
      "epoch": 0.000155169677734375,
      "step": 25423,
      "training_step_time": 0.3976767063140869
    },
    {
      "epoch": 0.00015517578125,
      "model_forward_time": 0.11516380310058594,
      "step": 25424
    },
    {
      "epoch": 0.00015517578125,
      "step": 25424,
      "training_step_time": 0.3880119323730469
    },
    {
      "epoch": 0.000155181884765625,
      "model_forward_time": 0.11512231826782227,
      "step": 25425
    },
    {
      "epoch": 0.000155181884765625,
      "step": 25425,
      "training_step_time": 0.3883700370788574
    },
    {
      "epoch": 0.00015518798828125,
      "model_forward_time": 0.11531233787536621,
      "step": 25426
    },
    {
      "epoch": 0.00015518798828125,
      "step": 25426,
      "training_step_time": 0.474229097366333
    },
    {
      "epoch": 0.000155194091796875,
      "model_forward_time": 0.11486339569091797,
      "step": 25427
    },
    {
      "epoch": 0.000155194091796875,
      "step": 25427,
      "training_step_time": 0.40930724143981934
    },
    {
      "epoch": 0.0001552001953125,
      "model_forward_time": 0.11534953117370605,
      "step": 25428
    },
    {
      "epoch": 0.0001552001953125,
      "step": 25428,
      "training_step_time": 0.40459251403808594
    },
    {
      "epoch": 0.000155206298828125,
      "model_forward_time": 0.1151127815246582,
      "step": 25429
    },
    {
      "epoch": 0.000155206298828125,
      "step": 25429,
      "training_step_time": 0.41678714752197266
    },
    {
      "epoch": 0.00015521240234375,
      "grad_norm": 0.14828768372535706,
      "learning_rate": 6.641730487697639e-05,
      "loss": 0.053,
      "step": 25430
    },
    {
      "epoch": 0.00015521240234375,
      "model_forward_time": 0.11533689498901367,
      "step": 25430
    },
    {
      "epoch": 0.00015521240234375,
      "step": 25430,
      "training_step_time": 0.421541690826416
    },
    {
      "epoch": 0.000155218505859375,
      "model_forward_time": 0.11577630043029785,
      "step": 25431
    },
    {
      "epoch": 0.000155218505859375,
      "step": 25431,
      "training_step_time": 0.39305710792541504
    },
    {
      "epoch": 0.000155224609375,
      "model_forward_time": 0.11587953567504883,
      "step": 25432
    },
    {
      "epoch": 0.000155224609375,
      "step": 25432,
      "training_step_time": 0.4533367156982422
    },
    {
      "epoch": 0.000155230712890625,
      "model_forward_time": 0.11534833908081055,
      "step": 25433
    },
    {
      "epoch": 0.000155230712890625,
      "step": 25433,
      "training_step_time": 0.4776592254638672
    },
    {
      "epoch": 0.00015523681640625,
      "model_forward_time": 0.11547136306762695,
      "step": 25434
    },
    {
      "epoch": 0.00015523681640625,
      "step": 25434,
      "training_step_time": 0.437699556350708
    },
    {
      "epoch": 0.000155242919921875,
      "model_forward_time": 0.11485719680786133,
      "step": 25435
    },
    {
      "epoch": 0.000155242919921875,
      "step": 25435,
      "training_step_time": 0.3985936641693115
    },
    {
      "epoch": 0.0001552490234375,
      "model_forward_time": 0.11512446403503418,
      "step": 25436
    },
    {
      "epoch": 0.0001552490234375,
      "step": 25436,
      "training_step_time": 0.3796820640563965
    },
    {
      "epoch": 0.000155255126953125,
      "model_forward_time": 0.1156008243560791,
      "step": 25437
    },
    {
      "epoch": 0.000155255126953125,
      "step": 25437,
      "training_step_time": 0.38694334030151367
    },
    {
      "epoch": 0.00015526123046875,
      "model_forward_time": 0.11873722076416016,
      "step": 25438
    },
    {
      "epoch": 0.00015526123046875,
      "step": 25438,
      "training_step_time": 0.3839108943939209
    },
    {
      "epoch": 0.000155267333984375,
      "model_forward_time": 0.11551856994628906,
      "step": 25439
    },
    {
      "epoch": 0.000155267333984375,
      "step": 25439,
      "training_step_time": 0.5263800621032715
    },
    {
      "epoch": 0.0001552734375,
      "grad_norm": 0.22747255861759186,
      "learning_rate": 6.639127242987988e-05,
      "loss": 0.0567,
      "step": 25440
    },
    {
      "epoch": 0.0001552734375,
      "model_forward_time": 0.11532092094421387,
      "step": 25440
    },
    {
      "epoch": 0.0001552734375,
      "step": 25440,
      "training_step_time": 0.40349745750427246
    },
    {
      "epoch": 0.000155279541015625,
      "model_forward_time": 0.11517548561096191,
      "step": 25441
    },
    {
      "epoch": 0.000155279541015625,
      "step": 25441,
      "training_step_time": 0.4034450054168701
    },
    {
      "epoch": 0.00015528564453125,
      "model_forward_time": 0.11506485939025879,
      "step": 25442
    },
    {
      "epoch": 0.00015528564453125,
      "step": 25442,
      "training_step_time": 0.4494130611419678
    },
    {
      "epoch": 0.000155291748046875,
      "model_forward_time": 0.11545777320861816,
      "step": 25443
    },
    {
      "epoch": 0.000155291748046875,
      "step": 25443,
      "training_step_time": 0.5104269981384277
    },
    {
      "epoch": 0.0001552978515625,
      "model_forward_time": 0.11435389518737793,
      "step": 25444
    },
    {
      "epoch": 0.0001552978515625,
      "step": 25444,
      "training_step_time": 0.3991119861602783
    },
    {
      "epoch": 0.000155303955078125,
      "model_forward_time": 0.1147012710571289,
      "step": 25445
    },
    {
      "epoch": 0.000155303955078125,
      "step": 25445,
      "training_step_time": 0.3986351490020752
    },
    {
      "epoch": 0.00015531005859375,
      "model_forward_time": 0.11516427993774414,
      "step": 25446
    },
    {
      "epoch": 0.00015531005859375,
      "step": 25446,
      "training_step_time": 0.40538692474365234
    },
    {
      "epoch": 0.000155316162109375,
      "model_forward_time": 0.11770129203796387,
      "step": 25447
    },
    {
      "epoch": 0.000155316162109375,
      "step": 25447,
      "training_step_time": 0.4536294937133789
    },
    {
      "epoch": 0.000155322265625,
      "model_forward_time": 0.11885976791381836,
      "step": 25448
    },
    {
      "epoch": 0.000155322265625,
      "step": 25448,
      "training_step_time": 0.41362881660461426
    },
    {
      "epoch": 0.000155328369140625,
      "model_forward_time": 0.11794352531433105,
      "step": 25449
    },
    {
      "epoch": 0.000155328369140625,
      "step": 25449,
      "training_step_time": 0.3812122344970703
    },
    {
      "epoch": 0.00015533447265625,
      "grad_norm": 0.1504412293434143,
      "learning_rate": 6.636523500354759e-05,
      "loss": 0.0521,
      "step": 25450
    },
    {
      "epoch": 0.00015533447265625,
      "model_forward_time": 0.11868882179260254,
      "step": 25450
    },
    {
      "epoch": 0.00015533447265625,
      "step": 25450,
      "training_step_time": 0.38170886039733887
    },
    {
      "epoch": 0.000155340576171875,
      "model_forward_time": 0.11840701103210449,
      "step": 25451
    },
    {
      "epoch": 0.000155340576171875,
      "step": 25451,
      "training_step_time": 0.37891316413879395
    },
    {
      "epoch": 0.0001553466796875,
      "model_forward_time": 0.11524534225463867,
      "step": 25452
    },
    {
      "epoch": 0.0001553466796875,
      "step": 25452,
      "training_step_time": 0.3969588279724121
    },
    {
      "epoch": 0.000155352783203125,
      "model_forward_time": 0.11491155624389648,
      "step": 25453
    },
    {
      "epoch": 0.000155352783203125,
      "step": 25453,
      "training_step_time": 0.39919185638427734
    },
    {
      "epoch": 0.00015535888671875,
      "model_forward_time": 0.1156306266784668,
      "step": 25454
    },
    {
      "epoch": 0.00015535888671875,
      "step": 25454,
      "training_step_time": 0.4098329544067383
    },
    {
      "epoch": 0.000155364990234375,
      "model_forward_time": 0.11499929428100586,
      "step": 25455
    },
    {
      "epoch": 0.000155364990234375,
      "step": 25455,
      "training_step_time": 0.3832368850708008
    },
    {
      "epoch": 0.00015537109375,
      "model_forward_time": 0.11565661430358887,
      "step": 25456
    },
    {
      "epoch": 0.00015537109375,
      "step": 25456,
      "training_step_time": 0.4475212097167969
    },
    {
      "epoch": 0.000155377197265625,
      "model_forward_time": 0.11591601371765137,
      "step": 25457
    },
    {
      "epoch": 0.000155377197265625,
      "step": 25457,
      "training_step_time": 0.634326696395874
    },
    {
      "epoch": 0.00015538330078125,
      "model_forward_time": 0.11593437194824219,
      "step": 25458
    },
    {
      "epoch": 0.00015538330078125,
      "step": 25458,
      "training_step_time": 0.38276028633117676
    },
    {
      "epoch": 0.000155389404296875,
      "model_forward_time": 0.11495828628540039,
      "step": 25459
    },
    {
      "epoch": 0.000155389404296875,
      "step": 25459,
      "training_step_time": 0.38780927658081055
    },
    {
      "epoch": 0.0001553955078125,
      "grad_norm": 0.16160918772220612,
      "learning_rate": 6.6339192605889e-05,
      "loss": 0.0539,
      "step": 25460
    },
    {
      "epoch": 0.0001553955078125,
      "model_forward_time": 0.11469602584838867,
      "step": 25460
    },
    {
      "epoch": 0.0001553955078125,
      "step": 25460,
      "training_step_time": 0.4466207027435303
    },
    {
      "epoch": 0.000155401611328125,
      "model_forward_time": 0.11556077003479004,
      "step": 25461
    },
    {
      "epoch": 0.000155401611328125,
      "step": 25461,
      "training_step_time": 0.3914682865142822
    },
    {
      "epoch": 0.00015540771484375,
      "model_forward_time": 0.11590886116027832,
      "step": 25462
    },
    {
      "epoch": 0.00015540771484375,
      "step": 25462,
      "training_step_time": 0.5028362274169922
    },
    {
      "epoch": 0.000155413818359375,
      "model_forward_time": 0.11504840850830078,
      "step": 25463
    },
    {
      "epoch": 0.000155413818359375,
      "step": 25463,
      "training_step_time": 0.421048641204834
    },
    {
      "epoch": 0.000155419921875,
      "model_forward_time": 0.1146841049194336,
      "step": 25464
    },
    {
      "epoch": 0.000155419921875,
      "step": 25464,
      "training_step_time": 0.3938281536102295
    },
    {
      "epoch": 0.000155426025390625,
      "model_forward_time": 0.11540341377258301,
      "step": 25465
    },
    {
      "epoch": 0.000155426025390625,
      "step": 25465,
      "training_step_time": 0.4021430015563965
    },
    {
      "epoch": 0.00015543212890625,
      "model_forward_time": 0.11452889442443848,
      "step": 25466
    },
    {
      "epoch": 0.00015543212890625,
      "step": 25466,
      "training_step_time": 0.38474416732788086
    },
    {
      "epoch": 0.000155438232421875,
      "model_forward_time": 0.11532378196716309,
      "step": 25467
    },
    {
      "epoch": 0.000155438232421875,
      "step": 25467,
      "training_step_time": 0.40897035598754883
    },
    {
      "epoch": 0.0001554443359375,
      "model_forward_time": 0.11492705345153809,
      "step": 25468
    },
    {
      "epoch": 0.0001554443359375,
      "step": 25468,
      "training_step_time": 0.39384913444519043
    },
    {
      "epoch": 0.000155450439453125,
      "model_forward_time": 0.11534619331359863,
      "step": 25469
    },
    {
      "epoch": 0.000155450439453125,
      "step": 25469,
      "training_step_time": 0.46950197219848633
    },
    {
      "epoch": 0.00015545654296875,
      "grad_norm": 0.16412736475467682,
      "learning_rate": 6.631314524481513e-05,
      "loss": 0.0512,
      "step": 25470
    },
    {
      "epoch": 0.00015545654296875,
      "model_forward_time": 0.11521720886230469,
      "step": 25470
    },
    {
      "epoch": 0.00015545654296875,
      "step": 25470,
      "training_step_time": 0.4763302803039551
    },
    {
      "epoch": 0.000155462646484375,
      "model_forward_time": 0.11525440216064453,
      "step": 25471
    },
    {
      "epoch": 0.000155462646484375,
      "step": 25471,
      "training_step_time": 0.3988223075866699
    },
    {
      "epoch": 0.00015546875,
      "model_forward_time": 0.1151578426361084,
      "step": 25472
    },
    {
      "epoch": 0.00015546875,
      "step": 25472,
      "training_step_time": 0.46750712394714355
    },
    {
      "epoch": 0.000155474853515625,
      "model_forward_time": 0.11474847793579102,
      "step": 25473
    },
    {
      "epoch": 0.000155474853515625,
      "step": 25473,
      "training_step_time": 0.39165735244750977
    },
    {
      "epoch": 0.00015548095703125,
      "model_forward_time": 0.1149137020111084,
      "step": 25474
    },
    {
      "epoch": 0.00015548095703125,
      "step": 25474,
      "training_step_time": 0.3836677074432373
    },
    {
      "epoch": 0.000155487060546875,
      "model_forward_time": 0.11537337303161621,
      "step": 25475
    },
    {
      "epoch": 0.000155487060546875,
      "step": 25475,
      "training_step_time": 0.43998122215270996
    },
    {
      "epoch": 0.0001554931640625,
      "model_forward_time": 0.11565852165222168,
      "step": 25476
    },
    {
      "epoch": 0.0001554931640625,
      "step": 25476,
      "training_step_time": 0.459423303604126
    },
    {
      "epoch": 0.000155499267578125,
      "model_forward_time": 0.11472201347351074,
      "step": 25477
    },
    {
      "epoch": 0.000155499267578125,
      "step": 25477,
      "training_step_time": 0.4994361400604248
    },
    {
      "epoch": 0.00015550537109375,
      "model_forward_time": 0.11448383331298828,
      "step": 25478
    },
    {
      "epoch": 0.00015550537109375,
      "step": 25478,
      "training_step_time": 0.4140291213989258
    },
    {
      "epoch": 0.000155511474609375,
      "model_forward_time": 0.11490583419799805,
      "step": 25479
    },
    {
      "epoch": 0.000155511474609375,
      "step": 25479,
      "training_step_time": 0.383303165435791
    },
    {
      "epoch": 0.000155517578125,
      "grad_norm": 0.16619230806827545,
      "learning_rate": 6.628709292823844e-05,
      "loss": 0.0472,
      "step": 25480
    },
    {
      "epoch": 0.000155517578125,
      "model_forward_time": 0.1152806282043457,
      "step": 25480
    },
    {
      "epoch": 0.000155517578125,
      "step": 25480,
      "training_step_time": 0.38352251052856445
    },
    {
      "epoch": 0.000155523681640625,
      "model_forward_time": 0.11491060256958008,
      "step": 25481
    },
    {
      "epoch": 0.000155523681640625,
      "step": 25481,
      "training_step_time": 0.388232946395874
    },
    {
      "epoch": 0.00015552978515625,
      "model_forward_time": 0.11541533470153809,
      "step": 25482
    },
    {
      "epoch": 0.00015552978515625,
      "step": 25482,
      "training_step_time": 0.4370086193084717
    },
    {
      "epoch": 0.000155535888671875,
      "model_forward_time": 0.11573123931884766,
      "step": 25483
    },
    {
      "epoch": 0.000155535888671875,
      "step": 25483,
      "training_step_time": 0.4165327548980713
    },
    {
      "epoch": 0.0001555419921875,
      "model_forward_time": 0.11503171920776367,
      "step": 25484
    },
    {
      "epoch": 0.0001555419921875,
      "step": 25484,
      "training_step_time": 0.43132638931274414
    },
    {
      "epoch": 0.000155548095703125,
      "model_forward_time": 0.11526131629943848,
      "step": 25485
    },
    {
      "epoch": 0.000155548095703125,
      "step": 25485,
      "training_step_time": 0.3910491466522217
    },
    {
      "epoch": 0.00015555419921875,
      "model_forward_time": 0.11537790298461914,
      "step": 25486
    },
    {
      "epoch": 0.00015555419921875,
      "step": 25486,
      "training_step_time": 0.42613768577575684
    },
    {
      "epoch": 0.000155560302734375,
      "model_forward_time": 0.1159512996673584,
      "step": 25487
    },
    {
      "epoch": 0.000155560302734375,
      "step": 25487,
      "training_step_time": 0.4077341556549072
    },
    {
      "epoch": 0.00015556640625,
      "model_forward_time": 0.11527347564697266,
      "step": 25488
    },
    {
      "epoch": 0.00015556640625,
      "step": 25488,
      "training_step_time": 0.3811912536621094
    },
    {
      "epoch": 0.000155572509765625,
      "model_forward_time": 0.11516785621643066,
      "step": 25489
    },
    {
      "epoch": 0.000155572509765625,
      "step": 25489,
      "training_step_time": 0.4045224189758301
    },
    {
      "epoch": 0.00015557861328125,
      "grad_norm": 0.10950030386447906,
      "learning_rate": 6.626103566407295e-05,
      "loss": 0.05,
      "step": 25490
    },
    {
      "epoch": 0.00015557861328125,
      "model_forward_time": 0.11532711982727051,
      "step": 25490
    },
    {
      "epoch": 0.00015557861328125,
      "step": 25490,
      "training_step_time": 0.36470985412597656
    },
    {
      "epoch": 0.000155584716796875,
      "model_forward_time": 0.11491894721984863,
      "step": 25491
    },
    {
      "epoch": 0.000155584716796875,
      "step": 25491,
      "training_step_time": 0.4549713134765625
    },
    {
      "epoch": 0.0001555908203125,
      "model_forward_time": 0.1155390739440918,
      "step": 25492
    },
    {
      "epoch": 0.0001555908203125,
      "step": 25492,
      "training_step_time": 0.4099733829498291
    },
    {
      "epoch": 0.000155596923828125,
      "model_forward_time": 0.11515593528747559,
      "step": 25493
    },
    {
      "epoch": 0.000155596923828125,
      "step": 25493,
      "training_step_time": 0.4584009647369385
    },
    {
      "epoch": 0.00015560302734375,
      "model_forward_time": 0.1157066822052002,
      "step": 25494
    },
    {
      "epoch": 0.00015560302734375,
      "step": 25494,
      "training_step_time": 0.38401293754577637
    },
    {
      "epoch": 0.000155609130859375,
      "model_forward_time": 0.11546921730041504,
      "step": 25495
    },
    {
      "epoch": 0.000155609130859375,
      "step": 25495,
      "training_step_time": 0.403454065322876
    },
    {
      "epoch": 0.000155615234375,
      "model_forward_time": 0.11454343795776367,
      "step": 25496
    },
    {
      "epoch": 0.000155615234375,
      "step": 25496,
      "training_step_time": 0.42613959312438965
    },
    {
      "epoch": 0.000155621337890625,
      "model_forward_time": 0.1158139705657959,
      "step": 25497
    },
    {
      "epoch": 0.000155621337890625,
      "step": 25497,
      "training_step_time": 0.4149935245513916
    },
    {
      "epoch": 0.00015562744140625,
      "model_forward_time": 0.11539530754089355,
      "step": 25498
    },
    {
      "epoch": 0.00015562744140625,
      "step": 25498,
      "training_step_time": 0.38610148429870605
    },
    {
      "epoch": 0.000155633544921875,
      "model_forward_time": 0.11496806144714355,
      "step": 25499
    },
    {
      "epoch": 0.000155633544921875,
      "step": 25499,
      "training_step_time": 0.4755866527557373
    },
    {
      "epoch": 0.0001556396484375,
      "grad_norm": 0.18053555488586426,
      "learning_rate": 6.623497346023418e-05,
      "loss": 0.05,
      "step": 25500
    },
    {
      "epoch": 0.0001556396484375,
      "model_forward_time": 0.11613583564758301,
      "step": 25500
    },
    {
      "epoch": 0.0001556396484375,
      "step": 25500,
      "training_step_time": 0.39292144775390625
    },
    {
      "epoch": 0.000155645751953125,
      "model_forward_time": 0.1156454086303711,
      "step": 25501
    },
    {
      "epoch": 0.000155645751953125,
      "step": 25501,
      "training_step_time": 0.39778852462768555
    },
    {
      "epoch": 0.00015565185546875,
      "model_forward_time": 0.11477208137512207,
      "step": 25502
    },
    {
      "epoch": 0.00015565185546875,
      "step": 25502,
      "training_step_time": 0.3937807083129883
    },
    {
      "epoch": 0.000155657958984375,
      "model_forward_time": 0.11565136909484863,
      "step": 25503
    },
    {
      "epoch": 0.000155657958984375,
      "step": 25503,
      "training_step_time": 0.38901710510253906
    },
    {
      "epoch": 0.0001556640625,
      "model_forward_time": 0.11486601829528809,
      "step": 25504
    },
    {
      "epoch": 0.0001556640625,
      "step": 25504,
      "training_step_time": 0.40854573249816895
    },
    {
      "epoch": 0.000155670166015625,
      "model_forward_time": 0.11537480354309082,
      "step": 25505
    },
    {
      "epoch": 0.000155670166015625,
      "step": 25505,
      "training_step_time": 0.4910886287689209
    },
    {
      "epoch": 0.00015567626953125,
      "model_forward_time": 0.11586570739746094,
      "step": 25506
    },
    {
      "epoch": 0.00015567626953125,
      "step": 25506,
      "training_step_time": 0.403916597366333
    },
    {
      "epoch": 0.000155682373046875,
      "model_forward_time": 0.11524295806884766,
      "step": 25507
    },
    {
      "epoch": 0.000155682373046875,
      "step": 25507,
      "training_step_time": 0.42726826667785645
    },
    {
      "epoch": 0.0001556884765625,
      "model_forward_time": 0.11540842056274414,
      "step": 25508
    },
    {
      "epoch": 0.0001556884765625,
      "step": 25508,
      "training_step_time": 0.3891618251800537
    },
    {
      "epoch": 0.000155694580078125,
      "model_forward_time": 0.11486244201660156,
      "step": 25509
    },
    {
      "epoch": 0.000155694580078125,
      "step": 25509,
      "training_step_time": 0.39910244941711426
    },
    {
      "epoch": 0.00015570068359375,
      "grad_norm": 0.1572939157485962,
      "learning_rate": 6.620890632463915e-05,
      "loss": 0.0536,
      "step": 25510
    },
    {
      "epoch": 0.00015570068359375,
      "model_forward_time": 0.11517047882080078,
      "step": 25510
    },
    {
      "epoch": 0.00015570068359375,
      "step": 25510,
      "training_step_time": 0.40933823585510254
    },
    {
      "epoch": 0.000155706787109375,
      "model_forward_time": 0.11577010154724121,
      "step": 25511
    },
    {
      "epoch": 0.000155706787109375,
      "step": 25511,
      "training_step_time": 0.6165745258331299
    },
    {
      "epoch": 0.000155712890625,
      "model_forward_time": 0.11507582664489746,
      "step": 25512
    },
    {
      "epoch": 0.000155712890625,
      "step": 25512,
      "training_step_time": 0.39690589904785156
    },
    {
      "epoch": 0.000155718994140625,
      "model_forward_time": 0.1150658130645752,
      "step": 25513
    },
    {
      "epoch": 0.000155718994140625,
      "step": 25513,
      "training_step_time": 0.3972775936126709
    },
    {
      "epoch": 0.00015572509765625,
      "model_forward_time": 0.11556386947631836,
      "step": 25514
    },
    {
      "epoch": 0.00015572509765625,
      "step": 25514,
      "training_step_time": 0.3889901638031006
    },
    {
      "epoch": 0.000155731201171875,
      "model_forward_time": 0.11474084854125977,
      "step": 25515
    },
    {
      "epoch": 0.000155731201171875,
      "step": 25515,
      "training_step_time": 0.44946932792663574
    },
    {
      "epoch": 0.0001557373046875,
      "model_forward_time": 0.11434412002563477,
      "step": 25516
    },
    {
      "epoch": 0.0001557373046875,
      "step": 25516,
      "training_step_time": 0.4212772846221924
    },
    {
      "epoch": 0.000155743408203125,
      "model_forward_time": 0.11497235298156738,
      "step": 25517
    },
    {
      "epoch": 0.000155743408203125,
      "step": 25517,
      "training_step_time": 0.5806906223297119
    },
    {
      "epoch": 0.00015574951171875,
      "model_forward_time": 0.11423873901367188,
      "step": 25518
    },
    {
      "epoch": 0.00015574951171875,
      "step": 25518,
      "training_step_time": 0.4377326965332031
    },
    {
      "epoch": 0.000155755615234375,
      "model_forward_time": 0.11477923393249512,
      "step": 25519
    },
    {
      "epoch": 0.000155755615234375,
      "step": 25519,
      "training_step_time": 0.420285701751709
    },
    {
      "epoch": 0.00015576171875,
      "grad_norm": 0.13532990217208862,
      "learning_rate": 6.61828342652063e-05,
      "loss": 0.0515,
      "step": 25520
    },
    {
      "epoch": 0.00015576171875,
      "model_forward_time": 0.11499309539794922,
      "step": 25520
    },
    {
      "epoch": 0.00015576171875,
      "step": 25520,
      "training_step_time": 0.4785451889038086
    },
    {
      "epoch": 0.000155767822265625,
      "model_forward_time": 0.11516666412353516,
      "step": 25521
    },
    {
      "epoch": 0.000155767822265625,
      "step": 25521,
      "training_step_time": 0.4109189510345459
    },
    {
      "epoch": 0.00015577392578125,
      "model_forward_time": 0.11446118354797363,
      "step": 25522
    },
    {
      "epoch": 0.00015577392578125,
      "step": 25522,
      "training_step_time": 0.3837745189666748
    },
    {
      "epoch": 0.000155780029296875,
      "model_forward_time": 0.11605644226074219,
      "step": 25523
    },
    {
      "epoch": 0.000155780029296875,
      "step": 25523,
      "training_step_time": 0.48869943618774414
    },
    {
      "epoch": 0.0001557861328125,
      "model_forward_time": 0.11489486694335938,
      "step": 25524
    },
    {
      "epoch": 0.0001557861328125,
      "step": 25524,
      "training_step_time": 0.38629150390625
    },
    {
      "epoch": 0.000155792236328125,
      "model_forward_time": 0.11473321914672852,
      "step": 25525
    },
    {
      "epoch": 0.000155792236328125,
      "step": 25525,
      "training_step_time": 0.3893868923187256
    },
    {
      "epoch": 0.00015579833984375,
      "model_forward_time": 0.11763310432434082,
      "step": 25526
    },
    {
      "epoch": 0.00015579833984375,
      "step": 25526,
      "training_step_time": 0.3829336166381836
    },
    {
      "epoch": 0.000155804443359375,
      "model_forward_time": 0.11504745483398438,
      "step": 25527
    },
    {
      "epoch": 0.000155804443359375,
      "step": 25527,
      "training_step_time": 0.3835604190826416
    },
    {
      "epoch": 0.000155810546875,
      "model_forward_time": 0.11470842361450195,
      "step": 25528
    },
    {
      "epoch": 0.000155810546875,
      "step": 25528,
      "training_step_time": 0.4447669982910156
    },
    {
      "epoch": 0.000155816650390625,
      "model_forward_time": 0.11469864845275879,
      "step": 25529
    },
    {
      "epoch": 0.000155816650390625,
      "step": 25529,
      "training_step_time": 0.633885383605957
    },
    {
      "epoch": 0.00015582275390625,
      "grad_norm": 0.2055361419916153,
      "learning_rate": 6.615675728985572e-05,
      "loss": 0.0565,
      "step": 25530
    },
    {
      "epoch": 0.00015582275390625,
      "model_forward_time": 0.11487603187561035,
      "step": 25530
    },
    {
      "epoch": 0.00015582275390625,
      "step": 25530,
      "training_step_time": 0.4803810119628906
    },
    {
      "epoch": 0.000155828857421875,
      "model_forward_time": 0.11429905891418457,
      "step": 25531
    },
    {
      "epoch": 0.000155828857421875,
      "step": 25531,
      "training_step_time": 0.48052453994750977
    },
    {
      "epoch": 0.0001558349609375,
      "model_forward_time": 0.11483883857727051,
      "step": 25532
    },
    {
      "epoch": 0.0001558349609375,
      "step": 25532,
      "training_step_time": 0.405104398727417
    },
    {
      "epoch": 0.000155841064453125,
      "model_forward_time": 0.11464595794677734,
      "step": 25533
    },
    {
      "epoch": 0.000155841064453125,
      "step": 25533,
      "training_step_time": 0.36242055892944336
    },
    {
      "epoch": 0.00015584716796875,
      "model_forward_time": 0.11837911605834961,
      "step": 25534
    },
    {
      "epoch": 0.00015584716796875,
      "step": 25534,
      "training_step_time": 0.4189629554748535
    },
    {
      "epoch": 0.000155853271484375,
      "model_forward_time": 0.1177511215209961,
      "step": 25535
    },
    {
      "epoch": 0.000155853271484375,
      "step": 25535,
      "training_step_time": 0.40370631217956543
    },
    {
      "epoch": 0.000155859375,
      "model_forward_time": 0.11896109580993652,
      "step": 25536
    },
    {
      "epoch": 0.000155859375,
      "step": 25536,
      "training_step_time": 0.3815922737121582
    },
    {
      "epoch": 0.000155865478515625,
      "model_forward_time": 0.11796951293945312,
      "step": 25537
    },
    {
      "epoch": 0.000155865478515625,
      "step": 25537,
      "training_step_time": 0.39435243606567383
    },
    {
      "epoch": 0.00015587158203125,
      "model_forward_time": 0.11520671844482422,
      "step": 25538
    },
    {
      "epoch": 0.00015587158203125,
      "step": 25538,
      "training_step_time": 0.3782777786254883
    },
    {
      "epoch": 0.000155877685546875,
      "model_forward_time": 0.11571645736694336,
      "step": 25539
    },
    {
      "epoch": 0.000155877685546875,
      "step": 25539,
      "training_step_time": 0.38213133811950684
    },
    {
      "epoch": 0.0001558837890625,
      "grad_norm": 0.19349201023578644,
      "learning_rate": 6.613067540650886e-05,
      "loss": 0.0508,
      "step": 25540
    },
    {
      "epoch": 0.0001558837890625,
      "model_forward_time": 0.11499381065368652,
      "step": 25540
    },
    {
      "epoch": 0.0001558837890625,
      "step": 25540,
      "training_step_time": 0.3990952968597412
    },
    {
      "epoch": 0.000155889892578125,
      "model_forward_time": 0.11553168296813965,
      "step": 25541
    },
    {
      "epoch": 0.000155889892578125,
      "step": 25541,
      "training_step_time": 0.44881677627563477
    },
    {
      "epoch": 0.00015589599609375,
      "model_forward_time": 0.11519622802734375,
      "step": 25542
    },
    {
      "epoch": 0.00015589599609375,
      "step": 25542,
      "training_step_time": 0.3812255859375
    },
    {
      "epoch": 0.000155902099609375,
      "model_forward_time": 0.11605525016784668,
      "step": 25543
    },
    {
      "epoch": 0.000155902099609375,
      "step": 25543,
      "training_step_time": 0.41335034370422363
    },
    {
      "epoch": 0.000155908203125,
      "model_forward_time": 0.11510372161865234,
      "step": 25544
    },
    {
      "epoch": 0.000155908203125,
      "step": 25544,
      "training_step_time": 0.5104663372039795
    },
    {
      "epoch": 0.000155914306640625,
      "model_forward_time": 0.11548113822937012,
      "step": 25545
    },
    {
      "epoch": 0.000155914306640625,
      "step": 25545,
      "training_step_time": 0.3944230079650879
    },
    {
      "epoch": 0.00015592041015625,
      "model_forward_time": 0.11484956741333008,
      "step": 25546
    },
    {
      "epoch": 0.00015592041015625,
      "step": 25546,
      "training_step_time": 0.40762805938720703
    },
    {
      "epoch": 0.000155926513671875,
      "model_forward_time": 0.11538290977478027,
      "step": 25547
    },
    {
      "epoch": 0.000155926513671875,
      "step": 25547,
      "training_step_time": 0.5714256763458252
    },
    {
      "epoch": 0.0001559326171875,
      "model_forward_time": 0.11480045318603516,
      "step": 25548
    },
    {
      "epoch": 0.0001559326171875,
      "step": 25548,
      "training_step_time": 0.4263293743133545
    },
    {
      "epoch": 0.000155938720703125,
      "model_forward_time": 0.11488127708435059,
      "step": 25549
    },
    {
      "epoch": 0.000155938720703125,
      "step": 25549,
      "training_step_time": 0.4279494285583496
    },
    {
      "epoch": 0.00015594482421875,
      "grad_norm": 0.17318007349967957,
      "learning_rate": 6.610458862308872e-05,
      "loss": 0.048,
      "step": 25550
    },
    {
      "epoch": 0.00015594482421875,
      "model_forward_time": 0.11528825759887695,
      "step": 25550
    },
    {
      "epoch": 0.00015594482421875,
      "step": 25550,
      "training_step_time": 0.4735414981842041
    },
    {
      "epoch": 0.000155950927734375,
      "model_forward_time": 0.11484837532043457,
      "step": 25551
    },
    {
      "epoch": 0.000155950927734375,
      "step": 25551,
      "training_step_time": 0.38430261611938477
    },
    {
      "epoch": 0.00015595703125,
      "model_forward_time": 0.1145932674407959,
      "step": 25552
    },
    {
      "epoch": 0.00015595703125,
      "step": 25552,
      "training_step_time": 0.3940999507904053
    },
    {
      "epoch": 0.000155963134765625,
      "model_forward_time": 0.1151113510131836,
      "step": 25553
    },
    {
      "epoch": 0.000155963134765625,
      "step": 25553,
      "training_step_time": 0.38282275199890137
    },
    {
      "epoch": 0.00015596923828125,
      "model_forward_time": 0.11539602279663086,
      "step": 25554
    },
    {
      "epoch": 0.00015596923828125,
      "step": 25554,
      "training_step_time": 0.3906564712524414
    },
    {
      "epoch": 0.000155975341796875,
      "model_forward_time": 0.11493897438049316,
      "step": 25555
    },
    {
      "epoch": 0.000155975341796875,
      "step": 25555,
      "training_step_time": 0.41247010231018066
    },
    {
      "epoch": 0.0001559814453125,
      "model_forward_time": 0.11506032943725586,
      "step": 25556
    },
    {
      "epoch": 0.0001559814453125,
      "step": 25556,
      "training_step_time": 0.46009325981140137
    },
    {
      "epoch": 0.000155987548828125,
      "model_forward_time": 0.11439633369445801,
      "step": 25557
    },
    {
      "epoch": 0.000155987548828125,
      "step": 25557,
      "training_step_time": 0.393643856048584
    },
    {
      "epoch": 0.00015599365234375,
      "model_forward_time": 0.11605334281921387,
      "step": 25558
    },
    {
      "epoch": 0.00015599365234375,
      "step": 25558,
      "training_step_time": 0.39479970932006836
    },
    {
      "epoch": 0.000155999755859375,
      "model_forward_time": 0.11616873741149902,
      "step": 25559
    },
    {
      "epoch": 0.000155999755859375,
      "step": 25559,
      "training_step_time": 0.6373791694641113
    },
    {
      "epoch": 0.000156005859375,
      "grad_norm": 0.1345134824514389,
      "learning_rate": 6.607849694751977e-05,
      "loss": 0.0514,
      "step": 25560
    },
    {
      "epoch": 0.000156005859375,
      "model_forward_time": 0.11451268196105957,
      "step": 25560
    },
    {
      "epoch": 0.000156005859375,
      "step": 25560,
      "training_step_time": 0.44103026390075684
    },
    {
      "epoch": 0.000156011962890625,
      "model_forward_time": 0.11452531814575195,
      "step": 25561
    },
    {
      "epoch": 0.000156011962890625,
      "step": 25561,
      "training_step_time": 0.39911532402038574
    },
    {
      "epoch": 0.00015601806640625,
      "model_forward_time": 0.11455416679382324,
      "step": 25562
    },
    {
      "epoch": 0.00015601806640625,
      "step": 25562,
      "training_step_time": 0.40293145179748535
    },
    {
      "epoch": 0.000156024169921875,
      "model_forward_time": 0.11485433578491211,
      "step": 25563
    },
    {
      "epoch": 0.000156024169921875,
      "step": 25563,
      "training_step_time": 0.4505031108856201
    },
    {
      "epoch": 0.0001560302734375,
      "model_forward_time": 0.11488127708435059,
      "step": 25564
    },
    {
      "epoch": 0.0001560302734375,
      "step": 25564,
      "training_step_time": 0.4732046127319336
    },
    {
      "epoch": 0.000156036376953125,
      "model_forward_time": 0.11481857299804688,
      "step": 25565
    },
    {
      "epoch": 0.000156036376953125,
      "step": 25565,
      "training_step_time": 0.3917117118835449
    },
    {
      "epoch": 0.00015604248046875,
      "model_forward_time": 0.11537861824035645,
      "step": 25566
    },
    {
      "epoch": 0.00015604248046875,
      "step": 25566,
      "training_step_time": 0.3862452507019043
    },
    {
      "epoch": 0.000156048583984375,
      "model_forward_time": 0.1149137020111084,
      "step": 25567
    },
    {
      "epoch": 0.000156048583984375,
      "step": 25567,
      "training_step_time": 0.3912239074707031
    },
    {
      "epoch": 0.0001560546875,
      "model_forward_time": 0.11488127708435059,
      "step": 25568
    },
    {
      "epoch": 0.0001560546875,
      "step": 25568,
      "training_step_time": 0.3845951557159424
    },
    {
      "epoch": 0.000156060791015625,
      "model_forward_time": 0.11506247520446777,
      "step": 25569
    },
    {
      "epoch": 0.000156060791015625,
      "step": 25569,
      "training_step_time": 0.39484071731567383
    },
    {
      "epoch": 0.00015606689453125,
      "grad_norm": 0.14346151053905487,
      "learning_rate": 6.6052400387728e-05,
      "loss": 0.0533,
      "step": 25570
    },
    {
      "epoch": 0.00015606689453125,
      "model_forward_time": 0.11525869369506836,
      "step": 25570
    },
    {
      "epoch": 0.00015606689453125,
      "step": 25570,
      "training_step_time": 0.40065979957580566
    },
    {
      "epoch": 0.000156072998046875,
      "model_forward_time": 0.11534976959228516,
      "step": 25571
    },
    {
      "epoch": 0.000156072998046875,
      "step": 25571,
      "training_step_time": 0.6569576263427734
    },
    {
      "epoch": 0.0001560791015625,
      "model_forward_time": 0.11441493034362793,
      "step": 25572
    },
    {
      "epoch": 0.0001560791015625,
      "step": 25572,
      "training_step_time": 0.4154479503631592
    },
    {
      "epoch": 0.000156085205078125,
      "model_forward_time": 0.11506795883178711,
      "step": 25573
    },
    {
      "epoch": 0.000156085205078125,
      "step": 25573,
      "training_step_time": 0.44274425506591797
    },
    {
      "epoch": 0.00015609130859375,
      "model_forward_time": 0.11495399475097656,
      "step": 25574
    },
    {
      "epoch": 0.00015609130859375,
      "step": 25574,
      "training_step_time": 0.4227931499481201
    },
    {
      "epoch": 0.000156097412109375,
      "model_forward_time": 0.11496615409851074,
      "step": 25575
    },
    {
      "epoch": 0.000156097412109375,
      "step": 25575,
      "training_step_time": 0.45271873474121094
    },
    {
      "epoch": 0.000156103515625,
      "model_forward_time": 0.11470270156860352,
      "step": 25576
    },
    {
      "epoch": 0.000156103515625,
      "step": 25576,
      "training_step_time": 0.4036746025085449
    },
    {
      "epoch": 0.000156109619140625,
      "model_forward_time": 0.11441469192504883,
      "step": 25577
    },
    {
      "epoch": 0.000156109619140625,
      "step": 25577,
      "training_step_time": 0.44352102279663086
    },
    {
      "epoch": 0.00015611572265625,
      "model_forward_time": 0.11469602584838867,
      "step": 25578
    },
    {
      "epoch": 0.00015611572265625,
      "step": 25578,
      "training_step_time": 0.4787940979003906
    },
    {
      "epoch": 0.000156121826171875,
      "model_forward_time": 0.11483645439147949,
      "step": 25579
    },
    {
      "epoch": 0.000156121826171875,
      "step": 25579,
      "training_step_time": 0.4027566909790039
    },
    {
      "epoch": 0.0001561279296875,
      "grad_norm": 0.13312551379203796,
      "learning_rate": 6.602629895164081e-05,
      "loss": 0.0489,
      "step": 25580
    },
    {
      "epoch": 0.0001561279296875,
      "model_forward_time": 0.11528921127319336,
      "step": 25580
    },
    {
      "epoch": 0.0001561279296875,
      "step": 25580,
      "training_step_time": 0.3985555171966553
    },
    {
      "epoch": 0.000156134033203125,
      "model_forward_time": 0.11533665657043457,
      "step": 25581
    },
    {
      "epoch": 0.000156134033203125,
      "step": 25581,
      "training_step_time": 0.40239596366882324
    },
    {
      "epoch": 0.00015614013671875,
      "model_forward_time": 0.11445188522338867,
      "step": 25582
    },
    {
      "epoch": 0.00015614013671875,
      "step": 25582,
      "training_step_time": 0.3929252624511719
    },
    {
      "epoch": 0.000156146240234375,
      "model_forward_time": 0.11479043960571289,
      "step": 25583
    },
    {
      "epoch": 0.000156146240234375,
      "step": 25583,
      "training_step_time": 0.5118043422698975
    },
    {
      "epoch": 0.00015615234375,
      "model_forward_time": 0.11535310745239258,
      "step": 25584
    },
    {
      "epoch": 0.00015615234375,
      "step": 25584,
      "training_step_time": 0.40108180046081543
    },
    {
      "epoch": 0.000156158447265625,
      "model_forward_time": 0.11483359336853027,
      "step": 25585
    },
    {
      "epoch": 0.000156158447265625,
      "step": 25585,
      "training_step_time": 0.4019904136657715
    },
    {
      "epoch": 0.00015616455078125,
      "model_forward_time": 0.11570906639099121,
      "step": 25586
    },
    {
      "epoch": 0.00015616455078125,
      "step": 25586,
      "training_step_time": 0.41061902046203613
    },
    {
      "epoch": 0.000156170654296875,
      "model_forward_time": 0.11512899398803711,
      "step": 25587
    },
    {
      "epoch": 0.000156170654296875,
      "step": 25587,
      "training_step_time": 0.42935800552368164
    },
    {
      "epoch": 0.0001561767578125,
      "model_forward_time": 0.11422610282897949,
      "step": 25588
    },
    {
      "epoch": 0.0001561767578125,
      "step": 25588,
      "training_step_time": 0.41773247718811035
    },
    {
      "epoch": 0.000156182861328125,
      "model_forward_time": 0.11444568634033203,
      "step": 25589
    },
    {
      "epoch": 0.000156182861328125,
      "step": 25589,
      "training_step_time": 0.5521609783172607
    },
    {
      "epoch": 0.00015618896484375,
      "grad_norm": 0.16945558786392212,
      "learning_rate": 6.600019264718713e-05,
      "loss": 0.0489,
      "step": 25590
    },
    {
      "epoch": 0.00015618896484375,
      "model_forward_time": 0.11458730697631836,
      "step": 25590
    },
    {
      "epoch": 0.00015618896484375,
      "step": 25590,
      "training_step_time": 0.36624956130981445
    },
    {
      "epoch": 0.000156195068359375,
      "model_forward_time": 0.11504244804382324,
      "step": 25591
    },
    {
      "epoch": 0.000156195068359375,
      "step": 25591,
      "training_step_time": 0.43965649604797363
    },
    {
      "epoch": 0.000156201171875,
      "model_forward_time": 0.11584949493408203,
      "step": 25592
    },
    {
      "epoch": 0.000156201171875,
      "step": 25592,
      "training_step_time": 0.4055330753326416
    },
    {
      "epoch": 0.000156207275390625,
      "model_forward_time": 0.11439919471740723,
      "step": 25593
    },
    {
      "epoch": 0.000156207275390625,
      "step": 25593,
      "training_step_time": 0.3981151580810547
    },
    {
      "epoch": 0.00015621337890625,
      "model_forward_time": 0.11424446105957031,
      "step": 25594
    },
    {
      "epoch": 0.00015621337890625,
      "step": 25594,
      "training_step_time": 0.39348363876342773
    },
    {
      "epoch": 0.000156219482421875,
      "model_forward_time": 0.11534810066223145,
      "step": 25595
    },
    {
      "epoch": 0.000156219482421875,
      "step": 25595,
      "training_step_time": 0.43228769302368164
    },
    {
      "epoch": 0.0001562255859375,
      "model_forward_time": 0.11484789848327637,
      "step": 25596
    },
    {
      "epoch": 0.0001562255859375,
      "step": 25596,
      "training_step_time": 0.39429807662963867
    },
    {
      "epoch": 0.000156231689453125,
      "model_forward_time": 0.1146233081817627,
      "step": 25597
    },
    {
      "epoch": 0.000156231689453125,
      "step": 25597,
      "training_step_time": 0.39464473724365234
    },
    {
      "epoch": 0.00015623779296875,
      "model_forward_time": 0.11510419845581055,
      "step": 25598
    },
    {
      "epoch": 0.00015623779296875,
      "step": 25598,
      "training_step_time": 0.3841526508331299
    },
    {
      "epoch": 0.000156243896484375,
      "model_forward_time": 0.11496210098266602,
      "step": 25599
    },
    {
      "epoch": 0.000156243896484375,
      "step": 25599,
      "training_step_time": 0.4640026092529297
    },
    {
      "epoch": 0.00015625,
      "grad_norm": 0.13634167611598969,
      "learning_rate": 6.59740814822974e-05,
      "loss": 0.0471,
      "step": 25600
    },
    {
      "epoch": 0.00015625,
      "model_forward_time": 0.1146697998046875,
      "step": 25600
    },
    {
      "epoch": 0.00015625,
      "step": 25600,
      "training_step_time": 0.46285414695739746
    },
    {
      "epoch": 0.000156256103515625,
      "model_forward_time": 0.11541199684143066,
      "step": 25601
    },
    {
      "epoch": 0.000156256103515625,
      "step": 25601,
      "training_step_time": 0.49607014656066895
    },
    {
      "epoch": 0.00015626220703125,
      "model_forward_time": 0.11488747596740723,
      "step": 25602
    },
    {
      "epoch": 0.00015626220703125,
      "step": 25602,
      "training_step_time": 0.4870128631591797
    },
    {
      "epoch": 0.000156268310546875,
      "model_forward_time": 0.1147913932800293,
      "step": 25603
    },
    {
      "epoch": 0.000156268310546875,
      "step": 25603,
      "training_step_time": 0.44634175300598145
    },
    {
      "epoch": 0.0001562744140625,
      "model_forward_time": 0.1148841381072998,
      "step": 25604
    },
    {
      "epoch": 0.0001562744140625,
      "step": 25604,
      "training_step_time": 0.38189148902893066
    },
    {
      "epoch": 0.000156280517578125,
      "model_forward_time": 0.11484193801879883,
      "step": 25605
    },
    {
      "epoch": 0.000156280517578125,
      "step": 25605,
      "training_step_time": 0.43060922622680664
    },
    {
      "epoch": 0.00015628662109375,
      "model_forward_time": 0.11468243598937988,
      "step": 25606
    },
    {
      "epoch": 0.00015628662109375,
      "step": 25606,
      "training_step_time": 0.42835426330566406
    },
    {
      "epoch": 0.000156292724609375,
      "model_forward_time": 0.11439204216003418,
      "step": 25607
    },
    {
      "epoch": 0.000156292724609375,
      "step": 25607,
      "training_step_time": 0.3925323486328125
    },
    {
      "epoch": 0.000156298828125,
      "model_forward_time": 0.11471343040466309,
      "step": 25608
    },
    {
      "epoch": 0.000156298828125,
      "step": 25608,
      "training_step_time": 0.3926410675048828
    },
    {
      "epoch": 0.000156304931640625,
      "model_forward_time": 0.1149444580078125,
      "step": 25609
    },
    {
      "epoch": 0.000156304931640625,
      "step": 25609,
      "training_step_time": 0.3921022415161133
    },
    {
      "epoch": 0.00015631103515625,
      "grad_norm": 0.09491181373596191,
      "learning_rate": 6.594796546490351e-05,
      "loss": 0.046,
      "step": 25610
    },
    {
      "epoch": 0.00015631103515625,
      "model_forward_time": 0.11543703079223633,
      "step": 25610
    },
    {
      "epoch": 0.00015631103515625,
      "step": 25610,
      "training_step_time": 0.39593505859375
    },
    {
      "epoch": 0.000156317138671875,
      "model_forward_time": 0.11486530303955078,
      "step": 25611
    },
    {
      "epoch": 0.000156317138671875,
      "step": 25611,
      "training_step_time": 0.3896980285644531
    },
    {
      "epoch": 0.0001563232421875,
      "model_forward_time": 0.11509037017822266,
      "step": 25612
    },
    {
      "epoch": 0.0001563232421875,
      "step": 25612,
      "training_step_time": 0.3811507225036621
    },
    {
      "epoch": 0.000156329345703125,
      "model_forward_time": 0.11496520042419434,
      "step": 25613
    },
    {
      "epoch": 0.000156329345703125,
      "step": 25613,
      "training_step_time": 0.3959794044494629
    },
    {
      "epoch": 0.00015633544921875,
      "model_forward_time": 0.1157083511352539,
      "step": 25614
    },
    {
      "epoch": 0.00015633544921875,
      "step": 25614,
      "training_step_time": 0.40498900413513184
    },
    {
      "epoch": 0.000156341552734375,
      "model_forward_time": 0.11528420448303223,
      "step": 25615
    },
    {
      "epoch": 0.000156341552734375,
      "step": 25615,
      "training_step_time": 0.45359277725219727
    },
    {
      "epoch": 0.00015634765625,
      "model_forward_time": 0.11515092849731445,
      "step": 25616
    },
    {
      "epoch": 0.00015634765625,
      "step": 25616,
      "training_step_time": 0.4728546142578125
    },
    {
      "epoch": 0.000156353759765625,
      "model_forward_time": 0.11466050148010254,
      "step": 25617
    },
    {
      "epoch": 0.000156353759765625,
      "step": 25617,
      "training_step_time": 0.3953423500061035
    },
    {
      "epoch": 0.00015635986328125,
      "model_forward_time": 0.11499786376953125,
      "step": 25618
    },
    {
      "epoch": 0.00015635986328125,
      "step": 25618,
      "training_step_time": 0.3895530700683594
    },
    {
      "epoch": 0.000156365966796875,
      "model_forward_time": 0.11491751670837402,
      "step": 25619
    },
    {
      "epoch": 0.000156365966796875,
      "step": 25619,
      "training_step_time": 0.444777250289917
    },
    {
      "epoch": 0.0001563720703125,
      "grad_norm": 0.0815591961145401,
      "learning_rate": 6.592184460293877e-05,
      "loss": 0.0454,
      "step": 25620
    },
    {
      "epoch": 0.0001563720703125,
      "model_forward_time": 0.11535096168518066,
      "step": 25620
    },
    {
      "epoch": 0.0001563720703125,
      "step": 25620,
      "training_step_time": 0.49146270751953125
    },
    {
      "epoch": 0.000156378173828125,
      "model_forward_time": 0.11494588851928711,
      "step": 25621
    },
    {
      "epoch": 0.000156378173828125,
      "step": 25621,
      "training_step_time": 0.41760802268981934
    },
    {
      "epoch": 0.00015638427734375,
      "model_forward_time": 0.11521124839782715,
      "step": 25622
    },
    {
      "epoch": 0.00015638427734375,
      "step": 25622,
      "training_step_time": 0.39795875549316406
    },
    {
      "epoch": 0.000156390380859375,
      "model_forward_time": 0.11500859260559082,
      "step": 25623
    },
    {
      "epoch": 0.000156390380859375,
      "step": 25623,
      "training_step_time": 0.3883070945739746
    },
    {
      "epoch": 0.000156396484375,
      "model_forward_time": 0.1152181625366211,
      "step": 25624
    },
    {
      "epoch": 0.000156396484375,
      "step": 25624,
      "training_step_time": 0.3896613121032715
    },
    {
      "epoch": 0.000156402587890625,
      "model_forward_time": 0.11489582061767578,
      "step": 25625
    },
    {
      "epoch": 0.000156402587890625,
      "step": 25625,
      "training_step_time": 0.3854973316192627
    },
    {
      "epoch": 0.00015640869140625,
      "model_forward_time": 0.11530637741088867,
      "step": 25626
    },
    {
      "epoch": 0.00015640869140625,
      "step": 25626,
      "training_step_time": 0.3940303325653076
    },
    {
      "epoch": 0.000156414794921875,
      "model_forward_time": 0.11591124534606934,
      "step": 25627
    },
    {
      "epoch": 0.000156414794921875,
      "step": 25627,
      "training_step_time": 0.39284634590148926
    },
    {
      "epoch": 0.0001564208984375,
      "model_forward_time": 0.11525344848632812,
      "step": 25628
    },
    {
      "epoch": 0.0001564208984375,
      "step": 25628,
      "training_step_time": 0.3920609951019287
    },
    {
      "epoch": 0.000156427001953125,
      "model_forward_time": 0.1153266429901123,
      "step": 25629
    },
    {
      "epoch": 0.000156427001953125,
      "step": 25629,
      "training_step_time": 0.47511768341064453
    },
    {
      "epoch": 0.00015643310546875,
      "grad_norm": 0.11019597202539444,
      "learning_rate": 6.589571890433803e-05,
      "loss": 0.049,
      "step": 25630
    },
    {
      "epoch": 0.00015643310546875,
      "model_forward_time": 0.11511349678039551,
      "step": 25630
    },
    {
      "epoch": 0.00015643310546875,
      "step": 25630,
      "training_step_time": 0.43834352493286133
    },
    {
      "epoch": 0.000156439208984375,
      "model_forward_time": 0.11470913887023926,
      "step": 25631
    },
    {
      "epoch": 0.000156439208984375,
      "step": 25631,
      "training_step_time": 0.4411165714263916
    },
    {
      "epoch": 0.0001564453125,
      "model_forward_time": 0.1152801513671875,
      "step": 25632
    },
    {
      "epoch": 0.0001564453125,
      "step": 25632,
      "training_step_time": 0.39661669731140137
    },
    {
      "epoch": 0.000156451416015625,
      "model_forward_time": 0.11475515365600586,
      "step": 25633
    },
    {
      "epoch": 0.000156451416015625,
      "step": 25633,
      "training_step_time": 0.4019196033477783
    },
    {
      "epoch": 0.00015645751953125,
      "model_forward_time": 0.11536860466003418,
      "step": 25634
    },
    {
      "epoch": 0.00015645751953125,
      "step": 25634,
      "training_step_time": 0.4847230911254883
    },
    {
      "epoch": 0.000156463623046875,
      "model_forward_time": 0.11533260345458984,
      "step": 25635
    },
    {
      "epoch": 0.000156463623046875,
      "step": 25635,
      "training_step_time": 0.5013480186462402
    },
    {
      "epoch": 0.0001564697265625,
      "model_forward_time": 0.1150813102722168,
      "step": 25636
    },
    {
      "epoch": 0.0001564697265625,
      "step": 25636,
      "training_step_time": 0.38747477531433105
    },
    {
      "epoch": 0.000156475830078125,
      "model_forward_time": 0.11492276191711426,
      "step": 25637
    },
    {
      "epoch": 0.000156475830078125,
      "step": 25637,
      "training_step_time": 0.389789342880249
    },
    {
      "epoch": 0.00015648193359375,
      "model_forward_time": 0.11467385292053223,
      "step": 25638
    },
    {
      "epoch": 0.00015648193359375,
      "step": 25638,
      "training_step_time": 0.4086136817932129
    },
    {
      "epoch": 0.000156488037109375,
      "model_forward_time": 0.11437201499938965,
      "step": 25639
    },
    {
      "epoch": 0.000156488037109375,
      "step": 25639,
      "training_step_time": 0.39728856086730957
    },
    {
      "epoch": 0.000156494140625,
      "grad_norm": 0.11534415185451508,
      "learning_rate": 6.586958837703759e-05,
      "loss": 0.0464,
      "step": 25640
    },
    {
      "epoch": 0.000156494140625,
      "model_forward_time": 0.11508560180664062,
      "step": 25640
    },
    {
      "epoch": 0.000156494140625,
      "step": 25640,
      "training_step_time": 0.39232897758483887
    },
    {
      "epoch": 0.000156500244140625,
      "model_forward_time": 0.11563348770141602,
      "step": 25641
    },
    {
      "epoch": 0.000156500244140625,
      "step": 25641,
      "training_step_time": 0.3812854290008545
    },
    {
      "epoch": 0.00015650634765625,
      "model_forward_time": 0.11506009101867676,
      "step": 25642
    },
    {
      "epoch": 0.00015650634765625,
      "step": 25642,
      "training_step_time": 0.3906569480895996
    },
    {
      "epoch": 0.000156512451171875,
      "model_forward_time": 0.1156156063079834,
      "step": 25643
    },
    {
      "epoch": 0.000156512451171875,
      "step": 25643,
      "training_step_time": 0.40519094467163086
    },
    {
      "epoch": 0.0001565185546875,
      "model_forward_time": 0.11534953117370605,
      "step": 25644
    },
    {
      "epoch": 0.0001565185546875,
      "step": 25644,
      "training_step_time": 0.4706540107727051
    },
    {
      "epoch": 0.000156524658203125,
      "model_forward_time": 0.11529970169067383,
      "step": 25645
    },
    {
      "epoch": 0.000156524658203125,
      "step": 25645,
      "training_step_time": 0.40347957611083984
    },
    {
      "epoch": 0.00015653076171875,
      "model_forward_time": 0.11720919609069824,
      "step": 25646
    },
    {
      "epoch": 0.00015653076171875,
      "step": 25646,
      "training_step_time": 0.4431498050689697
    },
    {
      "epoch": 0.000156536865234375,
      "model_forward_time": 0.11528348922729492,
      "step": 25647
    },
    {
      "epoch": 0.000156536865234375,
      "step": 25647,
      "training_step_time": 0.38179564476013184
    },
    {
      "epoch": 0.00015654296875,
      "model_forward_time": 0.11523962020874023,
      "step": 25648
    },
    {
      "epoch": 0.00015654296875,
      "step": 25648,
      "training_step_time": 0.40204524993896484
    },
    {
      "epoch": 0.000156549072265625,
      "model_forward_time": 0.11619257926940918,
      "step": 25649
    },
    {
      "epoch": 0.000156549072265625,
      "step": 25649,
      "training_step_time": 0.4254436492919922
    },
    {
      "epoch": 0.00015655517578125,
      "grad_norm": 0.11186867207288742,
      "learning_rate": 6.584345302897523e-05,
      "loss": 0.0455,
      "step": 25650
    },
    {
      "epoch": 0.00015655517578125,
      "model_forward_time": 0.1165168285369873,
      "step": 25650
    },
    {
      "epoch": 0.00015655517578125,
      "step": 25650,
      "training_step_time": 0.43083858489990234
    },
    {
      "epoch": 0.000156561279296875,
      "model_forward_time": 0.11577582359313965,
      "step": 25651
    },
    {
      "epoch": 0.000156561279296875,
      "step": 25651,
      "training_step_time": 0.39705371856689453
    },
    {
      "epoch": 0.0001565673828125,
      "model_forward_time": 0.11548948287963867,
      "step": 25652
    },
    {
      "epoch": 0.0001565673828125,
      "step": 25652,
      "training_step_time": 0.39021801948547363
    },
    {
      "epoch": 0.000156573486328125,
      "model_forward_time": 0.11514496803283691,
      "step": 25653
    },
    {
      "epoch": 0.000156573486328125,
      "step": 25653,
      "training_step_time": 0.38110780715942383
    },
    {
      "epoch": 0.00015657958984375,
      "model_forward_time": 0.11535286903381348,
      "step": 25654
    },
    {
      "epoch": 0.00015657958984375,
      "step": 25654,
      "training_step_time": 0.4296598434448242
    },
    {
      "epoch": 0.000156585693359375,
      "model_forward_time": 0.11536026000976562,
      "step": 25655
    },
    {
      "epoch": 0.000156585693359375,
      "step": 25655,
      "training_step_time": 0.399219274520874
    },
    {
      "epoch": 0.000156591796875,
      "model_forward_time": 0.1155083179473877,
      "step": 25656
    },
    {
      "epoch": 0.000156591796875,
      "step": 25656,
      "training_step_time": 0.39747071266174316
    },
    {
      "epoch": 0.000156597900390625,
      "model_forward_time": 0.11504602432250977,
      "step": 25657
    },
    {
      "epoch": 0.000156597900390625,
      "step": 25657,
      "training_step_time": 0.4180724620819092
    },
    {
      "epoch": 0.00015660400390625,
      "model_forward_time": 0.11491227149963379,
      "step": 25658
    },
    {
      "epoch": 0.00015660400390625,
      "step": 25658,
      "training_step_time": 0.4205963611602783
    },
    {
      "epoch": 0.000156610107421875,
      "model_forward_time": 0.11480474472045898,
      "step": 25659
    },
    {
      "epoch": 0.000156610107421875,
      "step": 25659,
      "training_step_time": 0.4689218997955322
    },
    {
      "epoch": 0.0001566162109375,
      "grad_norm": 0.15895555913448334,
      "learning_rate": 6.581731286809014e-05,
      "loss": 0.0476,
      "step": 25660
    },
    {
      "epoch": 0.0001566162109375,
      "model_forward_time": 0.11592936515808105,
      "step": 25660
    },
    {
      "epoch": 0.0001566162109375,
      "step": 25660,
      "training_step_time": 0.48587751388549805
    },
    {
      "epoch": 0.000156622314453125,
      "model_forward_time": 0.11492180824279785,
      "step": 25661
    },
    {
      "epoch": 0.000156622314453125,
      "step": 25661,
      "training_step_time": 0.39754557609558105
    },
    {
      "epoch": 0.00015662841796875,
      "model_forward_time": 0.115264892578125,
      "step": 25662
    },
    {
      "epoch": 0.00015662841796875,
      "step": 25662,
      "training_step_time": 0.39286184310913086
    },
    {
      "epoch": 0.000156634521484375,
      "model_forward_time": 0.1151421070098877,
      "step": 25663
    },
    {
      "epoch": 0.000156634521484375,
      "step": 25663,
      "training_step_time": 0.3942282199859619
    },
    {
      "epoch": 0.000156640625,
      "model_forward_time": 0.11543107032775879,
      "step": 25664
    },
    {
      "epoch": 0.000156640625,
      "step": 25664,
      "training_step_time": 0.4931526184082031
    },
    {
      "epoch": 0.000156646728515625,
      "model_forward_time": 0.11612462997436523,
      "step": 25665
    },
    {
      "epoch": 0.000156646728515625,
      "step": 25665,
      "training_step_time": 0.4919121265411377
    },
    {
      "epoch": 0.00015665283203125,
      "model_forward_time": 0.11615228652954102,
      "step": 25666
    },
    {
      "epoch": 0.00015665283203125,
      "step": 25666,
      "training_step_time": 0.4129178524017334
    },
    {
      "epoch": 0.000156658935546875,
      "model_forward_time": 0.11564254760742188,
      "step": 25667
    },
    {
      "epoch": 0.000156658935546875,
      "step": 25667,
      "training_step_time": 0.40315818786621094
    },
    {
      "epoch": 0.0001566650390625,
      "model_forward_time": 0.11562561988830566,
      "step": 25668
    },
    {
      "epoch": 0.0001566650390625,
      "step": 25668,
      "training_step_time": 0.37937426567077637
    },
    {
      "epoch": 0.000156671142578125,
      "model_forward_time": 0.11543774604797363,
      "step": 25669
    },
    {
      "epoch": 0.000156671142578125,
      "step": 25669,
      "training_step_time": 0.3776414394378662
    },
    {
      "epoch": 0.00015667724609375,
      "grad_norm": 0.1359298974275589,
      "learning_rate": 6.579116790232305e-05,
      "loss": 0.044,
      "step": 25670
    },
    {
      "epoch": 0.00015667724609375,
      "model_forward_time": 0.1148688793182373,
      "step": 25670
    },
    {
      "epoch": 0.00015667724609375,
      "step": 25670,
      "training_step_time": 0.43360233306884766
    },
    {
      "epoch": 0.000156683349609375,
      "model_forward_time": 0.11498808860778809,
      "step": 25671
    },
    {
      "epoch": 0.000156683349609375,
      "step": 25671,
      "training_step_time": 0.3819248676300049
    },
    {
      "epoch": 0.000156689453125,
      "model_forward_time": 0.1151726245880127,
      "step": 25672
    },
    {
      "epoch": 0.000156689453125,
      "step": 25672,
      "training_step_time": 0.49454617500305176
    },
    {
      "epoch": 0.000156695556640625,
      "model_forward_time": 0.11448192596435547,
      "step": 25673
    },
    {
      "epoch": 0.000156695556640625,
      "step": 25673,
      "training_step_time": 0.4632527828216553
    },
    {
      "epoch": 0.00015670166015625,
      "model_forward_time": 0.11507439613342285,
      "step": 25674
    },
    {
      "epoch": 0.00015670166015625,
      "step": 25674,
      "training_step_time": 0.48904943466186523
    },
    {
      "epoch": 0.000156707763671875,
      "model_forward_time": 0.11464500427246094,
      "step": 25675
    },
    {
      "epoch": 0.000156707763671875,
      "step": 25675,
      "training_step_time": 0.4076404571533203
    },
    {
      "epoch": 0.0001567138671875,
      "model_forward_time": 0.11481094360351562,
      "step": 25676
    },
    {
      "epoch": 0.0001567138671875,
      "step": 25676,
      "training_step_time": 0.3849914073944092
    },
    {
      "epoch": 0.000156719970703125,
      "model_forward_time": 0.11449790000915527,
      "step": 25677
    },
    {
      "epoch": 0.000156719970703125,
      "step": 25677,
      "training_step_time": 0.39032793045043945
    },
    {
      "epoch": 0.00015672607421875,
      "model_forward_time": 0.12136983871459961,
      "step": 25678
    },
    {
      "epoch": 0.00015672607421875,
      "step": 25678,
      "training_step_time": 0.4750247001647949
    },
    {
      "epoch": 0.000156732177734375,
      "model_forward_time": 0.11800408363342285,
      "step": 25679
    },
    {
      "epoch": 0.000156732177734375,
      "step": 25679,
      "training_step_time": 0.4533042907714844
    },
    {
      "epoch": 0.00015673828125,
      "grad_norm": 0.12128701061010361,
      "learning_rate": 6.576501813961609e-05,
      "loss": 0.0463,
      "step": 25680
    },
    {
      "epoch": 0.00015673828125,
      "model_forward_time": 0.11892819404602051,
      "step": 25680
    },
    {
      "epoch": 0.00015673828125,
      "step": 25680,
      "training_step_time": 0.38771891593933105
    },
    {
      "epoch": 0.000156744384765625,
      "model_forward_time": 0.11806416511535645,
      "step": 25681
    },
    {
      "epoch": 0.000156744384765625,
      "step": 25681,
      "training_step_time": 0.3845973014831543
    },
    {
      "epoch": 0.00015675048828125,
      "model_forward_time": 0.12208271026611328,
      "step": 25682
    },
    {
      "epoch": 0.00015675048828125,
      "step": 25682,
      "training_step_time": 0.4112393856048584
    },
    {
      "epoch": 0.000156756591796875,
      "model_forward_time": 0.11986255645751953,
      "step": 25683
    },
    {
      "epoch": 0.000156756591796875,
      "step": 25683,
      "training_step_time": 0.3808426856994629
    },
    {
      "epoch": 0.0001567626953125,
      "model_forward_time": 0.1183629035949707,
      "step": 25684
    },
    {
      "epoch": 0.0001567626953125,
      "step": 25684,
      "training_step_time": 0.4551410675048828
    },
    {
      "epoch": 0.000156768798828125,
      "model_forward_time": 0.11795544624328613,
      "step": 25685
    },
    {
      "epoch": 0.000156768798828125,
      "step": 25685,
      "training_step_time": 0.38764262199401855
    },
    {
      "epoch": 0.00015677490234375,
      "model_forward_time": 0.1188654899597168,
      "step": 25686
    },
    {
      "epoch": 0.00015677490234375,
      "step": 25686,
      "training_step_time": 0.4463198184967041
    },
    {
      "epoch": 0.000156781005859375,
      "model_forward_time": 0.12043595314025879,
      "step": 25687
    },
    {
      "epoch": 0.000156781005859375,
      "step": 25687,
      "training_step_time": 0.4744722843170166
    },
    {
      "epoch": 0.000156787109375,
      "model_forward_time": 0.11568355560302734,
      "step": 25688
    },
    {
      "epoch": 0.000156787109375,
      "step": 25688,
      "training_step_time": 0.4146087169647217
    },
    {
      "epoch": 0.000156793212890625,
      "model_forward_time": 0.11557579040527344,
      "step": 25689
    },
    {
      "epoch": 0.000156793212890625,
      "step": 25689,
      "training_step_time": 0.38780808448791504
    },
    {
      "epoch": 0.00015679931640625,
      "grad_norm": 0.1553526073694229,
      "learning_rate": 6.573886358791285e-05,
      "loss": 0.0483,
      "step": 25690
    },
    {
      "epoch": 0.00015679931640625,
      "model_forward_time": 0.11550378799438477,
      "step": 25690
    },
    {
      "epoch": 0.00015679931640625,
      "step": 25690,
      "training_step_time": 0.3862462043762207
    },
    {
      "epoch": 0.000156805419921875,
      "model_forward_time": 0.11542129516601562,
      "step": 25691
    },
    {
      "epoch": 0.000156805419921875,
      "step": 25691,
      "training_step_time": 0.39020776748657227
    },
    {
      "epoch": 0.0001568115234375,
      "model_forward_time": 0.11634230613708496,
      "step": 25692
    },
    {
      "epoch": 0.0001568115234375,
      "step": 25692,
      "training_step_time": 0.5211589336395264
    },
    {
      "epoch": 0.000156817626953125,
      "model_forward_time": 0.11529779434204102,
      "step": 25693
    },
    {
      "epoch": 0.000156817626953125,
      "step": 25693,
      "training_step_time": 0.46681880950927734
    },
    {
      "epoch": 0.00015682373046875,
      "model_forward_time": 0.11580276489257812,
      "step": 25694
    },
    {
      "epoch": 0.00015682373046875,
      "step": 25694,
      "training_step_time": 0.41318678855895996
    },
    {
      "epoch": 0.000156829833984375,
      "model_forward_time": 0.1148366928100586,
      "step": 25695
    },
    {
      "epoch": 0.000156829833984375,
      "step": 25695,
      "training_step_time": 0.39336705207824707
    },
    {
      "epoch": 0.0001568359375,
      "model_forward_time": 0.11496925354003906,
      "step": 25696
    },
    {
      "epoch": 0.0001568359375,
      "step": 25696,
      "training_step_time": 0.3941333293914795
    },
    {
      "epoch": 0.000156842041015625,
      "model_forward_time": 0.11586880683898926,
      "step": 25697
    },
    {
      "epoch": 0.000156842041015625,
      "step": 25697,
      "training_step_time": 0.417834997177124
    },
    {
      "epoch": 0.00015684814453125,
      "model_forward_time": 0.11494636535644531,
      "step": 25698
    },
    {
      "epoch": 0.00015684814453125,
      "step": 25698,
      "training_step_time": 0.4540431499481201
    },
    {
      "epoch": 0.000156854248046875,
      "model_forward_time": 0.1155095100402832,
      "step": 25699
    },
    {
      "epoch": 0.000156854248046875,
      "step": 25699,
      "training_step_time": 0.40035533905029297
    },
    {
      "epoch": 0.0001568603515625,
      "grad_norm": 0.14603620767593384,
      "learning_rate": 6.571270425515843e-05,
      "loss": 0.0498,
      "step": 25700
    },
    {
      "epoch": 0.0001568603515625,
      "model_forward_time": 0.11542916297912598,
      "step": 25700
    },
    {
      "epoch": 0.0001568603515625,
      "step": 25700,
      "training_step_time": 0.40752267837524414
    },
    {
      "epoch": 0.000156866455078125,
      "model_forward_time": 0.11574268341064453,
      "step": 25701
    },
    {
      "epoch": 0.000156866455078125,
      "step": 25701,
      "training_step_time": 0.4628114700317383
    },
    {
      "epoch": 0.00015687255859375,
      "model_forward_time": 0.11543154716491699,
      "step": 25702
    },
    {
      "epoch": 0.00015687255859375,
      "step": 25702,
      "training_step_time": 0.44473862648010254
    },
    {
      "epoch": 0.000156878662109375,
      "model_forward_time": 0.11620783805847168,
      "step": 25703
    },
    {
      "epoch": 0.000156878662109375,
      "step": 25703,
      "training_step_time": 0.48870372772216797
    },
    {
      "epoch": 0.000156884765625,
      "model_forward_time": 0.11531543731689453,
      "step": 25704
    },
    {
      "epoch": 0.000156884765625,
      "step": 25704,
      "training_step_time": 0.4861578941345215
    },
    {
      "epoch": 0.000156890869140625,
      "model_forward_time": 0.1154024600982666,
      "step": 25705
    },
    {
      "epoch": 0.000156890869140625,
      "step": 25705,
      "training_step_time": 0.3923366069793701
    },
    {
      "epoch": 0.00015689697265625,
      "model_forward_time": 0.11497950553894043,
      "step": 25706
    },
    {
      "epoch": 0.00015689697265625,
      "step": 25706,
      "training_step_time": 0.3633723258972168
    },
    {
      "epoch": 0.000156903076171875,
      "model_forward_time": 0.11526226997375488,
      "step": 25707
    },
    {
      "epoch": 0.000156903076171875,
      "step": 25707,
      "training_step_time": 0.4543642997741699
    },
    {
      "epoch": 0.0001569091796875,
      "model_forward_time": 0.11956000328063965,
      "step": 25708
    },
    {
      "epoch": 0.0001569091796875,
      "step": 25708,
      "training_step_time": 0.427476167678833
    },
    {
      "epoch": 0.000156915283203125,
      "model_forward_time": 0.11801338195800781,
      "step": 25709
    },
    {
      "epoch": 0.000156915283203125,
      "step": 25709,
      "training_step_time": 0.3774142265319824
    },
    {
      "epoch": 0.00015692138671875,
      "grad_norm": 0.1699858456850052,
      "learning_rate": 6.568654014929932e-05,
      "loss": 0.0463,
      "step": 25710
    },
    {
      "epoch": 0.00015692138671875,
      "model_forward_time": 0.11768364906311035,
      "step": 25710
    },
    {
      "epoch": 0.00015692138671875,
      "step": 25710,
      "training_step_time": 0.5510311126708984
    },
    {
      "epoch": 0.000156927490234375,
      "model_forward_time": 0.11825990676879883,
      "step": 25711
    },
    {
      "epoch": 0.000156927490234375,
      "step": 25711,
      "training_step_time": 0.3826019763946533
    },
    {
      "epoch": 0.00015693359375,
      "model_forward_time": 0.11857914924621582,
      "step": 25712
    },
    {
      "epoch": 0.00015693359375,
      "step": 25712,
      "training_step_time": 0.38756513595581055
    },
    {
      "epoch": 0.000156939697265625,
      "model_forward_time": 0.13746881484985352,
      "step": 25713
    },
    {
      "epoch": 0.000156939697265625,
      "step": 25713,
      "training_step_time": 0.39995574951171875
    },
    {
      "epoch": 0.00015694580078125,
      "model_forward_time": 0.11745333671569824,
      "step": 25714
    },
    {
      "epoch": 0.00015694580078125,
      "step": 25714,
      "training_step_time": 0.3806278705596924
    },
    {
      "epoch": 0.000156951904296875,
      "model_forward_time": 0.11761903762817383,
      "step": 25715
    },
    {
      "epoch": 0.000156951904296875,
      "step": 25715,
      "training_step_time": 0.44094109535217285
    },
    {
      "epoch": 0.0001569580078125,
      "model_forward_time": 0.1160287857055664,
      "step": 25716
    },
    {
      "epoch": 0.0001569580078125,
      "step": 25716,
      "training_step_time": 0.5821781158447266
    },
    {
      "epoch": 0.000156964111328125,
      "model_forward_time": 0.11555314064025879,
      "step": 25717
    },
    {
      "epoch": 0.000156964111328125,
      "step": 25717,
      "training_step_time": 0.4114077091217041
    },
    {
      "epoch": 0.00015697021484375,
      "model_forward_time": 0.11539793014526367,
      "step": 25718
    },
    {
      "epoch": 0.00015697021484375,
      "step": 25718,
      "training_step_time": 0.3912637233734131
    },
    {
      "epoch": 0.000156976318359375,
      "model_forward_time": 0.11563253402709961,
      "step": 25719
    },
    {
      "epoch": 0.000156976318359375,
      "step": 25719,
      "training_step_time": 0.38729214668273926
    },
    {
      "epoch": 0.000156982421875,
      "grad_norm": 0.126305490732193,
      "learning_rate": 6.56603712782835e-05,
      "loss": 0.0481,
      "step": 25720
    },
    {
      "epoch": 0.000156982421875,
      "model_forward_time": 0.1149899959564209,
      "step": 25720
    },
    {
      "epoch": 0.000156982421875,
      "step": 25720,
      "training_step_time": 0.3674643039703369
    },
    {
      "epoch": 0.000156988525390625,
      "model_forward_time": 0.11520195007324219,
      "step": 25721
    },
    {
      "epoch": 0.000156988525390625,
      "step": 25721,
      "training_step_time": 0.4381899833679199
    },
    {
      "epoch": 0.00015699462890625,
      "model_forward_time": 0.11552000045776367,
      "step": 25722
    },
    {
      "epoch": 0.00015699462890625,
      "step": 25722,
      "training_step_time": 0.4028351306915283
    },
    {
      "epoch": 0.000157000732421875,
      "model_forward_time": 0.11531281471252441,
      "step": 25723
    },
    {
      "epoch": 0.000157000732421875,
      "step": 25723,
      "training_step_time": 0.41832852363586426
    },
    {
      "epoch": 0.0001570068359375,
      "model_forward_time": 0.11489129066467285,
      "step": 25724
    },
    {
      "epoch": 0.0001570068359375,
      "step": 25724,
      "training_step_time": 0.4017221927642822
    },
    {
      "epoch": 0.000157012939453125,
      "model_forward_time": 0.11654019355773926,
      "step": 25725
    },
    {
      "epoch": 0.000157012939453125,
      "step": 25725,
      "training_step_time": 0.3847229480743408
    },
    {
      "epoch": 0.00015701904296875,
      "model_forward_time": 0.11574602127075195,
      "step": 25726
    },
    {
      "epoch": 0.00015701904296875,
      "step": 25726,
      "training_step_time": 0.391176700592041
    },
    {
      "epoch": 0.000157025146484375,
      "model_forward_time": 0.11531519889831543,
      "step": 25727
    },
    {
      "epoch": 0.000157025146484375,
      "step": 25727,
      "training_step_time": 0.3909738063812256
    },
    {
      "epoch": 0.00015703125,
      "model_forward_time": 0.11604118347167969,
      "step": 25728
    },
    {
      "epoch": 0.00015703125,
      "step": 25728,
      "training_step_time": 0.6526060104370117
    },
    {
      "epoch": 0.000157037353515625,
      "model_forward_time": 0.11589956283569336,
      "step": 25729
    },
    {
      "epoch": 0.000157037353515625,
      "step": 25729,
      "training_step_time": 0.4416835308074951
    },
    {
      "epoch": 0.00015704345703125,
      "grad_norm": 0.19705000519752502,
      "learning_rate": 6.563419765006038e-05,
      "loss": 0.0483,
      "step": 25730
    },
    {
      "epoch": 0.00015704345703125,
      "model_forward_time": 0.11550688743591309,
      "step": 25730
    },
    {
      "epoch": 0.00015704345703125,
      "step": 25730,
      "training_step_time": 0.42405080795288086
    },
    {
      "epoch": 0.000157049560546875,
      "model_forward_time": 0.1154012680053711,
      "step": 25731
    },
    {
      "epoch": 0.000157049560546875,
      "step": 25731,
      "training_step_time": 0.4870917797088623
    },
    {
      "epoch": 0.0001570556640625,
      "model_forward_time": 0.11487483978271484,
      "step": 25732
    },
    {
      "epoch": 0.0001570556640625,
      "step": 25732,
      "training_step_time": 0.3889470100402832
    },
    {
      "epoch": 0.000157061767578125,
      "model_forward_time": 0.11473393440246582,
      "step": 25733
    },
    {
      "epoch": 0.000157061767578125,
      "step": 25733,
      "training_step_time": 0.3875858783721924
    },
    {
      "epoch": 0.00015706787109375,
      "model_forward_time": 0.11524224281311035,
      "step": 25734
    },
    {
      "epoch": 0.00015706787109375,
      "step": 25734,
      "training_step_time": 0.5100505352020264
    },
    {
      "epoch": 0.000157073974609375,
      "model_forward_time": 0.1157996654510498,
      "step": 25735
    },
    {
      "epoch": 0.000157073974609375,
      "step": 25735,
      "training_step_time": 0.4744081497192383
    },
    {
      "epoch": 0.000157080078125,
      "model_forward_time": 0.11487340927124023,
      "step": 25736
    },
    {
      "epoch": 0.000157080078125,
      "step": 25736,
      "training_step_time": 0.41223907470703125
    },
    {
      "epoch": 0.000157086181640625,
      "model_forward_time": 0.11520814895629883,
      "step": 25737
    },
    {
      "epoch": 0.000157086181640625,
      "step": 25737,
      "training_step_time": 0.3987455368041992
    },
    {
      "epoch": 0.00015709228515625,
      "model_forward_time": 0.11551237106323242,
      "step": 25738
    },
    {
      "epoch": 0.00015709228515625,
      "step": 25738,
      "training_step_time": 0.38494110107421875
    },
    {
      "epoch": 0.000157098388671875,
      "model_forward_time": 0.11688089370727539,
      "step": 25739
    },
    {
      "epoch": 0.000157098388671875,
      "step": 25739,
      "training_step_time": 0.39479565620422363
    },
    {
      "epoch": 0.0001571044921875,
      "grad_norm": 0.13347147405147552,
      "learning_rate": 6.56080192725808e-05,
      "loss": 0.047,
      "step": 25740
    },
    {
      "epoch": 0.0001571044921875,
      "model_forward_time": 0.11516928672790527,
      "step": 25740
    },
    {
      "epoch": 0.0001571044921875,
      "step": 25740,
      "training_step_time": 0.5169498920440674
    },
    {
      "epoch": 0.000157110595703125,
      "model_forward_time": 0.11559748649597168,
      "step": 25741
    },
    {
      "epoch": 0.000157110595703125,
      "step": 25741,
      "training_step_time": 0.4047200679779053
    },
    {
      "epoch": 0.00015711669921875,
      "model_forward_time": 0.11522722244262695,
      "step": 25742
    },
    {
      "epoch": 0.00015711669921875,
      "step": 25742,
      "training_step_time": 0.3945789337158203
    },
    {
      "epoch": 0.000157122802734375,
      "model_forward_time": 0.11599159240722656,
      "step": 25743
    },
    {
      "epoch": 0.000157122802734375,
      "step": 25743,
      "training_step_time": 0.4569542407989502
    },
    {
      "epoch": 0.00015712890625,
      "model_forward_time": 0.11688899993896484,
      "step": 25744
    },
    {
      "epoch": 0.00015712890625,
      "step": 25744,
      "training_step_time": 0.453671932220459
    },
    {
      "epoch": 0.000157135009765625,
      "model_forward_time": 0.11521577835083008,
      "step": 25745
    },
    {
      "epoch": 0.000157135009765625,
      "step": 25745,
      "training_step_time": 0.42542099952697754
    },
    {
      "epoch": 0.00015714111328125,
      "model_forward_time": 0.1160438060760498,
      "step": 25746
    },
    {
      "epoch": 0.00015714111328125,
      "step": 25746,
      "training_step_time": 0.5216398239135742
    },
    {
      "epoch": 0.000157147216796875,
      "model_forward_time": 0.11524772644042969,
      "step": 25747
    },
    {
      "epoch": 0.000157147216796875,
      "step": 25747,
      "training_step_time": 0.38765716552734375
    },
    {
      "epoch": 0.0001571533203125,
      "model_forward_time": 0.1165013313293457,
      "step": 25748
    },
    {
      "epoch": 0.0001571533203125,
      "step": 25748,
      "training_step_time": 0.3895573616027832
    },
    {
      "epoch": 0.000157159423828125,
      "model_forward_time": 0.11558008193969727,
      "step": 25749
    },
    {
      "epoch": 0.000157159423828125,
      "step": 25749,
      "training_step_time": 0.4292576313018799
    },
    {
      "epoch": 0.00015716552734375,
      "grad_norm": 0.17686133086681366,
      "learning_rate": 6.558183615379707e-05,
      "loss": 0.0426,
      "step": 25750
    },
    {
      "epoch": 0.00015716552734375,
      "model_forward_time": 0.11614990234375,
      "step": 25750
    },
    {
      "epoch": 0.00015716552734375,
      "step": 25750,
      "training_step_time": 0.48693108558654785
    },
    {
      "epoch": 0.000157171630859375,
      "model_forward_time": 0.11511945724487305,
      "step": 25751
    },
    {
      "epoch": 0.000157171630859375,
      "step": 25751,
      "training_step_time": 0.4016909599304199
    },
    {
      "epoch": 0.000157177734375,
      "model_forward_time": 0.11614298820495605,
      "step": 25752
    },
    {
      "epoch": 0.000157177734375,
      "step": 25752,
      "training_step_time": 0.555729866027832
    },
    {
      "epoch": 0.000157183837890625,
      "model_forward_time": 0.11524748802185059,
      "step": 25753
    },
    {
      "epoch": 0.000157183837890625,
      "step": 25753,
      "training_step_time": 0.396087646484375
    },
    {
      "epoch": 0.00015718994140625,
      "model_forward_time": 0.11542487144470215,
      "step": 25754
    },
    {
      "epoch": 0.00015718994140625,
      "step": 25754,
      "training_step_time": 0.38882923126220703
    },
    {
      "epoch": 0.000157196044921875,
      "model_forward_time": 0.11518287658691406,
      "step": 25755
    },
    {
      "epoch": 0.000157196044921875,
      "step": 25755,
      "training_step_time": 0.37976765632629395
    },
    {
      "epoch": 0.0001572021484375,
      "model_forward_time": 0.11614441871643066,
      "step": 25756
    },
    {
      "epoch": 0.0001572021484375,
      "step": 25756,
      "training_step_time": 0.3903810977935791
    },
    {
      "epoch": 0.000157208251953125,
      "model_forward_time": 0.1155540943145752,
      "step": 25757
    },
    {
      "epoch": 0.000157208251953125,
      "step": 25757,
      "training_step_time": 0.5065991878509521
    },
    {
      "epoch": 0.00015721435546875,
      "model_forward_time": 0.1154630184173584,
      "step": 25758
    },
    {
      "epoch": 0.00015721435546875,
      "step": 25758,
      "training_step_time": 0.6891038417816162
    },
    {
      "epoch": 0.000157220458984375,
      "model_forward_time": 0.1153256893157959,
      "step": 25759
    },
    {
      "epoch": 0.000157220458984375,
      "step": 25759,
      "training_step_time": 0.43029022216796875
    },
    {
      "epoch": 0.0001572265625,
      "grad_norm": 0.14953824877738953,
      "learning_rate": 6.555564830166293e-05,
      "loss": 0.0467,
      "step": 25760
    },
    {
      "epoch": 0.0001572265625,
      "model_forward_time": 0.11468815803527832,
      "step": 25760
    },
    {
      "epoch": 0.0001572265625,
      "step": 25760,
      "training_step_time": 0.3840494155883789
    },
    {
      "epoch": 0.000157232666015625,
      "model_forward_time": 0.11454463005065918,
      "step": 25761
    },
    {
      "epoch": 0.000157232666015625,
      "step": 25761,
      "training_step_time": 0.38574695587158203
    },
    {
      "epoch": 0.00015723876953125,
      "model_forward_time": 0.11657452583312988,
      "step": 25762
    },
    {
      "epoch": 0.00015723876953125,
      "step": 25762,
      "training_step_time": 0.38776540756225586
    },
    {
      "epoch": 0.000157244873046875,
      "model_forward_time": 0.1160118579864502,
      "step": 25763
    },
    {
      "epoch": 0.000157244873046875,
      "step": 25763,
      "training_step_time": 0.4251589775085449
    },
    {
      "epoch": 0.0001572509765625,
      "model_forward_time": 0.11780309677124023,
      "step": 25764
    },
    {
      "epoch": 0.0001572509765625,
      "step": 25764,
      "training_step_time": 0.5071673393249512
    },
    {
      "epoch": 0.000157257080078125,
      "model_forward_time": 0.11549925804138184,
      "step": 25765
    },
    {
      "epoch": 0.000157257080078125,
      "step": 25765,
      "training_step_time": 0.3896760940551758
    },
    {
      "epoch": 0.00015726318359375,
      "model_forward_time": 0.11605381965637207,
      "step": 25766
    },
    {
      "epoch": 0.00015726318359375,
      "step": 25766,
      "training_step_time": 0.37853288650512695
    },
    {
      "epoch": 0.000157269287109375,
      "model_forward_time": 0.11532402038574219,
      "step": 25767
    },
    {
      "epoch": 0.000157269287109375,
      "step": 25767,
      "training_step_time": 0.3968071937561035
    },
    {
      "epoch": 0.000157275390625,
      "model_forward_time": 0.11623549461364746,
      "step": 25768
    },
    {
      "epoch": 0.000157275390625,
      "step": 25768,
      "training_step_time": 0.39490771293640137
    },
    {
      "epoch": 0.000157281494140625,
      "model_forward_time": 0.11585092544555664,
      "step": 25769
    },
    {
      "epoch": 0.000157281494140625,
      "step": 25769,
      "training_step_time": 0.3949122428894043
    },
    {
      "epoch": 0.00015728759765625,
      "grad_norm": 0.1758849024772644,
      "learning_rate": 6.552945572413358e-05,
      "loss": 0.0465,
      "step": 25770
    },
    {
      "epoch": 0.00015728759765625,
      "model_forward_time": 0.11584210395812988,
      "step": 25770
    },
    {
      "epoch": 0.00015728759765625,
      "step": 25770,
      "training_step_time": 0.6008284091949463
    },
    {
      "epoch": 0.000157293701171875,
      "model_forward_time": 0.1151285171508789,
      "step": 25771
    },
    {
      "epoch": 0.000157293701171875,
      "step": 25771,
      "training_step_time": 0.4376070499420166
    },
    {
      "epoch": 0.0001572998046875,
      "model_forward_time": 0.11496329307556152,
      "step": 25772
    },
    {
      "epoch": 0.0001572998046875,
      "step": 25772,
      "training_step_time": 0.45679306983947754
    },
    {
      "epoch": 0.000157305908203125,
      "model_forward_time": 0.115447998046875,
      "step": 25773
    },
    {
      "epoch": 0.000157305908203125,
      "step": 25773,
      "training_step_time": 0.43210840225219727
    },
    {
      "epoch": 0.00015731201171875,
      "model_forward_time": 0.11552190780639648,
      "step": 25774
    },
    {
      "epoch": 0.00015731201171875,
      "step": 25774,
      "training_step_time": 0.39151835441589355
    },
    {
      "epoch": 0.000157318115234375,
      "model_forward_time": 0.11524009704589844,
      "step": 25775
    },
    {
      "epoch": 0.000157318115234375,
      "step": 25775,
      "training_step_time": 0.3892223834991455
    },
    {
      "epoch": 0.00015732421875,
      "model_forward_time": 0.11536574363708496,
      "step": 25776
    },
    {
      "epoch": 0.00015732421875,
      "step": 25776,
      "training_step_time": 0.5125682353973389
    },
    {
      "epoch": 0.000157330322265625,
      "model_forward_time": 0.11556482315063477,
      "step": 25777
    },
    {
      "epoch": 0.000157330322265625,
      "step": 25777,
      "training_step_time": 0.4786090850830078
    },
    {
      "epoch": 0.00015733642578125,
      "model_forward_time": 0.11538839340209961,
      "step": 25778
    },
    {
      "epoch": 0.00015733642578125,
      "step": 25778,
      "training_step_time": 0.41676855087280273
    },
    {
      "epoch": 0.000157342529296875,
      "model_forward_time": 0.11513042449951172,
      "step": 25779
    },
    {
      "epoch": 0.000157342529296875,
      "step": 25779,
      "training_step_time": 0.3856024742126465
    },
    {
      "epoch": 0.0001573486328125,
      "grad_norm": 0.21631555259227753,
      "learning_rate": 6.550325842916559e-05,
      "loss": 0.0532,
      "step": 25780
    },
    {
      "epoch": 0.0001573486328125,
      "model_forward_time": 0.11524534225463867,
      "step": 25780
    },
    {
      "epoch": 0.0001573486328125,
      "step": 25780,
      "training_step_time": 0.39470553398132324
    },
    {
      "epoch": 0.000157354736328125,
      "model_forward_time": 0.1156623363494873,
      "step": 25781
    },
    {
      "epoch": 0.000157354736328125,
      "step": 25781,
      "training_step_time": 0.38738203048706055
    },
    {
      "epoch": 0.00015736083984375,
      "model_forward_time": 0.11634159088134766,
      "step": 25782
    },
    {
      "epoch": 0.00015736083984375,
      "step": 25782,
      "training_step_time": 0.3824007511138916
    },
    {
      "epoch": 0.000157366943359375,
      "model_forward_time": 0.11585569381713867,
      "step": 25783
    },
    {
      "epoch": 0.000157366943359375,
      "step": 25783,
      "training_step_time": 0.40393614768981934
    },
    {
      "epoch": 0.000157373046875,
      "model_forward_time": 0.11615157127380371,
      "step": 25784
    },
    {
      "epoch": 0.000157373046875,
      "step": 25784,
      "training_step_time": 0.3918461799621582
    },
    {
      "epoch": 0.000157379150390625,
      "model_forward_time": 0.1150217056274414,
      "step": 25785
    },
    {
      "epoch": 0.000157379150390625,
      "step": 25785,
      "training_step_time": 0.4049091339111328
    },
    {
      "epoch": 0.00015738525390625,
      "model_forward_time": 0.11573457717895508,
      "step": 25786
    },
    {
      "epoch": 0.00015738525390625,
      "step": 25786,
      "training_step_time": 0.46029019355773926
    },
    {
      "epoch": 0.000157391357421875,
      "model_forward_time": 0.11577367782592773,
      "step": 25787
    },
    {
      "epoch": 0.000157391357421875,
      "step": 25787,
      "training_step_time": 0.4456627368927002
    },
    {
      "epoch": 0.0001573974609375,
      "model_forward_time": 0.11647939682006836,
      "step": 25788
    },
    {
      "epoch": 0.0001573974609375,
      "step": 25788,
      "training_step_time": 0.5956940650939941
    },
    {
      "epoch": 0.000157403564453125,
      "model_forward_time": 0.11518406867980957,
      "step": 25789
    },
    {
      "epoch": 0.000157403564453125,
      "step": 25789,
      "training_step_time": 0.38749003410339355
    },
    {
      "epoch": 0.00015740966796875,
      "grad_norm": 0.15005044639110565,
      "learning_rate": 6.547705642471703e-05,
      "loss": 0.0467,
      "step": 25790
    },
    {
      "epoch": 0.00015740966796875,
      "model_forward_time": 0.11510372161865234,
      "step": 25790
    },
    {
      "epoch": 0.00015740966796875,
      "step": 25790,
      "training_step_time": 0.3755655288696289
    },
    {
      "epoch": 0.000157415771484375,
      "model_forward_time": 0.11484098434448242,
      "step": 25791
    },
    {
      "epoch": 0.000157415771484375,
      "step": 25791,
      "training_step_time": 0.4347848892211914
    },
    {
      "epoch": 0.000157421875,
      "model_forward_time": 0.11522436141967773,
      "step": 25792
    },
    {
      "epoch": 0.000157421875,
      "step": 25792,
      "training_step_time": 0.3879508972167969
    },
    {
      "epoch": 0.000157427978515625,
      "model_forward_time": 0.11506295204162598,
      "step": 25793
    },
    {
      "epoch": 0.000157427978515625,
      "step": 25793,
      "training_step_time": 0.39829063415527344
    },
    {
      "epoch": 0.00015743408203125,
      "model_forward_time": 0.11585354804992676,
      "step": 25794
    },
    {
      "epoch": 0.00015743408203125,
      "step": 25794,
      "training_step_time": 0.602799654006958
    },
    {
      "epoch": 0.000157440185546875,
      "model_forward_time": 0.11547517776489258,
      "step": 25795
    },
    {
      "epoch": 0.000157440185546875,
      "step": 25795,
      "training_step_time": 0.38750576972961426
    },
    {
      "epoch": 0.0001574462890625,
      "model_forward_time": 0.11532044410705566,
      "step": 25796
    },
    {
      "epoch": 0.0001574462890625,
      "step": 25796,
      "training_step_time": 0.38742589950561523
    },
    {
      "epoch": 0.000157452392578125,
      "model_forward_time": 0.1158604621887207,
      "step": 25797
    },
    {
      "epoch": 0.000157452392578125,
      "step": 25797,
      "training_step_time": 0.37811946868896484
    },
    {
      "epoch": 0.00015745849609375,
      "model_forward_time": 0.11541533470153809,
      "step": 25798
    },
    {
      "epoch": 0.00015745849609375,
      "step": 25798,
      "training_step_time": 0.40023207664489746
    },
    {
      "epoch": 0.000157464599609375,
      "model_forward_time": 0.11502456665039062,
      "step": 25799
    },
    {
      "epoch": 0.000157464599609375,
      "step": 25799,
      "training_step_time": 0.4196491241455078
    },
    {
      "epoch": 0.000157470703125,
      "grad_norm": 0.1426912248134613,
      "learning_rate": 6.545084971874738e-05,
      "loss": 0.0452,
      "step": 25800
    },
    {
      "epoch": 0.000157470703125,
      "model_forward_time": 0.1147615909576416,
      "step": 25800
    },
    {
      "epoch": 0.000157470703125,
      "step": 25800,
      "training_step_time": 0.6505146026611328
    },
    {
      "epoch": 0.000157476806640625,
      "model_forward_time": 0.11573481559753418,
      "step": 25801
    },
    {
      "epoch": 0.000157476806640625,
      "step": 25801,
      "training_step_time": 0.4288594722747803
    },
    {
      "epoch": 0.00015748291015625,
      "model_forward_time": 0.11494755744934082,
      "step": 25802
    },
    {
      "epoch": 0.00015748291015625,
      "step": 25802,
      "training_step_time": 0.42594027519226074
    },
    {
      "epoch": 0.000157489013671875,
      "model_forward_time": 0.11663007736206055,
      "step": 25803
    },
    {
      "epoch": 0.000157489013671875,
      "step": 25803,
      "training_step_time": 0.4466221332550049
    },
    {
      "epoch": 0.0001574951171875,
      "model_forward_time": 0.11520934104919434,
      "step": 25804
    },
    {
      "epoch": 0.0001574951171875,
      "step": 25804,
      "training_step_time": 0.4004638195037842
    },
    {
      "epoch": 0.000157501220703125,
      "model_forward_time": 0.1157681941986084,
      "step": 25805
    },
    {
      "epoch": 0.000157501220703125,
      "step": 25805,
      "training_step_time": 0.42896270751953125
    },
    {
      "epoch": 0.00015750732421875,
      "model_forward_time": 0.11555004119873047,
      "step": 25806
    },
    {
      "epoch": 0.00015750732421875,
      "step": 25806,
      "training_step_time": 0.5546231269836426
    },
    {
      "epoch": 0.000157513427734375,
      "model_forward_time": 0.11531662940979004,
      "step": 25807
    },
    {
      "epoch": 0.000157513427734375,
      "step": 25807,
      "training_step_time": 0.38811302185058594
    },
    {
      "epoch": 0.00015751953125,
      "model_forward_time": 0.11521267890930176,
      "step": 25808
    },
    {
      "epoch": 0.00015751953125,
      "step": 25808,
      "training_step_time": 0.3828456401824951
    },
    {
      "epoch": 0.000157525634765625,
      "model_forward_time": 0.11549520492553711,
      "step": 25809
    },
    {
      "epoch": 0.000157525634765625,
      "step": 25809,
      "training_step_time": 0.38418149948120117
    },
    {
      "epoch": 0.00015753173828125,
      "grad_norm": 0.1834901124238968,
      "learning_rate": 6.542463831921754e-05,
      "loss": 0.0446,
      "step": 25810
    },
    {
      "epoch": 0.00015753173828125,
      "model_forward_time": 0.1155703067779541,
      "step": 25810
    },
    {
      "epoch": 0.00015753173828125,
      "step": 25810,
      "training_step_time": 0.38106584548950195
    },
    {
      "epoch": 0.000157537841796875,
      "model_forward_time": 0.11593770980834961,
      "step": 25811
    },
    {
      "epoch": 0.000157537841796875,
      "step": 25811,
      "training_step_time": 0.3806149959564209
    },
    {
      "epoch": 0.0001575439453125,
      "model_forward_time": 0.11572408676147461,
      "step": 25812
    },
    {
      "epoch": 0.0001575439453125,
      "step": 25812,
      "training_step_time": 0.7542622089385986
    },
    {
      "epoch": 0.000157550048828125,
      "model_forward_time": 0.11476874351501465,
      "step": 25813
    },
    {
      "epoch": 0.000157550048828125,
      "step": 25813,
      "training_step_time": 0.4234917163848877
    },
    {
      "epoch": 0.00015755615234375,
      "model_forward_time": 0.1144716739654541,
      "step": 25814
    },
    {
      "epoch": 0.00015755615234375,
      "step": 25814,
      "training_step_time": 0.4327728748321533
    },
    {
      "epoch": 0.000157562255859375,
      "model_forward_time": 0.11603140830993652,
      "step": 25815
    },
    {
      "epoch": 0.000157562255859375,
      "step": 25815,
      "training_step_time": 0.44971537590026855
    },
    {
      "epoch": 0.000157568359375,
      "model_forward_time": 0.11438679695129395,
      "step": 25816
    },
    {
      "epoch": 0.000157568359375,
      "step": 25816,
      "training_step_time": 0.39308595657348633
    },
    {
      "epoch": 0.000157574462890625,
      "model_forward_time": 0.11435270309448242,
      "step": 25817
    },
    {
      "epoch": 0.000157574462890625,
      "step": 25817,
      "training_step_time": 0.39237546920776367
    },
    {
      "epoch": 0.00015758056640625,
      "model_forward_time": 0.11565327644348145,
      "step": 25818
    },
    {
      "epoch": 0.00015758056640625,
      "step": 25818,
      "training_step_time": 0.4215097427368164
    },
    {
      "epoch": 0.000157586669921875,
      "model_forward_time": 0.11546659469604492,
      "step": 25819
    },
    {
      "epoch": 0.000157586669921875,
      "step": 25819,
      "training_step_time": 0.4037652015686035
    },
    {
      "epoch": 0.0001575927734375,
      "grad_norm": 0.1425512284040451,
      "learning_rate": 6.539842223408984e-05,
      "loss": 0.0425,
      "step": 25820
    },
    {
      "epoch": 0.0001575927734375,
      "model_forward_time": 0.12734389305114746,
      "step": 25820
    },
    {
      "epoch": 0.0001575927734375,
      "step": 25820,
      "training_step_time": 0.39519596099853516
    },
    {
      "epoch": 0.000157598876953125,
      "model_forward_time": 0.11570501327514648,
      "step": 25821
    },
    {
      "epoch": 0.000157598876953125,
      "step": 25821,
      "training_step_time": 0.41123008728027344
    },
    {
      "epoch": 0.00015760498046875,
      "model_forward_time": 0.11522889137268066,
      "step": 25822
    },
    {
      "epoch": 0.00015760498046875,
      "step": 25822,
      "training_step_time": 0.38862180709838867
    },
    {
      "epoch": 0.000157611083984375,
      "model_forward_time": 0.1159524917602539,
      "step": 25823
    },
    {
      "epoch": 0.000157611083984375,
      "step": 25823,
      "training_step_time": 0.3907313346862793
    },
    {
      "epoch": 0.0001576171875,
      "model_forward_time": 0.11554455757141113,
      "step": 25824
    },
    {
      "epoch": 0.0001576171875,
      "step": 25824,
      "training_step_time": 0.6812140941619873
    },
    {
      "epoch": 0.000157623291015625,
      "model_forward_time": 0.11511421203613281,
      "step": 25825
    },
    {
      "epoch": 0.000157623291015625,
      "step": 25825,
      "training_step_time": 0.3944263458251953
    },
    {
      "epoch": 0.00015762939453125,
      "model_forward_time": 0.11560416221618652,
      "step": 25826
    },
    {
      "epoch": 0.00015762939453125,
      "step": 25826,
      "training_step_time": 0.39219021797180176
    },
    {
      "epoch": 0.000157635498046875,
      "model_forward_time": 0.11546707153320312,
      "step": 25827
    },
    {
      "epoch": 0.000157635498046875,
      "step": 25827,
      "training_step_time": 0.4775834083557129
    },
    {
      "epoch": 0.0001576416015625,
      "model_forward_time": 0.11465573310852051,
      "step": 25828
    },
    {
      "epoch": 0.0001576416015625,
      "step": 25828,
      "training_step_time": 0.45986413955688477
    },
    {
      "epoch": 0.000157647705078125,
      "model_forward_time": 0.11444473266601562,
      "step": 25829
    },
    {
      "epoch": 0.000157647705078125,
      "step": 25829,
      "training_step_time": 0.4569697380065918
    },
    {
      "epoch": 0.00015765380859375,
      "grad_norm": 0.15933950245380402,
      "learning_rate": 6.537220147132805e-05,
      "loss": 0.0458,
      "step": 25830
    },
    {
      "epoch": 0.00015765380859375,
      "model_forward_time": 0.11514854431152344,
      "step": 25830
    },
    {
      "epoch": 0.00015765380859375,
      "step": 25830,
      "training_step_time": 0.5837588310241699
    },
    {
      "epoch": 0.000157659912109375,
      "model_forward_time": 0.11490082740783691,
      "step": 25831
    },
    {
      "epoch": 0.000157659912109375,
      "step": 25831,
      "training_step_time": 0.37804245948791504
    },
    {
      "epoch": 0.000157666015625,
      "model_forward_time": 0.11522841453552246,
      "step": 25832
    },
    {
      "epoch": 0.000157666015625,
      "step": 25832,
      "training_step_time": 0.47940731048583984
    },
    {
      "epoch": 0.000157672119140625,
      "model_forward_time": 0.11523199081420898,
      "step": 25833
    },
    {
      "epoch": 0.000157672119140625,
      "step": 25833,
      "training_step_time": 0.486102819442749
    },
    {
      "epoch": 0.00015767822265625,
      "model_forward_time": 0.11790060997009277,
      "step": 25834
    },
    {
      "epoch": 0.00015767822265625,
      "step": 25834,
      "training_step_time": 0.37911462783813477
    },
    {
      "epoch": 0.000157684326171875,
      "model_forward_time": 0.1152801513671875,
      "step": 25835
    },
    {
      "epoch": 0.000157684326171875,
      "step": 25835,
      "training_step_time": 0.3932194709777832
    },
    {
      "epoch": 0.0001576904296875,
      "model_forward_time": 0.11503028869628906,
      "step": 25836
    },
    {
      "epoch": 0.0001576904296875,
      "step": 25836,
      "training_step_time": 0.539966344833374
    },
    {
      "epoch": 0.000157696533203125,
      "model_forward_time": 0.11408066749572754,
      "step": 25837
    },
    {
      "epoch": 0.000157696533203125,
      "step": 25837,
      "training_step_time": 0.38889312744140625
    },
    {
      "epoch": 0.00015770263671875,
      "model_forward_time": 0.11572122573852539,
      "step": 25838
    },
    {
      "epoch": 0.00015770263671875,
      "step": 25838,
      "training_step_time": 0.38549137115478516
    },
    {
      "epoch": 0.000157708740234375,
      "model_forward_time": 0.11538910865783691,
      "step": 25839
    },
    {
      "epoch": 0.000157708740234375,
      "step": 25839,
      "training_step_time": 0.38785696029663086
    },
    {
      "epoch": 0.00015771484375,
      "grad_norm": 0.13437187671661377,
      "learning_rate": 6.534597603889732e-05,
      "loss": 0.0493,
      "step": 25840
    },
    {
      "epoch": 0.00015771484375,
      "model_forward_time": 0.11467814445495605,
      "step": 25840
    },
    {
      "epoch": 0.00015771484375,
      "step": 25840,
      "training_step_time": 0.3897578716278076
    },
    {
      "epoch": 0.000157720947265625,
      "model_forward_time": 0.11543989181518555,
      "step": 25841
    },
    {
      "epoch": 0.000157720947265625,
      "step": 25841,
      "training_step_time": 0.4116332530975342
    },
    {
      "epoch": 0.00015772705078125,
      "model_forward_time": 0.11520576477050781,
      "step": 25842
    },
    {
      "epoch": 0.00015772705078125,
      "step": 25842,
      "training_step_time": 0.6728124618530273
    },
    {
      "epoch": 0.000157733154296875,
      "model_forward_time": 0.11529254913330078,
      "step": 25843
    },
    {
      "epoch": 0.000157733154296875,
      "step": 25843,
      "training_step_time": 0.44533395767211914
    },
    {
      "epoch": 0.0001577392578125,
      "model_forward_time": 0.11450529098510742,
      "step": 25844
    },
    {
      "epoch": 0.0001577392578125,
      "step": 25844,
      "training_step_time": 0.4001903533935547
    },
    {
      "epoch": 0.000157745361328125,
      "model_forward_time": 0.11477375030517578,
      "step": 25845
    },
    {
      "epoch": 0.000157745361328125,
      "step": 25845,
      "training_step_time": 0.3623063564300537
    },
    {
      "epoch": 0.00015775146484375,
      "model_forward_time": 0.11409258842468262,
      "step": 25846
    },
    {
      "epoch": 0.00015775146484375,
      "step": 25846,
      "training_step_time": 0.4304158687591553
    },
    {
      "epoch": 0.000157757568359375,
      "model_forward_time": 0.11519551277160645,
      "step": 25847
    },
    {
      "epoch": 0.000157757568359375,
      "step": 25847,
      "training_step_time": 0.3939974308013916
    },
    {
      "epoch": 0.000157763671875,
      "model_forward_time": 0.11487388610839844,
      "step": 25848
    },
    {
      "epoch": 0.000157763671875,
      "step": 25848,
      "training_step_time": 0.5475635528564453
    },
    {
      "epoch": 0.000157769775390625,
      "model_forward_time": 0.11559629440307617,
      "step": 25849
    },
    {
      "epoch": 0.000157769775390625,
      "step": 25849,
      "training_step_time": 0.40540218353271484
    },
    {
      "epoch": 0.00015777587890625,
      "grad_norm": 0.22894741594791412,
      "learning_rate": 6.531974594476425e-05,
      "loss": 0.0525,
      "step": 25850
    },
    {
      "epoch": 0.00015777587890625,
      "model_forward_time": 0.11511778831481934,
      "step": 25850
    },
    {
      "epoch": 0.00015777587890625,
      "step": 25850,
      "training_step_time": 0.39508485794067383
    },
    {
      "epoch": 0.000157781982421875,
      "model_forward_time": 0.11522960662841797,
      "step": 25851
    },
    {
      "epoch": 0.000157781982421875,
      "step": 25851,
      "training_step_time": 0.4018726348876953
    },
    {
      "epoch": 0.0001577880859375,
      "model_forward_time": 0.11490297317504883,
      "step": 25852
    },
    {
      "epoch": 0.0001577880859375,
      "step": 25852,
      "training_step_time": 0.3906278610229492
    },
    {
      "epoch": 0.000157794189453125,
      "model_forward_time": 0.11534404754638672,
      "step": 25853
    },
    {
      "epoch": 0.000157794189453125,
      "step": 25853,
      "training_step_time": 0.39035820960998535
    },
    {
      "epoch": 0.00015780029296875,
      "model_forward_time": 0.11532425880432129,
      "step": 25854
    },
    {
      "epoch": 0.00015780029296875,
      "step": 25854,
      "training_step_time": 0.7665023803710938
    },
    {
      "epoch": 0.000157806396484375,
      "model_forward_time": 0.11414551734924316,
      "step": 25855
    },
    {
      "epoch": 0.000157806396484375,
      "step": 25855,
      "training_step_time": 0.46060800552368164
    },
    {
      "epoch": 0.0001578125,
      "model_forward_time": 0.11427187919616699,
      "step": 25856
    },
    {
      "epoch": 0.0001578125,
      "step": 25856,
      "training_step_time": 0.4910848140716553
    },
    {
      "epoch": 0.000157818603515625,
      "model_forward_time": 0.1143643856048584,
      "step": 25857
    },
    {
      "epoch": 0.000157818603515625,
      "step": 25857,
      "training_step_time": 0.37818241119384766
    },
    {
      "epoch": 0.00015782470703125,
      "model_forward_time": 0.11486601829528809,
      "step": 25858
    },
    {
      "epoch": 0.00015782470703125,
      "step": 25858,
      "training_step_time": 0.3824746608734131
    },
    {
      "epoch": 0.000157830810546875,
      "model_forward_time": 0.1148231029510498,
      "step": 25859
    },
    {
      "epoch": 0.000157830810546875,
      "step": 25859,
      "training_step_time": 0.39615941047668457
    },
    {
      "epoch": 0.0001578369140625,
      "grad_norm": 0.13015906512737274,
      "learning_rate": 6.529351119689688e-05,
      "loss": 0.0498,
      "step": 25860
    },
    {
      "epoch": 0.0001578369140625,
      "model_forward_time": 0.11562323570251465,
      "step": 25860
    },
    {
      "epoch": 0.0001578369140625,
      "step": 25860,
      "training_step_time": 0.4718904495239258
    },
    {
      "epoch": 0.000157843017578125,
      "model_forward_time": 0.11456942558288574,
      "step": 25861
    },
    {
      "epoch": 0.000157843017578125,
      "step": 25861,
      "training_step_time": 0.4095158576965332
    },
    {
      "epoch": 0.00015784912109375,
      "model_forward_time": 0.1182701587677002,
      "step": 25862
    },
    {
      "epoch": 0.00015784912109375,
      "step": 25862,
      "training_step_time": 0.3791520595550537
    },
    {
      "epoch": 0.000157855224609375,
      "model_forward_time": 0.11812305450439453,
      "step": 25863
    },
    {
      "epoch": 0.000157855224609375,
      "step": 25863,
      "training_step_time": 0.3907022476196289
    },
    {
      "epoch": 0.000157861328125,
      "model_forward_time": 0.11774325370788574,
      "step": 25864
    },
    {
      "epoch": 0.000157861328125,
      "step": 25864,
      "training_step_time": 0.3796107769012451
    },
    {
      "epoch": 0.000157867431640625,
      "model_forward_time": 0.1155247688293457,
      "step": 25865
    },
    {
      "epoch": 0.000157867431640625,
      "step": 25865,
      "training_step_time": 0.37732911109924316
    },
    {
      "epoch": 0.00015787353515625,
      "model_forward_time": 0.11516737937927246,
      "step": 25866
    },
    {
      "epoch": 0.00015787353515625,
      "step": 25866,
      "training_step_time": 0.7202260494232178
    },
    {
      "epoch": 0.000157879638671875,
      "model_forward_time": 0.11507439613342285,
      "step": 25867
    },
    {
      "epoch": 0.000157879638671875,
      "step": 25867,
      "training_step_time": 0.40633463859558105
    },
    {
      "epoch": 0.0001578857421875,
      "model_forward_time": 0.1139225959777832,
      "step": 25868
    },
    {
      "epoch": 0.0001578857421875,
      "step": 25868,
      "training_step_time": 0.43753552436828613
    },
    {
      "epoch": 0.000157891845703125,
      "model_forward_time": 0.11458539962768555,
      "step": 25869
    },
    {
      "epoch": 0.000157891845703125,
      "step": 25869,
      "training_step_time": 0.40831971168518066
    },
    {
      "epoch": 0.00015789794921875,
      "grad_norm": 0.14440453052520752,
      "learning_rate": 6.52672718032646e-05,
      "loss": 0.0433,
      "step": 25870
    },
    {
      "epoch": 0.00015789794921875,
      "model_forward_time": 0.1143331527709961,
      "step": 25870
    },
    {
      "epoch": 0.00015789794921875,
      "step": 25870,
      "training_step_time": 0.45029497146606445
    },
    {
      "epoch": 0.000157904052734375,
      "model_forward_time": 0.11436629295349121,
      "step": 25871
    },
    {
      "epoch": 0.000157904052734375,
      "step": 25871,
      "training_step_time": 0.40145230293273926
    },
    {
      "epoch": 0.00015791015625,
      "model_forward_time": 0.11521077156066895,
      "step": 25872
    },
    {
      "epoch": 0.00015791015625,
      "step": 25872,
      "training_step_time": 0.5430436134338379
    },
    {
      "epoch": 0.000157916259765625,
      "model_forward_time": 0.11464166641235352,
      "step": 25873
    },
    {
      "epoch": 0.000157916259765625,
      "step": 25873,
      "training_step_time": 0.5160424709320068
    },
    {
      "epoch": 0.00015792236328125,
      "model_forward_time": 0.11524152755737305,
      "step": 25874
    },
    {
      "epoch": 0.00015792236328125,
      "step": 25874,
      "training_step_time": 0.4754977226257324
    },
    {
      "epoch": 0.000157928466796875,
      "model_forward_time": 0.11412954330444336,
      "step": 25875
    },
    {
      "epoch": 0.000157928466796875,
      "step": 25875,
      "training_step_time": 0.38057374954223633
    },
    {
      "epoch": 0.0001579345703125,
      "model_forward_time": 0.11532449722290039,
      "step": 25876
    },
    {
      "epoch": 0.0001579345703125,
      "step": 25876,
      "training_step_time": 0.3822181224822998
    },
    {
      "epoch": 0.000157940673828125,
      "model_forward_time": 0.1169576644897461,
      "step": 25877
    },
    {
      "epoch": 0.000157940673828125,
      "step": 25877,
      "training_step_time": 0.38347864151000977
    },
    {
      "epoch": 0.00015794677734375,
      "model_forward_time": 0.1152801513671875,
      "step": 25878
    },
    {
      "epoch": 0.00015794677734375,
      "step": 25878,
      "training_step_time": 0.44283556938171387
    },
    {
      "epoch": 0.000157952880859375,
      "model_forward_time": 0.1147005558013916,
      "step": 25879
    },
    {
      "epoch": 0.000157952880859375,
      "step": 25879,
      "training_step_time": 0.3959462642669678
    },
    {
      "epoch": 0.000157958984375,
      "grad_norm": 0.11929652094841003,
      "learning_rate": 6.524102777183825e-05,
      "loss": 0.0455,
      "step": 25880
    },
    {
      "epoch": 0.000157958984375,
      "model_forward_time": 0.11571764945983887,
      "step": 25880
    },
    {
      "epoch": 0.000157958984375,
      "step": 25880,
      "training_step_time": 0.4144248962402344
    },
    {
      "epoch": 0.000157965087890625,
      "model_forward_time": 0.11534380912780762,
      "step": 25881
    },
    {
      "epoch": 0.000157965087890625,
      "step": 25881,
      "training_step_time": 0.39369988441467285
    },
    {
      "epoch": 0.00015797119140625,
      "model_forward_time": 0.11536026000976562,
      "step": 25882
    },
    {
      "epoch": 0.00015797119140625,
      "step": 25882,
      "training_step_time": 0.5061616897583008
    },
    {
      "epoch": 0.000157977294921875,
      "model_forward_time": 0.11523914337158203,
      "step": 25883
    },
    {
      "epoch": 0.000157977294921875,
      "step": 25883,
      "training_step_time": 0.4454348087310791
    },
    {
      "epoch": 0.0001579833984375,
      "model_forward_time": 0.11525630950927734,
      "step": 25884
    },
    {
      "epoch": 0.0001579833984375,
      "step": 25884,
      "training_step_time": 0.5000030994415283
    },
    {
      "epoch": 0.000157989501953125,
      "model_forward_time": 0.11476659774780273,
      "step": 25885
    },
    {
      "epoch": 0.000157989501953125,
      "step": 25885,
      "training_step_time": 0.40168237686157227
    },
    {
      "epoch": 0.00015799560546875,
      "model_forward_time": 0.11498045921325684,
      "step": 25886
    },
    {
      "epoch": 0.00015799560546875,
      "step": 25886,
      "training_step_time": 0.39077281951904297
    },
    {
      "epoch": 0.000158001708984375,
      "model_forward_time": 0.11545109748840332,
      "step": 25887
    },
    {
      "epoch": 0.000158001708984375,
      "step": 25887,
      "training_step_time": 0.5111851692199707
    },
    {
      "epoch": 0.0001580078125,
      "model_forward_time": 0.1149587631225586,
      "step": 25888
    },
    {
      "epoch": 0.0001580078125,
      "step": 25888,
      "training_step_time": 0.4866037368774414
    },
    {
      "epoch": 0.000158013916015625,
      "model_forward_time": 0.11541223526000977,
      "step": 25889
    },
    {
      "epoch": 0.000158013916015625,
      "step": 25889,
      "training_step_time": 0.39830732345581055
    },
    {
      "epoch": 0.00015802001953125,
      "grad_norm": 0.14526890218257904,
      "learning_rate": 6.521477911059008e-05,
      "loss": 0.0472,
      "step": 25890
    },
    {
      "epoch": 0.00015802001953125,
      "model_forward_time": 0.11514568328857422,
      "step": 25890
    },
    {
      "epoch": 0.00015802001953125,
      "step": 25890,
      "training_step_time": 0.4755122661590576
    },
    {
      "epoch": 0.000158026123046875,
      "model_forward_time": 0.11507582664489746,
      "step": 25891
    },
    {
      "epoch": 0.000158026123046875,
      "step": 25891,
      "training_step_time": 0.39162111282348633
    },
    {
      "epoch": 0.0001580322265625,
      "model_forward_time": 0.1149749755859375,
      "step": 25892
    },
    {
      "epoch": 0.0001580322265625,
      "step": 25892,
      "training_step_time": 0.39461660385131836
    },
    {
      "epoch": 0.000158038330078125,
      "model_forward_time": 0.11486005783081055,
      "step": 25893
    },
    {
      "epoch": 0.000158038330078125,
      "step": 25893,
      "training_step_time": 0.405426025390625
    },
    {
      "epoch": 0.00015804443359375,
      "model_forward_time": 0.11552023887634277,
      "step": 25894
    },
    {
      "epoch": 0.00015804443359375,
      "step": 25894,
      "training_step_time": 0.38939332962036133
    },
    {
      "epoch": 0.000158050537109375,
      "model_forward_time": 0.11633777618408203,
      "step": 25895
    },
    {
      "epoch": 0.000158050537109375,
      "step": 25895,
      "training_step_time": 0.399158239364624
    },
    {
      "epoch": 0.000158056640625,
      "model_forward_time": 0.1152501106262207,
      "step": 25896
    },
    {
      "epoch": 0.000158056640625,
      "step": 25896,
      "training_step_time": 0.6980948448181152
    },
    {
      "epoch": 0.000158062744140625,
      "model_forward_time": 0.11509346961975098,
      "step": 25897
    },
    {
      "epoch": 0.000158062744140625,
      "step": 25897,
      "training_step_time": 0.41614389419555664
    },
    {
      "epoch": 0.00015806884765625,
      "model_forward_time": 0.11497735977172852,
      "step": 25898
    },
    {
      "epoch": 0.00015806884765625,
      "step": 25898,
      "training_step_time": 0.4197678565979004
    },
    {
      "epoch": 0.000158074951171875,
      "model_forward_time": 0.11524772644042969,
      "step": 25899
    },
    {
      "epoch": 0.000158074951171875,
      "step": 25899,
      "training_step_time": 0.383983850479126
    },
    {
      "epoch": 0.0001580810546875,
      "grad_norm": 0.15250353515148163,
      "learning_rate": 6.518852582749373e-05,
      "loss": 0.046,
      "step": 25900
    },
    {
      "epoch": 0.0001580810546875,
      "model_forward_time": 0.11446213722229004,
      "step": 25900
    },
    {
      "epoch": 0.0001580810546875,
      "step": 25900,
      "training_step_time": 0.3664379119873047
    },
    {
      "epoch": 0.000158087158203125,
      "model_forward_time": 0.11477279663085938,
      "step": 25901
    },
    {
      "epoch": 0.000158087158203125,
      "step": 25901,
      "training_step_time": 0.42570066452026367
    },
    {
      "epoch": 0.00015809326171875,
      "model_forward_time": 0.11483383178710938,
      "step": 25902
    },
    {
      "epoch": 0.00015809326171875,
      "step": 25902,
      "training_step_time": 0.5186276435852051
    },
    {
      "epoch": 0.000158099365234375,
      "model_forward_time": 0.11633419990539551,
      "step": 25903
    },
    {
      "epoch": 0.000158099365234375,
      "step": 25903,
      "training_step_time": 0.3839292526245117
    },
    {
      "epoch": 0.00015810546875,
      "model_forward_time": 0.11498594284057617,
      "step": 25904
    },
    {
      "epoch": 0.00015810546875,
      "step": 25904,
      "training_step_time": 0.39196038246154785
    },
    {
      "epoch": 0.000158111572265625,
      "model_forward_time": 0.11492681503295898,
      "step": 25905
    },
    {
      "epoch": 0.000158111572265625,
      "step": 25905,
      "training_step_time": 0.4076054096221924
    },
    {
      "epoch": 0.00015811767578125,
      "model_forward_time": 0.11481952667236328,
      "step": 25906
    },
    {
      "epoch": 0.00015811767578125,
      "step": 25906,
      "training_step_time": 0.387082576751709
    },
    {
      "epoch": 0.000158123779296875,
      "model_forward_time": 0.11541032791137695,
      "step": 25907
    },
    {
      "epoch": 0.000158123779296875,
      "step": 25907,
      "training_step_time": 0.4452533721923828
    },
    {
      "epoch": 0.0001581298828125,
      "model_forward_time": 0.11488580703735352,
      "step": 25908
    },
    {
      "epoch": 0.0001581298828125,
      "step": 25908,
      "training_step_time": 0.6445825099945068
    },
    {
      "epoch": 0.000158135986328125,
      "model_forward_time": 0.11446118354797363,
      "step": 25909
    },
    {
      "epoch": 0.000158135986328125,
      "step": 25909,
      "training_step_time": 0.3934199810028076
    },
    {
      "epoch": 0.00015814208984375,
      "grad_norm": 0.16478779911994934,
      "learning_rate": 6.516226793052428e-05,
      "loss": 0.0471,
      "step": 25910
    },
    {
      "epoch": 0.00015814208984375,
      "model_forward_time": 0.11581921577453613,
      "step": 25910
    },
    {
      "epoch": 0.00015814208984375,
      "step": 25910,
      "training_step_time": 0.4538247585296631
    },
    {
      "epoch": 0.000158148193359375,
      "model_forward_time": 0.11466073989868164,
      "step": 25911
    },
    {
      "epoch": 0.000158148193359375,
      "step": 25911,
      "training_step_time": 0.4344501495361328
    },
    {
      "epoch": 0.000158154296875,
      "model_forward_time": 0.11533832550048828,
      "step": 25912
    },
    {
      "epoch": 0.000158154296875,
      "step": 25912,
      "training_step_time": 0.4395263195037842
    },
    {
      "epoch": 0.000158160400390625,
      "model_forward_time": 0.11415719985961914,
      "step": 25913
    },
    {
      "epoch": 0.000158160400390625,
      "step": 25913,
      "training_step_time": 0.388657808303833
    },
    {
      "epoch": 0.00015816650390625,
      "model_forward_time": 0.1158440113067627,
      "step": 25914
    },
    {
      "epoch": 0.00015816650390625,
      "step": 25914,
      "training_step_time": 0.5553429126739502
    },
    {
      "epoch": 0.000158172607421875,
      "model_forward_time": 0.11463713645935059,
      "step": 25915
    },
    {
      "epoch": 0.000158172607421875,
      "step": 25915,
      "training_step_time": 0.3918569087982178
    },
    {
      "epoch": 0.0001581787109375,
      "model_forward_time": 0.11430621147155762,
      "step": 25916
    },
    {
      "epoch": 0.0001581787109375,
      "step": 25916,
      "training_step_time": 0.5027549266815186
    },
    {
      "epoch": 0.000158184814453125,
      "model_forward_time": 0.1141209602355957,
      "step": 25917
    },
    {
      "epoch": 0.000158184814453125,
      "step": 25917,
      "training_step_time": 0.4009585380554199
    },
    {
      "epoch": 0.00015819091796875,
      "model_forward_time": 0.11550760269165039,
      "step": 25918
    },
    {
      "epoch": 0.00015819091796875,
      "step": 25918,
      "training_step_time": 0.38249683380126953
    },
    {
      "epoch": 0.000158197021484375,
      "model_forward_time": 0.11478281021118164,
      "step": 25919
    },
    {
      "epoch": 0.000158197021484375,
      "step": 25919,
      "training_step_time": 0.39595603942871094
    },
    {
      "epoch": 0.000158203125,
      "grad_norm": 0.1356395184993744,
      "learning_rate": 6.513600542765817e-05,
      "loss": 0.0427,
      "step": 25920
    },
    {
      "epoch": 0.000158203125,
      "model_forward_time": 0.1156919002532959,
      "step": 25920
    },
    {
      "epoch": 0.000158203125,
      "step": 25920,
      "training_step_time": 0.6750483512878418
    },
    {
      "epoch": 0.000158209228515625,
      "model_forward_time": 0.11485576629638672,
      "step": 25921
    },
    {
      "epoch": 0.000158209228515625,
      "step": 25921,
      "training_step_time": 0.39653611183166504
    },
    {
      "epoch": 0.00015821533203125,
      "model_forward_time": 0.11389732360839844,
      "step": 25922
    },
    {
      "epoch": 0.00015821533203125,
      "step": 25922,
      "training_step_time": 0.3898489475250244
    },
    {
      "epoch": 0.000158221435546875,
      "model_forward_time": 0.11580801010131836,
      "step": 25923
    },
    {
      "epoch": 0.000158221435546875,
      "step": 25923,
      "training_step_time": 0.3947596549987793
    },
    {
      "epoch": 0.0001582275390625,
      "model_forward_time": 0.11510348320007324,
      "step": 25924
    },
    {
      "epoch": 0.0001582275390625,
      "step": 25924,
      "training_step_time": 0.44055771827697754
    },
    {
      "epoch": 0.000158233642578125,
      "model_forward_time": 0.11500883102416992,
      "step": 25925
    },
    {
      "epoch": 0.000158233642578125,
      "step": 25925,
      "training_step_time": 0.4474811553955078
    },
    {
      "epoch": 0.00015823974609375,
      "model_forward_time": 0.11525917053222656,
      "step": 25926
    },
    {
      "epoch": 0.00015823974609375,
      "step": 25926,
      "training_step_time": 0.5964908599853516
    },
    {
      "epoch": 0.000158245849609375,
      "model_forward_time": 0.11465811729431152,
      "step": 25927
    },
    {
      "epoch": 0.000158245849609375,
      "step": 25927,
      "training_step_time": 0.3908841609954834
    },
    {
      "epoch": 0.000158251953125,
      "model_forward_time": 0.11498069763183594,
      "step": 25928
    },
    {
      "epoch": 0.000158251953125,
      "step": 25928,
      "training_step_time": 0.4182467460632324
    },
    {
      "epoch": 0.000158258056640625,
      "model_forward_time": 0.11489295959472656,
      "step": 25929
    },
    {
      "epoch": 0.000158258056640625,
      "step": 25929,
      "training_step_time": 0.38701915740966797
    },
    {
      "epoch": 0.00015826416015625,
      "grad_norm": 0.14954566955566406,
      "learning_rate": 6.510973832687323e-05,
      "loss": 0.0424,
      "step": 25930
    },
    {
      "epoch": 0.00015826416015625,
      "model_forward_time": 0.1152350902557373,
      "step": 25930
    },
    {
      "epoch": 0.00015826416015625,
      "step": 25930,
      "training_step_time": 0.49050164222717285
    },
    {
      "epoch": 0.000158270263671875,
      "model_forward_time": 0.11504077911376953,
      "step": 25931
    },
    {
      "epoch": 0.000158270263671875,
      "step": 25931,
      "training_step_time": 0.38992762565612793
    },
    {
      "epoch": 0.0001582763671875,
      "model_forward_time": 0.11554574966430664,
      "step": 25932
    },
    {
      "epoch": 0.0001582763671875,
      "step": 25932,
      "training_step_time": 0.47547364234924316
    },
    {
      "epoch": 0.000158282470703125,
      "model_forward_time": 0.11659455299377441,
      "step": 25933
    },
    {
      "epoch": 0.000158282470703125,
      "step": 25933,
      "training_step_time": 0.4306938648223877
    },
    {
      "epoch": 0.00015828857421875,
      "model_forward_time": 0.115020751953125,
      "step": 25934
    },
    {
      "epoch": 0.00015828857421875,
      "step": 25934,
      "training_step_time": 0.37929296493530273
    },
    {
      "epoch": 0.000158294677734375,
      "model_forward_time": 0.11480545997619629,
      "step": 25935
    },
    {
      "epoch": 0.000158294677734375,
      "step": 25935,
      "training_step_time": 0.40055084228515625
    },
    {
      "epoch": 0.00015830078125,
      "model_forward_time": 0.1150209903717041,
      "step": 25936
    },
    {
      "epoch": 0.00015830078125,
      "step": 25936,
      "training_step_time": 0.39807820320129395
    },
    {
      "epoch": 0.000158306884765625,
      "model_forward_time": 0.1147916316986084,
      "step": 25937
    },
    {
      "epoch": 0.000158306884765625,
      "step": 25937,
      "training_step_time": 0.4088287353515625
    },
    {
      "epoch": 0.00015831298828125,
      "model_forward_time": 0.11585307121276855,
      "step": 25938
    },
    {
      "epoch": 0.00015831298828125,
      "step": 25938,
      "training_step_time": 0.6288399696350098
    },
    {
      "epoch": 0.000158319091796875,
      "model_forward_time": 0.11507105827331543,
      "step": 25939
    },
    {
      "epoch": 0.000158319091796875,
      "step": 25939,
      "training_step_time": 0.4639873504638672
    },
    {
      "epoch": 0.0001583251953125,
      "grad_norm": 0.10528039187192917,
      "learning_rate": 6.508346663614878e-05,
      "loss": 0.0464,
      "step": 25940
    },
    {
      "epoch": 0.0001583251953125,
      "model_forward_time": 0.11420202255249023,
      "step": 25940
    },
    {
      "epoch": 0.0001583251953125,
      "step": 25940,
      "training_step_time": 0.3941676616668701
    },
    {
      "epoch": 0.000158331298828125,
      "model_forward_time": 0.11787581443786621,
      "step": 25941
    },
    {
      "epoch": 0.000158331298828125,
      "step": 25941,
      "training_step_time": 0.3868100643157959
    },
    {
      "epoch": 0.00015833740234375,
      "model_forward_time": 0.1147165298461914,
      "step": 25942
    },
    {
      "epoch": 0.00015833740234375,
      "step": 25942,
      "training_step_time": 0.3652920722961426
    },
    {
      "epoch": 0.000158343505859375,
      "model_forward_time": 0.1148223876953125,
      "step": 25943
    },
    {
      "epoch": 0.000158343505859375,
      "step": 25943,
      "training_step_time": 0.3922758102416992
    },
    {
      "epoch": 0.000158349609375,
      "model_forward_time": 0.11611533164978027,
      "step": 25944
    },
    {
      "epoch": 0.000158349609375,
      "step": 25944,
      "training_step_time": 0.6408519744873047
    },
    {
      "epoch": 0.000158355712890625,
      "model_forward_time": 0.11503958702087402,
      "step": 25945
    },
    {
      "epoch": 0.000158355712890625,
      "step": 25945,
      "training_step_time": 0.4116237163543701
    },
    {
      "epoch": 0.00015836181640625,
      "model_forward_time": 0.11511611938476562,
      "step": 25946
    },
    {
      "epoch": 0.00015836181640625,
      "step": 25946,
      "training_step_time": 0.4007596969604492
    },
    {
      "epoch": 0.000158367919921875,
      "model_forward_time": 0.11447000503540039,
      "step": 25947
    },
    {
      "epoch": 0.000158367919921875,
      "step": 25947,
      "training_step_time": 0.41550564765930176
    },
    {
      "epoch": 0.0001583740234375,
      "model_forward_time": 0.1149287223815918,
      "step": 25948
    },
    {
      "epoch": 0.0001583740234375,
      "step": 25948,
      "training_step_time": 0.3897690773010254
    },
    {
      "epoch": 0.000158380126953125,
      "model_forward_time": 0.11481642723083496,
      "step": 25949
    },
    {
      "epoch": 0.000158380126953125,
      "step": 25949,
      "training_step_time": 0.39868903160095215
    },
    {
      "epoch": 0.00015838623046875,
      "grad_norm": 0.19193530082702637,
      "learning_rate": 6.505719036346539e-05,
      "loss": 0.0469,
      "step": 25950
    },
    {
      "epoch": 0.00015838623046875,
      "model_forward_time": 0.11496853828430176,
      "step": 25950
    },
    {
      "epoch": 0.00015838623046875,
      "step": 25950,
      "training_step_time": 0.6293823719024658
    },
    {
      "epoch": 0.000158392333984375,
      "model_forward_time": 0.11416029930114746,
      "step": 25951
    },
    {
      "epoch": 0.000158392333984375,
      "step": 25951,
      "training_step_time": 0.5083181858062744
    },
    {
      "epoch": 0.0001583984375,
      "model_forward_time": 0.11463403701782227,
      "step": 25952
    },
    {
      "epoch": 0.0001583984375,
      "step": 25952,
      "training_step_time": 0.40903162956237793
    },
    {
      "epoch": 0.000158404541015625,
      "model_forward_time": 0.11462712287902832,
      "step": 25953
    },
    {
      "epoch": 0.000158404541015625,
      "step": 25953,
      "training_step_time": 0.4874997138977051
    },
    {
      "epoch": 0.00015841064453125,
      "model_forward_time": 0.11405611038208008,
      "step": 25954
    },
    {
      "epoch": 0.00015841064453125,
      "step": 25954,
      "training_step_time": 0.3896970748901367
    },
    {
      "epoch": 0.000158416748046875,
      "model_forward_time": 0.11472535133361816,
      "step": 25955
    },
    {
      "epoch": 0.000158416748046875,
      "step": 25955,
      "training_step_time": 0.38637733459472656
    },
    {
      "epoch": 0.0001584228515625,
      "model_forward_time": 0.11535096168518066,
      "step": 25956
    },
    {
      "epoch": 0.0001584228515625,
      "step": 25956,
      "training_step_time": 0.4476583003997803
    },
    {
      "epoch": 0.000158428955078125,
      "model_forward_time": 0.11531782150268555,
      "step": 25957
    },
    {
      "epoch": 0.000158428955078125,
      "step": 25957,
      "training_step_time": 0.4319181442260742
    },
    {
      "epoch": 0.00015843505859375,
      "model_forward_time": 0.11479043960571289,
      "step": 25958
    },
    {
      "epoch": 0.00015843505859375,
      "step": 25958,
      "training_step_time": 0.45489048957824707
    },
    {
      "epoch": 0.000158441162109375,
      "model_forward_time": 0.11517143249511719,
      "step": 25959
    },
    {
      "epoch": 0.000158441162109375,
      "step": 25959,
      "training_step_time": 0.40688538551330566
    },
    {
      "epoch": 0.000158447265625,
      "grad_norm": 0.13055036962032318,
      "learning_rate": 6.503090951680512e-05,
      "loss": 0.0425,
      "step": 25960
    },
    {
      "epoch": 0.000158447265625,
      "model_forward_time": 0.11469507217407227,
      "step": 25960
    },
    {
      "epoch": 0.000158447265625,
      "step": 25960,
      "training_step_time": 0.3874397277832031
    },
    {
      "epoch": 0.000158453369140625,
      "model_forward_time": 0.11472773551940918,
      "step": 25961
    },
    {
      "epoch": 0.000158453369140625,
      "step": 25961,
      "training_step_time": 0.3980240821838379
    },
    {
      "epoch": 0.00015845947265625,
      "model_forward_time": 0.1152186393737793,
      "step": 25962
    },
    {
      "epoch": 0.00015845947265625,
      "step": 25962,
      "training_step_time": 0.6462826728820801
    },
    {
      "epoch": 0.000158465576171875,
      "model_forward_time": 0.11493659019470215,
      "step": 25963
    },
    {
      "epoch": 0.000158465576171875,
      "step": 25963,
      "training_step_time": 0.38912391662597656
    },
    {
      "epoch": 0.0001584716796875,
      "model_forward_time": 0.11488461494445801,
      "step": 25964
    },
    {
      "epoch": 0.0001584716796875,
      "step": 25964,
      "training_step_time": 0.38956546783447266
    },
    {
      "epoch": 0.000158477783203125,
      "model_forward_time": 0.11449623107910156,
      "step": 25965
    },
    {
      "epoch": 0.000158477783203125,
      "step": 25965,
      "training_step_time": 0.4459981918334961
    },
    {
      "epoch": 0.00015848388671875,
      "model_forward_time": 0.11541342735290527,
      "step": 25966
    },
    {
      "epoch": 0.00015848388671875,
      "step": 25966,
      "training_step_time": 0.4127068519592285
    },
    {
      "epoch": 0.000158489990234375,
      "model_forward_time": 0.11455893516540527,
      "step": 25967
    },
    {
      "epoch": 0.000158489990234375,
      "step": 25967,
      "training_step_time": 0.4819362163543701
    },
    {
      "epoch": 0.00015849609375,
      "model_forward_time": 0.1153867244720459,
      "step": 25968
    },
    {
      "epoch": 0.00015849609375,
      "step": 25968,
      "training_step_time": 0.5528872013092041
    },
    {
      "epoch": 0.000158502197265625,
      "model_forward_time": 0.11492538452148438,
      "step": 25969
    },
    {
      "epoch": 0.000158502197265625,
      "step": 25969,
      "training_step_time": 0.40844225883483887
    },
    {
      "epoch": 0.00015850830078125,
      "grad_norm": 0.17229755222797394,
      "learning_rate": 6.500462410415142e-05,
      "loss": 0.0483,
      "step": 25970
    },
    {
      "epoch": 0.00015850830078125,
      "model_forward_time": 0.11474227905273438,
      "step": 25970
    },
    {
      "epoch": 0.00015850830078125,
      "step": 25970,
      "training_step_time": 0.3989596366882324
    },
    {
      "epoch": 0.000158514404296875,
      "model_forward_time": 0.11552786827087402,
      "step": 25971
    },
    {
      "epoch": 0.000158514404296875,
      "step": 25971,
      "training_step_time": 0.4128110408782959
    },
    {
      "epoch": 0.0001585205078125,
      "model_forward_time": 0.11777520179748535,
      "step": 25972
    },
    {
      "epoch": 0.0001585205078125,
      "step": 25972,
      "training_step_time": 0.4709765911102295
    },
    {
      "epoch": 0.000158526611328125,
      "model_forward_time": 0.1147298812866211,
      "step": 25973
    },
    {
      "epoch": 0.000158526611328125,
      "step": 25973,
      "training_step_time": 0.39229393005371094
    },
    {
      "epoch": 0.00015853271484375,
      "model_forward_time": 0.11516165733337402,
      "step": 25974
    },
    {
      "epoch": 0.00015853271484375,
      "step": 25974,
      "training_step_time": 0.708730936050415
    },
    {
      "epoch": 0.000158538818359375,
      "model_forward_time": 0.11528372764587402,
      "step": 25975
    },
    {
      "epoch": 0.000158538818359375,
      "step": 25975,
      "training_step_time": 0.38040590286254883
    },
    {
      "epoch": 0.000158544921875,
      "model_forward_time": 0.11475729942321777,
      "step": 25976
    },
    {
      "epoch": 0.000158544921875,
      "step": 25976,
      "training_step_time": 0.38395237922668457
    },
    {
      "epoch": 0.000158551025390625,
      "model_forward_time": 0.1148838996887207,
      "step": 25977
    },
    {
      "epoch": 0.000158551025390625,
      "step": 25977,
      "training_step_time": 0.3877377510070801
    },
    {
      "epoch": 0.00015855712890625,
      "model_forward_time": 0.1154165267944336,
      "step": 25978
    },
    {
      "epoch": 0.00015855712890625,
      "step": 25978,
      "training_step_time": 0.3939173221588135
    },
    {
      "epoch": 0.000158563232421875,
      "model_forward_time": 0.11467123031616211,
      "step": 25979
    },
    {
      "epoch": 0.000158563232421875,
      "step": 25979,
      "training_step_time": 0.4609065055847168
    },
    {
      "epoch": 0.0001585693359375,
      "grad_norm": 0.14469687640666962,
      "learning_rate": 6.497833413348909e-05,
      "loss": 0.04,
      "step": 25980
    },
    {
      "epoch": 0.0001585693359375,
      "model_forward_time": 0.1157989501953125,
      "step": 25980
    },
    {
      "epoch": 0.0001585693359375,
      "step": 25980,
      "training_step_time": 0.6026606559753418
    },
    {
      "epoch": 0.000158575439453125,
      "model_forward_time": 0.11510062217712402,
      "step": 25981
    },
    {
      "epoch": 0.000158575439453125,
      "step": 25981,
      "training_step_time": 0.492185115814209
    },
    {
      "epoch": 0.00015858154296875,
      "model_forward_time": 0.11481904983520508,
      "step": 25982
    },
    {
      "epoch": 0.00015858154296875,
      "step": 25982,
      "training_step_time": 0.3958711624145508
    },
    {
      "epoch": 0.000158587646484375,
      "model_forward_time": 0.11461305618286133,
      "step": 25983
    },
    {
      "epoch": 0.000158587646484375,
      "step": 25983,
      "training_step_time": 0.41699957847595215
    },
    {
      "epoch": 0.00015859375,
      "model_forward_time": 0.11448502540588379,
      "step": 25984
    },
    {
      "epoch": 0.00015859375,
      "step": 25984,
      "training_step_time": 0.49365997314453125
    },
    {
      "epoch": 0.000158599853515625,
      "model_forward_time": 0.11525464057922363,
      "step": 25985
    },
    {
      "epoch": 0.000158599853515625,
      "step": 25985,
      "training_step_time": 0.4783153533935547
    },
    {
      "epoch": 0.00015860595703125,
      "model_forward_time": 0.11488795280456543,
      "step": 25986
    },
    {
      "epoch": 0.00015860595703125,
      "step": 25986,
      "training_step_time": 0.3850996494293213
    },
    {
      "epoch": 0.000158612060546875,
      "model_forward_time": 0.11545801162719727,
      "step": 25987
    },
    {
      "epoch": 0.000158612060546875,
      "step": 25987,
      "training_step_time": 0.38805699348449707
    },
    {
      "epoch": 0.0001586181640625,
      "model_forward_time": 0.11492705345153809,
      "step": 25988
    },
    {
      "epoch": 0.0001586181640625,
      "step": 25988,
      "training_step_time": 0.3960409164428711
    },
    {
      "epoch": 0.000158624267578125,
      "model_forward_time": 0.11505270004272461,
      "step": 25989
    },
    {
      "epoch": 0.000158624267578125,
      "step": 25989,
      "training_step_time": 0.39161157608032227
    },
    {
      "epoch": 0.00015863037109375,
      "grad_norm": 0.12972065806388855,
      "learning_rate": 6.495203961280434e-05,
      "loss": 0.047,
      "step": 25990
    },
    {
      "epoch": 0.00015863037109375,
      "model_forward_time": 0.11568307876586914,
      "step": 25990
    },
    {
      "epoch": 0.00015863037109375,
      "step": 25990,
      "training_step_time": 0.38941192626953125
    },
    {
      "epoch": 0.000158636474609375,
      "model_forward_time": 0.1152200698852539,
      "step": 25991
    },
    {
      "epoch": 0.000158636474609375,
      "step": 25991,
      "training_step_time": 0.389751672744751
    },
    {
      "epoch": 0.000158642578125,
      "model_forward_time": 0.1156303882598877,
      "step": 25992
    },
    {
      "epoch": 0.000158642578125,
      "step": 25992,
      "training_step_time": 0.4763369560241699
    },
    {
      "epoch": 0.000158648681640625,
      "model_forward_time": 0.11497068405151367,
      "step": 25993
    },
    {
      "epoch": 0.000158648681640625,
      "step": 25993,
      "training_step_time": 0.4535949230194092
    },
    {
      "epoch": 0.00015865478515625,
      "model_forward_time": 0.11510276794433594,
      "step": 25994
    },
    {
      "epoch": 0.00015865478515625,
      "step": 25994,
      "training_step_time": 0.43700432777404785
    },
    {
      "epoch": 0.000158660888671875,
      "model_forward_time": 0.11516904830932617,
      "step": 25995
    },
    {
      "epoch": 0.000158660888671875,
      "step": 25995,
      "training_step_time": 0.4038839340209961
    },
    {
      "epoch": 0.0001586669921875,
      "model_forward_time": 0.11520099639892578,
      "step": 25996
    },
    {
      "epoch": 0.0001586669921875,
      "step": 25996,
      "training_step_time": 0.39007139205932617
    },
    {
      "epoch": 0.000158673095703125,
      "model_forward_time": 0.11574912071228027,
      "step": 25997
    },
    {
      "epoch": 0.000158673095703125,
      "step": 25997,
      "training_step_time": 0.3992915153503418
    },
    {
      "epoch": 0.00015867919921875,
      "model_forward_time": 0.11572480201721191,
      "step": 25998
    },
    {
      "epoch": 0.00015867919921875,
      "step": 25998,
      "training_step_time": 0.5197985172271729
    },
    {
      "epoch": 0.000158685302734375,
      "model_forward_time": 0.1156318187713623,
      "step": 25999
    },
    {
      "epoch": 0.000158685302734375,
      "step": 25999,
      "training_step_time": 0.4926021099090576
    },
    {
      "epoch": 0.00015869140625,
      "grad_norm": 0.18160399794578552,
      "learning_rate": 6.492574055008473e-05,
      "loss": 0.0473,
      "step": 26000
    },
    {
      "epoch": 0.00015869140625,
      "model_forward_time": 0.11349797248840332,
      "step": 26000
    },
    {
      "epoch": 0.00015869140625,
      "step": 26000,
      "training_step_time": 0.35703039169311523
    },
    {
      "epoch": 0.000158697509765625,
      "model_forward_time": 0.11343717575073242,
      "step": 26001
    },
    {
      "epoch": 0.000158697509765625,
      "step": 26001,
      "training_step_time": 0.38707399368286133
    },
    {
      "epoch": 0.00015870361328125,
      "model_forward_time": 0.1137690544128418,
      "step": 26002
    },
    {
      "epoch": 0.00015870361328125,
      "step": 26002,
      "training_step_time": 0.46547842025756836
    },
    {
      "epoch": 0.000158709716796875,
      "model_forward_time": 0.11406779289245605,
      "step": 26003
    },
    {
      "epoch": 0.000158709716796875,
      "step": 26003,
      "training_step_time": 0.37607479095458984
    },
    {
      "epoch": 0.0001587158203125,
      "model_forward_time": 0.11437010765075684,
      "step": 26004
    },
    {
      "epoch": 0.0001587158203125,
      "step": 26004,
      "training_step_time": 0.37788987159729004
    },
    {
      "epoch": 0.000158721923828125,
      "model_forward_time": 0.1141822338104248,
      "step": 26005
    },
    {
      "epoch": 0.000158721923828125,
      "step": 26005,
      "training_step_time": 0.39240217208862305
    },
    {
      "epoch": 0.00015872802734375,
      "model_forward_time": 0.11570048332214355,
      "step": 26006
    },
    {
      "epoch": 0.00015872802734375,
      "step": 26006,
      "training_step_time": 0.3851590156555176
    },
    {
      "epoch": 0.000158734130859375,
      "model_forward_time": 0.11510753631591797,
      "step": 26007
    },
    {
      "epoch": 0.000158734130859375,
      "step": 26007,
      "training_step_time": 0.38698363304138184
    },
    {
      "epoch": 0.000158740234375,
      "model_forward_time": 0.11541581153869629,
      "step": 26008
    },
    {
      "epoch": 0.000158740234375,
      "step": 26008,
      "training_step_time": 0.38414788246154785
    },
    {
      "epoch": 0.000158746337890625,
      "model_forward_time": 0.11546158790588379,
      "step": 26009
    },
    {
      "epoch": 0.000158746337890625,
      "step": 26009,
      "training_step_time": 0.4965019226074219
    },
    {
      "epoch": 0.00015875244140625,
      "grad_norm": 0.13486552238464355,
      "learning_rate": 6.489943695331923e-05,
      "loss": 0.044,
      "step": 26010
    },
    {
      "epoch": 0.00015875244140625,
      "model_forward_time": 0.11612129211425781,
      "step": 26010
    },
    {
      "epoch": 0.00015875244140625,
      "step": 26010,
      "training_step_time": 0.4803740978240967
    },
    {
      "epoch": 0.000158758544921875,
      "model_forward_time": 0.11495685577392578,
      "step": 26011
    },
    {
      "epoch": 0.000158758544921875,
      "step": 26011,
      "training_step_time": 0.4922916889190674
    },
    {
      "epoch": 0.0001587646484375,
      "model_forward_time": 0.114959716796875,
      "step": 26012
    },
    {
      "epoch": 0.0001587646484375,
      "step": 26012,
      "training_step_time": 0.4084963798522949
    },
    {
      "epoch": 0.000158770751953125,
      "model_forward_time": 0.11545205116271973,
      "step": 26013
    },
    {
      "epoch": 0.000158770751953125,
      "step": 26013,
      "training_step_time": 0.3960742950439453
    },
    {
      "epoch": 0.00015877685546875,
      "model_forward_time": 0.11478638648986816,
      "step": 26014
    },
    {
      "epoch": 0.00015877685546875,
      "step": 26014,
      "training_step_time": 0.38056206703186035
    },
    {
      "epoch": 0.000158782958984375,
      "model_forward_time": 0.1155850887298584,
      "step": 26015
    },
    {
      "epoch": 0.000158782958984375,
      "step": 26015,
      "training_step_time": 0.466322660446167
    },
    {
      "epoch": 0.0001587890625,
      "model_forward_time": 0.11535525321960449,
      "step": 26016
    },
    {
      "epoch": 0.0001587890625,
      "step": 26016,
      "training_step_time": 0.41994333267211914
    },
    {
      "epoch": 0.000158795166015625,
      "model_forward_time": 0.11570477485656738,
      "step": 26017
    },
    {
      "epoch": 0.000158795166015625,
      "step": 26017,
      "training_step_time": 0.414048433303833
    },
    {
      "epoch": 0.00015880126953125,
      "model_forward_time": 0.11559152603149414,
      "step": 26018
    },
    {
      "epoch": 0.00015880126953125,
      "step": 26018,
      "training_step_time": 0.3993399143218994
    },
    {
      "epoch": 0.000158807373046875,
      "model_forward_time": 0.11483883857727051,
      "step": 26019
    },
    {
      "epoch": 0.000158807373046875,
      "step": 26019,
      "training_step_time": 0.4008965492248535
    },
    {
      "epoch": 0.0001588134765625,
      "grad_norm": 0.20666185021400452,
      "learning_rate": 6.487312883049819e-05,
      "loss": 0.0478,
      "step": 26020
    },
    {
      "epoch": 0.0001588134765625,
      "model_forward_time": 0.11475610733032227,
      "step": 26020
    },
    {
      "epoch": 0.0001588134765625,
      "step": 26020,
      "training_step_time": 0.38314008712768555
    },
    {
      "epoch": 0.000158819580078125,
      "model_forward_time": 0.1154327392578125,
      "step": 26021
    },
    {
      "epoch": 0.000158819580078125,
      "step": 26021,
      "training_step_time": 0.4086291790008545
    },
    {
      "epoch": 0.00015882568359375,
      "model_forward_time": 0.11543440818786621,
      "step": 26022
    },
    {
      "epoch": 0.00015882568359375,
      "step": 26022,
      "training_step_time": 0.4101722240447998
    },
    {
      "epoch": 0.000158831787109375,
      "model_forward_time": 0.11544275283813477,
      "step": 26023
    },
    {
      "epoch": 0.000158831787109375,
      "step": 26023,
      "training_step_time": 0.40164875984191895
    },
    {
      "epoch": 0.000158837890625,
      "model_forward_time": 0.11517691612243652,
      "step": 26024
    },
    {
      "epoch": 0.000158837890625,
      "step": 26024,
      "training_step_time": 0.4872734546661377
    },
    {
      "epoch": 0.000158843994140625,
      "model_forward_time": 0.11542439460754395,
      "step": 26025
    },
    {
      "epoch": 0.000158843994140625,
      "step": 26025,
      "training_step_time": 0.42505908012390137
    },
    {
      "epoch": 0.00015885009765625,
      "model_forward_time": 0.11522603034973145,
      "step": 26026
    },
    {
      "epoch": 0.00015885009765625,
      "step": 26026,
      "training_step_time": 0.4849886894226074
    },
    {
      "epoch": 0.000158856201171875,
      "model_forward_time": 0.11492633819580078,
      "step": 26027
    },
    {
      "epoch": 0.000158856201171875,
      "step": 26027,
      "training_step_time": 0.42127418518066406
    },
    {
      "epoch": 0.0001588623046875,
      "model_forward_time": 0.1149895191192627,
      "step": 26028
    },
    {
      "epoch": 0.0001588623046875,
      "step": 26028,
      "training_step_time": 0.3959176540374756
    },
    {
      "epoch": 0.000158868408203125,
      "model_forward_time": 0.11514472961425781,
      "step": 26029
    },
    {
      "epoch": 0.000158868408203125,
      "step": 26029,
      "training_step_time": 0.3929107189178467
    },
    {
      "epoch": 0.00015887451171875,
      "grad_norm": 0.1814102828502655,
      "learning_rate": 6.484681618961331e-05,
      "loss": 0.0466,
      "step": 26030
    },
    {
      "epoch": 0.00015887451171875,
      "model_forward_time": 0.11551761627197266,
      "step": 26030
    },
    {
      "epoch": 0.00015887451171875,
      "step": 26030,
      "training_step_time": 0.3746058940887451
    },
    {
      "epoch": 0.000158880615234375,
      "model_forward_time": 0.11493921279907227,
      "step": 26031
    },
    {
      "epoch": 0.000158880615234375,
      "step": 26031,
      "training_step_time": 0.44411468505859375
    },
    {
      "epoch": 0.00015888671875,
      "model_forward_time": 0.11498761177062988,
      "step": 26032
    },
    {
      "epoch": 0.00015888671875,
      "step": 26032,
      "training_step_time": 0.41179919242858887
    },
    {
      "epoch": 0.000158892822265625,
      "model_forward_time": 0.11578512191772461,
      "step": 26033
    },
    {
      "epoch": 0.000158892822265625,
      "step": 26033,
      "training_step_time": 0.40842270851135254
    },
    {
      "epoch": 0.00015889892578125,
      "model_forward_time": 0.11521482467651367,
      "step": 26034
    },
    {
      "epoch": 0.00015889892578125,
      "step": 26034,
      "training_step_time": 0.39771318435668945
    },
    {
      "epoch": 0.000158905029296875,
      "model_forward_time": 0.11502742767333984,
      "step": 26035
    },
    {
      "epoch": 0.000158905029296875,
      "step": 26035,
      "training_step_time": 0.4009833335876465
    },
    {
      "epoch": 0.0001589111328125,
      "model_forward_time": 0.11515974998474121,
      "step": 26036
    },
    {
      "epoch": 0.0001589111328125,
      "step": 26036,
      "training_step_time": 0.40817689895629883
    },
    {
      "epoch": 0.000158917236328125,
      "model_forward_time": 0.11510419845581055,
      "step": 26037
    },
    {
      "epoch": 0.000158917236328125,
      "step": 26037,
      "training_step_time": 0.39005327224731445
    },
    {
      "epoch": 0.00015892333984375,
      "model_forward_time": 0.11576008796691895,
      "step": 26038
    },
    {
      "epoch": 0.00015892333984375,
      "step": 26038,
      "training_step_time": 0.4119274616241455
    },
    {
      "epoch": 0.000158929443359375,
      "model_forward_time": 0.11508059501647949,
      "step": 26039
    },
    {
      "epoch": 0.000158929443359375,
      "step": 26039,
      "training_step_time": 0.4186592102050781
    },
    {
      "epoch": 0.000158935546875,
      "grad_norm": 0.14114417135715485,
      "learning_rate": 6.48204990386577e-05,
      "loss": 0.0433,
      "step": 26040
    },
    {
      "epoch": 0.000158935546875,
      "model_forward_time": 0.11466073989868164,
      "step": 26040
    },
    {
      "epoch": 0.000158935546875,
      "step": 26040,
      "training_step_time": 0.41723155975341797
    },
    {
      "epoch": 0.000158941650390625,
      "model_forward_time": 0.11529827117919922,
      "step": 26041
    },
    {
      "epoch": 0.000158941650390625,
      "step": 26041,
      "training_step_time": 0.43073272705078125
    },
    {
      "epoch": 0.00015894775390625,
      "model_forward_time": 0.11489748954772949,
      "step": 26042
    },
    {
      "epoch": 0.00015894775390625,
      "step": 26042,
      "training_step_time": 0.40371251106262207
    },
    {
      "epoch": 0.000158953857421875,
      "model_forward_time": 0.11514830589294434,
      "step": 26043
    },
    {
      "epoch": 0.000158953857421875,
      "step": 26043,
      "training_step_time": 0.38822460174560547
    },
    {
      "epoch": 0.0001589599609375,
      "model_forward_time": 0.11514544486999512,
      "step": 26044
    },
    {
      "epoch": 0.0001589599609375,
      "step": 26044,
      "training_step_time": 0.3881356716156006
    },
    {
      "epoch": 0.000158966064453125,
      "model_forward_time": 0.11551523208618164,
      "step": 26045
    },
    {
      "epoch": 0.000158966064453125,
      "step": 26045,
      "training_step_time": 0.4587714672088623
    },
    {
      "epoch": 0.00015897216796875,
      "model_forward_time": 0.11579489707946777,
      "step": 26046
    },
    {
      "epoch": 0.00015897216796875,
      "step": 26046,
      "training_step_time": 0.5068669319152832
    },
    {
      "epoch": 0.000158978271484375,
      "model_forward_time": 0.11514520645141602,
      "step": 26047
    },
    {
      "epoch": 0.000158978271484375,
      "step": 26047,
      "training_step_time": 0.42351675033569336
    },
    {
      "epoch": 0.000158984375,
      "model_forward_time": 0.11507630348205566,
      "step": 26048
    },
    {
      "epoch": 0.000158984375,
      "step": 26048,
      "training_step_time": 0.3859138488769531
    },
    {
      "epoch": 0.000158990478515625,
      "model_forward_time": 0.1149911880493164,
      "step": 26049
    },
    {
      "epoch": 0.000158990478515625,
      "step": 26049,
      "training_step_time": 0.37810635566711426
    },
    {
      "epoch": 0.00015899658203125,
      "grad_norm": 0.16765446960926056,
      "learning_rate": 6.479417738562576e-05,
      "loss": 0.0402,
      "step": 26050
    },
    {
      "epoch": 0.00015899658203125,
      "model_forward_time": 0.11555218696594238,
      "step": 26050
    },
    {
      "epoch": 0.00015899658203125,
      "step": 26050,
      "training_step_time": 0.3842966556549072
    },
    {
      "epoch": 0.000159002685546875,
      "model_forward_time": 0.11543869972229004,
      "step": 26051
    },
    {
      "epoch": 0.000159002685546875,
      "step": 26051,
      "training_step_time": 0.39452433586120605
    },
    {
      "epoch": 0.0001590087890625,
      "model_forward_time": 0.11548066139221191,
      "step": 26052
    },
    {
      "epoch": 0.0001590087890625,
      "step": 26052,
      "training_step_time": 0.4049971103668213
    },
    {
      "epoch": 0.000159014892578125,
      "model_forward_time": 0.11556315422058105,
      "step": 26053
    },
    {
      "epoch": 0.000159014892578125,
      "step": 26053,
      "training_step_time": 0.4185194969177246
    },
    {
      "epoch": 0.00015902099609375,
      "model_forward_time": 0.11542057991027832,
      "step": 26054
    },
    {
      "epoch": 0.00015902099609375,
      "step": 26054,
      "training_step_time": 0.46901607513427734
    },
    {
      "epoch": 0.000159027099609375,
      "model_forward_time": 0.11494302749633789,
      "step": 26055
    },
    {
      "epoch": 0.000159027099609375,
      "step": 26055,
      "training_step_time": 0.49236440658569336
    },
    {
      "epoch": 0.000159033203125,
      "model_forward_time": 0.11581993103027344,
      "step": 26056
    },
    {
      "epoch": 0.000159033203125,
      "step": 26056,
      "training_step_time": 0.39040613174438477
    },
    {
      "epoch": 0.000159039306640625,
      "model_forward_time": 0.11502838134765625,
      "step": 26057
    },
    {
      "epoch": 0.000159039306640625,
      "step": 26057,
      "training_step_time": 0.3934509754180908
    },
    {
      "epoch": 0.00015904541015625,
      "model_forward_time": 0.11556863784790039,
      "step": 26058
    },
    {
      "epoch": 0.00015904541015625,
      "step": 26058,
      "training_step_time": 0.3970029354095459
    },
    {
      "epoch": 0.000159051513671875,
      "model_forward_time": 0.11554503440856934,
      "step": 26059
    },
    {
      "epoch": 0.000159051513671875,
      "step": 26059,
      "training_step_time": 0.38942670822143555
    },
    {
      "epoch": 0.0001590576171875,
      "grad_norm": 0.1859961450099945,
      "learning_rate": 6.476785123851336e-05,
      "loss": 0.0449,
      "step": 26060
    },
    {
      "epoch": 0.0001590576171875,
      "model_forward_time": 0.1148686408996582,
      "step": 26060
    },
    {
      "epoch": 0.0001590576171875,
      "step": 26060,
      "training_step_time": 0.3946528434753418
    },
    {
      "epoch": 0.000159063720703125,
      "model_forward_time": 0.11531710624694824,
      "step": 26061
    },
    {
      "epoch": 0.000159063720703125,
      "step": 26061,
      "training_step_time": 0.47025394439697266
    },
    {
      "epoch": 0.00015906982421875,
      "model_forward_time": 0.11496305465698242,
      "step": 26062
    },
    {
      "epoch": 0.00015906982421875,
      "step": 26062,
      "training_step_time": 0.4115731716156006
    },
    {
      "epoch": 0.000159075927734375,
      "model_forward_time": 0.11472916603088379,
      "step": 26063
    },
    {
      "epoch": 0.000159075927734375,
      "step": 26063,
      "training_step_time": 0.4060640335083008
    },
    {
      "epoch": 0.00015908203125,
      "model_forward_time": 0.11509227752685547,
      "step": 26064
    },
    {
      "epoch": 0.00015908203125,
      "step": 26064,
      "training_step_time": 0.3893415927886963
    },
    {
      "epoch": 0.000159088134765625,
      "model_forward_time": 0.11629343032836914,
      "step": 26065
    },
    {
      "epoch": 0.000159088134765625,
      "step": 26065,
      "training_step_time": 0.40915942192077637
    },
    {
      "epoch": 0.00015909423828125,
      "model_forward_time": 0.11538934707641602,
      "step": 26066
    },
    {
      "epoch": 0.00015909423828125,
      "step": 26066,
      "training_step_time": 0.3831489086151123
    },
    {
      "epoch": 0.000159100341796875,
      "model_forward_time": 0.11490201950073242,
      "step": 26067
    },
    {
      "epoch": 0.000159100341796875,
      "step": 26067,
      "training_step_time": 0.4381864070892334
    },
    {
      "epoch": 0.0001591064453125,
      "model_forward_time": 0.11638784408569336,
      "step": 26068
    },
    {
      "epoch": 0.0001591064453125,
      "step": 26068,
      "training_step_time": 0.4807438850402832
    },
    {
      "epoch": 0.000159112548828125,
      "model_forward_time": 0.11514997482299805,
      "step": 26069
    },
    {
      "epoch": 0.000159112548828125,
      "step": 26069,
      "training_step_time": 0.4141385555267334
    },
    {
      "epoch": 0.00015911865234375,
      "grad_norm": 0.12778054177761078,
      "learning_rate": 6.474152060531768e-05,
      "loss": 0.0393,
      "step": 26070
    },
    {
      "epoch": 0.00015911865234375,
      "model_forward_time": 0.11464786529541016,
      "step": 26070
    },
    {
      "epoch": 0.00015911865234375,
      "step": 26070,
      "training_step_time": 0.4681065082550049
    },
    {
      "epoch": 0.000159124755859375,
      "model_forward_time": 0.11559677124023438,
      "step": 26071
    },
    {
      "epoch": 0.000159124755859375,
      "step": 26071,
      "training_step_time": 0.38803887367248535
    },
    {
      "epoch": 0.000159130859375,
      "model_forward_time": 0.11525130271911621,
      "step": 26072
    },
    {
      "epoch": 0.000159130859375,
      "step": 26072,
      "training_step_time": 0.381176233291626
    },
    {
      "epoch": 0.000159136962890625,
      "model_forward_time": 0.11542391777038574,
      "step": 26073
    },
    {
      "epoch": 0.000159136962890625,
      "step": 26073,
      "training_step_time": 0.38436412811279297
    },
    {
      "epoch": 0.00015914306640625,
      "model_forward_time": 0.11499381065368652,
      "step": 26074
    },
    {
      "epoch": 0.00015914306640625,
      "step": 26074,
      "training_step_time": 0.39962315559387207
    },
    {
      "epoch": 0.000159149169921875,
      "model_forward_time": 0.11562418937683105,
      "step": 26075
    },
    {
      "epoch": 0.000159149169921875,
      "step": 26075,
      "training_step_time": 0.40260791778564453
    },
    {
      "epoch": 0.0001591552734375,
      "model_forward_time": 0.11560630798339844,
      "step": 26076
    },
    {
      "epoch": 0.0001591552734375,
      "step": 26076,
      "training_step_time": 0.41216516494750977
    },
    {
      "epoch": 0.000159161376953125,
      "model_forward_time": 0.11555767059326172,
      "step": 26077
    },
    {
      "epoch": 0.000159161376953125,
      "step": 26077,
      "training_step_time": 0.46596407890319824
    },
    {
      "epoch": 0.00015916748046875,
      "model_forward_time": 0.11592483520507812,
      "step": 26078
    },
    {
      "epoch": 0.00015916748046875,
      "step": 26078,
      "training_step_time": 0.39105939865112305
    },
    {
      "epoch": 0.000159173583984375,
      "model_forward_time": 0.11565613746643066,
      "step": 26079
    },
    {
      "epoch": 0.000159173583984375,
      "step": 26079,
      "training_step_time": 0.3877577781677246
    },
    {
      "epoch": 0.0001591796875,
      "grad_norm": 0.09489580243825912,
      "learning_rate": 6.471518549403726e-05,
      "loss": 0.0452,
      "step": 26080
    },
    {
      "epoch": 0.0001591796875,
      "model_forward_time": 0.11486077308654785,
      "step": 26080
    },
    {
      "epoch": 0.0001591796875,
      "step": 26080,
      "training_step_time": 0.391355037689209
    },
    {
      "epoch": 0.000159185791015625,
      "model_forward_time": 0.11539030075073242,
      "step": 26081
    },
    {
      "epoch": 0.000159185791015625,
      "step": 26081,
      "training_step_time": 0.3907966613769531
    },
    {
      "epoch": 0.00015919189453125,
      "model_forward_time": 0.11510062217712402,
      "step": 26082
    },
    {
      "epoch": 0.00015919189453125,
      "step": 26082,
      "training_step_time": 0.41399455070495605
    },
    {
      "epoch": 0.000159197998046875,
      "model_forward_time": 0.11501550674438477,
      "step": 26083
    },
    {
      "epoch": 0.000159197998046875,
      "step": 26083,
      "training_step_time": 0.4353976249694824
    },
    {
      "epoch": 0.0001592041015625,
      "model_forward_time": 0.11472296714782715,
      "step": 26084
    },
    {
      "epoch": 0.0001592041015625,
      "step": 26084,
      "training_step_time": 0.4233360290527344
    },
    {
      "epoch": 0.000159210205078125,
      "model_forward_time": 0.1147468090057373,
      "step": 26085
    },
    {
      "epoch": 0.000159210205078125,
      "step": 26085,
      "training_step_time": 0.4825172424316406
    },
    {
      "epoch": 0.00015921630859375,
      "model_forward_time": 0.11547350883483887,
      "step": 26086
    },
    {
      "epoch": 0.00015921630859375,
      "step": 26086,
      "training_step_time": 0.38576650619506836
    },
    {
      "epoch": 0.000159222412109375,
      "model_forward_time": 0.11502695083618164,
      "step": 26087
    },
    {
      "epoch": 0.000159222412109375,
      "step": 26087,
      "training_step_time": 0.3885977268218994
    },
    {
      "epoch": 0.000159228515625,
      "model_forward_time": 0.11462616920471191,
      "step": 26088
    },
    {
      "epoch": 0.000159228515625,
      "step": 26088,
      "training_step_time": 0.6765854358673096
    },
    {
      "epoch": 0.000159234619140625,
      "model_forward_time": 0.11742305755615234,
      "step": 26089
    },
    {
      "epoch": 0.000159234619140625,
      "step": 26089,
      "training_step_time": 0.3744668960571289
    },
    {
      "epoch": 0.00015924072265625,
      "grad_norm": 0.12812331318855286,
      "learning_rate": 6.468884591267204e-05,
      "loss": 0.0439,
      "step": 26090
    },
    {
      "epoch": 0.00015924072265625,
      "model_forward_time": 0.11822652816772461,
      "step": 26090
    },
    {
      "epoch": 0.00015924072265625,
      "step": 26090,
      "training_step_time": 0.40887880325317383
    },
    {
      "epoch": 0.000159246826171875,
      "model_forward_time": 0.1181187629699707,
      "step": 26091
    },
    {
      "epoch": 0.000159246826171875,
      "step": 26091,
      "training_step_time": 0.41190266609191895
    },
    {
      "epoch": 0.0001592529296875,
      "model_forward_time": 0.11545276641845703,
      "step": 26092
    },
    {
      "epoch": 0.0001592529296875,
      "step": 26092,
      "training_step_time": 0.38895416259765625
    },
    {
      "epoch": 0.000159259033203125,
      "model_forward_time": 0.11460399627685547,
      "step": 26093
    },
    {
      "epoch": 0.000159259033203125,
      "step": 26093,
      "training_step_time": 0.392864465713501
    },
    {
      "epoch": 0.00015926513671875,
      "model_forward_time": 0.11538505554199219,
      "step": 26094
    },
    {
      "epoch": 0.00015926513671875,
      "step": 26094,
      "training_step_time": 0.4356520175933838
    },
    {
      "epoch": 0.000159271240234375,
      "model_forward_time": 0.11472487449645996,
      "step": 26095
    },
    {
      "epoch": 0.000159271240234375,
      "step": 26095,
      "training_step_time": 0.42983555793762207
    },
    {
      "epoch": 0.00015927734375,
      "model_forward_time": 0.11505413055419922,
      "step": 26096
    },
    {
      "epoch": 0.00015927734375,
      "step": 26096,
      "training_step_time": 0.3897116184234619
    },
    {
      "epoch": 0.000159283447265625,
      "model_forward_time": 0.11504173278808594,
      "step": 26097
    },
    {
      "epoch": 0.000159283447265625,
      "step": 26097,
      "training_step_time": 0.4574756622314453
    },
    {
      "epoch": 0.00015928955078125,
      "model_forward_time": 0.11490654945373535,
      "step": 26098
    },
    {
      "epoch": 0.00015928955078125,
      "step": 26098,
      "training_step_time": 0.44739270210266113
    },
    {
      "epoch": 0.000159295654296875,
      "model_forward_time": 0.11472964286804199,
      "step": 26099
    },
    {
      "epoch": 0.000159295654296875,
      "step": 26099,
      "training_step_time": 0.46782875061035156
    },
    {
      "epoch": 0.0001593017578125,
      "grad_norm": 0.1635517030954361,
      "learning_rate": 6.466250186922325e-05,
      "loss": 0.0487,
      "step": 26100
    },
    {
      "epoch": 0.0001593017578125,
      "model_forward_time": 0.11495852470397949,
      "step": 26100
    },
    {
      "epoch": 0.0001593017578125,
      "step": 26100,
      "training_step_time": 0.6390571594238281
    },
    {
      "epoch": 0.000159307861328125,
      "model_forward_time": 0.11469793319702148,
      "step": 26101
    },
    {
      "epoch": 0.000159307861328125,
      "step": 26101,
      "training_step_time": 0.38739633560180664
    },
    {
      "epoch": 0.00015931396484375,
      "model_forward_time": 0.11430621147155762,
      "step": 26102
    },
    {
      "epoch": 0.00015931396484375,
      "step": 26102,
      "training_step_time": 0.38335514068603516
    },
    {
      "epoch": 0.000159320068359375,
      "model_forward_time": 0.11455368995666504,
      "step": 26103
    },
    {
      "epoch": 0.000159320068359375,
      "step": 26103,
      "training_step_time": 0.36313843727111816
    },
    {
      "epoch": 0.000159326171875,
      "model_forward_time": 0.11499810218811035,
      "step": 26104
    },
    {
      "epoch": 0.000159326171875,
      "step": 26104,
      "training_step_time": 0.4383842945098877
    },
    {
      "epoch": 0.000159332275390625,
      "model_forward_time": 0.11548733711242676,
      "step": 26105
    },
    {
      "epoch": 0.000159332275390625,
      "step": 26105,
      "training_step_time": 0.3940622806549072
    },
    {
      "epoch": 0.00015933837890625,
      "model_forward_time": 0.11546444892883301,
      "step": 26106
    },
    {
      "epoch": 0.00015933837890625,
      "step": 26106,
      "training_step_time": 0.672605037689209
    },
    {
      "epoch": 0.000159344482421875,
      "model_forward_time": 0.1149749755859375,
      "step": 26107
    },
    {
      "epoch": 0.000159344482421875,
      "step": 26107,
      "training_step_time": 0.3952033519744873
    },
    {
      "epoch": 0.0001593505859375,
      "model_forward_time": 0.1147911548614502,
      "step": 26108
    },
    {
      "epoch": 0.0001593505859375,
      "step": 26108,
      "training_step_time": 0.38741636276245117
    },
    {
      "epoch": 0.000159356689453125,
      "model_forward_time": 0.11543750762939453,
      "step": 26109
    },
    {
      "epoch": 0.000159356689453125,
      "step": 26109,
      "training_step_time": 0.41737961769104004
    },
    {
      "epoch": 0.00015936279296875,
      "grad_norm": 0.14549106359481812,
      "learning_rate": 6.463615337169353e-05,
      "loss": 0.0462,
      "step": 26110
    },
    {
      "epoch": 0.00015936279296875,
      "model_forward_time": 0.11483311653137207,
      "step": 26110
    },
    {
      "epoch": 0.00015936279296875,
      "step": 26110,
      "training_step_time": 0.4250495433807373
    },
    {
      "epoch": 0.000159368896484375,
      "model_forward_time": 0.11461234092712402,
      "step": 26111
    },
    {
      "epoch": 0.000159368896484375,
      "step": 26111,
      "training_step_time": 0.45421695709228516
    },
    {
      "epoch": 0.000159375,
      "model_forward_time": 0.11607146263122559,
      "step": 26112
    },
    {
      "epoch": 0.000159375,
      "step": 26112,
      "training_step_time": 0.6041719913482666
    },
    {
      "epoch": 0.000159381103515625,
      "model_forward_time": 0.11503362655639648,
      "step": 26113
    },
    {
      "epoch": 0.000159381103515625,
      "step": 26113,
      "training_step_time": 0.38735246658325195
    },
    {
      "epoch": 0.00015938720703125,
      "model_forward_time": 0.1144554615020752,
      "step": 26114
    },
    {
      "epoch": 0.00015938720703125,
      "step": 26114,
      "training_step_time": 0.39011430740356445
    },
    {
      "epoch": 0.000159393310546875,
      "model_forward_time": 0.11453437805175781,
      "step": 26115
    },
    {
      "epoch": 0.000159393310546875,
      "step": 26115,
      "training_step_time": 0.3938784599304199
    },
    {
      "epoch": 0.0001593994140625,
      "model_forward_time": 0.11437845230102539,
      "step": 26116
    },
    {
      "epoch": 0.0001593994140625,
      "step": 26116,
      "training_step_time": 0.4012110233306885
    },
    {
      "epoch": 0.000159405517578125,
      "model_forward_time": 0.11713480949401855,
      "step": 26117
    },
    {
      "epoch": 0.000159405517578125,
      "step": 26117,
      "training_step_time": 0.3753621578216553
    },
    {
      "epoch": 0.00015941162109375,
      "model_forward_time": 0.11571907997131348,
      "step": 26118
    },
    {
      "epoch": 0.00015941162109375,
      "step": 26118,
      "training_step_time": 0.6003985404968262
    },
    {
      "epoch": 0.000159417724609375,
      "model_forward_time": 0.11549997329711914,
      "step": 26119
    },
    {
      "epoch": 0.000159417724609375,
      "step": 26119,
      "training_step_time": 0.39032673835754395
    },
    {
      "epoch": 0.000159423828125,
      "grad_norm": 0.1367143839597702,
      "learning_rate": 6.460980042808687e-05,
      "loss": 0.0444,
      "step": 26120
    },
    {
      "epoch": 0.000159423828125,
      "model_forward_time": 0.11512017250061035,
      "step": 26120
    },
    {
      "epoch": 0.000159423828125,
      "step": 26120,
      "training_step_time": 0.4015340805053711
    },
    {
      "epoch": 0.000159429931640625,
      "model_forward_time": 0.11573648452758789,
      "step": 26121
    },
    {
      "epoch": 0.000159429931640625,
      "step": 26121,
      "training_step_time": 0.3959040641784668
    },
    {
      "epoch": 0.00015943603515625,
      "model_forward_time": 0.11521720886230469,
      "step": 26122
    },
    {
      "epoch": 0.00015943603515625,
      "step": 26122,
      "training_step_time": 0.38689184188842773
    },
    {
      "epoch": 0.000159442138671875,
      "model_forward_time": 0.11536860466003418,
      "step": 26123
    },
    {
      "epoch": 0.000159442138671875,
      "step": 26123,
      "training_step_time": 0.39449405670166016
    },
    {
      "epoch": 0.0001594482421875,
      "model_forward_time": 0.11529970169067383,
      "step": 26124
    },
    {
      "epoch": 0.0001594482421875,
      "step": 26124,
      "training_step_time": 0.54453444480896
    },
    {
      "epoch": 0.000159454345703125,
      "model_forward_time": 0.1152956485748291,
      "step": 26125
    },
    {
      "epoch": 0.000159454345703125,
      "step": 26125,
      "training_step_time": 0.4412417411804199
    },
    {
      "epoch": 0.00015946044921875,
      "model_forward_time": 0.11525487899780273,
      "step": 26126
    },
    {
      "epoch": 0.00015946044921875,
      "step": 26126,
      "training_step_time": 0.44626951217651367
    },
    {
      "epoch": 0.000159466552734375,
      "model_forward_time": 0.11674976348876953,
      "step": 26127
    },
    {
      "epoch": 0.000159466552734375,
      "step": 26127,
      "training_step_time": 0.4685804843902588
    },
    {
      "epoch": 0.00015947265625,
      "model_forward_time": 0.1145780086517334,
      "step": 26128
    },
    {
      "epoch": 0.00015947265625,
      "step": 26128,
      "training_step_time": 0.395857572555542
    },
    {
      "epoch": 0.000159478759765625,
      "model_forward_time": 0.11486577987670898,
      "step": 26129
    },
    {
      "epoch": 0.000159478759765625,
      "step": 26129,
      "training_step_time": 0.392592191696167
    },
    {
      "epoch": 0.00015948486328125,
      "grad_norm": 0.1480177342891693,
      "learning_rate": 6.458344304640858e-05,
      "loss": 0.0423,
      "step": 26130
    },
    {
      "epoch": 0.00015948486328125,
      "model_forward_time": 0.11516594886779785,
      "step": 26130
    },
    {
      "epoch": 0.00015948486328125,
      "step": 26130,
      "training_step_time": 0.43070507049560547
    },
    {
      "epoch": 0.000159490966796875,
      "model_forward_time": 0.11564278602600098,
      "step": 26131
    },
    {
      "epoch": 0.000159490966796875,
      "step": 26131,
      "training_step_time": 0.3929727077484131
    },
    {
      "epoch": 0.0001594970703125,
      "model_forward_time": 0.11812472343444824,
      "step": 26132
    },
    {
      "epoch": 0.0001594970703125,
      "step": 26132,
      "training_step_time": 0.4854469299316406
    },
    {
      "epoch": 0.000159503173828125,
      "model_forward_time": 0.11902546882629395,
      "step": 26133
    },
    {
      "epoch": 0.000159503173828125,
      "step": 26133,
      "training_step_time": 0.47660255432128906
    },
    {
      "epoch": 0.00015950927734375,
      "model_forward_time": 0.11871075630187988,
      "step": 26134
    },
    {
      "epoch": 0.00015950927734375,
      "step": 26134,
      "training_step_time": 0.4196310043334961
    },
    {
      "epoch": 0.000159515380859375,
      "model_forward_time": 0.12428092956542969,
      "step": 26135
    },
    {
      "epoch": 0.000159515380859375,
      "step": 26135,
      "training_step_time": 0.3936197757720947
    },
    {
      "epoch": 0.000159521484375,
      "model_forward_time": 0.12324833869934082,
      "step": 26136
    },
    {
      "epoch": 0.000159521484375,
      "step": 26136,
      "training_step_time": 0.38715076446533203
    },
    {
      "epoch": 0.000159527587890625,
      "model_forward_time": 0.11661052703857422,
      "step": 26137
    },
    {
      "epoch": 0.000159527587890625,
      "step": 26137,
      "training_step_time": 0.3744778633117676
    },
    {
      "epoch": 0.00015953369140625,
      "model_forward_time": 0.11558675765991211,
      "step": 26138
    },
    {
      "epoch": 0.00015953369140625,
      "step": 26138,
      "training_step_time": 0.38883471488952637
    },
    {
      "epoch": 0.000159539794921875,
      "model_forward_time": 0.11546850204467773,
      "step": 26139
    },
    {
      "epoch": 0.000159539794921875,
      "step": 26139,
      "training_step_time": 0.4379873275756836
    },
    {
      "epoch": 0.0001595458984375,
      "grad_norm": 0.17175817489624023,
      "learning_rate": 6.455708123466536e-05,
      "loss": 0.039,
      "step": 26140
    },
    {
      "epoch": 0.0001595458984375,
      "model_forward_time": 0.115692138671875,
      "step": 26140
    },
    {
      "epoch": 0.0001595458984375,
      "step": 26140,
      "training_step_time": 0.4437410831451416
    },
    {
      "epoch": 0.000159552001953125,
      "model_forward_time": 0.11545729637145996,
      "step": 26141
    },
    {
      "epoch": 0.000159552001953125,
      "step": 26141,
      "training_step_time": 0.39812231063842773
    },
    {
      "epoch": 0.00015955810546875,
      "model_forward_time": 0.11589574813842773,
      "step": 26142
    },
    {
      "epoch": 0.00015955810546875,
      "step": 26142,
      "training_step_time": 0.5790755748748779
    },
    {
      "epoch": 0.000159564208984375,
      "model_forward_time": 0.11551141738891602,
      "step": 26143
    },
    {
      "epoch": 0.000159564208984375,
      "step": 26143,
      "training_step_time": 0.39510393142700195
    },
    {
      "epoch": 0.0001595703125,
      "model_forward_time": 0.11542868614196777,
      "step": 26144
    },
    {
      "epoch": 0.0001595703125,
      "step": 26144,
      "training_step_time": 0.3930189609527588
    },
    {
      "epoch": 0.000159576416015625,
      "model_forward_time": 0.1161956787109375,
      "step": 26145
    },
    {
      "epoch": 0.000159576416015625,
      "step": 26145,
      "training_step_time": 0.3862795829772949
    },
    {
      "epoch": 0.00015958251953125,
      "model_forward_time": 0.1167449951171875,
      "step": 26146
    },
    {
      "epoch": 0.00015958251953125,
      "step": 26146,
      "training_step_time": 0.3957531452178955
    },
    {
      "epoch": 0.000159588623046875,
      "model_forward_time": 0.1158444881439209,
      "step": 26147
    },
    {
      "epoch": 0.000159588623046875,
      "step": 26147,
      "training_step_time": 0.40262818336486816
    },
    {
      "epoch": 0.0001595947265625,
      "model_forward_time": 0.11692547798156738,
      "step": 26148
    },
    {
      "epoch": 0.0001595947265625,
      "step": 26148,
      "training_step_time": 0.7318403720855713
    },
    {
      "epoch": 0.000159600830078125,
      "model_forward_time": 0.1150059700012207,
      "step": 26149
    },
    {
      "epoch": 0.000159600830078125,
      "step": 26149,
      "training_step_time": 0.3946347236633301
    },
    {
      "epoch": 0.00015960693359375,
      "grad_norm": 0.17973078787326813,
      "learning_rate": 6.45307150008652e-05,
      "loss": 0.0505,
      "step": 26150
    },
    {
      "epoch": 0.00015960693359375,
      "model_forward_time": 0.11534857749938965,
      "step": 26150
    },
    {
      "epoch": 0.00015960693359375,
      "step": 26150,
      "training_step_time": 0.38810062408447266
    },
    {
      "epoch": 0.000159613037109375,
      "model_forward_time": 0.11478090286254883,
      "step": 26151
    },
    {
      "epoch": 0.000159613037109375,
      "step": 26151,
      "training_step_time": 0.38994741439819336
    },
    {
      "epoch": 0.000159619140625,
      "model_forward_time": 0.11486315727233887,
      "step": 26152
    },
    {
      "epoch": 0.000159619140625,
      "step": 26152,
      "training_step_time": 0.3864467144012451
    },
    {
      "epoch": 0.000159625244140625,
      "model_forward_time": 0.11497831344604492,
      "step": 26153
    },
    {
      "epoch": 0.000159625244140625,
      "step": 26153,
      "training_step_time": 0.4504208564758301
    },
    {
      "epoch": 0.00015963134765625,
      "model_forward_time": 0.1155703067779541,
      "step": 26154
    },
    {
      "epoch": 0.00015963134765625,
      "step": 26154,
      "training_step_time": 0.7023077011108398
    },
    {
      "epoch": 0.000159637451171875,
      "model_forward_time": 0.1151742935180664,
      "step": 26155
    },
    {
      "epoch": 0.000159637451171875,
      "step": 26155,
      "training_step_time": 0.3931708335876465
    },
    {
      "epoch": 0.0001596435546875,
      "model_forward_time": 0.11509561538696289,
      "step": 26156
    },
    {
      "epoch": 0.0001596435546875,
      "step": 26156,
      "training_step_time": 0.39513635635375977
    },
    {
      "epoch": 0.000159649658203125,
      "model_forward_time": 0.1149146556854248,
      "step": 26157
    },
    {
      "epoch": 0.000159649658203125,
      "step": 26157,
      "training_step_time": 0.39194250106811523
    },
    {
      "epoch": 0.00015965576171875,
      "model_forward_time": 0.11483287811279297,
      "step": 26158
    },
    {
      "epoch": 0.00015965576171875,
      "step": 26158,
      "training_step_time": 0.3986639976501465
    },
    {
      "epoch": 0.000159661865234375,
      "model_forward_time": 0.11499524116516113,
      "step": 26159
    },
    {
      "epoch": 0.000159661865234375,
      "step": 26159,
      "training_step_time": 0.40057969093322754
    },
    {
      "epoch": 0.00015966796875,
      "grad_norm": 0.11727569252252579,
      "learning_rate": 6.450434435301751e-05,
      "loss": 0.0484,
      "step": 26160
    },
    {
      "epoch": 0.00015966796875,
      "model_forward_time": 0.11540937423706055,
      "step": 26160
    },
    {
      "epoch": 0.00015966796875,
      "step": 26160,
      "training_step_time": 0.653958797454834
    },
    {
      "epoch": 0.000159674072265625,
      "model_forward_time": 0.11746430397033691,
      "step": 26161
    },
    {
      "epoch": 0.000159674072265625,
      "step": 26161,
      "training_step_time": 0.38155245780944824
    },
    {
      "epoch": 0.00015968017578125,
      "model_forward_time": 0.11753153800964355,
      "step": 26162
    },
    {
      "epoch": 0.00015968017578125,
      "step": 26162,
      "training_step_time": 0.41126203536987305
    },
    {
      "epoch": 0.000159686279296875,
      "model_forward_time": 0.11774373054504395,
      "step": 26163
    },
    {
      "epoch": 0.000159686279296875,
      "step": 26163,
      "training_step_time": 0.3934617042541504
    },
    {
      "epoch": 0.0001596923828125,
      "model_forward_time": 0.11806130409240723,
      "step": 26164
    },
    {
      "epoch": 0.0001596923828125,
      "step": 26164,
      "training_step_time": 0.4125244617462158
    },
    {
      "epoch": 0.000159698486328125,
      "model_forward_time": 0.11903953552246094,
      "step": 26165
    },
    {
      "epoch": 0.000159698486328125,
      "step": 26165,
      "training_step_time": 0.37677812576293945
    },
    {
      "epoch": 0.00015970458984375,
      "model_forward_time": 0.12534713745117188,
      "step": 26166
    },
    {
      "epoch": 0.00015970458984375,
      "step": 26166,
      "training_step_time": 0.74310302734375
    },
    {
      "epoch": 0.000159710693359375,
      "model_forward_time": 0.11821985244750977,
      "step": 26167
    },
    {
      "epoch": 0.000159710693359375,
      "step": 26167,
      "training_step_time": 0.4336247444152832
    },
    {
      "epoch": 0.000159716796875,
      "model_forward_time": 0.11737489700317383,
      "step": 26168
    },
    {
      "epoch": 0.000159716796875,
      "step": 26168,
      "training_step_time": 0.4365501403808594
    },
    {
      "epoch": 0.000159722900390625,
      "model_forward_time": 0.11775851249694824,
      "step": 26169
    },
    {
      "epoch": 0.000159722900390625,
      "step": 26169,
      "training_step_time": 0.37790656089782715
    },
    {
      "epoch": 0.00015972900390625,
      "grad_norm": 0.13666769862174988,
      "learning_rate": 6.447796929913294e-05,
      "loss": 0.0437,
      "step": 26170
    },
    {
      "epoch": 0.00015972900390625,
      "model_forward_time": 0.11620974540710449,
      "step": 26170
    },
    {
      "epoch": 0.00015972900390625,
      "step": 26170,
      "training_step_time": 0.3909733295440674
    },
    {
      "epoch": 0.000159735107421875,
      "model_forward_time": 0.11701130867004395,
      "step": 26171
    },
    {
      "epoch": 0.000159735107421875,
      "step": 26171,
      "training_step_time": 0.3743751049041748
    },
    {
      "epoch": 0.0001597412109375,
      "model_forward_time": 0.11699795722961426,
      "step": 26172
    },
    {
      "epoch": 0.0001597412109375,
      "step": 26172,
      "training_step_time": 0.47381019592285156
    },
    {
      "epoch": 0.000159747314453125,
      "model_forward_time": 0.11799860000610352,
      "step": 26173
    },
    {
      "epoch": 0.000159747314453125,
      "step": 26173,
      "training_step_time": 0.4403378963470459
    },
    {
      "epoch": 0.00015975341796875,
      "model_forward_time": 0.11861228942871094,
      "step": 26174
    },
    {
      "epoch": 0.00015975341796875,
      "step": 26174,
      "training_step_time": 0.48426055908203125
    },
    {
      "epoch": 0.000159759521484375,
      "model_forward_time": 0.11785721778869629,
      "step": 26175
    },
    {
      "epoch": 0.000159759521484375,
      "step": 26175,
      "training_step_time": 0.4296085834503174
    },
    {
      "epoch": 0.000159765625,
      "model_forward_time": 0.12629151344299316,
      "step": 26176
    },
    {
      "epoch": 0.000159765625,
      "step": 26176,
      "training_step_time": 0.38771510124206543
    },
    {
      "epoch": 0.000159771728515625,
      "model_forward_time": 0.11825418472290039,
      "step": 26177
    },
    {
      "epoch": 0.000159771728515625,
      "step": 26177,
      "training_step_time": 0.3833193778991699
    },
    {
      "epoch": 0.00015977783203125,
      "model_forward_time": 0.1166539192199707,
      "step": 26178
    },
    {
      "epoch": 0.00015977783203125,
      "step": 26178,
      "training_step_time": 0.6405417919158936
    },
    {
      "epoch": 0.000159783935546875,
      "model_forward_time": 0.11601805686950684,
      "step": 26179
    },
    {
      "epoch": 0.000159783935546875,
      "step": 26179,
      "training_step_time": 0.4021339416503906
    },
    {
      "epoch": 0.0001597900390625,
      "grad_norm": 0.12650837004184723,
      "learning_rate": 6.445158984722358e-05,
      "loss": 0.0479,
      "step": 26180
    },
    {
      "epoch": 0.0001597900390625,
      "model_forward_time": 0.11652445793151855,
      "step": 26180
    },
    {
      "epoch": 0.0001597900390625,
      "step": 26180,
      "training_step_time": 0.48895978927612305
    },
    {
      "epoch": 0.000159796142578125,
      "model_forward_time": 0.11676859855651855,
      "step": 26181
    },
    {
      "epoch": 0.000159796142578125,
      "step": 26181,
      "training_step_time": 0.46420788764953613
    },
    {
      "epoch": 0.00015980224609375,
      "model_forward_time": 0.11563849449157715,
      "step": 26182
    },
    {
      "epoch": 0.00015980224609375,
      "step": 26182,
      "training_step_time": 0.42881107330322266
    },
    {
      "epoch": 0.000159808349609375,
      "model_forward_time": 0.1163179874420166,
      "step": 26183
    },
    {
      "epoch": 0.000159808349609375,
      "step": 26183,
      "training_step_time": 0.3812878131866455
    },
    {
      "epoch": 0.000159814453125,
      "model_forward_time": 0.11694669723510742,
      "step": 26184
    },
    {
      "epoch": 0.000159814453125,
      "step": 26184,
      "training_step_time": 0.5273478031158447
    },
    {
      "epoch": 0.000159820556640625,
      "model_forward_time": 0.11633586883544922,
      "step": 26185
    },
    {
      "epoch": 0.000159820556640625,
      "step": 26185,
      "training_step_time": 0.38935303688049316
    },
    {
      "epoch": 0.00015982666015625,
      "model_forward_time": 0.11620259284973145,
      "step": 26186
    },
    {
      "epoch": 0.00015982666015625,
      "step": 26186,
      "training_step_time": 0.38663411140441895
    },
    {
      "epoch": 0.000159832763671875,
      "model_forward_time": 0.11745977401733398,
      "step": 26187
    },
    {
      "epoch": 0.000159832763671875,
      "step": 26187,
      "training_step_time": 0.41469335556030273
    },
    {
      "epoch": 0.0001598388671875,
      "model_forward_time": 0.11701631546020508,
      "step": 26188
    },
    {
      "epoch": 0.0001598388671875,
      "step": 26188,
      "training_step_time": 0.4756946563720703
    },
    {
      "epoch": 0.000159844970703125,
      "model_forward_time": 0.1165773868560791,
      "step": 26189
    },
    {
      "epoch": 0.000159844970703125,
      "step": 26189,
      "training_step_time": 0.4244406223297119
    },
    {
      "epoch": 0.00015985107421875,
      "grad_norm": 0.16083618998527527,
      "learning_rate": 6.44252060053028e-05,
      "loss": 0.04,
      "step": 26190
    },
    {
      "epoch": 0.00015985107421875,
      "model_forward_time": 0.11706733703613281,
      "step": 26190
    },
    {
      "epoch": 0.00015985107421875,
      "step": 26190,
      "training_step_time": 0.4193909168243408
    },
    {
      "epoch": 0.000159857177734375,
      "model_forward_time": 0.11648702621459961,
      "step": 26191
    },
    {
      "epoch": 0.000159857177734375,
      "step": 26191,
      "training_step_time": 0.39203405380249023
    },
    {
      "epoch": 0.00015986328125,
      "model_forward_time": 0.11643505096435547,
      "step": 26192
    },
    {
      "epoch": 0.00015986328125,
      "step": 26192,
      "training_step_time": 0.39119672775268555
    },
    {
      "epoch": 0.000159869384765625,
      "model_forward_time": 0.1165926456451416,
      "step": 26193
    },
    {
      "epoch": 0.000159869384765625,
      "step": 26193,
      "training_step_time": 0.38683319091796875
    },
    {
      "epoch": 0.00015987548828125,
      "model_forward_time": 0.11745524406433105,
      "step": 26194
    },
    {
      "epoch": 0.00015987548828125,
      "step": 26194,
      "training_step_time": 0.41765379905700684
    },
    {
      "epoch": 0.000159881591796875,
      "model_forward_time": 0.11646151542663574,
      "step": 26195
    },
    {
      "epoch": 0.000159881591796875,
      "step": 26195,
      "training_step_time": 0.389340877532959
    },
    {
      "epoch": 0.0001598876953125,
      "model_forward_time": 0.11724734306335449,
      "step": 26196
    },
    {
      "epoch": 0.0001598876953125,
      "step": 26196,
      "training_step_time": 0.6111938953399658
    },
    {
      "epoch": 0.000159893798828125,
      "model_forward_time": 0.1165456771850586,
      "step": 26197
    },
    {
      "epoch": 0.000159893798828125,
      "step": 26197,
      "training_step_time": 0.4233665466308594
    },
    {
      "epoch": 0.00015989990234375,
      "model_forward_time": 0.11603331565856934,
      "step": 26198
    },
    {
      "epoch": 0.00015989990234375,
      "step": 26198,
      "training_step_time": 0.3870420455932617
    },
    {
      "epoch": 0.000159906005859375,
      "model_forward_time": 0.11635398864746094,
      "step": 26199
    },
    {
      "epoch": 0.000159906005859375,
      "step": 26199,
      "training_step_time": 0.3908100128173828
    },
    {
      "epoch": 0.000159912109375,
      "grad_norm": 0.1837824136018753,
      "learning_rate": 6.439881778138531e-05,
      "loss": 0.0472,
      "step": 26200
    },
    {
      "epoch": 0.000159912109375,
      "model_forward_time": 0.11623835563659668,
      "step": 26200
    },
    {
      "epoch": 0.000159912109375,
      "step": 26200,
      "training_step_time": 0.41007494926452637
    },
    {
      "epoch": 0.000159918212890625,
      "model_forward_time": 0.1159508228302002,
      "step": 26201
    },
    {
      "epoch": 0.000159918212890625,
      "step": 26201,
      "training_step_time": 0.3677842617034912
    },
    {
      "epoch": 0.00015992431640625,
      "model_forward_time": 0.11642694473266602,
      "step": 26202
    },
    {
      "epoch": 0.00015992431640625,
      "step": 26202,
      "training_step_time": 0.5506608486175537
    },
    {
      "epoch": 0.000159930419921875,
      "model_forward_time": 0.11652851104736328,
      "step": 26203
    },
    {
      "epoch": 0.000159930419921875,
      "step": 26203,
      "training_step_time": 0.4217069149017334
    },
    {
      "epoch": 0.0001599365234375,
      "model_forward_time": 0.11596512794494629,
      "step": 26204
    },
    {
      "epoch": 0.0001599365234375,
      "step": 26204,
      "training_step_time": 0.39299941062927246
    },
    {
      "epoch": 0.000159942626953125,
      "model_forward_time": 0.1160738468170166,
      "step": 26205
    },
    {
      "epoch": 0.000159942626953125,
      "step": 26205,
      "training_step_time": 0.4023911952972412
    },
    {
      "epoch": 0.00015994873046875,
      "model_forward_time": 0.11635184288024902,
      "step": 26206
    },
    {
      "epoch": 0.00015994873046875,
      "step": 26206,
      "training_step_time": 0.3948087692260742
    },
    {
      "epoch": 0.000159954833984375,
      "model_forward_time": 0.11661601066589355,
      "step": 26207
    },
    {
      "epoch": 0.000159954833984375,
      "step": 26207,
      "training_step_time": 0.39226627349853516
    },
    {
      "epoch": 0.0001599609375,
      "model_forward_time": 0.11781907081604004,
      "step": 26208
    },
    {
      "epoch": 0.0001599609375,
      "step": 26208,
      "training_step_time": 0.7432632446289062
    },
    {
      "epoch": 0.000159967041015625,
      "model_forward_time": 0.11626386642456055,
      "step": 26209
    },
    {
      "epoch": 0.000159967041015625,
      "step": 26209,
      "training_step_time": 0.41014695167541504
    },
    {
      "epoch": 0.00015997314453125,
      "grad_norm": 0.1761930286884308,
      "learning_rate": 6.437242518348716e-05,
      "loss": 0.0444,
      "step": 26210
    },
    {
      "epoch": 0.00015997314453125,
      "model_forward_time": 0.11563491821289062,
      "step": 26210
    },
    {
      "epoch": 0.00015997314453125,
      "step": 26210,
      "training_step_time": 0.3992764949798584
    },
    {
      "epoch": 0.000159979248046875,
      "model_forward_time": 0.11616015434265137,
      "step": 26211
    },
    {
      "epoch": 0.000159979248046875,
      "step": 26211,
      "training_step_time": 0.3902297019958496
    },
    {
      "epoch": 0.0001599853515625,
      "model_forward_time": 0.11592245101928711,
      "step": 26212
    },
    {
      "epoch": 0.0001599853515625,
      "step": 26212,
      "training_step_time": 0.3984954357147217
    },
    {
      "epoch": 0.000159991455078125,
      "model_forward_time": 0.11702966690063477,
      "step": 26213
    },
    {
      "epoch": 0.000159991455078125,
      "step": 26213,
      "training_step_time": 0.42920446395874023
    },
    {
      "epoch": 0.00015999755859375,
      "model_forward_time": 0.11709237098693848,
      "step": 26214
    },
    {
      "epoch": 0.00015999755859375,
      "step": 26214,
      "training_step_time": 0.5469977855682373
    },
    {
      "epoch": 0.000160003662109375,
      "model_forward_time": 0.11692643165588379,
      "step": 26215
    },
    {
      "epoch": 0.000160003662109375,
      "step": 26215,
      "training_step_time": 0.3742489814758301
    },
    {
      "epoch": 0.000160009765625,
      "model_forward_time": 0.11648392677307129,
      "step": 26216
    },
    {
      "epoch": 0.000160009765625,
      "step": 26216,
      "training_step_time": 0.4492647647857666
    },
    {
      "epoch": 0.000160015869140625,
      "model_forward_time": 0.11673378944396973,
      "step": 26217
    },
    {
      "epoch": 0.000160015869140625,
      "step": 26217,
      "training_step_time": 0.3948068618774414
    },
    {
      "epoch": 0.00016002197265625,
      "model_forward_time": 0.11858487129211426,
      "step": 26218
    },
    {
      "epoch": 0.00016002197265625,
      "step": 26218,
      "training_step_time": 0.3973860740661621
    },
    {
      "epoch": 0.000160028076171875,
      "model_forward_time": 0.1179351806640625,
      "step": 26219
    },
    {
      "epoch": 0.000160028076171875,
      "step": 26219,
      "training_step_time": 0.38071537017822266
    },
    {
      "epoch": 0.0001600341796875,
      "grad_norm": 0.1246371641755104,
      "learning_rate": 6.43460282196257e-05,
      "loss": 0.045,
      "step": 26220
    },
    {
      "epoch": 0.0001600341796875,
      "model_forward_time": 0.11634039878845215,
      "step": 26220
    },
    {
      "epoch": 0.0001600341796875,
      "step": 26220,
      "training_step_time": 0.5230624675750732
    },
    {
      "epoch": 0.000160040283203125,
      "model_forward_time": 0.11773061752319336,
      "step": 26221
    },
    {
      "epoch": 0.000160040283203125,
      "step": 26221,
      "training_step_time": 0.5218594074249268
    },
    {
      "epoch": 0.00016004638671875,
      "model_forward_time": 0.12032341957092285,
      "step": 26222
    },
    {
      "epoch": 0.00016004638671875,
      "step": 26222,
      "training_step_time": 0.4250953197479248
    },
    {
      "epoch": 0.000160052490234375,
      "model_forward_time": 0.11585593223571777,
      "step": 26223
    },
    {
      "epoch": 0.000160052490234375,
      "step": 26223,
      "training_step_time": 0.43601202964782715
    },
    {
      "epoch": 0.00016005859375,
      "model_forward_time": 0.12609386444091797,
      "step": 26224
    },
    {
      "epoch": 0.00016005859375,
      "step": 26224,
      "training_step_time": 0.4622914791107178
    },
    {
      "epoch": 0.000160064697265625,
      "model_forward_time": 0.11549663543701172,
      "step": 26225
    },
    {
      "epoch": 0.000160064697265625,
      "step": 26225,
      "training_step_time": 0.41934871673583984
    },
    {
      "epoch": 0.00016007080078125,
      "model_forward_time": 0.11554217338562012,
      "step": 26226
    },
    {
      "epoch": 0.00016007080078125,
      "step": 26226,
      "training_step_time": 0.45322537422180176
    },
    {
      "epoch": 0.000160076904296875,
      "model_forward_time": 0.11641979217529297,
      "step": 26227
    },
    {
      "epoch": 0.000160076904296875,
      "step": 26227,
      "training_step_time": 0.39294981956481934
    },
    {
      "epoch": 0.0001600830078125,
      "model_forward_time": 0.11560988426208496,
      "step": 26228
    },
    {
      "epoch": 0.0001600830078125,
      "step": 26228,
      "training_step_time": 0.3876528739929199
    },
    {
      "epoch": 0.000160089111328125,
      "model_forward_time": 0.11528635025024414,
      "step": 26229
    },
    {
      "epoch": 0.000160089111328125,
      "step": 26229,
      "training_step_time": 0.3678295612335205
    },
    {
      "epoch": 0.00016009521484375,
      "grad_norm": 0.12213561683893204,
      "learning_rate": 6.431962689781969e-05,
      "loss": 0.0464,
      "step": 26230
    },
    {
      "epoch": 0.00016009521484375,
      "model_forward_time": 0.11979222297668457,
      "step": 26230
    },
    {
      "epoch": 0.00016009521484375,
      "step": 26230,
      "training_step_time": 0.44725751876831055
    },
    {
      "epoch": 0.000160101318359375,
      "model_forward_time": 0.11824440956115723,
      "step": 26231
    },
    {
      "epoch": 0.000160101318359375,
      "step": 26231,
      "training_step_time": 0.39992427825927734
    },
    {
      "epoch": 0.000160107421875,
      "model_forward_time": 0.11828088760375977,
      "step": 26232
    },
    {
      "epoch": 0.000160107421875,
      "step": 26232,
      "training_step_time": 0.42185020446777344
    },
    {
      "epoch": 0.000160113525390625,
      "model_forward_time": 0.11830353736877441,
      "step": 26233
    },
    {
      "epoch": 0.000160113525390625,
      "step": 26233,
      "training_step_time": 0.3928203582763672
    },
    {
      "epoch": 0.00016011962890625,
      "model_forward_time": 0.11761331558227539,
      "step": 26234
    },
    {
      "epoch": 0.00016011962890625,
      "step": 26234,
      "training_step_time": 0.377652645111084
    },
    {
      "epoch": 0.000160125732421875,
      "model_forward_time": 0.1175694465637207,
      "step": 26235
    },
    {
      "epoch": 0.000160125732421875,
      "step": 26235,
      "training_step_time": 0.4545938968658447
    },
    {
      "epoch": 0.0001601318359375,
      "model_forward_time": 0.11808061599731445,
      "step": 26236
    },
    {
      "epoch": 0.0001601318359375,
      "step": 26236,
      "training_step_time": 0.41092610359191895
    },
    {
      "epoch": 0.000160137939453125,
      "model_forward_time": 0.11819005012512207,
      "step": 26237
    },
    {
      "epoch": 0.000160137939453125,
      "step": 26237,
      "training_step_time": 0.49634408950805664
    },
    {
      "epoch": 0.00016014404296875,
      "model_forward_time": 0.11859679222106934,
      "step": 26238
    },
    {
      "epoch": 0.00016014404296875,
      "step": 26238,
      "training_step_time": 0.654761791229248
    },
    {
      "epoch": 0.000160150146484375,
      "model_forward_time": 0.1267564296722412,
      "step": 26239
    },
    {
      "epoch": 0.000160150146484375,
      "step": 26239,
      "training_step_time": 0.4739222526550293
    },
    {
      "epoch": 0.00016015625,
      "grad_norm": 0.1742703765630722,
      "learning_rate": 6.42932212260891e-05,
      "loss": 0.0396,
      "step": 26240
    },
    {
      "epoch": 0.00016015625,
      "model_forward_time": 0.11873817443847656,
      "step": 26240
    },
    {
      "epoch": 0.00016015625,
      "step": 26240,
      "training_step_time": 0.5661525726318359
    },
    {
      "epoch": 0.000160162353515625,
      "model_forward_time": 0.11590576171875,
      "step": 26241
    },
    {
      "epoch": 0.000160162353515625,
      "step": 26241,
      "training_step_time": 0.6007187366485596
    },
    {
      "epoch": 0.00016016845703125,
      "model_forward_time": 0.11848068237304688,
      "step": 26242
    },
    {
      "epoch": 0.00016016845703125,
      "step": 26242,
      "training_step_time": 0.6301214694976807
    },
    {
      "epoch": 0.000160174560546875,
      "model_forward_time": 0.12416434288024902,
      "step": 26243
    },
    {
      "epoch": 0.000160174560546875,
      "step": 26243,
      "training_step_time": 0.8123302459716797
    },
    {
      "epoch": 0.0001601806640625,
      "model_forward_time": 0.12343120574951172,
      "step": 26244
    },
    {
      "epoch": 0.0001601806640625,
      "step": 26244,
      "training_step_time": 0.6244776248931885
    },
    {
      "epoch": 0.000160186767578125,
      "model_forward_time": 0.11837506294250488,
      "step": 26245
    },
    {
      "epoch": 0.000160186767578125,
      "step": 26245,
      "training_step_time": 0.7074460983276367
    },
    {
      "epoch": 0.00016019287109375,
      "model_forward_time": 0.12709498405456543,
      "step": 26246
    },
    {
      "epoch": 0.00016019287109375,
      "step": 26246,
      "training_step_time": 0.6907088756561279
    },
    {
      "epoch": 0.000160198974609375,
      "model_forward_time": 0.12027096748352051,
      "step": 26247
    },
    {
      "epoch": 0.000160198974609375,
      "step": 26247,
      "training_step_time": 0.71954345703125
    },
    {
      "epoch": 0.000160205078125,
      "model_forward_time": 0.11646771430969238,
      "step": 26248
    },
    {
      "epoch": 0.000160205078125,
      "step": 26248,
      "training_step_time": 0.7824440002441406
    },
    {
      "epoch": 0.000160211181640625,
      "model_forward_time": 0.11920690536499023,
      "step": 26249
    },
    {
      "epoch": 0.000160211181640625,
      "step": 26249,
      "training_step_time": 0.6859745979309082
    },
    {
      "epoch": 0.00016021728515625,
      "grad_norm": 0.11423065513372421,
      "learning_rate": 6.426681121245527e-05,
      "loss": 0.0452,
      "step": 26250
    },
    {
      "epoch": 0.00016021728515625,
      "model_forward_time": 0.11632966995239258,
      "step": 26250
    },
    {
      "epoch": 0.00016021728515625,
      "step": 26250,
      "training_step_time": 0.6556956768035889
    },
    {
      "epoch": 0.000160223388671875,
      "model_forward_time": 0.12158465385437012,
      "step": 26251
    },
    {
      "epoch": 0.000160223388671875,
      "step": 26251,
      "training_step_time": 0.6790099143981934
    },
    {
      "epoch": 0.0001602294921875,
      "model_forward_time": 0.11732363700866699,
      "step": 26252
    },
    {
      "epoch": 0.0001602294921875,
      "step": 26252,
      "training_step_time": 0.6892774105072021
    },
    {
      "epoch": 0.000160235595703125,
      "model_forward_time": 0.1252121925354004,
      "step": 26253
    },
    {
      "epoch": 0.000160235595703125,
      "step": 26253,
      "training_step_time": 0.6176819801330566
    },
    {
      "epoch": 0.00016024169921875,
      "model_forward_time": 0.12894773483276367,
      "step": 26254
    },
    {
      "epoch": 0.00016024169921875,
      "step": 26254,
      "training_step_time": 0.6849825382232666
    },
    {
      "epoch": 0.000160247802734375,
      "model_forward_time": 0.11910033226013184,
      "step": 26255
    },
    {
      "epoch": 0.000160247802734375,
      "step": 26255,
      "training_step_time": 0.6311802864074707
    },
    {
      "epoch": 0.00016025390625,
      "model_forward_time": 0.1226046085357666,
      "step": 26256
    },
    {
      "epoch": 0.00016025390625,
      "step": 26256,
      "training_step_time": 0.6620690822601318
    },
    {
      "epoch": 0.000160260009765625,
      "model_forward_time": 0.11566615104675293,
      "step": 26257
    },
    {
      "epoch": 0.000160260009765625,
      "step": 26257,
      "training_step_time": 0.7076358795166016
    },
    {
      "epoch": 0.00016026611328125,
      "model_forward_time": 0.11851692199707031,
      "step": 26258
    },
    {
      "epoch": 0.00016026611328125,
      "step": 26258,
      "training_step_time": 0.7376577854156494
    },
    {
      "epoch": 0.000160272216796875,
      "model_forward_time": 0.1392374038696289,
      "step": 26259
    },
    {
      "epoch": 0.000160272216796875,
      "step": 26259,
      "training_step_time": 0.7254106998443604
    },
    {
      "epoch": 0.0001602783203125,
      "grad_norm": 0.12662386894226074,
      "learning_rate": 6.42403968649409e-05,
      "loss": 0.0539,
      "step": 26260
    },
    {
      "epoch": 0.0001602783203125,
      "model_forward_time": 0.12053370475769043,
      "step": 26260
    },
    {
      "epoch": 0.0001602783203125,
      "step": 26260,
      "training_step_time": 0.6103565692901611
    },
    {
      "epoch": 0.000160284423828125,
      "model_forward_time": 0.11905908584594727,
      "step": 26261
    },
    {
      "epoch": 0.000160284423828125,
      "step": 26261,
      "training_step_time": 0.6588258743286133
    },
    {
      "epoch": 0.00016029052734375,
      "model_forward_time": 0.12108612060546875,
      "step": 26262
    },
    {
      "epoch": 0.00016029052734375,
      "step": 26262,
      "training_step_time": 0.7503478527069092
    },
    {
      "epoch": 0.000160296630859375,
      "model_forward_time": 0.11958146095275879,
      "step": 26263
    },
    {
      "epoch": 0.000160296630859375,
      "step": 26263,
      "training_step_time": 0.6875591278076172
    },
    {
      "epoch": 0.000160302734375,
      "model_forward_time": 0.11805081367492676,
      "step": 26264
    },
    {
      "epoch": 0.000160302734375,
      "step": 26264,
      "training_step_time": 0.720526933670044
    },
    {
      "epoch": 0.000160308837890625,
      "model_forward_time": 0.12290024757385254,
      "step": 26265
    },
    {
      "epoch": 0.000160308837890625,
      "step": 26265,
      "training_step_time": 0.7524893283843994
    },
    {
      "epoch": 0.00016031494140625,
      "model_forward_time": 0.11930346488952637,
      "step": 26266
    },
    {
      "epoch": 0.00016031494140625,
      "step": 26266,
      "training_step_time": 0.8081419467926025
    },
    {
      "epoch": 0.000160321044921875,
      "model_forward_time": 0.12479925155639648,
      "step": 26267
    },
    {
      "epoch": 0.000160321044921875,
      "step": 26267,
      "training_step_time": 0.6172177791595459
    },
    {
      "epoch": 0.0001603271484375,
      "model_forward_time": 0.1185150146484375,
      "step": 26268
    },
    {
      "epoch": 0.0001603271484375,
      "step": 26268,
      "training_step_time": 0.5964157581329346
    },
    {
      "epoch": 0.000160333251953125,
      "model_forward_time": 0.11777067184448242,
      "step": 26269
    },
    {
      "epoch": 0.000160333251953125,
      "step": 26269,
      "training_step_time": 0.589329719543457
    },
    {
      "epoch": 0.00016033935546875,
      "grad_norm": 0.14706909656524658,
      "learning_rate": 6.421397819156996e-05,
      "loss": 0.0513,
      "step": 26270
    },
    {
      "epoch": 0.00016033935546875,
      "model_forward_time": 0.12006187438964844,
      "step": 26270
    },
    {
      "epoch": 0.00016033935546875,
      "step": 26270,
      "training_step_time": 0.7560737133026123
    },
    {
      "epoch": 0.000160345458984375,
      "model_forward_time": 0.1199793815612793,
      "step": 26271
    },
    {
      "epoch": 0.000160345458984375,
      "step": 26271,
      "training_step_time": 0.6360073089599609
    },
    {
      "epoch": 0.0001603515625,
      "model_forward_time": 0.11642289161682129,
      "step": 26272
    },
    {
      "epoch": 0.0001603515625,
      "step": 26272,
      "training_step_time": 0.6604330539703369
    },
    {
      "epoch": 0.000160357666015625,
      "model_forward_time": 0.12563037872314453,
      "step": 26273
    },
    {
      "epoch": 0.000160357666015625,
      "step": 26273,
      "training_step_time": 0.6809742450714111
    },
    {
      "epoch": 0.00016036376953125,
      "model_forward_time": 0.12117576599121094,
      "step": 26274
    },
    {
      "epoch": 0.00016036376953125,
      "step": 26274,
      "training_step_time": 0.7446918487548828
    },
    {
      "epoch": 0.000160369873046875,
      "model_forward_time": 0.11715865135192871,
      "step": 26275
    },
    {
      "epoch": 0.000160369873046875,
      "step": 26275,
      "training_step_time": 0.6299076080322266
    },
    {
      "epoch": 0.0001603759765625,
      "model_forward_time": 0.1178889274597168,
      "step": 26276
    },
    {
      "epoch": 0.0001603759765625,
      "step": 26276,
      "training_step_time": 0.6592676639556885
    },
    {
      "epoch": 0.000160382080078125,
      "model_forward_time": 0.11742043495178223,
      "step": 26277
    },
    {
      "epoch": 0.000160382080078125,
      "step": 26277,
      "training_step_time": 0.695573091506958
    },
    {
      "epoch": 0.00016038818359375,
      "model_forward_time": 0.1347801685333252,
      "step": 26278
    },
    {
      "epoch": 0.00016038818359375,
      "step": 26278,
      "training_step_time": 0.6723353862762451
    },
    {
      "epoch": 0.000160394287109375,
      "model_forward_time": 0.1229088306427002,
      "step": 26279
    },
    {
      "epoch": 0.000160394287109375,
      "step": 26279,
      "training_step_time": 0.6112477779388428
    },
    {
      "epoch": 0.000160400390625,
      "grad_norm": 0.100775346159935,
      "learning_rate": 6.418755520036775e-05,
      "loss": 0.0489,
      "step": 26280
    },
    {
      "epoch": 0.000160400390625,
      "model_forward_time": 0.11896514892578125,
      "step": 26280
    },
    {
      "epoch": 0.000160400390625,
      "step": 26280,
      "training_step_time": 0.6985352039337158
    },
    {
      "epoch": 0.000160406494140625,
      "model_forward_time": 0.11902427673339844,
      "step": 26281
    },
    {
      "epoch": 0.000160406494140625,
      "step": 26281,
      "training_step_time": 0.6568377017974854
    },
    {
      "epoch": 0.00016041259765625,
      "model_forward_time": 0.12799906730651855,
      "step": 26282
    },
    {
      "epoch": 0.00016041259765625,
      "step": 26282,
      "training_step_time": 0.7436506748199463
    },
    {
      "epoch": 0.000160418701171875,
      "model_forward_time": 0.1241598129272461,
      "step": 26283
    },
    {
      "epoch": 0.000160418701171875,
      "step": 26283,
      "training_step_time": 0.6577131748199463
    },
    {
      "epoch": 0.0001604248046875,
      "model_forward_time": 0.1175074577331543,
      "step": 26284
    },
    {
      "epoch": 0.0001604248046875,
      "step": 26284,
      "training_step_time": 0.7132928371429443
    },
    {
      "epoch": 0.000160430908203125,
      "model_forward_time": 0.11950850486755371,
      "step": 26285
    },
    {
      "epoch": 0.000160430908203125,
      "step": 26285,
      "training_step_time": 0.754009485244751
    },
    {
      "epoch": 0.00016043701171875,
      "model_forward_time": 0.11710453033447266,
      "step": 26286
    },
    {
      "epoch": 0.00016043701171875,
      "step": 26286,
      "training_step_time": 0.673882007598877
    },
    {
      "epoch": 0.000160443115234375,
      "model_forward_time": 0.11683988571166992,
      "step": 26287
    },
    {
      "epoch": 0.000160443115234375,
      "step": 26287,
      "training_step_time": 0.7534444332122803
    },
    {
      "epoch": 0.00016044921875,
      "model_forward_time": 0.1184394359588623,
      "step": 26288
    },
    {
      "epoch": 0.00016044921875,
      "step": 26288,
      "training_step_time": 0.7232222557067871
    },
    {
      "epoch": 0.000160455322265625,
      "model_forward_time": 0.11746907234191895,
      "step": 26289
    },
    {
      "epoch": 0.000160455322265625,
      "step": 26289,
      "training_step_time": 0.5923278331756592
    },
    {
      "epoch": 0.00016046142578125,
      "grad_norm": 0.12325531989336014,
      "learning_rate": 6.416112789936086e-05,
      "loss": 0.053,
      "step": 26290
    },
    {
      "epoch": 0.00016046142578125,
      "model_forward_time": 0.1178126335144043,
      "step": 26290
    },
    {
      "epoch": 0.00016046142578125,
      "step": 26290,
      "training_step_time": 0.6602647304534912
    },
    {
      "epoch": 0.000160467529296875,
      "model_forward_time": 0.13451218605041504,
      "step": 26291
    },
    {
      "epoch": 0.000160467529296875,
      "step": 26291,
      "training_step_time": 0.6773011684417725
    },
    {
      "epoch": 0.0001604736328125,
      "model_forward_time": 0.11831331253051758,
      "step": 26292
    },
    {
      "epoch": 0.0001604736328125,
      "step": 26292,
      "training_step_time": 0.707960844039917
    },
    {
      "epoch": 0.000160479736328125,
      "model_forward_time": 0.12152838706970215,
      "step": 26293
    },
    {
      "epoch": 0.000160479736328125,
      "step": 26293,
      "training_step_time": 0.656146764755249
    },
    {
      "epoch": 0.00016048583984375,
      "model_forward_time": 0.12079095840454102,
      "step": 26294
    },
    {
      "epoch": 0.00016048583984375,
      "step": 26294,
      "training_step_time": 0.7588157653808594
    },
    {
      "epoch": 0.000160491943359375,
      "model_forward_time": 0.1309058666229248,
      "step": 26295
    },
    {
      "epoch": 0.000160491943359375,
      "step": 26295,
      "training_step_time": 0.6290059089660645
    },
    {
      "epoch": 0.000160498046875,
      "model_forward_time": 0.11848306655883789,
      "step": 26296
    },
    {
      "epoch": 0.000160498046875,
      "step": 26296,
      "training_step_time": 0.6944293975830078
    },
    {
      "epoch": 0.000160504150390625,
      "model_forward_time": 0.11526870727539062,
      "step": 26297
    },
    {
      "epoch": 0.000160504150390625,
      "step": 26297,
      "training_step_time": 0.6136510372161865
    },
    {
      "epoch": 0.00016051025390625,
      "model_forward_time": 0.11799144744873047,
      "step": 26298
    },
    {
      "epoch": 0.00016051025390625,
      "step": 26298,
      "training_step_time": 0.7059555053710938
    },
    {
      "epoch": 0.000160516357421875,
      "model_forward_time": 0.11972951889038086,
      "step": 26299
    },
    {
      "epoch": 0.000160516357421875,
      "step": 26299,
      "training_step_time": 0.6574957370758057
    },
    {
      "epoch": 0.0001605224609375,
      "grad_norm": 0.16784192621707916,
      "learning_rate": 6.413469629657723e-05,
      "loss": 0.0514,
      "step": 26300
    },
    {
      "epoch": 0.0001605224609375,
      "model_forward_time": 0.13616013526916504,
      "step": 26300
    },
    {
      "epoch": 0.0001605224609375,
      "step": 26300,
      "training_step_time": 0.6348056793212891
    },
    {
      "epoch": 0.000160528564453125,
      "model_forward_time": 0.12511110305786133,
      "step": 26301
    },
    {
      "epoch": 0.000160528564453125,
      "step": 26301,
      "training_step_time": 0.7179040908813477
    },
    {
      "epoch": 0.00016053466796875,
      "model_forward_time": 0.12726163864135742,
      "step": 26302
    },
    {
      "epoch": 0.00016053466796875,
      "step": 26302,
      "training_step_time": 0.7409260272979736
    },
    {
      "epoch": 0.000160540771484375,
      "model_forward_time": 0.11912274360656738,
      "step": 26303
    },
    {
      "epoch": 0.000160540771484375,
      "step": 26303,
      "training_step_time": 0.7074606418609619
    },
    {
      "epoch": 0.000160546875,
      "model_forward_time": 0.11943817138671875,
      "step": 26304
    },
    {
      "epoch": 0.000160546875,
      "step": 26304,
      "training_step_time": 0.5458605289459229
    },
    {
      "epoch": 0.000160552978515625,
      "model_forward_time": 0.11913228034973145,
      "step": 26305
    },
    {
      "epoch": 0.000160552978515625,
      "step": 26305,
      "training_step_time": 0.5402047634124756
    },
    {
      "epoch": 0.00016055908203125,
      "model_forward_time": 0.12042379379272461,
      "step": 26306
    },
    {
      "epoch": 0.00016055908203125,
      "step": 26306,
      "training_step_time": 0.5008058547973633
    },
    {
      "epoch": 0.000160565185546875,
      "model_forward_time": 0.12163996696472168,
      "step": 26307
    },
    {
      "epoch": 0.000160565185546875,
      "step": 26307,
      "training_step_time": 0.4881253242492676
    },
    {
      "epoch": 0.0001605712890625,
      "model_forward_time": 0.12021422386169434,
      "step": 26308
    },
    {
      "epoch": 0.0001605712890625,
      "step": 26308,
      "training_step_time": 0.500774621963501
    },
    {
      "epoch": 0.000160577392578125,
      "model_forward_time": 0.11807537078857422,
      "step": 26309
    },
    {
      "epoch": 0.000160577392578125,
      "step": 26309,
      "training_step_time": 0.4291238784790039
    },
    {
      "epoch": 0.00016058349609375,
      "grad_norm": 0.13760606944561005,
      "learning_rate": 6.410826040004607e-05,
      "loss": 0.0511,
      "step": 26310
    },
    {
      "epoch": 0.00016058349609375,
      "model_forward_time": 0.11812973022460938,
      "step": 26310
    },
    {
      "epoch": 0.00016058349609375,
      "step": 26310,
      "training_step_time": 0.45969343185424805
    },
    {
      "epoch": 0.000160589599609375,
      "model_forward_time": 0.11783337593078613,
      "step": 26311
    },
    {
      "epoch": 0.000160589599609375,
      "step": 26311,
      "training_step_time": 0.5003604888916016
    },
    {
      "epoch": 0.000160595703125,
      "model_forward_time": 0.11677837371826172,
      "step": 26312
    },
    {
      "epoch": 0.000160595703125,
      "step": 26312,
      "training_step_time": 0.40778517723083496
    },
    {
      "epoch": 0.000160601806640625,
      "model_forward_time": 0.11649084091186523,
      "step": 26313
    },
    {
      "epoch": 0.000160601806640625,
      "step": 26313,
      "training_step_time": 0.46815967559814453
    },
    {
      "epoch": 0.00016060791015625,
      "model_forward_time": 0.11612844467163086,
      "step": 26314
    },
    {
      "epoch": 0.00016060791015625,
      "step": 26314,
      "training_step_time": 0.4424011707305908
    },
    {
      "epoch": 0.000160614013671875,
      "model_forward_time": 0.11530542373657227,
      "step": 26315
    },
    {
      "epoch": 0.000160614013671875,
      "step": 26315,
      "training_step_time": 0.42064952850341797
    },
    {
      "epoch": 0.0001606201171875,
      "model_forward_time": 0.11539316177368164,
      "step": 26316
    },
    {
      "epoch": 0.0001606201171875,
      "step": 26316,
      "training_step_time": 0.4650700092315674
    },
    {
      "epoch": 0.000160626220703125,
      "model_forward_time": 0.11543655395507812,
      "step": 26317
    },
    {
      "epoch": 0.000160626220703125,
      "step": 26317,
      "training_step_time": 0.37790393829345703
    },
    {
      "epoch": 0.00016063232421875,
      "model_forward_time": 0.11499619483947754,
      "step": 26318
    },
    {
      "epoch": 0.00016063232421875,
      "step": 26318,
      "training_step_time": 0.3762814998626709
    },
    {
      "epoch": 0.000160638427734375,
      "model_forward_time": 0.11496448516845703,
      "step": 26319
    },
    {
      "epoch": 0.000160638427734375,
      "step": 26319,
      "training_step_time": 0.38121938705444336
    },
    {
      "epoch": 0.00016064453125,
      "grad_norm": 0.15651468932628632,
      "learning_rate": 6.408182021779791e-05,
      "loss": 0.0475,
      "step": 26320
    },
    {
      "epoch": 0.00016064453125,
      "model_forward_time": 0.11545920372009277,
      "step": 26320
    },
    {
      "epoch": 0.00016064453125,
      "step": 26320,
      "training_step_time": 0.39012980461120605
    },
    {
      "epoch": 0.000160650634765625,
      "model_forward_time": 0.11518621444702148,
      "step": 26321
    },
    {
      "epoch": 0.000160650634765625,
      "step": 26321,
      "training_step_time": 0.40390706062316895
    },
    {
      "epoch": 0.00016065673828125,
      "model_forward_time": 0.11523985862731934,
      "step": 26322
    },
    {
      "epoch": 0.00016065673828125,
      "step": 26322,
      "training_step_time": 0.39563870429992676
    },
    {
      "epoch": 0.000160662841796875,
      "model_forward_time": 0.11773347854614258,
      "step": 26323
    },
    {
      "epoch": 0.000160662841796875,
      "step": 26323,
      "training_step_time": 0.4134860038757324
    },
    {
      "epoch": 0.0001606689453125,
      "model_forward_time": 0.11896204948425293,
      "step": 26324
    },
    {
      "epoch": 0.0001606689453125,
      "step": 26324,
      "training_step_time": 0.4331507682800293
    },
    {
      "epoch": 0.000160675048828125,
      "model_forward_time": 0.11817336082458496,
      "step": 26325
    },
    {
      "epoch": 0.000160675048828125,
      "step": 26325,
      "training_step_time": 0.4332253932952881
    },
    {
      "epoch": 0.00016068115234375,
      "model_forward_time": 0.11785125732421875,
      "step": 26326
    },
    {
      "epoch": 0.00016068115234375,
      "step": 26326,
      "training_step_time": 0.3818788528442383
    },
    {
      "epoch": 0.000160687255859375,
      "model_forward_time": 0.1179044246673584,
      "step": 26327
    },
    {
      "epoch": 0.000160687255859375,
      "step": 26327,
      "training_step_time": 0.43635106086730957
    },
    {
      "epoch": 0.000160693359375,
      "model_forward_time": 0.11539769172668457,
      "step": 26328
    },
    {
      "epoch": 0.000160693359375,
      "step": 26328,
      "training_step_time": 0.3905348777770996
    },
    {
      "epoch": 0.000160699462890625,
      "model_forward_time": 0.11726617813110352,
      "step": 26329
    },
    {
      "epoch": 0.000160699462890625,
      "step": 26329,
      "training_step_time": 0.38791632652282715
    },
    {
      "epoch": 0.00016070556640625,
      "grad_norm": 0.13097922503948212,
      "learning_rate": 6.405537575786456e-05,
      "loss": 0.0588,
      "step": 26330
    },
    {
      "epoch": 0.00016070556640625,
      "model_forward_time": 0.11530852317810059,
      "step": 26330
    },
    {
      "epoch": 0.00016070556640625,
      "step": 26330,
      "training_step_time": 0.38578104972839355
    },
    {
      "epoch": 0.000160711669921875,
      "model_forward_time": 0.11537551879882812,
      "step": 26331
    },
    {
      "epoch": 0.000160711669921875,
      "step": 26331,
      "training_step_time": 0.41605257987976074
    },
    {
      "epoch": 0.0001607177734375,
      "model_forward_time": 0.11553168296813965,
      "step": 26332
    },
    {
      "epoch": 0.0001607177734375,
      "step": 26332,
      "training_step_time": 0.40836358070373535
    },
    {
      "epoch": 0.000160723876953125,
      "model_forward_time": 0.11545515060424805,
      "step": 26333
    },
    {
      "epoch": 0.000160723876953125,
      "step": 26333,
      "training_step_time": 0.3985433578491211
    },
    {
      "epoch": 0.00016072998046875,
      "model_forward_time": 0.11539268493652344,
      "step": 26334
    },
    {
      "epoch": 0.00016072998046875,
      "step": 26334,
      "training_step_time": 0.3969705104827881
    },
    {
      "epoch": 0.000160736083984375,
      "model_forward_time": 0.11507320404052734,
      "step": 26335
    },
    {
      "epoch": 0.000160736083984375,
      "step": 26335,
      "training_step_time": 0.37822484970092773
    },
    {
      "epoch": 0.0001607421875,
      "model_forward_time": 0.11653828620910645,
      "step": 26336
    },
    {
      "epoch": 0.0001607421875,
      "step": 26336,
      "training_step_time": 0.38019227981567383
    },
    {
      "epoch": 0.000160748291015625,
      "model_forward_time": 0.11618709564208984,
      "step": 26337
    },
    {
      "epoch": 0.000160748291015625,
      "step": 26337,
      "training_step_time": 0.4518318176269531
    },
    {
      "epoch": 0.00016075439453125,
      "model_forward_time": 0.11827683448791504,
      "step": 26338
    },
    {
      "epoch": 0.00016075439453125,
      "step": 26338,
      "training_step_time": 0.4637603759765625
    },
    {
      "epoch": 0.000160760498046875,
      "model_forward_time": 0.11842465400695801,
      "step": 26339
    },
    {
      "epoch": 0.000160760498046875,
      "step": 26339,
      "training_step_time": 1.0503156185150146
    },
    {
      "epoch": 0.0001607666015625,
      "grad_norm": 0.1761583536863327,
      "learning_rate": 6.402892702827916e-05,
      "loss": 0.0481,
      "step": 26340
    },
    {
      "epoch": 0.0001607666015625,
      "model_forward_time": 0.11745238304138184,
      "step": 26340
    },
    {
      "epoch": 0.0001607666015625,
      "step": 26340,
      "training_step_time": 0.4542217254638672
    },
    {
      "epoch": 0.000160772705078125,
      "model_forward_time": 0.1147606372833252,
      "step": 26341
    },
    {
      "epoch": 0.000160772705078125,
      "step": 26341,
      "training_step_time": 0.38512516021728516
    },
    {
      "epoch": 0.00016077880859375,
      "model_forward_time": 0.11416268348693848,
      "step": 26342
    },
    {
      "epoch": 0.00016077880859375,
      "step": 26342,
      "training_step_time": 0.46811938285827637
    },
    {
      "epoch": 0.000160784912109375,
      "model_forward_time": 0.114471435546875,
      "step": 26343
    },
    {
      "epoch": 0.000160784912109375,
      "step": 26343,
      "training_step_time": 0.3982408046722412
    },
    {
      "epoch": 0.000160791015625,
      "model_forward_time": 0.11458849906921387,
      "step": 26344
    },
    {
      "epoch": 0.000160791015625,
      "step": 26344,
      "training_step_time": 0.46731042861938477
    },
    {
      "epoch": 0.000160797119140625,
      "model_forward_time": 0.11482119560241699,
      "step": 26345
    },
    {
      "epoch": 0.000160797119140625,
      "step": 26345,
      "training_step_time": 0.6595683097839355
    },
    {
      "epoch": 0.00016080322265625,
      "model_forward_time": 0.11442279815673828,
      "step": 26346
    },
    {
      "epoch": 0.00016080322265625,
      "step": 26346,
      "training_step_time": 0.3798062801361084
    },
    {
      "epoch": 0.000160809326171875,
      "model_forward_time": 0.11406970024108887,
      "step": 26347
    },
    {
      "epoch": 0.000160809326171875,
      "step": 26347,
      "training_step_time": 0.38257360458374023
    },
    {
      "epoch": 0.0001608154296875,
      "model_forward_time": 0.11492609977722168,
      "step": 26348
    },
    {
      "epoch": 0.0001608154296875,
      "step": 26348,
      "training_step_time": 0.389540433883667
    },
    {
      "epoch": 0.000160821533203125,
      "model_forward_time": 0.11467719078063965,
      "step": 26349
    },
    {
      "epoch": 0.000160821533203125,
      "step": 26349,
      "training_step_time": 0.38665127754211426
    },
    {
      "epoch": 0.00016082763671875,
      "grad_norm": 0.22752192616462708,
      "learning_rate": 6.400247403707617e-05,
      "loss": 0.0548,
      "step": 26350
    },
    {
      "epoch": 0.00016082763671875,
      "model_forward_time": 0.11475062370300293,
      "step": 26350
    },
    {
      "epoch": 0.00016082763671875,
      "step": 26350,
      "training_step_time": 0.3675522804260254
    },
    {
      "epoch": 0.000160833740234375,
      "model_forward_time": 0.11565303802490234,
      "step": 26351
    },
    {
      "epoch": 0.000160833740234375,
      "step": 26351,
      "training_step_time": 0.4658222198486328
    },
    {
      "epoch": 0.00016083984375,
      "model_forward_time": 0.11823534965515137,
      "step": 26352
    },
    {
      "epoch": 0.00016083984375,
      "step": 26352,
      "training_step_time": 0.4688234329223633
    },
    {
      "epoch": 0.000160845947265625,
      "model_forward_time": 0.11462593078613281,
      "step": 26353
    },
    {
      "epoch": 0.000160845947265625,
      "step": 26353,
      "training_step_time": 0.3937828540802002
    },
    {
      "epoch": 0.00016085205078125,
      "model_forward_time": 0.11582255363464355,
      "step": 26354
    },
    {
      "epoch": 0.00016085205078125,
      "step": 26354,
      "training_step_time": 0.4149482250213623
    },
    {
      "epoch": 0.000160858154296875,
      "model_forward_time": 0.11528754234313965,
      "step": 26355
    },
    {
      "epoch": 0.000160858154296875,
      "step": 26355,
      "training_step_time": 0.39019775390625
    },
    {
      "epoch": 0.0001608642578125,
      "model_forward_time": 0.11509871482849121,
      "step": 26356
    },
    {
      "epoch": 0.0001608642578125,
      "step": 26356,
      "training_step_time": 0.40348124504089355
    },
    {
      "epoch": 0.000160870361328125,
      "model_forward_time": 0.11513757705688477,
      "step": 26357
    },
    {
      "epoch": 0.000160870361328125,
      "step": 26357,
      "training_step_time": 0.5522463321685791
    },
    {
      "epoch": 0.00016087646484375,
      "model_forward_time": 0.11555910110473633,
      "step": 26358
    },
    {
      "epoch": 0.00016087646484375,
      "step": 26358,
      "training_step_time": 0.4349045753479004
    },
    {
      "epoch": 0.000160882568359375,
      "model_forward_time": 0.11517119407653809,
      "step": 26359
    },
    {
      "epoch": 0.000160882568359375,
      "step": 26359,
      "training_step_time": 0.39614152908325195
    },
    {
      "epoch": 0.000160888671875,
      "grad_norm": 0.12883445620536804,
      "learning_rate": 6.397601679229126e-05,
      "loss": 0.0468,
      "step": 26360
    },
    {
      "epoch": 0.000160888671875,
      "model_forward_time": 0.11508607864379883,
      "step": 26360
    },
    {
      "epoch": 0.000160888671875,
      "step": 26360,
      "training_step_time": 0.4125242233276367
    },
    {
      "epoch": 0.000160894775390625,
      "model_forward_time": 0.11454987525939941,
      "step": 26361
    },
    {
      "epoch": 0.000160894775390625,
      "step": 26361,
      "training_step_time": 0.4093594551086426
    },
    {
      "epoch": 0.00016090087890625,
      "model_forward_time": 0.11507105827331543,
      "step": 26362
    },
    {
      "epoch": 0.00016090087890625,
      "step": 26362,
      "training_step_time": 0.38982295989990234
    },
    {
      "epoch": 0.000160906982421875,
      "model_forward_time": 0.11525940895080566,
      "step": 26363
    },
    {
      "epoch": 0.000160906982421875,
      "step": 26363,
      "training_step_time": 0.6398880481719971
    },
    {
      "epoch": 0.0001609130859375,
      "model_forward_time": 0.11535358428955078,
      "step": 26364
    },
    {
      "epoch": 0.0001609130859375,
      "step": 26364,
      "training_step_time": 0.3666670322418213
    },
    {
      "epoch": 0.000160919189453125,
      "model_forward_time": 0.1160581111907959,
      "step": 26365
    },
    {
      "epoch": 0.000160919189453125,
      "step": 26365,
      "training_step_time": 0.41401052474975586
    },
    {
      "epoch": 0.00016092529296875,
      "model_forward_time": 0.11586356163024902,
      "step": 26366
    },
    {
      "epoch": 0.00016092529296875,
      "step": 26366,
      "training_step_time": 0.4929022789001465
    },
    {
      "epoch": 0.000160931396484375,
      "model_forward_time": 0.1158607006072998,
      "step": 26367
    },
    {
      "epoch": 0.000160931396484375,
      "step": 26367,
      "training_step_time": 0.38948559761047363
    },
    {
      "epoch": 0.0001609375,
      "model_forward_time": 0.11436176300048828,
      "step": 26368
    },
    {
      "epoch": 0.0001609375,
      "step": 26368,
      "training_step_time": 0.4038691520690918
    },
    {
      "epoch": 0.000160943603515625,
      "model_forward_time": 0.11527109146118164,
      "step": 26369
    },
    {
      "epoch": 0.000160943603515625,
      "step": 26369,
      "training_step_time": 0.7516980171203613
    },
    {
      "epoch": 0.00016094970703125,
      "grad_norm": 0.1644098311662674,
      "learning_rate": 6.394955530196147e-05,
      "loss": 0.0535,
      "step": 26370
    },
    {
      "epoch": 0.00016094970703125,
      "model_forward_time": 0.1144261360168457,
      "step": 26370
    },
    {
      "epoch": 0.00016094970703125,
      "step": 26370,
      "training_step_time": 0.46436309814453125
    },
    {
      "epoch": 0.000160955810546875,
      "model_forward_time": 0.11415243148803711,
      "step": 26371
    },
    {
      "epoch": 0.000160955810546875,
      "step": 26371,
      "training_step_time": 0.46057844161987305
    },
    {
      "epoch": 0.0001609619140625,
      "model_forward_time": 0.1148078441619873,
      "step": 26372
    },
    {
      "epoch": 0.0001609619140625,
      "step": 26372,
      "training_step_time": 0.4021327495574951
    },
    {
      "epoch": 0.000160968017578125,
      "model_forward_time": 0.11417150497436523,
      "step": 26373
    },
    {
      "epoch": 0.000160968017578125,
      "step": 26373,
      "training_step_time": 0.3964371681213379
    },
    {
      "epoch": 0.00016097412109375,
      "model_forward_time": 0.11433887481689453,
      "step": 26374
    },
    {
      "epoch": 0.00016097412109375,
      "step": 26374,
      "training_step_time": 0.3963124752044678
    },
    {
      "epoch": 0.000160980224609375,
      "model_forward_time": 0.11510396003723145,
      "step": 26375
    },
    {
      "epoch": 0.000160980224609375,
      "step": 26375,
      "training_step_time": 0.6050972938537598
    },
    {
      "epoch": 0.000160986328125,
      "model_forward_time": 0.11556410789489746,
      "step": 26376
    },
    {
      "epoch": 0.000160986328125,
      "step": 26376,
      "training_step_time": 0.39571309089660645
    },
    {
      "epoch": 0.000160992431640625,
      "model_forward_time": 0.11431431770324707,
      "step": 26377
    },
    {
      "epoch": 0.000160992431640625,
      "step": 26377,
      "training_step_time": 0.3887810707092285
    },
    {
      "epoch": 0.00016099853515625,
      "model_forward_time": 0.11559486389160156,
      "step": 26378
    },
    {
      "epoch": 0.00016099853515625,
      "step": 26378,
      "training_step_time": 0.4109368324279785
    },
    {
      "epoch": 0.000161004638671875,
      "model_forward_time": 0.11529421806335449,
      "step": 26379
    },
    {
      "epoch": 0.000161004638671875,
      "step": 26379,
      "training_step_time": 0.40717029571533203
    },
    {
      "epoch": 0.0001610107421875,
      "grad_norm": 0.16638466715812683,
      "learning_rate": 6.39230895741251e-05,
      "loss": 0.0528,
      "step": 26380
    },
    {
      "epoch": 0.0001610107421875,
      "model_forward_time": 0.11868715286254883,
      "step": 26380
    },
    {
      "epoch": 0.0001610107421875,
      "step": 26380,
      "training_step_time": 0.4484291076660156
    },
    {
      "epoch": 0.000161016845703125,
      "model_forward_time": 0.1156778335571289,
      "step": 26381
    },
    {
      "epoch": 0.000161016845703125,
      "step": 26381,
      "training_step_time": 0.6333234310150146
    },
    {
      "epoch": 0.00016102294921875,
      "model_forward_time": 0.1149301528930664,
      "step": 26382
    },
    {
      "epoch": 0.00016102294921875,
      "step": 26382,
      "training_step_time": 0.39560556411743164
    },
    {
      "epoch": 0.000161029052734375,
      "model_forward_time": 0.11454367637634277,
      "step": 26383
    },
    {
      "epoch": 0.000161029052734375,
      "step": 26383,
      "training_step_time": 0.4420797824859619
    },
    {
      "epoch": 0.00016103515625,
      "model_forward_time": 0.11468195915222168,
      "step": 26384
    },
    {
      "epoch": 0.00016103515625,
      "step": 26384,
      "training_step_time": 0.38284850120544434
    },
    {
      "epoch": 0.000161041259765625,
      "model_forward_time": 0.11452579498291016,
      "step": 26385
    },
    {
      "epoch": 0.000161041259765625,
      "step": 26385,
      "training_step_time": 0.4339163303375244
    },
    {
      "epoch": 0.00016104736328125,
      "model_forward_time": 0.11473822593688965,
      "step": 26386
    },
    {
      "epoch": 0.00016104736328125,
      "step": 26386,
      "training_step_time": 0.4226255416870117
    },
    {
      "epoch": 0.000161053466796875,
      "model_forward_time": 0.11496496200561523,
      "step": 26387
    },
    {
      "epoch": 0.000161053466796875,
      "step": 26387,
      "training_step_time": 0.5835227966308594
    },
    {
      "epoch": 0.0001610595703125,
      "model_forward_time": 0.11451458930969238,
      "step": 26388
    },
    {
      "epoch": 0.0001610595703125,
      "step": 26388,
      "training_step_time": 0.4035027027130127
    },
    {
      "epoch": 0.000161065673828125,
      "model_forward_time": 0.11471295356750488,
      "step": 26389
    },
    {
      "epoch": 0.000161065673828125,
      "step": 26389,
      "training_step_time": 0.3990795612335205
    },
    {
      "epoch": 0.00016107177734375,
      "grad_norm": 0.15708357095718384,
      "learning_rate": 6.389661961682173e-05,
      "loss": 0.0415,
      "step": 26390
    },
    {
      "epoch": 0.00016107177734375,
      "model_forward_time": 0.11438584327697754,
      "step": 26390
    },
    {
      "epoch": 0.00016107177734375,
      "step": 26390,
      "training_step_time": 0.39673948287963867
    },
    {
      "epoch": 0.000161077880859375,
      "model_forward_time": 0.11492681503295898,
      "step": 26391
    },
    {
      "epoch": 0.000161077880859375,
      "step": 26391,
      "training_step_time": 0.38538622856140137
    },
    {
      "epoch": 0.000161083984375,
      "model_forward_time": 0.11478710174560547,
      "step": 26392
    },
    {
      "epoch": 0.000161083984375,
      "step": 26392,
      "training_step_time": 0.3810396194458008
    },
    {
      "epoch": 0.000161090087890625,
      "model_forward_time": 0.11609721183776855,
      "step": 26393
    },
    {
      "epoch": 0.000161090087890625,
      "step": 26393,
      "training_step_time": 0.609654426574707
    },
    {
      "epoch": 0.00016109619140625,
      "model_forward_time": 0.11517047882080078,
      "step": 26394
    },
    {
      "epoch": 0.00016109619140625,
      "step": 26394,
      "training_step_time": 0.4084179401397705
    },
    {
      "epoch": 0.000161102294921875,
      "model_forward_time": 0.11519956588745117,
      "step": 26395
    },
    {
      "epoch": 0.000161102294921875,
      "step": 26395,
      "training_step_time": 0.448819637298584
    },
    {
      "epoch": 0.0001611083984375,
      "model_forward_time": 0.11484241485595703,
      "step": 26396
    },
    {
      "epoch": 0.0001611083984375,
      "step": 26396,
      "training_step_time": 0.3902604579925537
    },
    {
      "epoch": 0.000161114501953125,
      "model_forward_time": 0.11461663246154785,
      "step": 26397
    },
    {
      "epoch": 0.000161114501953125,
      "step": 26397,
      "training_step_time": 0.4281332492828369
    },
    {
      "epoch": 0.00016112060546875,
      "model_forward_time": 0.11457037925720215,
      "step": 26398
    },
    {
      "epoch": 0.00016112060546875,
      "step": 26398,
      "training_step_time": 0.3999183177947998
    },
    {
      "epoch": 0.000161126708984375,
      "model_forward_time": 0.11538839340209961,
      "step": 26399
    },
    {
      "epoch": 0.000161126708984375,
      "step": 26399,
      "training_step_time": 0.7108137607574463
    },
    {
      "epoch": 0.0001611328125,
      "grad_norm": 0.1333642154932022,
      "learning_rate": 6.387014543809223e-05,
      "loss": 0.0478,
      "step": 26400
    },
    {
      "epoch": 0.0001611328125,
      "model_forward_time": 0.1147603988647461,
      "step": 26400
    },
    {
      "epoch": 0.0001611328125,
      "step": 26400,
      "training_step_time": 0.43420863151550293
    },
    {
      "epoch": 0.000161138916015625,
      "model_forward_time": 0.11435461044311523,
      "step": 26401
    },
    {
      "epoch": 0.000161138916015625,
      "step": 26401,
      "training_step_time": 0.3898634910583496
    },
    {
      "epoch": 0.00016114501953125,
      "model_forward_time": 0.11507010459899902,
      "step": 26402
    },
    {
      "epoch": 0.00016114501953125,
      "step": 26402,
      "training_step_time": 0.3953683376312256
    },
    {
      "epoch": 0.000161151123046875,
      "model_forward_time": 0.11485767364501953,
      "step": 26403
    },
    {
      "epoch": 0.000161151123046875,
      "step": 26403,
      "training_step_time": 0.38622307777404785
    },
    {
      "epoch": 0.0001611572265625,
      "model_forward_time": 0.11479711532592773,
      "step": 26404
    },
    {
      "epoch": 0.0001611572265625,
      "step": 26404,
      "training_step_time": 0.40309977531433105
    },
    {
      "epoch": 0.000161163330078125,
      "model_forward_time": 0.11533045768737793,
      "step": 26405
    },
    {
      "epoch": 0.000161163330078125,
      "step": 26405,
      "training_step_time": 0.7131569385528564
    },
    {
      "epoch": 0.00016116943359375,
      "model_forward_time": 0.11458182334899902,
      "step": 26406
    },
    {
      "epoch": 0.00016116943359375,
      "step": 26406,
      "training_step_time": 0.44218993186950684
    },
    {
      "epoch": 0.000161175537109375,
      "model_forward_time": 0.11503791809082031,
      "step": 26407
    },
    {
      "epoch": 0.000161175537109375,
      "step": 26407,
      "training_step_time": 0.4653944969177246
    },
    {
      "epoch": 0.000161181640625,
      "model_forward_time": 0.11410069465637207,
      "step": 26408
    },
    {
      "epoch": 0.000161181640625,
      "step": 26408,
      "training_step_time": 0.41076207160949707
    },
    {
      "epoch": 0.000161187744140625,
      "model_forward_time": 0.11500263214111328,
      "step": 26409
    },
    {
      "epoch": 0.000161187744140625,
      "step": 26409,
      "training_step_time": 0.3855276107788086
    },
    {
      "epoch": 0.00016119384765625,
      "grad_norm": 0.1467398852109909,
      "learning_rate": 6.384366704597879e-05,
      "loss": 0.0493,
      "step": 26410
    },
    {
      "epoch": 0.00016119384765625,
      "model_forward_time": 0.11432480812072754,
      "step": 26410
    },
    {
      "epoch": 0.00016119384765625,
      "step": 26410,
      "training_step_time": 0.40498948097229004
    },
    {
      "epoch": 0.000161199951171875,
      "model_forward_time": 0.11461234092712402,
      "step": 26411
    },
    {
      "epoch": 0.000161199951171875,
      "step": 26411,
      "training_step_time": 0.5599441528320312
    },
    {
      "epoch": 0.0001612060546875,
      "model_forward_time": 0.11568665504455566,
      "step": 26412
    },
    {
      "epoch": 0.0001612060546875,
      "step": 26412,
      "training_step_time": 0.4417905807495117
    },
    {
      "epoch": 0.000161212158203125,
      "model_forward_time": 0.11480879783630371,
      "step": 26413
    },
    {
      "epoch": 0.000161212158203125,
      "step": 26413,
      "training_step_time": 0.40376949310302734
    },
    {
      "epoch": 0.00016121826171875,
      "model_forward_time": 0.11500430107116699,
      "step": 26414
    },
    {
      "epoch": 0.00016121826171875,
      "step": 26414,
      "training_step_time": 0.4033331871032715
    },
    {
      "epoch": 0.000161224365234375,
      "model_forward_time": 0.11435604095458984,
      "step": 26415
    },
    {
      "epoch": 0.000161224365234375,
      "step": 26415,
      "training_step_time": 0.38675427436828613
    },
    {
      "epoch": 0.00016123046875,
      "model_forward_time": 0.11509203910827637,
      "step": 26416
    },
    {
      "epoch": 0.00016123046875,
      "step": 26416,
      "training_step_time": 0.38352036476135254
    },
    {
      "epoch": 0.000161236572265625,
      "model_forward_time": 0.11517977714538574,
      "step": 26417
    },
    {
      "epoch": 0.000161236572265625,
      "step": 26417,
      "training_step_time": 0.47843003273010254
    },
    {
      "epoch": 0.00016124267578125,
      "model_forward_time": 0.11589884757995605,
      "step": 26418
    },
    {
      "epoch": 0.00016124267578125,
      "step": 26418,
      "training_step_time": 0.4058094024658203
    },
    {
      "epoch": 0.000161248779296875,
      "model_forward_time": 0.11611056327819824,
      "step": 26419
    },
    {
      "epoch": 0.000161248779296875,
      "step": 26419,
      "training_step_time": 0.3971846103668213
    },
    {
      "epoch": 0.0001612548828125,
      "grad_norm": 0.14692741632461548,
      "learning_rate": 6.38171844485248e-05,
      "loss": 0.046,
      "step": 26420
    },
    {
      "epoch": 0.0001612548828125,
      "model_forward_time": 0.11603069305419922,
      "step": 26420
    },
    {
      "epoch": 0.0001612548828125,
      "step": 26420,
      "training_step_time": 0.47009825706481934
    },
    {
      "epoch": 0.000161260986328125,
      "model_forward_time": 0.11616802215576172,
      "step": 26421
    },
    {
      "epoch": 0.000161260986328125,
      "step": 26421,
      "training_step_time": 0.40743589401245117
    },
    {
      "epoch": 0.00016126708984375,
      "model_forward_time": 0.11572480201721191,
      "step": 26422
    },
    {
      "epoch": 0.00016126708984375,
      "step": 26422,
      "training_step_time": 0.4183177947998047
    },
    {
      "epoch": 0.000161273193359375,
      "model_forward_time": 0.11650538444519043,
      "step": 26423
    },
    {
      "epoch": 0.000161273193359375,
      "step": 26423,
      "training_step_time": 0.3979794979095459
    },
    {
      "epoch": 0.000161279296875,
      "model_forward_time": 0.11573958396911621,
      "step": 26424
    },
    {
      "epoch": 0.000161279296875,
      "step": 26424,
      "training_step_time": 0.46224474906921387
    },
    {
      "epoch": 0.000161285400390625,
      "model_forward_time": 0.11450409889221191,
      "step": 26425
    },
    {
      "epoch": 0.000161285400390625,
      "step": 26425,
      "training_step_time": 0.4374709129333496
    },
    {
      "epoch": 0.00016129150390625,
      "model_forward_time": 0.11487293243408203,
      "step": 26426
    },
    {
      "epoch": 0.00016129150390625,
      "step": 26426,
      "training_step_time": 0.48682212829589844
    },
    {
      "epoch": 0.000161297607421875,
      "model_forward_time": 0.1152651309967041,
      "step": 26427
    },
    {
      "epoch": 0.000161297607421875,
      "step": 26427,
      "training_step_time": 0.39596128463745117
    },
    {
      "epoch": 0.0001613037109375,
      "model_forward_time": 0.11485791206359863,
      "step": 26428
    },
    {
      "epoch": 0.0001613037109375,
      "step": 26428,
      "training_step_time": 0.3885664939880371
    },
    {
      "epoch": 0.000161309814453125,
      "model_forward_time": 0.11529302597045898,
      "step": 26429
    },
    {
      "epoch": 0.000161309814453125,
      "step": 26429,
      "training_step_time": 0.40396666526794434
    },
    {
      "epoch": 0.00016131591796875,
      "grad_norm": 0.1554567515850067,
      "learning_rate": 6.3790697653775e-05,
      "loss": 0.0476,
      "step": 26430
    },
    {
      "epoch": 0.00016131591796875,
      "model_forward_time": 0.11525774002075195,
      "step": 26430
    },
    {
      "epoch": 0.00016131591796875,
      "step": 26430,
      "training_step_time": 0.3995940685272217
    },
    {
      "epoch": 0.000161322021484375,
      "model_forward_time": 0.11483573913574219,
      "step": 26431
    },
    {
      "epoch": 0.000161322021484375,
      "step": 26431,
      "training_step_time": 0.401716947555542
    },
    {
      "epoch": 0.000161328125,
      "model_forward_time": 0.11487269401550293,
      "step": 26432
    },
    {
      "epoch": 0.000161328125,
      "step": 26432,
      "training_step_time": 0.40131497383117676
    },
    {
      "epoch": 0.000161334228515625,
      "model_forward_time": 0.11520624160766602,
      "step": 26433
    },
    {
      "epoch": 0.000161334228515625,
      "step": 26433,
      "training_step_time": 0.39316272735595703
    },
    {
      "epoch": 0.00016134033203125,
      "model_forward_time": 0.11545252799987793,
      "step": 26434
    },
    {
      "epoch": 0.00016134033203125,
      "step": 26434,
      "training_step_time": 0.39594459533691406
    },
    {
      "epoch": 0.000161346435546875,
      "model_forward_time": 0.11536812782287598,
      "step": 26435
    },
    {
      "epoch": 0.000161346435546875,
      "step": 26435,
      "training_step_time": 0.48854875564575195
    },
    {
      "epoch": 0.0001613525390625,
      "model_forward_time": 0.11447501182556152,
      "step": 26436
    },
    {
      "epoch": 0.0001613525390625,
      "step": 26436,
      "training_step_time": 0.5063891410827637
    },
    {
      "epoch": 0.000161358642578125,
      "model_forward_time": 0.11806273460388184,
      "step": 26437
    },
    {
      "epoch": 0.000161358642578125,
      "step": 26437,
      "training_step_time": 0.3993191719055176
    },
    {
      "epoch": 0.00016136474609375,
      "model_forward_time": 0.11509513854980469,
      "step": 26438
    },
    {
      "epoch": 0.00016136474609375,
      "step": 26438,
      "training_step_time": 0.37963104248046875
    },
    {
      "epoch": 0.000161370849609375,
      "model_forward_time": 0.11528277397155762,
      "step": 26439
    },
    {
      "epoch": 0.000161370849609375,
      "step": 26439,
      "training_step_time": 0.4403665065765381
    },
    {
      "epoch": 0.000161376953125,
      "grad_norm": 0.12597958743572235,
      "learning_rate": 6.376420666977538e-05,
      "loss": 0.0477,
      "step": 26440
    },
    {
      "epoch": 0.000161376953125,
      "model_forward_time": 0.11571788787841797,
      "step": 26440
    },
    {
      "epoch": 0.000161376953125,
      "step": 26440,
      "training_step_time": 0.46019411087036133
    },
    {
      "epoch": 0.000161383056640625,
      "model_forward_time": 0.1149449348449707,
      "step": 26441
    },
    {
      "epoch": 0.000161383056640625,
      "step": 26441,
      "training_step_time": 0.4047729969024658
    },
    {
      "epoch": 0.00016138916015625,
      "model_forward_time": 0.11483383178710938,
      "step": 26442
    },
    {
      "epoch": 0.00016138916015625,
      "step": 26442,
      "training_step_time": 0.40516233444213867
    },
    {
      "epoch": 0.000161395263671875,
      "model_forward_time": 0.11496782302856445,
      "step": 26443
    },
    {
      "epoch": 0.000161395263671875,
      "step": 26443,
      "training_step_time": 0.4030416011810303
    },
    {
      "epoch": 0.0001614013671875,
      "model_forward_time": 0.11453580856323242,
      "step": 26444
    },
    {
      "epoch": 0.0001614013671875,
      "step": 26444,
      "training_step_time": 0.4024994373321533
    },
    {
      "epoch": 0.000161407470703125,
      "model_forward_time": 0.11553192138671875,
      "step": 26445
    },
    {
      "epoch": 0.000161407470703125,
      "step": 26445,
      "training_step_time": 0.3919684886932373
    },
    {
      "epoch": 0.00016141357421875,
      "model_forward_time": 0.11532258987426758,
      "step": 26446
    },
    {
      "epoch": 0.00016141357421875,
      "step": 26446,
      "training_step_time": 0.40544724464416504
    },
    {
      "epoch": 0.000161419677734375,
      "model_forward_time": 0.11584997177124023,
      "step": 26447
    },
    {
      "epoch": 0.000161419677734375,
      "step": 26447,
      "training_step_time": 0.4682042598724365
    },
    {
      "epoch": 0.00016142578125,
      "model_forward_time": 0.11511421203613281,
      "step": 26448
    },
    {
      "epoch": 0.00016142578125,
      "step": 26448,
      "training_step_time": 0.4083535671234131
    },
    {
      "epoch": 0.000161431884765625,
      "model_forward_time": 0.11596083641052246,
      "step": 26449
    },
    {
      "epoch": 0.000161431884765625,
      "step": 26449,
      "training_step_time": 0.40375256538391113
    },
    {
      "epoch": 0.00016143798828125,
      "grad_norm": 0.11004895716905594,
      "learning_rate": 6.37377115045732e-05,
      "loss": 0.0492,
      "step": 26450
    },
    {
      "epoch": 0.00016143798828125,
      "model_forward_time": 0.11542105674743652,
      "step": 26450
    },
    {
      "epoch": 0.00016143798828125,
      "step": 26450,
      "training_step_time": 0.48957061767578125
    },
    {
      "epoch": 0.000161444091796875,
      "model_forward_time": 0.11553454399108887,
      "step": 26451
    },
    {
      "epoch": 0.000161444091796875,
      "step": 26451,
      "training_step_time": 0.46894311904907227
    },
    {
      "epoch": 0.0001614501953125,
      "model_forward_time": 0.11564350128173828,
      "step": 26452
    },
    {
      "epoch": 0.0001614501953125,
      "step": 26452,
      "training_step_time": 0.3981485366821289
    },
    {
      "epoch": 0.000161456298828125,
      "model_forward_time": 0.11481094360351562,
      "step": 26453
    },
    {
      "epoch": 0.000161456298828125,
      "step": 26453,
      "training_step_time": 0.49280714988708496
    },
    {
      "epoch": 0.00016146240234375,
      "model_forward_time": 0.11509990692138672,
      "step": 26454
    },
    {
      "epoch": 0.00016146240234375,
      "step": 26454,
      "training_step_time": 0.46048474311828613
    },
    {
      "epoch": 0.000161468505859375,
      "model_forward_time": 0.11501049995422363,
      "step": 26455
    },
    {
      "epoch": 0.000161468505859375,
      "step": 26455,
      "training_step_time": 0.3938107490539551
    },
    {
      "epoch": 0.000161474609375,
      "model_forward_time": 0.11495852470397949,
      "step": 26456
    },
    {
      "epoch": 0.000161474609375,
      "step": 26456,
      "training_step_time": 0.39742517471313477
    },
    {
      "epoch": 0.000161480712890625,
      "model_forward_time": 0.1151273250579834,
      "step": 26457
    },
    {
      "epoch": 0.000161480712890625,
      "step": 26457,
      "training_step_time": 0.39669322967529297
    },
    {
      "epoch": 0.00016148681640625,
      "model_forward_time": 0.11546111106872559,
      "step": 26458
    },
    {
      "epoch": 0.00016148681640625,
      "step": 26458,
      "training_step_time": 0.38886427879333496
    },
    {
      "epoch": 0.000161492919921875,
      "model_forward_time": 0.11517786979675293,
      "step": 26459
    },
    {
      "epoch": 0.000161492919921875,
      "step": 26459,
      "training_step_time": 0.6484701633453369
    },
    {
      "epoch": 0.0001614990234375,
      "grad_norm": 0.11980175226926804,
      "learning_rate": 6.371121216621698e-05,
      "loss": 0.0487,
      "step": 26460
    },
    {
      "epoch": 0.0001614990234375,
      "model_forward_time": 0.11441159248352051,
      "step": 26460
    },
    {
      "epoch": 0.0001614990234375,
      "step": 26460,
      "training_step_time": 0.3963785171508789
    },
    {
      "epoch": 0.000161505126953125,
      "model_forward_time": 0.11473751068115234,
      "step": 26461
    },
    {
      "epoch": 0.000161505126953125,
      "step": 26461,
      "training_step_time": 0.40283989906311035
    },
    {
      "epoch": 0.00016151123046875,
      "model_forward_time": 0.11445450782775879,
      "step": 26462
    },
    {
      "epoch": 0.00016151123046875,
      "step": 26462,
      "training_step_time": 0.3876340389251709
    },
    {
      "epoch": 0.000161517333984375,
      "model_forward_time": 0.11478018760681152,
      "step": 26463
    },
    {
      "epoch": 0.000161517333984375,
      "step": 26463,
      "training_step_time": 0.45116329193115234
    },
    {
      "epoch": 0.0001615234375,
      "model_forward_time": 0.11473774909973145,
      "step": 26464
    },
    {
      "epoch": 0.0001615234375,
      "step": 26464,
      "training_step_time": 0.5004792213439941
    },
    {
      "epoch": 0.000161529541015625,
      "model_forward_time": 0.11539530754089355,
      "step": 26465
    },
    {
      "epoch": 0.000161529541015625,
      "step": 26465,
      "training_step_time": 0.4508650302886963
    },
    {
      "epoch": 0.00016153564453125,
      "model_forward_time": 0.11484146118164062,
      "step": 26466
    },
    {
      "epoch": 0.00016153564453125,
      "step": 26466,
      "training_step_time": 0.4850285053253174
    },
    {
      "epoch": 0.000161541748046875,
      "model_forward_time": 0.11507320404052734,
      "step": 26467
    },
    {
      "epoch": 0.000161541748046875,
      "step": 26467,
      "training_step_time": 0.49934983253479004
    },
    {
      "epoch": 0.0001615478515625,
      "model_forward_time": 0.11459898948669434,
      "step": 26468
    },
    {
      "epoch": 0.0001615478515625,
      "step": 26468,
      "training_step_time": 0.4622304439544678
    },
    {
      "epoch": 0.000161553955078125,
      "model_forward_time": 0.11436748504638672,
      "step": 26469
    },
    {
      "epoch": 0.000161553955078125,
      "step": 26469,
      "training_step_time": 0.38132691383361816
    },
    {
      "epoch": 0.00016156005859375,
      "grad_norm": 0.11386534571647644,
      "learning_rate": 6.368470866275654e-05,
      "loss": 0.0437,
      "step": 26470
    },
    {
      "epoch": 0.00016156005859375,
      "model_forward_time": 0.11441826820373535,
      "step": 26470
    },
    {
      "epoch": 0.00016156005859375,
      "step": 26470,
      "training_step_time": 0.383164644241333
    },
    {
      "epoch": 0.000161566162109375,
      "model_forward_time": 0.11432886123657227,
      "step": 26471
    },
    {
      "epoch": 0.000161566162109375,
      "step": 26471,
      "training_step_time": 0.39460301399230957
    },
    {
      "epoch": 0.000161572265625,
      "model_forward_time": 0.1147768497467041,
      "step": 26472
    },
    {
      "epoch": 0.000161572265625,
      "step": 26472,
      "training_step_time": 0.39361119270324707
    },
    {
      "epoch": 0.000161578369140625,
      "model_forward_time": 0.11503887176513672,
      "step": 26473
    },
    {
      "epoch": 0.000161578369140625,
      "step": 26473,
      "training_step_time": 0.3970017433166504
    },
    {
      "epoch": 0.00016158447265625,
      "model_forward_time": 0.1150963306427002,
      "step": 26474
    },
    {
      "epoch": 0.00016158447265625,
      "step": 26474,
      "training_step_time": 0.39497900009155273
    },
    {
      "epoch": 0.000161590576171875,
      "model_forward_time": 0.11482644081115723,
      "step": 26475
    },
    {
      "epoch": 0.000161590576171875,
      "step": 26475,
      "training_step_time": 0.39030909538269043
    },
    {
      "epoch": 0.0001615966796875,
      "model_forward_time": 0.11561894416809082,
      "step": 26476
    },
    {
      "epoch": 0.0001615966796875,
      "step": 26476,
      "training_step_time": 0.3892505168914795
    },
    {
      "epoch": 0.000161602783203125,
      "model_forward_time": 0.11479997634887695,
      "step": 26477
    },
    {
      "epoch": 0.000161602783203125,
      "step": 26477,
      "training_step_time": 0.5401325225830078
    },
    {
      "epoch": 0.00016160888671875,
      "model_forward_time": 0.11568737030029297,
      "step": 26478
    },
    {
      "epoch": 0.00016160888671875,
      "step": 26478,
      "training_step_time": 0.44528889656066895
    },
    {
      "epoch": 0.000161614990234375,
      "model_forward_time": 0.11538863182067871,
      "step": 26479
    },
    {
      "epoch": 0.000161614990234375,
      "step": 26479,
      "training_step_time": 0.4852259159088135
    },
    {
      "epoch": 0.00016162109375,
      "grad_norm": 0.11773436516523361,
      "learning_rate": 6.365820100224292e-05,
      "loss": 0.0476,
      "step": 26480
    },
    {
      "epoch": 0.00016162109375,
      "model_forward_time": 0.11455893516540527,
      "step": 26480
    },
    {
      "epoch": 0.00016162109375,
      "step": 26480,
      "training_step_time": 0.5024523735046387
    },
    {
      "epoch": 0.000161627197265625,
      "model_forward_time": 0.11520695686340332,
      "step": 26481
    },
    {
      "epoch": 0.000161627197265625,
      "step": 26481,
      "training_step_time": 0.4281158447265625
    },
    {
      "epoch": 0.00016163330078125,
      "model_forward_time": 0.11502265930175781,
      "step": 26482
    },
    {
      "epoch": 0.00016163330078125,
      "step": 26482,
      "training_step_time": 0.46760082244873047
    },
    {
      "epoch": 0.000161639404296875,
      "model_forward_time": 0.1145164966583252,
      "step": 26483
    },
    {
      "epoch": 0.000161639404296875,
      "step": 26483,
      "training_step_time": 0.39114999771118164
    },
    {
      "epoch": 0.0001616455078125,
      "model_forward_time": 0.11365771293640137,
      "step": 26484
    },
    {
      "epoch": 0.0001616455078125,
      "step": 26484,
      "training_step_time": 0.3885326385498047
    },
    {
      "epoch": 0.000161651611328125,
      "model_forward_time": 0.11579608917236328,
      "step": 26485
    },
    {
      "epoch": 0.000161651611328125,
      "step": 26485,
      "training_step_time": 0.39908647537231445
    },
    {
      "epoch": 0.00016165771484375,
      "model_forward_time": 0.11499357223510742,
      "step": 26486
    },
    {
      "epoch": 0.00016165771484375,
      "step": 26486,
      "training_step_time": 0.3885023593902588
    },
    {
      "epoch": 0.000161663818359375,
      "model_forward_time": 0.11559605598449707,
      "step": 26487
    },
    {
      "epoch": 0.000161663818359375,
      "step": 26487,
      "training_step_time": 0.3916511535644531
    },
    {
      "epoch": 0.000161669921875,
      "model_forward_time": 0.11461329460144043,
      "step": 26488
    },
    {
      "epoch": 0.000161669921875,
      "step": 26488,
      "training_step_time": 0.39167118072509766
    },
    {
      "epoch": 0.000161676025390625,
      "model_forward_time": 0.11485004425048828,
      "step": 26489
    },
    {
      "epoch": 0.000161676025390625,
      "step": 26489,
      "training_step_time": 0.39321160316467285
    },
    {
      "epoch": 0.00016168212890625,
      "grad_norm": 0.12128469347953796,
      "learning_rate": 6.363168919272846e-05,
      "loss": 0.0424,
      "step": 26490
    },
    {
      "epoch": 0.00016168212890625,
      "model_forward_time": 0.11522746086120605,
      "step": 26490
    },
    {
      "epoch": 0.00016168212890625,
      "step": 26490,
      "training_step_time": 0.4093160629272461
    },
    {
      "epoch": 0.000161688232421875,
      "model_forward_time": 0.1150672435760498,
      "step": 26491
    },
    {
      "epoch": 0.000161688232421875,
      "step": 26491,
      "training_step_time": 0.4006946086883545
    },
    {
      "epoch": 0.0001616943359375,
      "model_forward_time": 0.11595010757446289,
      "step": 26492
    },
    {
      "epoch": 0.0001616943359375,
      "step": 26492,
      "training_step_time": 0.4739406108856201
    },
    {
      "epoch": 0.000161700439453125,
      "model_forward_time": 0.11483430862426758,
      "step": 26493
    },
    {
      "epoch": 0.000161700439453125,
      "step": 26493,
      "training_step_time": 0.44640183448791504
    },
    {
      "epoch": 0.00016170654296875,
      "model_forward_time": 0.11533546447753906,
      "step": 26494
    },
    {
      "epoch": 0.00016170654296875,
      "step": 26494,
      "training_step_time": 0.48528146743774414
    },
    {
      "epoch": 0.000161712646484375,
      "model_forward_time": 0.11533308029174805,
      "step": 26495
    },
    {
      "epoch": 0.000161712646484375,
      "step": 26495,
      "training_step_time": 0.38647937774658203
    },
    {
      "epoch": 0.00016171875,
      "model_forward_time": 0.11532163619995117,
      "step": 26496
    },
    {
      "epoch": 0.00016171875,
      "step": 26496,
      "training_step_time": 0.48340415954589844
    },
    {
      "epoch": 0.000161724853515625,
      "model_forward_time": 0.11474442481994629,
      "step": 26497
    },
    {
      "epoch": 0.000161724853515625,
      "step": 26497,
      "training_step_time": 0.4178125858306885
    },
    {
      "epoch": 0.00016173095703125,
      "model_forward_time": 0.11503291130065918,
      "step": 26498
    },
    {
      "epoch": 0.00016173095703125,
      "step": 26498,
      "training_step_time": 0.3931422233581543
    },
    {
      "epoch": 0.000161737060546875,
      "model_forward_time": 0.11516451835632324,
      "step": 26499
    },
    {
      "epoch": 0.000161737060546875,
      "step": 26499,
      "training_step_time": 0.38698887825012207
    },
    {
      "epoch": 0.0001617431640625,
      "grad_norm": 0.12061895430088043,
      "learning_rate": 6.360517324226676e-05,
      "loss": 0.0527,
      "step": 26500
    },
    {
      "epoch": 0.0001617431640625,
      "model_forward_time": 0.11508059501647949,
      "step": 26500
    },
    {
      "epoch": 0.0001617431640625,
      "step": 26500,
      "training_step_time": 0.39003634452819824
    },
    {
      "epoch": 0.000161749267578125,
      "model_forward_time": 0.11514067649841309,
      "step": 26501
    },
    {
      "epoch": 0.000161749267578125,
      "step": 26501,
      "training_step_time": 0.3899953365325928
    },
    {
      "epoch": 0.00016175537109375,
      "model_forward_time": 0.11468219757080078,
      "step": 26502
    },
    {
      "epoch": 0.00016175537109375,
      "step": 26502,
      "training_step_time": 0.39899206161499023
    },
    {
      "epoch": 0.000161761474609375,
      "model_forward_time": 0.11713266372680664,
      "step": 26503
    },
    {
      "epoch": 0.000161761474609375,
      "step": 26503,
      "training_step_time": 0.3982360363006592
    },
    {
      "epoch": 0.000161767578125,
      "model_forward_time": 0.11496496200561523,
      "step": 26504
    },
    {
      "epoch": 0.000161767578125,
      "step": 26504,
      "training_step_time": 0.3901221752166748
    },
    {
      "epoch": 0.000161773681640625,
      "model_forward_time": 0.11547040939331055,
      "step": 26505
    },
    {
      "epoch": 0.000161773681640625,
      "step": 26505,
      "training_step_time": 0.41193199157714844
    },
    {
      "epoch": 0.00016177978515625,
      "model_forward_time": 0.11572837829589844,
      "step": 26506
    },
    {
      "epoch": 0.00016177978515625,
      "step": 26506,
      "training_step_time": 0.4418177604675293
    },
    {
      "epoch": 0.000161785888671875,
      "model_forward_time": 0.11497855186462402,
      "step": 26507
    },
    {
      "epoch": 0.000161785888671875,
      "step": 26507,
      "training_step_time": 0.4241495132446289
    },
    {
      "epoch": 0.0001617919921875,
      "model_forward_time": 0.11548304557800293,
      "step": 26508
    },
    {
      "epoch": 0.0001617919921875,
      "step": 26508,
      "training_step_time": 0.4902775287628174
    },
    {
      "epoch": 0.000161798095703125,
      "model_forward_time": 0.11521601676940918,
      "step": 26509
    },
    {
      "epoch": 0.000161798095703125,
      "step": 26509,
      "training_step_time": 0.46700167655944824
    },
    {
      "epoch": 0.00016180419921875,
      "grad_norm": 0.0971674919128418,
      "learning_rate": 6.357865315891265e-05,
      "loss": 0.0467,
      "step": 26510
    },
    {
      "epoch": 0.00016180419921875,
      "model_forward_time": 0.11470222473144531,
      "step": 26510
    },
    {
      "epoch": 0.00016180419921875,
      "step": 26510,
      "training_step_time": 0.39722466468811035
    },
    {
      "epoch": 0.000161810302734375,
      "model_forward_time": 0.11471819877624512,
      "step": 26511
    },
    {
      "epoch": 0.000161810302734375,
      "step": 26511,
      "training_step_time": 0.48269104957580566
    },
    {
      "epoch": 0.00016181640625,
      "model_forward_time": 0.11447525024414062,
      "step": 26512
    },
    {
      "epoch": 0.00016181640625,
      "step": 26512,
      "training_step_time": 0.381000280380249
    },
    {
      "epoch": 0.000161822509765625,
      "model_forward_time": 0.11511826515197754,
      "step": 26513
    },
    {
      "epoch": 0.000161822509765625,
      "step": 26513,
      "training_step_time": 0.38368654251098633
    },
    {
      "epoch": 0.00016182861328125,
      "model_forward_time": 0.11580967903137207,
      "step": 26514
    },
    {
      "epoch": 0.00016182861328125,
      "step": 26514,
      "training_step_time": 0.39452052116394043
    },
    {
      "epoch": 0.000161834716796875,
      "model_forward_time": 0.11429476737976074,
      "step": 26515
    },
    {
      "epoch": 0.000161834716796875,
      "step": 26515,
      "training_step_time": 0.39901232719421387
    },
    {
      "epoch": 0.0001618408203125,
      "model_forward_time": 0.1150975227355957,
      "step": 26516
    },
    {
      "epoch": 0.0001618408203125,
      "step": 26516,
      "training_step_time": 0.38487672805786133
    },
    {
      "epoch": 0.000161846923828125,
      "model_forward_time": 0.11577582359313965,
      "step": 26517
    },
    {
      "epoch": 0.000161846923828125,
      "step": 26517,
      "training_step_time": 0.39330005645751953
    },
    {
      "epoch": 0.00016185302734375,
      "model_forward_time": 0.11609673500061035,
      "step": 26518
    },
    {
      "epoch": 0.00016185302734375,
      "step": 26518,
      "training_step_time": 0.40206122398376465
    },
    {
      "epoch": 0.000161859130859375,
      "model_forward_time": 0.11539673805236816,
      "step": 26519
    },
    {
      "epoch": 0.000161859130859375,
      "step": 26519,
      "training_step_time": 0.4101536273956299
    },
    {
      "epoch": 0.000161865234375,
      "grad_norm": 0.13342595100402832,
      "learning_rate": 6.355212895072223e-05,
      "loss": 0.0519,
      "step": 26520
    },
    {
      "epoch": 0.000161865234375,
      "model_forward_time": 0.11478543281555176,
      "step": 26520
    },
    {
      "epoch": 0.000161865234375,
      "step": 26520,
      "training_step_time": 0.40302538871765137
    },
    {
      "epoch": 0.000161871337890625,
      "model_forward_time": 0.114898681640625,
      "step": 26521
    },
    {
      "epoch": 0.000161871337890625,
      "step": 26521,
      "training_step_time": 0.5017869472503662
    },
    {
      "epoch": 0.00016187744140625,
      "model_forward_time": 0.1143484115600586,
      "step": 26522
    },
    {
      "epoch": 0.00016187744140625,
      "step": 26522,
      "training_step_time": 0.4924309253692627
    },
    {
      "epoch": 0.000161883544921875,
      "model_forward_time": 0.11687994003295898,
      "step": 26523
    },
    {
      "epoch": 0.000161883544921875,
      "step": 26523,
      "training_step_time": 0.4741201400756836
    },
    {
      "epoch": 0.0001618896484375,
      "model_forward_time": 0.11547279357910156,
      "step": 26524
    },
    {
      "epoch": 0.0001618896484375,
      "step": 26524,
      "training_step_time": 0.4268527030944824
    },
    {
      "epoch": 0.000161895751953125,
      "model_forward_time": 0.11543869972229004,
      "step": 26525
    },
    {
      "epoch": 0.000161895751953125,
      "step": 26525,
      "training_step_time": 0.45934510231018066
    },
    {
      "epoch": 0.00016190185546875,
      "model_forward_time": 0.11448788642883301,
      "step": 26526
    },
    {
      "epoch": 0.00016190185546875,
      "step": 26526,
      "training_step_time": 0.3896009922027588
    },
    {
      "epoch": 0.000161907958984375,
      "model_forward_time": 0.11475276947021484,
      "step": 26527
    },
    {
      "epoch": 0.000161907958984375,
      "step": 26527,
      "training_step_time": 0.39674901962280273
    },
    {
      "epoch": 0.0001619140625,
      "model_forward_time": 0.11466670036315918,
      "step": 26528
    },
    {
      "epoch": 0.0001619140625,
      "step": 26528,
      "training_step_time": 0.3854038715362549
    },
    {
      "epoch": 0.000161920166015625,
      "model_forward_time": 0.11518144607543945,
      "step": 26529
    },
    {
      "epoch": 0.000161920166015625,
      "step": 26529,
      "training_step_time": 0.39281582832336426
    },
    {
      "epoch": 0.00016192626953125,
      "grad_norm": 0.20114848017692566,
      "learning_rate": 6.352560062575284e-05,
      "loss": 0.0396,
      "step": 26530
    },
    {
      "epoch": 0.00016192626953125,
      "model_forward_time": 0.1150507926940918,
      "step": 26530
    },
    {
      "epoch": 0.00016192626953125,
      "step": 26530,
      "training_step_time": 0.3953838348388672
    },
    {
      "epoch": 0.000161932373046875,
      "model_forward_time": 0.11496686935424805,
      "step": 26531
    },
    {
      "epoch": 0.000161932373046875,
      "step": 26531,
      "training_step_time": 0.3903982639312744
    },
    {
      "epoch": 0.0001619384765625,
      "model_forward_time": 0.11580157279968262,
      "step": 26532
    },
    {
      "epoch": 0.0001619384765625,
      "step": 26532,
      "training_step_time": 0.3952298164367676
    },
    {
      "epoch": 0.000161944580078125,
      "model_forward_time": 0.11553692817687988,
      "step": 26533
    },
    {
      "epoch": 0.000161944580078125,
      "step": 26533,
      "training_step_time": 0.4123530387878418
    },
    {
      "epoch": 0.00016195068359375,
      "model_forward_time": 0.11529064178466797,
      "step": 26534
    },
    {
      "epoch": 0.00016195068359375,
      "step": 26534,
      "training_step_time": 0.4103670120239258
    },
    {
      "epoch": 0.000161956787109375,
      "model_forward_time": 0.11521649360656738,
      "step": 26535
    },
    {
      "epoch": 0.000161956787109375,
      "step": 26535,
      "training_step_time": 0.4491703510284424
    },
    {
      "epoch": 0.000161962890625,
      "model_forward_time": 0.1152505874633789,
      "step": 26536
    },
    {
      "epoch": 0.000161962890625,
      "step": 26536,
      "training_step_time": 0.42695140838623047
    },
    {
      "epoch": 0.000161968994140625,
      "model_forward_time": 0.11480093002319336,
      "step": 26537
    },
    {
      "epoch": 0.000161968994140625,
      "step": 26537,
      "training_step_time": 0.4072151184082031
    },
    {
      "epoch": 0.00016197509765625,
      "model_forward_time": 0.11558794975280762,
      "step": 26538
    },
    {
      "epoch": 0.00016197509765625,
      "step": 26538,
      "training_step_time": 0.4984011650085449
    },
    {
      "epoch": 0.000161981201171875,
      "model_forward_time": 0.1151578426361084,
      "step": 26539
    },
    {
      "epoch": 0.000161981201171875,
      "step": 26539,
      "training_step_time": 0.4684712886810303
    },
    {
      "epoch": 0.0001619873046875,
      "grad_norm": 0.15112821757793427,
      "learning_rate": 6.349906819206313e-05,
      "loss": 0.0495,
      "step": 26540
    },
    {
      "epoch": 0.0001619873046875,
      "model_forward_time": 0.11489725112915039,
      "step": 26540
    },
    {
      "epoch": 0.0001619873046875,
      "step": 26540,
      "training_step_time": 0.4852278232574463
    },
    {
      "epoch": 0.000161993408203125,
      "model_forward_time": 0.11481547355651855,
      "step": 26541
    },
    {
      "epoch": 0.000161993408203125,
      "step": 26541,
      "training_step_time": 0.40178728103637695
    },
    {
      "epoch": 0.00016199951171875,
      "model_forward_time": 0.11460542678833008,
      "step": 26542
    },
    {
      "epoch": 0.00016199951171875,
      "step": 26542,
      "training_step_time": 0.39953136444091797
    },
    {
      "epoch": 0.000162005615234375,
      "model_forward_time": 0.11448502540588379,
      "step": 26543
    },
    {
      "epoch": 0.000162005615234375,
      "step": 26543,
      "training_step_time": 0.39374828338623047
    },
    {
      "epoch": 0.00016201171875,
      "model_forward_time": 0.1144418716430664,
      "step": 26544
    },
    {
      "epoch": 0.00016201171875,
      "step": 26544,
      "training_step_time": 0.40744924545288086
    },
    {
      "epoch": 0.000162017822265625,
      "model_forward_time": 0.11446952819824219,
      "step": 26545
    },
    {
      "epoch": 0.000162017822265625,
      "step": 26545,
      "training_step_time": 0.4154820442199707
    },
    {
      "epoch": 0.00016202392578125,
      "model_forward_time": 0.11479449272155762,
      "step": 26546
    },
    {
      "epoch": 0.00016202392578125,
      "step": 26546,
      "training_step_time": 0.39676713943481445
    },
    {
      "epoch": 0.000162030029296875,
      "model_forward_time": 0.11498546600341797,
      "step": 26547
    },
    {
      "epoch": 0.000162030029296875,
      "step": 26547,
      "training_step_time": 0.4360365867614746
    },
    {
      "epoch": 0.0001620361328125,
      "model_forward_time": 0.11475086212158203,
      "step": 26548
    },
    {
      "epoch": 0.0001620361328125,
      "step": 26548,
      "training_step_time": 0.4075901508331299
    },
    {
      "epoch": 0.000162042236328125,
      "model_forward_time": 0.11562490463256836,
      "step": 26549
    },
    {
      "epoch": 0.000162042236328125,
      "step": 26549,
      "training_step_time": 0.3953578472137451
    },
    {
      "epoch": 0.00016204833984375,
      "grad_norm": 0.1140419989824295,
      "learning_rate": 6.34725316577129e-05,
      "loss": 0.0498,
      "step": 26550
    },
    {
      "epoch": 0.00016204833984375,
      "model_forward_time": 0.12482500076293945,
      "step": 26550
    },
    {
      "epoch": 0.00016204833984375,
      "step": 26550,
      "training_step_time": 0.37985825538635254
    },
    {
      "epoch": 0.000162054443359375,
      "model_forward_time": 0.11529135704040527,
      "step": 26551
    },
    {
      "epoch": 0.000162054443359375,
      "step": 26551,
      "training_step_time": 0.4527156352996826
    },
    {
      "epoch": 0.000162060546875,
      "model_forward_time": 0.11527824401855469,
      "step": 26552
    },
    {
      "epoch": 0.000162060546875,
      "step": 26552,
      "training_step_time": 0.3970484733581543
    },
    {
      "epoch": 0.000162066650390625,
      "model_forward_time": 0.11534523963928223,
      "step": 26553
    },
    {
      "epoch": 0.000162066650390625,
      "step": 26553,
      "training_step_time": 0.4100337028503418
    },
    {
      "epoch": 0.00016207275390625,
      "model_forward_time": 0.1154632568359375,
      "step": 26554
    },
    {
      "epoch": 0.00016207275390625,
      "step": 26554,
      "training_step_time": 0.4314899444580078
    },
    {
      "epoch": 0.000162078857421875,
      "model_forward_time": 0.11479854583740234,
      "step": 26555
    },
    {
      "epoch": 0.000162078857421875,
      "step": 26555,
      "training_step_time": 0.46425437927246094
    },
    {
      "epoch": 0.0001620849609375,
      "model_forward_time": 0.11582541465759277,
      "step": 26556
    },
    {
      "epoch": 0.0001620849609375,
      "step": 26556,
      "training_step_time": 0.3924996852874756
    },
    {
      "epoch": 0.000162091064453125,
      "model_forward_time": 0.1162254810333252,
      "step": 26557
    },
    {
      "epoch": 0.000162091064453125,
      "step": 26557,
      "training_step_time": 0.39183783531188965
    },
    {
      "epoch": 0.00016209716796875,
      "model_forward_time": 0.11528158187866211,
      "step": 26558
    },
    {
      "epoch": 0.00016209716796875,
      "step": 26558,
      "training_step_time": 0.4066193103790283
    },
    {
      "epoch": 0.000162103271484375,
      "model_forward_time": 0.11502933502197266,
      "step": 26559
    },
    {
      "epoch": 0.000162103271484375,
      "step": 26559,
      "training_step_time": 0.38824987411499023
    },
    {
      "epoch": 0.000162109375,
      "grad_norm": 0.08896100521087646,
      "learning_rate": 6.344599103076329e-05,
      "loss": 0.0419,
      "step": 26560
    },
    {
      "epoch": 0.000162109375,
      "model_forward_time": 0.1150662899017334,
      "step": 26560
    },
    {
      "epoch": 0.000162109375,
      "step": 26560,
      "training_step_time": 0.3887643814086914
    },
    {
      "epoch": 0.000162115478515625,
      "model_forward_time": 0.11532235145568848,
      "step": 26561
    },
    {
      "epoch": 0.000162115478515625,
      "step": 26561,
      "training_step_time": 0.4175903797149658
    },
    {
      "epoch": 0.00016212158203125,
      "model_forward_time": 0.11498570442199707,
      "step": 26562
    },
    {
      "epoch": 0.00016212158203125,
      "step": 26562,
      "training_step_time": 0.42179179191589355
    },
    {
      "epoch": 0.000162127685546875,
      "model_forward_time": 0.11546659469604492,
      "step": 26563
    },
    {
      "epoch": 0.000162127685546875,
      "step": 26563,
      "training_step_time": 0.3987112045288086
    },
    {
      "epoch": 0.0001621337890625,
      "model_forward_time": 0.1151278018951416,
      "step": 26564
    },
    {
      "epoch": 0.0001621337890625,
      "step": 26564,
      "training_step_time": 0.49067091941833496
    },
    {
      "epoch": 0.000162139892578125,
      "model_forward_time": 0.11480116844177246,
      "step": 26565
    },
    {
      "epoch": 0.000162139892578125,
      "step": 26565,
      "training_step_time": 0.43120384216308594
    },
    {
      "epoch": 0.00016214599609375,
      "model_forward_time": 0.11506342887878418,
      "step": 26566
    },
    {
      "epoch": 0.00016214599609375,
      "step": 26566,
      "training_step_time": 0.49277329444885254
    },
    {
      "epoch": 0.000162152099609375,
      "model_forward_time": 0.11484551429748535,
      "step": 26567
    },
    {
      "epoch": 0.000162152099609375,
      "step": 26567,
      "training_step_time": 0.4397416114807129
    },
    {
      "epoch": 0.000162158203125,
      "model_forward_time": 0.11484122276306152,
      "step": 26568
    },
    {
      "epoch": 0.000162158203125,
      "step": 26568,
      "training_step_time": 0.4067513942718506
    },
    {
      "epoch": 0.000162164306640625,
      "model_forward_time": 0.11456727981567383,
      "step": 26569
    },
    {
      "epoch": 0.000162164306640625,
      "step": 26569,
      "training_step_time": 0.47663092613220215
    },
    {
      "epoch": 0.00016217041015625,
      "grad_norm": 0.15031199157238007,
      "learning_rate": 6.341944631927662e-05,
      "loss": 0.0457,
      "step": 26570
    },
    {
      "epoch": 0.00016217041015625,
      "model_forward_time": 0.11487269401550293,
      "step": 26570
    },
    {
      "epoch": 0.00016217041015625,
      "step": 26570,
      "training_step_time": 0.4020090103149414
    },
    {
      "epoch": 0.000162176513671875,
      "model_forward_time": 0.1146390438079834,
      "step": 26571
    },
    {
      "epoch": 0.000162176513671875,
      "step": 26571,
      "training_step_time": 0.3855736255645752
    },
    {
      "epoch": 0.0001621826171875,
      "model_forward_time": 0.11526298522949219,
      "step": 26572
    },
    {
      "epoch": 0.0001621826171875,
      "step": 26572,
      "training_step_time": 0.39084601402282715
    },
    {
      "epoch": 0.000162188720703125,
      "model_forward_time": 0.11462187767028809,
      "step": 26573
    },
    {
      "epoch": 0.000162188720703125,
      "step": 26573,
      "training_step_time": 0.3877580165863037
    },
    {
      "epoch": 0.00016219482421875,
      "model_forward_time": 0.11489200592041016,
      "step": 26574
    },
    {
      "epoch": 0.00016219482421875,
      "step": 26574,
      "training_step_time": 0.39496445655822754
    },
    {
      "epoch": 0.000162200927734375,
      "model_forward_time": 0.1149146556854248,
      "step": 26575
    },
    {
      "epoch": 0.000162200927734375,
      "step": 26575,
      "training_step_time": 0.3992009162902832
    },
    {
      "epoch": 0.00016220703125,
      "model_forward_time": 0.11504006385803223,
      "step": 26576
    },
    {
      "epoch": 0.00016220703125,
      "step": 26576,
      "training_step_time": 0.4649391174316406
    },
    {
      "epoch": 0.000162213134765625,
      "model_forward_time": 0.11508989334106445,
      "step": 26577
    },
    {
      "epoch": 0.000162213134765625,
      "step": 26577,
      "training_step_time": 0.39761996269226074
    },
    {
      "epoch": 0.00016221923828125,
      "model_forward_time": 0.11486196517944336,
      "step": 26578
    },
    {
      "epoch": 0.00016221923828125,
      "step": 26578,
      "training_step_time": 0.42731380462646484
    },
    {
      "epoch": 0.000162225341796875,
      "model_forward_time": 0.11491775512695312,
      "step": 26579
    },
    {
      "epoch": 0.000162225341796875,
      "step": 26579,
      "training_step_time": 0.3639545440673828
    },
    {
      "epoch": 0.0001622314453125,
      "grad_norm": 0.1533225029706955,
      "learning_rate": 6.339289753131649e-05,
      "loss": 0.05,
      "step": 26580
    },
    {
      "epoch": 0.0001622314453125,
      "model_forward_time": 0.11538887023925781,
      "step": 26580
    },
    {
      "epoch": 0.0001622314453125,
      "step": 26580,
      "training_step_time": 0.45943355560302734
    },
    {
      "epoch": 0.000162237548828125,
      "model_forward_time": 0.11537981033325195,
      "step": 26581
    },
    {
      "epoch": 0.000162237548828125,
      "step": 26581,
      "training_step_time": 0.40934062004089355
    },
    {
      "epoch": 0.00016224365234375,
      "model_forward_time": 0.11540484428405762,
      "step": 26582
    },
    {
      "epoch": 0.00016224365234375,
      "step": 26582,
      "training_step_time": 0.43197107315063477
    },
    {
      "epoch": 0.000162249755859375,
      "model_forward_time": 0.11426758766174316,
      "step": 26583
    },
    {
      "epoch": 0.000162249755859375,
      "step": 26583,
      "training_step_time": 0.4386262893676758
    },
    {
      "epoch": 0.000162255859375,
      "model_forward_time": 0.11436724662780762,
      "step": 26584
    },
    {
      "epoch": 0.000162255859375,
      "step": 26584,
      "training_step_time": 0.4608166217803955
    },
    {
      "epoch": 0.000162261962890625,
      "model_forward_time": 0.11475110054016113,
      "step": 26585
    },
    {
      "epoch": 0.000162261962890625,
      "step": 26585,
      "training_step_time": 0.38910365104675293
    },
    {
      "epoch": 0.00016226806640625,
      "model_forward_time": 0.11497092247009277,
      "step": 26586
    },
    {
      "epoch": 0.00016226806640625,
      "step": 26586,
      "training_step_time": 0.3949418067932129
    },
    {
      "epoch": 0.000162274169921875,
      "model_forward_time": 0.11501955986022949,
      "step": 26587
    },
    {
      "epoch": 0.000162274169921875,
      "step": 26587,
      "training_step_time": 0.39879798889160156
    },
    {
      "epoch": 0.0001622802734375,
      "model_forward_time": 0.11501026153564453,
      "step": 26588
    },
    {
      "epoch": 0.0001622802734375,
      "step": 26588,
      "training_step_time": 0.4992191791534424
    },
    {
      "epoch": 0.000162286376953125,
      "model_forward_time": 0.1148383617401123,
      "step": 26589
    },
    {
      "epoch": 0.000162286376953125,
      "step": 26589,
      "training_step_time": 0.40311193466186523
    },
    {
      "epoch": 0.00016229248046875,
      "grad_norm": 0.12245866656303406,
      "learning_rate": 6.336634467494768e-05,
      "loss": 0.0441,
      "step": 26590
    },
    {
      "epoch": 0.00016229248046875,
      "model_forward_time": 0.1147913932800293,
      "step": 26590
    },
    {
      "epoch": 0.00016229248046875,
      "step": 26590,
      "training_step_time": 0.4319601058959961
    },
    {
      "epoch": 0.000162298583984375,
      "model_forward_time": 0.1143028736114502,
      "step": 26591
    },
    {
      "epoch": 0.000162298583984375,
      "step": 26591,
      "training_step_time": 0.40201258659362793
    },
    {
      "epoch": 0.0001623046875,
      "model_forward_time": 0.11513280868530273,
      "step": 26592
    },
    {
      "epoch": 0.0001623046875,
      "step": 26592,
      "training_step_time": 0.38593149185180664
    },
    {
      "epoch": 0.000162310791015625,
      "model_forward_time": 0.11500978469848633,
      "step": 26593
    },
    {
      "epoch": 0.000162310791015625,
      "step": 26593,
      "training_step_time": 0.4127190113067627
    },
    {
      "epoch": 0.00016231689453125,
      "model_forward_time": 0.11442351341247559,
      "step": 26594
    },
    {
      "epoch": 0.00016231689453125,
      "step": 26594,
      "training_step_time": 0.5585751533508301
    },
    {
      "epoch": 0.000162322998046875,
      "model_forward_time": 0.11487221717834473,
      "step": 26595
    },
    {
      "epoch": 0.000162322998046875,
      "step": 26595,
      "training_step_time": 0.4974031448364258
    },
    {
      "epoch": 0.0001623291015625,
      "model_forward_time": 0.11449027061462402,
      "step": 26596
    },
    {
      "epoch": 0.0001623291015625,
      "step": 26596,
      "training_step_time": 0.3843364715576172
    },
    {
      "epoch": 0.000162335205078125,
      "model_forward_time": 0.11503267288208008,
      "step": 26597
    },
    {
      "epoch": 0.000162335205078125,
      "step": 26597,
      "training_step_time": 0.4344635009765625
    },
    {
      "epoch": 0.00016234130859375,
      "model_forward_time": 0.11496567726135254,
      "step": 26598
    },
    {
      "epoch": 0.00016234130859375,
      "step": 26598,
      "training_step_time": 0.49736499786376953
    },
    {
      "epoch": 0.000162347412109375,
      "model_forward_time": 0.11463284492492676,
      "step": 26599
    },
    {
      "epoch": 0.000162347412109375,
      "step": 26599,
      "training_step_time": 0.4004037380218506
    },
    {
      "epoch": 0.000162353515625,
      "grad_norm": 0.1529158353805542,
      "learning_rate": 6.333978775823631e-05,
      "loss": 0.0476,
      "step": 26600
    },
    {
      "epoch": 0.000162353515625,
      "model_forward_time": 0.11469125747680664,
      "step": 26600
    },
    {
      "epoch": 0.000162353515625,
      "step": 26600,
      "training_step_time": 0.3889634609222412
    },
    {
      "epoch": 0.000162359619140625,
      "model_forward_time": 0.11542439460754395,
      "step": 26601
    },
    {
      "epoch": 0.000162359619140625,
      "step": 26601,
      "training_step_time": 0.387251615524292
    },
    {
      "epoch": 0.00016236572265625,
      "model_forward_time": 0.11503934860229492,
      "step": 26602
    },
    {
      "epoch": 0.00016236572265625,
      "step": 26602,
      "training_step_time": 0.4073910713195801
    },
    {
      "epoch": 0.000162371826171875,
      "model_forward_time": 0.11482644081115723,
      "step": 26603
    },
    {
      "epoch": 0.000162371826171875,
      "step": 26603,
      "training_step_time": 0.395038366317749
    },
    {
      "epoch": 0.0001623779296875,
      "model_forward_time": 0.11540770530700684,
      "step": 26604
    },
    {
      "epoch": 0.0001623779296875,
      "step": 26604,
      "training_step_time": 0.39795684814453125
    },
    {
      "epoch": 0.000162384033203125,
      "model_forward_time": 0.11510968208312988,
      "step": 26605
    },
    {
      "epoch": 0.000162384033203125,
      "step": 26605,
      "training_step_time": 0.4011361598968506
    },
    {
      "epoch": 0.00016239013671875,
      "model_forward_time": 0.11476254463195801,
      "step": 26606
    },
    {
      "epoch": 0.00016239013671875,
      "step": 26606,
      "training_step_time": 0.5469751358032227
    },
    {
      "epoch": 0.000162396240234375,
      "model_forward_time": 0.1149592399597168,
      "step": 26607
    },
    {
      "epoch": 0.000162396240234375,
      "step": 26607,
      "training_step_time": 0.4643416404724121
    },
    {
      "epoch": 0.00016240234375,
      "model_forward_time": 0.11437702178955078,
      "step": 26608
    },
    {
      "epoch": 0.00016240234375,
      "step": 26608,
      "training_step_time": 0.4165642261505127
    },
    {
      "epoch": 0.000162408447265625,
      "model_forward_time": 0.11494970321655273,
      "step": 26609
    },
    {
      "epoch": 0.000162408447265625,
      "step": 26609,
      "training_step_time": 0.4498114585876465
    },
    {
      "epoch": 0.00016241455078125,
      "grad_norm": 0.17055782675743103,
      "learning_rate": 6.331322678924962e-05,
      "loss": 0.049,
      "step": 26610
    },
    {
      "epoch": 0.00016241455078125,
      "model_forward_time": 0.11496901512145996,
      "step": 26610
    },
    {
      "epoch": 0.00016241455078125,
      "step": 26610,
      "training_step_time": 0.4715538024902344
    },
    {
      "epoch": 0.000162420654296875,
      "model_forward_time": 0.11477065086364746,
      "step": 26611
    },
    {
      "epoch": 0.000162420654296875,
      "step": 26611,
      "training_step_time": 0.40046095848083496
    },
    {
      "epoch": 0.0001624267578125,
      "model_forward_time": 0.11548471450805664,
      "step": 26612
    },
    {
      "epoch": 0.0001624267578125,
      "step": 26612,
      "training_step_time": 0.4371042251586914
    },
    {
      "epoch": 0.000162432861328125,
      "model_forward_time": 0.11558294296264648,
      "step": 26613
    },
    {
      "epoch": 0.000162432861328125,
      "step": 26613,
      "training_step_time": 0.3953819274902344
    },
    {
      "epoch": 0.00016243896484375,
      "model_forward_time": 0.11497950553894043,
      "step": 26614
    },
    {
      "epoch": 0.00016243896484375,
      "step": 26614,
      "training_step_time": 0.3856842517852783
    },
    {
      "epoch": 0.000162445068359375,
      "model_forward_time": 0.11508059501647949,
      "step": 26615
    },
    {
      "epoch": 0.000162445068359375,
      "step": 26615,
      "training_step_time": 0.39759302139282227
    },
    {
      "epoch": 0.000162451171875,
      "model_forward_time": 0.11448502540588379,
      "step": 26616
    },
    {
      "epoch": 0.000162451171875,
      "step": 26616,
      "training_step_time": 0.4026224613189697
    },
    {
      "epoch": 0.000162457275390625,
      "model_forward_time": 0.11515116691589355,
      "step": 26617
    },
    {
      "epoch": 0.000162457275390625,
      "step": 26617,
      "training_step_time": 0.45560359954833984
    },
    {
      "epoch": 0.00016246337890625,
      "model_forward_time": 0.11519002914428711,
      "step": 26618
    },
    {
      "epoch": 0.00016246337890625,
      "step": 26618,
      "training_step_time": 0.5649816989898682
    },
    {
      "epoch": 0.000162469482421875,
      "model_forward_time": 0.11591053009033203,
      "step": 26619
    },
    {
      "epoch": 0.000162469482421875,
      "step": 26619,
      "training_step_time": 0.39136624336242676
    },
    {
      "epoch": 0.0001624755859375,
      "grad_norm": 0.10229070484638214,
      "learning_rate": 6.328666177605616e-05,
      "loss": 0.0431,
      "step": 26620
    },
    {
      "epoch": 0.0001624755859375,
      "model_forward_time": 0.11519885063171387,
      "step": 26620
    },
    {
      "epoch": 0.0001624755859375,
      "step": 26620,
      "training_step_time": 0.3981914520263672
    },
    {
      "epoch": 0.000162481689453125,
      "model_forward_time": 0.11488604545593262,
      "step": 26621
    },
    {
      "epoch": 0.000162481689453125,
      "step": 26621,
      "training_step_time": 0.4396963119506836
    },
    {
      "epoch": 0.00016248779296875,
      "model_forward_time": 0.11586666107177734,
      "step": 26622
    },
    {
      "epoch": 0.00016248779296875,
      "step": 26622,
      "training_step_time": 0.4833254814147949
    },
    {
      "epoch": 0.000162493896484375,
      "model_forward_time": 0.11704587936401367,
      "step": 26623
    },
    {
      "epoch": 0.000162493896484375,
      "step": 26623,
      "training_step_time": 0.47758936882019043
    },
    {
      "epoch": 0.0001625,
      "model_forward_time": 0.11627817153930664,
      "step": 26624
    },
    {
      "epoch": 0.0001625,
      "step": 26624,
      "training_step_time": 0.44890308380126953
    },
    {
      "epoch": 0.000162506103515625,
      "model_forward_time": 0.11592245101928711,
      "step": 26625
    },
    {
      "epoch": 0.000162506103515625,
      "step": 26625,
      "training_step_time": 0.4173314571380615
    },
    {
      "epoch": 0.00016251220703125,
      "model_forward_time": 0.11470985412597656,
      "step": 26626
    },
    {
      "epoch": 0.00016251220703125,
      "step": 26626,
      "training_step_time": 0.4779787063598633
    },
    {
      "epoch": 0.000162518310546875,
      "model_forward_time": 0.11473536491394043,
      "step": 26627
    },
    {
      "epoch": 0.000162518310546875,
      "step": 26627,
      "training_step_time": 0.3802664279937744
    },
    {
      "epoch": 0.0001625244140625,
      "model_forward_time": 0.11445832252502441,
      "step": 26628
    },
    {
      "epoch": 0.0001625244140625,
      "step": 26628,
      "training_step_time": 0.39827394485473633
    },
    {
      "epoch": 0.000162530517578125,
      "model_forward_time": 0.1152029037475586,
      "step": 26629
    },
    {
      "epoch": 0.000162530517578125,
      "step": 26629,
      "training_step_time": 0.4143397808074951
    },
    {
      "epoch": 0.00016253662109375,
      "grad_norm": 0.17689797282218933,
      "learning_rate": 6.326009272672564e-05,
      "loss": 0.0505,
      "step": 26630
    },
    {
      "epoch": 0.00016253662109375,
      "model_forward_time": 0.11553311347961426,
      "step": 26630
    },
    {
      "epoch": 0.00016253662109375,
      "step": 26630,
      "training_step_time": 0.5119063854217529
    },
    {
      "epoch": 0.000162542724609375,
      "model_forward_time": 0.11520504951477051,
      "step": 26631
    },
    {
      "epoch": 0.000162542724609375,
      "step": 26631,
      "training_step_time": 0.4168074131011963
    },
    {
      "epoch": 0.000162548828125,
      "model_forward_time": 0.11521530151367188,
      "step": 26632
    },
    {
      "epoch": 0.000162548828125,
      "step": 26632,
      "training_step_time": 0.38499903678894043
    },
    {
      "epoch": 0.000162554931640625,
      "model_forward_time": 0.11489534378051758,
      "step": 26633
    },
    {
      "epoch": 0.000162554931640625,
      "step": 26633,
      "training_step_time": 0.39139795303344727
    },
    {
      "epoch": 0.00016256103515625,
      "model_forward_time": 0.1155099868774414,
      "step": 26634
    },
    {
      "epoch": 0.00016256103515625,
      "step": 26634,
      "training_step_time": 0.4064826965332031
    },
    {
      "epoch": 0.000162567138671875,
      "model_forward_time": 0.11481118202209473,
      "step": 26635
    },
    {
      "epoch": 0.000162567138671875,
      "step": 26635,
      "training_step_time": 0.40296125411987305
    },
    {
      "epoch": 0.0001625732421875,
      "model_forward_time": 0.11624813079833984,
      "step": 26636
    },
    {
      "epoch": 0.0001625732421875,
      "step": 26636,
      "training_step_time": 0.6774251461029053
    },
    {
      "epoch": 0.000162579345703125,
      "model_forward_time": 0.11544418334960938,
      "step": 26637
    },
    {
      "epoch": 0.000162579345703125,
      "step": 26637,
      "training_step_time": 0.3969569206237793
    },
    {
      "epoch": 0.00016258544921875,
      "model_forward_time": 0.1155097484588623,
      "step": 26638
    },
    {
      "epoch": 0.00016258544921875,
      "step": 26638,
      "training_step_time": 0.4135909080505371
    },
    {
      "epoch": 0.000162591552734375,
      "model_forward_time": 0.11523985862731934,
      "step": 26639
    },
    {
      "epoch": 0.000162591552734375,
      "step": 26639,
      "training_step_time": 0.42799925804138184
    },
    {
      "epoch": 0.00016259765625,
      "grad_norm": 0.1537339985370636,
      "learning_rate": 6.323351964932908e-05,
      "loss": 0.053,
      "step": 26640
    },
    {
      "epoch": 0.00016259765625,
      "model_forward_time": 0.11539530754089355,
      "step": 26640
    },
    {
      "epoch": 0.00016259765625,
      "step": 26640,
      "training_step_time": 0.4431765079498291
    },
    {
      "epoch": 0.000162603759765625,
      "model_forward_time": 0.11508059501647949,
      "step": 26641
    },
    {
      "epoch": 0.000162603759765625,
      "step": 26641,
      "training_step_time": 0.3899807929992676
    },
    {
      "epoch": 0.00016260986328125,
      "model_forward_time": 0.11520123481750488,
      "step": 26642
    },
    {
      "epoch": 0.00016260986328125,
      "step": 26642,
      "training_step_time": 0.45415568351745605
    },
    {
      "epoch": 0.000162615966796875,
      "model_forward_time": 0.11547493934631348,
      "step": 26643
    },
    {
      "epoch": 0.000162615966796875,
      "step": 26643,
      "training_step_time": 0.41103601455688477
    },
    {
      "epoch": 0.0001626220703125,
      "model_forward_time": 0.11571264266967773,
      "step": 26644
    },
    {
      "epoch": 0.0001626220703125,
      "step": 26644,
      "training_step_time": 0.40313243865966797
    },
    {
      "epoch": 0.000162628173828125,
      "model_forward_time": 0.11587095260620117,
      "step": 26645
    },
    {
      "epoch": 0.000162628173828125,
      "step": 26645,
      "training_step_time": 0.4046366214752197
    },
    {
      "epoch": 0.00016263427734375,
      "model_forward_time": 0.11530232429504395,
      "step": 26646
    },
    {
      "epoch": 0.00016263427734375,
      "step": 26646,
      "training_step_time": 0.3901853561401367
    },
    {
      "epoch": 0.000162640380859375,
      "model_forward_time": 0.11563825607299805,
      "step": 26647
    },
    {
      "epoch": 0.000162640380859375,
      "step": 26647,
      "training_step_time": 0.3948788642883301
    },
    {
      "epoch": 0.000162646484375,
      "model_forward_time": 0.11527419090270996,
      "step": 26648
    },
    {
      "epoch": 0.000162646484375,
      "step": 26648,
      "training_step_time": 0.5897493362426758
    },
    {
      "epoch": 0.000162652587890625,
      "model_forward_time": 0.11568212509155273,
      "step": 26649
    },
    {
      "epoch": 0.000162652587890625,
      "step": 26649,
      "training_step_time": 0.38561582565307617
    },
    {
      "epoch": 0.00016265869140625,
      "grad_norm": 0.148736372590065,
      "learning_rate": 6.320694255193867e-05,
      "loss": 0.0499,
      "step": 26650
    },
    {
      "epoch": 0.00016265869140625,
      "model_forward_time": 0.11538934707641602,
      "step": 26650
    },
    {
      "epoch": 0.00016265869140625,
      "step": 26650,
      "training_step_time": 0.39679765701293945
    },
    {
      "epoch": 0.000162664794921875,
      "model_forward_time": 0.11491179466247559,
      "step": 26651
    },
    {
      "epoch": 0.000162664794921875,
      "step": 26651,
      "training_step_time": 0.4433772563934326
    },
    {
      "epoch": 0.0001626708984375,
      "model_forward_time": 0.1153409481048584,
      "step": 26652
    },
    {
      "epoch": 0.0001626708984375,
      "step": 26652,
      "training_step_time": 0.4218883514404297
    },
    {
      "epoch": 0.000162677001953125,
      "model_forward_time": 0.11560368537902832,
      "step": 26653
    },
    {
      "epoch": 0.000162677001953125,
      "step": 26653,
      "training_step_time": 0.44806432723999023
    },
    {
      "epoch": 0.00016268310546875,
      "model_forward_time": 0.11539649963378906,
      "step": 26654
    },
    {
      "epoch": 0.00016268310546875,
      "step": 26654,
      "training_step_time": 0.5810909271240234
    },
    {
      "epoch": 0.000162689208984375,
      "model_forward_time": 0.11590862274169922,
      "step": 26655
    },
    {
      "epoch": 0.000162689208984375,
      "step": 26655,
      "training_step_time": 0.39327073097229004
    },
    {
      "epoch": 0.0001626953125,
      "model_forward_time": 0.11579084396362305,
      "step": 26656
    },
    {
      "epoch": 0.0001626953125,
      "step": 26656,
      "training_step_time": 0.3884291648864746
    },
    {
      "epoch": 0.000162701416015625,
      "model_forward_time": 0.11470770835876465,
      "step": 26657
    },
    {
      "epoch": 0.000162701416015625,
      "step": 26657,
      "training_step_time": 0.3814537525177002
    },
    {
      "epoch": 0.00016270751953125,
      "model_forward_time": 0.11511468887329102,
      "step": 26658
    },
    {
      "epoch": 0.00016270751953125,
      "step": 26658,
      "training_step_time": 0.390491247177124
    },
    {
      "epoch": 0.000162713623046875,
      "model_forward_time": 0.11488103866577148,
      "step": 26659
    },
    {
      "epoch": 0.000162713623046875,
      "step": 26659,
      "training_step_time": 0.3913993835449219
    },
    {
      "epoch": 0.0001627197265625,
      "grad_norm": 0.14382225275039673,
      "learning_rate": 6.31803614426278e-05,
      "loss": 0.0433,
      "step": 26660
    },
    {
      "epoch": 0.0001627197265625,
      "model_forward_time": 0.1152496337890625,
      "step": 26660
    },
    {
      "epoch": 0.0001627197265625,
      "step": 26660,
      "training_step_time": 0.49858736991882324
    },
    {
      "epoch": 0.000162725830078125,
      "model_forward_time": 0.11620044708251953,
      "step": 26661
    },
    {
      "epoch": 0.000162725830078125,
      "step": 26661,
      "training_step_time": 0.3898589611053467
    },
    {
      "epoch": 0.00016273193359375,
      "model_forward_time": 0.11570048332214355,
      "step": 26662
    },
    {
      "epoch": 0.00016273193359375,
      "step": 26662,
      "training_step_time": 0.4284541606903076
    },
    {
      "epoch": 0.000162738037109375,
      "model_forward_time": 0.11569690704345703,
      "step": 26663
    },
    {
      "epoch": 0.000162738037109375,
      "step": 26663,
      "training_step_time": 0.3971428871154785
    },
    {
      "epoch": 0.000162744140625,
      "model_forward_time": 0.11580038070678711,
      "step": 26664
    },
    {
      "epoch": 0.000162744140625,
      "step": 26664,
      "training_step_time": 0.4414827823638916
    },
    {
      "epoch": 0.000162750244140625,
      "model_forward_time": 0.11652350425720215,
      "step": 26665
    },
    {
      "epoch": 0.000162750244140625,
      "step": 26665,
      "training_step_time": 0.3700282573699951
    },
    {
      "epoch": 0.00016275634765625,
      "model_forward_time": 0.11555004119873047,
      "step": 26666
    },
    {
      "epoch": 0.00016275634765625,
      "step": 26666,
      "training_step_time": 0.5418341159820557
    },
    {
      "epoch": 0.000162762451171875,
      "model_forward_time": 0.11524724960327148,
      "step": 26667
    },
    {
      "epoch": 0.000162762451171875,
      "step": 26667,
      "training_step_time": 0.4616665840148926
    },
    {
      "epoch": 0.0001627685546875,
      "model_forward_time": 0.11492776870727539,
      "step": 26668
    },
    {
      "epoch": 0.0001627685546875,
      "step": 26668,
      "training_step_time": 0.4598119258880615
    },
    {
      "epoch": 0.000162774658203125,
      "model_forward_time": 0.11484003067016602,
      "step": 26669
    },
    {
      "epoch": 0.000162774658203125,
      "step": 26669,
      "training_step_time": 0.41565752029418945
    },
    {
      "epoch": 0.00016278076171875,
      "grad_norm": 0.1046186313033104,
      "learning_rate": 6.315377632947115e-05,
      "loss": 0.044,
      "step": 26670
    },
    {
      "epoch": 0.00016278076171875,
      "model_forward_time": 0.11540579795837402,
      "step": 26670
    },
    {
      "epoch": 0.00016278076171875,
      "step": 26670,
      "training_step_time": 0.39739394187927246
    },
    {
      "epoch": 0.000162786865234375,
      "model_forward_time": 0.1145181655883789,
      "step": 26671
    },
    {
      "epoch": 0.000162786865234375,
      "step": 26671,
      "training_step_time": 0.39495229721069336
    },
    {
      "epoch": 0.00016279296875,
      "model_forward_time": 0.11580324172973633,
      "step": 26672
    },
    {
      "epoch": 0.00016279296875,
      "step": 26672,
      "training_step_time": 0.458843469619751
    },
    {
      "epoch": 0.000162799072265625,
      "model_forward_time": 0.11522483825683594,
      "step": 26673
    },
    {
      "epoch": 0.000162799072265625,
      "step": 26673,
      "training_step_time": 0.39038586616516113
    },
    {
      "epoch": 0.00016280517578125,
      "model_forward_time": 0.1152641773223877,
      "step": 26674
    },
    {
      "epoch": 0.00016280517578125,
      "step": 26674,
      "training_step_time": 0.3984520435333252
    },
    {
      "epoch": 0.000162811279296875,
      "model_forward_time": 0.11705827713012695,
      "step": 26675
    },
    {
      "epoch": 0.000162811279296875,
      "step": 26675,
      "training_step_time": 0.38600707054138184
    },
    {
      "epoch": 0.0001628173828125,
      "model_forward_time": 0.11592245101928711,
      "step": 26676
    },
    {
      "epoch": 0.0001628173828125,
      "step": 26676,
      "training_step_time": 0.39136338233947754
    },
    {
      "epoch": 0.000162823486328125,
      "model_forward_time": 0.11533689498901367,
      "step": 26677
    },
    {
      "epoch": 0.000162823486328125,
      "step": 26677,
      "training_step_time": 0.4732050895690918
    },
    {
      "epoch": 0.00016282958984375,
      "model_forward_time": 0.11568450927734375,
      "step": 26678
    },
    {
      "epoch": 0.00016282958984375,
      "step": 26678,
      "training_step_time": 0.5534541606903076
    },
    {
      "epoch": 0.000162835693359375,
      "model_forward_time": 0.11623811721801758,
      "step": 26679
    },
    {
      "epoch": 0.000162835693359375,
      "step": 26679,
      "training_step_time": 0.3704979419708252
    },
    {
      "epoch": 0.000162841796875,
      "grad_norm": 0.13781042397022247,
      "learning_rate": 6.312718722054454e-05,
      "loss": 0.0447,
      "step": 26680
    },
    {
      "epoch": 0.000162841796875,
      "model_forward_time": 0.11513209342956543,
      "step": 26680
    },
    {
      "epoch": 0.000162841796875,
      "step": 26680,
      "training_step_time": 0.41954994201660156
    },
    {
      "epoch": 0.000162847900390625,
      "model_forward_time": 0.11487722396850586,
      "step": 26681
    },
    {
      "epoch": 0.000162847900390625,
      "step": 26681,
      "training_step_time": 0.4296858310699463
    },
    {
      "epoch": 0.00016285400390625,
      "model_forward_time": 0.1158289909362793,
      "step": 26682
    },
    {
      "epoch": 0.00016285400390625,
      "step": 26682,
      "training_step_time": 0.4118950366973877
    },
    {
      "epoch": 0.000162860107421875,
      "model_forward_time": 0.11493587493896484,
      "step": 26683
    },
    {
      "epoch": 0.000162860107421875,
      "step": 26683,
      "training_step_time": 0.450793981552124
    },
    {
      "epoch": 0.0001628662109375,
      "model_forward_time": 0.11492300033569336,
      "step": 26684
    },
    {
      "epoch": 0.0001628662109375,
      "step": 26684,
      "training_step_time": 0.3836638927459717
    },
    {
      "epoch": 0.000162872314453125,
      "model_forward_time": 0.1154179573059082,
      "step": 26685
    },
    {
      "epoch": 0.000162872314453125,
      "step": 26685,
      "training_step_time": 0.3836851119995117
    },
    {
      "epoch": 0.00016287841796875,
      "model_forward_time": 0.11540818214416504,
      "step": 26686
    },
    {
      "epoch": 0.00016287841796875,
      "step": 26686,
      "training_step_time": 0.3897738456726074
    },
    {
      "epoch": 0.000162884521484375,
      "model_forward_time": 0.11611485481262207,
      "step": 26687
    },
    {
      "epoch": 0.000162884521484375,
      "step": 26687,
      "training_step_time": 0.39380502700805664
    },
    {
      "epoch": 0.000162890625,
      "model_forward_time": 0.11595678329467773,
      "step": 26688
    },
    {
      "epoch": 0.000162890625,
      "step": 26688,
      "training_step_time": 0.4001131057739258
    },
    {
      "epoch": 0.000162896728515625,
      "model_forward_time": 0.1156165599822998,
      "step": 26689
    },
    {
      "epoch": 0.000162896728515625,
      "step": 26689,
      "training_step_time": 0.395266056060791
    },
    {
      "epoch": 0.00016290283203125,
      "grad_norm": 0.16621024906635284,
      "learning_rate": 6.310059412392505e-05,
      "loss": 0.0448,
      "step": 26690
    },
    {
      "epoch": 0.00016290283203125,
      "model_forward_time": 0.11643409729003906,
      "step": 26690
    },
    {
      "epoch": 0.00016290283203125,
      "step": 26690,
      "training_step_time": 0.584038257598877
    },
    {
      "epoch": 0.000162908935546875,
      "model_forward_time": 0.11514973640441895,
      "step": 26691
    },
    {
      "epoch": 0.000162908935546875,
      "step": 26691,
      "training_step_time": 0.44152331352233887
    },
    {
      "epoch": 0.0001629150390625,
      "model_forward_time": 0.11596274375915527,
      "step": 26692
    },
    {
      "epoch": 0.0001629150390625,
      "step": 26692,
      "training_step_time": 0.4437112808227539
    },
    {
      "epoch": 0.000162921142578125,
      "model_forward_time": 0.11518621444702148,
      "step": 26693
    },
    {
      "epoch": 0.000162921142578125,
      "step": 26693,
      "training_step_time": 0.36412858963012695
    },
    {
      "epoch": 0.00016292724609375,
      "model_forward_time": 0.11517763137817383,
      "step": 26694
    },
    {
      "epoch": 0.00016292724609375,
      "step": 26694,
      "training_step_time": 0.4220695495605469
    },
    {
      "epoch": 0.000162933349609375,
      "model_forward_time": 0.11503791809082031,
      "step": 26695
    },
    {
      "epoch": 0.000162933349609375,
      "step": 26695,
      "training_step_time": 0.4173085689544678
    },
    {
      "epoch": 0.000162939453125,
      "model_forward_time": 0.11577081680297852,
      "step": 26696
    },
    {
      "epoch": 0.000162939453125,
      "step": 26696,
      "training_step_time": 0.4278416633605957
    },
    {
      "epoch": 0.000162945556640625,
      "model_forward_time": 0.11501288414001465,
      "step": 26697
    },
    {
      "epoch": 0.000162945556640625,
      "step": 26697,
      "training_step_time": 0.4643282890319824
    },
    {
      "epoch": 0.00016295166015625,
      "model_forward_time": 0.1165013313293457,
      "step": 26698
    },
    {
      "epoch": 0.00016295166015625,
      "step": 26698,
      "training_step_time": 0.4106411933898926
    },
    {
      "epoch": 0.000162957763671875,
      "model_forward_time": 0.11482405662536621,
      "step": 26699
    },
    {
      "epoch": 0.000162957763671875,
      "step": 26699,
      "training_step_time": 0.39064669609069824
    },
    {
      "epoch": 0.0001629638671875,
      "grad_norm": 0.11961454898118973,
      "learning_rate": 6.307399704769099e-05,
      "loss": 0.0474,
      "step": 26700
    },
    {
      "epoch": 0.0001629638671875,
      "model_forward_time": 0.11518406867980957,
      "step": 26700
    },
    {
      "epoch": 0.0001629638671875,
      "step": 26700,
      "training_step_time": 0.40204834938049316
    },
    {
      "epoch": 0.000162969970703125,
      "model_forward_time": 0.11516833305358887,
      "step": 26701
    },
    {
      "epoch": 0.000162969970703125,
      "step": 26701,
      "training_step_time": 0.4012892246246338
    },
    {
      "epoch": 0.00016297607421875,
      "model_forward_time": 0.11517119407653809,
      "step": 26702
    },
    {
      "epoch": 0.00016297607421875,
      "step": 26702,
      "training_step_time": 0.5198535919189453
    },
    {
      "epoch": 0.000162982177734375,
      "model_forward_time": 0.11539196968078613,
      "step": 26703
    },
    {
      "epoch": 0.000162982177734375,
      "step": 26703,
      "training_step_time": 0.3967607021331787
    },
    {
      "epoch": 0.00016298828125,
      "model_forward_time": 0.11624383926391602,
      "step": 26704
    },
    {
      "epoch": 0.00016298828125,
      "step": 26704,
      "training_step_time": 0.4898810386657715
    },
    {
      "epoch": 0.000162994384765625,
      "model_forward_time": 0.11536431312561035,
      "step": 26705
    },
    {
      "epoch": 0.000162994384765625,
      "step": 26705,
      "training_step_time": 0.4364142417907715
    },
    {
      "epoch": 0.00016300048828125,
      "model_forward_time": 0.11518740653991699,
      "step": 26706
    },
    {
      "epoch": 0.00016300048828125,
      "step": 26706,
      "training_step_time": 0.4816906452178955
    },
    {
      "epoch": 0.000163006591796875,
      "model_forward_time": 0.11504077911376953,
      "step": 26707
    },
    {
      "epoch": 0.000163006591796875,
      "step": 26707,
      "training_step_time": 0.36766600608825684
    },
    {
      "epoch": 0.0001630126953125,
      "model_forward_time": 0.11498379707336426,
      "step": 26708
    },
    {
      "epoch": 0.0001630126953125,
      "step": 26708,
      "training_step_time": 0.48296380043029785
    },
    {
      "epoch": 0.000163018798828125,
      "model_forward_time": 0.11526799201965332,
      "step": 26709
    },
    {
      "epoch": 0.000163018798828125,
      "step": 26709,
      "training_step_time": 0.47585391998291016
    },
    {
      "epoch": 0.00016302490234375,
      "grad_norm": 0.13904130458831787,
      "learning_rate": 6.304739599992183e-05,
      "loss": 0.0492,
      "step": 26710
    },
    {
      "epoch": 0.00016302490234375,
      "model_forward_time": 0.11457943916320801,
      "step": 26710
    },
    {
      "epoch": 0.00016302490234375,
      "step": 26710,
      "training_step_time": 0.40504884719848633
    },
    {
      "epoch": 0.000163031005859375,
      "model_forward_time": 0.11495804786682129,
      "step": 26711
    },
    {
      "epoch": 0.000163031005859375,
      "step": 26711,
      "training_step_time": 0.42154622077941895
    },
    {
      "epoch": 0.000163037109375,
      "model_forward_time": 0.11508059501647949,
      "step": 26712
    },
    {
      "epoch": 0.000163037109375,
      "step": 26712,
      "training_step_time": 0.40789318084716797
    },
    {
      "epoch": 0.000163043212890625,
      "model_forward_time": 0.11521029472351074,
      "step": 26713
    },
    {
      "epoch": 0.000163043212890625,
      "step": 26713,
      "training_step_time": 0.3970661163330078
    },
    {
      "epoch": 0.00016304931640625,
      "model_forward_time": 0.11499500274658203,
      "step": 26714
    },
    {
      "epoch": 0.00016304931640625,
      "step": 26714,
      "training_step_time": 0.4405217170715332
    },
    {
      "epoch": 0.000163055419921875,
      "model_forward_time": 0.11608171463012695,
      "step": 26715
    },
    {
      "epoch": 0.000163055419921875,
      "step": 26715,
      "training_step_time": 0.3956315517425537
    },
    {
      "epoch": 0.0001630615234375,
      "model_forward_time": 0.11526942253112793,
      "step": 26716
    },
    {
      "epoch": 0.0001630615234375,
      "step": 26716,
      "training_step_time": 0.40397024154663086
    },
    {
      "epoch": 0.000163067626953125,
      "model_forward_time": 0.11540389060974121,
      "step": 26717
    },
    {
      "epoch": 0.000163067626953125,
      "step": 26717,
      "training_step_time": 0.3932309150695801
    },
    {
      "epoch": 0.00016307373046875,
      "model_forward_time": 0.11545276641845703,
      "step": 26718
    },
    {
      "epoch": 0.00016307373046875,
      "step": 26718,
      "training_step_time": 0.4486372470855713
    },
    {
      "epoch": 0.000163079833984375,
      "model_forward_time": 0.11469602584838867,
      "step": 26719
    },
    {
      "epoch": 0.000163079833984375,
      "step": 26719,
      "training_step_time": 0.4013824462890625
    },
    {
      "epoch": 0.0001630859375,
      "grad_norm": 0.13738778233528137,
      "learning_rate": 6.302079098869824e-05,
      "loss": 0.0497,
      "step": 26720
    },
    {
      "epoch": 0.0001630859375,
      "model_forward_time": 0.11508703231811523,
      "step": 26720
    },
    {
      "epoch": 0.0001630859375,
      "step": 26720,
      "training_step_time": 0.559359073638916
    },
    {
      "epoch": 0.000163092041015625,
      "model_forward_time": 0.11481094360351562,
      "step": 26721
    },
    {
      "epoch": 0.000163092041015625,
      "step": 26721,
      "training_step_time": 0.40121936798095703
    },
    {
      "epoch": 0.00016309814453125,
      "model_forward_time": 0.11586952209472656,
      "step": 26722
    },
    {
      "epoch": 0.00016309814453125,
      "step": 26722,
      "training_step_time": 0.4163978099822998
    },
    {
      "epoch": 0.000163104248046875,
      "model_forward_time": 0.11539435386657715,
      "step": 26723
    },
    {
      "epoch": 0.000163104248046875,
      "step": 26723,
      "training_step_time": 0.41878724098205566
    },
    {
      "epoch": 0.0001631103515625,
      "model_forward_time": 0.11543560028076172,
      "step": 26724
    },
    {
      "epoch": 0.0001631103515625,
      "step": 26724,
      "training_step_time": 0.4561021327972412
    },
    {
      "epoch": 0.000163116455078125,
      "model_forward_time": 0.11500144004821777,
      "step": 26725
    },
    {
      "epoch": 0.000163116455078125,
      "step": 26725,
      "training_step_time": 0.46614694595336914
    },
    {
      "epoch": 0.00016312255859375,
      "model_forward_time": 0.11780977249145508,
      "step": 26726
    },
    {
      "epoch": 0.00016312255859375,
      "step": 26726,
      "training_step_time": 0.4982309341430664
    },
    {
      "epoch": 0.000163128662109375,
      "model_forward_time": 0.11526131629943848,
      "step": 26727
    },
    {
      "epoch": 0.000163128662109375,
      "step": 26727,
      "training_step_time": 0.38633131980895996
    },
    {
      "epoch": 0.000163134765625,
      "model_forward_time": 0.11469054222106934,
      "step": 26728
    },
    {
      "epoch": 0.000163134765625,
      "step": 26728,
      "training_step_time": 0.3988192081451416
    },
    {
      "epoch": 0.000163140869140625,
      "model_forward_time": 0.11495137214660645,
      "step": 26729
    },
    {
      "epoch": 0.000163140869140625,
      "step": 26729,
      "training_step_time": 0.3913416862487793
    },
    {
      "epoch": 0.00016314697265625,
      "grad_norm": 0.14200367033481598,
      "learning_rate": 6.299418202210214e-05,
      "loss": 0.0421,
      "step": 26730
    },
    {
      "epoch": 0.00016314697265625,
      "model_forward_time": 0.11548757553100586,
      "step": 26730
    },
    {
      "epoch": 0.00016314697265625,
      "step": 26730,
      "training_step_time": 0.39671874046325684
    },
    {
      "epoch": 0.000163153076171875,
      "model_forward_time": 0.11498260498046875,
      "step": 26731
    },
    {
      "epoch": 0.000163153076171875,
      "step": 26731,
      "training_step_time": 0.3870093822479248
    },
    {
      "epoch": 0.0001631591796875,
      "model_forward_time": 0.11570477485656738,
      "step": 26732
    },
    {
      "epoch": 0.0001631591796875,
      "step": 26732,
      "training_step_time": 0.637664794921875
    },
    {
      "epoch": 0.000163165283203125,
      "model_forward_time": 0.11484408378601074,
      "step": 26733
    },
    {
      "epoch": 0.000163165283203125,
      "step": 26733,
      "training_step_time": 0.39287567138671875
    },
    {
      "epoch": 0.00016317138671875,
      "model_forward_time": 0.1155996322631836,
      "step": 26734
    },
    {
      "epoch": 0.00016317138671875,
      "step": 26734,
      "training_step_time": 0.4993302822113037
    },
    {
      "epoch": 0.000163177490234375,
      "model_forward_time": 0.11468839645385742,
      "step": 26735
    },
    {
      "epoch": 0.000163177490234375,
      "step": 26735,
      "training_step_time": 0.38749051094055176
    },
    {
      "epoch": 0.00016318359375,
      "model_forward_time": 0.1150672435760498,
      "step": 26736
    },
    {
      "epoch": 0.00016318359375,
      "step": 26736,
      "training_step_time": 0.40596771240234375
    },
    {
      "epoch": 0.000163189697265625,
      "model_forward_time": 0.11521077156066895,
      "step": 26737
    },
    {
      "epoch": 0.000163189697265625,
      "step": 26737,
      "training_step_time": 0.48267388343811035
    },
    {
      "epoch": 0.00016319580078125,
      "model_forward_time": 0.11473464965820312,
      "step": 26738
    },
    {
      "epoch": 0.00016319580078125,
      "step": 26738,
      "training_step_time": 0.42859554290771484
    },
    {
      "epoch": 0.000163201904296875,
      "model_forward_time": 0.11536097526550293,
      "step": 26739
    },
    {
      "epoch": 0.000163201904296875,
      "step": 26739,
      "training_step_time": 0.3855862617492676
    },
    {
      "epoch": 0.0001632080078125,
      "grad_norm": 0.12634314596652985,
      "learning_rate": 6.296756910821666e-05,
      "loss": 0.0421,
      "step": 26740
    },
    {
      "epoch": 0.0001632080078125,
      "model_forward_time": 0.11520910263061523,
      "step": 26740
    },
    {
      "epoch": 0.0001632080078125,
      "step": 26740,
      "training_step_time": 0.39139580726623535
    },
    {
      "epoch": 0.000163214111328125,
      "model_forward_time": 0.11573576927185059,
      "step": 26741
    },
    {
      "epoch": 0.000163214111328125,
      "step": 26741,
      "training_step_time": 0.3903534412384033
    },
    {
      "epoch": 0.00016322021484375,
      "model_forward_time": 0.1157376766204834,
      "step": 26742
    },
    {
      "epoch": 0.00016322021484375,
      "step": 26742,
      "training_step_time": 0.4011862277984619
    },
    {
      "epoch": 0.000163226318359375,
      "model_forward_time": 0.11617636680603027,
      "step": 26743
    },
    {
      "epoch": 0.000163226318359375,
      "step": 26743,
      "training_step_time": 0.4006376266479492
    },
    {
      "epoch": 0.000163232421875,
      "model_forward_time": 0.11570477485656738,
      "step": 26744
    },
    {
      "epoch": 0.000163232421875,
      "step": 26744,
      "training_step_time": 0.7062063217163086
    },
    {
      "epoch": 0.000163238525390625,
      "model_forward_time": 0.11515688896179199,
      "step": 26745
    },
    {
      "epoch": 0.000163238525390625,
      "step": 26745,
      "training_step_time": 0.38278889656066895
    },
    {
      "epoch": 0.00016324462890625,
      "model_forward_time": 0.11470222473144531,
      "step": 26746
    },
    {
      "epoch": 0.00016324462890625,
      "step": 26746,
      "training_step_time": 0.38628244400024414
    },
    {
      "epoch": 0.000163250732421875,
      "model_forward_time": 0.11466622352600098,
      "step": 26747
    },
    {
      "epoch": 0.000163250732421875,
      "step": 26747,
      "training_step_time": 0.45718908309936523
    },
    {
      "epoch": 0.0001632568359375,
      "model_forward_time": 0.11544513702392578,
      "step": 26748
    },
    {
      "epoch": 0.0001632568359375,
      "step": 26748,
      "training_step_time": 0.4621541500091553
    },
    {
      "epoch": 0.000163262939453125,
      "model_forward_time": 0.11523866653442383,
      "step": 26749
    },
    {
      "epoch": 0.000163262939453125,
      "step": 26749,
      "training_step_time": 0.4027681350708008
    },
    {
      "epoch": 0.00016326904296875,
      "grad_norm": 0.17461718618869781,
      "learning_rate": 6.294095225512603e-05,
      "loss": 0.0448,
      "step": 26750
    },
    {
      "epoch": 0.00016326904296875,
      "model_forward_time": 0.1157078742980957,
      "step": 26750
    },
    {
      "epoch": 0.00016326904296875,
      "step": 26750,
      "training_step_time": 0.4904599189758301
    },
    {
      "epoch": 0.000163275146484375,
      "model_forward_time": 0.11525392532348633,
      "step": 26751
    },
    {
      "epoch": 0.000163275146484375,
      "step": 26751,
      "training_step_time": 0.5086870193481445
    },
    {
      "epoch": 0.00016328125,
      "model_forward_time": 0.11507868766784668,
      "step": 26752
    },
    {
      "epoch": 0.00016328125,
      "step": 26752,
      "training_step_time": 0.42714548110961914
    },
    {
      "epoch": 0.000163287353515625,
      "model_forward_time": 0.11542344093322754,
      "step": 26753
    },
    {
      "epoch": 0.000163287353515625,
      "step": 26753,
      "training_step_time": 0.4700284004211426
    },
    {
      "epoch": 0.00016329345703125,
      "model_forward_time": 0.11513638496398926,
      "step": 26754
    },
    {
      "epoch": 0.00016329345703125,
      "step": 26754,
      "training_step_time": 0.39447665214538574
    },
    {
      "epoch": 0.000163299560546875,
      "model_forward_time": 0.11499905586242676,
      "step": 26755
    },
    {
      "epoch": 0.000163299560546875,
      "step": 26755,
      "training_step_time": 0.39919209480285645
    },
    {
      "epoch": 0.0001633056640625,
      "model_forward_time": 0.11521005630493164,
      "step": 26756
    },
    {
      "epoch": 0.0001633056640625,
      "step": 26756,
      "training_step_time": 0.4083726406097412
    },
    {
      "epoch": 0.000163311767578125,
      "model_forward_time": 0.11548352241516113,
      "step": 26757
    },
    {
      "epoch": 0.000163311767578125,
      "step": 26757,
      "training_step_time": 0.3903689384460449
    },
    {
      "epoch": 0.00016331787109375,
      "model_forward_time": 0.11517572402954102,
      "step": 26758
    },
    {
      "epoch": 0.00016331787109375,
      "step": 26758,
      "training_step_time": 0.4055356979370117
    },
    {
      "epoch": 0.000163323974609375,
      "model_forward_time": 0.11560368537902832,
      "step": 26759
    },
    {
      "epoch": 0.000163323974609375,
      "step": 26759,
      "training_step_time": 0.39586925506591797
    },
    {
      "epoch": 0.000163330078125,
      "grad_norm": 0.17674323916435242,
      "learning_rate": 6.291433147091583e-05,
      "loss": 0.0457,
      "step": 26760
    },
    {
      "epoch": 0.000163330078125,
      "model_forward_time": 0.11515927314758301,
      "step": 26760
    },
    {
      "epoch": 0.000163330078125,
      "step": 26760,
      "training_step_time": 0.41673874855041504
    },
    {
      "epoch": 0.000163336181640625,
      "model_forward_time": 0.11657261848449707,
      "step": 26761
    },
    {
      "epoch": 0.000163336181640625,
      "step": 26761,
      "training_step_time": 0.4361917972564697
    },
    {
      "epoch": 0.00016334228515625,
      "model_forward_time": 0.11586403846740723,
      "step": 26762
    },
    {
      "epoch": 0.00016334228515625,
      "step": 26762,
      "training_step_time": 0.6385471820831299
    },
    {
      "epoch": 0.000163348388671875,
      "model_forward_time": 0.11529016494750977,
      "step": 26763
    },
    {
      "epoch": 0.000163348388671875,
      "step": 26763,
      "training_step_time": 0.3838009834289551
    },
    {
      "epoch": 0.0001633544921875,
      "model_forward_time": 0.11540412902832031,
      "step": 26764
    },
    {
      "epoch": 0.0001633544921875,
      "step": 26764,
      "training_step_time": 0.36597418785095215
    },
    {
      "epoch": 0.000163360595703125,
      "model_forward_time": 0.11592888832092285,
      "step": 26765
    },
    {
      "epoch": 0.000163360595703125,
      "step": 26765,
      "training_step_time": 0.45076847076416016
    },
    {
      "epoch": 0.00016336669921875,
      "model_forward_time": 0.11511826515197754,
      "step": 26766
    },
    {
      "epoch": 0.00016336669921875,
      "step": 26766,
      "training_step_time": 0.399294376373291
    },
    {
      "epoch": 0.000163372802734375,
      "model_forward_time": 0.11449623107910156,
      "step": 26767
    },
    {
      "epoch": 0.000163372802734375,
      "step": 26767,
      "training_step_time": 0.3980889320373535
    },
    {
      "epoch": 0.00016337890625,
      "model_forward_time": 0.11513280868530273,
      "step": 26768
    },
    {
      "epoch": 0.00016337890625,
      "step": 26768,
      "training_step_time": 0.5344128608703613
    },
    {
      "epoch": 0.000163385009765625,
      "model_forward_time": 0.11472487449645996,
      "step": 26769
    },
    {
      "epoch": 0.000163385009765625,
      "step": 26769,
      "training_step_time": 0.381350040435791
    },
    {
      "epoch": 0.00016339111328125,
      "grad_norm": 0.14769326150417328,
      "learning_rate": 6.28877067636727e-05,
      "loss": 0.044,
      "step": 26770
    },
    {
      "epoch": 0.00016339111328125,
      "model_forward_time": 0.11587786674499512,
      "step": 26770
    },
    {
      "epoch": 0.00016339111328125,
      "step": 26770,
      "training_step_time": 0.38442134857177734
    },
    {
      "epoch": 0.000163397216796875,
      "model_forward_time": 0.11513543128967285,
      "step": 26771
    },
    {
      "epoch": 0.000163397216796875,
      "step": 26771,
      "training_step_time": 0.3943295478820801
    },
    {
      "epoch": 0.0001634033203125,
      "model_forward_time": 0.11614656448364258,
      "step": 26772
    },
    {
      "epoch": 0.0001634033203125,
      "step": 26772,
      "training_step_time": 0.3820946216583252
    },
    {
      "epoch": 0.000163409423828125,
      "model_forward_time": 0.1148066520690918,
      "step": 26773
    },
    {
      "epoch": 0.000163409423828125,
      "step": 26773,
      "training_step_time": 0.394817590713501
    },
    {
      "epoch": 0.00016341552734375,
      "model_forward_time": 0.11553478240966797,
      "step": 26774
    },
    {
      "epoch": 0.00016341552734375,
      "step": 26774,
      "training_step_time": 0.6690642833709717
    },
    {
      "epoch": 0.000163421630859375,
      "model_forward_time": 0.11493158340454102,
      "step": 26775
    },
    {
      "epoch": 0.000163421630859375,
      "step": 26775,
      "training_step_time": 0.3942222595214844
    },
    {
      "epoch": 0.000163427734375,
      "model_forward_time": 0.1185460090637207,
      "step": 26776
    },
    {
      "epoch": 0.000163427734375,
      "step": 26776,
      "training_step_time": 0.4438347816467285
    },
    {
      "epoch": 0.000163433837890625,
      "model_forward_time": 0.11673188209533691,
      "step": 26777
    },
    {
      "epoch": 0.000163433837890625,
      "step": 26777,
      "training_step_time": 0.39522719383239746
    },
    {
      "epoch": 0.00016343994140625,
      "model_forward_time": 0.11481404304504395,
      "step": 26778
    },
    {
      "epoch": 0.00016343994140625,
      "step": 26778,
      "training_step_time": 0.36623120307922363
    },
    {
      "epoch": 0.000163446044921875,
      "model_forward_time": 0.1147303581237793,
      "step": 26779
    },
    {
      "epoch": 0.000163446044921875,
      "step": 26779,
      "training_step_time": 0.4203007221221924
    },
    {
      "epoch": 0.0001634521484375,
      "grad_norm": 0.18144109845161438,
      "learning_rate": 6.286107814148454e-05,
      "loss": 0.0431,
      "step": 26780
    },
    {
      "epoch": 0.0001634521484375,
      "model_forward_time": 0.1153721809387207,
      "step": 26780
    },
    {
      "epoch": 0.0001634521484375,
      "step": 26780,
      "training_step_time": 0.4453275203704834
    },
    {
      "epoch": 0.000163458251953125,
      "model_forward_time": 0.11591315269470215,
      "step": 26781
    },
    {
      "epoch": 0.000163458251953125,
      "step": 26781,
      "training_step_time": 0.39647555351257324
    },
    {
      "epoch": 0.00016346435546875,
      "model_forward_time": 0.11591839790344238,
      "step": 26782
    },
    {
      "epoch": 0.00016346435546875,
      "step": 26782,
      "training_step_time": 0.4242370128631592
    },
    {
      "epoch": 0.000163470458984375,
      "model_forward_time": 0.11618900299072266,
      "step": 26783
    },
    {
      "epoch": 0.000163470458984375,
      "step": 26783,
      "training_step_time": 0.38675904273986816
    },
    {
      "epoch": 0.0001634765625,
      "model_forward_time": 0.11574077606201172,
      "step": 26784
    },
    {
      "epoch": 0.0001634765625,
      "step": 26784,
      "training_step_time": 0.3973274230957031
    },
    {
      "epoch": 0.000163482666015625,
      "model_forward_time": 0.11576318740844727,
      "step": 26785
    },
    {
      "epoch": 0.000163482666015625,
      "step": 26785,
      "training_step_time": 0.39395809173583984
    },
    {
      "epoch": 0.00016348876953125,
      "model_forward_time": 0.1148982048034668,
      "step": 26786
    },
    {
      "epoch": 0.00016348876953125,
      "step": 26786,
      "training_step_time": 0.6223833560943604
    },
    {
      "epoch": 0.000163494873046875,
      "model_forward_time": 0.1148531436920166,
      "step": 26787
    },
    {
      "epoch": 0.000163494873046875,
      "step": 26787,
      "training_step_time": 0.42054057121276855
    },
    {
      "epoch": 0.0001635009765625,
      "model_forward_time": 0.11562561988830566,
      "step": 26788
    },
    {
      "epoch": 0.0001635009765625,
      "step": 26788,
      "training_step_time": 0.4162411689758301
    },
    {
      "epoch": 0.000163507080078125,
      "model_forward_time": 0.11535906791687012,
      "step": 26789
    },
    {
      "epoch": 0.000163507080078125,
      "step": 26789,
      "training_step_time": 0.40883660316467285
    },
    {
      "epoch": 0.00016351318359375,
      "grad_norm": 0.1159949079155922,
      "learning_rate": 6.283444561244042e-05,
      "loss": 0.0455,
      "step": 26790
    },
    {
      "epoch": 0.00016351318359375,
      "model_forward_time": 0.11519861221313477,
      "step": 26790
    },
    {
      "epoch": 0.00016351318359375,
      "step": 26790,
      "training_step_time": 0.47690367698669434
    },
    {
      "epoch": 0.000163519287109375,
      "model_forward_time": 0.11509895324707031,
      "step": 26791
    },
    {
      "epoch": 0.000163519287109375,
      "step": 26791,
      "training_step_time": 0.3892629146575928
    },
    {
      "epoch": 0.000163525390625,
      "model_forward_time": 0.1156466007232666,
      "step": 26792
    },
    {
      "epoch": 0.000163525390625,
      "step": 26792,
      "training_step_time": 0.4572408199310303
    },
    {
      "epoch": 0.000163531494140625,
      "model_forward_time": 0.1152803897857666,
      "step": 26793
    },
    {
      "epoch": 0.000163531494140625,
      "step": 26793,
      "training_step_time": 0.44045233726501465
    },
    {
      "epoch": 0.00016353759765625,
      "model_forward_time": 0.11518692970275879,
      "step": 26794
    },
    {
      "epoch": 0.00016353759765625,
      "step": 26794,
      "training_step_time": 0.4277527332305908
    },
    {
      "epoch": 0.000163543701171875,
      "model_forward_time": 0.11611509323120117,
      "step": 26795
    },
    {
      "epoch": 0.000163543701171875,
      "step": 26795,
      "training_step_time": 0.47679829597473145
    },
    {
      "epoch": 0.0001635498046875,
      "model_forward_time": 0.11510705947875977,
      "step": 26796
    },
    {
      "epoch": 0.0001635498046875,
      "step": 26796,
      "training_step_time": 0.38691210746765137
    },
    {
      "epoch": 0.000163555908203125,
      "model_forward_time": 0.11485719680786133,
      "step": 26797
    },
    {
      "epoch": 0.000163555908203125,
      "step": 26797,
      "training_step_time": 0.3924422264099121
    },
    {
      "epoch": 0.00016356201171875,
      "model_forward_time": 0.11595296859741211,
      "step": 26798
    },
    {
      "epoch": 0.00016356201171875,
      "step": 26798,
      "training_step_time": 0.5261969566345215
    },
    {
      "epoch": 0.000163568115234375,
      "model_forward_time": 0.1152341365814209,
      "step": 26799
    },
    {
      "epoch": 0.000163568115234375,
      "step": 26799,
      "training_step_time": 0.39366793632507324
    },
    {
      "epoch": 0.00016357421875,
      "grad_norm": 0.09807555377483368,
      "learning_rate": 6.280780918463057e-05,
      "loss": 0.0469,
      "step": 26800
    },
    {
      "epoch": 0.00016357421875,
      "model_forward_time": 0.11471128463745117,
      "step": 26800
    },
    {
      "epoch": 0.00016357421875,
      "step": 26800,
      "training_step_time": 0.4197351932525635
    },
    {
      "epoch": 0.000163580322265625,
      "model_forward_time": 0.11531758308410645,
      "step": 26801
    },
    {
      "epoch": 0.000163580322265625,
      "step": 26801,
      "training_step_time": 0.38423585891723633
    },
    {
      "epoch": 0.00016358642578125,
      "model_forward_time": 0.11540532112121582,
      "step": 26802
    },
    {
      "epoch": 0.00016358642578125,
      "step": 26802,
      "training_step_time": 0.49327969551086426
    },
    {
      "epoch": 0.000163592529296875,
      "model_forward_time": 0.11558818817138672,
      "step": 26803
    },
    {
      "epoch": 0.000163592529296875,
      "step": 26803,
      "training_step_time": 0.4384572505950928
    },
    {
      "epoch": 0.0001635986328125,
      "model_forward_time": 0.11521530151367188,
      "step": 26804
    },
    {
      "epoch": 0.0001635986328125,
      "step": 26804,
      "training_step_time": 0.5365684032440186
    },
    {
      "epoch": 0.000163604736328125,
      "model_forward_time": 0.11530804634094238,
      "step": 26805
    },
    {
      "epoch": 0.000163604736328125,
      "step": 26805,
      "training_step_time": 0.40316247940063477
    },
    {
      "epoch": 0.00016361083984375,
      "model_forward_time": 0.11463165283203125,
      "step": 26806
    },
    {
      "epoch": 0.00016361083984375,
      "step": 26806,
      "training_step_time": 0.36443185806274414
    },
    {
      "epoch": 0.000163616943359375,
      "model_forward_time": 0.11469697952270508,
      "step": 26807
    },
    {
      "epoch": 0.000163616943359375,
      "step": 26807,
      "training_step_time": 0.44432711601257324
    },
    {
      "epoch": 0.000163623046875,
      "model_forward_time": 0.1148676872253418,
      "step": 26808
    },
    {
      "epoch": 0.000163623046875,
      "step": 26808,
      "training_step_time": 0.4215519428253174
    },
    {
      "epoch": 0.000163629150390625,
      "model_forward_time": 0.11493563652038574,
      "step": 26809
    },
    {
      "epoch": 0.000163629150390625,
      "step": 26809,
      "training_step_time": 0.4795966148376465
    },
    {
      "epoch": 0.00016363525390625,
      "grad_norm": 0.1600031852722168,
      "learning_rate": 6.278116886614648e-05,
      "loss": 0.0467,
      "step": 26810
    },
    {
      "epoch": 0.00016363525390625,
      "model_forward_time": 0.11538839340209961,
      "step": 26810
    },
    {
      "epoch": 0.00016363525390625,
      "step": 26810,
      "training_step_time": 0.3921384811401367
    },
    {
      "epoch": 0.000163641357421875,
      "model_forward_time": 0.1146092414855957,
      "step": 26811
    },
    {
      "epoch": 0.000163641357421875,
      "step": 26811,
      "training_step_time": 0.3967299461364746
    },
    {
      "epoch": 0.0001636474609375,
      "model_forward_time": 0.11571764945983887,
      "step": 26812
    },
    {
      "epoch": 0.0001636474609375,
      "step": 26812,
      "training_step_time": 0.3901798725128174
    },
    {
      "epoch": 0.000163653564453125,
      "model_forward_time": 0.11490702629089355,
      "step": 26813
    },
    {
      "epoch": 0.000163653564453125,
      "step": 26813,
      "training_step_time": 0.39462804794311523
    },
    {
      "epoch": 0.00016365966796875,
      "model_forward_time": 0.11604142189025879,
      "step": 26814
    },
    {
      "epoch": 0.00016365966796875,
      "step": 26814,
      "training_step_time": 0.4085419178009033
    },
    {
      "epoch": 0.000163665771484375,
      "model_forward_time": 0.1154165267944336,
      "step": 26815
    },
    {
      "epoch": 0.000163665771484375,
      "step": 26815,
      "training_step_time": 0.40536975860595703
    },
    {
      "epoch": 0.000163671875,
      "model_forward_time": 0.11610746383666992,
      "step": 26816
    },
    {
      "epoch": 0.000163671875,
      "step": 26816,
      "training_step_time": 0.6660177707672119
    },
    {
      "epoch": 0.000163677978515625,
      "model_forward_time": 0.11480093002319336,
      "step": 26817
    },
    {
      "epoch": 0.000163677978515625,
      "step": 26817,
      "training_step_time": 0.4694504737854004
    },
    {
      "epoch": 0.00016368408203125,
      "model_forward_time": 0.11502671241760254,
      "step": 26818
    },
    {
      "epoch": 0.00016368408203125,
      "step": 26818,
      "training_step_time": 0.39057159423828125
    },
    {
      "epoch": 0.000163690185546875,
      "model_forward_time": 0.11486935615539551,
      "step": 26819
    },
    {
      "epoch": 0.000163690185546875,
      "step": 26819,
      "training_step_time": 0.3922078609466553
    },
    {
      "epoch": 0.0001636962890625,
      "grad_norm": 0.12574411928653717,
      "learning_rate": 6.275452466508077e-05,
      "loss": 0.0444,
      "step": 26820
    },
    {
      "epoch": 0.0001636962890625,
      "model_forward_time": 0.11538362503051758,
      "step": 26820
    },
    {
      "epoch": 0.0001636962890625,
      "step": 26820,
      "training_step_time": 0.36803245544433594
    },
    {
      "epoch": 0.000163702392578125,
      "model_forward_time": 0.1146547794342041,
      "step": 26821
    },
    {
      "epoch": 0.000163702392578125,
      "step": 26821,
      "training_step_time": 0.40404844284057617
    },
    {
      "epoch": 0.00016370849609375,
      "model_forward_time": 0.11549735069274902,
      "step": 26822
    },
    {
      "epoch": 0.00016370849609375,
      "step": 26822,
      "training_step_time": 0.5538442134857178
    },
    {
      "epoch": 0.000163714599609375,
      "model_forward_time": 0.11521005630493164,
      "step": 26823
    },
    {
      "epoch": 0.000163714599609375,
      "step": 26823,
      "training_step_time": 0.4154694080352783
    },
    {
      "epoch": 0.000163720703125,
      "model_forward_time": 0.11548233032226562,
      "step": 26824
    },
    {
      "epoch": 0.000163720703125,
      "step": 26824,
      "training_step_time": 0.3837869167327881
    },
    {
      "epoch": 0.000163726806640625,
      "model_forward_time": 0.11498832702636719,
      "step": 26825
    },
    {
      "epoch": 0.000163726806640625,
      "step": 26825,
      "training_step_time": 0.3933863639831543
    },
    {
      "epoch": 0.00016373291015625,
      "model_forward_time": 0.11660289764404297,
      "step": 26826
    },
    {
      "epoch": 0.00016373291015625,
      "step": 26826,
      "training_step_time": 0.3930323123931885
    },
    {
      "epoch": 0.000163739013671875,
      "model_forward_time": 0.1150364875793457,
      "step": 26827
    },
    {
      "epoch": 0.000163739013671875,
      "step": 26827,
      "training_step_time": 0.4322183132171631
    },
    {
      "epoch": 0.0001637451171875,
      "model_forward_time": 0.11568236351013184,
      "step": 26828
    },
    {
      "epoch": 0.0001637451171875,
      "step": 26828,
      "training_step_time": 0.6204679012298584
    },
    {
      "epoch": 0.000163751220703125,
      "model_forward_time": 0.11553049087524414,
      "step": 26829
    },
    {
      "epoch": 0.000163751220703125,
      "step": 26829,
      "training_step_time": 0.45218706130981445
    },
    {
      "epoch": 0.00016375732421875,
      "grad_norm": 0.14589379727840424,
      "learning_rate": 6.27278765895272e-05,
      "loss": 0.0459,
      "step": 26830
    },
    {
      "epoch": 0.00016375732421875,
      "model_forward_time": 0.11470460891723633,
      "step": 26830
    },
    {
      "epoch": 0.00016375732421875,
      "step": 26830,
      "training_step_time": 0.41449475288391113
    },
    {
      "epoch": 0.000163763427734375,
      "model_forward_time": 0.11513805389404297,
      "step": 26831
    },
    {
      "epoch": 0.000163763427734375,
      "step": 26831,
      "training_step_time": 0.4713168144226074
    },
    {
      "epoch": 0.00016376953125,
      "model_forward_time": 0.11503148078918457,
      "step": 26832
    },
    {
      "epoch": 0.00016376953125,
      "step": 26832,
      "training_step_time": 0.4007260799407959
    },
    {
      "epoch": 0.000163775634765625,
      "model_forward_time": 0.1143956184387207,
      "step": 26833
    },
    {
      "epoch": 0.000163775634765625,
      "step": 26833,
      "training_step_time": 0.39672303199768066
    },
    {
      "epoch": 0.00016378173828125,
      "model_forward_time": 0.11545944213867188,
      "step": 26834
    },
    {
      "epoch": 0.00016378173828125,
      "step": 26834,
      "training_step_time": 0.39606165885925293
    },
    {
      "epoch": 0.000163787841796875,
      "model_forward_time": 0.1151130199432373,
      "step": 26835
    },
    {
      "epoch": 0.000163787841796875,
      "step": 26835,
      "training_step_time": 0.41304802894592285
    },
    {
      "epoch": 0.0001637939453125,
      "model_forward_time": 0.11509966850280762,
      "step": 26836
    },
    {
      "epoch": 0.0001637939453125,
      "step": 26836,
      "training_step_time": 0.40587663650512695
    },
    {
      "epoch": 0.000163800048828125,
      "model_forward_time": 0.11501693725585938,
      "step": 26837
    },
    {
      "epoch": 0.000163800048828125,
      "step": 26837,
      "training_step_time": 0.47843289375305176
    },
    {
      "epoch": 0.00016380615234375,
      "model_forward_time": 0.11554551124572754,
      "step": 26838
    },
    {
      "epoch": 0.00016380615234375,
      "step": 26838,
      "training_step_time": 0.4004952907562256
    },
    {
      "epoch": 0.000163812255859375,
      "model_forward_time": 0.11509299278259277,
      "step": 26839
    },
    {
      "epoch": 0.000163812255859375,
      "step": 26839,
      "training_step_time": 0.39201807975769043
    },
    {
      "epoch": 0.000163818359375,
      "grad_norm": 0.1485985666513443,
      "learning_rate": 6.27012246475808e-05,
      "loss": 0.0499,
      "step": 26840
    },
    {
      "epoch": 0.000163818359375,
      "model_forward_time": 0.11539030075073242,
      "step": 26840
    },
    {
      "epoch": 0.000163818359375,
      "step": 26840,
      "training_step_time": 0.45316147804260254
    },
    {
      "epoch": 0.000163824462890625,
      "model_forward_time": 0.1152958869934082,
      "step": 26841
    },
    {
      "epoch": 0.000163824462890625,
      "step": 26841,
      "training_step_time": 0.4285159111022949
    },
    {
      "epoch": 0.00016383056640625,
      "model_forward_time": 0.11474204063415527,
      "step": 26842
    },
    {
      "epoch": 0.00016383056640625,
      "step": 26842,
      "training_step_time": 0.38646578788757324
    },
    {
      "epoch": 0.000163836669921875,
      "model_forward_time": 0.11539793014526367,
      "step": 26843
    },
    {
      "epoch": 0.000163836669921875,
      "step": 26843,
      "training_step_time": 0.41753196716308594
    },
    {
      "epoch": 0.0001638427734375,
      "model_forward_time": 0.11605024337768555,
      "step": 26844
    },
    {
      "epoch": 0.0001638427734375,
      "step": 26844,
      "training_step_time": 0.40224456787109375
    },
    {
      "epoch": 0.000163848876953125,
      "model_forward_time": 0.11533188819885254,
      "step": 26845
    },
    {
      "epoch": 0.000163848876953125,
      "step": 26845,
      "training_step_time": 0.4457418918609619
    },
    {
      "epoch": 0.00016385498046875,
      "model_forward_time": 0.11590409278869629,
      "step": 26846
    },
    {
      "epoch": 0.00016385498046875,
      "step": 26846,
      "training_step_time": 0.5401458740234375
    },
    {
      "epoch": 0.000163861083984375,
      "model_forward_time": 0.11563587188720703,
      "step": 26847
    },
    {
      "epoch": 0.000163861083984375,
      "step": 26847,
      "training_step_time": 0.3783910274505615
    },
    {
      "epoch": 0.0001638671875,
      "model_forward_time": 0.11550092697143555,
      "step": 26848
    },
    {
      "epoch": 0.0001638671875,
      "step": 26848,
      "training_step_time": 0.3770887851715088
    },
    {
      "epoch": 0.000163873291015625,
      "model_forward_time": 0.11482071876525879,
      "step": 26849
    },
    {
      "epoch": 0.000163873291015625,
      "step": 26849,
      "training_step_time": 0.4806482791900635
    },
    {
      "epoch": 0.00016387939453125,
      "grad_norm": 0.14721113443374634,
      "learning_rate": 6.26745688473377e-05,
      "loss": 0.0462,
      "step": 26850
    },
    {
      "epoch": 0.00016387939453125,
      "model_forward_time": 0.11552000045776367,
      "step": 26850
    },
    {
      "epoch": 0.00016387939453125,
      "step": 26850,
      "training_step_time": 0.46357178688049316
    },
    {
      "epoch": 0.000163885498046875,
      "model_forward_time": 0.11455416679382324,
      "step": 26851
    },
    {
      "epoch": 0.000163885498046875,
      "step": 26851,
      "training_step_time": 0.4448070526123047
    },
    {
      "epoch": 0.0001638916015625,
      "model_forward_time": 0.11493921279907227,
      "step": 26852
    },
    {
      "epoch": 0.0001638916015625,
      "step": 26852,
      "training_step_time": 0.39551377296447754
    },
    {
      "epoch": 0.000163897705078125,
      "model_forward_time": 0.11463809013366699,
      "step": 26853
    },
    {
      "epoch": 0.000163897705078125,
      "step": 26853,
      "training_step_time": 0.42535853385925293
    },
    {
      "epoch": 0.00016390380859375,
      "model_forward_time": 0.11499619483947754,
      "step": 26854
    },
    {
      "epoch": 0.00016390380859375,
      "step": 26854,
      "training_step_time": 0.38806772232055664
    },
    {
      "epoch": 0.000163909912109375,
      "model_forward_time": 0.11556553840637207,
      "step": 26855
    },
    {
      "epoch": 0.000163909912109375,
      "step": 26855,
      "training_step_time": 0.3897380828857422
    },
    {
      "epoch": 0.000163916015625,
      "model_forward_time": 0.1155095100402832,
      "step": 26856
    },
    {
      "epoch": 0.000163916015625,
      "step": 26856,
      "training_step_time": 0.3956928253173828
    },
    {
      "epoch": 0.000163922119140625,
      "model_forward_time": 0.115692138671875,
      "step": 26857
    },
    {
      "epoch": 0.000163922119140625,
      "step": 26857,
      "training_step_time": 0.512535810470581
    },
    {
      "epoch": 0.00016392822265625,
      "model_forward_time": 0.11515164375305176,
      "step": 26858
    },
    {
      "epoch": 0.00016392822265625,
      "step": 26858,
      "training_step_time": 0.4281189441680908
    },
    {
      "epoch": 0.000163934326171875,
      "model_forward_time": 0.11487102508544922,
      "step": 26859
    },
    {
      "epoch": 0.000163934326171875,
      "step": 26859,
      "training_step_time": 0.4814639091491699
    },
    {
      "epoch": 0.0001639404296875,
      "grad_norm": 0.14697040617465973,
      "learning_rate": 6.264790919689525e-05,
      "loss": 0.0499,
      "step": 26860
    },
    {
      "epoch": 0.0001639404296875,
      "model_forward_time": 0.11500263214111328,
      "step": 26860
    },
    {
      "epoch": 0.0001639404296875,
      "step": 26860,
      "training_step_time": 0.3891165256500244
    },
    {
      "epoch": 0.000163946533203125,
      "model_forward_time": 0.11539483070373535,
      "step": 26861
    },
    {
      "epoch": 0.000163946533203125,
      "step": 26861,
      "training_step_time": 0.3959238529205322
    },
    {
      "epoch": 0.00016395263671875,
      "model_forward_time": 0.11845946311950684,
      "step": 26862
    },
    {
      "epoch": 0.00016395263671875,
      "step": 26862,
      "training_step_time": 0.3713264465332031
    },
    {
      "epoch": 0.000163958740234375,
      "model_forward_time": 0.11868739128112793,
      "step": 26863
    },
    {
      "epoch": 0.000163958740234375,
      "step": 26863,
      "training_step_time": 0.4131448268890381
    },
    {
      "epoch": 0.00016396484375,
      "model_forward_time": 0.11770105361938477,
      "step": 26864
    },
    {
      "epoch": 0.00016396484375,
      "step": 26864,
      "training_step_time": 0.47998523712158203
    },
    {
      "epoch": 0.000163970947265625,
      "model_forward_time": 0.11772370338439941,
      "step": 26865
    },
    {
      "epoch": 0.000163970947265625,
      "step": 26865,
      "training_step_time": 0.3872203826904297
    },
    {
      "epoch": 0.00016397705078125,
      "model_forward_time": 0.11830973625183105,
      "step": 26866
    },
    {
      "epoch": 0.00016397705078125,
      "step": 26866,
      "training_step_time": 0.43884754180908203
    },
    {
      "epoch": 0.000163983154296875,
      "model_forward_time": 0.1194000244140625,
      "step": 26867
    },
    {
      "epoch": 0.000163983154296875,
      "step": 26867,
      "training_step_time": 0.4392826557159424
    },
    {
      "epoch": 0.0001639892578125,
      "model_forward_time": 0.11738729476928711,
      "step": 26868
    },
    {
      "epoch": 0.0001639892578125,
      "step": 26868,
      "training_step_time": 0.38068222999572754
    },
    {
      "epoch": 0.000163995361328125,
      "model_forward_time": 0.11592340469360352,
      "step": 26869
    },
    {
      "epoch": 0.000163995361328125,
      "step": 26869,
      "training_step_time": 0.3921678066253662
    },
    {
      "epoch": 0.00016400146484375,
      "grad_norm": 0.16055168211460114,
      "learning_rate": 6.262124570435193e-05,
      "loss": 0.0455,
      "step": 26870
    },
    {
      "epoch": 0.00016400146484375,
      "model_forward_time": 0.1157839298248291,
      "step": 26870
    },
    {
      "epoch": 0.00016400146484375,
      "step": 26870,
      "training_step_time": 0.5378570556640625
    },
    {
      "epoch": 0.000164007568359375,
      "model_forward_time": 0.11502408981323242,
      "step": 26871
    },
    {
      "epoch": 0.000164007568359375,
      "step": 26871,
      "training_step_time": 0.45257019996643066
    },
    {
      "epoch": 0.000164013671875,
      "model_forward_time": 0.11563920974731445,
      "step": 26872
    },
    {
      "epoch": 0.000164013671875,
      "step": 26872,
      "training_step_time": 0.4123384952545166
    },
    {
      "epoch": 0.000164019775390625,
      "model_forward_time": 0.11508631706237793,
      "step": 26873
    },
    {
      "epoch": 0.000164019775390625,
      "step": 26873,
      "training_step_time": 0.47443175315856934
    },
    {
      "epoch": 0.00016402587890625,
      "model_forward_time": 0.11565041542053223,
      "step": 26874
    },
    {
      "epoch": 0.00016402587890625,
      "step": 26874,
      "training_step_time": 0.39677000045776367
    },
    {
      "epoch": 0.000164031982421875,
      "model_forward_time": 0.11481595039367676,
      "step": 26875
    },
    {
      "epoch": 0.000164031982421875,
      "step": 26875,
      "training_step_time": 0.4013533592224121
    },
    {
      "epoch": 0.0001640380859375,
      "model_forward_time": 0.11632466316223145,
      "step": 26876
    },
    {
      "epoch": 0.0001640380859375,
      "step": 26876,
      "training_step_time": 0.387981653213501
    },
    {
      "epoch": 0.000164044189453125,
      "model_forward_time": 0.1157381534576416,
      "step": 26877
    },
    {
      "epoch": 0.000164044189453125,
      "step": 26877,
      "training_step_time": 0.42348742485046387
    },
    {
      "epoch": 0.00016405029296875,
      "model_forward_time": 0.11614561080932617,
      "step": 26878
    },
    {
      "epoch": 0.00016405029296875,
      "step": 26878,
      "training_step_time": 0.4739365577697754
    },
    {
      "epoch": 0.000164056396484375,
      "model_forward_time": 0.11549258232116699,
      "step": 26879
    },
    {
      "epoch": 0.000164056396484375,
      "step": 26879,
      "training_step_time": 0.4229543209075928
    },
    {
      "epoch": 0.0001640625,
      "grad_norm": 0.10524081438779831,
      "learning_rate": 6.259457837780742e-05,
      "loss": 0.048,
      "step": 26880
    },
    {
      "epoch": 0.0001640625,
      "model_forward_time": 0.11554980278015137,
      "step": 26880
    },
    {
      "epoch": 0.0001640625,
      "step": 26880,
      "training_step_time": 0.4322166442871094
    },
    {
      "epoch": 0.000164068603515625,
      "model_forward_time": 0.11556172370910645,
      "step": 26881
    },
    {
      "epoch": 0.000164068603515625,
      "step": 26881,
      "training_step_time": 0.46558427810668945
    },
    {
      "epoch": 0.00016407470703125,
      "model_forward_time": 0.11527776718139648,
      "step": 26882
    },
    {
      "epoch": 0.00016407470703125,
      "step": 26882,
      "training_step_time": 0.4492075443267822
    },
    {
      "epoch": 0.000164080810546875,
      "model_forward_time": 0.11559581756591797,
      "step": 26883
    },
    {
      "epoch": 0.000164080810546875,
      "step": 26883,
      "training_step_time": 0.39966392517089844
    },
    {
      "epoch": 0.0001640869140625,
      "model_forward_time": 0.11554408073425293,
      "step": 26884
    },
    {
      "epoch": 0.0001640869140625,
      "step": 26884,
      "training_step_time": 0.3922455310821533
    },
    {
      "epoch": 0.000164093017578125,
      "model_forward_time": 0.11564302444458008,
      "step": 26885
    },
    {
      "epoch": 0.000164093017578125,
      "step": 26885,
      "training_step_time": 0.39206767082214355
    },
    {
      "epoch": 0.00016409912109375,
      "model_forward_time": 0.11580085754394531,
      "step": 26886
    },
    {
      "epoch": 0.00016409912109375,
      "step": 26886,
      "training_step_time": 0.4734005928039551
    },
    {
      "epoch": 0.000164105224609375,
      "model_forward_time": 0.11574769020080566,
      "step": 26887
    },
    {
      "epoch": 0.000164105224609375,
      "step": 26887,
      "training_step_time": 0.4878506660461426
    },
    {
      "epoch": 0.000164111328125,
      "model_forward_time": 0.11548995971679688,
      "step": 26888
    },
    {
      "epoch": 0.000164111328125,
      "step": 26888,
      "training_step_time": 0.5495567321777344
    },
    {
      "epoch": 0.000164117431640625,
      "model_forward_time": 0.11506295204162598,
      "step": 26889
    },
    {
      "epoch": 0.000164117431640625,
      "step": 26889,
      "training_step_time": 0.39077067375183105
    },
    {
      "epoch": 0.00016412353515625,
      "grad_norm": 0.11425582319498062,
      "learning_rate": 6.256790722536251e-05,
      "loss": 0.0408,
      "step": 26890
    },
    {
      "epoch": 0.00016412353515625,
      "model_forward_time": 0.11530876159667969,
      "step": 26890
    },
    {
      "epoch": 0.00016412353515625,
      "step": 26890,
      "training_step_time": 0.38945627212524414
    },
    {
      "epoch": 0.000164129638671875,
      "model_forward_time": 0.1173715591430664,
      "step": 26891
    },
    {
      "epoch": 0.000164129638671875,
      "step": 26891,
      "training_step_time": 0.44018077850341797
    },
    {
      "epoch": 0.0001641357421875,
      "model_forward_time": 0.11807417869567871,
      "step": 26892
    },
    {
      "epoch": 0.0001641357421875,
      "step": 26892,
      "training_step_time": 0.46512770652770996
    },
    {
      "epoch": 0.000164141845703125,
      "model_forward_time": 0.1171865463256836,
      "step": 26893
    },
    {
      "epoch": 0.000164141845703125,
      "step": 26893,
      "training_step_time": 0.432706356048584
    },
    {
      "epoch": 0.00016414794921875,
      "model_forward_time": 0.1177518367767334,
      "step": 26894
    },
    {
      "epoch": 0.00016414794921875,
      "step": 26894,
      "training_step_time": 0.45994138717651367
    },
    {
      "epoch": 0.000164154052734375,
      "model_forward_time": 0.1176142692565918,
      "step": 26895
    },
    {
      "epoch": 0.000164154052734375,
      "step": 26895,
      "training_step_time": 0.379840612411499
    },
    {
      "epoch": 0.00016416015625,
      "model_forward_time": 0.11525273323059082,
      "step": 26896
    },
    {
      "epoch": 0.00016416015625,
      "step": 26896,
      "training_step_time": 0.3923630714416504
    },
    {
      "epoch": 0.000164166259765625,
      "model_forward_time": 0.11636662483215332,
      "step": 26897
    },
    {
      "epoch": 0.000164166259765625,
      "step": 26897,
      "training_step_time": 0.404294490814209
    },
    {
      "epoch": 0.00016417236328125,
      "model_forward_time": 0.11514592170715332,
      "step": 26898
    },
    {
      "epoch": 0.00016417236328125,
      "step": 26898,
      "training_step_time": 0.391862154006958
    },
    {
      "epoch": 0.000164178466796875,
      "model_forward_time": 0.11496329307556152,
      "step": 26899
    },
    {
      "epoch": 0.000164178466796875,
      "step": 26899,
      "training_step_time": 0.4053778648376465
    },
    {
      "epoch": 0.0001641845703125,
      "grad_norm": 0.09584876894950867,
      "learning_rate": 6.254123225511923e-05,
      "loss": 0.0461,
      "step": 26900
    },
    {
      "epoch": 0.0001641845703125,
      "model_forward_time": 0.11531233787536621,
      "step": 26900
    },
    {
      "epoch": 0.0001641845703125,
      "step": 26900,
      "training_step_time": 0.6815359592437744
    },
    {
      "epoch": 0.000164190673828125,
      "model_forward_time": 0.11598086357116699,
      "step": 26901
    },
    {
      "epoch": 0.000164190673828125,
      "step": 26901,
      "training_step_time": 0.42882442474365234
    },
    {
      "epoch": 0.00016419677734375,
      "model_forward_time": 0.1147153377532959,
      "step": 26902
    },
    {
      "epoch": 0.00016419677734375,
      "step": 26902,
      "training_step_time": 0.3843998908996582
    },
    {
      "epoch": 0.000164202880859375,
      "model_forward_time": 0.11579608917236328,
      "step": 26903
    },
    {
      "epoch": 0.000164202880859375,
      "step": 26903,
      "training_step_time": 0.40156030654907227
    },
    {
      "epoch": 0.000164208984375,
      "model_forward_time": 0.11501431465148926,
      "step": 26904
    },
    {
      "epoch": 0.000164208984375,
      "step": 26904,
      "training_step_time": 0.39495301246643066
    },
    {
      "epoch": 0.000164215087890625,
      "model_forward_time": 0.11503434181213379,
      "step": 26905
    },
    {
      "epoch": 0.000164215087890625,
      "step": 26905,
      "training_step_time": 0.367154598236084
    },
    {
      "epoch": 0.00016422119140625,
      "model_forward_time": 0.11552548408508301,
      "step": 26906
    },
    {
      "epoch": 0.00016422119140625,
      "step": 26906,
      "training_step_time": 0.5938894748687744
    },
    {
      "epoch": 0.000164227294921875,
      "model_forward_time": 0.11590409278869629,
      "step": 26907
    },
    {
      "epoch": 0.000164227294921875,
      "step": 26907,
      "training_step_time": 0.43677830696105957
    },
    {
      "epoch": 0.0001642333984375,
      "model_forward_time": 0.11488580703735352,
      "step": 26908
    },
    {
      "epoch": 0.0001642333984375,
      "step": 26908,
      "training_step_time": 0.43096327781677246
    },
    {
      "epoch": 0.000164239501953125,
      "model_forward_time": 0.11594533920288086,
      "step": 26909
    },
    {
      "epoch": 0.000164239501953125,
      "step": 26909,
      "training_step_time": 0.3916049003601074
    },
    {
      "epoch": 0.00016424560546875,
      "grad_norm": 0.11711102724075317,
      "learning_rate": 6.251455347518073e-05,
      "loss": 0.0441,
      "step": 26910
    },
    {
      "epoch": 0.00016424560546875,
      "model_forward_time": 0.1148841381072998,
      "step": 26910
    },
    {
      "epoch": 0.00016424560546875,
      "step": 26910,
      "training_step_time": 0.40266942977905273
    },
    {
      "epoch": 0.000164251708984375,
      "model_forward_time": 0.1146383285522461,
      "step": 26911
    },
    {
      "epoch": 0.000164251708984375,
      "step": 26911,
      "training_step_time": 0.3964698314666748
    },
    {
      "epoch": 0.0001642578125,
      "model_forward_time": 0.11543893814086914,
      "step": 26912
    },
    {
      "epoch": 0.0001642578125,
      "step": 26912,
      "training_step_time": 0.487109899520874
    },
    {
      "epoch": 0.000164263916015625,
      "model_forward_time": 0.11537957191467285,
      "step": 26913
    },
    {
      "epoch": 0.000164263916015625,
      "step": 26913,
      "training_step_time": 0.3989996910095215
    },
    {
      "epoch": 0.00016427001953125,
      "model_forward_time": 0.11565399169921875,
      "step": 26914
    },
    {
      "epoch": 0.00016427001953125,
      "step": 26914,
      "training_step_time": 0.4153571128845215
    },
    {
      "epoch": 0.000164276123046875,
      "model_forward_time": 0.11571931838989258,
      "step": 26915
    },
    {
      "epoch": 0.000164276123046875,
      "step": 26915,
      "training_step_time": 0.4995768070220947
    },
    {
      "epoch": 0.0001642822265625,
      "model_forward_time": 0.1153864860534668,
      "step": 26916
    },
    {
      "epoch": 0.0001642822265625,
      "step": 26916,
      "training_step_time": 0.3876621723175049
    },
    {
      "epoch": 0.000164288330078125,
      "model_forward_time": 0.11528444290161133,
      "step": 26917
    },
    {
      "epoch": 0.000164288330078125,
      "step": 26917,
      "training_step_time": 0.39359259605407715
    },
    {
      "epoch": 0.00016429443359375,
      "model_forward_time": 0.11511468887329102,
      "step": 26918
    },
    {
      "epoch": 0.00016429443359375,
      "step": 26918,
      "training_step_time": 0.5939950942993164
    },
    {
      "epoch": 0.000164300537109375,
      "model_forward_time": 0.11560559272766113,
      "step": 26919
    },
    {
      "epoch": 0.000164300537109375,
      "step": 26919,
      "training_step_time": 0.387188196182251
    },
    {
      "epoch": 0.000164306640625,
      "grad_norm": 0.09472664445638657,
      "learning_rate": 6.248787089365133e-05,
      "loss": 0.0405,
      "step": 26920
    },
    {
      "epoch": 0.000164306640625,
      "model_forward_time": 0.1164395809173584,
      "step": 26920
    },
    {
      "epoch": 0.000164306640625,
      "step": 26920,
      "training_step_time": 0.4395411014556885
    },
    {
      "epoch": 0.000164312744140625,
      "model_forward_time": 0.11657333374023438,
      "step": 26921
    },
    {
      "epoch": 0.000164312744140625,
      "step": 26921,
      "training_step_time": 0.4754338264465332
    },
    {
      "epoch": 0.00016431884765625,
      "model_forward_time": 0.11479330062866211,
      "step": 26922
    },
    {
      "epoch": 0.00016431884765625,
      "step": 26922,
      "training_step_time": 0.4556763172149658
    },
    {
      "epoch": 0.000164324951171875,
      "model_forward_time": 0.11547207832336426,
      "step": 26923
    },
    {
      "epoch": 0.000164324951171875,
      "step": 26923,
      "training_step_time": 0.38495302200317383
    },
    {
      "epoch": 0.0001643310546875,
      "model_forward_time": 0.11573958396911621,
      "step": 26924
    },
    {
      "epoch": 0.0001643310546875,
      "step": 26924,
      "training_step_time": 0.46172022819519043
    },
    {
      "epoch": 0.000164337158203125,
      "model_forward_time": 0.11572384834289551,
      "step": 26925
    },
    {
      "epoch": 0.000164337158203125,
      "step": 26925,
      "training_step_time": 0.3863558769226074
    },
    {
      "epoch": 0.00016434326171875,
      "model_forward_time": 0.11481261253356934,
      "step": 26926
    },
    {
      "epoch": 0.00016434326171875,
      "step": 26926,
      "training_step_time": 0.38159799575805664
    },
    {
      "epoch": 0.000164349365234375,
      "model_forward_time": 0.11641049385070801,
      "step": 26927
    },
    {
      "epoch": 0.000164349365234375,
      "step": 26927,
      "training_step_time": 0.39183902740478516
    },
    {
      "epoch": 0.00016435546875,
      "model_forward_time": 0.1152336597442627,
      "step": 26928
    },
    {
      "epoch": 0.00016435546875,
      "step": 26928,
      "training_step_time": 0.4638240337371826
    },
    {
      "epoch": 0.000164361572265625,
      "model_forward_time": 0.11541342735290527,
      "step": 26929
    },
    {
      "epoch": 0.000164361572265625,
      "step": 26929,
      "training_step_time": 0.5031018257141113
    },
    {
      "epoch": 0.00016436767578125,
      "grad_norm": 0.14089170098304749,
      "learning_rate": 6.246118451863646e-05,
      "loss": 0.0382,
      "step": 26930
    },
    {
      "epoch": 0.00016436767578125,
      "model_forward_time": 0.11540412902832031,
      "step": 26930
    },
    {
      "epoch": 0.00016436767578125,
      "step": 26930,
      "training_step_time": 0.4831106662750244
    },
    {
      "epoch": 0.000164373779296875,
      "model_forward_time": 0.1157681941986084,
      "step": 26931
    },
    {
      "epoch": 0.000164373779296875,
      "step": 26931,
      "training_step_time": 0.4047884941101074
    },
    {
      "epoch": 0.0001643798828125,
      "model_forward_time": 0.11574983596801758,
      "step": 26932
    },
    {
      "epoch": 0.0001643798828125,
      "step": 26932,
      "training_step_time": 0.414318323135376
    },
    {
      "epoch": 0.000164385986328125,
      "model_forward_time": 0.11555671691894531,
      "step": 26933
    },
    {
      "epoch": 0.000164385986328125,
      "step": 26933,
      "training_step_time": 0.40180444717407227
    },
    {
      "epoch": 0.00016439208984375,
      "model_forward_time": 0.11530661582946777,
      "step": 26934
    },
    {
      "epoch": 0.00016439208984375,
      "step": 26934,
      "training_step_time": 0.48976755142211914
    },
    {
      "epoch": 0.000164398193359375,
      "model_forward_time": 0.11467838287353516,
      "step": 26935
    },
    {
      "epoch": 0.000164398193359375,
      "step": 26935,
      "training_step_time": 0.5041682720184326
    },
    {
      "epoch": 0.000164404296875,
      "model_forward_time": 0.11594748497009277,
      "step": 26936
    },
    {
      "epoch": 0.000164404296875,
      "step": 26936,
      "training_step_time": 0.4122321605682373
    },
    {
      "epoch": 0.000164410400390625,
      "model_forward_time": 0.11449217796325684,
      "step": 26937
    },
    {
      "epoch": 0.000164410400390625,
      "step": 26937,
      "training_step_time": 0.38759636878967285
    },
    {
      "epoch": 0.00016441650390625,
      "model_forward_time": 0.1149294376373291,
      "step": 26938
    },
    {
      "epoch": 0.00016441650390625,
      "step": 26938,
      "training_step_time": 0.3840751647949219
    },
    {
      "epoch": 0.000164422607421875,
      "model_forward_time": 0.11565971374511719,
      "step": 26939
    },
    {
      "epoch": 0.000164422607421875,
      "step": 26939,
      "training_step_time": 0.3917257785797119
    },
    {
      "epoch": 0.0001644287109375,
      "grad_norm": 0.18221509456634521,
      "learning_rate": 6.243449435824276e-05,
      "loss": 0.0516,
      "step": 26940
    },
    {
      "epoch": 0.0001644287109375,
      "model_forward_time": 0.11526608467102051,
      "step": 26940
    },
    {
      "epoch": 0.0001644287109375,
      "step": 26940,
      "training_step_time": 0.3943343162536621
    },
    {
      "epoch": 0.000164434814453125,
      "model_forward_time": 0.11515402793884277,
      "step": 26941
    },
    {
      "epoch": 0.000164434814453125,
      "step": 26941,
      "training_step_time": 0.3964359760284424
    },
    {
      "epoch": 0.00016444091796875,
      "model_forward_time": 0.11524677276611328,
      "step": 26942
    },
    {
      "epoch": 0.00016444091796875,
      "step": 26942,
      "training_step_time": 0.6271302700042725
    },
    {
      "epoch": 0.000164447021484375,
      "model_forward_time": 0.11496925354003906,
      "step": 26943
    },
    {
      "epoch": 0.000164447021484375,
      "step": 26943,
      "training_step_time": 0.5366408824920654
    },
    {
      "epoch": 0.000164453125,
      "model_forward_time": 0.11489105224609375,
      "step": 26944
    },
    {
      "epoch": 0.000164453125,
      "step": 26944,
      "training_step_time": 0.389066219329834
    },
    {
      "epoch": 0.000164459228515625,
      "model_forward_time": 0.11549806594848633,
      "step": 26945
    },
    {
      "epoch": 0.000164459228515625,
      "step": 26945,
      "training_step_time": 0.38645291328430176
    },
    {
      "epoch": 0.00016446533203125,
      "model_forward_time": 0.11500930786132812,
      "step": 26946
    },
    {
      "epoch": 0.00016446533203125,
      "step": 26946,
      "training_step_time": 0.38591456413269043
    },
    {
      "epoch": 0.000164471435546875,
      "model_forward_time": 0.1152503490447998,
      "step": 26947
    },
    {
      "epoch": 0.000164471435546875,
      "step": 26947,
      "training_step_time": 0.46595215797424316
    },
    {
      "epoch": 0.0001644775390625,
      "model_forward_time": 0.11556196212768555,
      "step": 26948
    },
    {
      "epoch": 0.0001644775390625,
      "step": 26948,
      "training_step_time": 0.5347180366516113
    },
    {
      "epoch": 0.000164483642578125,
      "model_forward_time": 0.11590456962585449,
      "step": 26949
    },
    {
      "epoch": 0.000164483642578125,
      "step": 26949,
      "training_step_time": 0.45267772674560547
    },
    {
      "epoch": 0.00016448974609375,
      "grad_norm": 0.15604160726070404,
      "learning_rate": 6.240780042057796e-05,
      "loss": 0.0441,
      "step": 26950
    },
    {
      "epoch": 0.00016448974609375,
      "model_forward_time": 0.11484217643737793,
      "step": 26950
    },
    {
      "epoch": 0.00016448974609375,
      "step": 26950,
      "training_step_time": 0.4006199836730957
    },
    {
      "epoch": 0.000164495849609375,
      "model_forward_time": 0.11591553688049316,
      "step": 26951
    },
    {
      "epoch": 0.000164495849609375,
      "step": 26951,
      "training_step_time": 0.3941617012023926
    },
    {
      "epoch": 0.000164501953125,
      "model_forward_time": 0.1155555248260498,
      "step": 26952
    },
    {
      "epoch": 0.000164501953125,
      "step": 26952,
      "training_step_time": 0.39825963973999023
    },
    {
      "epoch": 0.000164508056640625,
      "model_forward_time": 0.11566638946533203,
      "step": 26953
    },
    {
      "epoch": 0.000164508056640625,
      "step": 26953,
      "training_step_time": 0.3923473358154297
    },
    {
      "epoch": 0.00016451416015625,
      "model_forward_time": 0.11565446853637695,
      "step": 26954
    },
    {
      "epoch": 0.00016451416015625,
      "step": 26954,
      "training_step_time": 0.5472242832183838
    },
    {
      "epoch": 0.000164520263671875,
      "model_forward_time": 0.1149754524230957,
      "step": 26955
    },
    {
      "epoch": 0.000164520263671875,
      "step": 26955,
      "training_step_time": 0.3882269859313965
    },
    {
      "epoch": 0.0001645263671875,
      "model_forward_time": 0.11550164222717285,
      "step": 26956
    },
    {
      "epoch": 0.0001645263671875,
      "step": 26956,
      "training_step_time": 0.43346524238586426
    },
    {
      "epoch": 0.000164532470703125,
      "model_forward_time": 0.11509394645690918,
      "step": 26957
    },
    {
      "epoch": 0.000164532470703125,
      "step": 26957,
      "training_step_time": 0.3948478698730469
    },
    {
      "epoch": 0.00016453857421875,
      "model_forward_time": 0.11482644081115723,
      "step": 26958
    },
    {
      "epoch": 0.00016453857421875,
      "step": 26958,
      "training_step_time": 0.4534456729888916
    },
    {
      "epoch": 0.000164544677734375,
      "model_forward_time": 0.11508321762084961,
      "step": 26959
    },
    {
      "epoch": 0.000164544677734375,
      "step": 26959,
      "training_step_time": 0.3967933654785156
    },
    {
      "epoch": 0.00016455078125,
      "grad_norm": 0.1679237186908722,
      "learning_rate": 6.238110271375102e-05,
      "loss": 0.0444,
      "step": 26960
    },
    {
      "epoch": 0.00016455078125,
      "model_forward_time": 0.11536836624145508,
      "step": 26960
    },
    {
      "epoch": 0.00016455078125,
      "step": 26960,
      "training_step_time": 0.513117790222168
    },
    {
      "epoch": 0.000164556884765625,
      "model_forward_time": 0.11560249328613281,
      "step": 26961
    },
    {
      "epoch": 0.000164556884765625,
      "step": 26961,
      "training_step_time": 0.38764524459838867
    },
    {
      "epoch": 0.00016456298828125,
      "model_forward_time": 0.11523270606994629,
      "step": 26962
    },
    {
      "epoch": 0.00016456298828125,
      "step": 26962,
      "training_step_time": 0.4684619903564453
    },
    {
      "epoch": 0.000164569091796875,
      "model_forward_time": 0.11595797538757324,
      "step": 26963
    },
    {
      "epoch": 0.000164569091796875,
      "step": 26963,
      "training_step_time": 0.4271371364593506
    },
    {
      "epoch": 0.0001645751953125,
      "model_forward_time": 0.11562252044677734,
      "step": 26964
    },
    {
      "epoch": 0.0001645751953125,
      "step": 26964,
      "training_step_time": 0.48140835762023926
    },
    {
      "epoch": 0.000164581298828125,
      "model_forward_time": 0.11467885971069336,
      "step": 26965
    },
    {
      "epoch": 0.000164581298828125,
      "step": 26965,
      "training_step_time": 0.38884854316711426
    },
    {
      "epoch": 0.00016458740234375,
      "model_forward_time": 0.11550688743591309,
      "step": 26966
    },
    {
      "epoch": 0.00016458740234375,
      "step": 26966,
      "training_step_time": 0.473438024520874
    },
    {
      "epoch": 0.000164593505859375,
      "model_forward_time": 0.11518096923828125,
      "step": 26967
    },
    {
      "epoch": 0.000164593505859375,
      "step": 26967,
      "training_step_time": 0.3873727321624756
    },
    {
      "epoch": 0.000164599609375,
      "model_forward_time": 0.1151282787322998,
      "step": 26968
    },
    {
      "epoch": 0.000164599609375,
      "step": 26968,
      "training_step_time": 0.3925447463989258
    },
    {
      "epoch": 0.000164605712890625,
      "model_forward_time": 0.11594843864440918,
      "step": 26969
    },
    {
      "epoch": 0.000164605712890625,
      "step": 26969,
      "training_step_time": 0.3963148593902588
    },
    {
      "epoch": 0.00016461181640625,
      "grad_norm": 0.12028717994689941,
      "learning_rate": 6.235440124587198e-05,
      "loss": 0.0435,
      "step": 26970
    },
    {
      "epoch": 0.00016461181640625,
      "model_forward_time": 0.11536860466003418,
      "step": 26970
    },
    {
      "epoch": 0.00016461181640625,
      "step": 26970,
      "training_step_time": 0.45197200775146484
    },
    {
      "epoch": 0.000164617919921875,
      "model_forward_time": 0.11511063575744629,
      "step": 26971
    },
    {
      "epoch": 0.000164617919921875,
      "step": 26971,
      "training_step_time": 0.3966186046600342
    },
    {
      "epoch": 0.0001646240234375,
      "model_forward_time": 0.11513805389404297,
      "step": 26972
    },
    {
      "epoch": 0.0001646240234375,
      "step": 26972,
      "training_step_time": 0.6085734367370605
    },
    {
      "epoch": 0.000164630126953125,
      "model_forward_time": 0.11538839340209961,
      "step": 26973
    },
    {
      "epoch": 0.000164630126953125,
      "step": 26973,
      "training_step_time": 0.41348934173583984
    },
    {
      "epoch": 0.00016463623046875,
      "model_forward_time": 0.11496853828430176,
      "step": 26974
    },
    {
      "epoch": 0.00016463623046875,
      "step": 26974,
      "training_step_time": 0.45528364181518555
    },
    {
      "epoch": 0.000164642333984375,
      "model_forward_time": 0.1151282787322998,
      "step": 26975
    },
    {
      "epoch": 0.000164642333984375,
      "step": 26975,
      "training_step_time": 0.3893914222717285
    },
    {
      "epoch": 0.0001646484375,
      "model_forward_time": 0.11509990692138672,
      "step": 26976
    },
    {
      "epoch": 0.0001646484375,
      "step": 26976,
      "training_step_time": 0.4440882205963135
    },
    {
      "epoch": 0.000164654541015625,
      "model_forward_time": 0.11529684066772461,
      "step": 26977
    },
    {
      "epoch": 0.000164654541015625,
      "step": 26977,
      "training_step_time": 0.46361517906188965
    },
    {
      "epoch": 0.00016466064453125,
      "model_forward_time": 0.1161797046661377,
      "step": 26978
    },
    {
      "epoch": 0.00016466064453125,
      "step": 26978,
      "training_step_time": 0.43825483322143555
    },
    {
      "epoch": 0.000164666748046875,
      "model_forward_time": 0.11500740051269531,
      "step": 26979
    },
    {
      "epoch": 0.000164666748046875,
      "step": 26979,
      "training_step_time": 0.4071478843688965
    },
    {
      "epoch": 0.0001646728515625,
      "grad_norm": 0.1527199000120163,
      "learning_rate": 6.232769602505203e-05,
      "loss": 0.0412,
      "step": 26980
    },
    {
      "epoch": 0.0001646728515625,
      "model_forward_time": 0.11470818519592285,
      "step": 26980
    },
    {
      "epoch": 0.0001646728515625,
      "step": 26980,
      "training_step_time": 0.3881194591522217
    },
    {
      "epoch": 0.000164678955078125,
      "model_forward_time": 0.11499524116516113,
      "step": 26981
    },
    {
      "epoch": 0.000164678955078125,
      "step": 26981,
      "training_step_time": 0.39104580879211426
    },
    {
      "epoch": 0.00016468505859375,
      "model_forward_time": 0.1153719425201416,
      "step": 26982
    },
    {
      "epoch": 0.00016468505859375,
      "step": 26982,
      "training_step_time": 0.4008333683013916
    },
    {
      "epoch": 0.000164691162109375,
      "model_forward_time": 0.11538434028625488,
      "step": 26983
    },
    {
      "epoch": 0.000164691162109375,
      "step": 26983,
      "training_step_time": 0.399646520614624
    },
    {
      "epoch": 0.000164697265625,
      "model_forward_time": 0.1160125732421875,
      "step": 26984
    },
    {
      "epoch": 0.000164697265625,
      "step": 26984,
      "training_step_time": 0.6321184635162354
    },
    {
      "epoch": 0.000164703369140625,
      "model_forward_time": 0.11541318893432617,
      "step": 26985
    },
    {
      "epoch": 0.000164703369140625,
      "step": 26985,
      "training_step_time": 0.4320685863494873
    },
    {
      "epoch": 0.00016470947265625,
      "model_forward_time": 0.11472010612487793,
      "step": 26986
    },
    {
      "epoch": 0.00016470947265625,
      "step": 26986,
      "training_step_time": 0.41853785514831543
    },
    {
      "epoch": 0.000164715576171875,
      "model_forward_time": 0.11486959457397461,
      "step": 26987
    },
    {
      "epoch": 0.000164715576171875,
      "step": 26987,
      "training_step_time": 0.4275681972503662
    },
    {
      "epoch": 0.0001647216796875,
      "model_forward_time": 0.11519932746887207,
      "step": 26988
    },
    {
      "epoch": 0.0001647216796875,
      "step": 26988,
      "training_step_time": 0.3882482051849365
    },
    {
      "epoch": 0.000164727783203125,
      "model_forward_time": 0.11538362503051758,
      "step": 26989
    },
    {
      "epoch": 0.000164727783203125,
      "step": 26989,
      "training_step_time": 0.39301156997680664
    },
    {
      "epoch": 0.00016473388671875,
      "grad_norm": 0.14519977569580078,
      "learning_rate": 6.230098705940354e-05,
      "loss": 0.0465,
      "step": 26990
    },
    {
      "epoch": 0.00016473388671875,
      "model_forward_time": 0.11584615707397461,
      "step": 26990
    },
    {
      "epoch": 0.00016473388671875,
      "step": 26990,
      "training_step_time": 0.48833250999450684
    },
    {
      "epoch": 0.000164739990234375,
      "model_forward_time": 0.115325927734375,
      "step": 26991
    },
    {
      "epoch": 0.000164739990234375,
      "step": 26991,
      "training_step_time": 0.43561697006225586
    },
    {
      "epoch": 0.00016474609375,
      "model_forward_time": 0.11583256721496582,
      "step": 26992
    },
    {
      "epoch": 0.00016474609375,
      "step": 26992,
      "training_step_time": 0.5051491260528564
    },
    {
      "epoch": 0.000164752197265625,
      "model_forward_time": 0.1149148941040039,
      "step": 26993
    },
    {
      "epoch": 0.000164752197265625,
      "step": 26993,
      "training_step_time": 0.40569186210632324
    },
    {
      "epoch": 0.00016475830078125,
      "model_forward_time": 0.11554622650146484,
      "step": 26994
    },
    {
      "epoch": 0.00016475830078125,
      "step": 26994,
      "training_step_time": 0.3902626037597656
    },
    {
      "epoch": 0.000164764404296875,
      "model_forward_time": 0.11494588851928711,
      "step": 26995
    },
    {
      "epoch": 0.000164764404296875,
      "step": 26995,
      "training_step_time": 0.39194560050964355
    },
    {
      "epoch": 0.0001647705078125,
      "model_forward_time": 0.11569643020629883,
      "step": 26996
    },
    {
      "epoch": 0.0001647705078125,
      "step": 26996,
      "training_step_time": 2.854456901550293
    },
    {
      "epoch": 0.000164776611328125,
      "model_forward_time": 0.1120615005493164,
      "step": 26997
    },
    {
      "epoch": 0.000164776611328125,
      "step": 26997,
      "training_step_time": 0.3643970489501953
    },
    {
      "epoch": 0.00016478271484375,
      "model_forward_time": 0.11254024505615234,
      "step": 26998
    },
    {
      "epoch": 0.00016478271484375,
      "step": 26998,
      "training_step_time": 0.3786468505859375
    },
    {
      "epoch": 0.000164788818359375,
      "model_forward_time": 0.1134345531463623,
      "step": 26999
    },
    {
      "epoch": 0.000164788818359375,
      "step": 26999,
      "training_step_time": 0.3585052490234375
    },
    {
      "epoch": 0.000164794921875,
      "grad_norm": 0.14759686589241028,
      "learning_rate": 6.227427435703997e-05,
      "loss": 0.0449,
      "step": 27000
    },
    {
      "epoch": 0.000164794921875,
      "model_forward_time": 0.11471414566040039,
      "step": 27000
    },
    {
      "epoch": 0.000164794921875,
      "step": 27000,
      "training_step_time": 0.3612027168273926
    },
    {
      "epoch": 0.000164801025390625,
      "model_forward_time": 0.11564755439758301,
      "step": 27001
    },
    {
      "epoch": 0.000164801025390625,
      "step": 27001,
      "training_step_time": 0.4194307327270508
    },
    {
      "epoch": 0.00016480712890625,
      "model_forward_time": 0.11400961875915527,
      "step": 27002
    },
    {
      "epoch": 0.00016480712890625,
      "step": 27002,
      "training_step_time": 0.42422986030578613
    },
    {
      "epoch": 0.000164813232421875,
      "model_forward_time": 0.11348915100097656,
      "step": 27003
    },
    {
      "epoch": 0.000164813232421875,
      "step": 27003,
      "training_step_time": 0.38159728050231934
    },
    {
      "epoch": 0.0001648193359375,
      "model_forward_time": 0.11618661880493164,
      "step": 27004
    },
    {
      "epoch": 0.0001648193359375,
      "step": 27004,
      "training_step_time": 0.3825395107269287
    },
    {
      "epoch": 0.000164825439453125,
      "model_forward_time": 0.11772465705871582,
      "step": 27005
    },
    {
      "epoch": 0.000164825439453125,
      "step": 27005,
      "training_step_time": 0.372882604598999
    },
    {
      "epoch": 0.00016483154296875,
      "model_forward_time": 0.11718606948852539,
      "step": 27006
    },
    {
      "epoch": 0.00016483154296875,
      "step": 27006,
      "training_step_time": 0.3837089538574219
    },
    {
      "epoch": 0.000164837646484375,
      "model_forward_time": 0.11805319786071777,
      "step": 27007
    },
    {
      "epoch": 0.000164837646484375,
      "step": 27007,
      "training_step_time": 0.4340505599975586
    },
    {
      "epoch": 0.00016484375,
      "model_forward_time": 0.11763620376586914,
      "step": 27008
    },
    {
      "epoch": 0.00016484375,
      "step": 27008,
      "training_step_time": 0.37911272048950195
    },
    {
      "epoch": 0.000164849853515625,
      "model_forward_time": 0.11765265464782715,
      "step": 27009
    },
    {
      "epoch": 0.000164849853515625,
      "step": 27009,
      "training_step_time": 0.41538238525390625
    },
    {
      "epoch": 0.00016485595703125,
      "grad_norm": 0.16707202792167664,
      "learning_rate": 6.22475579260759e-05,
      "loss": 0.0503,
      "step": 27010
    },
    {
      "epoch": 0.00016485595703125,
      "model_forward_time": 0.1154928207397461,
      "step": 27010
    },
    {
      "epoch": 0.00016485595703125,
      "step": 27010,
      "training_step_time": 0.4023904800415039
    },
    {
      "epoch": 0.000164862060546875,
      "model_forward_time": 0.11516284942626953,
      "step": 27011
    },
    {
      "epoch": 0.000164862060546875,
      "step": 27011,
      "training_step_time": 0.39269495010375977
    },
    {
      "epoch": 0.0001648681640625,
      "model_forward_time": 0.11544537544250488,
      "step": 27012
    },
    {
      "epoch": 0.0001648681640625,
      "step": 27012,
      "training_step_time": 0.40019750595092773
    },
    {
      "epoch": 0.000164874267578125,
      "model_forward_time": 0.11510324478149414,
      "step": 27013
    },
    {
      "epoch": 0.000164874267578125,
      "step": 27013,
      "training_step_time": 0.4035298824310303
    },
    {
      "epoch": 0.00016488037109375,
      "model_forward_time": 0.12140917778015137,
      "step": 27014
    },
    {
      "epoch": 0.00016488037109375,
      "step": 27014,
      "training_step_time": 0.37558627128601074
    },
    {
      "epoch": 0.000164886474609375,
      "model_forward_time": 0.11780261993408203,
      "step": 27015
    },
    {
      "epoch": 0.000164886474609375,
      "step": 27015,
      "training_step_time": 0.43894195556640625
    },
    {
      "epoch": 0.000164892578125,
      "model_forward_time": 0.11602210998535156,
      "step": 27016
    },
    {
      "epoch": 0.000164892578125,
      "step": 27016,
      "training_step_time": 0.45072293281555176
    },
    {
      "epoch": 0.000164898681640625,
      "model_forward_time": 0.1143038272857666,
      "step": 27017
    },
    {
      "epoch": 0.000164898681640625,
      "step": 27017,
      "training_step_time": 0.4445466995239258
    },
    {
      "epoch": 0.00016490478515625,
      "model_forward_time": 0.11506915092468262,
      "step": 27018
    },
    {
      "epoch": 0.00016490478515625,
      "step": 27018,
      "training_step_time": 0.39960765838623047
    },
    {
      "epoch": 0.000164910888671875,
      "model_forward_time": 0.11584186553955078,
      "step": 27019
    },
    {
      "epoch": 0.000164910888671875,
      "step": 27019,
      "training_step_time": 0.3960914611816406
    },
    {
      "epoch": 0.0001649169921875,
      "grad_norm": 0.16439229249954224,
      "learning_rate": 6.222083777462715e-05,
      "loss": 0.0435,
      "step": 27020
    },
    {
      "epoch": 0.0001649169921875,
      "model_forward_time": 0.11499476432800293,
      "step": 27020
    },
    {
      "epoch": 0.0001649169921875,
      "step": 27020,
      "training_step_time": 0.404003381729126
    },
    {
      "epoch": 0.000164923095703125,
      "model_forward_time": 0.11569690704345703,
      "step": 27021
    },
    {
      "epoch": 0.000164923095703125,
      "step": 27021,
      "training_step_time": 0.40580296516418457
    },
    {
      "epoch": 0.00016492919921875,
      "model_forward_time": 0.11548447608947754,
      "step": 27022
    },
    {
      "epoch": 0.00016492919921875,
      "step": 27022,
      "training_step_time": 0.4367372989654541
    },
    {
      "epoch": 0.000164935302734375,
      "model_forward_time": 0.11575007438659668,
      "step": 27023
    },
    {
      "epoch": 0.000164935302734375,
      "step": 27023,
      "training_step_time": 0.3971421718597412
    },
    {
      "epoch": 0.00016494140625,
      "model_forward_time": 0.11515355110168457,
      "step": 27024
    },
    {
      "epoch": 0.00016494140625,
      "step": 27024,
      "training_step_time": 0.41956257820129395
    },
    {
      "epoch": 0.000164947509765625,
      "model_forward_time": 0.11546182632446289,
      "step": 27025
    },
    {
      "epoch": 0.000164947509765625,
      "step": 27025,
      "training_step_time": 0.4126710891723633
    },
    {
      "epoch": 0.00016495361328125,
      "model_forward_time": 0.11501860618591309,
      "step": 27026
    },
    {
      "epoch": 0.00016495361328125,
      "step": 27026,
      "training_step_time": 0.39105701446533203
    },
    {
      "epoch": 0.000164959716796875,
      "model_forward_time": 0.11562609672546387,
      "step": 27027
    },
    {
      "epoch": 0.000164959716796875,
      "step": 27027,
      "training_step_time": 0.38660645484924316
    },
    {
      "epoch": 0.0001649658203125,
      "model_forward_time": 0.11544656753540039,
      "step": 27028
    },
    {
      "epoch": 0.0001649658203125,
      "step": 27028,
      "training_step_time": 0.3939657211303711
    },
    {
      "epoch": 0.000164971923828125,
      "model_forward_time": 0.11734199523925781,
      "step": 27029
    },
    {
      "epoch": 0.000164971923828125,
      "step": 27029,
      "training_step_time": 0.41993188858032227
    },
    {
      "epoch": 0.00016497802734375,
      "grad_norm": 0.16061119735240936,
      "learning_rate": 6.219411391081055e-05,
      "loss": 0.0427,
      "step": 27030
    },
    {
      "epoch": 0.00016497802734375,
      "model_forward_time": 0.1257920265197754,
      "step": 27030
    },
    {
      "epoch": 0.00016497802734375,
      "step": 27030,
      "training_step_time": 0.49765515327453613
    },
    {
      "epoch": 0.000164984130859375,
      "model_forward_time": 0.12525272369384766,
      "step": 27031
    },
    {
      "epoch": 0.000164984130859375,
      "step": 27031,
      "training_step_time": 0.5099108219146729
    },
    {
      "epoch": 0.000164990234375,
      "model_forward_time": 0.11775565147399902,
      "step": 27032
    },
    {
      "epoch": 0.000164990234375,
      "step": 27032,
      "training_step_time": 0.3881514072418213
    },
    {
      "epoch": 0.000164996337890625,
      "model_forward_time": 0.11862897872924805,
      "step": 27033
    },
    {
      "epoch": 0.000164996337890625,
      "step": 27033,
      "training_step_time": 0.3860893249511719
    },
    {
      "epoch": 0.00016500244140625,
      "model_forward_time": 0.11789274215698242,
      "step": 27034
    },
    {
      "epoch": 0.00016500244140625,
      "step": 27034,
      "training_step_time": 0.4011397361755371
    },
    {
      "epoch": 0.000165008544921875,
      "model_forward_time": 0.1177213191986084,
      "step": 27035
    },
    {
      "epoch": 0.000165008544921875,
      "step": 27035,
      "training_step_time": 0.4385988712310791
    },
    {
      "epoch": 0.0001650146484375,
      "model_forward_time": 0.119140625,
      "step": 27036
    },
    {
      "epoch": 0.0001650146484375,
      "step": 27036,
      "training_step_time": 0.5018560886383057
    },
    {
      "epoch": 0.000165020751953125,
      "model_forward_time": 0.11736845970153809,
      "step": 27037
    },
    {
      "epoch": 0.000165020751953125,
      "step": 27037,
      "training_step_time": 0.46946096420288086
    },
    {
      "epoch": 0.00016502685546875,
      "model_forward_time": 0.11667346954345703,
      "step": 27038
    },
    {
      "epoch": 0.00016502685546875,
      "step": 27038,
      "training_step_time": 0.44914770126342773
    },
    {
      "epoch": 0.000165032958984375,
      "model_forward_time": 0.11824703216552734,
      "step": 27039
    },
    {
      "epoch": 0.000165032958984375,
      "step": 27039,
      "training_step_time": 0.37977099418640137
    },
    {
      "epoch": 0.0001650390625,
      "grad_norm": 0.14392147958278656,
      "learning_rate": 6.216738634274411e-05,
      "loss": 0.0413,
      "step": 27040
    },
    {
      "epoch": 0.0001650390625,
      "model_forward_time": 0.11590743064880371,
      "step": 27040
    },
    {
      "epoch": 0.0001650390625,
      "step": 27040,
      "training_step_time": 0.3885362148284912
    },
    {
      "epoch": 0.000165045166015625,
      "model_forward_time": 0.11562252044677734,
      "step": 27041
    },
    {
      "epoch": 0.000165045166015625,
      "step": 27041,
      "training_step_time": 0.4069936275482178
    },
    {
      "epoch": 0.00016505126953125,
      "model_forward_time": 0.11669278144836426,
      "step": 27042
    },
    {
      "epoch": 0.00016505126953125,
      "step": 27042,
      "training_step_time": 0.3955249786376953
    },
    {
      "epoch": 0.000165057373046875,
      "model_forward_time": 0.11738276481628418,
      "step": 27043
    },
    {
      "epoch": 0.000165057373046875,
      "step": 27043,
      "training_step_time": 0.450392484664917
    },
    {
      "epoch": 0.0001650634765625,
      "model_forward_time": 0.11664772033691406,
      "step": 27044
    },
    {
      "epoch": 0.0001650634765625,
      "step": 27044,
      "training_step_time": 0.4971320629119873
    },
    {
      "epoch": 0.000165069580078125,
      "model_forward_time": 0.11933445930480957,
      "step": 27045
    },
    {
      "epoch": 0.000165069580078125,
      "step": 27045,
      "training_step_time": 0.40215110778808594
    },
    {
      "epoch": 0.00016507568359375,
      "model_forward_time": 0.11731624603271484,
      "step": 27046
    },
    {
      "epoch": 0.00016507568359375,
      "step": 27046,
      "training_step_time": 0.45394182205200195
    },
    {
      "epoch": 0.000165081787109375,
      "model_forward_time": 0.12737059593200684,
      "step": 27047
    },
    {
      "epoch": 0.000165081787109375,
      "step": 27047,
      "training_step_time": 0.3892483711242676
    },
    {
      "epoch": 0.000165087890625,
      "model_forward_time": 0.1170501708984375,
      "step": 27048
    },
    {
      "epoch": 0.000165087890625,
      "step": 27048,
      "training_step_time": 0.4408149719238281
    },
    {
      "epoch": 0.000165093994140625,
      "model_forward_time": 0.11734604835510254,
      "step": 27049
    },
    {
      "epoch": 0.000165093994140625,
      "step": 27049,
      "training_step_time": 0.39603519439697266
    },
    {
      "epoch": 0.00016510009765625,
      "grad_norm": 0.20141789317131042,
      "learning_rate": 6.214065507854693e-05,
      "loss": 0.0426,
      "step": 27050
    },
    {
      "epoch": 0.00016510009765625,
      "model_forward_time": 0.11736464500427246,
      "step": 27050
    },
    {
      "epoch": 0.00016510009765625,
      "step": 27050,
      "training_step_time": 0.43871593475341797
    },
    {
      "epoch": 0.000165106201171875,
      "model_forward_time": 0.11614799499511719,
      "step": 27051
    },
    {
      "epoch": 0.000165106201171875,
      "step": 27051,
      "training_step_time": 0.3915669918060303
    },
    {
      "epoch": 0.0001651123046875,
      "model_forward_time": 0.11605215072631836,
      "step": 27052
    },
    {
      "epoch": 0.0001651123046875,
      "step": 27052,
      "training_step_time": 0.4164140224456787
    },
    {
      "epoch": 0.000165118408203125,
      "model_forward_time": 0.11574602127075195,
      "step": 27053
    },
    {
      "epoch": 0.000165118408203125,
      "step": 27053,
      "training_step_time": 0.3909780979156494
    },
    {
      "epoch": 0.00016512451171875,
      "model_forward_time": 0.11749744415283203,
      "step": 27054
    },
    {
      "epoch": 0.00016512451171875,
      "step": 27054,
      "training_step_time": 0.3987565040588379
    },
    {
      "epoch": 0.000165130615234375,
      "model_forward_time": 0.11590790748596191,
      "step": 27055
    },
    {
      "epoch": 0.000165130615234375,
      "step": 27055,
      "training_step_time": 0.4021120071411133
    },
    {
      "epoch": 0.00016513671875,
      "model_forward_time": 0.11685514450073242,
      "step": 27056
    },
    {
      "epoch": 0.00016513671875,
      "step": 27056,
      "training_step_time": 0.389554500579834
    },
    {
      "epoch": 0.000165142822265625,
      "model_forward_time": 0.11687111854553223,
      "step": 27057
    },
    {
      "epoch": 0.000165142822265625,
      "step": 27057,
      "training_step_time": 0.3987448215484619
    },
    {
      "epoch": 0.00016514892578125,
      "model_forward_time": 0.1175222396850586,
      "step": 27058
    },
    {
      "epoch": 0.00016514892578125,
      "step": 27058,
      "training_step_time": 0.3929729461669922
    },
    {
      "epoch": 0.000165155029296875,
      "model_forward_time": 0.11693167686462402,
      "step": 27059
    },
    {
      "epoch": 0.000165155029296875,
      "step": 27059,
      "training_step_time": 0.4393925666809082
    },
    {
      "epoch": 0.0001651611328125,
      "grad_norm": 0.11701865494251251,
      "learning_rate": 6.211392012633932e-05,
      "loss": 0.0416,
      "step": 27060
    },
    {
      "epoch": 0.0001651611328125,
      "model_forward_time": 0.11652040481567383,
      "step": 27060
    },
    {
      "epoch": 0.0001651611328125,
      "step": 27060,
      "training_step_time": 0.43743133544921875
    },
    {
      "epoch": 0.000165167236328125,
      "model_forward_time": 0.11716723442077637,
      "step": 27061
    },
    {
      "epoch": 0.000165167236328125,
      "step": 27061,
      "training_step_time": 0.4296128749847412
    },
    {
      "epoch": 0.00016517333984375,
      "model_forward_time": 0.11513686180114746,
      "step": 27062
    },
    {
      "epoch": 0.00016517333984375,
      "step": 27062,
      "training_step_time": 0.3924398422241211
    },
    {
      "epoch": 0.000165179443359375,
      "model_forward_time": 0.11663269996643066,
      "step": 27063
    },
    {
      "epoch": 0.000165179443359375,
      "step": 27063,
      "training_step_time": 0.400799036026001
    },
    {
      "epoch": 0.000165185546875,
      "model_forward_time": 0.11598825454711914,
      "step": 27064
    },
    {
      "epoch": 0.000165185546875,
      "step": 27064,
      "training_step_time": 0.3931915760040283
    },
    {
      "epoch": 0.000165191650390625,
      "model_forward_time": 0.11664056777954102,
      "step": 27065
    },
    {
      "epoch": 0.000165191650390625,
      "step": 27065,
      "training_step_time": 0.4831044673919678
    },
    {
      "epoch": 0.00016519775390625,
      "model_forward_time": 0.11652231216430664,
      "step": 27066
    },
    {
      "epoch": 0.00016519775390625,
      "step": 27066,
      "training_step_time": 0.4000532627105713
    },
    {
      "epoch": 0.000165203857421875,
      "model_forward_time": 0.11693930625915527,
      "step": 27067
    },
    {
      "epoch": 0.000165203857421875,
      "step": 27067,
      "training_step_time": 0.4249694347381592
    },
    {
      "epoch": 0.0001652099609375,
      "model_forward_time": 0.1163480281829834,
      "step": 27068
    },
    {
      "epoch": 0.0001652099609375,
      "step": 27068,
      "training_step_time": 0.41370582580566406
    },
    {
      "epoch": 0.000165216064453125,
      "model_forward_time": 0.11685991287231445,
      "step": 27069
    },
    {
      "epoch": 0.000165216064453125,
      "step": 27069,
      "training_step_time": 0.38543701171875
    },
    {
      "epoch": 0.00016522216796875,
      "grad_norm": 0.12514063715934753,
      "learning_rate": 6.208718149424263e-05,
      "loss": 0.0454,
      "step": 27070
    },
    {
      "epoch": 0.00016522216796875,
      "model_forward_time": 0.11559820175170898,
      "step": 27070
    },
    {
      "epoch": 0.00016522216796875,
      "step": 27070,
      "training_step_time": 0.3992910385131836
    },
    {
      "epoch": 0.000165228271484375,
      "model_forward_time": 0.11826872825622559,
      "step": 27071
    },
    {
      "epoch": 0.000165228271484375,
      "step": 27071,
      "training_step_time": 0.3882935047149658
    },
    {
      "epoch": 0.000165234375,
      "model_forward_time": 0.11618423461914062,
      "step": 27072
    },
    {
      "epoch": 0.000165234375,
      "step": 27072,
      "training_step_time": 0.4001023769378662
    },
    {
      "epoch": 0.000165240478515625,
      "model_forward_time": 0.11712384223937988,
      "step": 27073
    },
    {
      "epoch": 0.000165240478515625,
      "step": 27073,
      "training_step_time": 0.3955826759338379
    },
    {
      "epoch": 0.00016524658203125,
      "model_forward_time": 0.11626362800598145,
      "step": 27074
    },
    {
      "epoch": 0.00016524658203125,
      "step": 27074,
      "training_step_time": 0.42703700065612793
    },
    {
      "epoch": 0.000165252685546875,
      "model_forward_time": 0.11627006530761719,
      "step": 27075
    },
    {
      "epoch": 0.000165252685546875,
      "step": 27075,
      "training_step_time": 0.43772053718566895
    },
    {
      "epoch": 0.0001652587890625,
      "model_forward_time": 0.11587786674499512,
      "step": 27076
    },
    {
      "epoch": 0.0001652587890625,
      "step": 27076,
      "training_step_time": 0.47049498558044434
    },
    {
      "epoch": 0.000165264892578125,
      "model_forward_time": 0.11633563041687012,
      "step": 27077
    },
    {
      "epoch": 0.000165264892578125,
      "step": 27077,
      "training_step_time": 0.39002132415771484
    },
    {
      "epoch": 0.00016527099609375,
      "model_forward_time": 0.11700773239135742,
      "step": 27078
    },
    {
      "epoch": 0.00016527099609375,
      "step": 27078,
      "training_step_time": 0.39311647415161133
    },
    {
      "epoch": 0.000165277099609375,
      "model_forward_time": 0.11568570137023926,
      "step": 27079
    },
    {
      "epoch": 0.000165277099609375,
      "step": 27079,
      "training_step_time": 0.3972458839416504
    },
    {
      "epoch": 0.000165283203125,
      "grad_norm": 0.16050127148628235,
      "learning_rate": 6.206043919037933e-05,
      "loss": 0.0438,
      "step": 27080
    },
    {
      "epoch": 0.000165283203125,
      "model_forward_time": 0.11612462997436523,
      "step": 27080
    },
    {
      "epoch": 0.000165283203125,
      "step": 27080,
      "training_step_time": 0.39556431770324707
    },
    {
      "epoch": 0.000165289306640625,
      "model_forward_time": 0.11627364158630371,
      "step": 27081
    },
    {
      "epoch": 0.000165289306640625,
      "step": 27081,
      "training_step_time": 0.5302464962005615
    },
    {
      "epoch": 0.00016529541015625,
      "model_forward_time": 0.11560535430908203,
      "step": 27082
    },
    {
      "epoch": 0.00016529541015625,
      "step": 27082,
      "training_step_time": 0.38469719886779785
    },
    {
      "epoch": 0.000165301513671875,
      "model_forward_time": 0.11677694320678711,
      "step": 27083
    },
    {
      "epoch": 0.000165301513671875,
      "step": 27083,
      "training_step_time": 0.5858194828033447
    },
    {
      "epoch": 0.0001653076171875,
      "model_forward_time": 0.11541914939880371,
      "step": 27084
    },
    {
      "epoch": 0.0001653076171875,
      "step": 27084,
      "training_step_time": 0.38697338104248047
    },
    {
      "epoch": 0.000165313720703125,
      "model_forward_time": 0.11575770378112793,
      "step": 27085
    },
    {
      "epoch": 0.000165313720703125,
      "step": 27085,
      "training_step_time": 0.3968544006347656
    },
    {
      "epoch": 0.00016531982421875,
      "model_forward_time": 0.1159367561340332,
      "step": 27086
    },
    {
      "epoch": 0.00016531982421875,
      "step": 27086,
      "training_step_time": 0.3865315914154053
    },
    {
      "epoch": 0.000165325927734375,
      "model_forward_time": 0.11798477172851562,
      "step": 27087
    },
    {
      "epoch": 0.000165325927734375,
      "step": 27087,
      "training_step_time": 0.3924288749694824
    },
    {
      "epoch": 0.00016533203125,
      "model_forward_time": 0.1172645092010498,
      "step": 27088
    },
    {
      "epoch": 0.00016533203125,
      "step": 27088,
      "training_step_time": 0.40223050117492676
    },
    {
      "epoch": 0.000165338134765625,
      "model_forward_time": 0.11735677719116211,
      "step": 27089
    },
    {
      "epoch": 0.000165338134765625,
      "step": 27089,
      "training_step_time": 0.588994026184082
    },
    {
      "epoch": 0.00016534423828125,
      "grad_norm": 0.12697555124759674,
      "learning_rate": 6.203369322287306e-05,
      "loss": 0.0462,
      "step": 27090
    },
    {
      "epoch": 0.00016534423828125,
      "model_forward_time": 0.11762213706970215,
      "step": 27090
    },
    {
      "epoch": 0.00016534423828125,
      "step": 27090,
      "training_step_time": 0.3856039047241211
    },
    {
      "epoch": 0.000165350341796875,
      "model_forward_time": 0.11772584915161133,
      "step": 27091
    },
    {
      "epoch": 0.000165350341796875,
      "step": 27091,
      "training_step_time": 0.38590478897094727
    },
    {
      "epoch": 0.0001653564453125,
      "model_forward_time": 0.11748385429382324,
      "step": 27092
    },
    {
      "epoch": 0.0001653564453125,
      "step": 27092,
      "training_step_time": 0.3862767219543457
    },
    {
      "epoch": 0.000165362548828125,
      "model_forward_time": 0.11619257926940918,
      "step": 27093
    },
    {
      "epoch": 0.000165362548828125,
      "step": 27093,
      "training_step_time": 0.39451026916503906
    },
    {
      "epoch": 0.00016536865234375,
      "model_forward_time": 0.11640048027038574,
      "step": 27094
    },
    {
      "epoch": 0.00016536865234375,
      "step": 27094,
      "training_step_time": 0.3771212100982666
    },
    {
      "epoch": 0.000165374755859375,
      "model_forward_time": 0.11998629570007324,
      "step": 27095
    },
    {
      "epoch": 0.000165374755859375,
      "step": 27095,
      "training_step_time": 0.8954229354858398
    },
    {
      "epoch": 0.000165380859375,
      "model_forward_time": 0.11616683006286621,
      "step": 27096
    },
    {
      "epoch": 0.000165380859375,
      "step": 27096,
      "training_step_time": 0.38478732109069824
    },
    {
      "epoch": 0.000165386962890625,
      "model_forward_time": 0.11537599563598633,
      "step": 27097
    },
    {
      "epoch": 0.000165386962890625,
      "step": 27097,
      "training_step_time": 0.39198732376098633
    },
    {
      "epoch": 0.00016539306640625,
      "model_forward_time": 0.11650657653808594,
      "step": 27098
    },
    {
      "epoch": 0.00016539306640625,
      "step": 27098,
      "training_step_time": 0.38065147399902344
    },
    {
      "epoch": 0.000165399169921875,
      "model_forward_time": 0.1148066520690918,
      "step": 27099
    },
    {
      "epoch": 0.000165399169921875,
      "step": 27099,
      "training_step_time": 0.38178062438964844
    },
    {
      "epoch": 0.0001654052734375,
      "grad_norm": 0.1377277374267578,
      "learning_rate": 6.200694359984849e-05,
      "loss": 0.0449,
      "step": 27100
    },
    {
      "epoch": 0.0001654052734375,
      "model_forward_time": 0.1156003475189209,
      "step": 27100
    },
    {
      "epoch": 0.0001654052734375,
      "step": 27100,
      "training_step_time": 0.38466978073120117
    },
    {
      "epoch": 0.000165411376953125,
      "model_forward_time": 0.1158750057220459,
      "step": 27101
    },
    {
      "epoch": 0.000165411376953125,
      "step": 27101,
      "training_step_time": 0.5903608798980713
    },
    {
      "epoch": 0.00016541748046875,
      "model_forward_time": 0.11613607406616211,
      "step": 27102
    },
    {
      "epoch": 0.00016541748046875,
      "step": 27102,
      "training_step_time": 0.41550588607788086
    },
    {
      "epoch": 0.000165423583984375,
      "model_forward_time": 0.11603498458862305,
      "step": 27103
    },
    {
      "epoch": 0.000165423583984375,
      "step": 27103,
      "training_step_time": 0.4500906467437744
    },
    {
      "epoch": 0.0001654296875,
      "model_forward_time": 0.11861920356750488,
      "step": 27104
    },
    {
      "epoch": 0.0001654296875,
      "step": 27104,
      "training_step_time": 0.389176607131958
    },
    {
      "epoch": 0.000165435791015625,
      "model_forward_time": 0.11550045013427734,
      "step": 27105
    },
    {
      "epoch": 0.000165435791015625,
      "step": 27105,
      "training_step_time": 0.3891892433166504
    },
    {
      "epoch": 0.00016544189453125,
      "model_forward_time": 0.11616063117980957,
      "step": 27106
    },
    {
      "epoch": 0.00016544189453125,
      "step": 27106,
      "training_step_time": 0.4843716621398926
    },
    {
      "epoch": 0.000165447998046875,
      "model_forward_time": 0.11629152297973633,
      "step": 27107
    },
    {
      "epoch": 0.000165447998046875,
      "step": 27107,
      "training_step_time": 0.648909330368042
    },
    {
      "epoch": 0.0001654541015625,
      "model_forward_time": 0.11572265625,
      "step": 27108
    },
    {
      "epoch": 0.0001654541015625,
      "step": 27108,
      "training_step_time": 0.4271414279937744
    },
    {
      "epoch": 0.000165460205078125,
      "model_forward_time": 0.11580324172973633,
      "step": 27109
    },
    {
      "epoch": 0.000165460205078125,
      "step": 27109,
      "training_step_time": 0.4015383720397949
    },
    {
      "epoch": 0.00016546630859375,
      "grad_norm": 0.13466733694076538,
      "learning_rate": 6.19801903294315e-05,
      "loss": 0.0466,
      "step": 27110
    },
    {
      "epoch": 0.00016546630859375,
      "model_forward_time": 0.11694836616516113,
      "step": 27110
    },
    {
      "epoch": 0.00016546630859375,
      "step": 27110,
      "training_step_time": 0.39130187034606934
    },
    {
      "epoch": 0.000165472412109375,
      "model_forward_time": 0.11604499816894531,
      "step": 27111
    },
    {
      "epoch": 0.000165472412109375,
      "step": 27111,
      "training_step_time": 0.3825397491455078
    },
    {
      "epoch": 0.000165478515625,
      "model_forward_time": 0.12612104415893555,
      "step": 27112
    },
    {
      "epoch": 0.000165478515625,
      "step": 27112,
      "training_step_time": 0.3912804126739502
    },
    {
      "epoch": 0.000165484619140625,
      "model_forward_time": 0.11685347557067871,
      "step": 27113
    },
    {
      "epoch": 0.000165484619140625,
      "step": 27113,
      "training_step_time": 0.7438533306121826
    },
    {
      "epoch": 0.00016549072265625,
      "model_forward_time": 0.11533784866333008,
      "step": 27114
    },
    {
      "epoch": 0.00016549072265625,
      "step": 27114,
      "training_step_time": 0.36945295333862305
    },
    {
      "epoch": 0.000165496826171875,
      "model_forward_time": 0.11692333221435547,
      "step": 27115
    },
    {
      "epoch": 0.000165496826171875,
      "step": 27115,
      "training_step_time": 0.4396648406982422
    },
    {
      "epoch": 0.0001655029296875,
      "model_forward_time": 0.11573481559753418,
      "step": 27116
    },
    {
      "epoch": 0.0001655029296875,
      "step": 27116,
      "training_step_time": 0.47784948348999023
    },
    {
      "epoch": 0.000165509033203125,
      "model_forward_time": 0.11601924896240234,
      "step": 27117
    },
    {
      "epoch": 0.000165509033203125,
      "step": 27117,
      "training_step_time": 0.37914276123046875
    },
    {
      "epoch": 0.00016551513671875,
      "model_forward_time": 0.11561393737792969,
      "step": 27118
    },
    {
      "epoch": 0.00016551513671875,
      "step": 27118,
      "training_step_time": 0.3843710422515869
    },
    {
      "epoch": 0.000165521240234375,
      "model_forward_time": 0.1166079044342041,
      "step": 27119
    },
    {
      "epoch": 0.000165521240234375,
      "step": 27119,
      "training_step_time": 0.7165300846099854
    },
    {
      "epoch": 0.00016552734375,
      "grad_norm": 0.20208439230918884,
      "learning_rate": 6.195343341974899e-05,
      "loss": 0.0431,
      "step": 27120
    },
    {
      "epoch": 0.00016552734375,
      "model_forward_time": 0.11499309539794922,
      "step": 27120
    },
    {
      "epoch": 0.00016552734375,
      "step": 27120,
      "training_step_time": 0.44049668312072754
    },
    {
      "epoch": 0.000165533447265625,
      "model_forward_time": 0.11518597602844238,
      "step": 27121
    },
    {
      "epoch": 0.000165533447265625,
      "step": 27121,
      "training_step_time": 0.4655296802520752
    },
    {
      "epoch": 0.00016553955078125,
      "model_forward_time": 0.11462211608886719,
      "step": 27122
    },
    {
      "epoch": 0.00016553955078125,
      "step": 27122,
      "training_step_time": 0.38600587844848633
    },
    {
      "epoch": 0.000165545654296875,
      "model_forward_time": 0.11590361595153809,
      "step": 27123
    },
    {
      "epoch": 0.000165545654296875,
      "step": 27123,
      "training_step_time": 0.38373422622680664
    },
    {
      "epoch": 0.0001655517578125,
      "model_forward_time": 0.11562299728393555,
      "step": 27124
    },
    {
      "epoch": 0.0001655517578125,
      "step": 27124,
      "training_step_time": 0.391432523727417
    },
    {
      "epoch": 0.000165557861328125,
      "model_forward_time": 0.1162118911743164,
      "step": 27125
    },
    {
      "epoch": 0.000165557861328125,
      "step": 27125,
      "training_step_time": 0.48400306701660156
    },
    {
      "epoch": 0.00016556396484375,
      "model_forward_time": 0.11556363105773926,
      "step": 27126
    },
    {
      "epoch": 0.00016556396484375,
      "step": 27126,
      "training_step_time": 0.40058159828186035
    },
    {
      "epoch": 0.000165570068359375,
      "model_forward_time": 0.11597180366516113,
      "step": 27127
    },
    {
      "epoch": 0.000165570068359375,
      "step": 27127,
      "training_step_time": 0.4179525375366211
    },
    {
      "epoch": 0.000165576171875,
      "model_forward_time": 0.11592960357666016,
      "step": 27128
    },
    {
      "epoch": 0.000165576171875,
      "step": 27128,
      "training_step_time": 0.3734574317932129
    },
    {
      "epoch": 0.000165582275390625,
      "model_forward_time": 0.11559200286865234,
      "step": 27129
    },
    {
      "epoch": 0.000165582275390625,
      "step": 27129,
      "training_step_time": 0.45133447647094727
    },
    {
      "epoch": 0.00016558837890625,
      "grad_norm": 0.17611174285411835,
      "learning_rate": 6.192667287892905e-05,
      "loss": 0.0468,
      "step": 27130
    },
    {
      "epoch": 0.00016558837890625,
      "model_forward_time": 0.11543154716491699,
      "step": 27130
    },
    {
      "epoch": 0.00016558837890625,
      "step": 27130,
      "training_step_time": 0.3992624282836914
    },
    {
      "epoch": 0.000165594482421875,
      "model_forward_time": 0.1163477897644043,
      "step": 27131
    },
    {
      "epoch": 0.000165594482421875,
      "step": 27131,
      "training_step_time": 0.5016555786132812
    },
    {
      "epoch": 0.0001656005859375,
      "model_forward_time": 0.1164243221282959,
      "step": 27132
    },
    {
      "epoch": 0.0001656005859375,
      "step": 27132,
      "training_step_time": 0.40743207931518555
    },
    {
      "epoch": 0.000165606689453125,
      "model_forward_time": 0.11526107788085938,
      "step": 27133
    },
    {
      "epoch": 0.000165606689453125,
      "step": 27133,
      "training_step_time": 0.4419362545013428
    },
    {
      "epoch": 0.00016561279296875,
      "model_forward_time": 0.1153876781463623,
      "step": 27134
    },
    {
      "epoch": 0.00016561279296875,
      "step": 27134,
      "training_step_time": 0.4508094787597656
    },
    {
      "epoch": 0.000165618896484375,
      "model_forward_time": 0.11503386497497559,
      "step": 27135
    },
    {
      "epoch": 0.000165618896484375,
      "step": 27135,
      "training_step_time": 0.42385077476501465
    },
    {
      "epoch": 0.000165625,
      "model_forward_time": 0.11482858657836914,
      "step": 27136
    },
    {
      "epoch": 0.000165625,
      "step": 27136,
      "training_step_time": 0.4927995204925537
    },
    {
      "epoch": 0.000165631103515625,
      "model_forward_time": 0.11569333076477051,
      "step": 27137
    },
    {
      "epoch": 0.000165631103515625,
      "step": 27137,
      "training_step_time": 0.6672744750976562
    },
    {
      "epoch": 0.00016563720703125,
      "model_forward_time": 0.11503911018371582,
      "step": 27138
    },
    {
      "epoch": 0.00016563720703125,
      "step": 27138,
      "training_step_time": 0.3916175365447998
    },
    {
      "epoch": 0.000165643310546875,
      "model_forward_time": 0.11478829383850098,
      "step": 27139
    },
    {
      "epoch": 0.000165643310546875,
      "step": 27139,
      "training_step_time": 0.4204216003417969
    },
    {
      "epoch": 0.0001656494140625,
      "grad_norm": 0.09220810234546661,
      "learning_rate": 6.189990871510078e-05,
      "loss": 0.0433,
      "step": 27140
    },
    {
      "epoch": 0.0001656494140625,
      "model_forward_time": 0.11529874801635742,
      "step": 27140
    },
    {
      "epoch": 0.0001656494140625,
      "step": 27140,
      "training_step_time": 0.38103365898132324
    },
    {
      "epoch": 0.000165655517578125,
      "model_forward_time": 0.11631011962890625,
      "step": 27141
    },
    {
      "epoch": 0.000165655517578125,
      "step": 27141,
      "training_step_time": 0.388897180557251
    },
    {
      "epoch": 0.00016566162109375,
      "model_forward_time": 0.11486077308654785,
      "step": 27142
    },
    {
      "epoch": 0.00016566162109375,
      "step": 27142,
      "training_step_time": 0.4204535484313965
    },
    {
      "epoch": 0.000165667724609375,
      "model_forward_time": 0.11772322654724121,
      "step": 27143
    },
    {
      "epoch": 0.000165667724609375,
      "step": 27143,
      "training_step_time": 0.6795809268951416
    },
    {
      "epoch": 0.000165673828125,
      "model_forward_time": 0.11523675918579102,
      "step": 27144
    },
    {
      "epoch": 0.000165673828125,
      "step": 27144,
      "training_step_time": 0.4348776340484619
    },
    {
      "epoch": 0.000165679931640625,
      "model_forward_time": 0.1152501106262207,
      "step": 27145
    },
    {
      "epoch": 0.000165679931640625,
      "step": 27145,
      "training_step_time": 0.3930025100708008
    },
    {
      "epoch": 0.00016568603515625,
      "model_forward_time": 0.1151590347290039,
      "step": 27146
    },
    {
      "epoch": 0.00016568603515625,
      "step": 27146,
      "training_step_time": 0.38884973526000977
    },
    {
      "epoch": 0.000165692138671875,
      "model_forward_time": 0.11579084396362305,
      "step": 27147
    },
    {
      "epoch": 0.000165692138671875,
      "step": 27147,
      "training_step_time": 0.38321805000305176
    },
    {
      "epoch": 0.0001656982421875,
      "model_forward_time": 0.11539530754089355,
      "step": 27148
    },
    {
      "epoch": 0.0001656982421875,
      "step": 27148,
      "training_step_time": 0.48084497451782227
    },
    {
      "epoch": 0.000165704345703125,
      "model_forward_time": 0.11626434326171875,
      "step": 27149
    },
    {
      "epoch": 0.000165704345703125,
      "step": 27149,
      "training_step_time": 0.4777803421020508
    },
    {
      "epoch": 0.00016571044921875,
      "grad_norm": 0.14563478529453278,
      "learning_rate": 6.187314093639444e-05,
      "loss": 0.0473,
      "step": 27150
    },
    {
      "epoch": 0.00016571044921875,
      "model_forward_time": 0.11509418487548828,
      "step": 27150
    },
    {
      "epoch": 0.00016571044921875,
      "step": 27150,
      "training_step_time": 0.39099574089050293
    },
    {
      "epoch": 0.000165716552734375,
      "model_forward_time": 0.1155087947845459,
      "step": 27151
    },
    {
      "epoch": 0.000165716552734375,
      "step": 27151,
      "training_step_time": 0.400465726852417
    },
    {
      "epoch": 0.00016572265625,
      "model_forward_time": 0.11546492576599121,
      "step": 27152
    },
    {
      "epoch": 0.00016572265625,
      "step": 27152,
      "training_step_time": 0.41509485244750977
    },
    {
      "epoch": 0.000165728759765625,
      "model_forward_time": 0.11525607109069824,
      "step": 27153
    },
    {
      "epoch": 0.000165728759765625,
      "step": 27153,
      "training_step_time": 0.42301034927368164
    },
    {
      "epoch": 0.00016573486328125,
      "model_forward_time": 0.12444114685058594,
      "step": 27154
    },
    {
      "epoch": 0.00016573486328125,
      "step": 27154,
      "training_step_time": 0.39072680473327637
    },
    {
      "epoch": 0.000165740966796875,
      "model_forward_time": 0.11573243141174316,
      "step": 27155
    },
    {
      "epoch": 0.000165740966796875,
      "step": 27155,
      "training_step_time": 0.6858537197113037
    },
    {
      "epoch": 0.0001657470703125,
      "model_forward_time": 0.1154329776763916,
      "step": 27156
    },
    {
      "epoch": 0.0001657470703125,
      "step": 27156,
      "training_step_time": 0.4517509937286377
    },
    {
      "epoch": 0.000165753173828125,
      "model_forward_time": 0.11539912223815918,
      "step": 27157
    },
    {
      "epoch": 0.000165753173828125,
      "step": 27157,
      "training_step_time": 0.4816403388977051
    },
    {
      "epoch": 0.00016575927734375,
      "model_forward_time": 0.11582112312316895,
      "step": 27158
    },
    {
      "epoch": 0.00016575927734375,
      "step": 27158,
      "training_step_time": 0.41454076766967773
    },
    {
      "epoch": 0.000165765380859375,
      "model_forward_time": 0.11511945724487305,
      "step": 27159
    },
    {
      "epoch": 0.000165765380859375,
      "step": 27159,
      "training_step_time": 0.388547420501709
    },
    {
      "epoch": 0.000165771484375,
      "grad_norm": 0.16143639385700226,
      "learning_rate": 6.184636955094138e-05,
      "loss": 0.0423,
      "step": 27160
    },
    {
      "epoch": 0.000165771484375,
      "model_forward_time": 0.11517715454101562,
      "step": 27160
    },
    {
      "epoch": 0.000165771484375,
      "step": 27160,
      "training_step_time": 0.38173460960388184
    },
    {
      "epoch": 0.000165777587890625,
      "model_forward_time": 0.11528873443603516,
      "step": 27161
    },
    {
      "epoch": 0.000165777587890625,
      "step": 27161,
      "training_step_time": 0.5100798606872559
    },
    {
      "epoch": 0.00016578369140625,
      "model_forward_time": 0.11514782905578613,
      "step": 27162
    },
    {
      "epoch": 0.00016578369140625,
      "step": 27162,
      "training_step_time": 0.43817687034606934
    },
    {
      "epoch": 0.000165789794921875,
      "model_forward_time": 0.1150827407836914,
      "step": 27163
    },
    {
      "epoch": 0.000165789794921875,
      "step": 27163,
      "training_step_time": 0.5054824352264404
    },
    {
      "epoch": 0.0001657958984375,
      "model_forward_time": 0.11572837829589844,
      "step": 27164
    },
    {
      "epoch": 0.0001657958984375,
      "step": 27164,
      "training_step_time": 0.4409902095794678
    },
    {
      "epoch": 0.000165802001953125,
      "model_forward_time": 0.11551856994628906,
      "step": 27165
    },
    {
      "epoch": 0.000165802001953125,
      "step": 27165,
      "training_step_time": 0.44693946838378906
    },
    {
      "epoch": 0.00016580810546875,
      "model_forward_time": 0.11558985710144043,
      "step": 27166
    },
    {
      "epoch": 0.00016580810546875,
      "step": 27166,
      "training_step_time": 0.4212169647216797
    },
    {
      "epoch": 0.000165814208984375,
      "model_forward_time": 0.1153416633605957,
      "step": 27167
    },
    {
      "epoch": 0.000165814208984375,
      "step": 27167,
      "training_step_time": 0.5285613536834717
    },
    {
      "epoch": 0.0001658203125,
      "model_forward_time": 0.11459493637084961,
      "step": 27168
    },
    {
      "epoch": 0.0001658203125,
      "step": 27168,
      "training_step_time": 0.39569926261901855
    },
    {
      "epoch": 0.000165826416015625,
      "model_forward_time": 0.11490631103515625,
      "step": 27169
    },
    {
      "epoch": 0.000165826416015625,
      "step": 27169,
      "training_step_time": 0.3682823181152344
    },
    {
      "epoch": 0.00016583251953125,
      "grad_norm": 0.15435439348220825,
      "learning_rate": 6.181959456687402e-05,
      "loss": 0.0396,
      "step": 27170
    },
    {
      "epoch": 0.00016583251953125,
      "model_forward_time": 0.11540436744689941,
      "step": 27170
    },
    {
      "epoch": 0.00016583251953125,
      "step": 27170,
      "training_step_time": 0.43410587310791016
    },
    {
      "epoch": 0.000165838623046875,
      "model_forward_time": 0.11489176750183105,
      "step": 27171
    },
    {
      "epoch": 0.000165838623046875,
      "step": 27171,
      "training_step_time": 0.3987140655517578
    },
    {
      "epoch": 0.0001658447265625,
      "model_forward_time": 0.11529898643493652,
      "step": 27172
    },
    {
      "epoch": 0.0001658447265625,
      "step": 27172,
      "training_step_time": 0.40402746200561523
    },
    {
      "epoch": 0.000165850830078125,
      "model_forward_time": 0.11504435539245605,
      "step": 27173
    },
    {
      "epoch": 0.000165850830078125,
      "step": 27173,
      "training_step_time": 0.4756042957305908
    },
    {
      "epoch": 0.00016585693359375,
      "model_forward_time": 0.11546468734741211,
      "step": 27174
    },
    {
      "epoch": 0.00016585693359375,
      "step": 27174,
      "training_step_time": 0.3916754722595215
    },
    {
      "epoch": 0.000165863037109375,
      "model_forward_time": 0.11540365219116211,
      "step": 27175
    },
    {
      "epoch": 0.000165863037109375,
      "step": 27175,
      "training_step_time": 0.4483978748321533
    },
    {
      "epoch": 0.000165869140625,
      "model_forward_time": 0.11700820922851562,
      "step": 27176
    },
    {
      "epoch": 0.000165869140625,
      "step": 27176,
      "training_step_time": 0.5030148029327393
    },
    {
      "epoch": 0.000165875244140625,
      "model_forward_time": 0.11924624443054199,
      "step": 27177
    },
    {
      "epoch": 0.000165875244140625,
      "step": 27177,
      "training_step_time": 0.6686031818389893
    },
    {
      "epoch": 0.00016588134765625,
      "model_forward_time": 0.1187741756439209,
      "step": 27178
    },
    {
      "epoch": 0.00016588134765625,
      "step": 27178,
      "training_step_time": 0.6218466758728027
    },
    {
      "epoch": 0.000165887451171875,
      "model_forward_time": 0.11703848838806152,
      "step": 27179
    },
    {
      "epoch": 0.000165887451171875,
      "step": 27179,
      "training_step_time": 0.7062880992889404
    },
    {
      "epoch": 0.0001658935546875,
      "grad_norm": 0.13085225224494934,
      "learning_rate": 6.179281599232591e-05,
      "loss": 0.0506,
      "step": 27180
    },
    {
      "epoch": 0.0001658935546875,
      "model_forward_time": 0.12100458145141602,
      "step": 27180
    },
    {
      "epoch": 0.0001658935546875,
      "step": 27180,
      "training_step_time": 0.7237203121185303
    },
    {
      "epoch": 0.000165899658203125,
      "model_forward_time": 0.1189427375793457,
      "step": 27181
    },
    {
      "epoch": 0.000165899658203125,
      "step": 27181,
      "training_step_time": 0.6000466346740723
    },
    {
      "epoch": 0.00016590576171875,
      "model_forward_time": 0.12986135482788086,
      "step": 27182
    },
    {
      "epoch": 0.00016590576171875,
      "step": 27182,
      "training_step_time": 0.7498059272766113
    },
    {
      "epoch": 0.000165911865234375,
      "model_forward_time": 0.12018799781799316,
      "step": 27183
    },
    {
      "epoch": 0.000165911865234375,
      "step": 27183,
      "training_step_time": 0.7566637992858887
    },
    {
      "epoch": 0.00016591796875,
      "model_forward_time": 0.11770296096801758,
      "step": 27184
    },
    {
      "epoch": 0.00016591796875,
      "step": 27184,
      "training_step_time": 0.6299381256103516
    },
    {
      "epoch": 0.000165924072265625,
      "model_forward_time": 0.12001562118530273,
      "step": 27185
    },
    {
      "epoch": 0.000165924072265625,
      "step": 27185,
      "training_step_time": 0.6982722282409668
    },
    {
      "epoch": 0.00016593017578125,
      "model_forward_time": 0.11859703063964844,
      "step": 27186
    },
    {
      "epoch": 0.00016593017578125,
      "step": 27186,
      "training_step_time": 0.8023443222045898
    },
    {
      "epoch": 0.000165936279296875,
      "model_forward_time": 0.14088821411132812,
      "step": 27187
    },
    {
      "epoch": 0.000165936279296875,
      "step": 27187,
      "training_step_time": 0.6401338577270508
    },
    {
      "epoch": 0.0001659423828125,
      "model_forward_time": 0.11969161033630371,
      "step": 27188
    },
    {
      "epoch": 0.0001659423828125,
      "step": 27188,
      "training_step_time": 0.6350493431091309
    },
    {
      "epoch": 0.000165948486328125,
      "model_forward_time": 0.11975669860839844,
      "step": 27189
    },
    {
      "epoch": 0.000165948486328125,
      "step": 27189,
      "training_step_time": 0.7010989189147949
    },
    {
      "epoch": 0.00016595458984375,
      "grad_norm": 0.1932903528213501,
      "learning_rate": 6.17660338354317e-05,
      "loss": 0.0462,
      "step": 27190
    },
    {
      "epoch": 0.00016595458984375,
      "model_forward_time": 0.11983346939086914,
      "step": 27190
    },
    {
      "epoch": 0.00016595458984375,
      "step": 27190,
      "training_step_time": 0.6866817474365234
    },
    {
      "epoch": 0.000165960693359375,
      "model_forward_time": 0.11665582656860352,
      "step": 27191
    },
    {
      "epoch": 0.000165960693359375,
      "step": 27191,
      "training_step_time": 0.6968226432800293
    },
    {
      "epoch": 0.000165966796875,
      "model_forward_time": 0.12099695205688477,
      "step": 27192
    },
    {
      "epoch": 0.000165966796875,
      "step": 27192,
      "training_step_time": 0.6245770454406738
    },
    {
      "epoch": 0.000165972900390625,
      "model_forward_time": 0.11999917030334473,
      "step": 27193
    },
    {
      "epoch": 0.000165972900390625,
      "step": 27193,
      "training_step_time": 0.6431396007537842
    },
    {
      "epoch": 0.00016597900390625,
      "model_forward_time": 0.119720458984375,
      "step": 27194
    },
    {
      "epoch": 0.00016597900390625,
      "step": 27194,
      "training_step_time": 0.6216378211975098
    },
    {
      "epoch": 0.000165985107421875,
      "model_forward_time": 0.13359665870666504,
      "step": 27195
    },
    {
      "epoch": 0.000165985107421875,
      "step": 27195,
      "training_step_time": 0.7265350818634033
    },
    {
      "epoch": 0.0001659912109375,
      "model_forward_time": 0.12838029861450195,
      "step": 27196
    },
    {
      "epoch": 0.0001659912109375,
      "step": 27196,
      "training_step_time": 0.719130277633667
    },
    {
      "epoch": 0.000165997314453125,
      "model_forward_time": 0.1195676326751709,
      "step": 27197
    },
    {
      "epoch": 0.000165997314453125,
      "step": 27197,
      "training_step_time": 0.6581871509552002
    },
    {
      "epoch": 0.00016600341796875,
      "model_forward_time": 0.11765551567077637,
      "step": 27198
    },
    {
      "epoch": 0.00016600341796875,
      "step": 27198,
      "training_step_time": 0.7000501155853271
    },
    {
      "epoch": 0.000166009521484375,
      "model_forward_time": 0.11566948890686035,
      "step": 27199
    },
    {
      "epoch": 0.000166009521484375,
      "step": 27199,
      "training_step_time": 0.6421041488647461
    },
    {
      "epoch": 0.000166015625,
      "grad_norm": 0.14019916951656342,
      "learning_rate": 6.173924810432705e-05,
      "loss": 0.0511,
      "step": 27200
    },
    {
      "epoch": 0.000166015625,
      "model_forward_time": 0.12531757354736328,
      "step": 27200
    },
    {
      "epoch": 0.000166015625,
      "step": 27200,
      "training_step_time": 0.7778723239898682
    },
    {
      "epoch": 0.000166021728515625,
      "model_forward_time": 0.1198129653930664,
      "step": 27201
    },
    {
      "epoch": 0.000166021728515625,
      "step": 27201,
      "training_step_time": 0.780559778213501
    },
    {
      "epoch": 0.00016602783203125,
      "model_forward_time": 0.11837553977966309,
      "step": 27202
    },
    {
      "epoch": 0.00016602783203125,
      "step": 27202,
      "training_step_time": 0.7072184085845947
    },
    {
      "epoch": 0.000166033935546875,
      "model_forward_time": 0.11766242980957031,
      "step": 27203
    },
    {
      "epoch": 0.000166033935546875,
      "step": 27203,
      "training_step_time": 0.6867797374725342
    },
    {
      "epoch": 0.0001660400390625,
      "model_forward_time": 0.11890554428100586,
      "step": 27204
    },
    {
      "epoch": 0.0001660400390625,
      "step": 27204,
      "training_step_time": 0.6448695659637451
    },
    {
      "epoch": 0.000166046142578125,
      "model_forward_time": 0.1219637393951416,
      "step": 27205
    },
    {
      "epoch": 0.000166046142578125,
      "step": 27205,
      "training_step_time": 0.6594936847686768
    },
    {
      "epoch": 0.00016605224609375,
      "model_forward_time": 0.1208353042602539,
      "step": 27206
    },
    {
      "epoch": 0.00016605224609375,
      "step": 27206,
      "training_step_time": 0.6306076049804688
    },
    {
      "epoch": 0.000166058349609375,
      "model_forward_time": 0.12006878852844238,
      "step": 27207
    },
    {
      "epoch": 0.000166058349609375,
      "step": 27207,
      "training_step_time": 0.6930372714996338
    },
    {
      "epoch": 0.000166064453125,
      "model_forward_time": 0.1183781623840332,
      "step": 27208
    },
    {
      "epoch": 0.000166064453125,
      "step": 27208,
      "training_step_time": 0.6576411724090576
    },
    {
      "epoch": 0.000166070556640625,
      "model_forward_time": 0.11984848976135254,
      "step": 27209
    },
    {
      "epoch": 0.000166070556640625,
      "step": 27209,
      "training_step_time": 0.6432862281799316
    },
    {
      "epoch": 0.00016607666015625,
      "grad_norm": 0.16181862354278564,
      "learning_rate": 6.17124588071488e-05,
      "loss": 0.0489,
      "step": 27210
    },
    {
      "epoch": 0.00016607666015625,
      "model_forward_time": 0.12599492073059082,
      "step": 27210
    },
    {
      "epoch": 0.00016607666015625,
      "step": 27210,
      "training_step_time": 0.7321140766143799
    },
    {
      "epoch": 0.000166082763671875,
      "model_forward_time": 0.12652015686035156,
      "step": 27211
    },
    {
      "epoch": 0.000166082763671875,
      "step": 27211,
      "training_step_time": 0.7115120887756348
    },
    {
      "epoch": 0.0001660888671875,
      "model_forward_time": 0.12049293518066406,
      "step": 27212
    },
    {
      "epoch": 0.0001660888671875,
      "step": 27212,
      "training_step_time": 0.7188167572021484
    },
    {
      "epoch": 0.000166094970703125,
      "model_forward_time": 0.1193077564239502,
      "step": 27213
    },
    {
      "epoch": 0.000166094970703125,
      "step": 27213,
      "training_step_time": 0.6325275897979736
    },
    {
      "epoch": 0.00016610107421875,
      "model_forward_time": 0.11916303634643555,
      "step": 27214
    },
    {
      "epoch": 0.00016610107421875,
      "step": 27214,
      "training_step_time": 0.7513227462768555
    },
    {
      "epoch": 0.000166107177734375,
      "model_forward_time": 0.11948204040527344,
      "step": 27215
    },
    {
      "epoch": 0.000166107177734375,
      "step": 27215,
      "training_step_time": 0.6809229850769043
    },
    {
      "epoch": 0.00016611328125,
      "model_forward_time": 0.12506580352783203,
      "step": 27216
    },
    {
      "epoch": 0.00016611328125,
      "step": 27216,
      "training_step_time": 0.6387848854064941
    },
    {
      "epoch": 0.000166119384765625,
      "model_forward_time": 0.12282633781433105,
      "step": 27217
    },
    {
      "epoch": 0.000166119384765625,
      "step": 27217,
      "training_step_time": 0.6506109237670898
    },
    {
      "epoch": 0.00016612548828125,
      "model_forward_time": 0.12207293510437012,
      "step": 27218
    },
    {
      "epoch": 0.00016612548828125,
      "step": 27218,
      "training_step_time": 0.6672394275665283
    },
    {
      "epoch": 0.000166131591796875,
      "model_forward_time": 0.1196589469909668,
      "step": 27219
    },
    {
      "epoch": 0.000166131591796875,
      "step": 27219,
      "training_step_time": 0.7014853954315186
    },
    {
      "epoch": 0.0001661376953125,
      "grad_norm": 0.09277259558439255,
      "learning_rate": 6.168566595203479e-05,
      "loss": 0.0495,
      "step": 27220
    },
    {
      "epoch": 0.0001661376953125,
      "model_forward_time": 0.11806344985961914,
      "step": 27220
    },
    {
      "epoch": 0.0001661376953125,
      "step": 27220,
      "training_step_time": 0.7438094615936279
    },
    {
      "epoch": 0.000166143798828125,
      "model_forward_time": 0.11995148658752441,
      "step": 27221
    },
    {
      "epoch": 0.000166143798828125,
      "step": 27221,
      "training_step_time": 0.6498293876647949
    },
    {
      "epoch": 0.00016614990234375,
      "model_forward_time": 0.1181795597076416,
      "step": 27222
    },
    {
      "epoch": 0.00016614990234375,
      "step": 27222,
      "training_step_time": 0.7125289440155029
    },
    {
      "epoch": 0.000166156005859375,
      "model_forward_time": 0.11611652374267578,
      "step": 27223
    },
    {
      "epoch": 0.000166156005859375,
      "step": 27223,
      "training_step_time": 0.724717378616333
    },
    {
      "epoch": 0.000166162109375,
      "model_forward_time": 0.11917471885681152,
      "step": 27224
    },
    {
      "epoch": 0.000166162109375,
      "step": 27224,
      "training_step_time": 0.6722917556762695
    },
    {
      "epoch": 0.000166168212890625,
      "model_forward_time": 0.11619091033935547,
      "step": 27225
    },
    {
      "epoch": 0.000166168212890625,
      "step": 27225,
      "training_step_time": 0.6582245826721191
    },
    {
      "epoch": 0.00016617431640625,
      "model_forward_time": 0.12589383125305176,
      "step": 27226
    },
    {
      "epoch": 0.00016617431640625,
      "step": 27226,
      "training_step_time": 0.631319522857666
    },
    {
      "epoch": 0.000166180419921875,
      "model_forward_time": 0.1190481185913086,
      "step": 27227
    },
    {
      "epoch": 0.000166180419921875,
      "step": 27227,
      "training_step_time": 0.6626424789428711
    },
    {
      "epoch": 0.0001661865234375,
      "model_forward_time": 0.11973810195922852,
      "step": 27228
    },
    {
      "epoch": 0.0001661865234375,
      "step": 27228,
      "training_step_time": 0.6405735015869141
    },
    {
      "epoch": 0.000166192626953125,
      "model_forward_time": 0.1216118335723877,
      "step": 27229
    },
    {
      "epoch": 0.000166192626953125,
      "step": 27229,
      "training_step_time": 0.699437141418457
    },
    {
      "epoch": 0.00016619873046875,
      "grad_norm": 0.15407715737819672,
      "learning_rate": 6.165886954712401e-05,
      "loss": 0.0511,
      "step": 27230
    },
    {
      "epoch": 0.00016619873046875,
      "model_forward_time": 0.1207115650177002,
      "step": 27230
    },
    {
      "epoch": 0.00016619873046875,
      "step": 27230,
      "training_step_time": 0.7686381340026855
    },
    {
      "epoch": 0.000166204833984375,
      "model_forward_time": 0.11606812477111816,
      "step": 27231
    },
    {
      "epoch": 0.000166204833984375,
      "step": 27231,
      "training_step_time": 0.6556339263916016
    },
    {
      "epoch": 0.0001662109375,
      "model_forward_time": 0.12084293365478516,
      "step": 27232
    },
    {
      "epoch": 0.0001662109375,
      "step": 27232,
      "training_step_time": 0.6634430885314941
    },
    {
      "epoch": 0.000166217041015625,
      "model_forward_time": 0.1361403465270996,
      "step": 27233
    },
    {
      "epoch": 0.000166217041015625,
      "step": 27233,
      "training_step_time": 0.6814649105072021
    },
    {
      "epoch": 0.00016622314453125,
      "model_forward_time": 0.12121438980102539,
      "step": 27234
    },
    {
      "epoch": 0.00016622314453125,
      "step": 27234,
      "training_step_time": 0.6745932102203369
    },
    {
      "epoch": 0.000166229248046875,
      "model_forward_time": 0.11687564849853516,
      "step": 27235
    },
    {
      "epoch": 0.000166229248046875,
      "step": 27235,
      "training_step_time": 0.6390655040740967
    },
    {
      "epoch": 0.0001662353515625,
      "model_forward_time": 0.1259908676147461,
      "step": 27236
    },
    {
      "epoch": 0.0001662353515625,
      "step": 27236,
      "training_step_time": 0.7062487602233887
    },
    {
      "epoch": 0.000166241455078125,
      "model_forward_time": 0.12108349800109863,
      "step": 27237
    },
    {
      "epoch": 0.000166241455078125,
      "step": 27237,
      "training_step_time": 0.608487606048584
    },
    {
      "epoch": 0.00016624755859375,
      "model_forward_time": 0.12651824951171875,
      "step": 27238
    },
    {
      "epoch": 0.00016624755859375,
      "step": 27238,
      "training_step_time": 0.6941802501678467
    },
    {
      "epoch": 0.000166253662109375,
      "model_forward_time": 0.13603687286376953,
      "step": 27239
    },
    {
      "epoch": 0.000166253662109375,
      "step": 27239,
      "training_step_time": 0.6437954902648926
    },
    {
      "epoch": 0.000166259765625,
      "grad_norm": 0.1669512540102005,
      "learning_rate": 6.163206960055651e-05,
      "loss": 0.0519,
      "step": 27240
    },
    {
      "epoch": 0.000166259765625,
      "model_forward_time": 0.11843991279602051,
      "step": 27240
    },
    {
      "epoch": 0.000166259765625,
      "step": 27240,
      "training_step_time": 0.6953537464141846
    },
    {
      "epoch": 0.000166265869140625,
      "model_forward_time": 0.12013387680053711,
      "step": 27241
    },
    {
      "epoch": 0.000166265869140625,
      "step": 27241,
      "training_step_time": 0.7269001007080078
    },
    {
      "epoch": 0.00016627197265625,
      "model_forward_time": 0.12553763389587402,
      "step": 27242
    },
    {
      "epoch": 0.00016627197265625,
      "step": 27242,
      "training_step_time": 0.6101207733154297
    },
    {
      "epoch": 0.000166278076171875,
      "model_forward_time": 0.12220144271850586,
      "step": 27243
    },
    {
      "epoch": 0.000166278076171875,
      "step": 27243,
      "training_step_time": 0.5841457843780518
    },
    {
      "epoch": 0.0001662841796875,
      "model_forward_time": 0.1310274600982666,
      "step": 27244
    },
    {
      "epoch": 0.0001662841796875,
      "step": 27244,
      "training_step_time": 0.5624241828918457
    },
    {
      "epoch": 0.000166290283203125,
      "model_forward_time": 0.12052035331726074,
      "step": 27245
    },
    {
      "epoch": 0.000166290283203125,
      "step": 27245,
      "training_step_time": 0.5663223266601562
    },
    {
      "epoch": 0.00016629638671875,
      "model_forward_time": 0.11917281150817871,
      "step": 27246
    },
    {
      "epoch": 0.00016629638671875,
      "step": 27246,
      "training_step_time": 0.5935471057891846
    },
    {
      "epoch": 0.000166302490234375,
      "model_forward_time": 0.11795234680175781,
      "step": 27247
    },
    {
      "epoch": 0.000166302490234375,
      "step": 27247,
      "training_step_time": 0.5222077369689941
    },
    {
      "epoch": 0.00016630859375,
      "model_forward_time": 0.11856937408447266,
      "step": 27248
    },
    {
      "epoch": 0.00016630859375,
      "step": 27248,
      "training_step_time": 0.5610101222991943
    },
    {
      "epoch": 0.000166314697265625,
      "model_forward_time": 0.11710929870605469,
      "step": 27249
    },
    {
      "epoch": 0.000166314697265625,
      "step": 27249,
      "training_step_time": 0.5516061782836914
    },
    {
      "epoch": 0.00016632080078125,
      "grad_norm": 0.1546250730752945,
      "learning_rate": 6.16052661204734e-05,
      "loss": 0.0482,
      "step": 27250
    },
    {
      "epoch": 0.00016632080078125,
      "model_forward_time": 0.11612319946289062,
      "step": 27250
    },
    {
      "epoch": 0.00016632080078125,
      "step": 27250,
      "training_step_time": 0.5086421966552734
    },
    {
      "epoch": 0.000166326904296875,
      "model_forward_time": 0.1156160831451416,
      "step": 27251
    },
    {
      "epoch": 0.000166326904296875,
      "step": 27251,
      "training_step_time": 0.455904483795166
    },
    {
      "epoch": 0.0001663330078125,
      "model_forward_time": 0.11601567268371582,
      "step": 27252
    },
    {
      "epoch": 0.0001663330078125,
      "step": 27252,
      "training_step_time": 0.5421760082244873
    },
    {
      "epoch": 0.000166339111328125,
      "model_forward_time": 0.11814451217651367,
      "step": 27253
    },
    {
      "epoch": 0.000166339111328125,
      "step": 27253,
      "training_step_time": 0.48807716369628906
    },
    {
      "epoch": 0.00016634521484375,
      "model_forward_time": 0.1146996021270752,
      "step": 27254
    },
    {
      "epoch": 0.00016634521484375,
      "step": 27254,
      "training_step_time": 0.5217418670654297
    },
    {
      "epoch": 0.000166351318359375,
      "model_forward_time": 0.1147000789642334,
      "step": 27255
    },
    {
      "epoch": 0.000166351318359375,
      "step": 27255,
      "training_step_time": 0.3902285099029541
    },
    {
      "epoch": 0.000166357421875,
      "model_forward_time": 0.11469292640686035,
      "step": 27256
    },
    {
      "epoch": 0.000166357421875,
      "step": 27256,
      "training_step_time": 0.3998582363128662
    },
    {
      "epoch": 0.000166363525390625,
      "model_forward_time": 0.11471939086914062,
      "step": 27257
    },
    {
      "epoch": 0.000166363525390625,
      "step": 27257,
      "training_step_time": 0.3997979164123535
    },
    {
      "epoch": 0.00016636962890625,
      "model_forward_time": 0.1161646842956543,
      "step": 27258
    },
    {
      "epoch": 0.00016636962890625,
      "step": 27258,
      "training_step_time": 0.3998596668243408
    },
    {
      "epoch": 0.000166375732421875,
      "model_forward_time": 0.11500144004821777,
      "step": 27259
    },
    {
      "epoch": 0.000166375732421875,
      "step": 27259,
      "training_step_time": 0.4047260284423828
    },
    {
      "epoch": 0.0001663818359375,
      "grad_norm": 0.09848806262016296,
      "learning_rate": 6.157845911501684e-05,
      "loss": 0.0528,
      "step": 27260
    },
    {
      "epoch": 0.0001663818359375,
      "model_forward_time": 0.11679434776306152,
      "step": 27260
    },
    {
      "epoch": 0.0001663818359375,
      "step": 27260,
      "training_step_time": 0.3948495388031006
    },
    {
      "epoch": 0.000166387939453125,
      "model_forward_time": 0.11576223373413086,
      "step": 27261
    },
    {
      "epoch": 0.000166387939453125,
      "step": 27261,
      "training_step_time": 0.39954566955566406
    },
    {
      "epoch": 0.00016639404296875,
      "model_forward_time": 0.1155691146850586,
      "step": 27262
    },
    {
      "epoch": 0.00016639404296875,
      "step": 27262,
      "training_step_time": 0.4699077606201172
    },
    {
      "epoch": 0.000166400146484375,
      "model_forward_time": 0.11538481712341309,
      "step": 27263
    },
    {
      "epoch": 0.000166400146484375,
      "step": 27263,
      "training_step_time": 0.3639967441558838
    },
    {
      "epoch": 0.00016640625,
      "model_forward_time": 0.11591029167175293,
      "step": 27264
    },
    {
      "epoch": 0.00016640625,
      "step": 27264,
      "training_step_time": 0.49048352241516113
    },
    {
      "epoch": 0.000166412353515625,
      "model_forward_time": 0.1186525821685791,
      "step": 27265
    },
    {
      "epoch": 0.000166412353515625,
      "step": 27265,
      "training_step_time": 0.4008321762084961
    },
    {
      "epoch": 0.00016641845703125,
      "model_forward_time": 0.1179819107055664,
      "step": 27266
    },
    {
      "epoch": 0.00016641845703125,
      "step": 27266,
      "training_step_time": 0.3871779441833496
    },
    {
      "epoch": 0.000166424560546875,
      "model_forward_time": 0.11786818504333496,
      "step": 27267
    },
    {
      "epoch": 0.000166424560546875,
      "step": 27267,
      "training_step_time": 0.404468297958374
    },
    {
      "epoch": 0.0001664306640625,
      "model_forward_time": 0.11768817901611328,
      "step": 27268
    },
    {
      "epoch": 0.0001664306640625,
      "step": 27268,
      "training_step_time": 0.4300045967102051
    },
    {
      "epoch": 0.000166436767578125,
      "model_forward_time": 0.11800098419189453,
      "step": 27269
    },
    {
      "epoch": 0.000166436767578125,
      "step": 27269,
      "training_step_time": 0.38649773597717285
    },
    {
      "epoch": 0.00016644287109375,
      "grad_norm": 0.12293583154678345,
      "learning_rate": 6.155164859233012e-05,
      "loss": 0.0467,
      "step": 27270
    },
    {
      "epoch": 0.00016644287109375,
      "model_forward_time": 0.11583685874938965,
      "step": 27270
    },
    {
      "epoch": 0.00016644287109375,
      "step": 27270,
      "training_step_time": 0.40628695487976074
    },
    {
      "epoch": 0.000166448974609375,
      "model_forward_time": 0.11498808860778809,
      "step": 27271
    },
    {
      "epoch": 0.000166448974609375,
      "step": 27271,
      "training_step_time": 0.39830803871154785
    },
    {
      "epoch": 0.000166455078125,
      "model_forward_time": 0.11524510383605957,
      "step": 27272
    },
    {
      "epoch": 0.000166455078125,
      "step": 27272,
      "training_step_time": 0.39724254608154297
    },
    {
      "epoch": 0.000166461181640625,
      "model_forward_time": 0.11528897285461426,
      "step": 27273
    },
    {
      "epoch": 0.000166461181640625,
      "step": 27273,
      "training_step_time": 0.3962821960449219
    },
    {
      "epoch": 0.00016646728515625,
      "model_forward_time": 0.11559295654296875,
      "step": 27274
    },
    {
      "epoch": 0.00016646728515625,
      "step": 27274,
      "training_step_time": 0.4223318099975586
    },
    {
      "epoch": 0.000166473388671875,
      "model_forward_time": 0.11495304107666016,
      "step": 27275
    },
    {
      "epoch": 0.000166473388671875,
      "step": 27275,
      "training_step_time": 0.41498422622680664
    },
    {
      "epoch": 0.0001664794921875,
      "model_forward_time": 0.11589932441711426,
      "step": 27276
    },
    {
      "epoch": 0.0001664794921875,
      "step": 27276,
      "training_step_time": 0.3961310386657715
    },
    {
      "epoch": 0.000166485595703125,
      "model_forward_time": 0.11670613288879395,
      "step": 27277
    },
    {
      "epoch": 0.000166485595703125,
      "step": 27277,
      "training_step_time": 0.48316526412963867
    },
    {
      "epoch": 0.00016649169921875,
      "model_forward_time": 0.11605334281921387,
      "step": 27278
    },
    {
      "epoch": 0.00016649169921875,
      "step": 27278,
      "training_step_time": 0.4047415256500244
    },
    {
      "epoch": 0.000166497802734375,
      "model_forward_time": 0.11554718017578125,
      "step": 27279
    },
    {
      "epoch": 0.000166497802734375,
      "step": 27279,
      "training_step_time": 0.47260451316833496
    },
    {
      "epoch": 0.00016650390625,
      "grad_norm": 0.12122435122728348,
      "learning_rate": 6.152483456055756e-05,
      "loss": 0.052,
      "step": 27280
    },
    {
      "epoch": 0.00016650390625,
      "model_forward_time": 0.11517906188964844,
      "step": 27280
    },
    {
      "epoch": 0.00016650390625,
      "step": 27280,
      "training_step_time": 0.3937263488769531
    },
    {
      "epoch": 0.000166510009765625,
      "model_forward_time": 0.11542677879333496,
      "step": 27281
    },
    {
      "epoch": 0.000166510009765625,
      "step": 27281,
      "training_step_time": 0.43109798431396484
    },
    {
      "epoch": 0.00016651611328125,
      "model_forward_time": 0.11610174179077148,
      "step": 27282
    },
    {
      "epoch": 0.00016651611328125,
      "step": 27282,
      "training_step_time": 0.39256906509399414
    },
    {
      "epoch": 0.000166522216796875,
      "model_forward_time": 0.1161048412322998,
      "step": 27283
    },
    {
      "epoch": 0.000166522216796875,
      "step": 27283,
      "training_step_time": 0.41885876655578613
    },
    {
      "epoch": 0.0001665283203125,
      "model_forward_time": 0.11500859260559082,
      "step": 27284
    },
    {
      "epoch": 0.0001665283203125,
      "step": 27284,
      "training_step_time": 0.4107093811035156
    },
    {
      "epoch": 0.000166534423828125,
      "model_forward_time": 0.11571478843688965,
      "step": 27285
    },
    {
      "epoch": 0.000166534423828125,
      "step": 27285,
      "training_step_time": 0.4068124294281006
    },
    {
      "epoch": 0.00016654052734375,
      "model_forward_time": 0.11584877967834473,
      "step": 27286
    },
    {
      "epoch": 0.00016654052734375,
      "step": 27286,
      "training_step_time": 0.39844465255737305
    },
    {
      "epoch": 0.000166546630859375,
      "model_forward_time": 0.11575460433959961,
      "step": 27287
    },
    {
      "epoch": 0.000166546630859375,
      "step": 27287,
      "training_step_time": 0.40425682067871094
    },
    {
      "epoch": 0.000166552734375,
      "model_forward_time": 0.11591887474060059,
      "step": 27288
    },
    {
      "epoch": 0.000166552734375,
      "step": 27288,
      "training_step_time": 0.3823864459991455
    },
    {
      "epoch": 0.000166558837890625,
      "model_forward_time": 0.11571359634399414,
      "step": 27289
    },
    {
      "epoch": 0.000166558837890625,
      "step": 27289,
      "training_step_time": 0.4158468246459961
    },
    {
      "epoch": 0.00016656494140625,
      "grad_norm": 0.15095092356204987,
      "learning_rate": 6.149801702784456e-05,
      "loss": 0.0509,
      "step": 27290
    },
    {
      "epoch": 0.00016656494140625,
      "model_forward_time": 0.11541056632995605,
      "step": 27290
    },
    {
      "epoch": 0.00016656494140625,
      "step": 27290,
      "training_step_time": 0.4012289047241211
    },
    {
      "epoch": 0.000166571044921875,
      "model_forward_time": 0.1153726577758789,
      "step": 27291
    },
    {
      "epoch": 0.000166571044921875,
      "step": 27291,
      "training_step_time": 0.3850102424621582
    },
    {
      "epoch": 0.0001665771484375,
      "model_forward_time": 0.11511087417602539,
      "step": 27292
    },
    {
      "epoch": 0.0001665771484375,
      "step": 27292,
      "training_step_time": 0.4012947082519531
    },
    {
      "epoch": 0.000166583251953125,
      "model_forward_time": 0.1153862476348877,
      "step": 27293
    },
    {
      "epoch": 0.000166583251953125,
      "step": 27293,
      "training_step_time": 0.45282602310180664
    },
    {
      "epoch": 0.00016658935546875,
      "model_forward_time": 0.11522483825683594,
      "step": 27294
    },
    {
      "epoch": 0.00016658935546875,
      "step": 27294,
      "training_step_time": 0.5021257400512695
    },
    {
      "epoch": 0.000166595458984375,
      "model_forward_time": 0.11587715148925781,
      "step": 27295
    },
    {
      "epoch": 0.000166595458984375,
      "step": 27295,
      "training_step_time": 0.4298286437988281
    },
    {
      "epoch": 0.0001666015625,
      "model_forward_time": 0.11545801162719727,
      "step": 27296
    },
    {
      "epoch": 0.0001666015625,
      "step": 27296,
      "training_step_time": 0.5005593299865723
    },
    {
      "epoch": 0.000166607666015625,
      "model_forward_time": 0.11736035346984863,
      "step": 27297
    },
    {
      "epoch": 0.000166607666015625,
      "step": 27297,
      "training_step_time": 0.3912169933319092
    },
    {
      "epoch": 0.00016661376953125,
      "model_forward_time": 0.11552834510803223,
      "step": 27298
    },
    {
      "epoch": 0.00016661376953125,
      "step": 27298,
      "training_step_time": 0.4058091640472412
    },
    {
      "epoch": 0.000166619873046875,
      "model_forward_time": 0.11455154418945312,
      "step": 27299
    },
    {
      "epoch": 0.000166619873046875,
      "step": 27299,
      "training_step_time": 0.39377284049987793
    },
    {
      "epoch": 0.0001666259765625,
      "grad_norm": 0.12247274070978165,
      "learning_rate": 6.147119600233758e-05,
      "loss": 0.0505,
      "step": 27300
    },
    {
      "epoch": 0.0001666259765625,
      "model_forward_time": 0.1156606674194336,
      "step": 27300
    },
    {
      "epoch": 0.0001666259765625,
      "step": 27300,
      "training_step_time": 0.3954763412475586
    },
    {
      "epoch": 0.000166632080078125,
      "model_forward_time": 0.11542296409606934,
      "step": 27301
    },
    {
      "epoch": 0.000166632080078125,
      "step": 27301,
      "training_step_time": 0.39414143562316895
    },
    {
      "epoch": 0.00016663818359375,
      "model_forward_time": 0.1156005859375,
      "step": 27302
    },
    {
      "epoch": 0.00016663818359375,
      "step": 27302,
      "training_step_time": 0.4367940425872803
    },
    {
      "epoch": 0.000166644287109375,
      "model_forward_time": 0.11561417579650879,
      "step": 27303
    },
    {
      "epoch": 0.000166644287109375,
      "step": 27303,
      "training_step_time": 0.38857316970825195
    },
    {
      "epoch": 0.000166650390625,
      "model_forward_time": 0.11482095718383789,
      "step": 27304
    },
    {
      "epoch": 0.000166650390625,
      "step": 27304,
      "training_step_time": 0.4302530288696289
    },
    {
      "epoch": 0.000166656494140625,
      "model_forward_time": 0.11532330513000488,
      "step": 27305
    },
    {
      "epoch": 0.000166656494140625,
      "step": 27305,
      "training_step_time": 0.3992598056793213
    },
    {
      "epoch": 0.00016666259765625,
      "model_forward_time": 0.11597251892089844,
      "step": 27306
    },
    {
      "epoch": 0.00016666259765625,
      "step": 27306,
      "training_step_time": 0.4034717082977295
    },
    {
      "epoch": 0.000166668701171875,
      "model_forward_time": 0.11508703231811523,
      "step": 27307
    },
    {
      "epoch": 0.000166668701171875,
      "step": 27307,
      "training_step_time": 0.36580324172973633
    },
    {
      "epoch": 0.0001666748046875,
      "model_forward_time": 0.11484646797180176,
      "step": 27308
    },
    {
      "epoch": 0.0001666748046875,
      "step": 27308,
      "training_step_time": 0.45707106590270996
    },
    {
      "epoch": 0.000166680908203125,
      "model_forward_time": 0.11523079872131348,
      "step": 27309
    },
    {
      "epoch": 0.000166680908203125,
      "step": 27309,
      "training_step_time": 0.44597578048706055
    },
    {
      "epoch": 0.00016668701171875,
      "grad_norm": 0.11646618694067001,
      "learning_rate": 6.144437149218415e-05,
      "loss": 0.0456,
      "step": 27310
    },
    {
      "epoch": 0.00016668701171875,
      "model_forward_time": 0.1155707836151123,
      "step": 27310
    },
    {
      "epoch": 0.00016668701171875,
      "step": 27310,
      "training_step_time": 0.4020695686340332
    },
    {
      "epoch": 0.000166693115234375,
      "model_forward_time": 0.11514449119567871,
      "step": 27311
    },
    {
      "epoch": 0.000166693115234375,
      "step": 27311,
      "training_step_time": 0.4069709777832031
    },
    {
      "epoch": 0.00016669921875,
      "model_forward_time": 0.11531853675842285,
      "step": 27312
    },
    {
      "epoch": 0.00016669921875,
      "step": 27312,
      "training_step_time": 0.48749351501464844
    },
    {
      "epoch": 0.000166705322265625,
      "model_forward_time": 0.11525726318359375,
      "step": 27313
    },
    {
      "epoch": 0.000166705322265625,
      "step": 27313,
      "training_step_time": 0.39901304244995117
    },
    {
      "epoch": 0.00016671142578125,
      "model_forward_time": 0.11800765991210938,
      "step": 27314
    },
    {
      "epoch": 0.00016671142578125,
      "step": 27314,
      "training_step_time": 0.3870532512664795
    },
    {
      "epoch": 0.000166717529296875,
      "model_forward_time": 0.11507225036621094,
      "step": 27315
    },
    {
      "epoch": 0.000166717529296875,
      "step": 27315,
      "training_step_time": 0.3863992691040039
    },
    {
      "epoch": 0.0001667236328125,
      "model_forward_time": 0.11539554595947266,
      "step": 27316
    },
    {
      "epoch": 0.0001667236328125,
      "step": 27316,
      "training_step_time": 0.4014897346496582
    },
    {
      "epoch": 0.000166729736328125,
      "model_forward_time": 0.11494708061218262,
      "step": 27317
    },
    {
      "epoch": 0.000166729736328125,
      "step": 27317,
      "training_step_time": 0.42291736602783203
    },
    {
      "epoch": 0.00016673583984375,
      "model_forward_time": 0.1154789924621582,
      "step": 27318
    },
    {
      "epoch": 0.00016673583984375,
      "step": 27318,
      "training_step_time": 0.4040055274963379
    },
    {
      "epoch": 0.000166741943359375,
      "model_forward_time": 0.11492085456848145,
      "step": 27319
    },
    {
      "epoch": 0.000166741943359375,
      "step": 27319,
      "training_step_time": 0.3981337547302246
    },
    {
      "epoch": 0.000166748046875,
      "grad_norm": 0.11983133852481842,
      "learning_rate": 6.141754350553279e-05,
      "loss": 0.0477,
      "step": 27320
    },
    {
      "epoch": 0.000166748046875,
      "model_forward_time": 0.11582541465759277,
      "step": 27320
    },
    {
      "epoch": 0.000166748046875,
      "step": 27320,
      "training_step_time": 0.4274711608886719
    },
    {
      "epoch": 0.000166754150390625,
      "model_forward_time": 0.11535310745239258,
      "step": 27321
    },
    {
      "epoch": 0.000166754150390625,
      "step": 27321,
      "training_step_time": 0.4002106189727783
    },
    {
      "epoch": 0.00016676025390625,
      "model_forward_time": 0.11572909355163574,
      "step": 27322
    },
    {
      "epoch": 0.00016676025390625,
      "step": 27322,
      "training_step_time": 0.4535379409790039
    },
    {
      "epoch": 0.000166766357421875,
      "model_forward_time": 0.11517620086669922,
      "step": 27323
    },
    {
      "epoch": 0.000166766357421875,
      "step": 27323,
      "training_step_time": 0.46050548553466797
    },
    {
      "epoch": 0.0001667724609375,
      "model_forward_time": 0.1156926155090332,
      "step": 27324
    },
    {
      "epoch": 0.0001667724609375,
      "step": 27324,
      "training_step_time": 0.4295957088470459
    },
    {
      "epoch": 0.000166778564453125,
      "model_forward_time": 0.11510372161865234,
      "step": 27325
    },
    {
      "epoch": 0.000166778564453125,
      "step": 27325,
      "training_step_time": 0.43556976318359375
    },
    {
      "epoch": 0.00016678466796875,
      "model_forward_time": 0.11457586288452148,
      "step": 27326
    },
    {
      "epoch": 0.00016678466796875,
      "step": 27326,
      "training_step_time": 0.45353269577026367
    },
    {
      "epoch": 0.000166790771484375,
      "model_forward_time": 0.1152503490447998,
      "step": 27327
    },
    {
      "epoch": 0.000166790771484375,
      "step": 27327,
      "training_step_time": 0.39339303970336914
    },
    {
      "epoch": 0.000166796875,
      "model_forward_time": 0.11560535430908203,
      "step": 27328
    },
    {
      "epoch": 0.000166796875,
      "step": 27328,
      "training_step_time": 0.3940281867980957
    },
    {
      "epoch": 0.000166802978515625,
      "model_forward_time": 0.11541080474853516,
      "step": 27329
    },
    {
      "epoch": 0.000166802978515625,
      "step": 27329,
      "training_step_time": 0.38453125953674316
    },
    {
      "epoch": 0.00016680908203125,
      "grad_norm": 0.14027531445026398,
      "learning_rate": 6.13907120505332e-05,
      "loss": 0.0494,
      "step": 27330
    },
    {
      "epoch": 0.00016680908203125,
      "model_forward_time": 0.11512446403503418,
      "step": 27330
    },
    {
      "epoch": 0.00016680908203125,
      "step": 27330,
      "training_step_time": 0.4542100429534912
    },
    {
      "epoch": 0.000166815185546875,
      "model_forward_time": 0.1149442195892334,
      "step": 27331
    },
    {
      "epoch": 0.000166815185546875,
      "step": 27331,
      "training_step_time": 0.4037928581237793
    },
    {
      "epoch": 0.0001668212890625,
      "model_forward_time": 0.11474132537841797,
      "step": 27332
    },
    {
      "epoch": 0.0001668212890625,
      "step": 27332,
      "training_step_time": 0.3988323211669922
    },
    {
      "epoch": 0.000166827392578125,
      "model_forward_time": 0.11512136459350586,
      "step": 27333
    },
    {
      "epoch": 0.000166827392578125,
      "step": 27333,
      "training_step_time": 0.38874316215515137
    },
    {
      "epoch": 0.00016683349609375,
      "model_forward_time": 0.1154625415802002,
      "step": 27334
    },
    {
      "epoch": 0.00016683349609375,
      "step": 27334,
      "training_step_time": 0.39142560958862305
    },
    {
      "epoch": 0.000166839599609375,
      "model_forward_time": 0.1154778003692627,
      "step": 27335
    },
    {
      "epoch": 0.000166839599609375,
      "step": 27335,
      "training_step_time": 0.45281028747558594
    },
    {
      "epoch": 0.000166845703125,
      "model_forward_time": 0.11494755744934082,
      "step": 27336
    },
    {
      "epoch": 0.000166845703125,
      "step": 27336,
      "training_step_time": 0.39469003677368164
    },
    {
      "epoch": 0.000166851806640625,
      "model_forward_time": 0.11556529998779297,
      "step": 27337
    },
    {
      "epoch": 0.000166851806640625,
      "step": 27337,
      "training_step_time": 0.4670534133911133
    },
    {
      "epoch": 0.00016685791015625,
      "model_forward_time": 0.11490011215209961,
      "step": 27338
    },
    {
      "epoch": 0.00016685791015625,
      "step": 27338,
      "training_step_time": 0.4223041534423828
    },
    {
      "epoch": 0.000166864013671875,
      "model_forward_time": 0.11589527130126953,
      "step": 27339
    },
    {
      "epoch": 0.000166864013671875,
      "step": 27339,
      "training_step_time": 0.3866868019104004
    },
    {
      "epoch": 0.0001668701171875,
      "grad_norm": 0.1174837127327919,
      "learning_rate": 6.136387713533603e-05,
      "loss": 0.049,
      "step": 27340
    },
    {
      "epoch": 0.0001668701171875,
      "model_forward_time": 0.11572885513305664,
      "step": 27340
    },
    {
      "epoch": 0.0001668701171875,
      "step": 27340,
      "training_step_time": 0.44211244583129883
    },
    {
      "epoch": 0.000166876220703125,
      "model_forward_time": 0.1150350570678711,
      "step": 27341
    },
    {
      "epoch": 0.000166876220703125,
      "step": 27341,
      "training_step_time": 0.42528200149536133
    },
    {
      "epoch": 0.00016688232421875,
      "model_forward_time": 0.11521267890930176,
      "step": 27342
    },
    {
      "epoch": 0.00016688232421875,
      "step": 27342,
      "training_step_time": 0.40278148651123047
    },
    {
      "epoch": 0.000166888427734375,
      "model_forward_time": 0.11641168594360352,
      "step": 27343
    },
    {
      "epoch": 0.000166888427734375,
      "step": 27343,
      "training_step_time": 0.38832783699035645
    },
    {
      "epoch": 0.00016689453125,
      "model_forward_time": 0.11545157432556152,
      "step": 27344
    },
    {
      "epoch": 0.00016689453125,
      "step": 27344,
      "training_step_time": 0.38709092140197754
    },
    {
      "epoch": 0.000166900634765625,
      "model_forward_time": 0.11522102355957031,
      "step": 27345
    },
    {
      "epoch": 0.000166900634765625,
      "step": 27345,
      "training_step_time": 0.400540828704834
    },
    {
      "epoch": 0.00016690673828125,
      "model_forward_time": 0.11562919616699219,
      "step": 27346
    },
    {
      "epoch": 0.00016690673828125,
      "step": 27346,
      "training_step_time": 0.3904421329498291
    },
    {
      "epoch": 0.000166912841796875,
      "model_forward_time": 0.11568951606750488,
      "step": 27347
    },
    {
      "epoch": 0.000166912841796875,
      "step": 27347,
      "training_step_time": 0.38651084899902344
    },
    {
      "epoch": 0.0001669189453125,
      "model_forward_time": 0.11575651168823242,
      "step": 27348
    },
    {
      "epoch": 0.0001669189453125,
      "step": 27348,
      "training_step_time": 0.39566993713378906
    },
    {
      "epoch": 0.000166925048828125,
      "model_forward_time": 0.11722135543823242,
      "step": 27349
    },
    {
      "epoch": 0.000166925048828125,
      "step": 27349,
      "training_step_time": 0.3925936222076416
    },
    {
      "epoch": 0.00016693115234375,
      "grad_norm": 0.12333639711141586,
      "learning_rate": 6.133703876809305e-05,
      "loss": 0.0539,
      "step": 27350
    },
    {
      "epoch": 0.00016693115234375,
      "model_forward_time": 0.11562752723693848,
      "step": 27350
    },
    {
      "epoch": 0.00016693115234375,
      "step": 27350,
      "training_step_time": 0.449857234954834
    },
    {
      "epoch": 0.000166937255859375,
      "model_forward_time": 0.11461377143859863,
      "step": 27351
    },
    {
      "epoch": 0.000166937255859375,
      "step": 27351,
      "training_step_time": 0.47758960723876953
    },
    {
      "epoch": 0.000166943359375,
      "model_forward_time": 0.11483287811279297,
      "step": 27352
    },
    {
      "epoch": 0.000166943359375,
      "step": 27352,
      "training_step_time": 0.46346616744995117
    },
    {
      "epoch": 0.000166949462890625,
      "model_forward_time": 0.11497712135314941,
      "step": 27353
    },
    {
      "epoch": 0.000166949462890625,
      "step": 27353,
      "training_step_time": 0.4099307060241699
    },
    {
      "epoch": 0.00016695556640625,
      "model_forward_time": 0.11487102508544922,
      "step": 27354
    },
    {
      "epoch": 0.00016695556640625,
      "step": 27354,
      "training_step_time": 0.49375391006469727
    },
    {
      "epoch": 0.000166961669921875,
      "model_forward_time": 0.11438512802124023,
      "step": 27355
    },
    {
      "epoch": 0.000166961669921875,
      "step": 27355,
      "training_step_time": 0.4527726173400879
    },
    {
      "epoch": 0.0001669677734375,
      "model_forward_time": 0.11477112770080566,
      "step": 27356
    },
    {
      "epoch": 0.0001669677734375,
      "step": 27356,
      "training_step_time": 0.494534969329834
    },
    {
      "epoch": 0.000166973876953125,
      "model_forward_time": 0.11449694633483887,
      "step": 27357
    },
    {
      "epoch": 0.000166973876953125,
      "step": 27357,
      "training_step_time": 0.44551873207092285
    },
    {
      "epoch": 0.00016697998046875,
      "model_forward_time": 0.1149606704711914,
      "step": 27358
    },
    {
      "epoch": 0.00016697998046875,
      "step": 27358,
      "training_step_time": 0.3888270854949951
    },
    {
      "epoch": 0.000166986083984375,
      "model_forward_time": 0.11486005783081055,
      "step": 27359
    },
    {
      "epoch": 0.000166986083984375,
      "step": 27359,
      "training_step_time": 0.4019129276275635
    },
    {
      "epoch": 0.0001669921875,
      "grad_norm": 0.14786171913146973,
      "learning_rate": 6.131019695695702e-05,
      "loss": 0.0544,
      "step": 27360
    },
    {
      "epoch": 0.0001669921875,
      "model_forward_time": 0.11468982696533203,
      "step": 27360
    },
    {
      "epoch": 0.0001669921875,
      "step": 27360,
      "training_step_time": 0.39963340759277344
    },
    {
      "epoch": 0.000166998291015625,
      "model_forward_time": 0.11523985862731934,
      "step": 27361
    },
    {
      "epoch": 0.000166998291015625,
      "step": 27361,
      "training_step_time": 0.3957936763763428
    },
    {
      "epoch": 0.00016700439453125,
      "model_forward_time": 0.11511731147766113,
      "step": 27362
    },
    {
      "epoch": 0.00016700439453125,
      "step": 27362,
      "training_step_time": 0.3819394111633301
    },
    {
      "epoch": 0.000167010498046875,
      "model_forward_time": 0.11482930183410645,
      "step": 27363
    },
    {
      "epoch": 0.000167010498046875,
      "step": 27363,
      "training_step_time": 0.468977689743042
    },
    {
      "epoch": 0.0001670166015625,
      "model_forward_time": 0.11526703834533691,
      "step": 27364
    },
    {
      "epoch": 0.0001670166015625,
      "step": 27364,
      "training_step_time": 0.39786314964294434
    },
    {
      "epoch": 0.000167022705078125,
      "model_forward_time": 0.11475539207458496,
      "step": 27365
    },
    {
      "epoch": 0.000167022705078125,
      "step": 27365,
      "training_step_time": 0.3666090965270996
    },
    {
      "epoch": 0.00016702880859375,
      "model_forward_time": 0.11555600166320801,
      "step": 27366
    },
    {
      "epoch": 0.00016702880859375,
      "step": 27366,
      "training_step_time": 0.46379804611206055
    },
    {
      "epoch": 0.000167034912109375,
      "model_forward_time": 0.11460757255554199,
      "step": 27367
    },
    {
      "epoch": 0.000167034912109375,
      "step": 27367,
      "training_step_time": 0.41314005851745605
    },
    {
      "epoch": 0.000167041015625,
      "model_forward_time": 0.11568927764892578,
      "step": 27368
    },
    {
      "epoch": 0.000167041015625,
      "step": 27368,
      "training_step_time": 0.4882197380065918
    },
    {
      "epoch": 0.000167047119140625,
      "model_forward_time": 0.1153411865234375,
      "step": 27369
    },
    {
      "epoch": 0.000167047119140625,
      "step": 27369,
      "training_step_time": 0.40215253829956055
    },
    {
      "epoch": 0.00016705322265625,
      "grad_norm": 0.148872971534729,
      "learning_rate": 6.12833517100818e-05,
      "loss": 0.0509,
      "step": 27370
    },
    {
      "epoch": 0.00016705322265625,
      "model_forward_time": 0.1152951717376709,
      "step": 27370
    },
    {
      "epoch": 0.00016705322265625,
      "step": 27370,
      "training_step_time": 0.48742103576660156
    },
    {
      "epoch": 0.000167059326171875,
      "model_forward_time": 0.11492586135864258,
      "step": 27371
    },
    {
      "epoch": 0.000167059326171875,
      "step": 27371,
      "training_step_time": 0.41916370391845703
    },
    {
      "epoch": 0.0001670654296875,
      "model_forward_time": 0.11466526985168457,
      "step": 27372
    },
    {
      "epoch": 0.0001670654296875,
      "step": 27372,
      "training_step_time": 0.39294981956481934
    },
    {
      "epoch": 0.000167071533203125,
      "model_forward_time": 0.11476254463195801,
      "step": 27373
    },
    {
      "epoch": 0.000167071533203125,
      "step": 27373,
      "training_step_time": 0.3940722942352295
    },
    {
      "epoch": 0.00016707763671875,
      "model_forward_time": 0.11507749557495117,
      "step": 27374
    },
    {
      "epoch": 0.00016707763671875,
      "step": 27374,
      "training_step_time": 0.4015226364135742
    },
    {
      "epoch": 0.000167083740234375,
      "model_forward_time": 0.11513209342956543,
      "step": 27375
    },
    {
      "epoch": 0.000167083740234375,
      "step": 27375,
      "training_step_time": 0.6856119632720947
    },
    {
      "epoch": 0.00016708984375,
      "model_forward_time": 0.11464762687683105,
      "step": 27376
    },
    {
      "epoch": 0.00016708984375,
      "step": 27376,
      "training_step_time": 0.3906838893890381
    },
    {
      "epoch": 0.000167095947265625,
      "model_forward_time": 0.11472082138061523,
      "step": 27377
    },
    {
      "epoch": 0.000167095947265625,
      "step": 27377,
      "training_step_time": 0.38764166831970215
    },
    {
      "epoch": 0.00016710205078125,
      "model_forward_time": 0.11489629745483398,
      "step": 27378
    },
    {
      "epoch": 0.00016710205078125,
      "step": 27378,
      "training_step_time": 0.4177675247192383
    },
    {
      "epoch": 0.000167108154296875,
      "model_forward_time": 0.1150350570678711,
      "step": 27379
    },
    {
      "epoch": 0.000167108154296875,
      "step": 27379,
      "training_step_time": 0.3770639896392822
    },
    {
      "epoch": 0.0001671142578125,
      "grad_norm": 0.13455811142921448,
      "learning_rate": 6.125650303562221e-05,
      "loss": 0.0449,
      "step": 27380
    },
    {
      "epoch": 0.0001671142578125,
      "model_forward_time": 0.1146240234375,
      "step": 27380
    },
    {
      "epoch": 0.0001671142578125,
      "step": 27380,
      "training_step_time": 0.4349982738494873
    },
    {
      "epoch": 0.000167120361328125,
      "model_forward_time": 0.1146540641784668,
      "step": 27381
    },
    {
      "epoch": 0.000167120361328125,
      "step": 27381,
      "training_step_time": 0.6448891162872314
    },
    {
      "epoch": 0.00016712646484375,
      "model_forward_time": 0.11458778381347656,
      "step": 27382
    },
    {
      "epoch": 0.00016712646484375,
      "step": 27382,
      "training_step_time": 0.4090909957885742
    },
    {
      "epoch": 0.000167132568359375,
      "model_forward_time": 0.11435127258300781,
      "step": 27383
    },
    {
      "epoch": 0.000167132568359375,
      "step": 27383,
      "training_step_time": 0.5173461437225342
    },
    {
      "epoch": 0.000167138671875,
      "model_forward_time": 0.11477851867675781,
      "step": 27384
    },
    {
      "epoch": 0.000167138671875,
      "step": 27384,
      "training_step_time": 0.42667126655578613
    },
    {
      "epoch": 0.000167144775390625,
      "model_forward_time": 0.11422610282897949,
      "step": 27385
    },
    {
      "epoch": 0.000167144775390625,
      "step": 27385,
      "training_step_time": 0.38814878463745117
    },
    {
      "epoch": 0.00016715087890625,
      "model_forward_time": 0.11462855339050293,
      "step": 27386
    },
    {
      "epoch": 0.00016715087890625,
      "step": 27386,
      "training_step_time": 0.3952939510345459
    },
    {
      "epoch": 0.000167156982421875,
      "model_forward_time": 0.11457395553588867,
      "step": 27387
    },
    {
      "epoch": 0.000167156982421875,
      "step": 27387,
      "training_step_time": 0.46610260009765625
    },
    {
      "epoch": 0.0001671630859375,
      "model_forward_time": 0.11491703987121582,
      "step": 27388
    },
    {
      "epoch": 0.0001671630859375,
      "step": 27388,
      "training_step_time": 0.3988962173461914
    },
    {
      "epoch": 0.000167169189453125,
      "model_forward_time": 0.11489582061767578,
      "step": 27389
    },
    {
      "epoch": 0.000167169189453125,
      "step": 27389,
      "training_step_time": 0.3917708396911621
    },
    {
      "epoch": 0.00016717529296875,
      "grad_norm": 0.1646643429994583,
      "learning_rate": 6.122965094173424e-05,
      "loss": 0.0481,
      "step": 27390
    },
    {
      "epoch": 0.00016717529296875,
      "model_forward_time": 0.11458015441894531,
      "step": 27390
    },
    {
      "epoch": 0.00016717529296875,
      "step": 27390,
      "training_step_time": 0.3998396396636963
    },
    {
      "epoch": 0.000167181396484375,
      "model_forward_time": 0.11548542976379395,
      "step": 27391
    },
    {
      "epoch": 0.000167181396484375,
      "step": 27391,
      "training_step_time": 0.40807604789733887
    },
    {
      "epoch": 0.0001671875,
      "model_forward_time": 0.11522197723388672,
      "step": 27392
    },
    {
      "epoch": 0.0001671875,
      "step": 27392,
      "training_step_time": 0.4902822971343994
    },
    {
      "epoch": 0.000167193603515625,
      "model_forward_time": 0.11571383476257324,
      "step": 27393
    },
    {
      "epoch": 0.000167193603515625,
      "step": 27393,
      "training_step_time": 0.5480453968048096
    },
    {
      "epoch": 0.00016719970703125,
      "model_forward_time": 0.11545157432556152,
      "step": 27394
    },
    {
      "epoch": 0.00016719970703125,
      "step": 27394,
      "training_step_time": 0.42485904693603516
    },
    {
      "epoch": 0.000167205810546875,
      "model_forward_time": 0.11489176750183105,
      "step": 27395
    },
    {
      "epoch": 0.000167205810546875,
      "step": 27395,
      "training_step_time": 0.5032341480255127
    },
    {
      "epoch": 0.0001672119140625,
      "model_forward_time": 0.11472201347351074,
      "step": 27396
    },
    {
      "epoch": 0.0001672119140625,
      "step": 27396,
      "training_step_time": 0.45440125465393066
    },
    {
      "epoch": 0.000167218017578125,
      "model_forward_time": 0.11400413513183594,
      "step": 27397
    },
    {
      "epoch": 0.000167218017578125,
      "step": 27397,
      "training_step_time": 0.49611711502075195
    },
    {
      "epoch": 0.00016722412109375,
      "model_forward_time": 0.11462259292602539,
      "step": 27398
    },
    {
      "epoch": 0.00016722412109375,
      "step": 27398,
      "training_step_time": 0.39031291007995605
    },
    {
      "epoch": 0.000167230224609375,
      "model_forward_time": 0.11425304412841797,
      "step": 27399
    },
    {
      "epoch": 0.000167230224609375,
      "step": 27399,
      "training_step_time": 0.3936140537261963
    },
    {
      "epoch": 0.000167236328125,
      "grad_norm": 0.15344610810279846,
      "learning_rate": 6.12027954365748e-05,
      "loss": 0.0505,
      "step": 27400
    },
    {
      "epoch": 0.000167236328125,
      "model_forward_time": 0.11452937126159668,
      "step": 27400
    },
    {
      "epoch": 0.000167236328125,
      "step": 27400,
      "training_step_time": 0.4000120162963867
    },
    {
      "epoch": 0.000167242431640625,
      "model_forward_time": 0.11448812484741211,
      "step": 27401
    },
    {
      "epoch": 0.000167242431640625,
      "step": 27401,
      "training_step_time": 0.388761043548584
    },
    {
      "epoch": 0.00016724853515625,
      "model_forward_time": 0.1158285140991211,
      "step": 27402
    },
    {
      "epoch": 0.00016724853515625,
      "step": 27402,
      "training_step_time": 0.39646267890930176
    },
    {
      "epoch": 0.000167254638671875,
      "model_forward_time": 0.11521100997924805,
      "step": 27403
    },
    {
      "epoch": 0.000167254638671875,
      "step": 27403,
      "training_step_time": 0.40259885787963867
    },
    {
      "epoch": 0.0001672607421875,
      "model_forward_time": 0.11507892608642578,
      "step": 27404
    },
    {
      "epoch": 0.0001672607421875,
      "step": 27404,
      "training_step_time": 0.39905858039855957
    },
    {
      "epoch": 0.000167266845703125,
      "model_forward_time": 0.11542129516601562,
      "step": 27405
    },
    {
      "epoch": 0.000167266845703125,
      "step": 27405,
      "training_step_time": 0.635969877243042
    },
    {
      "epoch": 0.00016727294921875,
      "model_forward_time": 0.11526274681091309,
      "step": 27406
    },
    {
      "epoch": 0.00016727294921875,
      "step": 27406,
      "training_step_time": 0.4091198444366455
    },
    {
      "epoch": 0.000167279052734375,
      "model_forward_time": 0.11473894119262695,
      "step": 27407
    },
    {
      "epoch": 0.000167279052734375,
      "step": 27407,
      "training_step_time": 0.41654062271118164
    },
    {
      "epoch": 0.00016728515625,
      "model_forward_time": 0.11483430862426758,
      "step": 27408
    },
    {
      "epoch": 0.00016728515625,
      "step": 27408,
      "training_step_time": 0.4947788715362549
    },
    {
      "epoch": 0.000167291259765625,
      "model_forward_time": 0.11526942253112793,
      "step": 27409
    },
    {
      "epoch": 0.000167291259765625,
      "step": 27409,
      "training_step_time": 0.42287421226501465
    },
    {
      "epoch": 0.00016729736328125,
      "grad_norm": 0.1600673496723175,
      "learning_rate": 6.117593652830191e-05,
      "loss": 0.0479,
      "step": 27410
    },
    {
      "epoch": 0.00016729736328125,
      "model_forward_time": 0.11472892761230469,
      "step": 27410
    },
    {
      "epoch": 0.00016729736328125,
      "step": 27410,
      "training_step_time": 0.39841580390930176
    },
    {
      "epoch": 0.000167303466796875,
      "model_forward_time": 0.11529779434204102,
      "step": 27411
    },
    {
      "epoch": 0.000167303466796875,
      "step": 27411,
      "training_step_time": 0.4613914489746094
    },
    {
      "epoch": 0.0001673095703125,
      "model_forward_time": 0.11475872993469238,
      "step": 27412
    },
    {
      "epoch": 0.0001673095703125,
      "step": 27412,
      "training_step_time": 0.40053248405456543
    },
    {
      "epoch": 0.000167315673828125,
      "model_forward_time": 0.11454582214355469,
      "step": 27413
    },
    {
      "epoch": 0.000167315673828125,
      "step": 27413,
      "training_step_time": 0.3924689292907715
    },
    {
      "epoch": 0.00016732177734375,
      "model_forward_time": 0.11556220054626465,
      "step": 27414
    },
    {
      "epoch": 0.00016732177734375,
      "step": 27414,
      "training_step_time": 0.36745119094848633
    },
    {
      "epoch": 0.000167327880859375,
      "model_forward_time": 0.11575698852539062,
      "step": 27415
    },
    {
      "epoch": 0.000167327880859375,
      "step": 27415,
      "training_step_time": 0.3906877040863037
    },
    {
      "epoch": 0.000167333984375,
      "model_forward_time": 0.11557316780090332,
      "step": 27416
    },
    {
      "epoch": 0.000167333984375,
      "step": 27416,
      "training_step_time": 0.38426661491394043
    },
    {
      "epoch": 0.000167340087890625,
      "model_forward_time": 0.11482834815979004,
      "step": 27417
    },
    {
      "epoch": 0.000167340087890625,
      "step": 27417,
      "training_step_time": 0.6317040920257568
    },
    {
      "epoch": 0.00016734619140625,
      "model_forward_time": 0.11639189720153809,
      "step": 27418
    },
    {
      "epoch": 0.00016734619140625,
      "step": 27418,
      "training_step_time": 0.38820981979370117
    },
    {
      "epoch": 0.000167352294921875,
      "model_forward_time": 0.11487531661987305,
      "step": 27419
    },
    {
      "epoch": 0.000167352294921875,
      "step": 27419,
      "training_step_time": 0.40406012535095215
    },
    {
      "epoch": 0.0001673583984375,
      "grad_norm": 0.20569033920764923,
      "learning_rate": 6.11490742250746e-05,
      "loss": 0.0512,
      "step": 27420
    },
    {
      "epoch": 0.0001673583984375,
      "model_forward_time": 0.11497902870178223,
      "step": 27420
    },
    {
      "epoch": 0.0001673583984375,
      "step": 27420,
      "training_step_time": 0.38765621185302734
    },
    {
      "epoch": 0.000167364501953125,
      "model_forward_time": 0.11412525177001953,
      "step": 27421
    },
    {
      "epoch": 0.000167364501953125,
      "step": 27421,
      "training_step_time": 0.38310885429382324
    },
    {
      "epoch": 0.00016737060546875,
      "model_forward_time": 0.11543822288513184,
      "step": 27422
    },
    {
      "epoch": 0.00016737060546875,
      "step": 27422,
      "training_step_time": 0.46338939666748047
    },
    {
      "epoch": 0.000167376708984375,
      "model_forward_time": 0.11491036415100098,
      "step": 27423
    },
    {
      "epoch": 0.000167376708984375,
      "step": 27423,
      "training_step_time": 0.49549198150634766
    },
    {
      "epoch": 0.0001673828125,
      "model_forward_time": 0.11511754989624023,
      "step": 27424
    },
    {
      "epoch": 0.0001673828125,
      "step": 27424,
      "training_step_time": 0.39754700660705566
    },
    {
      "epoch": 0.000167388916015625,
      "model_forward_time": 0.11503982543945312,
      "step": 27425
    },
    {
      "epoch": 0.000167388916015625,
      "step": 27425,
      "training_step_time": 0.46284937858581543
    },
    {
      "epoch": 0.00016739501953125,
      "model_forward_time": 0.1146399974822998,
      "step": 27426
    },
    {
      "epoch": 0.00016739501953125,
      "step": 27426,
      "training_step_time": 0.39421892166137695
    },
    {
      "epoch": 0.000167401123046875,
      "model_forward_time": 0.11513638496398926,
      "step": 27427
    },
    {
      "epoch": 0.000167401123046875,
      "step": 27427,
      "training_step_time": 0.39502620697021484
    },
    {
      "epoch": 0.0001674072265625,
      "model_forward_time": 0.11559844017028809,
      "step": 27428
    },
    {
      "epoch": 0.0001674072265625,
      "step": 27428,
      "training_step_time": 0.3787269592285156
    },
    {
      "epoch": 0.000167413330078125,
      "model_forward_time": 0.11537504196166992,
      "step": 27429
    },
    {
      "epoch": 0.000167413330078125,
      "step": 27429,
      "training_step_time": 0.6009771823883057
    },
    {
      "epoch": 0.00016741943359375,
      "grad_norm": 0.10582298040390015,
      "learning_rate": 6.112220853505288e-05,
      "loss": 0.0444,
      "step": 27430
    },
    {
      "epoch": 0.00016741943359375,
      "model_forward_time": 0.11515474319458008,
      "step": 27430
    },
    {
      "epoch": 0.00016741943359375,
      "step": 27430,
      "training_step_time": 0.3861110210418701
    },
    {
      "epoch": 0.000167425537109375,
      "model_forward_time": 0.11497688293457031,
      "step": 27431
    },
    {
      "epoch": 0.000167425537109375,
      "step": 27431,
      "training_step_time": 0.39626026153564453
    },
    {
      "epoch": 0.000167431640625,
      "model_forward_time": 0.11514139175415039,
      "step": 27432
    },
    {
      "epoch": 0.000167431640625,
      "step": 27432,
      "training_step_time": 0.39723753929138184
    },
    {
      "epoch": 0.000167437744140625,
      "model_forward_time": 0.11469554901123047,
      "step": 27433
    },
    {
      "epoch": 0.000167437744140625,
      "step": 27433,
      "training_step_time": 0.3920304775238037
    },
    {
      "epoch": 0.00016744384765625,
      "model_forward_time": 0.11546158790588379,
      "step": 27434
    },
    {
      "epoch": 0.00016744384765625,
      "step": 27434,
      "training_step_time": 0.38595032691955566
    },
    {
      "epoch": 0.000167449951171875,
      "model_forward_time": 0.11536622047424316,
      "step": 27435
    },
    {
      "epoch": 0.000167449951171875,
      "step": 27435,
      "training_step_time": 0.6995398998260498
    },
    {
      "epoch": 0.0001674560546875,
      "model_forward_time": 0.11478972434997559,
      "step": 27436
    },
    {
      "epoch": 0.0001674560546875,
      "step": 27436,
      "training_step_time": 0.4294929504394531
    },
    {
      "epoch": 0.000167462158203125,
      "model_forward_time": 0.1149747371673584,
      "step": 27437
    },
    {
      "epoch": 0.000167462158203125,
      "step": 27437,
      "training_step_time": 0.4503307342529297
    },
    {
      "epoch": 0.00016746826171875,
      "model_forward_time": 0.11481428146362305,
      "step": 27438
    },
    {
      "epoch": 0.00016746826171875,
      "step": 27438,
      "training_step_time": 0.4082024097442627
    },
    {
      "epoch": 0.000167474365234375,
      "model_forward_time": 0.11476254463195801,
      "step": 27439
    },
    {
      "epoch": 0.000167474365234375,
      "step": 27439,
      "training_step_time": 0.4025084972381592
    },
    {
      "epoch": 0.00016748046875,
      "grad_norm": 0.1194121316075325,
      "learning_rate": 6.10953394663979e-05,
      "loss": 0.0441,
      "step": 27440
    },
    {
      "epoch": 0.00016748046875,
      "model_forward_time": 0.11431264877319336,
      "step": 27440
    },
    {
      "epoch": 0.00016748046875,
      "step": 27440,
      "training_step_time": 0.3828849792480469
    },
    {
      "epoch": 0.000167486572265625,
      "model_forward_time": 0.11520957946777344,
      "step": 27441
    },
    {
      "epoch": 0.000167486572265625,
      "step": 27441,
      "training_step_time": 0.5179767608642578
    },
    {
      "epoch": 0.00016749267578125,
      "model_forward_time": 0.11509847640991211,
      "step": 27442
    },
    {
      "epoch": 0.00016749267578125,
      "step": 27442,
      "training_step_time": 0.38980627059936523
    },
    {
      "epoch": 0.000167498779296875,
      "model_forward_time": 0.11529922485351562,
      "step": 27443
    },
    {
      "epoch": 0.000167498779296875,
      "step": 27443,
      "training_step_time": 0.3900160789489746
    },
    {
      "epoch": 0.0001675048828125,
      "model_forward_time": 0.11554479598999023,
      "step": 27444
    },
    {
      "epoch": 0.0001675048828125,
      "step": 27444,
      "training_step_time": 0.38671112060546875
    },
    {
      "epoch": 0.000167510986328125,
      "model_forward_time": 0.1149909496307373,
      "step": 27445
    },
    {
      "epoch": 0.000167510986328125,
      "step": 27445,
      "training_step_time": 0.39719271659851074
    },
    {
      "epoch": 0.00016751708984375,
      "model_forward_time": 0.11624836921691895,
      "step": 27446
    },
    {
      "epoch": 0.00016751708984375,
      "step": 27446,
      "training_step_time": 0.38598132133483887
    },
    {
      "epoch": 0.000167523193359375,
      "model_forward_time": 0.11540389060974121,
      "step": 27447
    },
    {
      "epoch": 0.000167523193359375,
      "step": 27447,
      "training_step_time": 0.6830184459686279
    },
    {
      "epoch": 0.000167529296875,
      "model_forward_time": 0.11472010612487793,
      "step": 27448
    },
    {
      "epoch": 0.000167529296875,
      "step": 27448,
      "training_step_time": 0.3867685794830322
    },
    {
      "epoch": 0.000167535400390625,
      "model_forward_time": 0.11469578742980957,
      "step": 27449
    },
    {
      "epoch": 0.000167535400390625,
      "step": 27449,
      "training_step_time": 0.3629791736602783
    },
    {
      "epoch": 0.00016754150390625,
      "grad_norm": 0.17399494349956512,
      "learning_rate": 6.106846702727172e-05,
      "loss": 0.0469,
      "step": 27450
    },
    {
      "epoch": 0.00016754150390625,
      "model_forward_time": 0.11496329307556152,
      "step": 27450
    },
    {
      "epoch": 0.00016754150390625,
      "step": 27450,
      "training_step_time": 0.5204751491546631
    },
    {
      "epoch": 0.000167547607421875,
      "model_forward_time": 0.11447620391845703,
      "step": 27451
    },
    {
      "epoch": 0.000167547607421875,
      "step": 27451,
      "training_step_time": 0.479525089263916
    },
    {
      "epoch": 0.0001675537109375,
      "model_forward_time": 0.11427855491638184,
      "step": 27452
    },
    {
      "epoch": 0.0001675537109375,
      "step": 27452,
      "training_step_time": 0.4617600440979004
    },
    {
      "epoch": 0.000167559814453125,
      "model_forward_time": 0.11493444442749023,
      "step": 27453
    },
    {
      "epoch": 0.000167559814453125,
      "step": 27453,
      "training_step_time": 0.38596272468566895
    },
    {
      "epoch": 0.00016756591796875,
      "model_forward_time": 0.11497974395751953,
      "step": 27454
    },
    {
      "epoch": 0.00016756591796875,
      "step": 27454,
      "training_step_time": 0.3917698860168457
    },
    {
      "epoch": 0.000167572021484375,
      "model_forward_time": 0.1145775318145752,
      "step": 27455
    },
    {
      "epoch": 0.000167572021484375,
      "step": 27455,
      "training_step_time": 0.39116621017456055
    },
    {
      "epoch": 0.000167578125,
      "model_forward_time": 0.1150522232055664,
      "step": 27456
    },
    {
      "epoch": 0.000167578125,
      "step": 27456,
      "training_step_time": 0.3849828243255615
    },
    {
      "epoch": 0.000167584228515625,
      "model_forward_time": 0.11500000953674316,
      "step": 27457
    },
    {
      "epoch": 0.000167584228515625,
      "step": 27457,
      "training_step_time": 0.403334379196167
    },
    {
      "epoch": 0.00016759033203125,
      "model_forward_time": 0.11564040184020996,
      "step": 27458
    },
    {
      "epoch": 0.00016759033203125,
      "step": 27458,
      "training_step_time": 0.4037160873413086
    },
    {
      "epoch": 0.000167596435546875,
      "model_forward_time": 0.11563706398010254,
      "step": 27459
    },
    {
      "epoch": 0.000167596435546875,
      "step": 27459,
      "training_step_time": 0.4334886074066162
    },
    {
      "epoch": 0.0001676025390625,
      "grad_norm": 0.10079830139875412,
      "learning_rate": 6.104159122583752e-05,
      "loss": 0.0512,
      "step": 27460
    },
    {
      "epoch": 0.0001676025390625,
      "model_forward_time": 0.11483168601989746,
      "step": 27460
    },
    {
      "epoch": 0.0001676025390625,
      "step": 27460,
      "training_step_time": 0.3855855464935303
    },
    {
      "epoch": 0.000167608642578125,
      "model_forward_time": 0.11561441421508789,
      "step": 27461
    },
    {
      "epoch": 0.000167608642578125,
      "step": 27461,
      "training_step_time": 0.4180152416229248
    },
    {
      "epoch": 0.00016761474609375,
      "model_forward_time": 0.11511540412902832,
      "step": 27462
    },
    {
      "epoch": 0.00016761474609375,
      "step": 27462,
      "training_step_time": 0.3922252655029297
    },
    {
      "epoch": 0.000167620849609375,
      "model_forward_time": 0.11530780792236328,
      "step": 27463
    },
    {
      "epoch": 0.000167620849609375,
      "step": 27463,
      "training_step_time": 0.5053324699401855
    },
    {
      "epoch": 0.000167626953125,
      "model_forward_time": 0.11463761329650879,
      "step": 27464
    },
    {
      "epoch": 0.000167626953125,
      "step": 27464,
      "training_step_time": 0.40491199493408203
    },
    {
      "epoch": 0.000167633056640625,
      "model_forward_time": 0.11502504348754883,
      "step": 27465
    },
    {
      "epoch": 0.000167633056640625,
      "step": 27465,
      "training_step_time": 0.5816261768341064
    },
    {
      "epoch": 0.00016763916015625,
      "model_forward_time": 0.11519885063171387,
      "step": 27466
    },
    {
      "epoch": 0.00016763916015625,
      "step": 27466,
      "training_step_time": 0.4403562545776367
    },
    {
      "epoch": 0.000167645263671875,
      "model_forward_time": 0.11496114730834961,
      "step": 27467
    },
    {
      "epoch": 0.000167645263671875,
      "step": 27467,
      "training_step_time": 0.39205336570739746
    },
    {
      "epoch": 0.0001676513671875,
      "model_forward_time": 0.1145632266998291,
      "step": 27468
    },
    {
      "epoch": 0.0001676513671875,
      "step": 27468,
      "training_step_time": 0.3935434818267822
    },
    {
      "epoch": 0.000167657470703125,
      "model_forward_time": 0.11459088325500488,
      "step": 27469
    },
    {
      "epoch": 0.000167657470703125,
      "step": 27469,
      "training_step_time": 0.3970780372619629
    },
    {
      "epoch": 0.00016766357421875,
      "grad_norm": 0.17775973677635193,
      "learning_rate": 6.101471207025945e-05,
      "loss": 0.0487,
      "step": 27470
    },
    {
      "epoch": 0.00016766357421875,
      "model_forward_time": 0.11504364013671875,
      "step": 27470
    },
    {
      "epoch": 0.00016766357421875,
      "step": 27470,
      "training_step_time": 0.3910655975341797
    },
    {
      "epoch": 0.000167669677734375,
      "model_forward_time": 0.11550712585449219,
      "step": 27471
    },
    {
      "epoch": 0.000167669677734375,
      "step": 27471,
      "training_step_time": 0.4987342357635498
    },
    {
      "epoch": 0.00016767578125,
      "model_forward_time": 0.11493515968322754,
      "step": 27472
    },
    {
      "epoch": 0.00016767578125,
      "step": 27472,
      "training_step_time": 0.4030177593231201
    },
    {
      "epoch": 0.000167681884765625,
      "model_forward_time": 0.1149296760559082,
      "step": 27473
    },
    {
      "epoch": 0.000167681884765625,
      "step": 27473,
      "training_step_time": 0.3994607925415039
    },
    {
      "epoch": 0.00016768798828125,
      "model_forward_time": 0.11571288108825684,
      "step": 27474
    },
    {
      "epoch": 0.00016768798828125,
      "step": 27474,
      "training_step_time": 0.4002223014831543
    },
    {
      "epoch": 0.000167694091796875,
      "model_forward_time": 0.11498570442199707,
      "step": 27475
    },
    {
      "epoch": 0.000167694091796875,
      "step": 27475,
      "training_step_time": 0.39722323417663574
    },
    {
      "epoch": 0.0001677001953125,
      "model_forward_time": 0.11489272117614746,
      "step": 27476
    },
    {
      "epoch": 0.0001677001953125,
      "step": 27476,
      "training_step_time": 0.39926958084106445
    },
    {
      "epoch": 0.000167706298828125,
      "model_forward_time": 0.11514639854431152,
      "step": 27477
    },
    {
      "epoch": 0.000167706298828125,
      "step": 27477,
      "training_step_time": 0.5805182456970215
    },
    {
      "epoch": 0.00016771240234375,
      "model_forward_time": 0.11559200286865234,
      "step": 27478
    },
    {
      "epoch": 0.00016771240234375,
      "step": 27478,
      "training_step_time": 0.36664319038391113
    },
    {
      "epoch": 0.000167718505859375,
      "model_forward_time": 0.114715576171875,
      "step": 27479
    },
    {
      "epoch": 0.000167718505859375,
      "step": 27479,
      "training_step_time": 0.4933204650878906
    },
    {
      "epoch": 0.000167724609375,
      "grad_norm": 0.11886745691299438,
      "learning_rate": 6.0987829568702656e-05,
      "loss": 0.0442,
      "step": 27480
    },
    {
      "epoch": 0.000167724609375,
      "model_forward_time": 0.11486411094665527,
      "step": 27480
    },
    {
      "epoch": 0.000167724609375,
      "step": 27480,
      "training_step_time": 0.48468852043151855
    },
    {
      "epoch": 0.000167730712890625,
      "model_forward_time": 0.11421895027160645,
      "step": 27481
    },
    {
      "epoch": 0.000167730712890625,
      "step": 27481,
      "training_step_time": 0.41578173637390137
    },
    {
      "epoch": 0.00016773681640625,
      "model_forward_time": 0.11405038833618164,
      "step": 27482
    },
    {
      "epoch": 0.00016773681640625,
      "step": 27482,
      "training_step_time": 0.3893911838531494
    },
    {
      "epoch": 0.000167742919921875,
      "model_forward_time": 0.11474180221557617,
      "step": 27483
    },
    {
      "epoch": 0.000167742919921875,
      "step": 27483,
      "training_step_time": 0.42510294914245605
    },
    {
      "epoch": 0.0001677490234375,
      "model_forward_time": 0.11430573463439941,
      "step": 27484
    },
    {
      "epoch": 0.0001677490234375,
      "step": 27484,
      "training_step_time": 0.39077258110046387
    },
    {
      "epoch": 0.000167755126953125,
      "model_forward_time": 0.11533522605895996,
      "step": 27485
    },
    {
      "epoch": 0.000167755126953125,
      "step": 27485,
      "training_step_time": 0.38178586959838867
    },
    {
      "epoch": 0.00016776123046875,
      "model_forward_time": 0.11510086059570312,
      "step": 27486
    },
    {
      "epoch": 0.00016776123046875,
      "step": 27486,
      "training_step_time": 0.39749765396118164
    },
    {
      "epoch": 0.000167767333984375,
      "model_forward_time": 0.11540746688842773,
      "step": 27487
    },
    {
      "epoch": 0.000167767333984375,
      "step": 27487,
      "training_step_time": 0.4039485454559326
    },
    {
      "epoch": 0.0001677734375,
      "model_forward_time": 0.11487388610839844,
      "step": 27488
    },
    {
      "epoch": 0.0001677734375,
      "step": 27488,
      "training_step_time": 0.38872671127319336
    },
    {
      "epoch": 0.000167779541015625,
      "model_forward_time": 0.11547350883483887,
      "step": 27489
    },
    {
      "epoch": 0.000167779541015625,
      "step": 27489,
      "training_step_time": 0.7010366916656494
    },
    {
      "epoch": 0.00016778564453125,
      "grad_norm": 0.16872477531433105,
      "learning_rate": 6.0960943729333374e-05,
      "loss": 0.05,
      "step": 27490
    },
    {
      "epoch": 0.00016778564453125,
      "model_forward_time": 0.11476826667785645,
      "step": 27490
    },
    {
      "epoch": 0.00016778564453125,
      "step": 27490,
      "training_step_time": 0.3957219123840332
    },
    {
      "epoch": 0.000167791748046875,
      "model_forward_time": 0.11446666717529297,
      "step": 27491
    },
    {
      "epoch": 0.000167791748046875,
      "step": 27491,
      "training_step_time": 0.4853982925415039
    },
    {
      "epoch": 0.0001677978515625,
      "model_forward_time": 0.11622118949890137,
      "step": 27492
    },
    {
      "epoch": 0.0001677978515625,
      "step": 27492,
      "training_step_time": 0.40819787979125977
    },
    {
      "epoch": 0.000167803955078125,
      "model_forward_time": 0.11404800415039062,
      "step": 27493
    },
    {
      "epoch": 0.000167803955078125,
      "step": 27493,
      "training_step_time": 0.47192883491516113
    },
    {
      "epoch": 0.00016781005859375,
      "model_forward_time": 0.11432528495788574,
      "step": 27494
    },
    {
      "epoch": 0.00016781005859375,
      "step": 27494,
      "training_step_time": 0.4967777729034424
    },
    {
      "epoch": 0.000167816162109375,
      "model_forward_time": 0.11415219306945801,
      "step": 27495
    },
    {
      "epoch": 0.000167816162109375,
      "step": 27495,
      "training_step_time": 0.399646520614624
    },
    {
      "epoch": 0.000167822265625,
      "model_forward_time": 0.1142725944519043,
      "step": 27496
    },
    {
      "epoch": 0.000167822265625,
      "step": 27496,
      "training_step_time": 0.38665080070495605
    },
    {
      "epoch": 0.000167828369140625,
      "model_forward_time": 0.11493802070617676,
      "step": 27497
    },
    {
      "epoch": 0.000167828369140625,
      "step": 27497,
      "training_step_time": 0.395099401473999
    },
    {
      "epoch": 0.00016783447265625,
      "model_forward_time": 0.11504268646240234,
      "step": 27498
    },
    {
      "epoch": 0.00016783447265625,
      "step": 27498,
      "training_step_time": 0.3960702419281006
    },
    {
      "epoch": 0.000167840576171875,
      "model_forward_time": 0.11558723449707031,
      "step": 27499
    },
    {
      "epoch": 0.000167840576171875,
      "step": 27499,
      "training_step_time": 0.39852285385131836
    },
    {
      "epoch": 0.0001678466796875,
      "grad_norm": 0.1302187144756317,
      "learning_rate": 6.09340545603188e-05,
      "loss": 0.0515,
      "step": 27500
    },
    {
      "epoch": 0.0001678466796875,
      "model_forward_time": 0.11516571044921875,
      "step": 27500
    },
    {
      "epoch": 0.0001678466796875,
      "step": 27500,
      "training_step_time": 0.38536763191223145
    },
    {
      "epoch": 0.000167852783203125,
      "model_forward_time": 0.1157987117767334,
      "step": 27501
    },
    {
      "epoch": 0.000167852783203125,
      "step": 27501,
      "training_step_time": 0.5581545829772949
    },
    {
      "epoch": 0.00016785888671875,
      "model_forward_time": 0.1150813102722168,
      "step": 27502
    },
    {
      "epoch": 0.00016785888671875,
      "step": 27502,
      "training_step_time": 0.43750548362731934
    },
    {
      "epoch": 0.000167864990234375,
      "model_forward_time": 0.11481189727783203,
      "step": 27503
    },
    {
      "epoch": 0.000167864990234375,
      "step": 27503,
      "training_step_time": 0.4036540985107422
    },
    {
      "epoch": 0.00016787109375,
      "model_forward_time": 0.11477994918823242,
      "step": 27504
    },
    {
      "epoch": 0.00016787109375,
      "step": 27504,
      "training_step_time": 0.39815354347229004
    },
    {
      "epoch": 0.000167877197265625,
      "model_forward_time": 0.1151285171508789,
      "step": 27505
    },
    {
      "epoch": 0.000167877197265625,
      "step": 27505,
      "training_step_time": 0.4114537239074707
    },
    {
      "epoch": 0.00016788330078125,
      "model_forward_time": 0.1151590347290039,
      "step": 27506
    },
    {
      "epoch": 0.00016788330078125,
      "step": 27506,
      "training_step_time": 0.4260387420654297
    },
    {
      "epoch": 0.000167889404296875,
      "model_forward_time": 0.11495494842529297,
      "step": 27507
    },
    {
      "epoch": 0.000167889404296875,
      "step": 27507,
      "training_step_time": 0.6365408897399902
    },
    {
      "epoch": 0.0001678955078125,
      "model_forward_time": 0.11547994613647461,
      "step": 27508
    },
    {
      "epoch": 0.0001678955078125,
      "step": 27508,
      "training_step_time": 0.4559178352355957
    },
    {
      "epoch": 0.000167901611328125,
      "model_forward_time": 0.11540031433105469,
      "step": 27509
    },
    {
      "epoch": 0.000167901611328125,
      "step": 27509,
      "training_step_time": 0.38361144065856934
    },
    {
      "epoch": 0.00016790771484375,
      "grad_norm": 0.09944168478250504,
      "learning_rate": 6.090716206982714e-05,
      "loss": 0.0473,
      "step": 27510
    },
    {
      "epoch": 0.00016790771484375,
      "model_forward_time": 0.1144719123840332,
      "step": 27510
    },
    {
      "epoch": 0.00016790771484375,
      "step": 27510,
      "training_step_time": 0.3894028663635254
    },
    {
      "epoch": 0.000167913818359375,
      "model_forward_time": 0.11490654945373535,
      "step": 27511
    },
    {
      "epoch": 0.000167913818359375,
      "step": 27511,
      "training_step_time": 0.38822031021118164
    },
    {
      "epoch": 0.000167919921875,
      "model_forward_time": 0.11488080024719238,
      "step": 27512
    },
    {
      "epoch": 0.000167919921875,
      "step": 27512,
      "training_step_time": 0.3858492374420166
    },
    {
      "epoch": 0.000167926025390625,
      "model_forward_time": 0.1145319938659668,
      "step": 27513
    },
    {
      "epoch": 0.000167926025390625,
      "step": 27513,
      "training_step_time": 0.6922121047973633
    },
    {
      "epoch": 0.00016793212890625,
      "model_forward_time": 0.11489367485046387,
      "step": 27514
    },
    {
      "epoch": 0.00016793212890625,
      "step": 27514,
      "training_step_time": 0.3961677551269531
    },
    {
      "epoch": 0.000167938232421875,
      "model_forward_time": 0.1152806282043457,
      "step": 27515
    },
    {
      "epoch": 0.000167938232421875,
      "step": 27515,
      "training_step_time": 0.3851473331451416
    },
    {
      "epoch": 0.0001679443359375,
      "model_forward_time": 0.11497211456298828,
      "step": 27516
    },
    {
      "epoch": 0.0001679443359375,
      "step": 27516,
      "training_step_time": 0.4257204532623291
    },
    {
      "epoch": 0.000167950439453125,
      "model_forward_time": 0.11466646194458008,
      "step": 27517
    },
    {
      "epoch": 0.000167950439453125,
      "step": 27517,
      "training_step_time": 0.3955380916595459
    },
    {
      "epoch": 0.00016795654296875,
      "model_forward_time": 0.11480975151062012,
      "step": 27518
    },
    {
      "epoch": 0.00016795654296875,
      "step": 27518,
      "training_step_time": 0.3870508670806885
    },
    {
      "epoch": 0.000167962646484375,
      "model_forward_time": 0.1139521598815918,
      "step": 27519
    },
    {
      "epoch": 0.000167962646484375,
      "step": 27519,
      "training_step_time": 0.5116851329803467
    },
    {
      "epoch": 0.00016796875,
      "grad_norm": 0.17199701070785522,
      "learning_rate": 6.088026626602763e-05,
      "loss": 0.0487,
      "step": 27520
    },
    {
      "epoch": 0.00016796875,
      "model_forward_time": 0.11454558372497559,
      "step": 27520
    },
    {
      "epoch": 0.00016796875,
      "step": 27520,
      "training_step_time": 0.42787599563598633
    },
    {
      "epoch": 0.000167974853515625,
      "model_forward_time": 0.11498117446899414,
      "step": 27521
    },
    {
      "epoch": 0.000167974853515625,
      "step": 27521,
      "training_step_time": 0.47278380393981934
    },
    {
      "epoch": 0.00016798095703125,
      "model_forward_time": 0.11535096168518066,
      "step": 27522
    },
    {
      "epoch": 0.00016798095703125,
      "step": 27522,
      "training_step_time": 0.45318603515625
    },
    {
      "epoch": 0.000167987060546875,
      "model_forward_time": 0.11538481712341309,
      "step": 27523
    },
    {
      "epoch": 0.000167987060546875,
      "step": 27523,
      "training_step_time": 0.4023606777191162
    },
    {
      "epoch": 0.0001679931640625,
      "model_forward_time": 0.11501002311706543,
      "step": 27524
    },
    {
      "epoch": 0.0001679931640625,
      "step": 27524,
      "training_step_time": 0.38973116874694824
    },
    {
      "epoch": 0.000167999267578125,
      "model_forward_time": 0.1155245304107666,
      "step": 27525
    },
    {
      "epoch": 0.000167999267578125,
      "step": 27525,
      "training_step_time": 0.38516688346862793
    },
    {
      "epoch": 0.00016800537109375,
      "model_forward_time": 0.11501526832580566,
      "step": 27526
    },
    {
      "epoch": 0.00016800537109375,
      "step": 27526,
      "training_step_time": 0.3942389488220215
    },
    {
      "epoch": 0.000168011474609375,
      "model_forward_time": 0.11539244651794434,
      "step": 27527
    },
    {
      "epoch": 0.000168011474609375,
      "step": 27527,
      "training_step_time": 0.399198055267334
    },
    {
      "epoch": 0.000168017578125,
      "model_forward_time": 0.11564993858337402,
      "step": 27528
    },
    {
      "epoch": 0.000168017578125,
      "step": 27528,
      "training_step_time": 0.4007594585418701
    },
    {
      "epoch": 0.000168023681640625,
      "model_forward_time": 0.11563563346862793,
      "step": 27529
    },
    {
      "epoch": 0.000168023681640625,
      "step": 27529,
      "training_step_time": 0.41330838203430176
    },
    {
      "epoch": 0.00016802978515625,
      "grad_norm": 0.16885514557361603,
      "learning_rate": 6.085336715709049e-05,
      "loss": 0.0418,
      "step": 27530
    },
    {
      "epoch": 0.00016802978515625,
      "model_forward_time": 0.11532092094421387,
      "step": 27530
    },
    {
      "epoch": 0.00016802978515625,
      "step": 27530,
      "training_step_time": 0.4048619270324707
    },
    {
      "epoch": 0.000168035888671875,
      "model_forward_time": 0.11522746086120605,
      "step": 27531
    },
    {
      "epoch": 0.000168035888671875,
      "step": 27531,
      "training_step_time": 0.5699374675750732
    },
    {
      "epoch": 0.0001680419921875,
      "model_forward_time": 0.11451935768127441,
      "step": 27532
    },
    {
      "epoch": 0.0001680419921875,
      "step": 27532,
      "training_step_time": 0.3860507011413574
    },
    {
      "epoch": 0.000168048095703125,
      "model_forward_time": 0.11497950553894043,
      "step": 27533
    },
    {
      "epoch": 0.000168048095703125,
      "step": 27533,
      "training_step_time": 0.38985157012939453
    },
    {
      "epoch": 0.00016805419921875,
      "model_forward_time": 0.11693358421325684,
      "step": 27534
    },
    {
      "epoch": 0.00016805419921875,
      "step": 27534,
      "training_step_time": 0.40973377227783203
    },
    {
      "epoch": 0.000168060302734375,
      "model_forward_time": 0.11459565162658691,
      "step": 27535
    },
    {
      "epoch": 0.000168060302734375,
      "step": 27535,
      "training_step_time": 0.3624258041381836
    },
    {
      "epoch": 0.00016806640625,
      "model_forward_time": 0.11506104469299316,
      "step": 27536
    },
    {
      "epoch": 0.00016806640625,
      "step": 27536,
      "training_step_time": 0.4488201141357422
    },
    {
      "epoch": 0.000168072509765625,
      "model_forward_time": 0.11477780342102051,
      "step": 27537
    },
    {
      "epoch": 0.000168072509765625,
      "step": 27537,
      "training_step_time": 0.5606667995452881
    },
    {
      "epoch": 0.00016807861328125,
      "model_forward_time": 0.11492204666137695,
      "step": 27538
    },
    {
      "epoch": 0.00016807861328125,
      "step": 27538,
      "training_step_time": 0.38558125495910645
    },
    {
      "epoch": 0.000168084716796875,
      "model_forward_time": 0.11458110809326172,
      "step": 27539
    },
    {
      "epoch": 0.000168084716796875,
      "step": 27539,
      "training_step_time": 0.399829626083374
    },
    {
      "epoch": 0.0001680908203125,
      "grad_norm": 0.12010688334703445,
      "learning_rate": 6.0826464751186994e-05,
      "loss": 0.0469,
      "step": 27540
    },
    {
      "epoch": 0.0001680908203125,
      "model_forward_time": 0.11580157279968262,
      "step": 27540
    },
    {
      "epoch": 0.0001680908203125,
      "step": 27540,
      "training_step_time": 0.4043581485748291
    },
    {
      "epoch": 0.000168096923828125,
      "model_forward_time": 0.11503243446350098,
      "step": 27541
    },
    {
      "epoch": 0.000168096923828125,
      "step": 27541,
      "training_step_time": 0.3916785717010498
    },
    {
      "epoch": 0.00016810302734375,
      "model_forward_time": 0.1151583194732666,
      "step": 27542
    },
    {
      "epoch": 0.00016810302734375,
      "step": 27542,
      "training_step_time": 0.40426111221313477
    },
    {
      "epoch": 0.000168109130859375,
      "model_forward_time": 0.11461853981018066,
      "step": 27543
    },
    {
      "epoch": 0.000168109130859375,
      "step": 27543,
      "training_step_time": 0.6577117443084717
    },
    {
      "epoch": 0.000168115234375,
      "model_forward_time": 0.11541557312011719,
      "step": 27544
    },
    {
      "epoch": 0.000168115234375,
      "step": 27544,
      "training_step_time": 0.39292430877685547
    },
    {
      "epoch": 0.000168121337890625,
      "model_forward_time": 0.11551070213317871,
      "step": 27545
    },
    {
      "epoch": 0.000168121337890625,
      "step": 27545,
      "training_step_time": 0.3911399841308594
    },
    {
      "epoch": 0.00016812744140625,
      "model_forward_time": 0.11477899551391602,
      "step": 27546
    },
    {
      "epoch": 0.00016812744140625,
      "step": 27546,
      "training_step_time": 0.4038660526275635
    },
    {
      "epoch": 0.000168133544921875,
      "model_forward_time": 0.11502432823181152,
      "step": 27547
    },
    {
      "epoch": 0.000168133544921875,
      "step": 27547,
      "training_step_time": 0.41360950469970703
    },
    {
      "epoch": 0.0001681396484375,
      "model_forward_time": 0.11486196517944336,
      "step": 27548
    },
    {
      "epoch": 0.0001681396484375,
      "step": 27548,
      "training_step_time": 0.4754924774169922
    },
    {
      "epoch": 0.000168145751953125,
      "model_forward_time": 0.11450791358947754,
      "step": 27549
    },
    {
      "epoch": 0.000168145751953125,
      "step": 27549,
      "training_step_time": 0.37317323684692383
    },
    {
      "epoch": 0.00016815185546875,
      "grad_norm": 0.18225409090518951,
      "learning_rate": 6.079955905648934e-05,
      "loss": 0.0442,
      "step": 27550
    },
    {
      "epoch": 0.00016815185546875,
      "model_forward_time": 0.11524653434753418,
      "step": 27550
    },
    {
      "epoch": 0.00016815185546875,
      "step": 27550,
      "training_step_time": 0.455366849899292
    },
    {
      "epoch": 0.000168157958984375,
      "model_forward_time": 0.1149289608001709,
      "step": 27551
    },
    {
      "epoch": 0.000168157958984375,
      "step": 27551,
      "training_step_time": 0.4007301330566406
    },
    {
      "epoch": 0.0001681640625,
      "model_forward_time": 0.11488914489746094,
      "step": 27552
    },
    {
      "epoch": 0.0001681640625,
      "step": 27552,
      "training_step_time": 0.39640212059020996
    },
    {
      "epoch": 0.000168170166015625,
      "model_forward_time": 0.11439371109008789,
      "step": 27553
    },
    {
      "epoch": 0.000168170166015625,
      "step": 27553,
      "training_step_time": 0.3930997848510742
    },
    {
      "epoch": 0.00016817626953125,
      "model_forward_time": 0.11539793014526367,
      "step": 27554
    },
    {
      "epoch": 0.00016817626953125,
      "step": 27554,
      "training_step_time": 0.3828413486480713
    },
    {
      "epoch": 0.000168182373046875,
      "model_forward_time": 0.11474919319152832,
      "step": 27555
    },
    {
      "epoch": 0.000168182373046875,
      "step": 27555,
      "training_step_time": 0.5971183776855469
    },
    {
      "epoch": 0.0001681884765625,
      "model_forward_time": 0.11506462097167969,
      "step": 27556
    },
    {
      "epoch": 0.0001681884765625,
      "step": 27556,
      "training_step_time": 0.4068126678466797
    },
    {
      "epoch": 0.000168194580078125,
      "model_forward_time": 0.11425423622131348,
      "step": 27557
    },
    {
      "epoch": 0.000168194580078125,
      "step": 27557,
      "training_step_time": 0.3890247344970703
    },
    {
      "epoch": 0.00016820068359375,
      "model_forward_time": 0.11545658111572266,
      "step": 27558
    },
    {
      "epoch": 0.00016820068359375,
      "step": 27558,
      "training_step_time": 0.3806638717651367
    },
    {
      "epoch": 0.000168206787109375,
      "model_forward_time": 0.11510753631591797,
      "step": 27559
    },
    {
      "epoch": 0.000168206787109375,
      "step": 27559,
      "training_step_time": 0.39199090003967285
    },
    {
      "epoch": 0.000168212890625,
      "grad_norm": 0.16302542388439178,
      "learning_rate": 6.077265008117081e-05,
      "loss": 0.0469,
      "step": 27560
    },
    {
      "epoch": 0.000168212890625,
      "model_forward_time": 0.11562728881835938,
      "step": 27560
    },
    {
      "epoch": 0.000168212890625,
      "step": 27560,
      "training_step_time": 0.3949143886566162
    },
    {
      "epoch": 0.000168218994140625,
      "model_forward_time": 0.1151268482208252,
      "step": 27561
    },
    {
      "epoch": 0.000168218994140625,
      "step": 27561,
      "training_step_time": 0.7229132652282715
    },
    {
      "epoch": 0.00016822509765625,
      "model_forward_time": 0.11464071273803711,
      "step": 27562
    },
    {
      "epoch": 0.00016822509765625,
      "step": 27562,
      "training_step_time": 0.43146848678588867
    },
    {
      "epoch": 0.000168231201171875,
      "model_forward_time": 0.1152796745300293,
      "step": 27563
    },
    {
      "epoch": 0.000168231201171875,
      "step": 27563,
      "training_step_time": 0.4067683219909668
    },
    {
      "epoch": 0.0001682373046875,
      "model_forward_time": 0.11452293395996094,
      "step": 27564
    },
    {
      "epoch": 0.0001682373046875,
      "step": 27564,
      "training_step_time": 0.48705220222473145
    },
    {
      "epoch": 0.000168243408203125,
      "model_forward_time": 0.1149604320526123,
      "step": 27565
    },
    {
      "epoch": 0.000168243408203125,
      "step": 27565,
      "training_step_time": 0.4213278293609619
    },
    {
      "epoch": 0.00016824951171875,
      "model_forward_time": 0.11477470397949219,
      "step": 27566
    },
    {
      "epoch": 0.00016824951171875,
      "step": 27566,
      "training_step_time": 0.3750956058502197
    },
    {
      "epoch": 0.000168255615234375,
      "model_forward_time": 0.1147756576538086,
      "step": 27567
    },
    {
      "epoch": 0.000168255615234375,
      "step": 27567,
      "training_step_time": 0.5146083831787109
    },
    {
      "epoch": 0.00016826171875,
      "model_forward_time": 0.11486506462097168,
      "step": 27568
    },
    {
      "epoch": 0.00016826171875,
      "step": 27568,
      "training_step_time": 0.46307921409606934
    },
    {
      "epoch": 0.000168267822265625,
      "model_forward_time": 0.11496639251708984,
      "step": 27569
    },
    {
      "epoch": 0.000168267822265625,
      "step": 27569,
      "training_step_time": 0.39829015731811523
    },
    {
      "epoch": 0.00016827392578125,
      "grad_norm": 0.12220222502946854,
      "learning_rate": 6.074573783340562e-05,
      "loss": 0.0455,
      "step": 27570
    },
    {
      "epoch": 0.00016827392578125,
      "model_forward_time": 0.11493659019470215,
      "step": 27570
    },
    {
      "epoch": 0.00016827392578125,
      "step": 27570,
      "training_step_time": 0.39467763900756836
    },
    {
      "epoch": 0.000168280029296875,
      "model_forward_time": 0.1147603988647461,
      "step": 27571
    },
    {
      "epoch": 0.000168280029296875,
      "step": 27571,
      "training_step_time": 0.38715648651123047
    },
    {
      "epoch": 0.0001682861328125,
      "model_forward_time": 0.11478543281555176,
      "step": 27572
    },
    {
      "epoch": 0.0001682861328125,
      "step": 27572,
      "training_step_time": 0.38042330741882324
    },
    {
      "epoch": 0.000168292236328125,
      "model_forward_time": 0.11403393745422363,
      "step": 27573
    },
    {
      "epoch": 0.000168292236328125,
      "step": 27573,
      "training_step_time": 0.7085633277893066
    },
    {
      "epoch": 0.00016829833984375,
      "model_forward_time": 0.1155693531036377,
      "step": 27574
    },
    {
      "epoch": 0.00016829833984375,
      "step": 27574,
      "training_step_time": 0.3920750617980957
    },
    {
      "epoch": 0.000168304443359375,
      "model_forward_time": 0.1144096851348877,
      "step": 27575
    },
    {
      "epoch": 0.000168304443359375,
      "step": 27575,
      "training_step_time": 0.39364123344421387
    },
    {
      "epoch": 0.000168310546875,
      "model_forward_time": 0.1145927906036377,
      "step": 27576
    },
    {
      "epoch": 0.000168310546875,
      "step": 27576,
      "training_step_time": 0.48637890815734863
    },
    {
      "epoch": 0.000168316650390625,
      "model_forward_time": 0.11622190475463867,
      "step": 27577
    },
    {
      "epoch": 0.000168316650390625,
      "step": 27577,
      "training_step_time": 0.4782395362854004
    },
    {
      "epoch": 0.00016832275390625,
      "model_forward_time": 0.11779594421386719,
      "step": 27578
    },
    {
      "epoch": 0.00016832275390625,
      "step": 27578,
      "training_step_time": 0.47303032875061035
    },
    {
      "epoch": 0.000168328857421875,
      "model_forward_time": 0.11527585983276367,
      "step": 27579
    },
    {
      "epoch": 0.000168328857421875,
      "step": 27579,
      "training_step_time": 0.401594877243042
    },
    {
      "epoch": 0.0001683349609375,
      "grad_norm": 0.15527497231960297,
      "learning_rate": 6.071882232136901e-05,
      "loss": 0.0436,
      "step": 27580
    },
    {
      "epoch": 0.0001683349609375,
      "model_forward_time": 0.11454486846923828,
      "step": 27580
    },
    {
      "epoch": 0.0001683349609375,
      "step": 27580,
      "training_step_time": 0.3848457336425781
    },
    {
      "epoch": 0.000168341064453125,
      "model_forward_time": 0.11474490165710449,
      "step": 27581
    },
    {
      "epoch": 0.000168341064453125,
      "step": 27581,
      "training_step_time": 0.42653322219848633
    },
    {
      "epoch": 0.00016834716796875,
      "model_forward_time": 0.11512398719787598,
      "step": 27582
    },
    {
      "epoch": 0.00016834716796875,
      "step": 27582,
      "training_step_time": 0.39366626739501953
    },
    {
      "epoch": 0.000168353271484375,
      "model_forward_time": 0.11558985710144043,
      "step": 27583
    },
    {
      "epoch": 0.000168353271484375,
      "step": 27583,
      "training_step_time": 0.3913288116455078
    },
    {
      "epoch": 0.000168359375,
      "model_forward_time": 0.11484789848327637,
      "step": 27584
    },
    {
      "epoch": 0.000168359375,
      "step": 27584,
      "training_step_time": 0.3942759037017822
    },
    {
      "epoch": 0.000168365478515625,
      "model_forward_time": 0.11547994613647461,
      "step": 27585
    },
    {
      "epoch": 0.000168365478515625,
      "step": 27585,
      "training_step_time": 0.6288864612579346
    },
    {
      "epoch": 0.00016837158203125,
      "model_forward_time": 0.11469483375549316,
      "step": 27586
    },
    {
      "epoch": 0.00016837158203125,
      "step": 27586,
      "training_step_time": 0.3891019821166992
    },
    {
      "epoch": 0.000168377685546875,
      "model_forward_time": 0.11576533317565918,
      "step": 27587
    },
    {
      "epoch": 0.000168377685546875,
      "step": 27587,
      "training_step_time": 0.39065051078796387
    },
    {
      "epoch": 0.0001683837890625,
      "model_forward_time": 0.11451840400695801,
      "step": 27588
    },
    {
      "epoch": 0.0001683837890625,
      "step": 27588,
      "training_step_time": 0.382418155670166
    },
    {
      "epoch": 0.000168389892578125,
      "model_forward_time": 0.11460685729980469,
      "step": 27589
    },
    {
      "epoch": 0.000168389892578125,
      "step": 27589,
      "training_step_time": 0.3909270763397217
    },
    {
      "epoch": 0.00016839599609375,
      "grad_norm": 0.12250035256147385,
      "learning_rate": 6.069190355323717e-05,
      "loss": 0.0419,
      "step": 27590
    },
    {
      "epoch": 0.00016839599609375,
      "model_forward_time": 0.11519861221313477,
      "step": 27590
    },
    {
      "epoch": 0.00016839599609375,
      "step": 27590,
      "training_step_time": 0.46832871437072754
    },
    {
      "epoch": 0.000168402099609375,
      "model_forward_time": 0.11488151550292969,
      "step": 27591
    },
    {
      "epoch": 0.000168402099609375,
      "step": 27591,
      "training_step_time": 0.555401086807251
    },
    {
      "epoch": 0.000168408203125,
      "model_forward_time": 0.11500191688537598,
      "step": 27592
    },
    {
      "epoch": 0.000168408203125,
      "step": 27592,
      "training_step_time": 0.47655415534973145
    },
    {
      "epoch": 0.000168414306640625,
      "model_forward_time": 0.11423206329345703,
      "step": 27593
    },
    {
      "epoch": 0.000168414306640625,
      "step": 27593,
      "training_step_time": 0.4094967842102051
    },
    {
      "epoch": 0.00016842041015625,
      "model_forward_time": 0.11501431465148926,
      "step": 27594
    },
    {
      "epoch": 0.00016842041015625,
      "step": 27594,
      "training_step_time": 0.40877699851989746
    },
    {
      "epoch": 0.000168426513671875,
      "model_forward_time": 0.1151726245880127,
      "step": 27595
    },
    {
      "epoch": 0.000168426513671875,
      "step": 27595,
      "training_step_time": 0.40866661071777344
    },
    {
      "epoch": 0.0001684326171875,
      "model_forward_time": 0.11456751823425293,
      "step": 27596
    },
    {
      "epoch": 0.0001684326171875,
      "step": 27596,
      "training_step_time": 0.3951287269592285
    },
    {
      "epoch": 0.000168438720703125,
      "model_forward_time": 0.11467218399047852,
      "step": 27597
    },
    {
      "epoch": 0.000168438720703125,
      "step": 27597,
      "training_step_time": 0.40650200843811035
    },
    {
      "epoch": 0.00016844482421875,
      "model_forward_time": 0.11529326438903809,
      "step": 27598
    },
    {
      "epoch": 0.00016844482421875,
      "step": 27598,
      "training_step_time": 0.3883371353149414
    },
    {
      "epoch": 0.000168450927734375,
      "model_forward_time": 0.1153876781463623,
      "step": 27599
    },
    {
      "epoch": 0.000168450927734375,
      "step": 27599,
      "training_step_time": 0.3987541198730469
    },
    {
      "epoch": 0.00016845703125,
      "grad_norm": 0.11323562264442444,
      "learning_rate": 6.066498153718735e-05,
      "loss": 0.0492,
      "step": 27600
    },
    {
      "epoch": 0.00016845703125,
      "model_forward_time": 0.11503887176513672,
      "step": 27600
    },
    {
      "epoch": 0.00016845703125,
      "step": 27600,
      "training_step_time": 0.3904907703399658
    },
    {
      "epoch": 0.000168463134765625,
      "model_forward_time": 0.1150367259979248,
      "step": 27601
    },
    {
      "epoch": 0.000168463134765625,
      "step": 27601,
      "training_step_time": 0.3956265449523926
    },
    {
      "epoch": 0.00016846923828125,
      "model_forward_time": 0.11539435386657715,
      "step": 27602
    },
    {
      "epoch": 0.00016846923828125,
      "step": 27602,
      "training_step_time": 0.3758208751678467
    },
    {
      "epoch": 0.000168475341796875,
      "model_forward_time": 0.11537289619445801,
      "step": 27603
    },
    {
      "epoch": 0.000168475341796875,
      "step": 27603,
      "training_step_time": 0.5906555652618408
    },
    {
      "epoch": 0.0001684814453125,
      "model_forward_time": 0.11483597755432129,
      "step": 27604
    },
    {
      "epoch": 0.0001684814453125,
      "step": 27604,
      "training_step_time": 0.4832296371459961
    },
    {
      "epoch": 0.000168487548828125,
      "model_forward_time": 0.11483216285705566,
      "step": 27605
    },
    {
      "epoch": 0.000168487548828125,
      "step": 27605,
      "training_step_time": 0.3677997589111328
    },
    {
      "epoch": 0.00016849365234375,
      "model_forward_time": 0.11499261856079102,
      "step": 27606
    },
    {
      "epoch": 0.00016849365234375,
      "step": 27606,
      "training_step_time": 0.49477171897888184
    },
    {
      "epoch": 0.000168499755859375,
      "model_forward_time": 0.11436271667480469,
      "step": 27607
    },
    {
      "epoch": 0.000168499755859375,
      "step": 27607,
      "training_step_time": 0.4309051036834717
    },
    {
      "epoch": 0.000168505859375,
      "model_forward_time": 0.11436080932617188,
      "step": 27608
    },
    {
      "epoch": 0.000168505859375,
      "step": 27608,
      "training_step_time": 0.3902914524078369
    },
    {
      "epoch": 0.000168511962890625,
      "model_forward_time": 0.1144258975982666,
      "step": 27609
    },
    {
      "epoch": 0.000168511962890625,
      "step": 27609,
      "training_step_time": 0.38269877433776855
    },
    {
      "epoch": 0.00016851806640625,
      "grad_norm": 0.17815929651260376,
      "learning_rate": 6.0638056281397726e-05,
      "loss": 0.0464,
      "step": 27610
    },
    {
      "epoch": 0.00016851806640625,
      "model_forward_time": 0.11530232429504395,
      "step": 27610
    },
    {
      "epoch": 0.00016851806640625,
      "step": 27610,
      "training_step_time": 0.3921971321105957
    },
    {
      "epoch": 0.000168524169921875,
      "model_forward_time": 0.11525392532348633,
      "step": 27611
    },
    {
      "epoch": 0.000168524169921875,
      "step": 27611,
      "training_step_time": 0.38945627212524414
    },
    {
      "epoch": 0.0001685302734375,
      "model_forward_time": 0.11564159393310547,
      "step": 27612
    },
    {
      "epoch": 0.0001685302734375,
      "step": 27612,
      "training_step_time": 0.3950371742248535
    },
    {
      "epoch": 0.000168536376953125,
      "model_forward_time": 0.11566162109375,
      "step": 27613
    },
    {
      "epoch": 0.000168536376953125,
      "step": 27613,
      "training_step_time": 0.394456148147583
    },
    {
      "epoch": 0.00016854248046875,
      "model_forward_time": 0.11551189422607422,
      "step": 27614
    },
    {
      "epoch": 0.00016854248046875,
      "step": 27614,
      "training_step_time": 0.39185309410095215
    },
    {
      "epoch": 0.000168548583984375,
      "model_forward_time": 0.11519765853881836,
      "step": 27615
    },
    {
      "epoch": 0.000168548583984375,
      "step": 27615,
      "training_step_time": 0.47687196731567383
    },
    {
      "epoch": 0.0001685546875,
      "model_forward_time": 0.11564993858337402,
      "step": 27616
    },
    {
      "epoch": 0.0001685546875,
      "step": 27616,
      "training_step_time": 0.396465539932251
    },
    {
      "epoch": 0.000168560791015625,
      "model_forward_time": 0.1144876480102539,
      "step": 27617
    },
    {
      "epoch": 0.000168560791015625,
      "step": 27617,
      "training_step_time": 0.3952338695526123
    },
    {
      "epoch": 0.00016856689453125,
      "model_forward_time": 0.11595869064331055,
      "step": 27618
    },
    {
      "epoch": 0.00016856689453125,
      "step": 27618,
      "training_step_time": 0.42624688148498535
    },
    {
      "epoch": 0.000168572998046875,
      "model_forward_time": 0.11503338813781738,
      "step": 27619
    },
    {
      "epoch": 0.000168572998046875,
      "step": 27619,
      "training_step_time": 0.41352343559265137
    },
    {
      "epoch": 0.0001685791015625,
      "grad_norm": 0.11899714171886444,
      "learning_rate": 6.0611127794047486e-05,
      "loss": 0.0422,
      "step": 27620
    },
    {
      "epoch": 0.0001685791015625,
      "model_forward_time": 0.11516213417053223,
      "step": 27620
    },
    {
      "epoch": 0.0001685791015625,
      "step": 27620,
      "training_step_time": 0.4134025573730469
    },
    {
      "epoch": 0.000168585205078125,
      "model_forward_time": 0.11707329750061035,
      "step": 27621
    },
    {
      "epoch": 0.000168585205078125,
      "step": 27621,
      "training_step_time": 0.6143040657043457
    },
    {
      "epoch": 0.00016859130859375,
      "model_forward_time": 0.11502408981323242,
      "step": 27622
    },
    {
      "epoch": 0.00016859130859375,
      "step": 27622,
      "training_step_time": 0.4504692554473877
    },
    {
      "epoch": 0.000168597412109375,
      "model_forward_time": 0.11497950553894043,
      "step": 27623
    },
    {
      "epoch": 0.000168597412109375,
      "step": 27623,
      "training_step_time": 0.3877725601196289
    },
    {
      "epoch": 0.000168603515625,
      "model_forward_time": 0.11468148231506348,
      "step": 27624
    },
    {
      "epoch": 0.000168603515625,
      "step": 27624,
      "training_step_time": 0.40123486518859863
    },
    {
      "epoch": 0.000168609619140625,
      "model_forward_time": 0.11516094207763672,
      "step": 27625
    },
    {
      "epoch": 0.000168609619140625,
      "step": 27625,
      "training_step_time": 0.3970050811767578
    },
    {
      "epoch": 0.00016861572265625,
      "model_forward_time": 0.1147000789642334,
      "step": 27626
    },
    {
      "epoch": 0.00016861572265625,
      "step": 27626,
      "training_step_time": 0.3954160213470459
    },
    {
      "epoch": 0.000168621826171875,
      "model_forward_time": 0.11442923545837402,
      "step": 27627
    },
    {
      "epoch": 0.000168621826171875,
      "step": 27627,
      "training_step_time": 0.5654444694519043
    },
    {
      "epoch": 0.0001686279296875,
      "model_forward_time": 0.11594223976135254,
      "step": 27628
    },
    {
      "epoch": 0.0001686279296875,
      "step": 27628,
      "training_step_time": 0.3897387981414795
    },
    {
      "epoch": 0.000168634033203125,
      "model_forward_time": 0.11545896530151367,
      "step": 27629
    },
    {
      "epoch": 0.000168634033203125,
      "step": 27629,
      "training_step_time": 0.37917208671569824
    },
    {
      "epoch": 0.00016864013671875,
      "grad_norm": 0.11898228526115417,
      "learning_rate": 6.0584196083316794e-05,
      "loss": 0.046,
      "step": 27630
    },
    {
      "epoch": 0.00016864013671875,
      "model_forward_time": 0.11508631706237793,
      "step": 27630
    },
    {
      "epoch": 0.00016864013671875,
      "step": 27630,
      "training_step_time": 0.39398694038391113
    },
    {
      "epoch": 0.000168646240234375,
      "model_forward_time": 0.11467576026916504,
      "step": 27631
    },
    {
      "epoch": 0.000168646240234375,
      "step": 27631,
      "training_step_time": 0.4028143882751465
    },
    {
      "epoch": 0.00016865234375,
      "model_forward_time": 0.11540985107421875,
      "step": 27632
    },
    {
      "epoch": 0.00016865234375,
      "step": 27632,
      "training_step_time": 0.4165024757385254
    },
    {
      "epoch": 0.000168658447265625,
      "model_forward_time": 0.11539149284362793,
      "step": 27633
    },
    {
      "epoch": 0.000168658447265625,
      "step": 27633,
      "training_step_time": 0.6723525524139404
    },
    {
      "epoch": 0.00016866455078125,
      "model_forward_time": 0.11501646041870117,
      "step": 27634
    },
    {
      "epoch": 0.00016866455078125,
      "step": 27634,
      "training_step_time": 0.437335729598999
    },
    {
      "epoch": 0.000168670654296875,
      "model_forward_time": 0.11475944519042969,
      "step": 27635
    },
    {
      "epoch": 0.000168670654296875,
      "step": 27635,
      "training_step_time": 0.4175574779510498
    },
    {
      "epoch": 0.0001686767578125,
      "model_forward_time": 0.11607527732849121,
      "step": 27636
    },
    {
      "epoch": 0.0001686767578125,
      "step": 27636,
      "training_step_time": 0.4501938819885254
    },
    {
      "epoch": 0.000168682861328125,
      "model_forward_time": 0.11570405960083008,
      "step": 27637
    },
    {
      "epoch": 0.000168682861328125,
      "step": 27637,
      "training_step_time": 0.38695764541625977
    },
    {
      "epoch": 0.00016868896484375,
      "model_forward_time": 0.11496877670288086,
      "step": 27638
    },
    {
      "epoch": 0.00016868896484375,
      "step": 27638,
      "training_step_time": 0.38611412048339844
    },
    {
      "epoch": 0.000168695068359375,
      "model_forward_time": 0.11483287811279297,
      "step": 27639
    },
    {
      "epoch": 0.000168695068359375,
      "step": 27639,
      "training_step_time": 0.47776103019714355
    },
    {
      "epoch": 0.000168701171875,
      "grad_norm": 0.14427632093429565,
      "learning_rate": 6.055726115738678e-05,
      "loss": 0.0442,
      "step": 27640
    },
    {
      "epoch": 0.000168701171875,
      "model_forward_time": 0.11498022079467773,
      "step": 27640
    },
    {
      "epoch": 0.000168701171875,
      "step": 27640,
      "training_step_time": 0.39694714546203613
    },
    {
      "epoch": 0.000168707275390625,
      "model_forward_time": 0.11460566520690918,
      "step": 27641
    },
    {
      "epoch": 0.000168707275390625,
      "step": 27641,
      "training_step_time": 0.3902900218963623
    },
    {
      "epoch": 0.00016871337890625,
      "model_forward_time": 0.11468744277954102,
      "step": 27642
    },
    {
      "epoch": 0.00016871337890625,
      "step": 27642,
      "training_step_time": 0.3970832824707031
    },
    {
      "epoch": 0.000168719482421875,
      "model_forward_time": 0.11528491973876953,
      "step": 27643
    },
    {
      "epoch": 0.000168719482421875,
      "step": 27643,
      "training_step_time": 0.39995574951171875
    },
    {
      "epoch": 0.0001687255859375,
      "model_forward_time": 0.114837646484375,
      "step": 27644
    },
    {
      "epoch": 0.0001687255859375,
      "step": 27644,
      "training_step_time": 0.39655089378356934
    },
    {
      "epoch": 0.000168731689453125,
      "model_forward_time": 0.11463308334350586,
      "step": 27645
    },
    {
      "epoch": 0.000168731689453125,
      "step": 27645,
      "training_step_time": 0.5464107990264893
    },
    {
      "epoch": 0.00016873779296875,
      "model_forward_time": 0.11471438407897949,
      "step": 27646
    },
    {
      "epoch": 0.00016873779296875,
      "step": 27646,
      "training_step_time": 0.4059488773345947
    },
    {
      "epoch": 0.000168743896484375,
      "model_forward_time": 0.1148378849029541,
      "step": 27647
    },
    {
      "epoch": 0.000168743896484375,
      "step": 27647,
      "training_step_time": 0.41020631790161133
    },
    {
      "epoch": 0.00016875,
      "model_forward_time": 0.11510348320007324,
      "step": 27648
    },
    {
      "epoch": 0.00016875,
      "step": 27648,
      "training_step_time": 0.4688129425048828
    },
    {
      "epoch": 0.000168756103515625,
      "model_forward_time": 0.11537933349609375,
      "step": 27649
    },
    {
      "epoch": 0.000168756103515625,
      "step": 27649,
      "training_step_time": 0.4750547409057617
    },
    {
      "epoch": 0.00016876220703125,
      "grad_norm": 0.13342586159706116,
      "learning_rate": 6.053032302443959e-05,
      "loss": 0.0512,
      "step": 27650
    },
    {
      "epoch": 0.00016876220703125,
      "model_forward_time": 0.1152803897857666,
      "step": 27650
    },
    {
      "epoch": 0.00016876220703125,
      "step": 27650,
      "training_step_time": 0.396831750869751
    },
    {
      "epoch": 0.000168768310546875,
      "model_forward_time": 0.11549496650695801,
      "step": 27651
    },
    {
      "epoch": 0.000168768310546875,
      "step": 27651,
      "training_step_time": 0.4380621910095215
    },
    {
      "epoch": 0.0001687744140625,
      "model_forward_time": 0.11476325988769531,
      "step": 27652
    },
    {
      "epoch": 0.0001687744140625,
      "step": 27652,
      "training_step_time": 0.4031200408935547
    },
    {
      "epoch": 0.000168780517578125,
      "model_forward_time": 0.11447978019714355,
      "step": 27653
    },
    {
      "epoch": 0.000168780517578125,
      "step": 27653,
      "training_step_time": 0.39609789848327637
    },
    {
      "epoch": 0.00016878662109375,
      "model_forward_time": 0.11540603637695312,
      "step": 27654
    },
    {
      "epoch": 0.00016878662109375,
      "step": 27654,
      "training_step_time": 0.39240479469299316
    },
    {
      "epoch": 0.000168792724609375,
      "model_forward_time": 0.11510777473449707,
      "step": 27655
    },
    {
      "epoch": 0.000168792724609375,
      "step": 27655,
      "training_step_time": 0.3972322940826416
    },
    {
      "epoch": 0.000168798828125,
      "model_forward_time": 0.1166086196899414,
      "step": 27656
    },
    {
      "epoch": 0.000168798828125,
      "step": 27656,
      "training_step_time": 0.3941004276275635
    },
    {
      "epoch": 0.000168804931640625,
      "model_forward_time": 0.11529779434204102,
      "step": 27657
    },
    {
      "epoch": 0.000168804931640625,
      "step": 27657,
      "training_step_time": 0.6296017169952393
    },
    {
      "epoch": 0.00016881103515625,
      "model_forward_time": 0.1150212287902832,
      "step": 27658
    },
    {
      "epoch": 0.00016881103515625,
      "step": 27658,
      "training_step_time": 0.3902411460876465
    },
    {
      "epoch": 0.000168817138671875,
      "model_forward_time": 0.11520576477050781,
      "step": 27659
    },
    {
      "epoch": 0.000168817138671875,
      "step": 27659,
      "training_step_time": 0.37979793548583984
    },
    {
      "epoch": 0.0001688232421875,
      "grad_norm": 0.11947178095579147,
      "learning_rate": 6.05033816926583e-05,
      "loss": 0.0453,
      "step": 27660
    },
    {
      "epoch": 0.0001688232421875,
      "model_forward_time": 0.11438250541687012,
      "step": 27660
    },
    {
      "epoch": 0.0001688232421875,
      "step": 27660,
      "training_step_time": 0.4810781478881836
    },
    {
      "epoch": 0.000168829345703125,
      "model_forward_time": 0.11514592170715332,
      "step": 27661
    },
    {
      "epoch": 0.000168829345703125,
      "step": 27661,
      "training_step_time": 0.4052693843841553
    },
    {
      "epoch": 0.00016883544921875,
      "model_forward_time": 0.11560273170471191,
      "step": 27662
    },
    {
      "epoch": 0.00016883544921875,
      "step": 27662,
      "training_step_time": 0.49152541160583496
    },
    {
      "epoch": 0.000168841552734375,
      "model_forward_time": 0.11565566062927246,
      "step": 27663
    },
    {
      "epoch": 0.000168841552734375,
      "step": 27663,
      "training_step_time": 0.48692774772644043
    },
    {
      "epoch": 0.00016884765625,
      "model_forward_time": 0.11525774002075195,
      "step": 27664
    },
    {
      "epoch": 0.00016884765625,
      "step": 27664,
      "training_step_time": 0.43961191177368164
    },
    {
      "epoch": 0.000168853759765625,
      "model_forward_time": 0.1158602237701416,
      "step": 27665
    },
    {
      "epoch": 0.000168853759765625,
      "step": 27665,
      "training_step_time": 0.3797144889831543
    },
    {
      "epoch": 0.00016885986328125,
      "model_forward_time": 0.11438393592834473,
      "step": 27666
    },
    {
      "epoch": 0.00016885986328125,
      "step": 27666,
      "training_step_time": 0.40486717224121094
    },
    {
      "epoch": 0.000168865966796875,
      "model_forward_time": 0.1151275634765625,
      "step": 27667
    },
    {
      "epoch": 0.000168865966796875,
      "step": 27667,
      "training_step_time": 0.39116668701171875
    },
    {
      "epoch": 0.0001688720703125,
      "model_forward_time": 0.11462068557739258,
      "step": 27668
    },
    {
      "epoch": 0.0001688720703125,
      "step": 27668,
      "training_step_time": 0.3884274959564209
    },
    {
      "epoch": 0.000168878173828125,
      "model_forward_time": 0.11545968055725098,
      "step": 27669
    },
    {
      "epoch": 0.000168878173828125,
      "step": 27669,
      "training_step_time": 0.5809593200683594
    },
    {
      "epoch": 0.00016888427734375,
      "grad_norm": 0.17784978449344635,
      "learning_rate": 6.0476437170226975e-05,
      "loss": 0.0489,
      "step": 27670
    },
    {
      "epoch": 0.00016888427734375,
      "model_forward_time": 0.11460089683532715,
      "step": 27670
    },
    {
      "epoch": 0.00016888427734375,
      "step": 27670,
      "training_step_time": 0.396160364151001
    },
    {
      "epoch": 0.000168890380859375,
      "model_forward_time": 0.11459612846374512,
      "step": 27671
    },
    {
      "epoch": 0.000168890380859375,
      "step": 27671,
      "training_step_time": 0.39233875274658203
    },
    {
      "epoch": 0.000168896484375,
      "model_forward_time": 0.11510705947875977,
      "step": 27672
    },
    {
      "epoch": 0.000168896484375,
      "step": 27672,
      "training_step_time": 0.403261661529541
    },
    {
      "epoch": 0.000168902587890625,
      "model_forward_time": 0.1151728630065918,
      "step": 27673
    },
    {
      "epoch": 0.000168902587890625,
      "step": 27673,
      "training_step_time": 0.3959650993347168
    },
    {
      "epoch": 0.00016890869140625,
      "model_forward_time": 0.11480140686035156,
      "step": 27674
    },
    {
      "epoch": 0.00016890869140625,
      "step": 27674,
      "training_step_time": 0.48633646965026855
    },
    {
      "epoch": 0.000168914794921875,
      "model_forward_time": 0.11538887023925781,
      "step": 27675
    },
    {
      "epoch": 0.000168914794921875,
      "step": 27675,
      "training_step_time": 0.49346423149108887
    },
    {
      "epoch": 0.0001689208984375,
      "model_forward_time": 0.11524724960327148,
      "step": 27676
    },
    {
      "epoch": 0.0001689208984375,
      "step": 27676,
      "training_step_time": 0.4333477020263672
    },
    {
      "epoch": 0.000168927001953125,
      "model_forward_time": 0.11532068252563477,
      "step": 27677
    },
    {
      "epoch": 0.000168927001953125,
      "step": 27677,
      "training_step_time": 0.4574160575866699
    },
    {
      "epoch": 0.00016893310546875,
      "model_forward_time": 0.1154778003692627,
      "step": 27678
    },
    {
      "epoch": 0.00016893310546875,
      "step": 27678,
      "training_step_time": 0.4560577869415283
    },
    {
      "epoch": 0.000168939208984375,
      "model_forward_time": 0.11829924583435059,
      "step": 27679
    },
    {
      "epoch": 0.000168939208984375,
      "step": 27679,
      "training_step_time": 0.37825822830200195
    },
    {
      "epoch": 0.0001689453125,
      "grad_norm": 0.132706880569458,
      "learning_rate": 6.044948946533064e-05,
      "loss": 0.0456,
      "step": 27680
    },
    {
      "epoch": 0.0001689453125,
      "model_forward_time": 0.11552286148071289,
      "step": 27680
    },
    {
      "epoch": 0.0001689453125,
      "step": 27680,
      "training_step_time": 0.3855555057525635
    },
    {
      "epoch": 0.000168951416015625,
      "model_forward_time": 0.11494708061218262,
      "step": 27681
    },
    {
      "epoch": 0.000168951416015625,
      "step": 27681,
      "training_step_time": 0.38150787353515625
    },
    {
      "epoch": 0.00016895751953125,
      "model_forward_time": 0.11585259437561035,
      "step": 27682
    },
    {
      "epoch": 0.00016895751953125,
      "step": 27682,
      "training_step_time": 0.38577771186828613
    },
    {
      "epoch": 0.000168963623046875,
      "model_forward_time": 0.11521267890930176,
      "step": 27683
    },
    {
      "epoch": 0.000168963623046875,
      "step": 27683,
      "training_step_time": 0.3892843723297119
    },
    {
      "epoch": 0.0001689697265625,
      "model_forward_time": 0.11602783203125,
      "step": 27684
    },
    {
      "epoch": 0.0001689697265625,
      "step": 27684,
      "training_step_time": 0.3929622173309326
    },
    {
      "epoch": 0.000168975830078125,
      "model_forward_time": 0.11557555198669434,
      "step": 27685
    },
    {
      "epoch": 0.000168975830078125,
      "step": 27685,
      "training_step_time": 0.3864402770996094
    },
    {
      "epoch": 0.00016898193359375,
      "model_forward_time": 0.11541986465454102,
      "step": 27686
    },
    {
      "epoch": 0.00016898193359375,
      "step": 27686,
      "training_step_time": 0.3936307430267334
    },
    {
      "epoch": 0.000168988037109375,
      "model_forward_time": 0.11521720886230469,
      "step": 27687
    },
    {
      "epoch": 0.000168988037109375,
      "step": 27687,
      "training_step_time": 0.6634848117828369
    },
    {
      "epoch": 0.000168994140625,
      "model_forward_time": 0.11472439765930176,
      "step": 27688
    },
    {
      "epoch": 0.000168994140625,
      "step": 27688,
      "training_step_time": 0.42295360565185547
    },
    {
      "epoch": 0.000169000244140625,
      "model_forward_time": 0.11469292640686035,
      "step": 27689
    },
    {
      "epoch": 0.000169000244140625,
      "step": 27689,
      "training_step_time": 0.40200281143188477
    },
    {
      "epoch": 0.00016900634765625,
      "grad_norm": 0.1272280365228653,
      "learning_rate": 6.042253858615532e-05,
      "loss": 0.0428,
      "step": 27690
    },
    {
      "epoch": 0.00016900634765625,
      "model_forward_time": 0.11475825309753418,
      "step": 27690
    },
    {
      "epoch": 0.00016900634765625,
      "step": 27690,
      "training_step_time": 0.4936709403991699
    },
    {
      "epoch": 0.000169012451171875,
      "model_forward_time": 0.11396002769470215,
      "step": 27691
    },
    {
      "epoch": 0.000169012451171875,
      "step": 27691,
      "training_step_time": 0.43506503105163574
    },
    {
      "epoch": 0.0001690185546875,
      "model_forward_time": 0.11483049392700195,
      "step": 27692
    },
    {
      "epoch": 0.0001690185546875,
      "step": 27692,
      "training_step_time": 0.4220309257507324
    },
    {
      "epoch": 0.000169024658203125,
      "model_forward_time": 0.11455202102661133,
      "step": 27693
    },
    {
      "epoch": 0.000169024658203125,
      "step": 27693,
      "training_step_time": 0.45261359214782715
    },
    {
      "epoch": 0.00016903076171875,
      "model_forward_time": 0.11515188217163086,
      "step": 27694
    },
    {
      "epoch": 0.00016903076171875,
      "step": 27694,
      "training_step_time": 0.3980872631072998
    },
    {
      "epoch": 0.000169036865234375,
      "model_forward_time": 0.11487817764282227,
      "step": 27695
    },
    {
      "epoch": 0.000169036865234375,
      "step": 27695,
      "training_step_time": 0.3910102844238281
    },
    {
      "epoch": 0.00016904296875,
      "model_forward_time": 0.11475515365600586,
      "step": 27696
    },
    {
      "epoch": 0.00016904296875,
      "step": 27696,
      "training_step_time": 0.39882898330688477
    },
    {
      "epoch": 0.000169049072265625,
      "model_forward_time": 0.11585783958435059,
      "step": 27697
    },
    {
      "epoch": 0.000169049072265625,
      "step": 27697,
      "training_step_time": 0.3871123790740967
    },
    {
      "epoch": 0.00016905517578125,
      "model_forward_time": 0.11507034301757812,
      "step": 27698
    },
    {
      "epoch": 0.00016905517578125,
      "step": 27698,
      "training_step_time": 0.4001033306121826
    },
    {
      "epoch": 0.000169061279296875,
      "model_forward_time": 0.11539268493652344,
      "step": 27699
    },
    {
      "epoch": 0.000169061279296875,
      "step": 27699,
      "training_step_time": 0.6915607452392578
    },
    {
      "epoch": 0.0001690673828125,
      "grad_norm": 0.13840191066265106,
      "learning_rate": 6.0395584540887963e-05,
      "loss": 0.0471,
      "step": 27700
    },
    {
      "epoch": 0.0001690673828125,
      "model_forward_time": 0.11475706100463867,
      "step": 27700
    },
    {
      "epoch": 0.0001690673828125,
      "step": 27700,
      "training_step_time": 0.3928215503692627
    },
    {
      "epoch": 0.000169073486328125,
      "model_forward_time": 0.11612486839294434,
      "step": 27701
    },
    {
      "epoch": 0.000169073486328125,
      "step": 27701,
      "training_step_time": 0.40622806549072266
    },
    {
      "epoch": 0.00016907958984375,
      "model_forward_time": 0.11468338966369629,
      "step": 27702
    },
    {
      "epoch": 0.00016907958984375,
      "step": 27702,
      "training_step_time": 0.4255504608154297
    },
    {
      "epoch": 0.000169085693359375,
      "model_forward_time": 0.1165764331817627,
      "step": 27703
    },
    {
      "epoch": 0.000169085693359375,
      "step": 27703,
      "training_step_time": 0.4113352298736572
    },
    {
      "epoch": 0.000169091796875,
      "model_forward_time": 0.1147775650024414,
      "step": 27704
    },
    {
      "epoch": 0.000169091796875,
      "step": 27704,
      "training_step_time": 0.3615584373474121
    },
    {
      "epoch": 0.000169097900390625,
      "model_forward_time": 0.11538028717041016,
      "step": 27705
    },
    {
      "epoch": 0.000169097900390625,
      "step": 27705,
      "training_step_time": 0.452242374420166
    },
    {
      "epoch": 0.00016910400390625,
      "model_forward_time": 0.11536741256713867,
      "step": 27706
    },
    {
      "epoch": 0.00016910400390625,
      "step": 27706,
      "training_step_time": 0.4265320301055908
    },
    {
      "epoch": 0.000169110107421875,
      "model_forward_time": 0.11554121971130371,
      "step": 27707
    },
    {
      "epoch": 0.000169110107421875,
      "step": 27707,
      "training_step_time": 0.3911139965057373
    },
    {
      "epoch": 0.0001691162109375,
      "model_forward_time": 0.11569070816040039,
      "step": 27708
    },
    {
      "epoch": 0.0001691162109375,
      "step": 27708,
      "training_step_time": 0.39759159088134766
    },
    {
      "epoch": 0.000169122314453125,
      "model_forward_time": 0.1147916316986084,
      "step": 27709
    },
    {
      "epoch": 0.000169122314453125,
      "step": 27709,
      "training_step_time": 0.40054988861083984
    },
    {
      "epoch": 0.00016912841796875,
      "grad_norm": 0.1354454904794693,
      "learning_rate": 6.036862733771651e-05,
      "loss": 0.0402,
      "step": 27710
    },
    {
      "epoch": 0.00016912841796875,
      "model_forward_time": 0.11492538452148438,
      "step": 27710
    },
    {
      "epoch": 0.00016912841796875,
      "step": 27710,
      "training_step_time": 0.3836655616760254
    },
    {
      "epoch": 0.000169134521484375,
      "model_forward_time": 0.11439847946166992,
      "step": 27711
    },
    {
      "epoch": 0.000169134521484375,
      "step": 27711,
      "training_step_time": 0.5614633560180664
    },
    {
      "epoch": 0.000169140625,
      "model_forward_time": 0.115631103515625,
      "step": 27712
    },
    {
      "epoch": 0.000169140625,
      "step": 27712,
      "training_step_time": 0.3876476287841797
    },
    {
      "epoch": 0.000169146728515625,
      "model_forward_time": 0.11484622955322266,
      "step": 27713
    },
    {
      "epoch": 0.000169146728515625,
      "step": 27713,
      "training_step_time": 0.38552045822143555
    },
    {
      "epoch": 0.00016915283203125,
      "model_forward_time": 0.11566281318664551,
      "step": 27714
    },
    {
      "epoch": 0.00016915283203125,
      "step": 27714,
      "training_step_time": 0.400256872177124
    },
    {
      "epoch": 0.000169158935546875,
      "model_forward_time": 0.11572861671447754,
      "step": 27715
    },
    {
      "epoch": 0.000169158935546875,
      "step": 27715,
      "training_step_time": 0.4592757225036621
    },
    {
      "epoch": 0.0001691650390625,
      "model_forward_time": 0.11518979072570801,
      "step": 27716
    },
    {
      "epoch": 0.0001691650390625,
      "step": 27716,
      "training_step_time": 0.4752316474914551
    },
    {
      "epoch": 0.000169171142578125,
      "model_forward_time": 0.11520552635192871,
      "step": 27717
    },
    {
      "epoch": 0.000169171142578125,
      "step": 27717,
      "training_step_time": 0.5975558757781982
    },
    {
      "epoch": 0.00016917724609375,
      "model_forward_time": 0.1143183708190918,
      "step": 27718
    },
    {
      "epoch": 0.00016917724609375,
      "step": 27718,
      "training_step_time": 0.4058494567871094
    },
    {
      "epoch": 0.000169183349609375,
      "model_forward_time": 0.11462140083312988,
      "step": 27719
    },
    {
      "epoch": 0.000169183349609375,
      "step": 27719,
      "training_step_time": 0.43798232078552246
    },
    {
      "epoch": 0.000169189453125,
      "grad_norm": 0.10430086404085159,
      "learning_rate": 6.034166698482984e-05,
      "loss": 0.0433,
      "step": 27720
    },
    {
      "epoch": 0.000169189453125,
      "model_forward_time": 0.11464715003967285,
      "step": 27720
    },
    {
      "epoch": 0.000169189453125,
      "step": 27720,
      "training_step_time": 0.43825387954711914
    },
    {
      "epoch": 0.000169195556640625,
      "model_forward_time": 0.11451888084411621,
      "step": 27721
    },
    {
      "epoch": 0.000169195556640625,
      "step": 27721,
      "training_step_time": 0.39011549949645996
    },
    {
      "epoch": 0.00016920166015625,
      "model_forward_time": 0.11468291282653809,
      "step": 27722
    },
    {
      "epoch": 0.00016920166015625,
      "step": 27722,
      "training_step_time": 0.38297057151794434
    },
    {
      "epoch": 0.000169207763671875,
      "model_forward_time": 0.11505508422851562,
      "step": 27723
    },
    {
      "epoch": 0.000169207763671875,
      "step": 27723,
      "training_step_time": 0.419694185256958
    },
    {
      "epoch": 0.0001692138671875,
      "model_forward_time": 0.1149435043334961,
      "step": 27724
    },
    {
      "epoch": 0.0001692138671875,
      "step": 27724,
      "training_step_time": 0.40018558502197266
    },
    {
      "epoch": 0.000169219970703125,
      "model_forward_time": 0.11527419090270996,
      "step": 27725
    },
    {
      "epoch": 0.000169219970703125,
      "step": 27725,
      "training_step_time": 0.3997206687927246
    },
    {
      "epoch": 0.00016922607421875,
      "model_forward_time": 0.11556005477905273,
      "step": 27726
    },
    {
      "epoch": 0.00016922607421875,
      "step": 27726,
      "training_step_time": 0.39954137802124023
    },
    {
      "epoch": 0.000169232177734375,
      "model_forward_time": 0.11556029319763184,
      "step": 27727
    },
    {
      "epoch": 0.000169232177734375,
      "step": 27727,
      "training_step_time": 0.41079187393188477
    },
    {
      "epoch": 0.00016923828125,
      "model_forward_time": 0.11472845077514648,
      "step": 27728
    },
    {
      "epoch": 0.00016923828125,
      "step": 27728,
      "training_step_time": 0.4556770324707031
    },
    {
      "epoch": 0.000169244384765625,
      "model_forward_time": 0.11540079116821289,
      "step": 27729
    },
    {
      "epoch": 0.000169244384765625,
      "step": 27729,
      "training_step_time": 0.6002168655395508
    },
    {
      "epoch": 0.00016925048828125,
      "grad_norm": 0.12128791213035583,
      "learning_rate": 6.03147034904178e-05,
      "loss": 0.0487,
      "step": 27730
    },
    {
      "epoch": 0.00016925048828125,
      "model_forward_time": 0.1154639720916748,
      "step": 27730
    },
    {
      "epoch": 0.00016925048828125,
      "step": 27730,
      "training_step_time": 0.43232083320617676
    },
    {
      "epoch": 0.000169256591796875,
      "model_forward_time": 0.11532878875732422,
      "step": 27731
    },
    {
      "epoch": 0.000169256591796875,
      "step": 27731,
      "training_step_time": 0.43739914894104004
    },
    {
      "epoch": 0.0001692626953125,
      "model_forward_time": 0.11474227905273438,
      "step": 27732
    },
    {
      "epoch": 0.0001692626953125,
      "step": 27732,
      "training_step_time": 0.36908936500549316
    },
    {
      "epoch": 0.000169268798828125,
      "model_forward_time": 0.11391854286193848,
      "step": 27733
    },
    {
      "epoch": 0.000169268798828125,
      "step": 27733,
      "training_step_time": 0.43682050704956055
    },
    {
      "epoch": 0.00016927490234375,
      "model_forward_time": 0.1150507926940918,
      "step": 27734
    },
    {
      "epoch": 0.00016927490234375,
      "step": 27734,
      "training_step_time": 0.42345452308654785
    },
    {
      "epoch": 0.000169281005859375,
      "model_forward_time": 0.11580109596252441,
      "step": 27735
    },
    {
      "epoch": 0.000169281005859375,
      "step": 27735,
      "training_step_time": 0.3947460651397705
    },
    {
      "epoch": 0.000169287109375,
      "model_forward_time": 0.11491584777832031,
      "step": 27736
    },
    {
      "epoch": 0.000169287109375,
      "step": 27736,
      "training_step_time": 0.39711475372314453
    },
    {
      "epoch": 0.000169293212890625,
      "model_forward_time": 0.11567401885986328,
      "step": 27737
    },
    {
      "epoch": 0.000169293212890625,
      "step": 27737,
      "training_step_time": 0.3992342948913574
    },
    {
      "epoch": 0.00016929931640625,
      "model_forward_time": 0.11490726470947266,
      "step": 27738
    },
    {
      "epoch": 0.00016929931640625,
      "step": 27738,
      "training_step_time": 0.3935582637786865
    },
    {
      "epoch": 0.000169305419921875,
      "model_forward_time": 0.1155388355255127,
      "step": 27739
    },
    {
      "epoch": 0.000169305419921875,
      "step": 27739,
      "training_step_time": 0.41065120697021484
    },
    {
      "epoch": 0.0001693115234375,
      "grad_norm": 0.09838340431451797,
      "learning_rate": 6.0287736862671175e-05,
      "loss": 0.0483,
      "step": 27740
    },
    {
      "epoch": 0.0001693115234375,
      "model_forward_time": 0.11473393440246582,
      "step": 27740
    },
    {
      "epoch": 0.0001693115234375,
      "step": 27740,
      "training_step_time": 0.3899714946746826
    },
    {
      "epoch": 0.000169317626953125,
      "model_forward_time": 0.11592364311218262,
      "step": 27741
    },
    {
      "epoch": 0.000169317626953125,
      "step": 27741,
      "training_step_time": 0.5679948329925537
    },
    {
      "epoch": 0.00016932373046875,
      "model_forward_time": 0.11461806297302246,
      "step": 27742
    },
    {
      "epoch": 0.00016932373046875,
      "step": 27742,
      "training_step_time": 0.4316122531890869
    },
    {
      "epoch": 0.000169329833984375,
      "model_forward_time": 0.1144721508026123,
      "step": 27743
    },
    {
      "epoch": 0.000169329833984375,
      "step": 27743,
      "training_step_time": 0.3895912170410156
    },
    {
      "epoch": 0.0001693359375,
      "model_forward_time": 0.11549592018127441,
      "step": 27744
    },
    {
      "epoch": 0.0001693359375,
      "step": 27744,
      "training_step_time": 0.48316454887390137
    },
    {
      "epoch": 0.000169342041015625,
      "model_forward_time": 0.11470985412597656,
      "step": 27745
    },
    {
      "epoch": 0.000169342041015625,
      "step": 27745,
      "training_step_time": 0.43880748748779297
    },
    {
      "epoch": 0.00016934814453125,
      "model_forward_time": 0.11450695991516113,
      "step": 27746
    },
    {
      "epoch": 0.00016934814453125,
      "step": 27746,
      "training_step_time": 0.4131495952606201
    },
    {
      "epoch": 0.000169354248046875,
      "model_forward_time": 0.11505007743835449,
      "step": 27747
    },
    {
      "epoch": 0.000169354248046875,
      "step": 27747,
      "training_step_time": 0.5020291805267334
    },
    {
      "epoch": 0.0001693603515625,
      "model_forward_time": 0.11438155174255371,
      "step": 27748
    },
    {
      "epoch": 0.0001693603515625,
      "step": 27748,
      "training_step_time": 0.3965640068054199
    },
    {
      "epoch": 0.000169366455078125,
      "model_forward_time": 0.1145620346069336,
      "step": 27749
    },
    {
      "epoch": 0.000169366455078125,
      "step": 27749,
      "training_step_time": 0.38464832305908203
    },
    {
      "epoch": 0.00016937255859375,
      "grad_norm": 0.112092524766922,
      "learning_rate": 6.026076710978171e-05,
      "loss": 0.0451,
      "step": 27750
    },
    {
      "epoch": 0.00016937255859375,
      "model_forward_time": 0.11479902267456055,
      "step": 27750
    },
    {
      "epoch": 0.00016937255859375,
      "step": 27750,
      "training_step_time": 0.38863086700439453
    },
    {
      "epoch": 0.000169378662109375,
      "model_forward_time": 0.11478924751281738,
      "step": 27751
    },
    {
      "epoch": 0.000169378662109375,
      "step": 27751,
      "training_step_time": 0.38927268981933594
    },
    {
      "epoch": 0.000169384765625,
      "model_forward_time": 0.11564135551452637,
      "step": 27752
    },
    {
      "epoch": 0.000169384765625,
      "step": 27752,
      "training_step_time": 0.394773006439209
    },
    {
      "epoch": 0.000169390869140625,
      "model_forward_time": 0.11477088928222656,
      "step": 27753
    },
    {
      "epoch": 0.000169390869140625,
      "step": 27753,
      "training_step_time": 0.587496280670166
    },
    {
      "epoch": 0.00016939697265625,
      "model_forward_time": 0.11495733261108398,
      "step": 27754
    },
    {
      "epoch": 0.00016939697265625,
      "step": 27754,
      "training_step_time": 0.3999905586242676
    },
    {
      "epoch": 0.000169403076171875,
      "model_forward_time": 0.11434292793273926,
      "step": 27755
    },
    {
      "epoch": 0.000169403076171875,
      "step": 27755,
      "training_step_time": 0.44574499130249023
    },
    {
      "epoch": 0.0001694091796875,
      "model_forward_time": 0.11480236053466797,
      "step": 27756
    },
    {
      "epoch": 0.0001694091796875,
      "step": 27756,
      "training_step_time": 0.38879895210266113
    },
    {
      "epoch": 0.000169415283203125,
      "model_forward_time": 0.1147768497467041,
      "step": 27757
    },
    {
      "epoch": 0.000169415283203125,
      "step": 27757,
      "training_step_time": 0.3992481231689453
    },
    {
      "epoch": 0.00016942138671875,
      "model_forward_time": 0.11464953422546387,
      "step": 27758
    },
    {
      "epoch": 0.00016942138671875,
      "step": 27758,
      "training_step_time": 0.3967618942260742
    },
    {
      "epoch": 0.000169427490234375,
      "model_forward_time": 0.11496400833129883,
      "step": 27759
    },
    {
      "epoch": 0.000169427490234375,
      "step": 27759,
      "training_step_time": 0.6651778221130371
    },
    {
      "epoch": 0.00016943359375,
      "grad_norm": 0.10832028836011887,
      "learning_rate": 6.023379423994214e-05,
      "loss": 0.0461,
      "step": 27760
    },
    {
      "epoch": 0.00016943359375,
      "model_forward_time": 0.11493134498596191,
      "step": 27760
    },
    {
      "epoch": 0.00016943359375,
      "step": 27760,
      "training_step_time": 0.4179801940917969
    },
    {
      "epoch": 0.000169439697265625,
      "model_forward_time": 0.1148366928100586,
      "step": 27761
    },
    {
      "epoch": 0.000169439697265625,
      "step": 27761,
      "training_step_time": 0.5020537376403809
    },
    {
      "epoch": 0.00016944580078125,
      "model_forward_time": 0.11464500427246094,
      "step": 27762
    },
    {
      "epoch": 0.00016944580078125,
      "step": 27762,
      "training_step_time": 0.40666961669921875
    },
    {
      "epoch": 0.000169451904296875,
      "model_forward_time": 0.11433720588684082,
      "step": 27763
    },
    {
      "epoch": 0.000169451904296875,
      "step": 27763,
      "training_step_time": 0.38927507400512695
    },
    {
      "epoch": 0.0001694580078125,
      "model_forward_time": 0.11443352699279785,
      "step": 27764
    },
    {
      "epoch": 0.0001694580078125,
      "step": 27764,
      "training_step_time": 0.38446998596191406
    },
    {
      "epoch": 0.000169464111328125,
      "model_forward_time": 0.1148674488067627,
      "step": 27765
    },
    {
      "epoch": 0.000169464111328125,
      "step": 27765,
      "training_step_time": 0.5150353908538818
    },
    {
      "epoch": 0.00016947021484375,
      "model_forward_time": 0.11574673652648926,
      "step": 27766
    },
    {
      "epoch": 0.00016947021484375,
      "step": 27766,
      "training_step_time": 0.3945932388305664
    },
    {
      "epoch": 0.000169476318359375,
      "model_forward_time": 0.11503267288208008,
      "step": 27767
    },
    {
      "epoch": 0.000169476318359375,
      "step": 27767,
      "training_step_time": 0.3958582878112793
    },
    {
      "epoch": 0.000169482421875,
      "model_forward_time": 0.11533689498901367,
      "step": 27768
    },
    {
      "epoch": 0.000169482421875,
      "step": 27768,
      "training_step_time": 0.39328527450561523
    },
    {
      "epoch": 0.000169488525390625,
      "model_forward_time": 0.11550426483154297,
      "step": 27769
    },
    {
      "epoch": 0.000169488525390625,
      "step": 27769,
      "training_step_time": 0.39960813522338867
    },
    {
      "epoch": 0.00016949462890625,
      "grad_norm": 0.14017002284526825,
      "learning_rate": 6.020681826134609e-05,
      "loss": 0.0458,
      "step": 27770
    },
    {
      "epoch": 0.00016949462890625,
      "model_forward_time": 0.11469507217407227,
      "step": 27770
    },
    {
      "epoch": 0.00016949462890625,
      "step": 27770,
      "training_step_time": 0.5003259181976318
    },
    {
      "epoch": 0.000169500732421875,
      "model_forward_time": 0.11493301391601562,
      "step": 27771
    },
    {
      "epoch": 0.000169500732421875,
      "step": 27771,
      "training_step_time": 0.5707366466522217
    },
    {
      "epoch": 0.0001695068359375,
      "model_forward_time": 0.11520671844482422,
      "step": 27772
    },
    {
      "epoch": 0.0001695068359375,
      "step": 27772,
      "training_step_time": 0.47254419326782227
    },
    {
      "epoch": 0.000169512939453125,
      "model_forward_time": 0.11502671241760254,
      "step": 27773
    },
    {
      "epoch": 0.000169512939453125,
      "step": 27773,
      "training_step_time": 0.40203356742858887
    },
    {
      "epoch": 0.00016951904296875,
      "model_forward_time": 0.11437678337097168,
      "step": 27774
    },
    {
      "epoch": 0.00016951904296875,
      "step": 27774,
      "training_step_time": 0.41625428199768066
    },
    {
      "epoch": 0.000169525146484375,
      "model_forward_time": 0.11510467529296875,
      "step": 27775
    },
    {
      "epoch": 0.000169525146484375,
      "step": 27775,
      "training_step_time": 0.4597740173339844
    },
    {
      "epoch": 0.00016953125,
      "model_forward_time": 0.11432623863220215,
      "step": 27776
    },
    {
      "epoch": 0.00016953125,
      "step": 27776,
      "training_step_time": 0.4374527931213379
    },
    {
      "epoch": 0.000169537353515625,
      "model_forward_time": 0.11457467079162598,
      "step": 27777
    },
    {
      "epoch": 0.000169537353515625,
      "step": 27777,
      "training_step_time": 0.41829967498779297
    },
    {
      "epoch": 0.00016954345703125,
      "model_forward_time": 0.11521244049072266,
      "step": 27778
    },
    {
      "epoch": 0.00016954345703125,
      "step": 27778,
      "training_step_time": 0.393887996673584
    },
    {
      "epoch": 0.000169549560546875,
      "model_forward_time": 0.11457681655883789,
      "step": 27779
    },
    {
      "epoch": 0.000169549560546875,
      "step": 27779,
      "training_step_time": 0.3962254524230957
    },
    {
      "epoch": 0.0001695556640625,
      "grad_norm": 0.1311589479446411,
      "learning_rate": 6.017983918218812e-05,
      "loss": 0.0474,
      "step": 27780
    },
    {
      "epoch": 0.0001695556640625,
      "model_forward_time": 0.11503362655639648,
      "step": 27780
    },
    {
      "epoch": 0.0001695556640625,
      "step": 27780,
      "training_step_time": 0.39385485649108887
    },
    {
      "epoch": 0.000169561767578125,
      "model_forward_time": 0.11569595336914062,
      "step": 27781
    },
    {
      "epoch": 0.000169561767578125,
      "step": 27781,
      "training_step_time": 0.40676355361938477
    },
    {
      "epoch": 0.00016956787109375,
      "model_forward_time": 0.11491894721984863,
      "step": 27782
    },
    {
      "epoch": 0.00016956787109375,
      "step": 27782,
      "training_step_time": 0.39403748512268066
    },
    {
      "epoch": 0.000169573974609375,
      "model_forward_time": 0.1152489185333252,
      "step": 27783
    },
    {
      "epoch": 0.000169573974609375,
      "step": 27783,
      "training_step_time": 0.5596024990081787
    },
    {
      "epoch": 0.000169580078125,
      "model_forward_time": 0.11510467529296875,
      "step": 27784
    },
    {
      "epoch": 0.000169580078125,
      "step": 27784,
      "training_step_time": 0.3858935832977295
    },
    {
      "epoch": 0.000169586181640625,
      "model_forward_time": 0.11556220054626465,
      "step": 27785
    },
    {
      "epoch": 0.000169586181640625,
      "step": 27785,
      "training_step_time": 0.38547849655151367
    },
    {
      "epoch": 0.00016959228515625,
      "model_forward_time": 0.1154930591583252,
      "step": 27786
    },
    {
      "epoch": 0.00016959228515625,
      "step": 27786,
      "training_step_time": 0.4283921718597412
    },
    {
      "epoch": 0.000169598388671875,
      "model_forward_time": 0.11496758460998535,
      "step": 27787
    },
    {
      "epoch": 0.000169598388671875,
      "step": 27787,
      "training_step_time": 0.42824721336364746
    },
    {
      "epoch": 0.0001696044921875,
      "model_forward_time": 0.11548566818237305,
      "step": 27788
    },
    {
      "epoch": 0.0001696044921875,
      "step": 27788,
      "training_step_time": 0.49701428413391113
    },
    {
      "epoch": 0.000169610595703125,
      "model_forward_time": 0.11492681503295898,
      "step": 27789
    },
    {
      "epoch": 0.000169610595703125,
      "step": 27789,
      "training_step_time": 0.43222999572753906
    },
    {
      "epoch": 0.00016961669921875,
      "grad_norm": 0.1396709829568863,
      "learning_rate": 6.015285701066382e-05,
      "loss": 0.047,
      "step": 27790
    },
    {
      "epoch": 0.00016961669921875,
      "model_forward_time": 0.11559391021728516,
      "step": 27790
    },
    {
      "epoch": 0.00016961669921875,
      "step": 27790,
      "training_step_time": 0.48822951316833496
    },
    {
      "epoch": 0.000169622802734375,
      "model_forward_time": 0.11455082893371582,
      "step": 27791
    },
    {
      "epoch": 0.000169622802734375,
      "step": 27791,
      "training_step_time": 0.39151477813720703
    },
    {
      "epoch": 0.00016962890625,
      "model_forward_time": 0.11526966094970703,
      "step": 27792
    },
    {
      "epoch": 0.00016962890625,
      "step": 27792,
      "training_step_time": 0.3933415412902832
    },
    {
      "epoch": 0.000169635009765625,
      "model_forward_time": 0.11505508422851562,
      "step": 27793
    },
    {
      "epoch": 0.000169635009765625,
      "step": 27793,
      "training_step_time": 0.3963592052459717
    },
    {
      "epoch": 0.00016964111328125,
      "model_forward_time": 0.11500954627990723,
      "step": 27794
    },
    {
      "epoch": 0.00016964111328125,
      "step": 27794,
      "training_step_time": 0.3854057788848877
    },
    {
      "epoch": 0.000169647216796875,
      "model_forward_time": 0.11505746841430664,
      "step": 27795
    },
    {
      "epoch": 0.000169647216796875,
      "step": 27795,
      "training_step_time": 0.5796041488647461
    },
    {
      "epoch": 0.0001696533203125,
      "model_forward_time": 0.11517953872680664,
      "step": 27796
    },
    {
      "epoch": 0.0001696533203125,
      "step": 27796,
      "training_step_time": 0.39649057388305664
    },
    {
      "epoch": 0.000169659423828125,
      "model_forward_time": 0.1156320571899414,
      "step": 27797
    },
    {
      "epoch": 0.000169659423828125,
      "step": 27797,
      "training_step_time": 0.39778852462768555
    },
    {
      "epoch": 0.00016966552734375,
      "model_forward_time": 0.11527204513549805,
      "step": 27798
    },
    {
      "epoch": 0.00016966552734375,
      "step": 27798,
      "training_step_time": 0.3881556987762451
    },
    {
      "epoch": 0.000169671630859375,
      "model_forward_time": 0.11550736427307129,
      "step": 27799
    },
    {
      "epoch": 0.000169671630859375,
      "step": 27799,
      "training_step_time": 0.3904597759246826
    },
    {
      "epoch": 0.000169677734375,
      "grad_norm": 0.09384989738464355,
      "learning_rate": 6.012587175496961e-05,
      "loss": 0.0409,
      "step": 27800
    },
    {
      "epoch": 0.000169677734375,
      "model_forward_time": 0.11533069610595703,
      "step": 27800
    },
    {
      "epoch": 0.000169677734375,
      "step": 27800,
      "training_step_time": 0.38883304595947266
    },
    {
      "epoch": 0.000169683837890625,
      "model_forward_time": 0.11505722999572754,
      "step": 27801
    },
    {
      "epoch": 0.000169683837890625,
      "step": 27801,
      "training_step_time": 0.7159430980682373
    },
    {
      "epoch": 0.00016968994140625,
      "model_forward_time": 0.11471199989318848,
      "step": 27802
    },
    {
      "epoch": 0.00016968994140625,
      "step": 27802,
      "training_step_time": 0.4145030975341797
    },
    {
      "epoch": 0.000169696044921875,
      "model_forward_time": 0.11453676223754883,
      "step": 27803
    },
    {
      "epoch": 0.000169696044921875,
      "step": 27803,
      "training_step_time": 0.4406759738922119
    },
    {
      "epoch": 0.0001697021484375,
      "model_forward_time": 0.11411142349243164,
      "step": 27804
    },
    {
      "epoch": 0.0001697021484375,
      "step": 27804,
      "training_step_time": 0.41373777389526367
    },
    {
      "epoch": 0.000169708251953125,
      "model_forward_time": 0.1147317886352539,
      "step": 27805
    },
    {
      "epoch": 0.000169708251953125,
      "step": 27805,
      "training_step_time": 0.38814783096313477
    },
    {
      "epoch": 0.00016971435546875,
      "model_forward_time": 0.1142115592956543,
      "step": 27806
    },
    {
      "epoch": 0.00016971435546875,
      "step": 27806,
      "training_step_time": 0.38907670974731445
    },
    {
      "epoch": 0.000169720458984375,
      "model_forward_time": 0.11527752876281738,
      "step": 27807
    },
    {
      "epoch": 0.000169720458984375,
      "step": 27807,
      "training_step_time": 0.5285351276397705
    },
    {
      "epoch": 0.0001697265625,
      "model_forward_time": 0.11422586441040039,
      "step": 27808
    },
    {
      "epoch": 0.0001697265625,
      "step": 27808,
      "training_step_time": 0.41020870208740234
    },
    {
      "epoch": 0.000169732666015625,
      "model_forward_time": 0.11487579345703125,
      "step": 27809
    },
    {
      "epoch": 0.000169732666015625,
      "step": 27809,
      "training_step_time": 0.3886988162994385
    },
    {
      "epoch": 0.00016973876953125,
      "grad_norm": 0.12807509303092957,
      "learning_rate": 6.009888342330292e-05,
      "loss": 0.0488,
      "step": 27810
    },
    {
      "epoch": 0.00016973876953125,
      "model_forward_time": 0.11543488502502441,
      "step": 27810
    },
    {
      "epoch": 0.00016973876953125,
      "step": 27810,
      "training_step_time": 0.41274452209472656
    },
    {
      "epoch": 0.000169744873046875,
      "model_forward_time": 0.1143186092376709,
      "step": 27811
    },
    {
      "epoch": 0.000169744873046875,
      "step": 27811,
      "training_step_time": 0.392641544342041
    },
    {
      "epoch": 0.0001697509765625,
      "model_forward_time": 0.11581897735595703,
      "step": 27812
    },
    {
      "epoch": 0.0001697509765625,
      "step": 27812,
      "training_step_time": 0.3911106586456299
    },
    {
      "epoch": 0.000169757080078125,
      "model_forward_time": 0.11487627029418945,
      "step": 27813
    },
    {
      "epoch": 0.000169757080078125,
      "step": 27813,
      "training_step_time": 0.6406910419464111
    },
    {
      "epoch": 0.00016976318359375,
      "model_forward_time": 0.11473941802978516,
      "step": 27814
    },
    {
      "epoch": 0.00016976318359375,
      "step": 27814,
      "training_step_time": 0.406721830368042
    },
    {
      "epoch": 0.000169769287109375,
      "model_forward_time": 0.11509323120117188,
      "step": 27815
    },
    {
      "epoch": 0.000169769287109375,
      "step": 27815,
      "training_step_time": 0.42240262031555176
    },
    {
      "epoch": 0.000169775390625,
      "model_forward_time": 0.11458277702331543,
      "step": 27816
    },
    {
      "epoch": 0.000169775390625,
      "step": 27816,
      "training_step_time": 0.5140929222106934
    },
    {
      "epoch": 0.000169781494140625,
      "model_forward_time": 0.11436033248901367,
      "step": 27817
    },
    {
      "epoch": 0.000169781494140625,
      "step": 27817,
      "training_step_time": 0.3935413360595703
    },
    {
      "epoch": 0.00016978759765625,
      "model_forward_time": 0.11507010459899902,
      "step": 27818
    },
    {
      "epoch": 0.00016978759765625,
      "step": 27818,
      "training_step_time": 0.48792576789855957
    },
    {
      "epoch": 0.000169793701171875,
      "model_forward_time": 0.11444711685180664,
      "step": 27819
    },
    {
      "epoch": 0.000169793701171875,
      "step": 27819,
      "training_step_time": 0.4073607921600342
    },
    {
      "epoch": 0.0001697998046875,
      "grad_norm": 0.16611354053020477,
      "learning_rate": 6.0071892023862105e-05,
      "loss": 0.0474,
      "step": 27820
    },
    {
      "epoch": 0.0001697998046875,
      "model_forward_time": 0.11425280570983887,
      "step": 27820
    },
    {
      "epoch": 0.0001697998046875,
      "step": 27820,
      "training_step_time": 0.3877296447753906
    },
    {
      "epoch": 0.000169805908203125,
      "model_forward_time": 0.11456704139709473,
      "step": 27821
    },
    {
      "epoch": 0.000169805908203125,
      "step": 27821,
      "training_step_time": 0.44106006622314453
    },
    {
      "epoch": 0.00016981201171875,
      "model_forward_time": 0.11469817161560059,
      "step": 27822
    },
    {
      "epoch": 0.00016981201171875,
      "step": 27822,
      "training_step_time": 0.399883508682251
    },
    {
      "epoch": 0.000169818115234375,
      "model_forward_time": 0.11485981941223145,
      "step": 27823
    },
    {
      "epoch": 0.000169818115234375,
      "step": 27823,
      "training_step_time": 0.38765597343444824
    },
    {
      "epoch": 0.00016982421875,
      "model_forward_time": 0.11446881294250488,
      "step": 27824
    },
    {
      "epoch": 0.00016982421875,
      "step": 27824,
      "training_step_time": 0.3900625705718994
    },
    {
      "epoch": 0.000169830322265625,
      "model_forward_time": 0.11487269401550293,
      "step": 27825
    },
    {
      "epoch": 0.000169830322265625,
      "step": 27825,
      "training_step_time": 0.6767518520355225
    },
    {
      "epoch": 0.00016983642578125,
      "model_forward_time": 0.11542201042175293,
      "step": 27826
    },
    {
      "epoch": 0.00016983642578125,
      "step": 27826,
      "training_step_time": 0.3941915035247803
    },
    {
      "epoch": 0.000169842529296875,
      "model_forward_time": 0.11423468589782715,
      "step": 27827
    },
    {
      "epoch": 0.000169842529296875,
      "step": 27827,
      "training_step_time": 0.3927333354949951
    },
    {
      "epoch": 0.0001698486328125,
      "model_forward_time": 0.11572456359863281,
      "step": 27828
    },
    {
      "epoch": 0.0001698486328125,
      "step": 27828,
      "training_step_time": 0.4582407474517822
    },
    {
      "epoch": 0.000169854736328125,
      "model_forward_time": 0.11455821990966797,
      "step": 27829
    },
    {
      "epoch": 0.000169854736328125,
      "step": 27829,
      "training_step_time": 0.4732184410095215
    },
    {
      "epoch": 0.00016986083984375,
      "grad_norm": 0.11539790034294128,
      "learning_rate": 6.004489756484641e-05,
      "loss": 0.0415,
      "step": 27830
    },
    {
      "epoch": 0.00016986083984375,
      "model_forward_time": 0.11493921279907227,
      "step": 27830
    },
    {
      "epoch": 0.00016986083984375,
      "step": 27830,
      "training_step_time": 0.36072659492492676
    },
    {
      "epoch": 0.000169866943359375,
      "model_forward_time": 0.11440372467041016,
      "step": 27831
    },
    {
      "epoch": 0.000169866943359375,
      "step": 27831,
      "training_step_time": 0.43758606910705566
    },
    {
      "epoch": 0.000169873046875,
      "model_forward_time": 0.11501407623291016,
      "step": 27832
    },
    {
      "epoch": 0.000169873046875,
      "step": 27832,
      "training_step_time": 0.5100953578948975
    },
    {
      "epoch": 0.000169879150390625,
      "model_forward_time": 0.11436724662780762,
      "step": 27833
    },
    {
      "epoch": 0.000169879150390625,
      "step": 27833,
      "training_step_time": 0.3817291259765625
    },
    {
      "epoch": 0.00016988525390625,
      "model_forward_time": 0.11433696746826172,
      "step": 27834
    },
    {
      "epoch": 0.00016988525390625,
      "step": 27834,
      "training_step_time": 0.41135239601135254
    },
    {
      "epoch": 0.000169891357421875,
      "model_forward_time": 0.1162712574005127,
      "step": 27835
    },
    {
      "epoch": 0.000169891357421875,
      "step": 27835,
      "training_step_time": 0.3850252628326416
    },
    {
      "epoch": 0.0001698974609375,
      "model_forward_time": 0.11515307426452637,
      "step": 27836
    },
    {
      "epoch": 0.0001698974609375,
      "step": 27836,
      "training_step_time": 0.39534878730773926
    },
    {
      "epoch": 0.000169903564453125,
      "model_forward_time": 0.11483263969421387,
      "step": 27837
    },
    {
      "epoch": 0.000169903564453125,
      "step": 27837,
      "training_step_time": 0.4348301887512207
    },
    {
      "epoch": 0.00016990966796875,
      "model_forward_time": 0.1149895191192627,
      "step": 27838
    },
    {
      "epoch": 0.00016990966796875,
      "step": 27838,
      "training_step_time": 0.3971986770629883
    },
    {
      "epoch": 0.000169915771484375,
      "model_forward_time": 0.11462569236755371,
      "step": 27839
    },
    {
      "epoch": 0.000169915771484375,
      "step": 27839,
      "training_step_time": 0.39792537689208984
    },
    {
      "epoch": 0.000169921875,
      "grad_norm": 0.12292928993701935,
      "learning_rate": 6.001790005445607e-05,
      "loss": 0.0447,
      "step": 27840
    },
    {
      "epoch": 0.000169921875,
      "model_forward_time": 0.11515402793884277,
      "step": 27840
    },
    {
      "epoch": 0.000169921875,
      "step": 27840,
      "training_step_time": 0.39962267875671387
    },
    {
      "epoch": 0.000169927978515625,
      "model_forward_time": 0.1153254508972168,
      "step": 27841
    },
    {
      "epoch": 0.000169927978515625,
      "step": 27841,
      "training_step_time": 0.4196648597717285
    },
    {
      "epoch": 0.00016993408203125,
      "model_forward_time": 0.11540889739990234,
      "step": 27842
    },
    {
      "epoch": 0.00016993408203125,
      "step": 27842,
      "training_step_time": 0.4877333641052246
    },
    {
      "epoch": 0.000169940185546875,
      "model_forward_time": 0.11551237106323242,
      "step": 27843
    },
    {
      "epoch": 0.000169940185546875,
      "step": 27843,
      "training_step_time": 0.4441382884979248
    },
    {
      "epoch": 0.0001699462890625,
      "model_forward_time": 0.11516618728637695,
      "step": 27844
    },
    {
      "epoch": 0.0001699462890625,
      "step": 27844,
      "training_step_time": 0.4593183994293213
    },
    {
      "epoch": 0.000169952392578125,
      "model_forward_time": 0.11523008346557617,
      "step": 27845
    },
    {
      "epoch": 0.000169952392578125,
      "step": 27845,
      "training_step_time": 0.4437272548675537
    },
    {
      "epoch": 0.00016995849609375,
      "model_forward_time": 0.1148536205291748,
      "step": 27846
    },
    {
      "epoch": 0.00016995849609375,
      "step": 27846,
      "training_step_time": 0.47836875915527344
    },
    {
      "epoch": 0.000169964599609375,
      "model_forward_time": 0.11522865295410156,
      "step": 27847
    },
    {
      "epoch": 0.000169964599609375,
      "step": 27847,
      "training_step_time": 0.4458589553833008
    },
    {
      "epoch": 0.000169970703125,
      "model_forward_time": 0.11514163017272949,
      "step": 27848
    },
    {
      "epoch": 0.000169970703125,
      "step": 27848,
      "training_step_time": 0.4184284210205078
    },
    {
      "epoch": 0.000169976806640625,
      "model_forward_time": 0.1149144172668457,
      "step": 27849
    },
    {
      "epoch": 0.000169976806640625,
      "step": 27849,
      "training_step_time": 0.4697089195251465
    },
    {
      "epoch": 0.00016998291015625,
      "grad_norm": 0.07966271042823792,
      "learning_rate": 5.999089950089218e-05,
      "loss": 0.043,
      "step": 27850
    },
    {
      "epoch": 0.00016998291015625,
      "model_forward_time": 0.11576414108276367,
      "step": 27850
    },
    {
      "epoch": 0.00016998291015625,
      "step": 27850,
      "training_step_time": 0.39655208587646484
    },
    {
      "epoch": 0.000169989013671875,
      "model_forward_time": 0.114013671875,
      "step": 27851
    },
    {
      "epoch": 0.000169989013671875,
      "step": 27851,
      "training_step_time": 0.39735984802246094
    },
    {
      "epoch": 0.0001699951171875,
      "model_forward_time": 0.11549639701843262,
      "step": 27852
    },
    {
      "epoch": 0.0001699951171875,
      "step": 27852,
      "training_step_time": 0.3823208808898926
    },
    {
      "epoch": 0.000170001220703125,
      "model_forward_time": 0.11490464210510254,
      "step": 27853
    },
    {
      "epoch": 0.000170001220703125,
      "step": 27853,
      "training_step_time": 0.39261674880981445
    },
    {
      "epoch": 0.00017000732421875,
      "model_forward_time": 0.1149289608001709,
      "step": 27854
    },
    {
      "epoch": 0.00017000732421875,
      "step": 27854,
      "training_step_time": 0.3910694122314453
    },
    {
      "epoch": 0.000170013427734375,
      "model_forward_time": 0.11534452438354492,
      "step": 27855
    },
    {
      "epoch": 0.000170013427734375,
      "step": 27855,
      "training_step_time": 0.7261452674865723
    },
    {
      "epoch": 0.00017001953125,
      "model_forward_time": 0.1148979663848877,
      "step": 27856
    },
    {
      "epoch": 0.00017001953125,
      "step": 27856,
      "training_step_time": 0.40174412727355957
    },
    {
      "epoch": 0.000170025634765625,
      "model_forward_time": 0.11496949195861816,
      "step": 27857
    },
    {
      "epoch": 0.000170025634765625,
      "step": 27857,
      "training_step_time": 0.3944571018218994
    },
    {
      "epoch": 0.00017003173828125,
      "model_forward_time": 0.11502838134765625,
      "step": 27858
    },
    {
      "epoch": 0.00017003173828125,
      "step": 27858,
      "training_step_time": 0.39617085456848145
    },
    {
      "epoch": 0.000170037841796875,
      "model_forward_time": 0.11445879936218262,
      "step": 27859
    },
    {
      "epoch": 0.000170037841796875,
      "step": 27859,
      "training_step_time": 0.39307427406311035
    },
    {
      "epoch": 0.0001700439453125,
      "grad_norm": 0.17728550732135773,
      "learning_rate": 5.9963895912356836e-05,
      "loss": 0.0394,
      "step": 27860
    },
    {
      "epoch": 0.0001700439453125,
      "model_forward_time": 0.11560893058776855,
      "step": 27860
    },
    {
      "epoch": 0.0001700439453125,
      "step": 27860,
      "training_step_time": 0.48934364318847656
    },
    {
      "epoch": 0.000170050048828125,
      "model_forward_time": 0.11496686935424805,
      "step": 27861
    },
    {
      "epoch": 0.000170050048828125,
      "step": 27861,
      "training_step_time": 0.5446584224700928
    },
    {
      "epoch": 0.00017005615234375,
      "model_forward_time": 0.1149587631225586,
      "step": 27862
    },
    {
      "epoch": 0.00017005615234375,
      "step": 27862,
      "training_step_time": 0.3954157829284668
    },
    {
      "epoch": 0.000170062255859375,
      "model_forward_time": 0.11425900459289551,
      "step": 27863
    },
    {
      "epoch": 0.000170062255859375,
      "step": 27863,
      "training_step_time": 0.3986964225769043
    },
    {
      "epoch": 0.000170068359375,
      "model_forward_time": 0.11475992202758789,
      "step": 27864
    },
    {
      "epoch": 0.000170068359375,
      "step": 27864,
      "training_step_time": 0.39821314811706543
    },
    {
      "epoch": 0.000170074462890625,
      "model_forward_time": 0.11464142799377441,
      "step": 27865
    },
    {
      "epoch": 0.000170074462890625,
      "step": 27865,
      "training_step_time": 0.3900716304779053
    },
    {
      "epoch": 0.00017008056640625,
      "model_forward_time": 0.11494016647338867,
      "step": 27866
    },
    {
      "epoch": 0.00017008056640625,
      "step": 27866,
      "training_step_time": 0.38097357749938965
    },
    {
      "epoch": 0.000170086669921875,
      "model_forward_time": 0.11536598205566406,
      "step": 27867
    },
    {
      "epoch": 0.000170086669921875,
      "step": 27867,
      "training_step_time": 0.5996952056884766
    },
    {
      "epoch": 0.0001700927734375,
      "model_forward_time": 0.11467266082763672,
      "step": 27868
    },
    {
      "epoch": 0.0001700927734375,
      "step": 27868,
      "training_step_time": 0.38878703117370605
    },
    {
      "epoch": 0.000170098876953125,
      "model_forward_time": 0.11653351783752441,
      "step": 27869
    },
    {
      "epoch": 0.000170098876953125,
      "step": 27869,
      "training_step_time": 0.44637417793273926
    },
    {
      "epoch": 0.00017010498046875,
      "grad_norm": 0.10465192049741745,
      "learning_rate": 5.9936889297052986e-05,
      "loss": 0.0435,
      "step": 27870
    },
    {
      "epoch": 0.00017010498046875,
      "model_forward_time": 0.11426520347595215,
      "step": 27870
    },
    {
      "epoch": 0.00017010498046875,
      "step": 27870,
      "training_step_time": 0.42987513542175293
    },
    {
      "epoch": 0.000170111083984375,
      "model_forward_time": 0.11485815048217773,
      "step": 27871
    },
    {
      "epoch": 0.000170111083984375,
      "step": 27871,
      "training_step_time": 0.4117283821105957
    },
    {
      "epoch": 0.0001701171875,
      "model_forward_time": 0.11506032943725586,
      "step": 27872
    },
    {
      "epoch": 0.0001701171875,
      "step": 27872,
      "training_step_time": 0.4522898197174072
    },
    {
      "epoch": 0.000170123291015625,
      "model_forward_time": 0.1151425838470459,
      "step": 27873
    },
    {
      "epoch": 0.000170123291015625,
      "step": 27873,
      "training_step_time": 0.4638242721557617
    },
    {
      "epoch": 0.00017012939453125,
      "model_forward_time": 0.11486458778381348,
      "step": 27874
    },
    {
      "epoch": 0.00017012939453125,
      "step": 27874,
      "training_step_time": 0.42815709114074707
    },
    {
      "epoch": 0.000170135498046875,
      "model_forward_time": 0.11508822441101074,
      "step": 27875
    },
    {
      "epoch": 0.000170135498046875,
      "step": 27875,
      "training_step_time": 0.47252845764160156
    },
    {
      "epoch": 0.0001701416015625,
      "model_forward_time": 0.11497640609741211,
      "step": 27876
    },
    {
      "epoch": 0.0001701416015625,
      "step": 27876,
      "training_step_time": 0.39446377754211426
    },
    {
      "epoch": 0.000170147705078125,
      "model_forward_time": 0.11510634422302246,
      "step": 27877
    },
    {
      "epoch": 0.000170147705078125,
      "step": 27877,
      "training_step_time": 0.39702272415161133
    },
    {
      "epoch": 0.00017015380859375,
      "model_forward_time": 0.11552643775939941,
      "step": 27878
    },
    {
      "epoch": 0.00017015380859375,
      "step": 27878,
      "training_step_time": 0.39574480056762695
    },
    {
      "epoch": 0.000170159912109375,
      "model_forward_time": 0.11491942405700684,
      "step": 27879
    },
    {
      "epoch": 0.000170159912109375,
      "step": 27879,
      "training_step_time": 0.6055924892425537
    },
    {
      "epoch": 0.000170166015625,
      "grad_norm": 0.13619226217269897,
      "learning_rate": 5.9909879663184544e-05,
      "loss": 0.0389,
      "step": 27880
    },
    {
      "epoch": 0.000170166015625,
      "model_forward_time": 0.11455702781677246,
      "step": 27880
    },
    {
      "epoch": 0.000170166015625,
      "step": 27880,
      "training_step_time": 0.39337635040283203
    },
    {
      "epoch": 0.000170172119140625,
      "model_forward_time": 0.1147308349609375,
      "step": 27881
    },
    {
      "epoch": 0.000170172119140625,
      "step": 27881,
      "training_step_time": 0.39487600326538086
    },
    {
      "epoch": 0.00017017822265625,
      "model_forward_time": 0.11490035057067871,
      "step": 27882
    },
    {
      "epoch": 0.00017017822265625,
      "step": 27882,
      "training_step_time": 0.3963487148284912
    },
    {
      "epoch": 0.000170184326171875,
      "model_forward_time": 0.11542868614196777,
      "step": 27883
    },
    {
      "epoch": 0.000170184326171875,
      "step": 27883,
      "training_step_time": 0.5075359344482422
    },
    {
      "epoch": 0.0001701904296875,
      "model_forward_time": 0.11545920372009277,
      "step": 27884
    },
    {
      "epoch": 0.0001701904296875,
      "step": 27884,
      "training_step_time": 0.4108872413635254
    },
    {
      "epoch": 0.000170196533203125,
      "model_forward_time": 0.11490178108215332,
      "step": 27885
    },
    {
      "epoch": 0.000170196533203125,
      "step": 27885,
      "training_step_time": 0.5880072116851807
    },
    {
      "epoch": 0.00017020263671875,
      "model_forward_time": 0.11507058143615723,
      "step": 27886
    },
    {
      "epoch": 0.00017020263671875,
      "step": 27886,
      "training_step_time": 0.42158985137939453
    },
    {
      "epoch": 0.000170208740234375,
      "model_forward_time": 0.11490678787231445,
      "step": 27887
    },
    {
      "epoch": 0.000170208740234375,
      "step": 27887,
      "training_step_time": 0.4042389392852783
    },
    {
      "epoch": 0.00017021484375,
      "model_forward_time": 0.11464762687683105,
      "step": 27888
    },
    {
      "epoch": 0.00017021484375,
      "step": 27888,
      "training_step_time": 0.4082763195037842
    },
    {
      "epoch": 0.000170220947265625,
      "model_forward_time": 0.11799430847167969,
      "step": 27889
    },
    {
      "epoch": 0.000170220947265625,
      "step": 27889,
      "training_step_time": 0.4728665351867676
    },
    {
      "epoch": 0.00017022705078125,
      "grad_norm": 0.11592576652765274,
      "learning_rate": 5.988286701895631e-05,
      "loss": 0.0469,
      "step": 27890
    },
    {
      "epoch": 0.00017022705078125,
      "model_forward_time": 0.11509990692138672,
      "step": 27890
    },
    {
      "epoch": 0.00017022705078125,
      "step": 27890,
      "training_step_time": 0.3874189853668213
    },
    {
      "epoch": 0.000170233154296875,
      "model_forward_time": 0.11518263816833496,
      "step": 27891
    },
    {
      "epoch": 0.000170233154296875,
      "step": 27891,
      "training_step_time": 0.487612247467041
    },
    {
      "epoch": 0.0001702392578125,
      "model_forward_time": 0.11394524574279785,
      "step": 27892
    },
    {
      "epoch": 0.0001702392578125,
      "step": 27892,
      "training_step_time": 0.3866007328033447
    },
    {
      "epoch": 0.000170245361328125,
      "model_forward_time": 0.11485505104064941,
      "step": 27893
    },
    {
      "epoch": 0.000170245361328125,
      "step": 27893,
      "training_step_time": 0.38808703422546387
    },
    {
      "epoch": 0.00017025146484375,
      "model_forward_time": 0.11539673805236816,
      "step": 27894
    },
    {
      "epoch": 0.00017025146484375,
      "step": 27894,
      "training_step_time": 0.4063389301300049
    },
    {
      "epoch": 0.000170257568359375,
      "model_forward_time": 0.11431598663330078,
      "step": 27895
    },
    {
      "epoch": 0.000170257568359375,
      "step": 27895,
      "training_step_time": 0.40102171897888184
    },
    {
      "epoch": 0.000170263671875,
      "model_forward_time": 0.11474943161010742,
      "step": 27896
    },
    {
      "epoch": 0.000170263671875,
      "step": 27896,
      "training_step_time": 0.3965909481048584
    },
    {
      "epoch": 0.000170269775390625,
      "model_forward_time": 0.11520886421203613,
      "step": 27897
    },
    {
      "epoch": 0.000170269775390625,
      "step": 27897,
      "training_step_time": 0.5508453845977783
    },
    {
      "epoch": 0.00017027587890625,
      "model_forward_time": 0.11466264724731445,
      "step": 27898
    },
    {
      "epoch": 0.00017027587890625,
      "step": 27898,
      "training_step_time": 0.444854736328125
    },
    {
      "epoch": 0.000170281982421875,
      "model_forward_time": 0.1151878833770752,
      "step": 27899
    },
    {
      "epoch": 0.000170281982421875,
      "step": 27899,
      "training_step_time": 0.4931976795196533
    },
    {
      "epoch": 0.0001702880859375,
      "grad_norm": 0.14337566494941711,
      "learning_rate": 5.985585137257401e-05,
      "loss": 0.043,
      "step": 27900
    },
    {
      "epoch": 0.0001702880859375,
      "model_forward_time": 0.11469411849975586,
      "step": 27900
    },
    {
      "epoch": 0.0001702880859375,
      "step": 27900,
      "training_step_time": 0.4165031909942627
    },
    {
      "epoch": 0.000170294189453125,
      "model_forward_time": 0.11518001556396484,
      "step": 27901
    },
    {
      "epoch": 0.000170294189453125,
      "step": 27901,
      "training_step_time": 0.4190185070037842
    },
    {
      "epoch": 0.00017030029296875,
      "model_forward_time": 0.11442112922668457,
      "step": 27902
    },
    {
      "epoch": 0.00017030029296875,
      "step": 27902,
      "training_step_time": 0.41973209381103516
    },
    {
      "epoch": 0.000170306396484375,
      "model_forward_time": 0.11422872543334961,
      "step": 27903
    },
    {
      "epoch": 0.000170306396484375,
      "step": 27903,
      "training_step_time": 0.48942995071411133
    },
    {
      "epoch": 0.0001703125,
      "model_forward_time": 0.11465263366699219,
      "step": 27904
    },
    {
      "epoch": 0.0001703125,
      "step": 27904,
      "training_step_time": 0.38779211044311523
    },
    {
      "epoch": 0.000170318603515625,
      "model_forward_time": 0.11526632308959961,
      "step": 27905
    },
    {
      "epoch": 0.000170318603515625,
      "step": 27905,
      "training_step_time": 0.39380764961242676
    },
    {
      "epoch": 0.00017032470703125,
      "model_forward_time": 0.11501765251159668,
      "step": 27906
    },
    {
      "epoch": 0.00017032470703125,
      "step": 27906,
      "training_step_time": 0.3909335136413574
    },
    {
      "epoch": 0.000170330810546875,
      "model_forward_time": 0.11525630950927734,
      "step": 27907
    },
    {
      "epoch": 0.000170330810546875,
      "step": 27907,
      "training_step_time": 0.40136218070983887
    },
    {
      "epoch": 0.0001703369140625,
      "model_forward_time": 0.11456990242004395,
      "step": 27908
    },
    {
      "epoch": 0.0001703369140625,
      "step": 27908,
      "training_step_time": 0.39170289039611816
    },
    {
      "epoch": 0.000170343017578125,
      "model_forward_time": 0.11520004272460938,
      "step": 27909
    },
    {
      "epoch": 0.000170343017578125,
      "step": 27909,
      "training_step_time": 0.6063580513000488
    },
    {
      "epoch": 0.00017034912109375,
      "grad_norm": 0.15589778125286102,
      "learning_rate": 5.982883273224431e-05,
      "loss": 0.0468,
      "step": 27910
    },
    {
      "epoch": 0.00017034912109375,
      "model_forward_time": 0.11471295356750488,
      "step": 27910
    },
    {
      "epoch": 0.00017034912109375,
      "step": 27910,
      "training_step_time": 0.39745092391967773
    },
    {
      "epoch": 0.000170355224609375,
      "model_forward_time": 0.11500263214111328,
      "step": 27911
    },
    {
      "epoch": 0.000170355224609375,
      "step": 27911,
      "training_step_time": 0.4847140312194824
    },
    {
      "epoch": 0.000170361328125,
      "model_forward_time": 0.11488914489746094,
      "step": 27912
    },
    {
      "epoch": 0.000170361328125,
      "step": 27912,
      "training_step_time": 0.43286871910095215
    },
    {
      "epoch": 0.000170367431640625,
      "model_forward_time": 0.11477899551391602,
      "step": 27913
    },
    {
      "epoch": 0.000170367431640625,
      "step": 27913,
      "training_step_time": 0.506523609161377
    },
    {
      "epoch": 0.00017037353515625,
      "model_forward_time": 0.11533761024475098,
      "step": 27914
    },
    {
      "epoch": 0.00017037353515625,
      "step": 27914,
      "training_step_time": 0.4283266067504883
    },
    {
      "epoch": 0.000170379638671875,
      "model_forward_time": 0.11564087867736816,
      "step": 27915
    },
    {
      "epoch": 0.000170379638671875,
      "step": 27915,
      "training_step_time": 0.40881896018981934
    },
    {
      "epoch": 0.0001703857421875,
      "model_forward_time": 0.11549973487854004,
      "step": 27916
    },
    {
      "epoch": 0.0001703857421875,
      "step": 27916,
      "training_step_time": 0.41326093673706055
    },
    {
      "epoch": 0.000170391845703125,
      "model_forward_time": 0.11562347412109375,
      "step": 27917
    },
    {
      "epoch": 0.000170391845703125,
      "step": 27917,
      "training_step_time": 0.4229111671447754
    },
    {
      "epoch": 0.00017039794921875,
      "model_forward_time": 0.11463165283203125,
      "step": 27918
    },
    {
      "epoch": 0.00017039794921875,
      "step": 27918,
      "training_step_time": 0.39632463455200195
    },
    {
      "epoch": 0.000170404052734375,
      "model_forward_time": 0.11461138725280762,
      "step": 27919
    },
    {
      "epoch": 0.000170404052734375,
      "step": 27919,
      "training_step_time": 0.3974933624267578
    },
    {
      "epoch": 0.00017041015625,
      "grad_norm": 0.12353859096765518,
      "learning_rate": 5.980181110617473e-05,
      "loss": 0.0468,
      "step": 27920
    },
    {
      "epoch": 0.00017041015625,
      "model_forward_time": 0.11483478546142578,
      "step": 27920
    },
    {
      "epoch": 0.00017041015625,
      "step": 27920,
      "training_step_time": 0.39174938201904297
    },
    {
      "epoch": 0.000170416259765625,
      "model_forward_time": 0.11534857749938965,
      "step": 27921
    },
    {
      "epoch": 0.000170416259765625,
      "step": 27921,
      "training_step_time": 0.5703530311584473
    },
    {
      "epoch": 0.00017042236328125,
      "model_forward_time": 0.1154179573059082,
      "step": 27922
    },
    {
      "epoch": 0.00017042236328125,
      "step": 27922,
      "training_step_time": 0.38937950134277344
    },
    {
      "epoch": 0.000170428466796875,
      "model_forward_time": 0.11482501029968262,
      "step": 27923
    },
    {
      "epoch": 0.000170428466796875,
      "step": 27923,
      "training_step_time": 0.3877291679382324
    },
    {
      "epoch": 0.0001704345703125,
      "model_forward_time": 0.1154329776763916,
      "step": 27924
    },
    {
      "epoch": 0.0001704345703125,
      "step": 27924,
      "training_step_time": 0.39557480812072754
    },
    {
      "epoch": 0.000170440673828125,
      "model_forward_time": 0.11487483978271484,
      "step": 27925
    },
    {
      "epoch": 0.000170440673828125,
      "step": 27925,
      "training_step_time": 0.4269683361053467
    },
    {
      "epoch": 0.00017044677734375,
      "model_forward_time": 0.11475944519042969,
      "step": 27926
    },
    {
      "epoch": 0.00017044677734375,
      "step": 27926,
      "training_step_time": 0.3925323486328125
    },
    {
      "epoch": 0.000170452880859375,
      "model_forward_time": 0.11506915092468262,
      "step": 27927
    },
    {
      "epoch": 0.000170452880859375,
      "step": 27927,
      "training_step_time": 0.7099530696868896
    },
    {
      "epoch": 0.000170458984375,
      "model_forward_time": 0.11456489562988281,
      "step": 27928
    },
    {
      "epoch": 0.000170458984375,
      "step": 27928,
      "training_step_time": 0.42229127883911133
    },
    {
      "epoch": 0.000170465087890625,
      "model_forward_time": 0.11591529846191406,
      "step": 27929
    },
    {
      "epoch": 0.000170465087890625,
      "step": 27929,
      "training_step_time": 0.3671104907989502
    },
    {
      "epoch": 0.00017047119140625,
      "grad_norm": 0.16615955531597137,
      "learning_rate": 5.977478650257374e-05,
      "loss": 0.05,
      "step": 27930
    },
    {
      "epoch": 0.00017047119140625,
      "model_forward_time": 0.11469531059265137,
      "step": 27930
    },
    {
      "epoch": 0.00017047119140625,
      "step": 27930,
      "training_step_time": 0.43063902854919434
    },
    {
      "epoch": 0.000170477294921875,
      "model_forward_time": 0.11530900001525879,
      "step": 27931
    },
    {
      "epoch": 0.000170477294921875,
      "step": 27931,
      "training_step_time": 0.38865017890930176
    },
    {
      "epoch": 0.0001704833984375,
      "model_forward_time": 0.1150352954864502,
      "step": 27932
    },
    {
      "epoch": 0.0001704833984375,
      "step": 27932,
      "training_step_time": 0.38538384437561035
    },
    {
      "epoch": 0.000170489501953125,
      "model_forward_time": 0.11567902565002441,
      "step": 27933
    },
    {
      "epoch": 0.000170489501953125,
      "step": 27933,
      "training_step_time": 0.5598125457763672
    },
    {
      "epoch": 0.00017049560546875,
      "model_forward_time": 0.11517095565795898,
      "step": 27934
    },
    {
      "epoch": 0.00017049560546875,
      "step": 27934,
      "training_step_time": 0.3799619674682617
    },
    {
      "epoch": 0.000170501708984375,
      "model_forward_time": 0.11657595634460449,
      "step": 27935
    },
    {
      "epoch": 0.000170501708984375,
      "step": 27935,
      "training_step_time": 0.3940002918243408
    },
    {
      "epoch": 0.0001705078125,
      "model_forward_time": 0.11512327194213867,
      "step": 27936
    },
    {
      "epoch": 0.0001705078125,
      "step": 27936,
      "training_step_time": 0.41003990173339844
    },
    {
      "epoch": 0.000170513916015625,
      "model_forward_time": 0.11536478996276855,
      "step": 27937
    },
    {
      "epoch": 0.000170513916015625,
      "step": 27937,
      "training_step_time": 0.401947021484375
    },
    {
      "epoch": 0.00017052001953125,
      "model_forward_time": 0.11550116539001465,
      "step": 27938
    },
    {
      "epoch": 0.00017052001953125,
      "step": 27938,
      "training_step_time": 0.39368605613708496
    },
    {
      "epoch": 0.000170526123046875,
      "model_forward_time": 0.11509251594543457,
      "step": 27939
    },
    {
      "epoch": 0.000170526123046875,
      "step": 27939,
      "training_step_time": 0.6231236457824707
    },
    {
      "epoch": 0.0001705322265625,
      "grad_norm": 0.0906481146812439,
      "learning_rate": 5.974775892965071e-05,
      "loss": 0.046,
      "step": 27940
    },
    {
      "epoch": 0.0001705322265625,
      "model_forward_time": 0.11516046524047852,
      "step": 27940
    },
    {
      "epoch": 0.0001705322265625,
      "step": 27940,
      "training_step_time": 0.41227149963378906
    },
    {
      "epoch": 0.000170538330078125,
      "model_forward_time": 0.11488842964172363,
      "step": 27941
    },
    {
      "epoch": 0.000170538330078125,
      "step": 27941,
      "training_step_time": 0.44976115226745605
    },
    {
      "epoch": 0.00017054443359375,
      "model_forward_time": 0.11515617370605469,
      "step": 27942
    },
    {
      "epoch": 0.00017054443359375,
      "step": 27942,
      "training_step_time": 0.4388155937194824
    },
    {
      "epoch": 0.000170550537109375,
      "model_forward_time": 0.11487817764282227,
      "step": 27943
    },
    {
      "epoch": 0.000170550537109375,
      "step": 27943,
      "training_step_time": 0.384352445602417
    },
    {
      "epoch": 0.000170556640625,
      "model_forward_time": 0.11461210250854492,
      "step": 27944
    },
    {
      "epoch": 0.000170556640625,
      "step": 27944,
      "training_step_time": 0.4797627925872803
    },
    {
      "epoch": 0.000170562744140625,
      "model_forward_time": 0.11532831192016602,
      "step": 27945
    },
    {
      "epoch": 0.000170562744140625,
      "step": 27945,
      "training_step_time": 0.4549226760864258
    },
    {
      "epoch": 0.00017056884765625,
      "model_forward_time": 0.1152803897857666,
      "step": 27946
    },
    {
      "epoch": 0.00017056884765625,
      "step": 27946,
      "training_step_time": 0.38797569274902344
    },
    {
      "epoch": 0.000170574951171875,
      "model_forward_time": 0.11501646041870117,
      "step": 27947
    },
    {
      "epoch": 0.000170574951171875,
      "step": 27947,
      "training_step_time": 0.39618468284606934
    },
    {
      "epoch": 0.0001705810546875,
      "model_forward_time": 0.11499214172363281,
      "step": 27948
    },
    {
      "epoch": 0.0001705810546875,
      "step": 27948,
      "training_step_time": 0.40013718605041504
    },
    {
      "epoch": 0.000170587158203125,
      "model_forward_time": 0.11571168899536133,
      "step": 27949
    },
    {
      "epoch": 0.000170587158203125,
      "step": 27949,
      "training_step_time": 0.3971831798553467
    },
    {
      "epoch": 0.00017059326171875,
      "grad_norm": 0.12564396858215332,
      "learning_rate": 5.9720728395615875e-05,
      "loss": 0.0454,
      "step": 27950
    },
    {
      "epoch": 0.00017059326171875,
      "model_forward_time": 0.11545896530151367,
      "step": 27950
    },
    {
      "epoch": 0.00017059326171875,
      "step": 27950,
      "training_step_time": 0.3931889533996582
    },
    {
      "epoch": 0.000170599365234375,
      "model_forward_time": 0.11539530754089355,
      "step": 27951
    },
    {
      "epoch": 0.000170599365234375,
      "step": 27951,
      "training_step_time": 0.5678458213806152
    },
    {
      "epoch": 0.00017060546875,
      "model_forward_time": 0.11583304405212402,
      "step": 27952
    },
    {
      "epoch": 0.00017060546875,
      "step": 27952,
      "training_step_time": 0.3888561725616455
    },
    {
      "epoch": 0.000170611572265625,
      "model_forward_time": 0.11513733863830566,
      "step": 27953
    },
    {
      "epoch": 0.000170611572265625,
      "step": 27953,
      "training_step_time": 0.41916441917419434
    },
    {
      "epoch": 0.00017061767578125,
      "model_forward_time": 0.11586427688598633,
      "step": 27954
    },
    {
      "epoch": 0.00017061767578125,
      "step": 27954,
      "training_step_time": 0.4084804058074951
    },
    {
      "epoch": 0.000170623779296875,
      "model_forward_time": 0.115081787109375,
      "step": 27955
    },
    {
      "epoch": 0.000170623779296875,
      "step": 27955,
      "training_step_time": 0.5085251331329346
    },
    {
      "epoch": 0.0001706298828125,
      "model_forward_time": 0.11514949798583984,
      "step": 27956
    },
    {
      "epoch": 0.0001706298828125,
      "step": 27956,
      "training_step_time": 0.4822242259979248
    },
    {
      "epoch": 0.000170635986328125,
      "model_forward_time": 0.11533212661743164,
      "step": 27957
    },
    {
      "epoch": 0.000170635986328125,
      "step": 27957,
      "training_step_time": 0.4180576801300049
    },
    {
      "epoch": 0.00017064208984375,
      "model_forward_time": 0.11527633666992188,
      "step": 27958
    },
    {
      "epoch": 0.00017064208984375,
      "step": 27958,
      "training_step_time": 0.47901272773742676
    },
    {
      "epoch": 0.000170648193359375,
      "model_forward_time": 0.11470842361450195,
      "step": 27959
    },
    {
      "epoch": 0.000170648193359375,
      "step": 27959,
      "training_step_time": 0.4468576908111572
    },
    {
      "epoch": 0.000170654296875,
      "grad_norm": 0.10714376717805862,
      "learning_rate": 5.969369490868042e-05,
      "loss": 0.0418,
      "step": 27960
    },
    {
      "epoch": 0.000170654296875,
      "model_forward_time": 0.11456060409545898,
      "step": 27960
    },
    {
      "epoch": 0.000170654296875,
      "step": 27960,
      "training_step_time": 0.3876469135284424
    },
    {
      "epoch": 0.000170660400390625,
      "model_forward_time": 0.11503243446350098,
      "step": 27961
    },
    {
      "epoch": 0.000170660400390625,
      "step": 27961,
      "training_step_time": 0.4097318649291992
    },
    {
      "epoch": 0.00017066650390625,
      "model_forward_time": 0.11536788940429688,
      "step": 27962
    },
    {
      "epoch": 0.00017066650390625,
      "step": 27962,
      "training_step_time": 0.3818683624267578
    },
    {
      "epoch": 0.000170672607421875,
      "model_forward_time": 0.11590027809143066,
      "step": 27963
    },
    {
      "epoch": 0.000170672607421875,
      "step": 27963,
      "training_step_time": 0.38742709159851074
    },
    {
      "epoch": 0.0001706787109375,
      "model_forward_time": 0.11574125289916992,
      "step": 27964
    },
    {
      "epoch": 0.0001706787109375,
      "step": 27964,
      "training_step_time": 0.40596795082092285
    },
    {
      "epoch": 0.000170684814453125,
      "model_forward_time": 0.1150667667388916,
      "step": 27965
    },
    {
      "epoch": 0.000170684814453125,
      "step": 27965,
      "training_step_time": 0.3908977508544922
    },
    {
      "epoch": 0.00017069091796875,
      "model_forward_time": 0.1156005859375,
      "step": 27966
    },
    {
      "epoch": 0.00017069091796875,
      "step": 27966,
      "training_step_time": 0.4053623676300049
    },
    {
      "epoch": 0.000170697021484375,
      "model_forward_time": 0.11485838890075684,
      "step": 27967
    },
    {
      "epoch": 0.000170697021484375,
      "step": 27967,
      "training_step_time": 0.49121594429016113
    },
    {
      "epoch": 0.000170703125,
      "model_forward_time": 0.11532354354858398,
      "step": 27968
    },
    {
      "epoch": 0.000170703125,
      "step": 27968,
      "training_step_time": 0.4256877899169922
    },
    {
      "epoch": 0.000170709228515625,
      "model_forward_time": 0.11458277702331543,
      "step": 27969
    },
    {
      "epoch": 0.000170709228515625,
      "step": 27969,
      "training_step_time": 0.5146305561065674
    },
    {
      "epoch": 0.00017071533203125,
      "grad_norm": 0.11131157726049423,
      "learning_rate": 5.966665847705639e-05,
      "loss": 0.043,
      "step": 27970
    },
    {
      "epoch": 0.00017071533203125,
      "model_forward_time": 0.1150062084197998,
      "step": 27970
    },
    {
      "epoch": 0.00017071533203125,
      "step": 27970,
      "training_step_time": 0.40030336380004883
    },
    {
      "epoch": 0.000170721435546875,
      "model_forward_time": 0.11465787887573242,
      "step": 27971
    },
    {
      "epoch": 0.000170721435546875,
      "step": 27971,
      "training_step_time": 0.36685919761657715
    },
    {
      "epoch": 0.0001707275390625,
      "model_forward_time": 0.11540460586547852,
      "step": 27972
    },
    {
      "epoch": 0.0001707275390625,
      "step": 27972,
      "training_step_time": 0.4939920902252197
    },
    {
      "epoch": 0.000170733642578125,
      "model_forward_time": 0.11463403701782227,
      "step": 27973
    },
    {
      "epoch": 0.000170733642578125,
      "step": 27973,
      "training_step_time": 0.39603447914123535
    },
    {
      "epoch": 0.00017073974609375,
      "model_forward_time": 0.11446571350097656,
      "step": 27974
    },
    {
      "epoch": 0.00017073974609375,
      "step": 27974,
      "training_step_time": 0.3960895538330078
    },
    {
      "epoch": 0.000170745849609375,
      "model_forward_time": 0.1149284839630127,
      "step": 27975
    },
    {
      "epoch": 0.000170745849609375,
      "step": 27975,
      "training_step_time": 0.48653697967529297
    },
    {
      "epoch": 0.000170751953125,
      "model_forward_time": 0.11632561683654785,
      "step": 27976
    },
    {
      "epoch": 0.000170751953125,
      "step": 27976,
      "training_step_time": 0.39147186279296875
    },
    {
      "epoch": 0.000170758056640625,
      "model_forward_time": 0.11495637893676758,
      "step": 27977
    },
    {
      "epoch": 0.000170758056640625,
      "step": 27977,
      "training_step_time": 0.3939480781555176
    },
    {
      "epoch": 0.00017076416015625,
      "model_forward_time": 0.11501646041870117,
      "step": 27978
    },
    {
      "epoch": 0.00017076416015625,
      "step": 27978,
      "training_step_time": 0.39917755126953125
    },
    {
      "epoch": 0.000170770263671875,
      "model_forward_time": 0.11489677429199219,
      "step": 27979
    },
    {
      "epoch": 0.000170770263671875,
      "step": 27979,
      "training_step_time": 0.3949704170227051
    },
    {
      "epoch": 0.0001707763671875,
      "grad_norm": 0.17322884500026703,
      "learning_rate": 5.963961910895676e-05,
      "loss": 0.0433,
      "step": 27980
    },
    {
      "epoch": 0.0001707763671875,
      "model_forward_time": 0.11625194549560547,
      "step": 27980
    },
    {
      "epoch": 0.0001707763671875,
      "step": 27980,
      "training_step_time": 0.43319129943847656
    },
    {
      "epoch": 0.000170782470703125,
      "model_forward_time": 0.11474394798278809,
      "step": 27981
    },
    {
      "epoch": 0.000170782470703125,
      "step": 27981,
      "training_step_time": 0.6014242172241211
    },
    {
      "epoch": 0.00017078857421875,
      "model_forward_time": 0.1146233081817627,
      "step": 27982
    },
    {
      "epoch": 0.00017078857421875,
      "step": 27982,
      "training_step_time": 0.3929178714752197
    },
    {
      "epoch": 0.000170794677734375,
      "model_forward_time": 0.11505699157714844,
      "step": 27983
    },
    {
      "epoch": 0.000170794677734375,
      "step": 27983,
      "training_step_time": 0.42928600311279297
    },
    {
      "epoch": 0.00017080078125,
      "model_forward_time": 0.11512184143066406,
      "step": 27984
    },
    {
      "epoch": 0.00017080078125,
      "step": 27984,
      "training_step_time": 0.3897435665130615
    },
    {
      "epoch": 0.000170806884765625,
      "model_forward_time": 0.11506009101867676,
      "step": 27985
    },
    {
      "epoch": 0.000170806884765625,
      "step": 27985,
      "training_step_time": 0.3866543769836426
    },
    {
      "epoch": 0.00017081298828125,
      "model_forward_time": 0.11534857749938965,
      "step": 27986
    },
    {
      "epoch": 0.00017081298828125,
      "step": 27986,
      "training_step_time": 0.4918360710144043
    },
    {
      "epoch": 0.000170819091796875,
      "model_forward_time": 0.11522793769836426,
      "step": 27987
    },
    {
      "epoch": 0.000170819091796875,
      "step": 27987,
      "training_step_time": 0.5545759201049805
    },
    {
      "epoch": 0.0001708251953125,
      "model_forward_time": 0.11439919471740723,
      "step": 27988
    },
    {
      "epoch": 0.0001708251953125,
      "step": 27988,
      "training_step_time": 0.3892636299133301
    },
    {
      "epoch": 0.000170831298828125,
      "model_forward_time": 0.11495566368103027,
      "step": 27989
    },
    {
      "epoch": 0.000170831298828125,
      "step": 27989,
      "training_step_time": 0.3869013786315918
    },
    {
      "epoch": 0.00017083740234375,
      "grad_norm": 0.12996259331703186,
      "learning_rate": 5.961257681259535e-05,
      "loss": 0.0394,
      "step": 27990
    },
    {
      "epoch": 0.00017083740234375,
      "model_forward_time": 0.11532354354858398,
      "step": 27990
    },
    {
      "epoch": 0.00017083740234375,
      "step": 27990,
      "training_step_time": 0.3970797061920166
    },
    {
      "epoch": 0.000170843505859375,
      "model_forward_time": 0.11513376235961914,
      "step": 27991
    },
    {
      "epoch": 0.000170843505859375,
      "step": 27991,
      "training_step_time": 0.387423038482666
    },
    {
      "epoch": 0.000170849609375,
      "model_forward_time": 0.11568665504455566,
      "step": 27992
    },
    {
      "epoch": 0.000170849609375,
      "step": 27992,
      "training_step_time": 0.38254714012145996
    },
    {
      "epoch": 0.000170855712890625,
      "model_forward_time": 0.11486506462097168,
      "step": 27993
    },
    {
      "epoch": 0.000170855712890625,
      "step": 27993,
      "training_step_time": 0.6528909206390381
    },
    {
      "epoch": 0.00017086181640625,
      "model_forward_time": 0.11557364463806152,
      "step": 27994
    },
    {
      "epoch": 0.00017086181640625,
      "step": 27994,
      "training_step_time": 0.3958101272583008
    },
    {
      "epoch": 0.000170867919921875,
      "model_forward_time": 0.11562156677246094,
      "step": 27995
    },
    {
      "epoch": 0.000170867919921875,
      "step": 27995,
      "training_step_time": 0.4042477607727051
    },
    {
      "epoch": 0.0001708740234375,
      "model_forward_time": 0.11519122123718262,
      "step": 27996
    },
    {
      "epoch": 0.0001708740234375,
      "step": 27996,
      "training_step_time": 0.414597749710083
    },
    {
      "epoch": 0.000170880126953125,
      "model_forward_time": 0.11462068557739258,
      "step": 27997
    },
    {
      "epoch": 0.000170880126953125,
      "step": 27997,
      "training_step_time": 0.41982269287109375
    },
    {
      "epoch": 0.00017088623046875,
      "model_forward_time": 0.11514997482299805,
      "step": 27998
    },
    {
      "epoch": 0.00017088623046875,
      "step": 27998,
      "training_step_time": 0.3946261405944824
    },
    {
      "epoch": 0.000170892333984375,
      "model_forward_time": 0.11487340927124023,
      "step": 27999
    },
    {
      "epoch": 0.000170892333984375,
      "step": 27999,
      "training_step_time": 0.5311176776885986
    },
    {
      "epoch": 0.0001708984375,
      "grad_norm": 0.11430022865533829,
      "learning_rate": 5.958553159618693e-05,
      "loss": 0.0416,
      "step": 28000
    },
    {
      "epoch": 0.0001708984375,
      "model_forward_time": 0.11435270309448242,
      "step": 28000
    },
    {
      "epoch": 0.0001708984375,
      "step": 28000,
      "training_step_time": 0.35640883445739746
    },
    {
      "epoch": 0.000170904541015625,
      "model_forward_time": 0.11267566680908203,
      "step": 28001
    },
    {
      "epoch": 0.000170904541015625,
      "step": 28001,
      "training_step_time": 0.41964006423950195
    },
    {
      "epoch": 0.00017091064453125,
      "model_forward_time": 0.11315512657165527,
      "step": 28002
    },
    {
      "epoch": 0.00017091064453125,
      "step": 28002,
      "training_step_time": 0.4506235122680664
    },
    {
      "epoch": 0.000170916748046875,
      "model_forward_time": 0.11328363418579102,
      "step": 28003
    },
    {
      "epoch": 0.000170916748046875,
      "step": 28003,
      "training_step_time": 0.4204273223876953
    },
    {
      "epoch": 0.0001709228515625,
      "model_forward_time": 0.11448884010314941,
      "step": 28004
    },
    {
      "epoch": 0.0001709228515625,
      "step": 28004,
      "training_step_time": 0.3820369243621826
    },
    {
      "epoch": 0.000170928955078125,
      "model_forward_time": 0.11391353607177734,
      "step": 28005
    },
    {
      "epoch": 0.000170928955078125,
      "step": 28005,
      "training_step_time": 0.37807369232177734
    },
    {
      "epoch": 0.00017093505859375,
      "model_forward_time": 0.11436080932617188,
      "step": 28006
    },
    {
      "epoch": 0.00017093505859375,
      "step": 28006,
      "training_step_time": 0.4008634090423584
    },
    {
      "epoch": 0.000170941162109375,
      "model_forward_time": 0.11461114883422852,
      "step": 28007
    },
    {
      "epoch": 0.000170941162109375,
      "step": 28007,
      "training_step_time": 0.4029538631439209
    },
    {
      "epoch": 0.000170947265625,
      "model_forward_time": 0.1148676872253418,
      "step": 28008
    },
    {
      "epoch": 0.000170947265625,
      "step": 28008,
      "training_step_time": 0.3913733959197998
    },
    {
      "epoch": 0.000170953369140625,
      "model_forward_time": 0.11505317687988281,
      "step": 28009
    },
    {
      "epoch": 0.000170953369140625,
      "step": 28009,
      "training_step_time": 0.394817590713501
    },
    {
      "epoch": 0.00017095947265625,
      "grad_norm": 0.13475561141967773,
      "learning_rate": 5.955848346794708e-05,
      "loss": 0.0415,
      "step": 28010
    },
    {
      "epoch": 0.00017095947265625,
      "model_forward_time": 0.1157536506652832,
      "step": 28010
    },
    {
      "epoch": 0.00017095947265625,
      "step": 28010,
      "training_step_time": 0.387864351272583
    },
    {
      "epoch": 0.000170965576171875,
      "model_forward_time": 0.11555933952331543,
      "step": 28011
    },
    {
      "epoch": 0.000170965576171875,
      "step": 28011,
      "training_step_time": 0.3924241065979004
    },
    {
      "epoch": 0.0001709716796875,
      "model_forward_time": 0.11626291275024414,
      "step": 28012
    },
    {
      "epoch": 0.0001709716796875,
      "step": 28012,
      "training_step_time": 0.4344143867492676
    },
    {
      "epoch": 0.000170977783203125,
      "model_forward_time": 0.11841869354248047,
      "step": 28013
    },
    {
      "epoch": 0.000170977783203125,
      "step": 28013,
      "training_step_time": 0.48442649841308594
    },
    {
      "epoch": 0.00017098388671875,
      "model_forward_time": 0.11873626708984375,
      "step": 28014
    },
    {
      "epoch": 0.00017098388671875,
      "step": 28014,
      "training_step_time": 0.4050898551940918
    },
    {
      "epoch": 0.000170989990234375,
      "model_forward_time": 0.11581802368164062,
      "step": 28015
    },
    {
      "epoch": 0.000170989990234375,
      "step": 28015,
      "training_step_time": 0.3980233669281006
    },
    {
      "epoch": 0.00017099609375,
      "model_forward_time": 0.11589241027832031,
      "step": 28016
    },
    {
      "epoch": 0.00017099609375,
      "step": 28016,
      "training_step_time": 0.43981075286865234
    },
    {
      "epoch": 0.000171002197265625,
      "model_forward_time": 0.11976456642150879,
      "step": 28017
    },
    {
      "epoch": 0.000171002197265625,
      "step": 28017,
      "training_step_time": 0.4820129871368408
    },
    {
      "epoch": 0.00017100830078125,
      "model_forward_time": 0.12079238891601562,
      "step": 28018
    },
    {
      "epoch": 0.00017100830078125,
      "step": 28018,
      "training_step_time": 0.41446447372436523
    },
    {
      "epoch": 0.000171014404296875,
      "model_forward_time": 0.11992025375366211,
      "step": 28019
    },
    {
      "epoch": 0.000171014404296875,
      "step": 28019,
      "training_step_time": 0.38700294494628906
    },
    {
      "epoch": 0.0001710205078125,
      "grad_norm": 0.1578114628791809,
      "learning_rate": 5.953143243609235e-05,
      "loss": 0.0495,
      "step": 28020
    },
    {
      "epoch": 0.0001710205078125,
      "model_forward_time": 0.11951351165771484,
      "step": 28020
    },
    {
      "epoch": 0.0001710205078125,
      "step": 28020,
      "training_step_time": 0.41359472274780273
    },
    {
      "epoch": 0.000171026611328125,
      "model_forward_time": 0.11739945411682129,
      "step": 28021
    },
    {
      "epoch": 0.000171026611328125,
      "step": 28021,
      "training_step_time": 0.4663107395172119
    },
    {
      "epoch": 0.00017103271484375,
      "model_forward_time": 0.115264892578125,
      "step": 28022
    },
    {
      "epoch": 0.00017103271484375,
      "step": 28022,
      "training_step_time": 0.3955423831939697
    },
    {
      "epoch": 0.000171038818359375,
      "model_forward_time": 0.11491990089416504,
      "step": 28023
    },
    {
      "epoch": 0.000171038818359375,
      "step": 28023,
      "training_step_time": 0.38358187675476074
    },
    {
      "epoch": 0.000171044921875,
      "model_forward_time": 0.115264892578125,
      "step": 28024
    },
    {
      "epoch": 0.000171044921875,
      "step": 28024,
      "training_step_time": 0.38279032707214355
    },
    {
      "epoch": 0.000171051025390625,
      "model_forward_time": 0.1153416633605957,
      "step": 28025
    },
    {
      "epoch": 0.000171051025390625,
      "step": 28025,
      "training_step_time": 0.41144394874572754
    },
    {
      "epoch": 0.00017105712890625,
      "model_forward_time": 0.11508393287658691,
      "step": 28026
    },
    {
      "epoch": 0.00017105712890625,
      "step": 28026,
      "training_step_time": 0.3968203067779541
    },
    {
      "epoch": 0.000171063232421875,
      "model_forward_time": 0.1162111759185791,
      "step": 28027
    },
    {
      "epoch": 0.000171063232421875,
      "step": 28027,
      "training_step_time": 0.4532608985900879
    },
    {
      "epoch": 0.0001710693359375,
      "model_forward_time": 0.11558771133422852,
      "step": 28028
    },
    {
      "epoch": 0.0001710693359375,
      "step": 28028,
      "training_step_time": 0.39235854148864746
    },
    {
      "epoch": 0.000171075439453125,
      "model_forward_time": 0.1160271167755127,
      "step": 28029
    },
    {
      "epoch": 0.000171075439453125,
      "step": 28029,
      "training_step_time": 0.4419288635253906
    },
    {
      "epoch": 0.00017108154296875,
      "grad_norm": 0.1107306182384491,
      "learning_rate": 5.95043785088401e-05,
      "loss": 0.0422,
      "step": 28030
    },
    {
      "epoch": 0.00017108154296875,
      "model_forward_time": 0.1153562068939209,
      "step": 28030
    },
    {
      "epoch": 0.00017108154296875,
      "step": 28030,
      "training_step_time": 0.41721296310424805
    },
    {
      "epoch": 0.000171087646484375,
      "model_forward_time": 0.1245431900024414,
      "step": 28031
    },
    {
      "epoch": 0.000171087646484375,
      "step": 28031,
      "training_step_time": 0.49045705795288086
    },
    {
      "epoch": 0.00017109375,
      "model_forward_time": 0.11791586875915527,
      "step": 28032
    },
    {
      "epoch": 0.00017109375,
      "step": 28032,
      "training_step_time": 0.4578220844268799
    },
    {
      "epoch": 0.000171099853515625,
      "model_forward_time": 0.11846613883972168,
      "step": 28033
    },
    {
      "epoch": 0.000171099853515625,
      "step": 28033,
      "training_step_time": 0.40720415115356445
    },
    {
      "epoch": 0.00017110595703125,
      "model_forward_time": 0.11771512031555176,
      "step": 28034
    },
    {
      "epoch": 0.00017110595703125,
      "step": 28034,
      "training_step_time": 0.39673638343811035
    },
    {
      "epoch": 0.000171112060546875,
      "model_forward_time": 0.11555671691894531,
      "step": 28035
    },
    {
      "epoch": 0.000171112060546875,
      "step": 28035,
      "training_step_time": 0.38733625411987305
    },
    {
      "epoch": 0.0001711181640625,
      "model_forward_time": 0.11556816101074219,
      "step": 28036
    },
    {
      "epoch": 0.0001711181640625,
      "step": 28036,
      "training_step_time": 0.4007439613342285
    },
    {
      "epoch": 0.000171124267578125,
      "model_forward_time": 0.1161956787109375,
      "step": 28037
    },
    {
      "epoch": 0.000171124267578125,
      "step": 28037,
      "training_step_time": 0.40845513343811035
    },
    {
      "epoch": 0.00017113037109375,
      "model_forward_time": 0.11527085304260254,
      "step": 28038
    },
    {
      "epoch": 0.00017113037109375,
      "step": 28038,
      "training_step_time": 0.4055650234222412
    },
    {
      "epoch": 0.000171136474609375,
      "model_forward_time": 0.1162116527557373,
      "step": 28039
    },
    {
      "epoch": 0.000171136474609375,
      "step": 28039,
      "training_step_time": 0.4026072025299072
    },
    {
      "epoch": 0.000171142578125,
      "grad_norm": 0.13533666729927063,
      "learning_rate": 5.9477321694408606e-05,
      "loss": 0.048,
      "step": 28040
    },
    {
      "epoch": 0.000171142578125,
      "model_forward_time": 0.11517024040222168,
      "step": 28040
    },
    {
      "epoch": 0.000171142578125,
      "step": 28040,
      "training_step_time": 0.43779516220092773
    },
    {
      "epoch": 0.000171148681640625,
      "model_forward_time": 0.11548995971679688,
      "step": 28041
    },
    {
      "epoch": 0.000171148681640625,
      "step": 28041,
      "training_step_time": 0.49866461753845215
    },
    {
      "epoch": 0.00017115478515625,
      "model_forward_time": 0.11443901062011719,
      "step": 28042
    },
    {
      "epoch": 0.00017115478515625,
      "step": 28042,
      "training_step_time": 0.39035820960998535
    },
    {
      "epoch": 0.000171160888671875,
      "model_forward_time": 0.11539483070373535,
      "step": 28043
    },
    {
      "epoch": 0.000171160888671875,
      "step": 28043,
      "training_step_time": 0.3840813636779785
    },
    {
      "epoch": 0.0001711669921875,
      "model_forward_time": 0.11573648452758789,
      "step": 28044
    },
    {
      "epoch": 0.0001711669921875,
      "step": 28044,
      "training_step_time": 0.44129443168640137
    },
    {
      "epoch": 0.000171173095703125,
      "model_forward_time": 0.11913323402404785,
      "step": 28045
    },
    {
      "epoch": 0.000171173095703125,
      "step": 28045,
      "training_step_time": 0.413562536239624
    },
    {
      "epoch": 0.00017117919921875,
      "model_forward_time": 0.12108540534973145,
      "step": 28046
    },
    {
      "epoch": 0.00017117919921875,
      "step": 28046,
      "training_step_time": 0.3836634159088135
    },
    {
      "epoch": 0.000171185302734375,
      "model_forward_time": 0.12205386161804199,
      "step": 28047
    },
    {
      "epoch": 0.000171185302734375,
      "step": 28047,
      "training_step_time": 0.4192631244659424
    },
    {
      "epoch": 0.00017119140625,
      "model_forward_time": 0.11799454689025879,
      "step": 28048
    },
    {
      "epoch": 0.00017119140625,
      "step": 28048,
      "training_step_time": 0.4250626564025879
    },
    {
      "epoch": 0.000171197509765625,
      "model_forward_time": 0.11756634712219238,
      "step": 28049
    },
    {
      "epoch": 0.000171197509765625,
      "step": 28049,
      "training_step_time": 0.39183902740478516
    },
    {
      "epoch": 0.00017120361328125,
      "grad_norm": 0.1564134955406189,
      "learning_rate": 5.945026200101702e-05,
      "loss": 0.0411,
      "step": 28050
    },
    {
      "epoch": 0.00017120361328125,
      "model_forward_time": 0.11690258979797363,
      "step": 28050
    },
    {
      "epoch": 0.00017120361328125,
      "step": 28050,
      "training_step_time": 0.3877875804901123
    },
    {
      "epoch": 0.000171209716796875,
      "model_forward_time": 0.1156013011932373,
      "step": 28051
    },
    {
      "epoch": 0.000171209716796875,
      "step": 28051,
      "training_step_time": 0.4068591594696045
    },
    {
      "epoch": 0.0001712158203125,
      "model_forward_time": 0.11557650566101074,
      "step": 28052
    },
    {
      "epoch": 0.0001712158203125,
      "step": 28052,
      "training_step_time": 0.3979313373565674
    },
    {
      "epoch": 0.000171221923828125,
      "model_forward_time": 0.1154012680053711,
      "step": 28053
    },
    {
      "epoch": 0.000171221923828125,
      "step": 28053,
      "training_step_time": 0.44208621978759766
    },
    {
      "epoch": 0.00017122802734375,
      "model_forward_time": 0.11597704887390137,
      "step": 28054
    },
    {
      "epoch": 0.00017122802734375,
      "step": 28054,
      "training_step_time": 0.4173595905303955
    },
    {
      "epoch": 0.000171234130859375,
      "model_forward_time": 0.11600732803344727,
      "step": 28055
    },
    {
      "epoch": 0.000171234130859375,
      "step": 28055,
      "training_step_time": 0.5076682567596436
    },
    {
      "epoch": 0.000171240234375,
      "model_forward_time": 0.11539793014526367,
      "step": 28056
    },
    {
      "epoch": 0.000171240234375,
      "step": 28056,
      "training_step_time": 0.4262228012084961
    },
    {
      "epoch": 0.000171246337890625,
      "model_forward_time": 0.11554694175720215,
      "step": 28057
    },
    {
      "epoch": 0.000171246337890625,
      "step": 28057,
      "training_step_time": 0.40096116065979004
    },
    {
      "epoch": 0.00017125244140625,
      "model_forward_time": 0.11521506309509277,
      "step": 28058
    },
    {
      "epoch": 0.00017125244140625,
      "step": 28058,
      "training_step_time": 0.40537452697753906
    },
    {
      "epoch": 0.000171258544921875,
      "model_forward_time": 0.11547684669494629,
      "step": 28059
    },
    {
      "epoch": 0.000171258544921875,
      "step": 28059,
      "training_step_time": 0.4049389362335205
    },
    {
      "epoch": 0.0001712646484375,
      "grad_norm": 0.1363169252872467,
      "learning_rate": 5.9423199436885345e-05,
      "loss": 0.0417,
      "step": 28060
    },
    {
      "epoch": 0.0001712646484375,
      "model_forward_time": 0.11614847183227539,
      "step": 28060
    },
    {
      "epoch": 0.0001712646484375,
      "step": 28060,
      "training_step_time": 0.4281456470489502
    },
    {
      "epoch": 0.000171270751953125,
      "model_forward_time": 0.11577773094177246,
      "step": 28061
    },
    {
      "epoch": 0.000171270751953125,
      "step": 28061,
      "training_step_time": 0.4529390335083008
    },
    {
      "epoch": 0.00017127685546875,
      "model_forward_time": 0.1173715591430664,
      "step": 28062
    },
    {
      "epoch": 0.00017127685546875,
      "step": 28062,
      "training_step_time": 0.4382438659667969
    },
    {
      "epoch": 0.000171282958984375,
      "model_forward_time": 0.1186375617980957,
      "step": 28063
    },
    {
      "epoch": 0.000171282958984375,
      "step": 28063,
      "training_step_time": 0.38274550437927246
    },
    {
      "epoch": 0.0001712890625,
      "model_forward_time": 0.1158456802368164,
      "step": 28064
    },
    {
      "epoch": 0.0001712890625,
      "step": 28064,
      "training_step_time": 0.38299059867858887
    },
    {
      "epoch": 0.000171295166015625,
      "model_forward_time": 0.11534643173217773,
      "step": 28065
    },
    {
      "epoch": 0.000171295166015625,
      "step": 28065,
      "training_step_time": 0.3994266986846924
    },
    {
      "epoch": 0.00017130126953125,
      "model_forward_time": 0.11546158790588379,
      "step": 28066
    },
    {
      "epoch": 0.00017130126953125,
      "step": 28066,
      "training_step_time": 0.41346168518066406
    },
    {
      "epoch": 0.000171307373046875,
      "model_forward_time": 0.1169435977935791,
      "step": 28067
    },
    {
      "epoch": 0.000171307373046875,
      "step": 28067,
      "training_step_time": 0.4060099124908447
    },
    {
      "epoch": 0.0001713134765625,
      "model_forward_time": 0.11645746231079102,
      "step": 28068
    },
    {
      "epoch": 0.0001713134765625,
      "step": 28068,
      "training_step_time": 0.41188573837280273
    },
    {
      "epoch": 0.000171319580078125,
      "model_forward_time": 0.11510777473449707,
      "step": 28069
    },
    {
      "epoch": 0.000171319580078125,
      "step": 28069,
      "training_step_time": 0.4228365421295166
    },
    {
      "epoch": 0.00017132568359375,
      "grad_norm": 0.08581265062093735,
      "learning_rate": 5.939613401023451e-05,
      "loss": 0.0371,
      "step": 28070
    },
    {
      "epoch": 0.00017132568359375,
      "model_forward_time": 0.11742615699768066,
      "step": 28070
    },
    {
      "epoch": 0.00017132568359375,
      "step": 28070,
      "training_step_time": 0.4900679588317871
    },
    {
      "epoch": 0.000171331787109375,
      "model_forward_time": 0.11603140830993652,
      "step": 28071
    },
    {
      "epoch": 0.000171331787109375,
      "step": 28071,
      "training_step_time": 0.38442564010620117
    },
    {
      "epoch": 0.000171337890625,
      "model_forward_time": 0.11741352081298828,
      "step": 28072
    },
    {
      "epoch": 0.000171337890625,
      "step": 28072,
      "training_step_time": 0.41268157958984375
    },
    {
      "epoch": 0.000171343994140625,
      "model_forward_time": 0.11634945869445801,
      "step": 28073
    },
    {
      "epoch": 0.000171343994140625,
      "step": 28073,
      "training_step_time": 0.4409017562866211
    },
    {
      "epoch": 0.00017135009765625,
      "model_forward_time": 0.11574220657348633,
      "step": 28074
    },
    {
      "epoch": 0.00017135009765625,
      "step": 28074,
      "training_step_time": 0.39281558990478516
    },
    {
      "epoch": 0.000171356201171875,
      "model_forward_time": 0.11613988876342773,
      "step": 28075
    },
    {
      "epoch": 0.000171356201171875,
      "step": 28075,
      "training_step_time": 0.4345986843109131
    },
    {
      "epoch": 0.0001713623046875,
      "model_forward_time": 0.11593246459960938,
      "step": 28076
    },
    {
      "epoch": 0.0001713623046875,
      "step": 28076,
      "training_step_time": 0.4006631374359131
    },
    {
      "epoch": 0.000171368408203125,
      "model_forward_time": 0.1158604621887207,
      "step": 28077
    },
    {
      "epoch": 0.000171368408203125,
      "step": 28077,
      "training_step_time": 0.3968052864074707
    },
    {
      "epoch": 0.00017137451171875,
      "model_forward_time": 0.1157524585723877,
      "step": 28078
    },
    {
      "epoch": 0.00017137451171875,
      "step": 28078,
      "training_step_time": 0.39058494567871094
    },
    {
      "epoch": 0.000171380615234375,
      "model_forward_time": 0.11538553237915039,
      "step": 28079
    },
    {
      "epoch": 0.000171380615234375,
      "step": 28079,
      "training_step_time": 0.3910202980041504
    },
    {
      "epoch": 0.00017138671875,
      "grad_norm": 0.12865863740444183,
      "learning_rate": 5.9369065729286245e-05,
      "loss": 0.0426,
      "step": 28080
    },
    {
      "epoch": 0.00017138671875,
      "model_forward_time": 0.11559724807739258,
      "step": 28080
    },
    {
      "epoch": 0.00017138671875,
      "step": 28080,
      "training_step_time": 0.3940393924713135
    },
    {
      "epoch": 0.000171392822265625,
      "model_forward_time": 0.12845420837402344,
      "step": 28081
    },
    {
      "epoch": 0.000171392822265625,
      "step": 28081,
      "training_step_time": 0.3955204486846924
    },
    {
      "epoch": 0.00017139892578125,
      "model_forward_time": 0.11543011665344238,
      "step": 28082
    },
    {
      "epoch": 0.00017139892578125,
      "step": 28082,
      "training_step_time": 0.39240336418151855
    },
    {
      "epoch": 0.000171405029296875,
      "model_forward_time": 0.11712503433227539,
      "step": 28083
    },
    {
      "epoch": 0.000171405029296875,
      "step": 28083,
      "training_step_time": 0.4705672264099121
    },
    {
      "epoch": 0.0001714111328125,
      "model_forward_time": 0.1160125732421875,
      "step": 28084
    },
    {
      "epoch": 0.0001714111328125,
      "step": 28084,
      "training_step_time": 0.4886608123779297
    },
    {
      "epoch": 0.000171417236328125,
      "model_forward_time": 0.11560964584350586,
      "step": 28085
    },
    {
      "epoch": 0.000171417236328125,
      "step": 28085,
      "training_step_time": 0.3832364082336426
    },
    {
      "epoch": 0.00017142333984375,
      "model_forward_time": 0.11522126197814941,
      "step": 28086
    },
    {
      "epoch": 0.00017142333984375,
      "step": 28086,
      "training_step_time": 0.3889007568359375
    },
    {
      "epoch": 0.000171429443359375,
      "model_forward_time": 0.11634659767150879,
      "step": 28087
    },
    {
      "epoch": 0.000171429443359375,
      "step": 28087,
      "training_step_time": 0.4796929359436035
    },
    {
      "epoch": 0.000171435546875,
      "model_forward_time": 0.11642146110534668,
      "step": 28088
    },
    {
      "epoch": 0.000171435546875,
      "step": 28088,
      "training_step_time": 0.45662975311279297
    },
    {
      "epoch": 0.000171441650390625,
      "model_forward_time": 0.11498808860778809,
      "step": 28089
    },
    {
      "epoch": 0.000171441650390625,
      "step": 28089,
      "training_step_time": 0.46400022506713867
    },
    {
      "epoch": 0.00017144775390625,
      "grad_norm": 0.11232323944568634,
      "learning_rate": 5.934199460226317e-05,
      "loss": 0.0447,
      "step": 28090
    },
    {
      "epoch": 0.00017144775390625,
      "model_forward_time": 0.11579179763793945,
      "step": 28090
    },
    {
      "epoch": 0.00017144775390625,
      "step": 28090,
      "training_step_time": 0.4714479446411133
    },
    {
      "epoch": 0.000171453857421875,
      "model_forward_time": 0.11482000350952148,
      "step": 28091
    },
    {
      "epoch": 0.000171453857421875,
      "step": 28091,
      "training_step_time": 0.4247548580169678
    },
    {
      "epoch": 0.0001714599609375,
      "model_forward_time": 0.11503911018371582,
      "step": 28092
    },
    {
      "epoch": 0.0001714599609375,
      "step": 28092,
      "training_step_time": 0.38928747177124023
    },
    {
      "epoch": 0.000171466064453125,
      "model_forward_time": 0.11588740348815918,
      "step": 28093
    },
    {
      "epoch": 0.000171466064453125,
      "step": 28093,
      "training_step_time": 0.4072072505950928
    },
    {
      "epoch": 0.00017147216796875,
      "model_forward_time": 0.11577439308166504,
      "step": 28094
    },
    {
      "epoch": 0.00017147216796875,
      "step": 28094,
      "training_step_time": 0.3886895179748535
    },
    {
      "epoch": 0.000171478271484375,
      "model_forward_time": 0.11544227600097656,
      "step": 28095
    },
    {
      "epoch": 0.000171478271484375,
      "step": 28095,
      "training_step_time": 0.3814661502838135
    },
    {
      "epoch": 0.000171484375,
      "model_forward_time": 0.11515617370605469,
      "step": 28096
    },
    {
      "epoch": 0.000171484375,
      "step": 28096,
      "training_step_time": 0.39971470832824707
    },
    {
      "epoch": 0.000171490478515625,
      "model_forward_time": 0.11716651916503906,
      "step": 28097
    },
    {
      "epoch": 0.000171490478515625,
      "step": 28097,
      "training_step_time": 0.5067195892333984
    },
    {
      "epoch": 0.00017149658203125,
      "model_forward_time": 0.11575698852539062,
      "step": 28098
    },
    {
      "epoch": 0.00017149658203125,
      "step": 28098,
      "training_step_time": 0.39577603340148926
    },
    {
      "epoch": 0.000171502685546875,
      "model_forward_time": 0.11575460433959961,
      "step": 28099
    },
    {
      "epoch": 0.000171502685546875,
      "step": 28099,
      "training_step_time": 0.4350149631500244
    },
    {
      "epoch": 0.0001715087890625,
      "grad_norm": 0.11024501174688339,
      "learning_rate": 5.9314920637388815e-05,
      "loss": 0.0461,
      "step": 28100
    },
    {
      "epoch": 0.0001715087890625,
      "model_forward_time": 0.1149284839630127,
      "step": 28100
    },
    {
      "epoch": 0.0001715087890625,
      "step": 28100,
      "training_step_time": 0.39569902420043945
    },
    {
      "epoch": 0.000171514892578125,
      "model_forward_time": 0.11832976341247559,
      "step": 28101
    },
    {
      "epoch": 0.000171514892578125,
      "step": 28101,
      "training_step_time": 0.4404423236846924
    },
    {
      "epoch": 0.00017152099609375,
      "model_forward_time": 0.11508822441101074,
      "step": 28102
    },
    {
      "epoch": 0.00017152099609375,
      "step": 28102,
      "training_step_time": 0.456740140914917
    },
    {
      "epoch": 0.000171527099609375,
      "model_forward_time": 0.11495852470397949,
      "step": 28103
    },
    {
      "epoch": 0.000171527099609375,
      "step": 28103,
      "training_step_time": 0.38448381423950195
    },
    {
      "epoch": 0.000171533203125,
      "model_forward_time": 0.11522436141967773,
      "step": 28104
    },
    {
      "epoch": 0.000171533203125,
      "step": 28104,
      "training_step_time": 0.42505979537963867
    },
    {
      "epoch": 0.000171539306640625,
      "model_forward_time": 0.11476516723632812,
      "step": 28105
    },
    {
      "epoch": 0.000171539306640625,
      "step": 28105,
      "training_step_time": 0.393540620803833
    },
    {
      "epoch": 0.00017154541015625,
      "model_forward_time": 0.11493945121765137,
      "step": 28106
    },
    {
      "epoch": 0.00017154541015625,
      "step": 28106,
      "training_step_time": 0.40515851974487305
    },
    {
      "epoch": 0.000171551513671875,
      "model_forward_time": 0.11505389213562012,
      "step": 28107
    },
    {
      "epoch": 0.000171551513671875,
      "step": 28107,
      "training_step_time": 0.39394688606262207
    },
    {
      "epoch": 0.0001715576171875,
      "model_forward_time": 0.11534357070922852,
      "step": 28108
    },
    {
      "epoch": 0.0001715576171875,
      "step": 28108,
      "training_step_time": 0.39418816566467285
    },
    {
      "epoch": 0.000171563720703125,
      "model_forward_time": 0.11505842208862305,
      "step": 28109
    },
    {
      "epoch": 0.000171563720703125,
      "step": 28109,
      "training_step_time": 0.4015970230102539
    },
    {
      "epoch": 0.00017156982421875,
      "grad_norm": 0.1255953013896942,
      "learning_rate": 5.92878438428875e-05,
      "loss": 0.0402,
      "step": 28110
    },
    {
      "epoch": 0.00017156982421875,
      "model_forward_time": 0.11581039428710938,
      "step": 28110
    },
    {
      "epoch": 0.00017156982421875,
      "step": 28110,
      "training_step_time": 0.3849618434906006
    },
    {
      "epoch": 0.000171575927734375,
      "model_forward_time": 0.11488962173461914,
      "step": 28111
    },
    {
      "epoch": 0.000171575927734375,
      "step": 28111,
      "training_step_time": 0.5075788497924805
    },
    {
      "epoch": 0.00017158203125,
      "model_forward_time": 0.11556529998779297,
      "step": 28112
    },
    {
      "epoch": 0.00017158203125,
      "step": 28112,
      "training_step_time": 0.4335975646972656
    },
    {
      "epoch": 0.000171588134765625,
      "model_forward_time": 0.11724424362182617,
      "step": 28113
    },
    {
      "epoch": 0.000171588134765625,
      "step": 28113,
      "training_step_time": 0.5734312534332275
    },
    {
      "epoch": 0.00017159423828125,
      "model_forward_time": 0.1191554069519043,
      "step": 28114
    },
    {
      "epoch": 0.00017159423828125,
      "step": 28114,
      "training_step_time": 0.5005531311035156
    },
    {
      "epoch": 0.000171600341796875,
      "model_forward_time": 0.11808967590332031,
      "step": 28115
    },
    {
      "epoch": 0.000171600341796875,
      "step": 28115,
      "training_step_time": 0.7009737491607666
    },
    {
      "epoch": 0.0001716064453125,
      "model_forward_time": 0.12543129920959473,
      "step": 28116
    },
    {
      "epoch": 0.0001716064453125,
      "step": 28116,
      "training_step_time": 0.6185393333435059
    },
    {
      "epoch": 0.000171612548828125,
      "model_forward_time": 0.1179816722869873,
      "step": 28117
    },
    {
      "epoch": 0.000171612548828125,
      "step": 28117,
      "training_step_time": 0.7118206024169922
    },
    {
      "epoch": 0.00017161865234375,
      "model_forward_time": 0.13448810577392578,
      "step": 28118
    },
    {
      "epoch": 0.00017161865234375,
      "step": 28118,
      "training_step_time": 0.6650083065032959
    },
    {
      "epoch": 0.000171624755859375,
      "model_forward_time": 0.11679339408874512,
      "step": 28119
    },
    {
      "epoch": 0.000171624755859375,
      "step": 28119,
      "training_step_time": 0.6489036083221436
    },
    {
      "epoch": 0.000171630859375,
      "grad_norm": 0.11203125864267349,
      "learning_rate": 5.9260764226984476e-05,
      "loss": 0.0458,
      "step": 28120
    },
    {
      "epoch": 0.000171630859375,
      "model_forward_time": 0.13103032112121582,
      "step": 28120
    },
    {
      "epoch": 0.000171630859375,
      "step": 28120,
      "training_step_time": 0.6645967960357666
    },
    {
      "epoch": 0.000171636962890625,
      "model_forward_time": 0.11935043334960938,
      "step": 28121
    },
    {
      "epoch": 0.000171636962890625,
      "step": 28121,
      "training_step_time": 0.6542959213256836
    },
    {
      "epoch": 0.00017164306640625,
      "model_forward_time": 0.14049315452575684,
      "step": 28122
    },
    {
      "epoch": 0.00017164306640625,
      "step": 28122,
      "training_step_time": 0.6453468799591064
    },
    {
      "epoch": 0.000171649169921875,
      "model_forward_time": 0.1413106918334961,
      "step": 28123
    },
    {
      "epoch": 0.000171649169921875,
      "step": 28123,
      "training_step_time": 0.703726053237915
    },
    {
      "epoch": 0.0001716552734375,
      "model_forward_time": 0.12478470802307129,
      "step": 28124
    },
    {
      "epoch": 0.0001716552734375,
      "step": 28124,
      "training_step_time": 0.6279985904693604
    },
    {
      "epoch": 0.000171661376953125,
      "model_forward_time": 0.11860036849975586,
      "step": 28125
    },
    {
      "epoch": 0.000171661376953125,
      "step": 28125,
      "training_step_time": 0.7145919799804688
    },
    {
      "epoch": 0.00017166748046875,
      "model_forward_time": 0.11830472946166992,
      "step": 28126
    },
    {
      "epoch": 0.00017166748046875,
      "step": 28126,
      "training_step_time": 0.6652169227600098
    },
    {
      "epoch": 0.000171673583984375,
      "model_forward_time": 0.12955880165100098,
      "step": 28127
    },
    {
      "epoch": 0.000171673583984375,
      "step": 28127,
      "training_step_time": 0.6658363342285156
    },
    {
      "epoch": 0.0001716796875,
      "model_forward_time": 0.11988377571105957,
      "step": 28128
    },
    {
      "epoch": 0.0001716796875,
      "step": 28128,
      "training_step_time": 0.6697897911071777
    },
    {
      "epoch": 0.000171685791015625,
      "model_forward_time": 0.12167143821716309,
      "step": 28129
    },
    {
      "epoch": 0.000171685791015625,
      "step": 28129,
      "training_step_time": 0.6555449962615967
    },
    {
      "epoch": 0.00017169189453125,
      "grad_norm": 0.10572905093431473,
      "learning_rate": 5.9233681797905785e-05,
      "loss": 0.0439,
      "step": 28130
    },
    {
      "epoch": 0.00017169189453125,
      "model_forward_time": 0.12159109115600586,
      "step": 28130
    },
    {
      "epoch": 0.00017169189453125,
      "step": 28130,
      "training_step_time": 0.6390666961669922
    },
    {
      "epoch": 0.000171697998046875,
      "model_forward_time": 0.12551355361938477,
      "step": 28131
    },
    {
      "epoch": 0.000171697998046875,
      "step": 28131,
      "training_step_time": 0.7407569885253906
    },
    {
      "epoch": 0.0001717041015625,
      "model_forward_time": 0.13928723335266113,
      "step": 28132
    },
    {
      "epoch": 0.0001717041015625,
      "step": 28132,
      "training_step_time": 0.673283576965332
    },
    {
      "epoch": 0.000171710205078125,
      "model_forward_time": 0.11985468864440918,
      "step": 28133
    },
    {
      "epoch": 0.000171710205078125,
      "step": 28133,
      "training_step_time": 0.7036893367767334
    },
    {
      "epoch": 0.00017171630859375,
      "model_forward_time": 0.11897110939025879,
      "step": 28134
    },
    {
      "epoch": 0.00017171630859375,
      "step": 28134,
      "training_step_time": 0.7233140468597412
    },
    {
      "epoch": 0.000171722412109375,
      "model_forward_time": 0.11716103553771973,
      "step": 28135
    },
    {
      "epoch": 0.000171722412109375,
      "step": 28135,
      "training_step_time": 0.7509253025054932
    },
    {
      "epoch": 0.000171728515625,
      "model_forward_time": 0.11765885353088379,
      "step": 28136
    },
    {
      "epoch": 0.000171728515625,
      "step": 28136,
      "training_step_time": 0.7392716407775879
    },
    {
      "epoch": 0.000171734619140625,
      "model_forward_time": 0.12115049362182617,
      "step": 28137
    },
    {
      "epoch": 0.000171734619140625,
      "step": 28137,
      "training_step_time": 0.6093947887420654
    },
    {
      "epoch": 0.00017174072265625,
      "model_forward_time": 0.1202399730682373,
      "step": 28138
    },
    {
      "epoch": 0.00017174072265625,
      "step": 28138,
      "training_step_time": 0.6271703243255615
    },
    {
      "epoch": 0.000171746826171875,
      "model_forward_time": 0.12453246116638184,
      "step": 28139
    },
    {
      "epoch": 0.000171746826171875,
      "step": 28139,
      "training_step_time": 0.6645944118499756
    },
    {
      "epoch": 0.0001717529296875,
      "grad_norm": 0.12356070429086685,
      "learning_rate": 5.9206596563878357e-05,
      "loss": 0.0431,
      "step": 28140
    },
    {
      "epoch": 0.0001717529296875,
      "model_forward_time": 0.12318873405456543,
      "step": 28140
    },
    {
      "epoch": 0.0001717529296875,
      "step": 28140,
      "training_step_time": 0.6854641437530518
    },
    {
      "epoch": 0.000171759033203125,
      "model_forward_time": 0.12639546394348145,
      "step": 28141
    },
    {
      "epoch": 0.000171759033203125,
      "step": 28141,
      "training_step_time": 0.6662304401397705
    },
    {
      "epoch": 0.00017176513671875,
      "model_forward_time": 0.12037539482116699,
      "step": 28142
    },
    {
      "epoch": 0.00017176513671875,
      "step": 28142,
      "training_step_time": 0.6514089107513428
    },
    {
      "epoch": 0.000171771240234375,
      "model_forward_time": 0.12125611305236816,
      "step": 28143
    },
    {
      "epoch": 0.000171771240234375,
      "step": 28143,
      "training_step_time": 0.6203529834747314
    },
    {
      "epoch": 0.00017177734375,
      "model_forward_time": 0.13191628456115723,
      "step": 28144
    },
    {
      "epoch": 0.00017177734375,
      "step": 28144,
      "training_step_time": 0.7252790927886963
    },
    {
      "epoch": 0.000171783447265625,
      "model_forward_time": 0.12284016609191895,
      "step": 28145
    },
    {
      "epoch": 0.000171783447265625,
      "step": 28145,
      "training_step_time": 0.7400527000427246
    },
    {
      "epoch": 0.00017178955078125,
      "model_forward_time": 0.12008857727050781,
      "step": 28146
    },
    {
      "epoch": 0.00017178955078125,
      "step": 28146,
      "training_step_time": 0.6844255924224854
    },
    {
      "epoch": 0.000171795654296875,
      "model_forward_time": 0.11854887008666992,
      "step": 28147
    },
    {
      "epoch": 0.000171795654296875,
      "step": 28147,
      "training_step_time": 0.661205530166626
    },
    {
      "epoch": 0.0001718017578125,
      "model_forward_time": 0.11792421340942383,
      "step": 28148
    },
    {
      "epoch": 0.0001718017578125,
      "step": 28148,
      "training_step_time": 0.6544811725616455
    },
    {
      "epoch": 0.000171807861328125,
      "model_forward_time": 0.11995315551757812,
      "step": 28149
    },
    {
      "epoch": 0.000171807861328125,
      "step": 28149,
      "training_step_time": 0.6228294372558594
    },
    {
      "epoch": 0.00017181396484375,
      "grad_norm": 0.10763143748044968,
      "learning_rate": 5.9179508533130004e-05,
      "loss": 0.0469,
      "step": 28150
    },
    {
      "epoch": 0.00017181396484375,
      "model_forward_time": 0.12095117568969727,
      "step": 28150
    },
    {
      "epoch": 0.00017181396484375,
      "step": 28150,
      "training_step_time": 0.6770901679992676
    },
    {
      "epoch": 0.000171820068359375,
      "model_forward_time": 0.1185293197631836,
      "step": 28151
    },
    {
      "epoch": 0.000171820068359375,
      "step": 28151,
      "training_step_time": 0.7216720581054688
    },
    {
      "epoch": 0.000171826171875,
      "model_forward_time": 0.11824393272399902,
      "step": 28152
    },
    {
      "epoch": 0.000171826171875,
      "step": 28152,
      "training_step_time": 0.7102725505828857
    },
    {
      "epoch": 0.000171832275390625,
      "model_forward_time": 0.11903190612792969,
      "step": 28153
    },
    {
      "epoch": 0.000171832275390625,
      "step": 28153,
      "training_step_time": 0.7342092990875244
    },
    {
      "epoch": 0.00017183837890625,
      "model_forward_time": 0.12224078178405762,
      "step": 28154
    },
    {
      "epoch": 0.00017183837890625,
      "step": 28154,
      "training_step_time": 0.6910552978515625
    },
    {
      "epoch": 0.000171844482421875,
      "model_forward_time": 0.14948701858520508,
      "step": 28155
    },
    {
      "epoch": 0.000171844482421875,
      "step": 28155,
      "training_step_time": 0.7039961814880371
    },
    {
      "epoch": 0.0001718505859375,
      "model_forward_time": 0.12436056137084961,
      "step": 28156
    },
    {
      "epoch": 0.0001718505859375,
      "step": 28156,
      "training_step_time": 0.6623222827911377
    },
    {
      "epoch": 0.000171856689453125,
      "model_forward_time": 0.12321209907531738,
      "step": 28157
    },
    {
      "epoch": 0.000171856689453125,
      "step": 28157,
      "training_step_time": 0.6372740268707275
    },
    {
      "epoch": 0.00017186279296875,
      "model_forward_time": 0.12067818641662598,
      "step": 28158
    },
    {
      "epoch": 0.00017186279296875,
      "step": 28158,
      "training_step_time": 0.6742172241210938
    },
    {
      "epoch": 0.000171868896484375,
      "model_forward_time": 0.11916804313659668,
      "step": 28159
    },
    {
      "epoch": 0.000171868896484375,
      "step": 28159,
      "training_step_time": 0.7317194938659668
    },
    {
      "epoch": 0.000171875,
      "grad_norm": 0.1194123700261116,
      "learning_rate": 5.915241771388931e-05,
      "loss": 0.0487,
      "step": 28160
    },
    {
      "epoch": 0.000171875,
      "model_forward_time": 0.12107276916503906,
      "step": 28160
    },
    {
      "epoch": 0.000171875,
      "step": 28160,
      "training_step_time": 0.7577528953552246
    },
    {
      "epoch": 0.000171881103515625,
      "model_forward_time": 0.12257528305053711,
      "step": 28161
    },
    {
      "epoch": 0.000171881103515625,
      "step": 28161,
      "training_step_time": 0.6079730987548828
    },
    {
      "epoch": 0.00017188720703125,
      "model_forward_time": 0.12611007690429688,
      "step": 28162
    },
    {
      "epoch": 0.00017188720703125,
      "step": 28162,
      "training_step_time": 0.7126657962799072
    },
    {
      "epoch": 0.000171893310546875,
      "model_forward_time": 0.11948466300964355,
      "step": 28163
    },
    {
      "epoch": 0.000171893310546875,
      "step": 28163,
      "training_step_time": 0.6733443737030029
    },
    {
      "epoch": 0.0001718994140625,
      "model_forward_time": 0.11992311477661133,
      "step": 28164
    },
    {
      "epoch": 0.0001718994140625,
      "step": 28164,
      "training_step_time": 0.7160708904266357
    },
    {
      "epoch": 0.000171905517578125,
      "model_forward_time": 0.11902737617492676,
      "step": 28165
    },
    {
      "epoch": 0.000171905517578125,
      "step": 28165,
      "training_step_time": 0.705413818359375
    },
    {
      "epoch": 0.00017191162109375,
      "model_forward_time": 0.12268829345703125,
      "step": 28166
    },
    {
      "epoch": 0.00017191162109375,
      "step": 28166,
      "training_step_time": 0.6609866619110107
    },
    {
      "epoch": 0.000171917724609375,
      "model_forward_time": 0.11814284324645996,
      "step": 28167
    },
    {
      "epoch": 0.000171917724609375,
      "step": 28167,
      "training_step_time": 0.6552164554595947
    },
    {
      "epoch": 0.000171923828125,
      "model_forward_time": 0.12625527381896973,
      "step": 28168
    },
    {
      "epoch": 0.000171923828125,
      "step": 28168,
      "training_step_time": 0.6714730262756348
    },
    {
      "epoch": 0.000171929931640625,
      "model_forward_time": 0.12440967559814453,
      "step": 28169
    },
    {
      "epoch": 0.000171929931640625,
      "step": 28169,
      "training_step_time": 0.7126843929290771
    },
    {
      "epoch": 0.00017193603515625,
      "grad_norm": 0.11291562765836716,
      "learning_rate": 5.912532411438576e-05,
      "loss": 0.0469,
      "step": 28170
    },
    {
      "epoch": 0.00017193603515625,
      "model_forward_time": 0.12016844749450684,
      "step": 28170
    },
    {
      "epoch": 0.00017193603515625,
      "step": 28170,
      "training_step_time": 0.6612546443939209
    },
    {
      "epoch": 0.000171942138671875,
      "model_forward_time": 0.12009835243225098,
      "step": 28171
    },
    {
      "epoch": 0.000171942138671875,
      "step": 28171,
      "training_step_time": 0.6849396228790283
    },
    {
      "epoch": 0.0001719482421875,
      "model_forward_time": 0.11641645431518555,
      "step": 28172
    },
    {
      "epoch": 0.0001719482421875,
      "step": 28172,
      "training_step_time": 0.729017972946167
    },
    {
      "epoch": 0.000171954345703125,
      "model_forward_time": 0.12263774871826172,
      "step": 28173
    },
    {
      "epoch": 0.000171954345703125,
      "step": 28173,
      "training_step_time": 0.7199223041534424
    },
    {
      "epoch": 0.00017196044921875,
      "model_forward_time": 0.11879372596740723,
      "step": 28174
    },
    {
      "epoch": 0.00017196044921875,
      "step": 28174,
      "training_step_time": 0.6799778938293457
    },
    {
      "epoch": 0.000171966552734375,
      "model_forward_time": 0.11995816230773926,
      "step": 28175
    },
    {
      "epoch": 0.000171966552734375,
      "step": 28175,
      "training_step_time": 0.6191375255584717
    },
    {
      "epoch": 0.00017197265625,
      "model_forward_time": 0.12232637405395508,
      "step": 28176
    },
    {
      "epoch": 0.00017197265625,
      "step": 28176,
      "training_step_time": 0.625192403793335
    },
    {
      "epoch": 0.000171978759765625,
      "model_forward_time": 0.1285991668701172,
      "step": 28177
    },
    {
      "epoch": 0.000171978759765625,
      "step": 28177,
      "training_step_time": 0.6196255683898926
    },
    {
      "epoch": 0.00017198486328125,
      "model_forward_time": 0.11830425262451172,
      "step": 28178
    },
    {
      "epoch": 0.00017198486328125,
      "step": 28178,
      "training_step_time": 0.6656701564788818
    },
    {
      "epoch": 0.000171990966796875,
      "model_forward_time": 0.12238240242004395,
      "step": 28179
    },
    {
      "epoch": 0.000171990966796875,
      "step": 28179,
      "training_step_time": 0.6897153854370117
    },
    {
      "epoch": 0.0001719970703125,
      "grad_norm": 0.13282574713230133,
      "learning_rate": 5.909822774284971e-05,
      "loss": 0.0484,
      "step": 28180
    },
    {
      "epoch": 0.0001719970703125,
      "model_forward_time": 0.12687420845031738,
      "step": 28180
    },
    {
      "epoch": 0.0001719970703125,
      "step": 28180,
      "training_step_time": 0.6369187831878662
    },
    {
      "epoch": 0.000172003173828125,
      "model_forward_time": 0.12322616577148438,
      "step": 28181
    },
    {
      "epoch": 0.000172003173828125,
      "step": 28181,
      "training_step_time": 0.6160759925842285
    },
    {
      "epoch": 0.00017200927734375,
      "model_forward_time": 0.11794734001159668,
      "step": 28182
    },
    {
      "epoch": 0.00017200927734375,
      "step": 28182,
      "training_step_time": 0.5813384056091309
    },
    {
      "epoch": 0.000172015380859375,
      "model_forward_time": 0.12213015556335449,
      "step": 28183
    },
    {
      "epoch": 0.000172015380859375,
      "step": 28183,
      "training_step_time": 0.6436498165130615
    },
    {
      "epoch": 0.000172021484375,
      "model_forward_time": 0.11686038970947266,
      "step": 28184
    },
    {
      "epoch": 0.000172021484375,
      "step": 28184,
      "training_step_time": 0.654062032699585
    },
    {
      "epoch": 0.000172027587890625,
      "model_forward_time": 0.11953949928283691,
      "step": 28185
    },
    {
      "epoch": 0.000172027587890625,
      "step": 28185,
      "training_step_time": 0.55621337890625
    },
    {
      "epoch": 0.00017203369140625,
      "model_forward_time": 0.1162559986114502,
      "step": 28186
    },
    {
      "epoch": 0.00017203369140625,
      "step": 28186,
      "training_step_time": 0.5438878536224365
    },
    {
      "epoch": 0.000172039794921875,
      "model_forward_time": 0.11620521545410156,
      "step": 28187
    },
    {
      "epoch": 0.000172039794921875,
      "step": 28187,
      "training_step_time": 0.4929051399230957
    },
    {
      "epoch": 0.0001720458984375,
      "model_forward_time": 0.11542558670043945,
      "step": 28188
    },
    {
      "epoch": 0.0001720458984375,
      "step": 28188,
      "training_step_time": 0.5192041397094727
    },
    {
      "epoch": 0.000172052001953125,
      "model_forward_time": 0.11522078514099121,
      "step": 28189
    },
    {
      "epoch": 0.000172052001953125,
      "step": 28189,
      "training_step_time": 0.43077874183654785
    },
    {
      "epoch": 0.00017205810546875,
      "grad_norm": 0.11894652992486954,
      "learning_rate": 5.9071128607512285e-05,
      "loss": 0.0431,
      "step": 28190
    },
    {
      "epoch": 0.00017205810546875,
      "model_forward_time": 0.11529135704040527,
      "step": 28190
    },
    {
      "epoch": 0.00017205810546875,
      "step": 28190,
      "training_step_time": 0.5078845024108887
    },
    {
      "epoch": 0.000172064208984375,
      "model_forward_time": 0.11463451385498047,
      "step": 28191
    },
    {
      "epoch": 0.000172064208984375,
      "step": 28191,
      "training_step_time": 0.4167478084564209
    },
    {
      "epoch": 0.0001720703125,
      "model_forward_time": 0.11524105072021484,
      "step": 28192
    },
    {
      "epoch": 0.0001720703125,
      "step": 28192,
      "training_step_time": 0.43232059478759766
    },
    {
      "epoch": 0.000172076416015625,
      "model_forward_time": 0.11420989036560059,
      "step": 28193
    },
    {
      "epoch": 0.000172076416015625,
      "step": 28193,
      "training_step_time": 0.39894700050354004
    },
    {
      "epoch": 0.00017208251953125,
      "model_forward_time": 0.11488080024719238,
      "step": 28194
    },
    {
      "epoch": 0.00017208251953125,
      "step": 28194,
      "training_step_time": 0.4016754627227783
    },
    {
      "epoch": 0.000172088623046875,
      "model_forward_time": 0.11606335639953613,
      "step": 28195
    },
    {
      "epoch": 0.000172088623046875,
      "step": 28195,
      "training_step_time": 0.4846653938293457
    },
    {
      "epoch": 0.0001720947265625,
      "model_forward_time": 0.11694574356079102,
      "step": 28196
    },
    {
      "epoch": 0.0001720947265625,
      "step": 28196,
      "training_step_time": 0.44850754737854004
    },
    {
      "epoch": 0.000172100830078125,
      "model_forward_time": 0.11791348457336426,
      "step": 28197
    },
    {
      "epoch": 0.000172100830078125,
      "step": 28197,
      "training_step_time": 0.5174930095672607
    },
    {
      "epoch": 0.00017210693359375,
      "model_forward_time": 0.11692523956298828,
      "step": 28198
    },
    {
      "epoch": 0.00017210693359375,
      "step": 28198,
      "training_step_time": 0.4548630714416504
    },
    {
      "epoch": 0.000172113037109375,
      "model_forward_time": 0.11668920516967773,
      "step": 28199
    },
    {
      "epoch": 0.000172113037109375,
      "step": 28199,
      "training_step_time": 0.4275391101837158
    },
    {
      "epoch": 0.000172119140625,
      "grad_norm": 0.13031496107578278,
      "learning_rate": 5.90440267166055e-05,
      "loss": 0.0522,
      "step": 28200
    },
    {
      "epoch": 0.000172119140625,
      "model_forward_time": 0.11446762084960938,
      "step": 28200
    },
    {
      "epoch": 0.000172119140625,
      "step": 28200,
      "training_step_time": 0.41536498069763184
    },
    {
      "epoch": 0.000172125244140625,
      "model_forward_time": 0.11486554145812988,
      "step": 28201
    },
    {
      "epoch": 0.000172125244140625,
      "step": 28201,
      "training_step_time": 0.4107480049133301
    },
    {
      "epoch": 0.00017213134765625,
      "model_forward_time": 0.11504101753234863,
      "step": 28202
    },
    {
      "epoch": 0.00017213134765625,
      "step": 28202,
      "training_step_time": 0.4013545513153076
    },
    {
      "epoch": 0.000172137451171875,
      "model_forward_time": 0.11560773849487305,
      "step": 28203
    },
    {
      "epoch": 0.000172137451171875,
      "step": 28203,
      "training_step_time": 0.49394774436950684
    },
    {
      "epoch": 0.0001721435546875,
      "model_forward_time": 0.11498236656188965,
      "step": 28204
    },
    {
      "epoch": 0.0001721435546875,
      "step": 28204,
      "training_step_time": 0.49166440963745117
    },
    {
      "epoch": 0.000172149658203125,
      "model_forward_time": 0.11482763290405273,
      "step": 28205
    },
    {
      "epoch": 0.000172149658203125,
      "step": 28205,
      "training_step_time": 0.3977384567260742
    },
    {
      "epoch": 0.00017215576171875,
      "model_forward_time": 0.1155545711517334,
      "step": 28206
    },
    {
      "epoch": 0.00017215576171875,
      "step": 28206,
      "training_step_time": 0.39613866806030273
    },
    {
      "epoch": 0.000172161865234375,
      "model_forward_time": 0.11524462699890137,
      "step": 28207
    },
    {
      "epoch": 0.000172161865234375,
      "step": 28207,
      "training_step_time": 0.40306615829467773
    },
    {
      "epoch": 0.00017216796875,
      "model_forward_time": 0.11462068557739258,
      "step": 28208
    },
    {
      "epoch": 0.00017216796875,
      "step": 28208,
      "training_step_time": 0.4000561237335205
    },
    {
      "epoch": 0.000172174072265625,
      "model_forward_time": 0.11491703987121582,
      "step": 28209
    },
    {
      "epoch": 0.000172174072265625,
      "step": 28209,
      "training_step_time": 0.41440248489379883
    },
    {
      "epoch": 0.00017218017578125,
      "grad_norm": 0.14243565499782562,
      "learning_rate": 5.901692207836219e-05,
      "loss": 0.05,
      "step": 28210
    },
    {
      "epoch": 0.00017218017578125,
      "model_forward_time": 0.11486148834228516,
      "step": 28210
    },
    {
      "epoch": 0.00017218017578125,
      "step": 28210,
      "training_step_time": 0.46945786476135254
    },
    {
      "epoch": 0.000172186279296875,
      "model_forward_time": 0.11489510536193848,
      "step": 28211
    },
    {
      "epoch": 0.000172186279296875,
      "step": 28211,
      "training_step_time": 0.48374366760253906
    },
    {
      "epoch": 0.0001721923828125,
      "model_forward_time": 0.1176764965057373,
      "step": 28212
    },
    {
      "epoch": 0.0001721923828125,
      "step": 28212,
      "training_step_time": 0.4421374797821045
    },
    {
      "epoch": 0.000172198486328125,
      "model_forward_time": 0.12231087684631348,
      "step": 28213
    },
    {
      "epoch": 0.000172198486328125,
      "step": 28213,
      "training_step_time": 0.4143869876861572
    },
    {
      "epoch": 0.00017220458984375,
      "model_forward_time": 0.11771631240844727,
      "step": 28214
    },
    {
      "epoch": 0.00017220458984375,
      "step": 28214,
      "training_step_time": 0.3793172836303711
    },
    {
      "epoch": 0.000172210693359375,
      "model_forward_time": 0.1156008243560791,
      "step": 28215
    },
    {
      "epoch": 0.000172210693359375,
      "step": 28215,
      "training_step_time": 0.37824082374572754
    },
    {
      "epoch": 0.000172216796875,
      "model_forward_time": 0.1154930591583252,
      "step": 28216
    },
    {
      "epoch": 0.000172216796875,
      "step": 28216,
      "training_step_time": 0.397611141204834
    },
    {
      "epoch": 0.000172222900390625,
      "model_forward_time": 0.11479902267456055,
      "step": 28217
    },
    {
      "epoch": 0.000172222900390625,
      "step": 28217,
      "training_step_time": 0.46628522872924805
    },
    {
      "epoch": 0.00017222900390625,
      "model_forward_time": 0.11583662033081055,
      "step": 28218
    },
    {
      "epoch": 0.00017222900390625,
      "step": 28218,
      "training_step_time": 0.453906774520874
    },
    {
      "epoch": 0.000172235107421875,
      "model_forward_time": 0.11479592323303223,
      "step": 28219
    },
    {
      "epoch": 0.000172235107421875,
      "step": 28219,
      "training_step_time": 0.4034698009490967
    },
    {
      "epoch": 0.0001722412109375,
      "grad_norm": 0.14365611970424652,
      "learning_rate": 5.8989814701016035e-05,
      "loss": 0.051,
      "step": 28220
    },
    {
      "epoch": 0.0001722412109375,
      "model_forward_time": 0.11517715454101562,
      "step": 28220
    },
    {
      "epoch": 0.0001722412109375,
      "step": 28220,
      "training_step_time": 0.42012739181518555
    },
    {
      "epoch": 0.000172247314453125,
      "model_forward_time": 0.11535048484802246,
      "step": 28221
    },
    {
      "epoch": 0.000172247314453125,
      "step": 28221,
      "training_step_time": 0.37651705741882324
    },
    {
      "epoch": 0.00017225341796875,
      "model_forward_time": 0.11592483520507812,
      "step": 28222
    },
    {
      "epoch": 0.00017225341796875,
      "step": 28222,
      "training_step_time": 0.38861751556396484
    },
    {
      "epoch": 0.000172259521484375,
      "model_forward_time": 0.1157999038696289,
      "step": 28223
    },
    {
      "epoch": 0.000172259521484375,
      "step": 28223,
      "training_step_time": 0.3854193687438965
    },
    {
      "epoch": 0.000172265625,
      "model_forward_time": 0.11562037467956543,
      "step": 28224
    },
    {
      "epoch": 0.000172265625,
      "step": 28224,
      "training_step_time": 0.42215609550476074
    },
    {
      "epoch": 0.000172271728515625,
      "model_forward_time": 0.11577343940734863,
      "step": 28225
    },
    {
      "epoch": 0.000172271728515625,
      "step": 28225,
      "training_step_time": 0.4102933406829834
    },
    {
      "epoch": 0.00017227783203125,
      "model_forward_time": 0.11547136306762695,
      "step": 28226
    },
    {
      "epoch": 0.00017227783203125,
      "step": 28226,
      "training_step_time": 0.48716306686401367
    },
    {
      "epoch": 0.000172283935546875,
      "model_forward_time": 0.11819958686828613,
      "step": 28227
    },
    {
      "epoch": 0.000172283935546875,
      "step": 28227,
      "training_step_time": 0.38960933685302734
    },
    {
      "epoch": 0.0001722900390625,
      "model_forward_time": 0.11563849449157715,
      "step": 28228
    },
    {
      "epoch": 0.0001722900390625,
      "step": 28228,
      "training_step_time": 0.4147038459777832
    },
    {
      "epoch": 0.000172296142578125,
      "model_forward_time": 0.1156308650970459,
      "step": 28229
    },
    {
      "epoch": 0.000172296142578125,
      "step": 28229,
      "training_step_time": 0.37935447692871094
    },
    {
      "epoch": 0.00017230224609375,
      "grad_norm": 0.1216956377029419,
      "learning_rate": 5.896270459280153e-05,
      "loss": 0.0528,
      "step": 28230
    },
    {
      "epoch": 0.00017230224609375,
      "model_forward_time": 0.11565637588500977,
      "step": 28230
    },
    {
      "epoch": 0.00017230224609375,
      "step": 28230,
      "training_step_time": 0.38841891288757324
    },
    {
      "epoch": 0.000172308349609375,
      "model_forward_time": 0.11612749099731445,
      "step": 28231
    },
    {
      "epoch": 0.000172308349609375,
      "step": 28231,
      "training_step_time": 0.39792323112487793
    },
    {
      "epoch": 0.000172314453125,
      "model_forward_time": 0.11540961265563965,
      "step": 28232
    },
    {
      "epoch": 0.000172314453125,
      "step": 28232,
      "training_step_time": 0.38770365715026855
    },
    {
      "epoch": 0.000172320556640625,
      "model_forward_time": 0.11556601524353027,
      "step": 28233
    },
    {
      "epoch": 0.000172320556640625,
      "step": 28233,
      "training_step_time": 0.40589332580566406
    },
    {
      "epoch": 0.00017232666015625,
      "model_forward_time": 0.11519527435302734,
      "step": 28234
    },
    {
      "epoch": 0.00017232666015625,
      "step": 28234,
      "training_step_time": 0.40585947036743164
    },
    {
      "epoch": 0.000172332763671875,
      "model_forward_time": 0.11568641662597656,
      "step": 28235
    },
    {
      "epoch": 0.000172332763671875,
      "step": 28235,
      "training_step_time": 0.41965794563293457
    },
    {
      "epoch": 0.0001723388671875,
      "model_forward_time": 0.11620497703552246,
      "step": 28236
    },
    {
      "epoch": 0.0001723388671875,
      "step": 28236,
      "training_step_time": 0.40041041374206543
    },
    {
      "epoch": 0.000172344970703125,
      "model_forward_time": 0.1151583194732666,
      "step": 28237
    },
    {
      "epoch": 0.000172344970703125,
      "step": 28237,
      "training_step_time": 0.3974795341491699
    },
    {
      "epoch": 0.00017235107421875,
      "model_forward_time": 0.1150968074798584,
      "step": 28238
    },
    {
      "epoch": 0.00017235107421875,
      "step": 28238,
      "training_step_time": 0.3897969722747803
    },
    {
      "epoch": 0.000172357177734375,
      "model_forward_time": 0.11508011817932129,
      "step": 28239
    },
    {
      "epoch": 0.000172357177734375,
      "step": 28239,
      "training_step_time": 0.4428293704986572
    },
    {
      "epoch": 0.00017236328125,
      "grad_norm": 0.07946385443210602,
      "learning_rate": 5.8935591761954025e-05,
      "loss": 0.0476,
      "step": 28240
    },
    {
      "epoch": 0.00017236328125,
      "model_forward_time": 0.11518073081970215,
      "step": 28240
    },
    {
      "epoch": 0.00017236328125,
      "step": 28240,
      "training_step_time": 0.40197277069091797
    },
    {
      "epoch": 0.000172369384765625,
      "model_forward_time": 0.11594438552856445,
      "step": 28241
    },
    {
      "epoch": 0.000172369384765625,
      "step": 28241,
      "training_step_time": 0.47283411026000977
    },
    {
      "epoch": 0.00017237548828125,
      "model_forward_time": 0.11501955986022949,
      "step": 28242
    },
    {
      "epoch": 0.00017237548828125,
      "step": 28242,
      "training_step_time": 0.44980430603027344
    },
    {
      "epoch": 0.000172381591796875,
      "model_forward_time": 0.11550235748291016,
      "step": 28243
    },
    {
      "epoch": 0.000172381591796875,
      "step": 28243,
      "training_step_time": 0.39879369735717773
    },
    {
      "epoch": 0.0001723876953125,
      "model_forward_time": 0.1151115894317627,
      "step": 28244
    },
    {
      "epoch": 0.0001723876953125,
      "step": 28244,
      "training_step_time": 0.39121365547180176
    },
    {
      "epoch": 0.000172393798828125,
      "model_forward_time": 0.11537694931030273,
      "step": 28245
    },
    {
      "epoch": 0.000172393798828125,
      "step": 28245,
      "training_step_time": 0.3851783275604248
    },
    {
      "epoch": 0.00017239990234375,
      "model_forward_time": 0.11577033996582031,
      "step": 28246
    },
    {
      "epoch": 0.00017239990234375,
      "step": 28246,
      "training_step_time": 0.39497923851013184
    },
    {
      "epoch": 0.000172406005859375,
      "model_forward_time": 0.11609292030334473,
      "step": 28247
    },
    {
      "epoch": 0.000172406005859375,
      "step": 28247,
      "training_step_time": 0.44669127464294434
    },
    {
      "epoch": 0.000172412109375,
      "model_forward_time": 0.11513757705688477,
      "step": 28248
    },
    {
      "epoch": 0.000172412109375,
      "step": 28248,
      "training_step_time": 0.4461507797241211
    },
    {
      "epoch": 0.000172418212890625,
      "model_forward_time": 0.11593842506408691,
      "step": 28249
    },
    {
      "epoch": 0.000172418212890625,
      "step": 28249,
      "training_step_time": 0.4004404544830322
    },
    {
      "epoch": 0.00017242431640625,
      "grad_norm": 0.13251720368862152,
      "learning_rate": 5.890847621670966e-05,
      "loss": 0.0479,
      "step": 28250
    },
    {
      "epoch": 0.00017242431640625,
      "model_forward_time": 0.11672759056091309,
      "step": 28250
    },
    {
      "epoch": 0.00017242431640625,
      "step": 28250,
      "training_step_time": 0.39818239212036133
    },
    {
      "epoch": 0.000172430419921875,
      "model_forward_time": 0.11474013328552246,
      "step": 28251
    },
    {
      "epoch": 0.000172430419921875,
      "step": 28251,
      "training_step_time": 0.39069032669067383
    },
    {
      "epoch": 0.0001724365234375,
      "model_forward_time": 0.11575841903686523,
      "step": 28252
    },
    {
      "epoch": 0.0001724365234375,
      "step": 28252,
      "training_step_time": 0.41032862663269043
    },
    {
      "epoch": 0.000172442626953125,
      "model_forward_time": 0.11549758911132812,
      "step": 28253
    },
    {
      "epoch": 0.000172442626953125,
      "step": 28253,
      "training_step_time": 0.442779541015625
    },
    {
      "epoch": 0.00017244873046875,
      "model_forward_time": 0.11529159545898438,
      "step": 28254
    },
    {
      "epoch": 0.00017244873046875,
      "step": 28254,
      "training_step_time": 0.513023853302002
    },
    {
      "epoch": 0.000172454833984375,
      "model_forward_time": 0.11757588386535645,
      "step": 28255
    },
    {
      "epoch": 0.000172454833984375,
      "step": 28255,
      "training_step_time": 0.47887730598449707
    },
    {
      "epoch": 0.0001724609375,
      "model_forward_time": 0.11514973640441895,
      "step": 28256
    },
    {
      "epoch": 0.0001724609375,
      "step": 28256,
      "training_step_time": 0.43416261672973633
    },
    {
      "epoch": 0.000172467041015625,
      "model_forward_time": 0.11562180519104004,
      "step": 28257
    },
    {
      "epoch": 0.000172467041015625,
      "step": 28257,
      "training_step_time": 0.49212121963500977
    },
    {
      "epoch": 0.00017247314453125,
      "model_forward_time": 0.11531805992126465,
      "step": 28258
    },
    {
      "epoch": 0.00017247314453125,
      "step": 28258,
      "training_step_time": 0.4045295715332031
    },
    {
      "epoch": 0.000172479248046875,
      "model_forward_time": 0.11499452590942383,
      "step": 28259
    },
    {
      "epoch": 0.000172479248046875,
      "step": 28259,
      "training_step_time": 0.39214324951171875
    },
    {
      "epoch": 0.0001724853515625,
      "grad_norm": 0.17116083204746246,
      "learning_rate": 5.888135796530544e-05,
      "loss": 0.0505,
      "step": 28260
    },
    {
      "epoch": 0.0001724853515625,
      "model_forward_time": 0.11559414863586426,
      "step": 28260
    },
    {
      "epoch": 0.0001724853515625,
      "step": 28260,
      "training_step_time": 0.4007444381713867
    },
    {
      "epoch": 0.000172491455078125,
      "model_forward_time": 0.11482429504394531,
      "step": 28261
    },
    {
      "epoch": 0.000172491455078125,
      "step": 28261,
      "training_step_time": 0.3948543071746826
    },
    {
      "epoch": 0.00017249755859375,
      "model_forward_time": 0.11684560775756836,
      "step": 28262
    },
    {
      "epoch": 0.00017249755859375,
      "step": 28262,
      "training_step_time": 0.5076496601104736
    },
    {
      "epoch": 0.000172503662109375,
      "model_forward_time": 0.11504864692687988,
      "step": 28263
    },
    {
      "epoch": 0.000172503662109375,
      "step": 28263,
      "training_step_time": 0.39856863021850586
    },
    {
      "epoch": 0.000172509765625,
      "model_forward_time": 0.11539268493652344,
      "step": 28264
    },
    {
      "epoch": 0.000172509765625,
      "step": 28264,
      "training_step_time": 0.3821675777435303
    },
    {
      "epoch": 0.000172515869140625,
      "model_forward_time": 0.11564755439758301,
      "step": 28265
    },
    {
      "epoch": 0.000172515869140625,
      "step": 28265,
      "training_step_time": 0.3955059051513672
    },
    {
      "epoch": 0.00017252197265625,
      "model_forward_time": 0.11610889434814453,
      "step": 28266
    },
    {
      "epoch": 0.00017252197265625,
      "step": 28266,
      "training_step_time": 0.39577722549438477
    },
    {
      "epoch": 0.000172528076171875,
      "model_forward_time": 0.11515665054321289,
      "step": 28267
    },
    {
      "epoch": 0.000172528076171875,
      "step": 28267,
      "training_step_time": 0.4055595397949219
    },
    {
      "epoch": 0.0001725341796875,
      "model_forward_time": 0.11446237564086914,
      "step": 28268
    },
    {
      "epoch": 0.0001725341796875,
      "step": 28268,
      "training_step_time": 0.4702434539794922
    },
    {
      "epoch": 0.000172540283203125,
      "model_forward_time": 0.1151885986328125,
      "step": 28269
    },
    {
      "epoch": 0.000172540283203125,
      "step": 28269,
      "training_step_time": 0.4590022563934326
    },
    {
      "epoch": 0.00017254638671875,
      "grad_norm": 0.15004503726959229,
      "learning_rate": 5.885423701597917e-05,
      "loss": 0.0504,
      "step": 28270
    },
    {
      "epoch": 0.00017254638671875,
      "model_forward_time": 0.11638689041137695,
      "step": 28270
    },
    {
      "epoch": 0.00017254638671875,
      "step": 28270,
      "training_step_time": 0.3908112049102783
    },
    {
      "epoch": 0.000172552490234375,
      "model_forward_time": 0.11532163619995117,
      "step": 28271
    },
    {
      "epoch": 0.000172552490234375,
      "step": 28271,
      "training_step_time": 0.3909895420074463
    },
    {
      "epoch": 0.00017255859375,
      "model_forward_time": 0.11579298973083496,
      "step": 28272
    },
    {
      "epoch": 0.00017255859375,
      "step": 28272,
      "training_step_time": 0.4219484329223633
    },
    {
      "epoch": 0.000172564697265625,
      "model_forward_time": 0.11616635322570801,
      "step": 28273
    },
    {
      "epoch": 0.000172564697265625,
      "step": 28273,
      "training_step_time": 0.39260077476501465
    },
    {
      "epoch": 0.00017257080078125,
      "model_forward_time": 0.11553382873535156,
      "step": 28274
    },
    {
      "epoch": 0.00017257080078125,
      "step": 28274,
      "training_step_time": 0.39874744415283203
    },
    {
      "epoch": 0.000172576904296875,
      "model_forward_time": 0.11493301391601562,
      "step": 28275
    },
    {
      "epoch": 0.000172576904296875,
      "step": 28275,
      "training_step_time": 0.40786266326904297
    },
    {
      "epoch": 0.0001725830078125,
      "model_forward_time": 0.11519217491149902,
      "step": 28276
    },
    {
      "epoch": 0.0001725830078125,
      "step": 28276,
      "training_step_time": 0.4155735969543457
    },
    {
      "epoch": 0.000172589111328125,
      "model_forward_time": 0.11520624160766602,
      "step": 28277
    },
    {
      "epoch": 0.000172589111328125,
      "step": 28277,
      "training_step_time": 0.39045214653015137
    },
    {
      "epoch": 0.00017259521484375,
      "model_forward_time": 0.11519002914428711,
      "step": 28278
    },
    {
      "epoch": 0.00017259521484375,
      "step": 28278,
      "training_step_time": 0.39960217475891113
    },
    {
      "epoch": 0.000172601318359375,
      "model_forward_time": 0.11524081230163574,
      "step": 28279
    },
    {
      "epoch": 0.000172601318359375,
      "step": 28279,
      "training_step_time": 0.41233110427856445
    },
    {
      "epoch": 0.000172607421875,
      "grad_norm": 0.1487535834312439,
      "learning_rate": 5.8827113376969465e-05,
      "loss": 0.0429,
      "step": 28280
    },
    {
      "epoch": 0.000172607421875,
      "model_forward_time": 0.1154012680053711,
      "step": 28280
    },
    {
      "epoch": 0.000172607421875,
      "step": 28280,
      "training_step_time": 0.39081406593322754
    },
    {
      "epoch": 0.000172613525390625,
      "model_forward_time": 0.11589479446411133,
      "step": 28281
    },
    {
      "epoch": 0.000172613525390625,
      "step": 28281,
      "training_step_time": 0.42868566513061523
    },
    {
      "epoch": 0.00017261962890625,
      "model_forward_time": 0.11541199684143066,
      "step": 28282
    },
    {
      "epoch": 0.00017261962890625,
      "step": 28282,
      "training_step_time": 0.3979189395904541
    },
    {
      "epoch": 0.000172625732421875,
      "model_forward_time": 0.11536717414855957,
      "step": 28283
    },
    {
      "epoch": 0.000172625732421875,
      "step": 28283,
      "training_step_time": 0.3987729549407959
    },
    {
      "epoch": 0.0001726318359375,
      "model_forward_time": 0.11546468734741211,
      "step": 28284
    },
    {
      "epoch": 0.0001726318359375,
      "step": 28284,
      "training_step_time": 0.4039022922515869
    },
    {
      "epoch": 0.000172637939453125,
      "model_forward_time": 0.11921882629394531,
      "step": 28285
    },
    {
      "epoch": 0.000172637939453125,
      "step": 28285,
      "training_step_time": 0.39391160011291504
    },
    {
      "epoch": 0.00017264404296875,
      "model_forward_time": 0.11847329139709473,
      "step": 28286
    },
    {
      "epoch": 0.00017264404296875,
      "step": 28286,
      "training_step_time": 0.42400169372558594
    },
    {
      "epoch": 0.000172650146484375,
      "model_forward_time": 0.11919164657592773,
      "step": 28287
    },
    {
      "epoch": 0.000172650146484375,
      "step": 28287,
      "training_step_time": 0.4284648895263672
    },
    {
      "epoch": 0.00017265625,
      "model_forward_time": 0.1193702220916748,
      "step": 28288
    },
    {
      "epoch": 0.00017265625,
      "step": 28288,
      "training_step_time": 0.3903470039367676
    },
    {
      "epoch": 0.000172662353515625,
      "model_forward_time": 0.11658644676208496,
      "step": 28289
    },
    {
      "epoch": 0.000172662353515625,
      "step": 28289,
      "training_step_time": 0.37901949882507324
    },
    {
      "epoch": 0.00017266845703125,
      "grad_norm": 0.18746165931224823,
      "learning_rate": 5.8799987056515804e-05,
      "loss": 0.0426,
      "step": 28290
    },
    {
      "epoch": 0.00017266845703125,
      "model_forward_time": 0.11570620536804199,
      "step": 28290
    },
    {
      "epoch": 0.00017266845703125,
      "step": 28290,
      "training_step_time": 0.40325236320495605
    },
    {
      "epoch": 0.000172674560546875,
      "model_forward_time": 0.11514973640441895,
      "step": 28291
    },
    {
      "epoch": 0.000172674560546875,
      "step": 28291,
      "training_step_time": 0.48636651039123535
    },
    {
      "epoch": 0.0001726806640625,
      "model_forward_time": 0.11497163772583008,
      "step": 28292
    },
    {
      "epoch": 0.0001726806640625,
      "step": 28292,
      "training_step_time": 0.39899659156799316
    },
    {
      "epoch": 0.000172686767578125,
      "model_forward_time": 0.11526036262512207,
      "step": 28293
    },
    {
      "epoch": 0.000172686767578125,
      "step": 28293,
      "training_step_time": 0.43408203125
    },
    {
      "epoch": 0.00017269287109375,
      "model_forward_time": 0.11506438255310059,
      "step": 28294
    },
    {
      "epoch": 0.00017269287109375,
      "step": 28294,
      "training_step_time": 0.3993721008300781
    },
    {
      "epoch": 0.000172698974609375,
      "model_forward_time": 0.11478590965270996,
      "step": 28295
    },
    {
      "epoch": 0.000172698974609375,
      "step": 28295,
      "training_step_time": 0.4128835201263428
    },
    {
      "epoch": 0.000172705078125,
      "model_forward_time": 0.11645388603210449,
      "step": 28296
    },
    {
      "epoch": 0.000172705078125,
      "step": 28296,
      "training_step_time": 0.44975709915161133
    },
    {
      "epoch": 0.000172711181640625,
      "model_forward_time": 0.11503314971923828,
      "step": 28297
    },
    {
      "epoch": 0.000172711181640625,
      "step": 28297,
      "training_step_time": 0.46094822883605957
    },
    {
      "epoch": 0.00017271728515625,
      "model_forward_time": 0.11530518531799316,
      "step": 28298
    },
    {
      "epoch": 0.00017271728515625,
      "step": 28298,
      "training_step_time": 0.43983888626098633
    },
    {
      "epoch": 0.000172723388671875,
      "model_forward_time": 0.11450004577636719,
      "step": 28299
    },
    {
      "epoch": 0.000172723388671875,
      "step": 28299,
      "training_step_time": 0.3620944023132324
    },
    {
      "epoch": 0.0001727294921875,
      "grad_norm": 0.12313180416822433,
      "learning_rate": 5.8772858062858416e-05,
      "loss": 0.0445,
      "step": 28300
    },
    {
      "epoch": 0.0001727294921875,
      "model_forward_time": 0.11562824249267578,
      "step": 28300
    },
    {
      "epoch": 0.0001727294921875,
      "step": 28300,
      "training_step_time": 0.4569566249847412
    },
    {
      "epoch": 0.000172735595703125,
      "model_forward_time": 0.11561179161071777,
      "step": 28301
    },
    {
      "epoch": 0.000172735595703125,
      "step": 28301,
      "training_step_time": 0.40048670768737793
    },
    {
      "epoch": 0.00017274169921875,
      "model_forward_time": 0.11499285697937012,
      "step": 28302
    },
    {
      "epoch": 0.00017274169921875,
      "step": 28302,
      "training_step_time": 0.3870723247528076
    },
    {
      "epoch": 0.000172747802734375,
      "model_forward_time": 0.11512041091918945,
      "step": 28303
    },
    {
      "epoch": 0.000172747802734375,
      "step": 28303,
      "training_step_time": 0.39574384689331055
    },
    {
      "epoch": 0.00017275390625,
      "model_forward_time": 0.1304454803466797,
      "step": 28304
    },
    {
      "epoch": 0.00017275390625,
      "step": 28304,
      "training_step_time": 0.42269396781921387
    },
    {
      "epoch": 0.000172760009765625,
      "model_forward_time": 0.11474752426147461,
      "step": 28305
    },
    {
      "epoch": 0.000172760009765625,
      "step": 28305,
      "training_step_time": 0.5507574081420898
    },
    {
      "epoch": 0.00017276611328125,
      "model_forward_time": 0.1154484748840332,
      "step": 28306
    },
    {
      "epoch": 0.00017276611328125,
      "step": 28306,
      "training_step_time": 0.3901970386505127
    },
    {
      "epoch": 0.000172772216796875,
      "model_forward_time": 0.11579394340515137,
      "step": 28307
    },
    {
      "epoch": 0.000172772216796875,
      "step": 28307,
      "training_step_time": 0.40224266052246094
    },
    {
      "epoch": 0.0001727783203125,
      "model_forward_time": 0.11484837532043457,
      "step": 28308
    },
    {
      "epoch": 0.0001727783203125,
      "step": 28308,
      "training_step_time": 0.4279031753540039
    },
    {
      "epoch": 0.000172784423828125,
      "model_forward_time": 0.11462616920471191,
      "step": 28309
    },
    {
      "epoch": 0.000172784423828125,
      "step": 28309,
      "training_step_time": 0.46944141387939453
    },
    {
      "epoch": 0.00017279052734375,
      "grad_norm": 0.11403761059045792,
      "learning_rate": 5.874572640423839e-05,
      "loss": 0.0436,
      "step": 28310
    },
    {
      "epoch": 0.00017279052734375,
      "model_forward_time": 0.11542510986328125,
      "step": 28310
    },
    {
      "epoch": 0.00017279052734375,
      "step": 28310,
      "training_step_time": 0.3908271789550781
    },
    {
      "epoch": 0.000172796630859375,
      "model_forward_time": 0.11457443237304688,
      "step": 28311
    },
    {
      "epoch": 0.000172796630859375,
      "step": 28311,
      "training_step_time": 0.6285171508789062
    },
    {
      "epoch": 0.000172802734375,
      "model_forward_time": 0.11503005027770996,
      "step": 28312
    },
    {
      "epoch": 0.000172802734375,
      "step": 28312,
      "training_step_time": 0.42759084701538086
    },
    {
      "epoch": 0.000172808837890625,
      "model_forward_time": 0.11491274833679199,
      "step": 28313
    },
    {
      "epoch": 0.000172808837890625,
      "step": 28313,
      "training_step_time": 0.4041931629180908
    },
    {
      "epoch": 0.00017281494140625,
      "model_forward_time": 0.11508774757385254,
      "step": 28314
    },
    {
      "epoch": 0.00017281494140625,
      "step": 28314,
      "training_step_time": 0.40154361724853516
    },
    {
      "epoch": 0.000172821044921875,
      "model_forward_time": 0.115509033203125,
      "step": 28315
    },
    {
      "epoch": 0.000172821044921875,
      "step": 28315,
      "training_step_time": 0.39969325065612793
    },
    {
      "epoch": 0.0001728271484375,
      "model_forward_time": 0.11528635025024414,
      "step": 28316
    },
    {
      "epoch": 0.0001728271484375,
      "step": 28316,
      "training_step_time": 0.39441657066345215
    },
    {
      "epoch": 0.000172833251953125,
      "model_forward_time": 0.11519908905029297,
      "step": 28317
    },
    {
      "epoch": 0.000172833251953125,
      "step": 28317,
      "training_step_time": 0.5628712177276611
    },
    {
      "epoch": 0.00017283935546875,
      "model_forward_time": 0.11517930030822754,
      "step": 28318
    },
    {
      "epoch": 0.00017283935546875,
      "step": 28318,
      "training_step_time": 0.3882460594177246
    },
    {
      "epoch": 0.000172845458984375,
      "model_forward_time": 0.11556482315063477,
      "step": 28319
    },
    {
      "epoch": 0.000172845458984375,
      "step": 28319,
      "training_step_time": 0.4784536361694336
    },
    {
      "epoch": 0.0001728515625,
      "grad_norm": 0.15362414717674255,
      "learning_rate": 5.871859208889759e-05,
      "loss": 0.0476,
      "step": 28320
    },
    {
      "epoch": 0.0001728515625,
      "model_forward_time": 0.11484289169311523,
      "step": 28320
    },
    {
      "epoch": 0.0001728515625,
      "step": 28320,
      "training_step_time": 0.39080047607421875
    },
    {
      "epoch": 0.000172857666015625,
      "model_forward_time": 0.1151115894317627,
      "step": 28321
    },
    {
      "epoch": 0.000172857666015625,
      "step": 28321,
      "training_step_time": 0.4428372383117676
    },
    {
      "epoch": 0.00017286376953125,
      "model_forward_time": 0.11504459381103516,
      "step": 28322
    },
    {
      "epoch": 0.00017286376953125,
      "step": 28322,
      "training_step_time": 0.4685847759246826
    },
    {
      "epoch": 0.000172869873046875,
      "model_forward_time": 0.11522769927978516,
      "step": 28323
    },
    {
      "epoch": 0.000172869873046875,
      "step": 28323,
      "training_step_time": 0.5520670413970947
    },
    {
      "epoch": 0.0001728759765625,
      "model_forward_time": 0.11499214172363281,
      "step": 28324
    },
    {
      "epoch": 0.0001728759765625,
      "step": 28324,
      "training_step_time": 0.38792896270751953
    },
    {
      "epoch": 0.000172882080078125,
      "model_forward_time": 0.1158595085144043,
      "step": 28325
    },
    {
      "epoch": 0.000172882080078125,
      "step": 28325,
      "training_step_time": 0.47318553924560547
    },
    {
      "epoch": 0.00017288818359375,
      "model_forward_time": 0.11492729187011719,
      "step": 28326
    },
    {
      "epoch": 0.00017288818359375,
      "step": 28326,
      "training_step_time": 0.4156458377838135
    },
    {
      "epoch": 0.000172894287109375,
      "model_forward_time": 0.11491560935974121,
      "step": 28327
    },
    {
      "epoch": 0.000172894287109375,
      "step": 28327,
      "training_step_time": 0.43389368057250977
    },
    {
      "epoch": 0.000172900390625,
      "model_forward_time": 0.1153872013092041,
      "step": 28328
    },
    {
      "epoch": 0.000172900390625,
      "step": 28328,
      "training_step_time": 0.4483671188354492
    },
    {
      "epoch": 0.000172906494140625,
      "model_forward_time": 0.1151895523071289,
      "step": 28329
    },
    {
      "epoch": 0.000172906494140625,
      "step": 28329,
      "training_step_time": 0.39429569244384766
    },
    {
      "epoch": 0.00017291259765625,
      "grad_norm": 0.1286923885345459,
      "learning_rate": 5.869145512507872e-05,
      "loss": 0.0489,
      "step": 28330
    },
    {
      "epoch": 0.00017291259765625,
      "model_forward_time": 0.11532711982727051,
      "step": 28330
    },
    {
      "epoch": 0.00017291259765625,
      "step": 28330,
      "training_step_time": 0.3794209957122803
    },
    {
      "epoch": 0.000172918701171875,
      "model_forward_time": 0.1154787540435791,
      "step": 28331
    },
    {
      "epoch": 0.000172918701171875,
      "step": 28331,
      "training_step_time": 0.3942990303039551
    },
    {
      "epoch": 0.0001729248046875,
      "model_forward_time": 0.11503839492797852,
      "step": 28332
    },
    {
      "epoch": 0.0001729248046875,
      "step": 28332,
      "training_step_time": 0.40437984466552734
    },
    {
      "epoch": 0.000172930908203125,
      "model_forward_time": 0.11531710624694824,
      "step": 28333
    },
    {
      "epoch": 0.000172930908203125,
      "step": 28333,
      "training_step_time": 0.4995424747467041
    },
    {
      "epoch": 0.00017293701171875,
      "model_forward_time": 0.11670136451721191,
      "step": 28334
    },
    {
      "epoch": 0.00017293701171875,
      "step": 28334,
      "training_step_time": 0.39980292320251465
    },
    {
      "epoch": 0.000172943115234375,
      "model_forward_time": 0.1148061752319336,
      "step": 28335
    },
    {
      "epoch": 0.000172943115234375,
      "step": 28335,
      "training_step_time": 0.49381542205810547
    },
    {
      "epoch": 0.00017294921875,
      "model_forward_time": 0.11543655395507812,
      "step": 28336
    },
    {
      "epoch": 0.00017294921875,
      "step": 28336,
      "training_step_time": 0.39123988151550293
    },
    {
      "epoch": 0.000172955322265625,
      "model_forward_time": 0.11494255065917969,
      "step": 28337
    },
    {
      "epoch": 0.000172955322265625,
      "step": 28337,
      "training_step_time": 0.3922250270843506
    },
    {
      "epoch": 0.00017296142578125,
      "model_forward_time": 0.11474394798278809,
      "step": 28338
    },
    {
      "epoch": 0.00017296142578125,
      "step": 28338,
      "training_step_time": 0.3986349105834961
    },
    {
      "epoch": 0.000172967529296875,
      "model_forward_time": 0.11519002914428711,
      "step": 28339
    },
    {
      "epoch": 0.000172967529296875,
      "step": 28339,
      "training_step_time": 0.39653515815734863
    },
    {
      "epoch": 0.0001729736328125,
      "grad_norm": 0.17038846015930176,
      "learning_rate": 5.866431552102526e-05,
      "loss": 0.0456,
      "step": 28340
    },
    {
      "epoch": 0.0001729736328125,
      "model_forward_time": 0.11509919166564941,
      "step": 28340
    },
    {
      "epoch": 0.0001729736328125,
      "step": 28340,
      "training_step_time": 0.3981952667236328
    },
    {
      "epoch": 0.000172979736328125,
      "model_forward_time": 0.11560177803039551,
      "step": 28341
    },
    {
      "epoch": 0.000172979736328125,
      "step": 28341,
      "training_step_time": 0.49541139602661133
    },
    {
      "epoch": 0.00017298583984375,
      "model_forward_time": 0.11515092849731445,
      "step": 28342
    },
    {
      "epoch": 0.00017298583984375,
      "step": 28342,
      "training_step_time": 0.4402589797973633
    },
    {
      "epoch": 0.000172991943359375,
      "model_forward_time": 0.11501336097717285,
      "step": 28343
    },
    {
      "epoch": 0.000172991943359375,
      "step": 28343,
      "training_step_time": 0.4126875400543213
    },
    {
      "epoch": 0.000172998046875,
      "model_forward_time": 0.11527085304260254,
      "step": 28344
    },
    {
      "epoch": 0.000172998046875,
      "step": 28344,
      "training_step_time": 0.39500904083251953
    },
    {
      "epoch": 0.000173004150390625,
      "model_forward_time": 0.11511063575744629,
      "step": 28345
    },
    {
      "epoch": 0.000173004150390625,
      "step": 28345,
      "training_step_time": 0.4004685878753662
    },
    {
      "epoch": 0.00017301025390625,
      "model_forward_time": 0.11476373672485352,
      "step": 28346
    },
    {
      "epoch": 0.00017301025390625,
      "step": 28346,
      "training_step_time": 0.45171499252319336
    },
    {
      "epoch": 0.000173016357421875,
      "model_forward_time": 0.11529302597045898,
      "step": 28347
    },
    {
      "epoch": 0.000173016357421875,
      "step": 28347,
      "training_step_time": 0.7614672183990479
    },
    {
      "epoch": 0.0001730224609375,
      "model_forward_time": 0.11441373825073242,
      "step": 28348
    },
    {
      "epoch": 0.0001730224609375,
      "step": 28348,
      "training_step_time": 0.4144320487976074
    },
    {
      "epoch": 0.000173028564453125,
      "model_forward_time": 0.11533975601196289,
      "step": 28349
    },
    {
      "epoch": 0.000173028564453125,
      "step": 28349,
      "training_step_time": 0.40417933464050293
    },
    {
      "epoch": 0.00017303466796875,
      "grad_norm": 0.16173091530799866,
      "learning_rate": 5.8637173284981526e-05,
      "loss": 0.0463,
      "step": 28350
    },
    {
      "epoch": 0.00017303466796875,
      "model_forward_time": 0.11402177810668945,
      "step": 28350
    },
    {
      "epoch": 0.00017303466796875,
      "step": 28350,
      "training_step_time": 0.40112948417663574
    },
    {
      "epoch": 0.000173040771484375,
      "model_forward_time": 0.11473655700683594,
      "step": 28351
    },
    {
      "epoch": 0.000173040771484375,
      "step": 28351,
      "training_step_time": 0.39241981506347656
    },
    {
      "epoch": 0.000173046875,
      "model_forward_time": 0.11408209800720215,
      "step": 28352
    },
    {
      "epoch": 0.000173046875,
      "step": 28352,
      "training_step_time": 0.38306641578674316
    },
    {
      "epoch": 0.000173052978515625,
      "model_forward_time": 0.11484408378601074,
      "step": 28353
    },
    {
      "epoch": 0.000173052978515625,
      "step": 28353,
      "training_step_time": 0.39380311965942383
    },
    {
      "epoch": 0.00017305908203125,
      "model_forward_time": 0.11512589454650879,
      "step": 28354
    },
    {
      "epoch": 0.00017305908203125,
      "step": 28354,
      "training_step_time": 0.4447498321533203
    },
    {
      "epoch": 0.000173065185546875,
      "model_forward_time": 0.1149437427520752,
      "step": 28355
    },
    {
      "epoch": 0.000173065185546875,
      "step": 28355,
      "training_step_time": 0.4803586006164551
    },
    {
      "epoch": 0.0001730712890625,
      "model_forward_time": 0.11506056785583496,
      "step": 28356
    },
    {
      "epoch": 0.0001730712890625,
      "step": 28356,
      "training_step_time": 0.4117922782897949
    },
    {
      "epoch": 0.000173077392578125,
      "model_forward_time": 0.11567354202270508,
      "step": 28357
    },
    {
      "epoch": 0.000173077392578125,
      "step": 28357,
      "training_step_time": 0.48265719413757324
    },
    {
      "epoch": 0.00017308349609375,
      "model_forward_time": 0.1150212287902832,
      "step": 28358
    },
    {
      "epoch": 0.00017308349609375,
      "step": 28358,
      "training_step_time": 0.39966726303100586
    },
    {
      "epoch": 0.000173089599609375,
      "model_forward_time": 0.11512494087219238,
      "step": 28359
    },
    {
      "epoch": 0.000173089599609375,
      "step": 28359,
      "training_step_time": 0.396054744720459
    },
    {
      "epoch": 0.000173095703125,
      "grad_norm": 0.1649608165025711,
      "learning_rate": 5.861002842519259e-05,
      "loss": 0.0427,
      "step": 28360
    },
    {
      "epoch": 0.000173095703125,
      "model_forward_time": 0.11519360542297363,
      "step": 28360
    },
    {
      "epoch": 0.000173095703125,
      "step": 28360,
      "training_step_time": 0.43006014823913574
    },
    {
      "epoch": 0.000173101806640625,
      "model_forward_time": 0.11544680595397949,
      "step": 28361
    },
    {
      "epoch": 0.000173101806640625,
      "step": 28361,
      "training_step_time": 0.4266164302825928
    },
    {
      "epoch": 0.00017310791015625,
      "model_forward_time": 0.11455202102661133,
      "step": 28362
    },
    {
      "epoch": 0.00017310791015625,
      "step": 28362,
      "training_step_time": 0.41048765182495117
    },
    {
      "epoch": 0.000173114013671875,
      "model_forward_time": 0.1151728630065918,
      "step": 28363
    },
    {
      "epoch": 0.000173114013671875,
      "step": 28363,
      "training_step_time": 0.41005444526672363
    },
    {
      "epoch": 0.0001731201171875,
      "model_forward_time": 0.11542844772338867,
      "step": 28364
    },
    {
      "epoch": 0.0001731201171875,
      "step": 28364,
      "training_step_time": 0.39257311820983887
    },
    {
      "epoch": 0.000173126220703125,
      "model_forward_time": 0.11545872688293457,
      "step": 28365
    },
    {
      "epoch": 0.000173126220703125,
      "step": 28365,
      "training_step_time": 0.5533967018127441
    },
    {
      "epoch": 0.00017313232421875,
      "model_forward_time": 0.11553812026977539,
      "step": 28366
    },
    {
      "epoch": 0.00017313232421875,
      "step": 28366,
      "training_step_time": 0.39737915992736816
    },
    {
      "epoch": 0.000173138427734375,
      "model_forward_time": 0.11507058143615723,
      "step": 28367
    },
    {
      "epoch": 0.000173138427734375,
      "step": 28367,
      "training_step_time": 0.39066457748413086
    },
    {
      "epoch": 0.00017314453125,
      "model_forward_time": 0.11567187309265137,
      "step": 28368
    },
    {
      "epoch": 0.00017314453125,
      "step": 28368,
      "training_step_time": 0.39519667625427246
    },
    {
      "epoch": 0.000173150634765625,
      "model_forward_time": 0.11543393135070801,
      "step": 28369
    },
    {
      "epoch": 0.000173150634765625,
      "step": 28369,
      "training_step_time": 0.405717134475708
    },
    {
      "epoch": 0.00017315673828125,
      "grad_norm": 0.210051029920578,
      "learning_rate": 5.858288094990436e-05,
      "loss": 0.0463,
      "step": 28370
    },
    {
      "epoch": 0.00017315673828125,
      "model_forward_time": 0.11506366729736328,
      "step": 28370
    },
    {
      "epoch": 0.00017315673828125,
      "step": 28370,
      "training_step_time": 0.3658936023712158
    },
    {
      "epoch": 0.000173162841796875,
      "model_forward_time": 0.11618447303771973,
      "step": 28371
    },
    {
      "epoch": 0.000173162841796875,
      "step": 28371,
      "training_step_time": 0.45174717903137207
    },
    {
      "epoch": 0.0001731689453125,
      "model_forward_time": 0.11545991897583008,
      "step": 28372
    },
    {
      "epoch": 0.0001731689453125,
      "step": 28372,
      "training_step_time": 0.4087793827056885
    },
    {
      "epoch": 0.000173175048828125,
      "model_forward_time": 0.11521267890930176,
      "step": 28373
    },
    {
      "epoch": 0.000173175048828125,
      "step": 28373,
      "training_step_time": 0.39249706268310547
    },
    {
      "epoch": 0.00017318115234375,
      "model_forward_time": 0.11556220054626465,
      "step": 28374
    },
    {
      "epoch": 0.00017318115234375,
      "step": 28374,
      "training_step_time": 0.4116837978363037
    },
    {
      "epoch": 0.000173187255859375,
      "model_forward_time": 0.11540627479553223,
      "step": 28375
    },
    {
      "epoch": 0.000173187255859375,
      "step": 28375,
      "training_step_time": 0.42076969146728516
    },
    {
      "epoch": 0.000173193359375,
      "model_forward_time": 0.11572074890136719,
      "step": 28376
    },
    {
      "epoch": 0.000173193359375,
      "step": 28376,
      "training_step_time": 0.3914608955383301
    },
    {
      "epoch": 0.000173199462890625,
      "model_forward_time": 0.11558866500854492,
      "step": 28377
    },
    {
      "epoch": 0.000173199462890625,
      "step": 28377,
      "training_step_time": 0.7369430065155029
    },
    {
      "epoch": 0.00017320556640625,
      "model_forward_time": 0.11446452140808105,
      "step": 28378
    },
    {
      "epoch": 0.00017320556640625,
      "step": 28378,
      "training_step_time": 0.38454389572143555
    },
    {
      "epoch": 0.000173211669921875,
      "model_forward_time": 0.11538910865783691,
      "step": 28379
    },
    {
      "epoch": 0.000173211669921875,
      "step": 28379,
      "training_step_time": 0.3790576457977295
    },
    {
      "epoch": 0.0001732177734375,
      "grad_norm": 0.13934451341629028,
      "learning_rate": 5.85557308673635e-05,
      "loss": 0.0457,
      "step": 28380
    },
    {
      "epoch": 0.0001732177734375,
      "model_forward_time": 0.11516666412353516,
      "step": 28380
    },
    {
      "epoch": 0.0001732177734375,
      "step": 28380,
      "training_step_time": 0.39306068420410156
    },
    {
      "epoch": 0.000173223876953125,
      "model_forward_time": 0.11385846138000488,
      "step": 28381
    },
    {
      "epoch": 0.000173223876953125,
      "step": 28381,
      "training_step_time": 0.41153669357299805
    },
    {
      "epoch": 0.00017322998046875,
      "model_forward_time": 0.11517882347106934,
      "step": 28382
    },
    {
      "epoch": 0.00017322998046875,
      "step": 28382,
      "training_step_time": 0.40337133407592773
    },
    {
      "epoch": 0.000173236083984375,
      "model_forward_time": 0.11546587944030762,
      "step": 28383
    },
    {
      "epoch": 0.000173236083984375,
      "step": 28383,
      "training_step_time": 0.6554558277130127
    },
    {
      "epoch": 0.0001732421875,
      "model_forward_time": 0.11543941497802734,
      "step": 28384
    },
    {
      "epoch": 0.0001732421875,
      "step": 28384,
      "training_step_time": 0.40436530113220215
    },
    {
      "epoch": 0.000173248291015625,
      "model_forward_time": 0.11587285995483398,
      "step": 28385
    },
    {
      "epoch": 0.000173248291015625,
      "step": 28385,
      "training_step_time": 0.485426664352417
    },
    {
      "epoch": 0.00017325439453125,
      "model_forward_time": 0.11478161811828613,
      "step": 28386
    },
    {
      "epoch": 0.00017325439453125,
      "step": 28386,
      "training_step_time": 0.40843701362609863
    },
    {
      "epoch": 0.000173260498046875,
      "model_forward_time": 0.11490035057067871,
      "step": 28387
    },
    {
      "epoch": 0.000173260498046875,
      "step": 28387,
      "training_step_time": 0.3900868892669678
    },
    {
      "epoch": 0.0001732666015625,
      "model_forward_time": 0.11500167846679688,
      "step": 28388
    },
    {
      "epoch": 0.0001732666015625,
      "step": 28388,
      "training_step_time": 0.39128780364990234
    },
    {
      "epoch": 0.000173272705078125,
      "model_forward_time": 0.1155557632446289,
      "step": 28389
    },
    {
      "epoch": 0.000173272705078125,
      "step": 28389,
      "training_step_time": 0.5791997909545898
    },
    {
      "epoch": 0.00017327880859375,
      "grad_norm": 0.13350211083889008,
      "learning_rate": 5.8528578185817514e-05,
      "loss": 0.0434,
      "step": 28390
    },
    {
      "epoch": 0.00017327880859375,
      "model_forward_time": 0.11531853675842285,
      "step": 28390
    },
    {
      "epoch": 0.00017327880859375,
      "step": 28390,
      "training_step_time": 0.386523962020874
    },
    {
      "epoch": 0.000173284912109375,
      "model_forward_time": 0.11430573463439941,
      "step": 28391
    },
    {
      "epoch": 0.000173284912109375,
      "step": 28391,
      "training_step_time": 0.39316272735595703
    },
    {
      "epoch": 0.000173291015625,
      "model_forward_time": 0.11517071723937988,
      "step": 28392
    },
    {
      "epoch": 0.000173291015625,
      "step": 28392,
      "training_step_time": 0.3940601348876953
    },
    {
      "epoch": 0.000173297119140625,
      "model_forward_time": 0.11639642715454102,
      "step": 28393
    },
    {
      "epoch": 0.000173297119140625,
      "step": 28393,
      "training_step_time": 0.4079282283782959
    },
    {
      "epoch": 0.00017330322265625,
      "model_forward_time": 0.11520504951477051,
      "step": 28394
    },
    {
      "epoch": 0.00017330322265625,
      "step": 28394,
      "training_step_time": 0.3877604007720947
    },
    {
      "epoch": 0.000173309326171875,
      "model_forward_time": 0.11552309989929199,
      "step": 28395
    },
    {
      "epoch": 0.000173309326171875,
      "step": 28395,
      "training_step_time": 0.5893528461456299
    },
    {
      "epoch": 0.0001733154296875,
      "model_forward_time": 0.11492681503295898,
      "step": 28396
    },
    {
      "epoch": 0.0001733154296875,
      "step": 28396,
      "training_step_time": 0.38994479179382324
    },
    {
      "epoch": 0.000173321533203125,
      "model_forward_time": 0.11549806594848633,
      "step": 28397
    },
    {
      "epoch": 0.000173321533203125,
      "step": 28397,
      "training_step_time": 0.42718076705932617
    },
    {
      "epoch": 0.00017332763671875,
      "model_forward_time": 0.11548066139221191,
      "step": 28398
    },
    {
      "epoch": 0.00017332763671875,
      "step": 28398,
      "training_step_time": 0.45573973655700684
    },
    {
      "epoch": 0.000173333740234375,
      "model_forward_time": 0.11520648002624512,
      "step": 28399
    },
    {
      "epoch": 0.000173333740234375,
      "step": 28399,
      "training_step_time": 0.42111730575561523
    },
    {
      "epoch": 0.00017333984375,
      "grad_norm": 0.1174938976764679,
      "learning_rate": 5.850142291351466e-05,
      "loss": 0.045,
      "step": 28400
    },
    {
      "epoch": 0.00017333984375,
      "model_forward_time": 0.11462783813476562,
      "step": 28400
    },
    {
      "epoch": 0.00017333984375,
      "step": 28400,
      "training_step_time": 0.4485480785369873
    },
    {
      "epoch": 0.000173345947265625,
      "model_forward_time": 0.1188511848449707,
      "step": 28401
    },
    {
      "epoch": 0.000173345947265625,
      "step": 28401,
      "training_step_time": 0.38671398162841797
    },
    {
      "epoch": 0.00017335205078125,
      "model_forward_time": 0.11765027046203613,
      "step": 28402
    },
    {
      "epoch": 0.00017335205078125,
      "step": 28402,
      "training_step_time": 0.402681827545166
    },
    {
      "epoch": 0.000173358154296875,
      "model_forward_time": 0.11855340003967285,
      "step": 28403
    },
    {
      "epoch": 0.000173358154296875,
      "step": 28403,
      "training_step_time": 0.39433813095092773
    },
    {
      "epoch": 0.0001733642578125,
      "model_forward_time": 0.11795544624328613,
      "step": 28404
    },
    {
      "epoch": 0.0001733642578125,
      "step": 28404,
      "training_step_time": 0.3931007385253906
    },
    {
      "epoch": 0.000173370361328125,
      "model_forward_time": 0.11805367469787598,
      "step": 28405
    },
    {
      "epoch": 0.000173370361328125,
      "step": 28405,
      "training_step_time": 0.3841981887817383
    },
    {
      "epoch": 0.00017337646484375,
      "model_forward_time": 0.11606144905090332,
      "step": 28406
    },
    {
      "epoch": 0.00017337646484375,
      "step": 28406,
      "training_step_time": 0.3952047824859619
    },
    {
      "epoch": 0.000173382568359375,
      "model_forward_time": 0.11532139778137207,
      "step": 28407
    },
    {
      "epoch": 0.000173382568359375,
      "step": 28407,
      "training_step_time": 0.6853926181793213
    },
    {
      "epoch": 0.000173388671875,
      "model_forward_time": 0.11509346961975098,
      "step": 28408
    },
    {
      "epoch": 0.000173388671875,
      "step": 28408,
      "training_step_time": 0.383991003036499
    },
    {
      "epoch": 0.000173394775390625,
      "model_forward_time": 0.11517739295959473,
      "step": 28409
    },
    {
      "epoch": 0.000173394775390625,
      "step": 28409,
      "training_step_time": 0.3908205032348633
    },
    {
      "epoch": 0.00017340087890625,
      "grad_norm": 0.1899152547121048,
      "learning_rate": 5.847426505870399e-05,
      "loss": 0.05,
      "step": 28410
    },
    {
      "epoch": 0.00017340087890625,
      "model_forward_time": 0.1156005859375,
      "step": 28410
    },
    {
      "epoch": 0.00017340087890625,
      "step": 28410,
      "training_step_time": 0.3877115249633789
    },
    {
      "epoch": 0.000173406982421875,
      "model_forward_time": 0.11531400680541992,
      "step": 28411
    },
    {
      "epoch": 0.000173406982421875,
      "step": 28411,
      "training_step_time": 0.45258259773254395
    },
    {
      "epoch": 0.0001734130859375,
      "model_forward_time": 0.11498308181762695,
      "step": 28412
    },
    {
      "epoch": 0.0001734130859375,
      "step": 28412,
      "training_step_time": 0.38819384574890137
    },
    {
      "epoch": 0.000173419189453125,
      "model_forward_time": 0.1150810718536377,
      "step": 28413
    },
    {
      "epoch": 0.000173419189453125,
      "step": 28413,
      "training_step_time": 0.5712010860443115
    },
    {
      "epoch": 0.00017342529296875,
      "model_forward_time": 0.11481475830078125,
      "step": 28414
    },
    {
      "epoch": 0.00017342529296875,
      "step": 28414,
      "training_step_time": 0.4910581111907959
    },
    {
      "epoch": 0.000173431396484375,
      "model_forward_time": 0.11741089820861816,
      "step": 28415
    },
    {
      "epoch": 0.000173431396484375,
      "step": 28415,
      "training_step_time": 0.4155607223510742
    },
    {
      "epoch": 0.0001734375,
      "model_forward_time": 0.11885571479797363,
      "step": 28416
    },
    {
      "epoch": 0.0001734375,
      "step": 28416,
      "training_step_time": 0.395369291305542
    },
    {
      "epoch": 0.000173443603515625,
      "model_forward_time": 0.11517834663391113,
      "step": 28417
    },
    {
      "epoch": 0.000173443603515625,
      "step": 28417,
      "training_step_time": 0.4472529888153076
    },
    {
      "epoch": 0.00017344970703125,
      "model_forward_time": 0.11603116989135742,
      "step": 28418
    },
    {
      "epoch": 0.00017344970703125,
      "step": 28418,
      "training_step_time": 0.39513468742370605
    },
    {
      "epoch": 0.000173455810546875,
      "model_forward_time": 0.1148066520690918,
      "step": 28419
    },
    {
      "epoch": 0.000173455810546875,
      "step": 28419,
      "training_step_time": 0.39243340492248535
    },
    {
      "epoch": 0.0001734619140625,
      "grad_norm": 0.1464206427335739,
      "learning_rate": 5.8447104629635344e-05,
      "loss": 0.046,
      "step": 28420
    },
    {
      "epoch": 0.0001734619140625,
      "model_forward_time": 0.11475491523742676,
      "step": 28420
    },
    {
      "epoch": 0.0001734619140625,
      "step": 28420,
      "training_step_time": 0.40278124809265137
    },
    {
      "epoch": 0.000173468017578125,
      "model_forward_time": 0.1157066822052002,
      "step": 28421
    },
    {
      "epoch": 0.000173468017578125,
      "step": 28421,
      "training_step_time": 0.393174409866333
    },
    {
      "epoch": 0.00017347412109375,
      "model_forward_time": 0.11514925956726074,
      "step": 28422
    },
    {
      "epoch": 0.00017347412109375,
      "step": 28422,
      "training_step_time": 0.3929588794708252
    },
    {
      "epoch": 0.000173480224609375,
      "model_forward_time": 0.11533021926879883,
      "step": 28423
    },
    {
      "epoch": 0.000173480224609375,
      "step": 28423,
      "training_step_time": 0.39464902877807617
    },
    {
      "epoch": 0.000173486328125,
      "model_forward_time": 0.11751437187194824,
      "step": 28424
    },
    {
      "epoch": 0.000173486328125,
      "step": 28424,
      "training_step_time": 0.39081335067749023
    },
    {
      "epoch": 0.000173492431640625,
      "model_forward_time": 0.1155240535736084,
      "step": 28425
    },
    {
      "epoch": 0.000173492431640625,
      "step": 28425,
      "training_step_time": 0.5605006217956543
    },
    {
      "epoch": 0.00017349853515625,
      "model_forward_time": 0.11508297920227051,
      "step": 28426
    },
    {
      "epoch": 0.00017349853515625,
      "step": 28426,
      "training_step_time": 0.3661983013153076
    },
    {
      "epoch": 0.000173504638671875,
      "model_forward_time": 0.11592626571655273,
      "step": 28427
    },
    {
      "epoch": 0.000173504638671875,
      "step": 28427,
      "training_step_time": 0.48342108726501465
    },
    {
      "epoch": 0.0001735107421875,
      "model_forward_time": 0.11455678939819336,
      "step": 28428
    },
    {
      "epoch": 0.0001735107421875,
      "step": 28428,
      "training_step_time": 0.40796875953674316
    },
    {
      "epoch": 0.000173516845703125,
      "model_forward_time": 0.11491084098815918,
      "step": 28429
    },
    {
      "epoch": 0.000173516845703125,
      "step": 28429,
      "training_step_time": 0.4017157554626465
    },
    {
      "epoch": 0.00017352294921875,
      "grad_norm": 0.22331370413303375,
      "learning_rate": 5.841994163455934e-05,
      "loss": 0.0445,
      "step": 28430
    },
    {
      "epoch": 0.00017352294921875,
      "model_forward_time": 0.11646485328674316,
      "step": 28430
    },
    {
      "epoch": 0.00017352294921875,
      "step": 28430,
      "training_step_time": 0.44103240966796875
    },
    {
      "epoch": 0.000173529052734375,
      "model_forward_time": 0.11629819869995117,
      "step": 28431
    },
    {
      "epoch": 0.000173529052734375,
      "step": 28431,
      "training_step_time": 0.47211623191833496
    },
    {
      "epoch": 0.00017353515625,
      "model_forward_time": 0.11515069007873535,
      "step": 28432
    },
    {
      "epoch": 0.00017353515625,
      "step": 28432,
      "training_step_time": 0.3960764408111572
    },
    {
      "epoch": 0.000173541259765625,
      "model_forward_time": 0.11533260345458984,
      "step": 28433
    },
    {
      "epoch": 0.000173541259765625,
      "step": 28433,
      "training_step_time": 0.3837313652038574
    },
    {
      "epoch": 0.00017354736328125,
      "model_forward_time": 0.11523675918579102,
      "step": 28434
    },
    {
      "epoch": 0.00017354736328125,
      "step": 28434,
      "training_step_time": 0.386523962020874
    },
    {
      "epoch": 0.000173553466796875,
      "model_forward_time": 0.11543917655944824,
      "step": 28435
    },
    {
      "epoch": 0.000173553466796875,
      "step": 28435,
      "training_step_time": 0.42406201362609863
    },
    {
      "epoch": 0.0001735595703125,
      "model_forward_time": 0.11539053916931152,
      "step": 28436
    },
    {
      "epoch": 0.0001735595703125,
      "step": 28436,
      "training_step_time": 0.399965763092041
    },
    {
      "epoch": 0.000173565673828125,
      "model_forward_time": 0.11543488502502441,
      "step": 28437
    },
    {
      "epoch": 0.000173565673828125,
      "step": 28437,
      "training_step_time": 0.798865795135498
    },
    {
      "epoch": 0.00017357177734375,
      "model_forward_time": 0.11514496803283691,
      "step": 28438
    },
    {
      "epoch": 0.00017357177734375,
      "step": 28438,
      "training_step_time": 0.4063255786895752
    },
    {
      "epoch": 0.000173577880859375,
      "model_forward_time": 0.11468887329101562,
      "step": 28439
    },
    {
      "epoch": 0.000173577880859375,
      "step": 28439,
      "training_step_time": 0.4206068515777588
    },
    {
      "epoch": 0.000173583984375,
      "grad_norm": 0.14419934153556824,
      "learning_rate": 5.8392776081727385e-05,
      "loss": 0.0495,
      "step": 28440
    },
    {
      "epoch": 0.000173583984375,
      "model_forward_time": 0.11463189125061035,
      "step": 28440
    },
    {
      "epoch": 0.000173583984375,
      "step": 28440,
      "training_step_time": 0.4562652111053467
    },
    {
      "epoch": 0.000173590087890625,
      "model_forward_time": 0.1146693229675293,
      "step": 28441
    },
    {
      "epoch": 0.000173590087890625,
      "step": 28441,
      "training_step_time": 0.46573519706726074
    },
    {
      "epoch": 0.00017359619140625,
      "model_forward_time": 0.1145937442779541,
      "step": 28442
    },
    {
      "epoch": 0.00017359619140625,
      "step": 28442,
      "training_step_time": 0.4157850742340088
    },
    {
      "epoch": 0.000173602294921875,
      "model_forward_time": 0.11525368690490723,
      "step": 28443
    },
    {
      "epoch": 0.000173602294921875,
      "step": 28443,
      "training_step_time": 0.42223572731018066
    },
    {
      "epoch": 0.0001736083984375,
      "model_forward_time": 0.11556482315063477,
      "step": 28444
    },
    {
      "epoch": 0.0001736083984375,
      "step": 28444,
      "training_step_time": 0.477703332901001
    },
    {
      "epoch": 0.000173614501953125,
      "model_forward_time": 0.11480927467346191,
      "step": 28445
    },
    {
      "epoch": 0.000173614501953125,
      "step": 28445,
      "training_step_time": 0.39681529998779297
    },
    {
      "epoch": 0.00017362060546875,
      "model_forward_time": 0.1153721809387207,
      "step": 28446
    },
    {
      "epoch": 0.00017362060546875,
      "step": 28446,
      "training_step_time": 0.3926539421081543
    },
    {
      "epoch": 0.000173626708984375,
      "model_forward_time": 0.11681389808654785,
      "step": 28447
    },
    {
      "epoch": 0.000173626708984375,
      "step": 28447,
      "training_step_time": 0.3864278793334961
    },
    {
      "epoch": 0.0001736328125,
      "model_forward_time": 0.11490273475646973,
      "step": 28448
    },
    {
      "epoch": 0.0001736328125,
      "step": 28448,
      "training_step_time": 0.3885815143585205
    },
    {
      "epoch": 0.000173638916015625,
      "model_forward_time": 0.11519861221313477,
      "step": 28449
    },
    {
      "epoch": 0.000173638916015625,
      "step": 28449,
      "training_step_time": 0.39130330085754395
    },
    {
      "epoch": 0.00017364501953125,
      "grad_norm": 0.15284407138824463,
      "learning_rate": 5.8365607979391666e-05,
      "loss": 0.0432,
      "step": 28450
    },
    {
      "epoch": 0.00017364501953125,
      "model_forward_time": 0.11544108390808105,
      "step": 28450
    },
    {
      "epoch": 0.00017364501953125,
      "step": 28450,
      "training_step_time": 0.4049670696258545
    },
    {
      "epoch": 0.000173651123046875,
      "model_forward_time": 0.11557149887084961,
      "step": 28451
    },
    {
      "epoch": 0.000173651123046875,
      "step": 28451,
      "training_step_time": 0.40435004234313965
    },
    {
      "epoch": 0.0001736572265625,
      "model_forward_time": 0.11653971672058105,
      "step": 28452
    },
    {
      "epoch": 0.0001736572265625,
      "step": 28452,
      "training_step_time": 0.41484832763671875
    },
    {
      "epoch": 0.000173663330078125,
      "model_forward_time": 0.11571574211120605,
      "step": 28453
    },
    {
      "epoch": 0.000173663330078125,
      "step": 28453,
      "training_step_time": 0.4613075256347656
    },
    {
      "epoch": 0.00017366943359375,
      "model_forward_time": 0.11511731147766113,
      "step": 28454
    },
    {
      "epoch": 0.00017366943359375,
      "step": 28454,
      "training_step_time": 0.3678627014160156
    },
    {
      "epoch": 0.000173675537109375,
      "model_forward_time": 0.11454129219055176,
      "step": 28455
    },
    {
      "epoch": 0.000173675537109375,
      "step": 28455,
      "training_step_time": 0.42699742317199707
    },
    {
      "epoch": 0.000173681640625,
      "model_forward_time": 0.11511683464050293,
      "step": 28456
    },
    {
      "epoch": 0.000173681640625,
      "step": 28456,
      "training_step_time": 0.4946293830871582
    },
    {
      "epoch": 0.000173687744140625,
      "model_forward_time": 0.11538529396057129,
      "step": 28457
    },
    {
      "epoch": 0.000173687744140625,
      "step": 28457,
      "training_step_time": 0.515946626663208
    },
    {
      "epoch": 0.00017369384765625,
      "model_forward_time": 0.11469793319702148,
      "step": 28458
    },
    {
      "epoch": 0.00017369384765625,
      "step": 28458,
      "training_step_time": 0.4041578769683838
    },
    {
      "epoch": 0.000173699951171875,
      "model_forward_time": 0.11513829231262207,
      "step": 28459
    },
    {
      "epoch": 0.000173699951171875,
      "step": 28459,
      "training_step_time": 0.4973726272583008
    },
    {
      "epoch": 0.0001737060546875,
      "grad_norm": 0.15343163907527924,
      "learning_rate": 5.833843733580512e-05,
      "loss": 0.0509,
      "step": 28460
    },
    {
      "epoch": 0.0001737060546875,
      "model_forward_time": 0.1143956184387207,
      "step": 28460
    },
    {
      "epoch": 0.0001737060546875,
      "step": 28460,
      "training_step_time": 0.3943037986755371
    },
    {
      "epoch": 0.000173712158203125,
      "model_forward_time": 0.11432957649230957,
      "step": 28461
    },
    {
      "epoch": 0.000173712158203125,
      "step": 28461,
      "training_step_time": 0.38637351989746094
    },
    {
      "epoch": 0.00017371826171875,
      "model_forward_time": 0.11534357070922852,
      "step": 28462
    },
    {
      "epoch": 0.00017371826171875,
      "step": 28462,
      "training_step_time": 0.3933112621307373
    },
    {
      "epoch": 0.000173724365234375,
      "model_forward_time": 0.11526346206665039,
      "step": 28463
    },
    {
      "epoch": 0.000173724365234375,
      "step": 28463,
      "training_step_time": 0.3926072120666504
    },
    {
      "epoch": 0.00017373046875,
      "model_forward_time": 0.11513328552246094,
      "step": 28464
    },
    {
      "epoch": 0.00017373046875,
      "step": 28464,
      "training_step_time": 0.39308619499206543
    },
    {
      "epoch": 0.000173736572265625,
      "model_forward_time": 0.11499738693237305,
      "step": 28465
    },
    {
      "epoch": 0.000173736572265625,
      "step": 28465,
      "training_step_time": 0.40789079666137695
    },
    {
      "epoch": 0.00017374267578125,
      "model_forward_time": 0.1153554916381836,
      "step": 28466
    },
    {
      "epoch": 0.00017374267578125,
      "step": 28466,
      "training_step_time": 0.39315104484558105
    },
    {
      "epoch": 0.000173748779296875,
      "model_forward_time": 0.11585855484008789,
      "step": 28467
    },
    {
      "epoch": 0.000173748779296875,
      "step": 28467,
      "training_step_time": 0.4371066093444824
    },
    {
      "epoch": 0.0001737548828125,
      "model_forward_time": 0.11544513702392578,
      "step": 28468
    },
    {
      "epoch": 0.0001737548828125,
      "step": 28468,
      "training_step_time": 0.40673279762268066
    },
    {
      "epoch": 0.000173760986328125,
      "model_forward_time": 0.11591315269470215,
      "step": 28469
    },
    {
      "epoch": 0.000173760986328125,
      "step": 28469,
      "training_step_time": 0.41762399673461914
    },
    {
      "epoch": 0.00017376708984375,
      "grad_norm": 0.14113079011440277,
      "learning_rate": 5.831126415922148e-05,
      "loss": 0.0443,
      "step": 28470
    },
    {
      "epoch": 0.00017376708984375,
      "model_forward_time": 0.11522769927978516,
      "step": 28470
    },
    {
      "epoch": 0.00017376708984375,
      "step": 28470,
      "training_step_time": 0.4045746326446533
    },
    {
      "epoch": 0.000173773193359375,
      "model_forward_time": 0.1149899959564209,
      "step": 28471
    },
    {
      "epoch": 0.000173773193359375,
      "step": 28471,
      "training_step_time": 0.406264066696167
    },
    {
      "epoch": 0.000173779296875,
      "model_forward_time": 0.11543107032775879,
      "step": 28472
    },
    {
      "epoch": 0.000173779296875,
      "step": 28472,
      "training_step_time": 0.44406723976135254
    },
    {
      "epoch": 0.000173785400390625,
      "model_forward_time": 0.11517453193664551,
      "step": 28473
    },
    {
      "epoch": 0.000173785400390625,
      "step": 28473,
      "training_step_time": 0.584958553314209
    },
    {
      "epoch": 0.00017379150390625,
      "model_forward_time": 0.1145322322845459,
      "step": 28474
    },
    {
      "epoch": 0.00017379150390625,
      "step": 28474,
      "training_step_time": 0.3885767459869385
    },
    {
      "epoch": 0.000173797607421875,
      "model_forward_time": 0.11526608467102051,
      "step": 28475
    },
    {
      "epoch": 0.000173797607421875,
      "step": 28475,
      "training_step_time": 0.3901021480560303
    },
    {
      "epoch": 0.0001738037109375,
      "model_forward_time": 0.1154787540435791,
      "step": 28476
    },
    {
      "epoch": 0.0001738037109375,
      "step": 28476,
      "training_step_time": 0.4177560806274414
    },
    {
      "epoch": 0.000173809814453125,
      "model_forward_time": 0.11476731300354004,
      "step": 28477
    },
    {
      "epoch": 0.000173809814453125,
      "step": 28477,
      "training_step_time": 0.3962557315826416
    },
    {
      "epoch": 0.00017381591796875,
      "model_forward_time": 0.11485958099365234,
      "step": 28478
    },
    {
      "epoch": 0.00017381591796875,
      "step": 28478,
      "training_step_time": 0.39241790771484375
    },
    {
      "epoch": 0.000173822021484375,
      "model_forward_time": 0.11516118049621582,
      "step": 28479
    },
    {
      "epoch": 0.000173822021484375,
      "step": 28479,
      "training_step_time": 0.48575401306152344
    },
    {
      "epoch": 0.000173828125,
      "grad_norm": 0.14904263615608215,
      "learning_rate": 5.828408845789522e-05,
      "loss": 0.0478,
      "step": 28480
    },
    {
      "epoch": 0.000173828125,
      "model_forward_time": 0.11446142196655273,
      "step": 28480
    },
    {
      "epoch": 0.000173828125,
      "step": 28480,
      "training_step_time": 0.39046597480773926
    },
    {
      "epoch": 0.000173834228515625,
      "model_forward_time": 0.11582350730895996,
      "step": 28481
    },
    {
      "epoch": 0.000173834228515625,
      "step": 28481,
      "training_step_time": 0.4050102233886719
    },
    {
      "epoch": 0.00017384033203125,
      "model_forward_time": 0.11489033699035645,
      "step": 28482
    },
    {
      "epoch": 0.00017384033203125,
      "step": 28482,
      "training_step_time": 0.4352719783782959
    },
    {
      "epoch": 0.000173846435546875,
      "model_forward_time": 0.1153717041015625,
      "step": 28483
    },
    {
      "epoch": 0.000173846435546875,
      "step": 28483,
      "training_step_time": 0.3654506206512451
    },
    {
      "epoch": 0.0001738525390625,
      "model_forward_time": 0.11500120162963867,
      "step": 28484
    },
    {
      "epoch": 0.0001738525390625,
      "step": 28484,
      "training_step_time": 0.4535388946533203
    },
    {
      "epoch": 0.000173858642578125,
      "model_forward_time": 0.11508393287658691,
      "step": 28485
    },
    {
      "epoch": 0.000173858642578125,
      "step": 28485,
      "training_step_time": 0.45476841926574707
    },
    {
      "epoch": 0.00017386474609375,
      "model_forward_time": 0.11449003219604492,
      "step": 28486
    },
    {
      "epoch": 0.00017386474609375,
      "step": 28486,
      "training_step_time": 0.4133744239807129
    },
    {
      "epoch": 0.000173870849609375,
      "model_forward_time": 0.11457037925720215,
      "step": 28487
    },
    {
      "epoch": 0.000173870849609375,
      "step": 28487,
      "training_step_time": 0.4167823791503906
    },
    {
      "epoch": 0.000173876953125,
      "model_forward_time": 0.1151723861694336,
      "step": 28488
    },
    {
      "epoch": 0.000173876953125,
      "step": 28488,
      "training_step_time": 0.3948554992675781
    },
    {
      "epoch": 0.000173883056640625,
      "model_forward_time": 0.11541867256164551,
      "step": 28489
    },
    {
      "epoch": 0.000173883056640625,
      "step": 28489,
      "training_step_time": 0.38950562477111816
    },
    {
      "epoch": 0.00017388916015625,
      "grad_norm": 0.08733385056257248,
      "learning_rate": 5.825691024008162e-05,
      "loss": 0.0505,
      "step": 28490
    },
    {
      "epoch": 0.00017388916015625,
      "model_forward_time": 0.11509299278259277,
      "step": 28490
    },
    {
      "epoch": 0.00017388916015625,
      "step": 28490,
      "training_step_time": 0.38807106018066406
    },
    {
      "epoch": 0.000173895263671875,
      "model_forward_time": 0.1150507926940918,
      "step": 28491
    },
    {
      "epoch": 0.000173895263671875,
      "step": 28491,
      "training_step_time": 0.5912163257598877
    },
    {
      "epoch": 0.0001739013671875,
      "model_forward_time": 0.11551189422607422,
      "step": 28492
    },
    {
      "epoch": 0.0001739013671875,
      "step": 28492,
      "training_step_time": 0.38907885551452637
    },
    {
      "epoch": 0.000173907470703125,
      "model_forward_time": 0.11490917205810547,
      "step": 28493
    },
    {
      "epoch": 0.000173907470703125,
      "step": 28493,
      "training_step_time": 0.38837552070617676
    },
    {
      "epoch": 0.00017391357421875,
      "model_forward_time": 0.1155858039855957,
      "step": 28494
    },
    {
      "epoch": 0.00017391357421875,
      "step": 28494,
      "training_step_time": 0.3910560607910156
    },
    {
      "epoch": 0.000173919677734375,
      "model_forward_time": 0.1151280403137207,
      "step": 28495
    },
    {
      "epoch": 0.000173919677734375,
      "step": 28495,
      "training_step_time": 0.3987617492675781
    },
    {
      "epoch": 0.00017392578125,
      "model_forward_time": 0.1150367259979248,
      "step": 28496
    },
    {
      "epoch": 0.00017392578125,
      "step": 28496,
      "training_step_time": 0.45052003860473633
    },
    {
      "epoch": 0.000173931884765625,
      "model_forward_time": 0.11445975303649902,
      "step": 28497
    },
    {
      "epoch": 0.000173931884765625,
      "step": 28497,
      "training_step_time": 0.6458320617675781
    },
    {
      "epoch": 0.00017393798828125,
      "model_forward_time": 0.11530423164367676,
      "step": 28498
    },
    {
      "epoch": 0.00017393798828125,
      "step": 28498,
      "training_step_time": 0.4122192859649658
    },
    {
      "epoch": 0.000173944091796875,
      "model_forward_time": 0.11495256423950195,
      "step": 28499
    },
    {
      "epoch": 0.000173944091796875,
      "step": 28499,
      "training_step_time": 0.4595351219177246
    },
    {
      "epoch": 0.0001739501953125,
      "grad_norm": 0.12925995886325836,
      "learning_rate": 5.8229729514036705e-05,
      "loss": 0.0464,
      "step": 28500
    },
    {
      "epoch": 0.0001739501953125,
      "model_forward_time": 0.11492228507995605,
      "step": 28500
    },
    {
      "epoch": 0.0001739501953125,
      "step": 28500,
      "training_step_time": 0.4142487049102783
    },
    {
      "epoch": 0.000173956298828125,
      "model_forward_time": 0.11480450630187988,
      "step": 28501
    },
    {
      "epoch": 0.000173956298828125,
      "step": 28501,
      "training_step_time": 0.405198335647583
    },
    {
      "epoch": 0.00017396240234375,
      "model_forward_time": 0.1151270866394043,
      "step": 28502
    },
    {
      "epoch": 0.00017396240234375,
      "step": 28502,
      "training_step_time": 0.39698314666748047
    },
    {
      "epoch": 0.000173968505859375,
      "model_forward_time": 0.11500406265258789,
      "step": 28503
    },
    {
      "epoch": 0.000173968505859375,
      "step": 28503,
      "training_step_time": 0.47707438468933105
    },
    {
      "epoch": 0.000173974609375,
      "model_forward_time": 0.11534261703491211,
      "step": 28504
    },
    {
      "epoch": 0.000173974609375,
      "step": 28504,
      "training_step_time": 0.39723753929138184
    },
    {
      "epoch": 0.000173980712890625,
      "model_forward_time": 0.11507892608642578,
      "step": 28505
    },
    {
      "epoch": 0.000173980712890625,
      "step": 28505,
      "training_step_time": 0.39015841484069824
    },
    {
      "epoch": 0.00017398681640625,
      "model_forward_time": 0.11453819274902344,
      "step": 28506
    },
    {
      "epoch": 0.00017398681640625,
      "step": 28506,
      "training_step_time": 0.39720582962036133
    },
    {
      "epoch": 0.000173992919921875,
      "model_forward_time": 0.11631298065185547,
      "step": 28507
    },
    {
      "epoch": 0.000173992919921875,
      "step": 28507,
      "training_step_time": 0.3998692035675049
    },
    {
      "epoch": 0.0001739990234375,
      "model_forward_time": 0.11503005027770996,
      "step": 28508
    },
    {
      "epoch": 0.0001739990234375,
      "step": 28508,
      "training_step_time": 0.38573312759399414
    },
    {
      "epoch": 0.000174005126953125,
      "model_forward_time": 0.11509037017822266,
      "step": 28509
    },
    {
      "epoch": 0.000174005126953125,
      "step": 28509,
      "training_step_time": 0.5757768154144287
    },
    {
      "epoch": 0.00017401123046875,
      "grad_norm": 0.1815137267112732,
      "learning_rate": 5.820254628801726e-05,
      "loss": 0.0422,
      "step": 28510
    },
    {
      "epoch": 0.00017401123046875,
      "model_forward_time": 0.11521768569946289,
      "step": 28510
    },
    {
      "epoch": 0.00017401123046875,
      "step": 28510,
      "training_step_time": 0.435319185256958
    },
    {
      "epoch": 0.000174017333984375,
      "model_forward_time": 0.11551642417907715,
      "step": 28511
    },
    {
      "epoch": 0.000174017333984375,
      "step": 28511,
      "training_step_time": 0.39173269271850586
    },
    {
      "epoch": 0.0001740234375,
      "model_forward_time": 0.11524415016174316,
      "step": 28512
    },
    {
      "epoch": 0.0001740234375,
      "step": 28512,
      "training_step_time": 0.4986908435821533
    },
    {
      "epoch": 0.000174029541015625,
      "model_forward_time": 0.11511492729187012,
      "step": 28513
    },
    {
      "epoch": 0.000174029541015625,
      "step": 28513,
      "training_step_time": 0.48950743675231934
    },
    {
      "epoch": 0.00017403564453125,
      "model_forward_time": 0.12072515487670898,
      "step": 28514
    },
    {
      "epoch": 0.00017403564453125,
      "step": 28514,
      "training_step_time": 0.4368159770965576
    },
    {
      "epoch": 0.000174041748046875,
      "model_forward_time": 0.11831092834472656,
      "step": 28515
    },
    {
      "epoch": 0.000174041748046875,
      "step": 28515,
      "training_step_time": 0.4715454578399658
    },
    {
      "epoch": 0.0001740478515625,
      "model_forward_time": 0.118896484375,
      "step": 28516
    },
    {
      "epoch": 0.0001740478515625,
      "step": 28516,
      "training_step_time": 0.37768077850341797
    },
    {
      "epoch": 0.000174053955078125,
      "model_forward_time": 0.11537623405456543,
      "step": 28517
    },
    {
      "epoch": 0.000174053955078125,
      "step": 28517,
      "training_step_time": 0.3935532569885254
    },
    {
      "epoch": 0.00017406005859375,
      "model_forward_time": 0.11468935012817383,
      "step": 28518
    },
    {
      "epoch": 0.00017406005859375,
      "step": 28518,
      "training_step_time": 0.38843321800231934
    },
    {
      "epoch": 0.000174066162109375,
      "model_forward_time": 0.11612200736999512,
      "step": 28519
    },
    {
      "epoch": 0.000174066162109375,
      "step": 28519,
      "training_step_time": 0.3878438472747803
    },
    {
      "epoch": 0.000174072265625,
      "grad_norm": 0.11375611275434494,
      "learning_rate": 5.817536057028081e-05,
      "loss": 0.046,
      "step": 28520
    },
    {
      "epoch": 0.000174072265625,
      "model_forward_time": 0.11508965492248535,
      "step": 28520
    },
    {
      "epoch": 0.000174072265625,
      "step": 28520,
      "training_step_time": 0.39824557304382324
    },
    {
      "epoch": 0.000174078369140625,
      "model_forward_time": 0.11513137817382812,
      "step": 28521
    },
    {
      "epoch": 0.000174078369140625,
      "step": 28521,
      "training_step_time": 0.40779542922973633
    },
    {
      "epoch": 0.00017408447265625,
      "model_forward_time": 0.11494064331054688,
      "step": 28522
    },
    {
      "epoch": 0.00017408447265625,
      "step": 28522,
      "training_step_time": 0.42038869857788086
    },
    {
      "epoch": 0.000174090576171875,
      "model_forward_time": 0.1161048412322998,
      "step": 28523
    },
    {
      "epoch": 0.000174090576171875,
      "step": 28523,
      "training_step_time": 0.416553258895874
    },
    {
      "epoch": 0.0001740966796875,
      "model_forward_time": 0.1157829761505127,
      "step": 28524
    },
    {
      "epoch": 0.0001740966796875,
      "step": 28524,
      "training_step_time": 0.3980100154876709
    },
    {
      "epoch": 0.000174102783203125,
      "model_forward_time": 0.11548638343811035,
      "step": 28525
    },
    {
      "epoch": 0.000174102783203125,
      "step": 28525,
      "training_step_time": 0.39681243896484375
    },
    {
      "epoch": 0.00017410888671875,
      "model_forward_time": 0.11551833152770996,
      "step": 28526
    },
    {
      "epoch": 0.00017410888671875,
      "step": 28526,
      "training_step_time": 0.3683803081512451
    },
    {
      "epoch": 0.000174114990234375,
      "model_forward_time": 0.11577200889587402,
      "step": 28527
    },
    {
      "epoch": 0.000174114990234375,
      "step": 28527,
      "training_step_time": 0.5701897144317627
    },
    {
      "epoch": 0.00017412109375,
      "model_forward_time": 0.11510443687438965,
      "step": 28528
    },
    {
      "epoch": 0.00017412109375,
      "step": 28528,
      "training_step_time": 0.4378845691680908
    },
    {
      "epoch": 0.000174127197265625,
      "model_forward_time": 0.11545324325561523,
      "step": 28529
    },
    {
      "epoch": 0.000174127197265625,
      "step": 28529,
      "training_step_time": 0.415785551071167
    },
    {
      "epoch": 0.00017413330078125,
      "grad_norm": 0.11197192221879959,
      "learning_rate": 5.8148172369085686e-05,
      "loss": 0.0418,
      "step": 28530
    },
    {
      "epoch": 0.00017413330078125,
      "model_forward_time": 0.11441469192504883,
      "step": 28530
    },
    {
      "epoch": 0.00017413330078125,
      "step": 28530,
      "training_step_time": 0.3916587829589844
    },
    {
      "epoch": 0.000174139404296875,
      "model_forward_time": 0.11524009704589844,
      "step": 28531
    },
    {
      "epoch": 0.000174139404296875,
      "step": 28531,
      "training_step_time": 0.4049718379974365
    },
    {
      "epoch": 0.0001741455078125,
      "model_forward_time": 0.11518597602844238,
      "step": 28532
    },
    {
      "epoch": 0.0001741455078125,
      "step": 28532,
      "training_step_time": 0.39162254333496094
    },
    {
      "epoch": 0.000174151611328125,
      "model_forward_time": 0.1153724193572998,
      "step": 28533
    },
    {
      "epoch": 0.000174151611328125,
      "step": 28533,
      "training_step_time": 0.4761018753051758
    },
    {
      "epoch": 0.00017415771484375,
      "model_forward_time": 0.11545658111572266,
      "step": 28534
    },
    {
      "epoch": 0.00017415771484375,
      "step": 28534,
      "training_step_time": 0.39530062675476074
    },
    {
      "epoch": 0.000174163818359375,
      "model_forward_time": 0.11514902114868164,
      "step": 28535
    },
    {
      "epoch": 0.000174163818359375,
      "step": 28535,
      "training_step_time": 0.39380359649658203
    },
    {
      "epoch": 0.000174169921875,
      "model_forward_time": 0.11644101142883301,
      "step": 28536
    },
    {
      "epoch": 0.000174169921875,
      "step": 28536,
      "training_step_time": 0.42274022102355957
    },
    {
      "epoch": 0.000174176025390625,
      "model_forward_time": 0.11455392837524414,
      "step": 28537
    },
    {
      "epoch": 0.000174176025390625,
      "step": 28537,
      "training_step_time": 0.44286179542541504
    },
    {
      "epoch": 0.00017418212890625,
      "model_forward_time": 0.11477994918823242,
      "step": 28538
    },
    {
      "epoch": 0.00017418212890625,
      "step": 28538,
      "training_step_time": 0.4212920665740967
    },
    {
      "epoch": 0.000174188232421875,
      "model_forward_time": 0.11504006385803223,
      "step": 28539
    },
    {
      "epoch": 0.000174188232421875,
      "step": 28539,
      "training_step_time": 0.47260379791259766
    },
    {
      "epoch": 0.0001741943359375,
      "grad_norm": 0.18704265356063843,
      "learning_rate": 5.812098169269094e-05,
      "loss": 0.048,
      "step": 28540
    },
    {
      "epoch": 0.0001741943359375,
      "model_forward_time": 0.11548566818237305,
      "step": 28540
    },
    {
      "epoch": 0.0001741943359375,
      "step": 28540,
      "training_step_time": 0.43978261947631836
    },
    {
      "epoch": 0.000174200439453125,
      "model_forward_time": 0.11507773399353027,
      "step": 28541
    },
    {
      "epoch": 0.000174200439453125,
      "step": 28541,
      "training_step_time": 0.46791505813598633
    },
    {
      "epoch": 0.00017420654296875,
      "model_forward_time": 0.11562132835388184,
      "step": 28542
    },
    {
      "epoch": 0.00017420654296875,
      "step": 28542,
      "training_step_time": 0.47455859184265137
    },
    {
      "epoch": 0.000174212646484375,
      "model_forward_time": 0.11528992652893066,
      "step": 28543
    },
    {
      "epoch": 0.000174212646484375,
      "step": 28543,
      "training_step_time": 0.41275477409362793
    },
    {
      "epoch": 0.00017421875,
      "model_forward_time": 0.11541438102722168,
      "step": 28544
    },
    {
      "epoch": 0.00017421875,
      "step": 28544,
      "training_step_time": 0.3962588310241699
    },
    {
      "epoch": 0.000174224853515625,
      "model_forward_time": 0.11530375480651855,
      "step": 28545
    },
    {
      "epoch": 0.000174224853515625,
      "step": 28545,
      "training_step_time": 0.3860659599304199
    },
    {
      "epoch": 0.00017423095703125,
      "model_forward_time": 0.11521553993225098,
      "step": 28546
    },
    {
      "epoch": 0.00017423095703125,
      "step": 28546,
      "training_step_time": 0.39665722846984863
    },
    {
      "epoch": 0.000174237060546875,
      "model_forward_time": 0.11523771286010742,
      "step": 28547
    },
    {
      "epoch": 0.000174237060546875,
      "step": 28547,
      "training_step_time": 0.4108593463897705
    },
    {
      "epoch": 0.0001742431640625,
      "model_forward_time": 0.11578249931335449,
      "step": 28548
    },
    {
      "epoch": 0.0001742431640625,
      "step": 28548,
      "training_step_time": 0.397291898727417
    },
    {
      "epoch": 0.000174249267578125,
      "model_forward_time": 0.11598396301269531,
      "step": 28549
    },
    {
      "epoch": 0.000174249267578125,
      "step": 28549,
      "training_step_time": 0.41118574142456055
    },
    {
      "epoch": 0.00017425537109375,
      "grad_norm": 0.14407141506671906,
      "learning_rate": 5.809378854935639e-05,
      "loss": 0.0456,
      "step": 28550
    },
    {
      "epoch": 0.00017425537109375,
      "model_forward_time": 0.11562442779541016,
      "step": 28550
    },
    {
      "epoch": 0.00017425537109375,
      "step": 28550,
      "training_step_time": 0.4015955924987793
    },
    {
      "epoch": 0.000174261474609375,
      "model_forward_time": 0.11514425277709961,
      "step": 28551
    },
    {
      "epoch": 0.000174261474609375,
      "step": 28551,
      "training_step_time": 0.5764071941375732
    },
    {
      "epoch": 0.000174267578125,
      "model_forward_time": 0.11513185501098633,
      "step": 28552
    },
    {
      "epoch": 0.000174267578125,
      "step": 28552,
      "training_step_time": 0.39168858528137207
    },
    {
      "epoch": 0.000174273681640625,
      "model_forward_time": 0.11593985557556152,
      "step": 28553
    },
    {
      "epoch": 0.000174273681640625,
      "step": 28553,
      "training_step_time": 0.39976072311401367
    },
    {
      "epoch": 0.00017427978515625,
      "model_forward_time": 0.1153116226196289,
      "step": 28554
    },
    {
      "epoch": 0.00017427978515625,
      "step": 28554,
      "training_step_time": 0.39299893379211426
    },
    {
      "epoch": 0.000174285888671875,
      "model_forward_time": 0.11606836318969727,
      "step": 28555
    },
    {
      "epoch": 0.000174285888671875,
      "step": 28555,
      "training_step_time": 0.4748365879058838
    },
    {
      "epoch": 0.0001742919921875,
      "model_forward_time": 0.11595797538757324,
      "step": 28556
    },
    {
      "epoch": 0.0001742919921875,
      "step": 28556,
      "training_step_time": 0.49654579162597656
    },
    {
      "epoch": 0.000174298095703125,
      "model_forward_time": 0.11562299728393555,
      "step": 28557
    },
    {
      "epoch": 0.000174298095703125,
      "step": 28557,
      "training_step_time": 0.6684062480926514
    },
    {
      "epoch": 0.00017430419921875,
      "model_forward_time": 0.11488056182861328,
      "step": 28558
    },
    {
      "epoch": 0.00017430419921875,
      "step": 28558,
      "training_step_time": 0.3837015628814697
    },
    {
      "epoch": 0.000174310302734375,
      "model_forward_time": 0.11445093154907227,
      "step": 28559
    },
    {
      "epoch": 0.000174310302734375,
      "step": 28559,
      "training_step_time": 0.39373064041137695
    },
    {
      "epoch": 0.00017431640625,
      "grad_norm": 0.16333651542663574,
      "learning_rate": 5.8066592947342555e-05,
      "loss": 0.0401,
      "step": 28560
    },
    {
      "epoch": 0.00017431640625,
      "model_forward_time": 0.11490488052368164,
      "step": 28560
    },
    {
      "epoch": 0.00017431640625,
      "step": 28560,
      "training_step_time": 0.3915238380432129
    },
    {
      "epoch": 0.000174322509765625,
      "model_forward_time": 0.11490750312805176,
      "step": 28561
    },
    {
      "epoch": 0.000174322509765625,
      "step": 28561,
      "training_step_time": 0.39444565773010254
    },
    {
      "epoch": 0.00017432861328125,
      "model_forward_time": 0.11504650115966797,
      "step": 28562
    },
    {
      "epoch": 0.00017432861328125,
      "step": 28562,
      "training_step_time": 0.3925158977508545
    },
    {
      "epoch": 0.000174334716796875,
      "model_forward_time": 0.11480474472045898,
      "step": 28563
    },
    {
      "epoch": 0.000174334716796875,
      "step": 28563,
      "training_step_time": 0.5082786083221436
    },
    {
      "epoch": 0.0001743408203125,
      "model_forward_time": 0.11584258079528809,
      "step": 28564
    },
    {
      "epoch": 0.0001743408203125,
      "step": 28564,
      "training_step_time": 0.400209903717041
    },
    {
      "epoch": 0.000174346923828125,
      "model_forward_time": 0.11580252647399902,
      "step": 28565
    },
    {
      "epoch": 0.000174346923828125,
      "step": 28565,
      "training_step_time": 0.41689300537109375
    },
    {
      "epoch": 0.00017435302734375,
      "model_forward_time": 0.11523056030273438,
      "step": 28566
    },
    {
      "epoch": 0.00017435302734375,
      "step": 28566,
      "training_step_time": 0.4058098793029785
    },
    {
      "epoch": 0.000174359130859375,
      "model_forward_time": 0.11566638946533203,
      "step": 28567
    },
    {
      "epoch": 0.000174359130859375,
      "step": 28567,
      "training_step_time": 0.4051828384399414
    },
    {
      "epoch": 0.000174365234375,
      "model_forward_time": 0.11485099792480469,
      "step": 28568
    },
    {
      "epoch": 0.000174365234375,
      "step": 28568,
      "training_step_time": 0.39590883255004883
    },
    {
      "epoch": 0.000174371337890625,
      "model_forward_time": 0.11623907089233398,
      "step": 28569
    },
    {
      "epoch": 0.000174371337890625,
      "step": 28569,
      "training_step_time": 0.7021160125732422
    },
    {
      "epoch": 0.00017437744140625,
      "grad_norm": 0.15356102585792542,
      "learning_rate": 5.8039394894910796e-05,
      "loss": 0.0416,
      "step": 28570
    },
    {
      "epoch": 0.00017437744140625,
      "model_forward_time": 0.11566877365112305,
      "step": 28570
    },
    {
      "epoch": 0.00017437744140625,
      "step": 28570,
      "training_step_time": 0.511315107345581
    },
    {
      "epoch": 0.000174383544921875,
      "model_forward_time": 0.1145474910736084,
      "step": 28571
    },
    {
      "epoch": 0.000174383544921875,
      "step": 28571,
      "training_step_time": 0.4962787628173828
    },
    {
      "epoch": 0.0001743896484375,
      "model_forward_time": 0.11498069763183594,
      "step": 28572
    },
    {
      "epoch": 0.0001743896484375,
      "step": 28572,
      "training_step_time": 0.39227724075317383
    },
    {
      "epoch": 0.000174395751953125,
      "model_forward_time": 0.11436820030212402,
      "step": 28573
    },
    {
      "epoch": 0.000174395751953125,
      "step": 28573,
      "training_step_time": 0.39641618728637695
    },
    {
      "epoch": 0.00017440185546875,
      "model_forward_time": 0.11506819725036621,
      "step": 28574
    },
    {
      "epoch": 0.00017440185546875,
      "step": 28574,
      "training_step_time": 0.3945751190185547
    },
    {
      "epoch": 0.000174407958984375,
      "model_forward_time": 0.11699652671813965,
      "step": 28575
    },
    {
      "epoch": 0.000174407958984375,
      "step": 28575,
      "training_step_time": 0.4206058979034424
    },
    {
      "epoch": 0.0001744140625,
      "model_forward_time": 0.11503195762634277,
      "step": 28576
    },
    {
      "epoch": 0.0001744140625,
      "step": 28576,
      "training_step_time": 0.4568791389465332
    },
    {
      "epoch": 0.000174420166015625,
      "model_forward_time": 0.1151578426361084,
      "step": 28577
    },
    {
      "epoch": 0.000174420166015625,
      "step": 28577,
      "training_step_time": 0.38173747062683105
    },
    {
      "epoch": 0.00017442626953125,
      "model_forward_time": 0.11606597900390625,
      "step": 28578
    },
    {
      "epoch": 0.00017442626953125,
      "step": 28578,
      "training_step_time": 0.39333343505859375
    },
    {
      "epoch": 0.000174432373046875,
      "model_forward_time": 0.11572122573852539,
      "step": 28579
    },
    {
      "epoch": 0.000174432373046875,
      "step": 28579,
      "training_step_time": 0.394742488861084
    },
    {
      "epoch": 0.0001744384765625,
      "grad_norm": 0.10834735631942749,
      "learning_rate": 5.8012194400323116e-05,
      "loss": 0.0447,
      "step": 28580
    },
    {
      "epoch": 0.0001744384765625,
      "model_forward_time": 0.11562204360961914,
      "step": 28580
    },
    {
      "epoch": 0.0001744384765625,
      "step": 28580,
      "training_step_time": 0.39737868309020996
    },
    {
      "epoch": 0.000174444580078125,
      "model_forward_time": 0.11529016494750977,
      "step": 28581
    },
    {
      "epoch": 0.000174444580078125,
      "step": 28581,
      "training_step_time": 0.5986213684082031
    },
    {
      "epoch": 0.00017445068359375,
      "model_forward_time": 0.11557149887084961,
      "step": 28582
    },
    {
      "epoch": 0.00017445068359375,
      "step": 28582,
      "training_step_time": 0.4997823238372803
    },
    {
      "epoch": 0.000174456787109375,
      "model_forward_time": 0.11634707450866699,
      "step": 28583
    },
    {
      "epoch": 0.000174456787109375,
      "step": 28583,
      "training_step_time": 0.47769665718078613
    },
    {
      "epoch": 0.000174462890625,
      "model_forward_time": 0.11518502235412598,
      "step": 28584
    },
    {
      "epoch": 0.000174462890625,
      "step": 28584,
      "training_step_time": 0.4079864025115967
    },
    {
      "epoch": 0.000174468994140625,
      "model_forward_time": 0.11864447593688965,
      "step": 28585
    },
    {
      "epoch": 0.000174468994140625,
      "step": 28585,
      "training_step_time": 0.46866822242736816
    },
    {
      "epoch": 0.00017447509765625,
      "model_forward_time": 0.11456799507141113,
      "step": 28586
    },
    {
      "epoch": 0.00017447509765625,
      "step": 28586,
      "training_step_time": 0.3976140022277832
    },
    {
      "epoch": 0.000174481201171875,
      "model_forward_time": 0.11571907997131348,
      "step": 28587
    },
    {
      "epoch": 0.000174481201171875,
      "step": 28587,
      "training_step_time": 0.38885068893432617
    },
    {
      "epoch": 0.0001744873046875,
      "model_forward_time": 0.11489105224609375,
      "step": 28588
    },
    {
      "epoch": 0.0001744873046875,
      "step": 28588,
      "training_step_time": 0.41684389114379883
    },
    {
      "epoch": 0.000174493408203125,
      "model_forward_time": 0.11562705039978027,
      "step": 28589
    },
    {
      "epoch": 0.000174493408203125,
      "step": 28589,
      "training_step_time": 0.45665884017944336
    },
    {
      "epoch": 0.00017449951171875,
      "grad_norm": 0.1401326209306717,
      "learning_rate": 5.798499147184233e-05,
      "loss": 0.0434,
      "step": 28590
    },
    {
      "epoch": 0.00017449951171875,
      "model_forward_time": 0.11506295204162598,
      "step": 28590
    },
    {
      "epoch": 0.00017449951171875,
      "step": 28590,
      "training_step_time": 0.4192843437194824
    },
    {
      "epoch": 0.000174505615234375,
      "model_forward_time": 0.11574792861938477,
      "step": 28591
    },
    {
      "epoch": 0.000174505615234375,
      "step": 28591,
      "training_step_time": 0.3990590572357178
    },
    {
      "epoch": 0.00017451171875,
      "model_forward_time": 0.11571407318115234,
      "step": 28592
    },
    {
      "epoch": 0.00017451171875,
      "step": 28592,
      "training_step_time": 0.4100806713104248
    },
    {
      "epoch": 0.000174517822265625,
      "model_forward_time": 0.11525201797485352,
      "step": 28593
    },
    {
      "epoch": 0.000174517822265625,
      "step": 28593,
      "training_step_time": 0.4038712978363037
    },
    {
      "epoch": 0.00017452392578125,
      "model_forward_time": 0.11527252197265625,
      "step": 28594
    },
    {
      "epoch": 0.00017452392578125,
      "step": 28594,
      "training_step_time": 0.3937952518463135
    },
    {
      "epoch": 0.000174530029296875,
      "model_forward_time": 0.11561012268066406,
      "step": 28595
    },
    {
      "epoch": 0.000174530029296875,
      "step": 28595,
      "training_step_time": 0.40412163734436035
    },
    {
      "epoch": 0.0001745361328125,
      "model_forward_time": 0.11550235748291016,
      "step": 28596
    },
    {
      "epoch": 0.0001745361328125,
      "step": 28596,
      "training_step_time": 0.4109354019165039
    },
    {
      "epoch": 0.000174542236328125,
      "model_forward_time": 0.11530137062072754,
      "step": 28597
    },
    {
      "epoch": 0.000174542236328125,
      "step": 28597,
      "training_step_time": 0.5023989677429199
    },
    {
      "epoch": 0.00017454833984375,
      "model_forward_time": 0.1155099868774414,
      "step": 28598
    },
    {
      "epoch": 0.00017454833984375,
      "step": 28598,
      "training_step_time": 0.43772006034851074
    },
    {
      "epoch": 0.000174554443359375,
      "model_forward_time": 0.11541414260864258,
      "step": 28599
    },
    {
      "epoch": 0.000174554443359375,
      "step": 28599,
      "training_step_time": 0.5753402709960938
    },
    {
      "epoch": 0.000174560546875,
      "grad_norm": 0.13622350990772247,
      "learning_rate": 5.795778611773197e-05,
      "loss": 0.0469,
      "step": 28600
    },
    {
      "epoch": 0.000174560546875,
      "model_forward_time": 0.11504912376403809,
      "step": 28600
    },
    {
      "epoch": 0.000174560546875,
      "step": 28600,
      "training_step_time": 0.4019463062286377
    },
    {
      "epoch": 0.000174566650390625,
      "model_forward_time": 0.11455488204956055,
      "step": 28601
    },
    {
      "epoch": 0.000174566650390625,
      "step": 28601,
      "training_step_time": 0.389237642288208
    },
    {
      "epoch": 0.00017457275390625,
      "model_forward_time": 0.11450839042663574,
      "step": 28602
    },
    {
      "epoch": 0.00017457275390625,
      "step": 28602,
      "training_step_time": 0.4209117889404297
    },
    {
      "epoch": 0.000174578857421875,
      "model_forward_time": 0.11800408363342285,
      "step": 28603
    },
    {
      "epoch": 0.000174578857421875,
      "step": 28603,
      "training_step_time": 0.4429306983947754
    },
    {
      "epoch": 0.0001745849609375,
      "model_forward_time": 0.11416101455688477,
      "step": 28604
    },
    {
      "epoch": 0.0001745849609375,
      "step": 28604,
      "training_step_time": 0.39322590827941895
    },
    {
      "epoch": 0.000174591064453125,
      "model_forward_time": 0.11514568328857422,
      "step": 28605
    },
    {
      "epoch": 0.000174591064453125,
      "step": 28605,
      "training_step_time": 0.3874213695526123
    },
    {
      "epoch": 0.00017459716796875,
      "model_forward_time": 0.11504197120666504,
      "step": 28606
    },
    {
      "epoch": 0.00017459716796875,
      "step": 28606,
      "training_step_time": 0.3874521255493164
    },
    {
      "epoch": 0.000174603271484375,
      "model_forward_time": 0.11600327491760254,
      "step": 28607
    },
    {
      "epoch": 0.000174603271484375,
      "step": 28607,
      "training_step_time": 0.40073513984680176
    },
    {
      "epoch": 0.000174609375,
      "model_forward_time": 0.115020751953125,
      "step": 28608
    },
    {
      "epoch": 0.000174609375,
      "step": 28608,
      "training_step_time": 0.38514041900634766
    },
    {
      "epoch": 0.000174615478515625,
      "model_forward_time": 0.11609220504760742,
      "step": 28609
    },
    {
      "epoch": 0.000174615478515625,
      "step": 28609,
      "training_step_time": 0.7017314434051514
    },
    {
      "epoch": 0.00017462158203125,
      "grad_norm": 0.14438900351524353,
      "learning_rate": 5.7930578346256305e-05,
      "loss": 0.043,
      "step": 28610
    },
    {
      "epoch": 0.00017462158203125,
      "model_forward_time": 0.11475944519042969,
      "step": 28610
    },
    {
      "epoch": 0.00017462158203125,
      "step": 28610,
      "training_step_time": 0.3647801876068115
    },
    {
      "epoch": 0.000174627685546875,
      "model_forward_time": 0.11537003517150879,
      "step": 28611
    },
    {
      "epoch": 0.000174627685546875,
      "step": 28611,
      "training_step_time": 0.46474528312683105
    },
    {
      "epoch": 0.0001746337890625,
      "model_forward_time": 0.11448407173156738,
      "step": 28612
    },
    {
      "epoch": 0.0001746337890625,
      "step": 28612,
      "training_step_time": 0.4126551151275635
    },
    {
      "epoch": 0.000174639892578125,
      "model_forward_time": 0.11476683616638184,
      "step": 28613
    },
    {
      "epoch": 0.000174639892578125,
      "step": 28613,
      "training_step_time": 0.472017765045166
    },
    {
      "epoch": 0.00017464599609375,
      "model_forward_time": 0.11394953727722168,
      "step": 28614
    },
    {
      "epoch": 0.00017464599609375,
      "step": 28614,
      "training_step_time": 0.3904900550842285
    },
    {
      "epoch": 0.000174652099609375,
      "model_forward_time": 0.11580252647399902,
      "step": 28615
    },
    {
      "epoch": 0.000174652099609375,
      "step": 28615,
      "training_step_time": 0.3893895149230957
    },
    {
      "epoch": 0.000174658203125,
      "model_forward_time": 0.11428213119506836,
      "step": 28616
    },
    {
      "epoch": 0.000174658203125,
      "step": 28616,
      "training_step_time": 0.45814967155456543
    },
    {
      "epoch": 0.000174664306640625,
      "model_forward_time": 0.11542367935180664,
      "step": 28617
    },
    {
      "epoch": 0.000174664306640625,
      "step": 28617,
      "training_step_time": 0.3940865993499756
    },
    {
      "epoch": 0.00017467041015625,
      "model_forward_time": 0.11541152000427246,
      "step": 28618
    },
    {
      "epoch": 0.00017467041015625,
      "step": 28618,
      "training_step_time": 0.3907296657562256
    },
    {
      "epoch": 0.000174676513671875,
      "model_forward_time": 0.11477184295654297,
      "step": 28619
    },
    {
      "epoch": 0.000174676513671875,
      "step": 28619,
      "training_step_time": 0.3984191417694092
    },
    {
      "epoch": 0.0001746826171875,
      "grad_norm": 0.1599978804588318,
      "learning_rate": 5.7903368165680327e-05,
      "loss": 0.0512,
      "step": 28620
    },
    {
      "epoch": 0.0001746826171875,
      "model_forward_time": 0.11537694931030273,
      "step": 28620
    },
    {
      "epoch": 0.0001746826171875,
      "step": 28620,
      "training_step_time": 0.3933074474334717
    },
    {
      "epoch": 0.000174688720703125,
      "model_forward_time": 0.11577749252319336,
      "step": 28621
    },
    {
      "epoch": 0.000174688720703125,
      "step": 28621,
      "training_step_time": 0.721768856048584
    },
    {
      "epoch": 0.00017469482421875,
      "model_forward_time": 0.11555147171020508,
      "step": 28622
    },
    {
      "epoch": 0.00017469482421875,
      "step": 28622,
      "training_step_time": 0.393237829208374
    },
    {
      "epoch": 0.000174700927734375,
      "model_forward_time": 0.11521530151367188,
      "step": 28623
    },
    {
      "epoch": 0.000174700927734375,
      "step": 28623,
      "training_step_time": 0.4373950958251953
    },
    {
      "epoch": 0.00017470703125,
      "model_forward_time": 0.11473226547241211,
      "step": 28624
    },
    {
      "epoch": 0.00017470703125,
      "step": 28624,
      "training_step_time": 0.40383243560791016
    },
    {
      "epoch": 0.000174713134765625,
      "model_forward_time": 0.11513686180114746,
      "step": 28625
    },
    {
      "epoch": 0.000174713134765625,
      "step": 28625,
      "training_step_time": 0.43043017387390137
    },
    {
      "epoch": 0.00017471923828125,
      "model_forward_time": 0.11426138877868652,
      "step": 28626
    },
    {
      "epoch": 0.00017471923828125,
      "step": 28626,
      "training_step_time": 0.4793968200683594
    },
    {
      "epoch": 0.000174725341796875,
      "model_forward_time": 0.11470198631286621,
      "step": 28627
    },
    {
      "epoch": 0.000174725341796875,
      "step": 28627,
      "training_step_time": 0.4017019271850586
    },
    {
      "epoch": 0.0001747314453125,
      "model_forward_time": 0.11539578437805176,
      "step": 28628
    },
    {
      "epoch": 0.0001747314453125,
      "step": 28628,
      "training_step_time": 0.43036985397338867
    },
    {
      "epoch": 0.000174737548828125,
      "model_forward_time": 0.11528134346008301,
      "step": 28629
    },
    {
      "epoch": 0.000174737548828125,
      "step": 28629,
      "training_step_time": 0.45415806770324707
    },
    {
      "epoch": 0.00017474365234375,
      "grad_norm": 0.16731519997119904,
      "learning_rate": 5.7876155584269785e-05,
      "loss": 0.0447,
      "step": 28630
    },
    {
      "epoch": 0.00017474365234375,
      "model_forward_time": 0.11493372917175293,
      "step": 28630
    },
    {
      "epoch": 0.00017474365234375,
      "step": 28630,
      "training_step_time": 0.3833918571472168
    },
    {
      "epoch": 0.000174749755859375,
      "model_forward_time": 0.11532235145568848,
      "step": 28631
    },
    {
      "epoch": 0.000174749755859375,
      "step": 28631,
      "training_step_time": 0.4075167179107666
    },
    {
      "epoch": 0.000174755859375,
      "model_forward_time": 0.11551189422607422,
      "step": 28632
    },
    {
      "epoch": 0.000174755859375,
      "step": 28632,
      "training_step_time": 0.3997764587402344
    },
    {
      "epoch": 0.000174761962890625,
      "model_forward_time": 0.11509895324707031,
      "step": 28633
    },
    {
      "epoch": 0.000174761962890625,
      "step": 28633,
      "training_step_time": 0.6033086776733398
    },
    {
      "epoch": 0.00017476806640625,
      "model_forward_time": 0.11473608016967773,
      "step": 28634
    },
    {
      "epoch": 0.00017476806640625,
      "step": 28634,
      "training_step_time": 0.39300537109375
    },
    {
      "epoch": 0.000174774169921875,
      "model_forward_time": 0.11487531661987305,
      "step": 28635
    },
    {
      "epoch": 0.000174774169921875,
      "step": 28635,
      "training_step_time": 0.38798999786376953
    },
    {
      "epoch": 0.0001747802734375,
      "model_forward_time": 0.11569070816040039,
      "step": 28636
    },
    {
      "epoch": 0.0001747802734375,
      "step": 28636,
      "training_step_time": 0.38828158378601074
    },
    {
      "epoch": 0.000174786376953125,
      "model_forward_time": 0.1155235767364502,
      "step": 28637
    },
    {
      "epoch": 0.000174786376953125,
      "step": 28637,
      "training_step_time": 0.3893425464630127
    },
    {
      "epoch": 0.00017479248046875,
      "model_forward_time": 0.11458992958068848,
      "step": 28638
    },
    {
      "epoch": 0.00017479248046875,
      "step": 28638,
      "training_step_time": 0.3625791072845459
    },
    {
      "epoch": 0.000174798583984375,
      "model_forward_time": 0.11534595489501953,
      "step": 28639
    },
    {
      "epoch": 0.000174798583984375,
      "step": 28639,
      "training_step_time": 0.5448899269104004
    },
    {
      "epoch": 0.0001748046875,
      "grad_norm": 0.10862533003091812,
      "learning_rate": 5.7848940610291125e-05,
      "loss": 0.0455,
      "step": 28640
    },
    {
      "epoch": 0.0001748046875,
      "model_forward_time": 0.11509513854980469,
      "step": 28640
    },
    {
      "epoch": 0.0001748046875,
      "step": 28640,
      "training_step_time": 0.4435434341430664
    },
    {
      "epoch": 0.000174810791015625,
      "model_forward_time": 0.11558341979980469,
      "step": 28641
    },
    {
      "epoch": 0.000174810791015625,
      "step": 28641,
      "training_step_time": 0.43348002433776855
    },
    {
      "epoch": 0.00017481689453125,
      "model_forward_time": 0.11502242088317871,
      "step": 28642
    },
    {
      "epoch": 0.00017481689453125,
      "step": 28642,
      "training_step_time": 0.4006471633911133
    },
    {
      "epoch": 0.000174822998046875,
      "model_forward_time": 0.11513924598693848,
      "step": 28643
    },
    {
      "epoch": 0.000174822998046875,
      "step": 28643,
      "training_step_time": 0.38931870460510254
    },
    {
      "epoch": 0.0001748291015625,
      "model_forward_time": 0.11523056030273438,
      "step": 28644
    },
    {
      "epoch": 0.0001748291015625,
      "step": 28644,
      "training_step_time": 0.393923282623291
    },
    {
      "epoch": 0.000174835205078125,
      "model_forward_time": 0.11509561538696289,
      "step": 28645
    },
    {
      "epoch": 0.000174835205078125,
      "step": 28645,
      "training_step_time": 0.5813300609588623
    },
    {
      "epoch": 0.00017484130859375,
      "model_forward_time": 0.11474728584289551,
      "step": 28646
    },
    {
      "epoch": 0.00017484130859375,
      "step": 28646,
      "training_step_time": 0.39220094680786133
    },
    {
      "epoch": 0.000174847412109375,
      "model_forward_time": 0.1156001091003418,
      "step": 28647
    },
    {
      "epoch": 0.000174847412109375,
      "step": 28647,
      "training_step_time": 0.38977909088134766
    },
    {
      "epoch": 0.000174853515625,
      "model_forward_time": 0.11544561386108398,
      "step": 28648
    },
    {
      "epoch": 0.000174853515625,
      "step": 28648,
      "training_step_time": 0.38853907585144043
    },
    {
      "epoch": 0.000174859619140625,
      "model_forward_time": 0.11575722694396973,
      "step": 28649
    },
    {
      "epoch": 0.000174859619140625,
      "step": 28649,
      "training_step_time": 0.43026208877563477
    },
    {
      "epoch": 0.00017486572265625,
      "grad_norm": 0.1560385525226593,
      "learning_rate": 5.782172325201155e-05,
      "loss": 0.0437,
      "step": 28650
    },
    {
      "epoch": 0.00017486572265625,
      "model_forward_time": 0.11471700668334961,
      "step": 28650
    },
    {
      "epoch": 0.00017486572265625,
      "step": 28650,
      "training_step_time": 0.39664745330810547
    },
    {
      "epoch": 0.000174871826171875,
      "model_forward_time": 0.11535763740539551,
      "step": 28651
    },
    {
      "epoch": 0.000174871826171875,
      "step": 28651,
      "training_step_time": 0.6861114501953125
    },
    {
      "epoch": 0.0001748779296875,
      "model_forward_time": 0.11605596542358398,
      "step": 28652
    },
    {
      "epoch": 0.0001748779296875,
      "step": 28652,
      "training_step_time": 0.37244129180908203
    },
    {
      "epoch": 0.000174884033203125,
      "model_forward_time": 0.11506390571594238,
      "step": 28653
    },
    {
      "epoch": 0.000174884033203125,
      "step": 28653,
      "training_step_time": 0.4510195255279541
    },
    {
      "epoch": 0.00017489013671875,
      "model_forward_time": 0.1148672103881836,
      "step": 28654
    },
    {
      "epoch": 0.00017489013671875,
      "step": 28654,
      "training_step_time": 0.483689546585083
    },
    {
      "epoch": 0.000174896240234375,
      "model_forward_time": 0.11520528793334961,
      "step": 28655
    },
    {
      "epoch": 0.000174896240234375,
      "step": 28655,
      "training_step_time": 0.4529235363006592
    },
    {
      "epoch": 0.00017490234375,
      "model_forward_time": 0.11509132385253906,
      "step": 28656
    },
    {
      "epoch": 0.00017490234375,
      "step": 28656,
      "training_step_time": 0.3841540813446045
    },
    {
      "epoch": 0.000174908447265625,
      "model_forward_time": 0.11502885818481445,
      "step": 28657
    },
    {
      "epoch": 0.000174908447265625,
      "step": 28657,
      "training_step_time": 0.4691450595855713
    },
    {
      "epoch": 0.00017491455078125,
      "model_forward_time": 0.11530280113220215,
      "step": 28658
    },
    {
      "epoch": 0.00017491455078125,
      "step": 28658,
      "training_step_time": 0.3962888717651367
    },
    {
      "epoch": 0.000174920654296875,
      "model_forward_time": 0.11591148376464844,
      "step": 28659
    },
    {
      "epoch": 0.000174920654296875,
      "step": 28659,
      "training_step_time": 0.3883821964263916
    },
    {
      "epoch": 0.0001749267578125,
      "grad_norm": 0.11622701585292816,
      "learning_rate": 5.7794503517698974e-05,
      "loss": 0.0404,
      "step": 28660
    },
    {
      "epoch": 0.0001749267578125,
      "model_forward_time": 0.11577415466308594,
      "step": 28660
    },
    {
      "epoch": 0.0001749267578125,
      "step": 28660,
      "training_step_time": 0.3978142738342285
    },
    {
      "epoch": 0.000174932861328125,
      "model_forward_time": 0.1150825023651123,
      "step": 28661
    },
    {
      "epoch": 0.000174932861328125,
      "step": 28661,
      "training_step_time": 0.39461326599121094
    },
    {
      "epoch": 0.00017493896484375,
      "model_forward_time": 0.11498808860778809,
      "step": 28662
    },
    {
      "epoch": 0.00017493896484375,
      "step": 28662,
      "training_step_time": 0.39646267890930176
    },
    {
      "epoch": 0.000174945068359375,
      "model_forward_time": 0.11493325233459473,
      "step": 28663
    },
    {
      "epoch": 0.000174945068359375,
      "step": 28663,
      "training_step_time": 0.6252305507659912
    },
    {
      "epoch": 0.000174951171875,
      "model_forward_time": 0.11714553833007812,
      "step": 28664
    },
    {
      "epoch": 0.000174951171875,
      "step": 28664,
      "training_step_time": 0.5129003524780273
    },
    {
      "epoch": 0.000174957275390625,
      "model_forward_time": 0.115081787109375,
      "step": 28665
    },
    {
      "epoch": 0.000174957275390625,
      "step": 28665,
      "training_step_time": 0.3873255252838135
    },
    {
      "epoch": 0.00017496337890625,
      "model_forward_time": 0.11788678169250488,
      "step": 28666
    },
    {
      "epoch": 0.00017496337890625,
      "step": 28666,
      "training_step_time": 0.43828630447387695
    },
    {
      "epoch": 0.000174969482421875,
      "model_forward_time": 0.11841392517089844,
      "step": 28667
    },
    {
      "epoch": 0.000174969482421875,
      "step": 28667,
      "training_step_time": 0.4613802433013916
    },
    {
      "epoch": 0.0001749755859375,
      "model_forward_time": 0.11700105667114258,
      "step": 28668
    },
    {
      "epoch": 0.0001749755859375,
      "step": 28668,
      "training_step_time": 0.3910243511199951
    },
    {
      "epoch": 0.000174981689453125,
      "model_forward_time": 0.11790823936462402,
      "step": 28669
    },
    {
      "epoch": 0.000174981689453125,
      "step": 28669,
      "training_step_time": 0.40654897689819336
    },
    {
      "epoch": 0.00017498779296875,
      "grad_norm": 0.12426888942718506,
      "learning_rate": 5.776728141562203e-05,
      "loss": 0.0481,
      "step": 28670
    },
    {
      "epoch": 0.00017498779296875,
      "model_forward_time": 0.11549568176269531,
      "step": 28670
    },
    {
      "epoch": 0.00017498779296875,
      "step": 28670,
      "training_step_time": 0.40202975273132324
    },
    {
      "epoch": 0.000174993896484375,
      "model_forward_time": 0.11506867408752441,
      "step": 28671
    },
    {
      "epoch": 0.000174993896484375,
      "step": 28671,
      "training_step_time": 0.3913896083831787
    },
    {
      "epoch": 0.000175,
      "model_forward_time": 0.11555647850036621,
      "step": 28672
    },
    {
      "epoch": 0.000175,
      "step": 28672,
      "training_step_time": 0.3992576599121094
    },
    {
      "epoch": 0.000175006103515625,
      "model_forward_time": 0.11543989181518555,
      "step": 28673
    },
    {
      "epoch": 0.000175006103515625,
      "step": 28673,
      "training_step_time": 0.3966059684753418
    },
    {
      "epoch": 0.00017501220703125,
      "model_forward_time": 0.11531186103820801,
      "step": 28674
    },
    {
      "epoch": 0.00017501220703125,
      "step": 28674,
      "training_step_time": 0.3933429718017578
    },
    {
      "epoch": 0.000175018310546875,
      "model_forward_time": 0.11513662338256836,
      "step": 28675
    },
    {
      "epoch": 0.000175018310546875,
      "step": 28675,
      "training_step_time": 0.7401473522186279
    },
    {
      "epoch": 0.0001750244140625,
      "model_forward_time": 0.11510467529296875,
      "step": 28676
    },
    {
      "epoch": 0.0001750244140625,
      "step": 28676,
      "training_step_time": 0.3894832134246826
    },
    {
      "epoch": 0.000175030517578125,
      "model_forward_time": 0.11519265174865723,
      "step": 28677
    },
    {
      "epoch": 0.000175030517578125,
      "step": 28677,
      "training_step_time": 0.3885612487792969
    },
    {
      "epoch": 0.00017503662109375,
      "model_forward_time": 0.11578845977783203,
      "step": 28678
    },
    {
      "epoch": 0.00017503662109375,
      "step": 28678,
      "training_step_time": 0.41721105575561523
    },
    {
      "epoch": 0.000175042724609375,
      "model_forward_time": 0.11494231224060059,
      "step": 28679
    },
    {
      "epoch": 0.000175042724609375,
      "step": 28679,
      "training_step_time": 0.47705626487731934
    },
    {
      "epoch": 0.000175048828125,
      "grad_norm": 0.14608623087406158,
      "learning_rate": 5.7740056954050084e-05,
      "loss": 0.0471,
      "step": 28680
    },
    {
      "epoch": 0.000175048828125,
      "model_forward_time": 0.11484265327453613,
      "step": 28680
    },
    {
      "epoch": 0.000175048828125,
      "step": 28680,
      "training_step_time": 0.48908066749572754
    },
    {
      "epoch": 0.000175054931640625,
      "model_forward_time": 0.11473202705383301,
      "step": 28681
    },
    {
      "epoch": 0.000175054931640625,
      "step": 28681,
      "training_step_time": 0.510612964630127
    },
    {
      "epoch": 0.00017506103515625,
      "model_forward_time": 0.11440539360046387,
      "step": 28682
    },
    {
      "epoch": 0.00017506103515625,
      "step": 28682,
      "training_step_time": 0.43082308769226074
    },
    {
      "epoch": 0.000175067138671875,
      "model_forward_time": 0.11493158340454102,
      "step": 28683
    },
    {
      "epoch": 0.000175067138671875,
      "step": 28683,
      "training_step_time": 0.38614368438720703
    },
    {
      "epoch": 0.0001750732421875,
      "model_forward_time": 0.11477851867675781,
      "step": 28684
    },
    {
      "epoch": 0.0001750732421875,
      "step": 28684,
      "training_step_time": 0.4010891914367676
    },
    {
      "epoch": 0.000175079345703125,
      "model_forward_time": 0.11480998992919922,
      "step": 28685
    },
    {
      "epoch": 0.000175079345703125,
      "step": 28685,
      "training_step_time": 0.4027528762817383
    },
    {
      "epoch": 0.00017508544921875,
      "model_forward_time": 0.11508679389953613,
      "step": 28686
    },
    {
      "epoch": 0.00017508544921875,
      "step": 28686,
      "training_step_time": 0.40337538719177246
    },
    {
      "epoch": 0.000175091552734375,
      "model_forward_time": 0.11588144302368164,
      "step": 28687
    },
    {
      "epoch": 0.000175091552734375,
      "step": 28687,
      "training_step_time": 0.5831210613250732
    },
    {
      "epoch": 0.00017509765625,
      "model_forward_time": 0.1147298812866211,
      "step": 28688
    },
    {
      "epoch": 0.00017509765625,
      "step": 28688,
      "training_step_time": 0.39306116104125977
    },
    {
      "epoch": 0.000175103759765625,
      "model_forward_time": 0.11481308937072754,
      "step": 28689
    },
    {
      "epoch": 0.000175103759765625,
      "step": 28689,
      "training_step_time": 0.3860146999359131
    },
    {
      "epoch": 0.00017510986328125,
      "grad_norm": 0.12085560709238052,
      "learning_rate": 5.771283014125317e-05,
      "loss": 0.042,
      "step": 28690
    },
    {
      "epoch": 0.00017510986328125,
      "model_forward_time": 0.11563229560852051,
      "step": 28690
    },
    {
      "epoch": 0.00017510986328125,
      "step": 28690,
      "training_step_time": 0.3972456455230713
    },
    {
      "epoch": 0.000175115966796875,
      "model_forward_time": 0.11478352546691895,
      "step": 28691
    },
    {
      "epoch": 0.000175115966796875,
      "step": 28691,
      "training_step_time": 0.38578367233276367
    },
    {
      "epoch": 0.0001751220703125,
      "model_forward_time": 0.11561727523803711,
      "step": 28692
    },
    {
      "epoch": 0.0001751220703125,
      "step": 28692,
      "training_step_time": 0.42289161682128906
    },
    {
      "epoch": 0.000175128173828125,
      "model_forward_time": 0.11488580703735352,
      "step": 28693
    },
    {
      "epoch": 0.000175128173828125,
      "step": 28693,
      "training_step_time": 0.714047908782959
    },
    {
      "epoch": 0.00017513427734375,
      "model_forward_time": 0.11539816856384277,
      "step": 28694
    },
    {
      "epoch": 0.00017513427734375,
      "step": 28694,
      "training_step_time": 0.4535031318664551
    },
    {
      "epoch": 0.000175140380859375,
      "model_forward_time": 0.11536359786987305,
      "step": 28695
    },
    {
      "epoch": 0.000175140380859375,
      "step": 28695,
      "training_step_time": 0.47332334518432617
    },
    {
      "epoch": 0.000175146484375,
      "model_forward_time": 0.11503815650939941,
      "step": 28696
    },
    {
      "epoch": 0.000175146484375,
      "step": 28696,
      "training_step_time": 0.4114406108856201
    },
    {
      "epoch": 0.000175152587890625,
      "model_forward_time": 0.11419248580932617,
      "step": 28697
    },
    {
      "epoch": 0.000175152587890625,
      "step": 28697,
      "training_step_time": 0.39459991455078125
    },
    {
      "epoch": 0.00017515869140625,
      "model_forward_time": 0.11396527290344238,
      "step": 28698
    },
    {
      "epoch": 0.00017515869140625,
      "step": 28698,
      "training_step_time": 0.3858201503753662
    },
    {
      "epoch": 0.000175164794921875,
      "model_forward_time": 0.11523318290710449,
      "step": 28699
    },
    {
      "epoch": 0.000175164794921875,
      "step": 28699,
      "training_step_time": 0.4532148838043213
    },
    {
      "epoch": 0.0001751708984375,
      "grad_norm": 0.11825176328420639,
      "learning_rate": 5.768560098550213e-05,
      "loss": 0.0419,
      "step": 28700
    },
    {
      "epoch": 0.0001751708984375,
      "model_forward_time": 0.11463761329650879,
      "step": 28700
    },
    {
      "epoch": 0.0001751708984375,
      "step": 28700,
      "training_step_time": 0.393110990524292
    },
    {
      "epoch": 0.000175177001953125,
      "model_forward_time": 0.11548805236816406,
      "step": 28701
    },
    {
      "epoch": 0.000175177001953125,
      "step": 28701,
      "training_step_time": 0.397686243057251
    },
    {
      "epoch": 0.00017518310546875,
      "model_forward_time": 0.11527299880981445,
      "step": 28702
    },
    {
      "epoch": 0.00017518310546875,
      "step": 28702,
      "training_step_time": 0.39785218238830566
    },
    {
      "epoch": 0.000175189208984375,
      "model_forward_time": 0.11539864540100098,
      "step": 28703
    },
    {
      "epoch": 0.000175189208984375,
      "step": 28703,
      "training_step_time": 0.4007530212402344
    },
    {
      "epoch": 0.0001751953125,
      "model_forward_time": 0.11560559272766113,
      "step": 28704
    },
    {
      "epoch": 0.0001751953125,
      "step": 28704,
      "training_step_time": 0.40233635902404785
    },
    {
      "epoch": 0.000175201416015625,
      "model_forward_time": 0.11562299728393555,
      "step": 28705
    },
    {
      "epoch": 0.000175201416015625,
      "step": 28705,
      "training_step_time": 0.6613199710845947
    },
    {
      "epoch": 0.00017520751953125,
      "model_forward_time": 0.11498022079467773,
      "step": 28706
    },
    {
      "epoch": 0.00017520751953125,
      "step": 28706,
      "training_step_time": 0.40216827392578125
    },
    {
      "epoch": 0.000175213623046875,
      "model_forward_time": 0.11459589004516602,
      "step": 28707
    },
    {
      "epoch": 0.000175213623046875,
      "step": 28707,
      "training_step_time": 0.47841358184814453
    },
    {
      "epoch": 0.0001752197265625,
      "model_forward_time": 0.11864209175109863,
      "step": 28708
    },
    {
      "epoch": 0.0001752197265625,
      "step": 28708,
      "training_step_time": 0.4482893943786621
    },
    {
      "epoch": 0.000175225830078125,
      "model_forward_time": 0.11785268783569336,
      "step": 28709
    },
    {
      "epoch": 0.000175225830078125,
      "step": 28709,
      "training_step_time": 0.44257354736328125
    },
    {
      "epoch": 0.00017523193359375,
      "grad_norm": 0.10898851603269577,
      "learning_rate": 5.765836949506843e-05,
      "loss": 0.0414,
      "step": 28710
    },
    {
      "epoch": 0.00017523193359375,
      "model_forward_time": 0.11733222007751465,
      "step": 28710
    },
    {
      "epoch": 0.00017523193359375,
      "step": 28710,
      "training_step_time": 0.41240859031677246
    },
    {
      "epoch": 0.000175238037109375,
      "model_forward_time": 0.11750268936157227,
      "step": 28711
    },
    {
      "epoch": 0.000175238037109375,
      "step": 28711,
      "training_step_time": 0.4129331111907959
    },
    {
      "epoch": 0.000175244140625,
      "model_forward_time": 0.1180269718170166,
      "step": 28712
    },
    {
      "epoch": 0.000175244140625,
      "step": 28712,
      "training_step_time": 0.38426971435546875
    },
    {
      "epoch": 0.000175250244140625,
      "model_forward_time": 0.11745285987854004,
      "step": 28713
    },
    {
      "epoch": 0.000175250244140625,
      "step": 28713,
      "training_step_time": 0.37546300888061523
    },
    {
      "epoch": 0.00017525634765625,
      "model_forward_time": 0.12208962440490723,
      "step": 28714
    },
    {
      "epoch": 0.00017525634765625,
      "step": 28714,
      "training_step_time": 0.3867056369781494
    },
    {
      "epoch": 0.000175262451171875,
      "model_forward_time": 0.11702322959899902,
      "step": 28715
    },
    {
      "epoch": 0.000175262451171875,
      "step": 28715,
      "training_step_time": 0.3872251510620117
    },
    {
      "epoch": 0.0001752685546875,
      "model_forward_time": 0.11554622650146484,
      "step": 28716
    },
    {
      "epoch": 0.0001752685546875,
      "step": 28716,
      "training_step_time": 0.39678478240966797
    },
    {
      "epoch": 0.000175274658203125,
      "model_forward_time": 0.1154170036315918,
      "step": 28717
    },
    {
      "epoch": 0.000175274658203125,
      "step": 28717,
      "training_step_time": 0.7279977798461914
    },
    {
      "epoch": 0.00017528076171875,
      "model_forward_time": 0.11577320098876953,
      "step": 28718
    },
    {
      "epoch": 0.00017528076171875,
      "step": 28718,
      "training_step_time": 0.396730899810791
    },
    {
      "epoch": 0.000175286865234375,
      "model_forward_time": 0.11454200744628906,
      "step": 28719
    },
    {
      "epoch": 0.000175286865234375,
      "step": 28719,
      "training_step_time": 0.39671754837036133
    },
    {
      "epoch": 0.00017529296875,
      "grad_norm": 0.12012369185686111,
      "learning_rate": 5.763113567822429e-05,
      "loss": 0.0439,
      "step": 28720
    },
    {
      "epoch": 0.00017529296875,
      "model_forward_time": 0.11493229866027832,
      "step": 28720
    },
    {
      "epoch": 0.00017529296875,
      "step": 28720,
      "training_step_time": 0.39127063751220703
    },
    {
      "epoch": 0.000175299072265625,
      "model_forward_time": 0.11543130874633789,
      "step": 28721
    },
    {
      "epoch": 0.000175299072265625,
      "step": 28721,
      "training_step_time": 0.4798741340637207
    },
    {
      "epoch": 0.00017530517578125,
      "model_forward_time": 0.11478877067565918,
      "step": 28722
    },
    {
      "epoch": 0.00017530517578125,
      "step": 28722,
      "training_step_time": 0.4334371089935303
    },
    {
      "epoch": 0.000175311279296875,
      "model_forward_time": 0.1168673038482666,
      "step": 28723
    },
    {
      "epoch": 0.000175311279296875,
      "step": 28723,
      "training_step_time": 0.6414625644683838
    },
    {
      "epoch": 0.0001753173828125,
      "model_forward_time": 0.11483073234558105,
      "step": 28724
    },
    {
      "epoch": 0.0001753173828125,
      "step": 28724,
      "training_step_time": 0.3972282409667969
    },
    {
      "epoch": 0.000175323486328125,
      "model_forward_time": 0.11518430709838867,
      "step": 28725
    },
    {
      "epoch": 0.000175323486328125,
      "step": 28725,
      "training_step_time": 0.3968183994293213
    },
    {
      "epoch": 0.00017532958984375,
      "model_forward_time": 0.11505722999572754,
      "step": 28726
    },
    {
      "epoch": 0.00017532958984375,
      "step": 28726,
      "training_step_time": 0.3874037265777588
    },
    {
      "epoch": 0.000175335693359375,
      "model_forward_time": 0.11512160301208496,
      "step": 28727
    },
    {
      "epoch": 0.000175335693359375,
      "step": 28727,
      "training_step_time": 0.38564634323120117
    },
    {
      "epoch": 0.000175341796875,
      "model_forward_time": 0.11507964134216309,
      "step": 28728
    },
    {
      "epoch": 0.000175341796875,
      "step": 28728,
      "training_step_time": 0.39360570907592773
    },
    {
      "epoch": 0.000175347900390625,
      "model_forward_time": 0.11629366874694824,
      "step": 28729
    },
    {
      "epoch": 0.000175347900390625,
      "step": 28729,
      "training_step_time": 0.744490385055542
    },
    {
      "epoch": 0.00017535400390625,
      "grad_norm": 0.1477191001176834,
      "learning_rate": 5.760389954324261e-05,
      "loss": 0.0431,
      "step": 28730
    },
    {
      "epoch": 0.00017535400390625,
      "model_forward_time": 0.11452746391296387,
      "step": 28730
    },
    {
      "epoch": 0.00017535400390625,
      "step": 28730,
      "training_step_time": 0.3865978717803955
    },
    {
      "epoch": 0.000175360107421875,
      "model_forward_time": 0.11439871788024902,
      "step": 28731
    },
    {
      "epoch": 0.000175360107421875,
      "step": 28731,
      "training_step_time": 0.4181642532348633
    },
    {
      "epoch": 0.0001753662109375,
      "model_forward_time": 0.11488795280456543,
      "step": 28732
    },
    {
      "epoch": 0.0001753662109375,
      "step": 28732,
      "training_step_time": 0.3975083827972412
    },
    {
      "epoch": 0.000175372314453125,
      "model_forward_time": 0.11447334289550781,
      "step": 28733
    },
    {
      "epoch": 0.000175372314453125,
      "step": 28733,
      "training_step_time": 0.3889615535736084
    },
    {
      "epoch": 0.00017537841796875,
      "model_forward_time": 0.11493301391601562,
      "step": 28734
    },
    {
      "epoch": 0.00017537841796875,
      "step": 28734,
      "training_step_time": 0.438262939453125
    },
    {
      "epoch": 0.000175384521484375,
      "model_forward_time": 0.11557388305664062,
      "step": 28735
    },
    {
      "epoch": 0.000175384521484375,
      "step": 28735,
      "training_step_time": 0.5341870784759521
    },
    {
      "epoch": 0.000175390625,
      "model_forward_time": 0.1151125431060791,
      "step": 28736
    },
    {
      "epoch": 0.000175390625,
      "step": 28736,
      "training_step_time": 0.46665263175964355
    },
    {
      "epoch": 0.000175396728515625,
      "model_forward_time": 0.11545157432556152,
      "step": 28737
    },
    {
      "epoch": 0.000175396728515625,
      "step": 28737,
      "training_step_time": 0.4908630847930908
    },
    {
      "epoch": 0.00017540283203125,
      "model_forward_time": 0.11499881744384766,
      "step": 28738
    },
    {
      "epoch": 0.00017540283203125,
      "step": 28738,
      "training_step_time": 0.39806246757507324
    },
    {
      "epoch": 0.000175408935546875,
      "model_forward_time": 0.11583876609802246,
      "step": 28739
    },
    {
      "epoch": 0.000175408935546875,
      "step": 28739,
      "training_step_time": 0.3913109302520752
    },
    {
      "epoch": 0.0001754150390625,
      "grad_norm": 0.1077595204114914,
      "learning_rate": 5.757666109839702e-05,
      "loss": 0.0393,
      "step": 28740
    },
    {
      "epoch": 0.0001754150390625,
      "model_forward_time": 0.11503148078918457,
      "step": 28740
    },
    {
      "epoch": 0.0001754150390625,
      "step": 28740,
      "training_step_time": 0.3936889171600342
    },
    {
      "epoch": 0.000175421142578125,
      "model_forward_time": 0.1152641773223877,
      "step": 28741
    },
    {
      "epoch": 0.000175421142578125,
      "step": 28741,
      "training_step_time": 0.5280797481536865
    },
    {
      "epoch": 0.00017542724609375,
      "model_forward_time": 0.11479496955871582,
      "step": 28742
    },
    {
      "epoch": 0.00017542724609375,
      "step": 28742,
      "training_step_time": 0.40305304527282715
    },
    {
      "epoch": 0.000175433349609375,
      "model_forward_time": 0.11538457870483398,
      "step": 28743
    },
    {
      "epoch": 0.000175433349609375,
      "step": 28743,
      "training_step_time": 0.3920724391937256
    },
    {
      "epoch": 0.000175439453125,
      "model_forward_time": 0.11530065536499023,
      "step": 28744
    },
    {
      "epoch": 0.000175439453125,
      "step": 28744,
      "training_step_time": 0.43341064453125
    },
    {
      "epoch": 0.000175445556640625,
      "model_forward_time": 0.1153111457824707,
      "step": 28745
    },
    {
      "epoch": 0.000175445556640625,
      "step": 28745,
      "training_step_time": 0.4074435234069824
    },
    {
      "epoch": 0.00017545166015625,
      "model_forward_time": 0.11607027053833008,
      "step": 28746
    },
    {
      "epoch": 0.00017545166015625,
      "step": 28746,
      "training_step_time": 0.3993234634399414
    },
    {
      "epoch": 0.000175457763671875,
      "model_forward_time": 0.11528778076171875,
      "step": 28747
    },
    {
      "epoch": 0.000175457763671875,
      "step": 28747,
      "training_step_time": 0.6350493431091309
    },
    {
      "epoch": 0.0001754638671875,
      "model_forward_time": 0.11497378349304199,
      "step": 28748
    },
    {
      "epoch": 0.0001754638671875,
      "step": 28748,
      "training_step_time": 0.41062188148498535
    },
    {
      "epoch": 0.000175469970703125,
      "model_forward_time": 0.11496686935424805,
      "step": 28749
    },
    {
      "epoch": 0.000175469970703125,
      "step": 28749,
      "training_step_time": 0.36703991889953613
    },
    {
      "epoch": 0.00017547607421875,
      "grad_norm": 0.24708549678325653,
      "learning_rate": 5.7549420351961844e-05,
      "loss": 0.0408,
      "step": 28750
    },
    {
      "epoch": 0.00017547607421875,
      "model_forward_time": 0.1174933910369873,
      "step": 28750
    },
    {
      "epoch": 0.00017547607421875,
      "step": 28750,
      "training_step_time": 0.42063331604003906
    },
    {
      "epoch": 0.000175482177734375,
      "model_forward_time": 0.11953926086425781,
      "step": 28751
    },
    {
      "epoch": 0.000175482177734375,
      "step": 28751,
      "training_step_time": 0.38732433319091797
    },
    {
      "epoch": 0.00017548828125,
      "model_forward_time": 0.11752915382385254,
      "step": 28752
    },
    {
      "epoch": 0.00017548828125,
      "step": 28752,
      "training_step_time": 0.37602686882019043
    },
    {
      "epoch": 0.000175494384765625,
      "model_forward_time": 0.1185302734375,
      "step": 28753
    },
    {
      "epoch": 0.000175494384765625,
      "step": 28753,
      "training_step_time": 0.4607553482055664
    },
    {
      "epoch": 0.00017550048828125,
      "model_forward_time": 0.11896944046020508,
      "step": 28754
    },
    {
      "epoch": 0.00017550048828125,
      "step": 28754,
      "training_step_time": 0.3837754726409912
    },
    {
      "epoch": 0.000175506591796875,
      "model_forward_time": 0.11578989028930664,
      "step": 28755
    },
    {
      "epoch": 0.000175506591796875,
      "step": 28755,
      "training_step_time": 0.3979933261871338
    },
    {
      "epoch": 0.0001755126953125,
      "model_forward_time": 0.11585402488708496,
      "step": 28756
    },
    {
      "epoch": 0.0001755126953125,
      "step": 28756,
      "training_step_time": 0.4143049716949463
    },
    {
      "epoch": 0.000175518798828125,
      "model_forward_time": 0.11544466018676758,
      "step": 28757
    },
    {
      "epoch": 0.000175518798828125,
      "step": 28757,
      "training_step_time": 0.41292691230773926
    },
    {
      "epoch": 0.00017552490234375,
      "model_forward_time": 0.11533474922180176,
      "step": 28758
    },
    {
      "epoch": 0.00017552490234375,
      "step": 28758,
      "training_step_time": 0.410017728805542
    },
    {
      "epoch": 0.000175531005859375,
      "model_forward_time": 0.11564016342163086,
      "step": 28759
    },
    {
      "epoch": 0.000175531005859375,
      "step": 28759,
      "training_step_time": 0.7519979476928711
    },
    {
      "epoch": 0.000175537109375,
      "grad_norm": 0.13254185020923615,
      "learning_rate": 5.75221773122121e-05,
      "loss": 0.0473,
      "step": 28760
    },
    {
      "epoch": 0.000175537109375,
      "model_forward_time": 0.11530375480651855,
      "step": 28760
    },
    {
      "epoch": 0.000175537109375,
      "step": 28760,
      "training_step_time": 0.39902758598327637
    },
    {
      "epoch": 0.000175543212890625,
      "model_forward_time": 0.11478018760681152,
      "step": 28761
    },
    {
      "epoch": 0.000175543212890625,
      "step": 28761,
      "training_step_time": 0.39514899253845215
    },
    {
      "epoch": 0.00017554931640625,
      "model_forward_time": 0.11461353302001953,
      "step": 28762
    },
    {
      "epoch": 0.00017554931640625,
      "step": 28762,
      "training_step_time": 0.44678449630737305
    },
    {
      "epoch": 0.000175555419921875,
      "model_forward_time": 0.1147007942199707,
      "step": 28763
    },
    {
      "epoch": 0.000175555419921875,
      "step": 28763,
      "training_step_time": 0.46204113960266113
    },
    {
      "epoch": 0.0001755615234375,
      "model_forward_time": 0.11541509628295898,
      "step": 28764
    },
    {
      "epoch": 0.0001755615234375,
      "step": 28764,
      "training_step_time": 0.4953644275665283
    },
    {
      "epoch": 0.000175567626953125,
      "model_forward_time": 0.11519789695739746,
      "step": 28765
    },
    {
      "epoch": 0.000175567626953125,
      "step": 28765,
      "training_step_time": 0.4549393653869629
    },
    {
      "epoch": 0.00017557373046875,
      "model_forward_time": 0.11473536491394043,
      "step": 28766
    },
    {
      "epoch": 0.00017557373046875,
      "step": 28766,
      "training_step_time": 0.3927161693572998
    },
    {
      "epoch": 0.000175579833984375,
      "model_forward_time": 0.11615562438964844,
      "step": 28767
    },
    {
      "epoch": 0.000175579833984375,
      "step": 28767,
      "training_step_time": 0.3957395553588867
    },
    {
      "epoch": 0.0001755859375,
      "model_forward_time": 0.11599946022033691,
      "step": 28768
    },
    {
      "epoch": 0.0001755859375,
      "step": 28768,
      "training_step_time": 0.384934663772583
    },
    {
      "epoch": 0.000175592041015625,
      "model_forward_time": 0.11543655395507812,
      "step": 28769
    },
    {
      "epoch": 0.000175592041015625,
      "step": 28769,
      "training_step_time": 0.3940725326538086
    },
    {
      "epoch": 0.00017559814453125,
      "grad_norm": 0.15502935647964478,
      "learning_rate": 5.74949319874235e-05,
      "loss": 0.0438,
      "step": 28770
    },
    {
      "epoch": 0.00017559814453125,
      "model_forward_time": 0.11618614196777344,
      "step": 28770
    },
    {
      "epoch": 0.00017559814453125,
      "step": 28770,
      "training_step_time": 0.43400073051452637
    },
    {
      "epoch": 0.000175604248046875,
      "model_forward_time": 0.11593794822692871,
      "step": 28771
    },
    {
      "epoch": 0.000175604248046875,
      "step": 28771,
      "training_step_time": 0.5651652812957764
    },
    {
      "epoch": 0.0001756103515625,
      "model_forward_time": 0.11486530303955078,
      "step": 28772
    },
    {
      "epoch": 0.0001756103515625,
      "step": 28772,
      "training_step_time": 0.37984728813171387
    },
    {
      "epoch": 0.000175616455078125,
      "model_forward_time": 0.11561059951782227,
      "step": 28773
    },
    {
      "epoch": 0.000175616455078125,
      "step": 28773,
      "training_step_time": 0.3920257091522217
    },
    {
      "epoch": 0.00017562255859375,
      "model_forward_time": 0.1154177188873291,
      "step": 28774
    },
    {
      "epoch": 0.00017562255859375,
      "step": 28774,
      "training_step_time": 0.3920152187347412
    },
    {
      "epoch": 0.000175628662109375,
      "model_forward_time": 0.1153404712677002,
      "step": 28775
    },
    {
      "epoch": 0.000175628662109375,
      "step": 28775,
      "training_step_time": 0.3848562240600586
    },
    {
      "epoch": 0.000175634765625,
      "model_forward_time": 0.11593389511108398,
      "step": 28776
    },
    {
      "epoch": 0.000175634765625,
      "step": 28776,
      "training_step_time": 0.483381986618042
    },
    {
      "epoch": 0.000175640869140625,
      "model_forward_time": 0.11607074737548828,
      "step": 28777
    },
    {
      "epoch": 0.000175640869140625,
      "step": 28777,
      "training_step_time": 0.6052646636962891
    },
    {
      "epoch": 0.00017564697265625,
      "model_forward_time": 0.1153256893157959,
      "step": 28778
    },
    {
      "epoch": 0.00017564697265625,
      "step": 28778,
      "training_step_time": 0.3942291736602783
    },
    {
      "epoch": 0.000175653076171875,
      "model_forward_time": 0.11638569831848145,
      "step": 28779
    },
    {
      "epoch": 0.000175653076171875,
      "step": 28779,
      "training_step_time": 0.41771984100341797
    },
    {
      "epoch": 0.0001756591796875,
      "grad_norm": 0.11598386615514755,
      "learning_rate": 5.746768438587245e-05,
      "loss": 0.0484,
      "step": 28780
    },
    {
      "epoch": 0.0001756591796875,
      "model_forward_time": 0.11560797691345215,
      "step": 28780
    },
    {
      "epoch": 0.0001756591796875,
      "step": 28780,
      "training_step_time": 0.38486671447753906
    },
    {
      "epoch": 0.000175665283203125,
      "model_forward_time": 0.11470866203308105,
      "step": 28781
    },
    {
      "epoch": 0.000175665283203125,
      "step": 28781,
      "training_step_time": 0.3880338668823242
    },
    {
      "epoch": 0.00017567138671875,
      "model_forward_time": 0.11507225036621094,
      "step": 28782
    },
    {
      "epoch": 0.00017567138671875,
      "step": 28782,
      "training_step_time": 0.39690113067626953
    },
    {
      "epoch": 0.000175677490234375,
      "model_forward_time": 0.1151878833770752,
      "step": 28783
    },
    {
      "epoch": 0.000175677490234375,
      "step": 28783,
      "training_step_time": 0.6860482692718506
    },
    {
      "epoch": 0.00017568359375,
      "model_forward_time": 0.11511516571044922,
      "step": 28784
    },
    {
      "epoch": 0.00017568359375,
      "step": 28784,
      "training_step_time": 0.4043459892272949
    },
    {
      "epoch": 0.000175689697265625,
      "model_forward_time": 0.11504101753234863,
      "step": 28785
    },
    {
      "epoch": 0.000175689697265625,
      "step": 28785,
      "training_step_time": 0.38840508460998535
    },
    {
      "epoch": 0.00017569580078125,
      "model_forward_time": 0.11492419242858887,
      "step": 28786
    },
    {
      "epoch": 0.00017569580078125,
      "step": 28786,
      "training_step_time": 0.3956434726715088
    },
    {
      "epoch": 0.000175701904296875,
      "model_forward_time": 0.11570286750793457,
      "step": 28787
    },
    {
      "epoch": 0.000175701904296875,
      "step": 28787,
      "training_step_time": 0.4124283790588379
    },
    {
      "epoch": 0.0001757080078125,
      "model_forward_time": 0.11469411849975586,
      "step": 28788
    },
    {
      "epoch": 0.0001757080078125,
      "step": 28788,
      "training_step_time": 0.4011831283569336
    },
    {
      "epoch": 0.000175714111328125,
      "model_forward_time": 0.11615228652954102,
      "step": 28789
    },
    {
      "epoch": 0.000175714111328125,
      "step": 28789,
      "training_step_time": 0.7010400295257568
    },
    {
      "epoch": 0.00017572021484375,
      "grad_norm": 0.16411451995372772,
      "learning_rate": 5.7440434515836064e-05,
      "loss": 0.0427,
      "step": 28790
    },
    {
      "epoch": 0.00017572021484375,
      "model_forward_time": 0.1148233413696289,
      "step": 28790
    },
    {
      "epoch": 0.00017572021484375,
      "step": 28790,
      "training_step_time": 0.44115781784057617
    },
    {
      "epoch": 0.000175726318359375,
      "model_forward_time": 0.11530637741088867,
      "step": 28791
    },
    {
      "epoch": 0.000175726318359375,
      "step": 28791,
      "training_step_time": 0.44533634185791016
    },
    {
      "epoch": 0.000175732421875,
      "model_forward_time": 0.1148233413696289,
      "step": 28792
    },
    {
      "epoch": 0.000175732421875,
      "step": 28792,
      "training_step_time": 0.49229907989501953
    },
    {
      "epoch": 0.000175738525390625,
      "model_forward_time": 0.11470746994018555,
      "step": 28793
    },
    {
      "epoch": 0.000175738525390625,
      "step": 28793,
      "training_step_time": 0.38945603370666504
    },
    {
      "epoch": 0.00017574462890625,
      "model_forward_time": 0.11461257934570312,
      "step": 28794
    },
    {
      "epoch": 0.00017574462890625,
      "step": 28794,
      "training_step_time": 0.3830068111419678
    },
    {
      "epoch": 0.000175750732421875,
      "model_forward_time": 0.11600422859191895,
      "step": 28795
    },
    {
      "epoch": 0.000175750732421875,
      "step": 28795,
      "training_step_time": 0.49854230880737305
    },
    {
      "epoch": 0.0001757568359375,
      "model_forward_time": 0.11527228355407715,
      "step": 28796
    },
    {
      "epoch": 0.0001757568359375,
      "step": 28796,
      "training_step_time": 0.4191315174102783
    },
    {
      "epoch": 0.000175762939453125,
      "model_forward_time": 0.11528706550598145,
      "step": 28797
    },
    {
      "epoch": 0.000175762939453125,
      "step": 28797,
      "training_step_time": 0.4009690284729004
    },
    {
      "epoch": 0.00017576904296875,
      "model_forward_time": 0.11483263969421387,
      "step": 28798
    },
    {
      "epoch": 0.00017576904296875,
      "step": 28798,
      "training_step_time": 0.38608622550964355
    },
    {
      "epoch": 0.000175775146484375,
      "model_forward_time": 0.11474967002868652,
      "step": 28799
    },
    {
      "epoch": 0.000175775146484375,
      "step": 28799,
      "training_step_time": 0.3909149169921875
    },
    {
      "epoch": 0.00017578125,
      "grad_norm": 0.12230910360813141,
      "learning_rate": 5.74131823855921e-05,
      "loss": 0.0416,
      "step": 28800
    },
    {
      "epoch": 0.00017578125,
      "model_forward_time": 0.11561274528503418,
      "step": 28800
    },
    {
      "epoch": 0.00017578125,
      "step": 28800,
      "training_step_time": 0.3948371410369873
    },
    {
      "epoch": 0.000175787353515625,
      "model_forward_time": 0.11523318290710449,
      "step": 28801
    },
    {
      "epoch": 0.000175787353515625,
      "step": 28801,
      "training_step_time": 0.7750387191772461
    },
    {
      "epoch": 0.00017579345703125,
      "model_forward_time": 0.11413192749023438,
      "step": 28802
    },
    {
      "epoch": 0.00017579345703125,
      "step": 28802,
      "training_step_time": 0.38229870796203613
    },
    {
      "epoch": 0.000175799560546875,
      "model_forward_time": 0.11507940292358398,
      "step": 28803
    },
    {
      "epoch": 0.000175799560546875,
      "step": 28803,
      "training_step_time": 0.4121830463409424
    },
    {
      "epoch": 0.0001758056640625,
      "model_forward_time": 0.11552739143371582,
      "step": 28804
    },
    {
      "epoch": 0.0001758056640625,
      "step": 28804,
      "training_step_time": 0.39496731758117676
    },
    {
      "epoch": 0.000175811767578125,
      "model_forward_time": 0.11488461494445801,
      "step": 28805
    },
    {
      "epoch": 0.000175811767578125,
      "step": 28805,
      "training_step_time": 0.44008731842041016
    },
    {
      "epoch": 0.00017581787109375,
      "model_forward_time": 0.11425185203552246,
      "step": 28806
    },
    {
      "epoch": 0.00017581787109375,
      "step": 28806,
      "training_step_time": 0.41061854362487793
    },
    {
      "epoch": 0.000175823974609375,
      "model_forward_time": 0.11503767967224121,
      "step": 28807
    },
    {
      "epoch": 0.000175823974609375,
      "step": 28807,
      "training_step_time": 0.4902524948120117
    },
    {
      "epoch": 0.000175830078125,
      "model_forward_time": 0.11524319648742676,
      "step": 28808
    },
    {
      "epoch": 0.000175830078125,
      "step": 28808,
      "training_step_time": 0.38973331451416016
    },
    {
      "epoch": 0.000175836181640625,
      "model_forward_time": 0.11502552032470703,
      "step": 28809
    },
    {
      "epoch": 0.000175836181640625,
      "step": 28809,
      "training_step_time": 0.4142735004425049
    },
    {
      "epoch": 0.00017584228515625,
      "grad_norm": 0.16971899569034576,
      "learning_rate": 5.7385928003419085e-05,
      "loss": 0.0429,
      "step": 28810
    },
    {
      "epoch": 0.00017584228515625,
      "model_forward_time": 0.11430835723876953,
      "step": 28810
    },
    {
      "epoch": 0.00017584228515625,
      "step": 28810,
      "training_step_time": 0.3825864791870117
    },
    {
      "epoch": 0.000175848388671875,
      "model_forward_time": 0.11579346656799316,
      "step": 28811
    },
    {
      "epoch": 0.000175848388671875,
      "step": 28811,
      "training_step_time": 0.3995521068572998
    },
    {
      "epoch": 0.0001758544921875,
      "model_forward_time": 0.1150217056274414,
      "step": 28812
    },
    {
      "epoch": 0.0001758544921875,
      "step": 28812,
      "training_step_time": 0.39569091796875
    },
    {
      "epoch": 0.000175860595703125,
      "model_forward_time": 0.11582422256469727,
      "step": 28813
    },
    {
      "epoch": 0.000175860595703125,
      "step": 28813,
      "training_step_time": 0.6066572666168213
    },
    {
      "epoch": 0.00017586669921875,
      "model_forward_time": 0.11532473564147949,
      "step": 28814
    },
    {
      "epoch": 0.00017586669921875,
      "step": 28814,
      "training_step_time": 0.3910963535308838
    },
    {
      "epoch": 0.000175872802734375,
      "model_forward_time": 0.11501932144165039,
      "step": 28815
    },
    {
      "epoch": 0.000175872802734375,
      "step": 28815,
      "training_step_time": 0.38159990310668945
    },
    {
      "epoch": 0.00017587890625,
      "model_forward_time": 0.11543941497802734,
      "step": 28816
    },
    {
      "epoch": 0.00017587890625,
      "step": 28816,
      "training_step_time": 0.38571763038635254
    },
    {
      "epoch": 0.000175885009765625,
      "model_forward_time": 0.11563754081726074,
      "step": 28817
    },
    {
      "epoch": 0.000175885009765625,
      "step": 28817,
      "training_step_time": 0.450559139251709
    },
    {
      "epoch": 0.00017589111328125,
      "model_forward_time": 0.1148684024810791,
      "step": 28818
    },
    {
      "epoch": 0.00017589111328125,
      "step": 28818,
      "training_step_time": 0.40105700492858887
    },
    {
      "epoch": 0.000175897216796875,
      "model_forward_time": 0.11508965492248535,
      "step": 28819
    },
    {
      "epoch": 0.000175897216796875,
      "step": 28819,
      "training_step_time": 0.5667762756347656
    },
    {
      "epoch": 0.0001759033203125,
      "grad_norm": 0.11488042026758194,
      "learning_rate": 5.735867137759615e-05,
      "loss": 0.0423,
      "step": 28820
    },
    {
      "epoch": 0.0001759033203125,
      "model_forward_time": 0.11552715301513672,
      "step": 28820
    },
    {
      "epoch": 0.0001759033203125,
      "step": 28820,
      "training_step_time": 0.4803438186645508
    },
    {
      "epoch": 0.000175909423828125,
      "model_forward_time": 0.11567521095275879,
      "step": 28821
    },
    {
      "epoch": 0.000175909423828125,
      "step": 28821,
      "training_step_time": 0.3937091827392578
    },
    {
      "epoch": 0.00017591552734375,
      "model_forward_time": 0.11448407173156738,
      "step": 28822
    },
    {
      "epoch": 0.00017591552734375,
      "step": 28822,
      "training_step_time": 0.44328808784484863
    },
    {
      "epoch": 0.000175921630859375,
      "model_forward_time": 0.11434054374694824,
      "step": 28823
    },
    {
      "epoch": 0.000175921630859375,
      "step": 28823,
      "training_step_time": 0.3925960063934326
    },
    {
      "epoch": 0.000175927734375,
      "model_forward_time": 0.1158304214477539,
      "step": 28824
    },
    {
      "epoch": 0.000175927734375,
      "step": 28824,
      "training_step_time": 0.4091947078704834
    },
    {
      "epoch": 0.000175933837890625,
      "model_forward_time": 0.11471748352050781,
      "step": 28825
    },
    {
      "epoch": 0.000175933837890625,
      "step": 28825,
      "training_step_time": 0.4675469398498535
    },
    {
      "epoch": 0.00017593994140625,
      "model_forward_time": 0.11573600769042969,
      "step": 28826
    },
    {
      "epoch": 0.00017593994140625,
      "step": 28826,
      "training_step_time": 0.3893101215362549
    },
    {
      "epoch": 0.000175946044921875,
      "model_forward_time": 0.1156773567199707,
      "step": 28827
    },
    {
      "epoch": 0.000175946044921875,
      "step": 28827,
      "training_step_time": 0.3875281810760498
    },
    {
      "epoch": 0.0001759521484375,
      "model_forward_time": 0.11504650115966797,
      "step": 28828
    },
    {
      "epoch": 0.0001759521484375,
      "step": 28828,
      "training_step_time": 0.3806643486022949
    },
    {
      "epoch": 0.000175958251953125,
      "model_forward_time": 0.11484718322753906,
      "step": 28829
    },
    {
      "epoch": 0.000175958251953125,
      "step": 28829,
      "training_step_time": 0.3891282081604004
    },
    {
      "epoch": 0.00017596435546875,
      "grad_norm": 0.10996624827384949,
      "learning_rate": 5.733141251640315e-05,
      "loss": 0.0376,
      "step": 28830
    },
    {
      "epoch": 0.00017596435546875,
      "model_forward_time": 0.1151731014251709,
      "step": 28830
    },
    {
      "epoch": 0.00017596435546875,
      "step": 28830,
      "training_step_time": 0.396714448928833
    },
    {
      "epoch": 0.000175970458984375,
      "model_forward_time": 0.11510705947875977,
      "step": 28831
    },
    {
      "epoch": 0.000175970458984375,
      "step": 28831,
      "training_step_time": 0.642899751663208
    },
    {
      "epoch": 0.0001759765625,
      "model_forward_time": 0.11496853828430176,
      "step": 28832
    },
    {
      "epoch": 0.0001759765625,
      "step": 28832,
      "training_step_time": 0.4449141025543213
    },
    {
      "epoch": 0.000175982666015625,
      "model_forward_time": 0.11476778984069824,
      "step": 28833
    },
    {
      "epoch": 0.000175982666015625,
      "step": 28833,
      "training_step_time": 0.39124560356140137
    },
    {
      "epoch": 0.00017598876953125,
      "model_forward_time": 0.11571264266967773,
      "step": 28834
    },
    {
      "epoch": 0.00017598876953125,
      "step": 28834,
      "training_step_time": 0.3930373191833496
    },
    {
      "epoch": 0.000175994873046875,
      "model_forward_time": 0.11487650871276855,
      "step": 28835
    },
    {
      "epoch": 0.000175994873046875,
      "step": 28835,
      "training_step_time": 0.40338706970214844
    },
    {
      "epoch": 0.0001760009765625,
      "model_forward_time": 0.11468982696533203,
      "step": 28836
    },
    {
      "epoch": 0.0001760009765625,
      "step": 28836,
      "training_step_time": 0.3927586078643799
    },
    {
      "epoch": 0.000176007080078125,
      "model_forward_time": 0.1147608757019043,
      "step": 28837
    },
    {
      "epoch": 0.000176007080078125,
      "step": 28837,
      "training_step_time": 0.49597620964050293
    },
    {
      "epoch": 0.00017601318359375,
      "model_forward_time": 0.11533665657043457,
      "step": 28838
    },
    {
      "epoch": 0.00017601318359375,
      "step": 28838,
      "training_step_time": 0.3865170478820801
    },
    {
      "epoch": 0.000176019287109375,
      "model_forward_time": 0.11562895774841309,
      "step": 28839
    },
    {
      "epoch": 0.000176019287109375,
      "step": 28839,
      "training_step_time": 0.4002079963684082
    },
    {
      "epoch": 0.000176025390625,
      "grad_norm": 0.1628565639257431,
      "learning_rate": 5.730415142812059e-05,
      "loss": 0.039,
      "step": 28840
    },
    {
      "epoch": 0.000176025390625,
      "model_forward_time": 0.11534953117370605,
      "step": 28840
    },
    {
      "epoch": 0.000176025390625,
      "step": 28840,
      "training_step_time": 0.38735127449035645
    },
    {
      "epoch": 0.000176031494140625,
      "model_forward_time": 0.11503887176513672,
      "step": 28841
    },
    {
      "epoch": 0.000176031494140625,
      "step": 28841,
      "training_step_time": 0.3862185478210449
    },
    {
      "epoch": 0.00017603759765625,
      "model_forward_time": 0.11500692367553711,
      "step": 28842
    },
    {
      "epoch": 0.00017603759765625,
      "step": 28842,
      "training_step_time": 0.4081442356109619
    },
    {
      "epoch": 0.000176043701171875,
      "model_forward_time": 0.11595892906188965,
      "step": 28843
    },
    {
      "epoch": 0.000176043701171875,
      "step": 28843,
      "training_step_time": 0.7123160362243652
    },
    {
      "epoch": 0.0001760498046875,
      "model_forward_time": 0.11458420753479004,
      "step": 28844
    },
    {
      "epoch": 0.0001760498046875,
      "step": 28844,
      "training_step_time": 0.440138578414917
    },
    {
      "epoch": 0.000176055908203125,
      "model_forward_time": 0.1156158447265625,
      "step": 28845
    },
    {
      "epoch": 0.000176055908203125,
      "step": 28845,
      "training_step_time": 0.3884255886077881
    },
    {
      "epoch": 0.00017606201171875,
      "model_forward_time": 0.12567663192749023,
      "step": 28846
    },
    {
      "epoch": 0.00017606201171875,
      "step": 28846,
      "training_step_time": 0.3835790157318115
    },
    {
      "epoch": 0.000176068115234375,
      "model_forward_time": 0.11755180358886719,
      "step": 28847
    },
    {
      "epoch": 0.000176068115234375,
      "step": 28847,
      "training_step_time": 0.4163820743560791
    },
    {
      "epoch": 0.00017607421875,
      "model_forward_time": 0.1179654598236084,
      "step": 28848
    },
    {
      "epoch": 0.00017607421875,
      "step": 28848,
      "training_step_time": 0.4019036293029785
    },
    {
      "epoch": 0.000176080322265625,
      "model_forward_time": 0.11853241920471191,
      "step": 28849
    },
    {
      "epoch": 0.000176080322265625,
      "step": 28849,
      "training_step_time": 0.4119105339050293
    },
    {
      "epoch": 0.00017608642578125,
      "grad_norm": 0.16865521669387817,
      "learning_rate": 5.727688812102967e-05,
      "loss": 0.0447,
      "step": 28850
    },
    {
      "epoch": 0.00017608642578125,
      "model_forward_time": 0.1178128719329834,
      "step": 28850
    },
    {
      "epoch": 0.00017608642578125,
      "step": 28850,
      "training_step_time": 0.38343071937561035
    },
    {
      "epoch": 0.000176092529296875,
      "model_forward_time": 0.11575627326965332,
      "step": 28851
    },
    {
      "epoch": 0.000176092529296875,
      "step": 28851,
      "training_step_time": 0.37955570220947266
    },
    {
      "epoch": 0.0001760986328125,
      "model_forward_time": 0.11643028259277344,
      "step": 28852
    },
    {
      "epoch": 0.0001760986328125,
      "step": 28852,
      "training_step_time": 0.3887333869934082
    },
    {
      "epoch": 0.000176104736328125,
      "model_forward_time": 0.11503148078918457,
      "step": 28853
    },
    {
      "epoch": 0.000176104736328125,
      "step": 28853,
      "training_step_time": 0.38886499404907227
    },
    {
      "epoch": 0.00017611083984375,
      "model_forward_time": 0.11609649658203125,
      "step": 28854
    },
    {
      "epoch": 0.00017611083984375,
      "step": 28854,
      "training_step_time": 0.3843545913696289
    },
    {
      "epoch": 0.000176116943359375,
      "model_forward_time": 0.11611652374267578,
      "step": 28855
    },
    {
      "epoch": 0.000176116943359375,
      "step": 28855,
      "training_step_time": 0.7892630100250244
    },
    {
      "epoch": 0.000176123046875,
      "model_forward_time": 0.11488986015319824,
      "step": 28856
    },
    {
      "epoch": 0.000176123046875,
      "step": 28856,
      "training_step_time": 0.40877461433410645
    },
    {
      "epoch": 0.000176129150390625,
      "model_forward_time": 0.11561775207519531,
      "step": 28857
    },
    {
      "epoch": 0.000176129150390625,
      "step": 28857,
      "training_step_time": 0.386380672454834
    },
    {
      "epoch": 0.00017613525390625,
      "model_forward_time": 0.11540699005126953,
      "step": 28858
    },
    {
      "epoch": 0.00017613525390625,
      "step": 28858,
      "training_step_time": 0.4887416362762451
    },
    {
      "epoch": 0.000176141357421875,
      "model_forward_time": 0.11494660377502441,
      "step": 28859
    },
    {
      "epoch": 0.000176141357421875,
      "step": 28859,
      "training_step_time": 0.4098789691925049
    },
    {
      "epoch": 0.0001761474609375,
      "grad_norm": 0.0945851132273674,
      "learning_rate": 5.72496226034123e-05,
      "loss": 0.0429,
      "step": 28860
    },
    {
      "epoch": 0.0001761474609375,
      "model_forward_time": 0.11441922187805176,
      "step": 28860
    },
    {
      "epoch": 0.0001761474609375,
      "step": 28860,
      "training_step_time": 0.3708028793334961
    },
    {
      "epoch": 0.000176153564453125,
      "model_forward_time": 0.11498284339904785,
      "step": 28861
    },
    {
      "epoch": 0.000176153564453125,
      "step": 28861,
      "training_step_time": 0.4325575828552246
    },
    {
      "epoch": 0.00017615966796875,
      "model_forward_time": 0.11483001708984375,
      "step": 28862
    },
    {
      "epoch": 0.00017615966796875,
      "step": 28862,
      "training_step_time": 0.4147005081176758
    },
    {
      "epoch": 0.000176165771484375,
      "model_forward_time": 0.11492061614990234,
      "step": 28863
    },
    {
      "epoch": 0.000176165771484375,
      "step": 28863,
      "training_step_time": 0.392486572265625
    },
    {
      "epoch": 0.000176171875,
      "model_forward_time": 0.11538147926330566,
      "step": 28864
    },
    {
      "epoch": 0.000176171875,
      "step": 28864,
      "training_step_time": 0.3972630500793457
    },
    {
      "epoch": 0.000176177978515625,
      "model_forward_time": 0.11516046524047852,
      "step": 28865
    },
    {
      "epoch": 0.000176177978515625,
      "step": 28865,
      "training_step_time": 0.40606236457824707
    },
    {
      "epoch": 0.00017618408203125,
      "model_forward_time": 0.11554527282714844,
      "step": 28866
    },
    {
      "epoch": 0.00017618408203125,
      "step": 28866,
      "training_step_time": 0.39977288246154785
    },
    {
      "epoch": 0.000176190185546875,
      "model_forward_time": 0.11521720886230469,
      "step": 28867
    },
    {
      "epoch": 0.000176190185546875,
      "step": 28867,
      "training_step_time": 0.5443334579467773
    },
    {
      "epoch": 0.0001761962890625,
      "model_forward_time": 0.1154942512512207,
      "step": 28868
    },
    {
      "epoch": 0.0001761962890625,
      "step": 28868,
      "training_step_time": 0.3897831439971924
    },
    {
      "epoch": 0.000176202392578125,
      "model_forward_time": 0.11539292335510254,
      "step": 28869
    },
    {
      "epoch": 0.000176202392578125,
      "step": 28869,
      "training_step_time": 0.39255785942077637
    },
    {
      "epoch": 0.00017620849609375,
      "grad_norm": 0.1585257649421692,
      "learning_rate": 5.722235488355099e-05,
      "loss": 0.0463,
      "step": 28870
    },
    {
      "epoch": 0.00017620849609375,
      "model_forward_time": 0.11513328552246094,
      "step": 28870
    },
    {
      "epoch": 0.00017620849609375,
      "step": 28870,
      "training_step_time": 0.3989553451538086
    },
    {
      "epoch": 0.000176214599609375,
      "model_forward_time": 0.11651444435119629,
      "step": 28871
    },
    {
      "epoch": 0.000176214599609375,
      "step": 28871,
      "training_step_time": 0.3843815326690674
    },
    {
      "epoch": 0.000176220703125,
      "model_forward_time": 0.11496829986572266,
      "step": 28872
    },
    {
      "epoch": 0.000176220703125,
      "step": 28872,
      "training_step_time": 0.5115640163421631
    },
    {
      "epoch": 0.000176226806640625,
      "model_forward_time": 0.1150517463684082,
      "step": 28873
    },
    {
      "epoch": 0.000176226806640625,
      "step": 28873,
      "training_step_time": 0.4989969730377197
    },
    {
      "epoch": 0.00017623291015625,
      "model_forward_time": 0.11465597152709961,
      "step": 28874
    },
    {
      "epoch": 0.00017623291015625,
      "step": 28874,
      "training_step_time": 0.36635494232177734
    },
    {
      "epoch": 0.000176239013671875,
      "model_forward_time": 0.11526608467102051,
      "step": 28875
    },
    {
      "epoch": 0.000176239013671875,
      "step": 28875,
      "training_step_time": 0.46454596519470215
    },
    {
      "epoch": 0.0001762451171875,
      "model_forward_time": 0.11476731300354004,
      "step": 28876
    },
    {
      "epoch": 0.0001762451171875,
      "step": 28876,
      "training_step_time": 0.4199187755584717
    },
    {
      "epoch": 0.000176251220703125,
      "model_forward_time": 0.11529922485351562,
      "step": 28877
    },
    {
      "epoch": 0.000176251220703125,
      "step": 28877,
      "training_step_time": 0.3844640254974365
    },
    {
      "epoch": 0.00017625732421875,
      "model_forward_time": 0.11593937873840332,
      "step": 28878
    },
    {
      "epoch": 0.00017625732421875,
      "step": 28878,
      "training_step_time": 0.3916339874267578
    },
    {
      "epoch": 0.000176263427734375,
      "model_forward_time": 0.11554908752441406,
      "step": 28879
    },
    {
      "epoch": 0.000176263427734375,
      "step": 28879,
      "training_step_time": 0.3937568664550781
    },
    {
      "epoch": 0.00017626953125,
      "grad_norm": 0.14111411571502686,
      "learning_rate": 5.719508496972896e-05,
      "loss": 0.0403,
      "step": 28880
    },
    {
      "epoch": 0.00017626953125,
      "model_forward_time": 0.11526155471801758,
      "step": 28880
    },
    {
      "epoch": 0.00017626953125,
      "step": 28880,
      "training_step_time": 0.3989260196685791
    },
    {
      "epoch": 0.000176275634765625,
      "model_forward_time": 0.11621952056884766,
      "step": 28881
    },
    {
      "epoch": 0.000176275634765625,
      "step": 28881,
      "training_step_time": 0.38781118392944336
    },
    {
      "epoch": 0.00017628173828125,
      "model_forward_time": 0.1150810718536377,
      "step": 28882
    },
    {
      "epoch": 0.00017628173828125,
      "step": 28882,
      "training_step_time": 0.38045501708984375
    },
    {
      "epoch": 0.000176287841796875,
      "model_forward_time": 0.11555314064025879,
      "step": 28883
    },
    {
      "epoch": 0.000176287841796875,
      "step": 28883,
      "training_step_time": 0.39749836921691895
    },
    {
      "epoch": 0.0001762939453125,
      "model_forward_time": 0.11536407470703125,
      "step": 28884
    },
    {
      "epoch": 0.0001762939453125,
      "step": 28884,
      "training_step_time": 0.3906998634338379
    },
    {
      "epoch": 0.000176300048828125,
      "model_forward_time": 0.11672806739807129,
      "step": 28885
    },
    {
      "epoch": 0.000176300048828125,
      "step": 28885,
      "training_step_time": 0.642493724822998
    },
    {
      "epoch": 0.00017630615234375,
      "model_forward_time": 0.11529207229614258,
      "step": 28886
    },
    {
      "epoch": 0.00017630615234375,
      "step": 28886,
      "training_step_time": 0.5208673477172852
    },
    {
      "epoch": 0.000176312255859375,
      "model_forward_time": 0.11520051956176758,
      "step": 28887
    },
    {
      "epoch": 0.000176312255859375,
      "step": 28887,
      "training_step_time": 0.40398550033569336
    },
    {
      "epoch": 0.000176318359375,
      "model_forward_time": 0.11491870880126953,
      "step": 28888
    },
    {
      "epoch": 0.000176318359375,
      "step": 28888,
      "training_step_time": 0.4978628158569336
    },
    {
      "epoch": 0.000176324462890625,
      "model_forward_time": 0.11502981185913086,
      "step": 28889
    },
    {
      "epoch": 0.000176324462890625,
      "step": 28889,
      "training_step_time": 0.38904714584350586
    },
    {
      "epoch": 0.00017633056640625,
      "grad_norm": 0.13689003884792328,
      "learning_rate": 5.7167812870230094e-05,
      "loss": 0.04,
      "step": 28890
    },
    {
      "epoch": 0.00017633056640625,
      "model_forward_time": 0.1147770881652832,
      "step": 28890
    },
    {
      "epoch": 0.00017633056640625,
      "step": 28890,
      "training_step_time": 0.4512636661529541
    },
    {
      "epoch": 0.000176336669921875,
      "model_forward_time": 0.11503720283508301,
      "step": 28891
    },
    {
      "epoch": 0.000176336669921875,
      "step": 28891,
      "training_step_time": 0.40890026092529297
    },
    {
      "epoch": 0.0001763427734375,
      "model_forward_time": 0.11565756797790527,
      "step": 28892
    },
    {
      "epoch": 0.0001763427734375,
      "step": 28892,
      "training_step_time": 0.39536356925964355
    },
    {
      "epoch": 0.000176348876953125,
      "model_forward_time": 0.11509323120117188,
      "step": 28893
    },
    {
      "epoch": 0.000176348876953125,
      "step": 28893,
      "training_step_time": 0.3967735767364502
    },
    {
      "epoch": 0.00017635498046875,
      "model_forward_time": 0.11497664451599121,
      "step": 28894
    },
    {
      "epoch": 0.00017635498046875,
      "step": 28894,
      "training_step_time": 0.3954601287841797
    },
    {
      "epoch": 0.000176361083984375,
      "model_forward_time": 0.11561870574951172,
      "step": 28895
    },
    {
      "epoch": 0.000176361083984375,
      "step": 28895,
      "training_step_time": 0.3902401924133301
    },
    {
      "epoch": 0.0001763671875,
      "model_forward_time": 0.11549735069274902,
      "step": 28896
    },
    {
      "epoch": 0.0001763671875,
      "step": 28896,
      "training_step_time": 0.3963601589202881
    },
    {
      "epoch": 0.000176373291015625,
      "model_forward_time": 0.11530089378356934,
      "step": 28897
    },
    {
      "epoch": 0.000176373291015625,
      "step": 28897,
      "training_step_time": 0.5434615612030029
    },
    {
      "epoch": 0.00017637939453125,
      "model_forward_time": 0.11524367332458496,
      "step": 28898
    },
    {
      "epoch": 0.00017637939453125,
      "step": 28898,
      "training_step_time": 0.390272855758667
    },
    {
      "epoch": 0.000176385498046875,
      "model_forward_time": 0.11467719078063965,
      "step": 28899
    },
    {
      "epoch": 0.000176385498046875,
      "step": 28899,
      "training_step_time": 0.38142943382263184
    },
    {
      "epoch": 0.0001763916015625,
      "grad_norm": 0.10555293411016464,
      "learning_rate": 5.714053859333893e-05,
      "loss": 0.0444,
      "step": 28900
    },
    {
      "epoch": 0.0001763916015625,
      "model_forward_time": 0.11516976356506348,
      "step": 28900
    },
    {
      "epoch": 0.0001763916015625,
      "step": 28900,
      "training_step_time": 0.3885366916656494
    },
    {
      "epoch": 0.000176397705078125,
      "model_forward_time": 0.11513614654541016,
      "step": 28901
    },
    {
      "epoch": 0.000176397705078125,
      "step": 28901,
      "training_step_time": 0.40873146057128906
    },
    {
      "epoch": 0.00017640380859375,
      "model_forward_time": 0.11458730697631836,
      "step": 28902
    },
    {
      "epoch": 0.00017640380859375,
      "step": 28902,
      "training_step_time": 0.4754924774169922
    },
    {
      "epoch": 0.000176409912109375,
      "model_forward_time": 0.11570096015930176,
      "step": 28903
    },
    {
      "epoch": 0.000176409912109375,
      "step": 28903,
      "training_step_time": 0.6566107273101807
    },
    {
      "epoch": 0.000176416015625,
      "model_forward_time": 0.11420583724975586,
      "step": 28904
    },
    {
      "epoch": 0.000176416015625,
      "step": 28904,
      "training_step_time": 0.4177432060241699
    },
    {
      "epoch": 0.000176422119140625,
      "model_forward_time": 0.11501812934875488,
      "step": 28905
    },
    {
      "epoch": 0.000176422119140625,
      "step": 28905,
      "training_step_time": 0.41579556465148926
    },
    {
      "epoch": 0.00017642822265625,
      "model_forward_time": 0.11577248573303223,
      "step": 28906
    },
    {
      "epoch": 0.00017642822265625,
      "step": 28906,
      "training_step_time": 0.39678525924682617
    },
    {
      "epoch": 0.000176434326171875,
      "model_forward_time": 0.11501431465148926,
      "step": 28907
    },
    {
      "epoch": 0.000176434326171875,
      "step": 28907,
      "training_step_time": 0.387087345123291
    },
    {
      "epoch": 0.0001764404296875,
      "model_forward_time": 0.11449909210205078,
      "step": 28908
    },
    {
      "epoch": 0.0001764404296875,
      "step": 28908,
      "training_step_time": 0.3996772766113281
    },
    {
      "epoch": 0.000176446533203125,
      "model_forward_time": 0.11539006233215332,
      "step": 28909
    },
    {
      "epoch": 0.000176446533203125,
      "step": 28909,
      "training_step_time": 0.6351630687713623
    },
    {
      "epoch": 0.00017645263671875,
      "grad_norm": 0.1125701442360878,
      "learning_rate": 5.711326214734069e-05,
      "loss": 0.042,
      "step": 28910
    },
    {
      "epoch": 0.00017645263671875,
      "model_forward_time": 0.11441421508789062,
      "step": 28910
    },
    {
      "epoch": 0.00017645263671875,
      "step": 28910,
      "training_step_time": 0.39201903343200684
    },
    {
      "epoch": 0.000176458740234375,
      "model_forward_time": 0.11513924598693848,
      "step": 28911
    },
    {
      "epoch": 0.000176458740234375,
      "step": 28911,
      "training_step_time": 0.3935263156890869
    },
    {
      "epoch": 0.00017646484375,
      "model_forward_time": 0.11648917198181152,
      "step": 28912
    },
    {
      "epoch": 0.00017646484375,
      "step": 28912,
      "training_step_time": 0.3867988586425781
    },
    {
      "epoch": 0.000176470947265625,
      "model_forward_time": 0.11484360694885254,
      "step": 28913
    },
    {
      "epoch": 0.000176470947265625,
      "step": 28913,
      "training_step_time": 0.40645766258239746
    },
    {
      "epoch": 0.00017647705078125,
      "model_forward_time": 0.11484813690185547,
      "step": 28914
    },
    {
      "epoch": 0.00017647705078125,
      "step": 28914,
      "training_step_time": 0.42531728744506836
    },
    {
      "epoch": 0.000176483154296875,
      "model_forward_time": 0.11542582511901855,
      "step": 28915
    },
    {
      "epoch": 0.000176483154296875,
      "step": 28915,
      "training_step_time": 0.6991567611694336
    },
    {
      "epoch": 0.0001764892578125,
      "model_forward_time": 0.11490178108215332,
      "step": 28916
    },
    {
      "epoch": 0.0001764892578125,
      "step": 28916,
      "training_step_time": 0.41271376609802246
    },
    {
      "epoch": 0.000176495361328125,
      "model_forward_time": 0.1147308349609375,
      "step": 28917
    },
    {
      "epoch": 0.000176495361328125,
      "step": 28917,
      "training_step_time": 0.4941086769104004
    },
    {
      "epoch": 0.00017650146484375,
      "model_forward_time": 0.11765122413635254,
      "step": 28918
    },
    {
      "epoch": 0.00017650146484375,
      "step": 28918,
      "training_step_time": 0.4660191535949707
    },
    {
      "epoch": 0.000176507568359375,
      "model_forward_time": 0.11572933197021484,
      "step": 28919
    },
    {
      "epoch": 0.000176507568359375,
      "step": 28919,
      "training_step_time": 0.3834562301635742
    },
    {
      "epoch": 0.000176513671875,
      "grad_norm": 0.11955591291189194,
      "learning_rate": 5.7085983540521216e-05,
      "loss": 0.0414,
      "step": 28920
    },
    {
      "epoch": 0.000176513671875,
      "model_forward_time": 0.11481690406799316,
      "step": 28920
    },
    {
      "epoch": 0.000176513671875,
      "step": 28920,
      "training_step_time": 0.38349413871765137
    },
    {
      "epoch": 0.000176519775390625,
      "model_forward_time": 0.11513066291809082,
      "step": 28921
    },
    {
      "epoch": 0.000176519775390625,
      "step": 28921,
      "training_step_time": 0.3982555866241455
    },
    {
      "epoch": 0.00017652587890625,
      "model_forward_time": 0.11502909660339355,
      "step": 28922
    },
    {
      "epoch": 0.00017652587890625,
      "step": 28922,
      "training_step_time": 0.3915596008300781
    },
    {
      "epoch": 0.000176531982421875,
      "model_forward_time": 0.11516547203063965,
      "step": 28923
    },
    {
      "epoch": 0.000176531982421875,
      "step": 28923,
      "training_step_time": 0.3806438446044922
    },
    {
      "epoch": 0.0001765380859375,
      "model_forward_time": 0.11526966094970703,
      "step": 28924
    },
    {
      "epoch": 0.0001765380859375,
      "step": 28924,
      "training_step_time": 0.3907127380371094
    },
    {
      "epoch": 0.000176544189453125,
      "model_forward_time": 0.11552214622497559,
      "step": 28925
    },
    {
      "epoch": 0.000176544189453125,
      "step": 28925,
      "training_step_time": 0.40107107162475586
    },
    {
      "epoch": 0.00017655029296875,
      "model_forward_time": 0.11536288261413574,
      "step": 28926
    },
    {
      "epoch": 0.00017655029296875,
      "step": 28926,
      "training_step_time": 0.408644437789917
    },
    {
      "epoch": 0.000176556396484375,
      "model_forward_time": 0.11614370346069336,
      "step": 28927
    },
    {
      "epoch": 0.000176556396484375,
      "step": 28927,
      "training_step_time": 0.5639157295227051
    },
    {
      "epoch": 0.0001765625,
      "model_forward_time": 0.11519694328308105,
      "step": 28928
    },
    {
      "epoch": 0.0001765625,
      "step": 28928,
      "training_step_time": 0.4766080379486084
    },
    {
      "epoch": 0.000176568603515625,
      "model_forward_time": 0.1155850887298584,
      "step": 28929
    },
    {
      "epoch": 0.000176568603515625,
      "step": 28929,
      "training_step_time": 0.40795350074768066
    },
    {
      "epoch": 0.00017657470703125,
      "grad_norm": 0.12002721428871155,
      "learning_rate": 5.705870278116703e-05,
      "loss": 0.0471,
      "step": 28930
    },
    {
      "epoch": 0.00017657470703125,
      "model_forward_time": 0.11498332023620605,
      "step": 28930
    },
    {
      "epoch": 0.00017657470703125,
      "step": 28930,
      "training_step_time": 0.48192501068115234
    },
    {
      "epoch": 0.000176580810546875,
      "model_forward_time": 0.11455559730529785,
      "step": 28931
    },
    {
      "epoch": 0.000176580810546875,
      "step": 28931,
      "training_step_time": 0.47475266456604004
    },
    {
      "epoch": 0.0001765869140625,
      "model_forward_time": 0.11561203002929688,
      "step": 28932
    },
    {
      "epoch": 0.0001765869140625,
      "step": 28932,
      "training_step_time": 0.48334574699401855
    },
    {
      "epoch": 0.000176593017578125,
      "model_forward_time": 0.11442756652832031,
      "step": 28933
    },
    {
      "epoch": 0.000176593017578125,
      "step": 28933,
      "training_step_time": 0.39433979988098145
    },
    {
      "epoch": 0.00017659912109375,
      "model_forward_time": 0.11505413055419922,
      "step": 28934
    },
    {
      "epoch": 0.00017659912109375,
      "step": 28934,
      "training_step_time": 0.3834207057952881
    },
    {
      "epoch": 0.000176605224609375,
      "model_forward_time": 0.11591982841491699,
      "step": 28935
    },
    {
      "epoch": 0.000176605224609375,
      "step": 28935,
      "training_step_time": 0.3965632915496826
    },
    {
      "epoch": 0.000176611328125,
      "model_forward_time": 0.11493730545043945,
      "step": 28936
    },
    {
      "epoch": 0.000176611328125,
      "step": 28936,
      "training_step_time": 0.3910558223724365
    },
    {
      "epoch": 0.000176617431640625,
      "model_forward_time": 0.11523175239562988,
      "step": 28937
    },
    {
      "epoch": 0.000176617431640625,
      "step": 28937,
      "training_step_time": 0.4082210063934326
    },
    {
      "epoch": 0.00017662353515625,
      "model_forward_time": 0.11552643775939941,
      "step": 28938
    },
    {
      "epoch": 0.00017662353515625,
      "step": 28938,
      "training_step_time": 0.404766321182251
    },
    {
      "epoch": 0.000176629638671875,
      "model_forward_time": 0.11553549766540527,
      "step": 28939
    },
    {
      "epoch": 0.000176629638671875,
      "step": 28939,
      "training_step_time": 0.4202747344970703
    },
    {
      "epoch": 0.0001766357421875,
      "grad_norm": 0.11505846679210663,
      "learning_rate": 5.7031419877565317e-05,
      "loss": 0.0352,
      "step": 28940
    },
    {
      "epoch": 0.0001766357421875,
      "model_forward_time": 0.11495089530944824,
      "step": 28940
    },
    {
      "epoch": 0.0001766357421875,
      "step": 28940,
      "training_step_time": 0.3734006881713867
    },
    {
      "epoch": 0.000176641845703125,
      "model_forward_time": 0.11508893966674805,
      "step": 28941
    },
    {
      "epoch": 0.000176641845703125,
      "step": 28941,
      "training_step_time": 0.3966491222381592
    },
    {
      "epoch": 0.00017664794921875,
      "model_forward_time": 0.11631274223327637,
      "step": 28942
    },
    {
      "epoch": 0.00017664794921875,
      "step": 28942,
      "training_step_time": 0.39717698097229004
    },
    {
      "epoch": 0.000176654052734375,
      "model_forward_time": 0.11630940437316895,
      "step": 28943
    },
    {
      "epoch": 0.000176654052734375,
      "step": 28943,
      "training_step_time": 0.44324803352355957
    },
    {
      "epoch": 0.00017666015625,
      "model_forward_time": 0.11562991142272949,
      "step": 28944
    },
    {
      "epoch": 0.00017666015625,
      "step": 28944,
      "training_step_time": 0.5217509269714355
    },
    {
      "epoch": 0.000176666259765625,
      "model_forward_time": 0.11561346054077148,
      "step": 28945
    },
    {
      "epoch": 0.000176666259765625,
      "step": 28945,
      "training_step_time": 0.4975152015686035
    },
    {
      "epoch": 0.00017667236328125,
      "model_forward_time": 0.11471319198608398,
      "step": 28946
    },
    {
      "epoch": 0.00017667236328125,
      "step": 28946,
      "training_step_time": 0.3777773380279541
    },
    {
      "epoch": 0.000176678466796875,
      "model_forward_time": 0.11514449119567871,
      "step": 28947
    },
    {
      "epoch": 0.000176678466796875,
      "step": 28947,
      "training_step_time": 0.3909764289855957
    },
    {
      "epoch": 0.0001766845703125,
      "model_forward_time": 0.11664628982543945,
      "step": 28948
    },
    {
      "epoch": 0.0001766845703125,
      "step": 28948,
      "training_step_time": 0.37831616401672363
    },
    {
      "epoch": 0.000176690673828125,
      "model_forward_time": 0.11461663246154785,
      "step": 28949
    },
    {
      "epoch": 0.000176690673828125,
      "step": 28949,
      "training_step_time": 0.3983266353607178
    },
    {
      "epoch": 0.00017669677734375,
      "grad_norm": 0.15481115877628326,
      "learning_rate": 5.70041348380039e-05,
      "loss": 0.04,
      "step": 28950
    },
    {
      "epoch": 0.00017669677734375,
      "model_forward_time": 0.1152796745300293,
      "step": 28950
    },
    {
      "epoch": 0.00017669677734375,
      "step": 28950,
      "training_step_time": 0.39373779296875
    },
    {
      "epoch": 0.000176702880859375,
      "model_forward_time": 0.11512541770935059,
      "step": 28951
    },
    {
      "epoch": 0.000176702880859375,
      "step": 28951,
      "training_step_time": 0.5813040733337402
    },
    {
      "epoch": 0.000176708984375,
      "model_forward_time": 0.11479687690734863,
      "step": 28952
    },
    {
      "epoch": 0.000176708984375,
      "step": 28952,
      "training_step_time": 0.4120030403137207
    },
    {
      "epoch": 0.000176715087890625,
      "model_forward_time": 0.11546659469604492,
      "step": 28953
    },
    {
      "epoch": 0.000176715087890625,
      "step": 28953,
      "training_step_time": 0.3813917636871338
    },
    {
      "epoch": 0.00017672119140625,
      "model_forward_time": 0.11618900299072266,
      "step": 28954
    },
    {
      "epoch": 0.00017672119140625,
      "step": 28954,
      "training_step_time": 0.40317440032958984
    },
    {
      "epoch": 0.000176727294921875,
      "model_forward_time": 0.11513185501098633,
      "step": 28955
    },
    {
      "epoch": 0.000176727294921875,
      "step": 28955,
      "training_step_time": 0.4036986827850342
    },
    {
      "epoch": 0.0001767333984375,
      "model_forward_time": 0.11578106880187988,
      "step": 28956
    },
    {
      "epoch": 0.0001767333984375,
      "step": 28956,
      "training_step_time": 0.4635159969329834
    },
    {
      "epoch": 0.000176739501953125,
      "model_forward_time": 0.1155233383178711,
      "step": 28957
    },
    {
      "epoch": 0.000176739501953125,
      "step": 28957,
      "training_step_time": 0.4846079349517822
    },
    {
      "epoch": 0.00017674560546875,
      "model_forward_time": 0.11531400680541992,
      "step": 28958
    },
    {
      "epoch": 0.00017674560546875,
      "step": 28958,
      "training_step_time": 0.41927266120910645
    },
    {
      "epoch": 0.000176751708984375,
      "model_forward_time": 0.11548423767089844,
      "step": 28959
    },
    {
      "epoch": 0.000176751708984375,
      "step": 28959,
      "training_step_time": 0.36840128898620605
    },
    {
      "epoch": 0.0001767578125,
      "grad_norm": 0.07967043668031693,
      "learning_rate": 5.697684767077125e-05,
      "loss": 0.0407,
      "step": 28960
    },
    {
      "epoch": 0.0001767578125,
      "model_forward_time": 0.1145172119140625,
      "step": 28960
    },
    {
      "epoch": 0.0001767578125,
      "step": 28960,
      "training_step_time": 0.4489004611968994
    },
    {
      "epoch": 0.000176763916015625,
      "model_forward_time": 0.11476445198059082,
      "step": 28961
    },
    {
      "epoch": 0.000176763916015625,
      "step": 28961,
      "training_step_time": 0.4420433044433594
    },
    {
      "epoch": 0.00017677001953125,
      "model_forward_time": 0.11490869522094727,
      "step": 28962
    },
    {
      "epoch": 0.00017677001953125,
      "step": 28962,
      "training_step_time": 0.39824867248535156
    },
    {
      "epoch": 0.000176776123046875,
      "model_forward_time": 0.1150662899017334,
      "step": 28963
    },
    {
      "epoch": 0.000176776123046875,
      "step": 28963,
      "training_step_time": 0.41087889671325684
    },
    {
      "epoch": 0.0001767822265625,
      "model_forward_time": 0.1154782772064209,
      "step": 28964
    },
    {
      "epoch": 0.0001767822265625,
      "step": 28964,
      "training_step_time": 0.390897274017334
    },
    {
      "epoch": 0.000176788330078125,
      "model_forward_time": 0.11542224884033203,
      "step": 28965
    },
    {
      "epoch": 0.000176788330078125,
      "step": 28965,
      "training_step_time": 0.3867619037628174
    },
    {
      "epoch": 0.00017679443359375,
      "model_forward_time": 0.11510181427001953,
      "step": 28966
    },
    {
      "epoch": 0.00017679443359375,
      "step": 28966,
      "training_step_time": 0.39929747581481934
    },
    {
      "epoch": 0.000176800537109375,
      "model_forward_time": 0.11529183387756348,
      "step": 28967
    },
    {
      "epoch": 0.000176800537109375,
      "step": 28967,
      "training_step_time": 0.40012526512145996
    },
    {
      "epoch": 0.000176806640625,
      "model_forward_time": 0.11512064933776855,
      "step": 28968
    },
    {
      "epoch": 0.000176806640625,
      "step": 28968,
      "training_step_time": 0.41667842864990234
    },
    {
      "epoch": 0.000176812744140625,
      "model_forward_time": 0.11532735824584961,
      "step": 28969
    },
    {
      "epoch": 0.000176812744140625,
      "step": 28969,
      "training_step_time": 0.5997228622436523
    },
    {
      "epoch": 0.00017681884765625,
      "grad_norm": 0.14935794472694397,
      "learning_rate": 5.69495583841565e-05,
      "loss": 0.0411,
      "step": 28970
    },
    {
      "epoch": 0.00017681884765625,
      "model_forward_time": 0.11545133590698242,
      "step": 28970
    },
    {
      "epoch": 0.00017681884765625,
      "step": 28970,
      "training_step_time": 0.42551660537719727
    },
    {
      "epoch": 0.000176824951171875,
      "model_forward_time": 0.11441349983215332,
      "step": 28971
    },
    {
      "epoch": 0.000176824951171875,
      "step": 28971,
      "training_step_time": 0.43306708335876465
    },
    {
      "epoch": 0.0001768310546875,
      "model_forward_time": 0.11484432220458984,
      "step": 28972
    },
    {
      "epoch": 0.0001768310546875,
      "step": 28972,
      "training_step_time": 0.4172840118408203
    },
    {
      "epoch": 0.000176837158203125,
      "model_forward_time": 0.1144566535949707,
      "step": 28973
    },
    {
      "epoch": 0.000176837158203125,
      "step": 28973,
      "training_step_time": 0.3912649154663086
    },
    {
      "epoch": 0.00017684326171875,
      "model_forward_time": 0.11468982696533203,
      "step": 28974
    },
    {
      "epoch": 0.00017684326171875,
      "step": 28974,
      "training_step_time": 0.4929847717285156
    },
    {
      "epoch": 0.000176849365234375,
      "model_forward_time": 0.11558222770690918,
      "step": 28975
    },
    {
      "epoch": 0.000176849365234375,
      "step": 28975,
      "training_step_time": 0.49439072608947754
    },
    {
      "epoch": 0.00017685546875,
      "model_forward_time": 0.11536121368408203,
      "step": 28976
    },
    {
      "epoch": 0.00017685546875,
      "step": 28976,
      "training_step_time": 0.3877589702606201
    },
    {
      "epoch": 0.000176861572265625,
      "model_forward_time": 0.11522150039672852,
      "step": 28977
    },
    {
      "epoch": 0.000176861572265625,
      "step": 28977,
      "training_step_time": 0.3787252902984619
    },
    {
      "epoch": 0.00017686767578125,
      "model_forward_time": 0.1152949333190918,
      "step": 28978
    },
    {
      "epoch": 0.00017686767578125,
      "step": 28978,
      "training_step_time": 0.3953101634979248
    },
    {
      "epoch": 0.000176873779296875,
      "model_forward_time": 0.11558723449707031,
      "step": 28979
    },
    {
      "epoch": 0.000176873779296875,
      "step": 28979,
      "training_step_time": 0.3923499584197998
    },
    {
      "epoch": 0.0001768798828125,
      "grad_norm": 0.17131580412387848,
      "learning_rate": 5.692226698644938e-05,
      "loss": 0.0382,
      "step": 28980
    },
    {
      "epoch": 0.0001768798828125,
      "model_forward_time": 0.11516857147216797,
      "step": 28980
    },
    {
      "epoch": 0.0001768798828125,
      "step": 28980,
      "training_step_time": 0.39647603034973145
    },
    {
      "epoch": 0.000176885986328125,
      "model_forward_time": 0.11519098281860352,
      "step": 28981
    },
    {
      "epoch": 0.000176885986328125,
      "step": 28981,
      "training_step_time": 0.6541686058044434
    },
    {
      "epoch": 0.00017689208984375,
      "model_forward_time": 0.11450338363647461,
      "step": 28982
    },
    {
      "epoch": 0.00017689208984375,
      "step": 28982,
      "training_step_time": 0.38897252082824707
    },
    {
      "epoch": 0.000176898193359375,
      "model_forward_time": 0.11496877670288086,
      "step": 28983
    },
    {
      "epoch": 0.000176898193359375,
      "step": 28983,
      "training_step_time": 0.4020371437072754
    },
    {
      "epoch": 0.000176904296875,
      "model_forward_time": 0.11518001556396484,
      "step": 28984
    },
    {
      "epoch": 0.000176904296875,
      "step": 28984,
      "training_step_time": 0.39716100692749023
    },
    {
      "epoch": 0.000176910400390625,
      "model_forward_time": 0.11476659774780273,
      "step": 28985
    },
    {
      "epoch": 0.000176910400390625,
      "step": 28985,
      "training_step_time": 0.4489414691925049
    },
    {
      "epoch": 0.00017691650390625,
      "model_forward_time": 0.11501669883728027,
      "step": 28986
    },
    {
      "epoch": 0.00017691650390625,
      "step": 28986,
      "training_step_time": 0.4064147472381592
    },
    {
      "epoch": 0.000176922607421875,
      "model_forward_time": 0.11504650115966797,
      "step": 28987
    },
    {
      "epoch": 0.000176922607421875,
      "step": 28987,
      "training_step_time": 0.6770749092102051
    },
    {
      "epoch": 0.0001769287109375,
      "model_forward_time": 0.11583209037780762,
      "step": 28988
    },
    {
      "epoch": 0.0001769287109375,
      "step": 28988,
      "training_step_time": 0.4131002426147461
    },
    {
      "epoch": 0.000176934814453125,
      "model_forward_time": 0.11492276191711426,
      "step": 28989
    },
    {
      "epoch": 0.000176934814453125,
      "step": 28989,
      "training_step_time": 0.4834635257720947
    },
    {
      "epoch": 0.00017694091796875,
      "grad_norm": 0.18583503365516663,
      "learning_rate": 5.689497348594035e-05,
      "loss": 0.0472,
      "step": 28990
    },
    {
      "epoch": 0.00017694091796875,
      "model_forward_time": 0.11588478088378906,
      "step": 28990
    },
    {
      "epoch": 0.00017694091796875,
      "step": 28990,
      "training_step_time": 0.39223766326904297
    },
    {
      "epoch": 0.000176947021484375,
      "model_forward_time": 0.1144571304321289,
      "step": 28991
    },
    {
      "epoch": 0.000176947021484375,
      "step": 28991,
      "training_step_time": 0.3929767608642578
    },
    {
      "epoch": 0.000176953125,
      "model_forward_time": 0.1143331527709961,
      "step": 28992
    },
    {
      "epoch": 0.000176953125,
      "step": 28992,
      "training_step_time": 0.390702486038208
    },
    {
      "epoch": 0.000176959228515625,
      "model_forward_time": 0.11467885971069336,
      "step": 28993
    },
    {
      "epoch": 0.000176959228515625,
      "step": 28993,
      "training_step_time": 0.411329984664917
    },
    {
      "epoch": 0.00017696533203125,
      "model_forward_time": 0.11549711227416992,
      "step": 28994
    },
    {
      "epoch": 0.00017696533203125,
      "step": 28994,
      "training_step_time": 0.3887293338775635
    },
    {
      "epoch": 0.000176971435546875,
      "model_forward_time": 0.11535191535949707,
      "step": 28995
    },
    {
      "epoch": 0.000176971435546875,
      "step": 28995,
      "training_step_time": 0.45179080963134766
    },
    {
      "epoch": 0.0001769775390625,
      "model_forward_time": 0.11562657356262207,
      "step": 28996
    },
    {
      "epoch": 0.0001769775390625,
      "step": 28996,
      "training_step_time": 0.40007996559143066
    },
    {
      "epoch": 0.000176983642578125,
      "model_forward_time": 0.11585688591003418,
      "step": 28997
    },
    {
      "epoch": 0.000176983642578125,
      "step": 28997,
      "training_step_time": 0.3992953300476074
    },
    {
      "epoch": 0.00017698974609375,
      "model_forward_time": 0.11537575721740723,
      "step": 28998
    },
    {
      "epoch": 0.00017698974609375,
      "step": 28998,
      "training_step_time": 0.3973205089569092
    },
    {
      "epoch": 0.000176995849609375,
      "model_forward_time": 0.11491227149963379,
      "step": 28999
    },
    {
      "epoch": 0.000176995849609375,
      "step": 28999,
      "training_step_time": 0.7444326877593994
    },
    {
      "epoch": 0.000177001953125,
      "grad_norm": 0.14286459982395172,
      "learning_rate": 5.686767789092041e-05,
      "loss": 0.0386,
      "step": 29000
    },
    {
      "epoch": 0.000177001953125,
      "model_forward_time": 0.11343932151794434,
      "step": 29000
    },
    {
      "epoch": 0.000177001953125,
      "step": 29000,
      "training_step_time": 0.35488367080688477
    },
    {
      "epoch": 0.000177008056640625,
      "model_forward_time": 0.11230039596557617,
      "step": 29001
    },
    {
      "epoch": 0.000177008056640625,
      "step": 29001,
      "training_step_time": 0.4651365280151367
    },
    {
      "epoch": 0.00017701416015625,
      "model_forward_time": 0.11437106132507324,
      "step": 29002
    },
    {
      "epoch": 0.00017701416015625,
      "step": 29002,
      "training_step_time": 0.37751030921936035
    },
    {
      "epoch": 0.000177020263671875,
      "model_forward_time": 0.11387085914611816,
      "step": 29003
    },
    {
      "epoch": 0.000177020263671875,
      "step": 29003,
      "training_step_time": 0.39481115341186523
    },
    {
      "epoch": 0.0001770263671875,
      "model_forward_time": 0.11394619941711426,
      "step": 29004
    },
    {
      "epoch": 0.0001770263671875,
      "step": 29004,
      "training_step_time": 0.473891019821167
    },
    {
      "epoch": 0.000177032470703125,
      "model_forward_time": 0.11499619483947754,
      "step": 29005
    },
    {
      "epoch": 0.000177032470703125,
      "step": 29005,
      "training_step_time": 0.38617730140686035
    },
    {
      "epoch": 0.00017703857421875,
      "model_forward_time": 0.1150965690612793,
      "step": 29006
    },
    {
      "epoch": 0.00017703857421875,
      "step": 29006,
      "training_step_time": 0.39157915115356445
    },
    {
      "epoch": 0.000177044677734375,
      "model_forward_time": 0.11491107940673828,
      "step": 29007
    },
    {
      "epoch": 0.000177044677734375,
      "step": 29007,
      "training_step_time": 0.4137542247772217
    },
    {
      "epoch": 0.00017705078125,
      "model_forward_time": 0.11475586891174316,
      "step": 29008
    },
    {
      "epoch": 0.00017705078125,
      "step": 29008,
      "training_step_time": 0.42748570442199707
    },
    {
      "epoch": 0.000177056884765625,
      "model_forward_time": 0.11508607864379883,
      "step": 29009
    },
    {
      "epoch": 0.000177056884765625,
      "step": 29009,
      "training_step_time": 0.3934953212738037
    },
    {
      "epoch": 0.00017706298828125,
      "grad_norm": 0.17048484086990356,
      "learning_rate": 5.6840380209681255e-05,
      "loss": 0.0447,
      "step": 29010
    },
    {
      "epoch": 0.00017706298828125,
      "model_forward_time": 0.1153717041015625,
      "step": 29010
    },
    {
      "epoch": 0.00017706298828125,
      "step": 29010,
      "training_step_time": 0.3961341381072998
    },
    {
      "epoch": 0.000177069091796875,
      "model_forward_time": 0.11471343040466309,
      "step": 29011
    },
    {
      "epoch": 0.000177069091796875,
      "step": 29011,
      "training_step_time": 0.394977331161499
    },
    {
      "epoch": 0.0001770751953125,
      "model_forward_time": 0.11530542373657227,
      "step": 29012
    },
    {
      "epoch": 0.0001770751953125,
      "step": 29012,
      "training_step_time": 0.428210973739624
    },
    {
      "epoch": 0.000177081298828125,
      "model_forward_time": 0.1153554916381836,
      "step": 29013
    },
    {
      "epoch": 0.000177081298828125,
      "step": 29013,
      "training_step_time": 0.3953533172607422
    },
    {
      "epoch": 0.00017708740234375,
      "model_forward_time": 0.11546087265014648,
      "step": 29014
    },
    {
      "epoch": 0.00017708740234375,
      "step": 29014,
      "training_step_time": 0.45534420013427734
    },
    {
      "epoch": 0.000177093505859375,
      "model_forward_time": 0.11495256423950195,
      "step": 29015
    },
    {
      "epoch": 0.000177093505859375,
      "step": 29015,
      "training_step_time": 0.4661860466003418
    },
    {
      "epoch": 0.000177099609375,
      "model_forward_time": 0.11621737480163574,
      "step": 29016
    },
    {
      "epoch": 0.000177099609375,
      "step": 29016,
      "training_step_time": 0.49218249320983887
    },
    {
      "epoch": 0.000177105712890625,
      "model_forward_time": 0.1147925853729248,
      "step": 29017
    },
    {
      "epoch": 0.000177105712890625,
      "step": 29017,
      "training_step_time": 0.5069873332977295
    },
    {
      "epoch": 0.00017711181640625,
      "model_forward_time": 0.11463379859924316,
      "step": 29018
    },
    {
      "epoch": 0.00017711181640625,
      "step": 29018,
      "training_step_time": 0.4874727725982666
    },
    {
      "epoch": 0.000177117919921875,
      "model_forward_time": 0.11440372467041016,
      "step": 29019
    },
    {
      "epoch": 0.000177117919921875,
      "step": 29019,
      "training_step_time": 0.3885157108306885
    },
    {
      "epoch": 0.0001771240234375,
      "grad_norm": 0.1454400271177292,
      "learning_rate": 5.681308045051522e-05,
      "loss": 0.0424,
      "step": 29020
    },
    {
      "epoch": 0.0001771240234375,
      "model_forward_time": 0.11392426490783691,
      "step": 29020
    },
    {
      "epoch": 0.0001771240234375,
      "step": 29020,
      "training_step_time": 0.44618797302246094
    },
    {
      "epoch": 0.000177130126953125,
      "model_forward_time": 0.11487054824829102,
      "step": 29021
    },
    {
      "epoch": 0.000177130126953125,
      "step": 29021,
      "training_step_time": 0.43825531005859375
    },
    {
      "epoch": 0.00017713623046875,
      "model_forward_time": 0.11461710929870605,
      "step": 29022
    },
    {
      "epoch": 0.00017713623046875,
      "step": 29022,
      "training_step_time": 0.40090036392211914
    },
    {
      "epoch": 0.000177142333984375,
      "model_forward_time": 0.11505317687988281,
      "step": 29023
    },
    {
      "epoch": 0.000177142333984375,
      "step": 29023,
      "training_step_time": 0.39692091941833496
    },
    {
      "epoch": 0.0001771484375,
      "model_forward_time": 0.11478424072265625,
      "step": 29024
    },
    {
      "epoch": 0.0001771484375,
      "step": 29024,
      "training_step_time": 0.4016706943511963
    },
    {
      "epoch": 0.000177154541015625,
      "model_forward_time": 0.11517715454101562,
      "step": 29025
    },
    {
      "epoch": 0.000177154541015625,
      "step": 29025,
      "training_step_time": 0.38976287841796875
    },
    {
      "epoch": 0.00017716064453125,
      "model_forward_time": 0.11443352699279785,
      "step": 29026
    },
    {
      "epoch": 0.00017716064453125,
      "step": 29026,
      "training_step_time": 0.3988659381866455
    },
    {
      "epoch": 0.000177166748046875,
      "model_forward_time": 0.1160287857055664,
      "step": 29027
    },
    {
      "epoch": 0.000177166748046875,
      "step": 29027,
      "training_step_time": 0.39156532287597656
    },
    {
      "epoch": 0.0001771728515625,
      "model_forward_time": 0.11539340019226074,
      "step": 29028
    },
    {
      "epoch": 0.0001771728515625,
      "step": 29028,
      "training_step_time": 0.480365514755249
    },
    {
      "epoch": 0.000177178955078125,
      "model_forward_time": 0.11552810668945312,
      "step": 29029
    },
    {
      "epoch": 0.000177178955078125,
      "step": 29029,
      "training_step_time": 0.5253522396087646
    },
    {
      "epoch": 0.00017718505859375,
      "grad_norm": 0.1578064262866974,
      "learning_rate": 5.6785778621715225e-05,
      "loss": 0.038,
      "step": 29030
    },
    {
      "epoch": 0.00017718505859375,
      "model_forward_time": 0.11552286148071289,
      "step": 29030
    },
    {
      "epoch": 0.00017718505859375,
      "step": 29030,
      "training_step_time": 0.40584444999694824
    },
    {
      "epoch": 0.000177191162109375,
      "model_forward_time": 0.11484813690185547,
      "step": 29031
    },
    {
      "epoch": 0.000177191162109375,
      "step": 29031,
      "training_step_time": 0.4586646556854248
    },
    {
      "epoch": 0.000177197265625,
      "model_forward_time": 0.12526822090148926,
      "step": 29032
    },
    {
      "epoch": 0.000177197265625,
      "step": 29032,
      "training_step_time": 0.4925868511199951
    },
    {
      "epoch": 0.000177203369140625,
      "model_forward_time": 0.11808276176452637,
      "step": 29033
    },
    {
      "epoch": 0.000177203369140625,
      "step": 29033,
      "training_step_time": 0.39940738677978516
    },
    {
      "epoch": 0.00017720947265625,
      "model_forward_time": 0.11756157875061035,
      "step": 29034
    },
    {
      "epoch": 0.00017720947265625,
      "step": 29034,
      "training_step_time": 0.39663028717041016
    },
    {
      "epoch": 0.000177215576171875,
      "model_forward_time": 0.11779165267944336,
      "step": 29035
    },
    {
      "epoch": 0.000177215576171875,
      "step": 29035,
      "training_step_time": 0.4057431221008301
    },
    {
      "epoch": 0.0001772216796875,
      "model_forward_time": 0.11969733238220215,
      "step": 29036
    },
    {
      "epoch": 0.0001772216796875,
      "step": 29036,
      "training_step_time": 0.38349199295043945
    },
    {
      "epoch": 0.000177227783203125,
      "model_forward_time": 0.11866617202758789,
      "step": 29037
    },
    {
      "epoch": 0.000177227783203125,
      "step": 29037,
      "training_step_time": 0.4011421203613281
    },
    {
      "epoch": 0.00017723388671875,
      "model_forward_time": 0.11908459663391113,
      "step": 29038
    },
    {
      "epoch": 0.00017723388671875,
      "step": 29038,
      "training_step_time": 0.381683349609375
    },
    {
      "epoch": 0.000177239990234375,
      "model_forward_time": 0.11836504936218262,
      "step": 29039
    },
    {
      "epoch": 0.000177239990234375,
      "step": 29039,
      "training_step_time": 0.38532161712646484
    },
    {
      "epoch": 0.00017724609375,
      "grad_norm": 0.1023765355348587,
      "learning_rate": 5.675847473157485e-05,
      "loss": 0.0466,
      "step": 29040
    },
    {
      "epoch": 0.00017724609375,
      "model_forward_time": 0.11976408958435059,
      "step": 29040
    },
    {
      "epoch": 0.00017724609375,
      "step": 29040,
      "training_step_time": 0.39997029304504395
    },
    {
      "epoch": 0.000177252197265625,
      "model_forward_time": 0.11843085289001465,
      "step": 29041
    },
    {
      "epoch": 0.000177252197265625,
      "step": 29041,
      "training_step_time": 0.40569329261779785
    },
    {
      "epoch": 0.00017725830078125,
      "model_forward_time": 0.12390470504760742,
      "step": 29042
    },
    {
      "epoch": 0.00017725830078125,
      "step": 29042,
      "training_step_time": 0.42459940910339355
    },
    {
      "epoch": 0.000177264404296875,
      "model_forward_time": 0.11809706687927246,
      "step": 29043
    },
    {
      "epoch": 0.000177264404296875,
      "step": 29043,
      "training_step_time": 0.48967528343200684
    },
    {
      "epoch": 0.0001772705078125,
      "model_forward_time": 0.11915445327758789,
      "step": 29044
    },
    {
      "epoch": 0.0001772705078125,
      "step": 29044,
      "training_step_time": 0.3917419910430908
    },
    {
      "epoch": 0.000177276611328125,
      "model_forward_time": 0.11785626411437988,
      "step": 29045
    },
    {
      "epoch": 0.000177276611328125,
      "step": 29045,
      "training_step_time": 0.37676358222961426
    },
    {
      "epoch": 0.00017728271484375,
      "model_forward_time": 0.11890482902526855,
      "step": 29046
    },
    {
      "epoch": 0.00017728271484375,
      "step": 29046,
      "training_step_time": 0.39223384857177734
    },
    {
      "epoch": 0.000177288818359375,
      "model_forward_time": 0.11966991424560547,
      "step": 29047
    },
    {
      "epoch": 0.000177288818359375,
      "step": 29047,
      "training_step_time": 0.4788072109222412
    },
    {
      "epoch": 0.000177294921875,
      "model_forward_time": 0.11838579177856445,
      "step": 29048
    },
    {
      "epoch": 0.000177294921875,
      "step": 29048,
      "training_step_time": 0.3931305408477783
    },
    {
      "epoch": 0.000177301025390625,
      "model_forward_time": 0.11871004104614258,
      "step": 29049
    },
    {
      "epoch": 0.000177301025390625,
      "step": 29049,
      "training_step_time": 0.3791017532348633
    },
    {
      "epoch": 0.00017730712890625,
      "grad_norm": 0.12828245759010315,
      "learning_rate": 5.67311687883883e-05,
      "loss": 0.0393,
      "step": 29050
    },
    {
      "epoch": 0.00017730712890625,
      "model_forward_time": 0.11579251289367676,
      "step": 29050
    },
    {
      "epoch": 0.00017730712890625,
      "step": 29050,
      "training_step_time": 0.41361069679260254
    },
    {
      "epoch": 0.000177313232421875,
      "model_forward_time": 0.11622858047485352,
      "step": 29051
    },
    {
      "epoch": 0.000177313232421875,
      "step": 29051,
      "training_step_time": 0.45641279220581055
    },
    {
      "epoch": 0.0001773193359375,
      "model_forward_time": 0.11852312088012695,
      "step": 29052
    },
    {
      "epoch": 0.0001773193359375,
      "step": 29052,
      "training_step_time": 0.5394449234008789
    },
    {
      "epoch": 0.000177325439453125,
      "model_forward_time": 0.11936378479003906,
      "step": 29053
    },
    {
      "epoch": 0.000177325439453125,
      "step": 29053,
      "training_step_time": 0.6598911285400391
    },
    {
      "epoch": 0.00017733154296875,
      "model_forward_time": 0.13573956489562988,
      "step": 29054
    },
    {
      "epoch": 0.00017733154296875,
      "step": 29054,
      "training_step_time": 0.813147783279419
    },
    {
      "epoch": 0.000177337646484375,
      "model_forward_time": 0.11837482452392578,
      "step": 29055
    },
    {
      "epoch": 0.000177337646484375,
      "step": 29055,
      "training_step_time": 0.7555668354034424
    },
    {
      "epoch": 0.00017734375,
      "model_forward_time": 0.1313033103942871,
      "step": 29056
    },
    {
      "epoch": 0.00017734375,
      "step": 29056,
      "training_step_time": 0.6640629768371582
    },
    {
      "epoch": 0.000177349853515625,
      "model_forward_time": 0.11874723434448242,
      "step": 29057
    },
    {
      "epoch": 0.000177349853515625,
      "step": 29057,
      "training_step_time": 0.6458759307861328
    },
    {
      "epoch": 0.00017735595703125,
      "model_forward_time": 0.12001609802246094,
      "step": 29058
    },
    {
      "epoch": 0.00017735595703125,
      "step": 29058,
      "training_step_time": 0.7326395511627197
    },
    {
      "epoch": 0.000177362060546875,
      "model_forward_time": 0.11813211441040039,
      "step": 29059
    },
    {
      "epoch": 0.000177362060546875,
      "step": 29059,
      "training_step_time": 0.629082441329956
    },
    {
      "epoch": 0.0001773681640625,
      "grad_norm": 0.14472129940986633,
      "learning_rate": 5.670386080045039e-05,
      "loss": 0.0456,
      "step": 29060
    },
    {
      "epoch": 0.0001773681640625,
      "model_forward_time": 0.12636470794677734,
      "step": 29060
    },
    {
      "epoch": 0.0001773681640625,
      "step": 29060,
      "training_step_time": 0.6264584064483643
    },
    {
      "epoch": 0.000177374267578125,
      "model_forward_time": 0.11872458457946777,
      "step": 29061
    },
    {
      "epoch": 0.000177374267578125,
      "step": 29061,
      "training_step_time": 0.6745436191558838
    },
    {
      "epoch": 0.00017738037109375,
      "model_forward_time": 0.11882781982421875,
      "step": 29062
    },
    {
      "epoch": 0.00017738037109375,
      "step": 29062,
      "training_step_time": 0.6713080406188965
    },
    {
      "epoch": 0.000177386474609375,
      "model_forward_time": 0.12208366394042969,
      "step": 29063
    },
    {
      "epoch": 0.000177386474609375,
      "step": 29063,
      "training_step_time": 0.7273187637329102
    },
    {
      "epoch": 0.000177392578125,
      "model_forward_time": 0.11907529830932617,
      "step": 29064
    },
    {
      "epoch": 0.000177392578125,
      "step": 29064,
      "training_step_time": 0.6383547782897949
    },
    {
      "epoch": 0.000177398681640625,
      "model_forward_time": 0.12403464317321777,
      "step": 29065
    },
    {
      "epoch": 0.000177398681640625,
      "step": 29065,
      "training_step_time": 0.7324666976928711
    },
    {
      "epoch": 0.00017740478515625,
      "model_forward_time": 0.12918710708618164,
      "step": 29066
    },
    {
      "epoch": 0.00017740478515625,
      "step": 29066,
      "training_step_time": 0.797374963760376
    },
    {
      "epoch": 0.000177410888671875,
      "model_forward_time": 0.12108755111694336,
      "step": 29067
    },
    {
      "epoch": 0.000177410888671875,
      "step": 29067,
      "training_step_time": 0.6073875427246094
    },
    {
      "epoch": 0.0001774169921875,
      "model_forward_time": 0.13336944580078125,
      "step": 29068
    },
    {
      "epoch": 0.0001774169921875,
      "step": 29068,
      "training_step_time": 0.710468053817749
    },
    {
      "epoch": 0.000177423095703125,
      "model_forward_time": 0.1197059154510498,
      "step": 29069
    },
    {
      "epoch": 0.000177423095703125,
      "step": 29069,
      "training_step_time": 0.6428449153900146
    },
    {
      "epoch": 0.00017742919921875,
      "grad_norm": 0.13818281888961792,
      "learning_rate": 5.667655077605659e-05,
      "loss": 0.0468,
      "step": 29070
    },
    {
      "epoch": 0.00017742919921875,
      "model_forward_time": 0.11671066284179688,
      "step": 29070
    },
    {
      "epoch": 0.00017742919921875,
      "step": 29070,
      "training_step_time": 0.7421705722808838
    },
    {
      "epoch": 0.000177435302734375,
      "model_forward_time": 0.12654781341552734,
      "step": 29071
    },
    {
      "epoch": 0.000177435302734375,
      "step": 29071,
      "training_step_time": 0.7708616256713867
    },
    {
      "epoch": 0.00017744140625,
      "model_forward_time": 0.11738157272338867,
      "step": 29072
    },
    {
      "epoch": 0.00017744140625,
      "step": 29072,
      "training_step_time": 0.621619701385498
    },
    {
      "epoch": 0.000177447509765625,
      "model_forward_time": 0.12036752700805664,
      "step": 29073
    },
    {
      "epoch": 0.000177447509765625,
      "step": 29073,
      "training_step_time": 0.6912376880645752
    },
    {
      "epoch": 0.00017745361328125,
      "model_forward_time": 0.12099218368530273,
      "step": 29074
    },
    {
      "epoch": 0.00017745361328125,
      "step": 29074,
      "training_step_time": 0.7282178401947021
    },
    {
      "epoch": 0.000177459716796875,
      "model_forward_time": 0.12155818939208984,
      "step": 29075
    },
    {
      "epoch": 0.000177459716796875,
      "step": 29075,
      "training_step_time": 0.6672792434692383
    },
    {
      "epoch": 0.0001774658203125,
      "model_forward_time": 0.11966109275817871,
      "step": 29076
    },
    {
      "epoch": 0.0001774658203125,
      "step": 29076,
      "training_step_time": 0.7612137794494629
    },
    {
      "epoch": 0.000177471923828125,
      "model_forward_time": 0.12179255485534668,
      "step": 29077
    },
    {
      "epoch": 0.000177471923828125,
      "step": 29077,
      "training_step_time": 0.6501553058624268
    },
    {
      "epoch": 0.00017747802734375,
      "model_forward_time": 0.1171407699584961,
      "step": 29078
    },
    {
      "epoch": 0.00017747802734375,
      "step": 29078,
      "training_step_time": 0.7371256351470947
    },
    {
      "epoch": 0.000177484130859375,
      "model_forward_time": 0.1197817325592041,
      "step": 29079
    },
    {
      "epoch": 0.000177484130859375,
      "step": 29079,
      "training_step_time": 0.6513831615447998
    },
    {
      "epoch": 0.000177490234375,
      "grad_norm": 0.13825790584087372,
      "learning_rate": 5.664923872350294e-05,
      "loss": 0.0505,
      "step": 29080
    },
    {
      "epoch": 0.000177490234375,
      "model_forward_time": 0.11929035186767578,
      "step": 29080
    },
    {
      "epoch": 0.000177490234375,
      "step": 29080,
      "training_step_time": 0.6873037815093994
    },
    {
      "epoch": 0.000177496337890625,
      "model_forward_time": 0.11901688575744629,
      "step": 29081
    },
    {
      "epoch": 0.000177496337890625,
      "step": 29081,
      "training_step_time": 0.7095615863800049
    },
    {
      "epoch": 0.00017750244140625,
      "model_forward_time": 0.11546015739440918,
      "step": 29082
    },
    {
      "epoch": 0.00017750244140625,
      "step": 29082,
      "training_step_time": 0.7611122131347656
    },
    {
      "epoch": 0.000177508544921875,
      "model_forward_time": 0.12039375305175781,
      "step": 29083
    },
    {
      "epoch": 0.000177508544921875,
      "step": 29083,
      "training_step_time": 0.716254711151123
    },
    {
      "epoch": 0.0001775146484375,
      "model_forward_time": 0.1193838119506836,
      "step": 29084
    },
    {
      "epoch": 0.0001775146484375,
      "step": 29084,
      "training_step_time": 0.6548774242401123
    },
    {
      "epoch": 0.000177520751953125,
      "model_forward_time": 0.11693811416625977,
      "step": 29085
    },
    {
      "epoch": 0.000177520751953125,
      "step": 29085,
      "training_step_time": 0.6596741676330566
    },
    {
      "epoch": 0.00017752685546875,
      "model_forward_time": 0.11906933784484863,
      "step": 29086
    },
    {
      "epoch": 0.00017752685546875,
      "step": 29086,
      "training_step_time": 0.6935381889343262
    },
    {
      "epoch": 0.000177532958984375,
      "model_forward_time": 0.11907291412353516,
      "step": 29087
    },
    {
      "epoch": 0.000177532958984375,
      "step": 29087,
      "training_step_time": 0.7210268974304199
    },
    {
      "epoch": 0.0001775390625,
      "model_forward_time": 0.11817693710327148,
      "step": 29088
    },
    {
      "epoch": 0.0001775390625,
      "step": 29088,
      "training_step_time": 0.6958632469177246
    },
    {
      "epoch": 0.000177545166015625,
      "model_forward_time": 0.1212623119354248,
      "step": 29089
    },
    {
      "epoch": 0.000177545166015625,
      "step": 29089,
      "training_step_time": 0.6472001075744629
    },
    {
      "epoch": 0.00017755126953125,
      "grad_norm": 0.14784665405750275,
      "learning_rate": 5.662192465108613e-05,
      "loss": 0.0567,
      "step": 29090
    },
    {
      "epoch": 0.00017755126953125,
      "model_forward_time": 0.12977981567382812,
      "step": 29090
    },
    {
      "epoch": 0.00017755126953125,
      "step": 29090,
      "training_step_time": 0.6580827236175537
    },
    {
      "epoch": 0.000177557373046875,
      "model_forward_time": 0.11780023574829102,
      "step": 29091
    },
    {
      "epoch": 0.000177557373046875,
      "step": 29091,
      "training_step_time": 0.7461130619049072
    },
    {
      "epoch": 0.0001775634765625,
      "model_forward_time": 0.12038874626159668,
      "step": 29092
    },
    {
      "epoch": 0.0001775634765625,
      "step": 29092,
      "training_step_time": 0.7574260234832764
    },
    {
      "epoch": 0.000177569580078125,
      "model_forward_time": 0.1268174648284912,
      "step": 29093
    },
    {
      "epoch": 0.000177569580078125,
      "step": 29093,
      "training_step_time": 0.573328971862793
    },
    {
      "epoch": 0.00017757568359375,
      "model_forward_time": 0.1201016902923584,
      "step": 29094
    },
    {
      "epoch": 0.00017757568359375,
      "step": 29094,
      "training_step_time": 0.6486477851867676
    },
    {
      "epoch": 0.000177581787109375,
      "model_forward_time": 0.12080812454223633,
      "step": 29095
    },
    {
      "epoch": 0.000177581787109375,
      "step": 29095,
      "training_step_time": 0.6700625419616699
    },
    {
      "epoch": 0.000177587890625,
      "model_forward_time": 0.11872291564941406,
      "step": 29096
    },
    {
      "epoch": 0.000177587890625,
      "step": 29096,
      "training_step_time": 0.7028787136077881
    },
    {
      "epoch": 0.000177593994140625,
      "model_forward_time": 0.11867642402648926,
      "step": 29097
    },
    {
      "epoch": 0.000177593994140625,
      "step": 29097,
      "training_step_time": 0.657351016998291
    },
    {
      "epoch": 0.00017760009765625,
      "model_forward_time": 0.12034463882446289,
      "step": 29098
    },
    {
      "epoch": 0.00017760009765625,
      "step": 29098,
      "training_step_time": 0.617262601852417
    },
    {
      "epoch": 0.000177606201171875,
      "model_forward_time": 0.11838674545288086,
      "step": 29099
    },
    {
      "epoch": 0.000177606201171875,
      "step": 29099,
      "training_step_time": 0.6253888607025146
    },
    {
      "epoch": 0.0001776123046875,
      "grad_norm": 0.12240737676620483,
      "learning_rate": 5.6594608567103456e-05,
      "loss": 0.0483,
      "step": 29100
    },
    {
      "epoch": 0.0001776123046875,
      "model_forward_time": 0.11634945869445801,
      "step": 29100
    },
    {
      "epoch": 0.0001776123046875,
      "step": 29100,
      "training_step_time": 0.7007851600646973
    },
    {
      "epoch": 0.000177618408203125,
      "model_forward_time": 0.12361907958984375,
      "step": 29101
    },
    {
      "epoch": 0.000177618408203125,
      "step": 29101,
      "training_step_time": 0.731114387512207
    },
    {
      "epoch": 0.00017762451171875,
      "model_forward_time": 0.1188507080078125,
      "step": 29102
    },
    {
      "epoch": 0.00017762451171875,
      "step": 29102,
      "training_step_time": 0.7501835823059082
    },
    {
      "epoch": 0.000177630615234375,
      "model_forward_time": 0.12098813056945801,
      "step": 29103
    },
    {
      "epoch": 0.000177630615234375,
      "step": 29103,
      "training_step_time": 0.6235637664794922
    },
    {
      "epoch": 0.00017763671875,
      "model_forward_time": 0.1203620433807373,
      "step": 29104
    },
    {
      "epoch": 0.00017763671875,
      "step": 29104,
      "training_step_time": 0.7246999740600586
    },
    {
      "epoch": 0.000177642822265625,
      "model_forward_time": 0.12288117408752441,
      "step": 29105
    },
    {
      "epoch": 0.000177642822265625,
      "step": 29105,
      "training_step_time": 0.697939395904541
    },
    {
      "epoch": 0.00017764892578125,
      "model_forward_time": 0.11840081214904785,
      "step": 29106
    },
    {
      "epoch": 0.00017764892578125,
      "step": 29106,
      "training_step_time": 0.6154510974884033
    },
    {
      "epoch": 0.000177655029296875,
      "model_forward_time": 0.12177395820617676,
      "step": 29107
    },
    {
      "epoch": 0.000177655029296875,
      "step": 29107,
      "training_step_time": 0.6474747657775879
    },
    {
      "epoch": 0.0001776611328125,
      "model_forward_time": 0.11936163902282715,
      "step": 29108
    },
    {
      "epoch": 0.0001776611328125,
      "step": 29108,
      "training_step_time": 0.6482133865356445
    },
    {
      "epoch": 0.000177667236328125,
      "model_forward_time": 0.12264823913574219,
      "step": 29109
    },
    {
      "epoch": 0.000177667236328125,
      "step": 29109,
      "training_step_time": 0.649796724319458
    },
    {
      "epoch": 0.00017767333984375,
      "grad_norm": 0.17628949880599976,
      "learning_rate": 5.656729047985281e-05,
      "loss": 0.0549,
      "step": 29110
    },
    {
      "epoch": 0.00017767333984375,
      "model_forward_time": 0.12603425979614258,
      "step": 29110
    },
    {
      "epoch": 0.00017767333984375,
      "step": 29110,
      "training_step_time": 0.6332330703735352
    },
    {
      "epoch": 0.000177679443359375,
      "model_forward_time": 0.11797642707824707,
      "step": 29111
    },
    {
      "epoch": 0.000177679443359375,
      "step": 29111,
      "training_step_time": 0.8174324035644531
    },
    {
      "epoch": 0.000177685546875,
      "model_forward_time": 0.11761617660522461,
      "step": 29112
    },
    {
      "epoch": 0.000177685546875,
      "step": 29112,
      "training_step_time": 0.6108269691467285
    },
    {
      "epoch": 0.000177691650390625,
      "model_forward_time": 0.12023735046386719,
      "step": 29113
    },
    {
      "epoch": 0.000177691650390625,
      "step": 29113,
      "training_step_time": 0.6621408462524414
    },
    {
      "epoch": 0.00017769775390625,
      "model_forward_time": 0.12203145027160645,
      "step": 29114
    },
    {
      "epoch": 0.00017769775390625,
      "step": 29114,
      "training_step_time": 0.7134017944335938
    },
    {
      "epoch": 0.000177703857421875,
      "model_forward_time": 0.12000846862792969,
      "step": 29115
    },
    {
      "epoch": 0.000177703857421875,
      "step": 29115,
      "training_step_time": 0.6134142875671387
    },
    {
      "epoch": 0.0001777099609375,
      "model_forward_time": 0.11925792694091797,
      "step": 29116
    },
    {
      "epoch": 0.0001777099609375,
      "step": 29116,
      "training_step_time": 0.6383466720581055
    },
    {
      "epoch": 0.000177716064453125,
      "model_forward_time": 0.12102723121643066,
      "step": 29117
    },
    {
      "epoch": 0.000177716064453125,
      "step": 29117,
      "training_step_time": 0.5647919178009033
    },
    {
      "epoch": 0.00017772216796875,
      "model_forward_time": 0.1223447322845459,
      "step": 29118
    },
    {
      "epoch": 0.00017772216796875,
      "step": 29118,
      "training_step_time": 0.6703805923461914
    },
    {
      "epoch": 0.000177728271484375,
      "model_forward_time": 0.12170290946960449,
      "step": 29119
    },
    {
      "epoch": 0.000177728271484375,
      "step": 29119,
      "training_step_time": 0.6715853214263916
    },
    {
      "epoch": 0.000177734375,
      "grad_norm": 0.12458892911672592,
      "learning_rate": 5.653997039763273e-05,
      "loss": 0.0478,
      "step": 29120
    },
    {
      "epoch": 0.000177734375,
      "model_forward_time": 0.12331438064575195,
      "step": 29120
    },
    {
      "epoch": 0.000177734375,
      "step": 29120,
      "training_step_time": 0.7195131778717041
    },
    {
      "epoch": 0.000177740478515625,
      "model_forward_time": 0.11860203742980957,
      "step": 29121
    },
    {
      "epoch": 0.000177740478515625,
      "step": 29121,
      "training_step_time": 0.6630480289459229
    },
    {
      "epoch": 0.00017774658203125,
      "model_forward_time": 0.11655211448669434,
      "step": 29122
    },
    {
      "epoch": 0.00017774658203125,
      "step": 29122,
      "training_step_time": 0.4763789176940918
    },
    {
      "epoch": 0.000177752685546875,
      "model_forward_time": 0.11706161499023438,
      "step": 29123
    },
    {
      "epoch": 0.000177752685546875,
      "step": 29123,
      "training_step_time": 0.5136289596557617
    },
    {
      "epoch": 0.0001777587890625,
      "model_forward_time": 0.11729216575622559,
      "step": 29124
    },
    {
      "epoch": 0.0001777587890625,
      "step": 29124,
      "training_step_time": 0.4621455669403076
    },
    {
      "epoch": 0.000177764892578125,
      "model_forward_time": 0.11769223213195801,
      "step": 29125
    },
    {
      "epoch": 0.000177764892578125,
      "step": 29125,
      "training_step_time": 0.47371578216552734
    },
    {
      "epoch": 0.00017777099609375,
      "model_forward_time": 0.11553525924682617,
      "step": 29126
    },
    {
      "epoch": 0.00017777099609375,
      "step": 29126,
      "training_step_time": 0.4380378723144531
    },
    {
      "epoch": 0.000177777099609375,
      "model_forward_time": 0.11595773696899414,
      "step": 29127
    },
    {
      "epoch": 0.000177777099609375,
      "step": 29127,
      "training_step_time": 0.40288591384887695
    },
    {
      "epoch": 0.000177783203125,
      "model_forward_time": 0.11627078056335449,
      "step": 29128
    },
    {
      "epoch": 0.000177783203125,
      "step": 29128,
      "training_step_time": 0.390153169631958
    },
    {
      "epoch": 0.000177789306640625,
      "model_forward_time": 0.1155550479888916,
      "step": 29129
    },
    {
      "epoch": 0.000177789306640625,
      "step": 29129,
      "training_step_time": 0.39891505241394043
    },
    {
      "epoch": 0.00017779541015625,
      "grad_norm": 0.1187533289194107,
      "learning_rate": 5.65126483287423e-05,
      "loss": 0.0461,
      "step": 29130
    },
    {
      "epoch": 0.00017779541015625,
      "model_forward_time": 0.11580276489257812,
      "step": 29130
    },
    {
      "epoch": 0.00017779541015625,
      "step": 29130,
      "training_step_time": 0.40467333793640137
    },
    {
      "epoch": 0.000177801513671875,
      "model_forward_time": 0.11646914482116699,
      "step": 29131
    },
    {
      "epoch": 0.000177801513671875,
      "step": 29131,
      "training_step_time": 0.4185018539428711
    },
    {
      "epoch": 0.0001778076171875,
      "model_forward_time": 0.11562132835388184,
      "step": 29132
    },
    {
      "epoch": 0.0001778076171875,
      "step": 29132,
      "training_step_time": 0.3721585273742676
    },
    {
      "epoch": 0.000177813720703125,
      "model_forward_time": 0.11582112312316895,
      "step": 29133
    },
    {
      "epoch": 0.000177813720703125,
      "step": 29133,
      "training_step_time": 0.4121122360229492
    },
    {
      "epoch": 0.00017781982421875,
      "model_forward_time": 0.11528754234313965,
      "step": 29134
    },
    {
      "epoch": 0.00017781982421875,
      "step": 29134,
      "training_step_time": 0.48265957832336426
    },
    {
      "epoch": 0.000177825927734375,
      "model_forward_time": 0.11502218246459961,
      "step": 29135
    },
    {
      "epoch": 0.000177825927734375,
      "step": 29135,
      "training_step_time": 0.4441642761230469
    },
    {
      "epoch": 0.00017783203125,
      "model_forward_time": 0.11583328247070312,
      "step": 29136
    },
    {
      "epoch": 0.00017783203125,
      "step": 29136,
      "training_step_time": 0.4552440643310547
    },
    {
      "epoch": 0.000177838134765625,
      "model_forward_time": 0.11620283126831055,
      "step": 29137
    },
    {
      "epoch": 0.000177838134765625,
      "step": 29137,
      "training_step_time": 0.5117464065551758
    },
    {
      "epoch": 0.00017784423828125,
      "model_forward_time": 0.11545491218566895,
      "step": 29138
    },
    {
      "epoch": 0.00017784423828125,
      "step": 29138,
      "training_step_time": 0.3703041076660156
    },
    {
      "epoch": 0.000177850341796875,
      "model_forward_time": 0.11783123016357422,
      "step": 29139
    },
    {
      "epoch": 0.000177850341796875,
      "step": 29139,
      "training_step_time": 0.41057705879211426
    },
    {
      "epoch": 0.0001778564453125,
      "grad_norm": 0.15079261362552643,
      "learning_rate": 5.648532428148128e-05,
      "loss": 0.0493,
      "step": 29140
    },
    {
      "epoch": 0.0001778564453125,
      "model_forward_time": 0.11570215225219727,
      "step": 29140
    },
    {
      "epoch": 0.0001778564453125,
      "step": 29140,
      "training_step_time": 0.4151039123535156
    },
    {
      "epoch": 0.000177862548828125,
      "model_forward_time": 0.1158437728881836,
      "step": 29141
    },
    {
      "epoch": 0.000177862548828125,
      "step": 29141,
      "training_step_time": 0.4318838119506836
    },
    {
      "epoch": 0.00017786865234375,
      "model_forward_time": 0.1156303882598877,
      "step": 29142
    },
    {
      "epoch": 0.00017786865234375,
      "step": 29142,
      "training_step_time": 0.4087190628051758
    },
    {
      "epoch": 0.000177874755859375,
      "model_forward_time": 0.1152963638305664,
      "step": 29143
    },
    {
      "epoch": 0.000177874755859375,
      "step": 29143,
      "training_step_time": 0.400409460067749
    },
    {
      "epoch": 0.000177880859375,
      "model_forward_time": 0.11552572250366211,
      "step": 29144
    },
    {
      "epoch": 0.000177880859375,
      "step": 29144,
      "training_step_time": 0.4084005355834961
    },
    {
      "epoch": 0.000177886962890625,
      "model_forward_time": 0.11660885810852051,
      "step": 29145
    },
    {
      "epoch": 0.000177886962890625,
      "step": 29145,
      "training_step_time": 0.4540119171142578
    },
    {
      "epoch": 0.00017789306640625,
      "model_forward_time": 0.11525821685791016,
      "step": 29146
    },
    {
      "epoch": 0.00017789306640625,
      "step": 29146,
      "training_step_time": 0.39583444595336914
    },
    {
      "epoch": 0.000177899169921875,
      "model_forward_time": 0.11530590057373047,
      "step": 29147
    },
    {
      "epoch": 0.000177899169921875,
      "step": 29147,
      "training_step_time": 0.45018720626831055
    },
    {
      "epoch": 0.0001779052734375,
      "model_forward_time": 0.11534380912780762,
      "step": 29148
    },
    {
      "epoch": 0.0001779052734375,
      "step": 29148,
      "training_step_time": 0.39084553718566895
    },
    {
      "epoch": 0.000177911376953125,
      "model_forward_time": 0.11518645286560059,
      "step": 29149
    },
    {
      "epoch": 0.000177911376953125,
      "step": 29149,
      "training_step_time": 0.40059971809387207
    },
    {
      "epoch": 0.00017791748046875,
      "grad_norm": 0.16962991654872894,
      "learning_rate": 5.645799826414997e-05,
      "loss": 0.0457,
      "step": 29150
    },
    {
      "epoch": 0.00017791748046875,
      "model_forward_time": 0.11519145965576172,
      "step": 29150
    },
    {
      "epoch": 0.00017791748046875,
      "step": 29150,
      "training_step_time": 0.3775761127471924
    },
    {
      "epoch": 0.000177923583984375,
      "model_forward_time": 0.11484050750732422,
      "step": 29151
    },
    {
      "epoch": 0.000177923583984375,
      "step": 29151,
      "training_step_time": 0.4060182571411133
    },
    {
      "epoch": 0.0001779296875,
      "model_forward_time": 0.11499905586242676,
      "step": 29152
    },
    {
      "epoch": 0.0001779296875,
      "step": 29152,
      "training_step_time": 0.39386677742004395
    },
    {
      "epoch": 0.000177935791015625,
      "model_forward_time": 0.11570167541503906,
      "step": 29153
    },
    {
      "epoch": 0.000177935791015625,
      "step": 29153,
      "training_step_time": 0.398104190826416
    },
    {
      "epoch": 0.00017794189453125,
      "model_forward_time": 0.11576294898986816,
      "step": 29154
    },
    {
      "epoch": 0.00017794189453125,
      "step": 29154,
      "training_step_time": 0.4537489414215088
    },
    {
      "epoch": 0.000177947998046875,
      "model_forward_time": 0.1214902400970459,
      "step": 29155
    },
    {
      "epoch": 0.000177947998046875,
      "step": 29155,
      "training_step_time": 0.4044373035430908
    },
    {
      "epoch": 0.0001779541015625,
      "model_forward_time": 0.11602306365966797,
      "step": 29156
    },
    {
      "epoch": 0.0001779541015625,
      "step": 29156,
      "training_step_time": 0.39766669273376465
    },
    {
      "epoch": 0.000177960205078125,
      "model_forward_time": 0.11512160301208496,
      "step": 29157
    },
    {
      "epoch": 0.000177960205078125,
      "step": 29157,
      "training_step_time": 0.3830094337463379
    },
    {
      "epoch": 0.00017796630859375,
      "model_forward_time": 0.11542749404907227,
      "step": 29158
    },
    {
      "epoch": 0.00017796630859375,
      "step": 29158,
      "training_step_time": 0.40238261222839355
    },
    {
      "epoch": 0.000177972412109375,
      "model_forward_time": 0.11494016647338867,
      "step": 29159
    },
    {
      "epoch": 0.000177972412109375,
      "step": 29159,
      "training_step_time": 0.4156217575073242
    },
    {
      "epoch": 0.000177978515625,
      "grad_norm": 0.14739808440208435,
      "learning_rate": 5.6430670285049314e-05,
      "loss": 0.0475,
      "step": 29160
    },
    {
      "epoch": 0.000177978515625,
      "model_forward_time": 0.11478352546691895,
      "step": 29160
    },
    {
      "epoch": 0.000177978515625,
      "step": 29160,
      "training_step_time": 0.3963022232055664
    },
    {
      "epoch": 0.000177984619140625,
      "model_forward_time": 0.11564445495605469,
      "step": 29161
    },
    {
      "epoch": 0.000177984619140625,
      "step": 29161,
      "training_step_time": 0.5905206203460693
    },
    {
      "epoch": 0.00017799072265625,
      "model_forward_time": 0.11488580703735352,
      "step": 29162
    },
    {
      "epoch": 0.00017799072265625,
      "step": 29162,
      "training_step_time": 0.3985910415649414
    },
    {
      "epoch": 0.000177996826171875,
      "model_forward_time": 0.11481761932373047,
      "step": 29163
    },
    {
      "epoch": 0.000177996826171875,
      "step": 29163,
      "training_step_time": 0.40065646171569824
    },
    {
      "epoch": 0.0001780029296875,
      "model_forward_time": 0.11472058296203613,
      "step": 29164
    },
    {
      "epoch": 0.0001780029296875,
      "step": 29164,
      "training_step_time": 0.4913973808288574
    },
    {
      "epoch": 0.000178009033203125,
      "model_forward_time": 0.11508035659790039,
      "step": 29165
    },
    {
      "epoch": 0.000178009033203125,
      "step": 29165,
      "training_step_time": 0.4194831848144531
    },
    {
      "epoch": 0.00017801513671875,
      "model_forward_time": 0.11622810363769531,
      "step": 29166
    },
    {
      "epoch": 0.00017801513671875,
      "step": 29166,
      "training_step_time": 0.49633073806762695
    },
    {
      "epoch": 0.000178021240234375,
      "model_forward_time": 0.1149742603302002,
      "step": 29167
    },
    {
      "epoch": 0.000178021240234375,
      "step": 29167,
      "training_step_time": 0.474977970123291
    },
    {
      "epoch": 0.00017802734375,
      "model_forward_time": 0.11521434783935547,
      "step": 29168
    },
    {
      "epoch": 0.00017802734375,
      "step": 29168,
      "training_step_time": 0.42939281463623047
    },
    {
      "epoch": 0.000178033447265625,
      "model_forward_time": 0.11500024795532227,
      "step": 29169
    },
    {
      "epoch": 0.000178033447265625,
      "step": 29169,
      "training_step_time": 0.4210996627807617
    },
    {
      "epoch": 0.00017803955078125,
      "grad_norm": 0.1632203459739685,
      "learning_rate": 5.6403340352480806e-05,
      "loss": 0.0463,
      "step": 29170
    },
    {
      "epoch": 0.00017803955078125,
      "model_forward_time": 0.11483287811279297,
      "step": 29170
    },
    {
      "epoch": 0.00017803955078125,
      "step": 29170,
      "training_step_time": 0.40327930450439453
    },
    {
      "epoch": 0.000178045654296875,
      "model_forward_time": 0.11517453193664551,
      "step": 29171
    },
    {
      "epoch": 0.000178045654296875,
      "step": 29171,
      "training_step_time": 0.3887197971343994
    },
    {
      "epoch": 0.0001780517578125,
      "model_forward_time": 0.11564850807189941,
      "step": 29172
    },
    {
      "epoch": 0.0001780517578125,
      "step": 29172,
      "training_step_time": 0.4008796215057373
    },
    {
      "epoch": 0.000178057861328125,
      "model_forward_time": 0.11507773399353027,
      "step": 29173
    },
    {
      "epoch": 0.000178057861328125,
      "step": 29173,
      "training_step_time": 0.4040074348449707
    },
    {
      "epoch": 0.00017806396484375,
      "model_forward_time": 0.11543726921081543,
      "step": 29174
    },
    {
      "epoch": 0.00017806396484375,
      "step": 29174,
      "training_step_time": 0.39123082160949707
    },
    {
      "epoch": 0.000178070068359375,
      "model_forward_time": 0.11485099792480469,
      "step": 29175
    },
    {
      "epoch": 0.000178070068359375,
      "step": 29175,
      "training_step_time": 0.38760995864868164
    },
    {
      "epoch": 0.000178076171875,
      "model_forward_time": 0.11515569686889648,
      "step": 29176
    },
    {
      "epoch": 0.000178076171875,
      "step": 29176,
      "training_step_time": 0.6877567768096924
    },
    {
      "epoch": 0.000178082275390625,
      "model_forward_time": 0.11552691459655762,
      "step": 29177
    },
    {
      "epoch": 0.000178082275390625,
      "step": 29177,
      "training_step_time": 0.44150686264038086
    },
    {
      "epoch": 0.00017808837890625,
      "model_forward_time": 0.11528801918029785,
      "step": 29178
    },
    {
      "epoch": 0.00017808837890625,
      "step": 29178,
      "training_step_time": 0.42206835746765137
    },
    {
      "epoch": 0.000178094482421875,
      "model_forward_time": 0.11463475227355957,
      "step": 29179
    },
    {
      "epoch": 0.000178094482421875,
      "step": 29179,
      "training_step_time": 0.4354076385498047
    },
    {
      "epoch": 0.0001781005859375,
      "grad_norm": 0.10773859173059464,
      "learning_rate": 5.637600847474656e-05,
      "loss": 0.0499,
      "step": 29180
    },
    {
      "epoch": 0.0001781005859375,
      "model_forward_time": 0.11525726318359375,
      "step": 29180
    },
    {
      "epoch": 0.0001781005859375,
      "step": 29180,
      "training_step_time": 0.3903021812438965
    },
    {
      "epoch": 0.000178106689453125,
      "model_forward_time": 0.11472487449645996,
      "step": 29181
    },
    {
      "epoch": 0.000178106689453125,
      "step": 29181,
      "training_step_time": 0.5166342258453369
    },
    {
      "epoch": 0.00017811279296875,
      "model_forward_time": 0.11444377899169922,
      "step": 29182
    },
    {
      "epoch": 0.00017811279296875,
      "step": 29182,
      "training_step_time": 0.6734309196472168
    },
    {
      "epoch": 0.000178118896484375,
      "model_forward_time": 0.11497902870178223,
      "step": 29183
    },
    {
      "epoch": 0.000178118896484375,
      "step": 29183,
      "training_step_time": 0.4837007522583008
    },
    {
      "epoch": 0.000178125,
      "model_forward_time": 0.1146848201751709,
      "step": 29184
    },
    {
      "epoch": 0.000178125,
      "step": 29184,
      "training_step_time": 0.4194936752319336
    },
    {
      "epoch": 0.000178131103515625,
      "model_forward_time": 0.11559867858886719,
      "step": 29185
    },
    {
      "epoch": 0.000178131103515625,
      "step": 29185,
      "training_step_time": 0.40462827682495117
    },
    {
      "epoch": 0.00017813720703125,
      "model_forward_time": 0.11439204216003418,
      "step": 29186
    },
    {
      "epoch": 0.00017813720703125,
      "step": 29186,
      "training_step_time": 0.3852379322052002
    },
    {
      "epoch": 0.000178143310546875,
      "model_forward_time": 0.1144096851348877,
      "step": 29187
    },
    {
      "epoch": 0.000178143310546875,
      "step": 29187,
      "training_step_time": 0.3975536823272705
    },
    {
      "epoch": 0.0001781494140625,
      "model_forward_time": 0.114227294921875,
      "step": 29188
    },
    {
      "epoch": 0.0001781494140625,
      "step": 29188,
      "training_step_time": 0.6692228317260742
    },
    {
      "epoch": 0.000178155517578125,
      "model_forward_time": 0.11521792411804199,
      "step": 29189
    },
    {
      "epoch": 0.000178155517578125,
      "step": 29189,
      "training_step_time": 0.44818973541259766
    },
    {
      "epoch": 0.00017816162109375,
      "grad_norm": 0.13837648928165436,
      "learning_rate": 5.634867466014932e-05,
      "loss": 0.0448,
      "step": 29190
    },
    {
      "epoch": 0.00017816162109375,
      "model_forward_time": 0.11460232734680176,
      "step": 29190
    },
    {
      "epoch": 0.00017816162109375,
      "step": 29190,
      "training_step_time": 0.485278844833374
    },
    {
      "epoch": 0.000178167724609375,
      "model_forward_time": 0.11425304412841797,
      "step": 29191
    },
    {
      "epoch": 0.000178167724609375,
      "step": 29191,
      "training_step_time": 0.394742488861084
    },
    {
      "epoch": 0.000178173828125,
      "model_forward_time": 0.11521410942077637,
      "step": 29192
    },
    {
      "epoch": 0.000178173828125,
      "step": 29192,
      "training_step_time": 0.4086172580718994
    },
    {
      "epoch": 0.000178179931640625,
      "model_forward_time": 0.1147007942199707,
      "step": 29193
    },
    {
      "epoch": 0.000178179931640625,
      "step": 29193,
      "training_step_time": 0.4668760299682617
    },
    {
      "epoch": 0.00017818603515625,
      "model_forward_time": 0.11459541320800781,
      "step": 29194
    },
    {
      "epoch": 0.00017818603515625,
      "step": 29194,
      "training_step_time": 0.6057329177856445
    },
    {
      "epoch": 0.000178192138671875,
      "model_forward_time": 0.11487388610839844,
      "step": 29195
    },
    {
      "epoch": 0.000178192138671875,
      "step": 29195,
      "training_step_time": 0.43285536766052246
    },
    {
      "epoch": 0.0001781982421875,
      "model_forward_time": 0.11484074592590332,
      "step": 29196
    },
    {
      "epoch": 0.0001781982421875,
      "step": 29196,
      "training_step_time": 0.4811570644378662
    },
    {
      "epoch": 0.000178204345703125,
      "model_forward_time": 0.1142268180847168,
      "step": 29197
    },
    {
      "epoch": 0.000178204345703125,
      "step": 29197,
      "training_step_time": 0.41482019424438477
    },
    {
      "epoch": 0.00017821044921875,
      "model_forward_time": 0.11483883857727051,
      "step": 29198
    },
    {
      "epoch": 0.00017821044921875,
      "step": 29198,
      "training_step_time": 0.38704824447631836
    },
    {
      "epoch": 0.000178216552734375,
      "model_forward_time": 0.11560893058776855,
      "step": 29199
    },
    {
      "epoch": 0.000178216552734375,
      "step": 29199,
      "training_step_time": 0.3908367156982422
    },
    {
      "epoch": 0.00017822265625,
      "grad_norm": 0.11941671371459961,
      "learning_rate": 5.6321338916992315e-05,
      "loss": 0.0507,
      "step": 29200
    },
    {
      "epoch": 0.00017822265625,
      "model_forward_time": 0.11532878875732422,
      "step": 29200
    },
    {
      "epoch": 0.00017822265625,
      "step": 29200,
      "training_step_time": 0.5187742710113525
    },
    {
      "epoch": 0.000178228759765625,
      "model_forward_time": 0.11467337608337402,
      "step": 29201
    },
    {
      "epoch": 0.000178228759765625,
      "step": 29201,
      "training_step_time": 0.38568115234375
    },
    {
      "epoch": 0.00017823486328125,
      "model_forward_time": 0.11606907844543457,
      "step": 29202
    },
    {
      "epoch": 0.00017823486328125,
      "step": 29202,
      "training_step_time": 0.4559934139251709
    },
    {
      "epoch": 0.000178240966796875,
      "model_forward_time": 0.11466693878173828,
      "step": 29203
    },
    {
      "epoch": 0.000178240966796875,
      "step": 29203,
      "training_step_time": 0.4447944164276123
    },
    {
      "epoch": 0.0001782470703125,
      "model_forward_time": 0.11496162414550781,
      "step": 29204
    },
    {
      "epoch": 0.0001782470703125,
      "step": 29204,
      "training_step_time": 0.3958430290222168
    },
    {
      "epoch": 0.000178253173828125,
      "model_forward_time": 0.11487102508544922,
      "step": 29205
    },
    {
      "epoch": 0.000178253173828125,
      "step": 29205,
      "training_step_time": 0.44062256813049316
    },
    {
      "epoch": 0.00017825927734375,
      "model_forward_time": 0.1152031421661377,
      "step": 29206
    },
    {
      "epoch": 0.00017825927734375,
      "step": 29206,
      "training_step_time": 0.6403443813323975
    },
    {
      "epoch": 0.000178265380859375,
      "model_forward_time": 0.1146540641784668,
      "step": 29207
    },
    {
      "epoch": 0.000178265380859375,
      "step": 29207,
      "training_step_time": 0.43973469734191895
    },
    {
      "epoch": 0.000178271484375,
      "model_forward_time": 0.11453604698181152,
      "step": 29208
    },
    {
      "epoch": 0.000178271484375,
      "step": 29208,
      "training_step_time": 0.3988971710205078
    },
    {
      "epoch": 0.000178277587890625,
      "model_forward_time": 0.11449527740478516,
      "step": 29209
    },
    {
      "epoch": 0.000178277587890625,
      "step": 29209,
      "training_step_time": 0.4974961280822754
    },
    {
      "epoch": 0.00017828369140625,
      "grad_norm": 0.1437421292066574,
      "learning_rate": 5.6294001253579475e-05,
      "loss": 0.0486,
      "step": 29210
    },
    {
      "epoch": 0.00017828369140625,
      "model_forward_time": 0.11473989486694336,
      "step": 29210
    },
    {
      "epoch": 0.00017828369140625,
      "step": 29210,
      "training_step_time": 0.4765307903289795
    },
    {
      "epoch": 0.000178289794921875,
      "model_forward_time": 0.11502718925476074,
      "step": 29211
    },
    {
      "epoch": 0.000178289794921875,
      "step": 29211,
      "training_step_time": 0.3967857360839844
    },
    {
      "epoch": 0.0001782958984375,
      "model_forward_time": 0.11516141891479492,
      "step": 29212
    },
    {
      "epoch": 0.0001782958984375,
      "step": 29212,
      "training_step_time": 0.6002957820892334
    },
    {
      "epoch": 0.000178302001953125,
      "model_forward_time": 0.11577939987182617,
      "step": 29213
    },
    {
      "epoch": 0.000178302001953125,
      "step": 29213,
      "training_step_time": 0.3825984001159668
    },
    {
      "epoch": 0.00017830810546875,
      "model_forward_time": 0.1152338981628418,
      "step": 29214
    },
    {
      "epoch": 0.00017830810546875,
      "step": 29214,
      "training_step_time": 0.38299989700317383
    },
    {
      "epoch": 0.000178314208984375,
      "model_forward_time": 0.114349365234375,
      "step": 29215
    },
    {
      "epoch": 0.000178314208984375,
      "step": 29215,
      "training_step_time": 0.3958907127380371
    },
    {
      "epoch": 0.0001783203125,
      "model_forward_time": 0.1146395206451416,
      "step": 29216
    },
    {
      "epoch": 0.0001783203125,
      "step": 29216,
      "training_step_time": 0.4002997875213623
    },
    {
      "epoch": 0.000178326416015625,
      "model_forward_time": 0.11500811576843262,
      "step": 29217
    },
    {
      "epoch": 0.000178326416015625,
      "step": 29217,
      "training_step_time": 0.515519380569458
    },
    {
      "epoch": 0.00017833251953125,
      "model_forward_time": 0.11551880836486816,
      "step": 29218
    },
    {
      "epoch": 0.00017833251953125,
      "step": 29218,
      "training_step_time": 0.6020004749298096
    },
    {
      "epoch": 0.000178338623046875,
      "model_forward_time": 0.11478257179260254,
      "step": 29219
    },
    {
      "epoch": 0.000178338623046875,
      "step": 29219,
      "training_step_time": 0.4445946216583252
    },
    {
      "epoch": 0.0001783447265625,
      "grad_norm": 0.12105084955692291,
      "learning_rate": 5.6266661678215216e-05,
      "loss": 0.05,
      "step": 29220
    },
    {
      "epoch": 0.0001783447265625,
      "model_forward_time": 0.11497330665588379,
      "step": 29220
    },
    {
      "epoch": 0.0001783447265625,
      "step": 29220,
      "training_step_time": 0.45542454719543457
    },
    {
      "epoch": 0.000178350830078125,
      "model_forward_time": 0.11465144157409668,
      "step": 29221
    },
    {
      "epoch": 0.000178350830078125,
      "step": 29221,
      "training_step_time": 0.4144885540008545
    },
    {
      "epoch": 0.00017835693359375,
      "model_forward_time": 0.11480045318603516,
      "step": 29222
    },
    {
      "epoch": 0.00017835693359375,
      "step": 29222,
      "training_step_time": 0.43640613555908203
    },
    {
      "epoch": 0.000178363037109375,
      "model_forward_time": 0.11514568328857422,
      "step": 29223
    },
    {
      "epoch": 0.000178363037109375,
      "step": 29223,
      "training_step_time": 0.4139857292175293
    },
    {
      "epoch": 0.000178369140625,
      "model_forward_time": 0.11490201950073242,
      "step": 29224
    },
    {
      "epoch": 0.000178369140625,
      "step": 29224,
      "training_step_time": 0.5677890777587891
    },
    {
      "epoch": 0.000178375244140625,
      "model_forward_time": 0.11448073387145996,
      "step": 29225
    },
    {
      "epoch": 0.000178375244140625,
      "step": 29225,
      "training_step_time": 0.3822143077850342
    },
    {
      "epoch": 0.00017838134765625,
      "model_forward_time": 0.1145777702331543,
      "step": 29226
    },
    {
      "epoch": 0.00017838134765625,
      "step": 29226,
      "training_step_time": 0.39916253089904785
    },
    {
      "epoch": 0.000178387451171875,
      "model_forward_time": 0.11494827270507812,
      "step": 29227
    },
    {
      "epoch": 0.000178387451171875,
      "step": 29227,
      "training_step_time": 0.3949282169342041
    },
    {
      "epoch": 0.0001783935546875,
      "model_forward_time": 0.11461162567138672,
      "step": 29228
    },
    {
      "epoch": 0.0001783935546875,
      "step": 29228,
      "training_step_time": 0.3935737609863281
    },
    {
      "epoch": 0.000178399658203125,
      "model_forward_time": 0.11463403701782227,
      "step": 29229
    },
    {
      "epoch": 0.000178399658203125,
      "step": 29229,
      "training_step_time": 0.4294719696044922
    },
    {
      "epoch": 0.00017840576171875,
      "grad_norm": 0.11901682615280151,
      "learning_rate": 5.6239320199204616e-05,
      "loss": 0.047,
      "step": 29230
    },
    {
      "epoch": 0.00017840576171875,
      "model_forward_time": 0.11611747741699219,
      "step": 29230
    },
    {
      "epoch": 0.00017840576171875,
      "step": 29230,
      "training_step_time": 0.7288107872009277
    },
    {
      "epoch": 0.000178411865234375,
      "model_forward_time": 0.11441850662231445,
      "step": 29231
    },
    {
      "epoch": 0.000178411865234375,
      "step": 29231,
      "training_step_time": 0.38427209854125977
    },
    {
      "epoch": 0.00017841796875,
      "model_forward_time": 0.1148979663848877,
      "step": 29232
    },
    {
      "epoch": 0.00017841796875,
      "step": 29232,
      "training_step_time": 0.3960285186767578
    },
    {
      "epoch": 0.000178424072265625,
      "model_forward_time": 0.11437416076660156,
      "step": 29233
    },
    {
      "epoch": 0.000178424072265625,
      "step": 29233,
      "training_step_time": 0.4455864429473877
    },
    {
      "epoch": 0.00017843017578125,
      "model_forward_time": 0.11472892761230469,
      "step": 29234
    },
    {
      "epoch": 0.00017843017578125,
      "step": 29234,
      "training_step_time": 0.43825340270996094
    },
    {
      "epoch": 0.000178436279296875,
      "model_forward_time": 0.11471366882324219,
      "step": 29235
    },
    {
      "epoch": 0.000178436279296875,
      "step": 29235,
      "training_step_time": 0.38636064529418945
    },
    {
      "epoch": 0.0001784423828125,
      "model_forward_time": 0.11438536643981934,
      "step": 29236
    },
    {
      "epoch": 0.0001784423828125,
      "step": 29236,
      "training_step_time": 0.6909253597259521
    },
    {
      "epoch": 0.000178448486328125,
      "model_forward_time": 0.11474108695983887,
      "step": 29237
    },
    {
      "epoch": 0.000178448486328125,
      "step": 29237,
      "training_step_time": 0.3926069736480713
    },
    {
      "epoch": 0.00017845458984375,
      "model_forward_time": 0.11462259292602539,
      "step": 29238
    },
    {
      "epoch": 0.00017845458984375,
      "step": 29238,
      "training_step_time": 0.3778681755065918
    },
    {
      "epoch": 0.000178460693359375,
      "model_forward_time": 0.11422991752624512,
      "step": 29239
    },
    {
      "epoch": 0.000178460693359375,
      "step": 29239,
      "training_step_time": 0.39305663108825684
    },
    {
      "epoch": 0.000178466796875,
      "grad_norm": 0.10523343831300735,
      "learning_rate": 5.621197682485327e-05,
      "loss": 0.0476,
      "step": 29240
    },
    {
      "epoch": 0.000178466796875,
      "model_forward_time": 0.11467623710632324,
      "step": 29240
    },
    {
      "epoch": 0.000178466796875,
      "step": 29240,
      "training_step_time": 0.40147900581359863
    },
    {
      "epoch": 0.000178472900390625,
      "model_forward_time": 0.1146240234375,
      "step": 29241
    },
    {
      "epoch": 0.000178472900390625,
      "step": 29241,
      "training_step_time": 0.3981764316558838
    },
    {
      "epoch": 0.00017847900390625,
      "model_forward_time": 0.11480259895324707,
      "step": 29242
    },
    {
      "epoch": 0.00017847900390625,
      "step": 29242,
      "training_step_time": 0.8534929752349854
    },
    {
      "epoch": 0.000178485107421875,
      "model_forward_time": 0.11379671096801758,
      "step": 29243
    },
    {
      "epoch": 0.000178485107421875,
      "step": 29243,
      "training_step_time": 0.48348045349121094
    },
    {
      "epoch": 0.0001784912109375,
      "model_forward_time": 0.1144552230834961,
      "step": 29244
    },
    {
      "epoch": 0.0001784912109375,
      "step": 29244,
      "training_step_time": 0.3830111026763916
    },
    {
      "epoch": 0.000178497314453125,
      "model_forward_time": 0.11463212966918945,
      "step": 29245
    },
    {
      "epoch": 0.000178497314453125,
      "step": 29245,
      "training_step_time": 0.3920719623565674
    },
    {
      "epoch": 0.00017850341796875,
      "model_forward_time": 0.11428213119506836,
      "step": 29246
    },
    {
      "epoch": 0.00017850341796875,
      "step": 29246,
      "training_step_time": 0.4073784351348877
    },
    {
      "epoch": 0.000178509521484375,
      "model_forward_time": 0.11492657661437988,
      "step": 29247
    },
    {
      "epoch": 0.000178509521484375,
      "step": 29247,
      "training_step_time": 0.4702115058898926
    },
    {
      "epoch": 0.000178515625,
      "model_forward_time": 0.11450934410095215,
      "step": 29248
    },
    {
      "epoch": 0.000178515625,
      "step": 29248,
      "training_step_time": 0.6544773578643799
    },
    {
      "epoch": 0.000178521728515625,
      "model_forward_time": 0.11451578140258789,
      "step": 29249
    },
    {
      "epoch": 0.000178521728515625,
      "step": 29249,
      "training_step_time": 0.4046146869659424
    },
    {
      "epoch": 0.00017852783203125,
      "grad_norm": 0.15052886307239532,
      "learning_rate": 5.618463156346739e-05,
      "loss": 0.0426,
      "step": 29250
    },
    {
      "epoch": 0.00017852783203125,
      "model_forward_time": 0.11447405815124512,
      "step": 29250
    },
    {
      "epoch": 0.00017852783203125,
      "step": 29250,
      "training_step_time": 0.43785667419433594
    },
    {
      "epoch": 0.000178533935546875,
      "model_forward_time": 0.11407589912414551,
      "step": 29251
    },
    {
      "epoch": 0.000178533935546875,
      "step": 29251,
      "training_step_time": 0.3946714401245117
    },
    {
      "epoch": 0.0001785400390625,
      "model_forward_time": 0.11496853828430176,
      "step": 29252
    },
    {
      "epoch": 0.0001785400390625,
      "step": 29252,
      "training_step_time": 0.39748597145080566
    },
    {
      "epoch": 0.000178546142578125,
      "model_forward_time": 0.11557221412658691,
      "step": 29253
    },
    {
      "epoch": 0.000178546142578125,
      "step": 29253,
      "training_step_time": 0.38300061225891113
    },
    {
      "epoch": 0.00017855224609375,
      "model_forward_time": 0.1154944896697998,
      "step": 29254
    },
    {
      "epoch": 0.00017855224609375,
      "step": 29254,
      "training_step_time": 0.7350497245788574
    },
    {
      "epoch": 0.000178558349609375,
      "model_forward_time": 0.11441254615783691,
      "step": 29255
    },
    {
      "epoch": 0.000178558349609375,
      "step": 29255,
      "training_step_time": 0.3846449851989746
    },
    {
      "epoch": 0.000178564453125,
      "model_forward_time": 0.11466598510742188,
      "step": 29256
    },
    {
      "epoch": 0.000178564453125,
      "step": 29256,
      "training_step_time": 0.4793224334716797
    },
    {
      "epoch": 0.000178570556640625,
      "model_forward_time": 0.11451411247253418,
      "step": 29257
    },
    {
      "epoch": 0.000178570556640625,
      "step": 29257,
      "training_step_time": 0.3886868953704834
    },
    {
      "epoch": 0.00017857666015625,
      "model_forward_time": 0.11485791206359863,
      "step": 29258
    },
    {
      "epoch": 0.00017857666015625,
      "step": 29258,
      "training_step_time": 0.4124915599822998
    },
    {
      "epoch": 0.000178582763671875,
      "model_forward_time": 0.11438608169555664,
      "step": 29259
    },
    {
      "epoch": 0.000178582763671875,
      "step": 29259,
      "training_step_time": 0.39581990242004395
    },
    {
      "epoch": 0.0001785888671875,
      "grad_norm": 0.11502952128648758,
      "learning_rate": 5.615728442335373e-05,
      "loss": 0.0499,
      "step": 29260
    },
    {
      "epoch": 0.0001785888671875,
      "model_forward_time": 0.11554241180419922,
      "step": 29260
    },
    {
      "epoch": 0.0001785888671875,
      "step": 29260,
      "training_step_time": 0.7009201049804688
    },
    {
      "epoch": 0.000178594970703125,
      "model_forward_time": 0.11442422866821289,
      "step": 29261
    },
    {
      "epoch": 0.000178594970703125,
      "step": 29261,
      "training_step_time": 0.38654541969299316
    },
    {
      "epoch": 0.00017860107421875,
      "model_forward_time": 0.1141672134399414,
      "step": 29262
    },
    {
      "epoch": 0.00017860107421875,
      "step": 29262,
      "training_step_time": 0.46042966842651367
    },
    {
      "epoch": 0.000178607177734375,
      "model_forward_time": 0.11467385292053223,
      "step": 29263
    },
    {
      "epoch": 0.000178607177734375,
      "step": 29263,
      "training_step_time": 0.4696052074432373
    },
    {
      "epoch": 0.00017861328125,
      "model_forward_time": 0.11522197723388672,
      "step": 29264
    },
    {
      "epoch": 0.00017861328125,
      "step": 29264,
      "training_step_time": 0.40370821952819824
    },
    {
      "epoch": 0.000178619384765625,
      "model_forward_time": 0.11418700218200684,
      "step": 29265
    },
    {
      "epoch": 0.000178619384765625,
      "step": 29265,
      "training_step_time": 0.38474607467651367
    },
    {
      "epoch": 0.00017862548828125,
      "model_forward_time": 0.11461663246154785,
      "step": 29266
    },
    {
      "epoch": 0.00017862548828125,
      "step": 29266,
      "training_step_time": 0.5212233066558838
    },
    {
      "epoch": 0.000178631591796875,
      "model_forward_time": 0.11498665809631348,
      "step": 29267
    },
    {
      "epoch": 0.000178631591796875,
      "step": 29267,
      "training_step_time": 0.38634562492370605
    },
    {
      "epoch": 0.0001786376953125,
      "model_forward_time": 0.11448526382446289,
      "step": 29268
    },
    {
      "epoch": 0.0001786376953125,
      "step": 29268,
      "training_step_time": 0.4389801025390625
    },
    {
      "epoch": 0.000178643798828125,
      "model_forward_time": 0.11499261856079102,
      "step": 29269
    },
    {
      "epoch": 0.000178643798828125,
      "step": 29269,
      "training_step_time": 0.4258582592010498
    },
    {
      "epoch": 0.00017864990234375,
      "grad_norm": 0.08974218368530273,
      "learning_rate": 5.6129935412819635e-05,
      "loss": 0.0515,
      "step": 29270
    },
    {
      "epoch": 0.00017864990234375,
      "model_forward_time": 0.11589336395263672,
      "step": 29270
    },
    {
      "epoch": 0.00017864990234375,
      "step": 29270,
      "training_step_time": 0.45854806900024414
    },
    {
      "epoch": 0.000178656005859375,
      "model_forward_time": 0.1158914566040039,
      "step": 29271
    },
    {
      "epoch": 0.000178656005859375,
      "step": 29271,
      "training_step_time": 0.37993955612182617
    },
    {
      "epoch": 0.000178662109375,
      "model_forward_time": 0.11494755744934082,
      "step": 29272
    },
    {
      "epoch": 0.000178662109375,
      "step": 29272,
      "training_step_time": 0.8586380481719971
    },
    {
      "epoch": 0.000178668212890625,
      "model_forward_time": 0.11416888236999512,
      "step": 29273
    },
    {
      "epoch": 0.000178668212890625,
      "step": 29273,
      "training_step_time": 0.4204530715942383
    },
    {
      "epoch": 0.00017867431640625,
      "model_forward_time": 0.11465907096862793,
      "step": 29274
    },
    {
      "epoch": 0.00017867431640625,
      "step": 29274,
      "training_step_time": 0.4047684669494629
    },
    {
      "epoch": 0.000178680419921875,
      "model_forward_time": 0.11592864990234375,
      "step": 29275
    },
    {
      "epoch": 0.000178680419921875,
      "step": 29275,
      "training_step_time": 0.3986046314239502
    },
    {
      "epoch": 0.0001786865234375,
      "model_forward_time": 0.11484122276306152,
      "step": 29276
    },
    {
      "epoch": 0.0001786865234375,
      "step": 29276,
      "training_step_time": 0.401261568069458
    },
    {
      "epoch": 0.000178692626953125,
      "model_forward_time": 0.11464881896972656,
      "step": 29277
    },
    {
      "epoch": 0.000178692626953125,
      "step": 29277,
      "training_step_time": 0.4821145534515381
    },
    {
      "epoch": 0.00017869873046875,
      "model_forward_time": 0.11424446105957031,
      "step": 29278
    },
    {
      "epoch": 0.00017869873046875,
      "step": 29278,
      "training_step_time": 0.5879695415496826
    },
    {
      "epoch": 0.000178704833984375,
      "model_forward_time": 0.11458539962768555,
      "step": 29279
    },
    {
      "epoch": 0.000178704833984375,
      "step": 29279,
      "training_step_time": 0.39121556282043457
    },
    {
      "epoch": 0.0001787109375,
      "grad_norm": 0.15443572402000427,
      "learning_rate": 5.6102584540173006e-05,
      "loss": 0.0455,
      "step": 29280
    },
    {
      "epoch": 0.0001787109375,
      "model_forward_time": 0.11475419998168945,
      "step": 29280
    },
    {
      "epoch": 0.0001787109375,
      "step": 29280,
      "training_step_time": 0.39873266220092773
    },
    {
      "epoch": 0.000178717041015625,
      "model_forward_time": 0.11451315879821777,
      "step": 29281
    },
    {
      "epoch": 0.000178717041015625,
      "step": 29281,
      "training_step_time": 0.3987100124359131
    },
    {
      "epoch": 0.00017872314453125,
      "model_forward_time": 0.11466765403747559,
      "step": 29282
    },
    {
      "epoch": 0.00017872314453125,
      "step": 29282,
      "training_step_time": 0.4391517639160156
    },
    {
      "epoch": 0.000178729248046875,
      "model_forward_time": 0.11487841606140137,
      "step": 29283
    },
    {
      "epoch": 0.000178729248046875,
      "step": 29283,
      "training_step_time": 0.5361449718475342
    },
    {
      "epoch": 0.0001787353515625,
      "model_forward_time": 0.1147465705871582,
      "step": 29284
    },
    {
      "epoch": 0.0001787353515625,
      "step": 29284,
      "training_step_time": 0.7612285614013672
    },
    {
      "epoch": 0.000178741455078125,
      "model_forward_time": 0.1148521900177002,
      "step": 29285
    },
    {
      "epoch": 0.000178741455078125,
      "step": 29285,
      "training_step_time": 0.3804633617401123
    },
    {
      "epoch": 0.00017874755859375,
      "model_forward_time": 0.11384177207946777,
      "step": 29286
    },
    {
      "epoch": 0.00017874755859375,
      "step": 29286,
      "training_step_time": 0.460798978805542
    },
    {
      "epoch": 0.000178753662109375,
      "model_forward_time": 0.11507916450500488,
      "step": 29287
    },
    {
      "epoch": 0.000178753662109375,
      "step": 29287,
      "training_step_time": 0.38108229637145996
    },
    {
      "epoch": 0.000178759765625,
      "model_forward_time": 0.11449003219604492,
      "step": 29288
    },
    {
      "epoch": 0.000178759765625,
      "step": 29288,
      "training_step_time": 0.3614637851715088
    },
    {
      "epoch": 0.000178765869140625,
      "model_forward_time": 0.11461734771728516,
      "step": 29289
    },
    {
      "epoch": 0.000178765869140625,
      "step": 29289,
      "training_step_time": 0.43674612045288086
    },
    {
      "epoch": 0.00017877197265625,
      "grad_norm": 0.14444883167743683,
      "learning_rate": 5.607523181372234e-05,
      "loss": 0.0459,
      "step": 29290
    },
    {
      "epoch": 0.00017877197265625,
      "model_forward_time": 0.11520838737487793,
      "step": 29290
    },
    {
      "epoch": 0.00017877197265625,
      "step": 29290,
      "training_step_time": 0.6032969951629639
    },
    {
      "epoch": 0.000178778076171875,
      "model_forward_time": 0.11407947540283203,
      "step": 29291
    },
    {
      "epoch": 0.000178778076171875,
      "step": 29291,
      "training_step_time": 0.38673949241638184
    },
    {
      "epoch": 0.0001787841796875,
      "model_forward_time": 0.11633181571960449,
      "step": 29292
    },
    {
      "epoch": 0.0001787841796875,
      "step": 29292,
      "training_step_time": 0.38863611221313477
    },
    {
      "epoch": 0.000178790283203125,
      "model_forward_time": 0.11389017105102539,
      "step": 29293
    },
    {
      "epoch": 0.000178790283203125,
      "step": 29293,
      "training_step_time": 0.4082503318786621
    },
    {
      "epoch": 0.00017879638671875,
      "model_forward_time": 0.11493587493896484,
      "step": 29294
    },
    {
      "epoch": 0.00017879638671875,
      "step": 29294,
      "training_step_time": 0.3909173011779785
    },
    {
      "epoch": 0.000178802490234375,
      "model_forward_time": 0.1144566535949707,
      "step": 29295
    },
    {
      "epoch": 0.000178802490234375,
      "step": 29295,
      "training_step_time": 0.4078812599182129
    },
    {
      "epoch": 0.00017880859375,
      "model_forward_time": 0.11484336853027344,
      "step": 29296
    },
    {
      "epoch": 0.00017880859375,
      "step": 29296,
      "training_step_time": 0.734553337097168
    },
    {
      "epoch": 0.000178814697265625,
      "model_forward_time": 0.11434149742126465,
      "step": 29297
    },
    {
      "epoch": 0.000178814697265625,
      "step": 29297,
      "training_step_time": 0.41291213035583496
    },
    {
      "epoch": 0.00017882080078125,
      "model_forward_time": 0.11501669883728027,
      "step": 29298
    },
    {
      "epoch": 0.00017882080078125,
      "step": 29298,
      "training_step_time": 0.4249112606048584
    },
    {
      "epoch": 0.000178826904296875,
      "model_forward_time": 0.11412334442138672,
      "step": 29299
    },
    {
      "epoch": 0.000178826904296875,
      "step": 29299,
      "training_step_time": 0.44719696044921875
    },
    {
      "epoch": 0.0001788330078125,
      "grad_norm": 0.14326296746730804,
      "learning_rate": 5.604787724177666e-05,
      "loss": 0.0421,
      "step": 29300
    },
    {
      "epoch": 0.0001788330078125,
      "model_forward_time": 0.11423182487487793,
      "step": 29300
    },
    {
      "epoch": 0.0001788330078125,
      "step": 29300,
      "training_step_time": 0.41358447074890137
    },
    {
      "epoch": 0.000178839111328125,
      "model_forward_time": 0.11486244201660156,
      "step": 29301
    },
    {
      "epoch": 0.000178839111328125,
      "step": 29301,
      "training_step_time": 0.3833928108215332
    },
    {
      "epoch": 0.00017884521484375,
      "model_forward_time": 0.11452436447143555,
      "step": 29302
    },
    {
      "epoch": 0.00017884521484375,
      "step": 29302,
      "training_step_time": 0.6626565456390381
    },
    {
      "epoch": 0.000178851318359375,
      "model_forward_time": 0.11458277702331543,
      "step": 29303
    },
    {
      "epoch": 0.000178851318359375,
      "step": 29303,
      "training_step_time": 0.3850862979888916
    },
    {
      "epoch": 0.000178857421875,
      "model_forward_time": 0.11420869827270508,
      "step": 29304
    },
    {
      "epoch": 0.000178857421875,
      "step": 29304,
      "training_step_time": 0.3904273509979248
    },
    {
      "epoch": 0.000178863525390625,
      "model_forward_time": 0.11516690254211426,
      "step": 29305
    },
    {
      "epoch": 0.000178863525390625,
      "step": 29305,
      "training_step_time": 0.39562511444091797
    },
    {
      "epoch": 0.00017886962890625,
      "model_forward_time": 0.11442923545837402,
      "step": 29306
    },
    {
      "epoch": 0.00017886962890625,
      "step": 29306,
      "training_step_time": 0.38419032096862793
    },
    {
      "epoch": 0.000178875732421875,
      "model_forward_time": 0.11526370048522949,
      "step": 29307
    },
    {
      "epoch": 0.000178875732421875,
      "step": 29307,
      "training_step_time": 0.3972189426422119
    },
    {
      "epoch": 0.0001788818359375,
      "model_forward_time": 0.11542057991027832,
      "step": 29308
    },
    {
      "epoch": 0.0001788818359375,
      "step": 29308,
      "training_step_time": 0.778623104095459
    },
    {
      "epoch": 0.000178887939453125,
      "model_forward_time": 0.11425971984863281,
      "step": 29309
    },
    {
      "epoch": 0.000178887939453125,
      "step": 29309,
      "training_step_time": 0.43085193634033203
    },
    {
      "epoch": 0.00017889404296875,
      "grad_norm": 0.14618222415447235,
      "learning_rate": 5.602052083264555e-05,
      "loss": 0.0464,
      "step": 29310
    },
    {
      "epoch": 0.00017889404296875,
      "model_forward_time": 0.11461138725280762,
      "step": 29310
    },
    {
      "epoch": 0.00017889404296875,
      "step": 29310,
      "training_step_time": 0.3972959518432617
    },
    {
      "epoch": 0.000178900146484375,
      "model_forward_time": 0.11514043807983398,
      "step": 29311
    },
    {
      "epoch": 0.000178900146484375,
      "step": 29311,
      "training_step_time": 0.39621543884277344
    },
    {
      "epoch": 0.00017890625,
      "model_forward_time": 0.11495780944824219,
      "step": 29312
    },
    {
      "epoch": 0.00017890625,
      "step": 29312,
      "training_step_time": 0.4277610778808594
    },
    {
      "epoch": 0.000178912353515625,
      "model_forward_time": 0.11547589302062988,
      "step": 29313
    },
    {
      "epoch": 0.000178912353515625,
      "step": 29313,
      "training_step_time": 0.44359421730041504
    },
    {
      "epoch": 0.00017891845703125,
      "model_forward_time": 0.11481881141662598,
      "step": 29314
    },
    {
      "epoch": 0.00017891845703125,
      "step": 29314,
      "training_step_time": 0.5054335594177246
    },
    {
      "epoch": 0.000178924560546875,
      "model_forward_time": 0.11507105827331543,
      "step": 29315
    },
    {
      "epoch": 0.000178924560546875,
      "step": 29315,
      "training_step_time": 0.4010505676269531
    },
    {
      "epoch": 0.0001789306640625,
      "model_forward_time": 0.11513209342956543,
      "step": 29316
    },
    {
      "epoch": 0.0001789306640625,
      "step": 29316,
      "training_step_time": 0.5030755996704102
    },
    {
      "epoch": 0.000178936767578125,
      "model_forward_time": 0.1149759292602539,
      "step": 29317
    },
    {
      "epoch": 0.000178936767578125,
      "step": 29317,
      "training_step_time": 0.4155900478363037
    },
    {
      "epoch": 0.00017894287109375,
      "model_forward_time": 0.11493110656738281,
      "step": 29318
    },
    {
      "epoch": 0.00017894287109375,
      "step": 29318,
      "training_step_time": 0.3873729705810547
    },
    {
      "epoch": 0.000178948974609375,
      "model_forward_time": 0.11493420600891113,
      "step": 29319
    },
    {
      "epoch": 0.000178948974609375,
      "step": 29319,
      "training_step_time": 0.384502649307251
    },
    {
      "epoch": 0.000178955078125,
      "grad_norm": 0.08802282810211182,
      "learning_rate": 5.599316259463916e-05,
      "loss": 0.0439,
      "step": 29320
    },
    {
      "epoch": 0.000178955078125,
      "model_forward_time": 0.11491847038269043,
      "step": 29320
    },
    {
      "epoch": 0.000178955078125,
      "step": 29320,
      "training_step_time": 0.7930548191070557
    },
    {
      "epoch": 0.000178961181640625,
      "model_forward_time": 0.1142876148223877,
      "step": 29321
    },
    {
      "epoch": 0.000178961181640625,
      "step": 29321,
      "training_step_time": 0.47882771492004395
    },
    {
      "epoch": 0.00017896728515625,
      "model_forward_time": 0.11405253410339355,
      "step": 29322
    },
    {
      "epoch": 0.00017896728515625,
      "step": 29322,
      "training_step_time": 0.4783906936645508
    },
    {
      "epoch": 0.000178973388671875,
      "model_forward_time": 0.11507558822631836,
      "step": 29323
    },
    {
      "epoch": 0.000178973388671875,
      "step": 29323,
      "training_step_time": 0.3899214267730713
    },
    {
      "epoch": 0.0001789794921875,
      "model_forward_time": 0.11414217948913574,
      "step": 29324
    },
    {
      "epoch": 0.0001789794921875,
      "step": 29324,
      "training_step_time": 0.38252758979797363
    },
    {
      "epoch": 0.000178985595703125,
      "model_forward_time": 0.11431670188903809,
      "step": 29325
    },
    {
      "epoch": 0.000178985595703125,
      "step": 29325,
      "training_step_time": 0.4014251232147217
    },
    {
      "epoch": 0.00017899169921875,
      "model_forward_time": 0.11423325538635254,
      "step": 29326
    },
    {
      "epoch": 0.00017899169921875,
      "step": 29326,
      "training_step_time": 0.5422706604003906
    },
    {
      "epoch": 0.000178997802734375,
      "model_forward_time": 0.11960792541503906,
      "step": 29327
    },
    {
      "epoch": 0.000178997802734375,
      "step": 29327,
      "training_step_time": 0.48383665084838867
    },
    {
      "epoch": 0.00017900390625,
      "model_forward_time": 0.11504244804382324,
      "step": 29328
    },
    {
      "epoch": 0.00017900390625,
      "step": 29328,
      "training_step_time": 0.39772629737854004
    },
    {
      "epoch": 0.000179010009765625,
      "model_forward_time": 0.11551070213317871,
      "step": 29329
    },
    {
      "epoch": 0.000179010009765625,
      "step": 29329,
      "training_step_time": 0.45442867279052734
    },
    {
      "epoch": 0.00017901611328125,
      "grad_norm": 0.20652799308300018,
      "learning_rate": 5.596580253606824e-05,
      "loss": 0.0535,
      "step": 29330
    },
    {
      "epoch": 0.00017901611328125,
      "model_forward_time": 0.11546969413757324,
      "step": 29330
    },
    {
      "epoch": 0.00017901611328125,
      "step": 29330,
      "training_step_time": 0.48578548431396484
    },
    {
      "epoch": 0.000179022216796875,
      "model_forward_time": 0.11432361602783203,
      "step": 29331
    },
    {
      "epoch": 0.000179022216796875,
      "step": 29331,
      "training_step_time": 0.3764464855194092
    },
    {
      "epoch": 0.0001790283203125,
      "model_forward_time": 0.11524319648742676,
      "step": 29332
    },
    {
      "epoch": 0.0001790283203125,
      "step": 29332,
      "training_step_time": 0.4971292018890381
    },
    {
      "epoch": 0.000179034423828125,
      "model_forward_time": 0.11470317840576172,
      "step": 29333
    },
    {
      "epoch": 0.000179034423828125,
      "step": 29333,
      "training_step_time": 0.3864130973815918
    },
    {
      "epoch": 0.00017904052734375,
      "model_forward_time": 0.11537289619445801,
      "step": 29334
    },
    {
      "epoch": 0.00017904052734375,
      "step": 29334,
      "training_step_time": 0.42075490951538086
    },
    {
      "epoch": 0.000179046630859375,
      "model_forward_time": 0.11519813537597656,
      "step": 29335
    },
    {
      "epoch": 0.000179046630859375,
      "step": 29335,
      "training_step_time": 0.44852209091186523
    },
    {
      "epoch": 0.000179052734375,
      "model_forward_time": 0.11577939987182617,
      "step": 29336
    },
    {
      "epoch": 0.000179052734375,
      "step": 29336,
      "training_step_time": 0.42019057273864746
    },
    {
      "epoch": 0.000179058837890625,
      "model_forward_time": 0.11492490768432617,
      "step": 29337
    },
    {
      "epoch": 0.000179058837890625,
      "step": 29337,
      "training_step_time": 0.5129108428955078
    },
    {
      "epoch": 0.00017906494140625,
      "model_forward_time": 0.11560606956481934,
      "step": 29338
    },
    {
      "epoch": 0.00017906494140625,
      "step": 29338,
      "training_step_time": 0.63702392578125
    },
    {
      "epoch": 0.000179071044921875,
      "model_forward_time": 0.1143195629119873,
      "step": 29339
    },
    {
      "epoch": 0.000179071044921875,
      "step": 29339,
      "training_step_time": 0.3823235034942627
    },
    {
      "epoch": 0.0001790771484375,
      "grad_norm": 0.12475118041038513,
      "learning_rate": 5.5938440665244006e-05,
      "loss": 0.0441,
      "step": 29340
    },
    {
      "epoch": 0.0001790771484375,
      "model_forward_time": 0.11516427993774414,
      "step": 29340
    },
    {
      "epoch": 0.0001790771484375,
      "step": 29340,
      "training_step_time": 0.4068312644958496
    },
    {
      "epoch": 0.000179083251953125,
      "model_forward_time": 0.11436128616333008,
      "step": 29341
    },
    {
      "epoch": 0.000179083251953125,
      "step": 29341,
      "training_step_time": 0.4379727840423584
    },
    {
      "epoch": 0.00017908935546875,
      "model_forward_time": 0.11456441879272461,
      "step": 29342
    },
    {
      "epoch": 0.00017908935546875,
      "step": 29342,
      "training_step_time": 0.365309476852417
    },
    {
      "epoch": 0.000179095458984375,
      "model_forward_time": 0.115478515625,
      "step": 29343
    },
    {
      "epoch": 0.000179095458984375,
      "step": 29343,
      "training_step_time": 0.4388890266418457
    },
    {
      "epoch": 0.0001791015625,
      "model_forward_time": 0.11572146415710449,
      "step": 29344
    },
    {
      "epoch": 0.0001791015625,
      "step": 29344,
      "training_step_time": 0.592261552810669
    },
    {
      "epoch": 0.000179107666015625,
      "model_forward_time": 0.11441731452941895,
      "step": 29345
    },
    {
      "epoch": 0.000179107666015625,
      "step": 29345,
      "training_step_time": 0.38114237785339355
    },
    {
      "epoch": 0.00017911376953125,
      "model_forward_time": 0.11485481262207031,
      "step": 29346
    },
    {
      "epoch": 0.00017911376953125,
      "step": 29346,
      "training_step_time": 0.38975000381469727
    },
    {
      "epoch": 0.000179119873046875,
      "model_forward_time": 0.11522316932678223,
      "step": 29347
    },
    {
      "epoch": 0.000179119873046875,
      "step": 29347,
      "training_step_time": 0.40326571464538574
    },
    {
      "epoch": 0.0001791259765625,
      "model_forward_time": 0.11502552032470703,
      "step": 29348
    },
    {
      "epoch": 0.0001791259765625,
      "step": 29348,
      "training_step_time": 0.4474151134490967
    },
    {
      "epoch": 0.000179132080078125,
      "model_forward_time": 0.1149897575378418,
      "step": 29349
    },
    {
      "epoch": 0.000179132080078125,
      "step": 29349,
      "training_step_time": 0.39490222930908203
    },
    {
      "epoch": 0.00017913818359375,
      "grad_norm": 0.15781864523887634,
      "learning_rate": 5.5911076990478304e-05,
      "loss": 0.0473,
      "step": 29350
    },
    {
      "epoch": 0.00017913818359375,
      "model_forward_time": 0.1164400577545166,
      "step": 29350
    },
    {
      "epoch": 0.00017913818359375,
      "step": 29350,
      "training_step_time": 0.7342796325683594
    },
    {
      "epoch": 0.000179144287109375,
      "model_forward_time": 0.11412692070007324,
      "step": 29351
    },
    {
      "epoch": 0.000179144287109375,
      "step": 29351,
      "training_step_time": 0.3955671787261963
    },
    {
      "epoch": 0.000179150390625,
      "model_forward_time": 0.11455655097961426,
      "step": 29352
    },
    {
      "epoch": 0.000179150390625,
      "step": 29352,
      "training_step_time": 0.38692450523376465
    },
    {
      "epoch": 0.000179156494140625,
      "model_forward_time": 0.11485624313354492,
      "step": 29353
    },
    {
      "epoch": 0.000179156494140625,
      "step": 29353,
      "training_step_time": 0.40189337730407715
    },
    {
      "epoch": 0.00017916259765625,
      "model_forward_time": 0.11487197875976562,
      "step": 29354
    },
    {
      "epoch": 0.00017916259765625,
      "step": 29354,
      "training_step_time": 0.4785163402557373
    },
    {
      "epoch": 0.000179168701171875,
      "model_forward_time": 0.11730575561523438,
      "step": 29355
    },
    {
      "epoch": 0.000179168701171875,
      "step": 29355,
      "training_step_time": 0.3737361431121826
    },
    {
      "epoch": 0.0001791748046875,
      "model_forward_time": 0.11575889587402344,
      "step": 29356
    },
    {
      "epoch": 0.0001791748046875,
      "step": 29356,
      "training_step_time": 0.8226261138916016
    },
    {
      "epoch": 0.000179180908203125,
      "model_forward_time": 0.11427521705627441,
      "step": 29357
    },
    {
      "epoch": 0.000179180908203125,
      "step": 29357,
      "training_step_time": 0.4753234386444092
    },
    {
      "epoch": 0.00017918701171875,
      "model_forward_time": 0.11461973190307617,
      "step": 29358
    },
    {
      "epoch": 0.00017918701171875,
      "step": 29358,
      "training_step_time": 0.3946259021759033
    },
    {
      "epoch": 0.000179193115234375,
      "model_forward_time": 0.11423540115356445,
      "step": 29359
    },
    {
      "epoch": 0.000179193115234375,
      "step": 29359,
      "training_step_time": 0.39246511459350586
    },
    {
      "epoch": 0.00017919921875,
      "grad_norm": 0.13711680471897125,
      "learning_rate": 5.588371152008349e-05,
      "loss": 0.043,
      "step": 29360
    },
    {
      "epoch": 0.00017919921875,
      "model_forward_time": 0.11432695388793945,
      "step": 29360
    },
    {
      "epoch": 0.00017919921875,
      "step": 29360,
      "training_step_time": 0.4183690547943115
    },
    {
      "epoch": 0.000179205322265625,
      "model_forward_time": 0.11469554901123047,
      "step": 29361
    },
    {
      "epoch": 0.000179205322265625,
      "step": 29361,
      "training_step_time": 0.4071085453033447
    },
    {
      "epoch": 0.00017921142578125,
      "model_forward_time": 0.11522865295410156,
      "step": 29362
    },
    {
      "epoch": 0.00017921142578125,
      "step": 29362,
      "training_step_time": 0.6063430309295654
    },
    {
      "epoch": 0.000179217529296875,
      "model_forward_time": 0.1148521900177002,
      "step": 29363
    },
    {
      "epoch": 0.000179217529296875,
      "step": 29363,
      "training_step_time": 0.4242284297943115
    },
    {
      "epoch": 0.0001792236328125,
      "model_forward_time": 0.11476826667785645,
      "step": 29364
    },
    {
      "epoch": 0.0001792236328125,
      "step": 29364,
      "training_step_time": 0.3929600715637207
    },
    {
      "epoch": 0.000179229736328125,
      "model_forward_time": 0.11483001708984375,
      "step": 29365
    },
    {
      "epoch": 0.000179229736328125,
      "step": 29365,
      "training_step_time": 0.39304280281066895
    },
    {
      "epoch": 0.00017923583984375,
      "model_forward_time": 0.11467790603637695,
      "step": 29366
    },
    {
      "epoch": 0.00017923583984375,
      "step": 29366,
      "training_step_time": 0.39479732513427734
    },
    {
      "epoch": 0.000179241943359375,
      "model_forward_time": 0.11481618881225586,
      "step": 29367
    },
    {
      "epoch": 0.000179241943359375,
      "step": 29367,
      "training_step_time": 0.38736486434936523
    },
    {
      "epoch": 0.000179248046875,
      "model_forward_time": 0.11521005630493164,
      "step": 29368
    },
    {
      "epoch": 0.000179248046875,
      "step": 29368,
      "training_step_time": 0.8203756809234619
    },
    {
      "epoch": 0.000179254150390625,
      "model_forward_time": 0.1148674488067627,
      "step": 29369
    },
    {
      "epoch": 0.000179254150390625,
      "step": 29369,
      "training_step_time": 0.4608750343322754
    },
    {
      "epoch": 0.00017926025390625,
      "grad_norm": 0.1428811401128769,
      "learning_rate": 5.585634426237246e-05,
      "loss": 0.0411,
      "step": 29370
    },
    {
      "epoch": 0.00017926025390625,
      "model_forward_time": 0.11435461044311523,
      "step": 29370
    },
    {
      "epoch": 0.00017926025390625,
      "step": 29370,
      "training_step_time": 0.48229455947875977
    },
    {
      "epoch": 0.000179266357421875,
      "model_forward_time": 0.11413288116455078,
      "step": 29371
    },
    {
      "epoch": 0.000179266357421875,
      "step": 29371,
      "training_step_time": 0.39267468452453613
    },
    {
      "epoch": 0.0001792724609375,
      "model_forward_time": 0.11482930183410645,
      "step": 29372
    },
    {
      "epoch": 0.0001792724609375,
      "step": 29372,
      "training_step_time": 0.39644575119018555
    },
    {
      "epoch": 0.000179278564453125,
      "model_forward_time": 0.11451387405395508,
      "step": 29373
    },
    {
      "epoch": 0.000179278564453125,
      "step": 29373,
      "training_step_time": 0.3881399631500244
    },
    {
      "epoch": 0.00017928466796875,
      "model_forward_time": 0.11508655548095703,
      "step": 29374
    },
    {
      "epoch": 0.00017928466796875,
      "step": 29374,
      "training_step_time": 0.6936094760894775
    },
    {
      "epoch": 0.000179290771484375,
      "model_forward_time": 0.11446762084960938,
      "step": 29375
    },
    {
      "epoch": 0.000179290771484375,
      "step": 29375,
      "training_step_time": 0.4097316265106201
    },
    {
      "epoch": 0.000179296875,
      "model_forward_time": 0.11504459381103516,
      "step": 29376
    },
    {
      "epoch": 0.000179296875,
      "step": 29376,
      "training_step_time": 0.4321153163909912
    },
    {
      "epoch": 0.000179302978515625,
      "model_forward_time": 0.11496400833129883,
      "step": 29377
    },
    {
      "epoch": 0.000179302978515625,
      "step": 29377,
      "training_step_time": 0.394484281539917
    },
    {
      "epoch": 0.00017930908203125,
      "model_forward_time": 0.11470818519592285,
      "step": 29378
    },
    {
      "epoch": 0.00017930908203125,
      "step": 29378,
      "training_step_time": 0.3836355209350586
    },
    {
      "epoch": 0.000179315185546875,
      "model_forward_time": 0.11469101905822754,
      "step": 29379
    },
    {
      "epoch": 0.000179315185546875,
      "step": 29379,
      "training_step_time": 0.4021484851837158
    },
    {
      "epoch": 0.0001793212890625,
      "grad_norm": 0.13198508322238922,
      "learning_rate": 5.5828975225658666e-05,
      "loss": 0.0459,
      "step": 29380
    },
    {
      "epoch": 0.0001793212890625,
      "model_forward_time": 0.11479973793029785,
      "step": 29380
    },
    {
      "epoch": 0.0001793212890625,
      "step": 29380,
      "training_step_time": 0.7219498157501221
    },
    {
      "epoch": 0.000179327392578125,
      "model_forward_time": 0.11445426940917969,
      "step": 29381
    },
    {
      "epoch": 0.000179327392578125,
      "step": 29381,
      "training_step_time": 0.3816978931427002
    },
    {
      "epoch": 0.00017933349609375,
      "model_forward_time": 0.11448526382446289,
      "step": 29382
    },
    {
      "epoch": 0.00017933349609375,
      "step": 29382,
      "training_step_time": 0.3764829635620117
    },
    {
      "epoch": 0.000179339599609375,
      "model_forward_time": 0.11488604545593262,
      "step": 29383
    },
    {
      "epoch": 0.000179339599609375,
      "step": 29383,
      "training_step_time": 0.42876219749450684
    },
    {
      "epoch": 0.000179345703125,
      "model_forward_time": 0.1147301197052002,
      "step": 29384
    },
    {
      "epoch": 0.000179345703125,
      "step": 29384,
      "training_step_time": 0.4203026294708252
    },
    {
      "epoch": 0.000179351806640625,
      "model_forward_time": 0.11448240280151367,
      "step": 29385
    },
    {
      "epoch": 0.000179351806640625,
      "step": 29385,
      "training_step_time": 0.4235694408416748
    },
    {
      "epoch": 0.00017935791015625,
      "model_forward_time": 0.1152346134185791,
      "step": 29386
    },
    {
      "epoch": 0.00017935791015625,
      "step": 29386,
      "training_step_time": 0.7050585746765137
    },
    {
      "epoch": 0.000179364013671875,
      "model_forward_time": 0.11481690406799316,
      "step": 29387
    },
    {
      "epoch": 0.000179364013671875,
      "step": 29387,
      "training_step_time": 0.3837752342224121
    },
    {
      "epoch": 0.0001793701171875,
      "model_forward_time": 0.11453938484191895,
      "step": 29388
    },
    {
      "epoch": 0.0001793701171875,
      "step": 29388,
      "training_step_time": 0.40726447105407715
    },
    {
      "epoch": 0.000179376220703125,
      "model_forward_time": 0.1151726245880127,
      "step": 29389
    },
    {
      "epoch": 0.000179376220703125,
      "step": 29389,
      "training_step_time": 0.4691019058227539
    },
    {
      "epoch": 0.00017938232421875,
      "grad_norm": 0.16898728907108307,
      "learning_rate": 5.5801604418256117e-05,
      "loss": 0.0475,
      "step": 29390
    },
    {
      "epoch": 0.00017938232421875,
      "model_forward_time": 0.1143946647644043,
      "step": 29390
    },
    {
      "epoch": 0.00017938232421875,
      "step": 29390,
      "training_step_time": 0.39624667167663574
    },
    {
      "epoch": 0.000179388427734375,
      "model_forward_time": 0.1146392822265625,
      "step": 29391
    },
    {
      "epoch": 0.000179388427734375,
      "step": 29391,
      "training_step_time": 0.38466334342956543
    },
    {
      "epoch": 0.00017939453125,
      "model_forward_time": 0.11545443534851074,
      "step": 29392
    },
    {
      "epoch": 0.00017939453125,
      "step": 29392,
      "training_step_time": 0.6247110366821289
    },
    {
      "epoch": 0.000179400634765625,
      "model_forward_time": 0.11465811729431152,
      "step": 29393
    },
    {
      "epoch": 0.000179400634765625,
      "step": 29393,
      "training_step_time": 0.39023661613464355
    },
    {
      "epoch": 0.00017940673828125,
      "model_forward_time": 0.11494731903076172,
      "step": 29394
    },
    {
      "epoch": 0.00017940673828125,
      "step": 29394,
      "training_step_time": 0.45799779891967773
    },
    {
      "epoch": 0.000179412841796875,
      "model_forward_time": 0.11539196968078613,
      "step": 29395
    },
    {
      "epoch": 0.000179412841796875,
      "step": 29395,
      "training_step_time": 0.39740538597106934
    },
    {
      "epoch": 0.0001794189453125,
      "model_forward_time": 0.11516380310058594,
      "step": 29396
    },
    {
      "epoch": 0.0001794189453125,
      "step": 29396,
      "training_step_time": 0.4078857898712158
    },
    {
      "epoch": 0.000179425048828125,
      "model_forward_time": 0.11542630195617676,
      "step": 29397
    },
    {
      "epoch": 0.000179425048828125,
      "step": 29397,
      "training_step_time": 0.4894125461578369
    },
    {
      "epoch": 0.00017943115234375,
      "model_forward_time": 0.11603689193725586,
      "step": 29398
    },
    {
      "epoch": 0.00017943115234375,
      "step": 29398,
      "training_step_time": 0.7033283710479736
    },
    {
      "epoch": 0.000179437255859375,
      "model_forward_time": 0.11418819427490234,
      "step": 29399
    },
    {
      "epoch": 0.000179437255859375,
      "step": 29399,
      "training_step_time": 0.3863182067871094
    },
    {
      "epoch": 0.000179443359375,
      "grad_norm": 0.13612782955169678,
      "learning_rate": 5.577423184847932e-05,
      "loss": 0.0505,
      "step": 29400
    },
    {
      "epoch": 0.000179443359375,
      "model_forward_time": 0.1144113540649414,
      "step": 29400
    },
    {
      "epoch": 0.000179443359375,
      "step": 29400,
      "training_step_time": 0.3838379383087158
    },
    {
      "epoch": 0.000179449462890625,
      "model_forward_time": 0.1164712905883789,
      "step": 29401
    },
    {
      "epoch": 0.000179449462890625,
      "step": 29401,
      "training_step_time": 0.40607428550720215
    },
    {
      "epoch": 0.00017945556640625,
      "model_forward_time": 0.11476850509643555,
      "step": 29402
    },
    {
      "epoch": 0.00017945556640625,
      "step": 29402,
      "training_step_time": 0.40242695808410645
    },
    {
      "epoch": 0.000179461669921875,
      "model_forward_time": 0.11458396911621094,
      "step": 29403
    },
    {
      "epoch": 0.000179461669921875,
      "step": 29403,
      "training_step_time": 0.4742131233215332
    },
    {
      "epoch": 0.0001794677734375,
      "model_forward_time": 0.11533260345458984,
      "step": 29404
    },
    {
      "epoch": 0.0001794677734375,
      "step": 29404,
      "training_step_time": 0.623680830001831
    },
    {
      "epoch": 0.000179473876953125,
      "model_forward_time": 0.11433672904968262,
      "step": 29405
    },
    {
      "epoch": 0.000179473876953125,
      "step": 29405,
      "training_step_time": 0.39223289489746094
    },
    {
      "epoch": 0.00017947998046875,
      "model_forward_time": 0.11499238014221191,
      "step": 29406
    },
    {
      "epoch": 0.00017947998046875,
      "step": 29406,
      "training_step_time": 0.39643168449401855
    },
    {
      "epoch": 0.000179486083984375,
      "model_forward_time": 0.11480498313903809,
      "step": 29407
    },
    {
      "epoch": 0.000179486083984375,
      "step": 29407,
      "training_step_time": 0.42403388023376465
    },
    {
      "epoch": 0.0001794921875,
      "model_forward_time": 0.11473369598388672,
      "step": 29408
    },
    {
      "epoch": 0.0001794921875,
      "step": 29408,
      "training_step_time": 0.45490264892578125
    },
    {
      "epoch": 0.000179498291015625,
      "model_forward_time": 0.11473894119262695,
      "step": 29409
    },
    {
      "epoch": 0.000179498291015625,
      "step": 29409,
      "training_step_time": 0.36469173431396484
    },
    {
      "epoch": 0.00017950439453125,
      "grad_norm": 0.1205718144774437,
      "learning_rate": 5.574685752464334e-05,
      "loss": 0.0425,
      "step": 29410
    },
    {
      "epoch": 0.00017950439453125,
      "model_forward_time": 0.11523008346557617,
      "step": 29410
    },
    {
      "epoch": 0.00017950439453125,
      "step": 29410,
      "training_step_time": 0.551224946975708
    },
    {
      "epoch": 0.000179510498046875,
      "model_forward_time": 0.11395382881164551,
      "step": 29411
    },
    {
      "epoch": 0.000179510498046875,
      "step": 29411,
      "training_step_time": 0.4161703586578369
    },
    {
      "epoch": 0.0001795166015625,
      "model_forward_time": 0.11611580848693848,
      "step": 29412
    },
    {
      "epoch": 0.0001795166015625,
      "step": 29412,
      "training_step_time": 0.3787810802459717
    },
    {
      "epoch": 0.000179522705078125,
      "model_forward_time": 0.11464452743530273,
      "step": 29413
    },
    {
      "epoch": 0.000179522705078125,
      "step": 29413,
      "training_step_time": 0.3885304927825928
    },
    {
      "epoch": 0.00017952880859375,
      "model_forward_time": 0.11636662483215332,
      "step": 29414
    },
    {
      "epoch": 0.00017952880859375,
      "step": 29414,
      "training_step_time": 0.4018402099609375
    },
    {
      "epoch": 0.000179534912109375,
      "model_forward_time": 0.11461877822875977,
      "step": 29415
    },
    {
      "epoch": 0.000179534912109375,
      "step": 29415,
      "training_step_time": 0.47250962257385254
    },
    {
      "epoch": 0.000179541015625,
      "model_forward_time": 0.11563968658447266,
      "step": 29416
    },
    {
      "epoch": 0.000179541015625,
      "step": 29416,
      "training_step_time": 0.7637956142425537
    },
    {
      "epoch": 0.000179547119140625,
      "model_forward_time": 0.11424827575683594,
      "step": 29417
    },
    {
      "epoch": 0.000179547119140625,
      "step": 29417,
      "training_step_time": 0.3842802047729492
    },
    {
      "epoch": 0.00017955322265625,
      "model_forward_time": 0.1148982048034668,
      "step": 29418
    },
    {
      "epoch": 0.00017955322265625,
      "step": 29418,
      "training_step_time": 0.3875610828399658
    },
    {
      "epoch": 0.000179559326171875,
      "model_forward_time": 0.11450386047363281,
      "step": 29419
    },
    {
      "epoch": 0.000179559326171875,
      "step": 29419,
      "training_step_time": 0.3921799659729004
    },
    {
      "epoch": 0.0001795654296875,
      "grad_norm": 0.12977497279644012,
      "learning_rate": 5.5719481455063784e-05,
      "loss": 0.0439,
      "step": 29420
    },
    {
      "epoch": 0.0001795654296875,
      "model_forward_time": 0.11488151550292969,
      "step": 29420
    },
    {
      "epoch": 0.0001795654296875,
      "step": 29420,
      "training_step_time": 0.4221210479736328
    },
    {
      "epoch": 0.000179571533203125,
      "model_forward_time": 0.11529135704040527,
      "step": 29421
    },
    {
      "epoch": 0.000179571533203125,
      "step": 29421,
      "training_step_time": 0.4936537742614746
    },
    {
      "epoch": 0.00017957763671875,
      "model_forward_time": 0.11478471755981445,
      "step": 29422
    },
    {
      "epoch": 0.00017957763671875,
      "step": 29422,
      "training_step_time": 0.7642679214477539
    },
    {
      "epoch": 0.000179583740234375,
      "model_forward_time": 0.11450529098510742,
      "step": 29423
    },
    {
      "epoch": 0.000179583740234375,
      "step": 29423,
      "training_step_time": 0.39279723167419434
    },
    {
      "epoch": 0.00017958984375,
      "model_forward_time": 0.11497998237609863,
      "step": 29424
    },
    {
      "epoch": 0.00017958984375,
      "step": 29424,
      "training_step_time": 0.3875870704650879
    },
    {
      "epoch": 0.000179595947265625,
      "model_forward_time": 0.11447668075561523,
      "step": 29425
    },
    {
      "epoch": 0.000179595947265625,
      "step": 29425,
      "training_step_time": 0.39124536514282227
    },
    {
      "epoch": 0.00017960205078125,
      "model_forward_time": 0.11476278305053711,
      "step": 29426
    },
    {
      "epoch": 0.00017960205078125,
      "step": 29426,
      "training_step_time": 0.3898427486419678
    },
    {
      "epoch": 0.000179608154296875,
      "model_forward_time": 0.11496114730834961,
      "step": 29427
    },
    {
      "epoch": 0.000179608154296875,
      "step": 29427,
      "training_step_time": 0.3915128707885742
    },
    {
      "epoch": 0.0001796142578125,
      "model_forward_time": 0.11554265022277832,
      "step": 29428
    },
    {
      "epoch": 0.0001796142578125,
      "step": 29428,
      "training_step_time": 0.7963933944702148
    },
    {
      "epoch": 0.000179620361328125,
      "model_forward_time": 0.11455702781677246,
      "step": 29429
    },
    {
      "epoch": 0.000179620361328125,
      "step": 29429,
      "training_step_time": 0.39382171630859375
    },
    {
      "epoch": 0.00017962646484375,
      "grad_norm": 0.14381936192512512,
      "learning_rate": 5.569210364805677e-05,
      "loss": 0.0464,
      "step": 29430
    },
    {
      "epoch": 0.00017962646484375,
      "model_forward_time": 0.11487770080566406,
      "step": 29430
    },
    {
      "epoch": 0.00017962646484375,
      "step": 29430,
      "training_step_time": 0.385103702545166
    },
    {
      "epoch": 0.000179632568359375,
      "model_forward_time": 0.11599087715148926,
      "step": 29431
    },
    {
      "epoch": 0.000179632568359375,
      "step": 29431,
      "training_step_time": 0.3943004608154297
    },
    {
      "epoch": 0.000179638671875,
      "model_forward_time": 0.11457610130310059,
      "step": 29432
    },
    {
      "epoch": 0.000179638671875,
      "step": 29432,
      "training_step_time": 0.3819868564605713
    },
    {
      "epoch": 0.000179644775390625,
      "model_forward_time": 0.1148684024810791,
      "step": 29433
    },
    {
      "epoch": 0.000179644775390625,
      "step": 29433,
      "training_step_time": 0.47185182571411133
    },
    {
      "epoch": 0.00017965087890625,
      "model_forward_time": 0.11483168601989746,
      "step": 29434
    },
    {
      "epoch": 0.00017965087890625,
      "step": 29434,
      "training_step_time": 0.5321481227874756
    },
    {
      "epoch": 0.000179656982421875,
      "model_forward_time": 0.1146247386932373,
      "step": 29435
    },
    {
      "epoch": 0.000179656982421875,
      "step": 29435,
      "training_step_time": 0.44669413566589355
    },
    {
      "epoch": 0.0001796630859375,
      "model_forward_time": 0.11503815650939941,
      "step": 29436
    },
    {
      "epoch": 0.0001796630859375,
      "step": 29436,
      "training_step_time": 0.44451451301574707
    },
    {
      "epoch": 0.000179669189453125,
      "model_forward_time": 0.11521387100219727,
      "step": 29437
    },
    {
      "epoch": 0.000179669189453125,
      "step": 29437,
      "training_step_time": 0.43150901794433594
    },
    {
      "epoch": 0.00017967529296875,
      "model_forward_time": 0.11485028266906738,
      "step": 29438
    },
    {
      "epoch": 0.00017967529296875,
      "step": 29438,
      "training_step_time": 0.40313100814819336
    },
    {
      "epoch": 0.000179681396484375,
      "model_forward_time": 0.11452198028564453,
      "step": 29439
    },
    {
      "epoch": 0.000179681396484375,
      "step": 29439,
      "training_step_time": 0.38129353523254395
    },
    {
      "epoch": 0.0001796875,
      "grad_norm": 0.12073471397161484,
      "learning_rate": 5.566472411193897e-05,
      "loss": 0.0413,
      "step": 29440
    },
    {
      "epoch": 0.0001796875,
      "model_forward_time": 0.11557579040527344,
      "step": 29440
    },
    {
      "epoch": 0.0001796875,
      "step": 29440,
      "training_step_time": 0.6250834465026855
    },
    {
      "epoch": 0.000179693603515625,
      "model_forward_time": 0.11463689804077148,
      "step": 29441
    },
    {
      "epoch": 0.000179693603515625,
      "step": 29441,
      "training_step_time": 0.39060282707214355
    },
    {
      "epoch": 0.00017969970703125,
      "model_forward_time": 0.11488604545593262,
      "step": 29442
    },
    {
      "epoch": 0.00017969970703125,
      "step": 29442,
      "training_step_time": 0.40326905250549316
    },
    {
      "epoch": 0.000179705810546875,
      "model_forward_time": 0.11489629745483398,
      "step": 29443
    },
    {
      "epoch": 0.000179705810546875,
      "step": 29443,
      "training_step_time": 0.48058223724365234
    },
    {
      "epoch": 0.0001797119140625,
      "model_forward_time": 0.11466550827026367,
      "step": 29444
    },
    {
      "epoch": 0.0001797119140625,
      "step": 29444,
      "training_step_time": 0.38900303840637207
    },
    {
      "epoch": 0.000179718017578125,
      "model_forward_time": 0.11480093002319336,
      "step": 29445
    },
    {
      "epoch": 0.000179718017578125,
      "step": 29445,
      "training_step_time": 0.399838924407959
    },
    {
      "epoch": 0.00017972412109375,
      "model_forward_time": 0.11534476280212402,
      "step": 29446
    },
    {
      "epoch": 0.00017972412109375,
      "step": 29446,
      "training_step_time": 0.7018857002258301
    },
    {
      "epoch": 0.000179730224609375,
      "model_forward_time": 0.11520886421203613,
      "step": 29447
    },
    {
      "epoch": 0.000179730224609375,
      "step": 29447,
      "training_step_time": 0.4196968078613281
    },
    {
      "epoch": 0.000179736328125,
      "model_forward_time": 0.11448049545288086,
      "step": 29448
    },
    {
      "epoch": 0.000179736328125,
      "step": 29448,
      "training_step_time": 0.41637468338012695
    },
    {
      "epoch": 0.000179742431640625,
      "model_forward_time": 0.11538505554199219,
      "step": 29449
    },
    {
      "epoch": 0.000179742431640625,
      "step": 29449,
      "training_step_time": 0.36574339866638184
    },
    {
      "epoch": 0.00017974853515625,
      "grad_norm": 0.1054646298289299,
      "learning_rate": 5.5637342855027554e-05,
      "loss": 0.0448,
      "step": 29450
    },
    {
      "epoch": 0.00017974853515625,
      "model_forward_time": 0.1148838996887207,
      "step": 29450
    },
    {
      "epoch": 0.00017974853515625,
      "step": 29450,
      "training_step_time": 0.42926979064941406
    },
    {
      "epoch": 0.000179754638671875,
      "model_forward_time": 0.11521220207214355,
      "step": 29451
    },
    {
      "epoch": 0.000179754638671875,
      "step": 29451,
      "training_step_time": 0.4280083179473877
    },
    {
      "epoch": 0.0001797607421875,
      "model_forward_time": 0.11882519721984863,
      "step": 29452
    },
    {
      "epoch": 0.0001797607421875,
      "step": 29452,
      "training_step_time": 0.4950718879699707
    },
    {
      "epoch": 0.000179766845703125,
      "model_forward_time": 0.11747455596923828,
      "step": 29453
    },
    {
      "epoch": 0.000179766845703125,
      "step": 29453,
      "training_step_time": 0.37741780281066895
    },
    {
      "epoch": 0.00017977294921875,
      "model_forward_time": 0.11791300773620605,
      "step": 29454
    },
    {
      "epoch": 0.00017977294921875,
      "step": 29454,
      "training_step_time": 0.3791923522949219
    },
    {
      "epoch": 0.000179779052734375,
      "model_forward_time": 0.1187601089477539,
      "step": 29455
    },
    {
      "epoch": 0.000179779052734375,
      "step": 29455,
      "training_step_time": 0.3838958740234375
    },
    {
      "epoch": 0.00017978515625,
      "model_forward_time": 0.11527276039123535,
      "step": 29456
    },
    {
      "epoch": 0.00017978515625,
      "step": 29456,
      "training_step_time": 0.38914942741394043
    },
    {
      "epoch": 0.000179791259765625,
      "model_forward_time": 0.1155858039855957,
      "step": 29457
    },
    {
      "epoch": 0.000179791259765625,
      "step": 29457,
      "training_step_time": 0.4753763675689697
    },
    {
      "epoch": 0.00017979736328125,
      "model_forward_time": 0.11505436897277832,
      "step": 29458
    },
    {
      "epoch": 0.00017979736328125,
      "step": 29458,
      "training_step_time": 0.7806336879730225
    },
    {
      "epoch": 0.000179803466796875,
      "model_forward_time": 0.11400032043457031,
      "step": 29459
    },
    {
      "epoch": 0.000179803466796875,
      "step": 29459,
      "training_step_time": 0.38611292839050293
    },
    {
      "epoch": 0.0001798095703125,
      "grad_norm": 0.12270534038543701,
      "learning_rate": 5.560995988564023e-05,
      "loss": 0.0474,
      "step": 29460
    },
    {
      "epoch": 0.0001798095703125,
      "model_forward_time": 0.11421012878417969,
      "step": 29460
    },
    {
      "epoch": 0.0001798095703125,
      "step": 29460,
      "training_step_time": 0.4194653034210205
    },
    {
      "epoch": 0.000179815673828125,
      "model_forward_time": 0.11447834968566895,
      "step": 29461
    },
    {
      "epoch": 0.000179815673828125,
      "step": 29461,
      "training_step_time": 0.45376157760620117
    },
    {
      "epoch": 0.00017982177734375,
      "model_forward_time": 0.11448884010314941,
      "step": 29462
    },
    {
      "epoch": 0.00017982177734375,
      "step": 29462,
      "training_step_time": 0.39207935333251953
    },
    {
      "epoch": 0.000179827880859375,
      "model_forward_time": 0.11470270156860352,
      "step": 29463
    },
    {
      "epoch": 0.000179827880859375,
      "step": 29463,
      "training_step_time": 0.44737911224365234
    },
    {
      "epoch": 0.000179833984375,
      "model_forward_time": 0.1160116195678711,
      "step": 29464
    },
    {
      "epoch": 0.000179833984375,
      "step": 29464,
      "training_step_time": 0.5687530040740967
    },
    {
      "epoch": 0.000179840087890625,
      "model_forward_time": 0.11475324630737305,
      "step": 29465
    },
    {
      "epoch": 0.000179840087890625,
      "step": 29465,
      "training_step_time": 0.40279245376586914
    },
    {
      "epoch": 0.00017984619140625,
      "model_forward_time": 0.11500906944274902,
      "step": 29466
    },
    {
      "epoch": 0.00017984619140625,
      "step": 29466,
      "training_step_time": 0.38143324851989746
    },
    {
      "epoch": 0.000179852294921875,
      "model_forward_time": 0.11483120918273926,
      "step": 29467
    },
    {
      "epoch": 0.000179852294921875,
      "step": 29467,
      "training_step_time": 0.3972043991088867
    },
    {
      "epoch": 0.0001798583984375,
      "model_forward_time": 0.11504721641540527,
      "step": 29468
    },
    {
      "epoch": 0.0001798583984375,
      "step": 29468,
      "training_step_time": 0.3864731788635254
    },
    {
      "epoch": 0.000179864501953125,
      "model_forward_time": 0.11527562141418457,
      "step": 29469
    },
    {
      "epoch": 0.000179864501953125,
      "step": 29469,
      "training_step_time": 0.427990198135376
    },
    {
      "epoch": 0.00017987060546875,
      "grad_norm": 0.13800416886806488,
      "learning_rate": 5.558257521209521e-05,
      "loss": 0.0425,
      "step": 29470
    },
    {
      "epoch": 0.00017987060546875,
      "model_forward_time": 0.11568474769592285,
      "step": 29470
    },
    {
      "epoch": 0.00017987060546875,
      "step": 29470,
      "training_step_time": 0.748816728591919
    },
    {
      "epoch": 0.000179876708984375,
      "model_forward_time": 0.11474180221557617,
      "step": 29471
    },
    {
      "epoch": 0.000179876708984375,
      "step": 29471,
      "training_step_time": 0.38876843452453613
    },
    {
      "epoch": 0.0001798828125,
      "model_forward_time": 0.11466407775878906,
      "step": 29472
    },
    {
      "epoch": 0.0001798828125,
      "step": 29472,
      "training_step_time": 0.3929460048675537
    },
    {
      "epoch": 0.000179888916015625,
      "model_forward_time": 0.11498093605041504,
      "step": 29473
    },
    {
      "epoch": 0.000179888916015625,
      "step": 29473,
      "training_step_time": 0.47611427307128906
    },
    {
      "epoch": 0.00017989501953125,
      "model_forward_time": 0.1144411563873291,
      "step": 29474
    },
    {
      "epoch": 0.00017989501953125,
      "step": 29474,
      "training_step_time": 0.4175896644592285
    },
    {
      "epoch": 0.000179901123046875,
      "model_forward_time": 0.11434006690979004,
      "step": 29475
    },
    {
      "epoch": 0.000179901123046875,
      "step": 29475,
      "training_step_time": 0.4627523422241211
    },
    {
      "epoch": 0.0001799072265625,
      "model_forward_time": 0.11485147476196289,
      "step": 29476
    },
    {
      "epoch": 0.0001799072265625,
      "step": 29476,
      "training_step_time": 0.4623727798461914
    },
    {
      "epoch": 0.000179913330078125,
      "model_forward_time": 0.11473894119262695,
      "step": 29477
    },
    {
      "epoch": 0.000179913330078125,
      "step": 29477,
      "training_step_time": 0.4026143550872803
    },
    {
      "epoch": 0.00017991943359375,
      "model_forward_time": 0.11491870880126953,
      "step": 29478
    },
    {
      "epoch": 0.00017991943359375,
      "step": 29478,
      "training_step_time": 0.39406466484069824
    },
    {
      "epoch": 0.000179925537109375,
      "model_forward_time": 0.11588120460510254,
      "step": 29479
    },
    {
      "epoch": 0.000179925537109375,
      "step": 29479,
      "training_step_time": 0.3890719413757324
    },
    {
      "epoch": 0.000179931640625,
      "grad_norm": 0.11013773828744888,
      "learning_rate": 5.555518884271122e-05,
      "loss": 0.0428,
      "step": 29480
    },
    {
      "epoch": 0.000179931640625,
      "model_forward_time": 0.11494159698486328,
      "step": 29480
    },
    {
      "epoch": 0.000179931640625,
      "step": 29480,
      "training_step_time": 0.39060139656066895
    },
    {
      "epoch": 0.000179937744140625,
      "model_forward_time": 0.11483001708984375,
      "step": 29481
    },
    {
      "epoch": 0.000179937744140625,
      "step": 29481,
      "training_step_time": 0.3999931812286377
    },
    {
      "epoch": 0.00017994384765625,
      "model_forward_time": 0.1157388687133789,
      "step": 29482
    },
    {
      "epoch": 0.00017994384765625,
      "step": 29482,
      "training_step_time": 0.7143754959106445
    },
    {
      "epoch": 0.000179949951171875,
      "model_forward_time": 0.11457943916320801,
      "step": 29483
    },
    {
      "epoch": 0.000179949951171875,
      "step": 29483,
      "training_step_time": 0.384258508682251
    },
    {
      "epoch": 0.0001799560546875,
      "model_forward_time": 0.11442303657531738,
      "step": 29484
    },
    {
      "epoch": 0.0001799560546875,
      "step": 29484,
      "training_step_time": 0.46198248863220215
    },
    {
      "epoch": 0.000179962158203125,
      "model_forward_time": 0.11441707611083984,
      "step": 29485
    },
    {
      "epoch": 0.000179962158203125,
      "step": 29485,
      "training_step_time": 0.40552544593811035
    },
    {
      "epoch": 0.00017996826171875,
      "model_forward_time": 0.11504697799682617,
      "step": 29486
    },
    {
      "epoch": 0.00017996826171875,
      "step": 29486,
      "training_step_time": 0.41313672065734863
    },
    {
      "epoch": 0.000179974365234375,
      "model_forward_time": 0.11501741409301758,
      "step": 29487
    },
    {
      "epoch": 0.000179974365234375,
      "step": 29487,
      "training_step_time": 0.3890798091888428
    },
    {
      "epoch": 0.00017998046875,
      "model_forward_time": 0.1149601936340332,
      "step": 29488
    },
    {
      "epoch": 0.00017998046875,
      "step": 29488,
      "training_step_time": 0.5845582485198975
    },
    {
      "epoch": 0.000179986572265625,
      "model_forward_time": 0.11556529998779297,
      "step": 29489
    },
    {
      "epoch": 0.000179986572265625,
      "step": 29489,
      "training_step_time": 0.3946664333343506
    },
    {
      "epoch": 0.00017999267578125,
      "grad_norm": 0.1285310536623001,
      "learning_rate": 5.552780078580756e-05,
      "loss": 0.043,
      "step": 29490
    },
    {
      "epoch": 0.00017999267578125,
      "model_forward_time": 0.11543989181518555,
      "step": 29490
    },
    {
      "epoch": 0.00017999267578125,
      "step": 29490,
      "training_step_time": 0.46094727516174316
    },
    {
      "epoch": 0.000179998779296875,
      "model_forward_time": 0.11616730690002441,
      "step": 29491
    },
    {
      "epoch": 0.000179998779296875,
      "step": 29491,
      "training_step_time": 0.4888632297515869
    },
    {
      "epoch": 0.0001800048828125,
      "model_forward_time": 0.11482095718383789,
      "step": 29492
    },
    {
      "epoch": 0.0001800048828125,
      "step": 29492,
      "training_step_time": 0.403287410736084
    },
    {
      "epoch": 0.000180010986328125,
      "model_forward_time": 0.11478734016418457,
      "step": 29493
    },
    {
      "epoch": 0.000180010986328125,
      "step": 29493,
      "training_step_time": 0.38733887672424316
    },
    {
      "epoch": 0.00018001708984375,
      "model_forward_time": 0.1151888370513916,
      "step": 29494
    },
    {
      "epoch": 0.00018001708984375,
      "step": 29494,
      "training_step_time": 0.8136873245239258
    },
    {
      "epoch": 0.000180023193359375,
      "model_forward_time": 0.1145317554473877,
      "step": 29495
    },
    {
      "epoch": 0.000180023193359375,
      "step": 29495,
      "training_step_time": 0.4457743167877197
    },
    {
      "epoch": 0.000180029296875,
      "model_forward_time": 0.11474108695983887,
      "step": 29496
    },
    {
      "epoch": 0.000180029296875,
      "step": 29496,
      "training_step_time": 0.40300464630126953
    },
    {
      "epoch": 0.000180035400390625,
      "model_forward_time": 0.11472439765930176,
      "step": 29497
    },
    {
      "epoch": 0.000180035400390625,
      "step": 29497,
      "training_step_time": 0.47835302352905273
    },
    {
      "epoch": 0.00018004150390625,
      "model_forward_time": 0.11481571197509766,
      "step": 29498
    },
    {
      "epoch": 0.00018004150390625,
      "step": 29498,
      "training_step_time": 0.43340182304382324
    },
    {
      "epoch": 0.000180047607421875,
      "model_forward_time": 0.11423945426940918,
      "step": 29499
    },
    {
      "epoch": 0.000180047607421875,
      "step": 29499,
      "training_step_time": 0.38061094284057617
    },
    {
      "epoch": 0.0001800537109375,
      "grad_norm": 0.13655392825603485,
      "learning_rate": 5.550041104970397e-05,
      "loss": 0.041,
      "step": 29500
    },
    {
      "epoch": 0.0001800537109375,
      "model_forward_time": 0.11501288414001465,
      "step": 29500
    },
    {
      "epoch": 0.0001800537109375,
      "step": 29500,
      "training_step_time": 0.7471003532409668
    },
    {
      "epoch": 0.000180059814453125,
      "model_forward_time": 0.11429500579833984,
      "step": 29501
    },
    {
      "epoch": 0.000180059814453125,
      "step": 29501,
      "training_step_time": 0.4131462574005127
    },
    {
      "epoch": 0.00018006591796875,
      "model_forward_time": 0.11446714401245117,
      "step": 29502
    },
    {
      "epoch": 0.00018006591796875,
      "step": 29502,
      "training_step_time": 0.44821643829345703
    },
    {
      "epoch": 0.000180072021484375,
      "model_forward_time": 0.11443901062011719,
      "step": 29503
    },
    {
      "epoch": 0.000180072021484375,
      "step": 29503,
      "training_step_time": 0.43588733673095703
    },
    {
      "epoch": 0.000180078125,
      "model_forward_time": 0.11559534072875977,
      "step": 29504
    },
    {
      "epoch": 0.000180078125,
      "step": 29504,
      "training_step_time": 0.48784494400024414
    },
    {
      "epoch": 0.000180084228515625,
      "model_forward_time": 0.11454367637634277,
      "step": 29505
    },
    {
      "epoch": 0.000180084228515625,
      "step": 29505,
      "training_step_time": 0.46538686752319336
    },
    {
      "epoch": 0.00018009033203125,
      "model_forward_time": 0.11475563049316406,
      "step": 29506
    },
    {
      "epoch": 0.00018009033203125,
      "step": 29506,
      "training_step_time": 0.388164758682251
    },
    {
      "epoch": 0.000180096435546875,
      "model_forward_time": 0.11507344245910645,
      "step": 29507
    },
    {
      "epoch": 0.000180096435546875,
      "step": 29507,
      "training_step_time": 0.3832995891571045
    },
    {
      "epoch": 0.0001801025390625,
      "model_forward_time": 0.11571669578552246,
      "step": 29508
    },
    {
      "epoch": 0.0001801025390625,
      "step": 29508,
      "training_step_time": 0.4047529697418213
    },
    {
      "epoch": 0.000180108642578125,
      "model_forward_time": 0.11512207984924316,
      "step": 29509
    },
    {
      "epoch": 0.000180108642578125,
      "step": 29509,
      "training_step_time": 0.39895129203796387
    },
    {
      "epoch": 0.00018011474609375,
      "grad_norm": 0.10449449717998505,
      "learning_rate": 5.5473019642720734e-05,
      "loss": 0.0473,
      "step": 29510
    },
    {
      "epoch": 0.00018011474609375,
      "model_forward_time": 0.11560392379760742,
      "step": 29510
    },
    {
      "epoch": 0.00018011474609375,
      "step": 29510,
      "training_step_time": 0.5034992694854736
    },
    {
      "epoch": 0.000180120849609375,
      "model_forward_time": 0.11493325233459473,
      "step": 29511
    },
    {
      "epoch": 0.000180120849609375,
      "step": 29511,
      "training_step_time": 0.43784618377685547
    },
    {
      "epoch": 0.000180126953125,
      "model_forward_time": 0.11536288261413574,
      "step": 29512
    },
    {
      "epoch": 0.000180126953125,
      "step": 29512,
      "training_step_time": 0.5869936943054199
    },
    {
      "epoch": 0.000180133056640625,
      "model_forward_time": 0.1151430606842041,
      "step": 29513
    },
    {
      "epoch": 0.000180133056640625,
      "step": 29513,
      "training_step_time": 0.3836860656738281
    },
    {
      "epoch": 0.00018013916015625,
      "model_forward_time": 0.11542105674743652,
      "step": 29514
    },
    {
      "epoch": 0.00018013916015625,
      "step": 29514,
      "training_step_time": 0.39118480682373047
    },
    {
      "epoch": 0.000180145263671875,
      "model_forward_time": 0.11501932144165039,
      "step": 29515
    },
    {
      "epoch": 0.000180145263671875,
      "step": 29515,
      "training_step_time": 0.3923819065093994
    },
    {
      "epoch": 0.0001801513671875,
      "model_forward_time": 0.11487436294555664,
      "step": 29516
    },
    {
      "epoch": 0.0001801513671875,
      "step": 29516,
      "training_step_time": 0.4117710590362549
    },
    {
      "epoch": 0.000180157470703125,
      "model_forward_time": 0.1170654296875,
      "step": 29517
    },
    {
      "epoch": 0.000180157470703125,
      "step": 29517,
      "training_step_time": 0.440781831741333
    },
    {
      "epoch": 0.00018016357421875,
      "model_forward_time": 0.11515402793884277,
      "step": 29518
    },
    {
      "epoch": 0.00018016357421875,
      "step": 29518,
      "training_step_time": 0.8973350524902344
    },
    {
      "epoch": 0.000180169677734375,
      "model_forward_time": 0.11435627937316895,
      "step": 29519
    },
    {
      "epoch": 0.000180169677734375,
      "step": 29519,
      "training_step_time": 0.3823080062866211
    },
    {
      "epoch": 0.00018017578125,
      "grad_norm": 0.09235585480928421,
      "learning_rate": 5.544562657317863e-05,
      "loss": 0.0445,
      "step": 29520
    },
    {
      "epoch": 0.00018017578125,
      "model_forward_time": 0.11460542678833008,
      "step": 29520
    },
    {
      "epoch": 0.00018017578125,
      "step": 29520,
      "training_step_time": 0.3769383430480957
    },
    {
      "epoch": 0.000180181884765625,
      "model_forward_time": 0.1149604320526123,
      "step": 29521
    },
    {
      "epoch": 0.000180181884765625,
      "step": 29521,
      "training_step_time": 0.3922739028930664
    },
    {
      "epoch": 0.00018018798828125,
      "model_forward_time": 0.11444640159606934,
      "step": 29522
    },
    {
      "epoch": 0.00018018798828125,
      "step": 29522,
      "training_step_time": 0.47536754608154297
    },
    {
      "epoch": 0.000180194091796875,
      "model_forward_time": 0.11459875106811523,
      "step": 29523
    },
    {
      "epoch": 0.000180194091796875,
      "step": 29523,
      "training_step_time": 0.39800000190734863
    },
    {
      "epoch": 0.0001802001953125,
      "model_forward_time": 0.11485457420349121,
      "step": 29524
    },
    {
      "epoch": 0.0001802001953125,
      "step": 29524,
      "training_step_time": 0.95601487159729
    },
    {
      "epoch": 0.000180206298828125,
      "model_forward_time": 0.11465883255004883,
      "step": 29525
    },
    {
      "epoch": 0.000180206298828125,
      "step": 29525,
      "training_step_time": 0.38153910636901855
    },
    {
      "epoch": 0.00018021240234375,
      "model_forward_time": 0.1139519214630127,
      "step": 29526
    },
    {
      "epoch": 0.00018021240234375,
      "step": 29526,
      "training_step_time": 0.39102864265441895
    },
    {
      "epoch": 0.000180218505859375,
      "model_forward_time": 0.11482048034667969,
      "step": 29527
    },
    {
      "epoch": 0.000180218505859375,
      "step": 29527,
      "training_step_time": 0.39575958251953125
    },
    {
      "epoch": 0.000180224609375,
      "model_forward_time": 0.11433076858520508,
      "step": 29528
    },
    {
      "epoch": 0.000180224609375,
      "step": 29528,
      "training_step_time": 0.4950406551361084
    },
    {
      "epoch": 0.000180230712890625,
      "model_forward_time": 0.11459922790527344,
      "step": 29529
    },
    {
      "epoch": 0.000180230712890625,
      "step": 29529,
      "training_step_time": 0.36729931831359863
    },
    {
      "epoch": 0.00018023681640625,
      "grad_norm": 0.1382613182067871,
      "learning_rate": 5.541823184939896e-05,
      "loss": 0.0446,
      "step": 29530
    },
    {
      "epoch": 0.00018023681640625,
      "model_forward_time": 0.11503887176513672,
      "step": 29530
    },
    {
      "epoch": 0.00018023681640625,
      "step": 29530,
      "training_step_time": 0.5604503154754639
    },
    {
      "epoch": 0.000180242919921875,
      "model_forward_time": 0.11588501930236816,
      "step": 29531
    },
    {
      "epoch": 0.000180242919921875,
      "step": 29531,
      "training_step_time": 0.4122304916381836
    },
    {
      "epoch": 0.0001802490234375,
      "model_forward_time": 0.11508488655090332,
      "step": 29532
    },
    {
      "epoch": 0.0001802490234375,
      "step": 29532,
      "training_step_time": 0.42043375968933105
    },
    {
      "epoch": 0.000180255126953125,
      "model_forward_time": 0.1176142692565918,
      "step": 29533
    },
    {
      "epoch": 0.000180255126953125,
      "step": 29533,
      "training_step_time": 0.39175844192504883
    },
    {
      "epoch": 0.00018026123046875,
      "model_forward_time": 0.11735773086547852,
      "step": 29534
    },
    {
      "epoch": 0.00018026123046875,
      "step": 29534,
      "training_step_time": 0.3893606662750244
    },
    {
      "epoch": 0.000180267333984375,
      "model_forward_time": 0.11863994598388672,
      "step": 29535
    },
    {
      "epoch": 0.000180267333984375,
      "step": 29535,
      "training_step_time": 0.43191051483154297
    },
    {
      "epoch": 0.0001802734375,
      "model_forward_time": 0.11529016494750977,
      "step": 29536
    },
    {
      "epoch": 0.0001802734375,
      "step": 29536,
      "training_step_time": 0.5114295482635498
    },
    {
      "epoch": 0.000180279541015625,
      "model_forward_time": 0.11503338813781738,
      "step": 29537
    },
    {
      "epoch": 0.000180279541015625,
      "step": 29537,
      "training_step_time": 0.3866896629333496
    },
    {
      "epoch": 0.00018028564453125,
      "model_forward_time": 0.1149744987487793,
      "step": 29538
    },
    {
      "epoch": 0.00018028564453125,
      "step": 29538,
      "training_step_time": 0.4004523754119873
    },
    {
      "epoch": 0.000180291748046875,
      "model_forward_time": 0.11542892456054688,
      "step": 29539
    },
    {
      "epoch": 0.000180291748046875,
      "step": 29539,
      "training_step_time": 0.3915739059448242
    },
    {
      "epoch": 0.0001802978515625,
      "grad_norm": 0.10509584099054337,
      "learning_rate": 5.539083547970352e-05,
      "loss": 0.0431,
      "step": 29540
    },
    {
      "epoch": 0.0001802978515625,
      "model_forward_time": 0.11567449569702148,
      "step": 29540
    },
    {
      "epoch": 0.0001802978515625,
      "step": 29540,
      "training_step_time": 0.47403860092163086
    },
    {
      "epoch": 0.000180303955078125,
      "model_forward_time": 0.11454033851623535,
      "step": 29541
    },
    {
      "epoch": 0.000180303955078125,
      "step": 29541,
      "training_step_time": 0.42617201805114746
    },
    {
      "epoch": 0.00018031005859375,
      "model_forward_time": 0.1150660514831543,
      "step": 29542
    },
    {
      "epoch": 0.00018031005859375,
      "step": 29542,
      "training_step_time": 0.5567710399627686
    },
    {
      "epoch": 0.000180316162109375,
      "model_forward_time": 0.11442065238952637,
      "step": 29543
    },
    {
      "epoch": 0.000180316162109375,
      "step": 29543,
      "training_step_time": 0.3653895854949951
    },
    {
      "epoch": 0.000180322265625,
      "model_forward_time": 0.11527776718139648,
      "step": 29544
    },
    {
      "epoch": 0.000180322265625,
      "step": 29544,
      "training_step_time": 0.3937797546386719
    },
    {
      "epoch": 0.000180328369140625,
      "model_forward_time": 0.11572885513305664,
      "step": 29545
    },
    {
      "epoch": 0.000180328369140625,
      "step": 29545,
      "training_step_time": 0.4137105941772461
    },
    {
      "epoch": 0.00018033447265625,
      "model_forward_time": 0.11514830589294434,
      "step": 29546
    },
    {
      "epoch": 0.00018033447265625,
      "step": 29546,
      "training_step_time": 0.3873896598815918
    },
    {
      "epoch": 0.000180340576171875,
      "model_forward_time": 0.11477994918823242,
      "step": 29547
    },
    {
      "epoch": 0.000180340576171875,
      "step": 29547,
      "training_step_time": 0.3914504051208496
    },
    {
      "epoch": 0.0001803466796875,
      "model_forward_time": 0.11497330665588379,
      "step": 29548
    },
    {
      "epoch": 0.0001803466796875,
      "step": 29548,
      "training_step_time": 0.5935678482055664
    },
    {
      "epoch": 0.000180352783203125,
      "model_forward_time": 0.11554741859436035,
      "step": 29549
    },
    {
      "epoch": 0.000180352783203125,
      "step": 29549,
      "training_step_time": 0.4049098491668701
    },
    {
      "epoch": 0.00018035888671875,
      "grad_norm": 0.08919360488653183,
      "learning_rate": 5.5363437472414595e-05,
      "loss": 0.0413,
      "step": 29550
    },
    {
      "epoch": 0.00018035888671875,
      "model_forward_time": 0.11479592323303223,
      "step": 29550
    },
    {
      "epoch": 0.00018035888671875,
      "step": 29550,
      "training_step_time": 0.4891238212585449
    },
    {
      "epoch": 0.000180364990234375,
      "model_forward_time": 0.11470270156860352,
      "step": 29551
    },
    {
      "epoch": 0.000180364990234375,
      "step": 29551,
      "training_step_time": 0.4097893238067627
    },
    {
      "epoch": 0.00018037109375,
      "model_forward_time": 0.11467456817626953,
      "step": 29552
    },
    {
      "epoch": 0.00018037109375,
      "step": 29552,
      "training_step_time": 0.3886723518371582
    },
    {
      "epoch": 0.000180377197265625,
      "model_forward_time": 0.1149137020111084,
      "step": 29553
    },
    {
      "epoch": 0.000180377197265625,
      "step": 29553,
      "training_step_time": 0.3952341079711914
    },
    {
      "epoch": 0.00018038330078125,
      "model_forward_time": 0.11649203300476074,
      "step": 29554
    },
    {
      "epoch": 0.00018038330078125,
      "step": 29554,
      "training_step_time": 0.4914863109588623
    },
    {
      "epoch": 0.000180389404296875,
      "model_forward_time": 0.11484742164611816,
      "step": 29555
    },
    {
      "epoch": 0.000180389404296875,
      "step": 29555,
      "training_step_time": 0.41890645027160645
    },
    {
      "epoch": 0.0001803955078125,
      "model_forward_time": 0.11495351791381836,
      "step": 29556
    },
    {
      "epoch": 0.0001803955078125,
      "step": 29556,
      "training_step_time": 0.47112536430358887
    },
    {
      "epoch": 0.000180401611328125,
      "model_forward_time": 0.11504054069519043,
      "step": 29557
    },
    {
      "epoch": 0.000180401611328125,
      "step": 29557,
      "training_step_time": 0.36639833450317383
    },
    {
      "epoch": 0.00018040771484375,
      "model_forward_time": 0.11723566055297852,
      "step": 29558
    },
    {
      "epoch": 0.00018040771484375,
      "step": 29558,
      "training_step_time": 0.4003477096557617
    },
    {
      "epoch": 0.000180413818359375,
      "model_forward_time": 0.11515069007873535,
      "step": 29559
    },
    {
      "epoch": 0.000180413818359375,
      "step": 29559,
      "training_step_time": 0.4537985324859619
    },
    {
      "epoch": 0.000180419921875,
      "grad_norm": 0.1303931027650833,
      "learning_rate": 5.533603783585496e-05,
      "loss": 0.0476,
      "step": 29560
    },
    {
      "epoch": 0.000180419921875,
      "model_forward_time": 0.1153876781463623,
      "step": 29560
    },
    {
      "epoch": 0.000180419921875,
      "step": 29560,
      "training_step_time": 0.4531674385070801
    },
    {
      "epoch": 0.000180426025390625,
      "model_forward_time": 0.11513566970825195,
      "step": 29561
    },
    {
      "epoch": 0.000180426025390625,
      "step": 29561,
      "training_step_time": 0.3782007694244385
    },
    {
      "epoch": 0.00018043212890625,
      "model_forward_time": 0.11521172523498535,
      "step": 29562
    },
    {
      "epoch": 0.00018043212890625,
      "step": 29562,
      "training_step_time": 0.40749645233154297
    },
    {
      "epoch": 0.000180438232421875,
      "model_forward_time": 0.11525368690490723,
      "step": 29563
    },
    {
      "epoch": 0.000180438232421875,
      "step": 29563,
      "training_step_time": 0.43488526344299316
    },
    {
      "epoch": 0.0001804443359375,
      "model_forward_time": 0.11519789695739746,
      "step": 29564
    },
    {
      "epoch": 0.0001804443359375,
      "step": 29564,
      "training_step_time": 0.3891479969024658
    },
    {
      "epoch": 0.000180450439453125,
      "model_forward_time": 0.11514592170715332,
      "step": 29565
    },
    {
      "epoch": 0.000180450439453125,
      "step": 29565,
      "training_step_time": 0.44930315017700195
    },
    {
      "epoch": 0.00018045654296875,
      "model_forward_time": 0.11456298828125,
      "step": 29566
    },
    {
      "epoch": 0.00018045654296875,
      "step": 29566,
      "training_step_time": 0.5108439922332764
    },
    {
      "epoch": 0.000180462646484375,
      "model_forward_time": 0.11521625518798828,
      "step": 29567
    },
    {
      "epoch": 0.000180462646484375,
      "step": 29567,
      "training_step_time": 0.3808271884918213
    },
    {
      "epoch": 0.00018046875,
      "model_forward_time": 0.11548995971679688,
      "step": 29568
    },
    {
      "epoch": 0.00018046875,
      "step": 29568,
      "training_step_time": 0.3904566764831543
    },
    {
      "epoch": 0.000180474853515625,
      "model_forward_time": 0.11484718322753906,
      "step": 29569
    },
    {
      "epoch": 0.000180474853515625,
      "step": 29569,
      "training_step_time": 0.4015073776245117
    },
    {
      "epoch": 0.00018048095703125,
      "grad_norm": 0.1304766833782196,
      "learning_rate": 5.5308636578347936e-05,
      "loss": 0.0401,
      "step": 29570
    },
    {
      "epoch": 0.00018048095703125,
      "model_forward_time": 0.1154024600982666,
      "step": 29570
    },
    {
      "epoch": 0.00018048095703125,
      "step": 29570,
      "training_step_time": 0.49892425537109375
    },
    {
      "epoch": 0.000180487060546875,
      "model_forward_time": 0.11468315124511719,
      "step": 29571
    },
    {
      "epoch": 0.000180487060546875,
      "step": 29571,
      "training_step_time": 0.38901615142822266
    },
    {
      "epoch": 0.0001804931640625,
      "model_forward_time": 0.11607146263122559,
      "step": 29572
    },
    {
      "epoch": 0.0001804931640625,
      "step": 29572,
      "training_step_time": 0.4820873737335205
    },
    {
      "epoch": 0.000180499267578125,
      "model_forward_time": 0.11537528038024902,
      "step": 29573
    },
    {
      "epoch": 0.000180499267578125,
      "step": 29573,
      "training_step_time": 0.4730105400085449
    },
    {
      "epoch": 0.00018050537109375,
      "model_forward_time": 0.11459779739379883,
      "step": 29574
    },
    {
      "epoch": 0.00018050537109375,
      "step": 29574,
      "training_step_time": 0.39677953720092773
    },
    {
      "epoch": 0.000180511474609375,
      "model_forward_time": 0.11535215377807617,
      "step": 29575
    },
    {
      "epoch": 0.000180511474609375,
      "step": 29575,
      "training_step_time": 0.4586341381072998
    },
    {
      "epoch": 0.000180517578125,
      "model_forward_time": 0.11477351188659668,
      "step": 29576
    },
    {
      "epoch": 0.000180517578125,
      "step": 29576,
      "training_step_time": 0.4040505886077881
    },
    {
      "epoch": 0.000180523681640625,
      "model_forward_time": 0.11456751823425293,
      "step": 29577
    },
    {
      "epoch": 0.000180523681640625,
      "step": 29577,
      "training_step_time": 0.5030539035797119
    },
    {
      "epoch": 0.00018052978515625,
      "model_forward_time": 0.11379814147949219,
      "step": 29578
    },
    {
      "epoch": 0.00018052978515625,
      "step": 29578,
      "training_step_time": 0.4204435348510742
    },
    {
      "epoch": 0.000180535888671875,
      "model_forward_time": 0.11514830589294434,
      "step": 29579
    },
    {
      "epoch": 0.000180535888671875,
      "step": 29579,
      "training_step_time": 0.4876587390899658
    },
    {
      "epoch": 0.0001805419921875,
      "grad_norm": 0.1634603887796402,
      "learning_rate": 5.52812337082173e-05,
      "loss": 0.0446,
      "step": 29580
    },
    {
      "epoch": 0.0001805419921875,
      "model_forward_time": 0.11450982093811035,
      "step": 29580
    },
    {
      "epoch": 0.0001805419921875,
      "step": 29580,
      "training_step_time": 0.4008452892303467
    },
    {
      "epoch": 0.000180548095703125,
      "model_forward_time": 0.11501765251159668,
      "step": 29581
    },
    {
      "epoch": 0.000180548095703125,
      "step": 29581,
      "training_step_time": 0.39261794090270996
    },
    {
      "epoch": 0.00018055419921875,
      "model_forward_time": 0.11494708061218262,
      "step": 29582
    },
    {
      "epoch": 0.00018055419921875,
      "step": 29582,
      "training_step_time": 0.44879984855651855
    },
    {
      "epoch": 0.000180560302734375,
      "model_forward_time": 0.11530399322509766,
      "step": 29583
    },
    {
      "epoch": 0.000180560302734375,
      "step": 29583,
      "training_step_time": 0.3925192356109619
    },
    {
      "epoch": 0.00018056640625,
      "model_forward_time": 0.11499571800231934,
      "step": 29584
    },
    {
      "epoch": 0.00018056640625,
      "step": 29584,
      "training_step_time": 0.4549248218536377
    },
    {
      "epoch": 0.000180572509765625,
      "model_forward_time": 0.11562371253967285,
      "step": 29585
    },
    {
      "epoch": 0.000180572509765625,
      "step": 29585,
      "training_step_time": 0.37508153915405273
    },
    {
      "epoch": 0.00018057861328125,
      "model_forward_time": 0.1155550479888916,
      "step": 29586
    },
    {
      "epoch": 0.00018057861328125,
      "step": 29586,
      "training_step_time": 0.40408802032470703
    },
    {
      "epoch": 0.000180584716796875,
      "model_forward_time": 0.1154317855834961,
      "step": 29587
    },
    {
      "epoch": 0.000180584716796875,
      "step": 29587,
      "training_step_time": 0.40376830101013184
    },
    {
      "epoch": 0.0001805908203125,
      "model_forward_time": 0.1150975227355957,
      "step": 29588
    },
    {
      "epoch": 0.0001805908203125,
      "step": 29588,
      "training_step_time": 0.43051624298095703
    },
    {
      "epoch": 0.000180596923828125,
      "model_forward_time": 0.11533355712890625,
      "step": 29589
    },
    {
      "epoch": 0.000180596923828125,
      "step": 29589,
      "training_step_time": 0.41489386558532715
    },
    {
      "epoch": 0.00018060302734375,
      "grad_norm": 0.1657809615135193,
      "learning_rate": 5.525382923378728e-05,
      "loss": 0.0444,
      "step": 29590
    },
    {
      "epoch": 0.00018060302734375,
      "model_forward_time": 0.11546611785888672,
      "step": 29590
    },
    {
      "epoch": 0.00018060302734375,
      "step": 29590,
      "training_step_time": 0.41814541816711426
    },
    {
      "epoch": 0.000180609130859375,
      "model_forward_time": 0.1155087947845459,
      "step": 29591
    },
    {
      "epoch": 0.000180609130859375,
      "step": 29591,
      "training_step_time": 0.3957970142364502
    },
    {
      "epoch": 0.000180615234375,
      "model_forward_time": 0.11513090133666992,
      "step": 29592
    },
    {
      "epoch": 0.000180615234375,
      "step": 29592,
      "training_step_time": 0.42719435691833496
    },
    {
      "epoch": 0.000180621337890625,
      "model_forward_time": 0.1150205135345459,
      "step": 29593
    },
    {
      "epoch": 0.000180621337890625,
      "step": 29593,
      "training_step_time": 0.5081861019134521
    },
    {
      "epoch": 0.00018062744140625,
      "model_forward_time": 0.1151421070098877,
      "step": 29594
    },
    {
      "epoch": 0.00018062744140625,
      "step": 29594,
      "training_step_time": 0.40691184997558594
    },
    {
      "epoch": 0.000180633544921875,
      "model_forward_time": 0.1153111457824707,
      "step": 29595
    },
    {
      "epoch": 0.000180633544921875,
      "step": 29595,
      "training_step_time": 0.39095282554626465
    },
    {
      "epoch": 0.0001806396484375,
      "model_forward_time": 0.11607861518859863,
      "step": 29596
    },
    {
      "epoch": 0.0001806396484375,
      "step": 29596,
      "training_step_time": 0.5422899723052979
    },
    {
      "epoch": 0.000180645751953125,
      "model_forward_time": 0.11519789695739746,
      "step": 29597
    },
    {
      "epoch": 0.000180645751953125,
      "step": 29597,
      "training_step_time": 0.3788623809814453
    },
    {
      "epoch": 0.00018065185546875,
      "model_forward_time": 0.11507892608642578,
      "step": 29598
    },
    {
      "epoch": 0.00018065185546875,
      "step": 29598,
      "training_step_time": 0.3957054615020752
    },
    {
      "epoch": 0.000180657958984375,
      "model_forward_time": 0.11478686332702637,
      "step": 29599
    },
    {
      "epoch": 0.000180657958984375,
      "step": 29599,
      "training_step_time": 0.43556952476501465
    },
    {
      "epoch": 0.0001806640625,
      "grad_norm": 0.18251271545886993,
      "learning_rate": 5.522642316338268e-05,
      "loss": 0.0447,
      "step": 29600
    },
    {
      "epoch": 0.0001806640625,
      "model_forward_time": 0.11525607109069824,
      "step": 29600
    },
    {
      "epoch": 0.0001806640625,
      "step": 29600,
      "training_step_time": 0.4087216854095459
    },
    {
      "epoch": 0.000180670166015625,
      "model_forward_time": 0.11561870574951172,
      "step": 29601
    },
    {
      "epoch": 0.000180670166015625,
      "step": 29601,
      "training_step_time": 0.5132970809936523
    },
    {
      "epoch": 0.00018067626953125,
      "model_forward_time": 0.11632704734802246,
      "step": 29602
    },
    {
      "epoch": 0.00018067626953125,
      "step": 29602,
      "training_step_time": 0.49626803398132324
    },
    {
      "epoch": 0.000180682373046875,
      "model_forward_time": 0.11528205871582031,
      "step": 29603
    },
    {
      "epoch": 0.000180682373046875,
      "step": 29603,
      "training_step_time": 0.38787078857421875
    },
    {
      "epoch": 0.0001806884765625,
      "model_forward_time": 0.1144866943359375,
      "step": 29604
    },
    {
      "epoch": 0.0001806884765625,
      "step": 29604,
      "training_step_time": 0.38995790481567383
    },
    {
      "epoch": 0.000180694580078125,
      "model_forward_time": 0.1144106388092041,
      "step": 29605
    },
    {
      "epoch": 0.000180694580078125,
      "step": 29605,
      "training_step_time": 0.39479684829711914
    },
    {
      "epoch": 0.00018070068359375,
      "model_forward_time": 0.11537504196166992,
      "step": 29606
    },
    {
      "epoch": 0.00018070068359375,
      "step": 29606,
      "training_step_time": 0.42624926567077637
    },
    {
      "epoch": 0.000180706787109375,
      "model_forward_time": 0.11519122123718262,
      "step": 29607
    },
    {
      "epoch": 0.000180706787109375,
      "step": 29607,
      "training_step_time": 0.4771430492401123
    },
    {
      "epoch": 0.000180712890625,
      "model_forward_time": 0.11493682861328125,
      "step": 29608
    },
    {
      "epoch": 0.000180712890625,
      "step": 29608,
      "training_step_time": 0.459564208984375
    },
    {
      "epoch": 0.000180718994140625,
      "model_forward_time": 0.11592984199523926,
      "step": 29609
    },
    {
      "epoch": 0.000180718994140625,
      "step": 29609,
      "training_step_time": 0.3878920078277588
    },
    {
      "epoch": 0.00018072509765625,
      "grad_norm": 0.1443498283624649,
      "learning_rate": 5.519901550532871e-05,
      "loss": 0.0449,
      "step": 29610
    },
    {
      "epoch": 0.00018072509765625,
      "model_forward_time": 0.11486411094665527,
      "step": 29610
    },
    {
      "epoch": 0.00018072509765625,
      "step": 29610,
      "training_step_time": 0.3941056728363037
    },
    {
      "epoch": 0.000180731201171875,
      "model_forward_time": 0.11616683006286621,
      "step": 29611
    },
    {
      "epoch": 0.000180731201171875,
      "step": 29611,
      "training_step_time": 0.5084149837493896
    },
    {
      "epoch": 0.0001807373046875,
      "model_forward_time": 0.11418724060058594,
      "step": 29612
    },
    {
      "epoch": 0.0001807373046875,
      "step": 29612,
      "training_step_time": 0.43514204025268555
    },
    {
      "epoch": 0.000180743408203125,
      "model_forward_time": 0.11450052261352539,
      "step": 29613
    },
    {
      "epoch": 0.000180743408203125,
      "step": 29613,
      "training_step_time": 0.4809153079986572
    },
    {
      "epoch": 0.00018074951171875,
      "model_forward_time": 0.11471104621887207,
      "step": 29614
    },
    {
      "epoch": 0.00018074951171875,
      "step": 29614,
      "training_step_time": 0.38129568099975586
    },
    {
      "epoch": 0.000180755615234375,
      "model_forward_time": 0.11481094360351562,
      "step": 29615
    },
    {
      "epoch": 0.000180755615234375,
      "step": 29615,
      "training_step_time": 0.39697766304016113
    },
    {
      "epoch": 0.00018076171875,
      "model_forward_time": 0.1147463321685791,
      "step": 29616
    },
    {
      "epoch": 0.00018076171875,
      "step": 29616,
      "training_step_time": 0.4475257396697998
    },
    {
      "epoch": 0.000180767822265625,
      "model_forward_time": 0.11471438407897949,
      "step": 29617
    },
    {
      "epoch": 0.000180767822265625,
      "step": 29617,
      "training_step_time": 0.39655447006225586
    },
    {
      "epoch": 0.00018077392578125,
      "model_forward_time": 0.11525392532348633,
      "step": 29618
    },
    {
      "epoch": 0.00018077392578125,
      "step": 29618,
      "training_step_time": 0.39792299270629883
    },
    {
      "epoch": 0.000180780029296875,
      "model_forward_time": 0.11456823348999023,
      "step": 29619
    },
    {
      "epoch": 0.000180780029296875,
      "step": 29619,
      "training_step_time": 0.3910543918609619
    },
    {
      "epoch": 0.0001807861328125,
      "grad_norm": 0.07857806980609894,
      "learning_rate": 5.51716062679511e-05,
      "loss": 0.0409,
      "step": 29620
    },
    {
      "epoch": 0.0001807861328125,
      "model_forward_time": 0.1154625415802002,
      "step": 29620
    },
    {
      "epoch": 0.0001807861328125,
      "step": 29620,
      "training_step_time": 0.4504282474517822
    },
    {
      "epoch": 0.000180792236328125,
      "model_forward_time": 0.11551713943481445,
      "step": 29621
    },
    {
      "epoch": 0.000180792236328125,
      "step": 29621,
      "training_step_time": 0.4568769931793213
    },
    {
      "epoch": 0.00018079833984375,
      "model_forward_time": 0.11480498313903809,
      "step": 29622
    },
    {
      "epoch": 0.00018079833984375,
      "step": 29622,
      "training_step_time": 0.4023590087890625
    },
    {
      "epoch": 0.000180804443359375,
      "model_forward_time": 0.11590409278869629,
      "step": 29623
    },
    {
      "epoch": 0.000180804443359375,
      "step": 29623,
      "training_step_time": 0.40088796615600586
    },
    {
      "epoch": 0.000180810546875,
      "model_forward_time": 0.11484551429748535,
      "step": 29624
    },
    {
      "epoch": 0.000180810546875,
      "step": 29624,
      "training_step_time": 0.39033055305480957
    },
    {
      "epoch": 0.000180816650390625,
      "model_forward_time": 0.11581754684448242,
      "step": 29625
    },
    {
      "epoch": 0.000180816650390625,
      "step": 29625,
      "training_step_time": 0.3866443634033203
    },
    {
      "epoch": 0.00018082275390625,
      "model_forward_time": 0.11564421653747559,
      "step": 29626
    },
    {
      "epoch": 0.00018082275390625,
      "step": 29626,
      "training_step_time": 0.5429275035858154
    },
    {
      "epoch": 0.000180828857421875,
      "model_forward_time": 0.11611294746398926,
      "step": 29627
    },
    {
      "epoch": 0.000180828857421875,
      "step": 29627,
      "training_step_time": 0.4439711570739746
    },
    {
      "epoch": 0.0001808349609375,
      "model_forward_time": 0.11544251441955566,
      "step": 29628
    },
    {
      "epoch": 0.0001808349609375,
      "step": 29628,
      "training_step_time": 0.39183545112609863
    },
    {
      "epoch": 0.000180841064453125,
      "model_forward_time": 0.11501765251159668,
      "step": 29629
    },
    {
      "epoch": 0.000180841064453125,
      "step": 29629,
      "training_step_time": 0.5128722190856934
    },
    {
      "epoch": 0.00018084716796875,
      "grad_norm": 0.1151624396443367,
      "learning_rate": 5.514419545957606e-05,
      "loss": 0.0405,
      "step": 29630
    },
    {
      "epoch": 0.00018084716796875,
      "model_forward_time": 0.11501812934875488,
      "step": 29630
    },
    {
      "epoch": 0.00018084716796875,
      "step": 29630,
      "training_step_time": 0.49430298805236816
    },
    {
      "epoch": 0.000180853271484375,
      "model_forward_time": 0.11474490165710449,
      "step": 29631
    },
    {
      "epoch": 0.000180853271484375,
      "step": 29631,
      "training_step_time": 0.38959646224975586
    },
    {
      "epoch": 0.000180859375,
      "model_forward_time": 0.11515045166015625,
      "step": 29632
    },
    {
      "epoch": 0.000180859375,
      "step": 29632,
      "training_step_time": 0.41008949279785156
    },
    {
      "epoch": 0.000180865478515625,
      "model_forward_time": 0.11479306221008301,
      "step": 29633
    },
    {
      "epoch": 0.000180865478515625,
      "step": 29633,
      "training_step_time": 0.3866136074066162
    },
    {
      "epoch": 0.00018087158203125,
      "model_forward_time": 0.11464166641235352,
      "step": 29634
    },
    {
      "epoch": 0.00018087158203125,
      "step": 29634,
      "training_step_time": 0.44303226470947266
    },
    {
      "epoch": 0.000180877685546875,
      "model_forward_time": 0.11462831497192383,
      "step": 29635
    },
    {
      "epoch": 0.000180877685546875,
      "step": 29635,
      "training_step_time": 0.49204206466674805
    },
    {
      "epoch": 0.0001808837890625,
      "model_forward_time": 0.11568832397460938,
      "step": 29636
    },
    {
      "epoch": 0.0001808837890625,
      "step": 29636,
      "training_step_time": 0.3902714252471924
    },
    {
      "epoch": 0.000180889892578125,
      "model_forward_time": 0.11481070518493652,
      "step": 29637
    },
    {
      "epoch": 0.000180889892578125,
      "step": 29637,
      "training_step_time": 0.39591383934020996
    },
    {
      "epoch": 0.00018089599609375,
      "model_forward_time": 0.11586117744445801,
      "step": 29638
    },
    {
      "epoch": 0.00018089599609375,
      "step": 29638,
      "training_step_time": 0.3988380432128906
    },
    {
      "epoch": 0.000180902099609375,
      "model_forward_time": 0.1153707504272461,
      "step": 29639
    },
    {
      "epoch": 0.000180902099609375,
      "step": 29639,
      "training_step_time": 0.888185977935791
    },
    {
      "epoch": 0.000180908203125,
      "grad_norm": 0.12496791779994965,
      "learning_rate": 5.511678308853026e-05,
      "loss": 0.0421,
      "step": 29640
    },
    {
      "epoch": 0.000180908203125,
      "model_forward_time": 0.11458992958068848,
      "step": 29640
    },
    {
      "epoch": 0.000180908203125,
      "step": 29640,
      "training_step_time": 0.3998262882232666
    },
    {
      "epoch": 0.000180914306640625,
      "model_forward_time": 0.11391830444335938,
      "step": 29641
    },
    {
      "epoch": 0.000180914306640625,
      "step": 29641,
      "training_step_time": 0.47418951988220215
    },
    {
      "epoch": 0.00018092041015625,
      "model_forward_time": 0.11412525177001953,
      "step": 29642
    },
    {
      "epoch": 0.00018092041015625,
      "step": 29642,
      "training_step_time": 0.3621556758880615
    },
    {
      "epoch": 0.000180926513671875,
      "model_forward_time": 0.1142122745513916,
      "step": 29643
    },
    {
      "epoch": 0.000180926513671875,
      "step": 29643,
      "training_step_time": 0.3918578624725342
    },
    {
      "epoch": 0.0001809326171875,
      "model_forward_time": 0.11491513252258301,
      "step": 29644
    },
    {
      "epoch": 0.0001809326171875,
      "step": 29644,
      "training_step_time": 0.40451979637145996
    },
    {
      "epoch": 0.000180938720703125,
      "model_forward_time": 0.11545324325561523,
      "step": 29645
    },
    {
      "epoch": 0.000180938720703125,
      "step": 29645,
      "training_step_time": 0.693373441696167
    },
    {
      "epoch": 0.00018094482421875,
      "model_forward_time": 0.11428332328796387,
      "step": 29646
    },
    {
      "epoch": 0.00018094482421875,
      "step": 29646,
      "training_step_time": 0.4397900104522705
    },
    {
      "epoch": 0.000180950927734375,
      "model_forward_time": 0.11435103416442871,
      "step": 29647
    },
    {
      "epoch": 0.000180950927734375,
      "step": 29647,
      "training_step_time": 0.3973121643066406
    },
    {
      "epoch": 0.00018095703125,
      "model_forward_time": 0.11403775215148926,
      "step": 29648
    },
    {
      "epoch": 0.00018095703125,
      "step": 29648,
      "training_step_time": 0.4052143096923828
    },
    {
      "epoch": 0.000180963134765625,
      "model_forward_time": 0.11510229110717773,
      "step": 29649
    },
    {
      "epoch": 0.000180963134765625,
      "step": 29649,
      "training_step_time": 0.38986635208129883
    },
    {
      "epoch": 0.00018096923828125,
      "grad_norm": 0.12527452409267426,
      "learning_rate": 5.508936916314086e-05,
      "loss": 0.0451,
      "step": 29650
    },
    {
      "epoch": 0.00018096923828125,
      "model_forward_time": 0.11511063575744629,
      "step": 29650
    },
    {
      "epoch": 0.00018096923828125,
      "step": 29650,
      "training_step_time": 0.3840465545654297
    },
    {
      "epoch": 0.000180975341796875,
      "model_forward_time": 0.11504387855529785,
      "step": 29651
    },
    {
      "epoch": 0.000180975341796875,
      "step": 29651,
      "training_step_time": 0.9061152935028076
    },
    {
      "epoch": 0.0001809814453125,
      "model_forward_time": 0.11424589157104492,
      "step": 29652
    },
    {
      "epoch": 0.0001809814453125,
      "step": 29652,
      "training_step_time": 0.46311116218566895
    },
    {
      "epoch": 0.000180987548828125,
      "model_forward_time": 0.11435461044311523,
      "step": 29653
    },
    {
      "epoch": 0.000180987548828125,
      "step": 29653,
      "training_step_time": 0.419299840927124
    },
    {
      "epoch": 0.00018099365234375,
      "model_forward_time": 0.11465096473693848,
      "step": 29654
    },
    {
      "epoch": 0.00018099365234375,
      "step": 29654,
      "training_step_time": 0.47638487815856934
    },
    {
      "epoch": 0.000180999755859375,
      "model_forward_time": 0.11386728286743164,
      "step": 29655
    },
    {
      "epoch": 0.000180999755859375,
      "step": 29655,
      "training_step_time": 0.36186861991882324
    },
    {
      "epoch": 0.000181005859375,
      "model_forward_time": 0.11620044708251953,
      "step": 29656
    },
    {
      "epoch": 0.000181005859375,
      "step": 29656,
      "training_step_time": 0.39876818656921387
    },
    {
      "epoch": 0.000181011962890625,
      "model_forward_time": 0.1149451732635498,
      "step": 29657
    },
    {
      "epoch": 0.000181011962890625,
      "step": 29657,
      "training_step_time": 0.3842742443084717
    },
    {
      "epoch": 0.00018101806640625,
      "model_forward_time": 0.11462616920471191,
      "step": 29658
    },
    {
      "epoch": 0.00018101806640625,
      "step": 29658,
      "training_step_time": 0.3923506736755371
    },
    {
      "epoch": 0.000181024169921875,
      "model_forward_time": 0.11625146865844727,
      "step": 29659
    },
    {
      "epoch": 0.000181024169921875,
      "step": 29659,
      "training_step_time": 0.3975334167480469
    },
    {
      "epoch": 0.0001810302734375,
      "grad_norm": 0.12671752274036407,
      "learning_rate": 5.506195369173548e-05,
      "loss": 0.0435,
      "step": 29660
    },
    {
      "epoch": 0.0001810302734375,
      "model_forward_time": 0.11555671691894531,
      "step": 29660
    },
    {
      "epoch": 0.0001810302734375,
      "step": 29660,
      "training_step_time": 0.3939502239227295
    },
    {
      "epoch": 0.000181036376953125,
      "model_forward_time": 0.11550140380859375,
      "step": 29661
    },
    {
      "epoch": 0.000181036376953125,
      "step": 29661,
      "training_step_time": 0.39750051498413086
    },
    {
      "epoch": 0.00018104248046875,
      "model_forward_time": 0.1152493953704834,
      "step": 29662
    },
    {
      "epoch": 0.00018104248046875,
      "step": 29662,
      "training_step_time": 0.4006495475769043
    },
    {
      "epoch": 0.000181048583984375,
      "model_forward_time": 0.11621594429016113,
      "step": 29663
    },
    {
      "epoch": 0.000181048583984375,
      "step": 29663,
      "training_step_time": 0.3878953456878662
    },
    {
      "epoch": 0.0001810546875,
      "model_forward_time": 0.11591887474060059,
      "step": 29664
    },
    {
      "epoch": 0.0001810546875,
      "step": 29664,
      "training_step_time": 0.4074065685272217
    },
    {
      "epoch": 0.000181060791015625,
      "model_forward_time": 0.11539173126220703,
      "step": 29665
    },
    {
      "epoch": 0.000181060791015625,
      "step": 29665,
      "training_step_time": 0.4140346050262451
    },
    {
      "epoch": 0.00018106689453125,
      "model_forward_time": 0.11548852920532227,
      "step": 29666
    },
    {
      "epoch": 0.00018106689453125,
      "step": 29666,
      "training_step_time": 0.39412760734558105
    },
    {
      "epoch": 0.000181072998046875,
      "model_forward_time": 0.1160740852355957,
      "step": 29667
    },
    {
      "epoch": 0.000181072998046875,
      "step": 29667,
      "training_step_time": 0.41240644454956055
    },
    {
      "epoch": 0.0001810791015625,
      "model_forward_time": 0.1153860092163086,
      "step": 29668
    },
    {
      "epoch": 0.0001810791015625,
      "step": 29668,
      "training_step_time": 0.41040754318237305
    },
    {
      "epoch": 0.000181085205078125,
      "model_forward_time": 0.11678576469421387,
      "step": 29669
    },
    {
      "epoch": 0.000181085205078125,
      "step": 29669,
      "training_step_time": 0.43882036209106445
    },
    {
      "epoch": 0.00018109130859375,
      "grad_norm": 0.10970605164766312,
      "learning_rate": 5.5034536682642224e-05,
      "loss": 0.0446,
      "step": 29670
    },
    {
      "epoch": 0.00018109130859375,
      "model_forward_time": 0.11544299125671387,
      "step": 29670
    },
    {
      "epoch": 0.00018109130859375,
      "step": 29670,
      "training_step_time": 0.44752073287963867
    },
    {
      "epoch": 0.000181097412109375,
      "model_forward_time": 0.1147165298461914,
      "step": 29671
    },
    {
      "epoch": 0.000181097412109375,
      "step": 29671,
      "training_step_time": 0.5059936046600342
    },
    {
      "epoch": 0.000181103515625,
      "model_forward_time": 0.11606812477111816,
      "step": 29672
    },
    {
      "epoch": 0.000181103515625,
      "step": 29672,
      "training_step_time": 0.42369508743286133
    },
    {
      "epoch": 0.000181109619140625,
      "model_forward_time": 0.11535429954528809,
      "step": 29673
    },
    {
      "epoch": 0.000181109619140625,
      "step": 29673,
      "training_step_time": 0.38872647285461426
    },
    {
      "epoch": 0.00018111572265625,
      "model_forward_time": 0.11552667617797852,
      "step": 29674
    },
    {
      "epoch": 0.00018111572265625,
      "step": 29674,
      "training_step_time": 0.5041100978851318
    },
    {
      "epoch": 0.000181121826171875,
      "model_forward_time": 0.1152956485748291,
      "step": 29675
    },
    {
      "epoch": 0.000181121826171875,
      "step": 29675,
      "training_step_time": 0.4192483425140381
    },
    {
      "epoch": 0.0001811279296875,
      "model_forward_time": 0.11470222473144531,
      "step": 29676
    },
    {
      "epoch": 0.0001811279296875,
      "step": 29676,
      "training_step_time": 0.5001711845397949
    },
    {
      "epoch": 0.000181134033203125,
      "model_forward_time": 0.11450052261352539,
      "step": 29677
    },
    {
      "epoch": 0.000181134033203125,
      "step": 29677,
      "training_step_time": 0.387160062789917
    },
    {
      "epoch": 0.00018114013671875,
      "model_forward_time": 0.11553430557250977,
      "step": 29678
    },
    {
      "epoch": 0.00018114013671875,
      "step": 29678,
      "training_step_time": 0.38326263427734375
    },
    {
      "epoch": 0.000181146240234375,
      "model_forward_time": 0.11469578742980957,
      "step": 29679
    },
    {
      "epoch": 0.000181146240234375,
      "step": 29679,
      "training_step_time": 0.3856980800628662
    },
    {
      "epoch": 0.00018115234375,
      "grad_norm": 0.1747048944234848,
      "learning_rate": 5.500711814418966e-05,
      "loss": 0.0444,
      "step": 29680
    },
    {
      "epoch": 0.00018115234375,
      "model_forward_time": 0.11496710777282715,
      "step": 29680
    },
    {
      "epoch": 0.00018115234375,
      "step": 29680,
      "training_step_time": 0.4471092224121094
    },
    {
      "epoch": 0.000181158447265625,
      "model_forward_time": 0.11519861221313477,
      "step": 29681
    },
    {
      "epoch": 0.000181158447265625,
      "step": 29681,
      "training_step_time": 0.4735860824584961
    },
    {
      "epoch": 0.00018116455078125,
      "model_forward_time": 0.11485743522644043,
      "step": 29682
    },
    {
      "epoch": 0.00018116455078125,
      "step": 29682,
      "training_step_time": 0.42414283752441406
    },
    {
      "epoch": 0.000181170654296875,
      "model_forward_time": 0.11478376388549805,
      "step": 29683
    },
    {
      "epoch": 0.000181170654296875,
      "step": 29683,
      "training_step_time": 0.4773397445678711
    },
    {
      "epoch": 0.0001811767578125,
      "model_forward_time": 0.11551189422607422,
      "step": 29684
    },
    {
      "epoch": 0.0001811767578125,
      "step": 29684,
      "training_step_time": 0.4406595230102539
    },
    {
      "epoch": 0.000181182861328125,
      "model_forward_time": 0.11561155319213867,
      "step": 29685
    },
    {
      "epoch": 0.000181182861328125,
      "step": 29685,
      "training_step_time": 0.48792171478271484
    },
    {
      "epoch": 0.00018118896484375,
      "model_forward_time": 0.11480021476745605,
      "step": 29686
    },
    {
      "epoch": 0.00018118896484375,
      "step": 29686,
      "training_step_time": 0.42186951637268066
    },
    {
      "epoch": 0.000181195068359375,
      "model_forward_time": 0.1145632266998291,
      "step": 29687
    },
    {
      "epoch": 0.000181195068359375,
      "step": 29687,
      "training_step_time": 0.3874480724334717
    },
    {
      "epoch": 0.000181201171875,
      "model_forward_time": 0.11540627479553223,
      "step": 29688
    },
    {
      "epoch": 0.000181201171875,
      "step": 29688,
      "training_step_time": 0.49779367446899414
    },
    {
      "epoch": 0.000181207275390625,
      "model_forward_time": 0.1146554946899414,
      "step": 29689
    },
    {
      "epoch": 0.000181207275390625,
      "step": 29689,
      "training_step_time": 0.40290260314941406
    },
    {
      "epoch": 0.00018121337890625,
      "grad_norm": 0.13831843435764313,
      "learning_rate": 5.49796980847068e-05,
      "loss": 0.0419,
      "step": 29690
    },
    {
      "epoch": 0.00018121337890625,
      "model_forward_time": 0.11476325988769531,
      "step": 29690
    },
    {
      "epoch": 0.00018121337890625,
      "step": 29690,
      "training_step_time": 0.48462891578674316
    },
    {
      "epoch": 0.000181219482421875,
      "model_forward_time": 0.11493659019470215,
      "step": 29691
    },
    {
      "epoch": 0.000181219482421875,
      "step": 29691,
      "training_step_time": 0.3807368278503418
    },
    {
      "epoch": 0.0001812255859375,
      "model_forward_time": 0.11520767211914062,
      "step": 29692
    },
    {
      "epoch": 0.0001812255859375,
      "step": 29692,
      "training_step_time": 0.3869943618774414
    },
    {
      "epoch": 0.000181231689453125,
      "model_forward_time": 0.11447882652282715,
      "step": 29693
    },
    {
      "epoch": 0.000181231689453125,
      "step": 29693,
      "training_step_time": 0.3901233673095703
    },
    {
      "epoch": 0.00018123779296875,
      "model_forward_time": 0.1166391372680664,
      "step": 29694
    },
    {
      "epoch": 0.00018123779296875,
      "step": 29694,
      "training_step_time": 0.4096057415008545
    },
    {
      "epoch": 0.000181243896484375,
      "model_forward_time": 0.11512112617492676,
      "step": 29695
    },
    {
      "epoch": 0.000181243896484375,
      "step": 29695,
      "training_step_time": 0.4505150318145752
    },
    {
      "epoch": 0.00018125,
      "model_forward_time": 0.11491060256958008,
      "step": 29696
    },
    {
      "epoch": 0.00018125,
      "step": 29696,
      "training_step_time": 0.3865509033203125
    },
    {
      "epoch": 0.000181256103515625,
      "model_forward_time": 0.11559081077575684,
      "step": 29697
    },
    {
      "epoch": 0.000181256103515625,
      "step": 29697,
      "training_step_time": 0.4841125011444092
    },
    {
      "epoch": 0.00018126220703125,
      "model_forward_time": 0.11469411849975586,
      "step": 29698
    },
    {
      "epoch": 0.00018126220703125,
      "step": 29698,
      "training_step_time": 0.3661365509033203
    },
    {
      "epoch": 0.000181268310546875,
      "model_forward_time": 0.11550354957580566,
      "step": 29699
    },
    {
      "epoch": 0.000181268310546875,
      "step": 29699,
      "training_step_time": 0.49073123931884766
    },
    {
      "epoch": 0.0001812744140625,
      "grad_norm": 0.1214187890291214,
      "learning_rate": 5.495227651252315e-05,
      "loss": 0.0366,
      "step": 29700
    },
    {
      "epoch": 0.0001812744140625,
      "model_forward_time": 0.11467981338500977,
      "step": 29700
    },
    {
      "epoch": 0.0001812744140625,
      "step": 29700,
      "training_step_time": 0.4817080497741699
    },
    {
      "epoch": 0.000181280517578125,
      "model_forward_time": 0.11451506614685059,
      "step": 29701
    },
    {
      "epoch": 0.000181280517578125,
      "step": 29701,
      "training_step_time": 0.38535499572753906
    },
    {
      "epoch": 0.00018128662109375,
      "model_forward_time": 0.11467504501342773,
      "step": 29702
    },
    {
      "epoch": 0.00018128662109375,
      "step": 29702,
      "training_step_time": 0.4823489189147949
    },
    {
      "epoch": 0.000181292724609375,
      "model_forward_time": 0.11501502990722656,
      "step": 29703
    },
    {
      "epoch": 0.000181292724609375,
      "step": 29703,
      "training_step_time": 0.40904855728149414
    },
    {
      "epoch": 0.000181298828125,
      "model_forward_time": 0.1148681640625,
      "step": 29704
    },
    {
      "epoch": 0.000181298828125,
      "step": 29704,
      "training_step_time": 0.4800238609313965
    },
    {
      "epoch": 0.000181304931640625,
      "model_forward_time": 0.11485099792480469,
      "step": 29705
    },
    {
      "epoch": 0.000181304931640625,
      "step": 29705,
      "training_step_time": 0.38227272033691406
    },
    {
      "epoch": 0.00018131103515625,
      "model_forward_time": 0.11527419090270996,
      "step": 29706
    },
    {
      "epoch": 0.00018131103515625,
      "step": 29706,
      "training_step_time": 0.3961970806121826
    },
    {
      "epoch": 0.000181317138671875,
      "model_forward_time": 0.11479997634887695,
      "step": 29707
    },
    {
      "epoch": 0.000181317138671875,
      "step": 29707,
      "training_step_time": 0.4066162109375
    },
    {
      "epoch": 0.0001813232421875,
      "model_forward_time": 0.11594915390014648,
      "step": 29708
    },
    {
      "epoch": 0.0001813232421875,
      "step": 29708,
      "training_step_time": 0.40198516845703125
    },
    {
      "epoch": 0.000181329345703125,
      "model_forward_time": 0.11524081230163574,
      "step": 29709
    },
    {
      "epoch": 0.000181329345703125,
      "step": 29709,
      "training_step_time": 0.3995962142944336
    },
    {
      "epoch": 0.00018133544921875,
      "grad_norm": 0.15840770304203033,
      "learning_rate": 5.4924853435968635e-05,
      "loss": 0.0459,
      "step": 29710
    },
    {
      "epoch": 0.00018133544921875,
      "model_forward_time": 0.11538338661193848,
      "step": 29710
    },
    {
      "epoch": 0.00018133544921875,
      "step": 29710,
      "training_step_time": 0.516075611114502
    },
    {
      "epoch": 0.000181341552734375,
      "model_forward_time": 0.11484193801879883,
      "step": 29711
    },
    {
      "epoch": 0.000181341552734375,
      "step": 29711,
      "training_step_time": 0.44713354110717773
    },
    {
      "epoch": 0.00018134765625,
      "model_forward_time": 0.1148691177368164,
      "step": 29712
    },
    {
      "epoch": 0.00018134765625,
      "step": 29712,
      "training_step_time": 0.37078380584716797
    },
    {
      "epoch": 0.000181353759765625,
      "model_forward_time": 0.11506509780883789,
      "step": 29713
    },
    {
      "epoch": 0.000181353759765625,
      "step": 29713,
      "training_step_time": 0.41179966926574707
    },
    {
      "epoch": 0.00018135986328125,
      "model_forward_time": 0.11566543579101562,
      "step": 29714
    },
    {
      "epoch": 0.00018135986328125,
      "step": 29714,
      "training_step_time": 0.38874268531799316
    },
    {
      "epoch": 0.000181365966796875,
      "model_forward_time": 0.1153707504272461,
      "step": 29715
    },
    {
      "epoch": 0.000181365966796875,
      "step": 29715,
      "training_step_time": 0.37778520584106445
    },
    {
      "epoch": 0.0001813720703125,
      "model_forward_time": 0.11499285697937012,
      "step": 29716
    },
    {
      "epoch": 0.0001813720703125,
      "step": 29716,
      "training_step_time": 0.3892087936401367
    },
    {
      "epoch": 0.000181378173828125,
      "model_forward_time": 0.1157076358795166,
      "step": 29717
    },
    {
      "epoch": 0.000181378173828125,
      "step": 29717,
      "training_step_time": 0.47344541549682617
    },
    {
      "epoch": 0.00018138427734375,
      "model_forward_time": 0.11523938179016113,
      "step": 29718
    },
    {
      "epoch": 0.00018138427734375,
      "step": 29718,
      "training_step_time": 0.43857908248901367
    },
    {
      "epoch": 0.000181390380859375,
      "model_forward_time": 0.11521196365356445,
      "step": 29719
    },
    {
      "epoch": 0.000181390380859375,
      "step": 29719,
      "training_step_time": 0.41048765182495117
    },
    {
      "epoch": 0.000181396484375,
      "grad_norm": 0.15771250426769257,
      "learning_rate": 5.48974288633737e-05,
      "loss": 0.0434,
      "step": 29720
    },
    {
      "epoch": 0.000181396484375,
      "model_forward_time": 0.11437463760375977,
      "step": 29720
    },
    {
      "epoch": 0.000181396484375,
      "step": 29720,
      "training_step_time": 0.3893013000488281
    },
    {
      "epoch": 0.000181402587890625,
      "model_forward_time": 0.11553215980529785,
      "step": 29721
    },
    {
      "epoch": 0.000181402587890625,
      "step": 29721,
      "training_step_time": 0.3932209014892578
    },
    {
      "epoch": 0.00018140869140625,
      "model_forward_time": 0.11529970169067383,
      "step": 29722
    },
    {
      "epoch": 0.00018140869140625,
      "step": 29722,
      "training_step_time": 0.38938450813293457
    },
    {
      "epoch": 0.000181414794921875,
      "model_forward_time": 0.11498880386352539,
      "step": 29723
    },
    {
      "epoch": 0.000181414794921875,
      "step": 29723,
      "training_step_time": 0.39421582221984863
    },
    {
      "epoch": 0.0001814208984375,
      "model_forward_time": 0.1159813404083252,
      "step": 29724
    },
    {
      "epoch": 0.0001814208984375,
      "step": 29724,
      "training_step_time": 0.44902801513671875
    },
    {
      "epoch": 0.000181427001953125,
      "model_forward_time": 0.11523103713989258,
      "step": 29725
    },
    {
      "epoch": 0.000181427001953125,
      "step": 29725,
      "training_step_time": 0.4352865219116211
    },
    {
      "epoch": 0.00018143310546875,
      "model_forward_time": 0.11473870277404785,
      "step": 29726
    },
    {
      "epoch": 0.00018143310546875,
      "step": 29726,
      "training_step_time": 0.4713478088378906
    },
    {
      "epoch": 0.000181439208984375,
      "model_forward_time": 0.11583352088928223,
      "step": 29727
    },
    {
      "epoch": 0.000181439208984375,
      "step": 29727,
      "training_step_time": 0.44835734367370605
    },
    {
      "epoch": 0.0001814453125,
      "model_forward_time": 0.1154022216796875,
      "step": 29728
    },
    {
      "epoch": 0.0001814453125,
      "step": 29728,
      "training_step_time": 0.4087409973144531
    },
    {
      "epoch": 0.000181451416015625,
      "model_forward_time": 0.11786532402038574,
      "step": 29729
    },
    {
      "epoch": 0.000181451416015625,
      "step": 29729,
      "training_step_time": 0.46218204498291016
    },
    {
      "epoch": 0.00018145751953125,
      "grad_norm": 0.170836940407753,
      "learning_rate": 5.487000280306917e-05,
      "loss": 0.0437,
      "step": 29730
    },
    {
      "epoch": 0.00018145751953125,
      "model_forward_time": 0.11527037620544434,
      "step": 29730
    },
    {
      "epoch": 0.00018145751953125,
      "step": 29730,
      "training_step_time": 0.3943958282470703
    },
    {
      "epoch": 0.000181463623046875,
      "model_forward_time": 0.11474061012268066,
      "step": 29731
    },
    {
      "epoch": 0.000181463623046875,
      "step": 29731,
      "training_step_time": 0.39741039276123047
    },
    {
      "epoch": 0.0001814697265625,
      "model_forward_time": 0.1154780387878418,
      "step": 29732
    },
    {
      "epoch": 0.0001814697265625,
      "step": 29732,
      "training_step_time": 0.4689977169036865
    },
    {
      "epoch": 0.000181475830078125,
      "model_forward_time": 0.11456823348999023,
      "step": 29733
    },
    {
      "epoch": 0.000181475830078125,
      "step": 29733,
      "training_step_time": 0.44417309761047363
    },
    {
      "epoch": 0.00018148193359375,
      "model_forward_time": 0.11479926109313965,
      "step": 29734
    },
    {
      "epoch": 0.00018148193359375,
      "step": 29734,
      "training_step_time": 0.40623903274536133
    },
    {
      "epoch": 0.000181488037109375,
      "model_forward_time": 0.11455726623535156,
      "step": 29735
    },
    {
      "epoch": 0.000181488037109375,
      "step": 29735,
      "training_step_time": 0.39101147651672363
    },
    {
      "epoch": 0.000181494140625,
      "model_forward_time": 0.11576557159423828,
      "step": 29736
    },
    {
      "epoch": 0.000181494140625,
      "step": 29736,
      "training_step_time": 0.3972351551055908
    },
    {
      "epoch": 0.000181500244140625,
      "model_forward_time": 0.11413860321044922,
      "step": 29737
    },
    {
      "epoch": 0.000181500244140625,
      "step": 29737,
      "training_step_time": 0.38677144050598145
    },
    {
      "epoch": 0.00018150634765625,
      "model_forward_time": 0.11536502838134766,
      "step": 29738
    },
    {
      "epoch": 0.00018150634765625,
      "step": 29738,
      "training_step_time": 0.3891868591308594
    },
    {
      "epoch": 0.000181512451171875,
      "model_forward_time": 0.11505722999572754,
      "step": 29739
    },
    {
      "epoch": 0.000181512451171875,
      "step": 29739,
      "training_step_time": 0.3940708637237549
    },
    {
      "epoch": 0.0001815185546875,
      "grad_norm": 0.1383514553308487,
      "learning_rate": 5.4842575263386386e-05,
      "loss": 0.0453,
      "step": 29740
    },
    {
      "epoch": 0.0001815185546875,
      "model_forward_time": 0.11499929428100586,
      "step": 29740
    },
    {
      "epoch": 0.0001815185546875,
      "step": 29740,
      "training_step_time": 0.47243595123291016
    },
    {
      "epoch": 0.000181524658203125,
      "model_forward_time": 0.11471724510192871,
      "step": 29741
    },
    {
      "epoch": 0.000181524658203125,
      "step": 29741,
      "training_step_time": 0.3680603504180908
    },
    {
      "epoch": 0.00018153076171875,
      "model_forward_time": 0.1148228645324707,
      "step": 29742
    },
    {
      "epoch": 0.00018153076171875,
      "step": 29742,
      "training_step_time": 0.39429640769958496
    },
    {
      "epoch": 0.000181536865234375,
      "model_forward_time": 0.11519312858581543,
      "step": 29743
    },
    {
      "epoch": 0.000181536865234375,
      "step": 29743,
      "training_step_time": 0.4125380516052246
    },
    {
      "epoch": 0.00018154296875,
      "model_forward_time": 0.11494946479797363,
      "step": 29744
    },
    {
      "epoch": 0.00018154296875,
      "step": 29744,
      "training_step_time": 0.39641737937927246
    },
    {
      "epoch": 0.000181549072265625,
      "model_forward_time": 0.11514091491699219,
      "step": 29745
    },
    {
      "epoch": 0.000181549072265625,
      "step": 29745,
      "training_step_time": 0.3885154724121094
    },
    {
      "epoch": 0.00018155517578125,
      "model_forward_time": 0.11554098129272461,
      "step": 29746
    },
    {
      "epoch": 0.00018155517578125,
      "step": 29746,
      "training_step_time": 0.6827716827392578
    },
    {
      "epoch": 0.000181561279296875,
      "model_forward_time": 0.1147150993347168,
      "step": 29747
    },
    {
      "epoch": 0.000181561279296875,
      "step": 29747,
      "training_step_time": 0.4153270721435547
    },
    {
      "epoch": 0.0001815673828125,
      "model_forward_time": 0.11531734466552734,
      "step": 29748
    },
    {
      "epoch": 0.0001815673828125,
      "step": 29748,
      "training_step_time": 0.4459547996520996
    },
    {
      "epoch": 0.000181573486328125,
      "model_forward_time": 0.11501407623291016,
      "step": 29749
    },
    {
      "epoch": 0.000181573486328125,
      "step": 29749,
      "training_step_time": 0.4162294864654541
    },
    {
      "epoch": 0.00018157958984375,
      "grad_norm": 0.15440773963928223,
      "learning_rate": 5.481514625265709e-05,
      "loss": 0.0441,
      "step": 29750
    },
    {
      "epoch": 0.00018157958984375,
      "model_forward_time": 0.11461710929870605,
      "step": 29750
    },
    {
      "epoch": 0.00018157958984375,
      "step": 29750,
      "training_step_time": 0.38273096084594727
    },
    {
      "epoch": 0.000181585693359375,
      "model_forward_time": 0.11633801460266113,
      "step": 29751
    },
    {
      "epoch": 0.000181585693359375,
      "step": 29751,
      "training_step_time": 0.37960386276245117
    },
    {
      "epoch": 0.000181591796875,
      "model_forward_time": 0.11511993408203125,
      "step": 29752
    },
    {
      "epoch": 0.000181591796875,
      "step": 29752,
      "training_step_time": 0.701286792755127
    },
    {
      "epoch": 0.000181597900390625,
      "model_forward_time": 0.11456608772277832,
      "step": 29753
    },
    {
      "epoch": 0.000181597900390625,
      "step": 29753,
      "training_step_time": 0.43004584312438965
    },
    {
      "epoch": 0.00018160400390625,
      "model_forward_time": 0.11483383178710938,
      "step": 29754
    },
    {
      "epoch": 0.00018160400390625,
      "step": 29754,
      "training_step_time": 0.4135921001434326
    },
    {
      "epoch": 0.000181610107421875,
      "model_forward_time": 0.11528277397155762,
      "step": 29755
    },
    {
      "epoch": 0.000181610107421875,
      "step": 29755,
      "training_step_time": 0.4143049716949463
    },
    {
      "epoch": 0.0001816162109375,
      "model_forward_time": 0.11487865447998047,
      "step": 29756
    },
    {
      "epoch": 0.0001816162109375,
      "step": 29756,
      "training_step_time": 0.41187548637390137
    },
    {
      "epoch": 0.000181622314453125,
      "model_forward_time": 0.11473321914672852,
      "step": 29757
    },
    {
      "epoch": 0.000181622314453125,
      "step": 29757,
      "training_step_time": 0.40458154678344727
    },
    {
      "epoch": 0.00018162841796875,
      "model_forward_time": 0.11508512496948242,
      "step": 29758
    },
    {
      "epoch": 0.00018162841796875,
      "step": 29758,
      "training_step_time": 0.5223658084869385
    },
    {
      "epoch": 0.000181634521484375,
      "model_forward_time": 0.11462283134460449,
      "step": 29759
    },
    {
      "epoch": 0.000181634521484375,
      "step": 29759,
      "training_step_time": 0.39068007469177246
    },
    {
      "epoch": 0.000181640625,
      "grad_norm": 0.18559759855270386,
      "learning_rate": 5.478771577921351e-05,
      "loss": 0.0453,
      "step": 29760
    },
    {
      "epoch": 0.000181640625,
      "model_forward_time": 0.1146392822265625,
      "step": 29760
    },
    {
      "epoch": 0.000181640625,
      "step": 29760,
      "training_step_time": 0.41333794593811035
    },
    {
      "epoch": 0.000181646728515625,
      "model_forward_time": 0.11471319198608398,
      "step": 29761
    },
    {
      "epoch": 0.000181646728515625,
      "step": 29761,
      "training_step_time": 0.5032637119293213
    },
    {
      "epoch": 0.00018165283203125,
      "model_forward_time": 0.11480569839477539,
      "step": 29762
    },
    {
      "epoch": 0.00018165283203125,
      "step": 29762,
      "training_step_time": 0.4225656986236572
    },
    {
      "epoch": 0.000181658935546875,
      "model_forward_time": 0.11545920372009277,
      "step": 29763
    },
    {
      "epoch": 0.000181658935546875,
      "step": 29763,
      "training_step_time": 0.383944034576416
    },
    {
      "epoch": 0.0001816650390625,
      "model_forward_time": 0.11498284339904785,
      "step": 29764
    },
    {
      "epoch": 0.0001816650390625,
      "step": 29764,
      "training_step_time": 0.4333059787750244
    },
    {
      "epoch": 0.000181671142578125,
      "model_forward_time": 0.11480474472045898,
      "step": 29765
    },
    {
      "epoch": 0.000181671142578125,
      "step": 29765,
      "training_step_time": 0.4025578498840332
    },
    {
      "epoch": 0.00018167724609375,
      "model_forward_time": 0.11550378799438477,
      "step": 29766
    },
    {
      "epoch": 0.00018167724609375,
      "step": 29766,
      "training_step_time": 0.47338414192199707
    },
    {
      "epoch": 0.000181683349609375,
      "model_forward_time": 0.11524248123168945,
      "step": 29767
    },
    {
      "epoch": 0.000181683349609375,
      "step": 29767,
      "training_step_time": 0.42383575439453125
    },
    {
      "epoch": 0.000181689453125,
      "model_forward_time": 0.11549496650695801,
      "step": 29768
    },
    {
      "epoch": 0.000181689453125,
      "step": 29768,
      "training_step_time": 0.48536181449890137
    },
    {
      "epoch": 0.000181695556640625,
      "model_forward_time": 0.11515116691589355,
      "step": 29769
    },
    {
      "epoch": 0.000181695556640625,
      "step": 29769,
      "training_step_time": 0.43845272064208984
    },
    {
      "epoch": 0.00018170166015625,
      "grad_norm": 0.17613784968852997,
      "learning_rate": 5.476028385138828e-05,
      "loss": 0.042,
      "step": 29770
    },
    {
      "epoch": 0.00018170166015625,
      "model_forward_time": 0.1147925853729248,
      "step": 29770
    },
    {
      "epoch": 0.00018170166015625,
      "step": 29770,
      "training_step_time": 0.4816110134124756
    },
    {
      "epoch": 0.000181707763671875,
      "model_forward_time": 0.11478495597839355,
      "step": 29771
    },
    {
      "epoch": 0.000181707763671875,
      "step": 29771,
      "training_step_time": 0.41925597190856934
    },
    {
      "epoch": 0.0001817138671875,
      "model_forward_time": 0.1146538257598877,
      "step": 29772
    },
    {
      "epoch": 0.0001817138671875,
      "step": 29772,
      "training_step_time": 0.3972005844116211
    },
    {
      "epoch": 0.000181719970703125,
      "model_forward_time": 0.11431002616882324,
      "step": 29773
    },
    {
      "epoch": 0.000181719970703125,
      "step": 29773,
      "training_step_time": 0.39392924308776855
    },
    {
      "epoch": 0.00018172607421875,
      "model_forward_time": 0.11488676071166992,
      "step": 29774
    },
    {
      "epoch": 0.00018172607421875,
      "step": 29774,
      "training_step_time": 0.44615793228149414
    },
    {
      "epoch": 0.000181732177734375,
      "model_forward_time": 0.11473846435546875,
      "step": 29775
    },
    {
      "epoch": 0.000181732177734375,
      "step": 29775,
      "training_step_time": 0.526116132736206
    },
    {
      "epoch": 0.00018173828125,
      "model_forward_time": 0.11506962776184082,
      "step": 29776
    },
    {
      "epoch": 0.00018173828125,
      "step": 29776,
      "training_step_time": 0.456418514251709
    },
    {
      "epoch": 0.000181744384765625,
      "model_forward_time": 0.11480426788330078,
      "step": 29777
    },
    {
      "epoch": 0.000181744384765625,
      "step": 29777,
      "training_step_time": 0.3886864185333252
    },
    {
      "epoch": 0.00018175048828125,
      "model_forward_time": 0.11478543281555176,
      "step": 29778
    },
    {
      "epoch": 0.00018175048828125,
      "step": 29778,
      "training_step_time": 0.39357924461364746
    },
    {
      "epoch": 0.000181756591796875,
      "model_forward_time": 0.1147315502166748,
      "step": 29779
    },
    {
      "epoch": 0.000181756591796875,
      "step": 29779,
      "training_step_time": 0.39415502548217773
    },
    {
      "epoch": 0.0001817626953125,
      "grad_norm": 0.2064981460571289,
      "learning_rate": 5.473285047751451e-05,
      "loss": 0.0438,
      "step": 29780
    },
    {
      "epoch": 0.0001817626953125,
      "model_forward_time": 0.11517858505249023,
      "step": 29780
    },
    {
      "epoch": 0.0001817626953125,
      "step": 29780,
      "training_step_time": 0.4117908477783203
    },
    {
      "epoch": 0.000181768798828125,
      "model_forward_time": 0.11440300941467285,
      "step": 29781
    },
    {
      "epoch": 0.000181768798828125,
      "step": 29781,
      "training_step_time": 0.3886375427246094
    },
    {
      "epoch": 0.00018177490234375,
      "model_forward_time": 0.11508011817932129,
      "step": 29782
    },
    {
      "epoch": 0.00018177490234375,
      "step": 29782,
      "training_step_time": 0.5310063362121582
    },
    {
      "epoch": 0.000181781005859375,
      "model_forward_time": 0.11460614204406738,
      "step": 29783
    },
    {
      "epoch": 0.000181781005859375,
      "step": 29783,
      "training_step_time": 0.36824727058410645
    },
    {
      "epoch": 0.000181787109375,
      "model_forward_time": 0.1152048110961914,
      "step": 29784
    },
    {
      "epoch": 0.000181787109375,
      "step": 29784,
      "training_step_time": 0.4218783378601074
    },
    {
      "epoch": 0.000181793212890625,
      "model_forward_time": 0.1160740852355957,
      "step": 29785
    },
    {
      "epoch": 0.000181793212890625,
      "step": 29785,
      "training_step_time": 0.4011232852935791
    },
    {
      "epoch": 0.00018179931640625,
      "model_forward_time": 0.11523866653442383,
      "step": 29786
    },
    {
      "epoch": 0.00018179931640625,
      "step": 29786,
      "training_step_time": 0.38353443145751953
    },
    {
      "epoch": 0.000181805419921875,
      "model_forward_time": 0.1158604621887207,
      "step": 29787
    },
    {
      "epoch": 0.000181805419921875,
      "step": 29787,
      "training_step_time": 0.4528210163116455
    },
    {
      "epoch": 0.0001818115234375,
      "model_forward_time": 0.11524462699890137,
      "step": 29788
    },
    {
      "epoch": 0.0001818115234375,
      "step": 29788,
      "training_step_time": 0.4342193603515625
    },
    {
      "epoch": 0.000181817626953125,
      "model_forward_time": 0.11503791809082031,
      "step": 29789
    },
    {
      "epoch": 0.000181817626953125,
      "step": 29789,
      "training_step_time": 0.40288615226745605
    },
    {
      "epoch": 0.00018182373046875,
      "grad_norm": 0.12326443940401077,
      "learning_rate": 5.470541566592573e-05,
      "loss": 0.0461,
      "step": 29790
    },
    {
      "epoch": 0.00018182373046875,
      "model_forward_time": 0.11569547653198242,
      "step": 29790
    },
    {
      "epoch": 0.00018182373046875,
      "step": 29790,
      "training_step_time": 0.4036369323730469
    },
    {
      "epoch": 0.000181829833984375,
      "model_forward_time": 0.11571383476257324,
      "step": 29791
    },
    {
      "epoch": 0.000181829833984375,
      "step": 29791,
      "training_step_time": 0.4008502960205078
    },
    {
      "epoch": 0.0001818359375,
      "model_forward_time": 0.11687445640563965,
      "step": 29792
    },
    {
      "epoch": 0.0001818359375,
      "step": 29792,
      "training_step_time": 0.3860776424407959
    },
    {
      "epoch": 0.000181842041015625,
      "model_forward_time": 0.11504507064819336,
      "step": 29793
    },
    {
      "epoch": 0.000181842041015625,
      "step": 29793,
      "training_step_time": 0.39238548278808594
    },
    {
      "epoch": 0.00018184814453125,
      "model_forward_time": 0.11658287048339844,
      "step": 29794
    },
    {
      "epoch": 0.00018184814453125,
      "step": 29794,
      "training_step_time": 0.5829756259918213
    },
    {
      "epoch": 0.000181854248046875,
      "model_forward_time": 0.11523675918579102,
      "step": 29795
    },
    {
      "epoch": 0.000181854248046875,
      "step": 29795,
      "training_step_time": 0.3919093608856201
    },
    {
      "epoch": 0.0001818603515625,
      "model_forward_time": 0.11549663543701172,
      "step": 29796
    },
    {
      "epoch": 0.0001818603515625,
      "step": 29796,
      "training_step_time": 0.41246891021728516
    },
    {
      "epoch": 0.000181866455078125,
      "model_forward_time": 0.11509346961975098,
      "step": 29797
    },
    {
      "epoch": 0.000181866455078125,
      "step": 29797,
      "training_step_time": 0.43043971061706543
    },
    {
      "epoch": 0.00018187255859375,
      "model_forward_time": 0.1147298812866211,
      "step": 29798
    },
    {
      "epoch": 0.00018187255859375,
      "step": 29798,
      "training_step_time": 0.44104504585266113
    },
    {
      "epoch": 0.000181878662109375,
      "model_forward_time": 0.11451601982116699,
      "step": 29799
    },
    {
      "epoch": 0.000181878662109375,
      "step": 29799,
      "training_step_time": 0.49274516105651855
    },
    {
      "epoch": 0.000181884765625,
      "grad_norm": 0.15841351449489594,
      "learning_rate": 5.467797942495589e-05,
      "loss": 0.0482,
      "step": 29800
    },
    {
      "epoch": 0.000181884765625,
      "model_forward_time": 0.11523175239562988,
      "step": 29800
    },
    {
      "epoch": 0.000181884765625,
      "step": 29800,
      "training_step_time": 0.3884086608886719
    },
    {
      "epoch": 0.000181890869140625,
      "model_forward_time": 0.1153874397277832,
      "step": 29801
    },
    {
      "epoch": 0.000181890869140625,
      "step": 29801,
      "training_step_time": 0.43503689765930176
    },
    {
      "epoch": 0.00018189697265625,
      "model_forward_time": 0.1155543327331543,
      "step": 29802
    },
    {
      "epoch": 0.00018189697265625,
      "step": 29802,
      "training_step_time": 0.48444414138793945
    },
    {
      "epoch": 0.000181903076171875,
      "model_forward_time": 0.11487126350402832,
      "step": 29803
    },
    {
      "epoch": 0.000181903076171875,
      "step": 29803,
      "training_step_time": 0.39168667793273926
    },
    {
      "epoch": 0.0001819091796875,
      "model_forward_time": 0.1156458854675293,
      "step": 29804
    },
    {
      "epoch": 0.0001819091796875,
      "step": 29804,
      "training_step_time": 0.4397752285003662
    },
    {
      "epoch": 0.000181915283203125,
      "model_forward_time": 0.11518549919128418,
      "step": 29805
    },
    {
      "epoch": 0.000181915283203125,
      "step": 29805,
      "training_step_time": 0.3782155513763428
    },
    {
      "epoch": 0.00018192138671875,
      "model_forward_time": 0.11555147171020508,
      "step": 29806
    },
    {
      "epoch": 0.00018192138671875,
      "step": 29806,
      "training_step_time": 0.47550129890441895
    },
    {
      "epoch": 0.000181927490234375,
      "model_forward_time": 0.11489176750183105,
      "step": 29807
    },
    {
      "epoch": 0.000181927490234375,
      "step": 29807,
      "training_step_time": 0.38416147232055664
    },
    {
      "epoch": 0.00018193359375,
      "model_forward_time": 0.11510229110717773,
      "step": 29808
    },
    {
      "epoch": 0.00018193359375,
      "step": 29808,
      "training_step_time": 0.39358997344970703
    },
    {
      "epoch": 0.000181939697265625,
      "model_forward_time": 0.11529326438903809,
      "step": 29809
    },
    {
      "epoch": 0.000181939697265625,
      "step": 29809,
      "training_step_time": 0.44361066818237305
    },
    {
      "epoch": 0.00018194580078125,
      "grad_norm": 0.17291325330734253,
      "learning_rate": 5.4650541762939435e-05,
      "loss": 0.045,
      "step": 29810
    },
    {
      "epoch": 0.00018194580078125,
      "model_forward_time": 0.11497712135314941,
      "step": 29810
    },
    {
      "epoch": 0.00018194580078125,
      "step": 29810,
      "training_step_time": 0.4309067726135254
    },
    {
      "epoch": 0.000181951904296875,
      "model_forward_time": 0.11498880386352539,
      "step": 29811
    },
    {
      "epoch": 0.000181951904296875,
      "step": 29811,
      "training_step_time": 0.408846378326416
    },
    {
      "epoch": 0.0001819580078125,
      "model_forward_time": 0.11595654487609863,
      "step": 29812
    },
    {
      "epoch": 0.0001819580078125,
      "step": 29812,
      "training_step_time": 0.7222967147827148
    },
    {
      "epoch": 0.000181964111328125,
      "model_forward_time": 0.11480450630187988,
      "step": 29813
    },
    {
      "epoch": 0.000181964111328125,
      "step": 29813,
      "training_step_time": 0.4245922565460205
    },
    {
      "epoch": 0.00018197021484375,
      "model_forward_time": 0.11497640609741211,
      "step": 29814
    },
    {
      "epoch": 0.00018197021484375,
      "step": 29814,
      "training_step_time": 0.3929274082183838
    },
    {
      "epoch": 0.000181976318359375,
      "model_forward_time": 0.11439061164855957,
      "step": 29815
    },
    {
      "epoch": 0.000181976318359375,
      "step": 29815,
      "training_step_time": 0.4051637649536133
    },
    {
      "epoch": 0.000181982421875,
      "model_forward_time": 0.1144402027130127,
      "step": 29816
    },
    {
      "epoch": 0.000181982421875,
      "step": 29816,
      "training_step_time": 0.49939417839050293
    },
    {
      "epoch": 0.000181988525390625,
      "model_forward_time": 0.11387920379638672,
      "step": 29817
    },
    {
      "epoch": 0.000181988525390625,
      "step": 29817,
      "training_step_time": 0.39938879013061523
    },
    {
      "epoch": 0.00018199462890625,
      "model_forward_time": 0.1148536205291748,
      "step": 29818
    },
    {
      "epoch": 0.00018199462890625,
      "step": 29818,
      "training_step_time": 0.6439464092254639
    },
    {
      "epoch": 0.000182000732421875,
      "model_forward_time": 0.11457467079162598,
      "step": 29819
    },
    {
      "epoch": 0.000182000732421875,
      "step": 29819,
      "training_step_time": 0.3800697326660156
    },
    {
      "epoch": 0.0001820068359375,
      "grad_norm": 0.11531087756156921,
      "learning_rate": 5.462310268821118e-05,
      "loss": 0.0468,
      "step": 29820
    },
    {
      "epoch": 0.0001820068359375,
      "model_forward_time": 0.1142880916595459,
      "step": 29820
    },
    {
      "epoch": 0.0001820068359375,
      "step": 29820,
      "training_step_time": 0.3948221206665039
    },
    {
      "epoch": 0.000182012939453125,
      "model_forward_time": 0.11498355865478516,
      "step": 29821
    },
    {
      "epoch": 0.000182012939453125,
      "step": 29821,
      "training_step_time": 0.3979003429412842
    },
    {
      "epoch": 0.00018201904296875,
      "model_forward_time": 0.11464333534240723,
      "step": 29822
    },
    {
      "epoch": 0.00018201904296875,
      "step": 29822,
      "training_step_time": 0.3841099739074707
    },
    {
      "epoch": 0.000182025146484375,
      "model_forward_time": 0.1150820255279541,
      "step": 29823
    },
    {
      "epoch": 0.000182025146484375,
      "step": 29823,
      "training_step_time": 0.4372427463531494
    },
    {
      "epoch": 0.00018203125,
      "model_forward_time": 0.11499261856079102,
      "step": 29824
    },
    {
      "epoch": 0.00018203125,
      "step": 29824,
      "training_step_time": 0.6648695468902588
    },
    {
      "epoch": 0.000182037353515625,
      "model_forward_time": 0.11445331573486328,
      "step": 29825
    },
    {
      "epoch": 0.000182037353515625,
      "step": 29825,
      "training_step_time": 0.4047727584838867
    },
    {
      "epoch": 0.00018204345703125,
      "model_forward_time": 0.11560964584350586,
      "step": 29826
    },
    {
      "epoch": 0.00018204345703125,
      "step": 29826,
      "training_step_time": 0.48501110076904297
    },
    {
      "epoch": 0.000182049560546875,
      "model_forward_time": 0.11435747146606445,
      "step": 29827
    },
    {
      "epoch": 0.000182049560546875,
      "step": 29827,
      "training_step_time": 0.40258097648620605
    },
    {
      "epoch": 0.0001820556640625,
      "model_forward_time": 0.11401915550231934,
      "step": 29828
    },
    {
      "epoch": 0.0001820556640625,
      "step": 29828,
      "training_step_time": 0.39826512336730957
    },
    {
      "epoch": 0.000182061767578125,
      "model_forward_time": 0.11482024192810059,
      "step": 29829
    },
    {
      "epoch": 0.000182061767578125,
      "step": 29829,
      "training_step_time": 0.46672940254211426
    },
    {
      "epoch": 0.00018206787109375,
      "grad_norm": 0.14173801243305206,
      "learning_rate": 5.45956622091064e-05,
      "loss": 0.0388,
      "step": 29830
    },
    {
      "epoch": 0.00018206787109375,
      "model_forward_time": 0.11491870880126953,
      "step": 29830
    },
    {
      "epoch": 0.00018206787109375,
      "step": 29830,
      "training_step_time": 0.49098658561706543
    },
    {
      "epoch": 0.000182073974609375,
      "model_forward_time": 0.1150064468383789,
      "step": 29831
    },
    {
      "epoch": 0.000182073974609375,
      "step": 29831,
      "training_step_time": 0.40012383460998535
    },
    {
      "epoch": 0.000182080078125,
      "model_forward_time": 0.11544966697692871,
      "step": 29832
    },
    {
      "epoch": 0.000182080078125,
      "step": 29832,
      "training_step_time": 0.3969440460205078
    },
    {
      "epoch": 0.000182086181640625,
      "model_forward_time": 0.11520218849182129,
      "step": 29833
    },
    {
      "epoch": 0.000182086181640625,
      "step": 29833,
      "training_step_time": 0.3858306407928467
    },
    {
      "epoch": 0.00018209228515625,
      "model_forward_time": 0.11565113067626953,
      "step": 29834
    },
    {
      "epoch": 0.00018209228515625,
      "step": 29834,
      "training_step_time": 0.38784146308898926
    },
    {
      "epoch": 0.000182098388671875,
      "model_forward_time": 0.11484742164611816,
      "step": 29835
    },
    {
      "epoch": 0.000182098388671875,
      "step": 29835,
      "training_step_time": 0.4007298946380615
    },
    {
      "epoch": 0.0001821044921875,
      "model_forward_time": 0.11603116989135742,
      "step": 29836
    },
    {
      "epoch": 0.0001821044921875,
      "step": 29836,
      "training_step_time": 0.529090404510498
    },
    {
      "epoch": 0.000182110595703125,
      "model_forward_time": 0.11537933349609375,
      "step": 29837
    },
    {
      "epoch": 0.000182110595703125,
      "step": 29837,
      "training_step_time": 0.40558433532714844
    },
    {
      "epoch": 0.00018211669921875,
      "model_forward_time": 0.1154332160949707,
      "step": 29838
    },
    {
      "epoch": 0.00018211669921875,
      "step": 29838,
      "training_step_time": 0.4267890453338623
    },
    {
      "epoch": 0.000182122802734375,
      "model_forward_time": 0.11468076705932617,
      "step": 29839
    },
    {
      "epoch": 0.000182122802734375,
      "step": 29839,
      "training_step_time": 0.4682490825653076
    },
    {
      "epoch": 0.00018212890625,
      "grad_norm": 0.12142867594957352,
      "learning_rate": 5.456822033396076e-05,
      "loss": 0.0455,
      "step": 29840
    },
    {
      "epoch": 0.00018212890625,
      "model_forward_time": 0.11539816856384277,
      "step": 29840
    },
    {
      "epoch": 0.00018212890625,
      "step": 29840,
      "training_step_time": 0.4860210418701172
    },
    {
      "epoch": 0.000182135009765625,
      "model_forward_time": 0.1144871711730957,
      "step": 29841
    },
    {
      "epoch": 0.000182135009765625,
      "step": 29841,
      "training_step_time": 0.4323997497558594
    },
    {
      "epoch": 0.00018214111328125,
      "model_forward_time": 0.11435222625732422,
      "step": 29842
    },
    {
      "epoch": 0.00018214111328125,
      "step": 29842,
      "training_step_time": 0.38547849655151367
    },
    {
      "epoch": 0.000182147216796875,
      "model_forward_time": 0.11551761627197266,
      "step": 29843
    },
    {
      "epoch": 0.000182147216796875,
      "step": 29843,
      "training_step_time": 0.48108458518981934
    },
    {
      "epoch": 0.0001821533203125,
      "model_forward_time": 0.11405801773071289,
      "step": 29844
    },
    {
      "epoch": 0.0001821533203125,
      "step": 29844,
      "training_step_time": 0.41997361183166504
    },
    {
      "epoch": 0.000182159423828125,
      "model_forward_time": 0.11416506767272949,
      "step": 29845
    },
    {
      "epoch": 0.000182159423828125,
      "step": 29845,
      "training_step_time": 0.4949336051940918
    },
    {
      "epoch": 0.00018216552734375,
      "model_forward_time": 0.11485600471496582,
      "step": 29846
    },
    {
      "epoch": 0.00018216552734375,
      "step": 29846,
      "training_step_time": 0.3915081024169922
    },
    {
      "epoch": 0.000182171630859375,
      "model_forward_time": 0.11436843872070312,
      "step": 29847
    },
    {
      "epoch": 0.000182171630859375,
      "step": 29847,
      "training_step_time": 0.3936038017272949
    },
    {
      "epoch": 0.000182177734375,
      "model_forward_time": 0.11525273323059082,
      "step": 29848
    },
    {
      "epoch": 0.000182177734375,
      "step": 29848,
      "training_step_time": 0.4327516555786133
    },
    {
      "epoch": 0.000182183837890625,
      "model_forward_time": 0.11481618881225586,
      "step": 29849
    },
    {
      "epoch": 0.000182183837890625,
      "step": 29849,
      "training_step_time": 0.3887960910797119
    },
    {
      "epoch": 0.00018218994140625,
      "grad_norm": 0.11313559114933014,
      "learning_rate": 5.454077707111042e-05,
      "loss": 0.0441,
      "step": 29850
    },
    {
      "epoch": 0.00018218994140625,
      "model_forward_time": 0.11511397361755371,
      "step": 29850
    },
    {
      "epoch": 0.00018218994140625,
      "step": 29850,
      "training_step_time": 0.4002206325531006
    },
    {
      "epoch": 0.000182196044921875,
      "model_forward_time": 0.1172487735748291,
      "step": 29851
    },
    {
      "epoch": 0.000182196044921875,
      "step": 29851,
      "training_step_time": 0.3939247131347656
    },
    {
      "epoch": 0.0001822021484375,
      "model_forward_time": 0.11559486389160156,
      "step": 29852
    },
    {
      "epoch": 0.0001822021484375,
      "step": 29852,
      "training_step_time": 0.38782835006713867
    },
    {
      "epoch": 0.000182208251953125,
      "model_forward_time": 0.11600804328918457,
      "step": 29853
    },
    {
      "epoch": 0.000182208251953125,
      "step": 29853,
      "training_step_time": 0.46658754348754883
    },
    {
      "epoch": 0.00018221435546875,
      "model_forward_time": 0.11514949798583984,
      "step": 29854
    },
    {
      "epoch": 0.00018221435546875,
      "step": 29854,
      "training_step_time": 0.48407578468322754
    },
    {
      "epoch": 0.000182220458984375,
      "model_forward_time": 0.11479401588439941,
      "step": 29855
    },
    {
      "epoch": 0.000182220458984375,
      "step": 29855,
      "training_step_time": 0.4912242889404297
    },
    {
      "epoch": 0.0001822265625,
      "model_forward_time": 0.1154475212097168,
      "step": 29856
    },
    {
      "epoch": 0.0001822265625,
      "step": 29856,
      "training_step_time": 0.3860139846801758
    },
    {
      "epoch": 0.000182232666015625,
      "model_forward_time": 0.11444473266601562,
      "step": 29857
    },
    {
      "epoch": 0.000182232666015625,
      "step": 29857,
      "training_step_time": 0.3950679302215576
    },
    {
      "epoch": 0.00018223876953125,
      "model_forward_time": 0.11496853828430176,
      "step": 29858
    },
    {
      "epoch": 0.00018223876953125,
      "step": 29858,
      "training_step_time": 0.39272570610046387
    },
    {
      "epoch": 0.000182244873046875,
      "model_forward_time": 0.11487674713134766,
      "step": 29859
    },
    {
      "epoch": 0.000182244873046875,
      "step": 29859,
      "training_step_time": 0.4887416362762451
    },
    {
      "epoch": 0.0001822509765625,
      "grad_norm": 0.13601194322109222,
      "learning_rate": 5.4513332428891887e-05,
      "loss": 0.048,
      "step": 29860
    },
    {
      "epoch": 0.0001822509765625,
      "model_forward_time": 0.11499404907226562,
      "step": 29860
    },
    {
      "epoch": 0.0001822509765625,
      "step": 29860,
      "training_step_time": 0.49738574028015137
    },
    {
      "epoch": 0.000182257080078125,
      "model_forward_time": 0.11503791809082031,
      "step": 29861
    },
    {
      "epoch": 0.000182257080078125,
      "step": 29861,
      "training_step_time": 0.38620471954345703
    },
    {
      "epoch": 0.00018226318359375,
      "model_forward_time": 0.11492586135864258,
      "step": 29862
    },
    {
      "epoch": 0.00018226318359375,
      "step": 29862,
      "training_step_time": 0.3898801803588867
    },
    {
      "epoch": 0.000182269287109375,
      "model_forward_time": 0.11499381065368652,
      "step": 29863
    },
    {
      "epoch": 0.000182269287109375,
      "step": 29863,
      "training_step_time": 0.4022667407989502
    },
    {
      "epoch": 0.000182275390625,
      "model_forward_time": 0.11532974243164062,
      "step": 29864
    },
    {
      "epoch": 0.000182275390625,
      "step": 29864,
      "training_step_time": 0.3831465244293213
    },
    {
      "epoch": 0.000182281494140625,
      "model_forward_time": 0.11531305313110352,
      "step": 29865
    },
    {
      "epoch": 0.000182281494140625,
      "step": 29865,
      "training_step_time": 0.3795452117919922
    },
    {
      "epoch": 0.00018228759765625,
      "model_forward_time": 0.11489200592041016,
      "step": 29866
    },
    {
      "epoch": 0.00018228759765625,
      "step": 29866,
      "training_step_time": 0.6333112716674805
    },
    {
      "epoch": 0.000182293701171875,
      "model_forward_time": 0.11519861221313477,
      "step": 29867
    },
    {
      "epoch": 0.000182293701171875,
      "step": 29867,
      "training_step_time": 0.4625730514526367
    },
    {
      "epoch": 0.0001822998046875,
      "model_forward_time": 0.1183326244354248,
      "step": 29868
    },
    {
      "epoch": 0.0001822998046875,
      "step": 29868,
      "training_step_time": 0.44126319885253906
    },
    {
      "epoch": 0.000182305908203125,
      "model_forward_time": 0.11467361450195312,
      "step": 29869
    },
    {
      "epoch": 0.000182305908203125,
      "step": 29869,
      "training_step_time": 0.4835829734802246
    },
    {
      "epoch": 0.00018231201171875,
      "grad_norm": 0.09460529685020447,
      "learning_rate": 5.448588641564213e-05,
      "loss": 0.0473,
      "step": 29870
    },
    {
      "epoch": 0.00018231201171875,
      "model_forward_time": 0.11438465118408203,
      "step": 29870
    },
    {
      "epoch": 0.00018231201171875,
      "step": 29870,
      "training_step_time": 0.3805840015411377
    },
    {
      "epoch": 0.000182318115234375,
      "model_forward_time": 0.11471056938171387,
      "step": 29871
    },
    {
      "epoch": 0.000182318115234375,
      "step": 29871,
      "training_step_time": 0.37883663177490234
    },
    {
      "epoch": 0.00018232421875,
      "model_forward_time": 0.11529374122619629,
      "step": 29872
    },
    {
      "epoch": 0.00018232421875,
      "step": 29872,
      "training_step_time": 0.47681450843811035
    },
    {
      "epoch": 0.000182330322265625,
      "model_forward_time": 0.11449384689331055,
      "step": 29873
    },
    {
      "epoch": 0.000182330322265625,
      "step": 29873,
      "training_step_time": 0.3999216556549072
    },
    {
      "epoch": 0.00018233642578125,
      "model_forward_time": 0.11517453193664551,
      "step": 29874
    },
    {
      "epoch": 0.00018233642578125,
      "step": 29874,
      "training_step_time": 0.39322972297668457
    },
    {
      "epoch": 0.000182342529296875,
      "model_forward_time": 0.11500024795532227,
      "step": 29875
    },
    {
      "epoch": 0.000182342529296875,
      "step": 29875,
      "training_step_time": 0.3968379497528076
    },
    {
      "epoch": 0.0001823486328125,
      "model_forward_time": 0.11500191688537598,
      "step": 29876
    },
    {
      "epoch": 0.0001823486328125,
      "step": 29876,
      "training_step_time": 0.3888986110687256
    },
    {
      "epoch": 0.000182354736328125,
      "model_forward_time": 0.1153707504272461,
      "step": 29877
    },
    {
      "epoch": 0.000182354736328125,
      "step": 29877,
      "training_step_time": 0.3865199089050293
    },
    {
      "epoch": 0.00018236083984375,
      "model_forward_time": 0.11494803428649902,
      "step": 29878
    },
    {
      "epoch": 0.00018236083984375,
      "step": 29878,
      "training_step_time": 0.7258765697479248
    },
    {
      "epoch": 0.000182366943359375,
      "model_forward_time": 0.11496853828430176,
      "step": 29879
    },
    {
      "epoch": 0.000182366943359375,
      "step": 29879,
      "training_step_time": 0.423661470413208
    },
    {
      "epoch": 0.000182373046875,
      "grad_norm": 0.12493928521871567,
      "learning_rate": 5.445843903969854e-05,
      "loss": 0.0407,
      "step": 29880
    },
    {
      "epoch": 0.000182373046875,
      "model_forward_time": 0.11442923545837402,
      "step": 29880
    },
    {
      "epoch": 0.000182373046875,
      "step": 29880,
      "training_step_time": 0.42649054527282715
    },
    {
      "epoch": 0.000182379150390625,
      "model_forward_time": 0.11495637893676758,
      "step": 29881
    },
    {
      "epoch": 0.000182379150390625,
      "step": 29881,
      "training_step_time": 0.40384674072265625
    },
    {
      "epoch": 0.00018238525390625,
      "model_forward_time": 0.11542034149169922,
      "step": 29882
    },
    {
      "epoch": 0.00018238525390625,
      "step": 29882,
      "training_step_time": 0.40526604652404785
    },
    {
      "epoch": 0.000182391357421875,
      "model_forward_time": 0.1147317886352539,
      "step": 29883
    },
    {
      "epoch": 0.000182391357421875,
      "step": 29883,
      "training_step_time": 0.38328981399536133
    },
    {
      "epoch": 0.0001823974609375,
      "model_forward_time": 0.11515522003173828,
      "step": 29884
    },
    {
      "epoch": 0.0001823974609375,
      "step": 29884,
      "training_step_time": 0.46050024032592773
    },
    {
      "epoch": 0.000182403564453125,
      "model_forward_time": 0.11635828018188477,
      "step": 29885
    },
    {
      "epoch": 0.000182403564453125,
      "step": 29885,
      "training_step_time": 0.38442277908325195
    },
    {
      "epoch": 0.00018240966796875,
      "model_forward_time": 0.11442136764526367,
      "step": 29886
    },
    {
      "epoch": 0.00018240966796875,
      "step": 29886,
      "training_step_time": 0.451000452041626
    },
    {
      "epoch": 0.000182415771484375,
      "model_forward_time": 0.11499571800231934,
      "step": 29887
    },
    {
      "epoch": 0.000182415771484375,
      "step": 29887,
      "training_step_time": 0.4344217777252197
    },
    {
      "epoch": 0.000182421875,
      "model_forward_time": 0.11543393135070801,
      "step": 29888
    },
    {
      "epoch": 0.000182421875,
      "step": 29888,
      "training_step_time": 0.3983588218688965
    },
    {
      "epoch": 0.000182427978515625,
      "model_forward_time": 0.11534357070922852,
      "step": 29889
    },
    {
      "epoch": 0.000182427978515625,
      "step": 29889,
      "training_step_time": 0.3957223892211914
    },
    {
      "epoch": 0.00018243408203125,
      "grad_norm": 0.09059501439332962,
      "learning_rate": 5.443099030939887e-05,
      "loss": 0.0395,
      "step": 29890
    },
    {
      "epoch": 0.00018243408203125,
      "model_forward_time": 0.11504340171813965,
      "step": 29890
    },
    {
      "epoch": 0.00018243408203125,
      "step": 29890,
      "training_step_time": 0.4636259078979492
    },
    {
      "epoch": 0.000182440185546875,
      "model_forward_time": 0.1157219409942627,
      "step": 29891
    },
    {
      "epoch": 0.000182440185546875,
      "step": 29891,
      "training_step_time": 0.3971371650695801
    },
    {
      "epoch": 0.0001824462890625,
      "model_forward_time": 0.11530017852783203,
      "step": 29892
    },
    {
      "epoch": 0.0001824462890625,
      "step": 29892,
      "training_step_time": 0.4007887840270996
    },
    {
      "epoch": 0.000182452392578125,
      "model_forward_time": 0.11576509475708008,
      "step": 29893
    },
    {
      "epoch": 0.000182452392578125,
      "step": 29893,
      "training_step_time": 0.39552783966064453
    },
    {
      "epoch": 0.00018245849609375,
      "model_forward_time": 0.11519289016723633,
      "step": 29894
    },
    {
      "epoch": 0.00018245849609375,
      "step": 29894,
      "training_step_time": 0.4138808250427246
    },
    {
      "epoch": 0.000182464599609375,
      "model_forward_time": 0.11477446556091309,
      "step": 29895
    },
    {
      "epoch": 0.000182464599609375,
      "step": 29895,
      "training_step_time": 0.3964674472808838
    },
    {
      "epoch": 0.000182470703125,
      "model_forward_time": 0.11565971374511719,
      "step": 29896
    },
    {
      "epoch": 0.000182470703125,
      "step": 29896,
      "training_step_time": 0.5247776508331299
    },
    {
      "epoch": 0.000182476806640625,
      "model_forward_time": 0.11525440216064453,
      "step": 29897
    },
    {
      "epoch": 0.000182476806640625,
      "step": 29897,
      "training_step_time": 0.4124281406402588
    },
    {
      "epoch": 0.00018248291015625,
      "model_forward_time": 0.11510729789733887,
      "step": 29898
    },
    {
      "epoch": 0.00018248291015625,
      "step": 29898,
      "training_step_time": 0.4749481678009033
    },
    {
      "epoch": 0.000182489013671875,
      "model_forward_time": 0.11510062217712402,
      "step": 29899
    },
    {
      "epoch": 0.000182489013671875,
      "step": 29899,
      "training_step_time": 0.3970208168029785
    },
    {
      "epoch": 0.0001824951171875,
      "grad_norm": 0.12346416711807251,
      "learning_rate": 5.440354023308134e-05,
      "loss": 0.0433,
      "step": 29900
    },
    {
      "epoch": 0.0001824951171875,
      "model_forward_time": 0.11447668075561523,
      "step": 29900
    },
    {
      "epoch": 0.0001824951171875,
      "step": 29900,
      "training_step_time": 0.4023761749267578
    },
    {
      "epoch": 0.000182501220703125,
      "model_forward_time": 0.1154172420501709,
      "step": 29901
    },
    {
      "epoch": 0.000182501220703125,
      "step": 29901,
      "training_step_time": 0.41162109375
    },
    {
      "epoch": 0.00018250732421875,
      "model_forward_time": 0.11474251747131348,
      "step": 29902
    },
    {
      "epoch": 0.00018250732421875,
      "step": 29902,
      "training_step_time": 0.638965368270874
    },
    {
      "epoch": 0.000182513427734375,
      "model_forward_time": 0.11433720588684082,
      "step": 29903
    },
    {
      "epoch": 0.000182513427734375,
      "step": 29903,
      "training_step_time": 0.3912789821624756
    },
    {
      "epoch": 0.00018251953125,
      "model_forward_time": 0.11441659927368164,
      "step": 29904
    },
    {
      "epoch": 0.00018251953125,
      "step": 29904,
      "training_step_time": 0.3922257423400879
    },
    {
      "epoch": 0.000182525634765625,
      "model_forward_time": 0.11474943161010742,
      "step": 29905
    },
    {
      "epoch": 0.000182525634765625,
      "step": 29905,
      "training_step_time": 0.40049266815185547
    },
    {
      "epoch": 0.00018253173828125,
      "model_forward_time": 0.11525344848632812,
      "step": 29906
    },
    {
      "epoch": 0.00018253173828125,
      "step": 29906,
      "training_step_time": 0.3833351135253906
    },
    {
      "epoch": 0.000182537841796875,
      "model_forward_time": 0.11487174034118652,
      "step": 29907
    },
    {
      "epoch": 0.000182537841796875,
      "step": 29907,
      "training_step_time": 0.39565420150756836
    },
    {
      "epoch": 0.0001825439453125,
      "model_forward_time": 0.11548566818237305,
      "step": 29908
    },
    {
      "epoch": 0.0001825439453125,
      "step": 29908,
      "training_step_time": 0.5418305397033691
    },
    {
      "epoch": 0.000182550048828125,
      "model_forward_time": 0.11502575874328613,
      "step": 29909
    },
    {
      "epoch": 0.000182550048828125,
      "step": 29909,
      "training_step_time": 0.4300723075866699
    },
    {
      "epoch": 0.00018255615234375,
      "grad_norm": 0.13404306769371033,
      "learning_rate": 5.4376088819084556e-05,
      "loss": 0.0405,
      "step": 29910
    },
    {
      "epoch": 0.00018255615234375,
      "model_forward_time": 0.11562943458557129,
      "step": 29910
    },
    {
      "epoch": 0.00018255615234375,
      "step": 29910,
      "training_step_time": 0.37247180938720703
    },
    {
      "epoch": 0.000182562255859375,
      "model_forward_time": 0.1145942211151123,
      "step": 29911
    },
    {
      "epoch": 0.000182562255859375,
      "step": 29911,
      "training_step_time": 0.4318220615386963
    },
    {
      "epoch": 0.000182568359375,
      "model_forward_time": 0.1148843765258789,
      "step": 29912
    },
    {
      "epoch": 0.000182568359375,
      "step": 29912,
      "training_step_time": 0.45346641540527344
    },
    {
      "epoch": 0.000182574462890625,
      "model_forward_time": 0.11478638648986816,
      "step": 29913
    },
    {
      "epoch": 0.000182574462890625,
      "step": 29913,
      "training_step_time": 0.3979947566986084
    },
    {
      "epoch": 0.00018258056640625,
      "model_forward_time": 0.11552023887634277,
      "step": 29914
    },
    {
      "epoch": 0.00018258056640625,
      "step": 29914,
      "training_step_time": 0.49825286865234375
    },
    {
      "epoch": 0.000182586669921875,
      "model_forward_time": 0.11464333534240723,
      "step": 29915
    },
    {
      "epoch": 0.000182586669921875,
      "step": 29915,
      "training_step_time": 0.3949015140533447
    },
    {
      "epoch": 0.0001825927734375,
      "model_forward_time": 0.11539816856384277,
      "step": 29916
    },
    {
      "epoch": 0.0001825927734375,
      "step": 29916,
      "training_step_time": 0.4522693157196045
    },
    {
      "epoch": 0.000182598876953125,
      "model_forward_time": 0.1153876781463623,
      "step": 29917
    },
    {
      "epoch": 0.000182598876953125,
      "step": 29917,
      "training_step_time": 0.39626455307006836
    },
    {
      "epoch": 0.00018260498046875,
      "model_forward_time": 0.11511445045471191,
      "step": 29918
    },
    {
      "epoch": 0.00018260498046875,
      "step": 29918,
      "training_step_time": 0.3927268981933594
    },
    {
      "epoch": 0.000182611083984375,
      "model_forward_time": 0.11700296401977539,
      "step": 29919
    },
    {
      "epoch": 0.000182611083984375,
      "step": 29919,
      "training_step_time": 0.4028451442718506
    },
    {
      "epoch": 0.0001826171875,
      "grad_norm": 0.11480332165956497,
      "learning_rate": 5.4348636075747536e-05,
      "loss": 0.0376,
      "step": 29920
    },
    {
      "epoch": 0.0001826171875,
      "model_forward_time": 0.1147916316986084,
      "step": 29920
    },
    {
      "epoch": 0.0001826171875,
      "step": 29920,
      "training_step_time": 0.5264296531677246
    },
    {
      "epoch": 0.000182623291015625,
      "model_forward_time": 0.1154015064239502,
      "step": 29921
    },
    {
      "epoch": 0.000182623291015625,
      "step": 29921,
      "training_step_time": 0.3964271545410156
    },
    {
      "epoch": 0.00018262939453125,
      "model_forward_time": 0.11515665054321289,
      "step": 29922
    },
    {
      "epoch": 0.00018262939453125,
      "step": 29922,
      "training_step_time": 0.44750118255615234
    },
    {
      "epoch": 0.000182635498046875,
      "model_forward_time": 0.11574959754943848,
      "step": 29923
    },
    {
      "epoch": 0.000182635498046875,
      "step": 29923,
      "training_step_time": 0.4140598773956299
    },
    {
      "epoch": 0.0001826416015625,
      "model_forward_time": 0.11546993255615234,
      "step": 29924
    },
    {
      "epoch": 0.0001826416015625,
      "step": 29924,
      "training_step_time": 0.41264915466308594
    },
    {
      "epoch": 0.000182647705078125,
      "model_forward_time": 0.11513972282409668,
      "step": 29925
    },
    {
      "epoch": 0.000182647705078125,
      "step": 29925,
      "training_step_time": 0.46724581718444824
    },
    {
      "epoch": 0.00018265380859375,
      "model_forward_time": 0.11551165580749512,
      "step": 29926
    },
    {
      "epoch": 0.00018265380859375,
      "step": 29926,
      "training_step_time": 0.5861802101135254
    },
    {
      "epoch": 0.000182659912109375,
      "model_forward_time": 0.11502361297607422,
      "step": 29927
    },
    {
      "epoch": 0.000182659912109375,
      "step": 29927,
      "training_step_time": 0.4195747375488281
    },
    {
      "epoch": 0.000182666015625,
      "model_forward_time": 0.11492776870727539,
      "step": 29928
    },
    {
      "epoch": 0.000182666015625,
      "step": 29928,
      "training_step_time": 0.3974132537841797
    },
    {
      "epoch": 0.000182672119140625,
      "model_forward_time": 0.11451292037963867,
      "step": 29929
    },
    {
      "epoch": 0.000182672119140625,
      "step": 29929,
      "training_step_time": 0.5085208415985107
    },
    {
      "epoch": 0.00018267822265625,
      "grad_norm": 0.2076919823884964,
      "learning_rate": 5.43211820114097e-05,
      "loss": 0.043,
      "step": 29930
    },
    {
      "epoch": 0.00018267822265625,
      "model_forward_time": 0.11450910568237305,
      "step": 29930
    },
    {
      "epoch": 0.00018267822265625,
      "step": 29930,
      "training_step_time": 0.39942026138305664
    },
    {
      "epoch": 0.000182684326171875,
      "model_forward_time": 0.11450624465942383,
      "step": 29931
    },
    {
      "epoch": 0.000182684326171875,
      "step": 29931,
      "training_step_time": 0.40160250663757324
    },
    {
      "epoch": 0.0001826904296875,
      "model_forward_time": 0.11493802070617676,
      "step": 29932
    },
    {
      "epoch": 0.0001826904296875,
      "step": 29932,
      "training_step_time": 0.49805307388305664
    },
    {
      "epoch": 0.000182696533203125,
      "model_forward_time": 0.11475539207458496,
      "step": 29933
    },
    {
      "epoch": 0.000182696533203125,
      "step": 29933,
      "training_step_time": 0.39485621452331543
    },
    {
      "epoch": 0.00018270263671875,
      "model_forward_time": 0.11494278907775879,
      "step": 29934
    },
    {
      "epoch": 0.00018270263671875,
      "step": 29934,
      "training_step_time": 0.47550225257873535
    },
    {
      "epoch": 0.000182708740234375,
      "model_forward_time": 0.11469459533691406,
      "step": 29935
    },
    {
      "epoch": 0.000182708740234375,
      "step": 29935,
      "training_step_time": 0.41013050079345703
    },
    {
      "epoch": 0.00018271484375,
      "model_forward_time": 0.11536216735839844,
      "step": 29936
    },
    {
      "epoch": 0.00018271484375,
      "step": 29936,
      "training_step_time": 0.3866455554962158
    },
    {
      "epoch": 0.000182720947265625,
      "model_forward_time": 0.11539196968078613,
      "step": 29937
    },
    {
      "epoch": 0.000182720947265625,
      "step": 29937,
      "training_step_time": 0.3954195976257324
    },
    {
      "epoch": 0.00018272705078125,
      "model_forward_time": 0.1149899959564209,
      "step": 29938
    },
    {
      "epoch": 0.00018272705078125,
      "step": 29938,
      "training_step_time": 0.6936204433441162
    },
    {
      "epoch": 0.000182733154296875,
      "model_forward_time": 0.11482930183410645,
      "step": 29939
    },
    {
      "epoch": 0.000182733154296875,
      "step": 29939,
      "training_step_time": 0.44904422760009766
    },
    {
      "epoch": 0.0001827392578125,
      "grad_norm": 0.11391368508338928,
      "learning_rate": 5.4293726634410855e-05,
      "loss": 0.0424,
      "step": 29940
    },
    {
      "epoch": 0.0001827392578125,
      "model_forward_time": 0.11480832099914551,
      "step": 29940
    },
    {
      "epoch": 0.0001827392578125,
      "step": 29940,
      "training_step_time": 0.3860957622528076
    },
    {
      "epoch": 0.000182745361328125,
      "model_forward_time": 0.11495232582092285,
      "step": 29941
    },
    {
      "epoch": 0.000182745361328125,
      "step": 29941,
      "training_step_time": 0.43349385261535645
    },
    {
      "epoch": 0.00018275146484375,
      "model_forward_time": 0.11478614807128906,
      "step": 29942
    },
    {
      "epoch": 0.00018275146484375,
      "step": 29942,
      "training_step_time": 0.4035828113555908
    },
    {
      "epoch": 0.000182757568359375,
      "model_forward_time": 0.11454343795776367,
      "step": 29943
    },
    {
      "epoch": 0.000182757568359375,
      "step": 29943,
      "training_step_time": 0.4031970500946045
    },
    {
      "epoch": 0.000182763671875,
      "model_forward_time": 0.11492037773132324,
      "step": 29944
    },
    {
      "epoch": 0.000182763671875,
      "step": 29944,
      "training_step_time": 0.5504045486450195
    },
    {
      "epoch": 0.000182769775390625,
      "model_forward_time": 0.11564779281616211,
      "step": 29945
    },
    {
      "epoch": 0.000182769775390625,
      "step": 29945,
      "training_step_time": 0.3872807025909424
    },
    {
      "epoch": 0.00018277587890625,
      "model_forward_time": 0.1150352954864502,
      "step": 29946
    },
    {
      "epoch": 0.00018277587890625,
      "step": 29946,
      "training_step_time": 0.4076240062713623
    },
    {
      "epoch": 0.000182781982421875,
      "model_forward_time": 0.1143953800201416,
      "step": 29947
    },
    {
      "epoch": 0.000182781982421875,
      "step": 29947,
      "training_step_time": 0.4012267589569092
    },
    {
      "epoch": 0.0001827880859375,
      "model_forward_time": 0.11552166938781738,
      "step": 29948
    },
    {
      "epoch": 0.0001827880859375,
      "step": 29948,
      "training_step_time": 0.42180800437927246
    },
    {
      "epoch": 0.000182794189453125,
      "model_forward_time": 0.11488699913024902,
      "step": 29949
    },
    {
      "epoch": 0.000182794189453125,
      "step": 29949,
      "training_step_time": 0.38205814361572266
    },
    {
      "epoch": 0.00018280029296875,
      "grad_norm": 0.17863905429840088,
      "learning_rate": 5.426626995309123e-05,
      "loss": 0.0416,
      "step": 29950
    },
    {
      "epoch": 0.00018280029296875,
      "model_forward_time": 0.11536407470703125,
      "step": 29950
    },
    {
      "epoch": 0.00018280029296875,
      "step": 29950,
      "training_step_time": 0.6226775646209717
    },
    {
      "epoch": 0.000182806396484375,
      "model_forward_time": 0.11511707305908203,
      "step": 29951
    },
    {
      "epoch": 0.000182806396484375,
      "step": 29951,
      "training_step_time": 0.40143561363220215
    },
    {
      "epoch": 0.0001828125,
      "model_forward_time": 0.11577439308166504,
      "step": 29952
    },
    {
      "epoch": 0.0001828125,
      "step": 29952,
      "training_step_time": 0.4350316524505615
    },
    {
      "epoch": 0.000182818603515625,
      "model_forward_time": 0.11477184295654297,
      "step": 29953
    },
    {
      "epoch": 0.000182818603515625,
      "step": 29953,
      "training_step_time": 0.464768648147583
    },
    {
      "epoch": 0.00018282470703125,
      "model_forward_time": 0.11526370048522949,
      "step": 29954
    },
    {
      "epoch": 0.00018282470703125,
      "step": 29954,
      "training_step_time": 0.4078085422515869
    },
    {
      "epoch": 0.000182830810546875,
      "model_forward_time": 0.11506342887878418,
      "step": 29955
    },
    {
      "epoch": 0.000182830810546875,
      "step": 29955,
      "training_step_time": 0.5028684139251709
    },
    {
      "epoch": 0.0001828369140625,
      "model_forward_time": 0.11560249328613281,
      "step": 29956
    },
    {
      "epoch": 0.0001828369140625,
      "step": 29956,
      "training_step_time": 0.4744837284088135
    },
    {
      "epoch": 0.000182843017578125,
      "model_forward_time": 0.11519861221313477,
      "step": 29957
    },
    {
      "epoch": 0.000182843017578125,
      "step": 29957,
      "training_step_time": 0.41286468505859375
    },
    {
      "epoch": 0.00018284912109375,
      "model_forward_time": 0.1150059700012207,
      "step": 29958
    },
    {
      "epoch": 0.00018284912109375,
      "step": 29958,
      "training_step_time": 0.3929579257965088
    },
    {
      "epoch": 0.000182855224609375,
      "model_forward_time": 0.11451077461242676,
      "step": 29959
    },
    {
      "epoch": 0.000182855224609375,
      "step": 29959,
      "training_step_time": 0.3999209403991699
    },
    {
      "epoch": 0.000182861328125,
      "grad_norm": 0.10171350836753845,
      "learning_rate": 5.423881197579144e-05,
      "loss": 0.0436,
      "step": 29960
    },
    {
      "epoch": 0.000182861328125,
      "model_forward_time": 0.11512899398803711,
      "step": 29960
    },
    {
      "epoch": 0.000182861328125,
      "step": 29960,
      "training_step_time": 0.4696168899536133
    },
    {
      "epoch": 0.000182867431640625,
      "model_forward_time": 0.11492776870727539,
      "step": 29961
    },
    {
      "epoch": 0.000182867431640625,
      "step": 29961,
      "training_step_time": 0.40203428268432617
    },
    {
      "epoch": 0.00018287353515625,
      "model_forward_time": 0.11567258834838867,
      "step": 29962
    },
    {
      "epoch": 0.00018287353515625,
      "step": 29962,
      "training_step_time": 0.4170253276824951
    },
    {
      "epoch": 0.000182879638671875,
      "model_forward_time": 0.11459636688232422,
      "step": 29963
    },
    {
      "epoch": 0.000182879638671875,
      "step": 29963,
      "training_step_time": 0.40173983573913574
    },
    {
      "epoch": 0.0001828857421875,
      "model_forward_time": 0.11557888984680176,
      "step": 29964
    },
    {
      "epoch": 0.0001828857421875,
      "step": 29964,
      "training_step_time": 0.4416205883026123
    },
    {
      "epoch": 0.000182891845703125,
      "model_forward_time": 0.11546778678894043,
      "step": 29965
    },
    {
      "epoch": 0.000182891845703125,
      "step": 29965,
      "training_step_time": 0.4120829105377197
    },
    {
      "epoch": 0.00018289794921875,
      "model_forward_time": 0.11620163917541504,
      "step": 29966
    },
    {
      "epoch": 0.00018289794921875,
      "step": 29966,
      "training_step_time": 0.45687270164489746
    },
    {
      "epoch": 0.000182904052734375,
      "model_forward_time": 0.11561059951782227,
      "step": 29967
    },
    {
      "epoch": 0.000182904052734375,
      "step": 29967,
      "training_step_time": 0.4360949993133545
    },
    {
      "epoch": 0.00018291015625,
      "model_forward_time": 0.11508440971374512,
      "step": 29968
    },
    {
      "epoch": 0.00018291015625,
      "step": 29968,
      "training_step_time": 0.5119092464447021
    },
    {
      "epoch": 0.000182916259765625,
      "model_forward_time": 0.11532998085021973,
      "step": 29969
    },
    {
      "epoch": 0.000182916259765625,
      "step": 29969,
      "training_step_time": 0.4915580749511719
    },
    {
      "epoch": 0.00018292236328125,
      "grad_norm": 0.15071845054626465,
      "learning_rate": 5.4211352710852495e-05,
      "loss": 0.0417,
      "step": 29970
    },
    {
      "epoch": 0.00018292236328125,
      "model_forward_time": 0.11496138572692871,
      "step": 29970
    },
    {
      "epoch": 0.00018292236328125,
      "step": 29970,
      "training_step_time": 0.48500657081604004
    },
    {
      "epoch": 0.000182928466796875,
      "model_forward_time": 0.11429595947265625,
      "step": 29971
    },
    {
      "epoch": 0.000182928466796875,
      "step": 29971,
      "training_step_time": 0.39934873580932617
    },
    {
      "epoch": 0.0001829345703125,
      "model_forward_time": 0.11543488502502441,
      "step": 29972
    },
    {
      "epoch": 0.0001829345703125,
      "step": 29972,
      "training_step_time": 0.4013485908508301
    },
    {
      "epoch": 0.000182940673828125,
      "model_forward_time": 0.1144254207611084,
      "step": 29973
    },
    {
      "epoch": 0.000182940673828125,
      "step": 29973,
      "training_step_time": 0.4309971332550049
    },
    {
      "epoch": 0.00018294677734375,
      "model_forward_time": 0.11487078666687012,
      "step": 29974
    },
    {
      "epoch": 0.00018294677734375,
      "step": 29974,
      "training_step_time": 0.39022397994995117
    },
    {
      "epoch": 0.000182952880859375,
      "model_forward_time": 0.11496734619140625,
      "step": 29975
    },
    {
      "epoch": 0.000182952880859375,
      "step": 29975,
      "training_step_time": 0.3912546634674072
    },
    {
      "epoch": 0.000182958984375,
      "model_forward_time": 0.11590218544006348,
      "step": 29976
    },
    {
      "epoch": 0.000182958984375,
      "step": 29976,
      "training_step_time": 0.39317822456359863
    },
    {
      "epoch": 0.000182965087890625,
      "model_forward_time": 0.1146552562713623,
      "step": 29977
    },
    {
      "epoch": 0.000182965087890625,
      "step": 29977,
      "training_step_time": 0.403822660446167
    },
    {
      "epoch": 0.00018297119140625,
      "model_forward_time": 0.11568641662597656,
      "step": 29978
    },
    {
      "epoch": 0.00018297119140625,
      "step": 29978,
      "training_step_time": 0.4023776054382324
    },
    {
      "epoch": 0.000182977294921875,
      "model_forward_time": 0.11572504043579102,
      "step": 29979
    },
    {
      "epoch": 0.000182977294921875,
      "step": 29979,
      "training_step_time": 0.3919668197631836
    },
    {
      "epoch": 0.0001829833984375,
      "grad_norm": 0.1335233896970749,
      "learning_rate": 5.418389216661579e-05,
      "loss": 0.0431,
      "step": 29980
    },
    {
      "epoch": 0.0001829833984375,
      "model_forward_time": 0.1155862808227539,
      "step": 29980
    },
    {
      "epoch": 0.0001829833984375,
      "step": 29980,
      "training_step_time": 0.6130268573760986
    },
    {
      "epoch": 0.000182989501953125,
      "model_forward_time": 0.11570239067077637,
      "step": 29981
    },
    {
      "epoch": 0.000182989501953125,
      "step": 29981,
      "training_step_time": 0.3980989456176758
    },
    {
      "epoch": 0.00018299560546875,
      "model_forward_time": 0.11517047882080078,
      "step": 29982
    },
    {
      "epoch": 0.00018299560546875,
      "step": 29982,
      "training_step_time": 0.454845666885376
    },
    {
      "epoch": 0.000183001708984375,
      "model_forward_time": 0.11515378952026367,
      "step": 29983
    },
    {
      "epoch": 0.000183001708984375,
      "step": 29983,
      "training_step_time": 0.4390578269958496
    },
    {
      "epoch": 0.0001830078125,
      "model_forward_time": 0.11507940292358398,
      "step": 29984
    },
    {
      "epoch": 0.0001830078125,
      "step": 29984,
      "training_step_time": 0.4364185333251953
    },
    {
      "epoch": 0.000183013916015625,
      "model_forward_time": 0.11567187309265137,
      "step": 29985
    },
    {
      "epoch": 0.000183013916015625,
      "step": 29985,
      "training_step_time": 0.400784969329834
    },
    {
      "epoch": 0.00018302001953125,
      "model_forward_time": 0.1147451400756836,
      "step": 29986
    },
    {
      "epoch": 0.00018302001953125,
      "step": 29986,
      "training_step_time": 0.4953765869140625
    },
    {
      "epoch": 0.000183026123046875,
      "model_forward_time": 0.114776611328125,
      "step": 29987
    },
    {
      "epoch": 0.000183026123046875,
      "step": 29987,
      "training_step_time": 0.39483070373535156
    },
    {
      "epoch": 0.0001830322265625,
      "model_forward_time": 0.11546778678894043,
      "step": 29988
    },
    {
      "epoch": 0.0001830322265625,
      "step": 29988,
      "training_step_time": 0.4095478057861328
    },
    {
      "epoch": 0.000183038330078125,
      "model_forward_time": 0.11665821075439453,
      "step": 29989
    },
    {
      "epoch": 0.000183038330078125,
      "step": 29989,
      "training_step_time": 0.47672152519226074
    },
    {
      "epoch": 0.00018304443359375,
      "grad_norm": 0.13654831051826477,
      "learning_rate": 5.415643035142309e-05,
      "loss": 0.0389,
      "step": 29990
    },
    {
      "epoch": 0.00018304443359375,
      "model_forward_time": 0.1182701587677002,
      "step": 29990
    },
    {
      "epoch": 0.00018304443359375,
      "step": 29990,
      "training_step_time": 0.5705161094665527
    },
    {
      "epoch": 0.000183050537109375,
      "model_forward_time": 0.11968684196472168,
      "step": 29991
    },
    {
      "epoch": 0.000183050537109375,
      "step": 29991,
      "training_step_time": 0.6387419700622559
    },
    {
      "epoch": 0.000183056640625,
      "model_forward_time": 0.1213221549987793,
      "step": 29992
    },
    {
      "epoch": 0.000183056640625,
      "step": 29992,
      "training_step_time": 0.9779224395751953
    },
    {
      "epoch": 0.000183062744140625,
      "model_forward_time": 0.1210484504699707,
      "step": 29993
    },
    {
      "epoch": 0.000183062744140625,
      "step": 29993,
      "training_step_time": 0.7643203735351562
    },
    {
      "epoch": 0.00018306884765625,
      "model_forward_time": 0.11625409126281738,
      "step": 29994
    },
    {
      "epoch": 0.00018306884765625,
      "step": 29994,
      "training_step_time": 0.7276937961578369
    },
    {
      "epoch": 0.000183074951171875,
      "model_forward_time": 0.11645674705505371,
      "step": 29995
    },
    {
      "epoch": 0.000183074951171875,
      "step": 29995,
      "training_step_time": 0.6909959316253662
    },
    {
      "epoch": 0.0001830810546875,
      "model_forward_time": 0.12085247039794922,
      "step": 29996
    },
    {
      "epoch": 0.0001830810546875,
      "step": 29996,
      "training_step_time": 0.6934003829956055
    },
    {
      "epoch": 0.000183087158203125,
      "model_forward_time": 0.11844515800476074,
      "step": 29997
    },
    {
      "epoch": 0.000183087158203125,
      "step": 29997,
      "training_step_time": 0.7632198333740234
    },
    {
      "epoch": 0.00018309326171875,
      "model_forward_time": 0.12316322326660156,
      "step": 29998
    },
    {
      "epoch": 0.00018309326171875,
      "step": 29998,
      "training_step_time": 0.6238958835601807
    },
    {
      "epoch": 0.000183099365234375,
      "model_forward_time": 0.12728667259216309,
      "step": 29999
    },
    {
      "epoch": 0.000183099365234375,
      "step": 29999,
      "training_step_time": 0.6687626838684082
    },
    {
      "epoch": 0.00018310546875,
      "grad_norm": 0.1287834346294403,
      "learning_rate": 5.4128967273616625e-05,
      "loss": 0.04,
      "step": 30000
    },
    {
      "epoch": 0.00018310546875,
      "model_forward_time": 0.11583113670349121,
      "step": 30000
    },
    {
      "epoch": 0.00018310546875,
      "step": 30000,
      "training_step_time": 0.5043213367462158
    },
    {
      "epoch": 0.000183111572265625,
      "model_forward_time": 0.11525917053222656,
      "step": 30001
    },
    {
      "epoch": 0.000183111572265625,
      "step": 30001,
      "training_step_time": 0.6363389492034912
    },
    {
      "epoch": 0.00018311767578125,
      "model_forward_time": 0.11522626876831055,
      "step": 30002
    },
    {
      "epoch": 0.00018311767578125,
      "step": 30002,
      "training_step_time": 0.6405971050262451
    },
    {
      "epoch": 0.000183123779296875,
      "model_forward_time": 0.11879444122314453,
      "step": 30003
    },
    {
      "epoch": 0.000183123779296875,
      "step": 30003,
      "training_step_time": 0.7615482807159424
    },
    {
      "epoch": 0.0001831298828125,
      "model_forward_time": 0.11955761909484863,
      "step": 30004
    },
    {
      "epoch": 0.0001831298828125,
      "step": 30004,
      "training_step_time": 0.7458641529083252
    },
    {
      "epoch": 0.000183135986328125,
      "model_forward_time": 0.12024474143981934,
      "step": 30005
    },
    {
      "epoch": 0.000183135986328125,
      "step": 30005,
      "training_step_time": 0.6266860961914062
    },
    {
      "epoch": 0.00018314208984375,
      "model_forward_time": 0.11819839477539062,
      "step": 30006
    },
    {
      "epoch": 0.00018314208984375,
      "step": 30006,
      "training_step_time": 0.6559848785400391
    },
    {
      "epoch": 0.000183148193359375,
      "model_forward_time": 0.12105393409729004,
      "step": 30007
    },
    {
      "epoch": 0.000183148193359375,
      "step": 30007,
      "training_step_time": 0.6906924247741699
    },
    {
      "epoch": 0.000183154296875,
      "model_forward_time": 0.11957430839538574,
      "step": 30008
    },
    {
      "epoch": 0.000183154296875,
      "step": 30008,
      "training_step_time": 0.6807117462158203
    },
    {
      "epoch": 0.000183160400390625,
      "model_forward_time": 0.12868523597717285,
      "step": 30009
    },
    {
      "epoch": 0.000183160400390625,
      "step": 30009,
      "training_step_time": 0.6550989151000977
    },
    {
      "epoch": 0.00018316650390625,
      "grad_norm": 0.14266428351402283,
      "learning_rate": 5.4101502941538896e-05,
      "loss": 0.052,
      "step": 30010
    },
    {
      "epoch": 0.00018316650390625,
      "model_forward_time": 0.11980366706848145,
      "step": 30010
    },
    {
      "epoch": 0.00018316650390625,
      "step": 30010,
      "training_step_time": 0.6818883419036865
    },
    {
      "epoch": 0.000183172607421875,
      "model_forward_time": 0.12001609802246094,
      "step": 30011
    },
    {
      "epoch": 0.000183172607421875,
      "step": 30011,
      "training_step_time": 0.6502354145050049
    },
    {
      "epoch": 0.0001831787109375,
      "model_forward_time": 0.1182553768157959,
      "step": 30012
    },
    {
      "epoch": 0.0001831787109375,
      "step": 30012,
      "training_step_time": 0.7335052490234375
    },
    {
      "epoch": 0.000183184814453125,
      "model_forward_time": 0.11858820915222168,
      "step": 30013
    },
    {
      "epoch": 0.000183184814453125,
      "step": 30013,
      "training_step_time": 0.7198176383972168
    },
    {
      "epoch": 0.00018319091796875,
      "model_forward_time": 0.11900949478149414,
      "step": 30014
    },
    {
      "epoch": 0.00018319091796875,
      "step": 30014,
      "training_step_time": 0.6670167446136475
    },
    {
      "epoch": 0.000183197021484375,
      "model_forward_time": 0.11949801445007324,
      "step": 30015
    },
    {
      "epoch": 0.000183197021484375,
      "step": 30015,
      "training_step_time": 0.6918559074401855
    },
    {
      "epoch": 0.000183203125,
      "model_forward_time": 0.12146759033203125,
      "step": 30016
    },
    {
      "epoch": 0.000183203125,
      "step": 30016,
      "training_step_time": 0.6192152500152588
    },
    {
      "epoch": 0.000183209228515625,
      "model_forward_time": 0.12257766723632812,
      "step": 30017
    },
    {
      "epoch": 0.000183209228515625,
      "step": 30017,
      "training_step_time": 0.6733875274658203
    },
    {
      "epoch": 0.00018321533203125,
      "model_forward_time": 0.11828374862670898,
      "step": 30018
    },
    {
      "epoch": 0.00018321533203125,
      "step": 30018,
      "training_step_time": 0.7148010730743408
    },
    {
      "epoch": 0.000183221435546875,
      "model_forward_time": 0.13142132759094238,
      "step": 30019
    },
    {
      "epoch": 0.000183221435546875,
      "step": 30019,
      "training_step_time": 0.7130954265594482
    },
    {
      "epoch": 0.0001832275390625,
      "grad_norm": 0.14952774345874786,
      "learning_rate": 5.407403736353288e-05,
      "loss": 0.0515,
      "step": 30020
    },
    {
      "epoch": 0.0001832275390625,
      "model_forward_time": 0.11708521842956543,
      "step": 30020
    },
    {
      "epoch": 0.0001832275390625,
      "step": 30020,
      "training_step_time": 0.8123021125793457
    },
    {
      "epoch": 0.000183233642578125,
      "model_forward_time": 0.12312507629394531,
      "step": 30021
    },
    {
      "epoch": 0.000183233642578125,
      "step": 30021,
      "training_step_time": 0.7235543727874756
    },
    {
      "epoch": 0.00018323974609375,
      "model_forward_time": 0.12308287620544434,
      "step": 30022
    },
    {
      "epoch": 0.00018323974609375,
      "step": 30022,
      "training_step_time": 0.6927738189697266
    },
    {
      "epoch": 0.000183245849609375,
      "model_forward_time": 0.11896944046020508,
      "step": 30023
    },
    {
      "epoch": 0.000183245849609375,
      "step": 30023,
      "training_step_time": 0.6530923843383789
    },
    {
      "epoch": 0.000183251953125,
      "model_forward_time": 0.12065386772155762,
      "step": 30024
    },
    {
      "epoch": 0.000183251953125,
      "step": 30024,
      "training_step_time": 0.6781015396118164
    },
    {
      "epoch": 0.000183258056640625,
      "model_forward_time": 0.11615347862243652,
      "step": 30025
    },
    {
      "epoch": 0.000183258056640625,
      "step": 30025,
      "training_step_time": 0.6807036399841309
    },
    {
      "epoch": 0.00018326416015625,
      "model_forward_time": 0.1206510066986084,
      "step": 30026
    },
    {
      "epoch": 0.00018326416015625,
      "step": 30026,
      "training_step_time": 0.6412529945373535
    },
    {
      "epoch": 0.000183270263671875,
      "model_forward_time": 0.12362933158874512,
      "step": 30027
    },
    {
      "epoch": 0.000183270263671875,
      "step": 30027,
      "training_step_time": 0.6657314300537109
    },
    {
      "epoch": 0.0001832763671875,
      "model_forward_time": 0.12103533744812012,
      "step": 30028
    },
    {
      "epoch": 0.0001832763671875,
      "step": 30028,
      "training_step_time": 0.7311103343963623
    },
    {
      "epoch": 0.000183282470703125,
      "model_forward_time": 0.12441468238830566,
      "step": 30029
    },
    {
      "epoch": 0.000183282470703125,
      "step": 30029,
      "training_step_time": 0.6674942970275879
    },
    {
      "epoch": 0.00018328857421875,
      "grad_norm": 0.09914840757846832,
      "learning_rate": 5.404657054794189e-05,
      "loss": 0.0524,
      "step": 30030
    },
    {
      "epoch": 0.00018328857421875,
      "model_forward_time": 0.12262511253356934,
      "step": 30030
    },
    {
      "epoch": 0.00018328857421875,
      "step": 30030,
      "training_step_time": 0.708019495010376
    },
    {
      "epoch": 0.000183294677734375,
      "model_forward_time": 0.12024545669555664,
      "step": 30031
    },
    {
      "epoch": 0.000183294677734375,
      "step": 30031,
      "training_step_time": 0.6291897296905518
    },
    {
      "epoch": 0.00018330078125,
      "model_forward_time": 0.12277460098266602,
      "step": 30032
    },
    {
      "epoch": 0.00018330078125,
      "step": 30032,
      "training_step_time": 0.6826074123382568
    },
    {
      "epoch": 0.000183306884765625,
      "model_forward_time": 0.11876392364501953,
      "step": 30033
    },
    {
      "epoch": 0.000183306884765625,
      "step": 30033,
      "training_step_time": 0.6574389934539795
    },
    {
      "epoch": 0.00018331298828125,
      "model_forward_time": 0.12015652656555176,
      "step": 30034
    },
    {
      "epoch": 0.00018331298828125,
      "step": 30034,
      "training_step_time": 0.6721341609954834
    },
    {
      "epoch": 0.000183319091796875,
      "model_forward_time": 0.1236109733581543,
      "step": 30035
    },
    {
      "epoch": 0.000183319091796875,
      "step": 30035,
      "training_step_time": 0.6454992294311523
    },
    {
      "epoch": 0.0001833251953125,
      "model_forward_time": 0.11961555480957031,
      "step": 30036
    },
    {
      "epoch": 0.0001833251953125,
      "step": 30036,
      "training_step_time": 0.6378562450408936
    },
    {
      "epoch": 0.000183331298828125,
      "model_forward_time": 0.12907719612121582,
      "step": 30037
    },
    {
      "epoch": 0.000183331298828125,
      "step": 30037,
      "training_step_time": 0.6094427108764648
    },
    {
      "epoch": 0.00018333740234375,
      "model_forward_time": 0.12449288368225098,
      "step": 30038
    },
    {
      "epoch": 0.00018333740234375,
      "step": 30038,
      "training_step_time": 0.6166470050811768
    },
    {
      "epoch": 0.000183343505859375,
      "model_forward_time": 0.11922359466552734,
      "step": 30039
    },
    {
      "epoch": 0.000183343505859375,
      "step": 30039,
      "training_step_time": 0.7052421569824219
    },
    {
      "epoch": 0.000183349609375,
      "grad_norm": 0.1146458238363266,
      "learning_rate": 5.401910250310961e-05,
      "loss": 0.0529,
      "step": 30040
    },
    {
      "epoch": 0.000183349609375,
      "model_forward_time": 0.12161087989807129,
      "step": 30040
    },
    {
      "epoch": 0.000183349609375,
      "step": 30040,
      "training_step_time": 0.6896426677703857
    },
    {
      "epoch": 0.000183355712890625,
      "model_forward_time": 0.1235198974609375,
      "step": 30041
    },
    {
      "epoch": 0.000183355712890625,
      "step": 30041,
      "training_step_time": 0.670644998550415
    },
    {
      "epoch": 0.00018336181640625,
      "model_forward_time": 0.11562967300415039,
      "step": 30042
    },
    {
      "epoch": 0.00018336181640625,
      "step": 30042,
      "training_step_time": 0.6627457141876221
    },
    {
      "epoch": 0.000183367919921875,
      "model_forward_time": 0.1178140640258789,
      "step": 30043
    },
    {
      "epoch": 0.000183367919921875,
      "step": 30043,
      "training_step_time": 0.5248584747314453
    },
    {
      "epoch": 0.0001833740234375,
      "model_forward_time": 0.11697769165039062,
      "step": 30044
    },
    {
      "epoch": 0.0001833740234375,
      "step": 30044,
      "training_step_time": 0.5464861392974854
    },
    {
      "epoch": 0.000183380126953125,
      "model_forward_time": 0.11864328384399414,
      "step": 30045
    },
    {
      "epoch": 0.000183380126953125,
      "step": 30045,
      "training_step_time": 0.5290517807006836
    },
    {
      "epoch": 0.00018338623046875,
      "model_forward_time": 0.1167440414428711,
      "step": 30046
    },
    {
      "epoch": 0.00018338623046875,
      "step": 30046,
      "training_step_time": 0.5064361095428467
    },
    {
      "epoch": 0.000183392333984375,
      "model_forward_time": 0.11735868453979492,
      "step": 30047
    },
    {
      "epoch": 0.000183392333984375,
      "step": 30047,
      "training_step_time": 0.4391818046569824
    },
    {
      "epoch": 0.0001833984375,
      "model_forward_time": 0.11716389656066895,
      "step": 30048
    },
    {
      "epoch": 0.0001833984375,
      "step": 30048,
      "training_step_time": 0.4408743381500244
    },
    {
      "epoch": 0.000183404541015625,
      "model_forward_time": 0.11612319946289062,
      "step": 30049
    },
    {
      "epoch": 0.000183404541015625,
      "step": 30049,
      "training_step_time": 0.4615468978881836
    },
    {
      "epoch": 0.00018341064453125,
      "grad_norm": 0.16278934478759766,
      "learning_rate": 5.39916332373801e-05,
      "loss": 0.048,
      "step": 30050
    },
    {
      "epoch": 0.00018341064453125,
      "model_forward_time": 0.11536526679992676,
      "step": 30050
    },
    {
      "epoch": 0.00018341064453125,
      "step": 30050,
      "training_step_time": 0.4970133304595947
    },
    {
      "epoch": 0.000183416748046875,
      "model_forward_time": 0.11519575119018555,
      "step": 30051
    },
    {
      "epoch": 0.000183416748046875,
      "step": 30051,
      "training_step_time": 0.4267401695251465
    },
    {
      "epoch": 0.0001834228515625,
      "model_forward_time": 0.11649417877197266,
      "step": 30052
    },
    {
      "epoch": 0.0001834228515625,
      "step": 30052,
      "training_step_time": 0.3867957592010498
    },
    {
      "epoch": 0.000183428955078125,
      "model_forward_time": 0.11550474166870117,
      "step": 30053
    },
    {
      "epoch": 0.000183428955078125,
      "step": 30053,
      "training_step_time": 0.4375143051147461
    },
    {
      "epoch": 0.00018343505859375,
      "model_forward_time": 0.11535930633544922,
      "step": 30054
    },
    {
      "epoch": 0.00018343505859375,
      "step": 30054,
      "training_step_time": 0.4315338134765625
    },
    {
      "epoch": 0.000183441162109375,
      "model_forward_time": 0.11556148529052734,
      "step": 30055
    },
    {
      "epoch": 0.000183441162109375,
      "step": 30055,
      "training_step_time": 0.399975061416626
    },
    {
      "epoch": 0.000183447265625,
      "model_forward_time": 0.11508870124816895,
      "step": 30056
    },
    {
      "epoch": 0.000183447265625,
      "step": 30056,
      "training_step_time": 0.39456844329833984
    },
    {
      "epoch": 0.000183453369140625,
      "model_forward_time": 0.11645269393920898,
      "step": 30057
    },
    {
      "epoch": 0.000183453369140625,
      "step": 30057,
      "training_step_time": 0.4121110439300537
    },
    {
      "epoch": 0.00018345947265625,
      "model_forward_time": 0.11464309692382812,
      "step": 30058
    },
    {
      "epoch": 0.00018345947265625,
      "step": 30058,
      "training_step_time": 0.4075930118560791
    },
    {
      "epoch": 0.000183465576171875,
      "model_forward_time": 0.11511707305908203,
      "step": 30059
    },
    {
      "epoch": 0.000183465576171875,
      "step": 30059,
      "training_step_time": 0.42322230339050293
    },
    {
      "epoch": 0.0001834716796875,
      "grad_norm": 0.15138790011405945,
      "learning_rate": 5.396416275909779e-05,
      "loss": 0.0484,
      "step": 30060
    },
    {
      "epoch": 0.0001834716796875,
      "model_forward_time": 0.1146090030670166,
      "step": 30060
    },
    {
      "epoch": 0.0001834716796875,
      "step": 30060,
      "training_step_time": 0.3993372917175293
    },
    {
      "epoch": 0.000183477783203125,
      "model_forward_time": 0.11493420600891113,
      "step": 30061
    },
    {
      "epoch": 0.000183477783203125,
      "step": 30061,
      "training_step_time": 0.3857600688934326
    },
    {
      "epoch": 0.00018348388671875,
      "model_forward_time": 0.11500954627990723,
      "step": 30062
    },
    {
      "epoch": 0.00018348388671875,
      "step": 30062,
      "training_step_time": 0.39931178092956543
    },
    {
      "epoch": 0.000183489990234375,
      "model_forward_time": 0.11547684669494629,
      "step": 30063
    },
    {
      "epoch": 0.000183489990234375,
      "step": 30063,
      "training_step_time": 0.4083831310272217
    },
    {
      "epoch": 0.00018349609375,
      "model_forward_time": 0.11537766456604004,
      "step": 30064
    },
    {
      "epoch": 0.00018349609375,
      "step": 30064,
      "training_step_time": 0.5177321434020996
    },
    {
      "epoch": 0.000183502197265625,
      "model_forward_time": 0.11497664451599121,
      "step": 30065
    },
    {
      "epoch": 0.000183502197265625,
      "step": 30065,
      "training_step_time": 0.4405810832977295
    },
    {
      "epoch": 0.00018350830078125,
      "model_forward_time": 0.11629939079284668,
      "step": 30066
    },
    {
      "epoch": 0.00018350830078125,
      "step": 30066,
      "training_step_time": 0.4891016483306885
    },
    {
      "epoch": 0.000183514404296875,
      "model_forward_time": 0.11624503135681152,
      "step": 30067
    },
    {
      "epoch": 0.000183514404296875,
      "step": 30067,
      "training_step_time": 0.45049142837524414
    },
    {
      "epoch": 0.0001835205078125,
      "model_forward_time": 0.11508655548095703,
      "step": 30068
    },
    {
      "epoch": 0.0001835205078125,
      "step": 30068,
      "training_step_time": 0.42238473892211914
    },
    {
      "epoch": 0.000183526611328125,
      "model_forward_time": 0.11469388008117676,
      "step": 30069
    },
    {
      "epoch": 0.000183526611328125,
      "step": 30069,
      "training_step_time": 0.4878726005554199
    },
    {
      "epoch": 0.00018353271484375,
      "grad_norm": 0.15573060512542725,
      "learning_rate": 5.393669107660753e-05,
      "loss": 0.0474,
      "step": 30070
    },
    {
      "epoch": 0.00018353271484375,
      "model_forward_time": 0.11495828628540039,
      "step": 30070
    },
    {
      "epoch": 0.00018353271484375,
      "step": 30070,
      "training_step_time": 0.3917689323425293
    },
    {
      "epoch": 0.000183538818359375,
      "model_forward_time": 0.1144709587097168,
      "step": 30071
    },
    {
      "epoch": 0.000183538818359375,
      "step": 30071,
      "training_step_time": 0.396925687789917
    },
    {
      "epoch": 0.000183544921875,
      "model_forward_time": 0.11474442481994629,
      "step": 30072
    },
    {
      "epoch": 0.000183544921875,
      "step": 30072,
      "training_step_time": 0.4065675735473633
    },
    {
      "epoch": 0.000183551025390625,
      "model_forward_time": 0.11515116691589355,
      "step": 30073
    },
    {
      "epoch": 0.000183551025390625,
      "step": 30073,
      "training_step_time": 0.4101128578186035
    },
    {
      "epoch": 0.00018355712890625,
      "model_forward_time": 0.11700010299682617,
      "step": 30074
    },
    {
      "epoch": 0.00018355712890625,
      "step": 30074,
      "training_step_time": 0.40828895568847656
    },
    {
      "epoch": 0.000183563232421875,
      "model_forward_time": 0.11534786224365234,
      "step": 30075
    },
    {
      "epoch": 0.000183563232421875,
      "step": 30075,
      "training_step_time": 0.3852267265319824
    },
    {
      "epoch": 0.0001835693359375,
      "model_forward_time": 0.11541509628295898,
      "step": 30076
    },
    {
      "epoch": 0.0001835693359375,
      "step": 30076,
      "training_step_time": 0.39905309677124023
    },
    {
      "epoch": 0.000183575439453125,
      "model_forward_time": 0.11495494842529297,
      "step": 30077
    },
    {
      "epoch": 0.000183575439453125,
      "step": 30077,
      "training_step_time": 0.3851029872894287
    },
    {
      "epoch": 0.00018358154296875,
      "model_forward_time": 0.11531257629394531,
      "step": 30078
    },
    {
      "epoch": 0.00018358154296875,
      "step": 30078,
      "training_step_time": 0.40421557426452637
    },
    {
      "epoch": 0.000183587646484375,
      "model_forward_time": 0.11575698852539062,
      "step": 30079
    },
    {
      "epoch": 0.000183587646484375,
      "step": 30079,
      "training_step_time": 0.44242382049560547
    },
    {
      "epoch": 0.00018359375,
      "grad_norm": 0.14078417420387268,
      "learning_rate": 5.390921819825445e-05,
      "loss": 0.0469,
      "step": 30080
    },
    {
      "epoch": 0.00018359375,
      "model_forward_time": 0.11536192893981934,
      "step": 30080
    },
    {
      "epoch": 0.00018359375,
      "step": 30080,
      "training_step_time": 0.515345573425293
    },
    {
      "epoch": 0.000183599853515625,
      "model_forward_time": 0.11561393737792969,
      "step": 30081
    },
    {
      "epoch": 0.000183599853515625,
      "step": 30081,
      "training_step_time": 0.38454151153564453
    },
    {
      "epoch": 0.00018360595703125,
      "model_forward_time": 0.11505937576293945,
      "step": 30082
    },
    {
      "epoch": 0.00018360595703125,
      "step": 30082,
      "training_step_time": 0.5155806541442871
    },
    {
      "epoch": 0.000183612060546875,
      "model_forward_time": 0.11466097831726074,
      "step": 30083
    },
    {
      "epoch": 0.000183612060546875,
      "step": 30083,
      "training_step_time": 0.5067110061645508
    },
    {
      "epoch": 0.0001836181640625,
      "model_forward_time": 0.11663269996643066,
      "step": 30084
    },
    {
      "epoch": 0.0001836181640625,
      "step": 30084,
      "training_step_time": 0.3889336585998535
    },
    {
      "epoch": 0.000183624267578125,
      "model_forward_time": 0.11769771575927734,
      "step": 30085
    },
    {
      "epoch": 0.000183624267578125,
      "step": 30085,
      "training_step_time": 0.37849903106689453
    },
    {
      "epoch": 0.00018363037109375,
      "model_forward_time": 0.11829471588134766,
      "step": 30086
    },
    {
      "epoch": 0.00018363037109375,
      "step": 30086,
      "training_step_time": 0.3826429843902588
    },
    {
      "epoch": 0.000183636474609375,
      "model_forward_time": 0.11823749542236328,
      "step": 30087
    },
    {
      "epoch": 0.000183636474609375,
      "step": 30087,
      "training_step_time": 0.5942845344543457
    },
    {
      "epoch": 0.000183642578125,
      "model_forward_time": 0.11612486839294434,
      "step": 30088
    },
    {
      "epoch": 0.000183642578125,
      "step": 30088,
      "training_step_time": 0.37563228607177734
    },
    {
      "epoch": 0.000183648681640625,
      "model_forward_time": 0.1149449348449707,
      "step": 30089
    },
    {
      "epoch": 0.000183648681640625,
      "step": 30089,
      "training_step_time": 0.3949568271636963
    },
    {
      "epoch": 0.00018365478515625,
      "grad_norm": 0.13703034818172455,
      "learning_rate": 5.3881744132384104e-05,
      "loss": 0.0487,
      "step": 30090
    },
    {
      "epoch": 0.00018365478515625,
      "model_forward_time": 0.11490178108215332,
      "step": 30090
    },
    {
      "epoch": 0.00018365478515625,
      "step": 30090,
      "training_step_time": 0.40105319023132324
    },
    {
      "epoch": 0.000183660888671875,
      "model_forward_time": 0.11471748352050781,
      "step": 30091
    },
    {
      "epoch": 0.000183660888671875,
      "step": 30091,
      "training_step_time": 0.4478890895843506
    },
    {
      "epoch": 0.0001836669921875,
      "model_forward_time": 0.11460447311401367,
      "step": 30092
    },
    {
      "epoch": 0.0001836669921875,
      "step": 30092,
      "training_step_time": 0.4302492141723633
    },
    {
      "epoch": 0.000183673095703125,
      "model_forward_time": 0.11472868919372559,
      "step": 30093
    },
    {
      "epoch": 0.000183673095703125,
      "step": 30093,
      "training_step_time": 1.0027692317962646
    },
    {
      "epoch": 0.00018367919921875,
      "model_forward_time": 0.11464858055114746,
      "step": 30094
    },
    {
      "epoch": 0.00018367919921875,
      "step": 30094,
      "training_step_time": 0.4666581153869629
    },
    {
      "epoch": 0.000183685302734375,
      "model_forward_time": 0.11388254165649414,
      "step": 30095
    },
    {
      "epoch": 0.000183685302734375,
      "step": 30095,
      "training_step_time": 0.4974503517150879
    },
    {
      "epoch": 0.00018369140625,
      "model_forward_time": 0.11458134651184082,
      "step": 30096
    },
    {
      "epoch": 0.00018369140625,
      "step": 30096,
      "training_step_time": 0.4787569046020508
    },
    {
      "epoch": 0.000183697509765625,
      "model_forward_time": 0.11396384239196777,
      "step": 30097
    },
    {
      "epoch": 0.000183697509765625,
      "step": 30097,
      "training_step_time": 0.3826580047607422
    },
    {
      "epoch": 0.00018370361328125,
      "model_forward_time": 0.11461186408996582,
      "step": 30098
    },
    {
      "epoch": 0.00018370361328125,
      "step": 30098,
      "training_step_time": 0.3783538341522217
    },
    {
      "epoch": 0.000183709716796875,
      "model_forward_time": 0.11461687088012695,
      "step": 30099
    },
    {
      "epoch": 0.000183709716796875,
      "step": 30099,
      "training_step_time": 0.42472052574157715
    },
    {
      "epoch": 0.0001837158203125,
      "grad_norm": 0.09315164387226105,
      "learning_rate": 5.3854268887342374e-05,
      "loss": 0.0491,
      "step": 30100
    },
    {
      "epoch": 0.0001837158203125,
      "model_forward_time": 0.11421990394592285,
      "step": 30100
    },
    {
      "epoch": 0.0001837158203125,
      "step": 30100,
      "training_step_time": 0.3901035785675049
    },
    {
      "epoch": 0.000183721923828125,
      "model_forward_time": 0.11513066291809082,
      "step": 30101
    },
    {
      "epoch": 0.000183721923828125,
      "step": 30101,
      "training_step_time": 0.3851954936981201
    },
    {
      "epoch": 0.00018372802734375,
      "model_forward_time": 0.11522650718688965,
      "step": 30102
    },
    {
      "epoch": 0.00018372802734375,
      "step": 30102,
      "training_step_time": 0.4001758098602295
    },
    {
      "epoch": 0.000183734130859375,
      "model_forward_time": 0.11535859107971191,
      "step": 30103
    },
    {
      "epoch": 0.000183734130859375,
      "step": 30103,
      "training_step_time": 0.4141104221343994
    },
    {
      "epoch": 0.000183740234375,
      "model_forward_time": 0.11602210998535156,
      "step": 30104
    },
    {
      "epoch": 0.000183740234375,
      "step": 30104,
      "training_step_time": 0.41081857681274414
    },
    {
      "epoch": 0.000183746337890625,
      "model_forward_time": 0.11533713340759277,
      "step": 30105
    },
    {
      "epoch": 0.000183746337890625,
      "step": 30105,
      "training_step_time": 0.9681301116943359
    },
    {
      "epoch": 0.00018375244140625,
      "model_forward_time": 0.1150970458984375,
      "step": 30106
    },
    {
      "epoch": 0.00018375244140625,
      "step": 30106,
      "training_step_time": 0.4369850158691406
    },
    {
      "epoch": 0.000183758544921875,
      "model_forward_time": 0.11387944221496582,
      "step": 30107
    },
    {
      "epoch": 0.000183758544921875,
      "step": 30107,
      "training_step_time": 0.4058413505554199
    },
    {
      "epoch": 0.0001837646484375,
      "model_forward_time": 0.11396241188049316,
      "step": 30108
    },
    {
      "epoch": 0.0001837646484375,
      "step": 30108,
      "training_step_time": 0.465787410736084
    },
    {
      "epoch": 0.000183770751953125,
      "model_forward_time": 0.11376714706420898,
      "step": 30109
    },
    {
      "epoch": 0.000183770751953125,
      "step": 30109,
      "training_step_time": 0.44278645515441895
    },
    {
      "epoch": 0.00018377685546875,
      "grad_norm": 0.13942158222198486,
      "learning_rate": 5.3826792471475516e-05,
      "loss": 0.049,
      "step": 30110
    },
    {
      "epoch": 0.00018377685546875,
      "model_forward_time": 0.11466026306152344,
      "step": 30110
    },
    {
      "epoch": 0.00018377685546875,
      "step": 30110,
      "training_step_time": 0.4677734375
    },
    {
      "epoch": 0.000183782958984375,
      "model_forward_time": 0.11508846282958984,
      "step": 30111
    },
    {
      "epoch": 0.000183782958984375,
      "step": 30111,
      "training_step_time": 0.5459489822387695
    },
    {
      "epoch": 0.0001837890625,
      "model_forward_time": 0.11444401741027832,
      "step": 30112
    },
    {
      "epoch": 0.0001837890625,
      "step": 30112,
      "training_step_time": 0.38341760635375977
    },
    {
      "epoch": 0.000183795166015625,
      "model_forward_time": 0.11460566520690918,
      "step": 30113
    },
    {
      "epoch": 0.000183795166015625,
      "step": 30113,
      "training_step_time": 0.38842058181762695
    },
    {
      "epoch": 0.00018380126953125,
      "model_forward_time": 0.11520028114318848,
      "step": 30114
    },
    {
      "epoch": 0.00018380126953125,
      "step": 30114,
      "training_step_time": 0.38825011253356934
    },
    {
      "epoch": 0.000183807373046875,
      "model_forward_time": 0.1148378849029541,
      "step": 30115
    },
    {
      "epoch": 0.000183807373046875,
      "step": 30115,
      "training_step_time": 0.4326663017272949
    },
    {
      "epoch": 0.0001838134765625,
      "model_forward_time": 0.11503887176513672,
      "step": 30116
    },
    {
      "epoch": 0.0001838134765625,
      "step": 30116,
      "training_step_time": 0.3944563865661621
    },
    {
      "epoch": 0.000183819580078125,
      "model_forward_time": 0.11543607711791992,
      "step": 30117
    },
    {
      "epoch": 0.000183819580078125,
      "step": 30117,
      "training_step_time": 0.9494495391845703
    },
    {
      "epoch": 0.00018382568359375,
      "model_forward_time": 0.11503314971923828,
      "step": 30118
    },
    {
      "epoch": 0.00018382568359375,
      "step": 30118,
      "training_step_time": 0.4665052890777588
    },
    {
      "epoch": 0.000183831787109375,
      "model_forward_time": 0.1144874095916748,
      "step": 30119
    },
    {
      "epoch": 0.000183831787109375,
      "step": 30119,
      "training_step_time": 0.38467931747436523
    },
    {
      "epoch": 0.000183837890625,
      "grad_norm": 0.09883181750774384,
      "learning_rate": 5.379931489313016e-05,
      "loss": 0.0474,
      "step": 30120
    },
    {
      "epoch": 0.000183837890625,
      "model_forward_time": 0.11433744430541992,
      "step": 30120
    },
    {
      "epoch": 0.000183837890625,
      "step": 30120,
      "training_step_time": 0.40524768829345703
    },
    {
      "epoch": 0.000183843994140625,
      "model_forward_time": 0.11587977409362793,
      "step": 30121
    },
    {
      "epoch": 0.000183843994140625,
      "step": 30121,
      "training_step_time": 0.41240835189819336
    },
    {
      "epoch": 0.00018385009765625,
      "model_forward_time": 0.11466383934020996,
      "step": 30122
    },
    {
      "epoch": 0.00018385009765625,
      "step": 30122,
      "training_step_time": 0.45749521255493164
    },
    {
      "epoch": 0.000183856201171875,
      "model_forward_time": 0.11495637893676758,
      "step": 30123
    },
    {
      "epoch": 0.000183856201171875,
      "step": 30123,
      "training_step_time": 0.8014717102050781
    },
    {
      "epoch": 0.0001838623046875,
      "model_forward_time": 0.11427140235900879,
      "step": 30124
    },
    {
      "epoch": 0.0001838623046875,
      "step": 30124,
      "training_step_time": 0.3857107162475586
    },
    {
      "epoch": 0.000183868408203125,
      "model_forward_time": 0.11487603187561035,
      "step": 30125
    },
    {
      "epoch": 0.000183868408203125,
      "step": 30125,
      "training_step_time": 0.3837907314300537
    },
    {
      "epoch": 0.00018387451171875,
      "model_forward_time": 0.11375570297241211,
      "step": 30126
    },
    {
      "epoch": 0.00018387451171875,
      "step": 30126,
      "training_step_time": 0.3896191120147705
    },
    {
      "epoch": 0.000183880615234375,
      "model_forward_time": 0.11437106132507324,
      "step": 30127
    },
    {
      "epoch": 0.000183880615234375,
      "step": 30127,
      "training_step_time": 0.3971681594848633
    },
    {
      "epoch": 0.00018388671875,
      "model_forward_time": 0.11438107490539551,
      "step": 30128
    },
    {
      "epoch": 0.00018388671875,
      "step": 30128,
      "training_step_time": 0.422283411026001
    },
    {
      "epoch": 0.000183892822265625,
      "model_forward_time": 0.11469531059265137,
      "step": 30129
    },
    {
      "epoch": 0.000183892822265625,
      "step": 30129,
      "training_step_time": 0.9662153720855713
    },
    {
      "epoch": 0.00018389892578125,
      "grad_norm": 0.1874341517686844,
      "learning_rate": 5.3771836160653254e-05,
      "loss": 0.0511,
      "step": 30130
    },
    {
      "epoch": 0.00018389892578125,
      "model_forward_time": 0.1141057014465332,
      "step": 30130
    },
    {
      "epoch": 0.00018389892578125,
      "step": 30130,
      "training_step_time": 0.39820218086242676
    },
    {
      "epoch": 0.000183905029296875,
      "model_forward_time": 0.11404299736022949,
      "step": 30131
    },
    {
      "epoch": 0.000183905029296875,
      "step": 30131,
      "training_step_time": 0.4114205837249756
    },
    {
      "epoch": 0.0001839111328125,
      "model_forward_time": 0.11469697952270508,
      "step": 30132
    },
    {
      "epoch": 0.0001839111328125,
      "step": 30132,
      "training_step_time": 0.4164447784423828
    },
    {
      "epoch": 0.000183917236328125,
      "model_forward_time": 0.1143031120300293,
      "step": 30133
    },
    {
      "epoch": 0.000183917236328125,
      "step": 30133,
      "training_step_time": 0.4466438293457031
    },
    {
      "epoch": 0.00018392333984375,
      "model_forward_time": 0.11380982398986816,
      "step": 30134
    },
    {
      "epoch": 0.00018392333984375,
      "step": 30134,
      "training_step_time": 0.4184761047363281
    },
    {
      "epoch": 0.000183929443359375,
      "model_forward_time": 0.11519861221313477,
      "step": 30135
    },
    {
      "epoch": 0.000183929443359375,
      "step": 30135,
      "training_step_time": 0.649284839630127
    },
    {
      "epoch": 0.000183935546875,
      "model_forward_time": 0.11436796188354492,
      "step": 30136
    },
    {
      "epoch": 0.000183935546875,
      "step": 30136,
      "training_step_time": 0.38218259811401367
    },
    {
      "epoch": 0.000183941650390625,
      "model_forward_time": 0.11557745933532715,
      "step": 30137
    },
    {
      "epoch": 0.000183941650390625,
      "step": 30137,
      "training_step_time": 0.38311028480529785
    },
    {
      "epoch": 0.00018394775390625,
      "model_forward_time": 0.11528205871582031,
      "step": 30138
    },
    {
      "epoch": 0.00018394775390625,
      "step": 30138,
      "training_step_time": 0.3841545581817627
    },
    {
      "epoch": 0.000183953857421875,
      "model_forward_time": 0.11460638046264648,
      "step": 30139
    },
    {
      "epoch": 0.000183953857421875,
      "step": 30139,
      "training_step_time": 0.4249124526977539
    },
    {
      "epoch": 0.0001839599609375,
      "grad_norm": 0.13512066006660461,
      "learning_rate": 5.3744356282392104e-05,
      "loss": 0.0503,
      "step": 30140
    },
    {
      "epoch": 0.0001839599609375,
      "model_forward_time": 0.11500144004821777,
      "step": 30140
    },
    {
      "epoch": 0.0001839599609375,
      "step": 30140,
      "training_step_time": 0.38078904151916504
    },
    {
      "epoch": 0.000183966064453125,
      "model_forward_time": 0.11486172676086426,
      "step": 30141
    },
    {
      "epoch": 0.000183966064453125,
      "step": 30141,
      "training_step_time": 0.49103498458862305
    },
    {
      "epoch": 0.00018397216796875,
      "model_forward_time": 0.11531329154968262,
      "step": 30142
    },
    {
      "epoch": 0.00018397216796875,
      "step": 30142,
      "training_step_time": 0.3837299346923828
    },
    {
      "epoch": 0.000183978271484375,
      "model_forward_time": 0.11528968811035156,
      "step": 30143
    },
    {
      "epoch": 0.000183978271484375,
      "step": 30143,
      "training_step_time": 0.423659086227417
    },
    {
      "epoch": 0.000183984375,
      "model_forward_time": 0.1147606372833252,
      "step": 30144
    },
    {
      "epoch": 0.000183984375,
      "step": 30144,
      "training_step_time": 0.39360904693603516
    },
    {
      "epoch": 0.000183990478515625,
      "model_forward_time": 0.11549663543701172,
      "step": 30145
    },
    {
      "epoch": 0.000183990478515625,
      "step": 30145,
      "training_step_time": 0.46308374404907227
    },
    {
      "epoch": 0.00018399658203125,
      "model_forward_time": 0.1158299446105957,
      "step": 30146
    },
    {
      "epoch": 0.00018399658203125,
      "step": 30146,
      "training_step_time": 0.4069492816925049
    },
    {
      "epoch": 0.000184002685546875,
      "model_forward_time": 0.11547684669494629,
      "step": 30147
    },
    {
      "epoch": 0.000184002685546875,
      "step": 30147,
      "training_step_time": 0.4809455871582031
    },
    {
      "epoch": 0.0001840087890625,
      "model_forward_time": 0.11507582664489746,
      "step": 30148
    },
    {
      "epoch": 0.0001840087890625,
      "step": 30148,
      "training_step_time": 0.49065375328063965
    },
    {
      "epoch": 0.000184014892578125,
      "model_forward_time": 0.11566329002380371,
      "step": 30149
    },
    {
      "epoch": 0.000184014892578125,
      "step": 30149,
      "training_step_time": 0.4915733337402344
    },
    {
      "epoch": 0.00018402099609375,
      "grad_norm": 0.14412733912467957,
      "learning_rate": 5.371687526669439e-05,
      "loss": 0.0538,
      "step": 30150
    },
    {
      "epoch": 0.00018402099609375,
      "model_forward_time": 0.11496496200561523,
      "step": 30150
    },
    {
      "epoch": 0.00018402099609375,
      "step": 30150,
      "training_step_time": 0.39556169509887695
    },
    {
      "epoch": 0.000184027099609375,
      "model_forward_time": 0.11442732810974121,
      "step": 30151
    },
    {
      "epoch": 0.000184027099609375,
      "step": 30151,
      "training_step_time": 0.39082837104797363
    },
    {
      "epoch": 0.000184033203125,
      "model_forward_time": 0.1152961254119873,
      "step": 30152
    },
    {
      "epoch": 0.000184033203125,
      "step": 30152,
      "training_step_time": 0.4110543727874756
    },
    {
      "epoch": 0.000184039306640625,
      "model_forward_time": 0.11489701271057129,
      "step": 30153
    },
    {
      "epoch": 0.000184039306640625,
      "step": 30153,
      "training_step_time": 0.3874540328979492
    },
    {
      "epoch": 0.00018404541015625,
      "model_forward_time": 0.11499357223510742,
      "step": 30154
    },
    {
      "epoch": 0.00018404541015625,
      "step": 30154,
      "training_step_time": 0.39539599418640137
    },
    {
      "epoch": 0.000184051513671875,
      "model_forward_time": 0.11536312103271484,
      "step": 30155
    },
    {
      "epoch": 0.000184051513671875,
      "step": 30155,
      "training_step_time": 0.3971133232116699
    },
    {
      "epoch": 0.0001840576171875,
      "model_forward_time": 0.11521697044372559,
      "step": 30156
    },
    {
      "epoch": 0.0001840576171875,
      "step": 30156,
      "training_step_time": 0.3992009162902832
    },
    {
      "epoch": 0.000184063720703125,
      "model_forward_time": 0.11533689498901367,
      "step": 30157
    },
    {
      "epoch": 0.000184063720703125,
      "step": 30157,
      "training_step_time": 0.4208652973175049
    },
    {
      "epoch": 0.00018406982421875,
      "model_forward_time": 0.11528277397155762,
      "step": 30158
    },
    {
      "epoch": 0.00018406982421875,
      "step": 30158,
      "training_step_time": 0.38903141021728516
    },
    {
      "epoch": 0.000184075927734375,
      "model_forward_time": 0.11586689949035645,
      "step": 30159
    },
    {
      "epoch": 0.000184075927734375,
      "step": 30159,
      "training_step_time": 0.5015003681182861
    },
    {
      "epoch": 0.00018408203125,
      "grad_norm": 0.12815004587173462,
      "learning_rate": 5.368939312190808e-05,
      "loss": 0.0528,
      "step": 30160
    },
    {
      "epoch": 0.00018408203125,
      "model_forward_time": 0.11550736427307129,
      "step": 30160
    },
    {
      "epoch": 0.00018408203125,
      "step": 30160,
      "training_step_time": 0.465773344039917
    },
    {
      "epoch": 0.000184088134765625,
      "model_forward_time": 0.11527466773986816,
      "step": 30161
    },
    {
      "epoch": 0.000184088134765625,
      "step": 30161,
      "training_step_time": 0.4038815498352051
    },
    {
      "epoch": 0.00018409423828125,
      "model_forward_time": 0.11487269401550293,
      "step": 30162
    },
    {
      "epoch": 0.00018409423828125,
      "step": 30162,
      "training_step_time": 0.44347524642944336
    },
    {
      "epoch": 0.000184100341796875,
      "model_forward_time": 0.11552762985229492,
      "step": 30163
    },
    {
      "epoch": 0.000184100341796875,
      "step": 30163,
      "training_step_time": 0.5044143199920654
    },
    {
      "epoch": 0.0001841064453125,
      "model_forward_time": 0.11498761177062988,
      "step": 30164
    },
    {
      "epoch": 0.0001841064453125,
      "step": 30164,
      "training_step_time": 0.38689661026000977
    },
    {
      "epoch": 0.000184112548828125,
      "model_forward_time": 0.11522245407104492,
      "step": 30165
    },
    {
      "epoch": 0.000184112548828125,
      "step": 30165,
      "training_step_time": 0.44071459770202637
    },
    {
      "epoch": 0.00018411865234375,
      "model_forward_time": 0.11529064178466797,
      "step": 30166
    },
    {
      "epoch": 0.00018411865234375,
      "step": 30166,
      "training_step_time": 0.41312313079833984
    },
    {
      "epoch": 0.000184124755859375,
      "model_forward_time": 0.11600804328918457,
      "step": 30167
    },
    {
      "epoch": 0.000184124755859375,
      "step": 30167,
      "training_step_time": 0.3943812847137451
    },
    {
      "epoch": 0.000184130859375,
      "model_forward_time": 0.11545944213867188,
      "step": 30168
    },
    {
      "epoch": 0.000184130859375,
      "step": 30168,
      "training_step_time": 0.3974463939666748
    },
    {
      "epoch": 0.000184136962890625,
      "model_forward_time": 0.11613583564758301,
      "step": 30169
    },
    {
      "epoch": 0.000184136962890625,
      "step": 30169,
      "training_step_time": 0.4138453006744385
    },
    {
      "epoch": 0.00018414306640625,
      "grad_norm": 0.12909159064292908,
      "learning_rate": 5.366190985638159e-05,
      "loss": 0.0502,
      "step": 30170
    },
    {
      "epoch": 0.00018414306640625,
      "model_forward_time": 0.11487746238708496,
      "step": 30170
    },
    {
      "epoch": 0.00018414306640625,
      "step": 30170,
      "training_step_time": 0.39279818534851074
    },
    {
      "epoch": 0.000184149169921875,
      "model_forward_time": 0.11505913734436035,
      "step": 30171
    },
    {
      "epoch": 0.000184149169921875,
      "step": 30171,
      "training_step_time": 0.3978550434112549
    },
    {
      "epoch": 0.0001841552734375,
      "model_forward_time": 0.11571502685546875,
      "step": 30172
    },
    {
      "epoch": 0.0001841552734375,
      "step": 30172,
      "training_step_time": 0.37915611267089844
    },
    {
      "epoch": 0.000184161376953125,
      "model_forward_time": 0.11603641510009766,
      "step": 30173
    },
    {
      "epoch": 0.000184161376953125,
      "step": 30173,
      "training_step_time": 0.3830862045288086
    },
    {
      "epoch": 0.00018416748046875,
      "model_forward_time": 0.11539697647094727,
      "step": 30174
    },
    {
      "epoch": 0.00018416748046875,
      "step": 30174,
      "training_step_time": 0.3996272087097168
    },
    {
      "epoch": 0.000184173583984375,
      "model_forward_time": 0.11515426635742188,
      "step": 30175
    },
    {
      "epoch": 0.000184173583984375,
      "step": 30175,
      "training_step_time": 0.462374210357666
    },
    {
      "epoch": 0.0001841796875,
      "model_forward_time": 0.11604905128479004,
      "step": 30176
    },
    {
      "epoch": 0.0001841796875,
      "step": 30176,
      "training_step_time": 0.4617290496826172
    },
    {
      "epoch": 0.000184185791015625,
      "model_forward_time": 0.11522865295410156,
      "step": 30177
    },
    {
      "epoch": 0.000184185791015625,
      "step": 30177,
      "training_step_time": 0.5051336288452148
    },
    {
      "epoch": 0.00018419189453125,
      "model_forward_time": 0.11530566215515137,
      "step": 30178
    },
    {
      "epoch": 0.00018419189453125,
      "step": 30178,
      "training_step_time": 0.5020751953125
    },
    {
      "epoch": 0.000184197998046875,
      "model_forward_time": 0.11475849151611328,
      "step": 30179
    },
    {
      "epoch": 0.000184197998046875,
      "step": 30179,
      "training_step_time": 0.4142031669616699
    },
    {
      "epoch": 0.0001842041015625,
      "grad_norm": 0.14701859652996063,
      "learning_rate": 5.363442547846356e-05,
      "loss": 0.0504,
      "step": 30180
    },
    {
      "epoch": 0.0001842041015625,
      "model_forward_time": 0.11414432525634766,
      "step": 30180
    },
    {
      "epoch": 0.0001842041015625,
      "step": 30180,
      "training_step_time": 0.4020545482635498
    },
    {
      "epoch": 0.000184210205078125,
      "model_forward_time": 0.11538004875183105,
      "step": 30181
    },
    {
      "epoch": 0.000184210205078125,
      "step": 30181,
      "training_step_time": 0.40347933769226074
    },
    {
      "epoch": 0.00018421630859375,
      "model_forward_time": 0.11501049995422363,
      "step": 30182
    },
    {
      "epoch": 0.00018421630859375,
      "step": 30182,
      "training_step_time": 0.38022851943969727
    },
    {
      "epoch": 0.000184222412109375,
      "model_forward_time": 0.11486577987670898,
      "step": 30183
    },
    {
      "epoch": 0.000184222412109375,
      "step": 30183,
      "training_step_time": 0.388746976852417
    },
    {
      "epoch": 0.000184228515625,
      "model_forward_time": 0.11507654190063477,
      "step": 30184
    },
    {
      "epoch": 0.000184228515625,
      "step": 30184,
      "training_step_time": 0.389664888381958
    },
    {
      "epoch": 0.000184234619140625,
      "model_forward_time": 0.11541175842285156,
      "step": 30185
    },
    {
      "epoch": 0.000184234619140625,
      "step": 30185,
      "training_step_time": 0.4002721309661865
    },
    {
      "epoch": 0.00018424072265625,
      "model_forward_time": 0.11532187461853027,
      "step": 30186
    },
    {
      "epoch": 0.00018424072265625,
      "step": 30186,
      "training_step_time": 0.40436220169067383
    },
    {
      "epoch": 0.000184246826171875,
      "model_forward_time": 0.11525988578796387,
      "step": 30187
    },
    {
      "epoch": 0.000184246826171875,
      "step": 30187,
      "training_step_time": 0.4071624279022217
    },
    {
      "epoch": 0.0001842529296875,
      "model_forward_time": 0.11535978317260742,
      "step": 30188
    },
    {
      "epoch": 0.0001842529296875,
      "step": 30188,
      "training_step_time": 0.3986532688140869
    },
    {
      "epoch": 0.000184259033203125,
      "model_forward_time": 0.11570310592651367,
      "step": 30189
    },
    {
      "epoch": 0.000184259033203125,
      "step": 30189,
      "training_step_time": 0.43600893020629883
    },
    {
      "epoch": 0.00018426513671875,
      "grad_norm": 0.1530212163925171,
      "learning_rate": 5.360693999650303e-05,
      "loss": 0.0476,
      "step": 30190
    },
    {
      "epoch": 0.00018426513671875,
      "model_forward_time": 0.11545228958129883,
      "step": 30190
    },
    {
      "epoch": 0.00018426513671875,
      "step": 30190,
      "training_step_time": 0.44115757942199707
    },
    {
      "epoch": 0.000184271240234375,
      "model_forward_time": 0.1149604320526123,
      "step": 30191
    },
    {
      "epoch": 0.000184271240234375,
      "step": 30191,
      "training_step_time": 0.3753695487976074
    },
    {
      "epoch": 0.00018427734375,
      "model_forward_time": 0.11522483825683594,
      "step": 30192
    },
    {
      "epoch": 0.00018427734375,
      "step": 30192,
      "training_step_time": 0.44414663314819336
    },
    {
      "epoch": 0.000184283447265625,
      "model_forward_time": 0.11602616310119629,
      "step": 30193
    },
    {
      "epoch": 0.000184283447265625,
      "step": 30193,
      "training_step_time": 0.4448211193084717
    },
    {
      "epoch": 0.00018428955078125,
      "model_forward_time": 0.11556100845336914,
      "step": 30194
    },
    {
      "epoch": 0.00018428955078125,
      "step": 30194,
      "training_step_time": 0.42337989807128906
    },
    {
      "epoch": 0.000184295654296875,
      "model_forward_time": 0.114593505859375,
      "step": 30195
    },
    {
      "epoch": 0.000184295654296875,
      "step": 30195,
      "training_step_time": 0.38405585289001465
    },
    {
      "epoch": 0.0001843017578125,
      "model_forward_time": 0.11530613899230957,
      "step": 30196
    },
    {
      "epoch": 0.0001843017578125,
      "step": 30196,
      "training_step_time": 0.405886173248291
    },
    {
      "epoch": 0.000184307861328125,
      "model_forward_time": 0.11518001556396484,
      "step": 30197
    },
    {
      "epoch": 0.000184307861328125,
      "step": 30197,
      "training_step_time": 0.4009709358215332
    },
    {
      "epoch": 0.00018431396484375,
      "model_forward_time": 0.11545801162719727,
      "step": 30198
    },
    {
      "epoch": 0.00018431396484375,
      "step": 30198,
      "training_step_time": 0.3920731544494629
    },
    {
      "epoch": 0.000184320068359375,
      "model_forward_time": 0.11495518684387207,
      "step": 30199
    },
    {
      "epoch": 0.000184320068359375,
      "step": 30199,
      "training_step_time": 0.39283156394958496
    },
    {
      "epoch": 0.000184326171875,
      "grad_norm": 0.1117081493139267,
      "learning_rate": 5.357945341884936e-05,
      "loss": 0.048,
      "step": 30200
    },
    {
      "epoch": 0.000184326171875,
      "model_forward_time": 0.11497926712036133,
      "step": 30200
    },
    {
      "epoch": 0.000184326171875,
      "step": 30200,
      "training_step_time": 0.3870728015899658
    },
    {
      "epoch": 0.000184332275390625,
      "model_forward_time": 0.11558389663696289,
      "step": 30201
    },
    {
      "epoch": 0.000184332275390625,
      "step": 30201,
      "training_step_time": 1.1684596538543701
    },
    {
      "epoch": 0.00018433837890625,
      "model_forward_time": 0.11426329612731934,
      "step": 30202
    },
    {
      "epoch": 0.00018433837890625,
      "step": 30202,
      "training_step_time": 0.38744473457336426
    },
    {
      "epoch": 0.000184344482421875,
      "model_forward_time": 0.11468005180358887,
      "step": 30203
    },
    {
      "epoch": 0.000184344482421875,
      "step": 30203,
      "training_step_time": 0.449540376663208
    },
    {
      "epoch": 0.0001843505859375,
      "model_forward_time": 0.11408686637878418,
      "step": 30204
    },
    {
      "epoch": 0.0001843505859375,
      "step": 30204,
      "training_step_time": 0.35950207710266113
    },
    {
      "epoch": 0.000184356689453125,
      "model_forward_time": 0.11426138877868652,
      "step": 30205
    },
    {
      "epoch": 0.000184356689453125,
      "step": 30205,
      "training_step_time": 0.43704795837402344
    },
    {
      "epoch": 0.00018436279296875,
      "model_forward_time": 0.11394619941711426,
      "step": 30206
    },
    {
      "epoch": 0.00018436279296875,
      "step": 30206,
      "training_step_time": 0.48845672607421875
    },
    {
      "epoch": 0.000184368896484375,
      "model_forward_time": 0.11453914642333984,
      "step": 30207
    },
    {
      "epoch": 0.000184368896484375,
      "step": 30207,
      "training_step_time": 0.554929256439209
    },
    {
      "epoch": 0.000184375,
      "model_forward_time": 0.11445355415344238,
      "step": 30208
    },
    {
      "epoch": 0.000184375,
      "step": 30208,
      "training_step_time": 0.40773677825927734
    },
    {
      "epoch": 0.000184381103515625,
      "model_forward_time": 0.11503863334655762,
      "step": 30209
    },
    {
      "epoch": 0.000184381103515625,
      "step": 30209,
      "training_step_time": 0.387359619140625
    },
    {
      "epoch": 0.00018438720703125,
      "grad_norm": 0.16710089147090912,
      "learning_rate": 5.355196575385225e-05,
      "loss": 0.0457,
      "step": 30210
    },
    {
      "epoch": 0.00018438720703125,
      "model_forward_time": 0.11496829986572266,
      "step": 30210
    },
    {
      "epoch": 0.00018438720703125,
      "step": 30210,
      "training_step_time": 0.40424537658691406
    },
    {
      "epoch": 0.000184393310546875,
      "model_forward_time": 0.1146547794342041,
      "step": 30211
    },
    {
      "epoch": 0.000184393310546875,
      "step": 30211,
      "training_step_time": 0.39896130561828613
    },
    {
      "epoch": 0.0001843994140625,
      "model_forward_time": 0.11525201797485352,
      "step": 30212
    },
    {
      "epoch": 0.0001843994140625,
      "step": 30212,
      "training_step_time": 0.39467382431030273
    },
    {
      "epoch": 0.000184405517578125,
      "model_forward_time": 0.11482381820678711,
      "step": 30213
    },
    {
      "epoch": 0.000184405517578125,
      "step": 30213,
      "training_step_time": 0.8614461421966553
    },
    {
      "epoch": 0.00018441162109375,
      "model_forward_time": 0.11524677276611328,
      "step": 30214
    },
    {
      "epoch": 0.00018441162109375,
      "step": 30214,
      "training_step_time": 0.3925437927246094
    },
    {
      "epoch": 0.000184417724609375,
      "model_forward_time": 0.11431288719177246,
      "step": 30215
    },
    {
      "epoch": 0.000184417724609375,
      "step": 30215,
      "training_step_time": 0.38963842391967773
    },
    {
      "epoch": 0.000184423828125,
      "model_forward_time": 0.11500382423400879,
      "step": 30216
    },
    {
      "epoch": 0.000184423828125,
      "step": 30216,
      "training_step_time": 0.4521450996398926
    },
    {
      "epoch": 0.000184429931640625,
      "model_forward_time": 0.11499214172363281,
      "step": 30217
    },
    {
      "epoch": 0.000184429931640625,
      "step": 30217,
      "training_step_time": 0.44043612480163574
    },
    {
      "epoch": 0.00018443603515625,
      "model_forward_time": 0.1146705150604248,
      "step": 30218
    },
    {
      "epoch": 0.00018443603515625,
      "step": 30218,
      "training_step_time": 0.4518904685974121
    },
    {
      "epoch": 0.000184442138671875,
      "model_forward_time": 0.11429047584533691,
      "step": 30219
    },
    {
      "epoch": 0.000184442138671875,
      "step": 30219,
      "training_step_time": 0.49309659004211426
    },
    {
      "epoch": 0.0001844482421875,
      "grad_norm": 0.26220524311065674,
      "learning_rate": 5.352447700986173e-05,
      "loss": 0.0496,
      "step": 30220
    },
    {
      "epoch": 0.0001844482421875,
      "model_forward_time": 0.11409759521484375,
      "step": 30220
    },
    {
      "epoch": 0.0001844482421875,
      "step": 30220,
      "training_step_time": 0.38684511184692383
    },
    {
      "epoch": 0.000184454345703125,
      "model_forward_time": 0.11493968963623047,
      "step": 30221
    },
    {
      "epoch": 0.000184454345703125,
      "step": 30221,
      "training_step_time": 0.3930480480194092
    },
    {
      "epoch": 0.00018446044921875,
      "model_forward_time": 0.11635661125183105,
      "step": 30222
    },
    {
      "epoch": 0.00018446044921875,
      "step": 30222,
      "training_step_time": 0.4061288833618164
    },
    {
      "epoch": 0.000184466552734375,
      "model_forward_time": 0.11561775207519531,
      "step": 30223
    },
    {
      "epoch": 0.000184466552734375,
      "step": 30223,
      "training_step_time": 0.40269923210144043
    },
    {
      "epoch": 0.00018447265625,
      "model_forward_time": 0.11522841453552246,
      "step": 30224
    },
    {
      "epoch": 0.00018447265625,
      "step": 30224,
      "training_step_time": 0.39507508277893066
    },
    {
      "epoch": 0.000184478759765625,
      "model_forward_time": 0.11426687240600586,
      "step": 30225
    },
    {
      "epoch": 0.000184478759765625,
      "step": 30225,
      "training_step_time": 0.6991078853607178
    },
    {
      "epoch": 0.00018448486328125,
      "model_forward_time": 0.11446499824523926,
      "step": 30226
    },
    {
      "epoch": 0.00018448486328125,
      "step": 30226,
      "training_step_time": 0.39216089248657227
    },
    {
      "epoch": 0.000184490966796875,
      "model_forward_time": 0.11505961418151855,
      "step": 30227
    },
    {
      "epoch": 0.000184490966796875,
      "step": 30227,
      "training_step_time": 0.39151692390441895
    },
    {
      "epoch": 0.0001844970703125,
      "model_forward_time": 0.1148374080657959,
      "step": 30228
    },
    {
      "epoch": 0.0001844970703125,
      "step": 30228,
      "training_step_time": 0.39047861099243164
    },
    {
      "epoch": 0.000184503173828125,
      "model_forward_time": 0.11558818817138672,
      "step": 30229
    },
    {
      "epoch": 0.000184503173828125,
      "step": 30229,
      "training_step_time": 0.4018516540527344
    },
    {
      "epoch": 0.00018450927734375,
      "grad_norm": 0.09445925801992416,
      "learning_rate": 5.3496987195228156e-05,
      "loss": 0.0482,
      "step": 30230
    },
    {
      "epoch": 0.00018450927734375,
      "model_forward_time": 0.11478352546691895,
      "step": 30230
    },
    {
      "epoch": 0.00018450927734375,
      "step": 30230,
      "training_step_time": 0.45092320442199707
    },
    {
      "epoch": 0.000184515380859375,
      "model_forward_time": 0.11488747596740723,
      "step": 30231
    },
    {
      "epoch": 0.000184515380859375,
      "step": 30231,
      "training_step_time": 0.7598130702972412
    },
    {
      "epoch": 0.000184521484375,
      "model_forward_time": 0.11439013481140137,
      "step": 30232
    },
    {
      "epoch": 0.000184521484375,
      "step": 30232,
      "training_step_time": 0.42203617095947266
    },
    {
      "epoch": 0.000184527587890625,
      "model_forward_time": 0.11454629898071289,
      "step": 30233
    },
    {
      "epoch": 0.000184527587890625,
      "step": 30233,
      "training_step_time": 0.39739537239074707
    },
    {
      "epoch": 0.00018453369140625,
      "model_forward_time": 0.11464142799377441,
      "step": 30234
    },
    {
      "epoch": 0.00018453369140625,
      "step": 30234,
      "training_step_time": 0.3881242275238037
    },
    {
      "epoch": 0.000184539794921875,
      "model_forward_time": 0.11491250991821289,
      "step": 30235
    },
    {
      "epoch": 0.000184539794921875,
      "step": 30235,
      "training_step_time": 0.3921184539794922
    },
    {
      "epoch": 0.0001845458984375,
      "model_forward_time": 0.11473298072814941,
      "step": 30236
    },
    {
      "epoch": 0.0001845458984375,
      "step": 30236,
      "training_step_time": 0.3786489963531494
    },
    {
      "epoch": 0.000184552001953125,
      "model_forward_time": 0.11519789695739746,
      "step": 30237
    },
    {
      "epoch": 0.000184552001953125,
      "step": 30237,
      "training_step_time": 1.1530468463897705
    },
    {
      "epoch": 0.00018455810546875,
      "model_forward_time": 0.11427783966064453,
      "step": 30238
    },
    {
      "epoch": 0.00018455810546875,
      "step": 30238,
      "training_step_time": 0.3944423198699951
    },
    {
      "epoch": 0.000184564208984375,
      "model_forward_time": 0.11436986923217773,
      "step": 30239
    },
    {
      "epoch": 0.000184564208984375,
      "step": 30239,
      "training_step_time": 0.38364362716674805
    },
    {
      "epoch": 0.0001845703125,
      "grad_norm": 0.24218939244747162,
      "learning_rate": 5.3469496318302204e-05,
      "loss": 0.0457,
      "step": 30240
    },
    {
      "epoch": 0.0001845703125,
      "model_forward_time": 0.11349058151245117,
      "step": 30240
    },
    {
      "epoch": 0.0001845703125,
      "step": 30240,
      "training_step_time": 0.3889486789703369
    },
    {
      "epoch": 0.000184576416015625,
      "model_forward_time": 0.11439180374145508,
      "step": 30241
    },
    {
      "epoch": 0.000184576416015625,
      "step": 30241,
      "training_step_time": 0.3776130676269531
    },
    {
      "epoch": 0.00018458251953125,
      "model_forward_time": 0.11449146270751953,
      "step": 30242
    },
    {
      "epoch": 0.00018458251953125,
      "step": 30242,
      "training_step_time": 0.4268968105316162
    },
    {
      "epoch": 0.000184588623046875,
      "model_forward_time": 0.11519598960876465,
      "step": 30243
    },
    {
      "epoch": 0.000184588623046875,
      "step": 30243,
      "training_step_time": 0.4950857162475586
    },
    {
      "epoch": 0.0001845947265625,
      "model_forward_time": 0.11538338661193848,
      "step": 30244
    },
    {
      "epoch": 0.0001845947265625,
      "step": 30244,
      "training_step_time": 0.4319343566894531
    },
    {
      "epoch": 0.000184600830078125,
      "model_forward_time": 0.1153573989868164,
      "step": 30245
    },
    {
      "epoch": 0.000184600830078125,
      "step": 30245,
      "training_step_time": 0.39293336868286133
    },
    {
      "epoch": 0.00018460693359375,
      "model_forward_time": 0.11595797538757324,
      "step": 30246
    },
    {
      "epoch": 0.00018460693359375,
      "step": 30246,
      "training_step_time": 0.398007869720459
    },
    {
      "epoch": 0.000184613037109375,
      "model_forward_time": 0.11598491668701172,
      "step": 30247
    },
    {
      "epoch": 0.000184613037109375,
      "step": 30247,
      "training_step_time": 0.3820528984069824
    },
    {
      "epoch": 0.000184619140625,
      "model_forward_time": 0.11498403549194336,
      "step": 30248
    },
    {
      "epoch": 0.000184619140625,
      "step": 30248,
      "training_step_time": 0.37947893142700195
    },
    {
      "epoch": 0.000184625244140625,
      "model_forward_time": 0.11583399772644043,
      "step": 30249
    },
    {
      "epoch": 0.000184625244140625,
      "step": 30249,
      "training_step_time": 0.7880251407623291
    },
    {
      "epoch": 0.00018463134765625,
      "grad_norm": 0.11071117967367172,
      "learning_rate": 5.344200438743489e-05,
      "loss": 0.0421,
      "step": 30250
    },
    {
      "epoch": 0.00018463134765625,
      "model_forward_time": 0.1151580810546875,
      "step": 30250
    },
    {
      "epoch": 0.00018463134765625,
      "step": 30250,
      "training_step_time": 0.37847137451171875
    },
    {
      "epoch": 0.000184637451171875,
      "model_forward_time": 0.11510467529296875,
      "step": 30251
    },
    {
      "epoch": 0.000184637451171875,
      "step": 30251,
      "training_step_time": 0.38413166999816895
    },
    {
      "epoch": 0.0001846435546875,
      "model_forward_time": 0.11490368843078613,
      "step": 30252
    },
    {
      "epoch": 0.0001846435546875,
      "step": 30252,
      "training_step_time": 0.386462926864624
    },
    {
      "epoch": 0.000184649658203125,
      "model_forward_time": 0.11537027359008789,
      "step": 30253
    },
    {
      "epoch": 0.000184649658203125,
      "step": 30253,
      "training_step_time": 0.3980722427368164
    },
    {
      "epoch": 0.00018465576171875,
      "model_forward_time": 0.11441493034362793,
      "step": 30254
    },
    {
      "epoch": 0.00018465576171875,
      "step": 30254,
      "training_step_time": 0.384000301361084
    },
    {
      "epoch": 0.000184661865234375,
      "model_forward_time": 0.11562657356262207,
      "step": 30255
    },
    {
      "epoch": 0.000184661865234375,
      "step": 30255,
      "training_step_time": 0.5818538665771484
    },
    {
      "epoch": 0.00018466796875,
      "model_forward_time": 0.11484622955322266,
      "step": 30256
    },
    {
      "epoch": 0.00018466796875,
      "step": 30256,
      "training_step_time": 0.4576835632324219
    },
    {
      "epoch": 0.000184674072265625,
      "model_forward_time": 0.11508464813232422,
      "step": 30257
    },
    {
      "epoch": 0.000184674072265625,
      "step": 30257,
      "training_step_time": 0.42977046966552734
    },
    {
      "epoch": 0.00018468017578125,
      "model_forward_time": 0.11543130874633789,
      "step": 30258
    },
    {
      "epoch": 0.00018468017578125,
      "step": 30258,
      "training_step_time": 0.5002052783966064
    },
    {
      "epoch": 0.000184686279296875,
      "model_forward_time": 0.11639881134033203,
      "step": 30259
    },
    {
      "epoch": 0.000184686279296875,
      "step": 30259,
      "training_step_time": 0.4870421886444092
    },
    {
      "epoch": 0.0001846923828125,
      "grad_norm": 0.12317795306444168,
      "learning_rate": 5.341451141097751e-05,
      "loss": 0.0457,
      "step": 30260
    },
    {
      "epoch": 0.0001846923828125,
      "model_forward_time": 0.11528134346008301,
      "step": 30260
    },
    {
      "epoch": 0.0001846923828125,
      "step": 30260,
      "training_step_time": 0.39414262771606445
    },
    {
      "epoch": 0.000184698486328125,
      "model_forward_time": 0.11522173881530762,
      "step": 30261
    },
    {
      "epoch": 0.000184698486328125,
      "step": 30261,
      "training_step_time": 0.39348673820495605
    },
    {
      "epoch": 0.00018470458984375,
      "model_forward_time": 0.1148076057434082,
      "step": 30262
    },
    {
      "epoch": 0.00018470458984375,
      "step": 30262,
      "training_step_time": 0.3899097442626953
    },
    {
      "epoch": 0.000184710693359375,
      "model_forward_time": 0.11514973640441895,
      "step": 30263
    },
    {
      "epoch": 0.000184710693359375,
      "step": 30263,
      "training_step_time": 0.38327479362487793
    },
    {
      "epoch": 0.000184716796875,
      "model_forward_time": 0.11527872085571289,
      "step": 30264
    },
    {
      "epoch": 0.000184716796875,
      "step": 30264,
      "training_step_time": 0.39328455924987793
    },
    {
      "epoch": 0.000184722900390625,
      "model_forward_time": 0.11513209342956543,
      "step": 30265
    },
    {
      "epoch": 0.000184722900390625,
      "step": 30265,
      "training_step_time": 0.4002666473388672
    },
    {
      "epoch": 0.00018472900390625,
      "model_forward_time": 0.11549043655395508,
      "step": 30266
    },
    {
      "epoch": 0.00018472900390625,
      "step": 30266,
      "training_step_time": 0.38771915435791016
    },
    {
      "epoch": 0.000184735107421875,
      "model_forward_time": 0.11514401435852051,
      "step": 30267
    },
    {
      "epoch": 0.000184735107421875,
      "step": 30267,
      "training_step_time": 0.39663004875183105
    },
    {
      "epoch": 0.0001847412109375,
      "model_forward_time": 0.11480140686035156,
      "step": 30268
    },
    {
      "epoch": 0.0001847412109375,
      "step": 30268,
      "training_step_time": 0.4048640727996826
    },
    {
      "epoch": 0.000184747314453125,
      "model_forward_time": 0.11545753479003906,
      "step": 30269
    },
    {
      "epoch": 0.000184747314453125,
      "step": 30269,
      "training_step_time": 0.41771507263183594
    },
    {
      "epoch": 0.00018475341796875,
      "grad_norm": 0.11941902339458466,
      "learning_rate": 5.3387017397281704e-05,
      "loss": 0.0463,
      "step": 30270
    },
    {
      "epoch": 0.00018475341796875,
      "model_forward_time": 0.11522245407104492,
      "step": 30270
    },
    {
      "epoch": 0.00018475341796875,
      "step": 30270,
      "training_step_time": 0.44769906997680664
    },
    {
      "epoch": 0.000184759521484375,
      "model_forward_time": 0.11576533317565918,
      "step": 30271
    },
    {
      "epoch": 0.000184759521484375,
      "step": 30271,
      "training_step_time": 0.4116058349609375
    },
    {
      "epoch": 0.000184765625,
      "model_forward_time": 0.11537837982177734,
      "step": 30272
    },
    {
      "epoch": 0.000184765625,
      "step": 30272,
      "training_step_time": 0.36801600456237793
    },
    {
      "epoch": 0.000184771728515625,
      "model_forward_time": 0.1143488883972168,
      "step": 30273
    },
    {
      "epoch": 0.000184771728515625,
      "step": 30273,
      "training_step_time": 0.47298645973205566
    },
    {
      "epoch": 0.00018477783203125,
      "model_forward_time": 0.11464571952819824,
      "step": 30274
    },
    {
      "epoch": 0.00018477783203125,
      "step": 30274,
      "training_step_time": 0.39515018463134766
    },
    {
      "epoch": 0.000184783935546875,
      "model_forward_time": 0.11469531059265137,
      "step": 30275
    },
    {
      "epoch": 0.000184783935546875,
      "step": 30275,
      "training_step_time": 0.3822293281555176
    },
    {
      "epoch": 0.0001847900390625,
      "model_forward_time": 0.1156303882598877,
      "step": 30276
    },
    {
      "epoch": 0.0001847900390625,
      "step": 30276,
      "training_step_time": 0.3969886302947998
    },
    {
      "epoch": 0.000184796142578125,
      "model_forward_time": 0.11463642120361328,
      "step": 30277
    },
    {
      "epoch": 0.000184796142578125,
      "step": 30277,
      "training_step_time": 0.40250301361083984
    },
    {
      "epoch": 0.00018480224609375,
      "model_forward_time": 0.11496281623840332,
      "step": 30278
    },
    {
      "epoch": 0.00018480224609375,
      "step": 30278,
      "training_step_time": 0.388944149017334
    },
    {
      "epoch": 0.000184808349609375,
      "model_forward_time": 0.1152801513671875,
      "step": 30279
    },
    {
      "epoch": 0.000184808349609375,
      "step": 30279,
      "training_step_time": 0.4479820728302002
    },
    {
      "epoch": 0.000184814453125,
      "grad_norm": 0.12792256474494934,
      "learning_rate": 5.335952235469947e-05,
      "loss": 0.0494,
      "step": 30280
    },
    {
      "epoch": 0.000184814453125,
      "model_forward_time": 0.11489272117614746,
      "step": 30280
    },
    {
      "epoch": 0.000184814453125,
      "step": 30280,
      "training_step_time": 0.39554667472839355
    },
    {
      "epoch": 0.000184820556640625,
      "model_forward_time": 0.11519837379455566,
      "step": 30281
    },
    {
      "epoch": 0.000184820556640625,
      "step": 30281,
      "training_step_time": 0.40485596656799316
    },
    {
      "epoch": 0.00018482666015625,
      "model_forward_time": 0.11494016647338867,
      "step": 30282
    },
    {
      "epoch": 0.00018482666015625,
      "step": 30282,
      "training_step_time": 0.39666152000427246
    },
    {
      "epoch": 0.000184832763671875,
      "model_forward_time": 0.11666464805603027,
      "step": 30283
    },
    {
      "epoch": 0.000184832763671875,
      "step": 30283,
      "training_step_time": 0.4150094985961914
    },
    {
      "epoch": 0.0001848388671875,
      "model_forward_time": 0.11481881141662598,
      "step": 30284
    },
    {
      "epoch": 0.0001848388671875,
      "step": 30284,
      "training_step_time": 0.3960075378417969
    },
    {
      "epoch": 0.000184844970703125,
      "model_forward_time": 0.11583590507507324,
      "step": 30285
    },
    {
      "epoch": 0.000184844970703125,
      "step": 30285,
      "training_step_time": 0.49411988258361816
    },
    {
      "epoch": 0.00018485107421875,
      "model_forward_time": 0.11521244049072266,
      "step": 30286
    },
    {
      "epoch": 0.00018485107421875,
      "step": 30286,
      "training_step_time": 0.4391913414001465
    },
    {
      "epoch": 0.000184857177734375,
      "model_forward_time": 0.11496376991271973,
      "step": 30287
    },
    {
      "epoch": 0.000184857177734375,
      "step": 30287,
      "training_step_time": 0.5075559616088867
    },
    {
      "epoch": 0.00018486328125,
      "model_forward_time": 0.11519289016723633,
      "step": 30288
    },
    {
      "epoch": 0.00018486328125,
      "step": 30288,
      "training_step_time": 0.4947521686553955
    },
    {
      "epoch": 0.000184869384765625,
      "model_forward_time": 0.11584305763244629,
      "step": 30289
    },
    {
      "epoch": 0.000184869384765625,
      "step": 30289,
      "training_step_time": 0.3911895751953125
    },
    {
      "epoch": 0.00018487548828125,
      "grad_norm": 0.22120171785354614,
      "learning_rate": 5.3332026291583016e-05,
      "loss": 0.0438,
      "step": 30290
    },
    {
      "epoch": 0.00018487548828125,
      "model_forward_time": 0.114990234375,
      "step": 30290
    },
    {
      "epoch": 0.00018487548828125,
      "step": 30290,
      "training_step_time": 0.3874013423919678
    },
    {
      "epoch": 0.000184881591796875,
      "model_forward_time": 0.11471033096313477,
      "step": 30291
    },
    {
      "epoch": 0.000184881591796875,
      "step": 30291,
      "training_step_time": 0.3952949047088623
    },
    {
      "epoch": 0.0001848876953125,
      "model_forward_time": 0.11533784866333008,
      "step": 30292
    },
    {
      "epoch": 0.0001848876953125,
      "step": 30292,
      "training_step_time": 0.3853611946105957
    },
    {
      "epoch": 0.000184893798828125,
      "model_forward_time": 0.11490058898925781,
      "step": 30293
    },
    {
      "epoch": 0.000184893798828125,
      "step": 30293,
      "training_step_time": 0.3832848072052002
    },
    {
      "epoch": 0.00018489990234375,
      "model_forward_time": 0.11515545845031738,
      "step": 30294
    },
    {
      "epoch": 0.00018489990234375,
      "step": 30294,
      "training_step_time": 0.389937162399292
    },
    {
      "epoch": 0.000184906005859375,
      "model_forward_time": 0.11480545997619629,
      "step": 30295
    },
    {
      "epoch": 0.000184906005859375,
      "step": 30295,
      "training_step_time": 0.3961524963378906
    },
    {
      "epoch": 0.000184912109375,
      "model_forward_time": 0.11577677726745605,
      "step": 30296
    },
    {
      "epoch": 0.000184912109375,
      "step": 30296,
      "training_step_time": 0.43956494331359863
    },
    {
      "epoch": 0.000184918212890625,
      "model_forward_time": 0.1159677505493164,
      "step": 30297
    },
    {
      "epoch": 0.000184918212890625,
      "step": 30297,
      "training_step_time": 0.387204647064209
    },
    {
      "epoch": 0.00018492431640625,
      "model_forward_time": 0.1150205135345459,
      "step": 30298
    },
    {
      "epoch": 0.00018492431640625,
      "step": 30298,
      "training_step_time": 0.4084291458129883
    },
    {
      "epoch": 0.000184930419921875,
      "model_forward_time": 0.11517000198364258,
      "step": 30299
    },
    {
      "epoch": 0.000184930419921875,
      "step": 30299,
      "training_step_time": 0.3884546756744385
    },
    {
      "epoch": 0.0001849365234375,
      "grad_norm": 0.13645319640636444,
      "learning_rate": 5.330452921628497e-05,
      "loss": 0.0437,
      "step": 30300
    },
    {
      "epoch": 0.0001849365234375,
      "model_forward_time": 0.11681580543518066,
      "step": 30300
    },
    {
      "epoch": 0.0001849365234375,
      "step": 30300,
      "training_step_time": 0.44811582565307617
    },
    {
      "epoch": 0.000184942626953125,
      "model_forward_time": 0.11593174934387207,
      "step": 30301
    },
    {
      "epoch": 0.000184942626953125,
      "step": 30301,
      "training_step_time": 0.4378223419189453
    },
    {
      "epoch": 0.00018494873046875,
      "model_forward_time": 0.11611485481262207,
      "step": 30302
    },
    {
      "epoch": 0.00018494873046875,
      "step": 30302,
      "training_step_time": 0.44289612770080566
    },
    {
      "epoch": 0.000184954833984375,
      "model_forward_time": 0.11474490165710449,
      "step": 30303
    },
    {
      "epoch": 0.000184954833984375,
      "step": 30303,
      "training_step_time": 0.47480273246765137
    },
    {
      "epoch": 0.0001849609375,
      "model_forward_time": 0.11534523963928223,
      "step": 30304
    },
    {
      "epoch": 0.0001849609375,
      "step": 30304,
      "training_step_time": 0.3947718143463135
    },
    {
      "epoch": 0.000184967041015625,
      "model_forward_time": 0.11465191841125488,
      "step": 30305
    },
    {
      "epoch": 0.000184967041015625,
      "step": 30305,
      "training_step_time": 0.3829514980316162
    },
    {
      "epoch": 0.00018497314453125,
      "model_forward_time": 0.11491107940673828,
      "step": 30306
    },
    {
      "epoch": 0.00018497314453125,
      "step": 30306,
      "training_step_time": 0.39680933952331543
    },
    {
      "epoch": 0.000184979248046875,
      "model_forward_time": 0.1154947280883789,
      "step": 30307
    },
    {
      "epoch": 0.000184979248046875,
      "step": 30307,
      "training_step_time": 0.3883695602416992
    },
    {
      "epoch": 0.0001849853515625,
      "model_forward_time": 0.11598992347717285,
      "step": 30308
    },
    {
      "epoch": 0.0001849853515625,
      "step": 30308,
      "training_step_time": 0.38945579528808594
    },
    {
      "epoch": 0.000184991455078125,
      "model_forward_time": 0.11551094055175781,
      "step": 30309
    },
    {
      "epoch": 0.000184991455078125,
      "step": 30309,
      "training_step_time": 0.9056286811828613
    },
    {
      "epoch": 0.00018499755859375,
      "grad_norm": 0.11113350093364716,
      "learning_rate": 5.3277031137158205e-05,
      "loss": 0.0439,
      "step": 30310
    },
    {
      "epoch": 0.00018499755859375,
      "model_forward_time": 0.11403298377990723,
      "step": 30310
    },
    {
      "epoch": 0.00018499755859375,
      "step": 30310,
      "training_step_time": 0.41876697540283203
    },
    {
      "epoch": 0.000185003662109375,
      "model_forward_time": 0.11474943161010742,
      "step": 30311
    },
    {
      "epoch": 0.000185003662109375,
      "step": 30311,
      "training_step_time": 0.38544797897338867
    },
    {
      "epoch": 0.000185009765625,
      "model_forward_time": 0.11475205421447754,
      "step": 30312
    },
    {
      "epoch": 0.000185009765625,
      "step": 30312,
      "training_step_time": 0.39434123039245605
    },
    {
      "epoch": 0.000185015869140625,
      "model_forward_time": 0.11440181732177734,
      "step": 30313
    },
    {
      "epoch": 0.000185015869140625,
      "step": 30313,
      "training_step_time": 0.4078538417816162
    },
    {
      "epoch": 0.00018502197265625,
      "model_forward_time": 0.11446762084960938,
      "step": 30314
    },
    {
      "epoch": 0.00018502197265625,
      "step": 30314,
      "training_step_time": 0.3822648525238037
    },
    {
      "epoch": 0.000185028076171875,
      "model_forward_time": 0.11459159851074219,
      "step": 30315
    },
    {
      "epoch": 0.000185028076171875,
      "step": 30315,
      "training_step_time": 0.7282772064208984
    },
    {
      "epoch": 0.0001850341796875,
      "model_forward_time": 0.11492586135864258,
      "step": 30316
    },
    {
      "epoch": 0.0001850341796875,
      "step": 30316,
      "training_step_time": 0.446258544921875
    },
    {
      "epoch": 0.000185040283203125,
      "model_forward_time": 0.11524081230163574,
      "step": 30317
    },
    {
      "epoch": 0.000185040283203125,
      "step": 30317,
      "training_step_time": 0.37873220443725586
    },
    {
      "epoch": 0.00018504638671875,
      "model_forward_time": 0.11489081382751465,
      "step": 30318
    },
    {
      "epoch": 0.00018504638671875,
      "step": 30318,
      "training_step_time": 0.3875257968902588
    },
    {
      "epoch": 0.000185052490234375,
      "model_forward_time": 0.11453580856323242,
      "step": 30319
    },
    {
      "epoch": 0.000185052490234375,
      "step": 30319,
      "training_step_time": 0.3872852325439453
    },
    {
      "epoch": 0.00018505859375,
      "grad_norm": 0.1051531583070755,
      "learning_rate": 5.32495320625559e-05,
      "loss": 0.0413,
      "step": 30320
    },
    {
      "epoch": 0.00018505859375,
      "model_forward_time": 0.11481785774230957,
      "step": 30320
    },
    {
      "epoch": 0.00018505859375,
      "step": 30320,
      "training_step_time": 0.3872389793395996
    },
    {
      "epoch": 0.000185064697265625,
      "model_forward_time": 0.11597180366516113,
      "step": 30321
    },
    {
      "epoch": 0.000185064697265625,
      "step": 30321,
      "training_step_time": 0.6185963153839111
    },
    {
      "epoch": 0.00018507080078125,
      "model_forward_time": 0.11436247825622559,
      "step": 30322
    },
    {
      "epoch": 0.00018507080078125,
      "step": 30322,
      "training_step_time": 0.40261387825012207
    },
    {
      "epoch": 0.000185076904296875,
      "model_forward_time": 0.11574792861938477,
      "step": 30323
    },
    {
      "epoch": 0.000185076904296875,
      "step": 30323,
      "training_step_time": 0.40159010887145996
    },
    {
      "epoch": 0.0001850830078125,
      "model_forward_time": 0.1147925853729248,
      "step": 30324
    },
    {
      "epoch": 0.0001850830078125,
      "step": 30324,
      "training_step_time": 0.39518165588378906
    },
    {
      "epoch": 0.000185089111328125,
      "model_forward_time": 0.11544179916381836,
      "step": 30325
    },
    {
      "epoch": 0.000185089111328125,
      "step": 30325,
      "training_step_time": 0.399446964263916
    },
    {
      "epoch": 0.00018509521484375,
      "model_forward_time": 0.1154022216796875,
      "step": 30326
    },
    {
      "epoch": 0.00018509521484375,
      "step": 30326,
      "training_step_time": 0.3901553153991699
    },
    {
      "epoch": 0.000185101318359375,
      "model_forward_time": 0.11513710021972656,
      "step": 30327
    },
    {
      "epoch": 0.000185101318359375,
      "step": 30327,
      "training_step_time": 0.5742647647857666
    },
    {
      "epoch": 0.000185107421875,
      "model_forward_time": 0.11476993560791016,
      "step": 30328
    },
    {
      "epoch": 0.000185107421875,
      "step": 30328,
      "training_step_time": 0.4743046760559082
    },
    {
      "epoch": 0.000185113525390625,
      "model_forward_time": 0.11524605751037598,
      "step": 30329
    },
    {
      "epoch": 0.000185113525390625,
      "step": 30329,
      "training_step_time": 0.5095584392547607
    },
    {
      "epoch": 0.00018511962890625,
      "grad_norm": 0.18533803522586823,
      "learning_rate": 5.322203200083154e-05,
      "loss": 0.0499,
      "step": 30330
    },
    {
      "epoch": 0.00018511962890625,
      "model_forward_time": 0.11629867553710938,
      "step": 30330
    },
    {
      "epoch": 0.00018511962890625,
      "step": 30330,
      "training_step_time": 0.47882533073425293
    },
    {
      "epoch": 0.000185125732421875,
      "model_forward_time": 0.1142737865447998,
      "step": 30331
    },
    {
      "epoch": 0.000185125732421875,
      "step": 30331,
      "training_step_time": 0.4045145511627197
    },
    {
      "epoch": 0.0001851318359375,
      "model_forward_time": 0.11444330215454102,
      "step": 30332
    },
    {
      "epoch": 0.0001851318359375,
      "step": 30332,
      "training_step_time": 0.4087028503417969
    },
    {
      "epoch": 0.000185137939453125,
      "model_forward_time": 0.11469292640686035,
      "step": 30333
    },
    {
      "epoch": 0.000185137939453125,
      "step": 30333,
      "training_step_time": 0.38588690757751465
    },
    {
      "epoch": 0.00018514404296875,
      "model_forward_time": 0.11493206024169922,
      "step": 30334
    },
    {
      "epoch": 0.00018514404296875,
      "step": 30334,
      "training_step_time": 0.4032471179962158
    },
    {
      "epoch": 0.000185150146484375,
      "model_forward_time": 0.11444377899169922,
      "step": 30335
    },
    {
      "epoch": 0.000185150146484375,
      "step": 30335,
      "training_step_time": 0.3953394889831543
    },
    {
      "epoch": 0.00018515625,
      "model_forward_time": 0.11541581153869629,
      "step": 30336
    },
    {
      "epoch": 0.00018515625,
      "step": 30336,
      "training_step_time": 0.424915075302124
    },
    {
      "epoch": 0.000185162353515625,
      "model_forward_time": 0.11594605445861816,
      "step": 30337
    },
    {
      "epoch": 0.000185162353515625,
      "step": 30337,
      "training_step_time": 0.40008974075317383
    },
    {
      "epoch": 0.00018516845703125,
      "model_forward_time": 0.1149454116821289,
      "step": 30338
    },
    {
      "epoch": 0.00018516845703125,
      "step": 30338,
      "training_step_time": 0.3889608383178711
    },
    {
      "epoch": 0.000185174560546875,
      "model_forward_time": 0.11582159996032715,
      "step": 30339
    },
    {
      "epoch": 0.000185174560546875,
      "step": 30339,
      "training_step_time": 0.6778864860534668
    },
    {
      "epoch": 0.0001851806640625,
      "grad_norm": 0.17178142070770264,
      "learning_rate": 5.319453096033896e-05,
      "loss": 0.0425,
      "step": 30340
    },
    {
      "epoch": 0.0001851806640625,
      "model_forward_time": 0.11439919471740723,
      "step": 30340
    },
    {
      "epoch": 0.0001851806640625,
      "step": 30340,
      "training_step_time": 0.40371251106262207
    },
    {
      "epoch": 0.000185186767578125,
      "model_forward_time": 0.11563372611999512,
      "step": 30341
    },
    {
      "epoch": 0.000185186767578125,
      "step": 30341,
      "training_step_time": 0.4480772018432617
    },
    {
      "epoch": 0.00018519287109375,
      "model_forward_time": 0.11480259895324707,
      "step": 30342
    },
    {
      "epoch": 0.00018519287109375,
      "step": 30342,
      "training_step_time": 0.4698045253753662
    },
    {
      "epoch": 0.000185198974609375,
      "model_forward_time": 0.11435961723327637,
      "step": 30343
    },
    {
      "epoch": 0.000185198974609375,
      "step": 30343,
      "training_step_time": 0.36505794525146484
    },
    {
      "epoch": 0.000185205078125,
      "model_forward_time": 0.11534810066223145,
      "step": 30344
    },
    {
      "epoch": 0.000185205078125,
      "step": 30344,
      "training_step_time": 0.47521018981933594
    },
    {
      "epoch": 0.000185211181640625,
      "model_forward_time": 0.11456990242004395,
      "step": 30345
    },
    {
      "epoch": 0.000185211181640625,
      "step": 30345,
      "training_step_time": 0.5051290988922119
    },
    {
      "epoch": 0.00018521728515625,
      "model_forward_time": 0.11506509780883789,
      "step": 30346
    },
    {
      "epoch": 0.00018521728515625,
      "step": 30346,
      "training_step_time": 0.3887975215911865
    },
    {
      "epoch": 0.000185223388671875,
      "model_forward_time": 0.1141977310180664,
      "step": 30347
    },
    {
      "epoch": 0.000185223388671875,
      "step": 30347,
      "training_step_time": 0.3838012218475342
    },
    {
      "epoch": 0.0001852294921875,
      "model_forward_time": 0.11475062370300293,
      "step": 30348
    },
    {
      "epoch": 0.0001852294921875,
      "step": 30348,
      "training_step_time": 0.4443321228027344
    },
    {
      "epoch": 0.000185235595703125,
      "model_forward_time": 0.1148674488067627,
      "step": 30349
    },
    {
      "epoch": 0.000185235595703125,
      "step": 30349,
      "training_step_time": 0.43855905532836914
    },
    {
      "epoch": 0.00018524169921875,
      "grad_norm": 0.13881364464759827,
      "learning_rate": 5.316702894943221e-05,
      "loss": 0.0423,
      "step": 30350
    },
    {
      "epoch": 0.00018524169921875,
      "model_forward_time": 0.1151890754699707,
      "step": 30350
    },
    {
      "epoch": 0.00018524169921875,
      "step": 30350,
      "training_step_time": 0.3828010559082031
    },
    {
      "epoch": 0.000185247802734375,
      "model_forward_time": 0.1145627498626709,
      "step": 30351
    },
    {
      "epoch": 0.000185247802734375,
      "step": 30351,
      "training_step_time": 0.8581380844116211
    },
    {
      "epoch": 0.00018525390625,
      "model_forward_time": 0.11503124237060547,
      "step": 30352
    },
    {
      "epoch": 0.00018525390625,
      "step": 30352,
      "training_step_time": 0.38483262062072754
    },
    {
      "epoch": 0.000185260009765625,
      "model_forward_time": 0.11415362358093262,
      "step": 30353
    },
    {
      "epoch": 0.000185260009765625,
      "step": 30353,
      "training_step_time": 0.3851187229156494
    },
    {
      "epoch": 0.00018526611328125,
      "model_forward_time": 0.11480975151062012,
      "step": 30354
    },
    {
      "epoch": 0.00018526611328125,
      "step": 30354,
      "training_step_time": 0.48687052726745605
    },
    {
      "epoch": 0.000185272216796875,
      "model_forward_time": 0.11394667625427246,
      "step": 30355
    },
    {
      "epoch": 0.000185272216796875,
      "step": 30355,
      "training_step_time": 0.4394083023071289
    },
    {
      "epoch": 0.0001852783203125,
      "model_forward_time": 0.1145467758178711,
      "step": 30356
    },
    {
      "epoch": 0.0001852783203125,
      "step": 30356,
      "training_step_time": 0.4413938522338867
    },
    {
      "epoch": 0.000185284423828125,
      "model_forward_time": 0.11495375633239746,
      "step": 30357
    },
    {
      "epoch": 0.000185284423828125,
      "step": 30357,
      "training_step_time": 0.6897530555725098
    },
    {
      "epoch": 0.00018529052734375,
      "model_forward_time": 0.11488485336303711,
      "step": 30358
    },
    {
      "epoch": 0.00018529052734375,
      "step": 30358,
      "training_step_time": 0.4160606861114502
    },
    {
      "epoch": 0.000185296630859375,
      "model_forward_time": 0.11529684066772461,
      "step": 30359
    },
    {
      "epoch": 0.000185296630859375,
      "step": 30359,
      "training_step_time": 0.3854217529296875
    },
    {
      "epoch": 0.000185302734375,
      "grad_norm": 0.1737188845872879,
      "learning_rate": 5.313952597646568e-05,
      "loss": 0.0489,
      "step": 30360
    },
    {
      "epoch": 0.000185302734375,
      "model_forward_time": 0.11453127861022949,
      "step": 30360
    },
    {
      "epoch": 0.000185302734375,
      "step": 30360,
      "training_step_time": 0.3935546875
    },
    {
      "epoch": 0.000185308837890625,
      "model_forward_time": 0.11506366729736328,
      "step": 30361
    },
    {
      "epoch": 0.000185308837890625,
      "step": 30361,
      "training_step_time": 0.40637993812561035
    },
    {
      "epoch": 0.00018531494140625,
      "model_forward_time": 0.11554813385009766,
      "step": 30362
    },
    {
      "epoch": 0.00018531494140625,
      "step": 30362,
      "training_step_time": 0.3862159252166748
    },
    {
      "epoch": 0.000185321044921875,
      "model_forward_time": 0.11573219299316406,
      "step": 30363
    },
    {
      "epoch": 0.000185321044921875,
      "step": 30363,
      "training_step_time": 0.597327470779419
    },
    {
      "epoch": 0.0001853271484375,
      "model_forward_time": 0.11491942405700684,
      "step": 30364
    },
    {
      "epoch": 0.0001853271484375,
      "step": 30364,
      "training_step_time": 0.41550683975219727
    },
    {
      "epoch": 0.000185333251953125,
      "model_forward_time": 0.11525392532348633,
      "step": 30365
    },
    {
      "epoch": 0.000185333251953125,
      "step": 30365,
      "training_step_time": 0.38707947731018066
    },
    {
      "epoch": 0.00018533935546875,
      "model_forward_time": 0.11561918258666992,
      "step": 30366
    },
    {
      "epoch": 0.00018533935546875,
      "step": 30366,
      "training_step_time": 0.3931758403778076
    },
    {
      "epoch": 0.000185345458984375,
      "model_forward_time": 0.11478209495544434,
      "step": 30367
    },
    {
      "epoch": 0.000185345458984375,
      "step": 30367,
      "training_step_time": 0.45969104766845703
    },
    {
      "epoch": 0.0001853515625,
      "model_forward_time": 0.11514043807983398,
      "step": 30368
    },
    {
      "epoch": 0.0001853515625,
      "step": 30368,
      "training_step_time": 0.4782242774963379
    },
    {
      "epoch": 0.000185357666015625,
      "model_forward_time": 0.1150515079498291,
      "step": 30369
    },
    {
      "epoch": 0.000185357666015625,
      "step": 30369,
      "training_step_time": 0.7349457740783691
    },
    {
      "epoch": 0.00018536376953125,
      "grad_norm": 0.12450167536735535,
      "learning_rate": 5.311202204979404e-05,
      "loss": 0.0481,
      "step": 30370
    },
    {
      "epoch": 0.00018536376953125,
      "model_forward_time": 0.11426687240600586,
      "step": 30370
    },
    {
      "epoch": 0.00018536376953125,
      "step": 30370,
      "training_step_time": 0.45423197746276855
    },
    {
      "epoch": 0.000185369873046875,
      "model_forward_time": 0.11488938331604004,
      "step": 30371
    },
    {
      "epoch": 0.000185369873046875,
      "step": 30371,
      "training_step_time": 0.48700881004333496
    },
    {
      "epoch": 0.0001853759765625,
      "model_forward_time": 0.11503791809082031,
      "step": 30372
    },
    {
      "epoch": 0.0001853759765625,
      "step": 30372,
      "training_step_time": 0.376162052154541
    },
    {
      "epoch": 0.000185382080078125,
      "model_forward_time": 0.11380124092102051,
      "step": 30373
    },
    {
      "epoch": 0.000185382080078125,
      "step": 30373,
      "training_step_time": 0.41061854362487793
    },
    {
      "epoch": 0.00018538818359375,
      "model_forward_time": 0.11423540115356445,
      "step": 30374
    },
    {
      "epoch": 0.00018538818359375,
      "step": 30374,
      "training_step_time": 0.3976156711578369
    },
    {
      "epoch": 0.000185394287109375,
      "model_forward_time": 0.11432051658630371,
      "step": 30375
    },
    {
      "epoch": 0.000185394287109375,
      "step": 30375,
      "training_step_time": 0.5061476230621338
    },
    {
      "epoch": 0.000185400390625,
      "model_forward_time": 0.11609101295471191,
      "step": 30376
    },
    {
      "epoch": 0.000185400390625,
      "step": 30376,
      "training_step_time": 0.38834381103515625
    },
    {
      "epoch": 0.000185406494140625,
      "model_forward_time": 0.1152200698852539,
      "step": 30377
    },
    {
      "epoch": 0.000185406494140625,
      "step": 30377,
      "training_step_time": 0.37999796867370605
    },
    {
      "epoch": 0.00018541259765625,
      "model_forward_time": 0.11559724807739258,
      "step": 30378
    },
    {
      "epoch": 0.00018541259765625,
      "step": 30378,
      "training_step_time": 0.39640212059020996
    },
    {
      "epoch": 0.000185418701171875,
      "model_forward_time": 0.11565732955932617,
      "step": 30379
    },
    {
      "epoch": 0.000185418701171875,
      "step": 30379,
      "training_step_time": 0.396090030670166
    },
    {
      "epoch": 0.0001854248046875,
      "grad_norm": 0.20562683045864105,
      "learning_rate": 5.308451717777228e-05,
      "loss": 0.0479,
      "step": 30380
    },
    {
      "epoch": 0.0001854248046875,
      "model_forward_time": 0.11496162414550781,
      "step": 30380
    },
    {
      "epoch": 0.0001854248046875,
      "step": 30380,
      "training_step_time": 0.4051783084869385
    },
    {
      "epoch": 0.000185430908203125,
      "model_forward_time": 0.11562347412109375,
      "step": 30381
    },
    {
      "epoch": 0.000185430908203125,
      "step": 30381,
      "training_step_time": 0.9196364879608154
    },
    {
      "epoch": 0.00018543701171875,
      "model_forward_time": 0.11465167999267578,
      "step": 30382
    },
    {
      "epoch": 0.00018543701171875,
      "step": 30382,
      "training_step_time": 0.4259645938873291
    },
    {
      "epoch": 0.000185443115234375,
      "model_forward_time": 0.11472296714782715,
      "step": 30383
    },
    {
      "epoch": 0.000185443115234375,
      "step": 30383,
      "training_step_time": 0.444080114364624
    },
    {
      "epoch": 0.00018544921875,
      "model_forward_time": 0.11454367637634277,
      "step": 30384
    },
    {
      "epoch": 0.00018544921875,
      "step": 30384,
      "training_step_time": 0.45053863525390625
    },
    {
      "epoch": 0.000185455322265625,
      "model_forward_time": 0.1144859790802002,
      "step": 30385
    },
    {
      "epoch": 0.000185455322265625,
      "step": 30385,
      "training_step_time": 0.46547460556030273
    },
    {
      "epoch": 0.00018546142578125,
      "model_forward_time": 0.11443662643432617,
      "step": 30386
    },
    {
      "epoch": 0.00018546142578125,
      "step": 30386,
      "training_step_time": 0.42627954483032227
    },
    {
      "epoch": 0.000185467529296875,
      "model_forward_time": 0.11428546905517578,
      "step": 30387
    },
    {
      "epoch": 0.000185467529296875,
      "step": 30387,
      "training_step_time": 0.3904123306274414
    },
    {
      "epoch": 0.0001854736328125,
      "model_forward_time": 0.11491250991821289,
      "step": 30388
    },
    {
      "epoch": 0.0001854736328125,
      "step": 30388,
      "training_step_time": 0.38788414001464844
    },
    {
      "epoch": 0.000185479736328125,
      "model_forward_time": 0.11499381065368652,
      "step": 30389
    },
    {
      "epoch": 0.000185479736328125,
      "step": 30389,
      "training_step_time": 0.38968515396118164
    },
    {
      "epoch": 0.00018548583984375,
      "grad_norm": 0.17645682394504547,
      "learning_rate": 5.305701136875566e-05,
      "loss": 0.0465,
      "step": 30390
    },
    {
      "epoch": 0.00018548583984375,
      "model_forward_time": 0.11556029319763184,
      "step": 30390
    },
    {
      "epoch": 0.00018548583984375,
      "step": 30390,
      "training_step_time": 0.4015820026397705
    },
    {
      "epoch": 0.000185491943359375,
      "model_forward_time": 0.11590027809143066,
      "step": 30391
    },
    {
      "epoch": 0.000185491943359375,
      "step": 30391,
      "training_step_time": 0.40769529342651367
    },
    {
      "epoch": 0.000185498046875,
      "model_forward_time": 0.11564016342163086,
      "step": 30392
    },
    {
      "epoch": 0.000185498046875,
      "step": 30392,
      "training_step_time": 0.39275312423706055
    },
    {
      "epoch": 0.000185504150390625,
      "model_forward_time": 0.11532211303710938,
      "step": 30393
    },
    {
      "epoch": 0.000185504150390625,
      "step": 30393,
      "training_step_time": 0.5503261089324951
    },
    {
      "epoch": 0.00018551025390625,
      "model_forward_time": 0.11501741409301758,
      "step": 30394
    },
    {
      "epoch": 0.00018551025390625,
      "step": 30394,
      "training_step_time": 0.40059852600097656
    },
    {
      "epoch": 0.000185516357421875,
      "model_forward_time": 0.11485886573791504,
      "step": 30395
    },
    {
      "epoch": 0.000185516357421875,
      "step": 30395,
      "training_step_time": 0.42632365226745605
    },
    {
      "epoch": 0.0001855224609375,
      "model_forward_time": 0.11568880081176758,
      "step": 30396
    },
    {
      "epoch": 0.0001855224609375,
      "step": 30396,
      "training_step_time": 0.4399991035461426
    },
    {
      "epoch": 0.000185528564453125,
      "model_forward_time": 0.11454129219055176,
      "step": 30397
    },
    {
      "epoch": 0.000185528564453125,
      "step": 30397,
      "training_step_time": 0.36521172523498535
    },
    {
      "epoch": 0.00018553466796875,
      "model_forward_time": 0.11604642868041992,
      "step": 30398
    },
    {
      "epoch": 0.00018553466796875,
      "step": 30398,
      "training_step_time": 0.4588496685028076
    },
    {
      "epoch": 0.000185540771484375,
      "model_forward_time": 0.11547350883483887,
      "step": 30399
    },
    {
      "epoch": 0.000185540771484375,
      "step": 30399,
      "training_step_time": 0.5745439529418945
    },
    {
      "epoch": 0.000185546875,
      "grad_norm": 0.14940892159938812,
      "learning_rate": 5.3029504631099694e-05,
      "loss": 0.0441,
      "step": 30400
    },
    {
      "epoch": 0.000185546875,
      "model_forward_time": 0.11478662490844727,
      "step": 30400
    },
    {
      "epoch": 0.000185546875,
      "step": 30400,
      "training_step_time": 0.39926838874816895
    },
    {
      "epoch": 0.000185552978515625,
      "model_forward_time": 0.11441564559936523,
      "step": 30401
    },
    {
      "epoch": 0.000185552978515625,
      "step": 30401,
      "training_step_time": 0.3934774398803711
    },
    {
      "epoch": 0.00018555908203125,
      "model_forward_time": 0.11495232582092285,
      "step": 30402
    },
    {
      "epoch": 0.00018555908203125,
      "step": 30402,
      "training_step_time": 0.40442538261413574
    },
    {
      "epoch": 0.000185565185546875,
      "model_forward_time": 0.11462259292602539,
      "step": 30403
    },
    {
      "epoch": 0.000185565185546875,
      "step": 30403,
      "training_step_time": 0.3959813117980957
    },
    {
      "epoch": 0.0001855712890625,
      "model_forward_time": 0.11559104919433594,
      "step": 30404
    },
    {
      "epoch": 0.0001855712890625,
      "step": 30404,
      "training_step_time": 0.39441680908203125
    },
    {
      "epoch": 0.000185577392578125,
      "model_forward_time": 0.11610054969787598,
      "step": 30405
    },
    {
      "epoch": 0.000185577392578125,
      "step": 30405,
      "training_step_time": 0.5118167400360107
    },
    {
      "epoch": 0.00018558349609375,
      "model_forward_time": 0.11528992652893066,
      "step": 30406
    },
    {
      "epoch": 0.00018558349609375,
      "step": 30406,
      "training_step_time": 0.4114987850189209
    },
    {
      "epoch": 0.000185589599609375,
      "model_forward_time": 0.11525678634643555,
      "step": 30407
    },
    {
      "epoch": 0.000185589599609375,
      "step": 30407,
      "training_step_time": 0.39623308181762695
    },
    {
      "epoch": 0.000185595703125,
      "model_forward_time": 0.11537623405456543,
      "step": 30408
    },
    {
      "epoch": 0.000185595703125,
      "step": 30408,
      "training_step_time": 0.42648959159851074
    },
    {
      "epoch": 0.000185601806640625,
      "model_forward_time": 0.11631011962890625,
      "step": 30409
    },
    {
      "epoch": 0.000185601806640625,
      "step": 30409,
      "training_step_time": 0.4507181644439697
    },
    {
      "epoch": 0.00018560791015625,
      "grad_norm": 0.14073611795902252,
      "learning_rate": 5.3001996973160206e-05,
      "loss": 0.0429,
      "step": 30410
    },
    {
      "epoch": 0.00018560791015625,
      "model_forward_time": 0.11467838287353516,
      "step": 30410
    },
    {
      "epoch": 0.00018560791015625,
      "step": 30410,
      "training_step_time": 0.39922356605529785
    },
    {
      "epoch": 0.000185614013671875,
      "model_forward_time": 0.11481404304504395,
      "step": 30411
    },
    {
      "epoch": 0.000185614013671875,
      "step": 30411,
      "training_step_time": 0.43752598762512207
    },
    {
      "epoch": 0.0001856201171875,
      "model_forward_time": 0.11479520797729492,
      "step": 30412
    },
    {
      "epoch": 0.0001856201171875,
      "step": 30412,
      "training_step_time": 0.4517960548400879
    },
    {
      "epoch": 0.000185626220703125,
      "model_forward_time": 0.11560630798339844,
      "step": 30413
    },
    {
      "epoch": 0.000185626220703125,
      "step": 30413,
      "training_step_time": 0.40721583366394043
    },
    {
      "epoch": 0.00018563232421875,
      "model_forward_time": 0.1156001091003418,
      "step": 30414
    },
    {
      "epoch": 0.00018563232421875,
      "step": 30414,
      "training_step_time": 0.39743804931640625
    },
    {
      "epoch": 0.000185638427734375,
      "model_forward_time": 0.1154320240020752,
      "step": 30415
    },
    {
      "epoch": 0.000185638427734375,
      "step": 30415,
      "training_step_time": 0.39064931869506836
    },
    {
      "epoch": 0.00018564453125,
      "model_forward_time": 0.11555933952331543,
      "step": 30416
    },
    {
      "epoch": 0.00018564453125,
      "step": 30416,
      "training_step_time": 0.382828950881958
    },
    {
      "epoch": 0.000185650634765625,
      "model_forward_time": 0.11557936668395996,
      "step": 30417
    },
    {
      "epoch": 0.000185650634765625,
      "step": 30417,
      "training_step_time": 0.43645668029785156
    },
    {
      "epoch": 0.00018565673828125,
      "model_forward_time": 0.11640644073486328,
      "step": 30418
    },
    {
      "epoch": 0.00018565673828125,
      "step": 30418,
      "training_step_time": 0.4103507995605469
    },
    {
      "epoch": 0.000185662841796875,
      "model_forward_time": 0.11446046829223633,
      "step": 30419
    },
    {
      "epoch": 0.000185662841796875,
      "step": 30419,
      "training_step_time": 0.3977081775665283
    },
    {
      "epoch": 0.0001856689453125,
      "grad_norm": 0.18543235957622528,
      "learning_rate": 5.297448840329329e-05,
      "loss": 0.0464,
      "step": 30420
    },
    {
      "epoch": 0.0001856689453125,
      "model_forward_time": 0.11529755592346191,
      "step": 30420
    },
    {
      "epoch": 0.0001856689453125,
      "step": 30420,
      "training_step_time": 0.4026491641998291
    },
    {
      "epoch": 0.000185675048828125,
      "model_forward_time": 0.11569666862487793,
      "step": 30421
    },
    {
      "epoch": 0.000185675048828125,
      "step": 30421,
      "training_step_time": 0.49779415130615234
    },
    {
      "epoch": 0.00018568115234375,
      "model_forward_time": 0.11471343040466309,
      "step": 30422
    },
    {
      "epoch": 0.00018568115234375,
      "step": 30422,
      "training_step_time": 0.4110729694366455
    },
    {
      "epoch": 0.000185687255859375,
      "model_forward_time": 0.11566615104675293,
      "step": 30423
    },
    {
      "epoch": 0.000185687255859375,
      "step": 30423,
      "training_step_time": 0.44727063179016113
    },
    {
      "epoch": 0.000185693359375,
      "model_forward_time": 0.11568903923034668,
      "step": 30424
    },
    {
      "epoch": 0.000185693359375,
      "step": 30424,
      "training_step_time": 0.4821507930755615
    },
    {
      "epoch": 0.000185699462890625,
      "model_forward_time": 0.11583995819091797,
      "step": 30425
    },
    {
      "epoch": 0.000185699462890625,
      "step": 30425,
      "training_step_time": 0.4488053321838379
    },
    {
      "epoch": 0.00018570556640625,
      "model_forward_time": 0.11522698402404785,
      "step": 30426
    },
    {
      "epoch": 0.00018570556640625,
      "step": 30426,
      "training_step_time": 0.445070743560791
    },
    {
      "epoch": 0.000185711669921875,
      "model_forward_time": 0.11550021171569824,
      "step": 30427
    },
    {
      "epoch": 0.000185711669921875,
      "step": 30427,
      "training_step_time": 0.4850335121154785
    },
    {
      "epoch": 0.0001857177734375,
      "model_forward_time": 0.11506891250610352,
      "step": 30428
    },
    {
      "epoch": 0.0001857177734375,
      "step": 30428,
      "training_step_time": 0.393169641494751
    },
    {
      "epoch": 0.000185723876953125,
      "model_forward_time": 0.11428570747375488,
      "step": 30429
    },
    {
      "epoch": 0.000185723876953125,
      "step": 30429,
      "training_step_time": 0.39187073707580566
    },
    {
      "epoch": 0.00018572998046875,
      "grad_norm": 0.15760445594787598,
      "learning_rate": 5.294697892985534e-05,
      "loss": 0.0453,
      "step": 30430
    },
    {
      "epoch": 0.00018572998046875,
      "model_forward_time": 0.11458683013916016,
      "step": 30430
    },
    {
      "epoch": 0.00018572998046875,
      "step": 30430,
      "training_step_time": 0.4001641273498535
    },
    {
      "epoch": 0.000185736083984375,
      "model_forward_time": 0.11529231071472168,
      "step": 30431
    },
    {
      "epoch": 0.000185736083984375,
      "step": 30431,
      "training_step_time": 0.39129137992858887
    },
    {
      "epoch": 0.0001857421875,
      "model_forward_time": 0.11458730697631836,
      "step": 30432
    },
    {
      "epoch": 0.0001857421875,
      "step": 30432,
      "training_step_time": 0.3961012363433838
    },
    {
      "epoch": 0.000185748291015625,
      "model_forward_time": 0.11508417129516602,
      "step": 30433
    },
    {
      "epoch": 0.000185748291015625,
      "step": 30433,
      "training_step_time": 0.39488649368286133
    },
    {
      "epoch": 0.00018575439453125,
      "model_forward_time": 0.11577987670898438,
      "step": 30434
    },
    {
      "epoch": 0.00018575439453125,
      "step": 30434,
      "training_step_time": 0.4050464630126953
    },
    {
      "epoch": 0.000185760498046875,
      "model_forward_time": 0.11587882041931152,
      "step": 30435
    },
    {
      "epoch": 0.000185760498046875,
      "step": 30435,
      "training_step_time": 0.3949732780456543
    },
    {
      "epoch": 0.0001857666015625,
      "model_forward_time": 0.1151285171508789,
      "step": 30436
    },
    {
      "epoch": 0.0001857666015625,
      "step": 30436,
      "training_step_time": 0.38593149185180664
    },
    {
      "epoch": 0.000185772705078125,
      "model_forward_time": 0.1159353256225586,
      "step": 30437
    },
    {
      "epoch": 0.000185772705078125,
      "step": 30437,
      "training_step_time": 0.390608549118042
    },
    {
      "epoch": 0.00018577880859375,
      "model_forward_time": 0.11574530601501465,
      "step": 30438
    },
    {
      "epoch": 0.00018577880859375,
      "step": 30438,
      "training_step_time": 0.39467644691467285
    },
    {
      "epoch": 0.000185784912109375,
      "model_forward_time": 0.11447715759277344,
      "step": 30439
    },
    {
      "epoch": 0.000185784912109375,
      "step": 30439,
      "training_step_time": 0.4440038204193115
    },
    {
      "epoch": 0.000185791015625,
      "grad_norm": 0.10432295501232147,
      "learning_rate": 5.2919468561203e-05,
      "loss": 0.0482,
      "step": 30440
    },
    {
      "epoch": 0.000185791015625,
      "model_forward_time": 0.11496353149414062,
      "step": 30440
    },
    {
      "epoch": 0.000185791015625,
      "step": 30440,
      "training_step_time": 0.3627502918243408
    },
    {
      "epoch": 0.000185797119140625,
      "model_forward_time": 0.11507296562194824,
      "step": 30441
    },
    {
      "epoch": 0.000185797119140625,
      "step": 30441,
      "training_step_time": 0.4920670986175537
    },
    {
      "epoch": 0.00018580322265625,
      "model_forward_time": 0.11492061614990234,
      "step": 30442
    },
    {
      "epoch": 0.00018580322265625,
      "step": 30442,
      "training_step_time": 0.3979766368865967
    },
    {
      "epoch": 0.000185809326171875,
      "model_forward_time": 0.11503028869628906,
      "step": 30443
    },
    {
      "epoch": 0.000185809326171875,
      "step": 30443,
      "training_step_time": 0.39484643936157227
    },
    {
      "epoch": 0.0001858154296875,
      "model_forward_time": 0.11511898040771484,
      "step": 30444
    },
    {
      "epoch": 0.0001858154296875,
      "step": 30444,
      "training_step_time": 0.3934621810913086
    },
    {
      "epoch": 0.000185821533203125,
      "model_forward_time": 0.11492538452148438,
      "step": 30445
    },
    {
      "epoch": 0.000185821533203125,
      "step": 30445,
      "training_step_time": 0.3974165916442871
    },
    {
      "epoch": 0.00018582763671875,
      "model_forward_time": 0.11467528343200684,
      "step": 30446
    },
    {
      "epoch": 0.00018582763671875,
      "step": 30446,
      "training_step_time": 0.3954317569732666
    },
    {
      "epoch": 0.000185833740234375,
      "model_forward_time": 0.11571741104125977,
      "step": 30447
    },
    {
      "epoch": 0.000185833740234375,
      "step": 30447,
      "training_step_time": 0.926274299621582
    },
    {
      "epoch": 0.00018583984375,
      "model_forward_time": 0.11435818672180176,
      "step": 30448
    },
    {
      "epoch": 0.00018583984375,
      "step": 30448,
      "training_step_time": 0.38953423500061035
    },
    {
      "epoch": 0.000185845947265625,
      "model_forward_time": 0.11438560485839844,
      "step": 30449
    },
    {
      "epoch": 0.000185845947265625,
      "step": 30449,
      "training_step_time": 0.39574193954467773
    },
    {
      "epoch": 0.00018585205078125,
      "grad_norm": 0.11782266199588776,
      "learning_rate": 5.2891957305693205e-05,
      "loss": 0.0424,
      "step": 30450
    },
    {
      "epoch": 0.00018585205078125,
      "model_forward_time": 0.11492443084716797,
      "step": 30450
    },
    {
      "epoch": 0.00018585205078125,
      "step": 30450,
      "training_step_time": 0.42728614807128906
    },
    {
      "epoch": 0.000185858154296875,
      "model_forward_time": 0.1144568920135498,
      "step": 30451
    },
    {
      "epoch": 0.000185858154296875,
      "step": 30451,
      "training_step_time": 0.39320802688598633
    },
    {
      "epoch": 0.0001858642578125,
      "model_forward_time": 0.11478734016418457,
      "step": 30452
    },
    {
      "epoch": 0.0001858642578125,
      "step": 30452,
      "training_step_time": 0.4219644069671631
    },
    {
      "epoch": 0.000185870361328125,
      "model_forward_time": 0.11479043960571289,
      "step": 30453
    },
    {
      "epoch": 0.000185870361328125,
      "step": 30453,
      "training_step_time": 0.5395064353942871
    },
    {
      "epoch": 0.00018587646484375,
      "model_forward_time": 0.11498665809631348,
      "step": 30454
    },
    {
      "epoch": 0.00018587646484375,
      "step": 30454,
      "training_step_time": 0.40240049362182617
    },
    {
      "epoch": 0.000185882568359375,
      "model_forward_time": 0.11549496650695801,
      "step": 30455
    },
    {
      "epoch": 0.000185882568359375,
      "step": 30455,
      "training_step_time": 0.48818254470825195
    },
    {
      "epoch": 0.000185888671875,
      "model_forward_time": 0.11587309837341309,
      "step": 30456
    },
    {
      "epoch": 0.000185888671875,
      "step": 30456,
      "training_step_time": 0.42221975326538086
    },
    {
      "epoch": 0.000185894775390625,
      "model_forward_time": 0.114959716796875,
      "step": 30457
    },
    {
      "epoch": 0.000185894775390625,
      "step": 30457,
      "training_step_time": 0.3859069347381592
    },
    {
      "epoch": 0.00018590087890625,
      "model_forward_time": 0.11521792411804199,
      "step": 30458
    },
    {
      "epoch": 0.00018590087890625,
      "step": 30458,
      "training_step_time": 0.39156246185302734
    },
    {
      "epoch": 0.000185906982421875,
      "model_forward_time": 0.11547303199768066,
      "step": 30459
    },
    {
      "epoch": 0.000185906982421875,
      "step": 30459,
      "training_step_time": 0.6083323955535889
    },
    {
      "epoch": 0.0001859130859375,
      "grad_norm": 0.12493208795785904,
      "learning_rate": 5.286444517168313e-05,
      "loss": 0.0469,
      "step": 30460
    },
    {
      "epoch": 0.0001859130859375,
      "model_forward_time": 0.11484122276306152,
      "step": 30460
    },
    {
      "epoch": 0.0001859130859375,
      "step": 30460,
      "training_step_time": 0.4013967514038086
    },
    {
      "epoch": 0.000185919189453125,
      "model_forward_time": 0.11563491821289062,
      "step": 30461
    },
    {
      "epoch": 0.000185919189453125,
      "step": 30461,
      "training_step_time": 0.3844151496887207
    },
    {
      "epoch": 0.00018592529296875,
      "model_forward_time": 0.11527252197265625,
      "step": 30462
    },
    {
      "epoch": 0.00018592529296875,
      "step": 30462,
      "training_step_time": 0.3875465393066406
    },
    {
      "epoch": 0.000185931396484375,
      "model_forward_time": 0.11504960060119629,
      "step": 30463
    },
    {
      "epoch": 0.000185931396484375,
      "step": 30463,
      "training_step_time": 0.40985107421875
    },
    {
      "epoch": 0.0001859375,
      "model_forward_time": 0.11528182029724121,
      "step": 30464
    },
    {
      "epoch": 0.0001859375,
      "step": 30464,
      "training_step_time": 0.42182421684265137
    },
    {
      "epoch": 0.000185943603515625,
      "model_forward_time": 0.11573147773742676,
      "step": 30465
    },
    {
      "epoch": 0.000185943603515625,
      "step": 30465,
      "training_step_time": 1.2461929321289062
    },
    {
      "epoch": 0.00018594970703125,
      "model_forward_time": 0.1139528751373291,
      "step": 30466
    },
    {
      "epoch": 0.00018594970703125,
      "step": 30466,
      "training_step_time": 0.40176916122436523
    },
    {
      "epoch": 0.000185955810546875,
      "model_forward_time": 0.11439728736877441,
      "step": 30467
    },
    {
      "epoch": 0.000185955810546875,
      "step": 30467,
      "training_step_time": 0.3614218235015869
    },
    {
      "epoch": 0.0001859619140625,
      "model_forward_time": 0.11416339874267578,
      "step": 30468
    },
    {
      "epoch": 0.0001859619140625,
      "step": 30468,
      "training_step_time": 0.38338303565979004
    },
    {
      "epoch": 0.000185968017578125,
      "model_forward_time": 0.11401104927062988,
      "step": 30469
    },
    {
      "epoch": 0.000185968017578125,
      "step": 30469,
      "training_step_time": 0.46782827377319336
    },
    {
      "epoch": 0.00018597412109375,
      "grad_norm": 0.12844954431056976,
      "learning_rate": 5.283693216753025e-05,
      "loss": 0.0422,
      "step": 30470
    },
    {
      "epoch": 0.00018597412109375,
      "model_forward_time": 0.11412525177001953,
      "step": 30470
    },
    {
      "epoch": 0.00018597412109375,
      "step": 30470,
      "training_step_time": 0.3843817710876465
    },
    {
      "epoch": 0.000185980224609375,
      "model_forward_time": 0.11430168151855469,
      "step": 30471
    },
    {
      "epoch": 0.000185980224609375,
      "step": 30471,
      "training_step_time": 0.621166467666626
    },
    {
      "epoch": 0.000185986328125,
      "model_forward_time": 0.11442160606384277,
      "step": 30472
    },
    {
      "epoch": 0.000185986328125,
      "step": 30472,
      "training_step_time": 0.38733410835266113
    },
    {
      "epoch": 0.000185992431640625,
      "model_forward_time": 0.11440825462341309,
      "step": 30473
    },
    {
      "epoch": 0.000185992431640625,
      "step": 30473,
      "training_step_time": 0.3823075294494629
    },
    {
      "epoch": 0.00018599853515625,
      "model_forward_time": 0.1148836612701416,
      "step": 30474
    },
    {
      "epoch": 0.00018599853515625,
      "step": 30474,
      "training_step_time": 0.39322757720947266
    },
    {
      "epoch": 0.000186004638671875,
      "model_forward_time": 0.1154935359954834,
      "step": 30475
    },
    {
      "epoch": 0.000186004638671875,
      "step": 30475,
      "training_step_time": 0.389909029006958
    },
    {
      "epoch": 0.0001860107421875,
      "model_forward_time": 0.11549234390258789,
      "step": 30476
    },
    {
      "epoch": 0.0001860107421875,
      "step": 30476,
      "training_step_time": 0.43900132179260254
    },
    {
      "epoch": 0.000186016845703125,
      "model_forward_time": 0.11504364013671875,
      "step": 30477
    },
    {
      "epoch": 0.000186016845703125,
      "step": 30477,
      "training_step_time": 0.7589106559753418
    },
    {
      "epoch": 0.00018602294921875,
      "model_forward_time": 0.11427998542785645,
      "step": 30478
    },
    {
      "epoch": 0.00018602294921875,
      "step": 30478,
      "training_step_time": 0.44611072540283203
    },
    {
      "epoch": 0.000186029052734375,
      "model_forward_time": 0.11431598663330078,
      "step": 30479
    },
    {
      "epoch": 0.000186029052734375,
      "step": 30479,
      "training_step_time": 0.39763355255126953
    },
    {
      "epoch": 0.00018603515625,
      "grad_norm": 0.14251039922237396,
      "learning_rate": 5.280941830159227e-05,
      "loss": 0.0443,
      "step": 30480
    },
    {
      "epoch": 0.00018603515625,
      "model_forward_time": 0.11383533477783203,
      "step": 30480
    },
    {
      "epoch": 0.00018603515625,
      "step": 30480,
      "training_step_time": 0.4231235980987549
    },
    {
      "epoch": 0.000186041259765625,
      "model_forward_time": 0.11546039581298828,
      "step": 30481
    },
    {
      "epoch": 0.000186041259765625,
      "step": 30481,
      "training_step_time": 0.4152536392211914
    },
    {
      "epoch": 0.00018604736328125,
      "model_forward_time": 0.11433815956115723,
      "step": 30482
    },
    {
      "epoch": 0.00018604736328125,
      "step": 30482,
      "training_step_time": 0.45917439460754395
    },
    {
      "epoch": 0.000186053466796875,
      "model_forward_time": 0.11495065689086914,
      "step": 30483
    },
    {
      "epoch": 0.000186053466796875,
      "step": 30483,
      "training_step_time": 0.661881685256958
    },
    {
      "epoch": 0.0001860595703125,
      "model_forward_time": 0.11482882499694824,
      "step": 30484
    },
    {
      "epoch": 0.0001860595703125,
      "step": 30484,
      "training_step_time": 0.3849043846130371
    },
    {
      "epoch": 0.000186065673828125,
      "model_forward_time": 0.11449360847473145,
      "step": 30485
    },
    {
      "epoch": 0.000186065673828125,
      "step": 30485,
      "training_step_time": 0.39114809036254883
    },
    {
      "epoch": 0.00018607177734375,
      "model_forward_time": 0.11473727226257324,
      "step": 30486
    },
    {
      "epoch": 0.00018607177734375,
      "step": 30486,
      "training_step_time": 0.38722920417785645
    },
    {
      "epoch": 0.000186077880859375,
      "model_forward_time": 0.11452913284301758,
      "step": 30487
    },
    {
      "epoch": 0.000186077880859375,
      "step": 30487,
      "training_step_time": 0.39553117752075195
    },
    {
      "epoch": 0.000186083984375,
      "model_forward_time": 0.11519384384155273,
      "step": 30488
    },
    {
      "epoch": 0.000186083984375,
      "step": 30488,
      "training_step_time": 0.406782865524292
    },
    {
      "epoch": 0.000186090087890625,
      "model_forward_time": 0.11493086814880371,
      "step": 30489
    },
    {
      "epoch": 0.000186090087890625,
      "step": 30489,
      "training_step_time": 0.9368634223937988
    },
    {
      "epoch": 0.00018609619140625,
      "grad_norm": 0.1261654496192932,
      "learning_rate": 5.278190358222721e-05,
      "loss": 0.0451,
      "step": 30490
    },
    {
      "epoch": 0.00018609619140625,
      "model_forward_time": 0.11509084701538086,
      "step": 30490
    },
    {
      "epoch": 0.00018609619140625,
      "step": 30490,
      "training_step_time": 0.4009866714477539
    },
    {
      "epoch": 0.000186102294921875,
      "model_forward_time": 0.11471366882324219,
      "step": 30491
    },
    {
      "epoch": 0.000186102294921875,
      "step": 30491,
      "training_step_time": 0.4427318572998047
    },
    {
      "epoch": 0.0001861083984375,
      "model_forward_time": 0.11497187614440918,
      "step": 30492
    },
    {
      "epoch": 0.0001861083984375,
      "step": 30492,
      "training_step_time": 0.4649980068206787
    },
    {
      "epoch": 0.000186114501953125,
      "model_forward_time": 0.11445021629333496,
      "step": 30493
    },
    {
      "epoch": 0.000186114501953125,
      "step": 30493,
      "training_step_time": 0.447068452835083
    },
    {
      "epoch": 0.00018612060546875,
      "model_forward_time": 0.11418890953063965,
      "step": 30494
    },
    {
      "epoch": 0.00018612060546875,
      "step": 30494,
      "training_step_time": 0.43732547760009766
    },
    {
      "epoch": 0.000186126708984375,
      "model_forward_time": 0.11440825462341309,
      "step": 30495
    },
    {
      "epoch": 0.000186126708984375,
      "step": 30495,
      "training_step_time": 0.6821892261505127
    },
    {
      "epoch": 0.0001861328125,
      "model_forward_time": 0.11441707611083984,
      "step": 30496
    },
    {
      "epoch": 0.0001861328125,
      "step": 30496,
      "training_step_time": 0.37858009338378906
    },
    {
      "epoch": 0.000186138916015625,
      "model_forward_time": 0.1143031120300293,
      "step": 30497
    },
    {
      "epoch": 0.000186138916015625,
      "step": 30497,
      "training_step_time": 0.3698008060455322
    },
    {
      "epoch": 0.00018614501953125,
      "model_forward_time": 0.11419820785522461,
      "step": 30498
    },
    {
      "epoch": 0.00018614501953125,
      "step": 30498,
      "training_step_time": 0.37964463233947754
    },
    {
      "epoch": 0.000186151123046875,
      "model_forward_time": 0.1144874095916748,
      "step": 30499
    },
    {
      "epoch": 0.000186151123046875,
      "step": 30499,
      "training_step_time": 0.3800489902496338
    },
    {
      "epoch": 0.0001861572265625,
      "grad_norm": 0.1348700374364853,
      "learning_rate": 5.2754388017793274e-05,
      "loss": 0.0424,
      "step": 30500
    },
    {
      "epoch": 0.0001861572265625,
      "model_forward_time": 0.1154642105102539,
      "step": 30500
    },
    {
      "epoch": 0.0001861572265625,
      "step": 30500,
      "training_step_time": 0.38952088356018066
    },
    {
      "epoch": 0.000186163330078125,
      "model_forward_time": 0.11498165130615234,
      "step": 30501
    },
    {
      "epoch": 0.000186163330078125,
      "step": 30501,
      "training_step_time": 0.6181855201721191
    },
    {
      "epoch": 0.00018616943359375,
      "model_forward_time": 0.11459827423095703,
      "step": 30502
    },
    {
      "epoch": 0.00018616943359375,
      "step": 30502,
      "training_step_time": 0.3989255428314209
    },
    {
      "epoch": 0.000186175537109375,
      "model_forward_time": 0.115203857421875,
      "step": 30503
    },
    {
      "epoch": 0.000186175537109375,
      "step": 30503,
      "training_step_time": 0.4503772258758545
    },
    {
      "epoch": 0.000186181640625,
      "model_forward_time": 0.1151280403137207,
      "step": 30504
    },
    {
      "epoch": 0.000186181640625,
      "step": 30504,
      "training_step_time": 0.48162055015563965
    },
    {
      "epoch": 0.000186187744140625,
      "model_forward_time": 0.11421775817871094,
      "step": 30505
    },
    {
      "epoch": 0.000186187744140625,
      "step": 30505,
      "training_step_time": 0.4209024906158447
    },
    {
      "epoch": 0.00018619384765625,
      "model_forward_time": 0.11495757102966309,
      "step": 30506
    },
    {
      "epoch": 0.00018619384765625,
      "step": 30506,
      "training_step_time": 0.4249405860900879
    },
    {
      "epoch": 0.000186199951171875,
      "model_forward_time": 0.1147301197052002,
      "step": 30507
    },
    {
      "epoch": 0.000186199951171875,
      "step": 30507,
      "training_step_time": 0.5282206535339355
    },
    {
      "epoch": 0.0001862060546875,
      "model_forward_time": 0.11436867713928223,
      "step": 30508
    },
    {
      "epoch": 0.0001862060546875,
      "step": 30508,
      "training_step_time": 0.45325660705566406
    },
    {
      "epoch": 0.000186212158203125,
      "model_forward_time": 0.11476397514343262,
      "step": 30509
    },
    {
      "epoch": 0.000186212158203125,
      "step": 30509,
      "training_step_time": 0.41092586517333984
    },
    {
      "epoch": 0.00018621826171875,
      "grad_norm": 0.08311748504638672,
      "learning_rate": 5.2726871616649e-05,
      "loss": 0.042,
      "step": 30510
    },
    {
      "epoch": 0.00018621826171875,
      "model_forward_time": 0.1145167350769043,
      "step": 30510
    },
    {
      "epoch": 0.00018621826171875,
      "step": 30510,
      "training_step_time": 0.41234612464904785
    },
    {
      "epoch": 0.000186224365234375,
      "model_forward_time": 0.11463356018066406,
      "step": 30511
    },
    {
      "epoch": 0.000186224365234375,
      "step": 30511,
      "training_step_time": 0.3897514343261719
    },
    {
      "epoch": 0.00018623046875,
      "model_forward_time": 0.11472892761230469,
      "step": 30512
    },
    {
      "epoch": 0.00018623046875,
      "step": 30512,
      "training_step_time": 0.4354407787322998
    },
    {
      "epoch": 0.000186236572265625,
      "model_forward_time": 0.11448121070861816,
      "step": 30513
    },
    {
      "epoch": 0.000186236572265625,
      "step": 30513,
      "training_step_time": 0.5059816837310791
    },
    {
      "epoch": 0.00018624267578125,
      "model_forward_time": 0.11450338363647461,
      "step": 30514
    },
    {
      "epoch": 0.00018624267578125,
      "step": 30514,
      "training_step_time": 0.3920309543609619
    },
    {
      "epoch": 0.000186248779296875,
      "model_forward_time": 0.11417555809020996,
      "step": 30515
    },
    {
      "epoch": 0.000186248779296875,
      "step": 30515,
      "training_step_time": 0.37639570236206055
    },
    {
      "epoch": 0.0001862548828125,
      "model_forward_time": 0.11507272720336914,
      "step": 30516
    },
    {
      "epoch": 0.0001862548828125,
      "step": 30516,
      "training_step_time": 0.46744298934936523
    },
    {
      "epoch": 0.000186260986328125,
      "model_forward_time": 0.11492538452148438,
      "step": 30517
    },
    {
      "epoch": 0.000186260986328125,
      "step": 30517,
      "training_step_time": 0.46459364891052246
    },
    {
      "epoch": 0.00018626708984375,
      "model_forward_time": 0.11572456359863281,
      "step": 30518
    },
    {
      "epoch": 0.00018626708984375,
      "step": 30518,
      "training_step_time": 0.3930633068084717
    },
    {
      "epoch": 0.000186273193359375,
      "model_forward_time": 0.11478066444396973,
      "step": 30519
    },
    {
      "epoch": 0.000186273193359375,
      "step": 30519,
      "training_step_time": 1.099029541015625
    },
    {
      "epoch": 0.000186279296875,
      "grad_norm": 0.09428297728300095,
      "learning_rate": 5.269935438715312e-05,
      "loss": 0.0434,
      "step": 30520
    },
    {
      "epoch": 0.000186279296875,
      "model_forward_time": 0.11492633819580078,
      "step": 30520
    },
    {
      "epoch": 0.000186279296875,
      "step": 30520,
      "training_step_time": 0.4009239673614502
    },
    {
      "epoch": 0.000186285400390625,
      "model_forward_time": 0.11414527893066406,
      "step": 30521
    },
    {
      "epoch": 0.000186285400390625,
      "step": 30521,
      "training_step_time": 0.4637720584869385
    },
    {
      "epoch": 0.00018629150390625,
      "model_forward_time": 0.11376714706420898,
      "step": 30522
    },
    {
      "epoch": 0.00018629150390625,
      "step": 30522,
      "training_step_time": 0.3788924217224121
    },
    {
      "epoch": 0.000186297607421875,
      "model_forward_time": 0.11394762992858887,
      "step": 30523
    },
    {
      "epoch": 0.000186297607421875,
      "step": 30523,
      "training_step_time": 0.38553738594055176
    },
    {
      "epoch": 0.0001863037109375,
      "model_forward_time": 0.11416292190551758,
      "step": 30524
    },
    {
      "epoch": 0.0001863037109375,
      "step": 30524,
      "training_step_time": 0.3840758800506592
    },
    {
      "epoch": 0.000186309814453125,
      "model_forward_time": 0.11462759971618652,
      "step": 30525
    },
    {
      "epoch": 0.000186309814453125,
      "step": 30525,
      "training_step_time": 0.5229606628417969
    },
    {
      "epoch": 0.00018631591796875,
      "model_forward_time": 0.11457300186157227,
      "step": 30526
    },
    {
      "epoch": 0.00018631591796875,
      "step": 30526,
      "training_step_time": 0.3917672634124756
    },
    {
      "epoch": 0.000186322021484375,
      "model_forward_time": 0.11450481414794922,
      "step": 30527
    },
    {
      "epoch": 0.000186322021484375,
      "step": 30527,
      "training_step_time": 0.38072919845581055
    },
    {
      "epoch": 0.000186328125,
      "model_forward_time": 0.11576676368713379,
      "step": 30528
    },
    {
      "epoch": 0.000186328125,
      "step": 30528,
      "training_step_time": 0.3966636657714844
    },
    {
      "epoch": 0.000186334228515625,
      "model_forward_time": 0.11527681350708008,
      "step": 30529
    },
    {
      "epoch": 0.000186334228515625,
      "step": 30529,
      "training_step_time": 0.40497732162475586
    },
    {
      "epoch": 0.00018634033203125,
      "grad_norm": 0.1746889352798462,
      "learning_rate": 5.2671836337664634e-05,
      "loss": 0.0488,
      "step": 30530
    },
    {
      "epoch": 0.00018634033203125,
      "model_forward_time": 0.1156320571899414,
      "step": 30530
    },
    {
      "epoch": 0.00018634033203125,
      "step": 30530,
      "training_step_time": 0.4749886989593506
    },
    {
      "epoch": 0.000186346435546875,
      "model_forward_time": 0.11577272415161133,
      "step": 30531
    },
    {
      "epoch": 0.000186346435546875,
      "step": 30531,
      "training_step_time": 0.440291166305542
    },
    {
      "epoch": 0.0001863525390625,
      "model_forward_time": 0.11485791206359863,
      "step": 30532
    },
    {
      "epoch": 0.0001863525390625,
      "step": 30532,
      "training_step_time": 0.4206094741821289
    },
    {
      "epoch": 0.000186358642578125,
      "model_forward_time": 0.11548233032226562,
      "step": 30533
    },
    {
      "epoch": 0.000186358642578125,
      "step": 30533,
      "training_step_time": 0.4323906898498535
    },
    {
      "epoch": 0.00018636474609375,
      "model_forward_time": 0.11536002159118652,
      "step": 30534
    },
    {
      "epoch": 0.00018636474609375,
      "step": 30534,
      "training_step_time": 0.4580416679382324
    },
    {
      "epoch": 0.000186370849609375,
      "model_forward_time": 0.11595702171325684,
      "step": 30535
    },
    {
      "epoch": 0.000186370849609375,
      "step": 30535,
      "training_step_time": 0.4055163860321045
    },
    {
      "epoch": 0.000186376953125,
      "model_forward_time": 0.11619377136230469,
      "step": 30536
    },
    {
      "epoch": 0.000186376953125,
      "step": 30536,
      "training_step_time": 0.39748406410217285
    },
    {
      "epoch": 0.000186383056640625,
      "model_forward_time": 0.11516141891479492,
      "step": 30537
    },
    {
      "epoch": 0.000186383056640625,
      "step": 30537,
      "training_step_time": 0.45484113693237305
    },
    {
      "epoch": 0.00018638916015625,
      "model_forward_time": 0.1148533821105957,
      "step": 30538
    },
    {
      "epoch": 0.00018638916015625,
      "step": 30538,
      "training_step_time": 0.418445348739624
    },
    {
      "epoch": 0.000186395263671875,
      "model_forward_time": 0.11513280868530273,
      "step": 30539
    },
    {
      "epoch": 0.000186395263671875,
      "step": 30539,
      "training_step_time": 0.3931872844696045
    },
    {
      "epoch": 0.0001864013671875,
      "grad_norm": 0.15700417757034302,
      "learning_rate": 5.264431747654284e-05,
      "loss": 0.0432,
      "step": 30540
    },
    {
      "epoch": 0.0001864013671875,
      "model_forward_time": 0.11460185050964355,
      "step": 30540
    },
    {
      "epoch": 0.0001864013671875,
      "step": 30540,
      "training_step_time": 0.4020824432373047
    },
    {
      "epoch": 0.000186407470703125,
      "model_forward_time": 0.1156761646270752,
      "step": 30541
    },
    {
      "epoch": 0.000186407470703125,
      "step": 30541,
      "training_step_time": 0.4060993194580078
    },
    {
      "epoch": 0.00018641357421875,
      "model_forward_time": 0.11452150344848633,
      "step": 30542
    },
    {
      "epoch": 0.00018641357421875,
      "step": 30542,
      "training_step_time": 0.3871421813964844
    },
    {
      "epoch": 0.000186419677734375,
      "model_forward_time": 0.11527514457702637,
      "step": 30543
    },
    {
      "epoch": 0.000186419677734375,
      "step": 30543,
      "training_step_time": 0.7576615810394287
    },
    {
      "epoch": 0.00018642578125,
      "model_forward_time": 0.11477351188659668,
      "step": 30544
    },
    {
      "epoch": 0.00018642578125,
      "step": 30544,
      "training_step_time": 0.39530515670776367
    },
    {
      "epoch": 0.000186431884765625,
      "model_forward_time": 0.11588096618652344,
      "step": 30545
    },
    {
      "epoch": 0.000186431884765625,
      "step": 30545,
      "training_step_time": 0.3842587471008301
    },
    {
      "epoch": 0.00018643798828125,
      "model_forward_time": 0.11496353149414062,
      "step": 30546
    },
    {
      "epoch": 0.00018643798828125,
      "step": 30546,
      "training_step_time": 0.4275393486022949
    },
    {
      "epoch": 0.000186444091796875,
      "model_forward_time": 0.11472463607788086,
      "step": 30547
    },
    {
      "epoch": 0.000186444091796875,
      "step": 30547,
      "training_step_time": 0.42560911178588867
    },
    {
      "epoch": 0.0001864501953125,
      "model_forward_time": 0.11530303955078125,
      "step": 30548
    },
    {
      "epoch": 0.0001864501953125,
      "step": 30548,
      "training_step_time": 0.4398362636566162
    },
    {
      "epoch": 0.000186456298828125,
      "model_forward_time": 0.11545181274414062,
      "step": 30549
    },
    {
      "epoch": 0.000186456298828125,
      "step": 30549,
      "training_step_time": 0.46270179748535156
    },
    {
      "epoch": 0.00018646240234375,
      "grad_norm": 0.17381501197814941,
      "learning_rate": 5.26167978121472e-05,
      "loss": 0.0454,
      "step": 30550
    },
    {
      "epoch": 0.00018646240234375,
      "model_forward_time": 0.11643123626708984,
      "step": 30550
    },
    {
      "epoch": 0.00018646240234375,
      "step": 30550,
      "training_step_time": 0.44983649253845215
    },
    {
      "epoch": 0.000186468505859375,
      "model_forward_time": 0.11516904830932617,
      "step": 30551
    },
    {
      "epoch": 0.000186468505859375,
      "step": 30551,
      "training_step_time": 0.3994605541229248
    },
    {
      "epoch": 0.000186474609375,
      "model_forward_time": 0.11597919464111328,
      "step": 30552
    },
    {
      "epoch": 0.000186474609375,
      "step": 30552,
      "training_step_time": 0.3921535015106201
    },
    {
      "epoch": 0.000186480712890625,
      "model_forward_time": 0.1156611442565918,
      "step": 30553
    },
    {
      "epoch": 0.000186480712890625,
      "step": 30553,
      "training_step_time": 0.38414788246154785
    },
    {
      "epoch": 0.00018648681640625,
      "model_forward_time": 0.11567091941833496,
      "step": 30554
    },
    {
      "epoch": 0.00018648681640625,
      "step": 30554,
      "training_step_time": 0.38298892974853516
    },
    {
      "epoch": 0.000186492919921875,
      "model_forward_time": 0.1144247055053711,
      "step": 30555
    },
    {
      "epoch": 0.000186492919921875,
      "step": 30555,
      "training_step_time": 1.2064547538757324
    },
    {
      "epoch": 0.0001864990234375,
      "model_forward_time": 0.11377358436584473,
      "step": 30556
    },
    {
      "epoch": 0.0001864990234375,
      "step": 30556,
      "training_step_time": 0.3994622230529785
    },
    {
      "epoch": 0.000186505126953125,
      "model_forward_time": 0.11342787742614746,
      "step": 30557
    },
    {
      "epoch": 0.000186505126953125,
      "step": 30557,
      "training_step_time": 0.44547152519226074
    },
    {
      "epoch": 0.00018651123046875,
      "model_forward_time": 0.1143641471862793,
      "step": 30558
    },
    {
      "epoch": 0.00018651123046875,
      "step": 30558,
      "training_step_time": 0.37354230880737305
    },
    {
      "epoch": 0.000186517333984375,
      "model_forward_time": 0.11481118202209473,
      "step": 30559
    },
    {
      "epoch": 0.000186517333984375,
      "step": 30559,
      "training_step_time": 0.43759703636169434
    },
    {
      "epoch": 0.0001865234375,
      "grad_norm": 0.11686660349369049,
      "learning_rate": 5.258927735283748e-05,
      "loss": 0.0439,
      "step": 30560
    },
    {
      "epoch": 0.0001865234375,
      "model_forward_time": 0.1142721176147461,
      "step": 30560
    },
    {
      "epoch": 0.0001865234375,
      "step": 30560,
      "training_step_time": 0.45017433166503906
    },
    {
      "epoch": 0.000186529541015625,
      "model_forward_time": 0.11444926261901855,
      "step": 30561
    },
    {
      "epoch": 0.000186529541015625,
      "step": 30561,
      "training_step_time": 0.4537680149078369
    },
    {
      "epoch": 0.00018653564453125,
      "model_forward_time": 0.11430239677429199,
      "step": 30562
    },
    {
      "epoch": 0.00018653564453125,
      "step": 30562,
      "training_step_time": 0.44086647033691406
    },
    {
      "epoch": 0.000186541748046875,
      "model_forward_time": 0.1149282455444336,
      "step": 30563
    },
    {
      "epoch": 0.000186541748046875,
      "step": 30563,
      "training_step_time": 0.450359582901001
    },
    {
      "epoch": 0.0001865478515625,
      "model_forward_time": 0.11475658416748047,
      "step": 30564
    },
    {
      "epoch": 0.0001865478515625,
      "step": 30564,
      "training_step_time": 0.3917849063873291
    },
    {
      "epoch": 0.000186553955078125,
      "model_forward_time": 0.11477804183959961,
      "step": 30565
    },
    {
      "epoch": 0.000186553955078125,
      "step": 30565,
      "training_step_time": 0.39546871185302734
    },
    {
      "epoch": 0.00018656005859375,
      "model_forward_time": 0.11434721946716309,
      "step": 30566
    },
    {
      "epoch": 0.00018656005859375,
      "step": 30566,
      "training_step_time": 0.3824429512023926
    },
    {
      "epoch": 0.000186566162109375,
      "model_forward_time": 0.11587738990783691,
      "step": 30567
    },
    {
      "epoch": 0.000186566162109375,
      "step": 30567,
      "training_step_time": 0.7517082691192627
    },
    {
      "epoch": 0.000186572265625,
      "model_forward_time": 0.11469244956970215,
      "step": 30568
    },
    {
      "epoch": 0.000186572265625,
      "step": 30568,
      "training_step_time": 0.3909621238708496
    },
    {
      "epoch": 0.000186578369140625,
      "model_forward_time": 0.11452317237854004,
      "step": 30569
    },
    {
      "epoch": 0.000186578369140625,
      "step": 30569,
      "training_step_time": 0.44632792472839355
    },
    {
      "epoch": 0.00018658447265625,
      "grad_norm": 0.15905505418777466,
      "learning_rate": 5.2561756106973656e-05,
      "loss": 0.0443,
      "step": 30570
    },
    {
      "epoch": 0.00018658447265625,
      "model_forward_time": 0.11543512344360352,
      "step": 30570
    },
    {
      "epoch": 0.00018658447265625,
      "step": 30570,
      "training_step_time": 0.45405101776123047
    },
    {
      "epoch": 0.000186590576171875,
      "model_forward_time": 0.11482095718383789,
      "step": 30571
    },
    {
      "epoch": 0.000186590576171875,
      "step": 30571,
      "training_step_time": 0.3852980136871338
    },
    {
      "epoch": 0.0001865966796875,
      "model_forward_time": 0.11436128616333008,
      "step": 30572
    },
    {
      "epoch": 0.0001865966796875,
      "step": 30572,
      "training_step_time": 0.3800017833709717
    },
    {
      "epoch": 0.000186602783203125,
      "model_forward_time": 0.11448216438293457,
      "step": 30573
    },
    {
      "epoch": 0.000186602783203125,
      "step": 30573,
      "training_step_time": 0.8883712291717529
    },
    {
      "epoch": 0.00018660888671875,
      "model_forward_time": 0.11438417434692383,
      "step": 30574
    },
    {
      "epoch": 0.00018660888671875,
      "step": 30574,
      "training_step_time": 0.42835569381713867
    },
    {
      "epoch": 0.000186614990234375,
      "model_forward_time": 0.1144096851348877,
      "step": 30575
    },
    {
      "epoch": 0.000186614990234375,
      "step": 30575,
      "training_step_time": 0.46491479873657227
    },
    {
      "epoch": 0.00018662109375,
      "model_forward_time": 0.11464428901672363,
      "step": 30576
    },
    {
      "epoch": 0.00018662109375,
      "step": 30576,
      "training_step_time": 0.3740692138671875
    },
    {
      "epoch": 0.000186627197265625,
      "model_forward_time": 0.1141510009765625,
      "step": 30577
    },
    {
      "epoch": 0.000186627197265625,
      "step": 30577,
      "training_step_time": 0.3848261833190918
    },
    {
      "epoch": 0.00018663330078125,
      "model_forward_time": 0.1142423152923584,
      "step": 30578
    },
    {
      "epoch": 0.00018663330078125,
      "step": 30578,
      "training_step_time": 0.3912386894226074
    },
    {
      "epoch": 0.000186639404296875,
      "model_forward_time": 0.11474275588989258,
      "step": 30579
    },
    {
      "epoch": 0.000186639404296875,
      "step": 30579,
      "training_step_time": 0.3906991481781006
    },
    {
      "epoch": 0.0001866455078125,
      "grad_norm": 0.17922469973564148,
      "learning_rate": 5.2534234082915976e-05,
      "loss": 0.0432,
      "step": 30580
    },
    {
      "epoch": 0.0001866455078125,
      "model_forward_time": 0.11531257629394531,
      "step": 30580
    },
    {
      "epoch": 0.0001866455078125,
      "step": 30580,
      "training_step_time": 0.3971741199493408
    },
    {
      "epoch": 0.000186651611328125,
      "model_forward_time": 0.11484503746032715,
      "step": 30581
    },
    {
      "epoch": 0.000186651611328125,
      "step": 30581,
      "training_step_time": 0.3859217166900635
    },
    {
      "epoch": 0.00018665771484375,
      "model_forward_time": 0.11565184593200684,
      "step": 30582
    },
    {
      "epoch": 0.00018665771484375,
      "step": 30582,
      "training_step_time": 0.37573909759521484
    },
    {
      "epoch": 0.000186663818359375,
      "model_forward_time": 0.11483454704284668,
      "step": 30583
    },
    {
      "epoch": 0.000186663818359375,
      "step": 30583,
      "training_step_time": 0.4076414108276367
    },
    {
      "epoch": 0.000186669921875,
      "model_forward_time": 0.11503958702087402,
      "step": 30584
    },
    {
      "epoch": 0.000186669921875,
      "step": 30584,
      "training_step_time": 0.44974851608276367
    },
    {
      "epoch": 0.000186676025390625,
      "model_forward_time": 0.11517024040222168,
      "step": 30585
    },
    {
      "epoch": 0.000186676025390625,
      "step": 30585,
      "training_step_time": 0.7951569557189941
    },
    {
      "epoch": 0.00018668212890625,
      "model_forward_time": 0.11391401290893555,
      "step": 30586
    },
    {
      "epoch": 0.00018668212890625,
      "step": 30586,
      "training_step_time": 0.40518999099731445
    },
    {
      "epoch": 0.000186688232421875,
      "model_forward_time": 0.11486244201660156,
      "step": 30587
    },
    {
      "epoch": 0.000186688232421875,
      "step": 30587,
      "training_step_time": 0.38745903968811035
    },
    {
      "epoch": 0.0001866943359375,
      "model_forward_time": 0.11487793922424316,
      "step": 30588
    },
    {
      "epoch": 0.0001866943359375,
      "step": 30588,
      "training_step_time": 0.4637165069580078
    },
    {
      "epoch": 0.000186700439453125,
      "model_forward_time": 0.11570930480957031,
      "step": 30589
    },
    {
      "epoch": 0.000186700439453125,
      "step": 30589,
      "training_step_time": 0.4054093360900879
    },
    {
      "epoch": 0.00018670654296875,
      "grad_norm": 0.1188758984208107,
      "learning_rate": 5.25067112890249e-05,
      "loss": 0.0419,
      "step": 30590
    },
    {
      "epoch": 0.00018670654296875,
      "model_forward_time": 0.11474251747131348,
      "step": 30590
    },
    {
      "epoch": 0.00018670654296875,
      "step": 30590,
      "training_step_time": 0.46593379974365234
    },
    {
      "epoch": 0.000186712646484375,
      "model_forward_time": 0.1148977279663086,
      "step": 30591
    },
    {
      "epoch": 0.000186712646484375,
      "step": 30591,
      "training_step_time": 0.5042998790740967
    },
    {
      "epoch": 0.00018671875,
      "model_forward_time": 0.11444211006164551,
      "step": 30592
    },
    {
      "epoch": 0.00018671875,
      "step": 30592,
      "training_step_time": 0.40294647216796875
    },
    {
      "epoch": 0.000186724853515625,
      "model_forward_time": 0.11429190635681152,
      "step": 30593
    },
    {
      "epoch": 0.000186724853515625,
      "step": 30593,
      "training_step_time": 0.40070390701293945
    },
    {
      "epoch": 0.00018673095703125,
      "model_forward_time": 0.11555838584899902,
      "step": 30594
    },
    {
      "epoch": 0.00018673095703125,
      "step": 30594,
      "training_step_time": 0.3796079158782959
    },
    {
      "epoch": 0.000186737060546875,
      "model_forward_time": 0.11491751670837402,
      "step": 30595
    },
    {
      "epoch": 0.000186737060546875,
      "step": 30595,
      "training_step_time": 0.3959474563598633
    },
    {
      "epoch": 0.0001867431640625,
      "model_forward_time": 0.11500191688537598,
      "step": 30596
    },
    {
      "epoch": 0.0001867431640625,
      "step": 30596,
      "training_step_time": 0.43643832206726074
    },
    {
      "epoch": 0.000186749267578125,
      "model_forward_time": 0.11443257331848145,
      "step": 30597
    },
    {
      "epoch": 0.000186749267578125,
      "step": 30597,
      "training_step_time": 1.0120346546173096
    },
    {
      "epoch": 0.00018675537109375,
      "model_forward_time": 0.1138765811920166,
      "step": 30598
    },
    {
      "epoch": 0.00018675537109375,
      "step": 30598,
      "training_step_time": 0.398817777633667
    },
    {
      "epoch": 0.000186761474609375,
      "model_forward_time": 0.11464428901672363,
      "step": 30599
    },
    {
      "epoch": 0.000186761474609375,
      "step": 30599,
      "training_step_time": 0.47307682037353516
    },
    {
      "epoch": 0.000186767578125,
      "grad_norm": 0.12340454012155533,
      "learning_rate": 5.247918773366112e-05,
      "loss": 0.0468,
      "step": 30600
    },
    {
      "epoch": 0.000186767578125,
      "model_forward_time": 0.11457657814025879,
      "step": 30600
    },
    {
      "epoch": 0.000186767578125,
      "step": 30600,
      "training_step_time": 0.40415167808532715
    },
    {
      "epoch": 0.000186773681640625,
      "model_forward_time": 0.11490559577941895,
      "step": 30601
    },
    {
      "epoch": 0.000186773681640625,
      "step": 30601,
      "training_step_time": 0.46416711807250977
    },
    {
      "epoch": 0.00018677978515625,
      "model_forward_time": 0.11385750770568848,
      "step": 30602
    },
    {
      "epoch": 0.00018677978515625,
      "step": 30602,
      "training_step_time": 0.3836355209350586
    },
    {
      "epoch": 0.000186785888671875,
      "model_forward_time": 0.11510252952575684,
      "step": 30603
    },
    {
      "epoch": 0.000186785888671875,
      "step": 30603,
      "training_step_time": 0.6991102695465088
    },
    {
      "epoch": 0.0001867919921875,
      "model_forward_time": 0.1142127513885498,
      "step": 30604
    },
    {
      "epoch": 0.0001867919921875,
      "step": 30604,
      "training_step_time": 0.3953859806060791
    },
    {
      "epoch": 0.000186798095703125,
      "model_forward_time": 0.11420559883117676,
      "step": 30605
    },
    {
      "epoch": 0.000186798095703125,
      "step": 30605,
      "training_step_time": 0.3905210494995117
    },
    {
      "epoch": 0.00018680419921875,
      "model_forward_time": 0.11433529853820801,
      "step": 30606
    },
    {
      "epoch": 0.00018680419921875,
      "step": 30606,
      "training_step_time": 0.36450624465942383
    },
    {
      "epoch": 0.000186810302734375,
      "model_forward_time": 0.11459875106811523,
      "step": 30607
    },
    {
      "epoch": 0.000186810302734375,
      "step": 30607,
      "training_step_time": 0.38355278968811035
    },
    {
      "epoch": 0.00018681640625,
      "model_forward_time": 0.1148526668548584,
      "step": 30608
    },
    {
      "epoch": 0.00018681640625,
      "step": 30608,
      "training_step_time": 0.3834648132324219
    },
    {
      "epoch": 0.000186822509765625,
      "model_forward_time": 0.11500668525695801,
      "step": 30609
    },
    {
      "epoch": 0.000186822509765625,
      "step": 30609,
      "training_step_time": 0.6927785873413086
    },
    {
      "epoch": 0.00018682861328125,
      "grad_norm": 0.11483678966760635,
      "learning_rate": 5.245166342518556e-05,
      "loss": 0.0436,
      "step": 30610
    },
    {
      "epoch": 0.00018682861328125,
      "model_forward_time": 0.11487054824829102,
      "step": 30610
    },
    {
      "epoch": 0.00018682861328125,
      "step": 30610,
      "training_step_time": 0.48493313789367676
    },
    {
      "epoch": 0.000186834716796875,
      "model_forward_time": 0.11478686332702637,
      "step": 30611
    },
    {
      "epoch": 0.000186834716796875,
      "step": 30611,
      "training_step_time": 0.39286255836486816
    },
    {
      "epoch": 0.0001868408203125,
      "model_forward_time": 0.11566162109375,
      "step": 30612
    },
    {
      "epoch": 0.0001868408203125,
      "step": 30612,
      "training_step_time": 0.3831512928009033
    },
    {
      "epoch": 0.000186846923828125,
      "model_forward_time": 0.11483621597290039,
      "step": 30613
    },
    {
      "epoch": 0.000186846923828125,
      "step": 30613,
      "training_step_time": 0.41949963569641113
    },
    {
      "epoch": 0.00018685302734375,
      "model_forward_time": 0.11468267440795898,
      "step": 30614
    },
    {
      "epoch": 0.00018685302734375,
      "step": 30614,
      "training_step_time": 0.3631772994995117
    },
    {
      "epoch": 0.000186859130859375,
      "model_forward_time": 0.11516380310058594,
      "step": 30615
    },
    {
      "epoch": 0.000186859130859375,
      "step": 30615,
      "training_step_time": 0.7290716171264648
    },
    {
      "epoch": 0.000186865234375,
      "model_forward_time": 0.1153111457824707,
      "step": 30616
    },
    {
      "epoch": 0.000186865234375,
      "step": 30616,
      "training_step_time": 0.397322416305542
    },
    {
      "epoch": 0.000186871337890625,
      "model_forward_time": 0.11501812934875488,
      "step": 30617
    },
    {
      "epoch": 0.000186871337890625,
      "step": 30617,
      "training_step_time": 0.37813878059387207
    },
    {
      "epoch": 0.00018687744140625,
      "model_forward_time": 0.11487197875976562,
      "step": 30618
    },
    {
      "epoch": 0.00018687744140625,
      "step": 30618,
      "training_step_time": 0.3855476379394531
    },
    {
      "epoch": 0.000186883544921875,
      "model_forward_time": 0.11491608619689941,
      "step": 30619
    },
    {
      "epoch": 0.000186883544921875,
      "step": 30619,
      "training_step_time": 0.4018383026123047
    },
    {
      "epoch": 0.0001868896484375,
      "grad_norm": 0.18076713383197784,
      "learning_rate": 5.242413837195938e-05,
      "loss": 0.0397,
      "step": 30620
    },
    {
      "epoch": 0.0001868896484375,
      "model_forward_time": 0.11432456970214844,
      "step": 30620
    },
    {
      "epoch": 0.0001868896484375,
      "step": 30620,
      "training_step_time": 0.40748167037963867
    },
    {
      "epoch": 0.000186895751953125,
      "model_forward_time": 0.11544609069824219,
      "step": 30621
    },
    {
      "epoch": 0.000186895751953125,
      "step": 30621,
      "training_step_time": 0.5650434494018555
    },
    {
      "epoch": 0.00018690185546875,
      "model_forward_time": 0.11574411392211914,
      "step": 30622
    },
    {
      "epoch": 0.00018690185546875,
      "step": 30622,
      "training_step_time": 0.4051039218902588
    },
    {
      "epoch": 0.000186907958984375,
      "model_forward_time": 0.11491990089416504,
      "step": 30623
    },
    {
      "epoch": 0.000186907958984375,
      "step": 30623,
      "training_step_time": 0.3900597095489502
    },
    {
      "epoch": 0.0001869140625,
      "model_forward_time": 0.11541056632995605,
      "step": 30624
    },
    {
      "epoch": 0.0001869140625,
      "step": 30624,
      "training_step_time": 0.4481971263885498
    },
    {
      "epoch": 0.000186920166015625,
      "model_forward_time": 0.11530947685241699,
      "step": 30625
    },
    {
      "epoch": 0.000186920166015625,
      "step": 30625,
      "training_step_time": 0.3888559341430664
    },
    {
      "epoch": 0.00018692626953125,
      "model_forward_time": 0.11502599716186523,
      "step": 30626
    },
    {
      "epoch": 0.00018692626953125,
      "step": 30626,
      "training_step_time": 0.4079282283782959
    },
    {
      "epoch": 0.000186932373046875,
      "model_forward_time": 0.11493110656738281,
      "step": 30627
    },
    {
      "epoch": 0.000186932373046875,
      "step": 30627,
      "training_step_time": 0.4259963035583496
    },
    {
      "epoch": 0.0001869384765625,
      "model_forward_time": 0.11493229866027832,
      "step": 30628
    },
    {
      "epoch": 0.0001869384765625,
      "step": 30628,
      "training_step_time": 0.4700624942779541
    },
    {
      "epoch": 0.000186944580078125,
      "model_forward_time": 0.11603426933288574,
      "step": 30629
    },
    {
      "epoch": 0.000186944580078125,
      "step": 30629,
      "training_step_time": 0.49413275718688965
    },
    {
      "epoch": 0.00018695068359375,
      "grad_norm": 0.12275757640600204,
      "learning_rate": 5.2396612582343986e-05,
      "loss": 0.0413,
      "step": 30630
    },
    {
      "epoch": 0.00018695068359375,
      "model_forward_time": 0.11596059799194336,
      "step": 30630
    },
    {
      "epoch": 0.00018695068359375,
      "step": 30630,
      "training_step_time": 0.39953136444091797
    },
    {
      "epoch": 0.000186956787109375,
      "model_forward_time": 0.11539864540100098,
      "step": 30631
    },
    {
      "epoch": 0.000186956787109375,
      "step": 30631,
      "training_step_time": 0.39672231674194336
    },
    {
      "epoch": 0.000186962890625,
      "model_forward_time": 0.11534976959228516,
      "step": 30632
    },
    {
      "epoch": 0.000186962890625,
      "step": 30632,
      "training_step_time": 0.4092705249786377
    },
    {
      "epoch": 0.000186968994140625,
      "model_forward_time": 0.11547708511352539,
      "step": 30633
    },
    {
      "epoch": 0.000186968994140625,
      "step": 30633,
      "training_step_time": 0.618981122970581
    },
    {
      "epoch": 0.00018697509765625,
      "model_forward_time": 0.11437559127807617,
      "step": 30634
    },
    {
      "epoch": 0.00018697509765625,
      "step": 30634,
      "training_step_time": 0.40851545333862305
    },
    {
      "epoch": 0.000186981201171875,
      "model_forward_time": 0.11519265174865723,
      "step": 30635
    },
    {
      "epoch": 0.000186981201171875,
      "step": 30635,
      "training_step_time": 0.3961031436920166
    },
    {
      "epoch": 0.0001869873046875,
      "model_forward_time": 0.11534500122070312,
      "step": 30636
    },
    {
      "epoch": 0.0001869873046875,
      "step": 30636,
      "training_step_time": 0.4206368923187256
    },
    {
      "epoch": 0.000186993408203125,
      "model_forward_time": 0.11447858810424805,
      "step": 30637
    },
    {
      "epoch": 0.000186993408203125,
      "step": 30637,
      "training_step_time": 0.42330384254455566
    },
    {
      "epoch": 0.00018699951171875,
      "model_forward_time": 0.11518669128417969,
      "step": 30638
    },
    {
      "epoch": 0.00018699951171875,
      "step": 30638,
      "training_step_time": 0.39623260498046875
    },
    {
      "epoch": 0.000187005615234375,
      "model_forward_time": 0.11550164222717285,
      "step": 30639
    },
    {
      "epoch": 0.000187005615234375,
      "step": 30639,
      "training_step_time": 0.41724109649658203
    },
    {
      "epoch": 0.00018701171875,
      "grad_norm": 0.12219101935625076,
      "learning_rate": 5.2369086064700945e-05,
      "loss": 0.0364,
      "step": 30640
    },
    {
      "epoch": 0.00018701171875,
      "model_forward_time": 0.1147458553314209,
      "step": 30640
    },
    {
      "epoch": 0.00018701171875,
      "step": 30640,
      "training_step_time": 0.4931981563568115
    },
    {
      "epoch": 0.000187017822265625,
      "model_forward_time": 0.11525487899780273,
      "step": 30641
    },
    {
      "epoch": 0.000187017822265625,
      "step": 30641,
      "training_step_time": 0.4278583526611328
    },
    {
      "epoch": 0.00018702392578125,
      "model_forward_time": 0.1157684326171875,
      "step": 30642
    },
    {
      "epoch": 0.00018702392578125,
      "step": 30642,
      "training_step_time": 0.4109058380126953
    },
    {
      "epoch": 0.000187030029296875,
      "model_forward_time": 0.1173253059387207,
      "step": 30643
    },
    {
      "epoch": 0.000187030029296875,
      "step": 30643,
      "training_step_time": 0.3806941509246826
    },
    {
      "epoch": 0.0001870361328125,
      "model_forward_time": 0.11954903602600098,
      "step": 30644
    },
    {
      "epoch": 0.0001870361328125,
      "step": 30644,
      "training_step_time": 0.391308069229126
    },
    {
      "epoch": 0.000187042236328125,
      "model_forward_time": 0.11936473846435547,
      "step": 30645
    },
    {
      "epoch": 0.000187042236328125,
      "step": 30645,
      "training_step_time": 0.3808252811431885
    },
    {
      "epoch": 0.00018704833984375,
      "model_forward_time": 0.11585640907287598,
      "step": 30646
    },
    {
      "epoch": 0.00018704833984375,
      "step": 30646,
      "training_step_time": 0.3895127773284912
    },
    {
      "epoch": 0.000187054443359375,
      "model_forward_time": 0.11512255668640137,
      "step": 30647
    },
    {
      "epoch": 0.000187054443359375,
      "step": 30647,
      "training_step_time": 0.39018893241882324
    },
    {
      "epoch": 0.000187060546875,
      "model_forward_time": 0.11539506912231445,
      "step": 30648
    },
    {
      "epoch": 0.000187060546875,
      "step": 30648,
      "training_step_time": 0.38222575187683105
    },
    {
      "epoch": 0.000187066650390625,
      "model_forward_time": 0.11545658111572266,
      "step": 30649
    },
    {
      "epoch": 0.000187066650390625,
      "step": 30649,
      "training_step_time": 0.3949861526489258
    },
    {
      "epoch": 0.00018707275390625,
      "grad_norm": 0.10970891267061234,
      "learning_rate": 5.234155882739212e-05,
      "loss": 0.0379,
      "step": 30650
    },
    {
      "epoch": 0.00018707275390625,
      "model_forward_time": 0.11550188064575195,
      "step": 30650
    },
    {
      "epoch": 0.00018707275390625,
      "step": 30650,
      "training_step_time": 0.4512016773223877
    },
    {
      "epoch": 0.000187078857421875,
      "model_forward_time": 0.11505627632141113,
      "step": 30651
    },
    {
      "epoch": 0.000187078857421875,
      "step": 30651,
      "training_step_time": 0.43944668769836426
    },
    {
      "epoch": 0.0001870849609375,
      "model_forward_time": 0.117156982421875,
      "step": 30652
    },
    {
      "epoch": 0.0001870849609375,
      "step": 30652,
      "training_step_time": 0.42530298233032227
    },
    {
      "epoch": 0.000187091064453125,
      "model_forward_time": 0.11508607864379883,
      "step": 30653
    },
    {
      "epoch": 0.000187091064453125,
      "step": 30653,
      "training_step_time": 0.3956589698791504
    },
    {
      "epoch": 0.00018709716796875,
      "model_forward_time": 0.1149587631225586,
      "step": 30654
    },
    {
      "epoch": 0.00018709716796875,
      "step": 30654,
      "training_step_time": 0.4136476516723633
    },
    {
      "epoch": 0.000187103271484375,
      "model_forward_time": 0.11438322067260742,
      "step": 30655
    },
    {
      "epoch": 0.000187103271484375,
      "step": 30655,
      "training_step_time": 0.3896944522857666
    },
    {
      "epoch": 0.000187109375,
      "model_forward_time": 0.11519694328308105,
      "step": 30656
    },
    {
      "epoch": 0.000187109375,
      "step": 30656,
      "training_step_time": 0.3939051628112793
    },
    {
      "epoch": 0.000187115478515625,
      "model_forward_time": 0.11569094657897949,
      "step": 30657
    },
    {
      "epoch": 0.000187115478515625,
      "step": 30657,
      "training_step_time": 0.5255167484283447
    },
    {
      "epoch": 0.00018712158203125,
      "model_forward_time": 0.1153419017791748,
      "step": 30658
    },
    {
      "epoch": 0.00018712158203125,
      "step": 30658,
      "training_step_time": 0.4070248603820801
    },
    {
      "epoch": 0.000187127685546875,
      "model_forward_time": 0.11492204666137695,
      "step": 30659
    },
    {
      "epoch": 0.000187127685546875,
      "step": 30659,
      "training_step_time": 0.4171793460845947
    },
    {
      "epoch": 0.0001871337890625,
      "grad_norm": 0.09159567952156067,
      "learning_rate": 5.231403087877955e-05,
      "loss": 0.044,
      "step": 30660
    },
    {
      "epoch": 0.0001871337890625,
      "model_forward_time": 0.11484313011169434,
      "step": 30660
    },
    {
      "epoch": 0.0001871337890625,
      "step": 30660,
      "training_step_time": 0.39299964904785156
    },
    {
      "epoch": 0.000187139892578125,
      "model_forward_time": 0.1149899959564209,
      "step": 30661
    },
    {
      "epoch": 0.000187139892578125,
      "step": 30661,
      "training_step_time": 0.37591028213500977
    },
    {
      "epoch": 0.00018714599609375,
      "model_forward_time": 0.11461567878723145,
      "step": 30662
    },
    {
      "epoch": 0.00018714599609375,
      "step": 30662,
      "training_step_time": 0.388211727142334
    },
    {
      "epoch": 0.000187152099609375,
      "model_forward_time": 0.1153099536895752,
      "step": 30663
    },
    {
      "epoch": 0.000187152099609375,
      "step": 30663,
      "training_step_time": 0.792496919631958
    },
    {
      "epoch": 0.000187158203125,
      "model_forward_time": 0.11417245864868164,
      "step": 30664
    },
    {
      "epoch": 0.000187158203125,
      "step": 30664,
      "training_step_time": 0.48247218132019043
    },
    {
      "epoch": 0.000187164306640625,
      "model_forward_time": 0.11405587196350098,
      "step": 30665
    },
    {
      "epoch": 0.000187164306640625,
      "step": 30665,
      "training_step_time": 0.4096646308898926
    },
    {
      "epoch": 0.00018717041015625,
      "model_forward_time": 0.11436057090759277,
      "step": 30666
    },
    {
      "epoch": 0.00018717041015625,
      "step": 30666,
      "training_step_time": 0.4270963668823242
    },
    {
      "epoch": 0.000187176513671875,
      "model_forward_time": 0.11497354507446289,
      "step": 30667
    },
    {
      "epoch": 0.000187176513671875,
      "step": 30667,
      "training_step_time": 0.39336538314819336
    },
    {
      "epoch": 0.0001871826171875,
      "model_forward_time": 0.11469507217407227,
      "step": 30668
    },
    {
      "epoch": 0.0001871826171875,
      "step": 30668,
      "training_step_time": 0.41715168952941895
    },
    {
      "epoch": 0.000187188720703125,
      "model_forward_time": 0.1157536506652832,
      "step": 30669
    },
    {
      "epoch": 0.000187188720703125,
      "step": 30669,
      "training_step_time": 0.7646818161010742
    },
    {
      "epoch": 0.00018719482421875,
      "grad_norm": 0.12768305838108063,
      "learning_rate": 5.2286502227225506e-05,
      "loss": 0.0424,
      "step": 30670
    },
    {
      "epoch": 0.00018719482421875,
      "model_forward_time": 0.11476588249206543,
      "step": 30670
    },
    {
      "epoch": 0.00018719482421875,
      "step": 30670,
      "training_step_time": 0.4428069591522217
    },
    {
      "epoch": 0.000187200927734375,
      "model_forward_time": 0.11459946632385254,
      "step": 30671
    },
    {
      "epoch": 0.000187200927734375,
      "step": 30671,
      "training_step_time": 0.44066715240478516
    },
    {
      "epoch": 0.00018720703125,
      "model_forward_time": 0.11436867713928223,
      "step": 30672
    },
    {
      "epoch": 0.00018720703125,
      "step": 30672,
      "training_step_time": 0.3900187015533447
    },
    {
      "epoch": 0.000187213134765625,
      "model_forward_time": 0.1145634651184082,
      "step": 30673
    },
    {
      "epoch": 0.000187213134765625,
      "step": 30673,
      "training_step_time": 0.37561964988708496
    },
    {
      "epoch": 0.00018721923828125,
      "model_forward_time": 0.1149606704711914,
      "step": 30674
    },
    {
      "epoch": 0.00018721923828125,
      "step": 30674,
      "training_step_time": 0.37715911865234375
    },
    {
      "epoch": 0.000187225341796875,
      "model_forward_time": 0.11473345756530762,
      "step": 30675
    },
    {
      "epoch": 0.000187225341796875,
      "step": 30675,
      "training_step_time": 0.38347864151000977
    },
    {
      "epoch": 0.0001872314453125,
      "model_forward_time": 0.11501908302307129,
      "step": 30676
    },
    {
      "epoch": 0.0001872314453125,
      "step": 30676,
      "training_step_time": 0.4045259952545166
    },
    {
      "epoch": 0.000187237548828125,
      "model_forward_time": 0.1150815486907959,
      "step": 30677
    },
    {
      "epoch": 0.000187237548828125,
      "step": 30677,
      "training_step_time": 0.395033597946167
    },
    {
      "epoch": 0.00018724365234375,
      "model_forward_time": 0.1152505874633789,
      "step": 30678
    },
    {
      "epoch": 0.00018724365234375,
      "step": 30678,
      "training_step_time": 0.4971802234649658
    },
    {
      "epoch": 0.000187249755859375,
      "model_forward_time": 0.114990234375,
      "step": 30679
    },
    {
      "epoch": 0.000187249755859375,
      "step": 30679,
      "training_step_time": 0.40244626998901367
    },
    {
      "epoch": 0.000187255859375,
      "grad_norm": 0.1665414273738861,
      "learning_rate": 5.225897288109245e-05,
      "loss": 0.0456,
      "step": 30680
    },
    {
      "epoch": 0.000187255859375,
      "model_forward_time": 0.11473822593688965,
      "step": 30680
    },
    {
      "epoch": 0.000187255859375,
      "step": 30680,
      "training_step_time": 0.4063248634338379
    },
    {
      "epoch": 0.000187261962890625,
      "model_forward_time": 0.11495304107666016,
      "step": 30681
    },
    {
      "epoch": 0.000187261962890625,
      "step": 30681,
      "training_step_time": 0.7844629287719727
    },
    {
      "epoch": 0.00018726806640625,
      "model_forward_time": 0.11414766311645508,
      "step": 30682
    },
    {
      "epoch": 0.00018726806640625,
      "step": 30682,
      "training_step_time": 0.43305516242980957
    },
    {
      "epoch": 0.000187274169921875,
      "model_forward_time": 0.11491537094116211,
      "step": 30683
    },
    {
      "epoch": 0.000187274169921875,
      "step": 30683,
      "training_step_time": 0.3853762149810791
    },
    {
      "epoch": 0.0001872802734375,
      "model_forward_time": 0.11466431617736816,
      "step": 30684
    },
    {
      "epoch": 0.0001872802734375,
      "step": 30684,
      "training_step_time": 0.3918905258178711
    },
    {
      "epoch": 0.000187286376953125,
      "model_forward_time": 0.11432433128356934,
      "step": 30685
    },
    {
      "epoch": 0.000187286376953125,
      "step": 30685,
      "training_step_time": 0.4139745235443115
    },
    {
      "epoch": 0.00018729248046875,
      "model_forward_time": 0.1150825023651123,
      "step": 30686
    },
    {
      "epoch": 0.00018729248046875,
      "step": 30686,
      "training_step_time": 0.38655734062194824
    },
    {
      "epoch": 0.000187298583984375,
      "model_forward_time": 0.11482715606689453,
      "step": 30687
    },
    {
      "epoch": 0.000187298583984375,
      "step": 30687,
      "training_step_time": 0.7072281837463379
    },
    {
      "epoch": 0.0001873046875,
      "model_forward_time": 0.11476635932922363,
      "step": 30688
    },
    {
      "epoch": 0.0001873046875,
      "step": 30688,
      "training_step_time": 0.40766477584838867
    },
    {
      "epoch": 0.000187310791015625,
      "model_forward_time": 0.11475038528442383,
      "step": 30689
    },
    {
      "epoch": 0.000187310791015625,
      "step": 30689,
      "training_step_time": 0.3869326114654541
    },
    {
      "epoch": 0.00018731689453125,
      "grad_norm": 0.09759959578514099,
      "learning_rate": 5.2231442848743064e-05,
      "loss": 0.0424,
      "step": 30690
    },
    {
      "epoch": 0.00018731689453125,
      "model_forward_time": 0.11516857147216797,
      "step": 30690
    },
    {
      "epoch": 0.00018731689453125,
      "step": 30690,
      "training_step_time": 0.39515185356140137
    },
    {
      "epoch": 0.000187322998046875,
      "model_forward_time": 0.11468744277954102,
      "step": 30691
    },
    {
      "epoch": 0.000187322998046875,
      "step": 30691,
      "training_step_time": 0.4622642993927002
    },
    {
      "epoch": 0.0001873291015625,
      "model_forward_time": 0.11494088172912598,
      "step": 30692
    },
    {
      "epoch": 0.0001873291015625,
      "step": 30692,
      "training_step_time": 0.436140775680542
    },
    {
      "epoch": 0.000187335205078125,
      "model_forward_time": 0.11559343338012695,
      "step": 30693
    },
    {
      "epoch": 0.000187335205078125,
      "step": 30693,
      "training_step_time": 0.4790189266204834
    },
    {
      "epoch": 0.00018734130859375,
      "model_forward_time": 0.11490559577941895,
      "step": 30694
    },
    {
      "epoch": 0.00018734130859375,
      "step": 30694,
      "training_step_time": 0.39235687255859375
    },
    {
      "epoch": 0.000187347412109375,
      "model_forward_time": 0.11444997787475586,
      "step": 30695
    },
    {
      "epoch": 0.000187347412109375,
      "step": 30695,
      "training_step_time": 0.39481520652770996
    },
    {
      "epoch": 0.000187353515625,
      "model_forward_time": 0.11493778228759766,
      "step": 30696
    },
    {
      "epoch": 0.000187353515625,
      "step": 30696,
      "training_step_time": 0.4688301086425781
    },
    {
      "epoch": 0.000187359619140625,
      "model_forward_time": 0.11474108695983887,
      "step": 30697
    },
    {
      "epoch": 0.000187359619140625,
      "step": 30697,
      "training_step_time": 0.39709997177124023
    },
    {
      "epoch": 0.00018736572265625,
      "model_forward_time": 0.1149296760559082,
      "step": 30698
    },
    {
      "epoch": 0.00018736572265625,
      "step": 30698,
      "training_step_time": 0.4466972351074219
    },
    {
      "epoch": 0.000187371826171875,
      "model_forward_time": 0.11539745330810547,
      "step": 30699
    },
    {
      "epoch": 0.000187371826171875,
      "step": 30699,
      "training_step_time": 0.484893798828125
    },
    {
      "epoch": 0.0001873779296875,
      "grad_norm": 0.10396500676870346,
      "learning_rate": 5.220391213854028e-05,
      "loss": 0.042,
      "step": 30700
    },
    {
      "epoch": 0.0001873779296875,
      "model_forward_time": 0.11513829231262207,
      "step": 30700
    },
    {
      "epoch": 0.0001873779296875,
      "step": 30700,
      "training_step_time": 0.41786789894104004
    },
    {
      "epoch": 0.000187384033203125,
      "model_forward_time": 0.11522841453552246,
      "step": 30701
    },
    {
      "epoch": 0.000187384033203125,
      "step": 30701,
      "training_step_time": 0.4001898765563965
    },
    {
      "epoch": 0.00018739013671875,
      "model_forward_time": 0.11512088775634766,
      "step": 30702
    },
    {
      "epoch": 0.00018739013671875,
      "step": 30702,
      "training_step_time": 0.39079952239990234
    },
    {
      "epoch": 0.000187396240234375,
      "model_forward_time": 0.11509513854980469,
      "step": 30703
    },
    {
      "epoch": 0.000187396240234375,
      "step": 30703,
      "training_step_time": 0.39850354194641113
    },
    {
      "epoch": 0.00018740234375,
      "model_forward_time": 0.11515617370605469,
      "step": 30704
    },
    {
      "epoch": 0.00018740234375,
      "step": 30704,
      "training_step_time": 0.39881205558776855
    },
    {
      "epoch": 0.000187408447265625,
      "model_forward_time": 0.11716628074645996,
      "step": 30705
    },
    {
      "epoch": 0.000187408447265625,
      "step": 30705,
      "training_step_time": 0.4280409812927246
    },
    {
      "epoch": 0.00018741455078125,
      "model_forward_time": 0.11433601379394531,
      "step": 30706
    },
    {
      "epoch": 0.00018741455078125,
      "step": 30706,
      "training_step_time": 0.40233469009399414
    },
    {
      "epoch": 0.000187420654296875,
      "model_forward_time": 0.11480069160461426,
      "step": 30707
    },
    {
      "epoch": 0.000187420654296875,
      "step": 30707,
      "training_step_time": 0.5103349685668945
    },
    {
      "epoch": 0.0001874267578125,
      "model_forward_time": 0.11609554290771484,
      "step": 30708
    },
    {
      "epoch": 0.0001874267578125,
      "step": 30708,
      "training_step_time": 0.3933732509613037
    },
    {
      "epoch": 0.000187432861328125,
      "model_forward_time": 0.11478114128112793,
      "step": 30709
    },
    {
      "epoch": 0.000187432861328125,
      "step": 30709,
      "training_step_time": 0.4018256664276123
    },
    {
      "epoch": 0.00018743896484375,
      "grad_norm": 0.09824138134717941,
      "learning_rate": 5.217638075884716e-05,
      "loss": 0.0453,
      "step": 30710
    },
    {
      "epoch": 0.00018743896484375,
      "model_forward_time": 0.1151280403137207,
      "step": 30710
    },
    {
      "epoch": 0.00018743896484375,
      "step": 30710,
      "training_step_time": 0.42820167541503906
    },
    {
      "epoch": 0.000187445068359375,
      "model_forward_time": 0.11606168746948242,
      "step": 30711
    },
    {
      "epoch": 0.000187445068359375,
      "step": 30711,
      "training_step_time": 0.5201671123504639
    },
    {
      "epoch": 0.000187451171875,
      "model_forward_time": 0.11501955986022949,
      "step": 30712
    },
    {
      "epoch": 0.000187451171875,
      "step": 30712,
      "training_step_time": 0.4135308265686035
    },
    {
      "epoch": 0.000187457275390625,
      "model_forward_time": 0.11521649360656738,
      "step": 30713
    },
    {
      "epoch": 0.000187457275390625,
      "step": 30713,
      "training_step_time": 0.5107669830322266
    },
    {
      "epoch": 0.00018746337890625,
      "model_forward_time": 0.11923837661743164,
      "step": 30714
    },
    {
      "epoch": 0.00018746337890625,
      "step": 30714,
      "training_step_time": 0.48975467681884766
    },
    {
      "epoch": 0.000187469482421875,
      "model_forward_time": 0.11648058891296387,
      "step": 30715
    },
    {
      "epoch": 0.000187469482421875,
      "step": 30715,
      "training_step_time": 0.38019776344299316
    },
    {
      "epoch": 0.0001874755859375,
      "model_forward_time": 0.11509084701538086,
      "step": 30716
    },
    {
      "epoch": 0.0001874755859375,
      "step": 30716,
      "training_step_time": 0.3872952461242676
    },
    {
      "epoch": 0.000187481689453125,
      "model_forward_time": 0.1149740219116211,
      "step": 30717
    },
    {
      "epoch": 0.000187481689453125,
      "step": 30717,
      "training_step_time": 0.7703375816345215
    },
    {
      "epoch": 0.00018748779296875,
      "model_forward_time": 0.11420631408691406,
      "step": 30718
    },
    {
      "epoch": 0.00018748779296875,
      "step": 30718,
      "training_step_time": 0.4078350067138672
    },
    {
      "epoch": 0.000187493896484375,
      "model_forward_time": 0.11391687393188477,
      "step": 30719
    },
    {
      "epoch": 0.000187493896484375,
      "step": 30719,
      "training_step_time": 0.40427374839782715
    },
    {
      "epoch": 0.0001875,
      "grad_norm": 0.13171713054180145,
      "learning_rate": 5.214884871802703e-05,
      "loss": 0.044,
      "step": 30720
    },
    {
      "epoch": 0.0001875,
      "model_forward_time": 0.11462736129760742,
      "step": 30720
    },
    {
      "epoch": 0.0001875,
      "step": 30720,
      "training_step_time": 0.4438900947570801
    },
    {
      "epoch": 0.000187506103515625,
      "model_forward_time": 0.11403393745422363,
      "step": 30721
    },
    {
      "epoch": 0.000187506103515625,
      "step": 30721,
      "training_step_time": 0.38794851303100586
    },
    {
      "epoch": 0.00018751220703125,
      "model_forward_time": 0.11436772346496582,
      "step": 30722
    },
    {
      "epoch": 0.00018751220703125,
      "step": 30722,
      "training_step_time": 0.387164831161499
    },
    {
      "epoch": 0.000187518310546875,
      "model_forward_time": 0.11644577980041504,
      "step": 30723
    },
    {
      "epoch": 0.000187518310546875,
      "step": 30723,
      "training_step_time": 0.6646349430084229
    },
    {
      "epoch": 0.0001875244140625,
      "model_forward_time": 0.1145620346069336,
      "step": 30724
    },
    {
      "epoch": 0.0001875244140625,
      "step": 30724,
      "training_step_time": 0.40751123428344727
    },
    {
      "epoch": 0.000187530517578125,
      "model_forward_time": 0.115142822265625,
      "step": 30725
    },
    {
      "epoch": 0.000187530517578125,
      "step": 30725,
      "training_step_time": 0.40758419036865234
    },
    {
      "epoch": 0.00018753662109375,
      "model_forward_time": 0.11426448822021484,
      "step": 30726
    },
    {
      "epoch": 0.00018753662109375,
      "step": 30726,
      "training_step_time": 0.44155311584472656
    },
    {
      "epoch": 0.000187542724609375,
      "model_forward_time": 0.11529135704040527,
      "step": 30727
    },
    {
      "epoch": 0.000187542724609375,
      "step": 30727,
      "training_step_time": 0.4771697521209717
    },
    {
      "epoch": 0.000187548828125,
      "model_forward_time": 0.1151573657989502,
      "step": 30728
    },
    {
      "epoch": 0.000187548828125,
      "step": 30728,
      "training_step_time": 0.39445996284484863
    },
    {
      "epoch": 0.000187554931640625,
      "model_forward_time": 0.11540889739990234,
      "step": 30729
    },
    {
      "epoch": 0.000187554931640625,
      "step": 30729,
      "training_step_time": 0.5200810432434082
    },
    {
      "epoch": 0.00018756103515625,
      "grad_norm": 0.1013747826218605,
      "learning_rate": 5.2121316024443415e-05,
      "loss": 0.0503,
      "step": 30730
    },
    {
      "epoch": 0.00018756103515625,
      "model_forward_time": 0.11587357521057129,
      "step": 30730
    },
    {
      "epoch": 0.00018756103515625,
      "step": 30730,
      "training_step_time": 0.43827342987060547
    },
    {
      "epoch": 0.000187567138671875,
      "model_forward_time": 0.1146395206451416,
      "step": 30731
    },
    {
      "epoch": 0.000187567138671875,
      "step": 30731,
      "training_step_time": 0.3829801082611084
    },
    {
      "epoch": 0.0001875732421875,
      "model_forward_time": 0.1147618293762207,
      "step": 30732
    },
    {
      "epoch": 0.0001875732421875,
      "step": 30732,
      "training_step_time": 0.46715331077575684
    },
    {
      "epoch": 0.000187579345703125,
      "model_forward_time": 0.11455631256103516,
      "step": 30733
    },
    {
      "epoch": 0.000187579345703125,
      "step": 30733,
      "training_step_time": 0.39790916442871094
    },
    {
      "epoch": 0.00018758544921875,
      "model_forward_time": 0.11488652229309082,
      "step": 30734
    },
    {
      "epoch": 0.00018758544921875,
      "step": 30734,
      "training_step_time": 0.38909387588500977
    },
    {
      "epoch": 0.000187591552734375,
      "model_forward_time": 0.1147763729095459,
      "step": 30735
    },
    {
      "epoch": 0.000187591552734375,
      "step": 30735,
      "training_step_time": 1.0167863368988037
    },
    {
      "epoch": 0.00018759765625,
      "model_forward_time": 0.11450314521789551,
      "step": 30736
    },
    {
      "epoch": 0.00018759765625,
      "step": 30736,
      "training_step_time": 0.39544010162353516
    },
    {
      "epoch": 0.000187603759765625,
      "model_forward_time": 0.11388421058654785,
      "step": 30737
    },
    {
      "epoch": 0.000187603759765625,
      "step": 30737,
      "training_step_time": 0.40015292167663574
    },
    {
      "epoch": 0.00018760986328125,
      "model_forward_time": 0.11445927619934082,
      "step": 30738
    },
    {
      "epoch": 0.00018760986328125,
      "step": 30738,
      "training_step_time": 0.44588327407836914
    },
    {
      "epoch": 0.000187615966796875,
      "model_forward_time": 0.11437344551086426,
      "step": 30739
    },
    {
      "epoch": 0.000187615966796875,
      "step": 30739,
      "training_step_time": 0.4723856449127197
    },
    {
      "epoch": 0.0001876220703125,
      "grad_norm": 0.11952853202819824,
      "learning_rate": 5.209378268645998e-05,
      "loss": 0.0422,
      "step": 30740
    },
    {
      "epoch": 0.0001876220703125,
      "model_forward_time": 0.1142418384552002,
      "step": 30740
    },
    {
      "epoch": 0.0001876220703125,
      "step": 30740,
      "training_step_time": 0.4554300308227539
    },
    {
      "epoch": 0.000187628173828125,
      "model_forward_time": 0.11509060859680176,
      "step": 30741
    },
    {
      "epoch": 0.000187628173828125,
      "step": 30741,
      "training_step_time": 0.703115701675415
    },
    {
      "epoch": 0.00018763427734375,
      "model_forward_time": 0.11424970626831055,
      "step": 30742
    },
    {
      "epoch": 0.00018763427734375,
      "step": 30742,
      "training_step_time": 0.3972053527832031
    },
    {
      "epoch": 0.000187640380859375,
      "model_forward_time": 0.11409950256347656,
      "step": 30743
    },
    {
      "epoch": 0.000187640380859375,
      "step": 30743,
      "training_step_time": 0.38823747634887695
    },
    {
      "epoch": 0.000187646484375,
      "model_forward_time": 0.11372780799865723,
      "step": 30744
    },
    {
      "epoch": 0.000187646484375,
      "step": 30744,
      "training_step_time": 0.44707512855529785
    },
    {
      "epoch": 0.000187652587890625,
      "model_forward_time": 0.11433649063110352,
      "step": 30745
    },
    {
      "epoch": 0.000187652587890625,
      "step": 30745,
      "training_step_time": 0.3922269344329834
    },
    {
      "epoch": 0.00018765869140625,
      "model_forward_time": 0.11407947540283203,
      "step": 30746
    },
    {
      "epoch": 0.00018765869140625,
      "step": 30746,
      "training_step_time": 0.4065563678741455
    },
    {
      "epoch": 0.000187664794921875,
      "model_forward_time": 0.11520814895629883,
      "step": 30747
    },
    {
      "epoch": 0.000187664794921875,
      "step": 30747,
      "training_step_time": 0.9186851978302002
    },
    {
      "epoch": 0.0001876708984375,
      "model_forward_time": 0.11461138725280762,
      "step": 30748
    },
    {
      "epoch": 0.0001876708984375,
      "step": 30748,
      "training_step_time": 0.39604926109313965
    },
    {
      "epoch": 0.000187677001953125,
      "model_forward_time": 0.11386728286743164,
      "step": 30749
    },
    {
      "epoch": 0.000187677001953125,
      "step": 30749,
      "training_step_time": 0.3888697624206543
    },
    {
      "epoch": 0.00018768310546875,
      "grad_norm": 0.09443927556276321,
      "learning_rate": 5.2066248712440656e-05,
      "loss": 0.0423,
      "step": 30750
    },
    {
      "epoch": 0.00018768310546875,
      "model_forward_time": 0.11411237716674805,
      "step": 30750
    },
    {
      "epoch": 0.00018768310546875,
      "step": 30750,
      "training_step_time": 0.4563422203063965
    },
    {
      "epoch": 0.000187689208984375,
      "model_forward_time": 0.1145792007446289,
      "step": 30751
    },
    {
      "epoch": 0.000187689208984375,
      "step": 30751,
      "training_step_time": 0.38286447525024414
    },
    {
      "epoch": 0.0001876953125,
      "model_forward_time": 0.11410284042358398,
      "step": 30752
    },
    {
      "epoch": 0.0001876953125,
      "step": 30752,
      "training_step_time": 0.41263675689697266
    },
    {
      "epoch": 0.000187701416015625,
      "model_forward_time": 0.11517333984375,
      "step": 30753
    },
    {
      "epoch": 0.000187701416015625,
      "step": 30753,
      "training_step_time": 0.5108349323272705
    },
    {
      "epoch": 0.00018770751953125,
      "model_forward_time": 0.11443853378295898,
      "step": 30754
    },
    {
      "epoch": 0.00018770751953125,
      "step": 30754,
      "training_step_time": 0.4271121025085449
    },
    {
      "epoch": 0.000187713623046875,
      "model_forward_time": 0.11449790000915527,
      "step": 30755
    },
    {
      "epoch": 0.000187713623046875,
      "step": 30755,
      "training_step_time": 0.3896353244781494
    },
    {
      "epoch": 0.0001877197265625,
      "model_forward_time": 0.11466217041015625,
      "step": 30756
    },
    {
      "epoch": 0.0001877197265625,
      "step": 30756,
      "training_step_time": 0.3896148204803467
    },
    {
      "epoch": 0.000187725830078125,
      "model_forward_time": 0.11498403549194336,
      "step": 30757
    },
    {
      "epoch": 0.000187725830078125,
      "step": 30757,
      "training_step_time": 0.39327454566955566
    },
    {
      "epoch": 0.00018773193359375,
      "model_forward_time": 0.11543774604797363,
      "step": 30758
    },
    {
      "epoch": 0.00018773193359375,
      "step": 30758,
      "training_step_time": 0.3943607807159424
    },
    {
      "epoch": 0.000187738037109375,
      "model_forward_time": 0.11525940895080566,
      "step": 30759
    },
    {
      "epoch": 0.000187738037109375,
      "step": 30759,
      "training_step_time": 0.618079662322998
    },
    {
      "epoch": 0.000187744140625,
      "grad_norm": 0.1347246617078781,
      "learning_rate": 5.203871411074954e-05,
      "loss": 0.0404,
      "step": 30760
    },
    {
      "epoch": 0.000187744140625,
      "model_forward_time": 0.11497616767883301,
      "step": 30760
    },
    {
      "epoch": 0.000187744140625,
      "step": 30760,
      "training_step_time": 0.40039587020874023
    },
    {
      "epoch": 0.000187750244140625,
      "model_forward_time": 0.11481857299804688,
      "step": 30761
    },
    {
      "epoch": 0.000187750244140625,
      "step": 30761,
      "training_step_time": 0.38895726203918457
    },
    {
      "epoch": 0.00018775634765625,
      "model_forward_time": 0.11500120162963867,
      "step": 30762
    },
    {
      "epoch": 0.00018775634765625,
      "step": 30762,
      "training_step_time": 0.39893484115600586
    },
    {
      "epoch": 0.000187762451171875,
      "model_forward_time": 0.11526322364807129,
      "step": 30763
    },
    {
      "epoch": 0.000187762451171875,
      "step": 30763,
      "training_step_time": 0.3870127201080322
    },
    {
      "epoch": 0.0001877685546875,
      "model_forward_time": 0.11521601676940918,
      "step": 30764
    },
    {
      "epoch": 0.0001877685546875,
      "step": 30764,
      "training_step_time": 0.3982880115509033
    },
    {
      "epoch": 0.000187774658203125,
      "model_forward_time": 0.11520504951477051,
      "step": 30765
    },
    {
      "epoch": 0.000187774658203125,
      "step": 30765,
      "training_step_time": 0.5898184776306152
    },
    {
      "epoch": 0.00018778076171875,
      "model_forward_time": 0.1153409481048584,
      "step": 30766
    },
    {
      "epoch": 0.00018778076171875,
      "step": 30766,
      "training_step_time": 0.40777111053466797
    },
    {
      "epoch": 0.000187786865234375,
      "model_forward_time": 0.11510634422302246,
      "step": 30767
    },
    {
      "epoch": 0.000187786865234375,
      "step": 30767,
      "training_step_time": 0.48865389823913574
    },
    {
      "epoch": 0.00018779296875,
      "model_forward_time": 0.11623072624206543,
      "step": 30768
    },
    {
      "epoch": 0.00018779296875,
      "step": 30768,
      "training_step_time": 0.42856454849243164
    },
    {
      "epoch": 0.000187799072265625,
      "model_forward_time": 0.11604642868041992,
      "step": 30769
    },
    {
      "epoch": 0.000187799072265625,
      "step": 30769,
      "training_step_time": 0.39917755126953125
    },
    {
      "epoch": 0.00018780517578125,
      "grad_norm": 0.10646648705005646,
      "learning_rate": 5.20111788897509e-05,
      "loss": 0.0466,
      "step": 30770
    },
    {
      "epoch": 0.00018780517578125,
      "model_forward_time": 0.11493754386901855,
      "step": 30770
    },
    {
      "epoch": 0.00018780517578125,
      "step": 30770,
      "training_step_time": 0.3906593322753906
    },
    {
      "epoch": 0.000187811279296875,
      "model_forward_time": 0.11493349075317383,
      "step": 30771
    },
    {
      "epoch": 0.000187811279296875,
      "step": 30771,
      "training_step_time": 0.450716495513916
    },
    {
      "epoch": 0.0001878173828125,
      "model_forward_time": 0.11597585678100586,
      "step": 30772
    },
    {
      "epoch": 0.0001878173828125,
      "step": 30772,
      "training_step_time": 0.4019758701324463
    },
    {
      "epoch": 0.000187823486328125,
      "model_forward_time": 0.11426806449890137,
      "step": 30773
    },
    {
      "epoch": 0.000187823486328125,
      "step": 30773,
      "training_step_time": 0.42517566680908203
    },
    {
      "epoch": 0.00018782958984375,
      "model_forward_time": 0.1151125431060791,
      "step": 30774
    },
    {
      "epoch": 0.00018782958984375,
      "step": 30774,
      "training_step_time": 0.39890027046203613
    },
    {
      "epoch": 0.000187835693359375,
      "model_forward_time": 0.11587905883789062,
      "step": 30775
    },
    {
      "epoch": 0.000187835693359375,
      "step": 30775,
      "training_step_time": 0.38408470153808594
    },
    {
      "epoch": 0.000187841796875,
      "model_forward_time": 0.11433792114257812,
      "step": 30776
    },
    {
      "epoch": 0.000187841796875,
      "step": 30776,
      "training_step_time": 0.4142599105834961
    },
    {
      "epoch": 0.000187847900390625,
      "model_forward_time": 0.11477828025817871,
      "step": 30777
    },
    {
      "epoch": 0.000187847900390625,
      "step": 30777,
      "training_step_time": 0.3905200958251953
    },
    {
      "epoch": 0.00018785400390625,
      "model_forward_time": 0.11544060707092285,
      "step": 30778
    },
    {
      "epoch": 0.00018785400390625,
      "step": 30778,
      "training_step_time": 0.46214723587036133
    },
    {
      "epoch": 0.000187860107421875,
      "model_forward_time": 0.1151583194732666,
      "step": 30779
    },
    {
      "epoch": 0.000187860107421875,
      "step": 30779,
      "training_step_time": 0.42055559158325195
    },
    {
      "epoch": 0.0001878662109375,
      "grad_norm": 0.1307811588048935,
      "learning_rate": 5.198364305780922e-05,
      "loss": 0.043,
      "step": 30780
    },
    {
      "epoch": 0.0001878662109375,
      "model_forward_time": 0.11716532707214355,
      "step": 30780
    },
    {
      "epoch": 0.0001878662109375,
      "step": 30780,
      "training_step_time": 0.4247152805328369
    },
    {
      "epoch": 0.000187872314453125,
      "model_forward_time": 0.11521458625793457,
      "step": 30781
    },
    {
      "epoch": 0.000187872314453125,
      "step": 30781,
      "training_step_time": 0.4557149410247803
    },
    {
      "epoch": 0.00018787841796875,
      "model_forward_time": 0.11508631706237793,
      "step": 30782
    },
    {
      "epoch": 0.00018787841796875,
      "step": 30782,
      "training_step_time": 0.4898841381072998
    },
    {
      "epoch": 0.000187884521484375,
      "model_forward_time": 0.11552286148071289,
      "step": 30783
    },
    {
      "epoch": 0.000187884521484375,
      "step": 30783,
      "training_step_time": 0.3970756530761719
    },
    {
      "epoch": 0.000187890625,
      "model_forward_time": 0.1151423454284668,
      "step": 30784
    },
    {
      "epoch": 0.000187890625,
      "step": 30784,
      "training_step_time": 0.3852498531341553
    },
    {
      "epoch": 0.000187896728515625,
      "model_forward_time": 0.11462712287902832,
      "step": 30785
    },
    {
      "epoch": 0.000187896728515625,
      "step": 30785,
      "training_step_time": 0.39629411697387695
    },
    {
      "epoch": 0.00018790283203125,
      "model_forward_time": 0.11549520492553711,
      "step": 30786
    },
    {
      "epoch": 0.00018790283203125,
      "step": 30786,
      "training_step_time": 0.45145297050476074
    },
    {
      "epoch": 0.000187908935546875,
      "model_forward_time": 0.1146078109741211,
      "step": 30787
    },
    {
      "epoch": 0.000187908935546875,
      "step": 30787,
      "training_step_time": 0.4939308166503906
    },
    {
      "epoch": 0.0001879150390625,
      "model_forward_time": 0.11528849601745605,
      "step": 30788
    },
    {
      "epoch": 0.0001879150390625,
      "step": 30788,
      "training_step_time": 0.38963961601257324
    },
    {
      "epoch": 0.000187921142578125,
      "model_forward_time": 0.11557745933532715,
      "step": 30789
    },
    {
      "epoch": 0.000187921142578125,
      "step": 30789,
      "training_step_time": 0.8328573703765869
    },
    {
      "epoch": 0.00018792724609375,
      "grad_norm": 0.11685695499181747,
      "learning_rate": 5.1956106623289145e-05,
      "loss": 0.047,
      "step": 30790
    },
    {
      "epoch": 0.00018792724609375,
      "model_forward_time": 0.1137843132019043,
      "step": 30790
    },
    {
      "epoch": 0.00018792724609375,
      "step": 30790,
      "training_step_time": 0.39543652534484863
    },
    {
      "epoch": 0.000187933349609375,
      "model_forward_time": 0.11433815956115723,
      "step": 30791
    },
    {
      "epoch": 0.000187933349609375,
      "step": 30791,
      "training_step_time": 0.37905192375183105
    },
    {
      "epoch": 0.000187939453125,
      "model_forward_time": 0.11376190185546875,
      "step": 30792
    },
    {
      "epoch": 0.000187939453125,
      "step": 30792,
      "training_step_time": 0.40815114974975586
    },
    {
      "epoch": 0.000187945556640625,
      "model_forward_time": 0.11483430862426758,
      "step": 30793
    },
    {
      "epoch": 0.000187945556640625,
      "step": 30793,
      "training_step_time": 0.43892478942871094
    },
    {
      "epoch": 0.00018795166015625,
      "model_forward_time": 0.11532330513000488,
      "step": 30794
    },
    {
      "epoch": 0.00018795166015625,
      "step": 30794,
      "training_step_time": 0.36344075202941895
    },
    {
      "epoch": 0.000187957763671875,
      "model_forward_time": 0.11542177200317383,
      "step": 30795
    },
    {
      "epoch": 0.000187957763671875,
      "step": 30795,
      "training_step_time": 0.684354305267334
    },
    {
      "epoch": 0.0001879638671875,
      "model_forward_time": 0.11515951156616211,
      "step": 30796
    },
    {
      "epoch": 0.0001879638671875,
      "step": 30796,
      "training_step_time": 0.39769744873046875
    },
    {
      "epoch": 0.000187969970703125,
      "model_forward_time": 0.11453628540039062,
      "step": 30797
    },
    {
      "epoch": 0.000187969970703125,
      "step": 30797,
      "training_step_time": 0.3818831443786621
    },
    {
      "epoch": 0.00018797607421875,
      "model_forward_time": 0.11471939086914062,
      "step": 30798
    },
    {
      "epoch": 0.00018797607421875,
      "step": 30798,
      "training_step_time": 0.3860130310058594
    },
    {
      "epoch": 0.000187982177734375,
      "model_forward_time": 0.1154024600982666,
      "step": 30799
    },
    {
      "epoch": 0.000187982177734375,
      "step": 30799,
      "training_step_time": 0.3962383270263672
    },
    {
      "epoch": 0.00018798828125,
      "grad_norm": 0.20493297278881073,
      "learning_rate": 5.1928569594555524e-05,
      "loss": 0.0391,
      "step": 30800
    },
    {
      "epoch": 0.00018798828125,
      "model_forward_time": 0.11485552787780762,
      "step": 30800
    },
    {
      "epoch": 0.00018798828125,
      "step": 30800,
      "training_step_time": 0.43334031105041504
    },
    {
      "epoch": 0.000187994384765625,
      "model_forward_time": 0.11539149284362793,
      "step": 30801
    },
    {
      "epoch": 0.000187994384765625,
      "step": 30801,
      "training_step_time": 0.7236661911010742
    },
    {
      "epoch": 0.00018800048828125,
      "model_forward_time": 0.11455249786376953,
      "step": 30802
    },
    {
      "epoch": 0.00018800048828125,
      "step": 30802,
      "training_step_time": 0.39912891387939453
    },
    {
      "epoch": 0.000188006591796875,
      "model_forward_time": 0.1151587963104248,
      "step": 30803
    },
    {
      "epoch": 0.000188006591796875,
      "step": 30803,
      "training_step_time": 0.388782262802124
    },
    {
      "epoch": 0.0001880126953125,
      "model_forward_time": 0.11438488960266113,
      "step": 30804
    },
    {
      "epoch": 0.0001880126953125,
      "step": 30804,
      "training_step_time": 0.40259456634521484
    },
    {
      "epoch": 0.000188018798828125,
      "model_forward_time": 0.11443758010864258,
      "step": 30805
    },
    {
      "epoch": 0.000188018798828125,
      "step": 30805,
      "training_step_time": 0.4293179512023926
    },
    {
      "epoch": 0.00018802490234375,
      "model_forward_time": 0.11455321311950684,
      "step": 30806
    },
    {
      "epoch": 0.00018802490234375,
      "step": 30806,
      "training_step_time": 0.4165201187133789
    },
    {
      "epoch": 0.000188031005859375,
      "model_forward_time": 0.11504602432250977,
      "step": 30807
    },
    {
      "epoch": 0.000188031005859375,
      "step": 30807,
      "training_step_time": 0.55782151222229
    },
    {
      "epoch": 0.000188037109375,
      "model_forward_time": 0.11470580101013184,
      "step": 30808
    },
    {
      "epoch": 0.000188037109375,
      "step": 30808,
      "training_step_time": 0.4498744010925293
    },
    {
      "epoch": 0.000188043212890625,
      "model_forward_time": 0.1148374080657959,
      "step": 30809
    },
    {
      "epoch": 0.000188043212890625,
      "step": 30809,
      "training_step_time": 0.500478982925415
    },
    {
      "epoch": 0.00018804931640625,
      "grad_norm": 0.10887108743190765,
      "learning_rate": 5.1901031979973394e-05,
      "loss": 0.0371,
      "step": 30810
    },
    {
      "epoch": 0.00018804931640625,
      "model_forward_time": 0.1159508228302002,
      "step": 30810
    },
    {
      "epoch": 0.00018804931640625,
      "step": 30810,
      "training_step_time": 0.4016106128692627
    },
    {
      "epoch": 0.000188055419921875,
      "model_forward_time": 0.11449193954467773,
      "step": 30811
    },
    {
      "epoch": 0.000188055419921875,
      "step": 30811,
      "training_step_time": 0.3799099922180176
    },
    {
      "epoch": 0.0001880615234375,
      "model_forward_time": 0.11545276641845703,
      "step": 30812
    },
    {
      "epoch": 0.0001880615234375,
      "step": 30812,
      "training_step_time": 0.4134058952331543
    },
    {
      "epoch": 0.000188067626953125,
      "model_forward_time": 0.11535406112670898,
      "step": 30813
    },
    {
      "epoch": 0.000188067626953125,
      "step": 30813,
      "training_step_time": 0.5674762725830078
    },
    {
      "epoch": 0.00018807373046875,
      "model_forward_time": 0.11519408226013184,
      "step": 30814
    },
    {
      "epoch": 0.00018807373046875,
      "step": 30814,
      "training_step_time": 0.47273731231689453
    },
    {
      "epoch": 0.000188079833984375,
      "model_forward_time": 0.11462926864624023,
      "step": 30815
    },
    {
      "epoch": 0.000188079833984375,
      "step": 30815,
      "training_step_time": 0.3749420642852783
    },
    {
      "epoch": 0.0001880859375,
      "model_forward_time": 0.11474728584289551,
      "step": 30816
    },
    {
      "epoch": 0.0001880859375,
      "step": 30816,
      "training_step_time": 0.3775753974914551
    },
    {
      "epoch": 0.000188092041015625,
      "model_forward_time": 0.11491894721984863,
      "step": 30817
    },
    {
      "epoch": 0.000188092041015625,
      "step": 30817,
      "training_step_time": 0.3746199607849121
    },
    {
      "epoch": 0.00018809814453125,
      "model_forward_time": 0.1161196231842041,
      "step": 30818
    },
    {
      "epoch": 0.00018809814453125,
      "step": 30818,
      "training_step_time": 0.37947559356689453
    },
    {
      "epoch": 0.000188104248046875,
      "model_forward_time": 0.11519312858581543,
      "step": 30819
    },
    {
      "epoch": 0.000188104248046875,
      "step": 30819,
      "training_step_time": 0.9373230934143066
    },
    {
      "epoch": 0.0001881103515625,
      "grad_norm": 0.11189240217208862,
      "learning_rate": 5.187349378790793e-05,
      "loss": 0.0386,
      "step": 30820
    },
    {
      "epoch": 0.0001881103515625,
      "model_forward_time": 0.1141350269317627,
      "step": 30820
    },
    {
      "epoch": 0.0001881103515625,
      "step": 30820,
      "training_step_time": 0.4533243179321289
    },
    {
      "epoch": 0.000188116455078125,
      "model_forward_time": 0.11482501029968262,
      "step": 30821
    },
    {
      "epoch": 0.000188116455078125,
      "step": 30821,
      "training_step_time": 0.36356067657470703
    },
    {
      "epoch": 0.00018812255859375,
      "model_forward_time": 0.1145486831665039,
      "step": 30822
    },
    {
      "epoch": 0.00018812255859375,
      "step": 30822,
      "training_step_time": 0.4278223514556885
    },
    {
      "epoch": 0.000188128662109375,
      "model_forward_time": 0.11449527740478516,
      "step": 30823
    },
    {
      "epoch": 0.000188128662109375,
      "step": 30823,
      "training_step_time": 0.387326717376709
    },
    {
      "epoch": 0.000188134765625,
      "model_forward_time": 0.11402511596679688,
      "step": 30824
    },
    {
      "epoch": 0.000188134765625,
      "step": 30824,
      "training_step_time": 0.39890623092651367
    },
    {
      "epoch": 0.000188140869140625,
      "model_forward_time": 0.11502432823181152,
      "step": 30825
    },
    {
      "epoch": 0.000188140869140625,
      "step": 30825,
      "training_step_time": 0.7368273735046387
    },
    {
      "epoch": 0.00018814697265625,
      "model_forward_time": 0.11475968360900879,
      "step": 30826
    },
    {
      "epoch": 0.00018814697265625,
      "step": 30826,
      "training_step_time": 0.40824317932128906
    },
    {
      "epoch": 0.000188153076171875,
      "model_forward_time": 0.1142277717590332,
      "step": 30827
    },
    {
      "epoch": 0.000188153076171875,
      "step": 30827,
      "training_step_time": 0.39383840560913086
    },
    {
      "epoch": 0.0001881591796875,
      "model_forward_time": 0.11467695236206055,
      "step": 30828
    },
    {
      "epoch": 0.0001881591796875,
      "step": 30828,
      "training_step_time": 0.3870840072631836
    },
    {
      "epoch": 0.000188165283203125,
      "model_forward_time": 0.1149599552154541,
      "step": 30829
    },
    {
      "epoch": 0.000188165283203125,
      "step": 30829,
      "training_step_time": 0.38416290283203125
    },
    {
      "epoch": 0.00018817138671875,
      "grad_norm": 0.16616518795490265,
      "learning_rate": 5.1845955026724535e-05,
      "loss": 0.0483,
      "step": 30830
    },
    {
      "epoch": 0.00018817138671875,
      "model_forward_time": 0.1145162582397461,
      "step": 30830
    },
    {
      "epoch": 0.00018817138671875,
      "step": 30830,
      "training_step_time": 0.3836362361907959
    },
    {
      "epoch": 0.000188177490234375,
      "model_forward_time": 0.11481547355651855,
      "step": 30831
    },
    {
      "epoch": 0.000188177490234375,
      "step": 30831,
      "training_step_time": 0.9216976165771484
    },
    {
      "epoch": 0.00018818359375,
      "model_forward_time": 0.11467671394348145,
      "step": 30832
    },
    {
      "epoch": 0.00018818359375,
      "step": 30832,
      "training_step_time": 0.395906925201416
    },
    {
      "epoch": 0.000188189697265625,
      "model_forward_time": 0.11433243751525879,
      "step": 30833
    },
    {
      "epoch": 0.000188189697265625,
      "step": 30833,
      "training_step_time": 0.41236257553100586
    },
    {
      "epoch": 0.00018819580078125,
      "model_forward_time": 0.11429333686828613,
      "step": 30834
    },
    {
      "epoch": 0.00018819580078125,
      "step": 30834,
      "training_step_time": 0.3627662658691406
    },
    {
      "epoch": 0.000188201904296875,
      "model_forward_time": 0.11436009407043457,
      "step": 30835
    },
    {
      "epoch": 0.000188201904296875,
      "step": 30835,
      "training_step_time": 0.4323256015777588
    },
    {
      "epoch": 0.0001882080078125,
      "model_forward_time": 0.11445403099060059,
      "step": 30836
    },
    {
      "epoch": 0.0001882080078125,
      "step": 30836,
      "training_step_time": 0.42014265060424805
    },
    {
      "epoch": 0.000188214111328125,
      "model_forward_time": 0.11474609375,
      "step": 30837
    },
    {
      "epoch": 0.000188214111328125,
      "step": 30837,
      "training_step_time": 0.3981196880340576
    },
    {
      "epoch": 0.00018822021484375,
      "model_forward_time": 0.11509490013122559,
      "step": 30838
    },
    {
      "epoch": 0.00018822021484375,
      "step": 30838,
      "training_step_time": 0.3943347930908203
    },
    {
      "epoch": 0.000188226318359375,
      "model_forward_time": 0.1147618293762207,
      "step": 30839
    },
    {
      "epoch": 0.000188226318359375,
      "step": 30839,
      "training_step_time": 0.4033622741699219
    },
    {
      "epoch": 0.000188232421875,
      "grad_norm": 0.12887975573539734,
      "learning_rate": 5.1818415704788725e-05,
      "loss": 0.0443,
      "step": 30840
    },
    {
      "epoch": 0.000188232421875,
      "model_forward_time": 0.1154775619506836,
      "step": 30840
    },
    {
      "epoch": 0.000188232421875,
      "step": 30840,
      "training_step_time": 0.4855978488922119
    },
    {
      "epoch": 0.000188238525390625,
      "model_forward_time": 0.11540508270263672,
      "step": 30841
    },
    {
      "epoch": 0.000188238525390625,
      "step": 30841,
      "training_step_time": 0.39577484130859375
    },
    {
      "epoch": 0.00018824462890625,
      "model_forward_time": 0.1153411865234375,
      "step": 30842
    },
    {
      "epoch": 0.00018824462890625,
      "step": 30842,
      "training_step_time": 0.3802785873413086
    },
    {
      "epoch": 0.000188250732421875,
      "model_forward_time": 0.1149129867553711,
      "step": 30843
    },
    {
      "epoch": 0.000188250732421875,
      "step": 30843,
      "training_step_time": 0.45841526985168457
    },
    {
      "epoch": 0.0001882568359375,
      "model_forward_time": 0.11591935157775879,
      "step": 30844
    },
    {
      "epoch": 0.0001882568359375,
      "step": 30844,
      "training_step_time": 0.40113329887390137
    },
    {
      "epoch": 0.000188262939453125,
      "model_forward_time": 0.11530637741088867,
      "step": 30845
    },
    {
      "epoch": 0.000188262939453125,
      "step": 30845,
      "training_step_time": 0.3894813060760498
    },
    {
      "epoch": 0.00018826904296875,
      "model_forward_time": 0.11520051956176758,
      "step": 30846
    },
    {
      "epoch": 0.00018826904296875,
      "step": 30846,
      "training_step_time": 0.4338350296020508
    },
    {
      "epoch": 0.000188275146484375,
      "model_forward_time": 0.11539101600646973,
      "step": 30847
    },
    {
      "epoch": 0.000188275146484375,
      "step": 30847,
      "training_step_time": 0.4656357765197754
    },
    {
      "epoch": 0.00018828125,
      "model_forward_time": 0.11564397811889648,
      "step": 30848
    },
    {
      "epoch": 0.00018828125,
      "step": 30848,
      "training_step_time": 0.3942406177520752
    },
    {
      "epoch": 0.000188287353515625,
      "model_forward_time": 0.11623334884643555,
      "step": 30849
    },
    {
      "epoch": 0.000188287353515625,
      "step": 30849,
      "training_step_time": 0.6390085220336914
    },
    {
      "epoch": 0.00018829345703125,
      "grad_norm": 0.0935215875506401,
      "learning_rate": 5.179087583046625e-05,
      "loss": 0.0429,
      "step": 30850
    },
    {
      "epoch": 0.00018829345703125,
      "model_forward_time": 0.11448836326599121,
      "step": 30850
    },
    {
      "epoch": 0.00018829345703125,
      "step": 30850,
      "training_step_time": 0.4817957878112793
    },
    {
      "epoch": 0.000188299560546875,
      "model_forward_time": 0.11528253555297852,
      "step": 30851
    },
    {
      "epoch": 0.000188299560546875,
      "step": 30851,
      "training_step_time": 0.3938770294189453
    },
    {
      "epoch": 0.0001883056640625,
      "model_forward_time": 0.11561822891235352,
      "step": 30852
    },
    {
      "epoch": 0.0001883056640625,
      "step": 30852,
      "training_step_time": 0.3798089027404785
    },
    {
      "epoch": 0.000188311767578125,
      "model_forward_time": 0.114715576171875,
      "step": 30853
    },
    {
      "epoch": 0.000188311767578125,
      "step": 30853,
      "training_step_time": 0.4058399200439453
    },
    {
      "epoch": 0.00018831787109375,
      "model_forward_time": 0.1154336929321289,
      "step": 30854
    },
    {
      "epoch": 0.00018831787109375,
      "step": 30854,
      "training_step_time": 0.4851572513580322
    },
    {
      "epoch": 0.000188323974609375,
      "model_forward_time": 0.11494922637939453,
      "step": 30855
    },
    {
      "epoch": 0.000188323974609375,
      "step": 30855,
      "training_step_time": 0.7042756080627441
    },
    {
      "epoch": 0.000188330078125,
      "model_forward_time": 0.11440372467041016,
      "step": 30856
    },
    {
      "epoch": 0.000188330078125,
      "step": 30856,
      "training_step_time": 0.40172243118286133
    },
    {
      "epoch": 0.000188336181640625,
      "model_forward_time": 0.11467456817626953,
      "step": 30857
    },
    {
      "epoch": 0.000188336181640625,
      "step": 30857,
      "training_step_time": 0.39008617401123047
    },
    {
      "epoch": 0.00018834228515625,
      "model_forward_time": 0.11466765403747559,
      "step": 30858
    },
    {
      "epoch": 0.00018834228515625,
      "step": 30858,
      "training_step_time": 0.397247314453125
    },
    {
      "epoch": 0.000188348388671875,
      "model_forward_time": 0.11475920677185059,
      "step": 30859
    },
    {
      "epoch": 0.000188348388671875,
      "step": 30859,
      "training_step_time": 0.42088985443115234
    },
    {
      "epoch": 0.0001883544921875,
      "grad_norm": 0.11145149916410446,
      "learning_rate": 5.176333541212296e-05,
      "loss": 0.0404,
      "step": 30860
    },
    {
      "epoch": 0.0001883544921875,
      "model_forward_time": 0.11519694328308105,
      "step": 30860
    },
    {
      "epoch": 0.0001883544921875,
      "step": 30860,
      "training_step_time": 0.38355445861816406
    },
    {
      "epoch": 0.000188360595703125,
      "model_forward_time": 0.11495041847229004,
      "step": 30861
    },
    {
      "epoch": 0.000188360595703125,
      "step": 30861,
      "training_step_time": 0.7508633136749268
    },
    {
      "epoch": 0.00018836669921875,
      "model_forward_time": 0.11504721641540527,
      "step": 30862
    },
    {
      "epoch": 0.00018836669921875,
      "step": 30862,
      "training_step_time": 0.3776063919067383
    },
    {
      "epoch": 0.000188372802734375,
      "model_forward_time": 0.11503934860229492,
      "step": 30863
    },
    {
      "epoch": 0.000188372802734375,
      "step": 30863,
      "training_step_time": 0.41632580757141113
    },
    {
      "epoch": 0.00018837890625,
      "model_forward_time": 0.11423158645629883,
      "step": 30864
    },
    {
      "epoch": 0.00018837890625,
      "step": 30864,
      "training_step_time": 0.3932688236236572
    },
    {
      "epoch": 0.000188385009765625,
      "model_forward_time": 0.11465239524841309,
      "step": 30865
    },
    {
      "epoch": 0.000188385009765625,
      "step": 30865,
      "training_step_time": 0.38564586639404297
    },
    {
      "epoch": 0.00018839111328125,
      "model_forward_time": 0.11455488204956055,
      "step": 30866
    },
    {
      "epoch": 0.00018839111328125,
      "step": 30866,
      "training_step_time": 0.42553210258483887
    },
    {
      "epoch": 0.000188397216796875,
      "model_forward_time": 0.11577081680297852,
      "step": 30867
    },
    {
      "epoch": 0.000188397216796875,
      "step": 30867,
      "training_step_time": 0.6448957920074463
    },
    {
      "epoch": 0.0001884033203125,
      "model_forward_time": 0.11456084251403809,
      "step": 30868
    },
    {
      "epoch": 0.0001884033203125,
      "step": 30868,
      "training_step_time": 0.3997046947479248
    },
    {
      "epoch": 0.000188409423828125,
      "model_forward_time": 0.11714410781860352,
      "step": 30869
    },
    {
      "epoch": 0.000188409423828125,
      "step": 30869,
      "training_step_time": 0.4042484760284424
    },
    {
      "epoch": 0.00018841552734375,
      "grad_norm": 0.13961003720760345,
      "learning_rate": 5.1735794458124956e-05,
      "loss": 0.0455,
      "step": 30870
    },
    {
      "epoch": 0.00018841552734375,
      "model_forward_time": 0.11477780342102051,
      "step": 30870
    },
    {
      "epoch": 0.00018841552734375,
      "step": 30870,
      "training_step_time": 0.3968033790588379
    },
    {
      "epoch": 0.000188421630859375,
      "model_forward_time": 0.11436104774475098,
      "step": 30871
    },
    {
      "epoch": 0.000188421630859375,
      "step": 30871,
      "training_step_time": 0.38735318183898926
    },
    {
      "epoch": 0.000188427734375,
      "model_forward_time": 0.1149606704711914,
      "step": 30872
    },
    {
      "epoch": 0.000188427734375,
      "step": 30872,
      "training_step_time": 0.4537801742553711
    },
    {
      "epoch": 0.000188433837890625,
      "model_forward_time": 0.1146695613861084,
      "step": 30873
    },
    {
      "epoch": 0.000188433837890625,
      "step": 30873,
      "training_step_time": 0.9811632633209229
    },
    {
      "epoch": 0.00018843994140625,
      "model_forward_time": 0.11480879783630371,
      "step": 30874
    },
    {
      "epoch": 0.00018843994140625,
      "step": 30874,
      "training_step_time": 0.3965630531311035
    },
    {
      "epoch": 0.000188446044921875,
      "model_forward_time": 0.11459565162658691,
      "step": 30875
    },
    {
      "epoch": 0.000188446044921875,
      "step": 30875,
      "training_step_time": 0.376939058303833
    },
    {
      "epoch": 0.0001884521484375,
      "model_forward_time": 0.11476588249206543,
      "step": 30876
    },
    {
      "epoch": 0.0001884521484375,
      "step": 30876,
      "training_step_time": 0.40897631645202637
    },
    {
      "epoch": 0.000188458251953125,
      "model_forward_time": 0.11416935920715332,
      "step": 30877
    },
    {
      "epoch": 0.000188458251953125,
      "step": 30877,
      "training_step_time": 0.3832273483276367
    },
    {
      "epoch": 0.00018846435546875,
      "model_forward_time": 0.11463713645935059,
      "step": 30878
    },
    {
      "epoch": 0.00018846435546875,
      "step": 30878,
      "training_step_time": 0.38776540756225586
    },
    {
      "epoch": 0.000188470458984375,
      "model_forward_time": 0.11520147323608398,
      "step": 30879
    },
    {
      "epoch": 0.000188470458984375,
      "step": 30879,
      "training_step_time": 0.9070584774017334
    },
    {
      "epoch": 0.0001884765625,
      "grad_norm": 0.12278030812740326,
      "learning_rate": 5.170825297683841e-05,
      "loss": 0.0432,
      "step": 30880
    },
    {
      "epoch": 0.0001884765625,
      "model_forward_time": 0.11401510238647461,
      "step": 30880
    },
    {
      "epoch": 0.0001884765625,
      "step": 30880,
      "training_step_time": 0.3970308303833008
    },
    {
      "epoch": 0.000188482666015625,
      "model_forward_time": 0.11470723152160645,
      "step": 30881
    },
    {
      "epoch": 0.000188482666015625,
      "step": 30881,
      "training_step_time": 0.39966750144958496
    },
    {
      "epoch": 0.00018848876953125,
      "model_forward_time": 0.11443758010864258,
      "step": 30882
    },
    {
      "epoch": 0.00018848876953125,
      "step": 30882,
      "training_step_time": 0.40619587898254395
    },
    {
      "epoch": 0.000188494873046875,
      "model_forward_time": 0.11429309844970703,
      "step": 30883
    },
    {
      "epoch": 0.000188494873046875,
      "step": 30883,
      "training_step_time": 0.3825805187225342
    },
    {
      "epoch": 0.0001885009765625,
      "model_forward_time": 0.11476993560791016,
      "step": 30884
    },
    {
      "epoch": 0.0001885009765625,
      "step": 30884,
      "training_step_time": 0.38381409645080566
    },
    {
      "epoch": 0.000188507080078125,
      "model_forward_time": 0.11505317687988281,
      "step": 30885
    },
    {
      "epoch": 0.000188507080078125,
      "step": 30885,
      "training_step_time": 0.7558319568634033
    },
    {
      "epoch": 0.00018851318359375,
      "model_forward_time": 0.11481475830078125,
      "step": 30886
    },
    {
      "epoch": 0.00018851318359375,
      "step": 30886,
      "training_step_time": 0.41161322593688965
    },
    {
      "epoch": 0.000188519287109375,
      "model_forward_time": 0.11507940292358398,
      "step": 30887
    },
    {
      "epoch": 0.000188519287109375,
      "step": 30887,
      "training_step_time": 0.3873746395111084
    },
    {
      "epoch": 0.000188525390625,
      "model_forward_time": 0.1145486831665039,
      "step": 30888
    },
    {
      "epoch": 0.000188525390625,
      "step": 30888,
      "training_step_time": 0.3644890785217285
    },
    {
      "epoch": 0.000188531494140625,
      "model_forward_time": 0.11461949348449707,
      "step": 30889
    },
    {
      "epoch": 0.000188531494140625,
      "step": 30889,
      "training_step_time": 0.43358492851257324
    },
    {
      "epoch": 0.00018853759765625,
      "grad_norm": 0.10912112891674042,
      "learning_rate": 5.168071097662972e-05,
      "loss": 0.0393,
      "step": 30890
    },
    {
      "epoch": 0.00018853759765625,
      "model_forward_time": 0.1151576042175293,
      "step": 30890
    },
    {
      "epoch": 0.00018853759765625,
      "step": 30890,
      "training_step_time": 0.4131009578704834
    },
    {
      "epoch": 0.000188543701171875,
      "model_forward_time": 0.11505818367004395,
      "step": 30891
    },
    {
      "epoch": 0.000188543701171875,
      "step": 30891,
      "training_step_time": 0.7222018241882324
    },
    {
      "epoch": 0.0001885498046875,
      "model_forward_time": 0.1140141487121582,
      "step": 30892
    },
    {
      "epoch": 0.0001885498046875,
      "step": 30892,
      "training_step_time": 0.41741347312927246
    },
    {
      "epoch": 0.000188555908203125,
      "model_forward_time": 0.11384892463684082,
      "step": 30893
    },
    {
      "epoch": 0.000188555908203125,
      "step": 30893,
      "training_step_time": 0.4233405590057373
    },
    {
      "epoch": 0.00018856201171875,
      "model_forward_time": 0.11489486694335938,
      "step": 30894
    },
    {
      "epoch": 0.00018856201171875,
      "step": 30894,
      "training_step_time": 0.39569830894470215
    },
    {
      "epoch": 0.000188568115234375,
      "model_forward_time": 0.11448192596435547,
      "step": 30895
    },
    {
      "epoch": 0.000188568115234375,
      "step": 30895,
      "training_step_time": 0.39559483528137207
    },
    {
      "epoch": 0.00018857421875,
      "model_forward_time": 0.11460375785827637,
      "step": 30896
    },
    {
      "epoch": 0.00018857421875,
      "step": 30896,
      "training_step_time": 0.37998056411743164
    },
    {
      "epoch": 0.000188580322265625,
      "model_forward_time": 0.11547541618347168,
      "step": 30897
    },
    {
      "epoch": 0.000188580322265625,
      "step": 30897,
      "training_step_time": 0.8257453441619873
    },
    {
      "epoch": 0.00018858642578125,
      "model_forward_time": 0.11495327949523926,
      "step": 30898
    },
    {
      "epoch": 0.00018858642578125,
      "step": 30898,
      "training_step_time": 0.40231990814208984
    },
    {
      "epoch": 0.000188592529296875,
      "model_forward_time": 0.11409187316894531,
      "step": 30899
    },
    {
      "epoch": 0.000188592529296875,
      "step": 30899,
      "training_step_time": 0.38067030906677246
    },
    {
      "epoch": 0.0001885986328125,
      "grad_norm": 0.11814741045236588,
      "learning_rate": 5.165316846586541e-05,
      "loss": 0.0406,
      "step": 30900
    },
    {
      "epoch": 0.0001885986328125,
      "model_forward_time": 0.11452102661132812,
      "step": 30900
    },
    {
      "epoch": 0.0001885986328125,
      "step": 30900,
      "training_step_time": 0.4137256145477295
    },
    {
      "epoch": 0.000188604736328125,
      "model_forward_time": 0.11423039436340332,
      "step": 30901
    },
    {
      "epoch": 0.000188604736328125,
      "step": 30901,
      "training_step_time": 0.36321020126342773
    },
    {
      "epoch": 0.00018861083984375,
      "model_forward_time": 0.11432886123657227,
      "step": 30902
    },
    {
      "epoch": 0.00018861083984375,
      "step": 30902,
      "training_step_time": 0.43575501441955566
    },
    {
      "epoch": 0.000188616943359375,
      "model_forward_time": 0.11520743370056152,
      "step": 30903
    },
    {
      "epoch": 0.000188616943359375,
      "step": 30903,
      "training_step_time": 0.3874382972717285
    },
    {
      "epoch": 0.000188623046875,
      "model_forward_time": 0.11463475227355957,
      "step": 30904
    },
    {
      "epoch": 0.000188623046875,
      "step": 30904,
      "training_step_time": 0.39228272438049316
    },
    {
      "epoch": 0.000188629150390625,
      "model_forward_time": 0.11528348922729492,
      "step": 30905
    },
    {
      "epoch": 0.000188629150390625,
      "step": 30905,
      "training_step_time": 0.4701817035675049
    },
    {
      "epoch": 0.00018863525390625,
      "model_forward_time": 0.11583304405212402,
      "step": 30906
    },
    {
      "epoch": 0.00018863525390625,
      "step": 30906,
      "training_step_time": 0.48742246627807617
    },
    {
      "epoch": 0.000188641357421875,
      "model_forward_time": 0.11457633972167969,
      "step": 30907
    },
    {
      "epoch": 0.000188641357421875,
      "step": 30907,
      "training_step_time": 0.4394974708557129
    },
    {
      "epoch": 0.0001886474609375,
      "model_forward_time": 0.11467194557189941,
      "step": 30908
    },
    {
      "epoch": 0.0001886474609375,
      "step": 30908,
      "training_step_time": 0.3911864757537842
    },
    {
      "epoch": 0.000188653564453125,
      "model_forward_time": 0.11538815498352051,
      "step": 30909
    },
    {
      "epoch": 0.000188653564453125,
      "step": 30909,
      "training_step_time": 0.38674402236938477
    },
    {
      "epoch": 0.00018865966796875,
      "grad_norm": 0.1128213033080101,
      "learning_rate": 5.1625625452912174e-05,
      "loss": 0.0429,
      "step": 30910
    },
    {
      "epoch": 0.00018865966796875,
      "model_forward_time": 0.11477065086364746,
      "step": 30910
    },
    {
      "epoch": 0.00018865966796875,
      "step": 30910,
      "training_step_time": 0.39536285400390625
    },
    {
      "epoch": 0.000188665771484375,
      "model_forward_time": 0.11493921279907227,
      "step": 30911
    },
    {
      "epoch": 0.000188665771484375,
      "step": 30911,
      "training_step_time": 0.39186954498291016
    },
    {
      "epoch": 0.000188671875,
      "model_forward_time": 0.11616969108581543,
      "step": 30912
    },
    {
      "epoch": 0.000188671875,
      "step": 30912,
      "training_step_time": 0.4709303379058838
    },
    {
      "epoch": 0.000188677978515625,
      "model_forward_time": 0.11500763893127441,
      "step": 30913
    },
    {
      "epoch": 0.000188677978515625,
      "step": 30913,
      "training_step_time": 0.39893007278442383
    },
    {
      "epoch": 0.00018868408203125,
      "model_forward_time": 0.1153254508972168,
      "step": 30914
    },
    {
      "epoch": 0.00018868408203125,
      "step": 30914,
      "training_step_time": 0.40422558784484863
    },
    {
      "epoch": 0.000188690185546875,
      "model_forward_time": 0.11572003364562988,
      "step": 30915
    },
    {
      "epoch": 0.000188690185546875,
      "step": 30915,
      "training_step_time": 0.698737382888794
    },
    {
      "epoch": 0.0001886962890625,
      "model_forward_time": 0.11482453346252441,
      "step": 30916
    },
    {
      "epoch": 0.0001886962890625,
      "step": 30916,
      "training_step_time": 0.5002152919769287
    },
    {
      "epoch": 0.000188702392578125,
      "model_forward_time": 0.11515069007873535,
      "step": 30917
    },
    {
      "epoch": 0.000188702392578125,
      "step": 30917,
      "training_step_time": 0.48235535621643066
    },
    {
      "epoch": 0.00018870849609375,
      "model_forward_time": 0.11519575119018555,
      "step": 30918
    },
    {
      "epoch": 0.00018870849609375,
      "step": 30918,
      "training_step_time": 0.4270212650299072
    },
    {
      "epoch": 0.000188714599609375,
      "model_forward_time": 0.11450529098510742,
      "step": 30919
    },
    {
      "epoch": 0.000188714599609375,
      "step": 30919,
      "training_step_time": 0.39838337898254395
    },
    {
      "epoch": 0.000188720703125,
      "grad_norm": 0.12531952559947968,
      "learning_rate": 5.1598081946136843e-05,
      "loss": 0.0383,
      "step": 30920
    },
    {
      "epoch": 0.000188720703125,
      "model_forward_time": 0.1144101619720459,
      "step": 30920
    },
    {
      "epoch": 0.000188720703125,
      "step": 30920,
      "training_step_time": 0.37380313873291016
    },
    {
      "epoch": 0.000188726806640625,
      "model_forward_time": 0.1144866943359375,
      "step": 30921
    },
    {
      "epoch": 0.000188726806640625,
      "step": 30921,
      "training_step_time": 0.44661974906921387
    },
    {
      "epoch": 0.00018873291015625,
      "model_forward_time": 0.11519432067871094,
      "step": 30922
    },
    {
      "epoch": 0.00018873291015625,
      "step": 30922,
      "training_step_time": 0.3893144130706787
    },
    {
      "epoch": 0.000188739013671875,
      "model_forward_time": 0.11463308334350586,
      "step": 30923
    },
    {
      "epoch": 0.000188739013671875,
      "step": 30923,
      "training_step_time": 0.3772246837615967
    },
    {
      "epoch": 0.0001887451171875,
      "model_forward_time": 0.11503362655639648,
      "step": 30924
    },
    {
      "epoch": 0.0001887451171875,
      "step": 30924,
      "training_step_time": 0.3927168846130371
    },
    {
      "epoch": 0.000188751220703125,
      "model_forward_time": 0.11458778381347656,
      "step": 30925
    },
    {
      "epoch": 0.000188751220703125,
      "step": 30925,
      "training_step_time": 0.40480804443359375
    },
    {
      "epoch": 0.00018875732421875,
      "model_forward_time": 0.11613702774047852,
      "step": 30926
    },
    {
      "epoch": 0.00018875732421875,
      "step": 30926,
      "training_step_time": 0.535546064376831
    },
    {
      "epoch": 0.000188763427734375,
      "model_forward_time": 0.11753296852111816,
      "step": 30927
    },
    {
      "epoch": 0.000188763427734375,
      "step": 30927,
      "training_step_time": 0.7154252529144287
    },
    {
      "epoch": 0.00018876953125,
      "model_forward_time": 0.12040996551513672,
      "step": 30928
    },
    {
      "epoch": 0.00018876953125,
      "step": 30928,
      "training_step_time": 0.6606879234313965
    },
    {
      "epoch": 0.000188775634765625,
      "model_forward_time": 0.11828041076660156,
      "step": 30929
    },
    {
      "epoch": 0.000188775634765625,
      "step": 30929,
      "training_step_time": 0.7380430698394775
    },
    {
      "epoch": 0.00018878173828125,
      "grad_norm": 0.130204439163208,
      "learning_rate": 5.157053795390642e-05,
      "loss": 0.0385,
      "step": 30930
    },
    {
      "epoch": 0.00018878173828125,
      "model_forward_time": 0.12041735649108887,
      "step": 30930
    },
    {
      "epoch": 0.00018878173828125,
      "step": 30930,
      "training_step_time": 0.7263422012329102
    },
    {
      "epoch": 0.000188787841796875,
      "model_forward_time": 0.11890101432800293,
      "step": 30931
    },
    {
      "epoch": 0.000188787841796875,
      "step": 30931,
      "training_step_time": 0.7960901260375977
    },
    {
      "epoch": 0.0001887939453125,
      "model_forward_time": 0.12831473350524902,
      "step": 30932
    },
    {
      "epoch": 0.0001887939453125,
      "step": 30932,
      "training_step_time": 0.6928753852844238
    },
    {
      "epoch": 0.000188800048828125,
      "model_forward_time": 0.1191558837890625,
      "step": 30933
    },
    {
      "epoch": 0.000188800048828125,
      "step": 30933,
      "training_step_time": 0.6911904811859131
    },
    {
      "epoch": 0.00018880615234375,
      "model_forward_time": 0.12044239044189453,
      "step": 30934
    },
    {
      "epoch": 0.00018880615234375,
      "step": 30934,
      "training_step_time": 0.7034132480621338
    },
    {
      "epoch": 0.000188812255859375,
      "model_forward_time": 0.11933469772338867,
      "step": 30935
    },
    {
      "epoch": 0.000188812255859375,
      "step": 30935,
      "training_step_time": 0.6617612838745117
    },
    {
      "epoch": 0.000188818359375,
      "model_forward_time": 0.11932802200317383,
      "step": 30936
    },
    {
      "epoch": 0.000188818359375,
      "step": 30936,
      "training_step_time": 0.7600529193878174
    },
    {
      "epoch": 0.000188824462890625,
      "model_forward_time": 0.11627674102783203,
      "step": 30937
    },
    {
      "epoch": 0.000188824462890625,
      "step": 30937,
      "training_step_time": 0.7131466865539551
    },
    {
      "epoch": 0.00018883056640625,
      "model_forward_time": 0.1153876781463623,
      "step": 30938
    },
    {
      "epoch": 0.00018883056640625,
      "step": 30938,
      "training_step_time": 0.7278246879577637
    },
    {
      "epoch": 0.000188836669921875,
      "model_forward_time": 0.1181192398071289,
      "step": 30939
    },
    {
      "epoch": 0.000188836669921875,
      "step": 30939,
      "training_step_time": 0.7852704524993896
    },
    {
      "epoch": 0.0001888427734375,
      "grad_norm": 0.12003664672374725,
      "learning_rate": 5.154299348458803e-05,
      "loss": 0.0397,
      "step": 30940
    },
    {
      "epoch": 0.0001888427734375,
      "model_forward_time": 0.11595535278320312,
      "step": 30940
    },
    {
      "epoch": 0.0001888427734375,
      "step": 30940,
      "training_step_time": 0.6725006103515625
    },
    {
      "epoch": 0.000188848876953125,
      "model_forward_time": 0.11679410934448242,
      "step": 30941
    },
    {
      "epoch": 0.000188848876953125,
      "step": 30941,
      "training_step_time": 0.6105883121490479
    },
    {
      "epoch": 0.00018885498046875,
      "model_forward_time": 0.12017345428466797,
      "step": 30942
    },
    {
      "epoch": 0.00018885498046875,
      "step": 30942,
      "training_step_time": 0.6462221145629883
    },
    {
      "epoch": 0.000188861083984375,
      "model_forward_time": 0.12050557136535645,
      "step": 30943
    },
    {
      "epoch": 0.000188861083984375,
      "step": 30943,
      "training_step_time": 0.6768341064453125
    },
    {
      "epoch": 0.0001888671875,
      "model_forward_time": 0.11844253540039062,
      "step": 30944
    },
    {
      "epoch": 0.0001888671875,
      "step": 30944,
      "training_step_time": 0.6745498180389404
    },
    {
      "epoch": 0.000188873291015625,
      "model_forward_time": 0.11990618705749512,
      "step": 30945
    },
    {
      "epoch": 0.000188873291015625,
      "step": 30945,
      "training_step_time": 0.6693084239959717
    },
    {
      "epoch": 0.00018887939453125,
      "model_forward_time": 0.12567758560180664,
      "step": 30946
    },
    {
      "epoch": 0.00018887939453125,
      "step": 30946,
      "training_step_time": 0.7429125308990479
    },
    {
      "epoch": 0.000188885498046875,
      "model_forward_time": 0.11817359924316406,
      "step": 30947
    },
    {
      "epoch": 0.000188885498046875,
      "step": 30947,
      "training_step_time": 0.6907992362976074
    },
    {
      "epoch": 0.0001888916015625,
      "model_forward_time": 0.11747360229492188,
      "step": 30948
    },
    {
      "epoch": 0.0001888916015625,
      "step": 30948,
      "training_step_time": 0.6549155712127686
    },
    {
      "epoch": 0.000188897705078125,
      "model_forward_time": 0.12404084205627441,
      "step": 30949
    },
    {
      "epoch": 0.000188897705078125,
      "step": 30949,
      "training_step_time": 0.742476224899292
    },
    {
      "epoch": 0.00018890380859375,
      "grad_norm": 0.09355182945728302,
      "learning_rate": 5.151544854654895e-05,
      "loss": 0.0524,
      "step": 30950
    },
    {
      "epoch": 0.00018890380859375,
      "model_forward_time": 0.11666178703308105,
      "step": 30950
    },
    {
      "epoch": 0.00018890380859375,
      "step": 30950,
      "training_step_time": 0.6711044311523438
    },
    {
      "epoch": 0.000188909912109375,
      "model_forward_time": 0.12009453773498535,
      "step": 30951
    },
    {
      "epoch": 0.000188909912109375,
      "step": 30951,
      "training_step_time": 0.7572953701019287
    },
    {
      "epoch": 0.000188916015625,
      "model_forward_time": 0.11679339408874512,
      "step": 30952
    },
    {
      "epoch": 0.000188916015625,
      "step": 30952,
      "training_step_time": 0.6458230018615723
    },
    {
      "epoch": 0.000188922119140625,
      "model_forward_time": 0.1200711727142334,
      "step": 30953
    },
    {
      "epoch": 0.000188922119140625,
      "step": 30953,
      "training_step_time": 0.6556577682495117
    },
    {
      "epoch": 0.00018892822265625,
      "model_forward_time": 0.11936640739440918,
      "step": 30954
    },
    {
      "epoch": 0.00018892822265625,
      "step": 30954,
      "training_step_time": 0.6988270282745361
    },
    {
      "epoch": 0.000188934326171875,
      "model_forward_time": 0.11961770057678223,
      "step": 30955
    },
    {
      "epoch": 0.000188934326171875,
      "step": 30955,
      "training_step_time": 0.7109253406524658
    },
    {
      "epoch": 0.0001889404296875,
      "model_forward_time": 0.12238597869873047,
      "step": 30956
    },
    {
      "epoch": 0.0001889404296875,
      "step": 30956,
      "training_step_time": 0.7520456314086914
    },
    {
      "epoch": 0.000188946533203125,
      "model_forward_time": 0.11821341514587402,
      "step": 30957
    },
    {
      "epoch": 0.000188946533203125,
      "step": 30957,
      "training_step_time": 0.6640832424163818
    },
    {
      "epoch": 0.00018895263671875,
      "model_forward_time": 0.12839913368225098,
      "step": 30958
    },
    {
      "epoch": 0.00018895263671875,
      "step": 30958,
      "training_step_time": 0.7407288551330566
    },
    {
      "epoch": 0.000188958740234375,
      "model_forward_time": 0.1169583797454834,
      "step": 30959
    },
    {
      "epoch": 0.000188958740234375,
      "step": 30959,
      "training_step_time": 0.6978318691253662
    },
    {
      "epoch": 0.00018896484375,
      "grad_norm": 0.1481018364429474,
      "learning_rate": 5.148790314815663e-05,
      "loss": 0.0478,
      "step": 30960
    },
    {
      "epoch": 0.00018896484375,
      "model_forward_time": 0.11612892150878906,
      "step": 30960
    },
    {
      "epoch": 0.00018896484375,
      "step": 30960,
      "training_step_time": 0.684345006942749
    },
    {
      "epoch": 0.000188970947265625,
      "model_forward_time": 0.11731863021850586,
      "step": 30961
    },
    {
      "epoch": 0.000188970947265625,
      "step": 30961,
      "training_step_time": 0.6822946071624756
    },
    {
      "epoch": 0.00018897705078125,
      "model_forward_time": 0.11982274055480957,
      "step": 30962
    },
    {
      "epoch": 0.00018897705078125,
      "step": 30962,
      "training_step_time": 0.6718151569366455
    },
    {
      "epoch": 0.000188983154296875,
      "model_forward_time": 0.11738300323486328,
      "step": 30963
    },
    {
      "epoch": 0.000188983154296875,
      "step": 30963,
      "training_step_time": 0.6726765632629395
    },
    {
      "epoch": 0.0001889892578125,
      "model_forward_time": 0.11807417869567871,
      "step": 30964
    },
    {
      "epoch": 0.0001889892578125,
      "step": 30964,
      "training_step_time": 0.6741921901702881
    },
    {
      "epoch": 0.000188995361328125,
      "model_forward_time": 0.11763381958007812,
      "step": 30965
    },
    {
      "epoch": 0.000188995361328125,
      "step": 30965,
      "training_step_time": 0.5755267143249512
    },
    {
      "epoch": 0.00018900146484375,
      "model_forward_time": 0.12282872200012207,
      "step": 30966
    },
    {
      "epoch": 0.00018900146484375,
      "step": 30966,
      "training_step_time": 0.6973068714141846
    },
    {
      "epoch": 0.000189007568359375,
      "model_forward_time": 0.11943817138671875,
      "step": 30967
    },
    {
      "epoch": 0.000189007568359375,
      "step": 30967,
      "training_step_time": 0.6911051273345947
    },
    {
      "epoch": 0.000189013671875,
      "model_forward_time": 0.11863899230957031,
      "step": 30968
    },
    {
      "epoch": 0.000189013671875,
      "step": 30968,
      "training_step_time": 0.7431683540344238
    },
    {
      "epoch": 0.000189019775390625,
      "model_forward_time": 0.11843419075012207,
      "step": 30969
    },
    {
      "epoch": 0.000189019775390625,
      "step": 30969,
      "training_step_time": 0.6818840503692627
    },
    {
      "epoch": 0.00018902587890625,
      "grad_norm": 0.19548028707504272,
      "learning_rate": 5.1460357297778615e-05,
      "loss": 0.0517,
      "step": 30970
    },
    {
      "epoch": 0.00018902587890625,
      "model_forward_time": 0.11752724647521973,
      "step": 30970
    },
    {
      "epoch": 0.00018902587890625,
      "step": 30970,
      "training_step_time": 0.6988997459411621
    },
    {
      "epoch": 0.000189031982421875,
      "model_forward_time": 0.1161344051361084,
      "step": 30971
    },
    {
      "epoch": 0.000189031982421875,
      "step": 30971,
      "training_step_time": 0.656663179397583
    },
    {
      "epoch": 0.0001890380859375,
      "model_forward_time": 0.11876988410949707,
      "step": 30972
    },
    {
      "epoch": 0.0001890380859375,
      "step": 30972,
      "training_step_time": 0.7030177116394043
    },
    {
      "epoch": 0.000189044189453125,
      "model_forward_time": 0.11827969551086426,
      "step": 30973
    },
    {
      "epoch": 0.000189044189453125,
      "step": 30973,
      "training_step_time": 0.7112808227539062
    },
    {
      "epoch": 0.00018905029296875,
      "model_forward_time": 0.12302517890930176,
      "step": 30974
    },
    {
      "epoch": 0.00018905029296875,
      "step": 30974,
      "training_step_time": 0.712090253829956
    },
    {
      "epoch": 0.000189056396484375,
      "model_forward_time": 0.11689639091491699,
      "step": 30975
    },
    {
      "epoch": 0.000189056396484375,
      "step": 30975,
      "training_step_time": 0.7149443626403809
    },
    {
      "epoch": 0.0001890625,
      "model_forward_time": 0.1168217658996582,
      "step": 30976
    },
    {
      "epoch": 0.0001890625,
      "step": 30976,
      "training_step_time": 0.652888298034668
    },
    {
      "epoch": 0.000189068603515625,
      "model_forward_time": 0.11827707290649414,
      "step": 30977
    },
    {
      "epoch": 0.000189068603515625,
      "step": 30977,
      "training_step_time": 0.7184057235717773
    },
    {
      "epoch": 0.00018907470703125,
      "model_forward_time": 0.12161397933959961,
      "step": 30978
    },
    {
      "epoch": 0.00018907470703125,
      "step": 30978,
      "training_step_time": 0.6675524711608887
    },
    {
      "epoch": 0.000189080810546875,
      "model_forward_time": 0.11682653427124023,
      "step": 30979
    },
    {
      "epoch": 0.000189080810546875,
      "step": 30979,
      "training_step_time": 0.6799886226654053
    },
    {
      "epoch": 0.0001890869140625,
      "grad_norm": 0.12080669403076172,
      "learning_rate": 5.143281100378261e-05,
      "loss": 0.0487,
      "step": 30980
    },
    {
      "epoch": 0.0001890869140625,
      "model_forward_time": 0.12385106086730957,
      "step": 30980
    },
    {
      "epoch": 0.0001890869140625,
      "step": 30980,
      "training_step_time": 0.779733419418335
    },
    {
      "epoch": 0.000189093017578125,
      "model_forward_time": 0.12430667877197266,
      "step": 30981
    },
    {
      "epoch": 0.000189093017578125,
      "step": 30981,
      "training_step_time": 0.6634438037872314
    },
    {
      "epoch": 0.00018909912109375,
      "model_forward_time": 0.11980199813842773,
      "step": 30982
    },
    {
      "epoch": 0.00018909912109375,
      "step": 30982,
      "training_step_time": 0.693293571472168
    },
    {
      "epoch": 0.000189105224609375,
      "model_forward_time": 0.12164425849914551,
      "step": 30983
    },
    {
      "epoch": 0.000189105224609375,
      "step": 30983,
      "training_step_time": 0.7141618728637695
    },
    {
      "epoch": 0.000189111328125,
      "model_forward_time": 0.11823701858520508,
      "step": 30984
    },
    {
      "epoch": 0.000189111328125,
      "step": 30984,
      "training_step_time": 0.6403374671936035
    },
    {
      "epoch": 0.000189117431640625,
      "model_forward_time": 0.12013101577758789,
      "step": 30985
    },
    {
      "epoch": 0.000189117431640625,
      "step": 30985,
      "training_step_time": 0.6117923259735107
    },
    {
      "epoch": 0.00018912353515625,
      "model_forward_time": 0.12229728698730469,
      "step": 30986
    },
    {
      "epoch": 0.00018912353515625,
      "step": 30986,
      "training_step_time": 0.7535529136657715
    },
    {
      "epoch": 0.000189129638671875,
      "model_forward_time": 0.1173403263092041,
      "step": 30987
    },
    {
      "epoch": 0.000189129638671875,
      "step": 30987,
      "training_step_time": 0.7231540679931641
    },
    {
      "epoch": 0.0001891357421875,
      "model_forward_time": 0.11809730529785156,
      "step": 30988
    },
    {
      "epoch": 0.0001891357421875,
      "step": 30988,
      "training_step_time": 0.583702802658081
    },
    {
      "epoch": 0.000189141845703125,
      "model_forward_time": 0.11856436729431152,
      "step": 30989
    },
    {
      "epoch": 0.000189141845703125,
      "step": 30989,
      "training_step_time": 0.6279327869415283
    },
    {
      "epoch": 0.00018914794921875,
      "grad_norm": 0.14305931329727173,
      "learning_rate": 5.1405264274536445e-05,
      "loss": 0.0493,
      "step": 30990
    },
    {
      "epoch": 0.00018914794921875,
      "model_forward_time": 0.11753201484680176,
      "step": 30990
    },
    {
      "epoch": 0.00018914794921875,
      "step": 30990,
      "training_step_time": 0.625140905380249
    },
    {
      "epoch": 0.000189154052734375,
      "model_forward_time": 0.12149262428283691,
      "step": 30991
    },
    {
      "epoch": 0.000189154052734375,
      "step": 30991,
      "training_step_time": 0.7621848583221436
    },
    {
      "epoch": 0.00018916015625,
      "model_forward_time": 0.11780595779418945,
      "step": 30992
    },
    {
      "epoch": 0.00018916015625,
      "step": 30992,
      "training_step_time": 0.7571210861206055
    },
    {
      "epoch": 0.000189166259765625,
      "model_forward_time": 0.12042784690856934,
      "step": 30993
    },
    {
      "epoch": 0.000189166259765625,
      "step": 30993,
      "training_step_time": 0.6589252948760986
    },
    {
      "epoch": 0.00018917236328125,
      "model_forward_time": 0.12050533294677734,
      "step": 30994
    },
    {
      "epoch": 0.00018917236328125,
      "step": 30994,
      "training_step_time": 0.48556017875671387
    },
    {
      "epoch": 0.000189178466796875,
      "model_forward_time": 0.11893749237060547,
      "step": 30995
    },
    {
      "epoch": 0.000189178466796875,
      "step": 30995,
      "training_step_time": 0.5399825572967529
    },
    {
      "epoch": 0.0001891845703125,
      "model_forward_time": 0.12100005149841309,
      "step": 30996
    },
    {
      "epoch": 0.0001891845703125,
      "step": 30996,
      "training_step_time": 0.4479103088378906
    },
    {
      "epoch": 0.000189190673828125,
      "model_forward_time": 0.11968088150024414,
      "step": 30997
    },
    {
      "epoch": 0.000189190673828125,
      "step": 30997,
      "training_step_time": 0.48972272872924805
    },
    {
      "epoch": 0.00018919677734375,
      "model_forward_time": 0.11976766586303711,
      "step": 30998
    },
    {
      "epoch": 0.00018919677734375,
      "step": 30998,
      "training_step_time": 0.4101285934448242
    },
    {
      "epoch": 0.000189202880859375,
      "model_forward_time": 0.11763525009155273,
      "step": 30999
    },
    {
      "epoch": 0.000189202880859375,
      "step": 30999,
      "training_step_time": 0.418259859085083
    },
    {
      "epoch": 0.000189208984375,
      "grad_norm": 0.16421157121658325,
      "learning_rate": 5.1377717118408105e-05,
      "loss": 0.05,
      "step": 31000
    },
    {
      "epoch": 0.000189208984375,
      "model_forward_time": 0.11435842514038086,
      "step": 31000
    },
    {
      "epoch": 0.000189208984375,
      "step": 31000,
      "training_step_time": 0.3551967144012451
    },
    {
      "epoch": 0.000189215087890625,
      "model_forward_time": 0.1128237247467041,
      "step": 31001
    },
    {
      "epoch": 0.000189215087890625,
      "step": 31001,
      "training_step_time": 0.37802648544311523
    },
    {
      "epoch": 0.00018922119140625,
      "model_forward_time": 0.11310696601867676,
      "step": 31002
    },
    {
      "epoch": 0.00018922119140625,
      "step": 31002,
      "training_step_time": 0.36832094192504883
    },
    {
      "epoch": 0.000189227294921875,
      "model_forward_time": 0.11322736740112305,
      "step": 31003
    },
    {
      "epoch": 0.000189227294921875,
      "step": 31003,
      "training_step_time": 0.3869478702545166
    },
    {
      "epoch": 0.0001892333984375,
      "model_forward_time": 0.11352372169494629,
      "step": 31004
    },
    {
      "epoch": 0.0001892333984375,
      "step": 31004,
      "training_step_time": 0.39618778228759766
    },
    {
      "epoch": 0.000189239501953125,
      "model_forward_time": 0.11537766456604004,
      "step": 31005
    },
    {
      "epoch": 0.000189239501953125,
      "step": 31005,
      "training_step_time": 0.3978745937347412
    },
    {
      "epoch": 0.00018924560546875,
      "model_forward_time": 0.11463260650634766,
      "step": 31006
    },
    {
      "epoch": 0.00018924560546875,
      "step": 31006,
      "training_step_time": 0.3903775215148926
    },
    {
      "epoch": 0.000189251708984375,
      "model_forward_time": 0.11525607109069824,
      "step": 31007
    },
    {
      "epoch": 0.000189251708984375,
      "step": 31007,
      "training_step_time": 0.41312074661254883
    },
    {
      "epoch": 0.0001892578125,
      "model_forward_time": 0.11472392082214355,
      "step": 31008
    },
    {
      "epoch": 0.0001892578125,
      "step": 31008,
      "training_step_time": 0.40476083755493164
    },
    {
      "epoch": 0.000189263916015625,
      "model_forward_time": 0.11544609069824219,
      "step": 31009
    },
    {
      "epoch": 0.000189263916015625,
      "step": 31009,
      "training_step_time": 0.4358823299407959
    },
    {
      "epoch": 0.00018927001953125,
      "grad_norm": 0.08269799500703812,
      "learning_rate": 5.135016954376567e-05,
      "loss": 0.0477,
      "step": 31010
    },
    {
      "epoch": 0.00018927001953125,
      "model_forward_time": 0.11479043960571289,
      "step": 31010
    },
    {
      "epoch": 0.00018927001953125,
      "step": 31010,
      "training_step_time": 0.3870365619659424
    },
    {
      "epoch": 0.000189276123046875,
      "model_forward_time": 0.11445879936218262,
      "step": 31011
    },
    {
      "epoch": 0.000189276123046875,
      "step": 31011,
      "training_step_time": 0.4355759620666504
    },
    {
      "epoch": 0.0001892822265625,
      "model_forward_time": 0.11516308784484863,
      "step": 31012
    },
    {
      "epoch": 0.0001892822265625,
      "step": 31012,
      "training_step_time": 0.5071945190429688
    },
    {
      "epoch": 0.000189288330078125,
      "model_forward_time": 0.1156318187713623,
      "step": 31013
    },
    {
      "epoch": 0.000189288330078125,
      "step": 31013,
      "training_step_time": 0.4645380973815918
    },
    {
      "epoch": 0.00018929443359375,
      "model_forward_time": 0.11442327499389648,
      "step": 31014
    },
    {
      "epoch": 0.00018929443359375,
      "step": 31014,
      "training_step_time": 0.5044920444488525
    },
    {
      "epoch": 0.000189300537109375,
      "model_forward_time": 0.11430692672729492,
      "step": 31015
    },
    {
      "epoch": 0.000189300537109375,
      "step": 31015,
      "training_step_time": 0.39017462730407715
    },
    {
      "epoch": 0.000189306640625,
      "model_forward_time": 0.11500382423400879,
      "step": 31016
    },
    {
      "epoch": 0.000189306640625,
      "step": 31016,
      "training_step_time": 0.38628363609313965
    },
    {
      "epoch": 0.000189312744140625,
      "model_forward_time": 0.11522126197814941,
      "step": 31017
    },
    {
      "epoch": 0.000189312744140625,
      "step": 31017,
      "training_step_time": 0.4518165588378906
    },
    {
      "epoch": 0.00018931884765625,
      "model_forward_time": 0.11492395401000977,
      "step": 31018
    },
    {
      "epoch": 0.00018931884765625,
      "step": 31018,
      "training_step_time": 0.45386314392089844
    },
    {
      "epoch": 0.000189324951171875,
      "model_forward_time": 0.11541247367858887,
      "step": 31019
    },
    {
      "epoch": 0.000189324951171875,
      "step": 31019,
      "training_step_time": 0.398622989654541
    },
    {
      "epoch": 0.0001893310546875,
      "grad_norm": 0.15339016914367676,
      "learning_rate": 5.132262155897739e-05,
      "loss": 0.0482,
      "step": 31020
    },
    {
      "epoch": 0.0001893310546875,
      "model_forward_time": 0.11441230773925781,
      "step": 31020
    },
    {
      "epoch": 0.0001893310546875,
      "step": 31020,
      "training_step_time": 0.40424346923828125
    },
    {
      "epoch": 0.000189337158203125,
      "model_forward_time": 0.11525678634643555,
      "step": 31021
    },
    {
      "epoch": 0.000189337158203125,
      "step": 31021,
      "training_step_time": 0.48497462272644043
    },
    {
      "epoch": 0.00018934326171875,
      "model_forward_time": 0.1149594783782959,
      "step": 31022
    },
    {
      "epoch": 0.00018934326171875,
      "step": 31022,
      "training_step_time": 0.4088096618652344
    },
    {
      "epoch": 0.000189349365234375,
      "model_forward_time": 0.11511015892028809,
      "step": 31023
    },
    {
      "epoch": 0.000189349365234375,
      "step": 31023,
      "training_step_time": 0.44692039489746094
    },
    {
      "epoch": 0.00018935546875,
      "model_forward_time": 0.11497616767883301,
      "step": 31024
    },
    {
      "epoch": 0.00018935546875,
      "step": 31024,
      "training_step_time": 0.4088001251220703
    },
    {
      "epoch": 0.000189361572265625,
      "model_forward_time": 0.1159060001373291,
      "step": 31025
    },
    {
      "epoch": 0.000189361572265625,
      "step": 31025,
      "training_step_time": 0.45649075508117676
    },
    {
      "epoch": 0.00018936767578125,
      "model_forward_time": 0.11564397811889648,
      "step": 31026
    },
    {
      "epoch": 0.00018936767578125,
      "step": 31026,
      "training_step_time": 0.49323225021362305
    },
    {
      "epoch": 0.000189373779296875,
      "model_forward_time": 0.11541199684143066,
      "step": 31027
    },
    {
      "epoch": 0.000189373779296875,
      "step": 31027,
      "training_step_time": 0.4271671772003174
    },
    {
      "epoch": 0.0001893798828125,
      "model_forward_time": 0.11462140083312988,
      "step": 31028
    },
    {
      "epoch": 0.0001893798828125,
      "step": 31028,
      "training_step_time": 0.38965487480163574
    },
    {
      "epoch": 0.000189385986328125,
      "model_forward_time": 0.11522531509399414,
      "step": 31029
    },
    {
      "epoch": 0.000189385986328125,
      "step": 31029,
      "training_step_time": 0.39931225776672363
    },
    {
      "epoch": 0.00018939208984375,
      "grad_norm": 0.19338740408420563,
      "learning_rate": 5.12950731724116e-05,
      "loss": 0.0514,
      "step": 31030
    },
    {
      "epoch": 0.00018939208984375,
      "model_forward_time": 0.11487030982971191,
      "step": 31030
    },
    {
      "epoch": 0.00018939208984375,
      "step": 31030,
      "training_step_time": 0.4177205562591553
    },
    {
      "epoch": 0.000189398193359375,
      "model_forward_time": 0.11490106582641602,
      "step": 31031
    },
    {
      "epoch": 0.000189398193359375,
      "step": 31031,
      "training_step_time": 0.3908994197845459
    },
    {
      "epoch": 0.000189404296875,
      "model_forward_time": 0.11537671089172363,
      "step": 31032
    },
    {
      "epoch": 0.000189404296875,
      "step": 31032,
      "training_step_time": 0.39082789421081543
    },
    {
      "epoch": 0.000189410400390625,
      "model_forward_time": 0.11536884307861328,
      "step": 31033
    },
    {
      "epoch": 0.000189410400390625,
      "step": 31033,
      "training_step_time": 0.39067602157592773
    },
    {
      "epoch": 0.00018941650390625,
      "model_forward_time": 0.11575937271118164,
      "step": 31034
    },
    {
      "epoch": 0.00018941650390625,
      "step": 31034,
      "training_step_time": 0.3884570598602295
    },
    {
      "epoch": 0.000189422607421875,
      "model_forward_time": 0.11582422256469727,
      "step": 31035
    },
    {
      "epoch": 0.000189422607421875,
      "step": 31035,
      "training_step_time": 0.3998751640319824
    },
    {
      "epoch": 0.0001894287109375,
      "model_forward_time": 0.11503100395202637,
      "step": 31036
    },
    {
      "epoch": 0.0001894287109375,
      "step": 31036,
      "training_step_time": 0.4172677993774414
    },
    {
      "epoch": 0.000189434814453125,
      "model_forward_time": 0.11599850654602051,
      "step": 31037
    },
    {
      "epoch": 0.000189434814453125,
      "step": 31037,
      "training_step_time": 0.40519094467163086
    },
    {
      "epoch": 0.00018944091796875,
      "model_forward_time": 0.11518263816833496,
      "step": 31038
    },
    {
      "epoch": 0.00018944091796875,
      "step": 31038,
      "training_step_time": 0.505225419998169
    },
    {
      "epoch": 0.000189447021484375,
      "model_forward_time": 0.1148843765258789,
      "step": 31039
    },
    {
      "epoch": 0.000189447021484375,
      "step": 31039,
      "training_step_time": 0.3993363380432129
    },
    {
      "epoch": 0.000189453125,
      "grad_norm": 0.14438262581825256,
      "learning_rate": 5.1267524392436784e-05,
      "loss": 0.0488,
      "step": 31040
    },
    {
      "epoch": 0.000189453125,
      "model_forward_time": 0.11538934707641602,
      "step": 31040
    },
    {
      "epoch": 0.000189453125,
      "step": 31040,
      "training_step_time": 0.5066790580749512
    },
    {
      "epoch": 0.000189459228515625,
      "model_forward_time": 0.11501717567443848,
      "step": 31041
    },
    {
      "epoch": 0.000189459228515625,
      "step": 31041,
      "training_step_time": 0.5019381046295166
    },
    {
      "epoch": 0.00018946533203125,
      "model_forward_time": 0.11514115333557129,
      "step": 31042
    },
    {
      "epoch": 0.00018946533203125,
      "step": 31042,
      "training_step_time": 0.48376917839050293
    },
    {
      "epoch": 0.000189471435546875,
      "model_forward_time": 0.11424708366394043,
      "step": 31043
    },
    {
      "epoch": 0.000189471435546875,
      "step": 31043,
      "training_step_time": 0.3927023410797119
    },
    {
      "epoch": 0.0001894775390625,
      "model_forward_time": 0.11513447761535645,
      "step": 31044
    },
    {
      "epoch": 0.0001894775390625,
      "step": 31044,
      "training_step_time": 0.43506288528442383
    },
    {
      "epoch": 0.000189483642578125,
      "model_forward_time": 0.11508631706237793,
      "step": 31045
    },
    {
      "epoch": 0.000189483642578125,
      "step": 31045,
      "training_step_time": 0.3888428211212158
    },
    {
      "epoch": 0.00018948974609375,
      "model_forward_time": 0.11611413955688477,
      "step": 31046
    },
    {
      "epoch": 0.00018948974609375,
      "step": 31046,
      "training_step_time": 0.3916645050048828
    },
    {
      "epoch": 0.000189495849609375,
      "model_forward_time": 0.11436080932617188,
      "step": 31047
    },
    {
      "epoch": 0.000189495849609375,
      "step": 31047,
      "training_step_time": 0.3961188793182373
    },
    {
      "epoch": 0.000189501953125,
      "model_forward_time": 0.11522698402404785,
      "step": 31048
    },
    {
      "epoch": 0.000189501953125,
      "step": 31048,
      "training_step_time": 0.3928372859954834
    },
    {
      "epoch": 0.000189508056640625,
      "model_forward_time": 0.11548709869384766,
      "step": 31049
    },
    {
      "epoch": 0.000189508056640625,
      "step": 31049,
      "training_step_time": 0.4075632095336914
    },
    {
      "epoch": 0.00018951416015625,
      "grad_norm": 0.15892814099788666,
      "learning_rate": 5.123997522742151e-05,
      "loss": 0.0432,
      "step": 31050
    },
    {
      "epoch": 0.00018951416015625,
      "model_forward_time": 0.11594605445861816,
      "step": 31050
    },
    {
      "epoch": 0.00018951416015625,
      "step": 31050,
      "training_step_time": 0.39794254302978516
    },
    {
      "epoch": 0.000189520263671875,
      "model_forward_time": 0.11568689346313477,
      "step": 31051
    },
    {
      "epoch": 0.000189520263671875,
      "step": 31051,
      "training_step_time": 0.45863866806030273
    },
    {
      "epoch": 0.0001895263671875,
      "model_forward_time": 0.11509060859680176,
      "step": 31052
    },
    {
      "epoch": 0.0001895263671875,
      "step": 31052,
      "training_step_time": 0.47693729400634766
    },
    {
      "epoch": 0.000189532470703125,
      "model_forward_time": 0.11463546752929688,
      "step": 31053
    },
    {
      "epoch": 0.000189532470703125,
      "step": 31053,
      "training_step_time": 0.39801716804504395
    },
    {
      "epoch": 0.00018953857421875,
      "model_forward_time": 0.11507749557495117,
      "step": 31054
    },
    {
      "epoch": 0.00018953857421875,
      "step": 31054,
      "training_step_time": 0.3660905361175537
    },
    {
      "epoch": 0.000189544677734375,
      "model_forward_time": 0.1148977279663086,
      "step": 31055
    },
    {
      "epoch": 0.000189544677734375,
      "step": 31055,
      "training_step_time": 0.4548656940460205
    },
    {
      "epoch": 0.00018955078125,
      "model_forward_time": 0.11529922485351562,
      "step": 31056
    },
    {
      "epoch": 0.00018955078125,
      "step": 31056,
      "training_step_time": 0.43393802642822266
    },
    {
      "epoch": 0.000189556884765625,
      "model_forward_time": 0.11398434638977051,
      "step": 31057
    },
    {
      "epoch": 0.000189556884765625,
      "step": 31057,
      "training_step_time": 0.4587118625640869
    },
    {
      "epoch": 0.00018956298828125,
      "model_forward_time": 0.11524534225463867,
      "step": 31058
    },
    {
      "epoch": 0.00018956298828125,
      "step": 31058,
      "training_step_time": 0.39563584327697754
    },
    {
      "epoch": 0.000189569091796875,
      "model_forward_time": 0.11573147773742676,
      "step": 31059
    },
    {
      "epoch": 0.000189569091796875,
      "step": 31059,
      "training_step_time": 0.3964107036590576
    },
    {
      "epoch": 0.0001895751953125,
      "grad_norm": 0.12388328462839127,
      "learning_rate": 5.12124256857345e-05,
      "loss": 0.047,
      "step": 31060
    },
    {
      "epoch": 0.0001895751953125,
      "model_forward_time": 0.11505699157714844,
      "step": 31060
    },
    {
      "epoch": 0.0001895751953125,
      "step": 31060,
      "training_step_time": 0.39423513412475586
    },
    {
      "epoch": 0.000189581298828125,
      "model_forward_time": 0.11498689651489258,
      "step": 31061
    },
    {
      "epoch": 0.000189581298828125,
      "step": 31061,
      "training_step_time": 0.39655351638793945
    },
    {
      "epoch": 0.00018958740234375,
      "model_forward_time": 0.11473298072814941,
      "step": 31062
    },
    {
      "epoch": 0.00018958740234375,
      "step": 31062,
      "training_step_time": 0.39379239082336426
    },
    {
      "epoch": 0.000189593505859375,
      "model_forward_time": 0.11563491821289062,
      "step": 31063
    },
    {
      "epoch": 0.000189593505859375,
      "step": 31063,
      "training_step_time": 0.39147400856018066
    },
    {
      "epoch": 0.000189599609375,
      "model_forward_time": 0.11523652076721191,
      "step": 31064
    },
    {
      "epoch": 0.000189599609375,
      "step": 31064,
      "training_step_time": 0.41255831718444824
    },
    {
      "epoch": 0.000189605712890625,
      "model_forward_time": 0.11549782752990723,
      "step": 31065
    },
    {
      "epoch": 0.000189605712890625,
      "step": 31065,
      "training_step_time": 0.4794178009033203
    },
    {
      "epoch": 0.00018961181640625,
      "model_forward_time": 0.11526656150817871,
      "step": 31066
    },
    {
      "epoch": 0.00018961181640625,
      "step": 31066,
      "training_step_time": 0.43103957176208496
    },
    {
      "epoch": 0.000189617919921875,
      "model_forward_time": 0.11516928672790527,
      "step": 31067
    },
    {
      "epoch": 0.000189617919921875,
      "step": 31067,
      "training_step_time": 0.5123305320739746
    },
    {
      "epoch": 0.0001896240234375,
      "model_forward_time": 0.11479663848876953,
      "step": 31068
    },
    {
      "epoch": 0.0001896240234375,
      "step": 31068,
      "training_step_time": 0.3658123016357422
    },
    {
      "epoch": 0.000189630126953125,
      "model_forward_time": 0.11506891250610352,
      "step": 31069
    },
    {
      "epoch": 0.000189630126953125,
      "step": 31069,
      "training_step_time": 0.4187600612640381
    },
    {
      "epoch": 0.00018963623046875,
      "grad_norm": 0.16440421342849731,
      "learning_rate": 5.118487577574459e-05,
      "loss": 0.0478,
      "step": 31070
    },
    {
      "epoch": 0.00018963623046875,
      "model_forward_time": 0.11479711532592773,
      "step": 31070
    },
    {
      "epoch": 0.00018963623046875,
      "step": 31070,
      "training_step_time": 0.48594093322753906
    },
    {
      "epoch": 0.000189642333984375,
      "model_forward_time": 0.11466550827026367,
      "step": 31071
    },
    {
      "epoch": 0.000189642333984375,
      "step": 31071,
      "training_step_time": 0.43416309356689453
    },
    {
      "epoch": 0.0001896484375,
      "model_forward_time": 0.11480855941772461,
      "step": 31072
    },
    {
      "epoch": 0.0001896484375,
      "step": 31072,
      "training_step_time": 0.3906686305999756
    },
    {
      "epoch": 0.000189654541015625,
      "model_forward_time": 0.11496472358703613,
      "step": 31073
    },
    {
      "epoch": 0.000189654541015625,
      "step": 31073,
      "training_step_time": 0.38141632080078125
    },
    {
      "epoch": 0.00018966064453125,
      "model_forward_time": 0.11503219604492188,
      "step": 31074
    },
    {
      "epoch": 0.00018966064453125,
      "step": 31074,
      "training_step_time": 0.4220287799835205
    },
    {
      "epoch": 0.000189666748046875,
      "model_forward_time": 0.1156458854675293,
      "step": 31075
    },
    {
      "epoch": 0.000189666748046875,
      "step": 31075,
      "training_step_time": 0.4046895503997803
    },
    {
      "epoch": 0.0001896728515625,
      "model_forward_time": 0.11559629440307617,
      "step": 31076
    },
    {
      "epoch": 0.0001896728515625,
      "step": 31076,
      "training_step_time": 0.3918609619140625
    },
    {
      "epoch": 0.000189678955078125,
      "model_forward_time": 0.1150350570678711,
      "step": 31077
    },
    {
      "epoch": 0.000189678955078125,
      "step": 31077,
      "training_step_time": 0.38817405700683594
    },
    {
      "epoch": 0.00018968505859375,
      "model_forward_time": 0.1153860092163086,
      "step": 31078
    },
    {
      "epoch": 0.00018968505859375,
      "step": 31078,
      "training_step_time": 0.39859771728515625
    },
    {
      "epoch": 0.000189691162109375,
      "model_forward_time": 0.11608171463012695,
      "step": 31079
    },
    {
      "epoch": 0.000189691162109375,
      "step": 31079,
      "training_step_time": 0.3903021812438965
    },
    {
      "epoch": 0.000189697265625,
      "grad_norm": 0.14855745434761047,
      "learning_rate": 5.1157325505820694e-05,
      "loss": 0.045,
      "step": 31080
    },
    {
      "epoch": 0.000189697265625,
      "model_forward_time": 0.1155080795288086,
      "step": 31080
    },
    {
      "epoch": 0.000189697265625,
      "step": 31080,
      "training_step_time": 0.39844560623168945
    },
    {
      "epoch": 0.000189703369140625,
      "model_forward_time": 0.11558938026428223,
      "step": 31081
    },
    {
      "epoch": 0.000189703369140625,
      "step": 31081,
      "training_step_time": 0.40282559394836426
    },
    {
      "epoch": 0.00018970947265625,
      "model_forward_time": 0.11528825759887695,
      "step": 31082
    },
    {
      "epoch": 0.00018970947265625,
      "step": 31082,
      "training_step_time": 0.4355044364929199
    },
    {
      "epoch": 0.000189715576171875,
      "model_forward_time": 0.11494874954223633,
      "step": 31083
    },
    {
      "epoch": 0.000189715576171875,
      "step": 31083,
      "training_step_time": 0.3663787841796875
    },
    {
      "epoch": 0.0001897216796875,
      "model_forward_time": 0.11545062065124512,
      "step": 31084
    },
    {
      "epoch": 0.0001897216796875,
      "step": 31084,
      "training_step_time": 0.4757988452911377
    },
    {
      "epoch": 0.000189727783203125,
      "model_forward_time": 0.11467838287353516,
      "step": 31085
    },
    {
      "epoch": 0.000189727783203125,
      "step": 31085,
      "training_step_time": 0.5022532939910889
    },
    {
      "epoch": 0.00018973388671875,
      "model_forward_time": 0.11550712585449219,
      "step": 31086
    },
    {
      "epoch": 0.00018973388671875,
      "step": 31086,
      "training_step_time": 0.402585506439209
    },
    {
      "epoch": 0.000189739990234375,
      "model_forward_time": 0.11446189880371094,
      "step": 31087
    },
    {
      "epoch": 0.000189739990234375,
      "step": 31087,
      "training_step_time": 0.38550567626953125
    },
    {
      "epoch": 0.00018974609375,
      "model_forward_time": 0.11500787734985352,
      "step": 31088
    },
    {
      "epoch": 0.00018974609375,
      "step": 31088,
      "training_step_time": 0.39305758476257324
    },
    {
      "epoch": 0.000189752197265625,
      "model_forward_time": 0.1148674488067627,
      "step": 31089
    },
    {
      "epoch": 0.000189752197265625,
      "step": 31089,
      "training_step_time": 0.37891459465026855
    },
    {
      "epoch": 0.00018975830078125,
      "grad_norm": 0.1533687710762024,
      "learning_rate": 5.112977488433188e-05,
      "loss": 0.0506,
      "step": 31090
    },
    {
      "epoch": 0.00018975830078125,
      "model_forward_time": 0.1151585578918457,
      "step": 31090
    },
    {
      "epoch": 0.00018975830078125,
      "step": 31090,
      "training_step_time": 0.49674177169799805
    },
    {
      "epoch": 0.000189764404296875,
      "model_forward_time": 0.1152796745300293,
      "step": 31091
    },
    {
      "epoch": 0.000189764404296875,
      "step": 31091,
      "training_step_time": 0.40460753440856934
    },
    {
      "epoch": 0.0001897705078125,
      "model_forward_time": 0.11616921424865723,
      "step": 31092
    },
    {
      "epoch": 0.0001897705078125,
      "step": 31092,
      "training_step_time": 0.3901228904724121
    },
    {
      "epoch": 0.000189776611328125,
      "model_forward_time": 0.11467289924621582,
      "step": 31093
    },
    {
      "epoch": 0.000189776611328125,
      "step": 31093,
      "training_step_time": 0.3911755084991455
    },
    {
      "epoch": 0.00018978271484375,
      "model_forward_time": 0.11570501327514648,
      "step": 31094
    },
    {
      "epoch": 0.00018978271484375,
      "step": 31094,
      "training_step_time": 0.435086727142334
    },
    {
      "epoch": 0.000189788818359375,
      "model_forward_time": 0.11489319801330566,
      "step": 31095
    },
    {
      "epoch": 0.000189788818359375,
      "step": 31095,
      "training_step_time": 0.3936786651611328
    },
    {
      "epoch": 0.000189794921875,
      "model_forward_time": 0.11602044105529785,
      "step": 31096
    },
    {
      "epoch": 0.000189794921875,
      "step": 31096,
      "training_step_time": 0.8164463043212891
    },
    {
      "epoch": 0.000189801025390625,
      "model_forward_time": 0.1145017147064209,
      "step": 31097
    },
    {
      "epoch": 0.000189801025390625,
      "step": 31097,
      "training_step_time": 0.4261038303375244
    },
    {
      "epoch": 0.00018980712890625,
      "model_forward_time": 0.1147470474243164,
      "step": 31098
    },
    {
      "epoch": 0.00018980712890625,
      "step": 31098,
      "training_step_time": 0.4173469543457031
    },
    {
      "epoch": 0.000189813232421875,
      "model_forward_time": 0.11466574668884277,
      "step": 31099
    },
    {
      "epoch": 0.000189813232421875,
      "step": 31099,
      "training_step_time": 0.4588136672973633
    },
    {
      "epoch": 0.0001898193359375,
      "grad_norm": 0.14726349711418152,
      "learning_rate": 5.110222391964728e-05,
      "loss": 0.0447,
      "step": 31100
    },
    {
      "epoch": 0.0001898193359375,
      "model_forward_time": 0.11481571197509766,
      "step": 31100
    },
    {
      "epoch": 0.0001898193359375,
      "step": 31100,
      "training_step_time": 0.39834070205688477
    },
    {
      "epoch": 0.000189825439453125,
      "model_forward_time": 0.11444449424743652,
      "step": 31101
    },
    {
      "epoch": 0.000189825439453125,
      "step": 31101,
      "training_step_time": 0.386401891708374
    },
    {
      "epoch": 0.00018983154296875,
      "model_forward_time": 0.11585807800292969,
      "step": 31102
    },
    {
      "epoch": 0.00018983154296875,
      "step": 31102,
      "training_step_time": 0.6643111705780029
    },
    {
      "epoch": 0.000189837646484375,
      "model_forward_time": 0.1137533187866211,
      "step": 31103
    },
    {
      "epoch": 0.000189837646484375,
      "step": 31103,
      "training_step_time": 0.3861825466156006
    },
    {
      "epoch": 0.00018984375,
      "model_forward_time": 0.11445116996765137,
      "step": 31104
    },
    {
      "epoch": 0.00018984375,
      "step": 31104,
      "training_step_time": 0.39090704917907715
    },
    {
      "epoch": 0.000189849853515625,
      "model_forward_time": 0.11548137664794922,
      "step": 31105
    },
    {
      "epoch": 0.000189849853515625,
      "step": 31105,
      "training_step_time": 0.38663244247436523
    },
    {
      "epoch": 0.00018985595703125,
      "model_forward_time": 0.11488485336303711,
      "step": 31106
    },
    {
      "epoch": 0.00018985595703125,
      "step": 31106,
      "training_step_time": 0.3899881839752197
    },
    {
      "epoch": 0.000189862060546875,
      "model_forward_time": 0.11487126350402832,
      "step": 31107
    },
    {
      "epoch": 0.000189862060546875,
      "step": 31107,
      "training_step_time": 0.3968496322631836
    },
    {
      "epoch": 0.0001898681640625,
      "model_forward_time": 0.11554718017578125,
      "step": 31108
    },
    {
      "epoch": 0.0001898681640625,
      "step": 31108,
      "training_step_time": 0.7265045642852783
    },
    {
      "epoch": 0.000189874267578125,
      "model_forward_time": 0.11499285697937012,
      "step": 31109
    },
    {
      "epoch": 0.000189874267578125,
      "step": 31109,
      "training_step_time": 0.4974782466888428
    },
    {
      "epoch": 0.00018988037109375,
      "grad_norm": 0.23741474747657776,
      "learning_rate": 5.107467262013614e-05,
      "loss": 0.0532,
      "step": 31110
    },
    {
      "epoch": 0.00018988037109375,
      "model_forward_time": 0.11460733413696289,
      "step": 31110
    },
    {
      "epoch": 0.00018988037109375,
      "step": 31110,
      "training_step_time": 0.3631925582885742
    },
    {
      "epoch": 0.000189886474609375,
      "model_forward_time": 0.11527585983276367,
      "step": 31111
    },
    {
      "epoch": 0.000189886474609375,
      "step": 31111,
      "training_step_time": 0.44162893295288086
    },
    {
      "epoch": 0.000189892578125,
      "model_forward_time": 0.1149129867553711,
      "step": 31112
    },
    {
      "epoch": 0.000189892578125,
      "step": 31112,
      "training_step_time": 0.5119893550872803
    },
    {
      "epoch": 0.000189898681640625,
      "model_forward_time": 0.11442446708679199,
      "step": 31113
    },
    {
      "epoch": 0.000189898681640625,
      "step": 31113,
      "training_step_time": 0.39429450035095215
    },
    {
      "epoch": 0.00018990478515625,
      "model_forward_time": 0.11507749557495117,
      "step": 31114
    },
    {
      "epoch": 0.00018990478515625,
      "step": 31114,
      "training_step_time": 0.6088721752166748
    },
    {
      "epoch": 0.000189910888671875,
      "model_forward_time": 0.11445903778076172,
      "step": 31115
    },
    {
      "epoch": 0.000189910888671875,
      "step": 31115,
      "training_step_time": 0.3885023593902588
    },
    {
      "epoch": 0.0001899169921875,
      "model_forward_time": 0.11509418487548828,
      "step": 31116
    },
    {
      "epoch": 0.0001899169921875,
      "step": 31116,
      "training_step_time": 0.3812727928161621
    },
    {
      "epoch": 0.000189923095703125,
      "model_forward_time": 0.11471796035766602,
      "step": 31117
    },
    {
      "epoch": 0.000189923095703125,
      "step": 31117,
      "training_step_time": 0.3982210159301758
    },
    {
      "epoch": 0.00018992919921875,
      "model_forward_time": 0.11467432975769043,
      "step": 31118
    },
    {
      "epoch": 0.00018992919921875,
      "step": 31118,
      "training_step_time": 0.40009498596191406
    },
    {
      "epoch": 0.000189935302734375,
      "model_forward_time": 0.11519837379455566,
      "step": 31119
    },
    {
      "epoch": 0.000189935302734375,
      "step": 31119,
      "training_step_time": 0.3911468982696533
    },
    {
      "epoch": 0.00018994140625,
      "grad_norm": 0.15797404944896698,
      "learning_rate": 5.104712099416785e-05,
      "loss": 0.0456,
      "step": 31120
    },
    {
      "epoch": 0.00018994140625,
      "model_forward_time": 0.1147928237915039,
      "step": 31120
    },
    {
      "epoch": 0.00018994140625,
      "step": 31120,
      "training_step_time": 0.8801357746124268
    },
    {
      "epoch": 0.000189947509765625,
      "model_forward_time": 0.11568307876586914,
      "step": 31121
    },
    {
      "epoch": 0.000189947509765625,
      "step": 31121,
      "training_step_time": 0.4035472869873047
    },
    {
      "epoch": 0.00018995361328125,
      "model_forward_time": 0.11469125747680664,
      "step": 31122
    },
    {
      "epoch": 0.00018995361328125,
      "step": 31122,
      "training_step_time": 0.4190511703491211
    },
    {
      "epoch": 0.000189959716796875,
      "model_forward_time": 0.11460494995117188,
      "step": 31123
    },
    {
      "epoch": 0.000189959716796875,
      "step": 31123,
      "training_step_time": 0.3603196144104004
    },
    {
      "epoch": 0.0001899658203125,
      "model_forward_time": 0.11402225494384766,
      "step": 31124
    },
    {
      "epoch": 0.0001899658203125,
      "step": 31124,
      "training_step_time": 0.44457197189331055
    },
    {
      "epoch": 0.000189971923828125,
      "model_forward_time": 0.1146390438079834,
      "step": 31125
    },
    {
      "epoch": 0.000189971923828125,
      "step": 31125,
      "training_step_time": 0.43907618522644043
    },
    {
      "epoch": 0.00018997802734375,
      "model_forward_time": 0.11517071723937988,
      "step": 31126
    },
    {
      "epoch": 0.00018997802734375,
      "step": 31126,
      "training_step_time": 0.5330750942230225
    },
    {
      "epoch": 0.000189984130859375,
      "model_forward_time": 0.1145327091217041,
      "step": 31127
    },
    {
      "epoch": 0.000189984130859375,
      "step": 31127,
      "training_step_time": 0.39528918266296387
    },
    {
      "epoch": 0.000189990234375,
      "model_forward_time": 0.11424827575683594,
      "step": 31128
    },
    {
      "epoch": 0.000189990234375,
      "step": 31128,
      "training_step_time": 0.38169431686401367
    },
    {
      "epoch": 0.000189996337890625,
      "model_forward_time": 0.11497926712036133,
      "step": 31129
    },
    {
      "epoch": 0.000189996337890625,
      "step": 31129,
      "training_step_time": 0.3907177448272705
    },
    {
      "epoch": 0.00019000244140625,
      "grad_norm": 0.16416341066360474,
      "learning_rate": 5.101956905011185e-05,
      "loss": 0.0472,
      "step": 31130
    },
    {
      "epoch": 0.00019000244140625,
      "model_forward_time": 0.11537837982177734,
      "step": 31130
    },
    {
      "epoch": 0.00019000244140625,
      "step": 31130,
      "training_step_time": 0.39945220947265625
    },
    {
      "epoch": 0.000190008544921875,
      "model_forward_time": 0.11549687385559082,
      "step": 31131
    },
    {
      "epoch": 0.000190008544921875,
      "step": 31131,
      "training_step_time": 0.3884718418121338
    },
    {
      "epoch": 0.0001900146484375,
      "model_forward_time": 0.11566781997680664,
      "step": 31132
    },
    {
      "epoch": 0.0001900146484375,
      "step": 31132,
      "training_step_time": 0.813077449798584
    },
    {
      "epoch": 0.000190020751953125,
      "model_forward_time": 0.11421585083007812,
      "step": 31133
    },
    {
      "epoch": 0.000190020751953125,
      "step": 31133,
      "training_step_time": 0.3839302062988281
    },
    {
      "epoch": 0.00019002685546875,
      "model_forward_time": 0.11426663398742676,
      "step": 31134
    },
    {
      "epoch": 0.00019002685546875,
      "step": 31134,
      "training_step_time": 0.47457003593444824
    },
    {
      "epoch": 0.000190032958984375,
      "model_forward_time": 0.11403703689575195,
      "step": 31135
    },
    {
      "epoch": 0.000190032958984375,
      "step": 31135,
      "training_step_time": 0.4060854911804199
    },
    {
      "epoch": 0.0001900390625,
      "model_forward_time": 0.11458039283752441,
      "step": 31136
    },
    {
      "epoch": 0.0001900390625,
      "step": 31136,
      "training_step_time": 0.4736328125
    },
    {
      "epoch": 0.000190045166015625,
      "model_forward_time": 0.11423015594482422,
      "step": 31137
    },
    {
      "epoch": 0.000190045166015625,
      "step": 31137,
      "training_step_time": 0.4208950996398926
    },
    {
      "epoch": 0.00019005126953125,
      "model_forward_time": 0.1153569221496582,
      "step": 31138
    },
    {
      "epoch": 0.00019005126953125,
      "step": 31138,
      "training_step_time": 0.48147106170654297
    },
    {
      "epoch": 0.000190057373046875,
      "model_forward_time": 0.11481761932373047,
      "step": 31139
    },
    {
      "epoch": 0.000190057373046875,
      "step": 31139,
      "training_step_time": 0.410977840423584
    },
    {
      "epoch": 0.0001900634765625,
      "grad_norm": 0.20338968932628632,
      "learning_rate": 5.0992016796337686e-05,
      "loss": 0.0502,
      "step": 31140
    },
    {
      "epoch": 0.0001900634765625,
      "model_forward_time": 0.11441445350646973,
      "step": 31140
    },
    {
      "epoch": 0.0001900634765625,
      "step": 31140,
      "training_step_time": 0.40244078636169434
    },
    {
      "epoch": 0.000190069580078125,
      "model_forward_time": 0.11577987670898438,
      "step": 31141
    },
    {
      "epoch": 0.000190069580078125,
      "step": 31141,
      "training_step_time": 0.3988349437713623
    },
    {
      "epoch": 0.00019007568359375,
      "model_forward_time": 0.11541056632995605,
      "step": 31142
    },
    {
      "epoch": 0.00019007568359375,
      "step": 31142,
      "training_step_time": 0.39629292488098145
    },
    {
      "epoch": 0.000190081787109375,
      "model_forward_time": 0.11560916900634766,
      "step": 31143
    },
    {
      "epoch": 0.000190081787109375,
      "step": 31143,
      "training_step_time": 0.39726781845092773
    },
    {
      "epoch": 0.000190087890625,
      "model_forward_time": 0.11535763740539551,
      "step": 31144
    },
    {
      "epoch": 0.000190087890625,
      "step": 31144,
      "training_step_time": 0.4032566547393799
    },
    {
      "epoch": 0.000190093994140625,
      "model_forward_time": 0.11492562294006348,
      "step": 31145
    },
    {
      "epoch": 0.000190093994140625,
      "step": 31145,
      "training_step_time": 0.38948798179626465
    },
    {
      "epoch": 0.00019010009765625,
      "model_forward_time": 0.11504149436950684,
      "step": 31146
    },
    {
      "epoch": 0.00019010009765625,
      "step": 31146,
      "training_step_time": 0.39268994331359863
    },
    {
      "epoch": 0.000190106201171875,
      "model_forward_time": 0.11547350883483887,
      "step": 31147
    },
    {
      "epoch": 0.000190106201171875,
      "step": 31147,
      "training_step_time": 0.4067251682281494
    },
    {
      "epoch": 0.0001901123046875,
      "model_forward_time": 0.11664462089538574,
      "step": 31148
    },
    {
      "epoch": 0.0001901123046875,
      "step": 31148,
      "training_step_time": 0.3947608470916748
    },
    {
      "epoch": 0.000190118408203125,
      "model_forward_time": 0.11539626121520996,
      "step": 31149
    },
    {
      "epoch": 0.000190118408203125,
      "step": 31149,
      "training_step_time": 0.4379568099975586
    },
    {
      "epoch": 0.00019012451171875,
      "grad_norm": 0.16349323093891144,
      "learning_rate": 5.096446424121502e-05,
      "loss": 0.0498,
      "step": 31150
    },
    {
      "epoch": 0.00019012451171875,
      "model_forward_time": 0.11435413360595703,
      "step": 31150
    },
    {
      "epoch": 0.00019012451171875,
      "step": 31150,
      "training_step_time": 0.39878273010253906
    },
    {
      "epoch": 0.000190130615234375,
      "model_forward_time": 0.11575174331665039,
      "step": 31151
    },
    {
      "epoch": 0.000190130615234375,
      "step": 31151,
      "training_step_time": 0.44675302505493164
    },
    {
      "epoch": 0.00019013671875,
      "model_forward_time": 0.11535096168518066,
      "step": 31152
    },
    {
      "epoch": 0.00019013671875,
      "step": 31152,
      "training_step_time": 0.43331265449523926
    },
    {
      "epoch": 0.000190142822265625,
      "model_forward_time": 0.11598467826843262,
      "step": 31153
    },
    {
      "epoch": 0.000190142822265625,
      "step": 31153,
      "training_step_time": 0.41649627685546875
    },
    {
      "epoch": 0.00019014892578125,
      "model_forward_time": 0.11586523056030273,
      "step": 31154
    },
    {
      "epoch": 0.00019014892578125,
      "step": 31154,
      "training_step_time": 0.4236311912536621
    },
    {
      "epoch": 0.000190155029296875,
      "model_forward_time": 0.11473679542541504,
      "step": 31155
    },
    {
      "epoch": 0.000190155029296875,
      "step": 31155,
      "training_step_time": 0.38811612129211426
    },
    {
      "epoch": 0.0001901611328125,
      "model_forward_time": 0.11492347717285156,
      "step": 31156
    },
    {
      "epoch": 0.0001901611328125,
      "step": 31156,
      "training_step_time": 0.3910398483276367
    },
    {
      "epoch": 0.000190167236328125,
      "model_forward_time": 0.11519622802734375,
      "step": 31157
    },
    {
      "epoch": 0.000190167236328125,
      "step": 31157,
      "training_step_time": 0.40607523918151855
    },
    {
      "epoch": 0.00019017333984375,
      "model_forward_time": 0.1154024600982666,
      "step": 31158
    },
    {
      "epoch": 0.00019017333984375,
      "step": 31158,
      "training_step_time": 0.3991363048553467
    },
    {
      "epoch": 0.000190179443359375,
      "model_forward_time": 0.11514806747436523,
      "step": 31159
    },
    {
      "epoch": 0.000190179443359375,
      "step": 31159,
      "training_step_time": 0.3887603282928467
    },
    {
      "epoch": 0.000190185546875,
      "grad_norm": 0.11956976354122162,
      "learning_rate": 5.093691139311356e-05,
      "loss": 0.0449,
      "step": 31160
    },
    {
      "epoch": 0.000190185546875,
      "model_forward_time": 0.11509466171264648,
      "step": 31160
    },
    {
      "epoch": 0.000190185546875,
      "step": 31160,
      "training_step_time": 0.3911097049713135
    },
    {
      "epoch": 0.000190191650390625,
      "model_forward_time": 0.11518716812133789,
      "step": 31161
    },
    {
      "epoch": 0.000190191650390625,
      "step": 31161,
      "training_step_time": 0.4020683765411377
    },
    {
      "epoch": 0.00019019775390625,
      "model_forward_time": 0.11543631553649902,
      "step": 31162
    },
    {
      "epoch": 0.00019019775390625,
      "step": 31162,
      "training_step_time": 0.6894550323486328
    },
    {
      "epoch": 0.000190203857421875,
      "model_forward_time": 0.11486220359802246,
      "step": 31163
    },
    {
      "epoch": 0.000190203857421875,
      "step": 31163,
      "training_step_time": 0.40692806243896484
    },
    {
      "epoch": 0.0001902099609375,
      "model_forward_time": 0.1146388053894043,
      "step": 31164
    },
    {
      "epoch": 0.0001902099609375,
      "step": 31164,
      "training_step_time": 0.4119737148284912
    },
    {
      "epoch": 0.000190216064453125,
      "model_forward_time": 0.11423134803771973,
      "step": 31165
    },
    {
      "epoch": 0.000190216064453125,
      "step": 31165,
      "training_step_time": 0.45716333389282227
    },
    {
      "epoch": 0.00019022216796875,
      "model_forward_time": 0.11462259292602539,
      "step": 31166
    },
    {
      "epoch": 0.00019022216796875,
      "step": 31166,
      "training_step_time": 0.3652377128601074
    },
    {
      "epoch": 0.000190228271484375,
      "model_forward_time": 0.11458230018615723,
      "step": 31167
    },
    {
      "epoch": 0.000190228271484375,
      "step": 31167,
      "training_step_time": 0.505469560623169
    },
    {
      "epoch": 0.000190234375,
      "model_forward_time": 0.11493802070617676,
      "step": 31168
    },
    {
      "epoch": 0.000190234375,
      "step": 31168,
      "training_step_time": 0.4813826084136963
    },
    {
      "epoch": 0.000190240478515625,
      "model_forward_time": 0.11510658264160156,
      "step": 31169
    },
    {
      "epoch": 0.000190240478515625,
      "step": 31169,
      "training_step_time": 0.3815593719482422
    },
    {
      "epoch": 0.00019024658203125,
      "grad_norm": 0.09815631806850433,
      "learning_rate": 5.0909358260403186e-05,
      "loss": 0.0409,
      "step": 31170
    },
    {
      "epoch": 0.00019024658203125,
      "model_forward_time": 0.11483430862426758,
      "step": 31170
    },
    {
      "epoch": 0.00019024658203125,
      "step": 31170,
      "training_step_time": 0.3906373977661133
    },
    {
      "epoch": 0.000190252685546875,
      "model_forward_time": 0.11487030982971191,
      "step": 31171
    },
    {
      "epoch": 0.000190252685546875,
      "step": 31171,
      "training_step_time": 0.39649319648742676
    },
    {
      "epoch": 0.0001902587890625,
      "model_forward_time": 0.11521744728088379,
      "step": 31172
    },
    {
      "epoch": 0.0001902587890625,
      "step": 31172,
      "training_step_time": 0.3871171474456787
    },
    {
      "epoch": 0.000190264892578125,
      "model_forward_time": 0.11464595794677734,
      "step": 31173
    },
    {
      "epoch": 0.000190264892578125,
      "step": 31173,
      "training_step_time": 0.39222240447998047
    },
    {
      "epoch": 0.00019027099609375,
      "model_forward_time": 0.11837124824523926,
      "step": 31174
    },
    {
      "epoch": 0.00019027099609375,
      "step": 31174,
      "training_step_time": 0.49530029296875
    },
    {
      "epoch": 0.000190277099609375,
      "model_forward_time": 0.11551618576049805,
      "step": 31175
    },
    {
      "epoch": 0.000190277099609375,
      "step": 31175,
      "training_step_time": 0.4004364013671875
    },
    {
      "epoch": 0.000190283203125,
      "model_forward_time": 0.11554360389709473,
      "step": 31176
    },
    {
      "epoch": 0.000190283203125,
      "step": 31176,
      "training_step_time": 0.39916372299194336
    },
    {
      "epoch": 0.000190289306640625,
      "model_forward_time": 0.11479878425598145,
      "step": 31177
    },
    {
      "epoch": 0.000190289306640625,
      "step": 31177,
      "training_step_time": 0.408097505569458
    },
    {
      "epoch": 0.00019029541015625,
      "model_forward_time": 0.11494302749633789,
      "step": 31178
    },
    {
      "epoch": 0.00019029541015625,
      "step": 31178,
      "training_step_time": 0.4411137104034424
    },
    {
      "epoch": 0.000190301513671875,
      "model_forward_time": 0.11569523811340332,
      "step": 31179
    },
    {
      "epoch": 0.000190301513671875,
      "step": 31179,
      "training_step_time": 0.4893200397491455
    },
    {
      "epoch": 0.0001903076171875,
      "grad_norm": 0.1249084323644638,
      "learning_rate": 5.088180485145378e-05,
      "loss": 0.0408,
      "step": 31180
    },
    {
      "epoch": 0.0001903076171875,
      "model_forward_time": 0.11537408828735352,
      "step": 31180
    },
    {
      "epoch": 0.0001903076171875,
      "step": 31180,
      "training_step_time": 0.5658636093139648
    },
    {
      "epoch": 0.000190313720703125,
      "model_forward_time": 0.13290071487426758,
      "step": 31181
    },
    {
      "epoch": 0.000190313720703125,
      "step": 31181,
      "training_step_time": 0.47063159942626953
    },
    {
      "epoch": 0.00019031982421875,
      "model_forward_time": 0.11481189727783203,
      "step": 31182
    },
    {
      "epoch": 0.00019031982421875,
      "step": 31182,
      "training_step_time": 0.4061605930328369
    },
    {
      "epoch": 0.000190325927734375,
      "model_forward_time": 0.11467862129211426,
      "step": 31183
    },
    {
      "epoch": 0.000190325927734375,
      "step": 31183,
      "training_step_time": 0.39635491371154785
    },
    {
      "epoch": 0.00019033203125,
      "model_forward_time": 0.11625289916992188,
      "step": 31184
    },
    {
      "epoch": 0.00019033203125,
      "step": 31184,
      "training_step_time": 0.3916919231414795
    },
    {
      "epoch": 0.000190338134765625,
      "model_forward_time": 0.11487126350402832,
      "step": 31185
    },
    {
      "epoch": 0.000190338134765625,
      "step": 31185,
      "training_step_time": 0.3835437297821045
    },
    {
      "epoch": 0.00019034423828125,
      "model_forward_time": 0.11482858657836914,
      "step": 31186
    },
    {
      "epoch": 0.00019034423828125,
      "step": 31186,
      "training_step_time": 0.504096269607544
    },
    {
      "epoch": 0.000190350341796875,
      "model_forward_time": 0.11553478240966797,
      "step": 31187
    },
    {
      "epoch": 0.000190350341796875,
      "step": 31187,
      "training_step_time": 0.3943977355957031
    },
    {
      "epoch": 0.0001903564453125,
      "model_forward_time": 0.11534929275512695,
      "step": 31188
    },
    {
      "epoch": 0.0001903564453125,
      "step": 31188,
      "training_step_time": 0.400022029876709
    },
    {
      "epoch": 0.000190362548828125,
      "model_forward_time": 0.11537599563598633,
      "step": 31189
    },
    {
      "epoch": 0.000190362548828125,
      "step": 31189,
      "training_step_time": 0.42020463943481445
    },
    {
      "epoch": 0.00019036865234375,
      "grad_norm": 0.10342900454998016,
      "learning_rate": 5.085425117463533e-05,
      "loss": 0.0439,
      "step": 31190
    },
    {
      "epoch": 0.00019036865234375,
      "model_forward_time": 0.11534404754638672,
      "step": 31190
    },
    {
      "epoch": 0.00019036865234375,
      "step": 31190,
      "training_step_time": 0.39475274085998535
    },
    {
      "epoch": 0.000190374755859375,
      "model_forward_time": 0.11527609825134277,
      "step": 31191
    },
    {
      "epoch": 0.000190374755859375,
      "step": 31191,
      "training_step_time": 0.40465617179870605
    },
    {
      "epoch": 0.000190380859375,
      "model_forward_time": 0.1152200698852539,
      "step": 31192
    },
    {
      "epoch": 0.000190380859375,
      "step": 31192,
      "training_step_time": 0.40523791313171387
    },
    {
      "epoch": 0.000190386962890625,
      "model_forward_time": 0.11594414710998535,
      "step": 31193
    },
    {
      "epoch": 0.000190386962890625,
      "step": 31193,
      "training_step_time": 0.4639887809753418
    },
    {
      "epoch": 0.00019039306640625,
      "model_forward_time": 0.11524391174316406,
      "step": 31194
    },
    {
      "epoch": 0.00019039306640625,
      "step": 31194,
      "training_step_time": 0.5087804794311523
    },
    {
      "epoch": 0.000190399169921875,
      "model_forward_time": 0.11574912071228027,
      "step": 31195
    },
    {
      "epoch": 0.000190399169921875,
      "step": 31195,
      "training_step_time": 0.4773690700531006
    },
    {
      "epoch": 0.0001904052734375,
      "model_forward_time": 0.11499691009521484,
      "step": 31196
    },
    {
      "epoch": 0.0001904052734375,
      "step": 31196,
      "training_step_time": 0.39351963996887207
    },
    {
      "epoch": 0.000190411376953125,
      "model_forward_time": 0.1146535873413086,
      "step": 31197
    },
    {
      "epoch": 0.000190411376953125,
      "step": 31197,
      "training_step_time": 0.38913869857788086
    },
    {
      "epoch": 0.00019041748046875,
      "model_forward_time": 0.1153402328491211,
      "step": 31198
    },
    {
      "epoch": 0.00019041748046875,
      "step": 31198,
      "training_step_time": 0.3936154842376709
    },
    {
      "epoch": 0.000190423583984375,
      "model_forward_time": 0.11528515815734863,
      "step": 31199
    },
    {
      "epoch": 0.000190423583984375,
      "step": 31199,
      "training_step_time": 0.39566898345947266
    },
    {
      "epoch": 0.0001904296875,
      "grad_norm": 0.16053296625614166,
      "learning_rate": 5.0826697238317935e-05,
      "loss": 0.0448,
      "step": 31200
    },
    {
      "epoch": 0.0001904296875,
      "model_forward_time": 0.11518049240112305,
      "step": 31200
    },
    {
      "epoch": 0.0001904296875,
      "step": 31200,
      "training_step_time": 0.39191532135009766
    },
    {
      "epoch": 0.000190435791015625,
      "model_forward_time": 0.11624312400817871,
      "step": 31201
    },
    {
      "epoch": 0.000190435791015625,
      "step": 31201,
      "training_step_time": 0.40721988677978516
    },
    {
      "epoch": 0.00019044189453125,
      "model_forward_time": 0.11519908905029297,
      "step": 31202
    },
    {
      "epoch": 0.00019044189453125,
      "step": 31202,
      "training_step_time": 0.39214038848876953
    },
    {
      "epoch": 0.000190447998046875,
      "model_forward_time": 0.11555790901184082,
      "step": 31203
    },
    {
      "epoch": 0.000190447998046875,
      "step": 31203,
      "training_step_time": 0.46560144424438477
    },
    {
      "epoch": 0.0001904541015625,
      "model_forward_time": 0.11481022834777832,
      "step": 31204
    },
    {
      "epoch": 0.0001904541015625,
      "step": 31204,
      "training_step_time": 0.48961901664733887
    },
    {
      "epoch": 0.000190460205078125,
      "model_forward_time": 0.11516427993774414,
      "step": 31205
    },
    {
      "epoch": 0.000190460205078125,
      "step": 31205,
      "training_step_time": 0.3874638080596924
    },
    {
      "epoch": 0.00019046630859375,
      "model_forward_time": 0.11655902862548828,
      "step": 31206
    },
    {
      "epoch": 0.00019046630859375,
      "step": 31206,
      "training_step_time": 0.4826936721801758
    },
    {
      "epoch": 0.000190472412109375,
      "model_forward_time": 0.11502790451049805,
      "step": 31207
    },
    {
      "epoch": 0.000190472412109375,
      "step": 31207,
      "training_step_time": 0.3958287239074707
    },
    {
      "epoch": 0.000190478515625,
      "model_forward_time": 0.11528825759887695,
      "step": 31208
    },
    {
      "epoch": 0.000190478515625,
      "step": 31208,
      "training_step_time": 0.4344596862792969
    },
    {
      "epoch": 0.000190484619140625,
      "model_forward_time": 0.1155083179473877,
      "step": 31209
    },
    {
      "epoch": 0.000190484619140625,
      "step": 31209,
      "training_step_time": 0.43907618522644043
    },
    {
      "epoch": 0.00019049072265625,
      "grad_norm": 0.098274827003479,
      "learning_rate": 5.0799143050871746e-05,
      "loss": 0.0407,
      "step": 31210
    },
    {
      "epoch": 0.00019049072265625,
      "model_forward_time": 0.11552119255065918,
      "step": 31210
    },
    {
      "epoch": 0.00019049072265625,
      "step": 31210,
      "training_step_time": 0.44080376625061035
    },
    {
      "epoch": 0.000190496826171875,
      "model_forward_time": 0.11557149887084961,
      "step": 31211
    },
    {
      "epoch": 0.000190496826171875,
      "step": 31211,
      "training_step_time": 0.4189767837524414
    },
    {
      "epoch": 0.0001905029296875,
      "model_forward_time": 0.11513996124267578,
      "step": 31212
    },
    {
      "epoch": 0.0001905029296875,
      "step": 31212,
      "training_step_time": 0.40160703659057617
    },
    {
      "epoch": 0.000190509033203125,
      "model_forward_time": 0.1152961254119873,
      "step": 31213
    },
    {
      "epoch": 0.000190509033203125,
      "step": 31213,
      "training_step_time": 0.39139461517333984
    },
    {
      "epoch": 0.00019051513671875,
      "model_forward_time": 0.11527442932128906,
      "step": 31214
    },
    {
      "epoch": 0.00019051513671875,
      "step": 31214,
      "training_step_time": 0.3859272003173828
    },
    {
      "epoch": 0.000190521240234375,
      "model_forward_time": 0.11498379707336426,
      "step": 31215
    },
    {
      "epoch": 0.000190521240234375,
      "step": 31215,
      "training_step_time": 0.39601778984069824
    },
    {
      "epoch": 0.00019052734375,
      "model_forward_time": 0.1154017448425293,
      "step": 31216
    },
    {
      "epoch": 0.00019052734375,
      "step": 31216,
      "training_step_time": 0.9038615226745605
    },
    {
      "epoch": 0.000190533447265625,
      "model_forward_time": 0.11479949951171875,
      "step": 31217
    },
    {
      "epoch": 0.000190533447265625,
      "step": 31217,
      "training_step_time": 0.41551780700683594
    },
    {
      "epoch": 0.00019053955078125,
      "model_forward_time": 0.11404633522033691,
      "step": 31218
    },
    {
      "epoch": 0.00019053955078125,
      "step": 31218,
      "training_step_time": 0.39556169509887695
    },
    {
      "epoch": 0.000190545654296875,
      "model_forward_time": 0.11466455459594727,
      "step": 31219
    },
    {
      "epoch": 0.000190545654296875,
      "step": 31219,
      "training_step_time": 0.39143848419189453
    },
    {
      "epoch": 0.0001905517578125,
      "grad_norm": 0.1366557627916336,
      "learning_rate": 5.077158862066699e-05,
      "loss": 0.0498,
      "step": 31220
    },
    {
      "epoch": 0.0001905517578125,
      "model_forward_time": 0.1137540340423584,
      "step": 31220
    },
    {
      "epoch": 0.0001905517578125,
      "step": 31220,
      "training_step_time": 0.45926856994628906
    },
    {
      "epoch": 0.000190557861328125,
      "model_forward_time": 0.11416316032409668,
      "step": 31221
    },
    {
      "epoch": 0.000190557861328125,
      "step": 31221,
      "training_step_time": 0.4406247138977051
    },
    {
      "epoch": 0.00019056396484375,
      "model_forward_time": 0.11547207832336426,
      "step": 31222
    },
    {
      "epoch": 0.00019056396484375,
      "step": 31222,
      "training_step_time": 0.713036060333252
    },
    {
      "epoch": 0.000190570068359375,
      "model_forward_time": 0.1146390438079834,
      "step": 31223
    },
    {
      "epoch": 0.000190570068359375,
      "step": 31223,
      "training_step_time": 0.40211963653564453
    },
    {
      "epoch": 0.000190576171875,
      "model_forward_time": 0.11456418037414551,
      "step": 31224
    },
    {
      "epoch": 0.000190576171875,
      "step": 31224,
      "training_step_time": 0.4277489185333252
    },
    {
      "epoch": 0.000190582275390625,
      "model_forward_time": 0.1146080493927002,
      "step": 31225
    },
    {
      "epoch": 0.000190582275390625,
      "step": 31225,
      "training_step_time": 0.390852689743042
    },
    {
      "epoch": 0.00019058837890625,
      "model_forward_time": 0.11413908004760742,
      "step": 31226
    },
    {
      "epoch": 0.00019058837890625,
      "step": 31226,
      "training_step_time": 0.39939308166503906
    },
    {
      "epoch": 0.000190594482421875,
      "model_forward_time": 0.1145486831665039,
      "step": 31227
    },
    {
      "epoch": 0.000190594482421875,
      "step": 31227,
      "training_step_time": 0.3818514347076416
    },
    {
      "epoch": 0.0001906005859375,
      "model_forward_time": 0.11517047882080078,
      "step": 31228
    },
    {
      "epoch": 0.0001906005859375,
      "step": 31228,
      "training_step_time": 0.396608829498291
    },
    {
      "epoch": 0.000190606689453125,
      "model_forward_time": 0.11599850654602051,
      "step": 31229
    },
    {
      "epoch": 0.000190606689453125,
      "step": 31229,
      "training_step_time": 0.4374542236328125
    },
    {
      "epoch": 0.00019061279296875,
      "grad_norm": 0.1697777807712555,
      "learning_rate": 5.074403395607399e-05,
      "loss": 0.0387,
      "step": 31230
    },
    {
      "epoch": 0.00019061279296875,
      "model_forward_time": 0.11744427680969238,
      "step": 31230
    },
    {
      "epoch": 0.00019061279296875,
      "step": 31230,
      "training_step_time": 0.39795398712158203
    },
    {
      "epoch": 0.000190618896484375,
      "model_forward_time": 0.11559081077575684,
      "step": 31231
    },
    {
      "epoch": 0.000190618896484375,
      "step": 31231,
      "training_step_time": 0.400984525680542
    },
    {
      "epoch": 0.000190625,
      "model_forward_time": 0.11567473411560059,
      "step": 31232
    },
    {
      "epoch": 0.000190625,
      "step": 31232,
      "training_step_time": 0.4026944637298584
    },
    {
      "epoch": 0.000190631103515625,
      "model_forward_time": 0.11508011817932129,
      "step": 31233
    },
    {
      "epoch": 0.000190631103515625,
      "step": 31233,
      "training_step_time": 0.4082801342010498
    },
    {
      "epoch": 0.00019063720703125,
      "model_forward_time": 0.1159372329711914,
      "step": 31234
    },
    {
      "epoch": 0.00019063720703125,
      "step": 31234,
      "training_step_time": 1.1067376136779785
    },
    {
      "epoch": 0.000190643310546875,
      "model_forward_time": 0.11416029930114746,
      "step": 31235
    },
    {
      "epoch": 0.000190643310546875,
      "step": 31235,
      "training_step_time": 0.4506964683532715
    },
    {
      "epoch": 0.0001906494140625,
      "model_forward_time": 0.11398863792419434,
      "step": 31236
    },
    {
      "epoch": 0.0001906494140625,
      "step": 31236,
      "training_step_time": 0.4214482307434082
    },
    {
      "epoch": 0.000190655517578125,
      "model_forward_time": 0.11429357528686523,
      "step": 31237
    },
    {
      "epoch": 0.000190655517578125,
      "step": 31237,
      "training_step_time": 0.4605216979980469
    },
    {
      "epoch": 0.00019066162109375,
      "model_forward_time": 0.11385679244995117,
      "step": 31238
    },
    {
      "epoch": 0.00019066162109375,
      "step": 31238,
      "training_step_time": 0.39895081520080566
    },
    {
      "epoch": 0.000190667724609375,
      "model_forward_time": 0.11426377296447754,
      "step": 31239
    },
    {
      "epoch": 0.000190667724609375,
      "step": 31239,
      "training_step_time": 0.3840663433074951
    },
    {
      "epoch": 0.000190673828125,
      "grad_norm": 0.10167473554611206,
      "learning_rate": 5.071647906546312e-05,
      "loss": 0.0446,
      "step": 31240
    },
    {
      "epoch": 0.000190673828125,
      "model_forward_time": 0.11472320556640625,
      "step": 31240
    },
    {
      "epoch": 0.000190673828125,
      "step": 31240,
      "training_step_time": 0.47801876068115234
    },
    {
      "epoch": 0.000190679931640625,
      "model_forward_time": 0.11634540557861328,
      "step": 31241
    },
    {
      "epoch": 0.000190679931640625,
      "step": 31241,
      "training_step_time": 0.4444732666015625
    },
    {
      "epoch": 0.00019068603515625,
      "model_forward_time": 0.11429905891418457,
      "step": 31242
    },
    {
      "epoch": 0.00019068603515625,
      "step": 31242,
      "training_step_time": 0.4126245975494385
    },
    {
      "epoch": 0.000190692138671875,
      "model_forward_time": 0.1148686408996582,
      "step": 31243
    },
    {
      "epoch": 0.000190692138671875,
      "step": 31243,
      "training_step_time": 0.4173722267150879
    },
    {
      "epoch": 0.0001906982421875,
      "model_forward_time": 0.11481595039367676,
      "step": 31244
    },
    {
      "epoch": 0.0001906982421875,
      "step": 31244,
      "training_step_time": 0.3952298164367676
    },
    {
      "epoch": 0.000190704345703125,
      "model_forward_time": 0.11580634117126465,
      "step": 31245
    },
    {
      "epoch": 0.000190704345703125,
      "step": 31245,
      "training_step_time": 0.3855290412902832
    },
    {
      "epoch": 0.00019071044921875,
      "model_forward_time": 0.11475658416748047,
      "step": 31246
    },
    {
      "epoch": 0.00019071044921875,
      "step": 31246,
      "training_step_time": 0.6772806644439697
    },
    {
      "epoch": 0.000190716552734375,
      "model_forward_time": 0.11443948745727539,
      "step": 31247
    },
    {
      "epoch": 0.000190716552734375,
      "step": 31247,
      "training_step_time": 0.46440792083740234
    },
    {
      "epoch": 0.00019072265625,
      "model_forward_time": 0.11438322067260742,
      "step": 31248
    },
    {
      "epoch": 0.00019072265625,
      "step": 31248,
      "training_step_time": 0.5274257659912109
    },
    {
      "epoch": 0.000190728759765625,
      "model_forward_time": 0.11498880386352539,
      "step": 31249
    },
    {
      "epoch": 0.000190728759765625,
      "step": 31249,
      "training_step_time": 0.5019822120666504
    },
    {
      "epoch": 0.00019073486328125,
      "grad_norm": 0.11700793355703354,
      "learning_rate": 5.068892395720483e-05,
      "loss": 0.0433,
      "step": 31250
    },
    {
      "epoch": 0.00019073486328125,
      "model_forward_time": 0.11434650421142578,
      "step": 31250
    },
    {
      "epoch": 0.00019073486328125,
      "step": 31250,
      "training_step_time": 0.43648600578308105
    },
    {
      "epoch": 0.000190740966796875,
      "model_forward_time": 0.11490678787231445,
      "step": 31251
    },
    {
      "epoch": 0.000190740966796875,
      "step": 31251,
      "training_step_time": 0.45484280586242676
    },
    {
      "epoch": 0.0001907470703125,
      "model_forward_time": 0.11554622650146484,
      "step": 31252
    },
    {
      "epoch": 0.0001907470703125,
      "step": 31252,
      "training_step_time": 0.39928650856018066
    },
    {
      "epoch": 0.000190753173828125,
      "model_forward_time": 0.11426687240600586,
      "step": 31253
    },
    {
      "epoch": 0.000190753173828125,
      "step": 31253,
      "training_step_time": 0.38531947135925293
    },
    {
      "epoch": 0.00019075927734375,
      "model_forward_time": 0.11504602432250977,
      "step": 31254
    },
    {
      "epoch": 0.00019075927734375,
      "step": 31254,
      "training_step_time": 0.4233365058898926
    },
    {
      "epoch": 0.000190765380859375,
      "model_forward_time": 0.11586332321166992,
      "step": 31255
    },
    {
      "epoch": 0.000190765380859375,
      "step": 31255,
      "training_step_time": 0.47103404998779297
    },
    {
      "epoch": 0.000190771484375,
      "model_forward_time": 0.11488533020019531,
      "step": 31256
    },
    {
      "epoch": 0.000190771484375,
      "step": 31256,
      "training_step_time": 0.40128350257873535
    },
    {
      "epoch": 0.000190777587890625,
      "model_forward_time": 0.11453843116760254,
      "step": 31257
    },
    {
      "epoch": 0.000190777587890625,
      "step": 31257,
      "training_step_time": 0.40028858184814453
    },
    {
      "epoch": 0.00019078369140625,
      "model_forward_time": 0.11510014533996582,
      "step": 31258
    },
    {
      "epoch": 0.00019078369140625,
      "step": 31258,
      "training_step_time": 0.4076242446899414
    },
    {
      "epoch": 0.000190789794921875,
      "model_forward_time": 0.11482763290405273,
      "step": 31259
    },
    {
      "epoch": 0.000190789794921875,
      "step": 31259,
      "training_step_time": 0.39879488945007324
    },
    {
      "epoch": 0.0001907958984375,
      "grad_norm": 0.14184711873531342,
      "learning_rate": 5.066136863966963e-05,
      "loss": 0.0408,
      "step": 31260
    },
    {
      "epoch": 0.0001907958984375,
      "model_forward_time": 0.11464118957519531,
      "step": 31260
    },
    {
      "epoch": 0.0001907958984375,
      "step": 31260,
      "training_step_time": 0.40179896354675293
    },
    {
      "epoch": 0.000190802001953125,
      "model_forward_time": 0.11577653884887695,
      "step": 31261
    },
    {
      "epoch": 0.000190802001953125,
      "step": 31261,
      "training_step_time": 0.45754551887512207
    },
    {
      "epoch": 0.00019080810546875,
      "model_forward_time": 0.11511874198913574,
      "step": 31262
    },
    {
      "epoch": 0.00019080810546875,
      "step": 31262,
      "training_step_time": 0.4969010353088379
    },
    {
      "epoch": 0.000190814208984375,
      "model_forward_time": 0.11539530754089355,
      "step": 31263
    },
    {
      "epoch": 0.000190814208984375,
      "step": 31263,
      "training_step_time": 0.3936033248901367
    },
    {
      "epoch": 0.0001908203125,
      "model_forward_time": 0.114990234375,
      "step": 31264
    },
    {
      "epoch": 0.0001908203125,
      "step": 31264,
      "training_step_time": 0.4384160041809082
    },
    {
      "epoch": 0.000190826416015625,
      "model_forward_time": 0.1158292293548584,
      "step": 31265
    },
    {
      "epoch": 0.000190826416015625,
      "step": 31265,
      "training_step_time": 0.4025120735168457
    },
    {
      "epoch": 0.00019083251953125,
      "model_forward_time": 0.11478018760681152,
      "step": 31266
    },
    {
      "epoch": 0.00019083251953125,
      "step": 31266,
      "training_step_time": 0.39072489738464355
    },
    {
      "epoch": 0.000190838623046875,
      "model_forward_time": 0.11530065536499023,
      "step": 31267
    },
    {
      "epoch": 0.000190838623046875,
      "step": 31267,
      "training_step_time": 0.390362024307251
    },
    {
      "epoch": 0.0001908447265625,
      "model_forward_time": 0.11563396453857422,
      "step": 31268
    },
    {
      "epoch": 0.0001908447265625,
      "step": 31268,
      "training_step_time": 0.4083671569824219
    },
    {
      "epoch": 0.000190850830078125,
      "model_forward_time": 0.11559796333312988,
      "step": 31269
    },
    {
      "epoch": 0.000190850830078125,
      "step": 31269,
      "training_step_time": 0.4532346725463867
    },
    {
      "epoch": 0.00019085693359375,
      "grad_norm": 0.09122388809919357,
      "learning_rate": 5.063381312122809e-05,
      "loss": 0.0451,
      "step": 31270
    },
    {
      "epoch": 0.00019085693359375,
      "model_forward_time": 0.11586499214172363,
      "step": 31270
    },
    {
      "epoch": 0.00019085693359375,
      "step": 31270,
      "training_step_time": 0.5265839099884033
    },
    {
      "epoch": 0.000190863037109375,
      "model_forward_time": 0.11446499824523926,
      "step": 31271
    },
    {
      "epoch": 0.000190863037109375,
      "step": 31271,
      "training_step_time": 0.39734697341918945
    },
    {
      "epoch": 0.000190869140625,
      "model_forward_time": 0.11496162414550781,
      "step": 31272
    },
    {
      "epoch": 0.000190869140625,
      "step": 31272,
      "training_step_time": 0.3929755687713623
    },
    {
      "epoch": 0.000190875244140625,
      "model_forward_time": 0.11456441879272461,
      "step": 31273
    },
    {
      "epoch": 0.000190875244140625,
      "step": 31273,
      "training_step_time": 0.39092588424682617
    },
    {
      "epoch": 0.00019088134765625,
      "model_forward_time": 0.11476492881774902,
      "step": 31274
    },
    {
      "epoch": 0.00019088134765625,
      "step": 31274,
      "training_step_time": 0.39078736305236816
    },
    {
      "epoch": 0.000190887451171875,
      "model_forward_time": 0.11546969413757324,
      "step": 31275
    },
    {
      "epoch": 0.000190887451171875,
      "step": 31275,
      "training_step_time": 0.3970828056335449
    },
    {
      "epoch": 0.0001908935546875,
      "model_forward_time": 0.11515665054321289,
      "step": 31276
    },
    {
      "epoch": 0.0001908935546875,
      "step": 31276,
      "training_step_time": 0.855975866317749
    },
    {
      "epoch": 0.000190899658203125,
      "model_forward_time": 0.11463642120361328,
      "step": 31277
    },
    {
      "epoch": 0.000190899658203125,
      "step": 31277,
      "training_step_time": 0.4798777103424072
    },
    {
      "epoch": 0.00019090576171875,
      "model_forward_time": 0.11469554901123047,
      "step": 31278
    },
    {
      "epoch": 0.00019090576171875,
      "step": 31278,
      "training_step_time": 0.4440891742706299
    },
    {
      "epoch": 0.000190911865234375,
      "model_forward_time": 0.11425137519836426,
      "step": 31279
    },
    {
      "epoch": 0.000190911865234375,
      "step": 31279,
      "training_step_time": 0.3890516757965088
    },
    {
      "epoch": 0.00019091796875,
      "grad_norm": 0.14381451904773712,
      "learning_rate": 5.0606257410250866e-05,
      "loss": 0.0415,
      "step": 31280
    },
    {
      "epoch": 0.00019091796875,
      "model_forward_time": 0.11405539512634277,
      "step": 31280
    },
    {
      "epoch": 0.00019091796875,
      "step": 31280,
      "training_step_time": 0.4440934658050537
    },
    {
      "epoch": 0.000190924072265625,
      "model_forward_time": 0.11459946632385254,
      "step": 31281
    },
    {
      "epoch": 0.000190924072265625,
      "step": 31281,
      "training_step_time": 0.4203619956970215
    },
    {
      "epoch": 0.00019093017578125,
      "model_forward_time": 0.11562132835388184,
      "step": 31282
    },
    {
      "epoch": 0.00019093017578125,
      "step": 31282,
      "training_step_time": 0.3920435905456543
    },
    {
      "epoch": 0.000190936279296875,
      "model_forward_time": 0.11496686935424805,
      "step": 31283
    },
    {
      "epoch": 0.000190936279296875,
      "step": 31283,
      "training_step_time": 0.38615894317626953
    },
    {
      "epoch": 0.0001909423828125,
      "model_forward_time": 0.11501550674438477,
      "step": 31284
    },
    {
      "epoch": 0.0001909423828125,
      "step": 31284,
      "training_step_time": 0.40004825592041016
    },
    {
      "epoch": 0.000190948486328125,
      "model_forward_time": 0.11489605903625488,
      "step": 31285
    },
    {
      "epoch": 0.000190948486328125,
      "step": 31285,
      "training_step_time": 0.40252685546875
    },
    {
      "epoch": 0.00019095458984375,
      "model_forward_time": 0.11449480056762695,
      "step": 31286
    },
    {
      "epoch": 0.00019095458984375,
      "step": 31286,
      "training_step_time": 0.38523221015930176
    },
    {
      "epoch": 0.000190960693359375,
      "model_forward_time": 0.11537671089172363,
      "step": 31287
    },
    {
      "epoch": 0.000190960693359375,
      "step": 31287,
      "training_step_time": 0.3850233554840088
    },
    {
      "epoch": 0.000190966796875,
      "model_forward_time": 0.11575818061828613,
      "step": 31288
    },
    {
      "epoch": 0.000190966796875,
      "step": 31288,
      "training_step_time": 0.4019758701324463
    },
    {
      "epoch": 0.000190972900390625,
      "model_forward_time": 0.11571812629699707,
      "step": 31289
    },
    {
      "epoch": 0.000190972900390625,
      "step": 31289,
      "training_step_time": 0.42809343338012695
    },
    {
      "epoch": 0.00019097900390625,
      "grad_norm": 0.1306833028793335,
      "learning_rate": 5.057870151510864e-05,
      "loss": 0.0456,
      "step": 31290
    },
    {
      "epoch": 0.00019097900390625,
      "model_forward_time": 0.11500263214111328,
      "step": 31290
    },
    {
      "epoch": 0.00019097900390625,
      "step": 31290,
      "training_step_time": 0.4648721218109131
    },
    {
      "epoch": 0.000190985107421875,
      "model_forward_time": 0.11483430862426758,
      "step": 31291
    },
    {
      "epoch": 0.000190985107421875,
      "step": 31291,
      "training_step_time": 0.4073159694671631
    },
    {
      "epoch": 0.0001909912109375,
      "model_forward_time": 0.11509203910827637,
      "step": 31292
    },
    {
      "epoch": 0.0001909912109375,
      "step": 31292,
      "training_step_time": 0.4117293357849121
    },
    {
      "epoch": 0.000190997314453125,
      "model_forward_time": 0.11501336097717285,
      "step": 31293
    },
    {
      "epoch": 0.000190997314453125,
      "step": 31293,
      "training_step_time": 0.449779748916626
    },
    {
      "epoch": 0.00019100341796875,
      "model_forward_time": 0.11545014381408691,
      "step": 31294
    },
    {
      "epoch": 0.00019100341796875,
      "step": 31294,
      "training_step_time": 0.44897985458374023
    },
    {
      "epoch": 0.000191009521484375,
      "model_forward_time": 0.11502432823181152,
      "step": 31295
    },
    {
      "epoch": 0.000191009521484375,
      "step": 31295,
      "training_step_time": 0.39040327072143555
    },
    {
      "epoch": 0.000191015625,
      "model_forward_time": 0.11473202705383301,
      "step": 31296
    },
    {
      "epoch": 0.000191015625,
      "step": 31296,
      "training_step_time": 0.3946211338043213
    },
    {
      "epoch": 0.000191021728515625,
      "model_forward_time": 0.1154639720916748,
      "step": 31297
    },
    {
      "epoch": 0.000191021728515625,
      "step": 31297,
      "training_step_time": 0.3941652774810791
    },
    {
      "epoch": 0.00019102783203125,
      "model_forward_time": 0.11488199234008789,
      "step": 31298
    },
    {
      "epoch": 0.00019102783203125,
      "step": 31298,
      "training_step_time": 0.39395618438720703
    },
    {
      "epoch": 0.000191033935546875,
      "model_forward_time": 0.1151437759399414,
      "step": 31299
    },
    {
      "epoch": 0.000191033935546875,
      "step": 31299,
      "training_step_time": 0.3872971534729004
    },
    {
      "epoch": 0.0001910400390625,
      "grad_norm": 0.10465063899755478,
      "learning_rate": 5.0551145444172186e-05,
      "loss": 0.0402,
      "step": 31300
    },
    {
      "epoch": 0.0001910400390625,
      "model_forward_time": 0.11561226844787598,
      "step": 31300
    },
    {
      "epoch": 0.0001910400390625,
      "step": 31300,
      "training_step_time": 0.39516544342041016
    },
    {
      "epoch": 0.000191046142578125,
      "model_forward_time": 0.11486053466796875,
      "step": 31301
    },
    {
      "epoch": 0.000191046142578125,
      "step": 31301,
      "training_step_time": 0.3997917175292969
    },
    {
      "epoch": 0.00019105224609375,
      "model_forward_time": 0.11504793167114258,
      "step": 31302
    },
    {
      "epoch": 0.00019105224609375,
      "step": 31302,
      "training_step_time": 0.41909241676330566
    },
    {
      "epoch": 0.000191058349609375,
      "model_forward_time": 0.11504769325256348,
      "step": 31303
    },
    {
      "epoch": 0.000191058349609375,
      "step": 31303,
      "training_step_time": 0.49941420555114746
    },
    {
      "epoch": 0.000191064453125,
      "model_forward_time": 0.11515378952026367,
      "step": 31304
    },
    {
      "epoch": 0.000191064453125,
      "step": 31304,
      "training_step_time": 0.4225497245788574
    },
    {
      "epoch": 0.000191070556640625,
      "model_forward_time": 0.11474418640136719,
      "step": 31305
    },
    {
      "epoch": 0.000191070556640625,
      "step": 31305,
      "training_step_time": 0.4961891174316406
    },
    {
      "epoch": 0.00019107666015625,
      "model_forward_time": 0.11533260345458984,
      "step": 31306
    },
    {
      "epoch": 0.00019107666015625,
      "step": 31306,
      "training_step_time": 0.47959232330322266
    },
    {
      "epoch": 0.000191082763671875,
      "model_forward_time": 0.11492586135864258,
      "step": 31307
    },
    {
      "epoch": 0.000191082763671875,
      "step": 31307,
      "training_step_time": 0.4965031147003174
    },
    {
      "epoch": 0.0001910888671875,
      "model_forward_time": 0.11482667922973633,
      "step": 31308
    },
    {
      "epoch": 0.0001910888671875,
      "step": 31308,
      "training_step_time": 0.3883392810821533
    },
    {
      "epoch": 0.000191094970703125,
      "model_forward_time": 0.11422991752624512,
      "step": 31309
    },
    {
      "epoch": 0.000191094970703125,
      "step": 31309,
      "training_step_time": 0.38970112800598145
    },
    {
      "epoch": 0.00019110107421875,
      "grad_norm": 0.11094014346599579,
      "learning_rate": 5.052358920581229e-05,
      "loss": 0.041,
      "step": 31310
    },
    {
      "epoch": 0.00019110107421875,
      "model_forward_time": 0.11461663246154785,
      "step": 31310
    },
    {
      "epoch": 0.00019110107421875,
      "step": 31310,
      "training_step_time": 0.38591504096984863
    },
    {
      "epoch": 0.000191107177734375,
      "model_forward_time": 0.11510848999023438,
      "step": 31311
    },
    {
      "epoch": 0.000191107177734375,
      "step": 31311,
      "training_step_time": 0.3834409713745117
    },
    {
      "epoch": 0.00019111328125,
      "model_forward_time": 0.11517071723937988,
      "step": 31312
    },
    {
      "epoch": 0.00019111328125,
      "step": 31312,
      "training_step_time": 0.4650845527648926
    },
    {
      "epoch": 0.000191119384765625,
      "model_forward_time": 0.11602473258972168,
      "step": 31313
    },
    {
      "epoch": 0.000191119384765625,
      "step": 31313,
      "training_step_time": 0.3950016498565674
    },
    {
      "epoch": 0.00019112548828125,
      "model_forward_time": 0.11533689498901367,
      "step": 31314
    },
    {
      "epoch": 0.00019112548828125,
      "step": 31314,
      "training_step_time": 0.39896178245544434
    },
    {
      "epoch": 0.000191131591796875,
      "model_forward_time": 0.11510348320007324,
      "step": 31315
    },
    {
      "epoch": 0.000191131591796875,
      "step": 31315,
      "training_step_time": 0.39072251319885254
    },
    {
      "epoch": 0.0001911376953125,
      "model_forward_time": 0.11534643173217773,
      "step": 31316
    },
    {
      "epoch": 0.0001911376953125,
      "step": 31316,
      "training_step_time": 0.3893430233001709
    },
    {
      "epoch": 0.000191143798828125,
      "model_forward_time": 0.11477804183959961,
      "step": 31317
    },
    {
      "epoch": 0.000191143798828125,
      "step": 31317,
      "training_step_time": 0.3814065456390381
    },
    {
      "epoch": 0.00019114990234375,
      "model_forward_time": 0.1151585578918457,
      "step": 31318
    },
    {
      "epoch": 0.00019114990234375,
      "step": 31318,
      "training_step_time": 0.6098480224609375
    },
    {
      "epoch": 0.000191156005859375,
      "model_forward_time": 0.11504578590393066,
      "step": 31319
    },
    {
      "epoch": 0.000191156005859375,
      "step": 31319,
      "training_step_time": 0.4232053756713867
    },
    {
      "epoch": 0.000191162109375,
      "grad_norm": 0.14980268478393555,
      "learning_rate": 5.0496032808399815e-05,
      "loss": 0.0466,
      "step": 31320
    },
    {
      "epoch": 0.000191162109375,
      "model_forward_time": 0.11494207382202148,
      "step": 31320
    },
    {
      "epoch": 0.000191162109375,
      "step": 31320,
      "training_step_time": 0.46377015113830566
    },
    {
      "epoch": 0.000191168212890625,
      "model_forward_time": 0.11548805236816406,
      "step": 31321
    },
    {
      "epoch": 0.000191168212890625,
      "step": 31321,
      "training_step_time": 0.491330623626709
    },
    {
      "epoch": 0.00019117431640625,
      "model_forward_time": 0.11509060859680176,
      "step": 31322
    },
    {
      "epoch": 0.00019117431640625,
      "step": 31322,
      "training_step_time": 0.4023616313934326
    },
    {
      "epoch": 0.000191180419921875,
      "model_forward_time": 0.11506509780883789,
      "step": 31323
    },
    {
      "epoch": 0.000191180419921875,
      "step": 31323,
      "training_step_time": 0.3822040557861328
    },
    {
      "epoch": 0.0001911865234375,
      "model_forward_time": 0.11519169807434082,
      "step": 31324
    },
    {
      "epoch": 0.0001911865234375,
      "step": 31324,
      "training_step_time": 0.38901472091674805
    },
    {
      "epoch": 0.000191192626953125,
      "model_forward_time": 0.11481475830078125,
      "step": 31325
    },
    {
      "epoch": 0.000191192626953125,
      "step": 31325,
      "training_step_time": 0.38391661643981934
    },
    {
      "epoch": 0.00019119873046875,
      "model_forward_time": 0.11475634574890137,
      "step": 31326
    },
    {
      "epoch": 0.00019119873046875,
      "step": 31326,
      "training_step_time": 0.39923095703125
    },
    {
      "epoch": 0.000191204833984375,
      "model_forward_time": 0.11582756042480469,
      "step": 31327
    },
    {
      "epoch": 0.000191204833984375,
      "step": 31327,
      "training_step_time": 0.3980751037597656
    },
    {
      "epoch": 0.0001912109375,
      "model_forward_time": 0.11591506004333496,
      "step": 31328
    },
    {
      "epoch": 0.0001912109375,
      "step": 31328,
      "training_step_time": 0.38457345962524414
    },
    {
      "epoch": 0.000191217041015625,
      "model_forward_time": 0.11544322967529297,
      "step": 31329
    },
    {
      "epoch": 0.000191217041015625,
      "step": 31329,
      "training_step_time": 0.38959622383117676
    },
    {
      "epoch": 0.00019122314453125,
      "grad_norm": 0.14418141543865204,
      "learning_rate": 5.046847626030569e-05,
      "loss": 0.0418,
      "step": 31330
    },
    {
      "epoch": 0.00019122314453125,
      "model_forward_time": 0.11548876762390137,
      "step": 31330
    },
    {
      "epoch": 0.00019122314453125,
      "step": 31330,
      "training_step_time": 0.5212280750274658
    },
    {
      "epoch": 0.000191229248046875,
      "model_forward_time": 0.11460089683532715,
      "step": 31331
    },
    {
      "epoch": 0.000191229248046875,
      "step": 31331,
      "training_step_time": 0.3873119354248047
    },
    {
      "epoch": 0.0001912353515625,
      "model_forward_time": 0.11519098281860352,
      "step": 31332
    },
    {
      "epoch": 0.0001912353515625,
      "step": 31332,
      "training_step_time": 0.45316004753112793
    },
    {
      "epoch": 0.000191241455078125,
      "model_forward_time": 0.11572933197021484,
      "step": 31333
    },
    {
      "epoch": 0.000191241455078125,
      "step": 31333,
      "training_step_time": 0.4850897789001465
    },
    {
      "epoch": 0.00019124755859375,
      "model_forward_time": 0.11548328399658203,
      "step": 31334
    },
    {
      "epoch": 0.00019124755859375,
      "step": 31334,
      "training_step_time": 0.4184756278991699
    },
    {
      "epoch": 0.000191253662109375,
      "model_forward_time": 0.11494207382202148,
      "step": 31335
    },
    {
      "epoch": 0.000191253662109375,
      "step": 31335,
      "training_step_time": 0.4273350238800049
    },
    {
      "epoch": 0.000191259765625,
      "model_forward_time": 0.1156013011932373,
      "step": 31336
    },
    {
      "epoch": 0.000191259765625,
      "step": 31336,
      "training_step_time": 0.4978468418121338
    },
    {
      "epoch": 0.000191265869140625,
      "model_forward_time": 0.1154320240020752,
      "step": 31337
    },
    {
      "epoch": 0.000191265869140625,
      "step": 31337,
      "training_step_time": 0.41390442848205566
    },
    {
      "epoch": 0.00019127197265625,
      "model_forward_time": 0.11490297317504883,
      "step": 31338
    },
    {
      "epoch": 0.00019127197265625,
      "step": 31338,
      "training_step_time": 0.3967134952545166
    },
    {
      "epoch": 0.000191278076171875,
      "model_forward_time": 0.11483383178710938,
      "step": 31339
    },
    {
      "epoch": 0.000191278076171875,
      "step": 31339,
      "training_step_time": 0.3978245258331299
    },
    {
      "epoch": 0.0001912841796875,
      "grad_norm": 0.1297147423028946,
      "learning_rate": 5.0440919569900835e-05,
      "loss": 0.0409,
      "step": 31340
    },
    {
      "epoch": 0.0001912841796875,
      "model_forward_time": 0.1148233413696289,
      "step": 31340
    },
    {
      "epoch": 0.0001912841796875,
      "step": 31340,
      "training_step_time": 0.3894679546356201
    },
    {
      "epoch": 0.000191290283203125,
      "model_forward_time": 0.1151268482208252,
      "step": 31341
    },
    {
      "epoch": 0.000191290283203125,
      "step": 31341,
      "training_step_time": 0.3973522186279297
    },
    {
      "epoch": 0.00019129638671875,
      "model_forward_time": 0.11508750915527344,
      "step": 31342
    },
    {
      "epoch": 0.00019129638671875,
      "step": 31342,
      "training_step_time": 0.6044116020202637
    },
    {
      "epoch": 0.000191302490234375,
      "model_forward_time": 0.11620926856994629,
      "step": 31343
    },
    {
      "epoch": 0.000191302490234375,
      "step": 31343,
      "training_step_time": 0.401914119720459
    },
    {
      "epoch": 0.00019130859375,
      "model_forward_time": 0.1145622730255127,
      "step": 31344
    },
    {
      "epoch": 0.00019130859375,
      "step": 31344,
      "training_step_time": 0.3975508213043213
    },
    {
      "epoch": 0.000191314697265625,
      "model_forward_time": 0.11555171012878418,
      "step": 31345
    },
    {
      "epoch": 0.000191314697265625,
      "step": 31345,
      "training_step_time": 0.4007115364074707
    },
    {
      "epoch": 0.00019132080078125,
      "model_forward_time": 0.11433744430541992,
      "step": 31346
    },
    {
      "epoch": 0.00019132080078125,
      "step": 31346,
      "training_step_time": 0.3974618911743164
    },
    {
      "epoch": 0.000191326904296875,
      "model_forward_time": 0.11514687538146973,
      "step": 31347
    },
    {
      "epoch": 0.000191326904296875,
      "step": 31347,
      "training_step_time": 0.4084012508392334
    },
    {
      "epoch": 0.0001913330078125,
      "model_forward_time": 0.11491060256958008,
      "step": 31348
    },
    {
      "epoch": 0.0001913330078125,
      "step": 31348,
      "training_step_time": 0.5140359401702881
    },
    {
      "epoch": 0.000191339111328125,
      "model_forward_time": 0.11509537696838379,
      "step": 31349
    },
    {
      "epoch": 0.000191339111328125,
      "step": 31349,
      "training_step_time": 0.47945284843444824
    },
    {
      "epoch": 0.00019134521484375,
      "grad_norm": 0.1458357572555542,
      "learning_rate": 5.041336274555625e-05,
      "loss": 0.0394,
      "step": 31350
    },
    {
      "epoch": 0.00019134521484375,
      "model_forward_time": 0.11543416976928711,
      "step": 31350
    },
    {
      "epoch": 0.00019134521484375,
      "step": 31350,
      "training_step_time": 0.4258840084075928
    },
    {
      "epoch": 0.000191351318359375,
      "model_forward_time": 0.11460185050964355,
      "step": 31351
    },
    {
      "epoch": 0.000191351318359375,
      "step": 31351,
      "training_step_time": 0.4000723361968994
    },
    {
      "epoch": 0.000191357421875,
      "model_forward_time": 0.11557960510253906,
      "step": 31352
    },
    {
      "epoch": 0.000191357421875,
      "step": 31352,
      "training_step_time": 0.3765718936920166
    },
    {
      "epoch": 0.000191363525390625,
      "model_forward_time": 0.11506271362304688,
      "step": 31353
    },
    {
      "epoch": 0.000191363525390625,
      "step": 31353,
      "training_step_time": 0.39012718200683594
    },
    {
      "epoch": 0.00019136962890625,
      "model_forward_time": 0.11493659019470215,
      "step": 31354
    },
    {
      "epoch": 0.00019136962890625,
      "step": 31354,
      "training_step_time": 0.4040205478668213
    },
    {
      "epoch": 0.000191375732421875,
      "model_forward_time": 0.1152350902557373,
      "step": 31355
    },
    {
      "epoch": 0.000191375732421875,
      "step": 31355,
      "training_step_time": 0.39679765701293945
    },
    {
      "epoch": 0.0001913818359375,
      "model_forward_time": 0.1157841682434082,
      "step": 31356
    },
    {
      "epoch": 0.0001913818359375,
      "step": 31356,
      "training_step_time": 0.3967781066894531
    },
    {
      "epoch": 0.000191387939453125,
      "model_forward_time": 0.11504030227661133,
      "step": 31357
    },
    {
      "epoch": 0.000191387939453125,
      "step": 31357,
      "training_step_time": 0.39478397369384766
    },
    {
      "epoch": 0.00019139404296875,
      "model_forward_time": 0.11558699607849121,
      "step": 31358
    },
    {
      "epoch": 0.00019139404296875,
      "step": 31358,
      "training_step_time": 0.3891117572784424
    },
    {
      "epoch": 0.000191400146484375,
      "model_forward_time": 0.1149897575378418,
      "step": 31359
    },
    {
      "epoch": 0.000191400146484375,
      "step": 31359,
      "training_step_time": 0.3967781066894531
    },
    {
      "epoch": 0.00019140625,
      "grad_norm": 0.10221780091524124,
      "learning_rate": 5.038580579564298e-05,
      "loss": 0.0446,
      "step": 31360
    },
    {
      "epoch": 0.00019140625,
      "model_forward_time": 0.11501264572143555,
      "step": 31360
    },
    {
      "epoch": 0.00019140625,
      "step": 31360,
      "training_step_time": 0.7176721096038818
    },
    {
      "epoch": 0.000191412353515625,
      "model_forward_time": 0.1162259578704834,
      "step": 31361
    },
    {
      "epoch": 0.000191412353515625,
      "step": 31361,
      "training_step_time": 0.4046359062194824
    },
    {
      "epoch": 0.00019141845703125,
      "model_forward_time": 0.11518192291259766,
      "step": 31362
    },
    {
      "epoch": 0.00019141845703125,
      "step": 31362,
      "training_step_time": 0.47959208488464355
    },
    {
      "epoch": 0.000191424560546875,
      "model_forward_time": 0.11476445198059082,
      "step": 31363
    },
    {
      "epoch": 0.000191424560546875,
      "step": 31363,
      "training_step_time": 0.3900792598724365
    },
    {
      "epoch": 0.0001914306640625,
      "model_forward_time": 0.11417150497436523,
      "step": 31364
    },
    {
      "epoch": 0.0001914306640625,
      "step": 31364,
      "training_step_time": 0.417097806930542
    },
    {
      "epoch": 0.000191436767578125,
      "model_forward_time": 0.11473464965820312,
      "step": 31365
    },
    {
      "epoch": 0.000191436767578125,
      "step": 31365,
      "training_step_time": 0.40857434272766113
    },
    {
      "epoch": 0.00019144287109375,
      "model_forward_time": 0.11444830894470215,
      "step": 31366
    },
    {
      "epoch": 0.00019144287109375,
      "step": 31366,
      "training_step_time": 0.3963468074798584
    },
    {
      "epoch": 0.000191448974609375,
      "model_forward_time": 0.11546707153320312,
      "step": 31367
    },
    {
      "epoch": 0.000191448974609375,
      "step": 31367,
      "training_step_time": 0.39447498321533203
    },
    {
      "epoch": 0.000191455078125,
      "model_forward_time": 0.11639833450317383,
      "step": 31368
    },
    {
      "epoch": 0.000191455078125,
      "step": 31368,
      "training_step_time": 0.3919367790222168
    },
    {
      "epoch": 0.000191461181640625,
      "model_forward_time": 0.11560249328613281,
      "step": 31369
    },
    {
      "epoch": 0.000191461181640625,
      "step": 31369,
      "training_step_time": 0.39837002754211426
    },
    {
      "epoch": 0.00019146728515625,
      "grad_norm": 0.10791539400815964,
      "learning_rate": 5.0358248728532096e-05,
      "loss": 0.0396,
      "step": 31370
    },
    {
      "epoch": 0.00019146728515625,
      "model_forward_time": 0.11508774757385254,
      "step": 31370
    },
    {
      "epoch": 0.00019146728515625,
      "step": 31370,
      "training_step_time": 0.392688512802124
    },
    {
      "epoch": 0.000191473388671875,
      "model_forward_time": 0.11522722244262695,
      "step": 31371
    },
    {
      "epoch": 0.000191473388671875,
      "step": 31371,
      "training_step_time": 0.3836178779602051
    },
    {
      "epoch": 0.0001914794921875,
      "model_forward_time": 0.11508488655090332,
      "step": 31372
    },
    {
      "epoch": 0.0001914794921875,
      "step": 31372,
      "training_step_time": 0.4634218215942383
    },
    {
      "epoch": 0.000191485595703125,
      "model_forward_time": 0.1157839298248291,
      "step": 31373
    },
    {
      "epoch": 0.000191485595703125,
      "step": 31373,
      "training_step_time": 0.3945913314819336
    },
    {
      "epoch": 0.00019149169921875,
      "model_forward_time": 0.11539649963378906,
      "step": 31374
    },
    {
      "epoch": 0.00019149169921875,
      "step": 31374,
      "training_step_time": 0.4122917652130127
    },
    {
      "epoch": 0.000191497802734375,
      "model_forward_time": 0.11503410339355469,
      "step": 31375
    },
    {
      "epoch": 0.000191497802734375,
      "step": 31375,
      "training_step_time": 0.4180104732513428
    },
    {
      "epoch": 0.00019150390625,
      "model_forward_time": 0.11523723602294922,
      "step": 31376
    },
    {
      "epoch": 0.00019150390625,
      "step": 31376,
      "training_step_time": 0.4983222484588623
    },
    {
      "epoch": 0.000191510009765625,
      "model_forward_time": 0.11474943161010742,
      "step": 31377
    },
    {
      "epoch": 0.000191510009765625,
      "step": 31377,
      "training_step_time": 0.4628868103027344
    },
    {
      "epoch": 0.00019151611328125,
      "model_forward_time": 0.1150674819946289,
      "step": 31378
    },
    {
      "epoch": 0.00019151611328125,
      "step": 31378,
      "training_step_time": 0.4569087028503418
    },
    {
      "epoch": 0.000191522216796875,
      "model_forward_time": 0.1154634952545166,
      "step": 31379
    },
    {
      "epoch": 0.000191522216796875,
      "step": 31379,
      "training_step_time": 0.49903225898742676
    },
    {
      "epoch": 0.0001915283203125,
      "grad_norm": 0.1082310751080513,
      "learning_rate": 5.033069155259471e-05,
      "loss": 0.0408,
      "step": 31380
    },
    {
      "epoch": 0.0001915283203125,
      "model_forward_time": 0.11493897438049316,
      "step": 31380
    },
    {
      "epoch": 0.0001915283203125,
      "step": 31380,
      "training_step_time": 0.3942232131958008
    },
    {
      "epoch": 0.000191534423828125,
      "model_forward_time": 0.11482572555541992,
      "step": 31381
    },
    {
      "epoch": 0.000191534423828125,
      "step": 31381,
      "training_step_time": 0.40241384506225586
    },
    {
      "epoch": 0.00019154052734375,
      "model_forward_time": 0.11484193801879883,
      "step": 31382
    },
    {
      "epoch": 0.00019154052734375,
      "step": 31382,
      "training_step_time": 0.3921835422515869
    },
    {
      "epoch": 0.000191546630859375,
      "model_forward_time": 0.11489272117614746,
      "step": 31383
    },
    {
      "epoch": 0.000191546630859375,
      "step": 31383,
      "training_step_time": 0.38021302223205566
    },
    {
      "epoch": 0.000191552734375,
      "model_forward_time": 0.11531496047973633,
      "step": 31384
    },
    {
      "epoch": 0.000191552734375,
      "step": 31384,
      "training_step_time": 0.42458486557006836
    },
    {
      "epoch": 0.000191558837890625,
      "model_forward_time": 0.11509180068969727,
      "step": 31385
    },
    {
      "epoch": 0.000191558837890625,
      "step": 31385,
      "training_step_time": 0.38806819915771484
    },
    {
      "epoch": 0.00019156494140625,
      "model_forward_time": 0.11629056930541992,
      "step": 31386
    },
    {
      "epoch": 0.00019156494140625,
      "step": 31386,
      "training_step_time": 0.39920568466186523
    },
    {
      "epoch": 0.000191571044921875,
      "model_forward_time": 0.11531972885131836,
      "step": 31387
    },
    {
      "epoch": 0.000191571044921875,
      "step": 31387,
      "training_step_time": 0.4370920658111572
    },
    {
      "epoch": 0.0001915771484375,
      "model_forward_time": 0.11480236053466797,
      "step": 31388
    },
    {
      "epoch": 0.0001915771484375,
      "step": 31388,
      "training_step_time": 0.3852686882019043
    },
    {
      "epoch": 0.000191583251953125,
      "model_forward_time": 0.11526703834533691,
      "step": 31389
    },
    {
      "epoch": 0.000191583251953125,
      "step": 31389,
      "training_step_time": 0.40398216247558594
    },
    {
      "epoch": 0.00019158935546875,
      "grad_norm": 0.1495252400636673,
      "learning_rate": 5.030313427620197e-05,
      "loss": 0.0419,
      "step": 31390
    },
    {
      "epoch": 0.00019158935546875,
      "model_forward_time": 0.11550688743591309,
      "step": 31390
    },
    {
      "epoch": 0.00019158935546875,
      "step": 31390,
      "training_step_time": 0.5605928897857666
    },
    {
      "epoch": 0.000191595458984375,
      "model_forward_time": 0.1156165599822998,
      "step": 31391
    },
    {
      "epoch": 0.000191595458984375,
      "step": 31391,
      "training_step_time": 0.4181540012359619
    },
    {
      "epoch": 0.0001916015625,
      "model_forward_time": 0.11486935615539551,
      "step": 31392
    },
    {
      "epoch": 0.0001916015625,
      "step": 31392,
      "training_step_time": 0.458629846572876
    },
    {
      "epoch": 0.000191607666015625,
      "model_forward_time": 0.11483931541442871,
      "step": 31393
    },
    {
      "epoch": 0.000191607666015625,
      "step": 31393,
      "training_step_time": 0.49714040756225586
    },
    {
      "epoch": 0.00019161376953125,
      "model_forward_time": 0.11492919921875,
      "step": 31394
    },
    {
      "epoch": 0.00019161376953125,
      "step": 31394,
      "training_step_time": 0.4207468032836914
    },
    {
      "epoch": 0.000191619873046875,
      "model_forward_time": 0.11449861526489258,
      "step": 31395
    },
    {
      "epoch": 0.000191619873046875,
      "step": 31395,
      "training_step_time": 0.3894617557525635
    },
    {
      "epoch": 0.0001916259765625,
      "model_forward_time": 0.11572790145874023,
      "step": 31396
    },
    {
      "epoch": 0.0001916259765625,
      "step": 31396,
      "training_step_time": 0.44712281227111816
    },
    {
      "epoch": 0.000191632080078125,
      "model_forward_time": 0.1151583194732666,
      "step": 31397
    },
    {
      "epoch": 0.000191632080078125,
      "step": 31397,
      "training_step_time": 0.38683319091796875
    },
    {
      "epoch": 0.00019163818359375,
      "model_forward_time": 0.11538338661193848,
      "step": 31398
    },
    {
      "epoch": 0.00019163818359375,
      "step": 31398,
      "training_step_time": 0.3906993865966797
    },
    {
      "epoch": 0.000191644287109375,
      "model_forward_time": 0.11518287658691406,
      "step": 31399
    },
    {
      "epoch": 0.000191644287109375,
      "step": 31399,
      "training_step_time": 0.3881840705871582
    },
    {
      "epoch": 0.000191650390625,
      "grad_norm": 0.10675181448459625,
      "learning_rate": 5.027557690772503e-05,
      "loss": 0.0402,
      "step": 31400
    },
    {
      "epoch": 0.000191650390625,
      "model_forward_time": 0.11449718475341797,
      "step": 31400
    },
    {
      "epoch": 0.000191650390625,
      "step": 31400,
      "training_step_time": 0.4484066963195801
    },
    {
      "epoch": 0.000191656494140625,
      "model_forward_time": 0.11457943916320801,
      "step": 31401
    },
    {
      "epoch": 0.000191656494140625,
      "step": 31401,
      "training_step_time": 0.4100954532623291
    },
    {
      "epoch": 0.00019166259765625,
      "model_forward_time": 0.11478519439697266,
      "step": 31402
    },
    {
      "epoch": 0.00019166259765625,
      "step": 31402,
      "training_step_time": 1.2336764335632324
    },
    {
      "epoch": 0.000191668701171875,
      "model_forward_time": 0.11413455009460449,
      "step": 31403
    },
    {
      "epoch": 0.000191668701171875,
      "step": 31403,
      "training_step_time": 0.4194314479827881
    },
    {
      "epoch": 0.0001916748046875,
      "model_forward_time": 0.11348867416381836,
      "step": 31404
    },
    {
      "epoch": 0.0001916748046875,
      "step": 31404,
      "training_step_time": 0.4402775764465332
    },
    {
      "epoch": 0.000191680908203125,
      "model_forward_time": 0.11385178565979004,
      "step": 31405
    },
    {
      "epoch": 0.000191680908203125,
      "step": 31405,
      "training_step_time": 0.4246230125427246
    },
    {
      "epoch": 0.00019168701171875,
      "model_forward_time": 0.11362767219543457,
      "step": 31406
    },
    {
      "epoch": 0.00019168701171875,
      "step": 31406,
      "training_step_time": 0.46534037590026855
    },
    {
      "epoch": 0.000191693115234375,
      "model_forward_time": 0.1146690845489502,
      "step": 31407
    },
    {
      "epoch": 0.000191693115234375,
      "step": 31407,
      "training_step_time": 0.45604777336120605
    },
    {
      "epoch": 0.00019169921875,
      "model_forward_time": 0.11472511291503906,
      "step": 31408
    },
    {
      "epoch": 0.00019169921875,
      "step": 31408,
      "training_step_time": 0.40795087814331055
    },
    {
      "epoch": 0.000191705322265625,
      "model_forward_time": 0.11529254913330078,
      "step": 31409
    },
    {
      "epoch": 0.000191705322265625,
      "step": 31409,
      "training_step_time": 0.39505434036254883
    },
    {
      "epoch": 0.00019171142578125,
      "grad_norm": 0.19277872145175934,
      "learning_rate": 5.02480194555351e-05,
      "loss": 0.047,
      "step": 31410
    },
    {
      "epoch": 0.00019171142578125,
      "model_forward_time": 0.11487603187561035,
      "step": 31410
    },
    {
      "epoch": 0.00019171142578125,
      "step": 31410,
      "training_step_time": 0.39667844772338867
    },
    {
      "epoch": 0.000191717529296875,
      "model_forward_time": 0.11538028717041016,
      "step": 31411
    },
    {
      "epoch": 0.000191717529296875,
      "step": 31411,
      "training_step_time": 0.3954963684082031
    },
    {
      "epoch": 0.0001917236328125,
      "model_forward_time": 0.11474990844726562,
      "step": 31412
    },
    {
      "epoch": 0.0001917236328125,
      "step": 31412,
      "training_step_time": 0.42330193519592285
    },
    {
      "epoch": 0.000191729736328125,
      "model_forward_time": 0.1150209903717041,
      "step": 31413
    },
    {
      "epoch": 0.000191729736328125,
      "step": 31413,
      "training_step_time": 0.4579591751098633
    },
    {
      "epoch": 0.00019173583984375,
      "model_forward_time": 0.11481595039367676,
      "step": 31414
    },
    {
      "epoch": 0.00019173583984375,
      "step": 31414,
      "training_step_time": 0.508786678314209
    },
    {
      "epoch": 0.000191741943359375,
      "model_forward_time": 0.11497783660888672,
      "step": 31415
    },
    {
      "epoch": 0.000191741943359375,
      "step": 31415,
      "training_step_time": 0.38843703269958496
    },
    {
      "epoch": 0.000191748046875,
      "model_forward_time": 0.11508607864379883,
      "step": 31416
    },
    {
      "epoch": 0.000191748046875,
      "step": 31416,
      "training_step_time": 0.4683339595794678
    },
    {
      "epoch": 0.000191754150390625,
      "model_forward_time": 0.11502861976623535,
      "step": 31417
    },
    {
      "epoch": 0.000191754150390625,
      "step": 31417,
      "training_step_time": 0.4421987533569336
    },
    {
      "epoch": 0.00019176025390625,
      "model_forward_time": 0.11535429954528809,
      "step": 31418
    },
    {
      "epoch": 0.00019176025390625,
      "step": 31418,
      "training_step_time": 0.45688438415527344
    },
    {
      "epoch": 0.000191766357421875,
      "model_forward_time": 0.11516451835632324,
      "step": 31419
    },
    {
      "epoch": 0.000191766357421875,
      "step": 31419,
      "training_step_time": 0.42805981636047363
    },
    {
      "epoch": 0.0001917724609375,
      "grad_norm": 0.1472652107477188,
      "learning_rate": 5.0220461928003406e-05,
      "loss": 0.039,
      "step": 31420
    },
    {
      "epoch": 0.0001917724609375,
      "model_forward_time": 0.11528682708740234,
      "step": 31420
    },
    {
      "epoch": 0.0001917724609375,
      "step": 31420,
      "training_step_time": 0.8711738586425781
    },
    {
      "epoch": 0.000191778564453125,
      "model_forward_time": 0.11486220359802246,
      "step": 31421
    },
    {
      "epoch": 0.000191778564453125,
      "step": 31421,
      "training_step_time": 0.37664055824279785
    },
    {
      "epoch": 0.00019178466796875,
      "model_forward_time": 0.11419439315795898,
      "step": 31422
    },
    {
      "epoch": 0.00019178466796875,
      "step": 31422,
      "training_step_time": 0.38747072219848633
    },
    {
      "epoch": 0.000191790771484375,
      "model_forward_time": 0.11458230018615723,
      "step": 31423
    },
    {
      "epoch": 0.000191790771484375,
      "step": 31423,
      "training_step_time": 0.39652299880981445
    },
    {
      "epoch": 0.000191796875,
      "model_forward_time": 0.11391949653625488,
      "step": 31424
    },
    {
      "epoch": 0.000191796875,
      "step": 31424,
      "training_step_time": 0.38237690925598145
    },
    {
      "epoch": 0.000191802978515625,
      "model_forward_time": 0.11458396911621094,
      "step": 31425
    },
    {
      "epoch": 0.000191802978515625,
      "step": 31425,
      "training_step_time": 0.39700865745544434
    },
    {
      "epoch": 0.00019180908203125,
      "model_forward_time": 0.11609053611755371,
      "step": 31426
    },
    {
      "epoch": 0.00019180908203125,
      "step": 31426,
      "training_step_time": 0.470287561416626
    },
    {
      "epoch": 0.000191815185546875,
      "model_forward_time": 0.11537671089172363,
      "step": 31427
    },
    {
      "epoch": 0.000191815185546875,
      "step": 31427,
      "training_step_time": 0.3913452625274658
    },
    {
      "epoch": 0.0001918212890625,
      "model_forward_time": 0.1147925853729248,
      "step": 31428
    },
    {
      "epoch": 0.0001918212890625,
      "step": 31428,
      "training_step_time": 0.4066746234893799
    },
    {
      "epoch": 0.000191827392578125,
      "model_forward_time": 0.11507320404052734,
      "step": 31429
    },
    {
      "epoch": 0.000191827392578125,
      "step": 31429,
      "training_step_time": 0.38156843185424805
    },
    {
      "epoch": 0.00019183349609375,
      "grad_norm": 0.10448144376277924,
      "learning_rate": 5.0192904333501214e-05,
      "loss": 0.0379,
      "step": 31430
    },
    {
      "epoch": 0.00019183349609375,
      "model_forward_time": 0.11588191986083984,
      "step": 31430
    },
    {
      "epoch": 0.00019183349609375,
      "step": 31430,
      "training_step_time": 0.4465451240539551
    },
    {
      "epoch": 0.000191839599609375,
      "model_forward_time": 0.11504006385803223,
      "step": 31431
    },
    {
      "epoch": 0.000191839599609375,
      "step": 31431,
      "training_step_time": 0.4313986301422119
    },
    {
      "epoch": 0.000191845703125,
      "model_forward_time": 0.11467266082763672,
      "step": 31432
    },
    {
      "epoch": 0.000191845703125,
      "step": 31432,
      "training_step_time": 0.44426608085632324
    },
    {
      "epoch": 0.000191851806640625,
      "model_forward_time": 0.11452078819274902,
      "step": 31433
    },
    {
      "epoch": 0.000191851806640625,
      "step": 31433,
      "training_step_time": 0.38588929176330566
    },
    {
      "epoch": 0.00019185791015625,
      "model_forward_time": 0.11544537544250488,
      "step": 31434
    },
    {
      "epoch": 0.00019185791015625,
      "step": 31434,
      "training_step_time": 0.4219660758972168
    },
    {
      "epoch": 0.000191864013671875,
      "model_forward_time": 0.11521768569946289,
      "step": 31435
    },
    {
      "epoch": 0.000191864013671875,
      "step": 31435,
      "training_step_time": 0.3867321014404297
    },
    {
      "epoch": 0.0001918701171875,
      "model_forward_time": 0.11504697799682617,
      "step": 31436
    },
    {
      "epoch": 0.0001918701171875,
      "step": 31436,
      "training_step_time": 0.39026737213134766
    },
    {
      "epoch": 0.000191876220703125,
      "model_forward_time": 0.11500167846679688,
      "step": 31437
    },
    {
      "epoch": 0.000191876220703125,
      "step": 31437,
      "training_step_time": 0.3951864242553711
    },
    {
      "epoch": 0.00019188232421875,
      "model_forward_time": 0.11551928520202637,
      "step": 31438
    },
    {
      "epoch": 0.00019188232421875,
      "step": 31438,
      "training_step_time": 0.46376681327819824
    },
    {
      "epoch": 0.000191888427734375,
      "model_forward_time": 0.11479806900024414,
      "step": 31439
    },
    {
      "epoch": 0.000191888427734375,
      "step": 31439,
      "training_step_time": 0.43111753463745117
    },
    {
      "epoch": 0.00019189453125,
      "grad_norm": 0.088248111307621,
      "learning_rate": 5.016534668039976e-05,
      "loss": 0.0422,
      "step": 31440
    },
    {
      "epoch": 0.00019189453125,
      "model_forward_time": 0.1147756576538086,
      "step": 31440
    },
    {
      "epoch": 0.00019189453125,
      "step": 31440,
      "training_step_time": 0.39481043815612793
    },
    {
      "epoch": 0.000191900634765625,
      "model_forward_time": 0.11529016494750977,
      "step": 31441
    },
    {
      "epoch": 0.000191900634765625,
      "step": 31441,
      "training_step_time": 0.38456273078918457
    },
    {
      "epoch": 0.00019190673828125,
      "model_forward_time": 0.11513042449951172,
      "step": 31442
    },
    {
      "epoch": 0.00019190673828125,
      "step": 31442,
      "training_step_time": 0.3883187770843506
    },
    {
      "epoch": 0.000191912841796875,
      "model_forward_time": 0.11459231376647949,
      "step": 31443
    },
    {
      "epoch": 0.000191912841796875,
      "step": 31443,
      "training_step_time": 0.3783082962036133
    },
    {
      "epoch": 0.0001919189453125,
      "model_forward_time": 0.11519551277160645,
      "step": 31444
    },
    {
      "epoch": 0.0001919189453125,
      "step": 31444,
      "training_step_time": 0.7422065734863281
    },
    {
      "epoch": 0.000191925048828125,
      "model_forward_time": 0.11486268043518066,
      "step": 31445
    },
    {
      "epoch": 0.000191925048828125,
      "step": 31445,
      "training_step_time": 0.5096065998077393
    },
    {
      "epoch": 0.00019193115234375,
      "model_forward_time": 0.11490249633789062,
      "step": 31446
    },
    {
      "epoch": 0.00019193115234375,
      "step": 31446,
      "training_step_time": 0.4013786315917969
    },
    {
      "epoch": 0.000191937255859375,
      "model_forward_time": 0.1149590015411377,
      "step": 31447
    },
    {
      "epoch": 0.000191937255859375,
      "step": 31447,
      "training_step_time": 0.3859257698059082
    },
    {
      "epoch": 0.000191943359375,
      "model_forward_time": 0.1147465705871582,
      "step": 31448
    },
    {
      "epoch": 0.000191943359375,
      "step": 31448,
      "training_step_time": 0.4077906608581543
    },
    {
      "epoch": 0.000191949462890625,
      "model_forward_time": 0.11493206024169922,
      "step": 31449
    },
    {
      "epoch": 0.000191949462890625,
      "step": 31449,
      "training_step_time": 0.3692820072174072
    },
    {
      "epoch": 0.00019195556640625,
      "grad_norm": 0.11736884713172913,
      "learning_rate": 5.0137788977070353e-05,
      "loss": 0.0402,
      "step": 31450
    },
    {
      "epoch": 0.00019195556640625,
      "model_forward_time": 0.11526060104370117,
      "step": 31450
    },
    {
      "epoch": 0.00019195556640625,
      "step": 31450,
      "training_step_time": 0.3886420726776123
    },
    {
      "epoch": 0.000191961669921875,
      "model_forward_time": 0.1158900260925293,
      "step": 31451
    },
    {
      "epoch": 0.000191961669921875,
      "step": 31451,
      "training_step_time": 0.392594575881958
    },
    {
      "epoch": 0.0001919677734375,
      "model_forward_time": 0.11519074440002441,
      "step": 31452
    },
    {
      "epoch": 0.0001919677734375,
      "step": 31452,
      "training_step_time": 0.4143686294555664
    },
    {
      "epoch": 0.000191973876953125,
      "model_forward_time": 0.11546730995178223,
      "step": 31453
    },
    {
      "epoch": 0.000191973876953125,
      "step": 31453,
      "training_step_time": 0.4305250644683838
    },
    {
      "epoch": 0.00019197998046875,
      "model_forward_time": 0.11525511741638184,
      "step": 31454
    },
    {
      "epoch": 0.00019197998046875,
      "step": 31454,
      "training_step_time": 0.38958024978637695
    },
    {
      "epoch": 0.000191986083984375,
      "model_forward_time": 0.1154787540435791,
      "step": 31455
    },
    {
      "epoch": 0.000191986083984375,
      "step": 31455,
      "training_step_time": 0.3770613670349121
    },
    {
      "epoch": 0.0001919921875,
      "model_forward_time": 0.11525106430053711,
      "step": 31456
    },
    {
      "epoch": 0.0001919921875,
      "step": 31456,
      "training_step_time": 0.44776034355163574
    },
    {
      "epoch": 0.000191998291015625,
      "model_forward_time": 0.11497187614440918,
      "step": 31457
    },
    {
      "epoch": 0.000191998291015625,
      "step": 31457,
      "training_step_time": 0.4005882740020752
    },
    {
      "epoch": 0.00019200439453125,
      "model_forward_time": 0.11518454551696777,
      "step": 31458
    },
    {
      "epoch": 0.00019200439453125,
      "step": 31458,
      "training_step_time": 0.40253710746765137
    },
    {
      "epoch": 0.000192010498046875,
      "model_forward_time": 0.1153573989868164,
      "step": 31459
    },
    {
      "epoch": 0.000192010498046875,
      "step": 31459,
      "training_step_time": 0.446211576461792
    },
    {
      "epoch": 0.0001920166015625,
      "grad_norm": 0.10819578915834427,
      "learning_rate": 5.011023123188431e-05,
      "loss": 0.0409,
      "step": 31460
    },
    {
      "epoch": 0.0001920166015625,
      "model_forward_time": 0.1148989200592041,
      "step": 31460
    },
    {
      "epoch": 0.0001920166015625,
      "step": 31460,
      "training_step_time": 0.5049595832824707
    },
    {
      "epoch": 0.000192022705078125,
      "model_forward_time": 0.11463141441345215,
      "step": 31461
    },
    {
      "epoch": 0.000192022705078125,
      "step": 31461,
      "training_step_time": 0.4933180809020996
    },
    {
      "epoch": 0.00019202880859375,
      "model_forward_time": 0.11539578437805176,
      "step": 31462
    },
    {
      "epoch": 0.00019202880859375,
      "step": 31462,
      "training_step_time": 0.49516725540161133
    },
    {
      "epoch": 0.000192034912109375,
      "model_forward_time": 0.11459469795227051,
      "step": 31463
    },
    {
      "epoch": 0.000192034912109375,
      "step": 31463,
      "training_step_time": 0.41008710861206055
    },
    {
      "epoch": 0.000192041015625,
      "model_forward_time": 0.11460685729980469,
      "step": 31464
    },
    {
      "epoch": 0.000192041015625,
      "step": 31464,
      "training_step_time": 0.37877917289733887
    },
    {
      "epoch": 0.000192047119140625,
      "model_forward_time": 0.11444211006164551,
      "step": 31465
    },
    {
      "epoch": 0.000192047119140625,
      "step": 31465,
      "training_step_time": 0.4161508083343506
    },
    {
      "epoch": 0.00019205322265625,
      "model_forward_time": 0.11450719833374023,
      "step": 31466
    },
    {
      "epoch": 0.00019205322265625,
      "step": 31466,
      "training_step_time": 0.39864325523376465
    },
    {
      "epoch": 0.000192059326171875,
      "model_forward_time": 0.11464405059814453,
      "step": 31467
    },
    {
      "epoch": 0.000192059326171875,
      "step": 31467,
      "training_step_time": 0.4419252872467041
    },
    {
      "epoch": 0.0001920654296875,
      "model_forward_time": 0.1149442195892334,
      "step": 31468
    },
    {
      "epoch": 0.0001920654296875,
      "step": 31468,
      "training_step_time": 0.38025355339050293
    },
    {
      "epoch": 0.000192071533203125,
      "model_forward_time": 0.11548686027526855,
      "step": 31469
    },
    {
      "epoch": 0.000192071533203125,
      "step": 31469,
      "training_step_time": 0.3918616771697998
    },
    {
      "epoch": 0.00019207763671875,
      "grad_norm": 0.11829913407564163,
      "learning_rate": 5.0082673453212914e-05,
      "loss": 0.0427,
      "step": 31470
    },
    {
      "epoch": 0.00019207763671875,
      "model_forward_time": 0.11459016799926758,
      "step": 31470
    },
    {
      "epoch": 0.00019207763671875,
      "step": 31470,
      "training_step_time": 0.39490556716918945
    },
    {
      "epoch": 0.000192083740234375,
      "model_forward_time": 0.11486244201660156,
      "step": 31471
    },
    {
      "epoch": 0.000192083740234375,
      "step": 31471,
      "training_step_time": 0.3964879512786865
    },
    {
      "epoch": 0.00019208984375,
      "model_forward_time": 0.11513614654541016,
      "step": 31472
    },
    {
      "epoch": 0.00019208984375,
      "step": 31472,
      "training_step_time": 0.4011991024017334
    },
    {
      "epoch": 0.000192095947265625,
      "model_forward_time": 0.11567044258117676,
      "step": 31473
    },
    {
      "epoch": 0.000192095947265625,
      "step": 31473,
      "training_step_time": 0.43334293365478516
    },
    {
      "epoch": 0.00019210205078125,
      "model_forward_time": 0.114959716796875,
      "step": 31474
    },
    {
      "epoch": 0.00019210205078125,
      "step": 31474,
      "training_step_time": 0.6080663204193115
    },
    {
      "epoch": 0.000192108154296875,
      "model_forward_time": 0.11449670791625977,
      "step": 31475
    },
    {
      "epoch": 0.000192108154296875,
      "step": 31475,
      "training_step_time": 0.48007774353027344
    },
    {
      "epoch": 0.0001921142578125,
      "model_forward_time": 0.11508798599243164,
      "step": 31476
    },
    {
      "epoch": 0.0001921142578125,
      "step": 31476,
      "training_step_time": 0.5014317035675049
    },
    {
      "epoch": 0.000192120361328125,
      "model_forward_time": 0.11439108848571777,
      "step": 31477
    },
    {
      "epoch": 0.000192120361328125,
      "step": 31477,
      "training_step_time": 0.39024806022644043
    },
    {
      "epoch": 0.00019212646484375,
      "model_forward_time": 0.11493682861328125,
      "step": 31478
    },
    {
      "epoch": 0.00019212646484375,
      "step": 31478,
      "training_step_time": 0.3870363235473633
    },
    {
      "epoch": 0.000192132568359375,
      "model_forward_time": 0.11387872695922852,
      "step": 31479
    },
    {
      "epoch": 0.000192132568359375,
      "step": 31479,
      "training_step_time": 0.40790820121765137
    },
    {
      "epoch": 0.000192138671875,
      "grad_norm": 0.14794892072677612,
      "learning_rate": 5.005511564942751e-05,
      "loss": 0.0392,
      "step": 31480
    },
    {
      "epoch": 0.000192138671875,
      "model_forward_time": 0.1148691177368164,
      "step": 31480
    },
    {
      "epoch": 0.000192138671875,
      "step": 31480,
      "training_step_time": 0.39821457862854004
    },
    {
      "epoch": 0.000192144775390625,
      "model_forward_time": 0.11623477935791016,
      "step": 31481
    },
    {
      "epoch": 0.000192144775390625,
      "step": 31481,
      "training_step_time": 0.39775848388671875
    },
    {
      "epoch": 0.00019215087890625,
      "model_forward_time": 0.11490988731384277,
      "step": 31482
    },
    {
      "epoch": 0.00019215087890625,
      "step": 31482,
      "training_step_time": 0.39249348640441895
    },
    {
      "epoch": 0.000192156982421875,
      "model_forward_time": 0.11665081977844238,
      "step": 31483
    },
    {
      "epoch": 0.000192156982421875,
      "step": 31483,
      "training_step_time": 0.3999500274658203
    },
    {
      "epoch": 0.0001921630859375,
      "model_forward_time": 0.11504626274108887,
      "step": 31484
    },
    {
      "epoch": 0.0001921630859375,
      "step": 31484,
      "training_step_time": 0.3963277339935303
    },
    {
      "epoch": 0.000192169189453125,
      "model_forward_time": 0.11514496803283691,
      "step": 31485
    },
    {
      "epoch": 0.000192169189453125,
      "step": 31485,
      "training_step_time": 0.3963303565979004
    },
    {
      "epoch": 0.00019217529296875,
      "model_forward_time": 0.11543750762939453,
      "step": 31486
    },
    {
      "epoch": 0.00019217529296875,
      "step": 31486,
      "training_step_time": 0.500877857208252
    },
    {
      "epoch": 0.000192181396484375,
      "model_forward_time": 0.11476778984069824,
      "step": 31487
    },
    {
      "epoch": 0.000192181396484375,
      "step": 31487,
      "training_step_time": 0.40097951889038086
    },
    {
      "epoch": 0.0001921875,
      "model_forward_time": 0.11522817611694336,
      "step": 31488
    },
    {
      "epoch": 0.0001921875,
      "step": 31488,
      "training_step_time": 0.4428822994232178
    },
    {
      "epoch": 0.000192193603515625,
      "model_forward_time": 0.1151895523071289,
      "step": 31489
    },
    {
      "epoch": 0.000192193603515625,
      "step": 31489,
      "training_step_time": 0.4140486717224121
    },
    {
      "epoch": 0.00019219970703125,
      "grad_norm": 0.10287663340568542,
      "learning_rate": 5.0027557828899426e-05,
      "loss": 0.0443,
      "step": 31490
    },
    {
      "epoch": 0.00019219970703125,
      "model_forward_time": 0.1149296760559082,
      "step": 31490
    },
    {
      "epoch": 0.00019219970703125,
      "step": 31490,
      "training_step_time": 0.4175252914428711
    },
    {
      "epoch": 0.000192205810546875,
      "model_forward_time": 0.11526751518249512,
      "step": 31491
    },
    {
      "epoch": 0.000192205810546875,
      "step": 31491,
      "training_step_time": 0.3937866687774658
    },
    {
      "epoch": 0.0001922119140625,
      "model_forward_time": 0.11494135856628418,
      "step": 31492
    },
    {
      "epoch": 0.0001922119140625,
      "step": 31492,
      "training_step_time": 0.4049568176269531
    },
    {
      "epoch": 0.000192218017578125,
      "model_forward_time": 0.11497616767883301,
      "step": 31493
    },
    {
      "epoch": 0.000192218017578125,
      "step": 31493,
      "training_step_time": 0.4008140563964844
    },
    {
      "epoch": 0.00019222412109375,
      "model_forward_time": 0.11490750312805176,
      "step": 31494
    },
    {
      "epoch": 0.00019222412109375,
      "step": 31494,
      "training_step_time": 0.4382312297821045
    },
    {
      "epoch": 0.000192230224609375,
      "model_forward_time": 0.11497092247009277,
      "step": 31495
    },
    {
      "epoch": 0.000192230224609375,
      "step": 31495,
      "training_step_time": 0.4028143882751465
    },
    {
      "epoch": 0.000192236328125,
      "model_forward_time": 0.11473941802978516,
      "step": 31496
    },
    {
      "epoch": 0.000192236328125,
      "step": 31496,
      "training_step_time": 0.40393900871276855
    },
    {
      "epoch": 0.000192242431640625,
      "model_forward_time": 0.11427783966064453,
      "step": 31497
    },
    {
      "epoch": 0.000192242431640625,
      "step": 31497,
      "training_step_time": 0.39638781547546387
    },
    {
      "epoch": 0.00019224853515625,
      "model_forward_time": 0.11522197723388672,
      "step": 31498
    },
    {
      "epoch": 0.00019224853515625,
      "step": 31498,
      "training_step_time": 0.642521858215332
    },
    {
      "epoch": 0.000192254638671875,
      "model_forward_time": 0.11508774757385254,
      "step": 31499
    },
    {
      "epoch": 0.000192254638671875,
      "step": 31499,
      "training_step_time": 0.4116356372833252
    },
    {
      "epoch": 0.0001922607421875,
      "grad_norm": 0.14509694278240204,
      "learning_rate": 5e-05,
      "loss": 0.0444,
      "step": 31500
    },
    {
      "epoch": 0.0001922607421875,
      "model_forward_time": 0.11476778984069824,
      "step": 31500
    },
    {
      "epoch": 0.0001922607421875,
      "step": 31500,
      "training_step_time": 0.3819770812988281
    },
    {
      "epoch": 0.000192266845703125,
      "model_forward_time": 0.11490368843078613,
      "step": 31501
    },
    {
      "epoch": 0.000192266845703125,
      "step": 31501,
      "training_step_time": 0.42922234535217285
    },
    {
      "epoch": 0.00019227294921875,
      "model_forward_time": 0.11542749404907227,
      "step": 31502
    },
    {
      "epoch": 0.00019227294921875,
      "step": 31502,
      "training_step_time": 0.4185826778411865
    },
    {
      "epoch": 0.000192279052734375,
      "model_forward_time": 0.11424541473388672,
      "step": 31503
    },
    {
      "epoch": 0.000192279052734375,
      "step": 31503,
      "training_step_time": 0.3603541851043701
    },
    {
      "epoch": 0.00019228515625,
      "model_forward_time": 0.11465597152709961,
      "step": 31504
    },
    {
      "epoch": 0.00019228515625,
      "step": 31504,
      "training_step_time": 0.44676709175109863
    },
    {
      "epoch": 0.000192291259765625,
      "model_forward_time": 0.11498880386352539,
      "step": 31505
    },
    {
      "epoch": 0.000192291259765625,
      "step": 31505,
      "training_step_time": 0.4050881862640381
    },
    {
      "epoch": 0.00019229736328125,
      "model_forward_time": 0.11428523063659668,
      "step": 31506
    },
    {
      "epoch": 0.00019229736328125,
      "step": 31506,
      "training_step_time": 0.44870996475219727
    },
    {
      "epoch": 0.000192303466796875,
      "model_forward_time": 0.1146857738494873,
      "step": 31507
    },
    {
      "epoch": 0.000192303466796875,
      "step": 31507,
      "training_step_time": 0.3957967758178711
    },
    {
      "epoch": 0.0001923095703125,
      "model_forward_time": 0.11494064331054688,
      "step": 31508
    },
    {
      "epoch": 0.0001923095703125,
      "step": 31508,
      "training_step_time": 0.3941831588745117
    },
    {
      "epoch": 0.000192315673828125,
      "model_forward_time": 0.11486220359802246,
      "step": 31509
    },
    {
      "epoch": 0.000192315673828125,
      "step": 31509,
      "training_step_time": 0.38347387313842773
    },
    {
      "epoch": 0.00019232177734375,
      "grad_norm": 0.10497065633535385,
      "learning_rate": 4.997244217110058e-05,
      "loss": 0.0442,
      "step": 31510
    },
    {
      "epoch": 0.00019232177734375,
      "model_forward_time": 0.11501812934875488,
      "step": 31510
    },
    {
      "epoch": 0.00019232177734375,
      "step": 31510,
      "training_step_time": 0.8571860790252686
    },
    {
      "epoch": 0.000192327880859375,
      "model_forward_time": 0.11440134048461914,
      "step": 31511
    },
    {
      "epoch": 0.000192327880859375,
      "step": 31511,
      "training_step_time": 0.3824479579925537
    },
    {
      "epoch": 0.000192333984375,
      "model_forward_time": 0.11496806144714355,
      "step": 31512
    },
    {
      "epoch": 0.000192333984375,
      "step": 31512,
      "training_step_time": 0.3939347267150879
    },
    {
      "epoch": 0.000192340087890625,
      "model_forward_time": 0.11446619033813477,
      "step": 31513
    },
    {
      "epoch": 0.000192340087890625,
      "step": 31513,
      "training_step_time": 0.39260005950927734
    },
    {
      "epoch": 0.00019234619140625,
      "model_forward_time": 0.11401534080505371,
      "step": 31514
    },
    {
      "epoch": 0.00019234619140625,
      "step": 31514,
      "training_step_time": 0.39373230934143066
    },
    {
      "epoch": 0.000192352294921875,
      "model_forward_time": 0.11402606964111328,
      "step": 31515
    },
    {
      "epoch": 0.000192352294921875,
      "step": 31515,
      "training_step_time": 0.4374511241912842
    },
    {
      "epoch": 0.0001923583984375,
      "model_forward_time": 0.11527061462402344,
      "step": 31516
    },
    {
      "epoch": 0.0001923583984375,
      "step": 31516,
      "training_step_time": 0.5829696655273438
    },
    {
      "epoch": 0.000192364501953125,
      "model_forward_time": 0.11480927467346191,
      "step": 31517
    },
    {
      "epoch": 0.000192364501953125,
      "step": 31517,
      "training_step_time": 0.41234803199768066
    },
    {
      "epoch": 0.00019237060546875,
      "model_forward_time": 0.11512947082519531,
      "step": 31518
    },
    {
      "epoch": 0.00019237060546875,
      "step": 31518,
      "training_step_time": 0.4093029499053955
    },
    {
      "epoch": 0.000192376708984375,
      "model_forward_time": 0.11470532417297363,
      "step": 31519
    },
    {
      "epoch": 0.000192376708984375,
      "step": 31519,
      "training_step_time": 0.4036233425140381
    },
    {
      "epoch": 0.0001923828125,
      "grad_norm": 0.11998683959245682,
      "learning_rate": 4.994488435057251e-05,
      "loss": 0.0385,
      "step": 31520
    },
    {
      "epoch": 0.0001923828125,
      "model_forward_time": 0.11424589157104492,
      "step": 31520
    },
    {
      "epoch": 0.0001923828125,
      "step": 31520,
      "training_step_time": 0.4045069217681885
    },
    {
      "epoch": 0.000192388916015625,
      "model_forward_time": 0.11432647705078125,
      "step": 31521
    },
    {
      "epoch": 0.000192388916015625,
      "step": 31521,
      "training_step_time": 0.3964705467224121
    },
    {
      "epoch": 0.00019239501953125,
      "model_forward_time": 0.1146993637084961,
      "step": 31522
    },
    {
      "epoch": 0.00019239501953125,
      "step": 31522,
      "training_step_time": 0.666517972946167
    },
    {
      "epoch": 0.000192401123046875,
      "model_forward_time": 0.11461257934570312,
      "step": 31523
    },
    {
      "epoch": 0.000192401123046875,
      "step": 31523,
      "training_step_time": 0.396151065826416
    },
    {
      "epoch": 0.0001924072265625,
      "model_forward_time": 0.1156611442565918,
      "step": 31524
    },
    {
      "epoch": 0.0001924072265625,
      "step": 31524,
      "training_step_time": 0.39392590522766113
    },
    {
      "epoch": 0.000192413330078125,
      "model_forward_time": 0.11486029624938965,
      "step": 31525
    },
    {
      "epoch": 0.000192413330078125,
      "step": 31525,
      "training_step_time": 0.39386510848999023
    },
    {
      "epoch": 0.00019241943359375,
      "model_forward_time": 0.11460161209106445,
      "step": 31526
    },
    {
      "epoch": 0.00019241943359375,
      "step": 31526,
      "training_step_time": 0.3986177444458008
    },
    {
      "epoch": 0.000192425537109375,
      "model_forward_time": 0.11448454856872559,
      "step": 31527
    },
    {
      "epoch": 0.000192425537109375,
      "step": 31527,
      "training_step_time": 0.3884241580963135
    },
    {
      "epoch": 0.000192431640625,
      "model_forward_time": 0.1142885684967041,
      "step": 31528
    },
    {
      "epoch": 0.000192431640625,
      "step": 31528,
      "training_step_time": 0.5472366809844971
    },
    {
      "epoch": 0.000192437744140625,
      "model_forward_time": 0.11471080780029297,
      "step": 31529
    },
    {
      "epoch": 0.000192437744140625,
      "step": 31529,
      "training_step_time": 0.4257662296295166
    },
    {
      "epoch": 0.00019244384765625,
      "grad_norm": 0.13987141847610474,
      "learning_rate": 4.991732654678709e-05,
      "loss": 0.0443,
      "step": 31530
    },
    {
      "epoch": 0.00019244384765625,
      "model_forward_time": 0.11537718772888184,
      "step": 31530
    },
    {
      "epoch": 0.00019244384765625,
      "step": 31530,
      "training_step_time": 0.4590282440185547
    },
    {
      "epoch": 0.000192449951171875,
      "model_forward_time": 0.1149892807006836,
      "step": 31531
    },
    {
      "epoch": 0.000192449951171875,
      "step": 31531,
      "training_step_time": 0.48630189895629883
    },
    {
      "epoch": 0.0001924560546875,
      "model_forward_time": 0.11450552940368652,
      "step": 31532
    },
    {
      "epoch": 0.0001924560546875,
      "step": 31532,
      "training_step_time": 0.49211978912353516
    },
    {
      "epoch": 0.000192462158203125,
      "model_forward_time": 0.11547374725341797,
      "step": 31533
    },
    {
      "epoch": 0.000192462158203125,
      "step": 31533,
      "training_step_time": 0.40780043601989746
    },
    {
      "epoch": 0.00019246826171875,
      "model_forward_time": 0.11465620994567871,
      "step": 31534
    },
    {
      "epoch": 0.00019246826171875,
      "step": 31534,
      "training_step_time": 0.3943769931793213
    },
    {
      "epoch": 0.000192474365234375,
      "model_forward_time": 0.1155095100402832,
      "step": 31535
    },
    {
      "epoch": 0.000192474365234375,
      "step": 31535,
      "training_step_time": 0.3877227306365967
    },
    {
      "epoch": 0.00019248046875,
      "model_forward_time": 0.11446666717529297,
      "step": 31536
    },
    {
      "epoch": 0.00019248046875,
      "step": 31536,
      "training_step_time": 0.3986234664916992
    },
    {
      "epoch": 0.000192486572265625,
      "model_forward_time": 0.11494636535644531,
      "step": 31537
    },
    {
      "epoch": 0.000192486572265625,
      "step": 31537,
      "training_step_time": 0.4040718078613281
    },
    {
      "epoch": 0.00019249267578125,
      "model_forward_time": 0.11479973793029785,
      "step": 31538
    },
    {
      "epoch": 0.00019249267578125,
      "step": 31538,
      "training_step_time": 0.39551591873168945
    },
    {
      "epoch": 0.000192498779296875,
      "model_forward_time": 0.11507272720336914,
      "step": 31539
    },
    {
      "epoch": 0.000192498779296875,
      "step": 31539,
      "training_step_time": 0.3858966827392578
    },
    {
      "epoch": 0.0001925048828125,
      "grad_norm": 0.08409582078456879,
      "learning_rate": 4.988976876811571e-05,
      "loss": 0.0432,
      "step": 31540
    },
    {
      "epoch": 0.0001925048828125,
      "model_forward_time": 0.11493849754333496,
      "step": 31540
    },
    {
      "epoch": 0.0001925048828125,
      "step": 31540,
      "training_step_time": 0.41472578048706055
    },
    {
      "epoch": 0.000192510986328125,
      "model_forward_time": 0.11547279357910156,
      "step": 31541
    },
    {
      "epoch": 0.000192510986328125,
      "step": 31541,
      "training_step_time": 0.39490437507629395
    },
    {
      "epoch": 0.00019251708984375,
      "model_forward_time": 0.11470842361450195,
      "step": 31542
    },
    {
      "epoch": 0.00019251708984375,
      "step": 31542,
      "training_step_time": 0.396892786026001
    },
    {
      "epoch": 0.000192523193359375,
      "model_forward_time": 0.11522316932678223,
      "step": 31543
    },
    {
      "epoch": 0.000192523193359375,
      "step": 31543,
      "training_step_time": 0.4750087261199951
    },
    {
      "epoch": 0.000192529296875,
      "model_forward_time": 0.11483097076416016,
      "step": 31544
    },
    {
      "epoch": 0.000192529296875,
      "step": 31544,
      "training_step_time": 0.4205012321472168
    },
    {
      "epoch": 0.000192535400390625,
      "model_forward_time": 0.11513876914978027,
      "step": 31545
    },
    {
      "epoch": 0.000192535400390625,
      "step": 31545,
      "training_step_time": 0.4236743450164795
    },
    {
      "epoch": 0.00019254150390625,
      "model_forward_time": 0.11423373222351074,
      "step": 31546
    },
    {
      "epoch": 0.00019254150390625,
      "step": 31546,
      "training_step_time": 0.539968729019165
    },
    {
      "epoch": 0.000192547607421875,
      "model_forward_time": 0.11489987373352051,
      "step": 31547
    },
    {
      "epoch": 0.000192547607421875,
      "step": 31547,
      "training_step_time": 0.44522714614868164
    },
    {
      "epoch": 0.0001925537109375,
      "model_forward_time": 0.11504745483398438,
      "step": 31548
    },
    {
      "epoch": 0.0001925537109375,
      "step": 31548,
      "training_step_time": 0.40633463859558105
    },
    {
      "epoch": 0.000192559814453125,
      "model_forward_time": 0.11406993865966797,
      "step": 31549
    },
    {
      "epoch": 0.000192559814453125,
      "step": 31549,
      "training_step_time": 0.395824670791626
    },
    {
      "epoch": 0.00019256591796875,
      "grad_norm": 0.10218343138694763,
      "learning_rate": 4.986221102292965e-05,
      "loss": 0.0385,
      "step": 31550
    },
    {
      "epoch": 0.00019256591796875,
      "model_forward_time": 0.11506938934326172,
      "step": 31550
    },
    {
      "epoch": 0.00019256591796875,
      "step": 31550,
      "training_step_time": 0.3851442337036133
    },
    {
      "epoch": 0.000192572021484375,
      "model_forward_time": 0.11462688446044922,
      "step": 31551
    },
    {
      "epoch": 0.000192572021484375,
      "step": 31551,
      "training_step_time": 0.38720250129699707
    },
    {
      "epoch": 0.000192578125,
      "model_forward_time": 0.1153111457824707,
      "step": 31552
    },
    {
      "epoch": 0.000192578125,
      "step": 31552,
      "training_step_time": 0.38856029510498047
    },
    {
      "epoch": 0.000192584228515625,
      "model_forward_time": 0.11507463455200195,
      "step": 31553
    },
    {
      "epoch": 0.000192584228515625,
      "step": 31553,
      "training_step_time": 0.39571666717529297
    },
    {
      "epoch": 0.00019259033203125,
      "model_forward_time": 0.1153411865234375,
      "step": 31554
    },
    {
      "epoch": 0.00019259033203125,
      "step": 31554,
      "training_step_time": 0.394392728805542
    },
    {
      "epoch": 0.000192596435546875,
      "model_forward_time": 0.11515998840332031,
      "step": 31555
    },
    {
      "epoch": 0.000192596435546875,
      "step": 31555,
      "training_step_time": 0.4005272388458252
    },
    {
      "epoch": 0.0001926025390625,
      "model_forward_time": 0.11568212509155273,
      "step": 31556
    },
    {
      "epoch": 0.0001926025390625,
      "step": 31556,
      "training_step_time": 0.48003458976745605
    },
    {
      "epoch": 0.000192608642578125,
      "model_forward_time": 0.11538124084472656,
      "step": 31557
    },
    {
      "epoch": 0.000192608642578125,
      "step": 31557,
      "training_step_time": 0.39026641845703125
    },
    {
      "epoch": 0.00019261474609375,
      "model_forward_time": 0.11476445198059082,
      "step": 31558
    },
    {
      "epoch": 0.00019261474609375,
      "step": 31558,
      "training_step_time": 0.47916531562805176
    },
    {
      "epoch": 0.000192620849609375,
      "model_forward_time": 0.1141977310180664,
      "step": 31559
    },
    {
      "epoch": 0.000192620849609375,
      "step": 31559,
      "training_step_time": 0.4447472095489502
    },
    {
      "epoch": 0.000192626953125,
      "grad_norm": 0.17789754271507263,
      "learning_rate": 4.9834653319600246e-05,
      "loss": 0.0473,
      "step": 31560
    },
    {
      "epoch": 0.000192626953125,
      "model_forward_time": 0.11645126342773438,
      "step": 31560
    },
    {
      "epoch": 0.000192626953125,
      "step": 31560,
      "training_step_time": 0.4527778625488281
    },
    {
      "epoch": 0.000192633056640625,
      "model_forward_time": 0.11476564407348633,
      "step": 31561
    },
    {
      "epoch": 0.000192633056640625,
      "step": 31561,
      "training_step_time": 0.4256401062011719
    },
    {
      "epoch": 0.00019263916015625,
      "model_forward_time": 0.11549615859985352,
      "step": 31562
    },
    {
      "epoch": 0.00019263916015625,
      "step": 31562,
      "training_step_time": 0.533052921295166
    },
    {
      "epoch": 0.000192645263671875,
      "model_forward_time": 0.11465215682983398,
      "step": 31563
    },
    {
      "epoch": 0.000192645263671875,
      "step": 31563,
      "training_step_time": 0.3818087577819824
    },
    {
      "epoch": 0.0001926513671875,
      "model_forward_time": 0.11539411544799805,
      "step": 31564
    },
    {
      "epoch": 0.0001926513671875,
      "step": 31564,
      "training_step_time": 0.49353957176208496
    },
    {
      "epoch": 0.000192657470703125,
      "model_forward_time": 0.11453437805175781,
      "step": 31565
    },
    {
      "epoch": 0.000192657470703125,
      "step": 31565,
      "training_step_time": 0.39414238929748535
    },
    {
      "epoch": 0.00019266357421875,
      "model_forward_time": 0.1147770881652832,
      "step": 31566
    },
    {
      "epoch": 0.00019266357421875,
      "step": 31566,
      "training_step_time": 0.3942899703979492
    },
    {
      "epoch": 0.000192669677734375,
      "model_forward_time": 0.11499786376953125,
      "step": 31567
    },
    {
      "epoch": 0.000192669677734375,
      "step": 31567,
      "training_step_time": 0.3927149772644043
    },
    {
      "epoch": 0.00019267578125,
      "model_forward_time": 0.11490464210510254,
      "step": 31568
    },
    {
      "epoch": 0.00019267578125,
      "step": 31568,
      "training_step_time": 0.6422567367553711
    },
    {
      "epoch": 0.000192681884765625,
      "model_forward_time": 0.11452794075012207,
      "step": 31569
    },
    {
      "epoch": 0.000192681884765625,
      "step": 31569,
      "training_step_time": 0.39277052879333496
    },
    {
      "epoch": 0.00019268798828125,
      "grad_norm": 0.1252693086862564,
      "learning_rate": 4.98070956664988e-05,
      "loss": 0.0472,
      "step": 31570
    },
    {
      "epoch": 0.00019268798828125,
      "model_forward_time": 0.11460304260253906,
      "step": 31570
    },
    {
      "epoch": 0.00019268798828125,
      "step": 31570,
      "training_step_time": 0.39488744735717773
    },
    {
      "epoch": 0.000192694091796875,
      "model_forward_time": 0.11509895324707031,
      "step": 31571
    },
    {
      "epoch": 0.000192694091796875,
      "step": 31571,
      "training_step_time": 0.3926844596862793
    },
    {
      "epoch": 0.0001927001953125,
      "model_forward_time": 0.11490249633789062,
      "step": 31572
    },
    {
      "epoch": 0.0001927001953125,
      "step": 31572,
      "training_step_time": 0.44649767875671387
    },
    {
      "epoch": 0.000192706298828125,
      "model_forward_time": 0.11699366569519043,
      "step": 31573
    },
    {
      "epoch": 0.000192706298828125,
      "step": 31573,
      "training_step_time": 0.46333789825439453
    },
    {
      "epoch": 0.00019271240234375,
      "model_forward_time": 0.11526131629943848,
      "step": 31574
    },
    {
      "epoch": 0.00019271240234375,
      "step": 31574,
      "training_step_time": 0.6948585510253906
    },
    {
      "epoch": 0.000192718505859375,
      "model_forward_time": 0.11462092399597168,
      "step": 31575
    },
    {
      "epoch": 0.000192718505859375,
      "step": 31575,
      "training_step_time": 0.4527921676635742
    },
    {
      "epoch": 0.000192724609375,
      "model_forward_time": 0.11467623710632324,
      "step": 31576
    },
    {
      "epoch": 0.000192724609375,
      "step": 31576,
      "training_step_time": 0.39274048805236816
    },
    {
      "epoch": 0.000192730712890625,
      "model_forward_time": 0.11440014839172363,
      "step": 31577
    },
    {
      "epoch": 0.000192730712890625,
      "step": 31577,
      "training_step_time": 0.38390445709228516
    },
    {
      "epoch": 0.00019273681640625,
      "model_forward_time": 0.11492490768432617,
      "step": 31578
    },
    {
      "epoch": 0.00019273681640625,
      "step": 31578,
      "training_step_time": 0.38385534286499023
    },
    {
      "epoch": 0.000192742919921875,
      "model_forward_time": 0.11455225944519043,
      "step": 31579
    },
    {
      "epoch": 0.000192742919921875,
      "step": 31579,
      "training_step_time": 0.3948521614074707
    },
    {
      "epoch": 0.0001927490234375,
      "grad_norm": 0.11486061662435532,
      "learning_rate": 4.97795380719966e-05,
      "loss": 0.0419,
      "step": 31580
    },
    {
      "epoch": 0.0001927490234375,
      "model_forward_time": 0.11457633972167969,
      "step": 31580
    },
    {
      "epoch": 0.0001927490234375,
      "step": 31580,
      "training_step_time": 0.4746091365814209
    },
    {
      "epoch": 0.000192755126953125,
      "model_forward_time": 0.11504507064819336,
      "step": 31581
    },
    {
      "epoch": 0.000192755126953125,
      "step": 31581,
      "training_step_time": 0.4100759029388428
    },
    {
      "epoch": 0.00019276123046875,
      "model_forward_time": 0.11467695236206055,
      "step": 31582
    },
    {
      "epoch": 0.00019276123046875,
      "step": 31582,
      "training_step_time": 0.3879268169403076
    },
    {
      "epoch": 0.000192767333984375,
      "model_forward_time": 0.11609673500061035,
      "step": 31583
    },
    {
      "epoch": 0.000192767333984375,
      "step": 31583,
      "training_step_time": 0.4029386043548584
    },
    {
      "epoch": 0.0001927734375,
      "model_forward_time": 0.11486649513244629,
      "step": 31584
    },
    {
      "epoch": 0.0001927734375,
      "step": 31584,
      "training_step_time": 0.38768649101257324
    },
    {
      "epoch": 0.000192779541015625,
      "model_forward_time": 0.11540627479553223,
      "step": 31585
    },
    {
      "epoch": 0.000192779541015625,
      "step": 31585,
      "training_step_time": 0.418088436126709
    },
    {
      "epoch": 0.00019278564453125,
      "model_forward_time": 0.11487054824829102,
      "step": 31586
    },
    {
      "epoch": 0.00019278564453125,
      "step": 31586,
      "training_step_time": 0.4774045944213867
    },
    {
      "epoch": 0.000192791748046875,
      "model_forward_time": 0.1162118911743164,
      "step": 31587
    },
    {
      "epoch": 0.000192791748046875,
      "step": 31587,
      "training_step_time": 0.5222041606903076
    },
    {
      "epoch": 0.0001927978515625,
      "model_forward_time": 0.11547565460205078,
      "step": 31588
    },
    {
      "epoch": 0.0001927978515625,
      "step": 31588,
      "training_step_time": 0.4279959201812744
    },
    {
      "epoch": 0.000192803955078125,
      "model_forward_time": 0.11457562446594238,
      "step": 31589
    },
    {
      "epoch": 0.000192803955078125,
      "step": 31589,
      "training_step_time": 0.48790788650512695
    },
    {
      "epoch": 0.00019281005859375,
      "grad_norm": 0.1326921135187149,
      "learning_rate": 4.975198054446492e-05,
      "loss": 0.0384,
      "step": 31590
    },
    {
      "epoch": 0.00019281005859375,
      "model_forward_time": 0.11471247673034668,
      "step": 31590
    },
    {
      "epoch": 0.00019281005859375,
      "step": 31590,
      "training_step_time": 0.39144468307495117
    },
    {
      "epoch": 0.000192816162109375,
      "model_forward_time": 0.11475586891174316,
      "step": 31591
    },
    {
      "epoch": 0.000192816162109375,
      "step": 31591,
      "training_step_time": 0.3902406692504883
    },
    {
      "epoch": 0.000192822265625,
      "model_forward_time": 0.11415362358093262,
      "step": 31592
    },
    {
      "epoch": 0.000192822265625,
      "step": 31592,
      "training_step_time": 0.39153170585632324
    },
    {
      "epoch": 0.000192828369140625,
      "model_forward_time": 0.11550784111022949,
      "step": 31593
    },
    {
      "epoch": 0.000192828369140625,
      "step": 31593,
      "training_step_time": 0.3925442695617676
    },
    {
      "epoch": 0.00019283447265625,
      "model_forward_time": 0.11492276191711426,
      "step": 31594
    },
    {
      "epoch": 0.00019283447265625,
      "step": 31594,
      "training_step_time": 0.38913798332214355
    },
    {
      "epoch": 0.000192840576171875,
      "model_forward_time": 0.11555171012878418,
      "step": 31595
    },
    {
      "epoch": 0.000192840576171875,
      "step": 31595,
      "training_step_time": 0.39720892906188965
    },
    {
      "epoch": 0.0001928466796875,
      "model_forward_time": 0.11569976806640625,
      "step": 31596
    },
    {
      "epoch": 0.0001928466796875,
      "step": 31596,
      "training_step_time": 0.40120649337768555
    },
    {
      "epoch": 0.000192852783203125,
      "model_forward_time": 0.11464524269104004,
      "step": 31597
    },
    {
      "epoch": 0.000192852783203125,
      "step": 31597,
      "training_step_time": 0.3963284492492676
    },
    {
      "epoch": 0.00019285888671875,
      "model_forward_time": 0.11506080627441406,
      "step": 31598
    },
    {
      "epoch": 0.00019285888671875,
      "step": 31598,
      "training_step_time": 0.38944363594055176
    },
    {
      "epoch": 0.000192864990234375,
      "model_forward_time": 0.11476325988769531,
      "step": 31599
    },
    {
      "epoch": 0.000192864990234375,
      "step": 31599,
      "training_step_time": 0.45604634284973145
    },
    {
      "epoch": 0.00019287109375,
      "grad_norm": 0.15975357592105865,
      "learning_rate": 4.972442309227498e-05,
      "loss": 0.0496,
      "step": 31600
    },
    {
      "epoch": 0.00019287109375,
      "model_forward_time": 0.11491775512695312,
      "step": 31600
    },
    {
      "epoch": 0.00019287109375,
      "step": 31600,
      "training_step_time": 0.47072839736938477
    },
    {
      "epoch": 0.000192877197265625,
      "model_forward_time": 0.11539697647094727,
      "step": 31601
    },
    {
      "epoch": 0.000192877197265625,
      "step": 31601,
      "training_step_time": 0.47849154472351074
    },
    {
      "epoch": 0.00019288330078125,
      "model_forward_time": 0.11551666259765625,
      "step": 31602
    },
    {
      "epoch": 0.00019288330078125,
      "step": 31602,
      "training_step_time": 0.41623640060424805
    },
    {
      "epoch": 0.000192889404296875,
      "model_forward_time": 0.11506533622741699,
      "step": 31603
    },
    {
      "epoch": 0.000192889404296875,
      "step": 31603,
      "training_step_time": 0.39626169204711914
    },
    {
      "epoch": 0.0001928955078125,
      "model_forward_time": 0.11559796333312988,
      "step": 31604
    },
    {
      "epoch": 0.0001928955078125,
      "step": 31604,
      "training_step_time": 0.5029425621032715
    },
    {
      "epoch": 0.000192901611328125,
      "model_forward_time": 0.11519622802734375,
      "step": 31605
    },
    {
      "epoch": 0.000192901611328125,
      "step": 31605,
      "training_step_time": 0.4035515785217285
    },
    {
      "epoch": 0.00019290771484375,
      "model_forward_time": 0.11429405212402344,
      "step": 31606
    },
    {
      "epoch": 0.00019290771484375,
      "step": 31606,
      "training_step_time": 0.3922576904296875
    },
    {
      "epoch": 0.000192913818359375,
      "model_forward_time": 0.11489534378051758,
      "step": 31607
    },
    {
      "epoch": 0.000192913818359375,
      "step": 31607,
      "training_step_time": 0.38213372230529785
    },
    {
      "epoch": 0.000192919921875,
      "model_forward_time": 0.11458325386047363,
      "step": 31608
    },
    {
      "epoch": 0.000192919921875,
      "step": 31608,
      "training_step_time": 0.38942646980285645
    },
    {
      "epoch": 0.000192926025390625,
      "model_forward_time": 0.11557841300964355,
      "step": 31609
    },
    {
      "epoch": 0.000192926025390625,
      "step": 31609,
      "training_step_time": 0.40291810035705566
    },
    {
      "epoch": 0.00019293212890625,
      "grad_norm": 0.13683223724365234,
      "learning_rate": 4.9696865723798035e-05,
      "loss": 0.0423,
      "step": 31610
    },
    {
      "epoch": 0.00019293212890625,
      "model_forward_time": 0.11506152153015137,
      "step": 31610
    },
    {
      "epoch": 0.00019293212890625,
      "step": 31610,
      "training_step_time": 0.39299821853637695
    },
    {
      "epoch": 0.000192938232421875,
      "model_forward_time": 0.11537718772888184,
      "step": 31611
    },
    {
      "epoch": 0.000192938232421875,
      "step": 31611,
      "training_step_time": 0.3947911262512207
    },
    {
      "epoch": 0.0001929443359375,
      "model_forward_time": 0.11569094657897949,
      "step": 31612
    },
    {
      "epoch": 0.0001929443359375,
      "step": 31612,
      "training_step_time": 0.7261710166931152
    },
    {
      "epoch": 0.000192950439453125,
      "model_forward_time": 0.11519885063171387,
      "step": 31613
    },
    {
      "epoch": 0.000192950439453125,
      "step": 31613,
      "training_step_time": 0.38622045516967773
    },
    {
      "epoch": 0.00019295654296875,
      "model_forward_time": 0.1150057315826416,
      "step": 31614
    },
    {
      "epoch": 0.00019295654296875,
      "step": 31614,
      "training_step_time": 0.4020195007324219
    },
    {
      "epoch": 0.000192962646484375,
      "model_forward_time": 0.11453509330749512,
      "step": 31615
    },
    {
      "epoch": 0.000192962646484375,
      "step": 31615,
      "training_step_time": 0.46563291549682617
    },
    {
      "epoch": 0.00019296875,
      "model_forward_time": 0.11466860771179199,
      "step": 31616
    },
    {
      "epoch": 0.00019296875,
      "step": 31616,
      "training_step_time": 0.36631298065185547
    },
    {
      "epoch": 0.000192974853515625,
      "model_forward_time": 0.11446690559387207,
      "step": 31617
    },
    {
      "epoch": 0.000192974853515625,
      "step": 31617,
      "training_step_time": 0.4339563846588135
    },
    {
      "epoch": 0.00019298095703125,
      "model_forward_time": 0.11502647399902344,
      "step": 31618
    },
    {
      "epoch": 0.00019298095703125,
      "step": 31618,
      "training_step_time": 0.5924923419952393
    },
    {
      "epoch": 0.000192987060546875,
      "model_forward_time": 0.11481952667236328,
      "step": 31619
    },
    {
      "epoch": 0.000192987060546875,
      "step": 31619,
      "training_step_time": 0.36905765533447266
    },
    {
      "epoch": 0.0001929931640625,
      "grad_norm": 0.09972502291202545,
      "learning_rate": 4.96693084474053e-05,
      "loss": 0.0363,
      "step": 31620
    },
    {
      "epoch": 0.0001929931640625,
      "model_forward_time": 0.11387991905212402,
      "step": 31620
    },
    {
      "epoch": 0.0001929931640625,
      "step": 31620,
      "training_step_time": 0.38953351974487305
    },
    {
      "epoch": 0.000192999267578125,
      "model_forward_time": 0.11468219757080078,
      "step": 31621
    },
    {
      "epoch": 0.000192999267578125,
      "step": 31621,
      "training_step_time": 0.3964729309082031
    },
    {
      "epoch": 0.00019300537109375,
      "model_forward_time": 0.11501717567443848,
      "step": 31622
    },
    {
      "epoch": 0.00019300537109375,
      "step": 31622,
      "training_step_time": 0.38939714431762695
    },
    {
      "epoch": 0.000193011474609375,
      "model_forward_time": 0.11503100395202637,
      "step": 31623
    },
    {
      "epoch": 0.000193011474609375,
      "step": 31623,
      "training_step_time": 0.38422322273254395
    },
    {
      "epoch": 0.000193017578125,
      "model_forward_time": 0.1148843765258789,
      "step": 31624
    },
    {
      "epoch": 0.000193017578125,
      "step": 31624,
      "training_step_time": 0.7187883853912354
    },
    {
      "epoch": 0.000193023681640625,
      "model_forward_time": 0.11430144309997559,
      "step": 31625
    },
    {
      "epoch": 0.000193023681640625,
      "step": 31625,
      "training_step_time": 0.38879871368408203
    },
    {
      "epoch": 0.00019302978515625,
      "model_forward_time": 0.11463046073913574,
      "step": 31626
    },
    {
      "epoch": 0.00019302978515625,
      "step": 31626,
      "training_step_time": 0.4692509174346924
    },
    {
      "epoch": 0.000193035888671875,
      "model_forward_time": 0.11453080177307129,
      "step": 31627
    },
    {
      "epoch": 0.000193035888671875,
      "step": 31627,
      "training_step_time": 0.5008432865142822
    },
    {
      "epoch": 0.0001930419921875,
      "model_forward_time": 0.11410951614379883,
      "step": 31628
    },
    {
      "epoch": 0.0001930419921875,
      "step": 31628,
      "training_step_time": 0.4202601909637451
    },
    {
      "epoch": 0.000193048095703125,
      "model_forward_time": 0.11453914642333984,
      "step": 31629
    },
    {
      "epoch": 0.000193048095703125,
      "step": 31629,
      "training_step_time": 0.42943525314331055
    },
    {
      "epoch": 0.00019305419921875,
      "grad_norm": 0.16812077164649963,
      "learning_rate": 4.96417512714679e-05,
      "loss": 0.0404,
      "step": 31630
    },
    {
      "epoch": 0.00019305419921875,
      "model_forward_time": 0.11551713943481445,
      "step": 31630
    },
    {
      "epoch": 0.00019305419921875,
      "step": 31630,
      "training_step_time": 0.5263111591339111
    },
    {
      "epoch": 0.000193060302734375,
      "model_forward_time": 0.11449885368347168,
      "step": 31631
    },
    {
      "epoch": 0.000193060302734375,
      "step": 31631,
      "training_step_time": 0.4499473571777344
    },
    {
      "epoch": 0.00019306640625,
      "model_forward_time": 0.11476874351501465,
      "step": 31632
    },
    {
      "epoch": 0.00019306640625,
      "step": 31632,
      "training_step_time": 0.3904101848602295
    },
    {
      "epoch": 0.000193072509765625,
      "model_forward_time": 0.11475038528442383,
      "step": 31633
    },
    {
      "epoch": 0.000193072509765625,
      "step": 31633,
      "training_step_time": 0.39458370208740234
    },
    {
      "epoch": 0.00019307861328125,
      "model_forward_time": 0.11578202247619629,
      "step": 31634
    },
    {
      "epoch": 0.00019307861328125,
      "step": 31634,
      "training_step_time": 0.3940873146057129
    },
    {
      "epoch": 0.000193084716796875,
      "model_forward_time": 0.11477446556091309,
      "step": 31635
    },
    {
      "epoch": 0.000193084716796875,
      "step": 31635,
      "training_step_time": 0.38587188720703125
    },
    {
      "epoch": 0.0001930908203125,
      "model_forward_time": 0.11527252197265625,
      "step": 31636
    },
    {
      "epoch": 0.0001930908203125,
      "step": 31636,
      "training_step_time": 0.6300806999206543
    },
    {
      "epoch": 0.000193096923828125,
      "model_forward_time": 0.11458539962768555,
      "step": 31637
    },
    {
      "epoch": 0.000193096923828125,
      "step": 31637,
      "training_step_time": 0.4150078296661377
    },
    {
      "epoch": 0.00019310302734375,
      "model_forward_time": 0.11481690406799316,
      "step": 31638
    },
    {
      "epoch": 0.00019310302734375,
      "step": 31638,
      "training_step_time": 0.4563934803009033
    },
    {
      "epoch": 0.000193109130859375,
      "model_forward_time": 0.11529779434204102,
      "step": 31639
    },
    {
      "epoch": 0.000193109130859375,
      "step": 31639,
      "training_step_time": 0.39743947982788086
    },
    {
      "epoch": 0.000193115234375,
      "grad_norm": 0.1341429352760315,
      "learning_rate": 4.961419420435703e-05,
      "loss": 0.0426,
      "step": 31640
    },
    {
      "epoch": 0.000193115234375,
      "model_forward_time": 0.11480069160461426,
      "step": 31640
    },
    {
      "epoch": 0.000193115234375,
      "step": 31640,
      "training_step_time": 0.43947529792785645
    },
    {
      "epoch": 0.000193121337890625,
      "model_forward_time": 0.11516141891479492,
      "step": 31641
    },
    {
      "epoch": 0.000193121337890625,
      "step": 31641,
      "training_step_time": 0.38565659523010254
    },
    {
      "epoch": 0.00019312744140625,
      "model_forward_time": 0.11520719528198242,
      "step": 31642
    },
    {
      "epoch": 0.00019312744140625,
      "step": 31642,
      "training_step_time": 0.5214114189147949
    },
    {
      "epoch": 0.000193133544921875,
      "model_forward_time": 0.11599898338317871,
      "step": 31643
    },
    {
      "epoch": 0.000193133544921875,
      "step": 31643,
      "training_step_time": 0.3902597427368164
    },
    {
      "epoch": 0.0001931396484375,
      "model_forward_time": 0.11536478996276855,
      "step": 31644
    },
    {
      "epoch": 0.0001931396484375,
      "step": 31644,
      "training_step_time": 0.42961692810058594
    },
    {
      "epoch": 0.000193145751953125,
      "model_forward_time": 0.11544418334960938,
      "step": 31645
    },
    {
      "epoch": 0.000193145751953125,
      "step": 31645,
      "training_step_time": 0.4823892116546631
    },
    {
      "epoch": 0.00019315185546875,
      "model_forward_time": 0.1150522232055664,
      "step": 31646
    },
    {
      "epoch": 0.00019315185546875,
      "step": 31646,
      "training_step_time": 0.38031697273254395
    },
    {
      "epoch": 0.000193157958984375,
      "model_forward_time": 0.11506867408752441,
      "step": 31647
    },
    {
      "epoch": 0.000193157958984375,
      "step": 31647,
      "training_step_time": 0.378772497177124
    },
    {
      "epoch": 0.0001931640625,
      "model_forward_time": 0.11513948440551758,
      "step": 31648
    },
    {
      "epoch": 0.0001931640625,
      "step": 31648,
      "training_step_time": 0.37886905670166016
    },
    {
      "epoch": 0.000193170166015625,
      "model_forward_time": 0.11561274528503418,
      "step": 31649
    },
    {
      "epoch": 0.000193170166015625,
      "step": 31649,
      "training_step_time": 0.3854649066925049
    },
    {
      "epoch": 0.00019317626953125,
      "grad_norm": 0.12421392649412155,
      "learning_rate": 4.9586637254443756e-05,
      "loss": 0.0381,
      "step": 31650
    },
    {
      "epoch": 0.00019317626953125,
      "model_forward_time": 0.11599564552307129,
      "step": 31650
    },
    {
      "epoch": 0.00019317626953125,
      "step": 31650,
      "training_step_time": 0.4055061340332031
    },
    {
      "epoch": 0.000193182373046875,
      "model_forward_time": 0.11491847038269043,
      "step": 31651
    },
    {
      "epoch": 0.000193182373046875,
      "step": 31651,
      "training_step_time": 0.4266476631164551
    },
    {
      "epoch": 0.0001931884765625,
      "model_forward_time": 0.11464691162109375,
      "step": 31652
    },
    {
      "epoch": 0.0001931884765625,
      "step": 31652,
      "training_step_time": 0.4093499183654785
    },
    {
      "epoch": 0.000193194580078125,
      "model_forward_time": 0.11531472206115723,
      "step": 31653
    },
    {
      "epoch": 0.000193194580078125,
      "step": 31653,
      "training_step_time": 0.3897740840911865
    },
    {
      "epoch": 0.00019320068359375,
      "model_forward_time": 0.1153411865234375,
      "step": 31654
    },
    {
      "epoch": 0.00019320068359375,
      "step": 31654,
      "training_step_time": 0.506871223449707
    },
    {
      "epoch": 0.000193206787109375,
      "model_forward_time": 0.11455988883972168,
      "step": 31655
    },
    {
      "epoch": 0.000193206787109375,
      "step": 31655,
      "training_step_time": 0.46894216537475586
    },
    {
      "epoch": 0.000193212890625,
      "model_forward_time": 0.11523818969726562,
      "step": 31656
    },
    {
      "epoch": 0.000193212890625,
      "step": 31656,
      "training_step_time": 0.3985786437988281
    },
    {
      "epoch": 0.000193218994140625,
      "model_forward_time": 0.11551713943481445,
      "step": 31657
    },
    {
      "epoch": 0.000193218994140625,
      "step": 31657,
      "training_step_time": 0.5109970569610596
    },
    {
      "epoch": 0.00019322509765625,
      "model_forward_time": 0.11447501182556152,
      "step": 31658
    },
    {
      "epoch": 0.00019322509765625,
      "step": 31658,
      "training_step_time": 0.4320826530456543
    },
    {
      "epoch": 0.000193231201171875,
      "model_forward_time": 0.11479878425598145,
      "step": 31659
    },
    {
      "epoch": 0.000193231201171875,
      "step": 31659,
      "training_step_time": 0.4749419689178467
    },
    {
      "epoch": 0.0001932373046875,
      "grad_norm": 0.12412121146917343,
      "learning_rate": 4.955908043009917e-05,
      "loss": 0.0417,
      "step": 31660
    },
    {
      "epoch": 0.0001932373046875,
      "model_forward_time": 0.11475896835327148,
      "step": 31660
    },
    {
      "epoch": 0.0001932373046875,
      "step": 31660,
      "training_step_time": 0.3887321949005127
    },
    {
      "epoch": 0.000193243408203125,
      "model_forward_time": 0.11540341377258301,
      "step": 31661
    },
    {
      "epoch": 0.000193243408203125,
      "step": 31661,
      "training_step_time": 0.3927273750305176
    },
    {
      "epoch": 0.00019324951171875,
      "model_forward_time": 0.11489295959472656,
      "step": 31662
    },
    {
      "epoch": 0.00019324951171875,
      "step": 31662,
      "training_step_time": 0.39410853385925293
    },
    {
      "epoch": 0.000193255615234375,
      "model_forward_time": 0.11522269248962402,
      "step": 31663
    },
    {
      "epoch": 0.000193255615234375,
      "step": 31663,
      "training_step_time": 0.38936829566955566
    },
    {
      "epoch": 0.00019326171875,
      "model_forward_time": 0.11454939842224121,
      "step": 31664
    },
    {
      "epoch": 0.00019326171875,
      "step": 31664,
      "training_step_time": 0.3951742649078369
    },
    {
      "epoch": 0.000193267822265625,
      "model_forward_time": 0.11525464057922363,
      "step": 31665
    },
    {
      "epoch": 0.000193267822265625,
      "step": 31665,
      "training_step_time": 0.40155625343322754
    },
    {
      "epoch": 0.00019327392578125,
      "model_forward_time": 0.11566758155822754,
      "step": 31666
    },
    {
      "epoch": 0.00019327392578125,
      "step": 31666,
      "training_step_time": 0.3954951763153076
    },
    {
      "epoch": 0.000193280029296875,
      "model_forward_time": 0.11494755744934082,
      "step": 31667
    },
    {
      "epoch": 0.000193280029296875,
      "step": 31667,
      "training_step_time": 0.3864424228668213
    },
    {
      "epoch": 0.0001932861328125,
      "model_forward_time": 0.11545038223266602,
      "step": 31668
    },
    {
      "epoch": 0.0001932861328125,
      "step": 31668,
      "training_step_time": 0.3986215591430664
    },
    {
      "epoch": 0.000193292236328125,
      "model_forward_time": 0.11566805839538574,
      "step": 31669
    },
    {
      "epoch": 0.000193292236328125,
      "step": 31669,
      "training_step_time": 0.4033317565917969
    },
    {
      "epoch": 0.00019329833984375,
      "grad_norm": 0.15425676107406616,
      "learning_rate": 4.9531523739694325e-05,
      "loss": 0.0469,
      "step": 31670
    },
    {
      "epoch": 0.00019329833984375,
      "model_forward_time": 0.11500239372253418,
      "step": 31670
    },
    {
      "epoch": 0.00019329833984375,
      "step": 31670,
      "training_step_time": 0.47748541831970215
    },
    {
      "epoch": 0.000193304443359375,
      "model_forward_time": 0.11514878273010254,
      "step": 31671
    },
    {
      "epoch": 0.000193304443359375,
      "step": 31671,
      "training_step_time": 0.4159665107727051
    },
    {
      "epoch": 0.000193310546875,
      "model_forward_time": 0.11513376235961914,
      "step": 31672
    },
    {
      "epoch": 0.000193310546875,
      "step": 31672,
      "training_step_time": 0.42791247367858887
    },
    {
      "epoch": 0.000193316650390625,
      "model_forward_time": 0.11564850807189941,
      "step": 31673
    },
    {
      "epoch": 0.000193316650390625,
      "step": 31673,
      "training_step_time": 0.48329734802246094
    },
    {
      "epoch": 0.00019332275390625,
      "model_forward_time": 0.11539840698242188,
      "step": 31674
    },
    {
      "epoch": 0.00019332275390625,
      "step": 31674,
      "training_step_time": 0.4369525909423828
    },
    {
      "epoch": 0.000193328857421875,
      "model_forward_time": 0.11516690254211426,
      "step": 31675
    },
    {
      "epoch": 0.000193328857421875,
      "step": 31675,
      "training_step_time": 0.39745306968688965
    },
    {
      "epoch": 0.0001933349609375,
      "model_forward_time": 0.1149742603302002,
      "step": 31676
    },
    {
      "epoch": 0.0001933349609375,
      "step": 31676,
      "training_step_time": 0.39743494987487793
    },
    {
      "epoch": 0.000193341064453125,
      "model_forward_time": 0.11570429801940918,
      "step": 31677
    },
    {
      "epoch": 0.000193341064453125,
      "step": 31677,
      "training_step_time": 0.40109992027282715
    },
    {
      "epoch": 0.00019334716796875,
      "model_forward_time": 0.1148228645324707,
      "step": 31678
    },
    {
      "epoch": 0.00019334716796875,
      "step": 31678,
      "training_step_time": 0.4232816696166992
    },
    {
      "epoch": 0.000193353271484375,
      "model_forward_time": 0.11515164375305176,
      "step": 31679
    },
    {
      "epoch": 0.000193353271484375,
      "step": 31679,
      "training_step_time": 0.4172401428222656
    },
    {
      "epoch": 0.000193359375,
      "grad_norm": 0.11207219213247299,
      "learning_rate": 4.950396719160018e-05,
      "loss": 0.036,
      "step": 31680
    },
    {
      "epoch": 0.000193359375,
      "model_forward_time": 0.11610221862792969,
      "step": 31680
    },
    {
      "epoch": 0.000193359375,
      "step": 31680,
      "training_step_time": 0.40297698974609375
    },
    {
      "epoch": 0.000193365478515625,
      "model_forward_time": 0.11548113822937012,
      "step": 31681
    },
    {
      "epoch": 0.000193365478515625,
      "step": 31681,
      "training_step_time": 0.4135289192199707
    },
    {
      "epoch": 0.00019337158203125,
      "model_forward_time": 0.11548733711242676,
      "step": 31682
    },
    {
      "epoch": 0.00019337158203125,
      "step": 31682,
      "training_step_time": 0.3884453773498535
    },
    {
      "epoch": 0.000193377685546875,
      "model_forward_time": 0.11629056930541992,
      "step": 31683
    },
    {
      "epoch": 0.000193377685546875,
      "step": 31683,
      "training_step_time": 0.39536023139953613
    },
    {
      "epoch": 0.0001933837890625,
      "model_forward_time": 0.11455297470092773,
      "step": 31684
    },
    {
      "epoch": 0.0001933837890625,
      "step": 31684,
      "training_step_time": 0.39163756370544434
    },
    {
      "epoch": 0.000193389892578125,
      "model_forward_time": 0.1150503158569336,
      "step": 31685
    },
    {
      "epoch": 0.000193389892578125,
      "step": 31685,
      "training_step_time": 0.4618990421295166
    },
    {
      "epoch": 0.00019339599609375,
      "model_forward_time": 0.11549830436706543,
      "step": 31686
    },
    {
      "epoch": 0.00019339599609375,
      "step": 31686,
      "training_step_time": 0.4915885925292969
    },
    {
      "epoch": 0.000193402099609375,
      "model_forward_time": 0.11555719375610352,
      "step": 31687
    },
    {
      "epoch": 0.000193402099609375,
      "step": 31687,
      "training_step_time": 0.4055297374725342
    },
    {
      "epoch": 0.000193408203125,
      "model_forward_time": 0.1152346134185791,
      "step": 31688
    },
    {
      "epoch": 0.000193408203125,
      "step": 31688,
      "training_step_time": 0.49698543548583984
    },
    {
      "epoch": 0.000193414306640625,
      "model_forward_time": 0.11531662940979004,
      "step": 31689
    },
    {
      "epoch": 0.000193414306640625,
      "step": 31689,
      "training_step_time": 0.4288933277130127
    },
    {
      "epoch": 0.00019342041015625,
      "grad_norm": 0.1442069709300995,
      "learning_rate": 4.947641079418773e-05,
      "loss": 0.0469,
      "step": 31690
    },
    {
      "epoch": 0.00019342041015625,
      "model_forward_time": 0.11625552177429199,
      "step": 31690
    },
    {
      "epoch": 0.00019342041015625,
      "step": 31690,
      "training_step_time": 0.38919782638549805
    },
    {
      "epoch": 0.000193426513671875,
      "model_forward_time": 0.11474275588989258,
      "step": 31691
    },
    {
      "epoch": 0.000193426513671875,
      "step": 31691,
      "training_step_time": 0.38495326042175293
    },
    {
      "epoch": 0.0001934326171875,
      "model_forward_time": 0.11519932746887207,
      "step": 31692
    },
    {
      "epoch": 0.0001934326171875,
      "step": 31692,
      "training_step_time": 0.4070465564727783
    },
    {
      "epoch": 0.000193438720703125,
      "model_forward_time": 0.11498785018920898,
      "step": 31693
    },
    {
      "epoch": 0.000193438720703125,
      "step": 31693,
      "training_step_time": 0.46387505531311035
    },
    {
      "epoch": 0.00019344482421875,
      "model_forward_time": 0.11520743370056152,
      "step": 31694
    },
    {
      "epoch": 0.00019344482421875,
      "step": 31694,
      "training_step_time": 0.39297962188720703
    },
    {
      "epoch": 0.000193450927734375,
      "model_forward_time": 0.11599111557006836,
      "step": 31695
    },
    {
      "epoch": 0.000193450927734375,
      "step": 31695,
      "training_step_time": 0.3921844959259033
    },
    {
      "epoch": 0.00019345703125,
      "model_forward_time": 0.11572384834289551,
      "step": 31696
    },
    {
      "epoch": 0.00019345703125,
      "step": 31696,
      "training_step_time": 0.887676477432251
    },
    {
      "epoch": 0.000193463134765625,
      "model_forward_time": 0.11487507820129395,
      "step": 31697
    },
    {
      "epoch": 0.000193463134765625,
      "step": 31697,
      "training_step_time": 0.38303184509277344
    },
    {
      "epoch": 0.00019346923828125,
      "model_forward_time": 0.11408042907714844,
      "step": 31698
    },
    {
      "epoch": 0.00019346923828125,
      "step": 31698,
      "training_step_time": 0.4664733409881592
    },
    {
      "epoch": 0.000193475341796875,
      "model_forward_time": 0.1145317554473877,
      "step": 31699
    },
    {
      "epoch": 0.000193475341796875,
      "step": 31699,
      "training_step_time": 0.4442715644836426
    },
    {
      "epoch": 0.0001934814453125,
      "grad_norm": 0.10345295816659927,
      "learning_rate": 4.9448854555827825e-05,
      "loss": 0.0406,
      "step": 31700
    },
    {
      "epoch": 0.0001934814453125,
      "model_forward_time": 0.11433792114257812,
      "step": 31700
    },
    {
      "epoch": 0.0001934814453125,
      "step": 31700,
      "training_step_time": 0.38465237617492676
    },
    {
      "epoch": 0.000193487548828125,
      "model_forward_time": 0.11573123931884766,
      "step": 31701
    },
    {
      "epoch": 0.000193487548828125,
      "step": 31701,
      "training_step_time": 0.49510812759399414
    },
    {
      "epoch": 0.00019349365234375,
      "model_forward_time": 0.11524653434753418,
      "step": 31702
    },
    {
      "epoch": 0.00019349365234375,
      "step": 31702,
      "training_step_time": 0.5758769512176514
    },
    {
      "epoch": 0.000193499755859375,
      "model_forward_time": 0.11498641967773438,
      "step": 31703
    },
    {
      "epoch": 0.000193499755859375,
      "step": 31703,
      "training_step_time": 0.395388126373291
    },
    {
      "epoch": 0.000193505859375,
      "model_forward_time": 0.11549806594848633,
      "step": 31704
    },
    {
      "epoch": 0.000193505859375,
      "step": 31704,
      "training_step_time": 0.39008522033691406
    },
    {
      "epoch": 0.000193511962890625,
      "model_forward_time": 0.11471843719482422,
      "step": 31705
    },
    {
      "epoch": 0.000193511962890625,
      "step": 31705,
      "training_step_time": 0.39004039764404297
    },
    {
      "epoch": 0.00019351806640625,
      "model_forward_time": 0.11527585983276367,
      "step": 31706
    },
    {
      "epoch": 0.00019351806640625,
      "step": 31706,
      "training_step_time": 0.38603949546813965
    },
    {
      "epoch": 0.000193524169921875,
      "model_forward_time": 0.11505532264709473,
      "step": 31707
    },
    {
      "epoch": 0.000193524169921875,
      "step": 31707,
      "training_step_time": 0.4029233455657959
    },
    {
      "epoch": 0.0001935302734375,
      "model_forward_time": 0.11548399925231934,
      "step": 31708
    },
    {
      "epoch": 0.0001935302734375,
      "step": 31708,
      "training_step_time": 0.931128740310669
    },
    {
      "epoch": 0.000193536376953125,
      "model_forward_time": 0.11423230171203613,
      "step": 31709
    },
    {
      "epoch": 0.000193536376953125,
      "step": 31709,
      "training_step_time": 0.38468170166015625
    },
    {
      "epoch": 0.00019354248046875,
      "grad_norm": 0.12240307033061981,
      "learning_rate": 4.942129848489137e-05,
      "loss": 0.0423,
      "step": 31710
    },
    {
      "epoch": 0.00019354248046875,
      "model_forward_time": 0.11414861679077148,
      "step": 31710
    },
    {
      "epoch": 0.00019354248046875,
      "step": 31710,
      "training_step_time": 0.38986635208129883
    },
    {
      "epoch": 0.000193548583984375,
      "model_forward_time": 0.11413931846618652,
      "step": 31711
    },
    {
      "epoch": 0.000193548583984375,
      "step": 31711,
      "training_step_time": 0.46403002738952637
    },
    {
      "epoch": 0.0001935546875,
      "model_forward_time": 0.11417984962463379,
      "step": 31712
    },
    {
      "epoch": 0.0001935546875,
      "step": 31712,
      "training_step_time": 0.4387500286102295
    },
    {
      "epoch": 0.000193560791015625,
      "model_forward_time": 0.11437749862670898,
      "step": 31713
    },
    {
      "epoch": 0.000193560791015625,
      "step": 31713,
      "training_step_time": 0.448427677154541
    },
    {
      "epoch": 0.00019356689453125,
      "model_forward_time": 0.11499714851379395,
      "step": 31714
    },
    {
      "epoch": 0.00019356689453125,
      "step": 31714,
      "training_step_time": 0.7052655220031738
    },
    {
      "epoch": 0.000193572998046875,
      "model_forward_time": 0.11463689804077148,
      "step": 31715
    },
    {
      "epoch": 0.000193572998046875,
      "step": 31715,
      "training_step_time": 0.4197533130645752
    },
    {
      "epoch": 0.0001935791015625,
      "model_forward_time": 0.11484813690185547,
      "step": 31716
    },
    {
      "epoch": 0.0001935791015625,
      "step": 31716,
      "training_step_time": 0.3872029781341553
    },
    {
      "epoch": 0.000193585205078125,
      "model_forward_time": 0.11445879936218262,
      "step": 31717
    },
    {
      "epoch": 0.000193585205078125,
      "step": 31717,
      "training_step_time": 0.44352245330810547
    },
    {
      "epoch": 0.00019359130859375,
      "model_forward_time": 0.11530590057373047,
      "step": 31718
    },
    {
      "epoch": 0.00019359130859375,
      "step": 31718,
      "training_step_time": 0.3749830722808838
    },
    {
      "epoch": 0.000193597412109375,
      "model_forward_time": 0.11508703231811523,
      "step": 31719
    },
    {
      "epoch": 0.000193597412109375,
      "step": 31719,
      "training_step_time": 0.3899722099304199
    },
    {
      "epoch": 0.000193603515625,
      "grad_norm": 0.09533485770225525,
      "learning_rate": 4.9393742589749145e-05,
      "loss": 0.0459,
      "step": 31720
    },
    {
      "epoch": 0.000193603515625,
      "model_forward_time": 0.11529254913330078,
      "step": 31720
    },
    {
      "epoch": 0.000193603515625,
      "step": 31720,
      "training_step_time": 0.9360971450805664
    },
    {
      "epoch": 0.000193609619140625,
      "model_forward_time": 0.11430621147155762,
      "step": 31721
    },
    {
      "epoch": 0.000193609619140625,
      "step": 31721,
      "training_step_time": 0.37900638580322266
    },
    {
      "epoch": 0.00019361572265625,
      "model_forward_time": 0.11424374580383301,
      "step": 31722
    },
    {
      "epoch": 0.00019361572265625,
      "step": 31722,
      "training_step_time": 0.39951443672180176
    },
    {
      "epoch": 0.000193621826171875,
      "model_forward_time": 0.1150813102722168,
      "step": 31723
    },
    {
      "epoch": 0.000193621826171875,
      "step": 31723,
      "training_step_time": 0.3907439708709717
    },
    {
      "epoch": 0.0001936279296875,
      "model_forward_time": 0.11414456367492676,
      "step": 31724
    },
    {
      "epoch": 0.0001936279296875,
      "step": 31724,
      "training_step_time": 0.3935835361480713
    },
    {
      "epoch": 0.000193634033203125,
      "model_forward_time": 0.11446142196655273,
      "step": 31725
    },
    {
      "epoch": 0.000193634033203125,
      "step": 31725,
      "training_step_time": 0.46719884872436523
    },
    {
      "epoch": 0.00019364013671875,
      "model_forward_time": 0.11495590209960938,
      "step": 31726
    },
    {
      "epoch": 0.00019364013671875,
      "step": 31726,
      "training_step_time": 0.5630671977996826
    },
    {
      "epoch": 0.000193646240234375,
      "model_forward_time": 0.11500048637390137,
      "step": 31727
    },
    {
      "epoch": 0.000193646240234375,
      "step": 31727,
      "training_step_time": 0.45546841621398926
    },
    {
      "epoch": 0.00019365234375,
      "model_forward_time": 0.11489748954772949,
      "step": 31728
    },
    {
      "epoch": 0.00019365234375,
      "step": 31728,
      "training_step_time": 0.4807009696960449
    },
    {
      "epoch": 0.000193658447265625,
      "model_forward_time": 0.1143488883972168,
      "step": 31729
    },
    {
      "epoch": 0.000193658447265625,
      "step": 31729,
      "training_step_time": 0.40503859519958496
    },
    {
      "epoch": 0.00019366455078125,
      "grad_norm": 0.13082128763198853,
      "learning_rate": 4.9366186878771926e-05,
      "loss": 0.0428,
      "step": 31730
    },
    {
      "epoch": 0.00019366455078125,
      "model_forward_time": 0.11417675018310547,
      "step": 31730
    },
    {
      "epoch": 0.00019366455078125,
      "step": 31730,
      "training_step_time": 0.44249391555786133
    },
    {
      "epoch": 0.000193670654296875,
      "model_forward_time": 0.11454892158508301,
      "step": 31731
    },
    {
      "epoch": 0.000193670654296875,
      "step": 31731,
      "training_step_time": 0.39525318145751953
    },
    {
      "epoch": 0.0001936767578125,
      "model_forward_time": 0.11540412902832031,
      "step": 31732
    },
    {
      "epoch": 0.0001936767578125,
      "step": 31732,
      "training_step_time": 0.39257216453552246
    },
    {
      "epoch": 0.000193682861328125,
      "model_forward_time": 0.11477041244506836,
      "step": 31733
    },
    {
      "epoch": 0.000193682861328125,
      "step": 31733,
      "training_step_time": 0.38803887367248535
    },
    {
      "epoch": 0.00019368896484375,
      "model_forward_time": 0.1147606372833252,
      "step": 31734
    },
    {
      "epoch": 0.00019368896484375,
      "step": 31734,
      "training_step_time": 0.3996090888977051
    },
    {
      "epoch": 0.000193695068359375,
      "model_forward_time": 0.11539316177368164,
      "step": 31735
    },
    {
      "epoch": 0.000193695068359375,
      "step": 31735,
      "training_step_time": 0.39721131324768066
    },
    {
      "epoch": 0.000193701171875,
      "model_forward_time": 0.11522293090820312,
      "step": 31736
    },
    {
      "epoch": 0.000193701171875,
      "step": 31736,
      "training_step_time": 0.4093492031097412
    },
    {
      "epoch": 0.000193707275390625,
      "model_forward_time": 0.11557650566101074,
      "step": 31737
    },
    {
      "epoch": 0.000193707275390625,
      "step": 31737,
      "training_step_time": 0.39476847648620605
    },
    {
      "epoch": 0.00019371337890625,
      "model_forward_time": 0.11562156677246094,
      "step": 31738
    },
    {
      "epoch": 0.00019371337890625,
      "step": 31738,
      "training_step_time": 0.5605404376983643
    },
    {
      "epoch": 0.000193719482421875,
      "model_forward_time": 0.11510181427001953,
      "step": 31739
    },
    {
      "epoch": 0.000193719482421875,
      "step": 31739,
      "training_step_time": 0.5057649612426758
    },
    {
      "epoch": 0.0001937255859375,
      "grad_norm": 0.17473402619361877,
      "learning_rate": 4.93386313603304e-05,
      "loss": 0.0409,
      "step": 31740
    },
    {
      "epoch": 0.0001937255859375,
      "model_forward_time": 0.11489152908325195,
      "step": 31740
    },
    {
      "epoch": 0.0001937255859375,
      "step": 31740,
      "training_step_time": 0.5067451000213623
    },
    {
      "epoch": 0.000193731689453125,
      "model_forward_time": 0.11506104469299316,
      "step": 31741
    },
    {
      "epoch": 0.000193731689453125,
      "step": 31741,
      "training_step_time": 0.4663987159729004
    },
    {
      "epoch": 0.00019373779296875,
      "model_forward_time": 0.11446619033813477,
      "step": 31742
    },
    {
      "epoch": 0.00019373779296875,
      "step": 31742,
      "training_step_time": 0.49128031730651855
    },
    {
      "epoch": 0.000193743896484375,
      "model_forward_time": 0.11530780792236328,
      "step": 31743
    },
    {
      "epoch": 0.000193743896484375,
      "step": 31743,
      "training_step_time": 0.4055781364440918
    },
    {
      "epoch": 0.00019375,
      "model_forward_time": 0.11495566368103027,
      "step": 31744
    },
    {
      "epoch": 0.00019375,
      "step": 31744,
      "training_step_time": 0.6262350082397461
    },
    {
      "epoch": 0.000193756103515625,
      "model_forward_time": 0.11455655097961426,
      "step": 31745
    },
    {
      "epoch": 0.000193756103515625,
      "step": 31745,
      "training_step_time": 0.38047146797180176
    },
    {
      "epoch": 0.00019376220703125,
      "model_forward_time": 0.11477208137512207,
      "step": 31746
    },
    {
      "epoch": 0.00019376220703125,
      "step": 31746,
      "training_step_time": 0.39440059661865234
    },
    {
      "epoch": 0.000193768310546875,
      "model_forward_time": 0.11475133895874023,
      "step": 31747
    },
    {
      "epoch": 0.000193768310546875,
      "step": 31747,
      "training_step_time": 0.39204907417297363
    },
    {
      "epoch": 0.0001937744140625,
      "model_forward_time": 0.11520171165466309,
      "step": 31748
    },
    {
      "epoch": 0.0001937744140625,
      "step": 31748,
      "training_step_time": 0.3922841548919678
    },
    {
      "epoch": 0.000193780517578125,
      "model_forward_time": 0.1151418685913086,
      "step": 31749
    },
    {
      "epoch": 0.000193780517578125,
      "step": 31749,
      "training_step_time": 0.38943934440612793
    },
    {
      "epoch": 0.00019378662109375,
      "grad_norm": 0.0839451402425766,
      "learning_rate": 4.9311076042795185e-05,
      "loss": 0.0393,
      "step": 31750
    },
    {
      "epoch": 0.00019378662109375,
      "model_forward_time": 0.11502504348754883,
      "step": 31750
    },
    {
      "epoch": 0.00019378662109375,
      "step": 31750,
      "training_step_time": 0.7456541061401367
    },
    {
      "epoch": 0.000193792724609375,
      "model_forward_time": 0.1144101619720459,
      "step": 31751
    },
    {
      "epoch": 0.000193792724609375,
      "step": 31751,
      "training_step_time": 0.40064430236816406
    },
    {
      "epoch": 0.000193798828125,
      "model_forward_time": 0.11411666870117188,
      "step": 31752
    },
    {
      "epoch": 0.000193798828125,
      "step": 31752,
      "training_step_time": 0.4125657081604004
    },
    {
      "epoch": 0.000193804931640625,
      "model_forward_time": 0.11475300788879395,
      "step": 31753
    },
    {
      "epoch": 0.000193804931640625,
      "step": 31753,
      "training_step_time": 0.46009254455566406
    },
    {
      "epoch": 0.00019381103515625,
      "model_forward_time": 0.11490440368652344,
      "step": 31754
    },
    {
      "epoch": 0.00019381103515625,
      "step": 31754,
      "training_step_time": 0.4247109889984131
    },
    {
      "epoch": 0.000193817138671875,
      "model_forward_time": 0.11456584930419922,
      "step": 31755
    },
    {
      "epoch": 0.000193817138671875,
      "step": 31755,
      "training_step_time": 0.46700263023376465
    },
    {
      "epoch": 0.0001938232421875,
      "model_forward_time": 0.11520576477050781,
      "step": 31756
    },
    {
      "epoch": 0.0001938232421875,
      "step": 31756,
      "training_step_time": 0.5825529098510742
    },
    {
      "epoch": 0.000193829345703125,
      "model_forward_time": 0.11467289924621582,
      "step": 31757
    },
    {
      "epoch": 0.000193829345703125,
      "step": 31757,
      "training_step_time": 0.3877291679382324
    },
    {
      "epoch": 0.00019383544921875,
      "model_forward_time": 0.11422204971313477,
      "step": 31758
    },
    {
      "epoch": 0.00019383544921875,
      "step": 31758,
      "training_step_time": 0.3849973678588867
    },
    {
      "epoch": 0.000193841552734375,
      "model_forward_time": 0.1147916316986084,
      "step": 31759
    },
    {
      "epoch": 0.000193841552734375,
      "step": 31759,
      "training_step_time": 0.3971521854400635
    },
    {
      "epoch": 0.00019384765625,
      "grad_norm": 0.11905327439308167,
      "learning_rate": 4.9283520934536904e-05,
      "loss": 0.0414,
      "step": 31760
    },
    {
      "epoch": 0.00019384765625,
      "model_forward_time": 0.11470198631286621,
      "step": 31760
    },
    {
      "epoch": 0.00019384765625,
      "step": 31760,
      "training_step_time": 0.4081876277923584
    },
    {
      "epoch": 0.000193853759765625,
      "model_forward_time": 0.11512207984924316,
      "step": 31761
    },
    {
      "epoch": 0.000193853759765625,
      "step": 31761,
      "training_step_time": 0.40093541145324707
    },
    {
      "epoch": 0.00019385986328125,
      "model_forward_time": 0.11433625221252441,
      "step": 31762
    },
    {
      "epoch": 0.00019385986328125,
      "step": 31762,
      "training_step_time": 1.064589262008667
    },
    {
      "epoch": 0.000193865966796875,
      "model_forward_time": 0.11439251899719238,
      "step": 31763
    },
    {
      "epoch": 0.000193865966796875,
      "step": 31763,
      "training_step_time": 0.38218164443969727
    },
    {
      "epoch": 0.0001938720703125,
      "model_forward_time": 0.11400294303894043,
      "step": 31764
    },
    {
      "epoch": 0.0001938720703125,
      "step": 31764,
      "training_step_time": 0.45925068855285645
    },
    {
      "epoch": 0.000193878173828125,
      "model_forward_time": 0.1135563850402832,
      "step": 31765
    },
    {
      "epoch": 0.000193878173828125,
      "step": 31765,
      "training_step_time": 0.4356522560119629
    },
    {
      "epoch": 0.00019388427734375,
      "model_forward_time": 0.1141209602355957,
      "step": 31766
    },
    {
      "epoch": 0.00019388427734375,
      "step": 31766,
      "training_step_time": 0.43033313751220703
    },
    {
      "epoch": 0.000193890380859375,
      "model_forward_time": 0.11447691917419434,
      "step": 31767
    },
    {
      "epoch": 0.000193890380859375,
      "step": 31767,
      "training_step_time": 0.43036818504333496
    },
    {
      "epoch": 0.000193896484375,
      "model_forward_time": 0.11446738243103027,
      "step": 31768
    },
    {
      "epoch": 0.000193896484375,
      "step": 31768,
      "training_step_time": 0.4929389953613281
    },
    {
      "epoch": 0.000193902587890625,
      "model_forward_time": 0.11499285697937012,
      "step": 31769
    },
    {
      "epoch": 0.000193902587890625,
      "step": 31769,
      "training_step_time": 0.39562344551086426
    },
    {
      "epoch": 0.00019390869140625,
      "grad_norm": 0.11584818363189697,
      "learning_rate": 4.925596604392603e-05,
      "loss": 0.0407,
      "step": 31770
    },
    {
      "epoch": 0.00019390869140625,
      "model_forward_time": 0.11471414566040039,
      "step": 31770
    },
    {
      "epoch": 0.00019390869140625,
      "step": 31770,
      "training_step_time": 0.3917083740234375
    },
    {
      "epoch": 0.000193914794921875,
      "model_forward_time": 0.11555027961730957,
      "step": 31771
    },
    {
      "epoch": 0.000193914794921875,
      "step": 31771,
      "training_step_time": 0.4072258472442627
    },
    {
      "epoch": 0.0001939208984375,
      "model_forward_time": 0.11530137062072754,
      "step": 31772
    },
    {
      "epoch": 0.0001939208984375,
      "step": 31772,
      "training_step_time": 0.4051511287689209
    },
    {
      "epoch": 0.000193927001953125,
      "model_forward_time": 0.11517214775085449,
      "step": 31773
    },
    {
      "epoch": 0.000193927001953125,
      "step": 31773,
      "training_step_time": 0.38925981521606445
    },
    {
      "epoch": 0.00019393310546875,
      "model_forward_time": 0.11570906639099121,
      "step": 31774
    },
    {
      "epoch": 0.00019393310546875,
      "step": 31774,
      "training_step_time": 0.6808648109436035
    },
    {
      "epoch": 0.000193939208984375,
      "model_forward_time": 0.1150510311126709,
      "step": 31775
    },
    {
      "epoch": 0.000193939208984375,
      "step": 31775,
      "training_step_time": 0.3924710750579834
    },
    {
      "epoch": 0.0001939453125,
      "model_forward_time": 0.11555933952331543,
      "step": 31776
    },
    {
      "epoch": 0.0001939453125,
      "step": 31776,
      "training_step_time": 0.39925575256347656
    },
    {
      "epoch": 0.000193951416015625,
      "model_forward_time": 0.1145772933959961,
      "step": 31777
    },
    {
      "epoch": 0.000193951416015625,
      "step": 31777,
      "training_step_time": 0.3977220058441162
    },
    {
      "epoch": 0.00019395751953125,
      "model_forward_time": 0.11513137817382812,
      "step": 31778
    },
    {
      "epoch": 0.00019395751953125,
      "step": 31778,
      "training_step_time": 0.3901536464691162
    },
    {
      "epoch": 0.000193963623046875,
      "model_forward_time": 0.11494231224060059,
      "step": 31779
    },
    {
      "epoch": 0.000193963623046875,
      "step": 31779,
      "training_step_time": 0.47380638122558594
    },
    {
      "epoch": 0.0001939697265625,
      "grad_norm": 0.10656201094388962,
      "learning_rate": 4.9228411379333014e-05,
      "loss": 0.048,
      "step": 31780
    },
    {
      "epoch": 0.0001939697265625,
      "model_forward_time": 0.11595463752746582,
      "step": 31780
    },
    {
      "epoch": 0.0001939697265625,
      "step": 31780,
      "training_step_time": 0.5015461444854736
    },
    {
      "epoch": 0.000193975830078125,
      "model_forward_time": 0.1160428524017334,
      "step": 31781
    },
    {
      "epoch": 0.000193975830078125,
      "step": 31781,
      "training_step_time": 0.46395039558410645
    },
    {
      "epoch": 0.00019398193359375,
      "model_forward_time": 0.11493897438049316,
      "step": 31782
    },
    {
      "epoch": 0.00019398193359375,
      "step": 31782,
      "training_step_time": 0.43896031379699707
    },
    {
      "epoch": 0.000193988037109375,
      "model_forward_time": 0.11636161804199219,
      "step": 31783
    },
    {
      "epoch": 0.000193988037109375,
      "step": 31783,
      "training_step_time": 0.4159259796142578
    },
    {
      "epoch": 0.000193994140625,
      "model_forward_time": 0.11542987823486328,
      "step": 31784
    },
    {
      "epoch": 0.000193994140625,
      "step": 31784,
      "training_step_time": 0.3853590488433838
    },
    {
      "epoch": 0.000194000244140625,
      "model_forward_time": 0.11520934104919434,
      "step": 31785
    },
    {
      "epoch": 0.000194000244140625,
      "step": 31785,
      "training_step_time": 0.394519567489624
    },
    {
      "epoch": 0.00019400634765625,
      "model_forward_time": 0.11520528793334961,
      "step": 31786
    },
    {
      "epoch": 0.00019400634765625,
      "step": 31786,
      "training_step_time": 1.300278902053833
    },
    {
      "epoch": 0.000194012451171875,
      "model_forward_time": 0.11406993865966797,
      "step": 31787
    },
    {
      "epoch": 0.000194012451171875,
      "step": 31787,
      "training_step_time": 0.3901824951171875
    },
    {
      "epoch": 0.0001940185546875,
      "model_forward_time": 0.11368846893310547,
      "step": 31788
    },
    {
      "epoch": 0.0001940185546875,
      "step": 31788,
      "training_step_time": 0.37967514991760254
    },
    {
      "epoch": 0.000194024658203125,
      "model_forward_time": 0.11402678489685059,
      "step": 31789
    },
    {
      "epoch": 0.000194024658203125,
      "step": 31789,
      "training_step_time": 0.37020206451416016
    },
    {
      "epoch": 0.00019403076171875,
      "grad_norm": 0.16084828972816467,
      "learning_rate": 4.920085694912828e-05,
      "loss": 0.0397,
      "step": 31790
    },
    {
      "epoch": 0.00019403076171875,
      "model_forward_time": 0.1137092113494873,
      "step": 31790
    },
    {
      "epoch": 0.00019403076171875,
      "step": 31790,
      "training_step_time": 0.44182705879211426
    },
    {
      "epoch": 0.000194036865234375,
      "model_forward_time": 0.11446523666381836,
      "step": 31791
    },
    {
      "epoch": 0.000194036865234375,
      "step": 31791,
      "training_step_time": 0.4024684429168701
    },
    {
      "epoch": 0.00019404296875,
      "model_forward_time": 0.11504292488098145,
      "step": 31792
    },
    {
      "epoch": 0.00019404296875,
      "step": 31792,
      "training_step_time": 0.7217268943786621
    },
    {
      "epoch": 0.000194049072265625,
      "model_forward_time": 0.11438822746276855,
      "step": 31793
    },
    {
      "epoch": 0.000194049072265625,
      "step": 31793,
      "training_step_time": 0.3769810199737549
    },
    {
      "epoch": 0.00019405517578125,
      "model_forward_time": 0.11405706405639648,
      "step": 31794
    },
    {
      "epoch": 0.00019405517578125,
      "step": 31794,
      "training_step_time": 0.4021596908569336
    },
    {
      "epoch": 0.000194061279296875,
      "model_forward_time": 0.11424946784973145,
      "step": 31795
    },
    {
      "epoch": 0.000194061279296875,
      "step": 31795,
      "training_step_time": 0.41016650199890137
    },
    {
      "epoch": 0.0001940673828125,
      "model_forward_time": 0.11431455612182617,
      "step": 31796
    },
    {
      "epoch": 0.0001940673828125,
      "step": 31796,
      "training_step_time": 0.3907816410064697
    },
    {
      "epoch": 0.000194073486328125,
      "model_forward_time": 0.11523890495300293,
      "step": 31797
    },
    {
      "epoch": 0.000194073486328125,
      "step": 31797,
      "training_step_time": 0.38545656204223633
    },
    {
      "epoch": 0.00019407958984375,
      "model_forward_time": 0.11503362655639648,
      "step": 31798
    },
    {
      "epoch": 0.00019407958984375,
      "step": 31798,
      "training_step_time": 0.7123129367828369
    },
    {
      "epoch": 0.000194085693359375,
      "model_forward_time": 0.1142578125,
      "step": 31799
    },
    {
      "epoch": 0.000194085693359375,
      "step": 31799,
      "training_step_time": 0.3789975643157959
    },
    {
      "epoch": 0.000194091796875,
      "grad_norm": 0.14935344457626343,
      "learning_rate": 4.917330276168208e-05,
      "loss": 0.0419,
      "step": 31800
    },
    {
      "epoch": 0.000194091796875,
      "model_forward_time": 0.11481523513793945,
      "step": 31800
    },
    {
      "epoch": 0.000194091796875,
      "step": 31800,
      "training_step_time": 0.40275120735168457
    },
    {
      "epoch": 0.000194097900390625,
      "model_forward_time": 0.11535000801086426,
      "step": 31801
    },
    {
      "epoch": 0.000194097900390625,
      "step": 31801,
      "training_step_time": 0.3818180561065674
    },
    {
      "epoch": 0.00019410400390625,
      "model_forward_time": 0.11428046226501465,
      "step": 31802
    },
    {
      "epoch": 0.00019410400390625,
      "step": 31802,
      "training_step_time": 0.39348435401916504
    },
    {
      "epoch": 0.000194110107421875,
      "model_forward_time": 0.11481165885925293,
      "step": 31803
    },
    {
      "epoch": 0.000194110107421875,
      "step": 31803,
      "training_step_time": 0.4508640766143799
    },
    {
      "epoch": 0.0001941162109375,
      "model_forward_time": 0.11492633819580078,
      "step": 31804
    },
    {
      "epoch": 0.0001941162109375,
      "step": 31804,
      "training_step_time": 0.7719178199768066
    },
    {
      "epoch": 0.000194122314453125,
      "model_forward_time": 0.11455297470092773,
      "step": 31805
    },
    {
      "epoch": 0.000194122314453125,
      "step": 31805,
      "training_step_time": 0.42096376419067383
    },
    {
      "epoch": 0.00019412841796875,
      "model_forward_time": 0.11434125900268555,
      "step": 31806
    },
    {
      "epoch": 0.00019412841796875,
      "step": 31806,
      "training_step_time": 0.3853588104248047
    },
    {
      "epoch": 0.000194134521484375,
      "model_forward_time": 0.11420059204101562,
      "step": 31807
    },
    {
      "epoch": 0.000194134521484375,
      "step": 31807,
      "training_step_time": 0.40198850631713867
    },
    {
      "epoch": 0.000194140625,
      "model_forward_time": 0.11453533172607422,
      "step": 31808
    },
    {
      "epoch": 0.000194140625,
      "step": 31808,
      "training_step_time": 0.4113032817840576
    },
    {
      "epoch": 0.000194146728515625,
      "model_forward_time": 0.11497306823730469,
      "step": 31809
    },
    {
      "epoch": 0.000194146728515625,
      "step": 31809,
      "training_step_time": 0.402148962020874
    },
    {
      "epoch": 0.00019415283203125,
      "grad_norm": 0.11888785660266876,
      "learning_rate": 4.91457488253647e-05,
      "loss": 0.0376,
      "step": 31810
    },
    {
      "epoch": 0.00019415283203125,
      "model_forward_time": 0.11486482620239258,
      "step": 31810
    },
    {
      "epoch": 0.00019415283203125,
      "step": 31810,
      "training_step_time": 0.7146432399749756
    },
    {
      "epoch": 0.000194158935546875,
      "model_forward_time": 0.11440491676330566,
      "step": 31811
    },
    {
      "epoch": 0.000194158935546875,
      "step": 31811,
      "training_step_time": 0.3893861770629883
    },
    {
      "epoch": 0.0001941650390625,
      "model_forward_time": 0.11475062370300293,
      "step": 31812
    },
    {
      "epoch": 0.0001941650390625,
      "step": 31812,
      "training_step_time": 0.38495898246765137
    },
    {
      "epoch": 0.000194171142578125,
      "model_forward_time": 0.11454606056213379,
      "step": 31813
    },
    {
      "epoch": 0.000194171142578125,
      "step": 31813,
      "training_step_time": 0.37697887420654297
    },
    {
      "epoch": 0.00019417724609375,
      "model_forward_time": 0.11462235450744629,
      "step": 31814
    },
    {
      "epoch": 0.00019417724609375,
      "step": 31814,
      "training_step_time": 0.38561081886291504
    },
    {
      "epoch": 0.000194183349609375,
      "model_forward_time": 0.11438584327697754,
      "step": 31815
    },
    {
      "epoch": 0.000194183349609375,
      "step": 31815,
      "training_step_time": 0.39298486709594727
    },
    {
      "epoch": 0.000194189453125,
      "model_forward_time": 0.11499190330505371,
      "step": 31816
    },
    {
      "epoch": 0.000194189453125,
      "step": 31816,
      "training_step_time": 1.0320565700531006
    },
    {
      "epoch": 0.000194195556640625,
      "model_forward_time": 0.1138761043548584,
      "step": 31817
    },
    {
      "epoch": 0.000194195556640625,
      "step": 31817,
      "training_step_time": 0.515740156173706
    },
    {
      "epoch": 0.00019420166015625,
      "model_forward_time": 0.11410880088806152,
      "step": 31818
    },
    {
      "epoch": 0.00019420166015625,
      "step": 31818,
      "training_step_time": 0.42931270599365234
    },
    {
      "epoch": 0.000194207763671875,
      "model_forward_time": 0.11393213272094727,
      "step": 31819
    },
    {
      "epoch": 0.000194207763671875,
      "step": 31819,
      "training_step_time": 0.4204878807067871
    },
    {
      "epoch": 0.0001942138671875,
      "grad_norm": 0.0878714993596077,
      "learning_rate": 4.911819514854624e-05,
      "loss": 0.0469,
      "step": 31820
    },
    {
      "epoch": 0.0001942138671875,
      "model_forward_time": 0.11391520500183105,
      "step": 31820
    },
    {
      "epoch": 0.0001942138671875,
      "step": 31820,
      "training_step_time": 0.4224264621734619
    },
    {
      "epoch": 0.000194219970703125,
      "model_forward_time": 0.11455512046813965,
      "step": 31821
    },
    {
      "epoch": 0.000194219970703125,
      "step": 31821,
      "training_step_time": 0.46817827224731445
    },
    {
      "epoch": 0.00019422607421875,
      "model_forward_time": 0.1149301528930664,
      "step": 31822
    },
    {
      "epoch": 0.00019422607421875,
      "step": 31822,
      "training_step_time": 0.39607858657836914
    },
    {
      "epoch": 0.000194232177734375,
      "model_forward_time": 0.11498141288757324,
      "step": 31823
    },
    {
      "epoch": 0.000194232177734375,
      "step": 31823,
      "training_step_time": 0.3928546905517578
    },
    {
      "epoch": 0.00019423828125,
      "model_forward_time": 0.11488628387451172,
      "step": 31824
    },
    {
      "epoch": 0.00019423828125,
      "step": 31824,
      "training_step_time": 0.39339542388916016
    },
    {
      "epoch": 0.000194244384765625,
      "model_forward_time": 0.11468315124511719,
      "step": 31825
    },
    {
      "epoch": 0.000194244384765625,
      "step": 31825,
      "training_step_time": 0.3862006664276123
    },
    {
      "epoch": 0.00019425048828125,
      "model_forward_time": 0.11438155174255371,
      "step": 31826
    },
    {
      "epoch": 0.00019425048828125,
      "step": 31826,
      "training_step_time": 0.40410423278808594
    },
    {
      "epoch": 0.000194256591796875,
      "model_forward_time": 0.11493873596191406,
      "step": 31827
    },
    {
      "epoch": 0.000194256591796875,
      "step": 31827,
      "training_step_time": 0.4113917350769043
    },
    {
      "epoch": 0.0001942626953125,
      "model_forward_time": 0.1147146224975586,
      "step": 31828
    },
    {
      "epoch": 0.0001942626953125,
      "step": 31828,
      "training_step_time": 0.4952425956726074
    },
    {
      "epoch": 0.000194268798828125,
      "model_forward_time": 0.11535334587097168,
      "step": 31829
    },
    {
      "epoch": 0.000194268798828125,
      "step": 31829,
      "training_step_time": 0.47572970390319824
    },
    {
      "epoch": 0.00019427490234375,
      "grad_norm": 0.12364932894706726,
      "learning_rate": 4.909064173959681e-05,
      "loss": 0.0401,
      "step": 31830
    },
    {
      "epoch": 0.00019427490234375,
      "model_forward_time": 0.11437320709228516,
      "step": 31830
    },
    {
      "epoch": 0.00019427490234375,
      "step": 31830,
      "training_step_time": 0.40355467796325684
    },
    {
      "epoch": 0.000194281005859375,
      "model_forward_time": 0.11516928672790527,
      "step": 31831
    },
    {
      "epoch": 0.000194281005859375,
      "step": 31831,
      "training_step_time": 0.4751420021057129
    },
    {
      "epoch": 0.000194287109375,
      "model_forward_time": 0.1152353286743164,
      "step": 31832
    },
    {
      "epoch": 0.000194287109375,
      "step": 31832,
      "training_step_time": 0.45363569259643555
    },
    {
      "epoch": 0.000194293212890625,
      "model_forward_time": 0.1150202751159668,
      "step": 31833
    },
    {
      "epoch": 0.000194293212890625,
      "step": 31833,
      "training_step_time": 0.4020388126373291
    },
    {
      "epoch": 0.00019429931640625,
      "model_forward_time": 0.11539220809936523,
      "step": 31834
    },
    {
      "epoch": 0.00019429931640625,
      "step": 31834,
      "training_step_time": 0.9406418800354004
    },
    {
      "epoch": 0.000194305419921875,
      "model_forward_time": 0.11455154418945312,
      "step": 31835
    },
    {
      "epoch": 0.000194305419921875,
      "step": 31835,
      "training_step_time": 0.39206838607788086
    },
    {
      "epoch": 0.0001943115234375,
      "model_forward_time": 0.11390209197998047,
      "step": 31836
    },
    {
      "epoch": 0.0001943115234375,
      "step": 31836,
      "training_step_time": 0.3911106586456299
    },
    {
      "epoch": 0.000194317626953125,
      "model_forward_time": 0.11419820785522461,
      "step": 31837
    },
    {
      "epoch": 0.000194317626953125,
      "step": 31837,
      "training_step_time": 0.38393187522888184
    },
    {
      "epoch": 0.00019432373046875,
      "model_forward_time": 0.11464834213256836,
      "step": 31838
    },
    {
      "epoch": 0.00019432373046875,
      "step": 31838,
      "training_step_time": 0.4020884037017822
    },
    {
      "epoch": 0.000194329833984375,
      "model_forward_time": 0.11416196823120117,
      "step": 31839
    },
    {
      "epoch": 0.000194329833984375,
      "step": 31839,
      "training_step_time": 0.378587007522583
    },
    {
      "epoch": 0.0001943359375,
      "grad_norm": 0.09614601731300354,
      "learning_rate": 4.906308860688645e-05,
      "loss": 0.0423,
      "step": 31840
    },
    {
      "epoch": 0.0001943359375,
      "model_forward_time": 0.11569738388061523,
      "step": 31840
    },
    {
      "epoch": 0.0001943359375,
      "step": 31840,
      "training_step_time": 0.5815904140472412
    },
    {
      "epoch": 0.000194342041015625,
      "model_forward_time": 0.1146552562713623,
      "step": 31841
    },
    {
      "epoch": 0.000194342041015625,
      "step": 31841,
      "training_step_time": 0.4257395267486572
    },
    {
      "epoch": 0.00019434814453125,
      "model_forward_time": 0.11495184898376465,
      "step": 31842
    },
    {
      "epoch": 0.00019434814453125,
      "step": 31842,
      "training_step_time": 0.4396064281463623
    },
    {
      "epoch": 0.000194354248046875,
      "model_forward_time": 0.11513400077819824,
      "step": 31843
    },
    {
      "epoch": 0.000194354248046875,
      "step": 31843,
      "training_step_time": 0.4067959785461426
    },
    {
      "epoch": 0.0001943603515625,
      "model_forward_time": 0.11445856094360352,
      "step": 31844
    },
    {
      "epoch": 0.0001943603515625,
      "step": 31844,
      "training_step_time": 0.4855325222015381
    },
    {
      "epoch": 0.000194366455078125,
      "model_forward_time": 0.11562800407409668,
      "step": 31845
    },
    {
      "epoch": 0.000194366455078125,
      "step": 31845,
      "training_step_time": 0.4029703140258789
    },
    {
      "epoch": 0.00019437255859375,
      "model_forward_time": 0.1150357723236084,
      "step": 31846
    },
    {
      "epoch": 0.00019437255859375,
      "step": 31846,
      "training_step_time": 0.6124660968780518
    },
    {
      "epoch": 0.000194378662109375,
      "model_forward_time": 0.1148984432220459,
      "step": 31847
    },
    {
      "epoch": 0.000194378662109375,
      "step": 31847,
      "training_step_time": 0.39853477478027344
    },
    {
      "epoch": 0.000194384765625,
      "model_forward_time": 0.11425518989562988,
      "step": 31848
    },
    {
      "epoch": 0.000194384765625,
      "step": 31848,
      "training_step_time": 0.4105191230773926
    },
    {
      "epoch": 0.000194390869140625,
      "model_forward_time": 0.11471724510192871,
      "step": 31849
    },
    {
      "epoch": 0.000194390869140625,
      "step": 31849,
      "training_step_time": 0.38588738441467285
    },
    {
      "epoch": 0.00019439697265625,
      "grad_norm": 0.09558308869600296,
      "learning_rate": 4.903553575878499e-05,
      "loss": 0.0402,
      "step": 31850
    },
    {
      "epoch": 0.00019439697265625,
      "model_forward_time": 0.11497879028320312,
      "step": 31850
    },
    {
      "epoch": 0.00019439697265625,
      "step": 31850,
      "training_step_time": 0.3925786018371582
    },
    {
      "epoch": 0.000194403076171875,
      "model_forward_time": 0.1145477294921875,
      "step": 31851
    },
    {
      "epoch": 0.000194403076171875,
      "step": 31851,
      "training_step_time": 0.38625001907348633
    },
    {
      "epoch": 0.0001944091796875,
      "model_forward_time": 0.11507296562194824,
      "step": 31852
    },
    {
      "epoch": 0.0001944091796875,
      "step": 31852,
      "training_step_time": 0.617673397064209
    },
    {
      "epoch": 0.000194415283203125,
      "model_forward_time": 0.11446738243103027,
      "step": 31853
    },
    {
      "epoch": 0.000194415283203125,
      "step": 31853,
      "training_step_time": 0.4140491485595703
    },
    {
      "epoch": 0.00019442138671875,
      "model_forward_time": 0.11468982696533203,
      "step": 31854
    },
    {
      "epoch": 0.00019442138671875,
      "step": 31854,
      "training_step_time": 0.45646047592163086
    },
    {
      "epoch": 0.000194427490234375,
      "model_forward_time": 0.11520171165466309,
      "step": 31855
    },
    {
      "epoch": 0.000194427490234375,
      "step": 31855,
      "training_step_time": 0.3955726623535156
    },
    {
      "epoch": 0.00019443359375,
      "model_forward_time": 0.11489081382751465,
      "step": 31856
    },
    {
      "epoch": 0.00019443359375,
      "step": 31856,
      "training_step_time": 0.4437904357910156
    },
    {
      "epoch": 0.000194439697265625,
      "model_forward_time": 0.11466050148010254,
      "step": 31857
    },
    {
      "epoch": 0.000194439697265625,
      "step": 31857,
      "training_step_time": 0.39118003845214844
    },
    {
      "epoch": 0.00019444580078125,
      "model_forward_time": 0.11522603034973145,
      "step": 31858
    },
    {
      "epoch": 0.00019444580078125,
      "step": 31858,
      "training_step_time": 0.9158339500427246
    },
    {
      "epoch": 0.000194451904296875,
      "model_forward_time": 0.11478137969970703,
      "step": 31859
    },
    {
      "epoch": 0.000194451904296875,
      "step": 31859,
      "training_step_time": 0.36295533180236816
    },
    {
      "epoch": 0.0001944580078125,
      "grad_norm": 0.10511505603790283,
      "learning_rate": 4.9007983203662326e-05,
      "loss": 0.0391,
      "step": 31860
    },
    {
      "epoch": 0.0001944580078125,
      "model_forward_time": 0.11401152610778809,
      "step": 31860
    },
    {
      "epoch": 0.0001944580078125,
      "step": 31860,
      "training_step_time": 0.41937971115112305
    },
    {
      "epoch": 0.000194464111328125,
      "model_forward_time": 0.11386895179748535,
      "step": 31861
    },
    {
      "epoch": 0.000194464111328125,
      "step": 31861,
      "training_step_time": 0.4176769256591797
    },
    {
      "epoch": 0.00019447021484375,
      "model_forward_time": 0.11460423469543457,
      "step": 31862
    },
    {
      "epoch": 0.00019447021484375,
      "step": 31862,
      "training_step_time": 0.39702343940734863
    },
    {
      "epoch": 0.000194476318359375,
      "model_forward_time": 0.1145172119140625,
      "step": 31863
    },
    {
      "epoch": 0.000194476318359375,
      "step": 31863,
      "training_step_time": 0.39606523513793945
    },
    {
      "epoch": 0.000194482421875,
      "model_forward_time": 0.11586213111877441,
      "step": 31864
    },
    {
      "epoch": 0.000194482421875,
      "step": 31864,
      "training_step_time": 1.2987146377563477
    },
    {
      "epoch": 0.000194488525390625,
      "model_forward_time": 0.11516928672790527,
      "step": 31865
    },
    {
      "epoch": 0.000194488525390625,
      "step": 31865,
      "training_step_time": 0.6513497829437256
    },
    {
      "epoch": 0.00019449462890625,
      "model_forward_time": 0.11891913414001465,
      "step": 31866
    },
    {
      "epoch": 0.00019449462890625,
      "step": 31866,
      "training_step_time": 0.6651344299316406
    },
    {
      "epoch": 0.000194500732421875,
      "model_forward_time": 0.11688113212585449,
      "step": 31867
    },
    {
      "epoch": 0.000194500732421875,
      "step": 31867,
      "training_step_time": 0.6882336139678955
    },
    {
      "epoch": 0.0001945068359375,
      "model_forward_time": 0.1179966926574707,
      "step": 31868
    },
    {
      "epoch": 0.0001945068359375,
      "step": 31868,
      "training_step_time": 0.8127415180206299
    },
    {
      "epoch": 0.000194512939453125,
      "model_forward_time": 0.12196683883666992,
      "step": 31869
    },
    {
      "epoch": 0.000194512939453125,
      "step": 31869,
      "training_step_time": 0.7226808071136475
    },
    {
      "epoch": 0.00019451904296875,
      "grad_norm": 0.09521785378456116,
      "learning_rate": 4.8980430949888156e-05,
      "loss": 0.0375,
      "step": 31870
    },
    {
      "epoch": 0.00019451904296875,
      "model_forward_time": 0.1156466007232666,
      "step": 31870
    },
    {
      "epoch": 0.00019451904296875,
      "step": 31870,
      "training_step_time": 0.6807935237884521
    },
    {
      "epoch": 0.000194525146484375,
      "model_forward_time": 0.12038421630859375,
      "step": 31871
    },
    {
      "epoch": 0.000194525146484375,
      "step": 31871,
      "training_step_time": 0.6112010478973389
    },
    {
      "epoch": 0.00019453125,
      "model_forward_time": 0.11920857429504395,
      "step": 31872
    },
    {
      "epoch": 0.00019453125,
      "step": 31872,
      "training_step_time": 0.6790037155151367
    },
    {
      "epoch": 0.000194537353515625,
      "model_forward_time": 0.12042880058288574,
      "step": 31873
    },
    {
      "epoch": 0.000194537353515625,
      "step": 31873,
      "training_step_time": 0.617363452911377
    },
    {
      "epoch": 0.00019454345703125,
      "model_forward_time": 0.11668062210083008,
      "step": 31874
    },
    {
      "epoch": 0.00019454345703125,
      "step": 31874,
      "training_step_time": 0.6369593143463135
    },
    {
      "epoch": 0.000194549560546875,
      "model_forward_time": 0.1312999725341797,
      "step": 31875
    },
    {
      "epoch": 0.000194549560546875,
      "step": 31875,
      "training_step_time": 0.678114652633667
    },
    {
      "epoch": 0.0001945556640625,
      "model_forward_time": 0.11635041236877441,
      "step": 31876
    },
    {
      "epoch": 0.0001945556640625,
      "step": 31876,
      "training_step_time": 0.6889059543609619
    },
    {
      "epoch": 0.000194561767578125,
      "model_forward_time": 0.1176137924194336,
      "step": 31877
    },
    {
      "epoch": 0.000194561767578125,
      "step": 31877,
      "training_step_time": 0.6999814510345459
    },
    {
      "epoch": 0.00019456787109375,
      "model_forward_time": 0.12670516967773438,
      "step": 31878
    },
    {
      "epoch": 0.00019456787109375,
      "step": 31878,
      "training_step_time": 0.5938012599945068
    },
    {
      "epoch": 0.000194573974609375,
      "model_forward_time": 0.11918282508850098,
      "step": 31879
    },
    {
      "epoch": 0.000194573974609375,
      "step": 31879,
      "training_step_time": 0.6500415802001953
    },
    {
      "epoch": 0.000194580078125,
      "grad_norm": 0.12716713547706604,
      "learning_rate": 4.895287900583216e-05,
      "loss": 0.0417,
      "step": 31880
    },
    {
      "epoch": 0.000194580078125,
      "model_forward_time": 0.11988306045532227,
      "step": 31880
    },
    {
      "epoch": 0.000194580078125,
      "step": 31880,
      "training_step_time": 0.7970223426818848
    },
    {
      "epoch": 0.000194586181640625,
      "model_forward_time": 0.1180882453918457,
      "step": 31881
    },
    {
      "epoch": 0.000194586181640625,
      "step": 31881,
      "training_step_time": 0.6503632068634033
    },
    {
      "epoch": 0.00019459228515625,
      "model_forward_time": 0.1182546615600586,
      "step": 31882
    },
    {
      "epoch": 0.00019459228515625,
      "step": 31882,
      "training_step_time": 0.6176769733428955
    },
    {
      "epoch": 0.000194598388671875,
      "model_forward_time": 0.11980056762695312,
      "step": 31883
    },
    {
      "epoch": 0.000194598388671875,
      "step": 31883,
      "training_step_time": 0.6645631790161133
    },
    {
      "epoch": 0.0001946044921875,
      "model_forward_time": 0.13976359367370605,
      "step": 31884
    },
    {
      "epoch": 0.0001946044921875,
      "step": 31884,
      "training_step_time": 0.6258862018585205
    },
    {
      "epoch": 0.000194610595703125,
      "model_forward_time": 0.11718058586120605,
      "step": 31885
    },
    {
      "epoch": 0.000194610595703125,
      "step": 31885,
      "training_step_time": 0.7545609474182129
    },
    {
      "epoch": 0.00019461669921875,
      "model_forward_time": 0.11745810508728027,
      "step": 31886
    },
    {
      "epoch": 0.00019461669921875,
      "step": 31886,
      "training_step_time": 0.693519115447998
    },
    {
      "epoch": 0.000194622802734375,
      "model_forward_time": 0.12373042106628418,
      "step": 31887
    },
    {
      "epoch": 0.000194622802734375,
      "step": 31887,
      "training_step_time": 0.6010022163391113
    },
    {
      "epoch": 0.00019462890625,
      "model_forward_time": 0.12000489234924316,
      "step": 31888
    },
    {
      "epoch": 0.00019462890625,
      "step": 31888,
      "training_step_time": 0.6416478157043457
    },
    {
      "epoch": 0.000194635009765625,
      "model_forward_time": 0.11663985252380371,
      "step": 31889
    },
    {
      "epoch": 0.000194635009765625,
      "step": 31889,
      "training_step_time": 0.6628208160400391
    },
    {
      "epoch": 0.00019464111328125,
      "grad_norm": 0.1099248081445694,
      "learning_rate": 4.892532737986387e-05,
      "loss": 0.0471,
      "step": 31890
    },
    {
      "epoch": 0.00019464111328125,
      "model_forward_time": 0.11571764945983887,
      "step": 31890
    },
    {
      "epoch": 0.00019464111328125,
      "step": 31890,
      "training_step_time": 0.7043085098266602
    },
    {
      "epoch": 0.000194647216796875,
      "model_forward_time": 0.12647366523742676,
      "step": 31891
    },
    {
      "epoch": 0.000194647216796875,
      "step": 31891,
      "training_step_time": 0.7146866321563721
    },
    {
      "epoch": 0.0001946533203125,
      "model_forward_time": 0.1189584732055664,
      "step": 31892
    },
    {
      "epoch": 0.0001946533203125,
      "step": 31892,
      "training_step_time": 0.6487760543823242
    },
    {
      "epoch": 0.000194659423828125,
      "model_forward_time": 0.11990904808044434,
      "step": 31893
    },
    {
      "epoch": 0.000194659423828125,
      "step": 31893,
      "training_step_time": 0.6131696701049805
    },
    {
      "epoch": 0.00019466552734375,
      "model_forward_time": 0.11865878105163574,
      "step": 31894
    },
    {
      "epoch": 0.00019466552734375,
      "step": 31894,
      "training_step_time": 0.6716282367706299
    },
    {
      "epoch": 0.000194671630859375,
      "model_forward_time": 0.11551046371459961,
      "step": 31895
    },
    {
      "epoch": 0.000194671630859375,
      "step": 31895,
      "training_step_time": 0.6881375312805176
    },
    {
      "epoch": 0.000194677734375,
      "model_forward_time": 0.1233518123626709,
      "step": 31896
    },
    {
      "epoch": 0.000194677734375,
      "step": 31896,
      "training_step_time": 0.7428877353668213
    },
    {
      "epoch": 0.000194683837890625,
      "model_forward_time": 0.11689949035644531,
      "step": 31897
    },
    {
      "epoch": 0.000194683837890625,
      "step": 31897,
      "training_step_time": 0.6881742477416992
    },
    {
      "epoch": 0.00019468994140625,
      "model_forward_time": 0.1508793830871582,
      "step": 31898
    },
    {
      "epoch": 0.00019468994140625,
      "step": 31898,
      "training_step_time": 0.7654612064361572
    },
    {
      "epoch": 0.000194696044921875,
      "model_forward_time": 0.1194608211517334,
      "step": 31899
    },
    {
      "epoch": 0.000194696044921875,
      "step": 31899,
      "training_step_time": 0.6950745582580566
    },
    {
      "epoch": 0.0001947021484375,
      "grad_norm": 0.18349799513816833,
      "learning_rate": 4.889777608035273e-05,
      "loss": 0.0552,
      "step": 31900
    },
    {
      "epoch": 0.0001947021484375,
      "model_forward_time": 0.11640381813049316,
      "step": 31900
    },
    {
      "epoch": 0.0001947021484375,
      "step": 31900,
      "training_step_time": 0.6506001949310303
    },
    {
      "epoch": 0.000194708251953125,
      "model_forward_time": 0.11917757987976074,
      "step": 31901
    },
    {
      "epoch": 0.000194708251953125,
      "step": 31901,
      "training_step_time": 0.697378396987915
    },
    {
      "epoch": 0.00019471435546875,
      "model_forward_time": 0.11661434173583984,
      "step": 31902
    },
    {
      "epoch": 0.00019471435546875,
      "step": 31902,
      "training_step_time": 0.6689863204956055
    },
    {
      "epoch": 0.000194720458984375,
      "model_forward_time": 0.12166357040405273,
      "step": 31903
    },
    {
      "epoch": 0.000194720458984375,
      "step": 31903,
      "training_step_time": 0.696603536605835
    },
    {
      "epoch": 0.0001947265625,
      "model_forward_time": 0.1157984733581543,
      "step": 31904
    },
    {
      "epoch": 0.0001947265625,
      "step": 31904,
      "training_step_time": 0.696021556854248
    },
    {
      "epoch": 0.000194732666015625,
      "model_forward_time": 0.11866974830627441,
      "step": 31905
    },
    {
      "epoch": 0.000194732666015625,
      "step": 31905,
      "training_step_time": 0.6751186847686768
    },
    {
      "epoch": 0.00019473876953125,
      "model_forward_time": 0.1176614761352539,
      "step": 31906
    },
    {
      "epoch": 0.00019473876953125,
      "step": 31906,
      "training_step_time": 0.7085931301116943
    },
    {
      "epoch": 0.000194744873046875,
      "model_forward_time": 0.12468123435974121,
      "step": 31907
    },
    {
      "epoch": 0.000194744873046875,
      "step": 31907,
      "training_step_time": 0.6918182373046875
    },
    {
      "epoch": 0.0001947509765625,
      "model_forward_time": 0.12039422988891602,
      "step": 31908
    },
    {
      "epoch": 0.0001947509765625,
      "step": 31908,
      "training_step_time": 0.7133767604827881
    },
    {
      "epoch": 0.000194757080078125,
      "model_forward_time": 0.13782715797424316,
      "step": 31909
    },
    {
      "epoch": 0.000194757080078125,
      "step": 31909,
      "training_step_time": 0.7014660835266113
    },
    {
      "epoch": 0.00019476318359375,
      "grad_norm": 0.14387448132038116,
      "learning_rate": 4.887022511566814e-05,
      "loss": 0.0459,
      "step": 31910
    },
    {
      "epoch": 0.00019476318359375,
      "model_forward_time": 0.11916756629943848,
      "step": 31910
    },
    {
      "epoch": 0.00019476318359375,
      "step": 31910,
      "training_step_time": 0.650336503982544
    },
    {
      "epoch": 0.000194769287109375,
      "model_forward_time": 0.12199687957763672,
      "step": 31911
    },
    {
      "epoch": 0.000194769287109375,
      "step": 31911,
      "training_step_time": 0.6561996936798096
    },
    {
      "epoch": 0.000194775390625,
      "model_forward_time": 0.11816048622131348,
      "step": 31912
    },
    {
      "epoch": 0.000194775390625,
      "step": 31912,
      "training_step_time": 0.6424915790557861
    },
    {
      "epoch": 0.000194781494140625,
      "model_forward_time": 0.11556649208068848,
      "step": 31913
    },
    {
      "epoch": 0.000194781494140625,
      "step": 31913,
      "training_step_time": 0.7599687576293945
    },
    {
      "epoch": 0.00019478759765625,
      "model_forward_time": 0.12255024909973145,
      "step": 31914
    },
    {
      "epoch": 0.00019478759765625,
      "step": 31914,
      "training_step_time": 0.7573666572570801
    },
    {
      "epoch": 0.000194793701171875,
      "model_forward_time": 0.12216663360595703,
      "step": 31915
    },
    {
      "epoch": 0.000194793701171875,
      "step": 31915,
      "training_step_time": 0.6809771060943604
    },
    {
      "epoch": 0.0001947998046875,
      "model_forward_time": 0.11977338790893555,
      "step": 31916
    },
    {
      "epoch": 0.0001947998046875,
      "step": 31916,
      "training_step_time": 0.7033581733703613
    },
    {
      "epoch": 0.000194805908203125,
      "model_forward_time": 0.12365365028381348,
      "step": 31917
    },
    {
      "epoch": 0.000194805908203125,
      "step": 31917,
      "training_step_time": 0.6712212562561035
    },
    {
      "epoch": 0.00019481201171875,
      "model_forward_time": 0.12012052536010742,
      "step": 31918
    },
    {
      "epoch": 0.00019481201171875,
      "step": 31918,
      "training_step_time": 0.7414095401763916
    },
    {
      "epoch": 0.000194818115234375,
      "model_forward_time": 0.11922287940979004,
      "step": 31919
    },
    {
      "epoch": 0.000194818115234375,
      "step": 31919,
      "training_step_time": 0.6097211837768555
    },
    {
      "epoch": 0.00019482421875,
      "grad_norm": 0.11420352756977081,
      "learning_rate": 4.884267449417931e-05,
      "loss": 0.0458,
      "step": 31920
    },
    {
      "epoch": 0.00019482421875,
      "model_forward_time": 0.12024617195129395,
      "step": 31920
    },
    {
      "epoch": 0.00019482421875,
      "step": 31920,
      "training_step_time": 0.667654275894165
    },
    {
      "epoch": 0.000194830322265625,
      "model_forward_time": 0.11649084091186523,
      "step": 31921
    },
    {
      "epoch": 0.000194830322265625,
      "step": 31921,
      "training_step_time": 0.6525280475616455
    },
    {
      "epoch": 0.00019483642578125,
      "model_forward_time": 0.12040829658508301,
      "step": 31922
    },
    {
      "epoch": 0.00019483642578125,
      "step": 31922,
      "training_step_time": 0.6878683567047119
    },
    {
      "epoch": 0.000194842529296875,
      "model_forward_time": 0.12243032455444336,
      "step": 31923
    },
    {
      "epoch": 0.000194842529296875,
      "step": 31923,
      "training_step_time": 0.6852250099182129
    },
    {
      "epoch": 0.0001948486328125,
      "model_forward_time": 0.1195838451385498,
      "step": 31924
    },
    {
      "epoch": 0.0001948486328125,
      "step": 31924,
      "training_step_time": 0.7300174236297607
    },
    {
      "epoch": 0.000194854736328125,
      "model_forward_time": 0.11973166465759277,
      "step": 31925
    },
    {
      "epoch": 0.000194854736328125,
      "step": 31925,
      "training_step_time": 0.6445963382720947
    },
    {
      "epoch": 0.00019486083984375,
      "model_forward_time": 0.12302041053771973,
      "step": 31926
    },
    {
      "epoch": 0.00019486083984375,
      "step": 31926,
      "training_step_time": 0.604637622833252
    },
    {
      "epoch": 0.000194866943359375,
      "model_forward_time": 0.11986279487609863,
      "step": 31927
    },
    {
      "epoch": 0.000194866943359375,
      "step": 31927,
      "training_step_time": 0.7283194065093994
    },
    {
      "epoch": 0.000194873046875,
      "model_forward_time": 0.12087821960449219,
      "step": 31928
    },
    {
      "epoch": 0.000194873046875,
      "step": 31928,
      "training_step_time": 0.6543223857879639
    },
    {
      "epoch": 0.000194879150390625,
      "model_forward_time": 0.11964726448059082,
      "step": 31929
    },
    {
      "epoch": 0.000194879150390625,
      "step": 31929,
      "training_step_time": 0.5811266899108887
    },
    {
      "epoch": 0.00019488525390625,
      "grad_norm": 0.10574917495250702,
      "learning_rate": 4.881512422425541e-05,
      "loss": 0.0467,
      "step": 31930
    },
    {
      "epoch": 0.00019488525390625,
      "model_forward_time": 0.12301969528198242,
      "step": 31930
    },
    {
      "epoch": 0.00019488525390625,
      "step": 31930,
      "training_step_time": 0.5513417720794678
    },
    {
      "epoch": 0.000194891357421875,
      "model_forward_time": 0.11998653411865234,
      "step": 31931
    },
    {
      "epoch": 0.000194891357421875,
      "step": 31931,
      "training_step_time": 0.5506501197814941
    },
    {
      "epoch": 0.0001948974609375,
      "model_forward_time": 0.1209874153137207,
      "step": 31932
    },
    {
      "epoch": 0.0001948974609375,
      "step": 31932,
      "training_step_time": 0.537562370300293
    },
    {
      "epoch": 0.000194903564453125,
      "model_forward_time": 0.12009239196777344,
      "step": 31933
    },
    {
      "epoch": 0.000194903564453125,
      "step": 31933,
      "training_step_time": 0.5530118942260742
    },
    {
      "epoch": 0.00019490966796875,
      "model_forward_time": 0.11965250968933105,
      "step": 31934
    },
    {
      "epoch": 0.00019490966796875,
      "step": 31934,
      "training_step_time": 0.5257923603057861
    },
    {
      "epoch": 0.000194915771484375,
      "model_forward_time": 0.11844801902770996,
      "step": 31935
    },
    {
      "epoch": 0.000194915771484375,
      "step": 31935,
      "training_step_time": 0.5784077644348145
    },
    {
      "epoch": 0.000194921875,
      "model_forward_time": 0.12201309204101562,
      "step": 31936
    },
    {
      "epoch": 0.000194921875,
      "step": 31936,
      "training_step_time": 0.46059703826904297
    },
    {
      "epoch": 0.000194927978515625,
      "model_forward_time": 0.12017512321472168,
      "step": 31937
    },
    {
      "epoch": 0.000194927978515625,
      "step": 31937,
      "training_step_time": 0.48165392875671387
    },
    {
      "epoch": 0.00019493408203125,
      "model_forward_time": 0.11737775802612305,
      "step": 31938
    },
    {
      "epoch": 0.00019493408203125,
      "step": 31938,
      "training_step_time": 0.5747842788696289
    },
    {
      "epoch": 0.000194940185546875,
      "model_forward_time": 0.11533355712890625,
      "step": 31939
    },
    {
      "epoch": 0.000194940185546875,
      "step": 31939,
      "training_step_time": 0.48647117614746094
    },
    {
      "epoch": 0.0001949462890625,
      "grad_norm": 0.162383034825325,
      "learning_rate": 4.878757431426551e-05,
      "loss": 0.052,
      "step": 31940
    },
    {
      "epoch": 0.0001949462890625,
      "model_forward_time": 0.11626362800598145,
      "step": 31940
    },
    {
      "epoch": 0.0001949462890625,
      "step": 31940,
      "training_step_time": 0.40422677993774414
    },
    {
      "epoch": 0.000194952392578125,
      "model_forward_time": 0.11545467376708984,
      "step": 31941
    },
    {
      "epoch": 0.000194952392578125,
      "step": 31941,
      "training_step_time": 0.3959178924560547
    },
    {
      "epoch": 0.00019495849609375,
      "model_forward_time": 0.11569952964782715,
      "step": 31942
    },
    {
      "epoch": 0.00019495849609375,
      "step": 31942,
      "training_step_time": 0.3799717426300049
    },
    {
      "epoch": 0.000194964599609375,
      "model_forward_time": 0.11472177505493164,
      "step": 31943
    },
    {
      "epoch": 0.000194964599609375,
      "step": 31943,
      "training_step_time": 0.42305564880371094
    },
    {
      "epoch": 0.000194970703125,
      "model_forward_time": 0.11506485939025879,
      "step": 31944
    },
    {
      "epoch": 0.000194970703125,
      "step": 31944,
      "training_step_time": 0.3975718021392822
    },
    {
      "epoch": 0.000194976806640625,
      "model_forward_time": 0.11510634422302246,
      "step": 31945
    },
    {
      "epoch": 0.000194976806640625,
      "step": 31945,
      "training_step_time": 0.38538503646850586
    },
    {
      "epoch": 0.00019498291015625,
      "model_forward_time": 0.11495256423950195,
      "step": 31946
    },
    {
      "epoch": 0.00019498291015625,
      "step": 31946,
      "training_step_time": 0.3814375400543213
    },
    {
      "epoch": 0.000194989013671875,
      "model_forward_time": 0.11544060707092285,
      "step": 31947
    },
    {
      "epoch": 0.000194989013671875,
      "step": 31947,
      "training_step_time": 0.47289299964904785
    },
    {
      "epoch": 0.0001949951171875,
      "model_forward_time": 0.11615729331970215,
      "step": 31948
    },
    {
      "epoch": 0.0001949951171875,
      "step": 31948,
      "training_step_time": 0.496509313583374
    },
    {
      "epoch": 0.000195001220703125,
      "model_forward_time": 0.11496710777282715,
      "step": 31949
    },
    {
      "epoch": 0.000195001220703125,
      "step": 31949,
      "training_step_time": 0.4104583263397217
    },
    {
      "epoch": 0.00019500732421875,
      "grad_norm": 0.16203905642032623,
      "learning_rate": 4.87600247725785e-05,
      "loss": 0.0433,
      "step": 31950
    },
    {
      "epoch": 0.00019500732421875,
      "model_forward_time": 0.1153116226196289,
      "step": 31950
    },
    {
      "epoch": 0.00019500732421875,
      "step": 31950,
      "training_step_time": 0.40531373023986816
    },
    {
      "epoch": 0.000195013427734375,
      "model_forward_time": 0.11687302589416504,
      "step": 31951
    },
    {
      "epoch": 0.000195013427734375,
      "step": 31951,
      "training_step_time": 0.4082334041595459
    },
    {
      "epoch": 0.00019501953125,
      "model_forward_time": 0.11496138572692871,
      "step": 31952
    },
    {
      "epoch": 0.00019501953125,
      "step": 31952,
      "training_step_time": 0.36304378509521484
    },
    {
      "epoch": 0.000195025634765625,
      "model_forward_time": 0.11585783958435059,
      "step": 31953
    },
    {
      "epoch": 0.000195025634765625,
      "step": 31953,
      "training_step_time": 0.4362006187438965
    },
    {
      "epoch": 0.00019503173828125,
      "model_forward_time": 0.11474084854125977,
      "step": 31954
    },
    {
      "epoch": 0.00019503173828125,
      "step": 31954,
      "training_step_time": 0.409271240234375
    },
    {
      "epoch": 0.000195037841796875,
      "model_forward_time": 0.11525821685791016,
      "step": 31955
    },
    {
      "epoch": 0.000195037841796875,
      "step": 31955,
      "training_step_time": 0.39368438720703125
    },
    {
      "epoch": 0.0001950439453125,
      "model_forward_time": 0.11459827423095703,
      "step": 31956
    },
    {
      "epoch": 0.0001950439453125,
      "step": 31956,
      "training_step_time": 0.39533281326293945
    },
    {
      "epoch": 0.000195050048828125,
      "model_forward_time": 0.11477375030517578,
      "step": 31957
    },
    {
      "epoch": 0.000195050048828125,
      "step": 31957,
      "training_step_time": 0.3977208137512207
    },
    {
      "epoch": 0.00019505615234375,
      "model_forward_time": 0.11502480506896973,
      "step": 31958
    },
    {
      "epoch": 0.00019505615234375,
      "step": 31958,
      "training_step_time": 0.3718538284301758
    },
    {
      "epoch": 0.000195062255859375,
      "model_forward_time": 0.1154172420501709,
      "step": 31959
    },
    {
      "epoch": 0.000195062255859375,
      "step": 31959,
      "training_step_time": 0.39983701705932617
    },
    {
      "epoch": 0.000195068359375,
      "grad_norm": 0.17714624106884003,
      "learning_rate": 4.873247560756324e-05,
      "loss": 0.0557,
      "step": 31960
    },
    {
      "epoch": 0.000195068359375,
      "model_forward_time": 0.11582279205322266,
      "step": 31960
    },
    {
      "epoch": 0.000195068359375,
      "step": 31960,
      "training_step_time": 0.40161633491516113
    },
    {
      "epoch": 0.000195074462890625,
      "model_forward_time": 0.11547708511352539,
      "step": 31961
    },
    {
      "epoch": 0.000195074462890625,
      "step": 31961,
      "training_step_time": 0.4231224060058594
    },
    {
      "epoch": 0.00019508056640625,
      "model_forward_time": 0.11396622657775879,
      "step": 31962
    },
    {
      "epoch": 0.00019508056640625,
      "step": 31962,
      "training_step_time": 0.40953612327575684
    },
    {
      "epoch": 0.000195086669921875,
      "model_forward_time": 0.11539101600646973,
      "step": 31963
    },
    {
      "epoch": 0.000195086669921875,
      "step": 31963,
      "training_step_time": 0.4398953914642334
    },
    {
      "epoch": 0.0001950927734375,
      "model_forward_time": 0.11503720283508301,
      "step": 31964
    },
    {
      "epoch": 0.0001950927734375,
      "step": 31964,
      "training_step_time": 0.46549034118652344
    },
    {
      "epoch": 0.000195098876953125,
      "model_forward_time": 0.11551213264465332,
      "step": 31965
    },
    {
      "epoch": 0.000195098876953125,
      "step": 31965,
      "training_step_time": 0.4353506565093994
    },
    {
      "epoch": 0.00019510498046875,
      "model_forward_time": 0.11519455909729004,
      "step": 31966
    },
    {
      "epoch": 0.00019510498046875,
      "step": 31966,
      "training_step_time": 0.4838593006134033
    },
    {
      "epoch": 0.000195111083984375,
      "model_forward_time": 0.11509847640991211,
      "step": 31967
    },
    {
      "epoch": 0.000195111083984375,
      "step": 31967,
      "training_step_time": 0.39429616928100586
    },
    {
      "epoch": 0.0001951171875,
      "model_forward_time": 0.11631393432617188,
      "step": 31968
    },
    {
      "epoch": 0.0001951171875,
      "step": 31968,
      "training_step_time": 0.4249916076660156
    },
    {
      "epoch": 0.000195123291015625,
      "model_forward_time": 0.11550188064575195,
      "step": 31969
    },
    {
      "epoch": 0.000195123291015625,
      "step": 31969,
      "training_step_time": 0.40073299407958984
    },
    {
      "epoch": 0.00019512939453125,
      "grad_norm": 0.11823325604200363,
      "learning_rate": 4.870492682758841e-05,
      "loss": 0.0435,
      "step": 31970
    },
    {
      "epoch": 0.00019512939453125,
      "model_forward_time": 0.11503052711486816,
      "step": 31970
    },
    {
      "epoch": 0.00019512939453125,
      "step": 31970,
      "training_step_time": 0.36984848976135254
    },
    {
      "epoch": 0.000195135498046875,
      "model_forward_time": 0.1154181957244873,
      "step": 31971
    },
    {
      "epoch": 0.000195135498046875,
      "step": 31971,
      "training_step_time": 0.39980268478393555
    },
    {
      "epoch": 0.0001951416015625,
      "model_forward_time": 0.1155402660369873,
      "step": 31972
    },
    {
      "epoch": 0.0001951416015625,
      "step": 31972,
      "training_step_time": 0.4115266799926758
    },
    {
      "epoch": 0.000195147705078125,
      "model_forward_time": 0.11547636985778809,
      "step": 31973
    },
    {
      "epoch": 0.000195147705078125,
      "step": 31973,
      "training_step_time": 0.39841413497924805
    },
    {
      "epoch": 0.00019515380859375,
      "model_forward_time": 0.11505889892578125,
      "step": 31974
    },
    {
      "epoch": 0.00019515380859375,
      "step": 31974,
      "training_step_time": 0.3941829204559326
    },
    {
      "epoch": 0.000195159912109375,
      "model_forward_time": 0.11537003517150879,
      "step": 31975
    },
    {
      "epoch": 0.000195159912109375,
      "step": 31975,
      "training_step_time": 0.39715003967285156
    },
    {
      "epoch": 0.000195166015625,
      "model_forward_time": 0.11541128158569336,
      "step": 31976
    },
    {
      "epoch": 0.000195166015625,
      "step": 31976,
      "training_step_time": 0.3762640953063965
    },
    {
      "epoch": 0.000195172119140625,
      "model_forward_time": 0.11510229110717773,
      "step": 31977
    },
    {
      "epoch": 0.000195172119140625,
      "step": 31977,
      "training_step_time": 0.40268754959106445
    },
    {
      "epoch": 0.00019517822265625,
      "model_forward_time": 0.11538124084472656,
      "step": 31978
    },
    {
      "epoch": 0.00019517822265625,
      "step": 31978,
      "training_step_time": 0.502852201461792
    },
    {
      "epoch": 0.000195184326171875,
      "model_forward_time": 0.11507630348205566,
      "step": 31979
    },
    {
      "epoch": 0.000195184326171875,
      "step": 31979,
      "training_step_time": 0.456878662109375
    },
    {
      "epoch": 0.0001951904296875,
      "grad_norm": 0.11520133167505264,
      "learning_rate": 4.867737844102261e-05,
      "loss": 0.0478,
      "step": 31980
    },
    {
      "epoch": 0.0001951904296875,
      "model_forward_time": 0.11484265327453613,
      "step": 31980
    },
    {
      "epoch": 0.0001951904296875,
      "step": 31980,
      "training_step_time": 0.4544820785522461
    },
    {
      "epoch": 0.000195196533203125,
      "model_forward_time": 0.11555337905883789,
      "step": 31981
    },
    {
      "epoch": 0.000195196533203125,
      "step": 31981,
      "training_step_time": 0.46559977531433105
    },
    {
      "epoch": 0.00019520263671875,
      "model_forward_time": 0.11482429504394531,
      "step": 31982
    },
    {
      "epoch": 0.00019520263671875,
      "step": 31982,
      "training_step_time": 0.43473052978515625
    },
    {
      "epoch": 0.000195208740234375,
      "model_forward_time": 0.11473417282104492,
      "step": 31983
    },
    {
      "epoch": 0.000195208740234375,
      "step": 31983,
      "training_step_time": 0.48703885078430176
    },
    {
      "epoch": 0.00019521484375,
      "model_forward_time": 0.11423945426940918,
      "step": 31984
    },
    {
      "epoch": 0.00019521484375,
      "step": 31984,
      "training_step_time": 0.40620875358581543
    },
    {
      "epoch": 0.000195220947265625,
      "model_forward_time": 0.11453843116760254,
      "step": 31985
    },
    {
      "epoch": 0.000195220947265625,
      "step": 31985,
      "training_step_time": 0.3874843120574951
    },
    {
      "epoch": 0.00019522705078125,
      "model_forward_time": 0.11507368087768555,
      "step": 31986
    },
    {
      "epoch": 0.00019522705078125,
      "step": 31986,
      "training_step_time": 0.3897120952606201
    },
    {
      "epoch": 0.000195233154296875,
      "model_forward_time": 0.11494851112365723,
      "step": 31987
    },
    {
      "epoch": 0.000195233154296875,
      "step": 31987,
      "training_step_time": 0.39819812774658203
    },
    {
      "epoch": 0.0001952392578125,
      "model_forward_time": 0.11528444290161133,
      "step": 31988
    },
    {
      "epoch": 0.0001952392578125,
      "step": 31988,
      "training_step_time": 0.4095895290374756
    },
    {
      "epoch": 0.000195245361328125,
      "model_forward_time": 0.11457490921020508,
      "step": 31989
    },
    {
      "epoch": 0.000195245361328125,
      "step": 31989,
      "training_step_time": 0.3969604969024658
    },
    {
      "epoch": 0.00019525146484375,
      "grad_norm": 0.17247408628463745,
      "learning_rate": 4.864983045623434e-05,
      "loss": 0.0451,
      "step": 31990
    },
    {
      "epoch": 0.00019525146484375,
      "model_forward_time": 0.11505508422851562,
      "step": 31990
    },
    {
      "epoch": 0.00019525146484375,
      "step": 31990,
      "training_step_time": 0.40430736541748047
    },
    {
      "epoch": 0.000195257568359375,
      "model_forward_time": 0.11499691009521484,
      "step": 31991
    },
    {
      "epoch": 0.000195257568359375,
      "step": 31991,
      "training_step_time": 0.45410585403442383
    },
    {
      "epoch": 0.000195263671875,
      "model_forward_time": 0.11491799354553223,
      "step": 31992
    },
    {
      "epoch": 0.000195263671875,
      "step": 31992,
      "training_step_time": 0.4071826934814453
    },
    {
      "epoch": 0.000195269775390625,
      "model_forward_time": 0.11459016799926758,
      "step": 31993
    },
    {
      "epoch": 0.000195269775390625,
      "step": 31993,
      "training_step_time": 0.4342036247253418
    },
    {
      "epoch": 0.00019527587890625,
      "model_forward_time": 0.11543440818786621,
      "step": 31994
    },
    {
      "epoch": 0.00019527587890625,
      "step": 31994,
      "training_step_time": 0.4002652168273926
    },
    {
      "epoch": 0.000195281982421875,
      "model_forward_time": 0.11502599716186523,
      "step": 31995
    },
    {
      "epoch": 0.000195281982421875,
      "step": 31995,
      "training_step_time": 0.47795915603637695
    },
    {
      "epoch": 0.0001952880859375,
      "model_forward_time": 0.11580610275268555,
      "step": 31996
    },
    {
      "epoch": 0.0001952880859375,
      "step": 31996,
      "training_step_time": 0.4102597236633301
    },
    {
      "epoch": 0.000195294189453125,
      "model_forward_time": 0.11571049690246582,
      "step": 31997
    },
    {
      "epoch": 0.000195294189453125,
      "step": 31997,
      "training_step_time": 0.4550623893737793
    },
    {
      "epoch": 0.00019530029296875,
      "model_forward_time": 0.11528730392456055,
      "step": 31998
    },
    {
      "epoch": 0.00019530029296875,
      "step": 31998,
      "training_step_time": 0.419050931930542
    },
    {
      "epoch": 0.000195306396484375,
      "model_forward_time": 0.11475753784179688,
      "step": 31999
    },
    {
      "epoch": 0.000195306396484375,
      "step": 31999,
      "training_step_time": 0.40361785888671875
    },
    {
      "epoch": 0.0001953125,
      "grad_norm": 0.09712039679288864,
      "learning_rate": 4.8622282881591906e-05,
      "loss": 0.0461,
      "step": 32000
    },
    {
      "epoch": 0.0001953125,
      "model_forward_time": 0.11418414115905762,
      "step": 32000
    },
    {
      "epoch": 0.0001953125,
      "step": 32000,
      "training_step_time": 0.3559598922729492
    },
    {
      "epoch": 0.000195318603515625,
      "model_forward_time": 0.11254262924194336,
      "step": 32001
    },
    {
      "epoch": 0.000195318603515625,
      "step": 32001,
      "training_step_time": 0.3851490020751953
    },
    {
      "epoch": 0.00019532470703125,
      "model_forward_time": 0.11240839958190918,
      "step": 32002
    },
    {
      "epoch": 0.00019532470703125,
      "step": 32002,
      "training_step_time": 0.3839094638824463
    },
    {
      "epoch": 0.000195330810546875,
      "model_forward_time": 0.11324834823608398,
      "step": 32003
    },
    {
      "epoch": 0.000195330810546875,
      "step": 32003,
      "training_step_time": 0.3931620121002197
    },
    {
      "epoch": 0.0001953369140625,
      "model_forward_time": 0.11423492431640625,
      "step": 32004
    },
    {
      "epoch": 0.0001953369140625,
      "step": 32004,
      "training_step_time": 0.3856799602508545
    },
    {
      "epoch": 0.000195343017578125,
      "model_forward_time": 0.11474275588989258,
      "step": 32005
    },
    {
      "epoch": 0.000195343017578125,
      "step": 32005,
      "training_step_time": 0.38979125022888184
    },
    {
      "epoch": 0.00019534912109375,
      "model_forward_time": 0.11462068557739258,
      "step": 32006
    },
    {
      "epoch": 0.00019534912109375,
      "step": 32006,
      "training_step_time": 0.40218496322631836
    },
    {
      "epoch": 0.000195355224609375,
      "model_forward_time": 0.11477208137512207,
      "step": 32007
    },
    {
      "epoch": 0.000195355224609375,
      "step": 32007,
      "training_step_time": 0.44744062423706055
    },
    {
      "epoch": 0.000195361328125,
      "model_forward_time": 0.1157219409942627,
      "step": 32008
    },
    {
      "epoch": 0.000195361328125,
      "step": 32008,
      "training_step_time": 0.4447343349456787
    },
    {
      "epoch": 0.000195367431640625,
      "model_forward_time": 0.11545300483703613,
      "step": 32009
    },
    {
      "epoch": 0.000195367431640625,
      "step": 32009,
      "training_step_time": 0.4285128116607666
    },
    {
      "epoch": 0.00019537353515625,
      "grad_norm": 0.0922030657529831,
      "learning_rate": 4.8594735725463567e-05,
      "loss": 0.0406,
      "step": 32010
    },
    {
      "epoch": 0.00019537353515625,
      "model_forward_time": 0.11477470397949219,
      "step": 32010
    },
    {
      "epoch": 0.00019537353515625,
      "step": 32010,
      "training_step_time": 0.4025907516479492
    },
    {
      "epoch": 0.000195379638671875,
      "model_forward_time": 0.11456298828125,
      "step": 32011
    },
    {
      "epoch": 0.000195379638671875,
      "step": 32011,
      "training_step_time": 0.41653013229370117
    },
    {
      "epoch": 0.0001953857421875,
      "model_forward_time": 0.11485099792480469,
      "step": 32012
    },
    {
      "epoch": 0.0001953857421875,
      "step": 32012,
      "training_step_time": 0.3667137622833252
    },
    {
      "epoch": 0.000195391845703125,
      "model_forward_time": 0.11522817611694336,
      "step": 32013
    },
    {
      "epoch": 0.000195391845703125,
      "step": 32013,
      "training_step_time": 0.4430229663848877
    },
    {
      "epoch": 0.00019539794921875,
      "model_forward_time": 0.11495518684387207,
      "step": 32014
    },
    {
      "epoch": 0.00019539794921875,
      "step": 32014,
      "training_step_time": 0.42029762268066406
    },
    {
      "epoch": 0.000195404052734375,
      "model_forward_time": 0.11522603034973145,
      "step": 32015
    },
    {
      "epoch": 0.000195404052734375,
      "step": 32015,
      "training_step_time": 0.4104032516479492
    },
    {
      "epoch": 0.00019541015625,
      "model_forward_time": 0.11482691764831543,
      "step": 32016
    },
    {
      "epoch": 0.00019541015625,
      "step": 32016,
      "training_step_time": 0.39629507064819336
    },
    {
      "epoch": 0.000195416259765625,
      "model_forward_time": 0.1143941879272461,
      "step": 32017
    },
    {
      "epoch": 0.000195416259765625,
      "step": 32017,
      "training_step_time": 0.3931009769439697
    },
    {
      "epoch": 0.00019542236328125,
      "model_forward_time": 0.11542701721191406,
      "step": 32018
    },
    {
      "epoch": 0.00019542236328125,
      "step": 32018,
      "training_step_time": 0.4145317077636719
    },
    {
      "epoch": 0.000195428466796875,
      "model_forward_time": 0.11535024642944336,
      "step": 32019
    },
    {
      "epoch": 0.000195428466796875,
      "step": 32019,
      "training_step_time": 0.3893284797668457
    },
    {
      "epoch": 0.0001954345703125,
      "grad_norm": 0.10245279967784882,
      "learning_rate": 4.85671889962174e-05,
      "loss": 0.0445,
      "step": 32020
    },
    {
      "epoch": 0.0001954345703125,
      "model_forward_time": 0.11517453193664551,
      "step": 32020
    },
    {
      "epoch": 0.0001954345703125,
      "step": 32020,
      "training_step_time": 0.3919491767883301
    },
    {
      "epoch": 0.000195440673828125,
      "model_forward_time": 0.11567115783691406,
      "step": 32021
    },
    {
      "epoch": 0.000195440673828125,
      "step": 32021,
      "training_step_time": 0.5003466606140137
    },
    {
      "epoch": 0.00019544677734375,
      "model_forward_time": 0.11481285095214844,
      "step": 32022
    },
    {
      "epoch": 0.00019544677734375,
      "step": 32022,
      "training_step_time": 0.4110264778137207
    },
    {
      "epoch": 0.000195452880859375,
      "model_forward_time": 0.11587834358215332,
      "step": 32023
    },
    {
      "epoch": 0.000195452880859375,
      "step": 32023,
      "training_step_time": 0.40905070304870605
    },
    {
      "epoch": 0.000195458984375,
      "model_forward_time": 0.11497235298156738,
      "step": 32024
    },
    {
      "epoch": 0.000195458984375,
      "step": 32024,
      "training_step_time": 0.4322688579559326
    },
    {
      "epoch": 0.000195465087890625,
      "model_forward_time": 0.11470413208007812,
      "step": 32025
    },
    {
      "epoch": 0.000195465087890625,
      "step": 32025,
      "training_step_time": 0.40870046615600586
    },
    {
      "epoch": 0.00019547119140625,
      "model_forward_time": 0.11670136451721191,
      "step": 32026
    },
    {
      "epoch": 0.00019547119140625,
      "step": 32026,
      "training_step_time": 0.43149757385253906
    },
    {
      "epoch": 0.000195477294921875,
      "model_forward_time": 0.11543035507202148,
      "step": 32027
    },
    {
      "epoch": 0.000195477294921875,
      "step": 32027,
      "training_step_time": 0.3986847400665283
    },
    {
      "epoch": 0.0001954833984375,
      "model_forward_time": 0.11549735069274902,
      "step": 32028
    },
    {
      "epoch": 0.0001954833984375,
      "step": 32028,
      "training_step_time": 0.48211169242858887
    },
    {
      "epoch": 0.000195489501953125,
      "model_forward_time": 0.11509203910827637,
      "step": 32029
    },
    {
      "epoch": 0.000195489501953125,
      "step": 32029,
      "training_step_time": 0.4340097904205322
    },
    {
      "epoch": 0.00019549560546875,
      "grad_norm": 0.13346846401691437,
      "learning_rate": 4.85396427022214e-05,
      "loss": 0.0466,
      "step": 32030
    },
    {
      "epoch": 0.00019549560546875,
      "model_forward_time": 0.1147918701171875,
      "step": 32030
    },
    {
      "epoch": 0.00019549560546875,
      "step": 32030,
      "training_step_time": 0.3972904682159424
    },
    {
      "epoch": 0.000195501708984375,
      "model_forward_time": 0.11482405662536621,
      "step": 32031
    },
    {
      "epoch": 0.000195501708984375,
      "step": 32031,
      "training_step_time": 0.3906371593475342
    },
    {
      "epoch": 0.0001955078125,
      "model_forward_time": 0.11501765251159668,
      "step": 32032
    },
    {
      "epoch": 0.0001955078125,
      "step": 32032,
      "training_step_time": 0.40235304832458496
    },
    {
      "epoch": 0.000195513916015625,
      "model_forward_time": 0.11532115936279297,
      "step": 32033
    },
    {
      "epoch": 0.000195513916015625,
      "step": 32033,
      "training_step_time": 0.4021131992340088
    },
    {
      "epoch": 0.00019552001953125,
      "model_forward_time": 0.11500740051269531,
      "step": 32034
    },
    {
      "epoch": 0.00019552001953125,
      "step": 32034,
      "training_step_time": 0.3980083465576172
    },
    {
      "epoch": 0.000195526123046875,
      "model_forward_time": 0.11475586891174316,
      "step": 32035
    },
    {
      "epoch": 0.000195526123046875,
      "step": 32035,
      "training_step_time": 0.4116842746734619
    },
    {
      "epoch": 0.0001955322265625,
      "model_forward_time": 0.11507225036621094,
      "step": 32036
    },
    {
      "epoch": 0.0001955322265625,
      "step": 32036,
      "training_step_time": 0.4176163673400879
    },
    {
      "epoch": 0.000195538330078125,
      "model_forward_time": 0.1151266098022461,
      "step": 32037
    },
    {
      "epoch": 0.000195538330078125,
      "step": 32037,
      "training_step_time": 0.42630577087402344
    },
    {
      "epoch": 0.00019554443359375,
      "model_forward_time": 0.11466050148010254,
      "step": 32038
    },
    {
      "epoch": 0.00019554443359375,
      "step": 32038,
      "training_step_time": 0.4394052028656006
    },
    {
      "epoch": 0.000195550537109375,
      "model_forward_time": 0.11433839797973633,
      "step": 32039
    },
    {
      "epoch": 0.000195550537109375,
      "step": 32039,
      "training_step_time": 0.40453076362609863
    },
    {
      "epoch": 0.000195556640625,
      "grad_norm": 0.1300564557313919,
      "learning_rate": 4.851209685184338e-05,
      "loss": 0.0483,
      "step": 32040
    },
    {
      "epoch": 0.000195556640625,
      "model_forward_time": 0.1144263744354248,
      "step": 32040
    },
    {
      "epoch": 0.000195556640625,
      "step": 32040,
      "training_step_time": 0.490276575088501
    },
    {
      "epoch": 0.000195562744140625,
      "model_forward_time": 0.11533212661743164,
      "step": 32041
    },
    {
      "epoch": 0.000195562744140625,
      "step": 32041,
      "training_step_time": 0.3680918216705322
    },
    {
      "epoch": 0.00019556884765625,
      "model_forward_time": 0.11577606201171875,
      "step": 32042
    },
    {
      "epoch": 0.00019556884765625,
      "step": 32042,
      "training_step_time": 0.41774702072143555
    },
    {
      "epoch": 0.000195574951171875,
      "model_forward_time": 0.1149897575378418,
      "step": 32043
    },
    {
      "epoch": 0.000195574951171875,
      "step": 32043,
      "training_step_time": 0.5119500160217285
    },
    {
      "epoch": 0.0001955810546875,
      "model_forward_time": 0.11496877670288086,
      "step": 32044
    },
    {
      "epoch": 0.0001955810546875,
      "step": 32044,
      "training_step_time": 0.38257503509521484
    },
    {
      "epoch": 0.000195587158203125,
      "model_forward_time": 0.11476516723632812,
      "step": 32045
    },
    {
      "epoch": 0.000195587158203125,
      "step": 32045,
      "training_step_time": 0.3747432231903076
    },
    {
      "epoch": 0.00019559326171875,
      "model_forward_time": 0.11468124389648438,
      "step": 32046
    },
    {
      "epoch": 0.00019559326171875,
      "step": 32046,
      "training_step_time": 0.39870691299438477
    },
    {
      "epoch": 0.000195599365234375,
      "model_forward_time": 0.11496353149414062,
      "step": 32047
    },
    {
      "epoch": 0.000195599365234375,
      "step": 32047,
      "training_step_time": 0.4027690887451172
    },
    {
      "epoch": 0.00019560546875,
      "model_forward_time": 0.11518716812133789,
      "step": 32048
    },
    {
      "epoch": 0.00019560546875,
      "step": 32048,
      "training_step_time": 0.42940568923950195
    },
    {
      "epoch": 0.000195611572265625,
      "model_forward_time": 0.11523628234863281,
      "step": 32049
    },
    {
      "epoch": 0.000195611572265625,
      "step": 32049,
      "training_step_time": 0.4130265712738037
    },
    {
      "epoch": 0.00019561767578125,
      "grad_norm": 0.1378902941942215,
      "learning_rate": 4.848455145345105e-05,
      "loss": 0.0404,
      "step": 32050
    },
    {
      "epoch": 0.00019561767578125,
      "model_forward_time": 0.1149449348449707,
      "step": 32050
    },
    {
      "epoch": 0.00019561767578125,
      "step": 32050,
      "training_step_time": 0.457932710647583
    },
    {
      "epoch": 0.000195623779296875,
      "model_forward_time": 0.11562895774841309,
      "step": 32051
    },
    {
      "epoch": 0.000195623779296875,
      "step": 32051,
      "training_step_time": 0.3941214084625244
    },
    {
      "epoch": 0.0001956298828125,
      "model_forward_time": 0.11591577529907227,
      "step": 32052
    },
    {
      "epoch": 0.0001956298828125,
      "step": 32052,
      "training_step_time": 0.5072975158691406
    },
    {
      "epoch": 0.000195635986328125,
      "model_forward_time": 0.1152181625366211,
      "step": 32053
    },
    {
      "epoch": 0.000195635986328125,
      "step": 32053,
      "training_step_time": 0.4207313060760498
    },
    {
      "epoch": 0.00019564208984375,
      "model_forward_time": 0.115203857421875,
      "step": 32054
    },
    {
      "epoch": 0.00019564208984375,
      "step": 32054,
      "training_step_time": 0.39376139640808105
    },
    {
      "epoch": 0.000195648193359375,
      "model_forward_time": 0.11515665054321289,
      "step": 32055
    },
    {
      "epoch": 0.000195648193359375,
      "step": 32055,
      "training_step_time": 0.4135856628417969
    },
    {
      "epoch": 0.000195654296875,
      "model_forward_time": 0.11563897132873535,
      "step": 32056
    },
    {
      "epoch": 0.000195654296875,
      "step": 32056,
      "training_step_time": 0.46919846534729004
    },
    {
      "epoch": 0.000195660400390625,
      "model_forward_time": 0.11485624313354492,
      "step": 32057
    },
    {
      "epoch": 0.000195660400390625,
      "step": 32057,
      "training_step_time": 0.4376399517059326
    },
    {
      "epoch": 0.00019566650390625,
      "model_forward_time": 0.11519122123718262,
      "step": 32058
    },
    {
      "epoch": 0.00019566650390625,
      "step": 32058,
      "training_step_time": 0.4171106815338135
    },
    {
      "epoch": 0.000195672607421875,
      "model_forward_time": 0.11479854583740234,
      "step": 32059
    },
    {
      "epoch": 0.000195672607421875,
      "step": 32059,
      "training_step_time": 0.3919553756713867
    },
    {
      "epoch": 0.0001956787109375,
      "grad_norm": 0.12913846969604492,
      "learning_rate": 4.845700651541199e-05,
      "loss": 0.0436,
      "step": 32060
    },
    {
      "epoch": 0.0001956787109375,
      "model_forward_time": 0.11510920524597168,
      "step": 32060
    },
    {
      "epoch": 0.0001956787109375,
      "step": 32060,
      "training_step_time": 0.3849191665649414
    },
    {
      "epoch": 0.000195684814453125,
      "model_forward_time": 0.11497974395751953,
      "step": 32061
    },
    {
      "epoch": 0.000195684814453125,
      "step": 32061,
      "training_step_time": 0.3978698253631592
    },
    {
      "epoch": 0.00019569091796875,
      "model_forward_time": 0.11494207382202148,
      "step": 32062
    },
    {
      "epoch": 0.00019569091796875,
      "step": 32062,
      "training_step_time": 0.40610432624816895
    },
    {
      "epoch": 0.000195697021484375,
      "model_forward_time": 0.11482572555541992,
      "step": 32063
    },
    {
      "epoch": 0.000195697021484375,
      "step": 32063,
      "training_step_time": 0.40574026107788086
    },
    {
      "epoch": 0.000195703125,
      "model_forward_time": 0.1150362491607666,
      "step": 32064
    },
    {
      "epoch": 0.000195703125,
      "step": 32064,
      "training_step_time": 0.3950974941253662
    },
    {
      "epoch": 0.000195709228515625,
      "model_forward_time": 0.11547565460205078,
      "step": 32065
    },
    {
      "epoch": 0.000195709228515625,
      "step": 32065,
      "training_step_time": 0.41257286071777344
    },
    {
      "epoch": 0.00019571533203125,
      "model_forward_time": 0.11594104766845703,
      "step": 32066
    },
    {
      "epoch": 0.00019571533203125,
      "step": 32066,
      "training_step_time": 0.47849011421203613
    },
    {
      "epoch": 0.000195721435546875,
      "model_forward_time": 0.11466312408447266,
      "step": 32067
    },
    {
      "epoch": 0.000195721435546875,
      "step": 32067,
      "training_step_time": 0.4035756587982178
    },
    {
      "epoch": 0.0001957275390625,
      "model_forward_time": 0.11510872840881348,
      "step": 32068
    },
    {
      "epoch": 0.0001957275390625,
      "step": 32068,
      "training_step_time": 0.4033639430999756
    },
    {
      "epoch": 0.000195733642578125,
      "model_forward_time": 0.11448121070861816,
      "step": 32069
    },
    {
      "epoch": 0.000195733642578125,
      "step": 32069,
      "training_step_time": 0.46758151054382324
    },
    {
      "epoch": 0.00019573974609375,
      "grad_norm": 0.12568511068820953,
      "learning_rate": 4.8429462046093585e-05,
      "loss": 0.0414,
      "step": 32070
    },
    {
      "epoch": 0.00019573974609375,
      "model_forward_time": 0.11511349678039551,
      "step": 32070
    },
    {
      "epoch": 0.00019573974609375,
      "step": 32070,
      "training_step_time": 0.3683431148529053
    },
    {
      "epoch": 0.000195745849609375,
      "model_forward_time": 0.1154322624206543,
      "step": 32071
    },
    {
      "epoch": 0.000195745849609375,
      "step": 32071,
      "training_step_time": 0.461780309677124
    },
    {
      "epoch": 0.000195751953125,
      "model_forward_time": 0.11569380760192871,
      "step": 32072
    },
    {
      "epoch": 0.000195751953125,
      "step": 32072,
      "training_step_time": 0.3993794918060303
    },
    {
      "epoch": 0.000195758056640625,
      "model_forward_time": 0.11517333984375,
      "step": 32073
    },
    {
      "epoch": 0.000195758056640625,
      "step": 32073,
      "training_step_time": 0.3947577476501465
    },
    {
      "epoch": 0.00019576416015625,
      "model_forward_time": 0.11585760116577148,
      "step": 32074
    },
    {
      "epoch": 0.00019576416015625,
      "step": 32074,
      "training_step_time": 0.3994119167327881
    },
    {
      "epoch": 0.000195770263671875,
      "model_forward_time": 0.11460161209106445,
      "step": 32075
    },
    {
      "epoch": 0.000195770263671875,
      "step": 32075,
      "training_step_time": 0.38568878173828125
    },
    {
      "epoch": 0.0001957763671875,
      "model_forward_time": 0.11495327949523926,
      "step": 32076
    },
    {
      "epoch": 0.0001957763671875,
      "step": 32076,
      "training_step_time": 0.41799497604370117
    },
    {
      "epoch": 0.000195782470703125,
      "model_forward_time": 0.11464548110961914,
      "step": 32077
    },
    {
      "epoch": 0.000195782470703125,
      "step": 32077,
      "training_step_time": 0.46303534507751465
    },
    {
      "epoch": 0.00019578857421875,
      "model_forward_time": 0.11567807197570801,
      "step": 32078
    },
    {
      "epoch": 0.00019578857421875,
      "step": 32078,
      "training_step_time": 0.38651442527770996
    },
    {
      "epoch": 0.000195794677734375,
      "model_forward_time": 0.11537313461303711,
      "step": 32079
    },
    {
      "epoch": 0.000195794677734375,
      "step": 32079,
      "training_step_time": 0.46362757682800293
    },
    {
      "epoch": 0.00019580078125,
      "grad_norm": 0.14358042180538177,
      "learning_rate": 4.840191805386315e-05,
      "loss": 0.0497,
      "step": 32080
    },
    {
      "epoch": 0.00019580078125,
      "model_forward_time": 0.11505484580993652,
      "step": 32080
    },
    {
      "epoch": 0.00019580078125,
      "step": 32080,
      "training_step_time": 0.4412119388580322
    },
    {
      "epoch": 0.000195806884765625,
      "model_forward_time": 0.11551737785339355,
      "step": 32081
    },
    {
      "epoch": 0.000195806884765625,
      "step": 32081,
      "training_step_time": 0.4048502445220947
    },
    {
      "epoch": 0.00019581298828125,
      "model_forward_time": 0.11542749404907227,
      "step": 32082
    },
    {
      "epoch": 0.00019581298828125,
      "step": 32082,
      "training_step_time": 0.4075441360473633
    },
    {
      "epoch": 0.000195819091796875,
      "model_forward_time": 0.11541008949279785,
      "step": 32083
    },
    {
      "epoch": 0.000195819091796875,
      "step": 32083,
      "training_step_time": 0.44564032554626465
    },
    {
      "epoch": 0.0001958251953125,
      "model_forward_time": 0.11535882949829102,
      "step": 32084
    },
    {
      "epoch": 0.0001958251953125,
      "step": 32084,
      "training_step_time": 0.4028651714324951
    },
    {
      "epoch": 0.000195831298828125,
      "model_forward_time": 0.11620306968688965,
      "step": 32085
    },
    {
      "epoch": 0.000195831298828125,
      "step": 32085,
      "training_step_time": 0.40666985511779785
    },
    {
      "epoch": 0.00019583740234375,
      "model_forward_time": 0.1162862777709961,
      "step": 32086
    },
    {
      "epoch": 0.00019583740234375,
      "step": 32086,
      "training_step_time": 0.4966604709625244
    },
    {
      "epoch": 0.000195843505859375,
      "model_forward_time": 0.11535024642944336,
      "step": 32087
    },
    {
      "epoch": 0.000195843505859375,
      "step": 32087,
      "training_step_time": 0.42645931243896484
    },
    {
      "epoch": 0.000195849609375,
      "model_forward_time": 0.11640644073486328,
      "step": 32088
    },
    {
      "epoch": 0.000195849609375,
      "step": 32088,
      "training_step_time": 0.39989233016967773
    },
    {
      "epoch": 0.000195855712890625,
      "model_forward_time": 0.11571216583251953,
      "step": 32089
    },
    {
      "epoch": 0.000195855712890625,
      "step": 32089,
      "training_step_time": 0.3951911926269531
    },
    {
      "epoch": 0.00019586181640625,
      "grad_norm": 0.16350157558918,
      "learning_rate": 4.837437454708784e-05,
      "loss": 0.048,
      "step": 32090
    },
    {
      "epoch": 0.00019586181640625,
      "model_forward_time": 0.11558413505554199,
      "step": 32090
    },
    {
      "epoch": 0.00019586181640625,
      "step": 32090,
      "training_step_time": 0.4069962501525879
    },
    {
      "epoch": 0.000195867919921875,
      "model_forward_time": 0.11548972129821777,
      "step": 32091
    },
    {
      "epoch": 0.000195867919921875,
      "step": 32091,
      "training_step_time": 0.4044482707977295
    },
    {
      "epoch": 0.0001958740234375,
      "model_forward_time": 0.11502766609191895,
      "step": 32092
    },
    {
      "epoch": 0.0001958740234375,
      "step": 32092,
      "training_step_time": 0.39829587936401367
    },
    {
      "epoch": 0.000195880126953125,
      "model_forward_time": 0.11639094352722168,
      "step": 32093
    },
    {
      "epoch": 0.000195880126953125,
      "step": 32093,
      "training_step_time": 0.4276556968688965
    },
    {
      "epoch": 0.00019588623046875,
      "model_forward_time": 0.11582040786743164,
      "step": 32094
    },
    {
      "epoch": 0.00019588623046875,
      "step": 32094,
      "training_step_time": 0.40437841415405273
    },
    {
      "epoch": 0.000195892333984375,
      "model_forward_time": 0.11544561386108398,
      "step": 32095
    },
    {
      "epoch": 0.000195892333984375,
      "step": 32095,
      "training_step_time": 0.438035249710083
    },
    {
      "epoch": 0.0001958984375,
      "model_forward_time": 0.11523675918579102,
      "step": 32096
    },
    {
      "epoch": 0.0001958984375,
      "step": 32096,
      "training_step_time": 0.3884246349334717
    },
    {
      "epoch": 0.000195904541015625,
      "model_forward_time": 0.11519861221313477,
      "step": 32097
    },
    {
      "epoch": 0.000195904541015625,
      "step": 32097,
      "training_step_time": 0.4016530513763428
    },
    {
      "epoch": 0.00019591064453125,
      "model_forward_time": 0.11482787132263184,
      "step": 32098
    },
    {
      "epoch": 0.00019591064453125,
      "step": 32098,
      "training_step_time": 0.4220890998840332
    },
    {
      "epoch": 0.000195916748046875,
      "model_forward_time": 0.11508297920227051,
      "step": 32099
    },
    {
      "epoch": 0.000195916748046875,
      "step": 32099,
      "training_step_time": 0.49330806732177734
    },
    {
      "epoch": 0.0001959228515625,
      "grad_norm": 0.15692190825939178,
      "learning_rate": 4.834683153413459e-05,
      "loss": 0.0446,
      "step": 32100
    },
    {
      "epoch": 0.0001959228515625,
      "model_forward_time": 0.11503958702087402,
      "step": 32100
    },
    {
      "epoch": 0.0001959228515625,
      "step": 32100,
      "training_step_time": 0.466249942779541
    },
    {
      "epoch": 0.000195928955078125,
      "model_forward_time": 0.11570334434509277,
      "step": 32101
    },
    {
      "epoch": 0.000195928955078125,
      "step": 32101,
      "training_step_time": 0.5088455677032471
    },
    {
      "epoch": 0.00019593505859375,
      "model_forward_time": 0.11588478088378906,
      "step": 32102
    },
    {
      "epoch": 0.00019593505859375,
      "step": 32102,
      "training_step_time": 0.3863041400909424
    },
    {
      "epoch": 0.000195941162109375,
      "model_forward_time": 0.11518335342407227,
      "step": 32103
    },
    {
      "epoch": 0.000195941162109375,
      "step": 32103,
      "training_step_time": 0.4222524166107178
    },
    {
      "epoch": 0.000195947265625,
      "model_forward_time": 0.11495780944824219,
      "step": 32104
    },
    {
      "epoch": 0.000195947265625,
      "step": 32104,
      "training_step_time": 0.41005539894104004
    },
    {
      "epoch": 0.000195953369140625,
      "model_forward_time": 0.11487340927124023,
      "step": 32105
    },
    {
      "epoch": 0.000195953369140625,
      "step": 32105,
      "training_step_time": 0.3965108394622803
    },
    {
      "epoch": 0.00019595947265625,
      "model_forward_time": 0.11557579040527344,
      "step": 32106
    },
    {
      "epoch": 0.00019595947265625,
      "step": 32106,
      "training_step_time": 0.40255022048950195
    },
    {
      "epoch": 0.000195965576171875,
      "model_forward_time": 0.11536383628845215,
      "step": 32107
    },
    {
      "epoch": 0.000195965576171875,
      "step": 32107,
      "training_step_time": 0.6877129077911377
    },
    {
      "epoch": 0.0001959716796875,
      "model_forward_time": 0.11499595642089844,
      "step": 32108
    },
    {
      "epoch": 0.0001959716796875,
      "step": 32108,
      "training_step_time": 0.40734338760375977
    },
    {
      "epoch": 0.000195977783203125,
      "model_forward_time": 0.1146094799041748,
      "step": 32109
    },
    {
      "epoch": 0.000195977783203125,
      "step": 32109,
      "training_step_time": 0.4806358814239502
    },
    {
      "epoch": 0.00019598388671875,
      "grad_norm": 0.10638392716646194,
      "learning_rate": 4.8319289023370296e-05,
      "loss": 0.0453,
      "step": 32110
    },
    {
      "epoch": 0.00019598388671875,
      "model_forward_time": 0.11462259292602539,
      "step": 32110
    },
    {
      "epoch": 0.00019598388671875,
      "step": 32110,
      "training_step_time": 0.3900759220123291
    },
    {
      "epoch": 0.000195989990234375,
      "model_forward_time": 0.11507320404052734,
      "step": 32111
    },
    {
      "epoch": 0.000195989990234375,
      "step": 32111,
      "training_step_time": 0.41241931915283203
    },
    {
      "epoch": 0.00019599609375,
      "model_forward_time": 0.11485910415649414,
      "step": 32112
    },
    {
      "epoch": 0.00019599609375,
      "step": 32112,
      "training_step_time": 0.43535351753234863
    },
    {
      "epoch": 0.000196002197265625,
      "model_forward_time": 0.11464715003967285,
      "step": 32113
    },
    {
      "epoch": 0.000196002197265625,
      "step": 32113,
      "training_step_time": 0.5335257053375244
    },
    {
      "epoch": 0.00019600830078125,
      "model_forward_time": 0.11504197120666504,
      "step": 32114
    },
    {
      "epoch": 0.00019600830078125,
      "step": 32114,
      "training_step_time": 0.473616361618042
    },
    {
      "epoch": 0.000196014404296875,
      "model_forward_time": 0.1149592399597168,
      "step": 32115
    },
    {
      "epoch": 0.000196014404296875,
      "step": 32115,
      "training_step_time": 0.39443159103393555
    },
    {
      "epoch": 0.0001960205078125,
      "model_forward_time": 0.11461305618286133,
      "step": 32116
    },
    {
      "epoch": 0.0001960205078125,
      "step": 32116,
      "training_step_time": 0.42424631118774414
    },
    {
      "epoch": 0.000196026611328125,
      "model_forward_time": 0.11455488204956055,
      "step": 32117
    },
    {
      "epoch": 0.000196026611328125,
      "step": 32117,
      "training_step_time": 0.4561948776245117
    },
    {
      "epoch": 0.00019603271484375,
      "model_forward_time": 0.11472511291503906,
      "step": 32118
    },
    {
      "epoch": 0.00019603271484375,
      "step": 32118,
      "training_step_time": 0.38918590545654297
    },
    {
      "epoch": 0.000196038818359375,
      "model_forward_time": 0.11519217491149902,
      "step": 32119
    },
    {
      "epoch": 0.000196038818359375,
      "step": 32119,
      "training_step_time": 0.45340681076049805
    },
    {
      "epoch": 0.000196044921875,
      "grad_norm": 0.11134807765483856,
      "learning_rate": 4.82917470231616e-05,
      "loss": 0.0485,
      "step": 32120
    },
    {
      "epoch": 0.000196044921875,
      "model_forward_time": 0.11527419090270996,
      "step": 32120
    },
    {
      "epoch": 0.000196044921875,
      "step": 32120,
      "training_step_time": 0.37811827659606934
    },
    {
      "epoch": 0.000196051025390625,
      "model_forward_time": 0.11506295204162598,
      "step": 32121
    },
    {
      "epoch": 0.000196051025390625,
      "step": 32121,
      "training_step_time": 0.49394798278808594
    },
    {
      "epoch": 0.00019605712890625,
      "model_forward_time": 0.11487221717834473,
      "step": 32122
    },
    {
      "epoch": 0.00019605712890625,
      "step": 32122,
      "training_step_time": 0.39893269538879395
    },
    {
      "epoch": 0.000196063232421875,
      "model_forward_time": 0.11522245407104492,
      "step": 32123
    },
    {
      "epoch": 0.000196063232421875,
      "step": 32123,
      "training_step_time": 0.3919236660003662
    },
    {
      "epoch": 0.0001960693359375,
      "model_forward_time": 0.11510753631591797,
      "step": 32124
    },
    {
      "epoch": 0.0001960693359375,
      "step": 32124,
      "training_step_time": 0.3826777935028076
    },
    {
      "epoch": 0.000196075439453125,
      "model_forward_time": 0.11521649360656738,
      "step": 32125
    },
    {
      "epoch": 0.000196075439453125,
      "step": 32125,
      "training_step_time": 0.6430172920227051
    },
    {
      "epoch": 0.00019608154296875,
      "model_forward_time": 0.11510109901428223,
      "step": 32126
    },
    {
      "epoch": 0.00019608154296875,
      "step": 32126,
      "training_step_time": 0.4384734630584717
    },
    {
      "epoch": 0.000196087646484375,
      "model_forward_time": 0.11574864387512207,
      "step": 32127
    },
    {
      "epoch": 0.000196087646484375,
      "step": 32127,
      "training_step_time": 0.437441349029541
    },
    {
      "epoch": 0.00019609375,
      "model_forward_time": 0.11521744728088379,
      "step": 32128
    },
    {
      "epoch": 0.00019609375,
      "step": 32128,
      "training_step_time": 0.48557591438293457
    },
    {
      "epoch": 0.000196099853515625,
      "model_forward_time": 0.1147301197052002,
      "step": 32129
    },
    {
      "epoch": 0.000196099853515625,
      "step": 32129,
      "training_step_time": 0.42420530319213867
    },
    {
      "epoch": 0.00019610595703125,
      "grad_norm": 0.09461110085248947,
      "learning_rate": 4.826420554187506e-05,
      "loss": 0.0419,
      "step": 32130
    },
    {
      "epoch": 0.00019610595703125,
      "model_forward_time": 0.11431574821472168,
      "step": 32130
    },
    {
      "epoch": 0.00019610595703125,
      "step": 32130,
      "training_step_time": 0.45125865936279297
    },
    {
      "epoch": 0.000196112060546875,
      "model_forward_time": 0.11461067199707031,
      "step": 32131
    },
    {
      "epoch": 0.000196112060546875,
      "step": 32131,
      "training_step_time": 0.48052287101745605
    },
    {
      "epoch": 0.0001961181640625,
      "model_forward_time": 0.11658644676208496,
      "step": 32132
    },
    {
      "epoch": 0.0001961181640625,
      "step": 32132,
      "training_step_time": 0.3949275016784668
    },
    {
      "epoch": 0.000196124267578125,
      "model_forward_time": 0.11493778228759766,
      "step": 32133
    },
    {
      "epoch": 0.000196124267578125,
      "step": 32133,
      "training_step_time": 0.409257173538208
    },
    {
      "epoch": 0.00019613037109375,
      "model_forward_time": 0.11511707305908203,
      "step": 32134
    },
    {
      "epoch": 0.00019613037109375,
      "step": 32134,
      "training_step_time": 0.402482271194458
    },
    {
      "epoch": 0.000196136474609375,
      "model_forward_time": 0.11522316932678223,
      "step": 32135
    },
    {
      "epoch": 0.000196136474609375,
      "step": 32135,
      "training_step_time": 0.40875959396362305
    },
    {
      "epoch": 0.000196142578125,
      "model_forward_time": 0.11523079872131348,
      "step": 32136
    },
    {
      "epoch": 0.000196142578125,
      "step": 32136,
      "training_step_time": 0.5188307762145996
    },
    {
      "epoch": 0.000196148681640625,
      "model_forward_time": 0.11510396003723145,
      "step": 32137
    },
    {
      "epoch": 0.000196148681640625,
      "step": 32137,
      "training_step_time": 0.45008397102355957
    },
    {
      "epoch": 0.00019615478515625,
      "model_forward_time": 0.11520981788635254,
      "step": 32138
    },
    {
      "epoch": 0.00019615478515625,
      "step": 32138,
      "training_step_time": 0.4107029438018799
    },
    {
      "epoch": 0.000196160888671875,
      "model_forward_time": 0.11546087265014648,
      "step": 32139
    },
    {
      "epoch": 0.000196160888671875,
      "step": 32139,
      "training_step_time": 0.4349706172943115
    },
    {
      "epoch": 0.0001961669921875,
      "grad_norm": 0.12778322398662567,
      "learning_rate": 4.823666458787705e-05,
      "loss": 0.0465,
      "step": 32140
    },
    {
      "epoch": 0.0001961669921875,
      "model_forward_time": 0.11488127708435059,
      "step": 32140
    },
    {
      "epoch": 0.0001961669921875,
      "step": 32140,
      "training_step_time": 0.41445231437683105
    },
    {
      "epoch": 0.000196173095703125,
      "model_forward_time": 0.11458873748779297,
      "step": 32141
    },
    {
      "epoch": 0.000196173095703125,
      "step": 32141,
      "training_step_time": 0.4883294105529785
    },
    {
      "epoch": 0.00019617919921875,
      "model_forward_time": 0.11522889137268066,
      "step": 32142
    },
    {
      "epoch": 0.00019617919921875,
      "step": 32142,
      "training_step_time": 0.4703657627105713
    },
    {
      "epoch": 0.000196185302734375,
      "model_forward_time": 0.11530900001525879,
      "step": 32143
    },
    {
      "epoch": 0.000196185302734375,
      "step": 32143,
      "training_step_time": 0.42369747161865234
    },
    {
      "epoch": 0.00019619140625,
      "model_forward_time": 0.11545705795288086,
      "step": 32144
    },
    {
      "epoch": 0.00019619140625,
      "step": 32144,
      "training_step_time": 0.377913236618042
    },
    {
      "epoch": 0.000196197509765625,
      "model_forward_time": 0.11606287956237793,
      "step": 32145
    },
    {
      "epoch": 0.000196197509765625,
      "step": 32145,
      "training_step_time": 0.3891627788543701
    },
    {
      "epoch": 0.00019620361328125,
      "model_forward_time": 0.11526727676391602,
      "step": 32146
    },
    {
      "epoch": 0.00019620361328125,
      "step": 32146,
      "training_step_time": 0.39548730850219727
    },
    {
      "epoch": 0.000196209716796875,
      "model_forward_time": 0.11569070816040039,
      "step": 32147
    },
    {
      "epoch": 0.000196209716796875,
      "step": 32147,
      "training_step_time": 0.3894507884979248
    },
    {
      "epoch": 0.0001962158203125,
      "model_forward_time": 0.11499190330505371,
      "step": 32148
    },
    {
      "epoch": 0.0001962158203125,
      "step": 32148,
      "training_step_time": 0.3915097713470459
    },
    {
      "epoch": 0.000196221923828125,
      "model_forward_time": 0.11545729637145996,
      "step": 32149
    },
    {
      "epoch": 0.000196221923828125,
      "step": 32149,
      "training_step_time": 0.5726284980773926
    },
    {
      "epoch": 0.00019622802734375,
      "grad_norm": 0.14405980706214905,
      "learning_rate": 4.820912416953377e-05,
      "loss": 0.0442,
      "step": 32150
    },
    {
      "epoch": 0.00019622802734375,
      "model_forward_time": 0.11548900604248047,
      "step": 32150
    },
    {
      "epoch": 0.00019622802734375,
      "step": 32150,
      "training_step_time": 0.4924740791320801
    },
    {
      "epoch": 0.000196234130859375,
      "model_forward_time": 0.1144561767578125,
      "step": 32151
    },
    {
      "epoch": 0.000196234130859375,
      "step": 32151,
      "training_step_time": 0.38594841957092285
    },
    {
      "epoch": 0.000196240234375,
      "model_forward_time": 0.1156928539276123,
      "step": 32152
    },
    {
      "epoch": 0.000196240234375,
      "step": 32152,
      "training_step_time": 0.3905375003814697
    },
    {
      "epoch": 0.000196246337890625,
      "model_forward_time": 0.11501336097717285,
      "step": 32153
    },
    {
      "epoch": 0.000196246337890625,
      "step": 32153,
      "training_step_time": 0.3952620029449463
    },
    {
      "epoch": 0.00019625244140625,
      "model_forward_time": 0.11507344245910645,
      "step": 32154
    },
    {
      "epoch": 0.00019625244140625,
      "step": 32154,
      "training_step_time": 0.40050268173217773
    },
    {
      "epoch": 0.000196258544921875,
      "model_forward_time": 0.11658787727355957,
      "step": 32155
    },
    {
      "epoch": 0.000196258544921875,
      "step": 32155,
      "training_step_time": 0.5851521492004395
    },
    {
      "epoch": 0.0001962646484375,
      "model_forward_time": 0.11481475830078125,
      "step": 32156
    },
    {
      "epoch": 0.0001962646484375,
      "step": 32156,
      "training_step_time": 0.4069674015045166
    },
    {
      "epoch": 0.000196270751953125,
      "model_forward_time": 0.11548972129821777,
      "step": 32157
    },
    {
      "epoch": 0.000196270751953125,
      "step": 32157,
      "training_step_time": 0.4706454277038574
    },
    {
      "epoch": 0.00019627685546875,
      "model_forward_time": 0.11519122123718262,
      "step": 32158
    },
    {
      "epoch": 0.00019627685546875,
      "step": 32158,
      "training_step_time": 0.3910050392150879
    },
    {
      "epoch": 0.000196282958984375,
      "model_forward_time": 0.11451005935668945,
      "step": 32159
    },
    {
      "epoch": 0.000196282958984375,
      "step": 32159,
      "training_step_time": 0.3955214023590088
    },
    {
      "epoch": 0.0001962890625,
      "grad_norm": 0.1358667016029358,
      "learning_rate": 4.818158429521129e-05,
      "loss": 0.0459,
      "step": 32160
    },
    {
      "epoch": 0.0001962890625,
      "model_forward_time": 0.1143186092376709,
      "step": 32160
    },
    {
      "epoch": 0.0001962890625,
      "step": 32160,
      "training_step_time": 0.3948802947998047
    },
    {
      "epoch": 0.000196295166015625,
      "model_forward_time": 0.11509394645690918,
      "step": 32161
    },
    {
      "epoch": 0.000196295166015625,
      "step": 32161,
      "training_step_time": 0.5781388282775879
    },
    {
      "epoch": 0.00019630126953125,
      "model_forward_time": 0.11571788787841797,
      "step": 32162
    },
    {
      "epoch": 0.00019630126953125,
      "step": 32162,
      "training_step_time": 0.38353943824768066
    },
    {
      "epoch": 0.000196307373046875,
      "model_forward_time": 0.11606407165527344,
      "step": 32163
    },
    {
      "epoch": 0.000196307373046875,
      "step": 32163,
      "training_step_time": 0.40073537826538086
    },
    {
      "epoch": 0.0001963134765625,
      "model_forward_time": 0.11423516273498535,
      "step": 32164
    },
    {
      "epoch": 0.0001963134765625,
      "step": 32164,
      "training_step_time": 0.41704440116882324
    },
    {
      "epoch": 0.000196319580078125,
      "model_forward_time": 0.11549735069274902,
      "step": 32165
    },
    {
      "epoch": 0.000196319580078125,
      "step": 32165,
      "training_step_time": 0.4465956687927246
    },
    {
      "epoch": 0.00019632568359375,
      "model_forward_time": 0.11494231224060059,
      "step": 32166
    },
    {
      "epoch": 0.00019632568359375,
      "step": 32166,
      "training_step_time": 0.3925313949584961
    },
    {
      "epoch": 0.000196331787109375,
      "model_forward_time": 0.11503982543945312,
      "step": 32167
    },
    {
      "epoch": 0.000196331787109375,
      "step": 32167,
      "training_step_time": 0.5906994342803955
    },
    {
      "epoch": 0.000196337890625,
      "model_forward_time": 0.11477828025817871,
      "step": 32168
    },
    {
      "epoch": 0.000196337890625,
      "step": 32168,
      "training_step_time": 0.45232725143432617
    },
    {
      "epoch": 0.000196343994140625,
      "model_forward_time": 0.11471915245056152,
      "step": 32169
    },
    {
      "epoch": 0.000196343994140625,
      "step": 32169,
      "training_step_time": 0.3745126724243164
    },
    {
      "epoch": 0.00019635009765625,
      "grad_norm": 0.14060455560684204,
      "learning_rate": 4.8154044973275484e-05,
      "loss": 0.0503,
      "step": 32170
    },
    {
      "epoch": 0.00019635009765625,
      "model_forward_time": 0.11468338966369629,
      "step": 32170
    },
    {
      "epoch": 0.00019635009765625,
      "step": 32170,
      "training_step_time": 0.40165233612060547
    },
    {
      "epoch": 0.000196356201171875,
      "model_forward_time": 0.11504864692687988,
      "step": 32171
    },
    {
      "epoch": 0.000196356201171875,
      "step": 32171,
      "training_step_time": 0.4162020683288574
    },
    {
      "epoch": 0.0001963623046875,
      "model_forward_time": 0.11447882652282715,
      "step": 32172
    },
    {
      "epoch": 0.0001963623046875,
      "step": 32172,
      "training_step_time": 0.4021003246307373
    },
    {
      "epoch": 0.000196368408203125,
      "model_forward_time": 0.11529064178466797,
      "step": 32173
    },
    {
      "epoch": 0.000196368408203125,
      "step": 32173,
      "training_step_time": 0.5422754287719727
    },
    {
      "epoch": 0.00019637451171875,
      "model_forward_time": 0.11524605751037598,
      "step": 32174
    },
    {
      "epoch": 0.00019637451171875,
      "step": 32174,
      "training_step_time": 0.3974802494049072
    },
    {
      "epoch": 0.000196380615234375,
      "model_forward_time": 0.11494040489196777,
      "step": 32175
    },
    {
      "epoch": 0.000196380615234375,
      "step": 32175,
      "training_step_time": 0.39306211471557617
    },
    {
      "epoch": 0.00019638671875,
      "model_forward_time": 0.11540889739990234,
      "step": 32176
    },
    {
      "epoch": 0.00019638671875,
      "step": 32176,
      "training_step_time": 0.3887054920196533
    },
    {
      "epoch": 0.000196392822265625,
      "model_forward_time": 0.11545014381408691,
      "step": 32177
    },
    {
      "epoch": 0.000196392822265625,
      "step": 32177,
      "training_step_time": 0.42374229431152344
    },
    {
      "epoch": 0.00019639892578125,
      "model_forward_time": 0.11495113372802734,
      "step": 32178
    },
    {
      "epoch": 0.00019639892578125,
      "step": 32178,
      "training_step_time": 0.39707136154174805
    },
    {
      "epoch": 0.000196405029296875,
      "model_forward_time": 0.11526060104370117,
      "step": 32179
    },
    {
      "epoch": 0.000196405029296875,
      "step": 32179,
      "training_step_time": 0.6869869232177734
    },
    {
      "epoch": 0.0001964111328125,
      "grad_norm": 0.1322186440229416,
      "learning_rate": 4.812650621209209e-05,
      "loss": 0.0467,
      "step": 32180
    },
    {
      "epoch": 0.0001964111328125,
      "model_forward_time": 0.11517024040222168,
      "step": 32180
    },
    {
      "epoch": 0.0001964111328125,
      "step": 32180,
      "training_step_time": 0.39406681060791016
    },
    {
      "epoch": 0.000196417236328125,
      "model_forward_time": 0.11469650268554688,
      "step": 32181
    },
    {
      "epoch": 0.000196417236328125,
      "step": 32181,
      "training_step_time": 0.4319477081298828
    },
    {
      "epoch": 0.00019642333984375,
      "model_forward_time": 0.11472320556640625,
      "step": 32182
    },
    {
      "epoch": 0.00019642333984375,
      "step": 32182,
      "training_step_time": 0.4367387294769287
    },
    {
      "epoch": 0.000196429443359375,
      "model_forward_time": 0.11568284034729004,
      "step": 32183
    },
    {
      "epoch": 0.000196429443359375,
      "step": 32183,
      "training_step_time": 0.3658738136291504
    },
    {
      "epoch": 0.000196435546875,
      "model_forward_time": 0.11440372467041016,
      "step": 32184
    },
    {
      "epoch": 0.000196435546875,
      "step": 32184,
      "training_step_time": 0.43741869926452637
    },
    {
      "epoch": 0.000196441650390625,
      "model_forward_time": 0.11567521095275879,
      "step": 32185
    },
    {
      "epoch": 0.000196441650390625,
      "step": 32185,
      "training_step_time": 0.424511194229126
    },
    {
      "epoch": 0.00019644775390625,
      "model_forward_time": 0.11494922637939453,
      "step": 32186
    },
    {
      "epoch": 0.00019644775390625,
      "step": 32186,
      "training_step_time": 0.38921570777893066
    },
    {
      "epoch": 0.000196453857421875,
      "model_forward_time": 0.11674022674560547,
      "step": 32187
    },
    {
      "epoch": 0.000196453857421875,
      "step": 32187,
      "training_step_time": 0.39004063606262207
    },
    {
      "epoch": 0.0001964599609375,
      "model_forward_time": 0.1150350570678711,
      "step": 32188
    },
    {
      "epoch": 0.0001964599609375,
      "step": 32188,
      "training_step_time": 0.39035534858703613
    },
    {
      "epoch": 0.000196466064453125,
      "model_forward_time": 0.11457300186157227,
      "step": 32189
    },
    {
      "epoch": 0.000196466064453125,
      "step": 32189,
      "training_step_time": 0.40856075286865234
    },
    {
      "epoch": 0.00019647216796875,
      "grad_norm": 0.1254129707813263,
      "learning_rate": 4.809896802002662e-05,
      "loss": 0.0401,
      "step": 32190
    },
    {
      "epoch": 0.00019647216796875,
      "model_forward_time": 0.11524629592895508,
      "step": 32190
    },
    {
      "epoch": 0.00019647216796875,
      "step": 32190,
      "training_step_time": 0.39746665954589844
    },
    {
      "epoch": 0.000196478271484375,
      "model_forward_time": 0.11489009857177734,
      "step": 32191
    },
    {
      "epoch": 0.000196478271484375,
      "step": 32191,
      "training_step_time": 0.6586487293243408
    },
    {
      "epoch": 0.000196484375,
      "model_forward_time": 0.11467194557189941,
      "step": 32192
    },
    {
      "epoch": 0.000196484375,
      "step": 32192,
      "training_step_time": 0.47819995880126953
    },
    {
      "epoch": 0.000196490478515625,
      "model_forward_time": 0.11470413208007812,
      "step": 32193
    },
    {
      "epoch": 0.000196490478515625,
      "step": 32193,
      "training_step_time": 0.38280725479125977
    },
    {
      "epoch": 0.00019649658203125,
      "model_forward_time": 0.1146857738494873,
      "step": 32194
    },
    {
      "epoch": 0.00019649658203125,
      "step": 32194,
      "training_step_time": 0.3924520015716553
    },
    {
      "epoch": 0.000196502685546875,
      "model_forward_time": 0.11445188522338867,
      "step": 32195
    },
    {
      "epoch": 0.000196502685546875,
      "step": 32195,
      "training_step_time": 0.5024669170379639
    },
    {
      "epoch": 0.0001965087890625,
      "model_forward_time": 0.11511111259460449,
      "step": 32196
    },
    {
      "epoch": 0.0001965087890625,
      "step": 32196,
      "training_step_time": 0.4163193702697754
    },
    {
      "epoch": 0.000196514892578125,
      "model_forward_time": 0.11494278907775879,
      "step": 32197
    },
    {
      "epoch": 0.000196514892578125,
      "step": 32197,
      "training_step_time": 0.4370596408843994
    },
    {
      "epoch": 0.00019652099609375,
      "model_forward_time": 0.11599135398864746,
      "step": 32198
    },
    {
      "epoch": 0.00019652099609375,
      "step": 32198,
      "training_step_time": 0.3986525535583496
    },
    {
      "epoch": 0.000196527099609375,
      "model_forward_time": 0.11536002159118652,
      "step": 32199
    },
    {
      "epoch": 0.000196527099609375,
      "step": 32199,
      "training_step_time": 0.4861576557159424
    },
    {
      "epoch": 0.000196533203125,
      "grad_norm": 0.16260544955730438,
      "learning_rate": 4.8071430405444474e-05,
      "loss": 0.0389,
      "step": 32200
    },
    {
      "epoch": 0.000196533203125,
      "model_forward_time": 0.11440205574035645,
      "step": 32200
    },
    {
      "epoch": 0.000196533203125,
      "step": 32200,
      "training_step_time": 0.3934612274169922
    },
    {
      "epoch": 0.000196539306640625,
      "model_forward_time": 0.11519551277160645,
      "step": 32201
    },
    {
      "epoch": 0.000196539306640625,
      "step": 32201,
      "training_step_time": 0.3947315216064453
    },
    {
      "epoch": 0.00019654541015625,
      "model_forward_time": 0.11532974243164062,
      "step": 32202
    },
    {
      "epoch": 0.00019654541015625,
      "step": 32202,
      "training_step_time": 0.39321446418762207
    },
    {
      "epoch": 0.000196551513671875,
      "model_forward_time": 0.11499643325805664,
      "step": 32203
    },
    {
      "epoch": 0.000196551513671875,
      "step": 32203,
      "training_step_time": 0.41506171226501465
    },
    {
      "epoch": 0.0001965576171875,
      "model_forward_time": 0.1159830093383789,
      "step": 32204
    },
    {
      "epoch": 0.0001965576171875,
      "step": 32204,
      "training_step_time": 0.4298574924468994
    },
    {
      "epoch": 0.000196563720703125,
      "model_forward_time": 0.11614990234375,
      "step": 32205
    },
    {
      "epoch": 0.000196563720703125,
      "step": 32205,
      "training_step_time": 0.40906858444213867
    },
    {
      "epoch": 0.00019656982421875,
      "model_forward_time": 0.1145322322845459,
      "step": 32206
    },
    {
      "epoch": 0.00019656982421875,
      "step": 32206,
      "training_step_time": 0.426332950592041
    },
    {
      "epoch": 0.000196575927734375,
      "model_forward_time": 0.11462020874023438,
      "step": 32207
    },
    {
      "epoch": 0.000196575927734375,
      "step": 32207,
      "training_step_time": 0.3966803550720215
    },
    {
      "epoch": 0.00019658203125,
      "model_forward_time": 0.11524653434753418,
      "step": 32208
    },
    {
      "epoch": 0.00019658203125,
      "step": 32208,
      "training_step_time": 0.42853736877441406
    },
    {
      "epoch": 0.000196588134765625,
      "model_forward_time": 0.11605620384216309,
      "step": 32209
    },
    {
      "epoch": 0.000196588134765625,
      "step": 32209,
      "training_step_time": 0.6611697673797607
    },
    {
      "epoch": 0.00019659423828125,
      "grad_norm": 0.14434140920639038,
      "learning_rate": 4.804389337671087e-05,
      "loss": 0.0452,
      "step": 32210
    },
    {
      "epoch": 0.00019659423828125,
      "model_forward_time": 0.11428952217102051,
      "step": 32210
    },
    {
      "epoch": 0.00019659423828125,
      "step": 32210,
      "training_step_time": 0.4203488826751709
    },
    {
      "epoch": 0.000196600341796875,
      "model_forward_time": 0.1147615909576416,
      "step": 32211
    },
    {
      "epoch": 0.000196600341796875,
      "step": 32211,
      "training_step_time": 0.49149346351623535
    },
    {
      "epoch": 0.0001966064453125,
      "model_forward_time": 0.11499977111816406,
      "step": 32212
    },
    {
      "epoch": 0.0001966064453125,
      "step": 32212,
      "training_step_time": 0.4921131134033203
    },
    {
      "epoch": 0.000196612548828125,
      "model_forward_time": 0.11501145362854004,
      "step": 32213
    },
    {
      "epoch": 0.000196612548828125,
      "step": 32213,
      "training_step_time": 0.4066603183746338
    },
    {
      "epoch": 0.00019661865234375,
      "model_forward_time": 0.11499190330505371,
      "step": 32214
    },
    {
      "epoch": 0.00019661865234375,
      "step": 32214,
      "training_step_time": 0.38790130615234375
    },
    {
      "epoch": 0.000196624755859375,
      "model_forward_time": 0.1153252124786377,
      "step": 32215
    },
    {
      "epoch": 0.000196624755859375,
      "step": 32215,
      "training_step_time": 0.40761446952819824
    },
    {
      "epoch": 0.000196630859375,
      "model_forward_time": 0.11452293395996094,
      "step": 32216
    },
    {
      "epoch": 0.000196630859375,
      "step": 32216,
      "training_step_time": 0.38622164726257324
    },
    {
      "epoch": 0.000196636962890625,
      "model_forward_time": 0.11542057991027832,
      "step": 32217
    },
    {
      "epoch": 0.000196636962890625,
      "step": 32217,
      "training_step_time": 0.38584303855895996
    },
    {
      "epoch": 0.00019664306640625,
      "model_forward_time": 0.11589503288269043,
      "step": 32218
    },
    {
      "epoch": 0.00019664306640625,
      "step": 32218,
      "training_step_time": 0.4258840084075928
    },
    {
      "epoch": 0.000196649169921875,
      "model_forward_time": 0.11513853073120117,
      "step": 32219
    },
    {
      "epoch": 0.000196649169921875,
      "step": 32219,
      "training_step_time": 0.3965327739715576
    },
    {
      "epoch": 0.0001966552734375,
      "grad_norm": 0.13549795746803284,
      "learning_rate": 4.801635694219079e-05,
      "loss": 0.0439,
      "step": 32220
    },
    {
      "epoch": 0.0001966552734375,
      "model_forward_time": 0.11565017700195312,
      "step": 32220
    },
    {
      "epoch": 0.0001966552734375,
      "step": 32220,
      "training_step_time": 0.4309961795806885
    },
    {
      "epoch": 0.000196661376953125,
      "model_forward_time": 0.1152656078338623,
      "step": 32221
    },
    {
      "epoch": 0.000196661376953125,
      "step": 32221,
      "training_step_time": 0.6362097263336182
    },
    {
      "epoch": 0.00019666748046875,
      "model_forward_time": 0.11469006538391113,
      "step": 32222
    },
    {
      "epoch": 0.00019666748046875,
      "step": 32222,
      "training_step_time": 0.4064199924468994
    },
    {
      "epoch": 0.000196673583984375,
      "model_forward_time": 0.11498498916625977,
      "step": 32223
    },
    {
      "epoch": 0.000196673583984375,
      "step": 32223,
      "training_step_time": 0.40216922760009766
    },
    {
      "epoch": 0.0001966796875,
      "model_forward_time": 0.11453032493591309,
      "step": 32224
    },
    {
      "epoch": 0.0001966796875,
      "step": 32224,
      "training_step_time": 0.4381539821624756
    },
    {
      "epoch": 0.000196685791015625,
      "model_forward_time": 0.11440896987915039,
      "step": 32225
    },
    {
      "epoch": 0.000196685791015625,
      "step": 32225,
      "training_step_time": 0.3620421886444092
    },
    {
      "epoch": 0.00019669189453125,
      "model_forward_time": 0.1147165298461914,
      "step": 32226
    },
    {
      "epoch": 0.00019669189453125,
      "step": 32226,
      "training_step_time": 0.4172186851501465
    },
    {
      "epoch": 0.000196697998046875,
      "model_forward_time": 0.11498045921325684,
      "step": 32227
    },
    {
      "epoch": 0.000196697998046875,
      "step": 32227,
      "training_step_time": 0.5681610107421875
    },
    {
      "epoch": 0.0001967041015625,
      "model_forward_time": 0.11433696746826172,
      "step": 32228
    },
    {
      "epoch": 0.0001967041015625,
      "step": 32228,
      "training_step_time": 0.3869166374206543
    },
    {
      "epoch": 0.000196710205078125,
      "model_forward_time": 0.11522483825683594,
      "step": 32229
    },
    {
      "epoch": 0.000196710205078125,
      "step": 32229,
      "training_step_time": 0.38768982887268066
    },
    {
      "epoch": 0.00019671630859375,
      "grad_norm": 0.10073858499526978,
      "learning_rate": 4.798882111024912e-05,
      "loss": 0.0447,
      "step": 32230
    },
    {
      "epoch": 0.00019671630859375,
      "model_forward_time": 0.11452889442443848,
      "step": 32230
    },
    {
      "epoch": 0.00019671630859375,
      "step": 32230,
      "training_step_time": 0.40645813941955566
    },
    {
      "epoch": 0.000196722412109375,
      "model_forward_time": 0.1148226261138916,
      "step": 32231
    },
    {
      "epoch": 0.000196722412109375,
      "step": 32231,
      "training_step_time": 0.3927886486053467
    },
    {
      "epoch": 0.000196728515625,
      "model_forward_time": 0.11532449722290039,
      "step": 32232
    },
    {
      "epoch": 0.000196728515625,
      "step": 32232,
      "training_step_time": 0.4082908630371094
    },
    {
      "epoch": 0.000196734619140625,
      "model_forward_time": 0.11591911315917969,
      "step": 32233
    },
    {
      "epoch": 0.000196734619140625,
      "step": 32233,
      "training_step_time": 0.761915922164917
    },
    {
      "epoch": 0.00019674072265625,
      "model_forward_time": 0.11439919471740723,
      "step": 32234
    },
    {
      "epoch": 0.00019674072265625,
      "step": 32234,
      "training_step_time": 0.4506850242614746
    },
    {
      "epoch": 0.000196746826171875,
      "model_forward_time": 0.11452531814575195,
      "step": 32235
    },
    {
      "epoch": 0.000196746826171875,
      "step": 32235,
      "training_step_time": 0.41707539558410645
    },
    {
      "epoch": 0.0001967529296875,
      "model_forward_time": 0.11452174186706543,
      "step": 32236
    },
    {
      "epoch": 0.0001967529296875,
      "step": 32236,
      "training_step_time": 0.39535045623779297
    },
    {
      "epoch": 0.000196759033203125,
      "model_forward_time": 0.11399269104003906,
      "step": 32237
    },
    {
      "epoch": 0.000196759033203125,
      "step": 32237,
      "training_step_time": 0.3951292037963867
    },
    {
      "epoch": 0.00019676513671875,
      "model_forward_time": 0.1150364875793457,
      "step": 32238
    },
    {
      "epoch": 0.00019676513671875,
      "step": 32238,
      "training_step_time": 0.42174196243286133
    },
    {
      "epoch": 0.000196771240234375,
      "model_forward_time": 0.11463117599487305,
      "step": 32239
    },
    {
      "epoch": 0.000196771240234375,
      "step": 32239,
      "training_step_time": 0.5611660480499268
    },
    {
      "epoch": 0.00019677734375,
      "grad_norm": 0.14548757672309875,
      "learning_rate": 4.7961285889250475e-05,
      "loss": 0.0428,
      "step": 32240
    },
    {
      "epoch": 0.00019677734375,
      "model_forward_time": 0.11493754386901855,
      "step": 32240
    },
    {
      "epoch": 0.00019677734375,
      "step": 32240,
      "training_step_time": 0.49646520614624023
    },
    {
      "epoch": 0.000196783447265625,
      "model_forward_time": 0.11503791809082031,
      "step": 32241
    },
    {
      "epoch": 0.000196783447265625,
      "step": 32241,
      "training_step_time": 0.4371674060821533
    },
    {
      "epoch": 0.00019678955078125,
      "model_forward_time": 0.11532068252563477,
      "step": 32242
    },
    {
      "epoch": 0.00019678955078125,
      "step": 32242,
      "training_step_time": 0.3874838352203369
    },
    {
      "epoch": 0.000196795654296875,
      "model_forward_time": 0.11586141586303711,
      "step": 32243
    },
    {
      "epoch": 0.000196795654296875,
      "step": 32243,
      "training_step_time": 0.38286781311035156
    },
    {
      "epoch": 0.0001968017578125,
      "model_forward_time": 0.11472082138061523,
      "step": 32244
    },
    {
      "epoch": 0.0001968017578125,
      "step": 32244,
      "training_step_time": 0.3912661075592041
    },
    {
      "epoch": 0.000196807861328125,
      "model_forward_time": 0.11523127555847168,
      "step": 32245
    },
    {
      "epoch": 0.000196807861328125,
      "step": 32245,
      "training_step_time": 0.4969780445098877
    },
    {
      "epoch": 0.00019681396484375,
      "model_forward_time": 0.11542987823486328,
      "step": 32246
    },
    {
      "epoch": 0.00019681396484375,
      "step": 32246,
      "training_step_time": 0.4169578552246094
    },
    {
      "epoch": 0.000196820068359375,
      "model_forward_time": 0.11507272720336914,
      "step": 32247
    },
    {
      "epoch": 0.000196820068359375,
      "step": 32247,
      "training_step_time": 0.5067994594573975
    },
    {
      "epoch": 0.000196826171875,
      "model_forward_time": 0.11481833457946777,
      "step": 32248
    },
    {
      "epoch": 0.000196826171875,
      "step": 32248,
      "training_step_time": 0.39265871047973633
    },
    {
      "epoch": 0.000196832275390625,
      "model_forward_time": 0.11492276191711426,
      "step": 32249
    },
    {
      "epoch": 0.000196832275390625,
      "step": 32249,
      "training_step_time": 0.38998866081237793
    },
    {
      "epoch": 0.00019683837890625,
      "grad_norm": 0.10043259710073471,
      "learning_rate": 4.7933751287559335e-05,
      "loss": 0.0432,
      "step": 32250
    },
    {
      "epoch": 0.00019683837890625,
      "model_forward_time": 0.11501145362854004,
      "step": 32250
    },
    {
      "epoch": 0.00019683837890625,
      "step": 32250,
      "training_step_time": 0.39583516120910645
    },
    {
      "epoch": 0.000196844482421875,
      "model_forward_time": 0.11568474769592285,
      "step": 32251
    },
    {
      "epoch": 0.000196844482421875,
      "step": 32251,
      "training_step_time": 0.4791903495788574
    },
    {
      "epoch": 0.0001968505859375,
      "model_forward_time": 0.11553382873535156,
      "step": 32252
    },
    {
      "epoch": 0.0001968505859375,
      "step": 32252,
      "training_step_time": 0.4145517349243164
    },
    {
      "epoch": 0.000196856689453125,
      "model_forward_time": 0.11481642723083496,
      "step": 32253
    },
    {
      "epoch": 0.000196856689453125,
      "step": 32253,
      "training_step_time": 0.36535191535949707
    },
    {
      "epoch": 0.00019686279296875,
      "model_forward_time": 0.1154787540435791,
      "step": 32254
    },
    {
      "epoch": 0.00019686279296875,
      "step": 32254,
      "training_step_time": 0.46125054359436035
    },
    {
      "epoch": 0.000196868896484375,
      "model_forward_time": 0.11524009704589844,
      "step": 32255
    },
    {
      "epoch": 0.000196868896484375,
      "step": 32255,
      "training_step_time": 0.4804069995880127
    },
    {
      "epoch": 0.000196875,
      "model_forward_time": 0.11516618728637695,
      "step": 32256
    },
    {
      "epoch": 0.000196875,
      "step": 32256,
      "training_step_time": 0.4032607078552246
    },
    {
      "epoch": 0.000196881103515625,
      "model_forward_time": 0.11539292335510254,
      "step": 32257
    },
    {
      "epoch": 0.000196881103515625,
      "step": 32257,
      "training_step_time": 0.3977229595184326
    },
    {
      "epoch": 0.00019688720703125,
      "model_forward_time": 0.11536693572998047,
      "step": 32258
    },
    {
      "epoch": 0.00019688720703125,
      "step": 32258,
      "training_step_time": 0.4063541889190674
    },
    {
      "epoch": 0.000196893310546875,
      "model_forward_time": 0.11485671997070312,
      "step": 32259
    },
    {
      "epoch": 0.000196893310546875,
      "step": 32259,
      "training_step_time": 0.48332738876342773
    },
    {
      "epoch": 0.0001968994140625,
      "grad_norm": 0.15952208638191223,
      "learning_rate": 4.790621731354003e-05,
      "loss": 0.0403,
      "step": 32260
    },
    {
      "epoch": 0.0001968994140625,
      "model_forward_time": 0.11451315879821777,
      "step": 32260
    },
    {
      "epoch": 0.0001968994140625,
      "step": 32260,
      "training_step_time": 0.41342902183532715
    },
    {
      "epoch": 0.000196905517578125,
      "model_forward_time": 0.11553740501403809,
      "step": 32261
    },
    {
      "epoch": 0.000196905517578125,
      "step": 32261,
      "training_step_time": 0.49730944633483887
    },
    {
      "epoch": 0.00019691162109375,
      "model_forward_time": 0.11440396308898926,
      "step": 32262
    },
    {
      "epoch": 0.00019691162109375,
      "step": 32262,
      "training_step_time": 0.3949315547943115
    },
    {
      "epoch": 0.000196917724609375,
      "model_forward_time": 0.11580181121826172,
      "step": 32263
    },
    {
      "epoch": 0.000196917724609375,
      "step": 32263,
      "training_step_time": 0.4935307502746582
    },
    {
      "epoch": 0.000196923828125,
      "model_forward_time": 0.11498689651489258,
      "step": 32264
    },
    {
      "epoch": 0.000196923828125,
      "step": 32264,
      "training_step_time": 0.38301753997802734
    },
    {
      "epoch": 0.000196929931640625,
      "model_forward_time": 0.1150960922241211,
      "step": 32265
    },
    {
      "epoch": 0.000196929931640625,
      "step": 32265,
      "training_step_time": 0.42946481704711914
    },
    {
      "epoch": 0.00019693603515625,
      "model_forward_time": 0.11516213417053223,
      "step": 32266
    },
    {
      "epoch": 0.00019693603515625,
      "step": 32266,
      "training_step_time": 0.5056178569793701
    },
    {
      "epoch": 0.000196942138671875,
      "model_forward_time": 0.11502456665039062,
      "step": 32267
    },
    {
      "epoch": 0.000196942138671875,
      "step": 32267,
      "training_step_time": 0.3639254570007324
    },
    {
      "epoch": 0.0001969482421875,
      "model_forward_time": 0.11664748191833496,
      "step": 32268
    },
    {
      "epoch": 0.0001969482421875,
      "step": 32268,
      "training_step_time": 0.46344447135925293
    },
    {
      "epoch": 0.000196954345703125,
      "model_forward_time": 0.1150507926940918,
      "step": 32269
    },
    {
      "epoch": 0.000196954345703125,
      "step": 32269,
      "training_step_time": 0.4731576442718506
    },
    {
      "epoch": 0.00019696044921875,
      "grad_norm": 0.1399180144071579,
      "learning_rate": 4.78786839755566e-05,
      "loss": 0.041,
      "step": 32270
    },
    {
      "epoch": 0.00019696044921875,
      "model_forward_time": 0.11405348777770996,
      "step": 32270
    },
    {
      "epoch": 0.00019696044921875,
      "step": 32270,
      "training_step_time": 0.38961052894592285
    },
    {
      "epoch": 0.000196966552734375,
      "model_forward_time": 0.11490607261657715,
      "step": 32271
    },
    {
      "epoch": 0.000196966552734375,
      "step": 32271,
      "training_step_time": 0.3896055221557617
    },
    {
      "epoch": 0.00019697265625,
      "model_forward_time": 0.11463022232055664,
      "step": 32272
    },
    {
      "epoch": 0.00019697265625,
      "step": 32272,
      "training_step_time": 0.38682103157043457
    },
    {
      "epoch": 0.000196978759765625,
      "model_forward_time": 0.1150972843170166,
      "step": 32273
    },
    {
      "epoch": 0.000196978759765625,
      "step": 32273,
      "training_step_time": 0.47675371170043945
    },
    {
      "epoch": 0.00019698486328125,
      "model_forward_time": 0.11519217491149902,
      "step": 32274
    },
    {
      "epoch": 0.00019698486328125,
      "step": 32274,
      "training_step_time": 0.43584680557250977
    },
    {
      "epoch": 0.000196990966796875,
      "model_forward_time": 0.11498475074768066,
      "step": 32275
    },
    {
      "epoch": 0.000196990966796875,
      "step": 32275,
      "training_step_time": 0.400665283203125
    },
    {
      "epoch": 0.0001969970703125,
      "model_forward_time": 0.11563920974731445,
      "step": 32276
    },
    {
      "epoch": 0.0001969970703125,
      "step": 32276,
      "training_step_time": 0.3860783576965332
    },
    {
      "epoch": 0.000197003173828125,
      "model_forward_time": 0.11504292488098145,
      "step": 32277
    },
    {
      "epoch": 0.000197003173828125,
      "step": 32277,
      "training_step_time": 0.3866710662841797
    },
    {
      "epoch": 0.00019700927734375,
      "model_forward_time": 0.1155705451965332,
      "step": 32278
    },
    {
      "epoch": 0.00019700927734375,
      "step": 32278,
      "training_step_time": 0.4003932476043701
    },
    {
      "epoch": 0.000197015380859375,
      "model_forward_time": 0.11535215377807617,
      "step": 32279
    },
    {
      "epoch": 0.000197015380859375,
      "step": 32279,
      "training_step_time": 0.4236176013946533
    },
    {
      "epoch": 0.000197021484375,
      "grad_norm": 0.11852388083934784,
      "learning_rate": 4.785115128197298e-05,
      "loss": 0.0443,
      "step": 32280
    },
    {
      "epoch": 0.000197021484375,
      "model_forward_time": 0.11466598510742188,
      "step": 32280
    },
    {
      "epoch": 0.000197021484375,
      "step": 32280,
      "training_step_time": 0.4052009582519531
    },
    {
      "epoch": 0.000197027587890625,
      "model_forward_time": 0.11573028564453125,
      "step": 32281
    },
    {
      "epoch": 0.000197027587890625,
      "step": 32281,
      "training_step_time": 0.6702365875244141
    },
    {
      "epoch": 0.00019703369140625,
      "model_forward_time": 0.11464762687683105,
      "step": 32282
    },
    {
      "epoch": 0.00019703369140625,
      "step": 32282,
      "training_step_time": 0.4271814823150635
    },
    {
      "epoch": 0.000197039794921875,
      "model_forward_time": 0.11502313613891602,
      "step": 32283
    },
    {
      "epoch": 0.000197039794921875,
      "step": 32283,
      "training_step_time": 0.3896484375
    },
    {
      "epoch": 0.0001970458984375,
      "model_forward_time": 0.11488580703735352,
      "step": 32284
    },
    {
      "epoch": 0.0001970458984375,
      "step": 32284,
      "training_step_time": 0.39330124855041504
    },
    {
      "epoch": 0.000197052001953125,
      "model_forward_time": 0.11464190483093262,
      "step": 32285
    },
    {
      "epoch": 0.000197052001953125,
      "step": 32285,
      "training_step_time": 0.3936586380004883
    },
    {
      "epoch": 0.00019705810546875,
      "model_forward_time": 0.11465048789978027,
      "step": 32286
    },
    {
      "epoch": 0.00019705810546875,
      "step": 32286,
      "training_step_time": 0.39847636222839355
    },
    {
      "epoch": 0.000197064208984375,
      "model_forward_time": 0.1152803897857666,
      "step": 32287
    },
    {
      "epoch": 0.000197064208984375,
      "step": 32287,
      "training_step_time": 0.7322580814361572
    },
    {
      "epoch": 0.0001970703125,
      "model_forward_time": 0.11415624618530273,
      "step": 32288
    },
    {
      "epoch": 0.0001970703125,
      "step": 32288,
      "training_step_time": 0.40020751953125
    },
    {
      "epoch": 0.000197076416015625,
      "model_forward_time": 0.11447477340698242,
      "step": 32289
    },
    {
      "epoch": 0.000197076416015625,
      "step": 32289,
      "training_step_time": 0.4046027660369873
    },
    {
      "epoch": 0.00019708251953125,
      "grad_norm": 0.10814917087554932,
      "learning_rate": 4.7823619241152854e-05,
      "loss": 0.0449,
      "step": 32290
    },
    {
      "epoch": 0.00019708251953125,
      "model_forward_time": 0.11476469039916992,
      "step": 32290
    },
    {
      "epoch": 0.00019708251953125,
      "step": 32290,
      "training_step_time": 0.386502742767334
    },
    {
      "epoch": 0.000197088623046875,
      "model_forward_time": 0.11472463607788086,
      "step": 32291
    },
    {
      "epoch": 0.000197088623046875,
      "step": 32291,
      "training_step_time": 0.3935129642486572
    },
    {
      "epoch": 0.0001970947265625,
      "model_forward_time": 0.11444544792175293,
      "step": 32292
    },
    {
      "epoch": 0.0001970947265625,
      "step": 32292,
      "training_step_time": 0.4264516830444336
    },
    {
      "epoch": 0.000197100830078125,
      "model_forward_time": 0.11488223075866699,
      "step": 32293
    },
    {
      "epoch": 0.000197100830078125,
      "step": 32293,
      "training_step_time": 0.6786620616912842
    },
    {
      "epoch": 0.00019710693359375,
      "model_forward_time": 0.11449527740478516,
      "step": 32294
    },
    {
      "epoch": 0.00019710693359375,
      "step": 32294,
      "training_step_time": 0.41789746284484863
    },
    {
      "epoch": 0.000197113037109375,
      "model_forward_time": 0.11461615562438965,
      "step": 32295
    },
    {
      "epoch": 0.000197113037109375,
      "step": 32295,
      "training_step_time": 0.41539907455444336
    },
    {
      "epoch": 0.000197119140625,
      "model_forward_time": 0.11478495597839355,
      "step": 32296
    },
    {
      "epoch": 0.000197119140625,
      "step": 32296,
      "training_step_time": 0.4798440933227539
    },
    {
      "epoch": 0.000197125244140625,
      "model_forward_time": 0.1140604019165039,
      "step": 32297
    },
    {
      "epoch": 0.000197125244140625,
      "step": 32297,
      "training_step_time": 0.42301297187805176
    },
    {
      "epoch": 0.00019713134765625,
      "model_forward_time": 0.11524057388305664,
      "step": 32298
    },
    {
      "epoch": 0.00019713134765625,
      "step": 32298,
      "training_step_time": 0.38132524490356445
    },
    {
      "epoch": 0.000197137451171875,
      "model_forward_time": 0.11524510383605957,
      "step": 32299
    },
    {
      "epoch": 0.000197137451171875,
      "step": 32299,
      "training_step_time": 0.5331592559814453
    },
    {
      "epoch": 0.0001971435546875,
      "grad_norm": 0.13348853588104248,
      "learning_rate": 4.779608786145974e-05,
      "loss": 0.04,
      "step": 32300
    },
    {
      "epoch": 0.0001971435546875,
      "model_forward_time": 0.11487579345703125,
      "step": 32300
    },
    {
      "epoch": 0.0001971435546875,
      "step": 32300,
      "training_step_time": 0.4178335666656494
    },
    {
      "epoch": 0.000197149658203125,
      "model_forward_time": 0.11505722999572754,
      "step": 32301
    },
    {
      "epoch": 0.000197149658203125,
      "step": 32301,
      "training_step_time": 0.42391347885131836
    },
    {
      "epoch": 0.00019715576171875,
      "model_forward_time": 0.11509513854980469,
      "step": 32302
    },
    {
      "epoch": 0.00019715576171875,
      "step": 32302,
      "training_step_time": 0.49965786933898926
    },
    {
      "epoch": 0.000197161865234375,
      "model_forward_time": 0.11497116088867188,
      "step": 32303
    },
    {
      "epoch": 0.000197161865234375,
      "step": 32303,
      "training_step_time": 0.39889049530029297
    },
    {
      "epoch": 0.00019716796875,
      "model_forward_time": 0.11383295059204102,
      "step": 32304
    },
    {
      "epoch": 0.00019716796875,
      "step": 32304,
      "training_step_time": 0.4010956287384033
    },
    {
      "epoch": 0.000197174072265625,
      "model_forward_time": 0.1144261360168457,
      "step": 32305
    },
    {
      "epoch": 0.000197174072265625,
      "step": 32305,
      "training_step_time": 0.3911881446838379
    },
    {
      "epoch": 0.00019718017578125,
      "model_forward_time": 0.11535096168518066,
      "step": 32306
    },
    {
      "epoch": 0.00019718017578125,
      "step": 32306,
      "training_step_time": 0.397324800491333
    },
    {
      "epoch": 0.000197186279296875,
      "model_forward_time": 0.11464762687683105,
      "step": 32307
    },
    {
      "epoch": 0.000197186279296875,
      "step": 32307,
      "training_step_time": 0.4295799732208252
    },
    {
      "epoch": 0.0001971923828125,
      "model_forward_time": 0.11524295806884766,
      "step": 32308
    },
    {
      "epoch": 0.0001971923828125,
      "step": 32308,
      "training_step_time": 0.49503087997436523
    },
    {
      "epoch": 0.000197198486328125,
      "model_forward_time": 0.11468386650085449,
      "step": 32309
    },
    {
      "epoch": 0.000197198486328125,
      "step": 32309,
      "training_step_time": 0.39195799827575684
    },
    {
      "epoch": 0.00019720458984375,
      "grad_norm": 0.14278921484947205,
      "learning_rate": 4.776855715125694e-05,
      "loss": 0.044,
      "step": 32310
    },
    {
      "epoch": 0.00019720458984375,
      "model_forward_time": 0.11533904075622559,
      "step": 32310
    },
    {
      "epoch": 0.00019720458984375,
      "step": 32310,
      "training_step_time": 0.40375828742980957
    },
    {
      "epoch": 0.000197210693359375,
      "model_forward_time": 0.11656332015991211,
      "step": 32311
    },
    {
      "epoch": 0.000197210693359375,
      "step": 32311,
      "training_step_time": 0.5482687950134277
    },
    {
      "epoch": 0.000197216796875,
      "model_forward_time": 0.11538481712341309,
      "step": 32312
    },
    {
      "epoch": 0.000197216796875,
      "step": 32312,
      "training_step_time": 0.4039130210876465
    },
    {
      "epoch": 0.000197222900390625,
      "model_forward_time": 0.11508011817932129,
      "step": 32313
    },
    {
      "epoch": 0.000197222900390625,
      "step": 32313,
      "training_step_time": 0.3970932960510254
    },
    {
      "epoch": 0.00019722900390625,
      "model_forward_time": 0.11545467376708984,
      "step": 32314
    },
    {
      "epoch": 0.00019722900390625,
      "step": 32314,
      "training_step_time": 0.3853938579559326
    },
    {
      "epoch": 0.000197235107421875,
      "model_forward_time": 0.11579751968383789,
      "step": 32315
    },
    {
      "epoch": 0.000197235107421875,
      "step": 32315,
      "training_step_time": 0.44393491744995117
    },
    {
      "epoch": 0.0001972412109375,
      "model_forward_time": 0.11531233787536621,
      "step": 32316
    },
    {
      "epoch": 0.0001972412109375,
      "step": 32316,
      "training_step_time": 0.4339287281036377
    },
    {
      "epoch": 0.000197247314453125,
      "model_forward_time": 0.11477947235107422,
      "step": 32317
    },
    {
      "epoch": 0.000197247314453125,
      "step": 32317,
      "training_step_time": 0.4650883674621582
    },
    {
      "epoch": 0.00019725341796875,
      "model_forward_time": 0.11584234237670898,
      "step": 32318
    },
    {
      "epoch": 0.00019725341796875,
      "step": 32318,
      "training_step_time": 0.3883051872253418
    },
    {
      "epoch": 0.000197259521484375,
      "model_forward_time": 0.11549019813537598,
      "step": 32319
    },
    {
      "epoch": 0.000197259521484375,
      "step": 32319,
      "training_step_time": 0.3917975425720215
    },
    {
      "epoch": 0.000197265625,
      "grad_norm": 0.09432201832532883,
      "learning_rate": 4.774102711890756e-05,
      "loss": 0.0428,
      "step": 32320
    },
    {
      "epoch": 0.000197265625,
      "model_forward_time": 0.1144108772277832,
      "step": 32320
    },
    {
      "epoch": 0.000197265625,
      "step": 32320,
      "training_step_time": 0.39481163024902344
    },
    {
      "epoch": 0.000197271728515625,
      "model_forward_time": 0.1158442497253418,
      "step": 32321
    },
    {
      "epoch": 0.000197271728515625,
      "step": 32321,
      "training_step_time": 0.39695239067077637
    },
    {
      "epoch": 0.00019727783203125,
      "model_forward_time": 0.1152191162109375,
      "step": 32322
    },
    {
      "epoch": 0.00019727783203125,
      "step": 32322,
      "training_step_time": 0.4002037048339844
    },
    {
      "epoch": 0.000197283935546875,
      "model_forward_time": 0.11567354202270508,
      "step": 32323
    },
    {
      "epoch": 0.000197283935546875,
      "step": 32323,
      "training_step_time": 0.6506047248840332
    },
    {
      "epoch": 0.0001972900390625,
      "model_forward_time": 0.11503005027770996,
      "step": 32324
    },
    {
      "epoch": 0.0001972900390625,
      "step": 32324,
      "training_step_time": 0.4253268241882324
    },
    {
      "epoch": 0.000197296142578125,
      "model_forward_time": 0.11478161811828613,
      "step": 32325
    },
    {
      "epoch": 0.000197296142578125,
      "step": 32325,
      "training_step_time": 0.3930094242095947
    },
    {
      "epoch": 0.00019730224609375,
      "model_forward_time": 0.11459589004516602,
      "step": 32326
    },
    {
      "epoch": 0.00019730224609375,
      "step": 32326,
      "training_step_time": 0.46221423149108887
    },
    {
      "epoch": 0.000197308349609375,
      "model_forward_time": 0.11454224586486816,
      "step": 32327
    },
    {
      "epoch": 0.000197308349609375,
      "step": 32327,
      "training_step_time": 0.37418460845947266
    },
    {
      "epoch": 0.000197314453125,
      "model_forward_time": 0.11449646949768066,
      "step": 32328
    },
    {
      "epoch": 0.000197314453125,
      "step": 32328,
      "training_step_time": 0.3920884132385254
    },
    {
      "epoch": 0.000197320556640625,
      "model_forward_time": 0.11487889289855957,
      "step": 32329
    },
    {
      "epoch": 0.000197320556640625,
      "step": 32329,
      "training_step_time": 0.5975418090820312
    },
    {
      "epoch": 0.00019732666015625,
      "grad_norm": 0.15380312502384186,
      "learning_rate": 4.771349777277452e-05,
      "loss": 0.0433,
      "step": 32330
    },
    {
      "epoch": 0.00019732666015625,
      "model_forward_time": 0.11458992958068848,
      "step": 32330
    },
    {
      "epoch": 0.00019732666015625,
      "step": 32330,
      "training_step_time": 0.42928028106689453
    },
    {
      "epoch": 0.000197332763671875,
      "model_forward_time": 0.11470723152160645,
      "step": 32331
    },
    {
      "epoch": 0.000197332763671875,
      "step": 32331,
      "training_step_time": 0.3940157890319824
    },
    {
      "epoch": 0.0001973388671875,
      "model_forward_time": 0.11499166488647461,
      "step": 32332
    },
    {
      "epoch": 0.0001973388671875,
      "step": 32332,
      "training_step_time": 0.4008297920227051
    },
    {
      "epoch": 0.000197344970703125,
      "model_forward_time": 0.11528229713439941,
      "step": 32333
    },
    {
      "epoch": 0.000197344970703125,
      "step": 32333,
      "training_step_time": 0.4132874011993408
    },
    {
      "epoch": 0.00019735107421875,
      "model_forward_time": 0.11449956893920898,
      "step": 32334
    },
    {
      "epoch": 0.00019735107421875,
      "step": 32334,
      "training_step_time": 0.39386892318725586
    },
    {
      "epoch": 0.000197357177734375,
      "model_forward_time": 0.11517453193664551,
      "step": 32335
    },
    {
      "epoch": 0.000197357177734375,
      "step": 32335,
      "training_step_time": 0.6112895011901855
    },
    {
      "epoch": 0.00019736328125,
      "model_forward_time": 0.11487650871276855,
      "step": 32336
    },
    {
      "epoch": 0.00019736328125,
      "step": 32336,
      "training_step_time": 0.5033080577850342
    },
    {
      "epoch": 0.000197369384765625,
      "model_forward_time": 0.11496233940124512,
      "step": 32337
    },
    {
      "epoch": 0.000197369384765625,
      "step": 32337,
      "training_step_time": 0.3890821933746338
    },
    {
      "epoch": 0.00019737548828125,
      "model_forward_time": 0.1148984432220459,
      "step": 32338
    },
    {
      "epoch": 0.00019737548828125,
      "step": 32338,
      "training_step_time": 0.4180140495300293
    },
    {
      "epoch": 0.000197381591796875,
      "model_forward_time": 0.11425614356994629,
      "step": 32339
    },
    {
      "epoch": 0.000197381591796875,
      "step": 32339,
      "training_step_time": 0.42090821266174316
    },
    {
      "epoch": 0.0001973876953125,
      "grad_norm": 0.1069984957575798,
      "learning_rate": 4.7685969121220456e-05,
      "loss": 0.0467,
      "step": 32340
    },
    {
      "epoch": 0.0001973876953125,
      "model_forward_time": 0.11496090888977051,
      "step": 32340
    },
    {
      "epoch": 0.0001973876953125,
      "step": 32340,
      "training_step_time": 0.44574451446533203
    },
    {
      "epoch": 0.000197393798828125,
      "model_forward_time": 0.11524128913879395,
      "step": 32341
    },
    {
      "epoch": 0.000197393798828125,
      "step": 32341,
      "training_step_time": 0.38738012313842773
    },
    {
      "epoch": 0.00019739990234375,
      "model_forward_time": 0.11474370956420898,
      "step": 32342
    },
    {
      "epoch": 0.00019739990234375,
      "step": 32342,
      "training_step_time": 0.38704371452331543
    },
    {
      "epoch": 0.000197406005859375,
      "model_forward_time": 0.11497783660888672,
      "step": 32343
    },
    {
      "epoch": 0.000197406005859375,
      "step": 32343,
      "training_step_time": 0.4566032886505127
    },
    {
      "epoch": 0.000197412109375,
      "model_forward_time": 0.11515045166015625,
      "step": 32344
    },
    {
      "epoch": 0.000197412109375,
      "step": 32344,
      "training_step_time": 0.42872047424316406
    },
    {
      "epoch": 0.000197418212890625,
      "model_forward_time": 0.11577010154724121,
      "step": 32345
    },
    {
      "epoch": 0.000197418212890625,
      "step": 32345,
      "training_step_time": 0.4013631343841553
    },
    {
      "epoch": 0.00019742431640625,
      "model_forward_time": 0.11466002464294434,
      "step": 32346
    },
    {
      "epoch": 0.00019742431640625,
      "step": 32346,
      "training_step_time": 0.3975260257720947
    },
    {
      "epoch": 0.000197430419921875,
      "model_forward_time": 0.11539268493652344,
      "step": 32347
    },
    {
      "epoch": 0.000197430419921875,
      "step": 32347,
      "training_step_time": 0.5558161735534668
    },
    {
      "epoch": 0.0001974365234375,
      "model_forward_time": 0.1144871711730957,
      "step": 32348
    },
    {
      "epoch": 0.0001974365234375,
      "step": 32348,
      "training_step_time": 0.48629069328308105
    },
    {
      "epoch": 0.000197442626953125,
      "model_forward_time": 0.11530256271362305,
      "step": 32349
    },
    {
      "epoch": 0.000197442626953125,
      "step": 32349,
      "training_step_time": 0.45180678367614746
    },
    {
      "epoch": 0.00019744873046875,
      "grad_norm": 0.11139299720525742,
      "learning_rate": 4.7658441172607876e-05,
      "loss": 0.0422,
      "step": 32350
    },
    {
      "epoch": 0.00019744873046875,
      "model_forward_time": 0.11433887481689453,
      "step": 32350
    },
    {
      "epoch": 0.00019744873046875,
      "step": 32350,
      "training_step_time": 0.4864161014556885
    },
    {
      "epoch": 0.000197454833984375,
      "model_forward_time": 0.11433601379394531,
      "step": 32351
    },
    {
      "epoch": 0.000197454833984375,
      "step": 32351,
      "training_step_time": 0.40132689476013184
    },
    {
      "epoch": 0.0001974609375,
      "model_forward_time": 0.11501002311706543,
      "step": 32352
    },
    {
      "epoch": 0.0001974609375,
      "step": 32352,
      "training_step_time": 0.39084815979003906
    },
    {
      "epoch": 0.000197467041015625,
      "model_forward_time": 0.11618947982788086,
      "step": 32353
    },
    {
      "epoch": 0.000197467041015625,
      "step": 32353,
      "training_step_time": 0.5897977352142334
    },
    {
      "epoch": 0.00019747314453125,
      "model_forward_time": 0.11534857749938965,
      "step": 32354
    },
    {
      "epoch": 0.00019747314453125,
      "step": 32354,
      "training_step_time": 0.3921244144439697
    },
    {
      "epoch": 0.000197479248046875,
      "model_forward_time": 0.11428689956665039,
      "step": 32355
    },
    {
      "epoch": 0.000197479248046875,
      "step": 32355,
      "training_step_time": 0.3858814239501953
    },
    {
      "epoch": 0.0001974853515625,
      "model_forward_time": 0.11497259140014648,
      "step": 32356
    },
    {
      "epoch": 0.0001974853515625,
      "step": 32356,
      "training_step_time": 0.3928842544555664
    },
    {
      "epoch": 0.000197491455078125,
      "model_forward_time": 0.11475324630737305,
      "step": 32357
    },
    {
      "epoch": 0.000197491455078125,
      "step": 32357,
      "training_step_time": 0.4171149730682373
    },
    {
      "epoch": 0.00019749755859375,
      "model_forward_time": 0.114776611328125,
      "step": 32358
    },
    {
      "epoch": 0.00019749755859375,
      "step": 32358,
      "training_step_time": 0.490741491317749
    },
    {
      "epoch": 0.000197503662109375,
      "model_forward_time": 0.11491680145263672,
      "step": 32359
    },
    {
      "epoch": 0.000197503662109375,
      "step": 32359,
      "training_step_time": 0.5762217044830322
    },
    {
      "epoch": 0.000197509765625,
      "grad_norm": 0.10639163106679916,
      "learning_rate": 4.7630913935299066e-05,
      "loss": 0.0441,
      "step": 32360
    },
    {
      "epoch": 0.000197509765625,
      "model_forward_time": 0.11411166191101074,
      "step": 32360
    },
    {
      "epoch": 0.000197509765625,
      "step": 32360,
      "training_step_time": 0.39067697525024414
    },
    {
      "epoch": 0.000197515869140625,
      "model_forward_time": 0.11516618728637695,
      "step": 32361
    },
    {
      "epoch": 0.000197515869140625,
      "step": 32361,
      "training_step_time": 0.3915741443634033
    },
    {
      "epoch": 0.00019752197265625,
      "model_forward_time": 0.11479711532592773,
      "step": 32362
    },
    {
      "epoch": 0.00019752197265625,
      "step": 32362,
      "training_step_time": 0.40405941009521484
    },
    {
      "epoch": 0.000197528076171875,
      "model_forward_time": 0.1154172420501709,
      "step": 32363
    },
    {
      "epoch": 0.000197528076171875,
      "step": 32363,
      "training_step_time": 0.41319704055786133
    },
    {
      "epoch": 0.0001975341796875,
      "model_forward_time": 0.1147468090057373,
      "step": 32364
    },
    {
      "epoch": 0.0001975341796875,
      "step": 32364,
      "training_step_time": 0.46013951301574707
    },
    {
      "epoch": 0.000197540283203125,
      "model_forward_time": 0.11525630950927734,
      "step": 32365
    },
    {
      "epoch": 0.000197540283203125,
      "step": 32365,
      "training_step_time": 0.6505630016326904
    },
    {
      "epoch": 0.00019754638671875,
      "model_forward_time": 0.1142878532409668,
      "step": 32366
    },
    {
      "epoch": 0.00019754638671875,
      "step": 32366,
      "training_step_time": 0.4495224952697754
    },
    {
      "epoch": 0.000197552490234375,
      "model_forward_time": 0.1145014762878418,
      "step": 32367
    },
    {
      "epoch": 0.000197552490234375,
      "step": 32367,
      "training_step_time": 0.472320556640625
    },
    {
      "epoch": 0.00019755859375,
      "model_forward_time": 0.11335492134094238,
      "step": 32368
    },
    {
      "epoch": 0.00019755859375,
      "step": 32368,
      "training_step_time": 0.39021730422973633
    },
    {
      "epoch": 0.000197564697265625,
      "model_forward_time": 0.11380577087402344,
      "step": 32369
    },
    {
      "epoch": 0.000197564697265625,
      "step": 32369,
      "training_step_time": 0.3837459087371826
    },
    {
      "epoch": 0.00019757080078125,
      "grad_norm": 0.1373109519481659,
      "learning_rate": 4.7603387417656026e-05,
      "loss": 0.0455,
      "step": 32370
    },
    {
      "epoch": 0.00019757080078125,
      "model_forward_time": 0.11443805694580078,
      "step": 32370
    },
    {
      "epoch": 0.00019757080078125,
      "step": 32370,
      "training_step_time": 0.4170994758605957
    },
    {
      "epoch": 0.000197576904296875,
      "model_forward_time": 0.11478471755981445,
      "step": 32371
    },
    {
      "epoch": 0.000197576904296875,
      "step": 32371,
      "training_step_time": 0.40839529037475586
    },
    {
      "epoch": 0.0001975830078125,
      "model_forward_time": 0.11538958549499512,
      "step": 32372
    },
    {
      "epoch": 0.0001975830078125,
      "step": 32372,
      "training_step_time": 0.4029881954193115
    },
    {
      "epoch": 0.000197589111328125,
      "model_forward_time": 0.11463475227355957,
      "step": 32373
    },
    {
      "epoch": 0.000197589111328125,
      "step": 32373,
      "training_step_time": 0.38727521896362305
    },
    {
      "epoch": 0.00019759521484375,
      "model_forward_time": 0.11565017700195312,
      "step": 32374
    },
    {
      "epoch": 0.00019759521484375,
      "step": 32374,
      "training_step_time": 0.39583635330200195
    },
    {
      "epoch": 0.000197601318359375,
      "model_forward_time": 0.11497306823730469,
      "step": 32375
    },
    {
      "epoch": 0.000197601318359375,
      "step": 32375,
      "training_step_time": 0.4055938720703125
    },
    {
      "epoch": 0.000197607421875,
      "model_forward_time": 0.11502313613891602,
      "step": 32376
    },
    {
      "epoch": 0.000197607421875,
      "step": 32376,
      "training_step_time": 0.4573099613189697
    },
    {
      "epoch": 0.000197613525390625,
      "model_forward_time": 0.11544227600097656,
      "step": 32377
    },
    {
      "epoch": 0.000197613525390625,
      "step": 32377,
      "training_step_time": 0.5028934478759766
    },
    {
      "epoch": 0.00019761962890625,
      "model_forward_time": 0.11528158187866211,
      "step": 32378
    },
    {
      "epoch": 0.00019761962890625,
      "step": 32378,
      "training_step_time": 0.4420053958892822
    },
    {
      "epoch": 0.000197625732421875,
      "model_forward_time": 0.11591410636901855,
      "step": 32379
    },
    {
      "epoch": 0.000197625732421875,
      "step": 32379,
      "training_step_time": 0.3661317825317383
    },
    {
      "epoch": 0.0001976318359375,
      "grad_norm": 0.16652195155620575,
      "learning_rate": 4.7575861628040635e-05,
      "loss": 0.0429,
      "step": 32380
    },
    {
      "epoch": 0.0001976318359375,
      "model_forward_time": 0.11466312408447266,
      "step": 32380
    },
    {
      "epoch": 0.0001976318359375,
      "step": 32380,
      "training_step_time": 0.4736289978027344
    },
    {
      "epoch": 0.000197637939453125,
      "model_forward_time": 0.11447978019714355,
      "step": 32381
    },
    {
      "epoch": 0.000197637939453125,
      "step": 32381,
      "training_step_time": 0.4236152172088623
    },
    {
      "epoch": 0.00019764404296875,
      "model_forward_time": 0.11461687088012695,
      "step": 32382
    },
    {
      "epoch": 0.00019764404296875,
      "step": 32382,
      "training_step_time": 0.3939089775085449
    },
    {
      "epoch": 0.000197650146484375,
      "model_forward_time": 0.1156926155090332,
      "step": 32383
    },
    {
      "epoch": 0.000197650146484375,
      "step": 32383,
      "training_step_time": 0.4819340705871582
    },
    {
      "epoch": 0.00019765625,
      "model_forward_time": 0.11490321159362793,
      "step": 32384
    },
    {
      "epoch": 0.00019765625,
      "step": 32384,
      "training_step_time": 0.40704989433288574
    },
    {
      "epoch": 0.000197662353515625,
      "model_forward_time": 0.11650466918945312,
      "step": 32385
    },
    {
      "epoch": 0.000197662353515625,
      "step": 32385,
      "training_step_time": 0.3864326477050781
    },
    {
      "epoch": 0.00019766845703125,
      "model_forward_time": 0.11515665054321289,
      "step": 32386
    },
    {
      "epoch": 0.00019766845703125,
      "step": 32386,
      "training_step_time": 0.44354724884033203
    },
    {
      "epoch": 0.000197674560546875,
      "model_forward_time": 0.11499571800231934,
      "step": 32387
    },
    {
      "epoch": 0.000197674560546875,
      "step": 32387,
      "training_step_time": 0.38356947898864746
    },
    {
      "epoch": 0.0001976806640625,
      "model_forward_time": 0.11447787284851074,
      "step": 32388
    },
    {
      "epoch": 0.0001976806640625,
      "step": 32388,
      "training_step_time": 0.4043083190917969
    },
    {
      "epoch": 0.000197686767578125,
      "model_forward_time": 0.11522841453552246,
      "step": 32389
    },
    {
      "epoch": 0.000197686767578125,
      "step": 32389,
      "training_step_time": 0.5766561031341553
    },
    {
      "epoch": 0.00019769287109375,
      "grad_norm": 0.15724310278892517,
      "learning_rate": 4.754833657481445e-05,
      "loss": 0.0468,
      "step": 32390
    },
    {
      "epoch": 0.00019769287109375,
      "model_forward_time": 0.11560726165771484,
      "step": 32390
    },
    {
      "epoch": 0.00019769287109375,
      "step": 32390,
      "training_step_time": 0.46534085273742676
    },
    {
      "epoch": 0.000197698974609375,
      "model_forward_time": 0.1143805980682373,
      "step": 32391
    },
    {
      "epoch": 0.000197698974609375,
      "step": 32391,
      "training_step_time": 0.41895437240600586
    },
    {
      "epoch": 0.000197705078125,
      "model_forward_time": 0.11455512046813965,
      "step": 32392
    },
    {
      "epoch": 0.000197705078125,
      "step": 32392,
      "training_step_time": 0.4745607376098633
    },
    {
      "epoch": 0.000197711181640625,
      "model_forward_time": 0.11476707458496094,
      "step": 32393
    },
    {
      "epoch": 0.000197711181640625,
      "step": 32393,
      "training_step_time": 0.3648056983947754
    },
    {
      "epoch": 0.00019771728515625,
      "model_forward_time": 0.11492371559143066,
      "step": 32394
    },
    {
      "epoch": 0.00019771728515625,
      "step": 32394,
      "training_step_time": 0.4311690330505371
    },
    {
      "epoch": 0.000197723388671875,
      "model_forward_time": 0.11527633666992188,
      "step": 32395
    },
    {
      "epoch": 0.000197723388671875,
      "step": 32395,
      "training_step_time": 0.48913025856018066
    },
    {
      "epoch": 0.0001977294921875,
      "model_forward_time": 0.11489510536193848,
      "step": 32396
    },
    {
      "epoch": 0.0001977294921875,
      "step": 32396,
      "training_step_time": 0.38900184631347656
    },
    {
      "epoch": 0.000197735595703125,
      "model_forward_time": 0.11498665809631348,
      "step": 32397
    },
    {
      "epoch": 0.000197735595703125,
      "step": 32397,
      "training_step_time": 0.3814280033111572
    },
    {
      "epoch": 0.00019774169921875,
      "model_forward_time": 0.11533999443054199,
      "step": 32398
    },
    {
      "epoch": 0.00019774169921875,
      "step": 32398,
      "training_step_time": 0.49422407150268555
    },
    {
      "epoch": 0.000197747802734375,
      "model_forward_time": 0.11449241638183594,
      "step": 32399
    },
    {
      "epoch": 0.000197747802734375,
      "step": 32399,
      "training_step_time": 0.4088134765625
    },
    {
      "epoch": 0.00019775390625,
      "grad_norm": 0.11021725088357925,
      "learning_rate": 4.7520812266338885e-05,
      "loss": 0.0429,
      "step": 32400
    },
    {
      "epoch": 0.00019775390625,
      "model_forward_time": 0.11485433578491211,
      "step": 32400
    },
    {
      "epoch": 0.00019775390625,
      "step": 32400,
      "training_step_time": 0.4960930347442627
    },
    {
      "epoch": 0.000197760009765625,
      "model_forward_time": 0.11542320251464844,
      "step": 32401
    },
    {
      "epoch": 0.000197760009765625,
      "step": 32401,
      "training_step_time": 0.39641499519348145
    },
    {
      "epoch": 0.00019776611328125,
      "model_forward_time": 0.1152334213256836,
      "step": 32402
    },
    {
      "epoch": 0.00019776611328125,
      "step": 32402,
      "training_step_time": 0.382648229598999
    },
    {
      "epoch": 0.000197772216796875,
      "model_forward_time": 0.11478281021118164,
      "step": 32403
    },
    {
      "epoch": 0.000197772216796875,
      "step": 32403,
      "training_step_time": 0.39917778968811035
    },
    {
      "epoch": 0.0001977783203125,
      "model_forward_time": 0.1148061752319336,
      "step": 32404
    },
    {
      "epoch": 0.0001977783203125,
      "step": 32404,
      "training_step_time": 0.4053914546966553
    },
    {
      "epoch": 0.000197784423828125,
      "model_forward_time": 0.11526846885681152,
      "step": 32405
    },
    {
      "epoch": 0.000197784423828125,
      "step": 32405,
      "training_step_time": 0.42061614990234375
    },
    {
      "epoch": 0.00019779052734375,
      "model_forward_time": 0.11484789848327637,
      "step": 32406
    },
    {
      "epoch": 0.00019779052734375,
      "step": 32406,
      "training_step_time": 0.42626094818115234
    },
    {
      "epoch": 0.000197796630859375,
      "model_forward_time": 0.11575651168823242,
      "step": 32407
    },
    {
      "epoch": 0.000197796630859375,
      "step": 32407,
      "training_step_time": 0.6117532253265381
    },
    {
      "epoch": 0.000197802734375,
      "model_forward_time": 0.11530852317810059,
      "step": 32408
    },
    {
      "epoch": 0.000197802734375,
      "step": 32408,
      "training_step_time": 0.398784875869751
    },
    {
      "epoch": 0.000197808837890625,
      "model_forward_time": 0.11545395851135254,
      "step": 32409
    },
    {
      "epoch": 0.000197808837890625,
      "step": 32409,
      "training_step_time": 0.42403244972229004
    },
    {
      "epoch": 0.00019781494140625,
      "grad_norm": 0.1828891485929489,
      "learning_rate": 4.749328871097512e-05,
      "loss": 0.0427,
      "step": 32410
    },
    {
      "epoch": 0.00019781494140625,
      "model_forward_time": 0.11493372917175293,
      "step": 32410
    },
    {
      "epoch": 0.00019781494140625,
      "step": 32410,
      "training_step_time": 0.39093685150146484
    },
    {
      "epoch": 0.000197821044921875,
      "model_forward_time": 0.1148672103881836,
      "step": 32411
    },
    {
      "epoch": 0.000197821044921875,
      "step": 32411,
      "training_step_time": 0.39433717727661133
    },
    {
      "epoch": 0.0001978271484375,
      "model_forward_time": 0.11531352996826172,
      "step": 32412
    },
    {
      "epoch": 0.0001978271484375,
      "step": 32412,
      "training_step_time": 0.40485239028930664
    },
    {
      "epoch": 0.000197833251953125,
      "model_forward_time": 0.11497211456298828,
      "step": 32413
    },
    {
      "epoch": 0.000197833251953125,
      "step": 32413,
      "training_step_time": 0.7125043869018555
    },
    {
      "epoch": 0.00019783935546875,
      "model_forward_time": 0.11502242088317871,
      "step": 32414
    },
    {
      "epoch": 0.00019783935546875,
      "step": 32414,
      "training_step_time": 0.3940255641937256
    },
    {
      "epoch": 0.000197845458984375,
      "model_forward_time": 0.11454153060913086,
      "step": 32415
    },
    {
      "epoch": 0.000197845458984375,
      "step": 32415,
      "training_step_time": 0.4001312255859375
    },
    {
      "epoch": 0.0001978515625,
      "model_forward_time": 0.11444926261901855,
      "step": 32416
    },
    {
      "epoch": 0.0001978515625,
      "step": 32416,
      "training_step_time": 0.41113758087158203
    },
    {
      "epoch": 0.000197857666015625,
      "model_forward_time": 0.11431717872619629,
      "step": 32417
    },
    {
      "epoch": 0.000197857666015625,
      "step": 32417,
      "training_step_time": 0.4482407569885254
    },
    {
      "epoch": 0.00019786376953125,
      "model_forward_time": 0.11461806297302246,
      "step": 32418
    },
    {
      "epoch": 0.00019786376953125,
      "step": 32418,
      "training_step_time": 0.4824075698852539
    },
    {
      "epoch": 0.000197869873046875,
      "model_forward_time": 0.11468267440795898,
      "step": 32419
    },
    {
      "epoch": 0.000197869873046875,
      "step": 32419,
      "training_step_time": 0.44271206855773926
    },
    {
      "epoch": 0.0001978759765625,
      "grad_norm": 0.23942165076732635,
      "learning_rate": 4.746576591708403e-05,
      "loss": 0.0447,
      "step": 32420
    },
    {
      "epoch": 0.0001978759765625,
      "model_forward_time": 0.11478710174560547,
      "step": 32420
    },
    {
      "epoch": 0.0001978759765625,
      "step": 32420,
      "training_step_time": 0.4600338935852051
    },
    {
      "epoch": 0.000197882080078125,
      "model_forward_time": 0.11572122573852539,
      "step": 32421
    },
    {
      "epoch": 0.000197882080078125,
      "step": 32421,
      "training_step_time": 0.38825178146362305
    },
    {
      "epoch": 0.00019788818359375,
      "model_forward_time": 0.11490845680236816,
      "step": 32422
    },
    {
      "epoch": 0.00019788818359375,
      "step": 32422,
      "training_step_time": 0.42655348777770996
    },
    {
      "epoch": 0.000197894287109375,
      "model_forward_time": 0.11455512046813965,
      "step": 32423
    },
    {
      "epoch": 0.000197894287109375,
      "step": 32423,
      "training_step_time": 0.39792656898498535
    },
    {
      "epoch": 0.000197900390625,
      "model_forward_time": 0.1151885986328125,
      "step": 32424
    },
    {
      "epoch": 0.000197900390625,
      "step": 32424,
      "training_step_time": 0.39225053787231445
    },
    {
      "epoch": 0.000197906494140625,
      "model_forward_time": 0.11518669128417969,
      "step": 32425
    },
    {
      "epoch": 0.000197906494140625,
      "step": 32425,
      "training_step_time": 0.5516929626464844
    },
    {
      "epoch": 0.00019791259765625,
      "model_forward_time": 0.1154181957244873,
      "step": 32426
    },
    {
      "epoch": 0.00019791259765625,
      "step": 32426,
      "training_step_time": 0.4492661952972412
    },
    {
      "epoch": 0.000197918701171875,
      "model_forward_time": 0.11513662338256836,
      "step": 32427
    },
    {
      "epoch": 0.000197918701171875,
      "step": 32427,
      "training_step_time": 0.42041516304016113
    },
    {
      "epoch": 0.0001979248046875,
      "model_forward_time": 0.11512088775634766,
      "step": 32428
    },
    {
      "epoch": 0.0001979248046875,
      "step": 32428,
      "training_step_time": 0.42169952392578125
    },
    {
      "epoch": 0.000197930908203125,
      "model_forward_time": 0.11477065086364746,
      "step": 32429
    },
    {
      "epoch": 0.000197930908203125,
      "step": 32429,
      "training_step_time": 0.3928864002227783
    },
    {
      "epoch": 0.00019793701171875,
      "grad_norm": 0.1110406368970871,
      "learning_rate": 4.743824389302635e-05,
      "loss": 0.0431,
      "step": 32430
    },
    {
      "epoch": 0.00019793701171875,
      "model_forward_time": 0.11482977867126465,
      "step": 32430
    },
    {
      "epoch": 0.00019793701171875,
      "step": 32430,
      "training_step_time": 0.42714858055114746
    },
    {
      "epoch": 0.000197943115234375,
      "model_forward_time": 0.11665487289428711,
      "step": 32431
    },
    {
      "epoch": 0.000197943115234375,
      "step": 32431,
      "training_step_time": 0.5267665386199951
    },
    {
      "epoch": 0.00019794921875,
      "model_forward_time": 0.11553263664245605,
      "step": 32432
    },
    {
      "epoch": 0.00019794921875,
      "step": 32432,
      "training_step_time": 0.42286181449890137
    },
    {
      "epoch": 0.000197955322265625,
      "model_forward_time": 0.11508631706237793,
      "step": 32433
    },
    {
      "epoch": 0.000197955322265625,
      "step": 32433,
      "training_step_time": 0.4010040760040283
    },
    {
      "epoch": 0.00019796142578125,
      "model_forward_time": 0.11526679992675781,
      "step": 32434
    },
    {
      "epoch": 0.00019796142578125,
      "step": 32434,
      "training_step_time": 0.505850076675415
    },
    {
      "epoch": 0.000197967529296875,
      "model_forward_time": 0.11486220359802246,
      "step": 32435
    },
    {
      "epoch": 0.000197967529296875,
      "step": 32435,
      "training_step_time": 0.389725923538208
    },
    {
      "epoch": 0.0001979736328125,
      "model_forward_time": 0.11495018005371094,
      "step": 32436
    },
    {
      "epoch": 0.0001979736328125,
      "step": 32436,
      "training_step_time": 0.4194166660308838
    },
    {
      "epoch": 0.000197979736328125,
      "model_forward_time": 0.1148989200592041,
      "step": 32437
    },
    {
      "epoch": 0.000197979736328125,
      "step": 32437,
      "training_step_time": 0.5010712146759033
    },
    {
      "epoch": 0.00019798583984375,
      "model_forward_time": 0.11408042907714844,
      "step": 32438
    },
    {
      "epoch": 0.00019798583984375,
      "step": 32438,
      "training_step_time": 0.38510799407958984
    },
    {
      "epoch": 0.000197991943359375,
      "model_forward_time": 0.11450910568237305,
      "step": 32439
    },
    {
      "epoch": 0.000197991943359375,
      "step": 32439,
      "training_step_time": 0.3995203971862793
    },
    {
      "epoch": 0.000197998046875,
      "grad_norm": 0.14249102771282196,
      "learning_rate": 4.741072264716252e-05,
      "loss": 0.0493,
      "step": 32440
    },
    {
      "epoch": 0.000197998046875,
      "model_forward_time": 0.11471986770629883,
      "step": 32440
    },
    {
      "epoch": 0.000197998046875,
      "step": 32440,
      "training_step_time": 0.39322876930236816
    },
    {
      "epoch": 0.000198004150390625,
      "model_forward_time": 0.11521697044372559,
      "step": 32441
    },
    {
      "epoch": 0.000198004150390625,
      "step": 32441,
      "training_step_time": 0.4226536750793457
    },
    {
      "epoch": 0.00019801025390625,
      "model_forward_time": 0.1150662899017334,
      "step": 32442
    },
    {
      "epoch": 0.00019801025390625,
      "step": 32442,
      "training_step_time": 0.44244384765625
    },
    {
      "epoch": 0.000198016357421875,
      "model_forward_time": 0.11487293243408203,
      "step": 32443
    },
    {
      "epoch": 0.000198016357421875,
      "step": 32443,
      "training_step_time": 0.5591328144073486
    },
    {
      "epoch": 0.0001980224609375,
      "model_forward_time": 0.11476612091064453,
      "step": 32444
    },
    {
      "epoch": 0.0001980224609375,
      "step": 32444,
      "training_step_time": 0.42646050453186035
    },
    {
      "epoch": 0.000198028564453125,
      "model_forward_time": 0.11551260948181152,
      "step": 32445
    },
    {
      "epoch": 0.000198028564453125,
      "step": 32445,
      "training_step_time": 0.39391636848449707
    },
    {
      "epoch": 0.00019803466796875,
      "model_forward_time": 0.11466670036315918,
      "step": 32446
    },
    {
      "epoch": 0.00019803466796875,
      "step": 32446,
      "training_step_time": 0.42194294929504395
    },
    {
      "epoch": 0.000198040771484375,
      "model_forward_time": 0.1144711971282959,
      "step": 32447
    },
    {
      "epoch": 0.000198040771484375,
      "step": 32447,
      "training_step_time": 0.4446690082550049
    },
    {
      "epoch": 0.000198046875,
      "model_forward_time": 0.11445832252502441,
      "step": 32448
    },
    {
      "epoch": 0.000198046875,
      "step": 32448,
      "training_step_time": 0.4072084426879883
    },
    {
      "epoch": 0.000198052978515625,
      "model_forward_time": 0.11484622955322266,
      "step": 32449
    },
    {
      "epoch": 0.000198052978515625,
      "step": 32449,
      "training_step_time": 0.5053126811981201
    },
    {
      "epoch": 0.00019805908203125,
      "grad_norm": 0.14122456312179565,
      "learning_rate": 4.738320218785281e-05,
      "loss": 0.0384,
      "step": 32450
    },
    {
      "epoch": 0.00019805908203125,
      "model_forward_time": 0.11489725112915039,
      "step": 32450
    },
    {
      "epoch": 0.00019805908203125,
      "step": 32450,
      "training_step_time": 0.48131680488586426
    },
    {
      "epoch": 0.000198065185546875,
      "model_forward_time": 0.11458420753479004,
      "step": 32451
    },
    {
      "epoch": 0.000198065185546875,
      "step": 32451,
      "training_step_time": 0.4115574359893799
    },
    {
      "epoch": 0.0001980712890625,
      "model_forward_time": 0.11486244201660156,
      "step": 32452
    },
    {
      "epoch": 0.0001980712890625,
      "step": 32452,
      "training_step_time": 0.4001040458679199
    },
    {
      "epoch": 0.000198077392578125,
      "model_forward_time": 0.1143045425415039,
      "step": 32453
    },
    {
      "epoch": 0.000198077392578125,
      "step": 32453,
      "training_step_time": 0.3930845260620117
    },
    {
      "epoch": 0.00019808349609375,
      "model_forward_time": 0.1145789623260498,
      "step": 32454
    },
    {
      "epoch": 0.00019808349609375,
      "step": 32454,
      "training_step_time": 0.4457285404205322
    },
    {
      "epoch": 0.000198089599609375,
      "model_forward_time": 0.11505746841430664,
      "step": 32455
    },
    {
      "epoch": 0.000198089599609375,
      "step": 32455,
      "training_step_time": 0.52549147605896
    },
    {
      "epoch": 0.000198095703125,
      "model_forward_time": 0.11515331268310547,
      "step": 32456
    },
    {
      "epoch": 0.000198095703125,
      "step": 32456,
      "training_step_time": 0.42740488052368164
    },
    {
      "epoch": 0.000198101806640625,
      "model_forward_time": 0.11478900909423828,
      "step": 32457
    },
    {
      "epoch": 0.000198101806640625,
      "step": 32457,
      "training_step_time": 0.42812061309814453
    },
    {
      "epoch": 0.00019810791015625,
      "model_forward_time": 0.11460113525390625,
      "step": 32458
    },
    {
      "epoch": 0.00019810791015625,
      "step": 32458,
      "training_step_time": 0.46519041061401367
    },
    {
      "epoch": 0.000198114013671875,
      "model_forward_time": 0.11500191688537598,
      "step": 32459
    },
    {
      "epoch": 0.000198114013671875,
      "step": 32459,
      "training_step_time": 0.3887672424316406
    },
    {
      "epoch": 0.0001981201171875,
      "grad_norm": 0.12096963822841644,
      "learning_rate": 4.735568252345718e-05,
      "loss": 0.0419,
      "step": 32460
    },
    {
      "epoch": 0.0001981201171875,
      "model_forward_time": 0.11500072479248047,
      "step": 32460
    },
    {
      "epoch": 0.0001981201171875,
      "step": 32460,
      "training_step_time": 0.429764986038208
    },
    {
      "epoch": 0.000198126220703125,
      "model_forward_time": 0.11609840393066406,
      "step": 32461
    },
    {
      "epoch": 0.000198126220703125,
      "step": 32461,
      "training_step_time": 0.5113661289215088
    },
    {
      "epoch": 0.00019813232421875,
      "model_forward_time": 0.11496496200561523,
      "step": 32462
    },
    {
      "epoch": 0.00019813232421875,
      "step": 32462,
      "training_step_time": 0.415865421295166
    },
    {
      "epoch": 0.000198138427734375,
      "model_forward_time": 0.11525201797485352,
      "step": 32463
    },
    {
      "epoch": 0.000198138427734375,
      "step": 32463,
      "training_step_time": 0.3860011100769043
    },
    {
      "epoch": 0.00019814453125,
      "model_forward_time": 0.11572551727294922,
      "step": 32464
    },
    {
      "epoch": 0.00019814453125,
      "step": 32464,
      "training_step_time": 0.40238213539123535
    },
    {
      "epoch": 0.000198150634765625,
      "model_forward_time": 0.11530280113220215,
      "step": 32465
    },
    {
      "epoch": 0.000198150634765625,
      "step": 32465,
      "training_step_time": 0.44370222091674805
    },
    {
      "epoch": 0.00019815673828125,
      "model_forward_time": 0.11479425430297852,
      "step": 32466
    },
    {
      "epoch": 0.00019815673828125,
      "step": 32466,
      "training_step_time": 0.3929760456085205
    },
    {
      "epoch": 0.000198162841796875,
      "model_forward_time": 0.11560201644897461,
      "step": 32467
    },
    {
      "epoch": 0.000198162841796875,
      "step": 32467,
      "training_step_time": 0.6866443157196045
    },
    {
      "epoch": 0.0001981689453125,
      "model_forward_time": 0.11557841300964355,
      "step": 32468
    },
    {
      "epoch": 0.0001981689453125,
      "step": 32468,
      "training_step_time": 0.43612146377563477
    },
    {
      "epoch": 0.000198175048828125,
      "model_forward_time": 0.11505556106567383,
      "step": 32469
    },
    {
      "epoch": 0.000198175048828125,
      "step": 32469,
      "training_step_time": 0.4327266216278076
    },
    {
      "epoch": 0.00019818115234375,
      "grad_norm": 0.16381147503852844,
      "learning_rate": 4.7328163662335364e-05,
      "loss": 0.043,
      "step": 32470
    },
    {
      "epoch": 0.00019818115234375,
      "model_forward_time": 0.11463022232055664,
      "step": 32470
    },
    {
      "epoch": 0.00019818115234375,
      "step": 32470,
      "training_step_time": 0.4012134075164795
    },
    {
      "epoch": 0.000198187255859375,
      "model_forward_time": 0.1172332763671875,
      "step": 32471
    },
    {
      "epoch": 0.000198187255859375,
      "step": 32471,
      "training_step_time": 0.4172513484954834
    },
    {
      "epoch": 0.000198193359375,
      "model_forward_time": 0.11450743675231934,
      "step": 32472
    },
    {
      "epoch": 0.000198193359375,
      "step": 32472,
      "training_step_time": 0.41321301460266113
    },
    {
      "epoch": 0.000198199462890625,
      "model_forward_time": 0.11517786979675293,
      "step": 32473
    },
    {
      "epoch": 0.000198199462890625,
      "step": 32473,
      "training_step_time": 0.5953540802001953
    },
    {
      "epoch": 0.00019820556640625,
      "model_forward_time": 0.11488103866577148,
      "step": 32474
    },
    {
      "epoch": 0.00019820556640625,
      "step": 32474,
      "training_step_time": 0.43428730964660645
    },
    {
      "epoch": 0.000198211669921875,
      "model_forward_time": 0.11572861671447754,
      "step": 32475
    },
    {
      "epoch": 0.000198211669921875,
      "step": 32475,
      "training_step_time": 0.4119744300842285
    },
    {
      "epoch": 0.0001982177734375,
      "model_forward_time": 0.11426401138305664,
      "step": 32476
    },
    {
      "epoch": 0.0001982177734375,
      "step": 32476,
      "training_step_time": 0.41738057136535645
    },
    {
      "epoch": 0.000198223876953125,
      "model_forward_time": 0.11532020568847656,
      "step": 32477
    },
    {
      "epoch": 0.000198223876953125,
      "step": 32477,
      "training_step_time": 0.4090578556060791
    },
    {
      "epoch": 0.00019822998046875,
      "model_forward_time": 0.11481881141662598,
      "step": 32478
    },
    {
      "epoch": 0.00019822998046875,
      "step": 32478,
      "training_step_time": 0.4725799560546875
    },
    {
      "epoch": 0.000198236083984375,
      "model_forward_time": 0.11561203002929688,
      "step": 32479
    },
    {
      "epoch": 0.000198236083984375,
      "step": 32479,
      "training_step_time": 0.5606498718261719
    },
    {
      "epoch": 0.0001982421875,
      "grad_norm": 0.1641829013824463,
      "learning_rate": 4.7300645612846907e-05,
      "loss": 0.0367,
      "step": 32480
    },
    {
      "epoch": 0.0001982421875,
      "model_forward_time": 0.11490535736083984,
      "step": 32480
    },
    {
      "epoch": 0.0001982421875,
      "step": 32480,
      "training_step_time": 0.39888525009155273
    },
    {
      "epoch": 0.000198248291015625,
      "model_forward_time": 0.11494135856628418,
      "step": 32481
    },
    {
      "epoch": 0.000198248291015625,
      "step": 32481,
      "training_step_time": 0.45579051971435547
    },
    {
      "epoch": 0.00019825439453125,
      "model_forward_time": 0.11494040489196777,
      "step": 32482
    },
    {
      "epoch": 0.00019825439453125,
      "step": 32482,
      "training_step_time": 0.4249913692474365
    },
    {
      "epoch": 0.000198260498046875,
      "model_forward_time": 0.11429166793823242,
      "step": 32483
    },
    {
      "epoch": 0.000198260498046875,
      "step": 32483,
      "training_step_time": 0.42020082473754883
    },
    {
      "epoch": 0.0001982666015625,
      "model_forward_time": 0.11548280715942383,
      "step": 32484
    },
    {
      "epoch": 0.0001982666015625,
      "step": 32484,
      "training_step_time": 0.46866655349731445
    },
    {
      "epoch": 0.000198272705078125,
      "model_forward_time": 0.11591601371765137,
      "step": 32485
    },
    {
      "epoch": 0.000198272705078125,
      "step": 32485,
      "training_step_time": 0.4822523593902588
    },
    {
      "epoch": 0.00019827880859375,
      "model_forward_time": 0.11453628540039062,
      "step": 32486
    },
    {
      "epoch": 0.00019827880859375,
      "step": 32486,
      "training_step_time": 0.3943932056427002
    },
    {
      "epoch": 0.000198284912109375,
      "model_forward_time": 0.11553025245666504,
      "step": 32487
    },
    {
      "epoch": 0.000198284912109375,
      "step": 32487,
      "training_step_time": 0.38753223419189453
    },
    {
      "epoch": 0.000198291015625,
      "model_forward_time": 0.11512088775634766,
      "step": 32488
    },
    {
      "epoch": 0.000198291015625,
      "step": 32488,
      "training_step_time": 0.40844297409057617
    },
    {
      "epoch": 0.000198297119140625,
      "model_forward_time": 0.11479997634887695,
      "step": 32489
    },
    {
      "epoch": 0.000198297119140625,
      "step": 32489,
      "training_step_time": 0.39825010299682617
    },
    {
      "epoch": 0.00019830322265625,
      "grad_norm": 0.16111676394939423,
      "learning_rate": 4.7273128383351015e-05,
      "loss": 0.0462,
      "step": 32490
    },
    {
      "epoch": 0.00019830322265625,
      "model_forward_time": 0.11499619483947754,
      "step": 32490
    },
    {
      "epoch": 0.00019830322265625,
      "step": 32490,
      "training_step_time": 0.4865844249725342
    },
    {
      "epoch": 0.000198309326171875,
      "model_forward_time": 0.11530113220214844,
      "step": 32491
    },
    {
      "epoch": 0.000198309326171875,
      "step": 32491,
      "training_step_time": 0.5095679759979248
    },
    {
      "epoch": 0.0001983154296875,
      "model_forward_time": 0.11577463150024414,
      "step": 32492
    },
    {
      "epoch": 0.0001983154296875,
      "step": 32492,
      "training_step_time": 0.401888370513916
    },
    {
      "epoch": 0.000198321533203125,
      "model_forward_time": 0.11655306816101074,
      "step": 32493
    },
    {
      "epoch": 0.000198321533203125,
      "step": 32493,
      "training_step_time": 0.4256715774536133
    },
    {
      "epoch": 0.00019832763671875,
      "model_forward_time": 0.11509418487548828,
      "step": 32494
    },
    {
      "epoch": 0.00019832763671875,
      "step": 32494,
      "training_step_time": 0.3933281898498535
    },
    {
      "epoch": 0.000198333740234375,
      "model_forward_time": 0.11527848243713379,
      "step": 32495
    },
    {
      "epoch": 0.000198333740234375,
      "step": 32495,
      "training_step_time": 0.47728705406188965
    },
    {
      "epoch": 0.00019833984375,
      "model_forward_time": 0.11492657661437988,
      "step": 32496
    },
    {
      "epoch": 0.00019833984375,
      "step": 32496,
      "training_step_time": 0.4982171058654785
    },
    {
      "epoch": 0.000198345947265625,
      "model_forward_time": 0.1146860122680664,
      "step": 32497
    },
    {
      "epoch": 0.000198345947265625,
      "step": 32497,
      "training_step_time": 0.47084665298461914
    },
    {
      "epoch": 0.00019835205078125,
      "model_forward_time": 0.11543726921081543,
      "step": 32498
    },
    {
      "epoch": 0.00019835205078125,
      "step": 32498,
      "training_step_time": 0.3964407444000244
    },
    {
      "epoch": 0.000198358154296875,
      "model_forward_time": 0.11603426933288574,
      "step": 32499
    },
    {
      "epoch": 0.000198358154296875,
      "step": 32499,
      "training_step_time": 0.39755725860595703
    },
    {
      "epoch": 0.0001983642578125,
      "grad_norm": 0.12840600311756134,
      "learning_rate": 4.7245611982206724e-05,
      "loss": 0.0425,
      "step": 32500
    },
    {
      "epoch": 0.0001983642578125,
      "model_forward_time": 0.11501646041870117,
      "step": 32500
    },
    {
      "epoch": 0.0001983642578125,
      "step": 32500,
      "training_step_time": 0.39820313453674316
    },
    {
      "epoch": 0.000198370361328125,
      "model_forward_time": 0.1148841381072998,
      "step": 32501
    },
    {
      "epoch": 0.000198370361328125,
      "step": 32501,
      "training_step_time": 0.3975811004638672
    },
    {
      "epoch": 0.00019837646484375,
      "model_forward_time": 0.11461949348449707,
      "step": 32502
    },
    {
      "epoch": 0.00019837646484375,
      "step": 32502,
      "training_step_time": 0.5072522163391113
    },
    {
      "epoch": 0.000198382568359375,
      "model_forward_time": 0.11473536491394043,
      "step": 32503
    },
    {
      "epoch": 0.000198382568359375,
      "step": 32503,
      "training_step_time": 0.6638655662536621
    },
    {
      "epoch": 0.000198388671875,
      "model_forward_time": 0.11522221565246582,
      "step": 32504
    },
    {
      "epoch": 0.000198388671875,
      "step": 32504,
      "training_step_time": 0.4444456100463867
    },
    {
      "epoch": 0.000198394775390625,
      "model_forward_time": 0.11609601974487305,
      "step": 32505
    },
    {
      "epoch": 0.000198394775390625,
      "step": 32505,
      "training_step_time": 0.4253571033477783
    },
    {
      "epoch": 0.00019840087890625,
      "model_forward_time": 0.11469459533691406,
      "step": 32506
    },
    {
      "epoch": 0.00019840087890625,
      "step": 32506,
      "training_step_time": 0.5005459785461426
    },
    {
      "epoch": 0.000198406982421875,
      "model_forward_time": 0.11466336250305176,
      "step": 32507
    },
    {
      "epoch": 0.000198406982421875,
      "step": 32507,
      "training_step_time": 0.384676456451416
    },
    {
      "epoch": 0.0001984130859375,
      "model_forward_time": 0.11483979225158691,
      "step": 32508
    },
    {
      "epoch": 0.0001984130859375,
      "step": 32508,
      "training_step_time": 0.4867246150970459
    },
    {
      "epoch": 0.000198419189453125,
      "model_forward_time": 0.1145775318145752,
      "step": 32509
    },
    {
      "epoch": 0.000198419189453125,
      "step": 32509,
      "training_step_time": 0.4317739009857178
    },
    {
      "epoch": 0.00019842529296875,
      "grad_norm": 0.1277080625295639,
      "learning_rate": 4.721809641777281e-05,
      "loss": 0.0437,
      "step": 32510
    },
    {
      "epoch": 0.00019842529296875,
      "model_forward_time": 0.11438608169555664,
      "step": 32510
    },
    {
      "epoch": 0.00019842529296875,
      "step": 32510,
      "training_step_time": 0.49506616592407227
    },
    {
      "epoch": 0.000198431396484375,
      "model_forward_time": 0.1143350601196289,
      "step": 32511
    },
    {
      "epoch": 0.000198431396484375,
      "step": 32511,
      "training_step_time": 0.39940690994262695
    },
    {
      "epoch": 0.0001984375,
      "model_forward_time": 0.11518335342407227,
      "step": 32512
    },
    {
      "epoch": 0.0001984375,
      "step": 32512,
      "training_step_time": 0.39494752883911133
    },
    {
      "epoch": 0.000198443603515625,
      "model_forward_time": 0.11531591415405273,
      "step": 32513
    },
    {
      "epoch": 0.000198443603515625,
      "step": 32513,
      "training_step_time": 0.3945937156677246
    },
    {
      "epoch": 0.00019844970703125,
      "model_forward_time": 0.11441302299499512,
      "step": 32514
    },
    {
      "epoch": 0.00019844970703125,
      "step": 32514,
      "training_step_time": 0.408111572265625
    },
    {
      "epoch": 0.000198455810546875,
      "model_forward_time": 0.11552095413208008,
      "step": 32515
    },
    {
      "epoch": 0.000198455810546875,
      "step": 32515,
      "training_step_time": 0.4064524173736572
    },
    {
      "epoch": 0.0001984619140625,
      "model_forward_time": 0.11492085456848145,
      "step": 32516
    },
    {
      "epoch": 0.0001984619140625,
      "step": 32516,
      "training_step_time": 0.4301905632019043
    },
    {
      "epoch": 0.000198468017578125,
      "model_forward_time": 0.1151726245880127,
      "step": 32517
    },
    {
      "epoch": 0.000198468017578125,
      "step": 32517,
      "training_step_time": 0.4009692668914795
    },
    {
      "epoch": 0.00019847412109375,
      "model_forward_time": 0.1151418685913086,
      "step": 32518
    },
    {
      "epoch": 0.00019847412109375,
      "step": 32518,
      "training_step_time": 0.43410706520080566
    },
    {
      "epoch": 0.000198480224609375,
      "model_forward_time": 0.11495304107666016,
      "step": 32519
    },
    {
      "epoch": 0.000198480224609375,
      "step": 32519,
      "training_step_time": 0.4075026512145996
    },
    {
      "epoch": 0.000198486328125,
      "grad_norm": 0.16149954497814178,
      "learning_rate": 4.7190581698407725e-05,
      "loss": 0.0413,
      "step": 32520
    },
    {
      "epoch": 0.000198486328125,
      "model_forward_time": 0.11552309989929199,
      "step": 32520
    },
    {
      "epoch": 0.000198486328125,
      "step": 32520,
      "training_step_time": 0.42484450340270996
    },
    {
      "epoch": 0.000198492431640625,
      "model_forward_time": 0.1164710521697998,
      "step": 32521
    },
    {
      "epoch": 0.000198492431640625,
      "step": 32521,
      "training_step_time": 0.7463095188140869
    },
    {
      "epoch": 0.00019849853515625,
      "model_forward_time": 0.11427950859069824,
      "step": 32522
    },
    {
      "epoch": 0.00019849853515625,
      "step": 32522,
      "training_step_time": 0.41866612434387207
    },
    {
      "epoch": 0.000198504638671875,
      "model_forward_time": 0.11453628540039062,
      "step": 32523
    },
    {
      "epoch": 0.000198504638671875,
      "step": 32523,
      "training_step_time": 0.410275936126709
    },
    {
      "epoch": 0.0001985107421875,
      "model_forward_time": 0.11416220664978027,
      "step": 32524
    },
    {
      "epoch": 0.0001985107421875,
      "step": 32524,
      "training_step_time": 0.4832446575164795
    },
    {
      "epoch": 0.000198516845703125,
      "model_forward_time": 0.11447978019714355,
      "step": 32525
    },
    {
      "epoch": 0.000198516845703125,
      "step": 32525,
      "training_step_time": 0.38719844818115234
    },
    {
      "epoch": 0.00019852294921875,
      "model_forward_time": 0.11436009407043457,
      "step": 32526
    },
    {
      "epoch": 0.00019852294921875,
      "step": 32526,
      "training_step_time": 0.3929736614227295
    },
    {
      "epoch": 0.000198529052734375,
      "model_forward_time": 0.11506295204162598,
      "step": 32527
    },
    {
      "epoch": 0.000198529052734375,
      "step": 32527,
      "training_step_time": 0.5411818027496338
    },
    {
      "epoch": 0.00019853515625,
      "model_forward_time": 0.1146230697631836,
      "step": 32528
    },
    {
      "epoch": 0.00019853515625,
      "step": 32528,
      "training_step_time": 0.38646697998046875
    },
    {
      "epoch": 0.000198541259765625,
      "model_forward_time": 0.11522078514099121,
      "step": 32529
    },
    {
      "epoch": 0.000198541259765625,
      "step": 32529,
      "training_step_time": 0.39949607849121094
    },
    {
      "epoch": 0.00019854736328125,
      "grad_norm": 0.17266136407852173,
      "learning_rate": 4.716306783246977e-05,
      "loss": 0.0417,
      "step": 32530
    },
    {
      "epoch": 0.00019854736328125,
      "model_forward_time": 0.11514902114868164,
      "step": 32530
    },
    {
      "epoch": 0.00019854736328125,
      "step": 32530,
      "training_step_time": 0.40199804306030273
    },
    {
      "epoch": 0.000198553466796875,
      "model_forward_time": 0.11505985260009766,
      "step": 32531
    },
    {
      "epoch": 0.000198553466796875,
      "step": 32531,
      "training_step_time": 0.4277679920196533
    },
    {
      "epoch": 0.0001985595703125,
      "model_forward_time": 0.1148519515991211,
      "step": 32532
    },
    {
      "epoch": 0.0001985595703125,
      "step": 32532,
      "training_step_time": 0.41399288177490234
    },
    {
      "epoch": 0.000198565673828125,
      "model_forward_time": 0.11541342735290527,
      "step": 32533
    },
    {
      "epoch": 0.000198565673828125,
      "step": 32533,
      "training_step_time": 0.573397159576416
    },
    {
      "epoch": 0.00019857177734375,
      "model_forward_time": 0.11479926109313965,
      "step": 32534
    },
    {
      "epoch": 0.00019857177734375,
      "step": 32534,
      "training_step_time": 0.4659876823425293
    },
    {
      "epoch": 0.000198577880859375,
      "model_forward_time": 0.11484551429748535,
      "step": 32535
    },
    {
      "epoch": 0.000198577880859375,
      "step": 32535,
      "training_step_time": 0.42663121223449707
    },
    {
      "epoch": 0.000198583984375,
      "model_forward_time": 0.11514735221862793,
      "step": 32536
    },
    {
      "epoch": 0.000198583984375,
      "step": 32536,
      "training_step_time": 0.4642970561981201
    },
    {
      "epoch": 0.000198590087890625,
      "model_forward_time": 0.11424803733825684,
      "step": 32537
    },
    {
      "epoch": 0.000198590087890625,
      "step": 32537,
      "training_step_time": 0.483518123626709
    },
    {
      "epoch": 0.00019859619140625,
      "model_forward_time": 0.11540794372558594,
      "step": 32538
    },
    {
      "epoch": 0.00019859619140625,
      "step": 32538,
      "training_step_time": 0.3989291191101074
    },
    {
      "epoch": 0.000198602294921875,
      "model_forward_time": 0.11596202850341797,
      "step": 32539
    },
    {
      "epoch": 0.000198602294921875,
      "step": 32539,
      "training_step_time": 0.3929586410522461
    },
    {
      "epoch": 0.0001986083984375,
      "grad_norm": 0.14773835241794586,
      "learning_rate": 4.713555482831688e-05,
      "loss": 0.0481,
      "step": 32540
    },
    {
      "epoch": 0.0001986083984375,
      "model_forward_time": 0.11482834815979004,
      "step": 32540
    },
    {
      "epoch": 0.0001986083984375,
      "step": 32540,
      "training_step_time": 0.39242053031921387
    },
    {
      "epoch": 0.000198614501953125,
      "model_forward_time": 0.11533331871032715,
      "step": 32541
    },
    {
      "epoch": 0.000198614501953125,
      "step": 32541,
      "training_step_time": 0.39238595962524414
    },
    {
      "epoch": 0.00019862060546875,
      "model_forward_time": 0.11468172073364258,
      "step": 32542
    },
    {
      "epoch": 0.00019862060546875,
      "step": 32542,
      "training_step_time": 0.3950328826904297
    },
    {
      "epoch": 0.000198626708984375,
      "model_forward_time": 0.11586523056030273,
      "step": 32543
    },
    {
      "epoch": 0.000198626708984375,
      "step": 32543,
      "training_step_time": 0.39276862144470215
    },
    {
      "epoch": 0.0001986328125,
      "model_forward_time": 0.11538529396057129,
      "step": 32544
    },
    {
      "epoch": 0.0001986328125,
      "step": 32544,
      "training_step_time": 0.44672346115112305
    },
    {
      "epoch": 0.000198638916015625,
      "model_forward_time": 0.11578226089477539,
      "step": 32545
    },
    {
      "epoch": 0.000198638916015625,
      "step": 32545,
      "training_step_time": 0.511014461517334
    },
    {
      "epoch": 0.00019864501953125,
      "model_forward_time": 0.11563754081726074,
      "step": 32546
    },
    {
      "epoch": 0.00019864501953125,
      "step": 32546,
      "training_step_time": 0.4412555694580078
    },
    {
      "epoch": 0.000198651123046875,
      "model_forward_time": 0.1153404712677002,
      "step": 32547
    },
    {
      "epoch": 0.000198651123046875,
      "step": 32547,
      "training_step_time": 0.4586951732635498
    },
    {
      "epoch": 0.0001986572265625,
      "model_forward_time": 0.11497855186462402,
      "step": 32548
    },
    {
      "epoch": 0.0001986572265625,
      "step": 32548,
      "training_step_time": 0.49502992630004883
    },
    {
      "epoch": 0.000198663330078125,
      "model_forward_time": 0.11451554298400879,
      "step": 32549
    },
    {
      "epoch": 0.000198663330078125,
      "step": 32549,
      "training_step_time": 0.387906551361084
    },
    {
      "epoch": 0.00019866943359375,
      "grad_norm": 0.10491228848695755,
      "learning_rate": 4.710804269430681e-05,
      "loss": 0.0414,
      "step": 32550
    },
    {
      "epoch": 0.00019866943359375,
      "model_forward_time": 0.11572098731994629,
      "step": 32550
    },
    {
      "epoch": 0.00019866943359375,
      "step": 32550,
      "training_step_time": 0.44788599014282227
    },
    {
      "epoch": 0.000198675537109375,
      "model_forward_time": 0.11462259292602539,
      "step": 32551
    },
    {
      "epoch": 0.000198675537109375,
      "step": 32551,
      "training_step_time": 0.4766356945037842
    },
    {
      "epoch": 0.000198681640625,
      "model_forward_time": 0.11495518684387207,
      "step": 32552
    },
    {
      "epoch": 0.000198681640625,
      "step": 32552,
      "training_step_time": 0.38472700119018555
    },
    {
      "epoch": 0.000198687744140625,
      "model_forward_time": 0.11498236656188965,
      "step": 32553
    },
    {
      "epoch": 0.000198687744140625,
      "step": 32553,
      "training_step_time": 0.3891713619232178
    },
    {
      "epoch": 0.00019869384765625,
      "model_forward_time": 0.11509418487548828,
      "step": 32554
    },
    {
      "epoch": 0.00019869384765625,
      "step": 32554,
      "training_step_time": 0.392026424407959
    },
    {
      "epoch": 0.000198699951171875,
      "model_forward_time": 0.11514496803283691,
      "step": 32555
    },
    {
      "epoch": 0.000198699951171875,
      "step": 32555,
      "training_step_time": 0.3943510055541992
    },
    {
      "epoch": 0.0001987060546875,
      "model_forward_time": 0.11545205116271973,
      "step": 32556
    },
    {
      "epoch": 0.0001987060546875,
      "step": 32556,
      "training_step_time": 0.40335536003112793
    },
    {
      "epoch": 0.000198712158203125,
      "model_forward_time": 0.11488962173461914,
      "step": 32557
    },
    {
      "epoch": 0.000198712158203125,
      "step": 32557,
      "training_step_time": 0.6350924968719482
    },
    {
      "epoch": 0.00019871826171875,
      "model_forward_time": 0.11562848091125488,
      "step": 32558
    },
    {
      "epoch": 0.00019871826171875,
      "step": 32558,
      "training_step_time": 0.4695403575897217
    },
    {
      "epoch": 0.000198724365234375,
      "model_forward_time": 0.1145029067993164,
      "step": 32559
    },
    {
      "epoch": 0.000198724365234375,
      "step": 32559,
      "training_step_time": 0.40329885482788086
    },
    {
      "epoch": 0.00019873046875,
      "grad_norm": 0.1087459996342659,
      "learning_rate": 4.708053143879701e-05,
      "loss": 0.0421,
      "step": 32560
    },
    {
      "epoch": 0.00019873046875,
      "model_forward_time": 0.11411452293395996,
      "step": 32560
    },
    {
      "epoch": 0.00019873046875,
      "step": 32560,
      "training_step_time": 0.40416646003723145
    },
    {
      "epoch": 0.000198736572265625,
      "model_forward_time": 0.11536717414855957,
      "step": 32561
    },
    {
      "epoch": 0.000198736572265625,
      "step": 32561,
      "training_step_time": 0.4378390312194824
    },
    {
      "epoch": 0.00019874267578125,
      "model_forward_time": 0.1149594783782959,
      "step": 32562
    },
    {
      "epoch": 0.00019874267578125,
      "step": 32562,
      "training_step_time": 0.4582028388977051
    },
    {
      "epoch": 0.000198748779296875,
      "model_forward_time": 0.11752057075500488,
      "step": 32563
    },
    {
      "epoch": 0.000198748779296875,
      "step": 32563,
      "training_step_time": 0.49616026878356934
    },
    {
      "epoch": 0.0001987548828125,
      "model_forward_time": 0.1148824691772461,
      "step": 32564
    },
    {
      "epoch": 0.0001987548828125,
      "step": 32564,
      "training_step_time": 0.38980627059936523
    },
    {
      "epoch": 0.000198760986328125,
      "model_forward_time": 0.11485171318054199,
      "step": 32565
    },
    {
      "epoch": 0.000198760986328125,
      "step": 32565,
      "training_step_time": 0.43647241592407227
    },
    {
      "epoch": 0.00019876708984375,
      "model_forward_time": 0.11485052108764648,
      "step": 32566
    },
    {
      "epoch": 0.00019876708984375,
      "step": 32566,
      "training_step_time": 0.39478230476379395
    },
    {
      "epoch": 0.000198773193359375,
      "model_forward_time": 0.11473321914672852,
      "step": 32567
    },
    {
      "epoch": 0.000198773193359375,
      "step": 32567,
      "training_step_time": 0.3882632255554199
    },
    {
      "epoch": 0.000198779296875,
      "model_forward_time": 0.11560487747192383,
      "step": 32568
    },
    {
      "epoch": 0.000198779296875,
      "step": 32568,
      "training_step_time": 0.4033024311065674
    },
    {
      "epoch": 0.000198785400390625,
      "model_forward_time": 0.11484503746032715,
      "step": 32569
    },
    {
      "epoch": 0.000198785400390625,
      "step": 32569,
      "training_step_time": 0.5927190780639648
    },
    {
      "epoch": 0.00019879150390625,
      "grad_norm": 0.13607551157474518,
      "learning_rate": 4.7053021070144664e-05,
      "loss": 0.0442,
      "step": 32570
    },
    {
      "epoch": 0.00019879150390625,
      "model_forward_time": 0.11470985412597656,
      "step": 32570
    },
    {
      "epoch": 0.00019879150390625,
      "step": 32570,
      "training_step_time": 0.38604021072387695
    },
    {
      "epoch": 0.000198797607421875,
      "model_forward_time": 0.11467337608337402,
      "step": 32571
    },
    {
      "epoch": 0.000198797607421875,
      "step": 32571,
      "training_step_time": 0.3937857151031494
    },
    {
      "epoch": 0.0001988037109375,
      "model_forward_time": 0.11520910263061523,
      "step": 32572
    },
    {
      "epoch": 0.0001988037109375,
      "step": 32572,
      "training_step_time": 0.3994925022125244
    },
    {
      "epoch": 0.000198809814453125,
      "model_forward_time": 0.11506128311157227,
      "step": 32573
    },
    {
      "epoch": 0.000198809814453125,
      "step": 32573,
      "training_step_time": 0.39523839950561523
    },
    {
      "epoch": 0.00019881591796875,
      "model_forward_time": 0.11610078811645508,
      "step": 32574
    },
    {
      "epoch": 0.00019881591796875,
      "step": 32574,
      "training_step_time": 0.48215222358703613
    },
    {
      "epoch": 0.000198822021484375,
      "model_forward_time": 0.11515641212463379,
      "step": 32575
    },
    {
      "epoch": 0.000198822021484375,
      "step": 32575,
      "training_step_time": 0.6732954978942871
    },
    {
      "epoch": 0.000198828125,
      "model_forward_time": 0.11495041847229004,
      "step": 32576
    },
    {
      "epoch": 0.000198828125,
      "step": 32576,
      "training_step_time": 0.397540807723999
    },
    {
      "epoch": 0.000198834228515625,
      "model_forward_time": 0.11497187614440918,
      "step": 32577
    },
    {
      "epoch": 0.000198834228515625,
      "step": 32577,
      "training_step_time": 0.4431922435760498
    },
    {
      "epoch": 0.00019884033203125,
      "model_forward_time": 0.11478781700134277,
      "step": 32578
    },
    {
      "epoch": 0.00019884033203125,
      "step": 32578,
      "training_step_time": 0.4173705577850342
    },
    {
      "epoch": 0.000198846435546875,
      "model_forward_time": 0.1148228645324707,
      "step": 32579
    },
    {
      "epoch": 0.000198846435546875,
      "step": 32579,
      "training_step_time": 0.4034006595611572
    },
    {
      "epoch": 0.0001988525390625,
      "grad_norm": 0.13543424010276794,
      "learning_rate": 4.702551159670672e-05,
      "loss": 0.0501,
      "step": 32580
    },
    {
      "epoch": 0.0001988525390625,
      "model_forward_time": 0.11433815956115723,
      "step": 32580
    },
    {
      "epoch": 0.0001988525390625,
      "step": 32580,
      "training_step_time": 0.39150476455688477
    },
    {
      "epoch": 0.000198858642578125,
      "model_forward_time": 0.11490964889526367,
      "step": 32581
    },
    {
      "epoch": 0.000198858642578125,
      "step": 32581,
      "training_step_time": 0.6630187034606934
    },
    {
      "epoch": 0.00019886474609375,
      "model_forward_time": 0.1148369312286377,
      "step": 32582
    },
    {
      "epoch": 0.00019886474609375,
      "step": 32582,
      "training_step_time": 0.38109302520751953
    },
    {
      "epoch": 0.000198870849609375,
      "model_forward_time": 0.11510634422302246,
      "step": 32583
    },
    {
      "epoch": 0.000198870849609375,
      "step": 32583,
      "training_step_time": 0.39507365226745605
    },
    {
      "epoch": 0.000198876953125,
      "model_forward_time": 0.11580562591552734,
      "step": 32584
    },
    {
      "epoch": 0.000198876953125,
      "step": 32584,
      "training_step_time": 0.3979525566101074
    },
    {
      "epoch": 0.000198883056640625,
      "model_forward_time": 0.11532473564147949,
      "step": 32585
    },
    {
      "epoch": 0.000198883056640625,
      "step": 32585,
      "training_step_time": 0.38613080978393555
    },
    {
      "epoch": 0.00019888916015625,
      "model_forward_time": 0.11504220962524414,
      "step": 32586
    },
    {
      "epoch": 0.00019888916015625,
      "step": 32586,
      "training_step_time": 0.4299125671386719
    },
    {
      "epoch": 0.000198895263671875,
      "model_forward_time": 0.11521506309509277,
      "step": 32587
    },
    {
      "epoch": 0.000198895263671875,
      "step": 32587,
      "training_step_time": 0.6469051837921143
    },
    {
      "epoch": 0.0001989013671875,
      "model_forward_time": 0.11536359786987305,
      "step": 32588
    },
    {
      "epoch": 0.0001989013671875,
      "step": 32588,
      "training_step_time": 0.41278719902038574
    },
    {
      "epoch": 0.000198907470703125,
      "model_forward_time": 0.11507725715637207,
      "step": 32589
    },
    {
      "epoch": 0.000198907470703125,
      "step": 32589,
      "training_step_time": 0.4469261169433594
    },
    {
      "epoch": 0.00019891357421875,
      "grad_norm": 0.10961833596229553,
      "learning_rate": 4.699800302683981e-05,
      "loss": 0.0412,
      "step": 32590
    },
    {
      "epoch": 0.00019891357421875,
      "model_forward_time": 0.11488890647888184,
      "step": 32590
    },
    {
      "epoch": 0.00019891357421875,
      "step": 32590,
      "training_step_time": 0.4106295108795166
    },
    {
      "epoch": 0.000198919677734375,
      "model_forward_time": 0.1146852970123291,
      "step": 32591
    },
    {
      "epoch": 0.000198919677734375,
      "step": 32591,
      "training_step_time": 0.4354739189147949
    },
    {
      "epoch": 0.00019892578125,
      "model_forward_time": 0.11510729789733887,
      "step": 32592
    },
    {
      "epoch": 0.00019892578125,
      "step": 32592,
      "training_step_time": 0.4260685443878174
    },
    {
      "epoch": 0.000198931884765625,
      "model_forward_time": 0.11529088020324707,
      "step": 32593
    },
    {
      "epoch": 0.000198931884765625,
      "step": 32593,
      "training_step_time": 0.5442097187042236
    },
    {
      "epoch": 0.00019893798828125,
      "model_forward_time": 0.11495327949523926,
      "step": 32594
    },
    {
      "epoch": 0.00019893798828125,
      "step": 32594,
      "training_step_time": 0.39645862579345703
    },
    {
      "epoch": 0.000198944091796875,
      "model_forward_time": 0.11475872993469238,
      "step": 32595
    },
    {
      "epoch": 0.000198944091796875,
      "step": 32595,
      "training_step_time": 0.388702392578125
    },
    {
      "epoch": 0.0001989501953125,
      "model_forward_time": 0.1162416934967041,
      "step": 32596
    },
    {
      "epoch": 0.0001989501953125,
      "step": 32596,
      "training_step_time": 0.38360595703125
    },
    {
      "epoch": 0.000198956298828125,
      "model_forward_time": 0.11507010459899902,
      "step": 32597
    },
    {
      "epoch": 0.000198956298828125,
      "step": 32597,
      "training_step_time": 0.38557934761047363
    },
    {
      "epoch": 0.00019896240234375,
      "model_forward_time": 0.11571025848388672,
      "step": 32598
    },
    {
      "epoch": 0.00019896240234375,
      "step": 32598,
      "training_step_time": 0.3872256278991699
    },
    {
      "epoch": 0.000198968505859375,
      "model_forward_time": 0.1156303882598877,
      "step": 32599
    },
    {
      "epoch": 0.000198968505859375,
      "step": 32599,
      "training_step_time": 0.7529590129852295
    },
    {
      "epoch": 0.000198974609375,
      "grad_norm": 0.14221486449241638,
      "learning_rate": 4.697049536890033e-05,
      "loss": 0.0465,
      "step": 32600
    },
    {
      "epoch": 0.000198974609375,
      "model_forward_time": 0.11483073234558105,
      "step": 32600
    },
    {
      "epoch": 0.000198974609375,
      "step": 32600,
      "training_step_time": 0.4268815517425537
    },
    {
      "epoch": 0.000198980712890625,
      "model_forward_time": 0.1146078109741211,
      "step": 32601
    },
    {
      "epoch": 0.000198980712890625,
      "step": 32601,
      "training_step_time": 0.5065896511077881
    },
    {
      "epoch": 0.00019898681640625,
      "model_forward_time": 0.11524701118469238,
      "step": 32602
    },
    {
      "epoch": 0.00019898681640625,
      "step": 32602,
      "training_step_time": 0.4565873146057129
    },
    {
      "epoch": 0.000198992919921875,
      "model_forward_time": 0.11531662940979004,
      "step": 32603
    },
    {
      "epoch": 0.000198992919921875,
      "step": 32603,
      "training_step_time": 0.49300098419189453
    },
    {
      "epoch": 0.0001989990234375,
      "model_forward_time": 0.11429548263549805,
      "step": 32604
    },
    {
      "epoch": 0.0001989990234375,
      "step": 32604,
      "training_step_time": 0.42032599449157715
    },
    {
      "epoch": 0.000199005126953125,
      "model_forward_time": 0.11435914039611816,
      "step": 32605
    },
    {
      "epoch": 0.000199005126953125,
      "step": 32605,
      "training_step_time": 0.4843144416809082
    },
    {
      "epoch": 0.00019901123046875,
      "model_forward_time": 0.11414933204650879,
      "step": 32606
    },
    {
      "epoch": 0.00019901123046875,
      "step": 32606,
      "training_step_time": 0.3937349319458008
    },
    {
      "epoch": 0.000199017333984375,
      "model_forward_time": 0.11556100845336914,
      "step": 32607
    },
    {
      "epoch": 0.000199017333984375,
      "step": 32607,
      "training_step_time": 0.3956871032714844
    },
    {
      "epoch": 0.0001990234375,
      "model_forward_time": 0.11478185653686523,
      "step": 32608
    },
    {
      "epoch": 0.0001990234375,
      "step": 32608,
      "training_step_time": 0.39598560333251953
    },
    {
      "epoch": 0.000199029541015625,
      "model_forward_time": 0.11538004875183105,
      "step": 32609
    },
    {
      "epoch": 0.000199029541015625,
      "step": 32609,
      "training_step_time": 0.3897404670715332
    },
    {
      "epoch": 0.00019903564453125,
      "grad_norm": 0.16713087260723114,
      "learning_rate": 4.694298863124435e-05,
      "loss": 0.0384,
      "step": 32610
    },
    {
      "epoch": 0.00019903564453125,
      "model_forward_time": 0.11499810218811035,
      "step": 32610
    },
    {
      "epoch": 0.00019903564453125,
      "step": 32610,
      "training_step_time": 0.39504241943359375
    },
    {
      "epoch": 0.000199041748046875,
      "model_forward_time": 0.11521100997924805,
      "step": 32611
    },
    {
      "epoch": 0.000199041748046875,
      "step": 32611,
      "training_step_time": 0.5043230056762695
    },
    {
      "epoch": 0.0001990478515625,
      "model_forward_time": 0.11538290977478027,
      "step": 32612
    },
    {
      "epoch": 0.0001990478515625,
      "step": 32612,
      "training_step_time": 0.39812445640563965
    },
    {
      "epoch": 0.000199053955078125,
      "model_forward_time": 0.11570286750793457,
      "step": 32613
    },
    {
      "epoch": 0.000199053955078125,
      "step": 32613,
      "training_step_time": 0.47312068939208984
    },
    {
      "epoch": 0.00019906005859375,
      "model_forward_time": 0.11488676071166992,
      "step": 32614
    },
    {
      "epoch": 0.00019906005859375,
      "step": 32614,
      "training_step_time": 0.4124155044555664
    },
    {
      "epoch": 0.000199066162109375,
      "model_forward_time": 0.11566805839538574,
      "step": 32615
    },
    {
      "epoch": 0.000199066162109375,
      "step": 32615,
      "training_step_time": 0.40492725372314453
    },
    {
      "epoch": 0.000199072265625,
      "model_forward_time": 0.11476802825927734,
      "step": 32616
    },
    {
      "epoch": 0.000199072265625,
      "step": 32616,
      "training_step_time": 0.4206533432006836
    },
    {
      "epoch": 0.000199078369140625,
      "model_forward_time": 0.11566019058227539,
      "step": 32617
    },
    {
      "epoch": 0.000199078369140625,
      "step": 32617,
      "training_step_time": 0.5004570484161377
    },
    {
      "epoch": 0.00019908447265625,
      "model_forward_time": 0.11557817459106445,
      "step": 32618
    },
    {
      "epoch": 0.00019908447265625,
      "step": 32618,
      "training_step_time": 0.5074257850646973
    },
    {
      "epoch": 0.000199090576171875,
      "model_forward_time": 0.11536335945129395,
      "step": 32619
    },
    {
      "epoch": 0.000199090576171875,
      "step": 32619,
      "training_step_time": 0.3966653347015381
    },
    {
      "epoch": 0.0001990966796875,
      "grad_norm": 0.11923585087060928,
      "learning_rate": 4.691548282222771e-05,
      "loss": 0.0409,
      "step": 32620
    },
    {
      "epoch": 0.0001990966796875,
      "model_forward_time": 0.11522889137268066,
      "step": 32620
    },
    {
      "epoch": 0.0001990966796875,
      "step": 32620,
      "training_step_time": 0.41165828704833984
    },
    {
      "epoch": 0.000199102783203125,
      "model_forward_time": 0.11513042449951172,
      "step": 32621
    },
    {
      "epoch": 0.000199102783203125,
      "step": 32621,
      "training_step_time": 0.39926695823669434
    },
    {
      "epoch": 0.00019910888671875,
      "model_forward_time": 0.11478686332702637,
      "step": 32622
    },
    {
      "epoch": 0.00019910888671875,
      "step": 32622,
      "training_step_time": 0.3951992988586426
    },
    {
      "epoch": 0.000199114990234375,
      "model_forward_time": 0.1152501106262207,
      "step": 32623
    },
    {
      "epoch": 0.000199114990234375,
      "step": 32623,
      "training_step_time": 0.45774245262145996
    },
    {
      "epoch": 0.00019912109375,
      "model_forward_time": 0.11547255516052246,
      "step": 32624
    },
    {
      "epoch": 0.00019912109375,
      "step": 32624,
      "training_step_time": 0.3921337127685547
    },
    {
      "epoch": 0.000199127197265625,
      "model_forward_time": 0.11463189125061035,
      "step": 32625
    },
    {
      "epoch": 0.000199127197265625,
      "step": 32625,
      "training_step_time": 0.4090840816497803
    },
    {
      "epoch": 0.00019913330078125,
      "model_forward_time": 0.11499309539794922,
      "step": 32626
    },
    {
      "epoch": 0.00019913330078125,
      "step": 32626,
      "training_step_time": 0.3995823860168457
    },
    {
      "epoch": 0.000199139404296875,
      "model_forward_time": 0.11583566665649414,
      "step": 32627
    },
    {
      "epoch": 0.000199139404296875,
      "step": 32627,
      "training_step_time": 0.47321081161499023
    },
    {
      "epoch": 0.0001991455078125,
      "model_forward_time": 0.11600494384765625,
      "step": 32628
    },
    {
      "epoch": 0.0001991455078125,
      "step": 32628,
      "training_step_time": 0.4131438732147217
    },
    {
      "epoch": 0.000199151611328125,
      "model_forward_time": 0.11559414863586426,
      "step": 32629
    },
    {
      "epoch": 0.000199151611328125,
      "step": 32629,
      "training_step_time": 0.6312224864959717
    },
    {
      "epoch": 0.00019915771484375,
      "grad_norm": 0.13374826312065125,
      "learning_rate": 4.688797795020597e-05,
      "loss": 0.0382,
      "step": 32630
    },
    {
      "epoch": 0.00019915771484375,
      "model_forward_time": 0.11461877822875977,
      "step": 32630
    },
    {
      "epoch": 0.00019915771484375,
      "step": 32630,
      "training_step_time": 0.5161693096160889
    },
    {
      "epoch": 0.000199163818359375,
      "model_forward_time": 0.11490583419799805,
      "step": 32631
    },
    {
      "epoch": 0.000199163818359375,
      "step": 32631,
      "training_step_time": 0.48939037322998047
    },
    {
      "epoch": 0.000199169921875,
      "model_forward_time": 0.11542057991027832,
      "step": 32632
    },
    {
      "epoch": 0.000199169921875,
      "step": 32632,
      "training_step_time": 0.41783857345581055
    },
    {
      "epoch": 0.000199176025390625,
      "model_forward_time": 0.11408185958862305,
      "step": 32633
    },
    {
      "epoch": 0.000199176025390625,
      "step": 32633,
      "training_step_time": 0.4796600341796875
    },
    {
      "epoch": 0.00019918212890625,
      "model_forward_time": 0.11460304260253906,
      "step": 32634
    },
    {
      "epoch": 0.00019918212890625,
      "step": 32634,
      "training_step_time": 0.3869776725769043
    },
    {
      "epoch": 0.000199188232421875,
      "model_forward_time": 0.11466050148010254,
      "step": 32635
    },
    {
      "epoch": 0.000199188232421875,
      "step": 32635,
      "training_step_time": 0.415071964263916
    },
    {
      "epoch": 0.0001991943359375,
      "model_forward_time": 0.11643385887145996,
      "step": 32636
    },
    {
      "epoch": 0.0001991943359375,
      "step": 32636,
      "training_step_time": 0.3835594654083252
    },
    {
      "epoch": 0.000199200439453125,
      "model_forward_time": 0.11578059196472168,
      "step": 32637
    },
    {
      "epoch": 0.000199200439453125,
      "step": 32637,
      "training_step_time": 0.3874082565307617
    },
    {
      "epoch": 0.00019920654296875,
      "model_forward_time": 0.11551237106323242,
      "step": 32638
    },
    {
      "epoch": 0.00019920654296875,
      "step": 32638,
      "training_step_time": 0.3984718322753906
    },
    {
      "epoch": 0.000199212646484375,
      "model_forward_time": 0.11574220657348633,
      "step": 32639
    },
    {
      "epoch": 0.000199212646484375,
      "step": 32639,
      "training_step_time": 0.38775038719177246
    },
    {
      "epoch": 0.00019921875,
      "grad_norm": 0.11305978149175644,
      "learning_rate": 4.6860474023534335e-05,
      "loss": 0.0434,
      "step": 32640
    },
    {
      "epoch": 0.00019921875,
      "model_forward_time": 0.11496901512145996,
      "step": 32640
    },
    {
      "epoch": 0.00019921875,
      "step": 32640,
      "training_step_time": 0.4077579975128174
    },
    {
      "epoch": 0.000199224853515625,
      "model_forward_time": 0.11480593681335449,
      "step": 32641
    },
    {
      "epoch": 0.000199224853515625,
      "step": 32641,
      "training_step_time": 0.7486159801483154
    },
    {
      "epoch": 0.00019923095703125,
      "model_forward_time": 0.11448931694030762,
      "step": 32642
    },
    {
      "epoch": 0.00019923095703125,
      "step": 32642,
      "training_step_time": 0.4019935131072998
    },
    {
      "epoch": 0.000199237060546875,
      "model_forward_time": 0.11436605453491211,
      "step": 32643
    },
    {
      "epoch": 0.000199237060546875,
      "step": 32643,
      "training_step_time": 0.4184238910675049
    },
    {
      "epoch": 0.0001992431640625,
      "model_forward_time": 0.11438941955566406,
      "step": 32644
    },
    {
      "epoch": 0.0001992431640625,
      "step": 32644,
      "training_step_time": 0.44730091094970703
    },
    {
      "epoch": 0.000199249267578125,
      "model_forward_time": 0.11423683166503906,
      "step": 32645
    },
    {
      "epoch": 0.000199249267578125,
      "step": 32645,
      "training_step_time": 0.48482370376586914
    },
    {
      "epoch": 0.00019925537109375,
      "model_forward_time": 0.11474418640136719,
      "step": 32646
    },
    {
      "epoch": 0.00019925537109375,
      "step": 32646,
      "training_step_time": 0.3894624710083008
    },
    {
      "epoch": 0.000199261474609375,
      "model_forward_time": 0.11463570594787598,
      "step": 32647
    },
    {
      "epoch": 0.000199261474609375,
      "step": 32647,
      "training_step_time": 0.5758850574493408
    },
    {
      "epoch": 0.000199267578125,
      "model_forward_time": 0.11442804336547852,
      "step": 32648
    },
    {
      "epoch": 0.000199267578125,
      "step": 32648,
      "training_step_time": 0.386019229888916
    },
    {
      "epoch": 0.000199273681640625,
      "model_forward_time": 0.11483216285705566,
      "step": 32649
    },
    {
      "epoch": 0.000199273681640625,
      "step": 32649,
      "training_step_time": 0.397707462310791
    },
    {
      "epoch": 0.00019927978515625,
      "grad_norm": 0.11156248301267624,
      "learning_rate": 4.683297105056782e-05,
      "loss": 0.0428,
      "step": 32650
    },
    {
      "epoch": 0.00019927978515625,
      "model_forward_time": 0.11514711380004883,
      "step": 32650
    },
    {
      "epoch": 0.00019927978515625,
      "step": 32650,
      "training_step_time": 0.39515113830566406
    },
    {
      "epoch": 0.000199285888671875,
      "model_forward_time": 0.1158900260925293,
      "step": 32651
    },
    {
      "epoch": 0.000199285888671875,
      "step": 32651,
      "training_step_time": 0.39048337936401367
    },
    {
      "epoch": 0.0001992919921875,
      "model_forward_time": 0.11464476585388184,
      "step": 32652
    },
    {
      "epoch": 0.0001992919921875,
      "step": 32652,
      "training_step_time": 0.451676607131958
    },
    {
      "epoch": 0.000199298095703125,
      "model_forward_time": 0.11728787422180176,
      "step": 32653
    },
    {
      "epoch": 0.000199298095703125,
      "step": 32653,
      "training_step_time": 0.5610463619232178
    },
    {
      "epoch": 0.00019930419921875,
      "model_forward_time": 0.1149299144744873,
      "step": 32654
    },
    {
      "epoch": 0.00019930419921875,
      "step": 32654,
      "training_step_time": 0.39153528213500977
    },
    {
      "epoch": 0.000199310302734375,
      "model_forward_time": 0.11476373672485352,
      "step": 32655
    },
    {
      "epoch": 0.000199310302734375,
      "step": 32655,
      "training_step_time": 0.4550282955169678
    },
    {
      "epoch": 0.00019931640625,
      "model_forward_time": 0.11523079872131348,
      "step": 32656
    },
    {
      "epoch": 0.00019931640625,
      "step": 32656,
      "training_step_time": 0.4305245876312256
    },
    {
      "epoch": 0.000199322509765625,
      "model_forward_time": 0.11470746994018555,
      "step": 32657
    },
    {
      "epoch": 0.000199322509765625,
      "step": 32657,
      "training_step_time": 0.36537909507751465
    },
    {
      "epoch": 0.00019932861328125,
      "model_forward_time": 0.11487007141113281,
      "step": 32658
    },
    {
      "epoch": 0.00019932861328125,
      "step": 32658,
      "training_step_time": 0.4356193542480469
    },
    {
      "epoch": 0.000199334716796875,
      "model_forward_time": 0.11565113067626953,
      "step": 32659
    },
    {
      "epoch": 0.000199334716796875,
      "step": 32659,
      "training_step_time": 0.4033517837524414
    },
    {
      "epoch": 0.0001993408203125,
      "grad_norm": 0.10568340867757797,
      "learning_rate": 4.680546903966106e-05,
      "loss": 0.0367,
      "step": 32660
    },
    {
      "epoch": 0.0001993408203125,
      "model_forward_time": 0.11539220809936523,
      "step": 32660
    },
    {
      "epoch": 0.0001993408203125,
      "step": 32660,
      "training_step_time": 0.38291025161743164
    },
    {
      "epoch": 0.000199346923828125,
      "model_forward_time": 0.11496853828430176,
      "step": 32661
    },
    {
      "epoch": 0.000199346923828125,
      "step": 32661,
      "training_step_time": 0.45047903060913086
    },
    {
      "epoch": 0.00019935302734375,
      "model_forward_time": 0.11495661735534668,
      "step": 32662
    },
    {
      "epoch": 0.00019935302734375,
      "step": 32662,
      "training_step_time": 0.39855217933654785
    },
    {
      "epoch": 0.000199359130859375,
      "model_forward_time": 0.11466026306152344,
      "step": 32663
    },
    {
      "epoch": 0.000199359130859375,
      "step": 32663,
      "training_step_time": 0.3966982364654541
    },
    {
      "epoch": 0.000199365234375,
      "model_forward_time": 0.11522126197814941,
      "step": 32664
    },
    {
      "epoch": 0.000199365234375,
      "step": 32664,
      "training_step_time": 0.39069390296936035
    },
    {
      "epoch": 0.000199371337890625,
      "model_forward_time": 0.1152353286743164,
      "step": 32665
    },
    {
      "epoch": 0.000199371337890625,
      "step": 32665,
      "training_step_time": 0.5629303455352783
    },
    {
      "epoch": 0.00019937744140625,
      "model_forward_time": 0.11469721794128418,
      "step": 32666
    },
    {
      "epoch": 0.00019937744140625,
      "step": 32666,
      "training_step_time": 0.42548346519470215
    },
    {
      "epoch": 0.000199383544921875,
      "model_forward_time": 0.11519312858581543,
      "step": 32667
    },
    {
      "epoch": 0.000199383544921875,
      "step": 32667,
      "training_step_time": 0.38480067253112793
    },
    {
      "epoch": 0.0001993896484375,
      "model_forward_time": 0.11501121520996094,
      "step": 32668
    },
    {
      "epoch": 0.0001993896484375,
      "step": 32668,
      "training_step_time": 0.4067270755767822
    },
    {
      "epoch": 0.000199395751953125,
      "model_forward_time": 0.11608195304870605,
      "step": 32669
    },
    {
      "epoch": 0.000199395751953125,
      "step": 32669,
      "training_step_time": 0.4435462951660156
    },
    {
      "epoch": 0.00019940185546875,
      "grad_norm": 0.1161312684416771,
      "learning_rate": 4.677796799916845e-05,
      "loss": 0.0454,
      "step": 32670
    },
    {
      "epoch": 0.00019940185546875,
      "model_forward_time": 0.11507725715637207,
      "step": 32670
    },
    {
      "epoch": 0.00019940185546875,
      "step": 32670,
      "training_step_time": 0.4056844711303711
    },
    {
      "epoch": 0.000199407958984375,
      "model_forward_time": 0.11466288566589355,
      "step": 32671
    },
    {
      "epoch": 0.000199407958984375,
      "step": 32671,
      "training_step_time": 0.5887537002563477
    },
    {
      "epoch": 0.0001994140625,
      "model_forward_time": 0.11504435539245605,
      "step": 32672
    },
    {
      "epoch": 0.0001994140625,
      "step": 32672,
      "training_step_time": 0.46910834312438965
    },
    {
      "epoch": 0.000199420166015625,
      "model_forward_time": 0.11497616767883301,
      "step": 32673
    },
    {
      "epoch": 0.000199420166015625,
      "step": 32673,
      "training_step_time": 0.4592280387878418
    },
    {
      "epoch": 0.00019942626953125,
      "model_forward_time": 0.1154639720916748,
      "step": 32674
    },
    {
      "epoch": 0.00019942626953125,
      "step": 32674,
      "training_step_time": 0.4095592498779297
    },
    {
      "epoch": 0.000199432373046875,
      "model_forward_time": 0.1145174503326416,
      "step": 32675
    },
    {
      "epoch": 0.000199432373046875,
      "step": 32675,
      "training_step_time": 0.48159170150756836
    },
    {
      "epoch": 0.0001994384765625,
      "model_forward_time": 0.11495804786682129,
      "step": 32676
    },
    {
      "epoch": 0.0001994384765625,
      "step": 32676,
      "training_step_time": 0.3942127227783203
    },
    {
      "epoch": 0.000199444580078125,
      "model_forward_time": 0.11536574363708496,
      "step": 32677
    },
    {
      "epoch": 0.000199444580078125,
      "step": 32677,
      "training_step_time": 0.4230678081512451
    },
    {
      "epoch": 0.00019945068359375,
      "model_forward_time": 0.11590456962585449,
      "step": 32678
    },
    {
      "epoch": 0.00019945068359375,
      "step": 32678,
      "training_step_time": 0.42696642875671387
    },
    {
      "epoch": 0.000199456787109375,
      "model_forward_time": 0.1151583194732666,
      "step": 32679
    },
    {
      "epoch": 0.000199456787109375,
      "step": 32679,
      "training_step_time": 0.4044983386993408
    },
    {
      "epoch": 0.000199462890625,
      "grad_norm": 0.1355273574590683,
      "learning_rate": 4.6750467937444115e-05,
      "loss": 0.0425,
      "step": 32680
    },
    {
      "epoch": 0.000199462890625,
      "model_forward_time": 0.11506533622741699,
      "step": 32680
    },
    {
      "epoch": 0.000199462890625,
      "step": 32680,
      "training_step_time": 0.41777467727661133
    },
    {
      "epoch": 0.000199468994140625,
      "model_forward_time": 0.11540532112121582,
      "step": 32681
    },
    {
      "epoch": 0.000199468994140625,
      "step": 32681,
      "training_step_time": 0.3935074806213379
    },
    {
      "epoch": 0.00019947509765625,
      "model_forward_time": 0.11494064331054688,
      "step": 32682
    },
    {
      "epoch": 0.00019947509765625,
      "step": 32682,
      "training_step_time": 0.4042482376098633
    },
    {
      "epoch": 0.000199481201171875,
      "model_forward_time": 0.11505866050720215,
      "step": 32683
    },
    {
      "epoch": 0.000199481201171875,
      "step": 32683,
      "training_step_time": 0.5314195156097412
    },
    {
      "epoch": 0.0001994873046875,
      "model_forward_time": 0.1149587631225586,
      "step": 32684
    },
    {
      "epoch": 0.0001994873046875,
      "step": 32684,
      "training_step_time": 0.41367006301879883
    },
    {
      "epoch": 0.000199493408203125,
      "model_forward_time": 0.11490845680236816,
      "step": 32685
    },
    {
      "epoch": 0.000199493408203125,
      "step": 32685,
      "training_step_time": 0.36656951904296875
    },
    {
      "epoch": 0.00019949951171875,
      "model_forward_time": 0.11464929580688477,
      "step": 32686
    },
    {
      "epoch": 0.00019949951171875,
      "step": 32686,
      "training_step_time": 0.4135899543762207
    },
    {
      "epoch": 0.000199505615234375,
      "model_forward_time": 0.11499166488647461,
      "step": 32687
    },
    {
      "epoch": 0.000199505615234375,
      "step": 32687,
      "training_step_time": 0.4350016117095947
    },
    {
      "epoch": 0.00019951171875,
      "model_forward_time": 0.11545610427856445,
      "step": 32688
    },
    {
      "epoch": 0.00019951171875,
      "step": 32688,
      "training_step_time": 0.4460721015930176
    },
    {
      "epoch": 0.000199517822265625,
      "model_forward_time": 0.11508631706237793,
      "step": 32689
    },
    {
      "epoch": 0.000199517822265625,
      "step": 32689,
      "training_step_time": 0.4683871269226074
    },
    {
      "epoch": 0.00019952392578125,
      "grad_norm": 0.1471358686685562,
      "learning_rate": 4.6722968862841806e-05,
      "loss": 0.0379,
      "step": 32690
    },
    {
      "epoch": 0.00019952392578125,
      "model_forward_time": 0.11474204063415527,
      "step": 32690
    },
    {
      "epoch": 0.00019952392578125,
      "step": 32690,
      "training_step_time": 0.39181971549987793
    },
    {
      "epoch": 0.000199530029296875,
      "model_forward_time": 0.11531376838684082,
      "step": 32691
    },
    {
      "epoch": 0.000199530029296875,
      "step": 32691,
      "training_step_time": 0.3865630626678467
    },
    {
      "epoch": 0.0001995361328125,
      "model_forward_time": 0.11519575119018555,
      "step": 32692
    },
    {
      "epoch": 0.0001995361328125,
      "step": 32692,
      "training_step_time": 0.41996335983276367
    },
    {
      "epoch": 0.000199542236328125,
      "model_forward_time": 0.1151576042175293,
      "step": 32693
    },
    {
      "epoch": 0.000199542236328125,
      "step": 32693,
      "training_step_time": 0.3943939208984375
    },
    {
      "epoch": 0.00019954833984375,
      "model_forward_time": 0.11485075950622559,
      "step": 32694
    },
    {
      "epoch": 0.00019954833984375,
      "step": 32694,
      "training_step_time": 0.4208498001098633
    },
    {
      "epoch": 0.000199554443359375,
      "model_forward_time": 0.11479520797729492,
      "step": 32695
    },
    {
      "epoch": 0.000199554443359375,
      "step": 32695,
      "training_step_time": 0.558194637298584
    },
    {
      "epoch": 0.000199560546875,
      "model_forward_time": 0.1150045394897461,
      "step": 32696
    },
    {
      "epoch": 0.000199560546875,
      "step": 32696,
      "training_step_time": 0.3924872875213623
    },
    {
      "epoch": 0.000199566650390625,
      "model_forward_time": 0.11512398719787598,
      "step": 32697
    },
    {
      "epoch": 0.000199566650390625,
      "step": 32697,
      "training_step_time": 0.4165976047515869
    },
    {
      "epoch": 0.00019957275390625,
      "model_forward_time": 0.11490082740783691,
      "step": 32698
    },
    {
      "epoch": 0.00019957275390625,
      "step": 32698,
      "training_step_time": 0.39673495292663574
    },
    {
      "epoch": 0.000199578857421875,
      "model_forward_time": 0.11528158187866211,
      "step": 32699
    },
    {
      "epoch": 0.000199578857421875,
      "step": 32699,
      "training_step_time": 0.42839908599853516
    },
    {
      "epoch": 0.0001995849609375,
      "grad_norm": 0.1416211873292923,
      "learning_rate": 4.669547078371504e-05,
      "loss": 0.0412,
      "step": 32700
    },
    {
      "epoch": 0.0001995849609375,
      "model_forward_time": 0.11478686332702637,
      "step": 32700
    },
    {
      "epoch": 0.0001995849609375,
      "step": 32700,
      "training_step_time": 0.4155611991882324
    },
    {
      "epoch": 0.000199591064453125,
      "model_forward_time": 0.11523199081420898,
      "step": 32701
    },
    {
      "epoch": 0.000199591064453125,
      "step": 32701,
      "training_step_time": 0.6572222709655762
    },
    {
      "epoch": 0.00019959716796875,
      "model_forward_time": 0.1145167350769043,
      "step": 32702
    },
    {
      "epoch": 0.00019959716796875,
      "step": 32702,
      "training_step_time": 0.4148569107055664
    },
    {
      "epoch": 0.000199603271484375,
      "model_forward_time": 0.1154019832611084,
      "step": 32703
    },
    {
      "epoch": 0.000199603271484375,
      "step": 32703,
      "training_step_time": 0.38798975944519043
    },
    {
      "epoch": 0.000199609375,
      "model_forward_time": 0.11429929733276367,
      "step": 32704
    },
    {
      "epoch": 0.000199609375,
      "step": 32704,
      "training_step_time": 0.38767552375793457
    },
    {
      "epoch": 0.000199615478515625,
      "model_forward_time": 0.11461186408996582,
      "step": 32705
    },
    {
      "epoch": 0.000199615478515625,
      "step": 32705,
      "training_step_time": 0.4074540138244629
    },
    {
      "epoch": 0.00019962158203125,
      "model_forward_time": 0.11473250389099121,
      "step": 32706
    },
    {
      "epoch": 0.00019962158203125,
      "step": 32706,
      "training_step_time": 0.39571356773376465
    },
    {
      "epoch": 0.000199627685546875,
      "model_forward_time": 0.1157522201538086,
      "step": 32707
    },
    {
      "epoch": 0.000199627685546875,
      "step": 32707,
      "training_step_time": 0.727808952331543
    },
    {
      "epoch": 0.0001996337890625,
      "model_forward_time": 0.11453819274902344,
      "step": 32708
    },
    {
      "epoch": 0.0001996337890625,
      "step": 32708,
      "training_step_time": 0.3881680965423584
    },
    {
      "epoch": 0.000199639892578125,
      "model_forward_time": 0.11527395248413086,
      "step": 32709
    },
    {
      "epoch": 0.000199639892578125,
      "step": 32709,
      "training_step_time": 0.38571929931640625
    },
    {
      "epoch": 0.00019964599609375,
      "grad_norm": 0.17561203241348267,
      "learning_rate": 4.666797370841699e-05,
      "loss": 0.0471,
      "step": 32710
    },
    {
      "epoch": 0.00019964599609375,
      "model_forward_time": 0.11445045471191406,
      "step": 32710
    },
    {
      "epoch": 0.00019964599609375,
      "step": 32710,
      "training_step_time": 0.39328455924987793
    },
    {
      "epoch": 0.000199652099609375,
      "model_forward_time": 0.11458945274353027,
      "step": 32711
    },
    {
      "epoch": 0.000199652099609375,
      "step": 32711,
      "training_step_time": 0.4469001293182373
    },
    {
      "epoch": 0.000199658203125,
      "model_forward_time": 0.11457037925720215,
      "step": 32712
    },
    {
      "epoch": 0.000199658203125,
      "step": 32712,
      "training_step_time": 0.3937094211578369
    },
    {
      "epoch": 0.000199664306640625,
      "model_forward_time": 0.11542892456054688,
      "step": 32713
    },
    {
      "epoch": 0.000199664306640625,
      "step": 32713,
      "training_step_time": 0.5131030082702637
    },
    {
      "epoch": 0.00019967041015625,
      "model_forward_time": 0.11539459228515625,
      "step": 32714
    },
    {
      "epoch": 0.00019967041015625,
      "step": 32714,
      "training_step_time": 0.4419221878051758
    },
    {
      "epoch": 0.000199676513671875,
      "model_forward_time": 0.1146540641784668,
      "step": 32715
    },
    {
      "epoch": 0.000199676513671875,
      "step": 32715,
      "training_step_time": 0.4152688980102539
    },
    {
      "epoch": 0.0001996826171875,
      "model_forward_time": 0.11470770835876465,
      "step": 32716
    },
    {
      "epoch": 0.0001996826171875,
      "step": 32716,
      "training_step_time": 0.4889798164367676
    },
    {
      "epoch": 0.000199688720703125,
      "model_forward_time": 0.11499214172363281,
      "step": 32717
    },
    {
      "epoch": 0.000199688720703125,
      "step": 32717,
      "training_step_time": 0.38675928115844727
    },
    {
      "epoch": 0.00019969482421875,
      "model_forward_time": 0.11494278907775879,
      "step": 32718
    },
    {
      "epoch": 0.00019969482421875,
      "step": 32718,
      "training_step_time": 0.4646944999694824
    },
    {
      "epoch": 0.000199700927734375,
      "model_forward_time": 0.11557626724243164,
      "step": 32719
    },
    {
      "epoch": 0.000199700927734375,
      "step": 32719,
      "training_step_time": 0.39539504051208496
    },
    {
      "epoch": 0.00019970703125,
      "grad_norm": 0.12543989717960358,
      "learning_rate": 4.664047764530055e-05,
      "loss": 0.04,
      "step": 32720
    },
    {
      "epoch": 0.00019970703125,
      "model_forward_time": 0.11491966247558594,
      "step": 32720
    },
    {
      "epoch": 0.00019970703125,
      "step": 32720,
      "training_step_time": 0.3902139663696289
    },
    {
      "epoch": 0.000199713134765625,
      "model_forward_time": 0.11529088020324707,
      "step": 32721
    },
    {
      "epoch": 0.000199713134765625,
      "step": 32721,
      "training_step_time": 0.4009549617767334
    },
    {
      "epoch": 0.00019971923828125,
      "model_forward_time": 0.11456298828125,
      "step": 32722
    },
    {
      "epoch": 0.00019971923828125,
      "step": 32722,
      "training_step_time": 0.3982884883880615
    },
    {
      "epoch": 0.000199725341796875,
      "model_forward_time": 0.11562776565551758,
      "step": 32723
    },
    {
      "epoch": 0.000199725341796875,
      "step": 32723,
      "training_step_time": 0.39537668228149414
    },
    {
      "epoch": 0.0001997314453125,
      "model_forward_time": 0.11507797241210938,
      "step": 32724
    },
    {
      "epoch": 0.0001997314453125,
      "step": 32724,
      "training_step_time": 0.40010714530944824
    },
    {
      "epoch": 0.000199737548828125,
      "model_forward_time": 0.11530113220214844,
      "step": 32725
    },
    {
      "epoch": 0.000199737548828125,
      "step": 32725,
      "training_step_time": 0.7371070384979248
    },
    {
      "epoch": 0.00019974365234375,
      "model_forward_time": 0.11582279205322266,
      "step": 32726
    },
    {
      "epoch": 0.00019974365234375,
      "step": 32726,
      "training_step_time": 0.41234564781188965
    },
    {
      "epoch": 0.000199749755859375,
      "model_forward_time": 0.1150062084197998,
      "step": 32727
    },
    {
      "epoch": 0.000199749755859375,
      "step": 32727,
      "training_step_time": 0.3911130428314209
    },
    {
      "epoch": 0.000199755859375,
      "model_forward_time": 0.11541962623596191,
      "step": 32728
    },
    {
      "epoch": 0.000199755859375,
      "step": 32728,
      "training_step_time": 0.4900820255279541
    },
    {
      "epoch": 0.000199761962890625,
      "model_forward_time": 0.11459589004516602,
      "step": 32729
    },
    {
      "epoch": 0.000199761962890625,
      "step": 32729,
      "training_step_time": 0.4100770950317383
    },
    {
      "epoch": 0.00019976806640625,
      "grad_norm": 0.08429937809705734,
      "learning_rate": 4.66129826027183e-05,
      "loss": 0.0367,
      "step": 32730
    },
    {
      "epoch": 0.00019976806640625,
      "model_forward_time": 0.11480879783630371,
      "step": 32730
    },
    {
      "epoch": 0.00019976806640625,
      "step": 32730,
      "training_step_time": 0.5041830539703369
    },
    {
      "epoch": 0.000199774169921875,
      "model_forward_time": 0.11624360084533691,
      "step": 32731
    },
    {
      "epoch": 0.000199774169921875,
      "step": 32731,
      "training_step_time": 0.3757038116455078
    },
    {
      "epoch": 0.0001997802734375,
      "model_forward_time": 0.1161961555480957,
      "step": 32732
    },
    {
      "epoch": 0.0001997802734375,
      "step": 32732,
      "training_step_time": 0.3894495964050293
    },
    {
      "epoch": 0.000199786376953125,
      "model_forward_time": 0.11619687080383301,
      "step": 32733
    },
    {
      "epoch": 0.000199786376953125,
      "step": 32733,
      "training_step_time": 0.3892951011657715
    },
    {
      "epoch": 0.00019979248046875,
      "model_forward_time": 0.11529731750488281,
      "step": 32734
    },
    {
      "epoch": 0.00019979248046875,
      "step": 32734,
      "training_step_time": 0.3954606056213379
    },
    {
      "epoch": 0.000199798583984375,
      "model_forward_time": 0.11459541320800781,
      "step": 32735
    },
    {
      "epoch": 0.000199798583984375,
      "step": 32735,
      "training_step_time": 0.4065709114074707
    },
    {
      "epoch": 0.0001998046875,
      "model_forward_time": 0.11485648155212402,
      "step": 32736
    },
    {
      "epoch": 0.0001998046875,
      "step": 32736,
      "training_step_time": 0.4059872627258301
    },
    {
      "epoch": 0.000199810791015625,
      "model_forward_time": 0.11579060554504395,
      "step": 32737
    },
    {
      "epoch": 0.000199810791015625,
      "step": 32737,
      "training_step_time": 0.6485848426818848
    },
    {
      "epoch": 0.00019981689453125,
      "model_forward_time": 0.11528921127319336,
      "step": 32738
    },
    {
      "epoch": 0.00019981689453125,
      "step": 32738,
      "training_step_time": 0.3894071578979492
    },
    {
      "epoch": 0.000199822998046875,
      "model_forward_time": 0.1158132553100586,
      "step": 32739
    },
    {
      "epoch": 0.000199822998046875,
      "step": 32739,
      "training_step_time": 0.41421008110046387
    },
    {
      "epoch": 0.0001998291015625,
      "grad_norm": 0.131517231464386,
      "learning_rate": 4.65854885890225e-05,
      "loss": 0.039,
      "step": 32740
    },
    {
      "epoch": 0.0001998291015625,
      "model_forward_time": 0.11543679237365723,
      "step": 32740
    },
    {
      "epoch": 0.0001998291015625,
      "step": 32740,
      "training_step_time": 0.40767526626586914
    },
    {
      "epoch": 0.000199835205078125,
      "model_forward_time": 0.11456966400146484,
      "step": 32741
    },
    {
      "epoch": 0.000199835205078125,
      "step": 32741,
      "training_step_time": 0.42614006996154785
    },
    {
      "epoch": 0.00019984130859375,
      "model_forward_time": 0.11452984809875488,
      "step": 32742
    },
    {
      "epoch": 0.00019984130859375,
      "step": 32742,
      "training_step_time": 0.4713892936706543
    },
    {
      "epoch": 0.000199847412109375,
      "model_forward_time": 0.11500382423400879,
      "step": 32743
    },
    {
      "epoch": 0.000199847412109375,
      "step": 32743,
      "training_step_time": 0.4490499496459961
    },
    {
      "epoch": 0.000199853515625,
      "model_forward_time": 0.11472105979919434,
      "step": 32744
    },
    {
      "epoch": 0.000199853515625,
      "step": 32744,
      "training_step_time": 0.4698166847229004
    },
    {
      "epoch": 0.000199859619140625,
      "model_forward_time": 0.11511898040771484,
      "step": 32745
    },
    {
      "epoch": 0.000199859619140625,
      "step": 32745,
      "training_step_time": 0.4202556610107422
    },
    {
      "epoch": 0.00019986572265625,
      "model_forward_time": 0.11627721786499023,
      "step": 32746
    },
    {
      "epoch": 0.00019986572265625,
      "step": 32746,
      "training_step_time": 0.3964836597442627
    },
    {
      "epoch": 0.000199871826171875,
      "model_forward_time": 0.1145167350769043,
      "step": 32747
    },
    {
      "epoch": 0.000199871826171875,
      "step": 32747,
      "training_step_time": 0.8163156509399414
    },
    {
      "epoch": 0.0001998779296875,
      "model_forward_time": 0.11443543434143066,
      "step": 32748
    },
    {
      "epoch": 0.0001998779296875,
      "step": 32748,
      "training_step_time": 0.386462926864624
    },
    {
      "epoch": 0.000199884033203125,
      "model_forward_time": 0.11400938034057617,
      "step": 32749
    },
    {
      "epoch": 0.000199884033203125,
      "step": 32749,
      "training_step_time": 0.3976104259490967
    },
    {
      "epoch": 0.00019989013671875,
      "grad_norm": 0.12104718387126923,
      "learning_rate": 4.6557995612565144e-05,
      "loss": 0.0413,
      "step": 32750
    },
    {
      "epoch": 0.00019989013671875,
      "model_forward_time": 0.11508536338806152,
      "step": 32750
    },
    {
      "epoch": 0.00019989013671875,
      "step": 32750,
      "training_step_time": 0.38904356956481934
    },
    {
      "epoch": 0.000199896240234375,
      "model_forward_time": 0.1143026351928711,
      "step": 32751
    },
    {
      "epoch": 0.000199896240234375,
      "step": 32751,
      "training_step_time": 0.38588762283325195
    },
    {
      "epoch": 0.00019990234375,
      "model_forward_time": 0.11436748504638672,
      "step": 32752
    },
    {
      "epoch": 0.00019990234375,
      "step": 32752,
      "training_step_time": 0.3888840675354004
    },
    {
      "epoch": 0.000199908447265625,
      "model_forward_time": 0.11476397514343262,
      "step": 32753
    },
    {
      "epoch": 0.000199908447265625,
      "step": 32753,
      "training_step_time": 0.48136138916015625
    },
    {
      "epoch": 0.00019991455078125,
      "model_forward_time": 0.11603450775146484,
      "step": 32754
    },
    {
      "epoch": 0.00019991455078125,
      "step": 32754,
      "training_step_time": 0.40667223930358887
    },
    {
      "epoch": 0.000199920654296875,
      "model_forward_time": 0.11581015586853027,
      "step": 32755
    },
    {
      "epoch": 0.000199920654296875,
      "step": 32755,
      "training_step_time": 0.511432409286499
    },
    {
      "epoch": 0.0001999267578125,
      "model_forward_time": 0.11493468284606934,
      "step": 32756
    },
    {
      "epoch": 0.0001999267578125,
      "step": 32756,
      "training_step_time": 0.47315287590026855
    },
    {
      "epoch": 0.000199932861328125,
      "model_forward_time": 0.1149296760559082,
      "step": 32757
    },
    {
      "epoch": 0.000199932861328125,
      "step": 32757,
      "training_step_time": 0.41736268997192383
    },
    {
      "epoch": 0.00019993896484375,
      "model_forward_time": 0.11449575424194336,
      "step": 32758
    },
    {
      "epoch": 0.00019993896484375,
      "step": 32758,
      "training_step_time": 0.473433256149292
    },
    {
      "epoch": 0.000199945068359375,
      "model_forward_time": 0.11519312858581543,
      "step": 32759
    },
    {
      "epoch": 0.000199945068359375,
      "step": 32759,
      "training_step_time": 0.3990199565887451
    },
    {
      "epoch": 0.000199951171875,
      "grad_norm": 0.1257047951221466,
      "learning_rate": 4.65305036816978e-05,
      "loss": 0.0382,
      "step": 32760
    },
    {
      "epoch": 0.000199951171875,
      "model_forward_time": 0.11442351341247559,
      "step": 32760
    },
    {
      "epoch": 0.000199951171875,
      "step": 32760,
      "training_step_time": 0.40781617164611816
    },
    {
      "epoch": 0.000199957275390625,
      "model_forward_time": 0.11526226997375488,
      "step": 32761
    },
    {
      "epoch": 0.000199957275390625,
      "step": 32761,
      "training_step_time": 0.5206921100616455
    },
    {
      "epoch": 0.00019996337890625,
      "model_forward_time": 0.11530303955078125,
      "step": 32762
    },
    {
      "epoch": 0.00019996337890625,
      "step": 32762,
      "training_step_time": 0.3827078342437744
    },
    {
      "epoch": 0.000199969482421875,
      "model_forward_time": 0.11567115783691406,
      "step": 32763
    },
    {
      "epoch": 0.000199969482421875,
      "step": 32763,
      "training_step_time": 0.38677406311035156
    },
    {
      "epoch": 0.0001999755859375,
      "model_forward_time": 0.11492800712585449,
      "step": 32764
    },
    {
      "epoch": 0.0001999755859375,
      "step": 32764,
      "training_step_time": 0.3953437805175781
    },
    {
      "epoch": 0.000199981689453125,
      "model_forward_time": 0.1148679256439209,
      "step": 32765
    },
    {
      "epoch": 0.000199981689453125,
      "step": 32765,
      "training_step_time": 0.39343738555908203
    },
    {
      "epoch": 0.00019998779296875,
      "model_forward_time": 0.1148676872253418,
      "step": 32766
    },
    {
      "epoch": 0.00019998779296875,
      "step": 32766,
      "training_step_time": 0.3968324661254883
    },
    {
      "epoch": 0.000199993896484375,
      "model_forward_time": 0.11516809463500977,
      "step": 32767
    },
    {
      "epoch": 0.000199993896484375,
      "step": 32767,
      "training_step_time": 0.7109375
    },
    {
      "epoch": 0.0002,
      "model_forward_time": 0.11485791206359863,
      "step": 32768
    },
    {
      "epoch": 0.0002,
      "step": 32768,
      "training_step_time": 0.4487731456756592
    },
    {
      "epoch": 0.000200006103515625,
      "model_forward_time": 0.11429786682128906,
      "step": 32769
    },
    {
      "epoch": 0.000200006103515625,
      "step": 32769,
      "training_step_time": 0.409041166305542
    },
    {
      "epoch": 0.00020001220703125,
      "grad_norm": 0.1387123167514801,
      "learning_rate": 4.650301280477184e-05,
      "loss": 0.0451,
      "step": 32770
    },
    {
      "epoch": 0.00020001220703125,
      "model_forward_time": 0.11501646041870117,
      "step": 32770
    },
    {
      "epoch": 0.00020001220703125,
      "step": 32770,
      "training_step_time": 0.5031776428222656
    },
    {
      "epoch": 0.000200018310546875,
      "model_forward_time": 0.11419463157653809,
      "step": 32771
    },
    {
      "epoch": 0.000200018310546875,
      "step": 32771,
      "training_step_time": 0.481461763381958
    },
    {
      "epoch": 0.0002000244140625,
      "model_forward_time": 0.11469030380249023,
      "step": 32772
    },
    {
      "epoch": 0.0002000244140625,
      "step": 32772,
      "training_step_time": 0.3953251838684082
    },
    {
      "epoch": 0.000200030517578125,
      "model_forward_time": 0.11451125144958496,
      "step": 32773
    },
    {
      "epoch": 0.000200030517578125,
      "step": 32773,
      "training_step_time": 0.39423179626464844
    },
    {
      "epoch": 0.00020003662109375,
      "model_forward_time": 0.1156473159790039,
      "step": 32774
    },
    {
      "epoch": 0.00020003662109375,
      "step": 32774,
      "training_step_time": 0.3930087089538574
    },
    {
      "epoch": 0.000200042724609375,
      "model_forward_time": 0.1151432991027832,
      "step": 32775
    },
    {
      "epoch": 0.000200042724609375,
      "step": 32775,
      "training_step_time": 0.3989691734313965
    },
    {
      "epoch": 0.000200048828125,
      "model_forward_time": 0.11459040641784668,
      "step": 32776
    },
    {
      "epoch": 0.000200048828125,
      "step": 32776,
      "training_step_time": 0.40233635902404785
    },
    {
      "epoch": 0.000200054931640625,
      "model_forward_time": 0.11471819877624512,
      "step": 32777
    },
    {
      "epoch": 0.000200054931640625,
      "step": 32777,
      "training_step_time": 0.40009284019470215
    },
    {
      "epoch": 0.00020006103515625,
      "model_forward_time": 0.11527061462402344,
      "step": 32778
    },
    {
      "epoch": 0.00020006103515625,
      "step": 32778,
      "training_step_time": 0.4036552906036377
    },
    {
      "epoch": 0.000200067138671875,
      "model_forward_time": 0.11556434631347656,
      "step": 32779
    },
    {
      "epoch": 0.000200067138671875,
      "step": 32779,
      "training_step_time": 0.7605404853820801
    },
    {
      "epoch": 0.0002000732421875,
      "grad_norm": 0.14065396785736084,
      "learning_rate": 4.647552299013828e-05,
      "loss": 0.0404,
      "step": 32780
    },
    {
      "epoch": 0.0002000732421875,
      "model_forward_time": 0.11450910568237305,
      "step": 32780
    },
    {
      "epoch": 0.0002000732421875,
      "step": 32780,
      "training_step_time": 0.39740419387817383
    },
    {
      "epoch": 0.000200079345703125,
      "model_forward_time": 0.11484766006469727,
      "step": 32781
    },
    {
      "epoch": 0.000200079345703125,
      "step": 32781,
      "training_step_time": 0.40323829650878906
    },
    {
      "epoch": 0.00020008544921875,
      "model_forward_time": 0.1144707202911377,
      "step": 32782
    },
    {
      "epoch": 0.00020008544921875,
      "step": 32782,
      "training_step_time": 0.39486122131347656
    },
    {
      "epoch": 0.000200091552734375,
      "model_forward_time": 0.11456727981567383,
      "step": 32783
    },
    {
      "epoch": 0.000200091552734375,
      "step": 32783,
      "training_step_time": 0.42420125007629395
    },
    {
      "epoch": 0.00020009765625,
      "model_forward_time": 0.1159217357635498,
      "step": 32784
    },
    {
      "epoch": 0.00020009765625,
      "step": 32784,
      "training_step_time": 0.4652872085571289
    },
    {
      "epoch": 0.000200103759765625,
      "model_forward_time": 0.11512064933776855,
      "step": 32785
    },
    {
      "epoch": 0.000200103759765625,
      "step": 32785,
      "training_step_time": 0.5456697940826416
    },
    {
      "epoch": 0.00020010986328125,
      "model_forward_time": 0.11495161056518555,
      "step": 32786
    },
    {
      "epoch": 0.00020010986328125,
      "step": 32786,
      "training_step_time": 0.37986183166503906
    },
    {
      "epoch": 0.000200115966796875,
      "model_forward_time": 0.11465287208557129,
      "step": 32787
    },
    {
      "epoch": 0.000200115966796875,
      "step": 32787,
      "training_step_time": 0.38451504707336426
    },
    {
      "epoch": 0.0002001220703125,
      "model_forward_time": 0.1143648624420166,
      "step": 32788
    },
    {
      "epoch": 0.0002001220703125,
      "step": 32788,
      "training_step_time": 0.39622926712036133
    },
    {
      "epoch": 0.000200128173828125,
      "model_forward_time": 0.11460590362548828,
      "step": 32789
    },
    {
      "epoch": 0.000200128173828125,
      "step": 32789,
      "training_step_time": 0.39643430709838867
    },
    {
      "epoch": 0.00020013427734375,
      "grad_norm": 0.11039458960294724,
      "learning_rate": 4.6448034246147754e-05,
      "loss": 0.0361,
      "step": 32790
    },
    {
      "epoch": 0.00020013427734375,
      "model_forward_time": 0.11523103713989258,
      "step": 32790
    },
    {
      "epoch": 0.00020013427734375,
      "step": 32790,
      "training_step_time": 0.3973872661590576
    },
    {
      "epoch": 0.000200140380859375,
      "model_forward_time": 0.11517786979675293,
      "step": 32791
    },
    {
      "epoch": 0.000200140380859375,
      "step": 32791,
      "training_step_time": 0.7269835472106934
    },
    {
      "epoch": 0.000200146484375,
      "model_forward_time": 0.11470866203308105,
      "step": 32792
    },
    {
      "epoch": 0.000200146484375,
      "step": 32792,
      "training_step_time": 0.39140892028808594
    },
    {
      "epoch": 0.000200152587890625,
      "model_forward_time": 0.1150200366973877,
      "step": 32793
    },
    {
      "epoch": 0.000200152587890625,
      "step": 32793,
      "training_step_time": 0.39520263671875
    },
    {
      "epoch": 0.00020015869140625,
      "model_forward_time": 0.11550521850585938,
      "step": 32794
    },
    {
      "epoch": 0.00020015869140625,
      "step": 32794,
      "training_step_time": 0.39876794815063477
    },
    {
      "epoch": 0.000200164794921875,
      "model_forward_time": 0.11504077911376953,
      "step": 32795
    },
    {
      "epoch": 0.000200164794921875,
      "step": 32795,
      "training_step_time": 0.49744510650634766
    },
    {
      "epoch": 0.0002001708984375,
      "model_forward_time": 0.11494708061218262,
      "step": 32796
    },
    {
      "epoch": 0.0002001708984375,
      "step": 32796,
      "training_step_time": 0.3924829959869385
    },
    {
      "epoch": 0.000200177001953125,
      "model_forward_time": 0.11633825302124023,
      "step": 32797
    },
    {
      "epoch": 0.000200177001953125,
      "step": 32797,
      "training_step_time": 0.5167453289031982
    },
    {
      "epoch": 0.00020018310546875,
      "model_forward_time": 0.11496877670288086,
      "step": 32798
    },
    {
      "epoch": 0.00020018310546875,
      "step": 32798,
      "training_step_time": 0.4467952251434326
    },
    {
      "epoch": 0.000200189208984375,
      "model_forward_time": 0.11501312255859375,
      "step": 32799
    },
    {
      "epoch": 0.000200189208984375,
      "step": 32799,
      "training_step_time": 0.4863743782043457
    },
    {
      "epoch": 0.0002001953125,
      "grad_norm": 0.15580064058303833,
      "learning_rate": 4.642054658115067e-05,
      "loss": 0.0424,
      "step": 32800
    },
    {
      "epoch": 0.0002001953125,
      "model_forward_time": 0.11492133140563965,
      "step": 32800
    },
    {
      "epoch": 0.0002001953125,
      "step": 32800,
      "training_step_time": 0.40436625480651855
    },
    {
      "epoch": 0.000200201416015625,
      "model_forward_time": 0.11738204956054688,
      "step": 32801
    },
    {
      "epoch": 0.000200201416015625,
      "step": 32801,
      "training_step_time": 1.0019423961639404
    },
    {
      "epoch": 0.00020020751953125,
      "model_forward_time": 0.11727333068847656,
      "step": 32802
    },
    {
      "epoch": 0.00020020751953125,
      "step": 32802,
      "training_step_time": 0.5800273418426514
    },
    {
      "epoch": 0.000200213623046875,
      "model_forward_time": 0.12021040916442871,
      "step": 32803
    },
    {
      "epoch": 0.000200213623046875,
      "step": 32803,
      "training_step_time": 0.6542370319366455
    },
    {
      "epoch": 0.0002002197265625,
      "model_forward_time": 0.11624813079833984,
      "step": 32804
    },
    {
      "epoch": 0.0002002197265625,
      "step": 32804,
      "training_step_time": 0.7130045890808105
    },
    {
      "epoch": 0.000200225830078125,
      "model_forward_time": 0.1201925277709961,
      "step": 32805
    },
    {
      "epoch": 0.000200225830078125,
      "step": 32805,
      "training_step_time": 0.6271615028381348
    },
    {
      "epoch": 0.00020023193359375,
      "model_forward_time": 0.11747074127197266,
      "step": 32806
    },
    {
      "epoch": 0.00020023193359375,
      "step": 32806,
      "training_step_time": 0.7149975299835205
    },
    {
      "epoch": 0.000200238037109375,
      "model_forward_time": 0.12543630599975586,
      "step": 32807
    },
    {
      "epoch": 0.000200238037109375,
      "step": 32807,
      "training_step_time": 0.5482177734375
    },
    {
      "epoch": 0.000200244140625,
      "model_forward_time": 0.11960172653198242,
      "step": 32808
    },
    {
      "epoch": 0.000200244140625,
      "step": 32808,
      "training_step_time": 0.6206519603729248
    },
    {
      "epoch": 0.000200250244140625,
      "model_forward_time": 0.1168053150177002,
      "step": 32809
    },
    {
      "epoch": 0.000200250244140625,
      "step": 32809,
      "training_step_time": 0.7391245365142822
    },
    {
      "epoch": 0.00020025634765625,
      "grad_norm": 0.12950418889522552,
      "learning_rate": 4.6393060003496984e-05,
      "loss": 0.0447,
      "step": 32810
    },
    {
      "epoch": 0.00020025634765625,
      "model_forward_time": 0.11598896980285645,
      "step": 32810
    },
    {
      "epoch": 0.00020025634765625,
      "step": 32810,
      "training_step_time": 0.6339616775512695
    },
    {
      "epoch": 0.000200262451171875,
      "model_forward_time": 0.11795759201049805,
      "step": 32811
    },
    {
      "epoch": 0.000200262451171875,
      "step": 32811,
      "training_step_time": 0.6129124164581299
    },
    {
      "epoch": 0.0002002685546875,
      "model_forward_time": 0.12095022201538086,
      "step": 32812
    },
    {
      "epoch": 0.0002002685546875,
      "step": 32812,
      "training_step_time": 0.6869802474975586
    },
    {
      "epoch": 0.000200274658203125,
      "model_forward_time": 0.1154184341430664,
      "step": 32813
    },
    {
      "epoch": 0.000200274658203125,
      "step": 32813,
      "training_step_time": 0.6633667945861816
    },
    {
      "epoch": 0.00020028076171875,
      "model_forward_time": 0.11960101127624512,
      "step": 32814
    },
    {
      "epoch": 0.00020028076171875,
      "step": 32814,
      "training_step_time": 0.6853246688842773
    },
    {
      "epoch": 0.000200286865234375,
      "model_forward_time": 0.12022709846496582,
      "step": 32815
    },
    {
      "epoch": 0.000200286865234375,
      "step": 32815,
      "training_step_time": 0.6854548454284668
    },
    {
      "epoch": 0.00020029296875,
      "model_forward_time": 0.12688755989074707,
      "step": 32816
    },
    {
      "epoch": 0.00020029296875,
      "step": 32816,
      "training_step_time": 0.704784631729126
    },
    {
      "epoch": 0.000200299072265625,
      "model_forward_time": 0.11676955223083496,
      "step": 32817
    },
    {
      "epoch": 0.000200299072265625,
      "step": 32817,
      "training_step_time": 0.6703076362609863
    },
    {
      "epoch": 0.00020030517578125,
      "model_forward_time": 0.11691951751708984,
      "step": 32818
    },
    {
      "epoch": 0.00020030517578125,
      "step": 32818,
      "training_step_time": 0.8110857009887695
    },
    {
      "epoch": 0.000200311279296875,
      "model_forward_time": 0.11722779273986816,
      "step": 32819
    },
    {
      "epoch": 0.000200311279296875,
      "step": 32819,
      "training_step_time": 0.6729691028594971
    },
    {
      "epoch": 0.0002003173828125,
      "grad_norm": 0.13129694759845734,
      "learning_rate": 4.6365574521536445e-05,
      "loss": 0.0423,
      "step": 32820
    },
    {
      "epoch": 0.0002003173828125,
      "model_forward_time": 0.11919307708740234,
      "step": 32820
    },
    {
      "epoch": 0.0002003173828125,
      "step": 32820,
      "training_step_time": 0.7785544395446777
    },
    {
      "epoch": 0.000200323486328125,
      "model_forward_time": 0.11703801155090332,
      "step": 32821
    },
    {
      "epoch": 0.000200323486328125,
      "step": 32821,
      "training_step_time": 0.6695661544799805
    },
    {
      "epoch": 0.00020032958984375,
      "model_forward_time": 0.11936450004577637,
      "step": 32822
    },
    {
      "epoch": 0.00020032958984375,
      "step": 32822,
      "training_step_time": 0.6966848373413086
    },
    {
      "epoch": 0.000200335693359375,
      "model_forward_time": 0.11805343627929688,
      "step": 32823
    },
    {
      "epoch": 0.000200335693359375,
      "step": 32823,
      "training_step_time": 0.675048828125
    },
    {
      "epoch": 0.000200341796875,
      "model_forward_time": 0.12314057350158691,
      "step": 32824
    },
    {
      "epoch": 0.000200341796875,
      "step": 32824,
      "training_step_time": 0.642127513885498
    },
    {
      "epoch": 0.000200347900390625,
      "model_forward_time": 0.11764860153198242,
      "step": 32825
    },
    {
      "epoch": 0.000200347900390625,
      "step": 32825,
      "training_step_time": 0.6436281204223633
    },
    {
      "epoch": 0.00020035400390625,
      "model_forward_time": 0.11758112907409668,
      "step": 32826
    },
    {
      "epoch": 0.00020035400390625,
      "step": 32826,
      "training_step_time": 0.6967437267303467
    },
    {
      "epoch": 0.000200360107421875,
      "model_forward_time": 0.1221613883972168,
      "step": 32827
    },
    {
      "epoch": 0.000200360107421875,
      "step": 32827,
      "training_step_time": 0.7595741748809814
    },
    {
      "epoch": 0.0002003662109375,
      "model_forward_time": 0.1204233169555664,
      "step": 32828
    },
    {
      "epoch": 0.0002003662109375,
      "step": 32828,
      "training_step_time": 0.73140549659729
    },
    {
      "epoch": 0.000200372314453125,
      "model_forward_time": 0.1241152286529541,
      "step": 32829
    },
    {
      "epoch": 0.000200372314453125,
      "step": 32829,
      "training_step_time": 0.6337528228759766
    },
    {
      "epoch": 0.00020037841796875,
      "grad_norm": 0.0952853187918663,
      "learning_rate": 4.633809014361843e-05,
      "loss": 0.047,
      "step": 32830
    },
    {
      "epoch": 0.00020037841796875,
      "model_forward_time": 0.11737895011901855,
      "step": 32830
    },
    {
      "epoch": 0.00020037841796875,
      "step": 32830,
      "training_step_time": 0.6445574760437012
    },
    {
      "epoch": 0.000200384521484375,
      "model_forward_time": 0.12415504455566406,
      "step": 32831
    },
    {
      "epoch": 0.000200384521484375,
      "step": 32831,
      "training_step_time": 0.6279592514038086
    },
    {
      "epoch": 0.000200390625,
      "model_forward_time": 0.1255812644958496,
      "step": 32832
    },
    {
      "epoch": 0.000200390625,
      "step": 32832,
      "training_step_time": 0.6839687824249268
    },
    {
      "epoch": 0.000200396728515625,
      "model_forward_time": 0.12002825736999512,
      "step": 32833
    },
    {
      "epoch": 0.000200396728515625,
      "step": 32833,
      "training_step_time": 0.7124342918395996
    },
    {
      "epoch": 0.00020040283203125,
      "model_forward_time": 0.11722636222839355,
      "step": 32834
    },
    {
      "epoch": 0.00020040283203125,
      "step": 32834,
      "training_step_time": 0.6449558734893799
    },
    {
      "epoch": 0.000200408935546875,
      "model_forward_time": 0.12265825271606445,
      "step": 32835
    },
    {
      "epoch": 0.000200408935546875,
      "step": 32835,
      "training_step_time": 0.7683777809143066
    },
    {
      "epoch": 0.0002004150390625,
      "model_forward_time": 0.11848759651184082,
      "step": 32836
    },
    {
      "epoch": 0.0002004150390625,
      "step": 32836,
      "training_step_time": 0.5521626472473145
    },
    {
      "epoch": 0.000200421142578125,
      "model_forward_time": 0.11873269081115723,
      "step": 32837
    },
    {
      "epoch": 0.000200421142578125,
      "step": 32837,
      "training_step_time": 0.7560558319091797
    },
    {
      "epoch": 0.00020042724609375,
      "model_forward_time": 0.11948966979980469,
      "step": 32838
    },
    {
      "epoch": 0.00020042724609375,
      "step": 32838,
      "training_step_time": 0.6125152111053467
    },
    {
      "epoch": 0.000200433349609375,
      "model_forward_time": 0.13182306289672852,
      "step": 32839
    },
    {
      "epoch": 0.000200433349609375,
      "step": 32839,
      "training_step_time": 0.7110745906829834
    },
    {
      "epoch": 0.000200439453125,
      "grad_norm": 0.10197187960147858,
      "learning_rate": 4.631060687809191e-05,
      "loss": 0.0487,
      "step": 32840
    },
    {
      "epoch": 0.000200439453125,
      "model_forward_time": 0.12151908874511719,
      "step": 32840
    },
    {
      "epoch": 0.000200439453125,
      "step": 32840,
      "training_step_time": 0.7224628925323486
    },
    {
      "epoch": 0.000200445556640625,
      "model_forward_time": 0.1176905632019043,
      "step": 32841
    },
    {
      "epoch": 0.000200445556640625,
      "step": 32841,
      "training_step_time": 0.6708276271820068
    },
    {
      "epoch": 0.00020045166015625,
      "model_forward_time": 0.11944270133972168,
      "step": 32842
    },
    {
      "epoch": 0.00020045166015625,
      "step": 32842,
      "training_step_time": 0.6396143436431885
    },
    {
      "epoch": 0.000200457763671875,
      "model_forward_time": 0.11656975746154785,
      "step": 32843
    },
    {
      "epoch": 0.000200457763671875,
      "step": 32843,
      "training_step_time": 0.8073327541351318
    },
    {
      "epoch": 0.0002004638671875,
      "model_forward_time": 0.12298727035522461,
      "step": 32844
    },
    {
      "epoch": 0.0002004638671875,
      "step": 32844,
      "training_step_time": 0.795175313949585
    },
    {
      "epoch": 0.000200469970703125,
      "model_forward_time": 0.12079286575317383,
      "step": 32845
    },
    {
      "epoch": 0.000200469970703125,
      "step": 32845,
      "training_step_time": 0.6870195865631104
    },
    {
      "epoch": 0.00020047607421875,
      "model_forward_time": 0.1249387264251709,
      "step": 32846
    },
    {
      "epoch": 0.00020047607421875,
      "step": 32846,
      "training_step_time": 0.6474854946136475
    },
    {
      "epoch": 0.000200482177734375,
      "model_forward_time": 0.11629104614257812,
      "step": 32847
    },
    {
      "epoch": 0.000200482177734375,
      "step": 32847,
      "training_step_time": 0.6737267971038818
    },
    {
      "epoch": 0.00020048828125,
      "model_forward_time": 0.11944913864135742,
      "step": 32848
    },
    {
      "epoch": 0.00020048828125,
      "step": 32848,
      "training_step_time": 0.6827685832977295
    },
    {
      "epoch": 0.000200494384765625,
      "model_forward_time": 0.1212911605834961,
      "step": 32849
    },
    {
      "epoch": 0.000200494384765625,
      "step": 32849,
      "training_step_time": 0.6512465476989746
    },
    {
      "epoch": 0.00020050048828125,
      "grad_norm": 0.14662876725196838,
      "learning_rate": 4.6283124733305624e-05,
      "loss": 0.0498,
      "step": 32850
    },
    {
      "epoch": 0.00020050048828125,
      "model_forward_time": 0.12408709526062012,
      "step": 32850
    },
    {
      "epoch": 0.00020050048828125,
      "step": 32850,
      "training_step_time": 0.6571688652038574
    },
    {
      "epoch": 0.000200506591796875,
      "model_forward_time": 0.11882424354553223,
      "step": 32851
    },
    {
      "epoch": 0.000200506591796875,
      "step": 32851,
      "training_step_time": 0.6596572399139404
    },
    {
      "epoch": 0.0002005126953125,
      "model_forward_time": 0.12020587921142578,
      "step": 32852
    },
    {
      "epoch": 0.0002005126953125,
      "step": 32852,
      "training_step_time": 0.7194602489471436
    },
    {
      "epoch": 0.000200518798828125,
      "model_forward_time": 0.11909699440002441,
      "step": 32853
    },
    {
      "epoch": 0.000200518798828125,
      "step": 32853,
      "training_step_time": 0.6887712478637695
    },
    {
      "epoch": 0.00020052490234375,
      "model_forward_time": 0.11598324775695801,
      "step": 32854
    },
    {
      "epoch": 0.00020052490234375,
      "step": 32854,
      "training_step_time": 0.7500877380371094
    },
    {
      "epoch": 0.000200531005859375,
      "model_forward_time": 0.116455078125,
      "step": 32855
    },
    {
      "epoch": 0.000200531005859375,
      "step": 32855,
      "training_step_time": 0.7255783081054688
    },
    {
      "epoch": 0.000200537109375,
      "model_forward_time": 0.12285852432250977,
      "step": 32856
    },
    {
      "epoch": 0.000200537109375,
      "step": 32856,
      "training_step_time": 0.7050065994262695
    },
    {
      "epoch": 0.000200543212890625,
      "model_forward_time": 0.12205100059509277,
      "step": 32857
    },
    {
      "epoch": 0.000200543212890625,
      "step": 32857,
      "training_step_time": 0.7077858448028564
    },
    {
      "epoch": 0.00020054931640625,
      "model_forward_time": 0.11928009986877441,
      "step": 32858
    },
    {
      "epoch": 0.00020054931640625,
      "step": 32858,
      "training_step_time": 0.6750564575195312
    },
    {
      "epoch": 0.000200555419921875,
      "model_forward_time": 0.1389610767364502,
      "step": 32859
    },
    {
      "epoch": 0.000200555419921875,
      "step": 32859,
      "training_step_time": 0.6370041370391846
    },
    {
      "epoch": 0.0002005615234375,
      "grad_norm": 0.12680751085281372,
      "learning_rate": 4.625564371760791e-05,
      "loss": 0.0433,
      "step": 32860
    },
    {
      "epoch": 0.0002005615234375,
      "model_forward_time": 0.1179342269897461,
      "step": 32860
    },
    {
      "epoch": 0.0002005615234375,
      "step": 32860,
      "training_step_time": 0.6529908180236816
    },
    {
      "epoch": 0.000200567626953125,
      "model_forward_time": 0.12113380432128906,
      "step": 32861
    },
    {
      "epoch": 0.000200567626953125,
      "step": 32861,
      "training_step_time": 0.6631953716278076
    },
    {
      "epoch": 0.00020057373046875,
      "model_forward_time": 0.12133121490478516,
      "step": 32862
    },
    {
      "epoch": 0.00020057373046875,
      "step": 32862,
      "training_step_time": 0.7380647659301758
    },
    {
      "epoch": 0.000200579833984375,
      "model_forward_time": 0.11829566955566406,
      "step": 32863
    },
    {
      "epoch": 0.000200579833984375,
      "step": 32863,
      "training_step_time": 0.5817863941192627
    },
    {
      "epoch": 0.0002005859375,
      "model_forward_time": 0.12770509719848633,
      "step": 32864
    },
    {
      "epoch": 0.0002005859375,
      "step": 32864,
      "training_step_time": 0.6755430698394775
    },
    {
      "epoch": 0.000200592041015625,
      "model_forward_time": 0.11914515495300293,
      "step": 32865
    },
    {
      "epoch": 0.000200592041015625,
      "step": 32865,
      "training_step_time": 0.6777386665344238
    },
    {
      "epoch": 0.00020059814453125,
      "model_forward_time": 0.12680482864379883,
      "step": 32866
    },
    {
      "epoch": 0.00020059814453125,
      "step": 32866,
      "training_step_time": 0.5979523658752441
    },
    {
      "epoch": 0.000200604248046875,
      "model_forward_time": 0.133528470993042,
      "step": 32867
    },
    {
      "epoch": 0.000200604248046875,
      "step": 32867,
      "training_step_time": 0.6192302703857422
    },
    {
      "epoch": 0.0002006103515625,
      "model_forward_time": 0.11999106407165527,
      "step": 32868
    },
    {
      "epoch": 0.0002006103515625,
      "step": 32868,
      "training_step_time": 0.5731453895568848
    },
    {
      "epoch": 0.000200616455078125,
      "model_forward_time": 0.12361884117126465,
      "step": 32869
    },
    {
      "epoch": 0.000200616455078125,
      "step": 32869,
      "training_step_time": 0.5863912105560303
    },
    {
      "epoch": 0.00020062255859375,
      "grad_norm": 0.12916353344917297,
      "learning_rate": 4.622816383934676e-05,
      "loss": 0.047,
      "step": 32870
    },
    {
      "epoch": 0.00020062255859375,
      "model_forward_time": 0.11875247955322266,
      "step": 32870
    },
    {
      "epoch": 0.00020062255859375,
      "step": 32870,
      "training_step_time": 0.5217580795288086
    },
    {
      "epoch": 0.000200628662109375,
      "model_forward_time": 0.12252449989318848,
      "step": 32871
    },
    {
      "epoch": 0.000200628662109375,
      "step": 32871,
      "training_step_time": 0.4815702438354492
    },
    {
      "epoch": 0.000200634765625,
      "model_forward_time": 0.11774754524230957,
      "step": 32872
    },
    {
      "epoch": 0.000200634765625,
      "step": 32872,
      "training_step_time": 0.4762544631958008
    },
    {
      "epoch": 0.000200640869140625,
      "model_forward_time": 0.11714529991149902,
      "step": 32873
    },
    {
      "epoch": 0.000200640869140625,
      "step": 32873,
      "training_step_time": 0.5150904655456543
    },
    {
      "epoch": 0.00020064697265625,
      "model_forward_time": 0.11766791343688965,
      "step": 32874
    },
    {
      "epoch": 0.00020064697265625,
      "step": 32874,
      "training_step_time": 0.42058491706848145
    },
    {
      "epoch": 0.000200653076171875,
      "model_forward_time": 0.11638975143432617,
      "step": 32875
    },
    {
      "epoch": 0.000200653076171875,
      "step": 32875,
      "training_step_time": 0.5373287200927734
    },
    {
      "epoch": 0.0002006591796875,
      "model_forward_time": 0.11597084999084473,
      "step": 32876
    },
    {
      "epoch": 0.0002006591796875,
      "step": 32876,
      "training_step_time": 0.4392096996307373
    },
    {
      "epoch": 0.000200665283203125,
      "model_forward_time": 0.11510038375854492,
      "step": 32877
    },
    {
      "epoch": 0.000200665283203125,
      "step": 32877,
      "training_step_time": 0.44693446159362793
    },
    {
      "epoch": 0.00020067138671875,
      "model_forward_time": 0.11535477638244629,
      "step": 32878
    },
    {
      "epoch": 0.00020067138671875,
      "step": 32878,
      "training_step_time": 0.40520739555358887
    },
    {
      "epoch": 0.000200677490234375,
      "model_forward_time": 0.11505937576293945,
      "step": 32879
    },
    {
      "epoch": 0.000200677490234375,
      "step": 32879,
      "training_step_time": 0.39966678619384766
    },
    {
      "epoch": 0.00020068359375,
      "grad_norm": 0.1208755299448967,
      "learning_rate": 4.620068510686985e-05,
      "loss": 0.0459,
      "step": 32880
    },
    {
      "epoch": 0.00020068359375,
      "model_forward_time": 0.11497831344604492,
      "step": 32880
    },
    {
      "epoch": 0.00020068359375,
      "step": 32880,
      "training_step_time": 0.45885515213012695
    },
    {
      "epoch": 0.000200689697265625,
      "model_forward_time": 0.11440825462341309,
      "step": 32881
    },
    {
      "epoch": 0.000200689697265625,
      "step": 32881,
      "training_step_time": 0.4001150131225586
    },
    {
      "epoch": 0.00020069580078125,
      "model_forward_time": 0.11458921432495117,
      "step": 32882
    },
    {
      "epoch": 0.00020069580078125,
      "step": 32882,
      "training_step_time": 0.39371180534362793
    },
    {
      "epoch": 0.000200701904296875,
      "model_forward_time": 0.11497020721435547,
      "step": 32883
    },
    {
      "epoch": 0.000200701904296875,
      "step": 32883,
      "training_step_time": 0.3988158702850342
    },
    {
      "epoch": 0.0002007080078125,
      "model_forward_time": 0.11483263969421387,
      "step": 32884
    },
    {
      "epoch": 0.0002007080078125,
      "step": 32884,
      "training_step_time": 0.3856639862060547
    },
    {
      "epoch": 0.000200714111328125,
      "model_forward_time": 0.11521291732788086,
      "step": 32885
    },
    {
      "epoch": 0.000200714111328125,
      "step": 32885,
      "training_step_time": 0.3938322067260742
    },
    {
      "epoch": 0.00020072021484375,
      "model_forward_time": 0.11571717262268066,
      "step": 32886
    },
    {
      "epoch": 0.00020072021484375,
      "step": 32886,
      "training_step_time": 0.3952622413635254
    },
    {
      "epoch": 0.000200726318359375,
      "model_forward_time": 0.11515116691589355,
      "step": 32887
    },
    {
      "epoch": 0.000200726318359375,
      "step": 32887,
      "training_step_time": 0.3869795799255371
    },
    {
      "epoch": 0.000200732421875,
      "model_forward_time": 0.11552834510803223,
      "step": 32888
    },
    {
      "epoch": 0.000200732421875,
      "step": 32888,
      "training_step_time": 0.4526355266571045
    },
    {
      "epoch": 0.000200738525390625,
      "model_forward_time": 0.11504673957824707,
      "step": 32889
    },
    {
      "epoch": 0.000200738525390625,
      "step": 32889,
      "training_step_time": 0.3988158702850342
    },
    {
      "epoch": 0.00020074462890625,
      "grad_norm": 0.13726429641246796,
      "learning_rate": 4.6173207528524476e-05,
      "loss": 0.0475,
      "step": 32890
    },
    {
      "epoch": 0.00020074462890625,
      "model_forward_time": 0.11535000801086426,
      "step": 32890
    },
    {
      "epoch": 0.00020074462890625,
      "step": 32890,
      "training_step_time": 0.5046284198760986
    },
    {
      "epoch": 0.000200750732421875,
      "model_forward_time": 0.11530256271362305,
      "step": 32891
    },
    {
      "epoch": 0.000200750732421875,
      "step": 32891,
      "training_step_time": 0.5068225860595703
    },
    {
      "epoch": 0.0002007568359375,
      "model_forward_time": 0.11570286750793457,
      "step": 32892
    },
    {
      "epoch": 0.0002007568359375,
      "step": 32892,
      "training_step_time": 0.39830946922302246
    },
    {
      "epoch": 0.000200762939453125,
      "model_forward_time": 0.1152186393737793,
      "step": 32893
    },
    {
      "epoch": 0.000200762939453125,
      "step": 32893,
      "training_step_time": 0.398406982421875
    },
    {
      "epoch": 0.00020076904296875,
      "model_forward_time": 0.11538529396057129,
      "step": 32894
    },
    {
      "epoch": 0.00020076904296875,
      "step": 32894,
      "training_step_time": 0.4059910774230957
    },
    {
      "epoch": 0.000200775146484375,
      "model_forward_time": 0.11532044410705566,
      "step": 32895
    },
    {
      "epoch": 0.000200775146484375,
      "step": 32895,
      "training_step_time": 0.3944685459136963
    },
    {
      "epoch": 0.00020078125,
      "model_forward_time": 0.11586904525756836,
      "step": 32896
    },
    {
      "epoch": 0.00020078125,
      "step": 32896,
      "training_step_time": 0.40860915184020996
    },
    {
      "epoch": 0.000200787353515625,
      "model_forward_time": 0.11511492729187012,
      "step": 32897
    },
    {
      "epoch": 0.000200787353515625,
      "step": 32897,
      "training_step_time": 0.3948988914489746
    },
    {
      "epoch": 0.00020079345703125,
      "model_forward_time": 0.11539745330810547,
      "step": 32898
    },
    {
      "epoch": 0.00020079345703125,
      "step": 32898,
      "training_step_time": 0.3852565288543701
    },
    {
      "epoch": 0.000200799560546875,
      "model_forward_time": 0.1151878833770752,
      "step": 32899
    },
    {
      "epoch": 0.000200799560546875,
      "step": 32899,
      "training_step_time": 0.4043745994567871
    },
    {
      "epoch": 0.0002008056640625,
      "grad_norm": 0.13524186611175537,
      "learning_rate": 4.6145731112657644e-05,
      "loss": 0.0494,
      "step": 32900
    },
    {
      "epoch": 0.0002008056640625,
      "model_forward_time": 0.11502599716186523,
      "step": 32900
    },
    {
      "epoch": 0.0002008056640625,
      "step": 32900,
      "training_step_time": 0.3961615562438965
    },
    {
      "epoch": 0.000200811767578125,
      "model_forward_time": 0.11491894721984863,
      "step": 32901
    },
    {
      "epoch": 0.000200811767578125,
      "step": 32901,
      "training_step_time": 0.39536333084106445
    },
    {
      "epoch": 0.00020081787109375,
      "model_forward_time": 0.11557745933532715,
      "step": 32902
    },
    {
      "epoch": 0.00020081787109375,
      "step": 32902,
      "training_step_time": 0.39939188957214355
    },
    {
      "epoch": 0.000200823974609375,
      "model_forward_time": 0.11513996124267578,
      "step": 32903
    },
    {
      "epoch": 0.000200823974609375,
      "step": 32903,
      "training_step_time": 0.9252753257751465
    },
    {
      "epoch": 0.000200830078125,
      "model_forward_time": 0.1145930290222168,
      "step": 32904
    },
    {
      "epoch": 0.000200830078125,
      "step": 32904,
      "training_step_time": 0.4478912353515625
    },
    {
      "epoch": 0.000200836181640625,
      "model_forward_time": 0.11453104019165039,
      "step": 32905
    },
    {
      "epoch": 0.000200836181640625,
      "step": 32905,
      "training_step_time": 0.5259599685668945
    },
    {
      "epoch": 0.00020084228515625,
      "model_forward_time": 0.11470746994018555,
      "step": 32906
    },
    {
      "epoch": 0.00020084228515625,
      "step": 32906,
      "training_step_time": 0.3979809284210205
    },
    {
      "epoch": 0.000200848388671875,
      "model_forward_time": 0.11412620544433594,
      "step": 32907
    },
    {
      "epoch": 0.000200848388671875,
      "step": 32907,
      "training_step_time": 0.38297200202941895
    },
    {
      "epoch": 0.0002008544921875,
      "model_forward_time": 0.11401534080505371,
      "step": 32908
    },
    {
      "epoch": 0.0002008544921875,
      "step": 32908,
      "training_step_time": 0.38642287254333496
    },
    {
      "epoch": 0.000200860595703125,
      "model_forward_time": 0.11515045166015625,
      "step": 32909
    },
    {
      "epoch": 0.000200860595703125,
      "step": 32909,
      "training_step_time": 0.7241668701171875
    },
    {
      "epoch": 0.00020086669921875,
      "grad_norm": 0.11421240866184235,
      "learning_rate": 4.611825586761591e-05,
      "loss": 0.042,
      "step": 32910
    },
    {
      "epoch": 0.00020086669921875,
      "model_forward_time": 0.11414790153503418,
      "step": 32910
    },
    {
      "epoch": 0.00020086669921875,
      "step": 32910,
      "training_step_time": 0.38300442695617676
    },
    {
      "epoch": 0.000200872802734375,
      "model_forward_time": 0.1148061752319336,
      "step": 32911
    },
    {
      "epoch": 0.000200872802734375,
      "step": 32911,
      "training_step_time": 0.4038074016571045
    },
    {
      "epoch": 0.00020087890625,
      "model_forward_time": 0.11456584930419922,
      "step": 32912
    },
    {
      "epoch": 0.00020087890625,
      "step": 32912,
      "training_step_time": 0.3961191177368164
    },
    {
      "epoch": 0.000200885009765625,
      "model_forward_time": 0.11554789543151855,
      "step": 32913
    },
    {
      "epoch": 0.000200885009765625,
      "step": 32913,
      "training_step_time": 0.39803600311279297
    },
    {
      "epoch": 0.00020089111328125,
      "model_forward_time": 0.11481809616088867,
      "step": 32914
    },
    {
      "epoch": 0.00020089111328125,
      "step": 32914,
      "training_step_time": 0.37718725204467773
    },
    {
      "epoch": 0.000200897216796875,
      "model_forward_time": 0.11525154113769531,
      "step": 32915
    },
    {
      "epoch": 0.000200897216796875,
      "step": 32915,
      "training_step_time": 0.6140997409820557
    },
    {
      "epoch": 0.0002009033203125,
      "model_forward_time": 0.11486339569091797,
      "step": 32916
    },
    {
      "epoch": 0.0002009033203125,
      "step": 32916,
      "training_step_time": 0.3921480178833008
    },
    {
      "epoch": 0.000200909423828125,
      "model_forward_time": 0.1150977611541748,
      "step": 32917
    },
    {
      "epoch": 0.000200909423828125,
      "step": 32917,
      "training_step_time": 0.4102952480316162
    },
    {
      "epoch": 0.00020091552734375,
      "model_forward_time": 0.1154944896697998,
      "step": 32918
    },
    {
      "epoch": 0.00020091552734375,
      "step": 32918,
      "training_step_time": 0.47542428970336914
    },
    {
      "epoch": 0.000200921630859375,
      "model_forward_time": 0.11488986015319824,
      "step": 32919
    },
    {
      "epoch": 0.000200921630859375,
      "step": 32919,
      "training_step_time": 0.48240208625793457
    },
    {
      "epoch": 0.000200927734375,
      "grad_norm": 0.12021562457084656,
      "learning_rate": 4.609078180174555e-05,
      "loss": 0.0402,
      "step": 32920
    },
    {
      "epoch": 0.000200927734375,
      "model_forward_time": 0.11481690406799316,
      "step": 32920
    },
    {
      "epoch": 0.000200927734375,
      "step": 32920,
      "training_step_time": 0.3776240348815918
    },
    {
      "epoch": 0.000200933837890625,
      "model_forward_time": 0.11479496955871582,
      "step": 32921
    },
    {
      "epoch": 0.000200933837890625,
      "step": 32921,
      "training_step_time": 0.5164387226104736
    },
    {
      "epoch": 0.00020093994140625,
      "model_forward_time": 0.11450386047363281,
      "step": 32922
    },
    {
      "epoch": 0.00020093994140625,
      "step": 32922,
      "training_step_time": 0.3965175151824951
    },
    {
      "epoch": 0.000200946044921875,
      "model_forward_time": 0.11530041694641113,
      "step": 32923
    },
    {
      "epoch": 0.000200946044921875,
      "step": 32923,
      "training_step_time": 0.3951890468597412
    },
    {
      "epoch": 0.0002009521484375,
      "model_forward_time": 0.11556410789489746,
      "step": 32924
    },
    {
      "epoch": 0.0002009521484375,
      "step": 32924,
      "training_step_time": 0.38867926597595215
    },
    {
      "epoch": 0.000200958251953125,
      "model_forward_time": 0.11516141891479492,
      "step": 32925
    },
    {
      "epoch": 0.000200958251953125,
      "step": 32925,
      "training_step_time": 0.394390344619751
    },
    {
      "epoch": 0.00020096435546875,
      "model_forward_time": 0.11512112617492676,
      "step": 32926
    },
    {
      "epoch": 0.00020096435546875,
      "step": 32926,
      "training_step_time": 0.3843882083892822
    },
    {
      "epoch": 0.000200970458984375,
      "model_forward_time": 0.1148691177368164,
      "step": 32927
    },
    {
      "epoch": 0.000200970458984375,
      "step": 32927,
      "training_step_time": 0.45617127418518066
    },
    {
      "epoch": 0.0002009765625,
      "model_forward_time": 0.11596035957336426,
      "step": 32928
    },
    {
      "epoch": 0.0002009765625,
      "step": 32928,
      "training_step_time": 0.39783740043640137
    },
    {
      "epoch": 0.000200982666015625,
      "model_forward_time": 0.1170194149017334,
      "step": 32929
    },
    {
      "epoch": 0.000200982666015625,
      "step": 32929,
      "training_step_time": 0.3871026039123535
    },
    {
      "epoch": 0.00020098876953125,
      "grad_norm": 0.12750674784183502,
      "learning_rate": 4.6063308923392485e-05,
      "loss": 0.0418,
      "step": 32930
    },
    {
      "epoch": 0.00020098876953125,
      "model_forward_time": 0.11557412147521973,
      "step": 32930
    },
    {
      "epoch": 0.00020098876953125,
      "step": 32930,
      "training_step_time": 0.3936166763305664
    },
    {
      "epoch": 0.000200994873046875,
      "model_forward_time": 0.11571145057678223,
      "step": 32931
    },
    {
      "epoch": 0.000200994873046875,
      "step": 32931,
      "training_step_time": 0.44946837425231934
    },
    {
      "epoch": 0.0002010009765625,
      "model_forward_time": 0.11549186706542969,
      "step": 32932
    },
    {
      "epoch": 0.0002010009765625,
      "step": 32932,
      "training_step_time": 0.47585296630859375
    },
    {
      "epoch": 0.000201007080078125,
      "model_forward_time": 0.11616635322570801,
      "step": 32933
    },
    {
      "epoch": 0.000201007080078125,
      "step": 32933,
      "training_step_time": 0.48883914947509766
    },
    {
      "epoch": 0.00020101318359375,
      "model_forward_time": 0.11549568176269531,
      "step": 32934
    },
    {
      "epoch": 0.00020101318359375,
      "step": 32934,
      "training_step_time": 0.4848635196685791
    },
    {
      "epoch": 0.000201019287109375,
      "model_forward_time": 0.11518311500549316,
      "step": 32935
    },
    {
      "epoch": 0.000201019287109375,
      "step": 32935,
      "training_step_time": 0.39233899116516113
    },
    {
      "epoch": 0.000201025390625,
      "model_forward_time": 0.11512279510498047,
      "step": 32936
    },
    {
      "epoch": 0.000201025390625,
      "step": 32936,
      "training_step_time": 0.38440871238708496
    },
    {
      "epoch": 0.000201031494140625,
      "model_forward_time": 0.11475491523742676,
      "step": 32937
    },
    {
      "epoch": 0.000201031494140625,
      "step": 32937,
      "training_step_time": 0.4030148983001709
    },
    {
      "epoch": 0.00020103759765625,
      "model_forward_time": 0.11546087265014648,
      "step": 32938
    },
    {
      "epoch": 0.00020103759765625,
      "step": 32938,
      "training_step_time": 0.39228367805480957
    },
    {
      "epoch": 0.000201043701171875,
      "model_forward_time": 0.11492729187011719,
      "step": 32939
    },
    {
      "epoch": 0.000201043701171875,
      "step": 32939,
      "training_step_time": 0.4296274185180664
    },
    {
      "epoch": 0.0002010498046875,
      "grad_norm": 0.1043524369597435,
      "learning_rate": 4.60358372409022e-05,
      "loss": 0.0458,
      "step": 32940
    },
    {
      "epoch": 0.0002010498046875,
      "model_forward_time": 0.11613869667053223,
      "step": 32940
    },
    {
      "epoch": 0.0002010498046875,
      "step": 32940,
      "training_step_time": 0.40448474884033203
    },
    {
      "epoch": 0.000201055908203125,
      "model_forward_time": 0.11527252197265625,
      "step": 32941
    },
    {
      "epoch": 0.000201055908203125,
      "step": 32941,
      "training_step_time": 0.40840816497802734
    },
    {
      "epoch": 0.00020106201171875,
      "model_forward_time": 0.11532115936279297,
      "step": 32942
    },
    {
      "epoch": 0.00020106201171875,
      "step": 32942,
      "training_step_time": 0.3831949234008789
    },
    {
      "epoch": 0.000201068115234375,
      "model_forward_time": 0.11478137969970703,
      "step": 32943
    },
    {
      "epoch": 0.000201068115234375,
      "step": 32943,
      "training_step_time": 0.3821108341217041
    },
    {
      "epoch": 0.00020107421875,
      "model_forward_time": 0.11577939987182617,
      "step": 32944
    },
    {
      "epoch": 0.00020107421875,
      "step": 32944,
      "training_step_time": 0.37711215019226074
    },
    {
      "epoch": 0.000201080322265625,
      "model_forward_time": 0.11554098129272461,
      "step": 32945
    },
    {
      "epoch": 0.000201080322265625,
      "step": 32945,
      "training_step_time": 0.5353813171386719
    },
    {
      "epoch": 0.00020108642578125,
      "model_forward_time": 0.11571264266967773,
      "step": 32946
    },
    {
      "epoch": 0.00020108642578125,
      "step": 32946,
      "training_step_time": 0.40265870094299316
    },
    {
      "epoch": 0.000201092529296875,
      "model_forward_time": 0.11508631706237793,
      "step": 32947
    },
    {
      "epoch": 0.000201092529296875,
      "step": 32947,
      "training_step_time": 0.48606038093566895
    },
    {
      "epoch": 0.0002010986328125,
      "model_forward_time": 0.11502265930175781,
      "step": 32948
    },
    {
      "epoch": 0.0002010986328125,
      "step": 32948,
      "training_step_time": 0.43416929244995117
    },
    {
      "epoch": 0.000201104736328125,
      "model_forward_time": 0.11477375030517578,
      "step": 32949
    },
    {
      "epoch": 0.000201104736328125,
      "step": 32949,
      "training_step_time": 0.3939695358276367
    },
    {
      "epoch": 0.00020111083984375,
      "grad_norm": 0.10148853808641434,
      "learning_rate": 4.6008366762619926e-05,
      "loss": 0.0476,
      "step": 32950
    },
    {
      "epoch": 0.00020111083984375,
      "model_forward_time": 0.11423659324645996,
      "step": 32950
    },
    {
      "epoch": 0.00020111083984375,
      "step": 32950,
      "training_step_time": 0.38381433486938477
    },
    {
      "epoch": 0.000201116943359375,
      "model_forward_time": 0.11503791809082031,
      "step": 32951
    },
    {
      "epoch": 0.000201116943359375,
      "step": 32951,
      "training_step_time": 0.45889830589294434
    },
    {
      "epoch": 0.000201123046875,
      "model_forward_time": 0.11530923843383789,
      "step": 32952
    },
    {
      "epoch": 0.000201123046875,
      "step": 32952,
      "training_step_time": 0.3943817615509033
    },
    {
      "epoch": 0.000201129150390625,
      "model_forward_time": 0.11477494239807129,
      "step": 32953
    },
    {
      "epoch": 0.000201129150390625,
      "step": 32953,
      "training_step_time": 0.39464449882507324
    },
    {
      "epoch": 0.00020113525390625,
      "model_forward_time": 0.11544346809387207,
      "step": 32954
    },
    {
      "epoch": 0.00020113525390625,
      "step": 32954,
      "training_step_time": 0.383267879486084
    },
    {
      "epoch": 0.000201141357421875,
      "model_forward_time": 0.11653923988342285,
      "step": 32955
    },
    {
      "epoch": 0.000201141357421875,
      "step": 32955,
      "training_step_time": 0.38044214248657227
    },
    {
      "epoch": 0.0002011474609375,
      "model_forward_time": 0.11513829231262207,
      "step": 32956
    },
    {
      "epoch": 0.0002011474609375,
      "step": 32956,
      "training_step_time": 0.3937098979949951
    },
    {
      "epoch": 0.000201153564453125,
      "model_forward_time": 0.11541366577148438,
      "step": 32957
    },
    {
      "epoch": 0.000201153564453125,
      "step": 32957,
      "training_step_time": 1.122016429901123
    },
    {
      "epoch": 0.00020115966796875,
      "model_forward_time": 0.11411809921264648,
      "step": 32958
    },
    {
      "epoch": 0.00020115966796875,
      "step": 32958,
      "training_step_time": 0.43819093704223633
    },
    {
      "epoch": 0.000201165771484375,
      "model_forward_time": 0.11384010314941406,
      "step": 32959
    },
    {
      "epoch": 0.000201165771484375,
      "step": 32959,
      "training_step_time": 0.40227413177490234
    },
    {
      "epoch": 0.000201171875,
      "grad_norm": 0.11451341956853867,
      "learning_rate": 4.598089749689041e-05,
      "loss": 0.046,
      "step": 32960
    },
    {
      "epoch": 0.000201171875,
      "model_forward_time": 0.1140134334564209,
      "step": 32960
    },
    {
      "epoch": 0.000201171875,
      "step": 32960,
      "training_step_time": 0.4640491008758545
    },
    {
      "epoch": 0.000201177978515625,
      "model_forward_time": 0.11374258995056152,
      "step": 32961
    },
    {
      "epoch": 0.000201177978515625,
      "step": 32961,
      "training_step_time": 0.46440792083740234
    },
    {
      "epoch": 0.00020118408203125,
      "model_forward_time": 0.11370539665222168,
      "step": 32962
    },
    {
      "epoch": 0.00020118408203125,
      "step": 32962,
      "training_step_time": 0.38865017890930176
    },
    {
      "epoch": 0.000201190185546875,
      "model_forward_time": 0.11449384689331055,
      "step": 32963
    },
    {
      "epoch": 0.000201190185546875,
      "step": 32963,
      "training_step_time": 0.3832540512084961
    },
    {
      "epoch": 0.0002011962890625,
      "model_forward_time": 0.11484384536743164,
      "step": 32964
    },
    {
      "epoch": 0.0002011962890625,
      "step": 32964,
      "training_step_time": 0.3904268741607666
    },
    {
      "epoch": 0.000201202392578125,
      "model_forward_time": 0.11487865447998047,
      "step": 32965
    },
    {
      "epoch": 0.000201202392578125,
      "step": 32965,
      "training_step_time": 0.38870954513549805
    },
    {
      "epoch": 0.00020120849609375,
      "model_forward_time": 0.11519050598144531,
      "step": 32966
    },
    {
      "epoch": 0.00020120849609375,
      "step": 32966,
      "training_step_time": 0.3869669437408447
    },
    {
      "epoch": 0.000201214599609375,
      "model_forward_time": 0.11558413505554199,
      "step": 32967
    },
    {
      "epoch": 0.000201214599609375,
      "step": 32967,
      "training_step_time": 0.3885385990142822
    },
    {
      "epoch": 0.000201220703125,
      "model_forward_time": 0.11470699310302734,
      "step": 32968
    },
    {
      "epoch": 0.000201220703125,
      "step": 32968,
      "training_step_time": 0.39793872833251953
    },
    {
      "epoch": 0.000201226806640625,
      "model_forward_time": 0.11561751365661621,
      "step": 32969
    },
    {
      "epoch": 0.000201226806640625,
      "step": 32969,
      "training_step_time": 0.38692331314086914
    },
    {
      "epoch": 0.00020123291015625,
      "grad_norm": 0.15027956664562225,
      "learning_rate": 4.5953429452058135e-05,
      "loss": 0.049,
      "step": 32970
    },
    {
      "epoch": 0.00020123291015625,
      "model_forward_time": 0.11548709869384766,
      "step": 32970
    },
    {
      "epoch": 0.00020123291015625,
      "step": 32970,
      "training_step_time": 0.3977978229522705
    },
    {
      "epoch": 0.000201239013671875,
      "model_forward_time": 0.11597418785095215,
      "step": 32971
    },
    {
      "epoch": 0.000201239013671875,
      "step": 32971,
      "training_step_time": 0.41474008560180664
    },
    {
      "epoch": 0.0002012451171875,
      "model_forward_time": 0.11500430107116699,
      "step": 32972
    },
    {
      "epoch": 0.0002012451171875,
      "step": 32972,
      "training_step_time": 0.4979674816131592
    },
    {
      "epoch": 0.000201251220703125,
      "model_forward_time": 0.11458492279052734,
      "step": 32973
    },
    {
      "epoch": 0.000201251220703125,
      "step": 32973,
      "training_step_time": 0.3951301574707031
    },
    {
      "epoch": 0.00020125732421875,
      "model_forward_time": 0.11452198028564453,
      "step": 32974
    },
    {
      "epoch": 0.00020125732421875,
      "step": 32974,
      "training_step_time": 0.47190165519714355
    },
    {
      "epoch": 0.000201263427734375,
      "model_forward_time": 0.11581563949584961,
      "step": 32975
    },
    {
      "epoch": 0.000201263427734375,
      "step": 32975,
      "training_step_time": 0.5057897567749023
    },
    {
      "epoch": 0.00020126953125,
      "model_forward_time": 0.1156158447265625,
      "step": 32976
    },
    {
      "epoch": 0.00020126953125,
      "step": 32976,
      "training_step_time": 0.4223959445953369
    },
    {
      "epoch": 0.000201275634765625,
      "model_forward_time": 0.11510562896728516,
      "step": 32977
    },
    {
      "epoch": 0.000201275634765625,
      "step": 32977,
      "training_step_time": 0.3979151248931885
    },
    {
      "epoch": 0.00020128173828125,
      "model_forward_time": 0.11517572402954102,
      "step": 32978
    },
    {
      "epoch": 0.00020128173828125,
      "step": 32978,
      "training_step_time": 0.38465023040771484
    },
    {
      "epoch": 0.000201287841796875,
      "model_forward_time": 0.11476826667785645,
      "step": 32979
    },
    {
      "epoch": 0.000201287841796875,
      "step": 32979,
      "training_step_time": 0.39914631843566895
    },
    {
      "epoch": 0.0002012939453125,
      "grad_norm": 0.1778744012117386,
      "learning_rate": 4.5925962636467126e-05,
      "loss": 0.0481,
      "step": 32980
    },
    {
      "epoch": 0.0002012939453125,
      "model_forward_time": 0.11434698104858398,
      "step": 32980
    },
    {
      "epoch": 0.0002012939453125,
      "step": 32980,
      "training_step_time": 0.4004802703857422
    },
    {
      "epoch": 0.000201300048828125,
      "model_forward_time": 0.11532425880432129,
      "step": 32981
    },
    {
      "epoch": 0.000201300048828125,
      "step": 32981,
      "training_step_time": 0.446056604385376
    },
    {
      "epoch": 0.00020130615234375,
      "model_forward_time": 0.11532282829284668,
      "step": 32982
    },
    {
      "epoch": 0.00020130615234375,
      "step": 32982,
      "training_step_time": 0.40203332901000977
    },
    {
      "epoch": 0.000201312255859375,
      "model_forward_time": 0.11515069007873535,
      "step": 32983
    },
    {
      "epoch": 0.000201312255859375,
      "step": 32983,
      "training_step_time": 0.403430700302124
    },
    {
      "epoch": 0.000201318359375,
      "model_forward_time": 0.11497902870178223,
      "step": 32984
    },
    {
      "epoch": 0.000201318359375,
      "step": 32984,
      "training_step_time": 0.4081757068634033
    },
    {
      "epoch": 0.000201324462890625,
      "model_forward_time": 0.11453962326049805,
      "step": 32985
    },
    {
      "epoch": 0.000201324462890625,
      "step": 32985,
      "training_step_time": 0.40747833251953125
    },
    {
      "epoch": 0.00020133056640625,
      "model_forward_time": 0.11504578590393066,
      "step": 32986
    },
    {
      "epoch": 0.00020133056640625,
      "step": 32986,
      "training_step_time": 0.3975045680999756
    },
    {
      "epoch": 0.000201336669921875,
      "model_forward_time": 0.11536884307861328,
      "step": 32987
    },
    {
      "epoch": 0.000201336669921875,
      "step": 32987,
      "training_step_time": 0.587151050567627
    },
    {
      "epoch": 0.0002013427734375,
      "model_forward_time": 0.11488008499145508,
      "step": 32988
    },
    {
      "epoch": 0.0002013427734375,
      "step": 32988,
      "training_step_time": 0.36958932876586914
    },
    {
      "epoch": 0.000201348876953125,
      "model_forward_time": 0.11472797393798828,
      "step": 32989
    },
    {
      "epoch": 0.000201348876953125,
      "step": 32989,
      "training_step_time": 0.45093774795532227
    },
    {
      "epoch": 0.00020135498046875,
      "grad_norm": 0.15236420929431915,
      "learning_rate": 4.58984970584611e-05,
      "loss": 0.039,
      "step": 32990
    },
    {
      "epoch": 0.00020135498046875,
      "model_forward_time": 0.11489057540893555,
      "step": 32990
    },
    {
      "epoch": 0.00020135498046875,
      "step": 32990,
      "training_step_time": 0.39839792251586914
    },
    {
      "epoch": 0.000201361083984375,
      "model_forward_time": 0.11490583419799805,
      "step": 32991
    },
    {
      "epoch": 0.000201361083984375,
      "step": 32991,
      "training_step_time": 0.39230918884277344
    },
    {
      "epoch": 0.0002013671875,
      "model_forward_time": 0.11461281776428223,
      "step": 32992
    },
    {
      "epoch": 0.0002013671875,
      "step": 32992,
      "training_step_time": 0.39428281784057617
    },
    {
      "epoch": 0.000201373291015625,
      "model_forward_time": 0.11505556106567383,
      "step": 32993
    },
    {
      "epoch": 0.000201373291015625,
      "step": 32993,
      "training_step_time": 0.3977534770965576
    },
    {
      "epoch": 0.00020137939453125,
      "model_forward_time": 0.1148834228515625,
      "step": 32994
    },
    {
      "epoch": 0.00020137939453125,
      "step": 32994,
      "training_step_time": 0.4055166244506836
    },
    {
      "epoch": 0.000201385498046875,
      "model_forward_time": 0.11492729187011719,
      "step": 32995
    },
    {
      "epoch": 0.000201385498046875,
      "step": 32995,
      "training_step_time": 0.3913276195526123
    },
    {
      "epoch": 0.0002013916015625,
      "model_forward_time": 0.11523938179016113,
      "step": 32996
    },
    {
      "epoch": 0.0002013916015625,
      "step": 32996,
      "training_step_time": 0.3973228931427002
    },
    {
      "epoch": 0.000201397705078125,
      "model_forward_time": 0.11603927612304688,
      "step": 32997
    },
    {
      "epoch": 0.000201397705078125,
      "step": 32997,
      "training_step_time": 0.4313323497772217
    },
    {
      "epoch": 0.00020140380859375,
      "model_forward_time": 0.11541604995727539,
      "step": 32998
    },
    {
      "epoch": 0.00020140380859375,
      "step": 32998,
      "training_step_time": 0.4763753414154053
    },
    {
      "epoch": 0.000201409912109375,
      "model_forward_time": 0.11510491371154785,
      "step": 32999
    },
    {
      "epoch": 0.000201409912109375,
      "step": 32999,
      "training_step_time": 0.39835405349731445
    },
    {
      "epoch": 0.000201416015625,
      "grad_norm": 0.1359669268131256,
      "learning_rate": 4.5871032726383386e-05,
      "loss": 0.0413,
      "step": 33000
    },
    {
      "epoch": 0.000201416015625,
      "model_forward_time": 0.11275100708007812,
      "step": 33000
    },
    {
      "epoch": 0.000201416015625,
      "step": 33000,
      "training_step_time": 0.3682229518890381
    },
    {
      "epoch": 0.000201422119140625,
      "model_forward_time": 0.11211538314819336,
      "step": 33001
    },
    {
      "epoch": 0.000201422119140625,
      "step": 33001,
      "training_step_time": 0.38483405113220215
    },
    {
      "epoch": 0.00020142822265625,
      "model_forward_time": 0.11244893074035645,
      "step": 33002
    },
    {
      "epoch": 0.00020142822265625,
      "step": 33002,
      "training_step_time": 0.37738752365112305
    },
    {
      "epoch": 0.000201434326171875,
      "model_forward_time": 0.11353111267089844,
      "step": 33003
    },
    {
      "epoch": 0.000201434326171875,
      "step": 33003,
      "training_step_time": 0.4328746795654297
    },
    {
      "epoch": 0.0002014404296875,
      "model_forward_time": 0.1138312816619873,
      "step": 33004
    },
    {
      "epoch": 0.0002014404296875,
      "step": 33004,
      "training_step_time": 0.44036388397216797
    },
    {
      "epoch": 0.000201446533203125,
      "model_forward_time": 0.11429262161254883,
      "step": 33005
    },
    {
      "epoch": 0.000201446533203125,
      "step": 33005,
      "training_step_time": 0.4058051109313965
    },
    {
      "epoch": 0.00020145263671875,
      "model_forward_time": 0.11437296867370605,
      "step": 33006
    },
    {
      "epoch": 0.00020145263671875,
      "step": 33006,
      "training_step_time": 0.4797964096069336
    },
    {
      "epoch": 0.000201458740234375,
      "model_forward_time": 0.11496138572692871,
      "step": 33007
    },
    {
      "epoch": 0.000201458740234375,
      "step": 33007,
      "training_step_time": 0.4112377166748047
    },
    {
      "epoch": 0.00020146484375,
      "model_forward_time": 0.11466312408447266,
      "step": 33008
    },
    {
      "epoch": 0.00020146484375,
      "step": 33008,
      "training_step_time": 0.38974428176879883
    },
    {
      "epoch": 0.000201470947265625,
      "model_forward_time": 0.11533451080322266,
      "step": 33009
    },
    {
      "epoch": 0.000201470947265625,
      "step": 33009,
      "training_step_time": 0.3824131488800049
    },
    {
      "epoch": 0.00020147705078125,
      "grad_norm": 0.09389323741197586,
      "learning_rate": 4.5843569648576913e-05,
      "loss": 0.0435,
      "step": 33010
    },
    {
      "epoch": 0.00020147705078125,
      "model_forward_time": 0.11510539054870605,
      "step": 33010
    },
    {
      "epoch": 0.00020147705078125,
      "step": 33010,
      "training_step_time": 0.39922404289245605
    },
    {
      "epoch": 0.000201483154296875,
      "model_forward_time": 0.11477255821228027,
      "step": 33011
    },
    {
      "epoch": 0.000201483154296875,
      "step": 33011,
      "training_step_time": 0.39724135398864746
    },
    {
      "epoch": 0.0002014892578125,
      "model_forward_time": 0.11478114128112793,
      "step": 33012
    },
    {
      "epoch": 0.0002014892578125,
      "step": 33012,
      "training_step_time": 0.4104635715484619
    },
    {
      "epoch": 0.000201495361328125,
      "model_forward_time": 0.13273191452026367,
      "step": 33013
    },
    {
      "epoch": 0.000201495361328125,
      "step": 33013,
      "training_step_time": 0.4413316249847412
    },
    {
      "epoch": 0.00020150146484375,
      "model_forward_time": 0.11514163017272949,
      "step": 33014
    },
    {
      "epoch": 0.00020150146484375,
      "step": 33014,
      "training_step_time": 0.39763903617858887
    },
    {
      "epoch": 0.000201507568359375,
      "model_forward_time": 0.11509132385253906,
      "step": 33015
    },
    {
      "epoch": 0.000201507568359375,
      "step": 33015,
      "training_step_time": 0.39821934700012207
    },
    {
      "epoch": 0.000201513671875,
      "model_forward_time": 0.11507248878479004,
      "step": 33016
    },
    {
      "epoch": 0.000201513671875,
      "step": 33016,
      "training_step_time": 0.4022541046142578
    },
    {
      "epoch": 0.000201519775390625,
      "model_forward_time": 0.11538171768188477,
      "step": 33017
    },
    {
      "epoch": 0.000201519775390625,
      "step": 33017,
      "training_step_time": 0.44591617584228516
    },
    {
      "epoch": 0.00020152587890625,
      "model_forward_time": 0.11555862426757812,
      "step": 33018
    },
    {
      "epoch": 0.00020152587890625,
      "step": 33018,
      "training_step_time": 0.4258594512939453
    },
    {
      "epoch": 0.000201531982421875,
      "model_forward_time": 0.11492776870727539,
      "step": 33019
    },
    {
      "epoch": 0.000201531982421875,
      "step": 33019,
      "training_step_time": 0.3866119384765625
    },
    {
      "epoch": 0.0002015380859375,
      "grad_norm": 0.15099306404590607,
      "learning_rate": 4.5816107833384234e-05,
      "loss": 0.0426,
      "step": 33020
    },
    {
      "epoch": 0.0002015380859375,
      "model_forward_time": 0.11573076248168945,
      "step": 33020
    },
    {
      "epoch": 0.0002015380859375,
      "step": 33020,
      "training_step_time": 0.4790973663330078
    },
    {
      "epoch": 0.000201544189453125,
      "model_forward_time": 0.11508345603942871,
      "step": 33021
    },
    {
      "epoch": 0.000201544189453125,
      "step": 33021,
      "training_step_time": 0.4810676574707031
    },
    {
      "epoch": 0.00020155029296875,
      "model_forward_time": 0.1147146224975586,
      "step": 33022
    },
    {
      "epoch": 0.00020155029296875,
      "step": 33022,
      "training_step_time": 0.39888596534729004
    },
    {
      "epoch": 0.000201556396484375,
      "model_forward_time": 0.11498904228210449,
      "step": 33023
    },
    {
      "epoch": 0.000201556396484375,
      "step": 33023,
      "training_step_time": 0.40616869926452637
    },
    {
      "epoch": 0.0002015625,
      "model_forward_time": 0.11490678787231445,
      "step": 33024
    },
    {
      "epoch": 0.0002015625,
      "step": 33024,
      "training_step_time": 0.39106297492980957
    },
    {
      "epoch": 0.000201568603515625,
      "model_forward_time": 0.11516714096069336,
      "step": 33025
    },
    {
      "epoch": 0.000201568603515625,
      "step": 33025,
      "training_step_time": 0.41261911392211914
    },
    {
      "epoch": 0.00020157470703125,
      "model_forward_time": 0.11512327194213867,
      "step": 33026
    },
    {
      "epoch": 0.00020157470703125,
      "step": 33026,
      "training_step_time": 0.4486844539642334
    },
    {
      "epoch": 0.000201580810546875,
      "model_forward_time": 0.11491560935974121,
      "step": 33027
    },
    {
      "epoch": 0.000201580810546875,
      "step": 33027,
      "training_step_time": 0.3949737548828125
    },
    {
      "epoch": 0.0002015869140625,
      "model_forward_time": 0.11562180519104004,
      "step": 33028
    },
    {
      "epoch": 0.0002015869140625,
      "step": 33028,
      "training_step_time": 0.406588077545166
    },
    {
      "epoch": 0.000201593017578125,
      "model_forward_time": 0.11484026908874512,
      "step": 33029
    },
    {
      "epoch": 0.000201593017578125,
      "step": 33029,
      "training_step_time": 0.4157369136810303
    },
    {
      "epoch": 0.00020159912109375,
      "grad_norm": 0.11702278256416321,
      "learning_rate": 4.5788647289147516e-05,
      "loss": 0.045,
      "step": 33030
    },
    {
      "epoch": 0.00020159912109375,
      "model_forward_time": 0.11515116691589355,
      "step": 33030
    },
    {
      "epoch": 0.00020159912109375,
      "step": 33030,
      "training_step_time": 0.4031376838684082
    },
    {
      "epoch": 0.000201605224609375,
      "model_forward_time": 0.11487984657287598,
      "step": 33031
    },
    {
      "epoch": 0.000201605224609375,
      "step": 33031,
      "training_step_time": 0.39658379554748535
    },
    {
      "epoch": 0.000201611328125,
      "model_forward_time": 0.11531972885131836,
      "step": 33032
    },
    {
      "epoch": 0.000201611328125,
      "step": 33032,
      "training_step_time": 0.4052004814147949
    },
    {
      "epoch": 0.000201617431640625,
      "model_forward_time": 0.11492800712585449,
      "step": 33033
    },
    {
      "epoch": 0.000201617431640625,
      "step": 33033,
      "training_step_time": 0.4622535705566406
    },
    {
      "epoch": 0.00020162353515625,
      "model_forward_time": 0.11600947380065918,
      "step": 33034
    },
    {
      "epoch": 0.00020162353515625,
      "step": 33034,
      "training_step_time": 0.48305630683898926
    },
    {
      "epoch": 0.000201629638671875,
      "model_forward_time": 0.11457180976867676,
      "step": 33035
    },
    {
      "epoch": 0.000201629638671875,
      "step": 33035,
      "training_step_time": 0.46706390380859375
    },
    {
      "epoch": 0.0002016357421875,
      "model_forward_time": 0.11501097679138184,
      "step": 33036
    },
    {
      "epoch": 0.0002016357421875,
      "step": 33036,
      "training_step_time": 0.47925400733947754
    },
    {
      "epoch": 0.000201641845703125,
      "model_forward_time": 0.11435842514038086,
      "step": 33037
    },
    {
      "epoch": 0.000201641845703125,
      "step": 33037,
      "training_step_time": 0.4003183841705322
    },
    {
      "epoch": 0.00020164794921875,
      "model_forward_time": 0.11599588394165039,
      "step": 33038
    },
    {
      "epoch": 0.00020164794921875,
      "step": 33038,
      "training_step_time": 0.38413405418395996
    },
    {
      "epoch": 0.000201654052734375,
      "model_forward_time": 0.1145942211151123,
      "step": 33039
    },
    {
      "epoch": 0.000201654052734375,
      "step": 33039,
      "training_step_time": 0.4214167594909668
    },
    {
      "epoch": 0.00020166015625,
      "grad_norm": 0.1483568400144577,
      "learning_rate": 4.576118802420856e-05,
      "loss": 0.0412,
      "step": 33040
    },
    {
      "epoch": 0.00020166015625,
      "model_forward_time": 0.1153252124786377,
      "step": 33040
    },
    {
      "epoch": 0.00020166015625,
      "step": 33040,
      "training_step_time": 0.4401552677154541
    },
    {
      "epoch": 0.000201666259765625,
      "model_forward_time": 0.11554694175720215,
      "step": 33041
    },
    {
      "epoch": 0.000201666259765625,
      "step": 33041,
      "training_step_time": 0.4009428024291992
    },
    {
      "epoch": 0.00020167236328125,
      "model_forward_time": 0.11506009101867676,
      "step": 33042
    },
    {
      "epoch": 0.00020167236328125,
      "step": 33042,
      "training_step_time": 0.3972439765930176
    },
    {
      "epoch": 0.000201678466796875,
      "model_forward_time": 0.11580348014831543,
      "step": 33043
    },
    {
      "epoch": 0.000201678466796875,
      "step": 33043,
      "training_step_time": 0.39719510078430176
    },
    {
      "epoch": 0.0002016845703125,
      "model_forward_time": 0.11524677276611328,
      "step": 33044
    },
    {
      "epoch": 0.0002016845703125,
      "step": 33044,
      "training_step_time": 0.3926084041595459
    },
    {
      "epoch": 0.000201690673828125,
      "model_forward_time": 0.1162264347076416,
      "step": 33045
    },
    {
      "epoch": 0.000201690673828125,
      "step": 33045,
      "training_step_time": 0.4001352787017822
    },
    {
      "epoch": 0.00020169677734375,
      "model_forward_time": 0.11516070365905762,
      "step": 33046
    },
    {
      "epoch": 0.00020169677734375,
      "step": 33046,
      "training_step_time": 0.4385843276977539
    },
    {
      "epoch": 0.000201702880859375,
      "model_forward_time": 0.11488842964172363,
      "step": 33047
    },
    {
      "epoch": 0.000201702880859375,
      "step": 33047,
      "training_step_time": 0.43394994735717773
    },
    {
      "epoch": 0.000201708984375,
      "model_forward_time": 0.11440825462341309,
      "step": 33048
    },
    {
      "epoch": 0.000201708984375,
      "step": 33048,
      "training_step_time": 0.40934205055236816
    },
    {
      "epoch": 0.000201715087890625,
      "model_forward_time": 0.11643791198730469,
      "step": 33049
    },
    {
      "epoch": 0.000201715087890625,
      "step": 33049,
      "training_step_time": 0.4184389114379883
    },
    {
      "epoch": 0.00020172119140625,
      "grad_norm": 0.10832040756940842,
      "learning_rate": 4.573373004690878e-05,
      "loss": 0.0472,
      "step": 33050
    },
    {
      "epoch": 0.00020172119140625,
      "model_forward_time": 0.11526203155517578,
      "step": 33050
    },
    {
      "epoch": 0.00020172119140625,
      "step": 33050,
      "training_step_time": 0.4166080951690674
    },
    {
      "epoch": 0.000201727294921875,
      "model_forward_time": 0.1149601936340332,
      "step": 33051
    },
    {
      "epoch": 0.000201727294921875,
      "step": 33051,
      "training_step_time": 0.39089059829711914
    },
    {
      "epoch": 0.0002017333984375,
      "model_forward_time": 0.114715576171875,
      "step": 33052
    },
    {
      "epoch": 0.0002017333984375,
      "step": 33052,
      "training_step_time": 0.3947422504425049
    },
    {
      "epoch": 0.000201739501953125,
      "model_forward_time": 0.11490321159362793,
      "step": 33053
    },
    {
      "epoch": 0.000201739501953125,
      "step": 33053,
      "training_step_time": 0.4409615993499756
    },
    {
      "epoch": 0.00020174560546875,
      "model_forward_time": 0.11482667922973633,
      "step": 33054
    },
    {
      "epoch": 0.00020174560546875,
      "step": 33054,
      "training_step_time": 0.46855664253234863
    },
    {
      "epoch": 0.000201751708984375,
      "model_forward_time": 0.11518406867980957,
      "step": 33055
    },
    {
      "epoch": 0.000201751708984375,
      "step": 33055,
      "training_step_time": 0.41115379333496094
    },
    {
      "epoch": 0.0002017578125,
      "model_forward_time": 0.11482429504394531,
      "step": 33056
    },
    {
      "epoch": 0.0002017578125,
      "step": 33056,
      "training_step_time": 0.3976762294769287
    },
    {
      "epoch": 0.000201763916015625,
      "model_forward_time": 0.11523199081420898,
      "step": 33057
    },
    {
      "epoch": 0.000201763916015625,
      "step": 33057,
      "training_step_time": 0.40244388580322266
    },
    {
      "epoch": 0.00020177001953125,
      "model_forward_time": 0.11585640907287598,
      "step": 33058
    },
    {
      "epoch": 0.00020177001953125,
      "step": 33058,
      "training_step_time": 0.3990490436553955
    },
    {
      "epoch": 0.000201776123046875,
      "model_forward_time": 0.11500000953674316,
      "step": 33059
    },
    {
      "epoch": 0.000201776123046875,
      "step": 33059,
      "training_step_time": 0.4139976501464844
    },
    {
      "epoch": 0.0002017822265625,
      "grad_norm": 0.12374894320964813,
      "learning_rate": 4.570627336558915e-05,
      "loss": 0.044,
      "step": 33060
    },
    {
      "epoch": 0.0002017822265625,
      "model_forward_time": 0.1147303581237793,
      "step": 33060
    },
    {
      "epoch": 0.0002017822265625,
      "step": 33060,
      "training_step_time": 0.3990962505340576
    },
    {
      "epoch": 0.000201788330078125,
      "model_forward_time": 0.1155238151550293,
      "step": 33061
    },
    {
      "epoch": 0.000201788330078125,
      "step": 33061,
      "training_step_time": 0.4446907043457031
    },
    {
      "epoch": 0.00020179443359375,
      "model_forward_time": 0.11426877975463867,
      "step": 33062
    },
    {
      "epoch": 0.00020179443359375,
      "step": 33062,
      "training_step_time": 0.5003607273101807
    },
    {
      "epoch": 0.000201800537109375,
      "model_forward_time": 0.1148993968963623,
      "step": 33063
    },
    {
      "epoch": 0.000201800537109375,
      "step": 33063,
      "training_step_time": 0.4594998359680176
    },
    {
      "epoch": 0.000201806640625,
      "model_forward_time": 0.11614799499511719,
      "step": 33064
    },
    {
      "epoch": 0.000201806640625,
      "step": 33064,
      "training_step_time": 0.48542332649230957
    },
    {
      "epoch": 0.000201812744140625,
      "model_forward_time": 0.11484122276306152,
      "step": 33065
    },
    {
      "epoch": 0.000201812744140625,
      "step": 33065,
      "training_step_time": 0.5063841342926025
    },
    {
      "epoch": 0.00020181884765625,
      "model_forward_time": 0.11444449424743652,
      "step": 33066
    },
    {
      "epoch": 0.00020181884765625,
      "step": 33066,
      "training_step_time": 0.40392422676086426
    },
    {
      "epoch": 0.000201824951171875,
      "model_forward_time": 0.11430811882019043,
      "step": 33067
    },
    {
      "epoch": 0.000201824951171875,
      "step": 33067,
      "training_step_time": 0.41228604316711426
    },
    {
      "epoch": 0.0002018310546875,
      "model_forward_time": 0.11414194107055664,
      "step": 33068
    },
    {
      "epoch": 0.0002018310546875,
      "step": 33068,
      "training_step_time": 0.3882575035095215
    },
    {
      "epoch": 0.000201837158203125,
      "model_forward_time": 0.11530423164367676,
      "step": 33069
    },
    {
      "epoch": 0.000201837158203125,
      "step": 33069,
      "training_step_time": 0.39807653427124023
    },
    {
      "epoch": 0.00020184326171875,
      "grad_norm": 0.1450580656528473,
      "learning_rate": 4.567881798859032e-05,
      "loss": 0.0406,
      "step": 33070
    },
    {
      "epoch": 0.00020184326171875,
      "model_forward_time": 0.11503744125366211,
      "step": 33070
    },
    {
      "epoch": 0.00020184326171875,
      "step": 33070,
      "training_step_time": 0.3960561752319336
    },
    {
      "epoch": 0.000201849365234375,
      "model_forward_time": 0.1146230697631836,
      "step": 33071
    },
    {
      "epoch": 0.000201849365234375,
      "step": 33071,
      "training_step_time": 0.39258527755737305
    },
    {
      "epoch": 0.00020185546875,
      "model_forward_time": 0.11496639251708984,
      "step": 33072
    },
    {
      "epoch": 0.00020185546875,
      "step": 33072,
      "training_step_time": 0.3964114189147949
    },
    {
      "epoch": 0.000201861572265625,
      "model_forward_time": 0.11490488052368164,
      "step": 33073
    },
    {
      "epoch": 0.000201861572265625,
      "step": 33073,
      "training_step_time": 0.39864063262939453
    },
    {
      "epoch": 0.00020186767578125,
      "model_forward_time": 0.11537766456604004,
      "step": 33074
    },
    {
      "epoch": 0.00020186767578125,
      "step": 33074,
      "training_step_time": 0.39269232749938965
    },
    {
      "epoch": 0.000201873779296875,
      "model_forward_time": 0.11547136306762695,
      "step": 33075
    },
    {
      "epoch": 0.000201873779296875,
      "step": 33075,
      "training_step_time": 0.5065264701843262
    },
    {
      "epoch": 0.0002018798828125,
      "model_forward_time": 0.11532211303710938,
      "step": 33076
    },
    {
      "epoch": 0.0002018798828125,
      "step": 33076,
      "training_step_time": 0.49823522567749023
    },
    {
      "epoch": 0.000201885986328125,
      "model_forward_time": 0.11457037925720215,
      "step": 33077
    },
    {
      "epoch": 0.000201885986328125,
      "step": 33077,
      "training_step_time": 0.37969279289245605
    },
    {
      "epoch": 0.00020189208984375,
      "model_forward_time": 0.11538577079772949,
      "step": 33078
    },
    {
      "epoch": 0.00020189208984375,
      "step": 33078,
      "training_step_time": 0.4653794765472412
    },
    {
      "epoch": 0.000201898193359375,
      "model_forward_time": 0.1145474910736084,
      "step": 33079
    },
    {
      "epoch": 0.000201898193359375,
      "step": 33079,
      "training_step_time": 0.46306896209716797
    },
    {
      "epoch": 0.000201904296875,
      "grad_norm": 0.13972754776477814,
      "learning_rate": 4.565136392425247e-05,
      "loss": 0.0408,
      "step": 33080
    },
    {
      "epoch": 0.000201904296875,
      "model_forward_time": 0.11447310447692871,
      "step": 33080
    },
    {
      "epoch": 0.000201904296875,
      "step": 33080,
      "training_step_time": 0.4162318706512451
    },
    {
      "epoch": 0.000201910400390625,
      "model_forward_time": 0.11513519287109375,
      "step": 33081
    },
    {
      "epoch": 0.000201910400390625,
      "step": 33081,
      "training_step_time": 0.40646839141845703
    },
    {
      "epoch": 0.00020191650390625,
      "model_forward_time": 0.11464667320251465,
      "step": 33082
    },
    {
      "epoch": 0.00020191650390625,
      "step": 33082,
      "training_step_time": 0.40709471702575684
    },
    {
      "epoch": 0.000201922607421875,
      "model_forward_time": 0.11451840400695801,
      "step": 33083
    },
    {
      "epoch": 0.000201922607421875,
      "step": 33083,
      "training_step_time": 0.3992176055908203
    },
    {
      "epoch": 0.0002019287109375,
      "model_forward_time": 0.11466860771179199,
      "step": 33084
    },
    {
      "epoch": 0.0002019287109375,
      "step": 33084,
      "training_step_time": 0.39246225357055664
    },
    {
      "epoch": 0.000201934814453125,
      "model_forward_time": 0.11510372161865234,
      "step": 33085
    },
    {
      "epoch": 0.000201934814453125,
      "step": 33085,
      "training_step_time": 0.40723276138305664
    },
    {
      "epoch": 0.00020194091796875,
      "model_forward_time": 0.11546087265014648,
      "step": 33086
    },
    {
      "epoch": 0.00020194091796875,
      "step": 33086,
      "training_step_time": 0.40128064155578613
    },
    {
      "epoch": 0.000201947021484375,
      "model_forward_time": 0.11510252952575684,
      "step": 33087
    },
    {
      "epoch": 0.000201947021484375,
      "step": 33087,
      "training_step_time": 0.40816617012023926
    },
    {
      "epoch": 0.000201953125,
      "model_forward_time": 0.11479425430297852,
      "step": 33088
    },
    {
      "epoch": 0.000201953125,
      "step": 33088,
      "training_step_time": 0.43358302116394043
    },
    {
      "epoch": 0.000201959228515625,
      "model_forward_time": 0.11595487594604492,
      "step": 33089
    },
    {
      "epoch": 0.000201959228515625,
      "step": 33089,
      "training_step_time": 0.4681403636932373
    },
    {
      "epoch": 0.00020196533203125,
      "grad_norm": 0.12713493406772614,
      "learning_rate": 4.562391118091544e-05,
      "loss": 0.0431,
      "step": 33090
    },
    {
      "epoch": 0.00020196533203125,
      "model_forward_time": 0.11558961868286133,
      "step": 33090
    },
    {
      "epoch": 0.00020196533203125,
      "step": 33090,
      "training_step_time": 0.4286198616027832
    },
    {
      "epoch": 0.000201971435546875,
      "model_forward_time": 0.1152348518371582,
      "step": 33091
    },
    {
      "epoch": 0.000201971435546875,
      "step": 33091,
      "training_step_time": 0.42383527755737305
    },
    {
      "epoch": 0.0002019775390625,
      "model_forward_time": 0.11477398872375488,
      "step": 33092
    },
    {
      "epoch": 0.0002019775390625,
      "step": 33092,
      "training_step_time": 0.5026073455810547
    },
    {
      "epoch": 0.000201983642578125,
      "model_forward_time": 0.11535096168518066,
      "step": 33093
    },
    {
      "epoch": 0.000201983642578125,
      "step": 33093,
      "training_step_time": 0.735637903213501
    },
    {
      "epoch": 0.00020198974609375,
      "model_forward_time": 0.11503458023071289,
      "step": 33094
    },
    {
      "epoch": 0.00020198974609375,
      "step": 33094,
      "training_step_time": 0.3898732662200928
    },
    {
      "epoch": 0.000201995849609375,
      "model_forward_time": 0.11374902725219727,
      "step": 33095
    },
    {
      "epoch": 0.000201995849609375,
      "step": 33095,
      "training_step_time": 0.3933427333831787
    },
    {
      "epoch": 0.000202001953125,
      "model_forward_time": 0.1143198013305664,
      "step": 33096
    },
    {
      "epoch": 0.000202001953125,
      "step": 33096,
      "training_step_time": 0.3796231746673584
    },
    {
      "epoch": 0.000202008056640625,
      "model_forward_time": 0.1143801212310791,
      "step": 33097
    },
    {
      "epoch": 0.000202008056640625,
      "step": 33097,
      "training_step_time": 0.38286614418029785
    },
    {
      "epoch": 0.00020201416015625,
      "model_forward_time": 0.11615729331970215,
      "step": 33098
    },
    {
      "epoch": 0.00020201416015625,
      "step": 33098,
      "training_step_time": 0.3837575912475586
    },
    {
      "epoch": 0.000202020263671875,
      "model_forward_time": 0.11544656753540039,
      "step": 33099
    },
    {
      "epoch": 0.000202020263671875,
      "step": 33099,
      "training_step_time": 0.9965846538543701
    },
    {
      "epoch": 0.0002020263671875,
      "grad_norm": 0.1555813103914261,
      "learning_rate": 4.559645976691868e-05,
      "loss": 0.0421,
      "step": 33100
    },
    {
      "epoch": 0.0002020263671875,
      "model_forward_time": 0.1143498420715332,
      "step": 33100
    },
    {
      "epoch": 0.0002020263671875,
      "step": 33100,
      "training_step_time": 0.3852858543395996
    },
    {
      "epoch": 0.000202032470703125,
      "model_forward_time": 0.11480379104614258,
      "step": 33101
    },
    {
      "epoch": 0.000202032470703125,
      "step": 33101,
      "training_step_time": 0.3882863521575928
    },
    {
      "epoch": 0.00020203857421875,
      "model_forward_time": 0.11386537551879883,
      "step": 33102
    },
    {
      "epoch": 0.00020203857421875,
      "step": 33102,
      "training_step_time": 0.4451301097869873
    },
    {
      "epoch": 0.000202044677734375,
      "model_forward_time": 0.11530423164367676,
      "step": 33103
    },
    {
      "epoch": 0.000202044677734375,
      "step": 33103,
      "training_step_time": 0.4873175621032715
    },
    {
      "epoch": 0.00020205078125,
      "model_forward_time": 0.11490893363952637,
      "step": 33104
    },
    {
      "epoch": 0.00020205078125,
      "step": 33104,
      "training_step_time": 0.3615539073944092
    },
    {
      "epoch": 0.000202056884765625,
      "model_forward_time": 0.11487865447998047,
      "step": 33105
    },
    {
      "epoch": 0.000202056884765625,
      "step": 33105,
      "training_step_time": 0.6551558971405029
    },
    {
      "epoch": 0.00020206298828125,
      "model_forward_time": 0.11398029327392578,
      "step": 33106
    },
    {
      "epoch": 0.00020206298828125,
      "step": 33106,
      "training_step_time": 0.4389035701751709
    },
    {
      "epoch": 0.000202069091796875,
      "model_forward_time": 0.1140599250793457,
      "step": 33107
    },
    {
      "epoch": 0.000202069091796875,
      "step": 33107,
      "training_step_time": 0.3875143527984619
    },
    {
      "epoch": 0.0002020751953125,
      "model_forward_time": 0.11435961723327637,
      "step": 33108
    },
    {
      "epoch": 0.0002020751953125,
      "step": 33108,
      "training_step_time": 0.3832204341888428
    },
    {
      "epoch": 0.000202081298828125,
      "model_forward_time": 0.1148533821105957,
      "step": 33109
    },
    {
      "epoch": 0.000202081298828125,
      "step": 33109,
      "training_step_time": 0.38651227951049805
    },
    {
      "epoch": 0.00020208740234375,
      "grad_norm": 0.12358053028583527,
      "learning_rate": 4.5569009690601136e-05,
      "loss": 0.0452,
      "step": 33110
    },
    {
      "epoch": 0.00020208740234375,
      "model_forward_time": 0.11432409286499023,
      "step": 33110
    },
    {
      "epoch": 0.00020208740234375,
      "step": 33110,
      "training_step_time": 0.38373780250549316
    },
    {
      "epoch": 0.000202093505859375,
      "model_forward_time": 0.11523675918579102,
      "step": 33111
    },
    {
      "epoch": 0.000202093505859375,
      "step": 33111,
      "training_step_time": 0.9917738437652588
    },
    {
      "epoch": 0.000202099609375,
      "model_forward_time": 0.1140584945678711,
      "step": 33112
    },
    {
      "epoch": 0.000202099609375,
      "step": 33112,
      "training_step_time": 0.3938331604003906
    },
    {
      "epoch": 0.000202105712890625,
      "model_forward_time": 0.11526179313659668,
      "step": 33113
    },
    {
      "epoch": 0.000202105712890625,
      "step": 33113,
      "training_step_time": 0.3889729976654053
    },
    {
      "epoch": 0.00020211181640625,
      "model_forward_time": 0.11370491981506348,
      "step": 33114
    },
    {
      "epoch": 0.00020211181640625,
      "step": 33114,
      "training_step_time": 0.387174129486084
    },
    {
      "epoch": 0.000202117919921875,
      "model_forward_time": 0.11560750007629395,
      "step": 33115
    },
    {
      "epoch": 0.000202117919921875,
      "step": 33115,
      "training_step_time": 0.44770169258117676
    },
    {
      "epoch": 0.0002021240234375,
      "model_forward_time": 0.1145012378692627,
      "step": 33116
    },
    {
      "epoch": 0.0002021240234375,
      "step": 33116,
      "training_step_time": 0.487276554107666
    },
    {
      "epoch": 0.000202130126953125,
      "model_forward_time": 0.11438655853271484,
      "step": 33117
    },
    {
      "epoch": 0.000202130126953125,
      "step": 33117,
      "training_step_time": 0.3681480884552002
    },
    {
      "epoch": 0.00020213623046875,
      "model_forward_time": 0.11385345458984375,
      "step": 33118
    },
    {
      "epoch": 0.00020213623046875,
      "step": 33118,
      "training_step_time": 0.4599752426147461
    },
    {
      "epoch": 0.000202142333984375,
      "model_forward_time": 0.11443209648132324,
      "step": 33119
    },
    {
      "epoch": 0.000202142333984375,
      "step": 33119,
      "training_step_time": 0.3918187618255615
    },
    {
      "epoch": 0.0002021484375,
      "grad_norm": 0.08982765674591064,
      "learning_rate": 4.554156096030149e-05,
      "loss": 0.0411,
      "step": 33120
    },
    {
      "epoch": 0.0002021484375,
      "model_forward_time": 0.11559748649597168,
      "step": 33120
    },
    {
      "epoch": 0.0002021484375,
      "step": 33120,
      "training_step_time": 0.38888096809387207
    },
    {
      "epoch": 0.000202154541015625,
      "model_forward_time": 0.11440896987915039,
      "step": 33121
    },
    {
      "epoch": 0.000202154541015625,
      "step": 33121,
      "training_step_time": 0.38318634033203125
    },
    {
      "epoch": 0.00020216064453125,
      "model_forward_time": 0.11535310745239258,
      "step": 33122
    },
    {
      "epoch": 0.00020216064453125,
      "step": 33122,
      "training_step_time": 0.3792910575866699
    },
    {
      "epoch": 0.000202166748046875,
      "model_forward_time": 0.11585283279418945,
      "step": 33123
    },
    {
      "epoch": 0.000202166748046875,
      "step": 33123,
      "training_step_time": 0.5557363033294678
    },
    {
      "epoch": 0.0002021728515625,
      "model_forward_time": 0.11449027061462402,
      "step": 33124
    },
    {
      "epoch": 0.0002021728515625,
      "step": 33124,
      "training_step_time": 0.3953533172607422
    },
    {
      "epoch": 0.000202178955078125,
      "model_forward_time": 0.11502647399902344,
      "step": 33125
    },
    {
      "epoch": 0.000202178955078125,
      "step": 33125,
      "training_step_time": 0.39457058906555176
    },
    {
      "epoch": 0.00020218505859375,
      "model_forward_time": 0.11439204216003418,
      "step": 33126
    },
    {
      "epoch": 0.00020218505859375,
      "step": 33126,
      "training_step_time": 0.39103055000305176
    },
    {
      "epoch": 0.000202191162109375,
      "model_forward_time": 0.11551737785339355,
      "step": 33127
    },
    {
      "epoch": 0.000202191162109375,
      "step": 33127,
      "training_step_time": 0.414461612701416
    },
    {
      "epoch": 0.000202197265625,
      "model_forward_time": 0.11508822441101074,
      "step": 33128
    },
    {
      "epoch": 0.000202197265625,
      "step": 33128,
      "training_step_time": 0.38693857192993164
    },
    {
      "epoch": 0.000202203369140625,
      "model_forward_time": 0.11614847183227539,
      "step": 33129
    },
    {
      "epoch": 0.000202203369140625,
      "step": 33129,
      "training_step_time": 0.9177591800689697
    },
    {
      "epoch": 0.00020220947265625,
      "grad_norm": 0.10936396569013596,
      "learning_rate": 4.5514113584357873e-05,
      "loss": 0.0409,
      "step": 33130
    },
    {
      "epoch": 0.00020220947265625,
      "model_forward_time": 0.11453938484191895,
      "step": 33130
    },
    {
      "epoch": 0.00020220947265625,
      "step": 33130,
      "training_step_time": 0.44219970703125
    },
    {
      "epoch": 0.000202215576171875,
      "model_forward_time": 0.11402297019958496,
      "step": 33131
    },
    {
      "epoch": 0.000202215576171875,
      "step": 33131,
      "training_step_time": 0.42938923835754395
    },
    {
      "epoch": 0.0002022216796875,
      "model_forward_time": 0.11458754539489746,
      "step": 33132
    },
    {
      "epoch": 0.0002022216796875,
      "step": 33132,
      "training_step_time": 0.4234473705291748
    },
    {
      "epoch": 0.000202227783203125,
      "model_forward_time": 0.11466145515441895,
      "step": 33133
    },
    {
      "epoch": 0.000202227783203125,
      "step": 33133,
      "training_step_time": 0.39221644401550293
    },
    {
      "epoch": 0.00020223388671875,
      "model_forward_time": 0.11382889747619629,
      "step": 33134
    },
    {
      "epoch": 0.00020223388671875,
      "step": 33134,
      "training_step_time": 0.3840322494506836
    },
    {
      "epoch": 0.000202239990234375,
      "model_forward_time": 0.11441874504089355,
      "step": 33135
    },
    {
      "epoch": 0.000202239990234375,
      "step": 33135,
      "training_step_time": 0.7025866508483887
    },
    {
      "epoch": 0.00020224609375,
      "model_forward_time": 0.11442708969116211,
      "step": 33136
    },
    {
      "epoch": 0.00020224609375,
      "step": 33136,
      "training_step_time": 0.38507795333862305
    },
    {
      "epoch": 0.000202252197265625,
      "model_forward_time": 0.11443257331848145,
      "step": 33137
    },
    {
      "epoch": 0.000202252197265625,
      "step": 33137,
      "training_step_time": 0.3859138488769531
    },
    {
      "epoch": 0.00020225830078125,
      "model_forward_time": 0.1146550178527832,
      "step": 33138
    },
    {
      "epoch": 0.00020225830078125,
      "step": 33138,
      "training_step_time": 0.3863565921783447
    },
    {
      "epoch": 0.000202264404296875,
      "model_forward_time": 0.1141669750213623,
      "step": 33139
    },
    {
      "epoch": 0.000202264404296875,
      "step": 33139,
      "training_step_time": 0.39494991302490234
    },
    {
      "epoch": 0.0002022705078125,
      "grad_norm": 0.11072847992181778,
      "learning_rate": 4.548666757110812e-05,
      "loss": 0.0455,
      "step": 33140
    },
    {
      "epoch": 0.0002022705078125,
      "model_forward_time": 0.11428117752075195,
      "step": 33140
    },
    {
      "epoch": 0.0002022705078125,
      "step": 33140,
      "training_step_time": 0.3881664276123047
    },
    {
      "epoch": 0.000202276611328125,
      "model_forward_time": 0.11520695686340332,
      "step": 33141
    },
    {
      "epoch": 0.000202276611328125,
      "step": 33141,
      "training_step_time": 0.5617036819458008
    },
    {
      "epoch": 0.00020228271484375,
      "model_forward_time": 0.11409306526184082,
      "step": 33142
    },
    {
      "epoch": 0.00020228271484375,
      "step": 33142,
      "training_step_time": 0.4079713821411133
    },
    {
      "epoch": 0.000202288818359375,
      "model_forward_time": 0.11483430862426758,
      "step": 33143
    },
    {
      "epoch": 0.000202288818359375,
      "step": 33143,
      "training_step_time": 0.4877502918243408
    },
    {
      "epoch": 0.000202294921875,
      "model_forward_time": 0.11513829231262207,
      "step": 33144
    },
    {
      "epoch": 0.000202294921875,
      "step": 33144,
      "training_step_time": 0.43991684913635254
    },
    {
      "epoch": 0.000202301025390625,
      "model_forward_time": 0.11511683464050293,
      "step": 33145
    },
    {
      "epoch": 0.000202301025390625,
      "step": 33145,
      "training_step_time": 0.4410271644592285
    },
    {
      "epoch": 0.00020230712890625,
      "model_forward_time": 0.11524057388305664,
      "step": 33146
    },
    {
      "epoch": 0.00020230712890625,
      "step": 33146,
      "training_step_time": 0.48778510093688965
    },
    {
      "epoch": 0.000202313232421875,
      "model_forward_time": 0.11470866203308105,
      "step": 33147
    },
    {
      "epoch": 0.000202313232421875,
      "step": 33147,
      "training_step_time": 0.3976290225982666
    },
    {
      "epoch": 0.0002023193359375,
      "model_forward_time": 0.11435151100158691,
      "step": 33148
    },
    {
      "epoch": 0.0002023193359375,
      "step": 33148,
      "training_step_time": 0.3963358402252197
    },
    {
      "epoch": 0.000202325439453125,
      "model_forward_time": 0.11525774002075195,
      "step": 33149
    },
    {
      "epoch": 0.000202325439453125,
      "step": 33149,
      "training_step_time": 0.3873326778411865
    },
    {
      "epoch": 0.00020233154296875,
      "grad_norm": 0.14560800790786743,
      "learning_rate": 4.545922292888959e-05,
      "loss": 0.0458,
      "step": 33150
    },
    {
      "epoch": 0.00020233154296875,
      "model_forward_time": 0.11531400680541992,
      "step": 33150
    },
    {
      "epoch": 0.00020233154296875,
      "step": 33150,
      "training_step_time": 0.39180946350097656
    },
    {
      "epoch": 0.000202337646484375,
      "model_forward_time": 0.11536812782287598,
      "step": 33151
    },
    {
      "epoch": 0.000202337646484375,
      "step": 33151,
      "training_step_time": 0.40420079231262207
    },
    {
      "epoch": 0.00020234375,
      "model_forward_time": 0.11494588851928711,
      "step": 33152
    },
    {
      "epoch": 0.00020234375,
      "step": 33152,
      "training_step_time": 0.38889598846435547
    },
    {
      "epoch": 0.000202349853515625,
      "model_forward_time": 0.11718177795410156,
      "step": 33153
    },
    {
      "epoch": 0.000202349853515625,
      "step": 33153,
      "training_step_time": 0.5548558235168457
    },
    {
      "epoch": 0.00020235595703125,
      "model_forward_time": 0.11499285697937012,
      "step": 33154
    },
    {
      "epoch": 0.00020235595703125,
      "step": 33154,
      "training_step_time": 0.39073634147644043
    },
    {
      "epoch": 0.000202362060546875,
      "model_forward_time": 0.1148521900177002,
      "step": 33155
    },
    {
      "epoch": 0.000202362060546875,
      "step": 33155,
      "training_step_time": 0.40579795837402344
    },
    {
      "epoch": 0.0002023681640625,
      "model_forward_time": 0.11530852317810059,
      "step": 33156
    },
    {
      "epoch": 0.0002023681640625,
      "step": 33156,
      "training_step_time": 0.4532608985900879
    },
    {
      "epoch": 0.000202374267578125,
      "model_forward_time": 0.1160287857055664,
      "step": 33157
    },
    {
      "epoch": 0.000202374267578125,
      "step": 33157,
      "training_step_time": 0.4767768383026123
    },
    {
      "epoch": 0.00020238037109375,
      "model_forward_time": 0.11552023887634277,
      "step": 33158
    },
    {
      "epoch": 0.00020238037109375,
      "step": 33158,
      "training_step_time": 0.40781092643737793
    },
    {
      "epoch": 0.000202386474609375,
      "model_forward_time": 0.11490154266357422,
      "step": 33159
    },
    {
      "epoch": 0.000202386474609375,
      "step": 33159,
      "training_step_time": 0.6492948532104492
    },
    {
      "epoch": 0.000202392578125,
      "grad_norm": 0.10284244269132614,
      "learning_rate": 4.543177966603925e-05,
      "loss": 0.0421,
      "step": 33160
    },
    {
      "epoch": 0.000202392578125,
      "model_forward_time": 0.11477470397949219,
      "step": 33160
    },
    {
      "epoch": 0.000202392578125,
      "step": 33160,
      "training_step_time": 0.4198575019836426
    },
    {
      "epoch": 0.000202398681640625,
      "model_forward_time": 0.11482024192810059,
      "step": 33161
    },
    {
      "epoch": 0.000202398681640625,
      "step": 33161,
      "training_step_time": 0.3901338577270508
    },
    {
      "epoch": 0.00020240478515625,
      "model_forward_time": 0.11473417282104492,
      "step": 33162
    },
    {
      "epoch": 0.00020240478515625,
      "step": 33162,
      "training_step_time": 0.3958137035369873
    },
    {
      "epoch": 0.000202410888671875,
      "model_forward_time": 0.11498308181762695,
      "step": 33163
    },
    {
      "epoch": 0.000202410888671875,
      "step": 33163,
      "training_step_time": 0.39332008361816406
    },
    {
      "epoch": 0.0002024169921875,
      "model_forward_time": 0.11464500427246094,
      "step": 33164
    },
    {
      "epoch": 0.0002024169921875,
      "step": 33164,
      "training_step_time": 0.3868834972381592
    },
    {
      "epoch": 0.000202423095703125,
      "model_forward_time": 0.1192772388458252,
      "step": 33165
    },
    {
      "epoch": 0.000202423095703125,
      "step": 33165,
      "training_step_time": 0.7091283798217773
    },
    {
      "epoch": 0.00020242919921875,
      "model_forward_time": 0.11549544334411621,
      "step": 33166
    },
    {
      "epoch": 0.00020242919921875,
      "step": 33166,
      "training_step_time": 0.38965773582458496
    },
    {
      "epoch": 0.000202435302734375,
      "model_forward_time": 0.11466145515441895,
      "step": 33167
    },
    {
      "epoch": 0.000202435302734375,
      "step": 33167,
      "training_step_time": 0.3909947872161865
    },
    {
      "epoch": 0.00020244140625,
      "model_forward_time": 0.1142416000366211,
      "step": 33168
    },
    {
      "epoch": 0.00020244140625,
      "step": 33168,
      "training_step_time": 0.3934047222137451
    },
    {
      "epoch": 0.000202447509765625,
      "model_forward_time": 0.11467838287353516,
      "step": 33169
    },
    {
      "epoch": 0.000202447509765625,
      "step": 33169,
      "training_step_time": 0.45432233810424805
    },
    {
      "epoch": 0.00020245361328125,
      "grad_norm": 0.1448093205690384,
      "learning_rate": 4.5404337790893626e-05,
      "loss": 0.045,
      "step": 33170
    },
    {
      "epoch": 0.00020245361328125,
      "model_forward_time": 0.11441302299499512,
      "step": 33170
    },
    {
      "epoch": 0.00020245361328125,
      "step": 33170,
      "training_step_time": 0.43711137771606445
    },
    {
      "epoch": 0.000202459716796875,
      "model_forward_time": 0.1151585578918457,
      "step": 33171
    },
    {
      "epoch": 0.000202459716796875,
      "step": 33171,
      "training_step_time": 0.5115394592285156
    },
    {
      "epoch": 0.0002024658203125,
      "model_forward_time": 0.11449718475341797,
      "step": 33172
    },
    {
      "epoch": 0.0002024658203125,
      "step": 33172,
      "training_step_time": 0.42529773712158203
    },
    {
      "epoch": 0.000202471923828125,
      "model_forward_time": 0.11498594284057617,
      "step": 33173
    },
    {
      "epoch": 0.000202471923828125,
      "step": 33173,
      "training_step_time": 0.43238306045532227
    },
    {
      "epoch": 0.00020247802734375,
      "model_forward_time": 0.11488842964172363,
      "step": 33174
    },
    {
      "epoch": 0.00020247802734375,
      "step": 33174,
      "training_step_time": 0.40976643562316895
    },
    {
      "epoch": 0.000202484130859375,
      "model_forward_time": 0.11512064933776855,
      "step": 33175
    },
    {
      "epoch": 0.000202484130859375,
      "step": 33175,
      "training_step_time": 0.399371862411499
    },
    {
      "epoch": 0.000202490234375,
      "model_forward_time": 0.11491847038269043,
      "step": 33176
    },
    {
      "epoch": 0.000202490234375,
      "step": 33176,
      "training_step_time": 0.38538336753845215
    },
    {
      "epoch": 0.000202496337890625,
      "model_forward_time": 0.11580181121826172,
      "step": 33177
    },
    {
      "epoch": 0.000202496337890625,
      "step": 33177,
      "training_step_time": 0.6370100975036621
    },
    {
      "epoch": 0.00020250244140625,
      "model_forward_time": 0.11431074142456055,
      "step": 33178
    },
    {
      "epoch": 0.00020250244140625,
      "step": 33178,
      "training_step_time": 0.3679502010345459
    },
    {
      "epoch": 0.000202508544921875,
      "model_forward_time": 0.11478018760681152,
      "step": 33179
    },
    {
      "epoch": 0.000202508544921875,
      "step": 33179,
      "training_step_time": 0.41240739822387695
    },
    {
      "epoch": 0.0002025146484375,
      "grad_norm": 0.12369855493307114,
      "learning_rate": 4.537689731178883e-05,
      "loss": 0.0433,
      "step": 33180
    },
    {
      "epoch": 0.0002025146484375,
      "model_forward_time": 0.11436986923217773,
      "step": 33180
    },
    {
      "epoch": 0.0002025146484375,
      "step": 33180,
      "training_step_time": 0.3871293067932129
    },
    {
      "epoch": 0.000202520751953125,
      "model_forward_time": 0.11484766006469727,
      "step": 33181
    },
    {
      "epoch": 0.000202520751953125,
      "step": 33181,
      "training_step_time": 0.4079620838165283
    },
    {
      "epoch": 0.00020252685546875,
      "model_forward_time": 0.11484956741333008,
      "step": 33182
    },
    {
      "epoch": 0.00020252685546875,
      "step": 33182,
      "training_step_time": 0.417039155960083
    },
    {
      "epoch": 0.000202532958984375,
      "model_forward_time": 0.11593914031982422,
      "step": 33183
    },
    {
      "epoch": 0.000202532958984375,
      "step": 33183,
      "training_step_time": 0.4549252986907959
    },
    {
      "epoch": 0.0002025390625,
      "model_forward_time": 0.11566638946533203,
      "step": 33184
    },
    {
      "epoch": 0.0002025390625,
      "step": 33184,
      "training_step_time": 0.41046953201293945
    },
    {
      "epoch": 0.000202545166015625,
      "model_forward_time": 0.11523604393005371,
      "step": 33185
    },
    {
      "epoch": 0.000202545166015625,
      "step": 33185,
      "training_step_time": 0.480302095413208
    },
    {
      "epoch": 0.00020255126953125,
      "model_forward_time": 0.11536383628845215,
      "step": 33186
    },
    {
      "epoch": 0.00020255126953125,
      "step": 33186,
      "training_step_time": 0.43697428703308105
    },
    {
      "epoch": 0.000202557373046875,
      "model_forward_time": 0.11506104469299316,
      "step": 33187
    },
    {
      "epoch": 0.000202557373046875,
      "step": 33187,
      "training_step_time": 0.46593499183654785
    },
    {
      "epoch": 0.0002025634765625,
      "model_forward_time": 0.11496400833129883,
      "step": 33188
    },
    {
      "epoch": 0.0002025634765625,
      "step": 33188,
      "training_step_time": 0.40286970138549805
    },
    {
      "epoch": 0.000202569580078125,
      "model_forward_time": 0.1149435043334961,
      "step": 33189
    },
    {
      "epoch": 0.000202569580078125,
      "step": 33189,
      "training_step_time": 0.8678743839263916
    },
    {
      "epoch": 0.00020257568359375,
      "grad_norm": 0.14620569348335266,
      "learning_rate": 4.534945823706056e-05,
      "loss": 0.0473,
      "step": 33190
    },
    {
      "epoch": 0.00020257568359375,
      "model_forward_time": 0.11459922790527344,
      "step": 33190
    },
    {
      "epoch": 0.00020257568359375,
      "step": 33190,
      "training_step_time": 0.37123751640319824
    },
    {
      "epoch": 0.000202581787109375,
      "model_forward_time": 0.11391115188598633,
      "step": 33191
    },
    {
      "epoch": 0.000202581787109375,
      "step": 33191,
      "training_step_time": 0.38654136657714844
    },
    {
      "epoch": 0.000202587890625,
      "model_forward_time": 0.11433577537536621,
      "step": 33192
    },
    {
      "epoch": 0.000202587890625,
      "step": 33192,
      "training_step_time": 0.3902008533477783
    },
    {
      "epoch": 0.000202593994140625,
      "model_forward_time": 0.11486554145812988,
      "step": 33193
    },
    {
      "epoch": 0.000202593994140625,
      "step": 33193,
      "training_step_time": 0.3884255886077881
    },
    {
      "epoch": 0.00020260009765625,
      "model_forward_time": 0.11422085762023926,
      "step": 33194
    },
    {
      "epoch": 0.00020260009765625,
      "step": 33194,
      "training_step_time": 0.3905649185180664
    },
    {
      "epoch": 0.000202606201171875,
      "model_forward_time": 0.11495709419250488,
      "step": 33195
    },
    {
      "epoch": 0.000202606201171875,
      "step": 33195,
      "training_step_time": 0.7187285423278809
    },
    {
      "epoch": 0.0002026123046875,
      "model_forward_time": 0.11459589004516602,
      "step": 33196
    },
    {
      "epoch": 0.0002026123046875,
      "step": 33196,
      "training_step_time": 0.37358617782592773
    },
    {
      "epoch": 0.000202618408203125,
      "model_forward_time": 0.11456608772277832,
      "step": 33197
    },
    {
      "epoch": 0.000202618408203125,
      "step": 33197,
      "training_step_time": 0.4674243927001953
    },
    {
      "epoch": 0.00020262451171875,
      "model_forward_time": 0.11492037773132324,
      "step": 33198
    },
    {
      "epoch": 0.00020262451171875,
      "step": 33198,
      "training_step_time": 0.4746863842010498
    },
    {
      "epoch": 0.000202630615234375,
      "model_forward_time": 0.11406970024108887,
      "step": 33199
    },
    {
      "epoch": 0.000202630615234375,
      "step": 33199,
      "training_step_time": 0.3904240131378174
    },
    {
      "epoch": 0.00020263671875,
      "grad_norm": 0.13727837800979614,
      "learning_rate": 4.5322020575044114e-05,
      "loss": 0.0467,
      "step": 33200
    },
    {
      "epoch": 0.00020263671875,
      "model_forward_time": 0.11422157287597656,
      "step": 33200
    },
    {
      "epoch": 0.00020263671875,
      "step": 33200,
      "training_step_time": 0.4438159465789795
    },
    {
      "epoch": 0.000202642822265625,
      "model_forward_time": 0.11489224433898926,
      "step": 33201
    },
    {
      "epoch": 0.000202642822265625,
      "step": 33201,
      "training_step_time": 0.6050760746002197
    },
    {
      "epoch": 0.00020264892578125,
      "model_forward_time": 0.11497831344604492,
      "step": 33202
    },
    {
      "epoch": 0.00020264892578125,
      "step": 33202,
      "training_step_time": 0.39913129806518555
    },
    {
      "epoch": 0.000202655029296875,
      "model_forward_time": 0.11438441276550293,
      "step": 33203
    },
    {
      "epoch": 0.000202655029296875,
      "step": 33203,
      "training_step_time": 0.3874068260192871
    },
    {
      "epoch": 0.0002026611328125,
      "model_forward_time": 0.11463046073913574,
      "step": 33204
    },
    {
      "epoch": 0.0002026611328125,
      "step": 33204,
      "training_step_time": 0.38991618156433105
    },
    {
      "epoch": 0.000202667236328125,
      "model_forward_time": 0.1146082878112793,
      "step": 33205
    },
    {
      "epoch": 0.000202667236328125,
      "step": 33205,
      "training_step_time": 0.3980896472930908
    },
    {
      "epoch": 0.00020267333984375,
      "model_forward_time": 0.11501240730285645,
      "step": 33206
    },
    {
      "epoch": 0.00020267333984375,
      "step": 33206,
      "training_step_time": 0.38846802711486816
    },
    {
      "epoch": 0.000202679443359375,
      "model_forward_time": 0.11505722999572754,
      "step": 33207
    },
    {
      "epoch": 0.000202679443359375,
      "step": 33207,
      "training_step_time": 0.7529592514038086
    },
    {
      "epoch": 0.000202685546875,
      "model_forward_time": 0.1143331527709961,
      "step": 33208
    },
    {
      "epoch": 0.000202685546875,
      "step": 33208,
      "training_step_time": 0.3845648765563965
    },
    {
      "epoch": 0.000202691650390625,
      "model_forward_time": 0.11453676223754883,
      "step": 33209
    },
    {
      "epoch": 0.000202691650390625,
      "step": 33209,
      "training_step_time": 0.39316415786743164
    },
    {
      "epoch": 0.00020269775390625,
      "grad_norm": 0.09067856520414352,
      "learning_rate": 4.529458433407429e-05,
      "loss": 0.042,
      "step": 33210
    },
    {
      "epoch": 0.00020269775390625,
      "model_forward_time": 0.11451220512390137,
      "step": 33210
    },
    {
      "epoch": 0.00020269775390625,
      "step": 33210,
      "training_step_time": 0.3961353302001953
    },
    {
      "epoch": 0.000202703857421875,
      "model_forward_time": 0.11467409133911133,
      "step": 33211
    },
    {
      "epoch": 0.000202703857421875,
      "step": 33211,
      "training_step_time": 0.41030240058898926
    },
    {
      "epoch": 0.0002027099609375,
      "model_forward_time": 0.1150820255279541,
      "step": 33212
    },
    {
      "epoch": 0.0002027099609375,
      "step": 33212,
      "training_step_time": 0.49976682662963867
    },
    {
      "epoch": 0.000202716064453125,
      "model_forward_time": 0.11493349075317383,
      "step": 33213
    },
    {
      "epoch": 0.000202716064453125,
      "step": 33213,
      "training_step_time": 0.7462999820709229
    },
    {
      "epoch": 0.00020272216796875,
      "model_forward_time": 0.11445426940917969,
      "step": 33214
    },
    {
      "epoch": 0.00020272216796875,
      "step": 33214,
      "training_step_time": 0.39125800132751465
    },
    {
      "epoch": 0.000202728271484375,
      "model_forward_time": 0.11417126655578613,
      "step": 33215
    },
    {
      "epoch": 0.000202728271484375,
      "step": 33215,
      "training_step_time": 0.4755551815032959
    },
    {
      "epoch": 0.000202734375,
      "model_forward_time": 0.11461567878723145,
      "step": 33216
    },
    {
      "epoch": 0.000202734375,
      "step": 33216,
      "training_step_time": 0.3849318027496338
    },
    {
      "epoch": 0.000202740478515625,
      "model_forward_time": 0.11406493186950684,
      "step": 33217
    },
    {
      "epoch": 0.000202740478515625,
      "step": 33217,
      "training_step_time": 0.38935089111328125
    },
    {
      "epoch": 0.00020274658203125,
      "model_forward_time": 0.1143198013305664,
      "step": 33218
    },
    {
      "epoch": 0.00020274658203125,
      "step": 33218,
      "training_step_time": 0.3769657611846924
    },
    {
      "epoch": 0.000202752685546875,
      "model_forward_time": 0.11489439010620117,
      "step": 33219
    },
    {
      "epoch": 0.000202752685546875,
      "step": 33219,
      "training_step_time": 0.7477796077728271
    },
    {
      "epoch": 0.0002027587890625,
      "grad_norm": 0.12149833887815475,
      "learning_rate": 4.526714952248551e-05,
      "loss": 0.0406,
      "step": 33220
    },
    {
      "epoch": 0.0002027587890625,
      "model_forward_time": 0.11431002616882324,
      "step": 33220
    },
    {
      "epoch": 0.0002027587890625,
      "step": 33220,
      "training_step_time": 0.39576101303100586
    },
    {
      "epoch": 0.000202764892578125,
      "model_forward_time": 0.11434030532836914,
      "step": 33221
    },
    {
      "epoch": 0.000202764892578125,
      "step": 33221,
      "training_step_time": 0.3962233066558838
    },
    {
      "epoch": 0.00020277099609375,
      "model_forward_time": 0.11436915397644043,
      "step": 33222
    },
    {
      "epoch": 0.00020277099609375,
      "step": 33222,
      "training_step_time": 0.3896148204803467
    },
    {
      "epoch": 0.000202777099609375,
      "model_forward_time": 0.11465072631835938,
      "step": 33223
    },
    {
      "epoch": 0.000202777099609375,
      "step": 33223,
      "training_step_time": 0.4140779972076416
    },
    {
      "epoch": 0.000202783203125,
      "model_forward_time": 0.11445188522338867,
      "step": 33224
    },
    {
      "epoch": 0.000202783203125,
      "step": 33224,
      "training_step_time": 0.44263195991516113
    },
    {
      "epoch": 0.000202789306640625,
      "model_forward_time": 0.1152350902557373,
      "step": 33225
    },
    {
      "epoch": 0.000202789306640625,
      "step": 33225,
      "training_step_time": 0.5819821357727051
    },
    {
      "epoch": 0.00020279541015625,
      "model_forward_time": 0.11497068405151367,
      "step": 33226
    },
    {
      "epoch": 0.00020279541015625,
      "step": 33226,
      "training_step_time": 0.4081747531890869
    },
    {
      "epoch": 0.000202801513671875,
      "model_forward_time": 0.11481285095214844,
      "step": 33227
    },
    {
      "epoch": 0.000202801513671875,
      "step": 33227,
      "training_step_time": 0.36531996726989746
    },
    {
      "epoch": 0.0002028076171875,
      "model_forward_time": 0.11542606353759766,
      "step": 33228
    },
    {
      "epoch": 0.0002028076171875,
      "step": 33228,
      "training_step_time": 0.4660813808441162
    },
    {
      "epoch": 0.000202813720703125,
      "model_forward_time": 0.11462068557739258,
      "step": 33229
    },
    {
      "epoch": 0.000202813720703125,
      "step": 33229,
      "training_step_time": 0.38967013359069824
    },
    {
      "epoch": 0.00020281982421875,
      "grad_norm": 0.122281514108181,
      "learning_rate": 4.5239716148611724e-05,
      "loss": 0.0422,
      "step": 33230
    },
    {
      "epoch": 0.00020281982421875,
      "model_forward_time": 0.11467099189758301,
      "step": 33230
    },
    {
      "epoch": 0.00020281982421875,
      "step": 33230,
      "training_step_time": 0.38270115852355957
    },
    {
      "epoch": 0.000202825927734375,
      "model_forward_time": 0.11535811424255371,
      "step": 33231
    },
    {
      "epoch": 0.000202825927734375,
      "step": 33231,
      "training_step_time": 0.6105890274047852
    },
    {
      "epoch": 0.00020283203125,
      "model_forward_time": 0.11462831497192383,
      "step": 33232
    },
    {
      "epoch": 0.00020283203125,
      "step": 33232,
      "training_step_time": 0.4295048713684082
    },
    {
      "epoch": 0.000202838134765625,
      "model_forward_time": 0.11496257781982422,
      "step": 33233
    },
    {
      "epoch": 0.000202838134765625,
      "step": 33233,
      "training_step_time": 0.386962890625
    },
    {
      "epoch": 0.00020284423828125,
      "model_forward_time": 0.11475753784179688,
      "step": 33234
    },
    {
      "epoch": 0.00020284423828125,
      "step": 33234,
      "training_step_time": 0.40329551696777344
    },
    {
      "epoch": 0.000202850341796875,
      "model_forward_time": 0.11522459983825684,
      "step": 33235
    },
    {
      "epoch": 0.000202850341796875,
      "step": 33235,
      "training_step_time": 0.3907346725463867
    },
    {
      "epoch": 0.0002028564453125,
      "model_forward_time": 0.11462235450744629,
      "step": 33236
    },
    {
      "epoch": 0.0002028564453125,
      "step": 33236,
      "training_step_time": 0.3991086483001709
    },
    {
      "epoch": 0.000202862548828125,
      "model_forward_time": 0.11584973335266113,
      "step": 33237
    },
    {
      "epoch": 0.000202862548828125,
      "step": 33237,
      "training_step_time": 0.6509814262390137
    },
    {
      "epoch": 0.00020286865234375,
      "model_forward_time": 0.11600708961486816,
      "step": 33238
    },
    {
      "epoch": 0.00020286865234375,
      "step": 33238,
      "training_step_time": 0.4082481861114502
    },
    {
      "epoch": 0.000202874755859375,
      "model_forward_time": 0.11487531661987305,
      "step": 33239
    },
    {
      "epoch": 0.000202874755859375,
      "step": 33239,
      "training_step_time": 0.4131002426147461
    },
    {
      "epoch": 0.000202880859375,
      "grad_norm": 0.1691587120294571,
      "learning_rate": 4.5212284220786494e-05,
      "loss": 0.0421,
      "step": 33240
    },
    {
      "epoch": 0.000202880859375,
      "model_forward_time": 0.11446094512939453,
      "step": 33240
    },
    {
      "epoch": 0.000202880859375,
      "step": 33240,
      "training_step_time": 0.42372941970825195
    },
    {
      "epoch": 0.000202886962890625,
      "model_forward_time": 0.11487007141113281,
      "step": 33241
    },
    {
      "epoch": 0.000202886962890625,
      "step": 33241,
      "training_step_time": 0.3750271797180176
    },
    {
      "epoch": 0.00020289306640625,
      "model_forward_time": 0.11493849754333496,
      "step": 33242
    },
    {
      "epoch": 0.00020289306640625,
      "step": 33242,
      "training_step_time": 0.4313790798187256
    },
    {
      "epoch": 0.000202899169921875,
      "model_forward_time": 0.11576056480407715,
      "step": 33243
    },
    {
      "epoch": 0.000202899169921875,
      "step": 33243,
      "training_step_time": 0.46815013885498047
    },
    {
      "epoch": 0.0002029052734375,
      "model_forward_time": 0.11513519287109375,
      "step": 33244
    },
    {
      "epoch": 0.0002029052734375,
      "step": 33244,
      "training_step_time": 0.3948214054107666
    },
    {
      "epoch": 0.000202911376953125,
      "model_forward_time": 0.11521434783935547,
      "step": 33245
    },
    {
      "epoch": 0.000202911376953125,
      "step": 33245,
      "training_step_time": 0.4136514663696289
    },
    {
      "epoch": 0.00020291748046875,
      "model_forward_time": 0.11622285842895508,
      "step": 33246
    },
    {
      "epoch": 0.00020291748046875,
      "step": 33246,
      "training_step_time": 0.43285226821899414
    },
    {
      "epoch": 0.000202923583984375,
      "model_forward_time": 0.11480426788330078,
      "step": 33247
    },
    {
      "epoch": 0.000202923583984375,
      "step": 33247,
      "training_step_time": 0.3951387405395508
    },
    {
      "epoch": 0.0002029296875,
      "model_forward_time": 0.11533713340759277,
      "step": 33248
    },
    {
      "epoch": 0.0002029296875,
      "step": 33248,
      "training_step_time": 0.3941044807434082
    },
    {
      "epoch": 0.000202935791015625,
      "model_forward_time": 0.1149301528930664,
      "step": 33249
    },
    {
      "epoch": 0.000202935791015625,
      "step": 33249,
      "training_step_time": 0.5124859809875488
    },
    {
      "epoch": 0.00020294189453125,
      "grad_norm": 0.12479383498430252,
      "learning_rate": 4.518485374734292e-05,
      "loss": 0.0417,
      "step": 33250
    },
    {
      "epoch": 0.00020294189453125,
      "model_forward_time": 0.11531186103820801,
      "step": 33250
    },
    {
      "epoch": 0.00020294189453125,
      "step": 33250,
      "training_step_time": 0.42080020904541016
    },
    {
      "epoch": 0.000202947998046875,
      "model_forward_time": 0.11452722549438477,
      "step": 33251
    },
    {
      "epoch": 0.000202947998046875,
      "step": 33251,
      "training_step_time": 0.4497663974761963
    },
    {
      "epoch": 0.0002029541015625,
      "model_forward_time": 0.11499142646789551,
      "step": 33252
    },
    {
      "epoch": 0.0002029541015625,
      "step": 33252,
      "training_step_time": 0.43824219703674316
    },
    {
      "epoch": 0.000202960205078125,
      "model_forward_time": 0.11448502540588379,
      "step": 33253
    },
    {
      "epoch": 0.000202960205078125,
      "step": 33253,
      "training_step_time": 0.40332603454589844
    },
    {
      "epoch": 0.00020296630859375,
      "model_forward_time": 0.11455416679382324,
      "step": 33254
    },
    {
      "epoch": 0.00020296630859375,
      "step": 33254,
      "training_step_time": 0.48304176330566406
    },
    {
      "epoch": 0.000202972412109375,
      "model_forward_time": 0.1148524284362793,
      "step": 33255
    },
    {
      "epoch": 0.000202972412109375,
      "step": 33255,
      "training_step_time": 0.4360346794128418
    },
    {
      "epoch": 0.000202978515625,
      "model_forward_time": 0.11450862884521484,
      "step": 33256
    },
    {
      "epoch": 0.000202978515625,
      "step": 33256,
      "training_step_time": 0.4141843318939209
    },
    {
      "epoch": 0.000202984619140625,
      "model_forward_time": 0.11481499671936035,
      "step": 33257
    },
    {
      "epoch": 0.000202984619140625,
      "step": 33257,
      "training_step_time": 0.4268481731414795
    },
    {
      "epoch": 0.00020299072265625,
      "model_forward_time": 0.11465954780578613,
      "step": 33258
    },
    {
      "epoch": 0.00020299072265625,
      "step": 33258,
      "training_step_time": 0.3991079330444336
    },
    {
      "epoch": 0.000202996826171875,
      "model_forward_time": 0.11556768417358398,
      "step": 33259
    },
    {
      "epoch": 0.000202996826171875,
      "step": 33259,
      "training_step_time": 0.4136683940887451
    },
    {
      "epoch": 0.0002030029296875,
      "grad_norm": 0.089149609208107,
      "learning_rate": 4.515742473661362e-05,
      "loss": 0.0418,
      "step": 33260
    },
    {
      "epoch": 0.0002030029296875,
      "model_forward_time": 0.11451005935668945,
      "step": 33260
    },
    {
      "epoch": 0.0002030029296875,
      "step": 33260,
      "training_step_time": 0.3999645709991455
    },
    {
      "epoch": 0.000203009033203125,
      "model_forward_time": 0.11519408226013184,
      "step": 33261
    },
    {
      "epoch": 0.000203009033203125,
      "step": 33261,
      "training_step_time": 0.535067081451416
    },
    {
      "epoch": 0.00020301513671875,
      "model_forward_time": 0.11504817008972168,
      "step": 33262
    },
    {
      "epoch": 0.00020301513671875,
      "step": 33262,
      "training_step_time": 0.3984987735748291
    },
    {
      "epoch": 0.000203021240234375,
      "model_forward_time": 0.11537528038024902,
      "step": 33263
    },
    {
      "epoch": 0.000203021240234375,
      "step": 33263,
      "training_step_time": 0.4002969264984131
    },
    {
      "epoch": 0.00020302734375,
      "model_forward_time": 0.11471009254455566,
      "step": 33264
    },
    {
      "epoch": 0.00020302734375,
      "step": 33264,
      "training_step_time": 0.4218909740447998
    },
    {
      "epoch": 0.000203033447265625,
      "model_forward_time": 0.11559486389160156,
      "step": 33265
    },
    {
      "epoch": 0.000203033447265625,
      "step": 33265,
      "training_step_time": 0.398789644241333
    },
    {
      "epoch": 0.00020303955078125,
      "model_forward_time": 0.11556434631347656,
      "step": 33266
    },
    {
      "epoch": 0.00020303955078125,
      "step": 33266,
      "training_step_time": 0.4646177291870117
    },
    {
      "epoch": 0.000203045654296875,
      "model_forward_time": 0.11640596389770508,
      "step": 33267
    },
    {
      "epoch": 0.000203045654296875,
      "step": 33267,
      "training_step_time": 0.48725295066833496
    },
    {
      "epoch": 0.0002030517578125,
      "model_forward_time": 0.11533021926879883,
      "step": 33268
    },
    {
      "epoch": 0.0002030517578125,
      "step": 33268,
      "training_step_time": 0.4471724033355713
    },
    {
      "epoch": 0.000203057861328125,
      "model_forward_time": 0.1151118278503418,
      "step": 33269
    },
    {
      "epoch": 0.000203057861328125,
      "step": 33269,
      "training_step_time": 0.4077951908111572
    },
    {
      "epoch": 0.00020306396484375,
      "grad_norm": 0.10567283630371094,
      "learning_rate": 4.5129997196930845e-05,
      "loss": 0.0447,
      "step": 33270
    },
    {
      "epoch": 0.00020306396484375,
      "model_forward_time": 0.11510205268859863,
      "step": 33270
    },
    {
      "epoch": 0.00020306396484375,
      "step": 33270,
      "training_step_time": 0.4523742198944092
    },
    {
      "epoch": 0.000203070068359375,
      "model_forward_time": 0.11503338813781738,
      "step": 33271
    },
    {
      "epoch": 0.000203070068359375,
      "step": 33271,
      "training_step_time": 0.49226903915405273
    },
    {
      "epoch": 0.000203076171875,
      "model_forward_time": 0.1147313117980957,
      "step": 33272
    },
    {
      "epoch": 0.000203076171875,
      "step": 33272,
      "training_step_time": 0.39746642112731934
    },
    {
      "epoch": 0.000203082275390625,
      "model_forward_time": 0.11486458778381348,
      "step": 33273
    },
    {
      "epoch": 0.000203082275390625,
      "step": 33273,
      "training_step_time": 0.5807657241821289
    },
    {
      "epoch": 0.00020308837890625,
      "model_forward_time": 0.11449265480041504,
      "step": 33274
    },
    {
      "epoch": 0.00020308837890625,
      "step": 33274,
      "training_step_time": 0.40395355224609375
    },
    {
      "epoch": 0.000203094482421875,
      "model_forward_time": 0.11451125144958496,
      "step": 33275
    },
    {
      "epoch": 0.000203094482421875,
      "step": 33275,
      "training_step_time": 0.39823412895202637
    },
    {
      "epoch": 0.0002031005859375,
      "model_forward_time": 0.1150062084197998,
      "step": 33276
    },
    {
      "epoch": 0.0002031005859375,
      "step": 33276,
      "training_step_time": 0.39025330543518066
    },
    {
      "epoch": 0.000203106689453125,
      "model_forward_time": 0.11534333229064941,
      "step": 33277
    },
    {
      "epoch": 0.000203106689453125,
      "step": 33277,
      "training_step_time": 0.3959789276123047
    },
    {
      "epoch": 0.00020311279296875,
      "model_forward_time": 0.11516642570495605,
      "step": 33278
    },
    {
      "epoch": 0.00020311279296875,
      "step": 33278,
      "training_step_time": 0.4349656105041504
    },
    {
      "epoch": 0.000203118896484375,
      "model_forward_time": 0.11547541618347168,
      "step": 33279
    },
    {
      "epoch": 0.000203118896484375,
      "step": 33279,
      "training_step_time": 0.5271379947662354
    },
    {
      "epoch": 0.000203125,
      "grad_norm": 0.15517015755176544,
      "learning_rate": 4.510257113662632e-05,
      "loss": 0.0445,
      "step": 33280
    },
    {
      "epoch": 0.000203125,
      "model_forward_time": 0.11544656753540039,
      "step": 33280
    },
    {
      "epoch": 0.000203125,
      "step": 33280,
      "training_step_time": 0.4432523250579834
    },
    {
      "epoch": 0.000203131103515625,
      "model_forward_time": 0.11512970924377441,
      "step": 33281
    },
    {
      "epoch": 0.000203131103515625,
      "step": 33281,
      "training_step_time": 0.40511298179626465
    },
    {
      "epoch": 0.00020313720703125,
      "model_forward_time": 0.11499357223510742,
      "step": 33282
    },
    {
      "epoch": 0.00020313720703125,
      "step": 33282,
      "training_step_time": 0.4435856342315674
    },
    {
      "epoch": 0.000203143310546875,
      "model_forward_time": 0.11467361450195312,
      "step": 33283
    },
    {
      "epoch": 0.000203143310546875,
      "step": 33283,
      "training_step_time": 0.36353611946105957
    },
    {
      "epoch": 0.0002031494140625,
      "model_forward_time": 0.11488628387451172,
      "step": 33284
    },
    {
      "epoch": 0.0002031494140625,
      "step": 33284,
      "training_step_time": 0.4421103000640869
    },
    {
      "epoch": 0.000203155517578125,
      "model_forward_time": 0.11535334587097168,
      "step": 33285
    },
    {
      "epoch": 0.000203155517578125,
      "step": 33285,
      "training_step_time": 0.41414880752563477
    },
    {
      "epoch": 0.00020316162109375,
      "model_forward_time": 0.11470651626586914,
      "step": 33286
    },
    {
      "epoch": 0.00020316162109375,
      "step": 33286,
      "training_step_time": 0.4223604202270508
    },
    {
      "epoch": 0.000203167724609375,
      "model_forward_time": 0.11520719528198242,
      "step": 33287
    },
    {
      "epoch": 0.000203167724609375,
      "step": 33287,
      "training_step_time": 0.4387638568878174
    },
    {
      "epoch": 0.000203173828125,
      "model_forward_time": 0.11474943161010742,
      "step": 33288
    },
    {
      "epoch": 0.000203173828125,
      "step": 33288,
      "training_step_time": 0.3986358642578125
    },
    {
      "epoch": 0.000203179931640625,
      "model_forward_time": 0.11561393737792969,
      "step": 33289
    },
    {
      "epoch": 0.000203179931640625,
      "step": 33289,
      "training_step_time": 0.4154832363128662
    },
    {
      "epoch": 0.00020318603515625,
      "grad_norm": 0.15027868747711182,
      "learning_rate": 4.507514656403137e-05,
      "loss": 0.0384,
      "step": 33290
    },
    {
      "epoch": 0.00020318603515625,
      "model_forward_time": 0.114898681640625,
      "step": 33290
    },
    {
      "epoch": 0.00020318603515625,
      "step": 33290,
      "training_step_time": 0.39470863342285156
    },
    {
      "epoch": 0.000203192138671875,
      "model_forward_time": 0.1149449348449707,
      "step": 33291
    },
    {
      "epoch": 0.000203192138671875,
      "step": 33291,
      "training_step_time": 0.7097136974334717
    },
    {
      "epoch": 0.0002031982421875,
      "model_forward_time": 0.11440086364746094,
      "step": 33292
    },
    {
      "epoch": 0.0002031982421875,
      "step": 33292,
      "training_step_time": 0.4262382984161377
    },
    {
      "epoch": 0.000203204345703125,
      "model_forward_time": 0.11489653587341309,
      "step": 33293
    },
    {
      "epoch": 0.000203204345703125,
      "step": 33293,
      "training_step_time": 0.4565010070800781
    },
    {
      "epoch": 0.00020321044921875,
      "model_forward_time": 0.11481261253356934,
      "step": 33294
    },
    {
      "epoch": 0.00020321044921875,
      "step": 33294,
      "training_step_time": 0.45351147651672363
    },
    {
      "epoch": 0.000203216552734375,
      "model_forward_time": 0.1141200065612793,
      "step": 33295
    },
    {
      "epoch": 0.000203216552734375,
      "step": 33295,
      "training_step_time": 0.47119617462158203
    },
    {
      "epoch": 0.00020322265625,
      "model_forward_time": 0.1139984130859375,
      "step": 33296
    },
    {
      "epoch": 0.00020322265625,
      "step": 33296,
      "training_step_time": 0.4773881435394287
    },
    {
      "epoch": 0.000203228759765625,
      "model_forward_time": 0.11481499671936035,
      "step": 33297
    },
    {
      "epoch": 0.000203228759765625,
      "step": 33297,
      "training_step_time": 0.7868614196777344
    },
    {
      "epoch": 0.00020323486328125,
      "model_forward_time": 0.11460518836975098,
      "step": 33298
    },
    {
      "epoch": 0.00020323486328125,
      "step": 33298,
      "training_step_time": 0.4182310104370117
    },
    {
      "epoch": 0.000203240966796875,
      "model_forward_time": 0.1154775619506836,
      "step": 33299
    },
    {
      "epoch": 0.000203240966796875,
      "step": 33299,
      "training_step_time": 0.3827056884765625
    },
    {
      "epoch": 0.0002032470703125,
      "grad_norm": 0.13728700578212738,
      "learning_rate": 4.504772348747687e-05,
      "loss": 0.044,
      "step": 33300
    },
    {
      "epoch": 0.0002032470703125,
      "model_forward_time": 0.11392903327941895,
      "step": 33300
    },
    {
      "epoch": 0.0002032470703125,
      "step": 33300,
      "training_step_time": 0.3887026309967041
    },
    {
      "epoch": 0.000203253173828125,
      "model_forward_time": 0.11484670639038086,
      "step": 33301
    },
    {
      "epoch": 0.000203253173828125,
      "step": 33301,
      "training_step_time": 0.3804962635040283
    },
    {
      "epoch": 0.00020325927734375,
      "model_forward_time": 0.11502385139465332,
      "step": 33302
    },
    {
      "epoch": 0.00020325927734375,
      "step": 33302,
      "training_step_time": 0.3951606750488281
    },
    {
      "epoch": 0.000203265380859375,
      "model_forward_time": 0.11575579643249512,
      "step": 33303
    },
    {
      "epoch": 0.000203265380859375,
      "step": 33303,
      "training_step_time": 0.826361894607544
    },
    {
      "epoch": 0.000203271484375,
      "model_forward_time": 0.11434006690979004,
      "step": 33304
    },
    {
      "epoch": 0.000203271484375,
      "step": 33304,
      "training_step_time": 0.3944215774536133
    },
    {
      "epoch": 0.000203277587890625,
      "model_forward_time": 0.11425566673278809,
      "step": 33305
    },
    {
      "epoch": 0.000203277587890625,
      "step": 33305,
      "training_step_time": 0.45516538619995117
    },
    {
      "epoch": 0.00020328369140625,
      "model_forward_time": 0.11565804481506348,
      "step": 33306
    },
    {
      "epoch": 0.00020328369140625,
      "step": 33306,
      "training_step_time": 0.42219996452331543
    },
    {
      "epoch": 0.000203289794921875,
      "model_forward_time": 0.11436128616333008,
      "step": 33307
    },
    {
      "epoch": 0.000203289794921875,
      "step": 33307,
      "training_step_time": 0.4426913261413574
    },
    {
      "epoch": 0.0002032958984375,
      "model_forward_time": 0.11428165435791016,
      "step": 33308
    },
    {
      "epoch": 0.0002032958984375,
      "step": 33308,
      "training_step_time": 0.4065268039703369
    },
    {
      "epoch": 0.000203302001953125,
      "model_forward_time": 0.11487841606140137,
      "step": 33309
    },
    {
      "epoch": 0.000203302001953125,
      "step": 33309,
      "training_step_time": 0.7553541660308838
    },
    {
      "epoch": 0.00020330810546875,
      "grad_norm": 0.1261044591665268,
      "learning_rate": 4.5020301915293214e-05,
      "loss": 0.0391,
      "step": 33310
    },
    {
      "epoch": 0.00020330810546875,
      "model_forward_time": 0.11432409286499023,
      "step": 33310
    },
    {
      "epoch": 0.00020330810546875,
      "step": 33310,
      "training_step_time": 0.43383145332336426
    },
    {
      "epoch": 0.000203314208984375,
      "model_forward_time": 0.11484527587890625,
      "step": 33311
    },
    {
      "epoch": 0.000203314208984375,
      "step": 33311,
      "training_step_time": 0.39499664306640625
    },
    {
      "epoch": 0.0002033203125,
      "model_forward_time": 0.11473321914672852,
      "step": 33312
    },
    {
      "epoch": 0.0002033203125,
      "step": 33312,
      "training_step_time": 0.38085317611694336
    },
    {
      "epoch": 0.000203326416015625,
      "model_forward_time": 0.11389398574829102,
      "step": 33313
    },
    {
      "epoch": 0.000203326416015625,
      "step": 33313,
      "training_step_time": 0.398531436920166
    },
    {
      "epoch": 0.00020333251953125,
      "model_forward_time": 0.1142129898071289,
      "step": 33314
    },
    {
      "epoch": 0.00020333251953125,
      "step": 33314,
      "training_step_time": 0.38655519485473633
    },
    {
      "epoch": 0.000203338623046875,
      "model_forward_time": 0.1154029369354248,
      "step": 33315
    },
    {
      "epoch": 0.000203338623046875,
      "step": 33315,
      "training_step_time": 0.7837934494018555
    },
    {
      "epoch": 0.0002033447265625,
      "model_forward_time": 0.11501073837280273,
      "step": 33316
    },
    {
      "epoch": 0.0002033447265625,
      "step": 33316,
      "training_step_time": 0.3882310390472412
    },
    {
      "epoch": 0.000203350830078125,
      "model_forward_time": 0.11411046981811523,
      "step": 33317
    },
    {
      "epoch": 0.000203350830078125,
      "step": 33317,
      "training_step_time": 0.39405155181884766
    },
    {
      "epoch": 0.00020335693359375,
      "model_forward_time": 0.11510801315307617,
      "step": 33318
    },
    {
      "epoch": 0.00020335693359375,
      "step": 33318,
      "training_step_time": 0.40671300888061523
    },
    {
      "epoch": 0.000203363037109375,
      "model_forward_time": 0.11446547508239746,
      "step": 33319
    },
    {
      "epoch": 0.000203363037109375,
      "step": 33319,
      "training_step_time": 0.41306567192077637
    },
    {
      "epoch": 0.000203369140625,
      "grad_norm": 0.11375579237937927,
      "learning_rate": 4.4992881855810366e-05,
      "loss": 0.0439,
      "step": 33320
    },
    {
      "epoch": 0.000203369140625,
      "model_forward_time": 0.1139216423034668,
      "step": 33320
    },
    {
      "epoch": 0.000203369140625,
      "step": 33320,
      "training_step_time": 0.4624643325805664
    },
    {
      "epoch": 0.000203375244140625,
      "model_forward_time": 0.1152505874633789,
      "step": 33321
    },
    {
      "epoch": 0.000203375244140625,
      "step": 33321,
      "training_step_time": 0.8631477355957031
    },
    {
      "epoch": 0.00020338134765625,
      "model_forward_time": 0.1144859790802002,
      "step": 33322
    },
    {
      "epoch": 0.00020338134765625,
      "step": 33322,
      "training_step_time": 0.37601256370544434
    },
    {
      "epoch": 0.000203387451171875,
      "model_forward_time": 0.11486959457397461,
      "step": 33323
    },
    {
      "epoch": 0.000203387451171875,
      "step": 33323,
      "training_step_time": 0.41799354553222656
    },
    {
      "epoch": 0.0002033935546875,
      "model_forward_time": 0.11484980583190918,
      "step": 33324
    },
    {
      "epoch": 0.0002033935546875,
      "step": 33324,
      "training_step_time": 0.48284173011779785
    },
    {
      "epoch": 0.000203399658203125,
      "model_forward_time": 0.11561942100524902,
      "step": 33325
    },
    {
      "epoch": 0.000203399658203125,
      "step": 33325,
      "training_step_time": 0.3916661739349365
    },
    {
      "epoch": 0.00020340576171875,
      "model_forward_time": 0.11406922340393066,
      "step": 33326
    },
    {
      "epoch": 0.00020340576171875,
      "step": 33326,
      "training_step_time": 0.3912973403930664
    },
    {
      "epoch": 0.000203411865234375,
      "model_forward_time": 0.1151881217956543,
      "step": 33327
    },
    {
      "epoch": 0.000203411865234375,
      "step": 33327,
      "training_step_time": 0.8183231353759766
    },
    {
      "epoch": 0.00020341796875,
      "model_forward_time": 0.11370134353637695,
      "step": 33328
    },
    {
      "epoch": 0.00020341796875,
      "step": 33328,
      "training_step_time": 0.38562679290771484
    },
    {
      "epoch": 0.000203424072265625,
      "model_forward_time": 0.1144261360168457,
      "step": 33329
    },
    {
      "epoch": 0.000203424072265625,
      "step": 33329,
      "training_step_time": 0.38020920753479004
    },
    {
      "epoch": 0.00020343017578125,
      "grad_norm": 0.16232281923294067,
      "learning_rate": 4.496546331735778e-05,
      "loss": 0.0472,
      "step": 33330
    },
    {
      "epoch": 0.00020343017578125,
      "model_forward_time": 0.11438655853271484,
      "step": 33330
    },
    {
      "epoch": 0.00020343017578125,
      "step": 33330,
      "training_step_time": 0.387113094329834
    },
    {
      "epoch": 0.000203436279296875,
      "model_forward_time": 0.11435580253601074,
      "step": 33331
    },
    {
      "epoch": 0.000203436279296875,
      "step": 33331,
      "training_step_time": 0.4429514408111572
    },
    {
      "epoch": 0.0002034423828125,
      "model_forward_time": 0.11448550224304199,
      "step": 33332
    },
    {
      "epoch": 0.0002034423828125,
      "step": 33332,
      "training_step_time": 0.4706292152404785
    },
    {
      "epoch": 0.000203448486328125,
      "model_forward_time": 0.11492729187011719,
      "step": 33333
    },
    {
      "epoch": 0.000203448486328125,
      "step": 33333,
      "training_step_time": 0.6087203025817871
    },
    {
      "epoch": 0.00020345458984375,
      "model_forward_time": 0.11440372467041016,
      "step": 33334
    },
    {
      "epoch": 0.00020345458984375,
      "step": 33334,
      "training_step_time": 0.4405941963195801
    },
    {
      "epoch": 0.000203460693359375,
      "model_forward_time": 0.11482954025268555,
      "step": 33335
    },
    {
      "epoch": 0.000203460693359375,
      "step": 33335,
      "training_step_time": 0.36494994163513184
    },
    {
      "epoch": 0.000203466796875,
      "model_forward_time": 0.11428999900817871,
      "step": 33336
    },
    {
      "epoch": 0.000203466796875,
      "step": 33336,
      "training_step_time": 0.3955044746398926
    },
    {
      "epoch": 0.000203472900390625,
      "model_forward_time": 0.11528325080871582,
      "step": 33337
    },
    {
      "epoch": 0.000203472900390625,
      "step": 33337,
      "training_step_time": 0.48302173614501953
    },
    {
      "epoch": 0.00020347900390625,
      "model_forward_time": 0.11485409736633301,
      "step": 33338
    },
    {
      "epoch": 0.00020347900390625,
      "step": 33338,
      "training_step_time": 0.4127836227416992
    },
    {
      "epoch": 0.000203485107421875,
      "model_forward_time": 0.1147000789642334,
      "step": 33339
    },
    {
      "epoch": 0.000203485107421875,
      "step": 33339,
      "training_step_time": 0.5449354648590088
    },
    {
      "epoch": 0.0002034912109375,
      "grad_norm": 0.1685090959072113,
      "learning_rate": 4.4938046308264544e-05,
      "loss": 0.0416,
      "step": 33340
    },
    {
      "epoch": 0.0002034912109375,
      "model_forward_time": 0.11486268043518066,
      "step": 33340
    },
    {
      "epoch": 0.0002034912109375,
      "step": 33340,
      "training_step_time": 0.38926243782043457
    },
    {
      "epoch": 0.000203497314453125,
      "model_forward_time": 0.11458539962768555,
      "step": 33341
    },
    {
      "epoch": 0.000203497314453125,
      "step": 33341,
      "training_step_time": 0.40601301193237305
    },
    {
      "epoch": 0.00020350341796875,
      "model_forward_time": 0.11476945877075195,
      "step": 33342
    },
    {
      "epoch": 0.00020350341796875,
      "step": 33342,
      "training_step_time": 0.3960897922515869
    },
    {
      "epoch": 0.000203509521484375,
      "model_forward_time": 0.11483478546142578,
      "step": 33343
    },
    {
      "epoch": 0.000203509521484375,
      "step": 33343,
      "training_step_time": 0.39969754219055176
    },
    {
      "epoch": 0.000203515625,
      "model_forward_time": 0.11544466018676758,
      "step": 33344
    },
    {
      "epoch": 0.000203515625,
      "step": 33344,
      "training_step_time": 0.38904285430908203
    },
    {
      "epoch": 0.000203521728515625,
      "model_forward_time": 0.11524844169616699,
      "step": 33345
    },
    {
      "epoch": 0.000203521728515625,
      "step": 33345,
      "training_step_time": 0.4451479911804199
    },
    {
      "epoch": 0.00020352783203125,
      "model_forward_time": 0.1155540943145752,
      "step": 33346
    },
    {
      "epoch": 0.00020352783203125,
      "step": 33346,
      "training_step_time": 0.491455078125
    },
    {
      "epoch": 0.000203533935546875,
      "model_forward_time": 0.11556172370910645,
      "step": 33347
    },
    {
      "epoch": 0.000203533935546875,
      "step": 33347,
      "training_step_time": 0.4155397415161133
    },
    {
      "epoch": 0.0002035400390625,
      "model_forward_time": 0.11457228660583496,
      "step": 33348
    },
    {
      "epoch": 0.0002035400390625,
      "step": 33348,
      "training_step_time": 0.5314655303955078
    },
    {
      "epoch": 0.000203546142578125,
      "model_forward_time": 0.11439990997314453,
      "step": 33349
    },
    {
      "epoch": 0.000203546142578125,
      "step": 33349,
      "training_step_time": 0.363445520401001
    },
    {
      "epoch": 0.00020355224609375,
      "grad_norm": 0.07850589603185654,
      "learning_rate": 4.491063083685916e-05,
      "loss": 0.0408,
      "step": 33350
    },
    {
      "epoch": 0.00020355224609375,
      "model_forward_time": 0.1181342601776123,
      "step": 33350
    },
    {
      "epoch": 0.00020355224609375,
      "step": 33350,
      "training_step_time": 0.43089771270751953
    },
    {
      "epoch": 0.000203558349609375,
      "model_forward_time": 0.11527824401855469,
      "step": 33351
    },
    {
      "epoch": 0.000203558349609375,
      "step": 33351,
      "training_step_time": 0.44626688957214355
    },
    {
      "epoch": 0.000203564453125,
      "model_forward_time": 0.11548280715942383,
      "step": 33352
    },
    {
      "epoch": 0.000203564453125,
      "step": 33352,
      "training_step_time": 0.3975493907928467
    },
    {
      "epoch": 0.000203570556640625,
      "model_forward_time": 0.11552786827087402,
      "step": 33353
    },
    {
      "epoch": 0.000203570556640625,
      "step": 33353,
      "training_step_time": 0.386002779006958
    },
    {
      "epoch": 0.00020357666015625,
      "model_forward_time": 0.11488699913024902,
      "step": 33354
    },
    {
      "epoch": 0.00020357666015625,
      "step": 33354,
      "training_step_time": 0.3905448913574219
    },
    {
      "epoch": 0.000203582763671875,
      "model_forward_time": 0.11539435386657715,
      "step": 33355
    },
    {
      "epoch": 0.000203582763671875,
      "step": 33355,
      "training_step_time": 0.39945006370544434
    },
    {
      "epoch": 0.0002035888671875,
      "model_forward_time": 0.11504292488098145,
      "step": 33356
    },
    {
      "epoch": 0.0002035888671875,
      "step": 33356,
      "training_step_time": 0.3955957889556885
    },
    {
      "epoch": 0.000203594970703125,
      "model_forward_time": 0.11556601524353027,
      "step": 33357
    },
    {
      "epoch": 0.000203594970703125,
      "step": 33357,
      "training_step_time": 0.448214054107666
    },
    {
      "epoch": 0.00020360107421875,
      "model_forward_time": 0.11595273017883301,
      "step": 33358
    },
    {
      "epoch": 0.00020360107421875,
      "step": 33358,
      "training_step_time": 0.39960670471191406
    },
    {
      "epoch": 0.000203607177734375,
      "model_forward_time": 0.11498618125915527,
      "step": 33359
    },
    {
      "epoch": 0.000203607177734375,
      "step": 33359,
      "training_step_time": 0.45276308059692383
    },
    {
      "epoch": 0.00020361328125,
      "grad_norm": 0.17887865006923676,
      "learning_rate": 4.488321691146975e-05,
      "loss": 0.044,
      "step": 33360
    },
    {
      "epoch": 0.00020361328125,
      "model_forward_time": 0.11580824851989746,
      "step": 33360
    },
    {
      "epoch": 0.00020361328125,
      "step": 33360,
      "training_step_time": 0.4491422176361084
    },
    {
      "epoch": 0.000203619384765625,
      "model_forward_time": 0.11523246765136719,
      "step": 33361
    },
    {
      "epoch": 0.000203619384765625,
      "step": 33361,
      "training_step_time": 0.3913254737854004
    },
    {
      "epoch": 0.00020362548828125,
      "model_forward_time": 0.11473250389099121,
      "step": 33362
    },
    {
      "epoch": 0.00020362548828125,
      "step": 33362,
      "training_step_time": 0.4327678680419922
    },
    {
      "epoch": 0.000203631591796875,
      "model_forward_time": 0.11541295051574707,
      "step": 33363
    },
    {
      "epoch": 0.000203631591796875,
      "step": 33363,
      "training_step_time": 0.48030853271484375
    },
    {
      "epoch": 0.0002036376953125,
      "model_forward_time": 0.114959716796875,
      "step": 33364
    },
    {
      "epoch": 0.0002036376953125,
      "step": 33364,
      "training_step_time": 0.40166497230529785
    },
    {
      "epoch": 0.000203643798828125,
      "model_forward_time": 0.11520838737487793,
      "step": 33365
    },
    {
      "epoch": 0.000203643798828125,
      "step": 33365,
      "training_step_time": 0.42417120933532715
    },
    {
      "epoch": 0.00020364990234375,
      "model_forward_time": 0.11549854278564453,
      "step": 33366
    },
    {
      "epoch": 0.00020364990234375,
      "step": 33366,
      "training_step_time": 0.40180468559265137
    },
    {
      "epoch": 0.000203656005859375,
      "model_forward_time": 0.11514902114868164,
      "step": 33367
    },
    {
      "epoch": 0.000203656005859375,
      "step": 33367,
      "training_step_time": 0.40147852897644043
    },
    {
      "epoch": 0.000203662109375,
      "model_forward_time": 0.11606073379516602,
      "step": 33368
    },
    {
      "epoch": 0.000203662109375,
      "step": 33368,
      "training_step_time": 0.38480687141418457
    },
    {
      "epoch": 0.000203668212890625,
      "model_forward_time": 0.1152811050415039,
      "step": 33369
    },
    {
      "epoch": 0.000203668212890625,
      "step": 33369,
      "training_step_time": 0.5182387828826904
    },
    {
      "epoch": 0.00020367431640625,
      "grad_norm": 0.1727030873298645,
      "learning_rate": 4.4855804540423964e-05,
      "loss": 0.0504,
      "step": 33370
    },
    {
      "epoch": 0.00020367431640625,
      "model_forward_time": 0.11501646041870117,
      "step": 33370
    },
    {
      "epoch": 0.00020367431640625,
      "step": 33370,
      "training_step_time": 0.401123046875
    },
    {
      "epoch": 0.000203680419921875,
      "model_forward_time": 0.11502385139465332,
      "step": 33371
    },
    {
      "epoch": 0.000203680419921875,
      "step": 33371,
      "training_step_time": 0.3963758945465088
    },
    {
      "epoch": 0.0002036865234375,
      "model_forward_time": 0.11604523658752441,
      "step": 33372
    },
    {
      "epoch": 0.0002036865234375,
      "step": 33372,
      "training_step_time": 0.3901517391204834
    },
    {
      "epoch": 0.000203692626953125,
      "model_forward_time": 0.11526083946228027,
      "step": 33373
    },
    {
      "epoch": 0.000203692626953125,
      "step": 33373,
      "training_step_time": 0.45652222633361816
    },
    {
      "epoch": 0.00020369873046875,
      "model_forward_time": 0.1157383918762207,
      "step": 33374
    },
    {
      "epoch": 0.00020369873046875,
      "step": 33374,
      "training_step_time": 0.43145036697387695
    },
    {
      "epoch": 0.000203704833984375,
      "model_forward_time": 0.11520051956176758,
      "step": 33375
    },
    {
      "epoch": 0.000203704833984375,
      "step": 33375,
      "training_step_time": 0.49845170974731445
    },
    {
      "epoch": 0.0002037109375,
      "model_forward_time": 0.11682891845703125,
      "step": 33376
    },
    {
      "epoch": 0.0002037109375,
      "step": 33376,
      "training_step_time": 0.39604783058166504
    },
    {
      "epoch": 0.000203717041015625,
      "model_forward_time": 0.11512875556945801,
      "step": 33377
    },
    {
      "epoch": 0.000203717041015625,
      "step": 33377,
      "training_step_time": 0.44835972785949707
    },
    {
      "epoch": 0.00020372314453125,
      "model_forward_time": 0.11529183387756348,
      "step": 33378
    },
    {
      "epoch": 0.00020372314453125,
      "step": 33378,
      "training_step_time": 0.38991546630859375
    },
    {
      "epoch": 0.000203729248046875,
      "model_forward_time": 0.11569786071777344,
      "step": 33379
    },
    {
      "epoch": 0.000203729248046875,
      "step": 33379,
      "training_step_time": 0.39887261390686035
    },
    {
      "epoch": 0.0002037353515625,
      "grad_norm": 0.14288467168807983,
      "learning_rate": 4.482839373204891e-05,
      "loss": 0.0412,
      "step": 33380
    },
    {
      "epoch": 0.0002037353515625,
      "model_forward_time": 0.11542224884033203,
      "step": 33380
    },
    {
      "epoch": 0.0002037353515625,
      "step": 33380,
      "training_step_time": 0.4460873603820801
    },
    {
      "epoch": 0.000203741455078125,
      "model_forward_time": 0.11716222763061523,
      "step": 33381
    },
    {
      "epoch": 0.000203741455078125,
      "step": 33381,
      "training_step_time": 0.5110511779785156
    },
    {
      "epoch": 0.00020374755859375,
      "model_forward_time": 0.11581254005432129,
      "step": 33382
    },
    {
      "epoch": 0.00020374755859375,
      "step": 33382,
      "training_step_time": 0.3956642150878906
    },
    {
      "epoch": 0.000203753662109375,
      "model_forward_time": 0.1157689094543457,
      "step": 33383
    },
    {
      "epoch": 0.000203753662109375,
      "step": 33383,
      "training_step_time": 0.3915090560913086
    },
    {
      "epoch": 0.000203759765625,
      "model_forward_time": 0.11508321762084961,
      "step": 33384
    },
    {
      "epoch": 0.000203759765625,
      "step": 33384,
      "training_step_time": 0.3932774066925049
    },
    {
      "epoch": 0.000203765869140625,
      "model_forward_time": 0.11509299278259277,
      "step": 33385
    },
    {
      "epoch": 0.000203765869140625,
      "step": 33385,
      "training_step_time": 0.3966360092163086
    },
    {
      "epoch": 0.00020377197265625,
      "model_forward_time": 0.11560583114624023,
      "step": 33386
    },
    {
      "epoch": 0.00020377197265625,
      "step": 33386,
      "training_step_time": 0.38335418701171875
    },
    {
      "epoch": 0.000203778076171875,
      "model_forward_time": 0.1155087947845459,
      "step": 33387
    },
    {
      "epoch": 0.000203778076171875,
      "step": 33387,
      "training_step_time": 0.5362513065338135
    },
    {
      "epoch": 0.0002037841796875,
      "model_forward_time": 0.11539697647094727,
      "step": 33388
    },
    {
      "epoch": 0.0002037841796875,
      "step": 33388,
      "training_step_time": 0.44707274436950684
    },
    {
      "epoch": 0.000203790283203125,
      "model_forward_time": 0.11590218544006348,
      "step": 33389
    },
    {
      "epoch": 0.000203790283203125,
      "step": 33389,
      "training_step_time": 0.40093302726745605
    },
    {
      "epoch": 0.00020379638671875,
      "grad_norm": 0.14822062849998474,
      "learning_rate": 4.480098449467132e-05,
      "loss": 0.0437,
      "step": 33390
    },
    {
      "epoch": 0.00020379638671875,
      "model_forward_time": 0.11552071571350098,
      "step": 33390
    },
    {
      "epoch": 0.00020379638671875,
      "step": 33390,
      "training_step_time": 0.4115476608276367
    },
    {
      "epoch": 0.000203802490234375,
      "model_forward_time": 0.11536121368408203,
      "step": 33391
    },
    {
      "epoch": 0.000203802490234375,
      "step": 33391,
      "training_step_time": 0.47470593452453613
    },
    {
      "epoch": 0.00020380859375,
      "model_forward_time": 0.11457109451293945,
      "step": 33392
    },
    {
      "epoch": 0.00020380859375,
      "step": 33392,
      "training_step_time": 0.38576483726501465
    },
    {
      "epoch": 0.000203814697265625,
      "model_forward_time": 0.11464858055114746,
      "step": 33393
    },
    {
      "epoch": 0.000203814697265625,
      "step": 33393,
      "training_step_time": 0.3656899929046631
    },
    {
      "epoch": 0.00020382080078125,
      "model_forward_time": 0.1153256893157959,
      "step": 33394
    },
    {
      "epoch": 0.00020382080078125,
      "step": 33394,
      "training_step_time": 0.4183962345123291
    },
    {
      "epoch": 0.000203826904296875,
      "model_forward_time": 0.11431241035461426,
      "step": 33395
    },
    {
      "epoch": 0.000203826904296875,
      "step": 33395,
      "training_step_time": 0.41644930839538574
    },
    {
      "epoch": 0.0002038330078125,
      "model_forward_time": 0.11524009704589844,
      "step": 33396
    },
    {
      "epoch": 0.0002038330078125,
      "step": 33396,
      "training_step_time": 0.3908529281616211
    },
    {
      "epoch": 0.000203839111328125,
      "model_forward_time": 0.11505770683288574,
      "step": 33397
    },
    {
      "epoch": 0.000203839111328125,
      "step": 33397,
      "training_step_time": 0.3821401596069336
    },
    {
      "epoch": 0.00020384521484375,
      "model_forward_time": 0.11653399467468262,
      "step": 33398
    },
    {
      "epoch": 0.00020384521484375,
      "step": 33398,
      "training_step_time": 0.3873143196105957
    },
    {
      "epoch": 0.000203851318359375,
      "model_forward_time": 0.11506295204162598,
      "step": 33399
    },
    {
      "epoch": 0.000203851318359375,
      "step": 33399,
      "training_step_time": 0.39194297790527344
    },
    {
      "epoch": 0.000203857421875,
      "grad_norm": 0.07152485847473145,
      "learning_rate": 4.477357683661734e-05,
      "loss": 0.0407,
      "step": 33400
    },
    {
      "epoch": 0.000203857421875,
      "model_forward_time": 0.11620092391967773,
      "step": 33400
    },
    {
      "epoch": 0.000203857421875,
      "step": 33400,
      "training_step_time": 0.40108728408813477
    },
    {
      "epoch": 0.000203863525390625,
      "model_forward_time": 0.11555910110473633,
      "step": 33401
    },
    {
      "epoch": 0.000203863525390625,
      "step": 33401,
      "training_step_time": 0.42518186569213867
    },
    {
      "epoch": 0.00020386962890625,
      "model_forward_time": 0.11508822441101074,
      "step": 33402
    },
    {
      "epoch": 0.00020386962890625,
      "step": 33402,
      "training_step_time": 0.4082157611846924
    },
    {
      "epoch": 0.000203875732421875,
      "model_forward_time": 0.11560821533203125,
      "step": 33403
    },
    {
      "epoch": 0.000203875732421875,
      "step": 33403,
      "training_step_time": 0.4407382011413574
    },
    {
      "epoch": 0.0002038818359375,
      "model_forward_time": 0.11622953414916992,
      "step": 33404
    },
    {
      "epoch": 0.0002038818359375,
      "step": 33404,
      "training_step_time": 0.4586522579193115
    },
    {
      "epoch": 0.000203887939453125,
      "model_forward_time": 0.11549568176269531,
      "step": 33405
    },
    {
      "epoch": 0.000203887939453125,
      "step": 33405,
      "training_step_time": 0.4523470401763916
    },
    {
      "epoch": 0.00020389404296875,
      "model_forward_time": 0.11548733711242676,
      "step": 33406
    },
    {
      "epoch": 0.00020389404296875,
      "step": 33406,
      "training_step_time": 0.4031524658203125
    },
    {
      "epoch": 0.000203900146484375,
      "model_forward_time": 0.1157376766204834,
      "step": 33407
    },
    {
      "epoch": 0.000203900146484375,
      "step": 33407,
      "training_step_time": 0.4016456604003906
    },
    {
      "epoch": 0.00020390625,
      "model_forward_time": 0.11539793014526367,
      "step": 33408
    },
    {
      "epoch": 0.00020390625,
      "step": 33408,
      "training_step_time": 0.45404911041259766
    },
    {
      "epoch": 0.000203912353515625,
      "model_forward_time": 0.11508035659790039,
      "step": 33409
    },
    {
      "epoch": 0.000203912353515625,
      "step": 33409,
      "training_step_time": 0.45549869537353516
    },
    {
      "epoch": 0.00020391845703125,
      "grad_norm": 0.14564459025859833,
      "learning_rate": 4.474617076621272e-05,
      "loss": 0.0403,
      "step": 33410
    },
    {
      "epoch": 0.00020391845703125,
      "model_forward_time": 0.11562943458557129,
      "step": 33410
    },
    {
      "epoch": 0.00020391845703125,
      "step": 33410,
      "training_step_time": 0.46387290954589844
    },
    {
      "epoch": 0.000203924560546875,
      "model_forward_time": 0.1153557300567627,
      "step": 33411
    },
    {
      "epoch": 0.000203924560546875,
      "step": 33411,
      "training_step_time": 0.809319019317627
    },
    {
      "epoch": 0.0002039306640625,
      "model_forward_time": 0.11464643478393555,
      "step": 33412
    },
    {
      "epoch": 0.0002039306640625,
      "step": 33412,
      "training_step_time": 0.38184237480163574
    },
    {
      "epoch": 0.000203936767578125,
      "model_forward_time": 0.11461949348449707,
      "step": 33413
    },
    {
      "epoch": 0.000203936767578125,
      "step": 33413,
      "training_step_time": 0.39734601974487305
    },
    {
      "epoch": 0.00020394287109375,
      "model_forward_time": 0.11463546752929688,
      "step": 33414
    },
    {
      "epoch": 0.00020394287109375,
      "step": 33414,
      "training_step_time": 0.3904116153717041
    },
    {
      "epoch": 0.000203948974609375,
      "model_forward_time": 0.11443614959716797,
      "step": 33415
    },
    {
      "epoch": 0.000203948974609375,
      "step": 33415,
      "training_step_time": 0.398784875869751
    },
    {
      "epoch": 0.000203955078125,
      "model_forward_time": 0.11471343040466309,
      "step": 33416
    },
    {
      "epoch": 0.000203955078125,
      "step": 33416,
      "training_step_time": 0.4940602779388428
    },
    {
      "epoch": 0.000203961181640625,
      "model_forward_time": 0.11497092247009277,
      "step": 33417
    },
    {
      "epoch": 0.000203961181640625,
      "step": 33417,
      "training_step_time": 0.7711477279663086
    },
    {
      "epoch": 0.00020396728515625,
      "model_forward_time": 0.11406970024108887,
      "step": 33418
    },
    {
      "epoch": 0.00020396728515625,
      "step": 33418,
      "training_step_time": 0.4367983341217041
    },
    {
      "epoch": 0.000203973388671875,
      "model_forward_time": 0.11445355415344238,
      "step": 33419
    },
    {
      "epoch": 0.000203973388671875,
      "step": 33419,
      "training_step_time": 0.3815133571624756
    },
    {
      "epoch": 0.0002039794921875,
      "grad_norm": 0.17754274606704712,
      "learning_rate": 4.471876629178273e-05,
      "loss": 0.0414,
      "step": 33420
    },
    {
      "epoch": 0.0002039794921875,
      "model_forward_time": 0.11393404006958008,
      "step": 33420
    },
    {
      "epoch": 0.0002039794921875,
      "step": 33420,
      "training_step_time": 0.3831183910369873
    },
    {
      "epoch": 0.000203985595703125,
      "model_forward_time": 0.11462783813476562,
      "step": 33421
    },
    {
      "epoch": 0.000203985595703125,
      "step": 33421,
      "training_step_time": 0.47415781021118164
    },
    {
      "epoch": 0.00020399169921875,
      "model_forward_time": 0.11478972434997559,
      "step": 33422
    },
    {
      "epoch": 0.00020399169921875,
      "step": 33422,
      "training_step_time": 0.5004739761352539
    },
    {
      "epoch": 0.000203997802734375,
      "model_forward_time": 0.11543655395507812,
      "step": 33423
    },
    {
      "epoch": 0.000203997802734375,
      "step": 33423,
      "training_step_time": 0.4741077423095703
    },
    {
      "epoch": 0.00020400390625,
      "model_forward_time": 0.11478710174560547,
      "step": 33424
    },
    {
      "epoch": 0.00020400390625,
      "step": 33424,
      "training_step_time": 0.39474034309387207
    },
    {
      "epoch": 0.000204010009765625,
      "model_forward_time": 0.11485862731933594,
      "step": 33425
    },
    {
      "epoch": 0.000204010009765625,
      "step": 33425,
      "training_step_time": 0.3886275291442871
    },
    {
      "epoch": 0.00020401611328125,
      "model_forward_time": 0.11518692970275879,
      "step": 33426
    },
    {
      "epoch": 0.00020401611328125,
      "step": 33426,
      "training_step_time": 0.39399242401123047
    },
    {
      "epoch": 0.000204022216796875,
      "model_forward_time": 0.11526846885681152,
      "step": 33427
    },
    {
      "epoch": 0.000204022216796875,
      "step": 33427,
      "training_step_time": 0.39875268936157227
    },
    {
      "epoch": 0.0002040283203125,
      "model_forward_time": 0.11544609069824219,
      "step": 33428
    },
    {
      "epoch": 0.0002040283203125,
      "step": 33428,
      "training_step_time": 0.46558189392089844
    },
    {
      "epoch": 0.000204034423828125,
      "model_forward_time": 0.11501336097717285,
      "step": 33429
    },
    {
      "epoch": 0.000204034423828125,
      "step": 33429,
      "training_step_time": 0.6344177722930908
    },
    {
      "epoch": 0.00020404052734375,
      "grad_norm": 0.1226801946759224,
      "learning_rate": 4.469136342165207e-05,
      "loss": 0.0407,
      "step": 33430
    },
    {
      "epoch": 0.00020404052734375,
      "model_forward_time": 0.11490726470947266,
      "step": 33430
    },
    {
      "epoch": 0.00020404052734375,
      "step": 33430,
      "training_step_time": 0.5077192783355713
    },
    {
      "epoch": 0.000204046630859375,
      "model_forward_time": 0.1143026351928711,
      "step": 33431
    },
    {
      "epoch": 0.000204046630859375,
      "step": 33431,
      "training_step_time": 0.42007994651794434
    },
    {
      "epoch": 0.000204052734375,
      "model_forward_time": 0.11455893516540527,
      "step": 33432
    },
    {
      "epoch": 0.000204052734375,
      "step": 33432,
      "training_step_time": 0.4728872776031494
    },
    {
      "epoch": 0.000204058837890625,
      "model_forward_time": 0.11423540115356445,
      "step": 33433
    },
    {
      "epoch": 0.000204058837890625,
      "step": 33433,
      "training_step_time": 0.4008979797363281
    },
    {
      "epoch": 0.00020406494140625,
      "model_forward_time": 0.1148526668548584,
      "step": 33434
    },
    {
      "epoch": 0.00020406494140625,
      "step": 33434,
      "training_step_time": 0.39042019844055176
    },
    {
      "epoch": 0.000204071044921875,
      "model_forward_time": 0.11440253257751465,
      "step": 33435
    },
    {
      "epoch": 0.000204071044921875,
      "step": 33435,
      "training_step_time": 0.5623342990875244
    },
    {
      "epoch": 0.0002040771484375,
      "model_forward_time": 0.11503148078918457,
      "step": 33436
    },
    {
      "epoch": 0.0002040771484375,
      "step": 33436,
      "training_step_time": 0.4052143096923828
    },
    {
      "epoch": 0.000204083251953125,
      "model_forward_time": 0.11469078063964844,
      "step": 33437
    },
    {
      "epoch": 0.000204083251953125,
      "step": 33437,
      "training_step_time": 0.455646276473999
    },
    {
      "epoch": 0.00020408935546875,
      "model_forward_time": 0.11571216583251953,
      "step": 33438
    },
    {
      "epoch": 0.00020408935546875,
      "step": 33438,
      "training_step_time": 0.39003467559814453
    },
    {
      "epoch": 0.000204095458984375,
      "model_forward_time": 0.11452531814575195,
      "step": 33439
    },
    {
      "epoch": 0.000204095458984375,
      "step": 33439,
      "training_step_time": 0.3955981731414795
    },
    {
      "epoch": 0.0002041015625,
      "grad_norm": 0.14169418811798096,
      "learning_rate": 4.4663962164145045e-05,
      "loss": 0.038,
      "step": 33440
    },
    {
      "epoch": 0.0002041015625,
      "model_forward_time": 0.11467432975769043,
      "step": 33440
    },
    {
      "epoch": 0.0002041015625,
      "step": 33440,
      "training_step_time": 0.410031795501709
    },
    {
      "epoch": 0.000204107666015625,
      "model_forward_time": 0.1151890754699707,
      "step": 33441
    },
    {
      "epoch": 0.000204107666015625,
      "step": 33441,
      "training_step_time": 0.46777939796447754
    },
    {
      "epoch": 0.00020411376953125,
      "model_forward_time": 0.11555314064025879,
      "step": 33442
    },
    {
      "epoch": 0.00020411376953125,
      "step": 33442,
      "training_step_time": 0.45430445671081543
    },
    {
      "epoch": 0.000204119873046875,
      "model_forward_time": 0.11499929428100586,
      "step": 33443
    },
    {
      "epoch": 0.000204119873046875,
      "step": 33443,
      "training_step_time": 0.39180946350097656
    },
    {
      "epoch": 0.0002041259765625,
      "model_forward_time": 0.1151895523071289,
      "step": 33444
    },
    {
      "epoch": 0.0002041259765625,
      "step": 33444,
      "training_step_time": 0.49292707443237305
    },
    {
      "epoch": 0.000204132080078125,
      "model_forward_time": 0.1152184009552002,
      "step": 33445
    },
    {
      "epoch": 0.000204132080078125,
      "step": 33445,
      "training_step_time": 0.4352245330810547
    },
    {
      "epoch": 0.00020413818359375,
      "model_forward_time": 0.11453938484191895,
      "step": 33446
    },
    {
      "epoch": 0.00020413818359375,
      "step": 33446,
      "training_step_time": 0.4991922378540039
    },
    {
      "epoch": 0.000204144287109375,
      "model_forward_time": 0.11468076705932617,
      "step": 33447
    },
    {
      "epoch": 0.000204144287109375,
      "step": 33447,
      "training_step_time": 0.4391007423400879
    },
    {
      "epoch": 0.000204150390625,
      "model_forward_time": 0.11519479751586914,
      "step": 33448
    },
    {
      "epoch": 0.000204150390625,
      "step": 33448,
      "training_step_time": 0.3912334442138672
    },
    {
      "epoch": 0.000204156494140625,
      "model_forward_time": 0.11485075950622559,
      "step": 33449
    },
    {
      "epoch": 0.000204156494140625,
      "step": 33449,
      "training_step_time": 0.4496462345123291
    },
    {
      "epoch": 0.00020416259765625,
      "grad_norm": 0.12738867104053497,
      "learning_rate": 4.463656252758542e-05,
      "loss": 0.0364,
      "step": 33450
    },
    {
      "epoch": 0.00020416259765625,
      "model_forward_time": 0.1150214672088623,
      "step": 33450
    },
    {
      "epoch": 0.00020416259765625,
      "step": 33450,
      "training_step_time": 0.48280763626098633
    },
    {
      "epoch": 0.000204168701171875,
      "model_forward_time": 0.11443829536437988,
      "step": 33451
    },
    {
      "epoch": 0.000204168701171875,
      "step": 33451,
      "training_step_time": 0.39855074882507324
    },
    {
      "epoch": 0.0002041748046875,
      "model_forward_time": 0.11645150184631348,
      "step": 33452
    },
    {
      "epoch": 0.0002041748046875,
      "step": 33452,
      "training_step_time": 0.3871035575866699
    },
    {
      "epoch": 0.000204180908203125,
      "model_forward_time": 0.11504483222961426,
      "step": 33453
    },
    {
      "epoch": 0.000204180908203125,
      "step": 33453,
      "training_step_time": 0.44068217277526855
    },
    {
      "epoch": 0.00020418701171875,
      "model_forward_time": 0.11551451683044434,
      "step": 33454
    },
    {
      "epoch": 0.00020418701171875,
      "step": 33454,
      "training_step_time": 0.39824914932250977
    },
    {
      "epoch": 0.000204193115234375,
      "model_forward_time": 0.11490154266357422,
      "step": 33455
    },
    {
      "epoch": 0.000204193115234375,
      "step": 33455,
      "training_step_time": 0.38841676712036133
    },
    {
      "epoch": 0.00020419921875,
      "model_forward_time": 0.11671805381774902,
      "step": 33456
    },
    {
      "epoch": 0.00020419921875,
      "step": 33456,
      "training_step_time": 0.46644020080566406
    },
    {
      "epoch": 0.000204205322265625,
      "model_forward_time": 0.11478185653686523,
      "step": 33457
    },
    {
      "epoch": 0.000204205322265625,
      "step": 33457,
      "training_step_time": 0.5021178722381592
    },
    {
      "epoch": 0.00020421142578125,
      "model_forward_time": 0.11501765251159668,
      "step": 33458
    },
    {
      "epoch": 0.00020421142578125,
      "step": 33458,
      "training_step_time": 0.46970295906066895
    },
    {
      "epoch": 0.000204217529296875,
      "model_forward_time": 0.11546587944030762,
      "step": 33459
    },
    {
      "epoch": 0.000204217529296875,
      "step": 33459,
      "training_step_time": 0.5095131397247314
    },
    {
      "epoch": 0.0002042236328125,
      "grad_norm": 0.10974394530057907,
      "learning_rate": 4.46091645202965e-05,
      "loss": 0.0436,
      "step": 33460
    },
    {
      "epoch": 0.0002042236328125,
      "model_forward_time": 0.11435413360595703,
      "step": 33460
    },
    {
      "epoch": 0.0002042236328125,
      "step": 33460,
      "training_step_time": 0.4205315113067627
    },
    {
      "epoch": 0.000204229736328125,
      "model_forward_time": 0.11479687690734863,
      "step": 33461
    },
    {
      "epoch": 0.000204229736328125,
      "step": 33461,
      "training_step_time": 0.3883943557739258
    },
    {
      "epoch": 0.00020423583984375,
      "model_forward_time": 0.11458277702331543,
      "step": 33462
    },
    {
      "epoch": 0.00020423583984375,
      "step": 33462,
      "training_step_time": 0.39980292320251465
    },
    {
      "epoch": 0.000204241943359375,
      "model_forward_time": 0.1160125732421875,
      "step": 33463
    },
    {
      "epoch": 0.000204241943359375,
      "step": 33463,
      "training_step_time": 0.4193589687347412
    },
    {
      "epoch": 0.000204248046875,
      "model_forward_time": 0.11559176445007324,
      "step": 33464
    },
    {
      "epoch": 0.000204248046875,
      "step": 33464,
      "training_step_time": 0.49126386642456055
    },
    {
      "epoch": 0.000204254150390625,
      "model_forward_time": 0.11631512641906738,
      "step": 33465
    },
    {
      "epoch": 0.000204254150390625,
      "step": 33465,
      "training_step_time": 0.6290538311004639
    },
    {
      "epoch": 0.00020426025390625,
      "model_forward_time": 0.11442804336547852,
      "step": 33466
    },
    {
      "epoch": 0.00020426025390625,
      "step": 33466,
      "training_step_time": 0.38927388191223145
    },
    {
      "epoch": 0.000204266357421875,
      "model_forward_time": 0.11471343040466309,
      "step": 33467
    },
    {
      "epoch": 0.000204266357421875,
      "step": 33467,
      "training_step_time": 0.43324804306030273
    },
    {
      "epoch": 0.0002042724609375,
      "model_forward_time": 0.11452221870422363,
      "step": 33468
    },
    {
      "epoch": 0.0002042724609375,
      "step": 33468,
      "training_step_time": 0.40131354331970215
    },
    {
      "epoch": 0.000204278564453125,
      "model_forward_time": 0.11526632308959961,
      "step": 33469
    },
    {
      "epoch": 0.000204278564453125,
      "step": 33469,
      "training_step_time": 0.39456748962402344
    },
    {
      "epoch": 0.00020428466796875,
      "grad_norm": 0.13464336097240448,
      "learning_rate": 4.4581768150601055e-05,
      "loss": 0.0391,
      "step": 33470
    },
    {
      "epoch": 0.00020428466796875,
      "model_forward_time": 0.1149446964263916,
      "step": 33470
    },
    {
      "epoch": 0.00020428466796875,
      "step": 33470,
      "training_step_time": 0.4400665760040283
    },
    {
      "epoch": 0.000204290771484375,
      "model_forward_time": 0.11478638648986816,
      "step": 33471
    },
    {
      "epoch": 0.000204290771484375,
      "step": 33471,
      "training_step_time": 0.4435245990753174
    },
    {
      "epoch": 0.000204296875,
      "model_forward_time": 0.11513590812683105,
      "step": 33472
    },
    {
      "epoch": 0.000204296875,
      "step": 33472,
      "training_step_time": 0.41646671295166016
    },
    {
      "epoch": 0.000204302978515625,
      "model_forward_time": 0.11470174789428711,
      "step": 33473
    },
    {
      "epoch": 0.000204302978515625,
      "step": 33473,
      "training_step_time": 0.42778611183166504
    },
    {
      "epoch": 0.00020430908203125,
      "model_forward_time": 0.11478519439697266,
      "step": 33474
    },
    {
      "epoch": 0.00020430908203125,
      "step": 33474,
      "training_step_time": 0.4746537208557129
    },
    {
      "epoch": 0.000204315185546875,
      "model_forward_time": 0.1152644157409668,
      "step": 33475
    },
    {
      "epoch": 0.000204315185546875,
      "step": 33475,
      "training_step_time": 0.3941347599029541
    },
    {
      "epoch": 0.0002043212890625,
      "model_forward_time": 0.1149284839630127,
      "step": 33476
    },
    {
      "epoch": 0.0002043212890625,
      "step": 33476,
      "training_step_time": 0.3899552822113037
    },
    {
      "epoch": 0.000204327392578125,
      "model_forward_time": 0.1147468090057373,
      "step": 33477
    },
    {
      "epoch": 0.000204327392578125,
      "step": 33477,
      "training_step_time": 0.5667603015899658
    },
    {
      "epoch": 0.00020433349609375,
      "model_forward_time": 0.11675786972045898,
      "step": 33478
    },
    {
      "epoch": 0.00020433349609375,
      "step": 33478,
      "training_step_time": 0.4346137046813965
    },
    {
      "epoch": 0.000204339599609375,
      "model_forward_time": 0.11528229713439941,
      "step": 33479
    },
    {
      "epoch": 0.000204339599609375,
      "step": 33479,
      "training_step_time": 0.4255356788635254
    },
    {
      "epoch": 0.000204345703125,
      "grad_norm": 0.11082686483860016,
      "learning_rate": 4.4554373426821374e-05,
      "loss": 0.0383,
      "step": 33480
    },
    {
      "epoch": 0.000204345703125,
      "model_forward_time": 0.11442732810974121,
      "step": 33480
    },
    {
      "epoch": 0.000204345703125,
      "step": 33480,
      "training_step_time": 0.38918614387512207
    },
    {
      "epoch": 0.000204351806640625,
      "model_forward_time": 0.11500358581542969,
      "step": 33481
    },
    {
      "epoch": 0.000204351806640625,
      "step": 33481,
      "training_step_time": 0.45293450355529785
    },
    {
      "epoch": 0.00020435791015625,
      "model_forward_time": 0.11497640609741211,
      "step": 33482
    },
    {
      "epoch": 0.00020435791015625,
      "step": 33482,
      "training_step_time": 0.39406442642211914
    },
    {
      "epoch": 0.000204364013671875,
      "model_forward_time": 0.1152803897857666,
      "step": 33483
    },
    {
      "epoch": 0.000204364013671875,
      "step": 33483,
      "training_step_time": 0.60390305519104
    },
    {
      "epoch": 0.0002043701171875,
      "model_forward_time": 0.11419296264648438,
      "step": 33484
    },
    {
      "epoch": 0.0002043701171875,
      "step": 33484,
      "training_step_time": 0.39742422103881836
    },
    {
      "epoch": 0.000204376220703125,
      "model_forward_time": 0.1144721508026123,
      "step": 33485
    },
    {
      "epoch": 0.000204376220703125,
      "step": 33485,
      "training_step_time": 0.4664287567138672
    },
    {
      "epoch": 0.00020438232421875,
      "model_forward_time": 0.11481046676635742,
      "step": 33486
    },
    {
      "epoch": 0.00020438232421875,
      "step": 33486,
      "training_step_time": 0.3891603946685791
    },
    {
      "epoch": 0.000204388427734375,
      "model_forward_time": 0.1143958568572998,
      "step": 33487
    },
    {
      "epoch": 0.000204388427734375,
      "step": 33487,
      "training_step_time": 0.42874789237976074
    },
    {
      "epoch": 0.00020439453125,
      "model_forward_time": 0.11528348922729492,
      "step": 33488
    },
    {
      "epoch": 0.00020439453125,
      "step": 33488,
      "training_step_time": 0.46416711807250977
    },
    {
      "epoch": 0.000204400634765625,
      "model_forward_time": 0.11478281021118164,
      "step": 33489
    },
    {
      "epoch": 0.000204400634765625,
      "step": 33489,
      "training_step_time": 0.39357542991638184
    },
    {
      "epoch": 0.00020440673828125,
      "grad_norm": 0.13000041246414185,
      "learning_rate": 4.452698035727929e-05,
      "loss": 0.0413,
      "step": 33490
    },
    {
      "epoch": 0.00020440673828125,
      "model_forward_time": 0.11470150947570801,
      "step": 33490
    },
    {
      "epoch": 0.00020440673828125,
      "step": 33490,
      "training_step_time": 0.396761417388916
    },
    {
      "epoch": 0.000204412841796875,
      "model_forward_time": 0.11524748802185059,
      "step": 33491
    },
    {
      "epoch": 0.000204412841796875,
      "step": 33491,
      "training_step_time": 0.47225022315979004
    },
    {
      "epoch": 0.0002044189453125,
      "model_forward_time": 0.11542463302612305,
      "step": 33492
    },
    {
      "epoch": 0.0002044189453125,
      "step": 33492,
      "training_step_time": 0.49663281440734863
    },
    {
      "epoch": 0.000204425048828125,
      "model_forward_time": 0.11600804328918457,
      "step": 33493
    },
    {
      "epoch": 0.000204425048828125,
      "step": 33493,
      "training_step_time": 0.45481157302856445
    },
    {
      "epoch": 0.00020443115234375,
      "model_forward_time": 0.11536765098571777,
      "step": 33494
    },
    {
      "epoch": 0.00020443115234375,
      "step": 33494,
      "training_step_time": 0.390871524810791
    },
    {
      "epoch": 0.000204437255859375,
      "model_forward_time": 0.11430501937866211,
      "step": 33495
    },
    {
      "epoch": 0.000204437255859375,
      "step": 33495,
      "training_step_time": 0.38678646087646484
    },
    {
      "epoch": 0.000204443359375,
      "model_forward_time": 0.11528229713439941,
      "step": 33496
    },
    {
      "epoch": 0.000204443359375,
      "step": 33496,
      "training_step_time": 0.3881397247314453
    },
    {
      "epoch": 0.000204449462890625,
      "model_forward_time": 0.11505532264709473,
      "step": 33497
    },
    {
      "epoch": 0.000204449462890625,
      "step": 33497,
      "training_step_time": 0.42006611824035645
    },
    {
      "epoch": 0.00020445556640625,
      "model_forward_time": 0.11570358276367188,
      "step": 33498
    },
    {
      "epoch": 0.00020445556640625,
      "step": 33498,
      "training_step_time": 0.3967628479003906
    },
    {
      "epoch": 0.000204461669921875,
      "model_forward_time": 0.1146087646484375,
      "step": 33499
    },
    {
      "epoch": 0.000204461669921875,
      "step": 33499,
      "training_step_time": 0.4318265914916992
    },
    {
      "epoch": 0.0002044677734375,
      "grad_norm": 0.19597409665584564,
      "learning_rate": 4.449958895029604e-05,
      "loss": 0.0445,
      "step": 33500
    },
    {
      "epoch": 0.0002044677734375,
      "model_forward_time": 0.11630558967590332,
      "step": 33500
    },
    {
      "epoch": 0.0002044677734375,
      "step": 33500,
      "training_step_time": 0.3897554874420166
    },
    {
      "epoch": 0.000204473876953125,
      "model_forward_time": 0.11588120460510254,
      "step": 33501
    },
    {
      "epoch": 0.000204473876953125,
      "step": 33501,
      "training_step_time": 0.4974539279937744
    },
    {
      "epoch": 0.00020447998046875,
      "model_forward_time": 0.11548137664794922,
      "step": 33502
    },
    {
      "epoch": 0.00020447998046875,
      "step": 33502,
      "training_step_time": 0.4616532325744629
    },
    {
      "epoch": 0.000204486083984375,
      "model_forward_time": 0.11544179916381836,
      "step": 33503
    },
    {
      "epoch": 0.000204486083984375,
      "step": 33503,
      "training_step_time": 0.3993830680847168
    },
    {
      "epoch": 0.0002044921875,
      "model_forward_time": 0.11450028419494629,
      "step": 33504
    },
    {
      "epoch": 0.0002044921875,
      "step": 33504,
      "training_step_time": 0.3910708427429199
    },
    {
      "epoch": 0.000204498291015625,
      "model_forward_time": 0.11598515510559082,
      "step": 33505
    },
    {
      "epoch": 0.000204498291015625,
      "step": 33505,
      "training_step_time": 0.4017143249511719
    },
    {
      "epoch": 0.00020450439453125,
      "model_forward_time": 0.11544489860534668,
      "step": 33506
    },
    {
      "epoch": 0.00020450439453125,
      "step": 33506,
      "training_step_time": 0.4777638912200928
    },
    {
      "epoch": 0.000204510498046875,
      "model_forward_time": 0.11537885665893555,
      "step": 33507
    },
    {
      "epoch": 0.000204510498046875,
      "step": 33507,
      "training_step_time": 0.4172959327697754
    },
    {
      "epoch": 0.0002045166015625,
      "model_forward_time": 0.11465692520141602,
      "step": 33508
    },
    {
      "epoch": 0.0002045166015625,
      "step": 33508,
      "training_step_time": 0.4090540409088135
    },
    {
      "epoch": 0.000204522705078125,
      "model_forward_time": 0.11519742012023926,
      "step": 33509
    },
    {
      "epoch": 0.000204522705078125,
      "step": 33509,
      "training_step_time": 0.3974428176879883
    },
    {
      "epoch": 0.00020452880859375,
      "grad_norm": 0.11240582913160324,
      "learning_rate": 4.447219921419244e-05,
      "loss": 0.0427,
      "step": 33510
    },
    {
      "epoch": 0.00020452880859375,
      "model_forward_time": 0.11462950706481934,
      "step": 33510
    },
    {
      "epoch": 0.00020452880859375,
      "step": 33510,
      "training_step_time": 0.40129709243774414
    },
    {
      "epoch": 0.000204534912109375,
      "model_forward_time": 0.11512255668640137,
      "step": 33511
    },
    {
      "epoch": 0.000204534912109375,
      "step": 33511,
      "training_step_time": 0.39586663246154785
    },
    {
      "epoch": 0.000204541015625,
      "model_forward_time": 0.11519503593444824,
      "step": 33512
    },
    {
      "epoch": 0.000204541015625,
      "step": 33512,
      "training_step_time": 0.468808650970459
    },
    {
      "epoch": 0.000204547119140625,
      "model_forward_time": 0.11481428146362305,
      "step": 33513
    },
    {
      "epoch": 0.000204547119140625,
      "step": 33513,
      "training_step_time": 0.39056944847106934
    },
    {
      "epoch": 0.00020455322265625,
      "model_forward_time": 0.11486506462097168,
      "step": 33514
    },
    {
      "epoch": 0.00020455322265625,
      "step": 33514,
      "training_step_time": 0.4147617816925049
    },
    {
      "epoch": 0.000204559326171875,
      "model_forward_time": 0.11485862731933594,
      "step": 33515
    },
    {
      "epoch": 0.000204559326171875,
      "step": 33515,
      "training_step_time": 0.4176595211029053
    },
    {
      "epoch": 0.0002045654296875,
      "model_forward_time": 0.11631131172180176,
      "step": 33516
    },
    {
      "epoch": 0.0002045654296875,
      "step": 33516,
      "training_step_time": 0.3955070972442627
    },
    {
      "epoch": 0.000204571533203125,
      "model_forward_time": 0.11493539810180664,
      "step": 33517
    },
    {
      "epoch": 0.000204571533203125,
      "step": 33517,
      "training_step_time": 0.46396493911743164
    },
    {
      "epoch": 0.00020457763671875,
      "model_forward_time": 0.1151275634765625,
      "step": 33518
    },
    {
      "epoch": 0.00020457763671875,
      "step": 33518,
      "training_step_time": 0.3885800838470459
    },
    {
      "epoch": 0.000204583740234375,
      "model_forward_time": 0.11543107032775879,
      "step": 33519
    },
    {
      "epoch": 0.000204583740234375,
      "step": 33519,
      "training_step_time": 0.683629035949707
    },
    {
      "epoch": 0.00020458984375,
      "grad_norm": 0.11025052517652512,
      "learning_rate": 4.444481115728878e-05,
      "loss": 0.0384,
      "step": 33520
    },
    {
      "epoch": 0.00020458984375,
      "model_forward_time": 0.11438488960266113,
      "step": 33520
    },
    {
      "epoch": 0.00020458984375,
      "step": 33520,
      "training_step_time": 0.45493030548095703
    },
    {
      "epoch": 0.000204595947265625,
      "model_forward_time": 0.11487197875976562,
      "step": 33521
    },
    {
      "epoch": 0.000204595947265625,
      "step": 33521,
      "training_step_time": 0.44825029373168945
    },
    {
      "epoch": 0.00020460205078125,
      "model_forward_time": 0.11486244201660156,
      "step": 33522
    },
    {
      "epoch": 0.00020460205078125,
      "step": 33522,
      "training_step_time": 0.38548922538757324
    },
    {
      "epoch": 0.000204608154296875,
      "model_forward_time": 0.11407661437988281,
      "step": 33523
    },
    {
      "epoch": 0.000204608154296875,
      "step": 33523,
      "training_step_time": 0.3818049430847168
    },
    {
      "epoch": 0.0002046142578125,
      "model_forward_time": 0.11487007141113281,
      "step": 33524
    },
    {
      "epoch": 0.0002046142578125,
      "step": 33524,
      "training_step_time": 0.3901975154876709
    },
    {
      "epoch": 0.000204620361328125,
      "model_forward_time": 0.11567974090576172,
      "step": 33525
    },
    {
      "epoch": 0.000204620361328125,
      "step": 33525,
      "training_step_time": 0.8452751636505127
    },
    {
      "epoch": 0.00020462646484375,
      "model_forward_time": 0.11438655853271484,
      "step": 33526
    },
    {
      "epoch": 0.00020462646484375,
      "step": 33526,
      "training_step_time": 0.4063713550567627
    },
    {
      "epoch": 0.000204632568359375,
      "model_forward_time": 0.11397242546081543,
      "step": 33527
    },
    {
      "epoch": 0.000204632568359375,
      "step": 33527,
      "training_step_time": 0.49039387702941895
    },
    {
      "epoch": 0.000204638671875,
      "model_forward_time": 0.11477899551391602,
      "step": 33528
    },
    {
      "epoch": 0.000204638671875,
      "step": 33528,
      "training_step_time": 0.44232821464538574
    },
    {
      "epoch": 0.000204644775390625,
      "model_forward_time": 0.11394786834716797,
      "step": 33529
    },
    {
      "epoch": 0.000204644775390625,
      "step": 33529,
      "training_step_time": 0.4226226806640625
    },
    {
      "epoch": 0.00020465087890625,
      "grad_norm": 0.1500457227230072,
      "learning_rate": 4.441742478790481e-05,
      "loss": 0.0413,
      "step": 33530
    },
    {
      "epoch": 0.00020465087890625,
      "model_forward_time": 0.1141810417175293,
      "step": 33530
    },
    {
      "epoch": 0.00020465087890625,
      "step": 33530,
      "training_step_time": 0.3897864818572998
    },
    {
      "epoch": 0.000204656982421875,
      "model_forward_time": 0.1148831844329834,
      "step": 33531
    },
    {
      "epoch": 0.000204656982421875,
      "step": 33531,
      "training_step_time": 0.683875560760498
    },
    {
      "epoch": 0.0002046630859375,
      "model_forward_time": 0.11420249938964844,
      "step": 33532
    },
    {
      "epoch": 0.0002046630859375,
      "step": 33532,
      "training_step_time": 0.3620636463165283
    },
    {
      "epoch": 0.000204669189453125,
      "model_forward_time": 0.11476993560791016,
      "step": 33533
    },
    {
      "epoch": 0.000204669189453125,
      "step": 33533,
      "training_step_time": 0.4316747188568115
    },
    {
      "epoch": 0.00020467529296875,
      "model_forward_time": 0.11459112167358398,
      "step": 33534
    },
    {
      "epoch": 0.00020467529296875,
      "step": 33534,
      "training_step_time": 0.39220714569091797
    },
    {
      "epoch": 0.000204681396484375,
      "model_forward_time": 0.11417937278747559,
      "step": 33535
    },
    {
      "epoch": 0.000204681396484375,
      "step": 33535,
      "training_step_time": 0.3877387046813965
    },
    {
      "epoch": 0.0002046875,
      "model_forward_time": 0.11449909210205078,
      "step": 33536
    },
    {
      "epoch": 0.0002046875,
      "step": 33536,
      "training_step_time": 0.3864157199859619
    },
    {
      "epoch": 0.000204693603515625,
      "model_forward_time": 0.11714339256286621,
      "step": 33537
    },
    {
      "epoch": 0.000204693603515625,
      "step": 33537,
      "training_step_time": 0.8227295875549316
    },
    {
      "epoch": 0.00020469970703125,
      "model_forward_time": 0.11378145217895508,
      "step": 33538
    },
    {
      "epoch": 0.00020469970703125,
      "step": 33538,
      "training_step_time": 0.39583396911621094
    },
    {
      "epoch": 0.000204705810546875,
      "model_forward_time": 0.1141657829284668,
      "step": 33539
    },
    {
      "epoch": 0.000204705810546875,
      "step": 33539,
      "training_step_time": 0.45371127128601074
    },
    {
      "epoch": 0.0002047119140625,
      "grad_norm": 0.08604619652032852,
      "learning_rate": 4.439004011435979e-05,
      "loss": 0.0393,
      "step": 33540
    },
    {
      "epoch": 0.0002047119140625,
      "model_forward_time": 0.11450433731079102,
      "step": 33540
    },
    {
      "epoch": 0.0002047119140625,
      "step": 33540,
      "training_step_time": 0.4145834445953369
    },
    {
      "epoch": 0.000204718017578125,
      "model_forward_time": 0.11466741561889648,
      "step": 33541
    },
    {
      "epoch": 0.000204718017578125,
      "step": 33541,
      "training_step_time": 0.3850066661834717
    },
    {
      "epoch": 0.00020472412109375,
      "model_forward_time": 0.11440062522888184,
      "step": 33542
    },
    {
      "epoch": 0.00020472412109375,
      "step": 33542,
      "training_step_time": 0.42800259590148926
    },
    {
      "epoch": 0.000204730224609375,
      "model_forward_time": 0.11465001106262207,
      "step": 33543
    },
    {
      "epoch": 0.000204730224609375,
      "step": 33543,
      "training_step_time": 0.6377298831939697
    },
    {
      "epoch": 0.000204736328125,
      "model_forward_time": 0.11444258689880371,
      "step": 33544
    },
    {
      "epoch": 0.000204736328125,
      "step": 33544,
      "training_step_time": 0.3884854316711426
    },
    {
      "epoch": 0.000204742431640625,
      "model_forward_time": 0.11468887329101562,
      "step": 33545
    },
    {
      "epoch": 0.000204742431640625,
      "step": 33545,
      "training_step_time": 0.4211115837097168
    },
    {
      "epoch": 0.00020474853515625,
      "model_forward_time": 0.11478996276855469,
      "step": 33546
    },
    {
      "epoch": 0.00020474853515625,
      "step": 33546,
      "training_step_time": 0.521367073059082
    },
    {
      "epoch": 0.000204754638671875,
      "model_forward_time": 0.11458897590637207,
      "step": 33547
    },
    {
      "epoch": 0.000204754638671875,
      "step": 33547,
      "training_step_time": 0.474501371383667
    },
    {
      "epoch": 0.0002047607421875,
      "model_forward_time": 0.11478400230407715,
      "step": 33548
    },
    {
      "epoch": 0.0002047607421875,
      "step": 33548,
      "training_step_time": 0.3851895332336426
    },
    {
      "epoch": 0.000204766845703125,
      "model_forward_time": 0.11397862434387207,
      "step": 33549
    },
    {
      "epoch": 0.000204766845703125,
      "step": 33549,
      "training_step_time": 0.6244850158691406
    },
    {
      "epoch": 0.00020477294921875,
      "grad_norm": 0.15250767767429352,
      "learning_rate": 4.436265714497245e-05,
      "loss": 0.0408,
      "step": 33550
    },
    {
      "epoch": 0.00020477294921875,
      "model_forward_time": 0.11462879180908203,
      "step": 33550
    },
    {
      "epoch": 0.00020477294921875,
      "step": 33550,
      "training_step_time": 0.3965766429901123
    },
    {
      "epoch": 0.000204779052734375,
      "model_forward_time": 0.11439919471740723,
      "step": 33551
    },
    {
      "epoch": 0.000204779052734375,
      "step": 33551,
      "training_step_time": 0.3940773010253906
    },
    {
      "epoch": 0.00020478515625,
      "model_forward_time": 0.11469054222106934,
      "step": 33552
    },
    {
      "epoch": 0.00020478515625,
      "step": 33552,
      "training_step_time": 0.4357473850250244
    },
    {
      "epoch": 0.000204791259765625,
      "model_forward_time": 0.11563777923583984,
      "step": 33553
    },
    {
      "epoch": 0.000204791259765625,
      "step": 33553,
      "training_step_time": 0.4796574115753174
    },
    {
      "epoch": 0.00020479736328125,
      "model_forward_time": 0.1145944595336914,
      "step": 33554
    },
    {
      "epoch": 0.00020479736328125,
      "step": 33554,
      "training_step_time": 0.38559722900390625
    },
    {
      "epoch": 0.000204803466796875,
      "model_forward_time": 0.11528158187866211,
      "step": 33555
    },
    {
      "epoch": 0.000204803466796875,
      "step": 33555,
      "training_step_time": 0.5037562847137451
    },
    {
      "epoch": 0.0002048095703125,
      "model_forward_time": 0.11551308631896973,
      "step": 33556
    },
    {
      "epoch": 0.0002048095703125,
      "step": 33556,
      "training_step_time": 0.3950767517089844
    },
    {
      "epoch": 0.000204815673828125,
      "model_forward_time": 0.11488533020019531,
      "step": 33557
    },
    {
      "epoch": 0.000204815673828125,
      "step": 33557,
      "training_step_time": 0.40944910049438477
    },
    {
      "epoch": 0.00020482177734375,
      "model_forward_time": 0.11515593528747559,
      "step": 33558
    },
    {
      "epoch": 0.00020482177734375,
      "step": 33558,
      "training_step_time": 0.46797919273376465
    },
    {
      "epoch": 0.000204827880859375,
      "model_forward_time": 0.11514735221862793,
      "step": 33559
    },
    {
      "epoch": 0.000204827880859375,
      "step": 33559,
      "training_step_time": 0.36834073066711426
    },
    {
      "epoch": 0.000204833984375,
      "grad_norm": 0.10957124084234238,
      "learning_rate": 4.433527588806103e-05,
      "loss": 0.0405,
      "step": 33560
    },
    {
      "epoch": 0.000204833984375,
      "model_forward_time": 0.11454391479492188,
      "step": 33560
    },
    {
      "epoch": 0.000204833984375,
      "step": 33560,
      "training_step_time": 0.44370579719543457
    },
    {
      "epoch": 0.000204840087890625,
      "model_forward_time": 0.1154167652130127,
      "step": 33561
    },
    {
      "epoch": 0.000204840087890625,
      "step": 33561,
      "training_step_time": 0.4030642509460449
    },
    {
      "epoch": 0.00020484619140625,
      "model_forward_time": 0.11485004425048828,
      "step": 33562
    },
    {
      "epoch": 0.00020484619140625,
      "step": 33562,
      "training_step_time": 0.39392638206481934
    },
    {
      "epoch": 0.000204852294921875,
      "model_forward_time": 0.11464095115661621,
      "step": 33563
    },
    {
      "epoch": 0.000204852294921875,
      "step": 33563,
      "training_step_time": 0.4023301601409912
    },
    {
      "epoch": 0.0002048583984375,
      "model_forward_time": 0.11462593078613281,
      "step": 33564
    },
    {
      "epoch": 0.0002048583984375,
      "step": 33564,
      "training_step_time": 0.3947713375091553
    },
    {
      "epoch": 0.000204864501953125,
      "model_forward_time": 0.11611342430114746,
      "step": 33565
    },
    {
      "epoch": 0.000204864501953125,
      "step": 33565,
      "training_step_time": 0.4141068458557129
    },
    {
      "epoch": 0.00020487060546875,
      "model_forward_time": 0.1150963306427002,
      "step": 33566
    },
    {
      "epoch": 0.00020487060546875,
      "step": 33566,
      "training_step_time": 0.3899400234222412
    },
    {
      "epoch": 0.000204876708984375,
      "model_forward_time": 0.11530733108520508,
      "step": 33567
    },
    {
      "epoch": 0.000204876708984375,
      "step": 33567,
      "training_step_time": 0.39012622833251953
    },
    {
      "epoch": 0.0002048828125,
      "model_forward_time": 0.11485433578491211,
      "step": 33568
    },
    {
      "epoch": 0.0002048828125,
      "step": 33568,
      "training_step_time": 0.5070991516113281
    },
    {
      "epoch": 0.000204888916015625,
      "model_forward_time": 0.11592888832092285,
      "step": 33569
    },
    {
      "epoch": 0.000204888916015625,
      "step": 33569,
      "training_step_time": 0.40050363540649414
    },
    {
      "epoch": 0.00020489501953125,
      "grad_norm": 0.08635739237070084,
      "learning_rate": 4.430789635194324e-05,
      "loss": 0.0372,
      "step": 33570
    },
    {
      "epoch": 0.00020489501953125,
      "model_forward_time": 0.11495590209960938,
      "step": 33570
    },
    {
      "epoch": 0.00020489501953125,
      "step": 33570,
      "training_step_time": 0.48618602752685547
    },
    {
      "epoch": 0.000204901123046875,
      "model_forward_time": 0.11479067802429199,
      "step": 33571
    },
    {
      "epoch": 0.000204901123046875,
      "step": 33571,
      "training_step_time": 0.419219970703125
    },
    {
      "epoch": 0.0002049072265625,
      "model_forward_time": 0.11455798149108887,
      "step": 33572
    },
    {
      "epoch": 0.0002049072265625,
      "step": 33572,
      "training_step_time": 0.5025613307952881
    },
    {
      "epoch": 0.000204913330078125,
      "model_forward_time": 0.11480522155761719,
      "step": 33573
    },
    {
      "epoch": 0.000204913330078125,
      "step": 33573,
      "training_step_time": 0.38775038719177246
    },
    {
      "epoch": 0.00020491943359375,
      "model_forward_time": 0.1149442195892334,
      "step": 33574
    },
    {
      "epoch": 0.00020491943359375,
      "step": 33574,
      "training_step_time": 0.4991769790649414
    },
    {
      "epoch": 0.000204925537109375,
      "model_forward_time": 0.1146399974822998,
      "step": 33575
    },
    {
      "epoch": 0.000204925537109375,
      "step": 33575,
      "training_step_time": 0.48523569107055664
    },
    {
      "epoch": 0.000204931640625,
      "model_forward_time": 0.11499643325805664,
      "step": 33576
    },
    {
      "epoch": 0.000204931640625,
      "step": 33576,
      "training_step_time": 0.39194798469543457
    },
    {
      "epoch": 0.000204937744140625,
      "model_forward_time": 0.11486959457397461,
      "step": 33577
    },
    {
      "epoch": 0.000204937744140625,
      "step": 33577,
      "training_step_time": 0.39053869247436523
    },
    {
      "epoch": 0.00020494384765625,
      "model_forward_time": 0.11542987823486328,
      "step": 33578
    },
    {
      "epoch": 0.00020494384765625,
      "step": 33578,
      "training_step_time": 0.3897056579589844
    },
    {
      "epoch": 0.000204949951171875,
      "model_forward_time": 0.11486506462097168,
      "step": 33579
    },
    {
      "epoch": 0.000204949951171875,
      "step": 33579,
      "training_step_time": 0.6379055976867676
    },
    {
      "epoch": 0.0002049560546875,
      "grad_norm": 0.11779601871967316,
      "learning_rate": 4.428051854493623e-05,
      "loss": 0.0403,
      "step": 33580
    },
    {
      "epoch": 0.0002049560546875,
      "model_forward_time": 0.1147308349609375,
      "step": 33580
    },
    {
      "epoch": 0.0002049560546875,
      "step": 33580,
      "training_step_time": 0.4928750991821289
    },
    {
      "epoch": 0.000204962158203125,
      "model_forward_time": 0.11493587493896484,
      "step": 33581
    },
    {
      "epoch": 0.000204962158203125,
      "step": 33581,
      "training_step_time": 0.4093668460845947
    },
    {
      "epoch": 0.00020496826171875,
      "model_forward_time": 0.11478209495544434,
      "step": 33582
    },
    {
      "epoch": 0.00020496826171875,
      "step": 33582,
      "training_step_time": 0.49014902114868164
    },
    {
      "epoch": 0.000204974365234375,
      "model_forward_time": 0.11443138122558594,
      "step": 33583
    },
    {
      "epoch": 0.000204974365234375,
      "step": 33583,
      "training_step_time": 0.3921020030975342
    },
    {
      "epoch": 0.00020498046875,
      "model_forward_time": 0.11443877220153809,
      "step": 33584
    },
    {
      "epoch": 0.00020498046875,
      "step": 33584,
      "training_step_time": 0.3859741687774658
    },
    {
      "epoch": 0.000204986572265625,
      "model_forward_time": 0.1142270565032959,
      "step": 33585
    },
    {
      "epoch": 0.000204986572265625,
      "step": 33585,
      "training_step_time": 0.6639869213104248
    },
    {
      "epoch": 0.00020499267578125,
      "model_forward_time": 0.11445784568786621,
      "step": 33586
    },
    {
      "epoch": 0.00020499267578125,
      "step": 33586,
      "training_step_time": 0.394268274307251
    },
    {
      "epoch": 0.000204998779296875,
      "model_forward_time": 0.11507344245910645,
      "step": 33587
    },
    {
      "epoch": 0.000204998779296875,
      "step": 33587,
      "training_step_time": 0.4051632881164551
    },
    {
      "epoch": 0.0002050048828125,
      "model_forward_time": 0.11411690711975098,
      "step": 33588
    },
    {
      "epoch": 0.0002050048828125,
      "step": 33588,
      "training_step_time": 0.39241695404052734
    },
    {
      "epoch": 0.000205010986328125,
      "model_forward_time": 0.11408472061157227,
      "step": 33589
    },
    {
      "epoch": 0.000205010986328125,
      "step": 33589,
      "training_step_time": 0.47721266746520996
    },
    {
      "epoch": 0.00020501708984375,
      "grad_norm": 0.13987334072589874,
      "learning_rate": 4.425314247535668e-05,
      "loss": 0.0438,
      "step": 33590
    },
    {
      "epoch": 0.00020501708984375,
      "model_forward_time": 0.11458730697631836,
      "step": 33590
    },
    {
      "epoch": 0.00020501708984375,
      "step": 33590,
      "training_step_time": 0.38718271255493164
    },
    {
      "epoch": 0.000205023193359375,
      "model_forward_time": 0.1161956787109375,
      "step": 33591
    },
    {
      "epoch": 0.000205023193359375,
      "step": 33591,
      "training_step_time": 0.9732840061187744
    },
    {
      "epoch": 0.000205029296875,
      "model_forward_time": 0.1135869026184082,
      "step": 33592
    },
    {
      "epoch": 0.000205029296875,
      "step": 33592,
      "training_step_time": 0.3917050361633301
    },
    {
      "epoch": 0.000205035400390625,
      "model_forward_time": 0.11367511749267578,
      "step": 33593
    },
    {
      "epoch": 0.000205035400390625,
      "step": 33593,
      "training_step_time": 0.445326566696167
    },
    {
      "epoch": 0.00020504150390625,
      "model_forward_time": 0.11507630348205566,
      "step": 33594
    },
    {
      "epoch": 0.00020504150390625,
      "step": 33594,
      "training_step_time": 0.4193263053894043
    },
    {
      "epoch": 0.000205047607421875,
      "model_forward_time": 0.11421847343444824,
      "step": 33595
    },
    {
      "epoch": 0.000205047607421875,
      "step": 33595,
      "training_step_time": 0.4033997058868408
    },
    {
      "epoch": 0.0002050537109375,
      "model_forward_time": 0.11354994773864746,
      "step": 33596
    },
    {
      "epoch": 0.0002050537109375,
      "step": 33596,
      "training_step_time": 0.4018118381500244
    },
    {
      "epoch": 0.000205059814453125,
      "model_forward_time": 0.11474776268005371,
      "step": 33597
    },
    {
      "epoch": 0.000205059814453125,
      "step": 33597,
      "training_step_time": 0.5874171257019043
    },
    {
      "epoch": 0.00020506591796875,
      "model_forward_time": 0.11485695838928223,
      "step": 33598
    },
    {
      "epoch": 0.00020506591796875,
      "step": 33598,
      "training_step_time": 0.4124622344970703
    },
    {
      "epoch": 0.000205072021484375,
      "model_forward_time": 0.11414003372192383,
      "step": 33599
    },
    {
      "epoch": 0.000205072021484375,
      "step": 33599,
      "training_step_time": 0.40555906295776367
    },
    {
      "epoch": 0.000205078125,
      "grad_norm": 0.08277905732393265,
      "learning_rate": 4.4225768151520694e-05,
      "loss": 0.0424,
      "step": 33600
    },
    {
      "epoch": 0.000205078125,
      "model_forward_time": 0.11494159698486328,
      "step": 33600
    },
    {
      "epoch": 0.000205078125,
      "step": 33600,
      "training_step_time": 0.36282825469970703
    },
    {
      "epoch": 0.000205084228515625,
      "model_forward_time": 0.11480450630187988,
      "step": 33601
    },
    {
      "epoch": 0.000205084228515625,
      "step": 33601,
      "training_step_time": 0.3957173824310303
    },
    {
      "epoch": 0.00020509033203125,
      "model_forward_time": 0.11481928825378418,
      "step": 33602
    },
    {
      "epoch": 0.00020509033203125,
      "step": 33602,
      "training_step_time": 0.45009303092956543
    },
    {
      "epoch": 0.000205096435546875,
      "model_forward_time": 0.11539745330810547,
      "step": 33603
    },
    {
      "epoch": 0.000205096435546875,
      "step": 33603,
      "training_step_time": 0.6066780090332031
    },
    {
      "epoch": 0.0002051025390625,
      "model_forward_time": 0.11444878578186035,
      "step": 33604
    },
    {
      "epoch": 0.0002051025390625,
      "step": 33604,
      "training_step_time": 0.38932371139526367
    },
    {
      "epoch": 0.000205108642578125,
      "model_forward_time": 0.11520648002624512,
      "step": 33605
    },
    {
      "epoch": 0.000205108642578125,
      "step": 33605,
      "training_step_time": 0.4018874168395996
    },
    {
      "epoch": 0.00020511474609375,
      "model_forward_time": 0.1148977279663086,
      "step": 33606
    },
    {
      "epoch": 0.00020511474609375,
      "step": 33606,
      "training_step_time": 0.39278650283813477
    },
    {
      "epoch": 0.000205120849609375,
      "model_forward_time": 0.11497712135314941,
      "step": 33607
    },
    {
      "epoch": 0.000205120849609375,
      "step": 33607,
      "training_step_time": 0.3967430591583252
    },
    {
      "epoch": 0.000205126953125,
      "model_forward_time": 0.11539316177368164,
      "step": 33608
    },
    {
      "epoch": 0.000205126953125,
      "step": 33608,
      "training_step_time": 0.5125868320465088
    },
    {
      "epoch": 0.000205133056640625,
      "model_forward_time": 0.11653256416320801,
      "step": 33609
    },
    {
      "epoch": 0.000205133056640625,
      "step": 33609,
      "training_step_time": 0.5926706790924072
    },
    {
      "epoch": 0.00020513916015625,
      "grad_norm": 0.1331992894411087,
      "learning_rate": 4.41983955817439e-05,
      "loss": 0.0396,
      "step": 33610
    },
    {
      "epoch": 0.00020513916015625,
      "model_forward_time": 0.1154031753540039,
      "step": 33610
    },
    {
      "epoch": 0.00020513916015625,
      "step": 33610,
      "training_step_time": 0.41103553771972656
    },
    {
      "epoch": 0.000205145263671875,
      "model_forward_time": 0.11489319801330566,
      "step": 33611
    },
    {
      "epoch": 0.000205145263671875,
      "step": 33611,
      "training_step_time": 0.4152393341064453
    },
    {
      "epoch": 0.0002051513671875,
      "model_forward_time": 0.11476707458496094,
      "step": 33612
    },
    {
      "epoch": 0.0002051513671875,
      "step": 33612,
      "training_step_time": 0.4251823425292969
    },
    {
      "epoch": 0.000205157470703125,
      "model_forward_time": 0.1149444580078125,
      "step": 33613
    },
    {
      "epoch": 0.000205157470703125,
      "step": 33613,
      "training_step_time": 0.39290499687194824
    },
    {
      "epoch": 0.00020516357421875,
      "model_forward_time": 0.11473989486694336,
      "step": 33614
    },
    {
      "epoch": 0.00020516357421875,
      "step": 33614,
      "training_step_time": 0.36303186416625977
    },
    {
      "epoch": 0.000205169677734375,
      "model_forward_time": 0.11521792411804199,
      "step": 33615
    },
    {
      "epoch": 0.000205169677734375,
      "step": 33615,
      "training_step_time": 0.4765346050262451
    },
    {
      "epoch": 0.00020517578125,
      "model_forward_time": 0.11475205421447754,
      "step": 33616
    },
    {
      "epoch": 0.00020517578125,
      "step": 33616,
      "training_step_time": 0.48058247566223145
    },
    {
      "epoch": 0.000205181884765625,
      "model_forward_time": 0.11518430709838867,
      "step": 33617
    },
    {
      "epoch": 0.000205181884765625,
      "step": 33617,
      "training_step_time": 0.38465118408203125
    },
    {
      "epoch": 0.00020518798828125,
      "model_forward_time": 0.1147313117980957,
      "step": 33618
    },
    {
      "epoch": 0.00020518798828125,
      "step": 33618,
      "training_step_time": 0.3997530937194824
    },
    {
      "epoch": 0.000205194091796875,
      "model_forward_time": 0.11564826965332031,
      "step": 33619
    },
    {
      "epoch": 0.000205194091796875,
      "step": 33619,
      "training_step_time": 0.3984222412109375
    },
    {
      "epoch": 0.0002052001953125,
      "grad_norm": 0.12760642170906067,
      "learning_rate": 4.4171024774341346e-05,
      "loss": 0.0437,
      "step": 33620
    },
    {
      "epoch": 0.0002052001953125,
      "model_forward_time": 0.11466860771179199,
      "step": 33620
    },
    {
      "epoch": 0.0002052001953125,
      "step": 33620,
      "training_step_time": 0.48524999618530273
    },
    {
      "epoch": 0.000205206298828125,
      "model_forward_time": 0.11477303504943848,
      "step": 33621
    },
    {
      "epoch": 0.000205206298828125,
      "step": 33621,
      "training_step_time": 0.8189353942871094
    },
    {
      "epoch": 0.00020521240234375,
      "model_forward_time": 0.1139838695526123,
      "step": 33622
    },
    {
      "epoch": 0.00020521240234375,
      "step": 33622,
      "training_step_time": 0.43067097663879395
    },
    {
      "epoch": 0.000205218505859375,
      "model_forward_time": 0.1162564754486084,
      "step": 33623
    },
    {
      "epoch": 0.000205218505859375,
      "step": 33623,
      "training_step_time": 0.4735119342803955
    },
    {
      "epoch": 0.000205224609375,
      "model_forward_time": 0.1141817569732666,
      "step": 33624
    },
    {
      "epoch": 0.000205224609375,
      "step": 33624,
      "training_step_time": 0.41266465187072754
    },
    {
      "epoch": 0.000205230712890625,
      "model_forward_time": 0.11423254013061523,
      "step": 33625
    },
    {
      "epoch": 0.000205230712890625,
      "step": 33625,
      "training_step_time": 0.47911953926086426
    },
    {
      "epoch": 0.00020523681640625,
      "model_forward_time": 0.11454486846923828,
      "step": 33626
    },
    {
      "epoch": 0.00020523681640625,
      "step": 33626,
      "training_step_time": 0.3893425464630127
    },
    {
      "epoch": 0.000205242919921875,
      "model_forward_time": 0.11586761474609375,
      "step": 33627
    },
    {
      "epoch": 0.000205242919921875,
      "step": 33627,
      "training_step_time": 0.5159308910369873
    },
    {
      "epoch": 0.0002052490234375,
      "model_forward_time": 0.1145322322845459,
      "step": 33628
    },
    {
      "epoch": 0.0002052490234375,
      "step": 33628,
      "training_step_time": 0.4262382984161377
    },
    {
      "epoch": 0.000205255126953125,
      "model_forward_time": 0.115234375,
      "step": 33629
    },
    {
      "epoch": 0.000205255126953125,
      "step": 33629,
      "training_step_time": 0.48664259910583496
    },
    {
      "epoch": 0.00020526123046875,
      "grad_norm": 0.08592433482408524,
      "learning_rate": 4.414365573762755e-05,
      "loss": 0.0385,
      "step": 33630
    },
    {
      "epoch": 0.00020526123046875,
      "model_forward_time": 0.1150968074798584,
      "step": 33630
    },
    {
      "epoch": 0.00020526123046875,
      "step": 33630,
      "training_step_time": 0.39305591583251953
    },
    {
      "epoch": 0.000205267333984375,
      "model_forward_time": 0.11827850341796875,
      "step": 33631
    },
    {
      "epoch": 0.000205267333984375,
      "step": 33631,
      "training_step_time": 0.3810896873474121
    },
    {
      "epoch": 0.0002052734375,
      "model_forward_time": 0.11814212799072266,
      "step": 33632
    },
    {
      "epoch": 0.0002052734375,
      "step": 33632,
      "training_step_time": 0.3833165168762207
    },
    {
      "epoch": 0.000205279541015625,
      "model_forward_time": 0.11529207229614258,
      "step": 33633
    },
    {
      "epoch": 0.000205279541015625,
      "step": 33633,
      "training_step_time": 0.8454976081848145
    },
    {
      "epoch": 0.00020528564453125,
      "model_forward_time": 0.1154928207397461,
      "step": 33634
    },
    {
      "epoch": 0.00020528564453125,
      "step": 33634,
      "training_step_time": 0.4778895378112793
    },
    {
      "epoch": 0.000205291748046875,
      "model_forward_time": 0.11467790603637695,
      "step": 33635
    },
    {
      "epoch": 0.000205291748046875,
      "step": 33635,
      "training_step_time": 0.38924384117126465
    },
    {
      "epoch": 0.0002052978515625,
      "model_forward_time": 0.11497855186462402,
      "step": 33636
    },
    {
      "epoch": 0.0002052978515625,
      "step": 33636,
      "training_step_time": 0.39051151275634766
    },
    {
      "epoch": 0.000205303955078125,
      "model_forward_time": 0.11413884162902832,
      "step": 33637
    },
    {
      "epoch": 0.000205303955078125,
      "step": 33637,
      "training_step_time": 0.42403221130371094
    },
    {
      "epoch": 0.00020531005859375,
      "model_forward_time": 0.11546921730041504,
      "step": 33638
    },
    {
      "epoch": 0.00020531005859375,
      "step": 33638,
      "training_step_time": 0.3895721435546875
    },
    {
      "epoch": 0.000205316162109375,
      "model_forward_time": 0.13283085823059082,
      "step": 33639
    },
    {
      "epoch": 0.000205316162109375,
      "step": 33639,
      "training_step_time": 0.8777098655700684
    },
    {
      "epoch": 0.000205322265625,
      "grad_norm": 0.10371135175228119,
      "learning_rate": 4.411628847991653e-05,
      "loss": 0.0397,
      "step": 33640
    },
    {
      "epoch": 0.000205322265625,
      "model_forward_time": 0.114410400390625,
      "step": 33640
    },
    {
      "epoch": 0.000205322265625,
      "step": 33640,
      "training_step_time": 0.3642423152923584
    },
    {
      "epoch": 0.000205328369140625,
      "model_forward_time": 0.11477804183959961,
      "step": 33641
    },
    {
      "epoch": 0.000205328369140625,
      "step": 33641,
      "training_step_time": 0.42154645919799805
    },
    {
      "epoch": 0.00020533447265625,
      "model_forward_time": 0.11554765701293945,
      "step": 33642
    },
    {
      "epoch": 0.00020533447265625,
      "step": 33642,
      "training_step_time": 0.3919260501861572
    },
    {
      "epoch": 0.000205340576171875,
      "model_forward_time": 0.1144568920135498,
      "step": 33643
    },
    {
      "epoch": 0.000205340576171875,
      "step": 33643,
      "training_step_time": 0.3802626132965088
    },
    {
      "epoch": 0.0002053466796875,
      "model_forward_time": 0.11532950401306152,
      "step": 33644
    },
    {
      "epoch": 0.0002053466796875,
      "step": 33644,
      "training_step_time": 0.3810427188873291
    },
    {
      "epoch": 0.000205352783203125,
      "model_forward_time": 0.11502623558044434,
      "step": 33645
    },
    {
      "epoch": 0.000205352783203125,
      "step": 33645,
      "training_step_time": 0.596613883972168
    },
    {
      "epoch": 0.00020535888671875,
      "model_forward_time": 0.11457443237304688,
      "step": 33646
    },
    {
      "epoch": 0.00020535888671875,
      "step": 33646,
      "training_step_time": 0.4359250068664551
    },
    {
      "epoch": 0.000205364990234375,
      "model_forward_time": 0.1157689094543457,
      "step": 33647
    },
    {
      "epoch": 0.000205364990234375,
      "step": 33647,
      "training_step_time": 0.4360342025756836
    },
    {
      "epoch": 0.00020537109375,
      "model_forward_time": 0.11457562446594238,
      "step": 33648
    },
    {
      "epoch": 0.00020537109375,
      "step": 33648,
      "training_step_time": 0.40038299560546875
    },
    {
      "epoch": 0.000205377197265625,
      "model_forward_time": 0.11490821838378906,
      "step": 33649
    },
    {
      "epoch": 0.000205377197265625,
      "step": 33649,
      "training_step_time": 0.3885786533355713
    },
    {
      "epoch": 0.00020538330078125,
      "grad_norm": 0.1356416642665863,
      "learning_rate": 4.408892300952171e-05,
      "loss": 0.0445,
      "step": 33650
    },
    {
      "epoch": 0.00020538330078125,
      "model_forward_time": 0.1145009994506836,
      "step": 33650
    },
    {
      "epoch": 0.00020538330078125,
      "step": 33650,
      "training_step_time": 0.4063911437988281
    },
    {
      "epoch": 0.000205389404296875,
      "model_forward_time": 0.11491227149963379,
      "step": 33651
    },
    {
      "epoch": 0.000205389404296875,
      "step": 33651,
      "training_step_time": 0.8360364437103271
    },
    {
      "epoch": 0.0002053955078125,
      "model_forward_time": 0.11503243446350098,
      "step": 33652
    },
    {
      "epoch": 0.0002053955078125,
      "step": 33652,
      "training_step_time": 0.3924400806427002
    },
    {
      "epoch": 0.000205401611328125,
      "model_forward_time": 0.11470270156860352,
      "step": 33653
    },
    {
      "epoch": 0.000205401611328125,
      "step": 33653,
      "training_step_time": 0.3948373794555664
    },
    {
      "epoch": 0.00020540771484375,
      "model_forward_time": 0.11444902420043945,
      "step": 33654
    },
    {
      "epoch": 0.00020540771484375,
      "step": 33654,
      "training_step_time": 0.4436759948730469
    },
    {
      "epoch": 0.000205413818359375,
      "model_forward_time": 0.1154317855834961,
      "step": 33655
    },
    {
      "epoch": 0.000205413818359375,
      "step": 33655,
      "training_step_time": 0.47704625129699707
    },
    {
      "epoch": 0.000205419921875,
      "model_forward_time": 0.11473727226257324,
      "step": 33656
    },
    {
      "epoch": 0.000205419921875,
      "step": 33656,
      "training_step_time": 0.3874659538269043
    },
    {
      "epoch": 0.000205426025390625,
      "model_forward_time": 0.11507797241210938,
      "step": 33657
    },
    {
      "epoch": 0.000205426025390625,
      "step": 33657,
      "training_step_time": 0.8866074085235596
    },
    {
      "epoch": 0.00020543212890625,
      "model_forward_time": 0.11451935768127441,
      "step": 33658
    },
    {
      "epoch": 0.00020543212890625,
      "step": 33658,
      "training_step_time": 0.48366498947143555
    },
    {
      "epoch": 0.000205438232421875,
      "model_forward_time": 0.11395764350891113,
      "step": 33659
    },
    {
      "epoch": 0.000205438232421875,
      "step": 33659,
      "training_step_time": 0.5088872909545898
    },
    {
      "epoch": 0.0002054443359375,
      "grad_norm": 0.18354062736034393,
      "learning_rate": 4.406155933475599e-05,
      "loss": 0.0393,
      "step": 33660
    },
    {
      "epoch": 0.0002054443359375,
      "model_forward_time": 0.11411690711975098,
      "step": 33660
    },
    {
      "epoch": 0.0002054443359375,
      "step": 33660,
      "training_step_time": 0.39683008193969727
    },
    {
      "epoch": 0.000205450439453125,
      "model_forward_time": 0.11549258232116699,
      "step": 33661
    },
    {
      "epoch": 0.000205450439453125,
      "step": 33661,
      "training_step_time": 0.39174532890319824
    },
    {
      "epoch": 0.00020545654296875,
      "model_forward_time": 0.11460423469543457,
      "step": 33662
    },
    {
      "epoch": 0.00020545654296875,
      "step": 33662,
      "training_step_time": 0.4792165756225586
    },
    {
      "epoch": 0.000205462646484375,
      "model_forward_time": 0.11484575271606445,
      "step": 33663
    },
    {
      "epoch": 0.000205462646484375,
      "step": 33663,
      "training_step_time": 0.6605503559112549
    },
    {
      "epoch": 0.00020546875,
      "model_forward_time": 0.11570000648498535,
      "step": 33664
    },
    {
      "epoch": 0.00020546875,
      "step": 33664,
      "training_step_time": 0.4838535785675049
    },
    {
      "epoch": 0.000205474853515625,
      "model_forward_time": 0.11445927619934082,
      "step": 33665
    },
    {
      "epoch": 0.000205474853515625,
      "step": 33665,
      "training_step_time": 0.40323686599731445
    },
    {
      "epoch": 0.00020548095703125,
      "model_forward_time": 0.1150057315826416,
      "step": 33666
    },
    {
      "epoch": 0.00020548095703125,
      "step": 33666,
      "training_step_time": 0.3648042678833008
    },
    {
      "epoch": 0.000205487060546875,
      "model_forward_time": 0.11492657661437988,
      "step": 33667
    },
    {
      "epoch": 0.000205487060546875,
      "step": 33667,
      "training_step_time": 0.39293622970581055
    },
    {
      "epoch": 0.0002054931640625,
      "model_forward_time": 0.11486482620239258,
      "step": 33668
    },
    {
      "epoch": 0.0002054931640625,
      "step": 33668,
      "training_step_time": 0.4740288257598877
    },
    {
      "epoch": 0.000205499267578125,
      "model_forward_time": 0.11436009407043457,
      "step": 33669
    },
    {
      "epoch": 0.000205499267578125,
      "step": 33669,
      "training_step_time": 0.5749843120574951
    },
    {
      "epoch": 0.00020550537109375,
      "grad_norm": 0.17170092463493347,
      "learning_rate": 4.4034197463931774e-05,
      "loss": 0.0405,
      "step": 33670
    },
    {
      "epoch": 0.00020550537109375,
      "model_forward_time": 0.11542534828186035,
      "step": 33670
    },
    {
      "epoch": 0.00020550537109375,
      "step": 33670,
      "training_step_time": 0.42915868759155273
    },
    {
      "epoch": 0.000205511474609375,
      "model_forward_time": 0.1146399974822998,
      "step": 33671
    },
    {
      "epoch": 0.000205511474609375,
      "step": 33671,
      "training_step_time": 0.4335474967956543
    },
    {
      "epoch": 0.000205517578125,
      "model_forward_time": 0.11484026908874512,
      "step": 33672
    },
    {
      "epoch": 0.000205517578125,
      "step": 33672,
      "training_step_time": 0.451291561126709
    },
    {
      "epoch": 0.000205523681640625,
      "model_forward_time": 0.11430931091308594,
      "step": 33673
    },
    {
      "epoch": 0.000205523681640625,
      "step": 33673,
      "training_step_time": 0.40625643730163574
    },
    {
      "epoch": 0.00020552978515625,
      "model_forward_time": 0.11417007446289062,
      "step": 33674
    },
    {
      "epoch": 0.00020552978515625,
      "step": 33674,
      "training_step_time": 0.3887956142425537
    },
    {
      "epoch": 0.000205535888671875,
      "model_forward_time": 0.11495161056518555,
      "step": 33675
    },
    {
      "epoch": 0.000205535888671875,
      "step": 33675,
      "training_step_time": 0.806368350982666
    },
    {
      "epoch": 0.0002055419921875,
      "model_forward_time": 0.11463332176208496,
      "step": 33676
    },
    {
      "epoch": 0.0002055419921875,
      "step": 33676,
      "training_step_time": 0.4812643527984619
    },
    {
      "epoch": 0.000205548095703125,
      "model_forward_time": 0.11506056785583496,
      "step": 33677
    },
    {
      "epoch": 0.000205548095703125,
      "step": 33677,
      "training_step_time": 0.44469690322875977
    },
    {
      "epoch": 0.00020555419921875,
      "model_forward_time": 0.11487746238708496,
      "step": 33678
    },
    {
      "epoch": 0.00020555419921875,
      "step": 33678,
      "training_step_time": 0.38270115852355957
    },
    {
      "epoch": 0.000205560302734375,
      "model_forward_time": 0.1145176887512207,
      "step": 33679
    },
    {
      "epoch": 0.000205560302734375,
      "step": 33679,
      "training_step_time": 0.395463228225708
    },
    {
      "epoch": 0.00020556640625,
      "grad_norm": 0.12680715322494507,
      "learning_rate": 4.400683740536083e-05,
      "loss": 0.0366,
      "step": 33680
    },
    {
      "epoch": 0.00020556640625,
      "model_forward_time": 0.11590743064880371,
      "step": 33680
    },
    {
      "epoch": 0.00020556640625,
      "step": 33680,
      "training_step_time": 0.4364919662475586
    },
    {
      "epoch": 0.000205572509765625,
      "model_forward_time": 0.1189579963684082,
      "step": 33681
    },
    {
      "epoch": 0.000205572509765625,
      "step": 33681,
      "training_step_time": 0.48912954330444336
    },
    {
      "epoch": 0.00020557861328125,
      "model_forward_time": 0.11875414848327637,
      "step": 33682
    },
    {
      "epoch": 0.00020557861328125,
      "step": 33682,
      "training_step_time": 0.3806459903717041
    },
    {
      "epoch": 0.000205584716796875,
      "model_forward_time": 0.11558103561401367,
      "step": 33683
    },
    {
      "epoch": 0.000205584716796875,
      "step": 33683,
      "training_step_time": 0.43692946434020996
    },
    {
      "epoch": 0.0002055908203125,
      "model_forward_time": 0.11533975601196289,
      "step": 33684
    },
    {
      "epoch": 0.0002055908203125,
      "step": 33684,
      "training_step_time": 0.4176795482635498
    },
    {
      "epoch": 0.000205596923828125,
      "model_forward_time": 0.11506414413452148,
      "step": 33685
    },
    {
      "epoch": 0.000205596923828125,
      "step": 33685,
      "training_step_time": 0.4175577163696289
    },
    {
      "epoch": 0.00020560302734375,
      "model_forward_time": 0.11510300636291504,
      "step": 33686
    },
    {
      "epoch": 0.00020560302734375,
      "step": 33686,
      "training_step_time": 0.49042463302612305
    },
    {
      "epoch": 0.000205609130859375,
      "model_forward_time": 0.11479473114013672,
      "step": 33687
    },
    {
      "epoch": 0.000205609130859375,
      "step": 33687,
      "training_step_time": 0.7644867897033691
    },
    {
      "epoch": 0.000205615234375,
      "model_forward_time": 0.1142575740814209,
      "step": 33688
    },
    {
      "epoch": 0.000205615234375,
      "step": 33688,
      "training_step_time": 0.3914940357208252
    },
    {
      "epoch": 0.000205621337890625,
      "model_forward_time": 0.11430621147155762,
      "step": 33689
    },
    {
      "epoch": 0.000205621337890625,
      "step": 33689,
      "training_step_time": 0.42561912536621094
    },
    {
      "epoch": 0.00020562744140625,
      "grad_norm": 0.15168508887290955,
      "learning_rate": 4.3979479167354477e-05,
      "loss": 0.0405,
      "step": 33690
    },
    {
      "epoch": 0.00020562744140625,
      "model_forward_time": 0.11462068557739258,
      "step": 33690
    },
    {
      "epoch": 0.00020562744140625,
      "step": 33690,
      "training_step_time": 0.47380638122558594
    },
    {
      "epoch": 0.000205633544921875,
      "model_forward_time": 0.11419224739074707,
      "step": 33691
    },
    {
      "epoch": 0.000205633544921875,
      "step": 33691,
      "training_step_time": 0.38103652000427246
    },
    {
      "epoch": 0.0002056396484375,
      "model_forward_time": 0.11415338516235352,
      "step": 33692
    },
    {
      "epoch": 0.0002056396484375,
      "step": 33692,
      "training_step_time": 0.3796358108520508
    },
    {
      "epoch": 0.000205645751953125,
      "model_forward_time": 0.11511087417602539,
      "step": 33693
    },
    {
      "epoch": 0.000205645751953125,
      "step": 33693,
      "training_step_time": 0.7910089492797852
    },
    {
      "epoch": 0.00020565185546875,
      "model_forward_time": 0.11402392387390137,
      "step": 33694
    },
    {
      "epoch": 0.00020565185546875,
      "step": 33694,
      "training_step_time": 0.40737366676330566
    },
    {
      "epoch": 0.000205657958984375,
      "model_forward_time": 0.11432480812072754,
      "step": 33695
    },
    {
      "epoch": 0.000205657958984375,
      "step": 33695,
      "training_step_time": 0.39653587341308594
    },
    {
      "epoch": 0.0002056640625,
      "model_forward_time": 0.11486983299255371,
      "step": 33696
    },
    {
      "epoch": 0.0002056640625,
      "step": 33696,
      "training_step_time": 0.4083242416381836
    },
    {
      "epoch": 0.000205670166015625,
      "model_forward_time": 0.11428523063659668,
      "step": 33697
    },
    {
      "epoch": 0.000205670166015625,
      "step": 33697,
      "training_step_time": 0.42254638671875
    },
    {
      "epoch": 0.00020567626953125,
      "model_forward_time": 0.11418795585632324,
      "step": 33698
    },
    {
      "epoch": 0.00020567626953125,
      "step": 33698,
      "training_step_time": 0.3997189998626709
    },
    {
      "epoch": 0.000205682373046875,
      "model_forward_time": 0.11517119407653809,
      "step": 33699
    },
    {
      "epoch": 0.000205682373046875,
      "step": 33699,
      "training_step_time": 0.6776700019836426
    },
    {
      "epoch": 0.0002056884765625,
      "grad_norm": 0.13692454993724823,
      "learning_rate": 4.3952122758223354e-05,
      "loss": 0.0409,
      "step": 33700
    },
    {
      "epoch": 0.0002056884765625,
      "model_forward_time": 0.11489295959472656,
      "step": 33700
    },
    {
      "epoch": 0.0002056884765625,
      "step": 33700,
      "training_step_time": 0.3923180103302002
    },
    {
      "epoch": 0.000205694580078125,
      "model_forward_time": 0.11477112770080566,
      "step": 33701
    },
    {
      "epoch": 0.000205694580078125,
      "step": 33701,
      "training_step_time": 0.3919401168823242
    },
    {
      "epoch": 0.00020570068359375,
      "model_forward_time": 0.11436295509338379,
      "step": 33702
    },
    {
      "epoch": 0.00020570068359375,
      "step": 33702,
      "training_step_time": 0.42710232734680176
    },
    {
      "epoch": 0.000205706787109375,
      "model_forward_time": 0.1146993637084961,
      "step": 33703
    },
    {
      "epoch": 0.000205706787109375,
      "step": 33703,
      "training_step_time": 0.4281961917877197
    },
    {
      "epoch": 0.000205712890625,
      "model_forward_time": 0.11463189125061035,
      "step": 33704
    },
    {
      "epoch": 0.000205712890625,
      "step": 33704,
      "training_step_time": 0.39528775215148926
    },
    {
      "epoch": 0.000205718994140625,
      "model_forward_time": 0.1159508228302002,
      "step": 33705
    },
    {
      "epoch": 0.000205718994140625,
      "step": 33705,
      "training_step_time": 0.5266411304473877
    },
    {
      "epoch": 0.00020572509765625,
      "model_forward_time": 0.11481380462646484,
      "step": 33706
    },
    {
      "epoch": 0.00020572509765625,
      "step": 33706,
      "training_step_time": 0.40493273735046387
    },
    {
      "epoch": 0.000205731201171875,
      "model_forward_time": 0.11511874198913574,
      "step": 33707
    },
    {
      "epoch": 0.000205731201171875,
      "step": 33707,
      "training_step_time": 0.4018435478210449
    },
    {
      "epoch": 0.0002057373046875,
      "model_forward_time": 0.11585330963134766,
      "step": 33708
    },
    {
      "epoch": 0.0002057373046875,
      "step": 33708,
      "training_step_time": 0.43355226516723633
    },
    {
      "epoch": 0.000205743408203125,
      "model_forward_time": 0.11609673500061035,
      "step": 33709
    },
    {
      "epoch": 0.000205743408203125,
      "step": 33709,
      "training_step_time": 0.4268190860748291
    },
    {
      "epoch": 0.00020574951171875,
      "grad_norm": 0.1269688904285431,
      "learning_rate": 4.392476818627765e-05,
      "loss": 0.0385,
      "step": 33710
    },
    {
      "epoch": 0.00020574951171875,
      "model_forward_time": 0.11447000503540039,
      "step": 33710
    },
    {
      "epoch": 0.00020574951171875,
      "step": 33710,
      "training_step_time": 0.38959240913391113
    },
    {
      "epoch": 0.000205755615234375,
      "model_forward_time": 0.11548423767089844,
      "step": 33711
    },
    {
      "epoch": 0.000205755615234375,
      "step": 33711,
      "training_step_time": 0.5244879722595215
    },
    {
      "epoch": 0.00020576171875,
      "model_forward_time": 0.11517453193664551,
      "step": 33712
    },
    {
      "epoch": 0.00020576171875,
      "step": 33712,
      "training_step_time": 0.48041248321533203
    },
    {
      "epoch": 0.000205767822265625,
      "model_forward_time": 0.11495852470397949,
      "step": 33713
    },
    {
      "epoch": 0.000205767822265625,
      "step": 33713,
      "training_step_time": 0.3950474262237549
    },
    {
      "epoch": 0.00020577392578125,
      "model_forward_time": 0.11488676071166992,
      "step": 33714
    },
    {
      "epoch": 0.00020577392578125,
      "step": 33714,
      "training_step_time": 0.3932375907897949
    },
    {
      "epoch": 0.000205780029296875,
      "model_forward_time": 0.11480832099914551,
      "step": 33715
    },
    {
      "epoch": 0.000205780029296875,
      "step": 33715,
      "training_step_time": 0.3950991630554199
    },
    {
      "epoch": 0.0002057861328125,
      "model_forward_time": 0.1153099536895752,
      "step": 33716
    },
    {
      "epoch": 0.0002057861328125,
      "step": 33716,
      "training_step_time": 0.3973090648651123
    },
    {
      "epoch": 0.000205792236328125,
      "model_forward_time": 0.11541318893432617,
      "step": 33717
    },
    {
      "epoch": 0.000205792236328125,
      "step": 33717,
      "training_step_time": 0.5758450031280518
    },
    {
      "epoch": 0.00020579833984375,
      "model_forward_time": 0.11523842811584473,
      "step": 33718
    },
    {
      "epoch": 0.00020579833984375,
      "step": 33718,
      "training_step_time": 0.4414527416229248
    },
    {
      "epoch": 0.000205804443359375,
      "model_forward_time": 0.11504316329956055,
      "step": 33719
    },
    {
      "epoch": 0.000205804443359375,
      "step": 33719,
      "training_step_time": 0.3884580135345459
    },
    {
      "epoch": 0.000205810546875,
      "grad_norm": 0.10778295993804932,
      "learning_rate": 4.3897415459827e-05,
      "loss": 0.0389,
      "step": 33720
    },
    {
      "epoch": 0.000205810546875,
      "model_forward_time": 0.11543822288513184,
      "step": 33720
    },
    {
      "epoch": 0.000205810546875,
      "step": 33720,
      "training_step_time": 0.4111318588256836
    },
    {
      "epoch": 0.000205816650390625,
      "model_forward_time": 0.1151270866394043,
      "step": 33721
    },
    {
      "epoch": 0.000205816650390625,
      "step": 33721,
      "training_step_time": 0.40000176429748535
    },
    {
      "epoch": 0.00020582275390625,
      "model_forward_time": 0.11492395401000977,
      "step": 33722
    },
    {
      "epoch": 0.00020582275390625,
      "step": 33722,
      "training_step_time": 0.39115238189697266
    },
    {
      "epoch": 0.000205828857421875,
      "model_forward_time": 0.11570405960083008,
      "step": 33723
    },
    {
      "epoch": 0.000205828857421875,
      "step": 33723,
      "training_step_time": 0.4024629592895508
    },
    {
      "epoch": 0.0002058349609375,
      "model_forward_time": 0.11605358123779297,
      "step": 33724
    },
    {
      "epoch": 0.0002058349609375,
      "step": 33724,
      "training_step_time": 0.3940279483795166
    },
    {
      "epoch": 0.000205841064453125,
      "model_forward_time": 0.11558961868286133,
      "step": 33725
    },
    {
      "epoch": 0.000205841064453125,
      "step": 33725,
      "training_step_time": 0.4420292377471924
    },
    {
      "epoch": 0.00020584716796875,
      "model_forward_time": 0.11540508270263672,
      "step": 33726
    },
    {
      "epoch": 0.00020584716796875,
      "step": 33726,
      "training_step_time": 0.49854183197021484
    },
    {
      "epoch": 0.000205853271484375,
      "model_forward_time": 0.11487746238708496,
      "step": 33727
    },
    {
      "epoch": 0.000205853271484375,
      "step": 33727,
      "training_step_time": 0.39725661277770996
    },
    {
      "epoch": 0.000205859375,
      "model_forward_time": 0.11593055725097656,
      "step": 33728
    },
    {
      "epoch": 0.000205859375,
      "step": 33728,
      "training_step_time": 0.392528772354126
    },
    {
      "epoch": 0.000205865478515625,
      "model_forward_time": 0.11590838432312012,
      "step": 33729
    },
    {
      "epoch": 0.000205865478515625,
      "step": 33729,
      "training_step_time": 0.4364759922027588
    },
    {
      "epoch": 0.00020587158203125,
      "grad_norm": 0.1267755776643753,
      "learning_rate": 4.387006458718037e-05,
      "loss": 0.0355,
      "step": 33730
    },
    {
      "epoch": 0.00020587158203125,
      "model_forward_time": 0.11544394493103027,
      "step": 33730
    },
    {
      "epoch": 0.00020587158203125,
      "step": 33730,
      "training_step_time": 0.3917570114135742
    },
    {
      "epoch": 0.000205877685546875,
      "model_forward_time": 0.11533188819885254,
      "step": 33731
    },
    {
      "epoch": 0.000205877685546875,
      "step": 33731,
      "training_step_time": 0.39174795150756836
    },
    {
      "epoch": 0.0002058837890625,
      "model_forward_time": 0.11485147476196289,
      "step": 33732
    },
    {
      "epoch": 0.0002058837890625,
      "step": 33732,
      "training_step_time": 0.43271923065185547
    },
    {
      "epoch": 0.000205889892578125,
      "model_forward_time": 0.1156158447265625,
      "step": 33733
    },
    {
      "epoch": 0.000205889892578125,
      "step": 33733,
      "training_step_time": 0.48892688751220703
    },
    {
      "epoch": 0.00020589599609375,
      "model_forward_time": 0.11563825607299805,
      "step": 33734
    },
    {
      "epoch": 0.00020589599609375,
      "step": 33734,
      "training_step_time": 0.39432668685913086
    },
    {
      "epoch": 0.000205902099609375,
      "model_forward_time": 0.11549901962280273,
      "step": 33735
    },
    {
      "epoch": 0.000205902099609375,
      "step": 33735,
      "training_step_time": 0.5213348865509033
    },
    {
      "epoch": 0.000205908203125,
      "model_forward_time": 0.11518049240112305,
      "step": 33736
    },
    {
      "epoch": 0.000205908203125,
      "step": 33736,
      "training_step_time": 0.45977282524108887
    },
    {
      "epoch": 0.000205914306640625,
      "model_forward_time": 0.11532807350158691,
      "step": 33737
    },
    {
      "epoch": 0.000205914306640625,
      "step": 33737,
      "training_step_time": 0.3992784023284912
    },
    {
      "epoch": 0.00020592041015625,
      "model_forward_time": 0.11530613899230957,
      "step": 33738
    },
    {
      "epoch": 0.00020592041015625,
      "step": 33738,
      "training_step_time": 0.40764451026916504
    },
    {
      "epoch": 0.000205926513671875,
      "model_forward_time": 0.11866474151611328,
      "step": 33739
    },
    {
      "epoch": 0.000205926513671875,
      "step": 33739,
      "training_step_time": 0.5796992778778076
    },
    {
      "epoch": 0.0002059326171875,
      "grad_norm": 0.15098953247070312,
      "learning_rate": 4.384271557664628e-05,
      "loss": 0.0405,
      "step": 33740
    },
    {
      "epoch": 0.0002059326171875,
      "model_forward_time": 0.11679458618164062,
      "step": 33740
    },
    {
      "epoch": 0.0002059326171875,
      "step": 33740,
      "training_step_time": 0.650933027267456
    },
    {
      "epoch": 0.000205938720703125,
      "model_forward_time": 0.1201629638671875,
      "step": 33741
    },
    {
      "epoch": 0.000205938720703125,
      "step": 33741,
      "training_step_time": 0.6315169334411621
    },
    {
      "epoch": 0.00020594482421875,
      "model_forward_time": 0.11785197257995605,
      "step": 33742
    },
    {
      "epoch": 0.00020594482421875,
      "step": 33742,
      "training_step_time": 0.6687459945678711
    },
    {
      "epoch": 0.000205950927734375,
      "model_forward_time": 0.12327289581298828,
      "step": 33743
    },
    {
      "epoch": 0.000205950927734375,
      "step": 33743,
      "training_step_time": 0.7199318408966064
    },
    {
      "epoch": 0.00020595703125,
      "model_forward_time": 0.11854958534240723,
      "step": 33744
    },
    {
      "epoch": 0.00020595703125,
      "step": 33744,
      "training_step_time": 0.7298798561096191
    },
    {
      "epoch": 0.000205963134765625,
      "model_forward_time": 0.11642885208129883,
      "step": 33745
    },
    {
      "epoch": 0.000205963134765625,
      "step": 33745,
      "training_step_time": 0.5262255668640137
    },
    {
      "epoch": 0.00020596923828125,
      "model_forward_time": 0.14339780807495117,
      "step": 33746
    },
    {
      "epoch": 0.00020596923828125,
      "step": 33746,
      "training_step_time": 0.6819865703582764
    },
    {
      "epoch": 0.000205975341796875,
      "model_forward_time": 0.11754751205444336,
      "step": 33747
    },
    {
      "epoch": 0.000205975341796875,
      "step": 33747,
      "training_step_time": 0.7266318798065186
    },
    {
      "epoch": 0.0002059814453125,
      "model_forward_time": 0.1211850643157959,
      "step": 33748
    },
    {
      "epoch": 0.0002059814453125,
      "step": 33748,
      "training_step_time": 0.7001452445983887
    },
    {
      "epoch": 0.000205987548828125,
      "model_forward_time": 0.12219953536987305,
      "step": 33749
    },
    {
      "epoch": 0.000205987548828125,
      "step": 33749,
      "training_step_time": 0.6781444549560547
    },
    {
      "epoch": 0.00020599365234375,
      "grad_norm": 0.08274275064468384,
      "learning_rate": 4.381536843653262e-05,
      "loss": 0.0369,
      "step": 33750
    },
    {
      "epoch": 0.00020599365234375,
      "model_forward_time": 0.12054300308227539,
      "step": 33750
    },
    {
      "epoch": 0.00020599365234375,
      "step": 33750,
      "training_step_time": 0.6292662620544434
    },
    {
      "epoch": 0.000205999755859375,
      "model_forward_time": 0.1191563606262207,
      "step": 33751
    },
    {
      "epoch": 0.000205999755859375,
      "step": 33751,
      "training_step_time": 0.6486763954162598
    },
    {
      "epoch": 0.000206005859375,
      "model_forward_time": 0.1198432445526123,
      "step": 33752
    },
    {
      "epoch": 0.000206005859375,
      "step": 33752,
      "training_step_time": 0.6177577972412109
    },
    {
      "epoch": 0.000206011962890625,
      "model_forward_time": 0.12942790985107422,
      "step": 33753
    },
    {
      "epoch": 0.000206011962890625,
      "step": 33753,
      "training_step_time": 0.6849155426025391
    },
    {
      "epoch": 0.00020601806640625,
      "model_forward_time": 0.12127232551574707,
      "step": 33754
    },
    {
      "epoch": 0.00020601806640625,
      "step": 33754,
      "training_step_time": 0.739551305770874
    },
    {
      "epoch": 0.000206024169921875,
      "model_forward_time": 0.1162879467010498,
      "step": 33755
    },
    {
      "epoch": 0.000206024169921875,
      "step": 33755,
      "training_step_time": 0.5587644577026367
    },
    {
      "epoch": 0.0002060302734375,
      "model_forward_time": 0.12319087982177734,
      "step": 33756
    },
    {
      "epoch": 0.0002060302734375,
      "step": 33756,
      "training_step_time": 0.745309591293335
    },
    {
      "epoch": 0.000206036376953125,
      "model_forward_time": 0.12091469764709473,
      "step": 33757
    },
    {
      "epoch": 0.000206036376953125,
      "step": 33757,
      "training_step_time": 0.5659995079040527
    },
    {
      "epoch": 0.00020604248046875,
      "model_forward_time": 0.11887884140014648,
      "step": 33758
    },
    {
      "epoch": 0.00020604248046875,
      "step": 33758,
      "training_step_time": 0.6708183288574219
    },
    {
      "epoch": 0.000206048583984375,
      "model_forward_time": 0.11959052085876465,
      "step": 33759
    },
    {
      "epoch": 0.000206048583984375,
      "step": 33759,
      "training_step_time": 0.7155439853668213
    },
    {
      "epoch": 0.0002060546875,
      "grad_norm": 0.11259225755929947,
      "learning_rate": 4.3788023175146747e-05,
      "loss": 0.05,
      "step": 33760
    },
    {
      "epoch": 0.0002060546875,
      "model_forward_time": 0.11919927597045898,
      "step": 33760
    },
    {
      "epoch": 0.0002060546875,
      "step": 33760,
      "training_step_time": 0.6645331382751465
    },
    {
      "epoch": 0.000206060791015625,
      "model_forward_time": 0.12662291526794434,
      "step": 33761
    },
    {
      "epoch": 0.000206060791015625,
      "step": 33761,
      "training_step_time": 0.6918160915374756
    },
    {
      "epoch": 0.00020606689453125,
      "model_forward_time": 0.12142515182495117,
      "step": 33762
    },
    {
      "epoch": 0.00020606689453125,
      "step": 33762,
      "training_step_time": 0.6606240272521973
    },
    {
      "epoch": 0.000206072998046875,
      "model_forward_time": 0.12220954895019531,
      "step": 33763
    },
    {
      "epoch": 0.000206072998046875,
      "step": 33763,
      "training_step_time": 0.7185165882110596
    },
    {
      "epoch": 0.0002060791015625,
      "model_forward_time": 0.1170804500579834,
      "step": 33764
    },
    {
      "epoch": 0.0002060791015625,
      "step": 33764,
      "training_step_time": 0.7090475559234619
    },
    {
      "epoch": 0.000206085205078125,
      "model_forward_time": 0.12014174461364746,
      "step": 33765
    },
    {
      "epoch": 0.000206085205078125,
      "step": 33765,
      "training_step_time": 0.7428481578826904
    },
    {
      "epoch": 0.00020609130859375,
      "model_forward_time": 0.11890697479248047,
      "step": 33766
    },
    {
      "epoch": 0.00020609130859375,
      "step": 33766,
      "training_step_time": 0.7263004779815674
    },
    {
      "epoch": 0.000206097412109375,
      "model_forward_time": 0.12248396873474121,
      "step": 33767
    },
    {
      "epoch": 0.000206097412109375,
      "step": 33767,
      "training_step_time": 0.6717615127563477
    },
    {
      "epoch": 0.000206103515625,
      "model_forward_time": 0.11651730537414551,
      "step": 33768
    },
    {
      "epoch": 0.000206103515625,
      "step": 33768,
      "training_step_time": 0.7373919486999512
    },
    {
      "epoch": 0.000206109619140625,
      "model_forward_time": 0.11659717559814453,
      "step": 33769
    },
    {
      "epoch": 0.000206109619140625,
      "step": 33769,
      "training_step_time": 0.5958003997802734
    },
    {
      "epoch": 0.00020611572265625,
      "grad_norm": 0.12881675362586975,
      "learning_rate": 4.3760679800795396e-05,
      "loss": 0.0456,
      "step": 33770
    },
    {
      "epoch": 0.00020611572265625,
      "model_forward_time": 0.12064075469970703,
      "step": 33770
    },
    {
      "epoch": 0.00020611572265625,
      "step": 33770,
      "training_step_time": 0.6952817440032959
    },
    {
      "epoch": 0.000206121826171875,
      "model_forward_time": 0.1176900863647461,
      "step": 33771
    },
    {
      "epoch": 0.000206121826171875,
      "step": 33771,
      "training_step_time": 0.6507468223571777
    },
    {
      "epoch": 0.0002061279296875,
      "model_forward_time": 0.1161189079284668,
      "step": 33772
    },
    {
      "epoch": 0.0002061279296875,
      "step": 33772,
      "training_step_time": 0.6936397552490234
    },
    {
      "epoch": 0.000206134033203125,
      "model_forward_time": 0.12155318260192871,
      "step": 33773
    },
    {
      "epoch": 0.000206134033203125,
      "step": 33773,
      "training_step_time": 0.705927848815918
    },
    {
      "epoch": 0.00020614013671875,
      "model_forward_time": 0.11896228790283203,
      "step": 33774
    },
    {
      "epoch": 0.00020614013671875,
      "step": 33774,
      "training_step_time": 0.5983905792236328
    },
    {
      "epoch": 0.000206146240234375,
      "model_forward_time": 0.11800575256347656,
      "step": 33775
    },
    {
      "epoch": 0.000206146240234375,
      "step": 33775,
      "training_step_time": 0.6375470161437988
    },
    {
      "epoch": 0.00020615234375,
      "model_forward_time": 0.12451863288879395,
      "step": 33776
    },
    {
      "epoch": 0.00020615234375,
      "step": 33776,
      "training_step_time": 0.6514358520507812
    },
    {
      "epoch": 0.000206158447265625,
      "model_forward_time": 0.11674332618713379,
      "step": 33777
    },
    {
      "epoch": 0.000206158447265625,
      "step": 33777,
      "training_step_time": 0.6871459484100342
    },
    {
      "epoch": 0.00020616455078125,
      "model_forward_time": 0.1217036247253418,
      "step": 33778
    },
    {
      "epoch": 0.00020616455078125,
      "step": 33778,
      "training_step_time": 0.6344661712646484
    },
    {
      "epoch": 0.000206170654296875,
      "model_forward_time": 0.11843180656433105,
      "step": 33779
    },
    {
      "epoch": 0.000206170654296875,
      "step": 33779,
      "training_step_time": 0.6644413471221924
    },
    {
      "epoch": 0.0002061767578125,
      "grad_norm": 0.12556768953800201,
      "learning_rate": 4.373333832178478e-05,
      "loss": 0.045,
      "step": 33780
    },
    {
      "epoch": 0.0002061767578125,
      "model_forward_time": 0.116363525390625,
      "step": 33780
    },
    {
      "epoch": 0.0002061767578125,
      "step": 33780,
      "training_step_time": 0.7160234451293945
    },
    {
      "epoch": 0.000206182861328125,
      "model_forward_time": 0.11997008323669434,
      "step": 33781
    },
    {
      "epoch": 0.000206182861328125,
      "step": 33781,
      "training_step_time": 0.663975715637207
    },
    {
      "epoch": 0.00020618896484375,
      "model_forward_time": 0.13400578498840332,
      "step": 33782
    },
    {
      "epoch": 0.00020618896484375,
      "step": 33782,
      "training_step_time": 0.7386143207550049
    },
    {
      "epoch": 0.000206195068359375,
      "model_forward_time": 0.11703109741210938,
      "step": 33783
    },
    {
      "epoch": 0.000206195068359375,
      "step": 33783,
      "training_step_time": 0.5763838291168213
    },
    {
      "epoch": 0.000206201171875,
      "model_forward_time": 0.121490478515625,
      "step": 33784
    },
    {
      "epoch": 0.000206201171875,
      "step": 33784,
      "training_step_time": 0.7590832710266113
    },
    {
      "epoch": 0.000206207275390625,
      "model_forward_time": 0.11995792388916016,
      "step": 33785
    },
    {
      "epoch": 0.000206207275390625,
      "step": 33785,
      "training_step_time": 0.6517729759216309
    },
    {
      "epoch": 0.00020621337890625,
      "model_forward_time": 0.1217658519744873,
      "step": 33786
    },
    {
      "epoch": 0.00020621337890625,
      "step": 33786,
      "training_step_time": 0.6540350914001465
    },
    {
      "epoch": 0.000206219482421875,
      "model_forward_time": 0.12677001953125,
      "step": 33787
    },
    {
      "epoch": 0.000206219482421875,
      "step": 33787,
      "training_step_time": 0.7501399517059326
    },
    {
      "epoch": 0.0002062255859375,
      "model_forward_time": 0.12022256851196289,
      "step": 33788
    },
    {
      "epoch": 0.0002062255859375,
      "step": 33788,
      "training_step_time": 0.647869348526001
    },
    {
      "epoch": 0.000206231689453125,
      "model_forward_time": 0.11819338798522949,
      "step": 33789
    },
    {
      "epoch": 0.000206231689453125,
      "step": 33789,
      "training_step_time": 0.6860337257385254
    },
    {
      "epoch": 0.00020623779296875,
      "grad_norm": 0.1000199168920517,
      "learning_rate": 4.370599874642055e-05,
      "loss": 0.044,
      "step": 33790
    },
    {
      "epoch": 0.00020623779296875,
      "model_forward_time": 0.11620664596557617,
      "step": 33790
    },
    {
      "epoch": 0.00020623779296875,
      "step": 33790,
      "training_step_time": 0.6946406364440918
    },
    {
      "epoch": 0.000206243896484375,
      "model_forward_time": 0.1339430809020996,
      "step": 33791
    },
    {
      "epoch": 0.000206243896484375,
      "step": 33791,
      "training_step_time": 0.6417734622955322
    },
    {
      "epoch": 0.00020625,
      "model_forward_time": 0.1183929443359375,
      "step": 33792
    },
    {
      "epoch": 0.00020625,
      "step": 33792,
      "training_step_time": 0.7360053062438965
    },
    {
      "epoch": 0.000206256103515625,
      "model_forward_time": 0.1189279556274414,
      "step": 33793
    },
    {
      "epoch": 0.000206256103515625,
      "step": 33793,
      "training_step_time": 0.5530264377593994
    },
    {
      "epoch": 0.00020626220703125,
      "model_forward_time": 0.11574172973632812,
      "step": 33794
    },
    {
      "epoch": 0.00020626220703125,
      "step": 33794,
      "training_step_time": 0.7388916015625
    },
    {
      "epoch": 0.000206268310546875,
      "model_forward_time": 0.11714553833007812,
      "step": 33795
    },
    {
      "epoch": 0.000206268310546875,
      "step": 33795,
      "training_step_time": 0.691213846206665
    },
    {
      "epoch": 0.0002062744140625,
      "model_forward_time": 0.11677122116088867,
      "step": 33796
    },
    {
      "epoch": 0.0002062744140625,
      "step": 33796,
      "training_step_time": 0.6280918121337891
    },
    {
      "epoch": 0.000206280517578125,
      "model_forward_time": 0.11823797225952148,
      "step": 33797
    },
    {
      "epoch": 0.000206280517578125,
      "step": 33797,
      "training_step_time": 0.6322226524353027
    },
    {
      "epoch": 0.00020628662109375,
      "model_forward_time": 0.12020134925842285,
      "step": 33798
    },
    {
      "epoch": 0.00020628662109375,
      "step": 33798,
      "training_step_time": 0.7097182273864746
    },
    {
      "epoch": 0.000206292724609375,
      "model_forward_time": 0.12238574028015137,
      "step": 33799
    },
    {
      "epoch": 0.000206292724609375,
      "step": 33799,
      "training_step_time": 0.6303434371948242
    },
    {
      "epoch": 0.000206298828125,
      "grad_norm": 0.1296117901802063,
      "learning_rate": 4.367866108300769e-05,
      "loss": 0.0508,
      "step": 33800
    },
    {
      "epoch": 0.000206298828125,
      "model_forward_time": 0.12789273262023926,
      "step": 33800
    },
    {
      "epoch": 0.000206298828125,
      "step": 33800,
      "training_step_time": 0.716050386428833
    },
    {
      "epoch": 0.000206304931640625,
      "model_forward_time": 0.11750268936157227,
      "step": 33801
    },
    {
      "epoch": 0.000206304931640625,
      "step": 33801,
      "training_step_time": 0.6928980350494385
    },
    {
      "epoch": 0.00020631103515625,
      "model_forward_time": 0.12029051780700684,
      "step": 33802
    },
    {
      "epoch": 0.00020631103515625,
      "step": 33802,
      "training_step_time": 0.6340386867523193
    },
    {
      "epoch": 0.000206317138671875,
      "model_forward_time": 0.12232828140258789,
      "step": 33803
    },
    {
      "epoch": 0.000206317138671875,
      "step": 33803,
      "training_step_time": 0.7455434799194336
    },
    {
      "epoch": 0.0002063232421875,
      "model_forward_time": 0.11756038665771484,
      "step": 33804
    },
    {
      "epoch": 0.0002063232421875,
      "step": 33804,
      "training_step_time": 0.6752064228057861
    },
    {
      "epoch": 0.000206329345703125,
      "model_forward_time": 0.11933445930480957,
      "step": 33805
    },
    {
      "epoch": 0.000206329345703125,
      "step": 33805,
      "training_step_time": 0.601201057434082
    },
    {
      "epoch": 0.00020633544921875,
      "model_forward_time": 0.1172943115234375,
      "step": 33806
    },
    {
      "epoch": 0.00020633544921875,
      "step": 33806,
      "training_step_time": 0.6160051822662354
    },
    {
      "epoch": 0.000206341552734375,
      "model_forward_time": 0.1178901195526123,
      "step": 33807
    },
    {
      "epoch": 0.000206341552734375,
      "step": 33807,
      "training_step_time": 0.5346558094024658
    },
    {
      "epoch": 0.00020634765625,
      "model_forward_time": 0.11965298652648926,
      "step": 33808
    },
    {
      "epoch": 0.00020634765625,
      "step": 33808,
      "training_step_time": 0.5057835578918457
    },
    {
      "epoch": 0.000206353759765625,
      "model_forward_time": 0.12372159957885742,
      "step": 33809
    },
    {
      "epoch": 0.000206353759765625,
      "step": 33809,
      "training_step_time": 0.5160396099090576
    },
    {
      "epoch": 0.00020635986328125,
      "grad_norm": 0.12771882116794586,
      "learning_rate": 4.365132533985071e-05,
      "loss": 0.0439,
      "step": 33810
    },
    {
      "epoch": 0.00020635986328125,
      "model_forward_time": 0.11801528930664062,
      "step": 33810
    },
    {
      "epoch": 0.00020635986328125,
      "step": 33810,
      "training_step_time": 0.5407123565673828
    },
    {
      "epoch": 0.000206365966796875,
      "model_forward_time": 0.1177983283996582,
      "step": 33811
    },
    {
      "epoch": 0.000206365966796875,
      "step": 33811,
      "training_step_time": 0.6082115173339844
    },
    {
      "epoch": 0.0002063720703125,
      "model_forward_time": 0.11724162101745605,
      "step": 33812
    },
    {
      "epoch": 0.0002063720703125,
      "step": 33812,
      "training_step_time": 0.489849328994751
    },
    {
      "epoch": 0.000206378173828125,
      "model_forward_time": 0.11599326133728027,
      "step": 33813
    },
    {
      "epoch": 0.000206378173828125,
      "step": 33813,
      "training_step_time": 0.43364977836608887
    },
    {
      "epoch": 0.00020638427734375,
      "model_forward_time": 0.11886453628540039,
      "step": 33814
    },
    {
      "epoch": 0.00020638427734375,
      "step": 33814,
      "training_step_time": 0.4013662338256836
    },
    {
      "epoch": 0.000206390380859375,
      "model_forward_time": 0.1174924373626709,
      "step": 33815
    },
    {
      "epoch": 0.000206390380859375,
      "step": 33815,
      "training_step_time": 0.4286632537841797
    },
    {
      "epoch": 0.000206396484375,
      "model_forward_time": 0.11530876159667969,
      "step": 33816
    },
    {
      "epoch": 0.000206396484375,
      "step": 33816,
      "training_step_time": 0.5205562114715576
    },
    {
      "epoch": 0.000206402587890625,
      "model_forward_time": 0.11705493927001953,
      "step": 33817
    },
    {
      "epoch": 0.000206402587890625,
      "step": 33817,
      "training_step_time": 0.45830845832824707
    },
    {
      "epoch": 0.00020640869140625,
      "model_forward_time": 0.11567282676696777,
      "step": 33818
    },
    {
      "epoch": 0.00020640869140625,
      "step": 33818,
      "training_step_time": 0.508669376373291
    },
    {
      "epoch": 0.000206414794921875,
      "model_forward_time": 0.11498141288757324,
      "step": 33819
    },
    {
      "epoch": 0.000206414794921875,
      "step": 33819,
      "training_step_time": 0.3977851867675781
    },
    {
      "epoch": 0.0002064208984375,
      "grad_norm": 0.12973305583000183,
      "learning_rate": 4.362399152525344e-05,
      "loss": 0.0444,
      "step": 33820
    },
    {
      "epoch": 0.0002064208984375,
      "model_forward_time": 0.11603522300720215,
      "step": 33820
    },
    {
      "epoch": 0.0002064208984375,
      "step": 33820,
      "training_step_time": 0.38568639755249023
    },
    {
      "epoch": 0.000206427001953125,
      "model_forward_time": 0.11469388008117676,
      "step": 33821
    },
    {
      "epoch": 0.000206427001953125,
      "step": 33821,
      "training_step_time": 0.3838622570037842
    },
    {
      "epoch": 0.00020643310546875,
      "model_forward_time": 0.11595892906188965,
      "step": 33822
    },
    {
      "epoch": 0.00020643310546875,
      "step": 33822,
      "training_step_time": 0.39359545707702637
    },
    {
      "epoch": 0.000206439208984375,
      "model_forward_time": 0.11517119407653809,
      "step": 33823
    },
    {
      "epoch": 0.000206439208984375,
      "step": 33823,
      "training_step_time": 0.3979485034942627
    },
    {
      "epoch": 0.0002064453125,
      "model_forward_time": 0.1161189079284668,
      "step": 33824
    },
    {
      "epoch": 0.0002064453125,
      "step": 33824,
      "training_step_time": 0.39893031120300293
    },
    {
      "epoch": 0.000206451416015625,
      "model_forward_time": 0.11645889282226562,
      "step": 33825
    },
    {
      "epoch": 0.000206451416015625,
      "step": 33825,
      "training_step_time": 0.44328880310058594
    },
    {
      "epoch": 0.00020645751953125,
      "model_forward_time": 0.11530661582946777,
      "step": 33826
    },
    {
      "epoch": 0.00020645751953125,
      "step": 33826,
      "training_step_time": 0.4796929359436035
    },
    {
      "epoch": 0.000206463623046875,
      "model_forward_time": 0.11467432975769043,
      "step": 33827
    },
    {
      "epoch": 0.000206463623046875,
      "step": 33827,
      "training_step_time": 0.3811664581298828
    },
    {
      "epoch": 0.0002064697265625,
      "model_forward_time": 0.11545753479003906,
      "step": 33828
    },
    {
      "epoch": 0.0002064697265625,
      "step": 33828,
      "training_step_time": 0.39824652671813965
    },
    {
      "epoch": 0.000206475830078125,
      "model_forward_time": 0.11569499969482422,
      "step": 33829
    },
    {
      "epoch": 0.000206475830078125,
      "step": 33829,
      "training_step_time": 0.40432000160217285
    },
    {
      "epoch": 0.00020648193359375,
      "grad_norm": 0.14667095243930817,
      "learning_rate": 4.35966596475192e-05,
      "loss": 0.0436,
      "step": 33830
    },
    {
      "epoch": 0.00020648193359375,
      "model_forward_time": 0.11554813385009766,
      "step": 33830
    },
    {
      "epoch": 0.00020648193359375,
      "step": 33830,
      "training_step_time": 0.4892578125
    },
    {
      "epoch": 0.000206488037109375,
      "model_forward_time": 0.11662697792053223,
      "step": 33831
    },
    {
      "epoch": 0.000206488037109375,
      "step": 33831,
      "training_step_time": 0.43041062355041504
    },
    {
      "epoch": 0.000206494140625,
      "model_forward_time": 0.1147310733795166,
      "step": 33832
    },
    {
      "epoch": 0.000206494140625,
      "step": 33832,
      "training_step_time": 0.46251440048217773
    },
    {
      "epoch": 0.000206500244140625,
      "model_forward_time": 0.11474823951721191,
      "step": 33833
    },
    {
      "epoch": 0.000206500244140625,
      "step": 33833,
      "training_step_time": 0.40741634368896484
    },
    {
      "epoch": 0.00020650634765625,
      "model_forward_time": 0.11522841453552246,
      "step": 33834
    },
    {
      "epoch": 0.00020650634765625,
      "step": 33834,
      "training_step_time": 0.3938636779785156
    },
    {
      "epoch": 0.000206512451171875,
      "model_forward_time": 0.11500191688537598,
      "step": 33835
    },
    {
      "epoch": 0.000206512451171875,
      "step": 33835,
      "training_step_time": 0.3922078609466553
    },
    {
      "epoch": 0.0002065185546875,
      "model_forward_time": 0.11562633514404297,
      "step": 33836
    },
    {
      "epoch": 0.0002065185546875,
      "step": 33836,
      "training_step_time": 0.3907027244567871
    },
    {
      "epoch": 0.000206524658203125,
      "model_forward_time": 0.11504197120666504,
      "step": 33837
    },
    {
      "epoch": 0.000206524658203125,
      "step": 33837,
      "training_step_time": 0.39097142219543457
    },
    {
      "epoch": 0.00020653076171875,
      "model_forward_time": 0.1153717041015625,
      "step": 33838
    },
    {
      "epoch": 0.00020653076171875,
      "step": 33838,
      "training_step_time": 0.394672155380249
    },
    {
      "epoch": 0.000206536865234375,
      "model_forward_time": 0.11504912376403809,
      "step": 33839
    },
    {
      "epoch": 0.000206536865234375,
      "step": 33839,
      "training_step_time": 0.4884974956512451
    },
    {
      "epoch": 0.00020654296875,
      "grad_norm": 0.15034587681293488,
      "learning_rate": 4.3569329714950704e-05,
      "loss": 0.0471,
      "step": 33840
    },
    {
      "epoch": 0.00020654296875,
      "model_forward_time": 0.11495137214660645,
      "step": 33840
    },
    {
      "epoch": 0.00020654296875,
      "step": 33840,
      "training_step_time": 0.41442012786865234
    },
    {
      "epoch": 0.000206549072265625,
      "model_forward_time": 0.1154623031616211,
      "step": 33841
    },
    {
      "epoch": 0.000206549072265625,
      "step": 33841,
      "training_step_time": 0.5176641941070557
    },
    {
      "epoch": 0.00020655517578125,
      "model_forward_time": 0.11450910568237305,
      "step": 33842
    },
    {
      "epoch": 0.00020655517578125,
      "step": 33842,
      "training_step_time": 0.39275550842285156
    },
    {
      "epoch": 0.000206561279296875,
      "model_forward_time": 0.11632156372070312,
      "step": 33843
    },
    {
      "epoch": 0.000206561279296875,
      "step": 33843,
      "training_step_time": 0.3864114284515381
    },
    {
      "epoch": 0.0002065673828125,
      "model_forward_time": 0.11528635025024414,
      "step": 33844
    },
    {
      "epoch": 0.0002065673828125,
      "step": 33844,
      "training_step_time": 0.49904322624206543
    },
    {
      "epoch": 0.000206573486328125,
      "model_forward_time": 0.11492156982421875,
      "step": 33845
    },
    {
      "epoch": 0.000206573486328125,
      "step": 33845,
      "training_step_time": 0.49552130699157715
    },
    {
      "epoch": 0.00020657958984375,
      "model_forward_time": 0.11511588096618652,
      "step": 33846
    },
    {
      "epoch": 0.00020657958984375,
      "step": 33846,
      "training_step_time": 0.43047547340393066
    },
    {
      "epoch": 0.000206585693359375,
      "model_forward_time": 0.11600732803344727,
      "step": 33847
    },
    {
      "epoch": 0.000206585693359375,
      "step": 33847,
      "training_step_time": 0.40847182273864746
    },
    {
      "epoch": 0.000206591796875,
      "model_forward_time": 0.11426091194152832,
      "step": 33848
    },
    {
      "epoch": 0.000206591796875,
      "step": 33848,
      "training_step_time": 0.3823049068450928
    },
    {
      "epoch": 0.000206597900390625,
      "model_forward_time": 0.11642765998840332,
      "step": 33849
    },
    {
      "epoch": 0.000206597900390625,
      "step": 33849,
      "training_step_time": 0.3905220031738281
    },
    {
      "epoch": 0.00020660400390625,
      "grad_norm": 0.10458351671695709,
      "learning_rate": 4.3542001735850034e-05,
      "loss": 0.0439,
      "step": 33850
    },
    {
      "epoch": 0.00020660400390625,
      "model_forward_time": 0.11463570594787598,
      "step": 33850
    },
    {
      "epoch": 0.00020660400390625,
      "step": 33850,
      "training_step_time": 0.4028129577636719
    },
    {
      "epoch": 0.000206610107421875,
      "model_forward_time": 0.11507320404052734,
      "step": 33851
    },
    {
      "epoch": 0.000206610107421875,
      "step": 33851,
      "training_step_time": 0.3894014358520508
    },
    {
      "epoch": 0.0002066162109375,
      "model_forward_time": 0.11570239067077637,
      "step": 33852
    },
    {
      "epoch": 0.0002066162109375,
      "step": 33852,
      "training_step_time": 0.3936491012573242
    },
    {
      "epoch": 0.000206622314453125,
      "model_forward_time": 0.11563277244567871,
      "step": 33853
    },
    {
      "epoch": 0.000206622314453125,
      "step": 33853,
      "training_step_time": 0.3981595039367676
    },
    {
      "epoch": 0.00020662841796875,
      "model_forward_time": 0.11595749855041504,
      "step": 33854
    },
    {
      "epoch": 0.00020662841796875,
      "step": 33854,
      "training_step_time": 0.5171372890472412
    },
    {
      "epoch": 0.000206634521484375,
      "model_forward_time": 0.11492276191711426,
      "step": 33855
    },
    {
      "epoch": 0.000206634521484375,
      "step": 33855,
      "training_step_time": 0.4990854263305664
    },
    {
      "epoch": 0.000206640625,
      "model_forward_time": 0.11528682708740234,
      "step": 33856
    },
    {
      "epoch": 0.000206640625,
      "step": 33856,
      "training_step_time": 0.3923838138580322
    },
    {
      "epoch": 0.000206646728515625,
      "model_forward_time": 0.11509013175964355,
      "step": 33857
    },
    {
      "epoch": 0.000206646728515625,
      "step": 33857,
      "training_step_time": 0.38852787017822266
    },
    {
      "epoch": 0.00020665283203125,
      "model_forward_time": 0.11555147171020508,
      "step": 33858
    },
    {
      "epoch": 0.00020665283203125,
      "step": 33858,
      "training_step_time": 0.4344625473022461
    },
    {
      "epoch": 0.000206658935546875,
      "model_forward_time": 0.1149892807006836,
      "step": 33859
    },
    {
      "epoch": 0.000206658935546875,
      "step": 33859,
      "training_step_time": 0.4044766426086426
    },
    {
      "epoch": 0.0002066650390625,
      "grad_norm": 0.10579092055559158,
      "learning_rate": 4.3514675718518734e-05,
      "loss": 0.0406,
      "step": 33860
    },
    {
      "epoch": 0.0002066650390625,
      "model_forward_time": 0.11521530151367188,
      "step": 33860
    },
    {
      "epoch": 0.0002066650390625,
      "step": 33860,
      "training_step_time": 0.49411797523498535
    },
    {
      "epoch": 0.000206671142578125,
      "model_forward_time": 0.1158590316772461,
      "step": 33861
    },
    {
      "epoch": 0.000206671142578125,
      "step": 33861,
      "training_step_time": 0.3937859535217285
    },
    {
      "epoch": 0.00020667724609375,
      "model_forward_time": 0.11495137214660645,
      "step": 33862
    },
    {
      "epoch": 0.00020667724609375,
      "step": 33862,
      "training_step_time": 0.39748311042785645
    },
    {
      "epoch": 0.000206683349609375,
      "model_forward_time": 0.11486363410949707,
      "step": 33863
    },
    {
      "epoch": 0.000206683349609375,
      "step": 33863,
      "training_step_time": 0.39142918586730957
    },
    {
      "epoch": 0.000206689453125,
      "model_forward_time": 0.11531782150268555,
      "step": 33864
    },
    {
      "epoch": 0.000206689453125,
      "step": 33864,
      "training_step_time": 0.39539146423339844
    },
    {
      "epoch": 0.000206695556640625,
      "model_forward_time": 0.1152188777923584,
      "step": 33865
    },
    {
      "epoch": 0.000206695556640625,
      "step": 33865,
      "training_step_time": 0.3970670700073242
    },
    {
      "epoch": 0.00020670166015625,
      "model_forward_time": 0.11565685272216797,
      "step": 33866
    },
    {
      "epoch": 0.00020670166015625,
      "step": 33866,
      "training_step_time": 0.40113401412963867
    },
    {
      "epoch": 0.000206707763671875,
      "model_forward_time": 0.11568284034729004,
      "step": 33867
    },
    {
      "epoch": 0.000206707763671875,
      "step": 33867,
      "training_step_time": 0.3929290771484375
    },
    {
      "epoch": 0.0002067138671875,
      "model_forward_time": 0.11544251441955566,
      "step": 33868
    },
    {
      "epoch": 0.0002067138671875,
      "step": 33868,
      "training_step_time": 0.4129476547241211
    },
    {
      "epoch": 0.000206719970703125,
      "model_forward_time": 0.11532354354858398,
      "step": 33869
    },
    {
      "epoch": 0.000206719970703125,
      "step": 33869,
      "training_step_time": 0.48101091384887695
    },
    {
      "epoch": 0.00020672607421875,
      "grad_norm": 0.09819471091032028,
      "learning_rate": 4.348735167125771e-05,
      "loss": 0.0397,
      "step": 33870
    },
    {
      "epoch": 0.00020672607421875,
      "model_forward_time": 0.11560535430908203,
      "step": 33870
    },
    {
      "epoch": 0.00020672607421875,
      "step": 33870,
      "training_step_time": 0.4591047763824463
    },
    {
      "epoch": 0.000206732177734375,
      "model_forward_time": 0.11520934104919434,
      "step": 33871
    },
    {
      "epoch": 0.000206732177734375,
      "step": 33871,
      "training_step_time": 0.40639424324035645
    },
    {
      "epoch": 0.00020673828125,
      "model_forward_time": 0.11526608467102051,
      "step": 33872
    },
    {
      "epoch": 0.00020673828125,
      "step": 33872,
      "training_step_time": 0.406280517578125
    },
    {
      "epoch": 0.000206744384765625,
      "model_forward_time": 0.11584305763244629,
      "step": 33873
    },
    {
      "epoch": 0.000206744384765625,
      "step": 33873,
      "training_step_time": 0.46503257751464844
    },
    {
      "epoch": 0.00020675048828125,
      "model_forward_time": 0.11581754684448242,
      "step": 33874
    },
    {
      "epoch": 0.00020675048828125,
      "step": 33874,
      "training_step_time": 0.4244875907897949
    },
    {
      "epoch": 0.000206756591796875,
      "model_forward_time": 0.11488127708435059,
      "step": 33875
    },
    {
      "epoch": 0.000206756591796875,
      "step": 33875,
      "training_step_time": 0.4952700138092041
    },
    {
      "epoch": 0.0002067626953125,
      "model_forward_time": 0.11504721641540527,
      "step": 33876
    },
    {
      "epoch": 0.0002067626953125,
      "step": 33876,
      "training_step_time": 0.4082627296447754
    },
    {
      "epoch": 0.000206768798828125,
      "model_forward_time": 0.11504054069519043,
      "step": 33877
    },
    {
      "epoch": 0.000206768798828125,
      "step": 33877,
      "training_step_time": 0.38698577880859375
    },
    {
      "epoch": 0.00020677490234375,
      "model_forward_time": 0.11484932899475098,
      "step": 33878
    },
    {
      "epoch": 0.00020677490234375,
      "step": 33878,
      "training_step_time": 0.3959527015686035
    },
    {
      "epoch": 0.000206781005859375,
      "model_forward_time": 0.11504578590393066,
      "step": 33879
    },
    {
      "epoch": 0.000206781005859375,
      "step": 33879,
      "training_step_time": 0.3902106285095215
    },
    {
      "epoch": 0.000206787109375,
      "grad_norm": 0.1426604986190796,
      "learning_rate": 4.3460029602367284e-05,
      "loss": 0.0433,
      "step": 33880
    },
    {
      "epoch": 0.000206787109375,
      "model_forward_time": 0.11553287506103516,
      "step": 33880
    },
    {
      "epoch": 0.000206787109375,
      "step": 33880,
      "training_step_time": 0.39691972732543945
    },
    {
      "epoch": 0.000206793212890625,
      "model_forward_time": 0.11565756797790527,
      "step": 33881
    },
    {
      "epoch": 0.000206793212890625,
      "step": 33881,
      "training_step_time": 0.39055395126342773
    },
    {
      "epoch": 0.00020679931640625,
      "model_forward_time": 0.11557602882385254,
      "step": 33882
    },
    {
      "epoch": 0.00020679931640625,
      "step": 33882,
      "training_step_time": 0.3901546001434326
    },
    {
      "epoch": 0.000206805419921875,
      "model_forward_time": 0.11507344245910645,
      "step": 33883
    },
    {
      "epoch": 0.000206805419921875,
      "step": 33883,
      "training_step_time": 0.40007710456848145
    },
    {
      "epoch": 0.0002068115234375,
      "model_forward_time": 0.11549162864685059,
      "step": 33884
    },
    {
      "epoch": 0.0002068115234375,
      "step": 33884,
      "training_step_time": 0.4308280944824219
    },
    {
      "epoch": 0.000206817626953125,
      "model_forward_time": 0.11574769020080566,
      "step": 33885
    },
    {
      "epoch": 0.000206817626953125,
      "step": 33885,
      "training_step_time": 0.5079543590545654
    },
    {
      "epoch": 0.00020682373046875,
      "model_forward_time": 0.11504578590393066,
      "step": 33886
    },
    {
      "epoch": 0.00020682373046875,
      "step": 33886,
      "training_step_time": 0.3855407238006592
    },
    {
      "epoch": 0.000206829833984375,
      "model_forward_time": 0.11898636817932129,
      "step": 33887
    },
    {
      "epoch": 0.000206829833984375,
      "step": 33887,
      "training_step_time": 0.37132883071899414
    },
    {
      "epoch": 0.0002068359375,
      "model_forward_time": 0.11528754234313965,
      "step": 33888
    },
    {
      "epoch": 0.0002068359375,
      "step": 33888,
      "training_step_time": 0.479320764541626
    },
    {
      "epoch": 0.000206842041015625,
      "model_forward_time": 0.11849164962768555,
      "step": 33889
    },
    {
      "epoch": 0.000206842041015625,
      "step": 33889,
      "training_step_time": 0.5060980319976807
    },
    {
      "epoch": 0.00020684814453125,
      "grad_norm": 0.10364055633544922,
      "learning_rate": 4.3432709520147205e-05,
      "loss": 0.0425,
      "step": 33890
    },
    {
      "epoch": 0.00020684814453125,
      "model_forward_time": 0.11846661567687988,
      "step": 33890
    },
    {
      "epoch": 0.00020684814453125,
      "step": 33890,
      "training_step_time": 0.4049811363220215
    },
    {
      "epoch": 0.000206854248046875,
      "model_forward_time": 0.11488699913024902,
      "step": 33891
    },
    {
      "epoch": 0.000206854248046875,
      "step": 33891,
      "training_step_time": 0.38814783096313477
    },
    {
      "epoch": 0.0002068603515625,
      "model_forward_time": 0.11486363410949707,
      "step": 33892
    },
    {
      "epoch": 0.0002068603515625,
      "step": 33892,
      "training_step_time": 0.3957066535949707
    },
    {
      "epoch": 0.000206866455078125,
      "model_forward_time": 0.11464667320251465,
      "step": 33893
    },
    {
      "epoch": 0.000206866455078125,
      "step": 33893,
      "training_step_time": 0.39276838302612305
    },
    {
      "epoch": 0.00020687255859375,
      "model_forward_time": 0.11534285545349121,
      "step": 33894
    },
    {
      "epoch": 0.00020687255859375,
      "step": 33894,
      "training_step_time": 0.3956475257873535
    },
    {
      "epoch": 0.000206878662109375,
      "model_forward_time": 0.11468815803527832,
      "step": 33895
    },
    {
      "epoch": 0.000206878662109375,
      "step": 33895,
      "training_step_time": 0.39808011054992676
    },
    {
      "epoch": 0.000206884765625,
      "model_forward_time": 0.11534619331359863,
      "step": 33896
    },
    {
      "epoch": 0.000206884765625,
      "step": 33896,
      "training_step_time": 0.3874034881591797
    },
    {
      "epoch": 0.000206890869140625,
      "model_forward_time": 0.11536288261413574,
      "step": 33897
    },
    {
      "epoch": 0.000206890869140625,
      "step": 33897,
      "training_step_time": 0.3963510990142822
    },
    {
      "epoch": 0.00020689697265625,
      "model_forward_time": 0.1154637336730957,
      "step": 33898
    },
    {
      "epoch": 0.00020689697265625,
      "step": 33898,
      "training_step_time": 0.3956582546234131
    },
    {
      "epoch": 0.000206903076171875,
      "model_forward_time": 0.11547994613647461,
      "step": 33899
    },
    {
      "epoch": 0.000206903076171875,
      "step": 33899,
      "training_step_time": 0.4218928813934326
    },
    {
      "epoch": 0.0002069091796875,
      "grad_norm": 0.10410003364086151,
      "learning_rate": 4.3405391432896555e-05,
      "loss": 0.0452,
      "step": 33900
    },
    {
      "epoch": 0.0002069091796875,
      "model_forward_time": 0.11551094055175781,
      "step": 33900
    },
    {
      "epoch": 0.0002069091796875,
      "step": 33900,
      "training_step_time": 0.6028735637664795
    },
    {
      "epoch": 0.000206915283203125,
      "model_forward_time": 0.11509275436401367,
      "step": 33901
    },
    {
      "epoch": 0.000206915283203125,
      "step": 33901,
      "training_step_time": 0.41080284118652344
    },
    {
      "epoch": 0.00020692138671875,
      "model_forward_time": 0.11481046676635742,
      "step": 33902
    },
    {
      "epoch": 0.00020692138671875,
      "step": 33902,
      "training_step_time": 0.47986769676208496
    },
    {
      "epoch": 0.000206927490234375,
      "model_forward_time": 0.11519145965576172,
      "step": 33903
    },
    {
      "epoch": 0.000206927490234375,
      "step": 33903,
      "training_step_time": 0.41759252548217773
    },
    {
      "epoch": 0.00020693359375,
      "model_forward_time": 0.11480093002319336,
      "step": 33904
    },
    {
      "epoch": 0.00020693359375,
      "step": 33904,
      "training_step_time": 0.48511385917663574
    },
    {
      "epoch": 0.000206939697265625,
      "model_forward_time": 0.11499786376953125,
      "step": 33905
    },
    {
      "epoch": 0.000206939697265625,
      "step": 33905,
      "training_step_time": 0.38913774490356445
    },
    {
      "epoch": 0.00020694580078125,
      "model_forward_time": 0.11548781394958496,
      "step": 33906
    },
    {
      "epoch": 0.00020694580078125,
      "step": 33906,
      "training_step_time": 0.4976527690887451
    },
    {
      "epoch": 0.000206951904296875,
      "model_forward_time": 0.11510634422302246,
      "step": 33907
    },
    {
      "epoch": 0.000206951904296875,
      "step": 33907,
      "training_step_time": 0.39089250564575195
    },
    {
      "epoch": 0.0002069580078125,
      "model_forward_time": 0.11510777473449707,
      "step": 33908
    },
    {
      "epoch": 0.0002069580078125,
      "step": 33908,
      "training_step_time": 0.381976842880249
    },
    {
      "epoch": 0.000206964111328125,
      "model_forward_time": 0.11548209190368652,
      "step": 33909
    },
    {
      "epoch": 0.000206964111328125,
      "step": 33909,
      "training_step_time": 0.3811028003692627
    },
    {
      "epoch": 0.00020697021484375,
      "grad_norm": 0.10560924559831619,
      "learning_rate": 4.3378075348913886e-05,
      "loss": 0.0422,
      "step": 33910
    },
    {
      "epoch": 0.00020697021484375,
      "model_forward_time": 0.11467289924621582,
      "step": 33910
    },
    {
      "epoch": 0.00020697021484375,
      "step": 33910,
      "training_step_time": 0.39293622970581055
    },
    {
      "epoch": 0.000206976318359375,
      "model_forward_time": 0.1160287857055664,
      "step": 33911
    },
    {
      "epoch": 0.000206976318359375,
      "step": 33911,
      "training_step_time": 0.3904578685760498
    },
    {
      "epoch": 0.000206982421875,
      "model_forward_time": 0.11502504348754883,
      "step": 33912
    },
    {
      "epoch": 0.000206982421875,
      "step": 33912,
      "training_step_time": 0.5998294353485107
    },
    {
      "epoch": 0.000206988525390625,
      "model_forward_time": 0.11492800712585449,
      "step": 33913
    },
    {
      "epoch": 0.000206988525390625,
      "step": 33913,
      "training_step_time": 0.4413771629333496
    },
    {
      "epoch": 0.00020699462890625,
      "model_forward_time": 0.11538362503051758,
      "step": 33914
    },
    {
      "epoch": 0.00020699462890625,
      "step": 33914,
      "training_step_time": 0.4009366035461426
    },
    {
      "epoch": 0.000207000732421875,
      "model_forward_time": 0.11504220962524414,
      "step": 33915
    },
    {
      "epoch": 0.000207000732421875,
      "step": 33915,
      "training_step_time": 0.3922998905181885
    },
    {
      "epoch": 0.0002070068359375,
      "model_forward_time": 0.11438822746276855,
      "step": 33916
    },
    {
      "epoch": 0.0002070068359375,
      "step": 33916,
      "training_step_time": 0.3642144203186035
    },
    {
      "epoch": 0.000207012939453125,
      "model_forward_time": 0.1146245002746582,
      "step": 33917
    },
    {
      "epoch": 0.000207012939453125,
      "step": 33917,
      "training_step_time": 0.47783613204956055
    },
    {
      "epoch": 0.00020701904296875,
      "model_forward_time": 0.11479425430297852,
      "step": 33918
    },
    {
      "epoch": 0.00020701904296875,
      "step": 33918,
      "training_step_time": 0.491274356842041
    },
    {
      "epoch": 0.000207025146484375,
      "model_forward_time": 0.11517500877380371,
      "step": 33919
    },
    {
      "epoch": 0.000207025146484375,
      "step": 33919,
      "training_step_time": 0.3916959762573242
    },
    {
      "epoch": 0.00020703125,
      "grad_norm": 0.09006021171808243,
      "learning_rate": 4.335076127649707e-05,
      "loss": 0.0412,
      "step": 33920
    },
    {
      "epoch": 0.00020703125,
      "model_forward_time": 0.11521053314208984,
      "step": 33920
    },
    {
      "epoch": 0.00020703125,
      "step": 33920,
      "training_step_time": 0.3964579105377197
    },
    {
      "epoch": 0.000207037353515625,
      "model_forward_time": 0.11464762687683105,
      "step": 33921
    },
    {
      "epoch": 0.000207037353515625,
      "step": 33921,
      "training_step_time": 0.38440680503845215
    },
    {
      "epoch": 0.00020704345703125,
      "model_forward_time": 0.11514472961425781,
      "step": 33922
    },
    {
      "epoch": 0.00020704345703125,
      "step": 33922,
      "training_step_time": 0.38834381103515625
    },
    {
      "epoch": 0.000207049560546875,
      "model_forward_time": 0.11610841751098633,
      "step": 33923
    },
    {
      "epoch": 0.000207049560546875,
      "step": 33923,
      "training_step_time": 0.39647626876831055
    },
    {
      "epoch": 0.0002070556640625,
      "model_forward_time": 0.11507463455200195,
      "step": 33924
    },
    {
      "epoch": 0.0002070556640625,
      "step": 33924,
      "training_step_time": 0.5049841403961182
    },
    {
      "epoch": 0.000207061767578125,
      "model_forward_time": 0.11558341979980469,
      "step": 33925
    },
    {
      "epoch": 0.000207061767578125,
      "step": 33925,
      "training_step_time": 0.3889768123626709
    },
    {
      "epoch": 0.00020706787109375,
      "model_forward_time": 0.11463665962219238,
      "step": 33926
    },
    {
      "epoch": 0.00020706787109375,
      "step": 33926,
      "training_step_time": 0.41287922859191895
    },
    {
      "epoch": 0.000207073974609375,
      "model_forward_time": 0.1150813102722168,
      "step": 33927
    },
    {
      "epoch": 0.000207073974609375,
      "step": 33927,
      "training_step_time": 0.42438840866088867
    },
    {
      "epoch": 0.000207080078125,
      "model_forward_time": 0.11565327644348145,
      "step": 33928
    },
    {
      "epoch": 0.000207080078125,
      "step": 33928,
      "training_step_time": 0.41565752029418945
    },
    {
      "epoch": 0.000207086181640625,
      "model_forward_time": 0.11544942855834961,
      "step": 33929
    },
    {
      "epoch": 0.000207086181640625,
      "step": 33929,
      "training_step_time": 0.38854002952575684
    },
    {
      "epoch": 0.00020709228515625,
      "grad_norm": 0.12006540596485138,
      "learning_rate": 4.3323449223943416e-05,
      "loss": 0.0402,
      "step": 33930
    },
    {
      "epoch": 0.00020709228515625,
      "model_forward_time": 0.11510181427001953,
      "step": 33930
    },
    {
      "epoch": 0.00020709228515625,
      "step": 33930,
      "training_step_time": 0.6530416011810303
    },
    {
      "epoch": 0.000207098388671875,
      "model_forward_time": 0.11508846282958984,
      "step": 33931
    },
    {
      "epoch": 0.000207098388671875,
      "step": 33931,
      "training_step_time": 0.4525623321533203
    },
    {
      "epoch": 0.0002071044921875,
      "model_forward_time": 0.11514782905578613,
      "step": 33932
    },
    {
      "epoch": 0.0002071044921875,
      "step": 33932,
      "training_step_time": 0.4289367198944092
    },
    {
      "epoch": 0.000207110595703125,
      "model_forward_time": 0.11546993255615234,
      "step": 33933
    },
    {
      "epoch": 0.000207110595703125,
      "step": 33933,
      "training_step_time": 0.3889303207397461
    },
    {
      "epoch": 0.00020711669921875,
      "model_forward_time": 0.11439847946166992,
      "step": 33934
    },
    {
      "epoch": 0.00020711669921875,
      "step": 33934,
      "training_step_time": 0.38636255264282227
    },
    {
      "epoch": 0.000207122802734375,
      "model_forward_time": 0.11492538452148438,
      "step": 33935
    },
    {
      "epoch": 0.000207122802734375,
      "step": 33935,
      "training_step_time": 0.4000680446624756
    },
    {
      "epoch": 0.00020712890625,
      "model_forward_time": 0.1149744987487793,
      "step": 33936
    },
    {
      "epoch": 0.00020712890625,
      "step": 33936,
      "training_step_time": 0.5483219623565674
    },
    {
      "epoch": 0.000207135009765625,
      "model_forward_time": 0.11487603187561035,
      "step": 33937
    },
    {
      "epoch": 0.000207135009765625,
      "step": 33937,
      "training_step_time": 0.4065086841583252
    },
    {
      "epoch": 0.00020714111328125,
      "model_forward_time": 0.11538362503051758,
      "step": 33938
    },
    {
      "epoch": 0.00020714111328125,
      "step": 33938,
      "training_step_time": 0.3902101516723633
    },
    {
      "epoch": 0.000207147216796875,
      "model_forward_time": 0.11511802673339844,
      "step": 33939
    },
    {
      "epoch": 0.000207147216796875,
      "step": 33939,
      "training_step_time": 0.3985123634338379
    },
    {
      "epoch": 0.0002071533203125,
      "grad_norm": 0.08754310756921768,
      "learning_rate": 4.329613919954962e-05,
      "loss": 0.0395,
      "step": 33940
    },
    {
      "epoch": 0.0002071533203125,
      "model_forward_time": 0.11589670181274414,
      "step": 33940
    },
    {
      "epoch": 0.0002071533203125,
      "step": 33940,
      "training_step_time": 0.40700817108154297
    },
    {
      "epoch": 0.000207159423828125,
      "model_forward_time": 0.11476397514343262,
      "step": 33941
    },
    {
      "epoch": 0.000207159423828125,
      "step": 33941,
      "training_step_time": 0.41693663597106934
    },
    {
      "epoch": 0.00020716552734375,
      "model_forward_time": 0.11539483070373535,
      "step": 33942
    },
    {
      "epoch": 0.00020716552734375,
      "step": 33942,
      "training_step_time": 0.6475198268890381
    },
    {
      "epoch": 0.000207171630859375,
      "model_forward_time": 0.11462783813476562,
      "step": 33943
    },
    {
      "epoch": 0.000207171630859375,
      "step": 33943,
      "training_step_time": 0.39634203910827637
    },
    {
      "epoch": 0.000207177734375,
      "model_forward_time": 0.11494731903076172,
      "step": 33944
    },
    {
      "epoch": 0.000207177734375,
      "step": 33944,
      "training_step_time": 0.4471883773803711
    },
    {
      "epoch": 0.000207183837890625,
      "model_forward_time": 0.1149590015411377,
      "step": 33945
    },
    {
      "epoch": 0.000207183837890625,
      "step": 33945,
      "training_step_time": 0.4343271255493164
    },
    {
      "epoch": 0.00020718994140625,
      "model_forward_time": 0.1147618293762207,
      "step": 33946
    },
    {
      "epoch": 0.00020718994140625,
      "step": 33946,
      "training_step_time": 0.4868910312652588
    },
    {
      "epoch": 0.000207196044921875,
      "model_forward_time": 0.11426067352294922,
      "step": 33947
    },
    {
      "epoch": 0.000207196044921875,
      "step": 33947,
      "training_step_time": 0.38931751251220703
    },
    {
      "epoch": 0.0002072021484375,
      "model_forward_time": 0.1148841381072998,
      "step": 33948
    },
    {
      "epoch": 0.0002072021484375,
      "step": 33948,
      "training_step_time": 0.44902896881103516
    },
    {
      "epoch": 0.000207208251953125,
      "model_forward_time": 0.11502885818481445,
      "step": 33949
    },
    {
      "epoch": 0.000207208251953125,
      "step": 33949,
      "training_step_time": 0.3939547538757324
    },
    {
      "epoch": 0.00020721435546875,
      "grad_norm": 0.1556338667869568,
      "learning_rate": 4.3268831211611707e-05,
      "loss": 0.0455,
      "step": 33950
    },
    {
      "epoch": 0.00020721435546875,
      "model_forward_time": 0.11602377891540527,
      "step": 33950
    },
    {
      "epoch": 0.00020721435546875,
      "step": 33950,
      "training_step_time": 0.3907902240753174
    },
    {
      "epoch": 0.000207220458984375,
      "model_forward_time": 0.11479997634887695,
      "step": 33951
    },
    {
      "epoch": 0.000207220458984375,
      "step": 33951,
      "training_step_time": 0.4035649299621582
    },
    {
      "epoch": 0.0002072265625,
      "model_forward_time": 0.11549592018127441,
      "step": 33952
    },
    {
      "epoch": 0.0002072265625,
      "step": 33952,
      "training_step_time": 0.3928966522216797
    },
    {
      "epoch": 0.000207232666015625,
      "model_forward_time": 0.114898681640625,
      "step": 33953
    },
    {
      "epoch": 0.000207232666015625,
      "step": 33953,
      "training_step_time": 0.41248512268066406
    },
    {
      "epoch": 0.00020723876953125,
      "model_forward_time": 0.11569094657897949,
      "step": 33954
    },
    {
      "epoch": 0.00020723876953125,
      "step": 33954,
      "training_step_time": 0.589031457901001
    },
    {
      "epoch": 0.000207244873046875,
      "model_forward_time": 0.11499619483947754,
      "step": 33955
    },
    {
      "epoch": 0.000207244873046875,
      "step": 33955,
      "training_step_time": 0.41529059410095215
    },
    {
      "epoch": 0.0002072509765625,
      "model_forward_time": 0.11529755592346191,
      "step": 33956
    },
    {
      "epoch": 0.0002072509765625,
      "step": 33956,
      "training_step_time": 0.4635601043701172
    },
    {
      "epoch": 0.000207257080078125,
      "model_forward_time": 0.11486601829528809,
      "step": 33957
    },
    {
      "epoch": 0.000207257080078125,
      "step": 33957,
      "training_step_time": 0.3896677494049072
    },
    {
      "epoch": 0.00020726318359375,
      "model_forward_time": 0.11460566520690918,
      "step": 33958
    },
    {
      "epoch": 0.00020726318359375,
      "step": 33958,
      "training_step_time": 0.4036431312561035
    },
    {
      "epoch": 0.000207269287109375,
      "model_forward_time": 0.11494016647338867,
      "step": 33959
    },
    {
      "epoch": 0.000207269287109375,
      "step": 33959,
      "training_step_time": 0.4184110164642334
    },
    {
      "epoch": 0.000207275390625,
      "grad_norm": 0.11422285437583923,
      "learning_rate": 4.324152526842517e-05,
      "loss": 0.0408,
      "step": 33960
    },
    {
      "epoch": 0.000207275390625,
      "model_forward_time": 0.11743640899658203,
      "step": 33960
    },
    {
      "epoch": 0.000207275390625,
      "step": 33960,
      "training_step_time": 0.6385324001312256
    },
    {
      "epoch": 0.000207281494140625,
      "model_forward_time": 0.11490392684936523,
      "step": 33961
    },
    {
      "epoch": 0.000207281494140625,
      "step": 33961,
      "training_step_time": 0.39951658248901367
    },
    {
      "epoch": 0.00020728759765625,
      "model_forward_time": 0.11605525016784668,
      "step": 33962
    },
    {
      "epoch": 0.00020728759765625,
      "step": 33962,
      "training_step_time": 0.3764486312866211
    },
    {
      "epoch": 0.000207293701171875,
      "model_forward_time": 0.11503076553344727,
      "step": 33963
    },
    {
      "epoch": 0.000207293701171875,
      "step": 33963,
      "training_step_time": 0.38654637336730957
    },
    {
      "epoch": 0.0002072998046875,
      "model_forward_time": 0.11416459083557129,
      "step": 33964
    },
    {
      "epoch": 0.0002072998046875,
      "step": 33964,
      "training_step_time": 0.38159894943237305
    },
    {
      "epoch": 0.000207305908203125,
      "model_forward_time": 0.11490797996520996,
      "step": 33965
    },
    {
      "epoch": 0.000207305908203125,
      "step": 33965,
      "training_step_time": 0.39049553871154785
    },
    {
      "epoch": 0.00020731201171875,
      "model_forward_time": 0.11520504951477051,
      "step": 33966
    },
    {
      "epoch": 0.00020731201171875,
      "step": 33966,
      "training_step_time": 0.5454761981964111
    },
    {
      "epoch": 0.000207318115234375,
      "model_forward_time": 0.1149742603302002,
      "step": 33967
    },
    {
      "epoch": 0.000207318115234375,
      "step": 33967,
      "training_step_time": 0.39675164222717285
    },
    {
      "epoch": 0.00020732421875,
      "model_forward_time": 0.11521792411804199,
      "step": 33968
    },
    {
      "epoch": 0.00020732421875,
      "step": 33968,
      "training_step_time": 0.4610140323638916
    },
    {
      "epoch": 0.000207330322265625,
      "model_forward_time": 0.1151425838470459,
      "step": 33969
    },
    {
      "epoch": 0.000207330322265625,
      "step": 33969,
      "training_step_time": 0.4224863052368164
    },
    {
      "epoch": 0.00020733642578125,
      "grad_norm": 0.1399438977241516,
      "learning_rate": 4.321422137828479e-05,
      "loss": 0.0475,
      "step": 33970
    },
    {
      "epoch": 0.00020733642578125,
      "model_forward_time": 0.11518502235412598,
      "step": 33970
    },
    {
      "epoch": 0.00020733642578125,
      "step": 33970,
      "training_step_time": 0.4904143810272217
    },
    {
      "epoch": 0.000207342529296875,
      "model_forward_time": 0.11532258987426758,
      "step": 33971
    },
    {
      "epoch": 0.000207342529296875,
      "step": 33971,
      "training_step_time": 0.39314699172973633
    },
    {
      "epoch": 0.0002073486328125,
      "model_forward_time": 0.11569499969482422,
      "step": 33972
    },
    {
      "epoch": 0.0002073486328125,
      "step": 33972,
      "training_step_time": 0.5282294750213623
    },
    {
      "epoch": 0.000207354736328125,
      "model_forward_time": 0.1143946647644043,
      "step": 33973
    },
    {
      "epoch": 0.000207354736328125,
      "step": 33973,
      "training_step_time": 0.46518564224243164
    },
    {
      "epoch": 0.00020736083984375,
      "model_forward_time": 0.1191246509552002,
      "step": 33974
    },
    {
      "epoch": 0.00020736083984375,
      "step": 33974,
      "training_step_time": 0.47632527351379395
    },
    {
      "epoch": 0.000207366943359375,
      "model_forward_time": 0.11930489540100098,
      "step": 33975
    },
    {
      "epoch": 0.000207366943359375,
      "step": 33975,
      "training_step_time": 0.3798818588256836
    },
    {
      "epoch": 0.000207373046875,
      "model_forward_time": 0.11883330345153809,
      "step": 33976
    },
    {
      "epoch": 0.000207373046875,
      "step": 33976,
      "training_step_time": 0.3942396640777588
    },
    {
      "epoch": 0.000207379150390625,
      "model_forward_time": 0.12177181243896484,
      "step": 33977
    },
    {
      "epoch": 0.000207379150390625,
      "step": 33977,
      "training_step_time": 0.41985249519348145
    },
    {
      "epoch": 0.00020738525390625,
      "model_forward_time": 0.11477994918823242,
      "step": 33978
    },
    {
      "epoch": 0.00020738525390625,
      "step": 33978,
      "training_step_time": 0.39687037467956543
    },
    {
      "epoch": 0.000207391357421875,
      "model_forward_time": 0.11656022071838379,
      "step": 33979
    },
    {
      "epoch": 0.000207391357421875,
      "step": 33979,
      "training_step_time": 0.4155764579772949
    },
    {
      "epoch": 0.0002073974609375,
      "grad_norm": 0.12730003893375397,
      "learning_rate": 4.3186919549484784e-05,
      "loss": 0.0432,
      "step": 33980
    },
    {
      "epoch": 0.0002073974609375,
      "model_forward_time": 0.1150062084197998,
      "step": 33980
    },
    {
      "epoch": 0.0002073974609375,
      "step": 33980,
      "training_step_time": 0.39521241188049316
    },
    {
      "epoch": 0.000207403564453125,
      "model_forward_time": 0.11472606658935547,
      "step": 33981
    },
    {
      "epoch": 0.000207403564453125,
      "step": 33981,
      "training_step_time": 0.3927485942840576
    },
    {
      "epoch": 0.00020740966796875,
      "model_forward_time": 0.11596798896789551,
      "step": 33982
    },
    {
      "epoch": 0.00020740966796875,
      "step": 33982,
      "training_step_time": 0.39031457901000977
    },
    {
      "epoch": 0.000207415771484375,
      "model_forward_time": 0.11632800102233887,
      "step": 33983
    },
    {
      "epoch": 0.000207415771484375,
      "step": 33983,
      "training_step_time": 0.39837646484375
    },
    {
      "epoch": 0.000207421875,
      "model_forward_time": 0.11496829986572266,
      "step": 33984
    },
    {
      "epoch": 0.000207421875,
      "step": 33984,
      "training_step_time": 0.6263976097106934
    },
    {
      "epoch": 0.000207427978515625,
      "model_forward_time": 0.11484766006469727,
      "step": 33985
    },
    {
      "epoch": 0.000207427978515625,
      "step": 33985,
      "training_step_time": 0.3951420783996582
    },
    {
      "epoch": 0.00020743408203125,
      "model_forward_time": 0.11539459228515625,
      "step": 33986
    },
    {
      "epoch": 0.00020743408203125,
      "step": 33986,
      "training_step_time": 0.40976881980895996
    },
    {
      "epoch": 0.000207440185546875,
      "model_forward_time": 0.1154782772064209,
      "step": 33987
    },
    {
      "epoch": 0.000207440185546875,
      "step": 33987,
      "training_step_time": 0.4245870113372803
    },
    {
      "epoch": 0.0002074462890625,
      "model_forward_time": 0.11557626724243164,
      "step": 33988
    },
    {
      "epoch": 0.0002074462890625,
      "step": 33988,
      "training_step_time": 0.4920215606689453
    },
    {
      "epoch": 0.000207452392578125,
      "model_forward_time": 0.11481499671936035,
      "step": 33989
    },
    {
      "epoch": 0.000207452392578125,
      "step": 33989,
      "training_step_time": 0.41933703422546387
    },
    {
      "epoch": 0.00020745849609375,
      "grad_norm": 0.1034703254699707,
      "learning_rate": 4.315961979031875e-05,
      "loss": 0.0404,
      "step": 33990
    },
    {
      "epoch": 0.00020745849609375,
      "model_forward_time": 0.11538577079772949,
      "step": 33990
    },
    {
      "epoch": 0.00020745849609375,
      "step": 33990,
      "training_step_time": 0.5491378307342529
    },
    {
      "epoch": 0.000207464599609375,
      "model_forward_time": 0.1146242618560791,
      "step": 33991
    },
    {
      "epoch": 0.000207464599609375,
      "step": 33991,
      "training_step_time": 0.3934760093688965
    },
    {
      "epoch": 0.000207470703125,
      "model_forward_time": 0.11494588851928711,
      "step": 33992
    },
    {
      "epoch": 0.000207470703125,
      "step": 33992,
      "training_step_time": 0.43448328971862793
    },
    {
      "epoch": 0.000207476806640625,
      "model_forward_time": 0.11488652229309082,
      "step": 33993
    },
    {
      "epoch": 0.000207476806640625,
      "step": 33993,
      "training_step_time": 0.4070556163787842
    },
    {
      "epoch": 0.00020748291015625,
      "model_forward_time": 0.1143636703491211,
      "step": 33994
    },
    {
      "epoch": 0.00020748291015625,
      "step": 33994,
      "training_step_time": 0.3846704959869385
    },
    {
      "epoch": 0.000207489013671875,
      "model_forward_time": 0.11453056335449219,
      "step": 33995
    },
    {
      "epoch": 0.000207489013671875,
      "step": 33995,
      "training_step_time": 0.3992156982421875
    },
    {
      "epoch": 0.0002074951171875,
      "model_forward_time": 0.11714744567871094,
      "step": 33996
    },
    {
      "epoch": 0.0002074951171875,
      "step": 33996,
      "training_step_time": 0.6272399425506592
    },
    {
      "epoch": 0.000207501220703125,
      "model_forward_time": 0.11512279510498047,
      "step": 33997
    },
    {
      "epoch": 0.000207501220703125,
      "step": 33997,
      "training_step_time": 0.4376070499420166
    },
    {
      "epoch": 0.00020750732421875,
      "model_forward_time": 0.11541485786437988,
      "step": 33998
    },
    {
      "epoch": 0.00020750732421875,
      "step": 33998,
      "training_step_time": 0.41398024559020996
    },
    {
      "epoch": 0.000207513427734375,
      "model_forward_time": 0.11537384986877441,
      "step": 33999
    },
    {
      "epoch": 0.000207513427734375,
      "step": 33999,
      "training_step_time": 0.3844897747039795
    },
    {
      "epoch": 0.00020751953125,
      "grad_norm": 0.1938857138156891,
      "learning_rate": 4.3132322109079596e-05,
      "loss": 0.0491,
      "step": 34000
    },
    {
      "epoch": 0.00020751953125,
      "model_forward_time": 0.11303257942199707,
      "step": 34000
    },
    {
      "epoch": 0.00020751953125,
      "step": 34000,
      "training_step_time": 0.3539876937866211
    },
    {
      "epoch": 0.000207525634765625,
      "model_forward_time": 0.1124122142791748,
      "step": 34001
    },
    {
      "epoch": 0.000207525634765625,
      "step": 34001,
      "training_step_time": 0.371657133102417
    },
    {
      "epoch": 0.00020753173828125,
      "model_forward_time": 0.11341047286987305,
      "step": 34002
    },
    {
      "epoch": 0.00020753173828125,
      "step": 34002,
      "training_step_time": 0.41860032081604004
    },
    {
      "epoch": 0.000207537841796875,
      "model_forward_time": 0.11322736740112305,
      "step": 34003
    },
    {
      "epoch": 0.000207537841796875,
      "step": 34003,
      "training_step_time": 0.42526984214782715
    },
    {
      "epoch": 0.0002075439453125,
      "model_forward_time": 0.11434149742126465,
      "step": 34004
    },
    {
      "epoch": 0.0002075439453125,
      "step": 34004,
      "training_step_time": 0.4548065662384033
    },
    {
      "epoch": 0.000207550048828125,
      "model_forward_time": 0.11509084701538086,
      "step": 34005
    },
    {
      "epoch": 0.000207550048828125,
      "step": 34005,
      "training_step_time": 0.392894983291626
    },
    {
      "epoch": 0.00020755615234375,
      "model_forward_time": 0.1147761344909668,
      "step": 34006
    },
    {
      "epoch": 0.00020755615234375,
      "step": 34006,
      "training_step_time": 0.41525745391845703
    },
    {
      "epoch": 0.000207562255859375,
      "model_forward_time": 0.11472392082214355,
      "step": 34007
    },
    {
      "epoch": 0.000207562255859375,
      "step": 34007,
      "training_step_time": 0.40898966789245605
    },
    {
      "epoch": 0.000207568359375,
      "model_forward_time": 0.11454463005065918,
      "step": 34008
    },
    {
      "epoch": 0.000207568359375,
      "step": 34008,
      "training_step_time": 0.3975367546081543
    },
    {
      "epoch": 0.000207574462890625,
      "model_forward_time": 0.11561703681945801,
      "step": 34009
    },
    {
      "epoch": 0.000207574462890625,
      "step": 34009,
      "training_step_time": 0.39192724227905273
    },
    {
      "epoch": 0.00020758056640625,
      "grad_norm": 0.13444329798221588,
      "learning_rate": 4.310502651405967e-05,
      "loss": 0.044,
      "step": 34010
    },
    {
      "epoch": 0.00020758056640625,
      "model_forward_time": 0.1151587963104248,
      "step": 34010
    },
    {
      "epoch": 0.00020758056640625,
      "step": 34010,
      "training_step_time": 0.3866889476776123
    },
    {
      "epoch": 0.000207586669921875,
      "model_forward_time": 0.11514401435852051,
      "step": 34011
    },
    {
      "epoch": 0.000207586669921875,
      "step": 34011,
      "training_step_time": 0.39228010177612305
    },
    {
      "epoch": 0.0002075927734375,
      "model_forward_time": 0.11537456512451172,
      "step": 34012
    },
    {
      "epoch": 0.0002075927734375,
      "step": 34012,
      "training_step_time": 0.3948054313659668
    },
    {
      "epoch": 0.000207598876953125,
      "model_forward_time": 0.11496186256408691,
      "step": 34013
    },
    {
      "epoch": 0.000207598876953125,
      "step": 34013,
      "training_step_time": 0.4361755847930908
    },
    {
      "epoch": 0.00020760498046875,
      "model_forward_time": 0.11493587493896484,
      "step": 34014
    },
    {
      "epoch": 0.00020760498046875,
      "step": 34014,
      "training_step_time": 0.4015223979949951
    },
    {
      "epoch": 0.000207611083984375,
      "model_forward_time": 0.1155543327331543,
      "step": 34015
    },
    {
      "epoch": 0.000207611083984375,
      "step": 34015,
      "training_step_time": 0.4357109069824219
    },
    {
      "epoch": 0.0002076171875,
      "model_forward_time": 0.11584925651550293,
      "step": 34016
    },
    {
      "epoch": 0.0002076171875,
      "step": 34016,
      "training_step_time": 0.44585561752319336
    },
    {
      "epoch": 0.000207623291015625,
      "model_forward_time": 0.11542391777038574,
      "step": 34017
    },
    {
      "epoch": 0.000207623291015625,
      "step": 34017,
      "training_step_time": 0.3725106716156006
    },
    {
      "epoch": 0.00020762939453125,
      "model_forward_time": 0.1161966323852539,
      "step": 34018
    },
    {
      "epoch": 0.00020762939453125,
      "step": 34018,
      "training_step_time": 0.4520528316497803
    },
    {
      "epoch": 0.000207635498046875,
      "model_forward_time": 0.11505556106567383,
      "step": 34019
    },
    {
      "epoch": 0.000207635498046875,
      "step": 34019,
      "training_step_time": 0.4503669738769531
    },
    {
      "epoch": 0.0002076416015625,
      "grad_norm": 0.15606607496738434,
      "learning_rate": 4.307773301355062e-05,
      "loss": 0.0442,
      "step": 34020
    },
    {
      "epoch": 0.0002076416015625,
      "model_forward_time": 0.11584687232971191,
      "step": 34020
    },
    {
      "epoch": 0.0002076416015625,
      "step": 34020,
      "training_step_time": 0.44211554527282715
    },
    {
      "epoch": 0.000207647705078125,
      "model_forward_time": 0.11554336547851562,
      "step": 34021
    },
    {
      "epoch": 0.000207647705078125,
      "step": 34021,
      "training_step_time": 0.4160008430480957
    },
    {
      "epoch": 0.00020765380859375,
      "model_forward_time": 0.11458444595336914,
      "step": 34022
    },
    {
      "epoch": 0.00020765380859375,
      "step": 34022,
      "training_step_time": 0.39226317405700684
    },
    {
      "epoch": 0.000207659912109375,
      "model_forward_time": 0.11558198928833008,
      "step": 34023
    },
    {
      "epoch": 0.000207659912109375,
      "step": 34023,
      "training_step_time": 0.3867912292480469
    },
    {
      "epoch": 0.000207666015625,
      "model_forward_time": 0.11500358581542969,
      "step": 34024
    },
    {
      "epoch": 0.000207666015625,
      "step": 34024,
      "training_step_time": 0.4013102054595947
    },
    {
      "epoch": 0.000207672119140625,
      "model_forward_time": 0.11513924598693848,
      "step": 34025
    },
    {
      "epoch": 0.000207672119140625,
      "step": 34025,
      "training_step_time": 0.39608073234558105
    },
    {
      "epoch": 0.00020767822265625,
      "model_forward_time": 0.11501431465148926,
      "step": 34026
    },
    {
      "epoch": 0.00020767822265625,
      "step": 34026,
      "training_step_time": 0.40007495880126953
    },
    {
      "epoch": 0.000207684326171875,
      "model_forward_time": 0.11560606956481934,
      "step": 34027
    },
    {
      "epoch": 0.000207684326171875,
      "step": 34027,
      "training_step_time": 0.498337984085083
    },
    {
      "epoch": 0.0002076904296875,
      "model_forward_time": 0.11551761627197266,
      "step": 34028
    },
    {
      "epoch": 0.0002076904296875,
      "step": 34028,
      "training_step_time": 0.403242826461792
    },
    {
      "epoch": 0.000207696533203125,
      "model_forward_time": 0.11484098434448242,
      "step": 34029
    },
    {
      "epoch": 0.000207696533203125,
      "step": 34029,
      "training_step_time": 0.39987754821777344
    },
    {
      "epoch": 0.00020770263671875,
      "grad_norm": 0.15087752044200897,
      "learning_rate": 4.305044161584352e-05,
      "loss": 0.0462,
      "step": 34030
    },
    {
      "epoch": 0.00020770263671875,
      "model_forward_time": 0.11511945724487305,
      "step": 34030
    },
    {
      "epoch": 0.00020770263671875,
      "step": 34030,
      "training_step_time": 0.4000251293182373
    },
    {
      "epoch": 0.000207708740234375,
      "model_forward_time": 0.11512637138366699,
      "step": 34031
    },
    {
      "epoch": 0.000207708740234375,
      "step": 34031,
      "training_step_time": 0.47577428817749023
    },
    {
      "epoch": 0.00020771484375,
      "model_forward_time": 0.1151585578918457,
      "step": 34032
    },
    {
      "epoch": 0.00020771484375,
      "step": 34032,
      "training_step_time": 0.39404773712158203
    },
    {
      "epoch": 0.000207720947265625,
      "model_forward_time": 0.11554670333862305,
      "step": 34033
    },
    {
      "epoch": 0.000207720947265625,
      "step": 34033,
      "training_step_time": 0.43161487579345703
    },
    {
      "epoch": 0.00020772705078125,
      "model_forward_time": 0.11528658866882324,
      "step": 34034
    },
    {
      "epoch": 0.00020772705078125,
      "step": 34034,
      "training_step_time": 0.43733644485473633
    },
    {
      "epoch": 0.000207733154296875,
      "model_forward_time": 0.11492633819580078,
      "step": 34035
    },
    {
      "epoch": 0.000207733154296875,
      "step": 34035,
      "training_step_time": 0.38759279251098633
    },
    {
      "epoch": 0.0002077392578125,
      "model_forward_time": 0.11586117744445801,
      "step": 34036
    },
    {
      "epoch": 0.0002077392578125,
      "step": 34036,
      "training_step_time": 0.4050943851470947
    },
    {
      "epoch": 0.000207745361328125,
      "model_forward_time": 0.1153554916381836,
      "step": 34037
    },
    {
      "epoch": 0.000207745361328125,
      "step": 34037,
      "training_step_time": 0.3976726531982422
    },
    {
      "epoch": 0.00020775146484375,
      "model_forward_time": 0.11533021926879883,
      "step": 34038
    },
    {
      "epoch": 0.00020775146484375,
      "step": 34038,
      "training_step_time": 0.4218587875366211
    },
    {
      "epoch": 0.000207757568359375,
      "model_forward_time": 0.11515426635742188,
      "step": 34039
    },
    {
      "epoch": 0.000207757568359375,
      "step": 34039,
      "training_step_time": 0.39533019065856934
    },
    {
      "epoch": 0.000207763671875,
      "grad_norm": 0.10721159726381302,
      "learning_rate": 4.302315232922876e-05,
      "loss": 0.0396,
      "step": 34040
    },
    {
      "epoch": 0.000207763671875,
      "model_forward_time": 0.1161649227142334,
      "step": 34040
    },
    {
      "epoch": 0.000207763671875,
      "step": 34040,
      "training_step_time": 0.387204647064209
    },
    {
      "epoch": 0.000207769775390625,
      "model_forward_time": 0.11451506614685059,
      "step": 34041
    },
    {
      "epoch": 0.000207769775390625,
      "step": 34041,
      "training_step_time": 0.38428640365600586
    },
    {
      "epoch": 0.00020777587890625,
      "model_forward_time": 0.11570382118225098,
      "step": 34042
    },
    {
      "epoch": 0.00020777587890625,
      "step": 34042,
      "training_step_time": 0.4379615783691406
    },
    {
      "epoch": 0.000207781982421875,
      "model_forward_time": 0.11521053314208984,
      "step": 34043
    },
    {
      "epoch": 0.000207781982421875,
      "step": 34043,
      "training_step_time": 0.4020833969116211
    },
    {
      "epoch": 0.0002077880859375,
      "model_forward_time": 0.11533236503601074,
      "step": 34044
    },
    {
      "epoch": 0.0002077880859375,
      "step": 34044,
      "training_step_time": 0.4401986598968506
    },
    {
      "epoch": 0.000207794189453125,
      "model_forward_time": 0.11653017997741699,
      "step": 34045
    },
    {
      "epoch": 0.000207794189453125,
      "step": 34045,
      "training_step_time": 0.43672919273376465
    },
    {
      "epoch": 0.00020780029296875,
      "model_forward_time": 0.11537861824035645,
      "step": 34046
    },
    {
      "epoch": 0.00020780029296875,
      "step": 34046,
      "training_step_time": 0.41864752769470215
    },
    {
      "epoch": 0.000207806396484375,
      "model_forward_time": 0.11574673652648926,
      "step": 34047
    },
    {
      "epoch": 0.000207806396484375,
      "step": 34047,
      "training_step_time": 0.4896407127380371
    },
    {
      "epoch": 0.0002078125,
      "model_forward_time": 0.11654472351074219,
      "step": 34048
    },
    {
      "epoch": 0.0002078125,
      "step": 34048,
      "training_step_time": 0.45149707794189453
    },
    {
      "epoch": 0.000207818603515625,
      "model_forward_time": 0.1152949333190918,
      "step": 34049
    },
    {
      "epoch": 0.000207818603515625,
      "step": 34049,
      "training_step_time": 0.4391963481903076
    },
    {
      "epoch": 0.00020782470703125,
      "grad_norm": 0.1231723502278328,
      "learning_rate": 4.2995865161996105e-05,
      "loss": 0.0417,
      "step": 34050
    },
    {
      "epoch": 0.00020782470703125,
      "model_forward_time": 0.11563372611999512,
      "step": 34050
    },
    {
      "epoch": 0.00020782470703125,
      "step": 34050,
      "training_step_time": 0.3951761722564697
    },
    {
      "epoch": 0.000207830810546875,
      "model_forward_time": 0.11501502990722656,
      "step": 34051
    },
    {
      "epoch": 0.000207830810546875,
      "step": 34051,
      "training_step_time": 0.3939206600189209
    },
    {
      "epoch": 0.0002078369140625,
      "model_forward_time": 0.11526226997375488,
      "step": 34052
    },
    {
      "epoch": 0.0002078369140625,
      "step": 34052,
      "training_step_time": 0.4008755683898926
    },
    {
      "epoch": 0.000207843017578125,
      "model_forward_time": 0.11466383934020996,
      "step": 34053
    },
    {
      "epoch": 0.000207843017578125,
      "step": 34053,
      "training_step_time": 0.3888125419616699
    },
    {
      "epoch": 0.00020784912109375,
      "model_forward_time": 0.11703944206237793,
      "step": 34054
    },
    {
      "epoch": 0.00020784912109375,
      "step": 34054,
      "training_step_time": 0.39378929138183594
    },
    {
      "epoch": 0.000207855224609375,
      "model_forward_time": 0.11554908752441406,
      "step": 34055
    },
    {
      "epoch": 0.000207855224609375,
      "step": 34055,
      "training_step_time": 0.3889031410217285
    },
    {
      "epoch": 0.000207861328125,
      "model_forward_time": 0.11586165428161621,
      "step": 34056
    },
    {
      "epoch": 0.000207861328125,
      "step": 34056,
      "training_step_time": 0.39667415618896484
    },
    {
      "epoch": 0.000207867431640625,
      "model_forward_time": 0.11508488655090332,
      "step": 34057
    },
    {
      "epoch": 0.000207867431640625,
      "step": 34057,
      "training_step_time": 0.4807243347167969
    },
    {
      "epoch": 0.00020787353515625,
      "model_forward_time": 0.11591339111328125,
      "step": 34058
    },
    {
      "epoch": 0.00020787353515625,
      "step": 34058,
      "training_step_time": 0.3978121280670166
    },
    {
      "epoch": 0.000207879638671875,
      "model_forward_time": 0.11587142944335938,
      "step": 34059
    },
    {
      "epoch": 0.000207879638671875,
      "step": 34059,
      "training_step_time": 0.4067862033843994
    },
    {
      "epoch": 0.0002078857421875,
      "grad_norm": 0.1824643462896347,
      "learning_rate": 4.29685801224347e-05,
      "loss": 0.0415,
      "step": 34060
    },
    {
      "epoch": 0.0002078857421875,
      "model_forward_time": 0.11538195610046387,
      "step": 34060
    },
    {
      "epoch": 0.0002078857421875,
      "step": 34060,
      "training_step_time": 0.415691614151001
    },
    {
      "epoch": 0.000207891845703125,
      "model_forward_time": 0.11496996879577637,
      "step": 34061
    },
    {
      "epoch": 0.000207891845703125,
      "step": 34061,
      "training_step_time": 0.3685479164123535
    },
    {
      "epoch": 0.00020789794921875,
      "model_forward_time": 0.11529374122619629,
      "step": 34062
    },
    {
      "epoch": 0.00020789794921875,
      "step": 34062,
      "training_step_time": 0.45046353340148926
    },
    {
      "epoch": 0.000207904052734375,
      "model_forward_time": 0.11479735374450684,
      "step": 34063
    },
    {
      "epoch": 0.000207904052734375,
      "step": 34063,
      "training_step_time": 0.4397614002227783
    },
    {
      "epoch": 0.00020791015625,
      "model_forward_time": 0.11523199081420898,
      "step": 34064
    },
    {
      "epoch": 0.00020791015625,
      "step": 34064,
      "training_step_time": 0.3835182189941406
    },
    {
      "epoch": 0.000207916259765625,
      "model_forward_time": 0.11489653587341309,
      "step": 34065
    },
    {
      "epoch": 0.000207916259765625,
      "step": 34065,
      "training_step_time": 0.39132213592529297
    },
    {
      "epoch": 0.00020792236328125,
      "model_forward_time": 0.11518692970275879,
      "step": 34066
    },
    {
      "epoch": 0.00020792236328125,
      "step": 34066,
      "training_step_time": 0.3984224796295166
    },
    {
      "epoch": 0.000207928466796875,
      "model_forward_time": 0.11478900909423828,
      "step": 34067
    },
    {
      "epoch": 0.000207928466796875,
      "step": 34067,
      "training_step_time": 0.40631628036499023
    },
    {
      "epoch": 0.0002079345703125,
      "model_forward_time": 0.11487460136413574,
      "step": 34068
    },
    {
      "epoch": 0.0002079345703125,
      "step": 34068,
      "training_step_time": 0.3977077007293701
    },
    {
      "epoch": 0.000207940673828125,
      "model_forward_time": 0.11484384536743164,
      "step": 34069
    },
    {
      "epoch": 0.000207940673828125,
      "step": 34069,
      "training_step_time": 0.398212194442749
    },
    {
      "epoch": 0.00020794677734375,
      "grad_norm": 0.12358852475881577,
      "learning_rate": 4.294129721883298e-05,
      "loss": 0.0436,
      "step": 34070
    },
    {
      "epoch": 0.00020794677734375,
      "model_forward_time": 0.11641955375671387,
      "step": 34070
    },
    {
      "epoch": 0.00020794677734375,
      "step": 34070,
      "training_step_time": 0.3906216621398926
    },
    {
      "epoch": 0.000207952880859375,
      "model_forward_time": 0.11667680740356445,
      "step": 34071
    },
    {
      "epoch": 0.000207952880859375,
      "step": 34071,
      "training_step_time": 0.5087618827819824
    },
    {
      "epoch": 0.000207958984375,
      "model_forward_time": 0.11534380912780762,
      "step": 34072
    },
    {
      "epoch": 0.000207958984375,
      "step": 34072,
      "training_step_time": 0.3923468589782715
    },
    {
      "epoch": 0.000207965087890625,
      "model_forward_time": 0.11569547653198242,
      "step": 34073
    },
    {
      "epoch": 0.000207965087890625,
      "step": 34073,
      "training_step_time": 0.40799546241760254
    },
    {
      "epoch": 0.00020797119140625,
      "model_forward_time": 0.11481738090515137,
      "step": 34074
    },
    {
      "epoch": 0.00020797119140625,
      "step": 34074,
      "training_step_time": 0.4455564022064209
    },
    {
      "epoch": 0.000207977294921875,
      "model_forward_time": 0.11530303955078125,
      "step": 34075
    },
    {
      "epoch": 0.000207977294921875,
      "step": 34075,
      "training_step_time": 0.4232513904571533
    },
    {
      "epoch": 0.0002079833984375,
      "model_forward_time": 0.11569857597351074,
      "step": 34076
    },
    {
      "epoch": 0.0002079833984375,
      "step": 34076,
      "training_step_time": 0.5060198307037354
    },
    {
      "epoch": 0.000207989501953125,
      "model_forward_time": 0.11640071868896484,
      "step": 34077
    },
    {
      "epoch": 0.000207989501953125,
      "step": 34077,
      "training_step_time": 0.4152040481567383
    },
    {
      "epoch": 0.00020799560546875,
      "model_forward_time": 0.1155233383178711,
      "step": 34078
    },
    {
      "epoch": 0.00020799560546875,
      "step": 34078,
      "training_step_time": 0.4353480339050293
    },
    {
      "epoch": 0.000208001708984375,
      "model_forward_time": 0.11503458023071289,
      "step": 34079
    },
    {
      "epoch": 0.000208001708984375,
      "step": 34079,
      "training_step_time": 0.389751672744751
    },
    {
      "epoch": 0.0002080078125,
      "grad_norm": 0.1045302003622055,
      "learning_rate": 4.291401645947879e-05,
      "loss": 0.0409,
      "step": 34080
    },
    {
      "epoch": 0.0002080078125,
      "model_forward_time": 0.11520600318908691,
      "step": 34080
    },
    {
      "epoch": 0.0002080078125,
      "step": 34080,
      "training_step_time": 0.39314794540405273
    },
    {
      "epoch": 0.000208013916015625,
      "model_forward_time": 0.11609220504760742,
      "step": 34081
    },
    {
      "epoch": 0.000208013916015625,
      "step": 34081,
      "training_step_time": 0.3906586170196533
    },
    {
      "epoch": 0.00020802001953125,
      "model_forward_time": 0.1154937744140625,
      "step": 34082
    },
    {
      "epoch": 0.00020802001953125,
      "step": 34082,
      "training_step_time": 0.3950474262237549
    },
    {
      "epoch": 0.000208026123046875,
      "model_forward_time": 0.11505270004272461,
      "step": 34083
    },
    {
      "epoch": 0.000208026123046875,
      "step": 34083,
      "training_step_time": 0.3841123580932617
    },
    {
      "epoch": 0.0002080322265625,
      "model_forward_time": 0.11474204063415527,
      "step": 34084
    },
    {
      "epoch": 0.0002080322265625,
      "step": 34084,
      "training_step_time": 0.3977069854736328
    },
    {
      "epoch": 0.000208038330078125,
      "model_forward_time": 0.11570262908935547,
      "step": 34085
    },
    {
      "epoch": 0.000208038330078125,
      "step": 34085,
      "training_step_time": 0.3955087661743164
    },
    {
      "epoch": 0.00020804443359375,
      "model_forward_time": 0.11547374725341797,
      "step": 34086
    },
    {
      "epoch": 0.00020804443359375,
      "step": 34086,
      "training_step_time": 0.4013853073120117
    },
    {
      "epoch": 0.000208050537109375,
      "model_forward_time": 0.11525821685791016,
      "step": 34087
    },
    {
      "epoch": 0.000208050537109375,
      "step": 34087,
      "training_step_time": 0.8312385082244873
    },
    {
      "epoch": 0.000208056640625,
      "model_forward_time": 0.1148371696472168,
      "step": 34088
    },
    {
      "epoch": 0.000208056640625,
      "step": 34088,
      "training_step_time": 0.4092259407043457
    },
    {
      "epoch": 0.000208062744140625,
      "model_forward_time": 0.11505460739135742,
      "step": 34089
    },
    {
      "epoch": 0.000208062744140625,
      "step": 34089,
      "training_step_time": 0.4218614101409912
    },
    {
      "epoch": 0.00020806884765625,
      "grad_norm": 0.0876389592885971,
      "learning_rate": 4.2886737852659325e-05,
      "loss": 0.0427,
      "step": 34090
    },
    {
      "epoch": 0.00020806884765625,
      "model_forward_time": 0.11481142044067383,
      "step": 34090
    },
    {
      "epoch": 0.00020806884765625,
      "step": 34090,
      "training_step_time": 0.44388628005981445
    },
    {
      "epoch": 0.000208074951171875,
      "model_forward_time": 0.11491513252258301,
      "step": 34091
    },
    {
      "epoch": 0.000208074951171875,
      "step": 34091,
      "training_step_time": 0.48363566398620605
    },
    {
      "epoch": 0.0002080810546875,
      "model_forward_time": 0.11460518836975098,
      "step": 34092
    },
    {
      "epoch": 0.0002080810546875,
      "step": 34092,
      "training_step_time": 0.40619874000549316
    },
    {
      "epoch": 0.000208087158203125,
      "model_forward_time": 0.11527109146118164,
      "step": 34093
    },
    {
      "epoch": 0.000208087158203125,
      "step": 34093,
      "training_step_time": 0.39013147354125977
    },
    {
      "epoch": 0.00020809326171875,
      "model_forward_time": 0.11623883247375488,
      "step": 34094
    },
    {
      "epoch": 0.00020809326171875,
      "step": 34094,
      "training_step_time": 0.40388989448547363
    },
    {
      "epoch": 0.000208099365234375,
      "model_forward_time": 0.11482787132263184,
      "step": 34095
    },
    {
      "epoch": 0.000208099365234375,
      "step": 34095,
      "training_step_time": 0.39362645149230957
    },
    {
      "epoch": 0.00020810546875,
      "model_forward_time": 0.11584615707397461,
      "step": 34096
    },
    {
      "epoch": 0.00020810546875,
      "step": 34096,
      "training_step_time": 0.39832425117492676
    },
    {
      "epoch": 0.000208111572265625,
      "model_forward_time": 0.11506462097167969,
      "step": 34097
    },
    {
      "epoch": 0.000208111572265625,
      "step": 34097,
      "training_step_time": 0.39456987380981445
    },
    {
      "epoch": 0.00020811767578125,
      "model_forward_time": 0.11517977714538574,
      "step": 34098
    },
    {
      "epoch": 0.00020811767578125,
      "step": 34098,
      "training_step_time": 0.4133107662200928
    },
    {
      "epoch": 0.000208123779296875,
      "model_forward_time": 0.11608695983886719,
      "step": 34099
    },
    {
      "epoch": 0.000208123779296875,
      "step": 34099,
      "training_step_time": 0.5791988372802734
    },
    {
      "epoch": 0.0002081298828125,
      "grad_norm": 0.10474143922328949,
      "learning_rate": 4.2859461406661065e-05,
      "loss": 0.0386,
      "step": 34100
    },
    {
      "epoch": 0.0002081298828125,
      "model_forward_time": 0.11545968055725098,
      "step": 34100
    },
    {
      "epoch": 0.0002081298828125,
      "step": 34100,
      "training_step_time": 0.4510810375213623
    },
    {
      "epoch": 0.000208135986328125,
      "model_forward_time": 0.11528682708740234,
      "step": 34101
    },
    {
      "epoch": 0.000208135986328125,
      "step": 34101,
      "training_step_time": 0.4647402763366699
    },
    {
      "epoch": 0.00020814208984375,
      "model_forward_time": 0.1142723560333252,
      "step": 34102
    },
    {
      "epoch": 0.00020814208984375,
      "step": 34102,
      "training_step_time": 0.4258401393890381
    },
    {
      "epoch": 0.000208148193359375,
      "model_forward_time": 0.1150658130645752,
      "step": 34103
    },
    {
      "epoch": 0.000208148193359375,
      "step": 34103,
      "training_step_time": 0.43550777435302734
    },
    {
      "epoch": 0.000208154296875,
      "model_forward_time": 0.11486124992370605,
      "step": 34104
    },
    {
      "epoch": 0.000208154296875,
      "step": 34104,
      "training_step_time": 0.3647334575653076
    },
    {
      "epoch": 0.000208160400390625,
      "model_forward_time": 0.1156768798828125,
      "step": 34105
    },
    {
      "epoch": 0.000208160400390625,
      "step": 34105,
      "training_step_time": 0.4318668842315674
    },
    {
      "epoch": 0.00020816650390625,
      "model_forward_time": 0.11565041542053223,
      "step": 34106
    },
    {
      "epoch": 0.00020816650390625,
      "step": 34106,
      "training_step_time": 0.41459226608276367
    },
    {
      "epoch": 0.000208172607421875,
      "model_forward_time": 0.11561751365661621,
      "step": 34107
    },
    {
      "epoch": 0.000208172607421875,
      "step": 34107,
      "training_step_time": 0.40131640434265137
    },
    {
      "epoch": 0.0002081787109375,
      "model_forward_time": 0.11455130577087402,
      "step": 34108
    },
    {
      "epoch": 0.0002081787109375,
      "step": 34108,
      "training_step_time": 0.40111446380615234
    },
    {
      "epoch": 0.000208184814453125,
      "model_forward_time": 0.11550402641296387,
      "step": 34109
    },
    {
      "epoch": 0.000208184814453125,
      "step": 34109,
      "training_step_time": 0.3993344306945801
    },
    {
      "epoch": 0.00020819091796875,
      "grad_norm": 0.14486220479011536,
      "learning_rate": 4.283218712976992e-05,
      "loss": 0.0456,
      "step": 34110
    },
    {
      "epoch": 0.00020819091796875,
      "model_forward_time": 0.11487579345703125,
      "step": 34110
    },
    {
      "epoch": 0.00020819091796875,
      "step": 34110,
      "training_step_time": 0.40247011184692383
    },
    {
      "epoch": 0.000208197021484375,
      "model_forward_time": 0.11570096015930176,
      "step": 34111
    },
    {
      "epoch": 0.000208197021484375,
      "step": 34111,
      "training_step_time": 0.3934798240661621
    },
    {
      "epoch": 0.000208203125,
      "model_forward_time": 0.11499381065368652,
      "step": 34112
    },
    {
      "epoch": 0.000208203125,
      "step": 34112,
      "training_step_time": 0.39098143577575684
    },
    {
      "epoch": 0.000208209228515625,
      "model_forward_time": 0.11535382270812988,
      "step": 34113
    },
    {
      "epoch": 0.000208209228515625,
      "step": 34113,
      "training_step_time": 0.3946986198425293
    },
    {
      "epoch": 0.00020821533203125,
      "model_forward_time": 0.11544537544250488,
      "step": 34114
    },
    {
      "epoch": 0.00020821533203125,
      "step": 34114,
      "training_step_time": 0.4516787528991699
    },
    {
      "epoch": 0.000208221435546875,
      "model_forward_time": 0.11523795127868652,
      "step": 34115
    },
    {
      "epoch": 0.000208221435546875,
      "step": 34115,
      "training_step_time": 0.4078943729400635
    },
    {
      "epoch": 0.0002082275390625,
      "model_forward_time": 0.11519217491149902,
      "step": 34116
    },
    {
      "epoch": 0.0002082275390625,
      "step": 34116,
      "training_step_time": 0.4983232021331787
    },
    {
      "epoch": 0.000208233642578125,
      "model_forward_time": 0.11599969863891602,
      "step": 34117
    },
    {
      "epoch": 0.000208233642578125,
      "step": 34117,
      "training_step_time": 0.40880918502807617
    },
    {
      "epoch": 0.00020823974609375,
      "model_forward_time": 0.11566448211669922,
      "step": 34118
    },
    {
      "epoch": 0.00020823974609375,
      "step": 34118,
      "training_step_time": 0.4554867744445801
    },
    {
      "epoch": 0.000208245849609375,
      "model_forward_time": 0.11533594131469727,
      "step": 34119
    },
    {
      "epoch": 0.000208245849609375,
      "step": 34119,
      "training_step_time": 0.44989991188049316
    },
    {
      "epoch": 0.000208251953125,
      "grad_norm": 0.15533478558063507,
      "learning_rate": 4.280491503027104e-05,
      "loss": 0.0442,
      "step": 34120
    },
    {
      "epoch": 0.000208251953125,
      "model_forward_time": 0.11539649963378906,
      "step": 34120
    },
    {
      "epoch": 0.000208251953125,
      "step": 34120,
      "training_step_time": 0.419994592666626
    },
    {
      "epoch": 0.000208258056640625,
      "model_forward_time": 0.11512207984924316,
      "step": 34121
    },
    {
      "epoch": 0.000208258056640625,
      "step": 34121,
      "training_step_time": 0.4150710105895996
    },
    {
      "epoch": 0.00020826416015625,
      "model_forward_time": 0.1157217025756836,
      "step": 34122
    },
    {
      "epoch": 0.00020826416015625,
      "step": 34122,
      "training_step_time": 0.3924441337585449
    },
    {
      "epoch": 0.000208270263671875,
      "model_forward_time": 0.115142822265625,
      "step": 34123
    },
    {
      "epoch": 0.000208270263671875,
      "step": 34123,
      "training_step_time": 0.41321444511413574
    },
    {
      "epoch": 0.0002082763671875,
      "model_forward_time": 0.11479902267456055,
      "step": 34124
    },
    {
      "epoch": 0.0002082763671875,
      "step": 34124,
      "training_step_time": 0.39004063606262207
    },
    {
      "epoch": 0.000208282470703125,
      "model_forward_time": 0.11550450325012207,
      "step": 34125
    },
    {
      "epoch": 0.000208282470703125,
      "step": 34125,
      "training_step_time": 0.3944377899169922
    },
    {
      "epoch": 0.00020828857421875,
      "model_forward_time": 0.11597013473510742,
      "step": 34126
    },
    {
      "epoch": 0.00020828857421875,
      "step": 34126,
      "training_step_time": 0.4029057025909424
    },
    {
      "epoch": 0.000208294677734375,
      "model_forward_time": 0.11533379554748535,
      "step": 34127
    },
    {
      "epoch": 0.000208294677734375,
      "step": 34127,
      "training_step_time": 0.4019017219543457
    },
    {
      "epoch": 0.00020830078125,
      "model_forward_time": 0.11502909660339355,
      "step": 34128
    },
    {
      "epoch": 0.00020830078125,
      "step": 34128,
      "training_step_time": 0.4010331630706787
    },
    {
      "epoch": 0.000208306884765625,
      "model_forward_time": 0.1151113510131836,
      "step": 34129
    },
    {
      "epoch": 0.000208306884765625,
      "step": 34129,
      "training_step_time": 0.46807003021240234
    },
    {
      "epoch": 0.00020831298828125,
      "grad_norm": 0.10184742510318756,
      "learning_rate": 4.2777645116449004e-05,
      "loss": 0.0366,
      "step": 34130
    },
    {
      "epoch": 0.00020831298828125,
      "model_forward_time": 0.11727094650268555,
      "step": 34130
    },
    {
      "epoch": 0.00020831298828125,
      "step": 34130,
      "training_step_time": 0.39830756187438965
    },
    {
      "epoch": 0.000208319091796875,
      "model_forward_time": 0.11474776268005371,
      "step": 34131
    },
    {
      "epoch": 0.000208319091796875,
      "step": 34131,
      "training_step_time": 0.44992685317993164
    },
    {
      "epoch": 0.0002083251953125,
      "model_forward_time": 0.11528539657592773,
      "step": 34132
    },
    {
      "epoch": 0.0002083251953125,
      "step": 34132,
      "training_step_time": 0.4230942726135254
    },
    {
      "epoch": 0.000208331298828125,
      "model_forward_time": 0.11484408378601074,
      "step": 34133
    },
    {
      "epoch": 0.000208331298828125,
      "step": 34133,
      "training_step_time": 0.3899557590484619
    },
    {
      "epoch": 0.00020833740234375,
      "model_forward_time": 0.11588287353515625,
      "step": 34134
    },
    {
      "epoch": 0.00020833740234375,
      "step": 34134,
      "training_step_time": 0.4555072784423828
    },
    {
      "epoch": 0.000208343505859375,
      "model_forward_time": 0.11524701118469238,
      "step": 34135
    },
    {
      "epoch": 0.000208343505859375,
      "step": 34135,
      "training_step_time": 0.4204714298248291
    },
    {
      "epoch": 0.000208349609375,
      "model_forward_time": 0.11495566368103027,
      "step": 34136
    },
    {
      "epoch": 0.000208349609375,
      "step": 34136,
      "training_step_time": 0.40738964080810547
    },
    {
      "epoch": 0.000208355712890625,
      "model_forward_time": 0.11466765403747559,
      "step": 34137
    },
    {
      "epoch": 0.000208355712890625,
      "step": 34137,
      "training_step_time": 0.3999180793762207
    },
    {
      "epoch": 0.00020836181640625,
      "model_forward_time": 0.114898681640625,
      "step": 34138
    },
    {
      "epoch": 0.00020836181640625,
      "step": 34138,
      "training_step_time": 0.38706135749816895
    },
    {
      "epoch": 0.000208367919921875,
      "model_forward_time": 0.11465907096862793,
      "step": 34139
    },
    {
      "epoch": 0.000208367919921875,
      "step": 34139,
      "training_step_time": 0.3911325931549072
    },
    {
      "epoch": 0.0002083740234375,
      "grad_norm": 0.12132421135902405,
      "learning_rate": 4.275037739658771e-05,
      "loss": 0.0401,
      "step": 34140
    },
    {
      "epoch": 0.0002083740234375,
      "model_forward_time": 0.11512231826782227,
      "step": 34140
    },
    {
      "epoch": 0.0002083740234375,
      "step": 34140,
      "training_step_time": 0.39644646644592285
    },
    {
      "epoch": 0.000208380126953125,
      "model_forward_time": 0.11511397361755371,
      "step": 34141
    },
    {
      "epoch": 0.000208380126953125,
      "step": 34141,
      "training_step_time": 0.39480161666870117
    },
    {
      "epoch": 0.00020838623046875,
      "model_forward_time": 0.11548256874084473,
      "step": 34142
    },
    {
      "epoch": 0.00020838623046875,
      "step": 34142,
      "training_step_time": 0.4008042812347412
    },
    {
      "epoch": 0.000208392333984375,
      "model_forward_time": 0.1150052547454834,
      "step": 34143
    },
    {
      "epoch": 0.000208392333984375,
      "step": 34143,
      "training_step_time": 0.42803120613098145
    },
    {
      "epoch": 0.0002083984375,
      "model_forward_time": 0.11530017852783203,
      "step": 34144
    },
    {
      "epoch": 0.0002083984375,
      "step": 34144,
      "training_step_time": 0.43836188316345215
    },
    {
      "epoch": 0.000208404541015625,
      "model_forward_time": 0.11470246315002441,
      "step": 34145
    },
    {
      "epoch": 0.000208404541015625,
      "step": 34145,
      "training_step_time": 0.40665602684020996
    },
    {
      "epoch": 0.00020841064453125,
      "model_forward_time": 0.11458086967468262,
      "step": 34146
    },
    {
      "epoch": 0.00020841064453125,
      "step": 34146,
      "training_step_time": 0.4299502372741699
    },
    {
      "epoch": 0.000208416748046875,
      "model_forward_time": 0.11541557312011719,
      "step": 34147
    },
    {
      "epoch": 0.000208416748046875,
      "step": 34147,
      "training_step_time": 0.5171830654144287
    },
    {
      "epoch": 0.0002084228515625,
      "model_forward_time": 0.11507391929626465,
      "step": 34148
    },
    {
      "epoch": 0.0002084228515625,
      "step": 34148,
      "training_step_time": 0.3660731315612793
    },
    {
      "epoch": 0.000208428955078125,
      "model_forward_time": 0.11531352996826172,
      "step": 34149
    },
    {
      "epoch": 0.000208428955078125,
      "step": 34149,
      "training_step_time": 0.3968794345855713
    },
    {
      "epoch": 0.00020843505859375,
      "grad_norm": 0.12860135734081268,
      "learning_rate": 4.2723111878970326e-05,
      "loss": 0.0452,
      "step": 34150
    },
    {
      "epoch": 0.00020843505859375,
      "model_forward_time": 0.11526250839233398,
      "step": 34150
    },
    {
      "epoch": 0.00020843505859375,
      "step": 34150,
      "training_step_time": 0.4489161968231201
    },
    {
      "epoch": 0.000208441162109375,
      "model_forward_time": 0.11487841606140137,
      "step": 34151
    },
    {
      "epoch": 0.000208441162109375,
      "step": 34151,
      "training_step_time": 0.38759684562683105
    },
    {
      "epoch": 0.000208447265625,
      "model_forward_time": 0.11565160751342773,
      "step": 34152
    },
    {
      "epoch": 0.000208447265625,
      "step": 34152,
      "training_step_time": 0.39666175842285156
    },
    {
      "epoch": 0.000208453369140625,
      "model_forward_time": 0.11532020568847656,
      "step": 34153
    },
    {
      "epoch": 0.000208453369140625,
      "step": 34153,
      "training_step_time": 0.3947467803955078
    },
    {
      "epoch": 0.00020845947265625,
      "model_forward_time": 0.11564922332763672,
      "step": 34154
    },
    {
      "epoch": 0.00020845947265625,
      "step": 34154,
      "training_step_time": 0.3852860927581787
    },
    {
      "epoch": 0.000208465576171875,
      "model_forward_time": 0.1149134635925293,
      "step": 34155
    },
    {
      "epoch": 0.000208465576171875,
      "step": 34155,
      "training_step_time": 0.38828182220458984
    },
    {
      "epoch": 0.0002084716796875,
      "model_forward_time": 0.11592936515808105,
      "step": 34156
    },
    {
      "epoch": 0.0002084716796875,
      "step": 34156,
      "training_step_time": 0.41300201416015625
    },
    {
      "epoch": 0.000208477783203125,
      "model_forward_time": 0.11489558219909668,
      "step": 34157
    },
    {
      "epoch": 0.000208477783203125,
      "step": 34157,
      "training_step_time": 0.4099268913269043
    },
    {
      "epoch": 0.00020848388671875,
      "model_forward_time": 0.11562609672546387,
      "step": 34158
    },
    {
      "epoch": 0.00020848388671875,
      "step": 34158,
      "training_step_time": 0.3951380252838135
    },
    {
      "epoch": 0.000208489990234375,
      "model_forward_time": 0.11526870727539062,
      "step": 34159
    },
    {
      "epoch": 0.000208489990234375,
      "step": 34159,
      "training_step_time": 0.4143977165222168
    },
    {
      "epoch": 0.00020849609375,
      "grad_norm": 0.12259794771671295,
      "learning_rate": 4.269584857187943e-05,
      "loss": 0.0402,
      "step": 34160
    },
    {
      "epoch": 0.00020849609375,
      "model_forward_time": 0.1151423454284668,
      "step": 34160
    },
    {
      "epoch": 0.00020849609375,
      "step": 34160,
      "training_step_time": 0.5045754909515381
    },
    {
      "epoch": 0.000208502197265625,
      "model_forward_time": 0.11556553840637207,
      "step": 34161
    },
    {
      "epoch": 0.000208502197265625,
      "step": 34161,
      "training_step_time": 0.4339590072631836
    },
    {
      "epoch": 0.00020850830078125,
      "model_forward_time": 0.11468744277954102,
      "step": 34162
    },
    {
      "epoch": 0.00020850830078125,
      "step": 34162,
      "training_step_time": 0.3912928104400635
    },
    {
      "epoch": 0.000208514404296875,
      "model_forward_time": 0.11485767364501953,
      "step": 34163
    },
    {
      "epoch": 0.000208514404296875,
      "step": 34163,
      "training_step_time": 0.3662443161010742
    },
    {
      "epoch": 0.0002085205078125,
      "model_forward_time": 0.11492252349853516,
      "step": 34164
    },
    {
      "epoch": 0.0002085205078125,
      "step": 34164,
      "training_step_time": 0.4626443386077881
    },
    {
      "epoch": 0.000208526611328125,
      "model_forward_time": 0.11467170715332031,
      "step": 34165
    },
    {
      "epoch": 0.000208526611328125,
      "step": 34165,
      "training_step_time": 0.39818477630615234
    },
    {
      "epoch": 0.00020853271484375,
      "model_forward_time": 0.11568284034729004,
      "step": 34166
    },
    {
      "epoch": 0.00020853271484375,
      "step": 34166,
      "training_step_time": 0.385329008102417
    },
    {
      "epoch": 0.000208538818359375,
      "model_forward_time": 0.114837646484375,
      "step": 34167
    },
    {
      "epoch": 0.000208538818359375,
      "step": 34167,
      "training_step_time": 0.38019537925720215
    },
    {
      "epoch": 0.000208544921875,
      "model_forward_time": 0.11600685119628906,
      "step": 34168
    },
    {
      "epoch": 0.000208544921875,
      "step": 34168,
      "training_step_time": 0.3842499256134033
    },
    {
      "epoch": 0.000208551025390625,
      "model_forward_time": 0.1156320571899414,
      "step": 34169
    },
    {
      "epoch": 0.000208551025390625,
      "step": 34169,
      "training_step_time": 0.3950800895690918
    },
    {
      "epoch": 0.00020855712890625,
      "grad_norm": 0.1885932832956314,
      "learning_rate": 4.2668587483596864e-05,
      "loss": 0.0425,
      "step": 34170
    },
    {
      "epoch": 0.00020855712890625,
      "model_forward_time": 0.11596083641052246,
      "step": 34170
    },
    {
      "epoch": 0.00020855712890625,
      "step": 34170,
      "training_step_time": 0.38698267936706543
    },
    {
      "epoch": 0.000208563232421875,
      "model_forward_time": 0.11542081832885742,
      "step": 34171
    },
    {
      "epoch": 0.000208563232421875,
      "step": 34171,
      "training_step_time": 0.4001631736755371
    },
    {
      "epoch": 0.0002085693359375,
      "model_forward_time": 0.11576509475708008,
      "step": 34172
    },
    {
      "epoch": 0.0002085693359375,
      "step": 34172,
      "training_step_time": 0.38747715950012207
    },
    {
      "epoch": 0.000208575439453125,
      "model_forward_time": 0.11482810974121094,
      "step": 34173
    },
    {
      "epoch": 0.000208575439453125,
      "step": 34173,
      "training_step_time": 0.41551947593688965
    },
    {
      "epoch": 0.00020858154296875,
      "model_forward_time": 0.1155405044555664,
      "step": 34174
    },
    {
      "epoch": 0.00020858154296875,
      "step": 34174,
      "training_step_time": 0.3933827877044678
    },
    {
      "epoch": 0.000208587646484375,
      "model_forward_time": 0.11565542221069336,
      "step": 34175
    },
    {
      "epoch": 0.000208587646484375,
      "step": 34175,
      "training_step_time": 0.4855649471282959
    },
    {
      "epoch": 0.00020859375,
      "model_forward_time": 0.11516022682189941,
      "step": 34176
    },
    {
      "epoch": 0.00020859375,
      "step": 34176,
      "training_step_time": 0.49610304832458496
    },
    {
      "epoch": 0.000208599853515625,
      "model_forward_time": 0.11539292335510254,
      "step": 34177
    },
    {
      "epoch": 0.000208599853515625,
      "step": 34177,
      "training_step_time": 0.38776111602783203
    },
    {
      "epoch": 0.00020860595703125,
      "model_forward_time": 0.1156456470489502,
      "step": 34178
    },
    {
      "epoch": 0.00020860595703125,
      "step": 34178,
      "training_step_time": 0.4307370185852051
    },
    {
      "epoch": 0.000208612060546875,
      "model_forward_time": 0.11565852165222168,
      "step": 34179
    },
    {
      "epoch": 0.000208612060546875,
      "step": 34179,
      "training_step_time": 0.4995148181915283
    },
    {
      "epoch": 0.0002086181640625,
      "grad_norm": 0.10323847830295563,
      "learning_rate": 4.264132862240387e-05,
      "loss": 0.0404,
      "step": 34180
    },
    {
      "epoch": 0.0002086181640625,
      "model_forward_time": 0.11496257781982422,
      "step": 34180
    },
    {
      "epoch": 0.0002086181640625,
      "step": 34180,
      "training_step_time": 0.42529797554016113
    },
    {
      "epoch": 0.000208624267578125,
      "model_forward_time": 0.11500358581542969,
      "step": 34181
    },
    {
      "epoch": 0.000208624267578125,
      "step": 34181,
      "training_step_time": 0.382152795791626
    },
    {
      "epoch": 0.00020863037109375,
      "model_forward_time": 0.11519217491149902,
      "step": 34182
    },
    {
      "epoch": 0.00020863037109375,
      "step": 34182,
      "training_step_time": 0.3950655460357666
    },
    {
      "epoch": 0.000208636474609375,
      "model_forward_time": 0.11515665054321289,
      "step": 34183
    },
    {
      "epoch": 0.000208636474609375,
      "step": 34183,
      "training_step_time": 0.39537525177001953
    },
    {
      "epoch": 0.000208642578125,
      "model_forward_time": 0.1153421401977539,
      "step": 34184
    },
    {
      "epoch": 0.000208642578125,
      "step": 34184,
      "training_step_time": 0.438413143157959
    },
    {
      "epoch": 0.000208648681640625,
      "model_forward_time": 0.11494326591491699,
      "step": 34185
    },
    {
      "epoch": 0.000208648681640625,
      "step": 34185,
      "training_step_time": 0.40724658966064453
    },
    {
      "epoch": 0.00020865478515625,
      "model_forward_time": 0.1153707504272461,
      "step": 34186
    },
    {
      "epoch": 0.00020865478515625,
      "step": 34186,
      "training_step_time": 0.3907296657562256
    },
    {
      "epoch": 0.000208660888671875,
      "model_forward_time": 0.11533308029174805,
      "step": 34187
    },
    {
      "epoch": 0.000208660888671875,
      "step": 34187,
      "training_step_time": 0.38368964195251465
    },
    {
      "epoch": 0.0002086669921875,
      "model_forward_time": 0.11546540260314941,
      "step": 34188
    },
    {
      "epoch": 0.0002086669921875,
      "step": 34188,
      "training_step_time": 0.42761945724487305
    },
    {
      "epoch": 0.000208673095703125,
      "model_forward_time": 0.11589860916137695,
      "step": 34189
    },
    {
      "epoch": 0.000208673095703125,
      "step": 34189,
      "training_step_time": 0.4167490005493164
    },
    {
      "epoch": 0.00020867919921875,
      "grad_norm": 0.16104060411453247,
      "learning_rate": 4.261407199658093e-05,
      "loss": 0.0411,
      "step": 34190
    },
    {
      "epoch": 0.00020867919921875,
      "model_forward_time": 0.11492538452148438,
      "step": 34190
    },
    {
      "epoch": 0.00020867919921875,
      "step": 34190,
      "training_step_time": 0.45302295684814453
    },
    {
      "epoch": 0.000208685302734375,
      "model_forward_time": 0.1148538589477539,
      "step": 34191
    },
    {
      "epoch": 0.000208685302734375,
      "step": 34191,
      "training_step_time": 0.44683003425598145
    },
    {
      "epoch": 0.00020869140625,
      "model_forward_time": 0.11486482620239258,
      "step": 34192
    },
    {
      "epoch": 0.00020869140625,
      "step": 34192,
      "training_step_time": 0.3882765769958496
    },
    {
      "epoch": 0.000208697509765625,
      "model_forward_time": 0.11483621597290039,
      "step": 34193
    },
    {
      "epoch": 0.000208697509765625,
      "step": 34193,
      "training_step_time": 0.553248405456543
    },
    {
      "epoch": 0.00020870361328125,
      "model_forward_time": 0.11619687080383301,
      "step": 34194
    },
    {
      "epoch": 0.00020870361328125,
      "step": 34194,
      "training_step_time": 0.45760154724121094
    },
    {
      "epoch": 0.000208709716796875,
      "model_forward_time": 0.11443614959716797,
      "step": 34195
    },
    {
      "epoch": 0.000208709716796875,
      "step": 34195,
      "training_step_time": 0.39789748191833496
    },
    {
      "epoch": 0.0002087158203125,
      "model_forward_time": 0.11515092849731445,
      "step": 34196
    },
    {
      "epoch": 0.0002087158203125,
      "step": 34196,
      "training_step_time": 0.38249993324279785
    },
    {
      "epoch": 0.000208721923828125,
      "model_forward_time": 0.11611652374267578,
      "step": 34197
    },
    {
      "epoch": 0.000208721923828125,
      "step": 34197,
      "training_step_time": 0.4163506031036377
    },
    {
      "epoch": 0.00020872802734375,
      "model_forward_time": 0.11497879028320312,
      "step": 34198
    },
    {
      "epoch": 0.00020872802734375,
      "step": 34198,
      "training_step_time": 0.4178769588470459
    },
    {
      "epoch": 0.000208734130859375,
      "model_forward_time": 0.11418318748474121,
      "step": 34199
    },
    {
      "epoch": 0.000208734130859375,
      "step": 34199,
      "training_step_time": 0.38010740280151367
    },
    {
      "epoch": 0.000208740234375,
      "grad_norm": 0.10132358223199844,
      "learning_rate": 4.2586817614407895e-05,
      "loss": 0.0417,
      "step": 34200
    },
    {
      "epoch": 0.000208740234375,
      "model_forward_time": 0.1157379150390625,
      "step": 34200
    },
    {
      "epoch": 0.000208740234375,
      "step": 34200,
      "training_step_time": 0.3920626640319824
    },
    {
      "epoch": 0.000208746337890625,
      "model_forward_time": 0.1156473159790039,
      "step": 34201
    },
    {
      "epoch": 0.000208746337890625,
      "step": 34201,
      "training_step_time": 0.4033224582672119
    },
    {
      "epoch": 0.00020875244140625,
      "model_forward_time": 0.1153566837310791,
      "step": 34202
    },
    {
      "epoch": 0.00020875244140625,
      "step": 34202,
      "training_step_time": 0.3882102966308594
    },
    {
      "epoch": 0.000208758544921875,
      "model_forward_time": 0.11554694175720215,
      "step": 34203
    },
    {
      "epoch": 0.000208758544921875,
      "step": 34203,
      "training_step_time": 0.4611196517944336
    },
    {
      "epoch": 0.0002087646484375,
      "model_forward_time": 0.11521339416503906,
      "step": 34204
    },
    {
      "epoch": 0.0002087646484375,
      "step": 34204,
      "training_step_time": 0.4633667469024658
    },
    {
      "epoch": 0.000208770751953125,
      "model_forward_time": 0.11514639854431152,
      "step": 34205
    },
    {
      "epoch": 0.000208770751953125,
      "step": 34205,
      "training_step_time": 0.3884608745574951
    },
    {
      "epoch": 0.00020877685546875,
      "model_forward_time": 0.11582207679748535,
      "step": 34206
    },
    {
      "epoch": 0.00020877685546875,
      "step": 34206,
      "training_step_time": 0.40094423294067383
    },
    {
      "epoch": 0.000208782958984375,
      "model_forward_time": 0.11542153358459473,
      "step": 34207
    },
    {
      "epoch": 0.000208782958984375,
      "step": 34207,
      "training_step_time": 0.4071085453033447
    },
    {
      "epoch": 0.0002087890625,
      "model_forward_time": 0.11537480354309082,
      "step": 34208
    },
    {
      "epoch": 0.0002087890625,
      "step": 34208,
      "training_step_time": 0.38971638679504395
    },
    {
      "epoch": 0.000208795166015625,
      "model_forward_time": 0.11586594581604004,
      "step": 34209
    },
    {
      "epoch": 0.000208795166015625,
      "step": 34209,
      "training_step_time": 0.44138216972351074
    },
    {
      "epoch": 0.00020880126953125,
      "grad_norm": 0.1480322629213333,
      "learning_rate": 4.255956548416397e-05,
      "loss": 0.0481,
      "step": 34210
    },
    {
      "epoch": 0.00020880126953125,
      "model_forward_time": 0.11544418334960938,
      "step": 34210
    },
    {
      "epoch": 0.00020880126953125,
      "step": 34210,
      "training_step_time": 0.3913841247558594
    },
    {
      "epoch": 0.000208807373046875,
      "model_forward_time": 0.11521792411804199,
      "step": 34211
    },
    {
      "epoch": 0.000208807373046875,
      "step": 34211,
      "training_step_time": 0.39949512481689453
    },
    {
      "epoch": 0.0002088134765625,
      "model_forward_time": 0.11513304710388184,
      "step": 34212
    },
    {
      "epoch": 0.0002088134765625,
      "step": 34212,
      "training_step_time": 0.40460920333862305
    },
    {
      "epoch": 0.000208819580078125,
      "model_forward_time": 0.11623001098632812,
      "step": 34213
    },
    {
      "epoch": 0.000208819580078125,
      "step": 34213,
      "training_step_time": 0.4042842388153076
    },
    {
      "epoch": 0.00020882568359375,
      "model_forward_time": 0.11489009857177734,
      "step": 34214
    },
    {
      "epoch": 0.00020882568359375,
      "step": 34214,
      "training_step_time": 0.40940356254577637
    },
    {
      "epoch": 0.000208831787109375,
      "model_forward_time": 0.11548519134521484,
      "step": 34215
    },
    {
      "epoch": 0.000208831787109375,
      "step": 34215,
      "training_step_time": 0.39572715759277344
    },
    {
      "epoch": 0.000208837890625,
      "model_forward_time": 0.11595892906188965,
      "step": 34216
    },
    {
      "epoch": 0.000208837890625,
      "step": 34216,
      "training_step_time": 0.40220165252685547
    },
    {
      "epoch": 0.000208843994140625,
      "model_forward_time": 0.11524105072021484,
      "step": 34217
    },
    {
      "epoch": 0.000208843994140625,
      "step": 34217,
      "training_step_time": 0.4020390510559082
    },
    {
      "epoch": 0.00020885009765625,
      "model_forward_time": 0.11524748802185059,
      "step": 34218
    },
    {
      "epoch": 0.00020885009765625,
      "step": 34218,
      "training_step_time": 0.4475831985473633
    },
    {
      "epoch": 0.000208856201171875,
      "model_forward_time": 0.11620855331420898,
      "step": 34219
    },
    {
      "epoch": 0.000208856201171875,
      "step": 34219,
      "training_step_time": 0.49623632431030273
    },
    {
      "epoch": 0.0002088623046875,
      "grad_norm": 0.10174297541379929,
      "learning_rate": 4.253231561412756e-05,
      "loss": 0.0448,
      "step": 34220
    },
    {
      "epoch": 0.0002088623046875,
      "model_forward_time": 0.11501097679138184,
      "step": 34220
    },
    {
      "epoch": 0.0002088623046875,
      "step": 34220,
      "training_step_time": 0.42386341094970703
    },
    {
      "epoch": 0.000208868408203125,
      "model_forward_time": 0.1147317886352539,
      "step": 34221
    },
    {
      "epoch": 0.000208868408203125,
      "step": 34221,
      "training_step_time": 0.3941686153411865
    },
    {
      "epoch": 0.00020887451171875,
      "model_forward_time": 0.11501955986022949,
      "step": 34222
    },
    {
      "epoch": 0.00020887451171875,
      "step": 34222,
      "training_step_time": 0.41469430923461914
    },
    {
      "epoch": 0.000208880615234375,
      "model_forward_time": 0.11533522605895996,
      "step": 34223
    },
    {
      "epoch": 0.000208880615234375,
      "step": 34223,
      "training_step_time": 0.43148064613342285
    },
    {
      "epoch": 0.00020888671875,
      "model_forward_time": 0.11576437950134277,
      "step": 34224
    },
    {
      "epoch": 0.00020888671875,
      "step": 34224,
      "training_step_time": 0.4020841121673584
    },
    {
      "epoch": 0.000208892822265625,
      "model_forward_time": 0.11509251594543457,
      "step": 34225
    },
    {
      "epoch": 0.000208892822265625,
      "step": 34225,
      "training_step_time": 0.4112703800201416
    },
    {
      "epoch": 0.00020889892578125,
      "model_forward_time": 0.11478042602539062,
      "step": 34226
    },
    {
      "epoch": 0.00020889892578125,
      "step": 34226,
      "training_step_time": 0.40460872650146484
    },
    {
      "epoch": 0.000208905029296875,
      "model_forward_time": 0.11583590507507324,
      "step": 34227
    },
    {
      "epoch": 0.000208905029296875,
      "step": 34227,
      "training_step_time": 0.3901021480560303
    },
    {
      "epoch": 0.0002089111328125,
      "model_forward_time": 0.11592912673950195,
      "step": 34228
    },
    {
      "epoch": 0.0002089111328125,
      "step": 34228,
      "training_step_time": 0.40092945098876953
    },
    {
      "epoch": 0.000208917236328125,
      "model_forward_time": 0.11588668823242188,
      "step": 34229
    },
    {
      "epoch": 0.000208917236328125,
      "step": 34229,
      "training_step_time": 0.4021472930908203
    },
    {
      "epoch": 0.00020892333984375,
      "grad_norm": 0.08984049409627914,
      "learning_rate": 4.250506801257653e-05,
      "loss": 0.0373,
      "step": 34230
    },
    {
      "epoch": 0.00020892333984375,
      "model_forward_time": 0.11473417282104492,
      "step": 34230
    },
    {
      "epoch": 0.00020892333984375,
      "step": 34230,
      "training_step_time": 0.407306432723999
    },
    {
      "epoch": 0.000208929443359375,
      "model_forward_time": 0.11558365821838379,
      "step": 34231
    },
    {
      "epoch": 0.000208929443359375,
      "step": 34231,
      "training_step_time": 0.39641356468200684
    },
    {
      "epoch": 0.000208935546875,
      "model_forward_time": 0.11557626724243164,
      "step": 34232
    },
    {
      "epoch": 0.000208935546875,
      "step": 34232,
      "training_step_time": 0.3951430320739746
    },
    {
      "epoch": 0.000208941650390625,
      "model_forward_time": 0.11601734161376953,
      "step": 34233
    },
    {
      "epoch": 0.000208941650390625,
      "step": 34233,
      "training_step_time": 0.4893767833709717
    },
    {
      "epoch": 0.00020894775390625,
      "model_forward_time": 0.11565542221069336,
      "step": 34234
    },
    {
      "epoch": 0.00020894775390625,
      "step": 34234,
      "training_step_time": 0.5074863433837891
    },
    {
      "epoch": 0.000208953857421875,
      "model_forward_time": 0.11482095718383789,
      "step": 34235
    },
    {
      "epoch": 0.000208953857421875,
      "step": 34235,
      "training_step_time": 0.4066770076751709
    },
    {
      "epoch": 0.0002089599609375,
      "model_forward_time": 0.1159512996673584,
      "step": 34236
    },
    {
      "epoch": 0.0002089599609375,
      "step": 34236,
      "training_step_time": 0.36770081520080566
    },
    {
      "epoch": 0.000208966064453125,
      "model_forward_time": 0.11545300483703613,
      "step": 34237
    },
    {
      "epoch": 0.000208966064453125,
      "step": 34237,
      "training_step_time": 0.4539015293121338
    },
    {
      "epoch": 0.00020897216796875,
      "model_forward_time": 0.1148989200592041,
      "step": 34238
    },
    {
      "epoch": 0.00020897216796875,
      "step": 34238,
      "training_step_time": 0.3964250087738037
    },
    {
      "epoch": 0.000208978271484375,
      "model_forward_time": 0.11574172973632812,
      "step": 34239
    },
    {
      "epoch": 0.000208978271484375,
      "step": 34239,
      "training_step_time": 0.44516777992248535
    },
    {
      "epoch": 0.000208984375,
      "grad_norm": 0.13867385685443878,
      "learning_rate": 4.247782268778791e-05,
      "loss": 0.0389,
      "step": 34240
    },
    {
      "epoch": 0.000208984375,
      "model_forward_time": 0.11496758460998535,
      "step": 34240
    },
    {
      "epoch": 0.000208984375,
      "step": 34240,
      "training_step_time": 0.38849329948425293
    },
    {
      "epoch": 0.000208990478515625,
      "model_forward_time": 0.11545491218566895,
      "step": 34241
    },
    {
      "epoch": 0.000208990478515625,
      "step": 34241,
      "training_step_time": 0.39115452766418457
    },
    {
      "epoch": 0.00020899658203125,
      "model_forward_time": 0.1149435043334961,
      "step": 34242
    },
    {
      "epoch": 0.00020899658203125,
      "step": 34242,
      "training_step_time": 0.3894655704498291
    },
    {
      "epoch": 0.000209002685546875,
      "model_forward_time": 0.11540532112121582,
      "step": 34243
    },
    {
      "epoch": 0.000209002685546875,
      "step": 34243,
      "training_step_time": 0.3911426067352295
    },
    {
      "epoch": 0.0002090087890625,
      "model_forward_time": 0.11536550521850586,
      "step": 34244
    },
    {
      "epoch": 0.0002090087890625,
      "step": 34244,
      "training_step_time": 0.39220499992370605
    },
    {
      "epoch": 0.000209014892578125,
      "model_forward_time": 0.1153111457824707,
      "step": 34245
    },
    {
      "epoch": 0.000209014892578125,
      "step": 34245,
      "training_step_time": 0.38329577445983887
    },
    {
      "epoch": 0.00020902099609375,
      "model_forward_time": 0.11802172660827637,
      "step": 34246
    },
    {
      "epoch": 0.00020902099609375,
      "step": 34246,
      "training_step_time": 0.393357515335083
    },
    {
      "epoch": 0.000209027099609375,
      "model_forward_time": 0.11571526527404785,
      "step": 34247
    },
    {
      "epoch": 0.000209027099609375,
      "step": 34247,
      "training_step_time": 0.43253397941589355
    },
    {
      "epoch": 0.000209033203125,
      "model_forward_time": 0.11525630950927734,
      "step": 34248
    },
    {
      "epoch": 0.000209033203125,
      "step": 34248,
      "training_step_time": 0.39653635025024414
    },
    {
      "epoch": 0.000209039306640625,
      "model_forward_time": 0.11577296257019043,
      "step": 34249
    },
    {
      "epoch": 0.000209039306640625,
      "step": 34249,
      "training_step_time": 0.4898104667663574
    },
    {
      "epoch": 0.00020904541015625,
      "grad_norm": 0.09400568902492523,
      "learning_rate": 4.2450579648038154e-05,
      "loss": 0.0422,
      "step": 34250
    },
    {
      "epoch": 0.00020904541015625,
      "model_forward_time": 0.11481046676635742,
      "step": 34250
    },
    {
      "epoch": 0.00020904541015625,
      "step": 34250,
      "training_step_time": 0.3847825527191162
    },
    {
      "epoch": 0.000209051513671875,
      "model_forward_time": 0.11568760871887207,
      "step": 34251
    },
    {
      "epoch": 0.000209051513671875,
      "step": 34251,
      "training_step_time": 0.4157288074493408
    },
    {
      "epoch": 0.0002090576171875,
      "model_forward_time": 0.1158144474029541,
      "step": 34252
    },
    {
      "epoch": 0.0002090576171875,
      "step": 34252,
      "training_step_time": 0.5058164596557617
    },
    {
      "epoch": 0.000209063720703125,
      "model_forward_time": 0.11481332778930664,
      "step": 34253
    },
    {
      "epoch": 0.000209063720703125,
      "step": 34253,
      "training_step_time": 0.4511451721191406
    },
    {
      "epoch": 0.00020906982421875,
      "model_forward_time": 0.11518335342407227,
      "step": 34254
    },
    {
      "epoch": 0.00020906982421875,
      "step": 34254,
      "training_step_time": 0.39583849906921387
    },
    {
      "epoch": 0.000209075927734375,
      "model_forward_time": 0.11508917808532715,
      "step": 34255
    },
    {
      "epoch": 0.000209075927734375,
      "step": 34255,
      "training_step_time": 0.3907444477081299
    },
    {
      "epoch": 0.00020908203125,
      "model_forward_time": 0.11534833908081055,
      "step": 34256
    },
    {
      "epoch": 0.00020908203125,
      "step": 34256,
      "training_step_time": 0.3987140655517578
    },
    {
      "epoch": 0.000209088134765625,
      "model_forward_time": 0.1149296760559082,
      "step": 34257
    },
    {
      "epoch": 0.000209088134765625,
      "step": 34257,
      "training_step_time": 0.3862297534942627
    },
    {
      "epoch": 0.00020909423828125,
      "model_forward_time": 0.11509919166564941,
      "step": 34258
    },
    {
      "epoch": 0.00020909423828125,
      "step": 34258,
      "training_step_time": 0.3948349952697754
    },
    {
      "epoch": 0.000209100341796875,
      "model_forward_time": 0.11509895324707031,
      "step": 34259
    },
    {
      "epoch": 0.000209100341796875,
      "step": 34259,
      "training_step_time": 0.6328718662261963
    },
    {
      "epoch": 0.0002091064453125,
      "grad_norm": 0.10225445032119751,
      "learning_rate": 4.2423338901602985e-05,
      "loss": 0.0393,
      "step": 34260
    },
    {
      "epoch": 0.0002091064453125,
      "model_forward_time": 0.11470842361450195,
      "step": 34260
    },
    {
      "epoch": 0.0002091064453125,
      "step": 34260,
      "training_step_time": 0.3908712863922119
    },
    {
      "epoch": 0.000209112548828125,
      "model_forward_time": 0.11519289016723633,
      "step": 34261
    },
    {
      "epoch": 0.000209112548828125,
      "step": 34261,
      "training_step_time": 0.44399428367614746
    },
    {
      "epoch": 0.00020911865234375,
      "model_forward_time": 0.11474442481994629,
      "step": 34262
    },
    {
      "epoch": 0.00020911865234375,
      "step": 34262,
      "training_step_time": 0.38827943801879883
    },
    {
      "epoch": 0.000209124755859375,
      "model_forward_time": 0.11547517776489258,
      "step": 34263
    },
    {
      "epoch": 0.000209124755859375,
      "step": 34263,
      "training_step_time": 0.5057852268218994
    },
    {
      "epoch": 0.000209130859375,
      "model_forward_time": 0.11451888084411621,
      "step": 34264
    },
    {
      "epoch": 0.000209130859375,
      "step": 34264,
      "training_step_time": 0.3838651180267334
    },
    {
      "epoch": 0.000209136962890625,
      "model_forward_time": 0.11487674713134766,
      "step": 34265
    },
    {
      "epoch": 0.000209136962890625,
      "step": 34265,
      "training_step_time": 0.40669679641723633
    },
    {
      "epoch": 0.00020914306640625,
      "model_forward_time": 0.11440634727478027,
      "step": 34266
    },
    {
      "epoch": 0.00020914306640625,
      "step": 34266,
      "training_step_time": 0.3990049362182617
    },
    {
      "epoch": 0.000209149169921875,
      "model_forward_time": 0.11518621444702148,
      "step": 34267
    },
    {
      "epoch": 0.000209149169921875,
      "step": 34267,
      "training_step_time": 0.4831576347351074
    },
    {
      "epoch": 0.0002091552734375,
      "model_forward_time": 0.11500144004821777,
      "step": 34268
    },
    {
      "epoch": 0.0002091552734375,
      "step": 34268,
      "training_step_time": 0.3838071823120117
    },
    {
      "epoch": 0.000209161376953125,
      "model_forward_time": 0.1155252456665039,
      "step": 34269
    },
    {
      "epoch": 0.000209161376953125,
      "step": 34269,
      "training_step_time": 0.4007985591888428
    },
    {
      "epoch": 0.00020916748046875,
      "grad_norm": 0.12320900708436966,
      "learning_rate": 4.239610045675739e-05,
      "loss": 0.0427,
      "step": 34270
    },
    {
      "epoch": 0.00020916748046875,
      "model_forward_time": 0.11453509330749512,
      "step": 34270
    },
    {
      "epoch": 0.00020916748046875,
      "step": 34270,
      "training_step_time": 0.3834872245788574
    },
    {
      "epoch": 0.000209173583984375,
      "model_forward_time": 0.11432218551635742,
      "step": 34271
    },
    {
      "epoch": 0.000209173583984375,
      "step": 34271,
      "training_step_time": 0.5153834819793701
    },
    {
      "epoch": 0.0002091796875,
      "model_forward_time": 0.11474323272705078,
      "step": 34272
    },
    {
      "epoch": 0.0002091796875,
      "step": 34272,
      "training_step_time": 0.39760541915893555
    },
    {
      "epoch": 0.000209185791015625,
      "model_forward_time": 0.11496853828430176,
      "step": 34273
    },
    {
      "epoch": 0.000209185791015625,
      "step": 34273,
      "training_step_time": 0.41489386558532715
    },
    {
      "epoch": 0.00020919189453125,
      "model_forward_time": 0.11566901206970215,
      "step": 34274
    },
    {
      "epoch": 0.00020919189453125,
      "step": 34274,
      "training_step_time": 0.3896183967590332
    },
    {
      "epoch": 0.000209197998046875,
      "model_forward_time": 0.11525893211364746,
      "step": 34275
    },
    {
      "epoch": 0.000209197998046875,
      "step": 34275,
      "training_step_time": 0.39313578605651855
    },
    {
      "epoch": 0.0002092041015625,
      "model_forward_time": 0.11471271514892578,
      "step": 34276
    },
    {
      "epoch": 0.0002092041015625,
      "step": 34276,
      "training_step_time": 0.40387630462646484
    },
    {
      "epoch": 0.000209210205078125,
      "model_forward_time": 0.11510539054870605,
      "step": 34277
    },
    {
      "epoch": 0.000209210205078125,
      "step": 34277,
      "training_step_time": 0.4792966842651367
    },
    {
      "epoch": 0.00020921630859375,
      "model_forward_time": 0.11486148834228516,
      "step": 34278
    },
    {
      "epoch": 0.00020921630859375,
      "step": 34278,
      "training_step_time": 0.4487416744232178
    },
    {
      "epoch": 0.000209222412109375,
      "model_forward_time": 0.1157233715057373,
      "step": 34279
    },
    {
      "epoch": 0.000209222412109375,
      "step": 34279,
      "training_step_time": 0.40503644943237305
    },
    {
      "epoch": 0.000209228515625,
      "grad_norm": 0.12248113751411438,
      "learning_rate": 4.236886432177572e-05,
      "loss": 0.0424,
      "step": 34280
    },
    {
      "epoch": 0.000209228515625,
      "model_forward_time": 0.1153104305267334,
      "step": 34280
    },
    {
      "epoch": 0.000209228515625,
      "step": 34280,
      "training_step_time": 0.46903061866760254
    },
    {
      "epoch": 0.000209234619140625,
      "model_forward_time": 0.11556124687194824,
      "step": 34281
    },
    {
      "epoch": 0.000209234619140625,
      "step": 34281,
      "training_step_time": 0.48095178604125977
    },
    {
      "epoch": 0.00020924072265625,
      "model_forward_time": 0.11477088928222656,
      "step": 34282
    },
    {
      "epoch": 0.00020924072265625,
      "step": 34282,
      "training_step_time": 0.4157121181488037
    },
    {
      "epoch": 0.000209246826171875,
      "model_forward_time": 0.1148838996887207,
      "step": 34283
    },
    {
      "epoch": 0.000209246826171875,
      "step": 34283,
      "training_step_time": 0.3863377571105957
    },
    {
      "epoch": 0.0002092529296875,
      "model_forward_time": 0.11543440818786621,
      "step": 34284
    },
    {
      "epoch": 0.0002092529296875,
      "step": 34284,
      "training_step_time": 0.3916144371032715
    },
    {
      "epoch": 0.000209259033203125,
      "model_forward_time": 0.11499881744384766,
      "step": 34285
    },
    {
      "epoch": 0.000209259033203125,
      "step": 34285,
      "training_step_time": 0.4034435749053955
    },
    {
      "epoch": 0.00020926513671875,
      "model_forward_time": 0.11501479148864746,
      "step": 34286
    },
    {
      "epoch": 0.00020926513671875,
      "step": 34286,
      "training_step_time": 0.3850891590118408
    },
    {
      "epoch": 0.000209271240234375,
      "model_forward_time": 0.11524486541748047,
      "step": 34287
    },
    {
      "epoch": 0.000209271240234375,
      "step": 34287,
      "training_step_time": 0.4007575511932373
    },
    {
      "epoch": 0.00020927734375,
      "model_forward_time": 0.11506056785583496,
      "step": 34288
    },
    {
      "epoch": 0.00020927734375,
      "step": 34288,
      "training_step_time": 0.4007117748260498
    },
    {
      "epoch": 0.000209283447265625,
      "model_forward_time": 0.11551380157470703,
      "step": 34289
    },
    {
      "epoch": 0.000209283447265625,
      "step": 34289,
      "training_step_time": 0.39843010902404785
    },
    {
      "epoch": 0.00020928955078125,
      "grad_norm": 0.11878420412540436,
      "learning_rate": 4.234163050493158e-05,
      "loss": 0.0435,
      "step": 34290
    },
    {
      "epoch": 0.00020928955078125,
      "model_forward_time": 0.1160426139831543,
      "step": 34290
    },
    {
      "epoch": 0.00020928955078125,
      "step": 34290,
      "training_step_time": 0.4185450077056885
    },
    {
      "epoch": 0.000209295654296875,
      "model_forward_time": 0.11530828475952148,
      "step": 34291
    },
    {
      "epoch": 0.000209295654296875,
      "step": 34291,
      "training_step_time": 0.40196776390075684
    },
    {
      "epoch": 0.0002093017578125,
      "model_forward_time": 0.11533498764038086,
      "step": 34292
    },
    {
      "epoch": 0.0002093017578125,
      "step": 34292,
      "training_step_time": 0.45512986183166504
    },
    {
      "epoch": 0.000209307861328125,
      "model_forward_time": 0.11526775360107422,
      "step": 34293
    },
    {
      "epoch": 0.000209307861328125,
      "step": 34293,
      "training_step_time": 0.45199108123779297
    },
    {
      "epoch": 0.00020931396484375,
      "model_forward_time": 0.11495351791381836,
      "step": 34294
    },
    {
      "epoch": 0.00020931396484375,
      "step": 34294,
      "training_step_time": 0.4002265930175781
    },
    {
      "epoch": 0.000209320068359375,
      "model_forward_time": 0.11468338966369629,
      "step": 34295
    },
    {
      "epoch": 0.000209320068359375,
      "step": 34295,
      "training_step_time": 0.4383518695831299
    },
    {
      "epoch": 0.000209326171875,
      "model_forward_time": 0.1150047779083252,
      "step": 34296
    },
    {
      "epoch": 0.000209326171875,
      "step": 34296,
      "training_step_time": 0.42891812324523926
    },
    {
      "epoch": 0.000209332275390625,
      "model_forward_time": 0.11542797088623047,
      "step": 34297
    },
    {
      "epoch": 0.000209332275390625,
      "step": 34297,
      "training_step_time": 0.4562563896179199
    },
    {
      "epoch": 0.00020933837890625,
      "model_forward_time": 0.11548089981079102,
      "step": 34298
    },
    {
      "epoch": 0.00020933837890625,
      "step": 34298,
      "training_step_time": 0.38471221923828125
    },
    {
      "epoch": 0.000209344482421875,
      "model_forward_time": 0.11416745185852051,
      "step": 34299
    },
    {
      "epoch": 0.000209344482421875,
      "step": 34299,
      "training_step_time": 0.3905344009399414
    },
    {
      "epoch": 0.0002093505859375,
      "grad_norm": 0.10087494552135468,
      "learning_rate": 4.231439901449788e-05,
      "loss": 0.0368,
      "step": 34300
    },
    {
      "epoch": 0.0002093505859375,
      "model_forward_time": 0.11550617218017578,
      "step": 34300
    },
    {
      "epoch": 0.0002093505859375,
      "step": 34300,
      "training_step_time": 0.3932676315307617
    },
    {
      "epoch": 0.000209356689453125,
      "model_forward_time": 0.11522340774536133,
      "step": 34301
    },
    {
      "epoch": 0.000209356689453125,
      "step": 34301,
      "training_step_time": 0.40158653259277344
    },
    {
      "epoch": 0.00020936279296875,
      "model_forward_time": 0.11525177955627441,
      "step": 34302
    },
    {
      "epoch": 0.00020936279296875,
      "step": 34302,
      "training_step_time": 0.3903849124908447
    },
    {
      "epoch": 0.000209368896484375,
      "model_forward_time": 0.11496591567993164,
      "step": 34303
    },
    {
      "epoch": 0.000209368896484375,
      "step": 34303,
      "training_step_time": 0.3861551284790039
    },
    {
      "epoch": 0.000209375,
      "model_forward_time": 0.11509895324707031,
      "step": 34304
    },
    {
      "epoch": 0.000209375,
      "step": 34304,
      "training_step_time": 0.39946508407592773
    },
    {
      "epoch": 0.000209381103515625,
      "model_forward_time": 0.11523175239562988,
      "step": 34305
    },
    {
      "epoch": 0.000209381103515625,
      "step": 34305,
      "training_step_time": 0.4749739170074463
    },
    {
      "epoch": 0.00020938720703125,
      "model_forward_time": 0.11487150192260742,
      "step": 34306
    },
    {
      "epoch": 0.00020938720703125,
      "step": 34306,
      "training_step_time": 0.41766357421875
    },
    {
      "epoch": 0.000209393310546875,
      "model_forward_time": 0.11518168449401855,
      "step": 34307
    },
    {
      "epoch": 0.000209393310546875,
      "step": 34307,
      "training_step_time": 0.4647674560546875
    },
    {
      "epoch": 0.0002093994140625,
      "model_forward_time": 0.1147761344909668,
      "step": 34308
    },
    {
      "epoch": 0.0002093994140625,
      "step": 34308,
      "training_step_time": 0.4928755760192871
    },
    {
      "epoch": 0.000209405517578125,
      "model_forward_time": 0.11543893814086914,
      "step": 34309
    },
    {
      "epoch": 0.000209405517578125,
      "step": 34309,
      "training_step_time": 0.37756896018981934
    },
    {
      "epoch": 0.00020941162109375,
      "grad_norm": 0.10067484527826309,
      "learning_rate": 4.2287169858746836e-05,
      "loss": 0.0399,
      "step": 34310
    },
    {
      "epoch": 0.00020941162109375,
      "model_forward_time": 0.11542129516601562,
      "step": 34310
    },
    {
      "epoch": 0.00020941162109375,
      "step": 34310,
      "training_step_time": 0.4110450744628906
    },
    {
      "epoch": 0.000209417724609375,
      "model_forward_time": 0.11560702323913574,
      "step": 34311
    },
    {
      "epoch": 0.000209417724609375,
      "step": 34311,
      "training_step_time": 0.4889869689941406
    },
    {
      "epoch": 0.000209423828125,
      "model_forward_time": 0.11539745330810547,
      "step": 34312
    },
    {
      "epoch": 0.000209423828125,
      "step": 34312,
      "training_step_time": 0.39965105056762695
    },
    {
      "epoch": 0.000209429931640625,
      "model_forward_time": 0.11458015441894531,
      "step": 34313
    },
    {
      "epoch": 0.000209429931640625,
      "step": 34313,
      "training_step_time": 0.3835461139678955
    },
    {
      "epoch": 0.00020943603515625,
      "model_forward_time": 0.11597633361816406,
      "step": 34314
    },
    {
      "epoch": 0.00020943603515625,
      "step": 34314,
      "training_step_time": 0.3945910930633545
    },
    {
      "epoch": 0.000209442138671875,
      "model_forward_time": 0.11463165283203125,
      "step": 34315
    },
    {
      "epoch": 0.000209442138671875,
      "step": 34315,
      "training_step_time": 0.3870546817779541
    },
    {
      "epoch": 0.0002094482421875,
      "model_forward_time": 0.11565518379211426,
      "step": 34316
    },
    {
      "epoch": 0.0002094482421875,
      "step": 34316,
      "training_step_time": 0.39021992683410645
    },
    {
      "epoch": 0.000209454345703125,
      "model_forward_time": 0.11563873291015625,
      "step": 34317
    },
    {
      "epoch": 0.000209454345703125,
      "step": 34317,
      "training_step_time": 0.4060964584350586
    },
    {
      "epoch": 0.00020946044921875,
      "model_forward_time": 0.11570906639099121,
      "step": 34318
    },
    {
      "epoch": 0.00020946044921875,
      "step": 34318,
      "training_step_time": 0.3935120105743408
    },
    {
      "epoch": 0.000209466552734375,
      "model_forward_time": 0.11562323570251465,
      "step": 34319
    },
    {
      "epoch": 0.000209466552734375,
      "step": 34319,
      "training_step_time": 0.38059043884277344
    },
    {
      "epoch": 0.00020947265625,
      "grad_norm": 0.09474916011095047,
      "learning_rate": 4.2259943045949934e-05,
      "loss": 0.0389,
      "step": 34320
    },
    {
      "epoch": 0.00020947265625,
      "model_forward_time": 0.11543941497802734,
      "step": 34320
    },
    {
      "epoch": 0.00020947265625,
      "step": 34320,
      "training_step_time": 0.4452528953552246
    },
    {
      "epoch": 0.000209478759765625,
      "model_forward_time": 0.11542606353759766,
      "step": 34321
    },
    {
      "epoch": 0.000209478759765625,
      "step": 34321,
      "training_step_time": 0.45537805557250977
    },
    {
      "epoch": 0.00020948486328125,
      "model_forward_time": 0.11558651924133301,
      "step": 34322
    },
    {
      "epoch": 0.00020948486328125,
      "step": 34322,
      "training_step_time": 0.42015528678894043
    },
    {
      "epoch": 0.000209490966796875,
      "model_forward_time": 0.11522507667541504,
      "step": 34323
    },
    {
      "epoch": 0.000209490966796875,
      "step": 34323,
      "training_step_time": 0.46851181983947754
    },
    {
      "epoch": 0.0002094970703125,
      "model_forward_time": 0.11458563804626465,
      "step": 34324
    },
    {
      "epoch": 0.0002094970703125,
      "step": 34324,
      "training_step_time": 0.3899414539337158
    },
    {
      "epoch": 0.000209503173828125,
      "model_forward_time": 0.11469411849975586,
      "step": 34325
    },
    {
      "epoch": 0.000209503173828125,
      "step": 34325,
      "training_step_time": 0.42781758308410645
    },
    {
      "epoch": 0.00020950927734375,
      "model_forward_time": 0.11536407470703125,
      "step": 34326
    },
    {
      "epoch": 0.00020950927734375,
      "step": 34326,
      "training_step_time": 0.4245762825012207
    },
    {
      "epoch": 0.000209515380859375,
      "model_forward_time": 0.11518454551696777,
      "step": 34327
    },
    {
      "epoch": 0.000209515380859375,
      "step": 34327,
      "training_step_time": 0.3906233310699463
    },
    {
      "epoch": 0.000209521484375,
      "model_forward_time": 0.11502480506896973,
      "step": 34328
    },
    {
      "epoch": 0.000209521484375,
      "step": 34328,
      "training_step_time": 0.3848111629486084
    },
    {
      "epoch": 0.000209527587890625,
      "model_forward_time": 0.11475205421447754,
      "step": 34329
    },
    {
      "epoch": 0.000209527587890625,
      "step": 34329,
      "training_step_time": 0.38148927688598633
    },
    {
      "epoch": 0.00020953369140625,
      "grad_norm": 0.12090189754962921,
      "learning_rate": 4.223271858437799e-05,
      "loss": 0.0402,
      "step": 34330
    },
    {
      "epoch": 0.00020953369140625,
      "model_forward_time": 0.11524033546447754,
      "step": 34330
    },
    {
      "epoch": 0.00020953369140625,
      "step": 34330,
      "training_step_time": 0.3888068199157715
    },
    {
      "epoch": 0.000209539794921875,
      "model_forward_time": 0.11542868614196777,
      "step": 34331
    },
    {
      "epoch": 0.000209539794921875,
      "step": 34331,
      "training_step_time": 0.386425256729126
    },
    {
      "epoch": 0.0002095458984375,
      "model_forward_time": 0.1161799430847168,
      "step": 34332
    },
    {
      "epoch": 0.0002095458984375,
      "step": 34332,
      "training_step_time": 0.4050142765045166
    },
    {
      "epoch": 0.000209552001953125,
      "model_forward_time": 0.11620354652404785,
      "step": 34333
    },
    {
      "epoch": 0.000209552001953125,
      "step": 34333,
      "training_step_time": 0.40894317626953125
    },
    {
      "epoch": 0.00020955810546875,
      "model_forward_time": 0.11551427841186523,
      "step": 34334
    },
    {
      "epoch": 0.00020955810546875,
      "step": 34334,
      "training_step_time": 0.43358683586120605
    },
    {
      "epoch": 0.000209564208984375,
      "model_forward_time": 0.11641478538513184,
      "step": 34335
    },
    {
      "epoch": 0.000209564208984375,
      "step": 34335,
      "training_step_time": 0.42324304580688477
    },
    {
      "epoch": 0.0002095703125,
      "model_forward_time": 0.11545896530151367,
      "step": 34336
    },
    {
      "epoch": 0.0002095703125,
      "step": 34336,
      "training_step_time": 0.42401862144470215
    },
    {
      "epoch": 0.000209576416015625,
      "model_forward_time": 0.11576581001281738,
      "step": 34337
    },
    {
      "epoch": 0.000209576416015625,
      "step": 34337,
      "training_step_time": 0.4971160888671875
    },
    {
      "epoch": 0.00020958251953125,
      "model_forward_time": 0.11513471603393555,
      "step": 34338
    },
    {
      "epoch": 0.00020958251953125,
      "step": 34338,
      "training_step_time": 0.3931703567504883
    },
    {
      "epoch": 0.000209588623046875,
      "model_forward_time": 0.11591577529907227,
      "step": 34339
    },
    {
      "epoch": 0.000209588623046875,
      "step": 34339,
      "training_step_time": 0.4268455505371094
    },
    {
      "epoch": 0.0002095947265625,
      "grad_norm": 0.1338001787662506,
      "learning_rate": 4.220549648230104e-05,
      "loss": 0.0397,
      "step": 34340
    },
    {
      "epoch": 0.0002095947265625,
      "model_forward_time": 0.11818265914916992,
      "step": 34340
    },
    {
      "epoch": 0.0002095947265625,
      "step": 34340,
      "training_step_time": 0.47072386741638184
    },
    {
      "epoch": 0.000209600830078125,
      "model_forward_time": 0.12082481384277344,
      "step": 34341
    },
    {
      "epoch": 0.000209600830078125,
      "step": 34341,
      "training_step_time": 0.41742944717407227
    },
    {
      "epoch": 0.00020960693359375,
      "model_forward_time": 0.1154627799987793,
      "step": 34342
    },
    {
      "epoch": 0.00020960693359375,
      "step": 34342,
      "training_step_time": 0.38529253005981445
    },
    {
      "epoch": 0.000209613037109375,
      "model_forward_time": 0.11513900756835938,
      "step": 34343
    },
    {
      "epoch": 0.000209613037109375,
      "step": 34343,
      "training_step_time": 0.38947319984436035
    },
    {
      "epoch": 0.000209619140625,
      "model_forward_time": 0.11518144607543945,
      "step": 34344
    },
    {
      "epoch": 0.000209619140625,
      "step": 34344,
      "training_step_time": 0.39864492416381836
    },
    {
      "epoch": 0.000209625244140625,
      "model_forward_time": 0.11537337303161621,
      "step": 34345
    },
    {
      "epoch": 0.000209625244140625,
      "step": 34345,
      "training_step_time": 0.38969874382019043
    },
    {
      "epoch": 0.00020963134765625,
      "model_forward_time": 0.11505746841430664,
      "step": 34346
    },
    {
      "epoch": 0.00020963134765625,
      "step": 34346,
      "training_step_time": 0.39774489402770996
    },
    {
      "epoch": 0.000209637451171875,
      "model_forward_time": 0.1159365177154541,
      "step": 34347
    },
    {
      "epoch": 0.000209637451171875,
      "step": 34347,
      "training_step_time": 0.4526526927947998
    },
    {
      "epoch": 0.0002096435546875,
      "model_forward_time": 0.11563396453857422,
      "step": 34348
    },
    {
      "epoch": 0.0002096435546875,
      "step": 34348,
      "training_step_time": 0.41188812255859375
    },
    {
      "epoch": 0.000209649658203125,
      "model_forward_time": 0.11521124839782715,
      "step": 34349
    },
    {
      "epoch": 0.000209649658203125,
      "step": 34349,
      "training_step_time": 0.4988226890563965
    },
    {
      "epoch": 0.00020965576171875,
      "grad_norm": 0.1782345175743103,
      "learning_rate": 4.2178276747988446e-05,
      "loss": 0.0444,
      "step": 34350
    },
    {
      "epoch": 0.00020965576171875,
      "model_forward_time": 0.11587738990783691,
      "step": 34350
    },
    {
      "epoch": 0.00020965576171875,
      "step": 34350,
      "training_step_time": 0.4026193618774414
    },
    {
      "epoch": 0.000209661865234375,
      "model_forward_time": 0.11497926712036133,
      "step": 34351
    },
    {
      "epoch": 0.000209661865234375,
      "step": 34351,
      "training_step_time": 0.4000742435455322
    },
    {
      "epoch": 0.00020966796875,
      "model_forward_time": 0.1151740550994873,
      "step": 34352
    },
    {
      "epoch": 0.00020966796875,
      "step": 34352,
      "training_step_time": 0.4757695198059082
    },
    {
      "epoch": 0.000209674072265625,
      "model_forward_time": 0.11530637741088867,
      "step": 34353
    },
    {
      "epoch": 0.000209674072265625,
      "step": 34353,
      "training_step_time": 0.3998594284057617
    },
    {
      "epoch": 0.00020968017578125,
      "model_forward_time": 0.1151437759399414,
      "step": 34354
    },
    {
      "epoch": 0.00020968017578125,
      "step": 34354,
      "training_step_time": 0.3996410369873047
    },
    {
      "epoch": 0.000209686279296875,
      "model_forward_time": 0.11579656600952148,
      "step": 34355
    },
    {
      "epoch": 0.000209686279296875,
      "step": 34355,
      "training_step_time": 0.4018242359161377
    },
    {
      "epoch": 0.0002096923828125,
      "model_forward_time": 0.11629056930541992,
      "step": 34356
    },
    {
      "epoch": 0.0002096923828125,
      "step": 34356,
      "training_step_time": 0.4090449810028076
    },
    {
      "epoch": 0.000209698486328125,
      "model_forward_time": 0.11458468437194824,
      "step": 34357
    },
    {
      "epoch": 0.000209698486328125,
      "step": 34357,
      "training_step_time": 0.5442767143249512
    },
    {
      "epoch": 0.00020970458984375,
      "model_forward_time": 0.11540055274963379,
      "step": 34358
    },
    {
      "epoch": 0.00020970458984375,
      "step": 34358,
      "training_step_time": 0.39305949211120605
    },
    {
      "epoch": 0.000209710693359375,
      "model_forward_time": 0.11503386497497559,
      "step": 34359
    },
    {
      "epoch": 0.000209710693359375,
      "step": 34359,
      "training_step_time": 0.39207935333251953
    },
    {
      "epoch": 0.000209716796875,
      "grad_norm": 0.11075621843338013,
      "learning_rate": 4.215105938970889e-05,
      "loss": 0.0443,
      "step": 34360
    },
    {
      "epoch": 0.000209716796875,
      "model_forward_time": 0.11513376235961914,
      "step": 34360
    },
    {
      "epoch": 0.000209716796875,
      "step": 34360,
      "training_step_time": 0.4026658535003662
    },
    {
      "epoch": 0.000209722900390625,
      "model_forward_time": 0.11560821533203125,
      "step": 34361
    },
    {
      "epoch": 0.000209722900390625,
      "step": 34361,
      "training_step_time": 0.49417710304260254
    },
    {
      "epoch": 0.00020972900390625,
      "model_forward_time": 0.11441159248352051,
      "step": 34362
    },
    {
      "epoch": 0.00020972900390625,
      "step": 34362,
      "training_step_time": 0.42882537841796875
    },
    {
      "epoch": 0.000209735107421875,
      "model_forward_time": 0.11553525924682617,
      "step": 34363
    },
    {
      "epoch": 0.000209735107421875,
      "step": 34363,
      "training_step_time": 0.43622779846191406
    },
    {
      "epoch": 0.0002097412109375,
      "model_forward_time": 0.11538004875183105,
      "step": 34364
    },
    {
      "epoch": 0.0002097412109375,
      "step": 34364,
      "training_step_time": 0.4866969585418701
    },
    {
      "epoch": 0.000209747314453125,
      "model_forward_time": 0.11486244201660156,
      "step": 34365
    },
    {
      "epoch": 0.000209747314453125,
      "step": 34365,
      "training_step_time": 0.4262580871582031
    },
    {
      "epoch": 0.00020975341796875,
      "model_forward_time": 0.11558699607849121,
      "step": 34366
    },
    {
      "epoch": 0.00020975341796875,
      "step": 34366,
      "training_step_time": 0.48015308380126953
    },
    {
      "epoch": 0.000209759521484375,
      "model_forward_time": 0.1154170036315918,
      "step": 34367
    },
    {
      "epoch": 0.000209759521484375,
      "step": 34367,
      "training_step_time": 0.40222692489624023
    },
    {
      "epoch": 0.000209765625,
      "model_forward_time": 0.11499953269958496,
      "step": 34368
    },
    {
      "epoch": 0.000209765625,
      "step": 34368,
      "training_step_time": 0.46213793754577637
    },
    {
      "epoch": 0.000209771728515625,
      "model_forward_time": 0.11582636833190918,
      "step": 34369
    },
    {
      "epoch": 0.000209771728515625,
      "step": 34369,
      "training_step_time": 0.40015411376953125
    },
    {
      "epoch": 0.00020977783203125,
      "grad_norm": 0.1025603860616684,
      "learning_rate": 4.212384441573023e-05,
      "loss": 0.0373,
      "step": 34370
    },
    {
      "epoch": 0.00020977783203125,
      "model_forward_time": 0.1158304214477539,
      "step": 34370
    },
    {
      "epoch": 0.00020977783203125,
      "step": 34370,
      "training_step_time": 0.40334296226501465
    },
    {
      "epoch": 0.000209783935546875,
      "model_forward_time": 0.11528325080871582,
      "step": 34371
    },
    {
      "epoch": 0.000209783935546875,
      "step": 34371,
      "training_step_time": 0.39496302604675293
    },
    {
      "epoch": 0.0002097900390625,
      "model_forward_time": 0.1157236099243164,
      "step": 34372
    },
    {
      "epoch": 0.0002097900390625,
      "step": 34372,
      "training_step_time": 0.3866126537322998
    },
    {
      "epoch": 0.000209796142578125,
      "model_forward_time": 0.11495852470397949,
      "step": 34373
    },
    {
      "epoch": 0.000209796142578125,
      "step": 34373,
      "training_step_time": 0.4625098705291748
    },
    {
      "epoch": 0.00020980224609375,
      "model_forward_time": 0.11496114730834961,
      "step": 34374
    },
    {
      "epoch": 0.00020980224609375,
      "step": 34374,
      "training_step_time": 0.46338391304016113
    },
    {
      "epoch": 0.000209808349609375,
      "model_forward_time": 0.11519479751586914,
      "step": 34375
    },
    {
      "epoch": 0.000209808349609375,
      "step": 34375,
      "training_step_time": 0.3916609287261963
    },
    {
      "epoch": 0.000209814453125,
      "model_forward_time": 0.11469221115112305,
      "step": 34376
    },
    {
      "epoch": 0.000209814453125,
      "step": 34376,
      "training_step_time": 0.3940880298614502
    },
    {
      "epoch": 0.000209820556640625,
      "model_forward_time": 0.11498355865478516,
      "step": 34377
    },
    {
      "epoch": 0.000209820556640625,
      "step": 34377,
      "training_step_time": 0.4026670455932617
    },
    {
      "epoch": 0.00020982666015625,
      "model_forward_time": 0.11522865295410156,
      "step": 34378
    },
    {
      "epoch": 0.00020982666015625,
      "step": 34378,
      "training_step_time": 0.45801258087158203
    },
    {
      "epoch": 0.000209832763671875,
      "model_forward_time": 0.11461305618286133,
      "step": 34379
    },
    {
      "epoch": 0.000209832763671875,
      "step": 34379,
      "training_step_time": 0.4167919158935547
    },
    {
      "epoch": 0.0002098388671875,
      "grad_norm": 0.08054761588573456,
      "learning_rate": 4.209663183431969e-05,
      "loss": 0.0414,
      "step": 34380
    },
    {
      "epoch": 0.0002098388671875,
      "model_forward_time": 0.11481451988220215,
      "step": 34380
    },
    {
      "epoch": 0.0002098388671875,
      "step": 34380,
      "training_step_time": 0.41599345207214355
    },
    {
      "epoch": 0.000209844970703125,
      "model_forward_time": 0.11577415466308594,
      "step": 34381
    },
    {
      "epoch": 0.000209844970703125,
      "step": 34381,
      "training_step_time": 0.414276123046875
    },
    {
      "epoch": 0.00020985107421875,
      "model_forward_time": 0.1180579662322998,
      "step": 34382
    },
    {
      "epoch": 0.00020985107421875,
      "step": 34382,
      "training_step_time": 0.37245726585388184
    },
    {
      "epoch": 0.000209857177734375,
      "model_forward_time": 0.11893153190612793,
      "step": 34383
    },
    {
      "epoch": 0.000209857177734375,
      "step": 34383,
      "training_step_time": 0.39526820182800293
    },
    {
      "epoch": 0.00020986328125,
      "model_forward_time": 0.11977934837341309,
      "step": 34384
    },
    {
      "epoch": 0.00020986328125,
      "step": 34384,
      "training_step_time": 0.487701416015625
    },
    {
      "epoch": 0.000209869384765625,
      "model_forward_time": 0.11896300315856934,
      "step": 34385
    },
    {
      "epoch": 0.000209869384765625,
      "step": 34385,
      "training_step_time": 0.46224546432495117
    },
    {
      "epoch": 0.00020987548828125,
      "model_forward_time": 0.11812257766723633,
      "step": 34386
    },
    {
      "epoch": 0.00020987548828125,
      "step": 34386,
      "training_step_time": 0.38656044006347656
    },
    {
      "epoch": 0.000209881591796875,
      "model_forward_time": 0.11622262001037598,
      "step": 34387
    },
    {
      "epoch": 0.000209881591796875,
      "step": 34387,
      "training_step_time": 0.38903164863586426
    },
    {
      "epoch": 0.0002098876953125,
      "model_forward_time": 0.11491727828979492,
      "step": 34388
    },
    {
      "epoch": 0.0002098876953125,
      "step": 34388,
      "training_step_time": 0.3865213394165039
    },
    {
      "epoch": 0.000209893798828125,
      "model_forward_time": 0.11600923538208008,
      "step": 34389
    },
    {
      "epoch": 0.000209893798828125,
      "step": 34389,
      "training_step_time": 0.3841259479522705
    },
    {
      "epoch": 0.00020989990234375,
      "grad_norm": 0.10604941099882126,
      "learning_rate": 4.2069421653743706e-05,
      "loss": 0.0393,
      "step": 34390
    },
    {
      "epoch": 0.00020989990234375,
      "model_forward_time": 0.11552691459655762,
      "step": 34390
    },
    {
      "epoch": 0.00020989990234375,
      "step": 34390,
      "training_step_time": 0.3883030414581299
    },
    {
      "epoch": 0.000209906005859375,
      "model_forward_time": 0.11501288414001465,
      "step": 34391
    },
    {
      "epoch": 0.000209906005859375,
      "step": 34391,
      "training_step_time": 0.5503900051116943
    },
    {
      "epoch": 0.000209912109375,
      "model_forward_time": 0.11574888229370117,
      "step": 34392
    },
    {
      "epoch": 0.000209912109375,
      "step": 34392,
      "training_step_time": 0.4093754291534424
    },
    {
      "epoch": 0.000209918212890625,
      "model_forward_time": 0.11479806900024414,
      "step": 34393
    },
    {
      "epoch": 0.000209918212890625,
      "step": 34393,
      "training_step_time": 0.44336676597595215
    },
    {
      "epoch": 0.00020992431640625,
      "model_forward_time": 0.11505889892578125,
      "step": 34394
    },
    {
      "epoch": 0.00020992431640625,
      "step": 34394,
      "training_step_time": 0.4283933639526367
    },
    {
      "epoch": 0.000209930419921875,
      "model_forward_time": 0.1150062084197998,
      "step": 34395
    },
    {
      "epoch": 0.000209930419921875,
      "step": 34395,
      "training_step_time": 0.496593713760376
    },
    {
      "epoch": 0.0002099365234375,
      "model_forward_time": 0.11573100090026855,
      "step": 34396
    },
    {
      "epoch": 0.0002099365234375,
      "step": 34396,
      "training_step_time": 0.38686037063598633
    },
    {
      "epoch": 0.000209942626953125,
      "model_forward_time": 0.11534285545349121,
      "step": 34397
    },
    {
      "epoch": 0.000209942626953125,
      "step": 34397,
      "training_step_time": 0.5052671432495117
    },
    {
      "epoch": 0.00020994873046875,
      "model_forward_time": 0.11836028099060059,
      "step": 34398
    },
    {
      "epoch": 0.00020994873046875,
      "step": 34398,
      "training_step_time": 0.43905162811279297
    },
    {
      "epoch": 0.000209954833984375,
      "model_forward_time": 0.11837029457092285,
      "step": 34399
    },
    {
      "epoch": 0.000209954833984375,
      "step": 34399,
      "training_step_time": 0.38077569007873535
    },
    {
      "epoch": 0.0002099609375,
      "grad_norm": 0.12225768715143204,
      "learning_rate": 4.2042213882268025e-05,
      "loss": 0.0424,
      "step": 34400
    },
    {
      "epoch": 0.0002099609375,
      "model_forward_time": 0.11471843719482422,
      "step": 34400
    },
    {
      "epoch": 0.0002099609375,
      "step": 34400,
      "training_step_time": 0.4252786636352539
    },
    {
      "epoch": 0.000209967041015625,
      "model_forward_time": 0.11920619010925293,
      "step": 34401
    },
    {
      "epoch": 0.000209967041015625,
      "step": 34401,
      "training_step_time": 0.39094066619873047
    },
    {
      "epoch": 0.00020997314453125,
      "model_forward_time": 0.11546015739440918,
      "step": 34402
    },
    {
      "epoch": 0.00020997314453125,
      "step": 34402,
      "training_step_time": 0.3971269130706787
    },
    {
      "epoch": 0.000209979248046875,
      "model_forward_time": 0.11537289619445801,
      "step": 34403
    },
    {
      "epoch": 0.000209979248046875,
      "step": 34403,
      "training_step_time": 0.38809967041015625
    },
    {
      "epoch": 0.0002099853515625,
      "model_forward_time": 0.11477351188659668,
      "step": 34404
    },
    {
      "epoch": 0.0002099853515625,
      "step": 34404,
      "training_step_time": 0.39451122283935547
    },
    {
      "epoch": 0.000209991455078125,
      "model_forward_time": 0.11548137664794922,
      "step": 34405
    },
    {
      "epoch": 0.000209991455078125,
      "step": 34405,
      "training_step_time": 0.41594624519348145
    },
    {
      "epoch": 0.00020999755859375,
      "model_forward_time": 0.11519622802734375,
      "step": 34406
    },
    {
      "epoch": 0.00020999755859375,
      "step": 34406,
      "training_step_time": 0.4346942901611328
    },
    {
      "epoch": 0.000210003662109375,
      "model_forward_time": 0.11499595642089844,
      "step": 34407
    },
    {
      "epoch": 0.000210003662109375,
      "step": 34407,
      "training_step_time": 0.39691710472106934
    },
    {
      "epoch": 0.000210009765625,
      "model_forward_time": 0.11561465263366699,
      "step": 34408
    },
    {
      "epoch": 0.000210009765625,
      "step": 34408,
      "training_step_time": 0.47189855575561523
    },
    {
      "epoch": 0.000210015869140625,
      "model_forward_time": 0.1144866943359375,
      "step": 34409
    },
    {
      "epoch": 0.000210015869140625,
      "step": 34409,
      "training_step_time": 0.48748326301574707
    },
    {
      "epoch": 0.00021002197265625,
      "grad_norm": 0.15279315412044525,
      "learning_rate": 4.201500852815768e-05,
      "loss": 0.0386,
      "step": 34410
    },
    {
      "epoch": 0.00021002197265625,
      "model_forward_time": 0.11568856239318848,
      "step": 34410
    },
    {
      "epoch": 0.00021002197265625,
      "step": 34410,
      "training_step_time": 0.39209461212158203
    },
    {
      "epoch": 0.000210028076171875,
      "model_forward_time": 0.11688637733459473,
      "step": 34411
    },
    {
      "epoch": 0.000210028076171875,
      "step": 34411,
      "training_step_time": 0.40938687324523926
    },
    {
      "epoch": 0.0002100341796875,
      "model_forward_time": 0.11619210243225098,
      "step": 34412
    },
    {
      "epoch": 0.0002100341796875,
      "step": 34412,
      "training_step_time": 0.4929931163787842
    },
    {
      "epoch": 0.000210040283203125,
      "model_forward_time": 0.11646795272827148,
      "step": 34413
    },
    {
      "epoch": 0.000210040283203125,
      "step": 34413,
      "training_step_time": 0.3962593078613281
    },
    {
      "epoch": 0.00021004638671875,
      "model_forward_time": 0.11541152000427246,
      "step": 34414
    },
    {
      "epoch": 0.00021004638671875,
      "step": 34414,
      "training_step_time": 0.45731115341186523
    },
    {
      "epoch": 0.000210052490234375,
      "model_forward_time": 0.11545348167419434,
      "step": 34415
    },
    {
      "epoch": 0.000210052490234375,
      "step": 34415,
      "training_step_time": 0.4703547954559326
    },
    {
      "epoch": 0.00021005859375,
      "model_forward_time": 0.11561393737792969,
      "step": 34416
    },
    {
      "epoch": 0.00021005859375,
      "step": 34416,
      "training_step_time": 0.39182376861572266
    },
    {
      "epoch": 0.000210064697265625,
      "model_forward_time": 0.11621403694152832,
      "step": 34417
    },
    {
      "epoch": 0.000210064697265625,
      "step": 34417,
      "training_step_time": 0.4001636505126953
    },
    {
      "epoch": 0.00021007080078125,
      "model_forward_time": 0.11554145812988281,
      "step": 34418
    },
    {
      "epoch": 0.00021007080078125,
      "step": 34418,
      "training_step_time": 0.38629627227783203
    },
    {
      "epoch": 0.000210076904296875,
      "model_forward_time": 0.11518263816833496,
      "step": 34419
    },
    {
      "epoch": 0.000210076904296875,
      "step": 34419,
      "training_step_time": 0.402238130569458
    },
    {
      "epoch": 0.0002100830078125,
      "grad_norm": 0.11741422116756439,
      "learning_rate": 4.1987805599676896e-05,
      "loss": 0.0397,
      "step": 34420
    },
    {
      "epoch": 0.0002100830078125,
      "model_forward_time": 0.11490273475646973,
      "step": 34420
    },
    {
      "epoch": 0.0002100830078125,
      "step": 34420,
      "training_step_time": 0.41887736320495605
    },
    {
      "epoch": 0.000210089111328125,
      "model_forward_time": 0.1155085563659668,
      "step": 34421
    },
    {
      "epoch": 0.000210089111328125,
      "step": 34421,
      "training_step_time": 0.5095615386962891
    },
    {
      "epoch": 0.00021009521484375,
      "model_forward_time": 0.11518454551696777,
      "step": 34422
    },
    {
      "epoch": 0.00021009521484375,
      "step": 34422,
      "training_step_time": 0.42128539085388184
    },
    {
      "epoch": 0.000210101318359375,
      "model_forward_time": 0.11583662033081055,
      "step": 34423
    },
    {
      "epoch": 0.000210101318359375,
      "step": 34423,
      "training_step_time": 0.40535831451416016
    },
    {
      "epoch": 0.000210107421875,
      "model_forward_time": 0.11503744125366211,
      "step": 34424
    },
    {
      "epoch": 0.000210107421875,
      "step": 34424,
      "training_step_time": 0.4338502883911133
    },
    {
      "epoch": 0.000210113525390625,
      "model_forward_time": 0.11516976356506348,
      "step": 34425
    },
    {
      "epoch": 0.000210113525390625,
      "step": 34425,
      "training_step_time": 0.3813209533691406
    },
    {
      "epoch": 0.00021011962890625,
      "model_forward_time": 0.11514592170715332,
      "step": 34426
    },
    {
      "epoch": 0.00021011962890625,
      "step": 34426,
      "training_step_time": 0.4074985980987549
    },
    {
      "epoch": 0.000210125732421875,
      "model_forward_time": 0.1150815486907959,
      "step": 34427
    },
    {
      "epoch": 0.000210125732421875,
      "step": 34427,
      "training_step_time": 0.4349384307861328
    },
    {
      "epoch": 0.0002101318359375,
      "model_forward_time": 0.11510443687438965,
      "step": 34428
    },
    {
      "epoch": 0.0002101318359375,
      "step": 34428,
      "training_step_time": 0.43349695205688477
    },
    {
      "epoch": 0.000210137939453125,
      "model_forward_time": 0.11494112014770508,
      "step": 34429
    },
    {
      "epoch": 0.000210137939453125,
      "step": 34429,
      "training_step_time": 0.3977186679840088
    },
    {
      "epoch": 0.00021014404296875,
      "grad_norm": 0.18451297283172607,
      "learning_rate": 4.196060510508922e-05,
      "loss": 0.0432,
      "step": 34430
    },
    {
      "epoch": 0.00021014404296875,
      "model_forward_time": 0.11524534225463867,
      "step": 34430
    },
    {
      "epoch": 0.00021014404296875,
      "step": 34430,
      "training_step_time": 0.3856544494628906
    },
    {
      "epoch": 0.000210150146484375,
      "model_forward_time": 0.1149301528930664,
      "step": 34431
    },
    {
      "epoch": 0.000210150146484375,
      "step": 34431,
      "training_step_time": 0.38283514976501465
    },
    {
      "epoch": 0.00021015625,
      "model_forward_time": 0.11537647247314453,
      "step": 34432
    },
    {
      "epoch": 0.00021015625,
      "step": 34432,
      "training_step_time": 0.39035844802856445
    },
    {
      "epoch": 0.000210162353515625,
      "model_forward_time": 0.11520051956176758,
      "step": 34433
    },
    {
      "epoch": 0.000210162353515625,
      "step": 34433,
      "training_step_time": 0.5450930595397949
    },
    {
      "epoch": 0.00021016845703125,
      "model_forward_time": 0.11547684669494629,
      "step": 34434
    },
    {
      "epoch": 0.00021016845703125,
      "step": 34434,
      "training_step_time": 0.4474823474884033
    },
    {
      "epoch": 0.000210174560546875,
      "model_forward_time": 0.11564445495605469,
      "step": 34435
    },
    {
      "epoch": 0.000210174560546875,
      "step": 34435,
      "training_step_time": 0.4982757568359375
    },
    {
      "epoch": 0.0002101806640625,
      "model_forward_time": 0.11533021926879883,
      "step": 34436
    },
    {
      "epoch": 0.0002101806640625,
      "step": 34436,
      "training_step_time": 0.4203054904937744
    },
    {
      "epoch": 0.000210186767578125,
      "model_forward_time": 0.11471819877624512,
      "step": 34437
    },
    {
      "epoch": 0.000210186767578125,
      "step": 34437,
      "training_step_time": 0.4008045196533203
    },
    {
      "epoch": 0.00021019287109375,
      "model_forward_time": 0.11668086051940918,
      "step": 34438
    },
    {
      "epoch": 0.00021019287109375,
      "step": 34438,
      "training_step_time": 0.45653510093688965
    },
    {
      "epoch": 0.000210198974609375,
      "model_forward_time": 0.11518335342407227,
      "step": 34439
    },
    {
      "epoch": 0.000210198974609375,
      "step": 34439,
      "training_step_time": 0.3661513328552246
    },
    {
      "epoch": 0.000210205078125,
      "grad_norm": 0.20909662544727325,
      "learning_rate": 4.1933407052657456e-05,
      "loss": 0.0406,
      "step": 34440
    },
    {
      "epoch": 0.000210205078125,
      "model_forward_time": 0.11515617370605469,
      "step": 34440
    },
    {
      "epoch": 0.000210205078125,
      "step": 34440,
      "training_step_time": 0.4042205810546875
    },
    {
      "epoch": 0.000210211181640625,
      "model_forward_time": 0.11663937568664551,
      "step": 34441
    },
    {
      "epoch": 0.000210211181640625,
      "step": 34441,
      "training_step_time": 0.4303610324859619
    },
    {
      "epoch": 0.00021021728515625,
      "model_forward_time": 0.11495351791381836,
      "step": 34442
    },
    {
      "epoch": 0.00021021728515625,
      "step": 34442,
      "training_step_time": 0.3889896869659424
    },
    {
      "epoch": 0.000210223388671875,
      "model_forward_time": 0.11533308029174805,
      "step": 34443
    },
    {
      "epoch": 0.000210223388671875,
      "step": 34443,
      "training_step_time": 0.3898286819458008
    },
    {
      "epoch": 0.0002102294921875,
      "model_forward_time": 0.11505913734436035,
      "step": 34444
    },
    {
      "epoch": 0.0002102294921875,
      "step": 34444,
      "training_step_time": 0.3950169086456299
    },
    {
      "epoch": 0.000210235595703125,
      "model_forward_time": 0.11470794677734375,
      "step": 34445
    },
    {
      "epoch": 0.000210235595703125,
      "step": 34445,
      "training_step_time": 0.39073967933654785
    },
    {
      "epoch": 0.00021024169921875,
      "model_forward_time": 0.11581754684448242,
      "step": 34446
    },
    {
      "epoch": 0.00021024169921875,
      "step": 34446,
      "training_step_time": 0.39414191246032715
    },
    {
      "epoch": 0.000210247802734375,
      "model_forward_time": 0.11584758758544922,
      "step": 34447
    },
    {
      "epoch": 0.000210247802734375,
      "step": 34447,
      "training_step_time": 0.5236532688140869
    },
    {
      "epoch": 0.00021025390625,
      "model_forward_time": 0.11530828475952148,
      "step": 34448
    },
    {
      "epoch": 0.00021025390625,
      "step": 34448,
      "training_step_time": 0.4089384078979492
    },
    {
      "epoch": 0.000210260009765625,
      "model_forward_time": 0.11510682106018066,
      "step": 34449
    },
    {
      "epoch": 0.000210260009765625,
      "step": 34449,
      "training_step_time": 0.4333980083465576
    },
    {
      "epoch": 0.00021026611328125,
      "grad_norm": 0.14615307748317719,
      "learning_rate": 4.190621145064363e-05,
      "loss": 0.0411,
      "step": 34450
    },
    {
      "epoch": 0.00021026611328125,
      "model_forward_time": 0.11532831192016602,
      "step": 34450
    },
    {
      "epoch": 0.00021026611328125,
      "step": 34450,
      "training_step_time": 0.43471336364746094
    },
    {
      "epoch": 0.000210272216796875,
      "model_forward_time": 0.1144413948059082,
      "step": 34451
    },
    {
      "epoch": 0.000210272216796875,
      "step": 34451,
      "training_step_time": 0.39580249786376953
    },
    {
      "epoch": 0.0002102783203125,
      "model_forward_time": 0.11524248123168945,
      "step": 34452
    },
    {
      "epoch": 0.0002102783203125,
      "step": 34452,
      "training_step_time": 0.46506690979003906
    },
    {
      "epoch": 0.000210284423828125,
      "model_forward_time": 0.11535978317260742,
      "step": 34453
    },
    {
      "epoch": 0.000210284423828125,
      "step": 34453,
      "training_step_time": 0.5024521350860596
    },
    {
      "epoch": 0.00021029052734375,
      "model_forward_time": 0.11622047424316406,
      "step": 34454
    },
    {
      "epoch": 0.00021029052734375,
      "step": 34454,
      "training_step_time": 0.44712376594543457
    },
    {
      "epoch": 0.000210296630859375,
      "model_forward_time": 0.11547183990478516,
      "step": 34455
    },
    {
      "epoch": 0.000210296630859375,
      "step": 34455,
      "training_step_time": 0.46241188049316406
    },
    {
      "epoch": 0.000210302734375,
      "model_forward_time": 0.11521649360656738,
      "step": 34456
    },
    {
      "epoch": 0.000210302734375,
      "step": 34456,
      "training_step_time": 0.4041097164154053
    },
    {
      "epoch": 0.000210308837890625,
      "model_forward_time": 0.11431574821472168,
      "step": 34457
    },
    {
      "epoch": 0.000210308837890625,
      "step": 34457,
      "training_step_time": 0.3887596130371094
    },
    {
      "epoch": 0.00021031494140625,
      "model_forward_time": 0.11606764793395996,
      "step": 34458
    },
    {
      "epoch": 0.00021031494140625,
      "step": 34458,
      "training_step_time": 0.40979599952697754
    },
    {
      "epoch": 0.000210321044921875,
      "model_forward_time": 0.11489129066467285,
      "step": 34459
    },
    {
      "epoch": 0.000210321044921875,
      "step": 34459,
      "training_step_time": 0.49613404273986816
    },
    {
      "epoch": 0.0002103271484375,
      "grad_norm": 0.1658438742160797,
      "learning_rate": 4.187901830730906e-05,
      "loss": 0.0437,
      "step": 34460
    },
    {
      "epoch": 0.0002103271484375,
      "model_forward_time": 0.1143038272857666,
      "step": 34460
    },
    {
      "epoch": 0.0002103271484375,
      "step": 34460,
      "training_step_time": 0.530580997467041
    },
    {
      "epoch": 0.000210333251953125,
      "model_forward_time": 0.11468315124511719,
      "step": 34461
    },
    {
      "epoch": 0.000210333251953125,
      "step": 34461,
      "training_step_time": 0.44135117530822754
    },
    {
      "epoch": 0.00021033935546875,
      "model_forward_time": 0.11464190483093262,
      "step": 34462
    },
    {
      "epoch": 0.00021033935546875,
      "step": 34462,
      "training_step_time": 0.42995214462280273
    },
    {
      "epoch": 0.000210345458984375,
      "model_forward_time": 0.11547088623046875,
      "step": 34463
    },
    {
      "epoch": 0.000210345458984375,
      "step": 34463,
      "training_step_time": 0.3931005001068115
    },
    {
      "epoch": 0.0002103515625,
      "model_forward_time": 0.11501765251159668,
      "step": 34464
    },
    {
      "epoch": 0.0002103515625,
      "step": 34464,
      "training_step_time": 0.44165539741516113
    },
    {
      "epoch": 0.000210357666015625,
      "model_forward_time": 0.11510992050170898,
      "step": 34465
    },
    {
      "epoch": 0.000210357666015625,
      "step": 34465,
      "training_step_time": 0.4330770969390869
    },
    {
      "epoch": 0.00021036376953125,
      "model_forward_time": 0.11482119560241699,
      "step": 34466
    },
    {
      "epoch": 0.00021036376953125,
      "step": 34466,
      "training_step_time": 0.40708208084106445
    },
    {
      "epoch": 0.000210369873046875,
      "model_forward_time": 0.11508679389953613,
      "step": 34467
    },
    {
      "epoch": 0.000210369873046875,
      "step": 34467,
      "training_step_time": 0.394118070602417
    },
    {
      "epoch": 0.0002103759765625,
      "model_forward_time": 0.11475062370300293,
      "step": 34468
    },
    {
      "epoch": 0.0002103759765625,
      "step": 34468,
      "training_step_time": 0.4322490692138672
    },
    {
      "epoch": 0.000210382080078125,
      "model_forward_time": 0.11741065979003906,
      "step": 34469
    },
    {
      "epoch": 0.000210382080078125,
      "step": 34469,
      "training_step_time": 0.39701223373413086
    },
    {
      "epoch": 0.00021038818359375,
      "grad_norm": 0.12124773114919662,
      "learning_rate": 4.1851827630914305e-05,
      "loss": 0.0403,
      "step": 34470
    },
    {
      "epoch": 0.00021038818359375,
      "model_forward_time": 0.11465620994567871,
      "step": 34470
    },
    {
      "epoch": 0.00021038818359375,
      "step": 34470,
      "training_step_time": 0.393801212310791
    },
    {
      "epoch": 0.000210394287109375,
      "model_forward_time": 0.11486530303955078,
      "step": 34471
    },
    {
      "epoch": 0.000210394287109375,
      "step": 34471,
      "training_step_time": 0.5597214698791504
    },
    {
      "epoch": 0.000210400390625,
      "model_forward_time": 0.11499285697937012,
      "step": 34472
    },
    {
      "epoch": 0.000210400390625,
      "step": 34472,
      "training_step_time": 0.39727210998535156
    },
    {
      "epoch": 0.000210406494140625,
      "model_forward_time": 0.11471724510192871,
      "step": 34473
    },
    {
      "epoch": 0.000210406494140625,
      "step": 34473,
      "training_step_time": 0.39050984382629395
    },
    {
      "epoch": 0.00021041259765625,
      "model_forward_time": 0.11470818519592285,
      "step": 34474
    },
    {
      "epoch": 0.00021041259765625,
      "step": 34474,
      "training_step_time": 0.38849568367004395
    },
    {
      "epoch": 0.000210418701171875,
      "model_forward_time": 0.11450839042663574,
      "step": 34475
    },
    {
      "epoch": 0.000210418701171875,
      "step": 34475,
      "training_step_time": 0.4071378707885742
    },
    {
      "epoch": 0.0002104248046875,
      "model_forward_time": 0.11473703384399414,
      "step": 34476
    },
    {
      "epoch": 0.0002104248046875,
      "step": 34476,
      "training_step_time": 0.3977670669555664
    },
    {
      "epoch": 0.000210430908203125,
      "model_forward_time": 0.11586809158325195,
      "step": 34477
    },
    {
      "epoch": 0.000210430908203125,
      "step": 34477,
      "training_step_time": 0.591803789138794
    },
    {
      "epoch": 0.00021043701171875,
      "model_forward_time": 0.11480545997619629,
      "step": 34478
    },
    {
      "epoch": 0.00021043701171875,
      "step": 34478,
      "training_step_time": 0.3999826908111572
    },
    {
      "epoch": 0.000210443115234375,
      "model_forward_time": 0.11501836776733398,
      "step": 34479
    },
    {
      "epoch": 0.000210443115234375,
      "step": 34479,
      "training_step_time": 0.39481544494628906
    },
    {
      "epoch": 0.00021044921875,
      "grad_norm": 0.15084198117256165,
      "learning_rate": 4.18246394297192e-05,
      "loss": 0.0406,
      "step": 34480
    },
    {
      "epoch": 0.00021044921875,
      "model_forward_time": 0.11472535133361816,
      "step": 34480
    },
    {
      "epoch": 0.00021044921875,
      "step": 34480,
      "training_step_time": 0.4029114246368408
    },
    {
      "epoch": 0.000210455322265625,
      "model_forward_time": 0.11490774154663086,
      "step": 34481
    },
    {
      "epoch": 0.000210455322265625,
      "step": 34481,
      "training_step_time": 0.4390726089477539
    },
    {
      "epoch": 0.00021046142578125,
      "model_forward_time": 0.11446642875671387,
      "step": 34482
    },
    {
      "epoch": 0.00021046142578125,
      "step": 34482,
      "training_step_time": 0.3653852939605713
    },
    {
      "epoch": 0.000210467529296875,
      "model_forward_time": 0.11954545974731445,
      "step": 34483
    },
    {
      "epoch": 0.000210467529296875,
      "step": 34483,
      "training_step_time": 0.4773116111755371
    },
    {
      "epoch": 0.0002104736328125,
      "model_forward_time": 0.11807751655578613,
      "step": 34484
    },
    {
      "epoch": 0.0002104736328125,
      "step": 34484,
      "training_step_time": 0.3850083351135254
    },
    {
      "epoch": 0.000210479736328125,
      "model_forward_time": 0.11825156211853027,
      "step": 34485
    },
    {
      "epoch": 0.000210479736328125,
      "step": 34485,
      "training_step_time": 0.3804619312286377
    },
    {
      "epoch": 0.00021048583984375,
      "model_forward_time": 0.11757993698120117,
      "step": 34486
    },
    {
      "epoch": 0.00021048583984375,
      "step": 34486,
      "training_step_time": 0.37656283378601074
    },
    {
      "epoch": 0.000210491943359375,
      "model_forward_time": 0.11850333213806152,
      "step": 34487
    },
    {
      "epoch": 0.000210491943359375,
      "step": 34487,
      "training_step_time": 0.3856778144836426
    },
    {
      "epoch": 0.000210498046875,
      "model_forward_time": 0.1179358959197998,
      "step": 34488
    },
    {
      "epoch": 0.000210498046875,
      "step": 34488,
      "training_step_time": 0.37813615798950195
    },
    {
      "epoch": 0.000210504150390625,
      "model_forward_time": 0.11877012252807617,
      "step": 34489
    },
    {
      "epoch": 0.000210504150390625,
      "step": 34489,
      "training_step_time": 0.534355640411377
    },
    {
      "epoch": 0.00021051025390625,
      "grad_norm": 0.10336373001337051,
      "learning_rate": 4.179745371198276e-05,
      "loss": 0.0446,
      "step": 34490
    },
    {
      "epoch": 0.00021051025390625,
      "model_forward_time": 0.11782956123352051,
      "step": 34490
    },
    {
      "epoch": 0.00021051025390625,
      "step": 34490,
      "training_step_time": 0.38558506965637207
    },
    {
      "epoch": 0.000210516357421875,
      "model_forward_time": 0.11789107322692871,
      "step": 34491
    },
    {
      "epoch": 0.000210516357421875,
      "step": 34491,
      "training_step_time": 0.4431900978088379
    },
    {
      "epoch": 0.0002105224609375,
      "model_forward_time": 0.11858105659484863,
      "step": 34492
    },
    {
      "epoch": 0.0002105224609375,
      "step": 34492,
      "training_step_time": 0.37796998023986816
    },
    {
      "epoch": 0.000210528564453125,
      "model_forward_time": 0.11531543731689453,
      "step": 34493
    },
    {
      "epoch": 0.000210528564453125,
      "step": 34493,
      "training_step_time": 0.3849780559539795
    },
    {
      "epoch": 0.00021053466796875,
      "model_forward_time": 0.11528968811035156,
      "step": 34494
    },
    {
      "epoch": 0.00021053466796875,
      "step": 34494,
      "training_step_time": 0.45149946212768555
    },
    {
      "epoch": 0.000210540771484375,
      "model_forward_time": 0.11565327644348145,
      "step": 34495
    },
    {
      "epoch": 0.000210540771484375,
      "step": 34495,
      "training_step_time": 0.6137170791625977
    },
    {
      "epoch": 0.000210546875,
      "model_forward_time": 0.11520814895629883,
      "step": 34496
    },
    {
      "epoch": 0.000210546875,
      "step": 34496,
      "training_step_time": 0.4254453182220459
    },
    {
      "epoch": 0.000210552978515625,
      "model_forward_time": 0.11528682708740234,
      "step": 34497
    },
    {
      "epoch": 0.000210552978515625,
      "step": 34497,
      "training_step_time": 0.4089345932006836
    },
    {
      "epoch": 0.00021055908203125,
      "model_forward_time": 0.11513447761535645,
      "step": 34498
    },
    {
      "epoch": 0.00021055908203125,
      "step": 34498,
      "training_step_time": 0.39614343643188477
    },
    {
      "epoch": 0.000210565185546875,
      "model_forward_time": 0.11484766006469727,
      "step": 34499
    },
    {
      "epoch": 0.000210565185546875,
      "step": 34499,
      "training_step_time": 0.381742000579834
    },
    {
      "epoch": 0.0002105712890625,
      "grad_norm": 0.12371746450662613,
      "learning_rate": 4.17702704859633e-05,
      "loss": 0.0373,
      "step": 34500
    },
    {
      "epoch": 0.0002105712890625,
      "model_forward_time": 0.11528587341308594,
      "step": 34500
    },
    {
      "epoch": 0.0002105712890625,
      "step": 34500,
      "training_step_time": 0.39635777473449707
    },
    {
      "epoch": 0.000210577392578125,
      "model_forward_time": 0.1153113842010498,
      "step": 34501
    },
    {
      "epoch": 0.000210577392578125,
      "step": 34501,
      "training_step_time": 0.5126745700836182
    },
    {
      "epoch": 0.00021058349609375,
      "model_forward_time": 0.11538219451904297,
      "step": 34502
    },
    {
      "epoch": 0.00021058349609375,
      "step": 34502,
      "training_step_time": 0.3823399543762207
    },
    {
      "epoch": 0.000210589599609375,
      "model_forward_time": 0.11586833000183105,
      "step": 34503
    },
    {
      "epoch": 0.000210589599609375,
      "step": 34503,
      "training_step_time": 0.565861701965332
    },
    {
      "epoch": 0.000210595703125,
      "model_forward_time": 0.11547136306762695,
      "step": 34504
    },
    {
      "epoch": 0.000210595703125,
      "step": 34504,
      "training_step_time": 0.4360771179199219
    },
    {
      "epoch": 0.000210601806640625,
      "model_forward_time": 0.11483454704284668,
      "step": 34505
    },
    {
      "epoch": 0.000210601806640625,
      "step": 34505,
      "training_step_time": 0.4852168560028076
    },
    {
      "epoch": 0.00021060791015625,
      "model_forward_time": 0.11506891250610352,
      "step": 34506
    },
    {
      "epoch": 0.00021060791015625,
      "step": 34506,
      "training_step_time": 0.3867182731628418
    },
    {
      "epoch": 0.000210614013671875,
      "model_forward_time": 0.1150364875793457,
      "step": 34507
    },
    {
      "epoch": 0.000210614013671875,
      "step": 34507,
      "training_step_time": 0.4411182403564453
    },
    {
      "epoch": 0.0002106201171875,
      "model_forward_time": 0.11492013931274414,
      "step": 34508
    },
    {
      "epoch": 0.0002106201171875,
      "step": 34508,
      "training_step_time": 0.45558714866638184
    },
    {
      "epoch": 0.000210626220703125,
      "model_forward_time": 0.11549210548400879,
      "step": 34509
    },
    {
      "epoch": 0.000210626220703125,
      "step": 34509,
      "training_step_time": 0.7443509101867676
    },
    {
      "epoch": 0.00021063232421875,
      "grad_norm": 0.17123065888881683,
      "learning_rate": 4.1743089759918394e-05,
      "loss": 0.0408,
      "step": 34510
    },
    {
      "epoch": 0.00021063232421875,
      "model_forward_time": 0.11490893363952637,
      "step": 34510
    },
    {
      "epoch": 0.00021063232421875,
      "step": 34510,
      "training_step_time": 0.3933706283569336
    },
    {
      "epoch": 0.000210638427734375,
      "model_forward_time": 0.11423778533935547,
      "step": 34511
    },
    {
      "epoch": 0.000210638427734375,
      "step": 34511,
      "training_step_time": 0.415130615234375
    },
    {
      "epoch": 0.00021064453125,
      "model_forward_time": 0.11503314971923828,
      "step": 34512
    },
    {
      "epoch": 0.00021064453125,
      "step": 34512,
      "training_step_time": 0.39802026748657227
    },
    {
      "epoch": 0.000210650634765625,
      "model_forward_time": 0.1139683723449707,
      "step": 34513
    },
    {
      "epoch": 0.000210650634765625,
      "step": 34513,
      "training_step_time": 0.39177989959716797
    },
    {
      "epoch": 0.00021065673828125,
      "model_forward_time": 0.1141507625579834,
      "step": 34514
    },
    {
      "epoch": 0.00021065673828125,
      "step": 34514,
      "training_step_time": 0.38605284690856934
    },
    {
      "epoch": 0.000210662841796875,
      "model_forward_time": 0.11534810066223145,
      "step": 34515
    },
    {
      "epoch": 0.000210662841796875,
      "step": 34515,
      "training_step_time": 0.5383195877075195
    },
    {
      "epoch": 0.0002106689453125,
      "model_forward_time": 0.11519956588745117,
      "step": 34516
    },
    {
      "epoch": 0.0002106689453125,
      "step": 34516,
      "training_step_time": 0.3894956111907959
    },
    {
      "epoch": 0.000210675048828125,
      "model_forward_time": 0.11603546142578125,
      "step": 34517
    },
    {
      "epoch": 0.000210675048828125,
      "step": 34517,
      "training_step_time": 0.4213864803314209
    },
    {
      "epoch": 0.00021068115234375,
      "model_forward_time": 0.11576724052429199,
      "step": 34518
    },
    {
      "epoch": 0.00021068115234375,
      "step": 34518,
      "training_step_time": 0.49860453605651855
    },
    {
      "epoch": 0.000210687255859375,
      "model_forward_time": 0.11464977264404297,
      "step": 34519
    },
    {
      "epoch": 0.000210687255859375,
      "step": 34519,
      "training_step_time": 0.39604759216308594
    },
    {
      "epoch": 0.000210693359375,
      "grad_norm": 0.10201361030340195,
      "learning_rate": 4.171591154210479e-05,
      "loss": 0.0398,
      "step": 34520
    },
    {
      "epoch": 0.000210693359375,
      "model_forward_time": 0.11445260047912598,
      "step": 34520
    },
    {
      "epoch": 0.000210693359375,
      "step": 34520,
      "training_step_time": 0.4556388854980469
    },
    {
      "epoch": 0.000210699462890625,
      "model_forward_time": 0.11541509628295898,
      "step": 34521
    },
    {
      "epoch": 0.000210699462890625,
      "step": 34521,
      "training_step_time": 0.4525763988494873
    },
    {
      "epoch": 0.00021070556640625,
      "model_forward_time": 0.11447715759277344,
      "step": 34522
    },
    {
      "epoch": 0.00021070556640625,
      "step": 34522,
      "training_step_time": 0.4012022018432617
    },
    {
      "epoch": 0.000210711669921875,
      "model_forward_time": 0.11467647552490234,
      "step": 34523
    },
    {
      "epoch": 0.000210711669921875,
      "step": 34523,
      "training_step_time": 0.427736759185791
    },
    {
      "epoch": 0.0002107177734375,
      "model_forward_time": 0.11516952514648438,
      "step": 34524
    },
    {
      "epoch": 0.0002107177734375,
      "step": 34524,
      "training_step_time": 0.3938572406768799
    },
    {
      "epoch": 0.000210723876953125,
      "model_forward_time": 0.1152348518371582,
      "step": 34525
    },
    {
      "epoch": 0.000210723876953125,
      "step": 34525,
      "training_step_time": 0.4160149097442627
    },
    {
      "epoch": 0.00021072998046875,
      "model_forward_time": 0.11760306358337402,
      "step": 34526
    },
    {
      "epoch": 0.00021072998046875,
      "step": 34526,
      "training_step_time": 0.40408778190612793
    },
    {
      "epoch": 0.000210736083984375,
      "model_forward_time": 0.11857199668884277,
      "step": 34527
    },
    {
      "epoch": 0.000210736083984375,
      "step": 34527,
      "training_step_time": 0.38542842864990234
    },
    {
      "epoch": 0.0002107421875,
      "model_forward_time": 0.12071490287780762,
      "step": 34528
    },
    {
      "epoch": 0.0002107421875,
      "step": 34528,
      "training_step_time": 0.38815832138061523
    },
    {
      "epoch": 0.000210748291015625,
      "model_forward_time": 0.11568474769592285,
      "step": 34529
    },
    {
      "epoch": 0.000210748291015625,
      "step": 34529,
      "training_step_time": 0.4079713821411133
    },
    {
      "epoch": 0.00021075439453125,
      "grad_norm": 0.20787042379379272,
      "learning_rate": 4.1688735840778546e-05,
      "loss": 0.0393,
      "step": 34530
    },
    {
      "epoch": 0.00021075439453125,
      "model_forward_time": 0.11740708351135254,
      "step": 34530
    },
    {
      "epoch": 0.00021075439453125,
      "step": 34530,
      "training_step_time": 0.39493799209594727
    },
    {
      "epoch": 0.000210760498046875,
      "model_forward_time": 0.11563491821289062,
      "step": 34531
    },
    {
      "epoch": 0.000210760498046875,
      "step": 34531,
      "training_step_time": 0.4614722728729248
    },
    {
      "epoch": 0.0002107666015625,
      "model_forward_time": 0.11546778678894043,
      "step": 34532
    },
    {
      "epoch": 0.0002107666015625,
      "step": 34532,
      "training_step_time": 0.42531776428222656
    },
    {
      "epoch": 0.000210772705078125,
      "model_forward_time": 0.11596012115478516,
      "step": 34533
    },
    {
      "epoch": 0.000210772705078125,
      "step": 34533,
      "training_step_time": 0.4903419017791748
    },
    {
      "epoch": 0.00021077880859375,
      "model_forward_time": 0.11582779884338379,
      "step": 34534
    },
    {
      "epoch": 0.00021077880859375,
      "step": 34534,
      "training_step_time": 0.4050283432006836
    },
    {
      "epoch": 0.000210784912109375,
      "model_forward_time": 0.11497282981872559,
      "step": 34535
    },
    {
      "epoch": 0.000210784912109375,
      "step": 34535,
      "training_step_time": 0.4302518367767334
    },
    {
      "epoch": 0.000210791015625,
      "model_forward_time": 0.11713457107543945,
      "step": 34536
    },
    {
      "epoch": 0.000210791015625,
      "step": 34536,
      "training_step_time": 0.44031381607055664
    },
    {
      "epoch": 0.000210797119140625,
      "model_forward_time": 0.11530351638793945,
      "step": 34537
    },
    {
      "epoch": 0.000210797119140625,
      "step": 34537,
      "training_step_time": 0.40419960021972656
    },
    {
      "epoch": 0.00021080322265625,
      "model_forward_time": 0.11527347564697266,
      "step": 34538
    },
    {
      "epoch": 0.00021080322265625,
      "step": 34538,
      "training_step_time": 0.3984408378601074
    },
    {
      "epoch": 0.000210809326171875,
      "model_forward_time": 0.11540436744689941,
      "step": 34539
    },
    {
      "epoch": 0.000210809326171875,
      "step": 34539,
      "training_step_time": 0.48684072494506836
    },
    {
      "epoch": 0.0002108154296875,
      "grad_norm": 0.10773158073425293,
      "learning_rate": 4.166156266419489e-05,
      "loss": 0.0349,
      "step": 34540
    },
    {
      "epoch": 0.0002108154296875,
      "model_forward_time": 0.11509013175964355,
      "step": 34540
    },
    {
      "epoch": 0.0002108154296875,
      "step": 34540,
      "training_step_time": 0.4986412525177002
    },
    {
      "epoch": 0.000210821533203125,
      "model_forward_time": 0.11532711982727051,
      "step": 34541
    },
    {
      "epoch": 0.000210821533203125,
      "step": 34541,
      "training_step_time": 0.39771270751953125
    },
    {
      "epoch": 0.00021082763671875,
      "model_forward_time": 0.1154019832611084,
      "step": 34542
    },
    {
      "epoch": 0.00021082763671875,
      "step": 34542,
      "training_step_time": 0.3963761329650879
    },
    {
      "epoch": 0.000210833740234375,
      "model_forward_time": 0.11479425430297852,
      "step": 34543
    },
    {
      "epoch": 0.000210833740234375,
      "step": 34543,
      "training_step_time": 0.40754270553588867
    },
    {
      "epoch": 0.00021083984375,
      "model_forward_time": 0.11531877517700195,
      "step": 34544
    },
    {
      "epoch": 0.00021083984375,
      "step": 34544,
      "training_step_time": 0.38850831985473633
    },
    {
      "epoch": 0.000210845947265625,
      "model_forward_time": 0.11493825912475586,
      "step": 34545
    },
    {
      "epoch": 0.000210845947265625,
      "step": 34545,
      "training_step_time": 0.39254140853881836
    },
    {
      "epoch": 0.00021085205078125,
      "model_forward_time": 0.1164093017578125,
      "step": 34546
    },
    {
      "epoch": 0.00021085205078125,
      "step": 34546,
      "training_step_time": 0.41716909408569336
    },
    {
      "epoch": 0.000210858154296875,
      "model_forward_time": 0.11588525772094727,
      "step": 34547
    },
    {
      "epoch": 0.000210858154296875,
      "step": 34547,
      "training_step_time": 0.49667882919311523
    },
    {
      "epoch": 0.0002108642578125,
      "model_forward_time": 0.1149444580078125,
      "step": 34548
    },
    {
      "epoch": 0.0002108642578125,
      "step": 34548,
      "training_step_time": 0.39425063133239746
    },
    {
      "epoch": 0.000210870361328125,
      "model_forward_time": 0.11534309387207031,
      "step": 34549
    },
    {
      "epoch": 0.000210870361328125,
      "step": 34549,
      "training_step_time": 0.39763879776000977
    },
    {
      "epoch": 0.00021087646484375,
      "grad_norm": 0.15104219317436218,
      "learning_rate": 4.163439202060833e-05,
      "loss": 0.0419,
      "step": 34550
    },
    {
      "epoch": 0.00021087646484375,
      "model_forward_time": 0.11480021476745605,
      "step": 34550
    },
    {
      "epoch": 0.00021087646484375,
      "step": 34550,
      "training_step_time": 0.42287230491638184
    },
    {
      "epoch": 0.000210882568359375,
      "model_forward_time": 0.11539626121520996,
      "step": 34551
    },
    {
      "epoch": 0.000210882568359375,
      "step": 34551,
      "training_step_time": 0.41397595405578613
    },
    {
      "epoch": 0.000210888671875,
      "model_forward_time": 0.11705279350280762,
      "step": 34552
    },
    {
      "epoch": 0.000210888671875,
      "step": 34552,
      "training_step_time": 0.4325242042541504
    },
    {
      "epoch": 0.000210894775390625,
      "model_forward_time": 0.11546850204467773,
      "step": 34553
    },
    {
      "epoch": 0.000210894775390625,
      "step": 34553,
      "training_step_time": 0.3683292865753174
    },
    {
      "epoch": 0.00021090087890625,
      "model_forward_time": 0.11613941192626953,
      "step": 34554
    },
    {
      "epoch": 0.00021090087890625,
      "step": 34554,
      "training_step_time": 0.45188307762145996
    },
    {
      "epoch": 0.000210906982421875,
      "model_forward_time": 0.11534285545349121,
      "step": 34555
    },
    {
      "epoch": 0.000210906982421875,
      "step": 34555,
      "training_step_time": 0.41158628463745117
    },
    {
      "epoch": 0.0002109130859375,
      "model_forward_time": 0.11524844169616699,
      "step": 34556
    },
    {
      "epoch": 0.0002109130859375,
      "step": 34556,
      "training_step_time": 0.39169883728027344
    },
    {
      "epoch": 0.000210919189453125,
      "model_forward_time": 0.11573052406311035,
      "step": 34557
    },
    {
      "epoch": 0.000210919189453125,
      "step": 34557,
      "training_step_time": 0.38071107864379883
    },
    {
      "epoch": 0.00021092529296875,
      "model_forward_time": 0.11588072776794434,
      "step": 34558
    },
    {
      "epoch": 0.00021092529296875,
      "step": 34558,
      "training_step_time": 0.3918125629425049
    },
    {
      "epoch": 0.000210931396484375,
      "model_forward_time": 0.1152808666229248,
      "step": 34559
    },
    {
      "epoch": 0.000210931396484375,
      "step": 34559,
      "training_step_time": 0.38680553436279297
    },
    {
      "epoch": 0.0002109375,
      "grad_norm": 0.10151467472314835,
      "learning_rate": 4.160722391827262e-05,
      "loss": 0.0407,
      "step": 34560
    },
    {
      "epoch": 0.0002109375,
      "model_forward_time": 0.11639165878295898,
      "step": 34560
    },
    {
      "epoch": 0.0002109375,
      "step": 34560,
      "training_step_time": 0.4503147602081299
    },
    {
      "epoch": 0.000210943603515625,
      "model_forward_time": 0.11664628982543945,
      "step": 34561
    },
    {
      "epoch": 0.000210943603515625,
      "step": 34561,
      "training_step_time": 0.44025206565856934
    },
    {
      "epoch": 0.00021094970703125,
      "model_forward_time": 0.11542320251464844,
      "step": 34562
    },
    {
      "epoch": 0.00021094970703125,
      "step": 34562,
      "training_step_time": 0.4711008071899414
    },
    {
      "epoch": 0.000210955810546875,
      "model_forward_time": 0.11554265022277832,
      "step": 34563
    },
    {
      "epoch": 0.000210955810546875,
      "step": 34563,
      "training_step_time": 0.39286231994628906
    },
    {
      "epoch": 0.0002109619140625,
      "model_forward_time": 0.11492323875427246,
      "step": 34564
    },
    {
      "epoch": 0.0002109619140625,
      "step": 34564,
      "training_step_time": 0.39037418365478516
    },
    {
      "epoch": 0.000210968017578125,
      "model_forward_time": 0.11500668525695801,
      "step": 34565
    },
    {
      "epoch": 0.000210968017578125,
      "step": 34565,
      "training_step_time": 0.4624166488647461
    },
    {
      "epoch": 0.00021097412109375,
      "model_forward_time": 0.11527252197265625,
      "step": 34566
    },
    {
      "epoch": 0.00021097412109375,
      "step": 34566,
      "training_step_time": 0.47528553009033203
    },
    {
      "epoch": 0.000210980224609375,
      "model_forward_time": 0.11536359786987305,
      "step": 34567
    },
    {
      "epoch": 0.000210980224609375,
      "step": 34567,
      "training_step_time": 0.389967679977417
    },
    {
      "epoch": 0.000210986328125,
      "model_forward_time": 0.11579275131225586,
      "step": 34568
    },
    {
      "epoch": 0.000210986328125,
      "step": 34568,
      "training_step_time": 0.4507942199707031
    },
    {
      "epoch": 0.000210992431640625,
      "model_forward_time": 0.11615324020385742,
      "step": 34569
    },
    {
      "epoch": 0.000210992431640625,
      "step": 34569,
      "training_step_time": 0.3906271457672119
    },
    {
      "epoch": 0.00021099853515625,
      "grad_norm": 0.12148237228393555,
      "learning_rate": 4.158005836544066e-05,
      "loss": 0.0353,
      "step": 34570
    },
    {
      "epoch": 0.00021099853515625,
      "model_forward_time": 0.11586833000183105,
      "step": 34570
    },
    {
      "epoch": 0.00021099853515625,
      "step": 34570,
      "training_step_time": 0.40474486351013184
    },
    {
      "epoch": 0.000211004638671875,
      "model_forward_time": 0.11607956886291504,
      "step": 34571
    },
    {
      "epoch": 0.000211004638671875,
      "step": 34571,
      "training_step_time": 0.3853774070739746
    },
    {
      "epoch": 0.0002110107421875,
      "model_forward_time": 0.11558699607849121,
      "step": 34572
    },
    {
      "epoch": 0.0002110107421875,
      "step": 34572,
      "training_step_time": 0.39472484588623047
    },
    {
      "epoch": 0.000211016845703125,
      "model_forward_time": 0.11583924293518066,
      "step": 34573
    },
    {
      "epoch": 0.000211016845703125,
      "step": 34573,
      "training_step_time": 0.40434718132019043
    },
    {
      "epoch": 0.00021102294921875,
      "model_forward_time": 0.11517977714538574,
      "step": 34574
    },
    {
      "epoch": 0.00021102294921875,
      "step": 34574,
      "training_step_time": 0.41353917121887207
    },
    {
      "epoch": 0.000211029052734375,
      "model_forward_time": 0.1161806583404541,
      "step": 34575
    },
    {
      "epoch": 0.000211029052734375,
      "step": 34575,
      "training_step_time": 0.4496469497680664
    },
    {
      "epoch": 0.00021103515625,
      "model_forward_time": 0.11667013168334961,
      "step": 34576
    },
    {
      "epoch": 0.00021103515625,
      "step": 34576,
      "training_step_time": 0.503852367401123
    },
    {
      "epoch": 0.000211041259765625,
      "model_forward_time": 0.1151118278503418,
      "step": 34577
    },
    {
      "epoch": 0.000211041259765625,
      "step": 34577,
      "training_step_time": 0.39438462257385254
    },
    {
      "epoch": 0.00021104736328125,
      "model_forward_time": 0.11586260795593262,
      "step": 34578
    },
    {
      "epoch": 0.00021104736328125,
      "step": 34578,
      "training_step_time": 0.39783644676208496
    },
    {
      "epoch": 0.000211053466796875,
      "model_forward_time": 0.1152353286743164,
      "step": 34579
    },
    {
      "epoch": 0.000211053466796875,
      "step": 34579,
      "training_step_time": 0.40025925636291504
    },
    {
      "epoch": 0.0002110595703125,
      "grad_norm": 0.0965229868888855,
      "learning_rate": 4.155289537036466e-05,
      "loss": 0.0385,
      "step": 34580
    },
    {
      "epoch": 0.0002110595703125,
      "model_forward_time": 0.115509033203125,
      "step": 34580
    },
    {
      "epoch": 0.0002110595703125,
      "step": 34580,
      "training_step_time": 0.41211795806884766
    },
    {
      "epoch": 0.000211065673828125,
      "model_forward_time": 0.11539506912231445,
      "step": 34581
    },
    {
      "epoch": 0.000211065673828125,
      "step": 34581,
      "training_step_time": 0.4940669536590576
    },
    {
      "epoch": 0.00021107177734375,
      "model_forward_time": 0.1160426139831543,
      "step": 34582
    },
    {
      "epoch": 0.00021107177734375,
      "step": 34582,
      "training_step_time": 0.3673396110534668
    },
    {
      "epoch": 0.000211077880859375,
      "model_forward_time": 0.11516451835632324,
      "step": 34583
    },
    {
      "epoch": 0.000211077880859375,
      "step": 34583,
      "training_step_time": 0.4573338031768799
    },
    {
      "epoch": 0.000211083984375,
      "model_forward_time": 0.11519360542297363,
      "step": 34584
    },
    {
      "epoch": 0.000211083984375,
      "step": 34584,
      "training_step_time": 0.43999314308166504
    },
    {
      "epoch": 0.000211090087890625,
      "model_forward_time": 0.1157224178314209,
      "step": 34585
    },
    {
      "epoch": 0.000211090087890625,
      "step": 34585,
      "training_step_time": 0.38190245628356934
    },
    {
      "epoch": 0.00021109619140625,
      "model_forward_time": 0.11489605903625488,
      "step": 34586
    },
    {
      "epoch": 0.00021109619140625,
      "step": 34586,
      "training_step_time": 0.4062509536743164
    },
    {
      "epoch": 0.000211102294921875,
      "model_forward_time": 0.11799001693725586,
      "step": 34587
    },
    {
      "epoch": 0.000211102294921875,
      "step": 34587,
      "training_step_time": 0.39681220054626465
    },
    {
      "epoch": 0.0002111083984375,
      "model_forward_time": 0.11501336097717285,
      "step": 34588
    },
    {
      "epoch": 0.0002111083984375,
      "step": 34588,
      "training_step_time": 0.45804262161254883
    },
    {
      "epoch": 0.000211114501953125,
      "model_forward_time": 0.11542272567749023,
      "step": 34589
    },
    {
      "epoch": 0.000211114501953125,
      "step": 34589,
      "training_step_time": 0.39830946922302246
    },
    {
      "epoch": 0.00021112060546875,
      "grad_norm": 0.16988974809646606,
      "learning_rate": 4.1525734941296026e-05,
      "loss": 0.0428,
      "step": 34590
    },
    {
      "epoch": 0.00021112060546875,
      "model_forward_time": 0.1154170036315918,
      "step": 34590
    },
    {
      "epoch": 0.00021112060546875,
      "step": 34590,
      "training_step_time": 0.39606261253356934
    },
    {
      "epoch": 0.000211126708984375,
      "model_forward_time": 0.11558389663696289,
      "step": 34591
    },
    {
      "epoch": 0.000211126708984375,
      "step": 34591,
      "training_step_time": 0.40869712829589844
    },
    {
      "epoch": 0.0002111328125,
      "model_forward_time": 0.1150979995727539,
      "step": 34592
    },
    {
      "epoch": 0.0002111328125,
      "step": 34592,
      "training_step_time": 0.3890712261199951
    },
    {
      "epoch": 0.000211138916015625,
      "model_forward_time": 0.11548900604248047,
      "step": 34593
    },
    {
      "epoch": 0.000211138916015625,
      "step": 34593,
      "training_step_time": 0.39856481552124023
    },
    {
      "epoch": 0.00021114501953125,
      "model_forward_time": 0.11586856842041016,
      "step": 34594
    },
    {
      "epoch": 0.00021114501953125,
      "step": 34594,
      "training_step_time": 0.40314817428588867
    },
    {
      "epoch": 0.000211151123046875,
      "model_forward_time": 0.11540794372558594,
      "step": 34595
    },
    {
      "epoch": 0.000211151123046875,
      "step": 34595,
      "training_step_time": 0.44762420654296875
    },
    {
      "epoch": 0.0002111572265625,
      "model_forward_time": 0.11576724052429199,
      "step": 34596
    },
    {
      "epoch": 0.0002111572265625,
      "step": 34596,
      "training_step_time": 0.4378480911254883
    },
    {
      "epoch": 0.000211163330078125,
      "model_forward_time": 0.11696100234985352,
      "step": 34597
    },
    {
      "epoch": 0.000211163330078125,
      "step": 34597,
      "training_step_time": 0.4050006866455078
    },
    {
      "epoch": 0.00021116943359375,
      "model_forward_time": 0.11621856689453125,
      "step": 34598
    },
    {
      "epoch": 0.00021116943359375,
      "step": 34598,
      "training_step_time": 0.49404478073120117
    },
    {
      "epoch": 0.000211175537109375,
      "model_forward_time": 0.1168527603149414,
      "step": 34599
    },
    {
      "epoch": 0.000211175537109375,
      "step": 34599,
      "training_step_time": 0.4015223979949951
    },
    {
      "epoch": 0.000211181640625,
      "grad_norm": 0.10483088344335556,
      "learning_rate": 4.149857708648536e-05,
      "loss": 0.0387,
      "step": 34600
    },
    {
      "epoch": 0.000211181640625,
      "model_forward_time": 0.11734819412231445,
      "step": 34600
    },
    {
      "epoch": 0.000211181640625,
      "step": 34600,
      "training_step_time": 0.39827537536621094
    },
    {
      "epoch": 0.000211187744140625,
      "model_forward_time": 0.11826300621032715,
      "step": 34601
    },
    {
      "epoch": 0.000211187744140625,
      "step": 34601,
      "training_step_time": 0.39936017990112305
    },
    {
      "epoch": 0.00021119384765625,
      "model_forward_time": 0.12168765068054199,
      "step": 34602
    },
    {
      "epoch": 0.00021119384765625,
      "step": 34602,
      "training_step_time": 0.38809919357299805
    },
    {
      "epoch": 0.000211199951171875,
      "model_forward_time": 0.11813831329345703,
      "step": 34603
    },
    {
      "epoch": 0.000211199951171875,
      "step": 34603,
      "training_step_time": 0.39870738983154297
    },
    {
      "epoch": 0.0002112060546875,
      "model_forward_time": 0.11875486373901367,
      "step": 34604
    },
    {
      "epoch": 0.0002112060546875,
      "step": 34604,
      "training_step_time": 0.38705992698669434
    },
    {
      "epoch": 0.000211212158203125,
      "model_forward_time": 0.11839938163757324,
      "step": 34605
    },
    {
      "epoch": 0.000211212158203125,
      "step": 34605,
      "training_step_time": 0.4584157466888428
    },
    {
      "epoch": 0.00021121826171875,
      "model_forward_time": 0.11510038375854492,
      "step": 34606
    },
    {
      "epoch": 0.00021121826171875,
      "step": 34606,
      "training_step_time": 0.3984992504119873
    },
    {
      "epoch": 0.000211224365234375,
      "model_forward_time": 0.11633181571960449,
      "step": 34607
    },
    {
      "epoch": 0.000211224365234375,
      "step": 34607,
      "training_step_time": 0.39316487312316895
    },
    {
      "epoch": 0.00021123046875,
      "model_forward_time": 0.11498785018920898,
      "step": 34608
    },
    {
      "epoch": 0.00021123046875,
      "step": 34608,
      "training_step_time": 0.39298152923583984
    },
    {
      "epoch": 0.000211236572265625,
      "model_forward_time": 0.11586809158325195,
      "step": 34609
    },
    {
      "epoch": 0.000211236572265625,
      "step": 34609,
      "training_step_time": 0.39701032638549805
    },
    {
      "epoch": 0.00021124267578125,
      "grad_norm": 0.09435476362705231,
      "learning_rate": 4.147142181418249e-05,
      "loss": 0.0382,
      "step": 34610
    },
    {
      "epoch": 0.00021124267578125,
      "model_forward_time": 0.11530423164367676,
      "step": 34610
    },
    {
      "epoch": 0.00021124267578125,
      "step": 34610,
      "training_step_time": 0.40598297119140625
    },
    {
      "epoch": 0.000211248779296875,
      "model_forward_time": 0.11574745178222656,
      "step": 34611
    },
    {
      "epoch": 0.000211248779296875,
      "step": 34611,
      "training_step_time": 0.4349334239959717
    },
    {
      "epoch": 0.0002112548828125,
      "model_forward_time": 0.11652827262878418,
      "step": 34612
    },
    {
      "epoch": 0.0002112548828125,
      "step": 34612,
      "training_step_time": 0.39169836044311523
    },
    {
      "epoch": 0.000211260986328125,
      "model_forward_time": 0.11708521842956543,
      "step": 34613
    },
    {
      "epoch": 0.000211260986328125,
      "step": 34613,
      "training_step_time": 0.4919438362121582
    },
    {
      "epoch": 0.00021126708984375,
      "model_forward_time": 0.1159212589263916,
      "step": 34614
    },
    {
      "epoch": 0.00021126708984375,
      "step": 34614,
      "training_step_time": 0.43538641929626465
    },
    {
      "epoch": 0.000211273193359375,
      "model_forward_time": 0.1159968376159668,
      "step": 34615
    },
    {
      "epoch": 0.000211273193359375,
      "step": 34615,
      "training_step_time": 0.4088613986968994
    },
    {
      "epoch": 0.000211279296875,
      "model_forward_time": 0.11505794525146484,
      "step": 34616
    },
    {
      "epoch": 0.000211279296875,
      "step": 34616,
      "training_step_time": 0.39818453788757324
    },
    {
      "epoch": 0.000211285400390625,
      "model_forward_time": 0.11523318290710449,
      "step": 34617
    },
    {
      "epoch": 0.000211285400390625,
      "step": 34617,
      "training_step_time": 0.394805908203125
    },
    {
      "epoch": 0.00021129150390625,
      "model_forward_time": 0.11589980125427246,
      "step": 34618
    },
    {
      "epoch": 0.00021129150390625,
      "step": 34618,
      "training_step_time": 0.4033477306365967
    },
    {
      "epoch": 0.000211297607421875,
      "model_forward_time": 0.11617398262023926,
      "step": 34619
    },
    {
      "epoch": 0.000211297607421875,
      "step": 34619,
      "training_step_time": 0.39876532554626465
    },
    {
      "epoch": 0.0002113037109375,
      "grad_norm": 0.138930544257164,
      "learning_rate": 4.14442691326365e-05,
      "loss": 0.0419,
      "step": 34620
    },
    {
      "epoch": 0.0002113037109375,
      "model_forward_time": 0.11540102958679199,
      "step": 34620
    },
    {
      "epoch": 0.0002113037109375,
      "step": 34620,
      "training_step_time": 0.5000884532928467
    },
    {
      "epoch": 0.000211309814453125,
      "model_forward_time": 0.11552691459655762,
      "step": 34621
    },
    {
      "epoch": 0.000211309814453125,
      "step": 34621,
      "training_step_time": 0.39537692070007324
    },
    {
      "epoch": 0.00021131591796875,
      "model_forward_time": 0.11563730239868164,
      "step": 34622
    },
    {
      "epoch": 0.00021131591796875,
      "step": 34622,
      "training_step_time": 0.3921084403991699
    },
    {
      "epoch": 0.000211322021484375,
      "model_forward_time": 0.11613702774047852,
      "step": 34623
    },
    {
      "epoch": 0.000211322021484375,
      "step": 34623,
      "training_step_time": 0.4596691131591797
    },
    {
      "epoch": 0.000211328125,
      "model_forward_time": 0.11520242691040039,
      "step": 34624
    },
    {
      "epoch": 0.000211328125,
      "step": 34624,
      "training_step_time": 0.3832206726074219
    },
    {
      "epoch": 0.000211334228515625,
      "model_forward_time": 0.11532783508300781,
      "step": 34625
    },
    {
      "epoch": 0.000211334228515625,
      "step": 34625,
      "training_step_time": 0.4716908931732178
    },
    {
      "epoch": 0.00021134033203125,
      "model_forward_time": 0.11510252952575684,
      "step": 34626
    },
    {
      "epoch": 0.00021134033203125,
      "step": 34626,
      "training_step_time": 0.3682725429534912
    },
    {
      "epoch": 0.000211346435546875,
      "model_forward_time": 0.11528778076171875,
      "step": 34627
    },
    {
      "epoch": 0.000211346435546875,
      "step": 34627,
      "training_step_time": 0.4609057903289795
    },
    {
      "epoch": 0.0002113525390625,
      "model_forward_time": 0.11501121520996094,
      "step": 34628
    },
    {
      "epoch": 0.0002113525390625,
      "step": 34628,
      "training_step_time": 0.39905381202697754
    },
    {
      "epoch": 0.000211358642578125,
      "model_forward_time": 0.11547112464904785,
      "step": 34629
    },
    {
      "epoch": 0.000211358642578125,
      "step": 34629,
      "training_step_time": 0.40213537216186523
    },
    {
      "epoch": 0.00021136474609375,
      "grad_norm": 0.1047290712594986,
      "learning_rate": 4.141711905009566e-05,
      "loss": 0.0443,
      "step": 34630
    },
    {
      "epoch": 0.00021136474609375,
      "model_forward_time": 0.11495232582092285,
      "step": 34630
    },
    {
      "epoch": 0.00021136474609375,
      "step": 34630,
      "training_step_time": 0.4170243740081787
    },
    {
      "epoch": 0.000211370849609375,
      "model_forward_time": 0.1150965690612793,
      "step": 34631
    },
    {
      "epoch": 0.000211370849609375,
      "step": 34631,
      "training_step_time": 0.391141414642334
    },
    {
      "epoch": 0.000211376953125,
      "model_forward_time": 0.1155998706817627,
      "step": 34632
    },
    {
      "epoch": 0.000211376953125,
      "step": 34632,
      "training_step_time": 0.5048828125
    },
    {
      "epoch": 0.000211383056640625,
      "model_forward_time": 0.1154012680053711,
      "step": 34633
    },
    {
      "epoch": 0.000211383056640625,
      "step": 34633,
      "training_step_time": 0.4583277702331543
    },
    {
      "epoch": 0.00021138916015625,
      "model_forward_time": 0.11500167846679688,
      "step": 34634
    },
    {
      "epoch": 0.00021138916015625,
      "step": 34634,
      "training_step_time": 0.3881855010986328
    },
    {
      "epoch": 0.000211395263671875,
      "model_forward_time": 0.11566519737243652,
      "step": 34635
    },
    {
      "epoch": 0.000211395263671875,
      "step": 34635,
      "training_step_time": 0.4007384777069092
    },
    {
      "epoch": 0.0002114013671875,
      "model_forward_time": 0.11550474166870117,
      "step": 34636
    },
    {
      "epoch": 0.0002114013671875,
      "step": 34636,
      "training_step_time": 0.39153623580932617
    },
    {
      "epoch": 0.000211407470703125,
      "model_forward_time": 0.11557626724243164,
      "step": 34637
    },
    {
      "epoch": 0.000211407470703125,
      "step": 34637,
      "training_step_time": 0.3945939540863037
    },
    {
      "epoch": 0.00021141357421875,
      "model_forward_time": 0.1159203052520752,
      "step": 34638
    },
    {
      "epoch": 0.00021141357421875,
      "step": 34638,
      "training_step_time": 0.5084054470062256
    },
    {
      "epoch": 0.000211419677734375,
      "model_forward_time": 0.11611604690551758,
      "step": 34639
    },
    {
      "epoch": 0.000211419677734375,
      "step": 34639,
      "training_step_time": 0.39834165573120117
    },
    {
      "epoch": 0.00021142578125,
      "grad_norm": 0.1399194300174713,
      "learning_rate": 4.1389971574807416e-05,
      "loss": 0.0386,
      "step": 34640
    },
    {
      "epoch": 0.00021142578125,
      "model_forward_time": 0.11534738540649414,
      "step": 34640
    },
    {
      "epoch": 0.00021142578125,
      "step": 34640,
      "training_step_time": 0.39393019676208496
    },
    {
      "epoch": 0.000211431884765625,
      "model_forward_time": 0.1153421401977539,
      "step": 34641
    },
    {
      "epoch": 0.000211431884765625,
      "step": 34641,
      "training_step_time": 0.49064016342163086
    },
    {
      "epoch": 0.00021143798828125,
      "model_forward_time": 0.1183021068572998,
      "step": 34642
    },
    {
      "epoch": 0.00021143798828125,
      "step": 34642,
      "training_step_time": 0.5176064968109131
    },
    {
      "epoch": 0.000211444091796875,
      "model_forward_time": 0.11904597282409668,
      "step": 34643
    },
    {
      "epoch": 0.000211444091796875,
      "step": 34643,
      "training_step_time": 0.384030818939209
    },
    {
      "epoch": 0.0002114501953125,
      "model_forward_time": 0.11781501770019531,
      "step": 34644
    },
    {
      "epoch": 0.0002114501953125,
      "step": 34644,
      "training_step_time": 0.3805363178253174
    },
    {
      "epoch": 0.000211456298828125,
      "model_forward_time": 0.11843085289001465,
      "step": 34645
    },
    {
      "epoch": 0.000211456298828125,
      "step": 34645,
      "training_step_time": 0.38718748092651367
    },
    {
      "epoch": 0.00021146240234375,
      "model_forward_time": 0.11486959457397461,
      "step": 34646
    },
    {
      "epoch": 0.00021146240234375,
      "step": 34646,
      "training_step_time": 0.3923962116241455
    },
    {
      "epoch": 0.000211468505859375,
      "model_forward_time": 0.11602187156677246,
      "step": 34647
    },
    {
      "epoch": 0.000211468505859375,
      "step": 34647,
      "training_step_time": 0.42344188690185547
    },
    {
      "epoch": 0.000211474609375,
      "model_forward_time": 0.11603474617004395,
      "step": 34648
    },
    {
      "epoch": 0.000211474609375,
      "step": 34648,
      "training_step_time": 0.4167330265045166
    },
    {
      "epoch": 0.000211480712890625,
      "model_forward_time": 0.11550188064575195,
      "step": 34649
    },
    {
      "epoch": 0.000211480712890625,
      "step": 34649,
      "training_step_time": 0.3833577632904053
    },
    {
      "epoch": 0.00021148681640625,
      "grad_norm": 0.17289799451828003,
      "learning_rate": 4.13628267150185e-05,
      "loss": 0.0407,
      "step": 34650
    },
    {
      "epoch": 0.00021148681640625,
      "model_forward_time": 0.11595964431762695,
      "step": 34650
    },
    {
      "epoch": 0.00021148681640625,
      "step": 34650,
      "training_step_time": 0.5028235912322998
    },
    {
      "epoch": 0.000211492919921875,
      "model_forward_time": 0.11592745780944824,
      "step": 34651
    },
    {
      "epoch": 0.000211492919921875,
      "step": 34651,
      "training_step_time": 0.38721370697021484
    },
    {
      "epoch": 0.0002114990234375,
      "model_forward_time": 0.11598372459411621,
      "step": 34652
    },
    {
      "epoch": 0.0002114990234375,
      "step": 34652,
      "training_step_time": 0.39311814308166504
    },
    {
      "epoch": 0.000211505126953125,
      "model_forward_time": 0.1150674819946289,
      "step": 34653
    },
    {
      "epoch": 0.000211505126953125,
      "step": 34653,
      "training_step_time": 0.4355478286743164
    },
    {
      "epoch": 0.00021151123046875,
      "model_forward_time": 0.11544275283813477,
      "step": 34654
    },
    {
      "epoch": 0.00021151123046875,
      "step": 34654,
      "training_step_time": 0.40351390838623047
    },
    {
      "epoch": 0.000211517333984375,
      "model_forward_time": 0.11592912673950195,
      "step": 34655
    },
    {
      "epoch": 0.000211517333984375,
      "step": 34655,
      "training_step_time": 0.42006468772888184
    },
    {
      "epoch": 0.0002115234375,
      "model_forward_time": 0.11597776412963867,
      "step": 34656
    },
    {
      "epoch": 0.0002115234375,
      "step": 34656,
      "training_step_time": 0.5006775856018066
    },
    {
      "epoch": 0.000211529541015625,
      "model_forward_time": 0.11506772041320801,
      "step": 34657
    },
    {
      "epoch": 0.000211529541015625,
      "step": 34657,
      "training_step_time": 0.4322218894958496
    },
    {
      "epoch": 0.00021153564453125,
      "model_forward_time": 0.11608695983886719,
      "step": 34658
    },
    {
      "epoch": 0.00021153564453125,
      "step": 34658,
      "training_step_time": 0.3943467140197754
    },
    {
      "epoch": 0.000211541748046875,
      "model_forward_time": 0.11601662635803223,
      "step": 34659
    },
    {
      "epoch": 0.000211541748046875,
      "step": 34659,
      "training_step_time": 0.3971877098083496
    },
    {
      "epoch": 0.0002115478515625,
      "grad_norm": 0.13341572880744934,
      "learning_rate": 4.1335684478974744e-05,
      "loss": 0.0386,
      "step": 34660
    },
    {
      "epoch": 0.0002115478515625,
      "model_forward_time": 0.11553263664245605,
      "step": 34660
    },
    {
      "epoch": 0.0002115478515625,
      "step": 34660,
      "training_step_time": 0.4438591003417969
    },
    {
      "epoch": 0.000211553955078125,
      "model_forward_time": 0.11571693420410156,
      "step": 34661
    },
    {
      "epoch": 0.000211553955078125,
      "step": 34661,
      "training_step_time": 0.4157123565673828
    },
    {
      "epoch": 0.00021156005859375,
      "model_forward_time": 0.11569547653198242,
      "step": 34662
    },
    {
      "epoch": 0.00021156005859375,
      "step": 34662,
      "training_step_time": 0.48339271545410156
    },
    {
      "epoch": 0.000211566162109375,
      "model_forward_time": 0.11531639099121094,
      "step": 34663
    },
    {
      "epoch": 0.000211566162109375,
      "step": 34663,
      "training_step_time": 0.39150142669677734
    },
    {
      "epoch": 0.000211572265625,
      "model_forward_time": 0.11896395683288574,
      "step": 34664
    },
    {
      "epoch": 0.000211572265625,
      "step": 34664,
      "training_step_time": 0.3845808506011963
    },
    {
      "epoch": 0.000211578369140625,
      "model_forward_time": 0.11602163314819336,
      "step": 34665
    },
    {
      "epoch": 0.000211578369140625,
      "step": 34665,
      "training_step_time": 0.3970003128051758
    },
    {
      "epoch": 0.00021158447265625,
      "model_forward_time": 0.11592888832092285,
      "step": 34666
    },
    {
      "epoch": 0.00021158447265625,
      "step": 34666,
      "training_step_time": 0.388303279876709
    },
    {
      "epoch": 0.000211590576171875,
      "model_forward_time": 0.11557531356811523,
      "step": 34667
    },
    {
      "epoch": 0.000211590576171875,
      "step": 34667,
      "training_step_time": 0.3849053382873535
    },
    {
      "epoch": 0.0002115966796875,
      "model_forward_time": 0.11530542373657227,
      "step": 34668
    },
    {
      "epoch": 0.0002115966796875,
      "step": 34668,
      "training_step_time": 0.5772261619567871
    },
    {
      "epoch": 0.000211602783203125,
      "model_forward_time": 0.11521267890930176,
      "step": 34669
    },
    {
      "epoch": 0.000211602783203125,
      "step": 34669,
      "training_step_time": 0.42143702507019043
    },
    {
      "epoch": 0.00021160888671875,
      "grad_norm": 0.1323704719543457,
      "learning_rate": 4.130854487492128e-05,
      "loss": 0.0336,
      "step": 34670
    },
    {
      "epoch": 0.00021160888671875,
      "model_forward_time": 0.11912918090820312,
      "step": 34670
    },
    {
      "epoch": 0.00021160888671875,
      "step": 34670,
      "training_step_time": 0.4091651439666748
    },
    {
      "epoch": 0.000211614990234375,
      "model_forward_time": 0.1219637393951416,
      "step": 34671
    },
    {
      "epoch": 0.000211614990234375,
      "step": 34671,
      "training_step_time": 0.405123233795166
    },
    {
      "epoch": 0.00021162109375,
      "model_forward_time": 0.11842823028564453,
      "step": 34672
    },
    {
      "epoch": 0.00021162109375,
      "step": 34672,
      "training_step_time": 0.4037294387817383
    },
    {
      "epoch": 0.000211627197265625,
      "model_forward_time": 0.11757397651672363,
      "step": 34673
    },
    {
      "epoch": 0.000211627197265625,
      "step": 34673,
      "training_step_time": 0.38957691192626953
    },
    {
      "epoch": 0.00021163330078125,
      "model_forward_time": 0.11725759506225586,
      "step": 34674
    },
    {
      "epoch": 0.00021163330078125,
      "step": 34674,
      "training_step_time": 0.5022644996643066
    },
    {
      "epoch": 0.000211639404296875,
      "model_forward_time": 0.1151421070098877,
      "step": 34675
    },
    {
      "epoch": 0.000211639404296875,
      "step": 34675,
      "training_step_time": 0.43076539039611816
    },
    {
      "epoch": 0.0002116455078125,
      "model_forward_time": 0.11617708206176758,
      "step": 34676
    },
    {
      "epoch": 0.0002116455078125,
      "step": 34676,
      "training_step_time": 0.5416195392608643
    },
    {
      "epoch": 0.000211651611328125,
      "model_forward_time": 0.11829447746276855,
      "step": 34677
    },
    {
      "epoch": 0.000211651611328125,
      "step": 34677,
      "training_step_time": 0.5574002265930176
    },
    {
      "epoch": 0.00021165771484375,
      "model_forward_time": 0.1183772087097168,
      "step": 34678
    },
    {
      "epoch": 0.00021165771484375,
      "step": 34678,
      "training_step_time": 0.5713827610015869
    },
    {
      "epoch": 0.000211663818359375,
      "model_forward_time": 0.12329673767089844,
      "step": 34679
    },
    {
      "epoch": 0.000211663818359375,
      "step": 34679,
      "training_step_time": 0.6654872894287109
    },
    {
      "epoch": 0.000211669921875,
      "grad_norm": 0.11003026366233826,
      "learning_rate": 4.1281407911102425e-05,
      "loss": 0.0365,
      "step": 34680
    },
    {
      "epoch": 0.000211669921875,
      "model_forward_time": 0.11814427375793457,
      "step": 34680
    },
    {
      "epoch": 0.000211669921875,
      "step": 34680,
      "training_step_time": 0.6859936714172363
    },
    {
      "epoch": 0.000211676025390625,
      "model_forward_time": 0.12452507019042969,
      "step": 34681
    },
    {
      "epoch": 0.000211676025390625,
      "step": 34681,
      "training_step_time": 0.7029390335083008
    },
    {
      "epoch": 0.00021168212890625,
      "model_forward_time": 0.1170201301574707,
      "step": 34682
    },
    {
      "epoch": 0.00021168212890625,
      "step": 34682,
      "training_step_time": 0.7086911201477051
    },
    {
      "epoch": 0.000211688232421875,
      "model_forward_time": 0.12520503997802734,
      "step": 34683
    },
    {
      "epoch": 0.000211688232421875,
      "step": 34683,
      "training_step_time": 0.7142207622528076
    },
    {
      "epoch": 0.0002116943359375,
      "model_forward_time": 0.11658978462219238,
      "step": 34684
    },
    {
      "epoch": 0.0002116943359375,
      "step": 34684,
      "training_step_time": 0.6308066844940186
    },
    {
      "epoch": 0.000211700439453125,
      "model_forward_time": 0.12497544288635254,
      "step": 34685
    },
    {
      "epoch": 0.000211700439453125,
      "step": 34685,
      "training_step_time": 0.6894650459289551
    },
    {
      "epoch": 0.00021170654296875,
      "model_forward_time": 0.11639094352722168,
      "step": 34686
    },
    {
      "epoch": 0.00021170654296875,
      "step": 34686,
      "training_step_time": 0.8616595268249512
    },
    {
      "epoch": 0.000211712646484375,
      "model_forward_time": 0.11608648300170898,
      "step": 34687
    },
    {
      "epoch": 0.000211712646484375,
      "step": 34687,
      "training_step_time": 0.6506457328796387
    },
    {
      "epoch": 0.00021171875,
      "model_forward_time": 0.12746334075927734,
      "step": 34688
    },
    {
      "epoch": 0.00021171875,
      "step": 34688,
      "training_step_time": 0.6114273071289062
    },
    {
      "epoch": 0.000211724853515625,
      "model_forward_time": 0.12391424179077148,
      "step": 34689
    },
    {
      "epoch": 0.000211724853515625,
      "step": 34689,
      "training_step_time": 0.7431125640869141
    },
    {
      "epoch": 0.00021173095703125,
      "grad_norm": 0.11586663126945496,
      "learning_rate": 4.125427359576162e-05,
      "loss": 0.0422,
      "step": 34690
    },
    {
      "epoch": 0.00021173095703125,
      "model_forward_time": 0.11726546287536621,
      "step": 34690
    },
    {
      "epoch": 0.00021173095703125,
      "step": 34690,
      "training_step_time": 0.6838865280151367
    },
    {
      "epoch": 0.000211737060546875,
      "model_forward_time": 0.1168513298034668,
      "step": 34691
    },
    {
      "epoch": 0.000211737060546875,
      "step": 34691,
      "training_step_time": 0.6877567768096924
    },
    {
      "epoch": 0.0002117431640625,
      "model_forward_time": 0.12337350845336914,
      "step": 34692
    },
    {
      "epoch": 0.0002117431640625,
      "step": 34692,
      "training_step_time": 0.7193119525909424
    },
    {
      "epoch": 0.000211749267578125,
      "model_forward_time": 0.13241028785705566,
      "step": 34693
    },
    {
      "epoch": 0.000211749267578125,
      "step": 34693,
      "training_step_time": 0.6620144844055176
    },
    {
      "epoch": 0.00021175537109375,
      "model_forward_time": 0.12962627410888672,
      "step": 34694
    },
    {
      "epoch": 0.00021175537109375,
      "step": 34694,
      "training_step_time": 0.7128677368164062
    },
    {
      "epoch": 0.000211761474609375,
      "model_forward_time": 0.11884188652038574,
      "step": 34695
    },
    {
      "epoch": 0.000211761474609375,
      "step": 34695,
      "training_step_time": 0.7732019424438477
    },
    {
      "epoch": 0.000211767578125,
      "model_forward_time": 0.11679458618164062,
      "step": 34696
    },
    {
      "epoch": 0.000211767578125,
      "step": 34696,
      "training_step_time": 0.6414048671722412
    },
    {
      "epoch": 0.000211773681640625,
      "model_forward_time": 0.11810636520385742,
      "step": 34697
    },
    {
      "epoch": 0.000211773681640625,
      "step": 34697,
      "training_step_time": 0.6920216083526611
    },
    {
      "epoch": 0.00021177978515625,
      "model_forward_time": 0.12143683433532715,
      "step": 34698
    },
    {
      "epoch": 0.00021177978515625,
      "step": 34698,
      "training_step_time": 0.8839058876037598
    },
    {
      "epoch": 0.000211785888671875,
      "model_forward_time": 0.11988568305969238,
      "step": 34699
    },
    {
      "epoch": 0.000211785888671875,
      "step": 34699,
      "training_step_time": 0.6453561782836914
    },
    {
      "epoch": 0.0002117919921875,
      "grad_norm": 0.13968683779239655,
      "learning_rate": 4.12271419371416e-05,
      "loss": 0.0459,
      "step": 34700
    },
    {
      "epoch": 0.0002117919921875,
      "model_forward_time": 0.12010741233825684,
      "step": 34700
    },
    {
      "epoch": 0.0002117919921875,
      "step": 34700,
      "training_step_time": 0.6894912719726562
    },
    {
      "epoch": 0.000211798095703125,
      "model_forward_time": 0.11839747428894043,
      "step": 34701
    },
    {
      "epoch": 0.000211798095703125,
      "step": 34701,
      "training_step_time": 0.7676761150360107
    },
    {
      "epoch": 0.00021180419921875,
      "model_forward_time": 0.12350082397460938,
      "step": 34702
    },
    {
      "epoch": 0.00021180419921875,
      "step": 34702,
      "training_step_time": 0.6492502689361572
    },
    {
      "epoch": 0.000211810302734375,
      "model_forward_time": 0.11665821075439453,
      "step": 34703
    },
    {
      "epoch": 0.000211810302734375,
      "step": 34703,
      "training_step_time": 0.7343440055847168
    },
    {
      "epoch": 0.00021181640625,
      "model_forward_time": 0.12006425857543945,
      "step": 34704
    },
    {
      "epoch": 0.00021181640625,
      "step": 34704,
      "training_step_time": 0.7013580799102783
    },
    {
      "epoch": 0.000211822509765625,
      "model_forward_time": 0.12203574180603027,
      "step": 34705
    },
    {
      "epoch": 0.000211822509765625,
      "step": 34705,
      "training_step_time": 0.6189005374908447
    },
    {
      "epoch": 0.00021182861328125,
      "model_forward_time": 0.12446165084838867,
      "step": 34706
    },
    {
      "epoch": 0.00021182861328125,
      "step": 34706,
      "training_step_time": 0.6702859401702881
    },
    {
      "epoch": 0.000211834716796875,
      "model_forward_time": 0.1263117790222168,
      "step": 34707
    },
    {
      "epoch": 0.000211834716796875,
      "step": 34707,
      "training_step_time": 0.6003351211547852
    },
    {
      "epoch": 0.0002118408203125,
      "model_forward_time": 0.12172484397888184,
      "step": 34708
    },
    {
      "epoch": 0.0002118408203125,
      "step": 34708,
      "training_step_time": 0.653702974319458
    },
    {
      "epoch": 0.000211846923828125,
      "model_forward_time": 0.11913800239562988,
      "step": 34709
    },
    {
      "epoch": 0.000211846923828125,
      "step": 34709,
      "training_step_time": 0.746100902557373
    },
    {
      "epoch": 0.00021185302734375,
      "grad_norm": 0.13638505339622498,
      "learning_rate": 4.120001294348421e-05,
      "loss": 0.0439,
      "step": 34710
    },
    {
      "epoch": 0.00021185302734375,
      "model_forward_time": 0.11859703063964844,
      "step": 34710
    },
    {
      "epoch": 0.00021185302734375,
      "step": 34710,
      "training_step_time": 0.7315175533294678
    },
    {
      "epoch": 0.000211859130859375,
      "model_forward_time": 0.12175226211547852,
      "step": 34711
    },
    {
      "epoch": 0.000211859130859375,
      "step": 34711,
      "training_step_time": 0.6658327579498291
    },
    {
      "epoch": 0.000211865234375,
      "model_forward_time": 0.1251540184020996,
      "step": 34712
    },
    {
      "epoch": 0.000211865234375,
      "step": 34712,
      "training_step_time": 0.6567080020904541
    },
    {
      "epoch": 0.000211871337890625,
      "model_forward_time": 0.12006425857543945,
      "step": 34713
    },
    {
      "epoch": 0.000211871337890625,
      "step": 34713,
      "training_step_time": 0.7084140777587891
    },
    {
      "epoch": 0.00021187744140625,
      "model_forward_time": 0.1221616268157959,
      "step": 34714
    },
    {
      "epoch": 0.00021187744140625,
      "step": 34714,
      "training_step_time": 0.6822600364685059
    },
    {
      "epoch": 0.000211883544921875,
      "model_forward_time": 0.12530088424682617,
      "step": 34715
    },
    {
      "epoch": 0.000211883544921875,
      "step": 34715,
      "training_step_time": 0.6277751922607422
    },
    {
      "epoch": 0.0002118896484375,
      "model_forward_time": 0.11662077903747559,
      "step": 34716
    },
    {
      "epoch": 0.0002118896484375,
      "step": 34716,
      "training_step_time": 0.7138016223907471
    },
    {
      "epoch": 0.000211895751953125,
      "model_forward_time": 0.1176910400390625,
      "step": 34717
    },
    {
      "epoch": 0.000211895751953125,
      "step": 34717,
      "training_step_time": 0.713068962097168
    },
    {
      "epoch": 0.00021190185546875,
      "model_forward_time": 0.12294840812683105,
      "step": 34718
    },
    {
      "epoch": 0.00021190185546875,
      "step": 34718,
      "training_step_time": 0.6948020458221436
    },
    {
      "epoch": 0.000211907958984375,
      "model_forward_time": 0.1201484203338623,
      "step": 34719
    },
    {
      "epoch": 0.000211907958984375,
      "step": 34719,
      "training_step_time": 0.6952552795410156
    },
    {
      "epoch": 0.0002119140625,
      "grad_norm": 0.13364793360233307,
      "learning_rate": 4.1172886623030526e-05,
      "loss": 0.0453,
      "step": 34720
    },
    {
      "epoch": 0.0002119140625,
      "model_forward_time": 0.11874771118164062,
      "step": 34720
    },
    {
      "epoch": 0.0002119140625,
      "step": 34720,
      "training_step_time": 0.6798045635223389
    },
    {
      "epoch": 0.000211920166015625,
      "model_forward_time": 0.11804509162902832,
      "step": 34721
    },
    {
      "epoch": 0.000211920166015625,
      "step": 34721,
      "training_step_time": 0.6618270874023438
    },
    {
      "epoch": 0.00021192626953125,
      "model_forward_time": 0.11905360221862793,
      "step": 34722
    },
    {
      "epoch": 0.00021192626953125,
      "step": 34722,
      "training_step_time": 0.6834657192230225
    },
    {
      "epoch": 0.000211932373046875,
      "model_forward_time": 0.11840343475341797,
      "step": 34723
    },
    {
      "epoch": 0.000211932373046875,
      "step": 34723,
      "training_step_time": 0.6803603172302246
    },
    {
      "epoch": 0.0002119384765625,
      "model_forward_time": 0.1210489273071289,
      "step": 34724
    },
    {
      "epoch": 0.0002119384765625,
      "step": 34724,
      "training_step_time": 0.6646003723144531
    },
    {
      "epoch": 0.000211944580078125,
      "model_forward_time": 0.12040519714355469,
      "step": 34725
    },
    {
      "epoch": 0.000211944580078125,
      "step": 34725,
      "training_step_time": 0.6716253757476807
    },
    {
      "epoch": 0.00021195068359375,
      "model_forward_time": 0.12158322334289551,
      "step": 34726
    },
    {
      "epoch": 0.00021195068359375,
      "step": 34726,
      "training_step_time": 0.671234130859375
    },
    {
      "epoch": 0.000211956787109375,
      "model_forward_time": 0.11944174766540527,
      "step": 34727
    },
    {
      "epoch": 0.000211956787109375,
      "step": 34727,
      "training_step_time": 0.6568558216094971
    },
    {
      "epoch": 0.000211962890625,
      "model_forward_time": 0.11580801010131836,
      "step": 34728
    },
    {
      "epoch": 0.000211962890625,
      "step": 34728,
      "training_step_time": 0.5754187107086182
    },
    {
      "epoch": 0.000211968994140625,
      "model_forward_time": 0.1191415786743164,
      "step": 34729
    },
    {
      "epoch": 0.000211968994140625,
      "step": 34729,
      "training_step_time": 0.6700513362884521
    },
    {
      "epoch": 0.00021197509765625,
      "grad_norm": 0.0884086936712265,
      "learning_rate": 4.114576298402084e-05,
      "loss": 0.0466,
      "step": 34730
    },
    {
      "epoch": 0.00021197509765625,
      "model_forward_time": 0.12432503700256348,
      "step": 34730
    },
    {
      "epoch": 0.00021197509765625,
      "step": 34730,
      "training_step_time": 0.6674785614013672
    },
    {
      "epoch": 0.000211981201171875,
      "model_forward_time": 0.11905288696289062,
      "step": 34731
    },
    {
      "epoch": 0.000211981201171875,
      "step": 34731,
      "training_step_time": 0.6795949935913086
    },
    {
      "epoch": 0.0002119873046875,
      "model_forward_time": 0.11715030670166016,
      "step": 34732
    },
    {
      "epoch": 0.0002119873046875,
      "step": 34732,
      "training_step_time": 0.6073014736175537
    },
    {
      "epoch": 0.000211993408203125,
      "model_forward_time": 0.11982107162475586,
      "step": 34733
    },
    {
      "epoch": 0.000211993408203125,
      "step": 34733,
      "training_step_time": 0.6785776615142822
    },
    {
      "epoch": 0.00021199951171875,
      "model_forward_time": 0.12232208251953125,
      "step": 34734
    },
    {
      "epoch": 0.00021199951171875,
      "step": 34734,
      "training_step_time": 0.7388200759887695
    },
    {
      "epoch": 0.000212005615234375,
      "model_forward_time": 0.12399601936340332,
      "step": 34735
    },
    {
      "epoch": 0.000212005615234375,
      "step": 34735,
      "training_step_time": 0.6420917510986328
    },
    {
      "epoch": 0.00021201171875,
      "model_forward_time": 0.12389683723449707,
      "step": 34736
    },
    {
      "epoch": 0.00021201171875,
      "step": 34736,
      "training_step_time": 0.6989657878875732
    },
    {
      "epoch": 0.000212017822265625,
      "model_forward_time": 0.13274669647216797,
      "step": 34737
    },
    {
      "epoch": 0.000212017822265625,
      "step": 34737,
      "training_step_time": 0.6151044368743896
    },
    {
      "epoch": 0.00021202392578125,
      "model_forward_time": 0.12907648086547852,
      "step": 34738
    },
    {
      "epoch": 0.00021202392578125,
      "step": 34738,
      "training_step_time": 0.6311507225036621
    },
    {
      "epoch": 0.000212030029296875,
      "model_forward_time": 0.11799335479736328,
      "step": 34739
    },
    {
      "epoch": 0.000212030029296875,
      "step": 34739,
      "training_step_time": 0.6871728897094727
    },
    {
      "epoch": 0.0002120361328125,
      "grad_norm": 0.10254441946744919,
      "learning_rate": 4.111864203469457e-05,
      "loss": 0.047,
      "step": 34740
    },
    {
      "epoch": 0.0002120361328125,
      "model_forward_time": 0.11959505081176758,
      "step": 34740
    },
    {
      "epoch": 0.0002120361328125,
      "step": 34740,
      "training_step_time": 0.6665170192718506
    },
    {
      "epoch": 0.000212042236328125,
      "model_forward_time": 0.12004685401916504,
      "step": 34741
    },
    {
      "epoch": 0.000212042236328125,
      "step": 34741,
      "training_step_time": 0.7205400466918945
    },
    {
      "epoch": 0.00021204833984375,
      "model_forward_time": 0.12043881416320801,
      "step": 34742
    },
    {
      "epoch": 0.00021204833984375,
      "step": 34742,
      "training_step_time": 0.6442000865936279
    },
    {
      "epoch": 0.000212054443359375,
      "model_forward_time": 0.12255358695983887,
      "step": 34743
    },
    {
      "epoch": 0.000212054443359375,
      "step": 34743,
      "training_step_time": 0.601952314376831
    },
    {
      "epoch": 0.000212060546875,
      "model_forward_time": 0.12096309661865234,
      "step": 34744
    },
    {
      "epoch": 0.000212060546875,
      "step": 34744,
      "training_step_time": 0.5839722156524658
    },
    {
      "epoch": 0.000212066650390625,
      "model_forward_time": 0.1269364356994629,
      "step": 34745
    },
    {
      "epoch": 0.000212066650390625,
      "step": 34745,
      "training_step_time": 0.5282289981842041
    },
    {
      "epoch": 0.00021207275390625,
      "model_forward_time": 0.12370538711547852,
      "step": 34746
    },
    {
      "epoch": 0.00021207275390625,
      "step": 34746,
      "training_step_time": 0.7013745307922363
    },
    {
      "epoch": 0.000212078857421875,
      "model_forward_time": 0.12045454978942871,
      "step": 34747
    },
    {
      "epoch": 0.000212078857421875,
      "step": 34747,
      "training_step_time": 0.5946729183197021
    },
    {
      "epoch": 0.0002120849609375,
      "model_forward_time": 0.11929607391357422,
      "step": 34748
    },
    {
      "epoch": 0.0002120849609375,
      "step": 34748,
      "training_step_time": 0.4803907871246338
    },
    {
      "epoch": 0.000212091064453125,
      "model_forward_time": 0.1177377700805664,
      "step": 34749
    },
    {
      "epoch": 0.000212091064453125,
      "step": 34749,
      "training_step_time": 0.4540824890136719
    },
    {
      "epoch": 0.00021209716796875,
      "grad_norm": 0.13563959300518036,
      "learning_rate": 4.109152378329036e-05,
      "loss": 0.0449,
      "step": 34750
    },
    {
      "epoch": 0.00021209716796875,
      "model_forward_time": 0.12437152862548828,
      "step": 34750
    },
    {
      "epoch": 0.00021209716796875,
      "step": 34750,
      "training_step_time": 0.5027031898498535
    },
    {
      "epoch": 0.000212103271484375,
      "model_forward_time": 0.11909222602844238,
      "step": 34751
    },
    {
      "epoch": 0.000212103271484375,
      "step": 34751,
      "training_step_time": 0.4054718017578125
    },
    {
      "epoch": 0.000212109375,
      "model_forward_time": 0.11910152435302734,
      "step": 34752
    },
    {
      "epoch": 0.000212109375,
      "step": 34752,
      "training_step_time": 0.4878368377685547
    },
    {
      "epoch": 0.000212115478515625,
      "model_forward_time": 0.11510896682739258,
      "step": 34753
    },
    {
      "epoch": 0.000212115478515625,
      "step": 34753,
      "training_step_time": 0.40959882736206055
    },
    {
      "epoch": 0.00021212158203125,
      "model_forward_time": 0.11565279960632324,
      "step": 34754
    },
    {
      "epoch": 0.00021212158203125,
      "step": 34754,
      "training_step_time": 0.39856505393981934
    },
    {
      "epoch": 0.000212127685546875,
      "model_forward_time": 0.11545443534851074,
      "step": 34755
    },
    {
      "epoch": 0.000212127685546875,
      "step": 34755,
      "training_step_time": 0.3945283889770508
    },
    {
      "epoch": 0.0002121337890625,
      "model_forward_time": 0.11534428596496582,
      "step": 34756
    },
    {
      "epoch": 0.0002121337890625,
      "step": 34756,
      "training_step_time": 0.4017932415008545
    },
    {
      "epoch": 0.000212139892578125,
      "model_forward_time": 0.11557698249816895,
      "step": 34757
    },
    {
      "epoch": 0.000212139892578125,
      "step": 34757,
      "training_step_time": 0.39868688583374023
    },
    {
      "epoch": 0.00021214599609375,
      "model_forward_time": 0.11479616165161133,
      "step": 34758
    },
    {
      "epoch": 0.00021214599609375,
      "step": 34758,
      "training_step_time": 0.39201927185058594
    },
    {
      "epoch": 0.000212152099609375,
      "model_forward_time": 0.11496233940124512,
      "step": 34759
    },
    {
      "epoch": 0.000212152099609375,
      "step": 34759,
      "training_step_time": 0.3950231075286865
    },
    {
      "epoch": 0.000212158203125,
      "grad_norm": 0.14093086123466492,
      "learning_rate": 4.1064408238045994e-05,
      "loss": 0.0458,
      "step": 34760
    },
    {
      "epoch": 0.000212158203125,
      "model_forward_time": 0.1156613826751709,
      "step": 34760
    },
    {
      "epoch": 0.000212158203125,
      "step": 34760,
      "training_step_time": 0.4013078212738037
    },
    {
      "epoch": 0.000212164306640625,
      "model_forward_time": 0.11541581153869629,
      "step": 34761
    },
    {
      "epoch": 0.000212164306640625,
      "step": 34761,
      "training_step_time": 0.4202136993408203
    },
    {
      "epoch": 0.00021217041015625,
      "model_forward_time": 0.11512231826782227,
      "step": 34762
    },
    {
      "epoch": 0.00021217041015625,
      "step": 34762,
      "training_step_time": 0.4562556743621826
    },
    {
      "epoch": 0.000212176513671875,
      "model_forward_time": 0.1153106689453125,
      "step": 34763
    },
    {
      "epoch": 0.000212176513671875,
      "step": 34763,
      "training_step_time": 0.36891794204711914
    },
    {
      "epoch": 0.0002121826171875,
      "model_forward_time": 0.11565876007080078,
      "step": 34764
    },
    {
      "epoch": 0.0002121826171875,
      "step": 34764,
      "training_step_time": 0.4000368118286133
    },
    {
      "epoch": 0.000212188720703125,
      "model_forward_time": 0.1153113842010498,
      "step": 34765
    },
    {
      "epoch": 0.000212188720703125,
      "step": 34765,
      "training_step_time": 0.4874000549316406
    },
    {
      "epoch": 0.00021219482421875,
      "model_forward_time": 0.11440062522888184,
      "step": 34766
    },
    {
      "epoch": 0.00021219482421875,
      "step": 34766,
      "training_step_time": 0.48972105979919434
    },
    {
      "epoch": 0.000212200927734375,
      "model_forward_time": 0.11510848999023438,
      "step": 34767
    },
    {
      "epoch": 0.000212200927734375,
      "step": 34767,
      "training_step_time": 0.4187459945678711
    },
    {
      "epoch": 0.00021220703125,
      "model_forward_time": 0.11456465721130371,
      "step": 34768
    },
    {
      "epoch": 0.00021220703125,
      "step": 34768,
      "training_step_time": 0.3918149471282959
    },
    {
      "epoch": 0.000212213134765625,
      "model_forward_time": 0.11533546447753906,
      "step": 34769
    },
    {
      "epoch": 0.000212213134765625,
      "step": 34769,
      "training_step_time": 0.39281535148620605
    },
    {
      "epoch": 0.00021221923828125,
      "grad_norm": 0.09343327581882477,
      "learning_rate": 4.103729540719847e-05,
      "loss": 0.0446,
      "step": 34770
    },
    {
      "epoch": 0.00021221923828125,
      "model_forward_time": 0.11644268035888672,
      "step": 34770
    },
    {
      "epoch": 0.00021221923828125,
      "step": 34770,
      "training_step_time": 0.38337135314941406
    },
    {
      "epoch": 0.000212225341796875,
      "model_forward_time": 0.11440610885620117,
      "step": 34771
    },
    {
      "epoch": 0.000212225341796875,
      "step": 34771,
      "training_step_time": 0.396226167678833
    },
    {
      "epoch": 0.0002122314453125,
      "model_forward_time": 0.1163640022277832,
      "step": 34772
    },
    {
      "epoch": 0.0002122314453125,
      "step": 34772,
      "training_step_time": 0.38703417778015137
    },
    {
      "epoch": 0.000212237548828125,
      "model_forward_time": 0.11607742309570312,
      "step": 34773
    },
    {
      "epoch": 0.000212237548828125,
      "step": 34773,
      "training_step_time": 0.39115262031555176
    },
    {
      "epoch": 0.00021224365234375,
      "model_forward_time": 0.11577439308166504,
      "step": 34774
    },
    {
      "epoch": 0.00021224365234375,
      "step": 34774,
      "training_step_time": 0.391404390335083
    },
    {
      "epoch": 0.000212249755859375,
      "model_forward_time": 0.11585021018981934,
      "step": 34775
    },
    {
      "epoch": 0.000212249755859375,
      "step": 34775,
      "training_step_time": 0.4497213363647461
    },
    {
      "epoch": 0.000212255859375,
      "model_forward_time": 0.11624360084533691,
      "step": 34776
    },
    {
      "epoch": 0.000212255859375,
      "step": 34776,
      "training_step_time": 0.48868298530578613
    },
    {
      "epoch": 0.000212261962890625,
      "model_forward_time": 0.11509966850280762,
      "step": 34777
    },
    {
      "epoch": 0.000212261962890625,
      "step": 34777,
      "training_step_time": 0.39156222343444824
    },
    {
      "epoch": 0.00021226806640625,
      "model_forward_time": 0.115631103515625,
      "step": 34778
    },
    {
      "epoch": 0.00021226806640625,
      "step": 34778,
      "training_step_time": 0.40955686569213867
    },
    {
      "epoch": 0.000212274169921875,
      "model_forward_time": 0.11571478843688965,
      "step": 34779
    },
    {
      "epoch": 0.000212274169921875,
      "step": 34779,
      "training_step_time": 0.4584822654724121
    },
    {
      "epoch": 0.0002122802734375,
      "grad_norm": 0.12956508994102478,
      "learning_rate": 4.1010185298983984e-05,
      "loss": 0.0466,
      "step": 34780
    },
    {
      "epoch": 0.0002122802734375,
      "model_forward_time": 0.11548137664794922,
      "step": 34780
    },
    {
      "epoch": 0.0002122802734375,
      "step": 34780,
      "training_step_time": 0.41919493675231934
    },
    {
      "epoch": 0.000212286376953125,
      "model_forward_time": 0.11523103713989258,
      "step": 34781
    },
    {
      "epoch": 0.000212286376953125,
      "step": 34781,
      "training_step_time": 0.4706873893737793
    },
    {
      "epoch": 0.00021229248046875,
      "model_forward_time": 0.11521625518798828,
      "step": 34782
    },
    {
      "epoch": 0.00021229248046875,
      "step": 34782,
      "training_step_time": 0.4015800952911377
    },
    {
      "epoch": 0.000212298583984375,
      "model_forward_time": 0.1149141788482666,
      "step": 34783
    },
    {
      "epoch": 0.000212298583984375,
      "step": 34783,
      "training_step_time": 0.3985569477081299
    },
    {
      "epoch": 0.0002123046875,
      "model_forward_time": 0.11548805236816406,
      "step": 34784
    },
    {
      "epoch": 0.0002123046875,
      "step": 34784,
      "training_step_time": 0.40381312370300293
    },
    {
      "epoch": 0.000212310791015625,
      "model_forward_time": 0.1155095100402832,
      "step": 34785
    },
    {
      "epoch": 0.000212310791015625,
      "step": 34785,
      "training_step_time": 0.377948522567749
    },
    {
      "epoch": 0.00021231689453125,
      "model_forward_time": 0.11547350883483887,
      "step": 34786
    },
    {
      "epoch": 0.00021231689453125,
      "step": 34786,
      "training_step_time": 0.7075512409210205
    },
    {
      "epoch": 0.000212322998046875,
      "model_forward_time": 0.11486601829528809,
      "step": 34787
    },
    {
      "epoch": 0.000212322998046875,
      "step": 34787,
      "training_step_time": 0.3901245594024658
    },
    {
      "epoch": 0.0002123291015625,
      "model_forward_time": 0.11509871482849121,
      "step": 34788
    },
    {
      "epoch": 0.0002123291015625,
      "step": 34788,
      "training_step_time": 0.3938412666320801
    },
    {
      "epoch": 0.000212335205078125,
      "model_forward_time": 0.1150510311126709,
      "step": 34789
    },
    {
      "epoch": 0.000212335205078125,
      "step": 34789,
      "training_step_time": 0.42285728454589844
    },
    {
      "epoch": 0.00021234130859375,
      "grad_norm": 0.11904122680425644,
      "learning_rate": 4.0983077921637815e-05,
      "loss": 0.0472,
      "step": 34790
    },
    {
      "epoch": 0.00021234130859375,
      "model_forward_time": 0.11445808410644531,
      "step": 34790
    },
    {
      "epoch": 0.00021234130859375,
      "step": 34790,
      "training_step_time": 0.38777780532836914
    },
    {
      "epoch": 0.000212347412109375,
      "model_forward_time": 0.11511039733886719,
      "step": 34791
    },
    {
      "epoch": 0.000212347412109375,
      "step": 34791,
      "training_step_time": 0.4620518684387207
    },
    {
      "epoch": 0.000212353515625,
      "model_forward_time": 0.11520886421203613,
      "step": 34792
    },
    {
      "epoch": 0.000212353515625,
      "step": 34792,
      "training_step_time": 0.841463565826416
    },
    {
      "epoch": 0.000212359619140625,
      "model_forward_time": 0.11460232734680176,
      "step": 34793
    },
    {
      "epoch": 0.000212359619140625,
      "step": 34793,
      "training_step_time": 0.4041602611541748
    },
    {
      "epoch": 0.00021236572265625,
      "model_forward_time": 0.11461639404296875,
      "step": 34794
    },
    {
      "epoch": 0.00021236572265625,
      "step": 34794,
      "training_step_time": 0.45146727561950684
    },
    {
      "epoch": 0.000212371826171875,
      "model_forward_time": 0.11452889442443848,
      "step": 34795
    },
    {
      "epoch": 0.000212371826171875,
      "step": 34795,
      "training_step_time": 0.3939189910888672
    },
    {
      "epoch": 0.0002123779296875,
      "model_forward_time": 0.11456847190856934,
      "step": 34796
    },
    {
      "epoch": 0.0002123779296875,
      "step": 34796,
      "training_step_time": 0.38094353675842285
    },
    {
      "epoch": 0.000212384033203125,
      "model_forward_time": 0.11458611488342285,
      "step": 34797
    },
    {
      "epoch": 0.000212384033203125,
      "step": 34797,
      "training_step_time": 0.3833584785461426
    },
    {
      "epoch": 0.00021239013671875,
      "model_forward_time": 0.11498022079467773,
      "step": 34798
    },
    {
      "epoch": 0.00021239013671875,
      "step": 34798,
      "training_step_time": 0.646681547164917
    },
    {
      "epoch": 0.000212396240234375,
      "model_forward_time": 0.11458039283752441,
      "step": 34799
    },
    {
      "epoch": 0.000212396240234375,
      "step": 34799,
      "training_step_time": 0.3912215232849121
    },
    {
      "epoch": 0.00021240234375,
      "grad_norm": 0.13541056215763092,
      "learning_rate": 4.095597328339452e-05,
      "loss": 0.0486,
      "step": 34800
    },
    {
      "epoch": 0.00021240234375,
      "model_forward_time": 0.1147003173828125,
      "step": 34800
    },
    {
      "epoch": 0.00021240234375,
      "step": 34800,
      "training_step_time": 0.3895456790924072
    },
    {
      "epoch": 0.000212408447265625,
      "model_forward_time": 0.11478996276855469,
      "step": 34801
    },
    {
      "epoch": 0.000212408447265625,
      "step": 34801,
      "training_step_time": 0.3915984630584717
    },
    {
      "epoch": 0.00021241455078125,
      "model_forward_time": 0.1149144172668457,
      "step": 34802
    },
    {
      "epoch": 0.00021241455078125,
      "step": 34802,
      "training_step_time": 0.40073633193969727
    },
    {
      "epoch": 0.000212420654296875,
      "model_forward_time": 0.11470413208007812,
      "step": 34803
    },
    {
      "epoch": 0.000212420654296875,
      "step": 34803,
      "training_step_time": 0.393186092376709
    },
    {
      "epoch": 0.0002124267578125,
      "model_forward_time": 0.11532139778137207,
      "step": 34804
    },
    {
      "epoch": 0.0002124267578125,
      "step": 34804,
      "training_step_time": 1.0206658840179443
    },
    {
      "epoch": 0.000212432861328125,
      "model_forward_time": 0.11426043510437012,
      "step": 34805
    },
    {
      "epoch": 0.000212432861328125,
      "step": 34805,
      "training_step_time": 0.4591054916381836
    },
    {
      "epoch": 0.00021243896484375,
      "model_forward_time": 0.11406278610229492,
      "step": 34806
    },
    {
      "epoch": 0.00021243896484375,
      "step": 34806,
      "training_step_time": 0.4621851444244385
    },
    {
      "epoch": 0.000212445068359375,
      "model_forward_time": 0.11452293395996094,
      "step": 34807
    },
    {
      "epoch": 0.000212445068359375,
      "step": 34807,
      "training_step_time": 0.40994691848754883
    },
    {
      "epoch": 0.000212451171875,
      "model_forward_time": 0.11418890953063965,
      "step": 34808
    },
    {
      "epoch": 0.000212451171875,
      "step": 34808,
      "training_step_time": 0.38597893714904785
    },
    {
      "epoch": 0.000212457275390625,
      "model_forward_time": 0.11365866661071777,
      "step": 34809
    },
    {
      "epoch": 0.000212457275390625,
      "step": 34809,
      "training_step_time": 0.3849680423736572
    },
    {
      "epoch": 0.00021246337890625,
      "grad_norm": 0.15125322341918945,
      "learning_rate": 4.092887139248772e-05,
      "loss": 0.0425,
      "step": 34810
    },
    {
      "epoch": 0.00021246337890625,
      "model_forward_time": 0.11449742317199707,
      "step": 34810
    },
    {
      "epoch": 0.00021246337890625,
      "step": 34810,
      "training_step_time": 0.4492483139038086
    },
    {
      "epoch": 0.000212469482421875,
      "model_forward_time": 0.11539912223815918,
      "step": 34811
    },
    {
      "epoch": 0.000212469482421875,
      "step": 34811,
      "training_step_time": 0.3966519832611084
    },
    {
      "epoch": 0.0002124755859375,
      "model_forward_time": 0.11612677574157715,
      "step": 34812
    },
    {
      "epoch": 0.0002124755859375,
      "step": 34812,
      "training_step_time": 0.39785051345825195
    },
    {
      "epoch": 0.000212481689453125,
      "model_forward_time": 0.11466002464294434,
      "step": 34813
    },
    {
      "epoch": 0.000212481689453125,
      "step": 34813,
      "training_step_time": 0.4001774787902832
    },
    {
      "epoch": 0.00021248779296875,
      "model_forward_time": 0.1154637336730957,
      "step": 34814
    },
    {
      "epoch": 0.00021248779296875,
      "step": 34814,
      "training_step_time": 0.39088010787963867
    },
    {
      "epoch": 0.000212493896484375,
      "model_forward_time": 0.11622834205627441,
      "step": 34815
    },
    {
      "epoch": 0.000212493896484375,
      "step": 34815,
      "training_step_time": 0.5122184753417969
    },
    {
      "epoch": 0.0002125,
      "model_forward_time": 0.11507010459899902,
      "step": 34816
    },
    {
      "epoch": 0.0002125,
      "step": 34816,
      "training_step_time": 0.720952033996582
    },
    {
      "epoch": 0.000212506103515625,
      "model_forward_time": 0.11452436447143555,
      "step": 34817
    },
    {
      "epoch": 0.000212506103515625,
      "step": 34817,
      "training_step_time": 0.39284181594848633
    },
    {
      "epoch": 0.00021251220703125,
      "model_forward_time": 0.11478137969970703,
      "step": 34818
    },
    {
      "epoch": 0.00021251220703125,
      "step": 34818,
      "training_step_time": 0.3608381748199463
    },
    {
      "epoch": 0.000212518310546875,
      "model_forward_time": 0.11415457725524902,
      "step": 34819
    },
    {
      "epoch": 0.000212518310546875,
      "step": 34819,
      "training_step_time": 0.42577481269836426
    },
    {
      "epoch": 0.0002125244140625,
      "grad_norm": 0.11197615414857864,
      "learning_rate": 4.09017722571503e-05,
      "loss": 0.0433,
      "step": 34820
    },
    {
      "epoch": 0.0002125244140625,
      "model_forward_time": 0.11520671844482422,
      "step": 34820
    },
    {
      "epoch": 0.0002125244140625,
      "step": 34820,
      "training_step_time": 0.43349170684814453
    },
    {
      "epoch": 0.000212530517578125,
      "model_forward_time": 0.11432600021362305,
      "step": 34821
    },
    {
      "epoch": 0.000212530517578125,
      "step": 34821,
      "training_step_time": 0.39388585090637207
    },
    {
      "epoch": 0.00021253662109375,
      "model_forward_time": 0.11431097984313965,
      "step": 34822
    },
    {
      "epoch": 0.00021253662109375,
      "step": 34822,
      "training_step_time": 0.6983537673950195
    },
    {
      "epoch": 0.000212542724609375,
      "model_forward_time": 0.1155397891998291,
      "step": 34823
    },
    {
      "epoch": 0.000212542724609375,
      "step": 34823,
      "training_step_time": 0.3934135437011719
    },
    {
      "epoch": 0.000212548828125,
      "model_forward_time": 0.1143350601196289,
      "step": 34824
    },
    {
      "epoch": 0.000212548828125,
      "step": 34824,
      "training_step_time": 0.3909134864807129
    },
    {
      "epoch": 0.000212554931640625,
      "model_forward_time": 0.11501955986022949,
      "step": 34825
    },
    {
      "epoch": 0.000212554931640625,
      "step": 34825,
      "training_step_time": 0.3903505802154541
    },
    {
      "epoch": 0.00021256103515625,
      "model_forward_time": 0.11432361602783203,
      "step": 34826
    },
    {
      "epoch": 0.00021256103515625,
      "step": 34826,
      "training_step_time": 0.38324522972106934
    },
    {
      "epoch": 0.000212567138671875,
      "model_forward_time": 0.11527800559997559,
      "step": 34827
    },
    {
      "epoch": 0.000212567138671875,
      "step": 34827,
      "training_step_time": 0.3895587921142578
    },
    {
      "epoch": 0.0002125732421875,
      "model_forward_time": 0.11481499671936035,
      "step": 34828
    },
    {
      "epoch": 0.0002125732421875,
      "step": 34828,
      "training_step_time": 0.9208834171295166
    },
    {
      "epoch": 0.000212579345703125,
      "model_forward_time": 0.11406469345092773,
      "step": 34829
    },
    {
      "epoch": 0.000212579345703125,
      "step": 34829,
      "training_step_time": 0.4244086742401123
    },
    {
      "epoch": 0.00021258544921875,
      "grad_norm": 0.14580439031124115,
      "learning_rate": 4.087467588561424e-05,
      "loss": 0.0377,
      "step": 34830
    },
    {
      "epoch": 0.00021258544921875,
      "model_forward_time": 0.11406087875366211,
      "step": 34830
    },
    {
      "epoch": 0.00021258544921875,
      "step": 34830,
      "training_step_time": 0.4476642608642578
    },
    {
      "epoch": 0.000212591552734375,
      "model_forward_time": 0.11404705047607422,
      "step": 34831
    },
    {
      "epoch": 0.000212591552734375,
      "step": 34831,
      "training_step_time": 0.36229372024536133
    },
    {
      "epoch": 0.00021259765625,
      "model_forward_time": 0.11443328857421875,
      "step": 34832
    },
    {
      "epoch": 0.00021259765625,
      "step": 34832,
      "training_step_time": 0.43049025535583496
    },
    {
      "epoch": 0.000212603759765625,
      "model_forward_time": 0.11440324783325195,
      "step": 34833
    },
    {
      "epoch": 0.000212603759765625,
      "step": 34833,
      "training_step_time": 0.45768308639526367
    },
    {
      "epoch": 0.00021260986328125,
      "model_forward_time": 0.11678433418273926,
      "step": 34834
    },
    {
      "epoch": 0.00021260986328125,
      "step": 34834,
      "training_step_time": 0.38970255851745605
    },
    {
      "epoch": 0.000212615966796875,
      "model_forward_time": 0.1145775318145752,
      "step": 34835
    },
    {
      "epoch": 0.000212615966796875,
      "step": 34835,
      "training_step_time": 0.3995089530944824
    },
    {
      "epoch": 0.0002126220703125,
      "model_forward_time": 0.11512541770935059,
      "step": 34836
    },
    {
      "epoch": 0.0002126220703125,
      "step": 34836,
      "training_step_time": 0.3919968605041504
    },
    {
      "epoch": 0.000212628173828125,
      "model_forward_time": 0.11542248725891113,
      "step": 34837
    },
    {
      "epoch": 0.000212628173828125,
      "step": 34837,
      "training_step_time": 0.38864588737487793
    },
    {
      "epoch": 0.00021263427734375,
      "model_forward_time": 0.11543893814086914,
      "step": 34838
    },
    {
      "epoch": 0.00021263427734375,
      "step": 34838,
      "training_step_time": 0.3949861526489258
    },
    {
      "epoch": 0.000212640380859375,
      "model_forward_time": 0.11457538604736328,
      "step": 34839
    },
    {
      "epoch": 0.000212640380859375,
      "step": 34839,
      "training_step_time": 0.3874390125274658
    },
    {
      "epoch": 0.000212646484375,
      "grad_norm": 0.1878957897424698,
      "learning_rate": 4.08475822861107e-05,
      "loss": 0.0463,
      "step": 34840
    },
    {
      "epoch": 0.000212646484375,
      "model_forward_time": 0.11630415916442871,
      "step": 34840
    },
    {
      "epoch": 0.000212646484375,
      "step": 34840,
      "training_step_time": 0.7195312976837158
    },
    {
      "epoch": 0.000212652587890625,
      "model_forward_time": 0.11539745330810547,
      "step": 34841
    },
    {
      "epoch": 0.000212652587890625,
      "step": 34841,
      "training_step_time": 0.39406728744506836
    },
    {
      "epoch": 0.00021265869140625,
      "model_forward_time": 0.11495137214660645,
      "step": 34842
    },
    {
      "epoch": 0.00021265869140625,
      "step": 34842,
      "training_step_time": 0.46228671073913574
    },
    {
      "epoch": 0.000212664794921875,
      "model_forward_time": 0.11453652381896973,
      "step": 34843
    },
    {
      "epoch": 0.000212664794921875,
      "step": 34843,
      "training_step_time": 0.4510955810546875
    },
    {
      "epoch": 0.0002126708984375,
      "model_forward_time": 0.11447691917419434,
      "step": 34844
    },
    {
      "epoch": 0.0002126708984375,
      "step": 34844,
      "training_step_time": 0.3972892761230469
    },
    {
      "epoch": 0.000212677001953125,
      "model_forward_time": 0.11456871032714844,
      "step": 34845
    },
    {
      "epoch": 0.000212677001953125,
      "step": 34845,
      "training_step_time": 0.36288976669311523
    },
    {
      "epoch": 0.00021268310546875,
      "model_forward_time": 0.12179446220397949,
      "step": 34846
    },
    {
      "epoch": 0.00021268310546875,
      "step": 34846,
      "training_step_time": 0.6144924163818359
    },
    {
      "epoch": 0.000212689208984375,
      "model_forward_time": 0.11476802825927734,
      "step": 34847
    },
    {
      "epoch": 0.000212689208984375,
      "step": 34847,
      "training_step_time": 0.41786956787109375
    },
    {
      "epoch": 0.0002126953125,
      "model_forward_time": 0.11428189277648926,
      "step": 34848
    },
    {
      "epoch": 0.0002126953125,
      "step": 34848,
      "training_step_time": 0.3890106678009033
    },
    {
      "epoch": 0.000212701416015625,
      "model_forward_time": 0.11401081085205078,
      "step": 34849
    },
    {
      "epoch": 0.000212701416015625,
      "step": 34849,
      "training_step_time": 0.39641404151916504
    },
    {
      "epoch": 0.00021270751953125,
      "grad_norm": 0.10094813257455826,
      "learning_rate": 4.082049146687003e-05,
      "loss": 0.0416,
      "step": 34850
    },
    {
      "epoch": 0.00021270751953125,
      "model_forward_time": 0.11527466773986816,
      "step": 34850
    },
    {
      "epoch": 0.00021270751953125,
      "step": 34850,
      "training_step_time": 0.39672350883483887
    },
    {
      "epoch": 0.000212713623046875,
      "model_forward_time": 0.11450886726379395,
      "step": 34851
    },
    {
      "epoch": 0.000212713623046875,
      "step": 34851,
      "training_step_time": 0.3885498046875
    },
    {
      "epoch": 0.0002127197265625,
      "model_forward_time": 0.11685299873352051,
      "step": 34852
    },
    {
      "epoch": 0.0002127197265625,
      "step": 34852,
      "training_step_time": 0.6733906269073486
    },
    {
      "epoch": 0.000212725830078125,
      "model_forward_time": 0.11432194709777832,
      "step": 34853
    },
    {
      "epoch": 0.000212725830078125,
      "step": 34853,
      "training_step_time": 0.3927474021911621
    },
    {
      "epoch": 0.00021273193359375,
      "model_forward_time": 0.11434555053710938,
      "step": 34854
    },
    {
      "epoch": 0.00021273193359375,
      "step": 34854,
      "training_step_time": 0.39000630378723145
    },
    {
      "epoch": 0.000212738037109375,
      "model_forward_time": 0.1150670051574707,
      "step": 34855
    },
    {
      "epoch": 0.000212738037109375,
      "step": 34855,
      "training_step_time": 0.4021570682525635
    },
    {
      "epoch": 0.000212744140625,
      "model_forward_time": 0.11464810371398926,
      "step": 34856
    },
    {
      "epoch": 0.000212744140625,
      "step": 34856,
      "training_step_time": 0.42619967460632324
    },
    {
      "epoch": 0.000212750244140625,
      "model_forward_time": 0.11445093154907227,
      "step": 34857
    },
    {
      "epoch": 0.000212750244140625,
      "step": 34857,
      "training_step_time": 0.4182150363922119
    },
    {
      "epoch": 0.00021275634765625,
      "model_forward_time": 0.11502408981323242,
      "step": 34858
    },
    {
      "epoch": 0.00021275634765625,
      "step": 34858,
      "training_step_time": 0.5925335884094238
    },
    {
      "epoch": 0.000212762451171875,
      "model_forward_time": 0.11446189880371094,
      "step": 34859
    },
    {
      "epoch": 0.000212762451171875,
      "step": 34859,
      "training_step_time": 0.3998432159423828
    },
    {
      "epoch": 0.0002127685546875,
      "grad_norm": 0.10493788868188858,
      "learning_rate": 4.079340343612165e-05,
      "loss": 0.0411,
      "step": 34860
    },
    {
      "epoch": 0.0002127685546875,
      "model_forward_time": 0.11473417282104492,
      "step": 34860
    },
    {
      "epoch": 0.0002127685546875,
      "step": 34860,
      "training_step_time": 0.4914727210998535
    },
    {
      "epoch": 0.000212774658203125,
      "model_forward_time": 0.11455440521240234,
      "step": 34861
    },
    {
      "epoch": 0.000212774658203125,
      "step": 34861,
      "training_step_time": 0.4382014274597168
    },
    {
      "epoch": 0.00021278076171875,
      "model_forward_time": 0.11448001861572266,
      "step": 34862
    },
    {
      "epoch": 0.00021278076171875,
      "step": 34862,
      "training_step_time": 0.3886685371398926
    },
    {
      "epoch": 0.000212786865234375,
      "model_forward_time": 0.11458754539489746,
      "step": 34863
    },
    {
      "epoch": 0.000212786865234375,
      "step": 34863,
      "training_step_time": 0.39099812507629395
    },
    {
      "epoch": 0.00021279296875,
      "model_forward_time": 0.11478781700134277,
      "step": 34864
    },
    {
      "epoch": 0.00021279296875,
      "step": 34864,
      "training_step_time": 0.7310278415679932
    },
    {
      "epoch": 0.000212799072265625,
      "model_forward_time": 0.11440420150756836,
      "step": 34865
    },
    {
      "epoch": 0.000212799072265625,
      "step": 34865,
      "training_step_time": 0.389819860458374
    },
    {
      "epoch": 0.00021280517578125,
      "model_forward_time": 0.11437606811523438,
      "step": 34866
    },
    {
      "epoch": 0.00021280517578125,
      "step": 34866,
      "training_step_time": 0.393310546875
    },
    {
      "epoch": 0.000212811279296875,
      "model_forward_time": 0.11474943161010742,
      "step": 34867
    },
    {
      "epoch": 0.000212811279296875,
      "step": 34867,
      "training_step_time": 0.38683104515075684
    },
    {
      "epoch": 0.0002128173828125,
      "model_forward_time": 0.1147763729095459,
      "step": 34868
    },
    {
      "epoch": 0.0002128173828125,
      "step": 34868,
      "training_step_time": 0.38709402084350586
    },
    {
      "epoch": 0.000212823486328125,
      "model_forward_time": 0.1145792007446289,
      "step": 34869
    },
    {
      "epoch": 0.000212823486328125,
      "step": 34869,
      "training_step_time": 0.45073890686035156
    },
    {
      "epoch": 0.00021282958984375,
      "grad_norm": 0.18539351224899292,
      "learning_rate": 4.076631820209422e-05,
      "loss": 0.0379,
      "step": 34870
    },
    {
      "epoch": 0.00021282958984375,
      "model_forward_time": 0.11512207984924316,
      "step": 34870
    },
    {
      "epoch": 0.00021282958984375,
      "step": 34870,
      "training_step_time": 0.8425107002258301
    },
    {
      "epoch": 0.000212835693359375,
      "model_forward_time": 0.11369705200195312,
      "step": 34871
    },
    {
      "epoch": 0.000212835693359375,
      "step": 34871,
      "training_step_time": 0.41837167739868164
    },
    {
      "epoch": 0.000212841796875,
      "model_forward_time": 0.1146707534790039,
      "step": 34872
    },
    {
      "epoch": 0.000212841796875,
      "step": 34872,
      "training_step_time": 0.4305436611175537
    },
    {
      "epoch": 0.000212847900390625,
      "model_forward_time": 0.11484050750732422,
      "step": 34873
    },
    {
      "epoch": 0.000212847900390625,
      "step": 34873,
      "training_step_time": 0.42083215713500977
    },
    {
      "epoch": 0.00021285400390625,
      "model_forward_time": 0.114105224609375,
      "step": 34874
    },
    {
      "epoch": 0.00021285400390625,
      "step": 34874,
      "training_step_time": 0.4347856044769287
    },
    {
      "epoch": 0.000212860107421875,
      "model_forward_time": 0.11366009712219238,
      "step": 34875
    },
    {
      "epoch": 0.000212860107421875,
      "step": 34875,
      "training_step_time": 0.3830404281616211
    },
    {
      "epoch": 0.0002128662109375,
      "model_forward_time": 0.11558866500854492,
      "step": 34876
    },
    {
      "epoch": 0.0002128662109375,
      "step": 34876,
      "training_step_time": 0.7849817276000977
    },
    {
      "epoch": 0.000212872314453125,
      "model_forward_time": 0.11457705497741699,
      "step": 34877
    },
    {
      "epoch": 0.000212872314453125,
      "step": 34877,
      "training_step_time": 0.3887300491333008
    },
    {
      "epoch": 0.00021287841796875,
      "model_forward_time": 0.11442828178405762,
      "step": 34878
    },
    {
      "epoch": 0.00021287841796875,
      "step": 34878,
      "training_step_time": 0.3908088207244873
    },
    {
      "epoch": 0.000212884521484375,
      "model_forward_time": 0.1144247055053711,
      "step": 34879
    },
    {
      "epoch": 0.000212884521484375,
      "step": 34879,
      "training_step_time": 0.3967757225036621
    },
    {
      "epoch": 0.000212890625,
      "grad_norm": 0.0689191073179245,
      "learning_rate": 4.0739235773015536e-05,
      "loss": 0.04,
      "step": 34880
    },
    {
      "epoch": 0.000212890625,
      "model_forward_time": 0.11404800415039062,
      "step": 34880
    },
    {
      "epoch": 0.000212890625,
      "step": 34880,
      "training_step_time": 0.38420605659484863
    },
    {
      "epoch": 0.000212896728515625,
      "model_forward_time": 0.1145787239074707,
      "step": 34881
    },
    {
      "epoch": 0.000212896728515625,
      "step": 34881,
      "training_step_time": 0.3926115036010742
    },
    {
      "epoch": 0.00021290283203125,
      "model_forward_time": 0.11599469184875488,
      "step": 34882
    },
    {
      "epoch": 0.00021290283203125,
      "step": 34882,
      "training_step_time": 0.6773650646209717
    },
    {
      "epoch": 0.000212908935546875,
      "model_forward_time": 0.11454057693481445,
      "step": 34883
    },
    {
      "epoch": 0.000212908935546875,
      "step": 34883,
      "training_step_time": 0.4367232322692871
    },
    {
      "epoch": 0.0002129150390625,
      "model_forward_time": 0.11503386497497559,
      "step": 34884
    },
    {
      "epoch": 0.0002129150390625,
      "step": 34884,
      "training_step_time": 0.48999452590942383
    },
    {
      "epoch": 0.000212921142578125,
      "model_forward_time": 0.11467552185058594,
      "step": 34885
    },
    {
      "epoch": 0.000212921142578125,
      "step": 34885,
      "training_step_time": 0.48189520835876465
    },
    {
      "epoch": 0.00021292724609375,
      "model_forward_time": 0.11569356918334961,
      "step": 34886
    },
    {
      "epoch": 0.00021292724609375,
      "step": 34886,
      "training_step_time": 0.42944931983947754
    },
    {
      "epoch": 0.000212933349609375,
      "model_forward_time": 0.11424732208251953,
      "step": 34887
    },
    {
      "epoch": 0.000212933349609375,
      "step": 34887,
      "training_step_time": 0.432466983795166
    },
    {
      "epoch": 0.000212939453125,
      "model_forward_time": 0.11481881141662598,
      "step": 34888
    },
    {
      "epoch": 0.000212939453125,
      "step": 34888,
      "training_step_time": 0.3997225761413574
    },
    {
      "epoch": 0.000212945556640625,
      "model_forward_time": 0.11504054069519043,
      "step": 34889
    },
    {
      "epoch": 0.000212945556640625,
      "step": 34889,
      "training_step_time": 0.39426088333129883
    },
    {
      "epoch": 0.00021295166015625,
      "grad_norm": 0.08519639819860458,
      "learning_rate": 4.07121561571125e-05,
      "loss": 0.0412,
      "step": 34890
    },
    {
      "epoch": 0.00021295166015625,
      "model_forward_time": 0.11482596397399902,
      "step": 34890
    },
    {
      "epoch": 0.00021295166015625,
      "step": 34890,
      "training_step_time": 0.4011232852935791
    },
    {
      "epoch": 0.000212957763671875,
      "model_forward_time": 0.1145482063293457,
      "step": 34891
    },
    {
      "epoch": 0.000212957763671875,
      "step": 34891,
      "training_step_time": 0.39823174476623535
    },
    {
      "epoch": 0.0002129638671875,
      "model_forward_time": 0.11531376838684082,
      "step": 34892
    },
    {
      "epoch": 0.0002129638671875,
      "step": 34892,
      "training_step_time": 0.3945786952972412
    },
    {
      "epoch": 0.000212969970703125,
      "model_forward_time": 0.1151120662689209,
      "step": 34893
    },
    {
      "epoch": 0.000212969970703125,
      "step": 34893,
      "training_step_time": 0.3883056640625
    },
    {
      "epoch": 0.00021297607421875,
      "model_forward_time": 0.11468744277954102,
      "step": 34894
    },
    {
      "epoch": 0.00021297607421875,
      "step": 34894,
      "training_step_time": 0.7764089107513428
    },
    {
      "epoch": 0.000212982177734375,
      "model_forward_time": 0.11518716812133789,
      "step": 34895
    },
    {
      "epoch": 0.000212982177734375,
      "step": 34895,
      "training_step_time": 0.4031519889831543
    },
    {
      "epoch": 0.00021298828125,
      "model_forward_time": 0.11434674263000488,
      "step": 34896
    },
    {
      "epoch": 0.00021298828125,
      "step": 34896,
      "training_step_time": 0.4388885498046875
    },
    {
      "epoch": 0.000212994384765625,
      "model_forward_time": 0.11485433578491211,
      "step": 34897
    },
    {
      "epoch": 0.000212994384765625,
      "step": 34897,
      "training_step_time": 0.45576000213623047
    },
    {
      "epoch": 0.00021300048828125,
      "model_forward_time": 0.11450767517089844,
      "step": 34898
    },
    {
      "epoch": 0.00021300048828125,
      "step": 34898,
      "training_step_time": 0.3947458267211914
    },
    {
      "epoch": 0.000213006591796875,
      "model_forward_time": 0.11436080932617188,
      "step": 34899
    },
    {
      "epoch": 0.000213006591796875,
      "step": 34899,
      "training_step_time": 0.40778565406799316
    },
    {
      "epoch": 0.0002130126953125,
      "grad_norm": 0.10711361467838287,
      "learning_rate": 4.0685079362611204e-05,
      "loss": 0.0408,
      "step": 34900
    },
    {
      "epoch": 0.0002130126953125,
      "model_forward_time": 0.1153726577758789,
      "step": 34900
    },
    {
      "epoch": 0.0002130126953125,
      "step": 34900,
      "training_step_time": 0.7310562133789062
    },
    {
      "epoch": 0.000213018798828125,
      "model_forward_time": 0.11746001243591309,
      "step": 34901
    },
    {
      "epoch": 0.000213018798828125,
      "step": 34901,
      "training_step_time": 0.45220136642456055
    },
    {
      "epoch": 0.00021302490234375,
      "model_forward_time": 0.11618685722351074,
      "step": 34902
    },
    {
      "epoch": 0.00021302490234375,
      "step": 34902,
      "training_step_time": 0.3871912956237793
    },
    {
      "epoch": 0.000213031005859375,
      "model_forward_time": 0.11681771278381348,
      "step": 34903
    },
    {
      "epoch": 0.000213031005859375,
      "step": 34903,
      "training_step_time": 0.38071203231811523
    },
    {
      "epoch": 0.000213037109375,
      "model_forward_time": 0.11751008033752441,
      "step": 34904
    },
    {
      "epoch": 0.000213037109375,
      "step": 34904,
      "training_step_time": 0.3865077495574951
    },
    {
      "epoch": 0.000213043212890625,
      "model_forward_time": 0.1176755428314209,
      "step": 34905
    },
    {
      "epoch": 0.000213043212890625,
      "step": 34905,
      "training_step_time": 0.3839073181152344
    },
    {
      "epoch": 0.00021304931640625,
      "model_forward_time": 0.11687493324279785,
      "step": 34906
    },
    {
      "epoch": 0.00021304931640625,
      "step": 34906,
      "training_step_time": 0.5644545555114746
    },
    {
      "epoch": 0.000213055419921875,
      "model_forward_time": 0.1154775619506836,
      "step": 34907
    },
    {
      "epoch": 0.000213055419921875,
      "step": 34907,
      "training_step_time": 0.3935978412628174
    },
    {
      "epoch": 0.0002130615234375,
      "model_forward_time": 0.11541008949279785,
      "step": 34908
    },
    {
      "epoch": 0.0002130615234375,
      "step": 34908,
      "training_step_time": 0.41465115547180176
    },
    {
      "epoch": 0.000213067626953125,
      "model_forward_time": 0.11526870727539062,
      "step": 34909
    },
    {
      "epoch": 0.000213067626953125,
      "step": 34909,
      "training_step_time": 0.3965122699737549
    },
    {
      "epoch": 0.00021307373046875,
      "grad_norm": 0.1305842101573944,
      "learning_rate": 4.065800539773683e-05,
      "loss": 0.0437,
      "step": 34910
    },
    {
      "epoch": 0.00021307373046875,
      "model_forward_time": 0.11462759971618652,
      "step": 34910
    },
    {
      "epoch": 0.00021307373046875,
      "step": 34910,
      "training_step_time": 0.4592015743255615
    },
    {
      "epoch": 0.000213079833984375,
      "model_forward_time": 0.11535263061523438,
      "step": 34911
    },
    {
      "epoch": 0.000213079833984375,
      "step": 34911,
      "training_step_time": 0.4238100051879883
    },
    {
      "epoch": 0.0002130859375,
      "model_forward_time": 0.11513519287109375,
      "step": 34912
    },
    {
      "epoch": 0.0002130859375,
      "step": 34912,
      "training_step_time": 0.6569390296936035
    },
    {
      "epoch": 0.000213092041015625,
      "model_forward_time": 0.1146702766418457,
      "step": 34913
    },
    {
      "epoch": 0.000213092041015625,
      "step": 34913,
      "training_step_time": 0.369398832321167
    },
    {
      "epoch": 0.00021309814453125,
      "model_forward_time": 0.11451435089111328,
      "step": 34914
    },
    {
      "epoch": 0.00021309814453125,
      "step": 34914,
      "training_step_time": 0.41913461685180664
    },
    {
      "epoch": 0.000213104248046875,
      "model_forward_time": 0.11386394500732422,
      "step": 34915
    },
    {
      "epoch": 0.000213104248046875,
      "step": 34915,
      "training_step_time": 0.4008204936981201
    },
    {
      "epoch": 0.0002131103515625,
      "model_forward_time": 0.11522507667541504,
      "step": 34916
    },
    {
      "epoch": 0.0002131103515625,
      "step": 34916,
      "training_step_time": 0.38263702392578125
    },
    {
      "epoch": 0.000213116455078125,
      "model_forward_time": 0.1148066520690918,
      "step": 34917
    },
    {
      "epoch": 0.000213116455078125,
      "step": 34917,
      "training_step_time": 0.38596272468566895
    },
    {
      "epoch": 0.00021312255859375,
      "model_forward_time": 0.11519694328308105,
      "step": 34918
    },
    {
      "epoch": 0.00021312255859375,
      "step": 34918,
      "training_step_time": 0.8930346965789795
    },
    {
      "epoch": 0.000213128662109375,
      "model_forward_time": 0.11379885673522949,
      "step": 34919
    },
    {
      "epoch": 0.000213128662109375,
      "step": 34919,
      "training_step_time": 0.3993721008300781
    },
    {
      "epoch": 0.000213134765625,
      "grad_norm": 0.17629922926425934,
      "learning_rate": 4.063093427071376e-05,
      "loss": 0.0491,
      "step": 34920
    },
    {
      "epoch": 0.000213134765625,
      "model_forward_time": 0.11408162117004395,
      "step": 34920
    },
    {
      "epoch": 0.000213134765625,
      "step": 34920,
      "training_step_time": 0.4011075496673584
    },
    {
      "epoch": 0.000213140869140625,
      "model_forward_time": 0.11390137672424316,
      "step": 34921
    },
    {
      "epoch": 0.000213140869140625,
      "step": 34921,
      "training_step_time": 0.39257383346557617
    },
    {
      "epoch": 0.00021314697265625,
      "model_forward_time": 0.11410832405090332,
      "step": 34922
    },
    {
      "epoch": 0.00021314697265625,
      "step": 34922,
      "training_step_time": 0.38572025299072266
    },
    {
      "epoch": 0.000213153076171875,
      "model_forward_time": 0.11413764953613281,
      "step": 34923
    },
    {
      "epoch": 0.000213153076171875,
      "step": 34923,
      "training_step_time": 0.39240074157714844
    },
    {
      "epoch": 0.0002131591796875,
      "model_forward_time": 0.11476254463195801,
      "step": 34924
    },
    {
      "epoch": 0.0002131591796875,
      "step": 34924,
      "training_step_time": 0.773566722869873
    },
    {
      "epoch": 0.000213165283203125,
      "model_forward_time": 0.11449456214904785,
      "step": 34925
    },
    {
      "epoch": 0.000213165283203125,
      "step": 34925,
      "training_step_time": 0.4557929039001465
    },
    {
      "epoch": 0.00021317138671875,
      "model_forward_time": 0.11488103866577148,
      "step": 34926
    },
    {
      "epoch": 0.00021317138671875,
      "step": 34926,
      "training_step_time": 0.3946259021759033
    },
    {
      "epoch": 0.000213177490234375,
      "model_forward_time": 0.11433744430541992,
      "step": 34927
    },
    {
      "epoch": 0.000213177490234375,
      "step": 34927,
      "training_step_time": 0.5141561031341553
    },
    {
      "epoch": 0.00021318359375,
      "model_forward_time": 0.11456012725830078,
      "step": 34928
    },
    {
      "epoch": 0.00021318359375,
      "step": 34928,
      "training_step_time": 0.3900928497314453
    },
    {
      "epoch": 0.000213189697265625,
      "model_forward_time": 0.11475634574890137,
      "step": 34929
    },
    {
      "epoch": 0.000213189697265625,
      "step": 34929,
      "training_step_time": 0.38553690910339355
    },
    {
      "epoch": 0.00021319580078125,
      "grad_norm": 0.09021896123886108,
      "learning_rate": 4.0603865989765504e-05,
      "loss": 0.0442,
      "step": 34930
    },
    {
      "epoch": 0.00021319580078125,
      "model_forward_time": 0.11590862274169922,
      "step": 34930
    },
    {
      "epoch": 0.00021319580078125,
      "step": 34930,
      "training_step_time": 0.48961877822875977
    },
    {
      "epoch": 0.000213201904296875,
      "model_forward_time": 0.1148526668548584,
      "step": 34931
    },
    {
      "epoch": 0.000213201904296875,
      "step": 34931,
      "training_step_time": 0.3852725028991699
    },
    {
      "epoch": 0.0002132080078125,
      "model_forward_time": 0.11530542373657227,
      "step": 34932
    },
    {
      "epoch": 0.0002132080078125,
      "step": 34932,
      "training_step_time": 0.3950486183166504
    },
    {
      "epoch": 0.000213214111328125,
      "model_forward_time": 0.11469435691833496,
      "step": 34933
    },
    {
      "epoch": 0.000213214111328125,
      "step": 34933,
      "training_step_time": 0.4372730255126953
    },
    {
      "epoch": 0.00021322021484375,
      "model_forward_time": 0.11459755897521973,
      "step": 34934
    },
    {
      "epoch": 0.00021322021484375,
      "step": 34934,
      "training_step_time": 0.3881261348724365
    },
    {
      "epoch": 0.000213226318359375,
      "model_forward_time": 0.11515212059020996,
      "step": 34935
    },
    {
      "epoch": 0.000213226318359375,
      "step": 34935,
      "training_step_time": 0.387561559677124
    },
    {
      "epoch": 0.000213232421875,
      "model_forward_time": 0.11479592323303223,
      "step": 34936
    },
    {
      "epoch": 0.000213232421875,
      "step": 34936,
      "training_step_time": 0.7939507961273193
    },
    {
      "epoch": 0.000213238525390625,
      "model_forward_time": 0.11588764190673828,
      "step": 34937
    },
    {
      "epoch": 0.000213238525390625,
      "step": 34937,
      "training_step_time": 0.48343491554260254
    },
    {
      "epoch": 0.00021324462890625,
      "model_forward_time": 0.11451005935668945,
      "step": 34938
    },
    {
      "epoch": 0.00021324462890625,
      "step": 34938,
      "training_step_time": 0.4779481887817383
    },
    {
      "epoch": 0.000213250732421875,
      "model_forward_time": 0.11411690711975098,
      "step": 34939
    },
    {
      "epoch": 0.000213250732421875,
      "step": 34939,
      "training_step_time": 0.3897979259490967
    },
    {
      "epoch": 0.0002132568359375,
      "grad_norm": 0.09386233985424042,
      "learning_rate": 4.0576800563114646e-05,
      "loss": 0.0413,
      "step": 34940
    },
    {
      "epoch": 0.0002132568359375,
      "model_forward_time": 0.11425256729125977,
      "step": 34940
    },
    {
      "epoch": 0.0002132568359375,
      "step": 34940,
      "training_step_time": 0.46927785873413086
    },
    {
      "epoch": 0.000213262939453125,
      "model_forward_time": 0.1144096851348877,
      "step": 34941
    },
    {
      "epoch": 0.000213262939453125,
      "step": 34941,
      "training_step_time": 0.48682641983032227
    },
    {
      "epoch": 0.00021326904296875,
      "model_forward_time": 0.11423897743225098,
      "step": 34942
    },
    {
      "epoch": 0.00021326904296875,
      "step": 34942,
      "training_step_time": 0.3850560188293457
    },
    {
      "epoch": 0.000213275146484375,
      "model_forward_time": 0.11467552185058594,
      "step": 34943
    },
    {
      "epoch": 0.000213275146484375,
      "step": 34943,
      "training_step_time": 0.4106309413909912
    },
    {
      "epoch": 0.00021328125,
      "model_forward_time": 0.1149442195892334,
      "step": 34944
    },
    {
      "epoch": 0.00021328125,
      "step": 34944,
      "training_step_time": 0.4431483745574951
    },
    {
      "epoch": 0.000213287353515625,
      "model_forward_time": 0.11468267440795898,
      "step": 34945
    },
    {
      "epoch": 0.000213287353515625,
      "step": 34945,
      "training_step_time": 0.4312148094177246
    },
    {
      "epoch": 0.00021329345703125,
      "model_forward_time": 0.11485719680786133,
      "step": 34946
    },
    {
      "epoch": 0.00021329345703125,
      "step": 34946,
      "training_step_time": 0.3910510540008545
    },
    {
      "epoch": 0.000213299560546875,
      "model_forward_time": 0.11535835266113281,
      "step": 34947
    },
    {
      "epoch": 0.000213299560546875,
      "step": 34947,
      "training_step_time": 0.3903069496154785
    },
    {
      "epoch": 0.0002133056640625,
      "model_forward_time": 0.1152951717376709,
      "step": 34948
    },
    {
      "epoch": 0.0002133056640625,
      "step": 34948,
      "training_step_time": 0.605104923248291
    },
    {
      "epoch": 0.000213311767578125,
      "model_forward_time": 0.1150822639465332,
      "step": 34949
    },
    {
      "epoch": 0.000213311767578125,
      "step": 34949,
      "training_step_time": 0.382127046585083
    },
    {
      "epoch": 0.00021331787109375,
      "grad_norm": 0.14840349555015564,
      "learning_rate": 4.0549737998983e-05,
      "loss": 0.0416,
      "step": 34950
    },
    {
      "epoch": 0.00021331787109375,
      "model_forward_time": 0.11641860008239746,
      "step": 34950
    },
    {
      "epoch": 0.00021331787109375,
      "step": 34950,
      "training_step_time": 0.39246320724487305
    },
    {
      "epoch": 0.000213323974609375,
      "model_forward_time": 0.11434245109558105,
      "step": 34951
    },
    {
      "epoch": 0.000213323974609375,
      "step": 34951,
      "training_step_time": 0.48301076889038086
    },
    {
      "epoch": 0.000213330078125,
      "model_forward_time": 0.1148679256439209,
      "step": 34952
    },
    {
      "epoch": 0.000213330078125,
      "step": 34952,
      "training_step_time": 0.4316415786743164
    },
    {
      "epoch": 0.000213336181640625,
      "model_forward_time": 0.11479783058166504,
      "step": 34953
    },
    {
      "epoch": 0.000213336181640625,
      "step": 34953,
      "training_step_time": 0.4205150604248047
    },
    {
      "epoch": 0.00021334228515625,
      "model_forward_time": 0.11563920974731445,
      "step": 34954
    },
    {
      "epoch": 0.00021334228515625,
      "step": 34954,
      "training_step_time": 0.7671442031860352
    },
    {
      "epoch": 0.000213348388671875,
      "model_forward_time": 0.11467385292053223,
      "step": 34955
    },
    {
      "epoch": 0.000213348388671875,
      "step": 34955,
      "training_step_time": 0.4057905673980713
    },
    {
      "epoch": 0.0002133544921875,
      "model_forward_time": 0.11510014533996582,
      "step": 34956
    },
    {
      "epoch": 0.0002133544921875,
      "step": 34956,
      "training_step_time": 0.38955068588256836
    },
    {
      "epoch": 0.000213360595703125,
      "model_forward_time": 0.11486148834228516,
      "step": 34957
    },
    {
      "epoch": 0.000213360595703125,
      "step": 34957,
      "training_step_time": 0.41318798065185547
    },
    {
      "epoch": 0.00021336669921875,
      "model_forward_time": 0.11445736885070801,
      "step": 34958
    },
    {
      "epoch": 0.00021336669921875,
      "step": 34958,
      "training_step_time": 0.4549102783203125
    },
    {
      "epoch": 0.000213372802734375,
      "model_forward_time": 0.11450433731079102,
      "step": 34959
    },
    {
      "epoch": 0.000213372802734375,
      "step": 34959,
      "training_step_time": 0.4035220146179199
    },
    {
      "epoch": 0.00021337890625,
      "grad_norm": 0.1682741641998291,
      "learning_rate": 4.05226783055914e-05,
      "loss": 0.0415,
      "step": 34960
    },
    {
      "epoch": 0.00021337890625,
      "model_forward_time": 0.11560869216918945,
      "step": 34960
    },
    {
      "epoch": 0.00021337890625,
      "step": 34960,
      "training_step_time": 0.6969985961914062
    },
    {
      "epoch": 0.000213385009765625,
      "model_forward_time": 0.11490440368652344,
      "step": 34961
    },
    {
      "epoch": 0.000213385009765625,
      "step": 34961,
      "training_step_time": 0.39908838272094727
    },
    {
      "epoch": 0.00021339111328125,
      "model_forward_time": 0.11456847190856934,
      "step": 34962
    },
    {
      "epoch": 0.00021339111328125,
      "step": 34962,
      "training_step_time": 0.39218640327453613
    },
    {
      "epoch": 0.000213397216796875,
      "model_forward_time": 0.11447906494140625,
      "step": 34963
    },
    {
      "epoch": 0.000213397216796875,
      "step": 34963,
      "training_step_time": 0.4933171272277832
    },
    {
      "epoch": 0.0002134033203125,
      "model_forward_time": 0.11478972434997559,
      "step": 34964
    },
    {
      "epoch": 0.0002134033203125,
      "step": 34964,
      "training_step_time": 0.48468828201293945
    },
    {
      "epoch": 0.000213409423828125,
      "model_forward_time": 0.11405444145202637,
      "step": 34965
    },
    {
      "epoch": 0.000213409423828125,
      "step": 34965,
      "training_step_time": 0.39727306365966797
    },
    {
      "epoch": 0.00021341552734375,
      "model_forward_time": 0.11471271514892578,
      "step": 34966
    },
    {
      "epoch": 0.00021341552734375,
      "step": 34966,
      "training_step_time": 0.4770662784576416
    },
    {
      "epoch": 0.000213421630859375,
      "model_forward_time": 0.11497211456298828,
      "step": 34967
    },
    {
      "epoch": 0.000213421630859375,
      "step": 34967,
      "training_step_time": 0.4417717456817627
    },
    {
      "epoch": 0.000213427734375,
      "model_forward_time": 0.11462664604187012,
      "step": 34968
    },
    {
      "epoch": 0.000213427734375,
      "step": 34968,
      "training_step_time": 0.4771127700805664
    },
    {
      "epoch": 0.000213433837890625,
      "model_forward_time": 0.11447644233703613,
      "step": 34969
    },
    {
      "epoch": 0.000213433837890625,
      "step": 34969,
      "training_step_time": 0.39000558853149414
    },
    {
      "epoch": 0.00021343994140625,
      "grad_norm": 0.10929984599351883,
      "learning_rate": 4.049562149115992e-05,
      "loss": 0.0419,
      "step": 34970
    },
    {
      "epoch": 0.00021343994140625,
      "model_forward_time": 0.11484885215759277,
      "step": 34970
    },
    {
      "epoch": 0.00021343994140625,
      "step": 34970,
      "training_step_time": 0.4329864978790283
    },
    {
      "epoch": 0.000213446044921875,
      "model_forward_time": 0.11563968658447266,
      "step": 34971
    },
    {
      "epoch": 0.000213446044921875,
      "step": 34971,
      "training_step_time": 0.40180349349975586
    },
    {
      "epoch": 0.0002134521484375,
      "model_forward_time": 0.11616086959838867,
      "step": 34972
    },
    {
      "epoch": 0.0002134521484375,
      "step": 34972,
      "training_step_time": 0.6584153175354004
    },
    {
      "epoch": 0.000213458251953125,
      "model_forward_time": 0.11482477188110352,
      "step": 34973
    },
    {
      "epoch": 0.000213458251953125,
      "step": 34973,
      "training_step_time": 0.40079259872436523
    },
    {
      "epoch": 0.00021346435546875,
      "model_forward_time": 0.11449313163757324,
      "step": 34974
    },
    {
      "epoch": 0.00021346435546875,
      "step": 34974,
      "training_step_time": 0.4152998924255371
    },
    {
      "epoch": 0.000213470458984375,
      "model_forward_time": 0.11451411247253418,
      "step": 34975
    },
    {
      "epoch": 0.000213470458984375,
      "step": 34975,
      "training_step_time": 0.40489888191223145
    },
    {
      "epoch": 0.0002134765625,
      "model_forward_time": 0.11531710624694824,
      "step": 34976
    },
    {
      "epoch": 0.0002134765625,
      "step": 34976,
      "training_step_time": 0.38942813873291016
    },
    {
      "epoch": 0.000213482666015625,
      "model_forward_time": 0.1145944595336914,
      "step": 34977
    },
    {
      "epoch": 0.000213482666015625,
      "step": 34977,
      "training_step_time": 0.4506492614746094
    },
    {
      "epoch": 0.00021348876953125,
      "model_forward_time": 0.11564493179321289,
      "step": 34978
    },
    {
      "epoch": 0.00021348876953125,
      "step": 34978,
      "training_step_time": 0.7731449604034424
    },
    {
      "epoch": 0.000213494873046875,
      "model_forward_time": 0.11430168151855469,
      "step": 34979
    },
    {
      "epoch": 0.000213494873046875,
      "step": 34979,
      "training_step_time": 0.3853020668029785
    },
    {
      "epoch": 0.0002135009765625,
      "grad_norm": 0.14260044693946838,
      "learning_rate": 4.046856756390767e-05,
      "loss": 0.042,
      "step": 34980
    },
    {
      "epoch": 0.0002135009765625,
      "model_forward_time": 0.11504101753234863,
      "step": 34980
    },
    {
      "epoch": 0.0002135009765625,
      "step": 34980,
      "training_step_time": 0.4807260036468506
    },
    {
      "epoch": 0.000213507080078125,
      "model_forward_time": 0.11401081085205078,
      "step": 34981
    },
    {
      "epoch": 0.000213507080078125,
      "step": 34981,
      "training_step_time": 0.47600388526916504
    },
    {
      "epoch": 0.00021351318359375,
      "model_forward_time": 0.11446404457092285,
      "step": 34982
    },
    {
      "epoch": 0.00021351318359375,
      "step": 34982,
      "training_step_time": 0.4147355556488037
    },
    {
      "epoch": 0.000213519287109375,
      "model_forward_time": 0.11628961563110352,
      "step": 34983
    },
    {
      "epoch": 0.000213519287109375,
      "step": 34983,
      "training_step_time": 0.38040733337402344
    },
    {
      "epoch": 0.000213525390625,
      "model_forward_time": 0.11496400833129883,
      "step": 34984
    },
    {
      "epoch": 0.000213525390625,
      "step": 34984,
      "training_step_time": 0.47681546211242676
    },
    {
      "epoch": 0.000213531494140625,
      "model_forward_time": 0.1148843765258789,
      "step": 34985
    },
    {
      "epoch": 0.000213531494140625,
      "step": 34985,
      "training_step_time": 0.39364004135131836
    },
    {
      "epoch": 0.00021353759765625,
      "model_forward_time": 0.11460614204406738,
      "step": 34986
    },
    {
      "epoch": 0.00021353759765625,
      "step": 34986,
      "training_step_time": 0.3909754753112793
    },
    {
      "epoch": 0.000213543701171875,
      "model_forward_time": 0.11570000648498535,
      "step": 34987
    },
    {
      "epoch": 0.000213543701171875,
      "step": 34987,
      "training_step_time": 0.3921780586242676
    },
    {
      "epoch": 0.0002135498046875,
      "model_forward_time": 0.1148827075958252,
      "step": 34988
    },
    {
      "epoch": 0.0002135498046875,
      "step": 34988,
      "training_step_time": 0.39017224311828613
    },
    {
      "epoch": 0.000213555908203125,
      "model_forward_time": 0.11569023132324219,
      "step": 34989
    },
    {
      "epoch": 0.000213555908203125,
      "step": 34989,
      "training_step_time": 0.38979434967041016
    },
    {
      "epoch": 0.00021356201171875,
      "grad_norm": 0.07739892601966858,
      "learning_rate": 4.044151653205292e-05,
      "loss": 0.0375,
      "step": 34990
    },
    {
      "epoch": 0.00021356201171875,
      "model_forward_time": 0.11509013175964355,
      "step": 34990
    },
    {
      "epoch": 0.00021356201171875,
      "step": 34990,
      "training_step_time": 1.0511608123779297
    },
    {
      "epoch": 0.000213568115234375,
      "model_forward_time": 0.11437845230102539,
      "step": 34991
    },
    {
      "epoch": 0.000213568115234375,
      "step": 34991,
      "training_step_time": 0.4833049774169922
    },
    {
      "epoch": 0.00021357421875,
      "model_forward_time": 0.11370563507080078,
      "step": 34992
    },
    {
      "epoch": 0.00021357421875,
      "step": 34992,
      "training_step_time": 0.3832418918609619
    },
    {
      "epoch": 0.000213580322265625,
      "model_forward_time": 0.11428332328796387,
      "step": 34993
    },
    {
      "epoch": 0.000213580322265625,
      "step": 34993,
      "training_step_time": 0.47886109352111816
    },
    {
      "epoch": 0.00021358642578125,
      "model_forward_time": 0.11420345306396484,
      "step": 34994
    },
    {
      "epoch": 0.00021358642578125,
      "step": 34994,
      "training_step_time": 0.4604489803314209
    },
    {
      "epoch": 0.000213592529296875,
      "model_forward_time": 0.11457085609436035,
      "step": 34995
    },
    {
      "epoch": 0.000213592529296875,
      "step": 34995,
      "training_step_time": 0.3859078884124756
    },
    {
      "epoch": 0.0002135986328125,
      "model_forward_time": 0.11472797393798828,
      "step": 34996
    },
    {
      "epoch": 0.0002135986328125,
      "step": 34996,
      "training_step_time": 0.6966640949249268
    },
    {
      "epoch": 0.000213604736328125,
      "model_forward_time": 0.11463141441345215,
      "step": 34997
    },
    {
      "epoch": 0.000213604736328125,
      "step": 34997,
      "training_step_time": 0.40392613410949707
    },
    {
      "epoch": 0.00021361083984375,
      "model_forward_time": 0.11417961120605469,
      "step": 34998
    },
    {
      "epoch": 0.00021361083984375,
      "step": 34998,
      "training_step_time": 0.38374876976013184
    },
    {
      "epoch": 0.000213616943359375,
      "model_forward_time": 0.11476874351501465,
      "step": 34999
    },
    {
      "epoch": 0.000213616943359375,
      "step": 34999,
      "training_step_time": 0.38846588134765625
    },
    {
      "epoch": 0.000213623046875,
      "grad_norm": 0.11240088939666748,
      "learning_rate": 4.0414468403813095e-05,
      "loss": 0.0428,
      "step": 35000
    },
    {
      "epoch": 0.000213623046875,
      "model_forward_time": 0.11368298530578613,
      "step": 35000
    },
    {
      "epoch": 0.000213623046875,
      "step": 35000,
      "training_step_time": 0.35922694206237793
    },
    {
      "epoch": 0.000213629150390625,
      "model_forward_time": 0.11308050155639648,
      "step": 35001
    },
    {
      "epoch": 0.000213629150390625,
      "step": 35001,
      "training_step_time": 0.36375904083251953
    },
    {
      "epoch": 0.00021363525390625,
      "model_forward_time": 0.1139366626739502,
      "step": 35002
    },
    {
      "epoch": 0.00021363525390625,
      "step": 35002,
      "training_step_time": 0.37300872802734375
    },
    {
      "epoch": 0.000213641357421875,
      "model_forward_time": 0.11342024803161621,
      "step": 35003
    },
    {
      "epoch": 0.000213641357421875,
      "step": 35003,
      "training_step_time": 0.3900337219238281
    },
    {
      "epoch": 0.0002136474609375,
      "model_forward_time": 0.11413049697875977,
      "step": 35004
    },
    {
      "epoch": 0.0002136474609375,
      "step": 35004,
      "training_step_time": 0.406923770904541
    },
    {
      "epoch": 0.000213653564453125,
      "model_forward_time": 0.11485433578491211,
      "step": 35005
    },
    {
      "epoch": 0.000213653564453125,
      "step": 35005,
      "training_step_time": 0.4450554847717285
    },
    {
      "epoch": 0.00021365966796875,
      "model_forward_time": 0.11482548713684082,
      "step": 35006
    },
    {
      "epoch": 0.00021365966796875,
      "step": 35006,
      "training_step_time": 0.42754268646240234
    },
    {
      "epoch": 0.000213665771484375,
      "model_forward_time": 0.11526036262512207,
      "step": 35007
    },
    {
      "epoch": 0.000213665771484375,
      "step": 35007,
      "training_step_time": 0.36325955390930176
    },
    {
      "epoch": 0.000213671875,
      "model_forward_time": 0.11441421508789062,
      "step": 35008
    },
    {
      "epoch": 0.000213671875,
      "step": 35008,
      "training_step_time": 0.4101245403289795
    },
    {
      "epoch": 0.000213677978515625,
      "model_forward_time": 0.1153724193572998,
      "step": 35009
    },
    {
      "epoch": 0.000213677978515625,
      "step": 35009,
      "training_step_time": 0.49352312088012695
    },
    {
      "epoch": 0.00021368408203125,
      "grad_norm": 0.19798900187015533,
      "learning_rate": 4.038742318740465e-05,
      "loss": 0.0402,
      "step": 35010
    },
    {
      "epoch": 0.00021368408203125,
      "model_forward_time": 0.11500930786132812,
      "step": 35010
    },
    {
      "epoch": 0.00021368408203125,
      "step": 35010,
      "training_step_time": 0.4070744514465332
    },
    {
      "epoch": 0.000213690185546875,
      "model_forward_time": 0.11561179161071777,
      "step": 35011
    },
    {
      "epoch": 0.000213690185546875,
      "step": 35011,
      "training_step_time": 0.3848292827606201
    },
    {
      "epoch": 0.0002136962890625,
      "model_forward_time": 0.11510014533996582,
      "step": 35012
    },
    {
      "epoch": 0.0002136962890625,
      "step": 35012,
      "training_step_time": 0.389786958694458
    },
    {
      "epoch": 0.000213702392578125,
      "model_forward_time": 0.1149742603302002,
      "step": 35013
    },
    {
      "epoch": 0.000213702392578125,
      "step": 35013,
      "training_step_time": 0.39567089080810547
    },
    {
      "epoch": 0.00021370849609375,
      "model_forward_time": 0.11528563499450684,
      "step": 35014
    },
    {
      "epoch": 0.00021370849609375,
      "step": 35014,
      "training_step_time": 0.39048266410827637
    },
    {
      "epoch": 0.000213714599609375,
      "model_forward_time": 0.1203606128692627,
      "step": 35015
    },
    {
      "epoch": 0.000213714599609375,
      "step": 35015,
      "training_step_time": 0.4196946620941162
    },
    {
      "epoch": 0.000213720703125,
      "model_forward_time": 0.11566400527954102,
      "step": 35016
    },
    {
      "epoch": 0.000213720703125,
      "step": 35016,
      "training_step_time": 0.40392112731933594
    },
    {
      "epoch": 0.000213726806640625,
      "model_forward_time": 0.11505293846130371,
      "step": 35017
    },
    {
      "epoch": 0.000213726806640625,
      "step": 35017,
      "training_step_time": 0.4224557876586914
    },
    {
      "epoch": 0.00021373291015625,
      "model_forward_time": 0.11596393585205078,
      "step": 35018
    },
    {
      "epoch": 0.00021373291015625,
      "step": 35018,
      "training_step_time": 0.3949434757232666
    },
    {
      "epoch": 0.000213739013671875,
      "model_forward_time": 0.1155250072479248,
      "step": 35019
    },
    {
      "epoch": 0.000213739013671875,
      "step": 35019,
      "training_step_time": 0.4566326141357422
    },
    {
      "epoch": 0.0002137451171875,
      "grad_norm": 0.15161944925785065,
      "learning_rate": 4.036038089104326e-05,
      "loss": 0.044,
      "step": 35020
    },
    {
      "epoch": 0.0002137451171875,
      "model_forward_time": 0.11522412300109863,
      "step": 35020
    },
    {
      "epoch": 0.0002137451171875,
      "step": 35020,
      "training_step_time": 0.42732977867126465
    },
    {
      "epoch": 0.000213751220703125,
      "model_forward_time": 0.11536836624145508,
      "step": 35021
    },
    {
      "epoch": 0.000213751220703125,
      "step": 35021,
      "training_step_time": 0.4929697513580322
    },
    {
      "epoch": 0.00021375732421875,
      "model_forward_time": 0.11862778663635254,
      "step": 35022
    },
    {
      "epoch": 0.00021375732421875,
      "step": 35022,
      "training_step_time": 0.4052889347076416
    },
    {
      "epoch": 0.000213763427734375,
      "model_forward_time": 0.11577677726745605,
      "step": 35023
    },
    {
      "epoch": 0.000213763427734375,
      "step": 35023,
      "training_step_time": 0.4678161144256592
    },
    {
      "epoch": 0.00021376953125,
      "model_forward_time": 0.11772799491882324,
      "step": 35024
    },
    {
      "epoch": 0.00021376953125,
      "step": 35024,
      "training_step_time": 0.41549062728881836
    },
    {
      "epoch": 0.000213775634765625,
      "model_forward_time": 0.11741209030151367,
      "step": 35025
    },
    {
      "epoch": 0.000213775634765625,
      "step": 35025,
      "training_step_time": 0.38144731521606445
    },
    {
      "epoch": 0.00021378173828125,
      "model_forward_time": 0.11823821067810059,
      "step": 35026
    },
    {
      "epoch": 0.00021378173828125,
      "step": 35026,
      "training_step_time": 0.37872910499572754
    },
    {
      "epoch": 0.000213787841796875,
      "model_forward_time": 0.11774015426635742,
      "step": 35027
    },
    {
      "epoch": 0.000213787841796875,
      "step": 35027,
      "training_step_time": 0.3819255828857422
    },
    {
      "epoch": 0.0002137939453125,
      "model_forward_time": 0.11808896064758301,
      "step": 35028
    },
    {
      "epoch": 0.0002137939453125,
      "step": 35028,
      "training_step_time": 0.39241957664489746
    },
    {
      "epoch": 0.000213800048828125,
      "model_forward_time": 0.11513328552246094,
      "step": 35029
    },
    {
      "epoch": 0.000213800048828125,
      "step": 35029,
      "training_step_time": 0.3963155746459961
    },
    {
      "epoch": 0.00021380615234375,
      "grad_norm": 0.1187896877527237,
      "learning_rate": 4.0333341522943614e-05,
      "loss": 0.0441,
      "step": 35030
    },
    {
      "epoch": 0.00021380615234375,
      "model_forward_time": 0.11546897888183594,
      "step": 35030
    },
    {
      "epoch": 0.00021380615234375,
      "step": 35030,
      "training_step_time": 0.39250755310058594
    },
    {
      "epoch": 0.000213812255859375,
      "model_forward_time": 0.11559939384460449,
      "step": 35031
    },
    {
      "epoch": 0.000213812255859375,
      "step": 35031,
      "training_step_time": 0.40079808235168457
    },
    {
      "epoch": 0.000213818359375,
      "model_forward_time": 0.11500024795532227,
      "step": 35032
    },
    {
      "epoch": 0.000213818359375,
      "step": 35032,
      "training_step_time": 0.39804697036743164
    },
    {
      "epoch": 0.000213824462890625,
      "model_forward_time": 0.11605477333068848,
      "step": 35033
    },
    {
      "epoch": 0.000213824462890625,
      "step": 35033,
      "training_step_time": 0.4084651470184326
    },
    {
      "epoch": 0.00021383056640625,
      "model_forward_time": 0.11525273323059082,
      "step": 35034
    },
    {
      "epoch": 0.00021383056640625,
      "step": 35034,
      "training_step_time": 0.4530029296875
    },
    {
      "epoch": 0.000213836669921875,
      "model_forward_time": 0.11476898193359375,
      "step": 35035
    },
    {
      "epoch": 0.000213836669921875,
      "step": 35035,
      "training_step_time": 0.47499752044677734
    },
    {
      "epoch": 0.0002138427734375,
      "model_forward_time": 0.11564350128173828,
      "step": 35036
    },
    {
      "epoch": 0.0002138427734375,
      "step": 35036,
      "training_step_time": 0.48848915100097656
    },
    {
      "epoch": 0.000213848876953125,
      "model_forward_time": 0.11472368240356445,
      "step": 35037
    },
    {
      "epoch": 0.000213848876953125,
      "step": 35037,
      "training_step_time": 0.5037837028503418
    },
    {
      "epoch": 0.00021385498046875,
      "model_forward_time": 0.11512041091918945,
      "step": 35038
    },
    {
      "epoch": 0.00021385498046875,
      "step": 35038,
      "training_step_time": 0.5083098411560059
    },
    {
      "epoch": 0.000213861083984375,
      "model_forward_time": 0.11501002311706543,
      "step": 35039
    },
    {
      "epoch": 0.000213861083984375,
      "step": 35039,
      "training_step_time": 0.38893771171569824
    },
    {
      "epoch": 0.0002138671875,
      "grad_norm": 0.19597508013248444,
      "learning_rate": 4.0306305091319595e-05,
      "loss": 0.0445,
      "step": 35040
    },
    {
      "epoch": 0.0002138671875,
      "model_forward_time": 0.1145162582397461,
      "step": 35040
    },
    {
      "epoch": 0.0002138671875,
      "step": 35040,
      "training_step_time": 0.40050554275512695
    },
    {
      "epoch": 0.000213873291015625,
      "model_forward_time": 0.1148824691772461,
      "step": 35041
    },
    {
      "epoch": 0.000213873291015625,
      "step": 35041,
      "training_step_time": 0.39988112449645996
    },
    {
      "epoch": 0.00021387939453125,
      "model_forward_time": 0.11572527885437012,
      "step": 35042
    },
    {
      "epoch": 0.00021387939453125,
      "step": 35042,
      "training_step_time": 0.38628244400024414
    },
    {
      "epoch": 0.000213885498046875,
      "model_forward_time": 0.11570429801940918,
      "step": 35043
    },
    {
      "epoch": 0.000213885498046875,
      "step": 35043,
      "training_step_time": 0.3839900493621826
    },
    {
      "epoch": 0.0002138916015625,
      "model_forward_time": 0.11510848999023438,
      "step": 35044
    },
    {
      "epoch": 0.0002138916015625,
      "step": 35044,
      "training_step_time": 0.3895728588104248
    },
    {
      "epoch": 0.000213897705078125,
      "model_forward_time": 0.11581110954284668,
      "step": 35045
    },
    {
      "epoch": 0.000213897705078125,
      "step": 35045,
      "training_step_time": 0.38202762603759766
    },
    {
      "epoch": 0.00021390380859375,
      "model_forward_time": 0.11602330207824707,
      "step": 35046
    },
    {
      "epoch": 0.00021390380859375,
      "step": 35046,
      "training_step_time": 0.39248037338256836
    },
    {
      "epoch": 0.000213909912109375,
      "model_forward_time": 0.11605644226074219,
      "step": 35047
    },
    {
      "epoch": 0.000213909912109375,
      "step": 35047,
      "training_step_time": 0.39853882789611816
    },
    {
      "epoch": 0.000213916015625,
      "model_forward_time": 0.11502289772033691,
      "step": 35048
    },
    {
      "epoch": 0.000213916015625,
      "step": 35048,
      "training_step_time": 0.39025378227233887
    },
    {
      "epoch": 0.000213922119140625,
      "model_forward_time": 0.11575746536254883,
      "step": 35049
    },
    {
      "epoch": 0.000213922119140625,
      "step": 35049,
      "training_step_time": 0.4425852298736572
    },
    {
      "epoch": 0.00021392822265625,
      "grad_norm": 0.10000809282064438,
      "learning_rate": 4.0279271604384144e-05,
      "loss": 0.0466,
      "step": 35050
    },
    {
      "epoch": 0.00021392822265625,
      "model_forward_time": 0.11623454093933105,
      "step": 35050
    },
    {
      "epoch": 0.00021392822265625,
      "step": 35050,
      "training_step_time": 0.49282407760620117
    },
    {
      "epoch": 0.000213934326171875,
      "model_forward_time": 0.11487340927124023,
      "step": 35051
    },
    {
      "epoch": 0.000213934326171875,
      "step": 35051,
      "training_step_time": 0.37810301780700684
    },
    {
      "epoch": 0.0002139404296875,
      "model_forward_time": 0.14351749420166016,
      "step": 35052
    },
    {
      "epoch": 0.0002139404296875,
      "step": 35052,
      "training_step_time": 0.438065767288208
    },
    {
      "epoch": 0.000213946533203125,
      "model_forward_time": 0.11889123916625977,
      "step": 35053
    },
    {
      "epoch": 0.000213946533203125,
      "step": 35053,
      "training_step_time": 0.4285147190093994
    },
    {
      "epoch": 0.00021395263671875,
      "model_forward_time": 0.11771249771118164,
      "step": 35054
    },
    {
      "epoch": 0.00021395263671875,
      "step": 35054,
      "training_step_time": 0.38970518112182617
    },
    {
      "epoch": 0.000213958740234375,
      "model_forward_time": 0.11856746673583984,
      "step": 35055
    },
    {
      "epoch": 0.000213958740234375,
      "step": 35055,
      "training_step_time": 0.38730597496032715
    },
    {
      "epoch": 0.00021396484375,
      "model_forward_time": 0.1172795295715332,
      "step": 35056
    },
    {
      "epoch": 0.00021396484375,
      "step": 35056,
      "training_step_time": 0.5817499160766602
    },
    {
      "epoch": 0.000213970947265625,
      "model_forward_time": 0.11536931991577148,
      "step": 35057
    },
    {
      "epoch": 0.000213970947265625,
      "step": 35057,
      "training_step_time": 0.3944852352142334
    },
    {
      "epoch": 0.00021397705078125,
      "model_forward_time": 0.1158297061920166,
      "step": 35058
    },
    {
      "epoch": 0.00021397705078125,
      "step": 35058,
      "training_step_time": 0.3957808017730713
    },
    {
      "epoch": 0.000213983154296875,
      "model_forward_time": 0.11857843399047852,
      "step": 35059
    },
    {
      "epoch": 0.000213983154296875,
      "step": 35059,
      "training_step_time": 0.40105390548706055
    },
    {
      "epoch": 0.0002139892578125,
      "grad_norm": 0.1557847112417221,
      "learning_rate": 4.0252241070349304e-05,
      "loss": 0.0414,
      "step": 35060
    },
    {
      "epoch": 0.0002139892578125,
      "model_forward_time": 0.11611151695251465,
      "step": 35060
    },
    {
      "epoch": 0.0002139892578125,
      "step": 35060,
      "training_step_time": 0.3904225826263428
    },
    {
      "epoch": 0.000213995361328125,
      "model_forward_time": 0.11576437950134277,
      "step": 35061
    },
    {
      "epoch": 0.000213995361328125,
      "step": 35061,
      "training_step_time": 0.3838324546813965
    },
    {
      "epoch": 0.00021400146484375,
      "model_forward_time": 0.11510729789733887,
      "step": 35062
    },
    {
      "epoch": 0.00021400146484375,
      "step": 35062,
      "training_step_time": 0.8285119533538818
    },
    {
      "epoch": 0.000214007568359375,
      "model_forward_time": 0.11558270454406738,
      "step": 35063
    },
    {
      "epoch": 0.000214007568359375,
      "step": 35063,
      "training_step_time": 0.5039000511169434
    },
    {
      "epoch": 0.000214013671875,
      "model_forward_time": 0.11508035659790039,
      "step": 35064
    },
    {
      "epoch": 0.000214013671875,
      "step": 35064,
      "training_step_time": 0.43941211700439453
    },
    {
      "epoch": 0.000214019775390625,
      "model_forward_time": 0.11503839492797852,
      "step": 35065
    },
    {
      "epoch": 0.000214019775390625,
      "step": 35065,
      "training_step_time": 0.479520320892334
    },
    {
      "epoch": 0.00021402587890625,
      "model_forward_time": 0.11398911476135254,
      "step": 35066
    },
    {
      "epoch": 0.00021402587890625,
      "step": 35066,
      "training_step_time": 0.4740734100341797
    },
    {
      "epoch": 0.000214031982421875,
      "model_forward_time": 0.1154489517211914,
      "step": 35067
    },
    {
      "epoch": 0.000214031982421875,
      "step": 35067,
      "training_step_time": 0.39666175842285156
    },
    {
      "epoch": 0.0002140380859375,
      "model_forward_time": 0.11508035659790039,
      "step": 35068
    },
    {
      "epoch": 0.0002140380859375,
      "step": 35068,
      "training_step_time": 0.627709150314331
    },
    {
      "epoch": 0.000214044189453125,
      "model_forward_time": 0.11692190170288086,
      "step": 35069
    },
    {
      "epoch": 0.000214044189453125,
      "step": 35069,
      "training_step_time": 0.38840413093566895
    },
    {
      "epoch": 0.00021405029296875,
      "grad_norm": 0.09480904787778854,
      "learning_rate": 4.0225213497426276e-05,
      "loss": 0.0462,
      "step": 35070
    },
    {
      "epoch": 0.00021405029296875,
      "model_forward_time": 0.11521196365356445,
      "step": 35070
    },
    {
      "epoch": 0.00021405029296875,
      "step": 35070,
      "training_step_time": 0.39302706718444824
    },
    {
      "epoch": 0.000214056396484375,
      "model_forward_time": 0.11528563499450684,
      "step": 35071
    },
    {
      "epoch": 0.000214056396484375,
      "step": 35071,
      "training_step_time": 0.3899815082550049
    },
    {
      "epoch": 0.0002140625,
      "model_forward_time": 0.11563563346862793,
      "step": 35072
    },
    {
      "epoch": 0.0002140625,
      "step": 35072,
      "training_step_time": 0.38182783126831055
    },
    {
      "epoch": 0.000214068603515625,
      "model_forward_time": 0.11598515510559082,
      "step": 35073
    },
    {
      "epoch": 0.000214068603515625,
      "step": 35073,
      "training_step_time": 0.38679981231689453
    },
    {
      "epoch": 0.00021407470703125,
      "model_forward_time": 0.11629509925842285,
      "step": 35074
    },
    {
      "epoch": 0.00021407470703125,
      "step": 35074,
      "training_step_time": 0.7983295917510986
    },
    {
      "epoch": 0.000214080810546875,
      "model_forward_time": 0.11434292793273926,
      "step": 35075
    },
    {
      "epoch": 0.000214080810546875,
      "step": 35075,
      "training_step_time": 0.4564993381500244
    },
    {
      "epoch": 0.0002140869140625,
      "model_forward_time": 0.11504340171813965,
      "step": 35076
    },
    {
      "epoch": 0.0002140869140625,
      "step": 35076,
      "training_step_time": 0.48926210403442383
    },
    {
      "epoch": 0.000214093017578125,
      "model_forward_time": 0.11450552940368652,
      "step": 35077
    },
    {
      "epoch": 0.000214093017578125,
      "step": 35077,
      "training_step_time": 0.48902177810668945
    },
    {
      "epoch": 0.00021409912109375,
      "model_forward_time": 0.11600732803344727,
      "step": 35078
    },
    {
      "epoch": 0.00021409912109375,
      "step": 35078,
      "training_step_time": 0.49631404876708984
    },
    {
      "epoch": 0.000214105224609375,
      "model_forward_time": 0.11490797996520996,
      "step": 35079
    },
    {
      "epoch": 0.000214105224609375,
      "step": 35079,
      "training_step_time": 0.4685378074645996
    },
    {
      "epoch": 0.000214111328125,
      "grad_norm": 0.166171595454216,
      "learning_rate": 4.019818889382528e-05,
      "loss": 0.0439,
      "step": 35080
    },
    {
      "epoch": 0.000214111328125,
      "model_forward_time": 0.11500167846679688,
      "step": 35080
    },
    {
      "epoch": 0.000214111328125,
      "step": 35080,
      "training_step_time": 0.3873472213745117
    },
    {
      "epoch": 0.000214117431640625,
      "model_forward_time": 0.11486244201660156,
      "step": 35081
    },
    {
      "epoch": 0.000214117431640625,
      "step": 35081,
      "training_step_time": 0.38335132598876953
    },
    {
      "epoch": 0.00021412353515625,
      "model_forward_time": 0.11477923393249512,
      "step": 35082
    },
    {
      "epoch": 0.00021412353515625,
      "step": 35082,
      "training_step_time": 0.403256893157959
    },
    {
      "epoch": 0.000214129638671875,
      "model_forward_time": 0.11578702926635742,
      "step": 35083
    },
    {
      "epoch": 0.000214129638671875,
      "step": 35083,
      "training_step_time": 0.37953734397888184
    },
    {
      "epoch": 0.0002141357421875,
      "model_forward_time": 0.1152040958404541,
      "step": 35084
    },
    {
      "epoch": 0.0002141357421875,
      "step": 35084,
      "training_step_time": 0.38184118270874023
    },
    {
      "epoch": 0.000214141845703125,
      "model_forward_time": 0.11610603332519531,
      "step": 35085
    },
    {
      "epoch": 0.000214141845703125,
      "step": 35085,
      "training_step_time": 0.3917734622955322
    },
    {
      "epoch": 0.00021414794921875,
      "model_forward_time": 0.11521315574645996,
      "step": 35086
    },
    {
      "epoch": 0.00021414794921875,
      "step": 35086,
      "training_step_time": 0.7370061874389648
    },
    {
      "epoch": 0.000214154052734375,
      "model_forward_time": 0.11494112014770508,
      "step": 35087
    },
    {
      "epoch": 0.000214154052734375,
      "step": 35087,
      "training_step_time": 0.40605688095092773
    },
    {
      "epoch": 0.00021416015625,
      "model_forward_time": 0.11588501930236816,
      "step": 35088
    },
    {
      "epoch": 0.00021416015625,
      "step": 35088,
      "training_step_time": 0.3977181911468506
    },
    {
      "epoch": 0.000214166259765625,
      "model_forward_time": 0.1153409481048584,
      "step": 35089
    },
    {
      "epoch": 0.000214166259765625,
      "step": 35089,
      "training_step_time": 0.45726823806762695
    },
    {
      "epoch": 0.00021417236328125,
      "grad_norm": 0.17015640437602997,
      "learning_rate": 4.0171167267755696e-05,
      "loss": 0.042,
      "step": 35090
    },
    {
      "epoch": 0.00021417236328125,
      "model_forward_time": 0.1147313117980957,
      "step": 35090
    },
    {
      "epoch": 0.00021417236328125,
      "step": 35090,
      "training_step_time": 0.48400282859802246
    },
    {
      "epoch": 0.000214178466796875,
      "model_forward_time": 0.11452436447143555,
      "step": 35091
    },
    {
      "epoch": 0.000214178466796875,
      "step": 35091,
      "training_step_time": 0.3633408546447754
    },
    {
      "epoch": 0.0002141845703125,
      "model_forward_time": 0.11511349678039551,
      "step": 35092
    },
    {
      "epoch": 0.0002141845703125,
      "step": 35092,
      "training_step_time": 0.4865999221801758
    },
    {
      "epoch": 0.000214190673828125,
      "model_forward_time": 0.11506199836730957,
      "step": 35093
    },
    {
      "epoch": 0.000214190673828125,
      "step": 35093,
      "training_step_time": 0.4784841537475586
    },
    {
      "epoch": 0.00021419677734375,
      "model_forward_time": 0.11419320106506348,
      "step": 35094
    },
    {
      "epoch": 0.00021419677734375,
      "step": 35094,
      "training_step_time": 0.39713144302368164
    },
    {
      "epoch": 0.000214202880859375,
      "model_forward_time": 0.11447858810424805,
      "step": 35095
    },
    {
      "epoch": 0.000214202880859375,
      "step": 35095,
      "training_step_time": 0.4051988124847412
    },
    {
      "epoch": 0.000214208984375,
      "model_forward_time": 0.1157681941986084,
      "step": 35096
    },
    {
      "epoch": 0.000214208984375,
      "step": 35096,
      "training_step_time": 0.4252746105194092
    },
    {
      "epoch": 0.000214215087890625,
      "model_forward_time": 0.11460304260253906,
      "step": 35097
    },
    {
      "epoch": 0.000214215087890625,
      "step": 35097,
      "training_step_time": 0.3941981792449951
    },
    {
      "epoch": 0.00021422119140625,
      "model_forward_time": 0.11518073081970215,
      "step": 35098
    },
    {
      "epoch": 0.00021422119140625,
      "step": 35098,
      "training_step_time": 0.6776926517486572
    },
    {
      "epoch": 0.000214227294921875,
      "model_forward_time": 0.11538910865783691,
      "step": 35099
    },
    {
      "epoch": 0.000214227294921875,
      "step": 35099,
      "training_step_time": 0.39243531227111816
    },
    {
      "epoch": 0.0002142333984375,
      "grad_norm": 0.14205777645111084,
      "learning_rate": 4.0144148627425993e-05,
      "loss": 0.0438,
      "step": 35100
    },
    {
      "epoch": 0.0002142333984375,
      "model_forward_time": 0.1150808334350586,
      "step": 35100
    },
    {
      "epoch": 0.0002142333984375,
      "step": 35100,
      "training_step_time": 0.40001988410949707
    },
    {
      "epoch": 0.000214239501953125,
      "model_forward_time": 0.11534452438354492,
      "step": 35101
    },
    {
      "epoch": 0.000214239501953125,
      "step": 35101,
      "training_step_time": 0.39400529861450195
    },
    {
      "epoch": 0.00021424560546875,
      "model_forward_time": 0.11492800712585449,
      "step": 35102
    },
    {
      "epoch": 0.00021424560546875,
      "step": 35102,
      "training_step_time": 0.4446239471435547
    },
    {
      "epoch": 0.000214251708984375,
      "model_forward_time": 0.11533689498901367,
      "step": 35103
    },
    {
      "epoch": 0.000214251708984375,
      "step": 35103,
      "training_step_time": 0.4158134460449219
    },
    {
      "epoch": 0.0002142578125,
      "model_forward_time": 0.11559033393859863,
      "step": 35104
    },
    {
      "epoch": 0.0002142578125,
      "step": 35104,
      "training_step_time": 0.7603883743286133
    },
    {
      "epoch": 0.000214263916015625,
      "model_forward_time": 0.11453509330749512,
      "step": 35105
    },
    {
      "epoch": 0.000214263916015625,
      "step": 35105,
      "training_step_time": 0.41039371490478516
    },
    {
      "epoch": 0.00021427001953125,
      "model_forward_time": 0.11516642570495605,
      "step": 35106
    },
    {
      "epoch": 0.00021427001953125,
      "step": 35106,
      "training_step_time": 0.413684606552124
    },
    {
      "epoch": 0.000214276123046875,
      "model_forward_time": 0.11461448669433594,
      "step": 35107
    },
    {
      "epoch": 0.000214276123046875,
      "step": 35107,
      "training_step_time": 0.38491296768188477
    },
    {
      "epoch": 0.0002142822265625,
      "model_forward_time": 0.11520147323608398,
      "step": 35108
    },
    {
      "epoch": 0.0002142822265625,
      "step": 35108,
      "training_step_time": 0.3918948173522949
    },
    {
      "epoch": 0.000214288330078125,
      "model_forward_time": 0.11455035209655762,
      "step": 35109
    },
    {
      "epoch": 0.000214288330078125,
      "step": 35109,
      "training_step_time": 0.4021115303039551
    },
    {
      "epoch": 0.00021429443359375,
      "grad_norm": 0.10603144764900208,
      "learning_rate": 4.0117132981043693e-05,
      "loss": 0.0424,
      "step": 35110
    },
    {
      "epoch": 0.00021429443359375,
      "model_forward_time": 0.11511468887329102,
      "step": 35110
    },
    {
      "epoch": 0.00021429443359375,
      "step": 35110,
      "training_step_time": 0.8225445747375488
    },
    {
      "epoch": 0.000214300537109375,
      "model_forward_time": 0.11400842666625977,
      "step": 35111
    },
    {
      "epoch": 0.000214300537109375,
      "step": 35111,
      "training_step_time": 0.4648759365081787
    },
    {
      "epoch": 0.000214306640625,
      "model_forward_time": 0.11470937728881836,
      "step": 35112
    },
    {
      "epoch": 0.000214306640625,
      "step": 35112,
      "training_step_time": 0.41458582878112793
    },
    {
      "epoch": 0.000214312744140625,
      "model_forward_time": 0.11435198783874512,
      "step": 35113
    },
    {
      "epoch": 0.000214312744140625,
      "step": 35113,
      "training_step_time": 0.38626980781555176
    },
    {
      "epoch": 0.00021431884765625,
      "model_forward_time": 0.11464309692382812,
      "step": 35114
    },
    {
      "epoch": 0.00021431884765625,
      "step": 35114,
      "training_step_time": 0.3815910816192627
    },
    {
      "epoch": 0.000214324951171875,
      "model_forward_time": 0.1145637035369873,
      "step": 35115
    },
    {
      "epoch": 0.000214324951171875,
      "step": 35115,
      "training_step_time": 0.4558422565460205
    },
    {
      "epoch": 0.0002143310546875,
      "model_forward_time": 0.11478734016418457,
      "step": 35116
    },
    {
      "epoch": 0.0002143310546875,
      "step": 35116,
      "training_step_time": 0.5506484508514404
    },
    {
      "epoch": 0.000214337158203125,
      "model_forward_time": 0.11523318290710449,
      "step": 35117
    },
    {
      "epoch": 0.000214337158203125,
      "step": 35117,
      "training_step_time": 0.48287081718444824
    },
    {
      "epoch": 0.00021434326171875,
      "model_forward_time": 0.11514496803283691,
      "step": 35118
    },
    {
      "epoch": 0.00021434326171875,
      "step": 35118,
      "training_step_time": 0.47173261642456055
    },
    {
      "epoch": 0.000214349365234375,
      "model_forward_time": 0.1146554946899414,
      "step": 35119
    },
    {
      "epoch": 0.000214349365234375,
      "step": 35119,
      "training_step_time": 0.48198413848876953
    },
    {
      "epoch": 0.00021435546875,
      "grad_norm": 0.11741150170564651,
      "learning_rate": 4.0090120336815474e-05,
      "loss": 0.0413,
      "step": 35120
    },
    {
      "epoch": 0.00021435546875,
      "model_forward_time": 0.11519002914428711,
      "step": 35120
    },
    {
      "epoch": 0.00021435546875,
      "step": 35120,
      "training_step_time": 0.39263224601745605
    },
    {
      "epoch": 0.000214361572265625,
      "model_forward_time": 0.11527347564697266,
      "step": 35121
    },
    {
      "epoch": 0.000214361572265625,
      "step": 35121,
      "training_step_time": 0.3886251449584961
    },
    {
      "epoch": 0.00021436767578125,
      "model_forward_time": 0.11513757705688477,
      "step": 35122
    },
    {
      "epoch": 0.00021436767578125,
      "step": 35122,
      "training_step_time": 0.4024832248687744
    },
    {
      "epoch": 0.000214373779296875,
      "model_forward_time": 0.11589574813842773,
      "step": 35123
    },
    {
      "epoch": 0.000214373779296875,
      "step": 35123,
      "training_step_time": 0.3896017074584961
    },
    {
      "epoch": 0.0002143798828125,
      "model_forward_time": 0.11642813682556152,
      "step": 35124
    },
    {
      "epoch": 0.0002143798828125,
      "step": 35124,
      "training_step_time": 0.3990001678466797
    },
    {
      "epoch": 0.000214385986328125,
      "model_forward_time": 0.11542844772338867,
      "step": 35125
    },
    {
      "epoch": 0.000214385986328125,
      "step": 35125,
      "training_step_time": 0.41028714179992676
    },
    {
      "epoch": 0.00021439208984375,
      "model_forward_time": 0.1157987117767334,
      "step": 35126
    },
    {
      "epoch": 0.00021439208984375,
      "step": 35126,
      "training_step_time": 0.4004533290863037
    },
    {
      "epoch": 0.000214398193359375,
      "model_forward_time": 0.11611342430114746,
      "step": 35127
    },
    {
      "epoch": 0.000214398193359375,
      "step": 35127,
      "training_step_time": 0.39771199226379395
    },
    {
      "epoch": 0.000214404296875,
      "model_forward_time": 0.11572694778442383,
      "step": 35128
    },
    {
      "epoch": 0.000214404296875,
      "step": 35128,
      "training_step_time": 0.8505494594573975
    },
    {
      "epoch": 0.000214410400390625,
      "model_forward_time": 0.11522483825683594,
      "step": 35129
    },
    {
      "epoch": 0.000214410400390625,
      "step": 35129,
      "training_step_time": 0.43404650688171387
    },
    {
      "epoch": 0.00021441650390625,
      "grad_norm": 0.12177587300539017,
      "learning_rate": 4.006311070294702e-05,
      "loss": 0.0436,
      "step": 35130
    },
    {
      "epoch": 0.00021441650390625,
      "model_forward_time": 0.11480545997619629,
      "step": 35130
    },
    {
      "epoch": 0.00021441650390625,
      "step": 35130,
      "training_step_time": 0.5181858539581299
    },
    {
      "epoch": 0.000214422607421875,
      "model_forward_time": 0.11436271667480469,
      "step": 35131
    },
    {
      "epoch": 0.000214422607421875,
      "step": 35131,
      "training_step_time": 0.36138367652893066
    },
    {
      "epoch": 0.0002144287109375,
      "model_forward_time": 0.11502742767333984,
      "step": 35132
    },
    {
      "epoch": 0.0002144287109375,
      "step": 35132,
      "training_step_time": 0.42888903617858887
    },
    {
      "epoch": 0.000214434814453125,
      "model_forward_time": 0.11449980735778809,
      "step": 35133
    },
    {
      "epoch": 0.000214434814453125,
      "step": 35133,
      "training_step_time": 0.37915778160095215
    },
    {
      "epoch": 0.00021444091796875,
      "model_forward_time": 0.11551380157470703,
      "step": 35134
    },
    {
      "epoch": 0.00021444091796875,
      "step": 35134,
      "training_step_time": 0.5798506736755371
    },
    {
      "epoch": 0.000214447021484375,
      "model_forward_time": 0.11439919471740723,
      "step": 35135
    },
    {
      "epoch": 0.000214447021484375,
      "step": 35135,
      "training_step_time": 0.38909316062927246
    },
    {
      "epoch": 0.000214453125,
      "model_forward_time": 0.11524486541748047,
      "step": 35136
    },
    {
      "epoch": 0.000214453125,
      "step": 35136,
      "training_step_time": 0.39519810676574707
    },
    {
      "epoch": 0.000214459228515625,
      "model_forward_time": 0.11510944366455078,
      "step": 35137
    },
    {
      "epoch": 0.000214459228515625,
      "step": 35137,
      "training_step_time": 0.41355228424072266
    },
    {
      "epoch": 0.00021446533203125,
      "model_forward_time": 0.1154325008392334,
      "step": 35138
    },
    {
      "epoch": 0.00021446533203125,
      "step": 35138,
      "training_step_time": 0.4279916286468506
    },
    {
      "epoch": 0.000214471435546875,
      "model_forward_time": 0.1151878833770752,
      "step": 35139
    },
    {
      "epoch": 0.000214471435546875,
      "step": 35139,
      "training_step_time": 0.3852827548980713
    },
    {
      "epoch": 0.0002144775390625,
      "grad_norm": 0.13118040561676025,
      "learning_rate": 4.003610408764317e-05,
      "loss": 0.0424,
      "step": 35140
    },
    {
      "epoch": 0.0002144775390625,
      "model_forward_time": 0.1147913932800293,
      "step": 35140
    },
    {
      "epoch": 0.0002144775390625,
      "step": 35140,
      "training_step_time": 0.6879723072052002
    },
    {
      "epoch": 0.000214483642578125,
      "model_forward_time": 0.11517786979675293,
      "step": 35141
    },
    {
      "epoch": 0.000214483642578125,
      "step": 35141,
      "training_step_time": 0.381040096282959
    },
    {
      "epoch": 0.00021448974609375,
      "model_forward_time": 0.11482548713684082,
      "step": 35142
    },
    {
      "epoch": 0.00021448974609375,
      "step": 35142,
      "training_step_time": 0.4303090572357178
    },
    {
      "epoch": 0.000214495849609375,
      "model_forward_time": 0.11507725715637207,
      "step": 35143
    },
    {
      "epoch": 0.000214495849609375,
      "step": 35143,
      "training_step_time": 0.48422813415527344
    },
    {
      "epoch": 0.000214501953125,
      "model_forward_time": 0.11499428749084473,
      "step": 35144
    },
    {
      "epoch": 0.000214501953125,
      "step": 35144,
      "training_step_time": 0.3892230987548828
    },
    {
      "epoch": 0.000214508056640625,
      "model_forward_time": 0.11513662338256836,
      "step": 35145
    },
    {
      "epoch": 0.000214508056640625,
      "step": 35145,
      "training_step_time": 0.38333749771118164
    },
    {
      "epoch": 0.00021451416015625,
      "model_forward_time": 0.11616373062133789,
      "step": 35146
    },
    {
      "epoch": 0.00021451416015625,
      "step": 35146,
      "training_step_time": 0.6150507926940918
    },
    {
      "epoch": 0.000214520263671875,
      "model_forward_time": 0.11522126197814941,
      "step": 35147
    },
    {
      "epoch": 0.000214520263671875,
      "step": 35147,
      "training_step_time": 0.41351318359375
    },
    {
      "epoch": 0.0002145263671875,
      "model_forward_time": 0.11510157585144043,
      "step": 35148
    },
    {
      "epoch": 0.0002145263671875,
      "step": 35148,
      "training_step_time": 0.3912978172302246
    },
    {
      "epoch": 0.000214532470703125,
      "model_forward_time": 0.11522912979125977,
      "step": 35149
    },
    {
      "epoch": 0.000214532470703125,
      "step": 35149,
      "training_step_time": 0.389880895614624
    },
    {
      "epoch": 0.00021453857421875,
      "grad_norm": 0.11604029685258865,
      "learning_rate": 4.0009100499107824e-05,
      "loss": 0.0466,
      "step": 35150
    },
    {
      "epoch": 0.00021453857421875,
      "model_forward_time": 0.1154775619506836,
      "step": 35150
    },
    {
      "epoch": 0.00021453857421875,
      "step": 35150,
      "training_step_time": 0.45850634574890137
    },
    {
      "epoch": 0.000214544677734375,
      "model_forward_time": 0.11469006538391113,
      "step": 35151
    },
    {
      "epoch": 0.000214544677734375,
      "step": 35151,
      "training_step_time": 0.40807318687438965
    },
    {
      "epoch": 0.00021455078125,
      "model_forward_time": 0.11503791809082031,
      "step": 35152
    },
    {
      "epoch": 0.00021455078125,
      "step": 35152,
      "training_step_time": 0.7874784469604492
    },
    {
      "epoch": 0.000214556884765625,
      "model_forward_time": 0.11465597152709961,
      "step": 35153
    },
    {
      "epoch": 0.000214556884765625,
      "step": 35153,
      "training_step_time": 0.3896901607513428
    },
    {
      "epoch": 0.00021456298828125,
      "model_forward_time": 0.11440062522888184,
      "step": 35154
    },
    {
      "epoch": 0.00021456298828125,
      "step": 35154,
      "training_step_time": 0.385103702545166
    },
    {
      "epoch": 0.000214569091796875,
      "model_forward_time": 0.11493086814880371,
      "step": 35155
    },
    {
      "epoch": 0.000214569091796875,
      "step": 35155,
      "training_step_time": 0.4428849220275879
    },
    {
      "epoch": 0.0002145751953125,
      "model_forward_time": 0.11475992202758789,
      "step": 35156
    },
    {
      "epoch": 0.0002145751953125,
      "step": 35156,
      "training_step_time": 0.467104434967041
    },
    {
      "epoch": 0.000214581298828125,
      "model_forward_time": 0.11557936668395996,
      "step": 35157
    },
    {
      "epoch": 0.000214581298828125,
      "step": 35157,
      "training_step_time": 0.4197549819946289
    },
    {
      "epoch": 0.00021458740234375,
      "model_forward_time": 0.11525201797485352,
      "step": 35158
    },
    {
      "epoch": 0.00021458740234375,
      "step": 35158,
      "training_step_time": 0.6690700054168701
    },
    {
      "epoch": 0.000214593505859375,
      "model_forward_time": 0.11454963684082031,
      "step": 35159
    },
    {
      "epoch": 0.000214593505859375,
      "step": 35159,
      "training_step_time": 0.3884158134460449
    },
    {
      "epoch": 0.000214599609375,
      "grad_norm": 0.13202044367790222,
      "learning_rate": 3.9982099945543945e-05,
      "loss": 0.0422,
      "step": 35160
    },
    {
      "epoch": 0.000214599609375,
      "model_forward_time": 0.11506462097167969,
      "step": 35160
    },
    {
      "epoch": 0.000214599609375,
      "step": 35160,
      "training_step_time": 0.4500150680541992
    },
    {
      "epoch": 0.000214605712890625,
      "model_forward_time": 0.117279052734375,
      "step": 35161
    },
    {
      "epoch": 0.000214605712890625,
      "step": 35161,
      "training_step_time": 0.3895444869995117
    },
    {
      "epoch": 0.00021461181640625,
      "model_forward_time": 0.11486148834228516,
      "step": 35162
    },
    {
      "epoch": 0.00021461181640625,
      "step": 35162,
      "training_step_time": 0.3938627243041992
    },
    {
      "epoch": 0.000214617919921875,
      "model_forward_time": 0.11512207984924316,
      "step": 35163
    },
    {
      "epoch": 0.000214617919921875,
      "step": 35163,
      "training_step_time": 0.4205892086029053
    },
    {
      "epoch": 0.0002146240234375,
      "model_forward_time": 0.11538147926330566,
      "step": 35164
    },
    {
      "epoch": 0.0002146240234375,
      "step": 35164,
      "training_step_time": 0.6979117393493652
    },
    {
      "epoch": 0.000214630126953125,
      "model_forward_time": 0.11491155624389648,
      "step": 35165
    },
    {
      "epoch": 0.000214630126953125,
      "step": 35165,
      "training_step_time": 0.39006948471069336
    },
    {
      "epoch": 0.00021463623046875,
      "model_forward_time": 0.1155691146850586,
      "step": 35166
    },
    {
      "epoch": 0.00021463623046875,
      "step": 35166,
      "training_step_time": 0.3897848129272461
    },
    {
      "epoch": 0.000214642333984375,
      "model_forward_time": 0.11500930786132812,
      "step": 35167
    },
    {
      "epoch": 0.000214642333984375,
      "step": 35167,
      "training_step_time": 0.393934965133667
    },
    {
      "epoch": 0.0002146484375,
      "model_forward_time": 0.11462545394897461,
      "step": 35168
    },
    {
      "epoch": 0.0002146484375,
      "step": 35168,
      "training_step_time": 0.4052555561065674
    },
    {
      "epoch": 0.000214654541015625,
      "model_forward_time": 0.11527848243713379,
      "step": 35169
    },
    {
      "epoch": 0.000214654541015625,
      "step": 35169,
      "training_step_time": 0.4141886234283447
    },
    {
      "epoch": 0.00021466064453125,
      "grad_norm": 0.12723416090011597,
      "learning_rate": 3.99551024351536e-05,
      "loss": 0.0469,
      "step": 35170
    },
    {
      "epoch": 0.00021466064453125,
      "model_forward_time": 0.1146843433380127,
      "step": 35170
    },
    {
      "epoch": 0.00021466064453125,
      "step": 35170,
      "training_step_time": 0.854602575302124
    },
    {
      "epoch": 0.000214666748046875,
      "model_forward_time": 0.11446809768676758,
      "step": 35171
    },
    {
      "epoch": 0.000214666748046875,
      "step": 35171,
      "training_step_time": 0.37764644622802734
    },
    {
      "epoch": 0.0002146728515625,
      "model_forward_time": 0.11448454856872559,
      "step": 35172
    },
    {
      "epoch": 0.0002146728515625,
      "step": 35172,
      "training_step_time": 0.42281603813171387
    },
    {
      "epoch": 0.000214678955078125,
      "model_forward_time": 0.11426019668579102,
      "step": 35173
    },
    {
      "epoch": 0.000214678955078125,
      "step": 35173,
      "training_step_time": 0.41356730461120605
    },
    {
      "epoch": 0.00021468505859375,
      "model_forward_time": 0.11481595039367676,
      "step": 35174
    },
    {
      "epoch": 0.00021468505859375,
      "step": 35174,
      "training_step_time": 0.384263277053833
    },
    {
      "epoch": 0.000214691162109375,
      "model_forward_time": 0.11402726173400879,
      "step": 35175
    },
    {
      "epoch": 0.000214691162109375,
      "step": 35175,
      "training_step_time": 0.41933131217956543
    },
    {
      "epoch": 0.000214697265625,
      "model_forward_time": 0.11507654190063477,
      "step": 35176
    },
    {
      "epoch": 0.000214697265625,
      "step": 35176,
      "training_step_time": 0.8076155185699463
    },
    {
      "epoch": 0.000214703369140625,
      "model_forward_time": 0.1139521598815918,
      "step": 35177
    },
    {
      "epoch": 0.000214703369140625,
      "step": 35177,
      "training_step_time": 0.3914306163787842
    },
    {
      "epoch": 0.00021470947265625,
      "model_forward_time": 0.11437869071960449,
      "step": 35178
    },
    {
      "epoch": 0.00021470947265625,
      "step": 35178,
      "training_step_time": 0.390458345413208
    },
    {
      "epoch": 0.000214715576171875,
      "model_forward_time": 0.11446642875671387,
      "step": 35179
    },
    {
      "epoch": 0.000214715576171875,
      "step": 35179,
      "training_step_time": 0.3923327922821045
    },
    {
      "epoch": 0.0002147216796875,
      "grad_norm": 0.16898806393146515,
      "learning_rate": 3.9928107976137906e-05,
      "loss": 0.0386,
      "step": 35180
    },
    {
      "epoch": 0.0002147216796875,
      "model_forward_time": 0.11461758613586426,
      "step": 35180
    },
    {
      "epoch": 0.0002147216796875,
      "step": 35180,
      "training_step_time": 0.3797471523284912
    },
    {
      "epoch": 0.000214727783203125,
      "model_forward_time": 0.11716794967651367,
      "step": 35181
    },
    {
      "epoch": 0.000214727783203125,
      "step": 35181,
      "training_step_time": 0.43897247314453125
    },
    {
      "epoch": 0.00021473388671875,
      "model_forward_time": 0.11486434936523438,
      "step": 35182
    },
    {
      "epoch": 0.00021473388671875,
      "step": 35182,
      "training_step_time": 0.7801644802093506
    },
    {
      "epoch": 0.000214739990234375,
      "model_forward_time": 0.11574578285217285,
      "step": 35183
    },
    {
      "epoch": 0.000214739990234375,
      "step": 35183,
      "training_step_time": 0.4273703098297119
    },
    {
      "epoch": 0.00021474609375,
      "model_forward_time": 0.11577200889587402,
      "step": 35184
    },
    {
      "epoch": 0.00021474609375,
      "step": 35184,
      "training_step_time": 0.36594176292419434
    },
    {
      "epoch": 0.000214752197265625,
      "model_forward_time": 0.11474990844726562,
      "step": 35185
    },
    {
      "epoch": 0.000214752197265625,
      "step": 35185,
      "training_step_time": 0.4107182025909424
    },
    {
      "epoch": 0.00021475830078125,
      "model_forward_time": 0.11446142196655273,
      "step": 35186
    },
    {
      "epoch": 0.00021475830078125,
      "step": 35186,
      "training_step_time": 0.4081146717071533
    },
    {
      "epoch": 0.000214764404296875,
      "model_forward_time": 0.11398124694824219,
      "step": 35187
    },
    {
      "epoch": 0.000214764404296875,
      "step": 35187,
      "training_step_time": 0.38783788681030273
    },
    {
      "epoch": 0.0002147705078125,
      "model_forward_time": 0.11526823043823242,
      "step": 35188
    },
    {
      "epoch": 0.0002147705078125,
      "step": 35188,
      "training_step_time": 0.5504708290100098
    },
    {
      "epoch": 0.000214776611328125,
      "model_forward_time": 0.11522054672241211,
      "step": 35189
    },
    {
      "epoch": 0.000214776611328125,
      "step": 35189,
      "training_step_time": 0.39352941513061523
    },
    {
      "epoch": 0.00021478271484375,
      "grad_norm": 0.15791413187980652,
      "learning_rate": 3.9901116576697083e-05,
      "loss": 0.0409,
      "step": 35190
    },
    {
      "epoch": 0.00021478271484375,
      "model_forward_time": 0.11544322967529297,
      "step": 35190
    },
    {
      "epoch": 0.00021478271484375,
      "step": 35190,
      "training_step_time": 0.3793148994445801
    },
    {
      "epoch": 0.000214788818359375,
      "model_forward_time": 0.11710000038146973,
      "step": 35191
    },
    {
      "epoch": 0.000214788818359375,
      "step": 35191,
      "training_step_time": 0.3947615623474121
    },
    {
      "epoch": 0.000214794921875,
      "model_forward_time": 0.11495518684387207,
      "step": 35192
    },
    {
      "epoch": 0.000214794921875,
      "step": 35192,
      "training_step_time": 0.391812801361084
    },
    {
      "epoch": 0.000214801025390625,
      "model_forward_time": 0.11551761627197266,
      "step": 35193
    },
    {
      "epoch": 0.000214801025390625,
      "step": 35193,
      "training_step_time": 0.39908814430236816
    },
    {
      "epoch": 0.00021480712890625,
      "model_forward_time": 0.11718058586120605,
      "step": 35194
    },
    {
      "epoch": 0.00021480712890625,
      "step": 35194,
      "training_step_time": 0.863239049911499
    },
    {
      "epoch": 0.000214813232421875,
      "model_forward_time": 0.11414933204650879,
      "step": 35195
    },
    {
      "epoch": 0.000214813232421875,
      "step": 35195,
      "training_step_time": 0.47887730598449707
    },
    {
      "epoch": 0.0002148193359375,
      "model_forward_time": 0.11433053016662598,
      "step": 35196
    },
    {
      "epoch": 0.0002148193359375,
      "step": 35196,
      "training_step_time": 0.3890550136566162
    },
    {
      "epoch": 0.000214825439453125,
      "model_forward_time": 0.11384820938110352,
      "step": 35197
    },
    {
      "epoch": 0.000214825439453125,
      "step": 35197,
      "training_step_time": 0.4644966125488281
    },
    {
      "epoch": 0.00021483154296875,
      "model_forward_time": 0.11449265480041504,
      "step": 35198
    },
    {
      "epoch": 0.00021483154296875,
      "step": 35198,
      "training_step_time": 0.3642239570617676
    },
    {
      "epoch": 0.000214837646484375,
      "model_forward_time": 0.11416029930114746,
      "step": 35199
    },
    {
      "epoch": 0.000214837646484375,
      "step": 35199,
      "training_step_time": 0.4364621639251709
    },
    {
      "epoch": 0.00021484375,
      "grad_norm": 0.1443646103143692,
      "learning_rate": 3.9874128245030404e-05,
      "loss": 0.0428,
      "step": 35200
    },
    {
      "epoch": 0.00021484375,
      "model_forward_time": 0.1147146224975586,
      "step": 35200
    },
    {
      "epoch": 0.00021484375,
      "step": 35200,
      "training_step_time": 0.4832801818847656
    },
    {
      "epoch": 0.000214849853515625,
      "model_forward_time": 0.11527395248413086,
      "step": 35201
    },
    {
      "epoch": 0.000214849853515625,
      "step": 35201,
      "training_step_time": 0.3886115550994873
    },
    {
      "epoch": 0.00021485595703125,
      "model_forward_time": 0.1145329475402832,
      "step": 35202
    },
    {
      "epoch": 0.00021485595703125,
      "step": 35202,
      "training_step_time": 0.38810110092163086
    },
    {
      "epoch": 0.000214862060546875,
      "model_forward_time": 0.11458659172058105,
      "step": 35203
    },
    {
      "epoch": 0.000214862060546875,
      "step": 35203,
      "training_step_time": 0.39756155014038086
    },
    {
      "epoch": 0.0002148681640625,
      "model_forward_time": 0.1156468391418457,
      "step": 35204
    },
    {
      "epoch": 0.0002148681640625,
      "step": 35204,
      "training_step_time": 0.39079809188842773
    },
    {
      "epoch": 0.000214874267578125,
      "model_forward_time": 0.11536431312561035,
      "step": 35205
    },
    {
      "epoch": 0.000214874267578125,
      "step": 35205,
      "training_step_time": 0.3934769630432129
    },
    {
      "epoch": 0.00021488037109375,
      "model_forward_time": 0.11535167694091797,
      "step": 35206
    },
    {
      "epoch": 0.00021488037109375,
      "step": 35206,
      "training_step_time": 0.7376565933227539
    },
    {
      "epoch": 0.000214886474609375,
      "model_forward_time": 0.11500978469848633,
      "step": 35207
    },
    {
      "epoch": 0.000214886474609375,
      "step": 35207,
      "training_step_time": 0.41289806365966797
    },
    {
      "epoch": 0.000214892578125,
      "model_forward_time": 0.11469697952270508,
      "step": 35208
    },
    {
      "epoch": 0.000214892578125,
      "step": 35208,
      "training_step_time": 0.4146726131439209
    },
    {
      "epoch": 0.000214898681640625,
      "model_forward_time": 0.11466693878173828,
      "step": 35209
    },
    {
      "epoch": 0.000214898681640625,
      "step": 35209,
      "training_step_time": 0.487811803817749
    },
    {
      "epoch": 0.00021490478515625,
      "grad_norm": 0.13261567056179047,
      "learning_rate": 3.984714298933619e-05,
      "loss": 0.0418,
      "step": 35210
    },
    {
      "epoch": 0.00021490478515625,
      "model_forward_time": 0.11466097831726074,
      "step": 35210
    },
    {
      "epoch": 0.00021490478515625,
      "step": 35210,
      "training_step_time": 0.4533274173736572
    },
    {
      "epoch": 0.000214910888671875,
      "model_forward_time": 0.11417579650878906,
      "step": 35211
    },
    {
      "epoch": 0.000214910888671875,
      "step": 35211,
      "training_step_time": 0.39842677116394043
    },
    {
      "epoch": 0.0002149169921875,
      "model_forward_time": 0.11512374877929688,
      "step": 35212
    },
    {
      "epoch": 0.0002149169921875,
      "step": 35212,
      "training_step_time": 0.5727450847625732
    },
    {
      "epoch": 0.000214923095703125,
      "model_forward_time": 0.11485815048217773,
      "step": 35213
    },
    {
      "epoch": 0.000214923095703125,
      "step": 35213,
      "training_step_time": 0.4202911853790283
    },
    {
      "epoch": 0.00021492919921875,
      "model_forward_time": 0.11555337905883789,
      "step": 35214
    },
    {
      "epoch": 0.00021492919921875,
      "step": 35214,
      "training_step_time": 0.4041266441345215
    },
    {
      "epoch": 0.000214935302734375,
      "model_forward_time": 0.11466646194458008,
      "step": 35215
    },
    {
      "epoch": 0.000214935302734375,
      "step": 35215,
      "training_step_time": 0.3908882141113281
    },
    {
      "epoch": 0.00021494140625,
      "model_forward_time": 0.11521387100219727,
      "step": 35216
    },
    {
      "epoch": 0.00021494140625,
      "step": 35216,
      "training_step_time": 0.4036226272583008
    },
    {
      "epoch": 0.000214947509765625,
      "model_forward_time": 0.11468362808227539,
      "step": 35217
    },
    {
      "epoch": 0.000214947509765625,
      "step": 35217,
      "training_step_time": 0.3934187889099121
    },
    {
      "epoch": 0.00021495361328125,
      "model_forward_time": 0.11709356307983398,
      "step": 35218
    },
    {
      "epoch": 0.00021495361328125,
      "step": 35218,
      "training_step_time": 0.8389089107513428
    },
    {
      "epoch": 0.000214959716796875,
      "model_forward_time": 0.1141667366027832,
      "step": 35219
    },
    {
      "epoch": 0.000214959716796875,
      "step": 35219,
      "training_step_time": 0.38137388229370117
    },
    {
      "epoch": 0.0002149658203125,
      "grad_norm": 0.15000726282596588,
      "learning_rate": 3.982016081781189e-05,
      "loss": 0.0401,
      "step": 35220
    },
    {
      "epoch": 0.0002149658203125,
      "model_forward_time": 0.11507630348205566,
      "step": 35220
    },
    {
      "epoch": 0.0002149658203125,
      "step": 35220,
      "training_step_time": 0.440950870513916
    },
    {
      "epoch": 0.000214971923828125,
      "model_forward_time": 0.11468148231506348,
      "step": 35221
    },
    {
      "epoch": 0.000214971923828125,
      "step": 35221,
      "training_step_time": 0.40549421310424805
    },
    {
      "epoch": 0.00021497802734375,
      "model_forward_time": 0.11377310752868652,
      "step": 35222
    },
    {
      "epoch": 0.00021497802734375,
      "step": 35222,
      "training_step_time": 0.45014381408691406
    },
    {
      "epoch": 0.000214984130859375,
      "model_forward_time": 0.11460304260253906,
      "step": 35223
    },
    {
      "epoch": 0.000214984130859375,
      "step": 35223,
      "training_step_time": 0.44172191619873047
    },
    {
      "epoch": 0.000214990234375,
      "model_forward_time": 0.1150503158569336,
      "step": 35224
    },
    {
      "epoch": 0.000214990234375,
      "step": 35224,
      "training_step_time": 0.5794596672058105
    },
    {
      "epoch": 0.000214996337890625,
      "model_forward_time": 0.11458468437194824,
      "step": 35225
    },
    {
      "epoch": 0.000214996337890625,
      "step": 35225,
      "training_step_time": 0.3700075149536133
    },
    {
      "epoch": 0.00021500244140625,
      "model_forward_time": 0.11463809013366699,
      "step": 35226
    },
    {
      "epoch": 0.00021500244140625,
      "step": 35226,
      "training_step_time": 0.4103434085845947
    },
    {
      "epoch": 0.000215008544921875,
      "model_forward_time": 0.11445474624633789,
      "step": 35227
    },
    {
      "epoch": 0.000215008544921875,
      "step": 35227,
      "training_step_time": 0.41189074516296387
    },
    {
      "epoch": 0.0002150146484375,
      "model_forward_time": 0.11420440673828125,
      "step": 35228
    },
    {
      "epoch": 0.0002150146484375,
      "step": 35228,
      "training_step_time": 0.38964271545410156
    },
    {
      "epoch": 0.000215020751953125,
      "model_forward_time": 0.11467671394348145,
      "step": 35229
    },
    {
      "epoch": 0.000215020751953125,
      "step": 35229,
      "training_step_time": 0.40227198600769043
    },
    {
      "epoch": 0.00021502685546875,
      "grad_norm": 0.08698596805334091,
      "learning_rate": 3.979318173865393e-05,
      "loss": 0.0359,
      "step": 35230
    },
    {
      "epoch": 0.00021502685546875,
      "model_forward_time": 0.11529922485351562,
      "step": 35230
    },
    {
      "epoch": 0.00021502685546875,
      "step": 35230,
      "training_step_time": 0.6502335071563721
    },
    {
      "epoch": 0.000215032958984375,
      "model_forward_time": 0.1146392822265625,
      "step": 35231
    },
    {
      "epoch": 0.000215032958984375,
      "step": 35231,
      "training_step_time": 0.39495277404785156
    },
    {
      "epoch": 0.0002150390625,
      "model_forward_time": 0.11482048034667969,
      "step": 35232
    },
    {
      "epoch": 0.0002150390625,
      "step": 35232,
      "training_step_time": 0.3886685371398926
    },
    {
      "epoch": 0.000215045166015625,
      "model_forward_time": 0.11479735374450684,
      "step": 35233
    },
    {
      "epoch": 0.000215045166015625,
      "step": 35233,
      "training_step_time": 0.40145134925842285
    },
    {
      "epoch": 0.00021505126953125,
      "model_forward_time": 0.11559367179870605,
      "step": 35234
    },
    {
      "epoch": 0.00021505126953125,
      "step": 35234,
      "training_step_time": 0.42888593673706055
    },
    {
      "epoch": 0.000215057373046875,
      "model_forward_time": 0.11496233940124512,
      "step": 35235
    },
    {
      "epoch": 0.000215057373046875,
      "step": 35235,
      "training_step_time": 0.426577091217041
    },
    {
      "epoch": 0.0002150634765625,
      "model_forward_time": 0.11550283432006836,
      "step": 35236
    },
    {
      "epoch": 0.0002150634765625,
      "step": 35236,
      "training_step_time": 0.9325652122497559
    },
    {
      "epoch": 0.000215069580078125,
      "model_forward_time": 0.11438632011413574,
      "step": 35237
    },
    {
      "epoch": 0.000215069580078125,
      "step": 35237,
      "training_step_time": 0.4798307418823242
    },
    {
      "epoch": 0.00021507568359375,
      "model_forward_time": 0.11441874504089355,
      "step": 35238
    },
    {
      "epoch": 0.00021507568359375,
      "step": 35238,
      "training_step_time": 0.3890223503112793
    },
    {
      "epoch": 0.000215081787109375,
      "model_forward_time": 0.11419463157653809,
      "step": 35239
    },
    {
      "epoch": 0.000215081787109375,
      "step": 35239,
      "training_step_time": 0.3897237777709961
    },
    {
      "epoch": 0.000215087890625,
      "grad_norm": 0.1200694590806961,
      "learning_rate": 3.976620576005786e-05,
      "loss": 0.04,
      "step": 35240
    },
    {
      "epoch": 0.000215087890625,
      "model_forward_time": 0.11462092399597168,
      "step": 35240
    },
    {
      "epoch": 0.000215087890625,
      "step": 35240,
      "training_step_time": 0.48606371879577637
    },
    {
      "epoch": 0.000215093994140625,
      "model_forward_time": 0.11452651023864746,
      "step": 35241
    },
    {
      "epoch": 0.000215093994140625,
      "step": 35241,
      "training_step_time": 0.3784668445587158
    },
    {
      "epoch": 0.00021510009765625,
      "model_forward_time": 0.11687779426574707,
      "step": 35242
    },
    {
      "epoch": 0.00021510009765625,
      "step": 35242,
      "training_step_time": 0.3928244113922119
    },
    {
      "epoch": 0.000215106201171875,
      "model_forward_time": 0.11504292488098145,
      "step": 35243
    },
    {
      "epoch": 0.000215106201171875,
      "step": 35243,
      "training_step_time": 0.39650464057922363
    },
    {
      "epoch": 0.0002151123046875,
      "model_forward_time": 0.11498379707336426,
      "step": 35244
    },
    {
      "epoch": 0.0002151123046875,
      "step": 35244,
      "training_step_time": 0.38907313346862793
    },
    {
      "epoch": 0.000215118408203125,
      "model_forward_time": 0.11547064781188965,
      "step": 35245
    },
    {
      "epoch": 0.000215118408203125,
      "step": 35245,
      "training_step_time": 0.3944084644317627
    },
    {
      "epoch": 0.00021512451171875,
      "model_forward_time": 0.11554694175720215,
      "step": 35246
    },
    {
      "epoch": 0.00021512451171875,
      "step": 35246,
      "training_step_time": 0.38483572006225586
    },
    {
      "epoch": 0.000215130615234375,
      "model_forward_time": 0.11570978164672852,
      "step": 35247
    },
    {
      "epoch": 0.000215130615234375,
      "step": 35247,
      "training_step_time": 0.4374425411224365
    },
    {
      "epoch": 0.00021513671875,
      "model_forward_time": 0.11507558822631836,
      "step": 35248
    },
    {
      "epoch": 0.00021513671875,
      "step": 35248,
      "training_step_time": 0.9900534152984619
    },
    {
      "epoch": 0.000215142822265625,
      "model_forward_time": 0.11474132537841797,
      "step": 35249
    },
    {
      "epoch": 0.000215142822265625,
      "step": 35249,
      "training_step_time": 0.4462296962738037
    },
    {
      "epoch": 0.00021514892578125,
      "grad_norm": 0.08688249439001083,
      "learning_rate": 3.973923289021829e-05,
      "loss": 0.0446,
      "step": 35250
    },
    {
      "epoch": 0.00021514892578125,
      "model_forward_time": 0.11409616470336914,
      "step": 35250
    },
    {
      "epoch": 0.00021514892578125,
      "step": 35250,
      "training_step_time": 0.41808342933654785
    },
    {
      "epoch": 0.000215155029296875,
      "model_forward_time": 0.11658763885498047,
      "step": 35251
    },
    {
      "epoch": 0.000215155029296875,
      "step": 35251,
      "training_step_time": 0.36637449264526367
    },
    {
      "epoch": 0.0002151611328125,
      "model_forward_time": 0.11728191375732422,
      "step": 35252
    },
    {
      "epoch": 0.0002151611328125,
      "step": 35252,
      "training_step_time": 0.3902466297149658
    },
    {
      "epoch": 0.000215167236328125,
      "model_forward_time": 0.11719799041748047,
      "step": 35253
    },
    {
      "epoch": 0.000215167236328125,
      "step": 35253,
      "training_step_time": 0.4067823886871338
    },
    {
      "epoch": 0.00021517333984375,
      "model_forward_time": 0.11760759353637695,
      "step": 35254
    },
    {
      "epoch": 0.00021517333984375,
      "step": 35254,
      "training_step_time": 0.4646110534667969
    },
    {
      "epoch": 0.000215179443359375,
      "model_forward_time": 0.11672425270080566,
      "step": 35255
    },
    {
      "epoch": 0.000215179443359375,
      "step": 35255,
      "training_step_time": 0.408489465713501
    },
    {
      "epoch": 0.000215185546875,
      "model_forward_time": 0.11771750450134277,
      "step": 35256
    },
    {
      "epoch": 0.000215185546875,
      "step": 35256,
      "training_step_time": 0.3861653804779053
    },
    {
      "epoch": 0.000215191650390625,
      "model_forward_time": 0.11763501167297363,
      "step": 35257
    },
    {
      "epoch": 0.000215191650390625,
      "step": 35257,
      "training_step_time": 0.39189577102661133
    },
    {
      "epoch": 0.00021519775390625,
      "model_forward_time": 0.11576223373413086,
      "step": 35258
    },
    {
      "epoch": 0.00021519775390625,
      "step": 35258,
      "training_step_time": 0.37926673889160156
    },
    {
      "epoch": 0.000215203857421875,
      "model_forward_time": 0.11593031883239746,
      "step": 35259
    },
    {
      "epoch": 0.000215203857421875,
      "step": 35259,
      "training_step_time": 0.38250088691711426
    },
    {
      "epoch": 0.0002152099609375,
      "grad_norm": 0.11872764676809311,
      "learning_rate": 3.9712263137328836e-05,
      "loss": 0.044,
      "step": 35260
    },
    {
      "epoch": 0.0002152099609375,
      "model_forward_time": 0.11586380004882812,
      "step": 35260
    },
    {
      "epoch": 0.0002152099609375,
      "step": 35260,
      "training_step_time": 0.7866256237030029
    },
    {
      "epoch": 0.000215216064453125,
      "model_forward_time": 0.11511731147766113,
      "step": 35261
    },
    {
      "epoch": 0.000215216064453125,
      "step": 35261,
      "training_step_time": 0.45006537437438965
    },
    {
      "epoch": 0.00021522216796875,
      "model_forward_time": 0.1149759292602539,
      "step": 35262
    },
    {
      "epoch": 0.00021522216796875,
      "step": 35262,
      "training_step_time": 0.4085977077484131
    },
    {
      "epoch": 0.000215228271484375,
      "model_forward_time": 0.11455035209655762,
      "step": 35263
    },
    {
      "epoch": 0.000215228271484375,
      "step": 35263,
      "training_step_time": 0.4284698963165283
    },
    {
      "epoch": 0.000215234375,
      "model_forward_time": 0.11525845527648926,
      "step": 35264
    },
    {
      "epoch": 0.000215234375,
      "step": 35264,
      "training_step_time": 0.42200732231140137
    },
    {
      "epoch": 0.000215240478515625,
      "model_forward_time": 0.11523914337158203,
      "step": 35265
    },
    {
      "epoch": 0.000215240478515625,
      "step": 35265,
      "training_step_time": 0.40655088424682617
    },
    {
      "epoch": 0.00021524658203125,
      "model_forward_time": 0.11864757537841797,
      "step": 35266
    },
    {
      "epoch": 0.00021524658203125,
      "step": 35266,
      "training_step_time": 0.7017717361450195
    },
    {
      "epoch": 0.000215252685546875,
      "model_forward_time": 0.11841154098510742,
      "step": 35267
    },
    {
      "epoch": 0.000215252685546875,
      "step": 35267,
      "training_step_time": 0.38661932945251465
    },
    {
      "epoch": 0.0002152587890625,
      "model_forward_time": 0.1232297420501709,
      "step": 35268
    },
    {
      "epoch": 0.0002152587890625,
      "step": 35268,
      "training_step_time": 0.3832371234893799
    },
    {
      "epoch": 0.000215264892578125,
      "model_forward_time": 0.11508798599243164,
      "step": 35269
    },
    {
      "epoch": 0.000215264892578125,
      "step": 35269,
      "training_step_time": 0.39873361587524414
    },
    {
      "epoch": 0.00021527099609375,
      "grad_norm": 0.10514960438013077,
      "learning_rate": 3.9685296509582224e-05,
      "loss": 0.0417,
      "step": 35270
    },
    {
      "epoch": 0.00021527099609375,
      "model_forward_time": 0.11506032943725586,
      "step": 35270
    },
    {
      "epoch": 0.00021527099609375,
      "step": 35270,
      "training_step_time": 0.38115382194519043
    },
    {
      "epoch": 0.000215277099609375,
      "model_forward_time": 0.1153101921081543,
      "step": 35271
    },
    {
      "epoch": 0.000215277099609375,
      "step": 35271,
      "training_step_time": 0.3828306198120117
    },
    {
      "epoch": 0.000215283203125,
      "model_forward_time": 0.11601591110229492,
      "step": 35272
    },
    {
      "epoch": 0.000215283203125,
      "step": 35272,
      "training_step_time": 0.8175852298736572
    },
    {
      "epoch": 0.000215289306640625,
      "model_forward_time": 0.11557126045227051,
      "step": 35273
    },
    {
      "epoch": 0.000215289306640625,
      "step": 35273,
      "training_step_time": 0.4190535545349121
    },
    {
      "epoch": 0.00021529541015625,
      "model_forward_time": 0.11473751068115234,
      "step": 35274
    },
    {
      "epoch": 0.00021529541015625,
      "step": 35274,
      "training_step_time": 0.43393468856811523
    },
    {
      "epoch": 0.000215301513671875,
      "model_forward_time": 0.11443662643432617,
      "step": 35275
    },
    {
      "epoch": 0.000215301513671875,
      "step": 35275,
      "training_step_time": 0.47289299964904785
    },
    {
      "epoch": 0.0002153076171875,
      "model_forward_time": 0.11475539207458496,
      "step": 35276
    },
    {
      "epoch": 0.0002153076171875,
      "step": 35276,
      "training_step_time": 0.38511037826538086
    },
    {
      "epoch": 0.000215313720703125,
      "model_forward_time": 0.11501026153564453,
      "step": 35277
    },
    {
      "epoch": 0.000215313720703125,
      "step": 35277,
      "training_step_time": 0.45981860160827637
    },
    {
      "epoch": 0.00021531982421875,
      "model_forward_time": 0.11535763740539551,
      "step": 35278
    },
    {
      "epoch": 0.00021531982421875,
      "step": 35278,
      "training_step_time": 0.5854196548461914
    },
    {
      "epoch": 0.000215325927734375,
      "model_forward_time": 0.11521601676940918,
      "step": 35279
    },
    {
      "epoch": 0.000215325927734375,
      "step": 35279,
      "training_step_time": 0.4075438976287842
    },
    {
      "epoch": 0.00021533203125,
      "grad_norm": 0.17609815299510956,
      "learning_rate": 3.965833301517017e-05,
      "loss": 0.0437,
      "step": 35280
    },
    {
      "epoch": 0.00021533203125,
      "model_forward_time": 0.11499643325805664,
      "step": 35280
    },
    {
      "epoch": 0.00021533203125,
      "step": 35280,
      "training_step_time": 0.4629533290863037
    },
    {
      "epoch": 0.000215338134765625,
      "model_forward_time": 0.11496376991271973,
      "step": 35281
    },
    {
      "epoch": 0.000215338134765625,
      "step": 35281,
      "training_step_time": 0.3882434368133545
    },
    {
      "epoch": 0.00021534423828125,
      "model_forward_time": 0.1151735782623291,
      "step": 35282
    },
    {
      "epoch": 0.00021534423828125,
      "step": 35282,
      "training_step_time": 0.38633108139038086
    },
    {
      "epoch": 0.000215350341796875,
      "model_forward_time": 0.11609053611755371,
      "step": 35283
    },
    {
      "epoch": 0.000215350341796875,
      "step": 35283,
      "training_step_time": 0.39115452766418457
    },
    {
      "epoch": 0.0002153564453125,
      "model_forward_time": 0.11593818664550781,
      "step": 35284
    },
    {
      "epoch": 0.0002153564453125,
      "step": 35284,
      "training_step_time": 0.7823827266693115
    },
    {
      "epoch": 0.000215362548828125,
      "model_forward_time": 0.11452198028564453,
      "step": 35285
    },
    {
      "epoch": 0.000215362548828125,
      "step": 35285,
      "training_step_time": 0.41706418991088867
    },
    {
      "epoch": 0.00021536865234375,
      "model_forward_time": 0.11510252952575684,
      "step": 35286
    },
    {
      "epoch": 0.00021536865234375,
      "step": 35286,
      "training_step_time": 0.4005441665649414
    },
    {
      "epoch": 0.000215374755859375,
      "model_forward_time": 0.11519718170166016,
      "step": 35287
    },
    {
      "epoch": 0.000215374755859375,
      "step": 35287,
      "training_step_time": 0.4037795066833496
    },
    {
      "epoch": 0.000215380859375,
      "model_forward_time": 0.11505866050720215,
      "step": 35288
    },
    {
      "epoch": 0.000215380859375,
      "step": 35288,
      "training_step_time": 0.3860042095184326
    },
    {
      "epoch": 0.000215386962890625,
      "model_forward_time": 0.11458754539489746,
      "step": 35289
    },
    {
      "epoch": 0.000215386962890625,
      "step": 35289,
      "training_step_time": 0.4179668426513672
    },
    {
      "epoch": 0.00021539306640625,
      "grad_norm": 0.12420377135276794,
      "learning_rate": 3.963137266228349e-05,
      "loss": 0.0426,
      "step": 35290
    },
    {
      "epoch": 0.00021539306640625,
      "model_forward_time": 0.11551904678344727,
      "step": 35290
    },
    {
      "epoch": 0.00021539306640625,
      "step": 35290,
      "training_step_time": 0.7590320110321045
    },
    {
      "epoch": 0.000215399169921875,
      "model_forward_time": 0.11533141136169434,
      "step": 35291
    },
    {
      "epoch": 0.000215399169921875,
      "step": 35291,
      "training_step_time": 0.36783456802368164
    },
    {
      "epoch": 0.0002154052734375,
      "model_forward_time": 0.1155998706817627,
      "step": 35292
    },
    {
      "epoch": 0.0002154052734375,
      "step": 35292,
      "training_step_time": 0.42530322074890137
    },
    {
      "epoch": 0.000215411376953125,
      "model_forward_time": 0.1149599552154541,
      "step": 35293
    },
    {
      "epoch": 0.000215411376953125,
      "step": 35293,
      "training_step_time": 0.4291808605194092
    },
    {
      "epoch": 0.00021541748046875,
      "model_forward_time": 0.11490702629089355,
      "step": 35294
    },
    {
      "epoch": 0.00021541748046875,
      "step": 35294,
      "training_step_time": 0.39201903343200684
    },
    {
      "epoch": 0.000215423583984375,
      "model_forward_time": 0.11504507064819336,
      "step": 35295
    },
    {
      "epoch": 0.000215423583984375,
      "step": 35295,
      "training_step_time": 0.42136693000793457
    },
    {
      "epoch": 0.0002154296875,
      "model_forward_time": 0.116241455078125,
      "step": 35296
    },
    {
      "epoch": 0.0002154296875,
      "step": 35296,
      "training_step_time": 0.7818000316619873
    },
    {
      "epoch": 0.000215435791015625,
      "model_forward_time": 0.11443543434143066,
      "step": 35297
    },
    {
      "epoch": 0.000215435791015625,
      "step": 35297,
      "training_step_time": 0.3882317543029785
    },
    {
      "epoch": 0.00021544189453125,
      "model_forward_time": 0.11508989334106445,
      "step": 35298
    },
    {
      "epoch": 0.00021544189453125,
      "step": 35298,
      "training_step_time": 0.48299098014831543
    },
    {
      "epoch": 0.000215447998046875,
      "model_forward_time": 0.11461305618286133,
      "step": 35299
    },
    {
      "epoch": 0.000215447998046875,
      "step": 35299,
      "training_step_time": 0.3996553421020508
    },
    {
      "epoch": 0.0002154541015625,
      "grad_norm": 0.12210335582494736,
      "learning_rate": 3.960441545911204e-05,
      "loss": 0.0421,
      "step": 35300
    },
    {
      "epoch": 0.0002154541015625,
      "model_forward_time": 0.11484551429748535,
      "step": 35300
    },
    {
      "epoch": 0.0002154541015625,
      "step": 35300,
      "training_step_time": 0.46258115768432617
    },
    {
      "epoch": 0.000215460205078125,
      "model_forward_time": 0.11425471305847168,
      "step": 35301
    },
    {
      "epoch": 0.000215460205078125,
      "step": 35301,
      "training_step_time": 0.3824434280395508
    },
    {
      "epoch": 0.00021546630859375,
      "model_forward_time": 0.11535477638244629,
      "step": 35302
    },
    {
      "epoch": 0.00021546630859375,
      "step": 35302,
      "training_step_time": 0.8032629489898682
    },
    {
      "epoch": 0.000215472412109375,
      "model_forward_time": 0.11479997634887695,
      "step": 35303
    },
    {
      "epoch": 0.000215472412109375,
      "step": 35303,
      "training_step_time": 0.4463958740234375
    },
    {
      "epoch": 0.000215478515625,
      "model_forward_time": 0.11554741859436035,
      "step": 35304
    },
    {
      "epoch": 0.000215478515625,
      "step": 35304,
      "training_step_time": 0.41623854637145996
    },
    {
      "epoch": 0.000215484619140625,
      "model_forward_time": 0.11475253105163574,
      "step": 35305
    },
    {
      "epoch": 0.000215484619140625,
      "step": 35305,
      "training_step_time": 0.470381498336792
    },
    {
      "epoch": 0.00021549072265625,
      "model_forward_time": 0.11548113822937012,
      "step": 35306
    },
    {
      "epoch": 0.00021549072265625,
      "step": 35306,
      "training_step_time": 0.40445375442504883
    },
    {
      "epoch": 0.000215496826171875,
      "model_forward_time": 0.11474084854125977,
      "step": 35307
    },
    {
      "epoch": 0.000215496826171875,
      "step": 35307,
      "training_step_time": 0.38085246086120605
    },
    {
      "epoch": 0.0002155029296875,
      "model_forward_time": 0.11547040939331055,
      "step": 35308
    },
    {
      "epoch": 0.0002155029296875,
      "step": 35308,
      "training_step_time": 0.6770825386047363
    },
    {
      "epoch": 0.000215509033203125,
      "model_forward_time": 0.11507225036621094,
      "step": 35309
    },
    {
      "epoch": 0.000215509033203125,
      "step": 35309,
      "training_step_time": 0.38738489151000977
    },
    {
      "epoch": 0.00021551513671875,
      "grad_norm": 0.18824957311153412,
      "learning_rate": 3.9577461413844684e-05,
      "loss": 0.0447,
      "step": 35310
    },
    {
      "epoch": 0.00021551513671875,
      "model_forward_time": 0.11486697196960449,
      "step": 35310
    },
    {
      "epoch": 0.00021551513671875,
      "step": 35310,
      "training_step_time": 0.3866562843322754
    },
    {
      "epoch": 0.000215521240234375,
      "model_forward_time": 0.11572551727294922,
      "step": 35311
    },
    {
      "epoch": 0.000215521240234375,
      "step": 35311,
      "training_step_time": 0.4632551670074463
    },
    {
      "epoch": 0.00021552734375,
      "model_forward_time": 0.11560797691345215,
      "step": 35312
    },
    {
      "epoch": 0.00021552734375,
      "step": 35312,
      "training_step_time": 0.4076504707336426
    },
    {
      "epoch": 0.000215533447265625,
      "model_forward_time": 0.11488151550292969,
      "step": 35313
    },
    {
      "epoch": 0.000215533447265625,
      "step": 35313,
      "training_step_time": 0.4735260009765625
    },
    {
      "epoch": 0.00021553955078125,
      "model_forward_time": 0.11517739295959473,
      "step": 35314
    },
    {
      "epoch": 0.00021553955078125,
      "step": 35314,
      "training_step_time": 0.578831672668457
    },
    {
      "epoch": 0.000215545654296875,
      "model_forward_time": 0.1145787239074707,
      "step": 35315
    },
    {
      "epoch": 0.000215545654296875,
      "step": 35315,
      "training_step_time": 0.45082592964172363
    },
    {
      "epoch": 0.0002155517578125,
      "model_forward_time": 0.11498165130615234,
      "step": 35316
    },
    {
      "epoch": 0.0002155517578125,
      "step": 35316,
      "training_step_time": 0.4607396125793457
    },
    {
      "epoch": 0.000215557861328125,
      "model_forward_time": 0.11716294288635254,
      "step": 35317
    },
    {
      "epoch": 0.000215557861328125,
      "step": 35317,
      "training_step_time": 0.36670827865600586
    },
    {
      "epoch": 0.00021556396484375,
      "model_forward_time": 0.1156005859375,
      "step": 35318
    },
    {
      "epoch": 0.00021556396484375,
      "step": 35318,
      "training_step_time": 0.4524528980255127
    },
    {
      "epoch": 0.000215570068359375,
      "model_forward_time": 0.1146843433380127,
      "step": 35319
    },
    {
      "epoch": 0.000215570068359375,
      "step": 35319,
      "training_step_time": 0.39231419563293457
    },
    {
      "epoch": 0.000215576171875,
      "grad_norm": 0.12033997476100922,
      "learning_rate": 3.955051053466937e-05,
      "loss": 0.0389,
      "step": 35320
    },
    {
      "epoch": 0.000215576171875,
      "model_forward_time": 0.11510229110717773,
      "step": 35320
    },
    {
      "epoch": 0.000215576171875,
      "step": 35320,
      "training_step_time": 0.5216312408447266
    },
    {
      "epoch": 0.000215582275390625,
      "model_forward_time": 0.11588835716247559,
      "step": 35321
    },
    {
      "epoch": 0.000215582275390625,
      "step": 35321,
      "training_step_time": 0.38274359703063965
    },
    {
      "epoch": 0.00021558837890625,
      "model_forward_time": 0.11570024490356445,
      "step": 35322
    },
    {
      "epoch": 0.00021558837890625,
      "step": 35322,
      "training_step_time": 0.3865392208099365
    },
    {
      "epoch": 0.000215594482421875,
      "model_forward_time": 0.11533665657043457,
      "step": 35323
    },
    {
      "epoch": 0.000215594482421875,
      "step": 35323,
      "training_step_time": 0.3928945064544678
    },
    {
      "epoch": 0.0002156005859375,
      "model_forward_time": 0.11537361145019531,
      "step": 35324
    },
    {
      "epoch": 0.0002156005859375,
      "step": 35324,
      "training_step_time": 0.40230536460876465
    },
    {
      "epoch": 0.000215606689453125,
      "model_forward_time": 0.11586332321166992,
      "step": 35325
    },
    {
      "epoch": 0.000215606689453125,
      "step": 35325,
      "training_step_time": 0.4553370475769043
    },
    {
      "epoch": 0.00021561279296875,
      "model_forward_time": 0.11546039581298828,
      "step": 35326
    },
    {
      "epoch": 0.00021561279296875,
      "step": 35326,
      "training_step_time": 0.6122870445251465
    },
    {
      "epoch": 0.000215618896484375,
      "model_forward_time": 0.1152341365814209,
      "step": 35327
    },
    {
      "epoch": 0.000215618896484375,
      "step": 35327,
      "training_step_time": 0.3882908821105957
    },
    {
      "epoch": 0.000215625,
      "model_forward_time": 0.11526823043823242,
      "step": 35328
    },
    {
      "epoch": 0.000215625,
      "step": 35328,
      "training_step_time": 0.38979315757751465
    },
    {
      "epoch": 0.000215631103515625,
      "model_forward_time": 0.11528301239013672,
      "step": 35329
    },
    {
      "epoch": 0.000215631103515625,
      "step": 35329,
      "training_step_time": 0.4605753421783447
    },
    {
      "epoch": 0.00021563720703125,
      "grad_norm": 0.13027329742908478,
      "learning_rate": 3.9523562829773036e-05,
      "loss": 0.0353,
      "step": 35330
    },
    {
      "epoch": 0.00021563720703125,
      "model_forward_time": 0.11547398567199707,
      "step": 35330
    },
    {
      "epoch": 0.00021563720703125,
      "step": 35330,
      "training_step_time": 0.45560240745544434
    },
    {
      "epoch": 0.000215643310546875,
      "model_forward_time": 0.11655116081237793,
      "step": 35331
    },
    {
      "epoch": 0.000215643310546875,
      "step": 35331,
      "training_step_time": 0.43541741371154785
    },
    {
      "epoch": 0.0002156494140625,
      "model_forward_time": 0.11584949493408203,
      "step": 35332
    },
    {
      "epoch": 0.0002156494140625,
      "step": 35332,
      "training_step_time": 0.5366425514221191
    },
    {
      "epoch": 0.000215655517578125,
      "model_forward_time": 0.11520218849182129,
      "step": 35333
    },
    {
      "epoch": 0.000215655517578125,
      "step": 35333,
      "training_step_time": 0.405987024307251
    },
    {
      "epoch": 0.00021566162109375,
      "model_forward_time": 0.11536288261413574,
      "step": 35334
    },
    {
      "epoch": 0.00021566162109375,
      "step": 35334,
      "training_step_time": 0.39380335807800293
    },
    {
      "epoch": 0.000215667724609375,
      "model_forward_time": 0.11506295204162598,
      "step": 35335
    },
    {
      "epoch": 0.000215667724609375,
      "step": 35335,
      "training_step_time": 0.39798450469970703
    },
    {
      "epoch": 0.000215673828125,
      "model_forward_time": 0.11985111236572266,
      "step": 35336
    },
    {
      "epoch": 0.000215673828125,
      "step": 35336,
      "training_step_time": 0.39592838287353516
    },
    {
      "epoch": 0.000215679931640625,
      "model_forward_time": 0.11548161506652832,
      "step": 35337
    },
    {
      "epoch": 0.000215679931640625,
      "step": 35337,
      "training_step_time": 0.44668054580688477
    },
    {
      "epoch": 0.00021568603515625,
      "model_forward_time": 0.11571574211120605,
      "step": 35338
    },
    {
      "epoch": 0.00021568603515625,
      "step": 35338,
      "training_step_time": 0.7921414375305176
    },
    {
      "epoch": 0.000215692138671875,
      "model_forward_time": 0.11469411849975586,
      "step": 35339
    },
    {
      "epoch": 0.000215692138671875,
      "step": 35339,
      "training_step_time": 0.4046945571899414
    },
    {
      "epoch": 0.0002156982421875,
      "grad_norm": 0.13843071460723877,
      "learning_rate": 3.949661830734172e-05,
      "loss": 0.0379,
      "step": 35340
    },
    {
      "epoch": 0.0002156982421875,
      "model_forward_time": 0.11470222473144531,
      "step": 35340
    },
    {
      "epoch": 0.0002156982421875,
      "step": 35340,
      "training_step_time": 0.46389198303222656
    },
    {
      "epoch": 0.000215704345703125,
      "model_forward_time": 0.11461853981018066,
      "step": 35341
    },
    {
      "epoch": 0.000215704345703125,
      "step": 35341,
      "training_step_time": 0.38975954055786133
    },
    {
      "epoch": 0.00021571044921875,
      "model_forward_time": 0.11453390121459961,
      "step": 35342
    },
    {
      "epoch": 0.00021571044921875,
      "step": 35342,
      "training_step_time": 0.4153459072113037
    },
    {
      "epoch": 0.000215716552734375,
      "model_forward_time": 0.11464476585388184,
      "step": 35343
    },
    {
      "epoch": 0.000215716552734375,
      "step": 35343,
      "training_step_time": 0.41461920738220215
    },
    {
      "epoch": 0.00021572265625,
      "model_forward_time": 0.11560702323913574,
      "step": 35344
    },
    {
      "epoch": 0.00021572265625,
      "step": 35344,
      "training_step_time": 0.698244571685791
    },
    {
      "epoch": 0.000215728759765625,
      "model_forward_time": 0.11515522003173828,
      "step": 35345
    },
    {
      "epoch": 0.000215728759765625,
      "step": 35345,
      "training_step_time": 0.4411196708679199
    },
    {
      "epoch": 0.00021573486328125,
      "model_forward_time": 0.11605143547058105,
      "step": 35346
    },
    {
      "epoch": 0.00021573486328125,
      "step": 35346,
      "training_step_time": 0.3876650333404541
    },
    {
      "epoch": 0.000215740966796875,
      "model_forward_time": 0.1149599552154541,
      "step": 35347
    },
    {
      "epoch": 0.000215740966796875,
      "step": 35347,
      "training_step_time": 0.39479565620422363
    },
    {
      "epoch": 0.0002157470703125,
      "model_forward_time": 0.11505627632141113,
      "step": 35348
    },
    {
      "epoch": 0.0002157470703125,
      "step": 35348,
      "training_step_time": 0.38395023345947266
    },
    {
      "epoch": 0.000215753173828125,
      "model_forward_time": 0.11519932746887207,
      "step": 35349
    },
    {
      "epoch": 0.000215753173828125,
      "step": 35349,
      "training_step_time": 0.4029576778411865
    },
    {
      "epoch": 0.00021575927734375,
      "grad_norm": 0.1400952935218811,
      "learning_rate": 3.946967697556042e-05,
      "loss": 0.0411,
      "step": 35350
    },
    {
      "epoch": 0.00021575927734375,
      "model_forward_time": 0.1157069206237793,
      "step": 35350
    },
    {
      "epoch": 0.00021575927734375,
      "step": 35350,
      "training_step_time": 0.6906952857971191
    },
    {
      "epoch": 0.000215765380859375,
      "model_forward_time": 0.11551713943481445,
      "step": 35351
    },
    {
      "epoch": 0.000215765380859375,
      "step": 35351,
      "training_step_time": 0.3825540542602539
    },
    {
      "epoch": 0.000215771484375,
      "model_forward_time": 0.11594104766845703,
      "step": 35352
    },
    {
      "epoch": 0.000215771484375,
      "step": 35352,
      "training_step_time": 0.4070296287536621
    },
    {
      "epoch": 0.000215777587890625,
      "model_forward_time": 0.11532759666442871,
      "step": 35353
    },
    {
      "epoch": 0.000215777587890625,
      "step": 35353,
      "training_step_time": 0.4899101257324219
    },
    {
      "epoch": 0.00021578369140625,
      "model_forward_time": 0.11523675918579102,
      "step": 35354
    },
    {
      "epoch": 0.00021578369140625,
      "step": 35354,
      "training_step_time": 0.38153672218322754
    },
    {
      "epoch": 0.000215789794921875,
      "model_forward_time": 0.11518049240112305,
      "step": 35355
    },
    {
      "epoch": 0.000215789794921875,
      "step": 35355,
      "training_step_time": 0.37799978256225586
    },
    {
      "epoch": 0.0002157958984375,
      "model_forward_time": 0.11533141136169434,
      "step": 35356
    },
    {
      "epoch": 0.0002157958984375,
      "step": 35356,
      "training_step_time": 0.7743411064147949
    },
    {
      "epoch": 0.000215802001953125,
      "model_forward_time": 0.11582708358764648,
      "step": 35357
    },
    {
      "epoch": 0.000215802001953125,
      "step": 35357,
      "training_step_time": 0.4458775520324707
    },
    {
      "epoch": 0.00021580810546875,
      "model_forward_time": 0.11498832702636719,
      "step": 35358
    },
    {
      "epoch": 0.00021580810546875,
      "step": 35358,
      "training_step_time": 0.40883612632751465
    },
    {
      "epoch": 0.000215814208984375,
      "model_forward_time": 0.11482858657836914,
      "step": 35359
    },
    {
      "epoch": 0.000215814208984375,
      "step": 35359,
      "training_step_time": 0.47777318954467773
    },
    {
      "epoch": 0.0002158203125,
      "grad_norm": 0.10542557388544083,
      "learning_rate": 3.944273884261322e-05,
      "loss": 0.0391,
      "step": 35360
    },
    {
      "epoch": 0.0002158203125,
      "model_forward_time": 0.114715576171875,
      "step": 35360
    },
    {
      "epoch": 0.0002158203125,
      "step": 35360,
      "training_step_time": 0.3986060619354248
    },
    {
      "epoch": 0.000215826416015625,
      "model_forward_time": 0.11497330665588379,
      "step": 35361
    },
    {
      "epoch": 0.000215826416015625,
      "step": 35361,
      "training_step_time": 0.3995223045349121
    },
    {
      "epoch": 0.00021583251953125,
      "model_forward_time": 0.11503934860229492,
      "step": 35362
    },
    {
      "epoch": 0.00021583251953125,
      "step": 35362,
      "training_step_time": 0.6617450714111328
    },
    {
      "epoch": 0.000215838623046875,
      "model_forward_time": 0.11510872840881348,
      "step": 35363
    },
    {
      "epoch": 0.000215838623046875,
      "step": 35363,
      "training_step_time": 0.38107776641845703
    },
    {
      "epoch": 0.0002158447265625,
      "model_forward_time": 0.11556053161621094,
      "step": 35364
    },
    {
      "epoch": 0.0002158447265625,
      "step": 35364,
      "training_step_time": 0.4391441345214844
    },
    {
      "epoch": 0.000215850830078125,
      "model_forward_time": 0.11589217185974121,
      "step": 35365
    },
    {
      "epoch": 0.000215850830078125,
      "step": 35365,
      "training_step_time": 0.41214990615844727
    },
    {
      "epoch": 0.00021585693359375,
      "model_forward_time": 0.11472964286804199,
      "step": 35366
    },
    {
      "epoch": 0.00021585693359375,
      "step": 35366,
      "training_step_time": 0.389923095703125
    },
    {
      "epoch": 0.000215863037109375,
      "model_forward_time": 0.1157083511352539,
      "step": 35367
    },
    {
      "epoch": 0.000215863037109375,
      "step": 35367,
      "training_step_time": 0.38599610328674316
    },
    {
      "epoch": 0.000215869140625,
      "model_forward_time": 0.11607050895690918,
      "step": 35368
    },
    {
      "epoch": 0.000215869140625,
      "step": 35368,
      "training_step_time": 0.7878909111022949
    },
    {
      "epoch": 0.000215875244140625,
      "model_forward_time": 0.11511063575744629,
      "step": 35369
    },
    {
      "epoch": 0.000215875244140625,
      "step": 35369,
      "training_step_time": 0.43460893630981445
    },
    {
      "epoch": 0.00021588134765625,
      "grad_norm": 0.1309065967798233,
      "learning_rate": 3.9415803916683224e-05,
      "loss": 0.0456,
      "step": 35370
    },
    {
      "epoch": 0.00021588134765625,
      "model_forward_time": 0.11516213417053223,
      "step": 35370
    },
    {
      "epoch": 0.00021588134765625,
      "step": 35370,
      "training_step_time": 0.4520864486694336
    },
    {
      "epoch": 0.000215887451171875,
      "model_forward_time": 0.11524152755737305,
      "step": 35371
    },
    {
      "epoch": 0.000215887451171875,
      "step": 35371,
      "training_step_time": 0.44777607917785645
    },
    {
      "epoch": 0.0002158935546875,
      "model_forward_time": 0.11513733863830566,
      "step": 35372
    },
    {
      "epoch": 0.0002158935546875,
      "step": 35372,
      "training_step_time": 0.46352553367614746
    },
    {
      "epoch": 0.000215899658203125,
      "model_forward_time": 0.11490869522094727,
      "step": 35373
    },
    {
      "epoch": 0.000215899658203125,
      "step": 35373,
      "training_step_time": 0.39899778366088867
    },
    {
      "epoch": 0.00021590576171875,
      "model_forward_time": 0.11506223678588867,
      "step": 35374
    },
    {
      "epoch": 0.00021590576171875,
      "step": 35374,
      "training_step_time": 0.4524214267730713
    },
    {
      "epoch": 0.000215911865234375,
      "model_forward_time": 0.11562824249267578,
      "step": 35375
    },
    {
      "epoch": 0.000215911865234375,
      "step": 35375,
      "training_step_time": 0.38320446014404297
    },
    {
      "epoch": 0.00021591796875,
      "model_forward_time": 0.1149601936340332,
      "step": 35376
    },
    {
      "epoch": 0.00021591796875,
      "step": 35376,
      "training_step_time": 0.4214468002319336
    },
    {
      "epoch": 0.000215924072265625,
      "model_forward_time": 0.11490678787231445,
      "step": 35377
    },
    {
      "epoch": 0.000215924072265625,
      "step": 35377,
      "training_step_time": 0.39547085762023926
    },
    {
      "epoch": 0.00021593017578125,
      "model_forward_time": 0.11524462699890137,
      "step": 35378
    },
    {
      "epoch": 0.00021593017578125,
      "step": 35378,
      "training_step_time": 0.45604372024536133
    },
    {
      "epoch": 0.000215936279296875,
      "model_forward_time": 0.1157083511352539,
      "step": 35379
    },
    {
      "epoch": 0.000215936279296875,
      "step": 35379,
      "training_step_time": 0.4347090721130371
    },
    {
      "epoch": 0.0002159423828125,
      "grad_norm": 0.11389949172735214,
      "learning_rate": 3.9388872205952526e-05,
      "loss": 0.0401,
      "step": 35380
    },
    {
      "epoch": 0.0002159423828125,
      "model_forward_time": 0.11525154113769531,
      "step": 35380
    },
    {
      "epoch": 0.0002159423828125,
      "step": 35380,
      "training_step_time": 0.8133227825164795
    },
    {
      "epoch": 0.000215948486328125,
      "model_forward_time": 0.11470389366149902,
      "step": 35381
    },
    {
      "epoch": 0.000215948486328125,
      "step": 35381,
      "training_step_time": 0.3951847553253174
    },
    {
      "epoch": 0.00021595458984375,
      "model_forward_time": 0.11598563194274902,
      "step": 35382
    },
    {
      "epoch": 0.00021595458984375,
      "step": 35382,
      "training_step_time": 0.3833489418029785
    },
    {
      "epoch": 0.000215960693359375,
      "model_forward_time": 0.11527276039123535,
      "step": 35383
    },
    {
      "epoch": 0.000215960693359375,
      "step": 35383,
      "training_step_time": 0.4045281410217285
    },
    {
      "epoch": 0.000215966796875,
      "model_forward_time": 0.11525201797485352,
      "step": 35384
    },
    {
      "epoch": 0.000215966796875,
      "step": 35384,
      "training_step_time": 0.47925734519958496
    },
    {
      "epoch": 0.000215972900390625,
      "model_forward_time": 0.11504888534545898,
      "step": 35385
    },
    {
      "epoch": 0.000215972900390625,
      "step": 35385,
      "training_step_time": 0.49216771125793457
    },
    {
      "epoch": 0.00021597900390625,
      "model_forward_time": 0.11831092834472656,
      "step": 35386
    },
    {
      "epoch": 0.00021597900390625,
      "step": 35386,
      "training_step_time": 0.42093515396118164
    },
    {
      "epoch": 0.000215985107421875,
      "model_forward_time": 0.1156167984008789,
      "step": 35387
    },
    {
      "epoch": 0.000215985107421875,
      "step": 35387,
      "training_step_time": 0.45389485359191895
    },
    {
      "epoch": 0.0002159912109375,
      "model_forward_time": 0.11500048637390137,
      "step": 35388
    },
    {
      "epoch": 0.0002159912109375,
      "step": 35388,
      "training_step_time": 0.39455318450927734
    },
    {
      "epoch": 0.000215997314453125,
      "model_forward_time": 0.11477875709533691,
      "step": 35389
    },
    {
      "epoch": 0.000215997314453125,
      "step": 35389,
      "training_step_time": 0.38689708709716797
    },
    {
      "epoch": 0.00021600341796875,
      "grad_norm": 0.10807513445615768,
      "learning_rate": 3.93619437186023e-05,
      "loss": 0.0379,
      "step": 35390
    },
    {
      "epoch": 0.00021600341796875,
      "model_forward_time": 0.11555123329162598,
      "step": 35390
    },
    {
      "epoch": 0.00021600341796875,
      "step": 35390,
      "training_step_time": 0.38344430923461914
    },
    {
      "epoch": 0.000216009521484375,
      "model_forward_time": 0.11605262756347656,
      "step": 35391
    },
    {
      "epoch": 0.000216009521484375,
      "step": 35391,
      "training_step_time": 0.47049689292907715
    },
    {
      "epoch": 0.000216015625,
      "model_forward_time": 0.1155405044555664,
      "step": 35392
    },
    {
      "epoch": 0.000216015625,
      "step": 35392,
      "training_step_time": 0.5686147212982178
    },
    {
      "epoch": 0.000216021728515625,
      "model_forward_time": 0.1149752140045166,
      "step": 35393
    },
    {
      "epoch": 0.000216021728515625,
      "step": 35393,
      "training_step_time": 0.41585350036621094
    },
    {
      "epoch": 0.00021602783203125,
      "model_forward_time": 0.11487889289855957,
      "step": 35394
    },
    {
      "epoch": 0.00021602783203125,
      "step": 35394,
      "training_step_time": 0.39663052558898926
    },
    {
      "epoch": 0.000216033935546875,
      "model_forward_time": 0.11561942100524902,
      "step": 35395
    },
    {
      "epoch": 0.000216033935546875,
      "step": 35395,
      "training_step_time": 0.39587974548339844
    },
    {
      "epoch": 0.0002160400390625,
      "model_forward_time": 0.11541366577148438,
      "step": 35396
    },
    {
      "epoch": 0.0002160400390625,
      "step": 35396,
      "training_step_time": 0.3890378475189209
    },
    {
      "epoch": 0.000216046142578125,
      "model_forward_time": 0.11500024795532227,
      "step": 35397
    },
    {
      "epoch": 0.000216046142578125,
      "step": 35397,
      "training_step_time": 0.3914368152618408
    },
    {
      "epoch": 0.00021605224609375,
      "model_forward_time": 0.11565661430358887,
      "step": 35398
    },
    {
      "epoch": 0.00021605224609375,
      "step": 35398,
      "training_step_time": 0.7425708770751953
    },
    {
      "epoch": 0.000216058349609375,
      "model_forward_time": 0.11423277854919434,
      "step": 35399
    },
    {
      "epoch": 0.000216058349609375,
      "step": 35399,
      "training_step_time": 0.44268178939819336
    },
    {
      "epoch": 0.000216064453125,
      "grad_norm": 0.12379425019025803,
      "learning_rate": 3.933501846281267e-05,
      "loss": 0.0362,
      "step": 35400
    },
    {
      "epoch": 0.000216064453125,
      "model_forward_time": 0.11406135559082031,
      "step": 35400
    },
    {
      "epoch": 0.000216064453125,
      "step": 35400,
      "training_step_time": 0.4512312412261963
    },
    {
      "epoch": 0.000216070556640625,
      "model_forward_time": 0.11484169960021973,
      "step": 35401
    },
    {
      "epoch": 0.000216070556640625,
      "step": 35401,
      "training_step_time": 0.38836240768432617
    },
    {
      "epoch": 0.00021607666015625,
      "model_forward_time": 0.11498475074768066,
      "step": 35402
    },
    {
      "epoch": 0.00021607666015625,
      "step": 35402,
      "training_step_time": 0.3926270008087158
    },
    {
      "epoch": 0.000216082763671875,
      "model_forward_time": 0.11464762687683105,
      "step": 35403
    },
    {
      "epoch": 0.000216082763671875,
      "step": 35403,
      "training_step_time": 0.3960232734680176
    },
    {
      "epoch": 0.0002160888671875,
      "model_forward_time": 0.11536693572998047,
      "step": 35404
    },
    {
      "epoch": 0.0002160888671875,
      "step": 35404,
      "training_step_time": 0.8266699314117432
    },
    {
      "epoch": 0.000216094970703125,
      "model_forward_time": 0.11492919921875,
      "step": 35405
    },
    {
      "epoch": 0.000216094970703125,
      "step": 35405,
      "training_step_time": 0.3854091167449951
    },
    {
      "epoch": 0.00021610107421875,
      "model_forward_time": 0.11420631408691406,
      "step": 35406
    },
    {
      "epoch": 0.00021610107421875,
      "step": 35406,
      "training_step_time": 0.47054004669189453
    },
    {
      "epoch": 0.000216107177734375,
      "model_forward_time": 0.11470842361450195,
      "step": 35407
    },
    {
      "epoch": 0.000216107177734375,
      "step": 35407,
      "training_step_time": 0.3904411792755127
    },
    {
      "epoch": 0.00021611328125,
      "model_forward_time": 0.11422085762023926,
      "step": 35408
    },
    {
      "epoch": 0.00021611328125,
      "step": 35408,
      "training_step_time": 0.3884308338165283
    },
    {
      "epoch": 0.000216119384765625,
      "model_forward_time": 0.11464190483093262,
      "step": 35409
    },
    {
      "epoch": 0.000216119384765625,
      "step": 35409,
      "training_step_time": 0.37868642807006836
    },
    {
      "epoch": 0.00021612548828125,
      "grad_norm": 0.139768585562706,
      "learning_rate": 3.930809644676283e-05,
      "loss": 0.0388,
      "step": 35410
    },
    {
      "epoch": 0.00021612548828125,
      "model_forward_time": 0.11553502082824707,
      "step": 35410
    },
    {
      "epoch": 0.00021612548828125,
      "step": 35410,
      "training_step_time": 0.6348581314086914
    },
    {
      "epoch": 0.000216131591796875,
      "model_forward_time": 0.11475372314453125,
      "step": 35411
    },
    {
      "epoch": 0.000216131591796875,
      "step": 35411,
      "training_step_time": 0.5262362957000732
    },
    {
      "epoch": 0.0002161376953125,
      "model_forward_time": 0.11488866806030273,
      "step": 35412
    },
    {
      "epoch": 0.0002161376953125,
      "step": 35412,
      "training_step_time": 0.49558043479919434
    },
    {
      "epoch": 0.000216143798828125,
      "model_forward_time": 0.11446595191955566,
      "step": 35413
    },
    {
      "epoch": 0.000216143798828125,
      "step": 35413,
      "training_step_time": 0.3892996311187744
    },
    {
      "epoch": 0.00021614990234375,
      "model_forward_time": 0.11474990844726562,
      "step": 35414
    },
    {
      "epoch": 0.00021614990234375,
      "step": 35414,
      "training_step_time": 0.3944997787475586
    },
    {
      "epoch": 0.000216156005859375,
      "model_forward_time": 0.11414074897766113,
      "step": 35415
    },
    {
      "epoch": 0.000216156005859375,
      "step": 35415,
      "training_step_time": 0.37946534156799316
    },
    {
      "epoch": 0.000216162109375,
      "model_forward_time": 0.11485528945922852,
      "step": 35416
    },
    {
      "epoch": 0.000216162109375,
      "step": 35416,
      "training_step_time": 0.40345287322998047
    },
    {
      "epoch": 0.000216168212890625,
      "model_forward_time": 0.11501812934875488,
      "step": 35417
    },
    {
      "epoch": 0.000216168212890625,
      "step": 35417,
      "training_step_time": 0.386368989944458
    },
    {
      "epoch": 0.00021617431640625,
      "model_forward_time": 0.11583352088928223,
      "step": 35418
    },
    {
      "epoch": 0.00021617431640625,
      "step": 35418,
      "training_step_time": 0.44321775436401367
    },
    {
      "epoch": 0.000216180419921875,
      "model_forward_time": 0.11482024192810059,
      "step": 35419
    },
    {
      "epoch": 0.000216180419921875,
      "step": 35419,
      "training_step_time": 0.4268186092376709
    },
    {
      "epoch": 0.0002161865234375,
      "grad_norm": 0.09494972229003906,
      "learning_rate": 3.928117767863102e-05,
      "loss": 0.0381,
      "step": 35420
    },
    {
      "epoch": 0.0002161865234375,
      "model_forward_time": 0.11576533317565918,
      "step": 35420
    },
    {
      "epoch": 0.0002161865234375,
      "step": 35420,
      "training_step_time": 0.5019643306732178
    },
    {
      "epoch": 0.000216192626953125,
      "model_forward_time": 0.11498475074768066,
      "step": 35421
    },
    {
      "epoch": 0.000216192626953125,
      "step": 35421,
      "training_step_time": 0.39153051376342773
    },
    {
      "epoch": 0.00021619873046875,
      "model_forward_time": 0.1155710220336914,
      "step": 35422
    },
    {
      "epoch": 0.00021619873046875,
      "step": 35422,
      "training_step_time": 0.5861339569091797
    },
    {
      "epoch": 0.000216204833984375,
      "model_forward_time": 0.1150352954864502,
      "step": 35423
    },
    {
      "epoch": 0.000216204833984375,
      "step": 35423,
      "training_step_time": 0.5003585815429688
    },
    {
      "epoch": 0.0002162109375,
      "model_forward_time": 0.11448121070861816,
      "step": 35424
    },
    {
      "epoch": 0.0002162109375,
      "step": 35424,
      "training_step_time": 0.3646688461303711
    },
    {
      "epoch": 0.000216217041015625,
      "model_forward_time": 0.1144866943359375,
      "step": 35425
    },
    {
      "epoch": 0.000216217041015625,
      "step": 35425,
      "training_step_time": 0.42861032485961914
    },
    {
      "epoch": 0.00021622314453125,
      "model_forward_time": 0.11508774757385254,
      "step": 35426
    },
    {
      "epoch": 0.00021622314453125,
      "step": 35426,
      "training_step_time": 0.39408326148986816
    },
    {
      "epoch": 0.000216229248046875,
      "model_forward_time": 0.11484575271606445,
      "step": 35427
    },
    {
      "epoch": 0.000216229248046875,
      "step": 35427,
      "training_step_time": 0.38625025749206543
    },
    {
      "epoch": 0.0002162353515625,
      "model_forward_time": 0.11528396606445312,
      "step": 35428
    },
    {
      "epoch": 0.0002162353515625,
      "step": 35428,
      "training_step_time": 0.3951692581176758
    },
    {
      "epoch": 0.000216241455078125,
      "model_forward_time": 0.11539840698242188,
      "step": 35429
    },
    {
      "epoch": 0.000216241455078125,
      "step": 35429,
      "training_step_time": 0.4153285026550293
    },
    {
      "epoch": 0.00021624755859375,
      "grad_norm": 0.1249450221657753,
      "learning_rate": 3.925426216659438e-05,
      "loss": 0.0356,
      "step": 35430
    },
    {
      "epoch": 0.00021624755859375,
      "model_forward_time": 0.11487698554992676,
      "step": 35430
    },
    {
      "epoch": 0.00021624755859375,
      "step": 35430,
      "training_step_time": 0.4011528491973877
    },
    {
      "epoch": 0.000216253662109375,
      "model_forward_time": 0.11511111259460449,
      "step": 35431
    },
    {
      "epoch": 0.000216253662109375,
      "step": 35431,
      "training_step_time": 0.3951585292816162
    },
    {
      "epoch": 0.000216259765625,
      "model_forward_time": 0.11551332473754883,
      "step": 35432
    },
    {
      "epoch": 0.000216259765625,
      "step": 35432,
      "training_step_time": 0.48078060150146484
    },
    {
      "epoch": 0.000216265869140625,
      "model_forward_time": 0.1152803897857666,
      "step": 35433
    },
    {
      "epoch": 0.000216265869140625,
      "step": 35433,
      "training_step_time": 0.4274430274963379
    },
    {
      "epoch": 0.00021627197265625,
      "model_forward_time": 0.11504006385803223,
      "step": 35434
    },
    {
      "epoch": 0.00021627197265625,
      "step": 35434,
      "training_step_time": 0.685584306716919
    },
    {
      "epoch": 0.000216278076171875,
      "model_forward_time": 0.11435532569885254,
      "step": 35435
    },
    {
      "epoch": 0.000216278076171875,
      "step": 35435,
      "training_step_time": 0.37347841262817383
    },
    {
      "epoch": 0.0002162841796875,
      "model_forward_time": 0.11508798599243164,
      "step": 35436
    },
    {
      "epoch": 0.0002162841796875,
      "step": 35436,
      "training_step_time": 0.40621232986450195
    },
    {
      "epoch": 0.000216290283203125,
      "model_forward_time": 0.11505699157714844,
      "step": 35437
    },
    {
      "epoch": 0.000216290283203125,
      "step": 35437,
      "training_step_time": 0.4520702362060547
    },
    {
      "epoch": 0.00021629638671875,
      "model_forward_time": 0.11429333686828613,
      "step": 35438
    },
    {
      "epoch": 0.00021629638671875,
      "step": 35438,
      "training_step_time": 0.36182427406311035
    },
    {
      "epoch": 0.000216302490234375,
      "model_forward_time": 0.11486244201660156,
      "step": 35439
    },
    {
      "epoch": 0.000216302490234375,
      "step": 35439,
      "training_step_time": 0.4670584201812744
    },
    {
      "epoch": 0.00021630859375,
      "grad_norm": 0.11356039345264435,
      "learning_rate": 3.92273499188292e-05,
      "loss": 0.0402,
      "step": 35440
    },
    {
      "epoch": 0.00021630859375,
      "model_forward_time": 0.11465930938720703,
      "step": 35440
    },
    {
      "epoch": 0.00021630859375,
      "step": 35440,
      "training_step_time": 0.6973745822906494
    },
    {
      "epoch": 0.000216314697265625,
      "model_forward_time": 0.1143198013305664,
      "step": 35441
    },
    {
      "epoch": 0.000216314697265625,
      "step": 35441,
      "training_step_time": 0.3929896354675293
    },
    {
      "epoch": 0.00021632080078125,
      "model_forward_time": 0.11435604095458984,
      "step": 35442
    },
    {
      "epoch": 0.00021632080078125,
      "step": 35442,
      "training_step_time": 0.3834865093231201
    },
    {
      "epoch": 0.000216326904296875,
      "model_forward_time": 0.11430072784423828,
      "step": 35443
    },
    {
      "epoch": 0.000216326904296875,
      "step": 35443,
      "training_step_time": 0.383908748626709
    },
    {
      "epoch": 0.0002163330078125,
      "model_forward_time": 0.11439704895019531,
      "step": 35444
    },
    {
      "epoch": 0.0002163330078125,
      "step": 35444,
      "training_step_time": 0.37897205352783203
    },
    {
      "epoch": 0.000216339111328125,
      "model_forward_time": 0.1152036190032959,
      "step": 35445
    },
    {
      "epoch": 0.000216339111328125,
      "step": 35445,
      "training_step_time": 0.4414184093475342
    },
    {
      "epoch": 0.00021634521484375,
      "model_forward_time": 0.11544346809387207,
      "step": 35446
    },
    {
      "epoch": 0.00021634521484375,
      "step": 35446,
      "training_step_time": 0.6892023086547852
    },
    {
      "epoch": 0.000216351318359375,
      "model_forward_time": 0.11717605590820312,
      "step": 35447
    },
    {
      "epoch": 0.000216351318359375,
      "step": 35447,
      "training_step_time": 0.3793528079986572
    },
    {
      "epoch": 0.000216357421875,
      "model_forward_time": 0.11472558975219727,
      "step": 35448
    },
    {
      "epoch": 0.000216357421875,
      "step": 35448,
      "training_step_time": 0.38865232467651367
    },
    {
      "epoch": 0.000216363525390625,
      "model_forward_time": 0.11483287811279297,
      "step": 35449
    },
    {
      "epoch": 0.000216363525390625,
      "step": 35449,
      "training_step_time": 0.4266834259033203
    },
    {
      "epoch": 0.00021636962890625,
      "grad_norm": 0.10576686263084412,
      "learning_rate": 3.9200440943510665e-05,
      "loss": 0.0426,
      "step": 35450
    },
    {
      "epoch": 0.00021636962890625,
      "model_forward_time": 0.11508846282958984,
      "step": 35450
    },
    {
      "epoch": 0.00021636962890625,
      "step": 35450,
      "training_step_time": 0.39452624320983887
    },
    {
      "epoch": 0.000216375732421875,
      "model_forward_time": 0.11520695686340332,
      "step": 35451
    },
    {
      "epoch": 0.000216375732421875,
      "step": 35451,
      "training_step_time": 0.4095001220703125
    },
    {
      "epoch": 0.0002163818359375,
      "model_forward_time": 0.11497831344604492,
      "step": 35452
    },
    {
      "epoch": 0.0002163818359375,
      "step": 35452,
      "training_step_time": 0.7845156192779541
    },
    {
      "epoch": 0.000216387939453125,
      "model_forward_time": 0.11523270606994629,
      "step": 35453
    },
    {
      "epoch": 0.000216387939453125,
      "step": 35453,
      "training_step_time": 0.42205810546875
    },
    {
      "epoch": 0.00021639404296875,
      "model_forward_time": 0.114501953125,
      "step": 35454
    },
    {
      "epoch": 0.00021639404296875,
      "step": 35454,
      "training_step_time": 0.4243769645690918
    },
    {
      "epoch": 0.000216400146484375,
      "model_forward_time": 0.11462688446044922,
      "step": 35455
    },
    {
      "epoch": 0.000216400146484375,
      "step": 35455,
      "training_step_time": 0.38405871391296387
    },
    {
      "epoch": 0.00021640625,
      "model_forward_time": 0.1147756576538086,
      "step": 35456
    },
    {
      "epoch": 0.00021640625,
      "step": 35456,
      "training_step_time": 0.38548707962036133
    },
    {
      "epoch": 0.000216412353515625,
      "model_forward_time": 0.1138925552368164,
      "step": 35457
    },
    {
      "epoch": 0.000216412353515625,
      "step": 35457,
      "training_step_time": 0.37873077392578125
    },
    {
      "epoch": 0.00021641845703125,
      "model_forward_time": 0.11498570442199707,
      "step": 35458
    },
    {
      "epoch": 0.00021641845703125,
      "step": 35458,
      "training_step_time": 0.8768408298492432
    },
    {
      "epoch": 0.000216424560546875,
      "model_forward_time": 0.11435723304748535,
      "step": 35459
    },
    {
      "epoch": 0.000216424560546875,
      "step": 35459,
      "training_step_time": 0.4596068859100342
    },
    {
      "epoch": 0.0002164306640625,
      "grad_norm": 0.13100923597812653,
      "learning_rate": 3.917353524881302e-05,
      "loss": 0.0372,
      "step": 35460
    },
    {
      "epoch": 0.0002164306640625,
      "model_forward_time": 0.11432242393493652,
      "step": 35460
    },
    {
      "epoch": 0.0002164306640625,
      "step": 35460,
      "training_step_time": 0.3947305679321289
    },
    {
      "epoch": 0.000216436767578125,
      "model_forward_time": 0.11457133293151855,
      "step": 35461
    },
    {
      "epoch": 0.000216436767578125,
      "step": 35461,
      "training_step_time": 0.38505125045776367
    },
    {
      "epoch": 0.00021644287109375,
      "model_forward_time": 0.1146249771118164,
      "step": 35462
    },
    {
      "epoch": 0.00021644287109375,
      "step": 35462,
      "training_step_time": 0.39662790298461914
    },
    {
      "epoch": 0.000216448974609375,
      "model_forward_time": 0.11473679542541504,
      "step": 35463
    },
    {
      "epoch": 0.000216448974609375,
      "step": 35463,
      "training_step_time": 0.40030622482299805
    },
    {
      "epoch": 0.000216455078125,
      "model_forward_time": 0.1146540641784668,
      "step": 35464
    },
    {
      "epoch": 0.000216455078125,
      "step": 35464,
      "training_step_time": 0.8646059036254883
    },
    {
      "epoch": 0.000216461181640625,
      "model_forward_time": 0.11591196060180664,
      "step": 35465
    },
    {
      "epoch": 0.000216461181640625,
      "step": 35465,
      "training_step_time": 0.48148059844970703
    },
    {
      "epoch": 0.00021646728515625,
      "model_forward_time": 0.11462235450744629,
      "step": 35466
    },
    {
      "epoch": 0.00021646728515625,
      "step": 35466,
      "training_step_time": 0.4043700695037842
    },
    {
      "epoch": 0.000216473388671875,
      "model_forward_time": 0.1148538589477539,
      "step": 35467
    },
    {
      "epoch": 0.000216473388671875,
      "step": 35467,
      "training_step_time": 0.41709303855895996
    },
    {
      "epoch": 0.0002164794921875,
      "model_forward_time": 0.11401653289794922,
      "step": 35468
    },
    {
      "epoch": 0.0002164794921875,
      "step": 35468,
      "training_step_time": 0.3826174736022949
    },
    {
      "epoch": 0.000216485595703125,
      "model_forward_time": 0.11506891250610352,
      "step": 35469
    },
    {
      "epoch": 0.000216485595703125,
      "step": 35469,
      "training_step_time": 0.3757805824279785
    },
    {
      "epoch": 0.00021649169921875,
      "grad_norm": 0.0979563370347023,
      "learning_rate": 3.914663284290952e-05,
      "loss": 0.0359,
      "step": 35470
    },
    {
      "epoch": 0.00021649169921875,
      "model_forward_time": 0.11531209945678711,
      "step": 35470
    },
    {
      "epoch": 0.00021649169921875,
      "step": 35470,
      "training_step_time": 0.5564751625061035
    },
    {
      "epoch": 0.000216497802734375,
      "model_forward_time": 0.11560440063476562,
      "step": 35471
    },
    {
      "epoch": 0.000216497802734375,
      "step": 35471,
      "training_step_time": 0.4070422649383545
    },
    {
      "epoch": 0.00021650390625,
      "model_forward_time": 0.11563253402709961,
      "step": 35472
    },
    {
      "epoch": 0.00021650390625,
      "step": 35472,
      "training_step_time": 0.5020482540130615
    },
    {
      "epoch": 0.000216510009765625,
      "model_forward_time": 0.11484313011169434,
      "step": 35473
    },
    {
      "epoch": 0.000216510009765625,
      "step": 35473,
      "training_step_time": 0.3887813091278076
    },
    {
      "epoch": 0.00021651611328125,
      "model_forward_time": 0.11498117446899414,
      "step": 35474
    },
    {
      "epoch": 0.00021651611328125,
      "step": 35474,
      "training_step_time": 0.4338674545288086
    },
    {
      "epoch": 0.000216522216796875,
      "model_forward_time": 0.11443209648132324,
      "step": 35475
    },
    {
      "epoch": 0.000216522216796875,
      "step": 35475,
      "training_step_time": 0.40120720863342285
    },
    {
      "epoch": 0.0002165283203125,
      "model_forward_time": 0.11566543579101562,
      "step": 35476
    },
    {
      "epoch": 0.0002165283203125,
      "step": 35476,
      "training_step_time": 0.6684086322784424
    },
    {
      "epoch": 0.000216534423828125,
      "model_forward_time": 0.11445879936218262,
      "step": 35477
    },
    {
      "epoch": 0.000216534423828125,
      "step": 35477,
      "training_step_time": 0.40325379371643066
    },
    {
      "epoch": 0.00021654052734375,
      "model_forward_time": 0.11879229545593262,
      "step": 35478
    },
    {
      "epoch": 0.00021654052734375,
      "step": 35478,
      "training_step_time": 0.3950986862182617
    },
    {
      "epoch": 0.000216546630859375,
      "model_forward_time": 0.11765694618225098,
      "step": 35479
    },
    {
      "epoch": 0.000216546630859375,
      "step": 35479,
      "training_step_time": 0.46648693084716797
    },
    {
      "epoch": 0.000216552734375,
      "grad_norm": 0.09011107683181763,
      "learning_rate": 3.9119733733972387e-05,
      "loss": 0.0348,
      "step": 35480
    },
    {
      "epoch": 0.000216552734375,
      "model_forward_time": 0.12085127830505371,
      "step": 35480
    },
    {
      "epoch": 0.000216552734375,
      "step": 35480,
      "training_step_time": 0.4512815475463867
    },
    {
      "epoch": 0.000216558837890625,
      "model_forward_time": 0.1143951416015625,
      "step": 35481
    },
    {
      "epoch": 0.000216558837890625,
      "step": 35481,
      "training_step_time": 0.37843799591064453
    },
    {
      "epoch": 0.00021656494140625,
      "model_forward_time": 0.11533832550048828,
      "step": 35482
    },
    {
      "epoch": 0.00021656494140625,
      "step": 35482,
      "training_step_time": 0.7347376346588135
    },
    {
      "epoch": 0.000216571044921875,
      "model_forward_time": 0.11507058143615723,
      "step": 35483
    },
    {
      "epoch": 0.000216571044921875,
      "step": 35483,
      "training_step_time": 0.38574743270874023
    },
    {
      "epoch": 0.0002165771484375,
      "model_forward_time": 0.11452436447143555,
      "step": 35484
    },
    {
      "epoch": 0.0002165771484375,
      "step": 35484,
      "training_step_time": 0.44420528411865234
    },
    {
      "epoch": 0.000216583251953125,
      "model_forward_time": 0.11490249633789062,
      "step": 35485
    },
    {
      "epoch": 0.000216583251953125,
      "step": 35485,
      "training_step_time": 0.46494603157043457
    },
    {
      "epoch": 0.00021658935546875,
      "model_forward_time": 0.11445736885070801,
      "step": 35486
    },
    {
      "epoch": 0.00021658935546875,
      "step": 35486,
      "training_step_time": 0.42139673233032227
    },
    {
      "epoch": 0.000216595458984375,
      "model_forward_time": 0.11454081535339355,
      "step": 35487
    },
    {
      "epoch": 0.000216595458984375,
      "step": 35487,
      "training_step_time": 0.4593980312347412
    },
    {
      "epoch": 0.0002166015625,
      "model_forward_time": 0.11531281471252441,
      "step": 35488
    },
    {
      "epoch": 0.0002166015625,
      "step": 35488,
      "training_step_time": 0.5906190872192383
    },
    {
      "epoch": 0.000216607666015625,
      "model_forward_time": 0.11443614959716797,
      "step": 35489
    },
    {
      "epoch": 0.000216607666015625,
      "step": 35489,
      "training_step_time": 0.38733673095703125
    },
    {
      "epoch": 0.00021661376953125,
      "grad_norm": 0.09506513923406601,
      "learning_rate": 3.9092837930172884e-05,
      "loss": 0.0391,
      "step": 35490
    },
    {
      "epoch": 0.00021661376953125,
      "model_forward_time": 0.11462783813476562,
      "step": 35490
    },
    {
      "epoch": 0.00021661376953125,
      "step": 35490,
      "training_step_time": 0.3967468738555908
    },
    {
      "epoch": 0.000216619873046875,
      "model_forward_time": 0.11458992958068848,
      "step": 35491
    },
    {
      "epoch": 0.000216619873046875,
      "step": 35491,
      "training_step_time": 0.36710166931152344
    },
    {
      "epoch": 0.0002166259765625,
      "model_forward_time": 0.11637163162231445,
      "step": 35492
    },
    {
      "epoch": 0.0002166259765625,
      "step": 35492,
      "training_step_time": 0.45109033584594727
    },
    {
      "epoch": 0.000216632080078125,
      "model_forward_time": 0.11463308334350586,
      "step": 35493
    },
    {
      "epoch": 0.000216632080078125,
      "step": 35493,
      "training_step_time": 0.4196174144744873
    },
    {
      "epoch": 0.00021663818359375,
      "model_forward_time": 0.11539936065673828,
      "step": 35494
    },
    {
      "epoch": 0.00021663818359375,
      "step": 35494,
      "training_step_time": 0.7768080234527588
    },
    {
      "epoch": 0.000216644287109375,
      "model_forward_time": 0.11432051658630371,
      "step": 35495
    },
    {
      "epoch": 0.000216644287109375,
      "step": 35495,
      "training_step_time": 0.3914759159088135
    },
    {
      "epoch": 0.000216650390625,
      "model_forward_time": 0.11448550224304199,
      "step": 35496
    },
    {
      "epoch": 0.000216650390625,
      "step": 35496,
      "training_step_time": 0.3895082473754883
    },
    {
      "epoch": 0.000216656494140625,
      "model_forward_time": 0.11389446258544922,
      "step": 35497
    },
    {
      "epoch": 0.000216656494140625,
      "step": 35497,
      "training_step_time": 0.45941638946533203
    },
    {
      "epoch": 0.00021666259765625,
      "model_forward_time": 0.11475801467895508,
      "step": 35498
    },
    {
      "epoch": 0.00021666259765625,
      "step": 35498,
      "training_step_time": 0.40746021270751953
    },
    {
      "epoch": 0.000216668701171875,
      "model_forward_time": 0.11416220664978027,
      "step": 35499
    },
    {
      "epoch": 0.000216668701171875,
      "step": 35499,
      "training_step_time": 0.4551737308502197
    },
    {
      "epoch": 0.0002166748046875,
      "grad_norm": 0.10398515313863754,
      "learning_rate": 3.9065945439681214e-05,
      "loss": 0.0389,
      "step": 35500
    },
    {
      "epoch": 0.0002166748046875,
      "model_forward_time": 0.11519169807434082,
      "step": 35500
    },
    {
      "epoch": 0.0002166748046875,
      "step": 35500,
      "training_step_time": 0.7037630081176758
    },
    {
      "epoch": 0.000216680908203125,
      "model_forward_time": 0.11455559730529785,
      "step": 35501
    },
    {
      "epoch": 0.000216680908203125,
      "step": 35501,
      "training_step_time": 0.39771580696105957
    },
    {
      "epoch": 0.00021668701171875,
      "model_forward_time": 0.11413383483886719,
      "step": 35502
    },
    {
      "epoch": 0.00021668701171875,
      "step": 35502,
      "training_step_time": 0.3846549987792969
    },
    {
      "epoch": 0.000216693115234375,
      "model_forward_time": 0.11461305618286133,
      "step": 35503
    },
    {
      "epoch": 0.000216693115234375,
      "step": 35503,
      "training_step_time": 0.4459421634674072
    },
    {
      "epoch": 0.00021669921875,
      "model_forward_time": 0.11456751823425293,
      "step": 35504
    },
    {
      "epoch": 0.00021669921875,
      "step": 35504,
      "training_step_time": 0.3650517463684082
    },
    {
      "epoch": 0.000216705322265625,
      "model_forward_time": 0.11519360542297363,
      "step": 35505
    },
    {
      "epoch": 0.000216705322265625,
      "step": 35505,
      "training_step_time": 0.4241492748260498
    },
    {
      "epoch": 0.00021671142578125,
      "model_forward_time": 0.11570596694946289,
      "step": 35506
    },
    {
      "epoch": 0.00021671142578125,
      "step": 35506,
      "training_step_time": 0.5553154945373535
    },
    {
      "epoch": 0.000216717529296875,
      "model_forward_time": 0.11475467681884766,
      "step": 35507
    },
    {
      "epoch": 0.000216717529296875,
      "step": 35507,
      "training_step_time": 0.38496875762939453
    },
    {
      "epoch": 0.0002167236328125,
      "model_forward_time": 0.11454892158508301,
      "step": 35508
    },
    {
      "epoch": 0.0002167236328125,
      "step": 35508,
      "training_step_time": 0.3886890411376953
    },
    {
      "epoch": 0.000216729736328125,
      "model_forward_time": 0.11418867111206055,
      "step": 35509
    },
    {
      "epoch": 0.000216729736328125,
      "step": 35509,
      "training_step_time": 0.39388203620910645
    },
    {
      "epoch": 0.00021673583984375,
      "grad_norm": 0.10312116891145706,
      "learning_rate": 3.903905627066662e-05,
      "loss": 0.0375,
      "step": 35510
    },
    {
      "epoch": 0.00021673583984375,
      "model_forward_time": 0.11519169807434082,
      "step": 35510
    },
    {
      "epoch": 0.00021673583984375,
      "step": 35510,
      "training_step_time": 0.38454222679138184
    },
    {
      "epoch": 0.000216741943359375,
      "model_forward_time": 0.11489677429199219,
      "step": 35511
    },
    {
      "epoch": 0.000216741943359375,
      "step": 35511,
      "training_step_time": 0.419996976852417
    },
    {
      "epoch": 0.000216748046875,
      "model_forward_time": 0.11565136909484863,
      "step": 35512
    },
    {
      "epoch": 0.000216748046875,
      "step": 35512,
      "training_step_time": 0.7546517848968506
    },
    {
      "epoch": 0.000216754150390625,
      "model_forward_time": 0.11581182479858398,
      "step": 35513
    },
    {
      "epoch": 0.000216754150390625,
      "step": 35513,
      "training_step_time": 0.3881657123565674
    },
    {
      "epoch": 0.00021676025390625,
      "model_forward_time": 0.11775636672973633,
      "step": 35514
    },
    {
      "epoch": 0.00021676025390625,
      "step": 35514,
      "training_step_time": 0.376833438873291
    },
    {
      "epoch": 0.000216766357421875,
      "model_forward_time": 0.12012410163879395,
      "step": 35515
    },
    {
      "epoch": 0.000216766357421875,
      "step": 35515,
      "training_step_time": 0.3821289539337158
    },
    {
      "epoch": 0.0002167724609375,
      "model_forward_time": 0.11864757537841797,
      "step": 35516
    },
    {
      "epoch": 0.0002167724609375,
      "step": 35516,
      "training_step_time": 0.3872711658477783
    },
    {
      "epoch": 0.000216778564453125,
      "model_forward_time": 0.11747097969055176,
      "step": 35517
    },
    {
      "epoch": 0.000216778564453125,
      "step": 35517,
      "training_step_time": 0.3772757053375244
    },
    {
      "epoch": 0.00021678466796875,
      "model_forward_time": 0.12205076217651367,
      "step": 35518
    },
    {
      "epoch": 0.00021678466796875,
      "step": 35518,
      "training_step_time": 0.7968540191650391
    },
    {
      "epoch": 0.000216790771484375,
      "model_forward_time": 0.11492800712585449,
      "step": 35519
    },
    {
      "epoch": 0.000216790771484375,
      "step": 35519,
      "training_step_time": 0.403031587600708
    },
    {
      "epoch": 0.000216796875,
      "grad_norm": 0.12214082479476929,
      "learning_rate": 3.901217043129735e-05,
      "loss": 0.0327,
      "step": 35520
    },
    {
      "epoch": 0.000216796875,
      "model_forward_time": 0.11403846740722656,
      "step": 35520
    },
    {
      "epoch": 0.000216796875,
      "step": 35520,
      "training_step_time": 0.3861973285675049
    },
    {
      "epoch": 0.000216802978515625,
      "model_forward_time": 0.11437702178955078,
      "step": 35521
    },
    {
      "epoch": 0.000216802978515625,
      "step": 35521,
      "training_step_time": 0.39505743980407715
    },
    {
      "epoch": 0.00021680908203125,
      "model_forward_time": 0.11475014686584473,
      "step": 35522
    },
    {
      "epoch": 0.00021680908203125,
      "step": 35522,
      "training_step_time": 0.3834803104400635
    },
    {
      "epoch": 0.000216815185546875,
      "model_forward_time": 0.11378884315490723,
      "step": 35523
    },
    {
      "epoch": 0.000216815185546875,
      "step": 35523,
      "training_step_time": 0.4022986888885498
    },
    {
      "epoch": 0.0002168212890625,
      "model_forward_time": 0.11591196060180664,
      "step": 35524
    },
    {
      "epoch": 0.0002168212890625,
      "step": 35524,
      "training_step_time": 0.7237367630004883
    },
    {
      "epoch": 0.000216827392578125,
      "model_forward_time": 0.1150822639465332,
      "step": 35525
    },
    {
      "epoch": 0.000216827392578125,
      "step": 35525,
      "training_step_time": 0.4218904972076416
    },
    {
      "epoch": 0.00021683349609375,
      "model_forward_time": 0.11455416679382324,
      "step": 35526
    },
    {
      "epoch": 0.00021683349609375,
      "step": 35526,
      "training_step_time": 0.38794445991516113
    },
    {
      "epoch": 0.000216839599609375,
      "model_forward_time": 0.11461234092712402,
      "step": 35527
    },
    {
      "epoch": 0.000216839599609375,
      "step": 35527,
      "training_step_time": 0.3890094757080078
    },
    {
      "epoch": 0.000216845703125,
      "model_forward_time": 0.11510491371154785,
      "step": 35528
    },
    {
      "epoch": 0.000216845703125,
      "step": 35528,
      "training_step_time": 0.38866138458251953
    },
    {
      "epoch": 0.000216851806640625,
      "model_forward_time": 0.11498641967773438,
      "step": 35529
    },
    {
      "epoch": 0.000216851806640625,
      "step": 35529,
      "training_step_time": 0.3856320381164551
    },
    {
      "epoch": 0.00021685791015625,
      "grad_norm": 0.16232630610466003,
      "learning_rate": 3.898528792974056e-05,
      "loss": 0.037,
      "step": 35530
    },
    {
      "epoch": 0.00021685791015625,
      "model_forward_time": 0.11580944061279297,
      "step": 35530
    },
    {
      "epoch": 0.00021685791015625,
      "step": 35530,
      "training_step_time": 0.8279666900634766
    },
    {
      "epoch": 0.000216864013671875,
      "model_forward_time": 0.11444473266601562,
      "step": 35531
    },
    {
      "epoch": 0.000216864013671875,
      "step": 35531,
      "training_step_time": 0.4245481491088867
    },
    {
      "epoch": 0.0002168701171875,
      "model_forward_time": 0.11532068252563477,
      "step": 35532
    },
    {
      "epoch": 0.0002168701171875,
      "step": 35532,
      "training_step_time": 0.4529759883880615
    },
    {
      "epoch": 0.000216876220703125,
      "model_forward_time": 0.11439180374145508,
      "step": 35533
    },
    {
      "epoch": 0.000216876220703125,
      "step": 35533,
      "training_step_time": 0.4063999652862549
    },
    {
      "epoch": 0.00021688232421875,
      "model_forward_time": 0.11465930938720703,
      "step": 35534
    },
    {
      "epoch": 0.00021688232421875,
      "step": 35534,
      "training_step_time": 0.38046956062316895
    },
    {
      "epoch": 0.000216888427734375,
      "model_forward_time": 0.11427807807922363,
      "step": 35535
    },
    {
      "epoch": 0.000216888427734375,
      "step": 35535,
      "training_step_time": 0.39930295944213867
    },
    {
      "epoch": 0.00021689453125,
      "model_forward_time": 0.11516451835632324,
      "step": 35536
    },
    {
      "epoch": 0.00021689453125,
      "step": 35536,
      "training_step_time": 0.8487308025360107
    },
    {
      "epoch": 0.000216900634765625,
      "model_forward_time": 0.1147153377532959,
      "step": 35537
    },
    {
      "epoch": 0.000216900634765625,
      "step": 35537,
      "training_step_time": 0.4318673610687256
    },
    {
      "epoch": 0.00021690673828125,
      "model_forward_time": 0.11449337005615234,
      "step": 35538
    },
    {
      "epoch": 0.00021690673828125,
      "step": 35538,
      "training_step_time": 0.47229766845703125
    },
    {
      "epoch": 0.000216912841796875,
      "model_forward_time": 0.11434507369995117,
      "step": 35539
    },
    {
      "epoch": 0.000216912841796875,
      "step": 35539,
      "training_step_time": 0.385911226272583
    },
    {
      "epoch": 0.0002169189453125,
      "grad_norm": 0.14156407117843628,
      "learning_rate": 3.895840877416249e-05,
      "loss": 0.04,
      "step": 35540
    },
    {
      "epoch": 0.0002169189453125,
      "model_forward_time": 0.11427140235900879,
      "step": 35540
    },
    {
      "epoch": 0.0002169189453125,
      "step": 35540,
      "training_step_time": 0.3856782913208008
    },
    {
      "epoch": 0.000216925048828125,
      "model_forward_time": 0.11445331573486328,
      "step": 35541
    },
    {
      "epoch": 0.000216925048828125,
      "step": 35541,
      "training_step_time": 0.38210535049438477
    },
    {
      "epoch": 0.00021693115234375,
      "model_forward_time": 0.11487722396850586,
      "step": 35542
    },
    {
      "epoch": 0.00021693115234375,
      "step": 35542,
      "training_step_time": 0.751152753829956
    },
    {
      "epoch": 0.000216937255859375,
      "model_forward_time": 0.11487150192260742,
      "step": 35543
    },
    {
      "epoch": 0.000216937255859375,
      "step": 35543,
      "training_step_time": 0.39384984970092773
    },
    {
      "epoch": 0.000216943359375,
      "model_forward_time": 0.11456060409545898,
      "step": 35544
    },
    {
      "epoch": 0.000216943359375,
      "step": 35544,
      "training_step_time": 0.38823914527893066
    },
    {
      "epoch": 0.000216949462890625,
      "model_forward_time": 0.11492633819580078,
      "step": 35545
    },
    {
      "epoch": 0.000216949462890625,
      "step": 35545,
      "training_step_time": 0.4505281448364258
    },
    {
      "epoch": 0.00021695556640625,
      "model_forward_time": 0.11441183090209961,
      "step": 35546
    },
    {
      "epoch": 0.00021695556640625,
      "step": 35546,
      "training_step_time": 0.475597620010376
    },
    {
      "epoch": 0.000216961669921875,
      "model_forward_time": 0.11542010307312012,
      "step": 35547
    },
    {
      "epoch": 0.000216961669921875,
      "step": 35547,
      "training_step_time": 0.40031933784484863
    },
    {
      "epoch": 0.0002169677734375,
      "model_forward_time": 0.11483931541442871,
      "step": 35548
    },
    {
      "epoch": 0.0002169677734375,
      "step": 35548,
      "training_step_time": 0.5685305595397949
    },
    {
      "epoch": 0.000216973876953125,
      "model_forward_time": 0.11426305770874023,
      "step": 35549
    },
    {
      "epoch": 0.000216973876953125,
      "step": 35549,
      "training_step_time": 0.3832223415374756
    },
    {
      "epoch": 0.00021697998046875,
      "grad_norm": 0.12180685997009277,
      "learning_rate": 3.8931532972728285e-05,
      "loss": 0.0378,
      "step": 35550
    },
    {
      "epoch": 0.00021697998046875,
      "model_forward_time": 0.1150057315826416,
      "step": 35550
    },
    {
      "epoch": 0.00021697998046875,
      "step": 35550,
      "training_step_time": 0.39240550994873047
    },
    {
      "epoch": 0.000216986083984375,
      "model_forward_time": 0.11480832099914551,
      "step": 35551
    },
    {
      "epoch": 0.000216986083984375,
      "step": 35551,
      "training_step_time": 0.5203380584716797
    },
    {
      "epoch": 0.0002169921875,
      "model_forward_time": 0.11498093605041504,
      "step": 35552
    },
    {
      "epoch": 0.0002169921875,
      "step": 35552,
      "training_step_time": 0.3935980796813965
    },
    {
      "epoch": 0.000216998291015625,
      "model_forward_time": 0.11463809013366699,
      "step": 35553
    },
    {
      "epoch": 0.000216998291015625,
      "step": 35553,
      "training_step_time": 0.3931465148925781
    },
    {
      "epoch": 0.00021700439453125,
      "model_forward_time": 0.11528706550598145,
      "step": 35554
    },
    {
      "epoch": 0.00021700439453125,
      "step": 35554,
      "training_step_time": 0.5719971656799316
    },
    {
      "epoch": 0.000217010498046875,
      "model_forward_time": 0.11516070365905762,
      "step": 35555
    },
    {
      "epoch": 0.000217010498046875,
      "step": 35555,
      "training_step_time": 0.3878974914550781
    },
    {
      "epoch": 0.0002170166015625,
      "model_forward_time": 0.11530876159667969,
      "step": 35556
    },
    {
      "epoch": 0.0002170166015625,
      "step": 35556,
      "training_step_time": 0.4358181953430176
    },
    {
      "epoch": 0.000217022705078125,
      "model_forward_time": 0.11462283134460449,
      "step": 35557
    },
    {
      "epoch": 0.000217022705078125,
      "step": 35557,
      "training_step_time": 0.4288923740386963
    },
    {
      "epoch": 0.00021702880859375,
      "model_forward_time": 0.11521220207214355,
      "step": 35558
    },
    {
      "epoch": 0.00021702880859375,
      "step": 35558,
      "training_step_time": 0.3998143672943115
    },
    {
      "epoch": 0.000217034912109375,
      "model_forward_time": 0.11697053909301758,
      "step": 35559
    },
    {
      "epoch": 0.000217034912109375,
      "step": 35559,
      "training_step_time": 0.4827134609222412
    },
    {
      "epoch": 0.000217041015625,
      "grad_norm": 0.1256524920463562,
      "learning_rate": 3.890466053360211e-05,
      "loss": 0.0384,
      "step": 35560
    },
    {
      "epoch": 0.000217041015625,
      "model_forward_time": 0.11583614349365234,
      "step": 35560
    },
    {
      "epoch": 0.000217041015625,
      "step": 35560,
      "training_step_time": 0.7597198486328125
    },
    {
      "epoch": 0.000217047119140625,
      "model_forward_time": 0.11401176452636719,
      "step": 35561
    },
    {
      "epoch": 0.000217047119140625,
      "step": 35561,
      "training_step_time": 0.38252687454223633
    },
    {
      "epoch": 0.00021705322265625,
      "model_forward_time": 0.11462831497192383,
      "step": 35562
    },
    {
      "epoch": 0.00021705322265625,
      "step": 35562,
      "training_step_time": 0.39514899253845215
    },
    {
      "epoch": 0.000217059326171875,
      "model_forward_time": 0.11450481414794922,
      "step": 35563
    },
    {
      "epoch": 0.000217059326171875,
      "step": 35563,
      "training_step_time": 0.4060521125793457
    },
    {
      "epoch": 0.0002170654296875,
      "model_forward_time": 0.11444878578186035,
      "step": 35564
    },
    {
      "epoch": 0.0002170654296875,
      "step": 35564,
      "training_step_time": 0.3814527988433838
    },
    {
      "epoch": 0.000217071533203125,
      "model_forward_time": 0.11556839942932129,
      "step": 35565
    },
    {
      "epoch": 0.000217071533203125,
      "step": 35565,
      "training_step_time": 0.45359015464782715
    },
    {
      "epoch": 0.00021707763671875,
      "model_forward_time": 0.11533284187316895,
      "step": 35566
    },
    {
      "epoch": 0.00021707763671875,
      "step": 35566,
      "training_step_time": 0.8441786766052246
    },
    {
      "epoch": 0.000217083740234375,
      "model_forward_time": 0.11438632011413574,
      "step": 35567
    },
    {
      "epoch": 0.000217083740234375,
      "step": 35567,
      "training_step_time": 0.3831338882446289
    },
    {
      "epoch": 0.00021708984375,
      "model_forward_time": 0.11492395401000977,
      "step": 35568
    },
    {
      "epoch": 0.00021708984375,
      "step": 35568,
      "training_step_time": 0.3929150104522705
    },
    {
      "epoch": 0.000217095947265625,
      "model_forward_time": 0.1151742935180664,
      "step": 35569
    },
    {
      "epoch": 0.000217095947265625,
      "step": 35569,
      "training_step_time": 0.39260268211364746
    },
    {
      "epoch": 0.00021710205078125,
      "grad_norm": 0.1243881955742836,
      "learning_rate": 3.8877791464947136e-05,
      "loss": 0.0383,
      "step": 35570
    },
    {
      "epoch": 0.00021710205078125,
      "model_forward_time": 0.11480021476745605,
      "step": 35570
    },
    {
      "epoch": 0.00021710205078125,
      "step": 35570,
      "training_step_time": 0.42543888092041016
    },
    {
      "epoch": 0.000217108154296875,
      "model_forward_time": 0.11437273025512695,
      "step": 35571
    },
    {
      "epoch": 0.000217108154296875,
      "step": 35571,
      "training_step_time": 0.39090919494628906
    },
    {
      "epoch": 0.0002171142578125,
      "model_forward_time": 0.11563324928283691,
      "step": 35572
    },
    {
      "epoch": 0.0002171142578125,
      "step": 35572,
      "training_step_time": 0.7428631782531738
    },
    {
      "epoch": 0.000217120361328125,
      "model_forward_time": 0.11443209648132324,
      "step": 35573
    },
    {
      "epoch": 0.000217120361328125,
      "step": 35573,
      "training_step_time": 0.38044214248657227
    },
    {
      "epoch": 0.00021712646484375,
      "model_forward_time": 0.11512303352355957,
      "step": 35574
    },
    {
      "epoch": 0.00021712646484375,
      "step": 35574,
      "training_step_time": 0.3835322856903076
    },
    {
      "epoch": 0.000217132568359375,
      "model_forward_time": 0.1150059700012207,
      "step": 35575
    },
    {
      "epoch": 0.000217132568359375,
      "step": 35575,
      "training_step_time": 0.39542722702026367
    },
    {
      "epoch": 0.000217138671875,
      "model_forward_time": 0.11440849304199219,
      "step": 35576
    },
    {
      "epoch": 0.000217138671875,
      "step": 35576,
      "training_step_time": 0.3932840824127197
    },
    {
      "epoch": 0.000217144775390625,
      "model_forward_time": 0.11491274833679199,
      "step": 35577
    },
    {
      "epoch": 0.000217144775390625,
      "step": 35577,
      "training_step_time": 0.4167187213897705
    },
    {
      "epoch": 0.00021715087890625,
      "model_forward_time": 0.1150517463684082,
      "step": 35578
    },
    {
      "epoch": 0.00021715087890625,
      "step": 35578,
      "training_step_time": 0.857905387878418
    },
    {
      "epoch": 0.000217156982421875,
      "model_forward_time": 0.11385774612426758,
      "step": 35579
    },
    {
      "epoch": 0.000217156982421875,
      "step": 35579,
      "training_step_time": 0.39296579360961914
    },
    {
      "epoch": 0.0002171630859375,
      "grad_norm": 0.16663993895053864,
      "learning_rate": 3.8850925774925425e-05,
      "loss": 0.0399,
      "step": 35580
    },
    {
      "epoch": 0.0002171630859375,
      "model_forward_time": 0.11424922943115234,
      "step": 35580
    },
    {
      "epoch": 0.0002171630859375,
      "step": 35580,
      "training_step_time": 0.39215946197509766
    },
    {
      "epoch": 0.000217169189453125,
      "model_forward_time": 0.1145639419555664,
      "step": 35581
    },
    {
      "epoch": 0.000217169189453125,
      "step": 35581,
      "training_step_time": 0.39557504653930664
    },
    {
      "epoch": 0.00021717529296875,
      "model_forward_time": 0.11523294448852539,
      "step": 35582
    },
    {
      "epoch": 0.00021717529296875,
      "step": 35582,
      "training_step_time": 0.37711644172668457
    },
    {
      "epoch": 0.000217181396484375,
      "model_forward_time": 0.11460065841674805,
      "step": 35583
    },
    {
      "epoch": 0.000217181396484375,
      "step": 35583,
      "training_step_time": 0.423520565032959
    },
    {
      "epoch": 0.0002171875,
      "model_forward_time": 0.11486935615539551,
      "step": 35584
    },
    {
      "epoch": 0.0002171875,
      "step": 35584,
      "training_step_time": 0.5336050987243652
    },
    {
      "epoch": 0.000217193603515625,
      "model_forward_time": 0.11515426635742188,
      "step": 35585
    },
    {
      "epoch": 0.000217193603515625,
      "step": 35585,
      "training_step_time": 0.49669408798217773
    },
    {
      "epoch": 0.00021719970703125,
      "model_forward_time": 0.11446619033813477,
      "step": 35586
    },
    {
      "epoch": 0.00021719970703125,
      "step": 35586,
      "training_step_time": 0.44815564155578613
    },
    {
      "epoch": 0.000217205810546875,
      "model_forward_time": 0.11537766456604004,
      "step": 35587
    },
    {
      "epoch": 0.000217205810546875,
      "step": 35587,
      "training_step_time": 0.3850259780883789
    },
    {
      "epoch": 0.0002172119140625,
      "model_forward_time": 0.11455035209655762,
      "step": 35588
    },
    {
      "epoch": 0.0002172119140625,
      "step": 35588,
      "training_step_time": 0.3790745735168457
    },
    {
      "epoch": 0.000217218017578125,
      "model_forward_time": 0.11512613296508789,
      "step": 35589
    },
    {
      "epoch": 0.000217218017578125,
      "step": 35589,
      "training_step_time": 0.38534069061279297
    },
    {
      "epoch": 0.00021722412109375,
      "grad_norm": 0.1269543617963791,
      "learning_rate": 3.8824063471698105e-05,
      "loss": 0.0348,
      "step": 35590
    },
    {
      "epoch": 0.00021722412109375,
      "model_forward_time": 0.11516261100769043,
      "step": 35590
    },
    {
      "epoch": 0.00021722412109375,
      "step": 35590,
      "training_step_time": 0.6313540935516357
    },
    {
      "epoch": 0.000217230224609375,
      "model_forward_time": 0.11485648155212402,
      "step": 35591
    },
    {
      "epoch": 0.000217230224609375,
      "step": 35591,
      "training_step_time": 0.38356566429138184
    },
    {
      "epoch": 0.000217236328125,
      "model_forward_time": 0.11568737030029297,
      "step": 35592
    },
    {
      "epoch": 0.000217236328125,
      "step": 35592,
      "training_step_time": 0.3946352005004883
    },
    {
      "epoch": 0.000217242431640625,
      "model_forward_time": 0.1149449348449707,
      "step": 35593
    },
    {
      "epoch": 0.000217242431640625,
      "step": 35593,
      "training_step_time": 0.3924565315246582
    },
    {
      "epoch": 0.00021724853515625,
      "model_forward_time": 0.11535072326660156,
      "step": 35594
    },
    {
      "epoch": 0.00021724853515625,
      "step": 35594,
      "training_step_time": 0.38590097427368164
    },
    {
      "epoch": 0.000217254638671875,
      "model_forward_time": 0.1150062084197998,
      "step": 35595
    },
    {
      "epoch": 0.000217254638671875,
      "step": 35595,
      "training_step_time": 0.39087700843811035
    },
    {
      "epoch": 0.0002172607421875,
      "model_forward_time": 0.11536026000976562,
      "step": 35596
    },
    {
      "epoch": 0.0002172607421875,
      "step": 35596,
      "training_step_time": 0.9148166179656982
    },
    {
      "epoch": 0.000217266845703125,
      "model_forward_time": 0.11548447608947754,
      "step": 35597
    },
    {
      "epoch": 0.000217266845703125,
      "step": 35597,
      "training_step_time": 0.375244140625
    },
    {
      "epoch": 0.00021727294921875,
      "model_forward_time": 0.11537051200866699,
      "step": 35598
    },
    {
      "epoch": 0.00021727294921875,
      "step": 35598,
      "training_step_time": 0.4329235553741455
    },
    {
      "epoch": 0.000217279052734375,
      "model_forward_time": 0.11460280418395996,
      "step": 35599
    },
    {
      "epoch": 0.000217279052734375,
      "step": 35599,
      "training_step_time": 0.43456292152404785
    },
    {
      "epoch": 0.00021728515625,
      "grad_norm": 0.13071689009666443,
      "learning_rate": 3.879720456342521e-05,
      "loss": 0.0394,
      "step": 35600
    },
    {
      "epoch": 0.00021728515625,
      "model_forward_time": 0.12079215049743652,
      "step": 35600
    },
    {
      "epoch": 0.00021728515625,
      "step": 35600,
      "training_step_time": 0.38480567932128906
    },
    {
      "epoch": 0.000217291259765625,
      "model_forward_time": 0.11443829536437988,
      "step": 35601
    },
    {
      "epoch": 0.000217291259765625,
      "step": 35601,
      "training_step_time": 0.38187742233276367
    },
    {
      "epoch": 0.00021729736328125,
      "model_forward_time": 0.11503434181213379,
      "step": 35602
    },
    {
      "epoch": 0.00021729736328125,
      "step": 35602,
      "training_step_time": 0.46146464347839355
    },
    {
      "epoch": 0.000217303466796875,
      "model_forward_time": 0.11515641212463379,
      "step": 35603
    },
    {
      "epoch": 0.000217303466796875,
      "step": 35603,
      "training_step_time": 0.4114711284637451
    },
    {
      "epoch": 0.0002173095703125,
      "model_forward_time": 0.11451554298400879,
      "step": 35604
    },
    {
      "epoch": 0.0002173095703125,
      "step": 35604,
      "training_step_time": 0.4685375690460205
    },
    {
      "epoch": 0.000217315673828125,
      "model_forward_time": 0.11567926406860352,
      "step": 35605
    },
    {
      "epoch": 0.000217315673828125,
      "step": 35605,
      "training_step_time": 0.39971375465393066
    },
    {
      "epoch": 0.00021732177734375,
      "model_forward_time": 0.11477088928222656,
      "step": 35606
    },
    {
      "epoch": 0.00021732177734375,
      "step": 35606,
      "training_step_time": 0.38680243492126465
    },
    {
      "epoch": 0.000217327880859375,
      "model_forward_time": 0.1154329776763916,
      "step": 35607
    },
    {
      "epoch": 0.000217327880859375,
      "step": 35607,
      "training_step_time": 0.38730740547180176
    },
    {
      "epoch": 0.000217333984375,
      "model_forward_time": 0.11566805839538574,
      "step": 35608
    },
    {
      "epoch": 0.000217333984375,
      "step": 35608,
      "training_step_time": 0.6809961795806885
    },
    {
      "epoch": 0.000217340087890625,
      "model_forward_time": 0.1145925521850586,
      "step": 35609
    },
    {
      "epoch": 0.000217340087890625,
      "step": 35609,
      "training_step_time": 0.39420247077941895
    },
    {
      "epoch": 0.00021734619140625,
      "grad_norm": 0.1627989262342453,
      "learning_rate": 3.877034905826577e-05,
      "loss": 0.0401,
      "step": 35610
    },
    {
      "epoch": 0.00021734619140625,
      "model_forward_time": 0.11505913734436035,
      "step": 35610
    },
    {
      "epoch": 0.00021734619140625,
      "step": 35610,
      "training_step_time": 0.4110598564147949
    },
    {
      "epoch": 0.000217352294921875,
      "model_forward_time": 0.11476397514343262,
      "step": 35611
    },
    {
      "epoch": 0.000217352294921875,
      "step": 35611,
      "training_step_time": 0.3611111640930176
    },
    {
      "epoch": 0.0002173583984375,
      "model_forward_time": 0.11479330062866211,
      "step": 35612
    },
    {
      "epoch": 0.0002173583984375,
      "step": 35612,
      "training_step_time": 0.416379451751709
    },
    {
      "epoch": 0.000217364501953125,
      "model_forward_time": 0.11577320098876953,
      "step": 35613
    },
    {
      "epoch": 0.000217364501953125,
      "step": 35613,
      "training_step_time": 0.39917445182800293
    },
    {
      "epoch": 0.00021737060546875,
      "model_forward_time": 0.11567854881286621,
      "step": 35614
    },
    {
      "epoch": 0.00021737060546875,
      "step": 35614,
      "training_step_time": 0.9247584342956543
    },
    {
      "epoch": 0.000217376708984375,
      "model_forward_time": 0.11574482917785645,
      "step": 35615
    },
    {
      "epoch": 0.000217376708984375,
      "step": 35615,
      "training_step_time": 0.622567892074585
    },
    {
      "epoch": 0.0002173828125,
      "model_forward_time": 0.11909365653991699,
      "step": 35616
    },
    {
      "epoch": 0.0002173828125,
      "step": 35616,
      "training_step_time": 0.7001018524169922
    },
    {
      "epoch": 0.000217388916015625,
      "model_forward_time": 0.11591958999633789,
      "step": 35617
    },
    {
      "epoch": 0.000217388916015625,
      "step": 35617,
      "training_step_time": 0.7179820537567139
    },
    {
      "epoch": 0.00021739501953125,
      "model_forward_time": 0.11603403091430664,
      "step": 35618
    },
    {
      "epoch": 0.00021739501953125,
      "step": 35618,
      "training_step_time": 0.6710860729217529
    },
    {
      "epoch": 0.000217401123046875,
      "model_forward_time": 0.12082433700561523,
      "step": 35619
    },
    {
      "epoch": 0.000217401123046875,
      "step": 35619,
      "training_step_time": 0.7057733535766602
    },
    {
      "epoch": 0.0002174072265625,
      "grad_norm": 0.1265566051006317,
      "learning_rate": 3.87434969643778e-05,
      "loss": 0.0375,
      "step": 35620
    },
    {
      "epoch": 0.0002174072265625,
      "model_forward_time": 0.11946606636047363,
      "step": 35620
    },
    {
      "epoch": 0.0002174072265625,
      "step": 35620,
      "training_step_time": 0.6774330139160156
    },
    {
      "epoch": 0.000217413330078125,
      "model_forward_time": 0.12112975120544434,
      "step": 35621
    },
    {
      "epoch": 0.000217413330078125,
      "step": 35621,
      "training_step_time": 0.5994882583618164
    },
    {
      "epoch": 0.00021741943359375,
      "model_forward_time": 0.12202000617980957,
      "step": 35622
    },
    {
      "epoch": 0.00021741943359375,
      "step": 35622,
      "training_step_time": 0.6588547229766846
    },
    {
      "epoch": 0.000217425537109375,
      "model_forward_time": 0.13112139701843262,
      "step": 35623
    },
    {
      "epoch": 0.000217425537109375,
      "step": 35623,
      "training_step_time": 0.6737155914306641
    },
    {
      "epoch": 0.000217431640625,
      "model_forward_time": 0.12097573280334473,
      "step": 35624
    },
    {
      "epoch": 0.000217431640625,
      "step": 35624,
      "training_step_time": 0.6552157402038574
    },
    {
      "epoch": 0.000217437744140625,
      "model_forward_time": 0.11704158782958984,
      "step": 35625
    },
    {
      "epoch": 0.000217437744140625,
      "step": 35625,
      "training_step_time": 0.6495010852813721
    },
    {
      "epoch": 0.00021744384765625,
      "model_forward_time": 0.11970758438110352,
      "step": 35626
    },
    {
      "epoch": 0.00021744384765625,
      "step": 35626,
      "training_step_time": 0.6648764610290527
    },
    {
      "epoch": 0.000217449951171875,
      "model_forward_time": 0.11821269989013672,
      "step": 35627
    },
    {
      "epoch": 0.000217449951171875,
      "step": 35627,
      "training_step_time": 0.7126832008361816
    },
    {
      "epoch": 0.0002174560546875,
      "model_forward_time": 0.12280964851379395,
      "step": 35628
    },
    {
      "epoch": 0.0002174560546875,
      "step": 35628,
      "training_step_time": 0.6201448440551758
    },
    {
      "epoch": 0.000217462158203125,
      "model_forward_time": 0.12878799438476562,
      "step": 35629
    },
    {
      "epoch": 0.000217462158203125,
      "step": 35629,
      "training_step_time": 0.6979999542236328
    },
    {
      "epoch": 0.00021746826171875,
      "grad_norm": 0.11844093352556229,
      "learning_rate": 3.871664828991822e-05,
      "loss": 0.0425,
      "step": 35630
    },
    {
      "epoch": 0.00021746826171875,
      "model_forward_time": 0.11766910552978516,
      "step": 35630
    },
    {
      "epoch": 0.00021746826171875,
      "step": 35630,
      "training_step_time": 0.6800656318664551
    },
    {
      "epoch": 0.000217474365234375,
      "model_forward_time": 0.11818838119506836,
      "step": 35631
    },
    {
      "epoch": 0.000217474365234375,
      "step": 35631,
      "training_step_time": 0.677208423614502
    },
    {
      "epoch": 0.00021748046875,
      "model_forward_time": 0.12740731239318848,
      "step": 35632
    },
    {
      "epoch": 0.00021748046875,
      "step": 35632,
      "training_step_time": 0.7716188430786133
    },
    {
      "epoch": 0.000217486572265625,
      "model_forward_time": 0.11923646926879883,
      "step": 35633
    },
    {
      "epoch": 0.000217486572265625,
      "step": 35633,
      "training_step_time": 0.6853575706481934
    },
    {
      "epoch": 0.00021749267578125,
      "model_forward_time": 0.11835503578186035,
      "step": 35634
    },
    {
      "epoch": 0.00021749267578125,
      "step": 35634,
      "training_step_time": 0.7904794216156006
    },
    {
      "epoch": 0.000217498779296875,
      "model_forward_time": 0.1181640625,
      "step": 35635
    },
    {
      "epoch": 0.000217498779296875,
      "step": 35635,
      "training_step_time": 0.7181715965270996
    },
    {
      "epoch": 0.0002175048828125,
      "model_forward_time": 0.1169736385345459,
      "step": 35636
    },
    {
      "epoch": 0.0002175048828125,
      "step": 35636,
      "training_step_time": 0.6556081771850586
    },
    {
      "epoch": 0.000217510986328125,
      "model_forward_time": 0.11883854866027832,
      "step": 35637
    },
    {
      "epoch": 0.000217510986328125,
      "step": 35637,
      "training_step_time": 0.6514060497283936
    },
    {
      "epoch": 0.00021751708984375,
      "model_forward_time": 0.11567378044128418,
      "step": 35638
    },
    {
      "epoch": 0.00021751708984375,
      "step": 35638,
      "training_step_time": 0.6411764621734619
    },
    {
      "epoch": 0.000217523193359375,
      "model_forward_time": 0.11638355255126953,
      "step": 35639
    },
    {
      "epoch": 0.000217523193359375,
      "step": 35639,
      "training_step_time": 0.6313924789428711
    },
    {
      "epoch": 0.000217529296875,
      "grad_norm": 0.12459925562143326,
      "learning_rate": 3.8689803043043e-05,
      "loss": 0.051,
      "step": 35640
    },
    {
      "epoch": 0.000217529296875,
      "model_forward_time": 0.12041401863098145,
      "step": 35640
    },
    {
      "epoch": 0.000217529296875,
      "step": 35640,
      "training_step_time": 0.770427942276001
    },
    {
      "epoch": 0.000217535400390625,
      "model_forward_time": 0.11670041084289551,
      "step": 35641
    },
    {
      "epoch": 0.000217535400390625,
      "step": 35641,
      "training_step_time": 0.7801759243011475
    },
    {
      "epoch": 0.00021754150390625,
      "model_forward_time": 0.11969971656799316,
      "step": 35642
    },
    {
      "epoch": 0.00021754150390625,
      "step": 35642,
      "training_step_time": 0.6291084289550781
    },
    {
      "epoch": 0.000217547607421875,
      "model_forward_time": 0.1213681697845459,
      "step": 35643
    },
    {
      "epoch": 0.000217547607421875,
      "step": 35643,
      "training_step_time": 0.6896052360534668
    },
    {
      "epoch": 0.0002175537109375,
      "model_forward_time": 0.12298917770385742,
      "step": 35644
    },
    {
      "epoch": 0.0002175537109375,
      "step": 35644,
      "training_step_time": 0.6624062061309814
    },
    {
      "epoch": 0.000217559814453125,
      "model_forward_time": 0.11655664443969727,
      "step": 35645
    },
    {
      "epoch": 0.000217559814453125,
      "step": 35645,
      "training_step_time": 0.6958444118499756
    },
    {
      "epoch": 0.00021756591796875,
      "model_forward_time": 0.12019610404968262,
      "step": 35646
    },
    {
      "epoch": 0.00021756591796875,
      "step": 35646,
      "training_step_time": 0.693701982498169
    },
    {
      "epoch": 0.000217572021484375,
      "model_forward_time": 0.11823391914367676,
      "step": 35647
    },
    {
      "epoch": 0.000217572021484375,
      "step": 35647,
      "training_step_time": 0.6800632476806641
    },
    {
      "epoch": 0.000217578125,
      "model_forward_time": 0.11987686157226562,
      "step": 35648
    },
    {
      "epoch": 0.000217578125,
      "step": 35648,
      "training_step_time": 0.6650643348693848
    },
    {
      "epoch": 0.000217584228515625,
      "model_forward_time": 0.1179053783416748,
      "step": 35649
    },
    {
      "epoch": 0.000217584228515625,
      "step": 35649,
      "training_step_time": 0.7209289073944092
    },
    {
      "epoch": 0.00021759033203125,
      "grad_norm": 0.09953147917985916,
      "learning_rate": 3.866296123190696e-05,
      "loss": 0.0448,
      "step": 35650
    },
    {
      "epoch": 0.00021759033203125,
      "model_forward_time": 0.12269353866577148,
      "step": 35650
    },
    {
      "epoch": 0.00021759033203125,
      "step": 35650,
      "training_step_time": 0.7246365547180176
    },
    {
      "epoch": 0.000217596435546875,
      "model_forward_time": 0.11937403678894043,
      "step": 35651
    },
    {
      "epoch": 0.000217596435546875,
      "step": 35651,
      "training_step_time": 0.6507740020751953
    },
    {
      "epoch": 0.0002176025390625,
      "model_forward_time": 0.12957429885864258,
      "step": 35652
    },
    {
      "epoch": 0.0002176025390625,
      "step": 35652,
      "training_step_time": 0.6912963390350342
    },
    {
      "epoch": 0.000217608642578125,
      "model_forward_time": 0.12172746658325195,
      "step": 35653
    },
    {
      "epoch": 0.000217608642578125,
      "step": 35653,
      "training_step_time": 0.768826961517334
    },
    {
      "epoch": 0.00021761474609375,
      "model_forward_time": 0.11654543876647949,
      "step": 35654
    },
    {
      "epoch": 0.00021761474609375,
      "step": 35654,
      "training_step_time": 0.6512405872344971
    },
    {
      "epoch": 0.000217620849609375,
      "model_forward_time": 0.1188511848449707,
      "step": 35655
    },
    {
      "epoch": 0.000217620849609375,
      "step": 35655,
      "training_step_time": 0.6624729633331299
    },
    {
      "epoch": 0.000217626953125,
      "model_forward_time": 0.11707782745361328,
      "step": 35656
    },
    {
      "epoch": 0.000217626953125,
      "step": 35656,
      "training_step_time": 0.7018804550170898
    },
    {
      "epoch": 0.000217633056640625,
      "model_forward_time": 0.11768722534179688,
      "step": 35657
    },
    {
      "epoch": 0.000217633056640625,
      "step": 35657,
      "training_step_time": 0.6429245471954346
    },
    {
      "epoch": 0.00021763916015625,
      "model_forward_time": 0.12094664573669434,
      "step": 35658
    },
    {
      "epoch": 0.00021763916015625,
      "step": 35658,
      "training_step_time": 0.7234146595001221
    },
    {
      "epoch": 0.000217645263671875,
      "model_forward_time": 0.1348404884338379,
      "step": 35659
    },
    {
      "epoch": 0.000217645263671875,
      "step": 35659,
      "training_step_time": 0.7590036392211914
    },
    {
      "epoch": 0.0002176513671875,
      "grad_norm": 0.10568614304065704,
      "learning_rate": 3.863612286466396e-05,
      "loss": 0.0415,
      "step": 35660
    },
    {
      "epoch": 0.0002176513671875,
      "model_forward_time": 0.12140607833862305,
      "step": 35660
    },
    {
      "epoch": 0.0002176513671875,
      "step": 35660,
      "training_step_time": 0.7040958404541016
    },
    {
      "epoch": 0.000217657470703125,
      "model_forward_time": 0.11913371086120605,
      "step": 35661
    },
    {
      "epoch": 0.000217657470703125,
      "step": 35661,
      "training_step_time": 0.6500918865203857
    },
    {
      "epoch": 0.00021766357421875,
      "model_forward_time": 0.11821174621582031,
      "step": 35662
    },
    {
      "epoch": 0.00021766357421875,
      "step": 35662,
      "training_step_time": 0.6929323673248291
    },
    {
      "epoch": 0.000217669677734375,
      "model_forward_time": 0.12220454216003418,
      "step": 35663
    },
    {
      "epoch": 0.000217669677734375,
      "step": 35663,
      "training_step_time": 0.6866469383239746
    },
    {
      "epoch": 0.00021767578125,
      "model_forward_time": 0.12127280235290527,
      "step": 35664
    },
    {
      "epoch": 0.00021767578125,
      "step": 35664,
      "training_step_time": 0.6868538856506348
    },
    {
      "epoch": 0.000217681884765625,
      "model_forward_time": 0.11616992950439453,
      "step": 35665
    },
    {
      "epoch": 0.000217681884765625,
      "step": 35665,
      "training_step_time": 0.6760303974151611
    },
    {
      "epoch": 0.00021768798828125,
      "model_forward_time": 0.11937427520751953,
      "step": 35666
    },
    {
      "epoch": 0.00021768798828125,
      "step": 35666,
      "training_step_time": 0.6469831466674805
    },
    {
      "epoch": 0.000217694091796875,
      "model_forward_time": 0.1191706657409668,
      "step": 35667
    },
    {
      "epoch": 0.000217694091796875,
      "step": 35667,
      "training_step_time": 0.6489608287811279
    },
    {
      "epoch": 0.0002177001953125,
      "model_forward_time": 0.12097692489624023,
      "step": 35668
    },
    {
      "epoch": 0.0002177001953125,
      "step": 35668,
      "training_step_time": 0.6507008075714111
    },
    {
      "epoch": 0.000217706298828125,
      "model_forward_time": 0.1260218620300293,
      "step": 35669
    },
    {
      "epoch": 0.000217706298828125,
      "step": 35669,
      "training_step_time": 0.7447550296783447
    },
    {
      "epoch": 0.00021771240234375,
      "grad_norm": 0.1401629000902176,
      "learning_rate": 3.860928794946682e-05,
      "loss": 0.0488,
      "step": 35670
    },
    {
      "epoch": 0.00021771240234375,
      "model_forward_time": 0.12113809585571289,
      "step": 35670
    },
    {
      "epoch": 0.00021771240234375,
      "step": 35670,
      "training_step_time": 0.6802172660827637
    },
    {
      "epoch": 0.000217718505859375,
      "model_forward_time": 0.11823844909667969,
      "step": 35671
    },
    {
      "epoch": 0.000217718505859375,
      "step": 35671,
      "training_step_time": 0.7190940380096436
    },
    {
      "epoch": 0.000217724609375,
      "model_forward_time": 0.11864638328552246,
      "step": 35672
    },
    {
      "epoch": 0.000217724609375,
      "step": 35672,
      "training_step_time": 0.6584148406982422
    },
    {
      "epoch": 0.000217730712890625,
      "model_forward_time": 0.11663365364074707,
      "step": 35673
    },
    {
      "epoch": 0.000217730712890625,
      "step": 35673,
      "training_step_time": 0.6330394744873047
    },
    {
      "epoch": 0.00021773681640625,
      "model_forward_time": 0.1322643756866455,
      "step": 35674
    },
    {
      "epoch": 0.00021773681640625,
      "step": 35674,
      "training_step_time": 0.712893009185791
    },
    {
      "epoch": 0.000217742919921875,
      "model_forward_time": 0.11847853660583496,
      "step": 35675
    },
    {
      "epoch": 0.000217742919921875,
      "step": 35675,
      "training_step_time": 0.6077752113342285
    },
    {
      "epoch": 0.0002177490234375,
      "model_forward_time": 0.11813068389892578,
      "step": 35676
    },
    {
      "epoch": 0.0002177490234375,
      "step": 35676,
      "training_step_time": 0.6617906093597412
    },
    {
      "epoch": 0.000217755126953125,
      "model_forward_time": 0.11969232559204102,
      "step": 35677
    },
    {
      "epoch": 0.000217755126953125,
      "step": 35677,
      "training_step_time": 0.662938117980957
    },
    {
      "epoch": 0.00021776123046875,
      "model_forward_time": 0.11952972412109375,
      "step": 35678
    },
    {
      "epoch": 0.00021776123046875,
      "step": 35678,
      "training_step_time": 0.7401480674743652
    },
    {
      "epoch": 0.000217767333984375,
      "model_forward_time": 0.12124061584472656,
      "step": 35679
    },
    {
      "epoch": 0.000217767333984375,
      "step": 35679,
      "training_step_time": 0.7199063301086426
    },
    {
      "epoch": 0.0002177734375,
      "grad_norm": 0.12679380178451538,
      "learning_rate": 3.858245649446721e-05,
      "loss": 0.0454,
      "step": 35680
    },
    {
      "epoch": 0.0002177734375,
      "model_forward_time": 0.11855936050415039,
      "step": 35680
    },
    {
      "epoch": 0.0002177734375,
      "step": 35680,
      "training_step_time": 0.6797544956207275
    },
    {
      "epoch": 0.000217779541015625,
      "model_forward_time": 0.12139081954956055,
      "step": 35681
    },
    {
      "epoch": 0.000217779541015625,
      "step": 35681,
      "training_step_time": 0.564359188079834
    },
    {
      "epoch": 0.00021778564453125,
      "model_forward_time": 0.12070560455322266,
      "step": 35682
    },
    {
      "epoch": 0.00021778564453125,
      "step": 35682,
      "training_step_time": 0.6125061511993408
    },
    {
      "epoch": 0.000217791748046875,
      "model_forward_time": 0.11768460273742676,
      "step": 35683
    },
    {
      "epoch": 0.000217791748046875,
      "step": 35683,
      "training_step_time": 0.5092051029205322
    },
    {
      "epoch": 0.0002177978515625,
      "model_forward_time": 0.1173396110534668,
      "step": 35684
    },
    {
      "epoch": 0.0002177978515625,
      "step": 35684,
      "training_step_time": 0.42493510246276855
    },
    {
      "epoch": 0.000217803955078125,
      "model_forward_time": 0.11720061302185059,
      "step": 35685
    },
    {
      "epoch": 0.000217803955078125,
      "step": 35685,
      "training_step_time": 0.44728779792785645
    },
    {
      "epoch": 0.00021781005859375,
      "model_forward_time": 0.11646199226379395,
      "step": 35686
    },
    {
      "epoch": 0.00021781005859375,
      "step": 35686,
      "training_step_time": 0.4391484260559082
    },
    {
      "epoch": 0.000217816162109375,
      "model_forward_time": 0.11739468574523926,
      "step": 35687
    },
    {
      "epoch": 0.000217816162109375,
      "step": 35687,
      "training_step_time": 0.4360783100128174
    },
    {
      "epoch": 0.000217822265625,
      "model_forward_time": 0.1166222095489502,
      "step": 35688
    },
    {
      "epoch": 0.000217822265625,
      "step": 35688,
      "training_step_time": 0.41503310203552246
    },
    {
      "epoch": 0.000217828369140625,
      "model_forward_time": 0.11670804023742676,
      "step": 35689
    },
    {
      "epoch": 0.000217828369140625,
      "step": 35689,
      "training_step_time": 0.4087698459625244
    },
    {
      "epoch": 0.00021783447265625,
      "grad_norm": 0.13767865300178528,
      "learning_rate": 3.855562850781589e-05,
      "loss": 0.0477,
      "step": 35690
    },
    {
      "epoch": 0.00021783447265625,
      "model_forward_time": 0.11663436889648438,
      "step": 35690
    },
    {
      "epoch": 0.00021783447265625,
      "step": 35690,
      "training_step_time": 0.438126802444458
    },
    {
      "epoch": 0.000217840576171875,
      "model_forward_time": 0.11553764343261719,
      "step": 35691
    },
    {
      "epoch": 0.000217840576171875,
      "step": 35691,
      "training_step_time": 0.421661376953125
    },
    {
      "epoch": 0.0002178466796875,
      "model_forward_time": 0.11537671089172363,
      "step": 35692
    },
    {
      "epoch": 0.0002178466796875,
      "step": 35692,
      "training_step_time": 0.4508955478668213
    },
    {
      "epoch": 0.000217852783203125,
      "model_forward_time": 0.115142822265625,
      "step": 35693
    },
    {
      "epoch": 0.000217852783203125,
      "step": 35693,
      "training_step_time": 0.37868809700012207
    },
    {
      "epoch": 0.00021785888671875,
      "model_forward_time": 0.11507821083068848,
      "step": 35694
    },
    {
      "epoch": 0.00021785888671875,
      "step": 35694,
      "training_step_time": 0.4803445339202881
    },
    {
      "epoch": 0.000217864990234375,
      "model_forward_time": 0.11516022682189941,
      "step": 35695
    },
    {
      "epoch": 0.000217864990234375,
      "step": 35695,
      "training_step_time": 0.4229865074157715
    },
    {
      "epoch": 0.00021787109375,
      "model_forward_time": 0.11471676826477051,
      "step": 35696
    },
    {
      "epoch": 0.00021787109375,
      "step": 35696,
      "training_step_time": 0.4208664894104004
    },
    {
      "epoch": 0.000217877197265625,
      "model_forward_time": 0.11579656600952148,
      "step": 35697
    },
    {
      "epoch": 0.000217877197265625,
      "step": 35697,
      "training_step_time": 0.41071248054504395
    },
    {
      "epoch": 0.00021788330078125,
      "model_forward_time": 0.11491751670837402,
      "step": 35698
    },
    {
      "epoch": 0.00021788330078125,
      "step": 35698,
      "training_step_time": 0.4028029441833496
    },
    {
      "epoch": 0.000217889404296875,
      "model_forward_time": 0.12027740478515625,
      "step": 35699
    },
    {
      "epoch": 0.000217889404296875,
      "step": 35699,
      "training_step_time": 0.38234663009643555
    },
    {
      "epoch": 0.0002178955078125,
      "grad_norm": 0.13448412716388702,
      "learning_rate": 3.852880399766243e-05,
      "loss": 0.0466,
      "step": 35700
    },
    {
      "epoch": 0.0002178955078125,
      "model_forward_time": 0.11529135704040527,
      "step": 35700
    },
    {
      "epoch": 0.0002178955078125,
      "step": 35700,
      "training_step_time": 0.3870728015899658
    },
    {
      "epoch": 0.000217901611328125,
      "model_forward_time": 0.11536478996276855,
      "step": 35701
    },
    {
      "epoch": 0.000217901611328125,
      "step": 35701,
      "training_step_time": 0.40320873260498047
    },
    {
      "epoch": 0.00021790771484375,
      "model_forward_time": 0.11519503593444824,
      "step": 35702
    },
    {
      "epoch": 0.00021790771484375,
      "step": 35702,
      "training_step_time": 0.3914937973022461
    },
    {
      "epoch": 0.000217913818359375,
      "model_forward_time": 0.11585378646850586,
      "step": 35703
    },
    {
      "epoch": 0.000217913818359375,
      "step": 35703,
      "training_step_time": 0.3858177661895752
    },
    {
      "epoch": 0.000217919921875,
      "model_forward_time": 0.11588716506958008,
      "step": 35704
    },
    {
      "epoch": 0.000217919921875,
      "step": 35704,
      "training_step_time": 0.4415743350982666
    },
    {
      "epoch": 0.000217926025390625,
      "model_forward_time": 0.11596345901489258,
      "step": 35705
    },
    {
      "epoch": 0.000217926025390625,
      "step": 35705,
      "training_step_time": 0.45928072929382324
    },
    {
      "epoch": 0.00021793212890625,
      "model_forward_time": 0.11485791206359863,
      "step": 35706
    },
    {
      "epoch": 0.00021793212890625,
      "step": 35706,
      "training_step_time": 0.5073931217193604
    },
    {
      "epoch": 0.000217938232421875,
      "model_forward_time": 0.11549091339111328,
      "step": 35707
    },
    {
      "epoch": 0.000217938232421875,
      "step": 35707,
      "training_step_time": 0.42856645584106445
    },
    {
      "epoch": 0.0002179443359375,
      "model_forward_time": 0.11521196365356445,
      "step": 35708
    },
    {
      "epoch": 0.0002179443359375,
      "step": 35708,
      "training_step_time": 0.4891035556793213
    },
    {
      "epoch": 0.000217950439453125,
      "model_forward_time": 0.11508417129516602,
      "step": 35709
    },
    {
      "epoch": 0.000217950439453125,
      "step": 35709,
      "training_step_time": 0.42932891845703125
    },
    {
      "epoch": 0.00021795654296875,
      "grad_norm": 0.13680368661880493,
      "learning_rate": 3.850198297215543e-05,
      "loss": 0.0444,
      "step": 35710
    },
    {
      "epoch": 0.00021795654296875,
      "model_forward_time": 0.11499190330505371,
      "step": 35710
    },
    {
      "epoch": 0.00021795654296875,
      "step": 35710,
      "training_step_time": 0.47092342376708984
    },
    {
      "epoch": 0.000217962646484375,
      "model_forward_time": 0.11485838890075684,
      "step": 35711
    },
    {
      "epoch": 0.000217962646484375,
      "step": 35711,
      "training_step_time": 0.3716466426849365
    },
    {
      "epoch": 0.00021796875,
      "model_forward_time": 0.11493921279907227,
      "step": 35712
    },
    {
      "epoch": 0.00021796875,
      "step": 35712,
      "training_step_time": 0.3993866443634033
    },
    {
      "epoch": 0.000217974853515625,
      "model_forward_time": 0.11453557014465332,
      "step": 35713
    },
    {
      "epoch": 0.000217974853515625,
      "step": 35713,
      "training_step_time": 0.40546488761901855
    },
    {
      "epoch": 0.00021798095703125,
      "model_forward_time": 0.1149442195892334,
      "step": 35714
    },
    {
      "epoch": 0.00021798095703125,
      "step": 35714,
      "training_step_time": 0.38986945152282715
    },
    {
      "epoch": 0.000217987060546875,
      "model_forward_time": 0.11571884155273438,
      "step": 35715
    },
    {
      "epoch": 0.000217987060546875,
      "step": 35715,
      "training_step_time": 0.3919398784637451
    },
    {
      "epoch": 0.0002179931640625,
      "model_forward_time": 0.11487460136413574,
      "step": 35716
    },
    {
      "epoch": 0.0002179931640625,
      "step": 35716,
      "training_step_time": 0.3850719928741455
    },
    {
      "epoch": 0.000217999267578125,
      "model_forward_time": 0.11528563499450684,
      "step": 35717
    },
    {
      "epoch": 0.000217999267578125,
      "step": 35717,
      "training_step_time": 0.3918039798736572
    },
    {
      "epoch": 0.00021800537109375,
      "model_forward_time": 0.11520242691040039,
      "step": 35718
    },
    {
      "epoch": 0.00021800537109375,
      "step": 35718,
      "training_step_time": 0.40183520317077637
    },
    {
      "epoch": 0.000218011474609375,
      "model_forward_time": 0.11578488349914551,
      "step": 35719
    },
    {
      "epoch": 0.000218011474609375,
      "step": 35719,
      "training_step_time": 0.4286487102508545
    },
    {
      "epoch": 0.000218017578125,
      "grad_norm": 0.11953434348106384,
      "learning_rate": 3.8475165439442446e-05,
      "loss": 0.044,
      "step": 35720
    },
    {
      "epoch": 0.000218017578125,
      "model_forward_time": 0.1151583194732666,
      "step": 35720
    },
    {
      "epoch": 0.000218017578125,
      "step": 35720,
      "training_step_time": 0.472853422164917
    },
    {
      "epoch": 0.000218023681640625,
      "model_forward_time": 0.11467719078063965,
      "step": 35721
    },
    {
      "epoch": 0.000218023681640625,
      "step": 35721,
      "training_step_time": 0.5131886005401611
    },
    {
      "epoch": 0.00021802978515625,
      "model_forward_time": 0.11528277397155762,
      "step": 35722
    },
    {
      "epoch": 0.00021802978515625,
      "step": 35722,
      "training_step_time": 0.42563319206237793
    },
    {
      "epoch": 0.000218035888671875,
      "model_forward_time": 0.11442184448242188,
      "step": 35723
    },
    {
      "epoch": 0.000218035888671875,
      "step": 35723,
      "training_step_time": 0.4108119010925293
    },
    {
      "epoch": 0.0002180419921875,
      "model_forward_time": 0.11501336097717285,
      "step": 35724
    },
    {
      "epoch": 0.0002180419921875,
      "step": 35724,
      "training_step_time": 0.42055201530456543
    },
    {
      "epoch": 0.000218048095703125,
      "model_forward_time": 0.11438393592834473,
      "step": 35725
    },
    {
      "epoch": 0.000218048095703125,
      "step": 35725,
      "training_step_time": 0.3938910961151123
    },
    {
      "epoch": 0.00021805419921875,
      "model_forward_time": 0.11566448211669922,
      "step": 35726
    },
    {
      "epoch": 0.00021805419921875,
      "step": 35726,
      "training_step_time": 0.39126014709472656
    },
    {
      "epoch": 0.000218060302734375,
      "model_forward_time": 0.11505746841430664,
      "step": 35727
    },
    {
      "epoch": 0.000218060302734375,
      "step": 35727,
      "training_step_time": 0.39951229095458984
    },
    {
      "epoch": 0.00021806640625,
      "model_forward_time": 0.11488556861877441,
      "step": 35728
    },
    {
      "epoch": 0.00021806640625,
      "step": 35728,
      "training_step_time": 0.40082454681396484
    },
    {
      "epoch": 0.000218072509765625,
      "model_forward_time": 0.11541438102722168,
      "step": 35729
    },
    {
      "epoch": 0.000218072509765625,
      "step": 35729,
      "training_step_time": 0.4167366027832031
    },
    {
      "epoch": 0.00021807861328125,
      "grad_norm": 0.14147675037384033,
      "learning_rate": 3.844835140766988e-05,
      "loss": 0.048,
      "step": 35730
    },
    {
      "epoch": 0.00021807861328125,
      "model_forward_time": 0.11467742919921875,
      "step": 35730
    },
    {
      "epoch": 0.00021807861328125,
      "step": 35730,
      "training_step_time": 0.39564990997314453
    },
    {
      "epoch": 0.000218084716796875,
      "model_forward_time": 0.1156625747680664,
      "step": 35731
    },
    {
      "epoch": 0.000218084716796875,
      "step": 35731,
      "training_step_time": 0.3875107765197754
    },
    {
      "epoch": 0.0002180908203125,
      "model_forward_time": 0.11576652526855469,
      "step": 35732
    },
    {
      "epoch": 0.0002180908203125,
      "step": 35732,
      "training_step_time": 0.39046549797058105
    },
    {
      "epoch": 0.000218096923828125,
      "model_forward_time": 0.11539769172668457,
      "step": 35733
    },
    {
      "epoch": 0.000218096923828125,
      "step": 35733,
      "training_step_time": 0.4694540500640869
    },
    {
      "epoch": 0.00021810302734375,
      "model_forward_time": 0.11510729789733887,
      "step": 35734
    },
    {
      "epoch": 0.00021810302734375,
      "step": 35734,
      "training_step_time": 0.3707003593444824
    },
    {
      "epoch": 0.000218109130859375,
      "model_forward_time": 0.13266372680664062,
      "step": 35735
    },
    {
      "epoch": 0.000218109130859375,
      "step": 35735,
      "training_step_time": 0.46779918670654297
    },
    {
      "epoch": 0.000218115234375,
      "model_forward_time": 0.11496329307556152,
      "step": 35736
    },
    {
      "epoch": 0.000218115234375,
      "step": 35736,
      "training_step_time": 0.40106844902038574
    },
    {
      "epoch": 0.000218121337890625,
      "model_forward_time": 0.11433100700378418,
      "step": 35737
    },
    {
      "epoch": 0.000218121337890625,
      "step": 35737,
      "training_step_time": 0.4189412593841553
    },
    {
      "epoch": 0.00021812744140625,
      "model_forward_time": 0.11542439460754395,
      "step": 35738
    },
    {
      "epoch": 0.00021812744140625,
      "step": 35738,
      "training_step_time": 0.49395322799682617
    },
    {
      "epoch": 0.000218133544921875,
      "model_forward_time": 0.11456012725830078,
      "step": 35739
    },
    {
      "epoch": 0.000218133544921875,
      "step": 35739,
      "training_step_time": 0.40648531913757324
    },
    {
      "epoch": 0.0002181396484375,
      "grad_norm": 0.11266127228736877,
      "learning_rate": 3.842154088498316e-05,
      "loss": 0.04,
      "step": 35740
    },
    {
      "epoch": 0.0002181396484375,
      "model_forward_time": 0.11577081680297852,
      "step": 35740
    },
    {
      "epoch": 0.0002181396484375,
      "step": 35740,
      "training_step_time": 0.3884263038635254
    },
    {
      "epoch": 0.000218145751953125,
      "model_forward_time": 0.11612749099731445,
      "step": 35741
    },
    {
      "epoch": 0.000218145751953125,
      "step": 35741,
      "training_step_time": 0.3977506160736084
    },
    {
      "epoch": 0.00021815185546875,
      "model_forward_time": 0.1148078441619873,
      "step": 35742
    },
    {
      "epoch": 0.00021815185546875,
      "step": 35742,
      "training_step_time": 0.39234447479248047
    },
    {
      "epoch": 0.000218157958984375,
      "model_forward_time": 0.11510610580444336,
      "step": 35743
    },
    {
      "epoch": 0.000218157958984375,
      "step": 35743,
      "training_step_time": 0.39632248878479004
    },
    {
      "epoch": 0.0002181640625,
      "model_forward_time": 0.11554598808288574,
      "step": 35744
    },
    {
      "epoch": 0.0002181640625,
      "step": 35744,
      "training_step_time": 0.38844776153564453
    },
    {
      "epoch": 0.000218170166015625,
      "model_forward_time": 0.11624908447265625,
      "step": 35745
    },
    {
      "epoch": 0.000218170166015625,
      "step": 35745,
      "training_step_time": 0.39226460456848145
    },
    {
      "epoch": 0.00021817626953125,
      "model_forward_time": 0.11564397811889648,
      "step": 35746
    },
    {
      "epoch": 0.00021817626953125,
      "step": 35746,
      "training_step_time": 0.3883018493652344
    },
    {
      "epoch": 0.000218182373046875,
      "model_forward_time": 0.11605095863342285,
      "step": 35747
    },
    {
      "epoch": 0.000218182373046875,
      "step": 35747,
      "training_step_time": 0.5054044723510742
    },
    {
      "epoch": 0.0002181884765625,
      "model_forward_time": 0.11533093452453613,
      "step": 35748
    },
    {
      "epoch": 0.0002181884765625,
      "step": 35748,
      "training_step_time": 0.44742608070373535
    },
    {
      "epoch": 0.000218194580078125,
      "model_forward_time": 0.11592459678649902,
      "step": 35749
    },
    {
      "epoch": 0.000218194580078125,
      "step": 35749,
      "training_step_time": 0.48534178733825684
    },
    {
      "epoch": 0.00021820068359375,
      "grad_norm": 0.09724124521017075,
      "learning_rate": 3.839473387952662e-05,
      "loss": 0.0383,
      "step": 35750
    },
    {
      "epoch": 0.00021820068359375,
      "model_forward_time": 0.11578726768493652,
      "step": 35750
    },
    {
      "epoch": 0.00021820068359375,
      "step": 35750,
      "training_step_time": 0.4537067413330078
    },
    {
      "epoch": 0.000218206787109375,
      "model_forward_time": 0.11662077903747559,
      "step": 35751
    },
    {
      "epoch": 0.000218206787109375,
      "step": 35751,
      "training_step_time": 0.4792368412017822
    },
    {
      "epoch": 0.000218212890625,
      "model_forward_time": 0.11459636688232422,
      "step": 35752
    },
    {
      "epoch": 0.000218212890625,
      "step": 35752,
      "training_step_time": 0.48439455032348633
    },
    {
      "epoch": 0.000218218994140625,
      "model_forward_time": 0.11484074592590332,
      "step": 35753
    },
    {
      "epoch": 0.000218218994140625,
      "step": 35753,
      "training_step_time": 0.39404964447021484
    },
    {
      "epoch": 0.00021822509765625,
      "model_forward_time": 0.11557745933532715,
      "step": 35754
    },
    {
      "epoch": 0.00021822509765625,
      "step": 35754,
      "training_step_time": 0.38542866706848145
    },
    {
      "epoch": 0.000218231201171875,
      "model_forward_time": 0.11474084854125977,
      "step": 35755
    },
    {
      "epoch": 0.000218231201171875,
      "step": 35755,
      "training_step_time": 0.39072299003601074
    },
    {
      "epoch": 0.0002182373046875,
      "model_forward_time": 0.11484146118164062,
      "step": 35756
    },
    {
      "epoch": 0.0002182373046875,
      "step": 35756,
      "training_step_time": 0.3889446258544922
    },
    {
      "epoch": 0.000218243408203125,
      "model_forward_time": 0.1146554946899414,
      "step": 35757
    },
    {
      "epoch": 0.000218243408203125,
      "step": 35757,
      "training_step_time": 0.40212225914001465
    },
    {
      "epoch": 0.00021824951171875,
      "model_forward_time": 0.11543560028076172,
      "step": 35758
    },
    {
      "epoch": 0.00021824951171875,
      "step": 35758,
      "training_step_time": 0.3883821964263916
    },
    {
      "epoch": 0.000218255615234375,
      "model_forward_time": 0.12020635604858398,
      "step": 35759
    },
    {
      "epoch": 0.000218255615234375,
      "step": 35759,
      "training_step_time": 0.40984129905700684
    },
    {
      "epoch": 0.00021826171875,
      "grad_norm": 0.10926903784275055,
      "learning_rate": 3.836793039944349e-05,
      "loss": 0.0425,
      "step": 35760
    },
    {
      "epoch": 0.00021826171875,
      "model_forward_time": 0.11865520477294922,
      "step": 35760
    },
    {
      "epoch": 0.00021826171875,
      "step": 35760,
      "training_step_time": 0.38672471046447754
    },
    {
      "epoch": 0.000218267822265625,
      "model_forward_time": 0.1178889274597168,
      "step": 35761
    },
    {
      "epoch": 0.000218267822265625,
      "step": 35761,
      "training_step_time": 0.3906886577606201
    },
    {
      "epoch": 0.00021827392578125,
      "model_forward_time": 0.11610150337219238,
      "step": 35762
    },
    {
      "epoch": 0.00021827392578125,
      "step": 35762,
      "training_step_time": 0.4044766426086426
    },
    {
      "epoch": 0.000218280029296875,
      "model_forward_time": 0.11541247367858887,
      "step": 35763
    },
    {
      "epoch": 0.000218280029296875,
      "step": 35763,
      "training_step_time": 0.3806416988372803
    },
    {
      "epoch": 0.0002182861328125,
      "model_forward_time": 0.11762380599975586,
      "step": 35764
    },
    {
      "epoch": 0.0002182861328125,
      "step": 35764,
      "training_step_time": 0.43440675735473633
    },
    {
      "epoch": 0.000218292236328125,
      "model_forward_time": 0.11818575859069824,
      "step": 35765
    },
    {
      "epoch": 0.000218292236328125,
      "step": 35765,
      "training_step_time": 0.40171241760253906
    },
    {
      "epoch": 0.00021829833984375,
      "model_forward_time": 0.13197875022888184,
      "step": 35766
    },
    {
      "epoch": 0.00021829833984375,
      "step": 35766,
      "training_step_time": 0.4216601848602295
    },
    {
      "epoch": 0.000218304443359375,
      "model_forward_time": 0.11583471298217773,
      "step": 35767
    },
    {
      "epoch": 0.000218304443359375,
      "step": 35767,
      "training_step_time": 0.41823744773864746
    },
    {
      "epoch": 0.000218310546875,
      "model_forward_time": 0.11501026153564453,
      "step": 35768
    },
    {
      "epoch": 0.000218310546875,
      "step": 35768,
      "training_step_time": 0.3846414089202881
    },
    {
      "epoch": 0.000218316650390625,
      "model_forward_time": 0.11516094207763672,
      "step": 35769
    },
    {
      "epoch": 0.000218316650390625,
      "step": 35769,
      "training_step_time": 0.3929281234741211
    },
    {
      "epoch": 0.00021832275390625,
      "grad_norm": 0.09874359518289566,
      "learning_rate": 3.834113045287599e-05,
      "loss": 0.0408,
      "step": 35770
    },
    {
      "epoch": 0.00021832275390625,
      "model_forward_time": 0.11582779884338379,
      "step": 35770
    },
    {
      "epoch": 0.00021832275390625,
      "step": 35770,
      "training_step_time": 0.38905978202819824
    },
    {
      "epoch": 0.000218328857421875,
      "model_forward_time": 0.11538815498352051,
      "step": 35771
    },
    {
      "epoch": 0.000218328857421875,
      "step": 35771,
      "training_step_time": 0.39135313034057617
    },
    {
      "epoch": 0.0002183349609375,
      "model_forward_time": 0.11550760269165039,
      "step": 35772
    },
    {
      "epoch": 0.0002183349609375,
      "step": 35772,
      "training_step_time": 0.3979313373565674
    },
    {
      "epoch": 0.000218341064453125,
      "model_forward_time": 0.11513471603393555,
      "step": 35773
    },
    {
      "epoch": 0.000218341064453125,
      "step": 35773,
      "training_step_time": 0.39876556396484375
    },
    {
      "epoch": 0.00021834716796875,
      "model_forward_time": 0.11677122116088867,
      "step": 35774
    },
    {
      "epoch": 0.00021834716796875,
      "step": 35774,
      "training_step_time": 0.38083910942077637
    },
    {
      "epoch": 0.000218353271484375,
      "model_forward_time": 0.11537885665893555,
      "step": 35775
    },
    {
      "epoch": 0.000218353271484375,
      "step": 35775,
      "training_step_time": 0.3950388431549072
    },
    {
      "epoch": 0.000218359375,
      "model_forward_time": 0.11559104919433594,
      "step": 35776
    },
    {
      "epoch": 0.000218359375,
      "step": 35776,
      "training_step_time": 0.4245741367340088
    },
    {
      "epoch": 0.000218365478515625,
      "model_forward_time": 0.11908245086669922,
      "step": 35777
    },
    {
      "epoch": 0.000218365478515625,
      "step": 35777,
      "training_step_time": 0.4230077266693115
    },
    {
      "epoch": 0.00021837158203125,
      "model_forward_time": 0.12166547775268555,
      "step": 35778
    },
    {
      "epoch": 0.00021837158203125,
      "step": 35778,
      "training_step_time": 0.43810176849365234
    },
    {
      "epoch": 0.000218377685546875,
      "model_forward_time": 0.12014150619506836,
      "step": 35779
    },
    {
      "epoch": 0.000218377685546875,
      "step": 35779,
      "training_step_time": 0.4844486713409424
    },
    {
      "epoch": 0.0002183837890625,
      "grad_norm": 0.1136813759803772,
      "learning_rate": 3.831433404796521e-05,
      "loss": 0.0414,
      "step": 35780
    },
    {
      "epoch": 0.0002183837890625,
      "model_forward_time": 0.11960363388061523,
      "step": 35780
    },
    {
      "epoch": 0.0002183837890625,
      "step": 35780,
      "training_step_time": 0.4019479751586914
    },
    {
      "epoch": 0.000218389892578125,
      "model_forward_time": 0.1192164421081543,
      "step": 35781
    },
    {
      "epoch": 0.000218389892578125,
      "step": 35781,
      "training_step_time": 0.48998188972473145
    },
    {
      "epoch": 0.00021839599609375,
      "model_forward_time": 0.11983966827392578,
      "step": 35782
    },
    {
      "epoch": 0.00021839599609375,
      "step": 35782,
      "training_step_time": 0.38297200202941895
    },
    {
      "epoch": 0.000218402099609375,
      "model_forward_time": 0.11864185333251953,
      "step": 35783
    },
    {
      "epoch": 0.000218402099609375,
      "step": 35783,
      "training_step_time": 0.3783257007598877
    },
    {
      "epoch": 0.000218408203125,
      "model_forward_time": 0.11605334281921387,
      "step": 35784
    },
    {
      "epoch": 0.000218408203125,
      "step": 35784,
      "training_step_time": 0.38765525817871094
    },
    {
      "epoch": 0.000218414306640625,
      "model_forward_time": 0.1156454086303711,
      "step": 35785
    },
    {
      "epoch": 0.000218414306640625,
      "step": 35785,
      "training_step_time": 0.4072136878967285
    },
    {
      "epoch": 0.00021842041015625,
      "model_forward_time": 0.11494636535644531,
      "step": 35786
    },
    {
      "epoch": 0.00021842041015625,
      "step": 35786,
      "training_step_time": 0.39283156394958496
    },
    {
      "epoch": 0.000218426513671875,
      "model_forward_time": 0.11472821235656738,
      "step": 35787
    },
    {
      "epoch": 0.000218426513671875,
      "step": 35787,
      "training_step_time": 0.3918454647064209
    },
    {
      "epoch": 0.0002184326171875,
      "model_forward_time": 0.11597251892089844,
      "step": 35788
    },
    {
      "epoch": 0.0002184326171875,
      "step": 35788,
      "training_step_time": 0.3858475685119629
    },
    {
      "epoch": 0.000218438720703125,
      "model_forward_time": 0.11542963981628418,
      "step": 35789
    },
    {
      "epoch": 0.000218438720703125,
      "step": 35789,
      "training_step_time": 0.4022061824798584
    },
    {
      "epoch": 0.00021844482421875,
      "grad_norm": 0.12956686317920685,
      "learning_rate": 3.828754119285123e-05,
      "loss": 0.0417,
      "step": 35790
    },
    {
      "epoch": 0.00021844482421875,
      "model_forward_time": 0.11522459983825684,
      "step": 35790
    },
    {
      "epoch": 0.00021844482421875,
      "step": 35790,
      "training_step_time": 0.40683603286743164
    },
    {
      "epoch": 0.000218450927734375,
      "model_forward_time": 0.11568927764892578,
      "step": 35791
    },
    {
      "epoch": 0.000218450927734375,
      "step": 35791,
      "training_step_time": 0.4012136459350586
    },
    {
      "epoch": 0.00021845703125,
      "model_forward_time": 0.11557292938232422,
      "step": 35792
    },
    {
      "epoch": 0.00021845703125,
      "step": 35792,
      "training_step_time": 0.36429452896118164
    },
    {
      "epoch": 0.000218463134765625,
      "model_forward_time": 0.11537528038024902,
      "step": 35793
    },
    {
      "epoch": 0.000218463134765625,
      "step": 35793,
      "training_step_time": 0.4298126697540283
    },
    {
      "epoch": 0.00021846923828125,
      "model_forward_time": 0.11518669128417969,
      "step": 35794
    },
    {
      "epoch": 0.00021846923828125,
      "step": 35794,
      "training_step_time": 0.5022773742675781
    },
    {
      "epoch": 0.000218475341796875,
      "model_forward_time": 0.11489748954772949,
      "step": 35795
    },
    {
      "epoch": 0.000218475341796875,
      "step": 35795,
      "training_step_time": 0.37599635124206543
    },
    {
      "epoch": 0.0002184814453125,
      "model_forward_time": 0.1156008243560791,
      "step": 35796
    },
    {
      "epoch": 0.0002184814453125,
      "step": 35796,
      "training_step_time": 0.4491560459136963
    },
    {
      "epoch": 0.000218487548828125,
      "model_forward_time": 0.11584901809692383,
      "step": 35797
    },
    {
      "epoch": 0.000218487548828125,
      "step": 35797,
      "training_step_time": 0.38214540481567383
    },
    {
      "epoch": 0.00021849365234375,
      "model_forward_time": 0.11544036865234375,
      "step": 35798
    },
    {
      "epoch": 0.00021849365234375,
      "step": 35798,
      "training_step_time": 0.3743128776550293
    },
    {
      "epoch": 0.000218499755859375,
      "model_forward_time": 0.11497211456298828,
      "step": 35799
    },
    {
      "epoch": 0.000218499755859375,
      "step": 35799,
      "training_step_time": 0.4106631278991699
    },
    {
      "epoch": 0.000218505859375,
      "grad_norm": 0.09693099558353424,
      "learning_rate": 3.826075189567296e-05,
      "loss": 0.0393,
      "step": 35800
    },
    {
      "epoch": 0.000218505859375,
      "model_forward_time": 0.11535334587097168,
      "step": 35800
    },
    {
      "epoch": 0.000218505859375,
      "step": 35800,
      "training_step_time": 0.40282320976257324
    },
    {
      "epoch": 0.000218511962890625,
      "model_forward_time": 0.11495161056518555,
      "step": 35801
    },
    {
      "epoch": 0.000218511962890625,
      "step": 35801,
      "training_step_time": 0.37929868698120117
    },
    {
      "epoch": 0.00021851806640625,
      "model_forward_time": 0.1148843765258789,
      "step": 35802
    },
    {
      "epoch": 0.00021851806640625,
      "step": 35802,
      "training_step_time": 0.3805351257324219
    },
    {
      "epoch": 0.000218524169921875,
      "model_forward_time": 0.11574935913085938,
      "step": 35803
    },
    {
      "epoch": 0.000218524169921875,
      "step": 35803,
      "training_step_time": 0.5242133140563965
    },
    {
      "epoch": 0.0002185302734375,
      "model_forward_time": 0.11497879028320312,
      "step": 35804
    },
    {
      "epoch": 0.0002185302734375,
      "step": 35804,
      "training_step_time": 0.4061155319213867
    },
    {
      "epoch": 0.000218536376953125,
      "model_forward_time": 0.11575055122375488,
      "step": 35805
    },
    {
      "epoch": 0.000218536376953125,
      "step": 35805,
      "training_step_time": 0.4344449043273926
    },
    {
      "epoch": 0.00021854248046875,
      "model_forward_time": 0.11550474166870117,
      "step": 35806
    },
    {
      "epoch": 0.00021854248046875,
      "step": 35806,
      "training_step_time": 0.387225866317749
    },
    {
      "epoch": 0.000218548583984375,
      "model_forward_time": 0.11636042594909668,
      "step": 35807
    },
    {
      "epoch": 0.000218548583984375,
      "step": 35807,
      "training_step_time": 0.4145064353942871
    },
    {
      "epoch": 0.0002185546875,
      "model_forward_time": 0.11520671844482422,
      "step": 35808
    },
    {
      "epoch": 0.0002185546875,
      "step": 35808,
      "training_step_time": 0.48297762870788574
    },
    {
      "epoch": 0.000218560791015625,
      "model_forward_time": 0.1159520149230957,
      "step": 35809
    },
    {
      "epoch": 0.000218560791015625,
      "step": 35809,
      "training_step_time": 0.5430247783660889
    },
    {
      "epoch": 0.00021856689453125,
      "grad_norm": 0.1432449370622635,
      "learning_rate": 3.823396616456833e-05,
      "loss": 0.0436,
      "step": 35810
    },
    {
      "epoch": 0.00021856689453125,
      "model_forward_time": 0.11516809463500977,
      "step": 35810
    },
    {
      "epoch": 0.00021856689453125,
      "step": 35810,
      "training_step_time": 0.402271032333374
    },
    {
      "epoch": 0.000218572998046875,
      "model_forward_time": 0.11519575119018555,
      "step": 35811
    },
    {
      "epoch": 0.000218572998046875,
      "step": 35811,
      "training_step_time": 0.38965797424316406
    },
    {
      "epoch": 0.0002185791015625,
      "model_forward_time": 0.11518597602844238,
      "step": 35812
    },
    {
      "epoch": 0.0002185791015625,
      "step": 35812,
      "training_step_time": 0.3940083980560303
    },
    {
      "epoch": 0.000218585205078125,
      "model_forward_time": 0.11513829231262207,
      "step": 35813
    },
    {
      "epoch": 0.000218585205078125,
      "step": 35813,
      "training_step_time": 0.390521764755249
    },
    {
      "epoch": 0.00021859130859375,
      "model_forward_time": 0.11578488349914551,
      "step": 35814
    },
    {
      "epoch": 0.00021859130859375,
      "step": 35814,
      "training_step_time": 0.394956111907959
    },
    {
      "epoch": 0.000218597412109375,
      "model_forward_time": 0.11587142944335938,
      "step": 35815
    },
    {
      "epoch": 0.000218597412109375,
      "step": 35815,
      "training_step_time": 0.6259627342224121
    },
    {
      "epoch": 0.000218603515625,
      "model_forward_time": 0.11563897132873535,
      "step": 35816
    },
    {
      "epoch": 0.000218603515625,
      "step": 35816,
      "training_step_time": 0.3994717597961426
    },
    {
      "epoch": 0.000218609619140625,
      "model_forward_time": 0.11484646797180176,
      "step": 35817
    },
    {
      "epoch": 0.000218609619140625,
      "step": 35817,
      "training_step_time": 0.42109155654907227
    },
    {
      "epoch": 0.00021861572265625,
      "model_forward_time": 0.11542725563049316,
      "step": 35818
    },
    {
      "epoch": 0.00021861572265625,
      "step": 35818,
      "training_step_time": 0.39865589141845703
    },
    {
      "epoch": 0.000218621826171875,
      "model_forward_time": 0.11461019515991211,
      "step": 35819
    },
    {
      "epoch": 0.000218621826171875,
      "step": 35819,
      "training_step_time": 0.3887641429901123
    },
    {
      "epoch": 0.0002186279296875,
      "grad_norm": 0.08783381432294846,
      "learning_rate": 3.820718400767409e-05,
      "loss": 0.0388,
      "step": 35820
    },
    {
      "epoch": 0.0002186279296875,
      "model_forward_time": 0.11538219451904297,
      "step": 35820
    },
    {
      "epoch": 0.0002186279296875,
      "step": 35820,
      "training_step_time": 0.4046778678894043
    },
    {
      "epoch": 0.000218634033203125,
      "model_forward_time": 0.1153404712677002,
      "step": 35821
    },
    {
      "epoch": 0.000218634033203125,
      "step": 35821,
      "training_step_time": 0.7178566455841064
    },
    {
      "epoch": 0.00021864013671875,
      "model_forward_time": 0.11531352996826172,
      "step": 35822
    },
    {
      "epoch": 0.00021864013671875,
      "step": 35822,
      "training_step_time": 0.47649335861206055
    },
    {
      "epoch": 0.000218646240234375,
      "model_forward_time": 0.11491036415100098,
      "step": 35823
    },
    {
      "epoch": 0.000218646240234375,
      "step": 35823,
      "training_step_time": 0.42401814460754395
    },
    {
      "epoch": 0.00021865234375,
      "model_forward_time": 0.11482667922973633,
      "step": 35824
    },
    {
      "epoch": 0.00021865234375,
      "step": 35824,
      "training_step_time": 0.4022185802459717
    },
    {
      "epoch": 0.000218658447265625,
      "model_forward_time": 0.114349365234375,
      "step": 35825
    },
    {
      "epoch": 0.000218658447265625,
      "step": 35825,
      "training_step_time": 0.3942253589630127
    },
    {
      "epoch": 0.00021866455078125,
      "model_forward_time": 0.11478233337402344,
      "step": 35826
    },
    {
      "epoch": 0.00021866455078125,
      "step": 35826,
      "training_step_time": 0.3872988224029541
    },
    {
      "epoch": 0.000218670654296875,
      "model_forward_time": 0.11564183235168457,
      "step": 35827
    },
    {
      "epoch": 0.000218670654296875,
      "step": 35827,
      "training_step_time": 0.4138486385345459
    },
    {
      "epoch": 0.0002186767578125,
      "model_forward_time": 0.11512184143066406,
      "step": 35828
    },
    {
      "epoch": 0.0002186767578125,
      "step": 35828,
      "training_step_time": 0.3905162811279297
    },
    {
      "epoch": 0.000218682861328125,
      "model_forward_time": 0.1163170337677002,
      "step": 35829
    },
    {
      "epoch": 0.000218682861328125,
      "step": 35829,
      "training_step_time": 0.39577817916870117
    },
    {
      "epoch": 0.00021868896484375,
      "grad_norm": 0.13371123373508453,
      "learning_rate": 3.818040543312598e-05,
      "loss": 0.0465,
      "step": 35830
    },
    {
      "epoch": 0.00021868896484375,
      "model_forward_time": 0.1155393123626709,
      "step": 35830
    },
    {
      "epoch": 0.00021868896484375,
      "step": 35830,
      "training_step_time": 0.3937685489654541
    },
    {
      "epoch": 0.000218695068359375,
      "model_forward_time": 0.11555075645446777,
      "step": 35831
    },
    {
      "epoch": 0.000218695068359375,
      "step": 35831,
      "training_step_time": 0.4119541645050049
    },
    {
      "epoch": 0.000218701171875,
      "model_forward_time": 0.11559915542602539,
      "step": 35832
    },
    {
      "epoch": 0.000218701171875,
      "step": 35832,
      "training_step_time": 0.39847755432128906
    },
    {
      "epoch": 0.000218707275390625,
      "model_forward_time": 0.1157221794128418,
      "step": 35833
    },
    {
      "epoch": 0.000218707275390625,
      "step": 35833,
      "training_step_time": 0.5823471546173096
    },
    {
      "epoch": 0.00021871337890625,
      "model_forward_time": 0.11543512344360352,
      "step": 35834
    },
    {
      "epoch": 0.00021871337890625,
      "step": 35834,
      "training_step_time": 0.5295584201812744
    },
    {
      "epoch": 0.000218719482421875,
      "model_forward_time": 0.11559796333312988,
      "step": 35835
    },
    {
      "epoch": 0.000218719482421875,
      "step": 35835,
      "training_step_time": 0.38500475883483887
    },
    {
      "epoch": 0.0002187255859375,
      "model_forward_time": 0.1147160530090332,
      "step": 35836
    },
    {
      "epoch": 0.0002187255859375,
      "step": 35836,
      "training_step_time": 0.4951789379119873
    },
    {
      "epoch": 0.000218731689453125,
      "model_forward_time": 0.1144404411315918,
      "step": 35837
    },
    {
      "epoch": 0.000218731689453125,
      "step": 35837,
      "training_step_time": 0.4407002925872803
    },
    {
      "epoch": 0.00021873779296875,
      "model_forward_time": 0.11525249481201172,
      "step": 35838
    },
    {
      "epoch": 0.00021873779296875,
      "step": 35838,
      "training_step_time": 0.3981454372406006
    },
    {
      "epoch": 0.000218743896484375,
      "model_forward_time": 0.11491608619689941,
      "step": 35839
    },
    {
      "epoch": 0.000218743896484375,
      "step": 35839,
      "training_step_time": 0.40000176429748535
    },
    {
      "epoch": 0.00021875,
      "grad_norm": 0.1605364829301834,
      "learning_rate": 3.8153630449058646e-05,
      "loss": 0.0421,
      "step": 35840
    },
    {
      "epoch": 0.00021875,
      "model_forward_time": 0.1165919303894043,
      "step": 35840
    },
    {
      "epoch": 0.00021875,
      "step": 35840,
      "training_step_time": 0.38942980766296387
    },
    {
      "epoch": 0.000218756103515625,
      "model_forward_time": 0.11539769172668457,
      "step": 35841
    },
    {
      "epoch": 0.000218756103515625,
      "step": 35841,
      "training_step_time": 0.3970985412597656
    },
    {
      "epoch": 0.00021876220703125,
      "model_forward_time": 0.11554479598999023,
      "step": 35842
    },
    {
      "epoch": 0.00021876220703125,
      "step": 35842,
      "training_step_time": 0.3949153423309326
    },
    {
      "epoch": 0.000218768310546875,
      "model_forward_time": 0.11590313911437988,
      "step": 35843
    },
    {
      "epoch": 0.000218768310546875,
      "step": 35843,
      "training_step_time": 0.39172816276550293
    },
    {
      "epoch": 0.0002187744140625,
      "model_forward_time": 0.11592245101928711,
      "step": 35844
    },
    {
      "epoch": 0.0002187744140625,
      "step": 35844,
      "training_step_time": 0.4173426628112793
    },
    {
      "epoch": 0.000218780517578125,
      "model_forward_time": 0.11530542373657227,
      "step": 35845
    },
    {
      "epoch": 0.000218780517578125,
      "step": 35845,
      "training_step_time": 0.507164478302002
    },
    {
      "epoch": 0.00021878662109375,
      "model_forward_time": 0.11531639099121094,
      "step": 35846
    },
    {
      "epoch": 0.00021878662109375,
      "step": 35846,
      "training_step_time": 0.39470386505126953
    },
    {
      "epoch": 0.000218792724609375,
      "model_forward_time": 0.1158604621887207,
      "step": 35847
    },
    {
      "epoch": 0.000218792724609375,
      "step": 35847,
      "training_step_time": 0.4007081985473633
    },
    {
      "epoch": 0.000218798828125,
      "model_forward_time": 0.11556029319763184,
      "step": 35848
    },
    {
      "epoch": 0.000218798828125,
      "step": 35848,
      "training_step_time": 0.39392590522766113
    },
    {
      "epoch": 0.000218804931640625,
      "model_forward_time": 0.11607527732849121,
      "step": 35849
    },
    {
      "epoch": 0.000218804931640625,
      "step": 35849,
      "training_step_time": 0.45804476737976074
    },
    {
      "epoch": 0.00021881103515625,
      "grad_norm": 0.1143249049782753,
      "learning_rate": 3.812685906360557e-05,
      "loss": 0.0403,
      "step": 35850
    },
    {
      "epoch": 0.00021881103515625,
      "model_forward_time": 0.11597943305969238,
      "step": 35850
    },
    {
      "epoch": 0.00021881103515625,
      "step": 35850,
      "training_step_time": 0.4959557056427002
    },
    {
      "epoch": 0.000218817138671875,
      "model_forward_time": 0.11620140075683594,
      "step": 35851
    },
    {
      "epoch": 0.000218817138671875,
      "step": 35851,
      "training_step_time": 0.5191085338592529
    },
    {
      "epoch": 0.0002188232421875,
      "model_forward_time": 0.11570453643798828,
      "step": 35852
    },
    {
      "epoch": 0.0002188232421875,
      "step": 35852,
      "training_step_time": 0.4574606418609619
    },
    {
      "epoch": 0.000218829345703125,
      "model_forward_time": 0.11490702629089355,
      "step": 35853
    },
    {
      "epoch": 0.000218829345703125,
      "step": 35853,
      "training_step_time": 0.3989412784576416
    },
    {
      "epoch": 0.00021883544921875,
      "model_forward_time": 0.1155555248260498,
      "step": 35854
    },
    {
      "epoch": 0.00021883544921875,
      "step": 35854,
      "training_step_time": 0.3940012454986572
    },
    {
      "epoch": 0.000218841552734375,
      "model_forward_time": 0.11480998992919922,
      "step": 35855
    },
    {
      "epoch": 0.000218841552734375,
      "step": 35855,
      "training_step_time": 0.3851633071899414
    },
    {
      "epoch": 0.00021884765625,
      "model_forward_time": 0.11501264572143555,
      "step": 35856
    },
    {
      "epoch": 0.00021884765625,
      "step": 35856,
      "training_step_time": 0.39214372634887695
    },
    {
      "epoch": 0.000218853759765625,
      "model_forward_time": 0.11509203910827637,
      "step": 35857
    },
    {
      "epoch": 0.000218853759765625,
      "step": 35857,
      "training_step_time": 0.46065759658813477
    },
    {
      "epoch": 0.00021885986328125,
      "model_forward_time": 0.11611652374267578,
      "step": 35858
    },
    {
      "epoch": 0.00021885986328125,
      "step": 35858,
      "training_step_time": 0.40611839294433594
    },
    {
      "epoch": 0.000218865966796875,
      "model_forward_time": 0.11462926864624023,
      "step": 35859
    },
    {
      "epoch": 0.000218865966796875,
      "step": 35859,
      "training_step_time": 0.3953573703765869
    },
    {
      "epoch": 0.0002188720703125,
      "grad_norm": 0.08240234106779099,
      "learning_rate": 3.810009128489925e-05,
      "loss": 0.0397,
      "step": 35860
    },
    {
      "epoch": 0.0002188720703125,
      "model_forward_time": 0.11527371406555176,
      "step": 35860
    },
    {
      "epoch": 0.0002188720703125,
      "step": 35860,
      "training_step_time": 0.3819761276245117
    },
    {
      "epoch": 0.000218878173828125,
      "model_forward_time": 0.11527681350708008,
      "step": 35861
    },
    {
      "epoch": 0.000218878173828125,
      "step": 35861,
      "training_step_time": 0.3909616470336914
    },
    {
      "epoch": 0.00021888427734375,
      "model_forward_time": 0.11637043952941895,
      "step": 35862
    },
    {
      "epoch": 0.00021888427734375,
      "step": 35862,
      "training_step_time": 0.4164278507232666
    },
    {
      "epoch": 0.000218890380859375,
      "model_forward_time": 0.11505722999572754,
      "step": 35863
    },
    {
      "epoch": 0.000218890380859375,
      "step": 35863,
      "training_step_time": 0.6654751300811768
    },
    {
      "epoch": 0.000218896484375,
      "model_forward_time": 0.11454463005065918,
      "step": 35864
    },
    {
      "epoch": 0.000218896484375,
      "step": 35864,
      "training_step_time": 0.47179102897644043
    },
    {
      "epoch": 0.000218902587890625,
      "model_forward_time": 0.11518669128417969,
      "step": 35865
    },
    {
      "epoch": 0.000218902587890625,
      "step": 35865,
      "training_step_time": 0.47809886932373047
    },
    {
      "epoch": 0.00021890869140625,
      "model_forward_time": 0.11442732810974121,
      "step": 35866
    },
    {
      "epoch": 0.00021890869140625,
      "step": 35866,
      "training_step_time": 0.38840651512145996
    },
    {
      "epoch": 0.000218914794921875,
      "model_forward_time": 0.11424946784973145,
      "step": 35867
    },
    {
      "epoch": 0.000218914794921875,
      "step": 35867,
      "training_step_time": 0.3830888271331787
    },
    {
      "epoch": 0.0002189208984375,
      "model_forward_time": 0.11472630500793457,
      "step": 35868
    },
    {
      "epoch": 0.0002189208984375,
      "step": 35868,
      "training_step_time": 0.39183950424194336
    },
    {
      "epoch": 0.000218927001953125,
      "model_forward_time": 0.11515569686889648,
      "step": 35869
    },
    {
      "epoch": 0.000218927001953125,
      "step": 35869,
      "training_step_time": 0.44019317626953125
    },
    {
      "epoch": 0.00021893310546875,
      "grad_norm": 0.11533713340759277,
      "learning_rate": 3.807332712107097e-05,
      "loss": 0.0425,
      "step": 35870
    },
    {
      "epoch": 0.00021893310546875,
      "model_forward_time": 0.1151740550994873,
      "step": 35870
    },
    {
      "epoch": 0.00021893310546875,
      "step": 35870,
      "training_step_time": 0.41802144050598145
    },
    {
      "epoch": 0.000218939208984375,
      "model_forward_time": 0.11467790603637695,
      "step": 35871
    },
    {
      "epoch": 0.000218939208984375,
      "step": 35871,
      "training_step_time": 0.3893136978149414
    },
    {
      "epoch": 0.0002189453125,
      "model_forward_time": 0.11555647850036621,
      "step": 35872
    },
    {
      "epoch": 0.0002189453125,
      "step": 35872,
      "training_step_time": 0.39367103576660156
    },
    {
      "epoch": 0.000218951416015625,
      "model_forward_time": 0.11519360542297363,
      "step": 35873
    },
    {
      "epoch": 0.000218951416015625,
      "step": 35873,
      "training_step_time": 0.3843541145324707
    },
    {
      "epoch": 0.00021895751953125,
      "model_forward_time": 0.11494994163513184,
      "step": 35874
    },
    {
      "epoch": 0.00021895751953125,
      "step": 35874,
      "training_step_time": 0.397918701171875
    },
    {
      "epoch": 0.000218963623046875,
      "model_forward_time": 0.11512160301208496,
      "step": 35875
    },
    {
      "epoch": 0.000218963623046875,
      "step": 35875,
      "training_step_time": 0.7565433979034424
    },
    {
      "epoch": 0.0002189697265625,
      "model_forward_time": 0.11490583419799805,
      "step": 35876
    },
    {
      "epoch": 0.0002189697265625,
      "step": 35876,
      "training_step_time": 0.39823317527770996
    },
    {
      "epoch": 0.000218975830078125,
      "model_forward_time": 0.11547112464904785,
      "step": 35877
    },
    {
      "epoch": 0.000218975830078125,
      "step": 35877,
      "training_step_time": 0.39731407165527344
    },
    {
      "epoch": 0.00021898193359375,
      "model_forward_time": 0.11531209945678711,
      "step": 35878
    },
    {
      "epoch": 0.00021898193359375,
      "step": 35878,
      "training_step_time": 0.49454712867736816
    },
    {
      "epoch": 0.000218988037109375,
      "model_forward_time": 0.11484551429748535,
      "step": 35879
    },
    {
      "epoch": 0.000218988037109375,
      "step": 35879,
      "training_step_time": 0.483318567276001
    },
    {
      "epoch": 0.000218994140625,
      "grad_norm": 0.08327588438987732,
      "learning_rate": 3.8046566580251e-05,
      "loss": 0.041,
      "step": 35880
    },
    {
      "epoch": 0.000218994140625,
      "model_forward_time": 0.11455273628234863,
      "step": 35880
    },
    {
      "epoch": 0.000218994140625,
      "step": 35880,
      "training_step_time": 0.410764217376709
    },
    {
      "epoch": 0.000219000244140625,
      "model_forward_time": 0.1152946949005127,
      "step": 35881
    },
    {
      "epoch": 0.000219000244140625,
      "step": 35881,
      "training_step_time": 0.41701245307922363
    },
    {
      "epoch": 0.00021900634765625,
      "model_forward_time": 0.11503744125366211,
      "step": 35882
    },
    {
      "epoch": 0.00021900634765625,
      "step": 35882,
      "training_step_time": 0.39313220977783203
    },
    {
      "epoch": 0.000219012451171875,
      "model_forward_time": 0.11692166328430176,
      "step": 35883
    },
    {
      "epoch": 0.000219012451171875,
      "step": 35883,
      "training_step_time": 0.42157721519470215
    },
    {
      "epoch": 0.0002190185546875,
      "model_forward_time": 0.11527013778686523,
      "step": 35884
    },
    {
      "epoch": 0.0002190185546875,
      "step": 35884,
      "training_step_time": 0.39037013053894043
    },
    {
      "epoch": 0.000219024658203125,
      "model_forward_time": 0.11564397811889648,
      "step": 35885
    },
    {
      "epoch": 0.000219024658203125,
      "step": 35885,
      "training_step_time": 0.3978261947631836
    },
    {
      "epoch": 0.00021903076171875,
      "model_forward_time": 0.11524295806884766,
      "step": 35886
    },
    {
      "epoch": 0.00021903076171875,
      "step": 35886,
      "training_step_time": 0.39562368392944336
    },
    {
      "epoch": 0.000219036865234375,
      "model_forward_time": 0.1153872013092041,
      "step": 35887
    },
    {
      "epoch": 0.000219036865234375,
      "step": 35887,
      "training_step_time": 0.5241296291351318
    },
    {
      "epoch": 0.00021904296875,
      "model_forward_time": 0.11511063575744629,
      "step": 35888
    },
    {
      "epoch": 0.00021904296875,
      "step": 35888,
      "training_step_time": 0.39086437225341797
    },
    {
      "epoch": 0.000219049072265625,
      "model_forward_time": 0.11533045768737793,
      "step": 35889
    },
    {
      "epoch": 0.000219049072265625,
      "step": 35889,
      "training_step_time": 0.39441680908203125
    },
    {
      "epoch": 0.00021905517578125,
      "grad_norm": 0.1269427090883255,
      "learning_rate": 3.801980967056851e-05,
      "loss": 0.039,
      "step": 35890
    },
    {
      "epoch": 0.00021905517578125,
      "model_forward_time": 0.11714315414428711,
      "step": 35890
    },
    {
      "epoch": 0.00021905517578125,
      "step": 35890,
      "training_step_time": 0.3905301094055176
    },
    {
      "epoch": 0.000219061279296875,
      "model_forward_time": 0.11494636535644531,
      "step": 35891
    },
    {
      "epoch": 0.000219061279296875,
      "step": 35891,
      "training_step_time": 0.39537763595581055
    },
    {
      "epoch": 0.0002190673828125,
      "model_forward_time": 0.11553025245666504,
      "step": 35892
    },
    {
      "epoch": 0.0002190673828125,
      "step": 35892,
      "training_step_time": 0.3692634105682373
    },
    {
      "epoch": 0.000219073486328125,
      "model_forward_time": 0.11549782752990723,
      "step": 35893
    },
    {
      "epoch": 0.000219073486328125,
      "step": 35893,
      "training_step_time": 0.5516440868377686
    },
    {
      "epoch": 0.00021907958984375,
      "model_forward_time": 0.1159825325012207,
      "step": 35894
    },
    {
      "epoch": 0.00021907958984375,
      "step": 35894,
      "training_step_time": 0.4432210922241211
    },
    {
      "epoch": 0.000219085693359375,
      "model_forward_time": 0.1149909496307373,
      "step": 35895
    },
    {
      "epoch": 0.000219085693359375,
      "step": 35895,
      "training_step_time": 0.3833131790161133
    },
    {
      "epoch": 0.000219091796875,
      "model_forward_time": 0.11696171760559082,
      "step": 35896
    },
    {
      "epoch": 0.000219091796875,
      "step": 35896,
      "training_step_time": 0.40047788619995117
    },
    {
      "epoch": 0.000219097900390625,
      "model_forward_time": 0.1152348518371582,
      "step": 35897
    },
    {
      "epoch": 0.000219097900390625,
      "step": 35897,
      "training_step_time": 0.39142608642578125
    },
    {
      "epoch": 0.00021910400390625,
      "model_forward_time": 0.1155996322631836,
      "step": 35898
    },
    {
      "epoch": 0.00021910400390625,
      "step": 35898,
      "training_step_time": 0.43044281005859375
    },
    {
      "epoch": 0.000219110107421875,
      "model_forward_time": 0.11571884155273438,
      "step": 35899
    },
    {
      "epoch": 0.000219110107421875,
      "step": 35899,
      "training_step_time": 0.5940206050872803
    },
    {
      "epoch": 0.0002191162109375,
      "grad_norm": 0.1358809620141983,
      "learning_rate": 3.799305640015152e-05,
      "loss": 0.0452,
      "step": 35900
    },
    {
      "epoch": 0.0002191162109375,
      "model_forward_time": 0.11519908905029297,
      "step": 35900
    },
    {
      "epoch": 0.0002191162109375,
      "step": 35900,
      "training_step_time": 0.39826297760009766
    },
    {
      "epoch": 0.000219122314453125,
      "model_forward_time": 0.11435484886169434,
      "step": 35901
    },
    {
      "epoch": 0.000219122314453125,
      "step": 35901,
      "training_step_time": 0.39328813552856445
    },
    {
      "epoch": 0.00021912841796875,
      "model_forward_time": 0.11546945571899414,
      "step": 35902
    },
    {
      "epoch": 0.00021912841796875,
      "step": 35902,
      "training_step_time": 0.38253259658813477
    },
    {
      "epoch": 0.000219134521484375,
      "model_forward_time": 0.11542034149169922,
      "step": 35903
    },
    {
      "epoch": 0.000219134521484375,
      "step": 35903,
      "training_step_time": 0.38783884048461914
    },
    {
      "epoch": 0.000219140625,
      "model_forward_time": 0.11483097076416016,
      "step": 35904
    },
    {
      "epoch": 0.000219140625,
      "step": 35904,
      "training_step_time": 0.3880770206451416
    },
    {
      "epoch": 0.000219146728515625,
      "model_forward_time": 0.11523079872131348,
      "step": 35905
    },
    {
      "epoch": 0.000219146728515625,
      "step": 35905,
      "training_step_time": 0.7003250122070312
    },
    {
      "epoch": 0.00021915283203125,
      "model_forward_time": 0.11441636085510254,
      "step": 35906
    },
    {
      "epoch": 0.00021915283203125,
      "step": 35906,
      "training_step_time": 0.38080835342407227
    },
    {
      "epoch": 0.000219158935546875,
      "model_forward_time": 0.11534571647644043,
      "step": 35907
    },
    {
      "epoch": 0.000219158935546875,
      "step": 35907,
      "training_step_time": 0.4377169609069824
    },
    {
      "epoch": 0.0002191650390625,
      "model_forward_time": 0.11486530303955078,
      "step": 35908
    },
    {
      "epoch": 0.0002191650390625,
      "step": 35908,
      "training_step_time": 0.4471442699432373
    },
    {
      "epoch": 0.000219171142578125,
      "model_forward_time": 0.11490535736083984,
      "step": 35909
    },
    {
      "epoch": 0.000219171142578125,
      "step": 35909,
      "training_step_time": 0.41352128982543945
    },
    {
      "epoch": 0.00021917724609375,
      "grad_norm": 0.11264143139123917,
      "learning_rate": 3.796630677712697e-05,
      "loss": 0.0403,
      "step": 35910
    },
    {
      "epoch": 0.00021917724609375,
      "model_forward_time": 0.11457490921020508,
      "step": 35910
    },
    {
      "epoch": 0.00021917724609375,
      "step": 35910,
      "training_step_time": 0.4238772392272949
    },
    {
      "epoch": 0.000219183349609375,
      "model_forward_time": 0.11494803428649902,
      "step": 35911
    },
    {
      "epoch": 0.000219183349609375,
      "step": 35911,
      "training_step_time": 0.3942430019378662
    },
    {
      "epoch": 0.000219189453125,
      "model_forward_time": 0.1144263744354248,
      "step": 35912
    },
    {
      "epoch": 0.000219189453125,
      "step": 35912,
      "training_step_time": 0.38643932342529297
    },
    {
      "epoch": 0.000219195556640625,
      "model_forward_time": 0.11581635475158691,
      "step": 35913
    },
    {
      "epoch": 0.000219195556640625,
      "step": 35913,
      "training_step_time": 0.3880326747894287
    },
    {
      "epoch": 0.00021920166015625,
      "model_forward_time": 0.1154487133026123,
      "step": 35914
    },
    {
      "epoch": 0.00021920166015625,
      "step": 35914,
      "training_step_time": 0.389451265335083
    },
    {
      "epoch": 0.000219207763671875,
      "model_forward_time": 0.11536359786987305,
      "step": 35915
    },
    {
      "epoch": 0.000219207763671875,
      "step": 35915,
      "training_step_time": 0.38962244987487793
    },
    {
      "epoch": 0.0002192138671875,
      "model_forward_time": 0.11618518829345703,
      "step": 35916
    },
    {
      "epoch": 0.0002192138671875,
      "step": 35916,
      "training_step_time": 0.3924264907836914
    },
    {
      "epoch": 0.000219219970703125,
      "model_forward_time": 0.11574959754943848,
      "step": 35917
    },
    {
      "epoch": 0.000219219970703125,
      "step": 35917,
      "training_step_time": 0.5394754409790039
    },
    {
      "epoch": 0.00021922607421875,
      "model_forward_time": 0.11576700210571289,
      "step": 35918
    },
    {
      "epoch": 0.00021922607421875,
      "step": 35918,
      "training_step_time": 0.38797688484191895
    },
    {
      "epoch": 0.000219232177734375,
      "model_forward_time": 0.11543011665344238,
      "step": 35919
    },
    {
      "epoch": 0.000219232177734375,
      "step": 35919,
      "training_step_time": 0.4013521671295166
    },
    {
      "epoch": 0.00021923828125,
      "grad_norm": 0.12655875086784363,
      "learning_rate": 3.793956080962068e-05,
      "loss": 0.0415,
      "step": 35920
    },
    {
      "epoch": 0.00021923828125,
      "model_forward_time": 0.1146087646484375,
      "step": 35920
    },
    {
      "epoch": 0.00021923828125,
      "step": 35920,
      "training_step_time": 0.40543389320373535
    },
    {
      "epoch": 0.000219244384765625,
      "model_forward_time": 0.11829304695129395,
      "step": 35921
    },
    {
      "epoch": 0.000219244384765625,
      "step": 35921,
      "training_step_time": 0.4378519058227539
    },
    {
      "epoch": 0.00021925048828125,
      "model_forward_time": 0.12421154975891113,
      "step": 35922
    },
    {
      "epoch": 0.00021925048828125,
      "step": 35922,
      "training_step_time": 0.5021064281463623
    },
    {
      "epoch": 0.000219256591796875,
      "model_forward_time": 0.11907124519348145,
      "step": 35923
    },
    {
      "epoch": 0.000219256591796875,
      "step": 35923,
      "training_step_time": 0.5030684471130371
    },
    {
      "epoch": 0.0002192626953125,
      "model_forward_time": 0.11721134185791016,
      "step": 35924
    },
    {
      "epoch": 0.0002192626953125,
      "step": 35924,
      "training_step_time": 0.3865222930908203
    },
    {
      "epoch": 0.000219268798828125,
      "model_forward_time": 0.11965203285217285,
      "step": 35925
    },
    {
      "epoch": 0.000219268798828125,
      "step": 35925,
      "training_step_time": 0.3872189521789551
    },
    {
      "epoch": 0.00021927490234375,
      "model_forward_time": 0.1152496337890625,
      "step": 35926
    },
    {
      "epoch": 0.00021927490234375,
      "step": 35926,
      "training_step_time": 0.3850717544555664
    },
    {
      "epoch": 0.000219281005859375,
      "model_forward_time": 0.11584711074829102,
      "step": 35927
    },
    {
      "epoch": 0.000219281005859375,
      "step": 35927,
      "training_step_time": 0.38532137870788574
    },
    {
      "epoch": 0.000219287109375,
      "model_forward_time": 0.11556005477905273,
      "step": 35928
    },
    {
      "epoch": 0.000219287109375,
      "step": 35928,
      "training_step_time": 0.3964977264404297
    },
    {
      "epoch": 0.000219293212890625,
      "model_forward_time": 0.11629509925842285,
      "step": 35929
    },
    {
      "epoch": 0.000219293212890625,
      "step": 35929,
      "training_step_time": 0.7082302570343018
    },
    {
      "epoch": 0.00021929931640625,
      "grad_norm": 0.10913077741861343,
      "learning_rate": 3.791281850575737e-05,
      "loss": 0.0454,
      "step": 35930
    },
    {
      "epoch": 0.00021929931640625,
      "model_forward_time": 0.11525416374206543,
      "step": 35930
    },
    {
      "epoch": 0.00021929931640625,
      "step": 35930,
      "training_step_time": 0.38762831687927246
    },
    {
      "epoch": 0.000219305419921875,
      "model_forward_time": 0.11573576927185059,
      "step": 35931
    },
    {
      "epoch": 0.000219305419921875,
      "step": 35931,
      "training_step_time": 0.3920431137084961
    },
    {
      "epoch": 0.0002193115234375,
      "model_forward_time": 0.11589455604553223,
      "step": 35932
    },
    {
      "epoch": 0.0002193115234375,
      "step": 35932,
      "training_step_time": 0.3907492160797119
    },
    {
      "epoch": 0.000219317626953125,
      "model_forward_time": 0.11536026000976562,
      "step": 35933
    },
    {
      "epoch": 0.000219317626953125,
      "step": 35933,
      "training_step_time": 0.3850526809692383
    },
    {
      "epoch": 0.00021932373046875,
      "model_forward_time": 0.11524152755737305,
      "step": 35934
    },
    {
      "epoch": 0.00021932373046875,
      "step": 35934,
      "training_step_time": 0.4038224220275879
    },
    {
      "epoch": 0.000219329833984375,
      "model_forward_time": 0.11787271499633789,
      "step": 35935
    },
    {
      "epoch": 0.000219329833984375,
      "step": 35935,
      "training_step_time": 0.5146651268005371
    },
    {
      "epoch": 0.0002193359375,
      "model_forward_time": 0.1154489517211914,
      "step": 35936
    },
    {
      "epoch": 0.0002193359375,
      "step": 35936,
      "training_step_time": 0.46794557571411133
    },
    {
      "epoch": 0.000219342041015625,
      "model_forward_time": 0.11584019660949707,
      "step": 35937
    },
    {
      "epoch": 0.000219342041015625,
      "step": 35937,
      "training_step_time": 0.46186327934265137
    },
    {
      "epoch": 0.00021934814453125,
      "model_forward_time": 0.11520552635192871,
      "step": 35938
    },
    {
      "epoch": 0.00021934814453125,
      "step": 35938,
      "training_step_time": 0.38840389251708984
    },
    {
      "epoch": 0.000219354248046875,
      "model_forward_time": 0.11462259292602539,
      "step": 35939
    },
    {
      "epoch": 0.000219354248046875,
      "step": 35939,
      "training_step_time": 0.3988053798675537
    },
    {
      "epoch": 0.0002193603515625,
      "grad_norm": 0.08439106494188309,
      "learning_rate": 3.788607987366069e-05,
      "loss": 0.0405,
      "step": 35940
    },
    {
      "epoch": 0.0002193603515625,
      "model_forward_time": 0.11567068099975586,
      "step": 35940
    },
    {
      "epoch": 0.0002193603515625,
      "step": 35940,
      "training_step_time": 0.40126609802246094
    },
    {
      "epoch": 0.000219366455078125,
      "model_forward_time": 0.12206649780273438,
      "step": 35941
    },
    {
      "epoch": 0.000219366455078125,
      "step": 35941,
      "training_step_time": 0.5222930908203125
    },
    {
      "epoch": 0.00021937255859375,
      "model_forward_time": 0.11798286437988281,
      "step": 35942
    },
    {
      "epoch": 0.00021937255859375,
      "step": 35942,
      "training_step_time": 0.3860206604003906
    },
    {
      "epoch": 0.000219378662109375,
      "model_forward_time": 0.1155707836151123,
      "step": 35943
    },
    {
      "epoch": 0.000219378662109375,
      "step": 35943,
      "training_step_time": 0.39245009422302246
    },
    {
      "epoch": 0.000219384765625,
      "model_forward_time": 0.1155543327331543,
      "step": 35944
    },
    {
      "epoch": 0.000219384765625,
      "step": 35944,
      "training_step_time": 0.39310479164123535
    },
    {
      "epoch": 0.000219390869140625,
      "model_forward_time": 0.11562442779541016,
      "step": 35945
    },
    {
      "epoch": 0.000219390869140625,
      "step": 35945,
      "training_step_time": 0.4112875461578369
    },
    {
      "epoch": 0.00021939697265625,
      "model_forward_time": 0.11529421806335449,
      "step": 35946
    },
    {
      "epoch": 0.00021939697265625,
      "step": 35946,
      "training_step_time": 0.4034459590911865
    },
    {
      "epoch": 0.000219403076171875,
      "model_forward_time": 0.1160280704498291,
      "step": 35947
    },
    {
      "epoch": 0.000219403076171875,
      "step": 35947,
      "training_step_time": 0.6272051334381104
    },
    {
      "epoch": 0.0002194091796875,
      "model_forward_time": 0.1159372329711914,
      "step": 35948
    },
    {
      "epoch": 0.0002194091796875,
      "step": 35948,
      "training_step_time": 0.5351603031158447
    },
    {
      "epoch": 0.000219415283203125,
      "model_forward_time": 0.11561036109924316,
      "step": 35949
    },
    {
      "epoch": 0.000219415283203125,
      "step": 35949,
      "training_step_time": 0.42891907691955566
    },
    {
      "epoch": 0.00021942138671875,
      "grad_norm": 0.10724297165870667,
      "learning_rate": 3.7859344921453064e-05,
      "loss": 0.0382,
      "step": 35950
    },
    {
      "epoch": 0.00021942138671875,
      "model_forward_time": 0.11516356468200684,
      "step": 35950
    },
    {
      "epoch": 0.00021942138671875,
      "step": 35950,
      "training_step_time": 0.44933438301086426
    },
    {
      "epoch": 0.000219427490234375,
      "model_forward_time": 0.11522078514099121,
      "step": 35951
    },
    {
      "epoch": 0.000219427490234375,
      "step": 35951,
      "training_step_time": 0.40736985206604004
    },
    {
      "epoch": 0.00021943359375,
      "model_forward_time": 0.1150209903717041,
      "step": 35952
    },
    {
      "epoch": 0.00021943359375,
      "step": 35952,
      "training_step_time": 0.3916652202606201
    },
    {
      "epoch": 0.000219439697265625,
      "model_forward_time": 0.11558985710144043,
      "step": 35953
    },
    {
      "epoch": 0.000219439697265625,
      "step": 35953,
      "training_step_time": 0.3952062129974365
    },
    {
      "epoch": 0.00021944580078125,
      "model_forward_time": 0.11754512786865234,
      "step": 35954
    },
    {
      "epoch": 0.00021944580078125,
      "step": 35954,
      "training_step_time": 0.38825273513793945
    },
    {
      "epoch": 0.000219451904296875,
      "model_forward_time": 0.11493515968322754,
      "step": 35955
    },
    {
      "epoch": 0.000219451904296875,
      "step": 35955,
      "training_step_time": 0.4022037982940674
    },
    {
      "epoch": 0.0002194580078125,
      "model_forward_time": 0.11549806594848633,
      "step": 35956
    },
    {
      "epoch": 0.0002194580078125,
      "step": 35956,
      "training_step_time": 0.40163230895996094
    },
    {
      "epoch": 0.000219464111328125,
      "model_forward_time": 0.11543488502502441,
      "step": 35957
    },
    {
      "epoch": 0.000219464111328125,
      "step": 35957,
      "training_step_time": 0.40229010581970215
    },
    {
      "epoch": 0.00021947021484375,
      "model_forward_time": 0.11591458320617676,
      "step": 35958
    },
    {
      "epoch": 0.00021947021484375,
      "step": 35958,
      "training_step_time": 0.39132237434387207
    },
    {
      "epoch": 0.000219476318359375,
      "model_forward_time": 0.11616158485412598,
      "step": 35959
    },
    {
      "epoch": 0.000219476318359375,
      "step": 35959,
      "training_step_time": 0.48148393630981445
    },
    {
      "epoch": 0.000219482421875,
      "grad_norm": 0.07181382924318314,
      "learning_rate": 3.783261365725592e-05,
      "loss": 0.0363,
      "step": 35960
    },
    {
      "epoch": 0.000219482421875,
      "model_forward_time": 0.11547112464904785,
      "step": 35960
    },
    {
      "epoch": 0.000219482421875,
      "step": 35960,
      "training_step_time": 0.3833951950073242
    },
    {
      "epoch": 0.000219488525390625,
      "model_forward_time": 0.11603879928588867,
      "step": 35961
    },
    {
      "epoch": 0.000219488525390625,
      "step": 35961,
      "training_step_time": 0.39292144775390625
    },
    {
      "epoch": 0.00021949462890625,
      "model_forward_time": 0.11534500122070312,
      "step": 35962
    },
    {
      "epoch": 0.00021949462890625,
      "step": 35962,
      "training_step_time": 0.39104771614074707
    },
    {
      "epoch": 0.000219500732421875,
      "model_forward_time": 0.1155397891998291,
      "step": 35963
    },
    {
      "epoch": 0.000219500732421875,
      "step": 35963,
      "training_step_time": 0.4694082736968994
    },
    {
      "epoch": 0.0002195068359375,
      "model_forward_time": 0.11531519889831543,
      "step": 35964
    },
    {
      "epoch": 0.0002195068359375,
      "step": 35964,
      "training_step_time": 0.45194005966186523
    },
    {
      "epoch": 0.000219512939453125,
      "model_forward_time": 0.11534523963928223,
      "step": 35965
    },
    {
      "epoch": 0.000219512939453125,
      "step": 35965,
      "training_step_time": 0.4658787250518799
    },
    {
      "epoch": 0.00021951904296875,
      "model_forward_time": 0.11702966690063477,
      "step": 35966
    },
    {
      "epoch": 0.00021951904296875,
      "step": 35966,
      "training_step_time": 0.41446471214294434
    },
    {
      "epoch": 0.000219525146484375,
      "model_forward_time": 0.11552190780639648,
      "step": 35967
    },
    {
      "epoch": 0.000219525146484375,
      "step": 35967,
      "training_step_time": 0.40958118438720703
    },
    {
      "epoch": 0.00021953125,
      "model_forward_time": 0.11508369445800781,
      "step": 35968
    },
    {
      "epoch": 0.00021953125,
      "step": 35968,
      "training_step_time": 0.38768434524536133
    },
    {
      "epoch": 0.000219537353515625,
      "model_forward_time": 0.11558079719543457,
      "step": 35969
    },
    {
      "epoch": 0.000219537353515625,
      "step": 35969,
      "training_step_time": 0.39510059356689453
    },
    {
      "epoch": 0.00021954345703125,
      "grad_norm": 0.1911986768245697,
      "learning_rate": 3.780588608918947e-05,
      "loss": 0.039,
      "step": 35970
    },
    {
      "epoch": 0.00021954345703125,
      "model_forward_time": 0.11484694480895996,
      "step": 35970
    },
    {
      "epoch": 0.00021954345703125,
      "step": 35970,
      "training_step_time": 0.3917069435119629
    },
    {
      "epoch": 0.000219549560546875,
      "model_forward_time": 0.11585831642150879,
      "step": 35971
    },
    {
      "epoch": 0.000219549560546875,
      "step": 35971,
      "training_step_time": 0.6014013290405273
    },
    {
      "epoch": 0.0002195556640625,
      "model_forward_time": 0.11505770683288574,
      "step": 35972
    },
    {
      "epoch": 0.0002195556640625,
      "step": 35972,
      "training_step_time": 0.39115357398986816
    },
    {
      "epoch": 0.000219561767578125,
      "model_forward_time": 0.11569380760192871,
      "step": 35973
    },
    {
      "epoch": 0.000219561767578125,
      "step": 35973,
      "training_step_time": 0.39469385147094727
    },
    {
      "epoch": 0.00021956787109375,
      "model_forward_time": 0.11583256721496582,
      "step": 35974
    },
    {
      "epoch": 0.00021956787109375,
      "step": 35974,
      "training_step_time": 0.3903012275695801
    },
    {
      "epoch": 0.000219573974609375,
      "model_forward_time": 0.11519169807434082,
      "step": 35975
    },
    {
      "epoch": 0.000219573974609375,
      "step": 35975,
      "training_step_time": 0.3951904773712158
    },
    {
      "epoch": 0.000219580078125,
      "model_forward_time": 0.11562252044677734,
      "step": 35976
    },
    {
      "epoch": 0.000219580078125,
      "step": 35976,
      "training_step_time": 0.3995335102081299
    },
    {
      "epoch": 0.000219586181640625,
      "model_forward_time": 0.11565399169921875,
      "step": 35977
    },
    {
      "epoch": 0.000219586181640625,
      "step": 35977,
      "training_step_time": 0.5805342197418213
    },
    {
      "epoch": 0.00021959228515625,
      "model_forward_time": 0.11545300483703613,
      "step": 35978
    },
    {
      "epoch": 0.00021959228515625,
      "step": 35978,
      "training_step_time": 0.367372989654541
    },
    {
      "epoch": 0.000219598388671875,
      "model_forward_time": 0.11578512191772461,
      "step": 35979
    },
    {
      "epoch": 0.000219598388671875,
      "step": 35979,
      "training_step_time": 0.45102524757385254
    },
    {
      "epoch": 0.0002196044921875,
      "grad_norm": 0.15321096777915955,
      "learning_rate": 3.777916222537285e-05,
      "loss": 0.0417,
      "step": 35980
    },
    {
      "epoch": 0.0002196044921875,
      "model_forward_time": 0.11505770683288574,
      "step": 35980
    },
    {
      "epoch": 0.0002196044921875,
      "step": 35980,
      "training_step_time": 0.4028806686401367
    },
    {
      "epoch": 0.000219610595703125,
      "model_forward_time": 0.11490797996520996,
      "step": 35981
    },
    {
      "epoch": 0.000219610595703125,
      "step": 35981,
      "training_step_time": 0.38440704345703125
    },
    {
      "epoch": 0.00021961669921875,
      "model_forward_time": 0.11522531509399414,
      "step": 35982
    },
    {
      "epoch": 0.00021961669921875,
      "step": 35982,
      "training_step_time": 0.39293527603149414
    },
    {
      "epoch": 0.000219622802734375,
      "model_forward_time": 0.11572146415710449,
      "step": 35983
    },
    {
      "epoch": 0.000219622802734375,
      "step": 35983,
      "training_step_time": 0.44815611839294434
    },
    {
      "epoch": 0.00021962890625,
      "model_forward_time": 0.11630010604858398,
      "step": 35984
    },
    {
      "epoch": 0.00021962890625,
      "step": 35984,
      "training_step_time": 0.38435983657836914
    },
    {
      "epoch": 0.000219635009765625,
      "model_forward_time": 0.11594128608703613,
      "step": 35985
    },
    {
      "epoch": 0.000219635009765625,
      "step": 35985,
      "training_step_time": 0.38541126251220703
    },
    {
      "epoch": 0.00021964111328125,
      "model_forward_time": 0.11622977256774902,
      "step": 35986
    },
    {
      "epoch": 0.00021964111328125,
      "step": 35986,
      "training_step_time": 0.3928818702697754
    },
    {
      "epoch": 0.000219647216796875,
      "model_forward_time": 0.11548399925231934,
      "step": 35987
    },
    {
      "epoch": 0.000219647216796875,
      "step": 35987,
      "training_step_time": 0.3849022388458252
    },
    {
      "epoch": 0.0002196533203125,
      "model_forward_time": 0.11780214309692383,
      "step": 35988
    },
    {
      "epoch": 0.0002196533203125,
      "step": 35988,
      "training_step_time": 0.40268969535827637
    },
    {
      "epoch": 0.000219659423828125,
      "model_forward_time": 0.11570286750793457,
      "step": 35989
    },
    {
      "epoch": 0.000219659423828125,
      "step": 35989,
      "training_step_time": 0.7520081996917725
    },
    {
      "epoch": 0.00021966552734375,
      "grad_norm": 0.10490307211875916,
      "learning_rate": 3.77524420739241e-05,
      "loss": 0.0388,
      "step": 35990
    },
    {
      "epoch": 0.00021966552734375,
      "model_forward_time": 0.11527204513549805,
      "step": 35990
    },
    {
      "epoch": 0.00021966552734375,
      "step": 35990,
      "training_step_time": 0.4081592559814453
    },
    {
      "epoch": 0.000219671630859375,
      "model_forward_time": 0.11482429504394531,
      "step": 35991
    },
    {
      "epoch": 0.000219671630859375,
      "step": 35991,
      "training_step_time": 0.4106276035308838
    },
    {
      "epoch": 0.000219677734375,
      "model_forward_time": 0.11494684219360352,
      "step": 35992
    },
    {
      "epoch": 0.000219677734375,
      "step": 35992,
      "training_step_time": 0.3847317695617676
    },
    {
      "epoch": 0.000219683837890625,
      "model_forward_time": 0.11460614204406738,
      "step": 35993
    },
    {
      "epoch": 0.000219683837890625,
      "step": 35993,
      "training_step_time": 0.44721078872680664
    },
    {
      "epoch": 0.00021968994140625,
      "model_forward_time": 0.11479425430297852,
      "step": 35994
    },
    {
      "epoch": 0.00021968994140625,
      "step": 35994,
      "training_step_time": 0.4534883499145508
    },
    {
      "epoch": 0.000219696044921875,
      "model_forward_time": 0.11541366577148438,
      "step": 35995
    },
    {
      "epoch": 0.000219696044921875,
      "step": 35995,
      "training_step_time": 0.39664292335510254
    },
    {
      "epoch": 0.0002197021484375,
      "model_forward_time": 0.1153111457824707,
      "step": 35996
    },
    {
      "epoch": 0.0002197021484375,
      "step": 35996,
      "training_step_time": 0.39438533782958984
    },
    {
      "epoch": 0.000219708251953125,
      "model_forward_time": 0.11524319648742676,
      "step": 35997
    },
    {
      "epoch": 0.000219708251953125,
      "step": 35997,
      "training_step_time": 0.38780713081359863
    },
    {
      "epoch": 0.00021971435546875,
      "model_forward_time": 0.11563944816589355,
      "step": 35998
    },
    {
      "epoch": 0.00021971435546875,
      "step": 35998,
      "training_step_time": 0.38486695289611816
    },
    {
      "epoch": 0.000219720458984375,
      "model_forward_time": 0.11573457717895508,
      "step": 35999
    },
    {
      "epoch": 0.000219720458984375,
      "step": 35999,
      "training_step_time": 0.3965468406677246
    },
    {
      "epoch": 0.0002197265625,
      "grad_norm": 0.10736547410488129,
      "learning_rate": 3.772572564296005e-05,
      "loss": 0.0411,
      "step": 36000
    },
    {
      "epoch": 0.0002197265625,
      "model_forward_time": 0.11321258544921875,
      "step": 36000
    },
    {
      "epoch": 0.0002197265625,
      "step": 36000,
      "training_step_time": 0.37563419342041016
    },
    {
      "epoch": 0.000219732666015625,
      "model_forward_time": 0.11300253868103027,
      "step": 36001
    },
    {
      "epoch": 0.000219732666015625,
      "step": 36001,
      "training_step_time": 0.3769996166229248
    },
    {
      "epoch": 0.00021973876953125,
      "model_forward_time": 0.11313676834106445,
      "step": 36002
    },
    {
      "epoch": 0.00021973876953125,
      "step": 36002,
      "training_step_time": 0.37792205810546875
    },
    {
      "epoch": 0.000219744873046875,
      "model_forward_time": 0.11328458786010742,
      "step": 36003
    },
    {
      "epoch": 0.000219744873046875,
      "step": 36003,
      "training_step_time": 0.37759900093078613
    },
    {
      "epoch": 0.0002197509765625,
      "model_forward_time": 0.11418342590332031,
      "step": 36004
    },
    {
      "epoch": 0.0002197509765625,
      "step": 36004,
      "training_step_time": 0.3823723793029785
    },
    {
      "epoch": 0.000219757080078125,
      "model_forward_time": 0.11420273780822754,
      "step": 36005
    },
    {
      "epoch": 0.000219757080078125,
      "step": 36005,
      "training_step_time": 0.39681315422058105
    },
    {
      "epoch": 0.00021976318359375,
      "model_forward_time": 0.11529922485351562,
      "step": 36006
    },
    {
      "epoch": 0.00021976318359375,
      "step": 36006,
      "training_step_time": 0.40088343620300293
    },
    {
      "epoch": 0.000219769287109375,
      "model_forward_time": 0.11548185348510742,
      "step": 36007
    },
    {
      "epoch": 0.000219769287109375,
      "step": 36007,
      "training_step_time": 0.4390592575073242
    },
    {
      "epoch": 0.000219775390625,
      "model_forward_time": 0.11532759666442871,
      "step": 36008
    },
    {
      "epoch": 0.000219775390625,
      "step": 36008,
      "training_step_time": 0.4158148765563965
    },
    {
      "epoch": 0.000219781494140625,
      "model_forward_time": 0.11698031425476074,
      "step": 36009
    },
    {
      "epoch": 0.000219781494140625,
      "step": 36009,
      "training_step_time": 0.42188334465026855
    },
    {
      "epoch": 0.00021978759765625,
      "grad_norm": 0.09292668849229813,
      "learning_rate": 3.7699012940596477e-05,
      "loss": 0.0394,
      "step": 36010
    },
    {
      "epoch": 0.00021978759765625,
      "model_forward_time": 0.11497020721435547,
      "step": 36010
    },
    {
      "epoch": 0.00021978759765625,
      "step": 36010,
      "training_step_time": 0.4363853931427002
    },
    {
      "epoch": 0.000219793701171875,
      "model_forward_time": 0.11619806289672852,
      "step": 36011
    },
    {
      "epoch": 0.000219793701171875,
      "step": 36011,
      "training_step_time": 0.39337968826293945
    },
    {
      "epoch": 0.0002197998046875,
      "model_forward_time": 0.11508965492248535,
      "step": 36012
    },
    {
      "epoch": 0.0002197998046875,
      "step": 36012,
      "training_step_time": 0.3839695453643799
    },
    {
      "epoch": 0.000219805908203125,
      "model_forward_time": 0.11553835868835449,
      "step": 36013
    },
    {
      "epoch": 0.000219805908203125,
      "step": 36013,
      "training_step_time": 0.39513134956359863
    },
    {
      "epoch": 0.00021981201171875,
      "model_forward_time": 0.11528468132019043,
      "step": 36014
    },
    {
      "epoch": 0.00021981201171875,
      "step": 36014,
      "training_step_time": 0.393949031829834
    },
    {
      "epoch": 0.000219818115234375,
      "model_forward_time": 0.11694526672363281,
      "step": 36015
    },
    {
      "epoch": 0.000219818115234375,
      "step": 36015,
      "training_step_time": 0.37311434745788574
    },
    {
      "epoch": 0.00021982421875,
      "model_forward_time": 0.11625504493713379,
      "step": 36016
    },
    {
      "epoch": 0.00021982421875,
      "step": 36016,
      "training_step_time": 0.4038219451904297
    },
    {
      "epoch": 0.000219830322265625,
      "model_forward_time": 0.11513400077819824,
      "step": 36017
    },
    {
      "epoch": 0.000219830322265625,
      "step": 36017,
      "training_step_time": 0.4032595157623291
    },
    {
      "epoch": 0.00021983642578125,
      "model_forward_time": 0.11494040489196777,
      "step": 36018
    },
    {
      "epoch": 0.00021983642578125,
      "step": 36018,
      "training_step_time": 0.3822975158691406
    },
    {
      "epoch": 0.000219842529296875,
      "model_forward_time": 0.1160283088684082,
      "step": 36019
    },
    {
      "epoch": 0.000219842529296875,
      "step": 36019,
      "training_step_time": 0.40271472930908203
    },
    {
      "epoch": 0.0002198486328125,
      "grad_norm": 0.0989331603050232,
      "learning_rate": 3.767230397494798e-05,
      "loss": 0.0374,
      "step": 36020
    },
    {
      "epoch": 0.0002198486328125,
      "model_forward_time": 0.1157534122467041,
      "step": 36020
    },
    {
      "epoch": 0.0002198486328125,
      "step": 36020,
      "training_step_time": 0.393876314163208
    },
    {
      "epoch": 0.000219854736328125,
      "model_forward_time": 0.11526775360107422,
      "step": 36021
    },
    {
      "epoch": 0.000219854736328125,
      "step": 36021,
      "training_step_time": 0.47118115425109863
    },
    {
      "epoch": 0.00021986083984375,
      "model_forward_time": 0.11506915092468262,
      "step": 36022
    },
    {
      "epoch": 0.00021986083984375,
      "step": 36022,
      "training_step_time": 0.4741401672363281
    },
    {
      "epoch": 0.000219866943359375,
      "model_forward_time": 0.11507987976074219,
      "step": 36023
    },
    {
      "epoch": 0.000219866943359375,
      "step": 36023,
      "training_step_time": 0.3900942802429199
    },
    {
      "epoch": 0.000219873046875,
      "model_forward_time": 0.11546778678894043,
      "step": 36024
    },
    {
      "epoch": 0.000219873046875,
      "step": 36024,
      "training_step_time": 0.4215078353881836
    },
    {
      "epoch": 0.000219879150390625,
      "model_forward_time": 0.11555790901184082,
      "step": 36025
    },
    {
      "epoch": 0.000219879150390625,
      "step": 36025,
      "training_step_time": 0.3974723815917969
    },
    {
      "epoch": 0.00021988525390625,
      "model_forward_time": 0.11493992805480957,
      "step": 36026
    },
    {
      "epoch": 0.00021988525390625,
      "step": 36026,
      "training_step_time": 0.40056371688842773
    },
    {
      "epoch": 0.000219891357421875,
      "model_forward_time": 0.11560797691345215,
      "step": 36027
    },
    {
      "epoch": 0.000219891357421875,
      "step": 36027,
      "training_step_time": 0.38720083236694336
    },
    {
      "epoch": 0.0002198974609375,
      "model_forward_time": 0.11521506309509277,
      "step": 36028
    },
    {
      "epoch": 0.0002198974609375,
      "step": 36028,
      "training_step_time": 0.39485788345336914
    },
    {
      "epoch": 0.000219903564453125,
      "model_forward_time": 0.11579346656799316,
      "step": 36029
    },
    {
      "epoch": 0.000219903564453125,
      "step": 36029,
      "training_step_time": 0.397693395614624
    },
    {
      "epoch": 0.00021990966796875,
      "grad_norm": 0.10386261343955994,
      "learning_rate": 3.764559875412803e-05,
      "loss": 0.0425,
      "step": 36030
    },
    {
      "epoch": 0.00021990966796875,
      "model_forward_time": 0.11594176292419434,
      "step": 36030
    },
    {
      "epoch": 0.00021990966796875,
      "step": 36030,
      "training_step_time": 0.38183069229125977
    },
    {
      "epoch": 0.000219915771484375,
      "model_forward_time": 0.11585307121276855,
      "step": 36031
    },
    {
      "epoch": 0.000219915771484375,
      "step": 36031,
      "training_step_time": 0.4132373332977295
    },
    {
      "epoch": 0.000219921875,
      "model_forward_time": 0.11595034599304199,
      "step": 36032
    },
    {
      "epoch": 0.000219921875,
      "step": 36032,
      "training_step_time": 0.4047384262084961
    },
    {
      "epoch": 0.000219927978515625,
      "model_forward_time": 0.11546158790588379,
      "step": 36033
    },
    {
      "epoch": 0.000219927978515625,
      "step": 36033,
      "training_step_time": 0.3875441551208496
    },
    {
      "epoch": 0.00021993408203125,
      "model_forward_time": 0.11645674705505371,
      "step": 36034
    },
    {
      "epoch": 0.00021993408203125,
      "step": 36034,
      "training_step_time": 0.40582275390625
    },
    {
      "epoch": 0.000219940185546875,
      "model_forward_time": 0.11553382873535156,
      "step": 36035
    },
    {
      "epoch": 0.000219940185546875,
      "step": 36035,
      "training_step_time": 0.4546065330505371
    },
    {
      "epoch": 0.0002199462890625,
      "model_forward_time": 0.11534476280212402,
      "step": 36036
    },
    {
      "epoch": 0.0002199462890625,
      "step": 36036,
      "training_step_time": 0.4220893383026123
    },
    {
      "epoch": 0.000219952392578125,
      "model_forward_time": 0.11597990989685059,
      "step": 36037
    },
    {
      "epoch": 0.000219952392578125,
      "step": 36037,
      "training_step_time": 0.4686293601989746
    },
    {
      "epoch": 0.00021995849609375,
      "model_forward_time": 0.11562705039978027,
      "step": 36038
    },
    {
      "epoch": 0.00021995849609375,
      "step": 36038,
      "training_step_time": 0.46185731887817383
    },
    {
      "epoch": 0.000219964599609375,
      "model_forward_time": 0.1158294677734375,
      "step": 36039
    },
    {
      "epoch": 0.000219964599609375,
      "step": 36039,
      "training_step_time": 0.4784097671508789
    },
    {
      "epoch": 0.000219970703125,
      "grad_norm": 0.10430040955543518,
      "learning_rate": 3.761889728624899e-05,
      "loss": 0.038,
      "step": 36040
    },
    {
      "epoch": 0.000219970703125,
      "model_forward_time": 0.11504983901977539,
      "step": 36040
    },
    {
      "epoch": 0.000219970703125,
      "step": 36040,
      "training_step_time": 0.41208767890930176
    },
    {
      "epoch": 0.000219976806640625,
      "model_forward_time": 0.11504101753234863,
      "step": 36041
    },
    {
      "epoch": 0.000219976806640625,
      "step": 36041,
      "training_step_time": 0.39243412017822266
    },
    {
      "epoch": 0.00021998291015625,
      "model_forward_time": 0.11554884910583496,
      "step": 36042
    },
    {
      "epoch": 0.00021998291015625,
      "step": 36042,
      "training_step_time": 0.3836843967437744
    },
    {
      "epoch": 0.000219989013671875,
      "model_forward_time": 0.11523962020874023,
      "step": 36043
    },
    {
      "epoch": 0.000219989013671875,
      "step": 36043,
      "training_step_time": 0.39182376861572266
    },
    {
      "epoch": 0.0002199951171875,
      "model_forward_time": 0.115264892578125,
      "step": 36044
    },
    {
      "epoch": 0.0002199951171875,
      "step": 36044,
      "training_step_time": 0.3852818012237549
    },
    {
      "epoch": 0.000220001220703125,
      "model_forward_time": 0.11501145362854004,
      "step": 36045
    },
    {
      "epoch": 0.000220001220703125,
      "step": 36045,
      "training_step_time": 0.40747857093811035
    },
    {
      "epoch": 0.00022000732421875,
      "model_forward_time": 0.11539506912231445,
      "step": 36046
    },
    {
      "epoch": 0.00022000732421875,
      "step": 36046,
      "training_step_time": 0.4089162349700928
    },
    {
      "epoch": 0.000220013427734375,
      "model_forward_time": 0.11587262153625488,
      "step": 36047
    },
    {
      "epoch": 0.000220013427734375,
      "step": 36047,
      "training_step_time": 0.3990974426269531
    },
    {
      "epoch": 0.00022001953125,
      "model_forward_time": 0.11512160301208496,
      "step": 36048
    },
    {
      "epoch": 0.00022001953125,
      "step": 36048,
      "training_step_time": 0.3822023868560791
    },
    {
      "epoch": 0.000220025634765625,
      "model_forward_time": 0.11568522453308105,
      "step": 36049
    },
    {
      "epoch": 0.000220025634765625,
      "step": 36049,
      "training_step_time": 0.3960120677947998
    },
    {
      "epoch": 0.00022003173828125,
      "grad_norm": 0.10382256656885147,
      "learning_rate": 3.7592199579422035e-05,
      "loss": 0.0402,
      "step": 36050
    },
    {
      "epoch": 0.00022003173828125,
      "model_forward_time": 0.11637306213378906,
      "step": 36050
    },
    {
      "epoch": 0.00022003173828125,
      "step": 36050,
      "training_step_time": 0.4498875141143799
    },
    {
      "epoch": 0.000220037841796875,
      "model_forward_time": 0.11573386192321777,
      "step": 36051
    },
    {
      "epoch": 0.000220037841796875,
      "step": 36051,
      "training_step_time": 0.4475209712982178
    },
    {
      "epoch": 0.0002200439453125,
      "model_forward_time": 0.1161653995513916,
      "step": 36052
    },
    {
      "epoch": 0.0002200439453125,
      "step": 36052,
      "training_step_time": 0.49835968017578125
    },
    {
      "epoch": 0.000220050048828125,
      "model_forward_time": 0.11542868614196777,
      "step": 36053
    },
    {
      "epoch": 0.000220050048828125,
      "step": 36053,
      "training_step_time": 0.4425382614135742
    },
    {
      "epoch": 0.00022005615234375,
      "model_forward_time": 0.11580276489257812,
      "step": 36054
    },
    {
      "epoch": 0.00022005615234375,
      "step": 36054,
      "training_step_time": 0.4657254219055176
    },
    {
      "epoch": 0.000220062255859375,
      "model_forward_time": 0.11489391326904297,
      "step": 36055
    },
    {
      "epoch": 0.000220062255859375,
      "step": 36055,
      "training_step_time": 0.3944242000579834
    },
    {
      "epoch": 0.000220068359375,
      "model_forward_time": 0.11603522300720215,
      "step": 36056
    },
    {
      "epoch": 0.000220068359375,
      "step": 36056,
      "training_step_time": 0.3853480815887451
    },
    {
      "epoch": 0.000220074462890625,
      "model_forward_time": 0.11539411544799805,
      "step": 36057
    },
    {
      "epoch": 0.000220074462890625,
      "step": 36057,
      "training_step_time": 0.3825523853302002
    },
    {
      "epoch": 0.00022008056640625,
      "model_forward_time": 0.11508965492248535,
      "step": 36058
    },
    {
      "epoch": 0.00022008056640625,
      "step": 36058,
      "training_step_time": 0.38053202629089355
    },
    {
      "epoch": 0.000220086669921875,
      "model_forward_time": 0.11515593528747559,
      "step": 36059
    },
    {
      "epoch": 0.000220086669921875,
      "step": 36059,
      "training_step_time": 0.40204954147338867
    },
    {
      "epoch": 0.0002200927734375,
      "grad_norm": 0.11500304192304611,
      "learning_rate": 3.756550564175727e-05,
      "loss": 0.0419,
      "step": 36060
    },
    {
      "epoch": 0.0002200927734375,
      "model_forward_time": 0.11470174789428711,
      "step": 36060
    },
    {
      "epoch": 0.0002200927734375,
      "step": 36060,
      "training_step_time": 0.3974738121032715
    },
    {
      "epoch": 0.000220098876953125,
      "model_forward_time": 0.11515378952026367,
      "step": 36061
    },
    {
      "epoch": 0.000220098876953125,
      "step": 36061,
      "training_step_time": 0.41242098808288574
    },
    {
      "epoch": 0.00022010498046875,
      "model_forward_time": 0.11567115783691406,
      "step": 36062
    },
    {
      "epoch": 0.00022010498046875,
      "step": 36062,
      "training_step_time": 0.3978257179260254
    },
    {
      "epoch": 0.000220111083984375,
      "model_forward_time": 0.1155390739440918,
      "step": 36063
    },
    {
      "epoch": 0.000220111083984375,
      "step": 36063,
      "training_step_time": 0.3966100215911865
    },
    {
      "epoch": 0.0002201171875,
      "model_forward_time": 0.11517119407653809,
      "step": 36064
    },
    {
      "epoch": 0.0002201171875,
      "step": 36064,
      "training_step_time": 0.4297599792480469
    },
    {
      "epoch": 0.000220123291015625,
      "model_forward_time": 0.1157982349395752,
      "step": 36065
    },
    {
      "epoch": 0.000220123291015625,
      "step": 36065,
      "training_step_time": 0.4042050838470459
    },
    {
      "epoch": 0.00022012939453125,
      "model_forward_time": 0.11547422409057617,
      "step": 36066
    },
    {
      "epoch": 0.00022012939453125,
      "step": 36066,
      "training_step_time": 0.46326661109924316
    },
    {
      "epoch": 0.000220135498046875,
      "model_forward_time": 0.11558842658996582,
      "step": 36067
    },
    {
      "epoch": 0.000220135498046875,
      "step": 36067,
      "training_step_time": 0.36829280853271484
    },
    {
      "epoch": 0.0002201416015625,
      "model_forward_time": 0.11795306205749512,
      "step": 36068
    },
    {
      "epoch": 0.0002201416015625,
      "step": 36068,
      "training_step_time": 0.45444822311401367
    },
    {
      "epoch": 0.000220147705078125,
      "model_forward_time": 0.11614418029785156,
      "step": 36069
    },
    {
      "epoch": 0.000220147705078125,
      "step": 36069,
      "training_step_time": 0.4111359119415283
    },
    {
      "epoch": 0.00022015380859375,
      "grad_norm": 0.13027700781822205,
      "learning_rate": 3.7538815481363554e-05,
      "loss": 0.0432,
      "step": 36070
    },
    {
      "epoch": 0.00022015380859375,
      "model_forward_time": 0.11522054672241211,
      "step": 36070
    },
    {
      "epoch": 0.00022015380859375,
      "step": 36070,
      "training_step_time": 0.3838529586791992
    },
    {
      "epoch": 0.000220159912109375,
      "model_forward_time": 0.11538839340209961,
      "step": 36071
    },
    {
      "epoch": 0.000220159912109375,
      "step": 36071,
      "training_step_time": 0.3865964412689209
    },
    {
      "epoch": 0.000220166015625,
      "model_forward_time": 0.11522698402404785,
      "step": 36072
    },
    {
      "epoch": 0.000220166015625,
      "step": 36072,
      "training_step_time": 0.3911619186401367
    },
    {
      "epoch": 0.000220172119140625,
      "model_forward_time": 0.11534261703491211,
      "step": 36073
    },
    {
      "epoch": 0.000220172119140625,
      "step": 36073,
      "training_step_time": 0.4665701389312744
    },
    {
      "epoch": 0.00022017822265625,
      "model_forward_time": 0.12019681930541992,
      "step": 36074
    },
    {
      "epoch": 0.00022017822265625,
      "step": 36074,
      "training_step_time": 0.386488676071167
    },
    {
      "epoch": 0.000220184326171875,
      "model_forward_time": 0.11547040939331055,
      "step": 36075
    },
    {
      "epoch": 0.000220184326171875,
      "step": 36075,
      "training_step_time": 0.39611339569091797
    },
    {
      "epoch": 0.0002201904296875,
      "model_forward_time": 0.1171731948852539,
      "step": 36076
    },
    {
      "epoch": 0.0002201904296875,
      "step": 36076,
      "training_step_time": 0.3924233913421631
    },
    {
      "epoch": 0.000220196533203125,
      "model_forward_time": 0.11588144302368164,
      "step": 36077
    },
    {
      "epoch": 0.000220196533203125,
      "step": 36077,
      "training_step_time": 0.3955397605895996
    },
    {
      "epoch": 0.00022020263671875,
      "model_forward_time": 0.11506247520446777,
      "step": 36078
    },
    {
      "epoch": 0.00022020263671875,
      "step": 36078,
      "training_step_time": 0.4175221920013428
    },
    {
      "epoch": 0.000220208740234375,
      "model_forward_time": 0.1151590347290039,
      "step": 36079
    },
    {
      "epoch": 0.000220208740234375,
      "step": 36079,
      "training_step_time": 0.44141483306884766
    },
    {
      "epoch": 0.00022021484375,
      "grad_norm": 0.09138980507850647,
      "learning_rate": 3.751212910634867e-05,
      "loss": 0.0421,
      "step": 36080
    },
    {
      "epoch": 0.00022021484375,
      "model_forward_time": 0.11626482009887695,
      "step": 36080
    },
    {
      "epoch": 0.00022021484375,
      "step": 36080,
      "training_step_time": 0.46038222312927246
    },
    {
      "epoch": 0.000220220947265625,
      "model_forward_time": 0.11542701721191406,
      "step": 36081
    },
    {
      "epoch": 0.000220220947265625,
      "step": 36081,
      "training_step_time": 0.4140186309814453
    },
    {
      "epoch": 0.00022022705078125,
      "model_forward_time": 0.11516761779785156,
      "step": 36082
    },
    {
      "epoch": 0.00022022705078125,
      "step": 36082,
      "training_step_time": 0.4321787357330322
    },
    {
      "epoch": 0.000220233154296875,
      "model_forward_time": 0.11540722846984863,
      "step": 36083
    },
    {
      "epoch": 0.000220233154296875,
      "step": 36083,
      "training_step_time": 0.4296553134918213
    },
    {
      "epoch": 0.0002202392578125,
      "model_forward_time": 0.11493873596191406,
      "step": 36084
    },
    {
      "epoch": 0.0002202392578125,
      "step": 36084,
      "training_step_time": 0.4046027660369873
    },
    {
      "epoch": 0.000220245361328125,
      "model_forward_time": 0.11541080474853516,
      "step": 36085
    },
    {
      "epoch": 0.000220245361328125,
      "step": 36085,
      "training_step_time": 0.39118337631225586
    },
    {
      "epoch": 0.00022025146484375,
      "model_forward_time": 0.11552119255065918,
      "step": 36086
    },
    {
      "epoch": 0.00022025146484375,
      "step": 36086,
      "training_step_time": 0.3880953788757324
    },
    {
      "epoch": 0.000220257568359375,
      "model_forward_time": 0.11491990089416504,
      "step": 36087
    },
    {
      "epoch": 0.000220257568359375,
      "step": 36087,
      "training_step_time": 0.3828132152557373
    },
    {
      "epoch": 0.000220263671875,
      "model_forward_time": 0.11507511138916016,
      "step": 36088
    },
    {
      "epoch": 0.000220263671875,
      "step": 36088,
      "training_step_time": 0.3928210735321045
    },
    {
      "epoch": 0.000220269775390625,
      "model_forward_time": 0.11537027359008789,
      "step": 36089
    },
    {
      "epoch": 0.000220269775390625,
      "step": 36089,
      "training_step_time": 0.3838670253753662
    },
    {
      "epoch": 0.00022027587890625,
      "grad_norm": 0.09855081886053085,
      "learning_rate": 3.748544652481927e-05,
      "loss": 0.0441,
      "step": 36090
    },
    {
      "epoch": 0.00022027587890625,
      "model_forward_time": 0.11516213417053223,
      "step": 36090
    },
    {
      "epoch": 0.00022027587890625,
      "step": 36090,
      "training_step_time": 0.4014904499053955
    },
    {
      "epoch": 0.000220281982421875,
      "model_forward_time": 0.11529302597045898,
      "step": 36091
    },
    {
      "epoch": 0.000220281982421875,
      "step": 36091,
      "training_step_time": 0.4014091491699219
    },
    {
      "epoch": 0.0002202880859375,
      "model_forward_time": 0.11694145202636719,
      "step": 36092
    },
    {
      "epoch": 0.0002202880859375,
      "step": 36092,
      "training_step_time": 0.38721132278442383
    },
    {
      "epoch": 0.000220294189453125,
      "model_forward_time": 0.11583876609802246,
      "step": 36093
    },
    {
      "epoch": 0.000220294189453125,
      "step": 36093,
      "training_step_time": 0.5018830299377441
    },
    {
      "epoch": 0.00022030029296875,
      "model_forward_time": 0.11507487297058105,
      "step": 36094
    },
    {
      "epoch": 0.00022030029296875,
      "step": 36094,
      "training_step_time": 0.4992051124572754
    },
    {
      "epoch": 0.000220306396484375,
      "model_forward_time": 0.11469697952270508,
      "step": 36095
    },
    {
      "epoch": 0.000220306396484375,
      "step": 36095,
      "training_step_time": 0.41119885444641113
    },
    {
      "epoch": 0.0002203125,
      "model_forward_time": 0.11521100997924805,
      "step": 36096
    },
    {
      "epoch": 0.0002203125,
      "step": 36096,
      "training_step_time": 0.44612956047058105
    },
    {
      "epoch": 0.000220318603515625,
      "model_forward_time": 0.11536550521850586,
      "step": 36097
    },
    {
      "epoch": 0.000220318603515625,
      "step": 36097,
      "training_step_time": 0.4688565731048584
    },
    {
      "epoch": 0.00022032470703125,
      "model_forward_time": 0.11539411544799805,
      "step": 36098
    },
    {
      "epoch": 0.00022032470703125,
      "step": 36098,
      "training_step_time": 0.4839131832122803
    },
    {
      "epoch": 0.000220330810546875,
      "model_forward_time": 0.11437654495239258,
      "step": 36099
    },
    {
      "epoch": 0.000220330810546875,
      "step": 36099,
      "training_step_time": 0.405670166015625
    },
    {
      "epoch": 0.0002203369140625,
      "grad_norm": 0.10826298594474792,
      "learning_rate": 3.7458767744880765e-05,
      "loss": 0.0385,
      "step": 36100
    },
    {
      "epoch": 0.0002203369140625,
      "model_forward_time": 0.11472129821777344,
      "step": 36100
    },
    {
      "epoch": 0.0002203369140625,
      "step": 36100,
      "training_step_time": 0.4137380123138428
    },
    {
      "epoch": 0.000220343017578125,
      "model_forward_time": 0.11469841003417969,
      "step": 36101
    },
    {
      "epoch": 0.000220343017578125,
      "step": 36101,
      "training_step_time": 0.39520835876464844
    },
    {
      "epoch": 0.00022034912109375,
      "model_forward_time": 0.11451101303100586,
      "step": 36102
    },
    {
      "epoch": 0.00022034912109375,
      "step": 36102,
      "training_step_time": 0.3906056880950928
    },
    {
      "epoch": 0.000220355224609375,
      "model_forward_time": 0.11613869667053223,
      "step": 36103
    },
    {
      "epoch": 0.000220355224609375,
      "step": 36103,
      "training_step_time": 0.3935282230377197
    },
    {
      "epoch": 0.000220361328125,
      "model_forward_time": 0.11547207832336426,
      "step": 36104
    },
    {
      "epoch": 0.000220361328125,
      "step": 36104,
      "training_step_time": 0.38042211532592773
    },
    {
      "epoch": 0.000220367431640625,
      "model_forward_time": 0.11562371253967285,
      "step": 36105
    },
    {
      "epoch": 0.000220367431640625,
      "step": 36105,
      "training_step_time": 0.38578295707702637
    },
    {
      "epoch": 0.00022037353515625,
      "model_forward_time": 0.11558127403259277,
      "step": 36106
    },
    {
      "epoch": 0.00022037353515625,
      "step": 36106,
      "training_step_time": 0.39551854133605957
    },
    {
      "epoch": 0.000220379638671875,
      "model_forward_time": 0.11589932441711426,
      "step": 36107
    },
    {
      "epoch": 0.000220379638671875,
      "step": 36107,
      "training_step_time": 0.6265127658843994
    },
    {
      "epoch": 0.0002203857421875,
      "model_forward_time": 0.11579370498657227,
      "step": 36108
    },
    {
      "epoch": 0.0002203857421875,
      "step": 36108,
      "training_step_time": 0.4132199287414551
    },
    {
      "epoch": 0.000220391845703125,
      "model_forward_time": 0.1151895523071289,
      "step": 36109
    },
    {
      "epoch": 0.000220391845703125,
      "step": 36109,
      "training_step_time": 0.39548563957214355
    },
    {
      "epoch": 0.00022039794921875,
      "grad_norm": 0.1253972053527832,
      "learning_rate": 3.7432092774637504e-05,
      "loss": 0.0414,
      "step": 36110
    },
    {
      "epoch": 0.00022039794921875,
      "model_forward_time": 0.11551308631896973,
      "step": 36110
    },
    {
      "epoch": 0.00022039794921875,
      "step": 36110,
      "training_step_time": 0.39055681228637695
    },
    {
      "epoch": 0.000220404052734375,
      "model_forward_time": 0.11485576629638672,
      "step": 36111
    },
    {
      "epoch": 0.000220404052734375,
      "step": 36111,
      "training_step_time": 0.4586219787597656
    },
    {
      "epoch": 0.00022041015625,
      "model_forward_time": 0.11566495895385742,
      "step": 36112
    },
    {
      "epoch": 0.00022041015625,
      "step": 36112,
      "training_step_time": 0.3965766429901123
    },
    {
      "epoch": 0.000220416259765625,
      "model_forward_time": 0.11870527267456055,
      "step": 36113
    },
    {
      "epoch": 0.000220416259765625,
      "step": 36113,
      "training_step_time": 0.5091185569763184
    },
    {
      "epoch": 0.00022042236328125,
      "model_forward_time": 0.11853861808776855,
      "step": 36114
    },
    {
      "epoch": 0.00022042236328125,
      "step": 36114,
      "training_step_time": 0.3838038444519043
    },
    {
      "epoch": 0.000220428466796875,
      "model_forward_time": 0.11801767349243164,
      "step": 36115
    },
    {
      "epoch": 0.000220428466796875,
      "step": 36115,
      "training_step_time": 0.38347315788269043
    },
    {
      "epoch": 0.0002204345703125,
      "model_forward_time": 0.1182396411895752,
      "step": 36116
    },
    {
      "epoch": 0.0002204345703125,
      "step": 36116,
      "training_step_time": 0.3810100555419922
    },
    {
      "epoch": 0.000220440673828125,
      "model_forward_time": 0.11754608154296875,
      "step": 36117
    },
    {
      "epoch": 0.000220440673828125,
      "step": 36117,
      "training_step_time": 0.3767356872558594
    },
    {
      "epoch": 0.00022044677734375,
      "model_forward_time": 0.1269543170928955,
      "step": 36118
    },
    {
      "epoch": 0.00022044677734375,
      "step": 36118,
      "training_step_time": 0.3904247283935547
    },
    {
      "epoch": 0.000220452880859375,
      "model_forward_time": 0.11768770217895508,
      "step": 36119
    },
    {
      "epoch": 0.000220452880859375,
      "step": 36119,
      "training_step_time": 0.5790045261383057
    },
    {
      "epoch": 0.000220458984375,
      "grad_norm": 0.10245300829410553,
      "learning_rate": 3.74054216221926e-05,
      "loss": 0.0364,
      "step": 36120
    },
    {
      "epoch": 0.000220458984375,
      "model_forward_time": 0.1177070140838623,
      "step": 36120
    },
    {
      "epoch": 0.000220458984375,
      "step": 36120,
      "training_step_time": 0.3877136707305908
    },
    {
      "epoch": 0.000220465087890625,
      "model_forward_time": 0.11508393287658691,
      "step": 36121
    },
    {
      "epoch": 0.000220465087890625,
      "step": 36121,
      "training_step_time": 0.4524354934692383
    },
    {
      "epoch": 0.00022047119140625,
      "model_forward_time": 0.11445403099060059,
      "step": 36122
    },
    {
      "epoch": 0.00022047119140625,
      "step": 36122,
      "training_step_time": 0.43293237686157227
    },
    {
      "epoch": 0.000220477294921875,
      "model_forward_time": 0.11671829223632812,
      "step": 36123
    },
    {
      "epoch": 0.000220477294921875,
      "step": 36123,
      "training_step_time": 0.4215073585510254
    },
    {
      "epoch": 0.0002204833984375,
      "model_forward_time": 0.11545205116271973,
      "step": 36124
    },
    {
      "epoch": 0.0002204833984375,
      "step": 36124,
      "training_step_time": 0.39107656478881836
    },
    {
      "epoch": 0.000220489501953125,
      "model_forward_time": 0.11553359031677246,
      "step": 36125
    },
    {
      "epoch": 0.000220489501953125,
      "step": 36125,
      "training_step_time": 1.0388240814208984
    },
    {
      "epoch": 0.00022049560546875,
      "model_forward_time": 0.11426568031311035,
      "step": 36126
    },
    {
      "epoch": 0.00022049560546875,
      "step": 36126,
      "training_step_time": 0.442737340927124
    },
    {
      "epoch": 0.000220501708984375,
      "model_forward_time": 0.11399435997009277,
      "step": 36127
    },
    {
      "epoch": 0.000220501708984375,
      "step": 36127,
      "training_step_time": 0.3860166072845459
    },
    {
      "epoch": 0.0002205078125,
      "model_forward_time": 0.11444377899169922,
      "step": 36128
    },
    {
      "epoch": 0.0002205078125,
      "step": 36128,
      "training_step_time": 0.3972322940826416
    },
    {
      "epoch": 0.000220513916015625,
      "model_forward_time": 0.11492538452148438,
      "step": 36129
    },
    {
      "epoch": 0.000220513916015625,
      "step": 36129,
      "training_step_time": 0.3948333263397217
    },
    {
      "epoch": 0.00022052001953125,
      "grad_norm": 0.1619223803281784,
      "learning_rate": 3.737875429564807e-05,
      "loss": 0.0411,
      "step": 36130
    },
    {
      "epoch": 0.00022052001953125,
      "model_forward_time": 0.11484789848327637,
      "step": 36130
    },
    {
      "epoch": 0.00022052001953125,
      "step": 36130,
      "training_step_time": 0.3913757801055908
    },
    {
      "epoch": 0.000220526123046875,
      "model_forward_time": 0.11535763740539551,
      "step": 36131
    },
    {
      "epoch": 0.000220526123046875,
      "step": 36131,
      "training_step_time": 0.5664124488830566
    },
    {
      "epoch": 0.0002205322265625,
      "model_forward_time": 0.1155080795288086,
      "step": 36132
    },
    {
      "epoch": 0.0002205322265625,
      "step": 36132,
      "training_step_time": 0.399014949798584
    },
    {
      "epoch": 0.000220538330078125,
      "model_forward_time": 0.11479640007019043,
      "step": 36133
    },
    {
      "epoch": 0.000220538330078125,
      "step": 36133,
      "training_step_time": 0.3926537036895752
    },
    {
      "epoch": 0.00022054443359375,
      "model_forward_time": 0.11520838737487793,
      "step": 36134
    },
    {
      "epoch": 0.00022054443359375,
      "step": 36134,
      "training_step_time": 0.44764208793640137
    },
    {
      "epoch": 0.000220550537109375,
      "model_forward_time": 0.11545515060424805,
      "step": 36135
    },
    {
      "epoch": 0.000220550537109375,
      "step": 36135,
      "training_step_time": 0.38334226608276367
    },
    {
      "epoch": 0.000220556640625,
      "model_forward_time": 0.13306069374084473,
      "step": 36136
    },
    {
      "epoch": 0.000220556640625,
      "step": 36136,
      "training_step_time": 0.4195849895477295
    },
    {
      "epoch": 0.000220562744140625,
      "model_forward_time": 0.11528825759887695,
      "step": 36137
    },
    {
      "epoch": 0.000220562744140625,
      "step": 36137,
      "training_step_time": 0.7996127605438232
    },
    {
      "epoch": 0.00022056884765625,
      "model_forward_time": 0.11556458473205566,
      "step": 36138
    },
    {
      "epoch": 0.00022056884765625,
      "step": 36138,
      "training_step_time": 0.4503622055053711
    },
    {
      "epoch": 0.000220574951171875,
      "model_forward_time": 0.11560630798339844,
      "step": 36139
    },
    {
      "epoch": 0.000220574951171875,
      "step": 36139,
      "training_step_time": 0.4090421199798584
    },
    {
      "epoch": 0.0002205810546875,
      "grad_norm": 0.16285297274589539,
      "learning_rate": 3.7352090803104765e-05,
      "loss": 0.0405,
      "step": 36140
    },
    {
      "epoch": 0.0002205810546875,
      "model_forward_time": 0.11505961418151855,
      "step": 36140
    },
    {
      "epoch": 0.0002205810546875,
      "step": 36140,
      "training_step_time": 0.38554954528808594
    },
    {
      "epoch": 0.000220587158203125,
      "model_forward_time": 0.11445021629333496,
      "step": 36141
    },
    {
      "epoch": 0.000220587158203125,
      "step": 36141,
      "training_step_time": 0.3881552219390869
    },
    {
      "epoch": 0.00022059326171875,
      "model_forward_time": 0.11486148834228516,
      "step": 36142
    },
    {
      "epoch": 0.00022059326171875,
      "step": 36142,
      "training_step_time": 0.3786895275115967
    },
    {
      "epoch": 0.000220599365234375,
      "model_forward_time": 0.11519074440002441,
      "step": 36143
    },
    {
      "epoch": 0.000220599365234375,
      "step": 36143,
      "training_step_time": 0.8438839912414551
    },
    {
      "epoch": 0.00022060546875,
      "model_forward_time": 0.11481213569641113,
      "step": 36144
    },
    {
      "epoch": 0.00022060546875,
      "step": 36144,
      "training_step_time": 0.3941686153411865
    },
    {
      "epoch": 0.000220611572265625,
      "model_forward_time": 0.11486387252807617,
      "step": 36145
    },
    {
      "epoch": 0.000220611572265625,
      "step": 36145,
      "training_step_time": 0.3836824893951416
    },
    {
      "epoch": 0.00022061767578125,
      "model_forward_time": 0.11464500427246094,
      "step": 36146
    },
    {
      "epoch": 0.00022061767578125,
      "step": 36146,
      "training_step_time": 0.38505005836486816
    },
    {
      "epoch": 0.000220623779296875,
      "model_forward_time": 0.11490631103515625,
      "step": 36147
    },
    {
      "epoch": 0.000220623779296875,
      "step": 36147,
      "training_step_time": 0.41344261169433594
    },
    {
      "epoch": 0.0002206298828125,
      "model_forward_time": 0.11424946784973145,
      "step": 36148
    },
    {
      "epoch": 0.0002206298828125,
      "step": 36148,
      "training_step_time": 0.4131746292114258
    },
    {
      "epoch": 0.000220635986328125,
      "model_forward_time": 0.1159052848815918,
      "step": 36149
    },
    {
      "epoch": 0.000220635986328125,
      "step": 36149,
      "training_step_time": 0.466442346572876
    },
    {
      "epoch": 0.00022064208984375,
      "grad_norm": 0.14691931009292603,
      "learning_rate": 3.73254311526623e-05,
      "loss": 0.0388,
      "step": 36150
    },
    {
      "epoch": 0.00022064208984375,
      "model_forward_time": 0.11513376235961914,
      "step": 36150
    },
    {
      "epoch": 0.00022064208984375,
      "step": 36150,
      "training_step_time": 0.45041537284851074
    },
    {
      "epoch": 0.000220648193359375,
      "model_forward_time": 0.11528730392456055,
      "step": 36151
    },
    {
      "epoch": 0.000220648193359375,
      "step": 36151,
      "training_step_time": 0.40464186668395996
    },
    {
      "epoch": 0.000220654296875,
      "model_forward_time": 0.11586546897888184,
      "step": 36152
    },
    {
      "epoch": 0.000220654296875,
      "step": 36152,
      "training_step_time": 0.4460759162902832
    },
    {
      "epoch": 0.000220660400390625,
      "model_forward_time": 0.1152200698852539,
      "step": 36153
    },
    {
      "epoch": 0.000220660400390625,
      "step": 36153,
      "training_step_time": 0.48354673385620117
    },
    {
      "epoch": 0.00022066650390625,
      "model_forward_time": 0.11509203910827637,
      "step": 36154
    },
    {
      "epoch": 0.00022066650390625,
      "step": 36154,
      "training_step_time": 0.38497209548950195
    },
    {
      "epoch": 0.000220672607421875,
      "model_forward_time": 0.11573386192321777,
      "step": 36155
    },
    {
      "epoch": 0.000220672607421875,
      "step": 36155,
      "training_step_time": 0.4058969020843506
    },
    {
      "epoch": 0.0002206787109375,
      "model_forward_time": 0.11498165130615234,
      "step": 36156
    },
    {
      "epoch": 0.0002206787109375,
      "step": 36156,
      "training_step_time": 0.38762474060058594
    },
    {
      "epoch": 0.000220684814453125,
      "model_forward_time": 0.115509033203125,
      "step": 36157
    },
    {
      "epoch": 0.000220684814453125,
      "step": 36157,
      "training_step_time": 0.39515256881713867
    },
    {
      "epoch": 0.00022069091796875,
      "model_forward_time": 0.11552762985229492,
      "step": 36158
    },
    {
      "epoch": 0.00022069091796875,
      "step": 36158,
      "training_step_time": 0.3988621234893799
    },
    {
      "epoch": 0.000220697021484375,
      "model_forward_time": 0.11537289619445801,
      "step": 36159
    },
    {
      "epoch": 0.000220697021484375,
      "step": 36159,
      "training_step_time": 0.3911612033843994
    },
    {
      "epoch": 0.000220703125,
      "grad_norm": 0.16238528490066528,
      "learning_rate": 3.7298775352419206e-05,
      "loss": 0.0399,
      "step": 36160
    },
    {
      "epoch": 0.000220703125,
      "model_forward_time": 0.1147928237915039,
      "step": 36160
    },
    {
      "epoch": 0.000220703125,
      "step": 36160,
      "training_step_time": 0.39656662940979004
    },
    {
      "epoch": 0.000220709228515625,
      "model_forward_time": 0.11574196815490723,
      "step": 36161
    },
    {
      "epoch": 0.000220709228515625,
      "step": 36161,
      "training_step_time": 0.62709641456604
    },
    {
      "epoch": 0.00022071533203125,
      "model_forward_time": 0.11679458618164062,
      "step": 36162
    },
    {
      "epoch": 0.00022071533203125,
      "step": 36162,
      "training_step_time": 0.42747020721435547
    },
    {
      "epoch": 0.000220721435546875,
      "model_forward_time": 0.11548900604248047,
      "step": 36163
    },
    {
      "epoch": 0.000220721435546875,
      "step": 36163,
      "training_step_time": 0.49027061462402344
    },
    {
      "epoch": 0.0002207275390625,
      "model_forward_time": 0.1150658130645752,
      "step": 36164
    },
    {
      "epoch": 0.0002207275390625,
      "step": 36164,
      "training_step_time": 0.4320657253265381
    },
    {
      "epoch": 0.000220733642578125,
      "model_forward_time": 0.1152031421661377,
      "step": 36165
    },
    {
      "epoch": 0.000220733642578125,
      "step": 36165,
      "training_step_time": 0.4315340518951416
    },
    {
      "epoch": 0.00022073974609375,
      "model_forward_time": 0.11490893363952637,
      "step": 36166
    },
    {
      "epoch": 0.00022073974609375,
      "step": 36166,
      "training_step_time": 0.48079347610473633
    },
    {
      "epoch": 0.000220745849609375,
      "model_forward_time": 0.11476826667785645,
      "step": 36167
    },
    {
      "epoch": 0.000220745849609375,
      "step": 36167,
      "training_step_time": 0.4601552486419678
    },
    {
      "epoch": 0.000220751953125,
      "model_forward_time": 0.11557888984680176,
      "step": 36168
    },
    {
      "epoch": 0.000220751953125,
      "step": 36168,
      "training_step_time": 0.38941335678100586
    },
    {
      "epoch": 0.000220758056640625,
      "model_forward_time": 0.11531424522399902,
      "step": 36169
    },
    {
      "epoch": 0.000220758056640625,
      "step": 36169,
      "training_step_time": 0.39279699325561523
    },
    {
      "epoch": 0.00022076416015625,
      "grad_norm": 0.1094384416937828,
      "learning_rate": 3.727212341047281e-05,
      "loss": 0.0371,
      "step": 36170
    },
    {
      "epoch": 0.00022076416015625,
      "model_forward_time": 0.1156153678894043,
      "step": 36170
    },
    {
      "epoch": 0.00022076416015625,
      "step": 36170,
      "training_step_time": 0.39979124069213867
    },
    {
      "epoch": 0.000220770263671875,
      "model_forward_time": 0.11566567420959473,
      "step": 36171
    },
    {
      "epoch": 0.000220770263671875,
      "step": 36171,
      "training_step_time": 0.39578914642333984
    },
    {
      "epoch": 0.0002207763671875,
      "model_forward_time": 0.11490511894226074,
      "step": 36172
    },
    {
      "epoch": 0.0002207763671875,
      "step": 36172,
      "training_step_time": 0.3858358860015869
    },
    {
      "epoch": 0.000220782470703125,
      "model_forward_time": 0.11553502082824707,
      "step": 36173
    },
    {
      "epoch": 0.000220782470703125,
      "step": 36173,
      "training_step_time": 0.4029526710510254
    },
    {
      "epoch": 0.00022078857421875,
      "model_forward_time": 0.11608648300170898,
      "step": 36174
    },
    {
      "epoch": 0.00022078857421875,
      "step": 36174,
      "training_step_time": 0.388608455657959
    },
    {
      "epoch": 0.000220794677734375,
      "model_forward_time": 0.11679434776306152,
      "step": 36175
    },
    {
      "epoch": 0.000220794677734375,
      "step": 36175,
      "training_step_time": 0.3841743469238281
    },
    {
      "epoch": 0.00022080078125,
      "model_forward_time": 0.11712384223937988,
      "step": 36176
    },
    {
      "epoch": 0.00022080078125,
      "step": 36176,
      "training_step_time": 0.4209427833557129
    },
    {
      "epoch": 0.000220806884765625,
      "model_forward_time": 0.11514925956726074,
      "step": 36177
    },
    {
      "epoch": 0.000220806884765625,
      "step": 36177,
      "training_step_time": 0.5084445476531982
    },
    {
      "epoch": 0.00022081298828125,
      "model_forward_time": 0.11641144752502441,
      "step": 36178
    },
    {
      "epoch": 0.00022081298828125,
      "step": 36178,
      "training_step_time": 0.39856886863708496
    },
    {
      "epoch": 0.000220819091796875,
      "model_forward_time": 0.11529541015625,
      "step": 36179
    },
    {
      "epoch": 0.000220819091796875,
      "step": 36179,
      "training_step_time": 0.4158811569213867
    },
    {
      "epoch": 0.0002208251953125,
      "grad_norm": 0.14058490097522736,
      "learning_rate": 3.7245475334919246e-05,
      "loss": 0.0419,
      "step": 36180
    },
    {
      "epoch": 0.0002208251953125,
      "model_forward_time": 0.11565899848937988,
      "step": 36180
    },
    {
      "epoch": 0.0002208251953125,
      "step": 36180,
      "training_step_time": 0.37104034423828125
    },
    {
      "epoch": 0.000220831298828125,
      "model_forward_time": 0.11538028717041016,
      "step": 36181
    },
    {
      "epoch": 0.000220831298828125,
      "step": 36181,
      "training_step_time": 0.447864294052124
    },
    {
      "epoch": 0.00022083740234375,
      "model_forward_time": 0.12143921852111816,
      "step": 36182
    },
    {
      "epoch": 0.00022083740234375,
      "step": 36182,
      "training_step_time": 0.4292762279510498
    },
    {
      "epoch": 0.000220843505859375,
      "model_forward_time": 0.1190805435180664,
      "step": 36183
    },
    {
      "epoch": 0.000220843505859375,
      "step": 36183,
      "training_step_time": 0.3777596950531006
    },
    {
      "epoch": 0.000220849609375,
      "model_forward_time": 0.12117576599121094,
      "step": 36184
    },
    {
      "epoch": 0.000220849609375,
      "step": 36184,
      "training_step_time": 0.38408803939819336
    },
    {
      "epoch": 0.000220855712890625,
      "model_forward_time": 0.11860370635986328,
      "step": 36185
    },
    {
      "epoch": 0.000220855712890625,
      "step": 36185,
      "training_step_time": 0.3899562358856201
    },
    {
      "epoch": 0.00022086181640625,
      "model_forward_time": 0.12325191497802734,
      "step": 36186
    },
    {
      "epoch": 0.00022086181640625,
      "step": 36186,
      "training_step_time": 0.3960397243499756
    },
    {
      "epoch": 0.000220867919921875,
      "model_forward_time": 0.11791706085205078,
      "step": 36187
    },
    {
      "epoch": 0.000220867919921875,
      "step": 36187,
      "training_step_time": 0.3813490867614746
    },
    {
      "epoch": 0.0002208740234375,
      "model_forward_time": 0.11667490005493164,
      "step": 36188
    },
    {
      "epoch": 0.0002208740234375,
      "step": 36188,
      "training_step_time": 0.38352417945861816
    },
    {
      "epoch": 0.000220880126953125,
      "model_forward_time": 0.116363525390625,
      "step": 36189
    },
    {
      "epoch": 0.000220880126953125,
      "step": 36189,
      "training_step_time": 0.3825061321258545
    },
    {
      "epoch": 0.00022088623046875,
      "grad_norm": 0.17292411625385284,
      "learning_rate": 3.721883113385353e-05,
      "loss": 0.0386,
      "step": 36190
    },
    {
      "epoch": 0.00022088623046875,
      "model_forward_time": 0.11707186698913574,
      "step": 36190
    },
    {
      "epoch": 0.00022088623046875,
      "step": 36190,
      "training_step_time": 0.3953702449798584
    },
    {
      "epoch": 0.000220892333984375,
      "model_forward_time": 0.1161963939666748,
      "step": 36191
    },
    {
      "epoch": 0.000220892333984375,
      "step": 36191,
      "training_step_time": 0.4582786560058594
    },
    {
      "epoch": 0.0002208984375,
      "model_forward_time": 0.11567878723144531,
      "step": 36192
    },
    {
      "epoch": 0.0002208984375,
      "step": 36192,
      "training_step_time": 0.48467469215393066
    },
    {
      "epoch": 0.000220904541015625,
      "model_forward_time": 0.11599278450012207,
      "step": 36193
    },
    {
      "epoch": 0.000220904541015625,
      "step": 36193,
      "training_step_time": 0.4849369525909424
    },
    {
      "epoch": 0.00022091064453125,
      "model_forward_time": 0.11635208129882812,
      "step": 36194
    },
    {
      "epoch": 0.00022091064453125,
      "step": 36194,
      "training_step_time": 0.4408249855041504
    },
    {
      "epoch": 0.000220916748046875,
      "model_forward_time": 0.11750316619873047,
      "step": 36195
    },
    {
      "epoch": 0.000220916748046875,
      "step": 36195,
      "training_step_time": 0.5063660144805908
    },
    {
      "epoch": 0.0002209228515625,
      "model_forward_time": 0.11590003967285156,
      "step": 36196
    },
    {
      "epoch": 0.0002209228515625,
      "step": 36196,
      "training_step_time": 0.44996142387390137
    },
    {
      "epoch": 0.000220928955078125,
      "model_forward_time": 0.11549162864685059,
      "step": 36197
    },
    {
      "epoch": 0.000220928955078125,
      "step": 36197,
      "training_step_time": 0.5452451705932617
    },
    {
      "epoch": 0.00022093505859375,
      "model_forward_time": 0.11517000198364258,
      "step": 36198
    },
    {
      "epoch": 0.00022093505859375,
      "step": 36198,
      "training_step_time": 0.38806962966918945
    },
    {
      "epoch": 0.000220941162109375,
      "model_forward_time": 0.11549687385559082,
      "step": 36199
    },
    {
      "epoch": 0.000220941162109375,
      "step": 36199,
      "training_step_time": 0.3865993022918701
    },
    {
      "epoch": 0.000220947265625,
      "grad_norm": 0.08988553285598755,
      "learning_rate": 3.719219081536942e-05,
      "loss": 0.0348,
      "step": 36200
    },
    {
      "epoch": 0.000220947265625,
      "model_forward_time": 0.11472296714782715,
      "step": 36200
    },
    {
      "epoch": 0.000220947265625,
      "step": 36200,
      "training_step_time": 0.4029562473297119
    },
    {
      "epoch": 0.000220953369140625,
      "model_forward_time": 0.11612153053283691,
      "step": 36201
    },
    {
      "epoch": 0.000220953369140625,
      "step": 36201,
      "training_step_time": 0.40022969245910645
    },
    {
      "epoch": 0.00022095947265625,
      "model_forward_time": 0.11587238311767578,
      "step": 36202
    },
    {
      "epoch": 0.00022095947265625,
      "step": 36202,
      "training_step_time": 0.397627592086792
    },
    {
      "epoch": 0.000220965576171875,
      "model_forward_time": 0.11578011512756348,
      "step": 36203
    },
    {
      "epoch": 0.000220965576171875,
      "step": 36203,
      "training_step_time": 0.7299623489379883
    },
    {
      "epoch": 0.0002209716796875,
      "model_forward_time": 0.11555290222167969,
      "step": 36204
    },
    {
      "epoch": 0.0002209716796875,
      "step": 36204,
      "training_step_time": 0.4131929874420166
    },
    {
      "epoch": 0.000220977783203125,
      "model_forward_time": 0.11560988426208496,
      "step": 36205
    },
    {
      "epoch": 0.000220977783203125,
      "step": 36205,
      "training_step_time": 0.446519136428833
    },
    {
      "epoch": 0.00022098388671875,
      "model_forward_time": 0.11497688293457031,
      "step": 36206
    },
    {
      "epoch": 0.00022098388671875,
      "step": 36206,
      "training_step_time": 0.4310157299041748
    },
    {
      "epoch": 0.000220989990234375,
      "model_forward_time": 0.1152193546295166,
      "step": 36207
    },
    {
      "epoch": 0.000220989990234375,
      "step": 36207,
      "training_step_time": 0.4215390682220459
    },
    {
      "epoch": 0.00022099609375,
      "model_forward_time": 0.1154470443725586,
      "step": 36208
    },
    {
      "epoch": 0.00022099609375,
      "step": 36208,
      "training_step_time": 0.3825516700744629
    },
    {
      "epoch": 0.000221002197265625,
      "model_forward_time": 0.11515450477600098,
      "step": 36209
    },
    {
      "epoch": 0.000221002197265625,
      "step": 36209,
      "training_step_time": 0.42397618293762207
    },
    {
      "epoch": 0.00022100830078125,
      "grad_norm": 0.0992082878947258,
      "learning_rate": 3.716555438755961e-05,
      "loss": 0.0387,
      "step": 36210
    },
    {
      "epoch": 0.00022100830078125,
      "model_forward_time": 0.11809444427490234,
      "step": 36210
    },
    {
      "epoch": 0.00022100830078125,
      "step": 36210,
      "training_step_time": 0.3845541477203369
    },
    {
      "epoch": 0.000221014404296875,
      "model_forward_time": 0.11814332008361816,
      "step": 36211
    },
    {
      "epoch": 0.000221014404296875,
      "step": 36211,
      "training_step_time": 0.3903181552886963
    },
    {
      "epoch": 0.0002210205078125,
      "model_forward_time": 0.11991405487060547,
      "step": 36212
    },
    {
      "epoch": 0.0002210205078125,
      "step": 36212,
      "training_step_time": 0.38396477699279785
    },
    {
      "epoch": 0.000221026611328125,
      "model_forward_time": 0.11663317680358887,
      "step": 36213
    },
    {
      "epoch": 0.000221026611328125,
      "step": 36213,
      "training_step_time": 0.3907618522644043
    },
    {
      "epoch": 0.00022103271484375,
      "model_forward_time": 0.11560893058776855,
      "step": 36214
    },
    {
      "epoch": 0.00022103271484375,
      "step": 36214,
      "training_step_time": 0.4180898666381836
    },
    {
      "epoch": 0.000221038818359375,
      "model_forward_time": 0.11587238311767578,
      "step": 36215
    },
    {
      "epoch": 0.000221038818359375,
      "step": 36215,
      "training_step_time": 0.39121437072753906
    },
    {
      "epoch": 0.000221044921875,
      "model_forward_time": 0.11634206771850586,
      "step": 36216
    },
    {
      "epoch": 0.000221044921875,
      "step": 36216,
      "training_step_time": 0.4033370018005371
    },
    {
      "epoch": 0.000221051025390625,
      "model_forward_time": 0.11629509925842285,
      "step": 36217
    },
    {
      "epoch": 0.000221051025390625,
      "step": 36217,
      "training_step_time": 0.39661169052124023
    },
    {
      "epoch": 0.00022105712890625,
      "model_forward_time": 0.11641716957092285,
      "step": 36218
    },
    {
      "epoch": 0.00022105712890625,
      "step": 36218,
      "training_step_time": 0.4456140995025635
    },
    {
      "epoch": 0.000221063232421875,
      "model_forward_time": 0.11645746231079102,
      "step": 36219
    },
    {
      "epoch": 0.000221063232421875,
      "step": 36219,
      "training_step_time": 0.4118764400482178
    },
    {
      "epoch": 0.0002210693359375,
      "grad_norm": 0.13493838906288147,
      "learning_rate": 3.713892185851548e-05,
      "loss": 0.0395,
      "step": 36220
    },
    {
      "epoch": 0.0002210693359375,
      "model_forward_time": 0.11559343338012695,
      "step": 36220
    },
    {
      "epoch": 0.0002210693359375,
      "step": 36220,
      "training_step_time": 0.45445895195007324
    },
    {
      "epoch": 0.000221075439453125,
      "model_forward_time": 0.1158134937286377,
      "step": 36221
    },
    {
      "epoch": 0.000221075439453125,
      "step": 36221,
      "training_step_time": 0.49170422554016113
    },
    {
      "epoch": 0.00022108154296875,
      "model_forward_time": 0.11509466171264648,
      "step": 36222
    },
    {
      "epoch": 0.00022108154296875,
      "step": 36222,
      "training_step_time": 0.45471715927124023
    },
    {
      "epoch": 0.000221087646484375,
      "model_forward_time": 0.11571693420410156,
      "step": 36223
    },
    {
      "epoch": 0.000221087646484375,
      "step": 36223,
      "training_step_time": 0.4473412036895752
    },
    {
      "epoch": 0.00022109375,
      "model_forward_time": 0.11718082427978516,
      "step": 36224
    },
    {
      "epoch": 0.00022109375,
      "step": 36224,
      "training_step_time": 0.40924930572509766
    },
    {
      "epoch": 0.000221099853515625,
      "model_forward_time": 0.11546087265014648,
      "step": 36225
    },
    {
      "epoch": 0.000221099853515625,
      "step": 36225,
      "training_step_time": 0.4721972942352295
    },
    {
      "epoch": 0.00022110595703125,
      "model_forward_time": 0.11574196815490723,
      "step": 36226
    },
    {
      "epoch": 0.00022110595703125,
      "step": 36226,
      "training_step_time": 0.3829169273376465
    },
    {
      "epoch": 0.000221112060546875,
      "model_forward_time": 0.11622166633605957,
      "step": 36227
    },
    {
      "epoch": 0.000221112060546875,
      "step": 36227,
      "training_step_time": 0.3997955322265625
    },
    {
      "epoch": 0.0002211181640625,
      "model_forward_time": 0.11540031433105469,
      "step": 36228
    },
    {
      "epoch": 0.0002211181640625,
      "step": 36228,
      "training_step_time": 0.39371514320373535
    },
    {
      "epoch": 0.000221124267578125,
      "model_forward_time": 0.11591410636901855,
      "step": 36229
    },
    {
      "epoch": 0.000221124267578125,
      "step": 36229,
      "training_step_time": 0.38870763778686523
    },
    {
      "epoch": 0.00022113037109375,
      "grad_norm": 0.13028967380523682,
      "learning_rate": 3.711229323632732e-05,
      "loss": 0.0371,
      "step": 36230
    },
    {
      "epoch": 0.00022113037109375,
      "model_forward_time": 0.11552548408508301,
      "step": 36230
    },
    {
      "epoch": 0.00022113037109375,
      "step": 36230,
      "training_step_time": 0.3827190399169922
    },
    {
      "epoch": 0.000221136474609375,
      "model_forward_time": 0.11560726165771484,
      "step": 36231
    },
    {
      "epoch": 0.000221136474609375,
      "step": 36231,
      "training_step_time": 0.3937034606933594
    },
    {
      "epoch": 0.000221142578125,
      "model_forward_time": 0.1159512996673584,
      "step": 36232
    },
    {
      "epoch": 0.000221142578125,
      "step": 36232,
      "training_step_time": 0.4178612232208252
    },
    {
      "epoch": 0.000221148681640625,
      "model_forward_time": 0.11585283279418945,
      "step": 36233
    },
    {
      "epoch": 0.000221148681640625,
      "step": 36233,
      "training_step_time": 0.5063815116882324
    },
    {
      "epoch": 0.00022115478515625,
      "model_forward_time": 0.11596941947937012,
      "step": 36234
    },
    {
      "epoch": 0.00022115478515625,
      "step": 36234,
      "training_step_time": 0.4917595386505127
    },
    {
      "epoch": 0.000221160888671875,
      "model_forward_time": 0.11525416374206543,
      "step": 36235
    },
    {
      "epoch": 0.000221160888671875,
      "step": 36235,
      "training_step_time": 0.38994383811950684
    },
    {
      "epoch": 0.0002211669921875,
      "model_forward_time": 0.11594223976135254,
      "step": 36236
    },
    {
      "epoch": 0.0002211669921875,
      "step": 36236,
      "training_step_time": 0.405590295791626
    },
    {
      "epoch": 0.000221173095703125,
      "model_forward_time": 0.11526751518249512,
      "step": 36237
    },
    {
      "epoch": 0.000221173095703125,
      "step": 36237,
      "training_step_time": 0.3707399368286133
    },
    {
      "epoch": 0.00022117919921875,
      "model_forward_time": 0.11560440063476562,
      "step": 36238
    },
    {
      "epoch": 0.00022117919921875,
      "step": 36238,
      "training_step_time": 0.4697892665863037
    },
    {
      "epoch": 0.000221185302734375,
      "model_forward_time": 0.11886167526245117,
      "step": 36239
    },
    {
      "epoch": 0.000221185302734375,
      "step": 36239,
      "training_step_time": 0.38741588592529297
    },
    {
      "epoch": 0.00022119140625,
      "grad_norm": 0.11111697554588318,
      "learning_rate": 3.7085668529084184e-05,
      "loss": 0.041,
      "step": 36240
    },
    {
      "epoch": 0.00022119140625,
      "model_forward_time": 0.11790251731872559,
      "step": 36240
    },
    {
      "epoch": 0.00022119140625,
      "step": 36240,
      "training_step_time": 0.38223886489868164
    },
    {
      "epoch": 0.000221197509765625,
      "model_forward_time": 0.11655855178833008,
      "step": 36241
    },
    {
      "epoch": 0.000221197509765625,
      "step": 36241,
      "training_step_time": 0.3905770778656006
    },
    {
      "epoch": 0.00022120361328125,
      "model_forward_time": 0.11700272560119629,
      "step": 36242
    },
    {
      "epoch": 0.00022120361328125,
      "step": 36242,
      "training_step_time": 0.3936464786529541
    },
    {
      "epoch": 0.000221209716796875,
      "model_forward_time": 0.11557698249816895,
      "step": 36243
    },
    {
      "epoch": 0.000221209716796875,
      "step": 36243,
      "training_step_time": 0.3964259624481201
    },
    {
      "epoch": 0.0002212158203125,
      "model_forward_time": 0.11559295654296875,
      "step": 36244
    },
    {
      "epoch": 0.0002212158203125,
      "step": 36244,
      "training_step_time": 0.3964962959289551
    },
    {
      "epoch": 0.000221221923828125,
      "model_forward_time": 0.11647486686706543,
      "step": 36245
    },
    {
      "epoch": 0.000221221923828125,
      "step": 36245,
      "training_step_time": 0.47029972076416016
    },
    {
      "epoch": 0.00022122802734375,
      "model_forward_time": 0.11545491218566895,
      "step": 36246
    },
    {
      "epoch": 0.00022122802734375,
      "step": 36246,
      "training_step_time": 0.42328667640686035
    },
    {
      "epoch": 0.000221234130859375,
      "model_forward_time": 0.11566781997680664,
      "step": 36247
    },
    {
      "epoch": 0.000221234130859375,
      "step": 36247,
      "training_step_time": 0.4479351043701172
    },
    {
      "epoch": 0.000221240234375,
      "model_forward_time": 0.11639046669006348,
      "step": 36248
    },
    {
      "epoch": 0.000221240234375,
      "step": 36248,
      "training_step_time": 0.4494001865386963
    },
    {
      "epoch": 0.000221246337890625,
      "model_forward_time": 0.11600232124328613,
      "step": 36249
    },
    {
      "epoch": 0.000221246337890625,
      "step": 36249,
      "training_step_time": 0.42998433113098145
    },
    {
      "epoch": 0.00022125244140625,
      "grad_norm": 0.09072213619947433,
      "learning_rate": 3.705904774487396e-05,
      "loss": 0.0321,
      "step": 36250
    },
    {
      "epoch": 0.00022125244140625,
      "model_forward_time": 0.11539268493652344,
      "step": 36250
    },
    {
      "epoch": 0.00022125244140625,
      "step": 36250,
      "training_step_time": 0.3970508575439453
    },
    {
      "epoch": 0.000221258544921875,
      "model_forward_time": 0.1155095100402832,
      "step": 36251
    },
    {
      "epoch": 0.000221258544921875,
      "step": 36251,
      "training_step_time": 0.3988838195800781
    },
    {
      "epoch": 0.0002212646484375,
      "model_forward_time": 0.11630654335021973,
      "step": 36252
    },
    {
      "epoch": 0.0002212646484375,
      "step": 36252,
      "training_step_time": 0.447052001953125
    },
    {
      "epoch": 0.000221270751953125,
      "model_forward_time": 0.11649107933044434,
      "step": 36253
    },
    {
      "epoch": 0.000221270751953125,
      "step": 36253,
      "training_step_time": 0.4986884593963623
    },
    {
      "epoch": 0.00022127685546875,
      "model_forward_time": 0.11628198623657227,
      "step": 36254
    },
    {
      "epoch": 0.00022127685546875,
      "step": 36254,
      "training_step_time": 0.42443203926086426
    },
    {
      "epoch": 0.000221282958984375,
      "model_forward_time": 0.11567974090576172,
      "step": 36255
    },
    {
      "epoch": 0.000221282958984375,
      "step": 36255,
      "training_step_time": 0.3888063430786133
    },
    {
      "epoch": 0.0002212890625,
      "model_forward_time": 0.11548495292663574,
      "step": 36256
    },
    {
      "epoch": 0.0002212890625,
      "step": 36256,
      "training_step_time": 0.3900127410888672
    },
    {
      "epoch": 0.000221295166015625,
      "model_forward_time": 0.11527585983276367,
      "step": 36257
    },
    {
      "epoch": 0.000221295166015625,
      "step": 36257,
      "training_step_time": 0.38660645484924316
    },
    {
      "epoch": 0.00022130126953125,
      "model_forward_time": 0.1161658763885498,
      "step": 36258
    },
    {
      "epoch": 0.00022130126953125,
      "step": 36258,
      "training_step_time": 0.39071011543273926
    },
    {
      "epoch": 0.000221307373046875,
      "model_forward_time": 0.11552000045776367,
      "step": 36259
    },
    {
      "epoch": 0.000221307373046875,
      "step": 36259,
      "training_step_time": 0.4013195037841797
    },
    {
      "epoch": 0.0002213134765625,
      "grad_norm": 0.10385026782751083,
      "learning_rate": 3.703243089178337e-05,
      "loss": 0.0411,
      "step": 36260
    },
    {
      "epoch": 0.0002213134765625,
      "model_forward_time": 0.11628460884094238,
      "step": 36260
    },
    {
      "epoch": 0.0002213134765625,
      "step": 36260,
      "training_step_time": 0.41516613960266113
    },
    {
      "epoch": 0.000221319580078125,
      "model_forward_time": 0.11603689193725586,
      "step": 36261
    },
    {
      "epoch": 0.000221319580078125,
      "step": 36261,
      "training_step_time": 0.4049978256225586
    },
    {
      "epoch": 0.00022132568359375,
      "model_forward_time": 0.11600995063781738,
      "step": 36262
    },
    {
      "epoch": 0.00022132568359375,
      "step": 36262,
      "training_step_time": 0.40669679641723633
    },
    {
      "epoch": 0.000221331787109375,
      "model_forward_time": 0.11590981483459473,
      "step": 36263
    },
    {
      "epoch": 0.000221331787109375,
      "step": 36263,
      "training_step_time": 0.5506124496459961
    },
    {
      "epoch": 0.000221337890625,
      "model_forward_time": 0.11566781997680664,
      "step": 36264
    },
    {
      "epoch": 0.000221337890625,
      "step": 36264,
      "training_step_time": 0.4009838104248047
    },
    {
      "epoch": 0.000221343994140625,
      "model_forward_time": 0.1164252758026123,
      "step": 36265
    },
    {
      "epoch": 0.000221343994140625,
      "step": 36265,
      "training_step_time": 0.40437936782836914
    },
    {
      "epoch": 0.00022135009765625,
      "model_forward_time": 0.11550116539001465,
      "step": 36266
    },
    {
      "epoch": 0.00022135009765625,
      "step": 36266,
      "training_step_time": 0.4431328773498535
    },
    {
      "epoch": 0.000221356201171875,
      "model_forward_time": 0.11582183837890625,
      "step": 36267
    },
    {
      "epoch": 0.000221356201171875,
      "step": 36267,
      "training_step_time": 0.4994339942932129
    },
    {
      "epoch": 0.0002213623046875,
      "model_forward_time": 0.1149899959564209,
      "step": 36268
    },
    {
      "epoch": 0.0002213623046875,
      "step": 36268,
      "training_step_time": 0.45725417137145996
    },
    {
      "epoch": 0.000221368408203125,
      "model_forward_time": 0.11598730087280273,
      "step": 36269
    },
    {
      "epoch": 0.000221368408203125,
      "step": 36269,
      "training_step_time": 0.3881833553314209
    },
    {
      "epoch": 0.00022137451171875,
      "grad_norm": 0.1257588416337967,
      "learning_rate": 3.700581797789786e-05,
      "loss": 0.0447,
      "step": 36270
    },
    {
      "epoch": 0.00022137451171875,
      "model_forward_time": 0.1156928539276123,
      "step": 36270
    },
    {
      "epoch": 0.00022137451171875,
      "step": 36270,
      "training_step_time": 0.40913915634155273
    },
    {
      "epoch": 0.000221380615234375,
      "model_forward_time": 0.11484146118164062,
      "step": 36271
    },
    {
      "epoch": 0.000221380615234375,
      "step": 36271,
      "training_step_time": 0.389068603515625
    },
    {
      "epoch": 0.00022138671875,
      "model_forward_time": 0.11618924140930176,
      "step": 36272
    },
    {
      "epoch": 0.00022138671875,
      "step": 36272,
      "training_step_time": 0.39669322967529297
    },
    {
      "epoch": 0.000221392822265625,
      "model_forward_time": 0.11650753021240234,
      "step": 36273
    },
    {
      "epoch": 0.000221392822265625,
      "step": 36273,
      "training_step_time": 0.4293982982635498
    },
    {
      "epoch": 0.00022139892578125,
      "model_forward_time": 0.11543798446655273,
      "step": 36274
    },
    {
      "epoch": 0.00022139892578125,
      "step": 36274,
      "training_step_time": 0.40363335609436035
    },
    {
      "epoch": 0.000221405029296875,
      "model_forward_time": 0.11648726463317871,
      "step": 36275
    },
    {
      "epoch": 0.000221405029296875,
      "step": 36275,
      "training_step_time": 0.4051191806793213
    },
    {
      "epoch": 0.0002214111328125,
      "model_forward_time": 0.11603951454162598,
      "step": 36276
    },
    {
      "epoch": 0.0002214111328125,
      "step": 36276,
      "training_step_time": 0.4119853973388672
    },
    {
      "epoch": 0.000221417236328125,
      "model_forward_time": 0.1179354190826416,
      "step": 36277
    },
    {
      "epoch": 0.000221417236328125,
      "step": 36277,
      "training_step_time": 0.45600390434265137
    },
    {
      "epoch": 0.00022142333984375,
      "model_forward_time": 0.11649012565612793,
      "step": 36278
    },
    {
      "epoch": 0.00022142333984375,
      "step": 36278,
      "training_step_time": 0.389934778213501
    },
    {
      "epoch": 0.000221429443359375,
      "model_forward_time": 0.11585307121276855,
      "step": 36279
    },
    {
      "epoch": 0.000221429443359375,
      "step": 36279,
      "training_step_time": 0.44228458404541016
    },
    {
      "epoch": 0.000221435546875,
      "grad_norm": 0.08475024998188019,
      "learning_rate": 3.697920901130178e-05,
      "loss": 0.046,
      "step": 36280
    },
    {
      "epoch": 0.000221435546875,
      "model_forward_time": 0.11526679992675781,
      "step": 36280
    },
    {
      "epoch": 0.000221435546875,
      "step": 36280,
      "training_step_time": 0.45493102073669434
    },
    {
      "epoch": 0.000221441650390625,
      "model_forward_time": 0.1250905990600586,
      "step": 36281
    },
    {
      "epoch": 0.000221441650390625,
      "step": 36281,
      "training_step_time": 0.4128689765930176
    },
    {
      "epoch": 0.00022144775390625,
      "model_forward_time": 0.11782526969909668,
      "step": 36282
    },
    {
      "epoch": 0.00022144775390625,
      "step": 36282,
      "training_step_time": 0.46481943130493164
    },
    {
      "epoch": 0.000221453857421875,
      "model_forward_time": 0.11792182922363281,
      "step": 36283
    },
    {
      "epoch": 0.000221453857421875,
      "step": 36283,
      "training_step_time": 0.41439056396484375
    },
    {
      "epoch": 0.0002214599609375,
      "model_forward_time": 0.11754727363586426,
      "step": 36284
    },
    {
      "epoch": 0.0002214599609375,
      "step": 36284,
      "training_step_time": 0.3795340061187744
    },
    {
      "epoch": 0.000221466064453125,
      "model_forward_time": 0.11601948738098145,
      "step": 36285
    },
    {
      "epoch": 0.000221466064453125,
      "step": 36285,
      "training_step_time": 0.4049046039581299
    },
    {
      "epoch": 0.00022147216796875,
      "model_forward_time": 0.11588287353515625,
      "step": 36286
    },
    {
      "epoch": 0.00022147216796875,
      "step": 36286,
      "training_step_time": 0.4232621192932129
    },
    {
      "epoch": 0.000221478271484375,
      "model_forward_time": 0.11560416221618652,
      "step": 36287
    },
    {
      "epoch": 0.000221478271484375,
      "step": 36287,
      "training_step_time": 0.4087810516357422
    },
    {
      "epoch": 0.000221484375,
      "model_forward_time": 0.11599993705749512,
      "step": 36288
    },
    {
      "epoch": 0.000221484375,
      "step": 36288,
      "training_step_time": 0.3909480571746826
    },
    {
      "epoch": 0.000221490478515625,
      "model_forward_time": 0.11550450325012207,
      "step": 36289
    },
    {
      "epoch": 0.000221490478515625,
      "step": 36289,
      "training_step_time": 0.39243268966674805
    },
    {
      "epoch": 0.00022149658203125,
      "grad_norm": 0.11501444876194,
      "learning_rate": 3.695260400007819e-05,
      "loss": 0.0374,
      "step": 36290
    },
    {
      "epoch": 0.00022149658203125,
      "model_forward_time": 0.11562752723693848,
      "step": 36290
    },
    {
      "epoch": 0.00022149658203125,
      "step": 36290,
      "training_step_time": 0.4645407199859619
    },
    {
      "epoch": 0.000221502685546875,
      "model_forward_time": 0.11577582359313965,
      "step": 36291
    },
    {
      "epoch": 0.000221502685546875,
      "step": 36291,
      "training_step_time": 0.4267294406890869
    },
    {
      "epoch": 0.0002215087890625,
      "model_forward_time": 0.11617064476013184,
      "step": 36292
    },
    {
      "epoch": 0.0002215087890625,
      "step": 36292,
      "training_step_time": 0.42715978622436523
    },
    {
      "epoch": 0.000221514892578125,
      "model_forward_time": 0.11672735214233398,
      "step": 36293
    },
    {
      "epoch": 0.000221514892578125,
      "step": 36293,
      "training_step_time": 0.3857541084289551
    },
    {
      "epoch": 0.00022152099609375,
      "model_forward_time": 0.11626720428466797,
      "step": 36294
    },
    {
      "epoch": 0.00022152099609375,
      "step": 36294,
      "training_step_time": 0.39354395866394043
    },
    {
      "epoch": 0.000221527099609375,
      "model_forward_time": 0.11585164070129395,
      "step": 36295
    },
    {
      "epoch": 0.000221527099609375,
      "step": 36295,
      "training_step_time": 0.5084846019744873
    },
    {
      "epoch": 0.000221533203125,
      "model_forward_time": 0.11629819869995117,
      "step": 36296
    },
    {
      "epoch": 0.000221533203125,
      "step": 36296,
      "training_step_time": 0.4276597499847412
    },
    {
      "epoch": 0.000221539306640625,
      "model_forward_time": 0.11531257629394531,
      "step": 36297
    },
    {
      "epoch": 0.000221539306640625,
      "step": 36297,
      "training_step_time": 0.5098581314086914
    },
    {
      "epoch": 0.00022154541015625,
      "model_forward_time": 0.11522078514099121,
      "step": 36298
    },
    {
      "epoch": 0.00022154541015625,
      "step": 36298,
      "training_step_time": 0.3866438865661621
    },
    {
      "epoch": 0.000221551513671875,
      "model_forward_time": 0.11542940139770508,
      "step": 36299
    },
    {
      "epoch": 0.000221551513671875,
      "step": 36299,
      "training_step_time": 0.38451528549194336
    },
    {
      "epoch": 0.0002215576171875,
      "grad_norm": 0.118508480489254,
      "learning_rate": 3.6926002952309016e-05,
      "loss": 0.0369,
      "step": 36300
    },
    {
      "epoch": 0.0002215576171875,
      "model_forward_time": 0.11567187309265137,
      "step": 36300
    },
    {
      "epoch": 0.0002215576171875,
      "step": 36300,
      "training_step_time": 0.45894503593444824
    },
    {
      "epoch": 0.000221563720703125,
      "model_forward_time": 0.11569428443908691,
      "step": 36301
    },
    {
      "epoch": 0.000221563720703125,
      "step": 36301,
      "training_step_time": 0.3944559097290039
    },
    {
      "epoch": 0.00022156982421875,
      "model_forward_time": 0.1162111759185791,
      "step": 36302
    },
    {
      "epoch": 0.00022156982421875,
      "step": 36302,
      "training_step_time": 0.39913487434387207
    },
    {
      "epoch": 0.000221575927734375,
      "model_forward_time": 0.11688399314880371,
      "step": 36303
    },
    {
      "epoch": 0.000221575927734375,
      "step": 36303,
      "training_step_time": 0.3925740718841553
    },
    {
      "epoch": 0.00022158203125,
      "model_forward_time": 0.1162109375,
      "step": 36304
    },
    {
      "epoch": 0.00022158203125,
      "step": 36304,
      "training_step_time": 0.45108795166015625
    },
    {
      "epoch": 0.000221588134765625,
      "model_forward_time": 0.1156163215637207,
      "step": 36305
    },
    {
      "epoch": 0.000221588134765625,
      "step": 36305,
      "training_step_time": 0.39421916007995605
    },
    {
      "epoch": 0.00022159423828125,
      "model_forward_time": 0.11629533767700195,
      "step": 36306
    },
    {
      "epoch": 0.00022159423828125,
      "step": 36306,
      "training_step_time": 0.4401700496673584
    },
    {
      "epoch": 0.000221600341796875,
      "model_forward_time": 0.11606717109680176,
      "step": 36307
    },
    {
      "epoch": 0.000221600341796875,
      "step": 36307,
      "training_step_time": 0.3985893726348877
    },
    {
      "epoch": 0.0002216064453125,
      "model_forward_time": 0.1161794662475586,
      "step": 36308
    },
    {
      "epoch": 0.0002216064453125,
      "step": 36308,
      "training_step_time": 0.40500593185424805
    },
    {
      "epoch": 0.000221612548828125,
      "model_forward_time": 0.11646890640258789,
      "step": 36309
    },
    {
      "epoch": 0.000221612548828125,
      "step": 36309,
      "training_step_time": 0.4062764644622803
    },
    {
      "epoch": 0.00022161865234375,
      "grad_norm": 0.1531248390674591,
      "learning_rate": 3.6899405876074944e-05,
      "loss": 0.0403,
      "step": 36310
    },
    {
      "epoch": 0.00022161865234375,
      "model_forward_time": 0.11632227897644043,
      "step": 36310
    },
    {
      "epoch": 0.00022161865234375,
      "step": 36310,
      "training_step_time": 0.4071335792541504
    },
    {
      "epoch": 0.000221624755859375,
      "model_forward_time": 0.11607050895690918,
      "step": 36311
    },
    {
      "epoch": 0.000221624755859375,
      "step": 36311,
      "training_step_time": 0.40147972106933594
    },
    {
      "epoch": 0.000221630859375,
      "model_forward_time": 0.11617851257324219,
      "step": 36312
    },
    {
      "epoch": 0.000221630859375,
      "step": 36312,
      "training_step_time": 0.4380667209625244
    },
    {
      "epoch": 0.000221636962890625,
      "model_forward_time": 0.11555624008178711,
      "step": 36313
    },
    {
      "epoch": 0.000221636962890625,
      "step": 36313,
      "training_step_time": 0.39325499534606934
    },
    {
      "epoch": 0.00022164306640625,
      "model_forward_time": 0.11564278602600098,
      "step": 36314
    },
    {
      "epoch": 0.00022164306640625,
      "step": 36314,
      "training_step_time": 0.39827537536621094
    },
    {
      "epoch": 0.000221649169921875,
      "model_forward_time": 0.11624383926391602,
      "step": 36315
    },
    {
      "epoch": 0.000221649169921875,
      "step": 36315,
      "training_step_time": 0.4061470031738281
    },
    {
      "epoch": 0.0002216552734375,
      "model_forward_time": 0.11554646492004395,
      "step": 36316
    },
    {
      "epoch": 0.0002216552734375,
      "step": 36316,
      "training_step_time": 0.38761234283447266
    },
    {
      "epoch": 0.000221661376953125,
      "model_forward_time": 0.1165623664855957,
      "step": 36317
    },
    {
      "epoch": 0.000221661376953125,
      "step": 36317,
      "training_step_time": 0.3980848789215088
    },
    {
      "epoch": 0.00022166748046875,
      "model_forward_time": 0.11853194236755371,
      "step": 36318
    },
    {
      "epoch": 0.00022166748046875,
      "step": 36318,
      "training_step_time": 0.3807957172393799
    },
    {
      "epoch": 0.000221673583984375,
      "model_forward_time": 0.11613941192626953,
      "step": 36319
    },
    {
      "epoch": 0.000221673583984375,
      "step": 36319,
      "training_step_time": 0.40590810775756836
    },
    {
      "epoch": 0.0002216796875,
      "grad_norm": 0.09978565573692322,
      "learning_rate": 3.687281277945547e-05,
      "loss": 0.039,
      "step": 36320
    },
    {
      "epoch": 0.0002216796875,
      "model_forward_time": 0.1165623664855957,
      "step": 36320
    },
    {
      "epoch": 0.0002216796875,
      "step": 36320,
      "training_step_time": 0.41599583625793457
    },
    {
      "epoch": 0.000221685791015625,
      "model_forward_time": 0.11645913124084473,
      "step": 36321
    },
    {
      "epoch": 0.000221685791015625,
      "step": 36321,
      "training_step_time": 0.5421676635742188
    },
    {
      "epoch": 0.00022169189453125,
      "model_forward_time": 0.11702680587768555,
      "step": 36322
    },
    {
      "epoch": 0.00022169189453125,
      "step": 36322,
      "training_step_time": 0.3828573226928711
    },
    {
      "epoch": 0.000221697998046875,
      "model_forward_time": 0.11648917198181152,
      "step": 36323
    },
    {
      "epoch": 0.000221697998046875,
      "step": 36323,
      "training_step_time": 0.3831753730773926
    },
    {
      "epoch": 0.0002217041015625,
      "model_forward_time": 0.11607193946838379,
      "step": 36324
    },
    {
      "epoch": 0.0002217041015625,
      "step": 36324,
      "training_step_time": 0.3913075923919678
    },
    {
      "epoch": 0.000221710205078125,
      "model_forward_time": 0.11615633964538574,
      "step": 36325
    },
    {
      "epoch": 0.000221710205078125,
      "step": 36325,
      "training_step_time": 0.42572593688964844
    },
    {
      "epoch": 0.00022171630859375,
      "model_forward_time": 0.11588025093078613,
      "step": 36326
    },
    {
      "epoch": 0.00022171630859375,
      "step": 36326,
      "training_step_time": 0.3923060894012451
    },
    {
      "epoch": 0.000221722412109375,
      "model_forward_time": 0.11642026901245117,
      "step": 36327
    },
    {
      "epoch": 0.000221722412109375,
      "step": 36327,
      "training_step_time": 0.5448546409606934
    },
    {
      "epoch": 0.000221728515625,
      "model_forward_time": 0.11632132530212402,
      "step": 36328
    },
    {
      "epoch": 0.000221728515625,
      "step": 36328,
      "training_step_time": 0.3883988857269287
    },
    {
      "epoch": 0.000221734619140625,
      "model_forward_time": 0.11621403694152832,
      "step": 36329
    },
    {
      "epoch": 0.000221734619140625,
      "step": 36329,
      "training_step_time": 0.41959381103515625
    },
    {
      "epoch": 0.00022174072265625,
      "grad_norm": 0.11887681484222412,
      "learning_rate": 3.684622367052887e-05,
      "loss": 0.0427,
      "step": 36330
    },
    {
      "epoch": 0.00022174072265625,
      "model_forward_time": 0.11617112159729004,
      "step": 36330
    },
    {
      "epoch": 0.00022174072265625,
      "step": 36330,
      "training_step_time": 0.38736939430236816
    },
    {
      "epoch": 0.000221746826171875,
      "model_forward_time": 0.11642956733703613,
      "step": 36331
    },
    {
      "epoch": 0.000221746826171875,
      "step": 36331,
      "training_step_time": 0.3952350616455078
    },
    {
      "epoch": 0.0002217529296875,
      "model_forward_time": 0.1164100170135498,
      "step": 36332
    },
    {
      "epoch": 0.0002217529296875,
      "step": 36332,
      "training_step_time": 0.38556599617004395
    },
    {
      "epoch": 0.000221759033203125,
      "model_forward_time": 0.11583518981933594,
      "step": 36333
    },
    {
      "epoch": 0.000221759033203125,
      "step": 36333,
      "training_step_time": 0.4758727550506592
    },
    {
      "epoch": 0.00022176513671875,
      "model_forward_time": 0.11566495895385742,
      "step": 36334
    },
    {
      "epoch": 0.00022176513671875,
      "step": 36334,
      "training_step_time": 0.4089319705963135
    },
    {
      "epoch": 0.000221771240234375,
      "model_forward_time": 0.11570191383361816,
      "step": 36335
    },
    {
      "epoch": 0.000221771240234375,
      "step": 36335,
      "training_step_time": 0.4911935329437256
    },
    {
      "epoch": 0.00022177734375,
      "model_forward_time": 0.11623692512512207,
      "step": 36336
    },
    {
      "epoch": 0.00022177734375,
      "step": 36336,
      "training_step_time": 0.39396095275878906
    },
    {
      "epoch": 0.000221783447265625,
      "model_forward_time": 0.11535954475402832,
      "step": 36337
    },
    {
      "epoch": 0.000221783447265625,
      "step": 36337,
      "training_step_time": 0.39778804779052734
    },
    {
      "epoch": 0.00022178955078125,
      "model_forward_time": 0.1156303882598877,
      "step": 36338
    },
    {
      "epoch": 0.00022178955078125,
      "step": 36338,
      "training_step_time": 0.3886876106262207
    },
    {
      "epoch": 0.000221795654296875,
      "model_forward_time": 0.11560606956481934,
      "step": 36339
    },
    {
      "epoch": 0.000221795654296875,
      "step": 36339,
      "training_step_time": 0.3688032627105713
    },
    {
      "epoch": 0.0002218017578125,
      "grad_norm": 0.10937051475048065,
      "learning_rate": 3.68196385573722e-05,
      "loss": 0.0381,
      "step": 36340
    },
    {
      "epoch": 0.0002218017578125,
      "model_forward_time": 0.11599040031433105,
      "step": 36340
    },
    {
      "epoch": 0.0002218017578125,
      "step": 36340,
      "training_step_time": 0.4444270133972168
    },
    {
      "epoch": 0.000221807861328125,
      "model_forward_time": 0.11564779281616211,
      "step": 36341
    },
    {
      "epoch": 0.000221807861328125,
      "step": 36341,
      "training_step_time": 0.4431467056274414
    },
    {
      "epoch": 0.00022181396484375,
      "model_forward_time": 0.11614513397216797,
      "step": 36342
    },
    {
      "epoch": 0.00022181396484375,
      "step": 36342,
      "training_step_time": 0.4002559185028076
    },
    {
      "epoch": 0.000221820068359375,
      "model_forward_time": 0.1159372329711914,
      "step": 36343
    },
    {
      "epoch": 0.000221820068359375,
      "step": 36343,
      "training_step_time": 0.396564245223999
    },
    {
      "epoch": 0.000221826171875,
      "model_forward_time": 0.11564254760742188,
      "step": 36344
    },
    {
      "epoch": 0.000221826171875,
      "step": 36344,
      "training_step_time": 0.39238786697387695
    },
    {
      "epoch": 0.000221832275390625,
      "model_forward_time": 0.11824202537536621,
      "step": 36345
    },
    {
      "epoch": 0.000221832275390625,
      "step": 36345,
      "training_step_time": 0.3944554328918457
    },
    {
      "epoch": 0.00022183837890625,
      "model_forward_time": 0.11589932441711426,
      "step": 36346
    },
    {
      "epoch": 0.00022183837890625,
      "step": 36346,
      "training_step_time": 0.39014387130737305
    },
    {
      "epoch": 0.000221844482421875,
      "model_forward_time": 0.12355947494506836,
      "step": 36347
    },
    {
      "epoch": 0.000221844482421875,
      "step": 36347,
      "training_step_time": 0.3980543613433838
    },
    {
      "epoch": 0.0002218505859375,
      "model_forward_time": 0.11600923538208008,
      "step": 36348
    },
    {
      "epoch": 0.0002218505859375,
      "step": 36348,
      "training_step_time": 0.416226863861084
    },
    {
      "epoch": 0.000221856689453125,
      "model_forward_time": 0.11594176292419434,
      "step": 36349
    },
    {
      "epoch": 0.000221856689453125,
      "step": 36349,
      "training_step_time": 0.4602468013763428
    },
    {
      "epoch": 0.00022186279296875,
      "grad_norm": 0.12316415458917618,
      "learning_rate": 3.679305744806134e-05,
      "loss": 0.0399,
      "step": 36350
    },
    {
      "epoch": 0.00022186279296875,
      "model_forward_time": 0.11594820022583008,
      "step": 36350
    },
    {
      "epoch": 0.00022186279296875,
      "step": 36350,
      "training_step_time": 0.42893457412719727
    },
    {
      "epoch": 0.000221868896484375,
      "model_forward_time": 0.11614823341369629,
      "step": 36351
    },
    {
      "epoch": 0.000221868896484375,
      "step": 36351,
      "training_step_time": 0.39998936653137207
    },
    {
      "epoch": 0.000221875,
      "model_forward_time": 0.11573338508605957,
      "step": 36352
    },
    {
      "epoch": 0.000221875,
      "step": 36352,
      "training_step_time": 0.396040678024292
    },
    {
      "epoch": 0.000221881103515625,
      "model_forward_time": 0.11572790145874023,
      "step": 36353
    },
    {
      "epoch": 0.000221881103515625,
      "step": 36353,
      "training_step_time": 0.3999595642089844
    },
    {
      "epoch": 0.00022188720703125,
      "model_forward_time": 0.1175386905670166,
      "step": 36354
    },
    {
      "epoch": 0.00022188720703125,
      "step": 36354,
      "training_step_time": 0.45924997329711914
    },
    {
      "epoch": 0.000221893310546875,
      "model_forward_time": 0.11856579780578613,
      "step": 36355
    },
    {
      "epoch": 0.000221893310546875,
      "step": 36355,
      "training_step_time": 0.4369950294494629
    },
    {
      "epoch": 0.0002218994140625,
      "model_forward_time": 0.1186983585357666,
      "step": 36356
    },
    {
      "epoch": 0.0002218994140625,
      "step": 36356,
      "training_step_time": 0.4860200881958008
    },
    {
      "epoch": 0.000221905517578125,
      "model_forward_time": 0.1179358959197998,
      "step": 36357
    },
    {
      "epoch": 0.000221905517578125,
      "step": 36357,
      "training_step_time": 0.380953311920166
    },
    {
      "epoch": 0.00022191162109375,
      "model_forward_time": 0.11553621292114258,
      "step": 36358
    },
    {
      "epoch": 0.00022191162109375,
      "step": 36358,
      "training_step_time": 0.3802649974822998
    },
    {
      "epoch": 0.000221917724609375,
      "model_forward_time": 0.11568784713745117,
      "step": 36359
    },
    {
      "epoch": 0.000221917724609375,
      "step": 36359,
      "training_step_time": 0.3880329132080078
    },
    {
      "epoch": 0.000221923828125,
      "grad_norm": 0.09017864614725113,
      "learning_rate": 3.676648035067093e-05,
      "loss": 0.0369,
      "step": 36360
    },
    {
      "epoch": 0.000221923828125,
      "model_forward_time": 0.11496973037719727,
      "step": 36360
    },
    {
      "epoch": 0.000221923828125,
      "step": 36360,
      "training_step_time": 0.4028918743133545
    },
    {
      "epoch": 0.000221929931640625,
      "model_forward_time": 0.1160585880279541,
      "step": 36361
    },
    {
      "epoch": 0.000221929931640625,
      "step": 36361,
      "training_step_time": 0.3960096836090088
    },
    {
      "epoch": 0.00022193603515625,
      "model_forward_time": 0.11530184745788574,
      "step": 36362
    },
    {
      "epoch": 0.00022193603515625,
      "step": 36362,
      "training_step_time": 0.3981785774230957
    },
    {
      "epoch": 0.000221942138671875,
      "model_forward_time": 0.11538815498352051,
      "step": 36363
    },
    {
      "epoch": 0.000221942138671875,
      "step": 36363,
      "training_step_time": 0.535123348236084
    },
    {
      "epoch": 0.0002219482421875,
      "model_forward_time": 0.11614799499511719,
      "step": 36364
    },
    {
      "epoch": 0.0002219482421875,
      "step": 36364,
      "training_step_time": 0.4083256721496582
    },
    {
      "epoch": 0.000221954345703125,
      "model_forward_time": 0.11540412902832031,
      "step": 36365
    },
    {
      "epoch": 0.000221954345703125,
      "step": 36365,
      "training_step_time": 0.3830838203430176
    },
    {
      "epoch": 0.00022196044921875,
      "model_forward_time": 0.1148526668548584,
      "step": 36366
    },
    {
      "epoch": 0.00022196044921875,
      "step": 36366,
      "training_step_time": 0.3892371654510498
    },
    {
      "epoch": 0.000221966552734375,
      "model_forward_time": 0.11643028259277344,
      "step": 36367
    },
    {
      "epoch": 0.000221966552734375,
      "step": 36367,
      "training_step_time": 0.3966376781463623
    },
    {
      "epoch": 0.00022197265625,
      "model_forward_time": 0.11533665657043457,
      "step": 36368
    },
    {
      "epoch": 0.00022197265625,
      "step": 36368,
      "training_step_time": 0.42838215827941895
    },
    {
      "epoch": 0.000221978759765625,
      "model_forward_time": 0.11822938919067383,
      "step": 36369
    },
    {
      "epoch": 0.000221978759765625,
      "step": 36369,
      "training_step_time": 0.4451103210449219
    },
    {
      "epoch": 0.00022198486328125,
      "grad_norm": 0.11146736890077591,
      "learning_rate": 3.673990727327435e-05,
      "loss": 0.0402,
      "step": 36370
    },
    {
      "epoch": 0.00022198486328125,
      "model_forward_time": 0.11835646629333496,
      "step": 36370
    },
    {
      "epoch": 0.00022198486328125,
      "step": 36370,
      "training_step_time": 0.4223923683166504
    },
    {
      "epoch": 0.000221990966796875,
      "model_forward_time": 0.11764383316040039,
      "step": 36371
    },
    {
      "epoch": 0.000221990966796875,
      "step": 36371,
      "training_step_time": 0.3839597702026367
    },
    {
      "epoch": 0.0002219970703125,
      "model_forward_time": 0.11715126037597656,
      "step": 36372
    },
    {
      "epoch": 0.0002219970703125,
      "step": 36372,
      "training_step_time": 0.42238497734069824
    },
    {
      "epoch": 0.000222003173828125,
      "model_forward_time": 0.11521768569946289,
      "step": 36373
    },
    {
      "epoch": 0.000222003173828125,
      "step": 36373,
      "training_step_time": 0.38965940475463867
    },
    {
      "epoch": 0.00022200927734375,
      "model_forward_time": 0.11585497856140137,
      "step": 36374
    },
    {
      "epoch": 0.00022200927734375,
      "step": 36374,
      "training_step_time": 0.3978593349456787
    },
    {
      "epoch": 0.000222015380859375,
      "model_forward_time": 0.11566805839538574,
      "step": 36375
    },
    {
      "epoch": 0.000222015380859375,
      "step": 36375,
      "training_step_time": 0.41066813468933105
    },
    {
      "epoch": 0.000222021484375,
      "model_forward_time": 0.11592578887939453,
      "step": 36376
    },
    {
      "epoch": 0.000222021484375,
      "step": 36376,
      "training_step_time": 0.3841433525085449
    },
    {
      "epoch": 0.000222027587890625,
      "model_forward_time": 0.11578822135925293,
      "step": 36377
    },
    {
      "epoch": 0.000222027587890625,
      "step": 36377,
      "training_step_time": 0.4406249523162842
    },
    {
      "epoch": 0.00022203369140625,
      "model_forward_time": 0.1178278923034668,
      "step": 36378
    },
    {
      "epoch": 0.00022203369140625,
      "step": 36378,
      "training_step_time": 0.3916492462158203
    },
    {
      "epoch": 0.000222039794921875,
      "model_forward_time": 0.11519694328308105,
      "step": 36379
    },
    {
      "epoch": 0.000222039794921875,
      "step": 36379,
      "training_step_time": 0.41515135765075684
    },
    {
      "epoch": 0.0002220458984375,
      "grad_norm": 0.11622750759124756,
      "learning_rate": 3.6713338223943867e-05,
      "loss": 0.0374,
      "step": 36380
    },
    {
      "epoch": 0.0002220458984375,
      "model_forward_time": 0.11530065536499023,
      "step": 36380
    },
    {
      "epoch": 0.0002220458984375,
      "step": 36380,
      "training_step_time": 0.4002711772918701
    },
    {
      "epoch": 0.000222052001953125,
      "model_forward_time": 0.11628913879394531,
      "step": 36381
    },
    {
      "epoch": 0.000222052001953125,
      "step": 36381,
      "training_step_time": 0.5178303718566895
    },
    {
      "epoch": 0.00022205810546875,
      "model_forward_time": 0.1156611442565918,
      "step": 36382
    },
    {
      "epoch": 0.00022205810546875,
      "step": 36382,
      "training_step_time": 0.3962850570678711
    },
    {
      "epoch": 0.000222064208984375,
      "model_forward_time": 0.11829733848571777,
      "step": 36383
    },
    {
      "epoch": 0.000222064208984375,
      "step": 36383,
      "training_step_time": 0.5130529403686523
    },
    {
      "epoch": 0.0002220703125,
      "model_forward_time": 0.12172985076904297,
      "step": 36384
    },
    {
      "epoch": 0.0002220703125,
      "step": 36384,
      "training_step_time": 0.4753577709197998
    },
    {
      "epoch": 0.000222076416015625,
      "model_forward_time": 0.11855816841125488,
      "step": 36385
    },
    {
      "epoch": 0.000222076416015625,
      "step": 36385,
      "training_step_time": 0.38178014755249023
    },
    {
      "epoch": 0.00022208251953125,
      "model_forward_time": 0.1238856315612793,
      "step": 36386
    },
    {
      "epoch": 0.00022208251953125,
      "step": 36386,
      "training_step_time": 0.38924479484558105
    },
    {
      "epoch": 0.000222088623046875,
      "model_forward_time": 0.1180429458618164,
      "step": 36387
    },
    {
      "epoch": 0.000222088623046875,
      "step": 36387,
      "training_step_time": 0.3759915828704834
    },
    {
      "epoch": 0.0002220947265625,
      "model_forward_time": 0.11575531959533691,
      "step": 36388
    },
    {
      "epoch": 0.0002220947265625,
      "step": 36388,
      "training_step_time": 0.38616180419921875
    },
    {
      "epoch": 0.000222100830078125,
      "model_forward_time": 0.11666488647460938,
      "step": 36389
    },
    {
      "epoch": 0.000222100830078125,
      "step": 36389,
      "training_step_time": 0.40029191970825195
    },
    {
      "epoch": 0.00022210693359375,
      "grad_norm": 0.08363638818264008,
      "learning_rate": 3.6686773210750385e-05,
      "loss": 0.0374,
      "step": 36390
    },
    {
      "epoch": 0.00022210693359375,
      "model_forward_time": 0.1155691146850586,
      "step": 36390
    },
    {
      "epoch": 0.00022210693359375,
      "step": 36390,
      "training_step_time": 0.4010779857635498
    },
    {
      "epoch": 0.000222113037109375,
      "model_forward_time": 0.11584687232971191,
      "step": 36391
    },
    {
      "epoch": 0.000222113037109375,
      "step": 36391,
      "training_step_time": 0.49181556701660156
    },
    {
      "epoch": 0.000222119140625,
      "model_forward_time": 0.11573457717895508,
      "step": 36392
    },
    {
      "epoch": 0.000222119140625,
      "step": 36392,
      "training_step_time": 0.43511366844177246
    },
    {
      "epoch": 0.000222125244140625,
      "model_forward_time": 0.11562800407409668,
      "step": 36393
    },
    {
      "epoch": 0.000222125244140625,
      "step": 36393,
      "training_step_time": 0.39218854904174805
    },
    {
      "epoch": 0.00022213134765625,
      "model_forward_time": 0.11580419540405273,
      "step": 36394
    },
    {
      "epoch": 0.00022213134765625,
      "step": 36394,
      "training_step_time": 0.3928079605102539
    },
    {
      "epoch": 0.000222137451171875,
      "model_forward_time": 0.11556339263916016,
      "step": 36395
    },
    {
      "epoch": 0.000222137451171875,
      "step": 36395,
      "training_step_time": 0.400698184967041
    },
    {
      "epoch": 0.0002221435546875,
      "model_forward_time": 0.11530590057373047,
      "step": 36396
    },
    {
      "epoch": 0.0002221435546875,
      "step": 36396,
      "training_step_time": 0.42990589141845703
    },
    {
      "epoch": 0.000222149658203125,
      "model_forward_time": 0.1170647144317627,
      "step": 36397
    },
    {
      "epoch": 0.000222149658203125,
      "step": 36397,
      "training_step_time": 0.4249870777130127
    },
    {
      "epoch": 0.00022215576171875,
      "model_forward_time": 0.11605072021484375,
      "step": 36398
    },
    {
      "epoch": 0.00022215576171875,
      "step": 36398,
      "training_step_time": 0.47499561309814453
    },
    {
      "epoch": 0.000222161865234375,
      "model_forward_time": 0.11622357368469238,
      "step": 36399
    },
    {
      "epoch": 0.000222161865234375,
      "step": 36399,
      "training_step_time": 0.5052342414855957
    },
    {
      "epoch": 0.00022216796875,
      "grad_norm": 0.12086072564125061,
      "learning_rate": 3.666021224176369e-05,
      "loss": 0.0358,
      "step": 36400
    },
    {
      "epoch": 0.00022216796875,
      "model_forward_time": 0.11548566818237305,
      "step": 36400
    },
    {
      "epoch": 0.00022216796875,
      "step": 36400,
      "training_step_time": 0.3972475528717041
    },
    {
      "epoch": 0.000222174072265625,
      "model_forward_time": 0.1155080795288086,
      "step": 36401
    },
    {
      "epoch": 0.000222174072265625,
      "step": 36401,
      "training_step_time": 0.39863157272338867
    },
    {
      "epoch": 0.00022218017578125,
      "model_forward_time": 0.11515974998474121,
      "step": 36402
    },
    {
      "epoch": 0.00022218017578125,
      "step": 36402,
      "training_step_time": 0.40282392501831055
    },
    {
      "epoch": 0.000222186279296875,
      "model_forward_time": 0.11613988876342773,
      "step": 36403
    },
    {
      "epoch": 0.000222186279296875,
      "step": 36403,
      "training_step_time": 0.4031076431274414
    },
    {
      "epoch": 0.0002221923828125,
      "model_forward_time": 0.11681890487670898,
      "step": 36404
    },
    {
      "epoch": 0.0002221923828125,
      "step": 36404,
      "training_step_time": 0.40424251556396484
    },
    {
      "epoch": 0.000222198486328125,
      "model_forward_time": 0.11593317985534668,
      "step": 36405
    },
    {
      "epoch": 0.000222198486328125,
      "step": 36405,
      "training_step_time": 0.4158632755279541
    },
    {
      "epoch": 0.00022220458984375,
      "model_forward_time": 0.11595749855041504,
      "step": 36406
    },
    {
      "epoch": 0.00022220458984375,
      "step": 36406,
      "training_step_time": 0.39212536811828613
    },
    {
      "epoch": 0.000222210693359375,
      "model_forward_time": 0.11661815643310547,
      "step": 36407
    },
    {
      "epoch": 0.000222210693359375,
      "step": 36407,
      "training_step_time": 0.40464186668395996
    },
    {
      "epoch": 0.000222216796875,
      "model_forward_time": 0.11554336547851562,
      "step": 36408
    },
    {
      "epoch": 0.000222216796875,
      "step": 36408,
      "training_step_time": 0.39686155319213867
    },
    {
      "epoch": 0.000222222900390625,
      "model_forward_time": 0.11609792709350586,
      "step": 36409
    },
    {
      "epoch": 0.000222222900390625,
      "step": 36409,
      "training_step_time": 0.39340901374816895
    },
    {
      "epoch": 0.00022222900390625,
      "grad_norm": 0.12096895277500153,
      "learning_rate": 3.663365532505232e-05,
      "loss": 0.0399,
      "step": 36410
    },
    {
      "epoch": 0.00022222900390625,
      "model_forward_time": 0.11650276184082031,
      "step": 36410
    },
    {
      "epoch": 0.00022222900390625,
      "step": 36410,
      "training_step_time": 0.5932152271270752
    },
    {
      "epoch": 0.000222235107421875,
      "model_forward_time": 0.11536693572998047,
      "step": 36411
    },
    {
      "epoch": 0.000222235107421875,
      "step": 36411,
      "training_step_time": 0.40949225425720215
    },
    {
      "epoch": 0.0002222412109375,
      "model_forward_time": 0.11601614952087402,
      "step": 36412
    },
    {
      "epoch": 0.0002222412109375,
      "step": 36412,
      "training_step_time": 0.49558091163635254
    },
    {
      "epoch": 0.000222247314453125,
      "model_forward_time": 0.11786246299743652,
      "step": 36413
    },
    {
      "epoch": 0.000222247314453125,
      "step": 36413,
      "training_step_time": 0.5318698883056641
    },
    {
      "epoch": 0.00022225341796875,
      "model_forward_time": 0.1168367862701416,
      "step": 36414
    },
    {
      "epoch": 0.00022225341796875,
      "step": 36414,
      "training_step_time": 0.3746328353881836
    },
    {
      "epoch": 0.000222259521484375,
      "model_forward_time": 0.11675214767456055,
      "step": 36415
    },
    {
      "epoch": 0.000222259521484375,
      "step": 36415,
      "training_step_time": 0.37832117080688477
    },
    {
      "epoch": 0.000222265625,
      "model_forward_time": 0.11790299415588379,
      "step": 36416
    },
    {
      "epoch": 0.000222265625,
      "step": 36416,
      "training_step_time": 0.4111170768737793
    },
    {
      "epoch": 0.000222271728515625,
      "model_forward_time": 0.11784601211547852,
      "step": 36417
    },
    {
      "epoch": 0.000222271728515625,
      "step": 36417,
      "training_step_time": 0.37733960151672363
    },
    {
      "epoch": 0.00022227783203125,
      "model_forward_time": 0.11838364601135254,
      "step": 36418
    },
    {
      "epoch": 0.00022227783203125,
      "step": 36418,
      "training_step_time": 0.37917065620422363
    },
    {
      "epoch": 0.000222283935546875,
      "model_forward_time": 0.11646127700805664,
      "step": 36419
    },
    {
      "epoch": 0.000222283935546875,
      "step": 36419,
      "training_step_time": 0.5528774261474609
    },
    {
      "epoch": 0.0002222900390625,
      "grad_norm": 0.10276485979557037,
      "learning_rate": 3.6607102468683526e-05,
      "loss": 0.033,
      "step": 36420
    },
    {
      "epoch": 0.0002222900390625,
      "model_forward_time": 0.11555838584899902,
      "step": 36420
    },
    {
      "epoch": 0.0002222900390625,
      "step": 36420,
      "training_step_time": 0.40035462379455566
    },
    {
      "epoch": 0.000222296142578125,
      "model_forward_time": 0.11589455604553223,
      "step": 36421
    },
    {
      "epoch": 0.000222296142578125,
      "step": 36421,
      "training_step_time": 0.4081249237060547
    },
    {
      "epoch": 0.00022230224609375,
      "model_forward_time": 0.1184391975402832,
      "step": 36422
    },
    {
      "epoch": 0.00022230224609375,
      "step": 36422,
      "training_step_time": 0.39350438117980957
    },
    {
      "epoch": 0.000222308349609375,
      "model_forward_time": 0.11596274375915527,
      "step": 36423
    },
    {
      "epoch": 0.000222308349609375,
      "step": 36423,
      "training_step_time": 0.42612433433532715
    },
    {
      "epoch": 0.000222314453125,
      "model_forward_time": 0.11573028564453125,
      "step": 36424
    },
    {
      "epoch": 0.000222314453125,
      "step": 36424,
      "training_step_time": 0.40619802474975586
    },
    {
      "epoch": 0.000222320556640625,
      "model_forward_time": 0.11590838432312012,
      "step": 36425
    },
    {
      "epoch": 0.000222320556640625,
      "step": 36425,
      "training_step_time": 0.3711702823638916
    },
    {
      "epoch": 0.00022232666015625,
      "model_forward_time": 0.11601901054382324,
      "step": 36426
    },
    {
      "epoch": 0.00022232666015625,
      "step": 36426,
      "training_step_time": 0.46263957023620605
    },
    {
      "epoch": 0.000222332763671875,
      "model_forward_time": 0.11580109596252441,
      "step": 36427
    },
    {
      "epoch": 0.000222332763671875,
      "step": 36427,
      "training_step_time": 0.4252805709838867
    },
    {
      "epoch": 0.0002223388671875,
      "model_forward_time": 0.1156315803527832,
      "step": 36428
    },
    {
      "epoch": 0.0002223388671875,
      "step": 36428,
      "training_step_time": 0.4852879047393799
    },
    {
      "epoch": 0.000222344970703125,
      "model_forward_time": 0.1155083179473877,
      "step": 36429
    },
    {
      "epoch": 0.000222344970703125,
      "step": 36429,
      "training_step_time": 0.3836822509765625
    },
    {
      "epoch": 0.00022235107421875,
      "grad_norm": 0.11679653078317642,
      "learning_rate": 3.658055368072339e-05,
      "loss": 0.0449,
      "step": 36430
    },
    {
      "epoch": 0.00022235107421875,
      "model_forward_time": 0.11631441116333008,
      "step": 36430
    },
    {
      "epoch": 0.00022235107421875,
      "step": 36430,
      "training_step_time": 0.39081501960754395
    },
    {
      "epoch": 0.000222357177734375,
      "model_forward_time": 0.11572098731994629,
      "step": 36431
    },
    {
      "epoch": 0.000222357177734375,
      "step": 36431,
      "training_step_time": 0.3868398666381836
    },
    {
      "epoch": 0.00022236328125,
      "model_forward_time": 0.11629080772399902,
      "step": 36432
    },
    {
      "epoch": 0.00022236328125,
      "step": 36432,
      "training_step_time": 0.4075193405151367
    },
    {
      "epoch": 0.000222369384765625,
      "model_forward_time": 0.11508011817932129,
      "step": 36433
    },
    {
      "epoch": 0.000222369384765625,
      "step": 36433,
      "training_step_time": 0.4019629955291748
    },
    {
      "epoch": 0.00022237548828125,
      "model_forward_time": 0.11590695381164551,
      "step": 36434
    },
    {
      "epoch": 0.00022237548828125,
      "step": 36434,
      "training_step_time": 0.6136124134063721
    },
    {
      "epoch": 0.000222381591796875,
      "model_forward_time": 0.11702466011047363,
      "step": 36435
    },
    {
      "epoch": 0.000222381591796875,
      "step": 36435,
      "training_step_time": 0.3822658061981201
    },
    {
      "epoch": 0.0002223876953125,
      "model_forward_time": 0.1178276538848877,
      "step": 36436
    },
    {
      "epoch": 0.0002223876953125,
      "step": 36436,
      "training_step_time": 0.3842148780822754
    },
    {
      "epoch": 0.000222393798828125,
      "model_forward_time": 0.11860394477844238,
      "step": 36437
    },
    {
      "epoch": 0.000222393798828125,
      "step": 36437,
      "training_step_time": 0.4435899257659912
    },
    {
      "epoch": 0.00022239990234375,
      "model_forward_time": 0.11646246910095215,
      "step": 36438
    },
    {
      "epoch": 0.00022239990234375,
      "step": 36438,
      "training_step_time": 0.38421177864074707
    },
    {
      "epoch": 0.000222406005859375,
      "model_forward_time": 0.11609888076782227,
      "step": 36439
    },
    {
      "epoch": 0.000222406005859375,
      "step": 36439,
      "training_step_time": 0.3947455883026123
    },
    {
      "epoch": 0.000222412109375,
      "grad_norm": 0.15504835546016693,
      "learning_rate": 3.655400896923672e-05,
      "loss": 0.0404,
      "step": 36440
    },
    {
      "epoch": 0.000222412109375,
      "model_forward_time": 0.11643362045288086,
      "step": 36440
    },
    {
      "epoch": 0.000222412109375,
      "step": 36440,
      "training_step_time": 0.6375973224639893
    },
    {
      "epoch": 0.000222418212890625,
      "model_forward_time": 0.11824631690979004,
      "step": 36441
    },
    {
      "epoch": 0.000222418212890625,
      "step": 36441,
      "training_step_time": 0.3892405033111572
    },
    {
      "epoch": 0.00022242431640625,
      "model_forward_time": 0.11766338348388672,
      "step": 36442
    },
    {
      "epoch": 0.00022242431640625,
      "step": 36442,
      "training_step_time": 0.4702491760253906
    },
    {
      "epoch": 0.000222430419921875,
      "model_forward_time": 0.11696410179138184,
      "step": 36443
    },
    {
      "epoch": 0.000222430419921875,
      "step": 36443,
      "training_step_time": 0.38111090660095215
    },
    {
      "epoch": 0.0002224365234375,
      "model_forward_time": 0.11768579483032227,
      "step": 36444
    },
    {
      "epoch": 0.0002224365234375,
      "step": 36444,
      "training_step_time": 0.37483716011047363
    },
    {
      "epoch": 0.000222442626953125,
      "model_forward_time": 0.11629796028137207,
      "step": 36445
    },
    {
      "epoch": 0.000222442626953125,
      "step": 36445,
      "training_step_time": 0.3889279365539551
    },
    {
      "epoch": 0.00022244873046875,
      "model_forward_time": 0.11607575416564941,
      "step": 36446
    },
    {
      "epoch": 0.00022244873046875,
      "step": 36446,
      "training_step_time": 0.5604758262634277
    },
    {
      "epoch": 0.000222454833984375,
      "model_forward_time": 0.11557602882385254,
      "step": 36447
    },
    {
      "epoch": 0.000222454833984375,
      "step": 36447,
      "training_step_time": 0.4882783889770508
    },
    {
      "epoch": 0.0002224609375,
      "model_forward_time": 0.1156625747680664,
      "step": 36448
    },
    {
      "epoch": 0.0002224609375,
      "step": 36448,
      "training_step_time": 0.41120481491088867
    },
    {
      "epoch": 0.000222467041015625,
      "model_forward_time": 0.11579012870788574,
      "step": 36449
    },
    {
      "epoch": 0.000222467041015625,
      "step": 36449,
      "training_step_time": 0.4026789665222168
    },
    {
      "epoch": 0.00022247314453125,
      "grad_norm": 0.0909717008471489,
      "learning_rate": 3.65274683422871e-05,
      "loss": 0.0355,
      "step": 36450
    },
    {
      "epoch": 0.00022247314453125,
      "model_forward_time": 0.11533331871032715,
      "step": 36450
    },
    {
      "epoch": 0.00022247314453125,
      "step": 36450,
      "training_step_time": 0.4132874011993408
    },
    {
      "epoch": 0.000222479248046875,
      "model_forward_time": 0.11607813835144043,
      "step": 36451
    },
    {
      "epoch": 0.000222479248046875,
      "step": 36451,
      "training_step_time": 0.41310858726501465
    },
    {
      "epoch": 0.0002224853515625,
      "model_forward_time": 0.11574530601501465,
      "step": 36452
    },
    {
      "epoch": 0.0002224853515625,
      "step": 36452,
      "training_step_time": 0.4387192726135254
    },
    {
      "epoch": 0.000222491455078125,
      "model_forward_time": 0.11590981483459473,
      "step": 36453
    },
    {
      "epoch": 0.000222491455078125,
      "step": 36453,
      "training_step_time": 0.36962175369262695
    },
    {
      "epoch": 0.00022249755859375,
      "model_forward_time": 0.11652445793151855,
      "step": 36454
    },
    {
      "epoch": 0.00022249755859375,
      "step": 36454,
      "training_step_time": 0.45496463775634766
    },
    {
      "epoch": 0.000222503662109375,
      "model_forward_time": 0.11578750610351562,
      "step": 36455
    },
    {
      "epoch": 0.000222503662109375,
      "step": 36455,
      "training_step_time": 0.41363978385925293
    },
    {
      "epoch": 0.000222509765625,
      "model_forward_time": 0.11590099334716797,
      "step": 36456
    },
    {
      "epoch": 0.000222509765625,
      "step": 36456,
      "training_step_time": 0.4537510871887207
    },
    {
      "epoch": 0.000222515869140625,
      "model_forward_time": 0.11555719375610352,
      "step": 36457
    },
    {
      "epoch": 0.000222515869140625,
      "step": 36457,
      "training_step_time": 0.39791321754455566
    },
    {
      "epoch": 0.00022252197265625,
      "model_forward_time": 0.11617732048034668,
      "step": 36458
    },
    {
      "epoch": 0.00022252197265625,
      "step": 36458,
      "training_step_time": 0.48510074615478516
    },
    {
      "epoch": 0.000222528076171875,
      "model_forward_time": 0.11528158187866211,
      "step": 36459
    },
    {
      "epoch": 0.000222528076171875,
      "step": 36459,
      "training_step_time": 0.3880012035369873
    },
    {
      "epoch": 0.0002225341796875,
      "grad_norm": 0.1216818168759346,
      "learning_rate": 3.650093180793689e-05,
      "loss": 0.0366,
      "step": 36460
    },
    {
      "epoch": 0.0002225341796875,
      "model_forward_time": 0.11516284942626953,
      "step": 36460
    },
    {
      "epoch": 0.0002225341796875,
      "step": 36460,
      "training_step_time": 0.4107484817504883
    },
    {
      "epoch": 0.000222540283203125,
      "model_forward_time": 0.11545705795288086,
      "step": 36461
    },
    {
      "epoch": 0.000222540283203125,
      "step": 36461,
      "training_step_time": 0.39110517501831055
    },
    {
      "epoch": 0.00022254638671875,
      "model_forward_time": 0.11665582656860352,
      "step": 36462
    },
    {
      "epoch": 0.00022254638671875,
      "step": 36462,
      "training_step_time": 0.49880409240722656
    },
    {
      "epoch": 0.000222552490234375,
      "model_forward_time": 0.11537003517150879,
      "step": 36463
    },
    {
      "epoch": 0.000222552490234375,
      "step": 36463,
      "training_step_time": 0.4606332778930664
    },
    {
      "epoch": 0.00022255859375,
      "model_forward_time": 0.11544609069824219,
      "step": 36464
    },
    {
      "epoch": 0.00022255859375,
      "step": 36464,
      "training_step_time": 0.40911197662353516
    },
    {
      "epoch": 0.000222564697265625,
      "model_forward_time": 0.11534500122070312,
      "step": 36465
    },
    {
      "epoch": 0.000222564697265625,
      "step": 36465,
      "training_step_time": 0.3815603256225586
    },
    {
      "epoch": 0.00022257080078125,
      "model_forward_time": 0.11533069610595703,
      "step": 36466
    },
    {
      "epoch": 0.00022257080078125,
      "step": 36466,
      "training_step_time": 0.39089155197143555
    },
    {
      "epoch": 0.000222576904296875,
      "model_forward_time": 0.11550188064575195,
      "step": 36467
    },
    {
      "epoch": 0.000222576904296875,
      "step": 36467,
      "training_step_time": 0.3668704032897949
    },
    {
      "epoch": 0.0002225830078125,
      "model_forward_time": 0.11598539352416992,
      "step": 36468
    },
    {
      "epoch": 0.0002225830078125,
      "step": 36468,
      "training_step_time": 0.41507983207702637
    },
    {
      "epoch": 0.000222589111328125,
      "model_forward_time": 0.11591243743896484,
      "step": 36469
    },
    {
      "epoch": 0.000222589111328125,
      "step": 36469,
      "training_step_time": 0.5022869110107422
    },
    {
      "epoch": 0.00022259521484375,
      "grad_norm": 0.13609057664871216,
      "learning_rate": 3.647439937424717e-05,
      "loss": 0.0403,
      "step": 36470
    },
    {
      "epoch": 0.00022259521484375,
      "model_forward_time": 0.11538362503051758,
      "step": 36470
    },
    {
      "epoch": 0.00022259521484375,
      "step": 36470,
      "training_step_time": 0.4051973819732666
    },
    {
      "epoch": 0.000222601318359375,
      "model_forward_time": 0.1152799129486084,
      "step": 36471
    },
    {
      "epoch": 0.000222601318359375,
      "step": 36471,
      "training_step_time": 0.4944877624511719
    },
    {
      "epoch": 0.000222607421875,
      "model_forward_time": 0.11577391624450684,
      "step": 36472
    },
    {
      "epoch": 0.000222607421875,
      "step": 36472,
      "training_step_time": 0.3971292972564697
    },
    {
      "epoch": 0.000222613525390625,
      "model_forward_time": 0.11559033393859863,
      "step": 36473
    },
    {
      "epoch": 0.000222613525390625,
      "step": 36473,
      "training_step_time": 0.386859655380249
    },
    {
      "epoch": 0.00022261962890625,
      "model_forward_time": 0.11577963829040527,
      "step": 36474
    },
    {
      "epoch": 0.00022261962890625,
      "step": 36474,
      "training_step_time": 0.39244842529296875
    },
    {
      "epoch": 0.000222625732421875,
      "model_forward_time": 0.11571311950683594,
      "step": 36475
    },
    {
      "epoch": 0.000222625732421875,
      "step": 36475,
      "training_step_time": 0.3903670310974121
    },
    {
      "epoch": 0.0002226318359375,
      "model_forward_time": 0.11609292030334473,
      "step": 36476
    },
    {
      "epoch": 0.0002226318359375,
      "step": 36476,
      "training_step_time": 0.5808947086334229
    },
    {
      "epoch": 0.000222637939453125,
      "model_forward_time": 0.11528635025024414,
      "step": 36477
    },
    {
      "epoch": 0.000222637939453125,
      "step": 36477,
      "training_step_time": 0.4493582248687744
    },
    {
      "epoch": 0.00022264404296875,
      "model_forward_time": 0.11589169502258301,
      "step": 36478
    },
    {
      "epoch": 0.00022264404296875,
      "step": 36478,
      "training_step_time": 0.393416166305542
    },
    {
      "epoch": 0.000222650146484375,
      "model_forward_time": 0.11706280708312988,
      "step": 36479
    },
    {
      "epoch": 0.000222650146484375,
      "step": 36479,
      "training_step_time": 0.40282559394836426
    },
    {
      "epoch": 0.00022265625,
      "grad_norm": 0.14117218554019928,
      "learning_rate": 3.6447871049277796e-05,
      "loss": 0.0376,
      "step": 36480
    },
    {
      "epoch": 0.00022265625,
      "model_forward_time": 0.11493945121765137,
      "step": 36480
    },
    {
      "epoch": 0.00022265625,
      "step": 36480,
      "training_step_time": 0.3932936191558838
    },
    {
      "epoch": 0.000222662353515625,
      "model_forward_time": 0.11527562141418457,
      "step": 36481
    },
    {
      "epoch": 0.000222662353515625,
      "step": 36481,
      "training_step_time": 0.39423489570617676
    },
    {
      "epoch": 0.00022266845703125,
      "model_forward_time": 0.11609268188476562,
      "step": 36482
    },
    {
      "epoch": 0.00022266845703125,
      "step": 36482,
      "training_step_time": 0.6809873580932617
    },
    {
      "epoch": 0.000222674560546875,
      "model_forward_time": 0.1161489486694336,
      "step": 36483
    },
    {
      "epoch": 0.000222674560546875,
      "step": 36483,
      "training_step_time": 0.41568517684936523
    },
    {
      "epoch": 0.0002226806640625,
      "model_forward_time": 0.11526679992675781,
      "step": 36484
    },
    {
      "epoch": 0.0002226806640625,
      "step": 36484,
      "training_step_time": 0.42014408111572266
    },
    {
      "epoch": 0.000222686767578125,
      "model_forward_time": 0.11475658416748047,
      "step": 36485
    },
    {
      "epoch": 0.000222686767578125,
      "step": 36485,
      "training_step_time": 0.48096799850463867
    },
    {
      "epoch": 0.00022269287109375,
      "model_forward_time": 0.11528325080871582,
      "step": 36486
    },
    {
      "epoch": 0.00022269287109375,
      "step": 36486,
      "training_step_time": 0.388228178024292
    },
    {
      "epoch": 0.000222698974609375,
      "model_forward_time": 0.1153557300567627,
      "step": 36487
    },
    {
      "epoch": 0.000222698974609375,
      "step": 36487,
      "training_step_time": 0.3872489929199219
    },
    {
      "epoch": 0.000222705078125,
      "model_forward_time": 0.11567282676696777,
      "step": 36488
    },
    {
      "epoch": 0.000222705078125,
      "step": 36488,
      "training_step_time": 0.5076596736907959
    },
    {
      "epoch": 0.000222711181640625,
      "model_forward_time": 0.11517477035522461,
      "step": 36489
    },
    {
      "epoch": 0.000222711181640625,
      "step": 36489,
      "training_step_time": 0.4126901626586914
    },
    {
      "epoch": 0.00022271728515625,
      "grad_norm": 0.13142246007919312,
      "learning_rate": 3.642134684108737e-05,
      "loss": 0.0395,
      "step": 36490
    },
    {
      "epoch": 0.00022271728515625,
      "model_forward_time": 0.11546874046325684,
      "step": 36490
    },
    {
      "epoch": 0.00022271728515625,
      "step": 36490,
      "training_step_time": 0.45698070526123047
    },
    {
      "epoch": 0.000222723388671875,
      "model_forward_time": 0.11556077003479004,
      "step": 36491
    },
    {
      "epoch": 0.000222723388671875,
      "step": 36491,
      "training_step_time": 0.42032432556152344
    },
    {
      "epoch": 0.0002227294921875,
      "model_forward_time": 0.11548018455505371,
      "step": 36492
    },
    {
      "epoch": 0.0002227294921875,
      "step": 36492,
      "training_step_time": 0.39461469650268555
    },
    {
      "epoch": 0.000222735595703125,
      "model_forward_time": 0.1150808334350586,
      "step": 36493
    },
    {
      "epoch": 0.000222735595703125,
      "step": 36493,
      "training_step_time": 0.39345502853393555
    },
    {
      "epoch": 0.00022274169921875,
      "model_forward_time": 0.11557292938232422,
      "step": 36494
    },
    {
      "epoch": 0.00022274169921875,
      "step": 36494,
      "training_step_time": 0.47998929023742676
    },
    {
      "epoch": 0.000222747802734375,
      "model_forward_time": 0.11532974243164062,
      "step": 36495
    },
    {
      "epoch": 0.000222747802734375,
      "step": 36495,
      "training_step_time": 0.36674046516418457
    },
    {
      "epoch": 0.00022275390625,
      "model_forward_time": 0.11624264717102051,
      "step": 36496
    },
    {
      "epoch": 0.00022275390625,
      "step": 36496,
      "training_step_time": 0.41989707946777344
    },
    {
      "epoch": 0.000222760009765625,
      "model_forward_time": 0.11588263511657715,
      "step": 36497
    },
    {
      "epoch": 0.000222760009765625,
      "step": 36497,
      "training_step_time": 0.4996788501739502
    },
    {
      "epoch": 0.00022276611328125,
      "model_forward_time": 0.11554861068725586,
      "step": 36498
    },
    {
      "epoch": 0.00022276611328125,
      "step": 36498,
      "training_step_time": 0.4034388065338135
    },
    {
      "epoch": 0.000222772216796875,
      "model_forward_time": 0.11570143699645996,
      "step": 36499
    },
    {
      "epoch": 0.000222772216796875,
      "step": 36499,
      "training_step_time": 0.4032135009765625
    },
    {
      "epoch": 0.0002227783203125,
      "grad_norm": 0.08838188648223877,
      "learning_rate": 3.639482675773324e-05,
      "loss": 0.0375,
      "step": 36500
    },
    {
      "epoch": 0.0002227783203125,
      "model_forward_time": 0.11480212211608887,
      "step": 36500
    },
    {
      "epoch": 0.0002227783203125,
      "step": 36500,
      "training_step_time": 0.4660208225250244
    },
    {
      "epoch": 0.000222784423828125,
      "model_forward_time": 0.11577439308166504,
      "step": 36501
    },
    {
      "epoch": 0.000222784423828125,
      "step": 36501,
      "training_step_time": 0.38627195358276367
    },
    {
      "epoch": 0.00022279052734375,
      "model_forward_time": 0.11553478240966797,
      "step": 36502
    },
    {
      "epoch": 0.00022279052734375,
      "step": 36502,
      "training_step_time": 0.3857128620147705
    },
    {
      "epoch": 0.000222796630859375,
      "model_forward_time": 0.11582303047180176,
      "step": 36503
    },
    {
      "epoch": 0.000222796630859375,
      "step": 36503,
      "training_step_time": 0.410015344619751
    },
    {
      "epoch": 0.000222802734375,
      "model_forward_time": 0.11520075798034668,
      "step": 36504
    },
    {
      "epoch": 0.000222802734375,
      "step": 36504,
      "training_step_time": 0.4615442752838135
    },
    {
      "epoch": 0.000222808837890625,
      "model_forward_time": 0.11507177352905273,
      "step": 36505
    },
    {
      "epoch": 0.000222808837890625,
      "step": 36505,
      "training_step_time": 0.4487302303314209
    },
    {
      "epoch": 0.00022281494140625,
      "model_forward_time": 0.11575913429260254,
      "step": 36506
    },
    {
      "epoch": 0.00022281494140625,
      "step": 36506,
      "training_step_time": 0.5386192798614502
    },
    {
      "epoch": 0.000222821044921875,
      "model_forward_time": 0.11547660827636719,
      "step": 36507
    },
    {
      "epoch": 0.000222821044921875,
      "step": 36507,
      "training_step_time": 0.39051318168640137
    },
    {
      "epoch": 0.0002228271484375,
      "model_forward_time": 0.11698150634765625,
      "step": 36508
    },
    {
      "epoch": 0.0002228271484375,
      "step": 36508,
      "training_step_time": 0.38894057273864746
    },
    {
      "epoch": 0.000222833251953125,
      "model_forward_time": 0.11622142791748047,
      "step": 36509
    },
    {
      "epoch": 0.000222833251953125,
      "step": 36509,
      "training_step_time": 0.39431142807006836
    },
    {
      "epoch": 0.00022283935546875,
      "grad_norm": 0.16556416451931,
      "learning_rate": 3.636831080727154e-05,
      "loss": 0.04,
      "step": 36510
    },
    {
      "epoch": 0.00022283935546875,
      "model_forward_time": 0.11685705184936523,
      "step": 36510
    },
    {
      "epoch": 0.00022283935546875,
      "step": 36510,
      "training_step_time": 0.5006155967712402
    },
    {
      "epoch": 0.000222845458984375,
      "model_forward_time": 0.11524629592895508,
      "step": 36511
    },
    {
      "epoch": 0.000222845458984375,
      "step": 36511,
      "training_step_time": 0.4941868782043457
    },
    {
      "epoch": 0.0002228515625,
      "model_forward_time": 0.11531209945678711,
      "step": 36512
    },
    {
      "epoch": 0.0002228515625,
      "step": 36512,
      "training_step_time": 0.5950782299041748
    },
    {
      "epoch": 0.000222857666015625,
      "model_forward_time": 0.11607170104980469,
      "step": 36513
    },
    {
      "epoch": 0.000222857666015625,
      "step": 36513,
      "training_step_time": 0.39781832695007324
    },
    {
      "epoch": 0.00022286376953125,
      "model_forward_time": 0.11513400077819824,
      "step": 36514
    },
    {
      "epoch": 0.00022286376953125,
      "step": 36514,
      "training_step_time": 0.3968639373779297
    },
    {
      "epoch": 0.000222869873046875,
      "model_forward_time": 0.11523556709289551,
      "step": 36515
    },
    {
      "epoch": 0.000222869873046875,
      "step": 36515,
      "training_step_time": 0.3873007297515869
    },
    {
      "epoch": 0.0002228759765625,
      "model_forward_time": 0.11555838584899902,
      "step": 36516
    },
    {
      "epoch": 0.0002228759765625,
      "step": 36516,
      "training_step_time": 0.4251713752746582
    },
    {
      "epoch": 0.000222882080078125,
      "model_forward_time": 0.11501932144165039,
      "step": 36517
    },
    {
      "epoch": 0.000222882080078125,
      "step": 36517,
      "training_step_time": 0.4199821949005127
    },
    {
      "epoch": 0.00022288818359375,
      "model_forward_time": 0.11588239669799805,
      "step": 36518
    },
    {
      "epoch": 0.00022288818359375,
      "step": 36518,
      "training_step_time": 0.546511173248291
    },
    {
      "epoch": 0.000222894287109375,
      "model_forward_time": 0.11541247367858887,
      "step": 36519
    },
    {
      "epoch": 0.000222894287109375,
      "step": 36519,
      "training_step_time": 0.3914480209350586
    },
    {
      "epoch": 0.000222900390625,
      "grad_norm": 0.1039649173617363,
      "learning_rate": 3.634179899775708e-05,
      "loss": 0.0373,
      "step": 36520
    },
    {
      "epoch": 0.000222900390625,
      "model_forward_time": 0.11531853675842285,
      "step": 36520
    },
    {
      "epoch": 0.000222900390625,
      "step": 36520,
      "training_step_time": 0.3909318447113037
    },
    {
      "epoch": 0.000222906494140625,
      "model_forward_time": 0.11563396453857422,
      "step": 36521
    },
    {
      "epoch": 0.000222906494140625,
      "step": 36521,
      "training_step_time": 0.38281679153442383
    },
    {
      "epoch": 0.00022291259765625,
      "model_forward_time": 0.1165015697479248,
      "step": 36522
    },
    {
      "epoch": 0.00022291259765625,
      "step": 36522,
      "training_step_time": 0.38793230056762695
    },
    {
      "epoch": 0.000222918701171875,
      "model_forward_time": 0.11504817008972168,
      "step": 36523
    },
    {
      "epoch": 0.000222918701171875,
      "step": 36523,
      "training_step_time": 0.40410757064819336
    },
    {
      "epoch": 0.0002229248046875,
      "model_forward_time": 0.11789965629577637,
      "step": 36524
    },
    {
      "epoch": 0.0002229248046875,
      "step": 36524,
      "training_step_time": 0.6531274318695068
    },
    {
      "epoch": 0.000222930908203125,
      "model_forward_time": 0.11605310440063477,
      "step": 36525
    },
    {
      "epoch": 0.000222930908203125,
      "step": 36525,
      "training_step_time": 0.4669661521911621
    },
    {
      "epoch": 0.00022293701171875,
      "model_forward_time": 0.11511564254760742,
      "step": 36526
    },
    {
      "epoch": 0.00022293701171875,
      "step": 36526,
      "training_step_time": 0.41110992431640625
    },
    {
      "epoch": 0.000222943115234375,
      "model_forward_time": 0.11538386344909668,
      "step": 36527
    },
    {
      "epoch": 0.000222943115234375,
      "step": 36527,
      "training_step_time": 0.4031558036804199
    },
    {
      "epoch": 0.00022294921875,
      "model_forward_time": 0.11549782752990723,
      "step": 36528
    },
    {
      "epoch": 0.00022294921875,
      "step": 36528,
      "training_step_time": 0.3980748653411865
    },
    {
      "epoch": 0.000222955322265625,
      "model_forward_time": 0.1146702766418457,
      "step": 36529
    },
    {
      "epoch": 0.000222955322265625,
      "step": 36529,
      "training_step_time": 0.39170074462890625
    },
    {
      "epoch": 0.00022296142578125,
      "grad_norm": 0.10941015183925629,
      "learning_rate": 3.631529133724348e-05,
      "loss": 0.0383,
      "step": 36530
    },
    {
      "epoch": 0.00022296142578125,
      "model_forward_time": 0.11599397659301758,
      "step": 36530
    },
    {
      "epoch": 0.00022296142578125,
      "step": 36530,
      "training_step_time": 0.5620529651641846
    },
    {
      "epoch": 0.000222967529296875,
      "model_forward_time": 0.11527419090270996,
      "step": 36531
    },
    {
      "epoch": 0.000222967529296875,
      "step": 36531,
      "training_step_time": 0.48334622383117676
    },
    {
      "epoch": 0.0002229736328125,
      "model_forward_time": 0.11490678787231445,
      "step": 36532
    },
    {
      "epoch": 0.0002229736328125,
      "step": 36532,
      "training_step_time": 0.48836350440979004
    },
    {
      "epoch": 0.000222979736328125,
      "model_forward_time": 0.11501884460449219,
      "step": 36533
    },
    {
      "epoch": 0.000222979736328125,
      "step": 36533,
      "training_step_time": 0.3762845993041992
    },
    {
      "epoch": 0.00022298583984375,
      "model_forward_time": 0.11745715141296387,
      "step": 36534
    },
    {
      "epoch": 0.00022298583984375,
      "step": 36534,
      "training_step_time": 0.3967628479003906
    },
    {
      "epoch": 0.000222991943359375,
      "model_forward_time": 0.11587762832641602,
      "step": 36535
    },
    {
      "epoch": 0.000222991943359375,
      "step": 36535,
      "training_step_time": 0.3866453170776367
    },
    {
      "epoch": 0.000222998046875,
      "model_forward_time": 0.11510324478149414,
      "step": 36536
    },
    {
      "epoch": 0.000222998046875,
      "step": 36536,
      "training_step_time": 0.4022495746612549
    },
    {
      "epoch": 0.000223004150390625,
      "model_forward_time": 0.11547040939331055,
      "step": 36537
    },
    {
      "epoch": 0.000223004150390625,
      "step": 36537,
      "training_step_time": 0.38305091857910156
    },
    {
      "epoch": 0.00022301025390625,
      "model_forward_time": 0.11617398262023926,
      "step": 36538
    },
    {
      "epoch": 0.00022301025390625,
      "step": 36538,
      "training_step_time": 0.4126296043395996
    },
    {
      "epoch": 0.000223016357421875,
      "model_forward_time": 0.11555719375610352,
      "step": 36539
    },
    {
      "epoch": 0.000223016357421875,
      "step": 36539,
      "training_step_time": 0.3992605209350586
    },
    {
      "epoch": 0.0002230224609375,
      "grad_norm": 0.10496625304222107,
      "learning_rate": 3.628878783378302e-05,
      "loss": 0.0396,
      "step": 36540
    },
    {
      "epoch": 0.0002230224609375,
      "model_forward_time": 0.11624002456665039,
      "step": 36540
    },
    {
      "epoch": 0.0002230224609375,
      "step": 36540,
      "training_step_time": 0.4431185722351074
    },
    {
      "epoch": 0.000223028564453125,
      "model_forward_time": 0.11543893814086914,
      "step": 36541
    },
    {
      "epoch": 0.000223028564453125,
      "step": 36541,
      "training_step_time": 0.4023091793060303
    },
    {
      "epoch": 0.00022303466796875,
      "model_forward_time": 0.11580634117126465,
      "step": 36542
    },
    {
      "epoch": 0.00022303466796875,
      "step": 36542,
      "training_step_time": 0.5765674114227295
    },
    {
      "epoch": 0.000223040771484375,
      "model_forward_time": 0.11528491973876953,
      "step": 36543
    },
    {
      "epoch": 0.000223040771484375,
      "step": 36543,
      "training_step_time": 0.3892788887023926
    },
    {
      "epoch": 0.000223046875,
      "model_forward_time": 0.11619019508361816,
      "step": 36544
    },
    {
      "epoch": 0.000223046875,
      "step": 36544,
      "training_step_time": 0.3904683589935303
    },
    {
      "epoch": 0.000223052978515625,
      "model_forward_time": 0.11577653884887695,
      "step": 36545
    },
    {
      "epoch": 0.000223052978515625,
      "step": 36545,
      "training_step_time": 0.4585113525390625
    },
    {
      "epoch": 0.00022305908203125,
      "model_forward_time": 0.1153862476348877,
      "step": 36546
    },
    {
      "epoch": 0.00022305908203125,
      "step": 36546,
      "training_step_time": 0.4183797836303711
    },
    {
      "epoch": 0.000223065185546875,
      "model_forward_time": 0.11494922637939453,
      "step": 36547
    },
    {
      "epoch": 0.000223065185546875,
      "step": 36547,
      "training_step_time": 0.4093949794769287
    },
    {
      "epoch": 0.0002230712890625,
      "model_forward_time": 0.11577701568603516,
      "step": 36548
    },
    {
      "epoch": 0.0002230712890625,
      "step": 36548,
      "training_step_time": 0.5267515182495117
    },
    {
      "epoch": 0.000223077392578125,
      "model_forward_time": 0.1152181625366211,
      "step": 36549
    },
    {
      "epoch": 0.000223077392578125,
      "step": 36549,
      "training_step_time": 0.3921225070953369
    },
    {
      "epoch": 0.00022308349609375,
      "grad_norm": 0.23435811698436737,
      "learning_rate": 3.62622884954268e-05,
      "loss": 0.0399,
      "step": 36550
    },
    {
      "epoch": 0.00022308349609375,
      "model_forward_time": 0.11663603782653809,
      "step": 36550
    },
    {
      "epoch": 0.00022308349609375,
      "step": 36550,
      "training_step_time": 0.41919636726379395
    },
    {
      "epoch": 0.000223089599609375,
      "model_forward_time": 0.11652064323425293,
      "step": 36551
    },
    {
      "epoch": 0.000223089599609375,
      "step": 36551,
      "training_step_time": 0.4718353748321533
    },
    {
      "epoch": 0.000223095703125,
      "model_forward_time": 0.11632871627807617,
      "step": 36552
    },
    {
      "epoch": 0.000223095703125,
      "step": 36552,
      "training_step_time": 0.5044150352478027
    },
    {
      "epoch": 0.000223101806640625,
      "model_forward_time": 0.12121891975402832,
      "step": 36553
    },
    {
      "epoch": 0.000223101806640625,
      "step": 36553,
      "training_step_time": 0.6953728199005127
    },
    {
      "epoch": 0.00022310791015625,
      "model_forward_time": 0.11768364906311035,
      "step": 36554
    },
    {
      "epoch": 0.00022310791015625,
      "step": 36554,
      "training_step_time": 0.6904840469360352
    },
    {
      "epoch": 0.000223114013671875,
      "model_forward_time": 0.1196892261505127,
      "step": 36555
    },
    {
      "epoch": 0.000223114013671875,
      "step": 36555,
      "training_step_time": 0.6534051895141602
    },
    {
      "epoch": 0.0002231201171875,
      "model_forward_time": 0.11719703674316406,
      "step": 36556
    },
    {
      "epoch": 0.0002231201171875,
      "step": 36556,
      "training_step_time": 0.6922521591186523
    },
    {
      "epoch": 0.000223126220703125,
      "model_forward_time": 0.12420892715454102,
      "step": 36557
    },
    {
      "epoch": 0.000223126220703125,
      "step": 36557,
      "training_step_time": 0.6914632320404053
    },
    {
      "epoch": 0.00022313232421875,
      "model_forward_time": 0.11775732040405273,
      "step": 36558
    },
    {
      "epoch": 0.00022313232421875,
      "step": 36558,
      "training_step_time": 0.7605798244476318
    },
    {
      "epoch": 0.000223138427734375,
      "model_forward_time": 0.11769723892211914,
      "step": 36559
    },
    {
      "epoch": 0.000223138427734375,
      "step": 36559,
      "training_step_time": 0.6568093299865723
    },
    {
      "epoch": 0.00022314453125,
      "grad_norm": 0.1663273125886917,
      "learning_rate": 3.6235793330224635e-05,
      "loss": 0.0366,
      "step": 36560
    },
    {
      "epoch": 0.00022314453125,
      "model_forward_time": 0.1221461296081543,
      "step": 36560
    },
    {
      "epoch": 0.00022314453125,
      "step": 36560,
      "training_step_time": 0.6481928825378418
    },
    {
      "epoch": 0.000223150634765625,
      "model_forward_time": 0.11884760856628418,
      "step": 36561
    },
    {
      "epoch": 0.000223150634765625,
      "step": 36561,
      "training_step_time": 0.6766190528869629
    },
    {
      "epoch": 0.00022315673828125,
      "model_forward_time": 0.11900210380554199,
      "step": 36562
    },
    {
      "epoch": 0.00022315673828125,
      "step": 36562,
      "training_step_time": 0.7839291095733643
    },
    {
      "epoch": 0.000223162841796875,
      "model_forward_time": 0.11874866485595703,
      "step": 36563
    },
    {
      "epoch": 0.000223162841796875,
      "step": 36563,
      "training_step_time": 0.7799637317657471
    },
    {
      "epoch": 0.0002231689453125,
      "model_forward_time": 0.11883902549743652,
      "step": 36564
    },
    {
      "epoch": 0.0002231689453125,
      "step": 36564,
      "training_step_time": 0.7434096336364746
    },
    {
      "epoch": 0.000223175048828125,
      "model_forward_time": 0.11969566345214844,
      "step": 36565
    },
    {
      "epoch": 0.000223175048828125,
      "step": 36565,
      "training_step_time": 0.6425747871398926
    },
    {
      "epoch": 0.00022318115234375,
      "model_forward_time": 0.11644196510314941,
      "step": 36566
    },
    {
      "epoch": 0.00022318115234375,
      "step": 36566,
      "training_step_time": 0.6660652160644531
    },
    {
      "epoch": 0.000223187255859375,
      "model_forward_time": 0.11810135841369629,
      "step": 36567
    },
    {
      "epoch": 0.000223187255859375,
      "step": 36567,
      "training_step_time": 0.7357723712921143
    },
    {
      "epoch": 0.000223193359375,
      "model_forward_time": 0.12077713012695312,
      "step": 36568
    },
    {
      "epoch": 0.000223193359375,
      "step": 36568,
      "training_step_time": 0.7054119110107422
    },
    {
      "epoch": 0.000223199462890625,
      "model_forward_time": 0.12426590919494629,
      "step": 36569
    },
    {
      "epoch": 0.000223199462890625,
      "step": 36569,
      "training_step_time": 0.608450174331665
    },
    {
      "epoch": 0.00022320556640625,
      "grad_norm": 0.1511181890964508,
      "learning_rate": 3.6209302346225006e-05,
      "loss": 0.0448,
      "step": 36570
    },
    {
      "epoch": 0.00022320556640625,
      "model_forward_time": 0.1365668773651123,
      "step": 36570
    },
    {
      "epoch": 0.00022320556640625,
      "step": 36570,
      "training_step_time": 0.5949220657348633
    },
    {
      "epoch": 0.000223211669921875,
      "model_forward_time": 0.12000298500061035,
      "step": 36571
    },
    {
      "epoch": 0.000223211669921875,
      "step": 36571,
      "training_step_time": 0.7362709045410156
    },
    {
      "epoch": 0.0002232177734375,
      "model_forward_time": 0.12471961975097656,
      "step": 36572
    },
    {
      "epoch": 0.0002232177734375,
      "step": 36572,
      "training_step_time": 0.6792643070220947
    },
    {
      "epoch": 0.000223223876953125,
      "model_forward_time": 0.12066411972045898,
      "step": 36573
    },
    {
      "epoch": 0.000223223876953125,
      "step": 36573,
      "training_step_time": 0.6972401142120361
    },
    {
      "epoch": 0.00022322998046875,
      "model_forward_time": 0.11734676361083984,
      "step": 36574
    },
    {
      "epoch": 0.00022322998046875,
      "step": 36574,
      "training_step_time": 0.6728684902191162
    },
    {
      "epoch": 0.000223236083984375,
      "model_forward_time": 0.11553478240966797,
      "step": 36575
    },
    {
      "epoch": 0.000223236083984375,
      "step": 36575,
      "training_step_time": 0.6165573596954346
    },
    {
      "epoch": 0.0002232421875,
      "model_forward_time": 0.1276705265045166,
      "step": 36576
    },
    {
      "epoch": 0.0002232421875,
      "step": 36576,
      "training_step_time": 0.6476280689239502
    },
    {
      "epoch": 0.000223248291015625,
      "model_forward_time": 0.11635136604309082,
      "step": 36577
    },
    {
      "epoch": 0.000223248291015625,
      "step": 36577,
      "training_step_time": 0.6817753314971924
    },
    {
      "epoch": 0.00022325439453125,
      "model_forward_time": 0.12286639213562012,
      "step": 36578
    },
    {
      "epoch": 0.00022325439453125,
      "step": 36578,
      "training_step_time": 0.6661660671234131
    },
    {
      "epoch": 0.000223260498046875,
      "model_forward_time": 0.12487149238586426,
      "step": 36579
    },
    {
      "epoch": 0.000223260498046875,
      "step": 36579,
      "training_step_time": 0.6524832248687744
    },
    {
      "epoch": 0.0002232666015625,
      "grad_norm": 0.13502323627471924,
      "learning_rate": 3.618281555147522e-05,
      "loss": 0.0465,
      "step": 36580
    },
    {
      "epoch": 0.0002232666015625,
      "model_forward_time": 0.15644121170043945,
      "step": 36580
    },
    {
      "epoch": 0.0002232666015625,
      "step": 36580,
      "training_step_time": 0.6452248096466064
    },
    {
      "epoch": 0.000223272705078125,
      "model_forward_time": 0.12193679809570312,
      "step": 36581
    },
    {
      "epoch": 0.000223272705078125,
      "step": 36581,
      "training_step_time": 0.7081024646759033
    },
    {
      "epoch": 0.00022327880859375,
      "model_forward_time": 0.12676620483398438,
      "step": 36582
    },
    {
      "epoch": 0.00022327880859375,
      "step": 36582,
      "training_step_time": 0.6921191215515137
    },
    {
      "epoch": 0.000223284912109375,
      "model_forward_time": 0.12234830856323242,
      "step": 36583
    },
    {
      "epoch": 0.000223284912109375,
      "step": 36583,
      "training_step_time": 0.6072962284088135
    },
    {
      "epoch": 0.000223291015625,
      "model_forward_time": 0.1268918514251709,
      "step": 36584
    },
    {
      "epoch": 0.000223291015625,
      "step": 36584,
      "training_step_time": 0.629833459854126
    },
    {
      "epoch": 0.000223297119140625,
      "model_forward_time": 0.11997866630554199,
      "step": 36585
    },
    {
      "epoch": 0.000223297119140625,
      "step": 36585,
      "training_step_time": 0.6677451133728027
    },
    {
      "epoch": 0.00022330322265625,
      "model_forward_time": 0.11899638175964355,
      "step": 36586
    },
    {
      "epoch": 0.00022330322265625,
      "step": 36586,
      "training_step_time": 0.6987414360046387
    },
    {
      "epoch": 0.000223309326171875,
      "model_forward_time": 0.11993408203125,
      "step": 36587
    },
    {
      "epoch": 0.000223309326171875,
      "step": 36587,
      "training_step_time": 0.6585402488708496
    },
    {
      "epoch": 0.0002233154296875,
      "model_forward_time": 0.11814570426940918,
      "step": 36588
    },
    {
      "epoch": 0.0002233154296875,
      "step": 36588,
      "training_step_time": 0.6524579524993896
    },
    {
      "epoch": 0.000223321533203125,
      "model_forward_time": 0.11669206619262695,
      "step": 36589
    },
    {
      "epoch": 0.000223321533203125,
      "step": 36589,
      "training_step_time": 0.637814998626709
    },
    {
      "epoch": 0.00022332763671875,
      "grad_norm": 0.09826237708330154,
      "learning_rate": 3.615633295402123e-05,
      "loss": 0.0474,
      "step": 36590
    },
    {
      "epoch": 0.00022332763671875,
      "model_forward_time": 0.11751437187194824,
      "step": 36590
    },
    {
      "epoch": 0.00022332763671875,
      "step": 36590,
      "training_step_time": 0.6558263301849365
    },
    {
      "epoch": 0.000223333740234375,
      "model_forward_time": 0.11834120750427246,
      "step": 36591
    },
    {
      "epoch": 0.000223333740234375,
      "step": 36591,
      "training_step_time": 0.774420976638794
    },
    {
      "epoch": 0.00022333984375,
      "model_forward_time": 0.1187598705291748,
      "step": 36592
    },
    {
      "epoch": 0.00022333984375,
      "step": 36592,
      "training_step_time": 0.667306661605835
    },
    {
      "epoch": 0.000223345947265625,
      "model_forward_time": 0.11670660972595215,
      "step": 36593
    },
    {
      "epoch": 0.000223345947265625,
      "step": 36593,
      "training_step_time": 0.6114656925201416
    },
    {
      "epoch": 0.00022335205078125,
      "model_forward_time": 0.12001395225524902,
      "step": 36594
    },
    {
      "epoch": 0.00022335205078125,
      "step": 36594,
      "training_step_time": 0.7323250770568848
    },
    {
      "epoch": 0.000223358154296875,
      "model_forward_time": 0.13167548179626465,
      "step": 36595
    },
    {
      "epoch": 0.000223358154296875,
      "step": 36595,
      "training_step_time": 0.6546711921691895
    },
    {
      "epoch": 0.0002233642578125,
      "model_forward_time": 0.1189413070678711,
      "step": 36596
    },
    {
      "epoch": 0.0002233642578125,
      "step": 36596,
      "training_step_time": 0.6655452251434326
    },
    {
      "epoch": 0.000223370361328125,
      "model_forward_time": 0.1244349479675293,
      "step": 36597
    },
    {
      "epoch": 0.000223370361328125,
      "step": 36597,
      "training_step_time": 0.6559607982635498
    },
    {
      "epoch": 0.00022337646484375,
      "model_forward_time": 0.12266898155212402,
      "step": 36598
    },
    {
      "epoch": 0.00022337646484375,
      "step": 36598,
      "training_step_time": 0.6225135326385498
    },
    {
      "epoch": 0.000223382568359375,
      "model_forward_time": 0.13150405883789062,
      "step": 36599
    },
    {
      "epoch": 0.000223382568359375,
      "step": 36599,
      "training_step_time": 0.677011251449585
    },
    {
      "epoch": 0.000223388671875,
      "grad_norm": 0.10672689229249954,
      "learning_rate": 3.612985456190778e-05,
      "loss": 0.0414,
      "step": 36600
    },
    {
      "epoch": 0.000223388671875,
      "model_forward_time": 0.11787652969360352,
      "step": 36600
    },
    {
      "epoch": 0.000223388671875,
      "step": 36600,
      "training_step_time": 0.647296667098999
    },
    {
      "epoch": 0.000223394775390625,
      "model_forward_time": 0.1156153678894043,
      "step": 36601
    },
    {
      "epoch": 0.000223394775390625,
      "step": 36601,
      "training_step_time": 0.6964945793151855
    },
    {
      "epoch": 0.00022340087890625,
      "model_forward_time": 0.11934185028076172,
      "step": 36602
    },
    {
      "epoch": 0.00022340087890625,
      "step": 36602,
      "training_step_time": 0.6425304412841797
    },
    {
      "epoch": 0.000223406982421875,
      "model_forward_time": 0.1215524673461914,
      "step": 36603
    },
    {
      "epoch": 0.000223406982421875,
      "step": 36603,
      "training_step_time": 0.6742219924926758
    },
    {
      "epoch": 0.0002234130859375,
      "model_forward_time": 0.11921000480651855,
      "step": 36604
    },
    {
      "epoch": 0.0002234130859375,
      "step": 36604,
      "training_step_time": 0.7044625282287598
    },
    {
      "epoch": 0.000223419189453125,
      "model_forward_time": 0.11863112449645996,
      "step": 36605
    },
    {
      "epoch": 0.000223419189453125,
      "step": 36605,
      "training_step_time": 0.6634323596954346
    },
    {
      "epoch": 0.00022342529296875,
      "model_forward_time": 0.11898303031921387,
      "step": 36606
    },
    {
      "epoch": 0.00022342529296875,
      "step": 36606,
      "training_step_time": 0.7238900661468506
    },
    {
      "epoch": 0.000223431396484375,
      "model_forward_time": 0.12043499946594238,
      "step": 36607
    },
    {
      "epoch": 0.000223431396484375,
      "step": 36607,
      "training_step_time": 0.6575260162353516
    },
    {
      "epoch": 0.0002234375,
      "model_forward_time": 0.12742328643798828,
      "step": 36608
    },
    {
      "epoch": 0.0002234375,
      "step": 36608,
      "training_step_time": 0.6389820575714111
    },
    {
      "epoch": 0.000223443603515625,
      "model_forward_time": 0.12580204010009766,
      "step": 36609
    },
    {
      "epoch": 0.000223443603515625,
      "step": 36609,
      "training_step_time": 0.6454401016235352
    },
    {
      "epoch": 0.00022344970703125,
      "grad_norm": 0.1277303695678711,
      "learning_rate": 3.610338038317828e-05,
      "loss": 0.0428,
      "step": 36610
    },
    {
      "epoch": 0.00022344970703125,
      "model_forward_time": 0.11820530891418457,
      "step": 36610
    },
    {
      "epoch": 0.00022344970703125,
      "step": 36610,
      "training_step_time": 0.701101541519165
    },
    {
      "epoch": 0.000223455810546875,
      "model_forward_time": 0.11876273155212402,
      "step": 36611
    },
    {
      "epoch": 0.000223455810546875,
      "step": 36611,
      "training_step_time": 0.7088594436645508
    },
    {
      "epoch": 0.0002234619140625,
      "model_forward_time": 0.12715435028076172,
      "step": 36612
    },
    {
      "epoch": 0.0002234619140625,
      "step": 36612,
      "training_step_time": 0.6127288341522217
    },
    {
      "epoch": 0.000223468017578125,
      "model_forward_time": 0.12199759483337402,
      "step": 36613
    },
    {
      "epoch": 0.000223468017578125,
      "step": 36613,
      "training_step_time": 0.6616053581237793
    },
    {
      "epoch": 0.00022347412109375,
      "model_forward_time": 0.11660408973693848,
      "step": 36614
    },
    {
      "epoch": 0.00022347412109375,
      "step": 36614,
      "training_step_time": 0.6865622997283936
    },
    {
      "epoch": 0.000223480224609375,
      "model_forward_time": 0.11995267868041992,
      "step": 36615
    },
    {
      "epoch": 0.000223480224609375,
      "step": 36615,
      "training_step_time": 0.6341798305511475
    },
    {
      "epoch": 0.000223486328125,
      "model_forward_time": 0.11837291717529297,
      "step": 36616
    },
    {
      "epoch": 0.000223486328125,
      "step": 36616,
      "training_step_time": 0.6655325889587402
    },
    {
      "epoch": 0.000223492431640625,
      "model_forward_time": 0.11844992637634277,
      "step": 36617
    },
    {
      "epoch": 0.000223492431640625,
      "step": 36617,
      "training_step_time": 0.6971144676208496
    },
    {
      "epoch": 0.00022349853515625,
      "model_forward_time": 0.12027287483215332,
      "step": 36618
    },
    {
      "epoch": 0.00022349853515625,
      "step": 36618,
      "training_step_time": 0.6594820022583008
    },
    {
      "epoch": 0.000223504638671875,
      "model_forward_time": 0.11858010292053223,
      "step": 36619
    },
    {
      "epoch": 0.000223504638671875,
      "step": 36619,
      "training_step_time": 0.727808952331543
    },
    {
      "epoch": 0.0002235107421875,
      "grad_norm": 0.11654394119977951,
      "learning_rate": 3.607691042587492e-05,
      "loss": 0.0492,
      "step": 36620
    },
    {
      "epoch": 0.0002235107421875,
      "model_forward_time": 0.12129616737365723,
      "step": 36620
    },
    {
      "epoch": 0.0002235107421875,
      "step": 36620,
      "training_step_time": 0.6451499462127686
    },
    {
      "epoch": 0.000223516845703125,
      "model_forward_time": 0.12767744064331055,
      "step": 36621
    },
    {
      "epoch": 0.000223516845703125,
      "step": 36621,
      "training_step_time": 0.5300629138946533
    },
    {
      "epoch": 0.00022352294921875,
      "model_forward_time": 0.11648941040039062,
      "step": 36622
    },
    {
      "epoch": 0.00022352294921875,
      "step": 36622,
      "training_step_time": 0.5154919624328613
    },
    {
      "epoch": 0.000223529052734375,
      "model_forward_time": 0.11651921272277832,
      "step": 36623
    },
    {
      "epoch": 0.000223529052734375,
      "step": 36623,
      "training_step_time": 0.6488814353942871
    },
    {
      "epoch": 0.00022353515625,
      "model_forward_time": 0.11643004417419434,
      "step": 36624
    },
    {
      "epoch": 0.00022353515625,
      "step": 36624,
      "training_step_time": 0.5628483295440674
    },
    {
      "epoch": 0.000223541259765625,
      "model_forward_time": 0.11603665351867676,
      "step": 36625
    },
    {
      "epoch": 0.000223541259765625,
      "step": 36625,
      "training_step_time": 0.5272095203399658
    },
    {
      "epoch": 0.00022354736328125,
      "model_forward_time": 0.1166996955871582,
      "step": 36626
    },
    {
      "epoch": 0.00022354736328125,
      "step": 36626,
      "training_step_time": 0.40990400314331055
    },
    {
      "epoch": 0.000223553466796875,
      "model_forward_time": 0.11601996421813965,
      "step": 36627
    },
    {
      "epoch": 0.000223553466796875,
      "step": 36627,
      "training_step_time": 0.4137396812438965
    },
    {
      "epoch": 0.0002235595703125,
      "model_forward_time": 0.11561441421508789,
      "step": 36628
    },
    {
      "epoch": 0.0002235595703125,
      "step": 36628,
      "training_step_time": 0.4116988182067871
    },
    {
      "epoch": 0.000223565673828125,
      "model_forward_time": 0.11536884307861328,
      "step": 36629
    },
    {
      "epoch": 0.000223565673828125,
      "step": 36629,
      "training_step_time": 0.4179251194000244
    },
    {
      "epoch": 0.00022357177734375,
      "grad_norm": 0.12068051844835281,
      "learning_rate": 3.605044469803854e-05,
      "loss": 0.0414,
      "step": 36630
    },
    {
      "epoch": 0.00022357177734375,
      "model_forward_time": 0.11539912223815918,
      "step": 36630
    },
    {
      "epoch": 0.00022357177734375,
      "step": 36630,
      "training_step_time": 0.40491366386413574
    },
    {
      "epoch": 0.000223577880859375,
      "model_forward_time": 0.1150364875793457,
      "step": 36631
    },
    {
      "epoch": 0.000223577880859375,
      "step": 36631,
      "training_step_time": 0.3754701614379883
    },
    {
      "epoch": 0.000223583984375,
      "model_forward_time": 0.11522483825683594,
      "step": 36632
    },
    {
      "epoch": 0.000223583984375,
      "step": 36632,
      "training_step_time": 0.476438045501709
    },
    {
      "epoch": 0.000223590087890625,
      "model_forward_time": 0.1149282455444336,
      "step": 36633
    },
    {
      "epoch": 0.000223590087890625,
      "step": 36633,
      "training_step_time": 0.4162271022796631
    },
    {
      "epoch": 0.00022359619140625,
      "model_forward_time": 0.11555290222167969,
      "step": 36634
    },
    {
      "epoch": 0.00022359619140625,
      "step": 36634,
      "training_step_time": 0.4629538059234619
    },
    {
      "epoch": 0.000223602294921875,
      "model_forward_time": 0.11474013328552246,
      "step": 36635
    },
    {
      "epoch": 0.000223602294921875,
      "step": 36635,
      "training_step_time": 0.4059422016143799
    },
    {
      "epoch": 0.0002236083984375,
      "model_forward_time": 0.11534857749938965,
      "step": 36636
    },
    {
      "epoch": 0.0002236083984375,
      "step": 36636,
      "training_step_time": 0.3999514579772949
    },
    {
      "epoch": 0.000223614501953125,
      "model_forward_time": 0.11487603187561035,
      "step": 36637
    },
    {
      "epoch": 0.000223614501953125,
      "step": 36637,
      "training_step_time": 0.5090844631195068
    },
    {
      "epoch": 0.00022362060546875,
      "model_forward_time": 0.11507558822631836,
      "step": 36638
    },
    {
      "epoch": 0.00022362060546875,
      "step": 36638,
      "training_step_time": 0.43045687675476074
    },
    {
      "epoch": 0.000223626708984375,
      "model_forward_time": 0.11518454551696777,
      "step": 36639
    },
    {
      "epoch": 0.000223626708984375,
      "step": 36639,
      "training_step_time": 0.5106096267700195
    },
    {
      "epoch": 0.0002236328125,
      "grad_norm": 0.11083158105611801,
      "learning_rate": 3.602398320770875e-05,
      "loss": 0.0451,
      "step": 36640
    },
    {
      "epoch": 0.0002236328125,
      "model_forward_time": 0.11562585830688477,
      "step": 36640
    },
    {
      "epoch": 0.0002236328125,
      "step": 36640,
      "training_step_time": 0.3963332176208496
    },
    {
      "epoch": 0.000223638916015625,
      "model_forward_time": 0.11464905738830566,
      "step": 36641
    },
    {
      "epoch": 0.000223638916015625,
      "step": 36641,
      "training_step_time": 0.388444185256958
    },
    {
      "epoch": 0.00022364501953125,
      "model_forward_time": 0.11459755897521973,
      "step": 36642
    },
    {
      "epoch": 0.00022364501953125,
      "step": 36642,
      "training_step_time": 0.3866455554962158
    },
    {
      "epoch": 0.000223651123046875,
      "model_forward_time": 0.1147153377532959,
      "step": 36643
    },
    {
      "epoch": 0.000223651123046875,
      "step": 36643,
      "training_step_time": 0.40469980239868164
    },
    {
      "epoch": 0.0002236572265625,
      "model_forward_time": 0.11651229858398438,
      "step": 36644
    },
    {
      "epoch": 0.0002236572265625,
      "step": 36644,
      "training_step_time": 0.3833596706390381
    },
    {
      "epoch": 0.000223663330078125,
      "model_forward_time": 0.11599993705749512,
      "step": 36645
    },
    {
      "epoch": 0.000223663330078125,
      "step": 36645,
      "training_step_time": 0.3671905994415283
    },
    {
      "epoch": 0.00022366943359375,
      "model_forward_time": 0.11561441421508789,
      "step": 36646
    },
    {
      "epoch": 0.00022366943359375,
      "step": 36646,
      "training_step_time": 0.4765164852142334
    },
    {
      "epoch": 0.000223675537109375,
      "model_forward_time": 0.1148827075958252,
      "step": 36647
    },
    {
      "epoch": 0.000223675537109375,
      "step": 36647,
      "training_step_time": 0.458568811416626
    },
    {
      "epoch": 0.000223681640625,
      "model_forward_time": 0.11596274375915527,
      "step": 36648
    },
    {
      "epoch": 0.000223681640625,
      "step": 36648,
      "training_step_time": 0.41413402557373047
    },
    {
      "epoch": 0.000223687744140625,
      "model_forward_time": 0.1151878833770752,
      "step": 36649
    },
    {
      "epoch": 0.000223687744140625,
      "step": 36649,
      "training_step_time": 0.38739442825317383
    },
    {
      "epoch": 0.00022369384765625,
      "grad_norm": 0.10903441160917282,
      "learning_rate": 3.599752596292386e-05,
      "loss": 0.0468,
      "step": 36650
    },
    {
      "epoch": 0.00022369384765625,
      "model_forward_time": 0.11523008346557617,
      "step": 36650
    },
    {
      "epoch": 0.00022369384765625,
      "step": 36650,
      "training_step_time": 0.38880467414855957
    },
    {
      "epoch": 0.000223699951171875,
      "model_forward_time": 0.11497616767883301,
      "step": 36651
    },
    {
      "epoch": 0.000223699951171875,
      "step": 36651,
      "training_step_time": 0.4138944149017334
    },
    {
      "epoch": 0.0002237060546875,
      "model_forward_time": 0.11554765701293945,
      "step": 36652
    },
    {
      "epoch": 0.0002237060546875,
      "step": 36652,
      "training_step_time": 0.39421701431274414
    },
    {
      "epoch": 0.000223712158203125,
      "model_forward_time": 0.11555099487304688,
      "step": 36653
    },
    {
      "epoch": 0.000223712158203125,
      "step": 36653,
      "training_step_time": 0.43984103202819824
    },
    {
      "epoch": 0.00022371826171875,
      "model_forward_time": 0.11514139175415039,
      "step": 36654
    },
    {
      "epoch": 0.00022371826171875,
      "step": 36654,
      "training_step_time": 0.4038987159729004
    },
    {
      "epoch": 0.000223724365234375,
      "model_forward_time": 0.11581826210021973,
      "step": 36655
    },
    {
      "epoch": 0.000223724365234375,
      "step": 36655,
      "training_step_time": 0.3959026336669922
    },
    {
      "epoch": 0.00022373046875,
      "model_forward_time": 0.1153862476348877,
      "step": 36656
    },
    {
      "epoch": 0.00022373046875,
      "step": 36656,
      "training_step_time": 0.3936586380004883
    },
    {
      "epoch": 0.000223736572265625,
      "model_forward_time": 0.11496829986572266,
      "step": 36657
    },
    {
      "epoch": 0.000223736572265625,
      "step": 36657,
      "training_step_time": 0.38148975372314453
    },
    {
      "epoch": 0.00022374267578125,
      "model_forward_time": 0.11565685272216797,
      "step": 36658
    },
    {
      "epoch": 0.00022374267578125,
      "step": 36658,
      "training_step_time": 0.38580942153930664
    },
    {
      "epoch": 0.000223748779296875,
      "model_forward_time": 0.11470890045166016,
      "step": 36659
    },
    {
      "epoch": 0.000223748779296875,
      "step": 36659,
      "training_step_time": 0.40476250648498535
    },
    {
      "epoch": 0.0002237548828125,
      "grad_norm": 0.11823435127735138,
      "learning_rate": 3.597107297172084e-05,
      "loss": 0.0463,
      "step": 36660
    },
    {
      "epoch": 0.0002237548828125,
      "model_forward_time": 0.11768460273742676,
      "step": 36660
    },
    {
      "epoch": 0.0002237548828125,
      "step": 36660,
      "training_step_time": 0.37184691429138184
    },
    {
      "epoch": 0.000223760986328125,
      "model_forward_time": 0.11535215377807617,
      "step": 36661
    },
    {
      "epoch": 0.000223760986328125,
      "step": 36661,
      "training_step_time": 0.39336490631103516
    },
    {
      "epoch": 0.00022376708984375,
      "model_forward_time": 0.11531805992126465,
      "step": 36662
    },
    {
      "epoch": 0.00022376708984375,
      "step": 36662,
      "training_step_time": 0.47756075859069824
    },
    {
      "epoch": 0.000223773193359375,
      "model_forward_time": 0.11512446403503418,
      "step": 36663
    },
    {
      "epoch": 0.000223773193359375,
      "step": 36663,
      "training_step_time": 0.469280481338501
    },
    {
      "epoch": 0.000223779296875,
      "model_forward_time": 0.11544299125671387,
      "step": 36664
    },
    {
      "epoch": 0.000223779296875,
      "step": 36664,
      "training_step_time": 0.38094305992126465
    },
    {
      "epoch": 0.000223785400390625,
      "model_forward_time": 0.11535239219665527,
      "step": 36665
    },
    {
      "epoch": 0.000223785400390625,
      "step": 36665,
      "training_step_time": 0.3837916851043701
    },
    {
      "epoch": 0.00022379150390625,
      "model_forward_time": 0.11501359939575195,
      "step": 36666
    },
    {
      "epoch": 0.00022379150390625,
      "step": 36666,
      "training_step_time": 0.4193596839904785
    },
    {
      "epoch": 0.000223797607421875,
      "model_forward_time": 0.1149587631225586,
      "step": 36667
    },
    {
      "epoch": 0.000223797607421875,
      "step": 36667,
      "training_step_time": 0.41013360023498535
    },
    {
      "epoch": 0.0002238037109375,
      "model_forward_time": 0.11796236038208008,
      "step": 36668
    },
    {
      "epoch": 0.0002238037109375,
      "step": 36668,
      "training_step_time": 0.47807836532592773
    },
    {
      "epoch": 0.000223809814453125,
      "model_forward_time": 0.11469364166259766,
      "step": 36669
    },
    {
      "epoch": 0.000223809814453125,
      "step": 36669,
      "training_step_time": 0.3813357353210449
    },
    {
      "epoch": 0.00022381591796875,
      "grad_norm": 0.1162787675857544,
      "learning_rate": 3.594462424213545e-05,
      "loss": 0.0412,
      "step": 36670
    },
    {
      "epoch": 0.00022381591796875,
      "model_forward_time": 0.11485481262207031,
      "step": 36670
    },
    {
      "epoch": 0.00022381591796875,
      "step": 36670,
      "training_step_time": 0.3918783664703369
    },
    {
      "epoch": 0.000223822021484375,
      "model_forward_time": 0.11556768417358398,
      "step": 36671
    },
    {
      "epoch": 0.000223822021484375,
      "step": 36671,
      "training_step_time": 0.39922547340393066
    },
    {
      "epoch": 0.000223828125,
      "model_forward_time": 0.11555910110473633,
      "step": 36672
    },
    {
      "epoch": 0.000223828125,
      "step": 36672,
      "training_step_time": 0.38767004013061523
    },
    {
      "epoch": 0.000223834228515625,
      "model_forward_time": 0.11559557914733887,
      "step": 36673
    },
    {
      "epoch": 0.000223834228515625,
      "step": 36673,
      "training_step_time": 0.3963956832885742
    },
    {
      "epoch": 0.00022384033203125,
      "model_forward_time": 0.11515140533447266,
      "step": 36674
    },
    {
      "epoch": 0.00022384033203125,
      "step": 36674,
      "training_step_time": 0.3893420696258545
    },
    {
      "epoch": 0.000223846435546875,
      "model_forward_time": 0.11584019660949707,
      "step": 36675
    },
    {
      "epoch": 0.000223846435546875,
      "step": 36675,
      "training_step_time": 0.41365838050842285
    },
    {
      "epoch": 0.0002238525390625,
      "model_forward_time": 0.11545515060424805,
      "step": 36676
    },
    {
      "epoch": 0.0002238525390625,
      "step": 36676,
      "training_step_time": 0.46184253692626953
    },
    {
      "epoch": 0.000223858642578125,
      "model_forward_time": 0.12115859985351562,
      "step": 36677
    },
    {
      "epoch": 0.000223858642578125,
      "step": 36677,
      "training_step_time": 0.45403194427490234
    },
    {
      "epoch": 0.00022386474609375,
      "model_forward_time": 0.11913704872131348,
      "step": 36678
    },
    {
      "epoch": 0.00022386474609375,
      "step": 36678,
      "training_step_time": 0.40142202377319336
    },
    {
      "epoch": 0.000223870849609375,
      "model_forward_time": 0.11870765686035156,
      "step": 36679
    },
    {
      "epoch": 0.000223870849609375,
      "step": 36679,
      "training_step_time": 0.37983083724975586
    },
    {
      "epoch": 0.000223876953125,
      "grad_norm": 0.10307661443948746,
      "learning_rate": 3.591817978220212e-05,
      "loss": 0.0466,
      "step": 36680
    },
    {
      "epoch": 0.000223876953125,
      "model_forward_time": 0.11517071723937988,
      "step": 36680
    },
    {
      "epoch": 0.000223876953125,
      "step": 36680,
      "training_step_time": 0.3867213726043701
    },
    {
      "epoch": 0.000223883056640625,
      "model_forward_time": 0.11516332626342773,
      "step": 36681
    },
    {
      "epoch": 0.000223883056640625,
      "step": 36681,
      "training_step_time": 0.41349315643310547
    },
    {
      "epoch": 0.00022388916015625,
      "model_forward_time": 0.11474084854125977,
      "step": 36682
    },
    {
      "epoch": 0.00022388916015625,
      "step": 36682,
      "training_step_time": 0.4779808521270752
    },
    {
      "epoch": 0.000223895263671875,
      "model_forward_time": 0.11542510986328125,
      "step": 36683
    },
    {
      "epoch": 0.000223895263671875,
      "step": 36683,
      "training_step_time": 0.38936948776245117
    },
    {
      "epoch": 0.0002239013671875,
      "model_forward_time": 0.1158895492553711,
      "step": 36684
    },
    {
      "epoch": 0.0002239013671875,
      "step": 36684,
      "training_step_time": 0.3914525508880615
    },
    {
      "epoch": 0.000223907470703125,
      "model_forward_time": 0.11517953872680664,
      "step": 36685
    },
    {
      "epoch": 0.000223907470703125,
      "step": 36685,
      "training_step_time": 0.3945760726928711
    },
    {
      "epoch": 0.00022391357421875,
      "model_forward_time": 0.11469745635986328,
      "step": 36686
    },
    {
      "epoch": 0.00022391357421875,
      "step": 36686,
      "training_step_time": 0.39409947395324707
    },
    {
      "epoch": 0.000223919677734375,
      "model_forward_time": 0.11507582664489746,
      "step": 36687
    },
    {
      "epoch": 0.000223919677734375,
      "step": 36687,
      "training_step_time": 0.40010714530944824
    },
    {
      "epoch": 0.00022392578125,
      "model_forward_time": 0.1162722110748291,
      "step": 36688
    },
    {
      "epoch": 0.00022392578125,
      "step": 36688,
      "training_step_time": 0.3911588191986084
    },
    {
      "epoch": 0.000223931884765625,
      "model_forward_time": 0.1154329776763916,
      "step": 36689
    },
    {
      "epoch": 0.000223931884765625,
      "step": 36689,
      "training_step_time": 0.38994407653808594
    },
    {
      "epoch": 0.00022393798828125,
      "grad_norm": 0.16855789721012115,
      "learning_rate": 3.5891739599953945e-05,
      "loss": 0.0428,
      "step": 36690
    },
    {
      "epoch": 0.00022393798828125,
      "model_forward_time": 0.11562061309814453,
      "step": 36690
    },
    {
      "epoch": 0.00022393798828125,
      "step": 36690,
      "training_step_time": 0.4152350425720215
    },
    {
      "epoch": 0.000223944091796875,
      "model_forward_time": 0.11570239067077637,
      "step": 36691
    },
    {
      "epoch": 0.000223944091796875,
      "step": 36691,
      "training_step_time": 0.3991549015045166
    },
    {
      "epoch": 0.0002239501953125,
      "model_forward_time": 0.11671733856201172,
      "step": 36692
    },
    {
      "epoch": 0.0002239501953125,
      "step": 36692,
      "training_step_time": 0.4425942897796631
    },
    {
      "epoch": 0.000223956298828125,
      "model_forward_time": 0.11522102355957031,
      "step": 36693
    },
    {
      "epoch": 0.000223956298828125,
      "step": 36693,
      "training_step_time": 0.4801516532897949
    },
    {
      "epoch": 0.00022396240234375,
      "model_forward_time": 0.11544179916381836,
      "step": 36694
    },
    {
      "epoch": 0.00022396240234375,
      "step": 36694,
      "training_step_time": 0.39890265464782715
    },
    {
      "epoch": 0.000223968505859375,
      "model_forward_time": 0.11544251441955566,
      "step": 36695
    },
    {
      "epoch": 0.000223968505859375,
      "step": 36695,
      "training_step_time": 0.3951127529144287
    },
    {
      "epoch": 0.000223974609375,
      "model_forward_time": 0.11541247367858887,
      "step": 36696
    },
    {
      "epoch": 0.000223974609375,
      "step": 36696,
      "training_step_time": 0.4451267719268799
    },
    {
      "epoch": 0.000223980712890625,
      "model_forward_time": 0.11551642417907715,
      "step": 36697
    },
    {
      "epoch": 0.000223980712890625,
      "step": 36697,
      "training_step_time": 0.4836153984069824
    },
    {
      "epoch": 0.00022398681640625,
      "model_forward_time": 0.11505126953125,
      "step": 36698
    },
    {
      "epoch": 0.00022398681640625,
      "step": 36698,
      "training_step_time": 0.4023008346557617
    },
    {
      "epoch": 0.000223992919921875,
      "model_forward_time": 0.1149294376373291,
      "step": 36699
    },
    {
      "epoch": 0.000223992919921875,
      "step": 36699,
      "training_step_time": 0.3876490592956543
    },
    {
      "epoch": 0.0002239990234375,
      "grad_norm": 0.13210323452949524,
      "learning_rate": 3.586530370342279e-05,
      "loss": 0.0413,
      "step": 36700
    },
    {
      "epoch": 0.0002239990234375,
      "model_forward_time": 0.11505675315856934,
      "step": 36700
    },
    {
      "epoch": 0.0002239990234375,
      "step": 36700,
      "training_step_time": 0.3997485637664795
    },
    {
      "epoch": 0.000224005126953125,
      "model_forward_time": 0.11490178108215332,
      "step": 36701
    },
    {
      "epoch": 0.000224005126953125,
      "step": 36701,
      "training_step_time": 0.3893547058105469
    },
    {
      "epoch": 0.00022401123046875,
      "model_forward_time": 0.1150355339050293,
      "step": 36702
    },
    {
      "epoch": 0.00022401123046875,
      "step": 36702,
      "training_step_time": 0.3961622714996338
    },
    {
      "epoch": 0.000224017333984375,
      "model_forward_time": 0.11534380912780762,
      "step": 36703
    },
    {
      "epoch": 0.000224017333984375,
      "step": 36703,
      "training_step_time": 0.3842804431915283
    },
    {
      "epoch": 0.0002240234375,
      "model_forward_time": 0.11573672294616699,
      "step": 36704
    },
    {
      "epoch": 0.0002240234375,
      "step": 36704,
      "training_step_time": 0.3923478126525879
    },
    {
      "epoch": 0.000224029541015625,
      "model_forward_time": 0.11635684967041016,
      "step": 36705
    },
    {
      "epoch": 0.000224029541015625,
      "step": 36705,
      "training_step_time": 0.43950581550598145
    },
    {
      "epoch": 0.00022403564453125,
      "model_forward_time": 0.11870265007019043,
      "step": 36706
    },
    {
      "epoch": 0.00022403564453125,
      "step": 36706,
      "training_step_time": 0.4909844398498535
    },
    {
      "epoch": 0.000224041748046875,
      "model_forward_time": 0.11896133422851562,
      "step": 36707
    },
    {
      "epoch": 0.000224041748046875,
      "step": 36707,
      "training_step_time": 0.42293787002563477
    },
    {
      "epoch": 0.0002240478515625,
      "model_forward_time": 0.1147756576538086,
      "step": 36708
    },
    {
      "epoch": 0.0002240478515625,
      "step": 36708,
      "training_step_time": 0.4540700912475586
    },
    {
      "epoch": 0.000224053955078125,
      "model_forward_time": 0.1151280403137207,
      "step": 36709
    },
    {
      "epoch": 0.000224053955078125,
      "step": 36709,
      "training_step_time": 0.4229416847229004
    },
    {
      "epoch": 0.00022406005859375,
      "grad_norm": 0.09236688911914825,
      "learning_rate": 3.5838872100639146e-05,
      "loss": 0.0451,
      "step": 36710
    },
    {
      "epoch": 0.00022406005859375,
      "model_forward_time": 0.1151132583618164,
      "step": 36710
    },
    {
      "epoch": 0.00022406005859375,
      "step": 36710,
      "training_step_time": 0.46448349952697754
    },
    {
      "epoch": 0.000224066162109375,
      "model_forward_time": 0.1150350570678711,
      "step": 36711
    },
    {
      "epoch": 0.000224066162109375,
      "step": 36711,
      "training_step_time": 0.43070435523986816
    },
    {
      "epoch": 0.000224072265625,
      "model_forward_time": 0.11512160301208496,
      "step": 36712
    },
    {
      "epoch": 0.000224072265625,
      "step": 36712,
      "training_step_time": 0.42656707763671875
    },
    {
      "epoch": 0.000224078369140625,
      "model_forward_time": 0.11516118049621582,
      "step": 36713
    },
    {
      "epoch": 0.000224078369140625,
      "step": 36713,
      "training_step_time": 0.39678192138671875
    },
    {
      "epoch": 0.00022408447265625,
      "model_forward_time": 0.1145477294921875,
      "step": 36714
    },
    {
      "epoch": 0.00022408447265625,
      "step": 36714,
      "training_step_time": 0.40163087844848633
    },
    {
      "epoch": 0.000224090576171875,
      "model_forward_time": 0.11520528793334961,
      "step": 36715
    },
    {
      "epoch": 0.000224090576171875,
      "step": 36715,
      "training_step_time": 0.39049744606018066
    },
    {
      "epoch": 0.0002240966796875,
      "model_forward_time": 0.11511492729187012,
      "step": 36716
    },
    {
      "epoch": 0.0002240966796875,
      "step": 36716,
      "training_step_time": 0.3905508518218994
    },
    {
      "epoch": 0.000224102783203125,
      "model_forward_time": 0.11515402793884277,
      "step": 36717
    },
    {
      "epoch": 0.000224102783203125,
      "step": 36717,
      "training_step_time": 0.40101051330566406
    },
    {
      "epoch": 0.00022410888671875,
      "model_forward_time": 0.11492705345153809,
      "step": 36718
    },
    {
      "epoch": 0.00022410888671875,
      "step": 36718,
      "training_step_time": 0.4043142795562744
    },
    {
      "epoch": 0.000224114990234375,
      "model_forward_time": 0.11568832397460938,
      "step": 36719
    },
    {
      "epoch": 0.000224114990234375,
      "step": 36719,
      "training_step_time": 0.6226918697357178
    },
    {
      "epoch": 0.00022412109375,
      "grad_norm": 0.09776035696268082,
      "learning_rate": 3.581244479963225e-05,
      "loss": 0.0392,
      "step": 36720
    },
    {
      "epoch": 0.00022412109375,
      "model_forward_time": 0.11554360389709473,
      "step": 36720
    },
    {
      "epoch": 0.00022412109375,
      "step": 36720,
      "training_step_time": 0.417569637298584
    },
    {
      "epoch": 0.000224127197265625,
      "model_forward_time": 0.11474728584289551,
      "step": 36721
    },
    {
      "epoch": 0.000224127197265625,
      "step": 36721,
      "training_step_time": 0.4348642826080322
    },
    {
      "epoch": 0.00022413330078125,
      "model_forward_time": 0.11432862281799316,
      "step": 36722
    },
    {
      "epoch": 0.00022413330078125,
      "step": 36722,
      "training_step_time": 0.4798407554626465
    },
    {
      "epoch": 0.000224139404296875,
      "model_forward_time": 0.1146383285522461,
      "step": 36723
    },
    {
      "epoch": 0.000224139404296875,
      "step": 36723,
      "training_step_time": 0.3847062587738037
    },
    {
      "epoch": 0.0002241455078125,
      "model_forward_time": 0.11778616905212402,
      "step": 36724
    },
    {
      "epoch": 0.0002241455078125,
      "step": 36724,
      "training_step_time": 0.4470992088317871
    },
    {
      "epoch": 0.000224151611328125,
      "model_forward_time": 0.11894702911376953,
      "step": 36725
    },
    {
      "epoch": 0.000224151611328125,
      "step": 36725,
      "training_step_time": 0.42946934700012207
    },
    {
      "epoch": 0.00022415771484375,
      "model_forward_time": 0.11605668067932129,
      "step": 36726
    },
    {
      "epoch": 0.00022415771484375,
      "step": 36726,
      "training_step_time": 0.38016486167907715
    },
    {
      "epoch": 0.000224163818359375,
      "model_forward_time": 0.11462664604187012,
      "step": 36727
    },
    {
      "epoch": 0.000224163818359375,
      "step": 36727,
      "training_step_time": 0.39177608489990234
    },
    {
      "epoch": 0.000224169921875,
      "model_forward_time": 0.11541414260864258,
      "step": 36728
    },
    {
      "epoch": 0.000224169921875,
      "step": 36728,
      "training_step_time": 0.39015698432922363
    },
    {
      "epoch": 0.000224176025390625,
      "model_forward_time": 0.1148066520690918,
      "step": 36729
    },
    {
      "epoch": 0.000224176025390625,
      "step": 36729,
      "training_step_time": 0.38317441940307617
    },
    {
      "epoch": 0.00022418212890625,
      "grad_norm": 0.10263240337371826,
      "learning_rate": 3.5786021808430054e-05,
      "loss": 0.0445,
      "step": 36730
    },
    {
      "epoch": 0.00022418212890625,
      "model_forward_time": 0.1151740550994873,
      "step": 36730
    },
    {
      "epoch": 0.00022418212890625,
      "step": 36730,
      "training_step_time": 0.38300085067749023
    },
    {
      "epoch": 0.000224188232421875,
      "model_forward_time": 0.11473226547241211,
      "step": 36731
    },
    {
      "epoch": 0.000224188232421875,
      "step": 36731,
      "training_step_time": 0.5568060874938965
    },
    {
      "epoch": 0.0002241943359375,
      "model_forward_time": 0.11487436294555664,
      "step": 36732
    },
    {
      "epoch": 0.0002241943359375,
      "step": 36732,
      "training_step_time": 0.40352845191955566
    },
    {
      "epoch": 0.000224200439453125,
      "model_forward_time": 0.11516737937927246,
      "step": 36733
    },
    {
      "epoch": 0.000224200439453125,
      "step": 36733,
      "training_step_time": 0.39473724365234375
    },
    {
      "epoch": 0.00022420654296875,
      "model_forward_time": 0.11523032188415527,
      "step": 36734
    },
    {
      "epoch": 0.00022420654296875,
      "step": 36734,
      "training_step_time": 0.48827481269836426
    },
    {
      "epoch": 0.000224212646484375,
      "model_forward_time": 0.11477231979370117,
      "step": 36735
    },
    {
      "epoch": 0.000224212646484375,
      "step": 36735,
      "training_step_time": 0.5096876621246338
    },
    {
      "epoch": 0.00022421875,
      "model_forward_time": 0.11454176902770996,
      "step": 36736
    },
    {
      "epoch": 0.00022421875,
      "step": 36736,
      "training_step_time": 0.3930802345275879
    },
    {
      "epoch": 0.000224224853515625,
      "model_forward_time": 0.1151285171508789,
      "step": 36737
    },
    {
      "epoch": 0.000224224853515625,
      "step": 36737,
      "training_step_time": 0.47246789932250977
    },
    {
      "epoch": 0.00022423095703125,
      "model_forward_time": 0.11588740348815918,
      "step": 36738
    },
    {
      "epoch": 0.00022423095703125,
      "step": 36738,
      "training_step_time": 0.44652509689331055
    },
    {
      "epoch": 0.000224237060546875,
      "model_forward_time": 0.1147165298461914,
      "step": 36739
    },
    {
      "epoch": 0.000224237060546875,
      "step": 36739,
      "training_step_time": 0.411440372467041
    },
    {
      "epoch": 0.0002242431640625,
      "grad_norm": 0.11271160840988159,
      "learning_rate": 3.57596031350591e-05,
      "loss": 0.0431,
      "step": 36740
    },
    {
      "epoch": 0.0002242431640625,
      "model_forward_time": 0.1148691177368164,
      "step": 36740
    },
    {
      "epoch": 0.0002242431640625,
      "step": 36740,
      "training_step_time": 0.4640345573425293
    },
    {
      "epoch": 0.000224249267578125,
      "model_forward_time": 0.11574077606201172,
      "step": 36741
    },
    {
      "epoch": 0.000224249267578125,
      "step": 36741,
      "training_step_time": 0.39485716819763184
    },
    {
      "epoch": 0.00022425537109375,
      "model_forward_time": 0.11437511444091797,
      "step": 36742
    },
    {
      "epoch": 0.00022425537109375,
      "step": 36742,
      "training_step_time": 0.3842494487762451
    },
    {
      "epoch": 0.000224261474609375,
      "model_forward_time": 0.11510920524597168,
      "step": 36743
    },
    {
      "epoch": 0.000224261474609375,
      "step": 36743,
      "training_step_time": 0.5309939384460449
    },
    {
      "epoch": 0.000224267578125,
      "model_forward_time": 0.11460018157958984,
      "step": 36744
    },
    {
      "epoch": 0.000224267578125,
      "step": 36744,
      "training_step_time": 0.3934311866760254
    },
    {
      "epoch": 0.000224273681640625,
      "model_forward_time": 0.11625361442565918,
      "step": 36745
    },
    {
      "epoch": 0.000224273681640625,
      "step": 36745,
      "training_step_time": 0.3934004306793213
    },
    {
      "epoch": 0.00022427978515625,
      "model_forward_time": 0.11472964286804199,
      "step": 36746
    },
    {
      "epoch": 0.00022427978515625,
      "step": 36746,
      "training_step_time": 0.3994932174682617
    },
    {
      "epoch": 0.000224285888671875,
      "model_forward_time": 0.11516284942626953,
      "step": 36747
    },
    {
      "epoch": 0.000224285888671875,
      "step": 36747,
      "training_step_time": 0.40651917457580566
    },
    {
      "epoch": 0.0002242919921875,
      "model_forward_time": 0.1148369312286377,
      "step": 36748
    },
    {
      "epoch": 0.0002242919921875,
      "step": 36748,
      "training_step_time": 0.49896764755249023
    },
    {
      "epoch": 0.000224298095703125,
      "model_forward_time": 0.11478590965270996,
      "step": 36749
    },
    {
      "epoch": 0.000224298095703125,
      "step": 36749,
      "training_step_time": 0.601994514465332
    },
    {
      "epoch": 0.00022430419921875,
      "grad_norm": 0.09359917044639587,
      "learning_rate": 3.5733188787544745e-05,
      "loss": 0.0495,
      "step": 36750
    },
    {
      "epoch": 0.00022430419921875,
      "model_forward_time": 0.11477327346801758,
      "step": 36750
    },
    {
      "epoch": 0.00022430419921875,
      "step": 36750,
      "training_step_time": 0.4374682903289795
    },
    {
      "epoch": 0.000224310302734375,
      "model_forward_time": 0.11526179313659668,
      "step": 36751
    },
    {
      "epoch": 0.000224310302734375,
      "step": 36751,
      "training_step_time": 0.3899526596069336
    },
    {
      "epoch": 0.00022431640625,
      "model_forward_time": 0.11467933654785156,
      "step": 36752
    },
    {
      "epoch": 0.00022431640625,
      "step": 36752,
      "training_step_time": 0.43065333366394043
    },
    {
      "epoch": 0.000224322509765625,
      "model_forward_time": 0.11458873748779297,
      "step": 36753
    },
    {
      "epoch": 0.000224322509765625,
      "step": 36753,
      "training_step_time": 0.3870563507080078
    },
    {
      "epoch": 0.00022432861328125,
      "model_forward_time": 0.11473441123962402,
      "step": 36754
    },
    {
      "epoch": 0.00022432861328125,
      "step": 36754,
      "training_step_time": 0.44367289543151855
    },
    {
      "epoch": 0.000224334716796875,
      "model_forward_time": 0.11577439308166504,
      "step": 36755
    },
    {
      "epoch": 0.000224334716796875,
      "step": 36755,
      "training_step_time": 0.626781702041626
    },
    {
      "epoch": 0.0002243408203125,
      "model_forward_time": 0.11502957344055176,
      "step": 36756
    },
    {
      "epoch": 0.0002243408203125,
      "step": 36756,
      "training_step_time": 0.3929729461669922
    },
    {
      "epoch": 0.000224346923828125,
      "model_forward_time": 0.11441230773925781,
      "step": 36757
    },
    {
      "epoch": 0.000224346923828125,
      "step": 36757,
      "training_step_time": 0.3881804943084717
    },
    {
      "epoch": 0.00022435302734375,
      "model_forward_time": 0.11539196968078613,
      "step": 36758
    },
    {
      "epoch": 0.00022435302734375,
      "step": 36758,
      "training_step_time": 0.39371776580810547
    },
    {
      "epoch": 0.000224359130859375,
      "model_forward_time": 0.11484789848327637,
      "step": 36759
    },
    {
      "epoch": 0.000224359130859375,
      "step": 36759,
      "training_step_time": 0.3998126983642578
    },
    {
      "epoch": 0.000224365234375,
      "grad_norm": 0.13578754663467407,
      "learning_rate": 3.570677877391092e-05,
      "loss": 0.0443,
      "step": 36760
    },
    {
      "epoch": 0.000224365234375,
      "model_forward_time": 0.1147148609161377,
      "step": 36760
    },
    {
      "epoch": 0.000224365234375,
      "step": 36760,
      "training_step_time": 0.39955759048461914
    },
    {
      "epoch": 0.000224371337890625,
      "model_forward_time": 0.11520028114318848,
      "step": 36761
    },
    {
      "epoch": 0.000224371337890625,
      "step": 36761,
      "training_step_time": 0.433652400970459
    },
    {
      "epoch": 0.00022437744140625,
      "model_forward_time": 0.11493730545043945,
      "step": 36762
    },
    {
      "epoch": 0.00022437744140625,
      "step": 36762,
      "training_step_time": 0.44525957107543945
    },
    {
      "epoch": 0.000224383544921875,
      "model_forward_time": 0.11572647094726562,
      "step": 36763
    },
    {
      "epoch": 0.000224383544921875,
      "step": 36763,
      "training_step_time": 0.47891974449157715
    },
    {
      "epoch": 0.0002243896484375,
      "model_forward_time": 0.11528730392456055,
      "step": 36764
    },
    {
      "epoch": 0.0002243896484375,
      "step": 36764,
      "training_step_time": 0.4435253143310547
    },
    {
      "epoch": 0.000224395751953125,
      "model_forward_time": 0.1143331527709961,
      "step": 36765
    },
    {
      "epoch": 0.000224395751953125,
      "step": 36765,
      "training_step_time": 0.3784668445587158
    },
    {
      "epoch": 0.00022440185546875,
      "model_forward_time": 0.1148991584777832,
      "step": 36766
    },
    {
      "epoch": 0.00022440185546875,
      "step": 36766,
      "training_step_time": 0.4799344539642334
    },
    {
      "epoch": 0.000224407958984375,
      "model_forward_time": 0.11529111862182617,
      "step": 36767
    },
    {
      "epoch": 0.000224407958984375,
      "step": 36767,
      "training_step_time": 0.3899526596069336
    },
    {
      "epoch": 0.0002244140625,
      "model_forward_time": 0.11498475074768066,
      "step": 36768
    },
    {
      "epoch": 0.0002244140625,
      "step": 36768,
      "training_step_time": 0.4605875015258789
    },
    {
      "epoch": 0.000224420166015625,
      "model_forward_time": 0.11522793769836426,
      "step": 36769
    },
    {
      "epoch": 0.000224420166015625,
      "step": 36769,
      "training_step_time": 0.4003622531890869
    },
    {
      "epoch": 0.00022442626953125,
      "grad_norm": 0.09631127864122391,
      "learning_rate": 3.568037310218033e-05,
      "loss": 0.0386,
      "step": 36770
    },
    {
      "epoch": 0.00022442626953125,
      "model_forward_time": 0.11528325080871582,
      "step": 36770
    },
    {
      "epoch": 0.00022442626953125,
      "step": 36770,
      "training_step_time": 0.3983898162841797
    },
    {
      "epoch": 0.000224432373046875,
      "model_forward_time": 0.11548519134521484,
      "step": 36771
    },
    {
      "epoch": 0.000224432373046875,
      "step": 36771,
      "training_step_time": 0.39633750915527344
    },
    {
      "epoch": 0.0002244384765625,
      "model_forward_time": 0.1151883602142334,
      "step": 36772
    },
    {
      "epoch": 0.0002244384765625,
      "step": 36772,
      "training_step_time": 0.40302157402038574
    },
    {
      "epoch": 0.000224444580078125,
      "model_forward_time": 0.1156158447265625,
      "step": 36773
    },
    {
      "epoch": 0.000224444580078125,
      "step": 36773,
      "training_step_time": 0.5773935317993164
    },
    {
      "epoch": 0.00022445068359375,
      "model_forward_time": 0.1156001091003418,
      "step": 36774
    },
    {
      "epoch": 0.00022445068359375,
      "step": 36774,
      "training_step_time": 0.4430882930755615
    },
    {
      "epoch": 0.000224456787109375,
      "model_forward_time": 0.11499309539794922,
      "step": 36775
    },
    {
      "epoch": 0.000224456787109375,
      "step": 36775,
      "training_step_time": 0.393662691116333
    },
    {
      "epoch": 0.000224462890625,
      "model_forward_time": 0.11492204666137695,
      "step": 36776
    },
    {
      "epoch": 0.000224462890625,
      "step": 36776,
      "training_step_time": 0.46878480911254883
    },
    {
      "epoch": 0.000224468994140625,
      "model_forward_time": 0.11480402946472168,
      "step": 36777
    },
    {
      "epoch": 0.000224468994140625,
      "step": 36777,
      "training_step_time": 0.4685862064361572
    },
    {
      "epoch": 0.00022447509765625,
      "model_forward_time": 0.11468768119812012,
      "step": 36778
    },
    {
      "epoch": 0.00022447509765625,
      "step": 36778,
      "training_step_time": 0.4068796634674072
    },
    {
      "epoch": 0.000224481201171875,
      "model_forward_time": 0.11416053771972656,
      "step": 36779
    },
    {
      "epoch": 0.000224481201171875,
      "step": 36779,
      "training_step_time": 0.38744473457336426
    },
    {
      "epoch": 0.0002244873046875,
      "grad_norm": 0.14070013165473938,
      "learning_rate": 3.5653971780374295e-05,
      "loss": 0.0423,
      "step": 36780
    },
    {
      "epoch": 0.0002244873046875,
      "model_forward_time": 0.11494612693786621,
      "step": 36780
    },
    {
      "epoch": 0.0002244873046875,
      "step": 36780,
      "training_step_time": 0.4236409664154053
    },
    {
      "epoch": 0.000224493408203125,
      "model_forward_time": 0.1151885986328125,
      "step": 36781
    },
    {
      "epoch": 0.000224493408203125,
      "step": 36781,
      "training_step_time": 0.44396281242370605
    },
    {
      "epoch": 0.00022449951171875,
      "model_forward_time": 0.11467456817626953,
      "step": 36782
    },
    {
      "epoch": 0.00022449951171875,
      "step": 36782,
      "training_step_time": 0.3801712989807129
    },
    {
      "epoch": 0.000224505615234375,
      "model_forward_time": 0.11542010307312012,
      "step": 36783
    },
    {
      "epoch": 0.000224505615234375,
      "step": 36783,
      "training_step_time": 0.3893086910247803
    },
    {
      "epoch": 0.00022451171875,
      "model_forward_time": 0.11492538452148438,
      "step": 36784
    },
    {
      "epoch": 0.00022451171875,
      "step": 36784,
      "training_step_time": 0.4011807441711426
    },
    {
      "epoch": 0.000224517822265625,
      "model_forward_time": 0.11529850959777832,
      "step": 36785
    },
    {
      "epoch": 0.000224517822265625,
      "step": 36785,
      "training_step_time": 0.5588083267211914
    },
    {
      "epoch": 0.00022452392578125,
      "model_forward_time": 0.11519408226013184,
      "step": 36786
    },
    {
      "epoch": 0.00022452392578125,
      "step": 36786,
      "training_step_time": 0.395230770111084
    },
    {
      "epoch": 0.000224530029296875,
      "model_forward_time": 0.11539864540100098,
      "step": 36787
    },
    {
      "epoch": 0.000224530029296875,
      "step": 36787,
      "training_step_time": 0.40847039222717285
    },
    {
      "epoch": 0.0002245361328125,
      "model_forward_time": 0.11497879028320312,
      "step": 36788
    },
    {
      "epoch": 0.0002245361328125,
      "step": 36788,
      "training_step_time": 0.39356374740600586
    },
    {
      "epoch": 0.000224542236328125,
      "model_forward_time": 0.11533617973327637,
      "step": 36789
    },
    {
      "epoch": 0.000224542236328125,
      "step": 36789,
      "training_step_time": 0.36806583404541016
    },
    {
      "epoch": 0.00022454833984375,
      "grad_norm": 0.12350904941558838,
      "learning_rate": 3.5627574816512846e-05,
      "loss": 0.0423,
      "step": 36790
    },
    {
      "epoch": 0.00022454833984375,
      "model_forward_time": 0.1154775619506836,
      "step": 36790
    },
    {
      "epoch": 0.00022454833984375,
      "step": 36790,
      "training_step_time": 0.4094357490539551
    },
    {
      "epoch": 0.000224554443359375,
      "model_forward_time": 0.11793351173400879,
      "step": 36791
    },
    {
      "epoch": 0.000224554443359375,
      "step": 36791,
      "training_step_time": 0.5439321994781494
    },
    {
      "epoch": 0.000224560546875,
      "model_forward_time": 0.11812448501586914,
      "step": 36792
    },
    {
      "epoch": 0.000224560546875,
      "step": 36792,
      "training_step_time": 0.4734787940979004
    },
    {
      "epoch": 0.000224566650390625,
      "model_forward_time": 0.11565589904785156,
      "step": 36793
    },
    {
      "epoch": 0.000224566650390625,
      "step": 36793,
      "training_step_time": 0.3854403495788574
    },
    {
      "epoch": 0.00022457275390625,
      "model_forward_time": 0.11585617065429688,
      "step": 36794
    },
    {
      "epoch": 0.00022457275390625,
      "step": 36794,
      "training_step_time": 0.4991588592529297
    },
    {
      "epoch": 0.000224578857421875,
      "model_forward_time": 0.11486148834228516,
      "step": 36795
    },
    {
      "epoch": 0.000224578857421875,
      "step": 36795,
      "training_step_time": 0.4006657600402832
    },
    {
      "epoch": 0.0002245849609375,
      "model_forward_time": 0.11447739601135254,
      "step": 36796
    },
    {
      "epoch": 0.0002245849609375,
      "step": 36796,
      "training_step_time": 0.4350001811981201
    },
    {
      "epoch": 0.000224591064453125,
      "model_forward_time": 0.11465740203857422,
      "step": 36797
    },
    {
      "epoch": 0.000224591064453125,
      "step": 36797,
      "training_step_time": 0.3817870616912842
    },
    {
      "epoch": 0.00022459716796875,
      "model_forward_time": 0.11497020721435547,
      "step": 36798
    },
    {
      "epoch": 0.00022459716796875,
      "step": 36798,
      "training_step_time": 0.3897373676300049
    },
    {
      "epoch": 0.000224603271484375,
      "model_forward_time": 0.11519265174865723,
      "step": 36799
    },
    {
      "epoch": 0.000224603271484375,
      "step": 36799,
      "training_step_time": 0.3981359004974365
    },
    {
      "epoch": 0.000224609375,
      "grad_norm": 0.1368856430053711,
      "learning_rate": 3.56011822186147e-05,
      "loss": 0.0458,
      "step": 36800
    },
    {
      "epoch": 0.000224609375,
      "model_forward_time": 0.11578059196472168,
      "step": 36800
    },
    {
      "epoch": 0.000224609375,
      "step": 36800,
      "training_step_time": 0.3969850540161133
    },
    {
      "epoch": 0.000224615478515625,
      "model_forward_time": 0.11554074287414551,
      "step": 36801
    },
    {
      "epoch": 0.000224615478515625,
      "step": 36801,
      "training_step_time": 0.40341925621032715
    },
    {
      "epoch": 0.00022462158203125,
      "model_forward_time": 0.11499714851379395,
      "step": 36802
    },
    {
      "epoch": 0.00022462158203125,
      "step": 36802,
      "training_step_time": 0.3913533687591553
    },
    {
      "epoch": 0.000224627685546875,
      "model_forward_time": 0.11539959907531738,
      "step": 36803
    },
    {
      "epoch": 0.000224627685546875,
      "step": 36803,
      "training_step_time": 0.45345425605773926
    },
    {
      "epoch": 0.0002246337890625,
      "model_forward_time": 0.11659002304077148,
      "step": 36804
    },
    {
      "epoch": 0.0002246337890625,
      "step": 36804,
      "training_step_time": 0.3953726291656494
    },
    {
      "epoch": 0.000224639892578125,
      "model_forward_time": 0.11546730995178223,
      "step": 36805
    },
    {
      "epoch": 0.000224639892578125,
      "step": 36805,
      "training_step_time": 0.5065453052520752
    },
    {
      "epoch": 0.00022464599609375,
      "model_forward_time": 0.11525511741638184,
      "step": 36806
    },
    {
      "epoch": 0.00022464599609375,
      "step": 36806,
      "training_step_time": 0.4590799808502197
    },
    {
      "epoch": 0.000224652099609375,
      "model_forward_time": 0.11764240264892578,
      "step": 36807
    },
    {
      "epoch": 0.000224652099609375,
      "step": 36807,
      "training_step_time": 0.3889443874359131
    },
    {
      "epoch": 0.000224658203125,
      "model_forward_time": 0.1145486831665039,
      "step": 36808
    },
    {
      "epoch": 0.000224658203125,
      "step": 36808,
      "training_step_time": 0.4201943874359131
    },
    {
      "epoch": 0.000224664306640625,
      "model_forward_time": 0.11574292182922363,
      "step": 36809
    },
    {
      "epoch": 0.000224664306640625,
      "step": 36809,
      "training_step_time": 0.5946803092956543
    },
    {
      "epoch": 0.00022467041015625,
      "grad_norm": 0.1408396065235138,
      "learning_rate": 3.557479399469721e-05,
      "loss": 0.0469,
      "step": 36810
    },
    {
      "epoch": 0.00022467041015625,
      "model_forward_time": 0.11462640762329102,
      "step": 36810
    },
    {
      "epoch": 0.00022467041015625,
      "step": 36810,
      "training_step_time": 0.42911744117736816
    },
    {
      "epoch": 0.000224676513671875,
      "model_forward_time": 0.11532211303710938,
      "step": 36811
    },
    {
      "epoch": 0.000224676513671875,
      "step": 36811,
      "training_step_time": 0.38921046257019043
    },
    {
      "epoch": 0.0002246826171875,
      "model_forward_time": 0.11474227905273438,
      "step": 36812
    },
    {
      "epoch": 0.0002246826171875,
      "step": 36812,
      "training_step_time": 0.3870229721069336
    },
    {
      "epoch": 0.000224688720703125,
      "model_forward_time": 0.11642837524414062,
      "step": 36813
    },
    {
      "epoch": 0.000224688720703125,
      "step": 36813,
      "training_step_time": 0.38350367546081543
    },
    {
      "epoch": 0.00022469482421875,
      "model_forward_time": 0.11491537094116211,
      "step": 36814
    },
    {
      "epoch": 0.00022469482421875,
      "step": 36814,
      "training_step_time": 0.3958709239959717
    },
    {
      "epoch": 0.000224700927734375,
      "model_forward_time": 0.11537456512451172,
      "step": 36815
    },
    {
      "epoch": 0.000224700927734375,
      "step": 36815,
      "training_step_time": 0.6208536624908447
    },
    {
      "epoch": 0.00022470703125,
      "model_forward_time": 0.11575675010681152,
      "step": 36816
    },
    {
      "epoch": 0.00022470703125,
      "step": 36816,
      "training_step_time": 0.394855260848999
    },
    {
      "epoch": 0.000224713134765625,
      "model_forward_time": 0.11533474922180176,
      "step": 36817
    },
    {
      "epoch": 0.000224713134765625,
      "step": 36817,
      "training_step_time": 0.3716273307800293
    },
    {
      "epoch": 0.00022471923828125,
      "model_forward_time": 0.11550593376159668,
      "step": 36818
    },
    {
      "epoch": 0.00022471923828125,
      "step": 36818,
      "training_step_time": 0.40696001052856445
    },
    {
      "epoch": 0.000224725341796875,
      "model_forward_time": 0.11607861518859863,
      "step": 36819
    },
    {
      "epoch": 0.000224725341796875,
      "step": 36819,
      "training_step_time": 0.4912099838256836
    },
    {
      "epoch": 0.0002247314453125,
      "grad_norm": 0.10900082439184189,
      "learning_rate": 3.554841015277641e-05,
      "loss": 0.043,
      "step": 36820
    },
    {
      "epoch": 0.0002247314453125,
      "model_forward_time": 0.11483931541442871,
      "step": 36820
    },
    {
      "epoch": 0.0002247314453125,
      "step": 36820,
      "training_step_time": 0.42186617851257324
    },
    {
      "epoch": 0.000224737548828125,
      "model_forward_time": 0.11556291580200195,
      "step": 36821
    },
    {
      "epoch": 0.000224737548828125,
      "step": 36821,
      "training_step_time": 0.5287935733795166
    },
    {
      "epoch": 0.00022474365234375,
      "model_forward_time": 0.11496996879577637,
      "step": 36822
    },
    {
      "epoch": 0.00022474365234375,
      "step": 36822,
      "training_step_time": 0.45352816581726074
    },
    {
      "epoch": 0.000224749755859375,
      "model_forward_time": 0.11463189125061035,
      "step": 36823
    },
    {
      "epoch": 0.000224749755859375,
      "step": 36823,
      "training_step_time": 0.38910579681396484
    },
    {
      "epoch": 0.000224755859375,
      "model_forward_time": 0.11500883102416992,
      "step": 36824
    },
    {
      "epoch": 0.000224755859375,
      "step": 36824,
      "training_step_time": 0.40930914878845215
    },
    {
      "epoch": 0.000224761962890625,
      "model_forward_time": 0.11451363563537598,
      "step": 36825
    },
    {
      "epoch": 0.000224761962890625,
      "step": 36825,
      "training_step_time": 0.388427734375
    },
    {
      "epoch": 0.00022476806640625,
      "model_forward_time": 0.11503887176513672,
      "step": 36826
    },
    {
      "epoch": 0.00022476806640625,
      "step": 36826,
      "training_step_time": 0.3887009620666504
    },
    {
      "epoch": 0.000224774169921875,
      "model_forward_time": 0.11526060104370117,
      "step": 36827
    },
    {
      "epoch": 0.000224774169921875,
      "step": 36827,
      "training_step_time": 0.5706984996795654
    },
    {
      "epoch": 0.0002247802734375,
      "model_forward_time": 0.11511468887329102,
      "step": 36828
    },
    {
      "epoch": 0.0002247802734375,
      "step": 36828,
      "training_step_time": 0.3904457092285156
    },
    {
      "epoch": 0.000224786376953125,
      "model_forward_time": 0.11588668823242188,
      "step": 36829
    },
    {
      "epoch": 0.000224786376953125,
      "step": 36829,
      "training_step_time": 0.37513256072998047
    },
    {
      "epoch": 0.00022479248046875,
      "grad_norm": 0.09987697750329971,
      "learning_rate": 3.552203070086707e-05,
      "loss": 0.0384,
      "step": 36830
    },
    {
      "epoch": 0.00022479248046875,
      "model_forward_time": 0.11552572250366211,
      "step": 36830
    },
    {
      "epoch": 0.00022479248046875,
      "step": 36830,
      "training_step_time": 0.40491795539855957
    },
    {
      "epoch": 0.000224798583984375,
      "model_forward_time": 0.11507844924926758,
      "step": 36831
    },
    {
      "epoch": 0.000224798583984375,
      "step": 36831,
      "training_step_time": 0.3941028118133545
    },
    {
      "epoch": 0.0002248046875,
      "model_forward_time": 0.11510157585144043,
      "step": 36832
    },
    {
      "epoch": 0.0002248046875,
      "step": 36832,
      "training_step_time": 0.3922841548919678
    },
    {
      "epoch": 0.000224810791015625,
      "model_forward_time": 0.11485505104064941,
      "step": 36833
    },
    {
      "epoch": 0.000224810791015625,
      "step": 36833,
      "training_step_time": 0.6056795120239258
    },
    {
      "epoch": 0.00022481689453125,
      "model_forward_time": 0.11487507820129395,
      "step": 36834
    },
    {
      "epoch": 0.00022481689453125,
      "step": 36834,
      "training_step_time": 0.45892930030822754
    },
    {
      "epoch": 0.000224822998046875,
      "model_forward_time": 0.11482596397399902,
      "step": 36835
    },
    {
      "epoch": 0.000224822998046875,
      "step": 36835,
      "training_step_time": 0.3711667060852051
    },
    {
      "epoch": 0.0002248291015625,
      "model_forward_time": 0.11495804786682129,
      "step": 36836
    },
    {
      "epoch": 0.0002248291015625,
      "step": 36836,
      "training_step_time": 0.5040502548217773
    },
    {
      "epoch": 0.000224835205078125,
      "model_forward_time": 0.11593389511108398,
      "step": 36837
    },
    {
      "epoch": 0.000224835205078125,
      "step": 36837,
      "training_step_time": 0.4720492362976074
    },
    {
      "epoch": 0.00022484130859375,
      "model_forward_time": 0.11397385597229004,
      "step": 36838
    },
    {
      "epoch": 0.00022484130859375,
      "step": 36838,
      "training_step_time": 0.390988826751709
    },
    {
      "epoch": 0.000224847412109375,
      "model_forward_time": 0.11487746238708496,
      "step": 36839
    },
    {
      "epoch": 0.000224847412109375,
      "step": 36839,
      "training_step_time": 0.4267408847808838
    },
    {
      "epoch": 0.000224853515625,
      "grad_norm": 0.10835663229227066,
      "learning_rate": 3.5495655646982505e-05,
      "loss": 0.0431,
      "step": 36840
    },
    {
      "epoch": 0.000224853515625,
      "model_forward_time": 0.11495566368103027,
      "step": 36840
    },
    {
      "epoch": 0.000224853515625,
      "step": 36840,
      "training_step_time": 0.4405035972595215
    },
    {
      "epoch": 0.000224859619140625,
      "model_forward_time": 0.11577701568603516,
      "step": 36841
    },
    {
      "epoch": 0.000224859619140625,
      "step": 36841,
      "training_step_time": 0.3742074966430664
    },
    {
      "epoch": 0.00022486572265625,
      "model_forward_time": 0.11537432670593262,
      "step": 36842
    },
    {
      "epoch": 0.00022486572265625,
      "step": 36842,
      "training_step_time": 0.38745999336242676
    },
    {
      "epoch": 0.000224871826171875,
      "model_forward_time": 0.11508607864379883,
      "step": 36843
    },
    {
      "epoch": 0.000224871826171875,
      "step": 36843,
      "training_step_time": 0.39819979667663574
    },
    {
      "epoch": 0.0002248779296875,
      "model_forward_time": 0.11597013473510742,
      "step": 36844
    },
    {
      "epoch": 0.0002248779296875,
      "step": 36844,
      "training_step_time": 0.38747310638427734
    },
    {
      "epoch": 0.000224884033203125,
      "model_forward_time": 0.11498141288757324,
      "step": 36845
    },
    {
      "epoch": 0.000224884033203125,
      "step": 36845,
      "training_step_time": 0.45658063888549805
    },
    {
      "epoch": 0.00022489013671875,
      "model_forward_time": 0.11492753028869629,
      "step": 36846
    },
    {
      "epoch": 0.00022489013671875,
      "step": 36846,
      "training_step_time": 0.36815452575683594
    },
    {
      "epoch": 0.000224896240234375,
      "model_forward_time": 0.1149129867553711,
      "step": 36847
    },
    {
      "epoch": 0.000224896240234375,
      "step": 36847,
      "training_step_time": 0.4065248966217041
    },
    {
      "epoch": 0.00022490234375,
      "model_forward_time": 0.11547136306762695,
      "step": 36848
    },
    {
      "epoch": 0.00022490234375,
      "step": 36848,
      "training_step_time": 0.4560701847076416
    },
    {
      "epoch": 0.000224908447265625,
      "model_forward_time": 0.11487412452697754,
      "step": 36849
    },
    {
      "epoch": 0.000224908447265625,
      "step": 36849,
      "training_step_time": 0.4111599922180176
    },
    {
      "epoch": 0.00022491455078125,
      "grad_norm": 0.22040320932865143,
      "learning_rate": 3.5469284999134815e-05,
      "loss": 0.0462,
      "step": 36850
    },
    {
      "epoch": 0.00022491455078125,
      "model_forward_time": 0.1170816421508789,
      "step": 36850
    },
    {
      "epoch": 0.00022491455078125,
      "step": 36850,
      "training_step_time": 0.3990764617919922
    },
    {
      "epoch": 0.000224920654296875,
      "model_forward_time": 0.11767148971557617,
      "step": 36851
    },
    {
      "epoch": 0.000224920654296875,
      "step": 36851,
      "training_step_time": 0.49251461029052734
    },
    {
      "epoch": 0.0002249267578125,
      "model_forward_time": 0.11807894706726074,
      "step": 36852
    },
    {
      "epoch": 0.0002249267578125,
      "step": 36852,
      "training_step_time": 0.4196929931640625
    },
    {
      "epoch": 0.000224932861328125,
      "model_forward_time": 0.11925053596496582,
      "step": 36853
    },
    {
      "epoch": 0.000224932861328125,
      "step": 36853,
      "training_step_time": 0.40309762954711914
    },
    {
      "epoch": 0.00022493896484375,
      "model_forward_time": 0.11667490005493164,
      "step": 36854
    },
    {
      "epoch": 0.00022493896484375,
      "step": 36854,
      "training_step_time": 0.4351532459259033
    },
    {
      "epoch": 0.000224945068359375,
      "model_forward_time": 0.11495184898376465,
      "step": 36855
    },
    {
      "epoch": 0.000224945068359375,
      "step": 36855,
      "training_step_time": 0.3811335563659668
    },
    {
      "epoch": 0.000224951171875,
      "model_forward_time": 0.1146247386932373,
      "step": 36856
    },
    {
      "epoch": 0.000224951171875,
      "step": 36856,
      "training_step_time": 0.3833956718444824
    },
    {
      "epoch": 0.000224957275390625,
      "model_forward_time": 0.11530375480651855,
      "step": 36857
    },
    {
      "epoch": 0.000224957275390625,
      "step": 36857,
      "training_step_time": 0.5116581916809082
    },
    {
      "epoch": 0.00022496337890625,
      "model_forward_time": 0.11523175239562988,
      "step": 36858
    },
    {
      "epoch": 0.00022496337890625,
      "step": 36858,
      "training_step_time": 0.3858642578125
    },
    {
      "epoch": 0.000224969482421875,
      "model_forward_time": 0.11490273475646973,
      "step": 36859
    },
    {
      "epoch": 0.000224969482421875,
      "step": 36859,
      "training_step_time": 0.3730318546295166
    },
    {
      "epoch": 0.0002249755859375,
      "grad_norm": 0.1941739171743393,
      "learning_rate": 3.544291876533466e-05,
      "loss": 0.0418,
      "step": 36860
    },
    {
      "epoch": 0.0002249755859375,
      "model_forward_time": 0.11508703231811523,
      "step": 36860
    },
    {
      "epoch": 0.0002249755859375,
      "step": 36860,
      "training_step_time": 0.3853030204772949
    },
    {
      "epoch": 0.000224981689453125,
      "model_forward_time": 0.11591982841491699,
      "step": 36861
    },
    {
      "epoch": 0.000224981689453125,
      "step": 36861,
      "training_step_time": 0.4209909439086914
    },
    {
      "epoch": 0.00022498779296875,
      "model_forward_time": 0.1151881217956543,
      "step": 36862
    },
    {
      "epoch": 0.00022498779296875,
      "step": 36862,
      "training_step_time": 0.5015473365783691
    },
    {
      "epoch": 0.000224993896484375,
      "model_forward_time": 0.11526250839233398,
      "step": 36863
    },
    {
      "epoch": 0.000224993896484375,
      "step": 36863,
      "training_step_time": 0.4950382709503174
    },
    {
      "epoch": 0.000225,
      "model_forward_time": 0.1163487434387207,
      "step": 36864
    },
    {
      "epoch": 0.000225,
      "step": 36864,
      "training_step_time": 0.39998960494995117
    },
    {
      "epoch": 0.000225006103515625,
      "model_forward_time": 0.11416506767272949,
      "step": 36865
    },
    {
      "epoch": 0.000225006103515625,
      "step": 36865,
      "training_step_time": 0.3977210521697998
    },
    {
      "epoch": 0.00022501220703125,
      "model_forward_time": 0.11463212966918945,
      "step": 36866
    },
    {
      "epoch": 0.00022501220703125,
      "step": 36866,
      "training_step_time": 0.4861025810241699
    },
    {
      "epoch": 0.000225018310546875,
      "model_forward_time": 0.11471343040466309,
      "step": 36867
    },
    {
      "epoch": 0.000225018310546875,
      "step": 36867,
      "training_step_time": 0.4506344795227051
    },
    {
      "epoch": 0.0002250244140625,
      "model_forward_time": 0.11470222473144531,
      "step": 36868
    },
    {
      "epoch": 0.0002250244140625,
      "step": 36868,
      "training_step_time": 0.4030873775482178
    },
    {
      "epoch": 0.000225030517578125,
      "model_forward_time": 0.11532044410705566,
      "step": 36869
    },
    {
      "epoch": 0.000225030517578125,
      "step": 36869,
      "training_step_time": 0.4751291275024414
    },
    {
      "epoch": 0.00022503662109375,
      "grad_norm": 0.17612852156162262,
      "learning_rate": 3.541655695359142e-05,
      "loss": 0.0354,
      "step": 36870
    },
    {
      "epoch": 0.00022503662109375,
      "model_forward_time": 0.11468768119812012,
      "step": 36870
    },
    {
      "epoch": 0.00022503662109375,
      "step": 36870,
      "training_step_time": 0.4057352542877197
    },
    {
      "epoch": 0.000225042724609375,
      "model_forward_time": 0.11481976509094238,
      "step": 36871
    },
    {
      "epoch": 0.000225042724609375,
      "step": 36871,
      "training_step_time": 0.3733673095703125
    },
    {
      "epoch": 0.000225048828125,
      "model_forward_time": 0.11534619331359863,
      "step": 36872
    },
    {
      "epoch": 0.000225048828125,
      "step": 36872,
      "training_step_time": 0.40270209312438965
    },
    {
      "epoch": 0.000225054931640625,
      "model_forward_time": 0.11452770233154297,
      "step": 36873
    },
    {
      "epoch": 0.000225054931640625,
      "step": 36873,
      "training_step_time": 0.3883190155029297
    },
    {
      "epoch": 0.00022506103515625,
      "model_forward_time": 0.11574602127075195,
      "step": 36874
    },
    {
      "epoch": 0.00022506103515625,
      "step": 36874,
      "training_step_time": 0.3854038715362549
    },
    {
      "epoch": 0.000225067138671875,
      "model_forward_time": 0.11662435531616211,
      "step": 36875
    },
    {
      "epoch": 0.000225067138671875,
      "step": 36875,
      "training_step_time": 0.62196946144104
    },
    {
      "epoch": 0.0002250732421875,
      "model_forward_time": 0.11528658866882324,
      "step": 36876
    },
    {
      "epoch": 0.0002250732421875,
      "step": 36876,
      "training_step_time": 0.4800527095794678
    },
    {
      "epoch": 0.000225079345703125,
      "model_forward_time": 0.1150672435760498,
      "step": 36877
    },
    {
      "epoch": 0.000225079345703125,
      "step": 36877,
      "training_step_time": 0.4679291248321533
    },
    {
      "epoch": 0.00022508544921875,
      "model_forward_time": 0.11499166488647461,
      "step": 36878
    },
    {
      "epoch": 0.00022508544921875,
      "step": 36878,
      "training_step_time": 0.45188212394714355
    },
    {
      "epoch": 0.000225091552734375,
      "model_forward_time": 0.11466479301452637,
      "step": 36879
    },
    {
      "epoch": 0.000225091552734375,
      "step": 36879,
      "training_step_time": 0.47235989570617676
    },
    {
      "epoch": 0.00022509765625,
      "grad_norm": 0.10520564764738083,
      "learning_rate": 3.539019957191315e-05,
      "loss": 0.0412,
      "step": 36880
    },
    {
      "epoch": 0.00022509765625,
      "model_forward_time": 0.11396384239196777,
      "step": 36880
    },
    {
      "epoch": 0.00022509765625,
      "step": 36880,
      "training_step_time": 0.4442880153656006
    },
    {
      "epoch": 0.000225103759765625,
      "model_forward_time": 0.11449742317199707,
      "step": 36881
    },
    {
      "epoch": 0.000225103759765625,
      "step": 36881,
      "training_step_time": 0.3906259536743164
    },
    {
      "epoch": 0.00022510986328125,
      "model_forward_time": 0.11618638038635254,
      "step": 36882
    },
    {
      "epoch": 0.00022510986328125,
      "step": 36882,
      "training_step_time": 0.39498138427734375
    },
    {
      "epoch": 0.000225115966796875,
      "model_forward_time": 0.1147007942199707,
      "step": 36883
    },
    {
      "epoch": 0.000225115966796875,
      "step": 36883,
      "training_step_time": 0.37290477752685547
    },
    {
      "epoch": 0.0002251220703125,
      "model_forward_time": 0.11460041999816895,
      "step": 36884
    },
    {
      "epoch": 0.0002251220703125,
      "step": 36884,
      "training_step_time": 0.3968937397003174
    },
    {
      "epoch": 0.000225128173828125,
      "model_forward_time": 0.11515522003173828,
      "step": 36885
    },
    {
      "epoch": 0.000225128173828125,
      "step": 36885,
      "training_step_time": 0.40249156951904297
    },
    {
      "epoch": 0.00022513427734375,
      "model_forward_time": 0.11413860321044922,
      "step": 36886
    },
    {
      "epoch": 0.00022513427734375,
      "step": 36886,
      "training_step_time": 0.38837504386901855
    },
    {
      "epoch": 0.000225140380859375,
      "model_forward_time": 0.11449074745178223,
      "step": 36887
    },
    {
      "epoch": 0.000225140380859375,
      "step": 36887,
      "training_step_time": 0.6425347328186035
    },
    {
      "epoch": 0.000225146484375,
      "model_forward_time": 0.11470437049865723,
      "step": 36888
    },
    {
      "epoch": 0.000225146484375,
      "step": 36888,
      "training_step_time": 0.3937227725982666
    },
    {
      "epoch": 0.000225152587890625,
      "model_forward_time": 0.11455440521240234,
      "step": 36889
    },
    {
      "epoch": 0.000225152587890625,
      "step": 36889,
      "training_step_time": 0.49494361877441406
    },
    {
      "epoch": 0.00022515869140625,
      "grad_norm": 0.18577426671981812,
      "learning_rate": 3.536384662830648e-05,
      "loss": 0.0428,
      "step": 36890
    },
    {
      "epoch": 0.00022515869140625,
      "model_forward_time": 0.11498165130615234,
      "step": 36890
    },
    {
      "epoch": 0.00022515869140625,
      "step": 36890,
      "training_step_time": 0.49149441719055176
    },
    {
      "epoch": 0.000225164794921875,
      "model_forward_time": 0.11446571350097656,
      "step": 36891
    },
    {
      "epoch": 0.000225164794921875,
      "step": 36891,
      "training_step_time": 0.4406452178955078
    },
    {
      "epoch": 0.0002251708984375,
      "model_forward_time": 0.11451959609985352,
      "step": 36892
    },
    {
      "epoch": 0.0002251708984375,
      "step": 36892,
      "training_step_time": 0.43917369842529297
    },
    {
      "epoch": 0.000225177001953125,
      "model_forward_time": 0.11452102661132812,
      "step": 36893
    },
    {
      "epoch": 0.000225177001953125,
      "step": 36893,
      "training_step_time": 0.4117560386657715
    },
    {
      "epoch": 0.00022518310546875,
      "model_forward_time": 0.11491131782531738,
      "step": 36894
    },
    {
      "epoch": 0.00022518310546875,
      "step": 36894,
      "training_step_time": 0.4101834297180176
    },
    {
      "epoch": 0.000225189208984375,
      "model_forward_time": 0.11476588249206543,
      "step": 36895
    },
    {
      "epoch": 0.000225189208984375,
      "step": 36895,
      "training_step_time": 0.3790321350097656
    },
    {
      "epoch": 0.0002251953125,
      "model_forward_time": 0.11495399475097656,
      "step": 36896
    },
    {
      "epoch": 0.0002251953125,
      "step": 36896,
      "training_step_time": 0.3998391628265381
    },
    {
      "epoch": 0.000225201416015625,
      "model_forward_time": 0.11537551879882812,
      "step": 36897
    },
    {
      "epoch": 0.000225201416015625,
      "step": 36897,
      "training_step_time": 0.3928539752960205
    },
    {
      "epoch": 0.00022520751953125,
      "model_forward_time": 0.1151278018951416,
      "step": 36898
    },
    {
      "epoch": 0.00022520751953125,
      "step": 36898,
      "training_step_time": 0.39197516441345215
    },
    {
      "epoch": 0.000225213623046875,
      "model_forward_time": 0.1146535873413086,
      "step": 36899
    },
    {
      "epoch": 0.000225213623046875,
      "step": 36899,
      "training_step_time": 0.48189401626586914
    },
    {
      "epoch": 0.0002252197265625,
      "grad_norm": 0.12164852023124695,
      "learning_rate": 3.533749813077677e-05,
      "loss": 0.0383,
      "step": 36900
    },
    {
      "epoch": 0.0002252197265625,
      "model_forward_time": 0.11521410942077637,
      "step": 36900
    },
    {
      "epoch": 0.0002252197265625,
      "step": 36900,
      "training_step_time": 0.3953564167022705
    },
    {
      "epoch": 0.000225225830078125,
      "model_forward_time": 0.11556267738342285,
      "step": 36901
    },
    {
      "epoch": 0.000225225830078125,
      "step": 36901,
      "training_step_time": 0.3684425354003906
    },
    {
      "epoch": 0.00022523193359375,
      "model_forward_time": 0.11508035659790039,
      "step": 36902
    },
    {
      "epoch": 0.00022523193359375,
      "step": 36902,
      "training_step_time": 0.39440155029296875
    },
    {
      "epoch": 0.000225238037109375,
      "model_forward_time": 0.1157982349395752,
      "step": 36903
    },
    {
      "epoch": 0.000225238037109375,
      "step": 36903,
      "training_step_time": 0.42865729331970215
    },
    {
      "epoch": 0.000225244140625,
      "model_forward_time": 0.1152195930480957,
      "step": 36904
    },
    {
      "epoch": 0.000225244140625,
      "step": 36904,
      "training_step_time": 0.45946574211120605
    },
    {
      "epoch": 0.000225250244140625,
      "model_forward_time": 0.11564230918884277,
      "step": 36905
    },
    {
      "epoch": 0.000225250244140625,
      "step": 36905,
      "training_step_time": 0.5052263736724854
    },
    {
      "epoch": 0.00022525634765625,
      "model_forward_time": 0.11460375785827637,
      "step": 36906
    },
    {
      "epoch": 0.00022525634765625,
      "step": 36906,
      "training_step_time": 0.4674410820007324
    },
    {
      "epoch": 0.000225262451171875,
      "model_forward_time": 0.1146397590637207,
      "step": 36907
    },
    {
      "epoch": 0.000225262451171875,
      "step": 36907,
      "training_step_time": 0.4669528007507324
    },
    {
      "epoch": 0.0002252685546875,
      "model_forward_time": 0.11518573760986328,
      "step": 36908
    },
    {
      "epoch": 0.0002252685546875,
      "step": 36908,
      "training_step_time": 0.399655818939209
    },
    {
      "epoch": 0.000225274658203125,
      "model_forward_time": 0.11443471908569336,
      "step": 36909
    },
    {
      "epoch": 0.000225274658203125,
      "step": 36909,
      "training_step_time": 0.3991987705230713
    },
    {
      "epoch": 0.00022528076171875,
      "grad_norm": 0.10368771851062775,
      "learning_rate": 3.5311154087327975e-05,
      "loss": 0.0417,
      "step": 36910
    },
    {
      "epoch": 0.00022528076171875,
      "model_forward_time": 0.11520028114318848,
      "step": 36910
    },
    {
      "epoch": 0.00022528076171875,
      "step": 36910,
      "training_step_time": 0.39077305793762207
    },
    {
      "epoch": 0.000225286865234375,
      "model_forward_time": 0.11446881294250488,
      "step": 36911
    },
    {
      "epoch": 0.000225286865234375,
      "step": 36911,
      "training_step_time": 0.4688723087310791
    },
    {
      "epoch": 0.00022529296875,
      "model_forward_time": 0.11501193046569824,
      "step": 36912
    },
    {
      "epoch": 0.00022529296875,
      "step": 36912,
      "training_step_time": 0.3974125385284424
    },
    {
      "epoch": 0.000225299072265625,
      "model_forward_time": 0.11459064483642578,
      "step": 36913
    },
    {
      "epoch": 0.000225299072265625,
      "step": 36913,
      "training_step_time": 0.3758730888366699
    },
    {
      "epoch": 0.00022530517578125,
      "model_forward_time": 0.11483240127563477,
      "step": 36914
    },
    {
      "epoch": 0.00022530517578125,
      "step": 36914,
      "training_step_time": 0.3920903205871582
    },
    {
      "epoch": 0.000225311279296875,
      "model_forward_time": 0.11574840545654297,
      "step": 36915
    },
    {
      "epoch": 0.000225311279296875,
      "step": 36915,
      "training_step_time": 0.3948485851287842
    },
    {
      "epoch": 0.0002253173828125,
      "model_forward_time": 0.11478495597839355,
      "step": 36916
    },
    {
      "epoch": 0.0002253173828125,
      "step": 36916,
      "training_step_time": 0.39333105087280273
    },
    {
      "epoch": 0.000225323486328125,
      "model_forward_time": 0.1168670654296875,
      "step": 36917
    },
    {
      "epoch": 0.000225323486328125,
      "step": 36917,
      "training_step_time": 0.5768411159515381
    },
    {
      "epoch": 0.00022532958984375,
      "model_forward_time": 0.11547708511352539,
      "step": 36918
    },
    {
      "epoch": 0.00022532958984375,
      "step": 36918,
      "training_step_time": 0.4953606128692627
    },
    {
      "epoch": 0.000225335693359375,
      "model_forward_time": 0.11521291732788086,
      "step": 36919
    },
    {
      "epoch": 0.000225335693359375,
      "step": 36919,
      "training_step_time": 0.41667819023132324
    },
    {
      "epoch": 0.000225341796875,
      "grad_norm": 0.10267892479896545,
      "learning_rate": 3.528481450596274e-05,
      "loss": 0.0436,
      "step": 36920
    },
    {
      "epoch": 0.000225341796875,
      "model_forward_time": 0.11519908905029297,
      "step": 36920
    },
    {
      "epoch": 0.000225341796875,
      "step": 36920,
      "training_step_time": 0.41928791999816895
    },
    {
      "epoch": 0.000225347900390625,
      "model_forward_time": 0.11512303352355957,
      "step": 36921
    },
    {
      "epoch": 0.000225347900390625,
      "step": 36921,
      "training_step_time": 0.36826562881469727
    },
    {
      "epoch": 0.00022535400390625,
      "model_forward_time": 0.11451530456542969,
      "step": 36922
    },
    {
      "epoch": 0.00022535400390625,
      "step": 36922,
      "training_step_time": 0.4126138687133789
    },
    {
      "epoch": 0.000225360107421875,
      "model_forward_time": 0.11463189125061035,
      "step": 36923
    },
    {
      "epoch": 0.000225360107421875,
      "step": 36923,
      "training_step_time": 0.38631224632263184
    },
    {
      "epoch": 0.0002253662109375,
      "model_forward_time": 0.11525940895080566,
      "step": 36924
    },
    {
      "epoch": 0.0002253662109375,
      "step": 36924,
      "training_step_time": 0.39403390884399414
    },
    {
      "epoch": 0.000225372314453125,
      "model_forward_time": 0.11683344841003418,
      "step": 36925
    },
    {
      "epoch": 0.000225372314453125,
      "step": 36925,
      "training_step_time": 0.37447071075439453
    },
    {
      "epoch": 0.00022537841796875,
      "model_forward_time": 0.11492633819580078,
      "step": 36926
    },
    {
      "epoch": 0.00022537841796875,
      "step": 36926,
      "training_step_time": 0.38593339920043945
    },
    {
      "epoch": 0.000225384521484375,
      "model_forward_time": 0.11503720283508301,
      "step": 36927
    },
    {
      "epoch": 0.000225384521484375,
      "step": 36927,
      "training_step_time": 0.40297412872314453
    },
    {
      "epoch": 0.000225390625,
      "model_forward_time": 0.11527204513549805,
      "step": 36928
    },
    {
      "epoch": 0.000225390625,
      "step": 36928,
      "training_step_time": 0.3891258239746094
    },
    {
      "epoch": 0.000225396728515625,
      "model_forward_time": 0.11467552185058594,
      "step": 36929
    },
    {
      "epoch": 0.000225396728515625,
      "step": 36929,
      "training_step_time": 0.6799683570861816
    },
    {
      "epoch": 0.00022540283203125,
      "grad_norm": 0.15380874276161194,
      "learning_rate": 3.525847939468233e-05,
      "loss": 0.0412,
      "step": 36930
    },
    {
      "epoch": 0.00022540283203125,
      "model_forward_time": 0.1153862476348877,
      "step": 36930
    },
    {
      "epoch": 0.00022540283203125,
      "step": 36930,
      "training_step_time": 0.3929450511932373
    },
    {
      "epoch": 0.000225408935546875,
      "model_forward_time": 0.11460590362548828,
      "step": 36931
    },
    {
      "epoch": 0.000225408935546875,
      "step": 36931,
      "training_step_time": 0.36696362495422363
    },
    {
      "epoch": 0.0002254150390625,
      "model_forward_time": 0.11466622352600098,
      "step": 36932
    },
    {
      "epoch": 0.0002254150390625,
      "step": 36932,
      "training_step_time": 0.41069650650024414
    },
    {
      "epoch": 0.000225421142578125,
      "model_forward_time": 0.11434721946716309,
      "step": 36933
    },
    {
      "epoch": 0.000225421142578125,
      "step": 36933,
      "training_step_time": 0.42145419120788574
    },
    {
      "epoch": 0.00022542724609375,
      "model_forward_time": 0.1146233081817627,
      "step": 36934
    },
    {
      "epoch": 0.00022542724609375,
      "step": 36934,
      "training_step_time": 0.4527549743652344
    },
    {
      "epoch": 0.000225433349609375,
      "model_forward_time": 0.11472272872924805,
      "step": 36935
    },
    {
      "epoch": 0.000225433349609375,
      "step": 36935,
      "training_step_time": 0.47848081588745117
    },
    {
      "epoch": 0.000225439453125,
      "model_forward_time": 0.11468648910522461,
      "step": 36936
    },
    {
      "epoch": 0.000225439453125,
      "step": 36936,
      "training_step_time": 0.4682748317718506
    },
    {
      "epoch": 0.000225445556640625,
      "model_forward_time": 0.11557126045227051,
      "step": 36937
    },
    {
      "epoch": 0.000225445556640625,
      "step": 36937,
      "training_step_time": 0.38472723960876465
    },
    {
      "epoch": 0.00022545166015625,
      "model_forward_time": 0.11584305763244629,
      "step": 36938
    },
    {
      "epoch": 0.00022545166015625,
      "step": 36938,
      "training_step_time": 0.3821065425872803
    },
    {
      "epoch": 0.000225457763671875,
      "model_forward_time": 0.1144552230834961,
      "step": 36939
    },
    {
      "epoch": 0.000225457763671875,
      "step": 36939,
      "training_step_time": 0.3774237632751465
    },
    {
      "epoch": 0.0002254638671875,
      "grad_norm": 0.14392736554145813,
      "learning_rate": 3.523214876148664e-05,
      "loss": 0.0361,
      "step": 36940
    },
    {
      "epoch": 0.0002254638671875,
      "model_forward_time": 0.11472415924072266,
      "step": 36940
    },
    {
      "epoch": 0.0002254638671875,
      "step": 36940,
      "training_step_time": 0.3994584083557129
    },
    {
      "epoch": 0.000225469970703125,
      "model_forward_time": 0.11497330665588379,
      "step": 36941
    },
    {
      "epoch": 0.000225469970703125,
      "step": 36941,
      "training_step_time": 0.6187484264373779
    },
    {
      "epoch": 0.00022547607421875,
      "model_forward_time": 0.11481237411499023,
      "step": 36942
    },
    {
      "epoch": 0.00022547607421875,
      "step": 36942,
      "training_step_time": 0.39919161796569824
    },
    {
      "epoch": 0.000225482177734375,
      "model_forward_time": 0.11508917808532715,
      "step": 36943
    },
    {
      "epoch": 0.000225482177734375,
      "step": 36943,
      "training_step_time": 0.37372779846191406
    },
    {
      "epoch": 0.00022548828125,
      "model_forward_time": 0.11575031280517578,
      "step": 36944
    },
    {
      "epoch": 0.00022548828125,
      "step": 36944,
      "training_step_time": 0.39691972732543945
    },
    {
      "epoch": 0.000225494384765625,
      "model_forward_time": 0.11488151550292969,
      "step": 36945
    },
    {
      "epoch": 0.000225494384765625,
      "step": 36945,
      "training_step_time": 0.3666224479675293
    },
    {
      "epoch": 0.00022550048828125,
      "model_forward_time": 0.11575961112976074,
      "step": 36946
    },
    {
      "epoch": 0.00022550048828125,
      "step": 36946,
      "training_step_time": 0.42822861671447754
    },
    {
      "epoch": 0.000225506591796875,
      "model_forward_time": 0.11532163619995117,
      "step": 36947
    },
    {
      "epoch": 0.000225506591796875,
      "step": 36947,
      "training_step_time": 0.5770673751831055
    },
    {
      "epoch": 0.0002255126953125,
      "model_forward_time": 0.11475014686584473,
      "step": 36948
    },
    {
      "epoch": 0.0002255126953125,
      "step": 36948,
      "training_step_time": 0.47533464431762695
    },
    {
      "epoch": 0.000225518798828125,
      "model_forward_time": 0.11491250991821289,
      "step": 36949
    },
    {
      "epoch": 0.000225518798828125,
      "step": 36949,
      "training_step_time": 0.3806338310241699
    },
    {
      "epoch": 0.00022552490234375,
      "grad_norm": 0.13446418941020966,
      "learning_rate": 3.5205822614374255e-05,
      "loss": 0.0394,
      "step": 36950
    },
    {
      "epoch": 0.00022552490234375,
      "model_forward_time": 0.11438751220703125,
      "step": 36950
    },
    {
      "epoch": 0.00022552490234375,
      "step": 36950,
      "training_step_time": 0.4548380374908447
    },
    {
      "epoch": 0.000225531005859375,
      "model_forward_time": 0.11390066146850586,
      "step": 36951
    },
    {
      "epoch": 0.000225531005859375,
      "step": 36951,
      "training_step_time": 0.3744347095489502
    },
    {
      "epoch": 0.000225537109375,
      "model_forward_time": 0.11566591262817383,
      "step": 36952
    },
    {
      "epoch": 0.000225537109375,
      "step": 36952,
      "training_step_time": 0.39903712272644043
    },
    {
      "epoch": 0.000225543212890625,
      "model_forward_time": 0.11441493034362793,
      "step": 36953
    },
    {
      "epoch": 0.000225543212890625,
      "step": 36953,
      "training_step_time": 0.47661590576171875
    },
    {
      "epoch": 0.00022554931640625,
      "model_forward_time": 0.11514401435852051,
      "step": 36954
    },
    {
      "epoch": 0.00022554931640625,
      "step": 36954,
      "training_step_time": 0.40160393714904785
    },
    {
      "epoch": 0.000225555419921875,
      "model_forward_time": 0.11488676071166992,
      "step": 36955
    },
    {
      "epoch": 0.000225555419921875,
      "step": 36955,
      "training_step_time": 0.3781402111053467
    },
    {
      "epoch": 0.0002255615234375,
      "model_forward_time": 0.11516642570495605,
      "step": 36956
    },
    {
      "epoch": 0.0002255615234375,
      "step": 36956,
      "training_step_time": 0.3927445411682129
    },
    {
      "epoch": 0.000225567626953125,
      "model_forward_time": 0.11580777168273926,
      "step": 36957
    },
    {
      "epoch": 0.000225567626953125,
      "step": 36957,
      "training_step_time": 0.3819563388824463
    },
    {
      "epoch": 0.00022557373046875,
      "model_forward_time": 0.11517190933227539,
      "step": 36958
    },
    {
      "epoch": 0.00022557373046875,
      "step": 36958,
      "training_step_time": 0.4410982131958008
    },
    {
      "epoch": 0.000225579833984375,
      "model_forward_time": 0.11475110054016113,
      "step": 36959
    },
    {
      "epoch": 0.000225579833984375,
      "step": 36959,
      "training_step_time": 0.5826807022094727
    },
    {
      "epoch": 0.0002255859375,
      "grad_norm": 0.14548544585704803,
      "learning_rate": 3.517950096134232e-05,
      "loss": 0.0395,
      "step": 36960
    },
    {
      "epoch": 0.0002255859375,
      "model_forward_time": 0.11583399772644043,
      "step": 36960
    },
    {
      "epoch": 0.0002255859375,
      "step": 36960,
      "training_step_time": 0.3794980049133301
    },
    {
      "epoch": 0.000225592041015625,
      "model_forward_time": 0.11488747596740723,
      "step": 36961
    },
    {
      "epoch": 0.000225592041015625,
      "step": 36961,
      "training_step_time": 0.4207572937011719
    },
    {
      "epoch": 0.00022559814453125,
      "model_forward_time": 0.11442303657531738,
      "step": 36962
    },
    {
      "epoch": 0.00022559814453125,
      "step": 36962,
      "training_step_time": 0.4405190944671631
    },
    {
      "epoch": 0.000225604248046875,
      "model_forward_time": 0.11472964286804199,
      "step": 36963
    },
    {
      "epoch": 0.000225604248046875,
      "step": 36963,
      "training_step_time": 0.4176814556121826
    },
    {
      "epoch": 0.0002256103515625,
      "model_forward_time": 0.11445975303649902,
      "step": 36964
    },
    {
      "epoch": 0.0002256103515625,
      "step": 36964,
      "training_step_time": 0.44666528701782227
    },
    {
      "epoch": 0.000225616455078125,
      "model_forward_time": 0.11715269088745117,
      "step": 36965
    },
    {
      "epoch": 0.000225616455078125,
      "step": 36965,
      "training_step_time": 0.49621033668518066
    },
    {
      "epoch": 0.00022562255859375,
      "model_forward_time": 0.11426758766174316,
      "step": 36966
    },
    {
      "epoch": 0.00022562255859375,
      "step": 36966,
      "training_step_time": 0.39147329330444336
    },
    {
      "epoch": 0.000225628662109375,
      "model_forward_time": 0.11497259140014648,
      "step": 36967
    },
    {
      "epoch": 0.000225628662109375,
      "step": 36967,
      "training_step_time": 0.4030749797821045
    },
    {
      "epoch": 0.000225634765625,
      "model_forward_time": 0.1148989200592041,
      "step": 36968
    },
    {
      "epoch": 0.000225634765625,
      "step": 36968,
      "training_step_time": 0.3930964469909668
    },
    {
      "epoch": 0.000225640869140625,
      "model_forward_time": 0.11525154113769531,
      "step": 36969
    },
    {
      "epoch": 0.000225640869140625,
      "step": 36969,
      "training_step_time": 0.39572572708129883
    },
    {
      "epoch": 0.00022564697265625,
      "grad_norm": 0.10977793484926224,
      "learning_rate": 3.515318381038668e-05,
      "loss": 0.0401,
      "step": 36970
    },
    {
      "epoch": 0.00022564697265625,
      "model_forward_time": 0.11596107482910156,
      "step": 36970
    },
    {
      "epoch": 0.00022564697265625,
      "step": 36970,
      "training_step_time": 0.39556169509887695
    },
    {
      "epoch": 0.000225653076171875,
      "model_forward_time": 0.11498665809631348,
      "step": 36971
    },
    {
      "epoch": 0.000225653076171875,
      "step": 36971,
      "training_step_time": 0.628340482711792
    },
    {
      "epoch": 0.0002256591796875,
      "model_forward_time": 0.11528801918029785,
      "step": 36972
    },
    {
      "epoch": 0.0002256591796875,
      "step": 36972,
      "training_step_time": 0.39218854904174805
    },
    {
      "epoch": 0.000225665283203125,
      "model_forward_time": 0.11440491676330566,
      "step": 36973
    },
    {
      "epoch": 0.000225665283203125,
      "step": 36973,
      "training_step_time": 0.37320590019226074
    },
    {
      "epoch": 0.00022567138671875,
      "model_forward_time": 0.11519098281860352,
      "step": 36974
    },
    {
      "epoch": 0.00022567138671875,
      "step": 36974,
      "training_step_time": 0.4198720455169678
    },
    {
      "epoch": 0.000225677490234375,
      "model_forward_time": 0.11426782608032227,
      "step": 36975
    },
    {
      "epoch": 0.000225677490234375,
      "step": 36975,
      "training_step_time": 0.4166128635406494
    },
    {
      "epoch": 0.00022568359375,
      "model_forward_time": 0.11478900909423828,
      "step": 36976
    },
    {
      "epoch": 0.00022568359375,
      "step": 36976,
      "training_step_time": 0.4104499816894531
    },
    {
      "epoch": 0.000225689697265625,
      "model_forward_time": 0.11446881294250488,
      "step": 36977
    },
    {
      "epoch": 0.000225689697265625,
      "step": 36977,
      "training_step_time": 0.4532301425933838
    },
    {
      "epoch": 0.00022569580078125,
      "model_forward_time": 0.11485600471496582,
      "step": 36978
    },
    {
      "epoch": 0.00022569580078125,
      "step": 36978,
      "training_step_time": 0.47304725646972656
    },
    {
      "epoch": 0.000225701904296875,
      "model_forward_time": 0.11458444595336914,
      "step": 36979
    },
    {
      "epoch": 0.000225701904296875,
      "step": 36979,
      "training_step_time": 0.4039733409881592
    },
    {
      "epoch": 0.0002257080078125,
      "grad_norm": 0.10290154069662094,
      "learning_rate": 3.512687116950182e-05,
      "loss": 0.0401,
      "step": 36980
    },
    {
      "epoch": 0.0002257080078125,
      "model_forward_time": 0.11457562446594238,
      "step": 36980
    },
    {
      "epoch": 0.0002257080078125,
      "step": 36980,
      "training_step_time": 0.39558959007263184
    },
    {
      "epoch": 0.000225714111328125,
      "model_forward_time": 0.11500906944274902,
      "step": 36981
    },
    {
      "epoch": 0.000225714111328125,
      "step": 36981,
      "training_step_time": 0.38936710357666016
    },
    {
      "epoch": 0.00022572021484375,
      "model_forward_time": 0.11487722396850586,
      "step": 36982
    },
    {
      "epoch": 0.00022572021484375,
      "step": 36982,
      "training_step_time": 0.3927948474884033
    },
    {
      "epoch": 0.000225726318359375,
      "model_forward_time": 0.11530303955078125,
      "step": 36983
    },
    {
      "epoch": 0.000225726318359375,
      "step": 36983,
      "training_step_time": 0.4808471202850342
    },
    {
      "epoch": 0.000225732421875,
      "model_forward_time": 0.1155393123626709,
      "step": 36984
    },
    {
      "epoch": 0.000225732421875,
      "step": 36984,
      "training_step_time": 0.4550328254699707
    },
    {
      "epoch": 0.000225738525390625,
      "model_forward_time": 0.11439657211303711,
      "step": 36985
    },
    {
      "epoch": 0.000225738525390625,
      "step": 36985,
      "training_step_time": 0.4477705955505371
    },
    {
      "epoch": 0.00022574462890625,
      "model_forward_time": 0.1152336597442627,
      "step": 36986
    },
    {
      "epoch": 0.00022574462890625,
      "step": 36986,
      "training_step_time": 0.41342878341674805
    },
    {
      "epoch": 0.000225750732421875,
      "model_forward_time": 0.11475777626037598,
      "step": 36987
    },
    {
      "epoch": 0.000225750732421875,
      "step": 36987,
      "training_step_time": 0.3662686347961426
    },
    {
      "epoch": 0.0002257568359375,
      "model_forward_time": 0.11438345909118652,
      "step": 36988
    },
    {
      "epoch": 0.0002257568359375,
      "step": 36988,
      "training_step_time": 0.41863036155700684
    },
    {
      "epoch": 0.000225762939453125,
      "model_forward_time": 0.11497735977172852,
      "step": 36989
    },
    {
      "epoch": 0.000225762939453125,
      "step": 36989,
      "training_step_time": 0.43562793731689453
    },
    {
      "epoch": 0.00022576904296875,
      "grad_norm": 0.14379394054412842,
      "learning_rate": 3.5100563046680764e-05,
      "loss": 0.0406,
      "step": 36990
    },
    {
      "epoch": 0.00022576904296875,
      "model_forward_time": 0.11519408226013184,
      "step": 36990
    },
    {
      "epoch": 0.00022576904296875,
      "step": 36990,
      "training_step_time": 0.4971001148223877
    },
    {
      "epoch": 0.000225775146484375,
      "model_forward_time": 0.11424040794372559,
      "step": 36991
    },
    {
      "epoch": 0.000225775146484375,
      "step": 36991,
      "training_step_time": 0.4231686592102051
    },
    {
      "epoch": 0.00022578125,
      "model_forward_time": 0.11477017402648926,
      "step": 36992
    },
    {
      "epoch": 0.00022578125,
      "step": 36992,
      "training_step_time": 0.40866565704345703
    },
    {
      "epoch": 0.000225787353515625,
      "model_forward_time": 0.11506915092468262,
      "step": 36993
    },
    {
      "epoch": 0.000225787353515625,
      "step": 36993,
      "training_step_time": 0.39701223373413086
    },
    {
      "epoch": 0.00022579345703125,
      "model_forward_time": 0.11458969116210938,
      "step": 36994
    },
    {
      "epoch": 0.00022579345703125,
      "step": 36994,
      "training_step_time": 0.40493297576904297
    },
    {
      "epoch": 0.000225799560546875,
      "model_forward_time": 0.11529946327209473,
      "step": 36995
    },
    {
      "epoch": 0.000225799560546875,
      "step": 36995,
      "training_step_time": 0.44863390922546387
    },
    {
      "epoch": 0.0002258056640625,
      "model_forward_time": 0.11514854431152344,
      "step": 36996
    },
    {
      "epoch": 0.0002258056640625,
      "step": 36996,
      "training_step_time": 0.39222145080566406
    },
    {
      "epoch": 0.000225811767578125,
      "model_forward_time": 0.11519074440002441,
      "step": 36997
    },
    {
      "epoch": 0.000225811767578125,
      "step": 36997,
      "training_step_time": 0.40187835693359375
    },
    {
      "epoch": 0.00022581787109375,
      "model_forward_time": 0.11496901512145996,
      "step": 36998
    },
    {
      "epoch": 0.00022581787109375,
      "step": 36998,
      "training_step_time": 0.43141698837280273
    },
    {
      "epoch": 0.000225823974609375,
      "model_forward_time": 0.1149744987487793,
      "step": 36999
    },
    {
      "epoch": 0.000225823974609375,
      "step": 36999,
      "training_step_time": 0.3972487449645996
    },
    {
      "epoch": 0.000225830078125,
      "grad_norm": 0.12613463401794434,
      "learning_rate": 3.5074259449915284e-05,
      "loss": 0.0415,
      "step": 37000
    },
    {
      "epoch": 0.000225830078125,
      "model_forward_time": 0.11383199691772461,
      "step": 37000
    },
    {
      "epoch": 0.000225830078125,
      "step": 37000,
      "training_step_time": 0.36638808250427246
    },
    {
      "epoch": 0.000225836181640625,
      "model_forward_time": 0.11231040954589844,
      "step": 37001
    },
    {
      "epoch": 0.000225836181640625,
      "step": 37001,
      "training_step_time": 0.378704309463501
    },
    {
      "epoch": 0.00022584228515625,
      "model_forward_time": 0.11341595649719238,
      "step": 37002
    },
    {
      "epoch": 0.00022584228515625,
      "step": 37002,
      "training_step_time": 0.48665332794189453
    },
    {
      "epoch": 0.000225848388671875,
      "model_forward_time": 0.1132059097290039,
      "step": 37003
    },
    {
      "epoch": 0.000225848388671875,
      "step": 37003,
      "training_step_time": 0.45142507553100586
    },
    {
      "epoch": 0.0002258544921875,
      "model_forward_time": 0.11371779441833496,
      "step": 37004
    },
    {
      "epoch": 0.0002258544921875,
      "step": 37004,
      "training_step_time": 0.4260852336883545
    },
    {
      "epoch": 0.000225860595703125,
      "model_forward_time": 0.1141042709350586,
      "step": 37005
    },
    {
      "epoch": 0.000225860595703125,
      "step": 37005,
      "training_step_time": 0.4158515930175781
    },
    {
      "epoch": 0.00022586669921875,
      "model_forward_time": 0.11419034004211426,
      "step": 37006
    },
    {
      "epoch": 0.00022586669921875,
      "step": 37006,
      "training_step_time": 0.3987243175506592
    },
    {
      "epoch": 0.000225872802734375,
      "model_forward_time": 0.11462283134460449,
      "step": 37007
    },
    {
      "epoch": 0.000225872802734375,
      "step": 37007,
      "training_step_time": 0.3793776035308838
    },
    {
      "epoch": 0.00022587890625,
      "model_forward_time": 0.11544680595397949,
      "step": 37008
    },
    {
      "epoch": 0.00022587890625,
      "step": 37008,
      "training_step_time": 0.3825662136077881
    },
    {
      "epoch": 0.000225885009765625,
      "model_forward_time": 0.1148226261138916,
      "step": 37009
    },
    {
      "epoch": 0.000225885009765625,
      "step": 37009,
      "training_step_time": 0.38776373863220215
    },
    {
      "epoch": 0.00022589111328125,
      "grad_norm": 0.12890544533729553,
      "learning_rate": 3.504796038719567e-05,
      "loss": 0.0418,
      "step": 37010
    },
    {
      "epoch": 0.00022589111328125,
      "model_forward_time": 0.1149590015411377,
      "step": 37010
    },
    {
      "epoch": 0.00022589111328125,
      "step": 37010,
      "training_step_time": 0.3854067325592041
    },
    {
      "epoch": 0.000225897216796875,
      "model_forward_time": 0.11528301239013672,
      "step": 37011
    },
    {
      "epoch": 0.000225897216796875,
      "step": 37011,
      "training_step_time": 0.4156363010406494
    },
    {
      "epoch": 0.0002259033203125,
      "model_forward_time": 0.11489343643188477,
      "step": 37012
    },
    {
      "epoch": 0.0002259033203125,
      "step": 37012,
      "training_step_time": 0.4231600761413574
    },
    {
      "epoch": 0.000225909423828125,
      "model_forward_time": 0.11479926109313965,
      "step": 37013
    },
    {
      "epoch": 0.000225909423828125,
      "step": 37013,
      "training_step_time": 0.3949408531188965
    },
    {
      "epoch": 0.00022591552734375,
      "model_forward_time": 0.11546874046325684,
      "step": 37014
    },
    {
      "epoch": 0.00022591552734375,
      "step": 37014,
      "training_step_time": 0.39716029167175293
    },
    {
      "epoch": 0.000225921630859375,
      "model_forward_time": 0.11507678031921387,
      "step": 37015
    },
    {
      "epoch": 0.000225921630859375,
      "step": 37015,
      "training_step_time": 0.3906404972076416
    },
    {
      "epoch": 0.000225927734375,
      "model_forward_time": 0.11592435836791992,
      "step": 37016
    },
    {
      "epoch": 0.000225927734375,
      "step": 37016,
      "training_step_time": 0.36972904205322266
    },
    {
      "epoch": 0.000225933837890625,
      "model_forward_time": 0.11496925354003906,
      "step": 37017
    },
    {
      "epoch": 0.000225933837890625,
      "step": 37017,
      "training_step_time": 0.46181321144104004
    },
    {
      "epoch": 0.00022593994140625,
      "model_forward_time": 0.11480522155761719,
      "step": 37018
    },
    {
      "epoch": 0.00022593994140625,
      "step": 37018,
      "training_step_time": 0.420177698135376
    },
    {
      "epoch": 0.000225946044921875,
      "model_forward_time": 0.11494827270507812,
      "step": 37019
    },
    {
      "epoch": 0.000225946044921875,
      "step": 37019,
      "training_step_time": 0.4910094738006592
    },
    {
      "epoch": 0.0002259521484375,
      "grad_norm": 0.096761554479599,
      "learning_rate": 3.5021665866510925e-05,
      "loss": 0.0429,
      "step": 37020
    },
    {
      "epoch": 0.0002259521484375,
      "model_forward_time": 0.11554718017578125,
      "step": 37020
    },
    {
      "epoch": 0.0002259521484375,
      "step": 37020,
      "training_step_time": 0.4018878936767578
    },
    {
      "epoch": 0.000225958251953125,
      "model_forward_time": 0.11485075950622559,
      "step": 37021
    },
    {
      "epoch": 0.000225958251953125,
      "step": 37021,
      "training_step_time": 0.4406471252441406
    },
    {
      "epoch": 0.00022596435546875,
      "model_forward_time": 0.11438608169555664,
      "step": 37022
    },
    {
      "epoch": 0.00022596435546875,
      "step": 37022,
      "training_step_time": 0.39017486572265625
    },
    {
      "epoch": 0.000225970458984375,
      "model_forward_time": 0.11524724960327148,
      "step": 37023
    },
    {
      "epoch": 0.000225970458984375,
      "step": 37023,
      "training_step_time": 0.38324642181396484
    },
    {
      "epoch": 0.0002259765625,
      "model_forward_time": 0.11484217643737793,
      "step": 37024
    },
    {
      "epoch": 0.0002259765625,
      "step": 37024,
      "training_step_time": 0.3921322822570801
    },
    {
      "epoch": 0.000225982666015625,
      "model_forward_time": 0.11471152305603027,
      "step": 37025
    },
    {
      "epoch": 0.000225982666015625,
      "step": 37025,
      "training_step_time": 0.45938944816589355
    },
    {
      "epoch": 0.00022598876953125,
      "model_forward_time": 0.11578512191772461,
      "step": 37026
    },
    {
      "epoch": 0.00022598876953125,
      "step": 37026,
      "training_step_time": 0.4257011413574219
    },
    {
      "epoch": 0.000225994873046875,
      "model_forward_time": 0.11498451232910156,
      "step": 37027
    },
    {
      "epoch": 0.000225994873046875,
      "step": 37027,
      "training_step_time": 0.3904755115509033
    },
    {
      "epoch": 0.0002260009765625,
      "model_forward_time": 0.11492228507995605,
      "step": 37028
    },
    {
      "epoch": 0.0002260009765625,
      "step": 37028,
      "training_step_time": 0.3904907703399658
    },
    {
      "epoch": 0.000226007080078125,
      "model_forward_time": 0.11535048484802246,
      "step": 37029
    },
    {
      "epoch": 0.000226007080078125,
      "step": 37029,
      "training_step_time": 0.40372347831726074
    },
    {
      "epoch": 0.00022601318359375,
      "grad_norm": 0.13254399597644806,
      "learning_rate": 3.499537589584859e-05,
      "loss": 0.0432,
      "step": 37030
    },
    {
      "epoch": 0.00022601318359375,
      "model_forward_time": 0.11508989334106445,
      "step": 37030
    },
    {
      "epoch": 0.00022601318359375,
      "step": 37030,
      "training_step_time": 0.3914623260498047
    },
    {
      "epoch": 0.000226019287109375,
      "model_forward_time": 0.11572504043579102,
      "step": 37031
    },
    {
      "epoch": 0.000226019287109375,
      "step": 37031,
      "training_step_time": 0.36562633514404297
    },
    {
      "epoch": 0.000226025390625,
      "model_forward_time": 0.11515164375305176,
      "step": 37032
    },
    {
      "epoch": 0.000226025390625,
      "step": 37032,
      "training_step_time": 0.4244999885559082
    },
    {
      "epoch": 0.000226031494140625,
      "model_forward_time": 0.11514163017272949,
      "step": 37033
    },
    {
      "epoch": 0.000226031494140625,
      "step": 37033,
      "training_step_time": 0.45150208473205566
    },
    {
      "epoch": 0.00022603759765625,
      "model_forward_time": 0.11481571197509766,
      "step": 37034
    },
    {
      "epoch": 0.00022603759765625,
      "step": 37034,
      "training_step_time": 0.42491817474365234
    },
    {
      "epoch": 0.000226043701171875,
      "model_forward_time": 0.11493182182312012,
      "step": 37035
    },
    {
      "epoch": 0.000226043701171875,
      "step": 37035,
      "training_step_time": 0.4373359680175781
    },
    {
      "epoch": 0.0002260498046875,
      "model_forward_time": 0.11481404304504395,
      "step": 37036
    },
    {
      "epoch": 0.0002260498046875,
      "step": 37036,
      "training_step_time": 0.39826512336730957
    },
    {
      "epoch": 0.000226055908203125,
      "model_forward_time": 0.11508560180664062,
      "step": 37037
    },
    {
      "epoch": 0.000226055908203125,
      "step": 37037,
      "training_step_time": 0.4051685333251953
    },
    {
      "epoch": 0.00022606201171875,
      "model_forward_time": 0.11515426635742188,
      "step": 37038
    },
    {
      "epoch": 0.00022606201171875,
      "step": 37038,
      "training_step_time": 0.41321253776550293
    },
    {
      "epoch": 0.000226068115234375,
      "model_forward_time": 0.11519360542297363,
      "step": 37039
    },
    {
      "epoch": 0.000226068115234375,
      "step": 37039,
      "training_step_time": 0.39780426025390625
    },
    {
      "epoch": 0.00022607421875,
      "grad_norm": 0.12479768693447113,
      "learning_rate": 3.496909048319489e-05,
      "loss": 0.0401,
      "step": 37040
    },
    {
      "epoch": 0.00022607421875,
      "model_forward_time": 0.11496353149414062,
      "step": 37040
    },
    {
      "epoch": 0.00022607421875,
      "step": 37040,
      "training_step_time": 0.4210379123687744
    },
    {
      "epoch": 0.000226080322265625,
      "model_forward_time": 0.11526799201965332,
      "step": 37041
    },
    {
      "epoch": 0.000226080322265625,
      "step": 37041,
      "training_step_time": 0.38025569915771484
    },
    {
      "epoch": 0.00022608642578125,
      "model_forward_time": 0.11510658264160156,
      "step": 37042
    },
    {
      "epoch": 0.00022608642578125,
      "step": 37042,
      "training_step_time": 0.3875584602355957
    },
    {
      "epoch": 0.000226092529296875,
      "model_forward_time": 0.11538815498352051,
      "step": 37043
    },
    {
      "epoch": 0.000226092529296875,
      "step": 37043,
      "training_step_time": 0.39440464973449707
    },
    {
      "epoch": 0.0002260986328125,
      "model_forward_time": 0.11546850204467773,
      "step": 37044
    },
    {
      "epoch": 0.0002260986328125,
      "step": 37044,
      "training_step_time": 0.40008997917175293
    },
    {
      "epoch": 0.000226104736328125,
      "model_forward_time": 0.1146094799041748,
      "step": 37045
    },
    {
      "epoch": 0.000226104736328125,
      "step": 37045,
      "training_step_time": 0.3949167728424072
    },
    {
      "epoch": 0.00022611083984375,
      "model_forward_time": 0.11613726615905762,
      "step": 37046
    },
    {
      "epoch": 0.00022611083984375,
      "step": 37046,
      "training_step_time": 0.4336216449737549
    },
    {
      "epoch": 0.000226116943359375,
      "model_forward_time": 0.11728954315185547,
      "step": 37047
    },
    {
      "epoch": 0.000226116943359375,
      "step": 37047,
      "training_step_time": 0.5090687274932861
    },
    {
      "epoch": 0.000226123046875,
      "model_forward_time": 0.11660027503967285,
      "step": 37048
    },
    {
      "epoch": 0.000226123046875,
      "step": 37048,
      "training_step_time": 0.5018396377563477
    },
    {
      "epoch": 0.000226129150390625,
      "model_forward_time": 0.11541247367858887,
      "step": 37049
    },
    {
      "epoch": 0.000226129150390625,
      "step": 37049,
      "training_step_time": 0.37819647789001465
    },
    {
      "epoch": 0.00022613525390625,
      "grad_norm": 0.17135505378246307,
      "learning_rate": 3.494280963653463e-05,
      "loss": 0.0388,
      "step": 37050
    },
    {
      "epoch": 0.00022613525390625,
      "model_forward_time": 0.11529135704040527,
      "step": 37050
    },
    {
      "epoch": 0.00022613525390625,
      "step": 37050,
      "training_step_time": 0.47914576530456543
    },
    {
      "epoch": 0.000226141357421875,
      "model_forward_time": 0.11466288566589355,
      "step": 37051
    },
    {
      "epoch": 0.000226141357421875,
      "step": 37051,
      "training_step_time": 0.38840484619140625
    },
    {
      "epoch": 0.0002261474609375,
      "model_forward_time": 0.11494660377502441,
      "step": 37052
    },
    {
      "epoch": 0.0002261474609375,
      "step": 37052,
      "training_step_time": 0.3940107822418213
    },
    {
      "epoch": 0.000226153564453125,
      "model_forward_time": 0.11479592323303223,
      "step": 37053
    },
    {
      "epoch": 0.000226153564453125,
      "step": 37053,
      "training_step_time": 0.3974649906158447
    },
    {
      "epoch": 0.00022615966796875,
      "model_forward_time": 0.11532187461853027,
      "step": 37054
    },
    {
      "epoch": 0.00022615966796875,
      "step": 37054,
      "training_step_time": 0.39916062355041504
    },
    {
      "epoch": 0.000226165771484375,
      "model_forward_time": 0.11477470397949219,
      "step": 37055
    },
    {
      "epoch": 0.000226165771484375,
      "step": 37055,
      "training_step_time": 0.4011952877044678
    },
    {
      "epoch": 0.000226171875,
      "model_forward_time": 0.11481046676635742,
      "step": 37056
    },
    {
      "epoch": 0.000226171875,
      "step": 37056,
      "training_step_time": 0.39736056327819824
    },
    {
      "epoch": 0.000226177978515625,
      "model_forward_time": 0.11582684516906738,
      "step": 37057
    },
    {
      "epoch": 0.000226177978515625,
      "step": 37057,
      "training_step_time": 0.3970947265625
    },
    {
      "epoch": 0.00022618408203125,
      "model_forward_time": 0.11673569679260254,
      "step": 37058
    },
    {
      "epoch": 0.00022618408203125,
      "step": 37058,
      "training_step_time": 0.38817811012268066
    },
    {
      "epoch": 0.000226190185546875,
      "model_forward_time": 0.11531352996826172,
      "step": 37059
    },
    {
      "epoch": 0.000226190185546875,
      "step": 37059,
      "training_step_time": 0.3881070613861084
    },
    {
      "epoch": 0.0002261962890625,
      "grad_norm": 0.15123488008975983,
      "learning_rate": 3.491653336385124e-05,
      "loss": 0.0378,
      "step": 37060
    },
    {
      "epoch": 0.0002261962890625,
      "model_forward_time": 0.1154775619506836,
      "step": 37060
    },
    {
      "epoch": 0.0002261962890625,
      "step": 37060,
      "training_step_time": 0.3682515621185303
    },
    {
      "epoch": 0.000226202392578125,
      "model_forward_time": 0.11565661430358887,
      "step": 37061
    },
    {
      "epoch": 0.000226202392578125,
      "step": 37061,
      "training_step_time": 0.4240272045135498
    },
    {
      "epoch": 0.00022620849609375,
      "model_forward_time": 0.11516427993774414,
      "step": 37062
    },
    {
      "epoch": 0.00022620849609375,
      "step": 37062,
      "training_step_time": 0.5051379203796387
    },
    {
      "epoch": 0.000226214599609375,
      "model_forward_time": 0.11558842658996582,
      "step": 37063
    },
    {
      "epoch": 0.000226214599609375,
      "step": 37063,
      "training_step_time": 0.4038879871368408
    },
    {
      "epoch": 0.000226220703125,
      "model_forward_time": 0.1152505874633789,
      "step": 37064
    },
    {
      "epoch": 0.000226220703125,
      "step": 37064,
      "training_step_time": 0.38355183601379395
    },
    {
      "epoch": 0.000226226806640625,
      "model_forward_time": 0.11496138572692871,
      "step": 37065
    },
    {
      "epoch": 0.000226226806640625,
      "step": 37065,
      "training_step_time": 0.3945283889770508
    },
    {
      "epoch": 0.00022623291015625,
      "model_forward_time": 0.11637687683105469,
      "step": 37066
    },
    {
      "epoch": 0.00022623291015625,
      "step": 37066,
      "training_step_time": 0.4028785228729248
    },
    {
      "epoch": 0.000226239013671875,
      "model_forward_time": 0.11555242538452148,
      "step": 37067
    },
    {
      "epoch": 0.000226239013671875,
      "step": 37067,
      "training_step_time": 0.4130423069000244
    },
    {
      "epoch": 0.0002262451171875,
      "model_forward_time": 0.11498618125915527,
      "step": 37068
    },
    {
      "epoch": 0.0002262451171875,
      "step": 37068,
      "training_step_time": 0.3994557857513428
    },
    {
      "epoch": 0.000226251220703125,
      "model_forward_time": 0.11538100242614746,
      "step": 37069
    },
    {
      "epoch": 0.000226251220703125,
      "step": 37069,
      "training_step_time": 0.3977375030517578
    },
    {
      "epoch": 0.00022625732421875,
      "grad_norm": 0.11601777374744415,
      "learning_rate": 3.489026167312678e-05,
      "loss": 0.0419,
      "step": 37070
    },
    {
      "epoch": 0.00022625732421875,
      "model_forward_time": 0.11551094055175781,
      "step": 37070
    },
    {
      "epoch": 0.00022625732421875,
      "step": 37070,
      "training_step_time": 0.40310239791870117
    },
    {
      "epoch": 0.000226263427734375,
      "model_forward_time": 0.11538243293762207,
      "step": 37071
    },
    {
      "epoch": 0.000226263427734375,
      "step": 37071,
      "training_step_time": 0.3907015323638916
    },
    {
      "epoch": 0.00022626953125,
      "model_forward_time": 0.11558914184570312,
      "step": 37072
    },
    {
      "epoch": 0.00022626953125,
      "step": 37072,
      "training_step_time": 0.3939030170440674
    },
    {
      "epoch": 0.000226275634765625,
      "model_forward_time": 0.11527729034423828,
      "step": 37073
    },
    {
      "epoch": 0.000226275634765625,
      "step": 37073,
      "training_step_time": 0.40082406997680664
    },
    {
      "epoch": 0.00022628173828125,
      "model_forward_time": 0.115570068359375,
      "step": 37074
    },
    {
      "epoch": 0.00022628173828125,
      "step": 37074,
      "training_step_time": 0.39175915718078613
    },
    {
      "epoch": 0.000226287841796875,
      "model_forward_time": 0.11559820175170898,
      "step": 37075
    },
    {
      "epoch": 0.000226287841796875,
      "step": 37075,
      "training_step_time": 0.39482951164245605
    },
    {
      "epoch": 0.0002262939453125,
      "model_forward_time": 0.11569070816040039,
      "step": 37076
    },
    {
      "epoch": 0.0002262939453125,
      "step": 37076,
      "training_step_time": 0.48337411880493164
    },
    {
      "epoch": 0.000226300048828125,
      "model_forward_time": 0.11517095565795898,
      "step": 37077
    },
    {
      "epoch": 0.000226300048828125,
      "step": 37077,
      "training_step_time": 0.5080749988555908
    },
    {
      "epoch": 0.00022630615234375,
      "model_forward_time": 0.11479568481445312,
      "step": 37078
    },
    {
      "epoch": 0.00022630615234375,
      "step": 37078,
      "training_step_time": 0.4150211811065674
    },
    {
      "epoch": 0.000226312255859375,
      "model_forward_time": 0.11480712890625,
      "step": 37079
    },
    {
      "epoch": 0.000226312255859375,
      "step": 37079,
      "training_step_time": 0.44629573822021484
    },
    {
      "epoch": 0.000226318359375,
      "grad_norm": 0.16216497123241425,
      "learning_rate": 3.4863994572341843e-05,
      "loss": 0.043,
      "step": 37080
    },
    {
      "epoch": 0.000226318359375,
      "model_forward_time": 0.1147298812866211,
      "step": 37080
    },
    {
      "epoch": 0.000226318359375,
      "step": 37080,
      "training_step_time": 0.4382612705230713
    },
    {
      "epoch": 0.000226324462890625,
      "model_forward_time": 0.11471366882324219,
      "step": 37081
    },
    {
      "epoch": 0.000226324462890625,
      "step": 37081,
      "training_step_time": 0.4638209342956543
    },
    {
      "epoch": 0.00022633056640625,
      "model_forward_time": 0.11487865447998047,
      "step": 37082
    },
    {
      "epoch": 0.00022633056640625,
      "step": 37082,
      "training_step_time": 0.3942301273345947
    },
    {
      "epoch": 0.000226336669921875,
      "model_forward_time": 0.11544346809387207,
      "step": 37083
    },
    {
      "epoch": 0.000226336669921875,
      "step": 37083,
      "training_step_time": 0.38490772247314453
    },
    {
      "epoch": 0.0002263427734375,
      "model_forward_time": 0.1159522533416748,
      "step": 37084
    },
    {
      "epoch": 0.0002263427734375,
      "step": 37084,
      "training_step_time": 0.387859582901001
    },
    {
      "epoch": 0.000226348876953125,
      "model_forward_time": 0.11485171318054199,
      "step": 37085
    },
    {
      "epoch": 0.000226348876953125,
      "step": 37085,
      "training_step_time": 0.3897223472595215
    },
    {
      "epoch": 0.00022635498046875,
      "model_forward_time": 0.11541152000427246,
      "step": 37086
    },
    {
      "epoch": 0.00022635498046875,
      "step": 37086,
      "training_step_time": 0.3895282745361328
    },
    {
      "epoch": 0.000226361083984375,
      "model_forward_time": 0.11575531959533691,
      "step": 37087
    },
    {
      "epoch": 0.000226361083984375,
      "step": 37087,
      "training_step_time": 0.39733171463012695
    },
    {
      "epoch": 0.0002263671875,
      "model_forward_time": 0.1157388687133789,
      "step": 37088
    },
    {
      "epoch": 0.0002263671875,
      "step": 37088,
      "training_step_time": 0.3860483169555664
    },
    {
      "epoch": 0.000226373291015625,
      "model_forward_time": 0.11533832550048828,
      "step": 37089
    },
    {
      "epoch": 0.000226373291015625,
      "step": 37089,
      "training_step_time": 0.38474559783935547
    },
    {
      "epoch": 0.00022637939453125,
      "grad_norm": 0.1355287730693817,
      "learning_rate": 3.483773206947572e-05,
      "loss": 0.041,
      "step": 37090
    },
    {
      "epoch": 0.00022637939453125,
      "model_forward_time": 0.11583137512207031,
      "step": 37090
    },
    {
      "epoch": 0.00022637939453125,
      "step": 37090,
      "training_step_time": 0.3662879467010498
    },
    {
      "epoch": 0.000226385498046875,
      "model_forward_time": 0.11534571647644043,
      "step": 37091
    },
    {
      "epoch": 0.000226385498046875,
      "step": 37091,
      "training_step_time": 0.4980583190917969
    },
    {
      "epoch": 0.0002263916015625,
      "model_forward_time": 0.11473417282104492,
      "step": 37092
    },
    {
      "epoch": 0.0002263916015625,
      "step": 37092,
      "training_step_time": 0.4964125156402588
    },
    {
      "epoch": 0.000226397705078125,
      "model_forward_time": 0.11497330665588379,
      "step": 37093
    },
    {
      "epoch": 0.000226397705078125,
      "step": 37093,
      "training_step_time": 0.5084788799285889
    },
    {
      "epoch": 0.00022640380859375,
      "model_forward_time": 0.11490988731384277,
      "step": 37094
    },
    {
      "epoch": 0.00022640380859375,
      "step": 37094,
      "training_step_time": 0.41283559799194336
    },
    {
      "epoch": 0.000226409912109375,
      "model_forward_time": 0.11452937126159668,
      "step": 37095
    },
    {
      "epoch": 0.000226409912109375,
      "step": 37095,
      "training_step_time": 0.3826932907104492
    },
    {
      "epoch": 0.000226416015625,
      "model_forward_time": 0.11463117599487305,
      "step": 37096
    },
    {
      "epoch": 0.000226416015625,
      "step": 37096,
      "training_step_time": 0.3903019428253174
    },
    {
      "epoch": 0.000226422119140625,
      "model_forward_time": 0.11429858207702637,
      "step": 37097
    },
    {
      "epoch": 0.000226422119140625,
      "step": 37097,
      "training_step_time": 0.4005753993988037
    },
    {
      "epoch": 0.00022642822265625,
      "model_forward_time": 0.11551308631896973,
      "step": 37098
    },
    {
      "epoch": 0.00022642822265625,
      "step": 37098,
      "training_step_time": 0.3900783061981201
    },
    {
      "epoch": 0.000226434326171875,
      "model_forward_time": 0.11470556259155273,
      "step": 37099
    },
    {
      "epoch": 0.000226434326171875,
      "step": 37099,
      "training_step_time": 0.39080023765563965
    },
    {
      "epoch": 0.0002264404296875,
      "grad_norm": 0.1261160522699356,
      "learning_rate": 3.4811474172506275e-05,
      "loss": 0.0394,
      "step": 37100
    },
    {
      "epoch": 0.0002264404296875,
      "model_forward_time": 0.11528849601745605,
      "step": 37100
    },
    {
      "epoch": 0.0002264404296875,
      "step": 37100,
      "training_step_time": 0.3899095058441162
    },
    {
      "epoch": 0.000226446533203125,
      "model_forward_time": 0.11546444892883301,
      "step": 37101
    },
    {
      "epoch": 0.000226446533203125,
      "step": 37101,
      "training_step_time": 0.38558292388916016
    },
    {
      "epoch": 0.00022645263671875,
      "model_forward_time": 0.11565661430358887,
      "step": 37102
    },
    {
      "epoch": 0.00022645263671875,
      "step": 37102,
      "training_step_time": 0.38972997665405273
    },
    {
      "epoch": 0.000226458740234375,
      "model_forward_time": 0.11551976203918457,
      "step": 37103
    },
    {
      "epoch": 0.000226458740234375,
      "step": 37103,
      "training_step_time": 0.3912029266357422
    },
    {
      "epoch": 0.00022646484375,
      "model_forward_time": 0.11519360542297363,
      "step": 37104
    },
    {
      "epoch": 0.00022646484375,
      "step": 37104,
      "training_step_time": 0.39380574226379395
    },
    {
      "epoch": 0.000226470947265625,
      "model_forward_time": 0.11614084243774414,
      "step": 37105
    },
    {
      "epoch": 0.000226470947265625,
      "step": 37105,
      "training_step_time": 0.4304981231689453
    },
    {
      "epoch": 0.00022647705078125,
      "model_forward_time": 0.11629033088684082,
      "step": 37106
    },
    {
      "epoch": 0.00022647705078125,
      "step": 37106,
      "training_step_time": 0.4628274440765381
    },
    {
      "epoch": 0.000226483154296875,
      "model_forward_time": 0.11616277694702148,
      "step": 37107
    },
    {
      "epoch": 0.000226483154296875,
      "step": 37107,
      "training_step_time": 0.40904760360717773
    },
    {
      "epoch": 0.0002264892578125,
      "model_forward_time": 0.11609101295471191,
      "step": 37108
    },
    {
      "epoch": 0.0002264892578125,
      "step": 37108,
      "training_step_time": 0.4115719795227051
    },
    {
      "epoch": 0.000226495361328125,
      "model_forward_time": 0.1159672737121582,
      "step": 37109
    },
    {
      "epoch": 0.000226495361328125,
      "step": 37109,
      "training_step_time": 0.38313722610473633
    },
    {
      "epoch": 0.00022650146484375,
      "grad_norm": 0.1375458836555481,
      "learning_rate": 3.478522088940993e-05,
      "loss": 0.0412,
      "step": 37110
    },
    {
      "epoch": 0.00022650146484375,
      "model_forward_time": 0.11506009101867676,
      "step": 37110
    },
    {
      "epoch": 0.00022650146484375,
      "step": 37110,
      "training_step_time": 0.4005284309387207
    },
    {
      "epoch": 0.000226507568359375,
      "model_forward_time": 0.1148536205291748,
      "step": 37111
    },
    {
      "epoch": 0.000226507568359375,
      "step": 37111,
      "training_step_time": 0.39646029472351074
    },
    {
      "epoch": 0.000226513671875,
      "model_forward_time": 0.1152346134185791,
      "step": 37112
    },
    {
      "epoch": 0.000226513671875,
      "step": 37112,
      "training_step_time": 0.3866419792175293
    },
    {
      "epoch": 0.000226519775390625,
      "model_forward_time": 0.11552619934082031,
      "step": 37113
    },
    {
      "epoch": 0.000226519775390625,
      "step": 37113,
      "training_step_time": 0.3838653564453125
    },
    {
      "epoch": 0.00022652587890625,
      "model_forward_time": 0.11685442924499512,
      "step": 37114
    },
    {
      "epoch": 0.00022652587890625,
      "step": 37114,
      "training_step_time": 0.39088892936706543
    },
    {
      "epoch": 0.000226531982421875,
      "model_forward_time": 0.11578512191772461,
      "step": 37115
    },
    {
      "epoch": 0.000226531982421875,
      "step": 37115,
      "training_step_time": 0.39301609992980957
    },
    {
      "epoch": 0.0002265380859375,
      "model_forward_time": 0.11537337303161621,
      "step": 37116
    },
    {
      "epoch": 0.0002265380859375,
      "step": 37116,
      "training_step_time": 0.3898496627807617
    },
    {
      "epoch": 0.000226544189453125,
      "model_forward_time": 0.11532068252563477,
      "step": 37117
    },
    {
      "epoch": 0.000226544189453125,
      "step": 37117,
      "training_step_time": 0.44581127166748047
    },
    {
      "epoch": 0.00022655029296875,
      "model_forward_time": 0.11572837829589844,
      "step": 37118
    },
    {
      "epoch": 0.00022655029296875,
      "step": 37118,
      "training_step_time": 0.4012117385864258
    },
    {
      "epoch": 0.000226556396484375,
      "model_forward_time": 0.11531758308410645,
      "step": 37119
    },
    {
      "epoch": 0.000226556396484375,
      "step": 37119,
      "training_step_time": 0.4108545780181885
    },
    {
      "epoch": 0.0002265625,
      "grad_norm": 0.11412259191274643,
      "learning_rate": 3.475897222816178e-05,
      "loss": 0.0374,
      "step": 37120
    },
    {
      "epoch": 0.0002265625,
      "model_forward_time": 0.11619114875793457,
      "step": 37120
    },
    {
      "epoch": 0.0002265625,
      "step": 37120,
      "training_step_time": 0.48744988441467285
    },
    {
      "epoch": 0.000226568603515625,
      "model_forward_time": 0.11618685722351074,
      "step": 37121
    },
    {
      "epoch": 0.000226568603515625,
      "step": 37121,
      "training_step_time": 0.4552037715911865
    },
    {
      "epoch": 0.00022657470703125,
      "model_forward_time": 0.11501574516296387,
      "step": 37122
    },
    {
      "epoch": 0.00022657470703125,
      "step": 37122,
      "training_step_time": 0.4671213626861572
    },
    {
      "epoch": 0.000226580810546875,
      "model_forward_time": 0.11611461639404297,
      "step": 37123
    },
    {
      "epoch": 0.000226580810546875,
      "step": 37123,
      "training_step_time": 0.5135953426361084
    },
    {
      "epoch": 0.0002265869140625,
      "model_forward_time": 0.11527609825134277,
      "step": 37124
    },
    {
      "epoch": 0.0002265869140625,
      "step": 37124,
      "training_step_time": 0.3818995952606201
    },
    {
      "epoch": 0.000226593017578125,
      "model_forward_time": 0.1145784854888916,
      "step": 37125
    },
    {
      "epoch": 0.000226593017578125,
      "step": 37125,
      "training_step_time": 0.38967084884643555
    },
    {
      "epoch": 0.00022659912109375,
      "model_forward_time": 0.1148519515991211,
      "step": 37126
    },
    {
      "epoch": 0.00022659912109375,
      "step": 37126,
      "training_step_time": 0.3902926445007324
    },
    {
      "epoch": 0.000226605224609375,
      "model_forward_time": 0.11545538902282715,
      "step": 37127
    },
    {
      "epoch": 0.000226605224609375,
      "step": 37127,
      "training_step_time": 0.388751745223999
    },
    {
      "epoch": 0.000226611328125,
      "model_forward_time": 0.11470651626586914,
      "step": 37128
    },
    {
      "epoch": 0.000226611328125,
      "step": 37128,
      "training_step_time": 0.38433384895324707
    },
    {
      "epoch": 0.000226617431640625,
      "model_forward_time": 0.11499786376953125,
      "step": 37129
    },
    {
      "epoch": 0.000226617431640625,
      "step": 37129,
      "training_step_time": 0.70841383934021
    },
    {
      "epoch": 0.00022662353515625,
      "grad_norm": 0.09961283206939697,
      "learning_rate": 3.473272819673542e-05,
      "loss": 0.0384,
      "step": 37130
    },
    {
      "epoch": 0.00022662353515625,
      "model_forward_time": 0.11513948440551758,
      "step": 37130
    },
    {
      "epoch": 0.00022662353515625,
      "step": 37130,
      "training_step_time": 0.37965869903564453
    },
    {
      "epoch": 0.000226629638671875,
      "model_forward_time": 0.11422944068908691,
      "step": 37131
    },
    {
      "epoch": 0.000226629638671875,
      "step": 37131,
      "training_step_time": 0.3988659381866455
    },
    {
      "epoch": 0.0002266357421875,
      "model_forward_time": 0.11466145515441895,
      "step": 37132
    },
    {
      "epoch": 0.0002266357421875,
      "step": 37132,
      "training_step_time": 0.3924117088317871
    },
    {
      "epoch": 0.000226641845703125,
      "model_forward_time": 0.11452770233154297,
      "step": 37133
    },
    {
      "epoch": 0.000226641845703125,
      "step": 37133,
      "training_step_time": 0.38805484771728516
    },
    {
      "epoch": 0.00022664794921875,
      "model_forward_time": 0.11514616012573242,
      "step": 37134
    },
    {
      "epoch": 0.00022664794921875,
      "step": 37134,
      "training_step_time": 0.4708113670349121
    },
    {
      "epoch": 0.000226654052734375,
      "model_forward_time": 0.11483097076416016,
      "step": 37135
    },
    {
      "epoch": 0.000226654052734375,
      "step": 37135,
      "training_step_time": 0.7012641429901123
    },
    {
      "epoch": 0.00022666015625,
      "model_forward_time": 0.11533260345458984,
      "step": 37136
    },
    {
      "epoch": 0.00022666015625,
      "step": 37136,
      "training_step_time": 0.3780186176300049
    },
    {
      "epoch": 0.000226666259765625,
      "model_forward_time": 0.11480998992919922,
      "step": 37137
    },
    {
      "epoch": 0.000226666259765625,
      "step": 37137,
      "training_step_time": 0.3853330612182617
    },
    {
      "epoch": 0.00022667236328125,
      "model_forward_time": 0.11501741409301758,
      "step": 37138
    },
    {
      "epoch": 0.00022667236328125,
      "step": 37138,
      "training_step_time": 0.4075350761413574
    },
    {
      "epoch": 0.000226678466796875,
      "model_forward_time": 0.11489629745483398,
      "step": 37139
    },
    {
      "epoch": 0.000226678466796875,
      "step": 37139,
      "training_step_time": 0.37299156188964844
    },
    {
      "epoch": 0.0002266845703125,
      "grad_norm": 0.1607598066329956,
      "learning_rate": 3.470648880310313e-05,
      "loss": 0.0451,
      "step": 37140
    },
    {
      "epoch": 0.0002266845703125,
      "model_forward_time": 0.11482000350952148,
      "step": 37140
    },
    {
      "epoch": 0.0002266845703125,
      "step": 37140,
      "training_step_time": 0.3786494731903076
    },
    {
      "epoch": 0.000226690673828125,
      "model_forward_time": 0.11610960960388184,
      "step": 37141
    },
    {
      "epoch": 0.000226690673828125,
      "step": 37141,
      "training_step_time": 0.5202827453613281
    },
    {
      "epoch": 0.00022669677734375,
      "model_forward_time": 0.11522221565246582,
      "step": 37142
    },
    {
      "epoch": 0.00022669677734375,
      "step": 37142,
      "training_step_time": 0.3809359073638916
    },
    {
      "epoch": 0.000226702880859375,
      "model_forward_time": 0.1152639389038086,
      "step": 37143
    },
    {
      "epoch": 0.000226702880859375,
      "step": 37143,
      "training_step_time": 0.39315319061279297
    },
    {
      "epoch": 0.000226708984375,
      "model_forward_time": 0.11515974998474121,
      "step": 37144
    },
    {
      "epoch": 0.000226708984375,
      "step": 37144,
      "training_step_time": 0.38965392112731934
    },
    {
      "epoch": 0.000226715087890625,
      "model_forward_time": 0.11495494842529297,
      "step": 37145
    },
    {
      "epoch": 0.000226715087890625,
      "step": 37145,
      "training_step_time": 0.39777588844299316
    },
    {
      "epoch": 0.00022672119140625,
      "model_forward_time": 0.11650800704956055,
      "step": 37146
    },
    {
      "epoch": 0.00022672119140625,
      "step": 37146,
      "training_step_time": 0.3964219093322754
    },
    {
      "epoch": 0.000226727294921875,
      "model_forward_time": 0.11500072479248047,
      "step": 37147
    },
    {
      "epoch": 0.000226727294921875,
      "step": 37147,
      "training_step_time": 0.6009011268615723
    },
    {
      "epoch": 0.0002267333984375,
      "model_forward_time": 0.11526203155517578,
      "step": 37148
    },
    {
      "epoch": 0.0002267333984375,
      "step": 37148,
      "training_step_time": 0.504223108291626
    },
    {
      "epoch": 0.000226739501953125,
      "model_forward_time": 0.1146392822265625,
      "step": 37149
    },
    {
      "epoch": 0.000226739501953125,
      "step": 37149,
      "training_step_time": 0.4990084171295166
    },
    {
      "epoch": 0.00022674560546875,
      "grad_norm": 0.12749318778514862,
      "learning_rate": 3.468025405523576e-05,
      "loss": 0.0397,
      "step": 37150
    },
    {
      "epoch": 0.00022674560546875,
      "model_forward_time": 0.11508822441101074,
      "step": 37150
    },
    {
      "epoch": 0.00022674560546875,
      "step": 37150,
      "training_step_time": 0.503150463104248
    },
    {
      "epoch": 0.000226751708984375,
      "model_forward_time": 0.11518502235412598,
      "step": 37151
    },
    {
      "epoch": 0.000226751708984375,
      "step": 37151,
      "training_step_time": 0.3739597797393799
    },
    {
      "epoch": 0.0002267578125,
      "model_forward_time": 0.11493206024169922,
      "step": 37152
    },
    {
      "epoch": 0.0002267578125,
      "step": 37152,
      "training_step_time": 0.37224674224853516
    },
    {
      "epoch": 0.000226763916015625,
      "model_forward_time": 0.11562657356262207,
      "step": 37153
    },
    {
      "epoch": 0.000226763916015625,
      "step": 37153,
      "training_step_time": 0.3862926959991455
    },
    {
      "epoch": 0.00022677001953125,
      "model_forward_time": 0.11560320854187012,
      "step": 37154
    },
    {
      "epoch": 0.00022677001953125,
      "step": 37154,
      "training_step_time": 0.3754563331604004
    },
    {
      "epoch": 0.000226776123046875,
      "model_forward_time": 0.11528325080871582,
      "step": 37155
    },
    {
      "epoch": 0.000226776123046875,
      "step": 37155,
      "training_step_time": 0.4026951789855957
    },
    {
      "epoch": 0.0002267822265625,
      "model_forward_time": 0.1156303882598877,
      "step": 37156
    },
    {
      "epoch": 0.0002267822265625,
      "step": 37156,
      "training_step_time": 0.383176326751709
    },
    {
      "epoch": 0.000226788330078125,
      "model_forward_time": 0.11497783660888672,
      "step": 37157
    },
    {
      "epoch": 0.000226788330078125,
      "step": 37157,
      "training_step_time": 0.3991832733154297
    },
    {
      "epoch": 0.00022679443359375,
      "model_forward_time": 0.11516976356506348,
      "step": 37158
    },
    {
      "epoch": 0.00022679443359375,
      "step": 37158,
      "training_step_time": 0.3854196071624756
    },
    {
      "epoch": 0.000226800537109375,
      "model_forward_time": 0.1152799129486084,
      "step": 37159
    },
    {
      "epoch": 0.000226800537109375,
      "step": 37159,
      "training_step_time": 0.4122653007507324
    },
    {
      "epoch": 0.000226806640625,
      "grad_norm": 0.07839801907539368,
      "learning_rate": 3.465402396110269e-05,
      "loss": 0.0335,
      "step": 37160
    },
    {
      "epoch": 0.000226806640625,
      "model_forward_time": 0.11503005027770996,
      "step": 37160
    },
    {
      "epoch": 0.000226806640625,
      "step": 37160,
      "training_step_time": 0.3869802951812744
    },
    {
      "epoch": 0.000226812744140625,
      "model_forward_time": 0.11601924896240234,
      "step": 37161
    },
    {
      "epoch": 0.000226812744140625,
      "step": 37161,
      "training_step_time": 0.4001893997192383
    },
    {
      "epoch": 0.00022681884765625,
      "model_forward_time": 0.1152048110961914,
      "step": 37162
    },
    {
      "epoch": 0.00022681884765625,
      "step": 37162,
      "training_step_time": 0.4295532703399658
    },
    {
      "epoch": 0.000226824951171875,
      "model_forward_time": 0.11556172370910645,
      "step": 37163
    },
    {
      "epoch": 0.000226824951171875,
      "step": 37163,
      "training_step_time": 0.41892528533935547
    },
    {
      "epoch": 0.0002268310546875,
      "model_forward_time": 0.11496949195861816,
      "step": 37164
    },
    {
      "epoch": 0.0002268310546875,
      "step": 37164,
      "training_step_time": 0.5051925182342529
    },
    {
      "epoch": 0.000226837158203125,
      "model_forward_time": 0.11627435684204102,
      "step": 37165
    },
    {
      "epoch": 0.000226837158203125,
      "step": 37165,
      "training_step_time": 0.49801158905029297
    },
    {
      "epoch": 0.00022684326171875,
      "model_forward_time": 0.11496567726135254,
      "step": 37166
    },
    {
      "epoch": 0.00022684326171875,
      "step": 37166,
      "training_step_time": 0.38024330139160156
    },
    {
      "epoch": 0.000226849365234375,
      "model_forward_time": 0.11548471450805664,
      "step": 37167
    },
    {
      "epoch": 0.000226849365234375,
      "step": 37167,
      "training_step_time": 0.38443493843078613
    },
    {
      "epoch": 0.00022685546875,
      "model_forward_time": 0.11424446105957031,
      "step": 37168
    },
    {
      "epoch": 0.00022685546875,
      "step": 37168,
      "training_step_time": 0.39009881019592285
    },
    {
      "epoch": 0.000226861572265625,
      "model_forward_time": 0.11495208740234375,
      "step": 37169
    },
    {
      "epoch": 0.000226861572265625,
      "step": 37169,
      "training_step_time": 0.3898334503173828
    },
    {
      "epoch": 0.00022686767578125,
      "grad_norm": 0.08908075839281082,
      "learning_rate": 3.462779852867197e-05,
      "loss": 0.0377,
      "step": 37170
    },
    {
      "epoch": 0.00022686767578125,
      "model_forward_time": 0.11508321762084961,
      "step": 37170
    },
    {
      "epoch": 0.00022686767578125,
      "step": 37170,
      "training_step_time": 0.39371156692504883
    },
    {
      "epoch": 0.000226873779296875,
      "model_forward_time": 0.11526823043823242,
      "step": 37171
    },
    {
      "epoch": 0.000226873779296875,
      "step": 37171,
      "training_step_time": 0.5829591751098633
    },
    {
      "epoch": 0.0002268798828125,
      "model_forward_time": 0.11470270156860352,
      "step": 37172
    },
    {
      "epoch": 0.0002268798828125,
      "step": 37172,
      "training_step_time": 0.3862490653991699
    },
    {
      "epoch": 0.000226885986328125,
      "model_forward_time": 0.11520862579345703,
      "step": 37173
    },
    {
      "epoch": 0.000226885986328125,
      "step": 37173,
      "training_step_time": 0.3814404010772705
    },
    {
      "epoch": 0.00022689208984375,
      "model_forward_time": 0.11496162414550781,
      "step": 37174
    },
    {
      "epoch": 0.00022689208984375,
      "step": 37174,
      "training_step_time": 0.4300096035003662
    },
    {
      "epoch": 0.000226898193359375,
      "model_forward_time": 0.11468243598937988,
      "step": 37175
    },
    {
      "epoch": 0.000226898193359375,
      "step": 37175,
      "training_step_time": 0.4011399745941162
    },
    {
      "epoch": 0.000226904296875,
      "model_forward_time": 0.11542081832885742,
      "step": 37176
    },
    {
      "epoch": 0.000226904296875,
      "step": 37176,
      "training_step_time": 0.3846879005432129
    },
    {
      "epoch": 0.000226910400390625,
      "model_forward_time": 0.1154336929321289,
      "step": 37177
    },
    {
      "epoch": 0.000226910400390625,
      "step": 37177,
      "training_step_time": 0.6326513290405273
    },
    {
      "epoch": 0.00022691650390625,
      "model_forward_time": 0.11478543281555176,
      "step": 37178
    },
    {
      "epoch": 0.00022691650390625,
      "step": 37178,
      "training_step_time": 0.47872471809387207
    },
    {
      "epoch": 0.000226922607421875,
      "model_forward_time": 0.11460399627685547,
      "step": 37179
    },
    {
      "epoch": 0.000226922607421875,
      "step": 37179,
      "training_step_time": 0.5100343227386475
    },
    {
      "epoch": 0.0002269287109375,
      "grad_norm": 0.13017955422401428,
      "learning_rate": 3.460157776591018e-05,
      "loss": 0.0421,
      "step": 37180
    },
    {
      "epoch": 0.0002269287109375,
      "model_forward_time": 0.11393380165100098,
      "step": 37180
    },
    {
      "epoch": 0.0002269287109375,
      "step": 37180,
      "training_step_time": 0.39794921875
    },
    {
      "epoch": 0.000226934814453125,
      "model_forward_time": 0.11433792114257812,
      "step": 37181
    },
    {
      "epoch": 0.000226934814453125,
      "step": 37181,
      "training_step_time": 0.3824801445007324
    },
    {
      "epoch": 0.00022694091796875,
      "model_forward_time": 0.11405348777770996,
      "step": 37182
    },
    {
      "epoch": 0.00022694091796875,
      "step": 37182,
      "training_step_time": 0.38520002365112305
    },
    {
      "epoch": 0.000226947021484375,
      "model_forward_time": 0.11531567573547363,
      "step": 37183
    },
    {
      "epoch": 0.000226947021484375,
      "step": 37183,
      "training_step_time": 0.49820494651794434
    },
    {
      "epoch": 0.000226953125,
      "model_forward_time": 0.11502695083618164,
      "step": 37184
    },
    {
      "epoch": 0.000226953125,
      "step": 37184,
      "training_step_time": 0.3865540027618408
    },
    {
      "epoch": 0.000226959228515625,
      "model_forward_time": 0.11499667167663574,
      "step": 37185
    },
    {
      "epoch": 0.000226959228515625,
      "step": 37185,
      "training_step_time": 0.3943750858306885
    },
    {
      "epoch": 0.00022696533203125,
      "model_forward_time": 0.11611032485961914,
      "step": 37186
    },
    {
      "epoch": 0.00022696533203125,
      "step": 37186,
      "training_step_time": 0.4001960754394531
    },
    {
      "epoch": 0.000226971435546875,
      "model_forward_time": 0.11514711380004883,
      "step": 37187
    },
    {
      "epoch": 0.000226971435546875,
      "step": 37187,
      "training_step_time": 0.42177844047546387
    },
    {
      "epoch": 0.0002269775390625,
      "model_forward_time": 0.11498737335205078,
      "step": 37188
    },
    {
      "epoch": 0.0002269775390625,
      "step": 37188,
      "training_step_time": 0.40245509147644043
    },
    {
      "epoch": 0.000226983642578125,
      "model_forward_time": 0.11548352241516113,
      "step": 37189
    },
    {
      "epoch": 0.000226983642578125,
      "step": 37189,
      "training_step_time": 0.6493170261383057
    },
    {
      "epoch": 0.00022698974609375,
      "grad_norm": 0.17921148240566254,
      "learning_rate": 3.457536168078247e-05,
      "loss": 0.0395,
      "step": 37190
    },
    {
      "epoch": 0.00022698974609375,
      "model_forward_time": 0.11532998085021973,
      "step": 37190
    },
    {
      "epoch": 0.00022698974609375,
      "step": 37190,
      "training_step_time": 0.5382719039916992
    },
    {
      "epoch": 0.000226995849609375,
      "model_forward_time": 0.11465620994567871,
      "step": 37191
    },
    {
      "epoch": 0.000226995849609375,
      "step": 37191,
      "training_step_time": 0.41245174407958984
    },
    {
      "epoch": 0.000227001953125,
      "model_forward_time": 0.11498737335205078,
      "step": 37192
    },
    {
      "epoch": 0.000227001953125,
      "step": 37192,
      "training_step_time": 0.5214567184448242
    },
    {
      "epoch": 0.000227008056640625,
      "model_forward_time": 0.11491966247558594,
      "step": 37193
    },
    {
      "epoch": 0.000227008056640625,
      "step": 37193,
      "training_step_time": 0.3776280879974365
    },
    {
      "epoch": 0.00022701416015625,
      "model_forward_time": 0.11489725112915039,
      "step": 37194
    },
    {
      "epoch": 0.00022701416015625,
      "step": 37194,
      "training_step_time": 0.3767716884613037
    },
    {
      "epoch": 0.000227020263671875,
      "model_forward_time": 0.11486172676086426,
      "step": 37195
    },
    {
      "epoch": 0.000227020263671875,
      "step": 37195,
      "training_step_time": 0.3983941078186035
    },
    {
      "epoch": 0.0002270263671875,
      "model_forward_time": 0.11545538902282715,
      "step": 37196
    },
    {
      "epoch": 0.0002270263671875,
      "step": 37196,
      "training_step_time": 0.38708949089050293
    },
    {
      "epoch": 0.000227032470703125,
      "model_forward_time": 0.11502861976623535,
      "step": 37197
    },
    {
      "epoch": 0.000227032470703125,
      "step": 37197,
      "training_step_time": 0.38361573219299316
    },
    {
      "epoch": 0.00022703857421875,
      "model_forward_time": 0.11550378799438477,
      "step": 37198
    },
    {
      "epoch": 0.00022703857421875,
      "step": 37198,
      "training_step_time": 0.3946504592895508
    },
    {
      "epoch": 0.000227044677734375,
      "model_forward_time": 0.11497735977172852,
      "step": 37199
    },
    {
      "epoch": 0.000227044677734375,
      "step": 37199,
      "training_step_time": 0.3991994857788086
    },
    {
      "epoch": 0.00022705078125,
      "grad_norm": 0.1296972930431366,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 0.0404,
      "step": 37200
    },
    {
      "epoch": 0.00022705078125,
      "model_forward_time": 0.1149139404296875,
      "step": 37200
    },
    {
      "epoch": 0.00022705078125,
      "step": 37200,
      "training_step_time": 0.3995361328125
    },
    {
      "epoch": 0.000227056884765625,
      "model_forward_time": 0.1158590316772461,
      "step": 37201
    },
    {
      "epoch": 0.000227056884765625,
      "step": 37201,
      "training_step_time": 0.5660829544067383
    },
    {
      "epoch": 0.00022706298828125,
      "model_forward_time": 0.11546802520751953,
      "step": 37202
    },
    {
      "epoch": 0.00022706298828125,
      "step": 37202,
      "training_step_time": 0.38997840881347656
    },
    {
      "epoch": 0.000227069091796875,
      "model_forward_time": 0.11553144454956055,
      "step": 37203
    },
    {
      "epoch": 0.000227069091796875,
      "step": 37203,
      "training_step_time": 0.3914458751678467
    },
    {
      "epoch": 0.0002270751953125,
      "model_forward_time": 0.11512947082519531,
      "step": 37204
    },
    {
      "epoch": 0.0002270751953125,
      "step": 37204,
      "training_step_time": 0.390500545501709
    },
    {
      "epoch": 0.000227081298828125,
      "model_forward_time": 0.11488699913024902,
      "step": 37205
    },
    {
      "epoch": 0.000227081298828125,
      "step": 37205,
      "training_step_time": 0.46359920501708984
    },
    {
      "epoch": 0.00022708740234375,
      "model_forward_time": 0.11617469787597656,
      "step": 37206
    },
    {
      "epoch": 0.00022708740234375,
      "step": 37206,
      "training_step_time": 0.49698686599731445
    },
    {
      "epoch": 0.000227093505859375,
      "model_forward_time": 0.11518597602844238,
      "step": 37207
    },
    {
      "epoch": 0.000227093505859375,
      "step": 37207,
      "training_step_time": 0.5614442825317383
    },
    {
      "epoch": 0.000227099609375,
      "model_forward_time": 0.1146242618560791,
      "step": 37208
    },
    {
      "epoch": 0.000227099609375,
      "step": 37208,
      "training_step_time": 0.38455653190612793
    },
    {
      "epoch": 0.000227105712890625,
      "model_forward_time": 0.11492633819580078,
      "step": 37209
    },
    {
      "epoch": 0.000227105712890625,
      "step": 37209,
      "training_step_time": 0.3774728775024414
    },
    {
      "epoch": 0.00022711181640625,
      "grad_norm": 0.09673524647951126,
      "learning_rate": 3.452294357528297e-05,
      "loss": 0.0399,
      "step": 37210
    },
    {
      "epoch": 0.00022711181640625,
      "model_forward_time": 0.11437511444091797,
      "step": 37210
    },
    {
      "epoch": 0.00022711181640625,
      "step": 37210,
      "training_step_time": 0.38564133644104004
    },
    {
      "epoch": 0.000227117919921875,
      "model_forward_time": 0.11523818969726562,
      "step": 37211
    },
    {
      "epoch": 0.000227117919921875,
      "step": 37211,
      "training_step_time": 0.3992502689361572
    },
    {
      "epoch": 0.0002271240234375,
      "model_forward_time": 0.11599230766296387,
      "step": 37212
    },
    {
      "epoch": 0.0002271240234375,
      "step": 37212,
      "training_step_time": 0.38967180252075195
    },
    {
      "epoch": 0.000227130126953125,
      "model_forward_time": 0.11443209648132324,
      "step": 37213
    },
    {
      "epoch": 0.000227130126953125,
      "step": 37213,
      "training_step_time": 0.6079895496368408
    },
    {
      "epoch": 0.00022713623046875,
      "model_forward_time": 0.11613821983337402,
      "step": 37214
    },
    {
      "epoch": 0.00022713623046875,
      "step": 37214,
      "training_step_time": 0.39058732986450195
    },
    {
      "epoch": 0.000227142333984375,
      "model_forward_time": 0.11539840698242188,
      "step": 37215
    },
    {
      "epoch": 0.000227142333984375,
      "step": 37215,
      "training_step_time": 0.3867812156677246
    },
    {
      "epoch": 0.0002271484375,
      "model_forward_time": 0.11514663696289062,
      "step": 37216
    },
    {
      "epoch": 0.0002271484375,
      "step": 37216,
      "training_step_time": 0.3846166133880615
    },
    {
      "epoch": 0.000227154541015625,
      "model_forward_time": 0.11549997329711914,
      "step": 37217
    },
    {
      "epoch": 0.000227154541015625,
      "step": 37217,
      "training_step_time": 0.4036376476287842
    },
    {
      "epoch": 0.00022716064453125,
      "model_forward_time": 0.11514782905578613,
      "step": 37218
    },
    {
      "epoch": 0.00022716064453125,
      "step": 37218,
      "training_step_time": 0.3952031135559082
    },
    {
      "epoch": 0.000227166748046875,
      "model_forward_time": 0.11439323425292969,
      "step": 37219
    },
    {
      "epoch": 0.000227166748046875,
      "step": 37219,
      "training_step_time": 0.38762426376342773
    },
    {
      "epoch": 0.0002271728515625,
      "grad_norm": 0.1189919039607048,
      "learning_rate": 3.449674157083443e-05,
      "loss": 0.0381,
      "step": 37220
    },
    {
      "epoch": 0.0002271728515625,
      "model_forward_time": 0.11632204055786133,
      "step": 37220
    },
    {
      "epoch": 0.0002271728515625,
      "step": 37220,
      "training_step_time": 0.4319419860839844
    },
    {
      "epoch": 0.000227178955078125,
      "model_forward_time": 0.11529183387756348,
      "step": 37221
    },
    {
      "epoch": 0.000227178955078125,
      "step": 37221,
      "training_step_time": 0.4069540500640869
    },
    {
      "epoch": 0.00022718505859375,
      "model_forward_time": 0.11464190483093262,
      "step": 37222
    },
    {
      "epoch": 0.00022718505859375,
      "step": 37222,
      "training_step_time": 0.39487242698669434
    },
    {
      "epoch": 0.000227191162109375,
      "model_forward_time": 0.11531233787536621,
      "step": 37223
    },
    {
      "epoch": 0.000227191162109375,
      "step": 37223,
      "training_step_time": 0.4022698402404785
    },
    {
      "epoch": 0.000227197265625,
      "model_forward_time": 0.1156160831451416,
      "step": 37224
    },
    {
      "epoch": 0.000227197265625,
      "step": 37224,
      "training_step_time": 0.3922100067138672
    },
    {
      "epoch": 0.000227203369140625,
      "model_forward_time": 0.11545157432556152,
      "step": 37225
    },
    {
      "epoch": 0.000227203369140625,
      "step": 37225,
      "training_step_time": 0.4698984622955322
    },
    {
      "epoch": 0.00022720947265625,
      "model_forward_time": 0.11548399925231934,
      "step": 37226
    },
    {
      "epoch": 0.00022720947265625,
      "step": 37226,
      "training_step_time": 0.3979928493499756
    },
    {
      "epoch": 0.000227215576171875,
      "model_forward_time": 0.1150217056274414,
      "step": 37227
    },
    {
      "epoch": 0.000227215576171875,
      "step": 37227,
      "training_step_time": 0.4179716110229492
    },
    {
      "epoch": 0.0002272216796875,
      "model_forward_time": 0.11501264572143555,
      "step": 37228
    },
    {
      "epoch": 0.0002272216796875,
      "step": 37228,
      "training_step_time": 0.4294869899749756
    },
    {
      "epoch": 0.000227227783203125,
      "model_forward_time": 0.11588001251220703,
      "step": 37229
    },
    {
      "epoch": 0.000227227783203125,
      "step": 37229,
      "training_step_time": 0.40328049659729004
    },
    {
      "epoch": 0.00022723388671875,
      "grad_norm": 0.14121749997138977,
      "learning_rate": 3.447054427586644e-05,
      "loss": 0.0376,
      "step": 37230
    },
    {
      "epoch": 0.00022723388671875,
      "model_forward_time": 0.11563444137573242,
      "step": 37230
    },
    {
      "epoch": 0.00022723388671875,
      "step": 37230,
      "training_step_time": 0.3994314670562744
    },
    {
      "epoch": 0.000227239990234375,
      "model_forward_time": 0.1158285140991211,
      "step": 37231
    },
    {
      "epoch": 0.000227239990234375,
      "step": 37231,
      "training_step_time": 0.5864317417144775
    },
    {
      "epoch": 0.00022724609375,
      "model_forward_time": 0.11486005783081055,
      "step": 37232
    },
    {
      "epoch": 0.00022724609375,
      "step": 37232,
      "training_step_time": 0.41849637031555176
    },
    {
      "epoch": 0.000227252197265625,
      "model_forward_time": 0.11490464210510254,
      "step": 37233
    },
    {
      "epoch": 0.000227252197265625,
      "step": 37233,
      "training_step_time": 0.4959523677825928
    },
    {
      "epoch": 0.00022725830078125,
      "model_forward_time": 0.11535024642944336,
      "step": 37234
    },
    {
      "epoch": 0.00022725830078125,
      "step": 37234,
      "training_step_time": 0.5120387077331543
    },
    {
      "epoch": 0.000227264404296875,
      "model_forward_time": 0.11587953567504883,
      "step": 37235
    },
    {
      "epoch": 0.000227264404296875,
      "step": 37235,
      "training_step_time": 0.4326770305633545
    },
    {
      "epoch": 0.0002272705078125,
      "model_forward_time": 0.11550593376159668,
      "step": 37236
    },
    {
      "epoch": 0.0002272705078125,
      "step": 37236,
      "training_step_time": 0.39020276069641113
    },
    {
      "epoch": 0.000227276611328125,
      "model_forward_time": 0.11505794525146484,
      "step": 37237
    },
    {
      "epoch": 0.000227276611328125,
      "step": 37237,
      "training_step_time": 0.3848443031311035
    },
    {
      "epoch": 0.00022728271484375,
      "model_forward_time": 0.11530184745788574,
      "step": 37238
    },
    {
      "epoch": 0.00022728271484375,
      "step": 37238,
      "training_step_time": 0.3935549259185791
    },
    {
      "epoch": 0.000227288818359375,
      "model_forward_time": 0.11517977714538574,
      "step": 37239
    },
    {
      "epoch": 0.000227288818359375,
      "step": 37239,
      "training_step_time": 0.387066125869751
    },
    {
      "epoch": 0.000227294921875,
      "grad_norm": 0.10155322402715683,
      "learning_rate": 3.444435169833706e-05,
      "loss": 0.0363,
      "step": 37240
    },
    {
      "epoch": 0.000227294921875,
      "model_forward_time": 0.11539840698242188,
      "step": 37240
    },
    {
      "epoch": 0.000227294921875,
      "step": 37240,
      "training_step_time": 0.393796443939209
    },
    {
      "epoch": 0.000227301025390625,
      "model_forward_time": 0.1157994270324707,
      "step": 37241
    },
    {
      "epoch": 0.000227301025390625,
      "step": 37241,
      "training_step_time": 0.41162657737731934
    },
    {
      "epoch": 0.00022730712890625,
      "model_forward_time": 0.11480903625488281,
      "step": 37242
    },
    {
      "epoch": 0.00022730712890625,
      "step": 37242,
      "training_step_time": 0.3941938877105713
    },
    {
      "epoch": 0.000227313232421875,
      "model_forward_time": 0.11534738540649414,
      "step": 37243
    },
    {
      "epoch": 0.000227313232421875,
      "step": 37243,
      "training_step_time": 0.554875373840332
    },
    {
      "epoch": 0.0002273193359375,
      "model_forward_time": 0.1155405044555664,
      "step": 37244
    },
    {
      "epoch": 0.0002273193359375,
      "step": 37244,
      "training_step_time": 0.3843560218811035
    },
    {
      "epoch": 0.000227325439453125,
      "model_forward_time": 0.1155555248260498,
      "step": 37245
    },
    {
      "epoch": 0.000227325439453125,
      "step": 37245,
      "training_step_time": 0.41495561599731445
    },
    {
      "epoch": 0.00022733154296875,
      "model_forward_time": 0.11579680442810059,
      "step": 37246
    },
    {
      "epoch": 0.00022733154296875,
      "step": 37246,
      "training_step_time": 0.4320533275604248
    },
    {
      "epoch": 0.000227337646484375,
      "model_forward_time": 0.11516141891479492,
      "step": 37247
    },
    {
      "epoch": 0.000227337646484375,
      "step": 37247,
      "training_step_time": 0.4094526767730713
    },
    {
      "epoch": 0.00022734375,
      "model_forward_time": 0.1150355339050293,
      "step": 37248
    },
    {
      "epoch": 0.00022734375,
      "step": 37248,
      "training_step_time": 0.49756574630737305
    },
    {
      "epoch": 0.000227349853515625,
      "model_forward_time": 0.11594104766845703,
      "step": 37249
    },
    {
      "epoch": 0.000227349853515625,
      "step": 37249,
      "training_step_time": 0.5412874221801758
    },
    {
      "epoch": 0.00022735595703125,
      "grad_norm": 0.10656949132680893,
      "learning_rate": 3.4418163846202944e-05,
      "loss": 0.0389,
      "step": 37250
    },
    {
      "epoch": 0.00022735595703125,
      "model_forward_time": 0.11548995971679688,
      "step": 37250
    },
    {
      "epoch": 0.00022735595703125,
      "step": 37250,
      "training_step_time": 0.38974833488464355
    },
    {
      "epoch": 0.000227362060546875,
      "model_forward_time": 0.11503481864929199,
      "step": 37251
    },
    {
      "epoch": 0.000227362060546875,
      "step": 37251,
      "training_step_time": 0.37924957275390625
    },
    {
      "epoch": 0.0002273681640625,
      "model_forward_time": 0.11509037017822266,
      "step": 37252
    },
    {
      "epoch": 0.0002273681640625,
      "step": 37252,
      "training_step_time": 0.39471912384033203
    },
    {
      "epoch": 0.000227374267578125,
      "model_forward_time": 0.11525630950927734,
      "step": 37253
    },
    {
      "epoch": 0.000227374267578125,
      "step": 37253,
      "training_step_time": 0.3827688694000244
    },
    {
      "epoch": 0.00022738037109375,
      "model_forward_time": 0.11527037620544434,
      "step": 37254
    },
    {
      "epoch": 0.00022738037109375,
      "step": 37254,
      "training_step_time": 0.4184441566467285
    },
    {
      "epoch": 0.000227386474609375,
      "model_forward_time": 0.11513614654541016,
      "step": 37255
    },
    {
      "epoch": 0.000227386474609375,
      "step": 37255,
      "training_step_time": 0.6925678253173828
    },
    {
      "epoch": 0.000227392578125,
      "model_forward_time": 0.11462187767028809,
      "step": 37256
    },
    {
      "epoch": 0.000227392578125,
      "step": 37256,
      "training_step_time": 0.38802433013916016
    },
    {
      "epoch": 0.000227398681640625,
      "model_forward_time": 0.11465215682983398,
      "step": 37257
    },
    {
      "epoch": 0.000227398681640625,
      "step": 37257,
      "training_step_time": 0.37777113914489746
    },
    {
      "epoch": 0.00022740478515625,
      "model_forward_time": 0.11618852615356445,
      "step": 37258
    },
    {
      "epoch": 0.00022740478515625,
      "step": 37258,
      "training_step_time": 0.3691549301147461
    },
    {
      "epoch": 0.000227410888671875,
      "model_forward_time": 0.11392521858215332,
      "step": 37259
    },
    {
      "epoch": 0.000227410888671875,
      "step": 37259,
      "training_step_time": 0.38898539543151855
    },
    {
      "epoch": 0.0002274169921875,
      "grad_norm": 0.14368845522403717,
      "learning_rate": 3.439198072741921e-05,
      "loss": 0.0366,
      "step": 37260
    },
    {
      "epoch": 0.0002274169921875,
      "model_forward_time": 0.11530303955078125,
      "step": 37260
    },
    {
      "epoch": 0.0002274169921875,
      "step": 37260,
      "training_step_time": 0.4070250988006592
    },
    {
      "epoch": 0.000227423095703125,
      "model_forward_time": 0.11538577079772949,
      "step": 37261
    },
    {
      "epoch": 0.000227423095703125,
      "step": 37261,
      "training_step_time": 0.5903046131134033
    },
    {
      "epoch": 0.00022742919921875,
      "model_forward_time": 0.11531662940979004,
      "step": 37262
    },
    {
      "epoch": 0.00022742919921875,
      "step": 37262,
      "training_step_time": 0.4451894760131836
    },
    {
      "epoch": 0.000227435302734375,
      "model_forward_time": 0.1151583194732666,
      "step": 37263
    },
    {
      "epoch": 0.000227435302734375,
      "step": 37263,
      "training_step_time": 0.4100770950317383
    },
    {
      "epoch": 0.00022744140625,
      "model_forward_time": 0.11803746223449707,
      "step": 37264
    },
    {
      "epoch": 0.00022744140625,
      "step": 37264,
      "training_step_time": 0.3826735019683838
    },
    {
      "epoch": 0.000227447509765625,
      "model_forward_time": 0.1146690845489502,
      "step": 37265
    },
    {
      "epoch": 0.000227447509765625,
      "step": 37265,
      "training_step_time": 0.3850672245025635
    },
    {
      "epoch": 0.00022745361328125,
      "model_forward_time": 0.11516427993774414,
      "step": 37266
    },
    {
      "epoch": 0.00022745361328125,
      "step": 37266,
      "training_step_time": 0.41454243659973145
    },
    {
      "epoch": 0.000227459716796875,
      "model_forward_time": 0.11533761024475098,
      "step": 37267
    },
    {
      "epoch": 0.000227459716796875,
      "step": 37267,
      "training_step_time": 0.46837711334228516
    },
    {
      "epoch": 0.0002274658203125,
      "model_forward_time": 0.11492490768432617,
      "step": 37268
    },
    {
      "epoch": 0.0002274658203125,
      "step": 37268,
      "training_step_time": 0.3913145065307617
    },
    {
      "epoch": 0.000227471923828125,
      "model_forward_time": 0.11479496955871582,
      "step": 37269
    },
    {
      "epoch": 0.000227471923828125,
      "step": 37269,
      "training_step_time": 0.4029228687286377
    },
    {
      "epoch": 0.00022747802734375,
      "grad_norm": 0.08476567268371582,
      "learning_rate": 3.436580234993965e-05,
      "loss": 0.0355,
      "step": 37270
    },
    {
      "epoch": 0.00022747802734375,
      "model_forward_time": 0.1159358024597168,
      "step": 37270
    },
    {
      "epoch": 0.00022747802734375,
      "step": 37270,
      "training_step_time": 0.3777005672454834
    },
    {
      "epoch": 0.000227484130859375,
      "model_forward_time": 0.1151740550994873,
      "step": 37271
    },
    {
      "epoch": 0.000227484130859375,
      "step": 37271,
      "training_step_time": 0.3983309268951416
    },
    {
      "epoch": 0.000227490234375,
      "model_forward_time": 0.11548757553100586,
      "step": 37272
    },
    {
      "epoch": 0.000227490234375,
      "step": 37272,
      "training_step_time": 0.4019453525543213
    },
    {
      "epoch": 0.000227496337890625,
      "model_forward_time": 0.11489129066467285,
      "step": 37273
    },
    {
      "epoch": 0.000227496337890625,
      "step": 37273,
      "training_step_time": 0.5880434513092041
    },
    {
      "epoch": 0.00022750244140625,
      "model_forward_time": 0.11429238319396973,
      "step": 37274
    },
    {
      "epoch": 0.00022750244140625,
      "step": 37274,
      "training_step_time": 0.4814300537109375
    },
    {
      "epoch": 0.000227508544921875,
      "model_forward_time": 0.11467838287353516,
      "step": 37275
    },
    {
      "epoch": 0.000227508544921875,
      "step": 37275,
      "training_step_time": 0.42986011505126953
    },
    {
      "epoch": 0.0002275146484375,
      "model_forward_time": 0.11514163017272949,
      "step": 37276
    },
    {
      "epoch": 0.0002275146484375,
      "step": 37276,
      "training_step_time": 0.48864173889160156
    },
    {
      "epoch": 0.000227520751953125,
      "model_forward_time": 0.11467504501342773,
      "step": 37277
    },
    {
      "epoch": 0.000227520751953125,
      "step": 37277,
      "training_step_time": 0.4829256534576416
    },
    {
      "epoch": 0.00022752685546875,
      "model_forward_time": 0.11424636840820312,
      "step": 37278
    },
    {
      "epoch": 0.00022752685546875,
      "step": 37278,
      "training_step_time": 0.3960738182067871
    },
    {
      "epoch": 0.000227532958984375,
      "model_forward_time": 0.1142578125,
      "step": 37279
    },
    {
      "epoch": 0.000227532958984375,
      "step": 37279,
      "training_step_time": 0.4167947769165039
    },
    {
      "epoch": 0.0002275390625,
      "grad_norm": 0.0900130346417427,
      "learning_rate": 3.4339628721716505e-05,
      "loss": 0.0414,
      "step": 37280
    },
    {
      "epoch": 0.0002275390625,
      "model_forward_time": 0.11503219604492188,
      "step": 37280
    },
    {
      "epoch": 0.0002275390625,
      "step": 37280,
      "training_step_time": 0.3971867561340332
    },
    {
      "epoch": 0.000227545166015625,
      "model_forward_time": 0.11467409133911133,
      "step": 37281
    },
    {
      "epoch": 0.000227545166015625,
      "step": 37281,
      "training_step_time": 0.39187121391296387
    },
    {
      "epoch": 0.00022755126953125,
      "model_forward_time": 0.11575865745544434,
      "step": 37282
    },
    {
      "epoch": 0.00022755126953125,
      "step": 37282,
      "training_step_time": 0.38698840141296387
    },
    {
      "epoch": 0.000227557373046875,
      "model_forward_time": 0.11521625518798828,
      "step": 37283
    },
    {
      "epoch": 0.000227557373046875,
      "step": 37283,
      "training_step_time": 0.39728331565856934
    },
    {
      "epoch": 0.0002275634765625,
      "model_forward_time": 0.11583590507507324,
      "step": 37284
    },
    {
      "epoch": 0.0002275634765625,
      "step": 37284,
      "training_step_time": 0.39768052101135254
    },
    {
      "epoch": 0.000227569580078125,
      "model_forward_time": 0.11485028266906738,
      "step": 37285
    },
    {
      "epoch": 0.000227569580078125,
      "step": 37285,
      "training_step_time": 0.3977940082550049
    },
    {
      "epoch": 0.00022757568359375,
      "model_forward_time": 0.11498117446899414,
      "step": 37286
    },
    {
      "epoch": 0.00022757568359375,
      "step": 37286,
      "training_step_time": 0.3892960548400879
    },
    {
      "epoch": 0.000227581787109375,
      "model_forward_time": 0.11550498008728027,
      "step": 37287
    },
    {
      "epoch": 0.000227581787109375,
      "step": 37287,
      "training_step_time": 0.3783392906188965
    },
    {
      "epoch": 0.000227587890625,
      "model_forward_time": 0.11534547805786133,
      "step": 37288
    },
    {
      "epoch": 0.000227587890625,
      "step": 37288,
      "training_step_time": 0.3904128074645996
    },
    {
      "epoch": 0.000227593994140625,
      "model_forward_time": 0.11564040184020996,
      "step": 37289
    },
    {
      "epoch": 0.000227593994140625,
      "step": 37289,
      "training_step_time": 0.4129362106323242
    },
    {
      "epoch": 0.00022760009765625,
      "grad_norm": 0.08868085592985153,
      "learning_rate": 3.431345985070067e-05,
      "loss": 0.034,
      "step": 37290
    },
    {
      "epoch": 0.00022760009765625,
      "model_forward_time": 0.11524081230163574,
      "step": 37290
    },
    {
      "epoch": 0.00022760009765625,
      "step": 37290,
      "training_step_time": 0.36626195907592773
    },
    {
      "epoch": 0.000227606201171875,
      "model_forward_time": 0.11568331718444824,
      "step": 37291
    },
    {
      "epoch": 0.000227606201171875,
      "step": 37291,
      "training_step_time": 0.6209075450897217
    },
    {
      "epoch": 0.0002276123046875,
      "model_forward_time": 0.11447477340698242,
      "step": 37292
    },
    {
      "epoch": 0.0002276123046875,
      "step": 37292,
      "training_step_time": 0.4774308204650879
    },
    {
      "epoch": 0.000227618408203125,
      "model_forward_time": 0.11456108093261719,
      "step": 37293
    },
    {
      "epoch": 0.000227618408203125,
      "step": 37293,
      "training_step_time": 0.42934179306030273
    },
    {
      "epoch": 0.00022762451171875,
      "model_forward_time": 0.11396288871765137,
      "step": 37294
    },
    {
      "epoch": 0.00022762451171875,
      "step": 37294,
      "training_step_time": 0.39124250411987305
    },
    {
      "epoch": 0.000227630615234375,
      "model_forward_time": 0.11419868469238281,
      "step": 37295
    },
    {
      "epoch": 0.000227630615234375,
      "step": 37295,
      "training_step_time": 0.3898293972015381
    },
    {
      "epoch": 0.00022763671875,
      "model_forward_time": 0.11440443992614746,
      "step": 37296
    },
    {
      "epoch": 0.00022763671875,
      "step": 37296,
      "training_step_time": 0.41234827041625977
    },
    {
      "epoch": 0.000227642822265625,
      "model_forward_time": 0.11447834968566895,
      "step": 37297
    },
    {
      "epoch": 0.000227642822265625,
      "step": 37297,
      "training_step_time": 0.4339590072631836
    },
    {
      "epoch": 0.00022764892578125,
      "model_forward_time": 0.11520576477050781,
      "step": 37298
    },
    {
      "epoch": 0.00022764892578125,
      "step": 37298,
      "training_step_time": 0.3876662254333496
    },
    {
      "epoch": 0.000227655029296875,
      "model_forward_time": 0.11528897285461426,
      "step": 37299
    },
    {
      "epoch": 0.000227655029296875,
      "step": 37299,
      "training_step_time": 0.38186001777648926
    },
    {
      "epoch": 0.0002276611328125,
      "grad_norm": 0.13970714807510376,
      "learning_rate": 3.4287295744841586e-05,
      "loss": 0.0381,
      "step": 37300
    },
    {
      "epoch": 0.0002276611328125,
      "model_forward_time": 0.11506319046020508,
      "step": 37300
    },
    {
      "epoch": 0.0002276611328125,
      "step": 37300,
      "training_step_time": 0.40410780906677246
    },
    {
      "epoch": 0.000227667236328125,
      "model_forward_time": 0.11525940895080566,
      "step": 37301
    },
    {
      "epoch": 0.000227667236328125,
      "step": 37301,
      "training_step_time": 0.39139556884765625
    },
    {
      "epoch": 0.00022767333984375,
      "model_forward_time": 0.11548638343811035,
      "step": 37302
    },
    {
      "epoch": 0.00022767333984375,
      "step": 37302,
      "training_step_time": 0.40042757987976074
    },
    {
      "epoch": 0.000227679443359375,
      "model_forward_time": 0.11515951156616211,
      "step": 37303
    },
    {
      "epoch": 0.000227679443359375,
      "step": 37303,
      "training_step_time": 0.7043972015380859
    },
    {
      "epoch": 0.000227685546875,
      "model_forward_time": 0.11484766006469727,
      "step": 37304
    },
    {
      "epoch": 0.000227685546875,
      "step": 37304,
      "training_step_time": 0.43430089950561523
    },
    {
      "epoch": 0.000227691650390625,
      "model_forward_time": 0.11482930183410645,
      "step": 37305
    },
    {
      "epoch": 0.000227691650390625,
      "step": 37305,
      "training_step_time": 0.3907744884490967
    },
    {
      "epoch": 0.00022769775390625,
      "model_forward_time": 0.1143808364868164,
      "step": 37306
    },
    {
      "epoch": 0.00022769775390625,
      "step": 37306,
      "training_step_time": 0.40505313873291016
    },
    {
      "epoch": 0.000227703857421875,
      "model_forward_time": 0.11489105224609375,
      "step": 37307
    },
    {
      "epoch": 0.000227703857421875,
      "step": 37307,
      "training_step_time": 0.4028964042663574
    },
    {
      "epoch": 0.0002277099609375,
      "model_forward_time": 0.11430978775024414,
      "step": 37308
    },
    {
      "epoch": 0.0002277099609375,
      "step": 37308,
      "training_step_time": 0.3866002559661865
    },
    {
      "epoch": 0.000227716064453125,
      "model_forward_time": 0.11529827117919922,
      "step": 37309
    },
    {
      "epoch": 0.000227716064453125,
      "step": 37309,
      "training_step_time": 0.40566468238830566
    },
    {
      "epoch": 0.00022772216796875,
      "grad_norm": 0.09668941050767899,
      "learning_rate": 3.4261136412087155e-05,
      "loss": 0.0392,
      "step": 37310
    },
    {
      "epoch": 0.00022772216796875,
      "model_forward_time": 0.11538171768188477,
      "step": 37310
    },
    {
      "epoch": 0.00022772216796875,
      "step": 37310,
      "training_step_time": 0.3930976390838623
    },
    {
      "epoch": 0.000227728271484375,
      "model_forward_time": 0.11506128311157227,
      "step": 37311
    },
    {
      "epoch": 0.000227728271484375,
      "step": 37311,
      "training_step_time": 0.39708638191223145
    },
    {
      "epoch": 0.000227734375,
      "model_forward_time": 0.11533331871032715,
      "step": 37312
    },
    {
      "epoch": 0.000227734375,
      "step": 37312,
      "training_step_time": 0.4054560661315918
    },
    {
      "epoch": 0.000227740478515625,
      "model_forward_time": 0.11515378952026367,
      "step": 37313
    },
    {
      "epoch": 0.000227740478515625,
      "step": 37313,
      "training_step_time": 0.4001750946044922
    },
    {
      "epoch": 0.00022774658203125,
      "model_forward_time": 0.11522960662841797,
      "step": 37314
    },
    {
      "epoch": 0.00022774658203125,
      "step": 37314,
      "training_step_time": 0.41144275665283203
    },
    {
      "epoch": 0.000227752685546875,
      "model_forward_time": 0.1155850887298584,
      "step": 37315
    },
    {
      "epoch": 0.000227752685546875,
      "step": 37315,
      "training_step_time": 0.49196887016296387
    },
    {
      "epoch": 0.0002277587890625,
      "model_forward_time": 0.11461424827575684,
      "step": 37316
    },
    {
      "epoch": 0.0002277587890625,
      "step": 37316,
      "training_step_time": 0.39073944091796875
    },
    {
      "epoch": 0.000227764892578125,
      "model_forward_time": 0.11549520492553711,
      "step": 37317
    },
    {
      "epoch": 0.000227764892578125,
      "step": 37317,
      "training_step_time": 0.50589919090271
    },
    {
      "epoch": 0.00022777099609375,
      "model_forward_time": 0.11524772644042969,
      "step": 37318
    },
    {
      "epoch": 0.00022777099609375,
      "step": 37318,
      "training_step_time": 0.4721953868865967
    },
    {
      "epoch": 0.000227777099609375,
      "model_forward_time": 0.11529254913330078,
      "step": 37319
    },
    {
      "epoch": 0.000227777099609375,
      "step": 37319,
      "training_step_time": 0.46253013610839844
    },
    {
      "epoch": 0.000227783203125,
      "grad_norm": 0.10250180959701538,
      "learning_rate": 3.423498186038393e-05,
      "loss": 0.0342,
      "step": 37320
    },
    {
      "epoch": 0.000227783203125,
      "model_forward_time": 0.11456966400146484,
      "step": 37320
    },
    {
      "epoch": 0.000227783203125,
      "step": 37320,
      "training_step_time": 0.4846007823944092
    },
    {
      "epoch": 0.000227789306640625,
      "model_forward_time": 0.11537575721740723,
      "step": 37321
    },
    {
      "epoch": 0.000227789306640625,
      "step": 37321,
      "training_step_time": 0.39398789405822754
    },
    {
      "epoch": 0.00022779541015625,
      "model_forward_time": 0.11461949348449707,
      "step": 37322
    },
    {
      "epoch": 0.00022779541015625,
      "step": 37322,
      "training_step_time": 0.38338756561279297
    },
    {
      "epoch": 0.000227801513671875,
      "model_forward_time": 0.11495685577392578,
      "step": 37323
    },
    {
      "epoch": 0.000227801513671875,
      "step": 37323,
      "training_step_time": 0.3952946662902832
    },
    {
      "epoch": 0.0002278076171875,
      "model_forward_time": 0.11625814437866211,
      "step": 37324
    },
    {
      "epoch": 0.0002278076171875,
      "step": 37324,
      "training_step_time": 0.4012444019317627
    },
    {
      "epoch": 0.000227813720703125,
      "model_forward_time": 0.11527442932128906,
      "step": 37325
    },
    {
      "epoch": 0.000227813720703125,
      "step": 37325,
      "training_step_time": 0.39458322525024414
    },
    {
      "epoch": 0.00022781982421875,
      "model_forward_time": 0.11631321907043457,
      "step": 37326
    },
    {
      "epoch": 0.00022781982421875,
      "step": 37326,
      "training_step_time": 0.4012637138366699
    },
    {
      "epoch": 0.000227825927734375,
      "model_forward_time": 0.11554193496704102,
      "step": 37327
    },
    {
      "epoch": 0.000227825927734375,
      "step": 37327,
      "training_step_time": 0.3994762897491455
    },
    {
      "epoch": 0.00022783203125,
      "model_forward_time": 0.11553478240966797,
      "step": 37328
    },
    {
      "epoch": 0.00022783203125,
      "step": 37328,
      "training_step_time": 0.3891890048980713
    },
    {
      "epoch": 0.000227838134765625,
      "model_forward_time": 0.11622881889343262,
      "step": 37329
    },
    {
      "epoch": 0.000227838134765625,
      "step": 37329,
      "training_step_time": 0.3787808418273926
    },
    {
      "epoch": 0.00022784423828125,
      "grad_norm": 0.12330124527215958,
      "learning_rate": 3.420883209767697e-05,
      "loss": 0.0352,
      "step": 37330
    },
    {
      "epoch": 0.00022784423828125,
      "model_forward_time": 0.11535882949829102,
      "step": 37330
    },
    {
      "epoch": 0.00022784423828125,
      "step": 37330,
      "training_step_time": 0.39413905143737793
    },
    {
      "epoch": 0.000227850341796875,
      "model_forward_time": 0.11574602127075195,
      "step": 37331
    },
    {
      "epoch": 0.000227850341796875,
      "step": 37331,
      "training_step_time": 0.44972825050354004
    },
    {
      "epoch": 0.0002278564453125,
      "model_forward_time": 0.11484122276306152,
      "step": 37332
    },
    {
      "epoch": 0.0002278564453125,
      "step": 37332,
      "training_step_time": 0.44242000579833984
    },
    {
      "epoch": 0.000227862548828125,
      "model_forward_time": 0.11523842811584473,
      "step": 37333
    },
    {
      "epoch": 0.000227862548828125,
      "step": 37333,
      "training_step_time": 0.40636134147644043
    },
    {
      "epoch": 0.00022786865234375,
      "model_forward_time": 0.11488842964172363,
      "step": 37334
    },
    {
      "epoch": 0.00022786865234375,
      "step": 37334,
      "training_step_time": 0.45310163497924805
    },
    {
      "epoch": 0.000227874755859375,
      "model_forward_time": 0.11496353149414062,
      "step": 37335
    },
    {
      "epoch": 0.000227874755859375,
      "step": 37335,
      "training_step_time": 0.49356555938720703
    },
    {
      "epoch": 0.000227880859375,
      "model_forward_time": 0.11494827270507812,
      "step": 37336
    },
    {
      "epoch": 0.000227880859375,
      "step": 37336,
      "training_step_time": 0.39355969429016113
    },
    {
      "epoch": 0.000227886962890625,
      "model_forward_time": 0.1149752140045166,
      "step": 37337
    },
    {
      "epoch": 0.000227886962890625,
      "step": 37337,
      "training_step_time": 0.3923168182373047
    },
    {
      "epoch": 0.00022789306640625,
      "model_forward_time": 0.11506986618041992,
      "step": 37338
    },
    {
      "epoch": 0.00022789306640625,
      "step": 37338,
      "training_step_time": 0.3861510753631592
    },
    {
      "epoch": 0.000227899169921875,
      "model_forward_time": 0.1153109073638916,
      "step": 37339
    },
    {
      "epoch": 0.000227899169921875,
      "step": 37339,
      "training_step_time": 0.3975398540496826
    },
    {
      "epoch": 0.0002279052734375,
      "grad_norm": 0.11393376439809799,
      "learning_rate": 3.418268713190986e-05,
      "loss": 0.0328,
      "step": 37340
    },
    {
      "epoch": 0.0002279052734375,
      "model_forward_time": 0.11529135704040527,
      "step": 37340
    },
    {
      "epoch": 0.0002279052734375,
      "step": 37340,
      "training_step_time": 0.3857421875
    },
    {
      "epoch": 0.000227911376953125,
      "model_forward_time": 0.11530804634094238,
      "step": 37341
    },
    {
      "epoch": 0.000227911376953125,
      "step": 37341,
      "training_step_time": 0.38336968421936035
    },
    {
      "epoch": 0.00022791748046875,
      "model_forward_time": 0.11574816703796387,
      "step": 37342
    },
    {
      "epoch": 0.00022791748046875,
      "step": 37342,
      "training_step_time": 0.39916181564331055
    },
    {
      "epoch": 0.000227923583984375,
      "model_forward_time": 0.11525750160217285,
      "step": 37343
    },
    {
      "epoch": 0.000227923583984375,
      "step": 37343,
      "training_step_time": 0.4039649963378906
    },
    {
      "epoch": 0.0002279296875,
      "model_forward_time": 0.11539959907531738,
      "step": 37344
    },
    {
      "epoch": 0.0002279296875,
      "step": 37344,
      "training_step_time": 0.39919018745422363
    },
    {
      "epoch": 0.000227935791015625,
      "model_forward_time": 0.11551523208618164,
      "step": 37345
    },
    {
      "epoch": 0.000227935791015625,
      "step": 37345,
      "training_step_time": 0.413679838180542
    },
    {
      "epoch": 0.00022794189453125,
      "model_forward_time": 0.11535406112670898,
      "step": 37346
    },
    {
      "epoch": 0.00022794189453125,
      "step": 37346,
      "training_step_time": 0.44581127166748047
    },
    {
      "epoch": 0.000227947998046875,
      "model_forward_time": 0.11509966850280762,
      "step": 37347
    },
    {
      "epoch": 0.000227947998046875,
      "step": 37347,
      "training_step_time": 0.41394615173339844
    },
    {
      "epoch": 0.0002279541015625,
      "model_forward_time": 0.11525440216064453,
      "step": 37348
    },
    {
      "epoch": 0.0002279541015625,
      "step": 37348,
      "training_step_time": 0.4549384117126465
    },
    {
      "epoch": 0.000227960205078125,
      "model_forward_time": 0.11596369743347168,
      "step": 37349
    },
    {
      "epoch": 0.000227960205078125,
      "step": 37349,
      "training_step_time": 0.5071747303009033
    },
    {
      "epoch": 0.00022796630859375,
      "grad_norm": 0.1216658502817154,
      "learning_rate": 3.4156546971024784e-05,
      "loss": 0.0399,
      "step": 37350
    },
    {
      "epoch": 0.00022796630859375,
      "model_forward_time": 0.11586952209472656,
      "step": 37350
    },
    {
      "epoch": 0.00022796630859375,
      "step": 37350,
      "training_step_time": 0.4474060535430908
    },
    {
      "epoch": 0.000227972412109375,
      "model_forward_time": 0.1145620346069336,
      "step": 37351
    },
    {
      "epoch": 0.000227972412109375,
      "step": 37351,
      "training_step_time": 0.41856861114501953
    },
    {
      "epoch": 0.000227978515625,
      "model_forward_time": 0.11484217643737793,
      "step": 37352
    },
    {
      "epoch": 0.000227978515625,
      "step": 37352,
      "training_step_time": 0.389678955078125
    },
    {
      "epoch": 0.000227984619140625,
      "model_forward_time": 0.11494803428649902,
      "step": 37353
    },
    {
      "epoch": 0.000227984619140625,
      "step": 37353,
      "training_step_time": 0.3895690441131592
    },
    {
      "epoch": 0.00022799072265625,
      "model_forward_time": 0.11449050903320312,
      "step": 37354
    },
    {
      "epoch": 0.00022799072265625,
      "step": 37354,
      "training_step_time": 0.4191615581512451
    },
    {
      "epoch": 0.000227996826171875,
      "model_forward_time": 0.11542153358459473,
      "step": 37355
    },
    {
      "epoch": 0.000227996826171875,
      "step": 37355,
      "training_step_time": 0.3994452953338623
    },
    {
      "epoch": 0.0002280029296875,
      "model_forward_time": 0.11505937576293945,
      "step": 37356
    },
    {
      "epoch": 0.0002280029296875,
      "step": 37356,
      "training_step_time": 0.40373730659484863
    },
    {
      "epoch": 0.000228009033203125,
      "model_forward_time": 0.11500334739685059,
      "step": 37357
    },
    {
      "epoch": 0.000228009033203125,
      "step": 37357,
      "training_step_time": 0.40485596656799316
    },
    {
      "epoch": 0.00022801513671875,
      "model_forward_time": 0.11542582511901855,
      "step": 37358
    },
    {
      "epoch": 0.00022801513671875,
      "step": 37358,
      "training_step_time": 0.39728522300720215
    },
    {
      "epoch": 0.000228021240234375,
      "model_forward_time": 0.11499643325805664,
      "step": 37359
    },
    {
      "epoch": 0.000228021240234375,
      "step": 37359,
      "training_step_time": 0.39245104789733887
    },
    {
      "epoch": 0.00022802734375,
      "grad_norm": 0.12251472473144531,
      "learning_rate": 3.413041162296241e-05,
      "loss": 0.0396,
      "step": 37360
    },
    {
      "epoch": 0.00022802734375,
      "model_forward_time": 0.11529421806335449,
      "step": 37360
    },
    {
      "epoch": 0.00022802734375,
      "step": 37360,
      "training_step_time": 0.4773106575012207
    },
    {
      "epoch": 0.000228033447265625,
      "model_forward_time": 0.11506438255310059,
      "step": 37361
    },
    {
      "epoch": 0.000228033447265625,
      "step": 37361,
      "training_step_time": 0.42481398582458496
    },
    {
      "epoch": 0.00022803955078125,
      "model_forward_time": 0.1149301528930664,
      "step": 37362
    },
    {
      "epoch": 0.00022803955078125,
      "step": 37362,
      "training_step_time": 0.4904515743255615
    },
    {
      "epoch": 0.000228045654296875,
      "model_forward_time": 0.11496520042419434,
      "step": 37363
    },
    {
      "epoch": 0.000228045654296875,
      "step": 37363,
      "training_step_time": 0.44554567337036133
    },
    {
      "epoch": 0.0002280517578125,
      "model_forward_time": 0.11517071723937988,
      "step": 37364
    },
    {
      "epoch": 0.0002280517578125,
      "step": 37364,
      "training_step_time": 0.49296116828918457
    },
    {
      "epoch": 0.000228057861328125,
      "model_forward_time": 0.11426329612731934,
      "step": 37365
    },
    {
      "epoch": 0.000228057861328125,
      "step": 37365,
      "training_step_time": 0.38733553886413574
    },
    {
      "epoch": 0.00022806396484375,
      "model_forward_time": 0.11498665809631348,
      "step": 37366
    },
    {
      "epoch": 0.00022806396484375,
      "step": 37366,
      "training_step_time": 0.39076924324035645
    },
    {
      "epoch": 0.000228070068359375,
      "model_forward_time": 0.11481976509094238,
      "step": 37367
    },
    {
      "epoch": 0.000228070068359375,
      "step": 37367,
      "training_step_time": 0.38028860092163086
    },
    {
      "epoch": 0.000228076171875,
      "model_forward_time": 0.11527419090270996,
      "step": 37368
    },
    {
      "epoch": 0.000228076171875,
      "step": 37368,
      "training_step_time": 0.38861703872680664
    },
    {
      "epoch": 0.000228082275390625,
      "model_forward_time": 0.11493420600891113,
      "step": 37369
    },
    {
      "epoch": 0.000228082275390625,
      "step": 37369,
      "training_step_time": 0.39401674270629883
    },
    {
      "epoch": 0.00022808837890625,
      "grad_norm": 0.14980299770832062,
      "learning_rate": 3.410428109566198e-05,
      "loss": 0.039,
      "step": 37370
    },
    {
      "epoch": 0.00022808837890625,
      "model_forward_time": 0.11528158187866211,
      "step": 37370
    },
    {
      "epoch": 0.00022808837890625,
      "step": 37370,
      "training_step_time": 0.39810609817504883
    },
    {
      "epoch": 0.000228094482421875,
      "model_forward_time": 0.11430883407592773,
      "step": 37371
    },
    {
      "epoch": 0.000228094482421875,
      "step": 37371,
      "training_step_time": 0.39097094535827637
    },
    {
      "epoch": 0.0002281005859375,
      "model_forward_time": 0.11615347862243652,
      "step": 37372
    },
    {
      "epoch": 0.0002281005859375,
      "step": 37372,
      "training_step_time": 0.39609670639038086
    },
    {
      "epoch": 0.000228106689453125,
      "model_forward_time": 0.11559057235717773,
      "step": 37373
    },
    {
      "epoch": 0.000228106689453125,
      "step": 37373,
      "training_step_time": 0.3850228786468506
    },
    {
      "epoch": 0.00022811279296875,
      "model_forward_time": 0.11534452438354492,
      "step": 37374
    },
    {
      "epoch": 0.00022811279296875,
      "step": 37374,
      "training_step_time": 0.40295910835266113
    },
    {
      "epoch": 0.000228118896484375,
      "model_forward_time": 0.11505484580993652,
      "step": 37375
    },
    {
      "epoch": 0.000228118896484375,
      "step": 37375,
      "training_step_time": 0.40361523628234863
    },
    {
      "epoch": 0.000228125,
      "model_forward_time": 0.11532902717590332,
      "step": 37376
    },
    {
      "epoch": 0.000228125,
      "step": 37376,
      "training_step_time": 0.39431071281433105
    },
    {
      "epoch": 0.000228131103515625,
      "model_forward_time": 0.11480498313903809,
      "step": 37377
    },
    {
      "epoch": 0.000228131103515625,
      "step": 37377,
      "training_step_time": 0.4127388000488281
    },
    {
      "epoch": 0.00022813720703125,
      "model_forward_time": 0.11502504348754883,
      "step": 37378
    },
    {
      "epoch": 0.00022813720703125,
      "step": 37378,
      "training_step_time": 0.442096471786499
    },
    {
      "epoch": 0.000228143310546875,
      "model_forward_time": 0.1156919002532959,
      "step": 37379
    },
    {
      "epoch": 0.000228143310546875,
      "step": 37379,
      "training_step_time": 0.45923662185668945
    },
    {
      "epoch": 0.0002281494140625,
      "grad_norm": 0.11177075654268265,
      "learning_rate": 3.407815539706124e-05,
      "loss": 0.04,
      "step": 37380
    },
    {
      "epoch": 0.0002281494140625,
      "model_forward_time": 0.11509561538696289,
      "step": 37380
    },
    {
      "epoch": 0.0002281494140625,
      "step": 37380,
      "training_step_time": 0.3981773853302002
    },
    {
      "epoch": 0.000228155517578125,
      "model_forward_time": 0.11524605751037598,
      "step": 37381
    },
    {
      "epoch": 0.000228155517578125,
      "step": 37381,
      "training_step_time": 0.3926873207092285
    },
    {
      "epoch": 0.00022816162109375,
      "model_forward_time": 0.11544156074523926,
      "step": 37382
    },
    {
      "epoch": 0.00022816162109375,
      "step": 37382,
      "training_step_time": 0.38799500465393066
    },
    {
      "epoch": 0.000228167724609375,
      "model_forward_time": 0.11611795425415039,
      "step": 37383
    },
    {
      "epoch": 0.000228167724609375,
      "step": 37383,
      "training_step_time": 0.39518141746520996
    },
    {
      "epoch": 0.000228173828125,
      "model_forward_time": 0.1152031421661377,
      "step": 37384
    },
    {
      "epoch": 0.000228173828125,
      "step": 37384,
      "training_step_time": 0.3928537368774414
    },
    {
      "epoch": 0.000228179931640625,
      "model_forward_time": 0.11560583114624023,
      "step": 37385
    },
    {
      "epoch": 0.000228179931640625,
      "step": 37385,
      "training_step_time": 0.3968963623046875
    },
    {
      "epoch": 0.00022818603515625,
      "model_forward_time": 0.1152644157409668,
      "step": 37386
    },
    {
      "epoch": 0.00022818603515625,
      "step": 37386,
      "training_step_time": 0.3997964859008789
    },
    {
      "epoch": 0.000228192138671875,
      "model_forward_time": 0.11497783660888672,
      "step": 37387
    },
    {
      "epoch": 0.000228192138671875,
      "step": 37387,
      "training_step_time": 0.39324259757995605
    },
    {
      "epoch": 0.0002281982421875,
      "model_forward_time": 0.11595940589904785,
      "step": 37388
    },
    {
      "epoch": 0.0002281982421875,
      "step": 37388,
      "training_step_time": 0.4039900302886963
    },
    {
      "epoch": 0.000228204345703125,
      "model_forward_time": 0.11606907844543457,
      "step": 37389
    },
    {
      "epoch": 0.000228204345703125,
      "step": 37389,
      "training_step_time": 0.4129180908203125
    },
    {
      "epoch": 0.00022821044921875,
      "grad_norm": 0.13466259837150574,
      "learning_rate": 3.40520345350965e-05,
      "loss": 0.0356,
      "step": 37390
    },
    {
      "epoch": 0.00022821044921875,
      "model_forward_time": 0.11541414260864258,
      "step": 37390
    },
    {
      "epoch": 0.00022821044921875,
      "step": 37390,
      "training_step_time": 0.44681310653686523
    },
    {
      "epoch": 0.000228216552734375,
      "model_forward_time": 0.11503863334655762,
      "step": 37391
    },
    {
      "epoch": 0.000228216552734375,
      "step": 37391,
      "training_step_time": 0.5496182441711426
    },
    {
      "epoch": 0.00022822265625,
      "model_forward_time": 0.11576008796691895,
      "step": 37392
    },
    {
      "epoch": 0.00022822265625,
      "step": 37392,
      "training_step_time": 0.45415496826171875
    },
    {
      "epoch": 0.000228228759765625,
      "model_forward_time": 0.11545443534851074,
      "step": 37393
    },
    {
      "epoch": 0.000228228759765625,
      "step": 37393,
      "training_step_time": 0.4291081428527832
    },
    {
      "epoch": 0.00022823486328125,
      "model_forward_time": 0.11565804481506348,
      "step": 37394
    },
    {
      "epoch": 0.00022823486328125,
      "step": 37394,
      "training_step_time": 0.42328476905822754
    },
    {
      "epoch": 0.000228240966796875,
      "model_forward_time": 0.11453080177307129,
      "step": 37395
    },
    {
      "epoch": 0.000228240966796875,
      "step": 37395,
      "training_step_time": 0.3952486515045166
    },
    {
      "epoch": 0.0002282470703125,
      "model_forward_time": 0.11571860313415527,
      "step": 37396
    },
    {
      "epoch": 0.0002282470703125,
      "step": 37396,
      "training_step_time": 0.38624095916748047
    },
    {
      "epoch": 0.000228253173828125,
      "model_forward_time": 0.1146707534790039,
      "step": 37397
    },
    {
      "epoch": 0.000228253173828125,
      "step": 37397,
      "training_step_time": 0.5507895946502686
    },
    {
      "epoch": 0.00022825927734375,
      "model_forward_time": 0.11463379859924316,
      "step": 37398
    },
    {
      "epoch": 0.00022825927734375,
      "step": 37398,
      "training_step_time": 0.3907461166381836
    },
    {
      "epoch": 0.000228265380859375,
      "model_forward_time": 0.11434316635131836,
      "step": 37399
    },
    {
      "epoch": 0.000228265380859375,
      "step": 37399,
      "training_step_time": 0.3916890621185303
    },
    {
      "epoch": 0.000228271484375,
      "grad_norm": 0.21809855103492737,
      "learning_rate": 3.40259185177026e-05,
      "loss": 0.042,
      "step": 37400
    },
    {
      "epoch": 0.000228271484375,
      "model_forward_time": 0.11554098129272461,
      "step": 37400
    },
    {
      "epoch": 0.000228271484375,
      "step": 37400,
      "training_step_time": 0.39161157608032227
    },
    {
      "epoch": 0.000228277587890625,
      "model_forward_time": 0.11464118957519531,
      "step": 37401
    },
    {
      "epoch": 0.000228277587890625,
      "step": 37401,
      "training_step_time": 0.391660213470459
    },
    {
      "epoch": 0.00022828369140625,
      "model_forward_time": 0.11561894416809082,
      "step": 37402
    },
    {
      "epoch": 0.00022828369140625,
      "step": 37402,
      "training_step_time": 0.40018224716186523
    },
    {
      "epoch": 0.000228289794921875,
      "model_forward_time": 0.11539888381958008,
      "step": 37403
    },
    {
      "epoch": 0.000228289794921875,
      "step": 37403,
      "training_step_time": 0.6035017967224121
    },
    {
      "epoch": 0.0002282958984375,
      "model_forward_time": 0.11487483978271484,
      "step": 37404
    },
    {
      "epoch": 0.0002282958984375,
      "step": 37404,
      "training_step_time": 0.43625307083129883
    },
    {
      "epoch": 0.000228302001953125,
      "model_forward_time": 0.11462020874023438,
      "step": 37405
    },
    {
      "epoch": 0.000228302001953125,
      "step": 37405,
      "training_step_time": 0.42916083335876465
    },
    {
      "epoch": 0.00022830810546875,
      "model_forward_time": 0.11527824401855469,
      "step": 37406
    },
    {
      "epoch": 0.00022830810546875,
      "step": 37406,
      "training_step_time": 0.41863155364990234
    },
    {
      "epoch": 0.000228314208984375,
      "model_forward_time": 0.11513757705688477,
      "step": 37407
    },
    {
      "epoch": 0.000228314208984375,
      "step": 37407,
      "training_step_time": 0.3912844657897949
    },
    {
      "epoch": 0.0002283203125,
      "model_forward_time": 0.11466336250305176,
      "step": 37408
    },
    {
      "epoch": 0.0002283203125,
      "step": 37408,
      "training_step_time": 0.4248170852661133
    },
    {
      "epoch": 0.000228326416015625,
      "model_forward_time": 0.11498880386352539,
      "step": 37409
    },
    {
      "epoch": 0.000228326416015625,
      "step": 37409,
      "training_step_time": 0.38733625411987305
    },
    {
      "epoch": 0.00022833251953125,
      "grad_norm": 0.10288358479738235,
      "learning_rate": 3.399980735281286e-05,
      "loss": 0.0347,
      "step": 37410
    },
    {
      "epoch": 0.00022833251953125,
      "model_forward_time": 0.11559724807739258,
      "step": 37410
    },
    {
      "epoch": 0.00022833251953125,
      "step": 37410,
      "training_step_time": 0.3832690715789795
    },
    {
      "epoch": 0.000228338623046875,
      "model_forward_time": 0.11553573608398438,
      "step": 37411
    },
    {
      "epoch": 0.000228338623046875,
      "step": 37411,
      "training_step_time": 0.4005732536315918
    },
    {
      "epoch": 0.0002283447265625,
      "model_forward_time": 0.11529278755187988,
      "step": 37412
    },
    {
      "epoch": 0.0002283447265625,
      "step": 37412,
      "training_step_time": 0.37825965881347656
    },
    {
      "epoch": 0.000228350830078125,
      "model_forward_time": 0.11588907241821289,
      "step": 37413
    },
    {
      "epoch": 0.000228350830078125,
      "step": 37413,
      "training_step_time": 0.3967623710632324
    },
    {
      "epoch": 0.00022835693359375,
      "model_forward_time": 0.1168372631072998,
      "step": 37414
    },
    {
      "epoch": 0.00022835693359375,
      "step": 37414,
      "training_step_time": 0.3837149143218994
    },
    {
      "epoch": 0.000228363037109375,
      "model_forward_time": 0.11509370803833008,
      "step": 37415
    },
    {
      "epoch": 0.000228363037109375,
      "step": 37415,
      "training_step_time": 0.42655372619628906
    },
    {
      "epoch": 0.000228369140625,
      "model_forward_time": 0.11454486846923828,
      "step": 37416
    },
    {
      "epoch": 0.000228369140625,
      "step": 37416,
      "training_step_time": 0.4166853427886963
    },
    {
      "epoch": 0.000228375244140625,
      "model_forward_time": 0.11516880989074707,
      "step": 37417
    },
    {
      "epoch": 0.000228375244140625,
      "step": 37417,
      "training_step_time": 0.39330148696899414
    },
    {
      "epoch": 0.00022838134765625,
      "model_forward_time": 0.1160135269165039,
      "step": 37418
    },
    {
      "epoch": 0.00022838134765625,
      "step": 37418,
      "training_step_time": 0.44565677642822266
    },
    {
      "epoch": 0.000228387451171875,
      "model_forward_time": 0.11541891098022461,
      "step": 37419
    },
    {
      "epoch": 0.000228387451171875,
      "step": 37419,
      "training_step_time": 0.3834707736968994
    },
    {
      "epoch": 0.0002283935546875,
      "grad_norm": 0.2152787744998932,
      "learning_rate": 3.397370104835922e-05,
      "loss": 0.0402,
      "step": 37420
    },
    {
      "epoch": 0.0002283935546875,
      "model_forward_time": 0.11496877670288086,
      "step": 37420
    },
    {
      "epoch": 0.0002283935546875,
      "step": 37420,
      "training_step_time": 0.41475749015808105
    },
    {
      "epoch": 0.000228399658203125,
      "model_forward_time": 0.11558771133422852,
      "step": 37421
    },
    {
      "epoch": 0.000228399658203125,
      "step": 37421,
      "training_step_time": 0.44019389152526855
    },
    {
      "epoch": 0.00022840576171875,
      "model_forward_time": 0.11575865745544434,
      "step": 37422
    },
    {
      "epoch": 0.00022840576171875,
      "step": 37422,
      "training_step_time": 0.46883678436279297
    },
    {
      "epoch": 0.000228411865234375,
      "model_forward_time": 0.11498308181762695,
      "step": 37423
    },
    {
      "epoch": 0.000228411865234375,
      "step": 37423,
      "training_step_time": 0.418290376663208
    },
    {
      "epoch": 0.00022841796875,
      "model_forward_time": 0.11594748497009277,
      "step": 37424
    },
    {
      "epoch": 0.00022841796875,
      "step": 37424,
      "training_step_time": 0.40119099617004395
    },
    {
      "epoch": 0.000228424072265625,
      "model_forward_time": 0.11537504196166992,
      "step": 37425
    },
    {
      "epoch": 0.000228424072265625,
      "step": 37425,
      "training_step_time": 0.38756704330444336
    },
    {
      "epoch": 0.00022843017578125,
      "model_forward_time": 0.11547517776489258,
      "step": 37426
    },
    {
      "epoch": 0.00022843017578125,
      "step": 37426,
      "training_step_time": 0.39688825607299805
    },
    {
      "epoch": 0.000228436279296875,
      "model_forward_time": 0.11496496200561523,
      "step": 37427
    },
    {
      "epoch": 0.000228436279296875,
      "step": 37427,
      "training_step_time": 0.39206433296203613
    },
    {
      "epoch": 0.0002284423828125,
      "model_forward_time": 0.11462044715881348,
      "step": 37428
    },
    {
      "epoch": 0.0002284423828125,
      "step": 37428,
      "training_step_time": 0.39437150955200195
    },
    {
      "epoch": 0.000228448486328125,
      "model_forward_time": 0.11479306221008301,
      "step": 37429
    },
    {
      "epoch": 0.000228448486328125,
      "step": 37429,
      "training_step_time": 0.3943963050842285
    },
    {
      "epoch": 0.00022845458984375,
      "grad_norm": 0.1383461356163025,
      "learning_rate": 3.394759961227202e-05,
      "loss": 0.0396,
      "step": 37430
    },
    {
      "epoch": 0.00022845458984375,
      "model_forward_time": 0.11526775360107422,
      "step": 37430
    },
    {
      "epoch": 0.00022845458984375,
      "step": 37430,
      "training_step_time": 0.396897554397583
    },
    {
      "epoch": 0.000228460693359375,
      "model_forward_time": 0.11551880836486816,
      "step": 37431
    },
    {
      "epoch": 0.000228460693359375,
      "step": 37431,
      "training_step_time": 0.41346287727355957
    },
    {
      "epoch": 0.000228466796875,
      "model_forward_time": 0.1150064468383789,
      "step": 37432
    },
    {
      "epoch": 0.000228466796875,
      "step": 37432,
      "training_step_time": 0.39072275161743164
    },
    {
      "epoch": 0.000228472900390625,
      "model_forward_time": 0.1150355339050293,
      "step": 37433
    },
    {
      "epoch": 0.000228472900390625,
      "step": 37433,
      "training_step_time": 0.39871788024902344
    },
    {
      "epoch": 0.00022847900390625,
      "model_forward_time": 0.11529874801635742,
      "step": 37434
    },
    {
      "epoch": 0.00022847900390625,
      "step": 37434,
      "training_step_time": 0.3916621208190918
    },
    {
      "epoch": 0.000228485107421875,
      "model_forward_time": 0.11564087867736816,
      "step": 37435
    },
    {
      "epoch": 0.000228485107421875,
      "step": 37435,
      "training_step_time": 0.41512441635131836
    },
    {
      "epoch": 0.0002284912109375,
      "model_forward_time": 0.11549925804138184,
      "step": 37436
    },
    {
      "epoch": 0.0002284912109375,
      "step": 37436,
      "training_step_time": 0.42465972900390625
    },
    {
      "epoch": 0.000228497314453125,
      "model_forward_time": 0.1155853271484375,
      "step": 37437
    },
    {
      "epoch": 0.000228497314453125,
      "step": 37437,
      "training_step_time": 0.4434335231781006
    },
    {
      "epoch": 0.00022850341796875,
      "model_forward_time": 0.11490273475646973,
      "step": 37438
    },
    {
      "epoch": 0.00022850341796875,
      "step": 37438,
      "training_step_time": 0.42136478424072266
    },
    {
      "epoch": 0.000228509521484375,
      "model_forward_time": 0.11413812637329102,
      "step": 37439
    },
    {
      "epoch": 0.000228509521484375,
      "step": 37439,
      "training_step_time": 2.6785659790039062
    },
    {
      "epoch": 0.000228515625,
      "grad_norm": 0.11667484790086746,
      "learning_rate": 3.392150305248024e-05,
      "loss": 0.0363,
      "step": 37440
    },
    {
      "epoch": 0.000228515625,
      "model_forward_time": 0.11181187629699707,
      "step": 37440
    },
    {
      "epoch": 0.000228515625,
      "step": 37440,
      "training_step_time": 0.37578916549682617
    },
    {
      "epoch": 0.000228521728515625,
      "model_forward_time": 0.11228632926940918,
      "step": 37441
    },
    {
      "epoch": 0.000228521728515625,
      "step": 37441,
      "training_step_time": 0.37348437309265137
    },
    {
      "epoch": 0.00022852783203125,
      "model_forward_time": 0.11303353309631348,
      "step": 37442
    },
    {
      "epoch": 0.00022852783203125,
      "step": 37442,
      "training_step_time": 0.36885786056518555
    },
    {
      "epoch": 0.000228533935546875,
      "model_forward_time": 0.11376619338989258,
      "step": 37443
    },
    {
      "epoch": 0.000228533935546875,
      "step": 37443,
      "training_step_time": 0.388993501663208
    },
    {
      "epoch": 0.0002285400390625,
      "model_forward_time": 0.11435341835021973,
      "step": 37444
    },
    {
      "epoch": 0.0002285400390625,
      "step": 37444,
      "training_step_time": 0.4116554260253906
    },
    {
      "epoch": 0.000228546142578125,
      "model_forward_time": 0.11447787284851074,
      "step": 37445
    },
    {
      "epoch": 0.000228546142578125,
      "step": 37445,
      "training_step_time": 0.37820887565612793
    },
    {
      "epoch": 0.00022855224609375,
      "model_forward_time": 0.11384940147399902,
      "step": 37446
    },
    {
      "epoch": 0.00022855224609375,
      "step": 37446,
      "training_step_time": 0.3695371150970459
    },
    {
      "epoch": 0.000228558349609375,
      "model_forward_time": 0.11421680450439453,
      "step": 37447
    },
    {
      "epoch": 0.000228558349609375,
      "step": 37447,
      "training_step_time": 0.4848306179046631
    },
    {
      "epoch": 0.000228564453125,
      "model_forward_time": 0.11504340171813965,
      "step": 37448
    },
    {
      "epoch": 0.000228564453125,
      "step": 37448,
      "training_step_time": 0.47983264923095703
    },
    {
      "epoch": 0.000228570556640625,
      "model_forward_time": 0.11519718170166016,
      "step": 37449
    },
    {
      "epoch": 0.000228570556640625,
      "step": 37449,
      "training_step_time": 0.38938212394714355
    },
    {
      "epoch": 0.00022857666015625,
      "grad_norm": 0.13094377517700195,
      "learning_rate": 3.389541137691129e-05,
      "loss": 0.0357,
      "step": 37450
    },
    {
      "epoch": 0.00022857666015625,
      "model_forward_time": 0.11441731452941895,
      "step": 37450
    },
    {
      "epoch": 0.00022857666015625,
      "step": 37450,
      "training_step_time": 0.40705370903015137
    },
    {
      "epoch": 0.000228582763671875,
      "model_forward_time": 0.11446046829223633,
      "step": 37451
    },
    {
      "epoch": 0.000228582763671875,
      "step": 37451,
      "training_step_time": 0.3948543071746826
    },
    {
      "epoch": 0.0002285888671875,
      "model_forward_time": 0.11465787887573242,
      "step": 37452
    },
    {
      "epoch": 0.0002285888671875,
      "step": 37452,
      "training_step_time": 0.39490222930908203
    },
    {
      "epoch": 0.000228594970703125,
      "model_forward_time": 0.11467552185058594,
      "step": 37453
    },
    {
      "epoch": 0.000228594970703125,
      "step": 37453,
      "training_step_time": 0.4033782482147217
    },
    {
      "epoch": 0.00022860107421875,
      "model_forward_time": 0.11558341979980469,
      "step": 37454
    },
    {
      "epoch": 0.00022860107421875,
      "step": 37454,
      "training_step_time": 0.40909361839294434
    },
    {
      "epoch": 0.000228607177734375,
      "model_forward_time": 0.11519241333007812,
      "step": 37455
    },
    {
      "epoch": 0.000228607177734375,
      "step": 37455,
      "training_step_time": 0.38334202766418457
    },
    {
      "epoch": 0.00022861328125,
      "model_forward_time": 0.1154775619506836,
      "step": 37456
    },
    {
      "epoch": 0.00022861328125,
      "step": 37456,
      "training_step_time": 0.39052653312683105
    },
    {
      "epoch": 0.000228619384765625,
      "model_forward_time": 0.11540365219116211,
      "step": 37457
    },
    {
      "epoch": 0.000228619384765625,
      "step": 37457,
      "training_step_time": 0.4630308151245117
    },
    {
      "epoch": 0.00022862548828125,
      "model_forward_time": 0.11557316780090332,
      "step": 37458
    },
    {
      "epoch": 0.00022862548828125,
      "step": 37458,
      "training_step_time": 0.39933228492736816
    },
    {
      "epoch": 0.000228631591796875,
      "model_forward_time": 0.11500167846679688,
      "step": 37459
    },
    {
      "epoch": 0.000228631591796875,
      "step": 37459,
      "training_step_time": 0.44466376304626465
    },
    {
      "epoch": 0.0002286376953125,
      "grad_norm": 0.11664257198572159,
      "learning_rate": 3.386932459349114e-05,
      "loss": 0.0348,
      "step": 37460
    },
    {
      "epoch": 0.0002286376953125,
      "model_forward_time": 0.1147758960723877,
      "step": 37460
    },
    {
      "epoch": 0.0002286376953125,
      "step": 37460,
      "training_step_time": 0.4044363498687744
    },
    {
      "epoch": 0.000228643798828125,
      "model_forward_time": 0.11543560028076172,
      "step": 37461
    },
    {
      "epoch": 0.000228643798828125,
      "step": 37461,
      "training_step_time": 0.4471909999847412
    },
    {
      "epoch": 0.00022864990234375,
      "model_forward_time": 0.11520671844482422,
      "step": 37462
    },
    {
      "epoch": 0.00022864990234375,
      "step": 37462,
      "training_step_time": 0.43885135650634766
    },
    {
      "epoch": 0.000228656005859375,
      "model_forward_time": 0.11629915237426758,
      "step": 37463
    },
    {
      "epoch": 0.000228656005859375,
      "step": 37463,
      "training_step_time": 0.43016552925109863
    },
    {
      "epoch": 0.000228662109375,
      "model_forward_time": 0.11460232734680176,
      "step": 37464
    },
    {
      "epoch": 0.000228662109375,
      "step": 37464,
      "training_step_time": 0.3933382034301758
    },
    {
      "epoch": 0.000228668212890625,
      "model_forward_time": 0.11472177505493164,
      "step": 37465
    },
    {
      "epoch": 0.000228668212890625,
      "step": 37465,
      "training_step_time": 0.3984386920928955
    },
    {
      "epoch": 0.00022867431640625,
      "model_forward_time": 0.11491227149963379,
      "step": 37466
    },
    {
      "epoch": 0.00022867431640625,
      "step": 37466,
      "training_step_time": 0.39168286323547363
    },
    {
      "epoch": 0.000228680419921875,
      "model_forward_time": 0.11443877220153809,
      "step": 37467
    },
    {
      "epoch": 0.000228680419921875,
      "step": 37467,
      "training_step_time": 0.4104757308959961
    },
    {
      "epoch": 0.0002286865234375,
      "model_forward_time": 0.11507606506347656,
      "step": 37468
    },
    {
      "epoch": 0.0002286865234375,
      "step": 37468,
      "training_step_time": 0.40811967849731445
    },
    {
      "epoch": 0.000228692626953125,
      "model_forward_time": 0.11513137817382812,
      "step": 37469
    },
    {
      "epoch": 0.000228692626953125,
      "step": 37469,
      "training_step_time": 0.3872721195220947
    },
    {
      "epoch": 0.00022869873046875,
      "grad_norm": 0.13211894035339355,
      "learning_rate": 3.384324271014429e-05,
      "loss": 0.0427,
      "step": 37470
    },
    {
      "epoch": 0.00022869873046875,
      "model_forward_time": 0.11553430557250977,
      "step": 37470
    },
    {
      "epoch": 0.00022869873046875,
      "step": 37470,
      "training_step_time": 0.4027407169342041
    },
    {
      "epoch": 0.000228704833984375,
      "model_forward_time": 0.11529660224914551,
      "step": 37471
    },
    {
      "epoch": 0.000228704833984375,
      "step": 37471,
      "training_step_time": 0.40083980560302734
    },
    {
      "epoch": 0.0002287109375,
      "model_forward_time": 0.11497139930725098,
      "step": 37472
    },
    {
      "epoch": 0.0002287109375,
      "step": 37472,
      "training_step_time": 0.4110844135284424
    },
    {
      "epoch": 0.000228717041015625,
      "model_forward_time": 0.11506152153015137,
      "step": 37473
    },
    {
      "epoch": 0.000228717041015625,
      "step": 37473,
      "training_step_time": 0.4029405117034912
    },
    {
      "epoch": 0.00022872314453125,
      "model_forward_time": 0.11509990692138672,
      "step": 37474
    },
    {
      "epoch": 0.00022872314453125,
      "step": 37474,
      "training_step_time": 0.4147450923919678
    },
    {
      "epoch": 0.000228729248046875,
      "model_forward_time": 0.1153407096862793,
      "step": 37475
    },
    {
      "epoch": 0.000228729248046875,
      "step": 37475,
      "training_step_time": 0.4142467975616455
    },
    {
      "epoch": 0.0002287353515625,
      "model_forward_time": 0.11659860610961914,
      "step": 37476
    },
    {
      "epoch": 0.0002287353515625,
      "step": 37476,
      "training_step_time": 0.47591161727905273
    },
    {
      "epoch": 0.000228741455078125,
      "model_forward_time": 0.11522507667541504,
      "step": 37477
    },
    {
      "epoch": 0.000228741455078125,
      "step": 37477,
      "training_step_time": 0.4345846176147461
    },
    {
      "epoch": 0.00022874755859375,
      "model_forward_time": 0.11539959907531738,
      "step": 37478
    },
    {
      "epoch": 0.00022874755859375,
      "step": 37478,
      "training_step_time": 0.42946624755859375
    },
    {
      "epoch": 0.000228753662109375,
      "model_forward_time": 0.1151590347290039,
      "step": 37479
    },
    {
      "epoch": 0.000228753662109375,
      "step": 37479,
      "training_step_time": 0.4128882884979248
    },
    {
      "epoch": 0.000228759765625,
      "grad_norm": 0.07342521846294403,
      "learning_rate": 3.3817165734793705e-05,
      "loss": 0.0373,
      "step": 37480
    },
    {
      "epoch": 0.000228759765625,
      "model_forward_time": 0.11535048484802246,
      "step": 37480
    },
    {
      "epoch": 0.000228759765625,
      "step": 37480,
      "training_step_time": 0.3883190155029297
    },
    {
      "epoch": 0.000228765869140625,
      "model_forward_time": 0.11496257781982422,
      "step": 37481
    },
    {
      "epoch": 0.000228765869140625,
      "step": 37481,
      "training_step_time": 0.4327113628387451
    },
    {
      "epoch": 0.00022877197265625,
      "model_forward_time": 0.11482930183410645,
      "step": 37482
    },
    {
      "epoch": 0.00022877197265625,
      "step": 37482,
      "training_step_time": 0.4367043972015381
    },
    {
      "epoch": 0.000228778076171875,
      "model_forward_time": 0.11467576026916504,
      "step": 37483
    },
    {
      "epoch": 0.000228778076171875,
      "step": 37483,
      "training_step_time": 0.395782470703125
    },
    {
      "epoch": 0.0002287841796875,
      "model_forward_time": 0.11490321159362793,
      "step": 37484
    },
    {
      "epoch": 0.0002287841796875,
      "step": 37484,
      "training_step_time": 0.39412498474121094
    },
    {
      "epoch": 0.000228790283203125,
      "model_forward_time": 0.11512637138366699,
      "step": 37485
    },
    {
      "epoch": 0.000228790283203125,
      "step": 37485,
      "training_step_time": 0.38257932662963867
    },
    {
      "epoch": 0.00022879638671875,
      "model_forward_time": 0.11529350280761719,
      "step": 37486
    },
    {
      "epoch": 0.00022879638671875,
      "step": 37486,
      "training_step_time": 0.40895652770996094
    },
    {
      "epoch": 0.000228802490234375,
      "model_forward_time": 0.11551523208618164,
      "step": 37487
    },
    {
      "epoch": 0.000228802490234375,
      "step": 37487,
      "training_step_time": 0.4105653762817383
    },
    {
      "epoch": 0.00022880859375,
      "model_forward_time": 0.11560440063476562,
      "step": 37488
    },
    {
      "epoch": 0.00022880859375,
      "step": 37488,
      "training_step_time": 0.416168212890625
    },
    {
      "epoch": 0.000228814697265625,
      "model_forward_time": 0.11670708656311035,
      "step": 37489
    },
    {
      "epoch": 0.000228814697265625,
      "step": 37489,
      "training_step_time": 0.4598500728607178
    },
    {
      "epoch": 0.00022882080078125,
      "grad_norm": 0.1178349182009697,
      "learning_rate": 3.379109367536089e-05,
      "loss": 0.04,
      "step": 37490
    },
    {
      "epoch": 0.00022882080078125,
      "model_forward_time": 0.1166219711303711,
      "step": 37490
    },
    {
      "epoch": 0.00022882080078125,
      "step": 37490,
      "training_step_time": 0.5420882701873779
    },
    {
      "epoch": 0.000228826904296875,
      "model_forward_time": 0.11963295936584473,
      "step": 37491
    },
    {
      "epoch": 0.000228826904296875,
      "step": 37491,
      "training_step_time": 0.6834383010864258
    },
    {
      "epoch": 0.0002288330078125,
      "model_forward_time": 0.12016963958740234,
      "step": 37492
    },
    {
      "epoch": 0.0002288330078125,
      "step": 37492,
      "training_step_time": 0.6846506595611572
    },
    {
      "epoch": 0.000228839111328125,
      "model_forward_time": 0.11748504638671875,
      "step": 37493
    },
    {
      "epoch": 0.000228839111328125,
      "step": 37493,
      "training_step_time": 0.7365024089813232
    },
    {
      "epoch": 0.00022884521484375,
      "model_forward_time": 0.12065649032592773,
      "step": 37494
    },
    {
      "epoch": 0.00022884521484375,
      "step": 37494,
      "training_step_time": 0.7008256912231445
    },
    {
      "epoch": 0.000228851318359375,
      "model_forward_time": 0.1183779239654541,
      "step": 37495
    },
    {
      "epoch": 0.000228851318359375,
      "step": 37495,
      "training_step_time": 0.6999366283416748
    },
    {
      "epoch": 0.000228857421875,
      "model_forward_time": 0.11895322799682617,
      "step": 37496
    },
    {
      "epoch": 0.000228857421875,
      "step": 37496,
      "training_step_time": 0.6611487865447998
    },
    {
      "epoch": 0.000228863525390625,
      "model_forward_time": 0.11687731742858887,
      "step": 37497
    },
    {
      "epoch": 0.000228863525390625,
      "step": 37497,
      "training_step_time": 0.7707247734069824
    },
    {
      "epoch": 0.00022886962890625,
      "model_forward_time": 0.11863899230957031,
      "step": 37498
    },
    {
      "epoch": 0.00022886962890625,
      "step": 37498,
      "training_step_time": 0.6908962726593018
    },
    {
      "epoch": 0.000228875732421875,
      "model_forward_time": 0.11820697784423828,
      "step": 37499
    },
    {
      "epoch": 0.000228875732421875,
      "step": 37499,
      "training_step_time": 0.6350784301757812
    },
    {
      "epoch": 0.0002288818359375,
      "grad_norm": 0.1005997434258461,
      "learning_rate": 3.3765026539765834e-05,
      "loss": 0.0404,
      "step": 37500
    },
    {
      "epoch": 0.0002288818359375,
      "model_forward_time": 0.11884641647338867,
      "step": 37500
    },
    {
      "epoch": 0.0002288818359375,
      "step": 37500,
      "training_step_time": 0.7334692478179932
    },
    {
      "epoch": 0.000228887939453125,
      "model_forward_time": 0.1168060302734375,
      "step": 37501
    },
    {
      "epoch": 0.000228887939453125,
      "step": 37501,
      "training_step_time": 0.7044198513031006
    },
    {
      "epoch": 0.00022889404296875,
      "model_forward_time": 0.1201322078704834,
      "step": 37502
    },
    {
      "epoch": 0.00022889404296875,
      "step": 37502,
      "training_step_time": 0.6384983062744141
    },
    {
      "epoch": 0.000228900146484375,
      "model_forward_time": 0.12330436706542969,
      "step": 37503
    },
    {
      "epoch": 0.000228900146484375,
      "step": 37503,
      "training_step_time": 0.6991190910339355
    },
    {
      "epoch": 0.00022890625,
      "model_forward_time": 0.11905336380004883,
      "step": 37504
    },
    {
      "epoch": 0.00022890625,
      "step": 37504,
      "training_step_time": 0.6778781414031982
    },
    {
      "epoch": 0.000228912353515625,
      "model_forward_time": 0.11711525917053223,
      "step": 37505
    },
    {
      "epoch": 0.000228912353515625,
      "step": 37505,
      "training_step_time": 0.6473309993743896
    },
    {
      "epoch": 0.00022891845703125,
      "model_forward_time": 0.12602853775024414,
      "step": 37506
    },
    {
      "epoch": 0.00022891845703125,
      "step": 37506,
      "training_step_time": 0.731128454208374
    },
    {
      "epoch": 0.000228924560546875,
      "model_forward_time": 0.11831521987915039,
      "step": 37507
    },
    {
      "epoch": 0.000228924560546875,
      "step": 37507,
      "training_step_time": 0.6769895553588867
    },
    {
      "epoch": 0.0002289306640625,
      "model_forward_time": 0.11855196952819824,
      "step": 37508
    },
    {
      "epoch": 0.0002289306640625,
      "step": 37508,
      "training_step_time": 0.6413626670837402
    },
    {
      "epoch": 0.000228936767578125,
      "model_forward_time": 0.11667680740356445,
      "step": 37509
    },
    {
      "epoch": 0.000228936767578125,
      "step": 37509,
      "training_step_time": 0.6643173694610596
    },
    {
      "epoch": 0.00022894287109375,
      "grad_norm": 0.09897561371326447,
      "learning_rate": 3.373896433592705e-05,
      "loss": 0.0461,
      "step": 37510
    },
    {
      "epoch": 0.00022894287109375,
      "model_forward_time": 0.11967182159423828,
      "step": 37510
    },
    {
      "epoch": 0.00022894287109375,
      "step": 37510,
      "training_step_time": 0.6784892082214355
    },
    {
      "epoch": 0.000228948974609375,
      "model_forward_time": 0.12047028541564941,
      "step": 37511
    },
    {
      "epoch": 0.000228948974609375,
      "step": 37511,
      "training_step_time": 0.6675221920013428
    },
    {
      "epoch": 0.000228955078125,
      "model_forward_time": 0.11674284934997559,
      "step": 37512
    },
    {
      "epoch": 0.000228955078125,
      "step": 37512,
      "training_step_time": 0.6914186477661133
    },
    {
      "epoch": 0.000228961181640625,
      "model_forward_time": 0.11789274215698242,
      "step": 37513
    },
    {
      "epoch": 0.000228961181640625,
      "step": 37513,
      "training_step_time": 0.6570558547973633
    },
    {
      "epoch": 0.00022896728515625,
      "model_forward_time": 0.12941884994506836,
      "step": 37514
    },
    {
      "epoch": 0.00022896728515625,
      "step": 37514,
      "training_step_time": 0.6606688499450684
    },
    {
      "epoch": 0.000228973388671875,
      "model_forward_time": 0.12132143974304199,
      "step": 37515
    },
    {
      "epoch": 0.000228973388671875,
      "step": 37515,
      "training_step_time": 0.6778528690338135
    },
    {
      "epoch": 0.0002289794921875,
      "model_forward_time": 0.1206519603729248,
      "step": 37516
    },
    {
      "epoch": 0.0002289794921875,
      "step": 37516,
      "training_step_time": 0.6588134765625
    },
    {
      "epoch": 0.000228985595703125,
      "model_forward_time": 0.12017941474914551,
      "step": 37517
    },
    {
      "epoch": 0.000228985595703125,
      "step": 37517,
      "training_step_time": 0.6882705688476562
    },
    {
      "epoch": 0.00022899169921875,
      "model_forward_time": 0.12054944038391113,
      "step": 37518
    },
    {
      "epoch": 0.00022899169921875,
      "step": 37518,
      "training_step_time": 0.6722354888916016
    },
    {
      "epoch": 0.000228997802734375,
      "model_forward_time": 0.11892032623291016,
      "step": 37519
    },
    {
      "epoch": 0.000228997802734375,
      "step": 37519,
      "training_step_time": 0.6940288543701172
    },
    {
      "epoch": 0.00022900390625,
      "grad_norm": 0.11911989003419876,
      "learning_rate": 3.371290707176158e-05,
      "loss": 0.0435,
      "step": 37520
    },
    {
      "epoch": 0.00022900390625,
      "model_forward_time": 0.11895489692687988,
      "step": 37520
    },
    {
      "epoch": 0.00022900390625,
      "step": 37520,
      "training_step_time": 0.6628856658935547
    },
    {
      "epoch": 0.000229010009765625,
      "model_forward_time": 0.11841034889221191,
      "step": 37521
    },
    {
      "epoch": 0.000229010009765625,
      "step": 37521,
      "training_step_time": 0.6765000820159912
    },
    {
      "epoch": 0.00022901611328125,
      "model_forward_time": 0.11626291275024414,
      "step": 37522
    },
    {
      "epoch": 0.00022901611328125,
      "step": 37522,
      "training_step_time": 0.6533403396606445
    },
    {
      "epoch": 0.000229022216796875,
      "model_forward_time": 0.12277793884277344,
      "step": 37523
    },
    {
      "epoch": 0.000229022216796875,
      "step": 37523,
      "training_step_time": 0.6836638450622559
    },
    {
      "epoch": 0.0002290283203125,
      "model_forward_time": 0.13916444778442383,
      "step": 37524
    },
    {
      "epoch": 0.0002290283203125,
      "step": 37524,
      "training_step_time": 0.6460301876068115
    },
    {
      "epoch": 0.000229034423828125,
      "model_forward_time": 0.12130403518676758,
      "step": 37525
    },
    {
      "epoch": 0.000229034423828125,
      "step": 37525,
      "training_step_time": 0.7427351474761963
    },
    {
      "epoch": 0.00022904052734375,
      "model_forward_time": 0.12437796592712402,
      "step": 37526
    },
    {
      "epoch": 0.00022904052734375,
      "step": 37526,
      "training_step_time": 0.7585546970367432
    },
    {
      "epoch": 0.000229046630859375,
      "model_forward_time": 0.12457275390625,
      "step": 37527
    },
    {
      "epoch": 0.000229046630859375,
      "step": 37527,
      "training_step_time": 0.6739392280578613
    },
    {
      "epoch": 0.000229052734375,
      "model_forward_time": 0.11772012710571289,
      "step": 37528
    },
    {
      "epoch": 0.000229052734375,
      "step": 37528,
      "training_step_time": 0.7600114345550537
    },
    {
      "epoch": 0.000229058837890625,
      "model_forward_time": 0.12254643440246582,
      "step": 37529
    },
    {
      "epoch": 0.000229058837890625,
      "step": 37529,
      "training_step_time": 0.7519948482513428
    },
    {
      "epoch": 0.00022906494140625,
      "grad_norm": 0.12364600598812103,
      "learning_rate": 3.368685475518488e-05,
      "loss": 0.0437,
      "step": 37530
    },
    {
      "epoch": 0.00022906494140625,
      "model_forward_time": 0.11702156066894531,
      "step": 37530
    },
    {
      "epoch": 0.00022906494140625,
      "step": 37530,
      "training_step_time": 0.7940683364868164
    },
    {
      "epoch": 0.000229071044921875,
      "model_forward_time": 0.11712503433227539,
      "step": 37531
    },
    {
      "epoch": 0.000229071044921875,
      "step": 37531,
      "training_step_time": 0.6348650455474854
    },
    {
      "epoch": 0.0002290771484375,
      "model_forward_time": 0.11740708351135254,
      "step": 37532
    },
    {
      "epoch": 0.0002290771484375,
      "step": 37532,
      "training_step_time": 0.6306216716766357
    },
    {
      "epoch": 0.000229083251953125,
      "model_forward_time": 0.11892843246459961,
      "step": 37533
    },
    {
      "epoch": 0.000229083251953125,
      "step": 37533,
      "training_step_time": 0.6553335189819336
    },
    {
      "epoch": 0.00022908935546875,
      "model_forward_time": 0.11684203147888184,
      "step": 37534
    },
    {
      "epoch": 0.00022908935546875,
      "step": 37534,
      "training_step_time": 0.6865992546081543
    },
    {
      "epoch": 0.000229095458984375,
      "model_forward_time": 0.12003278732299805,
      "step": 37535
    },
    {
      "epoch": 0.000229095458984375,
      "step": 37535,
      "training_step_time": 0.7318630218505859
    },
    {
      "epoch": 0.0002291015625,
      "model_forward_time": 0.1187589168548584,
      "step": 37536
    },
    {
      "epoch": 0.0002291015625,
      "step": 37536,
      "training_step_time": 0.6652088165283203
    },
    {
      "epoch": 0.000229107666015625,
      "model_forward_time": 0.11655426025390625,
      "step": 37537
    },
    {
      "epoch": 0.000229107666015625,
      "step": 37537,
      "training_step_time": 0.550640344619751
    },
    {
      "epoch": 0.00022911376953125,
      "model_forward_time": 0.12449359893798828,
      "step": 37538
    },
    {
      "epoch": 0.00022911376953125,
      "step": 37538,
      "training_step_time": 0.7111408710479736
    },
    {
      "epoch": 0.000229119873046875,
      "model_forward_time": 0.11713171005249023,
      "step": 37539
    },
    {
      "epoch": 0.000229119873046875,
      "step": 37539,
      "training_step_time": 0.6663694381713867
    },
    {
      "epoch": 0.0002291259765625,
      "grad_norm": 0.10716302692890167,
      "learning_rate": 3.366080739411101e-05,
      "loss": 0.0431,
      "step": 37540
    },
    {
      "epoch": 0.0002291259765625,
      "model_forward_time": 0.11687660217285156,
      "step": 37540
    },
    {
      "epoch": 0.0002291259765625,
      "step": 37540,
      "training_step_time": 0.640850305557251
    },
    {
      "epoch": 0.000229132080078125,
      "model_forward_time": 0.11882710456848145,
      "step": 37541
    },
    {
      "epoch": 0.000229132080078125,
      "step": 37541,
      "training_step_time": 0.6285789012908936
    },
    {
      "epoch": 0.00022913818359375,
      "model_forward_time": 0.11737775802612305,
      "step": 37542
    },
    {
      "epoch": 0.00022913818359375,
      "step": 37542,
      "training_step_time": 0.6911799907684326
    },
    {
      "epoch": 0.000229144287109375,
      "model_forward_time": 0.12582755088806152,
      "step": 37543
    },
    {
      "epoch": 0.000229144287109375,
      "step": 37543,
      "training_step_time": 0.6778016090393066
    },
    {
      "epoch": 0.000229150390625,
      "model_forward_time": 0.12000703811645508,
      "step": 37544
    },
    {
      "epoch": 0.000229150390625,
      "step": 37544,
      "training_step_time": 0.758345365524292
    },
    {
      "epoch": 0.000229156494140625,
      "model_forward_time": 0.11908149719238281,
      "step": 37545
    },
    {
      "epoch": 0.000229156494140625,
      "step": 37545,
      "training_step_time": 0.6860251426696777
    },
    {
      "epoch": 0.00022916259765625,
      "model_forward_time": 0.11721348762512207,
      "step": 37546
    },
    {
      "epoch": 0.00022916259765625,
      "step": 37546,
      "training_step_time": 0.7021348476409912
    },
    {
      "epoch": 0.000229168701171875,
      "model_forward_time": 0.11938834190368652,
      "step": 37547
    },
    {
      "epoch": 0.000229168701171875,
      "step": 37547,
      "training_step_time": 0.6678807735443115
    },
    {
      "epoch": 0.0002291748046875,
      "model_forward_time": 0.1203618049621582,
      "step": 37548
    },
    {
      "epoch": 0.0002291748046875,
      "step": 37548,
      "training_step_time": 0.6889705657958984
    },
    {
      "epoch": 0.000229180908203125,
      "model_forward_time": 0.1190953254699707,
      "step": 37549
    },
    {
      "epoch": 0.000229180908203125,
      "step": 37549,
      "training_step_time": 0.676774263381958
    },
    {
      "epoch": 0.00022918701171875,
      "grad_norm": 0.08187562227249146,
      "learning_rate": 3.363476499645241e-05,
      "loss": 0.0435,
      "step": 37550
    },
    {
      "epoch": 0.00022918701171875,
      "model_forward_time": 0.11887121200561523,
      "step": 37550
    },
    {
      "epoch": 0.00022918701171875,
      "step": 37550,
      "training_step_time": 0.6483216285705566
    },
    {
      "epoch": 0.000229193115234375,
      "model_forward_time": 0.11704015731811523,
      "step": 37551
    },
    {
      "epoch": 0.000229193115234375,
      "step": 37551,
      "training_step_time": 0.6521878242492676
    },
    {
      "epoch": 0.00022919921875,
      "model_forward_time": 0.1272108554840088,
      "step": 37552
    },
    {
      "epoch": 0.00022919921875,
      "step": 37552,
      "training_step_time": 0.6766650676727295
    },
    {
      "epoch": 0.000229205322265625,
      "model_forward_time": 0.13157343864440918,
      "step": 37553
    },
    {
      "epoch": 0.000229205322265625,
      "step": 37553,
      "training_step_time": 0.6433892250061035
    },
    {
      "epoch": 0.00022921142578125,
      "model_forward_time": 0.11904215812683105,
      "step": 37554
    },
    {
      "epoch": 0.00022921142578125,
      "step": 37554,
      "training_step_time": 0.6614227294921875
    },
    {
      "epoch": 0.000229217529296875,
      "model_forward_time": 0.12952613830566406,
      "step": 37555
    },
    {
      "epoch": 0.000229217529296875,
      "step": 37555,
      "training_step_time": 0.6013345718383789
    },
    {
      "epoch": 0.0002292236328125,
      "model_forward_time": 0.1225748062133789,
      "step": 37556
    },
    {
      "epoch": 0.0002292236328125,
      "step": 37556,
      "training_step_time": 0.6952733993530273
    },
    {
      "epoch": 0.000229229736328125,
      "model_forward_time": 0.11828160285949707,
      "step": 37557
    },
    {
      "epoch": 0.000229229736328125,
      "step": 37557,
      "training_step_time": 0.7141678333282471
    },
    {
      "epoch": 0.00022923583984375,
      "model_forward_time": 0.1201474666595459,
      "step": 37558
    },
    {
      "epoch": 0.00022923583984375,
      "step": 37558,
      "training_step_time": 0.6415481567382812
    },
    {
      "epoch": 0.000229241943359375,
      "model_forward_time": 0.11823606491088867,
      "step": 37559
    },
    {
      "epoch": 0.000229241943359375,
      "step": 37559,
      "training_step_time": 0.49242424964904785
    },
    {
      "epoch": 0.000229248046875,
      "grad_norm": 0.09664426743984222,
      "learning_rate": 3.360872757012011e-05,
      "loss": 0.043,
      "step": 37560
    },
    {
      "epoch": 0.000229248046875,
      "model_forward_time": 0.11722612380981445,
      "step": 37560
    },
    {
      "epoch": 0.000229248046875,
      "step": 37560,
      "training_step_time": 0.46152210235595703
    },
    {
      "epoch": 0.000229254150390625,
      "model_forward_time": 0.11797547340393066,
      "step": 37561
    },
    {
      "epoch": 0.000229254150390625,
      "step": 37561,
      "training_step_time": 0.4602336883544922
    },
    {
      "epoch": 0.00022926025390625,
      "model_forward_time": 0.11817193031311035,
      "step": 37562
    },
    {
      "epoch": 0.00022926025390625,
      "step": 37562,
      "training_step_time": 0.442852258682251
    },
    {
      "epoch": 0.000229266357421875,
      "model_forward_time": 0.1176142692565918,
      "step": 37563
    },
    {
      "epoch": 0.000229266357421875,
      "step": 37563,
      "training_step_time": 0.43545007705688477
    },
    {
      "epoch": 0.0002292724609375,
      "model_forward_time": 0.11637759208679199,
      "step": 37564
    },
    {
      "epoch": 0.0002292724609375,
      "step": 37564,
      "training_step_time": 0.551978588104248
    },
    {
      "epoch": 0.000229278564453125,
      "model_forward_time": 0.11597108840942383,
      "step": 37565
    },
    {
      "epoch": 0.000229278564453125,
      "step": 37565,
      "training_step_time": 0.46411824226379395
    },
    {
      "epoch": 0.00022928466796875,
      "model_forward_time": 0.1165921688079834,
      "step": 37566
    },
    {
      "epoch": 0.00022928466796875,
      "step": 37566,
      "training_step_time": 0.43105435371398926
    },
    {
      "epoch": 0.000229290771484375,
      "model_forward_time": 0.11587667465209961,
      "step": 37567
    },
    {
      "epoch": 0.000229290771484375,
      "step": 37567,
      "training_step_time": 0.40430450439453125
    },
    {
      "epoch": 0.000229296875,
      "model_forward_time": 0.11639904975891113,
      "step": 37568
    },
    {
      "epoch": 0.000229296875,
      "step": 37568,
      "training_step_time": 0.39400267601013184
    },
    {
      "epoch": 0.000229302978515625,
      "model_forward_time": 0.11519813537597656,
      "step": 37569
    },
    {
      "epoch": 0.000229302978515625,
      "step": 37569,
      "training_step_time": 0.46696949005126953
    },
    {
      "epoch": 0.00022930908203125,
      "grad_norm": 0.10586734116077423,
      "learning_rate": 3.358269512302361e-05,
      "loss": 0.0455,
      "step": 37570
    },
    {
      "epoch": 0.00022930908203125,
      "model_forward_time": 0.11643719673156738,
      "step": 37570
    },
    {
      "epoch": 0.00022930908203125,
      "step": 37570,
      "training_step_time": 0.4170804023742676
    },
    {
      "epoch": 0.000229315185546875,
      "model_forward_time": 0.11521553993225098,
      "step": 37571
    },
    {
      "epoch": 0.000229315185546875,
      "step": 37571,
      "training_step_time": 0.4143960475921631
    },
    {
      "epoch": 0.0002293212890625,
      "model_forward_time": 0.11580944061279297,
      "step": 37572
    },
    {
      "epoch": 0.0002293212890625,
      "step": 37572,
      "training_step_time": 0.4353961944580078
    },
    {
      "epoch": 0.000229327392578125,
      "model_forward_time": 0.11620497703552246,
      "step": 37573
    },
    {
      "epoch": 0.000229327392578125,
      "step": 37573,
      "training_step_time": 0.398684024810791
    },
    {
      "epoch": 0.00022933349609375,
      "model_forward_time": 0.11538195610046387,
      "step": 37574
    },
    {
      "epoch": 0.00022933349609375,
      "step": 37574,
      "training_step_time": 0.38296008110046387
    },
    {
      "epoch": 0.000229339599609375,
      "model_forward_time": 0.1158447265625,
      "step": 37575
    },
    {
      "epoch": 0.000229339599609375,
      "step": 37575,
      "training_step_time": 0.3887014389038086
    },
    {
      "epoch": 0.000229345703125,
      "model_forward_time": 0.11538124084472656,
      "step": 37576
    },
    {
      "epoch": 0.000229345703125,
      "step": 37576,
      "training_step_time": 0.4027276039123535
    },
    {
      "epoch": 0.000229351806640625,
      "model_forward_time": 0.11556553840637207,
      "step": 37577
    },
    {
      "epoch": 0.000229351806640625,
      "step": 37577,
      "training_step_time": 0.3991997241973877
    },
    {
      "epoch": 0.00022935791015625,
      "model_forward_time": 0.11483097076416016,
      "step": 37578
    },
    {
      "epoch": 0.00022935791015625,
      "step": 37578,
      "training_step_time": 0.44040894508361816
    },
    {
      "epoch": 0.000229364013671875,
      "model_forward_time": 0.11589908599853516,
      "step": 37579
    },
    {
      "epoch": 0.000229364013671875,
      "step": 37579,
      "training_step_time": 0.4002676010131836
    },
    {
      "epoch": 0.0002293701171875,
      "grad_norm": 0.14188635349273682,
      "learning_rate": 3.355666766307084e-05,
      "loss": 0.0406,
      "step": 37580
    },
    {
      "epoch": 0.0002293701171875,
      "model_forward_time": 0.11601686477661133,
      "step": 37580
    },
    {
      "epoch": 0.0002293701171875,
      "step": 37580,
      "training_step_time": 0.39860987663269043
    },
    {
      "epoch": 0.000229376220703125,
      "model_forward_time": 0.11519861221313477,
      "step": 37581
    },
    {
      "epoch": 0.000229376220703125,
      "step": 37581,
      "training_step_time": 0.39319467544555664
    },
    {
      "epoch": 0.00022938232421875,
      "model_forward_time": 0.1152791976928711,
      "step": 37582
    },
    {
      "epoch": 0.00022938232421875,
      "step": 37582,
      "training_step_time": 0.4023916721343994
    },
    {
      "epoch": 0.000229388427734375,
      "model_forward_time": 0.11579394340515137,
      "step": 37583
    },
    {
      "epoch": 0.000229388427734375,
      "step": 37583,
      "training_step_time": 0.4003005027770996
    },
    {
      "epoch": 0.00022939453125,
      "model_forward_time": 0.11677956581115723,
      "step": 37584
    },
    {
      "epoch": 0.00022939453125,
      "step": 37584,
      "training_step_time": 0.408397912979126
    },
    {
      "epoch": 0.000229400634765625,
      "model_forward_time": 0.1152029037475586,
      "step": 37585
    },
    {
      "epoch": 0.000229400634765625,
      "step": 37585,
      "training_step_time": 0.37097668647766113
    },
    {
      "epoch": 0.00022940673828125,
      "model_forward_time": 0.11536026000976562,
      "step": 37586
    },
    {
      "epoch": 0.00022940673828125,
      "step": 37586,
      "training_step_time": 0.4748380184173584
    },
    {
      "epoch": 0.000229412841796875,
      "model_forward_time": 0.11592936515808105,
      "step": 37587
    },
    {
      "epoch": 0.000229412841796875,
      "step": 37587,
      "training_step_time": 0.4032618999481201
    },
    {
      "epoch": 0.0002294189453125,
      "model_forward_time": 0.1156759262084961,
      "step": 37588
    },
    {
      "epoch": 0.0002294189453125,
      "step": 37588,
      "training_step_time": 0.38987278938293457
    },
    {
      "epoch": 0.000229425048828125,
      "model_forward_time": 0.11551713943481445,
      "step": 37589
    },
    {
      "epoch": 0.000229425048828125,
      "step": 37589,
      "training_step_time": 0.3991267681121826
    },
    {
      "epoch": 0.00022943115234375,
      "grad_norm": 0.12345968186855316,
      "learning_rate": 3.3530645198168295e-05,
      "loss": 0.0458,
      "step": 37590
    },
    {
      "epoch": 0.00022943115234375,
      "model_forward_time": 0.11520743370056152,
      "step": 37590
    },
    {
      "epoch": 0.00022943115234375,
      "step": 37590,
      "training_step_time": 0.39987969398498535
    },
    {
      "epoch": 0.000229437255859375,
      "model_forward_time": 0.11477828025817871,
      "step": 37591
    },
    {
      "epoch": 0.000229437255859375,
      "step": 37591,
      "training_step_time": 0.39273524284362793
    },
    {
      "epoch": 0.000229443359375,
      "model_forward_time": 0.11572599411010742,
      "step": 37592
    },
    {
      "epoch": 0.000229443359375,
      "step": 37592,
      "training_step_time": 0.3878617286682129
    },
    {
      "epoch": 0.000229449462890625,
      "model_forward_time": 0.1151587963104248,
      "step": 37593
    },
    {
      "epoch": 0.000229449462890625,
      "step": 37593,
      "training_step_time": 0.40669679641723633
    },
    {
      "epoch": 0.00022945556640625,
      "model_forward_time": 0.11590576171875,
      "step": 37594
    },
    {
      "epoch": 0.00022945556640625,
      "step": 37594,
      "training_step_time": 0.4935932159423828
    },
    {
      "epoch": 0.000229461669921875,
      "model_forward_time": 0.11480498313903809,
      "step": 37595
    },
    {
      "epoch": 0.000229461669921875,
      "step": 37595,
      "training_step_time": 0.49895787239074707
    },
    {
      "epoch": 0.0002294677734375,
      "model_forward_time": 0.11649537086486816,
      "step": 37596
    },
    {
      "epoch": 0.0002294677734375,
      "step": 37596,
      "training_step_time": 0.402080774307251
    },
    {
      "epoch": 0.000229473876953125,
      "model_forward_time": 0.11635041236877441,
      "step": 37597
    },
    {
      "epoch": 0.000229473876953125,
      "step": 37597,
      "training_step_time": 0.38692498207092285
    },
    {
      "epoch": 0.00022947998046875,
      "model_forward_time": 0.11551403999328613,
      "step": 37598
    },
    {
      "epoch": 0.00022947998046875,
      "step": 37598,
      "training_step_time": 0.3916435241699219
    },
    {
      "epoch": 0.000229486083984375,
      "model_forward_time": 0.11504459381103516,
      "step": 37599
    },
    {
      "epoch": 0.000229486083984375,
      "step": 37599,
      "training_step_time": 0.42322731018066406
    },
    {
      "epoch": 0.0002294921875,
      "grad_norm": 0.1218622475862503,
      "learning_rate": 3.350462773622086e-05,
      "loss": 0.0466,
      "step": 37600
    },
    {
      "epoch": 0.0002294921875,
      "model_forward_time": 0.11482596397399902,
      "step": 37600
    },
    {
      "epoch": 0.0002294921875,
      "step": 37600,
      "training_step_time": 0.39185285568237305
    },
    {
      "epoch": 0.000229498291015625,
      "model_forward_time": 0.11642575263977051,
      "step": 37601
    },
    {
      "epoch": 0.000229498291015625,
      "step": 37601,
      "training_step_time": 0.48755669593811035
    },
    {
      "epoch": 0.00022950439453125,
      "model_forward_time": 0.11594796180725098,
      "step": 37602
    },
    {
      "epoch": 0.00022950439453125,
      "step": 37602,
      "training_step_time": 0.4431314468383789
    },
    {
      "epoch": 0.000229510498046875,
      "model_forward_time": 0.11582779884338379,
      "step": 37603
    },
    {
      "epoch": 0.000229510498046875,
      "step": 37603,
      "training_step_time": 0.39443039894104004
    },
    {
      "epoch": 0.0002295166015625,
      "model_forward_time": 0.11508011817932129,
      "step": 37604
    },
    {
      "epoch": 0.0002295166015625,
      "step": 37604,
      "training_step_time": 0.40340733528137207
    },
    {
      "epoch": 0.000229522705078125,
      "model_forward_time": 0.11514472961425781,
      "step": 37605
    },
    {
      "epoch": 0.000229522705078125,
      "step": 37605,
      "training_step_time": 0.4337139129638672
    },
    {
      "epoch": 0.00022952880859375,
      "model_forward_time": 0.11537766456604004,
      "step": 37606
    },
    {
      "epoch": 0.00022952880859375,
      "step": 37606,
      "training_step_time": 0.3907325267791748
    },
    {
      "epoch": 0.000229534912109375,
      "model_forward_time": 0.11571955680847168,
      "step": 37607
    },
    {
      "epoch": 0.000229534912109375,
      "step": 37607,
      "training_step_time": 0.41406965255737305
    },
    {
      "epoch": 0.000229541015625,
      "model_forward_time": 0.11511850357055664,
      "step": 37608
    },
    {
      "epoch": 0.000229541015625,
      "step": 37608,
      "training_step_time": 0.43073582649230957
    },
    {
      "epoch": 0.000229547119140625,
      "model_forward_time": 0.11531376838684082,
      "step": 37609
    },
    {
      "epoch": 0.000229547119140625,
      "step": 37609,
      "training_step_time": 0.41864800453186035
    },
    {
      "epoch": 0.00022955322265625,
      "grad_norm": 0.135413259267807,
      "learning_rate": 3.3478615285131994e-05,
      "loss": 0.0425,
      "step": 37610
    },
    {
      "epoch": 0.00022955322265625,
      "model_forward_time": 0.11565470695495605,
      "step": 37610
    },
    {
      "epoch": 0.00022955322265625,
      "step": 37610,
      "training_step_time": 0.4921903610229492
    },
    {
      "epoch": 0.000229559326171875,
      "model_forward_time": 0.11530876159667969,
      "step": 37611
    },
    {
      "epoch": 0.000229559326171875,
      "step": 37611,
      "training_step_time": 0.3899421691894531
    },
    {
      "epoch": 0.0002295654296875,
      "model_forward_time": 0.11782526969909668,
      "step": 37612
    },
    {
      "epoch": 0.0002295654296875,
      "step": 37612,
      "training_step_time": 0.39280271530151367
    },
    {
      "epoch": 0.000229571533203125,
      "model_forward_time": 0.1149752140045166,
      "step": 37613
    },
    {
      "epoch": 0.000229571533203125,
      "step": 37613,
      "training_step_time": 0.39676928520202637
    },
    {
      "epoch": 0.00022957763671875,
      "model_forward_time": 0.11508822441101074,
      "step": 37614
    },
    {
      "epoch": 0.00022957763671875,
      "step": 37614,
      "training_step_time": 0.3661634922027588
    },
    {
      "epoch": 0.000229583740234375,
      "model_forward_time": 0.11522150039672852,
      "step": 37615
    },
    {
      "epoch": 0.000229583740234375,
      "step": 37615,
      "training_step_time": 0.4532647132873535
    },
    {
      "epoch": 0.00022958984375,
      "model_forward_time": 0.11539506912231445,
      "step": 37616
    },
    {
      "epoch": 0.00022958984375,
      "step": 37616,
      "training_step_time": 0.4323291778564453
    },
    {
      "epoch": 0.000229595947265625,
      "model_forward_time": 0.11527562141418457,
      "step": 37617
    },
    {
      "epoch": 0.000229595947265625,
      "step": 37617,
      "training_step_time": 0.3789994716644287
    },
    {
      "epoch": 0.00022960205078125,
      "model_forward_time": 0.1149296760559082,
      "step": 37618
    },
    {
      "epoch": 0.00022960205078125,
      "step": 37618,
      "training_step_time": 0.38875722885131836
    },
    {
      "epoch": 0.000229608154296875,
      "model_forward_time": 0.1153249740600586,
      "step": 37619
    },
    {
      "epoch": 0.000229608154296875,
      "step": 37619,
      "training_step_time": 0.46146655082702637
    },
    {
      "epoch": 0.0002296142578125,
      "grad_norm": 0.10977182537317276,
      "learning_rate": 3.3452607852803584e-05,
      "loss": 0.04,
      "step": 37620
    },
    {
      "epoch": 0.0002296142578125,
      "model_forward_time": 0.11483240127563477,
      "step": 37620
    },
    {
      "epoch": 0.0002296142578125,
      "step": 37620,
      "training_step_time": 0.4039280414581299
    },
    {
      "epoch": 0.000229620361328125,
      "model_forward_time": 0.11548924446105957,
      "step": 37621
    },
    {
      "epoch": 0.000229620361328125,
      "step": 37621,
      "training_step_time": 0.3864414691925049
    },
    {
      "epoch": 0.00022962646484375,
      "model_forward_time": 0.11504483222961426,
      "step": 37622
    },
    {
      "epoch": 0.00022962646484375,
      "step": 37622,
      "training_step_time": 0.4242818355560303
    },
    {
      "epoch": 0.000229632568359375,
      "model_forward_time": 0.11531567573547363,
      "step": 37623
    },
    {
      "epoch": 0.000229632568359375,
      "step": 37623,
      "training_step_time": 0.39383792877197266
    },
    {
      "epoch": 0.000229638671875,
      "model_forward_time": 0.1147458553314209,
      "step": 37624
    },
    {
      "epoch": 0.000229638671875,
      "step": 37624,
      "training_step_time": 0.44262218475341797
    },
    {
      "epoch": 0.000229644775390625,
      "model_forward_time": 0.1150672435760498,
      "step": 37625
    },
    {
      "epoch": 0.000229644775390625,
      "step": 37625,
      "training_step_time": 0.3942759037017822
    },
    {
      "epoch": 0.00022965087890625,
      "model_forward_time": 0.11513924598693848,
      "step": 37626
    },
    {
      "epoch": 0.00022965087890625,
      "step": 37626,
      "training_step_time": 0.39794397354125977
    },
    {
      "epoch": 0.000229656982421875,
      "model_forward_time": 0.11502385139465332,
      "step": 37627
    },
    {
      "epoch": 0.000229656982421875,
      "step": 37627,
      "training_step_time": 0.388638973236084
    },
    {
      "epoch": 0.0002296630859375,
      "model_forward_time": 0.11511683464050293,
      "step": 37628
    },
    {
      "epoch": 0.0002296630859375,
      "step": 37628,
      "training_step_time": 0.3966662883758545
    },
    {
      "epoch": 0.000229669189453125,
      "model_forward_time": 0.11576700210571289,
      "step": 37629
    },
    {
      "epoch": 0.000229669189453125,
      "step": 37629,
      "training_step_time": 0.4284934997558594
    },
    {
      "epoch": 0.00022967529296875,
      "grad_norm": 0.12173211574554443,
      "learning_rate": 3.3426605447136004e-05,
      "loss": 0.0404,
      "step": 37630
    },
    {
      "epoch": 0.00022967529296875,
      "model_forward_time": 0.11574411392211914,
      "step": 37630
    },
    {
      "epoch": 0.00022967529296875,
      "step": 37630,
      "training_step_time": 0.3958549499511719
    },
    {
      "epoch": 0.000229681396484375,
      "model_forward_time": 0.11543703079223633,
      "step": 37631
    },
    {
      "epoch": 0.000229681396484375,
      "step": 37631,
      "training_step_time": 0.4200165271759033
    },
    {
      "epoch": 0.0002296875,
      "model_forward_time": 0.11449790000915527,
      "step": 37632
    },
    {
      "epoch": 0.0002296875,
      "step": 37632,
      "training_step_time": 0.4005002975463867
    },
    {
      "epoch": 0.000229693603515625,
      "model_forward_time": 0.11525845527648926,
      "step": 37633
    },
    {
      "epoch": 0.000229693603515625,
      "step": 37633,
      "training_step_time": 0.44958949089050293
    },
    {
      "epoch": 0.00022969970703125,
      "model_forward_time": 0.11544537544250488,
      "step": 37634
    },
    {
      "epoch": 0.00022969970703125,
      "step": 37634,
      "training_step_time": 0.4223001003265381
    },
    {
      "epoch": 0.000229705810546875,
      "model_forward_time": 0.11539816856384277,
      "step": 37635
    },
    {
      "epoch": 0.000229705810546875,
      "step": 37635,
      "training_step_time": 0.3776512145996094
    },
    {
      "epoch": 0.0002297119140625,
      "model_forward_time": 0.1157827377319336,
      "step": 37636
    },
    {
      "epoch": 0.0002297119140625,
      "step": 37636,
      "training_step_time": 0.40041661262512207
    },
    {
      "epoch": 0.000229718017578125,
      "model_forward_time": 0.11527442932128906,
      "step": 37637
    },
    {
      "epoch": 0.000229718017578125,
      "step": 37637,
      "training_step_time": 0.49616360664367676
    },
    {
      "epoch": 0.00022972412109375,
      "model_forward_time": 0.11475110054016113,
      "step": 37638
    },
    {
      "epoch": 0.00022972412109375,
      "step": 37638,
      "training_step_time": 0.3953735828399658
    },
    {
      "epoch": 0.000229730224609375,
      "model_forward_time": 0.11557364463806152,
      "step": 37639
    },
    {
      "epoch": 0.000229730224609375,
      "step": 37639,
      "training_step_time": 0.3969705104827881
    },
    {
      "epoch": 0.000229736328125,
      "grad_norm": 0.14237183332443237,
      "learning_rate": 3.3400608076028094e-05,
      "loss": 0.0479,
      "step": 37640
    },
    {
      "epoch": 0.000229736328125,
      "model_forward_time": 0.11540699005126953,
      "step": 37640
    },
    {
      "epoch": 0.000229736328125,
      "step": 37640,
      "training_step_time": 0.3875548839569092
    },
    {
      "epoch": 0.000229742431640625,
      "model_forward_time": 0.11596989631652832,
      "step": 37641
    },
    {
      "epoch": 0.000229742431640625,
      "step": 37641,
      "training_step_time": 0.3907935619354248
    },
    {
      "epoch": 0.00022974853515625,
      "model_forward_time": 0.11482596397399902,
      "step": 37642
    },
    {
      "epoch": 0.00022974853515625,
      "step": 37642,
      "training_step_time": 0.40312957763671875
    },
    {
      "epoch": 0.000229754638671875,
      "model_forward_time": 0.11582231521606445,
      "step": 37643
    },
    {
      "epoch": 0.000229754638671875,
      "step": 37643,
      "training_step_time": 0.4002978801727295
    },
    {
      "epoch": 0.0002297607421875,
      "model_forward_time": 0.11521363258361816,
      "step": 37644
    },
    {
      "epoch": 0.0002297607421875,
      "step": 37644,
      "training_step_time": 0.44206929206848145
    },
    {
      "epoch": 0.000229766845703125,
      "model_forward_time": 0.11565470695495605,
      "step": 37645
    },
    {
      "epoch": 0.000229766845703125,
      "step": 37645,
      "training_step_time": 0.5060060024261475
    },
    {
      "epoch": 0.00022977294921875,
      "model_forward_time": 0.1156315803527832,
      "step": 37646
    },
    {
      "epoch": 0.00022977294921875,
      "step": 37646,
      "training_step_time": 0.4297444820404053
    },
    {
      "epoch": 0.000229779052734375,
      "model_forward_time": 0.11552906036376953,
      "step": 37647
    },
    {
      "epoch": 0.000229779052734375,
      "step": 37647,
      "training_step_time": 0.42770862579345703
    },
    {
      "epoch": 0.00022978515625,
      "model_forward_time": 0.1148672103881836,
      "step": 37648
    },
    {
      "epoch": 0.00022978515625,
      "step": 37648,
      "training_step_time": 0.4745807647705078
    },
    {
      "epoch": 0.000229791259765625,
      "model_forward_time": 0.11471390724182129,
      "step": 37649
    },
    {
      "epoch": 0.000229791259765625,
      "step": 37649,
      "training_step_time": 0.38951921463012695
    },
    {
      "epoch": 0.00022979736328125,
      "grad_norm": 0.16262555122375488,
      "learning_rate": 3.337461574737716e-05,
      "loss": 0.0449,
      "step": 37650
    },
    {
      "epoch": 0.00022979736328125,
      "model_forward_time": 0.11501288414001465,
      "step": 37650
    },
    {
      "epoch": 0.00022979736328125,
      "step": 37650,
      "training_step_time": 0.39541053771972656
    },
    {
      "epoch": 0.000229803466796875,
      "model_forward_time": 0.11525821685791016,
      "step": 37651
    },
    {
      "epoch": 0.000229803466796875,
      "step": 37651,
      "training_step_time": 0.5172514915466309
    },
    {
      "epoch": 0.0002298095703125,
      "model_forward_time": 0.11474466323852539,
      "step": 37652
    },
    {
      "epoch": 0.0002298095703125,
      "step": 37652,
      "training_step_time": 0.4152686595916748
    },
    {
      "epoch": 0.000229815673828125,
      "model_forward_time": 0.11484050750732422,
      "step": 37653
    },
    {
      "epoch": 0.000229815673828125,
      "step": 37653,
      "training_step_time": 0.4790356159210205
    },
    {
      "epoch": 0.00022982177734375,
      "model_forward_time": 0.11482763290405273,
      "step": 37654
    },
    {
      "epoch": 0.00022982177734375,
      "step": 37654,
      "training_step_time": 0.4012928009033203
    },
    {
      "epoch": 0.000229827880859375,
      "model_forward_time": 0.11507415771484375,
      "step": 37655
    },
    {
      "epoch": 0.000229827880859375,
      "step": 37655,
      "training_step_time": 0.39293813705444336
    },
    {
      "epoch": 0.000229833984375,
      "model_forward_time": 0.11497926712036133,
      "step": 37656
    },
    {
      "epoch": 0.000229833984375,
      "step": 37656,
      "training_step_time": 0.38752102851867676
    },
    {
      "epoch": 0.000229840087890625,
      "model_forward_time": 0.11505818367004395,
      "step": 37657
    },
    {
      "epoch": 0.000229840087890625,
      "step": 37657,
      "training_step_time": 0.38767290115356445
    },
    {
      "epoch": 0.00022984619140625,
      "model_forward_time": 0.11618852615356445,
      "step": 37658
    },
    {
      "epoch": 0.00022984619140625,
      "step": 37658,
      "training_step_time": 0.39955830574035645
    },
    {
      "epoch": 0.000229852294921875,
      "model_forward_time": 0.11539673805236816,
      "step": 37659
    },
    {
      "epoch": 0.000229852294921875,
      "step": 37659,
      "training_step_time": 0.5033383369445801
    },
    {
      "epoch": 0.0002298583984375,
      "grad_norm": 0.0992860347032547,
      "learning_rate": 3.3348628469079e-05,
      "loss": 0.0416,
      "step": 37660
    },
    {
      "epoch": 0.0002298583984375,
      "model_forward_time": 0.11431527137756348,
      "step": 37660
    },
    {
      "epoch": 0.0002298583984375,
      "step": 37660,
      "training_step_time": 0.4925079345703125
    },
    {
      "epoch": 0.000229864501953125,
      "model_forward_time": 0.11503148078918457,
      "step": 37661
    },
    {
      "epoch": 0.000229864501953125,
      "step": 37661,
      "training_step_time": 0.4208104610443115
    },
    {
      "epoch": 0.00022987060546875,
      "model_forward_time": 0.11510109901428223,
      "step": 37662
    },
    {
      "epoch": 0.00022987060546875,
      "step": 37662,
      "training_step_time": 0.387662410736084
    },
    {
      "epoch": 0.000229876708984375,
      "model_forward_time": 0.12370514869689941,
      "step": 37663
    },
    {
      "epoch": 0.000229876708984375,
      "step": 37663,
      "training_step_time": 0.3881223201751709
    },
    {
      "epoch": 0.0002298828125,
      "model_forward_time": 0.11509275436401367,
      "step": 37664
    },
    {
      "epoch": 0.0002298828125,
      "step": 37664,
      "training_step_time": 0.3950064182281494
    },
    {
      "epoch": 0.000229888916015625,
      "model_forward_time": 0.1147921085357666,
      "step": 37665
    },
    {
      "epoch": 0.000229888916015625,
      "step": 37665,
      "training_step_time": 0.39676976203918457
    },
    {
      "epoch": 0.00022989501953125,
      "model_forward_time": 0.11507177352905273,
      "step": 37666
    },
    {
      "epoch": 0.00022989501953125,
      "step": 37666,
      "training_step_time": 0.43848180770874023
    },
    {
      "epoch": 0.000229901123046875,
      "model_forward_time": 0.1148977279663086,
      "step": 37667
    },
    {
      "epoch": 0.000229901123046875,
      "step": 37667,
      "training_step_time": 0.4207625389099121
    },
    {
      "epoch": 0.0002299072265625,
      "model_forward_time": 0.11497950553894043,
      "step": 37668
    },
    {
      "epoch": 0.0002299072265625,
      "step": 37668,
      "training_step_time": 0.48935699462890625
    },
    {
      "epoch": 0.000229913330078125,
      "model_forward_time": 0.11537694931030273,
      "step": 37669
    },
    {
      "epoch": 0.000229913330078125,
      "step": 37669,
      "training_step_time": 0.3899402618408203
    },
    {
      "epoch": 0.00022991943359375,
      "grad_norm": 0.1485721319913864,
      "learning_rate": 3.332264624902787e-05,
      "loss": 0.044,
      "step": 37670
    },
    {
      "epoch": 0.00022991943359375,
      "model_forward_time": 0.11657166481018066,
      "step": 37670
    },
    {
      "epoch": 0.00022991943359375,
      "step": 37670,
      "training_step_time": 0.3918907642364502
    },
    {
      "epoch": 0.000229925537109375,
      "model_forward_time": 0.11465740203857422,
      "step": 37671
    },
    {
      "epoch": 0.000229925537109375,
      "step": 37671,
      "training_step_time": 0.38150477409362793
    },
    {
      "epoch": 0.000229931640625,
      "model_forward_time": 0.1147310733795166,
      "step": 37672
    },
    {
      "epoch": 0.000229931640625,
      "step": 37672,
      "training_step_time": 0.39312124252319336
    },
    {
      "epoch": 0.000229937744140625,
      "model_forward_time": 0.11522078514099121,
      "step": 37673
    },
    {
      "epoch": 0.000229937744140625,
      "step": 37673,
      "training_step_time": 0.4150378704071045
    },
    {
      "epoch": 0.00022994384765625,
      "model_forward_time": 0.11558985710144043,
      "step": 37674
    },
    {
      "epoch": 0.00022994384765625,
      "step": 37674,
      "training_step_time": 0.4444417953491211
    },
    {
      "epoch": 0.000229949951171875,
      "model_forward_time": 0.11542272567749023,
      "step": 37675
    },
    {
      "epoch": 0.000229949951171875,
      "step": 37675,
      "training_step_time": 0.456376314163208
    },
    {
      "epoch": 0.0002299560546875,
      "model_forward_time": 0.11521339416503906,
      "step": 37676
    },
    {
      "epoch": 0.0002299560546875,
      "step": 37676,
      "training_step_time": 0.39380407333374023
    },
    {
      "epoch": 0.000229962158203125,
      "model_forward_time": 0.11559557914733887,
      "step": 37677
    },
    {
      "epoch": 0.000229962158203125,
      "step": 37677,
      "training_step_time": 0.3975963592529297
    },
    {
      "epoch": 0.00022996826171875,
      "model_forward_time": 0.11509037017822266,
      "step": 37678
    },
    {
      "epoch": 0.00022996826171875,
      "step": 37678,
      "training_step_time": 0.3849978446960449
    },
    {
      "epoch": 0.000229974365234375,
      "model_forward_time": 0.11507153511047363,
      "step": 37679
    },
    {
      "epoch": 0.000229974365234375,
      "step": 37679,
      "training_step_time": 0.3954792022705078
    },
    {
      "epoch": 0.00022998046875,
      "grad_norm": 0.16584616899490356,
      "learning_rate": 3.329666909511645e-05,
      "loss": 0.045,
      "step": 37680
    },
    {
      "epoch": 0.00022998046875,
      "model_forward_time": 0.1149449348449707,
      "step": 37680
    },
    {
      "epoch": 0.00022998046875,
      "step": 37680,
      "training_step_time": 0.3982725143432617
    },
    {
      "epoch": 0.000229986572265625,
      "model_forward_time": 0.11516714096069336,
      "step": 37681
    },
    {
      "epoch": 0.000229986572265625,
      "step": 37681,
      "training_step_time": 0.41940832138061523
    },
    {
      "epoch": 0.00022999267578125,
      "model_forward_time": 0.11494016647338867,
      "step": 37682
    },
    {
      "epoch": 0.00022999267578125,
      "step": 37682,
      "training_step_time": 0.3812580108642578
    },
    {
      "epoch": 0.000229998779296875,
      "model_forward_time": 0.11590814590454102,
      "step": 37683
    },
    {
      "epoch": 0.000229998779296875,
      "step": 37683,
      "training_step_time": 0.3973233699798584
    },
    {
      "epoch": 0.0002300048828125,
      "model_forward_time": 0.11648869514465332,
      "step": 37684
    },
    {
      "epoch": 0.0002300048828125,
      "step": 37684,
      "training_step_time": 0.3923370838165283
    },
    {
      "epoch": 0.000230010986328125,
      "model_forward_time": 0.11467695236206055,
      "step": 37685
    },
    {
      "epoch": 0.000230010986328125,
      "step": 37685,
      "training_step_time": 0.3958711624145508
    },
    {
      "epoch": 0.00023001708984375,
      "model_forward_time": 0.11610674858093262,
      "step": 37686
    },
    {
      "epoch": 0.00023001708984375,
      "step": 37686,
      "training_step_time": 0.39614033699035645
    },
    {
      "epoch": 0.000230023193359375,
      "model_forward_time": 0.11509323120117188,
      "step": 37687
    },
    {
      "epoch": 0.000230023193359375,
      "step": 37687,
      "training_step_time": 0.390714168548584
    },
    {
      "epoch": 0.000230029296875,
      "model_forward_time": 0.11580014228820801,
      "step": 37688
    },
    {
      "epoch": 0.000230029296875,
      "step": 37688,
      "training_step_time": 0.45334410667419434
    },
    {
      "epoch": 0.000230035400390625,
      "model_forward_time": 0.11522650718688965,
      "step": 37689
    },
    {
      "epoch": 0.000230035400390625,
      "step": 37689,
      "training_step_time": 0.5143849849700928
    },
    {
      "epoch": 0.00023004150390625,
      "grad_norm": 0.11391939222812653,
      "learning_rate": 3.327069701523595e-05,
      "loss": 0.0402,
      "step": 37690
    },
    {
      "epoch": 0.00023004150390625,
      "model_forward_time": 0.11596465110778809,
      "step": 37690
    },
    {
      "epoch": 0.00023004150390625,
      "step": 37690,
      "training_step_time": 0.43279123306274414
    },
    {
      "epoch": 0.000230047607421875,
      "model_forward_time": 0.11502861976623535,
      "step": 37691
    },
    {
      "epoch": 0.000230047607421875,
      "step": 37691,
      "training_step_time": 0.3923990726470947
    },
    {
      "epoch": 0.0002300537109375,
      "model_forward_time": 0.11493897438049316,
      "step": 37692
    },
    {
      "epoch": 0.0002300537109375,
      "step": 37692,
      "training_step_time": 0.39469480514526367
    },
    {
      "epoch": 0.000230059814453125,
      "model_forward_time": 0.11566686630249023,
      "step": 37693
    },
    {
      "epoch": 0.000230059814453125,
      "step": 37693,
      "training_step_time": 0.3907737731933594
    },
    {
      "epoch": 0.00023006591796875,
      "model_forward_time": 0.11526250839233398,
      "step": 37694
    },
    {
      "epoch": 0.00023006591796875,
      "step": 37694,
      "training_step_time": 0.39173245429992676
    },
    {
      "epoch": 0.000230072021484375,
      "model_forward_time": 0.11488866806030273,
      "step": 37695
    },
    {
      "epoch": 0.000230072021484375,
      "step": 37695,
      "training_step_time": 0.3816704750061035
    },
    {
      "epoch": 0.000230078125,
      "model_forward_time": 0.11646676063537598,
      "step": 37696
    },
    {
      "epoch": 0.000230078125,
      "step": 37696,
      "training_step_time": 0.4485774040222168
    },
    {
      "epoch": 0.000230084228515625,
      "model_forward_time": 0.1157069206237793,
      "step": 37697
    },
    {
      "epoch": 0.000230084228515625,
      "step": 37697,
      "training_step_time": 0.5812468528747559
    },
    {
      "epoch": 0.00023009033203125,
      "model_forward_time": 0.11485958099365234,
      "step": 37698
    },
    {
      "epoch": 0.00023009033203125,
      "step": 37698,
      "training_step_time": 0.38849902153015137
    },
    {
      "epoch": 0.000230096435546875,
      "model_forward_time": 0.11525845527648926,
      "step": 37699
    },
    {
      "epoch": 0.000230096435546875,
      "step": 37699,
      "training_step_time": 0.3870425224304199
    },
    {
      "epoch": 0.0002301025390625,
      "grad_norm": 0.14685197174549103,
      "learning_rate": 3.324473001727597e-05,
      "loss": 0.0464,
      "step": 37700
    },
    {
      "epoch": 0.0002301025390625,
      "model_forward_time": 0.11554384231567383,
      "step": 37700
    },
    {
      "epoch": 0.0002301025390625,
      "step": 37700,
      "training_step_time": 0.3956298828125
    },
    {
      "epoch": 0.000230108642578125,
      "model_forward_time": 0.11545777320861816,
      "step": 37701
    },
    {
      "epoch": 0.000230108642578125,
      "step": 37701,
      "training_step_time": 0.42113780975341797
    },
    {
      "epoch": 0.00023011474609375,
      "model_forward_time": 0.11574268341064453,
      "step": 37702
    },
    {
      "epoch": 0.00023011474609375,
      "step": 37702,
      "training_step_time": 0.370746374130249
    },
    {
      "epoch": 0.000230120849609375,
      "model_forward_time": 0.11612892150878906,
      "step": 37703
    },
    {
      "epoch": 0.000230120849609375,
      "step": 37703,
      "training_step_time": 0.6542539596557617
    },
    {
      "epoch": 0.000230126953125,
      "model_forward_time": 0.11525940895080566,
      "step": 37704
    },
    {
      "epoch": 0.000230126953125,
      "step": 37704,
      "training_step_time": 0.44179868698120117
    },
    {
      "epoch": 0.000230133056640625,
      "model_forward_time": 0.11526083946228027,
      "step": 37705
    },
    {
      "epoch": 0.000230133056640625,
      "step": 37705,
      "training_step_time": 0.38309168815612793
    },
    {
      "epoch": 0.00023013916015625,
      "model_forward_time": 0.1143639087677002,
      "step": 37706
    },
    {
      "epoch": 0.00023013916015625,
      "step": 37706,
      "training_step_time": 0.3983802795410156
    },
    {
      "epoch": 0.000230145263671875,
      "model_forward_time": 0.1149129867553711,
      "step": 37707
    },
    {
      "epoch": 0.000230145263671875,
      "step": 37707,
      "training_step_time": 0.38878726959228516
    },
    {
      "epoch": 0.0002301513671875,
      "model_forward_time": 0.11479640007019043,
      "step": 37708
    },
    {
      "epoch": 0.0002301513671875,
      "step": 37708,
      "training_step_time": 0.389559268951416
    },
    {
      "epoch": 0.000230157470703125,
      "model_forward_time": 0.11455893516540527,
      "step": 37709
    },
    {
      "epoch": 0.000230157470703125,
      "step": 37709,
      "training_step_time": 0.5190742015838623
    },
    {
      "epoch": 0.00023016357421875,
      "grad_norm": 0.12916305661201477,
      "learning_rate": 3.321876810912461e-05,
      "loss": 0.0425,
      "step": 37710
    },
    {
      "epoch": 0.00023016357421875,
      "model_forward_time": 0.11594271659851074,
      "step": 37710
    },
    {
      "epoch": 0.00023016357421875,
      "step": 37710,
      "training_step_time": 0.4146249294281006
    },
    {
      "epoch": 0.000230169677734375,
      "model_forward_time": 0.11485576629638672,
      "step": 37711
    },
    {
      "epoch": 0.000230169677734375,
      "step": 37711,
      "training_step_time": 0.43830275535583496
    },
    {
      "epoch": 0.00023017578125,
      "model_forward_time": 0.11523056030273438,
      "step": 37712
    },
    {
      "epoch": 0.00023017578125,
      "step": 37712,
      "training_step_time": 0.397930383682251
    },
    {
      "epoch": 0.000230181884765625,
      "model_forward_time": 0.11486077308654785,
      "step": 37713
    },
    {
      "epoch": 0.000230181884765625,
      "step": 37713,
      "training_step_time": 0.3896160125732422
    },
    {
      "epoch": 0.00023018798828125,
      "model_forward_time": 0.11515402793884277,
      "step": 37714
    },
    {
      "epoch": 0.00023018798828125,
      "step": 37714,
      "training_step_time": 0.3911447525024414
    },
    {
      "epoch": 0.000230194091796875,
      "model_forward_time": 0.1156928539276123,
      "step": 37715
    },
    {
      "epoch": 0.000230194091796875,
      "step": 37715,
      "training_step_time": 0.5192782878875732
    },
    {
      "epoch": 0.0002302001953125,
      "model_forward_time": 0.1145327091217041,
      "step": 37716
    },
    {
      "epoch": 0.0002302001953125,
      "step": 37716,
      "training_step_time": 0.3680233955383301
    },
    {
      "epoch": 0.000230206298828125,
      "model_forward_time": 0.11514735221862793,
      "step": 37717
    },
    {
      "epoch": 0.000230206298828125,
      "step": 37717,
      "training_step_time": 0.4256429672241211
    },
    {
      "epoch": 0.00023021240234375,
      "model_forward_time": 0.11549592018127441,
      "step": 37718
    },
    {
      "epoch": 0.00023021240234375,
      "step": 37718,
      "training_step_time": 0.5044214725494385
    },
    {
      "epoch": 0.000230218505859375,
      "model_forward_time": 0.11514163017272949,
      "step": 37719
    },
    {
      "epoch": 0.000230218505859375,
      "step": 37719,
      "training_step_time": 0.38694071769714355
    },
    {
      "epoch": 0.000230224609375,
      "grad_norm": 0.12192884087562561,
      "learning_rate": 3.3192811298668434e-05,
      "loss": 0.043,
      "step": 37720
    },
    {
      "epoch": 0.000230224609375,
      "model_forward_time": 0.11521673202514648,
      "step": 37720
    },
    {
      "epoch": 0.000230224609375,
      "step": 37720,
      "training_step_time": 0.3893444538116455
    },
    {
      "epoch": 0.000230230712890625,
      "model_forward_time": 0.11495828628540039,
      "step": 37721
    },
    {
      "epoch": 0.000230230712890625,
      "step": 37721,
      "training_step_time": 0.3859243392944336
    },
    {
      "epoch": 0.00023023681640625,
      "model_forward_time": 0.11516404151916504,
      "step": 37722
    },
    {
      "epoch": 0.00023023681640625,
      "step": 37722,
      "training_step_time": 0.39748120307922363
    },
    {
      "epoch": 0.000230242919921875,
      "model_forward_time": 0.11534643173217773,
      "step": 37723
    },
    {
      "epoch": 0.000230242919921875,
      "step": 37723,
      "training_step_time": 0.3914468288421631
    },
    {
      "epoch": 0.0002302490234375,
      "model_forward_time": 0.11495161056518555,
      "step": 37724
    },
    {
      "epoch": 0.0002302490234375,
      "step": 37724,
      "training_step_time": 0.4679579734802246
    },
    {
      "epoch": 0.000230255126953125,
      "model_forward_time": 0.11570525169372559,
      "step": 37725
    },
    {
      "epoch": 0.000230255126953125,
      "step": 37725,
      "training_step_time": 0.39975571632385254
    },
    {
      "epoch": 0.00023026123046875,
      "model_forward_time": 0.11566472053527832,
      "step": 37726
    },
    {
      "epoch": 0.00023026123046875,
      "step": 37726,
      "training_step_time": 0.40851545333862305
    },
    {
      "epoch": 0.000230267333984375,
      "model_forward_time": 0.11523032188415527,
      "step": 37727
    },
    {
      "epoch": 0.000230267333984375,
      "step": 37727,
      "training_step_time": 0.38741326332092285
    },
    {
      "epoch": 0.0002302734375,
      "model_forward_time": 0.11592912673950195,
      "step": 37728
    },
    {
      "epoch": 0.0002302734375,
      "step": 37728,
      "training_step_time": 0.41770100593566895
    },
    {
      "epoch": 0.000230279541015625,
      "model_forward_time": 0.11489200592041016,
      "step": 37729
    },
    {
      "epoch": 0.000230279541015625,
      "step": 37729,
      "training_step_time": 0.4149632453918457
    },
    {
      "epoch": 0.00023028564453125,
      "grad_norm": 0.10996776819229126,
      "learning_rate": 3.316685959379241e-05,
      "loss": 0.0425,
      "step": 37730
    },
    {
      "epoch": 0.00023028564453125,
      "model_forward_time": 0.11593770980834961,
      "step": 37730
    },
    {
      "epoch": 0.00023028564453125,
      "step": 37730,
      "training_step_time": 0.39534473419189453
    },
    {
      "epoch": 0.000230291748046875,
      "model_forward_time": 0.11612582206726074,
      "step": 37731
    },
    {
      "epoch": 0.000230291748046875,
      "step": 37731,
      "training_step_time": 0.4553642272949219
    },
    {
      "epoch": 0.0002302978515625,
      "model_forward_time": 0.11619019508361816,
      "step": 37732
    },
    {
      "epoch": 0.0002302978515625,
      "step": 37732,
      "training_step_time": 0.41205763816833496
    },
    {
      "epoch": 0.000230303955078125,
      "model_forward_time": 0.11529207229614258,
      "step": 37733
    },
    {
      "epoch": 0.000230303955078125,
      "step": 37733,
      "training_step_time": 0.5422344207763672
    },
    {
      "epoch": 0.00023031005859375,
      "model_forward_time": 0.11495828628540039,
      "step": 37734
    },
    {
      "epoch": 0.00023031005859375,
      "step": 37734,
      "training_step_time": 0.3944742679595947
    },
    {
      "epoch": 0.000230316162109375,
      "model_forward_time": 0.11474728584289551,
      "step": 37735
    },
    {
      "epoch": 0.000230316162109375,
      "step": 37735,
      "training_step_time": 0.3854215145111084
    },
    {
      "epoch": 0.000230322265625,
      "model_forward_time": 0.11511778831481934,
      "step": 37736
    },
    {
      "epoch": 0.000230322265625,
      "step": 37736,
      "training_step_time": 0.39138174057006836
    },
    {
      "epoch": 0.000230328369140625,
      "model_forward_time": 0.11490201950073242,
      "step": 37737
    },
    {
      "epoch": 0.000230328369140625,
      "step": 37737,
      "training_step_time": 0.39952564239501953
    },
    {
      "epoch": 0.00023033447265625,
      "model_forward_time": 0.11534690856933594,
      "step": 37738
    },
    {
      "epoch": 0.00023033447265625,
      "step": 37738,
      "training_step_time": 0.38992762565612793
    },
    {
      "epoch": 0.000230340576171875,
      "model_forward_time": 0.11514520645141602,
      "step": 37739
    },
    {
      "epoch": 0.000230340576171875,
      "step": 37739,
      "training_step_time": 0.5900232791900635
    },
    {
      "epoch": 0.0002303466796875,
      "grad_norm": 0.0867210403084755,
      "learning_rate": 3.3140913002379995e-05,
      "loss": 0.0408,
      "step": 37740
    },
    {
      "epoch": 0.0002303466796875,
      "model_forward_time": 0.11414837837219238,
      "step": 37740
    },
    {
      "epoch": 0.0002303466796875,
      "step": 37740,
      "training_step_time": 0.5089612007141113
    },
    {
      "epoch": 0.000230352783203125,
      "model_forward_time": 0.11433100700378418,
      "step": 37741
    },
    {
      "epoch": 0.000230352783203125,
      "step": 37741,
      "training_step_time": 0.39186906814575195
    },
    {
      "epoch": 0.00023035888671875,
      "model_forward_time": 0.11430883407592773,
      "step": 37742
    },
    {
      "epoch": 0.00023035888671875,
      "step": 37742,
      "training_step_time": 0.40741825103759766
    },
    {
      "epoch": 0.000230364990234375,
      "model_forward_time": 0.11476683616638184,
      "step": 37743
    },
    {
      "epoch": 0.000230364990234375,
      "step": 37743,
      "training_step_time": 0.3937985897064209
    },
    {
      "epoch": 0.00023037109375,
      "model_forward_time": 0.11469006538391113,
      "step": 37744
    },
    {
      "epoch": 0.00023037109375,
      "step": 37744,
      "training_step_time": 0.3968379497528076
    },
    {
      "epoch": 0.000230377197265625,
      "model_forward_time": 0.11600232124328613,
      "step": 37745
    },
    {
      "epoch": 0.000230377197265625,
      "step": 37745,
      "training_step_time": 0.4936642646789551
    },
    {
      "epoch": 0.00023038330078125,
      "model_forward_time": 0.11564135551452637,
      "step": 37746
    },
    {
      "epoch": 0.00023038330078125,
      "step": 37746,
      "training_step_time": 0.45682787895202637
    },
    {
      "epoch": 0.000230389404296875,
      "model_forward_time": 0.11559796333312988,
      "step": 37747
    },
    {
      "epoch": 0.000230389404296875,
      "step": 37747,
      "training_step_time": 0.4774453639984131
    },
    {
      "epoch": 0.0002303955078125,
      "model_forward_time": 0.11494874954223633,
      "step": 37748
    },
    {
      "epoch": 0.0002303955078125,
      "step": 37748,
      "training_step_time": 0.3922131061553955
    },
    {
      "epoch": 0.000230401611328125,
      "model_forward_time": 0.11491560935974121,
      "step": 37749
    },
    {
      "epoch": 0.000230401611328125,
      "step": 37749,
      "training_step_time": 0.3842201232910156
    },
    {
      "epoch": 0.00023040771484375,
      "grad_norm": 0.10605558753013611,
      "learning_rate": 3.3114971532313056e-05,
      "loss": 0.0397,
      "step": 37750
    },
    {
      "epoch": 0.00023040771484375,
      "model_forward_time": 0.11490130424499512,
      "step": 37750
    },
    {
      "epoch": 0.00023040771484375,
      "step": 37750,
      "training_step_time": 0.3807382583618164
    },
    {
      "epoch": 0.000230413818359375,
      "model_forward_time": 0.11524581909179688,
      "step": 37751
    },
    {
      "epoch": 0.000230413818359375,
      "step": 37751,
      "training_step_time": 0.3879566192626953
    },
    {
      "epoch": 0.000230419921875,
      "model_forward_time": 0.11450815200805664,
      "step": 37752
    },
    {
      "epoch": 0.000230419921875,
      "step": 37752,
      "training_step_time": 0.3932054042816162
    },
    {
      "epoch": 0.000230426025390625,
      "model_forward_time": 0.11499738693237305,
      "step": 37753
    },
    {
      "epoch": 0.000230426025390625,
      "step": 37753,
      "training_step_time": 0.4189937114715576
    },
    {
      "epoch": 0.00023043212890625,
      "model_forward_time": 0.11585831642150879,
      "step": 37754
    },
    {
      "epoch": 0.00023043212890625,
      "step": 37754,
      "training_step_time": 0.3979213237762451
    },
    {
      "epoch": 0.000230438232421875,
      "model_forward_time": 0.11601853370666504,
      "step": 37755
    },
    {
      "epoch": 0.000230438232421875,
      "step": 37755,
      "training_step_time": 0.41933202743530273
    },
    {
      "epoch": 0.0002304443359375,
      "model_forward_time": 0.11516094207763672,
      "step": 37756
    },
    {
      "epoch": 0.0002304443359375,
      "step": 37756,
      "training_step_time": 0.4486854076385498
    },
    {
      "epoch": 0.000230450439453125,
      "model_forward_time": 0.11495661735534668,
      "step": 37757
    },
    {
      "epoch": 0.000230450439453125,
      "step": 37757,
      "training_step_time": 0.40433192253112793
    },
    {
      "epoch": 0.00023045654296875,
      "model_forward_time": 0.11504721641540527,
      "step": 37758
    },
    {
      "epoch": 0.00023045654296875,
      "step": 37758,
      "training_step_time": 0.3949270248413086
    },
    {
      "epoch": 0.000230462646484375,
      "model_forward_time": 0.11507987976074219,
      "step": 37759
    },
    {
      "epoch": 0.000230462646484375,
      "step": 37759,
      "training_step_time": 0.39159417152404785
    },
    {
      "epoch": 0.00023046875,
      "grad_norm": 0.1035483255982399,
      "learning_rate": 3.308903519147194e-05,
      "loss": 0.0399,
      "step": 37760
    },
    {
      "epoch": 0.00023046875,
      "model_forward_time": 0.11532711982727051,
      "step": 37760
    },
    {
      "epoch": 0.00023046875,
      "step": 37760,
      "training_step_time": 0.392200231552124
    },
    {
      "epoch": 0.000230474853515625,
      "model_forward_time": 0.11546516418457031,
      "step": 37761
    },
    {
      "epoch": 0.000230474853515625,
      "step": 37761,
      "training_step_time": 0.504626989364624
    },
    {
      "epoch": 0.00023048095703125,
      "model_forward_time": 0.11480021476745605,
      "step": 37762
    },
    {
      "epoch": 0.00023048095703125,
      "step": 37762,
      "training_step_time": 0.5312097072601318
    },
    {
      "epoch": 0.000230487060546875,
      "model_forward_time": 0.11496686935424805,
      "step": 37763
    },
    {
      "epoch": 0.000230487060546875,
      "step": 37763,
      "training_step_time": 0.38445138931274414
    },
    {
      "epoch": 0.0002304931640625,
      "model_forward_time": 0.11581206321716309,
      "step": 37764
    },
    {
      "epoch": 0.0002304931640625,
      "step": 37764,
      "training_step_time": 0.3904440402984619
    },
    {
      "epoch": 0.000230499267578125,
      "model_forward_time": 0.11474037170410156,
      "step": 37765
    },
    {
      "epoch": 0.000230499267578125,
      "step": 37765,
      "training_step_time": 0.39423441886901855
    },
    {
      "epoch": 0.00023050537109375,
      "model_forward_time": 0.11512494087219238,
      "step": 37766
    },
    {
      "epoch": 0.00023050537109375,
      "step": 37766,
      "training_step_time": 0.38757896423339844
    },
    {
      "epoch": 0.000230511474609375,
      "model_forward_time": 0.1157383918762207,
      "step": 37767
    },
    {
      "epoch": 0.000230511474609375,
      "step": 37767,
      "training_step_time": 0.47942638397216797
    },
    {
      "epoch": 0.000230517578125,
      "model_forward_time": 0.1151878833770752,
      "step": 37768
    },
    {
      "epoch": 0.000230517578125,
      "step": 37768,
      "training_step_time": 0.4313929080963135
    },
    {
      "epoch": 0.000230523681640625,
      "model_forward_time": 0.11445784568786621,
      "step": 37769
    },
    {
      "epoch": 0.000230523681640625,
      "step": 37769,
      "training_step_time": 0.4813826084136963
    },
    {
      "epoch": 0.00023052978515625,
      "grad_norm": 0.09709583967924118,
      "learning_rate": 3.3063103987735433e-05,
      "loss": 0.0415,
      "step": 37770
    },
    {
      "epoch": 0.00023052978515625,
      "model_forward_time": 0.11554646492004395,
      "step": 37770
    },
    {
      "epoch": 0.00023052978515625,
      "step": 37770,
      "training_step_time": 0.43972325325012207
    },
    {
      "epoch": 0.000230535888671875,
      "model_forward_time": 0.11491775512695312,
      "step": 37771
    },
    {
      "epoch": 0.000230535888671875,
      "step": 37771,
      "training_step_time": 0.41905713081359863
    },
    {
      "epoch": 0.0002305419921875,
      "model_forward_time": 0.11538410186767578,
      "step": 37772
    },
    {
      "epoch": 0.0002305419921875,
      "step": 37772,
      "training_step_time": 0.39138102531433105
    },
    {
      "epoch": 0.000230548095703125,
      "model_forward_time": 0.1148519515991211,
      "step": 37773
    },
    {
      "epoch": 0.000230548095703125,
      "step": 37773,
      "training_step_time": 0.396472692489624
    },
    {
      "epoch": 0.00023055419921875,
      "model_forward_time": 0.11544489860534668,
      "step": 37774
    },
    {
      "epoch": 0.00023055419921875,
      "step": 37774,
      "training_step_time": 0.41043949127197266
    },
    {
      "epoch": 0.000230560302734375,
      "model_forward_time": 0.11506271362304688,
      "step": 37775
    },
    {
      "epoch": 0.000230560302734375,
      "step": 37775,
      "training_step_time": 0.48769450187683105
    },
    {
      "epoch": 0.00023056640625,
      "model_forward_time": 0.11479973793029785,
      "step": 37776
    },
    {
      "epoch": 0.00023056640625,
      "step": 37776,
      "training_step_time": 0.4376082420349121
    },
    {
      "epoch": 0.000230572509765625,
      "model_forward_time": 0.11446547508239746,
      "step": 37777
    },
    {
      "epoch": 0.000230572509765625,
      "step": 37777,
      "training_step_time": 0.462860107421875
    },
    {
      "epoch": 0.00023057861328125,
      "model_forward_time": 0.11504220962524414,
      "step": 37778
    },
    {
      "epoch": 0.00023057861328125,
      "step": 37778,
      "training_step_time": 0.39132142066955566
    },
    {
      "epoch": 0.000230584716796875,
      "model_forward_time": 0.11502957344055176,
      "step": 37779
    },
    {
      "epoch": 0.000230584716796875,
      "step": 37779,
      "training_step_time": 0.3775637149810791
    },
    {
      "epoch": 0.0002305908203125,
      "grad_norm": 0.09368695318698883,
      "learning_rate": 3.3037177928980735e-05,
      "loss": 0.0368,
      "step": 37780
    },
    {
      "epoch": 0.0002305908203125,
      "model_forward_time": 0.11469006538391113,
      "step": 37780
    },
    {
      "epoch": 0.0002305908203125,
      "step": 37780,
      "training_step_time": 0.39807558059692383
    },
    {
      "epoch": 0.000230596923828125,
      "model_forward_time": 0.11523962020874023,
      "step": 37781
    },
    {
      "epoch": 0.000230596923828125,
      "step": 37781,
      "training_step_time": 0.4878807067871094
    },
    {
      "epoch": 0.00023060302734375,
      "model_forward_time": 0.11466360092163086,
      "step": 37782
    },
    {
      "epoch": 0.00023060302734375,
      "step": 37782,
      "training_step_time": 0.4078991413116455
    },
    {
      "epoch": 0.000230609130859375,
      "model_forward_time": 0.11499381065368652,
      "step": 37783
    },
    {
      "epoch": 0.000230609130859375,
      "step": 37783,
      "training_step_time": 0.4938960075378418
    },
    {
      "epoch": 0.000230615234375,
      "model_forward_time": 0.11467885971069336,
      "step": 37784
    },
    {
      "epoch": 0.000230615234375,
      "step": 37784,
      "training_step_time": 0.4088127613067627
    },
    {
      "epoch": 0.000230621337890625,
      "model_forward_time": 0.11562681198120117,
      "step": 37785
    },
    {
      "epoch": 0.000230621337890625,
      "step": 37785,
      "training_step_time": 0.39503026008605957
    },
    {
      "epoch": 0.00023062744140625,
      "model_forward_time": 0.11499595642089844,
      "step": 37786
    },
    {
      "epoch": 0.00023062744140625,
      "step": 37786,
      "training_step_time": 0.3958451747894287
    },
    {
      "epoch": 0.000230633544921875,
      "model_forward_time": 0.11557412147521973,
      "step": 37787
    },
    {
      "epoch": 0.000230633544921875,
      "step": 37787,
      "training_step_time": 0.486980676651001
    },
    {
      "epoch": 0.0002306396484375,
      "model_forward_time": 0.11437082290649414,
      "step": 37788
    },
    {
      "epoch": 0.0002306396484375,
      "step": 37788,
      "training_step_time": 0.3670022487640381
    },
    {
      "epoch": 0.000230645751953125,
      "model_forward_time": 0.11514830589294434,
      "step": 37789
    },
    {
      "epoch": 0.000230645751953125,
      "step": 37789,
      "training_step_time": 0.4476630687713623
    },
    {
      "epoch": 0.00023065185546875,
      "grad_norm": 0.12419591099023819,
      "learning_rate": 3.301125702308353e-05,
      "loss": 0.0423,
      "step": 37790
    },
    {
      "epoch": 0.00023065185546875,
      "model_forward_time": 0.11478495597839355,
      "step": 37790
    },
    {
      "epoch": 0.00023065185546875,
      "step": 37790,
      "training_step_time": 0.40715503692626953
    },
    {
      "epoch": 0.000230657958984375,
      "model_forward_time": 0.11510443687438965,
      "step": 37791
    },
    {
      "epoch": 0.000230657958984375,
      "step": 37791,
      "training_step_time": 0.4524855613708496
    },
    {
      "epoch": 0.0002306640625,
      "model_forward_time": 0.1154787540435791,
      "step": 37792
    },
    {
      "epoch": 0.0002306640625,
      "step": 37792,
      "training_step_time": 0.39771127700805664
    },
    {
      "epoch": 0.000230670166015625,
      "model_forward_time": 0.11493539810180664,
      "step": 37793
    },
    {
      "epoch": 0.000230670166015625,
      "step": 37793,
      "training_step_time": 0.4373347759246826
    },
    {
      "epoch": 0.00023067626953125,
      "model_forward_time": 0.11466360092163086,
      "step": 37794
    },
    {
      "epoch": 0.00023067626953125,
      "step": 37794,
      "training_step_time": 0.416379451751709
    },
    {
      "epoch": 0.000230682373046875,
      "model_forward_time": 0.1142418384552002,
      "step": 37795
    },
    {
      "epoch": 0.000230682373046875,
      "step": 37795,
      "training_step_time": 0.421830415725708
    },
    {
      "epoch": 0.0002306884765625,
      "model_forward_time": 0.11449313163757324,
      "step": 37796
    },
    {
      "epoch": 0.0002306884765625,
      "step": 37796,
      "training_step_time": 0.396697998046875
    },
    {
      "epoch": 0.000230694580078125,
      "model_forward_time": 0.11463737487792969,
      "step": 37797
    },
    {
      "epoch": 0.000230694580078125,
      "step": 37797,
      "training_step_time": 0.44855809211730957
    },
    {
      "epoch": 0.00023070068359375,
      "model_forward_time": 0.11542987823486328,
      "step": 37798
    },
    {
      "epoch": 0.00023070068359375,
      "step": 37798,
      "training_step_time": 0.4106454849243164
    },
    {
      "epoch": 0.000230706787109375,
      "model_forward_time": 0.11486482620239258,
      "step": 37799
    },
    {
      "epoch": 0.000230706787109375,
      "step": 37799,
      "training_step_time": 0.4802358150482178
    },
    {
      "epoch": 0.000230712890625,
      "grad_norm": 0.10439316928386688,
      "learning_rate": 3.298534127791785e-05,
      "loss": 0.0375,
      "step": 37800
    },
    {
      "epoch": 0.000230712890625,
      "model_forward_time": 0.11530852317810059,
      "step": 37800
    },
    {
      "epoch": 0.000230712890625,
      "step": 37800,
      "training_step_time": 0.38521456718444824
    },
    {
      "epoch": 0.000230718994140625,
      "model_forward_time": 0.11507558822631836,
      "step": 37801
    },
    {
      "epoch": 0.000230718994140625,
      "step": 37801,
      "training_step_time": 0.39386916160583496
    },
    {
      "epoch": 0.00023072509765625,
      "model_forward_time": 0.11486363410949707,
      "step": 37802
    },
    {
      "epoch": 0.00023072509765625,
      "step": 37802,
      "training_step_time": 0.399463415145874
    },
    {
      "epoch": 0.000230731201171875,
      "model_forward_time": 0.11570858955383301,
      "step": 37803
    },
    {
      "epoch": 0.000230731201171875,
      "step": 37803,
      "training_step_time": 0.42325329780578613
    },
    {
      "epoch": 0.0002307373046875,
      "model_forward_time": 0.11580109596252441,
      "step": 37804
    },
    {
      "epoch": 0.0002307373046875,
      "step": 37804,
      "training_step_time": 0.4997270107269287
    },
    {
      "epoch": 0.000230743408203125,
      "model_forward_time": 0.11539936065673828,
      "step": 37805
    },
    {
      "epoch": 0.000230743408203125,
      "step": 37805,
      "training_step_time": 0.4644923210144043
    },
    {
      "epoch": 0.00023074951171875,
      "model_forward_time": 0.11451315879821777,
      "step": 37806
    },
    {
      "epoch": 0.00023074951171875,
      "step": 37806,
      "training_step_time": 0.39021897315979004
    },
    {
      "epoch": 0.000230755615234375,
      "model_forward_time": 0.1154325008392334,
      "step": 37807
    },
    {
      "epoch": 0.000230755615234375,
      "step": 37807,
      "training_step_time": 0.3896825313568115
    },
    {
      "epoch": 0.00023076171875,
      "model_forward_time": 0.1153872013092041,
      "step": 37808
    },
    {
      "epoch": 0.00023076171875,
      "step": 37808,
      "training_step_time": 0.3967461585998535
    },
    {
      "epoch": 0.000230767822265625,
      "model_forward_time": 0.11503434181213379,
      "step": 37809
    },
    {
      "epoch": 0.000230767822265625,
      "step": 37809,
      "training_step_time": 0.3820624351501465
    },
    {
      "epoch": 0.00023077392578125,
      "grad_norm": 0.08619373291730881,
      "learning_rate": 3.295943070135625e-05,
      "loss": 0.0378,
      "step": 37810
    },
    {
      "epoch": 0.00023077392578125,
      "model_forward_time": 0.11543965339660645,
      "step": 37810
    },
    {
      "epoch": 0.00023077392578125,
      "step": 37810,
      "training_step_time": 0.4342775344848633
    },
    {
      "epoch": 0.000230780029296875,
      "model_forward_time": 0.11470794677734375,
      "step": 37811
    },
    {
      "epoch": 0.000230780029296875,
      "step": 37811,
      "training_step_time": 0.4766499996185303
    },
    {
      "epoch": 0.0002307861328125,
      "model_forward_time": 0.11530327796936035,
      "step": 37812
    },
    {
      "epoch": 0.0002307861328125,
      "step": 37812,
      "training_step_time": 0.43076443672180176
    },
    {
      "epoch": 0.000230792236328125,
      "model_forward_time": 0.11487221717834473,
      "step": 37813
    },
    {
      "epoch": 0.000230792236328125,
      "step": 37813,
      "training_step_time": 0.4027688503265381
    },
    {
      "epoch": 0.00023079833984375,
      "model_forward_time": 0.11525678634643555,
      "step": 37814
    },
    {
      "epoch": 0.00023079833984375,
      "step": 37814,
      "training_step_time": 0.3976271152496338
    },
    {
      "epoch": 0.000230804443359375,
      "model_forward_time": 0.11524724960327148,
      "step": 37815
    },
    {
      "epoch": 0.000230804443359375,
      "step": 37815,
      "training_step_time": 0.39038586616516113
    },
    {
      "epoch": 0.000230810546875,
      "model_forward_time": 0.11550378799438477,
      "step": 37816
    },
    {
      "epoch": 0.000230810546875,
      "step": 37816,
      "training_step_time": 0.38602113723754883
    },
    {
      "epoch": 0.000230816650390625,
      "model_forward_time": 0.11445474624633789,
      "step": 37817
    },
    {
      "epoch": 0.000230816650390625,
      "step": 37817,
      "training_step_time": 0.4658372402191162
    },
    {
      "epoch": 0.00023082275390625,
      "model_forward_time": 0.11612486839294434,
      "step": 37818
    },
    {
      "epoch": 0.00023082275390625,
      "step": 37818,
      "training_step_time": 0.4251515865325928
    },
    {
      "epoch": 0.000230828857421875,
      "model_forward_time": 0.11545825004577637,
      "step": 37819
    },
    {
      "epoch": 0.000230828857421875,
      "step": 37819,
      "training_step_time": 0.43882131576538086
    },
    {
      "epoch": 0.0002308349609375,
      "grad_norm": 0.1177728995680809,
      "learning_rate": 3.2933525301269684e-05,
      "loss": 0.0431,
      "step": 37820
    },
    {
      "epoch": 0.0002308349609375,
      "model_forward_time": 0.11504912376403809,
      "step": 37820
    },
    {
      "epoch": 0.0002308349609375,
      "step": 37820,
      "training_step_time": 0.44298768043518066
    },
    {
      "epoch": 0.000230841064453125,
      "model_forward_time": 0.11425471305847168,
      "step": 37821
    },
    {
      "epoch": 0.000230841064453125,
      "step": 37821,
      "training_step_time": 0.3845791816711426
    },
    {
      "epoch": 0.00023084716796875,
      "model_forward_time": 0.1148991584777832,
      "step": 37822
    },
    {
      "epoch": 0.00023084716796875,
      "step": 37822,
      "training_step_time": 0.38291120529174805
    },
    {
      "epoch": 0.000230853271484375,
      "model_forward_time": 0.11515665054321289,
      "step": 37823
    },
    {
      "epoch": 0.000230853271484375,
      "step": 37823,
      "training_step_time": 0.42397379875183105
    },
    {
      "epoch": 0.000230859375,
      "model_forward_time": 0.11489653587341309,
      "step": 37824
    },
    {
      "epoch": 0.000230859375,
      "step": 37824,
      "training_step_time": 0.4337501525878906
    },
    {
      "epoch": 0.000230865478515625,
      "model_forward_time": 0.11586523056030273,
      "step": 37825
    },
    {
      "epoch": 0.000230865478515625,
      "step": 37825,
      "training_step_time": 0.39595961570739746
    },
    {
      "epoch": 0.00023087158203125,
      "model_forward_time": 0.1154792308807373,
      "step": 37826
    },
    {
      "epoch": 0.00023087158203125,
      "step": 37826,
      "training_step_time": 0.45055389404296875
    },
    {
      "epoch": 0.000230877685546875,
      "model_forward_time": 0.11546874046325684,
      "step": 37827
    },
    {
      "epoch": 0.000230877685546875,
      "step": 37827,
      "training_step_time": 0.3859710693359375
    },
    {
      "epoch": 0.0002308837890625,
      "model_forward_time": 0.1149129867553711,
      "step": 37828
    },
    {
      "epoch": 0.0002308837890625,
      "step": 37828,
      "training_step_time": 0.3868229389190674
    },
    {
      "epoch": 0.000230889892578125,
      "model_forward_time": 0.11547231674194336,
      "step": 37829
    },
    {
      "epoch": 0.000230889892578125,
      "step": 37829,
      "training_step_time": 0.4965193271636963
    },
    {
      "epoch": 0.00023089599609375,
      "grad_norm": 0.1684453785419464,
      "learning_rate": 3.2907625085527503e-05,
      "loss": 0.0427,
      "step": 37830
    },
    {
      "epoch": 0.00023089599609375,
      "model_forward_time": 0.11511063575744629,
      "step": 37830
    },
    {
      "epoch": 0.00023089599609375,
      "step": 37830,
      "training_step_time": 0.3909425735473633
    },
    {
      "epoch": 0.000230902099609375,
      "model_forward_time": 0.11555862426757812,
      "step": 37831
    },
    {
      "epoch": 0.000230902099609375,
      "step": 37831,
      "training_step_time": 0.39193201065063477
    },
    {
      "epoch": 0.000230908203125,
      "model_forward_time": 0.11487627029418945,
      "step": 37832
    },
    {
      "epoch": 0.000230908203125,
      "step": 37832,
      "training_step_time": 0.4205336570739746
    },
    {
      "epoch": 0.000230914306640625,
      "model_forward_time": 0.11591601371765137,
      "step": 37833
    },
    {
      "epoch": 0.000230914306640625,
      "step": 37833,
      "training_step_time": 0.4331657886505127
    },
    {
      "epoch": 0.00023092041015625,
      "model_forward_time": 0.11686325073242188,
      "step": 37834
    },
    {
      "epoch": 0.00023092041015625,
      "step": 37834,
      "training_step_time": 0.4596874713897705
    },
    {
      "epoch": 0.000230926513671875,
      "model_forward_time": 0.11539745330810547,
      "step": 37835
    },
    {
      "epoch": 0.000230926513671875,
      "step": 37835,
      "training_step_time": 0.5989158153533936
    },
    {
      "epoch": 0.0002309326171875,
      "model_forward_time": 0.11557722091674805,
      "step": 37836
    },
    {
      "epoch": 0.0002309326171875,
      "step": 37836,
      "training_step_time": 0.3889138698577881
    },
    {
      "epoch": 0.000230938720703125,
      "model_forward_time": 0.11542987823486328,
      "step": 37837
    },
    {
      "epoch": 0.000230938720703125,
      "step": 37837,
      "training_step_time": 0.4359002113342285
    },
    {
      "epoch": 0.00023094482421875,
      "model_forward_time": 0.11610198020935059,
      "step": 37838
    },
    {
      "epoch": 0.00023094482421875,
      "step": 37838,
      "training_step_time": 0.44037723541259766
    },
    {
      "epoch": 0.000230950927734375,
      "model_forward_time": 0.11439943313598633,
      "step": 37839
    },
    {
      "epoch": 0.000230950927734375,
      "step": 37839,
      "training_step_time": 0.41040635108947754
    },
    {
      "epoch": 0.00023095703125,
      "grad_norm": 0.14392320811748505,
      "learning_rate": 3.288173006199755e-05,
      "loss": 0.0417,
      "step": 37840
    },
    {
      "epoch": 0.00023095703125,
      "model_forward_time": 0.11447787284851074,
      "step": 37840
    },
    {
      "epoch": 0.00023095703125,
      "step": 37840,
      "training_step_time": 0.49877142906188965
    },
    {
      "epoch": 0.000230963134765625,
      "model_forward_time": 0.1145021915435791,
      "step": 37841
    },
    {
      "epoch": 0.000230963134765625,
      "step": 37841,
      "training_step_time": 0.38969945907592773
    },
    {
      "epoch": 0.00023096923828125,
      "model_forward_time": 0.11484813690185547,
      "step": 37842
    },
    {
      "epoch": 0.00023096923828125,
      "step": 37842,
      "training_step_time": 0.3924119472503662
    },
    {
      "epoch": 0.000230975341796875,
      "model_forward_time": 0.11536121368408203,
      "step": 37843
    },
    {
      "epoch": 0.000230975341796875,
      "step": 37843,
      "training_step_time": 0.3921821117401123
    },
    {
      "epoch": 0.0002309814453125,
      "model_forward_time": 0.11579298973083496,
      "step": 37844
    },
    {
      "epoch": 0.0002309814453125,
      "step": 37844,
      "training_step_time": 0.3696873188018799
    },
    {
      "epoch": 0.000230987548828125,
      "model_forward_time": 0.11549544334411621,
      "step": 37845
    },
    {
      "epoch": 0.000230987548828125,
      "step": 37845,
      "training_step_time": 0.3867812156677246
    },
    {
      "epoch": 0.00023099365234375,
      "model_forward_time": 0.11481475830078125,
      "step": 37846
    },
    {
      "epoch": 0.00023099365234375,
      "step": 37846,
      "training_step_time": 0.368635892868042
    },
    {
      "epoch": 0.000230999755859375,
      "model_forward_time": 0.11680841445922852,
      "step": 37847
    },
    {
      "epoch": 0.000230999755859375,
      "step": 37847,
      "training_step_time": 0.45316362380981445
    },
    {
      "epoch": 0.000231005859375,
      "model_forward_time": 0.11480069160461426,
      "step": 37848
    },
    {
      "epoch": 0.000231005859375,
      "step": 37848,
      "training_step_time": 0.427004337310791
    },
    {
      "epoch": 0.000231011962890625,
      "model_forward_time": 0.11497926712036133,
      "step": 37849
    },
    {
      "epoch": 0.000231011962890625,
      "step": 37849,
      "training_step_time": 0.4078044891357422
    },
    {
      "epoch": 0.00023101806640625,
      "grad_norm": 0.09442951530218124,
      "learning_rate": 3.2855840238546e-05,
      "loss": 0.0379,
      "step": 37850
    },
    {
      "epoch": 0.00023101806640625,
      "model_forward_time": 0.11545228958129883,
      "step": 37850
    },
    {
      "epoch": 0.00023101806640625,
      "step": 37850,
      "training_step_time": 0.3878297805786133
    },
    {
      "epoch": 0.000231024169921875,
      "model_forward_time": 0.11501455307006836,
      "step": 37851
    },
    {
      "epoch": 0.000231024169921875,
      "step": 37851,
      "training_step_time": 0.3973691463470459
    },
    {
      "epoch": 0.0002310302734375,
      "model_forward_time": 0.11543655395507812,
      "step": 37852
    },
    {
      "epoch": 0.0002310302734375,
      "step": 37852,
      "training_step_time": 0.39256978034973145
    },
    {
      "epoch": 0.000231036376953125,
      "model_forward_time": 0.11486387252807617,
      "step": 37853
    },
    {
      "epoch": 0.000231036376953125,
      "step": 37853,
      "training_step_time": 0.5891642570495605
    },
    {
      "epoch": 0.00023104248046875,
      "model_forward_time": 0.11496472358703613,
      "step": 37854
    },
    {
      "epoch": 0.00023104248046875,
      "step": 37854,
      "training_step_time": 0.4514284133911133
    },
    {
      "epoch": 0.000231048583984375,
      "model_forward_time": 0.11460280418395996,
      "step": 37855
    },
    {
      "epoch": 0.000231048583984375,
      "step": 37855,
      "training_step_time": 0.39320898056030273
    },
    {
      "epoch": 0.0002310546875,
      "model_forward_time": 0.11558818817138672,
      "step": 37856
    },
    {
      "epoch": 0.0002310546875,
      "step": 37856,
      "training_step_time": 0.3970472812652588
    },
    {
      "epoch": 0.000231060791015625,
      "model_forward_time": 0.11484313011169434,
      "step": 37857
    },
    {
      "epoch": 0.000231060791015625,
      "step": 37857,
      "training_step_time": 0.39176487922668457
    },
    {
      "epoch": 0.00023106689453125,
      "model_forward_time": 0.11475110054016113,
      "step": 37858
    },
    {
      "epoch": 0.00023106689453125,
      "step": 37858,
      "training_step_time": 0.39295196533203125
    },
    {
      "epoch": 0.000231072998046875,
      "model_forward_time": 0.11563849449157715,
      "step": 37859
    },
    {
      "epoch": 0.000231072998046875,
      "step": 37859,
      "training_step_time": 0.5541095733642578
    },
    {
      "epoch": 0.0002310791015625,
      "grad_norm": 0.1342221051454544,
      "learning_rate": 3.282995562303754e-05,
      "loss": 0.0413,
      "step": 37860
    },
    {
      "epoch": 0.0002310791015625,
      "model_forward_time": 0.11483502388000488,
      "step": 37860
    },
    {
      "epoch": 0.0002310791015625,
      "step": 37860,
      "training_step_time": 0.3660609722137451
    },
    {
      "epoch": 0.000231085205078125,
      "model_forward_time": 0.11527657508850098,
      "step": 37861
    },
    {
      "epoch": 0.000231085205078125,
      "step": 37861,
      "training_step_time": 0.46472644805908203
    },
    {
      "epoch": 0.00023109130859375,
      "model_forward_time": 0.11571812629699707,
      "step": 37862
    },
    {
      "epoch": 0.00023109130859375,
      "step": 37862,
      "training_step_time": 0.4023442268371582
    },
    {
      "epoch": 0.000231097412109375,
      "model_forward_time": 0.11458706855773926,
      "step": 37863
    },
    {
      "epoch": 0.000231097412109375,
      "step": 37863,
      "training_step_time": 0.42121458053588867
    },
    {
      "epoch": 0.000231103515625,
      "model_forward_time": 0.11447620391845703,
      "step": 37864
    },
    {
      "epoch": 0.000231103515625,
      "step": 37864,
      "training_step_time": 0.4178147315979004
    },
    {
      "epoch": 0.000231109619140625,
      "model_forward_time": 0.11518716812133789,
      "step": 37865
    },
    {
      "epoch": 0.000231109619140625,
      "step": 37865,
      "training_step_time": 0.39373326301574707
    },
    {
      "epoch": 0.00023111572265625,
      "model_forward_time": 0.11541533470153809,
      "step": 37866
    },
    {
      "epoch": 0.00023111572265625,
      "step": 37866,
      "training_step_time": 0.39925670623779297
    },
    {
      "epoch": 0.000231121826171875,
      "model_forward_time": 0.11490678787231445,
      "step": 37867
    },
    {
      "epoch": 0.000231121826171875,
      "step": 37867,
      "training_step_time": 0.5077133178710938
    },
    {
      "epoch": 0.0002311279296875,
      "model_forward_time": 0.11432719230651855,
      "step": 37868
    },
    {
      "epoch": 0.0002311279296875,
      "step": 37868,
      "training_step_time": 0.38382673263549805
    },
    {
      "epoch": 0.000231134033203125,
      "model_forward_time": 0.11579704284667969,
      "step": 37869
    },
    {
      "epoch": 0.000231134033203125,
      "step": 37869,
      "training_step_time": 0.39699506759643555
    },
    {
      "epoch": 0.00023114013671875,
      "grad_norm": 0.09416047483682632,
      "learning_rate": 3.280407622333518e-05,
      "loss": 0.0412,
      "step": 37870
    },
    {
      "epoch": 0.00023114013671875,
      "model_forward_time": 0.11469626426696777,
      "step": 37870
    },
    {
      "epoch": 0.00023114013671875,
      "step": 37870,
      "training_step_time": 0.40821146965026855
    },
    {
      "epoch": 0.000231146240234375,
      "model_forward_time": 0.11490845680236816,
      "step": 37871
    },
    {
      "epoch": 0.000231146240234375,
      "step": 37871,
      "training_step_time": 0.3936347961425781
    },
    {
      "epoch": 0.00023115234375,
      "model_forward_time": 0.11490678787231445,
      "step": 37872
    },
    {
      "epoch": 0.00023115234375,
      "step": 37872,
      "training_step_time": 0.4039304256439209
    },
    {
      "epoch": 0.000231158447265625,
      "model_forward_time": 0.11523246765136719,
      "step": 37873
    },
    {
      "epoch": 0.000231158447265625,
      "step": 37873,
      "training_step_time": 0.395216703414917
    },
    {
      "epoch": 0.00023116455078125,
      "model_forward_time": 0.1154026985168457,
      "step": 37874
    },
    {
      "epoch": 0.00023116455078125,
      "step": 37874,
      "training_step_time": 0.3981761932373047
    },
    {
      "epoch": 0.000231170654296875,
      "model_forward_time": 0.1162409782409668,
      "step": 37875
    },
    {
      "epoch": 0.000231170654296875,
      "step": 37875,
      "training_step_time": 0.46685791015625
    },
    {
      "epoch": 0.0002311767578125,
      "model_forward_time": 0.11669301986694336,
      "step": 37876
    },
    {
      "epoch": 0.0002311767578125,
      "step": 37876,
      "training_step_time": 0.42868614196777344
    },
    {
      "epoch": 0.000231182861328125,
      "model_forward_time": 0.1153099536895752,
      "step": 37877
    },
    {
      "epoch": 0.000231182861328125,
      "step": 37877,
      "training_step_time": 0.4456806182861328
    },
    {
      "epoch": 0.00023118896484375,
      "model_forward_time": 0.11650633811950684,
      "step": 37878
    },
    {
      "epoch": 0.00023118896484375,
      "step": 37878,
      "training_step_time": 0.48944926261901855
    },
    {
      "epoch": 0.000231195068359375,
      "model_forward_time": 0.11426258087158203,
      "step": 37879
    },
    {
      "epoch": 0.000231195068359375,
      "step": 37879,
      "training_step_time": 0.397599458694458
    },
    {
      "epoch": 0.000231201171875,
      "grad_norm": 0.0947713628411293,
      "learning_rate": 3.2778202047300444e-05,
      "loss": 0.033,
      "step": 37880
    },
    {
      "epoch": 0.000231201171875,
      "model_forward_time": 0.11480903625488281,
      "step": 37880
    },
    {
      "epoch": 0.000231201171875,
      "step": 37880,
      "training_step_time": 0.3834249973297119
    },
    {
      "epoch": 0.000231207275390625,
      "model_forward_time": 0.11617016792297363,
      "step": 37881
    },
    {
      "epoch": 0.000231207275390625,
      "step": 37881,
      "training_step_time": 0.4472813606262207
    },
    {
      "epoch": 0.00023121337890625,
      "model_forward_time": 0.11464285850524902,
      "step": 37882
    },
    {
      "epoch": 0.00023121337890625,
      "step": 37882,
      "training_step_time": 0.42346930503845215
    },
    {
      "epoch": 0.000231219482421875,
      "model_forward_time": 0.11521697044372559,
      "step": 37883
    },
    {
      "epoch": 0.000231219482421875,
      "step": 37883,
      "training_step_time": 0.48125290870666504
    },
    {
      "epoch": 0.0002312255859375,
      "model_forward_time": 0.11481952667236328,
      "step": 37884
    },
    {
      "epoch": 0.0002312255859375,
      "step": 37884,
      "training_step_time": 0.39967846870422363
    },
    {
      "epoch": 0.000231231689453125,
      "model_forward_time": 0.11569428443908691,
      "step": 37885
    },
    {
      "epoch": 0.000231231689453125,
      "step": 37885,
      "training_step_time": 0.3984103202819824
    },
    {
      "epoch": 0.00023123779296875,
      "model_forward_time": 0.11522865295410156,
      "step": 37886
    },
    {
      "epoch": 0.00023123779296875,
      "step": 37886,
      "training_step_time": 0.387326717376709
    },
    {
      "epoch": 0.000231243896484375,
      "model_forward_time": 0.11544013023376465,
      "step": 37887
    },
    {
      "epoch": 0.000231243896484375,
      "step": 37887,
      "training_step_time": 0.38238096237182617
    },
    {
      "epoch": 0.00023125,
      "model_forward_time": 0.11447477340698242,
      "step": 37888
    },
    {
      "epoch": 0.00023125,
      "step": 37888,
      "training_step_time": 0.401322603225708
    },
    {
      "epoch": 0.000231256103515625,
      "model_forward_time": 0.11502408981323242,
      "step": 37889
    },
    {
      "epoch": 0.000231256103515625,
      "step": 37889,
      "training_step_time": 0.49857139587402344
    },
    {
      "epoch": 0.00023126220703125,
      "grad_norm": 0.09059096872806549,
      "learning_rate": 3.275233310279321e-05,
      "loss": 0.0388,
      "step": 37890
    },
    {
      "epoch": 0.00023126220703125,
      "model_forward_time": 0.11521434783935547,
      "step": 37890
    },
    {
      "epoch": 0.00023126220703125,
      "step": 37890,
      "training_step_time": 0.3933696746826172
    },
    {
      "epoch": 0.000231268310546875,
      "model_forward_time": 0.11531639099121094,
      "step": 37891
    },
    {
      "epoch": 0.000231268310546875,
      "step": 37891,
      "training_step_time": 0.48749613761901855
    },
    {
      "epoch": 0.0002312744140625,
      "model_forward_time": 0.11511778831481934,
      "step": 37892
    },
    {
      "epoch": 0.0002312744140625,
      "step": 37892,
      "training_step_time": 0.4612910747528076
    },
    {
      "epoch": 0.000231280517578125,
      "model_forward_time": 0.1151583194732666,
      "step": 37893
    },
    {
      "epoch": 0.000231280517578125,
      "step": 37893,
      "training_step_time": 0.3903954029083252
    },
    {
      "epoch": 0.00023128662109375,
      "model_forward_time": 0.1153714656829834,
      "step": 37894
    },
    {
      "epoch": 0.00023128662109375,
      "step": 37894,
      "training_step_time": 0.3883030414581299
    },
    {
      "epoch": 0.000231292724609375,
      "model_forward_time": 0.1154930591583252,
      "step": 37895
    },
    {
      "epoch": 0.000231292724609375,
      "step": 37895,
      "training_step_time": 0.389385461807251
    },
    {
      "epoch": 0.000231298828125,
      "model_forward_time": 0.11525821685791016,
      "step": 37896
    },
    {
      "epoch": 0.000231298828125,
      "step": 37896,
      "training_step_time": 0.4269404411315918
    },
    {
      "epoch": 0.000231304931640625,
      "model_forward_time": 0.11550331115722656,
      "step": 37897
    },
    {
      "epoch": 0.000231304931640625,
      "step": 37897,
      "training_step_time": 0.4844334125518799
    },
    {
      "epoch": 0.00023131103515625,
      "model_forward_time": 0.11534643173217773,
      "step": 37898
    },
    {
      "epoch": 0.00023131103515625,
      "step": 37898,
      "training_step_time": 0.3845705986022949
    },
    {
      "epoch": 0.000231317138671875,
      "model_forward_time": 0.11537981033325195,
      "step": 37899
    },
    {
      "epoch": 0.000231317138671875,
      "step": 37899,
      "training_step_time": 0.3923475742340088
    },
    {
      "epoch": 0.0002313232421875,
      "grad_norm": 0.10729201883077621,
      "learning_rate": 3.272646939767179e-05,
      "loss": 0.0412,
      "step": 37900
    },
    {
      "epoch": 0.0002313232421875,
      "model_forward_time": 0.1154019832611084,
      "step": 37900
    },
    {
      "epoch": 0.0002313232421875,
      "step": 37900,
      "training_step_time": 0.3964719772338867
    },
    {
      "epoch": 0.000231329345703125,
      "model_forward_time": 0.11481261253356934,
      "step": 37901
    },
    {
      "epoch": 0.000231329345703125,
      "step": 37901,
      "training_step_time": 0.6371386051177979
    },
    {
      "epoch": 0.00023133544921875,
      "model_forward_time": 0.11482572555541992,
      "step": 37902
    },
    {
      "epoch": 0.00023133544921875,
      "step": 37902,
      "training_step_time": 0.3867349624633789
    },
    {
      "epoch": 0.000231341552734375,
      "model_forward_time": 0.11455011367797852,
      "step": 37903
    },
    {
      "epoch": 0.000231341552734375,
      "step": 37903,
      "training_step_time": 0.40802597999572754
    },
    {
      "epoch": 0.00023134765625,
      "model_forward_time": 0.11596441268920898,
      "step": 37904
    },
    {
      "epoch": 0.00023134765625,
      "step": 37904,
      "training_step_time": 0.5110485553741455
    },
    {
      "epoch": 0.000231353759765625,
      "model_forward_time": 0.1154475212097168,
      "step": 37905
    },
    {
      "epoch": 0.000231353759765625,
      "step": 37905,
      "training_step_time": 0.4786808490753174
    },
    {
      "epoch": 0.00023135986328125,
      "model_forward_time": 0.11513423919677734,
      "step": 37906
    },
    {
      "epoch": 0.00023135986328125,
      "step": 37906,
      "training_step_time": 0.491225004196167
    },
    {
      "epoch": 0.000231365966796875,
      "model_forward_time": 0.11516642570495605,
      "step": 37907
    },
    {
      "epoch": 0.000231365966796875,
      "step": 37907,
      "training_step_time": 0.38904905319213867
    },
    {
      "epoch": 0.0002313720703125,
      "model_forward_time": 0.1149299144744873,
      "step": 37908
    },
    {
      "epoch": 0.0002313720703125,
      "step": 37908,
      "training_step_time": 0.38602352142333984
    },
    {
      "epoch": 0.000231378173828125,
      "model_forward_time": 0.1143195629119873,
      "step": 37909
    },
    {
      "epoch": 0.000231378173828125,
      "step": 37909,
      "training_step_time": 0.39258456230163574
    },
    {
      "epoch": 0.00023138427734375,
      "grad_norm": 0.10766015201807022,
      "learning_rate": 3.2700610939792885e-05,
      "loss": 0.0374,
      "step": 37910
    },
    {
      "epoch": 0.00023138427734375,
      "model_forward_time": 0.11536097526550293,
      "step": 37910
    },
    {
      "epoch": 0.00023138427734375,
      "step": 37910,
      "training_step_time": 0.3903846740722656
    },
    {
      "epoch": 0.000231390380859375,
      "model_forward_time": 0.11434078216552734,
      "step": 37911
    },
    {
      "epoch": 0.000231390380859375,
      "step": 37911,
      "training_step_time": 0.39632153511047363
    },
    {
      "epoch": 0.000231396484375,
      "model_forward_time": 0.11724090576171875,
      "step": 37912
    },
    {
      "epoch": 0.000231396484375,
      "step": 37912,
      "training_step_time": 0.3936772346496582
    },
    {
      "epoch": 0.000231402587890625,
      "model_forward_time": 0.1153557300567627,
      "step": 37913
    },
    {
      "epoch": 0.000231402587890625,
      "step": 37913,
      "training_step_time": 0.42984843254089355
    },
    {
      "epoch": 0.00023140869140625,
      "model_forward_time": 0.1162712574005127,
      "step": 37914
    },
    {
      "epoch": 0.00023140869140625,
      "step": 37914,
      "training_step_time": 0.402008056640625
    },
    {
      "epoch": 0.000231414794921875,
      "model_forward_time": 0.11627078056335449,
      "step": 37915
    },
    {
      "epoch": 0.000231414794921875,
      "step": 37915,
      "training_step_time": 0.38459229469299316
    },
    {
      "epoch": 0.0002314208984375,
      "model_forward_time": 0.11524128913879395,
      "step": 37916
    },
    {
      "epoch": 0.0002314208984375,
      "step": 37916,
      "training_step_time": 0.4069998264312744
    },
    {
      "epoch": 0.000231427001953125,
      "model_forward_time": 0.11529326438903809,
      "step": 37917
    },
    {
      "epoch": 0.000231427001953125,
      "step": 37917,
      "training_step_time": 0.4039175510406494
    },
    {
      "epoch": 0.00023143310546875,
      "model_forward_time": 0.11466479301452637,
      "step": 37918
    },
    {
      "epoch": 0.00023143310546875,
      "step": 37918,
      "training_step_time": 0.3694040775299072
    },
    {
      "epoch": 0.000231439208984375,
      "model_forward_time": 0.11512446403503418,
      "step": 37919
    },
    {
      "epoch": 0.000231439208984375,
      "step": 37919,
      "training_step_time": 0.4559776782989502
    },
    {
      "epoch": 0.0002314453125,
      "grad_norm": 0.10152530670166016,
      "learning_rate": 3.267475773701161e-05,
      "loss": 0.0422,
      "step": 37920
    },
    {
      "epoch": 0.0002314453125,
      "model_forward_time": 0.1150963306427002,
      "step": 37920
    },
    {
      "epoch": 0.0002314453125,
      "step": 37920,
      "training_step_time": 0.4109537601470947
    },
    {
      "epoch": 0.000231451416015625,
      "model_forward_time": 0.11563563346862793,
      "step": 37921
    },
    {
      "epoch": 0.000231451416015625,
      "step": 37921,
      "training_step_time": 0.4327576160430908
    },
    {
      "epoch": 0.00023145751953125,
      "model_forward_time": 0.11510062217712402,
      "step": 37922
    },
    {
      "epoch": 0.00023145751953125,
      "step": 37922,
      "training_step_time": 0.487194299697876
    },
    {
      "epoch": 0.000231463623046875,
      "model_forward_time": 0.1154627799987793,
      "step": 37923
    },
    {
      "epoch": 0.000231463623046875,
      "step": 37923,
      "training_step_time": 0.38542604446411133
    },
    {
      "epoch": 0.0002314697265625,
      "model_forward_time": 0.11548352241516113,
      "step": 37924
    },
    {
      "epoch": 0.0002314697265625,
      "step": 37924,
      "training_step_time": 0.4127073287963867
    },
    {
      "epoch": 0.000231475830078125,
      "model_forward_time": 0.11496472358703613,
      "step": 37925
    },
    {
      "epoch": 0.000231475830078125,
      "step": 37925,
      "training_step_time": 0.4229731559753418
    },
    {
      "epoch": 0.00023148193359375,
      "model_forward_time": 0.11636829376220703,
      "step": 37926
    },
    {
      "epoch": 0.00023148193359375,
      "step": 37926,
      "training_step_time": 0.49173831939697266
    },
    {
      "epoch": 0.000231488037109375,
      "model_forward_time": 0.11475515365600586,
      "step": 37927
    },
    {
      "epoch": 0.000231488037109375,
      "step": 37927,
      "training_step_time": 0.3975515365600586
    },
    {
      "epoch": 0.000231494140625,
      "model_forward_time": 0.11482882499694824,
      "step": 37928
    },
    {
      "epoch": 0.000231494140625,
      "step": 37928,
      "training_step_time": 0.37920618057250977
    },
    {
      "epoch": 0.000231500244140625,
      "model_forward_time": 0.11515283584594727,
      "step": 37929
    },
    {
      "epoch": 0.000231500244140625,
      "step": 37929,
      "training_step_time": 0.3804159164428711
    },
    {
      "epoch": 0.00023150634765625,
      "grad_norm": 0.14348793029785156,
      "learning_rate": 3.264890979718147e-05,
      "loss": 0.0424,
      "step": 37930
    },
    {
      "epoch": 0.00023150634765625,
      "model_forward_time": 0.1147007942199707,
      "step": 37930
    },
    {
      "epoch": 0.00023150634765625,
      "step": 37930,
      "training_step_time": 0.3946835994720459
    },
    {
      "epoch": 0.000231512451171875,
      "model_forward_time": 0.1156454086303711,
      "step": 37931
    },
    {
      "epoch": 0.000231512451171875,
      "step": 37931,
      "training_step_time": 0.4303629398345947
    },
    {
      "epoch": 0.0002315185546875,
      "model_forward_time": 0.11611533164978027,
      "step": 37932
    },
    {
      "epoch": 0.0002315185546875,
      "step": 37932,
      "training_step_time": 0.4114058017730713
    },
    {
      "epoch": 0.000231524658203125,
      "model_forward_time": 0.11525130271911621,
      "step": 37933
    },
    {
      "epoch": 0.000231524658203125,
      "step": 37933,
      "training_step_time": 0.3949592113494873
    },
    {
      "epoch": 0.00023153076171875,
      "model_forward_time": 0.1163327693939209,
      "step": 37934
    },
    {
      "epoch": 0.00023153076171875,
      "step": 37934,
      "training_step_time": 0.5196070671081543
    },
    {
      "epoch": 0.000231536865234375,
      "model_forward_time": 0.11530470848083496,
      "step": 37935
    },
    {
      "epoch": 0.000231536865234375,
      "step": 37935,
      "training_step_time": 0.47064685821533203
    },
    {
      "epoch": 0.00023154296875,
      "model_forward_time": 0.11512494087219238,
      "step": 37936
    },
    {
      "epoch": 0.00023154296875,
      "step": 37936,
      "training_step_time": 0.37905120849609375
    },
    {
      "epoch": 0.000231549072265625,
      "model_forward_time": 0.11626911163330078,
      "step": 37937
    },
    {
      "epoch": 0.000231549072265625,
      "step": 37937,
      "training_step_time": 0.3875753879547119
    },
    {
      "epoch": 0.00023155517578125,
      "model_forward_time": 0.11507749557495117,
      "step": 37938
    },
    {
      "epoch": 0.00023155517578125,
      "step": 37938,
      "training_step_time": 0.40154409408569336
    },
    {
      "epoch": 0.000231561279296875,
      "model_forward_time": 0.11511540412902832,
      "step": 37939
    },
    {
      "epoch": 0.000231561279296875,
      "step": 37939,
      "training_step_time": 0.4071929454803467
    },
    {
      "epoch": 0.0002315673828125,
      "grad_norm": 0.09418075531721115,
      "learning_rate": 3.262306712815444e-05,
      "loss": 0.039,
      "step": 37940
    },
    {
      "epoch": 0.0002315673828125,
      "model_forward_time": 0.1147150993347168,
      "step": 37940
    },
    {
      "epoch": 0.0002315673828125,
      "step": 37940,
      "training_step_time": 0.5005285739898682
    },
    {
      "epoch": 0.000231573486328125,
      "model_forward_time": 0.11516666412353516,
      "step": 37941
    },
    {
      "epoch": 0.000231573486328125,
      "step": 37941,
      "training_step_time": 0.3938271999359131
    },
    {
      "epoch": 0.00023157958984375,
      "model_forward_time": 0.11516642570495605,
      "step": 37942
    },
    {
      "epoch": 0.00023157958984375,
      "step": 37942,
      "training_step_time": 0.41495203971862793
    },
    {
      "epoch": 0.000231585693359375,
      "model_forward_time": 0.11535096168518066,
      "step": 37943
    },
    {
      "epoch": 0.000231585693359375,
      "step": 37943,
      "training_step_time": 0.3935208320617676
    },
    {
      "epoch": 0.000231591796875,
      "model_forward_time": 0.11514472961425781,
      "step": 37944
    },
    {
      "epoch": 0.000231591796875,
      "step": 37944,
      "training_step_time": 0.4034304618835449
    },
    {
      "epoch": 0.000231597900390625,
      "model_forward_time": 0.11527061462402344,
      "step": 37945
    },
    {
      "epoch": 0.000231597900390625,
      "step": 37945,
      "training_step_time": 0.41118383407592773
    },
    {
      "epoch": 0.00023160400390625,
      "model_forward_time": 0.1146535873413086,
      "step": 37946
    },
    {
      "epoch": 0.00023160400390625,
      "step": 37946,
      "training_step_time": 0.41321563720703125
    },
    {
      "epoch": 0.000231610107421875,
      "model_forward_time": 0.11481642723083496,
      "step": 37947
    },
    {
      "epoch": 0.000231610107421875,
      "step": 37947,
      "training_step_time": 0.36775875091552734
    },
    {
      "epoch": 0.0002316162109375,
      "model_forward_time": 0.1152040958404541,
      "step": 37948
    },
    {
      "epoch": 0.0002316162109375,
      "step": 37948,
      "training_step_time": 0.46245431900024414
    },
    {
      "epoch": 0.000231622314453125,
      "model_forward_time": 0.11554718017578125,
      "step": 37949
    },
    {
      "epoch": 0.000231622314453125,
      "step": 37949,
      "training_step_time": 0.4290757179260254
    },
    {
      "epoch": 0.00023162841796875,
      "grad_norm": 0.10154171288013458,
      "learning_rate": 3.2597229737780774e-05,
      "loss": 0.0387,
      "step": 37950
    },
    {
      "epoch": 0.00023162841796875,
      "model_forward_time": 0.11471343040466309,
      "step": 37950
    },
    {
      "epoch": 0.00023162841796875,
      "step": 37950,
      "training_step_time": 0.4634666442871094
    },
    {
      "epoch": 0.000231634521484375,
      "model_forward_time": 0.11476445198059082,
      "step": 37951
    },
    {
      "epoch": 0.000231634521484375,
      "step": 37951,
      "training_step_time": 0.3856019973754883
    },
    {
      "epoch": 0.000231640625,
      "model_forward_time": 0.11466741561889648,
      "step": 37952
    },
    {
      "epoch": 0.000231640625,
      "step": 37952,
      "training_step_time": 0.3865535259246826
    },
    {
      "epoch": 0.000231646728515625,
      "model_forward_time": 0.115570068359375,
      "step": 37953
    },
    {
      "epoch": 0.000231646728515625,
      "step": 37953,
      "training_step_time": 0.4031393527984619
    },
    {
      "epoch": 0.00023165283203125,
      "model_forward_time": 0.11532974243164062,
      "step": 37954
    },
    {
      "epoch": 0.00023165283203125,
      "step": 37954,
      "training_step_time": 0.4841125011444092
    },
    {
      "epoch": 0.000231658935546875,
      "model_forward_time": 0.1150660514831543,
      "step": 37955
    },
    {
      "epoch": 0.000231658935546875,
      "step": 37955,
      "training_step_time": 0.4036579132080078
    },
    {
      "epoch": 0.0002316650390625,
      "model_forward_time": 0.11532282829284668,
      "step": 37956
    },
    {
      "epoch": 0.0002316650390625,
      "step": 37956,
      "training_step_time": 0.3909296989440918
    },
    {
      "epoch": 0.000231671142578125,
      "model_forward_time": 0.11523723602294922,
      "step": 37957
    },
    {
      "epoch": 0.000231671142578125,
      "step": 37957,
      "training_step_time": 0.39063358306884766
    },
    {
      "epoch": 0.00023167724609375,
      "model_forward_time": 0.11532402038574219,
      "step": 37958
    },
    {
      "epoch": 0.00023167724609375,
      "step": 37958,
      "training_step_time": 0.38419198989868164
    },
    {
      "epoch": 0.000231683349609375,
      "model_forward_time": 0.1146547794342041,
      "step": 37959
    },
    {
      "epoch": 0.000231683349609375,
      "step": 37959,
      "training_step_time": 0.4014253616333008
    },
    {
      "epoch": 0.000231689453125,
      "grad_norm": 0.11370127648115158,
      "learning_rate": 3.257139763390925e-05,
      "loss": 0.0433,
      "step": 37960
    },
    {
      "epoch": 0.000231689453125,
      "model_forward_time": 0.11557388305664062,
      "step": 37960
    },
    {
      "epoch": 0.000231689453125,
      "step": 37960,
      "training_step_time": 0.38785338401794434
    },
    {
      "epoch": 0.000231695556640625,
      "model_forward_time": 0.11632561683654785,
      "step": 37961
    },
    {
      "epoch": 0.000231695556640625,
      "step": 37961,
      "training_step_time": 0.4847118854522705
    },
    {
      "epoch": 0.00023170166015625,
      "model_forward_time": 0.11554336547851562,
      "step": 37962
    },
    {
      "epoch": 0.00023170166015625,
      "step": 37962,
      "training_step_time": 0.44177889823913574
    },
    {
      "epoch": 0.000231707763671875,
      "model_forward_time": 0.11479306221008301,
      "step": 37963
    },
    {
      "epoch": 0.000231707763671875,
      "step": 37963,
      "training_step_time": 0.48272061347961426
    },
    {
      "epoch": 0.0002317138671875,
      "model_forward_time": 0.11635017395019531,
      "step": 37964
    },
    {
      "epoch": 0.0002317138671875,
      "step": 37964,
      "training_step_time": 0.4511861801147461
    },
    {
      "epoch": 0.000231719970703125,
      "model_forward_time": 0.11514091491699219,
      "step": 37965
    },
    {
      "epoch": 0.000231719970703125,
      "step": 37965,
      "training_step_time": 0.4819221496582031
    },
    {
      "epoch": 0.00023172607421875,
      "model_forward_time": 0.11535358428955078,
      "step": 37966
    },
    {
      "epoch": 0.00023172607421875,
      "step": 37966,
      "training_step_time": 0.3939495086669922
    },
    {
      "epoch": 0.000231732177734375,
      "model_forward_time": 0.11481595039367676,
      "step": 37967
    },
    {
      "epoch": 0.000231732177734375,
      "step": 37967,
      "training_step_time": 0.4444293975830078
    },
    {
      "epoch": 0.00023173828125,
      "model_forward_time": 0.11512875556945801,
      "step": 37968
    },
    {
      "epoch": 0.00023173828125,
      "step": 37968,
      "training_step_time": 0.48865747451782227
    },
    {
      "epoch": 0.000231744384765625,
      "model_forward_time": 0.11537909507751465,
      "step": 37969
    },
    {
      "epoch": 0.000231744384765625,
      "step": 37969,
      "training_step_time": 0.38822293281555176
    },
    {
      "epoch": 0.00023175048828125,
      "grad_norm": 0.10169688612222672,
      "learning_rate": 3.2545570824386925e-05,
      "loss": 0.0416,
      "step": 37970
    },
    {
      "epoch": 0.00023175048828125,
      "model_forward_time": 0.11497187614440918,
      "step": 37970
    },
    {
      "epoch": 0.00023175048828125,
      "step": 37970,
      "training_step_time": 0.38219523429870605
    },
    {
      "epoch": 0.000231756591796875,
      "model_forward_time": 0.11510920524597168,
      "step": 37971
    },
    {
      "epoch": 0.000231756591796875,
      "step": 37971,
      "training_step_time": 0.38776159286499023
    },
    {
      "epoch": 0.0002317626953125,
      "model_forward_time": 0.11527180671691895,
      "step": 37972
    },
    {
      "epoch": 0.0002317626953125,
      "step": 37972,
      "training_step_time": 0.4063761234283447
    },
    {
      "epoch": 0.000231768798828125,
      "model_forward_time": 0.11472964286804199,
      "step": 37973
    },
    {
      "epoch": 0.000231768798828125,
      "step": 37973,
      "training_step_time": 0.4024648666381836
    },
    {
      "epoch": 0.00023177490234375,
      "model_forward_time": 0.1152191162109375,
      "step": 37974
    },
    {
      "epoch": 0.00023177490234375,
      "step": 37974,
      "training_step_time": 0.3951282501220703
    },
    {
      "epoch": 0.000231781005859375,
      "model_forward_time": 0.11581087112426758,
      "step": 37975
    },
    {
      "epoch": 0.000231781005859375,
      "step": 37975,
      "training_step_time": 0.3923513889312744
    },
    {
      "epoch": 0.000231787109375,
      "model_forward_time": 0.11533284187316895,
      "step": 37976
    },
    {
      "epoch": 0.000231787109375,
      "step": 37976,
      "training_step_time": 0.3750119209289551
    },
    {
      "epoch": 0.000231793212890625,
      "model_forward_time": 0.11465001106262207,
      "step": 37977
    },
    {
      "epoch": 0.000231793212890625,
      "step": 37977,
      "training_step_time": 0.44535279273986816
    },
    {
      "epoch": 0.00023179931640625,
      "model_forward_time": 0.11512589454650879,
      "step": 37978
    },
    {
      "epoch": 0.00023179931640625,
      "step": 37978,
      "training_step_time": 0.4544835090637207
    },
    {
      "epoch": 0.000231805419921875,
      "model_forward_time": 0.11534500122070312,
      "step": 37979
    },
    {
      "epoch": 0.000231805419921875,
      "step": 37979,
      "training_step_time": 0.4137873649597168
    },
    {
      "epoch": 0.0002318115234375,
      "grad_norm": 0.12746697664260864,
      "learning_rate": 3.251974931705933e-05,
      "loss": 0.0406,
      "step": 37980
    },
    {
      "epoch": 0.0002318115234375,
      "model_forward_time": 0.11475348472595215,
      "step": 37980
    },
    {
      "epoch": 0.0002318115234375,
      "step": 37980,
      "training_step_time": 0.39586901664733887
    },
    {
      "epoch": 0.000231817626953125,
      "model_forward_time": 0.11508488655090332,
      "step": 37981
    },
    {
      "epoch": 0.000231817626953125,
      "step": 37981,
      "training_step_time": 0.44408559799194336
    },
    {
      "epoch": 0.00023182373046875,
      "model_forward_time": 0.11499905586242676,
      "step": 37982
    },
    {
      "epoch": 0.00023182373046875,
      "step": 37982,
      "training_step_time": 0.3855414390563965
    },
    {
      "epoch": 0.000231829833984375,
      "model_forward_time": 0.11517643928527832,
      "step": 37983
    },
    {
      "epoch": 0.000231829833984375,
      "step": 37983,
      "training_step_time": 0.45373106002807617
    },
    {
      "epoch": 0.0002318359375,
      "model_forward_time": 0.11508798599243164,
      "step": 37984
    },
    {
      "epoch": 0.0002318359375,
      "step": 37984,
      "training_step_time": 0.38304948806762695
    },
    {
      "epoch": 0.000231842041015625,
      "model_forward_time": 0.1158590316772461,
      "step": 37985
    },
    {
      "epoch": 0.000231842041015625,
      "step": 37985,
      "training_step_time": 0.39920902252197266
    },
    {
      "epoch": 0.00023184814453125,
      "model_forward_time": 0.11458945274353027,
      "step": 37986
    },
    {
      "epoch": 0.00023184814453125,
      "step": 37986,
      "training_step_time": 0.3954153060913086
    },
    {
      "epoch": 0.000231854248046875,
      "model_forward_time": 0.11522626876831055,
      "step": 37987
    },
    {
      "epoch": 0.000231854248046875,
      "step": 37987,
      "training_step_time": 0.4012000560760498
    },
    {
      "epoch": 0.0002318603515625,
      "model_forward_time": 0.11568474769592285,
      "step": 37988
    },
    {
      "epoch": 0.0002318603515625,
      "step": 37988,
      "training_step_time": 0.38237881660461426
    },
    {
      "epoch": 0.000231866455078125,
      "model_forward_time": 0.11477112770080566,
      "step": 37989
    },
    {
      "epoch": 0.000231866455078125,
      "step": 37989,
      "training_step_time": 0.37499403953552246
    },
    {
      "epoch": 0.00023187255859375,
      "grad_norm": 0.17211568355560303,
      "learning_rate": 3.249393311977037e-05,
      "loss": 0.0379,
      "step": 37990
    },
    {
      "epoch": 0.00023187255859375,
      "model_forward_time": 0.11611199378967285,
      "step": 37990
    },
    {
      "epoch": 0.00023187255859375,
      "step": 37990,
      "training_step_time": 0.39040660858154297
    },
    {
      "epoch": 0.000231878662109375,
      "model_forward_time": 0.11555099487304688,
      "step": 37991
    },
    {
      "epoch": 0.000231878662109375,
      "step": 37991,
      "training_step_time": 0.4575362205505371
    },
    {
      "epoch": 0.000231884765625,
      "model_forward_time": 0.11533856391906738,
      "step": 37992
    },
    {
      "epoch": 0.000231884765625,
      "step": 37992,
      "training_step_time": 0.4791224002838135
    },
    {
      "epoch": 0.000231890869140625,
      "model_forward_time": 0.1143798828125,
      "step": 37993
    },
    {
      "epoch": 0.000231890869140625,
      "step": 37993,
      "training_step_time": 0.41543149948120117
    },
    {
      "epoch": 0.00023189697265625,
      "model_forward_time": 0.11551189422607422,
      "step": 37994
    },
    {
      "epoch": 0.00023189697265625,
      "step": 37994,
      "training_step_time": 0.41680312156677246
    },
    {
      "epoch": 0.000231903076171875,
      "model_forward_time": 0.11546897888183594,
      "step": 37995
    },
    {
      "epoch": 0.000231903076171875,
      "step": 37995,
      "training_step_time": 0.3796677589416504
    },
    {
      "epoch": 0.0002319091796875,
      "model_forward_time": 0.11560487747192383,
      "step": 37996
    },
    {
      "epoch": 0.0002319091796875,
      "step": 37996,
      "training_step_time": 0.3986964225769043
    },
    {
      "epoch": 0.000231915283203125,
      "model_forward_time": 0.115753173828125,
      "step": 37997
    },
    {
      "epoch": 0.000231915283203125,
      "step": 37997,
      "training_step_time": 0.4355318546295166
    },
    {
      "epoch": 0.00023192138671875,
      "model_forward_time": 0.11607027053833008,
      "step": 37998
    },
    {
      "epoch": 0.00023192138671875,
      "step": 37998,
      "training_step_time": 0.38129568099975586
    },
    {
      "epoch": 0.000231927490234375,
      "model_forward_time": 0.11529326438903809,
      "step": 37999
    },
    {
      "epoch": 0.000231927490234375,
      "step": 37999,
      "training_step_time": 0.39328765869140625
    },
    {
      "epoch": 0.00023193359375,
      "grad_norm": 0.1380060464143753,
      "learning_rate": 3.2468122240362284e-05,
      "loss": 0.0402,
      "step": 38000
    },
    {
      "epoch": 0.00023193359375,
      "model_forward_time": 0.11272525787353516,
      "step": 38000
    },
    {
      "epoch": 0.00023193359375,
      "step": 38000,
      "training_step_time": 0.3535499572753906
    },
    {
      "epoch": 0.000231939697265625,
      "model_forward_time": 0.11231017112731934,
      "step": 38001
    },
    {
      "epoch": 0.000231939697265625,
      "step": 38001,
      "training_step_time": 0.4262819290161133
    },
    {
      "epoch": 0.00023194580078125,
      "model_forward_time": 0.11328673362731934,
      "step": 38002
    },
    {
      "epoch": 0.00023194580078125,
      "step": 38002,
      "training_step_time": 0.38787341117858887
    },
    {
      "epoch": 0.000231951904296875,
      "model_forward_time": 0.1140899658203125,
      "step": 38003
    },
    {
      "epoch": 0.000231951904296875,
      "step": 38003,
      "training_step_time": 0.3762819766998291
    },
    {
      "epoch": 0.0002319580078125,
      "model_forward_time": 0.11370182037353516,
      "step": 38004
    },
    {
      "epoch": 0.0002319580078125,
      "step": 38004,
      "training_step_time": 0.37122392654418945
    },
    {
      "epoch": 0.000231964111328125,
      "model_forward_time": 0.11464095115661621,
      "step": 38005
    },
    {
      "epoch": 0.000231964111328125,
      "step": 38005,
      "training_step_time": 0.38179516792297363
    },
    {
      "epoch": 0.00023197021484375,
      "model_forward_time": 0.11506271362304688,
      "step": 38006
    },
    {
      "epoch": 0.00023197021484375,
      "step": 38006,
      "training_step_time": 0.3919506072998047
    },
    {
      "epoch": 0.000231976318359375,
      "model_forward_time": 0.11541080474853516,
      "step": 38007
    },
    {
      "epoch": 0.000231976318359375,
      "step": 38007,
      "training_step_time": 0.4031982421875
    },
    {
      "epoch": 0.000231982421875,
      "model_forward_time": 0.11537933349609375,
      "step": 38008
    },
    {
      "epoch": 0.000231982421875,
      "step": 38008,
      "training_step_time": 0.5059504508972168
    },
    {
      "epoch": 0.000231988525390625,
      "model_forward_time": 0.1157221794128418,
      "step": 38009
    },
    {
      "epoch": 0.000231988525390625,
      "step": 38009,
      "training_step_time": 0.5404922962188721
    },
    {
      "epoch": 0.00023199462890625,
      "grad_norm": 0.15483129024505615,
      "learning_rate": 3.244231668667578e-05,
      "loss": 0.0402,
      "step": 38010
    },
    {
      "epoch": 0.00023199462890625,
      "model_forward_time": 0.11471390724182129,
      "step": 38010
    },
    {
      "epoch": 0.00023199462890625,
      "step": 38010,
      "training_step_time": 0.39136171340942383
    },
    {
      "epoch": 0.000232000732421875,
      "model_forward_time": 0.11525106430053711,
      "step": 38011
    },
    {
      "epoch": 0.000232000732421875,
      "step": 38011,
      "training_step_time": 0.3886890411376953
    },
    {
      "epoch": 0.0002320068359375,
      "model_forward_time": 0.11507630348205566,
      "step": 38012
    },
    {
      "epoch": 0.0002320068359375,
      "step": 38012,
      "training_step_time": 0.39524149894714355
    },
    {
      "epoch": 0.000232012939453125,
      "model_forward_time": 0.11518120765686035,
      "step": 38013
    },
    {
      "epoch": 0.000232012939453125,
      "step": 38013,
      "training_step_time": 0.4219319820404053
    },
    {
      "epoch": 0.00023201904296875,
      "model_forward_time": 0.11511063575744629,
      "step": 38014
    },
    {
      "epoch": 0.00023201904296875,
      "step": 38014,
      "training_step_time": 0.47891783714294434
    },
    {
      "epoch": 0.000232025146484375,
      "model_forward_time": 0.11500883102416992,
      "step": 38015
    },
    {
      "epoch": 0.000232025146484375,
      "step": 38015,
      "training_step_time": 0.40517473220825195
    },
    {
      "epoch": 0.00023203125,
      "model_forward_time": 0.11485934257507324,
      "step": 38016
    },
    {
      "epoch": 0.00023203125,
      "step": 38016,
      "training_step_time": 0.40378522872924805
    },
    {
      "epoch": 0.000232037353515625,
      "model_forward_time": 0.11533164978027344,
      "step": 38017
    },
    {
      "epoch": 0.000232037353515625,
      "step": 38017,
      "training_step_time": 0.40060997009277344
    },
    {
      "epoch": 0.00023204345703125,
      "model_forward_time": 0.11505413055419922,
      "step": 38018
    },
    {
      "epoch": 0.00023204345703125,
      "step": 38018,
      "training_step_time": 0.3894634246826172
    },
    {
      "epoch": 0.000232049560546875,
      "model_forward_time": 0.11501789093017578,
      "step": 38019
    },
    {
      "epoch": 0.000232049560546875,
      "step": 38019,
      "training_step_time": 0.38482141494750977
    },
    {
      "epoch": 0.0002320556640625,
      "grad_norm": 0.08196355402469635,
      "learning_rate": 3.241651646654986e-05,
      "loss": 0.0379,
      "step": 38020
    },
    {
      "epoch": 0.0002320556640625,
      "model_forward_time": 0.11509346961975098,
      "step": 38020
    },
    {
      "epoch": 0.0002320556640625,
      "step": 38020,
      "training_step_time": 0.38824033737182617
    },
    {
      "epoch": 0.000232061767578125,
      "model_forward_time": 0.1162712574005127,
      "step": 38021
    },
    {
      "epoch": 0.000232061767578125,
      "step": 38021,
      "training_step_time": 0.3934347629547119
    },
    {
      "epoch": 0.00023206787109375,
      "model_forward_time": 0.11514663696289062,
      "step": 38022
    },
    {
      "epoch": 0.00023206787109375,
      "step": 38022,
      "training_step_time": 0.49484777450561523
    },
    {
      "epoch": 0.000232073974609375,
      "model_forward_time": 0.11461377143859863,
      "step": 38023
    },
    {
      "epoch": 0.000232073974609375,
      "step": 38023,
      "training_step_time": 0.5065805912017822
    },
    {
      "epoch": 0.000232080078125,
      "model_forward_time": 0.11475849151611328,
      "step": 38024
    },
    {
      "epoch": 0.000232080078125,
      "step": 38024,
      "training_step_time": 0.43810415267944336
    },
    {
      "epoch": 0.000232086181640625,
      "model_forward_time": 0.1150672435760498,
      "step": 38025
    },
    {
      "epoch": 0.000232086181640625,
      "step": 38025,
      "training_step_time": 0.39306092262268066
    },
    {
      "epoch": 0.00023209228515625,
      "model_forward_time": 0.11464357376098633,
      "step": 38026
    },
    {
      "epoch": 0.00023209228515625,
      "step": 38026,
      "training_step_time": 0.3940579891204834
    },
    {
      "epoch": 0.000232098388671875,
      "model_forward_time": 0.11473464965820312,
      "step": 38027
    },
    {
      "epoch": 0.000232098388671875,
      "step": 38027,
      "training_step_time": 0.402224063873291
    },
    {
      "epoch": 0.0002321044921875,
      "model_forward_time": 0.11475920677185059,
      "step": 38028
    },
    {
      "epoch": 0.0002321044921875,
      "step": 38028,
      "training_step_time": 0.446270227432251
    },
    {
      "epoch": 0.000232110595703125,
      "model_forward_time": 0.11461687088012695,
      "step": 38029
    },
    {
      "epoch": 0.000232110595703125,
      "step": 38029,
      "training_step_time": 0.40056586265563965
    },
    {
      "epoch": 0.00023211669921875,
      "grad_norm": 0.12435010820627213,
      "learning_rate": 3.239072158782198e-05,
      "loss": 0.0379,
      "step": 38030
    },
    {
      "epoch": 0.00023211669921875,
      "model_forward_time": 0.11589169502258301,
      "step": 38030
    },
    {
      "epoch": 0.00023211669921875,
      "step": 38030,
      "training_step_time": 0.41681766510009766
    },
    {
      "epoch": 0.000232122802734375,
      "model_forward_time": 0.11508297920227051,
      "step": 38031
    },
    {
      "epoch": 0.000232122802734375,
      "step": 38031,
      "training_step_time": 0.39818334579467773
    },
    {
      "epoch": 0.00023212890625,
      "model_forward_time": 0.11487770080566406,
      "step": 38032
    },
    {
      "epoch": 0.00023212890625,
      "step": 38032,
      "training_step_time": 0.3915531635284424
    },
    {
      "epoch": 0.000232135009765625,
      "model_forward_time": 0.11500406265258789,
      "step": 38033
    },
    {
      "epoch": 0.000232135009765625,
      "step": 38033,
      "training_step_time": 0.39307379722595215
    },
    {
      "epoch": 0.00023214111328125,
      "model_forward_time": 0.11489677429199219,
      "step": 38034
    },
    {
      "epoch": 0.00023214111328125,
      "step": 38034,
      "training_step_time": 0.40094923973083496
    },
    {
      "epoch": 0.000232147216796875,
      "model_forward_time": 0.11487269401550293,
      "step": 38035
    },
    {
      "epoch": 0.000232147216796875,
      "step": 38035,
      "training_step_time": 0.398876428604126
    },
    {
      "epoch": 0.0002321533203125,
      "model_forward_time": 0.11520648002624512,
      "step": 38036
    },
    {
      "epoch": 0.0002321533203125,
      "step": 38036,
      "training_step_time": 0.3688242435455322
    },
    {
      "epoch": 0.000232159423828125,
      "model_forward_time": 0.11541318893432617,
      "step": 38037
    },
    {
      "epoch": 0.000232159423828125,
      "step": 38037,
      "training_step_time": 0.4594717025756836
    },
    {
      "epoch": 0.00023216552734375,
      "model_forward_time": 0.11530208587646484,
      "step": 38038
    },
    {
      "epoch": 0.00023216552734375,
      "step": 38038,
      "training_step_time": 0.402904748916626
    },
    {
      "epoch": 0.000232171630859375,
      "model_forward_time": 0.11539483070373535,
      "step": 38039
    },
    {
      "epoch": 0.000232171630859375,
      "step": 38039,
      "training_step_time": 0.4234426021575928
    },
    {
      "epoch": 0.000232177734375,
      "grad_norm": 0.1421019732952118,
      "learning_rate": 3.236493205832795e-05,
      "loss": 0.0388,
      "step": 38040
    },
    {
      "epoch": 0.000232177734375,
      "model_forward_time": 0.1150507926940918,
      "step": 38040
    },
    {
      "epoch": 0.000232177734375,
      "step": 38040,
      "training_step_time": 0.38626933097839355
    },
    {
      "epoch": 0.000232183837890625,
      "model_forward_time": 0.11510467529296875,
      "step": 38041
    },
    {
      "epoch": 0.000232183837890625,
      "step": 38041,
      "training_step_time": 0.46466851234436035
    },
    {
      "epoch": 0.00023218994140625,
      "model_forward_time": 0.11521697044372559,
      "step": 38042
    },
    {
      "epoch": 0.00023218994140625,
      "step": 38042,
      "training_step_time": 0.4171257019042969
    },
    {
      "epoch": 0.000232196044921875,
      "model_forward_time": 0.11534953117370605,
      "step": 38043
    },
    {
      "epoch": 0.000232196044921875,
      "step": 38043,
      "training_step_time": 0.49465274810791016
    },
    {
      "epoch": 0.0002322021484375,
      "model_forward_time": 0.1144864559173584,
      "step": 38044
    },
    {
      "epoch": 0.0002322021484375,
      "step": 38044,
      "training_step_time": 0.3926818370819092
    },
    {
      "epoch": 0.000232208251953125,
      "model_forward_time": 0.11553263664245605,
      "step": 38045
    },
    {
      "epoch": 0.000232208251953125,
      "step": 38045,
      "training_step_time": 0.389751672744751
    },
    {
      "epoch": 0.00023221435546875,
      "model_forward_time": 0.1150367259979248,
      "step": 38046
    },
    {
      "epoch": 0.00023221435546875,
      "step": 38046,
      "training_step_time": 0.4151194095611572
    },
    {
      "epoch": 0.000232220458984375,
      "model_forward_time": 0.11535763740539551,
      "step": 38047
    },
    {
      "epoch": 0.000232220458984375,
      "step": 38047,
      "training_step_time": 0.38185596466064453
    },
    {
      "epoch": 0.0002322265625,
      "model_forward_time": 0.11666655540466309,
      "step": 38048
    },
    {
      "epoch": 0.0002322265625,
      "step": 38048,
      "training_step_time": 0.3785526752471924
    },
    {
      "epoch": 0.000232232666015625,
      "model_forward_time": 0.1153256893157959,
      "step": 38049
    },
    {
      "epoch": 0.000232232666015625,
      "step": 38049,
      "training_step_time": 0.37862730026245117
    },
    {
      "epoch": 0.00023223876953125,
      "grad_norm": 0.11716029047966003,
      "learning_rate": 3.233914788590192e-05,
      "loss": 0.0388,
      "step": 38050
    },
    {
      "epoch": 0.00023223876953125,
      "model_forward_time": 0.11600375175476074,
      "step": 38050
    },
    {
      "epoch": 0.00023223876953125,
      "step": 38050,
      "training_step_time": 0.38681817054748535
    },
    {
      "epoch": 0.000232244873046875,
      "model_forward_time": 0.11528968811035156,
      "step": 38051
    },
    {
      "epoch": 0.000232244873046875,
      "step": 38051,
      "training_step_time": 0.4434361457824707
    },
    {
      "epoch": 0.0002322509765625,
      "model_forward_time": 0.11527299880981445,
      "step": 38052
    },
    {
      "epoch": 0.0002322509765625,
      "step": 38052,
      "training_step_time": 0.41830873489379883
    },
    {
      "epoch": 0.000232257080078125,
      "model_forward_time": 0.1152658462524414,
      "step": 38053
    },
    {
      "epoch": 0.000232257080078125,
      "step": 38053,
      "training_step_time": 0.47675418853759766
    },
    {
      "epoch": 0.00023226318359375,
      "model_forward_time": 0.11517620086669922,
      "step": 38054
    },
    {
      "epoch": 0.00023226318359375,
      "step": 38054,
      "training_step_time": 0.40045690536499023
    },
    {
      "epoch": 0.000232269287109375,
      "model_forward_time": 0.1152198314666748,
      "step": 38055
    },
    {
      "epoch": 0.000232269287109375,
      "step": 38055,
      "training_step_time": 0.44039130210876465
    },
    {
      "epoch": 0.000232275390625,
      "model_forward_time": 0.11540532112121582,
      "step": 38056
    },
    {
      "epoch": 0.000232275390625,
      "step": 38056,
      "training_step_time": 0.4154782295227051
    },
    {
      "epoch": 0.000232281494140625,
      "model_forward_time": 0.11506295204162598,
      "step": 38057
    },
    {
      "epoch": 0.000232281494140625,
      "step": 38057,
      "training_step_time": 0.4391052722930908
    },
    {
      "epoch": 0.00023228759765625,
      "model_forward_time": 0.11590576171875,
      "step": 38058
    },
    {
      "epoch": 0.00023228759765625,
      "step": 38058,
      "training_step_time": 0.3979377746582031
    },
    {
      "epoch": 0.000232293701171875,
      "model_forward_time": 0.11497354507446289,
      "step": 38059
    },
    {
      "epoch": 0.000232293701171875,
      "step": 38059,
      "training_step_time": 0.3938286304473877
    },
    {
      "epoch": 0.0002322998046875,
      "grad_norm": 0.11573541164398193,
      "learning_rate": 3.231336907837646e-05,
      "loss": 0.0409,
      "step": 38060
    },
    {
      "epoch": 0.0002322998046875,
      "model_forward_time": 0.11494970321655273,
      "step": 38060
    },
    {
      "epoch": 0.0002322998046875,
      "step": 38060,
      "training_step_time": 0.3901553153991699
    },
    {
      "epoch": 0.000232305908203125,
      "model_forward_time": 0.11526274681091309,
      "step": 38061
    },
    {
      "epoch": 0.000232305908203125,
      "step": 38061,
      "training_step_time": 0.39669346809387207
    },
    {
      "epoch": 0.00023231201171875,
      "model_forward_time": 0.11612439155578613,
      "step": 38062
    },
    {
      "epoch": 0.00023231201171875,
      "step": 38062,
      "training_step_time": 0.40363073348999023
    },
    {
      "epoch": 0.000232318115234375,
      "model_forward_time": 0.11469697952270508,
      "step": 38063
    },
    {
      "epoch": 0.000232318115234375,
      "step": 38063,
      "training_step_time": 0.3872056007385254
    },
    {
      "epoch": 0.00023232421875,
      "model_forward_time": 0.11562252044677734,
      "step": 38064
    },
    {
      "epoch": 0.00023232421875,
      "step": 38064,
      "training_step_time": 0.40711450576782227
    },
    {
      "epoch": 0.000232330322265625,
      "model_forward_time": 0.11501193046569824,
      "step": 38065
    },
    {
      "epoch": 0.000232330322265625,
      "step": 38065,
      "training_step_time": 0.3666257858276367
    },
    {
      "epoch": 0.00023233642578125,
      "model_forward_time": 0.11487793922424316,
      "step": 38066
    },
    {
      "epoch": 0.00023233642578125,
      "step": 38066,
      "training_step_time": 0.4352283477783203
    },
    {
      "epoch": 0.000232342529296875,
      "model_forward_time": 0.1154167652130127,
      "step": 38067
    },
    {
      "epoch": 0.000232342529296875,
      "step": 38067,
      "training_step_time": 0.5018234252929688
    },
    {
      "epoch": 0.0002323486328125,
      "model_forward_time": 0.11505937576293945,
      "step": 38068
    },
    {
      "epoch": 0.0002323486328125,
      "step": 38068,
      "training_step_time": 0.41663694381713867
    },
    {
      "epoch": 0.000232354736328125,
      "model_forward_time": 0.11496639251708984,
      "step": 38069
    },
    {
      "epoch": 0.000232354736328125,
      "step": 38069,
      "training_step_time": 0.40801334381103516
    },
    {
      "epoch": 0.00023236083984375,
      "grad_norm": 0.09745994210243225,
      "learning_rate": 3.228759564358248e-05,
      "loss": 0.0421,
      "step": 38070
    },
    {
      "epoch": 0.00023236083984375,
      "model_forward_time": 0.11590242385864258,
      "step": 38070
    },
    {
      "epoch": 0.00023236083984375,
      "step": 38070,
      "training_step_time": 0.451488733291626
    },
    {
      "epoch": 0.000232366943359375,
      "model_forward_time": 0.11474871635437012,
      "step": 38071
    },
    {
      "epoch": 0.000232366943359375,
      "step": 38071,
      "training_step_time": 0.4283018112182617
    },
    {
      "epoch": 0.000232373046875,
      "model_forward_time": 0.11541032791137695,
      "step": 38072
    },
    {
      "epoch": 0.000232373046875,
      "step": 38072,
      "training_step_time": 0.4148092269897461
    },
    {
      "epoch": 0.000232379150390625,
      "model_forward_time": 0.11629724502563477,
      "step": 38073
    },
    {
      "epoch": 0.000232379150390625,
      "step": 38073,
      "training_step_time": 0.39945220947265625
    },
    {
      "epoch": 0.00023238525390625,
      "model_forward_time": 0.11522245407104492,
      "step": 38074
    },
    {
      "epoch": 0.00023238525390625,
      "step": 38074,
      "training_step_time": 0.38500070571899414
    },
    {
      "epoch": 0.000232391357421875,
      "model_forward_time": 0.11507391929626465,
      "step": 38075
    },
    {
      "epoch": 0.000232391357421875,
      "step": 38075,
      "training_step_time": 0.3809010982513428
    },
    {
      "epoch": 0.0002323974609375,
      "model_forward_time": 0.11476898193359375,
      "step": 38076
    },
    {
      "epoch": 0.0002323974609375,
      "step": 38076,
      "training_step_time": 0.39143919944763184
    },
    {
      "epoch": 0.000232403564453125,
      "model_forward_time": 0.11524271965026855,
      "step": 38077
    },
    {
      "epoch": 0.000232403564453125,
      "step": 38077,
      "training_step_time": 0.38945698738098145
    },
    {
      "epoch": 0.00023240966796875,
      "model_forward_time": 0.11669015884399414,
      "step": 38078
    },
    {
      "epoch": 0.00023240966796875,
      "step": 38078,
      "training_step_time": 0.384232759475708
    },
    {
      "epoch": 0.000232415771484375,
      "model_forward_time": 0.11481618881225586,
      "step": 38079
    },
    {
      "epoch": 0.000232415771484375,
      "step": 38079,
      "training_step_time": 0.3834691047668457
    },
    {
      "epoch": 0.000232421875,
      "grad_norm": 0.13751162588596344,
      "learning_rate": 3.226182758934927e-05,
      "loss": 0.0445,
      "step": 38080
    },
    {
      "epoch": 0.000232421875,
      "model_forward_time": 0.11548805236816406,
      "step": 38080
    },
    {
      "epoch": 0.000232421875,
      "step": 38080,
      "training_step_time": 0.3699939250946045
    },
    {
      "epoch": 0.000232427978515625,
      "model_forward_time": 0.11510586738586426,
      "step": 38081
    },
    {
      "epoch": 0.000232427978515625,
      "step": 38081,
      "training_step_time": 0.44330883026123047
    },
    {
      "epoch": 0.00023243408203125,
      "model_forward_time": 0.11536097526550293,
      "step": 38082
    },
    {
      "epoch": 0.00023243408203125,
      "step": 38082,
      "training_step_time": 0.43660950660705566
    },
    {
      "epoch": 0.000232440185546875,
      "model_forward_time": 0.11565542221069336,
      "step": 38083
    },
    {
      "epoch": 0.000232440185546875,
      "step": 38083,
      "training_step_time": 0.43093347549438477
    },
    {
      "epoch": 0.0002324462890625,
      "model_forward_time": 0.11540389060974121,
      "step": 38084
    },
    {
      "epoch": 0.0002324462890625,
      "step": 38084,
      "training_step_time": 0.44910192489624023
    },
    {
      "epoch": 0.000232452392578125,
      "model_forward_time": 0.11564326286315918,
      "step": 38085
    },
    {
      "epoch": 0.000232452392578125,
      "step": 38085,
      "training_step_time": 0.4081692695617676
    },
    {
      "epoch": 0.00023245849609375,
      "model_forward_time": 0.1150670051574707,
      "step": 38086
    },
    {
      "epoch": 0.00023245849609375,
      "step": 38086,
      "training_step_time": 0.43862199783325195
    },
    {
      "epoch": 0.000232464599609375,
      "model_forward_time": 0.11526870727539062,
      "step": 38087
    },
    {
      "epoch": 0.000232464599609375,
      "step": 38087,
      "training_step_time": 0.3869287967681885
    },
    {
      "epoch": 0.000232470703125,
      "model_forward_time": 0.11504149436950684,
      "step": 38088
    },
    {
      "epoch": 0.000232470703125,
      "step": 38088,
      "training_step_time": 0.4051501750946045
    },
    {
      "epoch": 0.000232476806640625,
      "model_forward_time": 0.1144101619720459,
      "step": 38089
    },
    {
      "epoch": 0.000232476806640625,
      "step": 38089,
      "training_step_time": 0.394773006439209
    },
    {
      "epoch": 0.00023248291015625,
      "grad_norm": 0.1139049082994461,
      "learning_rate": 3.223606492350451e-05,
      "loss": 0.0385,
      "step": 38090
    },
    {
      "epoch": 0.00023248291015625,
      "model_forward_time": 0.11555051803588867,
      "step": 38090
    },
    {
      "epoch": 0.00023248291015625,
      "step": 38090,
      "training_step_time": 0.39128589630126953
    },
    {
      "epoch": 0.000232489013671875,
      "model_forward_time": 0.11500716209411621,
      "step": 38091
    },
    {
      "epoch": 0.000232489013671875,
      "step": 38091,
      "training_step_time": 0.39706850051879883
    },
    {
      "epoch": 0.0002324951171875,
      "model_forward_time": 0.1159975528717041,
      "step": 38092
    },
    {
      "epoch": 0.0002324951171875,
      "step": 38092,
      "training_step_time": 0.3913273811340332
    },
    {
      "epoch": 0.000232501220703125,
      "model_forward_time": 0.11559343338012695,
      "step": 38093
    },
    {
      "epoch": 0.000232501220703125,
      "step": 38093,
      "training_step_time": 0.3785834312438965
    },
    {
      "epoch": 0.00023250732421875,
      "model_forward_time": 0.11614012718200684,
      "step": 38094
    },
    {
      "epoch": 0.00023250732421875,
      "step": 38094,
      "training_step_time": 0.38959670066833496
    },
    {
      "epoch": 0.000232513427734375,
      "model_forward_time": 0.11546206474304199,
      "step": 38095
    },
    {
      "epoch": 0.000232513427734375,
      "step": 38095,
      "training_step_time": 0.36661434173583984
    },
    {
      "epoch": 0.00023251953125,
      "model_forward_time": 0.11491680145263672,
      "step": 38096
    },
    {
      "epoch": 0.00023251953125,
      "step": 38096,
      "training_step_time": 0.4696006774902344
    },
    {
      "epoch": 0.000232525634765625,
      "model_forward_time": 0.11521434783935547,
      "step": 38097
    },
    {
      "epoch": 0.000232525634765625,
      "step": 38097,
      "training_step_time": 0.4709596633911133
    },
    {
      "epoch": 0.00023253173828125,
      "model_forward_time": 0.11472225189208984,
      "step": 38098
    },
    {
      "epoch": 0.00023253173828125,
      "step": 38098,
      "training_step_time": 0.4593958854675293
    },
    {
      "epoch": 0.000232537841796875,
      "model_forward_time": 0.1145637035369873,
      "step": 38099
    },
    {
      "epoch": 0.000232537841796875,
      "step": 38099,
      "training_step_time": 0.4462442398071289
    },
    {
      "epoch": 0.0002325439453125,
      "grad_norm": 0.12940113246440887,
      "learning_rate": 3.221030765387417e-05,
      "loss": 0.041,
      "step": 38100
    },
    {
      "epoch": 0.0002325439453125,
      "model_forward_time": 0.11465954780578613,
      "step": 38100
    },
    {
      "epoch": 0.0002325439453125,
      "step": 38100,
      "training_step_time": 0.4832167625427246
    },
    {
      "epoch": 0.000232550048828125,
      "model_forward_time": 0.1148827075958252,
      "step": 38101
    },
    {
      "epoch": 0.000232550048828125,
      "step": 38101,
      "training_step_time": 0.390244722366333
    },
    {
      "epoch": 0.00023255615234375,
      "model_forward_time": 0.11477470397949219,
      "step": 38102
    },
    {
      "epoch": 0.00023255615234375,
      "step": 38102,
      "training_step_time": 0.38191723823547363
    },
    {
      "epoch": 0.000232562255859375,
      "model_forward_time": 0.11489391326904297,
      "step": 38103
    },
    {
      "epoch": 0.000232562255859375,
      "step": 38103,
      "training_step_time": 0.3846752643585205
    },
    {
      "epoch": 0.000232568359375,
      "model_forward_time": 0.11446833610534668,
      "step": 38104
    },
    {
      "epoch": 0.000232568359375,
      "step": 38104,
      "training_step_time": 0.393848180770874
    },
    {
      "epoch": 0.000232574462890625,
      "model_forward_time": 0.11503291130065918,
      "step": 38105
    },
    {
      "epoch": 0.000232574462890625,
      "step": 38105,
      "training_step_time": 0.3802938461303711
    },
    {
      "epoch": 0.00023258056640625,
      "model_forward_time": 0.11459755897521973,
      "step": 38106
    },
    {
      "epoch": 0.00023258056640625,
      "step": 38106,
      "training_step_time": 0.39745092391967773
    },
    {
      "epoch": 0.000232586669921875,
      "model_forward_time": 0.11516976356506348,
      "step": 38107
    },
    {
      "epoch": 0.000232586669921875,
      "step": 38107,
      "training_step_time": 0.3970663547515869
    },
    {
      "epoch": 0.0002325927734375,
      "model_forward_time": 0.11531925201416016,
      "step": 38108
    },
    {
      "epoch": 0.0002325927734375,
      "step": 38108,
      "training_step_time": 0.3874702453613281
    },
    {
      "epoch": 0.000232598876953125,
      "model_forward_time": 0.11559510231018066,
      "step": 38109
    },
    {
      "epoch": 0.000232598876953125,
      "step": 38109,
      "training_step_time": 0.3850367069244385
    },
    {
      "epoch": 0.00023260498046875,
      "grad_norm": 0.11664827913045883,
      "learning_rate": 3.218455578828269e-05,
      "loss": 0.0434,
      "step": 38110
    },
    {
      "epoch": 0.00023260498046875,
      "model_forward_time": 0.11735653877258301,
      "step": 38110
    },
    {
      "epoch": 0.00023260498046875,
      "step": 38110,
      "training_step_time": 0.4821631908416748
    },
    {
      "epoch": 0.000232611083984375,
      "model_forward_time": 0.11555910110473633,
      "step": 38111
    },
    {
      "epoch": 0.000232611083984375,
      "step": 38111,
      "training_step_time": 0.47821760177612305
    },
    {
      "epoch": 0.0002326171875,
      "model_forward_time": 0.11684942245483398,
      "step": 38112
    },
    {
      "epoch": 0.0002326171875,
      "step": 38112,
      "training_step_time": 0.4213576316833496
    },
    {
      "epoch": 0.000232623291015625,
      "model_forward_time": 0.11523222923278809,
      "step": 38113
    },
    {
      "epoch": 0.000232623291015625,
      "step": 38113,
      "training_step_time": 0.48571276664733887
    },
    {
      "epoch": 0.00023262939453125,
      "model_forward_time": 0.11484217643737793,
      "step": 38114
    },
    {
      "epoch": 0.00023262939453125,
      "step": 38114,
      "training_step_time": 0.41837215423583984
    },
    {
      "epoch": 0.000232635498046875,
      "model_forward_time": 0.11574077606201172,
      "step": 38115
    },
    {
      "epoch": 0.000232635498046875,
      "step": 38115,
      "training_step_time": 0.5092926025390625
    },
    {
      "epoch": 0.0002326416015625,
      "model_forward_time": 0.11507558822631836,
      "step": 38116
    },
    {
      "epoch": 0.0002326416015625,
      "step": 38116,
      "training_step_time": 0.42118096351623535
    },
    {
      "epoch": 0.000232647705078125,
      "model_forward_time": 0.11621451377868652,
      "step": 38117
    },
    {
      "epoch": 0.000232647705078125,
      "step": 38117,
      "training_step_time": 0.3914914131164551
    },
    {
      "epoch": 0.00023265380859375,
      "model_forward_time": 0.11487197875976562,
      "step": 38118
    },
    {
      "epoch": 0.00023265380859375,
      "step": 38118,
      "training_step_time": 0.40615272521972656
    },
    {
      "epoch": 0.000232659912109375,
      "model_forward_time": 0.11510133743286133,
      "step": 38119
    },
    {
      "epoch": 0.000232659912109375,
      "step": 38119,
      "training_step_time": 0.3941810131072998
    },
    {
      "epoch": 0.000232666015625,
      "grad_norm": 0.08388500660657883,
      "learning_rate": 3.2158809334552745e-05,
      "loss": 0.041,
      "step": 38120
    },
    {
      "epoch": 0.000232666015625,
      "model_forward_time": 0.11423778533935547,
      "step": 38120
    },
    {
      "epoch": 0.000232666015625,
      "step": 38120,
      "training_step_time": 0.38965535163879395
    },
    {
      "epoch": 0.000232672119140625,
      "model_forward_time": 0.11536049842834473,
      "step": 38121
    },
    {
      "epoch": 0.000232672119140625,
      "step": 38121,
      "training_step_time": 0.391040563583374
    },
    {
      "epoch": 0.00023267822265625,
      "model_forward_time": 0.1151268482208252,
      "step": 38122
    },
    {
      "epoch": 0.00023267822265625,
      "step": 38122,
      "training_step_time": 0.3882274627685547
    },
    {
      "epoch": 0.000232684326171875,
      "model_forward_time": 0.11586594581604004,
      "step": 38123
    },
    {
      "epoch": 0.000232684326171875,
      "step": 38123,
      "training_step_time": 0.3946044445037842
    },
    {
      "epoch": 0.0002326904296875,
      "model_forward_time": 0.11542773246765137,
      "step": 38124
    },
    {
      "epoch": 0.0002326904296875,
      "step": 38124,
      "training_step_time": 0.40857386589050293
    },
    {
      "epoch": 0.000232696533203125,
      "model_forward_time": 0.11554408073425293,
      "step": 38125
    },
    {
      "epoch": 0.000232696533203125,
      "step": 38125,
      "training_step_time": 0.5153212547302246
    },
    {
      "epoch": 0.00023270263671875,
      "model_forward_time": 0.11484622955322266,
      "step": 38126
    },
    {
      "epoch": 0.00023270263671875,
      "step": 38126,
      "training_step_time": 0.4869844913482666
    },
    {
      "epoch": 0.000232708740234375,
      "model_forward_time": 0.11481428146362305,
      "step": 38127
    },
    {
      "epoch": 0.000232708740234375,
      "step": 38127,
      "training_step_time": 0.390655517578125
    },
    {
      "epoch": 0.00023271484375,
      "model_forward_time": 0.1148521900177002,
      "step": 38128
    },
    {
      "epoch": 0.00023271484375,
      "step": 38128,
      "training_step_time": 0.47875285148620605
    },
    {
      "epoch": 0.000232720947265625,
      "model_forward_time": 0.11442685127258301,
      "step": 38129
    },
    {
      "epoch": 0.000232720947265625,
      "step": 38129,
      "training_step_time": 0.40308547019958496
    },
    {
      "epoch": 0.00023272705078125,
      "grad_norm": 0.10680735856294632,
      "learning_rate": 3.2133068300505455e-05,
      "loss": 0.0424,
      "step": 38130
    },
    {
      "epoch": 0.00023272705078125,
      "model_forward_time": 0.11508584022521973,
      "step": 38130
    },
    {
      "epoch": 0.00023272705078125,
      "step": 38130,
      "training_step_time": 0.48841166496276855
    },
    {
      "epoch": 0.000232733154296875,
      "model_forward_time": 0.11491823196411133,
      "step": 38131
    },
    {
      "epoch": 0.000232733154296875,
      "step": 38131,
      "training_step_time": 0.3871800899505615
    },
    {
      "epoch": 0.0002327392578125,
      "model_forward_time": 0.11501002311706543,
      "step": 38132
    },
    {
      "epoch": 0.0002327392578125,
      "step": 38132,
      "training_step_time": 0.38461971282958984
    },
    {
      "epoch": 0.000232745361328125,
      "model_forward_time": 0.11467289924621582,
      "step": 38133
    },
    {
      "epoch": 0.000232745361328125,
      "step": 38133,
      "training_step_time": 0.38445472717285156
    },
    {
      "epoch": 0.00023275146484375,
      "model_forward_time": 0.11516761779785156,
      "step": 38134
    },
    {
      "epoch": 0.00023275146484375,
      "step": 38134,
      "training_step_time": 0.38373827934265137
    },
    {
      "epoch": 0.000232757568359375,
      "model_forward_time": 0.11501502990722656,
      "step": 38135
    },
    {
      "epoch": 0.000232757568359375,
      "step": 38135,
      "training_step_time": 0.4013395309448242
    },
    {
      "epoch": 0.000232763671875,
      "model_forward_time": 0.11557435989379883,
      "step": 38136
    },
    {
      "epoch": 0.000232763671875,
      "step": 38136,
      "training_step_time": 0.3981285095214844
    },
    {
      "epoch": 0.000232769775390625,
      "model_forward_time": 0.11517667770385742,
      "step": 38137
    },
    {
      "epoch": 0.000232769775390625,
      "step": 38137,
      "training_step_time": 0.40375757217407227
    },
    {
      "epoch": 0.00023277587890625,
      "model_forward_time": 0.11553144454956055,
      "step": 38138
    },
    {
      "epoch": 0.00023277587890625,
      "step": 38138,
      "training_step_time": 0.41907668113708496
    },
    {
      "epoch": 0.000232781982421875,
      "model_forward_time": 0.11466336250305176,
      "step": 38139
    },
    {
      "epoch": 0.000232781982421875,
      "step": 38139,
      "training_step_time": 0.3682255744934082
    },
    {
      "epoch": 0.0002327880859375,
      "grad_norm": 0.12821777164936066,
      "learning_rate": 3.210733269396028e-05,
      "loss": 0.0362,
      "step": 38140
    },
    {
      "epoch": 0.0002327880859375,
      "model_forward_time": 0.11521601676940918,
      "step": 38140
    },
    {
      "epoch": 0.0002327880859375,
      "step": 38140,
      "training_step_time": 0.45053648948669434
    },
    {
      "epoch": 0.000232794189453125,
      "model_forward_time": 0.11545753479003906,
      "step": 38141
    },
    {
      "epoch": 0.000232794189453125,
      "step": 38141,
      "training_step_time": 0.4277958869934082
    },
    {
      "epoch": 0.00023280029296875,
      "model_forward_time": 0.11487317085266113,
      "step": 38142
    },
    {
      "epoch": 0.00023280029296875,
      "step": 38142,
      "training_step_time": 0.41384267807006836
    },
    {
      "epoch": 0.000232806396484375,
      "model_forward_time": 0.11513042449951172,
      "step": 38143
    },
    {
      "epoch": 0.000232806396484375,
      "step": 38143,
      "training_step_time": 0.3989830017089844
    },
    {
      "epoch": 0.0002328125,
      "model_forward_time": 0.11518263816833496,
      "step": 38144
    },
    {
      "epoch": 0.0002328125,
      "step": 38144,
      "training_step_time": 0.4545300006866455
    },
    {
      "epoch": 0.000232818603515625,
      "model_forward_time": 0.11562895774841309,
      "step": 38145
    },
    {
      "epoch": 0.000232818603515625,
      "step": 38145,
      "training_step_time": 0.39480161666870117
    },
    {
      "epoch": 0.00023282470703125,
      "model_forward_time": 0.11449575424194336,
      "step": 38146
    },
    {
      "epoch": 0.00023282470703125,
      "step": 38146,
      "training_step_time": 0.3948178291320801
    },
    {
      "epoch": 0.000232830810546875,
      "model_forward_time": 0.11591362953186035,
      "step": 38147
    },
    {
      "epoch": 0.000232830810546875,
      "step": 38147,
      "training_step_time": 0.4014885425567627
    },
    {
      "epoch": 0.0002328369140625,
      "model_forward_time": 0.11517667770385742,
      "step": 38148
    },
    {
      "epoch": 0.0002328369140625,
      "step": 38148,
      "training_step_time": 0.39111924171447754
    },
    {
      "epoch": 0.000232843017578125,
      "model_forward_time": 0.11510252952575684,
      "step": 38149
    },
    {
      "epoch": 0.000232843017578125,
      "step": 38149,
      "training_step_time": 0.38387179374694824
    },
    {
      "epoch": 0.00023284912109375,
      "grad_norm": 0.07626234740018845,
      "learning_rate": 3.2081602522734986e-05,
      "loss": 0.042,
      "step": 38150
    },
    {
      "epoch": 0.00023284912109375,
      "model_forward_time": 0.11497068405151367,
      "step": 38150
    },
    {
      "epoch": 0.00023284912109375,
      "step": 38150,
      "training_step_time": 0.39441418647766113
    },
    {
      "epoch": 0.000232855224609375,
      "model_forward_time": 0.11539626121520996,
      "step": 38151
    },
    {
      "epoch": 0.000232855224609375,
      "step": 38151,
      "training_step_time": 0.42874765396118164
    },
    {
      "epoch": 0.000232861328125,
      "model_forward_time": 0.11551141738891602,
      "step": 38152
    },
    {
      "epoch": 0.000232861328125,
      "step": 38152,
      "training_step_time": 0.411060094833374
    },
    {
      "epoch": 0.000232867431640625,
      "model_forward_time": 0.11538195610046387,
      "step": 38153
    },
    {
      "epoch": 0.000232867431640625,
      "step": 38153,
      "training_step_time": 0.3850259780883789
    },
    {
      "epoch": 0.00023287353515625,
      "model_forward_time": 0.1143944263458252,
      "step": 38154
    },
    {
      "epoch": 0.00023287353515625,
      "step": 38154,
      "training_step_time": 0.3657264709472656
    },
    {
      "epoch": 0.000232879638671875,
      "model_forward_time": 0.11579370498657227,
      "step": 38155
    },
    {
      "epoch": 0.000232879638671875,
      "step": 38155,
      "training_step_time": 0.4135894775390625
    },
    {
      "epoch": 0.0002328857421875,
      "model_forward_time": 0.11567902565002441,
      "step": 38156
    },
    {
      "epoch": 0.0002328857421875,
      "step": 38156,
      "training_step_time": 0.43847227096557617
    },
    {
      "epoch": 0.000232891845703125,
      "model_forward_time": 0.11577582359313965,
      "step": 38157
    },
    {
      "epoch": 0.000232891845703125,
      "step": 38157,
      "training_step_time": 0.3767690658569336
    },
    {
      "epoch": 0.00023289794921875,
      "model_forward_time": 0.11531686782836914,
      "step": 38158
    },
    {
      "epoch": 0.00023289794921875,
      "step": 38158,
      "training_step_time": 0.4605584144592285
    },
    {
      "epoch": 0.000232904052734375,
      "model_forward_time": 0.11501121520996094,
      "step": 38159
    },
    {
      "epoch": 0.000232904052734375,
      "step": 38159,
      "training_step_time": 0.44781041145324707
    },
    {
      "epoch": 0.00023291015625,
      "grad_norm": 0.09563605487346649,
      "learning_rate": 3.205587779464576e-05,
      "loss": 0.039,
      "step": 38160
    },
    {
      "epoch": 0.00023291015625,
      "model_forward_time": 0.11548423767089844,
      "step": 38160
    },
    {
      "epoch": 0.00023291015625,
      "step": 38160,
      "training_step_time": 0.39136815071105957
    },
    {
      "epoch": 0.000232916259765625,
      "model_forward_time": 0.11514687538146973,
      "step": 38161
    },
    {
      "epoch": 0.000232916259765625,
      "step": 38161,
      "training_step_time": 0.37586474418640137
    },
    {
      "epoch": 0.00023292236328125,
      "model_forward_time": 0.11561369895935059,
      "step": 38162
    },
    {
      "epoch": 0.00023292236328125,
      "step": 38162,
      "training_step_time": 0.38636159896850586
    },
    {
      "epoch": 0.000232928466796875,
      "model_forward_time": 0.1146852970123291,
      "step": 38163
    },
    {
      "epoch": 0.000232928466796875,
      "step": 38163,
      "training_step_time": 0.41450977325439453
    },
    {
      "epoch": 0.0002329345703125,
      "model_forward_time": 0.11554718017578125,
      "step": 38164
    },
    {
      "epoch": 0.0002329345703125,
      "step": 38164,
      "training_step_time": 0.41348910331726074
    },
    {
      "epoch": 0.000232940673828125,
      "model_forward_time": 0.11569762229919434,
      "step": 38165
    },
    {
      "epoch": 0.000232940673828125,
      "step": 38165,
      "training_step_time": 0.40415263175964355
    },
    {
      "epoch": 0.00023294677734375,
      "model_forward_time": 0.11465144157409668,
      "step": 38166
    },
    {
      "epoch": 0.00023294677734375,
      "step": 38166,
      "training_step_time": 0.40437960624694824
    },
    {
      "epoch": 0.000232952880859375,
      "model_forward_time": 0.11526870727539062,
      "step": 38167
    },
    {
      "epoch": 0.000232952880859375,
      "step": 38167,
      "training_step_time": 0.3777651786804199
    },
    {
      "epoch": 0.000232958984375,
      "model_forward_time": 0.11462950706481934,
      "step": 38168
    },
    {
      "epoch": 0.000232958984375,
      "step": 38168,
      "training_step_time": 0.3897430896759033
    },
    {
      "epoch": 0.000232965087890625,
      "model_forward_time": 0.11527180671691895,
      "step": 38169
    },
    {
      "epoch": 0.000232965087890625,
      "step": 38169,
      "training_step_time": 0.3681199550628662
    },
    {
      "epoch": 0.00023297119140625,
      "grad_norm": 0.10745564103126526,
      "learning_rate": 3.203015851750706e-05,
      "loss": 0.039,
      "step": 38170
    },
    {
      "epoch": 0.00023297119140625,
      "model_forward_time": 0.11561107635498047,
      "step": 38170
    },
    {
      "epoch": 0.00023297119140625,
      "step": 38170,
      "training_step_time": 0.45250487327575684
    },
    {
      "epoch": 0.000232977294921875,
      "model_forward_time": 0.11532378196716309,
      "step": 38171
    },
    {
      "epoch": 0.000232977294921875,
      "step": 38171,
      "training_step_time": 0.4591515064239502
    },
    {
      "epoch": 0.0002329833984375,
      "model_forward_time": 0.11600637435913086,
      "step": 38172
    },
    {
      "epoch": 0.0002329833984375,
      "step": 38172,
      "training_step_time": 0.47652339935302734
    },
    {
      "epoch": 0.000232989501953125,
      "model_forward_time": 0.11478424072265625,
      "step": 38173
    },
    {
      "epoch": 0.000232989501953125,
      "step": 38173,
      "training_step_time": 0.40590906143188477
    },
    {
      "epoch": 0.00023299560546875,
      "model_forward_time": 0.11515665054321289,
      "step": 38174
    },
    {
      "epoch": 0.00023299560546875,
      "step": 38174,
      "training_step_time": 0.3957507610321045
    },
    {
      "epoch": 0.000233001708984375,
      "model_forward_time": 0.11540436744689941,
      "step": 38175
    },
    {
      "epoch": 0.000233001708984375,
      "step": 38175,
      "training_step_time": 0.39647841453552246
    },
    {
      "epoch": 0.0002330078125,
      "model_forward_time": 0.11532092094421387,
      "step": 38176
    },
    {
      "epoch": 0.0002330078125,
      "step": 38176,
      "training_step_time": 0.41278767585754395
    },
    {
      "epoch": 0.000233013916015625,
      "model_forward_time": 0.1148529052734375,
      "step": 38177
    },
    {
      "epoch": 0.000233013916015625,
      "step": 38177,
      "training_step_time": 0.4059910774230957
    },
    {
      "epoch": 0.00023302001953125,
      "model_forward_time": 0.11511397361755371,
      "step": 38178
    },
    {
      "epoch": 0.00023302001953125,
      "step": 38178,
      "training_step_time": 0.3938279151916504
    },
    {
      "epoch": 0.000233026123046875,
      "model_forward_time": 0.11545205116271973,
      "step": 38179
    },
    {
      "epoch": 0.000233026123046875,
      "step": 38179,
      "training_step_time": 0.43428540229797363
    },
    {
      "epoch": 0.0002330322265625,
      "grad_norm": 0.10428624600172043,
      "learning_rate": 3.2004444699131727e-05,
      "loss": 0.0375,
      "step": 38180
    },
    {
      "epoch": 0.0002330322265625,
      "model_forward_time": 0.11519551277160645,
      "step": 38180
    },
    {
      "epoch": 0.0002330322265625,
      "step": 38180,
      "training_step_time": 0.3900134563446045
    },
    {
      "epoch": 0.000233038330078125,
      "model_forward_time": 0.11528825759887695,
      "step": 38181
    },
    {
      "epoch": 0.000233038330078125,
      "step": 38181,
      "training_step_time": 0.3982255458831787
    },
    {
      "epoch": 0.00023304443359375,
      "model_forward_time": 0.11527419090270996,
      "step": 38182
    },
    {
      "epoch": 0.00023304443359375,
      "step": 38182,
      "training_step_time": 0.4012784957885742
    },
    {
      "epoch": 0.000233050537109375,
      "model_forward_time": 0.11506175994873047,
      "step": 38183
    },
    {
      "epoch": 0.000233050537109375,
      "step": 38183,
      "training_step_time": 0.42130064964294434
    },
    {
      "epoch": 0.000233056640625,
      "model_forward_time": 0.11601877212524414,
      "step": 38184
    },
    {
      "epoch": 0.000233056640625,
      "step": 38184,
      "training_step_time": 0.41625213623046875
    },
    {
      "epoch": 0.000233062744140625,
      "model_forward_time": 0.11606216430664062,
      "step": 38185
    },
    {
      "epoch": 0.000233062744140625,
      "step": 38185,
      "training_step_time": 0.5187089443206787
    },
    {
      "epoch": 0.00023306884765625,
      "model_forward_time": 0.11675834655761719,
      "step": 38186
    },
    {
      "epoch": 0.00023306884765625,
      "step": 38186,
      "training_step_time": 0.4139223098754883
    },
    {
      "epoch": 0.000233074951171875,
      "model_forward_time": 0.1153099536895752,
      "step": 38187
    },
    {
      "epoch": 0.000233074951171875,
      "step": 38187,
      "training_step_time": 0.40289950370788574
    },
    {
      "epoch": 0.0002330810546875,
      "model_forward_time": 0.11473798751831055,
      "step": 38188
    },
    {
      "epoch": 0.0002330810546875,
      "step": 38188,
      "training_step_time": 0.43251824378967285
    },
    {
      "epoch": 0.000233087158203125,
      "model_forward_time": 0.11510324478149414,
      "step": 38189
    },
    {
      "epoch": 0.000233087158203125,
      "step": 38189,
      "training_step_time": 0.40033626556396484
    },
    {
      "epoch": 0.00023309326171875,
      "grad_norm": 0.12219711393117905,
      "learning_rate": 3.197873634733096e-05,
      "loss": 0.0383,
      "step": 38190
    },
    {
      "epoch": 0.00023309326171875,
      "model_forward_time": 0.11521530151367188,
      "step": 38190
    },
    {
      "epoch": 0.00023309326171875,
      "step": 38190,
      "training_step_time": 0.4050626754760742
    },
    {
      "epoch": 0.000233099365234375,
      "model_forward_time": 0.11540484428405762,
      "step": 38191
    },
    {
      "epoch": 0.000233099365234375,
      "step": 38191,
      "training_step_time": 0.3929111957550049
    },
    {
      "epoch": 0.00023310546875,
      "model_forward_time": 0.11504507064819336,
      "step": 38192
    },
    {
      "epoch": 0.00023310546875,
      "step": 38192,
      "training_step_time": 0.4272494316101074
    },
    {
      "epoch": 0.000233111572265625,
      "model_forward_time": 0.11585879325866699,
      "step": 38193
    },
    {
      "epoch": 0.000233111572265625,
      "step": 38193,
      "training_step_time": 0.3877098560333252
    },
    {
      "epoch": 0.00023311767578125,
      "model_forward_time": 0.11527824401855469,
      "step": 38194
    },
    {
      "epoch": 0.00023311767578125,
      "step": 38194,
      "training_step_time": 0.4086606502532959
    },
    {
      "epoch": 0.000233123779296875,
      "model_forward_time": 0.11529970169067383,
      "step": 38195
    },
    {
      "epoch": 0.000233123779296875,
      "step": 38195,
      "training_step_time": 0.3907134532928467
    },
    {
      "epoch": 0.0002331298828125,
      "model_forward_time": 0.11519145965576172,
      "step": 38196
    },
    {
      "epoch": 0.0002331298828125,
      "step": 38196,
      "training_step_time": 0.3956644535064697
    },
    {
      "epoch": 0.000233135986328125,
      "model_forward_time": 0.11492252349853516,
      "step": 38197
    },
    {
      "epoch": 0.000233135986328125,
      "step": 38197,
      "training_step_time": 0.4114701747894287
    },
    {
      "epoch": 0.00023314208984375,
      "model_forward_time": 0.11577463150024414,
      "step": 38198
    },
    {
      "epoch": 0.00023314208984375,
      "step": 38198,
      "training_step_time": 0.4668288230895996
    },
    {
      "epoch": 0.000233148193359375,
      "model_forward_time": 0.11481356620788574,
      "step": 38199
    },
    {
      "epoch": 0.000233148193359375,
      "step": 38199,
      "training_step_time": 0.4672055244445801
    },
    {
      "epoch": 0.000233154296875,
      "grad_norm": 0.1250804215669632,
      "learning_rate": 3.1953033469914276e-05,
      "loss": 0.0453,
      "step": 38200
    },
    {
      "epoch": 0.000233154296875,
      "model_forward_time": 0.11512875556945801,
      "step": 38200
    },
    {
      "epoch": 0.000233154296875,
      "step": 38200,
      "training_step_time": 0.49108099937438965
    },
    {
      "epoch": 0.000233160400390625,
      "model_forward_time": 0.11582303047180176,
      "step": 38201
    },
    {
      "epoch": 0.000233160400390625,
      "step": 38201,
      "training_step_time": 0.4881868362426758
    },
    {
      "epoch": 0.00023316650390625,
      "model_forward_time": 0.11473464965820312,
      "step": 38202
    },
    {
      "epoch": 0.00023316650390625,
      "step": 38202,
      "training_step_time": 0.4242072105407715
    },
    {
      "epoch": 0.000233172607421875,
      "model_forward_time": 0.11502528190612793,
      "step": 38203
    },
    {
      "epoch": 0.000233172607421875,
      "step": 38203,
      "training_step_time": 0.3916656970977783
    },
    {
      "epoch": 0.0002331787109375,
      "model_forward_time": 0.11478519439697266,
      "step": 38204
    },
    {
      "epoch": 0.0002331787109375,
      "step": 38204,
      "training_step_time": 0.37699460983276367
    },
    {
      "epoch": 0.000233184814453125,
      "model_forward_time": 0.11564493179321289,
      "step": 38205
    },
    {
      "epoch": 0.000233184814453125,
      "step": 38205,
      "training_step_time": 0.38880085945129395
    },
    {
      "epoch": 0.00023319091796875,
      "model_forward_time": 0.11506319046020508,
      "step": 38206
    },
    {
      "epoch": 0.00023319091796875,
      "step": 38206,
      "training_step_time": 0.39186835289001465
    },
    {
      "epoch": 0.000233197021484375,
      "model_forward_time": 0.11541581153869629,
      "step": 38207
    },
    {
      "epoch": 0.000233197021484375,
      "step": 38207,
      "training_step_time": 0.39563941955566406
    },
    {
      "epoch": 0.000233203125,
      "model_forward_time": 0.11508440971374512,
      "step": 38208
    },
    {
      "epoch": 0.000233203125,
      "step": 38208,
      "training_step_time": 0.4143366813659668
    },
    {
      "epoch": 0.000233209228515625,
      "model_forward_time": 0.11520743370056152,
      "step": 38209
    },
    {
      "epoch": 0.000233209228515625,
      "step": 38209,
      "training_step_time": 0.38645482063293457
    },
    {
      "epoch": 0.00023321533203125,
      "grad_norm": 0.1330304592847824,
      "learning_rate": 3.192733607468951e-05,
      "loss": 0.0465,
      "step": 38210
    },
    {
      "epoch": 0.00023321533203125,
      "model_forward_time": 0.11492729187011719,
      "step": 38210
    },
    {
      "epoch": 0.00023321533203125,
      "step": 38210,
      "training_step_time": 0.39309215545654297
    },
    {
      "epoch": 0.000233221435546875,
      "model_forward_time": 0.1157374382019043,
      "step": 38211
    },
    {
      "epoch": 0.000233221435546875,
      "step": 38211,
      "training_step_time": 0.3921835422515869
    },
    {
      "epoch": 0.0002332275390625,
      "model_forward_time": 0.11522769927978516,
      "step": 38212
    },
    {
      "epoch": 0.0002332275390625,
      "step": 38212,
      "training_step_time": 0.4510493278503418
    },
    {
      "epoch": 0.000233233642578125,
      "model_forward_time": 0.1153268814086914,
      "step": 38213
    },
    {
      "epoch": 0.000233233642578125,
      "step": 38213,
      "training_step_time": 0.3720858097076416
    },
    {
      "epoch": 0.00023323974609375,
      "model_forward_time": 0.11548829078674316,
      "step": 38214
    },
    {
      "epoch": 0.00023323974609375,
      "step": 38214,
      "training_step_time": 0.4741537570953369
    },
    {
      "epoch": 0.000233245849609375,
      "model_forward_time": 0.11467313766479492,
      "step": 38215
    },
    {
      "epoch": 0.000233245849609375,
      "step": 38215,
      "training_step_time": 0.44356775283813477
    },
    {
      "epoch": 0.000233251953125,
      "model_forward_time": 0.1156148910522461,
      "step": 38216
    },
    {
      "epoch": 0.000233251953125,
      "step": 38216,
      "training_step_time": 0.39145350456237793
    },
    {
      "epoch": 0.000233258056640625,
      "model_forward_time": 0.11522269248962402,
      "step": 38217
    },
    {
      "epoch": 0.000233258056640625,
      "step": 38217,
      "training_step_time": 0.45023226737976074
    },
    {
      "epoch": 0.00023326416015625,
      "model_forward_time": 0.11543488502502441,
      "step": 38218
    },
    {
      "epoch": 0.00023326416015625,
      "step": 38218,
      "training_step_time": 0.3797478675842285
    },
    {
      "epoch": 0.000233270263671875,
      "model_forward_time": 0.11536240577697754,
      "step": 38219
    },
    {
      "epoch": 0.000233270263671875,
      "step": 38219,
      "training_step_time": 0.4247860908508301
    },
    {
      "epoch": 0.0002332763671875,
      "grad_norm": 0.10594993084669113,
      "learning_rate": 3.190164416946285e-05,
      "loss": 0.0415,
      "step": 38220
    },
    {
      "epoch": 0.0002332763671875,
      "model_forward_time": 0.11512494087219238,
      "step": 38220
    },
    {
      "epoch": 0.0002332763671875,
      "step": 38220,
      "training_step_time": 0.3979983329772949
    },
    {
      "epoch": 0.000233282470703125,
      "model_forward_time": 0.11469364166259766,
      "step": 38221
    },
    {
      "epoch": 0.000233282470703125,
      "step": 38221,
      "training_step_time": 0.3833332061767578
    },
    {
      "epoch": 0.00023328857421875,
      "model_forward_time": 0.11501479148864746,
      "step": 38222
    },
    {
      "epoch": 0.00023328857421875,
      "step": 38222,
      "training_step_time": 0.3870108127593994
    },
    {
      "epoch": 0.000233294677734375,
      "model_forward_time": 0.11637234687805176,
      "step": 38223
    },
    {
      "epoch": 0.000233294677734375,
      "step": 38223,
      "training_step_time": 0.39487528800964355
    },
    {
      "epoch": 0.00023330078125,
      "model_forward_time": 0.11482715606689453,
      "step": 38224
    },
    {
      "epoch": 0.00023330078125,
      "step": 38224,
      "training_step_time": 0.3968214988708496
    },
    {
      "epoch": 0.000233306884765625,
      "model_forward_time": 0.11629986763000488,
      "step": 38225
    },
    {
      "epoch": 0.000233306884765625,
      "step": 38225,
      "training_step_time": 0.3846852779388428
    },
    {
      "epoch": 0.00023331298828125,
      "model_forward_time": 0.11530280113220215,
      "step": 38226
    },
    {
      "epoch": 0.00023331298828125,
      "step": 38226,
      "training_step_time": 0.3965630531311035
    },
    {
      "epoch": 0.000233319091796875,
      "model_forward_time": 0.11532044410705566,
      "step": 38227
    },
    {
      "epoch": 0.000233319091796875,
      "step": 38227,
      "training_step_time": 0.4358100891113281
    },
    {
      "epoch": 0.0002333251953125,
      "model_forward_time": 0.11636781692504883,
      "step": 38228
    },
    {
      "epoch": 0.0002333251953125,
      "step": 38228,
      "training_step_time": 0.36757516860961914
    },
    {
      "epoch": 0.000233331298828125,
      "model_forward_time": 0.11518168449401855,
      "step": 38229
    },
    {
      "epoch": 0.000233331298828125,
      "step": 38229,
      "training_step_time": 0.45538902282714844
    },
    {
      "epoch": 0.00023333740234375,
      "grad_norm": 0.12481388449668884,
      "learning_rate": 3.187595776203886e-05,
      "loss": 0.0413,
      "step": 38230
    },
    {
      "epoch": 0.00023333740234375,
      "model_forward_time": 0.11524844169616699,
      "step": 38230
    },
    {
      "epoch": 0.00023333740234375,
      "step": 38230,
      "training_step_time": 0.4445171356201172
    },
    {
      "epoch": 0.000233343505859375,
      "model_forward_time": 0.1165151596069336,
      "step": 38231
    },
    {
      "epoch": 0.000233343505859375,
      "step": 38231,
      "training_step_time": 0.4741089344024658
    },
    {
      "epoch": 0.000233349609375,
      "model_forward_time": 0.1155557632446289,
      "step": 38232
    },
    {
      "epoch": 0.000233349609375,
      "step": 38232,
      "training_step_time": 0.4104764461517334
    },
    {
      "epoch": 0.000233355712890625,
      "model_forward_time": 0.11565709114074707,
      "step": 38233
    },
    {
      "epoch": 0.000233355712890625,
      "step": 38233,
      "training_step_time": 0.38649821281433105
    },
    {
      "epoch": 0.00023336181640625,
      "model_forward_time": 0.11480021476745605,
      "step": 38234
    },
    {
      "epoch": 0.00023336181640625,
      "step": 38234,
      "training_step_time": 0.42138195037841797
    },
    {
      "epoch": 0.000233367919921875,
      "model_forward_time": 0.11505794525146484,
      "step": 38235
    },
    {
      "epoch": 0.000233367919921875,
      "step": 38235,
      "training_step_time": 0.3961827754974365
    },
    {
      "epoch": 0.0002333740234375,
      "model_forward_time": 0.11511039733886719,
      "step": 38236
    },
    {
      "epoch": 0.0002333740234375,
      "step": 38236,
      "training_step_time": 0.39692139625549316
    },
    {
      "epoch": 0.000233380126953125,
      "model_forward_time": 0.1153566837310791,
      "step": 38237
    },
    {
      "epoch": 0.000233380126953125,
      "step": 38237,
      "training_step_time": 0.4040806293487549
    },
    {
      "epoch": 0.00023338623046875,
      "model_forward_time": 0.11499905586242676,
      "step": 38238
    },
    {
      "epoch": 0.00023338623046875,
      "step": 38238,
      "training_step_time": 0.4054994583129883
    },
    {
      "epoch": 0.000233392333984375,
      "model_forward_time": 0.11488175392150879,
      "step": 38239
    },
    {
      "epoch": 0.000233392333984375,
      "step": 38239,
      "training_step_time": 0.4076554775238037
    },
    {
      "epoch": 0.0002333984375,
      "grad_norm": 0.09331342577934265,
      "learning_rate": 3.1850276860220346e-05,
      "loss": 0.0376,
      "step": 38240
    },
    {
      "epoch": 0.0002333984375,
      "model_forward_time": 0.11479306221008301,
      "step": 38240
    },
    {
      "epoch": 0.0002333984375,
      "step": 38240,
      "training_step_time": 0.40087032318115234
    },
    {
      "epoch": 0.000233404541015625,
      "model_forward_time": 0.1174771785736084,
      "step": 38241
    },
    {
      "epoch": 0.000233404541015625,
      "step": 38241,
      "training_step_time": 0.38762807846069336
    },
    {
      "epoch": 0.00023341064453125,
      "model_forward_time": 0.11484432220458984,
      "step": 38242
    },
    {
      "epoch": 0.00023341064453125,
      "step": 38242,
      "training_step_time": 0.4209604263305664
    },
    {
      "epoch": 0.000233416748046875,
      "model_forward_time": 0.11522698402404785,
      "step": 38243
    },
    {
      "epoch": 0.000233416748046875,
      "step": 38243,
      "training_step_time": 0.5871050357818604
    },
    {
      "epoch": 0.0002334228515625,
      "model_forward_time": 0.11505961418151855,
      "step": 38244
    },
    {
      "epoch": 0.0002334228515625,
      "step": 38244,
      "training_step_time": 0.4556257724761963
    },
    {
      "epoch": 0.000233428955078125,
      "model_forward_time": 0.11537504196166992,
      "step": 38245
    },
    {
      "epoch": 0.000233428955078125,
      "step": 38245,
      "training_step_time": 0.4204413890838623
    },
    {
      "epoch": 0.00023343505859375,
      "model_forward_time": 0.11543416976928711,
      "step": 38246
    },
    {
      "epoch": 0.00023343505859375,
      "step": 38246,
      "training_step_time": 0.41535449028015137
    },
    {
      "epoch": 0.000233441162109375,
      "model_forward_time": 0.1147608757019043,
      "step": 38247
    },
    {
      "epoch": 0.000233441162109375,
      "step": 38247,
      "training_step_time": 0.392622709274292
    },
    {
      "epoch": 0.000233447265625,
      "model_forward_time": 0.11557865142822266,
      "step": 38248
    },
    {
      "epoch": 0.000233447265625,
      "step": 38248,
      "training_step_time": 0.40599799156188965
    },
    {
      "epoch": 0.000233453369140625,
      "model_forward_time": 0.11501741409301758,
      "step": 38249
    },
    {
      "epoch": 0.000233453369140625,
      "step": 38249,
      "training_step_time": 0.3926694393157959
    },
    {
      "epoch": 0.00023345947265625,
      "grad_norm": 0.10367795825004578,
      "learning_rate": 3.18246014718085e-05,
      "loss": 0.04,
      "step": 38250
    },
    {
      "epoch": 0.00023345947265625,
      "model_forward_time": 0.11536526679992676,
      "step": 38250
    },
    {
      "epoch": 0.00023345947265625,
      "step": 38250,
      "training_step_time": 0.4067702293395996
    },
    {
      "epoch": 0.000233465576171875,
      "model_forward_time": 0.11490392684936523,
      "step": 38251
    },
    {
      "epoch": 0.000233465576171875,
      "step": 38251,
      "training_step_time": 0.395000696182251
    },
    {
      "epoch": 0.0002334716796875,
      "model_forward_time": 0.1155540943145752,
      "step": 38252
    },
    {
      "epoch": 0.0002334716796875,
      "step": 38252,
      "training_step_time": 0.3945784568786621
    },
    {
      "epoch": 0.000233477783203125,
      "model_forward_time": 0.11468315124511719,
      "step": 38253
    },
    {
      "epoch": 0.000233477783203125,
      "step": 38253,
      "training_step_time": 0.387531042098999
    },
    {
      "epoch": 0.00023348388671875,
      "model_forward_time": 0.11560177803039551,
      "step": 38254
    },
    {
      "epoch": 0.00023348388671875,
      "step": 38254,
      "training_step_time": 0.39083218574523926
    },
    {
      "epoch": 0.000233489990234375,
      "model_forward_time": 0.11548280715942383,
      "step": 38255
    },
    {
      "epoch": 0.000233489990234375,
      "step": 38255,
      "training_step_time": 0.38532161712646484
    },
    {
      "epoch": 0.00023349609375,
      "model_forward_time": 0.11561393737792969,
      "step": 38256
    },
    {
      "epoch": 0.00023349609375,
      "step": 38256,
      "training_step_time": 0.41385626792907715
    },
    {
      "epoch": 0.000233502197265625,
      "model_forward_time": 0.1154932975769043,
      "step": 38257
    },
    {
      "epoch": 0.000233502197265625,
      "step": 38257,
      "training_step_time": 0.40626049041748047
    },
    {
      "epoch": 0.00023350830078125,
      "model_forward_time": 0.11646795272827148,
      "step": 38258
    },
    {
      "epoch": 0.00023350830078125,
      "step": 38258,
      "training_step_time": 0.42736101150512695
    },
    {
      "epoch": 0.000233514404296875,
      "model_forward_time": 0.1154327392578125,
      "step": 38259
    },
    {
      "epoch": 0.000233514404296875,
      "step": 38259,
      "training_step_time": 0.5299487113952637
    },
    {
      "epoch": 0.0002335205078125,
      "grad_norm": 0.12110960483551025,
      "learning_rate": 3.1798931604602864e-05,
      "loss": 0.0403,
      "step": 38260
    },
    {
      "epoch": 0.0002335205078125,
      "model_forward_time": 0.11479067802429199,
      "step": 38260
    },
    {
      "epoch": 0.0002335205078125,
      "step": 38260,
      "training_step_time": 0.4247915744781494
    },
    {
      "epoch": 0.000233526611328125,
      "model_forward_time": 0.11476397514343262,
      "step": 38261
    },
    {
      "epoch": 0.000233526611328125,
      "step": 38261,
      "training_step_time": 0.41854405403137207
    },
    {
      "epoch": 0.00023353271484375,
      "model_forward_time": 0.11496448516845703,
      "step": 38262
    },
    {
      "epoch": 0.00023353271484375,
      "step": 38262,
      "training_step_time": 0.3938138484954834
    },
    {
      "epoch": 0.000233538818359375,
      "model_forward_time": 0.11532425880432129,
      "step": 38263
    },
    {
      "epoch": 0.000233538818359375,
      "step": 38263,
      "training_step_time": 0.39214420318603516
    },
    {
      "epoch": 0.000233544921875,
      "model_forward_time": 0.1150367259979248,
      "step": 38264
    },
    {
      "epoch": 0.000233544921875,
      "step": 38264,
      "training_step_time": 0.3939192295074463
    },
    {
      "epoch": 0.000233551025390625,
      "model_forward_time": 0.11532139778137207,
      "step": 38265
    },
    {
      "epoch": 0.000233551025390625,
      "step": 38265,
      "training_step_time": 0.3895411491394043
    },
    {
      "epoch": 0.00023355712890625,
      "model_forward_time": 0.11534857749938965,
      "step": 38266
    },
    {
      "epoch": 0.00023355712890625,
      "step": 38266,
      "training_step_time": 0.38895249366760254
    },
    {
      "epoch": 0.000233563232421875,
      "model_forward_time": 0.11547660827636719,
      "step": 38267
    },
    {
      "epoch": 0.000233563232421875,
      "step": 38267,
      "training_step_time": 0.5496892929077148
    },
    {
      "epoch": 0.0002335693359375,
      "model_forward_time": 0.11517500877380371,
      "step": 38268
    },
    {
      "epoch": 0.0002335693359375,
      "step": 38268,
      "training_step_time": 0.4014246463775635
    },
    {
      "epoch": 0.000233575439453125,
      "model_forward_time": 0.11513829231262207,
      "step": 38269
    },
    {
      "epoch": 0.000233575439453125,
      "step": 38269,
      "training_step_time": 0.39678144454956055
    },
    {
      "epoch": 0.00023358154296875,
      "grad_norm": 0.1393377035856247,
      "learning_rate": 3.1773267266401206e-05,
      "loss": 0.037,
      "step": 38270
    },
    {
      "epoch": 0.00023358154296875,
      "model_forward_time": 0.11476349830627441,
      "step": 38270
    },
    {
      "epoch": 0.00023358154296875,
      "step": 38270,
      "training_step_time": 0.3878648281097412
    },
    {
      "epoch": 0.000233587646484375,
      "model_forward_time": 0.11547064781188965,
      "step": 38271
    },
    {
      "epoch": 0.000233587646484375,
      "step": 38271,
      "training_step_time": 0.4108235836029053
    },
    {
      "epoch": 0.00023359375,
      "model_forward_time": 0.11520719528198242,
      "step": 38272
    },
    {
      "epoch": 0.00023359375,
      "step": 38272,
      "training_step_time": 0.4218449592590332
    },
    {
      "epoch": 0.000233599853515625,
      "model_forward_time": 0.11453962326049805,
      "step": 38273
    },
    {
      "epoch": 0.000233599853515625,
      "step": 38273,
      "training_step_time": 0.515702486038208
    },
    {
      "epoch": 0.00023360595703125,
      "model_forward_time": 0.11525201797485352,
      "step": 38274
    },
    {
      "epoch": 0.00023360595703125,
      "step": 38274,
      "training_step_time": 0.5138404369354248
    },
    {
      "epoch": 0.000233612060546875,
      "model_forward_time": 0.11639761924743652,
      "step": 38275
    },
    {
      "epoch": 0.000233612060546875,
      "step": 38275,
      "training_step_time": 0.43494081497192383
    },
    {
      "epoch": 0.0002336181640625,
      "model_forward_time": 0.11536264419555664,
      "step": 38276
    },
    {
      "epoch": 0.0002336181640625,
      "step": 38276,
      "training_step_time": 0.3833017349243164
    },
    {
      "epoch": 0.000233624267578125,
      "model_forward_time": 0.1152336597442627,
      "step": 38277
    },
    {
      "epoch": 0.000233624267578125,
      "step": 38277,
      "training_step_time": 0.3802335262298584
    },
    {
      "epoch": 0.00023363037109375,
      "model_forward_time": 0.11503028869628906,
      "step": 38278
    },
    {
      "epoch": 0.00023363037109375,
      "step": 38278,
      "training_step_time": 0.3910706043243408
    },
    {
      "epoch": 0.000233636474609375,
      "model_forward_time": 0.1152493953704834,
      "step": 38279
    },
    {
      "epoch": 0.000233636474609375,
      "step": 38279,
      "training_step_time": 0.4001779556274414
    },
    {
      "epoch": 0.000233642578125,
      "grad_norm": 0.10826807469129562,
      "learning_rate": 3.1747608464999725e-05,
      "loss": 0.0436,
      "step": 38280
    },
    {
      "epoch": 0.000233642578125,
      "model_forward_time": 0.11536288261413574,
      "step": 38280
    },
    {
      "epoch": 0.000233642578125,
      "step": 38280,
      "training_step_time": 0.39829039573669434
    },
    {
      "epoch": 0.000233648681640625,
      "model_forward_time": 0.11560893058776855,
      "step": 38281
    },
    {
      "epoch": 0.000233648681640625,
      "step": 38281,
      "training_step_time": 0.4106452465057373
    },
    {
      "epoch": 0.00023365478515625,
      "model_forward_time": 0.11567258834838867,
      "step": 38282
    },
    {
      "epoch": 0.00023365478515625,
      "step": 38282,
      "training_step_time": 0.39510273933410645
    },
    {
      "epoch": 0.000233660888671875,
      "model_forward_time": 0.11671590805053711,
      "step": 38283
    },
    {
      "epoch": 0.000233660888671875,
      "step": 38283,
      "training_step_time": 0.4053647518157959
    },
    {
      "epoch": 0.0002336669921875,
      "model_forward_time": 0.1158895492553711,
      "step": 38284
    },
    {
      "epoch": 0.0002336669921875,
      "step": 38284,
      "training_step_time": 0.3967115879058838
    },
    {
      "epoch": 0.000233673095703125,
      "model_forward_time": 0.11517620086669922,
      "step": 38285
    },
    {
      "epoch": 0.000233673095703125,
      "step": 38285,
      "training_step_time": 0.3920469284057617
    },
    {
      "epoch": 0.00023367919921875,
      "model_forward_time": 0.11489558219909668,
      "step": 38286
    },
    {
      "epoch": 0.00023367919921875,
      "step": 38286,
      "training_step_time": 0.39742302894592285
    },
    {
      "epoch": 0.000233685302734375,
      "model_forward_time": 0.11491942405700684,
      "step": 38287
    },
    {
      "epoch": 0.000233685302734375,
      "step": 38287,
      "training_step_time": 0.3662290573120117
    },
    {
      "epoch": 0.00023369140625,
      "model_forward_time": 0.11574053764343262,
      "step": 38288
    },
    {
      "epoch": 0.00023369140625,
      "step": 38288,
      "training_step_time": 0.49401164054870605
    },
    {
      "epoch": 0.000233697509765625,
      "model_forward_time": 0.11513400077819824,
      "step": 38289
    },
    {
      "epoch": 0.000233697509765625,
      "step": 38289,
      "training_step_time": 0.46407294273376465
    },
    {
      "epoch": 0.00023370361328125,
      "grad_norm": 0.09159547835588455,
      "learning_rate": 3.172195520819285e-05,
      "loss": 0.039,
      "step": 38290
    },
    {
      "epoch": 0.00023370361328125,
      "model_forward_time": 0.11557626724243164,
      "step": 38290
    },
    {
      "epoch": 0.00023370361328125,
      "step": 38290,
      "training_step_time": 0.4482390880584717
    },
    {
      "epoch": 0.000233709716796875,
      "model_forward_time": 0.11458659172058105,
      "step": 38291
    },
    {
      "epoch": 0.000233709716796875,
      "step": 38291,
      "training_step_time": 0.4065239429473877
    },
    {
      "epoch": 0.0002337158203125,
      "model_forward_time": 0.11529326438903809,
      "step": 38292
    },
    {
      "epoch": 0.0002337158203125,
      "step": 38292,
      "training_step_time": 0.39216089248657227
    },
    {
      "epoch": 0.000233721923828125,
      "model_forward_time": 0.11534833908081055,
      "step": 38293
    },
    {
      "epoch": 0.000233721923828125,
      "step": 38293,
      "training_step_time": 0.3944423198699951
    },
    {
      "epoch": 0.00023372802734375,
      "model_forward_time": 0.11507105827331543,
      "step": 38294
    },
    {
      "epoch": 0.00023372802734375,
      "step": 38294,
      "training_step_time": 0.3774118423461914
    },
    {
      "epoch": 0.000233734130859375,
      "model_forward_time": 0.1154484748840332,
      "step": 38295
    },
    {
      "epoch": 0.000233734130859375,
      "step": 38295,
      "training_step_time": 0.39292454719543457
    },
    {
      "epoch": 0.000233740234375,
      "model_forward_time": 0.11527371406555176,
      "step": 38296
    },
    {
      "epoch": 0.000233740234375,
      "step": 38296,
      "training_step_time": 0.3979156017303467
    },
    {
      "epoch": 0.000233746337890625,
      "model_forward_time": 0.11548447608947754,
      "step": 38297
    },
    {
      "epoch": 0.000233746337890625,
      "step": 38297,
      "training_step_time": 0.3988518714904785
    },
    {
      "epoch": 0.00023375244140625,
      "model_forward_time": 0.11530399322509766,
      "step": 38298
    },
    {
      "epoch": 0.00023375244140625,
      "step": 38298,
      "training_step_time": 0.39229774475097656
    },
    {
      "epoch": 0.000233758544921875,
      "model_forward_time": 0.11688661575317383,
      "step": 38299
    },
    {
      "epoch": 0.000233758544921875,
      "step": 38299,
      "training_step_time": 0.39902591705322266
    },
    {
      "epoch": 0.0002337646484375,
      "grad_norm": 0.10651148110628128,
      "learning_rate": 3.169630750377337e-05,
      "loss": 0.038,
      "step": 38300
    },
    {
      "epoch": 0.0002337646484375,
      "model_forward_time": 0.11548376083374023,
      "step": 38300
    },
    {
      "epoch": 0.0002337646484375,
      "step": 38300,
      "training_step_time": 0.3813283443450928
    },
    {
      "epoch": 0.000233770751953125,
      "model_forward_time": 0.11544013023376465,
      "step": 38301
    },
    {
      "epoch": 0.000233770751953125,
      "step": 38301,
      "training_step_time": 0.4585087299346924
    },
    {
      "epoch": 0.00023377685546875,
      "model_forward_time": 0.11629271507263184,
      "step": 38302
    },
    {
      "epoch": 0.00023377685546875,
      "step": 38302,
      "training_step_time": 0.4018087387084961
    },
    {
      "epoch": 0.000233782958984375,
      "model_forward_time": 0.11572694778442383,
      "step": 38303
    },
    {
      "epoch": 0.000233782958984375,
      "step": 38303,
      "training_step_time": 0.43666672706604004
    },
    {
      "epoch": 0.0002337890625,
      "model_forward_time": 0.11539578437805176,
      "step": 38304
    },
    {
      "epoch": 0.0002337890625,
      "step": 38304,
      "training_step_time": 0.41248559951782227
    },
    {
      "epoch": 0.000233795166015625,
      "model_forward_time": 0.11574554443359375,
      "step": 38305
    },
    {
      "epoch": 0.000233795166015625,
      "step": 38305,
      "training_step_time": 0.3903946876525879
    },
    {
      "epoch": 0.00023380126953125,
      "model_forward_time": 0.11525130271911621,
      "step": 38306
    },
    {
      "epoch": 0.00023380126953125,
      "step": 38306,
      "training_step_time": 0.39446568489074707
    },
    {
      "epoch": 0.000233807373046875,
      "model_forward_time": 0.11554241180419922,
      "step": 38307
    },
    {
      "epoch": 0.000233807373046875,
      "step": 38307,
      "training_step_time": 0.3941175937652588
    },
    {
      "epoch": 0.0002338134765625,
      "model_forward_time": 0.11507725715637207,
      "step": 38308
    },
    {
      "epoch": 0.0002338134765625,
      "step": 38308,
      "training_step_time": 0.3917849063873291
    },
    {
      "epoch": 0.000233819580078125,
      "model_forward_time": 0.11554074287414551,
      "step": 38309
    },
    {
      "epoch": 0.000233819580078125,
      "step": 38309,
      "training_step_time": 0.39762210845947266
    },
    {
      "epoch": 0.00023382568359375,
      "grad_norm": 0.08799121528863907,
      "learning_rate": 3.167066535953242e-05,
      "loss": 0.0451,
      "step": 38310
    },
    {
      "epoch": 0.00023382568359375,
      "model_forward_time": 0.11507892608642578,
      "step": 38310
    },
    {
      "epoch": 0.00023382568359375,
      "step": 38310,
      "training_step_time": 0.3840932846069336
    },
    {
      "epoch": 0.000233831787109375,
      "model_forward_time": 0.1153252124786377,
      "step": 38311
    },
    {
      "epoch": 0.000233831787109375,
      "step": 38311,
      "training_step_time": 0.382338285446167
    },
    {
      "epoch": 0.000233837890625,
      "model_forward_time": 0.11557483673095703,
      "step": 38312
    },
    {
      "epoch": 0.000233837890625,
      "step": 38312,
      "training_step_time": 0.38458776473999023
    },
    {
      "epoch": 0.000233843994140625,
      "model_forward_time": 0.11522388458251953,
      "step": 38313
    },
    {
      "epoch": 0.000233843994140625,
      "step": 38313,
      "training_step_time": 0.39344143867492676
    },
    {
      "epoch": 0.00023385009765625,
      "model_forward_time": 0.11514568328857422,
      "step": 38314
    },
    {
      "epoch": 0.00023385009765625,
      "step": 38314,
      "training_step_time": 0.38811469078063965
    },
    {
      "epoch": 0.000233856201171875,
      "model_forward_time": 0.11560249328613281,
      "step": 38315
    },
    {
      "epoch": 0.000233856201171875,
      "step": 38315,
      "training_step_time": 0.38089609146118164
    },
    {
      "epoch": 0.0002338623046875,
      "model_forward_time": 0.11512875556945801,
      "step": 38316
    },
    {
      "epoch": 0.0002338623046875,
      "step": 38316,
      "training_step_time": 0.4531362056732178
    },
    {
      "epoch": 0.000233868408203125,
      "model_forward_time": 0.11527895927429199,
      "step": 38317
    },
    {
      "epoch": 0.000233868408203125,
      "step": 38317,
      "training_step_time": 0.3730621337890625
    },
    {
      "epoch": 0.00023387451171875,
      "model_forward_time": 0.11543059349060059,
      "step": 38318
    },
    {
      "epoch": 0.00023387451171875,
      "step": 38318,
      "training_step_time": 0.48967838287353516
    },
    {
      "epoch": 0.000233880615234375,
      "model_forward_time": 0.11522722244262695,
      "step": 38319
    },
    {
      "epoch": 0.000233880615234375,
      "step": 38319,
      "training_step_time": 0.4838130474090576
    },
    {
      "epoch": 0.00023388671875,
      "grad_norm": 0.10263456404209137,
      "learning_rate": 3.1645028783259345e-05,
      "loss": 0.0321,
      "step": 38320
    },
    {
      "epoch": 0.00023388671875,
      "model_forward_time": 0.11434793472290039,
      "step": 38320
    },
    {
      "epoch": 0.00023388671875,
      "step": 38320,
      "training_step_time": 0.3882887363433838
    },
    {
      "epoch": 0.000233892822265625,
      "model_forward_time": 0.11529326438903809,
      "step": 38321
    },
    {
      "epoch": 0.000233892822265625,
      "step": 38321,
      "training_step_time": 0.3801288604736328
    },
    {
      "epoch": 0.00023389892578125,
      "model_forward_time": 0.11626291275024414,
      "step": 38322
    },
    {
      "epoch": 0.00023389892578125,
      "step": 38322,
      "training_step_time": 0.41422390937805176
    },
    {
      "epoch": 0.000233905029296875,
      "model_forward_time": 0.11556363105773926,
      "step": 38323
    },
    {
      "epoch": 0.000233905029296875,
      "step": 38323,
      "training_step_time": 0.38301539421081543
    },
    {
      "epoch": 0.0002339111328125,
      "model_forward_time": 0.11510300636291504,
      "step": 38324
    },
    {
      "epoch": 0.0002339111328125,
      "step": 38324,
      "training_step_time": 0.3734598159790039
    },
    {
      "epoch": 0.000233917236328125,
      "model_forward_time": 0.1155235767364502,
      "step": 38325
    },
    {
      "epoch": 0.000233917236328125,
      "step": 38325,
      "training_step_time": 0.38645195960998535
    },
    {
      "epoch": 0.00023392333984375,
      "model_forward_time": 0.11595296859741211,
      "step": 38326
    },
    {
      "epoch": 0.00023392333984375,
      "step": 38326,
      "training_step_time": 0.39427924156188965
    },
    {
      "epoch": 0.000233929443359375,
      "model_forward_time": 0.11543798446655273,
      "step": 38327
    },
    {
      "epoch": 0.000233929443359375,
      "step": 38327,
      "training_step_time": 0.38466835021972656
    },
    {
      "epoch": 0.000233935546875,
      "model_forward_time": 0.11521720886230469,
      "step": 38328
    },
    {
      "epoch": 0.000233935546875,
      "step": 38328,
      "training_step_time": 0.4017188549041748
    },
    {
      "epoch": 0.000233941650390625,
      "model_forward_time": 0.11507582664489746,
      "step": 38329
    },
    {
      "epoch": 0.000233941650390625,
      "step": 38329,
      "training_step_time": 0.4003274440765381
    },
    {
      "epoch": 0.00023394775390625,
      "grad_norm": 0.10772845149040222,
      "learning_rate": 3.161939778274191e-05,
      "loss": 0.0348,
      "step": 38330
    },
    {
      "epoch": 0.00023394775390625,
      "model_forward_time": 0.11691761016845703,
      "step": 38330
    },
    {
      "epoch": 0.00023394775390625,
      "step": 38330,
      "training_step_time": 0.4505293369293213
    },
    {
      "epoch": 0.000233953857421875,
      "model_forward_time": 0.11510062217712402,
      "step": 38331
    },
    {
      "epoch": 0.000233953857421875,
      "step": 38331,
      "training_step_time": 0.46651411056518555
    },
    {
      "epoch": 0.0002339599609375,
      "model_forward_time": 0.11646103858947754,
      "step": 38332
    },
    {
      "epoch": 0.0002339599609375,
      "step": 38332,
      "training_step_time": 0.46131181716918945
    },
    {
      "epoch": 0.000233966064453125,
      "model_forward_time": 0.11520576477050781,
      "step": 38333
    },
    {
      "epoch": 0.000233966064453125,
      "step": 38333,
      "training_step_time": 0.4739563465118408
    },
    {
      "epoch": 0.00023397216796875,
      "model_forward_time": 0.11531376838684082,
      "step": 38334
    },
    {
      "epoch": 0.00023397216796875,
      "step": 38334,
      "training_step_time": 0.4540860652923584
    },
    {
      "epoch": 0.000233978271484375,
      "model_forward_time": 0.11587405204772949,
      "step": 38335
    },
    {
      "epoch": 0.000233978271484375,
      "step": 38335,
      "training_step_time": 0.38982057571411133
    },
    {
      "epoch": 0.000233984375,
      "model_forward_time": 0.11511635780334473,
      "step": 38336
    },
    {
      "epoch": 0.000233984375,
      "step": 38336,
      "training_step_time": 0.3724346160888672
    },
    {
      "epoch": 0.000233990478515625,
      "model_forward_time": 0.1154029369354248,
      "step": 38337
    },
    {
      "epoch": 0.000233990478515625,
      "step": 38337,
      "training_step_time": 0.3864021301269531
    },
    {
      "epoch": 0.00023399658203125,
      "model_forward_time": 0.11527776718139648,
      "step": 38338
    },
    {
      "epoch": 0.00023399658203125,
      "step": 38338,
      "training_step_time": 0.38776469230651855
    },
    {
      "epoch": 0.000234002685546875,
      "model_forward_time": 0.11545443534851074,
      "step": 38339
    },
    {
      "epoch": 0.000234002685546875,
      "step": 38339,
      "training_step_time": 0.3838975429534912
    },
    {
      "epoch": 0.0002340087890625,
      "grad_norm": 0.09871942549943924,
      "learning_rate": 3.1593772365766105e-05,
      "loss": 0.0356,
      "step": 38340
    },
    {
      "epoch": 0.0002340087890625,
      "model_forward_time": 0.1145620346069336,
      "step": 38340
    },
    {
      "epoch": 0.0002340087890625,
      "step": 38340,
      "training_step_time": 0.39483165740966797
    },
    {
      "epoch": 0.000234014892578125,
      "model_forward_time": 0.11590862274169922,
      "step": 38341
    },
    {
      "epoch": 0.000234014892578125,
      "step": 38341,
      "training_step_time": 0.4066953659057617
    },
    {
      "epoch": 0.00023402099609375,
      "model_forward_time": 0.11571121215820312,
      "step": 38342
    },
    {
      "epoch": 0.00023402099609375,
      "step": 38342,
      "training_step_time": 0.37383484840393066
    },
    {
      "epoch": 0.000234027099609375,
      "model_forward_time": 0.11517906188964844,
      "step": 38343
    },
    {
      "epoch": 0.000234027099609375,
      "step": 38343,
      "training_step_time": 0.4186124801635742
    },
    {
      "epoch": 0.000234033203125,
      "model_forward_time": 0.1157839298248291,
      "step": 38344
    },
    {
      "epoch": 0.000234033203125,
      "step": 38344,
      "training_step_time": 0.4089639186859131
    },
    {
      "epoch": 0.000234039306640625,
      "model_forward_time": 0.11505866050720215,
      "step": 38345
    },
    {
      "epoch": 0.000234039306640625,
      "step": 38345,
      "training_step_time": 0.4174973964691162
    },
    {
      "epoch": 0.00023404541015625,
      "model_forward_time": 0.11576509475708008,
      "step": 38346
    },
    {
      "epoch": 0.00023404541015625,
      "step": 38346,
      "training_step_time": 0.4016754627227783
    },
    {
      "epoch": 0.000234051513671875,
      "model_forward_time": 0.11586976051330566,
      "step": 38347
    },
    {
      "epoch": 0.000234051513671875,
      "step": 38347,
      "training_step_time": 0.48098063468933105
    },
    {
      "epoch": 0.0002340576171875,
      "model_forward_time": 0.11498427391052246,
      "step": 38348
    },
    {
      "epoch": 0.0002340576171875,
      "step": 38348,
      "training_step_time": 0.4879286289215088
    },
    {
      "epoch": 0.000234063720703125,
      "model_forward_time": 0.11473989486694336,
      "step": 38349
    },
    {
      "epoch": 0.000234063720703125,
      "step": 38349,
      "training_step_time": 0.39778900146484375
    },
    {
      "epoch": 0.00023406982421875,
      "grad_norm": 0.11545498669147491,
      "learning_rate": 3.156815254011627e-05,
      "loss": 0.0417,
      "step": 38350
    },
    {
      "epoch": 0.00023406982421875,
      "model_forward_time": 0.11513233184814453,
      "step": 38350
    },
    {
      "epoch": 0.00023406982421875,
      "step": 38350,
      "training_step_time": 0.3890390396118164
    },
    {
      "epoch": 0.000234075927734375,
      "model_forward_time": 0.1143653392791748,
      "step": 38351
    },
    {
      "epoch": 0.000234075927734375,
      "step": 38351,
      "training_step_time": 0.39046478271484375
    },
    {
      "epoch": 0.00023408203125,
      "model_forward_time": 0.1151728630065918,
      "step": 38352
    },
    {
      "epoch": 0.00023408203125,
      "step": 38352,
      "training_step_time": 0.38498520851135254
    },
    {
      "epoch": 0.000234088134765625,
      "model_forward_time": 0.1145176887512207,
      "step": 38353
    },
    {
      "epoch": 0.000234088134765625,
      "step": 38353,
      "training_step_time": 0.40090441703796387
    },
    {
      "epoch": 0.00023409423828125,
      "model_forward_time": 0.11553335189819336,
      "step": 38354
    },
    {
      "epoch": 0.00023409423828125,
      "step": 38354,
      "training_step_time": 0.4004950523376465
    },
    {
      "epoch": 0.000234100341796875,
      "model_forward_time": 0.11493611335754395,
      "step": 38355
    },
    {
      "epoch": 0.000234100341796875,
      "step": 38355,
      "training_step_time": 0.3889775276184082
    },
    {
      "epoch": 0.0002341064453125,
      "model_forward_time": 0.11620068550109863,
      "step": 38356
    },
    {
      "epoch": 0.0002341064453125,
      "step": 38356,
      "training_step_time": 0.39949846267700195
    },
    {
      "epoch": 0.000234112548828125,
      "model_forward_time": 0.11524653434753418,
      "step": 38357
    },
    {
      "epoch": 0.000234112548828125,
      "step": 38357,
      "training_step_time": 0.3983347415924072
    },
    {
      "epoch": 0.00023411865234375,
      "model_forward_time": 0.11563634872436523,
      "step": 38358
    },
    {
      "epoch": 0.00023411865234375,
      "step": 38358,
      "training_step_time": 0.41807055473327637
    },
    {
      "epoch": 0.000234124755859375,
      "model_forward_time": 0.11548733711242676,
      "step": 38359
    },
    {
      "epoch": 0.000234124755859375,
      "step": 38359,
      "training_step_time": 0.3982689380645752
    },
    {
      "epoch": 0.000234130859375,
      "grad_norm": 0.09854421019554138,
      "learning_rate": 3.1542538313575035e-05,
      "loss": 0.0359,
      "step": 38360
    },
    {
      "epoch": 0.000234130859375,
      "model_forward_time": 0.11528730392456055,
      "step": 38360
    },
    {
      "epoch": 0.000234130859375,
      "step": 38360,
      "training_step_time": 0.4153778553009033
    },
    {
      "epoch": 0.000234136962890625,
      "model_forward_time": 0.11601138114929199,
      "step": 38361
    },
    {
      "epoch": 0.000234136962890625,
      "step": 38361,
      "training_step_time": 0.4088165760040283
    },
    {
      "epoch": 0.00023414306640625,
      "model_forward_time": 0.11532902717590332,
      "step": 38362
    },
    {
      "epoch": 0.00023414306640625,
      "step": 38362,
      "training_step_time": 0.4980800151824951
    },
    {
      "epoch": 0.000234149169921875,
      "model_forward_time": 0.11530852317810059,
      "step": 38363
    },
    {
      "epoch": 0.000234149169921875,
      "step": 38363,
      "training_step_time": 0.4916684627532959
    },
    {
      "epoch": 0.0002341552734375,
      "model_forward_time": 0.1148078441619873,
      "step": 38364
    },
    {
      "epoch": 0.0002341552734375,
      "step": 38364,
      "training_step_time": 0.3907327651977539
    },
    {
      "epoch": 0.000234161376953125,
      "model_forward_time": 0.11585116386413574,
      "step": 38365
    },
    {
      "epoch": 0.000234161376953125,
      "step": 38365,
      "training_step_time": 0.3909926414489746
    },
    {
      "epoch": 0.00023416748046875,
      "model_forward_time": 0.11550760269165039,
      "step": 38366
    },
    {
      "epoch": 0.00023416748046875,
      "step": 38366,
      "training_step_time": 0.39055681228637695
    },
    {
      "epoch": 0.000234173583984375,
      "model_forward_time": 0.11549544334411621,
      "step": 38367
    },
    {
      "epoch": 0.000234173583984375,
      "step": 38367,
      "training_step_time": 0.380892276763916
    },
    {
      "epoch": 0.0002341796875,
      "model_forward_time": 0.1159505844116211,
      "step": 38368
    },
    {
      "epoch": 0.0002341796875,
      "step": 38368,
      "training_step_time": 0.3877451419830322
    },
    {
      "epoch": 0.000234185791015625,
      "model_forward_time": 0.11549210548400879,
      "step": 38369
    },
    {
      "epoch": 0.000234185791015625,
      "step": 38369,
      "training_step_time": 0.3956618309020996
    },
    {
      "epoch": 0.00023419189453125,
      "grad_norm": 0.12936776876449585,
      "learning_rate": 3.1516929693923315e-05,
      "loss": 0.041,
      "step": 38370
    },
    {
      "epoch": 0.00023419189453125,
      "model_forward_time": 0.11503887176513672,
      "step": 38370
    },
    {
      "epoch": 0.00023419189453125,
      "step": 38370,
      "training_step_time": 0.3913085460662842
    },
    {
      "epoch": 0.000234197998046875,
      "model_forward_time": 0.11583948135375977,
      "step": 38371
    },
    {
      "epoch": 0.000234197998046875,
      "step": 38371,
      "training_step_time": 0.4021570682525635
    },
    {
      "epoch": 0.0002342041015625,
      "model_forward_time": 0.1153557300567627,
      "step": 38372
    },
    {
      "epoch": 0.0002342041015625,
      "step": 38372,
      "training_step_time": 0.39011335372924805
    },
    {
      "epoch": 0.000234210205078125,
      "model_forward_time": 0.11544632911682129,
      "step": 38373
    },
    {
      "epoch": 0.000234210205078125,
      "step": 38373,
      "training_step_time": 0.3847651481628418
    },
    {
      "epoch": 0.00023421630859375,
      "model_forward_time": 0.11624646186828613,
      "step": 38374
    },
    {
      "epoch": 0.00023421630859375,
      "step": 38374,
      "training_step_time": 0.3934447765350342
    },
    {
      "epoch": 0.000234222412109375,
      "model_forward_time": 0.11552572250366211,
      "step": 38375
    },
    {
      "epoch": 0.000234222412109375,
      "step": 38375,
      "training_step_time": 0.46906089782714844
    },
    {
      "epoch": 0.000234228515625,
      "model_forward_time": 0.11455631256103516,
      "step": 38376
    },
    {
      "epoch": 0.000234228515625,
      "step": 38376,
      "training_step_time": 0.3630247116088867
    },
    {
      "epoch": 0.000234234619140625,
      "model_forward_time": 0.11517572402954102,
      "step": 38377
    },
    {
      "epoch": 0.000234234619140625,
      "step": 38377,
      "training_step_time": 0.4498298168182373
    },
    {
      "epoch": 0.00023424072265625,
      "model_forward_time": 0.11582183837890625,
      "step": 38378
    },
    {
      "epoch": 0.00023424072265625,
      "step": 38378,
      "training_step_time": 0.4334542751312256
    },
    {
      "epoch": 0.000234246826171875,
      "model_forward_time": 0.115203857421875,
      "step": 38379
    },
    {
      "epoch": 0.000234246826171875,
      "step": 38379,
      "training_step_time": 0.38776135444641113
    },
    {
      "epoch": 0.0002342529296875,
      "grad_norm": 0.14764994382858276,
      "learning_rate": 3.1491326688940345e-05,
      "loss": 0.0387,
      "step": 38380
    },
    {
      "epoch": 0.0002342529296875,
      "model_forward_time": 0.11500167846679688,
      "step": 38380
    },
    {
      "epoch": 0.0002342529296875,
      "step": 38380,
      "training_step_time": 0.386580228805542
    },
    {
      "epoch": 0.000234259033203125,
      "model_forward_time": 0.11527252197265625,
      "step": 38381
    },
    {
      "epoch": 0.000234259033203125,
      "step": 38381,
      "training_step_time": 0.37790656089782715
    },
    {
      "epoch": 0.00023426513671875,
      "model_forward_time": 0.11543869972229004,
      "step": 38382
    },
    {
      "epoch": 0.00023426513671875,
      "step": 38382,
      "training_step_time": 0.39118361473083496
    },
    {
      "epoch": 0.000234271240234375,
      "model_forward_time": 0.11505556106567383,
      "step": 38383
    },
    {
      "epoch": 0.000234271240234375,
      "step": 38383,
      "training_step_time": 0.39371466636657715
    },
    {
      "epoch": 0.00023427734375,
      "model_forward_time": 0.11517763137817383,
      "step": 38384
    },
    {
      "epoch": 0.00023427734375,
      "step": 38384,
      "training_step_time": 0.4018399715423584
    },
    {
      "epoch": 0.000234283447265625,
      "model_forward_time": 0.11478590965270996,
      "step": 38385
    },
    {
      "epoch": 0.000234283447265625,
      "step": 38385,
      "training_step_time": 0.3981325626373291
    },
    {
      "epoch": 0.00023428955078125,
      "model_forward_time": 0.11515021324157715,
      "step": 38386
    },
    {
      "epoch": 0.00023428955078125,
      "step": 38386,
      "training_step_time": 0.3977811336517334
    },
    {
      "epoch": 0.000234295654296875,
      "model_forward_time": 0.11580991744995117,
      "step": 38387
    },
    {
      "epoch": 0.000234295654296875,
      "step": 38387,
      "training_step_time": 0.40341639518737793
    },
    {
      "epoch": 0.0002343017578125,
      "model_forward_time": 0.11528205871582031,
      "step": 38388
    },
    {
      "epoch": 0.0002343017578125,
      "step": 38388,
      "training_step_time": 0.3918581008911133
    },
    {
      "epoch": 0.000234307861328125,
      "model_forward_time": 0.11546134948730469,
      "step": 38389
    },
    {
      "epoch": 0.000234307861328125,
      "step": 38389,
      "training_step_time": 0.39522337913513184
    },
    {
      "epoch": 0.00023431396484375,
      "grad_norm": 0.12005133926868439,
      "learning_rate": 3.146572930640362e-05,
      "loss": 0.0428,
      "step": 38390
    },
    {
      "epoch": 0.00023431396484375,
      "model_forward_time": 0.11553335189819336,
      "step": 38390
    },
    {
      "epoch": 0.00023431396484375,
      "step": 38390,
      "training_step_time": 0.3980691432952881
    },
    {
      "epoch": 0.000234320068359375,
      "model_forward_time": 0.11635518074035645,
      "step": 38391
    },
    {
      "epoch": 0.000234320068359375,
      "step": 38391,
      "training_step_time": 0.42336583137512207
    },
    {
      "epoch": 0.000234326171875,
      "model_forward_time": 0.11637020111083984,
      "step": 38392
    },
    {
      "epoch": 0.000234326171875,
      "step": 38392,
      "training_step_time": 0.4799063205718994
    },
    {
      "epoch": 0.000234332275390625,
      "model_forward_time": 0.11461138725280762,
      "step": 38393
    },
    {
      "epoch": 0.000234332275390625,
      "step": 38393,
      "training_step_time": 0.48229384422302246
    },
    {
      "epoch": 0.00023433837890625,
      "model_forward_time": 0.11505746841430664,
      "step": 38394
    },
    {
      "epoch": 0.00023433837890625,
      "step": 38394,
      "training_step_time": 0.3936653137207031
    },
    {
      "epoch": 0.000234344482421875,
      "model_forward_time": 0.11524677276611328,
      "step": 38395
    },
    {
      "epoch": 0.000234344482421875,
      "step": 38395,
      "training_step_time": 0.40904736518859863
    },
    {
      "epoch": 0.0002343505859375,
      "model_forward_time": 0.11498832702636719,
      "step": 38396
    },
    {
      "epoch": 0.0002343505859375,
      "step": 38396,
      "training_step_time": 0.38851451873779297
    },
    {
      "epoch": 0.000234356689453125,
      "model_forward_time": 0.11506104469299316,
      "step": 38397
    },
    {
      "epoch": 0.000234356689453125,
      "step": 38397,
      "training_step_time": 0.3840823173522949
    },
    {
      "epoch": 0.00023436279296875,
      "model_forward_time": 0.11498618125915527,
      "step": 38398
    },
    {
      "epoch": 0.00023436279296875,
      "step": 38398,
      "training_step_time": 0.39460062980651855
    },
    {
      "epoch": 0.000234368896484375,
      "model_forward_time": 0.11503005027770996,
      "step": 38399
    },
    {
      "epoch": 0.000234368896484375,
      "step": 38399,
      "training_step_time": 0.4104635715484619
    },
    {
      "epoch": 0.000234375,
      "grad_norm": 0.12106936424970627,
      "learning_rate": 3.144013755408895e-05,
      "loss": 0.0406,
      "step": 38400
    },
    {
      "epoch": 0.000234375,
      "model_forward_time": 0.11481285095214844,
      "step": 38400
    },
    {
      "epoch": 0.000234375,
      "step": 38400,
      "training_step_time": 0.44204258918762207
    },
    {
      "epoch": 0.000234381103515625,
      "model_forward_time": 0.11621499061584473,
      "step": 38401
    },
    {
      "epoch": 0.000234381103515625,
      "step": 38401,
      "training_step_time": 0.3960123062133789
    },
    {
      "epoch": 0.00023438720703125,
      "model_forward_time": 0.11464977264404297,
      "step": 38402
    },
    {
      "epoch": 0.00023438720703125,
      "step": 38402,
      "training_step_time": 0.39267444610595703
    },
    {
      "epoch": 0.000234393310546875,
      "model_forward_time": 0.11521530151367188,
      "step": 38403
    },
    {
      "epoch": 0.000234393310546875,
      "step": 38403,
      "training_step_time": 0.38656091690063477
    },
    {
      "epoch": 0.0002343994140625,
      "model_forward_time": 0.11562871932983398,
      "step": 38404
    },
    {
      "epoch": 0.0002343994140625,
      "step": 38404,
      "training_step_time": 0.3886375427246094
    },
    {
      "epoch": 0.000234405517578125,
      "model_forward_time": 0.11474275588989258,
      "step": 38405
    },
    {
      "epoch": 0.000234405517578125,
      "step": 38405,
      "training_step_time": 0.49747228622436523
    },
    {
      "epoch": 0.00023441162109375,
      "model_forward_time": 0.11586117744445801,
      "step": 38406
    },
    {
      "epoch": 0.00023441162109375,
      "step": 38406,
      "training_step_time": 0.42438745498657227
    },
    {
      "epoch": 0.000234417724609375,
      "model_forward_time": 0.11585140228271484,
      "step": 38407
    },
    {
      "epoch": 0.000234417724609375,
      "step": 38407,
      "training_step_time": 0.5321409702301025
    },
    {
      "epoch": 0.000234423828125,
      "model_forward_time": 0.11616015434265137,
      "step": 38408
    },
    {
      "epoch": 0.000234423828125,
      "step": 38408,
      "training_step_time": 0.45253658294677734
    },
    {
      "epoch": 0.000234429931640625,
      "model_forward_time": 0.11546826362609863,
      "step": 38409
    },
    {
      "epoch": 0.000234429931640625,
      "step": 38409,
      "training_step_time": 0.3946115970611572
    },
    {
      "epoch": 0.00023443603515625,
      "grad_norm": 0.14134939014911652,
      "learning_rate": 3.141455143977049e-05,
      "loss": 0.035,
      "step": 38410
    },
    {
      "epoch": 0.00023443603515625,
      "model_forward_time": 0.11507964134216309,
      "step": 38410
    },
    {
      "epoch": 0.00023443603515625,
      "step": 38410,
      "training_step_time": 0.3826735019683838
    },
    {
      "epoch": 0.000234442138671875,
      "model_forward_time": 0.11511588096618652,
      "step": 38411
    },
    {
      "epoch": 0.000234442138671875,
      "step": 38411,
      "training_step_time": 0.3845999240875244
    },
    {
      "epoch": 0.0002344482421875,
      "model_forward_time": 0.11552000045776367,
      "step": 38412
    },
    {
      "epoch": 0.0002344482421875,
      "step": 38412,
      "training_step_time": 0.4893815517425537
    },
    {
      "epoch": 0.000234454345703125,
      "model_forward_time": 0.11460351943969727,
      "step": 38413
    },
    {
      "epoch": 0.000234454345703125,
      "step": 38413,
      "training_step_time": 0.39873600006103516
    },
    {
      "epoch": 0.00023446044921875,
      "model_forward_time": 0.11473941802978516,
      "step": 38414
    },
    {
      "epoch": 0.00023446044921875,
      "step": 38414,
      "training_step_time": 0.38629746437072754
    },
    {
      "epoch": 0.000234466552734375,
      "model_forward_time": 0.11517667770385742,
      "step": 38415
    },
    {
      "epoch": 0.000234466552734375,
      "step": 38415,
      "training_step_time": 0.3901059627532959
    },
    {
      "epoch": 0.00023447265625,
      "model_forward_time": 0.11533427238464355,
      "step": 38416
    },
    {
      "epoch": 0.00023447265625,
      "step": 38416,
      "training_step_time": 0.39154839515686035
    },
    {
      "epoch": 0.000234478759765625,
      "model_forward_time": 0.1155693531036377,
      "step": 38417
    },
    {
      "epoch": 0.000234478759765625,
      "step": 38417,
      "training_step_time": 0.3901548385620117
    },
    {
      "epoch": 0.00023448486328125,
      "model_forward_time": 0.11480903625488281,
      "step": 38418
    },
    {
      "epoch": 0.00023448486328125,
      "step": 38418,
      "training_step_time": 0.4014124870300293
    },
    {
      "epoch": 0.000234490966796875,
      "model_forward_time": 0.11577391624450684,
      "step": 38419
    },
    {
      "epoch": 0.000234490966796875,
      "step": 38419,
      "training_step_time": 0.3922274112701416
    },
    {
      "epoch": 0.0002344970703125,
      "grad_norm": 0.11498760432004929,
      "learning_rate": 3.1388970971220546e-05,
      "loss": 0.035,
      "step": 38420
    },
    {
      "epoch": 0.0002344970703125,
      "model_forward_time": 0.11549949645996094,
      "step": 38420
    },
    {
      "epoch": 0.0002344970703125,
      "step": 38420,
      "training_step_time": 0.44077491760253906
    },
    {
      "epoch": 0.000234503173828125,
      "model_forward_time": 0.11538362503051758,
      "step": 38421
    },
    {
      "epoch": 0.000234503173828125,
      "step": 38421,
      "training_step_time": 0.5032482147216797
    },
    {
      "epoch": 0.00023450927734375,
      "model_forward_time": 0.11509919166564941,
      "step": 38422
    },
    {
      "epoch": 0.00023450927734375,
      "step": 38422,
      "training_step_time": 0.5215048789978027
    },
    {
      "epoch": 0.000234515380859375,
      "model_forward_time": 0.11454248428344727,
      "step": 38423
    },
    {
      "epoch": 0.000234515380859375,
      "step": 38423,
      "training_step_time": 0.3914914131164551
    },
    {
      "epoch": 0.000234521484375,
      "model_forward_time": 0.1151895523071289,
      "step": 38424
    },
    {
      "epoch": 0.000234521484375,
      "step": 38424,
      "training_step_time": 0.38492655754089355
    },
    {
      "epoch": 0.000234527587890625,
      "model_forward_time": 0.11545705795288086,
      "step": 38425
    },
    {
      "epoch": 0.000234527587890625,
      "step": 38425,
      "training_step_time": 0.39353179931640625
    },
    {
      "epoch": 0.00023453369140625,
      "model_forward_time": 0.11541342735290527,
      "step": 38426
    },
    {
      "epoch": 0.00023453369140625,
      "step": 38426,
      "training_step_time": 0.5176088809967041
    },
    {
      "epoch": 0.000234539794921875,
      "model_forward_time": 0.11744213104248047,
      "step": 38427
    },
    {
      "epoch": 0.000234539794921875,
      "step": 38427,
      "training_step_time": 0.547299861907959
    },
    {
      "epoch": 0.0002345458984375,
      "model_forward_time": 0.11805534362792969,
      "step": 38428
    },
    {
      "epoch": 0.0002345458984375,
      "step": 38428,
      "training_step_time": 0.6365420818328857
    },
    {
      "epoch": 0.000234552001953125,
      "model_forward_time": 0.1225583553314209,
      "step": 38429
    },
    {
      "epoch": 0.000234552001953125,
      "step": 38429,
      "training_step_time": 0.6486396789550781
    },
    {
      "epoch": 0.00023455810546875,
      "grad_norm": 0.13153190910816193,
      "learning_rate": 3.136339615620985e-05,
      "loss": 0.0382,
      "step": 38430
    },
    {
      "epoch": 0.00023455810546875,
      "model_forward_time": 0.119293212890625,
      "step": 38430
    },
    {
      "epoch": 0.00023455810546875,
      "step": 38430,
      "training_step_time": 0.7101688385009766
    },
    {
      "epoch": 0.000234564208984375,
      "model_forward_time": 0.11829161643981934,
      "step": 38431
    },
    {
      "epoch": 0.000234564208984375,
      "step": 38431,
      "training_step_time": 0.6528420448303223
    },
    {
      "epoch": 0.0002345703125,
      "model_forward_time": 0.11582565307617188,
      "step": 38432
    },
    {
      "epoch": 0.0002345703125,
      "step": 38432,
      "training_step_time": 0.5347051620483398
    },
    {
      "epoch": 0.000234576416015625,
      "model_forward_time": 0.12392687797546387,
      "step": 38433
    },
    {
      "epoch": 0.000234576416015625,
      "step": 38433,
      "training_step_time": 0.770003080368042
    },
    {
      "epoch": 0.00023458251953125,
      "model_forward_time": 0.1205606460571289,
      "step": 38434
    },
    {
      "epoch": 0.00023458251953125,
      "step": 38434,
      "training_step_time": 0.7126598358154297
    },
    {
      "epoch": 0.000234588623046875,
      "model_forward_time": 0.12224936485290527,
      "step": 38435
    },
    {
      "epoch": 0.000234588623046875,
      "step": 38435,
      "training_step_time": 0.6395401954650879
    },
    {
      "epoch": 0.0002345947265625,
      "model_forward_time": 0.13734102249145508,
      "step": 38436
    },
    {
      "epoch": 0.0002345947265625,
      "step": 38436,
      "training_step_time": 0.6480216979980469
    },
    {
      "epoch": 0.000234600830078125,
      "model_forward_time": 0.12119817733764648,
      "step": 38437
    },
    {
      "epoch": 0.000234600830078125,
      "step": 38437,
      "training_step_time": 0.6712145805358887
    },
    {
      "epoch": 0.00023460693359375,
      "model_forward_time": 0.11667633056640625,
      "step": 38438
    },
    {
      "epoch": 0.00023460693359375,
      "step": 38438,
      "training_step_time": 0.6397340297698975
    },
    {
      "epoch": 0.000234613037109375,
      "model_forward_time": 0.12072467803955078,
      "step": 38439
    },
    {
      "epoch": 0.000234613037109375,
      "step": 38439,
      "training_step_time": 0.6444170475006104
    },
    {
      "epoch": 0.000234619140625,
      "grad_norm": 0.10959240049123764,
      "learning_rate": 3.133782700250731e-05,
      "loss": 0.0405,
      "step": 38440
    },
    {
      "epoch": 0.000234619140625,
      "model_forward_time": 0.11923646926879883,
      "step": 38440
    },
    {
      "epoch": 0.000234619140625,
      "step": 38440,
      "training_step_time": 0.6792731285095215
    },
    {
      "epoch": 0.000234625244140625,
      "model_forward_time": 0.12229442596435547,
      "step": 38441
    },
    {
      "epoch": 0.000234625244140625,
      "step": 38441,
      "training_step_time": 0.7343802452087402
    },
    {
      "epoch": 0.00023463134765625,
      "model_forward_time": 0.11952781677246094,
      "step": 38442
    },
    {
      "epoch": 0.00023463134765625,
      "step": 38442,
      "training_step_time": 0.7524271011352539
    },
    {
      "epoch": 0.000234637451171875,
      "model_forward_time": 0.11799883842468262,
      "step": 38443
    },
    {
      "epoch": 0.000234637451171875,
      "step": 38443,
      "training_step_time": 0.7113230228424072
    },
    {
      "epoch": 0.0002346435546875,
      "model_forward_time": 0.11795830726623535,
      "step": 38444
    },
    {
      "epoch": 0.0002346435546875,
      "step": 38444,
      "training_step_time": 0.6879079341888428
    },
    {
      "epoch": 0.000234649658203125,
      "model_forward_time": 0.12206196784973145,
      "step": 38445
    },
    {
      "epoch": 0.000234649658203125,
      "step": 38445,
      "training_step_time": 0.6046609878540039
    },
    {
      "epoch": 0.00023465576171875,
      "model_forward_time": 0.11877655982971191,
      "step": 38446
    },
    {
      "epoch": 0.00023465576171875,
      "step": 38446,
      "training_step_time": 0.6636853218078613
    },
    {
      "epoch": 0.000234661865234375,
      "model_forward_time": 0.12202215194702148,
      "step": 38447
    },
    {
      "epoch": 0.000234661865234375,
      "step": 38447,
      "training_step_time": 0.6582987308502197
    },
    {
      "epoch": 0.00023466796875,
      "model_forward_time": 0.12224912643432617,
      "step": 38448
    },
    {
      "epoch": 0.00023466796875,
      "step": 38448,
      "training_step_time": 0.6075012683868408
    },
    {
      "epoch": 0.000234674072265625,
      "model_forward_time": 0.11735939979553223,
      "step": 38449
    },
    {
      "epoch": 0.000234674072265625,
      "step": 38449,
      "training_step_time": 0.6986901760101318
    },
    {
      "epoch": 0.00023468017578125,
      "grad_norm": 0.10682424157857895,
      "learning_rate": 3.131226351788018e-05,
      "loss": 0.0483,
      "step": 38450
    },
    {
      "epoch": 0.00023468017578125,
      "model_forward_time": 0.11838459968566895,
      "step": 38450
    },
    {
      "epoch": 0.00023468017578125,
      "step": 38450,
      "training_step_time": 0.6619348526000977
    },
    {
      "epoch": 0.000234686279296875,
      "model_forward_time": 0.11845660209655762,
      "step": 38451
    },
    {
      "epoch": 0.000234686279296875,
      "step": 38451,
      "training_step_time": 0.6831626892089844
    },
    {
      "epoch": 0.0002346923828125,
      "model_forward_time": 0.12000489234924316,
      "step": 38452
    },
    {
      "epoch": 0.0002346923828125,
      "step": 38452,
      "training_step_time": 0.699409008026123
    },
    {
      "epoch": 0.000234698486328125,
      "model_forward_time": 0.11817574501037598,
      "step": 38453
    },
    {
      "epoch": 0.000234698486328125,
      "step": 38453,
      "training_step_time": 0.6821651458740234
    },
    {
      "epoch": 0.00023470458984375,
      "model_forward_time": 0.12068295478820801,
      "step": 38454
    },
    {
      "epoch": 0.00023470458984375,
      "step": 38454,
      "training_step_time": 0.595200777053833
    },
    {
      "epoch": 0.000234710693359375,
      "model_forward_time": 0.12376785278320312,
      "step": 38455
    },
    {
      "epoch": 0.000234710693359375,
      "step": 38455,
      "training_step_time": 0.6861248016357422
    },
    {
      "epoch": 0.000234716796875,
      "model_forward_time": 0.11917710304260254,
      "step": 38456
    },
    {
      "epoch": 0.000234716796875,
      "step": 38456,
      "training_step_time": 0.7377634048461914
    },
    {
      "epoch": 0.000234722900390625,
      "model_forward_time": 0.12292194366455078,
      "step": 38457
    },
    {
      "epoch": 0.000234722900390625,
      "step": 38457,
      "training_step_time": 0.6242427825927734
    },
    {
      "epoch": 0.00023472900390625,
      "model_forward_time": 0.12012386322021484,
      "step": 38458
    },
    {
      "epoch": 0.00023472900390625,
      "step": 38458,
      "training_step_time": 0.6240675449371338
    },
    {
      "epoch": 0.000234735107421875,
      "model_forward_time": 0.12354493141174316,
      "step": 38459
    },
    {
      "epoch": 0.000234735107421875,
      "step": 38459,
      "training_step_time": 0.6925077438354492
    },
    {
      "epoch": 0.0002347412109375,
      "grad_norm": 0.1555701047182083,
      "learning_rate": 3.128670571009399e-05,
      "loss": 0.0433,
      "step": 38460
    },
    {
      "epoch": 0.0002347412109375,
      "model_forward_time": 0.11916112899780273,
      "step": 38460
    },
    {
      "epoch": 0.0002347412109375,
      "step": 38460,
      "training_step_time": 0.6912412643432617
    },
    {
      "epoch": 0.000234747314453125,
      "model_forward_time": 0.11832690238952637,
      "step": 38461
    },
    {
      "epoch": 0.000234747314453125,
      "step": 38461,
      "training_step_time": 0.6005034446716309
    },
    {
      "epoch": 0.00023475341796875,
      "model_forward_time": 0.11684846878051758,
      "step": 38462
    },
    {
      "epoch": 0.00023475341796875,
      "step": 38462,
      "training_step_time": 0.6723642349243164
    },
    {
      "epoch": 0.000234759521484375,
      "model_forward_time": 0.1401522159576416,
      "step": 38463
    },
    {
      "epoch": 0.000234759521484375,
      "step": 38463,
      "training_step_time": 0.6814982891082764
    },
    {
      "epoch": 0.000234765625,
      "model_forward_time": 0.11783027648925781,
      "step": 38464
    },
    {
      "epoch": 0.000234765625,
      "step": 38464,
      "training_step_time": 0.6818931102752686
    },
    {
      "epoch": 0.000234771728515625,
      "model_forward_time": 0.1255631446838379,
      "step": 38465
    },
    {
      "epoch": 0.000234771728515625,
      "step": 38465,
      "training_step_time": 0.586721658706665
    },
    {
      "epoch": 0.00023477783203125,
      "model_forward_time": 0.11904191970825195,
      "step": 38466
    },
    {
      "epoch": 0.00023477783203125,
      "step": 38466,
      "training_step_time": 0.683110237121582
    },
    {
      "epoch": 0.000234783935546875,
      "model_forward_time": 0.12321877479553223,
      "step": 38467
    },
    {
      "epoch": 0.000234783935546875,
      "step": 38467,
      "training_step_time": 0.7164130210876465
    },
    {
      "epoch": 0.0002347900390625,
      "model_forward_time": 0.1184391975402832,
      "step": 38468
    },
    {
      "epoch": 0.0002347900390625,
      "step": 38468,
      "training_step_time": 0.6514732837677002
    },
    {
      "epoch": 0.000234796142578125,
      "model_forward_time": 0.11889433860778809,
      "step": 38469
    },
    {
      "epoch": 0.000234796142578125,
      "step": 38469,
      "training_step_time": 0.7047889232635498
    },
    {
      "epoch": 0.00023480224609375,
      "grad_norm": 0.10803306847810745,
      "learning_rate": 3.126115358691249e-05,
      "loss": 0.0405,
      "step": 38470
    },
    {
      "epoch": 0.00023480224609375,
      "model_forward_time": 0.11618828773498535,
      "step": 38470
    },
    {
      "epoch": 0.00023480224609375,
      "step": 38470,
      "training_step_time": 0.7538695335388184
    },
    {
      "epoch": 0.000234808349609375,
      "model_forward_time": 0.12207651138305664,
      "step": 38471
    },
    {
      "epoch": 0.000234808349609375,
      "step": 38471,
      "training_step_time": 0.7245702743530273
    },
    {
      "epoch": 0.000234814453125,
      "model_forward_time": 0.11609983444213867,
      "step": 38472
    },
    {
      "epoch": 0.000234814453125,
      "step": 38472,
      "training_step_time": 0.6978814601898193
    },
    {
      "epoch": 0.000234820556640625,
      "model_forward_time": 0.11815714836120605,
      "step": 38473
    },
    {
      "epoch": 0.000234820556640625,
      "step": 38473,
      "training_step_time": 0.6318986415863037
    },
    {
      "epoch": 0.00023482666015625,
      "model_forward_time": 0.11748027801513672,
      "step": 38474
    },
    {
      "epoch": 0.00023482666015625,
      "step": 38474,
      "training_step_time": 0.6281561851501465
    },
    {
      "epoch": 0.000234832763671875,
      "model_forward_time": 0.12341785430908203,
      "step": 38475
    },
    {
      "epoch": 0.000234832763671875,
      "step": 38475,
      "training_step_time": 0.6356022357940674
    },
    {
      "epoch": 0.0002348388671875,
      "model_forward_time": 0.11810827255249023,
      "step": 38476
    },
    {
      "epoch": 0.0002348388671875,
      "step": 38476,
      "training_step_time": 0.6653831005096436
    },
    {
      "epoch": 0.000234844970703125,
      "model_forward_time": 0.11920785903930664,
      "step": 38477
    },
    {
      "epoch": 0.000234844970703125,
      "step": 38477,
      "training_step_time": 0.6359691619873047
    },
    {
      "epoch": 0.00023485107421875,
      "model_forward_time": 0.1276862621307373,
      "step": 38478
    },
    {
      "epoch": 0.00023485107421875,
      "step": 38478,
      "training_step_time": 0.6149864196777344
    },
    {
      "epoch": 0.000234857177734375,
      "model_forward_time": 0.11781144142150879,
      "step": 38479
    },
    {
      "epoch": 0.000234857177734375,
      "step": 38479,
      "training_step_time": 0.6898152828216553
    },
    {
      "epoch": 0.00023486328125,
      "grad_norm": 0.11291972547769547,
      "learning_rate": 3.123560715609777e-05,
      "loss": 0.047,
      "step": 38480
    },
    {
      "epoch": 0.00023486328125,
      "model_forward_time": 0.1229851245880127,
      "step": 38480
    },
    {
      "epoch": 0.00023486328125,
      "step": 38480,
      "training_step_time": 0.5664350986480713
    },
    {
      "epoch": 0.000234869384765625,
      "model_forward_time": 0.13138294219970703,
      "step": 38481
    },
    {
      "epoch": 0.000234869384765625,
      "step": 38481,
      "training_step_time": 0.7695462703704834
    },
    {
      "epoch": 0.00023487548828125,
      "model_forward_time": 0.11946749687194824,
      "step": 38482
    },
    {
      "epoch": 0.00023487548828125,
      "step": 38482,
      "training_step_time": 0.6503281593322754
    },
    {
      "epoch": 0.000234881591796875,
      "model_forward_time": 0.11791658401489258,
      "step": 38483
    },
    {
      "epoch": 0.000234881591796875,
      "step": 38483,
      "training_step_time": 0.6599390506744385
    },
    {
      "epoch": 0.0002348876953125,
      "model_forward_time": 0.1181190013885498,
      "step": 38484
    },
    {
      "epoch": 0.0002348876953125,
      "step": 38484,
      "training_step_time": 0.6499903202056885
    },
    {
      "epoch": 0.000234893798828125,
      "model_forward_time": 0.11677885055541992,
      "step": 38485
    },
    {
      "epoch": 0.000234893798828125,
      "step": 38485,
      "training_step_time": 0.6204016208648682
    },
    {
      "epoch": 0.00023489990234375,
      "model_forward_time": 0.11834025382995605,
      "step": 38486
    },
    {
      "epoch": 0.00023489990234375,
      "step": 38486,
      "training_step_time": 0.7037203311920166
    },
    {
      "epoch": 0.000234906005859375,
      "model_forward_time": 0.12197327613830566,
      "step": 38487
    },
    {
      "epoch": 0.000234906005859375,
      "step": 38487,
      "training_step_time": 0.6305632591247559
    },
    {
      "epoch": 0.000234912109375,
      "model_forward_time": 0.11694049835205078,
      "step": 38488
    },
    {
      "epoch": 0.000234912109375,
      "step": 38488,
      "training_step_time": 0.7045130729675293
    },
    {
      "epoch": 0.000234918212890625,
      "model_forward_time": 0.12022590637207031,
      "step": 38489
    },
    {
      "epoch": 0.000234918212890625,
      "step": 38489,
      "training_step_time": 0.6766939163208008
    },
    {
      "epoch": 0.00023492431640625,
      "grad_norm": 0.14542482793331146,
      "learning_rate": 3.121006642541014e-05,
      "loss": 0.0447,
      "step": 38490
    },
    {
      "epoch": 0.00023492431640625,
      "model_forward_time": 0.12744903564453125,
      "step": 38490
    },
    {
      "epoch": 0.00023492431640625,
      "step": 38490,
      "training_step_time": 0.6532745361328125
    },
    {
      "epoch": 0.000234930419921875,
      "model_forward_time": 0.12067914009094238,
      "step": 38491
    },
    {
      "epoch": 0.000234930419921875,
      "step": 38491,
      "training_step_time": 0.6676487922668457
    },
    {
      "epoch": 0.0002349365234375,
      "model_forward_time": 0.12314200401306152,
      "step": 38492
    },
    {
      "epoch": 0.0002349365234375,
      "step": 38492,
      "training_step_time": 0.5938489437103271
    },
    {
      "epoch": 0.000234942626953125,
      "model_forward_time": 0.12454771995544434,
      "step": 38493
    },
    {
      "epoch": 0.000234942626953125,
      "step": 38493,
      "training_step_time": 0.6244640350341797
    },
    {
      "epoch": 0.00023494873046875,
      "model_forward_time": 0.11742568016052246,
      "step": 38494
    },
    {
      "epoch": 0.00023494873046875,
      "step": 38494,
      "training_step_time": 0.5763111114501953
    },
    {
      "epoch": 0.000234954833984375,
      "model_forward_time": 0.1262056827545166,
      "step": 38495
    },
    {
      "epoch": 0.000234954833984375,
      "step": 38495,
      "training_step_time": 0.6346311569213867
    },
    {
      "epoch": 0.0002349609375,
      "model_forward_time": 0.12260842323303223,
      "step": 38496
    },
    {
      "epoch": 0.0002349609375,
      "step": 38496,
      "training_step_time": 0.5843195915222168
    },
    {
      "epoch": 0.000234967041015625,
      "model_forward_time": 0.11997818946838379,
      "step": 38497
    },
    {
      "epoch": 0.000234967041015625,
      "step": 38497,
      "training_step_time": 0.5833859443664551
    },
    {
      "epoch": 0.00023497314453125,
      "model_forward_time": 0.11843681335449219,
      "step": 38498
    },
    {
      "epoch": 0.00023497314453125,
      "step": 38498,
      "training_step_time": 0.645989179611206
    },
    {
      "epoch": 0.000234979248046875,
      "model_forward_time": 0.11834168434143066,
      "step": 38499
    },
    {
      "epoch": 0.000234979248046875,
      "step": 38499,
      "training_step_time": 0.5677638053894043
    },
    {
      "epoch": 0.0002349853515625,
      "grad_norm": 0.13125160336494446,
      "learning_rate": 3.118453140260823e-05,
      "loss": 0.0508,
      "step": 38500
    },
    {
      "epoch": 0.0002349853515625,
      "model_forward_time": 0.1169424057006836,
      "step": 38500
    },
    {
      "epoch": 0.0002349853515625,
      "step": 38500,
      "training_step_time": 0.4009380340576172
    },
    {
      "epoch": 0.000234991455078125,
      "model_forward_time": 0.11707139015197754,
      "step": 38501
    },
    {
      "epoch": 0.000234991455078125,
      "step": 38501,
      "training_step_time": 0.4526183605194092
    },
    {
      "epoch": 0.00023499755859375,
      "model_forward_time": 0.11602377891540527,
      "step": 38502
    },
    {
      "epoch": 0.00023499755859375,
      "step": 38502,
      "training_step_time": 0.5212898254394531
    },
    {
      "epoch": 0.000235003662109375,
      "model_forward_time": 0.11595511436462402,
      "step": 38503
    },
    {
      "epoch": 0.000235003662109375,
      "step": 38503,
      "training_step_time": 0.4261810779571533
    },
    {
      "epoch": 0.000235009765625,
      "model_forward_time": 0.11571693420410156,
      "step": 38504
    },
    {
      "epoch": 0.000235009765625,
      "step": 38504,
      "training_step_time": 0.4056665897369385
    },
    {
      "epoch": 0.000235015869140625,
      "model_forward_time": 0.11563539505004883,
      "step": 38505
    },
    {
      "epoch": 0.000235015869140625,
      "step": 38505,
      "training_step_time": 0.4330177307128906
    },
    {
      "epoch": 0.00023502197265625,
      "model_forward_time": 0.11517691612243652,
      "step": 38506
    },
    {
      "epoch": 0.00023502197265625,
      "step": 38506,
      "training_step_time": 0.398040771484375
    },
    {
      "epoch": 0.000235028076171875,
      "model_forward_time": 0.11628866195678711,
      "step": 38507
    },
    {
      "epoch": 0.000235028076171875,
      "step": 38507,
      "training_step_time": 0.4135091304779053
    },
    {
      "epoch": 0.0002350341796875,
      "model_forward_time": 0.11499214172363281,
      "step": 38508
    },
    {
      "epoch": 0.0002350341796875,
      "step": 38508,
      "training_step_time": 0.41277551651000977
    },
    {
      "epoch": 0.000235040283203125,
      "model_forward_time": 0.1143956184387207,
      "step": 38509
    },
    {
      "epoch": 0.000235040283203125,
      "step": 38509,
      "training_step_time": 0.3945627212524414
    },
    {
      "epoch": 0.00023504638671875,
      "grad_norm": 0.14068540930747986,
      "learning_rate": 3.115900209544887e-05,
      "loss": 0.0431,
      "step": 38510
    },
    {
      "epoch": 0.00023504638671875,
      "model_forward_time": 0.11507368087768555,
      "step": 38510
    },
    {
      "epoch": 0.00023504638671875,
      "step": 38510,
      "training_step_time": 0.409365177154541
    },
    {
      "epoch": 0.000235052490234375,
      "model_forward_time": 0.11533093452453613,
      "step": 38511
    },
    {
      "epoch": 0.000235052490234375,
      "step": 38511,
      "training_step_time": 0.42766880989074707
    },
    {
      "epoch": 0.00023505859375,
      "model_forward_time": 0.11474061012268066,
      "step": 38512
    },
    {
      "epoch": 0.00023505859375,
      "step": 38512,
      "training_step_time": 0.4014580249786377
    },
    {
      "epoch": 0.000235064697265625,
      "model_forward_time": 0.11572098731994629,
      "step": 38513
    },
    {
      "epoch": 0.000235064697265625,
      "step": 38513,
      "training_step_time": 0.43103480339050293
    },
    {
      "epoch": 0.00023507080078125,
      "model_forward_time": 0.1150810718536377,
      "step": 38514
    },
    {
      "epoch": 0.00023507080078125,
      "step": 38514,
      "training_step_time": 0.513664960861206
    },
    {
      "epoch": 0.000235076904296875,
      "model_forward_time": 0.1144094467163086,
      "step": 38515
    },
    {
      "epoch": 0.000235076904296875,
      "step": 38515,
      "training_step_time": 0.3702380657196045
    },
    {
      "epoch": 0.0002350830078125,
      "model_forward_time": 0.11560583114624023,
      "step": 38516
    },
    {
      "epoch": 0.0002350830078125,
      "step": 38516,
      "training_step_time": 0.47673964500427246
    },
    {
      "epoch": 0.000235089111328125,
      "model_forward_time": 0.1154487133026123,
      "step": 38517
    },
    {
      "epoch": 0.000235089111328125,
      "step": 38517,
      "training_step_time": 0.4163637161254883
    },
    {
      "epoch": 0.00023509521484375,
      "model_forward_time": 0.1146249771118164,
      "step": 38518
    },
    {
      "epoch": 0.00023509521484375,
      "step": 38518,
      "training_step_time": 0.3940165042877197
    },
    {
      "epoch": 0.000235101318359375,
      "model_forward_time": 0.1150059700012207,
      "step": 38519
    },
    {
      "epoch": 0.000235101318359375,
      "step": 38519,
      "training_step_time": 0.3870365619659424
    },
    {
      "epoch": 0.000235107421875,
      "grad_norm": 0.12270403653383255,
      "learning_rate": 3.113347851168721e-05,
      "loss": 0.0463,
      "step": 38520
    },
    {
      "epoch": 0.000235107421875,
      "model_forward_time": 0.11523985862731934,
      "step": 38520
    },
    {
      "epoch": 0.000235107421875,
      "step": 38520,
      "training_step_time": 0.4298243522644043
    },
    {
      "epoch": 0.000235113525390625,
      "model_forward_time": 0.11472654342651367,
      "step": 38521
    },
    {
      "epoch": 0.000235113525390625,
      "step": 38521,
      "training_step_time": 0.39644503593444824
    },
    {
      "epoch": 0.00023511962890625,
      "model_forward_time": 0.11460065841674805,
      "step": 38522
    },
    {
      "epoch": 0.00023511962890625,
      "step": 38522,
      "training_step_time": 0.3869900703430176
    },
    {
      "epoch": 0.000235125732421875,
      "model_forward_time": 0.11563944816589355,
      "step": 38523
    },
    {
      "epoch": 0.000235125732421875,
      "step": 38523,
      "training_step_time": 0.3904585838317871
    },
    {
      "epoch": 0.0002351318359375,
      "model_forward_time": 0.11550545692443848,
      "step": 38524
    },
    {
      "epoch": 0.0002351318359375,
      "step": 38524,
      "training_step_time": 0.3914909362792969
    },
    {
      "epoch": 0.000235137939453125,
      "model_forward_time": 0.11464810371398926,
      "step": 38525
    },
    {
      "epoch": 0.000235137939453125,
      "step": 38525,
      "training_step_time": 0.3943660259246826
    },
    {
      "epoch": 0.00023514404296875,
      "model_forward_time": 0.11490345001220703,
      "step": 38526
    },
    {
      "epoch": 0.00023514404296875,
      "step": 38526,
      "training_step_time": 0.3917701244354248
    },
    {
      "epoch": 0.000235150146484375,
      "model_forward_time": 0.11535429954528809,
      "step": 38527
    },
    {
      "epoch": 0.000235150146484375,
      "step": 38527,
      "training_step_time": 0.41745853424072266
    },
    {
      "epoch": 0.00023515625,
      "model_forward_time": 0.1149444580078125,
      "step": 38528
    },
    {
      "epoch": 0.00023515625,
      "step": 38528,
      "training_step_time": 0.4453415870666504
    },
    {
      "epoch": 0.000235162353515625,
      "model_forward_time": 0.11567831039428711,
      "step": 38529
    },
    {
      "epoch": 0.000235162353515625,
      "step": 38529,
      "training_step_time": 0.41611385345458984
    },
    {
      "epoch": 0.00023516845703125,
      "grad_norm": 0.09994246810674667,
      "learning_rate": 3.110796065907665e-05,
      "loss": 0.0385,
      "step": 38530
    },
    {
      "epoch": 0.00023516845703125,
      "model_forward_time": 0.11531758308410645,
      "step": 38530
    },
    {
      "epoch": 0.00023516845703125,
      "step": 38530,
      "training_step_time": 0.46211910247802734
    },
    {
      "epoch": 0.000235174560546875,
      "model_forward_time": 0.11577224731445312,
      "step": 38531
    },
    {
      "epoch": 0.000235174560546875,
      "step": 38531,
      "training_step_time": 0.5108883380889893
    },
    {
      "epoch": 0.0002351806640625,
      "model_forward_time": 0.11623191833496094,
      "step": 38532
    },
    {
      "epoch": 0.0002351806640625,
      "step": 38532,
      "training_step_time": 0.4289271831512451
    },
    {
      "epoch": 0.000235186767578125,
      "model_forward_time": 0.11499810218811035,
      "step": 38533
    },
    {
      "epoch": 0.000235186767578125,
      "step": 38533,
      "training_step_time": 0.3921988010406494
    },
    {
      "epoch": 0.00023519287109375,
      "model_forward_time": 0.11493730545043945,
      "step": 38534
    },
    {
      "epoch": 0.00023519287109375,
      "step": 38534,
      "training_step_time": 0.38529276847839355
    },
    {
      "epoch": 0.000235198974609375,
      "model_forward_time": 0.11525845527648926,
      "step": 38535
    },
    {
      "epoch": 0.000235198974609375,
      "step": 38535,
      "training_step_time": 0.39302563667297363
    },
    {
      "epoch": 0.000235205078125,
      "model_forward_time": 0.11540842056274414,
      "step": 38536
    },
    {
      "epoch": 0.000235205078125,
      "step": 38536,
      "training_step_time": 0.39222049713134766
    },
    {
      "epoch": 0.000235211181640625,
      "model_forward_time": 0.11560916900634766,
      "step": 38537
    },
    {
      "epoch": 0.000235211181640625,
      "step": 38537,
      "training_step_time": 0.391768217086792
    },
    {
      "epoch": 0.00023521728515625,
      "model_forward_time": 0.11534380912780762,
      "step": 38538
    },
    {
      "epoch": 0.00023521728515625,
      "step": 38538,
      "training_step_time": 0.41149473190307617
    },
    {
      "epoch": 0.000235223388671875,
      "model_forward_time": 0.11549639701843262,
      "step": 38539
    },
    {
      "epoch": 0.000235223388671875,
      "step": 38539,
      "training_step_time": 0.4214348793029785
    },
    {
      "epoch": 0.0002352294921875,
      "grad_norm": 0.10583265125751495,
      "learning_rate": 3.1082448545368814e-05,
      "loss": 0.0374,
      "step": 38540
    },
    {
      "epoch": 0.0002352294921875,
      "model_forward_time": 0.11495566368103027,
      "step": 38540
    },
    {
      "epoch": 0.0002352294921875,
      "step": 38540,
      "training_step_time": 0.4001655578613281
    },
    {
      "epoch": 0.000235235595703125,
      "model_forward_time": 0.1154317855834961,
      "step": 38541
    },
    {
      "epoch": 0.000235235595703125,
      "step": 38541,
      "training_step_time": 0.3951404094696045
    },
    {
      "epoch": 0.00023524169921875,
      "model_forward_time": 0.11632537841796875,
      "step": 38542
    },
    {
      "epoch": 0.00023524169921875,
      "step": 38542,
      "training_step_time": 0.4029886722564697
    },
    {
      "epoch": 0.000235247802734375,
      "model_forward_time": 0.11568188667297363,
      "step": 38543
    },
    {
      "epoch": 0.000235247802734375,
      "step": 38543,
      "training_step_time": 0.4356522560119629
    },
    {
      "epoch": 0.00023525390625,
      "model_forward_time": 0.11656618118286133,
      "step": 38544
    },
    {
      "epoch": 0.00023525390625,
      "step": 38544,
      "training_step_time": 0.4251549243927002
    },
    {
      "epoch": 0.000235260009765625,
      "model_forward_time": 0.11505675315856934,
      "step": 38545
    },
    {
      "epoch": 0.000235260009765625,
      "step": 38545,
      "training_step_time": 0.4658470153808594
    },
    {
      "epoch": 0.00023526611328125,
      "model_forward_time": 0.11498713493347168,
      "step": 38546
    },
    {
      "epoch": 0.00023526611328125,
      "step": 38546,
      "training_step_time": 0.4101073741912842
    },
    {
      "epoch": 0.000235272216796875,
      "model_forward_time": 0.11652088165283203,
      "step": 38547
    },
    {
      "epoch": 0.000235272216796875,
      "step": 38547,
      "training_step_time": 0.41390442848205566
    },
    {
      "epoch": 0.0002352783203125,
      "model_forward_time": 0.11499381065368652,
      "step": 38548
    },
    {
      "epoch": 0.0002352783203125,
      "step": 38548,
      "training_step_time": 0.38796424865722656
    },
    {
      "epoch": 0.000235284423828125,
      "model_forward_time": 0.11539483070373535,
      "step": 38549
    },
    {
      "epoch": 0.000235284423828125,
      "step": 38549,
      "training_step_time": 0.38964056968688965
    },
    {
      "epoch": 0.00023529052734375,
      "grad_norm": 0.10351570695638657,
      "learning_rate": 3.105694217831361e-05,
      "loss": 0.0465,
      "step": 38550
    },
    {
      "epoch": 0.00023529052734375,
      "model_forward_time": 0.1154928207397461,
      "step": 38550
    },
    {
      "epoch": 0.00023529052734375,
      "step": 38550,
      "training_step_time": 0.38005971908569336
    },
    {
      "epoch": 0.000235296630859375,
      "model_forward_time": 0.11552906036376953,
      "step": 38551
    },
    {
      "epoch": 0.000235296630859375,
      "step": 38551,
      "training_step_time": 0.3906123638153076
    },
    {
      "epoch": 0.000235302734375,
      "model_forward_time": 0.11576294898986816,
      "step": 38552
    },
    {
      "epoch": 0.000235302734375,
      "step": 38552,
      "training_step_time": 0.4114866256713867
    },
    {
      "epoch": 0.000235308837890625,
      "model_forward_time": 0.11481261253356934,
      "step": 38553
    },
    {
      "epoch": 0.000235308837890625,
      "step": 38553,
      "training_step_time": 0.4547584056854248
    },
    {
      "epoch": 0.00023531494140625,
      "model_forward_time": 0.11546492576599121,
      "step": 38554
    },
    {
      "epoch": 0.00023531494140625,
      "step": 38554,
      "training_step_time": 0.39244842529296875
    },
    {
      "epoch": 0.000235321044921875,
      "model_forward_time": 0.11545729637145996,
      "step": 38555
    },
    {
      "epoch": 0.000235321044921875,
      "step": 38555,
      "training_step_time": 0.38658571243286133
    },
    {
      "epoch": 0.0002353271484375,
      "model_forward_time": 0.11567091941833496,
      "step": 38556
    },
    {
      "epoch": 0.0002353271484375,
      "step": 38556,
      "training_step_time": 0.3896043300628662
    },
    {
      "epoch": 0.000235333251953125,
      "model_forward_time": 0.11499381065368652,
      "step": 38557
    },
    {
      "epoch": 0.000235333251953125,
      "step": 38557,
      "training_step_time": 0.39965391159057617
    },
    {
      "epoch": 0.00023533935546875,
      "model_forward_time": 0.11497735977172852,
      "step": 38558
    },
    {
      "epoch": 0.00023533935546875,
      "step": 38558,
      "training_step_time": 0.4553673267364502
    },
    {
      "epoch": 0.000235345458984375,
      "model_forward_time": 0.11467385292053223,
      "step": 38559
    },
    {
      "epoch": 0.000235345458984375,
      "step": 38559,
      "training_step_time": 0.40389442443847656
    },
    {
      "epoch": 0.0002353515625,
      "grad_norm": 0.17938171327114105,
      "learning_rate": 3.1031441565659235e-05,
      "loss": 0.0475,
      "step": 38560
    },
    {
      "epoch": 0.0002353515625,
      "model_forward_time": 0.11491274833679199,
      "step": 38560
    },
    {
      "epoch": 0.0002353515625,
      "step": 38560,
      "training_step_time": 0.42788195610046387
    },
    {
      "epoch": 0.000235357666015625,
      "model_forward_time": 0.11590290069580078,
      "step": 38561
    },
    {
      "epoch": 0.000235357666015625,
      "step": 38561,
      "training_step_time": 0.4599032402038574
    },
    {
      "epoch": 0.00023536376953125,
      "model_forward_time": 0.11553955078125,
      "step": 38562
    },
    {
      "epoch": 0.00023536376953125,
      "step": 38562,
      "training_step_time": 0.4118163585662842
    },
    {
      "epoch": 0.000235369873046875,
      "model_forward_time": 0.1155250072479248,
      "step": 38563
    },
    {
      "epoch": 0.000235369873046875,
      "step": 38563,
      "training_step_time": 0.3866703510284424
    },
    {
      "epoch": 0.0002353759765625,
      "model_forward_time": 0.11521768569946289,
      "step": 38564
    },
    {
      "epoch": 0.0002353759765625,
      "step": 38564,
      "training_step_time": 0.3895430564880371
    },
    {
      "epoch": 0.000235382080078125,
      "model_forward_time": 0.11524200439453125,
      "step": 38565
    },
    {
      "epoch": 0.000235382080078125,
      "step": 38565,
      "training_step_time": 0.3853445053100586
    },
    {
      "epoch": 0.00023538818359375,
      "model_forward_time": 0.11588478088378906,
      "step": 38566
    },
    {
      "epoch": 0.00023538818359375,
      "step": 38566,
      "training_step_time": 0.42911314964294434
    },
    {
      "epoch": 0.000235394287109375,
      "model_forward_time": 0.11508655548095703,
      "step": 38567
    },
    {
      "epoch": 0.000235394287109375,
      "step": 38567,
      "training_step_time": 0.4157836437225342
    },
    {
      "epoch": 0.000235400390625,
      "model_forward_time": 0.11512923240661621,
      "step": 38568
    },
    {
      "epoch": 0.000235400390625,
      "step": 38568,
      "training_step_time": 0.3847370147705078
    },
    {
      "epoch": 0.000235406494140625,
      "model_forward_time": 0.11526846885681152,
      "step": 38569
    },
    {
      "epoch": 0.000235406494140625,
      "step": 38569,
      "training_step_time": 0.40097808837890625
    },
    {
      "epoch": 0.00023541259765625,
      "grad_norm": 0.12881195545196533,
      "learning_rate": 3.100594671515206e-05,
      "loss": 0.0432,
      "step": 38570
    },
    {
      "epoch": 0.00023541259765625,
      "model_forward_time": 0.11567473411560059,
      "step": 38570
    },
    {
      "epoch": 0.00023541259765625,
      "step": 38570,
      "training_step_time": 0.39728808403015137
    },
    {
      "epoch": 0.000235418701171875,
      "model_forward_time": 0.11611127853393555,
      "step": 38571
    },
    {
      "epoch": 0.000235418701171875,
      "step": 38571,
      "training_step_time": 0.4888453483581543
    },
    {
      "epoch": 0.0002354248046875,
      "model_forward_time": 0.11539077758789062,
      "step": 38572
    },
    {
      "epoch": 0.0002354248046875,
      "step": 38572,
      "training_step_time": 0.39994168281555176
    },
    {
      "epoch": 0.000235430908203125,
      "model_forward_time": 0.11513280868530273,
      "step": 38573
    },
    {
      "epoch": 0.000235430908203125,
      "step": 38573,
      "training_step_time": 0.5181059837341309
    },
    {
      "epoch": 0.00023543701171875,
      "model_forward_time": 0.1147468090057373,
      "step": 38574
    },
    {
      "epoch": 0.00023543701171875,
      "step": 38574,
      "training_step_time": 0.3790931701660156
    },
    {
      "epoch": 0.000235443115234375,
      "model_forward_time": 0.1153724193572998,
      "step": 38575
    },
    {
      "epoch": 0.000235443115234375,
      "step": 38575,
      "training_step_time": 0.4560859203338623
    },
    {
      "epoch": 0.00023544921875,
      "model_forward_time": 0.11528611183166504,
      "step": 38576
    },
    {
      "epoch": 0.00023544921875,
      "step": 38576,
      "training_step_time": 0.43789029121398926
    },
    {
      "epoch": 0.000235455322265625,
      "model_forward_time": 0.11504554748535156,
      "step": 38577
    },
    {
      "epoch": 0.000235455322265625,
      "step": 38577,
      "training_step_time": 0.38503074645996094
    },
    {
      "epoch": 0.00023546142578125,
      "model_forward_time": 0.1150960922241211,
      "step": 38578
    },
    {
      "epoch": 0.00023546142578125,
      "step": 38578,
      "training_step_time": 0.38777780532836914
    },
    {
      "epoch": 0.000235467529296875,
      "model_forward_time": 0.11517620086669922,
      "step": 38579
    },
    {
      "epoch": 0.000235467529296875,
      "step": 38579,
      "training_step_time": 0.38474106788635254
    },
    {
      "epoch": 0.0002354736328125,
      "grad_norm": 0.12146349251270294,
      "learning_rate": 3.098045763453678e-05,
      "loss": 0.0395,
      "step": 38580
    },
    {
      "epoch": 0.0002354736328125,
      "model_forward_time": 0.11589932441711426,
      "step": 38580
    },
    {
      "epoch": 0.0002354736328125,
      "step": 38580,
      "training_step_time": 0.4400489330291748
    },
    {
      "epoch": 0.000235479736328125,
      "model_forward_time": 0.11593222618103027,
      "step": 38581
    },
    {
      "epoch": 0.000235479736328125,
      "step": 38581,
      "training_step_time": 0.38304662704467773
    },
    {
      "epoch": 0.00023548583984375,
      "model_forward_time": 0.11544585227966309,
      "step": 38582
    },
    {
      "epoch": 0.00023548583984375,
      "step": 38582,
      "training_step_time": 0.4003274440765381
    },
    {
      "epoch": 0.000235491943359375,
      "model_forward_time": 0.1152029037475586,
      "step": 38583
    },
    {
      "epoch": 0.000235491943359375,
      "step": 38583,
      "training_step_time": 0.38811159133911133
    },
    {
      "epoch": 0.000235498046875,
      "model_forward_time": 0.11493992805480957,
      "step": 38584
    },
    {
      "epoch": 0.000235498046875,
      "step": 38584,
      "training_step_time": 0.39113616943359375
    },
    {
      "epoch": 0.000235504150390625,
      "model_forward_time": 0.11474776268005371,
      "step": 38585
    },
    {
      "epoch": 0.000235504150390625,
      "step": 38585,
      "training_step_time": 0.39783263206481934
    },
    {
      "epoch": 0.00023551025390625,
      "model_forward_time": 0.11519765853881836,
      "step": 38586
    },
    {
      "epoch": 0.00023551025390625,
      "step": 38586,
      "training_step_time": 0.41916728019714355
    },
    {
      "epoch": 0.000235516357421875,
      "model_forward_time": 0.11548352241516113,
      "step": 38587
    },
    {
      "epoch": 0.000235516357421875,
      "step": 38587,
      "training_step_time": 0.473613977432251
    },
    {
      "epoch": 0.0002355224609375,
      "model_forward_time": 0.11488008499145508,
      "step": 38588
    },
    {
      "epoch": 0.0002355224609375,
      "step": 38588,
      "training_step_time": 0.39646148681640625
    },
    {
      "epoch": 0.000235528564453125,
      "model_forward_time": 0.11588430404663086,
      "step": 38589
    },
    {
      "epoch": 0.000235528564453125,
      "step": 38589,
      "training_step_time": 0.36708903312683105
    },
    {
      "epoch": 0.00023553466796875,
      "grad_norm": 0.10149221867322922,
      "learning_rate": 3.095497433155626e-05,
      "loss": 0.0441,
      "step": 38590
    },
    {
      "epoch": 0.00023553466796875,
      "model_forward_time": 0.11501097679138184,
      "step": 38590
    },
    {
      "epoch": 0.00023553466796875,
      "step": 38590,
      "training_step_time": 0.4801669120788574
    },
    {
      "epoch": 0.000235540771484375,
      "model_forward_time": 0.11455154418945312,
      "step": 38591
    },
    {
      "epoch": 0.000235540771484375,
      "step": 38591,
      "training_step_time": 0.4058079719543457
    },
    {
      "epoch": 0.000235546875,
      "model_forward_time": 0.11481571197509766,
      "step": 38592
    },
    {
      "epoch": 0.000235546875,
      "step": 38592,
      "training_step_time": 0.3815290927886963
    },
    {
      "epoch": 0.000235552978515625,
      "model_forward_time": 0.1145639419555664,
      "step": 38593
    },
    {
      "epoch": 0.000235552978515625,
      "step": 38593,
      "training_step_time": 0.4535560607910156
    },
    {
      "epoch": 0.00023555908203125,
      "model_forward_time": 0.11434006690979004,
      "step": 38594
    },
    {
      "epoch": 0.00023555908203125,
      "step": 38594,
      "training_step_time": 0.39568328857421875
    },
    {
      "epoch": 0.000235565185546875,
      "model_forward_time": 0.1159830093383789,
      "step": 38595
    },
    {
      "epoch": 0.000235565185546875,
      "step": 38595,
      "training_step_time": 0.4016873836517334
    },
    {
      "epoch": 0.0002355712890625,
      "model_forward_time": 0.11470413208007812,
      "step": 38596
    },
    {
      "epoch": 0.0002355712890625,
      "step": 38596,
      "training_step_time": 0.3970980644226074
    },
    {
      "epoch": 0.000235577392578125,
      "model_forward_time": 0.11542868614196777,
      "step": 38597
    },
    {
      "epoch": 0.000235577392578125,
      "step": 38597,
      "training_step_time": 0.39288330078125
    },
    {
      "epoch": 0.00023558349609375,
      "model_forward_time": 0.11518025398254395,
      "step": 38598
    },
    {
      "epoch": 0.00023558349609375,
      "step": 38598,
      "training_step_time": 0.4033846855163574
    },
    {
      "epoch": 0.000235589599609375,
      "model_forward_time": 0.11555814743041992,
      "step": 38599
    },
    {
      "epoch": 0.000235589599609375,
      "step": 38599,
      "training_step_time": 0.3971390724182129
    },
    {
      "epoch": 0.000235595703125,
      "grad_norm": 0.12672461569309235,
      "learning_rate": 3.0929496813951694e-05,
      "loss": 0.0427,
      "step": 38600
    },
    {
      "epoch": 0.000235595703125,
      "model_forward_time": 0.11537647247314453,
      "step": 38600
    },
    {
      "epoch": 0.000235595703125,
      "step": 38600,
      "training_step_time": 0.5021347999572754
    },
    {
      "epoch": 0.000235601806640625,
      "model_forward_time": 0.11547279357910156,
      "step": 38601
    },
    {
      "epoch": 0.000235601806640625,
      "step": 38601,
      "training_step_time": 0.39292478561401367
    },
    {
      "epoch": 0.00023560791015625,
      "model_forward_time": 0.11528968811035156,
      "step": 38602
    },
    {
      "epoch": 0.00023560791015625,
      "step": 38602,
      "training_step_time": 0.45681309700012207
    },
    {
      "epoch": 0.000235614013671875,
      "model_forward_time": 0.11506414413452148,
      "step": 38603
    },
    {
      "epoch": 0.000235614013671875,
      "step": 38603,
      "training_step_time": 0.48883867263793945
    },
    {
      "epoch": 0.0002356201171875,
      "model_forward_time": 0.11505722999572754,
      "step": 38604
    },
    {
      "epoch": 0.0002356201171875,
      "step": 38604,
      "training_step_time": 0.4949822425842285
    },
    {
      "epoch": 0.000235626220703125,
      "model_forward_time": 0.1155555248260498,
      "step": 38605
    },
    {
      "epoch": 0.000235626220703125,
      "step": 38605,
      "training_step_time": 0.5030372142791748
    },
    {
      "epoch": 0.00023563232421875,
      "model_forward_time": 0.11488986015319824,
      "step": 38606
    },
    {
      "epoch": 0.00023563232421875,
      "step": 38606,
      "training_step_time": 0.407581090927124
    },
    {
      "epoch": 0.000235638427734375,
      "model_forward_time": 0.1142721176147461,
      "step": 38607
    },
    {
      "epoch": 0.000235638427734375,
      "step": 38607,
      "training_step_time": 0.42223024368286133
    },
    {
      "epoch": 0.00023564453125,
      "model_forward_time": 0.11442422866821289,
      "step": 38608
    },
    {
      "epoch": 0.00023564453125,
      "step": 38608,
      "training_step_time": 0.3889927864074707
    },
    {
      "epoch": 0.000235650634765625,
      "model_forward_time": 0.11515998840332031,
      "step": 38609
    },
    {
      "epoch": 0.000235650634765625,
      "step": 38609,
      "training_step_time": 0.3857607841491699
    },
    {
      "epoch": 0.00023565673828125,
      "grad_norm": 0.16000959277153015,
      "learning_rate": 3.090402508946249e-05,
      "loss": 0.045,
      "step": 38610
    },
    {
      "epoch": 0.00023565673828125,
      "model_forward_time": 0.11529374122619629,
      "step": 38610
    },
    {
      "epoch": 0.00023565673828125,
      "step": 38610,
      "training_step_time": 0.3970017433166504
    },
    {
      "epoch": 0.000235662841796875,
      "model_forward_time": 0.11532139778137207,
      "step": 38611
    },
    {
      "epoch": 0.000235662841796875,
      "step": 38611,
      "training_step_time": 0.393115758895874
    },
    {
      "epoch": 0.0002356689453125,
      "model_forward_time": 0.11525106430053711,
      "step": 38612
    },
    {
      "epoch": 0.0002356689453125,
      "step": 38612,
      "training_step_time": 0.3949873447418213
    },
    {
      "epoch": 0.000235675048828125,
      "model_forward_time": 0.11481428146362305,
      "step": 38613
    },
    {
      "epoch": 0.000235675048828125,
      "step": 38613,
      "training_step_time": 0.39789366722106934
    },
    {
      "epoch": 0.00023568115234375,
      "model_forward_time": 0.11593222618103027,
      "step": 38614
    },
    {
      "epoch": 0.00023568115234375,
      "step": 38614,
      "training_step_time": 0.4502716064453125
    },
    {
      "epoch": 0.000235687255859375,
      "model_forward_time": 0.11577558517456055,
      "step": 38615
    },
    {
      "epoch": 0.000235687255859375,
      "step": 38615,
      "training_step_time": 0.3989086151123047
    },
    {
      "epoch": 0.000235693359375,
      "model_forward_time": 0.1158299446105957,
      "step": 38616
    },
    {
      "epoch": 0.000235693359375,
      "step": 38616,
      "training_step_time": 0.4394969940185547
    },
    {
      "epoch": 0.000235699462890625,
      "model_forward_time": 0.11524748802185059,
      "step": 38617
    },
    {
      "epoch": 0.000235699462890625,
      "step": 38617,
      "training_step_time": 0.410794734954834
    },
    {
      "epoch": 0.00023570556640625,
      "model_forward_time": 0.1152496337890625,
      "step": 38618
    },
    {
      "epoch": 0.00023570556640625,
      "step": 38618,
      "training_step_time": 0.36600756645202637
    },
    {
      "epoch": 0.000235711669921875,
      "model_forward_time": 0.11489486694335938,
      "step": 38619
    },
    {
      "epoch": 0.000235711669921875,
      "step": 38619,
      "training_step_time": 0.46837353706359863
    },
    {
      "epoch": 0.0002357177734375,
      "grad_norm": 0.1259295642375946,
      "learning_rate": 3.0878559165826236e-05,
      "loss": 0.0386,
      "step": 38620
    },
    {
      "epoch": 0.0002357177734375,
      "model_forward_time": 0.11529922485351562,
      "step": 38620
    },
    {
      "epoch": 0.0002357177734375,
      "step": 38620,
      "training_step_time": 0.4536929130554199
    },
    {
      "epoch": 0.000235723876953125,
      "model_forward_time": 0.11507463455200195,
      "step": 38621
    },
    {
      "epoch": 0.000235723876953125,
      "step": 38621,
      "training_step_time": 0.38781070709228516
    },
    {
      "epoch": 0.00023572998046875,
      "model_forward_time": 0.11501884460449219,
      "step": 38622
    },
    {
      "epoch": 0.00023572998046875,
      "step": 38622,
      "training_step_time": 0.3885214328765869
    },
    {
      "epoch": 0.000235736083984375,
      "model_forward_time": 0.11443424224853516,
      "step": 38623
    },
    {
      "epoch": 0.000235736083984375,
      "step": 38623,
      "training_step_time": 0.3954813480377197
    },
    {
      "epoch": 0.0002357421875,
      "model_forward_time": 0.11490464210510254,
      "step": 38624
    },
    {
      "epoch": 0.0002357421875,
      "step": 38624,
      "training_step_time": 0.39208555221557617
    },
    {
      "epoch": 0.000235748291015625,
      "model_forward_time": 0.11512136459350586,
      "step": 38625
    },
    {
      "epoch": 0.000235748291015625,
      "step": 38625,
      "training_step_time": 0.3823428153991699
    },
    {
      "epoch": 0.00023575439453125,
      "model_forward_time": 0.11469483375549316,
      "step": 38626
    },
    {
      "epoch": 0.00023575439453125,
      "step": 38626,
      "training_step_time": 0.3946225643157959
    },
    {
      "epoch": 0.000235760498046875,
      "model_forward_time": 0.11578917503356934,
      "step": 38627
    },
    {
      "epoch": 0.000235760498046875,
      "step": 38627,
      "training_step_time": 0.38966941833496094
    },
    {
      "epoch": 0.0002357666015625,
      "model_forward_time": 0.11488819122314453,
      "step": 38628
    },
    {
      "epoch": 0.0002357666015625,
      "step": 38628,
      "training_step_time": 0.39287352561950684
    },
    {
      "epoch": 0.000235772705078125,
      "model_forward_time": 0.1152946949005127,
      "step": 38629
    },
    {
      "epoch": 0.000235772705078125,
      "step": 38629,
      "training_step_time": 0.41195201873779297
    },
    {
      "epoch": 0.00023577880859375,
      "grad_norm": 0.10075530409812927,
      "learning_rate": 3.0853099050778854e-05,
      "loss": 0.0424,
      "step": 38630
    },
    {
      "epoch": 0.00023577880859375,
      "model_forward_time": 0.11575031280517578,
      "step": 38630
    },
    {
      "epoch": 0.00023577880859375,
      "step": 38630,
      "training_step_time": 0.506868839263916
    },
    {
      "epoch": 0.000235784912109375,
      "model_forward_time": 0.11518025398254395,
      "step": 38631
    },
    {
      "epoch": 0.000235784912109375,
      "step": 38631,
      "training_step_time": 0.3895714282989502
    },
    {
      "epoch": 0.000235791015625,
      "model_forward_time": 0.11497735977172852,
      "step": 38632
    },
    {
      "epoch": 0.000235791015625,
      "step": 38632,
      "training_step_time": 0.40766310691833496
    },
    {
      "epoch": 0.000235797119140625,
      "model_forward_time": 0.11562991142272949,
      "step": 38633
    },
    {
      "epoch": 0.000235797119140625,
      "step": 38633,
      "training_step_time": 0.45480966567993164
    },
    {
      "epoch": 0.00023580322265625,
      "model_forward_time": 0.11531925201416016,
      "step": 38634
    },
    {
      "epoch": 0.00023580322265625,
      "step": 38634,
      "training_step_time": 0.47286534309387207
    },
    {
      "epoch": 0.000235809326171875,
      "model_forward_time": 0.11521530151367188,
      "step": 38635
    },
    {
      "epoch": 0.000235809326171875,
      "step": 38635,
      "training_step_time": 0.4174346923828125
    },
    {
      "epoch": 0.0002358154296875,
      "model_forward_time": 0.11496210098266602,
      "step": 38636
    },
    {
      "epoch": 0.0002358154296875,
      "step": 38636,
      "training_step_time": 0.39912891387939453
    },
    {
      "epoch": 0.000235821533203125,
      "model_forward_time": 0.11451554298400879,
      "step": 38637
    },
    {
      "epoch": 0.000235821533203125,
      "step": 38637,
      "training_step_time": 0.3846170902252197
    },
    {
      "epoch": 0.00023582763671875,
      "model_forward_time": 0.11495423316955566,
      "step": 38638
    },
    {
      "epoch": 0.00023582763671875,
      "step": 38638,
      "training_step_time": 0.3944675922393799
    },
    {
      "epoch": 0.000235833740234375,
      "model_forward_time": 0.11508536338806152,
      "step": 38639
    },
    {
      "epoch": 0.000235833740234375,
      "step": 38639,
      "training_step_time": 0.395113468170166
    },
    {
      "epoch": 0.00023583984375,
      "grad_norm": 0.1300428956747055,
      "learning_rate": 3.082764475205442e-05,
      "loss": 0.045,
      "step": 38640
    },
    {
      "epoch": 0.00023583984375,
      "model_forward_time": 0.1151430606842041,
      "step": 38640
    },
    {
      "epoch": 0.00023583984375,
      "step": 38640,
      "training_step_time": 0.39544129371643066
    },
    {
      "epoch": 0.000235845947265625,
      "model_forward_time": 0.11510729789733887,
      "step": 38641
    },
    {
      "epoch": 0.000235845947265625,
      "step": 38641,
      "training_step_time": 0.40076255798339844
    },
    {
      "epoch": 0.00023585205078125,
      "model_forward_time": 0.11506319046020508,
      "step": 38642
    },
    {
      "epoch": 0.00023585205078125,
      "step": 38642,
      "training_step_time": 0.39682888984680176
    },
    {
      "epoch": 0.000235858154296875,
      "model_forward_time": 0.1153554916381836,
      "step": 38643
    },
    {
      "epoch": 0.000235858154296875,
      "step": 38643,
      "training_step_time": 0.3796534538269043
    },
    {
      "epoch": 0.0002358642578125,
      "model_forward_time": 0.11538481712341309,
      "step": 38644
    },
    {
      "epoch": 0.0002358642578125,
      "step": 38644,
      "training_step_time": 0.4507737159729004
    },
    {
      "epoch": 0.000235870361328125,
      "model_forward_time": 0.11525726318359375,
      "step": 38645
    },
    {
      "epoch": 0.000235870361328125,
      "step": 38645,
      "training_step_time": 0.47931599617004395
    },
    {
      "epoch": 0.00023587646484375,
      "model_forward_time": 0.11439394950866699,
      "step": 38646
    },
    {
      "epoch": 0.00023587646484375,
      "step": 38646,
      "training_step_time": 0.43852758407592773
    },
    {
      "epoch": 0.000235882568359375,
      "model_forward_time": 0.1155233383178711,
      "step": 38647
    },
    {
      "epoch": 0.000235882568359375,
      "step": 38647,
      "training_step_time": 0.47293853759765625
    },
    {
      "epoch": 0.000235888671875,
      "model_forward_time": 0.1153571605682373,
      "step": 38648
    },
    {
      "epoch": 0.000235888671875,
      "step": 38648,
      "training_step_time": 0.4314765930175781
    },
    {
      "epoch": 0.000235894775390625,
      "model_forward_time": 0.11537694931030273,
      "step": 38649
    },
    {
      "epoch": 0.000235894775390625,
      "step": 38649,
      "training_step_time": 0.4853849411010742
    },
    {
      "epoch": 0.00023590087890625,
      "grad_norm": 0.0777445062994957,
      "learning_rate": 3.0802196277385317e-05,
      "loss": 0.0428,
      "step": 38650
    },
    {
      "epoch": 0.00023590087890625,
      "model_forward_time": 0.11455416679382324,
      "step": 38650
    },
    {
      "epoch": 0.00023590087890625,
      "step": 38650,
      "training_step_time": 0.3891918659210205
    },
    {
      "epoch": 0.000235906982421875,
      "model_forward_time": 0.11451125144958496,
      "step": 38651
    },
    {
      "epoch": 0.000235906982421875,
      "step": 38651,
      "training_step_time": 0.38570117950439453
    },
    {
      "epoch": 0.0002359130859375,
      "model_forward_time": 0.11533093452453613,
      "step": 38652
    },
    {
      "epoch": 0.0002359130859375,
      "step": 38652,
      "training_step_time": 0.4031827449798584
    },
    {
      "epoch": 0.000235919189453125,
      "model_forward_time": 0.11570310592651367,
      "step": 38653
    },
    {
      "epoch": 0.000235919189453125,
      "step": 38653,
      "training_step_time": 0.4017660617828369
    },
    {
      "epoch": 0.00023592529296875,
      "model_forward_time": 0.11603927612304688,
      "step": 38654
    },
    {
      "epoch": 0.00023592529296875,
      "step": 38654,
      "training_step_time": 0.3976922035217285
    },
    {
      "epoch": 0.000235931396484375,
      "model_forward_time": 0.11493635177612305,
      "step": 38655
    },
    {
      "epoch": 0.000235931396484375,
      "step": 38655,
      "training_step_time": 0.3727304935455322
    },
    {
      "epoch": 0.0002359375,
      "model_forward_time": 0.11541533470153809,
      "step": 38656
    },
    {
      "epoch": 0.0002359375,
      "step": 38656,
      "training_step_time": 0.38925886154174805
    },
    {
      "epoch": 0.000235943603515625,
      "model_forward_time": 0.11548519134521484,
      "step": 38657
    },
    {
      "epoch": 0.000235943603515625,
      "step": 38657,
      "training_step_time": 0.42976880073547363
    },
    {
      "epoch": 0.00023594970703125,
      "model_forward_time": 0.11547613143920898,
      "step": 38658
    },
    {
      "epoch": 0.00023594970703125,
      "step": 38658,
      "training_step_time": 0.4075353145599365
    },
    {
      "epoch": 0.000235955810546875,
      "model_forward_time": 0.1146690845489502,
      "step": 38659
    },
    {
      "epoch": 0.000235955810546875,
      "step": 38659,
      "training_step_time": 0.43918585777282715
    },
    {
      "epoch": 0.0002359619140625,
      "grad_norm": 0.13385562598705292,
      "learning_rate": 3.077675363450207e-05,
      "loss": 0.0386,
      "step": 38660
    },
    {
      "epoch": 0.0002359619140625,
      "model_forward_time": 0.1151115894317627,
      "step": 38660
    },
    {
      "epoch": 0.0002359619140625,
      "step": 38660,
      "training_step_time": 0.45939183235168457
    },
    {
      "epoch": 0.000235968017578125,
      "model_forward_time": 0.11714768409729004,
      "step": 38661
    },
    {
      "epoch": 0.000235968017578125,
      "step": 38661,
      "training_step_time": 0.4203460216522217
    },
    {
      "epoch": 0.00023597412109375,
      "model_forward_time": 0.11595606803894043,
      "step": 38662
    },
    {
      "epoch": 0.00023597412109375,
      "step": 38662,
      "training_step_time": 0.4836306571960449
    },
    {
      "epoch": 0.000235980224609375,
      "model_forward_time": 0.11501955986022949,
      "step": 38663
    },
    {
      "epoch": 0.000235980224609375,
      "step": 38663,
      "training_step_time": 0.4204704761505127
    },
    {
      "epoch": 0.000235986328125,
      "model_forward_time": 0.11513781547546387,
      "step": 38664
    },
    {
      "epoch": 0.000235986328125,
      "step": 38664,
      "training_step_time": 0.453601598739624
    },
    {
      "epoch": 0.000235992431640625,
      "model_forward_time": 0.11474752426147461,
      "step": 38665
    },
    {
      "epoch": 0.000235992431640625,
      "step": 38665,
      "training_step_time": 0.3976917266845703
    },
    {
      "epoch": 0.00023599853515625,
      "model_forward_time": 0.11459112167358398,
      "step": 38666
    },
    {
      "epoch": 0.00023599853515625,
      "step": 38666,
      "training_step_time": 0.38996362686157227
    },
    {
      "epoch": 0.000236004638671875,
      "model_forward_time": 0.11539340019226074,
      "step": 38667
    },
    {
      "epoch": 0.000236004638671875,
      "step": 38667,
      "training_step_time": 0.3752317428588867
    },
    {
      "epoch": 0.0002360107421875,
      "model_forward_time": 0.1153266429901123,
      "step": 38668
    },
    {
      "epoch": 0.0002360107421875,
      "step": 38668,
      "training_step_time": 0.39620327949523926
    },
    {
      "epoch": 0.000236016845703125,
      "model_forward_time": 0.11514997482299805,
      "step": 38669
    },
    {
      "epoch": 0.000236016845703125,
      "step": 38669,
      "training_step_time": 0.397524356842041
    },
    {
      "epoch": 0.00023602294921875,
      "grad_norm": 0.12184503674507141,
      "learning_rate": 3.075131683113352e-05,
      "loss": 0.0405,
      "step": 38670
    },
    {
      "epoch": 0.00023602294921875,
      "model_forward_time": 0.11458826065063477,
      "step": 38670
    },
    {
      "epoch": 0.00023602294921875,
      "step": 38670,
      "training_step_time": 0.396256685256958
    },
    {
      "epoch": 0.000236029052734375,
      "model_forward_time": 0.11540985107421875,
      "step": 38671
    },
    {
      "epoch": 0.000236029052734375,
      "step": 38671,
      "training_step_time": 0.4050133228302002
    },
    {
      "epoch": 0.00023603515625,
      "model_forward_time": 0.11515402793884277,
      "step": 38672
    },
    {
      "epoch": 0.00023603515625,
      "step": 38672,
      "training_step_time": 0.45661163330078125
    },
    {
      "epoch": 0.000236041259765625,
      "model_forward_time": 0.11476588249206543,
      "step": 38673
    },
    {
      "epoch": 0.000236041259765625,
      "step": 38673,
      "training_step_time": 0.38590121269226074
    },
    {
      "epoch": 0.00023604736328125,
      "model_forward_time": 0.11543846130371094,
      "step": 38674
    },
    {
      "epoch": 0.00023604736328125,
      "step": 38674,
      "training_step_time": 0.4510664939880371
    },
    {
      "epoch": 0.000236053466796875,
      "model_forward_time": 0.11498188972473145,
      "step": 38675
    },
    {
      "epoch": 0.000236053466796875,
      "step": 38675,
      "training_step_time": 0.4399409294128418
    },
    {
      "epoch": 0.0002360595703125,
      "model_forward_time": 0.11449122428894043,
      "step": 38676
    },
    {
      "epoch": 0.0002360595703125,
      "step": 38676,
      "training_step_time": 0.4422173500061035
    },
    {
      "epoch": 0.000236065673828125,
      "model_forward_time": 0.11503362655639648,
      "step": 38677
    },
    {
      "epoch": 0.000236065673828125,
      "step": 38677,
      "training_step_time": 0.48291444778442383
    },
    {
      "epoch": 0.00023607177734375,
      "model_forward_time": 0.11493968963623047,
      "step": 38678
    },
    {
      "epoch": 0.00023607177734375,
      "step": 38678,
      "training_step_time": 0.495086669921875
    },
    {
      "epoch": 0.000236077880859375,
      "model_forward_time": 0.11454963684082031,
      "step": 38679
    },
    {
      "epoch": 0.000236077880859375,
      "step": 38679,
      "training_step_time": 0.37714695930480957
    },
    {
      "epoch": 0.000236083984375,
      "grad_norm": 0.12207301706075668,
      "learning_rate": 3.072588587500669e-05,
      "loss": 0.0408,
      "step": 38680
    },
    {
      "epoch": 0.000236083984375,
      "model_forward_time": 0.11589598655700684,
      "step": 38680
    },
    {
      "epoch": 0.000236083984375,
      "step": 38680,
      "training_step_time": 0.39496946334838867
    },
    {
      "epoch": 0.000236090087890625,
      "model_forward_time": 0.11499619483947754,
      "step": 38681
    },
    {
      "epoch": 0.000236090087890625,
      "step": 38681,
      "training_step_time": 0.38562536239624023
    },
    {
      "epoch": 0.00023609619140625,
      "model_forward_time": 0.11527490615844727,
      "step": 38682
    },
    {
      "epoch": 0.00023609619140625,
      "step": 38682,
      "training_step_time": 0.3942267894744873
    },
    {
      "epoch": 0.000236102294921875,
      "model_forward_time": 0.11511850357055664,
      "step": 38683
    },
    {
      "epoch": 0.000236102294921875,
      "step": 38683,
      "training_step_time": 0.3926362991333008
    },
    {
      "epoch": 0.0002361083984375,
      "model_forward_time": 0.11543154716491699,
      "step": 38684
    },
    {
      "epoch": 0.0002361083984375,
      "step": 38684,
      "training_step_time": 0.38900160789489746
    },
    {
      "epoch": 0.000236114501953125,
      "model_forward_time": 0.11518096923828125,
      "step": 38685
    },
    {
      "epoch": 0.000236114501953125,
      "step": 38685,
      "training_step_time": 0.39600229263305664
    },
    {
      "epoch": 0.00023612060546875,
      "model_forward_time": 0.11493277549743652,
      "step": 38686
    },
    {
      "epoch": 0.00023612060546875,
      "step": 38686,
      "training_step_time": 0.42706847190856934
    },
    {
      "epoch": 0.000236126708984375,
      "model_forward_time": 0.11523318290710449,
      "step": 38687
    },
    {
      "epoch": 0.000236126708984375,
      "step": 38687,
      "training_step_time": 0.4308767318725586
    },
    {
      "epoch": 0.0002361328125,
      "model_forward_time": 0.11498522758483887,
      "step": 38688
    },
    {
      "epoch": 0.0002361328125,
      "step": 38688,
      "training_step_time": 0.4055514335632324
    },
    {
      "epoch": 0.000236138916015625,
      "model_forward_time": 0.1155545711517334,
      "step": 38689
    },
    {
      "epoch": 0.000236138916015625,
      "step": 38689,
      "training_step_time": 0.4949619770050049
    },
    {
      "epoch": 0.00023614501953125,
      "grad_norm": 0.09762375056743622,
      "learning_rate": 3.070046077384682e-05,
      "loss": 0.0404,
      "step": 38690
    },
    {
      "epoch": 0.00023614501953125,
      "model_forward_time": 0.11478734016418457,
      "step": 38690
    },
    {
      "epoch": 0.00023614501953125,
      "step": 38690,
      "training_step_time": 0.40074849128723145
    },
    {
      "epoch": 0.000236151123046875,
      "model_forward_time": 0.11509871482849121,
      "step": 38691
    },
    {
      "epoch": 0.000236151123046875,
      "step": 38691,
      "training_step_time": 0.37865424156188965
    },
    {
      "epoch": 0.0002361572265625,
      "model_forward_time": 0.11511754989624023,
      "step": 38692
    },
    {
      "epoch": 0.0002361572265625,
      "step": 38692,
      "training_step_time": 0.4456746578216553
    },
    {
      "epoch": 0.000236163330078125,
      "model_forward_time": 0.11463093757629395,
      "step": 38693
    },
    {
      "epoch": 0.000236163330078125,
      "step": 38693,
      "training_step_time": 0.4536442756652832
    },
    {
      "epoch": 0.00023616943359375,
      "model_forward_time": 0.11500692367553711,
      "step": 38694
    },
    {
      "epoch": 0.00023616943359375,
      "step": 38694,
      "training_step_time": 0.3925013542175293
    },
    {
      "epoch": 0.000236175537109375,
      "model_forward_time": 0.11407208442687988,
      "step": 38695
    },
    {
      "epoch": 0.000236175537109375,
      "step": 38695,
      "training_step_time": 0.39890432357788086
    },
    {
      "epoch": 0.000236181640625,
      "model_forward_time": 0.11615395545959473,
      "step": 38696
    },
    {
      "epoch": 0.000236181640625,
      "step": 38696,
      "training_step_time": 0.3846557140350342
    },
    {
      "epoch": 0.000236187744140625,
      "model_forward_time": 0.11504077911376953,
      "step": 38697
    },
    {
      "epoch": 0.000236187744140625,
      "step": 38697,
      "training_step_time": 0.3869307041168213
    },
    {
      "epoch": 0.00023619384765625,
      "model_forward_time": 0.1149909496307373,
      "step": 38698
    },
    {
      "epoch": 0.00023619384765625,
      "step": 38698,
      "training_step_time": 0.3896298408508301
    },
    {
      "epoch": 0.000236199951171875,
      "model_forward_time": 0.11471915245056152,
      "step": 38699
    },
    {
      "epoch": 0.000236199951171875,
      "step": 38699,
      "training_step_time": 0.3941159248352051
    },
    {
      "epoch": 0.0002362060546875,
      "grad_norm": 0.08419504761695862,
      "learning_rate": 3.0675041535377405e-05,
      "loss": 0.0382,
      "step": 38700
    },
    {
      "epoch": 0.0002362060546875,
      "model_forward_time": 0.1148676872253418,
      "step": 38700
    },
    {
      "epoch": 0.0002362060546875,
      "step": 38700,
      "training_step_time": 0.39975452423095703
    },
    {
      "epoch": 0.000236212158203125,
      "model_forward_time": 0.11576032638549805,
      "step": 38701
    },
    {
      "epoch": 0.000236212158203125,
      "step": 38701,
      "training_step_time": 0.4697089195251465
    },
    {
      "epoch": 0.00023621826171875,
      "model_forward_time": 0.11533498764038086,
      "step": 38702
    },
    {
      "epoch": 0.00023621826171875,
      "step": 38702,
      "training_step_time": 0.42835569381713867
    },
    {
      "epoch": 0.000236224365234375,
      "model_forward_time": 0.11536765098571777,
      "step": 38703
    },
    {
      "epoch": 0.000236224365234375,
      "step": 38703,
      "training_step_time": 0.44968295097351074
    },
    {
      "epoch": 0.00023623046875,
      "model_forward_time": 0.11508941650390625,
      "step": 38704
    },
    {
      "epoch": 0.00023623046875,
      "step": 38704,
      "training_step_time": 0.3982222080230713
    },
    {
      "epoch": 0.000236236572265625,
      "model_forward_time": 0.11533188819885254,
      "step": 38705
    },
    {
      "epoch": 0.000236236572265625,
      "step": 38705,
      "training_step_time": 0.41631412506103516
    },
    {
      "epoch": 0.00023624267578125,
      "model_forward_time": 0.11507630348205566,
      "step": 38706
    },
    {
      "epoch": 0.00023624267578125,
      "step": 38706,
      "training_step_time": 0.42765164375305176
    },
    {
      "epoch": 0.000236248779296875,
      "model_forward_time": 0.11521244049072266,
      "step": 38707
    },
    {
      "epoch": 0.000236248779296875,
      "step": 38707,
      "training_step_time": 0.5028905868530273
    },
    {
      "epoch": 0.0002362548828125,
      "model_forward_time": 0.11617445945739746,
      "step": 38708
    },
    {
      "epoch": 0.0002362548828125,
      "step": 38708,
      "training_step_time": 0.4250905513763428
    },
    {
      "epoch": 0.000236260986328125,
      "model_forward_time": 0.115692138671875,
      "step": 38709
    },
    {
      "epoch": 0.000236260986328125,
      "step": 38709,
      "training_step_time": 0.37943124771118164
    },
    {
      "epoch": 0.00023626708984375,
      "grad_norm": 0.15471580624580383,
      "learning_rate": 3.06496281673201e-05,
      "loss": 0.0399,
      "step": 38710
    },
    {
      "epoch": 0.00023626708984375,
      "model_forward_time": 0.11529374122619629,
      "step": 38710
    },
    {
      "epoch": 0.00023626708984375,
      "step": 38710,
      "training_step_time": 0.3999626636505127
    },
    {
      "epoch": 0.000236273193359375,
      "model_forward_time": 0.11509394645690918,
      "step": 38711
    },
    {
      "epoch": 0.000236273193359375,
      "step": 38711,
      "training_step_time": 0.4102754592895508
    },
    {
      "epoch": 0.000236279296875,
      "model_forward_time": 0.11469411849975586,
      "step": 38712
    },
    {
      "epoch": 0.000236279296875,
      "step": 38712,
      "training_step_time": 0.4051239490509033
    },
    {
      "epoch": 0.000236285400390625,
      "model_forward_time": 0.1162254810333252,
      "step": 38713
    },
    {
      "epoch": 0.000236285400390625,
      "step": 38713,
      "training_step_time": 0.39720845222473145
    },
    {
      "epoch": 0.00023629150390625,
      "model_forward_time": 0.11544227600097656,
      "step": 38714
    },
    {
      "epoch": 0.00023629150390625,
      "step": 38714,
      "training_step_time": 0.4039890766143799
    },
    {
      "epoch": 0.000236297607421875,
      "model_forward_time": 0.11559510231018066,
      "step": 38715
    },
    {
      "epoch": 0.000236297607421875,
      "step": 38715,
      "training_step_time": 0.44239044189453125
    },
    {
      "epoch": 0.0002363037109375,
      "model_forward_time": 0.1152791976928711,
      "step": 38716
    },
    {
      "epoch": 0.0002363037109375,
      "step": 38716,
      "training_step_time": 0.44042444229125977
    },
    {
      "epoch": 0.000236309814453125,
      "model_forward_time": 0.11507654190063477,
      "step": 38717
    },
    {
      "epoch": 0.000236309814453125,
      "step": 38717,
      "training_step_time": 0.4050614833831787
    },
    {
      "epoch": 0.00023631591796875,
      "model_forward_time": 0.11499595642089844,
      "step": 38718
    },
    {
      "epoch": 0.00023631591796875,
      "step": 38718,
      "training_step_time": 0.4334735870361328
    },
    {
      "epoch": 0.000236322021484375,
      "model_forward_time": 0.11458921432495117,
      "step": 38719
    },
    {
      "epoch": 0.000236322021484375,
      "step": 38719,
      "training_step_time": 0.39745450019836426
    },
    {
      "epoch": 0.000236328125,
      "grad_norm": 0.12265399843454361,
      "learning_rate": 3.062422067739485e-05,
      "loss": 0.0382,
      "step": 38720
    },
    {
      "epoch": 0.000236328125,
      "model_forward_time": 0.11547565460205078,
      "step": 38720
    },
    {
      "epoch": 0.000236328125,
      "step": 38720,
      "training_step_time": 0.45997095108032227
    },
    {
      "epoch": 0.000236334228515625,
      "model_forward_time": 0.11490368843078613,
      "step": 38721
    },
    {
      "epoch": 0.000236334228515625,
      "step": 38721,
      "training_step_time": 0.4282371997833252
    },
    {
      "epoch": 0.00023634033203125,
      "model_forward_time": 0.11472964286804199,
      "step": 38722
    },
    {
      "epoch": 0.00023634033203125,
      "step": 38722,
      "training_step_time": 0.5084912776947021
    },
    {
      "epoch": 0.000236346435546875,
      "model_forward_time": 0.11504077911376953,
      "step": 38723
    },
    {
      "epoch": 0.000236346435546875,
      "step": 38723,
      "training_step_time": 0.4309701919555664
    },
    {
      "epoch": 0.0002363525390625,
      "model_forward_time": 0.11490535736083984,
      "step": 38724
    },
    {
      "epoch": 0.0002363525390625,
      "step": 38724,
      "training_step_time": 0.3906064033508301
    },
    {
      "epoch": 0.000236358642578125,
      "model_forward_time": 0.11556100845336914,
      "step": 38725
    },
    {
      "epoch": 0.000236358642578125,
      "step": 38725,
      "training_step_time": 0.392653226852417
    },
    {
      "epoch": 0.00023636474609375,
      "model_forward_time": 0.11619162559509277,
      "step": 38726
    },
    {
      "epoch": 0.00023636474609375,
      "step": 38726,
      "training_step_time": 0.3849525451660156
    },
    {
      "epoch": 0.000236370849609375,
      "model_forward_time": 0.11577463150024414,
      "step": 38727
    },
    {
      "epoch": 0.000236370849609375,
      "step": 38727,
      "training_step_time": 0.39362645149230957
    },
    {
      "epoch": 0.000236376953125,
      "model_forward_time": 0.11499905586242676,
      "step": 38728
    },
    {
      "epoch": 0.000236376953125,
      "step": 38728,
      "training_step_time": 0.38802433013916016
    },
    {
      "epoch": 0.000236383056640625,
      "model_forward_time": 0.11475777626037598,
      "step": 38729
    },
    {
      "epoch": 0.000236383056640625,
      "step": 38729,
      "training_step_time": 0.3864884376525879
    },
    {
      "epoch": 0.00023638916015625,
      "grad_norm": 0.13371668756008148,
      "learning_rate": 3.059881907331979e-05,
      "loss": 0.0407,
      "step": 38730
    },
    {
      "epoch": 0.00023638916015625,
      "model_forward_time": 0.11514449119567871,
      "step": 38730
    },
    {
      "epoch": 0.00023638916015625,
      "step": 38730,
      "training_step_time": 0.4360923767089844
    },
    {
      "epoch": 0.000236395263671875,
      "model_forward_time": 0.1149590015411377,
      "step": 38731
    },
    {
      "epoch": 0.000236395263671875,
      "step": 38731,
      "training_step_time": 0.4987494945526123
    },
    {
      "epoch": 0.0002364013671875,
      "model_forward_time": 0.11504602432250977,
      "step": 38732
    },
    {
      "epoch": 0.0002364013671875,
      "step": 38732,
      "training_step_time": 0.38773012161254883
    },
    {
      "epoch": 0.000236407470703125,
      "model_forward_time": 0.11554384231567383,
      "step": 38733
    },
    {
      "epoch": 0.000236407470703125,
      "step": 38733,
      "training_step_time": 0.4748685359954834
    },
    {
      "epoch": 0.00023641357421875,
      "model_forward_time": 0.11524438858032227,
      "step": 38734
    },
    {
      "epoch": 0.00023641357421875,
      "step": 38734,
      "training_step_time": 0.40325021743774414
    },
    {
      "epoch": 0.000236419677734375,
      "model_forward_time": 0.11504387855529785,
      "step": 38735
    },
    {
      "epoch": 0.000236419677734375,
      "step": 38735,
      "training_step_time": 0.4666156768798828
    },
    {
      "epoch": 0.00023642578125,
      "model_forward_time": 0.11494660377502441,
      "step": 38736
    },
    {
      "epoch": 0.00023642578125,
      "step": 38736,
      "training_step_time": 0.3986647129058838
    },
    {
      "epoch": 0.000236431884765625,
      "model_forward_time": 0.11519575119018555,
      "step": 38737
    },
    {
      "epoch": 0.000236431884765625,
      "step": 38737,
      "training_step_time": 0.5017311573028564
    },
    {
      "epoch": 0.00023643798828125,
      "model_forward_time": 0.11461043357849121,
      "step": 38738
    },
    {
      "epoch": 0.00023643798828125,
      "step": 38738,
      "training_step_time": 0.3937563896179199
    },
    {
      "epoch": 0.000236444091796875,
      "model_forward_time": 0.11676979064941406,
      "step": 38739
    },
    {
      "epoch": 0.000236444091796875,
      "step": 38739,
      "training_step_time": 0.38933515548706055
    },
    {
      "epoch": 0.0002364501953125,
      "grad_norm": 0.11504591256380081,
      "learning_rate": 3.057342336281122e-05,
      "loss": 0.0461,
      "step": 38740
    },
    {
      "epoch": 0.0002364501953125,
      "model_forward_time": 0.11457967758178711,
      "step": 38740
    },
    {
      "epoch": 0.0002364501953125,
      "step": 38740,
      "training_step_time": 0.3888583183288574
    },
    {
      "epoch": 0.000236456298828125,
      "model_forward_time": 0.11526918411254883,
      "step": 38741
    },
    {
      "epoch": 0.000236456298828125,
      "step": 38741,
      "training_step_time": 0.39247894287109375
    },
    {
      "epoch": 0.00023646240234375,
      "model_forward_time": 0.11479043960571289,
      "step": 38742
    },
    {
      "epoch": 0.00023646240234375,
      "step": 38742,
      "training_step_time": 0.39714956283569336
    },
    {
      "epoch": 0.000236468505859375,
      "model_forward_time": 0.11490702629089355,
      "step": 38743
    },
    {
      "epoch": 0.000236468505859375,
      "step": 38743,
      "training_step_time": 0.4399394989013672
    },
    {
      "epoch": 0.000236474609375,
      "model_forward_time": 0.11474227905273438,
      "step": 38744
    },
    {
      "epoch": 0.000236474609375,
      "step": 38744,
      "training_step_time": 0.4025282859802246
    },
    {
      "epoch": 0.000236480712890625,
      "model_forward_time": 0.11533212661743164,
      "step": 38745
    },
    {
      "epoch": 0.000236480712890625,
      "step": 38745,
      "training_step_time": 0.4257333278656006
    },
    {
      "epoch": 0.00023648681640625,
      "model_forward_time": 0.1152045726776123,
      "step": 38746
    },
    {
      "epoch": 0.00023648681640625,
      "step": 38746,
      "training_step_time": 0.474102258682251
    },
    {
      "epoch": 0.000236492919921875,
      "model_forward_time": 0.11539840698242188,
      "step": 38747
    },
    {
      "epoch": 0.000236492919921875,
      "step": 38747,
      "training_step_time": 0.5034263134002686
    },
    {
      "epoch": 0.0002364990234375,
      "model_forward_time": 0.11478519439697266,
      "step": 38748
    },
    {
      "epoch": 0.0002364990234375,
      "step": 38748,
      "training_step_time": 0.44254589080810547
    },
    {
      "epoch": 0.000236505126953125,
      "model_forward_time": 0.11474871635437012,
      "step": 38749
    },
    {
      "epoch": 0.000236505126953125,
      "step": 38749,
      "training_step_time": 0.4800913333892822
    },
    {
      "epoch": 0.00023651123046875,
      "grad_norm": 0.09929671883583069,
      "learning_rate": 3.0548033553583705e-05,
      "loss": 0.0385,
      "step": 38750
    },
    {
      "epoch": 0.00023651123046875,
      "model_forward_time": 0.11452555656433105,
      "step": 38750
    },
    {
      "epoch": 0.00023651123046875,
      "step": 38750,
      "training_step_time": 0.4723505973815918
    },
    {
      "epoch": 0.000236517333984375,
      "model_forward_time": 0.11519551277160645,
      "step": 38751
    },
    {
      "epoch": 0.000236517333984375,
      "step": 38751,
      "training_step_time": 0.49914002418518066
    },
    {
      "epoch": 0.0002365234375,
      "model_forward_time": 0.11630892753601074,
      "step": 38752
    },
    {
      "epoch": 0.0002365234375,
      "step": 38752,
      "training_step_time": 0.39376235008239746
    },
    {
      "epoch": 0.000236529541015625,
      "model_forward_time": 0.11464834213256836,
      "step": 38753
    },
    {
      "epoch": 0.000236529541015625,
      "step": 38753,
      "training_step_time": 0.39469432830810547
    },
    {
      "epoch": 0.00023653564453125,
      "model_forward_time": 0.11476302146911621,
      "step": 38754
    },
    {
      "epoch": 0.00023653564453125,
      "step": 38754,
      "training_step_time": 0.3848695755004883
    },
    {
      "epoch": 0.000236541748046875,
      "model_forward_time": 0.11482548713684082,
      "step": 38755
    },
    {
      "epoch": 0.000236541748046875,
      "step": 38755,
      "training_step_time": 0.3894822597503662
    },
    {
      "epoch": 0.0002365478515625,
      "model_forward_time": 0.11512422561645508,
      "step": 38756
    },
    {
      "epoch": 0.0002365478515625,
      "step": 38756,
      "training_step_time": 0.41940855979919434
    },
    {
      "epoch": 0.000236553955078125,
      "model_forward_time": 0.11571478843688965,
      "step": 38757
    },
    {
      "epoch": 0.000236553955078125,
      "step": 38757,
      "training_step_time": 0.39232468605041504
    },
    {
      "epoch": 0.00023656005859375,
      "model_forward_time": 0.11561441421508789,
      "step": 38758
    },
    {
      "epoch": 0.00023656005859375,
      "step": 38758,
      "training_step_time": 0.43226146697998047
    },
    {
      "epoch": 0.000236566162109375,
      "model_forward_time": 0.11542630195617676,
      "step": 38759
    },
    {
      "epoch": 0.000236566162109375,
      "step": 38759,
      "training_step_time": 0.40469932556152344
    },
    {
      "epoch": 0.000236572265625,
      "grad_norm": 0.10883892327547073,
      "learning_rate": 3.052264965335e-05,
      "loss": 0.0379,
      "step": 38760
    },
    {
      "epoch": 0.000236572265625,
      "model_forward_time": 0.11517572402954102,
      "step": 38760
    },
    {
      "epoch": 0.000236572265625,
      "step": 38760,
      "training_step_time": 0.41977548599243164
    },
    {
      "epoch": 0.000236578369140625,
      "model_forward_time": 0.1159064769744873,
      "step": 38761
    },
    {
      "epoch": 0.000236578369140625,
      "step": 38761,
      "training_step_time": 0.40052294731140137
    },
    {
      "epoch": 0.00023658447265625,
      "model_forward_time": 0.11536788940429688,
      "step": 38762
    },
    {
      "epoch": 0.00023658447265625,
      "step": 38762,
      "training_step_time": 0.4431595802307129
    },
    {
      "epoch": 0.000236590576171875,
      "model_forward_time": 0.1148231029510498,
      "step": 38763
    },
    {
      "epoch": 0.000236590576171875,
      "step": 38763,
      "training_step_time": 0.46713852882385254
    },
    {
      "epoch": 0.0002365966796875,
      "model_forward_time": 0.11595940589904785,
      "step": 38764
    },
    {
      "epoch": 0.0002365966796875,
      "step": 38764,
      "training_step_time": 0.4105224609375
    },
    {
      "epoch": 0.000236602783203125,
      "model_forward_time": 0.11559891700744629,
      "step": 38765
    },
    {
      "epoch": 0.000236602783203125,
      "step": 38765,
      "training_step_time": 0.41551709175109863
    },
    {
      "epoch": 0.00023660888671875,
      "model_forward_time": 0.11551332473754883,
      "step": 38766
    },
    {
      "epoch": 0.00023660888671875,
      "step": 38766,
      "training_step_time": 0.45177721977233887
    },
    {
      "epoch": 0.000236614990234375,
      "model_forward_time": 0.11512327194213867,
      "step": 38767
    },
    {
      "epoch": 0.000236614990234375,
      "step": 38767,
      "training_step_time": 0.39405250549316406
    },
    {
      "epoch": 0.00023662109375,
      "model_forward_time": 0.11518502235412598,
      "step": 38768
    },
    {
      "epoch": 0.00023662109375,
      "step": 38768,
      "training_step_time": 0.3875112533569336
    },
    {
      "epoch": 0.000236627197265625,
      "model_forward_time": 0.11620235443115234,
      "step": 38769
    },
    {
      "epoch": 0.000236627197265625,
      "step": 38769,
      "training_step_time": 0.39612531661987305
    },
    {
      "epoch": 0.00023663330078125,
      "grad_norm": 0.1438494324684143,
      "learning_rate": 3.049727166982105e-05,
      "loss": 0.0415,
      "step": 38770
    },
    {
      "epoch": 0.00023663330078125,
      "model_forward_time": 0.11517596244812012,
      "step": 38770
    },
    {
      "epoch": 0.00023663330078125,
      "step": 38770,
      "training_step_time": 0.4068937301635742
    },
    {
      "epoch": 0.000236639404296875,
      "model_forward_time": 0.11579751968383789,
      "step": 38771
    },
    {
      "epoch": 0.000236639404296875,
      "step": 38771,
      "training_step_time": 0.41481614112854004
    },
    {
      "epoch": 0.0002366455078125,
      "model_forward_time": 0.11553144454956055,
      "step": 38772
    },
    {
      "epoch": 0.0002366455078125,
      "step": 38772,
      "training_step_time": 0.45388317108154297
    },
    {
      "epoch": 0.000236651611328125,
      "model_forward_time": 0.11536645889282227,
      "step": 38773
    },
    {
      "epoch": 0.000236651611328125,
      "step": 38773,
      "training_step_time": 0.39264822006225586
    },
    {
      "epoch": 0.00023665771484375,
      "model_forward_time": 0.11497616767883301,
      "step": 38774
    },
    {
      "epoch": 0.00023665771484375,
      "step": 38774,
      "training_step_time": 0.43154454231262207
    },
    {
      "epoch": 0.000236663818359375,
      "model_forward_time": 0.11473870277404785,
      "step": 38775
    },
    {
      "epoch": 0.000236663818359375,
      "step": 38775,
      "training_step_time": 0.3964574337005615
    },
    {
      "epoch": 0.000236669921875,
      "model_forward_time": 0.11510992050170898,
      "step": 38776
    },
    {
      "epoch": 0.000236669921875,
      "step": 38776,
      "training_step_time": 0.3991105556488037
    },
    {
      "epoch": 0.000236676025390625,
      "model_forward_time": 0.1145322322845459,
      "step": 38777
    },
    {
      "epoch": 0.000236676025390625,
      "step": 38777,
      "training_step_time": 0.4326465129852295
    },
    {
      "epoch": 0.00023668212890625,
      "model_forward_time": 0.11561274528503418,
      "step": 38778
    },
    {
      "epoch": 0.00023668212890625,
      "step": 38778,
      "training_step_time": 0.47402405738830566
    },
    {
      "epoch": 0.000236688232421875,
      "model_forward_time": 0.11504912376403809,
      "step": 38779
    },
    {
      "epoch": 0.000236688232421875,
      "step": 38779,
      "training_step_time": 0.4121286869049072
    },
    {
      "epoch": 0.0002366943359375,
      "grad_norm": 0.13828198611736298,
      "learning_rate": 3.0471899610706038e-05,
      "loss": 0.0407,
      "step": 38780
    },
    {
      "epoch": 0.0002366943359375,
      "model_forward_time": 0.1148977279663086,
      "step": 38780
    },
    {
      "epoch": 0.0002366943359375,
      "step": 38780,
      "training_step_time": 0.49996066093444824
    },
    {
      "epoch": 0.000236700439453125,
      "model_forward_time": 0.11496257781982422,
      "step": 38781
    },
    {
      "epoch": 0.000236700439453125,
      "step": 38781,
      "training_step_time": 0.42281436920166016
    },
    {
      "epoch": 0.00023670654296875,
      "model_forward_time": 0.11491775512695312,
      "step": 38782
    },
    {
      "epoch": 0.00023670654296875,
      "step": 38782,
      "training_step_time": 0.39473962783813477
    },
    {
      "epoch": 0.000236712646484375,
      "model_forward_time": 0.11506509780883789,
      "step": 38783
    },
    {
      "epoch": 0.000236712646484375,
      "step": 38783,
      "training_step_time": 0.40402770042419434
    },
    {
      "epoch": 0.00023671875,
      "model_forward_time": 0.11472296714782715,
      "step": 38784
    },
    {
      "epoch": 0.00023671875,
      "step": 38784,
      "training_step_time": 0.4232163429260254
    },
    {
      "epoch": 0.000236724853515625,
      "model_forward_time": 0.11549019813537598,
      "step": 38785
    },
    {
      "epoch": 0.000236724853515625,
      "step": 38785,
      "training_step_time": 0.3921642303466797
    },
    {
      "epoch": 0.00023673095703125,
      "model_forward_time": 0.11524200439453125,
      "step": 38786
    },
    {
      "epoch": 0.00023673095703125,
      "step": 38786,
      "training_step_time": 0.3946363925933838
    },
    {
      "epoch": 0.000236737060546875,
      "model_forward_time": 0.11448955535888672,
      "step": 38787
    },
    {
      "epoch": 0.000236737060546875,
      "step": 38787,
      "training_step_time": 0.47826218605041504
    },
    {
      "epoch": 0.0002367431640625,
      "model_forward_time": 0.11516141891479492,
      "step": 38788
    },
    {
      "epoch": 0.0002367431640625,
      "step": 38788,
      "training_step_time": 0.4242286682128906
    },
    {
      "epoch": 0.000236749267578125,
      "model_forward_time": 0.11512064933776855,
      "step": 38789
    },
    {
      "epoch": 0.000236749267578125,
      "step": 38789,
      "training_step_time": 0.4734621047973633
    },
    {
      "epoch": 0.00023675537109375,
      "grad_norm": 0.08342114835977554,
      "learning_rate": 3.0446533483712304e-05,
      "loss": 0.0433,
      "step": 38790
    },
    {
      "epoch": 0.00023675537109375,
      "model_forward_time": 0.11403536796569824,
      "step": 38790
    },
    {
      "epoch": 0.00023675537109375,
      "step": 38790,
      "training_step_time": 0.3918917179107666
    },
    {
      "epoch": 0.000236761474609375,
      "model_forward_time": 0.11523938179016113,
      "step": 38791
    },
    {
      "epoch": 0.000236761474609375,
      "step": 38791,
      "training_step_time": 0.42819857597351074
    },
    {
      "epoch": 0.000236767578125,
      "model_forward_time": 0.11479592323303223,
      "step": 38792
    },
    {
      "epoch": 0.000236767578125,
      "step": 38792,
      "training_step_time": 0.4117133617401123
    },
    {
      "epoch": 0.000236773681640625,
      "model_forward_time": 0.11492347717285156,
      "step": 38793
    },
    {
      "epoch": 0.000236773681640625,
      "step": 38793,
      "training_step_time": 0.38706159591674805
    },
    {
      "epoch": 0.00023677978515625,
      "model_forward_time": 0.11490035057067871,
      "step": 38794
    },
    {
      "epoch": 0.00023677978515625,
      "step": 38794,
      "training_step_time": 0.4300050735473633
    },
    {
      "epoch": 0.000236785888671875,
      "model_forward_time": 0.11478734016418457,
      "step": 38795
    },
    {
      "epoch": 0.000236785888671875,
      "step": 38795,
      "training_step_time": 0.41623544692993164
    },
    {
      "epoch": 0.0002367919921875,
      "model_forward_time": 0.11542367935180664,
      "step": 38796
    },
    {
      "epoch": 0.0002367919921875,
      "step": 38796,
      "training_step_time": 0.39810776710510254
    },
    {
      "epoch": 0.000236798095703125,
      "model_forward_time": 0.11501479148864746,
      "step": 38797
    },
    {
      "epoch": 0.000236798095703125,
      "step": 38797,
      "training_step_time": 0.42667150497436523
    },
    {
      "epoch": 0.00023680419921875,
      "model_forward_time": 0.11521172523498535,
      "step": 38798
    },
    {
      "epoch": 0.00023680419921875,
      "step": 38798,
      "training_step_time": 0.3895263671875
    },
    {
      "epoch": 0.000236810302734375,
      "model_forward_time": 0.11456751823425293,
      "step": 38799
    },
    {
      "epoch": 0.000236810302734375,
      "step": 38799,
      "training_step_time": 0.39068603515625
    },
    {
      "epoch": 0.00023681640625,
      "grad_norm": 0.15050844848155975,
      "learning_rate": 3.042117329654544e-05,
      "loss": 0.0387,
      "step": 38800
    },
    {
      "epoch": 0.00023681640625,
      "model_forward_time": 0.11502432823181152,
      "step": 38800
    },
    {
      "epoch": 0.00023681640625,
      "step": 38800,
      "training_step_time": 0.3898296356201172
    },
    {
      "epoch": 0.000236822509765625,
      "model_forward_time": 0.1151881217956543,
      "step": 38801
    },
    {
      "epoch": 0.000236822509765625,
      "step": 38801,
      "training_step_time": 0.49942493438720703
    },
    {
      "epoch": 0.00023682861328125,
      "model_forward_time": 0.11508035659790039,
      "step": 38802
    },
    {
      "epoch": 0.00023682861328125,
      "step": 38802,
      "training_step_time": 0.4282832145690918
    },
    {
      "epoch": 0.000236834716796875,
      "model_forward_time": 0.11506366729736328,
      "step": 38803
    },
    {
      "epoch": 0.000236834716796875,
      "step": 38803,
      "training_step_time": 0.4928267002105713
    },
    {
      "epoch": 0.0002368408203125,
      "model_forward_time": 0.11572742462158203,
      "step": 38804
    },
    {
      "epoch": 0.0002368408203125,
      "step": 38804,
      "training_step_time": 0.3782203197479248
    },
    {
      "epoch": 0.000236846923828125,
      "model_forward_time": 0.11472773551940918,
      "step": 38805
    },
    {
      "epoch": 0.000236846923828125,
      "step": 38805,
      "training_step_time": 0.3952963352203369
    },
    {
      "epoch": 0.00023685302734375,
      "model_forward_time": 0.11527180671691895,
      "step": 38806
    },
    {
      "epoch": 0.00023685302734375,
      "step": 38806,
      "training_step_time": 0.47029590606689453
    },
    {
      "epoch": 0.000236859130859375,
      "model_forward_time": 0.11517691612243652,
      "step": 38807
    },
    {
      "epoch": 0.000236859130859375,
      "step": 38807,
      "training_step_time": 0.45304179191589355
    },
    {
      "epoch": 0.000236865234375,
      "model_forward_time": 0.11524772644042969,
      "step": 38808
    },
    {
      "epoch": 0.000236865234375,
      "step": 38808,
      "training_step_time": 0.454282283782959
    },
    {
      "epoch": 0.000236871337890625,
      "model_forward_time": 0.1152489185333252,
      "step": 38809
    },
    {
      "epoch": 0.000236871337890625,
      "step": 38809,
      "training_step_time": 0.4997880458831787
    },
    {
      "epoch": 0.00023687744140625,
      "grad_norm": 0.14354927837848663,
      "learning_rate": 3.039581905690916e-05,
      "loss": 0.0401,
      "step": 38810
    },
    {
      "epoch": 0.00023687744140625,
      "model_forward_time": 0.11553525924682617,
      "step": 38810
    },
    {
      "epoch": 0.00023687744140625,
      "step": 38810,
      "training_step_time": 0.40935754776000977
    },
    {
      "epoch": 0.000236883544921875,
      "model_forward_time": 0.11504817008972168,
      "step": 38811
    },
    {
      "epoch": 0.000236883544921875,
      "step": 38811,
      "training_step_time": 0.39040279388427734
    },
    {
      "epoch": 0.0002368896484375,
      "model_forward_time": 0.11512446403503418,
      "step": 38812
    },
    {
      "epoch": 0.0002368896484375,
      "step": 38812,
      "training_step_time": 0.40775489807128906
    },
    {
      "epoch": 0.000236895751953125,
      "model_forward_time": 0.11567115783691406,
      "step": 38813
    },
    {
      "epoch": 0.000236895751953125,
      "step": 38813,
      "training_step_time": 0.39490771293640137
    },
    {
      "epoch": 0.00023690185546875,
      "model_forward_time": 0.11556196212768555,
      "step": 38814
    },
    {
      "epoch": 0.00023690185546875,
      "step": 38814,
      "training_step_time": 0.38260483741760254
    },
    {
      "epoch": 0.000236907958984375,
      "model_forward_time": 0.11471891403198242,
      "step": 38815
    },
    {
      "epoch": 0.000236907958984375,
      "step": 38815,
      "training_step_time": 0.4315354824066162
    },
    {
      "epoch": 0.0002369140625,
      "model_forward_time": 0.11474084854125977,
      "step": 38816
    },
    {
      "epoch": 0.0002369140625,
      "step": 38816,
      "training_step_time": 0.3867640495300293
    },
    {
      "epoch": 0.000236920166015625,
      "model_forward_time": 0.11513113975524902,
      "step": 38817
    },
    {
      "epoch": 0.000236920166015625,
      "step": 38817,
      "training_step_time": 0.47612547874450684
    },
    {
      "epoch": 0.00023692626953125,
      "model_forward_time": 0.11605453491210938,
      "step": 38818
    },
    {
      "epoch": 0.00023692626953125,
      "step": 38818,
      "training_step_time": 0.3859903812408447
    },
    {
      "epoch": 0.000236932373046875,
      "model_forward_time": 0.114837646484375,
      "step": 38819
    },
    {
      "epoch": 0.000236932373046875,
      "step": 38819,
      "training_step_time": 0.39118313789367676
    },
    {
      "epoch": 0.0002369384765625,
      "grad_norm": 0.10220202058553696,
      "learning_rate": 3.0370470772505433e-05,
      "loss": 0.0397,
      "step": 38820
    },
    {
      "epoch": 0.0002369384765625,
      "model_forward_time": 0.11519861221313477,
      "step": 38820
    },
    {
      "epoch": 0.0002369384765625,
      "step": 38820,
      "training_step_time": 0.3989553451538086
    },
    {
      "epoch": 0.000236944580078125,
      "model_forward_time": 0.11416864395141602,
      "step": 38821
    },
    {
      "epoch": 0.000236944580078125,
      "step": 38821,
      "training_step_time": 0.40399622917175293
    },
    {
      "epoch": 0.00023695068359375,
      "model_forward_time": 0.11568832397460938,
      "step": 38822
    },
    {
      "epoch": 0.00023695068359375,
      "step": 38822,
      "training_step_time": 0.4810478687286377
    },
    {
      "epoch": 0.000236956787109375,
      "model_forward_time": 0.1150820255279541,
      "step": 38823
    },
    {
      "epoch": 0.000236956787109375,
      "step": 38823,
      "training_step_time": 0.47852444648742676
    },
    {
      "epoch": 0.000236962890625,
      "model_forward_time": 0.11506080627441406,
      "step": 38824
    },
    {
      "epoch": 0.000236962890625,
      "step": 38824,
      "training_step_time": 0.38966917991638184
    },
    {
      "epoch": 0.000236968994140625,
      "model_forward_time": 0.1147458553314209,
      "step": 38825
    },
    {
      "epoch": 0.000236968994140625,
      "step": 38825,
      "training_step_time": 0.428180456161499
    },
    {
      "epoch": 0.00023697509765625,
      "model_forward_time": 0.11503195762634277,
      "step": 38826
    },
    {
      "epoch": 0.00023697509765625,
      "step": 38826,
      "training_step_time": 0.3887507915496826
    },
    {
      "epoch": 0.000236981201171875,
      "model_forward_time": 0.11570882797241211,
      "step": 38827
    },
    {
      "epoch": 0.000236981201171875,
      "step": 38827,
      "training_step_time": 0.393155574798584
    },
    {
      "epoch": 0.0002369873046875,
      "model_forward_time": 0.11436843872070312,
      "step": 38828
    },
    {
      "epoch": 0.0002369873046875,
      "step": 38828,
      "training_step_time": 0.4006493091583252
    },
    {
      "epoch": 0.000236993408203125,
      "model_forward_time": 0.11513853073120117,
      "step": 38829
    },
    {
      "epoch": 0.000236993408203125,
      "step": 38829,
      "training_step_time": 0.3906137943267822
    },
    {
      "epoch": 0.00023699951171875,
      "grad_norm": 0.11120644211769104,
      "learning_rate": 3.034512845103441e-05,
      "loss": 0.0355,
      "step": 38830
    },
    {
      "epoch": 0.00023699951171875,
      "model_forward_time": 0.11582326889038086,
      "step": 38830
    },
    {
      "epoch": 0.00023699951171875,
      "step": 38830,
      "training_step_time": 0.3975071907043457
    },
    {
      "epoch": 0.000237005615234375,
      "model_forward_time": 0.11513447761535645,
      "step": 38831
    },
    {
      "epoch": 0.000237005615234375,
      "step": 38831,
      "training_step_time": 0.42984795570373535
    },
    {
      "epoch": 0.00023701171875,
      "model_forward_time": 0.11460542678833008,
      "step": 38832
    },
    {
      "epoch": 0.00023701171875,
      "step": 38832,
      "training_step_time": 0.48930835723876953
    },
    {
      "epoch": 0.000237017822265625,
      "model_forward_time": 0.11499214172363281,
      "step": 38833
    },
    {
      "epoch": 0.000237017822265625,
      "step": 38833,
      "training_step_time": 0.39464592933654785
    },
    {
      "epoch": 0.00023702392578125,
      "model_forward_time": 0.11529278755187988,
      "step": 38834
    },
    {
      "epoch": 0.00023702392578125,
      "step": 38834,
      "training_step_time": 0.3847787380218506
    },
    {
      "epoch": 0.000237030029296875,
      "model_forward_time": 0.1151125431060791,
      "step": 38835
    },
    {
      "epoch": 0.000237030029296875,
      "step": 38835,
      "training_step_time": 0.4208352565765381
    },
    {
      "epoch": 0.0002370361328125,
      "model_forward_time": 0.11516690254211426,
      "step": 38836
    },
    {
      "epoch": 0.0002370361328125,
      "step": 38836,
      "training_step_time": 0.40287280082702637
    },
    {
      "epoch": 0.000237042236328125,
      "model_forward_time": 0.11552643775939941,
      "step": 38837
    },
    {
      "epoch": 0.000237042236328125,
      "step": 38837,
      "training_step_time": 0.5016798973083496
    },
    {
      "epoch": 0.00023704833984375,
      "model_forward_time": 0.11494803428649902,
      "step": 38838
    },
    {
      "epoch": 0.00023704833984375,
      "step": 38838,
      "training_step_time": 0.4882938861846924
    },
    {
      "epoch": 0.000237054443359375,
      "model_forward_time": 0.11452126502990723,
      "step": 38839
    },
    {
      "epoch": 0.000237054443359375,
      "step": 38839,
      "training_step_time": 0.38897252082824707
    },
    {
      "epoch": 0.000237060546875,
      "grad_norm": 0.12807512283325195,
      "learning_rate": 3.03197921001944e-05,
      "loss": 0.0439,
      "step": 38840
    },
    {
      "epoch": 0.000237060546875,
      "model_forward_time": 0.11466646194458008,
      "step": 38840
    },
    {
      "epoch": 0.000237060546875,
      "step": 38840,
      "training_step_time": 0.3863053321838379
    },
    {
      "epoch": 0.000237066650390625,
      "model_forward_time": 0.11619234085083008,
      "step": 38841
    },
    {
      "epoch": 0.000237066650390625,
      "step": 38841,
      "training_step_time": 0.38596081733703613
    },
    {
      "epoch": 0.00023707275390625,
      "model_forward_time": 0.11446285247802734,
      "step": 38842
    },
    {
      "epoch": 0.00023707275390625,
      "step": 38842,
      "training_step_time": 0.3907139301300049
    },
    {
      "epoch": 0.000237078857421875,
      "model_forward_time": 0.1158454418182373,
      "step": 38843
    },
    {
      "epoch": 0.000237078857421875,
      "step": 38843,
      "training_step_time": 0.39370179176330566
    },
    {
      "epoch": 0.0002370849609375,
      "model_forward_time": 0.11498355865478516,
      "step": 38844
    },
    {
      "epoch": 0.0002370849609375,
      "step": 38844,
      "training_step_time": 0.393857479095459
    },
    {
      "epoch": 0.000237091064453125,
      "model_forward_time": 0.11576628684997559,
      "step": 38845
    },
    {
      "epoch": 0.000237091064453125,
      "step": 38845,
      "training_step_time": 0.40405941009521484
    },
    {
      "epoch": 0.00023709716796875,
      "model_forward_time": 0.1152029037475586,
      "step": 38846
    },
    {
      "epoch": 0.00023709716796875,
      "step": 38846,
      "training_step_time": 0.4849097728729248
    },
    {
      "epoch": 0.000237103271484375,
      "model_forward_time": 0.11543512344360352,
      "step": 38847
    },
    {
      "epoch": 0.000237103271484375,
      "step": 38847,
      "training_step_time": 0.3995704650878906
    },
    {
      "epoch": 0.000237109375,
      "model_forward_time": 0.1154484748840332,
      "step": 38848
    },
    {
      "epoch": 0.000237109375,
      "step": 38848,
      "training_step_time": 0.3945198059082031
    },
    {
      "epoch": 0.000237115478515625,
      "model_forward_time": 0.11526870727539062,
      "step": 38849
    },
    {
      "epoch": 0.000237115478515625,
      "step": 38849,
      "training_step_time": 0.46050190925598145
    },
    {
      "epoch": 0.00023712158203125,
      "grad_norm": 0.13432466983795166,
      "learning_rate": 3.0294461727681932e-05,
      "loss": 0.0372,
      "step": 38850
    },
    {
      "epoch": 0.00023712158203125,
      "model_forward_time": 0.11506104469299316,
      "step": 38850
    },
    {
      "epoch": 0.00023712158203125,
      "step": 38850,
      "training_step_time": 0.4362761974334717
    },
    {
      "epoch": 0.000237127685546875,
      "model_forward_time": 0.11455917358398438,
      "step": 38851
    },
    {
      "epoch": 0.000237127685546875,
      "step": 38851,
      "training_step_time": 0.3669321537017822
    },
    {
      "epoch": 0.0002371337890625,
      "model_forward_time": 0.11495304107666016,
      "step": 38852
    },
    {
      "epoch": 0.0002371337890625,
      "step": 38852,
      "training_step_time": 0.46274781227111816
    },
    {
      "epoch": 0.000237139892578125,
      "model_forward_time": 0.11536478996276855,
      "step": 38853
    },
    {
      "epoch": 0.000237139892578125,
      "step": 38853,
      "training_step_time": 0.4153006076812744
    },
    {
      "epoch": 0.00023714599609375,
      "model_forward_time": 0.11525297164916992,
      "step": 38854
    },
    {
      "epoch": 0.00023714599609375,
      "step": 38854,
      "training_step_time": 0.38530445098876953
    },
    {
      "epoch": 0.000237152099609375,
      "model_forward_time": 0.11448812484741211,
      "step": 38855
    },
    {
      "epoch": 0.000237152099609375,
      "step": 38855,
      "training_step_time": 0.3875107765197754
    },
    {
      "epoch": 0.000237158203125,
      "model_forward_time": 0.11495757102966309,
      "step": 38856
    },
    {
      "epoch": 0.000237158203125,
      "step": 38856,
      "training_step_time": 0.3947625160217285
    },
    {
      "epoch": 0.000237164306640625,
      "model_forward_time": 0.11531996726989746,
      "step": 38857
    },
    {
      "epoch": 0.000237164306640625,
      "step": 38857,
      "training_step_time": 0.39438557624816895
    },
    {
      "epoch": 0.00023717041015625,
      "model_forward_time": 0.11535763740539551,
      "step": 38858
    },
    {
      "epoch": 0.00023717041015625,
      "step": 38858,
      "training_step_time": 0.38995981216430664
    },
    {
      "epoch": 0.000237176513671875,
      "model_forward_time": 0.11518144607543945,
      "step": 38859
    },
    {
      "epoch": 0.000237176513671875,
      "step": 38859,
      "training_step_time": 0.4907345771789551
    },
    {
      "epoch": 0.0002371826171875,
      "grad_norm": 0.1577576845884323,
      "learning_rate": 3.0269137341191677e-05,
      "loss": 0.0387,
      "step": 38860
    },
    {
      "epoch": 0.0002371826171875,
      "model_forward_time": 0.11502242088317871,
      "step": 38860
    },
    {
      "epoch": 0.0002371826171875,
      "step": 38860,
      "training_step_time": 0.45073580741882324
    },
    {
      "epoch": 0.000237188720703125,
      "model_forward_time": 0.11499428749084473,
      "step": 38861
    },
    {
      "epoch": 0.000237188720703125,
      "step": 38861,
      "training_step_time": 0.39676809310913086
    },
    {
      "epoch": 0.00023719482421875,
      "model_forward_time": 0.11510944366455078,
      "step": 38862
    },
    {
      "epoch": 0.00023719482421875,
      "step": 38862,
      "training_step_time": 0.4000699520111084
    },
    {
      "epoch": 0.000237200927734375,
      "model_forward_time": 0.11499547958374023,
      "step": 38863
    },
    {
      "epoch": 0.000237200927734375,
      "step": 38863,
      "training_step_time": 0.3970048427581787
    },
    {
      "epoch": 0.00023720703125,
      "model_forward_time": 0.11628031730651855,
      "step": 38864
    },
    {
      "epoch": 0.00023720703125,
      "step": 38864,
      "training_step_time": 0.3907012939453125
    },
    {
      "epoch": 0.000237213134765625,
      "model_forward_time": 0.11511421203613281,
      "step": 38865
    },
    {
      "epoch": 0.000237213134765625,
      "step": 38865,
      "training_step_time": 0.40927910804748535
    },
    {
      "epoch": 0.00023721923828125,
      "model_forward_time": 0.11522269248962402,
      "step": 38866
    },
    {
      "epoch": 0.00023721923828125,
      "step": 38866,
      "training_step_time": 0.44675493240356445
    },
    {
      "epoch": 0.000237225341796875,
      "model_forward_time": 0.11525416374206543,
      "step": 38867
    },
    {
      "epoch": 0.000237225341796875,
      "step": 38867,
      "training_step_time": 0.5125465393066406
    },
    {
      "epoch": 0.0002372314453125,
      "model_forward_time": 0.11501216888427734,
      "step": 38868
    },
    {
      "epoch": 0.0002372314453125,
      "step": 38868,
      "training_step_time": 0.4185295104980469
    },
    {
      "epoch": 0.000237237548828125,
      "model_forward_time": 0.11432647705078125,
      "step": 38869
    },
    {
      "epoch": 0.000237237548828125,
      "step": 38869,
      "training_step_time": 0.3885674476623535
    },
    {
      "epoch": 0.00023724365234375,
      "grad_norm": 0.14872033894062042,
      "learning_rate": 3.0243818948416543e-05,
      "loss": 0.0457,
      "step": 38870
    },
    {
      "epoch": 0.00023724365234375,
      "model_forward_time": 0.11541438102722168,
      "step": 38870
    },
    {
      "epoch": 0.00023724365234375,
      "step": 38870,
      "training_step_time": 0.391284704208374
    },
    {
      "epoch": 0.000237249755859375,
      "model_forward_time": 0.11542272567749023,
      "step": 38871
    },
    {
      "epoch": 0.000237249755859375,
      "step": 38871,
      "training_step_time": 0.3919401168823242
    },
    {
      "epoch": 0.000237255859375,
      "model_forward_time": 0.11564159393310547,
      "step": 38872
    },
    {
      "epoch": 0.000237255859375,
      "step": 38872,
      "training_step_time": 0.392899751663208
    },
    {
      "epoch": 0.000237261962890625,
      "model_forward_time": 0.1154778003692627,
      "step": 38873
    },
    {
      "epoch": 0.000237261962890625,
      "step": 38873,
      "training_step_time": 0.39449214935302734
    },
    {
      "epoch": 0.00023726806640625,
      "model_forward_time": 0.11515426635742188,
      "step": 38874
    },
    {
      "epoch": 0.00023726806640625,
      "step": 38874,
      "training_step_time": 0.41187596321105957
    },
    {
      "epoch": 0.000237274169921875,
      "model_forward_time": 0.11539173126220703,
      "step": 38875
    },
    {
      "epoch": 0.000237274169921875,
      "step": 38875,
      "training_step_time": 0.4593179225921631
    },
    {
      "epoch": 0.0002372802734375,
      "model_forward_time": 0.11482930183410645,
      "step": 38876
    },
    {
      "epoch": 0.0002372802734375,
      "step": 38876,
      "training_step_time": 0.3928382396697998
    },
    {
      "epoch": 0.000237286376953125,
      "model_forward_time": 0.11588430404663086,
      "step": 38877
    },
    {
      "epoch": 0.000237286376953125,
      "step": 38877,
      "training_step_time": 0.38973402976989746
    },
    {
      "epoch": 0.00023729248046875,
      "model_forward_time": 0.11551499366760254,
      "step": 38878
    },
    {
      "epoch": 0.00023729248046875,
      "step": 38878,
      "training_step_time": 0.4461052417755127
    },
    {
      "epoch": 0.000237298583984375,
      "model_forward_time": 0.11521172523498535,
      "step": 38879
    },
    {
      "epoch": 0.000237298583984375,
      "step": 38879,
      "training_step_time": 0.4010274410247803
    },
    {
      "epoch": 0.0002373046875,
      "grad_norm": 0.10690687596797943,
      "learning_rate": 3.0218506557047598e-05,
      "loss": 0.0426,
      "step": 38880
    },
    {
      "epoch": 0.0002373046875,
      "model_forward_time": 0.11505937576293945,
      "step": 38880
    },
    {
      "epoch": 0.0002373046875,
      "step": 38880,
      "training_step_time": 0.367570161819458
    },
    {
      "epoch": 0.000237310791015625,
      "model_forward_time": 0.11543750762939453,
      "step": 38881
    },
    {
      "epoch": 0.000237310791015625,
      "step": 38881,
      "training_step_time": 0.42282700538635254
    },
    {
      "epoch": 0.00023731689453125,
      "model_forward_time": 0.11567401885986328,
      "step": 38882
    },
    {
      "epoch": 0.00023731689453125,
      "step": 38882,
      "training_step_time": 0.5115706920623779
    },
    {
      "epoch": 0.000237322998046875,
      "model_forward_time": 0.11491179466247559,
      "step": 38883
    },
    {
      "epoch": 0.000237322998046875,
      "step": 38883,
      "training_step_time": 0.38784003257751465
    },
    {
      "epoch": 0.0002373291015625,
      "model_forward_time": 0.11558198928833008,
      "step": 38884
    },
    {
      "epoch": 0.0002373291015625,
      "step": 38884,
      "training_step_time": 0.39092540740966797
    },
    {
      "epoch": 0.000237335205078125,
      "model_forward_time": 0.11603808403015137,
      "step": 38885
    },
    {
      "epoch": 0.000237335205078125,
      "step": 38885,
      "training_step_time": 0.39179277420043945
    },
    {
      "epoch": 0.00023734130859375,
      "model_forward_time": 0.115142822265625,
      "step": 38886
    },
    {
      "epoch": 0.00023734130859375,
      "step": 38886,
      "training_step_time": 0.4002394676208496
    },
    {
      "epoch": 0.000237347412109375,
      "model_forward_time": 0.1142728328704834,
      "step": 38887
    },
    {
      "epoch": 0.000237347412109375,
      "step": 38887,
      "training_step_time": 0.3976480960845947
    },
    {
      "epoch": 0.000237353515625,
      "model_forward_time": 0.11530399322509766,
      "step": 38888
    },
    {
      "epoch": 0.000237353515625,
      "step": 38888,
      "training_step_time": 0.436107873916626
    },
    {
      "epoch": 0.000237359619140625,
      "model_forward_time": 0.11512923240661621,
      "step": 38889
    },
    {
      "epoch": 0.000237359619140625,
      "step": 38889,
      "training_step_time": 0.38638949394226074
    },
    {
      "epoch": 0.00023736572265625,
      "grad_norm": 0.11062881350517273,
      "learning_rate": 3.0193200174774038e-05,
      "loss": 0.0367,
      "step": 38890
    },
    {
      "epoch": 0.00023736572265625,
      "model_forward_time": 0.11548376083374023,
      "step": 38890
    },
    {
      "epoch": 0.00023736572265625,
      "step": 38890,
      "training_step_time": 0.464282751083374
    },
    {
      "epoch": 0.000237371826171875,
      "model_forward_time": 0.11489486694335938,
      "step": 38891
    },
    {
      "epoch": 0.000237371826171875,
      "step": 38891,
      "training_step_time": 0.3851139545440674
    },
    {
      "epoch": 0.0002373779296875,
      "model_forward_time": 0.11515450477600098,
      "step": 38892
    },
    {
      "epoch": 0.0002373779296875,
      "step": 38892,
      "training_step_time": 0.40740203857421875
    },
    {
      "epoch": 0.000237384033203125,
      "model_forward_time": 0.11534380912780762,
      "step": 38893
    },
    {
      "epoch": 0.000237384033203125,
      "step": 38893,
      "training_step_time": 0.420060396194458
    },
    {
      "epoch": 0.00023739013671875,
      "model_forward_time": 0.1146385669708252,
      "step": 38894
    },
    {
      "epoch": 0.00023739013671875,
      "step": 38894,
      "training_step_time": 0.4329679012298584
    },
    {
      "epoch": 0.000237396240234375,
      "model_forward_time": 0.11490273475646973,
      "step": 38895
    },
    {
      "epoch": 0.000237396240234375,
      "step": 38895,
      "training_step_time": 0.48665475845336914
    },
    {
      "epoch": 0.00023740234375,
      "model_forward_time": 0.1157994270324707,
      "step": 38896
    },
    {
      "epoch": 0.00023740234375,
      "step": 38896,
      "training_step_time": 0.41554880142211914
    },
    {
      "epoch": 0.000237408447265625,
      "model_forward_time": 0.1149284839630127,
      "step": 38897
    },
    {
      "epoch": 0.000237408447265625,
      "step": 38897,
      "training_step_time": 0.46585512161254883
    },
    {
      "epoch": 0.00023741455078125,
      "model_forward_time": 0.11541604995727539,
      "step": 38898
    },
    {
      "epoch": 0.00023741455078125,
      "step": 38898,
      "training_step_time": 0.3912777900695801
    },
    {
      "epoch": 0.000237420654296875,
      "model_forward_time": 0.11452412605285645,
      "step": 38899
    },
    {
      "epoch": 0.000237420654296875,
      "step": 38899,
      "training_step_time": 0.3915879726409912
    },
    {
      "epoch": 0.0002374267578125,
      "grad_norm": 0.11571535468101501,
      "learning_rate": 3.0167899809283308e-05,
      "loss": 0.0337,
      "step": 38900
    },
    {
      "epoch": 0.0002374267578125,
      "model_forward_time": 0.11539888381958008,
      "step": 38900
    },
    {
      "epoch": 0.0002374267578125,
      "step": 38900,
      "training_step_time": 0.3883368968963623
    },
    {
      "epoch": 0.000237432861328125,
      "model_forward_time": 0.11509919166564941,
      "step": 38901
    },
    {
      "epoch": 0.000237432861328125,
      "step": 38901,
      "training_step_time": 0.41367149353027344
    },
    {
      "epoch": 0.00023743896484375,
      "model_forward_time": 0.1156623363494873,
      "step": 38902
    },
    {
      "epoch": 0.00023743896484375,
      "step": 38902,
      "training_step_time": 0.38858556747436523
    },
    {
      "epoch": 0.000237445068359375,
      "model_forward_time": 0.11604094505310059,
      "step": 38903
    },
    {
      "epoch": 0.000237445068359375,
      "step": 38903,
      "training_step_time": 0.48390817642211914
    },
    {
      "epoch": 0.000237451171875,
      "model_forward_time": 0.11522507667541504,
      "step": 38904
    },
    {
      "epoch": 0.000237451171875,
      "step": 38904,
      "training_step_time": 0.45363616943359375
    },
    {
      "epoch": 0.000237457275390625,
      "model_forward_time": 0.11548423767089844,
      "step": 38905
    },
    {
      "epoch": 0.000237457275390625,
      "step": 38905,
      "training_step_time": 0.39762043952941895
    },
    {
      "epoch": 0.00023746337890625,
      "model_forward_time": 0.11469793319702148,
      "step": 38906
    },
    {
      "epoch": 0.00023746337890625,
      "step": 38906,
      "training_step_time": 0.3876783847808838
    },
    {
      "epoch": 0.000237469482421875,
      "model_forward_time": 0.11475324630737305,
      "step": 38907
    },
    {
      "epoch": 0.000237469482421875,
      "step": 38907,
      "training_step_time": 0.43403148651123047
    },
    {
      "epoch": 0.0002374755859375,
      "model_forward_time": 0.11500358581542969,
      "step": 38908
    },
    {
      "epoch": 0.0002374755859375,
      "step": 38908,
      "training_step_time": 0.46587491035461426
    },
    {
      "epoch": 0.000237481689453125,
      "model_forward_time": 0.114898681640625,
      "step": 38909
    },
    {
      "epoch": 0.000237481689453125,
      "step": 38909,
      "training_step_time": 0.36930179595947266
    },
    {
      "epoch": 0.00023748779296875,
      "grad_norm": 0.1053362488746643,
      "learning_rate": 3.0142605468260978e-05,
      "loss": 0.0384,
      "step": 38910
    },
    {
      "epoch": 0.00023748779296875,
      "model_forward_time": 0.11507463455200195,
      "step": 38910
    },
    {
      "epoch": 0.00023748779296875,
      "step": 38910,
      "training_step_time": 0.42824840545654297
    },
    {
      "epoch": 0.000237493896484375,
      "model_forward_time": 0.11577701568603516,
      "step": 38911
    },
    {
      "epoch": 0.000237493896484375,
      "step": 38911,
      "training_step_time": 0.42292213439941406
    },
    {
      "epoch": 0.0002375,
      "model_forward_time": 0.11581730842590332,
      "step": 38912
    },
    {
      "epoch": 0.0002375,
      "step": 38912,
      "training_step_time": 0.3862335681915283
    },
    {
      "epoch": 0.000237506103515625,
      "model_forward_time": 0.11543011665344238,
      "step": 38913
    },
    {
      "epoch": 0.000237506103515625,
      "step": 38913,
      "training_step_time": 0.3790583610534668
    },
    {
      "epoch": 0.00023751220703125,
      "model_forward_time": 0.11505579948425293,
      "step": 38914
    },
    {
      "epoch": 0.00023751220703125,
      "step": 38914,
      "training_step_time": 0.37862300872802734
    },
    {
      "epoch": 0.000237518310546875,
      "model_forward_time": 0.11547636985778809,
      "step": 38915
    },
    {
      "epoch": 0.000237518310546875,
      "step": 38915,
      "training_step_time": 0.38999319076538086
    },
    {
      "epoch": 0.0002375244140625,
      "model_forward_time": 0.11601543426513672,
      "step": 38916
    },
    {
      "epoch": 0.0002375244140625,
      "step": 38916,
      "training_step_time": 0.4042370319366455
    },
    {
      "epoch": 0.000237530517578125,
      "model_forward_time": 0.11471748352050781,
      "step": 38917
    },
    {
      "epoch": 0.000237530517578125,
      "step": 38917,
      "training_step_time": 0.41405653953552246
    },
    {
      "epoch": 0.00023753662109375,
      "model_forward_time": 0.11553311347961426,
      "step": 38918
    },
    {
      "epoch": 0.00023753662109375,
      "step": 38918,
      "training_step_time": 0.42182493209838867
    },
    {
      "epoch": 0.000237542724609375,
      "model_forward_time": 0.11526966094970703,
      "step": 38919
    },
    {
      "epoch": 0.000237542724609375,
      "step": 38919,
      "training_step_time": 0.4916229248046875
    },
    {
      "epoch": 0.000237548828125,
      "grad_norm": 0.14410488307476044,
      "learning_rate": 3.0117317159390794e-05,
      "loss": 0.0387,
      "step": 38920
    },
    {
      "epoch": 0.000237548828125,
      "model_forward_time": 0.1143343448638916,
      "step": 38920
    },
    {
      "epoch": 0.000237548828125,
      "step": 38920,
      "training_step_time": 0.4237954616546631
    },
    {
      "epoch": 0.000237554931640625,
      "model_forward_time": 0.11533355712890625,
      "step": 38921
    },
    {
      "epoch": 0.000237554931640625,
      "step": 38921,
      "training_step_time": 0.39777326583862305
    },
    {
      "epoch": 0.00023756103515625,
      "model_forward_time": 0.11576199531555176,
      "step": 38922
    },
    {
      "epoch": 0.00023756103515625,
      "step": 38922,
      "training_step_time": 0.45238447189331055
    },
    {
      "epoch": 0.000237567138671875,
      "model_forward_time": 0.11487078666687012,
      "step": 38923
    },
    {
      "epoch": 0.000237567138671875,
      "step": 38923,
      "training_step_time": 0.38918328285217285
    },
    {
      "epoch": 0.0002375732421875,
      "model_forward_time": 0.11511588096618652,
      "step": 38924
    },
    {
      "epoch": 0.0002375732421875,
      "step": 38924,
      "training_step_time": 0.46086716651916504
    },
    {
      "epoch": 0.000237579345703125,
      "model_forward_time": 0.11579704284667969,
      "step": 38925
    },
    {
      "epoch": 0.000237579345703125,
      "step": 38925,
      "training_step_time": 0.47184252738952637
    },
    {
      "epoch": 0.00023758544921875,
      "model_forward_time": 0.11625027656555176,
      "step": 38926
    },
    {
      "epoch": 0.00023758544921875,
      "step": 38926,
      "training_step_time": 0.3995680809020996
    },
    {
      "epoch": 0.000237591552734375,
      "model_forward_time": 0.11487674713134766,
      "step": 38927
    },
    {
      "epoch": 0.000237591552734375,
      "step": 38927,
      "training_step_time": 0.3843202590942383
    },
    {
      "epoch": 0.00023759765625,
      "model_forward_time": 0.11535477638244629,
      "step": 38928
    },
    {
      "epoch": 0.00023759765625,
      "step": 38928,
      "training_step_time": 0.3840961456298828
    },
    {
      "epoch": 0.000237603759765625,
      "model_forward_time": 0.11590886116027832,
      "step": 38929
    },
    {
      "epoch": 0.000237603759765625,
      "step": 38929,
      "training_step_time": 0.3989999294281006
    },
    {
      "epoch": 0.00023760986328125,
      "grad_norm": 0.11151744425296783,
      "learning_rate": 3.0092034890354694e-05,
      "loss": 0.0372,
      "step": 38930
    },
    {
      "epoch": 0.00023760986328125,
      "model_forward_time": 0.11514902114868164,
      "step": 38930
    },
    {
      "epoch": 0.00023760986328125,
      "step": 38930,
      "training_step_time": 0.3798942565917969
    },
    {
      "epoch": 0.000237615966796875,
      "model_forward_time": 0.11513566970825195,
      "step": 38931
    },
    {
      "epoch": 0.000237615966796875,
      "step": 38931,
      "training_step_time": 0.3844144344329834
    },
    {
      "epoch": 0.0002376220703125,
      "model_forward_time": 0.1159520149230957,
      "step": 38932
    },
    {
      "epoch": 0.0002376220703125,
      "step": 38932,
      "training_step_time": 0.39607691764831543
    },
    {
      "epoch": 0.000237628173828125,
      "model_forward_time": 0.11493921279907227,
      "step": 38933
    },
    {
      "epoch": 0.000237628173828125,
      "step": 38933,
      "training_step_time": 0.4582366943359375
    },
    {
      "epoch": 0.00023763427734375,
      "model_forward_time": 0.11464285850524902,
      "step": 38934
    },
    {
      "epoch": 0.00023763427734375,
      "step": 38934,
      "training_step_time": 0.4429473876953125
    },
    {
      "epoch": 0.000237640380859375,
      "model_forward_time": 0.11572051048278809,
      "step": 38935
    },
    {
      "epoch": 0.000237640380859375,
      "step": 38935,
      "training_step_time": 0.4041421413421631
    },
    {
      "epoch": 0.000237646484375,
      "model_forward_time": 0.11690545082092285,
      "step": 38936
    },
    {
      "epoch": 0.000237646484375,
      "step": 38936,
      "training_step_time": 0.38335299491882324
    },
    {
      "epoch": 0.000237652587890625,
      "model_forward_time": 0.11525583267211914,
      "step": 38937
    },
    {
      "epoch": 0.000237652587890625,
      "step": 38937,
      "training_step_time": 0.5101301670074463
    },
    {
      "epoch": 0.00023765869140625,
      "model_forward_time": 0.11492180824279785,
      "step": 38938
    },
    {
      "epoch": 0.00023765869140625,
      "step": 38938,
      "training_step_time": 0.45106983184814453
    },
    {
      "epoch": 0.000237664794921875,
      "model_forward_time": 0.11509346961975098,
      "step": 38939
    },
    {
      "epoch": 0.000237664794921875,
      "step": 38939,
      "training_step_time": 0.4713590145111084
    },
    {
      "epoch": 0.0002376708984375,
      "grad_norm": 0.1279696375131607,
      "learning_rate": 3.006675866883275e-05,
      "loss": 0.0385,
      "step": 38940
    },
    {
      "epoch": 0.0002376708984375,
      "model_forward_time": 0.11461853981018066,
      "step": 38940
    },
    {
      "epoch": 0.0002376708984375,
      "step": 38940,
      "training_step_time": 0.4552628993988037
    },
    {
      "epoch": 0.000237677001953125,
      "model_forward_time": 0.11498212814331055,
      "step": 38941
    },
    {
      "epoch": 0.000237677001953125,
      "step": 38941,
      "training_step_time": 0.39168214797973633
    },
    {
      "epoch": 0.00023768310546875,
      "model_forward_time": 0.11436986923217773,
      "step": 38942
    },
    {
      "epoch": 0.00023768310546875,
      "step": 38942,
      "training_step_time": 0.38631319999694824
    },
    {
      "epoch": 0.000237689208984375,
      "model_forward_time": 0.1145322322845459,
      "step": 38943
    },
    {
      "epoch": 0.000237689208984375,
      "step": 38943,
      "training_step_time": 0.3803095817565918
    },
    {
      "epoch": 0.0002376953125,
      "model_forward_time": 0.11524605751037598,
      "step": 38944
    },
    {
      "epoch": 0.0002376953125,
      "step": 38944,
      "training_step_time": 0.3874843120574951
    },
    {
      "epoch": 0.000237701416015625,
      "model_forward_time": 0.1150369644165039,
      "step": 38945
    },
    {
      "epoch": 0.000237701416015625,
      "step": 38945,
      "training_step_time": 0.39263248443603516
    },
    {
      "epoch": 0.00023770751953125,
      "model_forward_time": 0.11599564552307129,
      "step": 38946
    },
    {
      "epoch": 0.00023770751953125,
      "step": 38946,
      "training_step_time": 0.46851682662963867
    },
    {
      "epoch": 0.000237713623046875,
      "model_forward_time": 0.11531639099121094,
      "step": 38947
    },
    {
      "epoch": 0.000237713623046875,
      "step": 38947,
      "training_step_time": 0.436983585357666
    },
    {
      "epoch": 0.0002377197265625,
      "model_forward_time": 0.11504101753234863,
      "step": 38948
    },
    {
      "epoch": 0.0002377197265625,
      "step": 38948,
      "training_step_time": 0.40797948837280273
    },
    {
      "epoch": 0.000237725830078125,
      "model_forward_time": 0.11496520042419434,
      "step": 38949
    },
    {
      "epoch": 0.000237725830078125,
      "step": 38949,
      "training_step_time": 0.39100193977355957
    },
    {
      "epoch": 0.00023773193359375,
      "grad_norm": 0.13167808949947357,
      "learning_rate": 3.004148850250323e-05,
      "loss": 0.0354,
      "step": 38950
    },
    {
      "epoch": 0.00023773193359375,
      "model_forward_time": 0.11542081832885742,
      "step": 38950
    },
    {
      "epoch": 0.00023773193359375,
      "step": 38950,
      "training_step_time": 0.38852906227111816
    },
    {
      "epoch": 0.000237738037109375,
      "model_forward_time": 0.11512184143066406,
      "step": 38951
    },
    {
      "epoch": 0.000237738037109375,
      "step": 38951,
      "training_step_time": 0.3953218460083008
    },
    {
      "epoch": 0.000237744140625,
      "model_forward_time": 0.11531972885131836,
      "step": 38952
    },
    {
      "epoch": 0.000237744140625,
      "step": 38952,
      "training_step_time": 0.4694178104400635
    },
    {
      "epoch": 0.000237750244140625,
      "model_forward_time": 0.11526632308959961,
      "step": 38953
    },
    {
      "epoch": 0.000237750244140625,
      "step": 38953,
      "training_step_time": 0.43459367752075195
    },
    {
      "epoch": 0.00023775634765625,
      "model_forward_time": 0.1149451732635498,
      "step": 38954
    },
    {
      "epoch": 0.00023775634765625,
      "step": 38954,
      "training_step_time": 0.40829992294311523
    },
    {
      "epoch": 0.000237762451171875,
      "model_forward_time": 0.11551260948181152,
      "step": 38955
    },
    {
      "epoch": 0.000237762451171875,
      "step": 38955,
      "training_step_time": 0.3869631290435791
    },
    {
      "epoch": 0.0002377685546875,
      "model_forward_time": 0.11537003517150879,
      "step": 38956
    },
    {
      "epoch": 0.0002377685546875,
      "step": 38956,
      "training_step_time": 0.3986530303955078
    },
    {
      "epoch": 0.000237774658203125,
      "model_forward_time": 0.11520266532897949,
      "step": 38957
    },
    {
      "epoch": 0.000237774658203125,
      "step": 38957,
      "training_step_time": 0.38065624237060547
    },
    {
      "epoch": 0.00023778076171875,
      "model_forward_time": 0.11543917655944824,
      "step": 38958
    },
    {
      "epoch": 0.00023778076171875,
      "step": 38958,
      "training_step_time": 0.3895840644836426
    },
    {
      "epoch": 0.000237786865234375,
      "model_forward_time": 0.11522316932678223,
      "step": 38959
    },
    {
      "epoch": 0.000237786865234375,
      "step": 38959,
      "training_step_time": 0.3959627151489258
    },
    {
      "epoch": 0.00023779296875,
      "grad_norm": 0.08744389563798904,
      "learning_rate": 3.0016224399042515e-05,
      "loss": 0.0401,
      "step": 38960
    },
    {
      "epoch": 0.00023779296875,
      "model_forward_time": 0.11569523811340332,
      "step": 38960
    },
    {
      "epoch": 0.00023779296875,
      "step": 38960,
      "training_step_time": 0.389739990234375
    },
    {
      "epoch": 0.000237799072265625,
      "model_forward_time": 0.11510086059570312,
      "step": 38961
    },
    {
      "epoch": 0.000237799072265625,
      "step": 38961,
      "training_step_time": 0.43355298042297363
    },
    {
      "epoch": 0.00023780517578125,
      "model_forward_time": 0.11517786979675293,
      "step": 38962
    },
    {
      "epoch": 0.00023780517578125,
      "step": 38962,
      "training_step_time": 0.49294400215148926
    },
    {
      "epoch": 0.000237811279296875,
      "model_forward_time": 0.11525487899780273,
      "step": 38963
    },
    {
      "epoch": 0.000237811279296875,
      "step": 38963,
      "training_step_time": 0.41284966468811035
    },
    {
      "epoch": 0.0002378173828125,
      "model_forward_time": 0.11464309692382812,
      "step": 38964
    },
    {
      "epoch": 0.0002378173828125,
      "step": 38964,
      "training_step_time": 0.39696741104125977
    },
    {
      "epoch": 0.000237823486328125,
      "model_forward_time": 0.11539411544799805,
      "step": 38965
    },
    {
      "epoch": 0.000237823486328125,
      "step": 38965,
      "training_step_time": 0.38985204696655273
    },
    {
      "epoch": 0.00023782958984375,
      "model_forward_time": 0.11504888534545898,
      "step": 38966
    },
    {
      "epoch": 0.00023782958984375,
      "step": 38966,
      "training_step_time": 0.38736987113952637
    },
    {
      "epoch": 0.000237835693359375,
      "model_forward_time": 0.11502385139465332,
      "step": 38967
    },
    {
      "epoch": 0.000237835693359375,
      "step": 38967,
      "training_step_time": 0.39085912704467773
    },
    {
      "epoch": 0.000237841796875,
      "model_forward_time": 0.11513805389404297,
      "step": 38968
    },
    {
      "epoch": 0.000237841796875,
      "step": 38968,
      "training_step_time": 0.40705013275146484
    },
    {
      "epoch": 0.000237847900390625,
      "model_forward_time": 0.11933040618896484,
      "step": 38969
    },
    {
      "epoch": 0.000237847900390625,
      "step": 38969,
      "training_step_time": 0.42445850372314453
    },
    {
      "epoch": 0.00023785400390625,
      "grad_norm": 0.13751967251300812,
      "learning_rate": 2.999096636612518e-05,
      "loss": 0.0381,
      "step": 38970
    },
    {
      "epoch": 0.00023785400390625,
      "model_forward_time": 0.11662006378173828,
      "step": 38970
    },
    {
      "epoch": 0.00023785400390625,
      "step": 38970,
      "training_step_time": 0.4653027057647705
    },
    {
      "epoch": 0.000237860107421875,
      "model_forward_time": 0.11493206024169922,
      "step": 38971
    },
    {
      "epoch": 0.000237860107421875,
      "step": 38971,
      "training_step_time": 0.39563679695129395
    },
    {
      "epoch": 0.0002378662109375,
      "model_forward_time": 0.11536979675292969,
      "step": 38972
    },
    {
      "epoch": 0.0002378662109375,
      "step": 38972,
      "training_step_time": 0.3777458667755127
    },
    {
      "epoch": 0.000237872314453125,
      "model_forward_time": 0.11510229110717773,
      "step": 38973
    },
    {
      "epoch": 0.000237872314453125,
      "step": 38973,
      "training_step_time": 0.3882577419281006
    },
    {
      "epoch": 0.00023787841796875,
      "model_forward_time": 0.11496639251708984,
      "step": 38974
    },
    {
      "epoch": 0.00023787841796875,
      "step": 38974,
      "training_step_time": 0.39852309226989746
    },
    {
      "epoch": 0.000237884521484375,
      "model_forward_time": 0.11488914489746094,
      "step": 38975
    },
    {
      "epoch": 0.000237884521484375,
      "step": 38975,
      "training_step_time": 0.43563008308410645
    },
    {
      "epoch": 0.000237890625,
      "model_forward_time": 0.11514663696289062,
      "step": 38976
    },
    {
      "epoch": 0.000237890625,
      "step": 38976,
      "training_step_time": 0.41228675842285156
    },
    {
      "epoch": 0.000237896728515625,
      "model_forward_time": 0.11558842658996582,
      "step": 38977
    },
    {
      "epoch": 0.000237896728515625,
      "step": 38977,
      "training_step_time": 0.47520995140075684
    },
    {
      "epoch": 0.00023790283203125,
      "model_forward_time": 0.1152048110961914,
      "step": 38978
    },
    {
      "epoch": 0.00023790283203125,
      "step": 38978,
      "training_step_time": 0.3823356628417969
    },
    {
      "epoch": 0.000237908935546875,
      "model_forward_time": 0.11493062973022461,
      "step": 38979
    },
    {
      "epoch": 0.000237908935546875,
      "step": 38979,
      "training_step_time": 0.3882012367248535
    },
    {
      "epoch": 0.0002379150390625,
      "grad_norm": 0.13358716666698456,
      "learning_rate": 2.9965714411423972e-05,
      "loss": 0.0392,
      "step": 38980
    },
    {
      "epoch": 0.0002379150390625,
      "model_forward_time": 0.1150050163269043,
      "step": 38980
    },
    {
      "epoch": 0.0002379150390625,
      "step": 38980,
      "training_step_time": 0.386014461517334
    },
    {
      "epoch": 0.000237921142578125,
      "model_forward_time": 0.11574172973632812,
      "step": 38981
    },
    {
      "epoch": 0.000237921142578125,
      "step": 38981,
      "training_step_time": 0.5563724040985107
    },
    {
      "epoch": 0.00023792724609375,
      "model_forward_time": 0.11471199989318848,
      "step": 38982
    },
    {
      "epoch": 0.00023792724609375,
      "step": 38982,
      "training_step_time": 0.3786494731903076
    },
    {
      "epoch": 0.000237933349609375,
      "model_forward_time": 0.11506819725036621,
      "step": 38983
    },
    {
      "epoch": 0.000237933349609375,
      "step": 38983,
      "training_step_time": 0.4560873508453369
    },
    {
      "epoch": 0.000237939453125,
      "model_forward_time": 0.11567997932434082,
      "step": 38984
    },
    {
      "epoch": 0.000237939453125,
      "step": 38984,
      "training_step_time": 0.40475964546203613
    },
    {
      "epoch": 0.000237945556640625,
      "model_forward_time": 0.1144869327545166,
      "step": 38985
    },
    {
      "epoch": 0.000237945556640625,
      "step": 38985,
      "training_step_time": 0.38347625732421875
    },
    {
      "epoch": 0.00023795166015625,
      "model_forward_time": 0.11501550674438477,
      "step": 38986
    },
    {
      "epoch": 0.00023795166015625,
      "step": 38986,
      "training_step_time": 0.38204216957092285
    },
    {
      "epoch": 0.000237957763671875,
      "model_forward_time": 0.11545109748840332,
      "step": 38987
    },
    {
      "epoch": 0.000237957763671875,
      "step": 38987,
      "training_step_time": 0.434920072555542
    },
    {
      "epoch": 0.0002379638671875,
      "model_forward_time": 0.11513304710388184,
      "step": 38988
    },
    {
      "epoch": 0.0002379638671875,
      "step": 38988,
      "training_step_time": 0.3958423137664795
    },
    {
      "epoch": 0.000237969970703125,
      "model_forward_time": 0.1147298812866211,
      "step": 38989
    },
    {
      "epoch": 0.000237969970703125,
      "step": 38989,
      "training_step_time": 0.3916890621185303
    },
    {
      "epoch": 0.00023797607421875,
      "grad_norm": 0.08405904471874237,
      "learning_rate": 2.994046854260974e-05,
      "loss": 0.0355,
      "step": 38990
    },
    {
      "epoch": 0.00023797607421875,
      "model_forward_time": 0.1152198314666748,
      "step": 38990
    },
    {
      "epoch": 0.00023797607421875,
      "step": 38990,
      "training_step_time": 0.40735411643981934
    },
    {
      "epoch": 0.000237982177734375,
      "model_forward_time": 0.11537003517150879,
      "step": 38991
    },
    {
      "epoch": 0.000237982177734375,
      "step": 38991,
      "training_step_time": 0.5049104690551758
    },
    {
      "epoch": 0.00023798828125,
      "model_forward_time": 0.11498785018920898,
      "step": 38992
    },
    {
      "epoch": 0.00023798828125,
      "step": 38992,
      "training_step_time": 0.3942255973815918
    },
    {
      "epoch": 0.000237994384765625,
      "model_forward_time": 0.11500239372253418,
      "step": 38993
    },
    {
      "epoch": 0.000237994384765625,
      "step": 38993,
      "training_step_time": 0.5028636455535889
    },
    {
      "epoch": 0.00023800048828125,
      "model_forward_time": 0.11536264419555664,
      "step": 38994
    },
    {
      "epoch": 0.00023800048828125,
      "step": 38994,
      "training_step_time": 0.3950018882751465
    },
    {
      "epoch": 0.000238006591796875,
      "model_forward_time": 0.11434555053710938,
      "step": 38995
    },
    {
      "epoch": 0.000238006591796875,
      "step": 38995,
      "training_step_time": 0.3932943344116211
    },
    {
      "epoch": 0.0002380126953125,
      "model_forward_time": 0.11498332023620605,
      "step": 38996
    },
    {
      "epoch": 0.0002380126953125,
      "step": 38996,
      "training_step_time": 0.42148327827453613
    },
    {
      "epoch": 0.000238018798828125,
      "model_forward_time": 0.11492729187011719,
      "step": 38997
    },
    {
      "epoch": 0.000238018798828125,
      "step": 38997,
      "training_step_time": 0.48235487937927246
    },
    {
      "epoch": 0.00023802490234375,
      "model_forward_time": 0.11478257179260254,
      "step": 38998
    },
    {
      "epoch": 0.00023802490234375,
      "step": 38998,
      "training_step_time": 0.5034699440002441
    },
    {
      "epoch": 0.000238031005859375,
      "model_forward_time": 0.11522316932678223,
      "step": 38999
    },
    {
      "epoch": 0.000238031005859375,
      "step": 38999,
      "training_step_time": 0.3877909183502197
    },
    {
      "epoch": 0.000238037109375,
      "grad_norm": 0.08668594807386398,
      "learning_rate": 2.991522876735154e-05,
      "loss": 0.0373,
      "step": 39000
    },
    {
      "epoch": 0.000238037109375,
      "model_forward_time": 0.11395549774169922,
      "step": 39000
    },
    {
      "epoch": 0.000238037109375,
      "step": 39000,
      "training_step_time": 0.3614156246185303
    },
    {
      "epoch": 0.000238043212890625,
      "model_forward_time": 0.11304903030395508,
      "step": 39001
    },
    {
      "epoch": 0.000238043212890625,
      "step": 39001,
      "training_step_time": 0.37653088569641113
    },
    {
      "epoch": 0.00023804931640625,
      "model_forward_time": 0.11283683776855469,
      "step": 39002
    },
    {
      "epoch": 0.00023804931640625,
      "step": 39002,
      "training_step_time": 0.37962770462036133
    },
    {
      "epoch": 0.000238055419921875,
      "model_forward_time": 0.11342453956604004,
      "step": 39003
    },
    {
      "epoch": 0.000238055419921875,
      "step": 39003,
      "training_step_time": 0.37173938751220703
    },
    {
      "epoch": 0.0002380615234375,
      "model_forward_time": 0.11336016654968262,
      "step": 39004
    },
    {
      "epoch": 0.0002380615234375,
      "step": 39004,
      "training_step_time": 0.3750877380371094
    },
    {
      "epoch": 0.000238067626953125,
      "model_forward_time": 0.11415386199951172,
      "step": 39005
    },
    {
      "epoch": 0.000238067626953125,
      "step": 39005,
      "training_step_time": 0.4071524143218994
    },
    {
      "epoch": 0.00023807373046875,
      "model_forward_time": 0.11444568634033203,
      "step": 39006
    },
    {
      "epoch": 0.00023807373046875,
      "step": 39006,
      "training_step_time": 0.406099796295166
    },
    {
      "epoch": 0.000238079833984375,
      "model_forward_time": 0.11708498001098633,
      "step": 39007
    },
    {
      "epoch": 0.000238079833984375,
      "step": 39007,
      "training_step_time": 0.4956824779510498
    },
    {
      "epoch": 0.0002380859375,
      "model_forward_time": 0.1147613525390625,
      "step": 39008
    },
    {
      "epoch": 0.0002380859375,
      "step": 39008,
      "training_step_time": 0.39395880699157715
    },
    {
      "epoch": 0.000238092041015625,
      "model_forward_time": 0.1166384220123291,
      "step": 39009
    },
    {
      "epoch": 0.000238092041015625,
      "step": 39009,
      "training_step_time": 0.38245487213134766
    },
    {
      "epoch": 0.00023809814453125,
      "grad_norm": 0.13632075488567352,
      "learning_rate": 2.9889995093316515e-05,
      "loss": 0.0447,
      "step": 39010
    },
    {
      "epoch": 0.00023809814453125,
      "model_forward_time": 0.11440134048461914,
      "step": 39010
    },
    {
      "epoch": 0.00023809814453125,
      "step": 39010,
      "training_step_time": 0.38461947441101074
    },
    {
      "epoch": 0.000238104248046875,
      "model_forward_time": 0.11423134803771973,
      "step": 39011
    },
    {
      "epoch": 0.000238104248046875,
      "step": 39011,
      "training_step_time": 0.3853025436401367
    },
    {
      "epoch": 0.0002381103515625,
      "model_forward_time": 0.11516189575195312,
      "step": 39012
    },
    {
      "epoch": 0.0002381103515625,
      "step": 39012,
      "training_step_time": 0.4141659736633301
    },
    {
      "epoch": 0.000238116455078125,
      "model_forward_time": 0.11471915245056152,
      "step": 39013
    },
    {
      "epoch": 0.000238116455078125,
      "step": 39013,
      "training_step_time": 0.3692331314086914
    },
    {
      "epoch": 0.00023812255859375,
      "model_forward_time": 0.11499977111816406,
      "step": 39014
    },
    {
      "epoch": 0.00023812255859375,
      "step": 39014,
      "training_step_time": 0.43985629081726074
    },
    {
      "epoch": 0.000238128662109375,
      "model_forward_time": 0.11517667770385742,
      "step": 39015
    },
    {
      "epoch": 0.000238128662109375,
      "step": 39015,
      "training_step_time": 0.442246675491333
    },
    {
      "epoch": 0.000238134765625,
      "model_forward_time": 0.11562013626098633,
      "step": 39016
    },
    {
      "epoch": 0.000238134765625,
      "step": 39016,
      "training_step_time": 0.38850975036621094
    },
    {
      "epoch": 0.000238140869140625,
      "model_forward_time": 0.11574769020080566,
      "step": 39017
    },
    {
      "epoch": 0.000238140869140625,
      "step": 39017,
      "training_step_time": 0.43134045600891113
    },
    {
      "epoch": 0.00023814697265625,
      "model_forward_time": 0.11569857597351074,
      "step": 39018
    },
    {
      "epoch": 0.00023814697265625,
      "step": 39018,
      "training_step_time": 0.3919224739074707
    },
    {
      "epoch": 0.000238153076171875,
      "model_forward_time": 0.11466836929321289,
      "step": 39019
    },
    {
      "epoch": 0.000238153076171875,
      "step": 39019,
      "training_step_time": 0.3832857608795166
    },
    {
      "epoch": 0.0002381591796875,
      "grad_norm": 0.15236611664295197,
      "learning_rate": 2.9864767528170002e-05,
      "loss": 0.0443,
      "step": 39020
    },
    {
      "epoch": 0.0002381591796875,
      "model_forward_time": 0.1150057315826416,
      "step": 39020
    },
    {
      "epoch": 0.0002381591796875,
      "step": 39020,
      "training_step_time": 0.4228057861328125
    },
    {
      "epoch": 0.000238165283203125,
      "model_forward_time": 0.11624264717102051,
      "step": 39021
    },
    {
      "epoch": 0.000238165283203125,
      "step": 39021,
      "training_step_time": 0.48801565170288086
    },
    {
      "epoch": 0.00023817138671875,
      "model_forward_time": 0.11468029022216797,
      "step": 39022
    },
    {
      "epoch": 0.00023817138671875,
      "step": 39022,
      "training_step_time": 0.3880903720855713
    },
    {
      "epoch": 0.000238177490234375,
      "model_forward_time": 0.1147923469543457,
      "step": 39023
    },
    {
      "epoch": 0.000238177490234375,
      "step": 39023,
      "training_step_time": 0.3829786777496338
    },
    {
      "epoch": 0.00023818359375,
      "model_forward_time": 0.11472702026367188,
      "step": 39024
    },
    {
      "epoch": 0.00023818359375,
      "step": 39024,
      "training_step_time": 0.39340710639953613
    },
    {
      "epoch": 0.000238189697265625,
      "model_forward_time": 0.1156315803527832,
      "step": 39025
    },
    {
      "epoch": 0.000238189697265625,
      "step": 39025,
      "training_step_time": 0.39302778244018555
    },
    {
      "epoch": 0.00023819580078125,
      "model_forward_time": 0.11516404151916504,
      "step": 39026
    },
    {
      "epoch": 0.00023819580078125,
      "step": 39026,
      "training_step_time": 0.40375804901123047
    },
    {
      "epoch": 0.000238201904296875,
      "model_forward_time": 0.11525726318359375,
      "step": 39027
    },
    {
      "epoch": 0.000238201904296875,
      "step": 39027,
      "training_step_time": 0.3882272243499756
    },
    {
      "epoch": 0.0002382080078125,
      "model_forward_time": 0.11501574516296387,
      "step": 39028
    },
    {
      "epoch": 0.0002382080078125,
      "step": 39028,
      "training_step_time": 0.45348262786865234
    },
    {
      "epoch": 0.000238214111328125,
      "model_forward_time": 0.11590075492858887,
      "step": 39029
    },
    {
      "epoch": 0.000238214111328125,
      "step": 39029,
      "training_step_time": 0.5298950672149658
    },
    {
      "epoch": 0.00023822021484375,
      "grad_norm": 0.09509360045194626,
      "learning_rate": 2.9839546079575497e-05,
      "loss": 0.036,
      "step": 39030
    },
    {
      "epoch": 0.00023822021484375,
      "model_forward_time": 0.1164255142211914,
      "step": 39030
    },
    {
      "epoch": 0.00023822021484375,
      "step": 39030,
      "training_step_time": 0.4464230537414551
    },
    {
      "epoch": 0.000238226318359375,
      "model_forward_time": 0.11660385131835938,
      "step": 39031
    },
    {
      "epoch": 0.000238226318359375,
      "step": 39031,
      "training_step_time": 0.43993115425109863
    },
    {
      "epoch": 0.000238232421875,
      "model_forward_time": 0.11525654792785645,
      "step": 39032
    },
    {
      "epoch": 0.000238232421875,
      "step": 39032,
      "training_step_time": 0.39226388931274414
    },
    {
      "epoch": 0.000238238525390625,
      "model_forward_time": 0.11489510536193848,
      "step": 39033
    },
    {
      "epoch": 0.000238238525390625,
      "step": 39033,
      "training_step_time": 0.38670969009399414
    },
    {
      "epoch": 0.00023824462890625,
      "model_forward_time": 0.11623764038085938,
      "step": 39034
    },
    {
      "epoch": 0.00023824462890625,
      "step": 39034,
      "training_step_time": 0.45619726181030273
    },
    {
      "epoch": 0.000238250732421875,
      "model_forward_time": 0.11537551879882812,
      "step": 39035
    },
    {
      "epoch": 0.000238250732421875,
      "step": 39035,
      "training_step_time": 0.38768863677978516
    },
    {
      "epoch": 0.0002382568359375,
      "model_forward_time": 0.11562323570251465,
      "step": 39036
    },
    {
      "epoch": 0.0002382568359375,
      "step": 39036,
      "training_step_time": 0.3920431137084961
    },
    {
      "epoch": 0.000238262939453125,
      "model_forward_time": 0.11626148223876953,
      "step": 39037
    },
    {
      "epoch": 0.000238262939453125,
      "step": 39037,
      "training_step_time": 0.3885068893432617
    },
    {
      "epoch": 0.00023826904296875,
      "model_forward_time": 0.11471772193908691,
      "step": 39038
    },
    {
      "epoch": 0.00023826904296875,
      "step": 39038,
      "training_step_time": 0.388216495513916
    },
    {
      "epoch": 0.000238275146484375,
      "model_forward_time": 0.11547994613647461,
      "step": 39039
    },
    {
      "epoch": 0.000238275146484375,
      "step": 39039,
      "training_step_time": 0.39140939712524414
    },
    {
      "epoch": 0.00023828125,
      "grad_norm": 0.1583167463541031,
      "learning_rate": 2.9814330755194564e-05,
      "loss": 0.0395,
      "step": 39040
    },
    {
      "epoch": 0.00023828125,
      "model_forward_time": 0.11578011512756348,
      "step": 39040
    },
    {
      "epoch": 0.00023828125,
      "step": 39040,
      "training_step_time": 0.38669610023498535
    },
    {
      "epoch": 0.000238287353515625,
      "model_forward_time": 0.11594223976135254,
      "step": 39041
    },
    {
      "epoch": 0.000238287353515625,
      "step": 39041,
      "training_step_time": 0.3957505226135254
    },
    {
      "epoch": 0.00023829345703125,
      "model_forward_time": 0.11659908294677734,
      "step": 39042
    },
    {
      "epoch": 0.00023829345703125,
      "step": 39042,
      "training_step_time": 0.3966503143310547
    },
    {
      "epoch": 0.000238299560546875,
      "model_forward_time": 0.11643576622009277,
      "step": 39043
    },
    {
      "epoch": 0.000238299560546875,
      "step": 39043,
      "training_step_time": 0.4741499423980713
    },
    {
      "epoch": 0.0002383056640625,
      "model_forward_time": 0.11600875854492188,
      "step": 39044
    },
    {
      "epoch": 0.0002383056640625,
      "step": 39044,
      "training_step_time": 0.47351598739624023
    },
    {
      "epoch": 0.000238311767578125,
      "model_forward_time": 0.11562347412109375,
      "step": 39045
    },
    {
      "epoch": 0.000238311767578125,
      "step": 39045,
      "training_step_time": 0.4131453037261963
    },
    {
      "epoch": 0.00023831787109375,
      "model_forward_time": 0.11620473861694336,
      "step": 39046
    },
    {
      "epoch": 0.00023831787109375,
      "step": 39046,
      "training_step_time": 0.46918463706970215
    },
    {
      "epoch": 0.000238323974609375,
      "model_forward_time": 0.1146090030670166,
      "step": 39047
    },
    {
      "epoch": 0.000238323974609375,
      "step": 39047,
      "training_step_time": 0.48851799964904785
    },
    {
      "epoch": 0.000238330078125,
      "model_forward_time": 0.11449289321899414,
      "step": 39048
    },
    {
      "epoch": 0.000238330078125,
      "step": 39048,
      "training_step_time": 0.48728489875793457
    },
    {
      "epoch": 0.000238336181640625,
      "model_forward_time": 0.11514830589294434,
      "step": 39049
    },
    {
      "epoch": 0.000238336181640625,
      "step": 39049,
      "training_step_time": 0.3986968994140625
    },
    {
      "epoch": 0.00023834228515625,
      "grad_norm": 0.15325000882148743,
      "learning_rate": 2.978912156268699e-05,
      "loss": 0.0451,
      "step": 39050
    },
    {
      "epoch": 0.00023834228515625,
      "model_forward_time": 0.11635112762451172,
      "step": 39050
    },
    {
      "epoch": 0.00023834228515625,
      "step": 39050,
      "training_step_time": 0.405057430267334
    },
    {
      "epoch": 0.000238348388671875,
      "model_forward_time": 0.1145484447479248,
      "step": 39051
    },
    {
      "epoch": 0.000238348388671875,
      "step": 39051,
      "training_step_time": 0.3785436153411865
    },
    {
      "epoch": 0.0002383544921875,
      "model_forward_time": 0.11466503143310547,
      "step": 39052
    },
    {
      "epoch": 0.0002383544921875,
      "step": 39052,
      "training_step_time": 0.38887977600097656
    },
    {
      "epoch": 0.000238360595703125,
      "model_forward_time": 0.11508536338806152,
      "step": 39053
    },
    {
      "epoch": 0.000238360595703125,
      "step": 39053,
      "training_step_time": 0.3890528678894043
    },
    {
      "epoch": 0.00023836669921875,
      "model_forward_time": 0.11501431465148926,
      "step": 39054
    },
    {
      "epoch": 0.00023836669921875,
      "step": 39054,
      "training_step_time": 0.38303279876708984
    },
    {
      "epoch": 0.000238372802734375,
      "model_forward_time": 0.11572647094726562,
      "step": 39055
    },
    {
      "epoch": 0.000238372802734375,
      "step": 39055,
      "training_step_time": 0.4002494812011719
    },
    {
      "epoch": 0.00023837890625,
      "model_forward_time": 0.11592459678649902,
      "step": 39056
    },
    {
      "epoch": 0.00023837890625,
      "step": 39056,
      "training_step_time": 0.4156186580657959
    },
    {
      "epoch": 0.000238385009765625,
      "model_forward_time": 0.11548900604248047,
      "step": 39057
    },
    {
      "epoch": 0.000238385009765625,
      "step": 39057,
      "training_step_time": 0.4157423973083496
    },
    {
      "epoch": 0.00023839111328125,
      "model_forward_time": 0.11530470848083496,
      "step": 39058
    },
    {
      "epoch": 0.00023839111328125,
      "step": 39058,
      "training_step_time": 0.5008864402770996
    },
    {
      "epoch": 0.000238397216796875,
      "model_forward_time": 0.11452841758728027,
      "step": 39059
    },
    {
      "epoch": 0.000238397216796875,
      "step": 39059,
      "training_step_time": 0.49151182174682617
    },
    {
      "epoch": 0.0002384033203125,
      "grad_norm": 0.13828925788402557,
      "learning_rate": 2.976391850971065e-05,
      "loss": 0.0387,
      "step": 39060
    },
    {
      "epoch": 0.0002384033203125,
      "model_forward_time": 0.11490678787231445,
      "step": 39060
    },
    {
      "epoch": 0.0002384033203125,
      "step": 39060,
      "training_step_time": 0.4029507637023926
    },
    {
      "epoch": 0.000238409423828125,
      "model_forward_time": 0.1153724193572998,
      "step": 39061
    },
    {
      "epoch": 0.000238409423828125,
      "step": 39061,
      "training_step_time": 0.3835716247558594
    },
    {
      "epoch": 0.00023841552734375,
      "model_forward_time": 0.11449575424194336,
      "step": 39062
    },
    {
      "epoch": 0.00023841552734375,
      "step": 39062,
      "training_step_time": 0.3851203918457031
    },
    {
      "epoch": 0.000238421630859375,
      "model_forward_time": 0.11554908752441406,
      "step": 39063
    },
    {
      "epoch": 0.000238421630859375,
      "step": 39063,
      "training_step_time": 0.40638303756713867
    },
    {
      "epoch": 0.000238427734375,
      "model_forward_time": 0.11523771286010742,
      "step": 39064
    },
    {
      "epoch": 0.000238427734375,
      "step": 39064,
      "training_step_time": 0.4871056079864502
    },
    {
      "epoch": 0.000238433837890625,
      "model_forward_time": 0.11517834663391113,
      "step": 39065
    },
    {
      "epoch": 0.000238433837890625,
      "step": 39065,
      "training_step_time": 0.38845276832580566
    },
    {
      "epoch": 0.00023843994140625,
      "model_forward_time": 0.11522722244262695,
      "step": 39066
    },
    {
      "epoch": 0.00023843994140625,
      "step": 39066,
      "training_step_time": 0.3887944221496582
    },
    {
      "epoch": 0.000238446044921875,
      "model_forward_time": 0.11544179916381836,
      "step": 39067
    },
    {
      "epoch": 0.000238446044921875,
      "step": 39067,
      "training_step_time": 0.39070701599121094
    },
    {
      "epoch": 0.0002384521484375,
      "model_forward_time": 0.11540460586547852,
      "step": 39068
    },
    {
      "epoch": 0.0002384521484375,
      "step": 39068,
      "training_step_time": 0.38777923583984375
    },
    {
      "epoch": 0.000238458251953125,
      "model_forward_time": 0.11562752723693848,
      "step": 39069
    },
    {
      "epoch": 0.000238458251953125,
      "step": 39069,
      "training_step_time": 0.3814122676849365
    },
    {
      "epoch": 0.00023846435546875,
      "grad_norm": 0.15076637268066406,
      "learning_rate": 2.973872160392156e-05,
      "loss": 0.0395,
      "step": 39070
    },
    {
      "epoch": 0.00023846435546875,
      "model_forward_time": 0.1152048110961914,
      "step": 39070
    },
    {
      "epoch": 0.00023846435546875,
      "step": 39070,
      "training_step_time": 0.4403712749481201
    },
    {
      "epoch": 0.000238470458984375,
      "model_forward_time": 0.11570024490356445,
      "step": 39071
    },
    {
      "epoch": 0.000238470458984375,
      "step": 39071,
      "training_step_time": 0.43537211418151855
    },
    {
      "epoch": 0.0002384765625,
      "model_forward_time": 0.11491942405700684,
      "step": 39072
    },
    {
      "epoch": 0.0002384765625,
      "step": 39072,
      "training_step_time": 0.37128734588623047
    },
    {
      "epoch": 0.000238482666015625,
      "model_forward_time": 0.1150660514831543,
      "step": 39073
    },
    {
      "epoch": 0.000238482666015625,
      "step": 39073,
      "training_step_time": 0.45823121070861816
    },
    {
      "epoch": 0.00023848876953125,
      "model_forward_time": 0.1153252124786377,
      "step": 39074
    },
    {
      "epoch": 0.00023848876953125,
      "step": 39074,
      "training_step_time": 0.42012882232666016
    },
    {
      "epoch": 0.000238494873046875,
      "model_forward_time": 0.11462140083312988,
      "step": 39075
    },
    {
      "epoch": 0.000238494873046875,
      "step": 39075,
      "training_step_time": 0.3812074661254883
    },
    {
      "epoch": 0.0002385009765625,
      "model_forward_time": 0.11528372764587402,
      "step": 39076
    },
    {
      "epoch": 0.0002385009765625,
      "step": 39076,
      "training_step_time": 0.3838918209075928
    },
    {
      "epoch": 0.000238507080078125,
      "model_forward_time": 0.11556673049926758,
      "step": 39077
    },
    {
      "epoch": 0.000238507080078125,
      "step": 39077,
      "training_step_time": 0.4578580856323242
    },
    {
      "epoch": 0.00023851318359375,
      "model_forward_time": 0.1157376766204834,
      "step": 39078
    },
    {
      "epoch": 0.00023851318359375,
      "step": 39078,
      "training_step_time": 0.49585533142089844
    },
    {
      "epoch": 0.000238519287109375,
      "model_forward_time": 0.1161196231842041,
      "step": 39079
    },
    {
      "epoch": 0.000238519287109375,
      "step": 39079,
      "training_step_time": 0.38718104362487793
    },
    {
      "epoch": 0.000238525390625,
      "grad_norm": 0.09638869017362595,
      "learning_rate": 2.971353085297387e-05,
      "loss": 0.0334,
      "step": 39080
    },
    {
      "epoch": 0.000238525390625,
      "model_forward_time": 0.11451339721679688,
      "step": 39080
    },
    {
      "epoch": 0.000238525390625,
      "step": 39080,
      "training_step_time": 0.38602757453918457
    },
    {
      "epoch": 0.000238531494140625,
      "model_forward_time": 0.11536908149719238,
      "step": 39081
    },
    {
      "epoch": 0.000238531494140625,
      "step": 39081,
      "training_step_time": 0.37624692916870117
    },
    {
      "epoch": 0.00023853759765625,
      "model_forward_time": 0.11507797241210938,
      "step": 39082
    },
    {
      "epoch": 0.00023853759765625,
      "step": 39082,
      "training_step_time": 0.3866879940032959
    },
    {
      "epoch": 0.000238543701171875,
      "model_forward_time": 0.11539745330810547,
      "step": 39083
    },
    {
      "epoch": 0.000238543701171875,
      "step": 39083,
      "training_step_time": 0.39824509620666504
    },
    {
      "epoch": 0.0002385498046875,
      "model_forward_time": 0.11503005027770996,
      "step": 39084
    },
    {
      "epoch": 0.0002385498046875,
      "step": 39084,
      "training_step_time": 0.3939814567565918
    },
    {
      "epoch": 0.000238555908203125,
      "model_forward_time": 0.1150522232055664,
      "step": 39085
    },
    {
      "epoch": 0.000238555908203125,
      "step": 39085,
      "training_step_time": 0.4550354480743408
    },
    {
      "epoch": 0.00023856201171875,
      "model_forward_time": 0.1152646541595459,
      "step": 39086
    },
    {
      "epoch": 0.00023856201171875,
      "step": 39086,
      "training_step_time": 0.41666269302368164
    },
    {
      "epoch": 0.000238568115234375,
      "model_forward_time": 0.1172487735748291,
      "step": 39087
    },
    {
      "epoch": 0.000238568115234375,
      "step": 39087,
      "training_step_time": 0.43412184715270996
    },
    {
      "epoch": 0.00023857421875,
      "model_forward_time": 0.11556553840637207,
      "step": 39088
    },
    {
      "epoch": 0.00023857421875,
      "step": 39088,
      "training_step_time": 0.4998195171356201
    },
    {
      "epoch": 0.000238580322265625,
      "model_forward_time": 0.11500740051269531,
      "step": 39089
    },
    {
      "epoch": 0.000238580322265625,
      "step": 39089,
      "training_step_time": 0.4181065559387207
    },
    {
      "epoch": 0.00023858642578125,
      "grad_norm": 0.10801186412572861,
      "learning_rate": 2.9688346264519866e-05,
      "loss": 0.04,
      "step": 39090
    },
    {
      "epoch": 0.00023858642578125,
      "model_forward_time": 0.11547350883483887,
      "step": 39090
    },
    {
      "epoch": 0.00023858642578125,
      "step": 39090,
      "training_step_time": 0.38222718238830566
    },
    {
      "epoch": 0.000238592529296875,
      "model_forward_time": 0.11533570289611816,
      "step": 39091
    },
    {
      "epoch": 0.000238592529296875,
      "step": 39091,
      "training_step_time": 0.4022510051727295
    },
    {
      "epoch": 0.0002385986328125,
      "model_forward_time": 0.11522245407104492,
      "step": 39092
    },
    {
      "epoch": 0.0002385986328125,
      "step": 39092,
      "training_step_time": 0.4211242198944092
    },
    {
      "epoch": 0.000238604736328125,
      "model_forward_time": 0.1149604320526123,
      "step": 39093
    },
    {
      "epoch": 0.000238604736328125,
      "step": 39093,
      "training_step_time": 0.48685193061828613
    },
    {
      "epoch": 0.00023861083984375,
      "model_forward_time": 0.11550164222717285,
      "step": 39094
    },
    {
      "epoch": 0.00023861083984375,
      "step": 39094,
      "training_step_time": 0.3921070098876953
    },
    {
      "epoch": 0.000238616943359375,
      "model_forward_time": 0.11508393287658691,
      "step": 39095
    },
    {
      "epoch": 0.000238616943359375,
      "step": 39095,
      "training_step_time": 0.3874828815460205
    },
    {
      "epoch": 0.000238623046875,
      "model_forward_time": 0.11496925354003906,
      "step": 39096
    },
    {
      "epoch": 0.000238623046875,
      "step": 39096,
      "training_step_time": 0.3927578926086426
    },
    {
      "epoch": 0.000238629150390625,
      "model_forward_time": 0.11508560180664062,
      "step": 39097
    },
    {
      "epoch": 0.000238629150390625,
      "step": 39097,
      "training_step_time": 0.39308881759643555
    },
    {
      "epoch": 0.00023863525390625,
      "model_forward_time": 0.11536979675292969,
      "step": 39098
    },
    {
      "epoch": 0.00023863525390625,
      "step": 39098,
      "training_step_time": 0.3885993957519531
    },
    {
      "epoch": 0.000238641357421875,
      "model_forward_time": 0.11490893363952637,
      "step": 39099
    },
    {
      "epoch": 0.000238641357421875,
      "step": 39099,
      "training_step_time": 0.40890932083129883
    },
    {
      "epoch": 0.0002386474609375,
      "grad_norm": 0.16363884508609772,
      "learning_rate": 2.9663167846209998e-05,
      "loss": 0.0374,
      "step": 39100
    },
    {
      "epoch": 0.0002386474609375,
      "model_forward_time": 0.1157526969909668,
      "step": 39100
    },
    {
      "epoch": 0.0002386474609375,
      "step": 39100,
      "training_step_time": 0.4345426559448242
    },
    {
      "epoch": 0.000238653564453125,
      "model_forward_time": 0.11483907699584961,
      "step": 39101
    },
    {
      "epoch": 0.000238653564453125,
      "step": 39101,
      "training_step_time": 0.37104249000549316
    },
    {
      "epoch": 0.00023865966796875,
      "model_forward_time": 0.11517906188964844,
      "step": 39102
    },
    {
      "epoch": 0.00023865966796875,
      "step": 39102,
      "training_step_time": 0.44342756271362305
    },
    {
      "epoch": 0.000238665771484375,
      "model_forward_time": 0.11572122573852539,
      "step": 39103
    },
    {
      "epoch": 0.000238665771484375,
      "step": 39103,
      "training_step_time": 0.4259171485900879
    },
    {
      "epoch": 0.000238671875,
      "model_forward_time": 0.11577630043029785,
      "step": 39104
    },
    {
      "epoch": 0.000238671875,
      "step": 39104,
      "training_step_time": 0.3856210708618164
    },
    {
      "epoch": 0.000238677978515625,
      "model_forward_time": 0.11561942100524902,
      "step": 39105
    },
    {
      "epoch": 0.000238677978515625,
      "step": 39105,
      "training_step_time": 0.3943631649017334
    },
    {
      "epoch": 0.00023868408203125,
      "model_forward_time": 0.11555004119873047,
      "step": 39106
    },
    {
      "epoch": 0.00023868408203125,
      "step": 39106,
      "training_step_time": 0.3858458995819092
    },
    {
      "epoch": 0.000238690185546875,
      "model_forward_time": 0.11488866806030273,
      "step": 39107
    },
    {
      "epoch": 0.000238690185546875,
      "step": 39107,
      "training_step_time": 0.42824602127075195
    },
    {
      "epoch": 0.0002386962890625,
      "model_forward_time": 0.11547064781188965,
      "step": 39108
    },
    {
      "epoch": 0.0002386962890625,
      "step": 39108,
      "training_step_time": 0.37958693504333496
    },
    {
      "epoch": 0.000238702392578125,
      "model_forward_time": 0.11502933502197266,
      "step": 39109
    },
    {
      "epoch": 0.000238702392578125,
      "step": 39109,
      "training_step_time": 0.3988983631134033
    },
    {
      "epoch": 0.00023870849609375,
      "grad_norm": 0.11420443654060364,
      "learning_rate": 2.963799560569275e-05,
      "loss": 0.0414,
      "step": 39110
    },
    {
      "epoch": 0.00023870849609375,
      "model_forward_time": 0.11507582664489746,
      "step": 39110
    },
    {
      "epoch": 0.00023870849609375,
      "step": 39110,
      "training_step_time": 0.3910362720489502
    },
    {
      "epoch": 0.000238714599609375,
      "model_forward_time": 0.1149742603302002,
      "step": 39111
    },
    {
      "epoch": 0.000238714599609375,
      "step": 39111,
      "training_step_time": 0.4009733200073242
    },
    {
      "epoch": 0.000238720703125,
      "model_forward_time": 0.11487030982971191,
      "step": 39112
    },
    {
      "epoch": 0.000238720703125,
      "step": 39112,
      "training_step_time": 0.38352441787719727
    },
    {
      "epoch": 0.000238726806640625,
      "model_forward_time": 0.11556625366210938,
      "step": 39113
    },
    {
      "epoch": 0.000238726806640625,
      "step": 39113,
      "training_step_time": 0.4155759811401367
    },
    {
      "epoch": 0.00023873291015625,
      "model_forward_time": 0.11610126495361328,
      "step": 39114
    },
    {
      "epoch": 0.00023873291015625,
      "step": 39114,
      "training_step_time": 0.4416341781616211
    },
    {
      "epoch": 0.000238739013671875,
      "model_forward_time": 0.11524844169616699,
      "step": 39115
    },
    {
      "epoch": 0.000238739013671875,
      "step": 39115,
      "training_step_time": 0.38558459281921387
    },
    {
      "epoch": 0.0002387451171875,
      "model_forward_time": 0.11533379554748535,
      "step": 39116
    },
    {
      "epoch": 0.0002387451171875,
      "step": 39116,
      "training_step_time": 0.4043734073638916
    },
    {
      "epoch": 0.000238751220703125,
      "model_forward_time": 0.11523866653442383,
      "step": 39117
    },
    {
      "epoch": 0.000238751220703125,
      "step": 39117,
      "training_step_time": 0.4291388988494873
    },
    {
      "epoch": 0.00023875732421875,
      "model_forward_time": 0.11591911315917969,
      "step": 39118
    },
    {
      "epoch": 0.00023875732421875,
      "step": 39118,
      "training_step_time": 0.41652846336364746
    },
    {
      "epoch": 0.000238763427734375,
      "model_forward_time": 0.11431527137756348,
      "step": 39119
    },
    {
      "epoch": 0.000238763427734375,
      "step": 39119,
      "training_step_time": 0.38483190536499023
    },
    {
      "epoch": 0.00023876953125,
      "grad_norm": 0.1036297008395195,
      "learning_rate": 2.9612829550614836e-05,
      "loss": 0.0361,
      "step": 39120
    },
    {
      "epoch": 0.00023876953125,
      "model_forward_time": 0.11456966400146484,
      "step": 39120
    },
    {
      "epoch": 0.00023876953125,
      "step": 39120,
      "training_step_time": 0.40720319747924805
    },
    {
      "epoch": 0.000238775634765625,
      "model_forward_time": 0.11450934410095215,
      "step": 39121
    },
    {
      "epoch": 0.000238775634765625,
      "step": 39121,
      "training_step_time": 0.41980671882629395
    },
    {
      "epoch": 0.00023878173828125,
      "model_forward_time": 0.11548304557800293,
      "step": 39122
    },
    {
      "epoch": 0.00023878173828125,
      "step": 39122,
      "training_step_time": 0.4785187244415283
    },
    {
      "epoch": 0.000238787841796875,
      "model_forward_time": 0.11634016036987305,
      "step": 39123
    },
    {
      "epoch": 0.000238787841796875,
      "step": 39123,
      "training_step_time": 0.3662071228027344
    },
    {
      "epoch": 0.0002387939453125,
      "model_forward_time": 0.11446952819824219,
      "step": 39124
    },
    {
      "epoch": 0.0002387939453125,
      "step": 39124,
      "training_step_time": 0.38907790184020996
    },
    {
      "epoch": 0.000238800048828125,
      "model_forward_time": 0.11459851264953613,
      "step": 39125
    },
    {
      "epoch": 0.000238800048828125,
      "step": 39125,
      "training_step_time": 0.3798179626464844
    },
    {
      "epoch": 0.00023880615234375,
      "model_forward_time": 0.1155853271484375,
      "step": 39126
    },
    {
      "epoch": 0.00023880615234375,
      "step": 39126,
      "training_step_time": 0.39283108711242676
    },
    {
      "epoch": 0.000238812255859375,
      "model_forward_time": 0.11483311653137207,
      "step": 39127
    },
    {
      "epoch": 0.000238812255859375,
      "step": 39127,
      "training_step_time": 0.3988158702850342
    },
    {
      "epoch": 0.000238818359375,
      "model_forward_time": 0.1149897575378418,
      "step": 39128
    },
    {
      "epoch": 0.000238818359375,
      "step": 39128,
      "training_step_time": 0.4153733253479004
    },
    {
      "epoch": 0.000238824462890625,
      "model_forward_time": 0.11489725112915039,
      "step": 39129
    },
    {
      "epoch": 0.000238824462890625,
      "step": 39129,
      "training_step_time": 0.4495859146118164
    },
    {
      "epoch": 0.00023883056640625,
      "grad_norm": 0.08556961268186569,
      "learning_rate": 2.9587669688620988e-05,
      "loss": 0.036,
      "step": 39130
    },
    {
      "epoch": 0.00023883056640625,
      "model_forward_time": 0.11499714851379395,
      "step": 39130
    },
    {
      "epoch": 0.00023883056640625,
      "step": 39130,
      "training_step_time": 0.3931400775909424
    },
    {
      "epoch": 0.000238836669921875,
      "model_forward_time": 0.11498498916625977,
      "step": 39131
    },
    {
      "epoch": 0.000238836669921875,
      "step": 39131,
      "training_step_time": 0.44846034049987793
    },
    {
      "epoch": 0.0002388427734375,
      "model_forward_time": 0.11595916748046875,
      "step": 39132
    },
    {
      "epoch": 0.0002388427734375,
      "step": 39132,
      "training_step_time": 0.37909913063049316
    },
    {
      "epoch": 0.000238848876953125,
      "model_forward_time": 0.11552810668945312,
      "step": 39133
    },
    {
      "epoch": 0.000238848876953125,
      "step": 39133,
      "training_step_time": 0.4044058322906494
    },
    {
      "epoch": 0.00023885498046875,
      "model_forward_time": 0.1157224178314209,
      "step": 39134
    },
    {
      "epoch": 0.00023885498046875,
      "step": 39134,
      "training_step_time": 0.38190460205078125
    },
    {
      "epoch": 0.000238861083984375,
      "model_forward_time": 0.11516189575195312,
      "step": 39135
    },
    {
      "epoch": 0.000238861083984375,
      "step": 39135,
      "training_step_time": 0.4665844440460205
    },
    {
      "epoch": 0.0002388671875,
      "model_forward_time": 0.11595273017883301,
      "step": 39136
    },
    {
      "epoch": 0.0002388671875,
      "step": 39136,
      "training_step_time": 0.49542808532714844
    },
    {
      "epoch": 0.000238873291015625,
      "model_forward_time": 0.11507153511047363,
      "step": 39137
    },
    {
      "epoch": 0.000238873291015625,
      "step": 39137,
      "training_step_time": 0.3886606693267822
    },
    {
      "epoch": 0.00023887939453125,
      "model_forward_time": 0.11553716659545898,
      "step": 39138
    },
    {
      "epoch": 0.00023887939453125,
      "step": 39138,
      "training_step_time": 0.39757561683654785
    },
    {
      "epoch": 0.000238885498046875,
      "model_forward_time": 0.11487817764282227,
      "step": 39139
    },
    {
      "epoch": 0.000238885498046875,
      "step": 39139,
      "training_step_time": 0.3906397819519043
    },
    {
      "epoch": 0.0002388916015625,
      "grad_norm": 0.11531779170036316,
      "learning_rate": 2.956251602735413e-05,
      "loss": 0.043,
      "step": 39140
    },
    {
      "epoch": 0.0002388916015625,
      "model_forward_time": 0.11419439315795898,
      "step": 39140
    },
    {
      "epoch": 0.0002388916015625,
      "step": 39140,
      "training_step_time": 0.385117769241333
    },
    {
      "epoch": 0.000238897705078125,
      "model_forward_time": 0.11546969413757324,
      "step": 39141
    },
    {
      "epoch": 0.000238897705078125,
      "step": 39141,
      "training_step_time": 0.45016026496887207
    },
    {
      "epoch": 0.00023890380859375,
      "model_forward_time": 0.11519789695739746,
      "step": 39142
    },
    {
      "epoch": 0.00023890380859375,
      "step": 39142,
      "training_step_time": 0.39081311225891113
    },
    {
      "epoch": 0.000238909912109375,
      "model_forward_time": 0.11583065986633301,
      "step": 39143
    },
    {
      "epoch": 0.000238909912109375,
      "step": 39143,
      "training_step_time": 0.39215612411499023
    },
    {
      "epoch": 0.000238916015625,
      "model_forward_time": 0.1156778335571289,
      "step": 39144
    },
    {
      "epoch": 0.000238916015625,
      "step": 39144,
      "training_step_time": 0.4399592876434326
    },
    {
      "epoch": 0.000238922119140625,
      "model_forward_time": 0.11596369743347168,
      "step": 39145
    },
    {
      "epoch": 0.000238922119140625,
      "step": 39145,
      "training_step_time": 0.4024925231933594
    },
    {
      "epoch": 0.00023892822265625,
      "model_forward_time": 0.11590933799743652,
      "step": 39146
    },
    {
      "epoch": 0.00023892822265625,
      "step": 39146,
      "training_step_time": 0.46351051330566406
    },
    {
      "epoch": 0.000238934326171875,
      "model_forward_time": 0.11585354804992676,
      "step": 39147
    },
    {
      "epoch": 0.000238934326171875,
      "step": 39147,
      "training_step_time": 0.4410405158996582
    },
    {
      "epoch": 0.0002389404296875,
      "model_forward_time": 0.11617040634155273,
      "step": 39148
    },
    {
      "epoch": 0.0002389404296875,
      "step": 39148,
      "training_step_time": 0.3832693099975586
    },
    {
      "epoch": 0.000238946533203125,
      "model_forward_time": 0.1155095100402832,
      "step": 39149
    },
    {
      "epoch": 0.000238946533203125,
      "step": 39149,
      "training_step_time": 0.38262367248535156
    },
    {
      "epoch": 0.00023895263671875,
      "grad_norm": 0.09634841978549957,
      "learning_rate": 2.9537368574455304e-05,
      "loss": 0.0381,
      "step": 39150
    },
    {
      "epoch": 0.00023895263671875,
      "model_forward_time": 0.1148996353149414,
      "step": 39150
    },
    {
      "epoch": 0.00023895263671875,
      "step": 39150,
      "training_step_time": 0.41929030418395996
    },
    {
      "epoch": 0.000238958740234375,
      "model_forward_time": 0.11538290977478027,
      "step": 39151
    },
    {
      "epoch": 0.000238958740234375,
      "step": 39151,
      "training_step_time": 0.4901275634765625
    },
    {
      "epoch": 0.00023896484375,
      "model_forward_time": 0.11544036865234375,
      "step": 39152
    },
    {
      "epoch": 0.00023896484375,
      "step": 39152,
      "training_step_time": 0.38274288177490234
    },
    {
      "epoch": 0.000238970947265625,
      "model_forward_time": 0.11498713493347168,
      "step": 39153
    },
    {
      "epoch": 0.000238970947265625,
      "step": 39153,
      "training_step_time": 0.3849611282348633
    },
    {
      "epoch": 0.00023897705078125,
      "model_forward_time": 0.11551117897033691,
      "step": 39154
    },
    {
      "epoch": 0.00023897705078125,
      "step": 39154,
      "training_step_time": 0.38825368881225586
    },
    {
      "epoch": 0.000238983154296875,
      "model_forward_time": 0.11554574966430664,
      "step": 39155
    },
    {
      "epoch": 0.000238983154296875,
      "step": 39155,
      "training_step_time": 0.41694188117980957
    },
    {
      "epoch": 0.0002389892578125,
      "model_forward_time": 0.11590266227722168,
      "step": 39156
    },
    {
      "epoch": 0.0002389892578125,
      "step": 39156,
      "training_step_time": 0.40165281295776367
    },
    {
      "epoch": 0.000238995361328125,
      "model_forward_time": 0.1152336597442627,
      "step": 39157
    },
    {
      "epoch": 0.000238995361328125,
      "step": 39157,
      "training_step_time": 0.3965415954589844
    },
    {
      "epoch": 0.00023900146484375,
      "model_forward_time": 0.11509180068969727,
      "step": 39158
    },
    {
      "epoch": 0.00023900146484375,
      "step": 39158,
      "training_step_time": 0.3840055465698242
    },
    {
      "epoch": 0.000239007568359375,
      "model_forward_time": 0.1153872013092041,
      "step": 39159
    },
    {
      "epoch": 0.000239007568359375,
      "step": 39159,
      "training_step_time": 0.7270221710205078
    },
    {
      "epoch": 0.000239013671875,
      "grad_norm": 0.17481206357479095,
      "learning_rate": 2.9512227337563604e-05,
      "loss": 0.039,
      "step": 39160
    },
    {
      "epoch": 0.000239013671875,
      "model_forward_time": 0.11473965644836426,
      "step": 39160
    },
    {
      "epoch": 0.000239013671875,
      "step": 39160,
      "training_step_time": 0.43866753578186035
    },
    {
      "epoch": 0.000239019775390625,
      "model_forward_time": 0.11472201347351074,
      "step": 39161
    },
    {
      "epoch": 0.000239019775390625,
      "step": 39161,
      "training_step_time": 0.49533772468566895
    },
    {
      "epoch": 0.00023902587890625,
      "model_forward_time": 0.11458325386047363,
      "step": 39162
    },
    {
      "epoch": 0.00023902587890625,
      "step": 39162,
      "training_step_time": 0.3831915855407715
    },
    {
      "epoch": 0.000239031982421875,
      "model_forward_time": 0.11453819274902344,
      "step": 39163
    },
    {
      "epoch": 0.000239031982421875,
      "step": 39163,
      "training_step_time": 0.41569042205810547
    },
    {
      "epoch": 0.0002390380859375,
      "model_forward_time": 0.11425256729125977,
      "step": 39164
    },
    {
      "epoch": 0.0002390380859375,
      "step": 39164,
      "training_step_time": 0.3928987979888916
    },
    {
      "epoch": 0.000239044189453125,
      "model_forward_time": 0.11474108695983887,
      "step": 39165
    },
    {
      "epoch": 0.000239044189453125,
      "step": 39165,
      "training_step_time": 0.4033522605895996
    },
    {
      "epoch": 0.00023905029296875,
      "model_forward_time": 0.11442017555236816,
      "step": 39166
    },
    {
      "epoch": 0.00023905029296875,
      "step": 39166,
      "training_step_time": 0.3884727954864502
    },
    {
      "epoch": 0.000239056396484375,
      "model_forward_time": 0.11561870574951172,
      "step": 39167
    },
    {
      "epoch": 0.000239056396484375,
      "step": 39167,
      "training_step_time": 0.3976459503173828
    },
    {
      "epoch": 0.0002390625,
      "model_forward_time": 0.11542820930480957,
      "step": 39168
    },
    {
      "epoch": 0.0002390625,
      "step": 39168,
      "training_step_time": 0.402738094329834
    },
    {
      "epoch": 0.000239068603515625,
      "model_forward_time": 0.11541748046875,
      "step": 39169
    },
    {
      "epoch": 0.000239068603515625,
      "step": 39169,
      "training_step_time": 0.42642736434936523
    },
    {
      "epoch": 0.00023907470703125,
      "grad_norm": 0.10828130692243576,
      "learning_rate": 2.948709232431631e-05,
      "loss": 0.0367,
      "step": 39170
    },
    {
      "epoch": 0.00023907470703125,
      "model_forward_time": 0.11528444290161133,
      "step": 39170
    },
    {
      "epoch": 0.00023907470703125,
      "step": 39170,
      "training_step_time": 0.38866758346557617
    },
    {
      "epoch": 0.000239080810546875,
      "model_forward_time": 0.11578702926635742,
      "step": 39171
    },
    {
      "epoch": 0.000239080810546875,
      "step": 39171,
      "training_step_time": 0.4791707992553711
    },
    {
      "epoch": 0.0002390869140625,
      "model_forward_time": 0.11558771133422852,
      "step": 39172
    },
    {
      "epoch": 0.0002390869140625,
      "step": 39172,
      "training_step_time": 0.38771677017211914
    },
    {
      "epoch": 0.000239093017578125,
      "model_forward_time": 0.11540770530700684,
      "step": 39173
    },
    {
      "epoch": 0.000239093017578125,
      "step": 39173,
      "training_step_time": 0.41095566749572754
    },
    {
      "epoch": 0.00023909912109375,
      "model_forward_time": 0.1150362491607666,
      "step": 39174
    },
    {
      "epoch": 0.00023909912109375,
      "step": 39174,
      "training_step_time": 0.4362678527832031
    },
    {
      "epoch": 0.000239105224609375,
      "model_forward_time": 0.11460232734680176,
      "step": 39175
    },
    {
      "epoch": 0.000239105224609375,
      "step": 39175,
      "training_step_time": 0.4071693420410156
    },
    {
      "epoch": 0.000239111328125,
      "model_forward_time": 0.1153879165649414,
      "step": 39176
    },
    {
      "epoch": 0.000239111328125,
      "step": 39176,
      "training_step_time": 0.42853593826293945
    },
    {
      "epoch": 0.000239117431640625,
      "model_forward_time": 0.1154487133026123,
      "step": 39177
    },
    {
      "epoch": 0.000239117431640625,
      "step": 39177,
      "training_step_time": 0.7114791870117188
    },
    {
      "epoch": 0.00023912353515625,
      "model_forward_time": 0.11453938484191895,
      "step": 39178
    },
    {
      "epoch": 0.00023912353515625,
      "step": 39178,
      "training_step_time": 0.39278411865234375
    },
    {
      "epoch": 0.000239129638671875,
      "model_forward_time": 0.11451888084411621,
      "step": 39179
    },
    {
      "epoch": 0.000239129638671875,
      "step": 39179,
      "training_step_time": 0.4404184818267822
    },
    {
      "epoch": 0.0002391357421875,
      "grad_norm": 0.0859113410115242,
      "learning_rate": 2.9461963542348737e-05,
      "loss": 0.0333,
      "step": 39180
    },
    {
      "epoch": 0.0002391357421875,
      "model_forward_time": 0.11451578140258789,
      "step": 39180
    },
    {
      "epoch": 0.0002391357421875,
      "step": 39180,
      "training_step_time": 0.39182233810424805
    },
    {
      "epoch": 0.000239141845703125,
      "model_forward_time": 0.11495161056518555,
      "step": 39181
    },
    {
      "epoch": 0.000239141845703125,
      "step": 39181,
      "training_step_time": 0.42592787742614746
    },
    {
      "epoch": 0.00023914794921875,
      "model_forward_time": 0.11567163467407227,
      "step": 39182
    },
    {
      "epoch": 0.00023914794921875,
      "step": 39182,
      "training_step_time": 0.4303133487701416
    },
    {
      "epoch": 0.000239154052734375,
      "model_forward_time": 0.11514759063720703,
      "step": 39183
    },
    {
      "epoch": 0.000239154052734375,
      "step": 39183,
      "training_step_time": 0.5242252349853516
    },
    {
      "epoch": 0.00023916015625,
      "model_forward_time": 0.11460542678833008,
      "step": 39184
    },
    {
      "epoch": 0.00023916015625,
      "step": 39184,
      "training_step_time": 0.386582612991333
    },
    {
      "epoch": 0.000239166259765625,
      "model_forward_time": 0.11607718467712402,
      "step": 39185
    },
    {
      "epoch": 0.000239166259765625,
      "step": 39185,
      "training_step_time": 0.38164830207824707
    },
    {
      "epoch": 0.00023917236328125,
      "model_forward_time": 0.11466813087463379,
      "step": 39186
    },
    {
      "epoch": 0.00023917236328125,
      "step": 39186,
      "training_step_time": 0.3901376724243164
    },
    {
      "epoch": 0.000239178466796875,
      "model_forward_time": 0.11526226997375488,
      "step": 39187
    },
    {
      "epoch": 0.000239178466796875,
      "step": 39187,
      "training_step_time": 0.44740772247314453
    },
    {
      "epoch": 0.0002391845703125,
      "model_forward_time": 0.11542105674743652,
      "step": 39188
    },
    {
      "epoch": 0.0002391845703125,
      "step": 39188,
      "training_step_time": 0.37885165214538574
    },
    {
      "epoch": 0.000239190673828125,
      "model_forward_time": 0.11498856544494629,
      "step": 39189
    },
    {
      "epoch": 0.000239190673828125,
      "step": 39189,
      "training_step_time": 0.7948238849639893
    },
    {
      "epoch": 0.00023919677734375,
      "grad_norm": 0.11128634959459305,
      "learning_rate": 2.943684099929436e-05,
      "loss": 0.0395,
      "step": 39190
    },
    {
      "epoch": 0.00023919677734375,
      "model_forward_time": 0.11520075798034668,
      "step": 39190
    },
    {
      "epoch": 0.00023919677734375,
      "step": 39190,
      "training_step_time": 0.38872313499450684
    },
    {
      "epoch": 0.000239202880859375,
      "model_forward_time": 0.11472415924072266,
      "step": 39191
    },
    {
      "epoch": 0.000239202880859375,
      "step": 39191,
      "training_step_time": 0.4192826747894287
    },
    {
      "epoch": 0.000239208984375,
      "model_forward_time": 0.1148829460144043,
      "step": 39192
    },
    {
      "epoch": 0.000239208984375,
      "step": 39192,
      "training_step_time": 0.4767165184020996
    },
    {
      "epoch": 0.000239215087890625,
      "model_forward_time": 0.11523175239562988,
      "step": 39193
    },
    {
      "epoch": 0.000239215087890625,
      "step": 39193,
      "training_step_time": 0.3917052745819092
    },
    {
      "epoch": 0.00023922119140625,
      "model_forward_time": 0.11429929733276367,
      "step": 39194
    },
    {
      "epoch": 0.00023922119140625,
      "step": 39194,
      "training_step_time": 0.38831353187561035
    },
    {
      "epoch": 0.000239227294921875,
      "model_forward_time": 0.1148991584777832,
      "step": 39195
    },
    {
      "epoch": 0.000239227294921875,
      "step": 39195,
      "training_step_time": 0.7101426124572754
    },
    {
      "epoch": 0.0002392333984375,
      "model_forward_time": 0.11454343795776367,
      "step": 39196
    },
    {
      "epoch": 0.0002392333984375,
      "step": 39196,
      "training_step_time": 0.37880945205688477
    },
    {
      "epoch": 0.000239239501953125,
      "model_forward_time": 0.11448788642883301,
      "step": 39197
    },
    {
      "epoch": 0.000239239501953125,
      "step": 39197,
      "training_step_time": 0.37305188179016113
    },
    {
      "epoch": 0.00023924560546875,
      "model_forward_time": 0.11464476585388184,
      "step": 39198
    },
    {
      "epoch": 0.00023924560546875,
      "step": 39198,
      "training_step_time": 0.3973388671875
    },
    {
      "epoch": 0.000239251708984375,
      "model_forward_time": 0.11467218399047852,
      "step": 39199
    },
    {
      "epoch": 0.000239251708984375,
      "step": 39199,
      "training_step_time": 0.3889341354370117
    },
    {
      "epoch": 0.0002392578125,
      "grad_norm": 0.12726715207099915,
      "learning_rate": 2.9411724702784758e-05,
      "loss": 0.0372,
      "step": 39200
    },
    {
      "epoch": 0.0002392578125,
      "model_forward_time": 0.11444664001464844,
      "step": 39200
    },
    {
      "epoch": 0.0002392578125,
      "step": 39200,
      "training_step_time": 0.43933963775634766
    },
    {
      "epoch": 0.000239263916015625,
      "model_forward_time": 0.11549067497253418,
      "step": 39201
    },
    {
      "epoch": 0.000239263916015625,
      "step": 39201,
      "training_step_time": 0.4393131732940674
    },
    {
      "epoch": 0.00023927001953125,
      "model_forward_time": 0.11504888534545898,
      "step": 39202
    },
    {
      "epoch": 0.00023927001953125,
      "step": 39202,
      "training_step_time": 0.468048095703125
    },
    {
      "epoch": 0.000239276123046875,
      "model_forward_time": 0.11485862731933594,
      "step": 39203
    },
    {
      "epoch": 0.000239276123046875,
      "step": 39203,
      "training_step_time": 0.5236899852752686
    },
    {
      "epoch": 0.0002392822265625,
      "model_forward_time": 0.11562895774841309,
      "step": 39204
    },
    {
      "epoch": 0.0002392822265625,
      "step": 39204,
      "training_step_time": 0.4356682300567627
    },
    {
      "epoch": 0.000239288330078125,
      "model_forward_time": 0.11524319648742676,
      "step": 39205
    },
    {
      "epoch": 0.000239288330078125,
      "step": 39205,
      "training_step_time": 0.3811509609222412
    },
    {
      "epoch": 0.00023929443359375,
      "model_forward_time": 0.11455893516540527,
      "step": 39206
    },
    {
      "epoch": 0.00023929443359375,
      "step": 39206,
      "training_step_time": 0.44496607780456543
    },
    {
      "epoch": 0.000239300537109375,
      "model_forward_time": 0.11578774452209473,
      "step": 39207
    },
    {
      "epoch": 0.000239300537109375,
      "step": 39207,
      "training_step_time": 0.435366153717041
    },
    {
      "epoch": 0.000239306640625,
      "model_forward_time": 0.11485028266906738,
      "step": 39208
    },
    {
      "epoch": 0.000239306640625,
      "step": 39208,
      "training_step_time": 0.39195966720581055
    },
    {
      "epoch": 0.000239312744140625,
      "model_forward_time": 0.11508440971374512,
      "step": 39209
    },
    {
      "epoch": 0.000239312744140625,
      "step": 39209,
      "training_step_time": 0.3962976932525635
    },
    {
      "epoch": 0.00023931884765625,
      "grad_norm": 0.1520971804857254,
      "learning_rate": 2.9386614660449596e-05,
      "loss": 0.0348,
      "step": 39210
    },
    {
      "epoch": 0.00023931884765625,
      "model_forward_time": 0.11470580101013184,
      "step": 39210
    },
    {
      "epoch": 0.00023931884765625,
      "step": 39210,
      "training_step_time": 0.3913846015930176
    },
    {
      "epoch": 0.000239324951171875,
      "model_forward_time": 0.11540389060974121,
      "step": 39211
    },
    {
      "epoch": 0.000239324951171875,
      "step": 39211,
      "training_step_time": 0.3944051265716553
    },
    {
      "epoch": 0.0002393310546875,
      "model_forward_time": 0.11456441879272461,
      "step": 39212
    },
    {
      "epoch": 0.0002393310546875,
      "step": 39212,
      "training_step_time": 0.39813804626464844
    },
    {
      "epoch": 0.000239337158203125,
      "model_forward_time": 0.11409354209899902,
      "step": 39213
    },
    {
      "epoch": 0.000239337158203125,
      "step": 39213,
      "training_step_time": 0.38991236686706543
    },
    {
      "epoch": 0.00023934326171875,
      "model_forward_time": 0.11513471603393555,
      "step": 39214
    },
    {
      "epoch": 0.00023934326171875,
      "step": 39214,
      "training_step_time": 0.4156932830810547
    },
    {
      "epoch": 0.000239349365234375,
      "model_forward_time": 0.11574578285217285,
      "step": 39215
    },
    {
      "epoch": 0.000239349365234375,
      "step": 39215,
      "training_step_time": 0.39916229248046875
    },
    {
      "epoch": 0.00023935546875,
      "model_forward_time": 0.11551880836486816,
      "step": 39216
    },
    {
      "epoch": 0.00023935546875,
      "step": 39216,
      "training_step_time": 0.4583747386932373
    },
    {
      "epoch": 0.000239361572265625,
      "model_forward_time": 0.11503911018371582,
      "step": 39217
    },
    {
      "epoch": 0.000239361572265625,
      "step": 39217,
      "training_step_time": 0.47519516944885254
    },
    {
      "epoch": 0.00023936767578125,
      "model_forward_time": 0.11606049537658691,
      "step": 39218
    },
    {
      "epoch": 0.00023936767578125,
      "step": 39218,
      "training_step_time": 0.5207552909851074
    },
    {
      "epoch": 0.000239373779296875,
      "model_forward_time": 0.11475658416748047,
      "step": 39219
    },
    {
      "epoch": 0.000239373779296875,
      "step": 39219,
      "training_step_time": 0.4291677474975586
    },
    {
      "epoch": 0.0002393798828125,
      "grad_norm": 0.08734051138162613,
      "learning_rate": 2.936151087991663e-05,
      "loss": 0.0398,
      "step": 39220
    },
    {
      "epoch": 0.0002393798828125,
      "model_forward_time": 0.11560320854187012,
      "step": 39220
    },
    {
      "epoch": 0.0002393798828125,
      "step": 39220,
      "training_step_time": 0.42972826957702637
    },
    {
      "epoch": 0.000239385986328125,
      "model_forward_time": 0.11571812629699707,
      "step": 39221
    },
    {
      "epoch": 0.000239385986328125,
      "step": 39221,
      "training_step_time": 0.4426236152648926
    },
    {
      "epoch": 0.00023939208984375,
      "model_forward_time": 0.11504125595092773,
      "step": 39222
    },
    {
      "epoch": 0.00023939208984375,
      "step": 39222,
      "training_step_time": 0.38953638076782227
    },
    {
      "epoch": 0.000239398193359375,
      "model_forward_time": 0.11512160301208496,
      "step": 39223
    },
    {
      "epoch": 0.000239398193359375,
      "step": 39223,
      "training_step_time": 0.3970808982849121
    },
    {
      "epoch": 0.000239404296875,
      "model_forward_time": 0.11490392684936523,
      "step": 39224
    },
    {
      "epoch": 0.000239404296875,
      "step": 39224,
      "training_step_time": 0.39666223526000977
    },
    {
      "epoch": 0.000239410400390625,
      "model_forward_time": 0.11512112617492676,
      "step": 39225
    },
    {
      "epoch": 0.000239410400390625,
      "step": 39225,
      "training_step_time": 0.3897280693054199
    },
    {
      "epoch": 0.00023941650390625,
      "model_forward_time": 0.11649036407470703,
      "step": 39226
    },
    {
      "epoch": 0.00023941650390625,
      "step": 39226,
      "training_step_time": 0.38421130180358887
    },
    {
      "epoch": 0.000239422607421875,
      "model_forward_time": 0.11481666564941406,
      "step": 39227
    },
    {
      "epoch": 0.000239422607421875,
      "step": 39227,
      "training_step_time": 0.39812231063842773
    },
    {
      "epoch": 0.0002394287109375,
      "model_forward_time": 0.11575865745544434,
      "step": 39228
    },
    {
      "epoch": 0.0002394287109375,
      "step": 39228,
      "training_step_time": 0.3996543884277344
    },
    {
      "epoch": 0.000239434814453125,
      "model_forward_time": 0.11528873443603516,
      "step": 39229
    },
    {
      "epoch": 0.000239434814453125,
      "step": 39229,
      "training_step_time": 0.4584798812866211
    },
    {
      "epoch": 0.00023944091796875,
      "grad_norm": 0.08575169742107391,
      "learning_rate": 2.9336413368811723e-05,
      "loss": 0.0329,
      "step": 39230
    },
    {
      "epoch": 0.00023944091796875,
      "model_forward_time": 0.1152489185333252,
      "step": 39230
    },
    {
      "epoch": 0.00023944091796875,
      "step": 39230,
      "training_step_time": 0.4103729724884033
    },
    {
      "epoch": 0.000239447021484375,
      "model_forward_time": 0.11507129669189453,
      "step": 39231
    },
    {
      "epoch": 0.000239447021484375,
      "step": 39231,
      "training_step_time": 0.4454214572906494
    },
    {
      "epoch": 0.000239453125,
      "model_forward_time": 0.11543464660644531,
      "step": 39232
    },
    {
      "epoch": 0.000239453125,
      "step": 39232,
      "training_step_time": 0.4976804256439209
    },
    {
      "epoch": 0.000239459228515625,
      "model_forward_time": 0.11517524719238281,
      "step": 39233
    },
    {
      "epoch": 0.000239459228515625,
      "step": 39233,
      "training_step_time": 0.4251997470855713
    },
    {
      "epoch": 0.00023946533203125,
      "model_forward_time": 0.11473584175109863,
      "step": 39234
    },
    {
      "epoch": 0.00023946533203125,
      "step": 39234,
      "training_step_time": 0.4925873279571533
    },
    {
      "epoch": 0.000239471435546875,
      "model_forward_time": 0.11579251289367676,
      "step": 39235
    },
    {
      "epoch": 0.000239471435546875,
      "step": 39235,
      "training_step_time": 0.45961642265319824
    },
    {
      "epoch": 0.0002394775390625,
      "model_forward_time": 0.11493349075317383,
      "step": 39236
    },
    {
      "epoch": 0.0002394775390625,
      "step": 39236,
      "training_step_time": 0.40769362449645996
    },
    {
      "epoch": 0.000239483642578125,
      "model_forward_time": 0.11491847038269043,
      "step": 39237
    },
    {
      "epoch": 0.000239483642578125,
      "step": 39237,
      "training_step_time": 0.38671278953552246
    },
    {
      "epoch": 0.00023948974609375,
      "model_forward_time": 0.11517071723937988,
      "step": 39238
    },
    {
      "epoch": 0.00023948974609375,
      "step": 39238,
      "training_step_time": 0.38411784172058105
    },
    {
      "epoch": 0.000239495849609375,
      "model_forward_time": 0.11475300788879395,
      "step": 39239
    },
    {
      "epoch": 0.000239495849609375,
      "step": 39239,
      "training_step_time": 0.3847811222076416
    },
    {
      "epoch": 0.000239501953125,
      "grad_norm": 0.10869865119457245,
      "learning_rate": 2.931132213475884e-05,
      "loss": 0.0366,
      "step": 39240
    },
    {
      "epoch": 0.000239501953125,
      "model_forward_time": 0.11545944213867188,
      "step": 39240
    },
    {
      "epoch": 0.000239501953125,
      "step": 39240,
      "training_step_time": 0.3886268138885498
    },
    {
      "epoch": 0.000239508056640625,
      "model_forward_time": 0.11560201644897461,
      "step": 39241
    },
    {
      "epoch": 0.000239508056640625,
      "step": 39241,
      "training_step_time": 0.39833950996398926
    },
    {
      "epoch": 0.00023951416015625,
      "model_forward_time": 0.11471319198608398,
      "step": 39242
    },
    {
      "epoch": 0.00023951416015625,
      "step": 39242,
      "training_step_time": 0.4041006565093994
    },
    {
      "epoch": 0.000239520263671875,
      "model_forward_time": 0.11528849601745605,
      "step": 39243
    },
    {
      "epoch": 0.000239520263671875,
      "step": 39243,
      "training_step_time": 0.395125150680542
    },
    {
      "epoch": 0.0002395263671875,
      "model_forward_time": 0.11569786071777344,
      "step": 39244
    },
    {
      "epoch": 0.0002395263671875,
      "step": 39244,
      "training_step_time": 0.4172215461730957
    },
    {
      "epoch": 0.000239532470703125,
      "model_forward_time": 0.11480283737182617,
      "step": 39245
    },
    {
      "epoch": 0.000239532470703125,
      "step": 39245,
      "training_step_time": 0.4970240592956543
    },
    {
      "epoch": 0.00023953857421875,
      "model_forward_time": 0.11460733413696289,
      "step": 39246
    },
    {
      "epoch": 0.00023953857421875,
      "step": 39246,
      "training_step_time": 0.43604445457458496
    },
    {
      "epoch": 0.000239544677734375,
      "model_forward_time": 0.11526370048522949,
      "step": 39247
    },
    {
      "epoch": 0.000239544677734375,
      "step": 39247,
      "training_step_time": 0.5017213821411133
    },
    {
      "epoch": 0.00023955078125,
      "model_forward_time": 0.11486530303955078,
      "step": 39248
    },
    {
      "epoch": 0.00023955078125,
      "step": 39248,
      "training_step_time": 0.4824635982513428
    },
    {
      "epoch": 0.000239556884765625,
      "model_forward_time": 0.11497902870178223,
      "step": 39249
    },
    {
      "epoch": 0.000239556884765625,
      "step": 39249,
      "training_step_time": 0.39014434814453125
    },
    {
      "epoch": 0.00023956298828125,
      "grad_norm": 0.11386872082948685,
      "learning_rate": 2.928623718538006e-05,
      "loss": 0.0413,
      "step": 39250
    },
    {
      "epoch": 0.00023956298828125,
      "model_forward_time": 0.11477899551391602,
      "step": 39250
    },
    {
      "epoch": 0.00023956298828125,
      "step": 39250,
      "training_step_time": 0.3878209590911865
    },
    {
      "epoch": 0.000239569091796875,
      "model_forward_time": 0.11471700668334961,
      "step": 39251
    },
    {
      "epoch": 0.000239569091796875,
      "step": 39251,
      "training_step_time": 0.37718892097473145
    },
    {
      "epoch": 0.0002395751953125,
      "model_forward_time": 0.11488723754882812,
      "step": 39252
    },
    {
      "epoch": 0.0002395751953125,
      "step": 39252,
      "training_step_time": 0.39935994148254395
    },
    {
      "epoch": 0.000239581298828125,
      "model_forward_time": 0.11489272117614746,
      "step": 39253
    },
    {
      "epoch": 0.000239581298828125,
      "step": 39253,
      "training_step_time": 0.3933370113372803
    },
    {
      "epoch": 0.00023958740234375,
      "model_forward_time": 0.11569976806640625,
      "step": 39254
    },
    {
      "epoch": 0.00023958740234375,
      "step": 39254,
      "training_step_time": 0.39089417457580566
    },
    {
      "epoch": 0.000239593505859375,
      "model_forward_time": 0.11556267738342285,
      "step": 39255
    },
    {
      "epoch": 0.000239593505859375,
      "step": 39255,
      "training_step_time": 0.3900489807128906
    },
    {
      "epoch": 0.000239599609375,
      "model_forward_time": 0.11534595489501953,
      "step": 39256
    },
    {
      "epoch": 0.000239599609375,
      "step": 39256,
      "training_step_time": 0.3871326446533203
    },
    {
      "epoch": 0.000239605712890625,
      "model_forward_time": 0.11551666259765625,
      "step": 39257
    },
    {
      "epoch": 0.000239605712890625,
      "step": 39257,
      "training_step_time": 0.38330793380737305
    },
    {
      "epoch": 0.00023961181640625,
      "model_forward_time": 0.1165323257446289,
      "step": 39258
    },
    {
      "epoch": 0.00023961181640625,
      "step": 39258,
      "training_step_time": 0.48674488067626953
    },
    {
      "epoch": 0.000239617919921875,
      "model_forward_time": 0.1151130199432373,
      "step": 39259
    },
    {
      "epoch": 0.000239617919921875,
      "step": 39259,
      "training_step_time": 0.4070243835449219
    },
    {
      "epoch": 0.0002396240234375,
      "grad_norm": 0.11134961247444153,
      "learning_rate": 2.9261158528295495e-05,
      "loss": 0.0368,
      "step": 39260
    },
    {
      "epoch": 0.0002396240234375,
      "model_forward_time": 0.11673784255981445,
      "step": 39260
    },
    {
      "epoch": 0.0002396240234375,
      "step": 39260,
      "training_step_time": 0.3973052501678467
    },
    {
      "epoch": 0.000239630126953125,
      "model_forward_time": 0.11535954475402832,
      "step": 39261
    },
    {
      "epoch": 0.000239630126953125,
      "step": 39261,
      "training_step_time": 0.3784306049346924
    },
    {
      "epoch": 0.00023963623046875,
      "model_forward_time": 0.11550474166870117,
      "step": 39262
    },
    {
      "epoch": 0.00023963623046875,
      "step": 39262,
      "training_step_time": 0.4043848514556885
    },
    {
      "epoch": 0.000239642333984375,
      "model_forward_time": 0.11521744728088379,
      "step": 39263
    },
    {
      "epoch": 0.000239642333984375,
      "step": 39263,
      "training_step_time": 0.4034707546234131
    },
    {
      "epoch": 0.0002396484375,
      "model_forward_time": 0.1153569221496582,
      "step": 39264
    },
    {
      "epoch": 0.0002396484375,
      "step": 39264,
      "training_step_time": 0.3791773319244385
    },
    {
      "epoch": 0.000239654541015625,
      "model_forward_time": 0.11496162414550781,
      "step": 39265
    },
    {
      "epoch": 0.000239654541015625,
      "step": 39265,
      "training_step_time": 0.3994901180267334
    },
    {
      "epoch": 0.00023966064453125,
      "model_forward_time": 0.11566472053527832,
      "step": 39266
    },
    {
      "epoch": 0.00023966064453125,
      "step": 39266,
      "training_step_time": 0.38396263122558594
    },
    {
      "epoch": 0.000239666748046875,
      "model_forward_time": 0.11527657508850098,
      "step": 39267
    },
    {
      "epoch": 0.000239666748046875,
      "step": 39267,
      "training_step_time": 0.39156103134155273
    },
    {
      "epoch": 0.0002396728515625,
      "model_forward_time": 0.11514091491699219,
      "step": 39268
    },
    {
      "epoch": 0.0002396728515625,
      "step": 39268,
      "training_step_time": 0.39086079597473145
    },
    {
      "epoch": 0.000239678955078125,
      "model_forward_time": 0.11548709869384766,
      "step": 39269
    },
    {
      "epoch": 0.000239678955078125,
      "step": 39269,
      "training_step_time": 0.6164209842681885
    },
    {
      "epoch": 0.00023968505859375,
      "grad_norm": 0.11188460141420364,
      "learning_rate": 2.9236086171123404e-05,
      "loss": 0.0343,
      "step": 39270
    },
    {
      "epoch": 0.00023968505859375,
      "model_forward_time": 0.11481857299804688,
      "step": 39270
    },
    {
      "epoch": 0.00023968505859375,
      "step": 39270,
      "training_step_time": 0.39205312728881836
    },
    {
      "epoch": 0.000239691162109375,
      "model_forward_time": 0.1156768798828125,
      "step": 39271
    },
    {
      "epoch": 0.000239691162109375,
      "step": 39271,
      "training_step_time": 0.3919868469238281
    },
    {
      "epoch": 0.000239697265625,
      "model_forward_time": 0.11499881744384766,
      "step": 39272
    },
    {
      "epoch": 0.000239697265625,
      "step": 39272,
      "training_step_time": 0.3875269889831543
    },
    {
      "epoch": 0.000239703369140625,
      "model_forward_time": 0.11556196212768555,
      "step": 39273
    },
    {
      "epoch": 0.000239703369140625,
      "step": 39273,
      "training_step_time": 0.443756103515625
    },
    {
      "epoch": 0.00023970947265625,
      "model_forward_time": 0.11546707153320312,
      "step": 39274
    },
    {
      "epoch": 0.00023970947265625,
      "step": 39274,
      "training_step_time": 0.4069046974182129
    },
    {
      "epoch": 0.000239715576171875,
      "model_forward_time": 0.11560654640197754,
      "step": 39275
    },
    {
      "epoch": 0.000239715576171875,
      "step": 39275,
      "training_step_time": 0.4962620735168457
    },
    {
      "epoch": 0.0002397216796875,
      "model_forward_time": 0.11519145965576172,
      "step": 39276
    },
    {
      "epoch": 0.0002397216796875,
      "step": 39276,
      "training_step_time": 0.5111079216003418
    },
    {
      "epoch": 0.000239727783203125,
      "model_forward_time": 0.11532711982727051,
      "step": 39277
    },
    {
      "epoch": 0.000239727783203125,
      "step": 39277,
      "training_step_time": 0.42563295364379883
    },
    {
      "epoch": 0.00023973388671875,
      "model_forward_time": 0.11519098281860352,
      "step": 39278
    },
    {
      "epoch": 0.00023973388671875,
      "step": 39278,
      "training_step_time": 0.38674259185791016
    },
    {
      "epoch": 0.000239739990234375,
      "model_forward_time": 0.11515569686889648,
      "step": 39279
    },
    {
      "epoch": 0.000239739990234375,
      "step": 39279,
      "training_step_time": 0.38449764251708984
    },
    {
      "epoch": 0.00023974609375,
      "grad_norm": 0.10824894905090332,
      "learning_rate": 2.9211020121480083e-05,
      "loss": 0.0366,
      "step": 39280
    },
    {
      "epoch": 0.00023974609375,
      "model_forward_time": 0.11492347717285156,
      "step": 39280
    },
    {
      "epoch": 0.00023974609375,
      "step": 39280,
      "training_step_time": 0.3797180652618408
    },
    {
      "epoch": 0.000239752197265625,
      "model_forward_time": 0.11611437797546387,
      "step": 39281
    },
    {
      "epoch": 0.000239752197265625,
      "step": 39281,
      "training_step_time": 0.4926271438598633
    },
    {
      "epoch": 0.00023975830078125,
      "model_forward_time": 0.11512637138366699,
      "step": 39282
    },
    {
      "epoch": 0.00023975830078125,
      "step": 39282,
      "training_step_time": 0.39559412002563477
    },
    {
      "epoch": 0.000239764404296875,
      "model_forward_time": 0.1157386302947998,
      "step": 39283
    },
    {
      "epoch": 0.000239764404296875,
      "step": 39283,
      "training_step_time": 0.3884871006011963
    },
    {
      "epoch": 0.0002397705078125,
      "model_forward_time": 0.1153557300567627,
      "step": 39284
    },
    {
      "epoch": 0.0002397705078125,
      "step": 39284,
      "training_step_time": 0.3988161087036133
    },
    {
      "epoch": 0.000239776611328125,
      "model_forward_time": 0.11562609672546387,
      "step": 39285
    },
    {
      "epoch": 0.000239776611328125,
      "step": 39285,
      "training_step_time": 0.3816261291503906
    },
    {
      "epoch": 0.00023978271484375,
      "model_forward_time": 0.11731791496276855,
      "step": 39286
    },
    {
      "epoch": 0.00023978271484375,
      "step": 39286,
      "training_step_time": 0.3802452087402344
    },
    {
      "epoch": 0.000239788818359375,
      "model_forward_time": 0.11592626571655273,
      "step": 39287
    },
    {
      "epoch": 0.000239788818359375,
      "step": 39287,
      "training_step_time": 0.584942102432251
    },
    {
      "epoch": 0.000239794921875,
      "model_forward_time": 0.11554241180419922,
      "step": 39288
    },
    {
      "epoch": 0.000239794921875,
      "step": 39288,
      "training_step_time": 0.3994278907775879
    },
    {
      "epoch": 0.000239801025390625,
      "model_forward_time": 0.11528539657592773,
      "step": 39289
    },
    {
      "epoch": 0.000239801025390625,
      "step": 39289,
      "training_step_time": 0.4456615447998047
    },
    {
      "epoch": 0.00023980712890625,
      "grad_norm": 0.13633839786052704,
      "learning_rate": 2.918596038697995e-05,
      "loss": 0.039,
      "step": 39290
    },
    {
      "epoch": 0.00023980712890625,
      "model_forward_time": 0.11522841453552246,
      "step": 39290
    },
    {
      "epoch": 0.00023980712890625,
      "step": 39290,
      "training_step_time": 0.4145970344543457
    },
    {
      "epoch": 0.000239813232421875,
      "model_forward_time": 0.11516094207763672,
      "step": 39291
    },
    {
      "epoch": 0.000239813232421875,
      "step": 39291,
      "training_step_time": 0.4850802421569824
    },
    {
      "epoch": 0.0002398193359375,
      "model_forward_time": 0.11606192588806152,
      "step": 39292
    },
    {
      "epoch": 0.0002398193359375,
      "step": 39292,
      "training_step_time": 0.38610005378723145
    },
    {
      "epoch": 0.000239825439453125,
      "model_forward_time": 0.11543059349060059,
      "step": 39293
    },
    {
      "epoch": 0.000239825439453125,
      "step": 39293,
      "training_step_time": 0.45586347579956055
    },
    {
      "epoch": 0.00023983154296875,
      "model_forward_time": 0.11463689804077148,
      "step": 39294
    },
    {
      "epoch": 0.00023983154296875,
      "step": 39294,
      "training_step_time": 0.3888437747955322
    },
    {
      "epoch": 0.000239837646484375,
      "model_forward_time": 0.11575150489807129,
      "step": 39295
    },
    {
      "epoch": 0.000239837646484375,
      "step": 39295,
      "training_step_time": 0.3910026550292969
    },
    {
      "epoch": 0.00023984375,
      "model_forward_time": 0.11507630348205566,
      "step": 39296
    },
    {
      "epoch": 0.00023984375,
      "step": 39296,
      "training_step_time": 0.38507580757141113
    },
    {
      "epoch": 0.000239849853515625,
      "model_forward_time": 0.11533379554748535,
      "step": 39297
    },
    {
      "epoch": 0.000239849853515625,
      "step": 39297,
      "training_step_time": 0.386707067489624
    },
    {
      "epoch": 0.00023985595703125,
      "model_forward_time": 0.11584329605102539,
      "step": 39298
    },
    {
      "epoch": 0.00023985595703125,
      "step": 39298,
      "training_step_time": 0.40344691276550293
    },
    {
      "epoch": 0.000239862060546875,
      "model_forward_time": 0.11595892906188965,
      "step": 39299
    },
    {
      "epoch": 0.000239862060546875,
      "step": 39299,
      "training_step_time": 0.6098942756652832
    },
    {
      "epoch": 0.0002398681640625,
      "grad_norm": 0.1150837317109108,
      "learning_rate": 2.916090697523549e-05,
      "loss": 0.0363,
      "step": 39300
    },
    {
      "epoch": 0.0002398681640625,
      "model_forward_time": 0.11506533622741699,
      "step": 39300
    },
    {
      "epoch": 0.0002398681640625,
      "step": 39300,
      "training_step_time": 0.39310550689697266
    },
    {
      "epoch": 0.000239874267578125,
      "model_forward_time": 0.11543798446655273,
      "step": 39301
    },
    {
      "epoch": 0.000239874267578125,
      "step": 39301,
      "training_step_time": 0.39353394508361816
    },
    {
      "epoch": 0.00023988037109375,
      "model_forward_time": 0.1158897876739502,
      "step": 39302
    },
    {
      "epoch": 0.00023988037109375,
      "step": 39302,
      "training_step_time": 0.46392130851745605
    },
    {
      "epoch": 0.000239886474609375,
      "model_forward_time": 0.11484408378601074,
      "step": 39303
    },
    {
      "epoch": 0.000239886474609375,
      "step": 39303,
      "training_step_time": 0.3674910068511963
    },
    {
      "epoch": 0.000239892578125,
      "model_forward_time": 0.11471176147460938,
      "step": 39304
    },
    {
      "epoch": 0.000239892578125,
      "step": 39304,
      "training_step_time": 0.46150660514831543
    },
    {
      "epoch": 0.000239898681640625,
      "model_forward_time": 0.11453127861022949,
      "step": 39305
    },
    {
      "epoch": 0.000239898681640625,
      "step": 39305,
      "training_step_time": 0.4063279628753662
    },
    {
      "epoch": 0.00023990478515625,
      "model_forward_time": 0.1151723861694336,
      "step": 39306
    },
    {
      "epoch": 0.00023990478515625,
      "step": 39306,
      "training_step_time": 0.4809741973876953
    },
    {
      "epoch": 0.000239910888671875,
      "model_forward_time": 0.11578607559204102,
      "step": 39307
    },
    {
      "epoch": 0.000239910888671875,
      "step": 39307,
      "training_step_time": 0.3799431324005127
    },
    {
      "epoch": 0.0002399169921875,
      "model_forward_time": 0.11475062370300293,
      "step": 39308
    },
    {
      "epoch": 0.0002399169921875,
      "step": 39308,
      "training_step_time": 0.37915563583374023
    },
    {
      "epoch": 0.000239923095703125,
      "model_forward_time": 0.114959716796875,
      "step": 39309
    },
    {
      "epoch": 0.000239923095703125,
      "step": 39309,
      "training_step_time": 0.3833498954772949
    },
    {
      "epoch": 0.00023992919921875,
      "grad_norm": 0.09624709188938141,
      "learning_rate": 2.9135859893857248e-05,
      "loss": 0.0418,
      "step": 39310
    },
    {
      "epoch": 0.00023992919921875,
      "model_forward_time": 0.1163933277130127,
      "step": 39310
    },
    {
      "epoch": 0.00023992919921875,
      "step": 39310,
      "training_step_time": 0.3787996768951416
    },
    {
      "epoch": 0.000239935302734375,
      "model_forward_time": 0.11486363410949707,
      "step": 39311
    },
    {
      "epoch": 0.000239935302734375,
      "step": 39311,
      "training_step_time": 0.38033413887023926
    },
    {
      "epoch": 0.00023994140625,
      "model_forward_time": 0.11554217338562012,
      "step": 39312
    },
    {
      "epoch": 0.00023994140625,
      "step": 39312,
      "training_step_time": 0.38350415229797363
    },
    {
      "epoch": 0.000239947509765625,
      "model_forward_time": 0.11564803123474121,
      "step": 39313
    },
    {
      "epoch": 0.000239947509765625,
      "step": 39313,
      "training_step_time": 0.39899420738220215
    },
    {
      "epoch": 0.00023995361328125,
      "model_forward_time": 0.11545896530151367,
      "step": 39314
    },
    {
      "epoch": 0.00023995361328125,
      "step": 39314,
      "training_step_time": 0.3849661350250244
    },
    {
      "epoch": 0.000239959716796875,
      "model_forward_time": 0.1153564453125,
      "step": 39315
    },
    {
      "epoch": 0.000239959716796875,
      "step": 39315,
      "training_step_time": 0.38800811767578125
    },
    {
      "epoch": 0.0002399658203125,
      "model_forward_time": 0.11551952362060547,
      "step": 39316
    },
    {
      "epoch": 0.0002399658203125,
      "step": 39316,
      "training_step_time": 0.41561341285705566
    },
    {
      "epoch": 0.000239971923828125,
      "model_forward_time": 0.11507153511047363,
      "step": 39317
    },
    {
      "epoch": 0.000239971923828125,
      "step": 39317,
      "training_step_time": 0.4238588809967041
    },
    {
      "epoch": 0.00023997802734375,
      "model_forward_time": 0.1156158447265625,
      "step": 39318
    },
    {
      "epoch": 0.00023997802734375,
      "step": 39318,
      "training_step_time": 0.43700480461120605
    },
    {
      "epoch": 0.000239984130859375,
      "model_forward_time": 0.1154329776763916,
      "step": 39319
    },
    {
      "epoch": 0.000239984130859375,
      "step": 39319,
      "training_step_time": 0.44878315925598145
    },
    {
      "epoch": 0.000239990234375,
      "grad_norm": 0.13921116292476654,
      "learning_rate": 2.9110819150453927e-05,
      "loss": 0.0398,
      "step": 39320
    },
    {
      "epoch": 0.000239990234375,
      "model_forward_time": 0.11627888679504395,
      "step": 39320
    },
    {
      "epoch": 0.000239990234375,
      "step": 39320,
      "training_step_time": 0.4438357353210449
    },
    {
      "epoch": 0.000239996337890625,
      "model_forward_time": 0.11519479751586914,
      "step": 39321
    },
    {
      "epoch": 0.000239996337890625,
      "step": 39321,
      "training_step_time": 0.43071961402893066
    },
    {
      "epoch": 0.00024000244140625,
      "model_forward_time": 0.11478924751281738,
      "step": 39322
    },
    {
      "epoch": 0.00024000244140625,
      "step": 39322,
      "training_step_time": 0.38434386253356934
    },
    {
      "epoch": 0.000240008544921875,
      "model_forward_time": 0.11562013626098633,
      "step": 39323
    },
    {
      "epoch": 0.000240008544921875,
      "step": 39323,
      "training_step_time": 0.3932771682739258
    },
    {
      "epoch": 0.0002400146484375,
      "model_forward_time": 0.11544299125671387,
      "step": 39324
    },
    {
      "epoch": 0.0002400146484375,
      "step": 39324,
      "training_step_time": 0.41049766540527344
    },
    {
      "epoch": 0.000240020751953125,
      "model_forward_time": 0.11516499519348145,
      "step": 39325
    },
    {
      "epoch": 0.000240020751953125,
      "step": 39325,
      "training_step_time": 0.39904332160949707
    },
    {
      "epoch": 0.00024002685546875,
      "model_forward_time": 0.11567258834838867,
      "step": 39326
    },
    {
      "epoch": 0.00024002685546875,
      "step": 39326,
      "training_step_time": 0.39736247062683105
    },
    {
      "epoch": 0.000240032958984375,
      "model_forward_time": 0.11565637588500977,
      "step": 39327
    },
    {
      "epoch": 0.000240032958984375,
      "step": 39327,
      "training_step_time": 0.3931865692138672
    },
    {
      "epoch": 0.0002400390625,
      "model_forward_time": 0.1152491569519043,
      "step": 39328
    },
    {
      "epoch": 0.0002400390625,
      "step": 39328,
      "training_step_time": 0.3921339511871338
    },
    {
      "epoch": 0.000240045166015625,
      "model_forward_time": 0.11568021774291992,
      "step": 39329
    },
    {
      "epoch": 0.000240045166015625,
      "step": 39329,
      "training_step_time": 0.3885767459869385
    },
    {
      "epoch": 0.00024005126953125,
      "grad_norm": 0.1298312395811081,
      "learning_rate": 2.9085784752632157e-05,
      "loss": 0.0359,
      "step": 39330
    },
    {
      "epoch": 0.00024005126953125,
      "model_forward_time": 0.115325927734375,
      "step": 39330
    },
    {
      "epoch": 0.00024005126953125,
      "step": 39330,
      "training_step_time": 0.4268498420715332
    },
    {
      "epoch": 0.000240057373046875,
      "model_forward_time": 0.11589407920837402,
      "step": 39331
    },
    {
      "epoch": 0.000240057373046875,
      "step": 39331,
      "training_step_time": 0.4178500175476074
    },
    {
      "epoch": 0.0002400634765625,
      "model_forward_time": 0.11562299728393555,
      "step": 39332
    },
    {
      "epoch": 0.0002400634765625,
      "step": 39332,
      "training_step_time": 0.45806360244750977
    },
    {
      "epoch": 0.000240069580078125,
      "model_forward_time": 0.11559629440307617,
      "step": 39333
    },
    {
      "epoch": 0.000240069580078125,
      "step": 39333,
      "training_step_time": 0.781649112701416
    },
    {
      "epoch": 0.00024007568359375,
      "model_forward_time": 0.11461234092712402,
      "step": 39334
    },
    {
      "epoch": 0.00024007568359375,
      "step": 39334,
      "training_step_time": 0.4795546531677246
    },
    {
      "epoch": 0.000240081787109375,
      "model_forward_time": 0.11397933959960938,
      "step": 39335
    },
    {
      "epoch": 0.000240081787109375,
      "step": 39335,
      "training_step_time": 0.3763160705566406
    },
    {
      "epoch": 0.000240087890625,
      "model_forward_time": 0.11435294151306152,
      "step": 39336
    },
    {
      "epoch": 0.000240087890625,
      "step": 39336,
      "training_step_time": 0.3849034309387207
    },
    {
      "epoch": 0.000240093994140625,
      "model_forward_time": 0.11405062675476074,
      "step": 39337
    },
    {
      "epoch": 0.000240093994140625,
      "step": 39337,
      "training_step_time": 0.38944172859191895
    },
    {
      "epoch": 0.00024010009765625,
      "model_forward_time": 0.11420035362243652,
      "step": 39338
    },
    {
      "epoch": 0.00024010009765625,
      "step": 39338,
      "training_step_time": 0.3902146816253662
    },
    {
      "epoch": 0.000240106201171875,
      "model_forward_time": 0.11506009101867676,
      "step": 39339
    },
    {
      "epoch": 0.000240106201171875,
      "step": 39339,
      "training_step_time": 0.43255066871643066
    },
    {
      "epoch": 0.0002401123046875,
      "grad_norm": 0.14106903970241547,
      "learning_rate": 2.9060756707996796e-05,
      "loss": 0.0462,
      "step": 39340
    },
    {
      "epoch": 0.0002401123046875,
      "model_forward_time": 0.11514639854431152,
      "step": 39340
    },
    {
      "epoch": 0.0002401123046875,
      "step": 39340,
      "training_step_time": 0.38805675506591797
    },
    {
      "epoch": 0.000240118408203125,
      "model_forward_time": 0.11495780944824219,
      "step": 39341
    },
    {
      "epoch": 0.000240118408203125,
      "step": 39341,
      "training_step_time": 0.37891244888305664
    },
    {
      "epoch": 0.00024012451171875,
      "model_forward_time": 0.11522650718688965,
      "step": 39342
    },
    {
      "epoch": 0.00024012451171875,
      "step": 39342,
      "training_step_time": 0.3955259323120117
    },
    {
      "epoch": 0.000240130615234375,
      "model_forward_time": 0.11533236503601074,
      "step": 39343
    },
    {
      "epoch": 0.000240130615234375,
      "step": 39343,
      "training_step_time": 0.4541623592376709
    },
    {
      "epoch": 0.00024013671875,
      "model_forward_time": 0.1148381233215332,
      "step": 39344
    },
    {
      "epoch": 0.00024013671875,
      "step": 39344,
      "training_step_time": 0.4083833694458008
    },
    {
      "epoch": 0.000240142822265625,
      "model_forward_time": 0.11506056785583496,
      "step": 39345
    },
    {
      "epoch": 0.000240142822265625,
      "step": 39345,
      "training_step_time": 0.647890567779541
    },
    {
      "epoch": 0.00024014892578125,
      "model_forward_time": 0.11470985412597656,
      "step": 39346
    },
    {
      "epoch": 0.00024014892578125,
      "step": 39346,
      "training_step_time": 0.36525702476501465
    },
    {
      "epoch": 0.000240155029296875,
      "model_forward_time": 0.11494731903076172,
      "step": 39347
    },
    {
      "epoch": 0.000240155029296875,
      "step": 39347,
      "training_step_time": 0.442699670791626
    },
    {
      "epoch": 0.0002401611328125,
      "model_forward_time": 0.11469006538391113,
      "step": 39348
    },
    {
      "epoch": 0.0002401611328125,
      "step": 39348,
      "training_step_time": 0.46666502952575684
    },
    {
      "epoch": 0.000240167236328125,
      "model_forward_time": 0.11480951309204102,
      "step": 39349
    },
    {
      "epoch": 0.000240167236328125,
      "step": 39349,
      "training_step_time": 0.38503074645996094
    },
    {
      "epoch": 0.00024017333984375,
      "grad_norm": 0.13494046032428741,
      "learning_rate": 2.9035735024150674e-05,
      "loss": 0.0354,
      "step": 39350
    },
    {
      "epoch": 0.00024017333984375,
      "model_forward_time": 0.11412239074707031,
      "step": 39350
    },
    {
      "epoch": 0.00024017333984375,
      "step": 39350,
      "training_step_time": 0.3875424861907959
    },
    {
      "epoch": 0.000240179443359375,
      "model_forward_time": 0.1150217056274414,
      "step": 39351
    },
    {
      "epoch": 0.000240179443359375,
      "step": 39351,
      "training_step_time": 0.4251716136932373
    },
    {
      "epoch": 0.000240185546875,
      "model_forward_time": 0.11519026756286621,
      "step": 39352
    },
    {
      "epoch": 0.000240185546875,
      "step": 39352,
      "training_step_time": 0.38869142532348633
    },
    {
      "epoch": 0.000240191650390625,
      "model_forward_time": 0.11501288414001465,
      "step": 39353
    },
    {
      "epoch": 0.000240191650390625,
      "step": 39353,
      "training_step_time": 0.3955214023590088
    },
    {
      "epoch": 0.00024019775390625,
      "model_forward_time": 0.11436629295349121,
      "step": 39354
    },
    {
      "epoch": 0.00024019775390625,
      "step": 39354,
      "training_step_time": 0.3912374973297119
    },
    {
      "epoch": 0.000240203857421875,
      "model_forward_time": 0.11479783058166504,
      "step": 39355
    },
    {
      "epoch": 0.000240203857421875,
      "step": 39355,
      "training_step_time": 0.39347219467163086
    },
    {
      "epoch": 0.0002402099609375,
      "model_forward_time": 0.11496853828430176,
      "step": 39356
    },
    {
      "epoch": 0.0002402099609375,
      "step": 39356,
      "training_step_time": 0.4284641742706299
    },
    {
      "epoch": 0.000240216064453125,
      "model_forward_time": 0.11537480354309082,
      "step": 39357
    },
    {
      "epoch": 0.000240216064453125,
      "step": 39357,
      "training_step_time": 0.4074833393096924
    },
    {
      "epoch": 0.00024022216796875,
      "model_forward_time": 0.11529803276062012,
      "step": 39358
    },
    {
      "epoch": 0.00024022216796875,
      "step": 39358,
      "training_step_time": 0.39212679862976074
    },
    {
      "epoch": 0.000240228271484375,
      "model_forward_time": 0.1151285171508789,
      "step": 39359
    },
    {
      "epoch": 0.000240228271484375,
      "step": 39359,
      "training_step_time": 0.3811368942260742
    },
    {
      "epoch": 0.000240234375,
      "grad_norm": 0.11392629891633987,
      "learning_rate": 2.9010719708694722e-05,
      "loss": 0.0347,
      "step": 39360
    },
    {
      "epoch": 0.000240234375,
      "model_forward_time": 0.11475610733032227,
      "step": 39360
    },
    {
      "epoch": 0.000240234375,
      "step": 39360,
      "training_step_time": 0.3945732116699219
    },
    {
      "epoch": 0.000240240478515625,
      "model_forward_time": 0.11565518379211426,
      "step": 39361
    },
    {
      "epoch": 0.000240240478515625,
      "step": 39361,
      "training_step_time": 0.4400162696838379
    },
    {
      "epoch": 0.00024024658203125,
      "model_forward_time": 0.11615967750549316,
      "step": 39362
    },
    {
      "epoch": 0.00024024658203125,
      "step": 39362,
      "training_step_time": 0.4540884494781494
    },
    {
      "epoch": 0.000240252685546875,
      "model_forward_time": 0.11640596389770508,
      "step": 39363
    },
    {
      "epoch": 0.000240252685546875,
      "step": 39363,
      "training_step_time": 0.5528793334960938
    },
    {
      "epoch": 0.0002402587890625,
      "model_forward_time": 0.1196281909942627,
      "step": 39364
    },
    {
      "epoch": 0.0002402587890625,
      "step": 39364,
      "training_step_time": 0.5918543338775635
    },
    {
      "epoch": 0.000240264892578125,
      "model_forward_time": 0.11960291862487793,
      "step": 39365
    },
    {
      "epoch": 0.000240264892578125,
      "step": 39365,
      "training_step_time": 0.6542079448699951
    },
    {
      "epoch": 0.00024027099609375,
      "model_forward_time": 0.11589574813842773,
      "step": 39366
    },
    {
      "epoch": 0.00024027099609375,
      "step": 39366,
      "training_step_time": 0.6259753704071045
    },
    {
      "epoch": 0.000240277099609375,
      "model_forward_time": 0.11935210227966309,
      "step": 39367
    },
    {
      "epoch": 0.000240277099609375,
      "step": 39367,
      "training_step_time": 0.6897547245025635
    },
    {
      "epoch": 0.000240283203125,
      "model_forward_time": 0.12350606918334961,
      "step": 39368
    },
    {
      "epoch": 0.000240283203125,
      "step": 39368,
      "training_step_time": 0.6891109943389893
    },
    {
      "epoch": 0.000240289306640625,
      "model_forward_time": 0.12147045135498047,
      "step": 39369
    },
    {
      "epoch": 0.000240289306640625,
      "step": 39369,
      "training_step_time": 0.7215466499328613
    },
    {
      "epoch": 0.00024029541015625,
      "grad_norm": 0.11783138662576675,
      "learning_rate": 2.8985710769227936e-05,
      "loss": 0.0324,
      "step": 39370
    },
    {
      "epoch": 0.00024029541015625,
      "model_forward_time": 0.12097573280334473,
      "step": 39370
    },
    {
      "epoch": 0.00024029541015625,
      "step": 39370,
      "training_step_time": 0.6247894763946533
    },
    {
      "epoch": 0.000240301513671875,
      "model_forward_time": 0.1201627254486084,
      "step": 39371
    },
    {
      "epoch": 0.000240301513671875,
      "step": 39371,
      "training_step_time": 0.7386434078216553
    },
    {
      "epoch": 0.0002403076171875,
      "model_forward_time": 0.12337064743041992,
      "step": 39372
    },
    {
      "epoch": 0.0002403076171875,
      "step": 39372,
      "training_step_time": 0.7281155586242676
    },
    {
      "epoch": 0.000240313720703125,
      "model_forward_time": 0.12448263168334961,
      "step": 39373
    },
    {
      "epoch": 0.000240313720703125,
      "step": 39373,
      "training_step_time": 0.6520814895629883
    },
    {
      "epoch": 0.00024031982421875,
      "model_forward_time": 0.11986112594604492,
      "step": 39374
    },
    {
      "epoch": 0.00024031982421875,
      "step": 39374,
      "training_step_time": 0.6933989524841309
    },
    {
      "epoch": 0.000240325927734375,
      "model_forward_time": 0.12661170959472656,
      "step": 39375
    },
    {
      "epoch": 0.000240325927734375,
      "step": 39375,
      "training_step_time": 0.6111371517181396
    },
    {
      "epoch": 0.00024033203125,
      "model_forward_time": 0.12120604515075684,
      "step": 39376
    },
    {
      "epoch": 0.00024033203125,
      "step": 39376,
      "training_step_time": 0.7021260261535645
    },
    {
      "epoch": 0.000240338134765625,
      "model_forward_time": 0.12473273277282715,
      "step": 39377
    },
    {
      "epoch": 0.000240338134765625,
      "step": 39377,
      "training_step_time": 0.6573965549468994
    },
    {
      "epoch": 0.00024034423828125,
      "model_forward_time": 0.11722397804260254,
      "step": 39378
    },
    {
      "epoch": 0.00024034423828125,
      "step": 39378,
      "training_step_time": 0.6508102416992188
    },
    {
      "epoch": 0.000240350341796875,
      "model_forward_time": 0.12564444541931152,
      "step": 39379
    },
    {
      "epoch": 0.000240350341796875,
      "step": 39379,
      "training_step_time": 0.6469025611877441
    },
    {
      "epoch": 0.0002403564453125,
      "grad_norm": 0.11102177202701569,
      "learning_rate": 2.8960708213347366e-05,
      "loss": 0.0418,
      "step": 39380
    },
    {
      "epoch": 0.0002403564453125,
      "model_forward_time": 0.12058258056640625,
      "step": 39380
    },
    {
      "epoch": 0.0002403564453125,
      "step": 39380,
      "training_step_time": 0.6596033573150635
    },
    {
      "epoch": 0.000240362548828125,
      "model_forward_time": 0.12572026252746582,
      "step": 39381
    },
    {
      "epoch": 0.000240362548828125,
      "step": 39381,
      "training_step_time": 0.7129194736480713
    },
    {
      "epoch": 0.00024036865234375,
      "model_forward_time": 0.12383532524108887,
      "step": 39382
    },
    {
      "epoch": 0.00024036865234375,
      "step": 39382,
      "training_step_time": 0.7417147159576416
    },
    {
      "epoch": 0.000240374755859375,
      "model_forward_time": 0.123931884765625,
      "step": 39383
    },
    {
      "epoch": 0.000240374755859375,
      "step": 39383,
      "training_step_time": 0.6728155612945557
    },
    {
      "epoch": 0.000240380859375,
      "model_forward_time": 0.1159055233001709,
      "step": 39384
    },
    {
      "epoch": 0.000240380859375,
      "step": 39384,
      "training_step_time": 0.5874507427215576
    },
    {
      "epoch": 0.000240386962890625,
      "model_forward_time": 0.11848711967468262,
      "step": 39385
    },
    {
      "epoch": 0.000240386962890625,
      "step": 39385,
      "training_step_time": 0.6340575218200684
    },
    {
      "epoch": 0.00024039306640625,
      "model_forward_time": 0.11739087104797363,
      "step": 39386
    },
    {
      "epoch": 0.00024039306640625,
      "step": 39386,
      "training_step_time": 0.7422654628753662
    },
    {
      "epoch": 0.000240399169921875,
      "model_forward_time": 0.12338066101074219,
      "step": 39387
    },
    {
      "epoch": 0.000240399169921875,
      "step": 39387,
      "training_step_time": 0.6391019821166992
    },
    {
      "epoch": 0.0002404052734375,
      "model_forward_time": 0.1251533031463623,
      "step": 39388
    },
    {
      "epoch": 0.0002404052734375,
      "step": 39388,
      "training_step_time": 0.7498722076416016
    },
    {
      "epoch": 0.000240411376953125,
      "model_forward_time": 0.11943936347961426,
      "step": 39389
    },
    {
      "epoch": 0.000240411376953125,
      "step": 39389,
      "training_step_time": 0.6427407264709473
    },
    {
      "epoch": 0.00024041748046875,
      "grad_norm": 0.10711231082677841,
      "learning_rate": 2.8935712048648112e-05,
      "loss": 0.0422,
      "step": 39390
    },
    {
      "epoch": 0.00024041748046875,
      "model_forward_time": 0.11779522895812988,
      "step": 39390
    },
    {
      "epoch": 0.00024041748046875,
      "step": 39390,
      "training_step_time": 0.696580171585083
    },
    {
      "epoch": 0.000240423583984375,
      "model_forward_time": 0.11814522743225098,
      "step": 39391
    },
    {
      "epoch": 0.000240423583984375,
      "step": 39391,
      "training_step_time": 0.7677357196807861
    },
    {
      "epoch": 0.0002404296875,
      "model_forward_time": 0.11744093894958496,
      "step": 39392
    },
    {
      "epoch": 0.0002404296875,
      "step": 39392,
      "training_step_time": 0.6767857074737549
    },
    {
      "epoch": 0.000240435791015625,
      "model_forward_time": 0.12089157104492188,
      "step": 39393
    },
    {
      "epoch": 0.000240435791015625,
      "step": 39393,
      "training_step_time": 0.6622750759124756
    },
    {
      "epoch": 0.00024044189453125,
      "model_forward_time": 0.11696100234985352,
      "step": 39394
    },
    {
      "epoch": 0.00024044189453125,
      "step": 39394,
      "training_step_time": 0.6537628173828125
    },
    {
      "epoch": 0.000240447998046875,
      "model_forward_time": 0.11891555786132812,
      "step": 39395
    },
    {
      "epoch": 0.000240447998046875,
      "step": 39395,
      "training_step_time": 0.5940680503845215
    },
    {
      "epoch": 0.0002404541015625,
      "model_forward_time": 0.11647200584411621,
      "step": 39396
    },
    {
      "epoch": 0.0002404541015625,
      "step": 39396,
      "training_step_time": 0.7022697925567627
    },
    {
      "epoch": 0.000240460205078125,
      "model_forward_time": 0.11785554885864258,
      "step": 39397
    },
    {
      "epoch": 0.000240460205078125,
      "step": 39397,
      "training_step_time": 0.6588456630706787
    },
    {
      "epoch": 0.00024046630859375,
      "model_forward_time": 0.11746931076049805,
      "step": 39398
    },
    {
      "epoch": 0.00024046630859375,
      "step": 39398,
      "training_step_time": 0.729809045791626
    },
    {
      "epoch": 0.000240472412109375,
      "model_forward_time": 0.12007451057434082,
      "step": 39399
    },
    {
      "epoch": 0.000240472412109375,
      "step": 39399,
      "training_step_time": 0.7324745655059814
    },
    {
      "epoch": 0.000240478515625,
      "grad_norm": 0.11476461589336395,
      "learning_rate": 2.89107222827234e-05,
      "loss": 0.0408,
      "step": 39400
    },
    {
      "epoch": 0.000240478515625,
      "model_forward_time": 0.11905193328857422,
      "step": 39400
    },
    {
      "epoch": 0.000240478515625,
      "step": 39400,
      "training_step_time": 0.5883457660675049
    },
    {
      "epoch": 0.000240484619140625,
      "model_forward_time": 0.12147378921508789,
      "step": 39401
    },
    {
      "epoch": 0.000240484619140625,
      "step": 39401,
      "training_step_time": 0.7160928249359131
    },
    {
      "epoch": 0.00024049072265625,
      "model_forward_time": 0.11857390403747559,
      "step": 39402
    },
    {
      "epoch": 0.00024049072265625,
      "step": 39402,
      "training_step_time": 0.6373708248138428
    },
    {
      "epoch": 0.000240496826171875,
      "model_forward_time": 0.12620806694030762,
      "step": 39403
    },
    {
      "epoch": 0.000240496826171875,
      "step": 39403,
      "training_step_time": 0.6822285652160645
    },
    {
      "epoch": 0.0002405029296875,
      "model_forward_time": 0.12139487266540527,
      "step": 39404
    },
    {
      "epoch": 0.0002405029296875,
      "step": 39404,
      "training_step_time": 0.7020092010498047
    },
    {
      "epoch": 0.000240509033203125,
      "model_forward_time": 0.1251518726348877,
      "step": 39405
    },
    {
      "epoch": 0.000240509033203125,
      "step": 39405,
      "training_step_time": 0.6224799156188965
    },
    {
      "epoch": 0.00024051513671875,
      "model_forward_time": 0.1210014820098877,
      "step": 39406
    },
    {
      "epoch": 0.00024051513671875,
      "step": 39406,
      "training_step_time": 0.621406078338623
    },
    {
      "epoch": 0.000240521240234375,
      "model_forward_time": 0.12406158447265625,
      "step": 39407
    },
    {
      "epoch": 0.000240521240234375,
      "step": 39407,
      "training_step_time": 0.6037886142730713
    },
    {
      "epoch": 0.00024052734375,
      "model_forward_time": 0.1193845272064209,
      "step": 39408
    },
    {
      "epoch": 0.00024052734375,
      "step": 39408,
      "training_step_time": 0.8743853569030762
    },
    {
      "epoch": 0.000240533447265625,
      "model_forward_time": 0.12363839149475098,
      "step": 39409
    },
    {
      "epoch": 0.000240533447265625,
      "step": 39409,
      "training_step_time": 0.7302582263946533
    },
    {
      "epoch": 0.00024053955078125,
      "grad_norm": 0.11806251853704453,
      "learning_rate": 2.8885738923164395e-05,
      "loss": 0.0416,
      "step": 39410
    },
    {
      "epoch": 0.00024053955078125,
      "model_forward_time": 0.12072896957397461,
      "step": 39410
    },
    {
      "epoch": 0.00024053955078125,
      "step": 39410,
      "training_step_time": 0.6925978660583496
    },
    {
      "epoch": 0.000240545654296875,
      "model_forward_time": 0.12038087844848633,
      "step": 39411
    },
    {
      "epoch": 0.000240545654296875,
      "step": 39411,
      "training_step_time": 0.7273716926574707
    },
    {
      "epoch": 0.0002405517578125,
      "model_forward_time": 0.11908674240112305,
      "step": 39412
    },
    {
      "epoch": 0.0002405517578125,
      "step": 39412,
      "training_step_time": 0.6345665454864502
    },
    {
      "epoch": 0.000240557861328125,
      "model_forward_time": 0.11664414405822754,
      "step": 39413
    },
    {
      "epoch": 0.000240557861328125,
      "step": 39413,
      "training_step_time": 0.6516201496124268
    },
    {
      "epoch": 0.00024056396484375,
      "model_forward_time": 0.11690163612365723,
      "step": 39414
    },
    {
      "epoch": 0.00024056396484375,
      "step": 39414,
      "training_step_time": 0.633556604385376
    },
    {
      "epoch": 0.000240570068359375,
      "model_forward_time": 0.11979985237121582,
      "step": 39415
    },
    {
      "epoch": 0.000240570068359375,
      "step": 39415,
      "training_step_time": 0.6392467021942139
    },
    {
      "epoch": 0.000240576171875,
      "model_forward_time": 0.11987972259521484,
      "step": 39416
    },
    {
      "epoch": 0.000240576171875,
      "step": 39416,
      "training_step_time": 0.6814711093902588
    },
    {
      "epoch": 0.000240582275390625,
      "model_forward_time": 0.12585806846618652,
      "step": 39417
    },
    {
      "epoch": 0.000240582275390625,
      "step": 39417,
      "training_step_time": 0.6601111888885498
    },
    {
      "epoch": 0.00024058837890625,
      "model_forward_time": 0.11859440803527832,
      "step": 39418
    },
    {
      "epoch": 0.00024058837890625,
      "step": 39418,
      "training_step_time": 0.6719355583190918
    },
    {
      "epoch": 0.000240594482421875,
      "model_forward_time": 0.1186985969543457,
      "step": 39419
    },
    {
      "epoch": 0.000240594482421875,
      "step": 39419,
      "training_step_time": 0.611382246017456
    },
    {
      "epoch": 0.0002406005859375,
      "grad_norm": 0.13473467528820038,
      "learning_rate": 2.8860761977560436e-05,
      "loss": 0.0382,
      "step": 39420
    },
    {
      "epoch": 0.0002406005859375,
      "model_forward_time": 0.11911129951477051,
      "step": 39420
    },
    {
      "epoch": 0.0002406005859375,
      "step": 39420,
      "training_step_time": 0.6875054836273193
    },
    {
      "epoch": 0.000240606689453125,
      "model_forward_time": 0.1221768856048584,
      "step": 39421
    },
    {
      "epoch": 0.000240606689453125,
      "step": 39421,
      "training_step_time": 0.7105281352996826
    },
    {
      "epoch": 0.00024061279296875,
      "model_forward_time": 0.12423324584960938,
      "step": 39422
    },
    {
      "epoch": 0.00024061279296875,
      "step": 39422,
      "training_step_time": 0.5665552616119385
    },
    {
      "epoch": 0.000240618896484375,
      "model_forward_time": 0.12415313720703125,
      "step": 39423
    },
    {
      "epoch": 0.000240618896484375,
      "step": 39423,
      "training_step_time": 0.6662213802337646
    },
    {
      "epoch": 0.000240625,
      "model_forward_time": 0.12393379211425781,
      "step": 39424
    },
    {
      "epoch": 0.000240625,
      "step": 39424,
      "training_step_time": 0.6795022487640381
    },
    {
      "epoch": 0.000240631103515625,
      "model_forward_time": 0.11742162704467773,
      "step": 39425
    },
    {
      "epoch": 0.000240631103515625,
      "step": 39425,
      "training_step_time": 0.6019821166992188
    },
    {
      "epoch": 0.00024063720703125,
      "model_forward_time": 0.12176036834716797,
      "step": 39426
    },
    {
      "epoch": 0.00024063720703125,
      "step": 39426,
      "training_step_time": 0.7303657531738281
    },
    {
      "epoch": 0.000240643310546875,
      "model_forward_time": 0.12116026878356934,
      "step": 39427
    },
    {
      "epoch": 0.000240643310546875,
      "step": 39427,
      "training_step_time": 0.6156105995178223
    },
    {
      "epoch": 0.0002406494140625,
      "model_forward_time": 0.11629652976989746,
      "step": 39428
    },
    {
      "epoch": 0.0002406494140625,
      "step": 39428,
      "training_step_time": 0.7175893783569336
    },
    {
      "epoch": 0.000240655517578125,
      "model_forward_time": 0.11738228797912598,
      "step": 39429
    },
    {
      "epoch": 0.000240655517578125,
      "step": 39429,
      "training_step_time": 0.7939093112945557
    },
    {
      "epoch": 0.00024066162109375,
      "grad_norm": 0.10680718719959259,
      "learning_rate": 2.883579145349884e-05,
      "loss": 0.0431,
      "step": 39430
    },
    {
      "epoch": 0.00024066162109375,
      "model_forward_time": 0.11847996711730957,
      "step": 39430
    },
    {
      "epoch": 0.00024066162109375,
      "step": 39430,
      "training_step_time": 0.653019905090332
    },
    {
      "epoch": 0.000240667724609375,
      "model_forward_time": 0.11898088455200195,
      "step": 39431
    },
    {
      "epoch": 0.000240667724609375,
      "step": 39431,
      "training_step_time": 0.6621174812316895
    },
    {
      "epoch": 0.000240673828125,
      "model_forward_time": 0.12346005439758301,
      "step": 39432
    },
    {
      "epoch": 0.000240673828125,
      "step": 39432,
      "training_step_time": 0.6127669811248779
    },
    {
      "epoch": 0.000240679931640625,
      "model_forward_time": 0.11761856079101562,
      "step": 39433
    },
    {
      "epoch": 0.000240679931640625,
      "step": 39433,
      "training_step_time": 0.5848345756530762
    },
    {
      "epoch": 0.00024068603515625,
      "model_forward_time": 0.11986494064331055,
      "step": 39434
    },
    {
      "epoch": 0.00024068603515625,
      "step": 39434,
      "training_step_time": 0.5911190509796143
    },
    {
      "epoch": 0.000240692138671875,
      "model_forward_time": 0.11892533302307129,
      "step": 39435
    },
    {
      "epoch": 0.000240692138671875,
      "step": 39435,
      "training_step_time": 0.501579761505127
    },
    {
      "epoch": 0.0002406982421875,
      "model_forward_time": 0.11904764175415039,
      "step": 39436
    },
    {
      "epoch": 0.0002406982421875,
      "step": 39436,
      "training_step_time": 0.48934173583984375
    },
    {
      "epoch": 0.000240704345703125,
      "model_forward_time": 0.12066197395324707,
      "step": 39437
    },
    {
      "epoch": 0.000240704345703125,
      "step": 39437,
      "training_step_time": 0.6441073417663574
    },
    {
      "epoch": 0.00024071044921875,
      "model_forward_time": 0.11707282066345215,
      "step": 39438
    },
    {
      "epoch": 0.00024071044921875,
      "step": 39438,
      "training_step_time": 0.5297598838806152
    },
    {
      "epoch": 0.000240716552734375,
      "model_forward_time": 0.11646842956542969,
      "step": 39439
    },
    {
      "epoch": 0.000240716552734375,
      "step": 39439,
      "training_step_time": 0.5559298992156982
    },
    {
      "epoch": 0.00024072265625,
      "grad_norm": 0.14992974698543549,
      "learning_rate": 2.881082735856499e-05,
      "loss": 0.044,
      "step": 39440
    },
    {
      "epoch": 0.00024072265625,
      "model_forward_time": 0.11601400375366211,
      "step": 39440
    },
    {
      "epoch": 0.00024072265625,
      "step": 39440,
      "training_step_time": 0.5220339298248291
    },
    {
      "epoch": 0.000240728759765625,
      "model_forward_time": 0.11535096168518066,
      "step": 39441
    },
    {
      "epoch": 0.000240728759765625,
      "step": 39441,
      "training_step_time": 0.529442548751831
    },
    {
      "epoch": 0.00024073486328125,
      "model_forward_time": 0.11470985412597656,
      "step": 39442
    },
    {
      "epoch": 0.00024073486328125,
      "step": 39442,
      "training_step_time": 0.4038701057434082
    },
    {
      "epoch": 0.000240740966796875,
      "model_forward_time": 0.11453127861022949,
      "step": 39443
    },
    {
      "epoch": 0.000240740966796875,
      "step": 39443,
      "training_step_time": 0.38609910011291504
    },
    {
      "epoch": 0.0002407470703125,
      "model_forward_time": 0.11536431312561035,
      "step": 39444
    },
    {
      "epoch": 0.0002407470703125,
      "step": 39444,
      "training_step_time": 0.4016439914703369
    },
    {
      "epoch": 0.000240753173828125,
      "model_forward_time": 0.11577725410461426,
      "step": 39445
    },
    {
      "epoch": 0.000240753173828125,
      "step": 39445,
      "training_step_time": 0.39887356758117676
    },
    {
      "epoch": 0.00024075927734375,
      "model_forward_time": 0.11478042602539062,
      "step": 39446
    },
    {
      "epoch": 0.00024075927734375,
      "step": 39446,
      "training_step_time": 0.39328622817993164
    },
    {
      "epoch": 0.000240765380859375,
      "model_forward_time": 0.11532115936279297,
      "step": 39447
    },
    {
      "epoch": 0.000240765380859375,
      "step": 39447,
      "training_step_time": 0.3769354820251465
    },
    {
      "epoch": 0.000240771484375,
      "model_forward_time": 0.11472463607788086,
      "step": 39448
    },
    {
      "epoch": 0.000240771484375,
      "step": 39448,
      "training_step_time": 0.3864414691925049
    },
    {
      "epoch": 0.000240777587890625,
      "model_forward_time": 0.11592841148376465,
      "step": 39449
    },
    {
      "epoch": 0.000240777587890625,
      "step": 39449,
      "training_step_time": 0.382953405380249
    },
    {
      "epoch": 0.00024078369140625,
      "grad_norm": 0.10940041393041611,
      "learning_rate": 2.878586970034232e-05,
      "loss": 0.0423,
      "step": 39450
    },
    {
      "epoch": 0.00024078369140625,
      "model_forward_time": 0.11660981178283691,
      "step": 39450
    },
    {
      "epoch": 0.00024078369140625,
      "step": 39450,
      "training_step_time": 0.39890480041503906
    },
    {
      "epoch": 0.000240789794921875,
      "model_forward_time": 0.11459493637084961,
      "step": 39451
    },
    {
      "epoch": 0.000240789794921875,
      "step": 39451,
      "training_step_time": 0.39024829864501953
    },
    {
      "epoch": 0.0002407958984375,
      "model_forward_time": 0.11573600769042969,
      "step": 39452
    },
    {
      "epoch": 0.0002407958984375,
      "step": 39452,
      "training_step_time": 0.3930845260620117
    },
    {
      "epoch": 0.000240802001953125,
      "model_forward_time": 0.11533021926879883,
      "step": 39453
    },
    {
      "epoch": 0.000240802001953125,
      "step": 39453,
      "training_step_time": 0.4103200435638428
    },
    {
      "epoch": 0.00024080810546875,
      "model_forward_time": 0.11488127708435059,
      "step": 39454
    },
    {
      "epoch": 0.00024080810546875,
      "step": 39454,
      "training_step_time": 0.36866307258605957
    },
    {
      "epoch": 0.000240814208984375,
      "model_forward_time": 0.11546850204467773,
      "step": 39455
    },
    {
      "epoch": 0.000240814208984375,
      "step": 39455,
      "training_step_time": 0.46697378158569336
    },
    {
      "epoch": 0.0002408203125,
      "model_forward_time": 0.11631059646606445,
      "step": 39456
    },
    {
      "epoch": 0.0002408203125,
      "step": 39456,
      "training_step_time": 0.4756932258605957
    },
    {
      "epoch": 0.000240826416015625,
      "model_forward_time": 0.11499571800231934,
      "step": 39457
    },
    {
      "epoch": 0.000240826416015625,
      "step": 39457,
      "training_step_time": 0.3964064121246338
    },
    {
      "epoch": 0.00024083251953125,
      "model_forward_time": 0.1150519847869873,
      "step": 39458
    },
    {
      "epoch": 0.00024083251953125,
      "step": 39458,
      "training_step_time": 0.3905148506164551
    },
    {
      "epoch": 0.000240838623046875,
      "model_forward_time": 0.11497688293457031,
      "step": 39459
    },
    {
      "epoch": 0.000240838623046875,
      "step": 39459,
      "training_step_time": 0.38338541984558105
    },
    {
      "epoch": 0.0002408447265625,
      "grad_norm": 0.10071386396884918,
      "learning_rate": 2.8760918486412292e-05,
      "loss": 0.0412,
      "step": 39460
    },
    {
      "epoch": 0.0002408447265625,
      "model_forward_time": 0.11486625671386719,
      "step": 39460
    },
    {
      "epoch": 0.0002408447265625,
      "step": 39460,
      "training_step_time": 0.3913719654083252
    },
    {
      "epoch": 0.000240850830078125,
      "model_forward_time": 0.11597847938537598,
      "step": 39461
    },
    {
      "epoch": 0.000240850830078125,
      "step": 39461,
      "training_step_time": 0.39397382736206055
    },
    {
      "epoch": 0.00024085693359375,
      "model_forward_time": 0.11478853225708008,
      "step": 39462
    },
    {
      "epoch": 0.00024085693359375,
      "step": 39462,
      "training_step_time": 0.3922717571258545
    },
    {
      "epoch": 0.000240863037109375,
      "model_forward_time": 0.1151266098022461,
      "step": 39463
    },
    {
      "epoch": 0.000240863037109375,
      "step": 39463,
      "training_step_time": 0.3958313465118408
    },
    {
      "epoch": 0.000240869140625,
      "model_forward_time": 0.11507129669189453,
      "step": 39464
    },
    {
      "epoch": 0.000240869140625,
      "step": 39464,
      "training_step_time": 0.3817298412322998
    },
    {
      "epoch": 0.000240875244140625,
      "model_forward_time": 0.11524558067321777,
      "step": 39465
    },
    {
      "epoch": 0.000240875244140625,
      "step": 39465,
      "training_step_time": 0.4015767574310303
    },
    {
      "epoch": 0.00024088134765625,
      "model_forward_time": 0.11536908149719238,
      "step": 39466
    },
    {
      "epoch": 0.00024088134765625,
      "step": 39466,
      "training_step_time": 0.39235591888427734
    },
    {
      "epoch": 0.000240887451171875,
      "model_forward_time": 0.11594557762145996,
      "step": 39467
    },
    {
      "epoch": 0.000240887451171875,
      "step": 39467,
      "training_step_time": 0.41312718391418457
    },
    {
      "epoch": 0.0002408935546875,
      "model_forward_time": 0.11485886573791504,
      "step": 39468
    },
    {
      "epoch": 0.0002408935546875,
      "step": 39468,
      "training_step_time": 0.4169175624847412
    },
    {
      "epoch": 0.000240899658203125,
      "model_forward_time": 0.11529850959777832,
      "step": 39469
    },
    {
      "epoch": 0.000240899658203125,
      "step": 39469,
      "training_step_time": 0.36977052688598633
    },
    {
      "epoch": 0.00024090576171875,
      "grad_norm": 0.15171018242835999,
      "learning_rate": 2.8735973724354482e-05,
      "loss": 0.0407,
      "step": 39470
    },
    {
      "epoch": 0.00024090576171875,
      "model_forward_time": 0.11531352996826172,
      "step": 39470
    },
    {
      "epoch": 0.00024090576171875,
      "step": 39470,
      "training_step_time": 0.4591531753540039
    },
    {
      "epoch": 0.000240911865234375,
      "model_forward_time": 0.11564087867736816,
      "step": 39471
    },
    {
      "epoch": 0.000240911865234375,
      "step": 39471,
      "training_step_time": 0.39797019958496094
    },
    {
      "epoch": 0.00024091796875,
      "model_forward_time": 0.11538267135620117,
      "step": 39472
    },
    {
      "epoch": 0.00024091796875,
      "step": 39472,
      "training_step_time": 0.3939783573150635
    },
    {
      "epoch": 0.000240924072265625,
      "model_forward_time": 0.11588525772094727,
      "step": 39473
    },
    {
      "epoch": 0.000240924072265625,
      "step": 39473,
      "training_step_time": 0.390974760055542
    },
    {
      "epoch": 0.00024093017578125,
      "model_forward_time": 0.11502218246459961,
      "step": 39474
    },
    {
      "epoch": 0.00024093017578125,
      "step": 39474,
      "training_step_time": 0.39326930046081543
    },
    {
      "epoch": 0.000240936279296875,
      "model_forward_time": 0.11524391174316406,
      "step": 39475
    },
    {
      "epoch": 0.000240936279296875,
      "step": 39475,
      "training_step_time": 0.3901362419128418
    },
    {
      "epoch": 0.0002409423828125,
      "model_forward_time": 0.11499929428100586,
      "step": 39476
    },
    {
      "epoch": 0.0002409423828125,
      "step": 39476,
      "training_step_time": 0.39345407485961914
    },
    {
      "epoch": 0.000240948486328125,
      "model_forward_time": 0.11547279357910156,
      "step": 39477
    },
    {
      "epoch": 0.000240948486328125,
      "step": 39477,
      "training_step_time": 0.4226648807525635
    },
    {
      "epoch": 0.00024095458984375,
      "model_forward_time": 0.115234375,
      "step": 39478
    },
    {
      "epoch": 0.00024095458984375,
      "step": 39478,
      "training_step_time": 0.3975191116333008
    },
    {
      "epoch": 0.000240960693359375,
      "model_forward_time": 0.11518168449401855,
      "step": 39479
    },
    {
      "epoch": 0.000240960693359375,
      "step": 39479,
      "training_step_time": 0.39964866638183594
    },
    {
      "epoch": 0.000240966796875,
      "grad_norm": 0.08310326933860779,
      "learning_rate": 2.8711035421746367e-05,
      "loss": 0.0405,
      "step": 39480
    },
    {
      "epoch": 0.000240966796875,
      "model_forward_time": 0.11519575119018555,
      "step": 39480
    },
    {
      "epoch": 0.000240966796875,
      "step": 39480,
      "training_step_time": 0.3968040943145752
    },
    {
      "epoch": 0.000240972900390625,
      "model_forward_time": 0.11673903465270996,
      "step": 39481
    },
    {
      "epoch": 0.000240972900390625,
      "step": 39481,
      "training_step_time": 0.39557480812072754
    },
    {
      "epoch": 0.00024097900390625,
      "model_forward_time": 0.1154325008392334,
      "step": 39482
    },
    {
      "epoch": 0.00024097900390625,
      "step": 39482,
      "training_step_time": 0.3966991901397705
    },
    {
      "epoch": 0.000240985107421875,
      "model_forward_time": 0.11530900001525879,
      "step": 39483
    },
    {
      "epoch": 0.000240985107421875,
      "step": 39483,
      "training_step_time": 0.523186445236206
    },
    {
      "epoch": 0.0002409912109375,
      "model_forward_time": 0.1165158748626709,
      "step": 39484
    },
    {
      "epoch": 0.0002409912109375,
      "step": 39484,
      "training_step_time": 0.41748905181884766
    },
    {
      "epoch": 0.000240997314453125,
      "model_forward_time": 0.11478686332702637,
      "step": 39485
    },
    {
      "epoch": 0.000240997314453125,
      "step": 39485,
      "training_step_time": 0.45311641693115234
    },
    {
      "epoch": 0.00024100341796875,
      "model_forward_time": 0.11609482765197754,
      "step": 39486
    },
    {
      "epoch": 0.00024100341796875,
      "step": 39486,
      "training_step_time": 0.4052300453186035
    },
    {
      "epoch": 0.000241009521484375,
      "model_forward_time": 0.11583089828491211,
      "step": 39487
    },
    {
      "epoch": 0.000241009521484375,
      "step": 39487,
      "training_step_time": 0.3960273265838623
    },
    {
      "epoch": 0.000241015625,
      "model_forward_time": 0.11501598358154297,
      "step": 39488
    },
    {
      "epoch": 0.000241015625,
      "step": 39488,
      "training_step_time": 0.39377379417419434
    },
    {
      "epoch": 0.000241021728515625,
      "model_forward_time": 0.11536955833435059,
      "step": 39489
    },
    {
      "epoch": 0.000241021728515625,
      "step": 39489,
      "training_step_time": 0.39292430877685547
    },
    {
      "epoch": 0.00024102783203125,
      "grad_norm": 0.09725301712751389,
      "learning_rate": 2.8686103586163626e-05,
      "loss": 0.0431,
      "step": 39490
    },
    {
      "epoch": 0.00024102783203125,
      "model_forward_time": 0.11574101448059082,
      "step": 39490
    },
    {
      "epoch": 0.00024102783203125,
      "step": 39490,
      "training_step_time": 0.3922586441040039
    },
    {
      "epoch": 0.000241033935546875,
      "model_forward_time": 0.11562633514404297,
      "step": 39491
    },
    {
      "epoch": 0.000241033935546875,
      "step": 39491,
      "training_step_time": 0.384030818939209
    },
    {
      "epoch": 0.0002410400390625,
      "model_forward_time": 0.11577701568603516,
      "step": 39492
    },
    {
      "epoch": 0.0002410400390625,
      "step": 39492,
      "training_step_time": 0.4032902717590332
    },
    {
      "epoch": 0.000241046142578125,
      "model_forward_time": 0.1152341365814209,
      "step": 39493
    },
    {
      "epoch": 0.000241046142578125,
      "step": 39493,
      "training_step_time": 0.5101406574249268
    },
    {
      "epoch": 0.00024105224609375,
      "model_forward_time": 0.11464643478393555,
      "step": 39494
    },
    {
      "epoch": 0.00024105224609375,
      "step": 39494,
      "training_step_time": 0.3817024230957031
    },
    {
      "epoch": 0.000241058349609375,
      "model_forward_time": 0.11510634422302246,
      "step": 39495
    },
    {
      "epoch": 0.000241058349609375,
      "step": 39495,
      "training_step_time": 0.38640904426574707
    },
    {
      "epoch": 0.000241064453125,
      "model_forward_time": 0.11518049240112305,
      "step": 39496
    },
    {
      "epoch": 0.000241064453125,
      "step": 39496,
      "training_step_time": 0.4034538269042969
    },
    {
      "epoch": 0.000241070556640625,
      "model_forward_time": 0.11458635330200195,
      "step": 39497
    },
    {
      "epoch": 0.000241070556640625,
      "step": 39497,
      "training_step_time": 0.39777302742004395
    },
    {
      "epoch": 0.00024107666015625,
      "model_forward_time": 0.11575460433959961,
      "step": 39498
    },
    {
      "epoch": 0.00024107666015625,
      "step": 39498,
      "training_step_time": 0.48120927810668945
    },
    {
      "epoch": 0.000241082763671875,
      "model_forward_time": 0.11478829383850098,
      "step": 39499
    },
    {
      "epoch": 0.000241082763671875,
      "step": 39499,
      "training_step_time": 0.6874852180480957
    },
    {
      "epoch": 0.0002410888671875,
      "grad_norm": 0.11451467126607895,
      "learning_rate": 2.866117822517982e-05,
      "loss": 0.0398,
      "step": 39500
    },
    {
      "epoch": 0.0002410888671875,
      "model_forward_time": 0.11457157135009766,
      "step": 39500
    },
    {
      "epoch": 0.0002410888671875,
      "step": 39500,
      "training_step_time": 0.5134754180908203
    },
    {
      "epoch": 0.000241094970703125,
      "model_forward_time": 0.11446809768676758,
      "step": 39501
    },
    {
      "epoch": 0.000241094970703125,
      "step": 39501,
      "training_step_time": 0.3857917785644531
    },
    {
      "epoch": 0.00024110107421875,
      "model_forward_time": 0.1153106689453125,
      "step": 39502
    },
    {
      "epoch": 0.00024110107421875,
      "step": 39502,
      "training_step_time": 0.38832831382751465
    },
    {
      "epoch": 0.000241107177734375,
      "model_forward_time": 0.11476540565490723,
      "step": 39503
    },
    {
      "epoch": 0.000241107177734375,
      "step": 39503,
      "training_step_time": 0.3876326084136963
    },
    {
      "epoch": 0.00024111328125,
      "model_forward_time": 0.11490392684936523,
      "step": 39504
    },
    {
      "epoch": 0.00024111328125,
      "step": 39504,
      "training_step_time": 0.39182186126708984
    },
    {
      "epoch": 0.000241119384765625,
      "model_forward_time": 0.11515307426452637,
      "step": 39505
    },
    {
      "epoch": 0.000241119384765625,
      "step": 39505,
      "training_step_time": 0.4667055606842041
    },
    {
      "epoch": 0.00024112548828125,
      "model_forward_time": 0.11579346656799316,
      "step": 39506
    },
    {
      "epoch": 0.00024112548828125,
      "step": 39506,
      "training_step_time": 0.3952457904815674
    },
    {
      "epoch": 0.000241131591796875,
      "model_forward_time": 0.11548686027526855,
      "step": 39507
    },
    {
      "epoch": 0.000241131591796875,
      "step": 39507,
      "training_step_time": 0.3876311779022217
    },
    {
      "epoch": 0.0002411376953125,
      "model_forward_time": 0.11551046371459961,
      "step": 39508
    },
    {
      "epoch": 0.0002411376953125,
      "step": 39508,
      "training_step_time": 0.39072442054748535
    },
    {
      "epoch": 0.000241143798828125,
      "model_forward_time": 0.11433815956115723,
      "step": 39509
    },
    {
      "epoch": 0.000241143798828125,
      "step": 39509,
      "training_step_time": 0.44064950942993164
    },
    {
      "epoch": 0.00024114990234375,
      "grad_norm": 0.12968072295188904,
      "learning_rate": 2.8636259346366666e-05,
      "loss": 0.0439,
      "step": 39510
    },
    {
      "epoch": 0.00024114990234375,
      "model_forward_time": 0.11474990844726562,
      "step": 39510
    },
    {
      "epoch": 0.00024114990234375,
      "step": 39510,
      "training_step_time": 0.4063737392425537
    },
    {
      "epoch": 0.000241156005859375,
      "model_forward_time": 0.11505365371704102,
      "step": 39511
    },
    {
      "epoch": 0.000241156005859375,
      "step": 39511,
      "training_step_time": 0.6235406398773193
    },
    {
      "epoch": 0.000241162109375,
      "model_forward_time": 0.11571598052978516,
      "step": 39512
    },
    {
      "epoch": 0.000241162109375,
      "step": 39512,
      "training_step_time": 0.40311694145202637
    },
    {
      "epoch": 0.000241168212890625,
      "model_forward_time": 0.11507987976074219,
      "step": 39513
    },
    {
      "epoch": 0.000241168212890625,
      "step": 39513,
      "training_step_time": 0.45577073097229004
    },
    {
      "epoch": 0.00024117431640625,
      "model_forward_time": 0.11546611785888672,
      "step": 39514
    },
    {
      "epoch": 0.00024117431640625,
      "step": 39514,
      "training_step_time": 0.3987276554107666
    },
    {
      "epoch": 0.000241180419921875,
      "model_forward_time": 0.11579179763793945,
      "step": 39515
    },
    {
      "epoch": 0.000241180419921875,
      "step": 39515,
      "training_step_time": 0.5276303291320801
    },
    {
      "epoch": 0.0002411865234375,
      "model_forward_time": 0.11524200439453125,
      "step": 39516
    },
    {
      "epoch": 0.0002411865234375,
      "step": 39516,
      "training_step_time": 0.37977027893066406
    },
    {
      "epoch": 0.000241192626953125,
      "model_forward_time": 0.1157841682434082,
      "step": 39517
    },
    {
      "epoch": 0.000241192626953125,
      "step": 39517,
      "training_step_time": 0.5001771450042725
    },
    {
      "epoch": 0.00024119873046875,
      "model_forward_time": 0.11455655097961426,
      "step": 39518
    },
    {
      "epoch": 0.00024119873046875,
      "step": 39518,
      "training_step_time": 0.3851635456085205
    },
    {
      "epoch": 0.000241204833984375,
      "model_forward_time": 0.11419367790222168,
      "step": 39519
    },
    {
      "epoch": 0.000241204833984375,
      "step": 39519,
      "training_step_time": 0.4104022979736328
    },
    {
      "epoch": 0.0002412109375,
      "grad_norm": 0.1235218346118927,
      "learning_rate": 2.861134695729385e-05,
      "loss": 0.0415,
      "step": 39520
    },
    {
      "epoch": 0.0002412109375,
      "model_forward_time": 0.11478137969970703,
      "step": 39520
    },
    {
      "epoch": 0.0002412109375,
      "step": 39520,
      "training_step_time": 0.3844141960144043
    },
    {
      "epoch": 0.000241217041015625,
      "model_forward_time": 0.115386962890625,
      "step": 39521
    },
    {
      "epoch": 0.000241217041015625,
      "step": 39521,
      "training_step_time": 0.4954690933227539
    },
    {
      "epoch": 0.00024122314453125,
      "model_forward_time": 0.11508417129516602,
      "step": 39522
    },
    {
      "epoch": 0.00024122314453125,
      "step": 39522,
      "training_step_time": 0.42130398750305176
    },
    {
      "epoch": 0.000241229248046875,
      "model_forward_time": 0.11574125289916992,
      "step": 39523
    },
    {
      "epoch": 0.000241229248046875,
      "step": 39523,
      "training_step_time": 0.5133204460144043
    },
    {
      "epoch": 0.0002412353515625,
      "model_forward_time": 0.11458015441894531,
      "step": 39524
    },
    {
      "epoch": 0.0002412353515625,
      "step": 39524,
      "training_step_time": 0.38819193840026855
    },
    {
      "epoch": 0.000241241455078125,
      "model_forward_time": 0.11531543731689453,
      "step": 39525
    },
    {
      "epoch": 0.000241241455078125,
      "step": 39525,
      "training_step_time": 0.479022741317749
    },
    {
      "epoch": 0.00024124755859375,
      "model_forward_time": 0.11490225791931152,
      "step": 39526
    },
    {
      "epoch": 0.00024124755859375,
      "step": 39526,
      "training_step_time": 0.44255566596984863
    },
    {
      "epoch": 0.000241253662109375,
      "model_forward_time": 0.11649894714355469,
      "step": 39527
    },
    {
      "epoch": 0.000241253662109375,
      "step": 39527,
      "training_step_time": 0.4714493751525879
    },
    {
      "epoch": 0.000241259765625,
      "model_forward_time": 0.1154179573059082,
      "step": 39528
    },
    {
      "epoch": 0.000241259765625,
      "step": 39528,
      "training_step_time": 0.38792943954467773
    },
    {
      "epoch": 0.000241265869140625,
      "model_forward_time": 0.1149599552154541,
      "step": 39529
    },
    {
      "epoch": 0.000241265869140625,
      "step": 39529,
      "training_step_time": 0.39613866806030273
    },
    {
      "epoch": 0.00024127197265625,
      "grad_norm": 0.16560350358486176,
      "learning_rate": 2.858644106552909e-05,
      "loss": 0.0415,
      "step": 39530
    },
    {
      "epoch": 0.00024127197265625,
      "model_forward_time": 0.11504507064819336,
      "step": 39530
    },
    {
      "epoch": 0.00024127197265625,
      "step": 39530,
      "training_step_time": 0.40275073051452637
    },
    {
      "epoch": 0.000241278076171875,
      "model_forward_time": 0.11509895324707031,
      "step": 39531
    },
    {
      "epoch": 0.000241278076171875,
      "step": 39531,
      "training_step_time": 0.3949456214904785
    },
    {
      "epoch": 0.0002412841796875,
      "model_forward_time": 0.11573362350463867,
      "step": 39532
    },
    {
      "epoch": 0.0002412841796875,
      "step": 39532,
      "training_step_time": 0.3902089595794678
    },
    {
      "epoch": 0.000241290283203125,
      "model_forward_time": 0.11583113670349121,
      "step": 39533
    },
    {
      "epoch": 0.000241290283203125,
      "step": 39533,
      "training_step_time": 0.6747798919677734
    },
    {
      "epoch": 0.00024129638671875,
      "model_forward_time": 0.11496853828430176,
      "step": 39534
    },
    {
      "epoch": 0.00024129638671875,
      "step": 39534,
      "training_step_time": 0.4016902446746826
    },
    {
      "epoch": 0.000241302490234375,
      "model_forward_time": 0.11572623252868652,
      "step": 39535
    },
    {
      "epoch": 0.000241302490234375,
      "step": 39535,
      "training_step_time": 0.398151159286499
    },
    {
      "epoch": 0.00024130859375,
      "model_forward_time": 0.11468768119812012,
      "step": 39536
    },
    {
      "epoch": 0.00024130859375,
      "step": 39536,
      "training_step_time": 0.42439937591552734
    },
    {
      "epoch": 0.000241314697265625,
      "model_forward_time": 0.11506199836730957,
      "step": 39537
    },
    {
      "epoch": 0.000241314697265625,
      "step": 39537,
      "training_step_time": 0.4171180725097656
    },
    {
      "epoch": 0.00024132080078125,
      "model_forward_time": 0.1165916919708252,
      "step": 39538
    },
    {
      "epoch": 0.00024132080078125,
      "step": 39538,
      "training_step_time": 0.3827791213989258
    },
    {
      "epoch": 0.000241326904296875,
      "model_forward_time": 0.11560297012329102,
      "step": 39539
    },
    {
      "epoch": 0.000241326904296875,
      "step": 39539,
      "training_step_time": 0.4729926586151123
    },
    {
      "epoch": 0.0002413330078125,
      "grad_norm": 0.11906632781028748,
      "learning_rate": 2.8561541678638142e-05,
      "loss": 0.0444,
      "step": 39540
    },
    {
      "epoch": 0.0002413330078125,
      "model_forward_time": 0.11511111259460449,
      "step": 39540
    },
    {
      "epoch": 0.0002413330078125,
      "step": 39540,
      "training_step_time": 0.4210784435272217
    },
    {
      "epoch": 0.000241339111328125,
      "model_forward_time": 0.11688923835754395,
      "step": 39541
    },
    {
      "epoch": 0.000241339111328125,
      "step": 39541,
      "training_step_time": 0.48302650451660156
    },
    {
      "epoch": 0.00024134521484375,
      "model_forward_time": 0.11503434181213379,
      "step": 39542
    },
    {
      "epoch": 0.00024134521484375,
      "step": 39542,
      "training_step_time": 0.40694689750671387
    },
    {
      "epoch": 0.000241351318359375,
      "model_forward_time": 0.11481881141662598,
      "step": 39543
    },
    {
      "epoch": 0.000241351318359375,
      "step": 39543,
      "training_step_time": 0.44509434700012207
    },
    {
      "epoch": 0.000241357421875,
      "model_forward_time": 0.11475586891174316,
      "step": 39544
    },
    {
      "epoch": 0.000241357421875,
      "step": 39544,
      "training_step_time": 0.38661980628967285
    },
    {
      "epoch": 0.000241363525390625,
      "model_forward_time": 0.11562061309814453,
      "step": 39545
    },
    {
      "epoch": 0.000241363525390625,
      "step": 39545,
      "training_step_time": 0.4350709915161133
    },
    {
      "epoch": 0.00024136962890625,
      "model_forward_time": 0.11481833457946777,
      "step": 39546
    },
    {
      "epoch": 0.00024136962890625,
      "step": 39546,
      "training_step_time": 0.3838984966278076
    },
    {
      "epoch": 0.000241375732421875,
      "model_forward_time": 0.11568021774291992,
      "step": 39547
    },
    {
      "epoch": 0.000241375732421875,
      "step": 39547,
      "training_step_time": 0.39231300354003906
    },
    {
      "epoch": 0.0002413818359375,
      "model_forward_time": 0.11485934257507324,
      "step": 39548
    },
    {
      "epoch": 0.0002413818359375,
      "step": 39548,
      "training_step_time": 0.3805420398712158
    },
    {
      "epoch": 0.000241387939453125,
      "model_forward_time": 0.11481833457946777,
      "step": 39549
    },
    {
      "epoch": 0.000241387939453125,
      "step": 39549,
      "training_step_time": 0.45166540145874023
    },
    {
      "epoch": 0.00024139404296875,
      "grad_norm": 0.12325140088796616,
      "learning_rate": 2.8536648804184785e-05,
      "loss": 0.041,
      "step": 39550
    },
    {
      "epoch": 0.00024139404296875,
      "model_forward_time": 0.11545014381408691,
      "step": 39550
    },
    {
      "epoch": 0.00024139404296875,
      "step": 39550,
      "training_step_time": 0.41134214401245117
    },
    {
      "epoch": 0.000241400146484375,
      "model_forward_time": 0.11480355262756348,
      "step": 39551
    },
    {
      "epoch": 0.000241400146484375,
      "step": 39551,
      "training_step_time": 0.6359405517578125
    },
    {
      "epoch": 0.00024140625,
      "model_forward_time": 0.11512541770935059,
      "step": 39552
    },
    {
      "epoch": 0.00024140625,
      "step": 39552,
      "training_step_time": 0.4024357795715332
    },
    {
      "epoch": 0.000241412353515625,
      "model_forward_time": 0.11479544639587402,
      "step": 39553
    },
    {
      "epoch": 0.000241412353515625,
      "step": 39553,
      "training_step_time": 0.3957533836364746
    },
    {
      "epoch": 0.00024141845703125,
      "model_forward_time": 0.11482453346252441,
      "step": 39554
    },
    {
      "epoch": 0.00024141845703125,
      "step": 39554,
      "training_step_time": 0.44385671615600586
    },
    {
      "epoch": 0.000241424560546875,
      "model_forward_time": 0.11445879936218262,
      "step": 39555
    },
    {
      "epoch": 0.000241424560546875,
      "step": 39555,
      "training_step_time": 0.46222901344299316
    },
    {
      "epoch": 0.0002414306640625,
      "model_forward_time": 0.11482501029968262,
      "step": 39556
    },
    {
      "epoch": 0.0002414306640625,
      "step": 39556,
      "training_step_time": 0.4608023166656494
    },
    {
      "epoch": 0.000241436767578125,
      "model_forward_time": 0.1151738166809082,
      "step": 39557
    },
    {
      "epoch": 0.000241436767578125,
      "step": 39557,
      "training_step_time": 0.3923063278198242
    },
    {
      "epoch": 0.00024144287109375,
      "model_forward_time": 0.11507272720336914,
      "step": 39558
    },
    {
      "epoch": 0.00024144287109375,
      "step": 39558,
      "training_step_time": 0.3787410259246826
    },
    {
      "epoch": 0.000241448974609375,
      "model_forward_time": 0.11477136611938477,
      "step": 39559
    },
    {
      "epoch": 0.000241448974609375,
      "step": 39559,
      "training_step_time": 0.38862156867980957
    },
    {
      "epoch": 0.000241455078125,
      "grad_norm": 0.10795723646879196,
      "learning_rate": 2.8511762449730795e-05,
      "loss": 0.0378,
      "step": 39560
    },
    {
      "epoch": 0.000241455078125,
      "model_forward_time": 0.11471080780029297,
      "step": 39560
    },
    {
      "epoch": 0.000241455078125,
      "step": 39560,
      "training_step_time": 0.3860645294189453
    },
    {
      "epoch": 0.000241461181640625,
      "model_forward_time": 0.1154627799987793,
      "step": 39561
    },
    {
      "epoch": 0.000241461181640625,
      "step": 39561,
      "training_step_time": 0.39197778701782227
    },
    {
      "epoch": 0.00024146728515625,
      "model_forward_time": 0.11550283432006836,
      "step": 39562
    },
    {
      "epoch": 0.00024146728515625,
      "step": 39562,
      "training_step_time": 0.3853437900543213
    },
    {
      "epoch": 0.000241473388671875,
      "model_forward_time": 0.11568927764892578,
      "step": 39563
    },
    {
      "epoch": 0.000241473388671875,
      "step": 39563,
      "training_step_time": 0.6586499214172363
    },
    {
      "epoch": 0.0002414794921875,
      "model_forward_time": 0.11498212814331055,
      "step": 39564
    },
    {
      "epoch": 0.0002414794921875,
      "step": 39564,
      "training_step_time": 0.38480162620544434
    },
    {
      "epoch": 0.000241485595703125,
      "model_forward_time": 0.11471414566040039,
      "step": 39565
    },
    {
      "epoch": 0.000241485595703125,
      "step": 39565,
      "training_step_time": 0.3882138729095459
    },
    {
      "epoch": 0.00024149169921875,
      "model_forward_time": 0.11473941802978516,
      "step": 39566
    },
    {
      "epoch": 0.00024149169921875,
      "step": 39566,
      "training_step_time": 0.384552001953125
    },
    {
      "epoch": 0.000241497802734375,
      "model_forward_time": 0.11483383178710938,
      "step": 39567
    },
    {
      "epoch": 0.000241497802734375,
      "step": 39567,
      "training_step_time": 0.387192964553833
    },
    {
      "epoch": 0.00024150390625,
      "model_forward_time": 0.11516237258911133,
      "step": 39568
    },
    {
      "epoch": 0.00024150390625,
      "step": 39568,
      "training_step_time": 0.44809627532958984
    },
    {
      "epoch": 0.000241510009765625,
      "model_forward_time": 0.11570978164672852,
      "step": 39569
    },
    {
      "epoch": 0.000241510009765625,
      "step": 39569,
      "training_step_time": 0.5173275470733643
    },
    {
      "epoch": 0.00024151611328125,
      "grad_norm": 0.12397128343582153,
      "learning_rate": 2.8486882622836026e-05,
      "loss": 0.0477,
      "step": 39570
    },
    {
      "epoch": 0.00024151611328125,
      "model_forward_time": 0.11527013778686523,
      "step": 39570
    },
    {
      "epoch": 0.00024151611328125,
      "step": 39570,
      "training_step_time": 0.470872163772583
    },
    {
      "epoch": 0.000241522216796875,
      "model_forward_time": 0.11502408981323242,
      "step": 39571
    },
    {
      "epoch": 0.000241522216796875,
      "step": 39571,
      "training_step_time": 0.4029576778411865
    },
    {
      "epoch": 0.0002415283203125,
      "model_forward_time": 0.11509466171264648,
      "step": 39572
    },
    {
      "epoch": 0.0002415283203125,
      "step": 39572,
      "training_step_time": 0.39064574241638184
    },
    {
      "epoch": 0.000241534423828125,
      "model_forward_time": 0.11455178260803223,
      "step": 39573
    },
    {
      "epoch": 0.000241534423828125,
      "step": 39573,
      "training_step_time": 0.3936736583709717
    },
    {
      "epoch": 0.00024154052734375,
      "model_forward_time": 0.11465096473693848,
      "step": 39574
    },
    {
      "epoch": 0.00024154052734375,
      "step": 39574,
      "training_step_time": 0.39801573753356934
    },
    {
      "epoch": 0.000241546630859375,
      "model_forward_time": 0.11542487144470215,
      "step": 39575
    },
    {
      "epoch": 0.000241546630859375,
      "step": 39575,
      "training_step_time": 0.5148594379425049
    },
    {
      "epoch": 0.000241552734375,
      "model_forward_time": 0.11502861976623535,
      "step": 39576
    },
    {
      "epoch": 0.000241552734375,
      "step": 39576,
      "training_step_time": 0.42679524421691895
    },
    {
      "epoch": 0.000241558837890625,
      "model_forward_time": 0.11657118797302246,
      "step": 39577
    },
    {
      "epoch": 0.000241558837890625,
      "step": 39577,
      "training_step_time": 0.44712257385253906
    },
    {
      "epoch": 0.00024156494140625,
      "model_forward_time": 0.11568689346313477,
      "step": 39578
    },
    {
      "epoch": 0.00024156494140625,
      "step": 39578,
      "training_step_time": 0.3817257881164551
    },
    {
      "epoch": 0.000241571044921875,
      "model_forward_time": 0.11514425277709961,
      "step": 39579
    },
    {
      "epoch": 0.000241571044921875,
      "step": 39579,
      "training_step_time": 0.392974853515625
    },
    {
      "epoch": 0.0002415771484375,
      "grad_norm": 0.10294072329998016,
      "learning_rate": 2.846200933105829e-05,
      "loss": 0.039,
      "step": 39580
    },
    {
      "epoch": 0.0002415771484375,
      "model_forward_time": 0.11484742164611816,
      "step": 39580
    },
    {
      "epoch": 0.0002415771484375,
      "step": 39580,
      "training_step_time": 0.3841235637664795
    },
    {
      "epoch": 0.000241583251953125,
      "model_forward_time": 0.11564445495605469,
      "step": 39581
    },
    {
      "epoch": 0.000241583251953125,
      "step": 39581,
      "training_step_time": 0.5269854068756104
    },
    {
      "epoch": 0.00024158935546875,
      "model_forward_time": 0.11516213417053223,
      "step": 39582
    },
    {
      "epoch": 0.00024158935546875,
      "step": 39582,
      "training_step_time": 0.420398473739624
    },
    {
      "epoch": 0.000241595458984375,
      "model_forward_time": 0.11473464965820312,
      "step": 39583
    },
    {
      "epoch": 0.000241595458984375,
      "step": 39583,
      "training_step_time": 0.3668084144592285
    },
    {
      "epoch": 0.0002416015625,
      "model_forward_time": 0.11493635177612305,
      "step": 39584
    },
    {
      "epoch": 0.0002416015625,
      "step": 39584,
      "training_step_time": 0.44239234924316406
    },
    {
      "epoch": 0.000241607666015625,
      "model_forward_time": 0.1154482364654541,
      "step": 39585
    },
    {
      "epoch": 0.000241607666015625,
      "step": 39585,
      "training_step_time": 0.4014120101928711
    },
    {
      "epoch": 0.00024161376953125,
      "model_forward_time": 0.1153707504272461,
      "step": 39586
    },
    {
      "epoch": 0.00024161376953125,
      "step": 39586,
      "training_step_time": 0.37946319580078125
    },
    {
      "epoch": 0.000241619873046875,
      "model_forward_time": 0.11490607261657715,
      "step": 39587
    },
    {
      "epoch": 0.000241619873046875,
      "step": 39587,
      "training_step_time": 0.5529122352600098
    },
    {
      "epoch": 0.0002416259765625,
      "model_forward_time": 0.11527252197265625,
      "step": 39588
    },
    {
      "epoch": 0.0002416259765625,
      "step": 39588,
      "training_step_time": 0.38367509841918945
    },
    {
      "epoch": 0.000241632080078125,
      "model_forward_time": 0.11569762229919434,
      "step": 39589
    },
    {
      "epoch": 0.000241632080078125,
      "step": 39589,
      "training_step_time": 0.3893733024597168
    },
    {
      "epoch": 0.00024163818359375,
      "grad_norm": 0.11511794477701187,
      "learning_rate": 2.843714258195346e-05,
      "loss": 0.0439,
      "step": 39590
    },
    {
      "epoch": 0.00024163818359375,
      "model_forward_time": 0.11525130271911621,
      "step": 39590
    },
    {
      "epoch": 0.00024163818359375,
      "step": 39590,
      "training_step_time": 0.40252065658569336
    },
    {
      "epoch": 0.000241644287109375,
      "model_forward_time": 0.11494612693786621,
      "step": 39591
    },
    {
      "epoch": 0.000241644287109375,
      "step": 39591,
      "training_step_time": 0.387040376663208
    },
    {
      "epoch": 0.000241650390625,
      "model_forward_time": 0.11564016342163086,
      "step": 39592
    },
    {
      "epoch": 0.000241650390625,
      "step": 39592,
      "training_step_time": 0.38471245765686035
    },
    {
      "epoch": 0.000241656494140625,
      "model_forward_time": 0.11529183387756348,
      "step": 39593
    },
    {
      "epoch": 0.000241656494140625,
      "step": 39593,
      "training_step_time": 0.6376063823699951
    },
    {
      "epoch": 0.00024166259765625,
      "model_forward_time": 0.1152341365814209,
      "step": 39594
    },
    {
      "epoch": 0.00024166259765625,
      "step": 39594,
      "training_step_time": 0.380251407623291
    },
    {
      "epoch": 0.000241668701171875,
      "model_forward_time": 0.11591315269470215,
      "step": 39595
    },
    {
      "epoch": 0.000241668701171875,
      "step": 39595,
      "training_step_time": 0.45618486404418945
    },
    {
      "epoch": 0.0002416748046875,
      "model_forward_time": 0.11518263816833496,
      "step": 39596
    },
    {
      "epoch": 0.0002416748046875,
      "step": 39596,
      "training_step_time": 0.417783260345459
    },
    {
      "epoch": 0.000241680908203125,
      "model_forward_time": 0.1154472827911377,
      "step": 39597
    },
    {
      "epoch": 0.000241680908203125,
      "step": 39597,
      "training_step_time": 0.464191198348999
    },
    {
      "epoch": 0.00024168701171875,
      "model_forward_time": 0.11436295509338379,
      "step": 39598
    },
    {
      "epoch": 0.00024168701171875,
      "step": 39598,
      "training_step_time": 0.46459364891052246
    },
    {
      "epoch": 0.000241693115234375,
      "model_forward_time": 0.11472630500793457,
      "step": 39599
    },
    {
      "epoch": 0.000241693115234375,
      "step": 39599,
      "training_step_time": 0.4094998836517334
    },
    {
      "epoch": 0.00024169921875,
      "grad_norm": 0.11322291940450668,
      "learning_rate": 2.8412282383075363e-05,
      "loss": 0.0394,
      "step": 39600
    },
    {
      "epoch": 0.00024169921875,
      "model_forward_time": 0.11461758613586426,
      "step": 39600
    },
    {
      "epoch": 0.00024169921875,
      "step": 39600,
      "training_step_time": 0.3936774730682373
    },
    {
      "epoch": 0.000241705322265625,
      "model_forward_time": 0.1151425838470459,
      "step": 39601
    },
    {
      "epoch": 0.000241705322265625,
      "step": 39601,
      "training_step_time": 0.38988566398620605
    },
    {
      "epoch": 0.00024171142578125,
      "model_forward_time": 0.11565876007080078,
      "step": 39602
    },
    {
      "epoch": 0.00024171142578125,
      "step": 39602,
      "training_step_time": 0.4492523670196533
    },
    {
      "epoch": 0.000241717529296875,
      "model_forward_time": 0.11469125747680664,
      "step": 39603
    },
    {
      "epoch": 0.000241717529296875,
      "step": 39603,
      "training_step_time": 0.40541911125183105
    },
    {
      "epoch": 0.0002417236328125,
      "model_forward_time": 0.11480975151062012,
      "step": 39604
    },
    {
      "epoch": 0.0002417236328125,
      "step": 39604,
      "training_step_time": 0.3857121467590332
    },
    {
      "epoch": 0.000241729736328125,
      "model_forward_time": 0.11554169654846191,
      "step": 39605
    },
    {
      "epoch": 0.000241729736328125,
      "step": 39605,
      "training_step_time": 0.5739274024963379
    },
    {
      "epoch": 0.00024173583984375,
      "model_forward_time": 0.11490273475646973,
      "step": 39606
    },
    {
      "epoch": 0.00024173583984375,
      "step": 39606,
      "training_step_time": 0.3900015354156494
    },
    {
      "epoch": 0.000241741943359375,
      "model_forward_time": 0.11475753784179688,
      "step": 39607
    },
    {
      "epoch": 0.000241741943359375,
      "step": 39607,
      "training_step_time": 0.38642430305480957
    },
    {
      "epoch": 0.000241748046875,
      "model_forward_time": 0.11528539657592773,
      "step": 39608
    },
    {
      "epoch": 0.000241748046875,
      "step": 39608,
      "training_step_time": 0.3857560157775879
    },
    {
      "epoch": 0.000241754150390625,
      "model_forward_time": 0.11603903770446777,
      "step": 39609
    },
    {
      "epoch": 0.000241754150390625,
      "step": 39609,
      "training_step_time": 0.41777896881103516
    },
    {
      "epoch": 0.00024176025390625,
      "grad_norm": 0.1332484632730484,
      "learning_rate": 2.838742874197587e-05,
      "loss": 0.0435,
      "step": 39610
    },
    {
      "epoch": 0.00024176025390625,
      "model_forward_time": 0.11544466018676758,
      "step": 39610
    },
    {
      "epoch": 0.00024176025390625,
      "step": 39610,
      "training_step_time": 0.42006707191467285
    },
    {
      "epoch": 0.000241766357421875,
      "model_forward_time": 0.1147921085357666,
      "step": 39611
    },
    {
      "epoch": 0.000241766357421875,
      "step": 39611,
      "training_step_time": 0.7502176761627197
    },
    {
      "epoch": 0.0002417724609375,
      "model_forward_time": 0.11489105224609375,
      "step": 39612
    },
    {
      "epoch": 0.0002417724609375,
      "step": 39612,
      "training_step_time": 0.475372314453125
    },
    {
      "epoch": 0.000241778564453125,
      "model_forward_time": 0.11524677276611328,
      "step": 39613
    },
    {
      "epoch": 0.000241778564453125,
      "step": 39613,
      "training_step_time": 0.4303746223449707
    },
    {
      "epoch": 0.00024178466796875,
      "model_forward_time": 0.11479520797729492,
      "step": 39614
    },
    {
      "epoch": 0.00024178466796875,
      "step": 39614,
      "training_step_time": 0.39607858657836914
    },
    {
      "epoch": 0.000241790771484375,
      "model_forward_time": 0.11443543434143066,
      "step": 39615
    },
    {
      "epoch": 0.000241790771484375,
      "step": 39615,
      "training_step_time": 0.4058949947357178
    },
    {
      "epoch": 0.000241796875,
      "model_forward_time": 0.1146390438079834,
      "step": 39616
    },
    {
      "epoch": 0.000241796875,
      "step": 39616,
      "training_step_time": 0.4433019161224365
    },
    {
      "epoch": 0.000241802978515625,
      "model_forward_time": 0.11444711685180664,
      "step": 39617
    },
    {
      "epoch": 0.000241802978515625,
      "step": 39617,
      "training_step_time": 0.3765897750854492
    },
    {
      "epoch": 0.00024180908203125,
      "model_forward_time": 0.11506152153015137,
      "step": 39618
    },
    {
      "epoch": 0.00024180908203125,
      "step": 39618,
      "training_step_time": 0.3816862106323242
    },
    {
      "epoch": 0.000241815185546875,
      "model_forward_time": 0.11524558067321777,
      "step": 39619
    },
    {
      "epoch": 0.000241815185546875,
      "step": 39619,
      "training_step_time": 0.3918302059173584
    },
    {
      "epoch": 0.0002418212890625,
      "grad_norm": 0.12139609456062317,
      "learning_rate": 2.8362581666204918e-05,
      "loss": 0.04,
      "step": 39620
    },
    {
      "epoch": 0.0002418212890625,
      "model_forward_time": 0.11520600318908691,
      "step": 39620
    },
    {
      "epoch": 0.0002418212890625,
      "step": 39620,
      "training_step_time": 0.39021921157836914
    },
    {
      "epoch": 0.000241827392578125,
      "model_forward_time": 0.11540079116821289,
      "step": 39621
    },
    {
      "epoch": 0.000241827392578125,
      "step": 39621,
      "training_step_time": 0.3912956714630127
    },
    {
      "epoch": 0.00024183349609375,
      "model_forward_time": 0.1147921085357666,
      "step": 39622
    },
    {
      "epoch": 0.00024183349609375,
      "step": 39622,
      "training_step_time": 0.38935351371765137
    },
    {
      "epoch": 0.000241839599609375,
      "model_forward_time": 0.11604046821594238,
      "step": 39623
    },
    {
      "epoch": 0.000241839599609375,
      "step": 39623,
      "training_step_time": 0.6362035274505615
    },
    {
      "epoch": 0.000241845703125,
      "model_forward_time": 0.11582183837890625,
      "step": 39624
    },
    {
      "epoch": 0.000241845703125,
      "step": 39624,
      "training_step_time": 0.3994264602661133
    },
    {
      "epoch": 0.000241851806640625,
      "model_forward_time": 0.11482715606689453,
      "step": 39625
    },
    {
      "epoch": 0.000241851806640625,
      "step": 39625,
      "training_step_time": 0.3671715259552002
    },
    {
      "epoch": 0.00024185791015625,
      "model_forward_time": 0.11460304260253906,
      "step": 39626
    },
    {
      "epoch": 0.00024185791015625,
      "step": 39626,
      "training_step_time": 0.44114041328430176
    },
    {
      "epoch": 0.000241864013671875,
      "model_forward_time": 0.11520552635192871,
      "step": 39627
    },
    {
      "epoch": 0.000241864013671875,
      "step": 39627,
      "training_step_time": 0.4136202335357666
    },
    {
      "epoch": 0.0002418701171875,
      "model_forward_time": 0.11518502235412598,
      "step": 39628
    },
    {
      "epoch": 0.0002418701171875,
      "step": 39628,
      "training_step_time": 0.44402289390563965
    },
    {
      "epoch": 0.000241876220703125,
      "model_forward_time": 0.1155996322631836,
      "step": 39629
    },
    {
      "epoch": 0.000241876220703125,
      "step": 39629,
      "training_step_time": 0.5206389427185059
    },
    {
      "epoch": 0.00024188232421875,
      "grad_norm": 0.13203752040863037,
      "learning_rate": 2.8337741163310317e-05,
      "loss": 0.0409,
      "step": 39630
    },
    {
      "epoch": 0.00024188232421875,
      "model_forward_time": 0.11499977111816406,
      "step": 39630
    },
    {
      "epoch": 0.00024188232421875,
      "step": 39630,
      "training_step_time": 0.38721466064453125
    },
    {
      "epoch": 0.000241888427734375,
      "model_forward_time": 0.11482501029968262,
      "step": 39631
    },
    {
      "epoch": 0.000241888427734375,
      "step": 39631,
      "training_step_time": 0.39054012298583984
    },
    {
      "epoch": 0.00024189453125,
      "model_forward_time": 0.11549592018127441,
      "step": 39632
    },
    {
      "epoch": 0.00024189453125,
      "step": 39632,
      "training_step_time": 0.38334035873413086
    },
    {
      "epoch": 0.000241900634765625,
      "model_forward_time": 0.11543154716491699,
      "step": 39633
    },
    {
      "epoch": 0.000241900634765625,
      "step": 39633,
      "training_step_time": 0.4093928337097168
    },
    {
      "epoch": 0.00024190673828125,
      "model_forward_time": 0.11453366279602051,
      "step": 39634
    },
    {
      "epoch": 0.00024190673828125,
      "step": 39634,
      "training_step_time": 0.387906551361084
    },
    {
      "epoch": 0.000241912841796875,
      "model_forward_time": 0.11552977561950684,
      "step": 39635
    },
    {
      "epoch": 0.000241912841796875,
      "step": 39635,
      "training_step_time": 0.7933642864227295
    },
    {
      "epoch": 0.0002419189453125,
      "model_forward_time": 0.11470961570739746,
      "step": 39636
    },
    {
      "epoch": 0.0002419189453125,
      "step": 39636,
      "training_step_time": 0.40175962448120117
    },
    {
      "epoch": 0.000241925048828125,
      "model_forward_time": 0.1148993968963623,
      "step": 39637
    },
    {
      "epoch": 0.000241925048828125,
      "step": 39637,
      "training_step_time": 0.4428365230560303
    },
    {
      "epoch": 0.00024193115234375,
      "model_forward_time": 0.11408233642578125,
      "step": 39638
    },
    {
      "epoch": 0.00024193115234375,
      "step": 39638,
      "training_step_time": 0.47023725509643555
    },
    {
      "epoch": 0.000241937255859375,
      "model_forward_time": 0.11441254615783691,
      "step": 39639
    },
    {
      "epoch": 0.000241937255859375,
      "step": 39639,
      "training_step_time": 0.44783949851989746
    },
    {
      "epoch": 0.000241943359375,
      "grad_norm": 0.1036323830485344,
      "learning_rate": 2.8312907240838027e-05,
      "loss": 0.0381,
      "step": 39640
    },
    {
      "epoch": 0.000241943359375,
      "model_forward_time": 0.11394286155700684,
      "step": 39640
    },
    {
      "epoch": 0.000241943359375,
      "step": 39640,
      "training_step_time": 0.4783046245574951
    },
    {
      "epoch": 0.000241949462890625,
      "model_forward_time": 0.1149282455444336,
      "step": 39641
    },
    {
      "epoch": 0.000241949462890625,
      "step": 39641,
      "training_step_time": 0.4240736961364746
    },
    {
      "epoch": 0.00024195556640625,
      "model_forward_time": 0.11482810974121094,
      "step": 39642
    },
    {
      "epoch": 0.00024195556640625,
      "step": 39642,
      "training_step_time": 0.39313459396362305
    },
    {
      "epoch": 0.000241961669921875,
      "model_forward_time": 0.11440157890319824,
      "step": 39643
    },
    {
      "epoch": 0.000241961669921875,
      "step": 39643,
      "training_step_time": 0.3894691467285156
    },
    {
      "epoch": 0.0002419677734375,
      "model_forward_time": 0.11481070518493652,
      "step": 39644
    },
    {
      "epoch": 0.0002419677734375,
      "step": 39644,
      "training_step_time": 0.37903547286987305
    },
    {
      "epoch": 0.000241973876953125,
      "model_forward_time": 0.11556506156921387,
      "step": 39645
    },
    {
      "epoch": 0.000241973876953125,
      "step": 39645,
      "training_step_time": 0.3864617347717285
    },
    {
      "epoch": 0.00024197998046875,
      "model_forward_time": 0.1160116195678711,
      "step": 39646
    },
    {
      "epoch": 0.00024197998046875,
      "step": 39646,
      "training_step_time": 0.3926842212677002
    },
    {
      "epoch": 0.000241986083984375,
      "model_forward_time": 0.1157541275024414,
      "step": 39647
    },
    {
      "epoch": 0.000241986083984375,
      "step": 39647,
      "training_step_time": 0.5748465061187744
    },
    {
      "epoch": 0.0002419921875,
      "model_forward_time": 0.11543536186218262,
      "step": 39648
    },
    {
      "epoch": 0.0002419921875,
      "step": 39648,
      "training_step_time": 0.3875274658203125
    },
    {
      "epoch": 0.000241998291015625,
      "model_forward_time": 0.11545896530151367,
      "step": 39649
    },
    {
      "epoch": 0.000241998291015625,
      "step": 39649,
      "training_step_time": 0.39440488815307617
    },
    {
      "epoch": 0.00024200439453125,
      "grad_norm": 0.10171312093734741,
      "learning_rate": 2.8288079906331864e-05,
      "loss": 0.0389,
      "step": 39650
    },
    {
      "epoch": 0.00024200439453125,
      "model_forward_time": 0.11566615104675293,
      "step": 39650
    },
    {
      "epoch": 0.00024200439453125,
      "step": 39650,
      "training_step_time": 0.4049358367919922
    },
    {
      "epoch": 0.000242010498046875,
      "model_forward_time": 0.11553835868835449,
      "step": 39651
    },
    {
      "epoch": 0.000242010498046875,
      "step": 39651,
      "training_step_time": 0.40384459495544434
    },
    {
      "epoch": 0.0002420166015625,
      "model_forward_time": 0.1153407096862793,
      "step": 39652
    },
    {
      "epoch": 0.0002420166015625,
      "step": 39652,
      "training_step_time": 0.41950440406799316
    },
    {
      "epoch": 0.000242022705078125,
      "model_forward_time": 0.11480998992919922,
      "step": 39653
    },
    {
      "epoch": 0.000242022705078125,
      "step": 39653,
      "training_step_time": 0.48583245277404785
    },
    {
      "epoch": 0.00024202880859375,
      "model_forward_time": 0.115234375,
      "step": 39654
    },
    {
      "epoch": 0.00024202880859375,
      "step": 39654,
      "training_step_time": 0.41956138610839844
    },
    {
      "epoch": 0.000242034912109375,
      "model_forward_time": 0.11500263214111328,
      "step": 39655
    },
    {
      "epoch": 0.000242034912109375,
      "step": 39655,
      "training_step_time": 0.45070385932922363
    },
    {
      "epoch": 0.000242041015625,
      "model_forward_time": 0.11486363410949707,
      "step": 39656
    },
    {
      "epoch": 0.000242041015625,
      "step": 39656,
      "training_step_time": 0.38486409187316895
    },
    {
      "epoch": 0.000242047119140625,
      "model_forward_time": 0.11460089683532715,
      "step": 39657
    },
    {
      "epoch": 0.000242047119140625,
      "step": 39657,
      "training_step_time": 0.387786865234375
    },
    {
      "epoch": 0.00024205322265625,
      "model_forward_time": 0.11511039733886719,
      "step": 39658
    },
    {
      "epoch": 0.00024205322265625,
      "step": 39658,
      "training_step_time": 0.38934946060180664
    },
    {
      "epoch": 0.000242059326171875,
      "model_forward_time": 0.11508512496948242,
      "step": 39659
    },
    {
      "epoch": 0.000242059326171875,
      "step": 39659,
      "training_step_time": 0.5703840255737305
    },
    {
      "epoch": 0.0002420654296875,
      "grad_norm": 0.14930406212806702,
      "learning_rate": 2.8263259167333777e-05,
      "loss": 0.0427,
      "step": 39660
    },
    {
      "epoch": 0.0002420654296875,
      "model_forward_time": 0.11489129066467285,
      "step": 39660
    },
    {
      "epoch": 0.0002420654296875,
      "step": 39660,
      "training_step_time": 0.3999629020690918
    },
    {
      "epoch": 0.000242071533203125,
      "model_forward_time": 0.1146695613861084,
      "step": 39661
    },
    {
      "epoch": 0.000242071533203125,
      "step": 39661,
      "training_step_time": 0.3911244869232178
    },
    {
      "epoch": 0.00024207763671875,
      "model_forward_time": 0.11525559425354004,
      "step": 39662
    },
    {
      "epoch": 0.00024207763671875,
      "step": 39662,
      "training_step_time": 0.38834714889526367
    },
    {
      "epoch": 0.000242083740234375,
      "model_forward_time": 0.11598873138427734,
      "step": 39663
    },
    {
      "epoch": 0.000242083740234375,
      "step": 39663,
      "training_step_time": 0.39319872856140137
    },
    {
      "epoch": 0.00024208984375,
      "model_forward_time": 0.11516594886779785,
      "step": 39664
    },
    {
      "epoch": 0.00024208984375,
      "step": 39664,
      "training_step_time": 0.3973240852355957
    },
    {
      "epoch": 0.000242095947265625,
      "model_forward_time": 0.11487150192260742,
      "step": 39665
    },
    {
      "epoch": 0.000242095947265625,
      "step": 39665,
      "training_step_time": 0.6713075637817383
    },
    {
      "epoch": 0.00024210205078125,
      "model_forward_time": 0.11495542526245117,
      "step": 39666
    },
    {
      "epoch": 0.00024210205078125,
      "step": 39666,
      "training_step_time": 0.45511460304260254
    },
    {
      "epoch": 0.000242108154296875,
      "model_forward_time": 0.11437463760375977,
      "step": 39667
    },
    {
      "epoch": 0.000242108154296875,
      "step": 39667,
      "training_step_time": 0.41039443016052246
    },
    {
      "epoch": 0.0002421142578125,
      "model_forward_time": 0.11509919166564941,
      "step": 39668
    },
    {
      "epoch": 0.0002421142578125,
      "step": 39668,
      "training_step_time": 0.4010732173919678
    },
    {
      "epoch": 0.000242120361328125,
      "model_forward_time": 0.1163034439086914,
      "step": 39669
    },
    {
      "epoch": 0.000242120361328125,
      "step": 39669,
      "training_step_time": 0.3892650604248047
    },
    {
      "epoch": 0.00024212646484375,
      "grad_norm": 0.08204880356788635,
      "learning_rate": 2.823844503138363e-05,
      "loss": 0.04,
      "step": 39670
    },
    {
      "epoch": 0.00024212646484375,
      "model_forward_time": 0.11447930335998535,
      "step": 39670
    },
    {
      "epoch": 0.00024212646484375,
      "step": 39670,
      "training_step_time": 0.38399243354797363
    },
    {
      "epoch": 0.000242132568359375,
      "model_forward_time": 0.11522436141967773,
      "step": 39671
    },
    {
      "epoch": 0.000242132568359375,
      "step": 39671,
      "training_step_time": 0.5508732795715332
    },
    {
      "epoch": 0.000242138671875,
      "model_forward_time": 0.11443924903869629,
      "step": 39672
    },
    {
      "epoch": 0.000242138671875,
      "step": 39672,
      "training_step_time": 0.40008091926574707
    },
    {
      "epoch": 0.000242144775390625,
      "model_forward_time": 0.11498332023620605,
      "step": 39673
    },
    {
      "epoch": 0.000242144775390625,
      "step": 39673,
      "training_step_time": 0.39105796813964844
    },
    {
      "epoch": 0.00024215087890625,
      "model_forward_time": 0.11455917358398438,
      "step": 39674
    },
    {
      "epoch": 0.00024215087890625,
      "step": 39674,
      "training_step_time": 0.39088964462280273
    },
    {
      "epoch": 0.000242156982421875,
      "model_forward_time": 0.11523890495300293,
      "step": 39675
    },
    {
      "epoch": 0.000242156982421875,
      "step": 39675,
      "training_step_time": 0.3904130458831787
    },
    {
      "epoch": 0.0002421630859375,
      "model_forward_time": 0.11577725410461426,
      "step": 39676
    },
    {
      "epoch": 0.0002421630859375,
      "step": 39676,
      "training_step_time": 0.40197205543518066
    },
    {
      "epoch": 0.000242169189453125,
      "model_forward_time": 0.11458849906921387,
      "step": 39677
    },
    {
      "epoch": 0.000242169189453125,
      "step": 39677,
      "training_step_time": 0.6238901615142822
    },
    {
      "epoch": 0.00024217529296875,
      "model_forward_time": 0.11696362495422363,
      "step": 39678
    },
    {
      "epoch": 0.00024217529296875,
      "step": 39678,
      "training_step_time": 0.40836262702941895
    },
    {
      "epoch": 0.000242181396484375,
      "model_forward_time": 0.11500191688537598,
      "step": 39679
    },
    {
      "epoch": 0.000242181396484375,
      "step": 39679,
      "training_step_time": 0.43538403511047363
    },
    {
      "epoch": 0.0002421875,
      "grad_norm": 0.1052907407283783,
      "learning_rate": 2.8213637506019304e-05,
      "loss": 0.0389,
      "step": 39680
    },
    {
      "epoch": 0.0002421875,
      "model_forward_time": 0.11482048034667969,
      "step": 39680
    },
    {
      "epoch": 0.0002421875,
      "step": 39680,
      "training_step_time": 0.47749757766723633
    },
    {
      "epoch": 0.000242193603515625,
      "model_forward_time": 0.1153099536895752,
      "step": 39681
    },
    {
      "epoch": 0.000242193603515625,
      "step": 39681,
      "training_step_time": 0.48980045318603516
    },
    {
      "epoch": 0.00024219970703125,
      "model_forward_time": 0.11476850509643555,
      "step": 39682
    },
    {
      "epoch": 0.00024219970703125,
      "step": 39682,
      "training_step_time": 0.4428400993347168
    },
    {
      "epoch": 0.000242205810546875,
      "model_forward_time": 0.11541008949279785,
      "step": 39683
    },
    {
      "epoch": 0.000242205810546875,
      "step": 39683,
      "training_step_time": 0.42432332038879395
    },
    {
      "epoch": 0.0002422119140625,
      "model_forward_time": 0.11475896835327148,
      "step": 39684
    },
    {
      "epoch": 0.0002422119140625,
      "step": 39684,
      "training_step_time": 0.39644885063171387
    },
    {
      "epoch": 0.000242218017578125,
      "model_forward_time": 0.1151885986328125,
      "step": 39685
    },
    {
      "epoch": 0.000242218017578125,
      "step": 39685,
      "training_step_time": 0.394728422164917
    },
    {
      "epoch": 0.00024222412109375,
      "model_forward_time": 0.11517930030822754,
      "step": 39686
    },
    {
      "epoch": 0.00024222412109375,
      "step": 39686,
      "training_step_time": 0.3964850902557373
    },
    {
      "epoch": 0.000242230224609375,
      "model_forward_time": 0.11498856544494629,
      "step": 39687
    },
    {
      "epoch": 0.000242230224609375,
      "step": 39687,
      "training_step_time": 0.3899879455566406
    },
    {
      "epoch": 0.000242236328125,
      "model_forward_time": 0.11481642723083496,
      "step": 39688
    },
    {
      "epoch": 0.000242236328125,
      "step": 39688,
      "training_step_time": 0.3915860652923584
    },
    {
      "epoch": 0.000242242431640625,
      "model_forward_time": 0.11543512344360352,
      "step": 39689
    },
    {
      "epoch": 0.000242242431640625,
      "step": 39689,
      "training_step_time": 0.6247713565826416
    },
    {
      "epoch": 0.00024224853515625,
      "grad_norm": 0.1334456354379654,
      "learning_rate": 2.8188836598776662e-05,
      "loss": 0.0403,
      "step": 39690
    },
    {
      "epoch": 0.00024224853515625,
      "model_forward_time": 0.11464500427246094,
      "step": 39690
    },
    {
      "epoch": 0.00024224853515625,
      "step": 39690,
      "training_step_time": 0.3956272602081299
    },
    {
      "epoch": 0.000242254638671875,
      "model_forward_time": 0.1151571273803711,
      "step": 39691
    },
    {
      "epoch": 0.000242254638671875,
      "step": 39691,
      "training_step_time": 0.3985466957092285
    },
    {
      "epoch": 0.0002422607421875,
      "model_forward_time": 0.11490464210510254,
      "step": 39692
    },
    {
      "epoch": 0.0002422607421875,
      "step": 39692,
      "training_step_time": 0.397266149520874
    },
    {
      "epoch": 0.000242266845703125,
      "model_forward_time": 0.11496472358703613,
      "step": 39693
    },
    {
      "epoch": 0.000242266845703125,
      "step": 39693,
      "training_step_time": 0.4565112590789795
    },
    {
      "epoch": 0.00024227294921875,
      "model_forward_time": 0.11492228507995605,
      "step": 39694
    },
    {
      "epoch": 0.00024227294921875,
      "step": 39694,
      "training_step_time": 0.4825112819671631
    },
    {
      "epoch": 0.000242279052734375,
      "model_forward_time": 0.1149439811706543,
      "step": 39695
    },
    {
      "epoch": 0.000242279052734375,
      "step": 39695,
      "training_step_time": 0.37245655059814453
    },
    {
      "epoch": 0.00024228515625,
      "model_forward_time": 0.11541199684143066,
      "step": 39696
    },
    {
      "epoch": 0.00024228515625,
      "step": 39696,
      "training_step_time": 0.48323702812194824
    },
    {
      "epoch": 0.000242291259765625,
      "model_forward_time": 0.11500072479248047,
      "step": 39697
    },
    {
      "epoch": 0.000242291259765625,
      "step": 39697,
      "training_step_time": 0.3948953151702881
    },
    {
      "epoch": 0.00024229736328125,
      "model_forward_time": 0.11505579948425293,
      "step": 39698
    },
    {
      "epoch": 0.00024229736328125,
      "step": 39698,
      "training_step_time": 0.3934023380279541
    },
    {
      "epoch": 0.000242303466796875,
      "model_forward_time": 0.11502933502197266,
      "step": 39699
    },
    {
      "epoch": 0.000242303466796875,
      "step": 39699,
      "training_step_time": 0.38950204849243164
    },
    {
      "epoch": 0.0002423095703125,
      "grad_norm": 0.10286509990692139,
      "learning_rate": 2.8164042317189575e-05,
      "loss": 0.0398,
      "step": 39700
    },
    {
      "epoch": 0.0002423095703125,
      "model_forward_time": 0.11492109298706055,
      "step": 39700
    },
    {
      "epoch": 0.0002423095703125,
      "step": 39700,
      "training_step_time": 0.38561511039733887
    },
    {
      "epoch": 0.000242315673828125,
      "model_forward_time": 0.11539983749389648,
      "step": 39701
    },
    {
      "epoch": 0.000242315673828125,
      "step": 39701,
      "training_step_time": 0.5831835269927979
    },
    {
      "epoch": 0.00024232177734375,
      "model_forward_time": 0.11448431015014648,
      "step": 39702
    },
    {
      "epoch": 0.00024232177734375,
      "step": 39702,
      "training_step_time": 0.3955986499786377
    },
    {
      "epoch": 0.000242327880859375,
      "model_forward_time": 0.11574578285217285,
      "step": 39703
    },
    {
      "epoch": 0.000242327880859375,
      "step": 39703,
      "training_step_time": 0.39531970024108887
    },
    {
      "epoch": 0.000242333984375,
      "model_forward_time": 0.11467242240905762,
      "step": 39704
    },
    {
      "epoch": 0.000242333984375,
      "step": 39704,
      "training_step_time": 0.3842794895172119
    },
    {
      "epoch": 0.000242340087890625,
      "model_forward_time": 0.11478209495544434,
      "step": 39705
    },
    {
      "epoch": 0.000242340087890625,
      "step": 39705,
      "training_step_time": 0.3945276737213135
    },
    {
      "epoch": 0.00024234619140625,
      "model_forward_time": 0.11534309387207031,
      "step": 39706
    },
    {
      "epoch": 0.00024234619140625,
      "step": 39706,
      "training_step_time": 0.4263436794281006
    },
    {
      "epoch": 0.000242352294921875,
      "model_forward_time": 0.11553311347961426,
      "step": 39707
    },
    {
      "epoch": 0.000242352294921875,
      "step": 39707,
      "training_step_time": 0.7108471393585205
    },
    {
      "epoch": 0.0002423583984375,
      "model_forward_time": 0.11522173881530762,
      "step": 39708
    },
    {
      "epoch": 0.0002423583984375,
      "step": 39708,
      "training_step_time": 0.4337766170501709
    },
    {
      "epoch": 0.000242364501953125,
      "model_forward_time": 0.1154322624206543,
      "step": 39709
    },
    {
      "epoch": 0.000242364501953125,
      "step": 39709,
      "training_step_time": 0.44861316680908203
    },
    {
      "epoch": 0.00024237060546875,
      "grad_norm": 0.1238902285695076,
      "learning_rate": 2.8139254668789867e-05,
      "loss": 0.0436,
      "step": 39710
    },
    {
      "epoch": 0.00024237060546875,
      "model_forward_time": 0.11488699913024902,
      "step": 39710
    },
    {
      "epoch": 0.00024237060546875,
      "step": 39710,
      "training_step_time": 0.4257049560546875
    },
    {
      "epoch": 0.000242376708984375,
      "model_forward_time": 0.1138005256652832,
      "step": 39711
    },
    {
      "epoch": 0.000242376708984375,
      "step": 39711,
      "training_step_time": 0.44211912155151367
    },
    {
      "epoch": 0.0002423828125,
      "model_forward_time": 0.11447834968566895,
      "step": 39712
    },
    {
      "epoch": 0.0002423828125,
      "step": 39712,
      "training_step_time": 0.3759117126464844
    },
    {
      "epoch": 0.000242388916015625,
      "model_forward_time": 0.11503410339355469,
      "step": 39713
    },
    {
      "epoch": 0.000242388916015625,
      "step": 39713,
      "training_step_time": 0.37973856925964355
    },
    {
      "epoch": 0.00024239501953125,
      "model_forward_time": 0.11509013175964355,
      "step": 39714
    },
    {
      "epoch": 0.00024239501953125,
      "step": 39714,
      "training_step_time": 0.38100457191467285
    },
    {
      "epoch": 0.000242401123046875,
      "model_forward_time": 0.11481547355651855,
      "step": 39715
    },
    {
      "epoch": 0.000242401123046875,
      "step": 39715,
      "training_step_time": 0.38793039321899414
    },
    {
      "epoch": 0.0002424072265625,
      "model_forward_time": 0.11589956283569336,
      "step": 39716
    },
    {
      "epoch": 0.0002424072265625,
      "step": 39716,
      "training_step_time": 0.394336462020874
    },
    {
      "epoch": 0.000242413330078125,
      "model_forward_time": 0.11541104316711426,
      "step": 39717
    },
    {
      "epoch": 0.000242413330078125,
      "step": 39717,
      "training_step_time": 0.502753734588623
    },
    {
      "epoch": 0.00024241943359375,
      "model_forward_time": 0.11486697196960449,
      "step": 39718
    },
    {
      "epoch": 0.00024241943359375,
      "step": 39718,
      "training_step_time": 0.38843846321105957
    },
    {
      "epoch": 0.000242425537109375,
      "model_forward_time": 0.11550569534301758,
      "step": 39719
    },
    {
      "epoch": 0.000242425537109375,
      "step": 39719,
      "training_step_time": 0.4703943729400635
    },
    {
      "epoch": 0.000242431640625,
      "grad_norm": 0.12644492089748383,
      "learning_rate": 2.811447366110741e-05,
      "loss": 0.04,
      "step": 39720
    },
    {
      "epoch": 0.000242431640625,
      "model_forward_time": 0.11446380615234375,
      "step": 39720
    },
    {
      "epoch": 0.000242431640625,
      "step": 39720,
      "training_step_time": 0.4593653678894043
    },
    {
      "epoch": 0.000242437744140625,
      "model_forward_time": 0.11539602279663086,
      "step": 39721
    },
    {
      "epoch": 0.000242437744140625,
      "step": 39721,
      "training_step_time": 0.4383707046508789
    },
    {
      "epoch": 0.00024244384765625,
      "model_forward_time": 0.11428618431091309,
      "step": 39722
    },
    {
      "epoch": 0.00024244384765625,
      "step": 39722,
      "training_step_time": 0.4291114807128906
    },
    {
      "epoch": 0.000242449951171875,
      "model_forward_time": 0.11473560333251953,
      "step": 39723
    },
    {
      "epoch": 0.000242449951171875,
      "step": 39723,
      "training_step_time": 0.44904017448425293
    },
    {
      "epoch": 0.0002424560546875,
      "model_forward_time": 0.11607861518859863,
      "step": 39724
    },
    {
      "epoch": 0.0002424560546875,
      "step": 39724,
      "training_step_time": 0.416135311126709
    },
    {
      "epoch": 0.000242462158203125,
      "model_forward_time": 0.11584854125976562,
      "step": 39725
    },
    {
      "epoch": 0.000242462158203125,
      "step": 39725,
      "training_step_time": 0.4826977252960205
    },
    {
      "epoch": 0.00024246826171875,
      "model_forward_time": 0.11498832702636719,
      "step": 39726
    },
    {
      "epoch": 0.00024246826171875,
      "step": 39726,
      "training_step_time": 0.3932318687438965
    },
    {
      "epoch": 0.000242474365234375,
      "model_forward_time": 0.11499691009521484,
      "step": 39727
    },
    {
      "epoch": 0.000242474365234375,
      "step": 39727,
      "training_step_time": 0.38620662689208984
    },
    {
      "epoch": 0.00024248046875,
      "model_forward_time": 0.11500763893127441,
      "step": 39728
    },
    {
      "epoch": 0.00024248046875,
      "step": 39728,
      "training_step_time": 0.37874269485473633
    },
    {
      "epoch": 0.000242486572265625,
      "model_forward_time": 0.1161048412322998,
      "step": 39729
    },
    {
      "epoch": 0.000242486572265625,
      "step": 39729,
      "training_step_time": 0.43445491790771484
    },
    {
      "epoch": 0.00024249267578125,
      "grad_norm": 0.11553341895341873,
      "learning_rate": 2.8089699301670002e-05,
      "loss": 0.0419,
      "step": 39730
    },
    {
      "epoch": 0.00024249267578125,
      "model_forward_time": 0.11525869369506836,
      "step": 39730
    },
    {
      "epoch": 0.00024249267578125,
      "step": 39730,
      "training_step_time": 0.3821866512298584
    },
    {
      "epoch": 0.000242498779296875,
      "model_forward_time": 0.11522650718688965,
      "step": 39731
    },
    {
      "epoch": 0.000242498779296875,
      "step": 39731,
      "training_step_time": 0.3860299587249756
    },
    {
      "epoch": 0.0002425048828125,
      "model_forward_time": 0.1157686710357666,
      "step": 39732
    },
    {
      "epoch": 0.0002425048828125,
      "step": 39732,
      "training_step_time": 0.38425374031066895
    },
    {
      "epoch": 0.000242510986328125,
      "model_forward_time": 0.11487507820129395,
      "step": 39733
    },
    {
      "epoch": 0.000242510986328125,
      "step": 39733,
      "training_step_time": 0.3986790180206299
    },
    {
      "epoch": 0.00024251708984375,
      "model_forward_time": 0.11557245254516602,
      "step": 39734
    },
    {
      "epoch": 0.00024251708984375,
      "step": 39734,
      "training_step_time": 0.42801356315612793
    },
    {
      "epoch": 0.000242523193359375,
      "model_forward_time": 0.11583065986633301,
      "step": 39735
    },
    {
      "epoch": 0.000242523193359375,
      "step": 39735,
      "training_step_time": 0.510932445526123
    },
    {
      "epoch": 0.000242529296875,
      "model_forward_time": 0.11507058143615723,
      "step": 39736
    },
    {
      "epoch": 0.000242529296875,
      "step": 39736,
      "training_step_time": 0.4823644161224365
    },
    {
      "epoch": 0.000242535400390625,
      "model_forward_time": 0.11527585983276367,
      "step": 39737
    },
    {
      "epoch": 0.000242535400390625,
      "step": 39737,
      "training_step_time": 0.43056774139404297
    },
    {
      "epoch": 0.00024254150390625,
      "model_forward_time": 0.11459040641784668,
      "step": 39738
    },
    {
      "epoch": 0.00024254150390625,
      "step": 39738,
      "training_step_time": 0.4830644130706787
    },
    {
      "epoch": 0.000242547607421875,
      "model_forward_time": 0.11509418487548828,
      "step": 39739
    },
    {
      "epoch": 0.000242547607421875,
      "step": 39739,
      "training_step_time": 0.5040135383605957
    },
    {
      "epoch": 0.0002425537109375,
      "grad_norm": 0.09503505378961563,
      "learning_rate": 2.8064931598003436e-05,
      "loss": 0.04,
      "step": 39740
    },
    {
      "epoch": 0.0002425537109375,
      "model_forward_time": 0.11445307731628418,
      "step": 39740
    },
    {
      "epoch": 0.0002425537109375,
      "step": 39740,
      "training_step_time": 0.41329336166381836
    },
    {
      "epoch": 0.000242559814453125,
      "model_forward_time": 0.11466240882873535,
      "step": 39741
    },
    {
      "epoch": 0.000242559814453125,
      "step": 39741,
      "training_step_time": 0.39417219161987305
    },
    {
      "epoch": 0.00024256591796875,
      "model_forward_time": 0.11516284942626953,
      "step": 39742
    },
    {
      "epoch": 0.00024256591796875,
      "step": 39742,
      "training_step_time": 0.38220691680908203
    },
    {
      "epoch": 0.000242572021484375,
      "model_forward_time": 0.11441922187805176,
      "step": 39743
    },
    {
      "epoch": 0.000242572021484375,
      "step": 39743,
      "training_step_time": 0.3885822296142578
    },
    {
      "epoch": 0.000242578125,
      "model_forward_time": 0.11622309684753418,
      "step": 39744
    },
    {
      "epoch": 0.000242578125,
      "step": 39744,
      "training_step_time": 0.38877272605895996
    },
    {
      "epoch": 0.000242584228515625,
      "model_forward_time": 0.11542415618896484,
      "step": 39745
    },
    {
      "epoch": 0.000242584228515625,
      "step": 39745,
      "training_step_time": 0.3886728286743164
    },
    {
      "epoch": 0.00024259033203125,
      "model_forward_time": 0.11522746086120605,
      "step": 39746
    },
    {
      "epoch": 0.00024259033203125,
      "step": 39746,
      "training_step_time": 0.3889739513397217
    },
    {
      "epoch": 0.000242596435546875,
      "model_forward_time": 0.11546540260314941,
      "step": 39747
    },
    {
      "epoch": 0.000242596435546875,
      "step": 39747,
      "training_step_time": 0.4756941795349121
    },
    {
      "epoch": 0.0002426025390625,
      "model_forward_time": 0.11565756797790527,
      "step": 39748
    },
    {
      "epoch": 0.0002426025390625,
      "step": 39748,
      "training_step_time": 0.3774423599243164
    },
    {
      "epoch": 0.000242608642578125,
      "model_forward_time": 0.11493206024169922,
      "step": 39749
    },
    {
      "epoch": 0.000242608642578125,
      "step": 39749,
      "training_step_time": 0.5767090320587158
    },
    {
      "epoch": 0.00024261474609375,
      "grad_norm": 0.09807148575782776,
      "learning_rate": 2.804017055763149e-05,
      "loss": 0.0406,
      "step": 39750
    },
    {
      "epoch": 0.00024261474609375,
      "model_forward_time": 0.11563611030578613,
      "step": 39750
    },
    {
      "epoch": 0.00024261474609375,
      "step": 39750,
      "training_step_time": 0.5016729831695557
    },
    {
      "epoch": 0.000242620849609375,
      "model_forward_time": 0.11576342582702637,
      "step": 39751
    },
    {
      "epoch": 0.000242620849609375,
      "step": 39751,
      "training_step_time": 0.46866893768310547
    },
    {
      "epoch": 0.000242626953125,
      "model_forward_time": 0.11449933052062988,
      "step": 39752
    },
    {
      "epoch": 0.000242626953125,
      "step": 39752,
      "training_step_time": 0.4462594985961914
    },
    {
      "epoch": 0.000242633056640625,
      "model_forward_time": 0.11456608772277832,
      "step": 39753
    },
    {
      "epoch": 0.000242633056640625,
      "step": 39753,
      "training_step_time": 0.4778423309326172
    },
    {
      "epoch": 0.00024263916015625,
      "model_forward_time": 0.11449646949768066,
      "step": 39754
    },
    {
      "epoch": 0.00024263916015625,
      "step": 39754,
      "training_step_time": 0.37995028495788574
    },
    {
      "epoch": 0.000242645263671875,
      "model_forward_time": 0.11443734169006348,
      "step": 39755
    },
    {
      "epoch": 0.000242645263671875,
      "step": 39755,
      "training_step_time": 0.3974459171295166
    },
    {
      "epoch": 0.0002426513671875,
      "model_forward_time": 0.11530184745788574,
      "step": 39756
    },
    {
      "epoch": 0.0002426513671875,
      "step": 39756,
      "training_step_time": 0.407102108001709
    },
    {
      "epoch": 0.000242657470703125,
      "model_forward_time": 0.1152646541595459,
      "step": 39757
    },
    {
      "epoch": 0.000242657470703125,
      "step": 39757,
      "training_step_time": 0.39200806617736816
    },
    {
      "epoch": 0.00024266357421875,
      "model_forward_time": 0.11633753776550293,
      "step": 39758
    },
    {
      "epoch": 0.00024266357421875,
      "step": 39758,
      "training_step_time": 0.37918877601623535
    },
    {
      "epoch": 0.000242669677734375,
      "model_forward_time": 0.11549186706542969,
      "step": 39759
    },
    {
      "epoch": 0.000242669677734375,
      "step": 39759,
      "training_step_time": 0.3972923755645752
    },
    {
      "epoch": 0.00024267578125,
      "grad_norm": 0.08260286599397659,
      "learning_rate": 2.8015416188075893e-05,
      "loss": 0.0392,
      "step": 39760
    },
    {
      "epoch": 0.00024267578125,
      "model_forward_time": 0.11453366279602051,
      "step": 39760
    },
    {
      "epoch": 0.00024267578125,
      "step": 39760,
      "training_step_time": 0.41938185691833496
    },
    {
      "epoch": 0.000242681884765625,
      "model_forward_time": 0.11558794975280762,
      "step": 39761
    },
    {
      "epoch": 0.000242681884765625,
      "step": 39761,
      "training_step_time": 0.42130255699157715
    },
    {
      "epoch": 0.00024268798828125,
      "model_forward_time": 0.11534595489501953,
      "step": 39762
    },
    {
      "epoch": 0.00024268798828125,
      "step": 39762,
      "training_step_time": 0.3960425853729248
    },
    {
      "epoch": 0.000242694091796875,
      "model_forward_time": 0.11516308784484863,
      "step": 39763
    },
    {
      "epoch": 0.000242694091796875,
      "step": 39763,
      "training_step_time": 0.40045881271362305
    },
    {
      "epoch": 0.0002427001953125,
      "model_forward_time": 0.11549067497253418,
      "step": 39764
    },
    {
      "epoch": 0.0002427001953125,
      "step": 39764,
      "training_step_time": 0.38927197456359863
    },
    {
      "epoch": 0.000242706298828125,
      "model_forward_time": 0.11511802673339844,
      "step": 39765
    },
    {
      "epoch": 0.000242706298828125,
      "step": 39765,
      "training_step_time": 0.42119836807250977
    },
    {
      "epoch": 0.00024271240234375,
      "model_forward_time": 0.11696434020996094,
      "step": 39766
    },
    {
      "epoch": 0.00024271240234375,
      "step": 39766,
      "training_step_time": 0.38169217109680176
    },
    {
      "epoch": 0.000242718505859375,
      "model_forward_time": 0.11624741554260254,
      "step": 39767
    },
    {
      "epoch": 0.000242718505859375,
      "step": 39767,
      "training_step_time": 0.5329563617706299
    },
    {
      "epoch": 0.000242724609375,
      "model_forward_time": 0.11543035507202148,
      "step": 39768
    },
    {
      "epoch": 0.000242724609375,
      "step": 39768,
      "training_step_time": 0.4621603488922119
    },
    {
      "epoch": 0.000242730712890625,
      "model_forward_time": 0.11588382720947266,
      "step": 39769
    },
    {
      "epoch": 0.000242730712890625,
      "step": 39769,
      "training_step_time": 0.4242818355560303
    },
    {
      "epoch": 0.00024273681640625,
      "grad_norm": 0.10879664123058319,
      "learning_rate": 2.7990668496856427e-05,
      "loss": 0.0391,
      "step": 39770
    },
    {
      "epoch": 0.00024273681640625,
      "model_forward_time": 0.11528778076171875,
      "step": 39770
    },
    {
      "epoch": 0.00024273681640625,
      "step": 39770,
      "training_step_time": 0.385174036026001
    },
    {
      "epoch": 0.000242742919921875,
      "model_forward_time": 0.11541080474853516,
      "step": 39771
    },
    {
      "epoch": 0.000242742919921875,
      "step": 39771,
      "training_step_time": 0.39593005180358887
    },
    {
      "epoch": 0.0002427490234375,
      "model_forward_time": 0.11741495132446289,
      "step": 39772
    },
    {
      "epoch": 0.0002427490234375,
      "step": 39772,
      "training_step_time": 0.37917089462280273
    },
    {
      "epoch": 0.000242755126953125,
      "model_forward_time": 0.1150507926940918,
      "step": 39773
    },
    {
      "epoch": 0.000242755126953125,
      "step": 39773,
      "training_step_time": 0.5895578861236572
    },
    {
      "epoch": 0.00024276123046875,
      "model_forward_time": 0.11538100242614746,
      "step": 39774
    },
    {
      "epoch": 0.00024276123046875,
      "step": 39774,
      "training_step_time": 0.3862299919128418
    },
    {
      "epoch": 0.000242767333984375,
      "model_forward_time": 0.1155860424041748,
      "step": 39775
    },
    {
      "epoch": 0.000242767333984375,
      "step": 39775,
      "training_step_time": 0.4211719036102295
    },
    {
      "epoch": 0.0002427734375,
      "model_forward_time": 0.1154782772064209,
      "step": 39776
    },
    {
      "epoch": 0.0002427734375,
      "step": 39776,
      "training_step_time": 0.385143518447876
    },
    {
      "epoch": 0.000242779541015625,
      "model_forward_time": 0.11538553237915039,
      "step": 39777
    },
    {
      "epoch": 0.000242779541015625,
      "step": 39777,
      "training_step_time": 0.510061502456665
    },
    {
      "epoch": 0.00024278564453125,
      "model_forward_time": 0.11527204513549805,
      "step": 39778
    },
    {
      "epoch": 0.00024278564453125,
      "step": 39778,
      "training_step_time": 0.37453675270080566
    },
    {
      "epoch": 0.000242791748046875,
      "model_forward_time": 0.1153707504272461,
      "step": 39779
    },
    {
      "epoch": 0.000242791748046875,
      "step": 39779,
      "training_step_time": 0.48990464210510254
    },
    {
      "epoch": 0.0002427978515625,
      "grad_norm": 0.13071836531162262,
      "learning_rate": 2.7965927491490705e-05,
      "loss": 0.0409,
      "step": 39780
    },
    {
      "epoch": 0.0002427978515625,
      "model_forward_time": 0.1149146556854248,
      "step": 39780
    },
    {
      "epoch": 0.0002427978515625,
      "step": 39780,
      "training_step_time": 0.42276740074157715
    },
    {
      "epoch": 0.000242803955078125,
      "model_forward_time": 0.11465764045715332,
      "step": 39781
    },
    {
      "epoch": 0.000242803955078125,
      "step": 39781,
      "training_step_time": 0.39797425270080566
    },
    {
      "epoch": 0.00024281005859375,
      "model_forward_time": 0.11574840545654297,
      "step": 39782
    },
    {
      "epoch": 0.00024281005859375,
      "step": 39782,
      "training_step_time": 0.41541504859924316
    },
    {
      "epoch": 0.000242816162109375,
      "model_forward_time": 0.11486124992370605,
      "step": 39783
    },
    {
      "epoch": 0.000242816162109375,
      "step": 39783,
      "training_step_time": 0.41419291496276855
    },
    {
      "epoch": 0.000242822265625,
      "model_forward_time": 0.11460661888122559,
      "step": 39784
    },
    {
      "epoch": 0.000242822265625,
      "step": 39784,
      "training_step_time": 0.3746018409729004
    },
    {
      "epoch": 0.000242828369140625,
      "model_forward_time": 0.11570954322814941,
      "step": 39785
    },
    {
      "epoch": 0.000242828369140625,
      "step": 39785,
      "training_step_time": 0.494600772857666
    },
    {
      "epoch": 0.00024283447265625,
      "model_forward_time": 0.11667871475219727,
      "step": 39786
    },
    {
      "epoch": 0.00024283447265625,
      "step": 39786,
      "training_step_time": 0.38805580139160156
    },
    {
      "epoch": 0.000242840576171875,
      "model_forward_time": 0.11505818367004395,
      "step": 39787
    },
    {
      "epoch": 0.000242840576171875,
      "step": 39787,
      "training_step_time": 0.4016730785369873
    },
    {
      "epoch": 0.0002428466796875,
      "model_forward_time": 0.11603856086730957,
      "step": 39788
    },
    {
      "epoch": 0.0002428466796875,
      "step": 39788,
      "training_step_time": 0.4031369686126709
    },
    {
      "epoch": 0.000242852783203125,
      "model_forward_time": 0.11501049995422363,
      "step": 39789
    },
    {
      "epoch": 0.000242852783203125,
      "step": 39789,
      "training_step_time": 0.43782520294189453
    },
    {
      "epoch": 0.00024285888671875,
      "grad_norm": 0.18828929960727692,
      "learning_rate": 2.7941193179494484e-05,
      "loss": 0.0406,
      "step": 39790
    },
    {
      "epoch": 0.00024285888671875,
      "model_forward_time": 0.11559510231018066,
      "step": 39790
    },
    {
      "epoch": 0.00024285888671875,
      "step": 39790,
      "training_step_time": 0.5001208782196045
    },
    {
      "epoch": 0.000242864990234375,
      "model_forward_time": 0.11485767364501953,
      "step": 39791
    },
    {
      "epoch": 0.000242864990234375,
      "step": 39791,
      "training_step_time": 0.5031366348266602
    },
    {
      "epoch": 0.00024287109375,
      "model_forward_time": 0.11502432823181152,
      "step": 39792
    },
    {
      "epoch": 0.00024287109375,
      "step": 39792,
      "training_step_time": 0.42178964614868164
    },
    {
      "epoch": 0.000242877197265625,
      "model_forward_time": 0.11463761329650879,
      "step": 39793
    },
    {
      "epoch": 0.000242877197265625,
      "step": 39793,
      "training_step_time": 0.4271383285522461
    },
    {
      "epoch": 0.00024288330078125,
      "model_forward_time": 0.1154172420501709,
      "step": 39794
    },
    {
      "epoch": 0.00024288330078125,
      "step": 39794,
      "training_step_time": 0.43900322914123535
    },
    {
      "epoch": 0.000242889404296875,
      "model_forward_time": 0.11406183242797852,
      "step": 39795
    },
    {
      "epoch": 0.000242889404296875,
      "step": 39795,
      "training_step_time": 0.40421605110168457
    },
    {
      "epoch": 0.0002428955078125,
      "model_forward_time": 0.11509537696838379,
      "step": 39796
    },
    {
      "epoch": 0.0002428955078125,
      "step": 39796,
      "training_step_time": 0.4091005325317383
    },
    {
      "epoch": 0.000242901611328125,
      "model_forward_time": 0.11430072784423828,
      "step": 39797
    },
    {
      "epoch": 0.000242901611328125,
      "step": 39797,
      "training_step_time": 0.4451944828033447
    },
    {
      "epoch": 0.00024290771484375,
      "model_forward_time": 0.1155390739440918,
      "step": 39798
    },
    {
      "epoch": 0.00024290771484375,
      "step": 39798,
      "training_step_time": 0.382981538772583
    },
    {
      "epoch": 0.000242913818359375,
      "model_forward_time": 0.11540794372558594,
      "step": 39799
    },
    {
      "epoch": 0.000242913818359375,
      "step": 39799,
      "training_step_time": 0.3986542224884033
    },
    {
      "epoch": 0.000242919921875,
      "grad_norm": 0.08786917477846146,
      "learning_rate": 2.79164655683813e-05,
      "loss": 0.0353,
      "step": 39800
    },
    {
      "epoch": 0.000242919921875,
      "model_forward_time": 0.11500072479248047,
      "step": 39800
    },
    {
      "epoch": 0.000242919921875,
      "step": 39800,
      "training_step_time": 0.42879390716552734
    },
    {
      "epoch": 0.000242926025390625,
      "model_forward_time": 0.11493372917175293,
      "step": 39801
    },
    {
      "epoch": 0.000242926025390625,
      "step": 39801,
      "training_step_time": 0.4351961612701416
    },
    {
      "epoch": 0.00024293212890625,
      "model_forward_time": 0.11574435234069824,
      "step": 39802
    },
    {
      "epoch": 0.00024293212890625,
      "step": 39802,
      "training_step_time": 0.4721074104309082
    },
    {
      "epoch": 0.000242938232421875,
      "model_forward_time": 0.11483979225158691,
      "step": 39803
    },
    {
      "epoch": 0.000242938232421875,
      "step": 39803,
      "training_step_time": 0.516798734664917
    },
    {
      "epoch": 0.0002429443359375,
      "model_forward_time": 0.1144859790802002,
      "step": 39804
    },
    {
      "epoch": 0.0002429443359375,
      "step": 39804,
      "training_step_time": 0.38947415351867676
    },
    {
      "epoch": 0.000242950439453125,
      "model_forward_time": 0.11449980735778809,
      "step": 39805
    },
    {
      "epoch": 0.000242950439453125,
      "step": 39805,
      "training_step_time": 0.3904998302459717
    },
    {
      "epoch": 0.00024295654296875,
      "model_forward_time": 0.11474943161010742,
      "step": 39806
    },
    {
      "epoch": 0.00024295654296875,
      "step": 39806,
      "training_step_time": 0.4373300075531006
    },
    {
      "epoch": 0.000242962646484375,
      "model_forward_time": 0.11480927467346191,
      "step": 39807
    },
    {
      "epoch": 0.000242962646484375,
      "step": 39807,
      "training_step_time": 0.4813199043273926
    },
    {
      "epoch": 0.00024296875,
      "model_forward_time": 0.11504960060119629,
      "step": 39808
    },
    {
      "epoch": 0.00024296875,
      "step": 39808,
      "training_step_time": 0.5102498531341553
    },
    {
      "epoch": 0.000242974853515625,
      "model_forward_time": 0.1155858039855957,
      "step": 39809
    },
    {
      "epoch": 0.000242974853515625,
      "step": 39809,
      "training_step_time": 0.3848550319671631
    },
    {
      "epoch": 0.00024298095703125,
      "grad_norm": 0.15381982922554016,
      "learning_rate": 2.7891744665662823e-05,
      "loss": 0.047,
      "step": 39810
    },
    {
      "epoch": 0.00024298095703125,
      "model_forward_time": 0.11469340324401855,
      "step": 39810
    },
    {
      "epoch": 0.00024298095703125,
      "step": 39810,
      "training_step_time": 0.4250602722167969
    },
    {
      "epoch": 0.000242987060546875,
      "model_forward_time": 0.1147456169128418,
      "step": 39811
    },
    {
      "epoch": 0.000242987060546875,
      "step": 39811,
      "training_step_time": 0.4108705520629883
    },
    {
      "epoch": 0.0002429931640625,
      "model_forward_time": 0.1144568920135498,
      "step": 39812
    },
    {
      "epoch": 0.0002429931640625,
      "step": 39812,
      "training_step_time": 0.3892812728881836
    },
    {
      "epoch": 0.000242999267578125,
      "model_forward_time": 0.11547207832336426,
      "step": 39813
    },
    {
      "epoch": 0.000242999267578125,
      "step": 39813,
      "training_step_time": 0.39525890350341797
    },
    {
      "epoch": 0.00024300537109375,
      "model_forward_time": 0.11481857299804688,
      "step": 39814
    },
    {
      "epoch": 0.00024300537109375,
      "step": 39814,
      "training_step_time": 0.6476929187774658
    },
    {
      "epoch": 0.000243011474609375,
      "model_forward_time": 0.11451888084411621,
      "step": 39815
    },
    {
      "epoch": 0.000243011474609375,
      "step": 39815,
      "training_step_time": 0.38953495025634766
    },
    {
      "epoch": 0.000243017578125,
      "model_forward_time": 0.11526203155517578,
      "step": 39816
    },
    {
      "epoch": 0.000243017578125,
      "step": 39816,
      "training_step_time": 0.3940920829772949
    },
    {
      "epoch": 0.000243023681640625,
      "model_forward_time": 0.11486101150512695,
      "step": 39817
    },
    {
      "epoch": 0.000243023681640625,
      "step": 39817,
      "training_step_time": 0.3887007236480713
    },
    {
      "epoch": 0.00024302978515625,
      "model_forward_time": 0.1146245002746582,
      "step": 39818
    },
    {
      "epoch": 0.00024302978515625,
      "step": 39818,
      "training_step_time": 0.3847789764404297
    },
    {
      "epoch": 0.000243035888671875,
      "model_forward_time": 0.11503005027770996,
      "step": 39819
    },
    {
      "epoch": 0.000243035888671875,
      "step": 39819,
      "training_step_time": 0.4427371025085449
    },
    {
      "epoch": 0.0002430419921875,
      "grad_norm": 0.11375211179256439,
      "learning_rate": 2.7867030478848577e-05,
      "loss": 0.0354,
      "step": 39820
    },
    {
      "epoch": 0.0002430419921875,
      "model_forward_time": 0.1150977611541748,
      "step": 39820
    },
    {
      "epoch": 0.0002430419921875,
      "step": 39820,
      "training_step_time": 0.5937972068786621
    },
    {
      "epoch": 0.000243048095703125,
      "model_forward_time": 0.11475944519042969,
      "step": 39821
    },
    {
      "epoch": 0.000243048095703125,
      "step": 39821,
      "training_step_time": 0.38783860206604004
    },
    {
      "epoch": 0.00024305419921875,
      "model_forward_time": 0.11580061912536621,
      "step": 39822
    },
    {
      "epoch": 0.00024305419921875,
      "step": 39822,
      "training_step_time": 0.41846656799316406
    },
    {
      "epoch": 0.000243060302734375,
      "model_forward_time": 0.1156165599822998,
      "step": 39823
    },
    {
      "epoch": 0.000243060302734375,
      "step": 39823,
      "training_step_time": 0.47794294357299805
    },
    {
      "epoch": 0.00024306640625,
      "model_forward_time": 0.1150364875793457,
      "step": 39824
    },
    {
      "epoch": 0.00024306640625,
      "step": 39824,
      "training_step_time": 0.40512681007385254
    },
    {
      "epoch": 0.000243072509765625,
      "model_forward_time": 0.11501264572143555,
      "step": 39825
    },
    {
      "epoch": 0.000243072509765625,
      "step": 39825,
      "training_step_time": 0.4464895725250244
    },
    {
      "epoch": 0.00024307861328125,
      "model_forward_time": 0.11541080474853516,
      "step": 39826
    },
    {
      "epoch": 0.00024307861328125,
      "step": 39826,
      "training_step_time": 0.42691588401794434
    },
    {
      "epoch": 0.000243084716796875,
      "model_forward_time": 0.1150503158569336,
      "step": 39827
    },
    {
      "epoch": 0.000243084716796875,
      "step": 39827,
      "training_step_time": 0.3966701030731201
    },
    {
      "epoch": 0.0002430908203125,
      "model_forward_time": 0.11530137062072754,
      "step": 39828
    },
    {
      "epoch": 0.0002430908203125,
      "step": 39828,
      "training_step_time": 0.3976280689239502
    },
    {
      "epoch": 0.000243096923828125,
      "model_forward_time": 0.11517691612243652,
      "step": 39829
    },
    {
      "epoch": 0.000243096923828125,
      "step": 39829,
      "training_step_time": 0.3964545726776123
    },
    {
      "epoch": 0.00024310302734375,
      "grad_norm": 0.13313888013362885,
      "learning_rate": 2.7842323015446082e-05,
      "loss": 0.0376,
      "step": 39830
    },
    {
      "epoch": 0.00024310302734375,
      "model_forward_time": 0.11525344848632812,
      "step": 39830
    },
    {
      "epoch": 0.00024310302734375,
      "step": 39830,
      "training_step_time": 0.4047276973724365
    },
    {
      "epoch": 0.000243109130859375,
      "model_forward_time": 0.11541247367858887,
      "step": 39831
    },
    {
      "epoch": 0.000243109130859375,
      "step": 39831,
      "training_step_time": 0.40860748291015625
    },
    {
      "epoch": 0.000243115234375,
      "model_forward_time": 0.1154794692993164,
      "step": 39832
    },
    {
      "epoch": 0.000243115234375,
      "step": 39832,
      "training_step_time": 0.5997314453125
    },
    {
      "epoch": 0.000243121337890625,
      "model_forward_time": 0.11471128463745117,
      "step": 39833
    },
    {
      "epoch": 0.000243121337890625,
      "step": 39833,
      "training_step_time": 0.40164637565612793
    },
    {
      "epoch": 0.00024312744140625,
      "model_forward_time": 0.11501049995422363,
      "step": 39834
    },
    {
      "epoch": 0.00024312744140625,
      "step": 39834,
      "training_step_time": 0.39296436309814453
    },
    {
      "epoch": 0.000243133544921875,
      "model_forward_time": 0.11459112167358398,
      "step": 39835
    },
    {
      "epoch": 0.000243133544921875,
      "step": 39835,
      "training_step_time": 0.4337756633758545
    },
    {
      "epoch": 0.0002431396484375,
      "model_forward_time": 0.11532282829284668,
      "step": 39836
    },
    {
      "epoch": 0.0002431396484375,
      "step": 39836,
      "training_step_time": 0.39084482192993164
    },
    {
      "epoch": 0.000243145751953125,
      "model_forward_time": 0.11526155471801758,
      "step": 39837
    },
    {
      "epoch": 0.000243145751953125,
      "step": 39837,
      "training_step_time": 0.37273383140563965
    },
    {
      "epoch": 0.00024315185546875,
      "model_forward_time": 0.11591339111328125,
      "step": 39838
    },
    {
      "epoch": 0.00024315185546875,
      "step": 39838,
      "training_step_time": 0.541968822479248
    },
    {
      "epoch": 0.000243157958984375,
      "model_forward_time": 0.11507058143615723,
      "step": 39839
    },
    {
      "epoch": 0.000243157958984375,
      "step": 39839,
      "training_step_time": 0.42908191680908203
    },
    {
      "epoch": 0.0002431640625,
      "grad_norm": 0.12331552058458328,
      "learning_rate": 2.7817622282960815e-05,
      "loss": 0.0385,
      "step": 39840
    },
    {
      "epoch": 0.0002431640625,
      "model_forward_time": 0.1145474910736084,
      "step": 39840
    },
    {
      "epoch": 0.0002431640625,
      "step": 39840,
      "training_step_time": 0.4042379856109619
    },
    {
      "epoch": 0.000243170166015625,
      "model_forward_time": 0.11499953269958496,
      "step": 39841
    },
    {
      "epoch": 0.000243170166015625,
      "step": 39841,
      "training_step_time": 0.4027719497680664
    },
    {
      "epoch": 0.00024317626953125,
      "model_forward_time": 0.11500978469848633,
      "step": 39842
    },
    {
      "epoch": 0.00024317626953125,
      "step": 39842,
      "training_step_time": 0.3943912982940674
    },
    {
      "epoch": 0.000243182373046875,
      "model_forward_time": 0.11491775512695312,
      "step": 39843
    },
    {
      "epoch": 0.000243182373046875,
      "step": 39843,
      "training_step_time": 0.393648624420166
    },
    {
      "epoch": 0.0002431884765625,
      "model_forward_time": 0.11542344093322754,
      "step": 39844
    },
    {
      "epoch": 0.0002431884765625,
      "step": 39844,
      "training_step_time": 0.686342716217041
    },
    {
      "epoch": 0.000243194580078125,
      "model_forward_time": 0.11493611335754395,
      "step": 39845
    },
    {
      "epoch": 0.000243194580078125,
      "step": 39845,
      "training_step_time": 0.3984987735748291
    },
    {
      "epoch": 0.00024320068359375,
      "model_forward_time": 0.1149892807006836,
      "step": 39846
    },
    {
      "epoch": 0.00024320068359375,
      "step": 39846,
      "training_step_time": 0.3969302177429199
    },
    {
      "epoch": 0.000243206787109375,
      "model_forward_time": 0.11463594436645508,
      "step": 39847
    },
    {
      "epoch": 0.000243206787109375,
      "step": 39847,
      "training_step_time": 0.4685957431793213
    },
    {
      "epoch": 0.000243212890625,
      "model_forward_time": 0.1148214340209961,
      "step": 39848
    },
    {
      "epoch": 0.000243212890625,
      "step": 39848,
      "training_step_time": 0.47771739959716797
    },
    {
      "epoch": 0.000243218994140625,
      "model_forward_time": 0.11407685279846191,
      "step": 39849
    },
    {
      "epoch": 0.000243218994140625,
      "step": 39849,
      "training_step_time": 0.39933228492736816
    },
    {
      "epoch": 0.00024322509765625,
      "grad_norm": 0.14084549248218536,
      "learning_rate": 2.7792928288896202e-05,
      "loss": 0.0442,
      "step": 39850
    },
    {
      "epoch": 0.00024322509765625,
      "model_forward_time": 0.11620450019836426,
      "step": 39850
    },
    {
      "epoch": 0.00024322509765625,
      "step": 39850,
      "training_step_time": 0.503943920135498
    },
    {
      "epoch": 0.000243231201171875,
      "model_forward_time": 0.11432862281799316,
      "step": 39851
    },
    {
      "epoch": 0.000243231201171875,
      "step": 39851,
      "training_step_time": 0.42199158668518066
    },
    {
      "epoch": 0.0002432373046875,
      "model_forward_time": 0.11516594886779785,
      "step": 39852
    },
    {
      "epoch": 0.0002432373046875,
      "step": 39852,
      "training_step_time": 0.4540059566497803
    },
    {
      "epoch": 0.000243243408203125,
      "model_forward_time": 0.11520552635192871,
      "step": 39853
    },
    {
      "epoch": 0.000243243408203125,
      "step": 39853,
      "training_step_time": 0.42780327796936035
    },
    {
      "epoch": 0.00024324951171875,
      "model_forward_time": 0.11516475677490234,
      "step": 39854
    },
    {
      "epoch": 0.00024324951171875,
      "step": 39854,
      "training_step_time": 0.38503408432006836
    },
    {
      "epoch": 0.000243255615234375,
      "model_forward_time": 0.11486458778381348,
      "step": 39855
    },
    {
      "epoch": 0.000243255615234375,
      "step": 39855,
      "training_step_time": 0.3917267322540283
    },
    {
      "epoch": 0.00024326171875,
      "model_forward_time": 0.11590027809143066,
      "step": 39856
    },
    {
      "epoch": 0.00024326171875,
      "step": 39856,
      "training_step_time": 0.5540313720703125
    },
    {
      "epoch": 0.000243267822265625,
      "model_forward_time": 0.11490225791931152,
      "step": 39857
    },
    {
      "epoch": 0.000243267822265625,
      "step": 39857,
      "training_step_time": 0.38735508918762207
    },
    {
      "epoch": 0.00024327392578125,
      "model_forward_time": 0.11581945419311523,
      "step": 39858
    },
    {
      "epoch": 0.00024327392578125,
      "step": 39858,
      "training_step_time": 0.39772510528564453
    },
    {
      "epoch": 0.000243280029296875,
      "model_forward_time": 0.11572551727294922,
      "step": 39859
    },
    {
      "epoch": 0.000243280029296875,
      "step": 39859,
      "training_step_time": 0.39762401580810547
    },
    {
      "epoch": 0.0002432861328125,
      "grad_norm": 0.1252099573612213,
      "learning_rate": 2.776824104075364e-05,
      "loss": 0.044,
      "step": 39860
    },
    {
      "epoch": 0.0002432861328125,
      "model_forward_time": 0.11512637138366699,
      "step": 39860
    },
    {
      "epoch": 0.0002432861328125,
      "step": 39860,
      "training_step_time": 0.3903319835662842
    },
    {
      "epoch": 0.000243292236328125,
      "model_forward_time": 0.11492252349853516,
      "step": 39861
    },
    {
      "epoch": 0.000243292236328125,
      "step": 39861,
      "training_step_time": 0.424544095993042
    },
    {
      "epoch": 0.00024329833984375,
      "model_forward_time": 0.11522984504699707,
      "step": 39862
    },
    {
      "epoch": 0.00024329833984375,
      "step": 39862,
      "training_step_time": 0.5485482215881348
    },
    {
      "epoch": 0.000243304443359375,
      "model_forward_time": 0.1144559383392334,
      "step": 39863
    },
    {
      "epoch": 0.000243304443359375,
      "step": 39863,
      "training_step_time": 0.38672637939453125
    },
    {
      "epoch": 0.000243310546875,
      "model_forward_time": 0.11517643928527832,
      "step": 39864
    },
    {
      "epoch": 0.000243310546875,
      "step": 39864,
      "training_step_time": 0.3892953395843506
    },
    {
      "epoch": 0.000243316650390625,
      "model_forward_time": 0.11523580551147461,
      "step": 39865
    },
    {
      "epoch": 0.000243316650390625,
      "step": 39865,
      "training_step_time": 0.36754822731018066
    },
    {
      "epoch": 0.00024332275390625,
      "model_forward_time": 0.11482357978820801,
      "step": 39866
    },
    {
      "epoch": 0.00024332275390625,
      "step": 39866,
      "training_step_time": 0.4041483402252197
    },
    {
      "epoch": 0.000243328857421875,
      "model_forward_time": 0.11475777626037598,
      "step": 39867
    },
    {
      "epoch": 0.000243328857421875,
      "step": 39867,
      "training_step_time": 0.45116376876831055
    },
    {
      "epoch": 0.0002433349609375,
      "model_forward_time": 0.11536288261413574,
      "step": 39868
    },
    {
      "epoch": 0.0002433349609375,
      "step": 39868,
      "training_step_time": 0.5385499000549316
    },
    {
      "epoch": 0.000243341064453125,
      "model_forward_time": 0.11530756950378418,
      "step": 39869
    },
    {
      "epoch": 0.000243341064453125,
      "step": 39869,
      "training_step_time": 0.38916611671447754
    },
    {
      "epoch": 0.00024334716796875,
      "grad_norm": 0.11939448863267899,
      "learning_rate": 2.774356054603243e-05,
      "loss": 0.0391,
      "step": 39870
    },
    {
      "epoch": 0.00024334716796875,
      "model_forward_time": 0.11493992805480957,
      "step": 39870
    },
    {
      "epoch": 0.00024334716796875,
      "step": 39870,
      "training_step_time": 0.39261722564697266
    },
    {
      "epoch": 0.000243353271484375,
      "model_forward_time": 0.11535906791687012,
      "step": 39871
    },
    {
      "epoch": 0.000243353271484375,
      "step": 39871,
      "training_step_time": 0.39773011207580566
    },
    {
      "epoch": 0.000243359375,
      "model_forward_time": 0.11538147926330566,
      "step": 39872
    },
    {
      "epoch": 0.000243359375,
      "step": 39872,
      "training_step_time": 0.4004526138305664
    },
    {
      "epoch": 0.000243365478515625,
      "model_forward_time": 0.11465907096862793,
      "step": 39873
    },
    {
      "epoch": 0.000243365478515625,
      "step": 39873,
      "training_step_time": 0.38785219192504883
    },
    {
      "epoch": 0.00024337158203125,
      "model_forward_time": 0.11510682106018066,
      "step": 39874
    },
    {
      "epoch": 0.00024337158203125,
      "step": 39874,
      "training_step_time": 0.6136422157287598
    },
    {
      "epoch": 0.000243377685546875,
      "model_forward_time": 0.11479997634887695,
      "step": 39875
    },
    {
      "epoch": 0.000243377685546875,
      "step": 39875,
      "training_step_time": 0.5018107891082764
    },
    {
      "epoch": 0.0002433837890625,
      "model_forward_time": 0.11526727676391602,
      "step": 39876
    },
    {
      "epoch": 0.0002433837890625,
      "step": 39876,
      "training_step_time": 0.3900141716003418
    },
    {
      "epoch": 0.000243389892578125,
      "model_forward_time": 0.11464214324951172,
      "step": 39877
    },
    {
      "epoch": 0.000243389892578125,
      "step": 39877,
      "training_step_time": 0.39765453338623047
    },
    {
      "epoch": 0.00024339599609375,
      "model_forward_time": 0.11533570289611816,
      "step": 39878
    },
    {
      "epoch": 0.00024339599609375,
      "step": 39878,
      "training_step_time": 0.41603612899780273
    },
    {
      "epoch": 0.000243402099609375,
      "model_forward_time": 0.11491584777832031,
      "step": 39879
    },
    {
      "epoch": 0.000243402099609375,
      "step": 39879,
      "training_step_time": 0.4401857852935791
    },
    {
      "epoch": 0.000243408203125,
      "grad_norm": 0.09168641269207001,
      "learning_rate": 2.7718886812229907e-05,
      "loss": 0.0362,
      "step": 39880
    },
    {
      "epoch": 0.000243408203125,
      "model_forward_time": 0.11489558219909668,
      "step": 39880
    },
    {
      "epoch": 0.000243408203125,
      "step": 39880,
      "training_step_time": 0.4743483066558838
    },
    {
      "epoch": 0.000243414306640625,
      "model_forward_time": 0.11536335945129395,
      "step": 39881
    },
    {
      "epoch": 0.000243414306640625,
      "step": 39881,
      "training_step_time": 0.4655454158782959
    },
    {
      "epoch": 0.00024342041015625,
      "model_forward_time": 0.11560177803039551,
      "step": 39882
    },
    {
      "epoch": 0.00024342041015625,
      "step": 39882,
      "training_step_time": 0.39360833168029785
    },
    {
      "epoch": 0.000243426513671875,
      "model_forward_time": 0.11465191841125488,
      "step": 39883
    },
    {
      "epoch": 0.000243426513671875,
      "step": 39883,
      "training_step_time": 0.3968362808227539
    },
    {
      "epoch": 0.0002434326171875,
      "model_forward_time": 0.11486291885375977,
      "step": 39884
    },
    {
      "epoch": 0.0002434326171875,
      "step": 39884,
      "training_step_time": 0.3940126895904541
    },
    {
      "epoch": 0.000243438720703125,
      "model_forward_time": 0.11482048034667969,
      "step": 39885
    },
    {
      "epoch": 0.000243438720703125,
      "step": 39885,
      "training_step_time": 0.39202046394348145
    },
    {
      "epoch": 0.00024344482421875,
      "model_forward_time": 0.1151735782623291,
      "step": 39886
    },
    {
      "epoch": 0.00024344482421875,
      "step": 39886,
      "training_step_time": 0.5180411338806152
    },
    {
      "epoch": 0.000243450927734375,
      "model_forward_time": 0.1146237850189209,
      "step": 39887
    },
    {
      "epoch": 0.000243450927734375,
      "step": 39887,
      "training_step_time": 0.3915841579437256
    },
    {
      "epoch": 0.00024345703125,
      "model_forward_time": 0.11524415016174316,
      "step": 39888
    },
    {
      "epoch": 0.00024345703125,
      "step": 39888,
      "training_step_time": 0.3882927894592285
    },
    {
      "epoch": 0.000243463134765625,
      "model_forward_time": 0.1150360107421875,
      "step": 39889
    },
    {
      "epoch": 0.000243463134765625,
      "step": 39889,
      "training_step_time": 0.4042649269104004
    },
    {
      "epoch": 0.00024346923828125,
      "grad_norm": 0.0851190835237503,
      "learning_rate": 2.7694219846841262e-05,
      "loss": 0.0372,
      "step": 39890
    },
    {
      "epoch": 0.00024346923828125,
      "model_forward_time": 0.11490821838378906,
      "step": 39890
    },
    {
      "epoch": 0.00024346923828125,
      "step": 39890,
      "training_step_time": 0.44753241539001465
    },
    {
      "epoch": 0.000243475341796875,
      "model_forward_time": 0.11531305313110352,
      "step": 39891
    },
    {
      "epoch": 0.000243475341796875,
      "step": 39891,
      "training_step_time": 0.47316956520080566
    },
    {
      "epoch": 0.0002434814453125,
      "model_forward_time": 0.11526346206665039,
      "step": 39892
    },
    {
      "epoch": 0.0002434814453125,
      "step": 39892,
      "training_step_time": 0.51483154296875
    },
    {
      "epoch": 0.000243487548828125,
      "model_forward_time": 0.11481571197509766,
      "step": 39893
    },
    {
      "epoch": 0.000243487548828125,
      "step": 39893,
      "training_step_time": 0.4176003932952881
    },
    {
      "epoch": 0.00024349365234375,
      "model_forward_time": 0.11464238166809082,
      "step": 39894
    },
    {
      "epoch": 0.00024349365234375,
      "step": 39894,
      "training_step_time": 0.48073816299438477
    },
    {
      "epoch": 0.000243499755859375,
      "model_forward_time": 0.1149282455444336,
      "step": 39895
    },
    {
      "epoch": 0.000243499755859375,
      "step": 39895,
      "training_step_time": 0.4954955577850342
    },
    {
      "epoch": 0.000243505859375,
      "model_forward_time": 0.11426186561584473,
      "step": 39896
    },
    {
      "epoch": 0.000243505859375,
      "step": 39896,
      "training_step_time": 0.38419556617736816
    },
    {
      "epoch": 0.000243511962890625,
      "model_forward_time": 0.11467599868774414,
      "step": 39897
    },
    {
      "epoch": 0.000243511962890625,
      "step": 39897,
      "training_step_time": 0.39658665657043457
    },
    {
      "epoch": 0.00024351806640625,
      "model_forward_time": 0.11547064781188965,
      "step": 39898
    },
    {
      "epoch": 0.00024351806640625,
      "step": 39898,
      "training_step_time": 0.4244999885559082
    },
    {
      "epoch": 0.000243524169921875,
      "model_forward_time": 0.11478328704833984,
      "step": 39899
    },
    {
      "epoch": 0.000243524169921875,
      "step": 39899,
      "training_step_time": 0.38996219635009766
    },
    {
      "epoch": 0.0002435302734375,
      "grad_norm": 0.08967018127441406,
      "learning_rate": 2.766955965735968e-05,
      "loss": 0.0394,
      "step": 39900
    },
    {
      "epoch": 0.0002435302734375,
      "model_forward_time": 0.11487674713134766,
      "step": 39900
    },
    {
      "epoch": 0.0002435302734375,
      "step": 39900,
      "training_step_time": 0.38852620124816895
    },
    {
      "epoch": 0.000243536376953125,
      "model_forward_time": 0.11554837226867676,
      "step": 39901
    },
    {
      "epoch": 0.000243536376953125,
      "step": 39901,
      "training_step_time": 0.39055871963500977
    },
    {
      "epoch": 0.00024354248046875,
      "model_forward_time": 0.11500120162963867,
      "step": 39902
    },
    {
      "epoch": 0.00024354248046875,
      "step": 39902,
      "training_step_time": 0.39426326751708984
    },
    {
      "epoch": 0.000243548583984375,
      "model_forward_time": 0.1156914234161377,
      "step": 39903
    },
    {
      "epoch": 0.000243548583984375,
      "step": 39903,
      "training_step_time": 0.4748556613922119
    },
    {
      "epoch": 0.0002435546875,
      "model_forward_time": 0.1156766414642334,
      "step": 39904
    },
    {
      "epoch": 0.0002435546875,
      "step": 39904,
      "training_step_time": 0.5641083717346191
    },
    {
      "epoch": 0.000243560791015625,
      "model_forward_time": 0.11473584175109863,
      "step": 39905
    },
    {
      "epoch": 0.000243560791015625,
      "step": 39905,
      "training_step_time": 0.5202372074127197
    },
    {
      "epoch": 0.00024356689453125,
      "model_forward_time": 0.11523056030273438,
      "step": 39906
    },
    {
      "epoch": 0.00024356689453125,
      "step": 39906,
      "training_step_time": 0.402113676071167
    },
    {
      "epoch": 0.000243572998046875,
      "model_forward_time": 0.1143500804901123,
      "step": 39907
    },
    {
      "epoch": 0.000243572998046875,
      "step": 39907,
      "training_step_time": 0.4524056911468506
    },
    {
      "epoch": 0.0002435791015625,
      "model_forward_time": 0.11455011367797852,
      "step": 39908
    },
    {
      "epoch": 0.0002435791015625,
      "step": 39908,
      "training_step_time": 0.4101145267486572
    },
    {
      "epoch": 0.000243585205078125,
      "model_forward_time": 0.1152198314666748,
      "step": 39909
    },
    {
      "epoch": 0.000243585205078125,
      "step": 39909,
      "training_step_time": 0.4986269474029541
    },
    {
      "epoch": 0.00024359130859375,
      "grad_norm": 0.08611232787370682,
      "learning_rate": 2.764490625127627e-05,
      "loss": 0.0416,
      "step": 39910
    },
    {
      "epoch": 0.00024359130859375,
      "model_forward_time": 0.11496663093566895,
      "step": 39910
    },
    {
      "epoch": 0.00024359130859375,
      "step": 39910,
      "training_step_time": 0.3918178081512451
    },
    {
      "epoch": 0.000243597412109375,
      "model_forward_time": 0.11468029022216797,
      "step": 39911
    },
    {
      "epoch": 0.000243597412109375,
      "step": 39911,
      "training_step_time": 0.5094499588012695
    },
    {
      "epoch": 0.000243603515625,
      "model_forward_time": 0.1150970458984375,
      "step": 39912
    },
    {
      "epoch": 0.000243603515625,
      "step": 39912,
      "training_step_time": 0.38619136810302734
    },
    {
      "epoch": 0.000243609619140625,
      "model_forward_time": 0.11417078971862793,
      "step": 39913
    },
    {
      "epoch": 0.000243609619140625,
      "step": 39913,
      "training_step_time": 0.39115405082702637
    },
    {
      "epoch": 0.00024361572265625,
      "model_forward_time": 0.11679768562316895,
      "step": 39914
    },
    {
      "epoch": 0.00024361572265625,
      "step": 39914,
      "training_step_time": 0.39541149139404297
    },
    {
      "epoch": 0.000243621826171875,
      "model_forward_time": 0.11460542678833008,
      "step": 39915
    },
    {
      "epoch": 0.000243621826171875,
      "step": 39915,
      "training_step_time": 0.396634578704834
    },
    {
      "epoch": 0.0002436279296875,
      "model_forward_time": 0.11492371559143066,
      "step": 39916
    },
    {
      "epoch": 0.0002436279296875,
      "step": 39916,
      "training_step_time": 0.5030241012573242
    },
    {
      "epoch": 0.000243634033203125,
      "model_forward_time": 0.11514616012573242,
      "step": 39917
    },
    {
      "epoch": 0.000243634033203125,
      "step": 39917,
      "training_step_time": 0.5772116184234619
    },
    {
      "epoch": 0.00024364013671875,
      "model_forward_time": 0.11540102958679199,
      "step": 39918
    },
    {
      "epoch": 0.00024364013671875,
      "step": 39918,
      "training_step_time": 0.4387385845184326
    },
    {
      "epoch": 0.000243646240234375,
      "model_forward_time": 0.11459779739379883,
      "step": 39919
    },
    {
      "epoch": 0.000243646240234375,
      "step": 39919,
      "training_step_time": 0.40490245819091797
    },
    {
      "epoch": 0.00024365234375,
      "grad_norm": 0.17057861387729645,
      "learning_rate": 2.762025963608009e-05,
      "loss": 0.0455,
      "step": 39920
    },
    {
      "epoch": 0.00024365234375,
      "model_forward_time": 0.11499571800231934,
      "step": 39920
    },
    {
      "epoch": 0.00024365234375,
      "step": 39920,
      "training_step_time": 0.4161694049835205
    },
    {
      "epoch": 0.000243658447265625,
      "model_forward_time": 0.11457037925720215,
      "step": 39921
    },
    {
      "epoch": 0.000243658447265625,
      "step": 39921,
      "training_step_time": 0.3661916255950928
    },
    {
      "epoch": 0.00024366455078125,
      "model_forward_time": 0.11451983451843262,
      "step": 39922
    },
    {
      "epoch": 0.00024366455078125,
      "step": 39922,
      "training_step_time": 0.447981595993042
    },
    {
      "epoch": 0.000243670654296875,
      "model_forward_time": 0.1149747371673584,
      "step": 39923
    },
    {
      "epoch": 0.000243670654296875,
      "step": 39923,
      "training_step_time": 0.4006178379058838
    },
    {
      "epoch": 0.0002436767578125,
      "model_forward_time": 0.1146078109741211,
      "step": 39924
    },
    {
      "epoch": 0.0002436767578125,
      "step": 39924,
      "training_step_time": 0.3994324207305908
    },
    {
      "epoch": 0.000243682861328125,
      "model_forward_time": 0.11545825004577637,
      "step": 39925
    },
    {
      "epoch": 0.000243682861328125,
      "step": 39925,
      "training_step_time": 0.37851381301879883
    },
    {
      "epoch": 0.00024368896484375,
      "model_forward_time": 0.11555123329162598,
      "step": 39926
    },
    {
      "epoch": 0.00024368896484375,
      "step": 39926,
      "training_step_time": 0.38289499282836914
    },
    {
      "epoch": 0.000243695068359375,
      "model_forward_time": 0.11510515213012695,
      "step": 39927
    },
    {
      "epoch": 0.000243695068359375,
      "step": 39927,
      "training_step_time": 0.38929152488708496
    },
    {
      "epoch": 0.000243701171875,
      "model_forward_time": 0.11500835418701172,
      "step": 39928
    },
    {
      "epoch": 0.000243701171875,
      "step": 39928,
      "training_step_time": 0.646723747253418
    },
    {
      "epoch": 0.000243707275390625,
      "model_forward_time": 0.11549568176269531,
      "step": 39929
    },
    {
      "epoch": 0.000243707275390625,
      "step": 39929,
      "training_step_time": 0.38416028022766113
    },
    {
      "epoch": 0.00024371337890625,
      "grad_norm": 0.11161636561155319,
      "learning_rate": 2.7595619819258116e-05,
      "loss": 0.0355,
      "step": 39930
    },
    {
      "epoch": 0.00024371337890625,
      "model_forward_time": 0.11487531661987305,
      "step": 39930
    },
    {
      "epoch": 0.00024371337890625,
      "step": 39930,
      "training_step_time": 0.40848374366760254
    },
    {
      "epoch": 0.000243719482421875,
      "model_forward_time": 0.11460638046264648,
      "step": 39931
    },
    {
      "epoch": 0.000243719482421875,
      "step": 39931,
      "training_step_time": 0.48949742317199707
    },
    {
      "epoch": 0.0002437255859375,
      "model_forward_time": 0.11533761024475098,
      "step": 39932
    },
    {
      "epoch": 0.0002437255859375,
      "step": 39932,
      "training_step_time": 0.4375474452972412
    },
    {
      "epoch": 0.000243731689453125,
      "model_forward_time": 0.1144723892211914,
      "step": 39933
    },
    {
      "epoch": 0.000243731689453125,
      "step": 39933,
      "training_step_time": 0.4621696472167969
    },
    {
      "epoch": 0.00024373779296875,
      "model_forward_time": 0.1152350902557373,
      "step": 39934
    },
    {
      "epoch": 0.00024373779296875,
      "step": 39934,
      "training_step_time": 0.4128241539001465
    },
    {
      "epoch": 0.000243743896484375,
      "model_forward_time": 0.11498522758483887,
      "step": 39935
    },
    {
      "epoch": 0.000243743896484375,
      "step": 39935,
      "training_step_time": 0.4480276107788086
    },
    {
      "epoch": 0.00024375,
      "model_forward_time": 0.11644601821899414,
      "step": 39936
    },
    {
      "epoch": 0.00024375,
      "step": 39936,
      "training_step_time": 0.4332282543182373
    },
    {
      "epoch": 0.000243756103515625,
      "model_forward_time": 0.11462950706481934,
      "step": 39937
    },
    {
      "epoch": 0.000243756103515625,
      "step": 39937,
      "training_step_time": 0.44055891036987305
    },
    {
      "epoch": 0.00024376220703125,
      "model_forward_time": 0.11522150039672852,
      "step": 39938
    },
    {
      "epoch": 0.00024376220703125,
      "step": 39938,
      "training_step_time": 0.3888890743255615
    },
    {
      "epoch": 0.000243768310546875,
      "model_forward_time": 0.11532902717590332,
      "step": 39939
    },
    {
      "epoch": 0.000243768310546875,
      "step": 39939,
      "training_step_time": 0.3895697593688965
    },
    {
      "epoch": 0.0002437744140625,
      "grad_norm": 0.12318246811628342,
      "learning_rate": 2.7570986808295324e-05,
      "loss": 0.0387,
      "step": 39940
    },
    {
      "epoch": 0.0002437744140625,
      "model_forward_time": 0.11474418640136719,
      "step": 39940
    },
    {
      "epoch": 0.0002437744140625,
      "step": 39940,
      "training_step_time": 0.439943790435791
    },
    {
      "epoch": 0.000243780517578125,
      "model_forward_time": 0.11520028114318848,
      "step": 39941
    },
    {
      "epoch": 0.000243780517578125,
      "step": 39941,
      "training_step_time": 0.39403676986694336
    },
    {
      "epoch": 0.00024378662109375,
      "model_forward_time": 0.1156919002532959,
      "step": 39942
    },
    {
      "epoch": 0.00024378662109375,
      "step": 39942,
      "training_step_time": 0.39130187034606934
    },
    {
      "epoch": 0.000243792724609375,
      "model_forward_time": 0.1155250072479248,
      "step": 39943
    },
    {
      "epoch": 0.000243792724609375,
      "step": 39943,
      "training_step_time": 0.39286351203918457
    },
    {
      "epoch": 0.000243798828125,
      "model_forward_time": 0.11554241180419922,
      "step": 39944
    },
    {
      "epoch": 0.000243798828125,
      "step": 39944,
      "training_step_time": 0.43120241165161133
    },
    {
      "epoch": 0.000243804931640625,
      "model_forward_time": 0.11507487297058105,
      "step": 39945
    },
    {
      "epoch": 0.000243804931640625,
      "step": 39945,
      "training_step_time": 0.3919646739959717
    },
    {
      "epoch": 0.00024381103515625,
      "model_forward_time": 0.11528873443603516,
      "step": 39946
    },
    {
      "epoch": 0.00024381103515625,
      "step": 39946,
      "training_step_time": 0.6086769104003906
    },
    {
      "epoch": 0.000243817138671875,
      "model_forward_time": 0.11559438705444336,
      "step": 39947
    },
    {
      "epoch": 0.000243817138671875,
      "step": 39947,
      "training_step_time": 0.4137275218963623
    },
    {
      "epoch": 0.0002438232421875,
      "model_forward_time": 0.11549878120422363,
      "step": 39948
    },
    {
      "epoch": 0.0002438232421875,
      "step": 39948,
      "training_step_time": 0.4214451313018799
    },
    {
      "epoch": 0.000243829345703125,
      "model_forward_time": 0.11450886726379395,
      "step": 39949
    },
    {
      "epoch": 0.000243829345703125,
      "step": 39949,
      "training_step_time": 0.4908792972564697
    },
    {
      "epoch": 0.00024383544921875,
      "grad_norm": 0.07237781584262848,
      "learning_rate": 2.7546360610674493e-05,
      "loss": 0.0374,
      "step": 39950
    },
    {
      "epoch": 0.00024383544921875,
      "model_forward_time": 0.11518025398254395,
      "step": 39950
    },
    {
      "epoch": 0.00024383544921875,
      "step": 39950,
      "training_step_time": 0.3994557857513428
    },
    {
      "epoch": 0.000243841552734375,
      "model_forward_time": 0.11487817764282227,
      "step": 39951
    },
    {
      "epoch": 0.000243841552734375,
      "step": 39951,
      "training_step_time": 0.4916403293609619
    },
    {
      "epoch": 0.00024384765625,
      "model_forward_time": 0.11560940742492676,
      "step": 39952
    },
    {
      "epoch": 0.00024384765625,
      "step": 39952,
      "training_step_time": 0.546262264251709
    },
    {
      "epoch": 0.000243853759765625,
      "model_forward_time": 0.11446809768676758,
      "step": 39953
    },
    {
      "epoch": 0.000243853759765625,
      "step": 39953,
      "training_step_time": 0.38982391357421875
    },
    {
      "epoch": 0.00024385986328125,
      "model_forward_time": 0.11504650115966797,
      "step": 39954
    },
    {
      "epoch": 0.00024385986328125,
      "step": 39954,
      "training_step_time": 0.3889288902282715
    },
    {
      "epoch": 0.000243865966796875,
      "model_forward_time": 0.11519074440002441,
      "step": 39955
    },
    {
      "epoch": 0.000243865966796875,
      "step": 39955,
      "training_step_time": 0.3929011821746826
    },
    {
      "epoch": 0.0002438720703125,
      "model_forward_time": 0.11486983299255371,
      "step": 39956
    },
    {
      "epoch": 0.0002438720703125,
      "step": 39956,
      "training_step_time": 0.39197564125061035
    },
    {
      "epoch": 0.000243878173828125,
      "model_forward_time": 0.11513876914978027,
      "step": 39957
    },
    {
      "epoch": 0.000243878173828125,
      "step": 39957,
      "training_step_time": 0.4554448127746582
    },
    {
      "epoch": 0.00024388427734375,
      "model_forward_time": 0.11711716651916504,
      "step": 39958
    },
    {
      "epoch": 0.00024388427734375,
      "step": 39958,
      "training_step_time": 0.48058009147644043
    },
    {
      "epoch": 0.000243890380859375,
      "model_forward_time": 0.1148984432220459,
      "step": 39959
    },
    {
      "epoch": 0.000243890380859375,
      "step": 39959,
      "training_step_time": 0.4423391819000244
    },
    {
      "epoch": 0.000243896484375,
      "grad_norm": 0.09886318445205688,
      "learning_rate": 2.7521741233876496e-05,
      "loss": 0.0365,
      "step": 39960
    },
    {
      "epoch": 0.000243896484375,
      "model_forward_time": 0.11488986015319824,
      "step": 39960
    },
    {
      "epoch": 0.000243896484375,
      "step": 39960,
      "training_step_time": 0.3993504047393799
    },
    {
      "epoch": 0.000243902587890625,
      "model_forward_time": 0.11513090133666992,
      "step": 39961
    },
    {
      "epoch": 0.000243902587890625,
      "step": 39961,
      "training_step_time": 0.3932149410247803
    },
    {
      "epoch": 0.00024390869140625,
      "model_forward_time": 0.11509060859680176,
      "step": 39962
    },
    {
      "epoch": 0.00024390869140625,
      "step": 39962,
      "training_step_time": 0.4286003112792969
    },
    {
      "epoch": 0.000243914794921875,
      "model_forward_time": 0.11481428146362305,
      "step": 39963
    },
    {
      "epoch": 0.000243914794921875,
      "step": 39963,
      "training_step_time": 0.5023765563964844
    },
    {
      "epoch": 0.0002439208984375,
      "model_forward_time": 0.11503052711486816,
      "step": 39964
    },
    {
      "epoch": 0.0002439208984375,
      "step": 39964,
      "training_step_time": 0.4035181999206543
    },
    {
      "epoch": 0.000243927001953125,
      "model_forward_time": 0.1248769760131836,
      "step": 39965
    },
    {
      "epoch": 0.000243927001953125,
      "step": 39965,
      "training_step_time": 0.4969758987426758
    },
    {
      "epoch": 0.00024393310546875,
      "model_forward_time": 0.11841273307800293,
      "step": 39966
    },
    {
      "epoch": 0.00024393310546875,
      "step": 39966,
      "training_step_time": 0.38040828704833984
    },
    {
      "epoch": 0.000243939208984375,
      "model_forward_time": 0.1179506778717041,
      "step": 39967
    },
    {
      "epoch": 0.000243939208984375,
      "step": 39967,
      "training_step_time": 0.380781888961792
    },
    {
      "epoch": 0.0002439453125,
      "model_forward_time": 0.11777520179748535,
      "step": 39968
    },
    {
      "epoch": 0.0002439453125,
      "step": 39968,
      "training_step_time": 0.3790597915649414
    },
    {
      "epoch": 0.000243951416015625,
      "model_forward_time": 0.11531376838684082,
      "step": 39969
    },
    {
      "epoch": 0.000243951416015625,
      "step": 39969,
      "training_step_time": 0.3948357105255127
    },
    {
      "epoch": 0.00024395751953125,
      "grad_norm": 0.11814989149570465,
      "learning_rate": 2.749712868537997e-05,
      "loss": 0.0333,
      "step": 39970
    },
    {
      "epoch": 0.00024395751953125,
      "model_forward_time": 0.11480402946472168,
      "step": 39970
    },
    {
      "epoch": 0.00024395751953125,
      "step": 39970,
      "training_step_time": 0.43120598793029785
    },
    {
      "epoch": 0.000243963623046875,
      "model_forward_time": 0.11474776268005371,
      "step": 39971
    },
    {
      "epoch": 0.000243963623046875,
      "step": 39971,
      "training_step_time": 0.7436003684997559
    },
    {
      "epoch": 0.0002439697265625,
      "model_forward_time": 0.11490106582641602,
      "step": 39972
    },
    {
      "epoch": 0.0002439697265625,
      "step": 39972,
      "training_step_time": 0.48650574684143066
    },
    {
      "epoch": 0.000243975830078125,
      "model_forward_time": 0.11462569236755371,
      "step": 39973
    },
    {
      "epoch": 0.000243975830078125,
      "step": 39973,
      "training_step_time": 0.40557122230529785
    },
    {
      "epoch": 0.00024398193359375,
      "model_forward_time": 0.11434221267700195,
      "step": 39974
    },
    {
      "epoch": 0.00024398193359375,
      "step": 39974,
      "training_step_time": 0.39441394805908203
    },
    {
      "epoch": 0.000243988037109375,
      "model_forward_time": 0.11406373977661133,
      "step": 39975
    },
    {
      "epoch": 0.000243988037109375,
      "step": 39975,
      "training_step_time": 0.38800740242004395
    },
    {
      "epoch": 0.000243994140625,
      "model_forward_time": 0.11476707458496094,
      "step": 39976
    },
    {
      "epoch": 0.000243994140625,
      "step": 39976,
      "training_step_time": 0.4131608009338379
    },
    {
      "epoch": 0.000244000244140625,
      "model_forward_time": 0.11490440368652344,
      "step": 39977
    },
    {
      "epoch": 0.000244000244140625,
      "step": 39977,
      "training_step_time": 0.4550364017486572
    },
    {
      "epoch": 0.00024400634765625,
      "model_forward_time": 0.11499142646789551,
      "step": 39978
    },
    {
      "epoch": 0.00024400634765625,
      "step": 39978,
      "training_step_time": 0.43604230880737305
    },
    {
      "epoch": 0.000244012451171875,
      "model_forward_time": 0.11548614501953125,
      "step": 39979
    },
    {
      "epoch": 0.000244012451171875,
      "step": 39979,
      "training_step_time": 0.4568355083465576
    },
    {
      "epoch": 0.0002440185546875,
      "grad_norm": 0.08439963310956955,
      "learning_rate": 2.747252297266162e-05,
      "loss": 0.0369,
      "step": 39980
    },
    {
      "epoch": 0.0002440185546875,
      "model_forward_time": 0.1156923770904541,
      "step": 39980
    },
    {
      "epoch": 0.0002440185546875,
      "step": 39980,
      "training_step_time": 0.38441920280456543
    },
    {
      "epoch": 0.000244024658203125,
      "model_forward_time": 0.11542201042175293,
      "step": 39981
    },
    {
      "epoch": 0.000244024658203125,
      "step": 39981,
      "training_step_time": 0.39226388931274414
    },
    {
      "epoch": 0.00024403076171875,
      "model_forward_time": 0.11526942253112793,
      "step": 39982
    },
    {
      "epoch": 0.00024403076171875,
      "step": 39982,
      "training_step_time": 0.3877272605895996
    },
    {
      "epoch": 0.000244036865234375,
      "model_forward_time": 0.1155385971069336,
      "step": 39983
    },
    {
      "epoch": 0.000244036865234375,
      "step": 39983,
      "training_step_time": 0.5687961578369141
    },
    {
      "epoch": 0.00024404296875,
      "model_forward_time": 0.11516714096069336,
      "step": 39984
    },
    {
      "epoch": 0.00024404296875,
      "step": 39984,
      "training_step_time": 0.4010963439941406
    },
    {
      "epoch": 0.000244049072265625,
      "model_forward_time": 0.11515951156616211,
      "step": 39985
    },
    {
      "epoch": 0.000244049072265625,
      "step": 39985,
      "training_step_time": 0.46221184730529785
    },
    {
      "epoch": 0.00024405517578125,
      "model_forward_time": 0.11454582214355469,
      "step": 39986
    },
    {
      "epoch": 0.00024405517578125,
      "step": 39986,
      "training_step_time": 0.3974432945251465
    },
    {
      "epoch": 0.000244061279296875,
      "model_forward_time": 0.11476731300354004,
      "step": 39987
    },
    {
      "epoch": 0.000244061279296875,
      "step": 39987,
      "training_step_time": 0.4466087818145752
    },
    {
      "epoch": 0.0002440673828125,
      "model_forward_time": 0.11514616012573242,
      "step": 39988
    },
    {
      "epoch": 0.0002440673828125,
      "step": 39988,
      "training_step_time": 0.3898296356201172
    },
    {
      "epoch": 0.000244073486328125,
      "model_forward_time": 0.1150822639465332,
      "step": 39989
    },
    {
      "epoch": 0.000244073486328125,
      "step": 39989,
      "training_step_time": 0.5611417293548584
    },
    {
      "epoch": 0.00024407958984375,
      "grad_norm": 0.07951968908309937,
      "learning_rate": 2.7447924103195976e-05,
      "loss": 0.0385,
      "step": 39990
    },
    {
      "epoch": 0.00024407958984375,
      "model_forward_time": 0.1150655746459961,
      "step": 39990
    },
    {
      "epoch": 0.00024407958984375,
      "step": 39990,
      "training_step_time": 0.5107426643371582
    },
    {
      "epoch": 0.000244085693359375,
      "model_forward_time": 0.11515235900878906,
      "step": 39991
    },
    {
      "epoch": 0.000244085693359375,
      "step": 39991,
      "training_step_time": 0.4188551902770996
    },
    {
      "epoch": 0.000244091796875,
      "model_forward_time": 0.11470675468444824,
      "step": 39992
    },
    {
      "epoch": 0.000244091796875,
      "step": 39992,
      "training_step_time": 0.4917275905609131
    },
    {
      "epoch": 0.000244097900390625,
      "model_forward_time": 0.11545062065124512,
      "step": 39993
    },
    {
      "epoch": 0.000244097900390625,
      "step": 39993,
      "training_step_time": 0.4120807647705078
    },
    {
      "epoch": 0.00024410400390625,
      "model_forward_time": 0.1153404712677002,
      "step": 39994
    },
    {
      "epoch": 0.00024410400390625,
      "step": 39994,
      "training_step_time": 0.3876640796661377
    },
    {
      "epoch": 0.000244110107421875,
      "model_forward_time": 0.11515212059020996,
      "step": 39995
    },
    {
      "epoch": 0.000244110107421875,
      "step": 39995,
      "training_step_time": 0.39545488357543945
    },
    {
      "epoch": 0.0002441162109375,
      "model_forward_time": 0.11564183235168457,
      "step": 39996
    },
    {
      "epoch": 0.0002441162109375,
      "step": 39996,
      "training_step_time": 0.4056704044342041
    },
    {
      "epoch": 0.000244122314453125,
      "model_forward_time": 0.11523056030273438,
      "step": 39997
    },
    {
      "epoch": 0.000244122314453125,
      "step": 39997,
      "training_step_time": 0.388735294342041
    },
    {
      "epoch": 0.00024412841796875,
      "model_forward_time": 0.11525607109069824,
      "step": 39998
    },
    {
      "epoch": 0.00024412841796875,
      "step": 39998,
      "training_step_time": 0.39723753929138184
    },
    {
      "epoch": 0.000244134521484375,
      "model_forward_time": 0.11877632141113281,
      "step": 39999
    },
    {
      "epoch": 0.000244134521484375,
      "step": 39999,
      "training_step_time": 0.42191624641418457
    },
    {
      "epoch": 0.000244140625,
      "grad_norm": 0.14264963567256927,
      "learning_rate": 2.7423332084455544e-05,
      "loss": 0.0378,
      "step": 40000
    },
    {
      "epoch": 0.000244140625,
      "model_forward_time": 0.11415624618530273,
      "step": 40000
    },
    {
      "epoch": 0.000244140625,
      "step": 40000,
      "training_step_time": 0.35497021675109863
    },
    {
      "epoch": 0.000244146728515625,
      "model_forward_time": 0.11244058609008789,
      "step": 40001
    },
    {
      "epoch": 0.000244146728515625,
      "step": 40001,
      "training_step_time": 0.4375572204589844
    },
    {
      "epoch": 0.00024415283203125,
      "model_forward_time": 0.11297917366027832,
      "step": 40002
    },
    {
      "epoch": 0.00024415283203125,
      "step": 40002,
      "training_step_time": 0.41736435890197754
    },
    {
      "epoch": 0.000244158935546875,
      "model_forward_time": 0.11352324485778809,
      "step": 40003
    },
    {
      "epoch": 0.000244158935546875,
      "step": 40003,
      "training_step_time": 0.3784217834472656
    },
    {
      "epoch": 0.0002441650390625,
      "model_forward_time": 0.11445450782775879,
      "step": 40004
    },
    {
      "epoch": 0.0002441650390625,
      "step": 40004,
      "training_step_time": 0.4172821044921875
    },
    {
      "epoch": 0.000244171142578125,
      "model_forward_time": 0.11457586288452148,
      "step": 40005
    },
    {
      "epoch": 0.000244171142578125,
      "step": 40005,
      "training_step_time": 0.3973047733306885
    },
    {
      "epoch": 0.00024417724609375,
      "model_forward_time": 0.11501693725585938,
      "step": 40006
    },
    {
      "epoch": 0.00024417724609375,
      "step": 40006,
      "training_step_time": 0.4760935306549072
    },
    {
      "epoch": 0.000244183349609375,
      "model_forward_time": 0.11498689651489258,
      "step": 40007
    },
    {
      "epoch": 0.000244183349609375,
      "step": 40007,
      "training_step_time": 0.5151543617248535
    },
    {
      "epoch": 0.000244189453125,
      "model_forward_time": 0.11499977111816406,
      "step": 40008
    },
    {
      "epoch": 0.000244189453125,
      "step": 40008,
      "training_step_time": 0.4894428253173828
    },
    {
      "epoch": 0.000244195556640625,
      "model_forward_time": 0.11529922485351562,
      "step": 40009
    },
    {
      "epoch": 0.000244195556640625,
      "step": 40009,
      "training_step_time": 0.3782680034637451
    },
    {
      "epoch": 0.00024420166015625,
      "grad_norm": 0.09754101932048798,
      "learning_rate": 2.7398746923910723e-05,
      "loss": 0.0353,
      "step": 40010
    },
    {
      "epoch": 0.00024420166015625,
      "model_forward_time": 0.11490893363952637,
      "step": 40010
    },
    {
      "epoch": 0.00024420166015625,
      "step": 40010,
      "training_step_time": 0.4021601676940918
    },
    {
      "epoch": 0.000244207763671875,
      "model_forward_time": 0.11508297920227051,
      "step": 40011
    },
    {
      "epoch": 0.000244207763671875,
      "step": 40011,
      "training_step_time": 0.38075995445251465
    },
    {
      "epoch": 0.0002442138671875,
      "model_forward_time": 0.11507177352905273,
      "step": 40012
    },
    {
      "epoch": 0.0002442138671875,
      "step": 40012,
      "training_step_time": 0.38082432746887207
    },
    {
      "epoch": 0.000244219970703125,
      "model_forward_time": 0.11505556106567383,
      "step": 40013
    },
    {
      "epoch": 0.000244219970703125,
      "step": 40013,
      "training_step_time": 0.42053771018981934
    },
    {
      "epoch": 0.00024422607421875,
      "model_forward_time": 0.11630773544311523,
      "step": 40014
    },
    {
      "epoch": 0.00024422607421875,
      "step": 40014,
      "training_step_time": 0.494396448135376
    },
    {
      "epoch": 0.000244232177734375,
      "model_forward_time": 0.11582684516906738,
      "step": 40015
    },
    {
      "epoch": 0.000244232177734375,
      "step": 40015,
      "training_step_time": 0.4507791996002197
    },
    {
      "epoch": 0.00024423828125,
      "model_forward_time": 0.1153862476348877,
      "step": 40016
    },
    {
      "epoch": 0.00024423828125,
      "step": 40016,
      "training_step_time": 0.47587037086486816
    },
    {
      "epoch": 0.000244244384765625,
      "model_forward_time": 0.11432480812072754,
      "step": 40017
    },
    {
      "epoch": 0.000244244384765625,
      "step": 40017,
      "training_step_time": 0.3893756866455078
    },
    {
      "epoch": 0.00024425048828125,
      "model_forward_time": 0.11502814292907715,
      "step": 40018
    },
    {
      "epoch": 0.00024425048828125,
      "step": 40018,
      "training_step_time": 0.45698094367980957
    },
    {
      "epoch": 0.000244256591796875,
      "model_forward_time": 0.11464667320251465,
      "step": 40019
    },
    {
      "epoch": 0.000244256591796875,
      "step": 40019,
      "training_step_time": 0.4318416118621826
    },
    {
      "epoch": 0.0002442626953125,
      "grad_norm": 0.1211269274353981,
      "learning_rate": 2.7374168629029813e-05,
      "loss": 0.0358,
      "step": 40020
    },
    {
      "epoch": 0.0002442626953125,
      "model_forward_time": 0.1147313117980957,
      "step": 40020
    },
    {
      "epoch": 0.0002442626953125,
      "step": 40020,
      "training_step_time": 0.4798295497894287
    },
    {
      "epoch": 0.000244268798828125,
      "model_forward_time": 0.11552596092224121,
      "step": 40021
    },
    {
      "epoch": 0.000244268798828125,
      "step": 40021,
      "training_step_time": 0.4929189682006836
    },
    {
      "epoch": 0.00024427490234375,
      "model_forward_time": 0.11462664604187012,
      "step": 40022
    },
    {
      "epoch": 0.00024427490234375,
      "step": 40022,
      "training_step_time": 0.4962458610534668
    },
    {
      "epoch": 0.000244281005859375,
      "model_forward_time": 0.1143333911895752,
      "step": 40023
    },
    {
      "epoch": 0.000244281005859375,
      "step": 40023,
      "training_step_time": 0.3799288272857666
    },
    {
      "epoch": 0.000244287109375,
      "model_forward_time": 0.11426830291748047,
      "step": 40024
    },
    {
      "epoch": 0.000244287109375,
      "step": 40024,
      "training_step_time": 0.3921849727630615
    },
    {
      "epoch": 0.000244293212890625,
      "model_forward_time": 0.11498165130615234,
      "step": 40025
    },
    {
      "epoch": 0.000244293212890625,
      "step": 40025,
      "training_step_time": 0.3902621269226074
    },
    {
      "epoch": 0.00024429931640625,
      "model_forward_time": 0.1153411865234375,
      "step": 40026
    },
    {
      "epoch": 0.00024429931640625,
      "step": 40026,
      "training_step_time": 0.411121129989624
    },
    {
      "epoch": 0.000244305419921875,
      "model_forward_time": 0.11518692970275879,
      "step": 40027
    },
    {
      "epoch": 0.000244305419921875,
      "step": 40027,
      "training_step_time": 0.4192674160003662
    },
    {
      "epoch": 0.0002443115234375,
      "model_forward_time": 0.11461257934570312,
      "step": 40028
    },
    {
      "epoch": 0.0002443115234375,
      "step": 40028,
      "training_step_time": 0.4073822498321533
    },
    {
      "epoch": 0.000244317626953125,
      "model_forward_time": 0.11523747444152832,
      "step": 40029
    },
    {
      "epoch": 0.000244317626953125,
      "step": 40029,
      "training_step_time": 0.4135746955871582
    },
    {
      "epoch": 0.00024432373046875,
      "grad_norm": 0.1075589507818222,
      "learning_rate": 2.7349597207279088e-05,
      "loss": 0.0382,
      "step": 40030
    },
    {
      "epoch": 0.00024432373046875,
      "model_forward_time": 0.11550378799438477,
      "step": 40030
    },
    {
      "epoch": 0.00024432373046875,
      "step": 40030,
      "training_step_time": 0.4253730773925781
    },
    {
      "epoch": 0.000244329833984375,
      "model_forward_time": 0.11566495895385742,
      "step": 40031
    },
    {
      "epoch": 0.000244329833984375,
      "step": 40031,
      "training_step_time": 0.40630483627319336
    },
    {
      "epoch": 0.0002443359375,
      "model_forward_time": 0.11535406112670898,
      "step": 40032
    },
    {
      "epoch": 0.0002443359375,
      "step": 40032,
      "training_step_time": 0.39812564849853516
    },
    {
      "epoch": 0.000244342041015625,
      "model_forward_time": 0.11506032943725586,
      "step": 40033
    },
    {
      "epoch": 0.000244342041015625,
      "step": 40033,
      "training_step_time": 0.43830180168151855
    },
    {
      "epoch": 0.00024434814453125,
      "model_forward_time": 0.11554694175720215,
      "step": 40034
    },
    {
      "epoch": 0.00024434814453125,
      "step": 40034,
      "training_step_time": 0.47757506370544434
    },
    {
      "epoch": 0.000244354248046875,
      "model_forward_time": 0.1154778003692627,
      "step": 40035
    },
    {
      "epoch": 0.000244354248046875,
      "step": 40035,
      "training_step_time": 0.4457728862762451
    },
    {
      "epoch": 0.0002443603515625,
      "model_forward_time": 0.11602783203125,
      "step": 40036
    },
    {
      "epoch": 0.0002443603515625,
      "step": 40036,
      "training_step_time": 0.5063285827636719
    },
    {
      "epoch": 0.000244366455078125,
      "model_forward_time": 0.1152493953704834,
      "step": 40037
    },
    {
      "epoch": 0.000244366455078125,
      "step": 40037,
      "training_step_time": 0.4200272560119629
    },
    {
      "epoch": 0.00024437255859375,
      "model_forward_time": 0.11524748802185059,
      "step": 40038
    },
    {
      "epoch": 0.00024437255859375,
      "step": 40038,
      "training_step_time": 0.40023255348205566
    },
    {
      "epoch": 0.000244378662109375,
      "model_forward_time": 0.11540770530700684,
      "step": 40039
    },
    {
      "epoch": 0.000244378662109375,
      "step": 40039,
      "training_step_time": 0.3954150676727295
    },
    {
      "epoch": 0.000244384765625,
      "grad_norm": 0.1114371046423912,
      "learning_rate": 2.7325032666122686e-05,
      "loss": 0.036,
      "step": 40040
    },
    {
      "epoch": 0.000244384765625,
      "model_forward_time": 0.11480069160461426,
      "step": 40040
    },
    {
      "epoch": 0.000244384765625,
      "step": 40040,
      "training_step_time": 0.44684886932373047
    },
    {
      "epoch": 0.000244390869140625,
      "model_forward_time": 0.11541748046875,
      "step": 40041
    },
    {
      "epoch": 0.000244390869140625,
      "step": 40041,
      "training_step_time": 0.41662144660949707
    },
    {
      "epoch": 0.00024439697265625,
      "model_forward_time": 0.11527633666992188,
      "step": 40042
    },
    {
      "epoch": 0.00024439697265625,
      "step": 40042,
      "training_step_time": 0.4219050407409668
    },
    {
      "epoch": 0.000244403076171875,
      "model_forward_time": 0.11465573310852051,
      "step": 40043
    },
    {
      "epoch": 0.000244403076171875,
      "step": 40043,
      "training_step_time": 0.43338489532470703
    },
    {
      "epoch": 0.0002444091796875,
      "model_forward_time": 0.11488127708435059,
      "step": 40044
    },
    {
      "epoch": 0.0002444091796875,
      "step": 40044,
      "training_step_time": 0.411121129989624
    },
    {
      "epoch": 0.000244415283203125,
      "model_forward_time": 0.1155080795288086,
      "step": 40045
    },
    {
      "epoch": 0.000244415283203125,
      "step": 40045,
      "training_step_time": 0.4022862911224365
    },
    {
      "epoch": 0.00024442138671875,
      "model_forward_time": 0.11486029624938965,
      "step": 40046
    },
    {
      "epoch": 0.00024442138671875,
      "step": 40046,
      "training_step_time": 0.38810110092163086
    },
    {
      "epoch": 0.000244427490234375,
      "model_forward_time": 0.11606788635253906,
      "step": 40047
    },
    {
      "epoch": 0.000244427490234375,
      "step": 40047,
      "training_step_time": 0.3974325656890869
    },
    {
      "epoch": 0.00024443359375,
      "model_forward_time": 0.11492037773132324,
      "step": 40048
    },
    {
      "epoch": 0.00024443359375,
      "step": 40048,
      "training_step_time": 0.41439318656921387
    },
    {
      "epoch": 0.000244439697265625,
      "model_forward_time": 0.115570068359375,
      "step": 40049
    },
    {
      "epoch": 0.000244439697265625,
      "step": 40049,
      "training_step_time": 0.4776909351348877
    },
    {
      "epoch": 0.00024444580078125,
      "grad_norm": 0.10290631651878357,
      "learning_rate": 2.7300475013022663e-05,
      "loss": 0.039,
      "step": 40050
    },
    {
      "epoch": 0.00024444580078125,
      "model_forward_time": 0.11493587493896484,
      "step": 40050
    },
    {
      "epoch": 0.00024444580078125,
      "step": 40050,
      "training_step_time": 0.42232656478881836
    },
    {
      "epoch": 0.000244451904296875,
      "model_forward_time": 0.11470150947570801,
      "step": 40051
    },
    {
      "epoch": 0.000244451904296875,
      "step": 40051,
      "training_step_time": 0.4779083728790283
    },
    {
      "epoch": 0.0002444580078125,
      "model_forward_time": 0.11527872085571289,
      "step": 40052
    },
    {
      "epoch": 0.0002444580078125,
      "step": 40052,
      "training_step_time": 0.39383745193481445
    },
    {
      "epoch": 0.000244464111328125,
      "model_forward_time": 0.1152346134185791,
      "step": 40053
    },
    {
      "epoch": 0.000244464111328125,
      "step": 40053,
      "training_step_time": 0.3914024829864502
    },
    {
      "epoch": 0.00024447021484375,
      "model_forward_time": 0.11481928825378418,
      "step": 40054
    },
    {
      "epoch": 0.00024447021484375,
      "step": 40054,
      "training_step_time": 0.396564245223999
    },
    {
      "epoch": 0.000244476318359375,
      "model_forward_time": 0.1148993968963623,
      "step": 40055
    },
    {
      "epoch": 0.000244476318359375,
      "step": 40055,
      "training_step_time": 0.46329689025878906
    },
    {
      "epoch": 0.000244482421875,
      "model_forward_time": 0.11478519439697266,
      "step": 40056
    },
    {
      "epoch": 0.000244482421875,
      "step": 40056,
      "training_step_time": 0.39119410514831543
    },
    {
      "epoch": 0.000244488525390625,
      "model_forward_time": 0.11522889137268066,
      "step": 40057
    },
    {
      "epoch": 0.000244488525390625,
      "step": 40057,
      "training_step_time": 0.4419262409210205
    },
    {
      "epoch": 0.00024449462890625,
      "model_forward_time": 0.11488461494445801,
      "step": 40058
    },
    {
      "epoch": 0.00024449462890625,
      "step": 40058,
      "training_step_time": 0.4009552001953125
    },
    {
      "epoch": 0.000244500732421875,
      "model_forward_time": 0.11568689346313477,
      "step": 40059
    },
    {
      "epoch": 0.000244500732421875,
      "step": 40059,
      "training_step_time": 0.4679281711578369
    },
    {
      "epoch": 0.0002445068359375,
      "grad_norm": 0.10326328873634338,
      "learning_rate": 2.727592425543899e-05,
      "loss": 0.0387,
      "step": 40060
    },
    {
      "epoch": 0.0002445068359375,
      "model_forward_time": 0.11526179313659668,
      "step": 40060
    },
    {
      "epoch": 0.0002445068359375,
      "step": 40060,
      "training_step_time": 0.38643383979797363
    },
    {
      "epoch": 0.000244512939453125,
      "model_forward_time": 0.11516118049621582,
      "step": 40061
    },
    {
      "epoch": 0.000244512939453125,
      "step": 40061,
      "training_step_time": 0.3978736400604248
    },
    {
      "epoch": 0.00024451904296875,
      "model_forward_time": 0.11533522605895996,
      "step": 40062
    },
    {
      "epoch": 0.00024451904296875,
      "step": 40062,
      "training_step_time": 0.41080737113952637
    },
    {
      "epoch": 0.000244525146484375,
      "model_forward_time": 0.11543869972229004,
      "step": 40063
    },
    {
      "epoch": 0.000244525146484375,
      "step": 40063,
      "training_step_time": 0.5093338489532471
    },
    {
      "epoch": 0.00024453125,
      "model_forward_time": 0.11525392532348633,
      "step": 40064
    },
    {
      "epoch": 0.00024453125,
      "step": 40064,
      "training_step_time": 0.40920257568359375
    },
    {
      "epoch": 0.000244537353515625,
      "model_forward_time": 0.11845588684082031,
      "step": 40065
    },
    {
      "epoch": 0.000244537353515625,
      "step": 40065,
      "training_step_time": 0.38309407234191895
    },
    {
      "epoch": 0.00024454345703125,
      "model_forward_time": 0.11560797691345215,
      "step": 40066
    },
    {
      "epoch": 0.00024454345703125,
      "step": 40066,
      "training_step_time": 0.45417308807373047
    },
    {
      "epoch": 0.000244549560546875,
      "model_forward_time": 0.11528563499450684,
      "step": 40067
    },
    {
      "epoch": 0.000244549560546875,
      "step": 40067,
      "training_step_time": 0.3943474292755127
    },
    {
      "epoch": 0.0002445556640625,
      "model_forward_time": 0.11531329154968262,
      "step": 40068
    },
    {
      "epoch": 0.0002445556640625,
      "step": 40068,
      "training_step_time": 0.3989267349243164
    },
    {
      "epoch": 0.000244561767578125,
      "model_forward_time": 0.11537504196166992,
      "step": 40069
    },
    {
      "epoch": 0.000244561767578125,
      "step": 40069,
      "training_step_time": 0.3941836357116699
    },
    {
      "epoch": 0.00024456787109375,
      "grad_norm": 0.09350061416625977,
      "learning_rate": 2.725138040082953e-05,
      "loss": 0.0378,
      "step": 40070
    },
    {
      "epoch": 0.00024456787109375,
      "model_forward_time": 0.11499786376953125,
      "step": 40070
    },
    {
      "epoch": 0.00024456787109375,
      "step": 40070,
      "training_step_time": 0.3938875198364258
    },
    {
      "epoch": 0.000244573974609375,
      "model_forward_time": 0.11530351638793945,
      "step": 40071
    },
    {
      "epoch": 0.000244573974609375,
      "step": 40071,
      "training_step_time": 0.39171886444091797
    },
    {
      "epoch": 0.000244580078125,
      "model_forward_time": 0.11485505104064941,
      "step": 40072
    },
    {
      "epoch": 0.000244580078125,
      "step": 40072,
      "training_step_time": 0.41335272789001465
    },
    {
      "epoch": 0.000244586181640625,
      "model_forward_time": 0.11596465110778809,
      "step": 40073
    },
    {
      "epoch": 0.000244586181640625,
      "step": 40073,
      "training_step_time": 0.40122365951538086
    },
    {
      "epoch": 0.00024459228515625,
      "model_forward_time": 0.11499857902526855,
      "step": 40074
    },
    {
      "epoch": 0.00024459228515625,
      "step": 40074,
      "training_step_time": 0.45838475227355957
    },
    {
      "epoch": 0.000244598388671875,
      "model_forward_time": 0.11645197868347168,
      "step": 40075
    },
    {
      "epoch": 0.000244598388671875,
      "step": 40075,
      "training_step_time": 0.4948298931121826
    },
    {
      "epoch": 0.0002446044921875,
      "model_forward_time": 0.11535954475402832,
      "step": 40076
    },
    {
      "epoch": 0.0002446044921875,
      "step": 40076,
      "training_step_time": 0.4389169216156006
    },
    {
      "epoch": 0.000244610595703125,
      "model_forward_time": 0.11554956436157227,
      "step": 40077
    },
    {
      "epoch": 0.000244610595703125,
      "step": 40077,
      "training_step_time": 0.4354066848754883
    },
    {
      "epoch": 0.00024461669921875,
      "model_forward_time": 0.11578083038330078,
      "step": 40078
    },
    {
      "epoch": 0.00024461669921875,
      "step": 40078,
      "training_step_time": 0.47055697441101074
    },
    {
      "epoch": 0.000244622802734375,
      "model_forward_time": 0.11485815048217773,
      "step": 40079
    },
    {
      "epoch": 0.000244622802734375,
      "step": 40079,
      "training_step_time": 0.4844236373901367
    },
    {
      "epoch": 0.00024462890625,
      "grad_norm": 0.10636908560991287,
      "learning_rate": 2.7226843456650037e-05,
      "loss": 0.0366,
      "step": 40080
    },
    {
      "epoch": 0.00024462890625,
      "model_forward_time": 0.11441540718078613,
      "step": 40080
    },
    {
      "epoch": 0.00024462890625,
      "step": 40080,
      "training_step_time": 0.47544312477111816
    },
    {
      "epoch": 0.000244635009765625,
      "model_forward_time": 0.11470723152160645,
      "step": 40081
    },
    {
      "epoch": 0.000244635009765625,
      "step": 40081,
      "training_step_time": 0.41236257553100586
    },
    {
      "epoch": 0.00024464111328125,
      "model_forward_time": 0.11414742469787598,
      "step": 40082
    },
    {
      "epoch": 0.00024464111328125,
      "step": 40082,
      "training_step_time": 0.38123083114624023
    },
    {
      "epoch": 0.000244647216796875,
      "model_forward_time": 0.11467432975769043,
      "step": 40083
    },
    {
      "epoch": 0.000244647216796875,
      "step": 40083,
      "training_step_time": 0.3874199390411377
    },
    {
      "epoch": 0.0002446533203125,
      "model_forward_time": 0.1156761646270752,
      "step": 40084
    },
    {
      "epoch": 0.0002446533203125,
      "step": 40084,
      "training_step_time": 0.38271260261535645
    },
    {
      "epoch": 0.000244659423828125,
      "model_forward_time": 0.11508703231811523,
      "step": 40085
    },
    {
      "epoch": 0.000244659423828125,
      "step": 40085,
      "training_step_time": 0.3983592987060547
    },
    {
      "epoch": 0.00024466552734375,
      "model_forward_time": 0.11745119094848633,
      "step": 40086
    },
    {
      "epoch": 0.00024466552734375,
      "step": 40086,
      "training_step_time": 0.40613651275634766
    },
    {
      "epoch": 0.000244671630859375,
      "model_forward_time": 0.1154477596282959,
      "step": 40087
    },
    {
      "epoch": 0.000244671630859375,
      "step": 40087,
      "training_step_time": 0.500605583190918
    },
    {
      "epoch": 0.000244677734375,
      "model_forward_time": 0.11504220962524414,
      "step": 40088
    },
    {
      "epoch": 0.000244677734375,
      "step": 40088,
      "training_step_time": 0.39549875259399414
    },
    {
      "epoch": 0.000244683837890625,
      "model_forward_time": 0.11484813690185547,
      "step": 40089
    },
    {
      "epoch": 0.000244683837890625,
      "step": 40089,
      "training_step_time": 0.3895092010498047
    },
    {
      "epoch": 0.00024468994140625,
      "grad_norm": 0.10081955045461655,
      "learning_rate": 2.7202313430354253e-05,
      "loss": 0.0349,
      "step": 40090
    },
    {
      "epoch": 0.00024468994140625,
      "model_forward_time": 0.11533927917480469,
      "step": 40090
    },
    {
      "epoch": 0.00024468994140625,
      "step": 40090,
      "training_step_time": 0.38895487785339355
    },
    {
      "epoch": 0.000244696044921875,
      "model_forward_time": 0.11492753028869629,
      "step": 40091
    },
    {
      "epoch": 0.000244696044921875,
      "step": 40091,
      "training_step_time": 0.45880985260009766
    },
    {
      "epoch": 0.0002447021484375,
      "model_forward_time": 0.11536145210266113,
      "step": 40092
    },
    {
      "epoch": 0.0002447021484375,
      "step": 40092,
      "training_step_time": 0.47310853004455566
    },
    {
      "epoch": 0.000244708251953125,
      "model_forward_time": 0.11655688285827637,
      "step": 40093
    },
    {
      "epoch": 0.000244708251953125,
      "step": 40093,
      "training_step_time": 0.4194362163543701
    },
    {
      "epoch": 0.00024471435546875,
      "model_forward_time": 0.11609148979187012,
      "step": 40094
    },
    {
      "epoch": 0.00024471435546875,
      "step": 40094,
      "training_step_time": 0.4490973949432373
    },
    {
      "epoch": 0.000244720458984375,
      "model_forward_time": 0.11565351486206055,
      "step": 40095
    },
    {
      "epoch": 0.000244720458984375,
      "step": 40095,
      "training_step_time": 0.4653468132019043
    },
    {
      "epoch": 0.0002447265625,
      "model_forward_time": 0.11565351486206055,
      "step": 40096
    },
    {
      "epoch": 0.0002447265625,
      "step": 40096,
      "training_step_time": 0.3813800811767578
    },
    {
      "epoch": 0.000244732666015625,
      "model_forward_time": 0.11485004425048828,
      "step": 40097
    },
    {
      "epoch": 0.000244732666015625,
      "step": 40097,
      "training_step_time": 0.4002079963684082
    },
    {
      "epoch": 0.00024473876953125,
      "model_forward_time": 0.1158761978149414,
      "step": 40098
    },
    {
      "epoch": 0.00024473876953125,
      "step": 40098,
      "training_step_time": 0.39924001693725586
    },
    {
      "epoch": 0.000244744873046875,
      "model_forward_time": 0.11512923240661621,
      "step": 40099
    },
    {
      "epoch": 0.000244744873046875,
      "step": 40099,
      "training_step_time": 0.39438700675964355
    },
    {
      "epoch": 0.0002447509765625,
      "grad_norm": 0.11370740085840225,
      "learning_rate": 2.717779032939367e-05,
      "loss": 0.0333,
      "step": 40100
    },
    {
      "epoch": 0.0002447509765625,
      "model_forward_time": 0.11557435989379883,
      "step": 40100
    },
    {
      "epoch": 0.0002447509765625,
      "step": 40100,
      "training_step_time": 0.4028456211090088
    },
    {
      "epoch": 0.000244757080078125,
      "model_forward_time": 0.11578035354614258,
      "step": 40101
    },
    {
      "epoch": 0.000244757080078125,
      "step": 40101,
      "training_step_time": 0.450577974319458
    },
    {
      "epoch": 0.00024476318359375,
      "model_forward_time": 0.11513781547546387,
      "step": 40102
    },
    {
      "epoch": 0.00024476318359375,
      "step": 40102,
      "training_step_time": 0.41364169120788574
    },
    {
      "epoch": 0.000244769287109375,
      "model_forward_time": 0.11524558067321777,
      "step": 40103
    },
    {
      "epoch": 0.000244769287109375,
      "step": 40103,
      "training_step_time": 0.39717984199523926
    },
    {
      "epoch": 0.000244775390625,
      "model_forward_time": 0.11541748046875,
      "step": 40104
    },
    {
      "epoch": 0.000244775390625,
      "step": 40104,
      "training_step_time": 0.397308349609375
    },
    {
      "epoch": 0.000244781494140625,
      "model_forward_time": 0.11512517929077148,
      "step": 40105
    },
    {
      "epoch": 0.000244781494140625,
      "step": 40105,
      "training_step_time": 0.45032238960266113
    },
    {
      "epoch": 0.00024478759765625,
      "model_forward_time": 0.1149592399597168,
      "step": 40106
    },
    {
      "epoch": 0.00024478759765625,
      "step": 40106,
      "training_step_time": 0.4144144058227539
    },
    {
      "epoch": 0.000244793701171875,
      "model_forward_time": 0.11567473411560059,
      "step": 40107
    },
    {
      "epoch": 0.000244793701171875,
      "step": 40107,
      "training_step_time": 0.4944741725921631
    },
    {
      "epoch": 0.0002447998046875,
      "model_forward_time": 0.11515212059020996,
      "step": 40108
    },
    {
      "epoch": 0.0002447998046875,
      "step": 40108,
      "training_step_time": 0.44771432876586914
    },
    {
      "epoch": 0.000244805908203125,
      "model_forward_time": 0.11560249328613281,
      "step": 40109
    },
    {
      "epoch": 0.000244805908203125,
      "step": 40109,
      "training_step_time": 0.48886609077453613
    },
    {
      "epoch": 0.00024481201171875,
      "grad_norm": 0.08954634517431259,
      "learning_rate": 2.7153274161217846e-05,
      "loss": 0.0316,
      "step": 40110
    },
    {
      "epoch": 0.00024481201171875,
      "model_forward_time": 0.11528325080871582,
      "step": 40110
    },
    {
      "epoch": 0.00024481201171875,
      "step": 40110,
      "training_step_time": 0.3747427463531494
    },
    {
      "epoch": 0.000244818115234375,
      "model_forward_time": 0.11504411697387695,
      "step": 40111
    },
    {
      "epoch": 0.000244818115234375,
      "step": 40111,
      "training_step_time": 0.37833261489868164
    },
    {
      "epoch": 0.00024482421875,
      "model_forward_time": 0.11513996124267578,
      "step": 40112
    },
    {
      "epoch": 0.00024482421875,
      "step": 40112,
      "training_step_time": 0.39363574981689453
    },
    {
      "epoch": 0.000244830322265625,
      "model_forward_time": 0.11638903617858887,
      "step": 40113
    },
    {
      "epoch": 0.000244830322265625,
      "step": 40113,
      "training_step_time": 0.40040111541748047
    },
    {
      "epoch": 0.00024483642578125,
      "model_forward_time": 0.11534667015075684,
      "step": 40114
    },
    {
      "epoch": 0.00024483642578125,
      "step": 40114,
      "training_step_time": 0.38599634170532227
    },
    {
      "epoch": 0.000244842529296875,
      "model_forward_time": 0.11977624893188477,
      "step": 40115
    },
    {
      "epoch": 0.000244842529296875,
      "step": 40115,
      "training_step_time": 0.4260108470916748
    },
    {
      "epoch": 0.0002448486328125,
      "model_forward_time": 0.11506342887878418,
      "step": 40116
    },
    {
      "epoch": 0.0002448486328125,
      "step": 40116,
      "training_step_time": 0.3931899070739746
    },
    {
      "epoch": 0.000244854736328125,
      "model_forward_time": 0.11490941047668457,
      "step": 40117
    },
    {
      "epoch": 0.000244854736328125,
      "step": 40117,
      "training_step_time": 0.4198124408721924
    },
    {
      "epoch": 0.00024486083984375,
      "model_forward_time": 0.11521053314208984,
      "step": 40118
    },
    {
      "epoch": 0.00024486083984375,
      "step": 40118,
      "training_step_time": 0.39164137840270996
    },
    {
      "epoch": 0.000244866943359375,
      "model_forward_time": 0.1146848201751709,
      "step": 40119
    },
    {
      "epoch": 0.000244866943359375,
      "step": 40119,
      "training_step_time": 0.3977508544921875
    },
    {
      "epoch": 0.000244873046875,
      "grad_norm": 0.09281397610902786,
      "learning_rate": 2.7128764933274052e-05,
      "loss": 0.0368,
      "step": 40120
    },
    {
      "epoch": 0.000244873046875,
      "model_forward_time": 0.11551761627197266,
      "step": 40120
    },
    {
      "epoch": 0.000244873046875,
      "step": 40120,
      "training_step_time": 0.5000641345977783
    },
    {
      "epoch": 0.000244879150390625,
      "model_forward_time": 0.11559796333312988,
      "step": 40121
    },
    {
      "epoch": 0.000244879150390625,
      "step": 40121,
      "training_step_time": 0.3869204521179199
    },
    {
      "epoch": 0.00024488525390625,
      "model_forward_time": 0.11509132385253906,
      "step": 40122
    },
    {
      "epoch": 0.00024488525390625,
      "step": 40122,
      "training_step_time": 0.3984217643737793
    },
    {
      "epoch": 0.000244891357421875,
      "model_forward_time": 0.11545586585998535,
      "step": 40123
    },
    {
      "epoch": 0.000244891357421875,
      "step": 40123,
      "training_step_time": 0.42464399337768555
    },
    {
      "epoch": 0.0002448974609375,
      "model_forward_time": 0.11528325080871582,
      "step": 40124
    },
    {
      "epoch": 0.0002448974609375,
      "step": 40124,
      "training_step_time": 0.4214775562286377
    },
    {
      "epoch": 0.000244903564453125,
      "model_forward_time": 0.11522173881530762,
      "step": 40125
    },
    {
      "epoch": 0.000244903564453125,
      "step": 40125,
      "training_step_time": 0.39246344566345215
    },
    {
      "epoch": 0.00024490966796875,
      "model_forward_time": 0.11487865447998047,
      "step": 40126
    },
    {
      "epoch": 0.00024490966796875,
      "step": 40126,
      "training_step_time": 0.3976421356201172
    },
    {
      "epoch": 0.000244915771484375,
      "model_forward_time": 0.1147925853729248,
      "step": 40127
    },
    {
      "epoch": 0.000244915771484375,
      "step": 40127,
      "training_step_time": 0.38858914375305176
    },
    {
      "epoch": 0.000244921875,
      "model_forward_time": 0.1150507926940918,
      "step": 40128
    },
    {
      "epoch": 0.000244921875,
      "step": 40128,
      "training_step_time": 0.4012930393218994
    },
    {
      "epoch": 0.000244927978515625,
      "model_forward_time": 0.11487936973571777,
      "step": 40129
    },
    {
      "epoch": 0.000244927978515625,
      "step": 40129,
      "training_step_time": 0.4843132495880127
    },
    {
      "epoch": 0.00024493408203125,
      "grad_norm": 0.09382817894220352,
      "learning_rate": 2.7104262653007616e-05,
      "loss": 0.0342,
      "step": 40130
    },
    {
      "epoch": 0.00024493408203125,
      "model_forward_time": 0.11470293998718262,
      "step": 40130
    },
    {
      "epoch": 0.00024493408203125,
      "step": 40130,
      "training_step_time": 0.4207170009613037
    },
    {
      "epoch": 0.000244940185546875,
      "model_forward_time": 0.11464643478393555,
      "step": 40131
    },
    {
      "epoch": 0.000244940185546875,
      "step": 40131,
      "training_step_time": 0.48691749572753906
    },
    {
      "epoch": 0.0002449462890625,
      "model_forward_time": 0.11520957946777344,
      "step": 40132
    },
    {
      "epoch": 0.0002449462890625,
      "step": 40132,
      "training_step_time": 0.38672590255737305
    },
    {
      "epoch": 0.000244952392578125,
      "model_forward_time": 0.11533951759338379,
      "step": 40133
    },
    {
      "epoch": 0.000244952392578125,
      "step": 40133,
      "training_step_time": 0.38861989974975586
    },
    {
      "epoch": 0.00024495849609375,
      "model_forward_time": 0.11517024040222168,
      "step": 40134
    },
    {
      "epoch": 0.00024495849609375,
      "step": 40134,
      "training_step_time": 0.41519808769226074
    },
    {
      "epoch": 0.000244964599609375,
      "model_forward_time": 0.11521673202514648,
      "step": 40135
    },
    {
      "epoch": 0.000244964599609375,
      "step": 40135,
      "training_step_time": 0.4315805435180664
    },
    {
      "epoch": 0.000244970703125,
      "model_forward_time": 0.11493349075317383,
      "step": 40136
    },
    {
      "epoch": 0.000244970703125,
      "step": 40136,
      "training_step_time": 0.46479344367980957
    },
    {
      "epoch": 0.000244976806640625,
      "model_forward_time": 0.11519074440002441,
      "step": 40137
    },
    {
      "epoch": 0.000244976806640625,
      "step": 40137,
      "training_step_time": 0.4793272018432617
    },
    {
      "epoch": 0.00024498291015625,
      "model_forward_time": 0.11544132232666016,
      "step": 40138
    },
    {
      "epoch": 0.00024498291015625,
      "step": 40138,
      "training_step_time": 0.5250318050384521
    },
    {
      "epoch": 0.000244989013671875,
      "model_forward_time": 0.11564230918884277,
      "step": 40139
    },
    {
      "epoch": 0.000244989013671875,
      "step": 40139,
      "training_step_time": 0.3839602470397949
    },
    {
      "epoch": 0.0002449951171875,
      "grad_norm": 0.10059252381324768,
      "learning_rate": 2.707976732786166e-05,
      "loss": 0.0401,
      "step": 40140
    },
    {
      "epoch": 0.0002449951171875,
      "model_forward_time": 0.11504149436950684,
      "step": 40140
    },
    {
      "epoch": 0.0002449951171875,
      "step": 40140,
      "training_step_time": 0.3938412666320801
    },
    {
      "epoch": 0.000245001220703125,
      "model_forward_time": 0.11487412452697754,
      "step": 40141
    },
    {
      "epoch": 0.000245001220703125,
      "step": 40141,
      "training_step_time": 0.3788421154022217
    },
    {
      "epoch": 0.00024500732421875,
      "model_forward_time": 0.11496710777282715,
      "step": 40142
    },
    {
      "epoch": 0.00024500732421875,
      "step": 40142,
      "training_step_time": 0.38810014724731445
    },
    {
      "epoch": 0.000245013427734375,
      "model_forward_time": 0.1148369312286377,
      "step": 40143
    },
    {
      "epoch": 0.000245013427734375,
      "step": 40143,
      "training_step_time": 0.39696526527404785
    },
    {
      "epoch": 0.00024501953125,
      "model_forward_time": 0.11493372917175293,
      "step": 40144
    },
    {
      "epoch": 0.00024501953125,
      "step": 40144,
      "training_step_time": 0.43944883346557617
    },
    {
      "epoch": 0.000245025634765625,
      "model_forward_time": 0.11556792259216309,
      "step": 40145
    },
    {
      "epoch": 0.000245025634765625,
      "step": 40145,
      "training_step_time": 0.4299600124359131
    },
    {
      "epoch": 0.00024503173828125,
      "model_forward_time": 0.11478805541992188,
      "step": 40146
    },
    {
      "epoch": 0.00024503173828125,
      "step": 40146,
      "training_step_time": 0.42168426513671875
    },
    {
      "epoch": 0.000245037841796875,
      "model_forward_time": 0.1149756908416748,
      "step": 40147
    },
    {
      "epoch": 0.000245037841796875,
      "step": 40147,
      "training_step_time": 0.37946510314941406
    },
    {
      "epoch": 0.0002450439453125,
      "model_forward_time": 0.1145782470703125,
      "step": 40148
    },
    {
      "epoch": 0.0002450439453125,
      "step": 40148,
      "training_step_time": 0.39520978927612305
    },
    {
      "epoch": 0.000245050048828125,
      "model_forward_time": 0.11522102355957031,
      "step": 40149
    },
    {
      "epoch": 0.000245050048828125,
      "step": 40149,
      "training_step_time": 0.4095771312713623
    },
    {
      "epoch": 0.00024505615234375,
      "grad_norm": 0.10476727783679962,
      "learning_rate": 2.705527896527721e-05,
      "loss": 0.0352,
      "step": 40150
    },
    {
      "epoch": 0.00024505615234375,
      "model_forward_time": 0.11539435386657715,
      "step": 40150
    },
    {
      "epoch": 0.00024505615234375,
      "step": 40150,
      "training_step_time": 0.4240598678588867
    },
    {
      "epoch": 0.000245062255859375,
      "model_forward_time": 0.1152961254119873,
      "step": 40151
    },
    {
      "epoch": 0.000245062255859375,
      "step": 40151,
      "training_step_time": 0.3642768859863281
    },
    {
      "epoch": 0.000245068359375,
      "model_forward_time": 0.11507511138916016,
      "step": 40152
    },
    {
      "epoch": 0.000245068359375,
      "step": 40152,
      "training_step_time": 0.47144556045532227
    },
    {
      "epoch": 0.000245074462890625,
      "model_forward_time": 0.11510944366455078,
      "step": 40153
    },
    {
      "epoch": 0.000245074462890625,
      "step": 40153,
      "training_step_time": 0.45235705375671387
    },
    {
      "epoch": 0.00024508056640625,
      "model_forward_time": 0.1147921085357666,
      "step": 40154
    },
    {
      "epoch": 0.00024508056640625,
      "step": 40154,
      "training_step_time": 0.39201879501342773
    },
    {
      "epoch": 0.000245086669921875,
      "model_forward_time": 0.11484193801879883,
      "step": 40155
    },
    {
      "epoch": 0.000245086669921875,
      "step": 40155,
      "training_step_time": 0.3934323787689209
    },
    {
      "epoch": 0.0002450927734375,
      "model_forward_time": 0.11492729187011719,
      "step": 40156
    },
    {
      "epoch": 0.0002450927734375,
      "step": 40156,
      "training_step_time": 0.3940441608428955
    },
    {
      "epoch": 0.000245098876953125,
      "model_forward_time": 0.11501932144165039,
      "step": 40157
    },
    {
      "epoch": 0.000245098876953125,
      "step": 40157,
      "training_step_time": 0.38526034355163574
    },
    {
      "epoch": 0.00024510498046875,
      "model_forward_time": 0.11472868919372559,
      "step": 40158
    },
    {
      "epoch": 0.00024510498046875,
      "step": 40158,
      "training_step_time": 0.4135615825653076
    },
    {
      "epoch": 0.000245111083984375,
      "model_forward_time": 0.11525845527648926,
      "step": 40159
    },
    {
      "epoch": 0.000245111083984375,
      "step": 40159,
      "training_step_time": 0.49858617782592773
    },
    {
      "epoch": 0.0002451171875,
      "grad_norm": 0.1198805719614029,
      "learning_rate": 2.703079757269319e-05,
      "loss": 0.0371,
      "step": 40160
    },
    {
      "epoch": 0.0002451171875,
      "model_forward_time": 0.11511611938476562,
      "step": 40160
    },
    {
      "epoch": 0.0002451171875,
      "step": 40160,
      "training_step_time": 0.3943064212799072
    },
    {
      "epoch": 0.000245123291015625,
      "model_forward_time": 0.11544466018676758,
      "step": 40161
    },
    {
      "epoch": 0.000245123291015625,
      "step": 40161,
      "training_step_time": 0.3885824680328369
    },
    {
      "epoch": 0.00024512939453125,
      "model_forward_time": 0.11471748352050781,
      "step": 40162
    },
    {
      "epoch": 0.00024512939453125,
      "step": 40162,
      "training_step_time": 0.4030754566192627
    },
    {
      "epoch": 0.000245135498046875,
      "model_forward_time": 0.11524200439453125,
      "step": 40163
    },
    {
      "epoch": 0.000245135498046875,
      "step": 40163,
      "training_step_time": 0.4356856346130371
    },
    {
      "epoch": 0.0002451416015625,
      "model_forward_time": 0.11501312255859375,
      "step": 40164
    },
    {
      "epoch": 0.0002451416015625,
      "step": 40164,
      "training_step_time": 0.4387338161468506
    },
    {
      "epoch": 0.000245147705078125,
      "model_forward_time": 0.1145792007446289,
      "step": 40165
    },
    {
      "epoch": 0.000245147705078125,
      "step": 40165,
      "training_step_time": 0.4756152629852295
    },
    {
      "epoch": 0.00024515380859375,
      "model_forward_time": 0.11586332321166992,
      "step": 40166
    },
    {
      "epoch": 0.00024515380859375,
      "step": 40166,
      "training_step_time": 0.4085099697113037
    },
    {
      "epoch": 0.000245159912109375,
      "model_forward_time": 0.11535143852233887,
      "step": 40167
    },
    {
      "epoch": 0.000245159912109375,
      "step": 40167,
      "training_step_time": 0.476346492767334
    },
    {
      "epoch": 0.000245166015625,
      "model_forward_time": 0.11514067649841309,
      "step": 40168
    },
    {
      "epoch": 0.000245166015625,
      "step": 40168,
      "training_step_time": 0.4236750602722168
    },
    {
      "epoch": 0.000245172119140625,
      "model_forward_time": 0.11507797241210938,
      "step": 40169
    },
    {
      "epoch": 0.000245172119140625,
      "step": 40169,
      "training_step_time": 0.43245863914489746
    },
    {
      "epoch": 0.00024517822265625,
      "grad_norm": 0.14437328279018402,
      "learning_rate": 2.7006323157546386e-05,
      "loss": 0.0391,
      "step": 40170
    },
    {
      "epoch": 0.00024517822265625,
      "model_forward_time": 0.11480474472045898,
      "step": 40170
    },
    {
      "epoch": 0.00024517822265625,
      "step": 40170,
      "training_step_time": 0.3988654613494873
    },
    {
      "epoch": 0.000245184326171875,
      "model_forward_time": 0.11530017852783203,
      "step": 40171
    },
    {
      "epoch": 0.000245184326171875,
      "step": 40171,
      "training_step_time": 0.3893280029296875
    },
    {
      "epoch": 0.0002451904296875,
      "model_forward_time": 0.11490249633789062,
      "step": 40172
    },
    {
      "epoch": 0.0002451904296875,
      "step": 40172,
      "training_step_time": 0.39009594917297363
    },
    {
      "epoch": 0.000245196533203125,
      "model_forward_time": 0.11511898040771484,
      "step": 40173
    },
    {
      "epoch": 0.000245196533203125,
      "step": 40173,
      "training_step_time": 0.4196913242340088
    },
    {
      "epoch": 0.00024520263671875,
      "model_forward_time": 0.11508607864379883,
      "step": 40174
    },
    {
      "epoch": 0.00024520263671875,
      "step": 40174,
      "training_step_time": 0.41655945777893066
    },
    {
      "epoch": 0.000245208740234375,
      "model_forward_time": 0.11462593078613281,
      "step": 40175
    },
    {
      "epoch": 0.000245208740234375,
      "step": 40175,
      "training_step_time": 0.4949030876159668
    },
    {
      "epoch": 0.00024521484375,
      "model_forward_time": 0.11548519134521484,
      "step": 40176
    },
    {
      "epoch": 0.00024521484375,
      "step": 40176,
      "training_step_time": 0.39362430572509766
    },
    {
      "epoch": 0.000245220947265625,
      "model_forward_time": 0.11554503440856934,
      "step": 40177
    },
    {
      "epoch": 0.000245220947265625,
      "step": 40177,
      "training_step_time": 0.44649219512939453
    },
    {
      "epoch": 0.00024522705078125,
      "model_forward_time": 0.11521744728088379,
      "step": 40178
    },
    {
      "epoch": 0.00024522705078125,
      "step": 40178,
      "training_step_time": 0.41850996017456055
    },
    {
      "epoch": 0.000245233154296875,
      "model_forward_time": 0.11588287353515625,
      "step": 40179
    },
    {
      "epoch": 0.000245233154296875,
      "step": 40179,
      "training_step_time": 0.4225783348083496
    },
    {
      "epoch": 0.0002452392578125,
      "grad_norm": 0.09219539910554886,
      "learning_rate": 2.698185572727151e-05,
      "loss": 0.0326,
      "step": 40180
    },
    {
      "epoch": 0.0002452392578125,
      "model_forward_time": 0.11485838890075684,
      "step": 40180
    },
    {
      "epoch": 0.0002452392578125,
      "step": 40180,
      "training_step_time": 0.5043551921844482
    },
    {
      "epoch": 0.000245245361328125,
      "model_forward_time": 0.11445403099060059,
      "step": 40181
    },
    {
      "epoch": 0.000245245361328125,
      "step": 40181,
      "training_step_time": 0.4309868812561035
    },
    {
      "epoch": 0.00024525146484375,
      "model_forward_time": 0.11582827568054199,
      "step": 40182
    },
    {
      "epoch": 0.00024525146484375,
      "step": 40182,
      "training_step_time": 0.42705273628234863
    },
    {
      "epoch": 0.000245257568359375,
      "model_forward_time": 0.11531877517700195,
      "step": 40183
    },
    {
      "epoch": 0.000245257568359375,
      "step": 40183,
      "training_step_time": 0.39516568183898926
    },
    {
      "epoch": 0.000245263671875,
      "model_forward_time": 0.11637616157531738,
      "step": 40184
    },
    {
      "epoch": 0.000245263671875,
      "step": 40184,
      "training_step_time": 0.39059019088745117
    },
    {
      "epoch": 0.000245269775390625,
      "model_forward_time": 0.11506342887878418,
      "step": 40185
    },
    {
      "epoch": 0.000245269775390625,
      "step": 40185,
      "training_step_time": 0.40201592445373535
    },
    {
      "epoch": 0.00024527587890625,
      "model_forward_time": 0.11506128311157227,
      "step": 40186
    },
    {
      "epoch": 0.00024527587890625,
      "step": 40186,
      "training_step_time": 0.4413933753967285
    },
    {
      "epoch": 0.000245281982421875,
      "model_forward_time": 0.11524558067321777,
      "step": 40187
    },
    {
      "epoch": 0.000245281982421875,
      "step": 40187,
      "training_step_time": 0.4492652416229248
    },
    {
      "epoch": 0.0002452880859375,
      "model_forward_time": 0.11490678787231445,
      "step": 40188
    },
    {
      "epoch": 0.0002452880859375,
      "step": 40188,
      "training_step_time": 0.3949463367462158
    },
    {
      "epoch": 0.000245294189453125,
      "model_forward_time": 0.11490106582641602,
      "step": 40189
    },
    {
      "epoch": 0.000245294189453125,
      "step": 40189,
      "training_step_time": 0.4515821933746338
    },
    {
      "epoch": 0.00024530029296875,
      "grad_norm": 0.102098248898983,
      "learning_rate": 2.695739528930111e-05,
      "loss": 0.0349,
      "step": 40190
    },
    {
      "epoch": 0.00024530029296875,
      "model_forward_time": 0.11444973945617676,
      "step": 40190
    },
    {
      "epoch": 0.00024530029296875,
      "step": 40190,
      "training_step_time": 0.3958427906036377
    },
    {
      "epoch": 0.000245306396484375,
      "model_forward_time": 0.11618256568908691,
      "step": 40191
    },
    {
      "epoch": 0.000245306396484375,
      "step": 40191,
      "training_step_time": 0.4107794761657715
    },
    {
      "epoch": 0.0002453125,
      "model_forward_time": 0.11633539199829102,
      "step": 40192
    },
    {
      "epoch": 0.0002453125,
      "step": 40192,
      "training_step_time": 0.4908609390258789
    },
    {
      "epoch": 0.000245318603515625,
      "model_forward_time": 0.11506414413452148,
      "step": 40193
    },
    {
      "epoch": 0.000245318603515625,
      "step": 40193,
      "training_step_time": 0.39626502990722656
    },
    {
      "epoch": 0.00024532470703125,
      "model_forward_time": 0.11570882797241211,
      "step": 40194
    },
    {
      "epoch": 0.00024532470703125,
      "step": 40194,
      "training_step_time": 0.41591930389404297
    },
    {
      "epoch": 0.000245330810546875,
      "model_forward_time": 0.11519479751586914,
      "step": 40195
    },
    {
      "epoch": 0.000245330810546875,
      "step": 40195,
      "training_step_time": 0.4473552703857422
    },
    {
      "epoch": 0.0002453369140625,
      "model_forward_time": 0.11520171165466309,
      "step": 40196
    },
    {
      "epoch": 0.0002453369140625,
      "step": 40196,
      "training_step_time": 0.4702322483062744
    },
    {
      "epoch": 0.000245343017578125,
      "model_forward_time": 0.11514949798583984,
      "step": 40197
    },
    {
      "epoch": 0.000245343017578125,
      "step": 40197,
      "training_step_time": 0.3978569507598877
    },
    {
      "epoch": 0.00024534912109375,
      "model_forward_time": 0.11504936218261719,
      "step": 40198
    },
    {
      "epoch": 0.00024534912109375,
      "step": 40198,
      "training_step_time": 0.4380838871002197
    },
    {
      "epoch": 0.000245355224609375,
      "model_forward_time": 0.11470413208007812,
      "step": 40199
    },
    {
      "epoch": 0.000245355224609375,
      "step": 40199,
      "training_step_time": 0.38828063011169434
    },
    {
      "epoch": 0.000245361328125,
      "grad_norm": 0.10643496364355087,
      "learning_rate": 2.693294185106562e-05,
      "loss": 0.035,
      "step": 40200
    },
    {
      "epoch": 0.000245361328125,
      "model_forward_time": 0.11564087867736816,
      "step": 40200
    },
    {
      "epoch": 0.000245361328125,
      "step": 40200,
      "training_step_time": 0.38931775093078613
    },
    {
      "epoch": 0.000245367431640625,
      "model_forward_time": 0.11521720886230469,
      "step": 40201
    },
    {
      "epoch": 0.000245367431640625,
      "step": 40201,
      "training_step_time": 0.3889894485473633
    },
    {
      "epoch": 0.00024537353515625,
      "model_forward_time": 0.1156148910522461,
      "step": 40202
    },
    {
      "epoch": 0.00024537353515625,
      "step": 40202,
      "training_step_time": 0.4892263412475586
    },
    {
      "epoch": 0.000245379638671875,
      "model_forward_time": 0.11496138572692871,
      "step": 40203
    },
    {
      "epoch": 0.000245379638671875,
      "step": 40203,
      "training_step_time": 0.3911151885986328
    },
    {
      "epoch": 0.0002453857421875,
      "model_forward_time": 0.1155538558959961,
      "step": 40204
    },
    {
      "epoch": 0.0002453857421875,
      "step": 40204,
      "training_step_time": 0.660428524017334
    },
    {
      "epoch": 0.000245391845703125,
      "model_forward_time": 0.11661291122436523,
      "step": 40205
    },
    {
      "epoch": 0.000245391845703125,
      "step": 40205,
      "training_step_time": 0.40860724449157715
    },
    {
      "epoch": 0.00024539794921875,
      "model_forward_time": 0.1149296760559082,
      "step": 40206
    },
    {
      "epoch": 0.00024539794921875,
      "step": 40206,
      "training_step_time": 0.38452911376953125
    },
    {
      "epoch": 0.000245404052734375,
      "model_forward_time": 0.11458897590637207,
      "step": 40207
    },
    {
      "epoch": 0.000245404052734375,
      "step": 40207,
      "training_step_time": 0.39103007316589355
    },
    {
      "epoch": 0.00024541015625,
      "model_forward_time": 0.11519360542297363,
      "step": 40208
    },
    {
      "epoch": 0.00024541015625,
      "step": 40208,
      "training_step_time": 0.39603519439697266
    },
    {
      "epoch": 0.000245416259765625,
      "model_forward_time": 0.11465263366699219,
      "step": 40209
    },
    {
      "epoch": 0.000245416259765625,
      "step": 40209,
      "training_step_time": 0.38265085220336914
    },
    {
      "epoch": 0.00024542236328125,
      "grad_norm": 0.12638378143310547,
      "learning_rate": 2.690849541999333e-05,
      "loss": 0.0363,
      "step": 40210
    },
    {
      "epoch": 0.00024542236328125,
      "model_forward_time": 0.11563420295715332,
      "step": 40210
    },
    {
      "epoch": 0.00024542236328125,
      "step": 40210,
      "training_step_time": 0.5149190425872803
    },
    {
      "epoch": 0.000245428466796875,
      "model_forward_time": 0.1149892807006836,
      "step": 40211
    },
    {
      "epoch": 0.000245428466796875,
      "step": 40211,
      "training_step_time": 0.43219828605651855
    },
    {
      "epoch": 0.0002454345703125,
      "model_forward_time": 0.11536049842834473,
      "step": 40212
    },
    {
      "epoch": 0.0002454345703125,
      "step": 40212,
      "training_step_time": 0.38490891456604004
    },
    {
      "epoch": 0.000245440673828125,
      "model_forward_time": 0.11494970321655273,
      "step": 40213
    },
    {
      "epoch": 0.000245440673828125,
      "step": 40213,
      "training_step_time": 0.3897974491119385
    },
    {
      "epoch": 0.00024544677734375,
      "model_forward_time": 0.1147165298461914,
      "step": 40214
    },
    {
      "epoch": 0.00024544677734375,
      "step": 40214,
      "training_step_time": 0.3927905559539795
    },
    {
      "epoch": 0.000245452880859375,
      "model_forward_time": 0.11524105072021484,
      "step": 40215
    },
    {
      "epoch": 0.000245452880859375,
      "step": 40215,
      "training_step_time": 0.3957834243774414
    },
    {
      "epoch": 0.000245458984375,
      "model_forward_time": 0.11500740051269531,
      "step": 40216
    },
    {
      "epoch": 0.000245458984375,
      "step": 40216,
      "training_step_time": 0.70916748046875
    },
    {
      "epoch": 0.000245465087890625,
      "model_forward_time": 0.11474800109863281,
      "step": 40217
    },
    {
      "epoch": 0.000245465087890625,
      "step": 40217,
      "training_step_time": 0.4771595001220703
    },
    {
      "epoch": 0.00024547119140625,
      "model_forward_time": 0.1147775650024414,
      "step": 40218
    },
    {
      "epoch": 0.00024547119140625,
      "step": 40218,
      "training_step_time": 0.39696216583251953
    },
    {
      "epoch": 0.000245477294921875,
      "model_forward_time": 0.11443591117858887,
      "step": 40219
    },
    {
      "epoch": 0.000245477294921875,
      "step": 40219,
      "training_step_time": 0.3870275020599365
    },
    {
      "epoch": 0.0002454833984375,
      "grad_norm": 0.11077737808227539,
      "learning_rate": 2.688405600351045e-05,
      "loss": 0.0369,
      "step": 40220
    },
    {
      "epoch": 0.0002454833984375,
      "model_forward_time": 0.11438202857971191,
      "step": 40220
    },
    {
      "epoch": 0.0002454833984375,
      "step": 40220,
      "training_step_time": 0.3831791877746582
    },
    {
      "epoch": 0.000245489501953125,
      "model_forward_time": 0.1152656078338623,
      "step": 40221
    },
    {
      "epoch": 0.000245489501953125,
      "step": 40221,
      "training_step_time": 0.40850019454956055
    },
    {
      "epoch": 0.00024549560546875,
      "model_forward_time": 0.11525440216064453,
      "step": 40222
    },
    {
      "epoch": 0.00024549560546875,
      "step": 40222,
      "training_step_time": 0.4277067184448242
    },
    {
      "epoch": 0.000245501708984375,
      "model_forward_time": 0.11507487297058105,
      "step": 40223
    },
    {
      "epoch": 0.000245501708984375,
      "step": 40223,
      "training_step_time": 0.36759233474731445
    },
    {
      "epoch": 0.0002455078125,
      "model_forward_time": 0.11516714096069336,
      "step": 40224
    },
    {
      "epoch": 0.0002455078125,
      "step": 40224,
      "training_step_time": 0.44820332527160645
    },
    {
      "epoch": 0.000245513916015625,
      "model_forward_time": 0.1148688793182373,
      "step": 40225
    },
    {
      "epoch": 0.000245513916015625,
      "step": 40225,
      "training_step_time": 0.41700315475463867
    },
    {
      "epoch": 0.00024552001953125,
      "model_forward_time": 0.11551141738891602,
      "step": 40226
    },
    {
      "epoch": 0.00024552001953125,
      "step": 40226,
      "training_step_time": 0.38994932174682617
    },
    {
      "epoch": 0.000245526123046875,
      "model_forward_time": 0.11501312255859375,
      "step": 40227
    },
    {
      "epoch": 0.000245526123046875,
      "step": 40227,
      "training_step_time": 0.3804965019226074
    },
    {
      "epoch": 0.0002455322265625,
      "model_forward_time": 0.11519718170166016,
      "step": 40228
    },
    {
      "epoch": 0.0002455322265625,
      "step": 40228,
      "training_step_time": 0.6181197166442871
    },
    {
      "epoch": 0.000245538330078125,
      "model_forward_time": 0.11456775665283203,
      "step": 40229
    },
    {
      "epoch": 0.000245538330078125,
      "step": 40229,
      "training_step_time": 0.45694804191589355
    },
    {
      "epoch": 0.00024554443359375,
      "grad_norm": 0.1783573180437088,
      "learning_rate": 2.6859623609040984e-05,
      "loss": 0.0352,
      "step": 40230
    },
    {
      "epoch": 0.00024554443359375,
      "model_forward_time": 0.11580681800842285,
      "step": 40230
    },
    {
      "epoch": 0.00024554443359375,
      "step": 40230,
      "training_step_time": 0.44464778900146484
    },
    {
      "epoch": 0.000245550537109375,
      "model_forward_time": 0.11492538452148438,
      "step": 40231
    },
    {
      "epoch": 0.000245550537109375,
      "step": 40231,
      "training_step_time": 0.41931724548339844
    },
    {
      "epoch": 0.000245556640625,
      "model_forward_time": 0.11554384231567383,
      "step": 40232
    },
    {
      "epoch": 0.000245556640625,
      "step": 40232,
      "training_step_time": 0.3943030834197998
    },
    {
      "epoch": 0.000245562744140625,
      "model_forward_time": 0.11480164527893066,
      "step": 40233
    },
    {
      "epoch": 0.000245562744140625,
      "step": 40233,
      "training_step_time": 0.3894538879394531
    },
    {
      "epoch": 0.00024556884765625,
      "model_forward_time": 0.11457014083862305,
      "step": 40234
    },
    {
      "epoch": 0.00024556884765625,
      "step": 40234,
      "training_step_time": 0.6451632976531982
    },
    {
      "epoch": 0.000245574951171875,
      "model_forward_time": 0.11429524421691895,
      "step": 40235
    },
    {
      "epoch": 0.000245574951171875,
      "step": 40235,
      "training_step_time": 0.39411044120788574
    },
    {
      "epoch": 0.0002455810546875,
      "model_forward_time": 0.11602282524108887,
      "step": 40236
    },
    {
      "epoch": 0.0002455810546875,
      "step": 40236,
      "training_step_time": 0.40895771980285645
    },
    {
      "epoch": 0.000245587158203125,
      "model_forward_time": 0.11478352546691895,
      "step": 40237
    },
    {
      "epoch": 0.000245587158203125,
      "step": 40237,
      "training_step_time": 0.4358642101287842
    },
    {
      "epoch": 0.00024559326171875,
      "model_forward_time": 0.11536502838134766,
      "step": 40238
    },
    {
      "epoch": 0.00024559326171875,
      "step": 40238,
      "training_step_time": 0.48138999938964844
    },
    {
      "epoch": 0.000245599365234375,
      "model_forward_time": 0.11468696594238281,
      "step": 40239
    },
    {
      "epoch": 0.000245599365234375,
      "step": 40239,
      "training_step_time": 0.413973331451416
    },
    {
      "epoch": 0.00024560546875,
      "grad_norm": 0.08891547471284866,
      "learning_rate": 2.6835198244006927e-05,
      "loss": 0.0372,
      "step": 40240
    },
    {
      "epoch": 0.00024560546875,
      "model_forward_time": 0.11452913284301758,
      "step": 40240
    },
    {
      "epoch": 0.00024560546875,
      "step": 40240,
      "training_step_time": 0.4210777282714844
    },
    {
      "epoch": 0.000245611572265625,
      "model_forward_time": 0.11479830741882324,
      "step": 40241
    },
    {
      "epoch": 0.000245611572265625,
      "step": 40241,
      "training_step_time": 0.4070262908935547
    },
    {
      "epoch": 0.00024561767578125,
      "model_forward_time": 0.11496233940124512,
      "step": 40242
    },
    {
      "epoch": 0.00024561767578125,
      "step": 40242,
      "training_step_time": 0.4171750545501709
    },
    {
      "epoch": 0.000245623779296875,
      "model_forward_time": 0.11546874046325684,
      "step": 40243
    },
    {
      "epoch": 0.000245623779296875,
      "step": 40243,
      "training_step_time": 0.48459458351135254
    },
    {
      "epoch": 0.0002456298828125,
      "model_forward_time": 0.11467313766479492,
      "step": 40244
    },
    {
      "epoch": 0.0002456298828125,
      "step": 40244,
      "training_step_time": 0.38570070266723633
    },
    {
      "epoch": 0.000245635986328125,
      "model_forward_time": 0.1140904426574707,
      "step": 40245
    },
    {
      "epoch": 0.000245635986328125,
      "step": 40245,
      "training_step_time": 0.40150976181030273
    },
    {
      "epoch": 0.00024564208984375,
      "model_forward_time": 0.11552047729492188,
      "step": 40246
    },
    {
      "epoch": 0.00024564208984375,
      "step": 40246,
      "training_step_time": 0.5702717304229736
    },
    {
      "epoch": 0.000245648193359375,
      "model_forward_time": 0.11498594284057617,
      "step": 40247
    },
    {
      "epoch": 0.000245648193359375,
      "step": 40247,
      "training_step_time": 0.3823819160461426
    },
    {
      "epoch": 0.000245654296875,
      "model_forward_time": 0.11503386497497559,
      "step": 40248
    },
    {
      "epoch": 0.000245654296875,
      "step": 40248,
      "training_step_time": 0.3916184902191162
    },
    {
      "epoch": 0.000245660400390625,
      "model_forward_time": 0.11516618728637695,
      "step": 40249
    },
    {
      "epoch": 0.000245660400390625,
      "step": 40249,
      "training_step_time": 0.40447521209716797
    },
    {
      "epoch": 0.00024566650390625,
      "grad_norm": 0.07818366587162018,
      "learning_rate": 2.681077991582797e-05,
      "loss": 0.0362,
      "step": 40250
    },
    {
      "epoch": 0.00024566650390625,
      "model_forward_time": 0.1165320873260498,
      "step": 40250
    },
    {
      "epoch": 0.00024566650390625,
      "step": 40250,
      "training_step_time": 0.4266190528869629
    },
    {
      "epoch": 0.000245672607421875,
      "model_forward_time": 0.11434626579284668,
      "step": 40251
    },
    {
      "epoch": 0.000245672607421875,
      "step": 40251,
      "training_step_time": 0.36626529693603516
    },
    {
      "epoch": 0.0002456787109375,
      "model_forward_time": 0.11478090286254883,
      "step": 40252
    },
    {
      "epoch": 0.0002456787109375,
      "step": 40252,
      "training_step_time": 0.4303162097930908
    },
    {
      "epoch": 0.000245684814453125,
      "model_forward_time": 0.11422419548034668,
      "step": 40253
    },
    {
      "epoch": 0.000245684814453125,
      "step": 40253,
      "training_step_time": 0.42925310134887695
    },
    {
      "epoch": 0.00024569091796875,
      "model_forward_time": 0.11477971076965332,
      "step": 40254
    },
    {
      "epoch": 0.00024569091796875,
      "step": 40254,
      "training_step_time": 0.3888404369354248
    },
    {
      "epoch": 0.000245697021484375,
      "model_forward_time": 0.11454224586486816,
      "step": 40255
    },
    {
      "epoch": 0.000245697021484375,
      "step": 40255,
      "training_step_time": 0.39420270919799805
    },
    {
      "epoch": 0.000245703125,
      "model_forward_time": 0.1153407096862793,
      "step": 40256
    },
    {
      "epoch": 0.000245703125,
      "step": 40256,
      "training_step_time": 0.3975520133972168
    },
    {
      "epoch": 0.000245709228515625,
      "model_forward_time": 0.11502933502197266,
      "step": 40257
    },
    {
      "epoch": 0.000245709228515625,
      "step": 40257,
      "training_step_time": 0.5211448669433594
    },
    {
      "epoch": 0.00024571533203125,
      "model_forward_time": 0.11535477638244629,
      "step": 40258
    },
    {
      "epoch": 0.00024571533203125,
      "step": 40258,
      "training_step_time": 0.47531819343566895
    },
    {
      "epoch": 0.000245721435546875,
      "model_forward_time": 0.11497879028320312,
      "step": 40259
    },
    {
      "epoch": 0.000245721435546875,
      "step": 40259,
      "training_step_time": 0.43642210960388184
    },
    {
      "epoch": 0.0002457275390625,
      "grad_norm": 0.1211787611246109,
      "learning_rate": 2.6786368631921836e-05,
      "loss": 0.0371,
      "step": 40260
    },
    {
      "epoch": 0.0002457275390625,
      "model_forward_time": 0.11571478843688965,
      "step": 40260
    },
    {
      "epoch": 0.0002457275390625,
      "step": 40260,
      "training_step_time": 0.3961060047149658
    },
    {
      "epoch": 0.000245733642578125,
      "model_forward_time": 0.11508607864379883,
      "step": 40261
    },
    {
      "epoch": 0.000245733642578125,
      "step": 40261,
      "training_step_time": 0.39249253273010254
    },
    {
      "epoch": 0.00024573974609375,
      "model_forward_time": 0.1150052547454834,
      "step": 40262
    },
    {
      "epoch": 0.00024573974609375,
      "step": 40262,
      "training_step_time": 0.3903462886810303
    },
    {
      "epoch": 0.000245745849609375,
      "model_forward_time": 0.11553072929382324,
      "step": 40263
    },
    {
      "epoch": 0.000245745849609375,
      "step": 40263,
      "training_step_time": 0.5057432651519775
    },
    {
      "epoch": 0.000245751953125,
      "model_forward_time": 0.11504077911376953,
      "step": 40264
    },
    {
      "epoch": 0.000245751953125,
      "step": 40264,
      "training_step_time": 0.5008506774902344
    },
    {
      "epoch": 0.000245758056640625,
      "model_forward_time": 0.1148681640625,
      "step": 40265
    },
    {
      "epoch": 0.000245758056640625,
      "step": 40265,
      "training_step_time": 0.36652207374572754
    },
    {
      "epoch": 0.00024576416015625,
      "model_forward_time": 0.11548447608947754,
      "step": 40266
    },
    {
      "epoch": 0.00024576416015625,
      "step": 40266,
      "training_step_time": 0.4285149574279785
    },
    {
      "epoch": 0.000245770263671875,
      "model_forward_time": 0.11494016647338867,
      "step": 40267
    },
    {
      "epoch": 0.000245770263671875,
      "step": 40267,
      "training_step_time": 0.3964512348175049
    },
    {
      "epoch": 0.0002457763671875,
      "model_forward_time": 0.11537599563598633,
      "step": 40268
    },
    {
      "epoch": 0.0002457763671875,
      "step": 40268,
      "training_step_time": 0.3942573070526123
    },
    {
      "epoch": 0.000245782470703125,
      "model_forward_time": 0.11501741409301758,
      "step": 40269
    },
    {
      "epoch": 0.000245782470703125,
      "step": 40269,
      "training_step_time": 0.39757251739501953
    },
    {
      "epoch": 0.00024578857421875,
      "grad_norm": 0.1061728224158287,
      "learning_rate": 2.6761964399703955e-05,
      "loss": 0.0358,
      "step": 40270
    },
    {
      "epoch": 0.00024578857421875,
      "model_forward_time": 0.11476945877075195,
      "step": 40270
    },
    {
      "epoch": 0.00024578857421875,
      "step": 40270,
      "training_step_time": 0.581613302230835
    },
    {
      "epoch": 0.000245794677734375,
      "model_forward_time": 0.1146707534790039,
      "step": 40271
    },
    {
      "epoch": 0.000245794677734375,
      "step": 40271,
      "training_step_time": 0.39668703079223633
    },
    {
      "epoch": 0.00024580078125,
      "model_forward_time": 0.11558985710144043,
      "step": 40272
    },
    {
      "epoch": 0.00024580078125,
      "step": 40272,
      "training_step_time": 0.4022974967956543
    },
    {
      "epoch": 0.000245806884765625,
      "model_forward_time": 0.11431360244750977,
      "step": 40273
    },
    {
      "epoch": 0.000245806884765625,
      "step": 40273,
      "training_step_time": 0.422283411026001
    },
    {
      "epoch": 0.00024581298828125,
      "model_forward_time": 0.11511349678039551,
      "step": 40274
    },
    {
      "epoch": 0.00024581298828125,
      "step": 40274,
      "training_step_time": 0.4067354202270508
    },
    {
      "epoch": 0.000245819091796875,
      "model_forward_time": 0.1145625114440918,
      "step": 40275
    },
    {
      "epoch": 0.000245819091796875,
      "step": 40275,
      "training_step_time": 0.40051937103271484
    },
    {
      "epoch": 0.0002458251953125,
      "model_forward_time": 0.11522507667541504,
      "step": 40276
    },
    {
      "epoch": 0.0002458251953125,
      "step": 40276,
      "training_step_time": 0.6251888275146484
    },
    {
      "epoch": 0.000245831298828125,
      "model_forward_time": 0.11435437202453613,
      "step": 40277
    },
    {
      "epoch": 0.000245831298828125,
      "step": 40277,
      "training_step_time": 0.4237039089202881
    },
    {
      "epoch": 0.00024583740234375,
      "model_forward_time": 0.11502838134765625,
      "step": 40278
    },
    {
      "epoch": 0.00024583740234375,
      "step": 40278,
      "training_step_time": 0.4307746887207031
    },
    {
      "epoch": 0.000245843505859375,
      "model_forward_time": 0.11442112922668457,
      "step": 40279
    },
    {
      "epoch": 0.000245843505859375,
      "step": 40279,
      "training_step_time": 0.41077542304992676
    },
    {
      "epoch": 0.000245849609375,
      "grad_norm": 0.10772882401943207,
      "learning_rate": 2.6737567226587747e-05,
      "loss": 0.0381,
      "step": 40280
    },
    {
      "epoch": 0.000245849609375,
      "model_forward_time": 0.11502933502197266,
      "step": 40280
    },
    {
      "epoch": 0.000245849609375,
      "step": 40280,
      "training_step_time": 0.40024781227111816
    },
    {
      "epoch": 0.000245855712890625,
      "model_forward_time": 0.11464762687683105,
      "step": 40281
    },
    {
      "epoch": 0.000245855712890625,
      "step": 40281,
      "training_step_time": 0.39409422874450684
    },
    {
      "epoch": 0.00024586181640625,
      "model_forward_time": 0.11566758155822754,
      "step": 40282
    },
    {
      "epoch": 0.00024586181640625,
      "step": 40282,
      "training_step_time": 0.45148587226867676
    },
    {
      "epoch": 0.000245867919921875,
      "model_forward_time": 0.11548519134521484,
      "step": 40283
    },
    {
      "epoch": 0.000245867919921875,
      "step": 40283,
      "training_step_time": 0.41687750816345215
    },
    {
      "epoch": 0.0002458740234375,
      "model_forward_time": 0.1154634952545166,
      "step": 40284
    },
    {
      "epoch": 0.0002458740234375,
      "step": 40284,
      "training_step_time": 0.3902878761291504
    },
    {
      "epoch": 0.000245880126953125,
      "model_forward_time": 0.11507725715637207,
      "step": 40285
    },
    {
      "epoch": 0.000245880126953125,
      "step": 40285,
      "training_step_time": 0.4583425521850586
    },
    {
      "epoch": 0.00024588623046875,
      "model_forward_time": 0.11462235450744629,
      "step": 40286
    },
    {
      "epoch": 0.00024588623046875,
      "step": 40286,
      "training_step_time": 0.4299485683441162
    },
    {
      "epoch": 0.000245892333984375,
      "model_forward_time": 0.1145789623260498,
      "step": 40287
    },
    {
      "epoch": 0.000245892333984375,
      "step": 40287,
      "training_step_time": 0.4498605728149414
    },
    {
      "epoch": 0.0002458984375,
      "model_forward_time": 0.11433839797973633,
      "step": 40288
    },
    {
      "epoch": 0.0002458984375,
      "step": 40288,
      "training_step_time": 0.38870763778686523
    },
    {
      "epoch": 0.000245904541015625,
      "model_forward_time": 0.11510872840881348,
      "step": 40289
    },
    {
      "epoch": 0.000245904541015625,
      "step": 40289,
      "training_step_time": 0.4331481456756592
    },
    {
      "epoch": 0.00024591064453125,
      "grad_norm": 0.13380558788776398,
      "learning_rate": 2.67131771199844e-05,
      "loss": 0.0332,
      "step": 40290
    },
    {
      "epoch": 0.00024591064453125,
      "model_forward_time": 0.11485171318054199,
      "step": 40290
    },
    {
      "epoch": 0.00024591064453125,
      "step": 40290,
      "training_step_time": 0.3925611972808838
    },
    {
      "epoch": 0.000245916748046875,
      "model_forward_time": 0.11528468132019043,
      "step": 40291
    },
    {
      "epoch": 0.000245916748046875,
      "step": 40291,
      "training_step_time": 0.47422289848327637
    },
    {
      "epoch": 0.0002459228515625,
      "model_forward_time": 0.11531686782836914,
      "step": 40292
    },
    {
      "epoch": 0.0002459228515625,
      "step": 40292,
      "training_step_time": 0.41037535667419434
    },
    {
      "epoch": 0.000245928955078125,
      "model_forward_time": 0.11519050598144531,
      "step": 40293
    },
    {
      "epoch": 0.000245928955078125,
      "step": 40293,
      "training_step_time": 0.4067261219024658
    },
    {
      "epoch": 0.00024593505859375,
      "model_forward_time": 0.11487436294555664,
      "step": 40294
    },
    {
      "epoch": 0.00024593505859375,
      "step": 40294,
      "training_step_time": 0.5082120895385742
    },
    {
      "epoch": 0.000245941162109375,
      "model_forward_time": 0.11565709114074707,
      "step": 40295
    },
    {
      "epoch": 0.000245941162109375,
      "step": 40295,
      "training_step_time": 0.48856377601623535
    },
    {
      "epoch": 0.000245947265625,
      "model_forward_time": 0.1147620677947998,
      "step": 40296
    },
    {
      "epoch": 0.000245947265625,
      "step": 40296,
      "training_step_time": 0.38454604148864746
    },
    {
      "epoch": 0.000245953369140625,
      "model_forward_time": 0.11533427238464355,
      "step": 40297
    },
    {
      "epoch": 0.000245953369140625,
      "step": 40297,
      "training_step_time": 0.3810148239135742
    },
    {
      "epoch": 0.00024595947265625,
      "model_forward_time": 0.1160273551940918,
      "step": 40298
    },
    {
      "epoch": 0.00024595947265625,
      "step": 40298,
      "training_step_time": 0.38219141960144043
    },
    {
      "epoch": 0.000245965576171875,
      "model_forward_time": 0.11514830589294434,
      "step": 40299
    },
    {
      "epoch": 0.000245965576171875,
      "step": 40299,
      "training_step_time": 0.37963175773620605
    },
    {
      "epoch": 0.0002459716796875,
      "grad_norm": 0.09116668999195099,
      "learning_rate": 2.6688794087302993e-05,
      "loss": 0.0375,
      "step": 40300
    },
    {
      "epoch": 0.0002459716796875,
      "model_forward_time": 0.11535358428955078,
      "step": 40300
    },
    {
      "epoch": 0.0002459716796875,
      "step": 40300,
      "training_step_time": 0.5583171844482422
    },
    {
      "epoch": 0.000245977783203125,
      "model_forward_time": 0.11740469932556152,
      "step": 40301
    },
    {
      "epoch": 0.000245977783203125,
      "step": 40301,
      "training_step_time": 0.5833683013916016
    },
    {
      "epoch": 0.00024598388671875,
      "model_forward_time": 0.11747598648071289,
      "step": 40302
    },
    {
      "epoch": 0.00024598388671875,
      "step": 40302,
      "training_step_time": 0.5616230964660645
    },
    {
      "epoch": 0.000245989990234375,
      "model_forward_time": 0.12242555618286133,
      "step": 40303
    },
    {
      "epoch": 0.000245989990234375,
      "step": 40303,
      "training_step_time": 0.678231954574585
    },
    {
      "epoch": 0.00024599609375,
      "model_forward_time": 0.125946044921875,
      "step": 40304
    },
    {
      "epoch": 0.00024599609375,
      "step": 40304,
      "training_step_time": 0.8146505355834961
    },
    {
      "epoch": 0.000246002197265625,
      "model_forward_time": 0.11623406410217285,
      "step": 40305
    },
    {
      "epoch": 0.000246002197265625,
      "step": 40305,
      "training_step_time": 0.7508182525634766
    },
    {
      "epoch": 0.00024600830078125,
      "model_forward_time": 0.11791443824768066,
      "step": 40306
    },
    {
      "epoch": 0.00024600830078125,
      "step": 40306,
      "training_step_time": 0.6838641166687012
    },
    {
      "epoch": 0.000246014404296875,
      "model_forward_time": 0.11583423614501953,
      "step": 40307
    },
    {
      "epoch": 0.000246014404296875,
      "step": 40307,
      "training_step_time": 0.6055619716644287
    },
    {
      "epoch": 0.0002460205078125,
      "model_forward_time": 0.1185610294342041,
      "step": 40308
    },
    {
      "epoch": 0.0002460205078125,
      "step": 40308,
      "training_step_time": 0.6773233413696289
    },
    {
      "epoch": 0.000246026611328125,
      "model_forward_time": 0.1221160888671875,
      "step": 40309
    },
    {
      "epoch": 0.000246026611328125,
      "step": 40309,
      "training_step_time": 0.7288382053375244
    },
    {
      "epoch": 0.00024603271484375,
      "grad_norm": 0.12726010382175446,
      "learning_rate": 2.6664418135950453e-05,
      "loss": 0.0406,
      "step": 40310
    },
    {
      "epoch": 0.00024603271484375,
      "model_forward_time": 0.11979174613952637,
      "step": 40310
    },
    {
      "epoch": 0.00024603271484375,
      "step": 40310,
      "training_step_time": 0.6358587741851807
    },
    {
      "epoch": 0.000246038818359375,
      "model_forward_time": 0.11872291564941406,
      "step": 40311
    },
    {
      "epoch": 0.000246038818359375,
      "step": 40311,
      "training_step_time": 0.6189336776733398
    },
    {
      "epoch": 0.000246044921875,
      "model_forward_time": 0.11970186233520508,
      "step": 40312
    },
    {
      "epoch": 0.000246044921875,
      "step": 40312,
      "training_step_time": 0.6725647449493408
    },
    {
      "epoch": 0.000246051025390625,
      "model_forward_time": 0.11788320541381836,
      "step": 40313
    },
    {
      "epoch": 0.000246051025390625,
      "step": 40313,
      "training_step_time": 0.6792829036712646
    },
    {
      "epoch": 0.00024605712890625,
      "model_forward_time": 0.11995506286621094,
      "step": 40314
    },
    {
      "epoch": 0.00024605712890625,
      "step": 40314,
      "training_step_time": 0.6474378108978271
    },
    {
      "epoch": 0.000246063232421875,
      "model_forward_time": 0.12417960166931152,
      "step": 40315
    },
    {
      "epoch": 0.000246063232421875,
      "step": 40315,
      "training_step_time": 0.7456841468811035
    },
    {
      "epoch": 0.0002460693359375,
      "model_forward_time": 0.11665916442871094,
      "step": 40316
    },
    {
      "epoch": 0.0002460693359375,
      "step": 40316,
      "training_step_time": 0.7156474590301514
    },
    {
      "epoch": 0.000246075439453125,
      "model_forward_time": 0.12078070640563965,
      "step": 40317
    },
    {
      "epoch": 0.000246075439453125,
      "step": 40317,
      "training_step_time": 0.6297557353973389
    },
    {
      "epoch": 0.00024608154296875,
      "model_forward_time": 0.1192319393157959,
      "step": 40318
    },
    {
      "epoch": 0.00024608154296875,
      "step": 40318,
      "training_step_time": 0.6027655601501465
    },
    {
      "epoch": 0.000246087646484375,
      "model_forward_time": 0.12056112289428711,
      "step": 40319
    },
    {
      "epoch": 0.000246087646484375,
      "step": 40319,
      "training_step_time": 0.678088903427124
    },
    {
      "epoch": 0.00024609375,
      "grad_norm": 0.11473619192838669,
      "learning_rate": 2.6640049273331515e-05,
      "loss": 0.043,
      "step": 40320
    },
    {
      "epoch": 0.00024609375,
      "model_forward_time": 0.12013101577758789,
      "step": 40320
    },
    {
      "epoch": 0.00024609375,
      "step": 40320,
      "training_step_time": 0.7132246494293213
    },
    {
      "epoch": 0.000246099853515625,
      "model_forward_time": 0.11770462989807129,
      "step": 40321
    },
    {
      "epoch": 0.000246099853515625,
      "step": 40321,
      "training_step_time": 0.7208118438720703
    },
    {
      "epoch": 0.00024610595703125,
      "model_forward_time": 0.12015891075134277,
      "step": 40322
    },
    {
      "epoch": 0.00024610595703125,
      "step": 40322,
      "training_step_time": 0.6781647205352783
    },
    {
      "epoch": 0.000246112060546875,
      "model_forward_time": 0.1208031177520752,
      "step": 40323
    },
    {
      "epoch": 0.000246112060546875,
      "step": 40323,
      "training_step_time": 0.70458984375
    },
    {
      "epoch": 0.0002461181640625,
      "model_forward_time": 0.11832189559936523,
      "step": 40324
    },
    {
      "epoch": 0.0002461181640625,
      "step": 40324,
      "training_step_time": 0.7086408138275146
    },
    {
      "epoch": 0.000246124267578125,
      "model_forward_time": 0.11816668510437012,
      "step": 40325
    },
    {
      "epoch": 0.000246124267578125,
      "step": 40325,
      "training_step_time": 0.6586384773254395
    },
    {
      "epoch": 0.00024613037109375,
      "model_forward_time": 0.12007665634155273,
      "step": 40326
    },
    {
      "epoch": 0.00024613037109375,
      "step": 40326,
      "training_step_time": 0.660106897354126
    },
    {
      "epoch": 0.000246136474609375,
      "model_forward_time": 0.12128353118896484,
      "step": 40327
    },
    {
      "epoch": 0.000246136474609375,
      "step": 40327,
      "training_step_time": 0.6320724487304688
    },
    {
      "epoch": 0.000246142578125,
      "model_forward_time": 0.12291383743286133,
      "step": 40328
    },
    {
      "epoch": 0.000246142578125,
      "step": 40328,
      "training_step_time": 0.6980390548706055
    },
    {
      "epoch": 0.000246148681640625,
      "model_forward_time": 0.11978387832641602,
      "step": 40329
    },
    {
      "epoch": 0.000246148681640625,
      "step": 40329,
      "training_step_time": 0.7281382083892822
    },
    {
      "epoch": 0.00024615478515625,
      "grad_norm": 0.17925605177879333,
      "learning_rate": 2.6615687506848864e-05,
      "loss": 0.042,
      "step": 40330
    },
    {
      "epoch": 0.00024615478515625,
      "model_forward_time": 0.11876368522644043,
      "step": 40330
    },
    {
      "epoch": 0.00024615478515625,
      "step": 40330,
      "training_step_time": 0.6513073444366455
    },
    {
      "epoch": 0.000246160888671875,
      "model_forward_time": 0.1243886947631836,
      "step": 40331
    },
    {
      "epoch": 0.000246160888671875,
      "step": 40331,
      "training_step_time": 0.6622567176818848
    },
    {
      "epoch": 0.0002461669921875,
      "model_forward_time": 0.1202707290649414,
      "step": 40332
    },
    {
      "epoch": 0.0002461669921875,
      "step": 40332,
      "training_step_time": 0.6326694488525391
    },
    {
      "epoch": 0.000246173095703125,
      "model_forward_time": 0.11725497245788574,
      "step": 40333
    },
    {
      "epoch": 0.000246173095703125,
      "step": 40333,
      "training_step_time": 0.757112979888916
    },
    {
      "epoch": 0.00024617919921875,
      "model_forward_time": 0.13423490524291992,
      "step": 40334
    },
    {
      "epoch": 0.00024617919921875,
      "step": 40334,
      "training_step_time": 0.8013594150543213
    },
    {
      "epoch": 0.000246185302734375,
      "model_forward_time": 0.12286639213562012,
      "step": 40335
    },
    {
      "epoch": 0.000246185302734375,
      "step": 40335,
      "training_step_time": 0.7221677303314209
    },
    {
      "epoch": 0.00024619140625,
      "model_forward_time": 0.11820554733276367,
      "step": 40336
    },
    {
      "epoch": 0.00024619140625,
      "step": 40336,
      "training_step_time": 0.647367000579834
    },
    {
      "epoch": 0.000246197509765625,
      "model_forward_time": 0.11717820167541504,
      "step": 40337
    },
    {
      "epoch": 0.000246197509765625,
      "step": 40337,
      "training_step_time": 0.7530562877655029
    },
    {
      "epoch": 0.00024620361328125,
      "model_forward_time": 0.11562156677246094,
      "step": 40338
    },
    {
      "epoch": 0.00024620361328125,
      "step": 40338,
      "training_step_time": 0.6107268333435059
    },
    {
      "epoch": 0.000246209716796875,
      "model_forward_time": 0.11757254600524902,
      "step": 40339
    },
    {
      "epoch": 0.000246209716796875,
      "step": 40339,
      "training_step_time": 0.6457748413085938
    },
    {
      "epoch": 0.0002462158203125,
      "grad_norm": 0.10376063734292984,
      "learning_rate": 2.6591332843902884e-05,
      "loss": 0.0452,
      "step": 40340
    },
    {
      "epoch": 0.0002462158203125,
      "model_forward_time": 0.12008023262023926,
      "step": 40340
    },
    {
      "epoch": 0.0002462158203125,
      "step": 40340,
      "training_step_time": 0.5926659107208252
    },
    {
      "epoch": 0.000246221923828125,
      "model_forward_time": 0.1199955940246582,
      "step": 40341
    },
    {
      "epoch": 0.000246221923828125,
      "step": 40341,
      "training_step_time": 0.6554594039916992
    },
    {
      "epoch": 0.00024622802734375,
      "model_forward_time": 0.11759519577026367,
      "step": 40342
    },
    {
      "epoch": 0.00024622802734375,
      "step": 40342,
      "training_step_time": 0.6775393486022949
    },
    {
      "epoch": 0.000246234130859375,
      "model_forward_time": 0.12112188339233398,
      "step": 40343
    },
    {
      "epoch": 0.000246234130859375,
      "step": 40343,
      "training_step_time": 0.6055564880371094
    },
    {
      "epoch": 0.000246240234375,
      "model_forward_time": 0.1282503604888916,
      "step": 40344
    },
    {
      "epoch": 0.000246240234375,
      "step": 40344,
      "training_step_time": 0.7286977767944336
    },
    {
      "epoch": 0.000246246337890625,
      "model_forward_time": 0.12135910987854004,
      "step": 40345
    },
    {
      "epoch": 0.000246246337890625,
      "step": 40345,
      "training_step_time": 0.6315498352050781
    },
    {
      "epoch": 0.00024625244140625,
      "model_forward_time": 0.11913561820983887,
      "step": 40346
    },
    {
      "epoch": 0.00024625244140625,
      "step": 40346,
      "training_step_time": 0.6887598037719727
    },
    {
      "epoch": 0.000246258544921875,
      "model_forward_time": 0.11782717704772949,
      "step": 40347
    },
    {
      "epoch": 0.000246258544921875,
      "step": 40347,
      "training_step_time": 0.6238257884979248
    },
    {
      "epoch": 0.0002462646484375,
      "model_forward_time": 0.11647439002990723,
      "step": 40348
    },
    {
      "epoch": 0.0002462646484375,
      "step": 40348,
      "training_step_time": 0.6933979988098145
    },
    {
      "epoch": 0.000246270751953125,
      "model_forward_time": 0.11719107627868652,
      "step": 40349
    },
    {
      "epoch": 0.000246270751953125,
      "step": 40349,
      "training_step_time": 0.6111183166503906
    },
    {
      "epoch": 0.00024627685546875,
      "grad_norm": 0.12522631883621216,
      "learning_rate": 2.656698529189193e-05,
      "loss": 0.0454,
      "step": 40350
    },
    {
      "epoch": 0.00024627685546875,
      "model_forward_time": 0.11964082717895508,
      "step": 40350
    },
    {
      "epoch": 0.00024627685546875,
      "step": 40350,
      "training_step_time": 0.6821365356445312
    },
    {
      "epoch": 0.000246282958984375,
      "model_forward_time": 0.12206387519836426,
      "step": 40351
    },
    {
      "epoch": 0.000246282958984375,
      "step": 40351,
      "training_step_time": 0.76202392578125
    },
    {
      "epoch": 0.0002462890625,
      "model_forward_time": 0.11941385269165039,
      "step": 40352
    },
    {
      "epoch": 0.0002462890625,
      "step": 40352,
      "training_step_time": 0.7436370849609375
    },
    {
      "epoch": 0.000246295166015625,
      "model_forward_time": 0.11860942840576172,
      "step": 40353
    },
    {
      "epoch": 0.000246295166015625,
      "step": 40353,
      "training_step_time": 0.6674957275390625
    },
    {
      "epoch": 0.00024630126953125,
      "model_forward_time": 0.11898636817932129,
      "step": 40354
    },
    {
      "epoch": 0.00024630126953125,
      "step": 40354,
      "training_step_time": 0.6494498252868652
    },
    {
      "epoch": 0.000246307373046875,
      "model_forward_time": 0.11716246604919434,
      "step": 40355
    },
    {
      "epoch": 0.000246307373046875,
      "step": 40355,
      "training_step_time": 0.6266028881072998
    },
    {
      "epoch": 0.0002463134765625,
      "model_forward_time": 0.12381482124328613,
      "step": 40356
    },
    {
      "epoch": 0.0002463134765625,
      "step": 40356,
      "training_step_time": 0.7494902610778809
    },
    {
      "epoch": 0.000246319580078125,
      "model_forward_time": 0.11820745468139648,
      "step": 40357
    },
    {
      "epoch": 0.000246319580078125,
      "step": 40357,
      "training_step_time": 0.7165610790252686
    },
    {
      "epoch": 0.00024632568359375,
      "model_forward_time": 0.11886167526245117,
      "step": 40358
    },
    {
      "epoch": 0.00024632568359375,
      "step": 40358,
      "training_step_time": 0.5900144577026367
    },
    {
      "epoch": 0.000246331787109375,
      "model_forward_time": 0.11780977249145508,
      "step": 40359
    },
    {
      "epoch": 0.000246331787109375,
      "step": 40359,
      "training_step_time": 0.6407792568206787
    },
    {
      "epoch": 0.000246337890625,
      "grad_norm": 0.11259657144546509,
      "learning_rate": 2.654264485821214e-05,
      "loss": 0.0392,
      "step": 40360
    },
    {
      "epoch": 0.000246337890625,
      "model_forward_time": 0.11935639381408691,
      "step": 40360
    },
    {
      "epoch": 0.000246337890625,
      "step": 40360,
      "training_step_time": 0.5483825206756592
    },
    {
      "epoch": 0.000246343994140625,
      "model_forward_time": 0.12422752380371094,
      "step": 40361
    },
    {
      "epoch": 0.000246343994140625,
      "step": 40361,
      "training_step_time": 0.6670632362365723
    },
    {
      "epoch": 0.00024635009765625,
      "model_forward_time": 0.11609578132629395,
      "step": 40362
    },
    {
      "epoch": 0.00024635009765625,
      "step": 40362,
      "training_step_time": 0.6346085071563721
    },
    {
      "epoch": 0.000246356201171875,
      "model_forward_time": 0.11997056007385254,
      "step": 40363
    },
    {
      "epoch": 0.000246356201171875,
      "step": 40363,
      "training_step_time": 0.7102043628692627
    },
    {
      "epoch": 0.0002463623046875,
      "model_forward_time": 0.11822271347045898,
      "step": 40364
    },
    {
      "epoch": 0.0002463623046875,
      "step": 40364,
      "training_step_time": 0.5834393501281738
    },
    {
      "epoch": 0.000246368408203125,
      "model_forward_time": 0.11746859550476074,
      "step": 40365
    },
    {
      "epoch": 0.000246368408203125,
      "step": 40365,
      "training_step_time": 0.6618540287017822
    },
    {
      "epoch": 0.00024637451171875,
      "model_forward_time": 0.11971330642700195,
      "step": 40366
    },
    {
      "epoch": 0.00024637451171875,
      "step": 40366,
      "training_step_time": 0.6685183048248291
    },
    {
      "epoch": 0.000246380615234375,
      "model_forward_time": 0.1325676441192627,
      "step": 40367
    },
    {
      "epoch": 0.000246380615234375,
      "step": 40367,
      "training_step_time": 0.7214415073394775
    },
    {
      "epoch": 0.00024638671875,
      "model_forward_time": 0.1202688217163086,
      "step": 40368
    },
    {
      "epoch": 0.00024638671875,
      "step": 40368,
      "training_step_time": 0.6228656768798828
    },
    {
      "epoch": 0.000246392822265625,
      "model_forward_time": 0.12017488479614258,
      "step": 40369
    },
    {
      "epoch": 0.000246392822265625,
      "step": 40369,
      "training_step_time": 0.5785510540008545
    },
    {
      "epoch": 0.00024639892578125,
      "grad_norm": 0.09296201914548874,
      "learning_rate": 2.6518311550257478e-05,
      "loss": 0.0425,
      "step": 40370
    },
    {
      "epoch": 0.00024639892578125,
      "model_forward_time": 0.11843180656433105,
      "step": 40370
    },
    {
      "epoch": 0.00024639892578125,
      "step": 40370,
      "training_step_time": 0.7172927856445312
    },
    {
      "epoch": 0.000246405029296875,
      "model_forward_time": 0.1269395351409912,
      "step": 40371
    },
    {
      "epoch": 0.000246405029296875,
      "step": 40371,
      "training_step_time": 0.5801613330841064
    },
    {
      "epoch": 0.0002464111328125,
      "model_forward_time": 0.11899542808532715,
      "step": 40372
    },
    {
      "epoch": 0.0002464111328125,
      "step": 40372,
      "training_step_time": 0.516796350479126
    },
    {
      "epoch": 0.000246417236328125,
      "model_forward_time": 0.11842489242553711,
      "step": 40373
    },
    {
      "epoch": 0.000246417236328125,
      "step": 40373,
      "training_step_time": 0.4636993408203125
    },
    {
      "epoch": 0.00024642333984375,
      "model_forward_time": 0.11678791046142578,
      "step": 40374
    },
    {
      "epoch": 0.00024642333984375,
      "step": 40374,
      "training_step_time": 0.5202145576477051
    },
    {
      "epoch": 0.000246429443359375,
      "model_forward_time": 0.11724424362182617,
      "step": 40375
    },
    {
      "epoch": 0.000246429443359375,
      "step": 40375,
      "training_step_time": 0.4584386348724365
    },
    {
      "epoch": 0.000246435546875,
      "model_forward_time": 0.11554193496704102,
      "step": 40376
    },
    {
      "epoch": 0.000246435546875,
      "step": 40376,
      "training_step_time": 0.45278024673461914
    },
    {
      "epoch": 0.000246441650390625,
      "model_forward_time": 0.11663269996643066,
      "step": 40377
    },
    {
      "epoch": 0.000246441650390625,
      "step": 40377,
      "training_step_time": 0.47922515869140625
    },
    {
      "epoch": 0.00024644775390625,
      "model_forward_time": 0.11501479148864746,
      "step": 40378
    },
    {
      "epoch": 0.00024644775390625,
      "step": 40378,
      "training_step_time": 0.4162905216217041
    },
    {
      "epoch": 0.000246453857421875,
      "model_forward_time": 0.11575031280517578,
      "step": 40379
    },
    {
      "epoch": 0.000246453857421875,
      "step": 40379,
      "training_step_time": 0.4187507629394531
    },
    {
      "epoch": 0.0002464599609375,
      "grad_norm": 0.14506544172763824,
      "learning_rate": 2.6493985375419778e-05,
      "loss": 0.0424,
      "step": 40380
    },
    {
      "epoch": 0.0002464599609375,
      "model_forward_time": 0.11553406715393066,
      "step": 40380
    },
    {
      "epoch": 0.0002464599609375,
      "step": 40380,
      "training_step_time": 0.4355475902557373
    },
    {
      "epoch": 0.000246466064453125,
      "model_forward_time": 0.11583733558654785,
      "step": 40381
    },
    {
      "epoch": 0.000246466064453125,
      "step": 40381,
      "training_step_time": 0.4099142551422119
    },
    {
      "epoch": 0.00024647216796875,
      "model_forward_time": 0.11505460739135742,
      "step": 40382
    },
    {
      "epoch": 0.00024647216796875,
      "step": 40382,
      "training_step_time": 0.4001142978668213
    },
    {
      "epoch": 0.000246478271484375,
      "model_forward_time": 0.1157076358795166,
      "step": 40383
    },
    {
      "epoch": 0.000246478271484375,
      "step": 40383,
      "training_step_time": 0.3789176940917969
    },
    {
      "epoch": 0.000246484375,
      "model_forward_time": 0.1147613525390625,
      "step": 40384
    },
    {
      "epoch": 0.000246484375,
      "step": 40384,
      "training_step_time": 0.43126749992370605
    },
    {
      "epoch": 0.000246490478515625,
      "model_forward_time": 0.1164698600769043,
      "step": 40385
    },
    {
      "epoch": 0.000246490478515625,
      "step": 40385,
      "training_step_time": 0.40123629570007324
    },
    {
      "epoch": 0.00024649658203125,
      "model_forward_time": 0.11536765098571777,
      "step": 40386
    },
    {
      "epoch": 0.00024649658203125,
      "step": 40386,
      "training_step_time": 0.4623892307281494
    },
    {
      "epoch": 0.000246502685546875,
      "model_forward_time": 0.11611604690551758,
      "step": 40387
    },
    {
      "epoch": 0.000246502685546875,
      "step": 40387,
      "training_step_time": 0.3856465816497803
    },
    {
      "epoch": 0.0002465087890625,
      "model_forward_time": 0.11516189575195312,
      "step": 40388
    },
    {
      "epoch": 0.0002465087890625,
      "step": 40388,
      "training_step_time": 0.4124937057495117
    },
    {
      "epoch": 0.000246514892578125,
      "model_forward_time": 0.11507868766784668,
      "step": 40389
    },
    {
      "epoch": 0.000246514892578125,
      "step": 40389,
      "training_step_time": 0.4108245372772217
    },
    {
      "epoch": 0.00024652099609375,
      "grad_norm": 0.1464349627494812,
      "learning_rate": 2.6469666341088677e-05,
      "loss": 0.0387,
      "step": 40390
    },
    {
      "epoch": 0.00024652099609375,
      "model_forward_time": 0.11528778076171875,
      "step": 40390
    },
    {
      "epoch": 0.00024652099609375,
      "step": 40390,
      "training_step_time": 0.3984694480895996
    },
    {
      "epoch": 0.000246527099609375,
      "model_forward_time": 0.11513972282409668,
      "step": 40391
    },
    {
      "epoch": 0.000246527099609375,
      "step": 40391,
      "training_step_time": 0.5255358219146729
    },
    {
      "epoch": 0.000246533203125,
      "model_forward_time": 0.11609339714050293,
      "step": 40392
    },
    {
      "epoch": 0.000246533203125,
      "step": 40392,
      "training_step_time": 0.4250984191894531
    },
    {
      "epoch": 0.000246539306640625,
      "model_forward_time": 0.11468124389648438,
      "step": 40393
    },
    {
      "epoch": 0.000246539306640625,
      "step": 40393,
      "training_step_time": 0.48779940605163574
    },
    {
      "epoch": 0.00024654541015625,
      "model_forward_time": 0.1153414249420166,
      "step": 40394
    },
    {
      "epoch": 0.00024654541015625,
      "step": 40394,
      "training_step_time": 0.40035295486450195
    },
    {
      "epoch": 0.000246551513671875,
      "model_forward_time": 0.11530470848083496,
      "step": 40395
    },
    {
      "epoch": 0.000246551513671875,
      "step": 40395,
      "training_step_time": 0.3938930034637451
    },
    {
      "epoch": 0.0002465576171875,
      "model_forward_time": 0.11484694480895996,
      "step": 40396
    },
    {
      "epoch": 0.0002465576171875,
      "step": 40396,
      "training_step_time": 0.3870964050292969
    },
    {
      "epoch": 0.000246563720703125,
      "model_forward_time": 0.11514115333557129,
      "step": 40397
    },
    {
      "epoch": 0.000246563720703125,
      "step": 40397,
      "training_step_time": 0.3913717269897461
    },
    {
      "epoch": 0.00024656982421875,
      "model_forward_time": 0.11565685272216797,
      "step": 40398
    },
    {
      "epoch": 0.00024656982421875,
      "step": 40398,
      "training_step_time": 0.42429637908935547
    },
    {
      "epoch": 0.000246575927734375,
      "model_forward_time": 0.11564278602600098,
      "step": 40399
    },
    {
      "epoch": 0.000246575927734375,
      "step": 40399,
      "training_step_time": 0.4147014617919922
    },
    {
      "epoch": 0.00024658203125,
      "grad_norm": 0.10284851491451263,
      "learning_rate": 2.644535445465164e-05,
      "loss": 0.0409,
      "step": 40400
    },
    {
      "epoch": 0.00024658203125,
      "model_forward_time": 0.11455774307250977,
      "step": 40400
    },
    {
      "epoch": 0.00024658203125,
      "step": 40400,
      "training_step_time": 0.43744707107543945
    },
    {
      "epoch": 0.000246588134765625,
      "model_forward_time": 0.11531448364257812,
      "step": 40401
    },
    {
      "epoch": 0.000246588134765625,
      "step": 40401,
      "training_step_time": 0.3673250675201416
    },
    {
      "epoch": 0.00024659423828125,
      "model_forward_time": 0.11495399475097656,
      "step": 40402
    },
    {
      "epoch": 0.00024659423828125,
      "step": 40402,
      "training_step_time": 0.426288366317749
    },
    {
      "epoch": 0.000246600341796875,
      "model_forward_time": 0.11502242088317871,
      "step": 40403
    },
    {
      "epoch": 0.000246600341796875,
      "step": 40403,
      "training_step_time": 0.49947500228881836
    },
    {
      "epoch": 0.0002466064453125,
      "model_forward_time": 0.11449861526489258,
      "step": 40404
    },
    {
      "epoch": 0.0002466064453125,
      "step": 40404,
      "training_step_time": 0.38342761993408203
    },
    {
      "epoch": 0.000246612548828125,
      "model_forward_time": 0.11512207984924316,
      "step": 40405
    },
    {
      "epoch": 0.000246612548828125,
      "step": 40405,
      "training_step_time": 0.395967960357666
    },
    {
      "epoch": 0.00024661865234375,
      "model_forward_time": 0.11479425430297852,
      "step": 40406
    },
    {
      "epoch": 0.00024661865234375,
      "step": 40406,
      "training_step_time": 0.4383072853088379
    },
    {
      "epoch": 0.000246624755859375,
      "model_forward_time": 0.11613774299621582,
      "step": 40407
    },
    {
      "epoch": 0.000246624755859375,
      "step": 40407,
      "training_step_time": 0.4600203037261963
    },
    {
      "epoch": 0.000246630859375,
      "model_forward_time": 0.11499214172363281,
      "step": 40408
    },
    {
      "epoch": 0.000246630859375,
      "step": 40408,
      "training_step_time": 0.45409655570983887
    },
    {
      "epoch": 0.000246636962890625,
      "model_forward_time": 0.11523294448852539,
      "step": 40409
    },
    {
      "epoch": 0.000246636962890625,
      "step": 40409,
      "training_step_time": 0.3962821960449219
    },
    {
      "epoch": 0.00024664306640625,
      "grad_norm": 0.12492036819458008,
      "learning_rate": 2.642104972349403e-05,
      "loss": 0.0427,
      "step": 40410
    },
    {
      "epoch": 0.00024664306640625,
      "model_forward_time": 0.11459207534790039,
      "step": 40410
    },
    {
      "epoch": 0.00024664306640625,
      "step": 40410,
      "training_step_time": 0.4009394645690918
    },
    {
      "epoch": 0.000246649169921875,
      "model_forward_time": 0.1154789924621582,
      "step": 40411
    },
    {
      "epoch": 0.000246649169921875,
      "step": 40411,
      "training_step_time": 0.4004364013671875
    },
    {
      "epoch": 0.0002466552734375,
      "model_forward_time": 0.11481761932373047,
      "step": 40412
    },
    {
      "epoch": 0.0002466552734375,
      "step": 40412,
      "training_step_time": 0.3963661193847656
    },
    {
      "epoch": 0.000246661376953125,
      "model_forward_time": 0.11495089530944824,
      "step": 40413
    },
    {
      "epoch": 0.000246661376953125,
      "step": 40413,
      "training_step_time": 0.4352254867553711
    },
    {
      "epoch": 0.00024666748046875,
      "model_forward_time": 0.11465978622436523,
      "step": 40414
    },
    {
      "epoch": 0.00024666748046875,
      "step": 40414,
      "training_step_time": 0.41674375534057617
    },
    {
      "epoch": 0.000246673583984375,
      "model_forward_time": 0.1149909496307373,
      "step": 40415
    },
    {
      "epoch": 0.000246673583984375,
      "step": 40415,
      "training_step_time": 0.4371676445007324
    },
    {
      "epoch": 0.0002466796875,
      "model_forward_time": 0.11538338661193848,
      "step": 40416
    },
    {
      "epoch": 0.0002466796875,
      "step": 40416,
      "training_step_time": 0.3954784870147705
    },
    {
      "epoch": 0.000246685791015625,
      "model_forward_time": 0.11566495895385742,
      "step": 40417
    },
    {
      "epoch": 0.000246685791015625,
      "step": 40417,
      "training_step_time": 0.4928920269012451
    },
    {
      "epoch": 0.00024669189453125,
      "model_forward_time": 0.11522960662841797,
      "step": 40418
    },
    {
      "epoch": 0.00024669189453125,
      "step": 40418,
      "training_step_time": 0.39690351486206055
    },
    {
      "epoch": 0.000246697998046875,
      "model_forward_time": 0.11474895477294922,
      "step": 40419
    },
    {
      "epoch": 0.000246697998046875,
      "step": 40419,
      "training_step_time": 0.3882629871368408
    },
    {
      "epoch": 0.0002467041015625,
      "grad_norm": 0.12862710654735565,
      "learning_rate": 2.6396752154998915e-05,
      "loss": 0.0499,
      "step": 40420
    },
    {
      "epoch": 0.0002467041015625,
      "model_forward_time": 0.11526155471801758,
      "step": 40420
    },
    {
      "epoch": 0.0002467041015625,
      "step": 40420,
      "training_step_time": 0.40047430992126465
    },
    {
      "epoch": 0.000246710205078125,
      "model_forward_time": 0.11509943008422852,
      "step": 40421
    },
    {
      "epoch": 0.000246710205078125,
      "step": 40421,
      "training_step_time": 0.44318103790283203
    },
    {
      "epoch": 0.00024671630859375,
      "model_forward_time": 0.11435341835021973,
      "step": 40422
    },
    {
      "epoch": 0.00024671630859375,
      "step": 40422,
      "training_step_time": 0.5410113334655762
    },
    {
      "epoch": 0.000246722412109375,
      "model_forward_time": 0.1151430606842041,
      "step": 40423
    },
    {
      "epoch": 0.000246722412109375,
      "step": 40423,
      "training_step_time": 0.4094698429107666
    },
    {
      "epoch": 0.000246728515625,
      "model_forward_time": 0.114349365234375,
      "step": 40424
    },
    {
      "epoch": 0.000246728515625,
      "step": 40424,
      "training_step_time": 0.39418649673461914
    },
    {
      "epoch": 0.000246734619140625,
      "model_forward_time": 0.11496424674987793,
      "step": 40425
    },
    {
      "epoch": 0.000246734619140625,
      "step": 40425,
      "training_step_time": 0.4026930332183838
    },
    {
      "epoch": 0.00024674072265625,
      "model_forward_time": 0.11427450180053711,
      "step": 40426
    },
    {
      "epoch": 0.00024674072265625,
      "step": 40426,
      "training_step_time": 0.40032958984375
    },
    {
      "epoch": 0.000246746826171875,
      "model_forward_time": 0.11478686332702637,
      "step": 40427
    },
    {
      "epoch": 0.000246746826171875,
      "step": 40427,
      "training_step_time": 0.39275193214416504
    },
    {
      "epoch": 0.0002467529296875,
      "model_forward_time": 0.11523842811584473,
      "step": 40428
    },
    {
      "epoch": 0.0002467529296875,
      "step": 40428,
      "training_step_time": 0.4053044319152832
    },
    {
      "epoch": 0.000246759033203125,
      "model_forward_time": 0.11561322212219238,
      "step": 40429
    },
    {
      "epoch": 0.000246759033203125,
      "step": 40429,
      "training_step_time": 0.4225461483001709
    },
    {
      "epoch": 0.00024676513671875,
      "grad_norm": 0.149600088596344,
      "learning_rate": 2.6372461756547306e-05,
      "loss": 0.0437,
      "step": 40430
    },
    {
      "epoch": 0.00024676513671875,
      "model_forward_time": 0.11528635025024414,
      "step": 40430
    },
    {
      "epoch": 0.00024676513671875,
      "step": 40430,
      "training_step_time": 0.4378829002380371
    },
    {
      "epoch": 0.000246771240234375,
      "model_forward_time": 0.11563420295715332,
      "step": 40431
    },
    {
      "epoch": 0.000246771240234375,
      "step": 40431,
      "training_step_time": 0.4570448398590088
    },
    {
      "epoch": 0.00024677734375,
      "model_forward_time": 0.11545181274414062,
      "step": 40432
    },
    {
      "epoch": 0.00024677734375,
      "step": 40432,
      "training_step_time": 0.4923715591430664
    },
    {
      "epoch": 0.000246783447265625,
      "model_forward_time": 0.11523723602294922,
      "step": 40433
    },
    {
      "epoch": 0.000246783447265625,
      "step": 40433,
      "training_step_time": 0.4096090793609619
    },
    {
      "epoch": 0.00024678955078125,
      "model_forward_time": 0.11492490768432617,
      "step": 40434
    },
    {
      "epoch": 0.00024678955078125,
      "step": 40434,
      "training_step_time": 0.40434908866882324
    },
    {
      "epoch": 0.000246795654296875,
      "model_forward_time": 0.11529779434204102,
      "step": 40435
    },
    {
      "epoch": 0.000246795654296875,
      "step": 40435,
      "training_step_time": 0.3892998695373535
    },
    {
      "epoch": 0.0002468017578125,
      "model_forward_time": 0.11464285850524902,
      "step": 40436
    },
    {
      "epoch": 0.0002468017578125,
      "step": 40436,
      "training_step_time": 0.4340496063232422
    },
    {
      "epoch": 0.000246807861328125,
      "model_forward_time": 0.11506986618041992,
      "step": 40437
    },
    {
      "epoch": 0.000246807861328125,
      "step": 40437,
      "training_step_time": 0.39517903327941895
    },
    {
      "epoch": 0.00024681396484375,
      "model_forward_time": 0.11518526077270508,
      "step": 40438
    },
    {
      "epoch": 0.00024681396484375,
      "step": 40438,
      "training_step_time": 0.38066601753234863
    },
    {
      "epoch": 0.000246820068359375,
      "model_forward_time": 0.11531686782836914,
      "step": 40439
    },
    {
      "epoch": 0.000246820068359375,
      "step": 40439,
      "training_step_time": 0.3914787769317627
    },
    {
      "epoch": 0.000246826171875,
      "grad_norm": 0.1402713507413864,
      "learning_rate": 2.6348178535517966e-05,
      "loss": 0.0449,
      "step": 40440
    },
    {
      "epoch": 0.000246826171875,
      "model_forward_time": 0.1155862808227539,
      "step": 40440
    },
    {
      "epoch": 0.000246826171875,
      "step": 40440,
      "training_step_time": 0.405900239944458
    },
    {
      "epoch": 0.000246832275390625,
      "model_forward_time": 0.11510777473449707,
      "step": 40441
    },
    {
      "epoch": 0.000246832275390625,
      "step": 40441,
      "training_step_time": 0.3953890800476074
    },
    {
      "epoch": 0.00024683837890625,
      "model_forward_time": 0.11523246765136719,
      "step": 40442
    },
    {
      "epoch": 0.00024683837890625,
      "step": 40442,
      "training_step_time": 0.3915412425994873
    },
    {
      "epoch": 0.000246844482421875,
      "model_forward_time": 0.11553740501403809,
      "step": 40443
    },
    {
      "epoch": 0.000246844482421875,
      "step": 40443,
      "training_step_time": 0.4290773868560791
    },
    {
      "epoch": 0.0002468505859375,
      "model_forward_time": 0.11500811576843262,
      "step": 40444
    },
    {
      "epoch": 0.0002468505859375,
      "step": 40444,
      "training_step_time": 0.4115900993347168
    },
    {
      "epoch": 0.000246856689453125,
      "model_forward_time": 0.11515259742736816,
      "step": 40445
    },
    {
      "epoch": 0.000246856689453125,
      "step": 40445,
      "training_step_time": 0.37796950340270996
    },
    {
      "epoch": 0.00024686279296875,
      "model_forward_time": 0.11508774757385254,
      "step": 40446
    },
    {
      "epoch": 0.00024686279296875,
      "step": 40446,
      "training_step_time": 0.4241445064544678
    },
    {
      "epoch": 0.000246868896484375,
      "model_forward_time": 0.1155695915222168,
      "step": 40447
    },
    {
      "epoch": 0.000246868896484375,
      "step": 40447,
      "training_step_time": 0.49890756607055664
    },
    {
      "epoch": 0.000246875,
      "model_forward_time": 0.1153416633605957,
      "step": 40448
    },
    {
      "epoch": 0.000246875,
      "step": 40448,
      "training_step_time": 0.41054749488830566
    },
    {
      "epoch": 0.000246881103515625,
      "model_forward_time": 0.11484742164611816,
      "step": 40449
    },
    {
      "epoch": 0.000246881103515625,
      "step": 40449,
      "training_step_time": 0.5453884601593018
    },
    {
      "epoch": 0.00024688720703125,
      "grad_norm": 0.11488134413957596,
      "learning_rate": 2.6323902499287473e-05,
      "loss": 0.0417,
      "step": 40450
    },
    {
      "epoch": 0.00024688720703125,
      "model_forward_time": 0.11476302146911621,
      "step": 40450
    },
    {
      "epoch": 0.00024688720703125,
      "step": 40450,
      "training_step_time": 0.45435619354248047
    },
    {
      "epoch": 0.000246893310546875,
      "model_forward_time": 0.11474323272705078,
      "step": 40451
    },
    {
      "epoch": 0.000246893310546875,
      "step": 40451,
      "training_step_time": 0.3988344669342041
    },
    {
      "epoch": 0.0002468994140625,
      "model_forward_time": 0.11444783210754395,
      "step": 40452
    },
    {
      "epoch": 0.0002468994140625,
      "step": 40452,
      "training_step_time": 0.38984036445617676
    },
    {
      "epoch": 0.000246905517578125,
      "model_forward_time": 0.11549901962280273,
      "step": 40453
    },
    {
      "epoch": 0.000246905517578125,
      "step": 40453,
      "training_step_time": 0.39596080780029297
    },
    {
      "epoch": 0.00024691162109375,
      "model_forward_time": 0.11478376388549805,
      "step": 40454
    },
    {
      "epoch": 0.00024691162109375,
      "step": 40454,
      "training_step_time": 0.38899755477905273
    },
    {
      "epoch": 0.000246917724609375,
      "model_forward_time": 0.11556315422058105,
      "step": 40455
    },
    {
      "epoch": 0.000246917724609375,
      "step": 40455,
      "training_step_time": 0.38068056106567383
    },
    {
      "epoch": 0.000246923828125,
      "model_forward_time": 0.11578059196472168,
      "step": 40456
    },
    {
      "epoch": 0.000246923828125,
      "step": 40456,
      "training_step_time": 0.3944535255432129
    },
    {
      "epoch": 0.000246929931640625,
      "model_forward_time": 0.11540913581848145,
      "step": 40457
    },
    {
      "epoch": 0.000246929931640625,
      "step": 40457,
      "training_step_time": 0.485001802444458
    },
    {
      "epoch": 0.00024693603515625,
      "model_forward_time": 0.11556482315063477,
      "step": 40458
    },
    {
      "epoch": 0.00024693603515625,
      "step": 40458,
      "training_step_time": 0.43851518630981445
    },
    {
      "epoch": 0.000246942138671875,
      "model_forward_time": 0.11551380157470703,
      "step": 40459
    },
    {
      "epoch": 0.000246942138671875,
      "step": 40459,
      "training_step_time": 0.4994819164276123
    },
    {
      "epoch": 0.0002469482421875,
      "grad_norm": 0.15940499305725098,
      "learning_rate": 2.629963365523031e-05,
      "loss": 0.0448,
      "step": 40460
    },
    {
      "epoch": 0.0002469482421875,
      "model_forward_time": 0.11489439010620117,
      "step": 40460
    },
    {
      "epoch": 0.0002469482421875,
      "step": 40460,
      "training_step_time": 0.3961050510406494
    },
    {
      "epoch": 0.000246954345703125,
      "model_forward_time": 0.11925387382507324,
      "step": 40461
    },
    {
      "epoch": 0.000246954345703125,
      "step": 40461,
      "training_step_time": 0.4808952808380127
    },
    {
      "epoch": 0.00024696044921875,
      "model_forward_time": 0.12137985229492188,
      "step": 40462
    },
    {
      "epoch": 0.00024696044921875,
      "step": 40462,
      "training_step_time": 0.3968691825866699
    },
    {
      "epoch": 0.000246966552734375,
      "model_forward_time": 0.11555266380310059,
      "step": 40463
    },
    {
      "epoch": 0.000246966552734375,
      "step": 40463,
      "training_step_time": 0.37793445587158203
    },
    {
      "epoch": 0.00024697265625,
      "model_forward_time": 0.11510062217712402,
      "step": 40464
    },
    {
      "epoch": 0.00024697265625,
      "step": 40464,
      "training_step_time": 0.39365339279174805
    },
    {
      "epoch": 0.000246978759765625,
      "model_forward_time": 0.11569905281066895,
      "step": 40465
    },
    {
      "epoch": 0.000246978759765625,
      "step": 40465,
      "training_step_time": 0.5062921047210693
    },
    {
      "epoch": 0.00024698486328125,
      "model_forward_time": 0.11484050750732422,
      "step": 40466
    },
    {
      "epoch": 0.00024698486328125,
      "step": 40466,
      "training_step_time": 0.39371442794799805
    },
    {
      "epoch": 0.000246990966796875,
      "model_forward_time": 0.11524248123168945,
      "step": 40467
    },
    {
      "epoch": 0.000246990966796875,
      "step": 40467,
      "training_step_time": 0.39076733589172363
    },
    {
      "epoch": 0.0002469970703125,
      "model_forward_time": 0.11434602737426758,
      "step": 40468
    },
    {
      "epoch": 0.0002469970703125,
      "step": 40468,
      "training_step_time": 0.3929712772369385
    },
    {
      "epoch": 0.000247003173828125,
      "model_forward_time": 0.1154015064239502,
      "step": 40469
    },
    {
      "epoch": 0.000247003173828125,
      "step": 40469,
      "training_step_time": 0.3911275863647461
    },
    {
      "epoch": 0.00024700927734375,
      "grad_norm": 0.12699584662914276,
      "learning_rate": 2.6275372010718635e-05,
      "loss": 0.0385,
      "step": 40470
    },
    {
      "epoch": 0.00024700927734375,
      "model_forward_time": 0.11585664749145508,
      "step": 40470
    },
    {
      "epoch": 0.00024700927734375,
      "step": 40470,
      "training_step_time": 0.4022841453552246
    },
    {
      "epoch": 0.000247015380859375,
      "model_forward_time": 0.11657977104187012,
      "step": 40471
    },
    {
      "epoch": 0.000247015380859375,
      "step": 40471,
      "training_step_time": 0.3919999599456787
    },
    {
      "epoch": 0.000247021484375,
      "model_forward_time": 0.11590743064880371,
      "step": 40472
    },
    {
      "epoch": 0.000247021484375,
      "step": 40472,
      "training_step_time": 0.4863448143005371
    },
    {
      "epoch": 0.000247027587890625,
      "model_forward_time": 0.11498904228210449,
      "step": 40473
    },
    {
      "epoch": 0.000247027587890625,
      "step": 40473,
      "training_step_time": 0.45473480224609375
    },
    {
      "epoch": 0.00024703369140625,
      "model_forward_time": 0.11693859100341797,
      "step": 40474
    },
    {
      "epoch": 0.00024703369140625,
      "step": 40474,
      "training_step_time": 0.4936814308166504
    },
    {
      "epoch": 0.000247039794921875,
      "model_forward_time": 0.1175851821899414,
      "step": 40475
    },
    {
      "epoch": 0.000247039794921875,
      "step": 40475,
      "training_step_time": 0.4740254878997803
    },
    {
      "epoch": 0.0002470458984375,
      "model_forward_time": 0.11710596084594727,
      "step": 40476
    },
    {
      "epoch": 0.0002470458984375,
      "step": 40476,
      "training_step_time": 0.4430527687072754
    },
    {
      "epoch": 0.000247052001953125,
      "model_forward_time": 0.11854672431945801,
      "step": 40477
    },
    {
      "epoch": 0.000247052001953125,
      "step": 40477,
      "training_step_time": 0.3856995105743408
    },
    {
      "epoch": 0.00024705810546875,
      "model_forward_time": 0.11504507064819336,
      "step": 40478
    },
    {
      "epoch": 0.00024705810546875,
      "step": 40478,
      "training_step_time": 0.38065457344055176
    },
    {
      "epoch": 0.000247064208984375,
      "model_forward_time": 0.11528968811035156,
      "step": 40479
    },
    {
      "epoch": 0.000247064208984375,
      "step": 40479,
      "training_step_time": 0.4542219638824463
    },
    {
      "epoch": 0.0002470703125,
      "grad_norm": 0.09760888665914536,
      "learning_rate": 2.6251117573122563e-05,
      "loss": 0.0415,
      "step": 40480
    },
    {
      "epoch": 0.0002470703125,
      "model_forward_time": 0.11514163017272949,
      "step": 40480
    },
    {
      "epoch": 0.0002470703125,
      "step": 40480,
      "training_step_time": 0.3914783000946045
    },
    {
      "epoch": 0.000247076416015625,
      "model_forward_time": 0.11521148681640625,
      "step": 40481
    },
    {
      "epoch": 0.000247076416015625,
      "step": 40481,
      "training_step_time": 0.39403486251831055
    },
    {
      "epoch": 0.00024708251953125,
      "model_forward_time": 0.11502909660339355,
      "step": 40482
    },
    {
      "epoch": 0.00024708251953125,
      "step": 40482,
      "training_step_time": 0.39604854583740234
    },
    {
      "epoch": 0.000247088623046875,
      "model_forward_time": 0.11528992652893066,
      "step": 40483
    },
    {
      "epoch": 0.000247088623046875,
      "step": 40483,
      "training_step_time": 0.3908975124359131
    },
    {
      "epoch": 0.0002470947265625,
      "model_forward_time": 0.11516308784484863,
      "step": 40484
    },
    {
      "epoch": 0.0002470947265625,
      "step": 40484,
      "training_step_time": 0.3923380374908447
    },
    {
      "epoch": 0.000247100830078125,
      "model_forward_time": 0.11567831039428711,
      "step": 40485
    },
    {
      "epoch": 0.000247100830078125,
      "step": 40485,
      "training_step_time": 0.3962864875793457
    },
    {
      "epoch": 0.00024710693359375,
      "model_forward_time": 0.11506080627441406,
      "step": 40486
    },
    {
      "epoch": 0.00024710693359375,
      "step": 40486,
      "training_step_time": 0.3994009494781494
    },
    {
      "epoch": 0.000247113037109375,
      "model_forward_time": 0.11556291580200195,
      "step": 40487
    },
    {
      "epoch": 0.000247113037109375,
      "step": 40487,
      "training_step_time": 0.4652674198150635
    },
    {
      "epoch": 0.000247119140625,
      "model_forward_time": 0.11502885818481445,
      "step": 40488
    },
    {
      "epoch": 0.000247119140625,
      "step": 40488,
      "training_step_time": 0.411456823348999
    },
    {
      "epoch": 0.000247125244140625,
      "model_forward_time": 0.11546564102172852,
      "step": 40489
    },
    {
      "epoch": 0.000247125244140625,
      "step": 40489,
      "training_step_time": 0.489238977432251
    },
    {
      "epoch": 0.00024713134765625,
      "grad_norm": 0.09663919359445572,
      "learning_rate": 2.6226870349809885e-05,
      "loss": 0.0364,
      "step": 40490
    },
    {
      "epoch": 0.00024713134765625,
      "model_forward_time": 0.11652112007141113,
      "step": 40490
    },
    {
      "epoch": 0.00024713134765625,
      "step": 40490,
      "training_step_time": 0.5132787227630615
    },
    {
      "epoch": 0.000247137451171875,
      "model_forward_time": 0.1155250072479248,
      "step": 40491
    },
    {
      "epoch": 0.000247137451171875,
      "step": 40491,
      "training_step_time": 0.3916809558868408
    },
    {
      "epoch": 0.0002471435546875,
      "model_forward_time": 0.11456680297851562,
      "step": 40492
    },
    {
      "epoch": 0.0002471435546875,
      "step": 40492,
      "training_step_time": 0.4590792655944824
    },
    {
      "epoch": 0.000247149658203125,
      "model_forward_time": 0.11515641212463379,
      "step": 40493
    },
    {
      "epoch": 0.000247149658203125,
      "step": 40493,
      "training_step_time": 0.4048805236816406
    },
    {
      "epoch": 0.00024715576171875,
      "model_forward_time": 0.1150503158569336,
      "step": 40494
    },
    {
      "epoch": 0.00024715576171875,
      "step": 40494,
      "training_step_time": 0.3964071273803711
    },
    {
      "epoch": 0.000247161865234375,
      "model_forward_time": 0.11480093002319336,
      "step": 40495
    },
    {
      "epoch": 0.000247161865234375,
      "step": 40495,
      "training_step_time": 0.38973236083984375
    },
    {
      "epoch": 0.00024716796875,
      "model_forward_time": 0.11568379402160645,
      "step": 40496
    },
    {
      "epoch": 0.00024716796875,
      "step": 40496,
      "training_step_time": 0.39579081535339355
    },
    {
      "epoch": 0.000247174072265625,
      "model_forward_time": 0.11432099342346191,
      "step": 40497
    },
    {
      "epoch": 0.000247174072265625,
      "step": 40497,
      "training_step_time": 0.3905913829803467
    },
    {
      "epoch": 0.00024718017578125,
      "model_forward_time": 0.11550688743591309,
      "step": 40498
    },
    {
      "epoch": 0.00024718017578125,
      "step": 40498,
      "training_step_time": 0.3966183662414551
    },
    {
      "epoch": 0.000247186279296875,
      "model_forward_time": 0.11518096923828125,
      "step": 40499
    },
    {
      "epoch": 0.000247186279296875,
      "step": 40499,
      "training_step_time": 0.40683889389038086
    },
    {
      "epoch": 0.0002471923828125,
      "grad_norm": 0.12004134804010391,
      "learning_rate": 2.6202630348146324e-05,
      "loss": 0.0404,
      "step": 40500
    },
    {
      "epoch": 0.0002471923828125,
      "model_forward_time": 0.11505389213562012,
      "step": 40500
    },
    {
      "epoch": 0.0002471923828125,
      "step": 40500,
      "training_step_time": 0.3981058597564697
    },
    {
      "epoch": 0.000247198486328125,
      "model_forward_time": 0.11549997329711914,
      "step": 40501
    },
    {
      "epoch": 0.000247198486328125,
      "step": 40501,
      "training_step_time": 0.3955080509185791
    },
    {
      "epoch": 0.00024720458984375,
      "model_forward_time": 0.11539816856384277,
      "step": 40502
    },
    {
      "epoch": 0.00024720458984375,
      "step": 40502,
      "training_step_time": 0.4635963439941406
    },
    {
      "epoch": 0.000247210693359375,
      "model_forward_time": 0.11502265930175781,
      "step": 40503
    },
    {
      "epoch": 0.000247210693359375,
      "step": 40503,
      "training_step_time": 0.3670032024383545
    },
    {
      "epoch": 0.000247216796875,
      "model_forward_time": 0.1147618293762207,
      "step": 40504
    },
    {
      "epoch": 0.000247216796875,
      "step": 40504,
      "training_step_time": 0.46187543869018555
    },
    {
      "epoch": 0.000247222900390625,
      "model_forward_time": 0.11511421203613281,
      "step": 40505
    },
    {
      "epoch": 0.000247222900390625,
      "step": 40505,
      "training_step_time": 0.4108269214630127
    },
    {
      "epoch": 0.00024722900390625,
      "model_forward_time": 0.11458110809326172,
      "step": 40506
    },
    {
      "epoch": 0.00024722900390625,
      "step": 40506,
      "training_step_time": 0.39073777198791504
    },
    {
      "epoch": 0.000247235107421875,
      "model_forward_time": 0.11463308334350586,
      "step": 40507
    },
    {
      "epoch": 0.000247235107421875,
      "step": 40507,
      "training_step_time": 0.4185764789581299
    },
    {
      "epoch": 0.0002472412109375,
      "model_forward_time": 0.11443138122558594,
      "step": 40508
    },
    {
      "epoch": 0.0002472412109375,
      "step": 40508,
      "training_step_time": 0.41101503372192383
    },
    {
      "epoch": 0.000247247314453125,
      "model_forward_time": 0.11492109298706055,
      "step": 40509
    },
    {
      "epoch": 0.000247247314453125,
      "step": 40509,
      "training_step_time": 0.3963336944580078
    },
    {
      "epoch": 0.00024725341796875,
      "grad_norm": 0.11362861096858978,
      "learning_rate": 2.6178397575495328e-05,
      "loss": 0.0429,
      "step": 40510
    },
    {
      "epoch": 0.00024725341796875,
      "model_forward_time": 0.11402750015258789,
      "step": 40510
    },
    {
      "epoch": 0.00024725341796875,
      "step": 40510,
      "training_step_time": 0.40133070945739746
    },
    {
      "epoch": 0.000247259521484375,
      "model_forward_time": 0.11507201194763184,
      "step": 40511
    },
    {
      "epoch": 0.000247259521484375,
      "step": 40511,
      "training_step_time": 0.39789485931396484
    },
    {
      "epoch": 0.000247265625,
      "model_forward_time": 0.11501836776733398,
      "step": 40512
    },
    {
      "epoch": 0.000247265625,
      "step": 40512,
      "training_step_time": 0.39630866050720215
    },
    {
      "epoch": 0.000247271728515625,
      "model_forward_time": 0.1148838996887207,
      "step": 40513
    },
    {
      "epoch": 0.000247271728515625,
      "step": 40513,
      "training_step_time": 0.4054074287414551
    },
    {
      "epoch": 0.00024727783203125,
      "model_forward_time": 0.11513161659240723,
      "step": 40514
    },
    {
      "epoch": 0.00024727783203125,
      "step": 40514,
      "training_step_time": 0.3872189521789551
    },
    {
      "epoch": 0.000247283935546875,
      "model_forward_time": 0.11474609375,
      "step": 40515
    },
    {
      "epoch": 0.000247283935546875,
      "step": 40515,
      "training_step_time": 0.5624563694000244
    },
    {
      "epoch": 0.0002472900390625,
      "model_forward_time": 0.11442852020263672,
      "step": 40516
    },
    {
      "epoch": 0.0002472900390625,
      "step": 40516,
      "training_step_time": 0.4399135112762451
    },
    {
      "epoch": 0.000247296142578125,
      "model_forward_time": 0.11453890800476074,
      "step": 40517
    },
    {
      "epoch": 0.000247296142578125,
      "step": 40517,
      "training_step_time": 0.40604352951049805
    },
    {
      "epoch": 0.00024730224609375,
      "model_forward_time": 0.11504244804382324,
      "step": 40518
    },
    {
      "epoch": 0.00024730224609375,
      "step": 40518,
      "training_step_time": 0.5103476047515869
    },
    {
      "epoch": 0.000247308349609375,
      "model_forward_time": 0.11495399475097656,
      "step": 40519
    },
    {
      "epoch": 0.000247308349609375,
      "step": 40519,
      "training_step_time": 0.5018308162689209
    },
    {
      "epoch": 0.000247314453125,
      "grad_norm": 0.16091737151145935,
      "learning_rate": 2.6154172039218172e-05,
      "loss": 0.0408,
      "step": 40520
    },
    {
      "epoch": 0.000247314453125,
      "model_forward_time": 0.11473822593688965,
      "step": 40520
    },
    {
      "epoch": 0.000247314453125,
      "step": 40520,
      "training_step_time": 0.4568972587585449
    },
    {
      "epoch": 0.000247320556640625,
      "model_forward_time": 0.11463212966918945,
      "step": 40521
    },
    {
      "epoch": 0.000247320556640625,
      "step": 40521,
      "training_step_time": 0.4308762550354004
    },
    {
      "epoch": 0.00024732666015625,
      "model_forward_time": 0.11460423469543457,
      "step": 40522
    },
    {
      "epoch": 0.00024732666015625,
      "step": 40522,
      "training_step_time": 0.3845188617706299
    },
    {
      "epoch": 0.000247332763671875,
      "model_forward_time": 0.11472916603088379,
      "step": 40523
    },
    {
      "epoch": 0.000247332763671875,
      "step": 40523,
      "training_step_time": 0.3798346519470215
    },
    {
      "epoch": 0.0002473388671875,
      "model_forward_time": 0.11422896385192871,
      "step": 40524
    },
    {
      "epoch": 0.0002473388671875,
      "step": 40524,
      "training_step_time": 0.385589599609375
    },
    {
      "epoch": 0.000247344970703125,
      "model_forward_time": 0.11654281616210938,
      "step": 40525
    },
    {
      "epoch": 0.000247344970703125,
      "step": 40525,
      "training_step_time": 0.3948550224304199
    },
    {
      "epoch": 0.00024735107421875,
      "model_forward_time": 0.11499762535095215,
      "step": 40526
    },
    {
      "epoch": 0.00024735107421875,
      "step": 40526,
      "training_step_time": 0.3960897922515869
    },
    {
      "epoch": 0.000247357177734375,
      "model_forward_time": 0.11503410339355469,
      "step": 40527
    },
    {
      "epoch": 0.000247357177734375,
      "step": 40527,
      "training_step_time": 0.41738319396972656
    },
    {
      "epoch": 0.00024736328125,
      "model_forward_time": 0.11528611183166504,
      "step": 40528
    },
    {
      "epoch": 0.00024736328125,
      "step": 40528,
      "training_step_time": 0.39870357513427734
    },
    {
      "epoch": 0.000247369384765625,
      "model_forward_time": 0.11561894416809082,
      "step": 40529
    },
    {
      "epoch": 0.000247369384765625,
      "step": 40529,
      "training_step_time": 0.4049222469329834
    },
    {
      "epoch": 0.00024737548828125,
      "grad_norm": 0.11635586619377136,
      "learning_rate": 2.612995374667394e-05,
      "loss": 0.0407,
      "step": 40530
    },
    {
      "epoch": 0.00024737548828125,
      "model_forward_time": 0.11507797241210938,
      "step": 40530
    },
    {
      "epoch": 0.00024737548828125,
      "step": 40530,
      "training_step_time": 0.45576047897338867
    },
    {
      "epoch": 0.000247381591796875,
      "model_forward_time": 0.1153254508972168,
      "step": 40531
    },
    {
      "epoch": 0.000247381591796875,
      "step": 40531,
      "training_step_time": 0.41975998878479004
    },
    {
      "epoch": 0.0002473876953125,
      "model_forward_time": 0.11444640159606934,
      "step": 40532
    },
    {
      "epoch": 0.0002473876953125,
      "step": 40532,
      "training_step_time": 0.44407081604003906
    },
    {
      "epoch": 0.000247393798828125,
      "model_forward_time": 0.11494302749633789,
      "step": 40533
    },
    {
      "epoch": 0.000247393798828125,
      "step": 40533,
      "training_step_time": 0.5184605121612549
    },
    {
      "epoch": 0.00024739990234375,
      "model_forward_time": 0.11519336700439453,
      "step": 40534
    },
    {
      "epoch": 0.00024739990234375,
      "step": 40534,
      "training_step_time": 0.4614429473876953
    },
    {
      "epoch": 0.000247406005859375,
      "model_forward_time": 0.1152801513671875,
      "step": 40535
    },
    {
      "epoch": 0.000247406005859375,
      "step": 40535,
      "training_step_time": 0.41135716438293457
    },
    {
      "epoch": 0.000247412109375,
      "model_forward_time": 0.11489105224609375,
      "step": 40536
    },
    {
      "epoch": 0.000247412109375,
      "step": 40536,
      "training_step_time": 0.47965073585510254
    },
    {
      "epoch": 0.000247418212890625,
      "model_forward_time": 0.11469435691833496,
      "step": 40537
    },
    {
      "epoch": 0.000247418212890625,
      "step": 40537,
      "training_step_time": 0.3973405361175537
    },
    {
      "epoch": 0.00024742431640625,
      "model_forward_time": 0.11470675468444824,
      "step": 40538
    },
    {
      "epoch": 0.00024742431640625,
      "step": 40538,
      "training_step_time": 0.4016544818878174
    },
    {
      "epoch": 0.000247430419921875,
      "model_forward_time": 0.11460041999816895,
      "step": 40539
    },
    {
      "epoch": 0.000247430419921875,
      "step": 40539,
      "training_step_time": 0.39614224433898926
    },
    {
      "epoch": 0.0002474365234375,
      "grad_norm": 0.08693865686655045,
      "learning_rate": 2.6105742705219515e-05,
      "loss": 0.0367,
      "step": 40540
    },
    {
      "epoch": 0.0002474365234375,
      "model_forward_time": 0.11487126350402832,
      "step": 40540
    },
    {
      "epoch": 0.0002474365234375,
      "step": 40540,
      "training_step_time": 0.3972001075744629
    },
    {
      "epoch": 0.000247442626953125,
      "model_forward_time": 0.11560893058776855,
      "step": 40541
    },
    {
      "epoch": 0.000247442626953125,
      "step": 40541,
      "training_step_time": 0.3962833881378174
    },
    {
      "epoch": 0.00024744873046875,
      "model_forward_time": 0.1150672435760498,
      "step": 40542
    },
    {
      "epoch": 0.00024744873046875,
      "step": 40542,
      "training_step_time": 0.513829231262207
    },
    {
      "epoch": 0.000247454833984375,
      "model_forward_time": 0.11437630653381348,
      "step": 40543
    },
    {
      "epoch": 0.000247454833984375,
      "step": 40543,
      "training_step_time": 0.39283156394958496
    },
    {
      "epoch": 0.0002474609375,
      "model_forward_time": 0.1152944564819336,
      "step": 40544
    },
    {
      "epoch": 0.0002474609375,
      "step": 40544,
      "training_step_time": 0.39498233795166016
    },
    {
      "epoch": 0.000247467041015625,
      "model_forward_time": 0.11498713493347168,
      "step": 40545
    },
    {
      "epoch": 0.000247467041015625,
      "step": 40545,
      "training_step_time": 0.45766711235046387
    },
    {
      "epoch": 0.00024747314453125,
      "model_forward_time": 0.11499357223510742,
      "step": 40546
    },
    {
      "epoch": 0.00024747314453125,
      "step": 40546,
      "training_step_time": 0.39475107192993164
    },
    {
      "epoch": 0.000247479248046875,
      "model_forward_time": 0.11678028106689453,
      "step": 40547
    },
    {
      "epoch": 0.000247479248046875,
      "step": 40547,
      "training_step_time": 0.44845128059387207
    },
    {
      "epoch": 0.0002474853515625,
      "model_forward_time": 0.11562919616699219,
      "step": 40548
    },
    {
      "epoch": 0.0002474853515625,
      "step": 40548,
      "training_step_time": 0.5106954574584961
    },
    {
      "epoch": 0.000247491455078125,
      "model_forward_time": 0.11461925506591797,
      "step": 40549
    },
    {
      "epoch": 0.000247491455078125,
      "step": 40549,
      "training_step_time": 0.4431331157684326
    },
    {
      "epoch": 0.00024749755859375,
      "grad_norm": 0.10116781294345856,
      "learning_rate": 2.6081538922209535e-05,
      "loss": 0.0367,
      "step": 40550
    },
    {
      "epoch": 0.00024749755859375,
      "model_forward_time": 0.11499643325805664,
      "step": 40550
    },
    {
      "epoch": 0.00024749755859375,
      "step": 40550,
      "training_step_time": 0.39951658248901367
    },
    {
      "epoch": 0.000247503662109375,
      "model_forward_time": 0.11548614501953125,
      "step": 40551
    },
    {
      "epoch": 0.000247503662109375,
      "step": 40551,
      "training_step_time": 0.399322509765625
    },
    {
      "epoch": 0.000247509765625,
      "model_forward_time": 0.11527800559997559,
      "step": 40552
    },
    {
      "epoch": 0.000247509765625,
      "step": 40552,
      "training_step_time": 0.3885800838470459
    },
    {
      "epoch": 0.000247515869140625,
      "model_forward_time": 0.11575889587402344,
      "step": 40553
    },
    {
      "epoch": 0.000247515869140625,
      "step": 40553,
      "training_step_time": 0.3908193111419678
    },
    {
      "epoch": 0.00024752197265625,
      "model_forward_time": 0.11516642570495605,
      "step": 40554
    },
    {
      "epoch": 0.00024752197265625,
      "step": 40554,
      "training_step_time": 0.5075135231018066
    },
    {
      "epoch": 0.000247528076171875,
      "model_forward_time": 0.11504054069519043,
      "step": 40555
    },
    {
      "epoch": 0.000247528076171875,
      "step": 40555,
      "training_step_time": 0.3901557922363281
    },
    {
      "epoch": 0.0002475341796875,
      "model_forward_time": 0.11537718772888184,
      "step": 40556
    },
    {
      "epoch": 0.0002475341796875,
      "step": 40556,
      "training_step_time": 0.40758442878723145
    },
    {
      "epoch": 0.000247540283203125,
      "model_forward_time": 0.11478829383850098,
      "step": 40557
    },
    {
      "epoch": 0.000247540283203125,
      "step": 40557,
      "training_step_time": 0.4661705493927002
    },
    {
      "epoch": 0.00024754638671875,
      "model_forward_time": 0.11487770080566406,
      "step": 40558
    },
    {
      "epoch": 0.00024754638671875,
      "step": 40558,
      "training_step_time": 0.3983185291290283
    },
    {
      "epoch": 0.000247552490234375,
      "model_forward_time": 0.11701703071594238,
      "step": 40559
    },
    {
      "epoch": 0.000247552490234375,
      "step": 40559,
      "training_step_time": 0.46652865409851074
    },
    {
      "epoch": 0.00024755859375,
      "grad_norm": 0.14180423319339752,
      "learning_rate": 2.6057342404996522e-05,
      "loss": 0.0399,
      "step": 40560
    },
    {
      "epoch": 0.00024755859375,
      "model_forward_time": 0.11470842361450195,
      "step": 40560
    },
    {
      "epoch": 0.00024755859375,
      "step": 40560,
      "training_step_time": 0.48174190521240234
    },
    {
      "epoch": 0.000247564697265625,
      "model_forward_time": 0.1152195930480957,
      "step": 40561
    },
    {
      "epoch": 0.000247564697265625,
      "step": 40561,
      "training_step_time": 0.43836045265197754
    },
    {
      "epoch": 0.00024757080078125,
      "model_forward_time": 0.11494278907775879,
      "step": 40562
    },
    {
      "epoch": 0.00024757080078125,
      "step": 40562,
      "training_step_time": 0.45507097244262695
    },
    {
      "epoch": 0.000247576904296875,
      "model_forward_time": 0.11503243446350098,
      "step": 40563
    },
    {
      "epoch": 0.000247576904296875,
      "step": 40563,
      "training_step_time": 0.4248204231262207
    },
    {
      "epoch": 0.0002475830078125,
      "model_forward_time": 0.11543917655944824,
      "step": 40564
    },
    {
      "epoch": 0.0002475830078125,
      "step": 40564,
      "training_step_time": 0.3959510326385498
    },
    {
      "epoch": 0.000247589111328125,
      "model_forward_time": 0.11480474472045898,
      "step": 40565
    },
    {
      "epoch": 0.000247589111328125,
      "step": 40565,
      "training_step_time": 0.4058060646057129
    },
    {
      "epoch": 0.00024759521484375,
      "model_forward_time": 0.11577796936035156,
      "step": 40566
    },
    {
      "epoch": 0.00024759521484375,
      "step": 40566,
      "training_step_time": 0.5119266510009766
    },
    {
      "epoch": 0.000247601318359375,
      "model_forward_time": 0.1147773265838623,
      "step": 40567
    },
    {
      "epoch": 0.000247601318359375,
      "step": 40567,
      "training_step_time": 0.39352989196777344
    },
    {
      "epoch": 0.000247607421875,
      "model_forward_time": 0.11481785774230957,
      "step": 40568
    },
    {
      "epoch": 0.000247607421875,
      "step": 40568,
      "training_step_time": 0.3974795341491699
    },
    {
      "epoch": 0.000247613525390625,
      "model_forward_time": 0.11555099487304688,
      "step": 40569
    },
    {
      "epoch": 0.000247613525390625,
      "step": 40569,
      "training_step_time": 0.43530797958374023
    },
    {
      "epoch": 0.00024761962890625,
      "grad_norm": 0.11573775857686996,
      "learning_rate": 2.6033153160930722e-05,
      "loss": 0.0368,
      "step": 40570
    },
    {
      "epoch": 0.00024761962890625,
      "model_forward_time": 0.11541438102722168,
      "step": 40570
    },
    {
      "epoch": 0.00024761962890625,
      "step": 40570,
      "training_step_time": 0.44779157638549805
    },
    {
      "epoch": 0.000247625732421875,
      "model_forward_time": 0.1152200698852539,
      "step": 40571
    },
    {
      "epoch": 0.000247625732421875,
      "step": 40571,
      "training_step_time": 0.3962216377258301
    },
    {
      "epoch": 0.0002476318359375,
      "model_forward_time": 0.1144564151763916,
      "step": 40572
    },
    {
      "epoch": 0.0002476318359375,
      "step": 40572,
      "training_step_time": 0.6410071849822998
    },
    {
      "epoch": 0.000247637939453125,
      "model_forward_time": 0.11486577987670898,
      "step": 40573
    },
    {
      "epoch": 0.000247637939453125,
      "step": 40573,
      "training_step_time": 0.39326047897338867
    },
    {
      "epoch": 0.00024764404296875,
      "model_forward_time": 0.11550188064575195,
      "step": 40574
    },
    {
      "epoch": 0.00024764404296875,
      "step": 40574,
      "training_step_time": 0.41405248641967773
    },
    {
      "epoch": 0.000247650146484375,
      "model_forward_time": 0.11443066596984863,
      "step": 40575
    },
    {
      "epoch": 0.000247650146484375,
      "step": 40575,
      "training_step_time": 0.4594848155975342
    },
    {
      "epoch": 0.00024765625,
      "model_forward_time": 0.11549210548400879,
      "step": 40576
    },
    {
      "epoch": 0.00024765625,
      "step": 40576,
      "training_step_time": 0.5102169513702393
    },
    {
      "epoch": 0.000247662353515625,
      "model_forward_time": 0.11462640762329102,
      "step": 40577
    },
    {
      "epoch": 0.000247662353515625,
      "step": 40577,
      "training_step_time": 0.4520597457885742
    },
    {
      "epoch": 0.00024766845703125,
      "model_forward_time": 0.11571311950683594,
      "step": 40578
    },
    {
      "epoch": 0.00024766845703125,
      "step": 40578,
      "training_step_time": 0.4460940361022949
    },
    {
      "epoch": 0.000247674560546875,
      "model_forward_time": 0.11514806747436523,
      "step": 40579
    },
    {
      "epoch": 0.000247674560546875,
      "step": 40579,
      "training_step_time": 0.39450526237487793
    },
    {
      "epoch": 0.0002476806640625,
      "grad_norm": 0.12177682667970657,
      "learning_rate": 2.6008971197360176e-05,
      "loss": 0.0417,
      "step": 40580
    },
    {
      "epoch": 0.0002476806640625,
      "model_forward_time": 0.11513471603393555,
      "step": 40580
    },
    {
      "epoch": 0.0002476806640625,
      "step": 40580,
      "training_step_time": 0.40366458892822266
    },
    {
      "epoch": 0.000247686767578125,
      "model_forward_time": 0.11550092697143555,
      "step": 40581
    },
    {
      "epoch": 0.000247686767578125,
      "step": 40581,
      "training_step_time": 0.399489164352417
    },
    {
      "epoch": 0.00024769287109375,
      "model_forward_time": 0.11487889289855957,
      "step": 40582
    },
    {
      "epoch": 0.00024769287109375,
      "step": 40582,
      "training_step_time": 0.3841218948364258
    },
    {
      "epoch": 0.000247698974609375,
      "model_forward_time": 0.11520004272460938,
      "step": 40583
    },
    {
      "epoch": 0.000247698974609375,
      "step": 40583,
      "training_step_time": 0.3994293212890625
    },
    {
      "epoch": 0.000247705078125,
      "model_forward_time": 0.11563467979431152,
      "step": 40584
    },
    {
      "epoch": 0.000247705078125,
      "step": 40584,
      "training_step_time": 0.48868274688720703
    },
    {
      "epoch": 0.000247711181640625,
      "model_forward_time": 0.11521172523498535,
      "step": 40585
    },
    {
      "epoch": 0.000247711181640625,
      "step": 40585,
      "training_step_time": 0.4070444107055664
    },
    {
      "epoch": 0.00024771728515625,
      "model_forward_time": 0.1147165298461914,
      "step": 40586
    },
    {
      "epoch": 0.00024771728515625,
      "step": 40586,
      "training_step_time": 0.4041774272918701
    },
    {
      "epoch": 0.000247723388671875,
      "model_forward_time": 0.11605405807495117,
      "step": 40587
    },
    {
      "epoch": 0.000247723388671875,
      "step": 40587,
      "training_step_time": 0.4149200916290283
    },
    {
      "epoch": 0.0002477294921875,
      "model_forward_time": 0.11542916297912598,
      "step": 40588
    },
    {
      "epoch": 0.0002477294921875,
      "step": 40588,
      "training_step_time": 0.41548728942871094
    },
    {
      "epoch": 0.000247735595703125,
      "model_forward_time": 0.11526775360107422,
      "step": 40589
    },
    {
      "epoch": 0.000247735595703125,
      "step": 40589,
      "training_step_time": 0.3676636219024658
    },
    {
      "epoch": 0.00024774169921875,
      "grad_norm": 0.10638294368982315,
      "learning_rate": 2.5984796521630737e-05,
      "loss": 0.043,
      "step": 40590
    },
    {
      "epoch": 0.00024774169921875,
      "model_forward_time": 0.11549139022827148,
      "step": 40590
    },
    {
      "epoch": 0.00024774169921875,
      "step": 40590,
      "training_step_time": 0.46170997619628906
    },
    {
      "epoch": 0.000247747802734375,
      "model_forward_time": 0.11491870880126953,
      "step": 40591
    },
    {
      "epoch": 0.000247747802734375,
      "step": 40591,
      "training_step_time": 0.46127772331237793
    },
    {
      "epoch": 0.00024775390625,
      "model_forward_time": 0.11542296409606934,
      "step": 40592
    },
    {
      "epoch": 0.00024775390625,
      "step": 40592,
      "training_step_time": 0.4026031494140625
    },
    {
      "epoch": 0.000247760009765625,
      "model_forward_time": 0.11530375480651855,
      "step": 40593
    },
    {
      "epoch": 0.000247760009765625,
      "step": 40593,
      "training_step_time": 0.3909919261932373
    },
    {
      "epoch": 0.00024776611328125,
      "model_forward_time": 0.11515998840332031,
      "step": 40594
    },
    {
      "epoch": 0.00024776611328125,
      "step": 40594,
      "training_step_time": 0.38706421852111816
    },
    {
      "epoch": 0.000247772216796875,
      "model_forward_time": 0.1152656078338623,
      "step": 40595
    },
    {
      "epoch": 0.000247772216796875,
      "step": 40595,
      "training_step_time": 0.3956012725830078
    },
    {
      "epoch": 0.0002477783203125,
      "model_forward_time": 0.11549234390258789,
      "step": 40596
    },
    {
      "epoch": 0.0002477783203125,
      "step": 40596,
      "training_step_time": 0.544144868850708
    },
    {
      "epoch": 0.000247784423828125,
      "model_forward_time": 0.11518478393554688,
      "step": 40597
    },
    {
      "epoch": 0.000247784423828125,
      "step": 40597,
      "training_step_time": 0.40883398056030273
    },
    {
      "epoch": 0.00024779052734375,
      "model_forward_time": 0.11508703231811523,
      "step": 40598
    },
    {
      "epoch": 0.00024779052734375,
      "step": 40598,
      "training_step_time": 0.3933892250061035
    },
    {
      "epoch": 0.000247796630859375,
      "model_forward_time": 0.11546993255615234,
      "step": 40599
    },
    {
      "epoch": 0.000247796630859375,
      "step": 40599,
      "training_step_time": 0.38599348068237305
    },
    {
      "epoch": 0.000247802734375,
      "grad_norm": 0.10696757584810257,
      "learning_rate": 2.5960629141086012e-05,
      "loss": 0.0393,
      "step": 40600
    },
    {
      "epoch": 0.000247802734375,
      "model_forward_time": 0.11526155471801758,
      "step": 40600
    },
    {
      "epoch": 0.000247802734375,
      "step": 40600,
      "training_step_time": 0.3947136402130127
    },
    {
      "epoch": 0.000247808837890625,
      "model_forward_time": 0.11598944664001465,
      "step": 40601
    },
    {
      "epoch": 0.000247808837890625,
      "step": 40601,
      "training_step_time": 0.39173364639282227
    },
    {
      "epoch": 0.00024781494140625,
      "model_forward_time": 0.11499524116516113,
      "step": 40602
    },
    {
      "epoch": 0.00024781494140625,
      "step": 40602,
      "training_step_time": 0.605402946472168
    },
    {
      "epoch": 0.000247821044921875,
      "model_forward_time": 0.11487174034118652,
      "step": 40603
    },
    {
      "epoch": 0.000247821044921875,
      "step": 40603,
      "training_step_time": 0.4846057891845703
    },
    {
      "epoch": 0.0002478271484375,
      "model_forward_time": 0.11450028419494629,
      "step": 40604
    },
    {
      "epoch": 0.0002478271484375,
      "step": 40604,
      "training_step_time": 0.47010135650634766
    },
    {
      "epoch": 0.000247833251953125,
      "model_forward_time": 0.11555790901184082,
      "step": 40605
    },
    {
      "epoch": 0.000247833251953125,
      "step": 40605,
      "training_step_time": 0.4095165729522705
    },
    {
      "epoch": 0.00024783935546875,
      "model_forward_time": 0.11483979225158691,
      "step": 40606
    },
    {
      "epoch": 0.00024783935546875,
      "step": 40606,
      "training_step_time": 0.38498449325561523
    },
    {
      "epoch": 0.000247845458984375,
      "model_forward_time": 0.11478567123413086,
      "step": 40607
    },
    {
      "epoch": 0.000247845458984375,
      "step": 40607,
      "training_step_time": 0.3937723636627197
    },
    {
      "epoch": 0.0002478515625,
      "model_forward_time": 0.11501193046569824,
      "step": 40608
    },
    {
      "epoch": 0.0002478515625,
      "step": 40608,
      "training_step_time": 0.40688085556030273
    },
    {
      "epoch": 0.000247857666015625,
      "model_forward_time": 0.11515927314758301,
      "step": 40609
    },
    {
      "epoch": 0.000247857666015625,
      "step": 40609,
      "training_step_time": 0.37868595123291016
    },
    {
      "epoch": 0.00024786376953125,
      "grad_norm": 0.1458144187927246,
      "learning_rate": 2.593646906306747e-05,
      "loss": 0.0341,
      "step": 40610
    },
    {
      "epoch": 0.00024786376953125,
      "model_forward_time": 0.11548590660095215,
      "step": 40610
    },
    {
      "epoch": 0.00024786376953125,
      "step": 40610,
      "training_step_time": 0.44477081298828125
    },
    {
      "epoch": 0.000247869873046875,
      "model_forward_time": 0.11515998840332031,
      "step": 40611
    },
    {
      "epoch": 0.000247869873046875,
      "step": 40611,
      "training_step_time": 0.42593955993652344
    },
    {
      "epoch": 0.0002478759765625,
      "model_forward_time": 0.1155858039855957,
      "step": 40612
    },
    {
      "epoch": 0.0002478759765625,
      "step": 40612,
      "training_step_time": 0.39308619499206543
    },
    {
      "epoch": 0.000247882080078125,
      "model_forward_time": 0.11518621444702148,
      "step": 40613
    },
    {
      "epoch": 0.000247882080078125,
      "step": 40613,
      "training_step_time": 0.39849090576171875
    },
    {
      "epoch": 0.00024788818359375,
      "model_forward_time": 0.11554551124572754,
      "step": 40614
    },
    {
      "epoch": 0.00024788818359375,
      "step": 40614,
      "training_step_time": 0.5930483341217041
    },
    {
      "epoch": 0.000247894287109375,
      "model_forward_time": 0.11475276947021484,
      "step": 40615
    },
    {
      "epoch": 0.000247894287109375,
      "step": 40615,
      "training_step_time": 0.3962850570678711
    },
    {
      "epoch": 0.000247900390625,
      "model_forward_time": 0.11515450477600098,
      "step": 40616
    },
    {
      "epoch": 0.000247900390625,
      "step": 40616,
      "training_step_time": 0.4697730541229248
    },
    {
      "epoch": 0.000247906494140625,
      "model_forward_time": 0.11491727828979492,
      "step": 40617
    },
    {
      "epoch": 0.000247906494140625,
      "step": 40617,
      "training_step_time": 0.3829967975616455
    },
    {
      "epoch": 0.00024791259765625,
      "model_forward_time": 0.11457252502441406,
      "step": 40618
    },
    {
      "epoch": 0.00024791259765625,
      "step": 40618,
      "training_step_time": 0.4366481304168701
    },
    {
      "epoch": 0.000247918701171875,
      "model_forward_time": 0.11496639251708984,
      "step": 40619
    },
    {
      "epoch": 0.000247918701171875,
      "step": 40619,
      "training_step_time": 0.4047424793243408
    },
    {
      "epoch": 0.0002479248046875,
      "grad_norm": 0.11092828214168549,
      "learning_rate": 2.591231629491423e-05,
      "loss": 0.0434,
      "step": 40620
    },
    {
      "epoch": 0.0002479248046875,
      "model_forward_time": 0.11513876914978027,
      "step": 40620
    },
    {
      "epoch": 0.0002479248046875,
      "step": 40620,
      "training_step_time": 0.5448362827301025
    },
    {
      "epoch": 0.000247930908203125,
      "model_forward_time": 0.11496782302856445,
      "step": 40621
    },
    {
      "epoch": 0.000247930908203125,
      "step": 40621,
      "training_step_time": 0.39079809188842773
    },
    {
      "epoch": 0.00024793701171875,
      "model_forward_time": 0.11529994010925293,
      "step": 40622
    },
    {
      "epoch": 0.00024793701171875,
      "step": 40622,
      "training_step_time": 0.3922758102416992
    },
    {
      "epoch": 0.000247943115234375,
      "model_forward_time": 0.11513829231262207,
      "step": 40623
    },
    {
      "epoch": 0.000247943115234375,
      "step": 40623,
      "training_step_time": 0.3863356113433838
    },
    {
      "epoch": 0.00024794921875,
      "model_forward_time": 0.11517453193664551,
      "step": 40624
    },
    {
      "epoch": 0.00024794921875,
      "step": 40624,
      "training_step_time": 0.4056079387664795
    },
    {
      "epoch": 0.000247955322265625,
      "model_forward_time": 0.12009024620056152,
      "step": 40625
    },
    {
      "epoch": 0.000247955322265625,
      "step": 40625,
      "training_step_time": 0.3846592903137207
    },
    {
      "epoch": 0.00024796142578125,
      "model_forward_time": 0.11532902717590332,
      "step": 40626
    },
    {
      "epoch": 0.00024796142578125,
      "step": 40626,
      "training_step_time": 0.6186800003051758
    },
    {
      "epoch": 0.000247967529296875,
      "model_forward_time": 0.1153876781463623,
      "step": 40627
    },
    {
      "epoch": 0.000247967529296875,
      "step": 40627,
      "training_step_time": 0.39295220375061035
    },
    {
      "epoch": 0.0002479736328125,
      "model_forward_time": 0.11466288566589355,
      "step": 40628
    },
    {
      "epoch": 0.0002479736328125,
      "step": 40628,
      "training_step_time": 0.39209413528442383
    },
    {
      "epoch": 0.000247979736328125,
      "model_forward_time": 0.11546540260314941,
      "step": 40629
    },
    {
      "epoch": 0.000247979736328125,
      "step": 40629,
      "training_step_time": 0.3976931571960449
    },
    {
      "epoch": 0.00024798583984375,
      "grad_norm": 0.11218654364347458,
      "learning_rate": 2.5888170843963332e-05,
      "loss": 0.0372,
      "step": 40630
    },
    {
      "epoch": 0.00024798583984375,
      "model_forward_time": 0.1150810718536377,
      "step": 40630
    },
    {
      "epoch": 0.00024798583984375,
      "step": 40630,
      "training_step_time": 0.3934309482574463
    },
    {
      "epoch": 0.000247991943359375,
      "model_forward_time": 0.11467766761779785,
      "step": 40631
    },
    {
      "epoch": 0.000247991943359375,
      "step": 40631,
      "training_step_time": 0.42234086990356445
    },
    {
      "epoch": 0.000247998046875,
      "model_forward_time": 0.11458659172058105,
      "step": 40632
    },
    {
      "epoch": 0.000247998046875,
      "step": 40632,
      "training_step_time": 0.5709099769592285
    },
    {
      "epoch": 0.000248004150390625,
      "model_forward_time": 0.11501216888427734,
      "step": 40633
    },
    {
      "epoch": 0.000248004150390625,
      "step": 40633,
      "training_step_time": 0.42032623291015625
    },
    {
      "epoch": 0.00024801025390625,
      "model_forward_time": 0.11562752723693848,
      "step": 40634
    },
    {
      "epoch": 0.00024801025390625,
      "step": 40634,
      "training_step_time": 0.40082454681396484
    },
    {
      "epoch": 0.000248016357421875,
      "model_forward_time": 0.11485671997070312,
      "step": 40635
    },
    {
      "epoch": 0.000248016357421875,
      "step": 40635,
      "training_step_time": 0.39647459983825684
    },
    {
      "epoch": 0.0002480224609375,
      "model_forward_time": 0.11506819725036621,
      "step": 40636
    },
    {
      "epoch": 0.0002480224609375,
      "step": 40636,
      "training_step_time": 0.4275088310241699
    },
    {
      "epoch": 0.000248028564453125,
      "model_forward_time": 0.11510157585144043,
      "step": 40637
    },
    {
      "epoch": 0.000248028564453125,
      "step": 40637,
      "training_step_time": 0.40461230278015137
    },
    {
      "epoch": 0.00024803466796875,
      "model_forward_time": 0.1149141788482666,
      "step": 40638
    },
    {
      "epoch": 0.00024803466796875,
      "step": 40638,
      "training_step_time": 0.5562388896942139
    },
    {
      "epoch": 0.000248040771484375,
      "model_forward_time": 0.11491990089416504,
      "step": 40639
    },
    {
      "epoch": 0.000248040771484375,
      "step": 40639,
      "training_step_time": 0.3944878578186035
    },
    {
      "epoch": 0.000248046875,
      "grad_norm": 0.10379166901111603,
      "learning_rate": 2.586403271754947e-05,
      "loss": 0.0346,
      "step": 40640
    },
    {
      "epoch": 0.000248046875,
      "model_forward_time": 0.1144096851348877,
      "step": 40640
    },
    {
      "epoch": 0.000248046875,
      "step": 40640,
      "training_step_time": 0.39970993995666504
    },
    {
      "epoch": 0.000248052978515625,
      "model_forward_time": 0.1153709888458252,
      "step": 40641
    },
    {
      "epoch": 0.000248052978515625,
      "step": 40641,
      "training_step_time": 0.38863348960876465
    },
    {
      "epoch": 0.00024805908203125,
      "model_forward_time": 0.11455225944519043,
      "step": 40642
    },
    {
      "epoch": 0.00024805908203125,
      "step": 40642,
      "training_step_time": 0.38643503189086914
    },
    {
      "epoch": 0.000248065185546875,
      "model_forward_time": 0.11492919921875,
      "step": 40643
    },
    {
      "epoch": 0.000248065185546875,
      "step": 40643,
      "training_step_time": 0.39578914642333984
    },
    {
      "epoch": 0.0002480712890625,
      "model_forward_time": 0.11618208885192871,
      "step": 40644
    },
    {
      "epoch": 0.0002480712890625,
      "step": 40644,
      "training_step_time": 0.6430771350860596
    },
    {
      "epoch": 0.000248077392578125,
      "model_forward_time": 0.11490130424499512,
      "step": 40645
    },
    {
      "epoch": 0.000248077392578125,
      "step": 40645,
      "training_step_time": 0.3676924705505371
    },
    {
      "epoch": 0.00024808349609375,
      "model_forward_time": 0.11476635932922363,
      "step": 40646
    },
    {
      "epoch": 0.00024808349609375,
      "step": 40646,
      "training_step_time": 0.46701693534851074
    },
    {
      "epoch": 0.000248089599609375,
      "model_forward_time": 0.11475968360900879,
      "step": 40647
    },
    {
      "epoch": 0.000248089599609375,
      "step": 40647,
      "training_step_time": 0.42530393600463867
    },
    {
      "epoch": 0.000248095703125,
      "model_forward_time": 0.11564850807189941,
      "step": 40648
    },
    {
      "epoch": 0.000248095703125,
      "step": 40648,
      "training_step_time": 0.4154782295227051
    },
    {
      "epoch": 0.000248101806640625,
      "model_forward_time": 0.11467218399047852,
      "step": 40649
    },
    {
      "epoch": 0.000248101806640625,
      "step": 40649,
      "training_step_time": 0.4332263469696045
    },
    {
      "epoch": 0.00024810791015625,
      "grad_norm": 0.09087462723255157,
      "learning_rate": 2.5839901923005205e-05,
      "loss": 0.0422,
      "step": 40650
    },
    {
      "epoch": 0.00024810791015625,
      "model_forward_time": 0.11485147476196289,
      "step": 40650
    },
    {
      "epoch": 0.00024810791015625,
      "step": 40650,
      "training_step_time": 0.45109033584594727
    },
    {
      "epoch": 0.000248114013671875,
      "model_forward_time": 0.11492586135864258,
      "step": 40651
    },
    {
      "epoch": 0.000248114013671875,
      "step": 40651,
      "training_step_time": 0.39206886291503906
    },
    {
      "epoch": 0.0002481201171875,
      "model_forward_time": 0.11459684371948242,
      "step": 40652
    },
    {
      "epoch": 0.0002481201171875,
      "step": 40652,
      "training_step_time": 0.38851380348205566
    },
    {
      "epoch": 0.000248126220703125,
      "model_forward_time": 0.11538147926330566,
      "step": 40653
    },
    {
      "epoch": 0.000248126220703125,
      "step": 40653,
      "training_step_time": 0.40355896949768066
    },
    {
      "epoch": 0.00024813232421875,
      "model_forward_time": 0.11481499671936035,
      "step": 40654
    },
    {
      "epoch": 0.00024813232421875,
      "step": 40654,
      "training_step_time": 0.396256685256958
    },
    {
      "epoch": 0.000248138427734375,
      "model_forward_time": 0.11491751670837402,
      "step": 40655
    },
    {
      "epoch": 0.000248138427734375,
      "step": 40655,
      "training_step_time": 0.3919086456298828
    },
    {
      "epoch": 0.00024814453125,
      "model_forward_time": 0.1150505542755127,
      "step": 40656
    },
    {
      "epoch": 0.00024814453125,
      "step": 40656,
      "training_step_time": 0.5517127513885498
    },
    {
      "epoch": 0.000248150634765625,
      "model_forward_time": 0.11456823348999023,
      "step": 40657
    },
    {
      "epoch": 0.000248150634765625,
      "step": 40657,
      "training_step_time": 0.4007413387298584
    },
    {
      "epoch": 0.00024815673828125,
      "model_forward_time": 0.11477899551391602,
      "step": 40658
    },
    {
      "epoch": 0.00024815673828125,
      "step": 40658,
      "training_step_time": 0.4070699214935303
    },
    {
      "epoch": 0.000248162841796875,
      "model_forward_time": 0.11480855941772461,
      "step": 40659
    },
    {
      "epoch": 0.000248162841796875,
      "step": 40659,
      "training_step_time": 0.39719581604003906
    },
    {
      "epoch": 0.0002481689453125,
      "grad_norm": 0.17805375158786774,
      "learning_rate": 2.5815778467660823e-05,
      "loss": 0.0379,
      "step": 40660
    },
    {
      "epoch": 0.0002481689453125,
      "model_forward_time": 0.11434459686279297,
      "step": 40660
    },
    {
      "epoch": 0.0002481689453125,
      "step": 40660,
      "training_step_time": 0.5042421817779541
    },
    {
      "epoch": 0.000248175048828125,
      "model_forward_time": 0.11494755744934082,
      "step": 40661
    },
    {
      "epoch": 0.000248175048828125,
      "step": 40661,
      "training_step_time": 0.5079994201660156
    },
    {
      "epoch": 0.00024818115234375,
      "model_forward_time": 0.11516427993774414,
      "step": 40662
    },
    {
      "epoch": 0.00024818115234375,
      "step": 40662,
      "training_step_time": 0.45313262939453125
    },
    {
      "epoch": 0.000248187255859375,
      "model_forward_time": 0.11495780944824219,
      "step": 40663
    },
    {
      "epoch": 0.000248187255859375,
      "step": 40663,
      "training_step_time": 0.4084632396697998
    },
    {
      "epoch": 0.000248193359375,
      "model_forward_time": 0.11451506614685059,
      "step": 40664
    },
    {
      "epoch": 0.000248193359375,
      "step": 40664,
      "training_step_time": 0.3920633792877197
    },
    {
      "epoch": 0.000248199462890625,
      "model_forward_time": 0.11530613899230957,
      "step": 40665
    },
    {
      "epoch": 0.000248199462890625,
      "step": 40665,
      "training_step_time": 0.40582990646362305
    },
    {
      "epoch": 0.00024820556640625,
      "model_forward_time": 0.11459660530090332,
      "step": 40666
    },
    {
      "epoch": 0.00024820556640625,
      "step": 40666,
      "training_step_time": 0.3994863033294678
    },
    {
      "epoch": 0.000248211669921875,
      "model_forward_time": 0.11499762535095215,
      "step": 40667
    },
    {
      "epoch": 0.000248211669921875,
      "step": 40667,
      "training_step_time": 0.40581345558166504
    },
    {
      "epoch": 0.0002482177734375,
      "model_forward_time": 0.11519885063171387,
      "step": 40668
    },
    {
      "epoch": 0.0002482177734375,
      "step": 40668,
      "training_step_time": 0.5790035724639893
    },
    {
      "epoch": 0.000248223876953125,
      "model_forward_time": 0.11551642417907715,
      "step": 40669
    },
    {
      "epoch": 0.000248223876953125,
      "step": 40669,
      "training_step_time": 0.39483022689819336
    },
    {
      "epoch": 0.00024822998046875,
      "grad_norm": 0.09443353116512299,
      "learning_rate": 2.57916623588444e-05,
      "loss": 0.0383,
      "step": 40670
    },
    {
      "epoch": 0.00024822998046875,
      "model_forward_time": 0.1159212589263916,
      "step": 40670
    },
    {
      "epoch": 0.00024822998046875,
      "step": 40670,
      "training_step_time": 0.39668703079223633
    },
    {
      "epoch": 0.000248236083984375,
      "model_forward_time": 0.11506175994873047,
      "step": 40671
    },
    {
      "epoch": 0.000248236083984375,
      "step": 40671,
      "training_step_time": 0.3843684196472168
    },
    {
      "epoch": 0.0002482421875,
      "model_forward_time": 0.11510205268859863,
      "step": 40672
    },
    {
      "epoch": 0.0002482421875,
      "step": 40672,
      "training_step_time": 0.39374709129333496
    },
    {
      "epoch": 0.000248248291015625,
      "model_forward_time": 0.11512231826782227,
      "step": 40673
    },
    {
      "epoch": 0.000248248291015625,
      "step": 40673,
      "training_step_time": 0.4830443859100342
    },
    {
      "epoch": 0.00024825439453125,
      "model_forward_time": 0.11503815650939941,
      "step": 40674
    },
    {
      "epoch": 0.00024825439453125,
      "step": 40674,
      "training_step_time": 0.47787904739379883
    },
    {
      "epoch": 0.000248260498046875,
      "model_forward_time": 0.114715576171875,
      "step": 40675
    },
    {
      "epoch": 0.000248260498046875,
      "step": 40675,
      "training_step_time": 0.5032150745391846
    },
    {
      "epoch": 0.0002482666015625,
      "model_forward_time": 0.11484742164611816,
      "step": 40676
    },
    {
      "epoch": 0.0002482666015625,
      "step": 40676,
      "training_step_time": 0.43860960006713867
    },
    {
      "epoch": 0.000248272705078125,
      "model_forward_time": 0.11438298225402832,
      "step": 40677
    },
    {
      "epoch": 0.000248272705078125,
      "step": 40677,
      "training_step_time": 0.42374205589294434
    },
    {
      "epoch": 0.00024827880859375,
      "model_forward_time": 0.11469125747680664,
      "step": 40678
    },
    {
      "epoch": 0.00024827880859375,
      "step": 40678,
      "training_step_time": 0.38299560546875
    },
    {
      "epoch": 0.000248284912109375,
      "model_forward_time": 0.11460351943969727,
      "step": 40679
    },
    {
      "epoch": 0.000248284912109375,
      "step": 40679,
      "training_step_time": 0.39503979682922363
    },
    {
      "epoch": 0.000248291015625,
      "grad_norm": 0.11298603564500809,
      "learning_rate": 2.5767553603881767e-05,
      "loss": 0.0371,
      "step": 40680
    },
    {
      "epoch": 0.000248291015625,
      "model_forward_time": 0.11507248878479004,
      "step": 40680
    },
    {
      "epoch": 0.000248291015625,
      "step": 40680,
      "training_step_time": 0.3929331302642822
    },
    {
      "epoch": 0.000248297119140625,
      "model_forward_time": 0.11486077308654785,
      "step": 40681
    },
    {
      "epoch": 0.000248297119140625,
      "step": 40681,
      "training_step_time": 0.39493846893310547
    },
    {
      "epoch": 0.00024830322265625,
      "model_forward_time": 0.11475324630737305,
      "step": 40682
    },
    {
      "epoch": 0.00024830322265625,
      "step": 40682,
      "training_step_time": 0.395932674407959
    },
    {
      "epoch": 0.000248309326171875,
      "model_forward_time": 0.11557388305664062,
      "step": 40683
    },
    {
      "epoch": 0.000248309326171875,
      "step": 40683,
      "training_step_time": 0.38787841796875
    },
    {
      "epoch": 0.0002483154296875,
      "model_forward_time": 0.11508393287658691,
      "step": 40684
    },
    {
      "epoch": 0.0002483154296875,
      "step": 40684,
      "training_step_time": 0.38765668869018555
    },
    {
      "epoch": 0.000248321533203125,
      "model_forward_time": 0.11514115333557129,
      "step": 40685
    },
    {
      "epoch": 0.000248321533203125,
      "step": 40685,
      "training_step_time": 0.38994407653808594
    },
    {
      "epoch": 0.00024832763671875,
      "model_forward_time": 0.1161341667175293,
      "step": 40686
    },
    {
      "epoch": 0.00024832763671875,
      "step": 40686,
      "training_step_time": 0.5871617794036865
    },
    {
      "epoch": 0.000248333740234375,
      "model_forward_time": 0.11561107635498047,
      "step": 40687
    },
    {
      "epoch": 0.000248333740234375,
      "step": 40687,
      "training_step_time": 0.41720151901245117
    },
    {
      "epoch": 0.00024833984375,
      "model_forward_time": 0.1147918701171875,
      "step": 40688
    },
    {
      "epoch": 0.00024833984375,
      "step": 40688,
      "training_step_time": 0.42769598960876465
    },
    {
      "epoch": 0.000248345947265625,
      "model_forward_time": 0.11586236953735352,
      "step": 40689
    },
    {
      "epoch": 0.000248345947265625,
      "step": 40689,
      "training_step_time": 0.5148367881774902
    },
    {
      "epoch": 0.00024835205078125,
      "grad_norm": 0.13541379570960999,
      "learning_rate": 2.574345221009653e-05,
      "loss": 0.0401,
      "step": 40690
    },
    {
      "epoch": 0.00024835205078125,
      "model_forward_time": 0.11475539207458496,
      "step": 40690
    },
    {
      "epoch": 0.00024835205078125,
      "step": 40690,
      "training_step_time": 0.4623870849609375
    },
    {
      "epoch": 0.000248358154296875,
      "model_forward_time": 0.11452412605285645,
      "step": 40691
    },
    {
      "epoch": 0.000248358154296875,
      "step": 40691,
      "training_step_time": 0.40801453590393066
    },
    {
      "epoch": 0.0002483642578125,
      "model_forward_time": 0.11487317085266113,
      "step": 40692
    },
    {
      "epoch": 0.0002483642578125,
      "step": 40692,
      "training_step_time": 0.40558886528015137
    },
    {
      "epoch": 0.000248370361328125,
      "model_forward_time": 0.11440515518188477,
      "step": 40693
    },
    {
      "epoch": 0.000248370361328125,
      "step": 40693,
      "training_step_time": 0.3878905773162842
    },
    {
      "epoch": 0.00024837646484375,
      "model_forward_time": 0.11491894721984863,
      "step": 40694
    },
    {
      "epoch": 0.00024837646484375,
      "step": 40694,
      "training_step_time": 0.3927481174468994
    },
    {
      "epoch": 0.000248382568359375,
      "model_forward_time": 0.11524295806884766,
      "step": 40695
    },
    {
      "epoch": 0.000248382568359375,
      "step": 40695,
      "training_step_time": 0.3849329948425293
    },
    {
      "epoch": 0.000248388671875,
      "model_forward_time": 0.11531186103820801,
      "step": 40696
    },
    {
      "epoch": 0.000248388671875,
      "step": 40696,
      "training_step_time": 0.39468884468078613
    },
    {
      "epoch": 0.000248394775390625,
      "model_forward_time": 0.11544632911682129,
      "step": 40697
    },
    {
      "epoch": 0.000248394775390625,
      "step": 40697,
      "training_step_time": 0.423903226852417
    },
    {
      "epoch": 0.00024840087890625,
      "model_forward_time": 0.11485910415649414,
      "step": 40698
    },
    {
      "epoch": 0.00024840087890625,
      "step": 40698,
      "training_step_time": 0.4931633472442627
    },
    {
      "epoch": 0.000248406982421875,
      "model_forward_time": 0.11496853828430176,
      "step": 40699
    },
    {
      "epoch": 0.000248406982421875,
      "step": 40699,
      "training_step_time": 0.3900907039642334
    },
    {
      "epoch": 0.0002484130859375,
      "grad_norm": 0.09896580129861832,
      "learning_rate": 2.571935818481005e-05,
      "loss": 0.0375,
      "step": 40700
    },
    {
      "epoch": 0.0002484130859375,
      "model_forward_time": 0.11434435844421387,
      "step": 40700
    },
    {
      "epoch": 0.0002484130859375,
      "step": 40700,
      "training_step_time": 0.3991248607635498
    },
    {
      "epoch": 0.000248419189453125,
      "model_forward_time": 0.1151278018951416,
      "step": 40701
    },
    {
      "epoch": 0.000248419189453125,
      "step": 40701,
      "training_step_time": 0.3954308032989502
    },
    {
      "epoch": 0.00024842529296875,
      "model_forward_time": 0.11483049392700195,
      "step": 40702
    },
    {
      "epoch": 0.00024842529296875,
      "step": 40702,
      "training_step_time": 0.36399102210998535
    },
    {
      "epoch": 0.000248431396484375,
      "model_forward_time": 0.11534500122070312,
      "step": 40703
    },
    {
      "epoch": 0.000248431396484375,
      "step": 40703,
      "training_step_time": 0.5821104049682617
    },
    {
      "epoch": 0.0002484375,
      "model_forward_time": 0.11488485336303711,
      "step": 40704
    },
    {
      "epoch": 0.0002484375,
      "step": 40704,
      "training_step_time": 0.47855591773986816
    },
    {
      "epoch": 0.000248443603515625,
      "model_forward_time": 0.11487197875976562,
      "step": 40705
    },
    {
      "epoch": 0.000248443603515625,
      "step": 40705,
      "training_step_time": 0.39396047592163086
    },
    {
      "epoch": 0.00024844970703125,
      "model_forward_time": 0.11461234092712402,
      "step": 40706
    },
    {
      "epoch": 0.00024844970703125,
      "step": 40706,
      "training_step_time": 0.39440083503723145
    },
    {
      "epoch": 0.000248455810546875,
      "model_forward_time": 0.11472845077514648,
      "step": 40707
    },
    {
      "epoch": 0.000248455810546875,
      "step": 40707,
      "training_step_time": 0.38894009590148926
    },
    {
      "epoch": 0.0002484619140625,
      "model_forward_time": 0.11462736129760742,
      "step": 40708
    },
    {
      "epoch": 0.0002484619140625,
      "step": 40708,
      "training_step_time": 0.38468432426452637
    },
    {
      "epoch": 0.000248468017578125,
      "model_forward_time": 0.11551308631896973,
      "step": 40709
    },
    {
      "epoch": 0.000248468017578125,
      "step": 40709,
      "training_step_time": 0.39153242111206055
    },
    {
      "epoch": 0.00024847412109375,
      "grad_norm": 0.11436460167169571,
      "learning_rate": 2.5695271535341443e-05,
      "loss": 0.0368,
      "step": 40710
    },
    {
      "epoch": 0.00024847412109375,
      "model_forward_time": 0.11532711982727051,
      "step": 40710
    },
    {
      "epoch": 0.00024847412109375,
      "step": 40710,
      "training_step_time": 0.43617701530456543
    },
    {
      "epoch": 0.000248480224609375,
      "model_forward_time": 0.11519837379455566,
      "step": 40711
    },
    {
      "epoch": 0.000248480224609375,
      "step": 40711,
      "training_step_time": 0.39520692825317383
    },
    {
      "epoch": 0.000248486328125,
      "model_forward_time": 0.11525297164916992,
      "step": 40712
    },
    {
      "epoch": 0.000248486328125,
      "step": 40712,
      "training_step_time": 0.3943600654602051
    },
    {
      "epoch": 0.000248492431640625,
      "model_forward_time": 0.11537957191467285,
      "step": 40713
    },
    {
      "epoch": 0.000248492431640625,
      "step": 40713,
      "training_step_time": 0.6008961200714111
    },
    {
      "epoch": 0.00024849853515625,
      "model_forward_time": 0.11503863334655762,
      "step": 40714
    },
    {
      "epoch": 0.00024849853515625,
      "step": 40714,
      "training_step_time": 0.3884260654449463
    },
    {
      "epoch": 0.000248504638671875,
      "model_forward_time": 0.11478447914123535,
      "step": 40715
    },
    {
      "epoch": 0.000248504638671875,
      "step": 40715,
      "training_step_time": 0.3932943344116211
    },
    {
      "epoch": 0.0002485107421875,
      "model_forward_time": 0.1151125431060791,
      "step": 40716
    },
    {
      "epoch": 0.0002485107421875,
      "step": 40716,
      "training_step_time": 0.40241193771362305
    },
    {
      "epoch": 0.000248516845703125,
      "model_forward_time": 0.11629772186279297,
      "step": 40717
    },
    {
      "epoch": 0.000248516845703125,
      "step": 40717,
      "training_step_time": 0.5054702758789062
    },
    {
      "epoch": 0.00024852294921875,
      "model_forward_time": 0.11513972282409668,
      "step": 40718
    },
    {
      "epoch": 0.00024852294921875,
      "step": 40718,
      "training_step_time": 0.4926939010620117
    },
    {
      "epoch": 0.000248529052734375,
      "model_forward_time": 0.11427974700927734,
      "step": 40719
    },
    {
      "epoch": 0.000248529052734375,
      "step": 40719,
      "training_step_time": 0.5075521469116211
    },
    {
      "epoch": 0.00024853515625,
      "grad_norm": 0.10537064075469971,
      "learning_rate": 2.567119226900764e-05,
      "loss": 0.0429,
      "step": 40720
    },
    {
      "epoch": 0.00024853515625,
      "model_forward_time": 0.11473870277404785,
      "step": 40720
    },
    {
      "epoch": 0.00024853515625,
      "step": 40720,
      "training_step_time": 0.388721227645874
    },
    {
      "epoch": 0.000248541259765625,
      "model_forward_time": 0.11511826515197754,
      "step": 40721
    },
    {
      "epoch": 0.000248541259765625,
      "step": 40721,
      "training_step_time": 0.3876230716705322
    },
    {
      "epoch": 0.00024854736328125,
      "model_forward_time": 0.11468648910522461,
      "step": 40722
    },
    {
      "epoch": 0.00024854736328125,
      "step": 40722,
      "training_step_time": 0.3984377384185791
    },
    {
      "epoch": 0.000248553466796875,
      "model_forward_time": 0.11511492729187012,
      "step": 40723
    },
    {
      "epoch": 0.000248553466796875,
      "step": 40723,
      "training_step_time": 0.39392590522766113
    },
    {
      "epoch": 0.0002485595703125,
      "model_forward_time": 0.11483049392700195,
      "step": 40724
    },
    {
      "epoch": 0.0002485595703125,
      "step": 40724,
      "training_step_time": 0.39382028579711914
    },
    {
      "epoch": 0.000248565673828125,
      "model_forward_time": 0.11513638496398926,
      "step": 40725
    },
    {
      "epoch": 0.000248565673828125,
      "step": 40725,
      "training_step_time": 0.6276609897613525
    },
    {
      "epoch": 0.00024857177734375,
      "model_forward_time": 0.11492085456848145,
      "step": 40726
    },
    {
      "epoch": 0.00024857177734375,
      "step": 40726,
      "training_step_time": 0.3977048397064209
    },
    {
      "epoch": 0.000248577880859375,
      "model_forward_time": 0.11495089530944824,
      "step": 40727
    },
    {
      "epoch": 0.000248577880859375,
      "step": 40727,
      "training_step_time": 0.39728522300720215
    },
    {
      "epoch": 0.000248583984375,
      "model_forward_time": 0.11501407623291016,
      "step": 40728
    },
    {
      "epoch": 0.000248583984375,
      "step": 40728,
      "training_step_time": 0.40429258346557617
    },
    {
      "epoch": 0.000248590087890625,
      "model_forward_time": 0.11463522911071777,
      "step": 40729
    },
    {
      "epoch": 0.000248590087890625,
      "step": 40729,
      "training_step_time": 0.3912544250488281
    },
    {
      "epoch": 0.00024859619140625,
      "grad_norm": 0.2020147144794464,
      "learning_rate": 2.5647120393123246e-05,
      "loss": 0.0385,
      "step": 40730
    },
    {
      "epoch": 0.00024859619140625,
      "model_forward_time": 0.11499595642089844,
      "step": 40730
    },
    {
      "epoch": 0.00024859619140625,
      "step": 40730,
      "training_step_time": 0.43090128898620605
    },
    {
      "epoch": 0.000248602294921875,
      "model_forward_time": 0.11545944213867188,
      "step": 40731
    },
    {
      "epoch": 0.000248602294921875,
      "step": 40731,
      "training_step_time": 0.6440224647521973
    },
    {
      "epoch": 0.0002486083984375,
      "model_forward_time": 0.11471199989318848,
      "step": 40732
    },
    {
      "epoch": 0.0002486083984375,
      "step": 40732,
      "training_step_time": 0.4749777317047119
    },
    {
      "epoch": 0.000248614501953125,
      "model_forward_time": 0.11429834365844727,
      "step": 40733
    },
    {
      "epoch": 0.000248614501953125,
      "step": 40733,
      "training_step_time": 0.38255929946899414
    },
    {
      "epoch": 0.00024862060546875,
      "model_forward_time": 0.11437344551086426,
      "step": 40734
    },
    {
      "epoch": 0.00024862060546875,
      "step": 40734,
      "training_step_time": 0.38956332206726074
    },
    {
      "epoch": 0.000248626708984375,
      "model_forward_time": 0.11504960060119629,
      "step": 40735
    },
    {
      "epoch": 0.000248626708984375,
      "step": 40735,
      "training_step_time": 0.3969693183898926
    },
    {
      "epoch": 0.0002486328125,
      "model_forward_time": 0.11503839492797852,
      "step": 40736
    },
    {
      "epoch": 0.0002486328125,
      "step": 40736,
      "training_step_time": 0.40152573585510254
    },
    {
      "epoch": 0.000248638916015625,
      "model_forward_time": 0.11530089378356934,
      "step": 40737
    },
    {
      "epoch": 0.000248638916015625,
      "step": 40737,
      "training_step_time": 0.5060670375823975
    },
    {
      "epoch": 0.00024864501953125,
      "model_forward_time": 0.11569404602050781,
      "step": 40738
    },
    {
      "epoch": 0.00024864501953125,
      "step": 40738,
      "training_step_time": 0.3919508457183838
    },
    {
      "epoch": 0.000248651123046875,
      "model_forward_time": 0.11523222923278809,
      "step": 40739
    },
    {
      "epoch": 0.000248651123046875,
      "step": 40739,
      "training_step_time": 0.3901662826538086
    },
    {
      "epoch": 0.0002486572265625,
      "grad_norm": 0.11717539280653,
      "learning_rate": 2.562305591500069e-05,
      "loss": 0.0394,
      "step": 40740
    },
    {
      "epoch": 0.0002486572265625,
      "model_forward_time": 0.11515331268310547,
      "step": 40740
    },
    {
      "epoch": 0.0002486572265625,
      "step": 40740,
      "training_step_time": 0.3933069705963135
    },
    {
      "epoch": 0.000248663330078125,
      "model_forward_time": 0.11559128761291504,
      "step": 40741
    },
    {
      "epoch": 0.000248663330078125,
      "step": 40741,
      "training_step_time": 0.3919491767883301
    },
    {
      "epoch": 0.00024866943359375,
      "model_forward_time": 0.1158144474029541,
      "step": 40742
    },
    {
      "epoch": 0.00024866943359375,
      "step": 40742,
      "training_step_time": 0.3918285369873047
    },
    {
      "epoch": 0.000248675537109375,
      "model_forward_time": 0.11524415016174316,
      "step": 40743
    },
    {
      "epoch": 0.000248675537109375,
      "step": 40743,
      "training_step_time": 0.6455903053283691
    },
    {
      "epoch": 0.000248681640625,
      "model_forward_time": 0.11461663246154785,
      "step": 40744
    },
    {
      "epoch": 0.000248681640625,
      "step": 40744,
      "training_step_time": 0.45510387420654297
    },
    {
      "epoch": 0.000248687744140625,
      "model_forward_time": 0.11537623405456543,
      "step": 40745
    },
    {
      "epoch": 0.000248687744140625,
      "step": 40745,
      "training_step_time": 0.4043104648590088
    },
    {
      "epoch": 0.00024869384765625,
      "model_forward_time": 0.11479783058166504,
      "step": 40746
    },
    {
      "epoch": 0.00024869384765625,
      "step": 40746,
      "training_step_time": 0.46125292778015137
    },
    {
      "epoch": 0.000248699951171875,
      "model_forward_time": 0.11536693572998047,
      "step": 40747
    },
    {
      "epoch": 0.000248699951171875,
      "step": 40747,
      "training_step_time": 0.44086790084838867
    },
    {
      "epoch": 0.0002487060546875,
      "model_forward_time": 0.1143951416015625,
      "step": 40748
    },
    {
      "epoch": 0.0002487060546875,
      "step": 40748,
      "training_step_time": 0.39141368865966797
    },
    {
      "epoch": 0.000248712158203125,
      "model_forward_time": 0.11509513854980469,
      "step": 40749
    },
    {
      "epoch": 0.000248712158203125,
      "step": 40749,
      "training_step_time": 0.49155330657958984
    },
    {
      "epoch": 0.00024871826171875,
      "grad_norm": 0.1303805112838745,
      "learning_rate": 2.5598998841950107e-05,
      "loss": 0.0447,
      "step": 40750
    },
    {
      "epoch": 0.00024871826171875,
      "model_forward_time": 0.11458492279052734,
      "step": 40750
    },
    {
      "epoch": 0.00024871826171875,
      "step": 40750,
      "training_step_time": 0.3970956802368164
    },
    {
      "epoch": 0.000248724365234375,
      "model_forward_time": 0.11484241485595703,
      "step": 40751
    },
    {
      "epoch": 0.000248724365234375,
      "step": 40751,
      "training_step_time": 0.39870595932006836
    },
    {
      "epoch": 0.00024873046875,
      "model_forward_time": 0.11589932441711426,
      "step": 40752
    },
    {
      "epoch": 0.00024873046875,
      "step": 40752,
      "training_step_time": 0.4190850257873535
    },
    {
      "epoch": 0.000248736572265625,
      "model_forward_time": 0.11465167999267578,
      "step": 40753
    },
    {
      "epoch": 0.000248736572265625,
      "step": 40753,
      "training_step_time": 0.3968367576599121
    },
    {
      "epoch": 0.00024874267578125,
      "model_forward_time": 0.11511397361755371,
      "step": 40754
    },
    {
      "epoch": 0.00024874267578125,
      "step": 40754,
      "training_step_time": 0.394132137298584
    },
    {
      "epoch": 0.000248748779296875,
      "model_forward_time": 0.11503863334655762,
      "step": 40755
    },
    {
      "epoch": 0.000248748779296875,
      "step": 40755,
      "training_step_time": 0.7095675468444824
    },
    {
      "epoch": 0.0002487548828125,
      "model_forward_time": 0.11507749557495117,
      "step": 40756
    },
    {
      "epoch": 0.0002487548828125,
      "step": 40756,
      "training_step_time": 0.4314868450164795
    },
    {
      "epoch": 0.000248760986328125,
      "model_forward_time": 0.11417102813720703,
      "step": 40757
    },
    {
      "epoch": 0.000248760986328125,
      "step": 40757,
      "training_step_time": 0.4078400135040283
    },
    {
      "epoch": 0.00024876708984375,
      "model_forward_time": 0.11449146270751953,
      "step": 40758
    },
    {
      "epoch": 0.00024876708984375,
      "step": 40758,
      "training_step_time": 0.47820329666137695
    },
    {
      "epoch": 0.000248773193359375,
      "model_forward_time": 0.11532807350158691,
      "step": 40759
    },
    {
      "epoch": 0.000248773193359375,
      "step": 40759,
      "training_step_time": 0.46573853492736816
    },
    {
      "epoch": 0.000248779296875,
      "grad_norm": 0.13080668449401855,
      "learning_rate": 2.55749491812794e-05,
      "loss": 0.0419,
      "step": 40760
    },
    {
      "epoch": 0.000248779296875,
      "model_forward_time": 0.11452198028564453,
      "step": 40760
    },
    {
      "epoch": 0.000248779296875,
      "step": 40760,
      "training_step_time": 0.46930766105651855
    },
    {
      "epoch": 0.000248785400390625,
      "model_forward_time": 0.11557412147521973,
      "step": 40761
    },
    {
      "epoch": 0.000248785400390625,
      "step": 40761,
      "training_step_time": 0.48099803924560547
    },
    {
      "epoch": 0.00024879150390625,
      "model_forward_time": 0.11477112770080566,
      "step": 40762
    },
    {
      "epoch": 0.00024879150390625,
      "step": 40762,
      "training_step_time": 0.3876199722290039
    },
    {
      "epoch": 0.000248797607421875,
      "model_forward_time": 0.11403441429138184,
      "step": 40763
    },
    {
      "epoch": 0.000248797607421875,
      "step": 40763,
      "training_step_time": 0.39357852935791016
    },
    {
      "epoch": 0.0002488037109375,
      "model_forward_time": 0.1146996021270752,
      "step": 40764
    },
    {
      "epoch": 0.0002488037109375,
      "step": 40764,
      "training_step_time": 0.39088916778564453
    },
    {
      "epoch": 0.000248809814453125,
      "model_forward_time": 0.11493635177612305,
      "step": 40765
    },
    {
      "epoch": 0.000248809814453125,
      "step": 40765,
      "training_step_time": 0.3949575424194336
    },
    {
      "epoch": 0.00024881591796875,
      "model_forward_time": 0.11513614654541016,
      "step": 40766
    },
    {
      "epoch": 0.00024881591796875,
      "step": 40766,
      "training_step_time": 0.3913125991821289
    },
    {
      "epoch": 0.000248822021484375,
      "model_forward_time": 0.11478376388549805,
      "step": 40767
    },
    {
      "epoch": 0.000248822021484375,
      "step": 40767,
      "training_step_time": 0.6218926906585693
    },
    {
      "epoch": 0.000248828125,
      "model_forward_time": 0.11416816711425781,
      "step": 40768
    },
    {
      "epoch": 0.000248828125,
      "step": 40768,
      "training_step_time": 0.3859212398529053
    },
    {
      "epoch": 0.000248834228515625,
      "model_forward_time": 0.11608147621154785,
      "step": 40769
    },
    {
      "epoch": 0.000248834228515625,
      "step": 40769,
      "training_step_time": 0.39310717582702637
    },
    {
      "epoch": 0.00024884033203125,
      "grad_norm": 0.09232273697853088,
      "learning_rate": 2.555090694029421e-05,
      "loss": 0.0317,
      "step": 40770
    },
    {
      "epoch": 0.00024884033203125,
      "model_forward_time": 0.1143343448638916,
      "step": 40770
    },
    {
      "epoch": 0.00024884033203125,
      "step": 40770,
      "training_step_time": 0.4205646514892578
    },
    {
      "epoch": 0.000248846435546875,
      "model_forward_time": 0.11464715003967285,
      "step": 40771
    },
    {
      "epoch": 0.000248846435546875,
      "step": 40771,
      "training_step_time": 0.38818931579589844
    },
    {
      "epoch": 0.0002488525390625,
      "model_forward_time": 0.1155862808227539,
      "step": 40772
    },
    {
      "epoch": 0.0002488525390625,
      "step": 40772,
      "training_step_time": 0.3849620819091797
    },
    {
      "epoch": 0.000248858642578125,
      "model_forward_time": 0.11507701873779297,
      "step": 40773
    },
    {
      "epoch": 0.000248858642578125,
      "step": 40773,
      "training_step_time": 0.73649001121521
    },
    {
      "epoch": 0.00024886474609375,
      "model_forward_time": 0.11458301544189453,
      "step": 40774
    },
    {
      "epoch": 0.00024886474609375,
      "step": 40774,
      "training_step_time": 0.47164487838745117
    },
    {
      "epoch": 0.000248870849609375,
      "model_forward_time": 0.11463141441345215,
      "step": 40775
    },
    {
      "epoch": 0.000248870849609375,
      "step": 40775,
      "training_step_time": 0.3890266418457031
    },
    {
      "epoch": 0.000248876953125,
      "model_forward_time": 0.11474919319152832,
      "step": 40776
    },
    {
      "epoch": 0.000248876953125,
      "step": 40776,
      "training_step_time": 0.3831601142883301
    },
    {
      "epoch": 0.000248883056640625,
      "model_forward_time": 0.11469793319702148,
      "step": 40777
    },
    {
      "epoch": 0.000248883056640625,
      "step": 40777,
      "training_step_time": 0.39406371116638184
    },
    {
      "epoch": 0.00024888916015625,
      "model_forward_time": 0.1143643856048584,
      "step": 40778
    },
    {
      "epoch": 0.00024888916015625,
      "step": 40778,
      "training_step_time": 0.38875913619995117
    },
    {
      "epoch": 0.000248895263671875,
      "model_forward_time": 0.11526656150817871,
      "step": 40779
    },
    {
      "epoch": 0.000248895263671875,
      "step": 40779,
      "training_step_time": 0.5385110378265381
    },
    {
      "epoch": 0.0002489013671875,
      "grad_norm": 0.13889923691749573,
      "learning_rate": 2.552687212629799e-05,
      "loss": 0.0389,
      "step": 40780
    },
    {
      "epoch": 0.0002489013671875,
      "model_forward_time": 0.11509060859680176,
      "step": 40780
    },
    {
      "epoch": 0.0002489013671875,
      "step": 40780,
      "training_step_time": 0.3866395950317383
    },
    {
      "epoch": 0.000248907470703125,
      "model_forward_time": 0.11548161506652832,
      "step": 40781
    },
    {
      "epoch": 0.000248907470703125,
      "step": 40781,
      "training_step_time": 0.3975698947906494
    },
    {
      "epoch": 0.00024891357421875,
      "model_forward_time": 0.11499190330505371,
      "step": 40782
    },
    {
      "epoch": 0.00024891357421875,
      "step": 40782,
      "training_step_time": 0.45922279357910156
    },
    {
      "epoch": 0.000248919677734375,
      "model_forward_time": 0.11565923690795898,
      "step": 40783
    },
    {
      "epoch": 0.000248919677734375,
      "step": 40783,
      "training_step_time": 0.43564343452453613
    },
    {
      "epoch": 0.00024892578125,
      "model_forward_time": 0.11469650268554688,
      "step": 40784
    },
    {
      "epoch": 0.00024892578125,
      "step": 40784,
      "training_step_time": 0.3928108215332031
    },
    {
      "epoch": 0.000248931884765625,
      "model_forward_time": 0.11538434028625488,
      "step": 40785
    },
    {
      "epoch": 0.000248931884765625,
      "step": 40785,
      "training_step_time": 0.7093241214752197
    },
    {
      "epoch": 0.00024893798828125,
      "model_forward_time": 0.11487889289855957,
      "step": 40786
    },
    {
      "epoch": 0.00024893798828125,
      "step": 40786,
      "training_step_time": 0.39936065673828125
    },
    {
      "epoch": 0.000248944091796875,
      "model_forward_time": 0.11423110961914062,
      "step": 40787
    },
    {
      "epoch": 0.000248944091796875,
      "step": 40787,
      "training_step_time": 0.4472990036010742
    },
    {
      "epoch": 0.0002489501953125,
      "model_forward_time": 0.11494827270507812,
      "step": 40788
    },
    {
      "epoch": 0.0002489501953125,
      "step": 40788,
      "training_step_time": 0.45444822311401367
    },
    {
      "epoch": 0.000248956298828125,
      "model_forward_time": 0.11484789848327637,
      "step": 40789
    },
    {
      "epoch": 0.000248956298828125,
      "step": 40789,
      "training_step_time": 0.3857095241546631
    },
    {
      "epoch": 0.00024896240234375,
      "grad_norm": 0.09837678074836731,
      "learning_rate": 2.5502844746591804e-05,
      "loss": 0.0455,
      "step": 40790
    },
    {
      "epoch": 0.00024896240234375,
      "model_forward_time": 0.11450409889221191,
      "step": 40790
    },
    {
      "epoch": 0.00024896240234375,
      "step": 40790,
      "training_step_time": 0.388338565826416
    },
    {
      "epoch": 0.000248968505859375,
      "model_forward_time": 0.11499166488647461,
      "step": 40791
    },
    {
      "epoch": 0.000248968505859375,
      "step": 40791,
      "training_step_time": 0.38622236251831055
    },
    {
      "epoch": 0.000248974609375,
      "model_forward_time": 0.11461019515991211,
      "step": 40792
    },
    {
      "epoch": 0.000248974609375,
      "step": 40792,
      "training_step_time": 0.3866453170776367
    },
    {
      "epoch": 0.000248980712890625,
      "model_forward_time": 0.11536240577697754,
      "step": 40793
    },
    {
      "epoch": 0.000248980712890625,
      "step": 40793,
      "training_step_time": 0.390946626663208
    },
    {
      "epoch": 0.00024898681640625,
      "model_forward_time": 0.11562347412109375,
      "step": 40794
    },
    {
      "epoch": 0.00024898681640625,
      "step": 40794,
      "training_step_time": 0.3898189067840576
    },
    {
      "epoch": 0.000248992919921875,
      "model_forward_time": 0.11473941802978516,
      "step": 40795
    },
    {
      "epoch": 0.000248992919921875,
      "step": 40795,
      "training_step_time": 0.39588141441345215
    },
    {
      "epoch": 0.0002489990234375,
      "model_forward_time": 0.11698532104492188,
      "step": 40796
    },
    {
      "epoch": 0.0002489990234375,
      "step": 40796,
      "training_step_time": 0.3961007595062256
    },
    {
      "epoch": 0.000249005126953125,
      "model_forward_time": 0.1158151626586914,
      "step": 40797
    },
    {
      "epoch": 0.000249005126953125,
      "step": 40797,
      "training_step_time": 0.5680630207061768
    },
    {
      "epoch": 0.00024901123046875,
      "model_forward_time": 0.1148078441619873,
      "step": 40798
    },
    {
      "epoch": 0.00024901123046875,
      "step": 40798,
      "training_step_time": 0.40920257568359375
    },
    {
      "epoch": 0.000249017333984375,
      "model_forward_time": 0.11510658264160156,
      "step": 40799
    },
    {
      "epoch": 0.000249017333984375,
      "step": 40799,
      "training_step_time": 0.4644293785095215
    },
    {
      "epoch": 0.0002490234375,
      "grad_norm": 0.15110233426094055,
      "learning_rate": 2.547882480847461e-05,
      "loss": 0.0344,
      "step": 40800
    },
    {
      "epoch": 0.0002490234375,
      "model_forward_time": 0.11446905136108398,
      "step": 40800
    },
    {
      "epoch": 0.0002490234375,
      "step": 40800,
      "training_step_time": 0.42264533042907715
    },
    {
      "epoch": 0.000249029541015625,
      "model_forward_time": 0.11458325386047363,
      "step": 40801
    },
    {
      "epoch": 0.000249029541015625,
      "step": 40801,
      "training_step_time": 0.449404239654541
    },
    {
      "epoch": 0.00024903564453125,
      "model_forward_time": 0.11507391929626465,
      "step": 40802
    },
    {
      "epoch": 0.00024903564453125,
      "step": 40802,
      "training_step_time": 0.486757755279541
    },
    {
      "epoch": 0.000249041748046875,
      "model_forward_time": 0.11530208587646484,
      "step": 40803
    },
    {
      "epoch": 0.000249041748046875,
      "step": 40803,
      "training_step_time": 0.394148588180542
    },
    {
      "epoch": 0.0002490478515625,
      "model_forward_time": 0.11441946029663086,
      "step": 40804
    },
    {
      "epoch": 0.0002490478515625,
      "step": 40804,
      "training_step_time": 0.39618968963623047
    },
    {
      "epoch": 0.000249053955078125,
      "model_forward_time": 0.11490964889526367,
      "step": 40805
    },
    {
      "epoch": 0.000249053955078125,
      "step": 40805,
      "training_step_time": 0.3907802104949951
    },
    {
      "epoch": 0.00024906005859375,
      "model_forward_time": 0.1148688793182373,
      "step": 40806
    },
    {
      "epoch": 0.00024906005859375,
      "step": 40806,
      "training_step_time": 0.38667893409729004
    },
    {
      "epoch": 0.000249066162109375,
      "model_forward_time": 0.11508417129516602,
      "step": 40807
    },
    {
      "epoch": 0.000249066162109375,
      "step": 40807,
      "training_step_time": 0.3947443962097168
    },
    {
      "epoch": 0.000249072265625,
      "model_forward_time": 0.11623954772949219,
      "step": 40808
    },
    {
      "epoch": 0.000249072265625,
      "step": 40808,
      "training_step_time": 0.4041759967803955
    },
    {
      "epoch": 0.000249078369140625,
      "model_forward_time": 0.1149592399597168,
      "step": 40809
    },
    {
      "epoch": 0.000249078369140625,
      "step": 40809,
      "training_step_time": 0.7298874855041504
    },
    {
      "epoch": 0.00024908447265625,
      "grad_norm": 0.11340652406215668,
      "learning_rate": 2.545481231924296e-05,
      "loss": 0.0392,
      "step": 40810
    },
    {
      "epoch": 0.00024908447265625,
      "model_forward_time": 0.11482810974121094,
      "step": 40810
    },
    {
      "epoch": 0.00024908447265625,
      "step": 40810,
      "training_step_time": 0.4015820026397705
    },
    {
      "epoch": 0.000249090576171875,
      "model_forward_time": 0.11464500427246094,
      "step": 40811
    },
    {
      "epoch": 0.000249090576171875,
      "step": 40811,
      "training_step_time": 0.3883500099182129
    },
    {
      "epoch": 0.0002490966796875,
      "model_forward_time": 0.11466670036315918,
      "step": 40812
    },
    {
      "epoch": 0.0002490966796875,
      "step": 40812,
      "training_step_time": 0.3963437080383301
    },
    {
      "epoch": 0.000249102783203125,
      "model_forward_time": 0.11425137519836426,
      "step": 40813
    },
    {
      "epoch": 0.000249102783203125,
      "step": 40813,
      "training_step_time": 0.4951612949371338
    },
    {
      "epoch": 0.00024910888671875,
      "model_forward_time": 0.11448311805725098,
      "step": 40814
    },
    {
      "epoch": 0.00024910888671875,
      "step": 40814,
      "training_step_time": 0.36380720138549805
    },
    {
      "epoch": 0.000249114990234375,
      "model_forward_time": 0.11478233337402344,
      "step": 40815
    },
    {
      "epoch": 0.000249114990234375,
      "step": 40815,
      "training_step_time": 0.5824606418609619
    },
    {
      "epoch": 0.00024912109375,
      "model_forward_time": 0.11478757858276367,
      "step": 40816
    },
    {
      "epoch": 0.00024912109375,
      "step": 40816,
      "training_step_time": 0.4047262668609619
    },
    {
      "epoch": 0.000249127197265625,
      "model_forward_time": 0.11457276344299316,
      "step": 40817
    },
    {
      "epoch": 0.000249127197265625,
      "step": 40817,
      "training_step_time": 0.3785257339477539
    },
    {
      "epoch": 0.00024913330078125,
      "model_forward_time": 0.11465334892272949,
      "step": 40818
    },
    {
      "epoch": 0.00024913330078125,
      "step": 40818,
      "training_step_time": 0.3868727684020996
    },
    {
      "epoch": 0.000249139404296875,
      "model_forward_time": 0.11448979377746582,
      "step": 40819
    },
    {
      "epoch": 0.000249139404296875,
      "step": 40819,
      "training_step_time": 0.3926723003387451
    },
    {
      "epoch": 0.0002491455078125,
      "grad_norm": 0.11685571819543839,
      "learning_rate": 2.543080728619127e-05,
      "loss": 0.033,
      "step": 40820
    },
    {
      "epoch": 0.0002491455078125,
      "model_forward_time": 0.11507940292358398,
      "step": 40820
    },
    {
      "epoch": 0.0002491455078125,
      "step": 40820,
      "training_step_time": 0.38810253143310547
    },
    {
      "epoch": 0.000249151611328125,
      "model_forward_time": 0.1151278018951416,
      "step": 40821
    },
    {
      "epoch": 0.000249151611328125,
      "step": 40821,
      "training_step_time": 0.6070013046264648
    },
    {
      "epoch": 0.00024915771484375,
      "model_forward_time": 0.11544132232666016,
      "step": 40822
    },
    {
      "epoch": 0.00024915771484375,
      "step": 40822,
      "training_step_time": 0.41530752182006836
    },
    {
      "epoch": 0.000249163818359375,
      "model_forward_time": 0.11455130577087402,
      "step": 40823
    },
    {
      "epoch": 0.000249163818359375,
      "step": 40823,
      "training_step_time": 0.4614589214324951
    },
    {
      "epoch": 0.000249169921875,
      "model_forward_time": 0.11505126953125,
      "step": 40824
    },
    {
      "epoch": 0.000249169921875,
      "step": 40824,
      "training_step_time": 0.38844752311706543
    },
    {
      "epoch": 0.000249176025390625,
      "model_forward_time": 0.11492252349853516,
      "step": 40825
    },
    {
      "epoch": 0.000249176025390625,
      "step": 40825,
      "training_step_time": 0.39537906646728516
    },
    {
      "epoch": 0.00024918212890625,
      "model_forward_time": 0.11551547050476074,
      "step": 40826
    },
    {
      "epoch": 0.00024918212890625,
      "step": 40826,
      "training_step_time": 0.442854642868042
    },
    {
      "epoch": 0.000249188232421875,
      "model_forward_time": 0.11481738090515137,
      "step": 40827
    },
    {
      "epoch": 0.000249188232421875,
      "step": 40827,
      "training_step_time": 0.5070955753326416
    },
    {
      "epoch": 0.0002491943359375,
      "model_forward_time": 0.11507821083068848,
      "step": 40828
    },
    {
      "epoch": 0.0002491943359375,
      "step": 40828,
      "training_step_time": 0.4682915210723877
    },
    {
      "epoch": 0.000249200439453125,
      "model_forward_time": 0.11470770835876465,
      "step": 40829
    },
    {
      "epoch": 0.000249200439453125,
      "step": 40829,
      "training_step_time": 0.531435489654541
    },
    {
      "epoch": 0.00024920654296875,
      "grad_norm": 0.10397078096866608,
      "learning_rate": 2.540680971661161e-05,
      "loss": 0.0356,
      "step": 40830
    },
    {
      "epoch": 0.00024920654296875,
      "model_forward_time": 0.11405658721923828,
      "step": 40830
    },
    {
      "epoch": 0.00024920654296875,
      "step": 40830,
      "training_step_time": 0.42974400520324707
    },
    {
      "epoch": 0.000249212646484375,
      "model_forward_time": 0.11451125144958496,
      "step": 40831
    },
    {
      "epoch": 0.000249212646484375,
      "step": 40831,
      "training_step_time": 0.3895456790924072
    },
    {
      "epoch": 0.00024921875,
      "model_forward_time": 0.1139674186706543,
      "step": 40832
    },
    {
      "epoch": 0.00024921875,
      "step": 40832,
      "training_step_time": 0.40285181999206543
    },
    {
      "epoch": 0.000249224853515625,
      "model_forward_time": 0.11417245864868164,
      "step": 40833
    },
    {
      "epoch": 0.000249224853515625,
      "step": 40833,
      "training_step_time": 0.41386842727661133
    },
    {
      "epoch": 0.00024923095703125,
      "model_forward_time": 0.11576366424560547,
      "step": 40834
    },
    {
      "epoch": 0.00024923095703125,
      "step": 40834,
      "training_step_time": 0.4088914394378662
    },
    {
      "epoch": 0.000249237060546875,
      "model_forward_time": 0.11419987678527832,
      "step": 40835
    },
    {
      "epoch": 0.000249237060546875,
      "step": 40835,
      "training_step_time": 0.41207289695739746
    },
    {
      "epoch": 0.0002492431640625,
      "model_forward_time": 0.11516022682189941,
      "step": 40836
    },
    {
      "epoch": 0.0002492431640625,
      "step": 40836,
      "training_step_time": 0.4096674919128418
    },
    {
      "epoch": 0.000249249267578125,
      "model_forward_time": 0.11552643775939941,
      "step": 40837
    },
    {
      "epoch": 0.000249249267578125,
      "step": 40837,
      "training_step_time": 0.40837883949279785
    },
    {
      "epoch": 0.00024925537109375,
      "model_forward_time": 0.1151881217956543,
      "step": 40838
    },
    {
      "epoch": 0.00024925537109375,
      "step": 40838,
      "training_step_time": 0.3959531784057617
    },
    {
      "epoch": 0.000249261474609375,
      "model_forward_time": 0.1147465705871582,
      "step": 40839
    },
    {
      "epoch": 0.000249261474609375,
      "step": 40839,
      "training_step_time": 0.4210212230682373
    },
    {
      "epoch": 0.000249267578125,
      "grad_norm": 0.1282326877117157,
      "learning_rate": 2.5382819617793813e-05,
      "loss": 0.0369,
      "step": 40840
    },
    {
      "epoch": 0.000249267578125,
      "model_forward_time": 0.11536645889282227,
      "step": 40840
    },
    {
      "epoch": 0.000249267578125,
      "step": 40840,
      "training_step_time": 0.39846348762512207
    },
    {
      "epoch": 0.000249273681640625,
      "model_forward_time": 0.11570072174072266,
      "step": 40841
    },
    {
      "epoch": 0.000249273681640625,
      "step": 40841,
      "training_step_time": 0.4431586265563965
    },
    {
      "epoch": 0.00024927978515625,
      "model_forward_time": 0.11498308181762695,
      "step": 40842
    },
    {
      "epoch": 0.00024927978515625,
      "step": 40842,
      "training_step_time": 0.4666764736175537
    },
    {
      "epoch": 0.000249285888671875,
      "model_forward_time": 0.11510396003723145,
      "step": 40843
    },
    {
      "epoch": 0.000249285888671875,
      "step": 40843,
      "training_step_time": 0.46260833740234375
    },
    {
      "epoch": 0.0002492919921875,
      "model_forward_time": 0.11519837379455566,
      "step": 40844
    },
    {
      "epoch": 0.0002492919921875,
      "step": 40844,
      "training_step_time": 0.5007171630859375
    },
    {
      "epoch": 0.000249298095703125,
      "model_forward_time": 0.11543607711791992,
      "step": 40845
    },
    {
      "epoch": 0.000249298095703125,
      "step": 40845,
      "training_step_time": 0.3834867477416992
    },
    {
      "epoch": 0.00024930419921875,
      "model_forward_time": 0.11459755897521973,
      "step": 40846
    },
    {
      "epoch": 0.00024930419921875,
      "step": 40846,
      "training_step_time": 0.39218926429748535
    },
    {
      "epoch": 0.000249310302734375,
      "model_forward_time": 0.11477184295654297,
      "step": 40847
    },
    {
      "epoch": 0.000249310302734375,
      "step": 40847,
      "training_step_time": 0.38959550857543945
    },
    {
      "epoch": 0.00024931640625,
      "model_forward_time": 0.11462831497192383,
      "step": 40848
    },
    {
      "epoch": 0.00024931640625,
      "step": 40848,
      "training_step_time": 0.40732622146606445
    },
    {
      "epoch": 0.000249322509765625,
      "model_forward_time": 0.11479425430297852,
      "step": 40849
    },
    {
      "epoch": 0.000249322509765625,
      "step": 40849,
      "training_step_time": 0.4156301021575928
    },
    {
      "epoch": 0.00024932861328125,
      "grad_norm": 0.09331520646810532,
      "learning_rate": 2.5358836997025437e-05,
      "loss": 0.0368,
      "step": 40850
    },
    {
      "epoch": 0.00024932861328125,
      "model_forward_time": 0.1152949333190918,
      "step": 40850
    },
    {
      "epoch": 0.00024932861328125,
      "step": 40850,
      "training_step_time": 0.4100980758666992
    },
    {
      "epoch": 0.000249334716796875,
      "model_forward_time": 0.11452460289001465,
      "step": 40851
    },
    {
      "epoch": 0.000249334716796875,
      "step": 40851,
      "training_step_time": 0.505434513092041
    },
    {
      "epoch": 0.0002493408203125,
      "model_forward_time": 0.11494064331054688,
      "step": 40852
    },
    {
      "epoch": 0.0002493408203125,
      "step": 40852,
      "training_step_time": 0.3960561752319336
    },
    {
      "epoch": 0.000249346923828125,
      "model_forward_time": 0.1147165298461914,
      "step": 40853
    },
    {
      "epoch": 0.000249346923828125,
      "step": 40853,
      "training_step_time": 0.39679813385009766
    },
    {
      "epoch": 0.00024935302734375,
      "model_forward_time": 0.11513042449951172,
      "step": 40854
    },
    {
      "epoch": 0.00024935302734375,
      "step": 40854,
      "training_step_time": 0.39838314056396484
    },
    {
      "epoch": 0.000249359130859375,
      "model_forward_time": 0.11453509330749512,
      "step": 40855
    },
    {
      "epoch": 0.000249359130859375,
      "step": 40855,
      "training_step_time": 0.4395785331726074
    },
    {
      "epoch": 0.000249365234375,
      "model_forward_time": 0.11534976959228516,
      "step": 40856
    },
    {
      "epoch": 0.000249365234375,
      "step": 40856,
      "training_step_time": 0.44034481048583984
    },
    {
      "epoch": 0.000249371337890625,
      "model_forward_time": 0.11634969711303711,
      "step": 40857
    },
    {
      "epoch": 0.000249371337890625,
      "step": 40857,
      "training_step_time": 0.5294907093048096
    },
    {
      "epoch": 0.00024937744140625,
      "model_forward_time": 0.11535024642944336,
      "step": 40858
    },
    {
      "epoch": 0.00024937744140625,
      "step": 40858,
      "training_step_time": 0.44310617446899414
    },
    {
      "epoch": 0.000249383544921875,
      "model_forward_time": 0.11501359939575195,
      "step": 40859
    },
    {
      "epoch": 0.000249383544921875,
      "step": 40859,
      "training_step_time": 0.43831467628479004
    },
    {
      "epoch": 0.0002493896484375,
      "grad_norm": 0.11477302014827728,
      "learning_rate": 2.5334861861591753e-05,
      "loss": 0.037,
      "step": 40860
    },
    {
      "epoch": 0.0002493896484375,
      "model_forward_time": 0.11465740203857422,
      "step": 40860
    },
    {
      "epoch": 0.0002493896484375,
      "step": 40860,
      "training_step_time": 0.39377927780151367
    },
    {
      "epoch": 0.000249395751953125,
      "model_forward_time": 0.11460542678833008,
      "step": 40861
    },
    {
      "epoch": 0.000249395751953125,
      "step": 40861,
      "training_step_time": 0.3890364170074463
    },
    {
      "epoch": 0.00024940185546875,
      "model_forward_time": 0.11536669731140137,
      "step": 40862
    },
    {
      "epoch": 0.00024940185546875,
      "step": 40862,
      "training_step_time": 0.4344041347503662
    },
    {
      "epoch": 0.000249407958984375,
      "model_forward_time": 0.11512613296508789,
      "step": 40863
    },
    {
      "epoch": 0.000249407958984375,
      "step": 40863,
      "training_step_time": 0.555321455001831
    },
    {
      "epoch": 0.0002494140625,
      "model_forward_time": 0.11471033096313477,
      "step": 40864
    },
    {
      "epoch": 0.0002494140625,
      "step": 40864,
      "training_step_time": 0.3840665817260742
    },
    {
      "epoch": 0.000249420166015625,
      "model_forward_time": 0.11568021774291992,
      "step": 40865
    },
    {
      "epoch": 0.000249420166015625,
      "step": 40865,
      "training_step_time": 0.3932838439941406
    },
    {
      "epoch": 0.00024942626953125,
      "model_forward_time": 0.1154325008392334,
      "step": 40866
    },
    {
      "epoch": 0.00024942626953125,
      "step": 40866,
      "training_step_time": 0.401869535446167
    },
    {
      "epoch": 0.000249432373046875,
      "model_forward_time": 0.11583685874938965,
      "step": 40867
    },
    {
      "epoch": 0.000249432373046875,
      "step": 40867,
      "training_step_time": 0.3947782516479492
    },
    {
      "epoch": 0.0002494384765625,
      "model_forward_time": 0.11501169204711914,
      "step": 40868
    },
    {
      "epoch": 0.0002494384765625,
      "step": 40868,
      "training_step_time": 0.4059431552886963
    },
    {
      "epoch": 0.000249444580078125,
      "model_forward_time": 0.11481714248657227,
      "step": 40869
    },
    {
      "epoch": 0.000249444580078125,
      "step": 40869,
      "training_step_time": 0.6608269214630127
    },
    {
      "epoch": 0.00024945068359375,
      "grad_norm": 0.14786282181739807,
      "learning_rate": 2.5310894218775805e-05,
      "loss": 0.0403,
      "step": 40870
    },
    {
      "epoch": 0.00024945068359375,
      "model_forward_time": 0.11447596549987793,
      "step": 40870
    },
    {
      "epoch": 0.00024945068359375,
      "step": 40870,
      "training_step_time": 0.49187159538269043
    },
    {
      "epoch": 0.000249456787109375,
      "model_forward_time": 0.11456465721130371,
      "step": 40871
    },
    {
      "epoch": 0.000249456787109375,
      "step": 40871,
      "training_step_time": 0.4145827293395996
    },
    {
      "epoch": 0.000249462890625,
      "model_forward_time": 0.1151578426361084,
      "step": 40872
    },
    {
      "epoch": 0.000249462890625,
      "step": 40872,
      "training_step_time": 0.4902169704437256
    },
    {
      "epoch": 0.000249468994140625,
      "model_forward_time": 0.11462998390197754,
      "step": 40873
    },
    {
      "epoch": 0.000249468994140625,
      "step": 40873,
      "training_step_time": 0.4078664779663086
    },
    {
      "epoch": 0.00024947509765625,
      "model_forward_time": 0.11461591720581055,
      "step": 40874
    },
    {
      "epoch": 0.00024947509765625,
      "step": 40874,
      "training_step_time": 0.3912181854248047
    },
    {
      "epoch": 0.000249481201171875,
      "model_forward_time": 0.11455774307250977,
      "step": 40875
    },
    {
      "epoch": 0.000249481201171875,
      "step": 40875,
      "training_step_time": 0.4370696544647217
    },
    {
      "epoch": 0.0002494873046875,
      "model_forward_time": 0.11500954627990723,
      "step": 40876
    },
    {
      "epoch": 0.0002494873046875,
      "step": 40876,
      "training_step_time": 0.4058961868286133
    },
    {
      "epoch": 0.000249493408203125,
      "model_forward_time": 0.11492729187011719,
      "step": 40877
    },
    {
      "epoch": 0.000249493408203125,
      "step": 40877,
      "training_step_time": 0.3948557376861572
    },
    {
      "epoch": 0.00024949951171875,
      "model_forward_time": 0.11503911018371582,
      "step": 40878
    },
    {
      "epoch": 0.00024949951171875,
      "step": 40878,
      "training_step_time": 0.401231050491333
    },
    {
      "epoch": 0.000249505615234375,
      "model_forward_time": 0.11484456062316895,
      "step": 40879
    },
    {
      "epoch": 0.000249505615234375,
      "step": 40879,
      "training_step_time": 0.3966197967529297
    },
    {
      "epoch": 0.00024951171875,
      "grad_norm": 0.08584695309400558,
      "learning_rate": 2.528693407585832e-05,
      "loss": 0.0366,
      "step": 40880
    },
    {
      "epoch": 0.00024951171875,
      "model_forward_time": 0.1147768497467041,
      "step": 40880
    },
    {
      "epoch": 0.00024951171875,
      "step": 40880,
      "training_step_time": 0.39109158515930176
    },
    {
      "epoch": 0.000249517822265625,
      "model_forward_time": 0.11497926712036133,
      "step": 40881
    },
    {
      "epoch": 0.000249517822265625,
      "step": 40881,
      "training_step_time": 0.5169651508331299
    },
    {
      "epoch": 0.00024952392578125,
      "model_forward_time": 0.11594843864440918,
      "step": 40882
    },
    {
      "epoch": 0.00024952392578125,
      "step": 40882,
      "training_step_time": 0.3909189701080322
    },
    {
      "epoch": 0.000249530029296875,
      "model_forward_time": 0.11488556861877441,
      "step": 40883
    },
    {
      "epoch": 0.000249530029296875,
      "step": 40883,
      "training_step_time": 0.4008336067199707
    },
    {
      "epoch": 0.0002495361328125,
      "model_forward_time": 0.11479926109313965,
      "step": 40884
    },
    {
      "epoch": 0.0002495361328125,
      "step": 40884,
      "training_step_time": 0.4524397850036621
    },
    {
      "epoch": 0.000249542236328125,
      "model_forward_time": 0.11531209945678711,
      "step": 40885
    },
    {
      "epoch": 0.000249542236328125,
      "step": 40885,
      "training_step_time": 0.36899375915527344
    },
    {
      "epoch": 0.00024954833984375,
      "model_forward_time": 0.11624407768249512,
      "step": 40886
    },
    {
      "epoch": 0.00024954833984375,
      "step": 40886,
      "training_step_time": 0.47054386138916016
    },
    {
      "epoch": 0.000249554443359375,
      "model_forward_time": 0.11497855186462402,
      "step": 40887
    },
    {
      "epoch": 0.000249554443359375,
      "step": 40887,
      "training_step_time": 0.6339213848114014
    },
    {
      "epoch": 0.000249560546875,
      "model_forward_time": 0.11442708969116211,
      "step": 40888
    },
    {
      "epoch": 0.000249560546875,
      "step": 40888,
      "training_step_time": 0.437239408493042
    },
    {
      "epoch": 0.000249566650390625,
      "model_forward_time": 0.11514830589294434,
      "step": 40889
    },
    {
      "epoch": 0.000249566650390625,
      "step": 40889,
      "training_step_time": 0.4172635078430176
    },
    {
      "epoch": 0.00024957275390625,
      "grad_norm": 0.13356438279151917,
      "learning_rate": 2.526298144011775e-05,
      "loss": 0.0366,
      "step": 40890
    },
    {
      "epoch": 0.00024957275390625,
      "model_forward_time": 0.11438465118408203,
      "step": 40890
    },
    {
      "epoch": 0.00024957275390625,
      "step": 40890,
      "training_step_time": 0.3965933322906494
    },
    {
      "epoch": 0.000249578857421875,
      "model_forward_time": 0.11414122581481934,
      "step": 40891
    },
    {
      "epoch": 0.000249578857421875,
      "step": 40891,
      "training_step_time": 0.3921055793762207
    },
    {
      "epoch": 0.0002495849609375,
      "model_forward_time": 0.11416196823120117,
      "step": 40892
    },
    {
      "epoch": 0.0002495849609375,
      "step": 40892,
      "training_step_time": 0.3965311050415039
    },
    {
      "epoch": 0.000249591064453125,
      "model_forward_time": 0.11492061614990234,
      "step": 40893
    },
    {
      "epoch": 0.000249591064453125,
      "step": 40893,
      "training_step_time": 0.4577205181121826
    },
    {
      "epoch": 0.00024959716796875,
      "model_forward_time": 0.11466312408447266,
      "step": 40894
    },
    {
      "epoch": 0.00024959716796875,
      "step": 40894,
      "training_step_time": 0.397430419921875
    },
    {
      "epoch": 0.000249603271484375,
      "model_forward_time": 0.11468315124511719,
      "step": 40895
    },
    {
      "epoch": 0.000249603271484375,
      "step": 40895,
      "training_step_time": 0.40155816078186035
    },
    {
      "epoch": 0.000249609375,
      "model_forward_time": 0.1150977611541748,
      "step": 40896
    },
    {
      "epoch": 0.000249609375,
      "step": 40896,
      "training_step_time": 0.39313769340515137
    },
    {
      "epoch": 0.000249615478515625,
      "model_forward_time": 0.11506366729736328,
      "step": 40897
    },
    {
      "epoch": 0.000249615478515625,
      "step": 40897,
      "training_step_time": 0.40612196922302246
    },
    {
      "epoch": 0.00024962158203125,
      "model_forward_time": 0.11568331718444824,
      "step": 40898
    },
    {
      "epoch": 0.00024962158203125,
      "step": 40898,
      "training_step_time": 0.4827408790588379
    },
    {
      "epoch": 0.000249627685546875,
      "model_forward_time": 0.11591982841491699,
      "step": 40899
    },
    {
      "epoch": 0.000249627685546875,
      "step": 40899,
      "training_step_time": 0.5015747547149658
    },
    {
      "epoch": 0.0002496337890625,
      "grad_norm": 0.11110925674438477,
      "learning_rate": 2.5239036318830278e-05,
      "loss": 0.0373,
      "step": 40900
    },
    {
      "epoch": 0.0002496337890625,
      "model_forward_time": 0.11508369445800781,
      "step": 40900
    },
    {
      "epoch": 0.0002496337890625,
      "step": 40900,
      "training_step_time": 0.5075123310089111
    },
    {
      "epoch": 0.000249639892578125,
      "model_forward_time": 0.11501216888427734,
      "step": 40901
    },
    {
      "epoch": 0.000249639892578125,
      "step": 40901,
      "training_step_time": 0.4401540756225586
    },
    {
      "epoch": 0.00024964599609375,
      "model_forward_time": 0.11463594436645508,
      "step": 40902
    },
    {
      "epoch": 0.00024964599609375,
      "step": 40902,
      "training_step_time": 0.40593504905700684
    },
    {
      "epoch": 0.000249652099609375,
      "model_forward_time": 0.11443328857421875,
      "step": 40903
    },
    {
      "epoch": 0.000249652099609375,
      "step": 40903,
      "training_step_time": 0.39444756507873535
    },
    {
      "epoch": 0.000249658203125,
      "model_forward_time": 0.11516618728637695,
      "step": 40904
    },
    {
      "epoch": 0.000249658203125,
      "step": 40904,
      "training_step_time": 0.3996291160583496
    },
    {
      "epoch": 0.000249664306640625,
      "model_forward_time": 0.11486601829528809,
      "step": 40905
    },
    {
      "epoch": 0.000249664306640625,
      "step": 40905,
      "training_step_time": 0.4214437007904053
    },
    {
      "epoch": 0.00024967041015625,
      "model_forward_time": 0.1149442195892334,
      "step": 40906
    },
    {
      "epoch": 0.00024967041015625,
      "step": 40906,
      "training_step_time": 0.3889141082763672
    },
    {
      "epoch": 0.000249676513671875,
      "model_forward_time": 0.1151430606842041,
      "step": 40907
    },
    {
      "epoch": 0.000249676513671875,
      "step": 40907,
      "training_step_time": 0.3990139961242676
    },
    {
      "epoch": 0.0002496826171875,
      "model_forward_time": 0.11469388008117676,
      "step": 40908
    },
    {
      "epoch": 0.0002496826171875,
      "step": 40908,
      "training_step_time": 0.39813756942749023
    },
    {
      "epoch": 0.000249688720703125,
      "model_forward_time": 0.1156773567199707,
      "step": 40909
    },
    {
      "epoch": 0.000249688720703125,
      "step": 40909,
      "training_step_time": 0.4002869129180908
    },
    {
      "epoch": 0.00024969482421875,
      "grad_norm": 0.1362999528646469,
      "learning_rate": 2.521509871926979e-05,
      "loss": 0.0361,
      "step": 40910
    },
    {
      "epoch": 0.00024969482421875,
      "model_forward_time": 0.11506080627441406,
      "step": 40910
    },
    {
      "epoch": 0.00024969482421875,
      "step": 40910,
      "training_step_time": 0.40509819984436035
    },
    {
      "epoch": 0.000249700927734375,
      "model_forward_time": 0.11442112922668457,
      "step": 40911
    },
    {
      "epoch": 0.000249700927734375,
      "step": 40911,
      "training_step_time": 0.7483856678009033
    },
    {
      "epoch": 0.00024970703125,
      "model_forward_time": 0.11412858963012695,
      "step": 40912
    },
    {
      "epoch": 0.00024970703125,
      "step": 40912,
      "training_step_time": 0.46230554580688477
    },
    {
      "epoch": 0.000249713134765625,
      "model_forward_time": 0.11416196823120117,
      "step": 40913
    },
    {
      "epoch": 0.000249713134765625,
      "step": 40913,
      "training_step_time": 0.3627004623413086
    },
    {
      "epoch": 0.00024971923828125,
      "model_forward_time": 0.11430883407592773,
      "step": 40914
    },
    {
      "epoch": 0.00024971923828125,
      "step": 40914,
      "training_step_time": 0.4308154582977295
    },
    {
      "epoch": 0.000249725341796875,
      "model_forward_time": 0.11395001411437988,
      "step": 40915
    },
    {
      "epoch": 0.000249725341796875,
      "step": 40915,
      "training_step_time": 0.4288156032562256
    },
    {
      "epoch": 0.0002497314453125,
      "model_forward_time": 0.11432790756225586,
      "step": 40916
    },
    {
      "epoch": 0.0002497314453125,
      "step": 40916,
      "training_step_time": 0.38831233978271484
    },
    {
      "epoch": 0.000249737548828125,
      "model_forward_time": 0.11411118507385254,
      "step": 40917
    },
    {
      "epoch": 0.000249737548828125,
      "step": 40917,
      "training_step_time": 0.3878042697906494
    },
    {
      "epoch": 0.00024974365234375,
      "model_forward_time": 0.11466860771179199,
      "step": 40918
    },
    {
      "epoch": 0.00024974365234375,
      "step": 40918,
      "training_step_time": 0.38750433921813965
    },
    {
      "epoch": 0.000249749755859375,
      "model_forward_time": 0.11490964889526367,
      "step": 40919
    },
    {
      "epoch": 0.000249749755859375,
      "step": 40919,
      "training_step_time": 0.3961670398712158
    },
    {
      "epoch": 0.000249755859375,
      "grad_norm": 0.11926078796386719,
      "learning_rate": 2.5191168648707887e-05,
      "loss": 0.034,
      "step": 40920
    },
    {
      "epoch": 0.000249755859375,
      "model_forward_time": 0.11488628387451172,
      "step": 40920
    },
    {
      "epoch": 0.000249755859375,
      "step": 40920,
      "training_step_time": 0.40175461769104004
    },
    {
      "epoch": 0.000249761962890625,
      "model_forward_time": 0.11492776870727539,
      "step": 40921
    },
    {
      "epoch": 0.000249761962890625,
      "step": 40921,
      "training_step_time": 0.3962290287017822
    },
    {
      "epoch": 0.00024976806640625,
      "model_forward_time": 0.11530137062072754,
      "step": 40922
    },
    {
      "epoch": 0.00024976806640625,
      "step": 40922,
      "training_step_time": 0.3918163776397705
    },
    {
      "epoch": 0.000249774169921875,
      "model_forward_time": 0.11514019966125488,
      "step": 40923
    },
    {
      "epoch": 0.000249774169921875,
      "step": 40923,
      "training_step_time": 0.5471041202545166
    },
    {
      "epoch": 0.0002497802734375,
      "model_forward_time": 0.1150205135345459,
      "step": 40924
    },
    {
      "epoch": 0.0002497802734375,
      "step": 40924,
      "training_step_time": 0.4322526454925537
    },
    {
      "epoch": 0.000249786376953125,
      "model_forward_time": 0.11503767967224121,
      "step": 40925
    },
    {
      "epoch": 0.000249786376953125,
      "step": 40925,
      "training_step_time": 0.40763187408447266
    },
    {
      "epoch": 0.00024979248046875,
      "model_forward_time": 0.11479043960571289,
      "step": 40926
    },
    {
      "epoch": 0.00024979248046875,
      "step": 40926,
      "training_step_time": 0.44935178756713867
    },
    {
      "epoch": 0.000249798583984375,
      "model_forward_time": 0.11460566520690918,
      "step": 40927
    },
    {
      "epoch": 0.000249798583984375,
      "step": 40927,
      "training_step_time": 0.3916912078857422
    },
    {
      "epoch": 0.0002498046875,
      "model_forward_time": 0.11488771438598633,
      "step": 40928
    },
    {
      "epoch": 0.0002498046875,
      "step": 40928,
      "training_step_time": 0.43917346000671387
    },
    {
      "epoch": 0.000249810791015625,
      "model_forward_time": 0.11552047729492188,
      "step": 40929
    },
    {
      "epoch": 0.000249810791015625,
      "step": 40929,
      "training_step_time": 0.4776577949523926
    },
    {
      "epoch": 0.00024981689453125,
      "grad_norm": 0.1266193985939026,
      "learning_rate": 2.5167246114413956e-05,
      "loss": 0.0395,
      "step": 40930
    },
    {
      "epoch": 0.00024981689453125,
      "model_forward_time": 0.11455965042114258,
      "step": 40930
    },
    {
      "epoch": 0.00024981689453125,
      "step": 40930,
      "training_step_time": 0.45171475410461426
    },
    {
      "epoch": 0.000249822998046875,
      "model_forward_time": 0.11460614204406738,
      "step": 40931
    },
    {
      "epoch": 0.000249822998046875,
      "step": 40931,
      "training_step_time": 0.3924407958984375
    },
    {
      "epoch": 0.0002498291015625,
      "model_forward_time": 0.11597776412963867,
      "step": 40932
    },
    {
      "epoch": 0.0002498291015625,
      "step": 40932,
      "training_step_time": 0.39347219467163086
    },
    {
      "epoch": 0.000249835205078125,
      "model_forward_time": 0.11453986167907715,
      "step": 40933
    },
    {
      "epoch": 0.000249835205078125,
      "step": 40933,
      "training_step_time": 0.39664292335510254
    },
    {
      "epoch": 0.00024984130859375,
      "model_forward_time": 0.11479973793029785,
      "step": 40934
    },
    {
      "epoch": 0.00024984130859375,
      "step": 40934,
      "training_step_time": 0.396740198135376
    },
    {
      "epoch": 0.000249847412109375,
      "model_forward_time": 0.11485552787780762,
      "step": 40935
    },
    {
      "epoch": 0.000249847412109375,
      "step": 40935,
      "training_step_time": 0.46857523918151855
    },
    {
      "epoch": 0.000249853515625,
      "model_forward_time": 0.11479973793029785,
      "step": 40936
    },
    {
      "epoch": 0.000249853515625,
      "step": 40936,
      "training_step_time": 0.3957023620605469
    },
    {
      "epoch": 0.000249859619140625,
      "model_forward_time": 0.11496257781982422,
      "step": 40937
    },
    {
      "epoch": 0.000249859619140625,
      "step": 40937,
      "training_step_time": 0.39769697189331055
    },
    {
      "epoch": 0.00024986572265625,
      "model_forward_time": 0.11424136161804199,
      "step": 40938
    },
    {
      "epoch": 0.00024986572265625,
      "step": 40938,
      "training_step_time": 0.39660072326660156
    },
    {
      "epoch": 0.000249871826171875,
      "model_forward_time": 0.11477088928222656,
      "step": 40939
    },
    {
      "epoch": 0.000249871826171875,
      "step": 40939,
      "training_step_time": 0.3989834785461426
    },
    {
      "epoch": 0.0002498779296875,
      "grad_norm": 0.12153520435094833,
      "learning_rate": 2.5143331123654933e-05,
      "loss": 0.0347,
      "step": 40940
    },
    {
      "epoch": 0.0002498779296875,
      "model_forward_time": 0.11467647552490234,
      "step": 40940
    },
    {
      "epoch": 0.0002498779296875,
      "step": 40940,
      "training_step_time": 0.39873480796813965
    },
    {
      "epoch": 0.000249884033203125,
      "model_forward_time": 0.11519336700439453,
      "step": 40941
    },
    {
      "epoch": 0.000249884033203125,
      "step": 40941,
      "training_step_time": 0.7001776695251465
    },
    {
      "epoch": 0.00024989013671875,
      "model_forward_time": 0.11432504653930664,
      "step": 40942
    },
    {
      "epoch": 0.00024989013671875,
      "step": 40942,
      "training_step_time": 0.36516809463500977
    },
    {
      "epoch": 0.000249896240234375,
      "model_forward_time": 0.11421680450439453,
      "step": 40943
    },
    {
      "epoch": 0.000249896240234375,
      "step": 40943,
      "training_step_time": 0.4471313953399658
    },
    {
      "epoch": 0.00024990234375,
      "model_forward_time": 0.11414504051208496,
      "step": 40944
    },
    {
      "epoch": 0.00024990234375,
      "step": 40944,
      "training_step_time": 0.3991096019744873
    },
    {
      "epoch": 0.000249908447265625,
      "model_forward_time": 0.11460375785827637,
      "step": 40945
    },
    {
      "epoch": 0.000249908447265625,
      "step": 40945,
      "training_step_time": 0.38991856575012207
    },
    {
      "epoch": 0.00024991455078125,
      "model_forward_time": 0.11413264274597168,
      "step": 40946
    },
    {
      "epoch": 0.00024991455078125,
      "step": 40946,
      "training_step_time": 0.3871452808380127
    },
    {
      "epoch": 0.000249920654296875,
      "model_forward_time": 0.11438179016113281,
      "step": 40947
    },
    {
      "epoch": 0.000249920654296875,
      "step": 40947,
      "training_step_time": 0.49471211433410645
    },
    {
      "epoch": 0.0002499267578125,
      "model_forward_time": 0.11446881294250488,
      "step": 40948
    },
    {
      "epoch": 0.0002499267578125,
      "step": 40948,
      "training_step_time": 0.39237380027770996
    },
    {
      "epoch": 0.000249932861328125,
      "model_forward_time": 0.11473250389099121,
      "step": 40949
    },
    {
      "epoch": 0.000249932861328125,
      "step": 40949,
      "training_step_time": 0.39403533935546875
    },
    {
      "epoch": 0.00024993896484375,
      "grad_norm": 0.09731242805719376,
      "learning_rate": 2.511942368369566e-05,
      "loss": 0.0414,
      "step": 40950
    },
    {
      "epoch": 0.00024993896484375,
      "model_forward_time": 0.11508011817932129,
      "step": 40950
    },
    {
      "epoch": 0.00024993896484375,
      "step": 40950,
      "training_step_time": 0.4030776023864746
    },
    {
      "epoch": 0.000249945068359375,
      "model_forward_time": 0.11461710929870605,
      "step": 40951
    },
    {
      "epoch": 0.000249945068359375,
      "step": 40951,
      "training_step_time": 0.41321897506713867
    },
    {
      "epoch": 0.000249951171875,
      "model_forward_time": 0.11493563652038574,
      "step": 40952
    },
    {
      "epoch": 0.000249951171875,
      "step": 40952,
      "training_step_time": 0.3979227542877197
    },
    {
      "epoch": 0.000249957275390625,
      "model_forward_time": 0.11458992958068848,
      "step": 40953
    },
    {
      "epoch": 0.000249957275390625,
      "step": 40953,
      "training_step_time": 0.596454381942749
    },
    {
      "epoch": 0.00024996337890625,
      "model_forward_time": 0.1147160530090332,
      "step": 40954
    },
    {
      "epoch": 0.00024996337890625,
      "step": 40954,
      "training_step_time": 0.40638303756713867
    },
    {
      "epoch": 0.000249969482421875,
      "model_forward_time": 0.11457443237304688,
      "step": 40955
    },
    {
      "epoch": 0.000249969482421875,
      "step": 40955,
      "training_step_time": 0.44965100288391113
    },
    {
      "epoch": 0.0002499755859375,
      "model_forward_time": 0.1146554946899414,
      "step": 40956
    },
    {
      "epoch": 0.0002499755859375,
      "step": 40956,
      "training_step_time": 0.37013816833496094
    },
    {
      "epoch": 0.000249981689453125,
      "model_forward_time": 0.11472415924072266,
      "step": 40957
    },
    {
      "epoch": 0.000249981689453125,
      "step": 40957,
      "training_step_time": 0.4866602420806885
    },
    {
      "epoch": 0.00024998779296875,
      "model_forward_time": 0.11393404006958008,
      "step": 40958
    },
    {
      "epoch": 0.00024998779296875,
      "step": 40958,
      "training_step_time": 0.4066929817199707
    },
    {
      "epoch": 0.000249993896484375,
      "model_forward_time": 0.11498689651489258,
      "step": 40959
    },
    {
      "epoch": 0.000249993896484375,
      "step": 40959,
      "training_step_time": 0.3941819667816162
    },
    {
      "epoch": 0.00025,
      "grad_norm": 0.1025506854057312,
      "learning_rate": 2.5095523801798495e-05,
      "loss": 0.0406,
      "step": 40960
    },
    {
      "epoch": 0.00025,
      "model_forward_time": 0.11473941802978516,
      "step": 40960
    },
    {
      "epoch": 0.00025,
      "step": 40960,
      "training_step_time": 0.4129912853240967
    },
    {
      "epoch": 0.000250006103515625,
      "model_forward_time": 0.1146998405456543,
      "step": 40961
    },
    {
      "epoch": 0.000250006103515625,
      "step": 40961,
      "training_step_time": 0.38988447189331055
    },
    {
      "epoch": 0.00025001220703125,
      "model_forward_time": 0.114227294921875,
      "step": 40962
    },
    {
      "epoch": 0.00025001220703125,
      "step": 40962,
      "training_step_time": 0.39997029304504395
    },
    {
      "epoch": 0.000250018310546875,
      "model_forward_time": 0.11547350883483887,
      "step": 40963
    },
    {
      "epoch": 0.000250018310546875,
      "step": 40963,
      "training_step_time": 0.39008450508117676
    },
    {
      "epoch": 0.0002500244140625,
      "model_forward_time": 0.11534476280212402,
      "step": 40964
    },
    {
      "epoch": 0.0002500244140625,
      "step": 40964,
      "training_step_time": 0.39366960525512695
    },
    {
      "epoch": 0.000250030517578125,
      "model_forward_time": 0.1153707504272461,
      "step": 40965
    },
    {
      "epoch": 0.000250030517578125,
      "step": 40965,
      "training_step_time": 0.6541774272918701
    },
    {
      "epoch": 0.00025003662109375,
      "model_forward_time": 0.11518597602844238,
      "step": 40966
    },
    {
      "epoch": 0.00025003662109375,
      "step": 40966,
      "training_step_time": 0.39548683166503906
    },
    {
      "epoch": 0.000250042724609375,
      "model_forward_time": 0.11512589454650879,
      "step": 40967
    },
    {
      "epoch": 0.000250042724609375,
      "step": 40967,
      "training_step_time": 0.44219422340393066
    },
    {
      "epoch": 0.000250048828125,
      "model_forward_time": 0.1147611141204834,
      "step": 40968
    },
    {
      "epoch": 0.000250048828125,
      "step": 40968,
      "training_step_time": 0.4248495101928711
    },
    {
      "epoch": 0.000250054931640625,
      "model_forward_time": 0.11440896987915039,
      "step": 40969
    },
    {
      "epoch": 0.000250054931640625,
      "step": 40969,
      "training_step_time": 0.5046427249908447
    },
    {
      "epoch": 0.00025006103515625,
      "grad_norm": 0.15290702879428864,
      "learning_rate": 2.5071631485223658e-05,
      "loss": 0.0369,
      "step": 40970
    },
    {
      "epoch": 0.00025006103515625,
      "model_forward_time": 0.11453127861022949,
      "step": 40970
    },
    {
      "epoch": 0.00025006103515625,
      "step": 40970,
      "training_step_time": 0.3676259517669678
    },
    {
      "epoch": 0.000250067138671875,
      "model_forward_time": 0.11490750312805176,
      "step": 40971
    },
    {
      "epoch": 0.000250067138671875,
      "step": 40971,
      "training_step_time": 0.48661279678344727
    },
    {
      "epoch": 0.0002500732421875,
      "model_forward_time": 0.11458444595336914,
      "step": 40972
    },
    {
      "epoch": 0.0002500732421875,
      "step": 40972,
      "training_step_time": 0.47269177436828613
    },
    {
      "epoch": 0.000250079345703125,
      "model_forward_time": 0.11449313163757324,
      "step": 40973
    },
    {
      "epoch": 0.000250079345703125,
      "step": 40973,
      "training_step_time": 0.39320945739746094
    },
    {
      "epoch": 0.00025008544921875,
      "model_forward_time": 0.11452817916870117,
      "step": 40974
    },
    {
      "epoch": 0.00025008544921875,
      "step": 40974,
      "training_step_time": 0.3935060501098633
    },
    {
      "epoch": 0.000250091552734375,
      "model_forward_time": 0.11473608016967773,
      "step": 40975
    },
    {
      "epoch": 0.000250091552734375,
      "step": 40975,
      "training_step_time": 0.39310669898986816
    },
    {
      "epoch": 0.00025009765625,
      "model_forward_time": 0.1151893138885498,
      "step": 40976
    },
    {
      "epoch": 0.00025009765625,
      "step": 40976,
      "training_step_time": 0.39103126525878906
    },
    {
      "epoch": 0.000250103759765625,
      "model_forward_time": 0.11487412452697754,
      "step": 40977
    },
    {
      "epoch": 0.000250103759765625,
      "step": 40977,
      "training_step_time": 0.4005293846130371
    },
    {
      "epoch": 0.00025010986328125,
      "model_forward_time": 0.11525154113769531,
      "step": 40978
    },
    {
      "epoch": 0.00025010986328125,
      "step": 40978,
      "training_step_time": 0.4045145511627197
    },
    {
      "epoch": 0.000250115966796875,
      "model_forward_time": 0.1150505542755127,
      "step": 40979
    },
    {
      "epoch": 0.000250115966796875,
      "step": 40979,
      "training_step_time": 0.4041574001312256
    },
    {
      "epoch": 0.0002501220703125,
      "grad_norm": 0.135171040892601,
      "learning_rate": 2.5047746741228978e-05,
      "loss": 0.0394,
      "step": 40980
    },
    {
      "epoch": 0.0002501220703125,
      "model_forward_time": 0.11464548110961914,
      "step": 40980
    },
    {
      "epoch": 0.0002501220703125,
      "step": 40980,
      "training_step_time": 0.3934361934661865
    },
    {
      "epoch": 0.000250128173828125,
      "model_forward_time": 0.11521053314208984,
      "step": 40981
    },
    {
      "epoch": 0.000250128173828125,
      "step": 40981,
      "training_step_time": 0.42118167877197266
    },
    {
      "epoch": 0.00025013427734375,
      "model_forward_time": 0.11479330062866211,
      "step": 40982
    },
    {
      "epoch": 0.00025013427734375,
      "step": 40982,
      "training_step_time": 0.44554829597473145
    },
    {
      "epoch": 0.000250140380859375,
      "model_forward_time": 0.1154320240020752,
      "step": 40983
    },
    {
      "epoch": 0.000250140380859375,
      "step": 40983,
      "training_step_time": 0.5552623271942139
    },
    {
      "epoch": 0.000250146484375,
      "model_forward_time": 0.11451554298400879,
      "step": 40984
    },
    {
      "epoch": 0.000250146484375,
      "step": 40984,
      "training_step_time": 0.4036746025085449
    },
    {
      "epoch": 0.000250152587890625,
      "model_forward_time": 0.11498475074768066,
      "step": 40985
    },
    {
      "epoch": 0.000250152587890625,
      "step": 40985,
      "training_step_time": 0.5122077465057373
    },
    {
      "epoch": 0.00025015869140625,
      "model_forward_time": 0.1148374080657959,
      "step": 40986
    },
    {
      "epoch": 0.00025015869140625,
      "step": 40986,
      "training_step_time": 0.4475071430206299
    },
    {
      "epoch": 0.000250164794921875,
      "model_forward_time": 0.11450934410095215,
      "step": 40987
    },
    {
      "epoch": 0.000250164794921875,
      "step": 40987,
      "training_step_time": 0.392505407333374
    },
    {
      "epoch": 0.0002501708984375,
      "model_forward_time": 0.11519145965576172,
      "step": 40988
    },
    {
      "epoch": 0.0002501708984375,
      "step": 40988,
      "training_step_time": 0.3912162780761719
    },
    {
      "epoch": 0.000250177001953125,
      "model_forward_time": 0.1150205135345459,
      "step": 40989
    },
    {
      "epoch": 0.000250177001953125,
      "step": 40989,
      "training_step_time": 0.4284830093383789
    },
    {
      "epoch": 0.00025018310546875,
      "grad_norm": 0.125722274184227,
      "learning_rate": 2.5023869577070013e-05,
      "loss": 0.034,
      "step": 40990
    },
    {
      "epoch": 0.00025018310546875,
      "model_forward_time": 0.11507368087768555,
      "step": 40990
    },
    {
      "epoch": 0.00025018310546875,
      "step": 40990,
      "training_step_time": 0.3940606117248535
    },
    {
      "epoch": 0.000250189208984375,
      "model_forward_time": 0.11459684371948242,
      "step": 40991
    },
    {
      "epoch": 0.000250189208984375,
      "step": 40991,
      "training_step_time": 0.3836641311645508
    },
    {
      "epoch": 0.0002501953125,
      "model_forward_time": 0.1147770881652832,
      "step": 40992
    },
    {
      "epoch": 0.0002501953125,
      "step": 40992,
      "training_step_time": 0.3911435604095459
    },
    {
      "epoch": 0.000250201416015625,
      "model_forward_time": 0.11545062065124512,
      "step": 40993
    },
    {
      "epoch": 0.000250201416015625,
      "step": 40993,
      "training_step_time": 0.39412546157836914
    },
    {
      "epoch": 0.00025020751953125,
      "model_forward_time": 0.11508011817932129,
      "step": 40994
    },
    {
      "epoch": 0.00025020751953125,
      "step": 40994,
      "training_step_time": 0.3976435661315918
    },
    {
      "epoch": 0.000250213623046875,
      "model_forward_time": 0.11520695686340332,
      "step": 40995
    },
    {
      "epoch": 0.000250213623046875,
      "step": 40995,
      "training_step_time": 0.6757457256317139
    },
    {
      "epoch": 0.0002502197265625,
      "model_forward_time": 0.11663579940795898,
      "step": 40996
    },
    {
      "epoch": 0.0002502197265625,
      "step": 40996,
      "training_step_time": 0.44248175621032715
    },
    {
      "epoch": 0.000250225830078125,
      "model_forward_time": 0.11535024642944336,
      "step": 40997
    },
    {
      "epoch": 0.000250225830078125,
      "step": 40997,
      "training_step_time": 0.49228882789611816
    },
    {
      "epoch": 0.00025023193359375,
      "model_forward_time": 0.11440014839172363,
      "step": 40998
    },
    {
      "epoch": 0.00025023193359375,
      "step": 40998,
      "training_step_time": 0.36433935165405273
    },
    {
      "epoch": 0.000250238037109375,
      "model_forward_time": 0.1152031421661377,
      "step": 40999
    },
    {
      "epoch": 0.000250238037109375,
      "step": 40999,
      "training_step_time": 0.4639558792114258
    },
    {
      "epoch": 0.000250244140625,
      "grad_norm": 0.08864835649728775,
      "learning_rate": 2.500000000000001e-05,
      "loss": 0.0339,
      "step": 41000
    },
    {
      "epoch": 0.000250244140625,
      "model_forward_time": 0.11421918869018555,
      "step": 41000
    },
    {
      "epoch": 0.000250244140625,
      "step": 41000,
      "training_step_time": 0.3579561710357666
    },
    {
      "epoch": 0.000250250244140625,
      "model_forward_time": 0.1128854751586914,
      "step": 41001
    },
    {
      "epoch": 0.000250250244140625,
      "step": 41001,
      "training_step_time": 0.41782426834106445
    },
    {
      "epoch": 0.00025025634765625,
      "model_forward_time": 0.11342263221740723,
      "step": 41002
    },
    {
      "epoch": 0.00025025634765625,
      "step": 41002,
      "training_step_time": 0.38100361824035645
    },
    {
      "epoch": 0.000250262451171875,
      "model_forward_time": 0.11338090896606445,
      "step": 41003
    },
    {
      "epoch": 0.000250262451171875,
      "step": 41003,
      "training_step_time": 0.3892810344696045
    },
    {
      "epoch": 0.0002502685546875,
      "model_forward_time": 0.11406064033508301,
      "step": 41004
    },
    {
      "epoch": 0.0002502685546875,
      "step": 41004,
      "training_step_time": 0.3888864517211914
    },
    {
      "epoch": 0.000250274658203125,
      "model_forward_time": 0.11505651473999023,
      "step": 41005
    },
    {
      "epoch": 0.000250274658203125,
      "step": 41005,
      "training_step_time": 0.3866910934448242
    },
    {
      "epoch": 0.00025028076171875,
      "model_forward_time": 0.11530518531799316,
      "step": 41006
    },
    {
      "epoch": 0.00025028076171875,
      "step": 41006,
      "training_step_time": 0.4048147201538086
    },
    {
      "epoch": 0.000250286865234375,
      "model_forward_time": 0.11473989486694336,
      "step": 41007
    },
    {
      "epoch": 0.000250286865234375,
      "step": 41007,
      "training_step_time": 0.4001467227935791
    },
    {
      "epoch": 0.00025029296875,
      "model_forward_time": 0.11491847038269043,
      "step": 41008
    },
    {
      "epoch": 0.00025029296875,
      "step": 41008,
      "training_step_time": 0.4419105052947998
    },
    {
      "epoch": 0.000250299072265625,
      "model_forward_time": 0.11514520645141602,
      "step": 41009
    },
    {
      "epoch": 0.000250299072265625,
      "step": 41009,
      "training_step_time": 0.4248838424682617
    },
    {
      "epoch": 0.00025030517578125,
      "grad_norm": 0.11182147264480591,
      "learning_rate": 2.4976138017269908e-05,
      "loss": 0.0364,
      "step": 41010
    },
    {
      "epoch": 0.00025030517578125,
      "model_forward_time": 0.1146700382232666,
      "step": 41010
    },
    {
      "epoch": 0.00025030517578125,
      "step": 41010,
      "training_step_time": 0.3939700126647949
    },
    {
      "epoch": 0.000250311279296875,
      "model_forward_time": 0.11484670639038086,
      "step": 41011
    },
    {
      "epoch": 0.000250311279296875,
      "step": 41011,
      "training_step_time": 0.40028953552246094
    },
    {
      "epoch": 0.0002503173828125,
      "model_forward_time": 0.11533355712890625,
      "step": 41012
    },
    {
      "epoch": 0.0002503173828125,
      "step": 41012,
      "training_step_time": 0.4010002613067627
    },
    {
      "epoch": 0.000250323486328125,
      "model_forward_time": 0.11535024642944336,
      "step": 41013
    },
    {
      "epoch": 0.000250323486328125,
      "step": 41013,
      "training_step_time": 0.48119306564331055
    },
    {
      "epoch": 0.00025032958984375,
      "model_forward_time": 0.1151115894317627,
      "step": 41014
    },
    {
      "epoch": 0.00025032958984375,
      "step": 41014,
      "training_step_time": 0.38261914253234863
    },
    {
      "epoch": 0.000250335693359375,
      "model_forward_time": 0.11507892608642578,
      "step": 41015
    },
    {
      "epoch": 0.000250335693359375,
      "step": 41015,
      "training_step_time": 0.4891848564147949
    },
    {
      "epoch": 0.000250341796875,
      "model_forward_time": 0.11490392684936523,
      "step": 41016
    },
    {
      "epoch": 0.000250341796875,
      "step": 41016,
      "training_step_time": 0.49538660049438477
    },
    {
      "epoch": 0.000250347900390625,
      "model_forward_time": 0.11445808410644531,
      "step": 41017
    },
    {
      "epoch": 0.000250347900390625,
      "step": 41017,
      "training_step_time": 0.40079283714294434
    },
    {
      "epoch": 0.00025035400390625,
      "model_forward_time": 0.11588048934936523,
      "step": 41018
    },
    {
      "epoch": 0.00025035400390625,
      "step": 41018,
      "training_step_time": 0.39291882514953613
    },
    {
      "epoch": 0.000250360107421875,
      "model_forward_time": 0.11491107940673828,
      "step": 41019
    },
    {
      "epoch": 0.000250360107421875,
      "step": 41019,
      "training_step_time": 0.3926389217376709
    },
    {
      "epoch": 0.0002503662109375,
      "grad_norm": 0.10149730741977692,
      "learning_rate": 2.4952283636128372e-05,
      "loss": 0.0379,
      "step": 41020
    },
    {
      "epoch": 0.0002503662109375,
      "model_forward_time": 0.11505579948425293,
      "step": 41020
    },
    {
      "epoch": 0.0002503662109375,
      "step": 41020,
      "training_step_time": 0.39861011505126953
    },
    {
      "epoch": 0.000250372314453125,
      "model_forward_time": 0.11518621444702148,
      "step": 41021
    },
    {
      "epoch": 0.000250372314453125,
      "step": 41021,
      "training_step_time": 0.392974853515625
    },
    {
      "epoch": 0.00025037841796875,
      "model_forward_time": 0.11475896835327148,
      "step": 41022
    },
    {
      "epoch": 0.00025037841796875,
      "step": 41022,
      "training_step_time": 0.390134334564209
    },
    {
      "epoch": 0.000250384521484375,
      "model_forward_time": 0.11553406715393066,
      "step": 41023
    },
    {
      "epoch": 0.000250384521484375,
      "step": 41023,
      "training_step_time": 0.4210355281829834
    },
    {
      "epoch": 0.000250390625,
      "model_forward_time": 0.11549735069274902,
      "step": 41024
    },
    {
      "epoch": 0.000250390625,
      "step": 41024,
      "training_step_time": 0.3947765827178955
    },
    {
      "epoch": 0.000250396728515625,
      "model_forward_time": 0.11541223526000977,
      "step": 41025
    },
    {
      "epoch": 0.000250396728515625,
      "step": 41025,
      "training_step_time": 0.3913092613220215
    },
    {
      "epoch": 0.00025040283203125,
      "model_forward_time": 0.1149744987487793,
      "step": 41026
    },
    {
      "epoch": 0.00025040283203125,
      "step": 41026,
      "training_step_time": 0.4161715507507324
    },
    {
      "epoch": 0.000250408935546875,
      "model_forward_time": 0.11757254600524902,
      "step": 41027
    },
    {
      "epoch": 0.000250408935546875,
      "step": 41027,
      "training_step_time": 0.5068490505218506
    },
    {
      "epoch": 0.0002504150390625,
      "model_forward_time": 0.11554145812988281,
      "step": 41028
    },
    {
      "epoch": 0.0002504150390625,
      "step": 41028,
      "training_step_time": 0.4416942596435547
    },
    {
      "epoch": 0.000250421142578125,
      "model_forward_time": 0.11508631706237793,
      "step": 41029
    },
    {
      "epoch": 0.000250421142578125,
      "step": 41029,
      "training_step_time": 0.47023606300354004
    },
    {
      "epoch": 0.00025042724609375,
      "grad_norm": 0.138932466506958,
      "learning_rate": 2.4928436863821725e-05,
      "loss": 0.0372,
      "step": 41030
    },
    {
      "epoch": 0.00025042724609375,
      "model_forward_time": 0.11540699005126953,
      "step": 41030
    },
    {
      "epoch": 0.00025042724609375,
      "step": 41030,
      "training_step_time": 0.4341428279876709
    },
    {
      "epoch": 0.000250433349609375,
      "model_forward_time": 0.11511921882629395,
      "step": 41031
    },
    {
      "epoch": 0.000250433349609375,
      "step": 41031,
      "training_step_time": 0.41114068031311035
    },
    {
      "epoch": 0.000250439453125,
      "model_forward_time": 0.11524200439453125,
      "step": 41032
    },
    {
      "epoch": 0.000250439453125,
      "step": 41032,
      "training_step_time": 0.387908935546875
    },
    {
      "epoch": 0.000250445556640625,
      "model_forward_time": 0.11494231224060059,
      "step": 41033
    },
    {
      "epoch": 0.000250445556640625,
      "step": 41033,
      "training_step_time": 0.38225698471069336
    },
    {
      "epoch": 0.00025045166015625,
      "model_forward_time": 0.11474800109863281,
      "step": 41034
    },
    {
      "epoch": 0.00025045166015625,
      "step": 41034,
      "training_step_time": 0.3933415412902832
    },
    {
      "epoch": 0.000250457763671875,
      "model_forward_time": 0.11565637588500977,
      "step": 41035
    },
    {
      "epoch": 0.000250457763671875,
      "step": 41035,
      "training_step_time": 0.38985538482666016
    },
    {
      "epoch": 0.0002504638671875,
      "model_forward_time": 0.11484360694885254,
      "step": 41036
    },
    {
      "epoch": 0.0002504638671875,
      "step": 41036,
      "training_step_time": 0.4717733860015869
    },
    {
      "epoch": 0.000250469970703125,
      "model_forward_time": 0.1148977279663086,
      "step": 41037
    },
    {
      "epoch": 0.000250469970703125,
      "step": 41037,
      "training_step_time": 0.3874492645263672
    },
    {
      "epoch": 0.00025047607421875,
      "model_forward_time": 0.11513233184814453,
      "step": 41038
    },
    {
      "epoch": 0.00025047607421875,
      "step": 41038,
      "training_step_time": 0.394726037979126
    },
    {
      "epoch": 0.000250482177734375,
      "model_forward_time": 0.11480188369750977,
      "step": 41039
    },
    {
      "epoch": 0.000250482177734375,
      "step": 41039,
      "training_step_time": 0.39631223678588867
    },
    {
      "epoch": 0.00025048828125,
      "grad_norm": 0.09501217305660248,
      "learning_rate": 2.490459770759398e-05,
      "loss": 0.0323,
      "step": 41040
    },
    {
      "epoch": 0.00025048828125,
      "model_forward_time": 0.11479973793029785,
      "step": 41040
    },
    {
      "epoch": 0.00025048828125,
      "step": 41040,
      "training_step_time": 0.48250627517700195
    },
    {
      "epoch": 0.000250494384765625,
      "model_forward_time": 0.11470460891723633,
      "step": 41041
    },
    {
      "epoch": 0.000250494384765625,
      "step": 41041,
      "training_step_time": 0.4080195426940918
    },
    {
      "epoch": 0.00025050048828125,
      "model_forward_time": 0.11512541770935059,
      "step": 41042
    },
    {
      "epoch": 0.00025050048828125,
      "step": 41042,
      "training_step_time": 0.4585611820220947
    },
    {
      "epoch": 0.000250506591796875,
      "model_forward_time": 0.1154022216796875,
      "step": 41043
    },
    {
      "epoch": 0.000250506591796875,
      "step": 41043,
      "training_step_time": 0.3651301860809326
    },
    {
      "epoch": 0.0002505126953125,
      "model_forward_time": 0.11513471603393555,
      "step": 41044
    },
    {
      "epoch": 0.0002505126953125,
      "step": 41044,
      "training_step_time": 0.47609615325927734
    },
    {
      "epoch": 0.000250518798828125,
      "model_forward_time": 0.11626124382019043,
      "step": 41045
    },
    {
      "epoch": 0.000250518798828125,
      "step": 41045,
      "training_step_time": 0.48050880432128906
    },
    {
      "epoch": 0.00025052490234375,
      "model_forward_time": 0.11494278907775879,
      "step": 41046
    },
    {
      "epoch": 0.00025052490234375,
      "step": 41046,
      "training_step_time": 0.3863224983215332
    },
    {
      "epoch": 0.000250531005859375,
      "model_forward_time": 0.11520504951477051,
      "step": 41047
    },
    {
      "epoch": 0.000250531005859375,
      "step": 41047,
      "training_step_time": 0.3878288269042969
    },
    {
      "epoch": 0.000250537109375,
      "model_forward_time": 0.11484527587890625,
      "step": 41048
    },
    {
      "epoch": 0.000250537109375,
      "step": 41048,
      "training_step_time": 0.3883543014526367
    },
    {
      "epoch": 0.000250543212890625,
      "model_forward_time": 0.11443352699279785,
      "step": 41049
    },
    {
      "epoch": 0.000250543212890625,
      "step": 41049,
      "training_step_time": 0.4188497066497803
    },
    {
      "epoch": 0.00025054931640625,
      "grad_norm": 0.16279533505439758,
      "learning_rate": 2.4880766174686848e-05,
      "loss": 0.0372,
      "step": 41050
    },
    {
      "epoch": 0.00025054931640625,
      "model_forward_time": 0.11602520942687988,
      "step": 41050
    },
    {
      "epoch": 0.00025054931640625,
      "step": 41050,
      "training_step_time": 0.4216775894165039
    },
    {
      "epoch": 0.000250555419921875,
      "model_forward_time": 0.11552047729492188,
      "step": 41051
    },
    {
      "epoch": 0.000250555419921875,
      "step": 41051,
      "training_step_time": 0.3812887668609619
    },
    {
      "epoch": 0.0002505615234375,
      "model_forward_time": 0.11531376838684082,
      "step": 41052
    },
    {
      "epoch": 0.0002505615234375,
      "step": 41052,
      "training_step_time": 0.3879544734954834
    },
    {
      "epoch": 0.000250567626953125,
      "model_forward_time": 0.11492753028869629,
      "step": 41053
    },
    {
      "epoch": 0.000250567626953125,
      "step": 41053,
      "training_step_time": 0.39228296279907227
    },
    {
      "epoch": 0.00025057373046875,
      "model_forward_time": 0.11523723602294922,
      "step": 41054
    },
    {
      "epoch": 0.00025057373046875,
      "step": 41054,
      "training_step_time": 0.3934142589569092
    },
    {
      "epoch": 0.000250579833984375,
      "model_forward_time": 0.11496543884277344,
      "step": 41055
    },
    {
      "epoch": 0.000250579833984375,
      "step": 41055,
      "training_step_time": 0.4642784595489502
    },
    {
      "epoch": 0.0002505859375,
      "model_forward_time": 0.11666679382324219,
      "step": 41056
    },
    {
      "epoch": 0.0002505859375,
      "step": 41056,
      "training_step_time": 0.4245455265045166
    },
    {
      "epoch": 0.000250592041015625,
      "model_forward_time": 0.1150667667388916,
      "step": 41057
    },
    {
      "epoch": 0.000250592041015625,
      "step": 41057,
      "training_step_time": 0.4445157051086426
    },
    {
      "epoch": 0.00025059814453125,
      "model_forward_time": 0.11540484428405762,
      "step": 41058
    },
    {
      "epoch": 0.00025059814453125,
      "step": 41058,
      "training_step_time": 0.4110541343688965
    },
    {
      "epoch": 0.000250604248046875,
      "model_forward_time": 0.11525440216064453,
      "step": 41059
    },
    {
      "epoch": 0.000250604248046875,
      "step": 41059,
      "training_step_time": 0.4971640110015869
    },
    {
      "epoch": 0.0002506103515625,
      "grad_norm": 0.15081126987934113,
      "learning_rate": 2.485694227233971e-05,
      "loss": 0.0365,
      "step": 41060
    },
    {
      "epoch": 0.0002506103515625,
      "model_forward_time": 0.11591887474060059,
      "step": 41060
    },
    {
      "epoch": 0.0002506103515625,
      "step": 41060,
      "training_step_time": 0.41411805152893066
    },
    {
      "epoch": 0.000250616455078125,
      "model_forward_time": 0.11517596244812012,
      "step": 41061
    },
    {
      "epoch": 0.000250616455078125,
      "step": 41061,
      "training_step_time": 0.4020380973815918
    },
    {
      "epoch": 0.00025062255859375,
      "model_forward_time": 0.11452054977416992,
      "step": 41062
    },
    {
      "epoch": 0.00025062255859375,
      "step": 41062,
      "training_step_time": 0.3843057155609131
    },
    {
      "epoch": 0.000250628662109375,
      "model_forward_time": 0.11517953872680664,
      "step": 41063
    },
    {
      "epoch": 0.000250628662109375,
      "step": 41063,
      "training_step_time": 0.4430410861968994
    },
    {
      "epoch": 0.000250634765625,
      "model_forward_time": 0.11487245559692383,
      "step": 41064
    },
    {
      "epoch": 0.000250634765625,
      "step": 41064,
      "training_step_time": 0.4003171920776367
    },
    {
      "epoch": 0.000250640869140625,
      "model_forward_time": 0.11486387252807617,
      "step": 41065
    },
    {
      "epoch": 0.000250640869140625,
      "step": 41065,
      "training_step_time": 0.3976883888244629
    },
    {
      "epoch": 0.00025064697265625,
      "model_forward_time": 0.11507654190063477,
      "step": 41066
    },
    {
      "epoch": 0.00025064697265625,
      "step": 41066,
      "training_step_time": 0.3968980312347412
    },
    {
      "epoch": 0.000250653076171875,
      "model_forward_time": 0.11519336700439453,
      "step": 41067
    },
    {
      "epoch": 0.000250653076171875,
      "step": 41067,
      "training_step_time": 0.4107680320739746
    },
    {
      "epoch": 0.0002506591796875,
      "model_forward_time": 0.11479425430297852,
      "step": 41068
    },
    {
      "epoch": 0.0002506591796875,
      "step": 41068,
      "training_step_time": 0.39674973487854004
    },
    {
      "epoch": 0.000250665283203125,
      "model_forward_time": 0.11520862579345703,
      "step": 41069
    },
    {
      "epoch": 0.000250665283203125,
      "step": 41069,
      "training_step_time": 0.39464330673217773
    },
    {
      "epoch": 0.00025067138671875,
      "grad_norm": 0.12549734115600586,
      "learning_rate": 2.4833126007789653e-05,
      "loss": 0.0381,
      "step": 41070
    },
    {
      "epoch": 0.00025067138671875,
      "model_forward_time": 0.11564803123474121,
      "step": 41070
    },
    {
      "epoch": 0.00025067138671875,
      "step": 41070,
      "training_step_time": 0.47025060653686523
    },
    {
      "epoch": 0.000250677490234375,
      "model_forward_time": 0.11536240577697754,
      "step": 41071
    },
    {
      "epoch": 0.000250677490234375,
      "step": 41071,
      "training_step_time": 0.4163248538970947
    },
    {
      "epoch": 0.00025068359375,
      "model_forward_time": 0.11510634422302246,
      "step": 41072
    },
    {
      "epoch": 0.00025068359375,
      "step": 41072,
      "training_step_time": 0.4521446228027344
    },
    {
      "epoch": 0.000250689697265625,
      "model_forward_time": 0.11509323120117188,
      "step": 41073
    },
    {
      "epoch": 0.000250689697265625,
      "step": 41073,
      "training_step_time": 0.510744571685791
    },
    {
      "epoch": 0.00025069580078125,
      "model_forward_time": 0.11509490013122559,
      "step": 41074
    },
    {
      "epoch": 0.00025069580078125,
      "step": 41074,
      "training_step_time": 0.4582374095916748
    },
    {
      "epoch": 0.000250701904296875,
      "model_forward_time": 0.11464643478393555,
      "step": 41075
    },
    {
      "epoch": 0.000250701904296875,
      "step": 41075,
      "training_step_time": 0.3993062973022461
    },
    {
      "epoch": 0.0002507080078125,
      "model_forward_time": 0.11484003067016602,
      "step": 41076
    },
    {
      "epoch": 0.0002507080078125,
      "step": 41076,
      "training_step_time": 0.4001948833465576
    },
    {
      "epoch": 0.000250714111328125,
      "model_forward_time": 0.11482763290405273,
      "step": 41077
    },
    {
      "epoch": 0.000250714111328125,
      "step": 41077,
      "training_step_time": 0.42578911781311035
    },
    {
      "epoch": 0.00025072021484375,
      "model_forward_time": 0.11481666564941406,
      "step": 41078
    },
    {
      "epoch": 0.00025072021484375,
      "step": 41078,
      "training_step_time": 0.390338659286499
    },
    {
      "epoch": 0.000250726318359375,
      "model_forward_time": 0.11562585830688477,
      "step": 41079
    },
    {
      "epoch": 0.000250726318359375,
      "step": 41079,
      "training_step_time": 0.4338521957397461
    },
    {
      "epoch": 0.000250732421875,
      "grad_norm": 0.1223817840218544,
      "learning_rate": 2.4809317388271426e-05,
      "loss": 0.0379,
      "step": 41080
    },
    {
      "epoch": 0.000250732421875,
      "model_forward_time": 0.1167747974395752,
      "step": 41080
    },
    {
      "epoch": 0.000250732421875,
      "step": 41080,
      "training_step_time": 0.40094876289367676
    },
    {
      "epoch": 0.000250738525390625,
      "model_forward_time": 0.11530089378356934,
      "step": 41081
    },
    {
      "epoch": 0.000250738525390625,
      "step": 41081,
      "training_step_time": 0.3880465030670166
    },
    {
      "epoch": 0.00025074462890625,
      "model_forward_time": 0.11488461494445801,
      "step": 41082
    },
    {
      "epoch": 0.00025074462890625,
      "step": 41082,
      "training_step_time": 0.3969407081604004
    },
    {
      "epoch": 0.000250750732421875,
      "model_forward_time": 0.1143486499786377,
      "step": 41083
    },
    {
      "epoch": 0.000250750732421875,
      "step": 41083,
      "training_step_time": 0.39011478424072266
    },
    {
      "epoch": 0.0002507568359375,
      "model_forward_time": 0.11542987823486328,
      "step": 41084
    },
    {
      "epoch": 0.0002507568359375,
      "step": 41084,
      "training_step_time": 0.49663305282592773
    },
    {
      "epoch": 0.000250762939453125,
      "model_forward_time": 0.11480116844177246,
      "step": 41085
    },
    {
      "epoch": 0.000250762939453125,
      "step": 41085,
      "training_step_time": 0.5168347358703613
    },
    {
      "epoch": 0.00025076904296875,
      "model_forward_time": 0.1148824691772461,
      "step": 41086
    },
    {
      "epoch": 0.00025076904296875,
      "step": 41086,
      "training_step_time": 0.49454641342163086
    },
    {
      "epoch": 0.000250775146484375,
      "model_forward_time": 0.11539077758789062,
      "step": 41087
    },
    {
      "epoch": 0.000250775146484375,
      "step": 41087,
      "training_step_time": 0.45641255378723145
    },
    {
      "epoch": 0.00025078125,
      "model_forward_time": 0.11430215835571289,
      "step": 41088
    },
    {
      "epoch": 0.00025078125,
      "step": 41088,
      "training_step_time": 0.4857199192047119
    },
    {
      "epoch": 0.000250787353515625,
      "model_forward_time": 0.11425018310546875,
      "step": 41089
    },
    {
      "epoch": 0.000250787353515625,
      "step": 41089,
      "training_step_time": 0.3892092704772949
    },
    {
      "epoch": 0.00025079345703125,
      "grad_norm": 0.11619830876588821,
      "learning_rate": 2.478551642101743e-05,
      "loss": 0.0335,
      "step": 41090
    },
    {
      "epoch": 0.00025079345703125,
      "model_forward_time": 0.11424612998962402,
      "step": 41090
    },
    {
      "epoch": 0.00025079345703125,
      "step": 41090,
      "training_step_time": 0.38831448554992676
    },
    {
      "epoch": 0.000250799560546875,
      "model_forward_time": 0.114715576171875,
      "step": 41091
    },
    {
      "epoch": 0.000250799560546875,
      "step": 41091,
      "training_step_time": 0.4273664951324463
    },
    {
      "epoch": 0.0002508056640625,
      "model_forward_time": 0.11517024040222168,
      "step": 41092
    },
    {
      "epoch": 0.0002508056640625,
      "step": 41092,
      "training_step_time": 0.38773059844970703
    },
    {
      "epoch": 0.000250811767578125,
      "model_forward_time": 0.11499762535095215,
      "step": 41093
    },
    {
      "epoch": 0.000250811767578125,
      "step": 41093,
      "training_step_time": 0.3915221691131592
    },
    {
      "epoch": 0.00025081787109375,
      "model_forward_time": 0.11522173881530762,
      "step": 41094
    },
    {
      "epoch": 0.00025081787109375,
      "step": 41094,
      "training_step_time": 0.40111875534057617
    },
    {
      "epoch": 0.000250823974609375,
      "model_forward_time": 0.11480545997619629,
      "step": 41095
    },
    {
      "epoch": 0.000250823974609375,
      "step": 41095,
      "training_step_time": 0.3934955596923828
    },
    {
      "epoch": 0.000250830078125,
      "model_forward_time": 0.11521506309509277,
      "step": 41096
    },
    {
      "epoch": 0.000250830078125,
      "step": 41096,
      "training_step_time": 0.40140676498413086
    },
    {
      "epoch": 0.000250836181640625,
      "model_forward_time": 0.11508512496948242,
      "step": 41097
    },
    {
      "epoch": 0.000250836181640625,
      "step": 41097,
      "training_step_time": 0.389420747756958
    },
    {
      "epoch": 0.00025084228515625,
      "model_forward_time": 0.11534285545349121,
      "step": 41098
    },
    {
      "epoch": 0.00025084228515625,
      "step": 41098,
      "training_step_time": 0.3970327377319336
    },
    {
      "epoch": 0.000250848388671875,
      "model_forward_time": 0.11504077911376953,
      "step": 41099
    },
    {
      "epoch": 0.000250848388671875,
      "step": 41099,
      "training_step_time": 0.4259762763977051
    },
    {
      "epoch": 0.0002508544921875,
      "grad_norm": 0.09011536836624146,
      "learning_rate": 2.476172311325783e-05,
      "loss": 0.0363,
      "step": 41100
    },
    {
      "epoch": 0.0002508544921875,
      "model_forward_time": 0.11600852012634277,
      "step": 41100
    },
    {
      "epoch": 0.0002508544921875,
      "step": 41100,
      "training_step_time": 0.47570109367370605
    },
    {
      "epoch": 0.000250860595703125,
      "model_forward_time": 0.1153252124786377,
      "step": 41101
    },
    {
      "epoch": 0.000250860595703125,
      "step": 41101,
      "training_step_time": 0.4673340320587158
    },
    {
      "epoch": 0.00025086669921875,
      "model_forward_time": 0.11536026000976562,
      "step": 41102
    },
    {
      "epoch": 0.00025086669921875,
      "step": 41102,
      "training_step_time": 0.4982576370239258
    },
    {
      "epoch": 0.000250872802734375,
      "model_forward_time": 0.11543703079223633,
      "step": 41103
    },
    {
      "epoch": 0.000250872802734375,
      "step": 41103,
      "training_step_time": 0.421644926071167
    },
    {
      "epoch": 0.00025087890625,
      "model_forward_time": 0.11492443084716797,
      "step": 41104
    },
    {
      "epoch": 0.00025087890625,
      "step": 41104,
      "training_step_time": 0.3905191421508789
    },
    {
      "epoch": 0.000250885009765625,
      "model_forward_time": 0.11454081535339355,
      "step": 41105
    },
    {
      "epoch": 0.000250885009765625,
      "step": 41105,
      "training_step_time": 0.424044132232666
    },
    {
      "epoch": 0.00025089111328125,
      "model_forward_time": 0.11476612091064453,
      "step": 41106
    },
    {
      "epoch": 0.00025089111328125,
      "step": 41106,
      "training_step_time": 0.3907642364501953
    },
    {
      "epoch": 0.000250897216796875,
      "model_forward_time": 0.1150822639465332,
      "step": 41107
    },
    {
      "epoch": 0.000250897216796875,
      "step": 41107,
      "training_step_time": 0.40767455101013184
    },
    {
      "epoch": 0.0002509033203125,
      "model_forward_time": 0.11541271209716797,
      "step": 41108
    },
    {
      "epoch": 0.0002509033203125,
      "step": 41108,
      "training_step_time": 0.3926830291748047
    },
    {
      "epoch": 0.000250909423828125,
      "model_forward_time": 0.11500024795532227,
      "step": 41109
    },
    {
      "epoch": 0.000250909423828125,
      "step": 41109,
      "training_step_time": 0.3957791328430176
    },
    {
      "epoch": 0.00025091552734375,
      "grad_norm": 0.11763698607683182,
      "learning_rate": 2.4737937472220336e-05,
      "loss": 0.0355,
      "step": 41110
    },
    {
      "epoch": 0.00025091552734375,
      "model_forward_time": 0.11553215980529785,
      "step": 41110
    },
    {
      "epoch": 0.00025091552734375,
      "step": 41110,
      "training_step_time": 0.4074263572692871
    },
    {
      "epoch": 0.000250921630859375,
      "model_forward_time": 0.11455988883972168,
      "step": 41111
    },
    {
      "epoch": 0.000250921630859375,
      "step": 41111,
      "training_step_time": 0.39128875732421875
    },
    {
      "epoch": 0.000250927734375,
      "model_forward_time": 0.11484408378601074,
      "step": 41112
    },
    {
      "epoch": 0.000250927734375,
      "step": 41112,
      "training_step_time": 0.4036390781402588
    },
    {
      "epoch": 0.000250933837890625,
      "model_forward_time": 0.11472105979919434,
      "step": 41113
    },
    {
      "epoch": 0.000250933837890625,
      "step": 41113,
      "training_step_time": 0.3995380401611328
    },
    {
      "epoch": 0.00025093994140625,
      "model_forward_time": 0.11480450630187988,
      "step": 41114
    },
    {
      "epoch": 0.00025093994140625,
      "step": 41114,
      "training_step_time": 0.39766550064086914
    },
    {
      "epoch": 0.000250946044921875,
      "model_forward_time": 0.11509895324707031,
      "step": 41115
    },
    {
      "epoch": 0.000250946044921875,
      "step": 41115,
      "training_step_time": 0.46056127548217773
    },
    {
      "epoch": 0.0002509521484375,
      "model_forward_time": 0.1150515079498291,
      "step": 41116
    },
    {
      "epoch": 0.0002509521484375,
      "step": 41116,
      "training_step_time": 0.5075860023498535
    },
    {
      "epoch": 0.000250958251953125,
      "model_forward_time": 0.11497306823730469,
      "step": 41117
    },
    {
      "epoch": 0.000250958251953125,
      "step": 41117,
      "training_step_time": 0.5113623142242432
    },
    {
      "epoch": 0.00025096435546875,
      "model_forward_time": 0.1148080825805664,
      "step": 41118
    },
    {
      "epoch": 0.00025096435546875,
      "step": 41118,
      "training_step_time": 0.4024653434753418
    },
    {
      "epoch": 0.000250970458984375,
      "model_forward_time": 0.11502480506896973,
      "step": 41119
    },
    {
      "epoch": 0.000250970458984375,
      "step": 41119,
      "training_step_time": 0.4150247573852539
    },
    {
      "epoch": 0.0002509765625,
      "grad_norm": 0.08428343385457993,
      "learning_rate": 2.4714159505130452e-05,
      "loss": 0.0359,
      "step": 41120
    },
    {
      "epoch": 0.0002509765625,
      "model_forward_time": 0.1139829158782959,
      "step": 41120
    },
    {
      "epoch": 0.0002509765625,
      "step": 41120,
      "training_step_time": 0.3939063549041748
    },
    {
      "epoch": 0.000250982666015625,
      "model_forward_time": 0.11524581909179688,
      "step": 41121
    },
    {
      "epoch": 0.000250982666015625,
      "step": 41121,
      "training_step_time": 0.39960551261901855
    },
    {
      "epoch": 0.00025098876953125,
      "model_forward_time": 0.11480259895324707,
      "step": 41122
    },
    {
      "epoch": 0.00025098876953125,
      "step": 41122,
      "training_step_time": 0.3903369903564453
    },
    {
      "epoch": 0.000250994873046875,
      "model_forward_time": 0.11556386947631836,
      "step": 41123
    },
    {
      "epoch": 0.000250994873046875,
      "step": 41123,
      "training_step_time": 0.4048013687133789
    },
    {
      "epoch": 0.0002510009765625,
      "model_forward_time": 0.11460614204406738,
      "step": 41124
    },
    {
      "epoch": 0.0002510009765625,
      "step": 41124,
      "training_step_time": 0.39379048347473145
    },
    {
      "epoch": 0.000251007080078125,
      "model_forward_time": 0.11519718170166016,
      "step": 41125
    },
    {
      "epoch": 0.000251007080078125,
      "step": 41125,
      "training_step_time": 0.394183874130249
    },
    {
      "epoch": 0.00025101318359375,
      "model_forward_time": 0.1149594783782959,
      "step": 41126
    },
    {
      "epoch": 0.00025101318359375,
      "step": 41126,
      "training_step_time": 0.3969407081604004
    },
    {
      "epoch": 0.000251019287109375,
      "model_forward_time": 0.1148521900177002,
      "step": 41127
    },
    {
      "epoch": 0.000251019287109375,
      "step": 41127,
      "training_step_time": 0.38949060440063477
    },
    {
      "epoch": 0.000251025390625,
      "model_forward_time": 0.11559796333312988,
      "step": 41128
    },
    {
      "epoch": 0.000251025390625,
      "step": 41128,
      "training_step_time": 0.4257497787475586
    },
    {
      "epoch": 0.000251031494140625,
      "model_forward_time": 0.11466240882873535,
      "step": 41129
    },
    {
      "epoch": 0.000251031494140625,
      "step": 41129,
      "training_step_time": 0.3984353542327881
    },
    {
      "epoch": 0.00025103759765625,
      "grad_norm": 0.13744321465492249,
      "learning_rate": 2.4690389219211273e-05,
      "loss": 0.0371,
      "step": 41130
    },
    {
      "epoch": 0.00025103759765625,
      "model_forward_time": 0.1144263744354248,
      "step": 41130
    },
    {
      "epoch": 0.00025103759765625,
      "step": 41130,
      "training_step_time": 0.3671298027038574
    },
    {
      "epoch": 0.000251043701171875,
      "model_forward_time": 0.11480259895324707,
      "step": 41131
    },
    {
      "epoch": 0.000251043701171875,
      "step": 41131,
      "training_step_time": 0.44931745529174805
    },
    {
      "epoch": 0.0002510498046875,
      "model_forward_time": 0.1152505874633789,
      "step": 41132
    },
    {
      "epoch": 0.0002510498046875,
      "step": 41132,
      "training_step_time": 0.42670679092407227
    },
    {
      "epoch": 0.000251055908203125,
      "model_forward_time": 0.11475658416748047,
      "step": 41133
    },
    {
      "epoch": 0.000251055908203125,
      "step": 41133,
      "training_step_time": 0.4107396602630615
    },
    {
      "epoch": 0.00025106201171875,
      "model_forward_time": 0.11452174186706543,
      "step": 41134
    },
    {
      "epoch": 0.00025106201171875,
      "step": 41134,
      "training_step_time": 0.3871932029724121
    },
    {
      "epoch": 0.000251068115234375,
      "model_forward_time": 0.115142822265625,
      "step": 41135
    },
    {
      "epoch": 0.000251068115234375,
      "step": 41135,
      "training_step_time": 0.38826608657836914
    },
    {
      "epoch": 0.00025107421875,
      "model_forward_time": 0.11478185653686523,
      "step": 41136
    },
    {
      "epoch": 0.00025107421875,
      "step": 41136,
      "training_step_time": 0.40198206901550293
    },
    {
      "epoch": 0.000251080322265625,
      "model_forward_time": 0.11493110656738281,
      "step": 41137
    },
    {
      "epoch": 0.000251080322265625,
      "step": 41137,
      "training_step_time": 0.39792561531066895
    },
    {
      "epoch": 0.00025108642578125,
      "model_forward_time": 0.11516618728637695,
      "step": 41138
    },
    {
      "epoch": 0.00025108642578125,
      "step": 41138,
      "training_step_time": 0.3949704170227051
    },
    {
      "epoch": 0.000251092529296875,
      "model_forward_time": 0.11458802223205566,
      "step": 41139
    },
    {
      "epoch": 0.000251092529296875,
      "step": 41139,
      "training_step_time": 0.39429569244384766
    },
    {
      "epoch": 0.0002510986328125,
      "grad_norm": 0.10804577171802521,
      "learning_rate": 2.4666626621683592e-05,
      "loss": 0.0352,
      "step": 41140
    },
    {
      "epoch": 0.0002510986328125,
      "model_forward_time": 0.11536693572998047,
      "step": 41140
    },
    {
      "epoch": 0.0002510986328125,
      "step": 41140,
      "training_step_time": 0.399566650390625
    },
    {
      "epoch": 0.000251104736328125,
      "model_forward_time": 0.11526823043823242,
      "step": 41141
    },
    {
      "epoch": 0.000251104736328125,
      "step": 41141,
      "training_step_time": 0.394972562789917
    },
    {
      "epoch": 0.00025111083984375,
      "model_forward_time": 0.11467313766479492,
      "step": 41142
    },
    {
      "epoch": 0.00025111083984375,
      "step": 41142,
      "training_step_time": 0.39321064949035645
    },
    {
      "epoch": 0.000251116943359375,
      "model_forward_time": 0.11503338813781738,
      "step": 41143
    },
    {
      "epoch": 0.000251116943359375,
      "step": 41143,
      "training_step_time": 0.4696507453918457
    },
    {
      "epoch": 0.000251123046875,
      "model_forward_time": 0.11548900604248047,
      "step": 41144
    },
    {
      "epoch": 0.000251123046875,
      "step": 41144,
      "training_step_time": 0.4190804958343506
    },
    {
      "epoch": 0.000251129150390625,
      "model_forward_time": 0.11495304107666016,
      "step": 41145
    },
    {
      "epoch": 0.000251129150390625,
      "step": 41145,
      "training_step_time": 0.46762657165527344
    },
    {
      "epoch": 0.00025113525390625,
      "model_forward_time": 0.11530876159667969,
      "step": 41146
    },
    {
      "epoch": 0.00025113525390625,
      "step": 41146,
      "training_step_time": 0.47426724433898926
    },
    {
      "epoch": 0.000251141357421875,
      "model_forward_time": 0.1160745620727539,
      "step": 41147
    },
    {
      "epoch": 0.000251141357421875,
      "step": 41147,
      "training_step_time": 0.4254262447357178
    },
    {
      "epoch": 0.0002511474609375,
      "model_forward_time": 0.11601066589355469,
      "step": 41148
    },
    {
      "epoch": 0.0002511474609375,
      "step": 41148,
      "training_step_time": 0.39066457748413086
    },
    {
      "epoch": 0.000251153564453125,
      "model_forward_time": 0.11512279510498047,
      "step": 41149
    },
    {
      "epoch": 0.000251153564453125,
      "step": 41149,
      "training_step_time": 0.39639949798583984
    },
    {
      "epoch": 0.00025115966796875,
      "grad_norm": 0.11238181591033936,
      "learning_rate": 2.4642871719765852e-05,
      "loss": 0.037,
      "step": 41150
    },
    {
      "epoch": 0.00025115966796875,
      "model_forward_time": 0.11574292182922363,
      "step": 41150
    },
    {
      "epoch": 0.00025115966796875,
      "step": 41150,
      "training_step_time": 0.4036092758178711
    },
    {
      "epoch": 0.000251165771484375,
      "model_forward_time": 0.11514091491699219,
      "step": 41151
    },
    {
      "epoch": 0.000251165771484375,
      "step": 41151,
      "training_step_time": 0.4096674919128418
    },
    {
      "epoch": 0.000251171875,
      "model_forward_time": 0.11549162864685059,
      "step": 41152
    },
    {
      "epoch": 0.000251171875,
      "step": 41152,
      "training_step_time": 0.389629602432251
    },
    {
      "epoch": 0.000251177978515625,
      "model_forward_time": 0.11562657356262207,
      "step": 41153
    },
    {
      "epoch": 0.000251177978515625,
      "step": 41153,
      "training_step_time": 0.40007901191711426
    },
    {
      "epoch": 0.00025118408203125,
      "model_forward_time": 0.11556363105773926,
      "step": 41154
    },
    {
      "epoch": 0.00025118408203125,
      "step": 41154,
      "training_step_time": 0.38800787925720215
    },
    {
      "epoch": 0.000251190185546875,
      "model_forward_time": 0.11526274681091309,
      "step": 41155
    },
    {
      "epoch": 0.000251190185546875,
      "step": 41155,
      "training_step_time": 0.39226436614990234
    },
    {
      "epoch": 0.0002511962890625,
      "model_forward_time": 0.11514759063720703,
      "step": 41156
    },
    {
      "epoch": 0.0002511962890625,
      "step": 41156,
      "training_step_time": 0.3974146842956543
    },
    {
      "epoch": 0.000251202392578125,
      "model_forward_time": 0.11497068405151367,
      "step": 41157
    },
    {
      "epoch": 0.000251202392578125,
      "step": 41157,
      "training_step_time": 0.4499027729034424
    },
    {
      "epoch": 0.00025120849609375,
      "model_forward_time": 0.1149299144744873,
      "step": 41158
    },
    {
      "epoch": 0.00025120849609375,
      "step": 41158,
      "training_step_time": 0.44460153579711914
    },
    {
      "epoch": 0.000251214599609375,
      "model_forward_time": 0.11541438102722168,
      "step": 41159
    },
    {
      "epoch": 0.000251214599609375,
      "step": 41159,
      "training_step_time": 0.44097399711608887
    },
    {
      "epoch": 0.000251220703125,
      "grad_norm": 0.1518605649471283,
      "learning_rate": 2.4619124520674146e-05,
      "loss": 0.0385,
      "step": 41160
    },
    {
      "epoch": 0.000251220703125,
      "model_forward_time": 0.11542344093322754,
      "step": 41160
    },
    {
      "epoch": 0.000251220703125,
      "step": 41160,
      "training_step_time": 0.4411952495574951
    },
    {
      "epoch": 0.000251226806640625,
      "model_forward_time": 0.1150975227355957,
      "step": 41161
    },
    {
      "epoch": 0.000251226806640625,
      "step": 41161,
      "training_step_time": 0.44544506072998047
    },
    {
      "epoch": 0.00025123291015625,
      "model_forward_time": 0.11592221260070801,
      "step": 41162
    },
    {
      "epoch": 0.00025123291015625,
      "step": 41162,
      "training_step_time": 0.4182267189025879
    },
    {
      "epoch": 0.000251239013671875,
      "model_forward_time": 0.11561441421508789,
      "step": 41163
    },
    {
      "epoch": 0.000251239013671875,
      "step": 41163,
      "training_step_time": 0.39751434326171875
    },
    {
      "epoch": 0.0002512451171875,
      "model_forward_time": 0.11506152153015137,
      "step": 41164
    },
    {
      "epoch": 0.0002512451171875,
      "step": 41164,
      "training_step_time": 0.39861297607421875
    },
    {
      "epoch": 0.000251251220703125,
      "model_forward_time": 0.11536169052124023,
      "step": 41165
    },
    {
      "epoch": 0.000251251220703125,
      "step": 41165,
      "training_step_time": 0.3938333988189697
    },
    {
      "epoch": 0.00025125732421875,
      "model_forward_time": 0.11464428901672363,
      "step": 41166
    },
    {
      "epoch": 0.00025125732421875,
      "step": 41166,
      "training_step_time": 0.39033985137939453
    },
    {
      "epoch": 0.000251263427734375,
      "model_forward_time": 0.11525225639343262,
      "step": 41167
    },
    {
      "epoch": 0.000251263427734375,
      "step": 41167,
      "training_step_time": 0.3923072814941406
    },
    {
      "epoch": 0.00025126953125,
      "model_forward_time": 0.11516666412353516,
      "step": 41168
    },
    {
      "epoch": 0.00025126953125,
      "step": 41168,
      "training_step_time": 0.39842653274536133
    },
    {
      "epoch": 0.000251275634765625,
      "model_forward_time": 0.11524605751037598,
      "step": 41169
    },
    {
      "epoch": 0.000251275634765625,
      "step": 41169,
      "training_step_time": 0.42955756187438965
    },
    {
      "epoch": 0.00025128173828125,
      "grad_norm": 0.11617472767829895,
      "learning_rate": 2.459538503162231e-05,
      "loss": 0.0379,
      "step": 41170
    },
    {
      "epoch": 0.00025128173828125,
      "model_forward_time": 0.11504054069519043,
      "step": 41170
    },
    {
      "epoch": 0.00025128173828125,
      "step": 41170,
      "training_step_time": 0.40395379066467285
    },
    {
      "epoch": 0.000251287841796875,
      "model_forward_time": 0.11526942253112793,
      "step": 41171
    },
    {
      "epoch": 0.000251287841796875,
      "step": 41171,
      "training_step_time": 0.40229082107543945
    },
    {
      "epoch": 0.0002512939453125,
      "model_forward_time": 0.11603116989135742,
      "step": 41172
    },
    {
      "epoch": 0.0002512939453125,
      "step": 41172,
      "training_step_time": 0.3943295478820801
    },
    {
      "epoch": 0.000251300048828125,
      "model_forward_time": 0.11525821685791016,
      "step": 41173
    },
    {
      "epoch": 0.000251300048828125,
      "step": 41173,
      "training_step_time": 0.45792341232299805
    },
    {
      "epoch": 0.00025130615234375,
      "model_forward_time": 0.11483168601989746,
      "step": 41174
    },
    {
      "epoch": 0.00025130615234375,
      "step": 41174,
      "training_step_time": 0.5055832862854004
    },
    {
      "epoch": 0.000251312255859375,
      "model_forward_time": 0.11517524719238281,
      "step": 41175
    },
    {
      "epoch": 0.000251312255859375,
      "step": 41175,
      "training_step_time": 0.4410984516143799
    },
    {
      "epoch": 0.000251318359375,
      "model_forward_time": 0.11536502838134766,
      "step": 41176
    },
    {
      "epoch": 0.000251318359375,
      "step": 41176,
      "training_step_time": 0.48302173614501953
    },
    {
      "epoch": 0.000251324462890625,
      "model_forward_time": 0.1159353256225586,
      "step": 41177
    },
    {
      "epoch": 0.000251324462890625,
      "step": 41177,
      "training_step_time": 0.40996479988098145
    },
    {
      "epoch": 0.00025133056640625,
      "model_forward_time": 0.1153113842010498,
      "step": 41178
    },
    {
      "epoch": 0.00025133056640625,
      "step": 41178,
      "training_step_time": 0.39588141441345215
    },
    {
      "epoch": 0.000251336669921875,
      "model_forward_time": 0.11510610580444336,
      "step": 41179
    },
    {
      "epoch": 0.000251336669921875,
      "step": 41179,
      "training_step_time": 0.3855106830596924
    },
    {
      "epoch": 0.0002513427734375,
      "grad_norm": 0.09509456902742386,
      "learning_rate": 2.4571653259821694e-05,
      "loss": 0.0332,
      "step": 41180
    },
    {
      "epoch": 0.0002513427734375,
      "model_forward_time": 0.11528491973876953,
      "step": 41180
    },
    {
      "epoch": 0.0002513427734375,
      "step": 41180,
      "training_step_time": 0.38541221618652344
    },
    {
      "epoch": 0.000251348876953125,
      "model_forward_time": 0.11525225639343262,
      "step": 41181
    },
    {
      "epoch": 0.000251348876953125,
      "step": 41181,
      "training_step_time": 0.3934178352355957
    },
    {
      "epoch": 0.00025135498046875,
      "model_forward_time": 0.11491250991821289,
      "step": 41182
    },
    {
      "epoch": 0.00025135498046875,
      "step": 41182,
      "training_step_time": 0.39051175117492676
    },
    {
      "epoch": 0.000251361083984375,
      "model_forward_time": 0.11587166786193848,
      "step": 41183
    },
    {
      "epoch": 0.000251361083984375,
      "step": 41183,
      "training_step_time": 0.38814616203308105
    },
    {
      "epoch": 0.0002513671875,
      "model_forward_time": 0.11538982391357422,
      "step": 41184
    },
    {
      "epoch": 0.0002513671875,
      "step": 41184,
      "training_step_time": 0.38288068771362305
    },
    {
      "epoch": 0.000251373291015625,
      "model_forward_time": 0.11545777320861816,
      "step": 41185
    },
    {
      "epoch": 0.000251373291015625,
      "step": 41185,
      "training_step_time": 0.39631128311157227
    },
    {
      "epoch": 0.00025137939453125,
      "model_forward_time": 0.1141812801361084,
      "step": 41186
    },
    {
      "epoch": 0.00025137939453125,
      "step": 41186,
      "training_step_time": 0.3927462100982666
    },
    {
      "epoch": 0.000251385498046875,
      "model_forward_time": 0.11492753028869629,
      "step": 41187
    },
    {
      "epoch": 0.000251385498046875,
      "step": 41187,
      "training_step_time": 0.49936866760253906
    },
    {
      "epoch": 0.0002513916015625,
      "model_forward_time": 0.11452555656433105,
      "step": 41188
    },
    {
      "epoch": 0.0002513916015625,
      "step": 41188,
      "training_step_time": 0.4409353733062744
    },
    {
      "epoch": 0.000251397705078125,
      "model_forward_time": 0.11558651924133301,
      "step": 41189
    },
    {
      "epoch": 0.000251397705078125,
      "step": 41189,
      "training_step_time": 0.5217483043670654
    },
    {
      "epoch": 0.00025140380859375,
      "grad_norm": 0.09256462007761002,
      "learning_rate": 2.4547929212481435e-05,
      "loss": 0.0347,
      "step": 41190
    },
    {
      "epoch": 0.00025140380859375,
      "model_forward_time": 0.11481595039367676,
      "step": 41190
    },
    {
      "epoch": 0.00025140380859375,
      "step": 41190,
      "training_step_time": 0.3999178409576416
    },
    {
      "epoch": 0.000251409912109375,
      "model_forward_time": 0.1139841079711914,
      "step": 41191
    },
    {
      "epoch": 0.000251409912109375,
      "step": 41191,
      "training_step_time": 0.43500208854675293
    },
    {
      "epoch": 0.000251416015625,
      "model_forward_time": 0.11479687690734863,
      "step": 41192
    },
    {
      "epoch": 0.000251416015625,
      "step": 41192,
      "training_step_time": 0.41062092781066895
    },
    {
      "epoch": 0.000251422119140625,
      "model_forward_time": 0.11429476737976074,
      "step": 41193
    },
    {
      "epoch": 0.000251422119140625,
      "step": 41193,
      "training_step_time": 0.41142964363098145
    },
    {
      "epoch": 0.00025142822265625,
      "model_forward_time": 0.11443042755126953,
      "step": 41194
    },
    {
      "epoch": 0.00025142822265625,
      "step": 41194,
      "training_step_time": 0.40329933166503906
    },
    {
      "epoch": 0.000251434326171875,
      "model_forward_time": 0.11418533325195312,
      "step": 41195
    },
    {
      "epoch": 0.000251434326171875,
      "step": 41195,
      "training_step_time": 0.39579176902770996
    },
    {
      "epoch": 0.0002514404296875,
      "model_forward_time": 0.11467432975769043,
      "step": 41196
    },
    {
      "epoch": 0.0002514404296875,
      "step": 41196,
      "training_step_time": 0.41742467880249023
    },
    {
      "epoch": 0.000251446533203125,
      "model_forward_time": 0.11471939086914062,
      "step": 41197
    },
    {
      "epoch": 0.000251446533203125,
      "step": 41197,
      "training_step_time": 0.39604663848876953
    },
    {
      "epoch": 0.00025145263671875,
      "model_forward_time": 0.11583662033081055,
      "step": 41198
    },
    {
      "epoch": 0.00025145263671875,
      "step": 41198,
      "training_step_time": 0.39304685592651367
    },
    {
      "epoch": 0.000251458740234375,
      "model_forward_time": 0.11526131629943848,
      "step": 41199
    },
    {
      "epoch": 0.000251458740234375,
      "step": 41199,
      "training_step_time": 0.4047276973724365
    },
    {
      "epoch": 0.00025146484375,
      "grad_norm": 0.1185758113861084,
      "learning_rate": 2.4524212896808263e-05,
      "loss": 0.0401,
      "step": 41200
    },
    {
      "epoch": 0.00025146484375,
      "model_forward_time": 0.11518359184265137,
      "step": 41200
    },
    {
      "epoch": 0.00025146484375,
      "step": 41200,
      "training_step_time": 0.39475059509277344
    },
    {
      "epoch": 0.000251470947265625,
      "model_forward_time": 0.1154170036315918,
      "step": 41201
    },
    {
      "epoch": 0.000251470947265625,
      "step": 41201,
      "training_step_time": 0.43361973762512207
    },
    {
      "epoch": 0.00025147705078125,
      "model_forward_time": 0.11473608016967773,
      "step": 41202
    },
    {
      "epoch": 0.00025147705078125,
      "step": 41202,
      "training_step_time": 0.422809362411499
    },
    {
      "epoch": 0.000251483154296875,
      "model_forward_time": 0.11479020118713379,
      "step": 41203
    },
    {
      "epoch": 0.000251483154296875,
      "step": 41203,
      "training_step_time": 0.5038721561431885
    },
    {
      "epoch": 0.0002514892578125,
      "model_forward_time": 0.11569714546203613,
      "step": 41204
    },
    {
      "epoch": 0.0002514892578125,
      "step": 41204,
      "training_step_time": 0.3981363773345947
    },
    {
      "epoch": 0.000251495361328125,
      "model_forward_time": 0.11540365219116211,
      "step": 41205
    },
    {
      "epoch": 0.000251495361328125,
      "step": 41205,
      "training_step_time": 0.513397216796875
    },
    {
      "epoch": 0.00025150146484375,
      "model_forward_time": 0.11489534378051758,
      "step": 41206
    },
    {
      "epoch": 0.00025150146484375,
      "step": 41206,
      "training_step_time": 0.4110381603240967
    },
    {
      "epoch": 0.000251507568359375,
      "model_forward_time": 0.11443781852722168,
      "step": 41207
    },
    {
      "epoch": 0.000251507568359375,
      "step": 41207,
      "training_step_time": 0.3859591484069824
    },
    {
      "epoch": 0.000251513671875,
      "model_forward_time": 0.11443400382995605,
      "step": 41208
    },
    {
      "epoch": 0.000251513671875,
      "step": 41208,
      "training_step_time": 0.39779210090637207
    },
    {
      "epoch": 0.000251519775390625,
      "model_forward_time": 0.11493134498596191,
      "step": 41209
    },
    {
      "epoch": 0.000251519775390625,
      "step": 41209,
      "training_step_time": 0.3945145606994629
    },
    {
      "epoch": 0.00025152587890625,
      "grad_norm": 0.13258297741413116,
      "learning_rate": 2.4500504320006562e-05,
      "loss": 0.0369,
      "step": 41210
    },
    {
      "epoch": 0.00025152587890625,
      "model_forward_time": 0.11499524116516113,
      "step": 41210
    },
    {
      "epoch": 0.00025152587890625,
      "step": 41210,
      "training_step_time": 0.39605093002319336
    },
    {
      "epoch": 0.000251531982421875,
      "model_forward_time": 0.11503267288208008,
      "step": 41211
    },
    {
      "epoch": 0.000251531982421875,
      "step": 41211,
      "training_step_time": 0.3885610103607178
    },
    {
      "epoch": 0.0002515380859375,
      "model_forward_time": 0.11539530754089355,
      "step": 41212
    },
    {
      "epoch": 0.0002515380859375,
      "step": 41212,
      "training_step_time": 0.38888096809387207
    },
    {
      "epoch": 0.000251544189453125,
      "model_forward_time": 0.11473870277404785,
      "step": 41213
    },
    {
      "epoch": 0.000251544189453125,
      "step": 41213,
      "training_step_time": 0.3859889507293701
    },
    {
      "epoch": 0.00025155029296875,
      "model_forward_time": 0.11581897735595703,
      "step": 41214
    },
    {
      "epoch": 0.00025155029296875,
      "step": 41214,
      "training_step_time": 0.40424013137817383
    },
    {
      "epoch": 0.000251556396484375,
      "model_forward_time": 0.11451983451843262,
      "step": 41215
    },
    {
      "epoch": 0.000251556396484375,
      "step": 41215,
      "training_step_time": 0.3982717990875244
    },
    {
      "epoch": 0.0002515625,
      "model_forward_time": 0.11605477333068848,
      "step": 41216
    },
    {
      "epoch": 0.0002515625,
      "step": 41216,
      "training_step_time": 0.4267761707305908
    },
    {
      "epoch": 0.000251568603515625,
      "model_forward_time": 0.11576247215270996,
      "step": 41217
    },
    {
      "epoch": 0.000251568603515625,
      "step": 41217,
      "training_step_time": 0.4428231716156006
    },
    {
      "epoch": 0.00025157470703125,
      "model_forward_time": 0.11544966697692871,
      "step": 41218
    },
    {
      "epoch": 0.00025157470703125,
      "step": 41218,
      "training_step_time": 0.496737003326416
    },
    {
      "epoch": 0.000251580810546875,
      "model_forward_time": 0.1151876449584961,
      "step": 41219
    },
    {
      "epoch": 0.000251580810546875,
      "step": 41219,
      "training_step_time": 0.41812849044799805
    },
    {
      "epoch": 0.0002515869140625,
      "grad_norm": 0.10501865297555923,
      "learning_rate": 2.447680348927837e-05,
      "loss": 0.0375,
      "step": 41220
    },
    {
      "epoch": 0.0002515869140625,
      "model_forward_time": 0.1159825325012207,
      "step": 41220
    },
    {
      "epoch": 0.0002515869140625,
      "step": 41220,
      "training_step_time": 0.4223973751068115
    },
    {
      "epoch": 0.000251593017578125,
      "model_forward_time": 0.11533737182617188,
      "step": 41221
    },
    {
      "epoch": 0.000251593017578125,
      "step": 41221,
      "training_step_time": 0.4282052516937256
    },
    {
      "epoch": 0.00025159912109375,
      "model_forward_time": 0.1147916316986084,
      "step": 41222
    },
    {
      "epoch": 0.00025159912109375,
      "step": 41222,
      "training_step_time": 0.3936011791229248
    },
    {
      "epoch": 0.000251605224609375,
      "model_forward_time": 0.11564970016479492,
      "step": 41223
    },
    {
      "epoch": 0.000251605224609375,
      "step": 41223,
      "training_step_time": 0.3916749954223633
    },
    {
      "epoch": 0.000251611328125,
      "model_forward_time": 0.11573290824890137,
      "step": 41224
    },
    {
      "epoch": 0.000251611328125,
      "step": 41224,
      "training_step_time": 0.394101619720459
    },
    {
      "epoch": 0.000251617431640625,
      "model_forward_time": 0.11525630950927734,
      "step": 41225
    },
    {
      "epoch": 0.000251617431640625,
      "step": 41225,
      "training_step_time": 0.4009256362915039
    },
    {
      "epoch": 0.00025162353515625,
      "model_forward_time": 0.11485576629638672,
      "step": 41226
    },
    {
      "epoch": 0.00025162353515625,
      "step": 41226,
      "training_step_time": 0.4096415042877197
    },
    {
      "epoch": 0.000251629638671875,
      "model_forward_time": 0.11541104316711426,
      "step": 41227
    },
    {
      "epoch": 0.000251629638671875,
      "step": 41227,
      "training_step_time": 0.3984806537628174
    },
    {
      "epoch": 0.0002516357421875,
      "model_forward_time": 0.11580157279968262,
      "step": 41228
    },
    {
      "epoch": 0.0002516357421875,
      "step": 41228,
      "training_step_time": 0.39888811111450195
    },
    {
      "epoch": 0.000251641845703125,
      "model_forward_time": 0.11541533470153809,
      "step": 41229
    },
    {
      "epoch": 0.000251641845703125,
      "step": 41229,
      "training_step_time": 0.4439213275909424
    },
    {
      "epoch": 0.00025164794921875,
      "grad_norm": 0.08742261677980423,
      "learning_rate": 2.4453110411823382e-05,
      "loss": 0.0334,
      "step": 41230
    },
    {
      "epoch": 0.00025164794921875,
      "model_forward_time": 0.11542868614196777,
      "step": 41230
    },
    {
      "epoch": 0.00025164794921875,
      "step": 41230,
      "training_step_time": 0.4104931354522705
    },
    {
      "epoch": 0.000251654052734375,
      "model_forward_time": 0.11480975151062012,
      "step": 41231
    },
    {
      "epoch": 0.000251654052734375,
      "step": 41231,
      "training_step_time": 0.5119562149047852
    },
    {
      "epoch": 0.00025166015625,
      "model_forward_time": 0.11508774757385254,
      "step": 41232
    },
    {
      "epoch": 0.00025166015625,
      "step": 41232,
      "training_step_time": 0.4268453121185303
    },
    {
      "epoch": 0.000251666259765625,
      "model_forward_time": 0.11585831642150879,
      "step": 41233
    },
    {
      "epoch": 0.000251666259765625,
      "step": 41233,
      "training_step_time": 0.4336555004119873
    },
    {
      "epoch": 0.00025167236328125,
      "model_forward_time": 0.11468863487243652,
      "step": 41234
    },
    {
      "epoch": 0.00025167236328125,
      "step": 41234,
      "training_step_time": 0.41500067710876465
    },
    {
      "epoch": 0.000251678466796875,
      "model_forward_time": 0.11439061164855957,
      "step": 41235
    },
    {
      "epoch": 0.000251678466796875,
      "step": 41235,
      "training_step_time": 0.43372154235839844
    },
    {
      "epoch": 0.0002516845703125,
      "model_forward_time": 0.11467885971069336,
      "step": 41236
    },
    {
      "epoch": 0.0002516845703125,
      "step": 41236,
      "training_step_time": 0.4196128845214844
    },
    {
      "epoch": 0.000251690673828125,
      "model_forward_time": 0.11478829383850098,
      "step": 41237
    },
    {
      "epoch": 0.000251690673828125,
      "step": 41237,
      "training_step_time": 0.3955047130584717
    },
    {
      "epoch": 0.00025169677734375,
      "model_forward_time": 0.11548948287963867,
      "step": 41238
    },
    {
      "epoch": 0.00025169677734375,
      "step": 41238,
      "training_step_time": 0.4223179817199707
    },
    {
      "epoch": 0.000251702880859375,
      "model_forward_time": 0.11613774299621582,
      "step": 41239
    },
    {
      "epoch": 0.000251702880859375,
      "step": 41239,
      "training_step_time": 0.46191835403442383
    },
    {
      "epoch": 0.000251708984375,
      "grad_norm": 0.07913269847631454,
      "learning_rate": 2.4429425094838903e-05,
      "loss": 0.0364,
      "step": 41240
    },
    {
      "epoch": 0.000251708984375,
      "model_forward_time": 0.11942672729492188,
      "step": 41240
    },
    {
      "epoch": 0.000251708984375,
      "step": 41240,
      "training_step_time": 0.5786147117614746
    },
    {
      "epoch": 0.000251715087890625,
      "model_forward_time": 0.13958001136779785,
      "step": 41241
    },
    {
      "epoch": 0.000251715087890625,
      "step": 41241,
      "training_step_time": 0.6109862327575684
    },
    {
      "epoch": 0.00025172119140625,
      "model_forward_time": 0.12065291404724121,
      "step": 41242
    },
    {
      "epoch": 0.00025172119140625,
      "step": 41242,
      "training_step_time": 0.6378464698791504
    },
    {
      "epoch": 0.000251727294921875,
      "model_forward_time": 0.12356138229370117,
      "step": 41243
    },
    {
      "epoch": 0.000251727294921875,
      "step": 41243,
      "training_step_time": 0.7073955535888672
    },
    {
      "epoch": 0.0002517333984375,
      "model_forward_time": 0.12289810180664062,
      "step": 41244
    },
    {
      "epoch": 0.0002517333984375,
      "step": 41244,
      "training_step_time": 0.6590838432312012
    },
    {
      "epoch": 0.000251739501953125,
      "model_forward_time": 0.12499165534973145,
      "step": 41245
    },
    {
      "epoch": 0.000251739501953125,
      "step": 41245,
      "training_step_time": 0.6025924682617188
    },
    {
      "epoch": 0.00025174560546875,
      "model_forward_time": 0.12261080741882324,
      "step": 41246
    },
    {
      "epoch": 0.00025174560546875,
      "step": 41246,
      "training_step_time": 0.7084090709686279
    },
    {
      "epoch": 0.000251751708984375,
      "model_forward_time": 0.11872386932373047,
      "step": 41247
    },
    {
      "epoch": 0.000251751708984375,
      "step": 41247,
      "training_step_time": 0.6241848468780518
    },
    {
      "epoch": 0.0002517578125,
      "model_forward_time": 0.11806631088256836,
      "step": 41248
    },
    {
      "epoch": 0.0002517578125,
      "step": 41248,
      "training_step_time": 0.6949450969696045
    },
    {
      "epoch": 0.000251763916015625,
      "model_forward_time": 0.16212248802185059,
      "step": 41249
    },
    {
      "epoch": 0.000251763916015625,
      "step": 41249,
      "training_step_time": 0.6312785148620605
    },
    {
      "epoch": 0.00025177001953125,
      "grad_norm": 0.10983069986104965,
      "learning_rate": 2.4405747545519963e-05,
      "loss": 0.0359,
      "step": 41250
    },
    {
      "epoch": 0.00025177001953125,
      "model_forward_time": 0.13417625427246094,
      "step": 41250
    },
    {
      "epoch": 0.00025177001953125,
      "step": 41250,
      "training_step_time": 0.7626631259918213
    },
    {
      "epoch": 0.000251776123046875,
      "model_forward_time": 0.11707806587219238,
      "step": 41251
    },
    {
      "epoch": 0.000251776123046875,
      "step": 41251,
      "training_step_time": 0.6705927848815918
    },
    {
      "epoch": 0.0002517822265625,
      "model_forward_time": 0.12239599227905273,
      "step": 41252
    },
    {
      "epoch": 0.0002517822265625,
      "step": 41252,
      "training_step_time": 0.6610245704650879
    },
    {
      "epoch": 0.000251788330078125,
      "model_forward_time": 0.12371611595153809,
      "step": 41253
    },
    {
      "epoch": 0.000251788330078125,
      "step": 41253,
      "training_step_time": 0.6682672500610352
    },
    {
      "epoch": 0.00025179443359375,
      "model_forward_time": 0.12034440040588379,
      "step": 41254
    },
    {
      "epoch": 0.00025179443359375,
      "step": 41254,
      "training_step_time": 0.660893440246582
    },
    {
      "epoch": 0.000251800537109375,
      "model_forward_time": 0.12542510032653809,
      "step": 41255
    },
    {
      "epoch": 0.000251800537109375,
      "step": 41255,
      "training_step_time": 0.7134263515472412
    },
    {
      "epoch": 0.000251806640625,
      "model_forward_time": 0.11884713172912598,
      "step": 41256
    },
    {
      "epoch": 0.000251806640625,
      "step": 41256,
      "training_step_time": 0.6916577816009521
    },
    {
      "epoch": 0.000251812744140625,
      "model_forward_time": 0.11824655532836914,
      "step": 41257
    },
    {
      "epoch": 0.000251812744140625,
      "step": 41257,
      "training_step_time": 0.6933798789978027
    },
    {
      "epoch": 0.00025181884765625,
      "model_forward_time": 0.11907958984375,
      "step": 41258
    },
    {
      "epoch": 0.00025181884765625,
      "step": 41258,
      "training_step_time": 0.6467258930206299
    },
    {
      "epoch": 0.000251824951171875,
      "model_forward_time": 0.11906766891479492,
      "step": 41259
    },
    {
      "epoch": 0.000251824951171875,
      "step": 41259,
      "training_step_time": 0.6426331996917725
    },
    {
      "epoch": 0.0002518310546875,
      "grad_norm": 0.11998141556978226,
      "learning_rate": 2.438207777105911e-05,
      "loss": 0.0435,
      "step": 41260
    },
    {
      "epoch": 0.0002518310546875,
      "model_forward_time": 0.11985588073730469,
      "step": 41260
    },
    {
      "epoch": 0.0002518310546875,
      "step": 41260,
      "training_step_time": 0.6588549613952637
    },
    {
      "epoch": 0.000251837158203125,
      "model_forward_time": 0.12496328353881836,
      "step": 41261
    },
    {
      "epoch": 0.000251837158203125,
      "step": 41261,
      "training_step_time": 0.656477689743042
    },
    {
      "epoch": 0.00025184326171875,
      "model_forward_time": 0.1271660327911377,
      "step": 41262
    },
    {
      "epoch": 0.00025184326171875,
      "step": 41262,
      "training_step_time": 0.6678884029388428
    },
    {
      "epoch": 0.000251849365234375,
      "model_forward_time": 0.12175798416137695,
      "step": 41263
    },
    {
      "epoch": 0.000251849365234375,
      "step": 41263,
      "training_step_time": 0.7080192565917969
    },
    {
      "epoch": 0.00025185546875,
      "model_forward_time": 0.11734867095947266,
      "step": 41264
    },
    {
      "epoch": 0.00025185546875,
      "step": 41264,
      "training_step_time": 0.8459815979003906
    },
    {
      "epoch": 0.000251861572265625,
      "model_forward_time": 0.11764335632324219,
      "step": 41265
    },
    {
      "epoch": 0.000251861572265625,
      "step": 41265,
      "training_step_time": 0.6917641162872314
    },
    {
      "epoch": 0.00025186767578125,
      "model_forward_time": 0.13458514213562012,
      "step": 41266
    },
    {
      "epoch": 0.00025186767578125,
      "step": 41266,
      "training_step_time": 0.6404104232788086
    },
    {
      "epoch": 0.000251873779296875,
      "model_forward_time": 0.1175541877746582,
      "step": 41267
    },
    {
      "epoch": 0.000251873779296875,
      "step": 41267,
      "training_step_time": 0.6442277431488037
    },
    {
      "epoch": 0.0002518798828125,
      "model_forward_time": 0.12309718132019043,
      "step": 41268
    },
    {
      "epoch": 0.0002518798828125,
      "step": 41268,
      "training_step_time": 0.7356700897216797
    },
    {
      "epoch": 0.000251885986328125,
      "model_forward_time": 0.11790156364440918,
      "step": 41269
    },
    {
      "epoch": 0.000251885986328125,
      "step": 41269,
      "training_step_time": 0.6916489601135254
    },
    {
      "epoch": 0.00025189208984375,
      "grad_norm": 0.10532305389642715,
      "learning_rate": 2.4358415778646643e-05,
      "loss": 0.0396,
      "step": 41270
    },
    {
      "epoch": 0.00025189208984375,
      "model_forward_time": 0.11710500717163086,
      "step": 41270
    },
    {
      "epoch": 0.00025189208984375,
      "step": 41270,
      "training_step_time": 0.6223716735839844
    },
    {
      "epoch": 0.000251898193359375,
      "model_forward_time": 0.12205982208251953,
      "step": 41271
    },
    {
      "epoch": 0.000251898193359375,
      "step": 41271,
      "training_step_time": 0.653719425201416
    },
    {
      "epoch": 0.000251904296875,
      "model_forward_time": 0.11952996253967285,
      "step": 41272
    },
    {
      "epoch": 0.000251904296875,
      "step": 41272,
      "training_step_time": 0.688133716583252
    },
    {
      "epoch": 0.000251910400390625,
      "model_forward_time": 0.12075972557067871,
      "step": 41273
    },
    {
      "epoch": 0.000251910400390625,
      "step": 41273,
      "training_step_time": 0.7172574996948242
    },
    {
      "epoch": 0.00025191650390625,
      "model_forward_time": 0.11789989471435547,
      "step": 41274
    },
    {
      "epoch": 0.00025191650390625,
      "step": 41274,
      "training_step_time": 0.688927412033081
    },
    {
      "epoch": 0.000251922607421875,
      "model_forward_time": 0.12032675743103027,
      "step": 41275
    },
    {
      "epoch": 0.000251922607421875,
      "step": 41275,
      "training_step_time": 0.7400681972503662
    },
    {
      "epoch": 0.0002519287109375,
      "model_forward_time": 0.11866211891174316,
      "step": 41276
    },
    {
      "epoch": 0.0002519287109375,
      "step": 41276,
      "training_step_time": 0.6649477481842041
    },
    {
      "epoch": 0.000251934814453125,
      "model_forward_time": 0.11825442314147949,
      "step": 41277
    },
    {
      "epoch": 0.000251934814453125,
      "step": 41277,
      "training_step_time": 0.735027551651001
    },
    {
      "epoch": 0.00025194091796875,
      "model_forward_time": 0.12079787254333496,
      "step": 41278
    },
    {
      "epoch": 0.00025194091796875,
      "step": 41278,
      "training_step_time": 0.6701653003692627
    },
    {
      "epoch": 0.000251947021484375,
      "model_forward_time": 0.1168375015258789,
      "step": 41279
    },
    {
      "epoch": 0.000251947021484375,
      "step": 41279,
      "training_step_time": 0.6500399112701416
    },
    {
      "epoch": 0.000251953125,
      "grad_norm": 0.12193329632282257,
      "learning_rate": 2.433476157547044e-05,
      "loss": 0.0422,
      "step": 41280
    },
    {
      "epoch": 0.000251953125,
      "model_forward_time": 0.12013983726501465,
      "step": 41280
    },
    {
      "epoch": 0.000251953125,
      "step": 41280,
      "training_step_time": 0.706505298614502
    },
    {
      "epoch": 0.000251959228515625,
      "model_forward_time": 0.12163352966308594,
      "step": 41281
    },
    {
      "epoch": 0.000251959228515625,
      "step": 41281,
      "training_step_time": 0.6791880130767822
    },
    {
      "epoch": 0.00025196533203125,
      "model_forward_time": 0.11913013458251953,
      "step": 41282
    },
    {
      "epoch": 0.00025196533203125,
      "step": 41282,
      "training_step_time": 0.7643489837646484
    },
    {
      "epoch": 0.000251971435546875,
      "model_forward_time": 0.11990857124328613,
      "step": 41283
    },
    {
      "epoch": 0.000251971435546875,
      "step": 41283,
      "training_step_time": 0.6196122169494629
    },
    {
      "epoch": 0.0002519775390625,
      "model_forward_time": 0.1227114200592041,
      "step": 41284
    },
    {
      "epoch": 0.0002519775390625,
      "step": 41284,
      "training_step_time": 0.6216886043548584
    },
    {
      "epoch": 0.000251983642578125,
      "model_forward_time": 0.13616156578063965,
      "step": 41285
    },
    {
      "epoch": 0.000251983642578125,
      "step": 41285,
      "training_step_time": 0.7019815444946289
    },
    {
      "epoch": 0.00025198974609375,
      "model_forward_time": 0.11610937118530273,
      "step": 41286
    },
    {
      "epoch": 0.00025198974609375,
      "step": 41286,
      "training_step_time": 0.7203078269958496
    },
    {
      "epoch": 0.000251995849609375,
      "model_forward_time": 0.13680720329284668,
      "step": 41287
    },
    {
      "epoch": 0.000251995849609375,
      "step": 41287,
      "training_step_time": 0.6591212749481201
    },
    {
      "epoch": 0.000252001953125,
      "model_forward_time": 0.11866450309753418,
      "step": 41288
    },
    {
      "epoch": 0.000252001953125,
      "step": 41288,
      "training_step_time": 0.7193593978881836
    },
    {
      "epoch": 0.000252008056640625,
      "model_forward_time": 0.11980795860290527,
      "step": 41289
    },
    {
      "epoch": 0.000252008056640625,
      "step": 41289,
      "training_step_time": 0.6924405097961426
    },
    {
      "epoch": 0.00025201416015625,
      "grad_norm": 0.15613369643688202,
      "learning_rate": 2.4311115168716013e-05,
      "loss": 0.0376,
      "step": 41290
    },
    {
      "epoch": 0.00025201416015625,
      "model_forward_time": 0.12006592750549316,
      "step": 41290
    },
    {
      "epoch": 0.00025201416015625,
      "step": 41290,
      "training_step_time": 0.6806750297546387
    },
    {
      "epoch": 0.000252020263671875,
      "model_forward_time": 0.12018752098083496,
      "step": 41291
    },
    {
      "epoch": 0.000252020263671875,
      "step": 41291,
      "training_step_time": 0.6818900108337402
    },
    {
      "epoch": 0.0002520263671875,
      "model_forward_time": 0.12197279930114746,
      "step": 41292
    },
    {
      "epoch": 0.0002520263671875,
      "step": 41292,
      "training_step_time": 0.5818049907684326
    },
    {
      "epoch": 0.000252032470703125,
      "model_forward_time": 0.12587499618530273,
      "step": 41293
    },
    {
      "epoch": 0.000252032470703125,
      "step": 41293,
      "training_step_time": 0.6518368721008301
    },
    {
      "epoch": 0.00025203857421875,
      "model_forward_time": 0.13057923316955566,
      "step": 41294
    },
    {
      "epoch": 0.00025203857421875,
      "step": 41294,
      "training_step_time": 0.7334003448486328
    },
    {
      "epoch": 0.000252044677734375,
      "model_forward_time": 0.12938261032104492,
      "step": 41295
    },
    {
      "epoch": 0.000252044677734375,
      "step": 41295,
      "training_step_time": 0.6419863700866699
    },
    {
      "epoch": 0.00025205078125,
      "model_forward_time": 0.12335705757141113,
      "step": 41296
    },
    {
      "epoch": 0.00025205078125,
      "step": 41296,
      "training_step_time": 0.6663978099822998
    },
    {
      "epoch": 0.000252056884765625,
      "model_forward_time": 0.1406383514404297,
      "step": 41297
    },
    {
      "epoch": 0.000252056884765625,
      "step": 41297,
      "training_step_time": 0.6155121326446533
    },
    {
      "epoch": 0.00025206298828125,
      "model_forward_time": 0.1273970603942871,
      "step": 41298
    },
    {
      "epoch": 0.00025206298828125,
      "step": 41298,
      "training_step_time": 0.7348446846008301
    },
    {
      "epoch": 0.000252069091796875,
      "model_forward_time": 0.1202702522277832,
      "step": 41299
    },
    {
      "epoch": 0.000252069091796875,
      "step": 41299,
      "training_step_time": 0.6510047912597656
    },
    {
      "epoch": 0.0002520751953125,
      "grad_norm": 0.10855474323034286,
      "learning_rate": 2.4287476565566527e-05,
      "loss": 0.0421,
      "step": 41300
    },
    {
      "epoch": 0.0002520751953125,
      "model_forward_time": 0.12256288528442383,
      "step": 41300
    },
    {
      "epoch": 0.0002520751953125,
      "step": 41300,
      "training_step_time": 0.7075631618499756
    },
    {
      "epoch": 0.000252081298828125,
      "model_forward_time": 0.1288137435913086,
      "step": 41301
    },
    {
      "epoch": 0.000252081298828125,
      "step": 41301,
      "training_step_time": 0.7101912498474121
    },
    {
      "epoch": 0.00025208740234375,
      "model_forward_time": 0.12439680099487305,
      "step": 41302
    },
    {
      "epoch": 0.00025208740234375,
      "step": 41302,
      "training_step_time": 0.6650118827819824
    },
    {
      "epoch": 0.000252093505859375,
      "model_forward_time": 0.12082886695861816,
      "step": 41303
    },
    {
      "epoch": 0.000252093505859375,
      "step": 41303,
      "training_step_time": 0.6785151958465576
    },
    {
      "epoch": 0.000252099609375,
      "model_forward_time": 0.11905050277709961,
      "step": 41304
    },
    {
      "epoch": 0.000252099609375,
      "step": 41304,
      "training_step_time": 0.6406278610229492
    },
    {
      "epoch": 0.000252105712890625,
      "model_forward_time": 0.12059354782104492,
      "step": 41305
    },
    {
      "epoch": 0.000252105712890625,
      "step": 41305,
      "training_step_time": 0.625126838684082
    },
    {
      "epoch": 0.00025211181640625,
      "model_forward_time": 0.12106156349182129,
      "step": 41306
    },
    {
      "epoch": 0.00025211181640625,
      "step": 41306,
      "training_step_time": 0.5642168521881104
    },
    {
      "epoch": 0.000252117919921875,
      "model_forward_time": 0.12168741226196289,
      "step": 41307
    },
    {
      "epoch": 0.000252117919921875,
      "step": 41307,
      "training_step_time": 0.5704431533813477
    },
    {
      "epoch": 0.0002521240234375,
      "model_forward_time": 0.1223917007446289,
      "step": 41308
    },
    {
      "epoch": 0.0002521240234375,
      "step": 41308,
      "training_step_time": 0.5543522834777832
    },
    {
      "epoch": 0.000252130126953125,
      "model_forward_time": 0.12272763252258301,
      "step": 41309
    },
    {
      "epoch": 0.000252130126953125,
      "step": 41309,
      "training_step_time": 0.7055835723876953
    },
    {
      "epoch": 0.00025213623046875,
      "grad_norm": 0.15131676197052002,
      "learning_rate": 2.4263845773202736e-05,
      "loss": 0.0406,
      "step": 41310
    },
    {
      "epoch": 0.00025213623046875,
      "model_forward_time": 0.11861586570739746,
      "step": 41310
    },
    {
      "epoch": 0.00025213623046875,
      "step": 41310,
      "training_step_time": 0.5011208057403564
    },
    {
      "epoch": 0.000252142333984375,
      "model_forward_time": 0.11812400817871094,
      "step": 41311
    },
    {
      "epoch": 0.000252142333984375,
      "step": 41311,
      "training_step_time": 0.5354537963867188
    },
    {
      "epoch": 0.0002521484375,
      "model_forward_time": 0.11730647087097168,
      "step": 41312
    },
    {
      "epoch": 0.0002521484375,
      "step": 41312,
      "training_step_time": 0.5358633995056152
    },
    {
      "epoch": 0.000252154541015625,
      "model_forward_time": 0.11879611015319824,
      "step": 41313
    },
    {
      "epoch": 0.000252154541015625,
      "step": 41313,
      "training_step_time": 0.45853281021118164
    },
    {
      "epoch": 0.00025216064453125,
      "model_forward_time": 0.11538124084472656,
      "step": 41314
    },
    {
      "epoch": 0.00025216064453125,
      "step": 41314,
      "training_step_time": 0.42473793029785156
    },
    {
      "epoch": 0.000252166748046875,
      "model_forward_time": 0.11524176597595215,
      "step": 41315
    },
    {
      "epoch": 0.000252166748046875,
      "step": 41315,
      "training_step_time": 0.41661524772644043
    },
    {
      "epoch": 0.0002521728515625,
      "model_forward_time": 0.1160268783569336,
      "step": 41316
    },
    {
      "epoch": 0.0002521728515625,
      "step": 41316,
      "training_step_time": 0.4076201915740967
    },
    {
      "epoch": 0.000252178955078125,
      "model_forward_time": 0.11470484733581543,
      "step": 41317
    },
    {
      "epoch": 0.000252178955078125,
      "step": 41317,
      "training_step_time": 0.39816784858703613
    },
    {
      "epoch": 0.00025218505859375,
      "model_forward_time": 0.11517477035522461,
      "step": 41318
    },
    {
      "epoch": 0.00025218505859375,
      "step": 41318,
      "training_step_time": 0.40880608558654785
    },
    {
      "epoch": 0.000252191162109375,
      "model_forward_time": 0.1154024600982666,
      "step": 41319
    },
    {
      "epoch": 0.000252191162109375,
      "step": 41319,
      "training_step_time": 0.40443968772888184
    },
    {
      "epoch": 0.000252197265625,
      "grad_norm": 0.11230796575546265,
      "learning_rate": 2.424022279880312e-05,
      "loss": 0.0429,
      "step": 41320
    },
    {
      "epoch": 0.000252197265625,
      "model_forward_time": 0.11513137817382812,
      "step": 41320
    },
    {
      "epoch": 0.000252197265625,
      "step": 41320,
      "training_step_time": 0.4147951602935791
    },
    {
      "epoch": 0.000252203369140625,
      "model_forward_time": 0.11497807502746582,
      "step": 41321
    },
    {
      "epoch": 0.000252203369140625,
      "step": 41321,
      "training_step_time": 0.4063389301300049
    },
    {
      "epoch": 0.00025220947265625,
      "model_forward_time": 0.11562991142272949,
      "step": 41322
    },
    {
      "epoch": 0.00025220947265625,
      "step": 41322,
      "training_step_time": 0.38333725929260254
    },
    {
      "epoch": 0.000252215576171875,
      "model_forward_time": 0.11505293846130371,
      "step": 41323
    },
    {
      "epoch": 0.000252215576171875,
      "step": 41323,
      "training_step_time": 0.4923560619354248
    },
    {
      "epoch": 0.0002522216796875,
      "model_forward_time": 0.11487770080566406,
      "step": 41324
    },
    {
      "epoch": 0.0002522216796875,
      "step": 41324,
      "training_step_time": 0.4517371654510498
    },
    {
      "epoch": 0.000252227783203125,
      "model_forward_time": 0.11527132987976074,
      "step": 41325
    },
    {
      "epoch": 0.000252227783203125,
      "step": 41325,
      "training_step_time": 0.4219214916229248
    },
    {
      "epoch": 0.00025223388671875,
      "model_forward_time": 0.1149892807006836,
      "step": 41326
    },
    {
      "epoch": 0.00025223388671875,
      "step": 41326,
      "training_step_time": 0.45801711082458496
    },
    {
      "epoch": 0.000252239990234375,
      "model_forward_time": 0.11479854583740234,
      "step": 41327
    },
    {
      "epoch": 0.000252239990234375,
      "step": 41327,
      "training_step_time": 0.4234447479248047
    },
    {
      "epoch": 0.00025224609375,
      "model_forward_time": 0.11598467826843262,
      "step": 41328
    },
    {
      "epoch": 0.00025224609375,
      "step": 41328,
      "training_step_time": 0.4197535514831543
    },
    {
      "epoch": 0.000252252197265625,
      "model_forward_time": 0.11527252197265625,
      "step": 41329
    },
    {
      "epoch": 0.000252252197265625,
      "step": 41329,
      "training_step_time": 0.48720884323120117
    },
    {
      "epoch": 0.00025225830078125,
      "grad_norm": 0.09423385560512543,
      "learning_rate": 2.4216607649543628e-05,
      "loss": 0.0406,
      "step": 41330
    },
    {
      "epoch": 0.00025225830078125,
      "model_forward_time": 0.11652040481567383,
      "step": 41330
    },
    {
      "epoch": 0.00025225830078125,
      "step": 41330,
      "training_step_time": 0.46343255043029785
    },
    {
      "epoch": 0.000252264404296875,
      "model_forward_time": 0.11577630043029785,
      "step": 41331
    },
    {
      "epoch": 0.000252264404296875,
      "step": 41331,
      "training_step_time": 0.39533233642578125
    },
    {
      "epoch": 0.0002522705078125,
      "model_forward_time": 0.11503791809082031,
      "step": 41332
    },
    {
      "epoch": 0.0002522705078125,
      "step": 41332,
      "training_step_time": 0.39100098609924316
    },
    {
      "epoch": 0.000252276611328125,
      "model_forward_time": 0.11462879180908203,
      "step": 41333
    },
    {
      "epoch": 0.000252276611328125,
      "step": 41333,
      "training_step_time": 0.3949248790740967
    },
    {
      "epoch": 0.00025228271484375,
      "model_forward_time": 0.11516165733337402,
      "step": 41334
    },
    {
      "epoch": 0.00025228271484375,
      "step": 41334,
      "training_step_time": 0.40262770652770996
    },
    {
      "epoch": 0.000252288818359375,
      "model_forward_time": 0.11493372917175293,
      "step": 41335
    },
    {
      "epoch": 0.000252288818359375,
      "step": 41335,
      "training_step_time": 0.3898899555206299
    },
    {
      "epoch": 0.000252294921875,
      "model_forward_time": 0.11531543731689453,
      "step": 41336
    },
    {
      "epoch": 0.000252294921875,
      "step": 41336,
      "training_step_time": 0.3923828601837158
    },
    {
      "epoch": 0.000252301025390625,
      "model_forward_time": 0.11588120460510254,
      "step": 41337
    },
    {
      "epoch": 0.000252301025390625,
      "step": 41337,
      "training_step_time": 0.40050339698791504
    },
    {
      "epoch": 0.00025230712890625,
      "model_forward_time": 0.11554718017578125,
      "step": 41338
    },
    {
      "epoch": 0.00025230712890625,
      "step": 41338,
      "training_step_time": 0.4537997245788574
    },
    {
      "epoch": 0.000252313232421875,
      "model_forward_time": 0.11576437950134277,
      "step": 41339
    },
    {
      "epoch": 0.000252313232421875,
      "step": 41339,
      "training_step_time": 0.474621057510376
    },
    {
      "epoch": 0.0002523193359375,
      "grad_norm": 0.07696795463562012,
      "learning_rate": 2.419300033259798e-05,
      "loss": 0.0361,
      "step": 41340
    },
    {
      "epoch": 0.0002523193359375,
      "model_forward_time": 0.11410236358642578,
      "step": 41340
    },
    {
      "epoch": 0.0002523193359375,
      "step": 41340,
      "training_step_time": 0.468364953994751
    },
    {
      "epoch": 0.000252325439453125,
      "model_forward_time": 0.11503887176513672,
      "step": 41341
    },
    {
      "epoch": 0.000252325439453125,
      "step": 41341,
      "training_step_time": 0.49677228927612305
    },
    {
      "epoch": 0.00025233154296875,
      "model_forward_time": 0.11512231826782227,
      "step": 41342
    },
    {
      "epoch": 0.00025233154296875,
      "step": 41342,
      "training_step_time": 0.3637969493865967
    },
    {
      "epoch": 0.000252337646484375,
      "model_forward_time": 0.11496973037719727,
      "step": 41343
    },
    {
      "epoch": 0.000252337646484375,
      "step": 41343,
      "training_step_time": 0.45850682258605957
    },
    {
      "epoch": 0.00025234375,
      "model_forward_time": 0.11581063270568848,
      "step": 41344
    },
    {
      "epoch": 0.00025234375,
      "step": 41344,
      "training_step_time": 0.40537452697753906
    },
    {
      "epoch": 0.000252349853515625,
      "model_forward_time": 0.11467194557189941,
      "step": 41345
    },
    {
      "epoch": 0.000252349853515625,
      "step": 41345,
      "training_step_time": 0.39458703994750977
    },
    {
      "epoch": 0.00025235595703125,
      "model_forward_time": 0.11579060554504395,
      "step": 41346
    },
    {
      "epoch": 0.00025235595703125,
      "step": 41346,
      "training_step_time": 0.3832364082336426
    },
    {
      "epoch": 0.000252362060546875,
      "model_forward_time": 0.1153709888458252,
      "step": 41347
    },
    {
      "epoch": 0.000252362060546875,
      "step": 41347,
      "training_step_time": 0.39481043815612793
    },
    {
      "epoch": 0.0002523681640625,
      "model_forward_time": 0.11564922332763672,
      "step": 41348
    },
    {
      "epoch": 0.0002523681640625,
      "step": 41348,
      "training_step_time": 0.3921091556549072
    },
    {
      "epoch": 0.000252374267578125,
      "model_forward_time": 0.11525583267211914,
      "step": 41349
    },
    {
      "epoch": 0.000252374267578125,
      "step": 41349,
      "training_step_time": 0.3882763385772705
    },
    {
      "epoch": 0.00025238037109375,
      "grad_norm": 0.10702735930681229,
      "learning_rate": 2.4169400855137436e-05,
      "loss": 0.0439,
      "step": 41350
    },
    {
      "epoch": 0.00025238037109375,
      "model_forward_time": 0.11565971374511719,
      "step": 41350
    },
    {
      "epoch": 0.00025238037109375,
      "step": 41350,
      "training_step_time": 0.39610981941223145
    },
    {
      "epoch": 0.000252386474609375,
      "model_forward_time": 0.11493945121765137,
      "step": 41351
    },
    {
      "epoch": 0.000252386474609375,
      "step": 41351,
      "training_step_time": 0.39489269256591797
    },
    {
      "epoch": 0.000252392578125,
      "model_forward_time": 0.11630392074584961,
      "step": 41352
    },
    {
      "epoch": 0.000252392578125,
      "step": 41352,
      "training_step_time": 0.39035844802856445
    },
    {
      "epoch": 0.000252398681640625,
      "model_forward_time": 0.11589527130126953,
      "step": 41353
    },
    {
      "epoch": 0.000252398681640625,
      "step": 41353,
      "training_step_time": 0.4371504783630371
    },
    {
      "epoch": 0.00025240478515625,
      "model_forward_time": 0.11564087867736816,
      "step": 41354
    },
    {
      "epoch": 0.00025240478515625,
      "step": 41354,
      "training_step_time": 0.4725511074066162
    },
    {
      "epoch": 0.000252410888671875,
      "model_forward_time": 0.11575961112976074,
      "step": 41355
    },
    {
      "epoch": 0.000252410888671875,
      "step": 41355,
      "training_step_time": 0.4563426971435547
    },
    {
      "epoch": 0.0002524169921875,
      "model_forward_time": 0.11554479598999023,
      "step": 41356
    },
    {
      "epoch": 0.0002524169921875,
      "step": 41356,
      "training_step_time": 0.41301584243774414
    },
    {
      "epoch": 0.000252423095703125,
      "model_forward_time": 0.11487150192260742,
      "step": 41357
    },
    {
      "epoch": 0.000252423095703125,
      "step": 41357,
      "training_step_time": 0.4376487731933594
    },
    {
      "epoch": 0.00025242919921875,
      "model_forward_time": 0.11518383026123047,
      "step": 41358
    },
    {
      "epoch": 0.00025242919921875,
      "step": 41358,
      "training_step_time": 0.42638468742370605
    },
    {
      "epoch": 0.000252435302734375,
      "model_forward_time": 0.11531519889831543,
      "step": 41359
    },
    {
      "epoch": 0.000252435302734375,
      "step": 41359,
      "training_step_time": 0.400346040725708
    },
    {
      "epoch": 0.00025244140625,
      "grad_norm": 0.11128635704517365,
      "learning_rate": 2.4145809224330896e-05,
      "loss": 0.0392,
      "step": 41360
    },
    {
      "epoch": 0.00025244140625,
      "model_forward_time": 0.11625003814697266,
      "step": 41360
    },
    {
      "epoch": 0.00025244140625,
      "step": 41360,
      "training_step_time": 0.40485692024230957
    },
    {
      "epoch": 0.000252447509765625,
      "model_forward_time": 0.11431217193603516,
      "step": 41361
    },
    {
      "epoch": 0.000252447509765625,
      "step": 41361,
      "training_step_time": 0.3820013999938965
    },
    {
      "epoch": 0.00025245361328125,
      "model_forward_time": 0.11552715301513672,
      "step": 41362
    },
    {
      "epoch": 0.00025245361328125,
      "step": 41362,
      "training_step_time": 0.3795740604400635
    },
    {
      "epoch": 0.000252459716796875,
      "model_forward_time": 0.11644482612609863,
      "step": 41363
    },
    {
      "epoch": 0.000252459716796875,
      "step": 41363,
      "training_step_time": 0.39554715156555176
    },
    {
      "epoch": 0.0002524658203125,
      "model_forward_time": 0.1160740852355957,
      "step": 41364
    },
    {
      "epoch": 0.0002524658203125,
      "step": 41364,
      "training_step_time": 0.38974428176879883
    },
    {
      "epoch": 0.000252471923828125,
      "model_forward_time": 0.11527371406555176,
      "step": 41365
    },
    {
      "epoch": 0.000252471923828125,
      "step": 41365,
      "training_step_time": 0.41788578033447266
    },
    {
      "epoch": 0.00025247802734375,
      "model_forward_time": 0.11536359786987305,
      "step": 41366
    },
    {
      "epoch": 0.00025247802734375,
      "step": 41366,
      "training_step_time": 0.39563941955566406
    },
    {
      "epoch": 0.000252484130859375,
      "model_forward_time": 0.11580538749694824,
      "step": 41367
    },
    {
      "epoch": 0.000252484130859375,
      "step": 41367,
      "training_step_time": 0.46513962745666504
    },
    {
      "epoch": 0.000252490234375,
      "model_forward_time": 0.1152186393737793,
      "step": 41368
    },
    {
      "epoch": 0.000252490234375,
      "step": 41368,
      "training_step_time": 0.43073463439941406
    },
    {
      "epoch": 0.000252496337890625,
      "model_forward_time": 0.11562561988830566,
      "step": 41369
    },
    {
      "epoch": 0.000252496337890625,
      "step": 41369,
      "training_step_time": 0.42441439628601074
    },
    {
      "epoch": 0.00025250244140625,
      "grad_norm": 0.1044461652636528,
      "learning_rate": 2.4122225447344875e-05,
      "loss": 0.0416,
      "step": 41370
    },
    {
      "epoch": 0.00025250244140625,
      "model_forward_time": 0.11472392082214355,
      "step": 41370
    },
    {
      "epoch": 0.00025250244140625,
      "step": 41370,
      "training_step_time": 0.46711039543151855
    },
    {
      "epoch": 0.000252508544921875,
      "model_forward_time": 0.11521697044372559,
      "step": 41371
    },
    {
      "epoch": 0.000252508544921875,
      "step": 41371,
      "training_step_time": 0.4097158908843994
    },
    {
      "epoch": 0.0002525146484375,
      "model_forward_time": 0.11646604537963867,
      "step": 41372
    },
    {
      "epoch": 0.0002525146484375,
      "step": 41372,
      "training_step_time": 0.41448020935058594
    },
    {
      "epoch": 0.000252520751953125,
      "model_forward_time": 0.11602354049682617,
      "step": 41373
    },
    {
      "epoch": 0.000252520751953125,
      "step": 41373,
      "training_step_time": 0.403195858001709
    },
    {
      "epoch": 0.00025252685546875,
      "model_forward_time": 0.11574888229370117,
      "step": 41374
    },
    {
      "epoch": 0.00025252685546875,
      "step": 41374,
      "training_step_time": 0.3956899642944336
    },
    {
      "epoch": 0.000252532958984375,
      "model_forward_time": 0.1156620979309082,
      "step": 41375
    },
    {
      "epoch": 0.000252532958984375,
      "step": 41375,
      "training_step_time": 0.40510058403015137
    },
    {
      "epoch": 0.0002525390625,
      "model_forward_time": 0.11578178405761719,
      "step": 41376
    },
    {
      "epoch": 0.0002525390625,
      "step": 41376,
      "training_step_time": 0.4011669158935547
    },
    {
      "epoch": 0.000252545166015625,
      "model_forward_time": 0.11543989181518555,
      "step": 41377
    },
    {
      "epoch": 0.000252545166015625,
      "step": 41377,
      "training_step_time": 0.3964505195617676
    },
    {
      "epoch": 0.00025255126953125,
      "model_forward_time": 0.11535906791687012,
      "step": 41378
    },
    {
      "epoch": 0.00025255126953125,
      "step": 41378,
      "training_step_time": 0.39586544036865234
    },
    {
      "epoch": 0.000252557373046875,
      "model_forward_time": 0.11571073532104492,
      "step": 41379
    },
    {
      "epoch": 0.000252557373046875,
      "step": 41379,
      "training_step_time": 0.39396166801452637
    },
    {
      "epoch": 0.0002525634765625,
      "grad_norm": 0.10746865719556808,
      "learning_rate": 2.4098649531343497e-05,
      "loss": 0.0389,
      "step": 41380
    },
    {
      "epoch": 0.0002525634765625,
      "model_forward_time": 0.11488080024719238,
      "step": 41380
    },
    {
      "epoch": 0.0002525634765625,
      "step": 41380,
      "training_step_time": 0.3902924060821533
    },
    {
      "epoch": 0.000252569580078125,
      "model_forward_time": 0.11608600616455078,
      "step": 41381
    },
    {
      "epoch": 0.000252569580078125,
      "step": 41381,
      "training_step_time": 0.40285730361938477
    },
    {
      "epoch": 0.00025257568359375,
      "model_forward_time": 0.11580967903137207,
      "step": 41382
    },
    {
      "epoch": 0.00025257568359375,
      "step": 41382,
      "training_step_time": 0.48601508140563965
    },
    {
      "epoch": 0.000252581787109375,
      "model_forward_time": 0.11524534225463867,
      "step": 41383
    },
    {
      "epoch": 0.000252581787109375,
      "step": 41383,
      "training_step_time": 0.4866156578063965
    },
    {
      "epoch": 0.000252587890625,
      "model_forward_time": 0.11481857299804688,
      "step": 41384
    },
    {
      "epoch": 0.000252587890625,
      "step": 41384,
      "training_step_time": 0.416064977645874
    },
    {
      "epoch": 0.000252593994140625,
      "model_forward_time": 0.11477065086364746,
      "step": 41385
    },
    {
      "epoch": 0.000252593994140625,
      "step": 41385,
      "training_step_time": 0.3949005603790283
    },
    {
      "epoch": 0.00025260009765625,
      "model_forward_time": 0.11620926856994629,
      "step": 41386
    },
    {
      "epoch": 0.00025260009765625,
      "step": 41386,
      "training_step_time": 0.3822479248046875
    },
    {
      "epoch": 0.000252606201171875,
      "model_forward_time": 0.11583566665649414,
      "step": 41387
    },
    {
      "epoch": 0.000252606201171875,
      "step": 41387,
      "training_step_time": 0.41940903663635254
    },
    {
      "epoch": 0.0002526123046875,
      "model_forward_time": 0.1158590316772461,
      "step": 41388
    },
    {
      "epoch": 0.0002526123046875,
      "step": 41388,
      "training_step_time": 0.4468262195587158
    },
    {
      "epoch": 0.000252618408203125,
      "model_forward_time": 0.11501622200012207,
      "step": 41389
    },
    {
      "epoch": 0.000252618408203125,
      "step": 41389,
      "training_step_time": 0.393110990524292
    },
    {
      "epoch": 0.00025262451171875,
      "grad_norm": 0.10622602701187134,
      "learning_rate": 2.4075081483488494e-05,
      "loss": 0.0437,
      "step": 41390
    },
    {
      "epoch": 0.00025262451171875,
      "model_forward_time": 0.11523771286010742,
      "step": 41390
    },
    {
      "epoch": 0.00025262451171875,
      "step": 41390,
      "training_step_time": 0.39778900146484375
    },
    {
      "epoch": 0.000252630615234375,
      "model_forward_time": 0.11537861824035645,
      "step": 41391
    },
    {
      "epoch": 0.000252630615234375,
      "step": 41391,
      "training_step_time": 0.3877730369567871
    },
    {
      "epoch": 0.00025263671875,
      "model_forward_time": 0.11581754684448242,
      "step": 41392
    },
    {
      "epoch": 0.00025263671875,
      "step": 41392,
      "training_step_time": 0.41013216972351074
    },
    {
      "epoch": 0.000252642822265625,
      "model_forward_time": 0.11513757705688477,
      "step": 41393
    },
    {
      "epoch": 0.000252642822265625,
      "step": 41393,
      "training_step_time": 0.3853282928466797
    },
    {
      "epoch": 0.00025264892578125,
      "model_forward_time": 0.11590385437011719,
      "step": 41394
    },
    {
      "epoch": 0.00025264892578125,
      "step": 41394,
      "training_step_time": 0.3786132335662842
    },
    {
      "epoch": 0.000252655029296875,
      "model_forward_time": 0.1155998706817627,
      "step": 41395
    },
    {
      "epoch": 0.000252655029296875,
      "step": 41395,
      "training_step_time": 0.39803028106689453
    },
    {
      "epoch": 0.0002526611328125,
      "model_forward_time": 0.11526298522949219,
      "step": 41396
    },
    {
      "epoch": 0.0002526611328125,
      "step": 41396,
      "training_step_time": 0.3974149227142334
    },
    {
      "epoch": 0.000252667236328125,
      "model_forward_time": 0.11538958549499512,
      "step": 41397
    },
    {
      "epoch": 0.000252667236328125,
      "step": 41397,
      "training_step_time": 0.4257345199584961
    },
    {
      "epoch": 0.00025267333984375,
      "model_forward_time": 0.11497068405151367,
      "step": 41398
    },
    {
      "epoch": 0.00025267333984375,
      "step": 41398,
      "training_step_time": 0.4838573932647705
    },
    {
      "epoch": 0.000252679443359375,
      "model_forward_time": 0.11547660827636719,
      "step": 41399
    },
    {
      "epoch": 0.000252679443359375,
      "step": 41399,
      "training_step_time": 0.4009394645690918
    },
    {
      "epoch": 0.000252685546875,
      "grad_norm": 0.09419704973697662,
      "learning_rate": 2.405152131093926e-05,
      "loss": 0.0377,
      "step": 41400
    },
    {
      "epoch": 0.000252685546875,
      "model_forward_time": 0.11435508728027344,
      "step": 41400
    },
    {
      "epoch": 0.000252685546875,
      "step": 41400,
      "training_step_time": 0.4031801223754883
    },
    {
      "epoch": 0.000252691650390625,
      "model_forward_time": 0.11532235145568848,
      "step": 41401
    },
    {
      "epoch": 0.000252691650390625,
      "step": 41401,
      "training_step_time": 0.3994472026824951
    },
    {
      "epoch": 0.00025269775390625,
      "model_forward_time": 0.11529302597045898,
      "step": 41402
    },
    {
      "epoch": 0.00025269775390625,
      "step": 41402,
      "training_step_time": 0.3960566520690918
    },
    {
      "epoch": 0.000252703857421875,
      "model_forward_time": 0.11445403099060059,
      "step": 41403
    },
    {
      "epoch": 0.000252703857421875,
      "step": 41403,
      "training_step_time": 0.43637633323669434
    },
    {
      "epoch": 0.0002527099609375,
      "model_forward_time": 0.11540031433105469,
      "step": 41404
    },
    {
      "epoch": 0.0002527099609375,
      "step": 41404,
      "training_step_time": 0.3824441432952881
    },
    {
      "epoch": 0.000252716064453125,
      "model_forward_time": 0.11625313758850098,
      "step": 41405
    },
    {
      "epoch": 0.000252716064453125,
      "step": 41405,
      "training_step_time": 0.40271854400634766
    },
    {
      "epoch": 0.00025272216796875,
      "model_forward_time": 0.1150515079498291,
      "step": 41406
    },
    {
      "epoch": 0.00025272216796875,
      "step": 41406,
      "training_step_time": 0.40030765533447266
    },
    {
      "epoch": 0.000252728271484375,
      "model_forward_time": 0.11571002006530762,
      "step": 41407
    },
    {
      "epoch": 0.000252728271484375,
      "step": 41407,
      "training_step_time": 0.40035176277160645
    },
    {
      "epoch": 0.000252734375,
      "model_forward_time": 0.11435532569885254,
      "step": 41408
    },
    {
      "epoch": 0.000252734375,
      "step": 41408,
      "training_step_time": 0.3915517330169678
    },
    {
      "epoch": 0.000252740478515625,
      "model_forward_time": 0.11475849151611328,
      "step": 41409
    },
    {
      "epoch": 0.000252740478515625,
      "step": 41409,
      "training_step_time": 0.397216796875
    },
    {
      "epoch": 0.00025274658203125,
      "grad_norm": 0.0707409530878067,
      "learning_rate": 2.402796902085268e-05,
      "loss": 0.0434,
      "step": 41410
    },
    {
      "epoch": 0.00025274658203125,
      "model_forward_time": 0.11526679992675781,
      "step": 41410
    },
    {
      "epoch": 0.00025274658203125,
      "step": 41410,
      "training_step_time": 0.4040184020996094
    },
    {
      "epoch": 0.000252752685546875,
      "model_forward_time": 0.11444401741027832,
      "step": 41411
    },
    {
      "epoch": 0.000252752685546875,
      "step": 41411,
      "training_step_time": 0.44450998306274414
    },
    {
      "epoch": 0.0002527587890625,
      "model_forward_time": 0.11506295204162598,
      "step": 41412
    },
    {
      "epoch": 0.0002527587890625,
      "step": 41412,
      "training_step_time": 0.4039626121520996
    },
    {
      "epoch": 0.000252764892578125,
      "model_forward_time": 0.1148378849029541,
      "step": 41413
    },
    {
      "epoch": 0.000252764892578125,
      "step": 41413,
      "training_step_time": 0.45525026321411133
    },
    {
      "epoch": 0.00025277099609375,
      "model_forward_time": 0.11523199081420898,
      "step": 41414
    },
    {
      "epoch": 0.00025277099609375,
      "step": 41414,
      "training_step_time": 0.3974735736846924
    },
    {
      "epoch": 0.000252777099609375,
      "model_forward_time": 0.11501765251159668,
      "step": 41415
    },
    {
      "epoch": 0.000252777099609375,
      "step": 41415,
      "training_step_time": 0.4645421504974365
    },
    {
      "epoch": 0.000252783203125,
      "model_forward_time": 0.11611795425415039,
      "step": 41416
    },
    {
      "epoch": 0.000252783203125,
      "step": 41416,
      "training_step_time": 0.3845973014831543
    },
    {
      "epoch": 0.000252789306640625,
      "model_forward_time": 0.11546540260314941,
      "step": 41417
    },
    {
      "epoch": 0.000252789306640625,
      "step": 41417,
      "training_step_time": 0.39605021476745605
    },
    {
      "epoch": 0.00025279541015625,
      "model_forward_time": 0.1157369613647461,
      "step": 41418
    },
    {
      "epoch": 0.00025279541015625,
      "step": 41418,
      "training_step_time": 0.40726613998413086
    },
    {
      "epoch": 0.000252801513671875,
      "model_forward_time": 0.11542129516601562,
      "step": 41419
    },
    {
      "epoch": 0.000252801513671875,
      "step": 41419,
      "training_step_time": 0.39922285079956055
    },
    {
      "epoch": 0.0002528076171875,
      "grad_norm": 0.10537061840295792,
      "learning_rate": 2.4004424620383386e-05,
      "loss": 0.0362,
      "step": 41420
    },
    {
      "epoch": 0.0002528076171875,
      "model_forward_time": 0.11797785758972168,
      "step": 41420
    },
    {
      "epoch": 0.0002528076171875,
      "step": 41420,
      "training_step_time": 0.3907887935638428
    },
    {
      "epoch": 0.000252813720703125,
      "model_forward_time": 0.11606478691101074,
      "step": 41421
    },
    {
      "epoch": 0.000252813720703125,
      "step": 41421,
      "training_step_time": 0.3774595260620117
    },
    {
      "epoch": 0.00025281982421875,
      "model_forward_time": 0.11566686630249023,
      "step": 41422
    },
    {
      "epoch": 0.00025281982421875,
      "step": 41422,
      "training_step_time": 0.37871479988098145
    },
    {
      "epoch": 0.000252825927734375,
      "model_forward_time": 0.11411619186401367,
      "step": 41423
    },
    {
      "epoch": 0.000252825927734375,
      "step": 41423,
      "training_step_time": 0.3773336410522461
    },
    {
      "epoch": 0.00025283203125,
      "model_forward_time": 0.11540746688842773,
      "step": 41424
    },
    {
      "epoch": 0.00025283203125,
      "step": 41424,
      "training_step_time": 0.40909242630004883
    },
    {
      "epoch": 0.000252838134765625,
      "model_forward_time": 0.11567020416259766,
      "step": 41425
    },
    {
      "epoch": 0.000252838134765625,
      "step": 41425,
      "training_step_time": 0.5253338813781738
    },
    {
      "epoch": 0.00025284423828125,
      "model_forward_time": 0.11582207679748535,
      "step": 41426
    },
    {
      "epoch": 0.00025284423828125,
      "step": 41426,
      "training_step_time": 0.4989931583404541
    },
    {
      "epoch": 0.000252850341796875,
      "model_forward_time": 0.11452531814575195,
      "step": 41427
    },
    {
      "epoch": 0.000252850341796875,
      "step": 41427,
      "training_step_time": 0.3918170928955078
    },
    {
      "epoch": 0.0002528564453125,
      "model_forward_time": 0.1156010627746582,
      "step": 41428
    },
    {
      "epoch": 0.0002528564453125,
      "step": 41428,
      "training_step_time": 0.40413427352905273
    },
    {
      "epoch": 0.000252862548828125,
      "model_forward_time": 0.11523199081420898,
      "step": 41429
    },
    {
      "epoch": 0.000252862548828125,
      "step": 41429,
      "training_step_time": 0.44526100158691406
    },
    {
      "epoch": 0.00025286865234375,
      "grad_norm": 0.1364542543888092,
      "learning_rate": 2.3980888116683515e-05,
      "loss": 0.0479,
      "step": 41430
    },
    {
      "epoch": 0.00025286865234375,
      "model_forward_time": 0.11522316932678223,
      "step": 41430
    },
    {
      "epoch": 0.00025286865234375,
      "step": 41430,
      "training_step_time": 0.4641554355621338
    },
    {
      "epoch": 0.000252874755859375,
      "model_forward_time": 0.11476731300354004,
      "step": 41431
    },
    {
      "epoch": 0.000252874755859375,
      "step": 41431,
      "training_step_time": 0.37152814865112305
    },
    {
      "epoch": 0.000252880859375,
      "model_forward_time": 0.11535811424255371,
      "step": 41432
    },
    {
      "epoch": 0.000252880859375,
      "step": 41432,
      "training_step_time": 0.4939403533935547
    },
    {
      "epoch": 0.000252886962890625,
      "model_forward_time": 0.11503148078918457,
      "step": 41433
    },
    {
      "epoch": 0.000252886962890625,
      "step": 41433,
      "training_step_time": 0.3974289894104004
    },
    {
      "epoch": 0.00025289306640625,
      "model_forward_time": 0.1151888370513916,
      "step": 41434
    },
    {
      "epoch": 0.00025289306640625,
      "step": 41434,
      "training_step_time": 0.3804209232330322
    },
    {
      "epoch": 0.000252899169921875,
      "model_forward_time": 0.11538314819335938,
      "step": 41435
    },
    {
      "epoch": 0.000252899169921875,
      "step": 41435,
      "training_step_time": 0.39601802825927734
    },
    {
      "epoch": 0.0002529052734375,
      "model_forward_time": 0.11544251441955566,
      "step": 41436
    },
    {
      "epoch": 0.0002529052734375,
      "step": 41436,
      "training_step_time": 0.3945455551147461
    },
    {
      "epoch": 0.000252911376953125,
      "model_forward_time": 0.11562514305114746,
      "step": 41437
    },
    {
      "epoch": 0.000252911376953125,
      "step": 41437,
      "training_step_time": 0.39722371101379395
    },
    {
      "epoch": 0.00025291748046875,
      "model_forward_time": 0.11542558670043945,
      "step": 41438
    },
    {
      "epoch": 0.00025291748046875,
      "step": 41438,
      "training_step_time": 0.4469583034515381
    },
    {
      "epoch": 0.000252923583984375,
      "model_forward_time": 0.11547303199768066,
      "step": 41439
    },
    {
      "epoch": 0.000252923583984375,
      "step": 41439,
      "training_step_time": 0.7405848503112793
    },
    {
      "epoch": 0.0002529296875,
      "grad_norm": 0.0769871398806572,
      "learning_rate": 2.3957359516902845e-05,
      "loss": 0.0401,
      "step": 41440
    },
    {
      "epoch": 0.0002529296875,
      "model_forward_time": 0.1312696933746338,
      "step": 41440
    },
    {
      "epoch": 0.0002529296875,
      "step": 41440,
      "training_step_time": 0.4054453372955322
    },
    {
      "epoch": 0.000252935791015625,
      "model_forward_time": 0.1149137020111084,
      "step": 41441
    },
    {
      "epoch": 0.000252935791015625,
      "step": 41441,
      "training_step_time": 0.4041147232055664
    },
    {
      "epoch": 0.00025294189453125,
      "model_forward_time": 0.11506009101867676,
      "step": 41442
    },
    {
      "epoch": 0.00025294189453125,
      "step": 41442,
      "training_step_time": 0.4218020439147949
    },
    {
      "epoch": 0.000252947998046875,
      "model_forward_time": 0.11520600318908691,
      "step": 41443
    },
    {
      "epoch": 0.000252947998046875,
      "step": 41443,
      "training_step_time": 0.3990199565887451
    },
    {
      "epoch": 0.0002529541015625,
      "model_forward_time": 0.11383986473083496,
      "step": 41444
    },
    {
      "epoch": 0.0002529541015625,
      "step": 41444,
      "training_step_time": 0.42713212966918945
    },
    {
      "epoch": 0.000252960205078125,
      "model_forward_time": 0.11611270904541016,
      "step": 41445
    },
    {
      "epoch": 0.000252960205078125,
      "step": 41445,
      "training_step_time": 0.43560290336608887
    },
    {
      "epoch": 0.00025296630859375,
      "model_forward_time": 0.11535525321960449,
      "step": 41446
    },
    {
      "epoch": 0.00025296630859375,
      "step": 41446,
      "training_step_time": 0.3824026584625244
    },
    {
      "epoch": 0.000252972412109375,
      "model_forward_time": 0.1150054931640625,
      "step": 41447
    },
    {
      "epoch": 0.000252972412109375,
      "step": 41447,
      "training_step_time": 0.40940046310424805
    },
    {
      "epoch": 0.000252978515625,
      "model_forward_time": 0.1153862476348877,
      "step": 41448
    },
    {
      "epoch": 0.000252978515625,
      "step": 41448,
      "training_step_time": 0.39273834228515625
    },
    {
      "epoch": 0.000252984619140625,
      "model_forward_time": 0.1155080795288086,
      "step": 41449
    },
    {
      "epoch": 0.000252984619140625,
      "step": 41449,
      "training_step_time": 0.3967244625091553
    },
    {
      "epoch": 0.00025299072265625,
      "grad_norm": 0.10255665332078934,
      "learning_rate": 2.3933838828188733e-05,
      "loss": 0.0428,
      "step": 41450
    },
    {
      "epoch": 0.00025299072265625,
      "model_forward_time": 0.11489272117614746,
      "step": 41450
    },
    {
      "epoch": 0.00025299072265625,
      "step": 41450,
      "training_step_time": 0.3939805030822754
    },
    {
      "epoch": 0.000252996826171875,
      "model_forward_time": 0.1145937442779541,
      "step": 41451
    },
    {
      "epoch": 0.000252996826171875,
      "step": 41451,
      "training_step_time": 0.7462527751922607
    },
    {
      "epoch": 0.0002530029296875,
      "model_forward_time": 0.11490750312805176,
      "step": 41452
    },
    {
      "epoch": 0.0002530029296875,
      "step": 41452,
      "training_step_time": 0.40682554244995117
    },
    {
      "epoch": 0.000253009033203125,
      "model_forward_time": 0.11509537696838379,
      "step": 41453
    },
    {
      "epoch": 0.000253009033203125,
      "step": 41453,
      "training_step_time": 0.4086425304412842
    },
    {
      "epoch": 0.00025301513671875,
      "model_forward_time": 0.11511588096618652,
      "step": 41454
    },
    {
      "epoch": 0.00025301513671875,
      "step": 41454,
      "training_step_time": 0.4140644073486328
    },
    {
      "epoch": 0.000253021240234375,
      "model_forward_time": 0.11488151550292969,
      "step": 41455
    },
    {
      "epoch": 0.000253021240234375,
      "step": 41455,
      "training_step_time": 0.42357301712036133
    },
    {
      "epoch": 0.00025302734375,
      "model_forward_time": 0.11487555503845215,
      "step": 41456
    },
    {
      "epoch": 0.00025302734375,
      "step": 41456,
      "training_step_time": 0.47344183921813965
    },
    {
      "epoch": 0.000253033447265625,
      "model_forward_time": 0.11489009857177734,
      "step": 41457
    },
    {
      "epoch": 0.000253033447265625,
      "step": 41457,
      "training_step_time": 0.44118785858154297
    },
    {
      "epoch": 0.00025303955078125,
      "model_forward_time": 0.11464786529541016,
      "step": 41458
    },
    {
      "epoch": 0.00025303955078125,
      "step": 41458,
      "training_step_time": 0.42298030853271484
    },
    {
      "epoch": 0.000253045654296875,
      "model_forward_time": 0.11520767211914062,
      "step": 41459
    },
    {
      "epoch": 0.000253045654296875,
      "step": 41459,
      "training_step_time": 0.39144229888916016
    },
    {
      "epoch": 0.0002530517578125,
      "grad_norm": 0.16617867350578308,
      "learning_rate": 2.3910326057686127e-05,
      "loss": 0.0389,
      "step": 41460
    },
    {
      "epoch": 0.0002530517578125,
      "model_forward_time": 0.11455345153808594,
      "step": 41460
    },
    {
      "epoch": 0.0002530517578125,
      "step": 41460,
      "training_step_time": 0.43950939178466797
    },
    {
      "epoch": 0.000253057861328125,
      "model_forward_time": 0.11570358276367188,
      "step": 41461
    },
    {
      "epoch": 0.000253057861328125,
      "step": 41461,
      "training_step_time": 0.44097280502319336
    },
    {
      "epoch": 0.00025306396484375,
      "model_forward_time": 0.11464905738830566,
      "step": 41462
    },
    {
      "epoch": 0.00025306396484375,
      "step": 41462,
      "training_step_time": 0.39079976081848145
    },
    {
      "epoch": 0.000253070068359375,
      "model_forward_time": 0.11638975143432617,
      "step": 41463
    },
    {
      "epoch": 0.000253070068359375,
      "step": 41463,
      "training_step_time": 0.459322452545166
    },
    {
      "epoch": 0.000253076171875,
      "model_forward_time": 0.1157677173614502,
      "step": 41464
    },
    {
      "epoch": 0.000253076171875,
      "step": 41464,
      "training_step_time": 0.4445927143096924
    },
    {
      "epoch": 0.000253082275390625,
      "model_forward_time": 0.11574220657348633,
      "step": 41465
    },
    {
      "epoch": 0.000253082275390625,
      "step": 41465,
      "training_step_time": 0.43052053451538086
    },
    {
      "epoch": 0.00025308837890625,
      "model_forward_time": 0.11491227149963379,
      "step": 41466
    },
    {
      "epoch": 0.00025308837890625,
      "step": 41466,
      "training_step_time": 0.3916196823120117
    },
    {
      "epoch": 0.000253094482421875,
      "model_forward_time": 0.11542868614196777,
      "step": 41467
    },
    {
      "epoch": 0.000253094482421875,
      "step": 41467,
      "training_step_time": 0.3890984058380127
    },
    {
      "epoch": 0.0002531005859375,
      "model_forward_time": 0.1155233383178711,
      "step": 41468
    },
    {
      "epoch": 0.0002531005859375,
      "step": 41468,
      "training_step_time": 0.39642834663391113
    },
    {
      "epoch": 0.000253106689453125,
      "model_forward_time": 0.1156156063079834,
      "step": 41469
    },
    {
      "epoch": 0.000253106689453125,
      "step": 41469,
      "training_step_time": 0.642470121383667
    },
    {
      "epoch": 0.00025311279296875,
      "grad_norm": 0.09156809002161026,
      "learning_rate": 2.3886821212537647e-05,
      "loss": 0.0395,
      "step": 41470
    },
    {
      "epoch": 0.00025311279296875,
      "model_forward_time": 0.1155097484588623,
      "step": 41470
    },
    {
      "epoch": 0.00025311279296875,
      "step": 41470,
      "training_step_time": 0.46866583824157715
    },
    {
      "epoch": 0.000253118896484375,
      "model_forward_time": 0.11533665657043457,
      "step": 41471
    },
    {
      "epoch": 0.000253118896484375,
      "step": 41471,
      "training_step_time": 0.4060039520263672
    },
    {
      "epoch": 0.000253125,
      "model_forward_time": 0.11461329460144043,
      "step": 41472
    },
    {
      "epoch": 0.000253125,
      "step": 41472,
      "training_step_time": 0.42381739616394043
    },
    {
      "epoch": 0.000253131103515625,
      "model_forward_time": 0.1145029067993164,
      "step": 41473
    },
    {
      "epoch": 0.000253131103515625,
      "step": 41473,
      "training_step_time": 0.36531782150268555
    },
    {
      "epoch": 0.00025313720703125,
      "model_forward_time": 0.11436820030212402,
      "step": 41474
    },
    {
      "epoch": 0.00025313720703125,
      "step": 41474,
      "training_step_time": 0.43674373626708984
    },
    {
      "epoch": 0.000253143310546875,
      "model_forward_time": 0.11481857299804688,
      "step": 41475
    },
    {
      "epoch": 0.000253143310546875,
      "step": 41475,
      "training_step_time": 0.5328869819641113
    },
    {
      "epoch": 0.0002531494140625,
      "model_forward_time": 0.11585783958435059,
      "step": 41476
    },
    {
      "epoch": 0.0002531494140625,
      "step": 41476,
      "training_step_time": 0.38036179542541504
    },
    {
      "epoch": 0.000253155517578125,
      "model_forward_time": 0.11560654640197754,
      "step": 41477
    },
    {
      "epoch": 0.000253155517578125,
      "step": 41477,
      "training_step_time": 0.4008636474609375
    },
    {
      "epoch": 0.00025316162109375,
      "model_forward_time": 0.11444664001464844,
      "step": 41478
    },
    {
      "epoch": 0.00025316162109375,
      "step": 41478,
      "training_step_time": 0.4052255153656006
    },
    {
      "epoch": 0.000253167724609375,
      "model_forward_time": 0.11545443534851074,
      "step": 41479
    },
    {
      "epoch": 0.000253167724609375,
      "step": 41479,
      "training_step_time": 0.3969607353210449
    },
    {
      "epoch": 0.000253173828125,
      "grad_norm": 0.09039949625730515,
      "learning_rate": 2.3863324299883366e-05,
      "loss": 0.0361,
      "step": 41480
    },
    {
      "epoch": 0.000253173828125,
      "model_forward_time": 0.11471366882324219,
      "step": 41480
    },
    {
      "epoch": 0.000253173828125,
      "step": 41480,
      "training_step_time": 0.40667104721069336
    },
    {
      "epoch": 0.000253179931640625,
      "model_forward_time": 0.11443209648132324,
      "step": 41481
    },
    {
      "epoch": 0.000253179931640625,
      "step": 41481,
      "training_step_time": 0.577812910079956
    },
    {
      "epoch": 0.00025318603515625,
      "model_forward_time": 0.11523604393005371,
      "step": 41482
    },
    {
      "epoch": 0.00025318603515625,
      "step": 41482,
      "training_step_time": 0.4494516849517822
    },
    {
      "epoch": 0.000253192138671875,
      "model_forward_time": 0.11473321914672852,
      "step": 41483
    },
    {
      "epoch": 0.000253192138671875,
      "step": 41483,
      "training_step_time": 0.49347639083862305
    },
    {
      "epoch": 0.0002531982421875,
      "model_forward_time": 0.11419296264648438,
      "step": 41484
    },
    {
      "epoch": 0.0002531982421875,
      "step": 41484,
      "training_step_time": 0.43657803535461426
    },
    {
      "epoch": 0.000253204345703125,
      "model_forward_time": 0.11471033096313477,
      "step": 41485
    },
    {
      "epoch": 0.000253204345703125,
      "step": 41485,
      "training_step_time": 0.4598815441131592
    },
    {
      "epoch": 0.00025321044921875,
      "model_forward_time": 0.11418867111206055,
      "step": 41486
    },
    {
      "epoch": 0.00025321044921875,
      "step": 41486,
      "training_step_time": 0.4461517333984375
    },
    {
      "epoch": 0.000253216552734375,
      "model_forward_time": 0.11471056938171387,
      "step": 41487
    },
    {
      "epoch": 0.000253216552734375,
      "step": 41487,
      "training_step_time": 0.3653602600097656
    },
    {
      "epoch": 0.00025322265625,
      "model_forward_time": 0.11503458023071289,
      "step": 41488
    },
    {
      "epoch": 0.00025322265625,
      "step": 41488,
      "training_step_time": 0.43764209747314453
    },
    {
      "epoch": 0.000253228759765625,
      "model_forward_time": 0.11462593078613281,
      "step": 41489
    },
    {
      "epoch": 0.000253228759765625,
      "step": 41489,
      "training_step_time": 0.41683006286621094
    },
    {
      "epoch": 0.00025323486328125,
      "grad_norm": 0.12127337604761124,
      "learning_rate": 2.3839835326861104e-05,
      "loss": 0.0404,
      "step": 41490
    },
    {
      "epoch": 0.00025323486328125,
      "model_forward_time": 0.11524820327758789,
      "step": 41490
    },
    {
      "epoch": 0.00025323486328125,
      "step": 41490,
      "training_step_time": 0.4402003288269043
    },
    {
      "epoch": 0.000253240966796875,
      "model_forward_time": 0.11578917503356934,
      "step": 41491
    },
    {
      "epoch": 0.000253240966796875,
      "step": 41491,
      "training_step_time": 0.4024064540863037
    },
    {
      "epoch": 0.0002532470703125,
      "model_forward_time": 0.11556506156921387,
      "step": 41492
    },
    {
      "epoch": 0.0002532470703125,
      "step": 41492,
      "training_step_time": 0.39508962631225586
    },
    {
      "epoch": 0.000253253173828125,
      "model_forward_time": 0.11510825157165527,
      "step": 41493
    },
    {
      "epoch": 0.000253253173828125,
      "step": 41493,
      "training_step_time": 0.5208165645599365
    },
    {
      "epoch": 0.00025325927734375,
      "model_forward_time": 0.11561989784240723,
      "step": 41494
    },
    {
      "epoch": 0.00025325927734375,
      "step": 41494,
      "training_step_time": 0.40146923065185547
    },
    {
      "epoch": 0.000253265380859375,
      "model_forward_time": 0.11514830589294434,
      "step": 41495
    },
    {
      "epoch": 0.000253265380859375,
      "step": 41495,
      "training_step_time": 0.4124786853790283
    },
    {
      "epoch": 0.000253271484375,
      "model_forward_time": 0.11501955986022949,
      "step": 41496
    },
    {
      "epoch": 0.000253271484375,
      "step": 41496,
      "training_step_time": 0.4306676387786865
    },
    {
      "epoch": 0.000253277587890625,
      "model_forward_time": 0.11548829078674316,
      "step": 41497
    },
    {
      "epoch": 0.000253277587890625,
      "step": 41497,
      "training_step_time": 0.49684762954711914
    },
    {
      "epoch": 0.00025328369140625,
      "model_forward_time": 0.11447811126708984,
      "step": 41498
    },
    {
      "epoch": 0.00025328369140625,
      "step": 41498,
      "training_step_time": 0.4708528518676758
    },
    {
      "epoch": 0.000253289794921875,
      "model_forward_time": 0.1151587963104248,
      "step": 41499
    },
    {
      "epoch": 0.000253289794921875,
      "step": 41499,
      "training_step_time": 0.5245592594146729
    },
    {
      "epoch": 0.0002532958984375,
      "grad_norm": 0.12252078950405121,
      "learning_rate": 2.381635430060611e-05,
      "loss": 0.038,
      "step": 41500
    },
    {
      "epoch": 0.0002532958984375,
      "model_forward_time": 0.11430644989013672,
      "step": 41500
    },
    {
      "epoch": 0.0002532958984375,
      "step": 41500,
      "training_step_time": 0.4413905143737793
    },
    {
      "epoch": 0.000253302001953125,
      "model_forward_time": 0.11553454399108887,
      "step": 41501
    },
    {
      "epoch": 0.000253302001953125,
      "step": 41501,
      "training_step_time": 0.42897701263427734
    },
    {
      "epoch": 0.00025330810546875,
      "model_forward_time": 0.11495161056518555,
      "step": 41502
    },
    {
      "epoch": 0.00025330810546875,
      "step": 41502,
      "training_step_time": 0.4837527275085449
    },
    {
      "epoch": 0.000253314208984375,
      "model_forward_time": 0.1148531436920166,
      "step": 41503
    },
    {
      "epoch": 0.000253314208984375,
      "step": 41503,
      "training_step_time": 0.43739891052246094
    },
    {
      "epoch": 0.0002533203125,
      "model_forward_time": 0.1150057315826416,
      "step": 41504
    },
    {
      "epoch": 0.0002533203125,
      "step": 41504,
      "training_step_time": 0.4573936462402344
    },
    {
      "epoch": 0.000253326416015625,
      "model_forward_time": 0.11472678184509277,
      "step": 41505
    },
    {
      "epoch": 0.000253326416015625,
      "step": 41505,
      "training_step_time": 0.3970668315887451
    },
    {
      "epoch": 0.00025333251953125,
      "model_forward_time": 0.11496710777282715,
      "step": 41506
    },
    {
      "epoch": 0.00025333251953125,
      "step": 41506,
      "training_step_time": 0.39167118072509766
    },
    {
      "epoch": 0.000253338623046875,
      "model_forward_time": 0.11534380912780762,
      "step": 41507
    },
    {
      "epoch": 0.000253338623046875,
      "step": 41507,
      "training_step_time": 0.39737391471862793
    },
    {
      "epoch": 0.0002533447265625,
      "model_forward_time": 0.11529397964477539,
      "step": 41508
    },
    {
      "epoch": 0.0002533447265625,
      "step": 41508,
      "training_step_time": 0.4003031253814697
    },
    {
      "epoch": 0.000253350830078125,
      "model_forward_time": 0.11535835266113281,
      "step": 41509
    },
    {
      "epoch": 0.000253350830078125,
      "step": 41509,
      "training_step_time": 0.5006122589111328
    },
    {
      "epoch": 0.00025335693359375,
      "grad_norm": 0.1083298996090889,
      "learning_rate": 2.3792881228251356e-05,
      "loss": 0.0369,
      "step": 41510
    },
    {
      "epoch": 0.00025335693359375,
      "model_forward_time": 0.11400818824768066,
      "step": 41510
    },
    {
      "epoch": 0.00025335693359375,
      "step": 41510,
      "training_step_time": 0.3994927406311035
    },
    {
      "epoch": 0.000253363037109375,
      "model_forward_time": 0.11507320404052734,
      "step": 41511
    },
    {
      "epoch": 0.000253363037109375,
      "step": 41511,
      "training_step_time": 0.5299184322357178
    },
    {
      "epoch": 0.000253369140625,
      "model_forward_time": 0.11448407173156738,
      "step": 41512
    },
    {
      "epoch": 0.000253369140625,
      "step": 41512,
      "training_step_time": 0.40213656425476074
    },
    {
      "epoch": 0.000253375244140625,
      "model_forward_time": 0.11582565307617188,
      "step": 41513
    },
    {
      "epoch": 0.000253375244140625,
      "step": 41513,
      "training_step_time": 0.4352731704711914
    },
    {
      "epoch": 0.00025338134765625,
      "model_forward_time": 0.1146538257598877,
      "step": 41514
    },
    {
      "epoch": 0.00025338134765625,
      "step": 41514,
      "training_step_time": 0.4890472888946533
    },
    {
      "epoch": 0.000253387451171875,
      "model_forward_time": 0.11484861373901367,
      "step": 41515
    },
    {
      "epoch": 0.000253387451171875,
      "step": 41515,
      "training_step_time": 0.36568570137023926
    },
    {
      "epoch": 0.0002533935546875,
      "model_forward_time": 0.11447381973266602,
      "step": 41516
    },
    {
      "epoch": 0.0002533935546875,
      "step": 41516,
      "training_step_time": 0.4359579086303711
    },
    {
      "epoch": 0.000253399658203125,
      "model_forward_time": 0.11458778381347656,
      "step": 41517
    },
    {
      "epoch": 0.000253399658203125,
      "step": 41517,
      "training_step_time": 0.5927596092224121
    },
    {
      "epoch": 0.00025340576171875,
      "model_forward_time": 0.11436343193054199,
      "step": 41518
    },
    {
      "epoch": 0.00025340576171875,
      "step": 41518,
      "training_step_time": 0.3832733631134033
    },
    {
      "epoch": 0.000253411865234375,
      "model_forward_time": 0.11487841606140137,
      "step": 41519
    },
    {
      "epoch": 0.000253411865234375,
      "step": 41519,
      "training_step_time": 0.39507246017456055
    },
    {
      "epoch": 0.00025341796875,
      "grad_norm": 0.09326035529375076,
      "learning_rate": 2.3769416116927335e-05,
      "loss": 0.0359,
      "step": 41520
    },
    {
      "epoch": 0.00025341796875,
      "model_forward_time": 0.11474466323852539,
      "step": 41520
    },
    {
      "epoch": 0.00025341796875,
      "step": 41520,
      "training_step_time": 0.392916202545166
    },
    {
      "epoch": 0.000253424072265625,
      "model_forward_time": 0.11553812026977539,
      "step": 41521
    },
    {
      "epoch": 0.000253424072265625,
      "step": 41521,
      "training_step_time": 0.3912925720214844
    },
    {
      "epoch": 0.00025343017578125,
      "model_forward_time": 0.11513090133666992,
      "step": 41522
    },
    {
      "epoch": 0.00025343017578125,
      "step": 41522,
      "training_step_time": 0.39502716064453125
    },
    {
      "epoch": 0.000253436279296875,
      "model_forward_time": 0.1150979995727539,
      "step": 41523
    },
    {
      "epoch": 0.000253436279296875,
      "step": 41523,
      "training_step_time": 0.6321499347686768
    },
    {
      "epoch": 0.0002534423828125,
      "model_forward_time": 0.1152958869934082,
      "step": 41524
    },
    {
      "epoch": 0.0002534423828125,
      "step": 41524,
      "training_step_time": 0.4453299045562744
    },
    {
      "epoch": 0.000253448486328125,
      "model_forward_time": 0.11534333229064941,
      "step": 41525
    },
    {
      "epoch": 0.000253448486328125,
      "step": 41525,
      "training_step_time": 0.4079759120941162
    },
    {
      "epoch": 0.00025345458984375,
      "model_forward_time": 0.11520886421203613,
      "step": 41526
    },
    {
      "epoch": 0.00025345458984375,
      "step": 41526,
      "training_step_time": 0.46315526962280273
    },
    {
      "epoch": 0.000253460693359375,
      "model_forward_time": 0.11399960517883301,
      "step": 41527
    },
    {
      "epoch": 0.000253460693359375,
      "step": 41527,
      "training_step_time": 0.39934396743774414
    },
    {
      "epoch": 0.000253466796875,
      "model_forward_time": 0.11415433883666992,
      "step": 41528
    },
    {
      "epoch": 0.000253466796875,
      "step": 41528,
      "training_step_time": 0.4092230796813965
    },
    {
      "epoch": 0.000253472900390625,
      "model_forward_time": 0.11545395851135254,
      "step": 41529
    },
    {
      "epoch": 0.000253472900390625,
      "step": 41529,
      "training_step_time": 0.36934947967529297
    },
    {
      "epoch": 0.00025347900390625,
      "grad_norm": 0.11580082029104233,
      "learning_rate": 2.374595897376211e-05,
      "loss": 0.0404,
      "step": 41530
    },
    {
      "epoch": 0.00025347900390625,
      "model_forward_time": 0.11489081382751465,
      "step": 41530
    },
    {
      "epoch": 0.00025347900390625,
      "step": 41530,
      "training_step_time": 0.44760775566101074
    },
    {
      "epoch": 0.000253485107421875,
      "model_forward_time": 0.11466455459594727,
      "step": 41531
    },
    {
      "epoch": 0.000253485107421875,
      "step": 41531,
      "training_step_time": 0.41215991973876953
    },
    {
      "epoch": 0.0002534912109375,
      "model_forward_time": 0.11481595039367676,
      "step": 41532
    },
    {
      "epoch": 0.0002534912109375,
      "step": 41532,
      "training_step_time": 0.3981819152832031
    },
    {
      "epoch": 0.000253497314453125,
      "model_forward_time": 0.11458373069763184,
      "step": 41533
    },
    {
      "epoch": 0.000253497314453125,
      "step": 41533,
      "training_step_time": 0.3938415050506592
    },
    {
      "epoch": 0.00025350341796875,
      "model_forward_time": 0.11461973190307617,
      "step": 41534
    },
    {
      "epoch": 0.00025350341796875,
      "step": 41534,
      "training_step_time": 0.3935847282409668
    },
    {
      "epoch": 0.000253509521484375,
      "model_forward_time": 0.1152031421661377,
      "step": 41535
    },
    {
      "epoch": 0.000253509521484375,
      "step": 41535,
      "training_step_time": 0.46499133110046387
    },
    {
      "epoch": 0.000253515625,
      "model_forward_time": 0.11509943008422852,
      "step": 41536
    },
    {
      "epoch": 0.000253515625,
      "step": 41536,
      "training_step_time": 0.38912057876586914
    },
    {
      "epoch": 0.000253521728515625,
      "model_forward_time": 0.11586189270019531,
      "step": 41537
    },
    {
      "epoch": 0.000253521728515625,
      "step": 41537,
      "training_step_time": 0.4470639228820801
    },
    {
      "epoch": 0.00025352783203125,
      "model_forward_time": 0.11510777473449707,
      "step": 41538
    },
    {
      "epoch": 0.00025352783203125,
      "step": 41538,
      "training_step_time": 0.4126017093658447
    },
    {
      "epoch": 0.000253533935546875,
      "model_forward_time": 0.11576986312866211,
      "step": 41539
    },
    {
      "epoch": 0.000253533935546875,
      "step": 41539,
      "training_step_time": 0.4249420166015625
    },
    {
      "epoch": 0.0002535400390625,
      "grad_norm": 0.11026225984096527,
      "learning_rate": 2.3722509805881356e-05,
      "loss": 0.0375,
      "step": 41540
    },
    {
      "epoch": 0.0002535400390625,
      "model_forward_time": 0.11536407470703125,
      "step": 41540
    },
    {
      "epoch": 0.0002535400390625,
      "step": 41540,
      "training_step_time": 0.46340394020080566
    },
    {
      "epoch": 0.000253546142578125,
      "model_forward_time": 0.11563491821289062,
      "step": 41541
    },
    {
      "epoch": 0.000253546142578125,
      "step": 41541,
      "training_step_time": 0.43030834197998047
    },
    {
      "epoch": 0.00025355224609375,
      "model_forward_time": 0.11507177352905273,
      "step": 41542
    },
    {
      "epoch": 0.00025355224609375,
      "step": 41542,
      "training_step_time": 0.5056116580963135
    },
    {
      "epoch": 0.000253558349609375,
      "model_forward_time": 0.11418819427490234,
      "step": 41543
    },
    {
      "epoch": 0.000253558349609375,
      "step": 41543,
      "training_step_time": 0.36505794525146484
    },
    {
      "epoch": 0.000253564453125,
      "model_forward_time": 0.11436295509338379,
      "step": 41544
    },
    {
      "epoch": 0.000253564453125,
      "step": 41544,
      "training_step_time": 0.44327783584594727
    },
    {
      "epoch": 0.000253570556640625,
      "model_forward_time": 0.11518669128417969,
      "step": 41545
    },
    {
      "epoch": 0.000253570556640625,
      "step": 41545,
      "training_step_time": 0.39809346199035645
    },
    {
      "epoch": 0.00025357666015625,
      "model_forward_time": 0.11489629745483398,
      "step": 41546
    },
    {
      "epoch": 0.00025357666015625,
      "step": 41546,
      "training_step_time": 0.4089949131011963
    },
    {
      "epoch": 0.000253582763671875,
      "model_forward_time": 0.11490869522094727,
      "step": 41547
    },
    {
      "epoch": 0.000253582763671875,
      "step": 41547,
      "training_step_time": 0.3939943313598633
    },
    {
      "epoch": 0.0002535888671875,
      "model_forward_time": 0.11574387550354004,
      "step": 41548
    },
    {
      "epoch": 0.0002535888671875,
      "step": 41548,
      "training_step_time": 0.4060657024383545
    },
    {
      "epoch": 0.000253594970703125,
      "model_forward_time": 0.11503195762634277,
      "step": 41549
    },
    {
      "epoch": 0.000253594970703125,
      "step": 41549,
      "training_step_time": 0.4074389934539795
    },
    {
      "epoch": 0.00025360107421875,
      "grad_norm": 0.13034465909004211,
      "learning_rate": 2.3699068620408304e-05,
      "loss": 0.0415,
      "step": 41550
    },
    {
      "epoch": 0.00025360107421875,
      "model_forward_time": 0.11478781700134277,
      "step": 41550
    },
    {
      "epoch": 0.00025360107421875,
      "step": 41550,
      "training_step_time": 0.3986821174621582
    },
    {
      "epoch": 0.000253607177734375,
      "model_forward_time": 0.11466503143310547,
      "step": 41551
    },
    {
      "epoch": 0.000253607177734375,
      "step": 41551,
      "training_step_time": 0.4511256217956543
    },
    {
      "epoch": 0.00025361328125,
      "model_forward_time": 0.1155242919921875,
      "step": 41552
    },
    {
      "epoch": 0.00025361328125,
      "step": 41552,
      "training_step_time": 0.4296236038208008
    },
    {
      "epoch": 0.000253619384765625,
      "model_forward_time": 0.11472821235656738,
      "step": 41553
    },
    {
      "epoch": 0.000253619384765625,
      "step": 41553,
      "training_step_time": 0.5461773872375488
    },
    {
      "epoch": 0.00025362548828125,
      "model_forward_time": 0.11504626274108887,
      "step": 41554
    },
    {
      "epoch": 0.00025362548828125,
      "step": 41554,
      "training_step_time": 0.4037902355194092
    },
    {
      "epoch": 0.000253631591796875,
      "model_forward_time": 0.11480450630187988,
      "step": 41555
    },
    {
      "epoch": 0.000253631591796875,
      "step": 41555,
      "training_step_time": 0.4478309154510498
    },
    {
      "epoch": 0.0002536376953125,
      "model_forward_time": 0.11539292335510254,
      "step": 41556
    },
    {
      "epoch": 0.0002536376953125,
      "step": 41556,
      "training_step_time": 0.4685826301574707
    },
    {
      "epoch": 0.000253643798828125,
      "model_forward_time": 0.1145329475402832,
      "step": 41557
    },
    {
      "epoch": 0.000253643798828125,
      "step": 41557,
      "training_step_time": 0.3996107578277588
    },
    {
      "epoch": 0.00025364990234375,
      "model_forward_time": 0.1151895523071289,
      "step": 41558
    },
    {
      "epoch": 0.00025364990234375,
      "step": 41558,
      "training_step_time": 0.4737529754638672
    },
    {
      "epoch": 0.000253656005859375,
      "model_forward_time": 0.11566853523254395,
      "step": 41559
    },
    {
      "epoch": 0.000253656005859375,
      "step": 41559,
      "training_step_time": 0.48986101150512695
    },
    {
      "epoch": 0.000253662109375,
      "grad_norm": 0.07595010101795197,
      "learning_rate": 2.3675635424463754e-05,
      "loss": 0.0368,
      "step": 41560
    },
    {
      "epoch": 0.000253662109375,
      "model_forward_time": 0.1145317554473877,
      "step": 41560
    },
    {
      "epoch": 0.000253662109375,
      "step": 41560,
      "training_step_time": 0.3917853832244873
    },
    {
      "epoch": 0.000253668212890625,
      "model_forward_time": 0.1143333911895752,
      "step": 41561
    },
    {
      "epoch": 0.000253668212890625,
      "step": 41561,
      "training_step_time": 0.3931117057800293
    },
    {
      "epoch": 0.00025367431640625,
      "model_forward_time": 0.11461997032165527,
      "step": 41562
    },
    {
      "epoch": 0.00025367431640625,
      "step": 41562,
      "training_step_time": 0.39641284942626953
    },
    {
      "epoch": 0.000253680419921875,
      "model_forward_time": 0.11503911018371582,
      "step": 41563
    },
    {
      "epoch": 0.000253680419921875,
      "step": 41563,
      "training_step_time": 0.3863673210144043
    },
    {
      "epoch": 0.0002536865234375,
      "model_forward_time": 0.11524462699890137,
      "step": 41564
    },
    {
      "epoch": 0.0002536865234375,
      "step": 41564,
      "training_step_time": 0.40604257583618164
    },
    {
      "epoch": 0.000253692626953125,
      "model_forward_time": 0.11495614051818848,
      "step": 41565
    },
    {
      "epoch": 0.000253692626953125,
      "step": 41565,
      "training_step_time": 0.5368564128875732
    },
    {
      "epoch": 0.00025369873046875,
      "model_forward_time": 0.11561298370361328,
      "step": 41566
    },
    {
      "epoch": 0.00025369873046875,
      "step": 41566,
      "training_step_time": 0.3943634033203125
    },
    {
      "epoch": 0.000253704833984375,
      "model_forward_time": 0.11483216285705566,
      "step": 41567
    },
    {
      "epoch": 0.000253704833984375,
      "step": 41567,
      "training_step_time": 0.4673595428466797
    },
    {
      "epoch": 0.0002537109375,
      "model_forward_time": 0.11451387405395508,
      "step": 41568
    },
    {
      "epoch": 0.0002537109375,
      "step": 41568,
      "training_step_time": 0.37851738929748535
    },
    {
      "epoch": 0.000253717041015625,
      "model_forward_time": 0.11501550674438477,
      "step": 41569
    },
    {
      "epoch": 0.000253717041015625,
      "step": 41569,
      "training_step_time": 0.42307138442993164
    },
    {
      "epoch": 0.00025372314453125,
      "grad_norm": 0.09090912342071533,
      "learning_rate": 2.3652210225166122e-05,
      "loss": 0.0401,
      "step": 41570
    },
    {
      "epoch": 0.00025372314453125,
      "model_forward_time": 0.11471891403198242,
      "step": 41570
    },
    {
      "epoch": 0.00025372314453125,
      "step": 41570,
      "training_step_time": 0.3979990482330322
    },
    {
      "epoch": 0.000253729248046875,
      "model_forward_time": 0.11484241485595703,
      "step": 41571
    },
    {
      "epoch": 0.000253729248046875,
      "step": 41571,
      "training_step_time": 0.45378994941711426
    },
    {
      "epoch": 0.0002537353515625,
      "model_forward_time": 0.11662077903747559,
      "step": 41572
    },
    {
      "epoch": 0.0002537353515625,
      "step": 41572,
      "training_step_time": 0.3861989974975586
    },
    {
      "epoch": 0.000253741455078125,
      "model_forward_time": 0.11620855331420898,
      "step": 41573
    },
    {
      "epoch": 0.000253741455078125,
      "step": 41573,
      "training_step_time": 0.48996496200561523
    },
    {
      "epoch": 0.00025374755859375,
      "model_forward_time": 0.11614489555358887,
      "step": 41574
    },
    {
      "epoch": 0.00025374755859375,
      "step": 41574,
      "training_step_time": 0.41023874282836914
    },
    {
      "epoch": 0.000253753662109375,
      "model_forward_time": 0.11532902717590332,
      "step": 41575
    },
    {
      "epoch": 0.000253753662109375,
      "step": 41575,
      "training_step_time": 0.40024352073669434
    },
    {
      "epoch": 0.000253759765625,
      "model_forward_time": 0.11576223373413086,
      "step": 41576
    },
    {
      "epoch": 0.000253759765625,
      "step": 41576,
      "training_step_time": 0.38356709480285645
    },
    {
      "epoch": 0.000253765869140625,
      "model_forward_time": 0.11540937423706055,
      "step": 41577
    },
    {
      "epoch": 0.000253765869140625,
      "step": 41577,
      "training_step_time": 0.5264770984649658
    },
    {
      "epoch": 0.00025377197265625,
      "model_forward_time": 0.11565613746643066,
      "step": 41578
    },
    {
      "epoch": 0.00025377197265625,
      "step": 41578,
      "training_step_time": 0.3799145221710205
    },
    {
      "epoch": 0.000253778076171875,
      "model_forward_time": 0.11524248123168945,
      "step": 41579
    },
    {
      "epoch": 0.000253778076171875,
      "step": 41579,
      "training_step_time": 0.48450231552124023
    },
    {
      "epoch": 0.0002537841796875,
      "grad_norm": 0.08095958083868027,
      "learning_rate": 2.362879302963135e-05,
      "loss": 0.0355,
      "step": 41580
    },
    {
      "epoch": 0.0002537841796875,
      "model_forward_time": 0.11560583114624023,
      "step": 41580
    },
    {
      "epoch": 0.0002537841796875,
      "step": 41580,
      "training_step_time": 0.39870142936706543
    },
    {
      "epoch": 0.000253790283203125,
      "model_forward_time": 0.11583375930786133,
      "step": 41581
    },
    {
      "epoch": 0.000253790283203125,
      "step": 41581,
      "training_step_time": 0.48748326301574707
    },
    {
      "epoch": 0.00025379638671875,
      "model_forward_time": 0.11426043510437012,
      "step": 41582
    },
    {
      "epoch": 0.00025379638671875,
      "step": 41582,
      "training_step_time": 0.3883945941925049
    },
    {
      "epoch": 0.000253802490234375,
      "model_forward_time": 0.11488103866577148,
      "step": 41583
    },
    {
      "epoch": 0.000253802490234375,
      "step": 41583,
      "training_step_time": 0.46885228157043457
    },
    {
      "epoch": 0.00025380859375,
      "model_forward_time": 0.11532282829284668,
      "step": 41584
    },
    {
      "epoch": 0.00025380859375,
      "step": 41584,
      "training_step_time": 0.40729641914367676
    },
    {
      "epoch": 0.000253814697265625,
      "model_forward_time": 0.11476874351501465,
      "step": 41585
    },
    {
      "epoch": 0.000253814697265625,
      "step": 41585,
      "training_step_time": 0.464069128036499
    },
    {
      "epoch": 0.00025382080078125,
      "model_forward_time": 0.11484742164611816,
      "step": 41586
    },
    {
      "epoch": 0.00025382080078125,
      "step": 41586,
      "training_step_time": 0.48674750328063965
    },
    {
      "epoch": 0.000253826904296875,
      "model_forward_time": 0.11484670639038086,
      "step": 41587
    },
    {
      "epoch": 0.000253826904296875,
      "step": 41587,
      "training_step_time": 0.4905679225921631
    },
    {
      "epoch": 0.0002538330078125,
      "model_forward_time": 0.11464214324951172,
      "step": 41588
    },
    {
      "epoch": 0.0002538330078125,
      "step": 41588,
      "training_step_time": 0.3921809196472168
    },
    {
      "epoch": 0.000253839111328125,
      "model_forward_time": 0.11418628692626953,
      "step": 41589
    },
    {
      "epoch": 0.000253839111328125,
      "step": 41589,
      "training_step_time": 0.3953394889831543
    },
    {
      "epoch": 0.00025384521484375,
      "grad_norm": 0.117749884724617,
      "learning_rate": 2.3605383844972966e-05,
      "loss": 0.0353,
      "step": 41590
    },
    {
      "epoch": 0.00025384521484375,
      "model_forward_time": 0.11444544792175293,
      "step": 41590
    },
    {
      "epoch": 0.00025384521484375,
      "step": 41590,
      "training_step_time": 0.39604830741882324
    },
    {
      "epoch": 0.000253851318359375,
      "model_forward_time": 0.11513805389404297,
      "step": 41591
    },
    {
      "epoch": 0.000253851318359375,
      "step": 41591,
      "training_step_time": 0.3916926383972168
    },
    {
      "epoch": 0.000253857421875,
      "model_forward_time": 0.11493468284606934,
      "step": 41592
    },
    {
      "epoch": 0.000253857421875,
      "step": 41592,
      "training_step_time": 0.39696311950683594
    },
    {
      "epoch": 0.000253863525390625,
      "model_forward_time": 0.11425304412841797,
      "step": 41593
    },
    {
      "epoch": 0.000253863525390625,
      "step": 41593,
      "training_step_time": 0.406313419342041
    },
    {
      "epoch": 0.00025386962890625,
      "model_forward_time": 0.11485099792480469,
      "step": 41594
    },
    {
      "epoch": 0.00025386962890625,
      "step": 41594,
      "training_step_time": 0.41721224784851074
    },
    {
      "epoch": 0.000253875732421875,
      "model_forward_time": 0.11501932144165039,
      "step": 41595
    },
    {
      "epoch": 0.000253875732421875,
      "step": 41595,
      "training_step_time": 0.5717489719390869
    },
    {
      "epoch": 0.0002538818359375,
      "model_forward_time": 0.11451506614685059,
      "step": 41596
    },
    {
      "epoch": 0.0002538818359375,
      "step": 41596,
      "training_step_time": 0.40027809143066406
    },
    {
      "epoch": 0.000253887939453125,
      "model_forward_time": 0.1148531436920166,
      "step": 41597
    },
    {
      "epoch": 0.000253887939453125,
      "step": 41597,
      "training_step_time": 0.3959031105041504
    },
    {
      "epoch": 0.00025389404296875,
      "model_forward_time": 0.11456155776977539,
      "step": 41598
    },
    {
      "epoch": 0.00025389404296875,
      "step": 41598,
      "training_step_time": 0.4347105026245117
    },
    {
      "epoch": 0.000253900146484375,
      "model_forward_time": 0.11450624465942383,
      "step": 41599
    },
    {
      "epoch": 0.000253900146484375,
      "step": 41599,
      "training_step_time": 0.46970605850219727
    },
    {
      "epoch": 0.00025390625,
      "grad_norm": 0.09370733052492142,
      "learning_rate": 2.3581982678302063e-05,
      "loss": 0.0389,
      "step": 41600
    },
    {
      "epoch": 0.00025390625,
      "model_forward_time": 0.1146087646484375,
      "step": 41600
    },
    {
      "epoch": 0.00025390625,
      "step": 41600,
      "training_step_time": 0.46799135208129883
    },
    {
      "epoch": 0.000253912353515625,
      "model_forward_time": 0.11552119255065918,
      "step": 41601
    },
    {
      "epoch": 0.000253912353515625,
      "step": 41601,
      "training_step_time": 0.48160481452941895
    },
    {
      "epoch": 0.00025391845703125,
      "model_forward_time": 0.11543869972229004,
      "step": 41602
    },
    {
      "epoch": 0.00025391845703125,
      "step": 41602,
      "training_step_time": 0.42965126037597656
    },
    {
      "epoch": 0.000253924560546875,
      "model_forward_time": 0.11451029777526855,
      "step": 41603
    },
    {
      "epoch": 0.000253924560546875,
      "step": 41603,
      "training_step_time": 0.38952136039733887
    },
    {
      "epoch": 0.0002539306640625,
      "model_forward_time": 0.11460590362548828,
      "step": 41604
    },
    {
      "epoch": 0.0002539306640625,
      "step": 41604,
      "training_step_time": 0.3998301029205322
    },
    {
      "epoch": 0.000253936767578125,
      "model_forward_time": 0.11493778228759766,
      "step": 41605
    },
    {
      "epoch": 0.000253936767578125,
      "step": 41605,
      "training_step_time": 0.3921544551849365
    },
    {
      "epoch": 0.00025394287109375,
      "model_forward_time": 0.11560440063476562,
      "step": 41606
    },
    {
      "epoch": 0.00025394287109375,
      "step": 41606,
      "training_step_time": 0.3910548686981201
    },
    {
      "epoch": 0.000253948974609375,
      "model_forward_time": 0.11601638793945312,
      "step": 41607
    },
    {
      "epoch": 0.000253948974609375,
      "step": 41607,
      "training_step_time": 0.5727930068969727
    },
    {
      "epoch": 0.000253955078125,
      "model_forward_time": 0.11495447158813477,
      "step": 41608
    },
    {
      "epoch": 0.000253955078125,
      "step": 41608,
      "training_step_time": 0.3892021179199219
    },
    {
      "epoch": 0.000253961181640625,
      "model_forward_time": 0.11444306373596191,
      "step": 41609
    },
    {
      "epoch": 0.000253961181640625,
      "step": 41609,
      "training_step_time": 0.4545285701751709
    },
    {
      "epoch": 0.00025396728515625,
      "grad_norm": 0.09475865960121155,
      "learning_rate": 2.3558589536727277e-05,
      "loss": 0.0386,
      "step": 41610
    },
    {
      "epoch": 0.00025396728515625,
      "model_forward_time": 0.11585330963134766,
      "step": 41610
    },
    {
      "epoch": 0.00025396728515625,
      "step": 41610,
      "training_step_time": 0.39719295501708984
    },
    {
      "epoch": 0.000253973388671875,
      "model_forward_time": 0.11506533622741699,
      "step": 41611
    },
    {
      "epoch": 0.000253973388671875,
      "step": 41611,
      "training_step_time": 0.39499831199645996
    },
    {
      "epoch": 0.0002539794921875,
      "model_forward_time": 0.11481404304504395,
      "step": 41612
    },
    {
      "epoch": 0.0002539794921875,
      "step": 41612,
      "training_step_time": 0.4113426208496094
    },
    {
      "epoch": 0.000253985595703125,
      "model_forward_time": 0.11502885818481445,
      "step": 41613
    },
    {
      "epoch": 0.000253985595703125,
      "step": 41613,
      "training_step_time": 0.627070426940918
    },
    {
      "epoch": 0.00025399169921875,
      "model_forward_time": 0.1153564453125,
      "step": 41614
    },
    {
      "epoch": 0.00025399169921875,
      "step": 41614,
      "training_step_time": 0.39954686164855957
    },
    {
      "epoch": 0.000253997802734375,
      "model_forward_time": 0.11696290969848633,
      "step": 41615
    },
    {
      "epoch": 0.000253997802734375,
      "step": 41615,
      "training_step_time": 0.5063929557800293
    },
    {
      "epoch": 0.00025400390625,
      "model_forward_time": 0.11535477638244629,
      "step": 41616
    },
    {
      "epoch": 0.00025400390625,
      "step": 41616,
      "training_step_time": 0.4005546569824219
    },
    {
      "epoch": 0.000254010009765625,
      "model_forward_time": 0.11470603942871094,
      "step": 41617
    },
    {
      "epoch": 0.000254010009765625,
      "step": 41617,
      "training_step_time": 0.3923683166503906
    },
    {
      "epoch": 0.00025401611328125,
      "model_forward_time": 0.11613631248474121,
      "step": 41618
    },
    {
      "epoch": 0.00025401611328125,
      "step": 41618,
      "training_step_time": 0.3860902786254883
    },
    {
      "epoch": 0.000254022216796875,
      "model_forward_time": 0.11612534523010254,
      "step": 41619
    },
    {
      "epoch": 0.000254022216796875,
      "step": 41619,
      "training_step_time": 0.47303175926208496
    },
    {
      "epoch": 0.0002540283203125,
      "grad_norm": 0.12022953480482101,
      "learning_rate": 2.353520442735488e-05,
      "loss": 0.0427,
      "step": 41620
    },
    {
      "epoch": 0.0002540283203125,
      "model_forward_time": 0.11551141738891602,
      "step": 41620
    },
    {
      "epoch": 0.0002540283203125,
      "step": 41620,
      "training_step_time": 0.3848302364349365
    },
    {
      "epoch": 0.000254034423828125,
      "model_forward_time": 0.11520791053771973,
      "step": 41621
    },
    {
      "epoch": 0.000254034423828125,
      "step": 41621,
      "training_step_time": 0.3954925537109375
    },
    {
      "epoch": 0.00025404052734375,
      "model_forward_time": 0.11513400077819824,
      "step": 41622
    },
    {
      "epoch": 0.00025404052734375,
      "step": 41622,
      "training_step_time": 0.46100378036499023
    },
    {
      "epoch": 0.000254046630859375,
      "model_forward_time": 0.11544346809387207,
      "step": 41623
    },
    {
      "epoch": 0.000254046630859375,
      "step": 41623,
      "training_step_time": 0.42464470863342285
    },
    {
      "epoch": 0.000254052734375,
      "model_forward_time": 0.11482954025268555,
      "step": 41624
    },
    {
      "epoch": 0.000254052734375,
      "step": 41624,
      "training_step_time": 0.4457378387451172
    },
    {
      "epoch": 0.000254058837890625,
      "model_forward_time": 0.11529159545898438,
      "step": 41625
    },
    {
      "epoch": 0.000254058837890625,
      "step": 41625,
      "training_step_time": 0.42006397247314453
    },
    {
      "epoch": 0.00025406494140625,
      "model_forward_time": 0.11503314971923828,
      "step": 41626
    },
    {
      "epoch": 0.00025406494140625,
      "step": 41626,
      "training_step_time": 0.43714332580566406
    },
    {
      "epoch": 0.000254071044921875,
      "model_forward_time": 0.11480474472045898,
      "step": 41627
    },
    {
      "epoch": 0.000254071044921875,
      "step": 41627,
      "training_step_time": 0.3995242118835449
    },
    {
      "epoch": 0.0002540771484375,
      "model_forward_time": 0.11501693725585938,
      "step": 41628
    },
    {
      "epoch": 0.0002540771484375,
      "step": 41628,
      "training_step_time": 0.4521174430847168
    },
    {
      "epoch": 0.000254083251953125,
      "model_forward_time": 0.1152198314666748,
      "step": 41629
    },
    {
      "epoch": 0.000254083251953125,
      "step": 41629,
      "training_step_time": 0.4137890338897705
    },
    {
      "epoch": 0.00025408935546875,
      "grad_norm": 0.08939595520496368,
      "learning_rate": 2.3511827357288575e-05,
      "loss": 0.034,
      "step": 41630
    },
    {
      "epoch": 0.00025408935546875,
      "model_forward_time": 0.11505746841430664,
      "step": 41630
    },
    {
      "epoch": 0.00025408935546875,
      "step": 41630,
      "training_step_time": 0.49062132835388184
    },
    {
      "epoch": 0.000254095458984375,
      "model_forward_time": 0.11494636535644531,
      "step": 41631
    },
    {
      "epoch": 0.000254095458984375,
      "step": 41631,
      "training_step_time": 0.5452241897583008
    },
    {
      "epoch": 0.0002541015625,
      "model_forward_time": 0.11454582214355469,
      "step": 41632
    },
    {
      "epoch": 0.0002541015625,
      "step": 41632,
      "training_step_time": 0.38872623443603516
    },
    {
      "epoch": 0.000254107666015625,
      "model_forward_time": 0.11430120468139648,
      "step": 41633
    },
    {
      "epoch": 0.000254107666015625,
      "step": 41633,
      "training_step_time": 0.3927006721496582
    },
    {
      "epoch": 0.00025411376953125,
      "model_forward_time": 0.11493730545043945,
      "step": 41634
    },
    {
      "epoch": 0.00025411376953125,
      "step": 41634,
      "training_step_time": 0.399350643157959
    },
    {
      "epoch": 0.000254119873046875,
      "model_forward_time": 0.11513233184814453,
      "step": 41635
    },
    {
      "epoch": 0.000254119873046875,
      "step": 41635,
      "training_step_time": 0.38784122467041016
    },
    {
      "epoch": 0.0002541259765625,
      "model_forward_time": 0.11486649513244629,
      "step": 41636
    },
    {
      "epoch": 0.0002541259765625,
      "step": 41636,
      "training_step_time": 0.4765889644622803
    },
    {
      "epoch": 0.000254132080078125,
      "model_forward_time": 0.1148228645324707,
      "step": 41637
    },
    {
      "epoch": 0.000254132080078125,
      "step": 41637,
      "training_step_time": 0.594188928604126
    },
    {
      "epoch": 0.00025413818359375,
      "model_forward_time": 0.11491060256958008,
      "step": 41638
    },
    {
      "epoch": 0.00025413818359375,
      "step": 41638,
      "training_step_time": 0.38569140434265137
    },
    {
      "epoch": 0.000254144287109375,
      "model_forward_time": 0.11415481567382812,
      "step": 41639
    },
    {
      "epoch": 0.000254144287109375,
      "step": 41639,
      "training_step_time": 0.39760255813598633
    },
    {
      "epoch": 0.000254150390625,
      "grad_norm": 0.09960795938968658,
      "learning_rate": 2.3488458333629777e-05,
      "loss": 0.041,
      "step": 41640
    },
    {
      "epoch": 0.000254150390625,
      "model_forward_time": 0.11492156982421875,
      "step": 41640
    },
    {
      "epoch": 0.000254150390625,
      "step": 41640,
      "training_step_time": 0.41809725761413574
    },
    {
      "epoch": 0.000254156494140625,
      "model_forward_time": 0.11504697799682617,
      "step": 41641
    },
    {
      "epoch": 0.000254156494140625,
      "step": 41641,
      "training_step_time": 0.39392614364624023
    },
    {
      "epoch": 0.00025416259765625,
      "model_forward_time": 0.11506104469299316,
      "step": 41642
    },
    {
      "epoch": 0.00025416259765625,
      "step": 41642,
      "training_step_time": 0.49146580696105957
    },
    {
      "epoch": 0.000254168701171875,
      "model_forward_time": 0.11498141288757324,
      "step": 41643
    },
    {
      "epoch": 0.000254168701171875,
      "step": 41643,
      "training_step_time": 0.47140073776245117
    },
    {
      "epoch": 0.0002541748046875,
      "model_forward_time": 0.11460232734680176,
      "step": 41644
    },
    {
      "epoch": 0.0002541748046875,
      "step": 41644,
      "training_step_time": 0.4282398223876953
    },
    {
      "epoch": 0.000254180908203125,
      "model_forward_time": 0.11496281623840332,
      "step": 41645
    },
    {
      "epoch": 0.000254180908203125,
      "step": 41645,
      "training_step_time": 0.387739896774292
    },
    {
      "epoch": 0.00025418701171875,
      "model_forward_time": 0.1149146556854248,
      "step": 41646
    },
    {
      "epoch": 0.00025418701171875,
      "step": 41646,
      "training_step_time": 0.38559818267822266
    },
    {
      "epoch": 0.000254193115234375,
      "model_forward_time": 0.11559677124023438,
      "step": 41647
    },
    {
      "epoch": 0.000254193115234375,
      "step": 41647,
      "training_step_time": 0.37736082077026367
    },
    {
      "epoch": 0.00025419921875,
      "model_forward_time": 0.1160881519317627,
      "step": 41648
    },
    {
      "epoch": 0.00025419921875,
      "step": 41648,
      "training_step_time": 0.394150972366333
    },
    {
      "epoch": 0.000254205322265625,
      "model_forward_time": 0.11560249328613281,
      "step": 41649
    },
    {
      "epoch": 0.000254205322265625,
      "step": 41649,
      "training_step_time": 0.5756044387817383
    },
    {
      "epoch": 0.00025421142578125,
      "grad_norm": 0.13291457295417786,
      "learning_rate": 2.3465097363477306e-05,
      "loss": 0.0417,
      "step": 41650
    },
    {
      "epoch": 0.00025421142578125,
      "model_forward_time": 0.11530542373657227,
      "step": 41650
    },
    {
      "epoch": 0.00025421142578125,
      "step": 41650,
      "training_step_time": 0.41231775283813477
    },
    {
      "epoch": 0.000254217529296875,
      "model_forward_time": 0.11529707908630371,
      "step": 41651
    },
    {
      "epoch": 0.000254217529296875,
      "step": 41651,
      "training_step_time": 0.38782310485839844
    },
    {
      "epoch": 0.0002542236328125,
      "model_forward_time": 0.11491060256958008,
      "step": 41652
    },
    {
      "epoch": 0.0002542236328125,
      "step": 41652,
      "training_step_time": 0.39214658737182617
    },
    {
      "epoch": 0.000254229736328125,
      "model_forward_time": 0.11545872688293457,
      "step": 41653
    },
    {
      "epoch": 0.000254229736328125,
      "step": 41653,
      "training_step_time": 0.379896879196167
    },
    {
      "epoch": 0.00025423583984375,
      "model_forward_time": 0.11621594429016113,
      "step": 41654
    },
    {
      "epoch": 0.00025423583984375,
      "step": 41654,
      "training_step_time": 0.40323781967163086
    },
    {
      "epoch": 0.000254241943359375,
      "model_forward_time": 0.11513566970825195,
      "step": 41655
    },
    {
      "epoch": 0.000254241943359375,
      "step": 41655,
      "training_step_time": 0.5200052261352539
    },
    {
      "epoch": 0.000254248046875,
      "model_forward_time": 0.11507463455200195,
      "step": 41656
    },
    {
      "epoch": 0.000254248046875,
      "step": 41656,
      "training_step_time": 0.4854090213775635
    },
    {
      "epoch": 0.000254254150390625,
      "model_forward_time": 0.1148068904876709,
      "step": 41657
    },
    {
      "epoch": 0.000254254150390625,
      "step": 41657,
      "training_step_time": 0.45824170112609863
    },
    {
      "epoch": 0.00025426025390625,
      "model_forward_time": 0.1149148941040039,
      "step": 41658
    },
    {
      "epoch": 0.00025426025390625,
      "step": 41658,
      "training_step_time": 0.4864315986633301
    },
    {
      "epoch": 0.000254266357421875,
      "model_forward_time": 0.11432313919067383,
      "step": 41659
    },
    {
      "epoch": 0.000254266357421875,
      "step": 41659,
      "training_step_time": 0.38695526123046875
    },
    {
      "epoch": 0.0002542724609375,
      "grad_norm": 0.13430142402648926,
      "learning_rate": 2.344174445392766e-05,
      "loss": 0.0391,
      "step": 41660
    },
    {
      "epoch": 0.0002542724609375,
      "model_forward_time": 0.11385297775268555,
      "step": 41660
    },
    {
      "epoch": 0.0002542724609375,
      "step": 41660,
      "training_step_time": 0.3844881057739258
    },
    {
      "epoch": 0.000254278564453125,
      "model_forward_time": 0.11503481864929199,
      "step": 41661
    },
    {
      "epoch": 0.000254278564453125,
      "step": 41661,
      "training_step_time": 0.369992733001709
    },
    {
      "epoch": 0.00025428466796875,
      "model_forward_time": 0.11536288261413574,
      "step": 41662
    },
    {
      "epoch": 0.00025428466796875,
      "step": 41662,
      "training_step_time": 0.4369337558746338
    },
    {
      "epoch": 0.000254290771484375,
      "model_forward_time": 0.11514735221862793,
      "step": 41663
    },
    {
      "epoch": 0.000254290771484375,
      "step": 41663,
      "training_step_time": 0.4191579818725586
    },
    {
      "epoch": 0.000254296875,
      "model_forward_time": 0.11491155624389648,
      "step": 41664
    },
    {
      "epoch": 0.000254296875,
      "step": 41664,
      "training_step_time": 0.4231598377227783
    },
    {
      "epoch": 0.000254302978515625,
      "model_forward_time": 0.1149446964263916,
      "step": 41665
    },
    {
      "epoch": 0.000254302978515625,
      "step": 41665,
      "training_step_time": 0.4198265075683594
    },
    {
      "epoch": 0.00025430908203125,
      "model_forward_time": 0.11545610427856445,
      "step": 41666
    },
    {
      "epoch": 0.00025430908203125,
      "step": 41666,
      "training_step_time": 0.4779541492462158
    },
    {
      "epoch": 0.000254315185546875,
      "model_forward_time": 0.1148989200592041,
      "step": 41667
    },
    {
      "epoch": 0.000254315185546875,
      "step": 41667,
      "training_step_time": 0.3752412796020508
    },
    {
      "epoch": 0.0002543212890625,
      "model_forward_time": 0.11502885818481445,
      "step": 41668
    },
    {
      "epoch": 0.0002543212890625,
      "step": 41668,
      "training_step_time": 0.3842654228210449
    },
    {
      "epoch": 0.000254327392578125,
      "model_forward_time": 0.11542534828186035,
      "step": 41669
    },
    {
      "epoch": 0.000254327392578125,
      "step": 41669,
      "training_step_time": 0.4071497917175293
    },
    {
      "epoch": 0.00025433349609375,
      "grad_norm": 0.10423947870731354,
      "learning_rate": 2.341839961207482e-05,
      "loss": 0.0377,
      "step": 41670
    },
    {
      "epoch": 0.00025433349609375,
      "model_forward_time": 0.11481904983520508,
      "step": 41670
    },
    {
      "epoch": 0.00025433349609375,
      "step": 41670,
      "training_step_time": 0.40583229064941406
    },
    {
      "epoch": 0.000254339599609375,
      "model_forward_time": 0.11520886421203613,
      "step": 41671
    },
    {
      "epoch": 0.000254339599609375,
      "step": 41671,
      "training_step_time": 0.5088915824890137
    },
    {
      "epoch": 0.000254345703125,
      "model_forward_time": 0.11850976943969727,
      "step": 41672
    },
    {
      "epoch": 0.000254345703125,
      "step": 41672,
      "training_step_time": 0.3967251777648926
    },
    {
      "epoch": 0.000254351806640625,
      "model_forward_time": 0.11516213417053223,
      "step": 41673
    },
    {
      "epoch": 0.000254351806640625,
      "step": 41673,
      "training_step_time": 0.56532883644104
    },
    {
      "epoch": 0.00025435791015625,
      "model_forward_time": 0.11466050148010254,
      "step": 41674
    },
    {
      "epoch": 0.00025435791015625,
      "step": 41674,
      "training_step_time": 0.3808600902557373
    },
    {
      "epoch": 0.000254364013671875,
      "model_forward_time": 0.11452841758728027,
      "step": 41675
    },
    {
      "epoch": 0.000254364013671875,
      "step": 41675,
      "training_step_time": 0.38573527336120605
    },
    {
      "epoch": 0.0002543701171875,
      "model_forward_time": 0.11443591117858887,
      "step": 41676
    },
    {
      "epoch": 0.0002543701171875,
      "step": 41676,
      "training_step_time": 0.39133262634277344
    },
    {
      "epoch": 0.000254376220703125,
      "model_forward_time": 0.11428594589233398,
      "step": 41677
    },
    {
      "epoch": 0.000254376220703125,
      "step": 41677,
      "training_step_time": 0.39496421813964844
    },
    {
      "epoch": 0.00025438232421875,
      "model_forward_time": 0.11561799049377441,
      "step": 41678
    },
    {
      "epoch": 0.00025438232421875,
      "step": 41678,
      "training_step_time": 0.444533109664917
    },
    {
      "epoch": 0.000254388427734375,
      "model_forward_time": 0.11586833000183105,
      "step": 41679
    },
    {
      "epoch": 0.000254388427734375,
      "step": 41679,
      "training_step_time": 0.6286122798919678
    },
    {
      "epoch": 0.00025439453125,
      "grad_norm": 0.08806174993515015,
      "learning_rate": 2.339506284501033e-05,
      "loss": 0.0336,
      "step": 41680
    },
    {
      "epoch": 0.00025439453125,
      "model_forward_time": 0.11560750007629395,
      "step": 41680
    },
    {
      "epoch": 0.00025439453125,
      "step": 41680,
      "training_step_time": 0.3948984146118164
    },
    {
      "epoch": 0.000254400634765625,
      "model_forward_time": 0.11427593231201172,
      "step": 41681
    },
    {
      "epoch": 0.000254400634765625,
      "step": 41681,
      "training_step_time": 0.3905165195465088
    },
    {
      "epoch": 0.00025440673828125,
      "model_forward_time": 0.11474156379699707,
      "step": 41682
    },
    {
      "epoch": 0.00025440673828125,
      "step": 41682,
      "training_step_time": 0.3892996311187744
    },
    {
      "epoch": 0.000254412841796875,
      "model_forward_time": 0.11549687385559082,
      "step": 41683
    },
    {
      "epoch": 0.000254412841796875,
      "step": 41683,
      "training_step_time": 0.45731353759765625
    },
    {
      "epoch": 0.0002544189453125,
      "model_forward_time": 0.11501955986022949,
      "step": 41684
    },
    {
      "epoch": 0.0002544189453125,
      "step": 41684,
      "training_step_time": 0.45888376235961914
    },
    {
      "epoch": 0.000254425048828125,
      "model_forward_time": 0.11515951156616211,
      "step": 41685
    },
    {
      "epoch": 0.000254425048828125,
      "step": 41685,
      "training_step_time": 0.5179824829101562
    },
    {
      "epoch": 0.00025443115234375,
      "model_forward_time": 0.1154489517211914,
      "step": 41686
    },
    {
      "epoch": 0.00025443115234375,
      "step": 41686,
      "training_step_time": 0.49866771697998047
    },
    {
      "epoch": 0.000254437255859375,
      "model_forward_time": 0.11487913131713867,
      "step": 41687
    },
    {
      "epoch": 0.000254437255859375,
      "step": 41687,
      "training_step_time": 0.3913705348968506
    },
    {
      "epoch": 0.000254443359375,
      "model_forward_time": 0.1145634651184082,
      "step": 41688
    },
    {
      "epoch": 0.000254443359375,
      "step": 41688,
      "training_step_time": 0.37703490257263184
    },
    {
      "epoch": 0.000254449462890625,
      "model_forward_time": 0.11487889289855957,
      "step": 41689
    },
    {
      "epoch": 0.000254449462890625,
      "step": 41689,
      "training_step_time": 0.41883301734924316
    },
    {
      "epoch": 0.00025445556640625,
      "grad_norm": 0.09700477123260498,
      "learning_rate": 2.3371734159823284e-05,
      "loss": 0.0435,
      "step": 41690
    },
    {
      "epoch": 0.00025445556640625,
      "model_forward_time": 0.11582112312316895,
      "step": 41690
    },
    {
      "epoch": 0.00025445556640625,
      "step": 41690,
      "training_step_time": 0.4522397518157959
    },
    {
      "epoch": 0.000254461669921875,
      "model_forward_time": 0.11498856544494629,
      "step": 41691
    },
    {
      "epoch": 0.000254461669921875,
      "step": 41691,
      "training_step_time": 0.4916222095489502
    },
    {
      "epoch": 0.0002544677734375,
      "model_forward_time": 0.11535000801086426,
      "step": 41692
    },
    {
      "epoch": 0.0002544677734375,
      "step": 41692,
      "training_step_time": 0.4841792583465576
    },
    {
      "epoch": 0.000254473876953125,
      "model_forward_time": 0.11479496955871582,
      "step": 41693
    },
    {
      "epoch": 0.000254473876953125,
      "step": 41693,
      "training_step_time": 0.3992576599121094
    },
    {
      "epoch": 0.00025447998046875,
      "model_forward_time": 0.11499285697937012,
      "step": 41694
    },
    {
      "epoch": 0.00025447998046875,
      "step": 41694,
      "training_step_time": 0.40606117248535156
    },
    {
      "epoch": 0.000254486083984375,
      "model_forward_time": 0.11528372764587402,
      "step": 41695
    },
    {
      "epoch": 0.000254486083984375,
      "step": 41695,
      "training_step_time": 0.39391517639160156
    },
    {
      "epoch": 0.0002544921875,
      "model_forward_time": 0.11494183540344238,
      "step": 41696
    },
    {
      "epoch": 0.0002544921875,
      "step": 41696,
      "training_step_time": 0.39641642570495605
    },
    {
      "epoch": 0.000254498291015625,
      "model_forward_time": 0.11480927467346191,
      "step": 41697
    },
    {
      "epoch": 0.000254498291015625,
      "step": 41697,
      "training_step_time": 0.4687166213989258
    },
    {
      "epoch": 0.00025450439453125,
      "model_forward_time": 0.11509370803833008,
      "step": 41698
    },
    {
      "epoch": 0.00025450439453125,
      "step": 41698,
      "training_step_time": 0.44491004943847656
    },
    {
      "epoch": 0.000254510498046875,
      "model_forward_time": 0.11465907096862793,
      "step": 41699
    },
    {
      "epoch": 0.000254510498046875,
      "step": 41699,
      "training_step_time": 0.4425840377807617
    },
    {
      "epoch": 0.0002545166015625,
      "grad_norm": 0.11547389626502991,
      "learning_rate": 2.3348413563600325e-05,
      "loss": 0.0333,
      "step": 41700
    },
    {
      "epoch": 0.0002545166015625,
      "model_forward_time": 0.11505722999572754,
      "step": 41700
    },
    {
      "epoch": 0.0002545166015625,
      "step": 41700,
      "training_step_time": 0.48430824279785156
    },
    {
      "epoch": 0.000254522705078125,
      "model_forward_time": 0.11622047424316406,
      "step": 41701
    },
    {
      "epoch": 0.000254522705078125,
      "step": 41701,
      "training_step_time": 0.38625574111938477
    },
    {
      "epoch": 0.00025452880859375,
      "model_forward_time": 0.11517548561096191,
      "step": 41702
    },
    {
      "epoch": 0.00025452880859375,
      "step": 41702,
      "training_step_time": 0.39485788345336914
    },
    {
      "epoch": 0.000254534912109375,
      "model_forward_time": 0.11547493934631348,
      "step": 41703
    },
    {
      "epoch": 0.000254534912109375,
      "step": 41703,
      "training_step_time": 0.40871691703796387
    },
    {
      "epoch": 0.000254541015625,
      "model_forward_time": 0.11452436447143555,
      "step": 41704
    },
    {
      "epoch": 0.000254541015625,
      "step": 41704,
      "training_step_time": 0.37267565727233887
    },
    {
      "epoch": 0.000254547119140625,
      "model_forward_time": 0.11491942405700684,
      "step": 41705
    },
    {
      "epoch": 0.000254547119140625,
      "step": 41705,
      "training_step_time": 0.4006640911102295
    },
    {
      "epoch": 0.00025455322265625,
      "model_forward_time": 0.11469554901123047,
      "step": 41706
    },
    {
      "epoch": 0.00025455322265625,
      "step": 41706,
      "training_step_time": 0.4912285804748535
    },
    {
      "epoch": 0.000254559326171875,
      "model_forward_time": 0.11459493637084961,
      "step": 41707
    },
    {
      "epoch": 0.000254559326171875,
      "step": 41707,
      "training_step_time": 0.4517803192138672
    },
    {
      "epoch": 0.0002545654296875,
      "model_forward_time": 0.11491775512695312,
      "step": 41708
    },
    {
      "epoch": 0.0002545654296875,
      "step": 41708,
      "training_step_time": 0.48160481452941895
    },
    {
      "epoch": 0.000254571533203125,
      "model_forward_time": 0.1152336597442627,
      "step": 41709
    },
    {
      "epoch": 0.000254571533203125,
      "step": 41709,
      "training_step_time": 0.386699914932251
    },
    {
      "epoch": 0.00025457763671875,
      "grad_norm": 0.11186455190181732,
      "learning_rate": 2.3325101063425608e-05,
      "loss": 0.0378,
      "step": 41710
    },
    {
      "epoch": 0.00025457763671875,
      "model_forward_time": 0.11540937423706055,
      "step": 41710
    },
    {
      "epoch": 0.00025457763671875,
      "step": 41710,
      "training_step_time": 0.39609313011169434
    },
    {
      "epoch": 0.000254583740234375,
      "model_forward_time": 0.11441230773925781,
      "step": 41711
    },
    {
      "epoch": 0.000254583740234375,
      "step": 41711,
      "training_step_time": 0.4749293327331543
    },
    {
      "epoch": 0.00025458984375,
      "model_forward_time": 0.1147303581237793,
      "step": 41712
    },
    {
      "epoch": 0.00025458984375,
      "step": 41712,
      "training_step_time": 0.4234898090362549
    },
    {
      "epoch": 0.000254595947265625,
      "model_forward_time": 0.11484980583190918,
      "step": 41713
    },
    {
      "epoch": 0.000254595947265625,
      "step": 41713,
      "training_step_time": 0.45421314239501953
    },
    {
      "epoch": 0.00025460205078125,
      "model_forward_time": 0.11504292488098145,
      "step": 41714
    },
    {
      "epoch": 0.00025460205078125,
      "step": 41714,
      "training_step_time": 0.49126315116882324
    },
    {
      "epoch": 0.000254608154296875,
      "model_forward_time": 0.11562085151672363,
      "step": 41715
    },
    {
      "epoch": 0.000254608154296875,
      "step": 41715,
      "training_step_time": 0.38820505142211914
    },
    {
      "epoch": 0.0002546142578125,
      "model_forward_time": 0.1152963638305664,
      "step": 41716
    },
    {
      "epoch": 0.0002546142578125,
      "step": 41716,
      "training_step_time": 0.38718605041503906
    },
    {
      "epoch": 0.000254620361328125,
      "model_forward_time": 0.11412549018859863,
      "step": 41717
    },
    {
      "epoch": 0.000254620361328125,
      "step": 41717,
      "training_step_time": 0.3898305892944336
    },
    {
      "epoch": 0.00025462646484375,
      "model_forward_time": 0.11488842964172363,
      "step": 41718
    },
    {
      "epoch": 0.00025462646484375,
      "step": 41718,
      "training_step_time": 0.40131211280822754
    },
    {
      "epoch": 0.000254632568359375,
      "model_forward_time": 0.11554098129272461,
      "step": 41719
    },
    {
      "epoch": 0.000254632568359375,
      "step": 41719,
      "training_step_time": 0.3760812282562256
    },
    {
      "epoch": 0.000254638671875,
      "grad_norm": 0.129172220826149,
      "learning_rate": 2.3301796666380898e-05,
      "loss": 0.0407,
      "step": 41720
    },
    {
      "epoch": 0.000254638671875,
      "model_forward_time": 0.11531805992126465,
      "step": 41720
    },
    {
      "epoch": 0.000254638671875,
      "step": 41720,
      "training_step_time": 0.40639352798461914
    },
    {
      "epoch": 0.000254644775390625,
      "model_forward_time": 0.11496210098266602,
      "step": 41721
    },
    {
      "epoch": 0.000254644775390625,
      "step": 41721,
      "training_step_time": 0.5101544857025146
    },
    {
      "epoch": 0.00025465087890625,
      "model_forward_time": 0.11526250839233398,
      "step": 41722
    },
    {
      "epoch": 0.00025465087890625,
      "step": 41722,
      "training_step_time": 0.4069523811340332
    },
    {
      "epoch": 0.000254656982421875,
      "model_forward_time": 0.11494302749633789,
      "step": 41723
    },
    {
      "epoch": 0.000254656982421875,
      "step": 41723,
      "training_step_time": 0.39402127265930176
    },
    {
      "epoch": 0.0002546630859375,
      "model_forward_time": 0.11482787132263184,
      "step": 41724
    },
    {
      "epoch": 0.0002546630859375,
      "step": 41724,
      "training_step_time": 0.40259408950805664
    },
    {
      "epoch": 0.000254669189453125,
      "model_forward_time": 0.11543011665344238,
      "step": 41725
    },
    {
      "epoch": 0.000254669189453125,
      "step": 41725,
      "training_step_time": 0.37706851959228516
    },
    {
      "epoch": 0.00025467529296875,
      "model_forward_time": 0.11444973945617676,
      "step": 41726
    },
    {
      "epoch": 0.00025467529296875,
      "step": 41726,
      "training_step_time": 0.44301533699035645
    },
    {
      "epoch": 0.000254681396484375,
      "model_forward_time": 0.1152796745300293,
      "step": 41727
    },
    {
      "epoch": 0.000254681396484375,
      "step": 41727,
      "training_step_time": 0.5509381294250488
    },
    {
      "epoch": 0.0002546875,
      "model_forward_time": 0.11485767364501953,
      "step": 41728
    },
    {
      "epoch": 0.0002546875,
      "step": 41728,
      "training_step_time": 0.47025394439697266
    },
    {
      "epoch": 0.000254693603515625,
      "model_forward_time": 0.11562299728393555,
      "step": 41729
    },
    {
      "epoch": 0.000254693603515625,
      "step": 41729,
      "training_step_time": 0.407956600189209
    },
    {
      "epoch": 0.00025469970703125,
      "grad_norm": 0.11476247012615204,
      "learning_rate": 2.3278500379545436e-05,
      "loss": 0.0434,
      "step": 41730
    },
    {
      "epoch": 0.00025469970703125,
      "model_forward_time": 0.11478233337402344,
      "step": 41730
    },
    {
      "epoch": 0.00025469970703125,
      "step": 41730,
      "training_step_time": 0.38440823554992676
    },
    {
      "epoch": 0.000254705810546875,
      "model_forward_time": 0.11447644233703613,
      "step": 41731
    },
    {
      "epoch": 0.000254705810546875,
      "step": 41731,
      "training_step_time": 0.37397098541259766
    },
    {
      "epoch": 0.0002547119140625,
      "model_forward_time": 0.11494660377502441,
      "step": 41732
    },
    {
      "epoch": 0.0002547119140625,
      "step": 41732,
      "training_step_time": 0.38906097412109375
    },
    {
      "epoch": 0.000254718017578125,
      "model_forward_time": 0.11512970924377441,
      "step": 41733
    },
    {
      "epoch": 0.000254718017578125,
      "step": 41733,
      "training_step_time": 0.7025806903839111
    },
    {
      "epoch": 0.00025472412109375,
      "model_forward_time": 0.11503767967224121,
      "step": 41734
    },
    {
      "epoch": 0.00025472412109375,
      "step": 41734,
      "training_step_time": 0.44828295707702637
    },
    {
      "epoch": 0.000254730224609375,
      "model_forward_time": 0.11468386650085449,
      "step": 41735
    },
    {
      "epoch": 0.000254730224609375,
      "step": 41735,
      "training_step_time": 0.4065392017364502
    },
    {
      "epoch": 0.000254736328125,
      "model_forward_time": 0.11437368392944336,
      "step": 41736
    },
    {
      "epoch": 0.000254736328125,
      "step": 41736,
      "training_step_time": 0.46736836433410645
    },
    {
      "epoch": 0.000254742431640625,
      "model_forward_time": 0.11385869979858398,
      "step": 41737
    },
    {
      "epoch": 0.000254742431640625,
      "step": 41737,
      "training_step_time": 0.3699913024902344
    },
    {
      "epoch": 0.00025474853515625,
      "model_forward_time": 0.11380648612976074,
      "step": 41738
    },
    {
      "epoch": 0.00025474853515625,
      "step": 41738,
      "training_step_time": 0.3890669345855713
    },
    {
      "epoch": 0.000254754638671875,
      "model_forward_time": 0.11410117149353027,
      "step": 41739
    },
    {
      "epoch": 0.000254754638671875,
      "step": 41739,
      "training_step_time": 0.47411394119262695
    },
    {
      "epoch": 0.0002547607421875,
      "grad_norm": 0.09380114823579788,
      "learning_rate": 2.3255212209996025e-05,
      "loss": 0.0379,
      "step": 41740
    },
    {
      "epoch": 0.0002547607421875,
      "model_forward_time": 0.11525893211364746,
      "step": 41740
    },
    {
      "epoch": 0.0002547607421875,
      "step": 41740,
      "training_step_time": 0.4241669178009033
    },
    {
      "epoch": 0.000254766845703125,
      "model_forward_time": 0.11476993560791016,
      "step": 41741
    },
    {
      "epoch": 0.000254766845703125,
      "step": 41741,
      "training_step_time": 0.49805283546447754
    },
    {
      "epoch": 0.00025477294921875,
      "model_forward_time": 0.1148841381072998,
      "step": 41742
    },
    {
      "epoch": 0.00025477294921875,
      "step": 41742,
      "training_step_time": 0.4496474266052246
    },
    {
      "epoch": 0.000254779052734375,
      "model_forward_time": 0.11571025848388672,
      "step": 41743
    },
    {
      "epoch": 0.000254779052734375,
      "step": 41743,
      "training_step_time": 0.40187788009643555
    },
    {
      "epoch": 0.00025478515625,
      "model_forward_time": 0.11392807960510254,
      "step": 41744
    },
    {
      "epoch": 0.00025478515625,
      "step": 41744,
      "training_step_time": 0.38635945320129395
    },
    {
      "epoch": 0.000254791259765625,
      "model_forward_time": 0.11416912078857422,
      "step": 41745
    },
    {
      "epoch": 0.000254791259765625,
      "step": 41745,
      "training_step_time": 0.48647165298461914
    },
    {
      "epoch": 0.00025479736328125,
      "model_forward_time": 0.11459040641784668,
      "step": 41746
    },
    {
      "epoch": 0.00025479736328125,
      "step": 41746,
      "training_step_time": 0.39362287521362305
    },
    {
      "epoch": 0.000254803466796875,
      "model_forward_time": 0.11517810821533203,
      "step": 41747
    },
    {
      "epoch": 0.000254803466796875,
      "step": 41747,
      "training_step_time": 0.4072296619415283
    },
    {
      "epoch": 0.0002548095703125,
      "model_forward_time": 0.11558771133422852,
      "step": 41748
    },
    {
      "epoch": 0.0002548095703125,
      "step": 41748,
      "training_step_time": 0.48987317085266113
    },
    {
      "epoch": 0.000254815673828125,
      "model_forward_time": 0.1146237850189209,
      "step": 41749
    },
    {
      "epoch": 0.000254815673828125,
      "step": 41749,
      "training_step_time": 0.4200453758239746
    },
    {
      "epoch": 0.00025482177734375,
      "grad_norm": 0.1335323601961136,
      "learning_rate": 2.323193216480698e-05,
      "loss": 0.0354,
      "step": 41750
    },
    {
      "epoch": 0.00025482177734375,
      "model_forward_time": 0.11518716812133789,
      "step": 41750
    },
    {
      "epoch": 0.00025482177734375,
      "step": 41750,
      "training_step_time": 0.4769749641418457
    },
    {
      "epoch": 0.000254827880859375,
      "model_forward_time": 0.11465287208557129,
      "step": 41751
    },
    {
      "epoch": 0.000254827880859375,
      "step": 41751,
      "training_step_time": 0.396817684173584
    },
    {
      "epoch": 0.000254833984375,
      "model_forward_time": 0.11530065536499023,
      "step": 41752
    },
    {
      "epoch": 0.000254833984375,
      "step": 41752,
      "training_step_time": 0.39209938049316406
    },
    {
      "epoch": 0.000254840087890625,
      "model_forward_time": 0.11437869071960449,
      "step": 41753
    },
    {
      "epoch": 0.000254840087890625,
      "step": 41753,
      "training_step_time": 0.3987846374511719
    },
    {
      "epoch": 0.00025484619140625,
      "model_forward_time": 0.11525726318359375,
      "step": 41754
    },
    {
      "epoch": 0.00025484619140625,
      "step": 41754,
      "training_step_time": 0.4435713291168213
    },
    {
      "epoch": 0.000254852294921875,
      "model_forward_time": 0.11469650268554688,
      "step": 41755
    },
    {
      "epoch": 0.000254852294921875,
      "step": 41755,
      "training_step_time": 0.43181395530700684
    },
    {
      "epoch": 0.0002548583984375,
      "model_forward_time": 0.11514830589294434,
      "step": 41756
    },
    {
      "epoch": 0.0002548583984375,
      "step": 41756,
      "training_step_time": 0.4420206546783447
    },
    {
      "epoch": 0.000254864501953125,
      "model_forward_time": 0.11457681655883789,
      "step": 41757
    },
    {
      "epoch": 0.000254864501953125,
      "step": 41757,
      "training_step_time": 0.4892299175262451
    },
    {
      "epoch": 0.00025487060546875,
      "model_forward_time": 0.11493730545043945,
      "step": 41758
    },
    {
      "epoch": 0.00025487060546875,
      "step": 41758,
      "training_step_time": 0.3792757987976074
    },
    {
      "epoch": 0.000254876708984375,
      "model_forward_time": 0.11468768119812012,
      "step": 41759
    },
    {
      "epoch": 0.000254876708984375,
      "step": 41759,
      "training_step_time": 0.3850536346435547
    },
    {
      "epoch": 0.0002548828125,
      "grad_norm": 0.16121438145637512,
      "learning_rate": 2.3208660251050158e-05,
      "loss": 0.0357,
      "step": 41760
    },
    {
      "epoch": 0.0002548828125,
      "model_forward_time": 0.114654541015625,
      "step": 41760
    },
    {
      "epoch": 0.0002548828125,
      "step": 41760,
      "training_step_time": 0.37952589988708496
    },
    {
      "epoch": 0.000254888916015625,
      "model_forward_time": 0.11461615562438965,
      "step": 41761
    },
    {
      "epoch": 0.000254888916015625,
      "step": 41761,
      "training_step_time": 0.3847522735595703
    },
    {
      "epoch": 0.00025489501953125,
      "model_forward_time": 0.11545515060424805,
      "step": 41762
    },
    {
      "epoch": 0.00025489501953125,
      "step": 41762,
      "training_step_time": 0.38379979133605957
    },
    {
      "epoch": 0.000254901123046875,
      "model_forward_time": 0.11458969116210938,
      "step": 41763
    },
    {
      "epoch": 0.000254901123046875,
      "step": 41763,
      "training_step_time": 0.5484929084777832
    },
    {
      "epoch": 0.0002549072265625,
      "model_forward_time": 0.11585736274719238,
      "step": 41764
    },
    {
      "epoch": 0.0002549072265625,
      "step": 41764,
      "training_step_time": 0.433305025100708
    },
    {
      "epoch": 0.000254913330078125,
      "model_forward_time": 0.11525774002075195,
      "step": 41765
    },
    {
      "epoch": 0.000254913330078125,
      "step": 41765,
      "training_step_time": 0.38364553451538086
    },
    {
      "epoch": 0.00025491943359375,
      "model_forward_time": 0.11465668678283691,
      "step": 41766
    },
    {
      "epoch": 0.00025491943359375,
      "step": 41766,
      "training_step_time": 0.3791210651397705
    },
    {
      "epoch": 0.000254925537109375,
      "model_forward_time": 0.11492013931274414,
      "step": 41767
    },
    {
      "epoch": 0.000254925537109375,
      "step": 41767,
      "training_step_time": 0.374039888381958
    },
    {
      "epoch": 0.000254931640625,
      "model_forward_time": 0.11565327644348145,
      "step": 41768
    },
    {
      "epoch": 0.000254931640625,
      "step": 41768,
      "training_step_time": 0.47992777824401855
    },
    {
      "epoch": 0.000254937744140625,
      "model_forward_time": 0.11472010612487793,
      "step": 41769
    },
    {
      "epoch": 0.000254937744140625,
      "step": 41769,
      "training_step_time": 0.38857126235961914
    },
    {
      "epoch": 0.00025494384765625,
      "grad_norm": 0.10359922051429749,
      "learning_rate": 2.3185396475795007e-05,
      "loss": 0.0402,
      "step": 41770
    },
    {
      "epoch": 0.00025494384765625,
      "model_forward_time": 0.11523652076721191,
      "step": 41770
    },
    {
      "epoch": 0.00025494384765625,
      "step": 41770,
      "training_step_time": 0.4427170753479004
    },
    {
      "epoch": 0.000254949951171875,
      "model_forward_time": 0.11464142799377441,
      "step": 41771
    },
    {
      "epoch": 0.000254949951171875,
      "step": 41771,
      "training_step_time": 0.3911621570587158
    },
    {
      "epoch": 0.0002549560546875,
      "model_forward_time": 0.11512398719787598,
      "step": 41772
    },
    {
      "epoch": 0.0002549560546875,
      "step": 41772,
      "training_step_time": 0.37760066986083984
    },
    {
      "epoch": 0.000254962158203125,
      "model_forward_time": 0.11475396156311035,
      "step": 41773
    },
    {
      "epoch": 0.000254962158203125,
      "step": 41773,
      "training_step_time": 0.3806319236755371
    },
    {
      "epoch": 0.00025496826171875,
      "model_forward_time": 0.11566805839538574,
      "step": 41774
    },
    {
      "epoch": 0.00025496826171875,
      "step": 41774,
      "training_step_time": 0.40108799934387207
    },
    {
      "epoch": 0.000254974365234375,
      "model_forward_time": 0.11481380462646484,
      "step": 41775
    },
    {
      "epoch": 0.000254974365234375,
      "step": 41775,
      "training_step_time": 0.4547698497772217
    },
    {
      "epoch": 0.00025498046875,
      "model_forward_time": 0.11553096771240234,
      "step": 41776
    },
    {
      "epoch": 0.00025498046875,
      "step": 41776,
      "training_step_time": 0.4341096878051758
    },
    {
      "epoch": 0.000254986572265625,
      "model_forward_time": 0.11477470397949219,
      "step": 41777
    },
    {
      "epoch": 0.000254986572265625,
      "step": 41777,
      "training_step_time": 0.41206955909729004
    },
    {
      "epoch": 0.00025499267578125,
      "model_forward_time": 0.115020751953125,
      "step": 41778
    },
    {
      "epoch": 0.00025499267578125,
      "step": 41778,
      "training_step_time": 0.3923618793487549
    },
    {
      "epoch": 0.000254998779296875,
      "model_forward_time": 0.11484718322753906,
      "step": 41779
    },
    {
      "epoch": 0.000254998779296875,
      "step": 41779,
      "training_step_time": 0.373424768447876
    },
    {
      "epoch": 0.0002550048828125,
      "grad_norm": 0.1047031357884407,
      "learning_rate": 2.3162140846108366e-05,
      "loss": 0.0385,
      "step": 41780
    },
    {
      "epoch": 0.0002550048828125,
      "model_forward_time": 0.11454653739929199,
      "step": 41780
    },
    {
      "epoch": 0.0002550048828125,
      "step": 41780,
      "training_step_time": 0.39068055152893066
    },
    {
      "epoch": 0.000255010986328125,
      "model_forward_time": 0.11504936218261719,
      "step": 41781
    },
    {
      "epoch": 0.000255010986328125,
      "step": 41781,
      "training_step_time": 0.5271928310394287
    },
    {
      "epoch": 0.00025501708984375,
      "model_forward_time": 0.1145167350769043,
      "step": 41782
    },
    {
      "epoch": 0.00025501708984375,
      "step": 41782,
      "training_step_time": 0.42058348655700684
    },
    {
      "epoch": 0.000255023193359375,
      "model_forward_time": 0.11454033851623535,
      "step": 41783
    },
    {
      "epoch": 0.000255023193359375,
      "step": 41783,
      "training_step_time": 0.4574732780456543
    },
    {
      "epoch": 0.000255029296875,
      "model_forward_time": 0.11506342887878418,
      "step": 41784
    },
    {
      "epoch": 0.000255029296875,
      "step": 41784,
      "training_step_time": 0.4598374366760254
    },
    {
      "epoch": 0.000255035400390625,
      "model_forward_time": 0.11544275283813477,
      "step": 41785
    },
    {
      "epoch": 0.000255035400390625,
      "step": 41785,
      "training_step_time": 0.3846476078033447
    },
    {
      "epoch": 0.00025504150390625,
      "model_forward_time": 0.11487483978271484,
      "step": 41786
    },
    {
      "epoch": 0.00025504150390625,
      "step": 41786,
      "training_step_time": 0.4161713123321533
    },
    {
      "epoch": 0.000255047607421875,
      "model_forward_time": 0.11514663696289062,
      "step": 41787
    },
    {
      "epoch": 0.000255047607421875,
      "step": 41787,
      "training_step_time": 0.5035512447357178
    },
    {
      "epoch": 0.0002550537109375,
      "model_forward_time": 0.11557435989379883,
      "step": 41788
    },
    {
      "epoch": 0.0002550537109375,
      "step": 41788,
      "training_step_time": 0.3930525779724121
    },
    {
      "epoch": 0.000255059814453125,
      "model_forward_time": 0.11532306671142578,
      "step": 41789
    },
    {
      "epoch": 0.000255059814453125,
      "step": 41789,
      "training_step_time": 0.3947880268096924
    },
    {
      "epoch": 0.00025506591796875,
      "grad_norm": 0.10828883200883865,
      "learning_rate": 2.3138893369054766e-05,
      "loss": 0.0378,
      "step": 41790
    },
    {
      "epoch": 0.00025506591796875,
      "model_forward_time": 0.11486387252807617,
      "step": 41790
    },
    {
      "epoch": 0.00025506591796875,
      "step": 41790,
      "training_step_time": 0.472759485244751
    },
    {
      "epoch": 0.000255072021484375,
      "model_forward_time": 0.11508870124816895,
      "step": 41791
    },
    {
      "epoch": 0.000255072021484375,
      "step": 41791,
      "training_step_time": 0.44089698791503906
    },
    {
      "epoch": 0.000255078125,
      "model_forward_time": 0.11512088775634766,
      "step": 41792
    },
    {
      "epoch": 0.000255078125,
      "step": 41792,
      "training_step_time": 0.4215881824493408
    },
    {
      "epoch": 0.000255084228515625,
      "model_forward_time": 0.11454963684082031,
      "step": 41793
    },
    {
      "epoch": 0.000255084228515625,
      "step": 41793,
      "training_step_time": 0.5079710483551025
    },
    {
      "epoch": 0.00025509033203125,
      "model_forward_time": 0.1147160530090332,
      "step": 41794
    },
    {
      "epoch": 0.00025509033203125,
      "step": 41794,
      "training_step_time": 0.41312265396118164
    },
    {
      "epoch": 0.000255096435546875,
      "model_forward_time": 0.11432242393493652,
      "step": 41795
    },
    {
      "epoch": 0.000255096435546875,
      "step": 41795,
      "training_step_time": 0.4081134796142578
    },
    {
      "epoch": 0.0002551025390625,
      "model_forward_time": 0.11512279510498047,
      "step": 41796
    },
    {
      "epoch": 0.0002551025390625,
      "step": 41796,
      "training_step_time": 0.4192357063293457
    },
    {
      "epoch": 0.000255108642578125,
      "model_forward_time": 0.11451554298400879,
      "step": 41797
    },
    {
      "epoch": 0.000255108642578125,
      "step": 41797,
      "training_step_time": 0.4012429714202881
    },
    {
      "epoch": 0.00025511474609375,
      "model_forward_time": 0.11510920524597168,
      "step": 41798
    },
    {
      "epoch": 0.00025511474609375,
      "step": 41798,
      "training_step_time": 0.43735671043395996
    },
    {
      "epoch": 0.000255120849609375,
      "model_forward_time": 0.11473727226257324,
      "step": 41799
    },
    {
      "epoch": 0.000255120849609375,
      "step": 41799,
      "training_step_time": 0.5497784614562988
    },
    {
      "epoch": 0.000255126953125,
      "grad_norm": 0.10136691480875015,
      "learning_rate": 2.3115654051696095e-05,
      "loss": 0.0426,
      "step": 41800
    },
    {
      "epoch": 0.000255126953125,
      "model_forward_time": 0.11491847038269043,
      "step": 41800
    },
    {
      "epoch": 0.000255126953125,
      "step": 41800,
      "training_step_time": 0.44286179542541504
    },
    {
      "epoch": 0.000255133056640625,
      "model_forward_time": 0.11474037170410156,
      "step": 41801
    },
    {
      "epoch": 0.000255133056640625,
      "step": 41801,
      "training_step_time": 0.3954756259918213
    },
    {
      "epoch": 0.00025513916015625,
      "model_forward_time": 0.11464142799377441,
      "step": 41802
    },
    {
      "epoch": 0.00025513916015625,
      "step": 41802,
      "training_step_time": 0.398700475692749
    },
    {
      "epoch": 0.000255145263671875,
      "model_forward_time": 0.11487150192260742,
      "step": 41803
    },
    {
      "epoch": 0.000255145263671875,
      "step": 41803,
      "training_step_time": 0.39328432083129883
    },
    {
      "epoch": 0.0002551513671875,
      "model_forward_time": 0.11449432373046875,
      "step": 41804
    },
    {
      "epoch": 0.0002551513671875,
      "step": 41804,
      "training_step_time": 0.41026926040649414
    },
    {
      "epoch": 0.000255157470703125,
      "model_forward_time": 0.1148838996887207,
      "step": 41805
    },
    {
      "epoch": 0.000255157470703125,
      "step": 41805,
      "training_step_time": 0.49854588508605957
    },
    {
      "epoch": 0.00025516357421875,
      "model_forward_time": 0.11462974548339844,
      "step": 41806
    },
    {
      "epoch": 0.00025516357421875,
      "step": 41806,
      "training_step_time": 0.41300487518310547
    },
    {
      "epoch": 0.000255169677734375,
      "model_forward_time": 0.11484575271606445,
      "step": 41807
    },
    {
      "epoch": 0.000255169677734375,
      "step": 41807,
      "training_step_time": 0.4138946533203125
    },
    {
      "epoch": 0.00025517578125,
      "model_forward_time": 0.1146237850189209,
      "step": 41808
    },
    {
      "epoch": 0.00025517578125,
      "step": 41808,
      "training_step_time": 0.4009287357330322
    },
    {
      "epoch": 0.000255181884765625,
      "model_forward_time": 0.11514091491699219,
      "step": 41809
    },
    {
      "epoch": 0.000255181884765625,
      "step": 41809,
      "training_step_time": 0.40435123443603516
    },
    {
      "epoch": 0.00025518798828125,
      "grad_norm": 0.13171792030334473,
      "learning_rate": 2.3092422901091897e-05,
      "loss": 0.0413,
      "step": 41810
    },
    {
      "epoch": 0.00025518798828125,
      "model_forward_time": 0.11458134651184082,
      "step": 41810
    },
    {
      "epoch": 0.00025518798828125,
      "step": 41810,
      "training_step_time": 0.41390323638916016
    },
    {
      "epoch": 0.000255194091796875,
      "model_forward_time": 0.11457610130310059,
      "step": 41811
    },
    {
      "epoch": 0.000255194091796875,
      "step": 41811,
      "training_step_time": 0.5923893451690674
    },
    {
      "epoch": 0.0002552001953125,
      "model_forward_time": 0.11548304557800293,
      "step": 41812
    },
    {
      "epoch": 0.0002552001953125,
      "step": 41812,
      "training_step_time": 0.46975088119506836
    },
    {
      "epoch": 0.000255206298828125,
      "model_forward_time": 0.11467790603637695,
      "step": 41813
    },
    {
      "epoch": 0.000255206298828125,
      "step": 41813,
      "training_step_time": 0.4702918529510498
    },
    {
      "epoch": 0.00025521240234375,
      "model_forward_time": 0.11452388763427734,
      "step": 41814
    },
    {
      "epoch": 0.00025521240234375,
      "step": 41814,
      "training_step_time": 0.39589357376098633
    },
    {
      "epoch": 0.000255218505859375,
      "model_forward_time": 0.11443400382995605,
      "step": 41815
    },
    {
      "epoch": 0.000255218505859375,
      "step": 41815,
      "training_step_time": 0.39372873306274414
    },
    {
      "epoch": 0.000255224609375,
      "model_forward_time": 0.11508440971374512,
      "step": 41816
    },
    {
      "epoch": 0.000255224609375,
      "step": 41816,
      "training_step_time": 0.389662504196167
    },
    {
      "epoch": 0.000255230712890625,
      "model_forward_time": 0.11475276947021484,
      "step": 41817
    },
    {
      "epoch": 0.000255230712890625,
      "step": 41817,
      "training_step_time": 0.389268159866333
    },
    {
      "epoch": 0.00025523681640625,
      "model_forward_time": 0.11547374725341797,
      "step": 41818
    },
    {
      "epoch": 0.00025523681640625,
      "step": 41818,
      "training_step_time": 0.4025382995605469
    },
    {
      "epoch": 0.000255242919921875,
      "model_forward_time": 0.11470460891723633,
      "step": 41819
    },
    {
      "epoch": 0.000255242919921875,
      "step": 41819,
      "training_step_time": 0.4219646453857422
    },
    {
      "epoch": 0.0002552490234375,
      "grad_norm": 0.09471187740564346,
      "learning_rate": 2.3069199924299174e-05,
      "loss": 0.0359,
      "step": 41820
    },
    {
      "epoch": 0.0002552490234375,
      "model_forward_time": 0.11514878273010254,
      "step": 41820
    },
    {
      "epoch": 0.0002552490234375,
      "step": 41820,
      "training_step_time": 0.48864221572875977
    },
    {
      "epoch": 0.000255255126953125,
      "model_forward_time": 0.11526846885681152,
      "step": 41821
    },
    {
      "epoch": 0.000255255126953125,
      "step": 41821,
      "training_step_time": 0.39395809173583984
    },
    {
      "epoch": 0.00025526123046875,
      "model_forward_time": 0.1148369312286377,
      "step": 41822
    },
    {
      "epoch": 0.00025526123046875,
      "step": 41822,
      "training_step_time": 0.40324950218200684
    },
    {
      "epoch": 0.000255267333984375,
      "model_forward_time": 0.11532378196716309,
      "step": 41823
    },
    {
      "epoch": 0.000255267333984375,
      "step": 41823,
      "training_step_time": 0.4524548053741455
    },
    {
      "epoch": 0.0002552734375,
      "model_forward_time": 0.11552190780639648,
      "step": 41824
    },
    {
      "epoch": 0.0002552734375,
      "step": 41824,
      "training_step_time": 0.3916456699371338
    },
    {
      "epoch": 0.000255279541015625,
      "model_forward_time": 0.11587786674499512,
      "step": 41825
    },
    {
      "epoch": 0.000255279541015625,
      "step": 41825,
      "training_step_time": 0.40958142280578613
    },
    {
      "epoch": 0.00025528564453125,
      "model_forward_time": 0.1155555248260498,
      "step": 41826
    },
    {
      "epoch": 0.00025528564453125,
      "step": 41826,
      "training_step_time": 0.4497530460357666
    },
    {
      "epoch": 0.000255291748046875,
      "model_forward_time": 0.11526989936828613,
      "step": 41827
    },
    {
      "epoch": 0.000255291748046875,
      "step": 41827,
      "training_step_time": 0.5155138969421387
    },
    {
      "epoch": 0.0002552978515625,
      "model_forward_time": 0.11543750762939453,
      "step": 41828
    },
    {
      "epoch": 0.0002552978515625,
      "step": 41828,
      "training_step_time": 0.48648691177368164
    },
    {
      "epoch": 0.000255303955078125,
      "model_forward_time": 0.11432862281799316,
      "step": 41829
    },
    {
      "epoch": 0.000255303955078125,
      "step": 41829,
      "training_step_time": 0.39322733879089355
    },
    {
      "epoch": 0.00025531005859375,
      "grad_norm": 0.09566566348075867,
      "learning_rate": 2.3045985128372442e-05,
      "loss": 0.0432,
      "step": 41830
    },
    {
      "epoch": 0.00025531005859375,
      "model_forward_time": 0.11590790748596191,
      "step": 41830
    },
    {
      "epoch": 0.00025531005859375,
      "step": 41830,
      "training_step_time": 0.3832988739013672
    },
    {
      "epoch": 0.000255316162109375,
      "model_forward_time": 0.11452603340148926,
      "step": 41831
    },
    {
      "epoch": 0.000255316162109375,
      "step": 41831,
      "training_step_time": 0.3770120143890381
    },
    {
      "epoch": 0.000255322265625,
      "model_forward_time": 0.11516261100769043,
      "step": 41832
    },
    {
      "epoch": 0.000255322265625,
      "step": 41832,
      "training_step_time": 0.39489150047302246
    },
    {
      "epoch": 0.000255328369140625,
      "model_forward_time": 0.11508703231811523,
      "step": 41833
    },
    {
      "epoch": 0.000255328369140625,
      "step": 41833,
      "training_step_time": 0.384080171585083
    },
    {
      "epoch": 0.00025533447265625,
      "model_forward_time": 0.11571216583251953,
      "step": 41834
    },
    {
      "epoch": 0.00025533447265625,
      "step": 41834,
      "training_step_time": 0.43631792068481445
    },
    {
      "epoch": 0.000255340576171875,
      "model_forward_time": 0.11478972434997559,
      "step": 41835
    },
    {
      "epoch": 0.000255340576171875,
      "step": 41835,
      "training_step_time": 0.6014595031738281
    },
    {
      "epoch": 0.0002553466796875,
      "model_forward_time": 0.11499691009521484,
      "step": 41836
    },
    {
      "epoch": 0.0002553466796875,
      "step": 41836,
      "training_step_time": 0.4309830665588379
    },
    {
      "epoch": 0.000255352783203125,
      "model_forward_time": 0.11445474624633789,
      "step": 41837
    },
    {
      "epoch": 0.000255352783203125,
      "step": 41837,
      "training_step_time": 0.40564918518066406
    },
    {
      "epoch": 0.00025535888671875,
      "model_forward_time": 0.1147010326385498,
      "step": 41838
    },
    {
      "epoch": 0.00025535888671875,
      "step": 41838,
      "training_step_time": 0.3969690799713135
    },
    {
      "epoch": 0.000255364990234375,
      "model_forward_time": 0.11439776420593262,
      "step": 41839
    },
    {
      "epoch": 0.000255364990234375,
      "step": 41839,
      "training_step_time": 0.4395475387573242
    },
    {
      "epoch": 0.00025537109375,
      "grad_norm": 0.08848735690116882,
      "learning_rate": 2.3022778520363753e-05,
      "loss": 0.038,
      "step": 41840
    },
    {
      "epoch": 0.00025537109375,
      "model_forward_time": 0.11426782608032227,
      "step": 41840
    },
    {
      "epoch": 0.00025537109375,
      "step": 41840,
      "training_step_time": 0.3965780735015869
    },
    {
      "epoch": 0.000255377197265625,
      "model_forward_time": 0.11488962173461914,
      "step": 41841
    },
    {
      "epoch": 0.000255377197265625,
      "step": 41841,
      "training_step_time": 0.5098679065704346
    },
    {
      "epoch": 0.00025538330078125,
      "model_forward_time": 0.11458373069763184,
      "step": 41842
    },
    {
      "epoch": 0.00025538330078125,
      "step": 41842,
      "training_step_time": 0.4953467845916748
    },
    {
      "epoch": 0.000255389404296875,
      "model_forward_time": 0.1142892837524414,
      "step": 41843
    },
    {
      "epoch": 0.000255389404296875,
      "step": 41843,
      "training_step_time": 0.3853719234466553
    },
    {
      "epoch": 0.0002553955078125,
      "model_forward_time": 0.1151127815246582,
      "step": 41844
    },
    {
      "epoch": 0.0002553955078125,
      "step": 41844,
      "training_step_time": 0.4041461944580078
    },
    {
      "epoch": 0.000255401611328125,
      "model_forward_time": 0.11496973037719727,
      "step": 41845
    },
    {
      "epoch": 0.000255401611328125,
      "step": 41845,
      "training_step_time": 0.3872034549713135
    },
    {
      "epoch": 0.00025540771484375,
      "model_forward_time": 0.11581277847290039,
      "step": 41846
    },
    {
      "epoch": 0.00025540771484375,
      "step": 41846,
      "training_step_time": 0.41393470764160156
    },
    {
      "epoch": 0.000255413818359375,
      "model_forward_time": 0.11603426933288574,
      "step": 41847
    },
    {
      "epoch": 0.000255413818359375,
      "step": 41847,
      "training_step_time": 0.4361574649810791
    },
    {
      "epoch": 0.000255419921875,
      "model_forward_time": 0.11511015892028809,
      "step": 41848
    },
    {
      "epoch": 0.000255419921875,
      "step": 41848,
      "training_step_time": 0.39092183113098145
    },
    {
      "epoch": 0.000255426025390625,
      "model_forward_time": 0.11545896530151367,
      "step": 41849
    },
    {
      "epoch": 0.000255426025390625,
      "step": 41849,
      "training_step_time": 0.392441987991333
    },
    {
      "epoch": 0.00025543212890625,
      "grad_norm": 0.12261786311864853,
      "learning_rate": 2.2999580107322653e-05,
      "loss": 0.0359,
      "step": 41850
    },
    {
      "epoch": 0.00025543212890625,
      "model_forward_time": 0.11541390419006348,
      "step": 41850
    },
    {
      "epoch": 0.00025543212890625,
      "step": 41850,
      "training_step_time": 0.4406754970550537
    },
    {
      "epoch": 0.000255438232421875,
      "model_forward_time": 0.11590242385864258,
      "step": 41851
    },
    {
      "epoch": 0.000255438232421875,
      "step": 41851,
      "training_step_time": 0.40559816360473633
    },
    {
      "epoch": 0.0002554443359375,
      "model_forward_time": 0.11533832550048828,
      "step": 41852
    },
    {
      "epoch": 0.0002554443359375,
      "step": 41852,
      "training_step_time": 0.3956277370452881
    },
    {
      "epoch": 0.000255450439453125,
      "model_forward_time": 0.11514496803283691,
      "step": 41853
    },
    {
      "epoch": 0.000255450439453125,
      "step": 41853,
      "training_step_time": 0.5794646739959717
    },
    {
      "epoch": 0.00025545654296875,
      "model_forward_time": 0.1148684024810791,
      "step": 41854
    },
    {
      "epoch": 0.00025545654296875,
      "step": 41854,
      "training_step_time": 0.4423103332519531
    },
    {
      "epoch": 0.000255462646484375,
      "model_forward_time": 0.11609935760498047,
      "step": 41855
    },
    {
      "epoch": 0.000255462646484375,
      "step": 41855,
      "training_step_time": 0.41512250900268555
    },
    {
      "epoch": 0.00025546875,
      "model_forward_time": 0.11504673957824707,
      "step": 41856
    },
    {
      "epoch": 0.00025546875,
      "step": 41856,
      "training_step_time": 0.46209096908569336
    },
    {
      "epoch": 0.000255474853515625,
      "model_forward_time": 0.11657381057739258,
      "step": 41857
    },
    {
      "epoch": 0.000255474853515625,
      "step": 41857,
      "training_step_time": 0.41483545303344727
    },
    {
      "epoch": 0.00025548095703125,
      "model_forward_time": 0.11501264572143555,
      "step": 41858
    },
    {
      "epoch": 0.00025548095703125,
      "step": 41858,
      "training_step_time": 0.3949296474456787
    },
    {
      "epoch": 0.000255487060546875,
      "model_forward_time": 0.11506175994873047,
      "step": 41859
    },
    {
      "epoch": 0.000255487060546875,
      "step": 41859,
      "training_step_time": 0.5399186611175537
    },
    {
      "epoch": 0.0002554931640625,
      "grad_norm": 0.10216988623142242,
      "learning_rate": 2.2976389896296203e-05,
      "loss": 0.038,
      "step": 41860
    },
    {
      "epoch": 0.0002554931640625,
      "model_forward_time": 0.11536979675292969,
      "step": 41860
    },
    {
      "epoch": 0.0002554931640625,
      "step": 41860,
      "training_step_time": 0.3843529224395752
    },
    {
      "epoch": 0.000255499267578125,
      "model_forward_time": 0.11440420150756836,
      "step": 41861
    },
    {
      "epoch": 0.000255499267578125,
      "step": 41861,
      "training_step_time": 0.41766905784606934
    },
    {
      "epoch": 0.00025550537109375,
      "model_forward_time": 0.11459159851074219,
      "step": 41862
    },
    {
      "epoch": 0.00025550537109375,
      "step": 41862,
      "training_step_time": 0.5018036365509033
    },
    {
      "epoch": 0.000255511474609375,
      "model_forward_time": 0.11513304710388184,
      "step": 41863
    },
    {
      "epoch": 0.000255511474609375,
      "step": 41863,
      "training_step_time": 0.3960297107696533
    },
    {
      "epoch": 0.000255517578125,
      "model_forward_time": 0.11536407470703125,
      "step": 41864
    },
    {
      "epoch": 0.000255517578125,
      "step": 41864,
      "training_step_time": 0.387941837310791
    },
    {
      "epoch": 0.000255523681640625,
      "model_forward_time": 0.11497306823730469,
      "step": 41865
    },
    {
      "epoch": 0.000255523681640625,
      "step": 41865,
      "training_step_time": 0.4561622142791748
    },
    {
      "epoch": 0.00025552978515625,
      "model_forward_time": 0.11437535285949707,
      "step": 41866
    },
    {
      "epoch": 0.00025552978515625,
      "step": 41866,
      "training_step_time": 0.3870210647583008
    },
    {
      "epoch": 0.000255535888671875,
      "model_forward_time": 0.11523938179016113,
      "step": 41867
    },
    {
      "epoch": 0.000255535888671875,
      "step": 41867,
      "training_step_time": 0.3934295177459717
    },
    {
      "epoch": 0.0002555419921875,
      "model_forward_time": 0.11569523811340332,
      "step": 41868
    },
    {
      "epoch": 0.0002555419921875,
      "step": 41868,
      "training_step_time": 0.3891592025756836
    },
    {
      "epoch": 0.000255548095703125,
      "model_forward_time": 0.11546039581298828,
      "step": 41869
    },
    {
      "epoch": 0.000255548095703125,
      "step": 41869,
      "training_step_time": 0.3673689365386963
    },
    {
      "epoch": 0.00025555419921875,
      "grad_norm": 0.1260281503200531,
      "learning_rate": 2.2953207894328992e-05,
      "loss": 0.0395,
      "step": 41870
    },
    {
      "epoch": 0.00025555419921875,
      "model_forward_time": 0.11515212059020996,
      "step": 41870
    },
    {
      "epoch": 0.00025555419921875,
      "step": 41870,
      "training_step_time": 0.455244779586792
    },
    {
      "epoch": 0.000255560302734375,
      "model_forward_time": 0.11480093002319336,
      "step": 41871
    },
    {
      "epoch": 0.000255560302734375,
      "step": 41871,
      "training_step_time": 0.4973330497741699
    },
    {
      "epoch": 0.00025556640625,
      "model_forward_time": 0.11463141441345215,
      "step": 41872
    },
    {
      "epoch": 0.00025556640625,
      "step": 41872,
      "training_step_time": 0.38520312309265137
    },
    {
      "epoch": 0.000255572509765625,
      "model_forward_time": 0.11443161964416504,
      "step": 41873
    },
    {
      "epoch": 0.000255572509765625,
      "step": 41873,
      "training_step_time": 0.40518665313720703
    },
    {
      "epoch": 0.00025557861328125,
      "model_forward_time": 0.11404871940612793,
      "step": 41874
    },
    {
      "epoch": 0.00025557861328125,
      "step": 41874,
      "training_step_time": 0.45174360275268555
    },
    {
      "epoch": 0.000255584716796875,
      "model_forward_time": 0.11529159545898438,
      "step": 41875
    },
    {
      "epoch": 0.000255584716796875,
      "step": 41875,
      "training_step_time": 0.3980846405029297
    },
    {
      "epoch": 0.0002555908203125,
      "model_forward_time": 0.11498308181762695,
      "step": 41876
    },
    {
      "epoch": 0.0002555908203125,
      "step": 41876,
      "training_step_time": 0.4107048511505127
    },
    {
      "epoch": 0.000255596923828125,
      "model_forward_time": 0.11427187919616699,
      "step": 41877
    },
    {
      "epoch": 0.000255596923828125,
      "step": 41877,
      "training_step_time": 0.4765920639038086
    },
    {
      "epoch": 0.00025560302734375,
      "model_forward_time": 0.11578679084777832,
      "step": 41878
    },
    {
      "epoch": 0.00025560302734375,
      "step": 41878,
      "training_step_time": 0.3979060649871826
    },
    {
      "epoch": 0.000255609130859375,
      "model_forward_time": 0.11488866806030273,
      "step": 41879
    },
    {
      "epoch": 0.000255609130859375,
      "step": 41879,
      "training_step_time": 0.3989286422729492
    },
    {
      "epoch": 0.000255615234375,
      "grad_norm": 0.12056189030408859,
      "learning_rate": 2.29300341084631e-05,
      "loss": 0.0344,
      "step": 41880
    },
    {
      "epoch": 0.000255615234375,
      "model_forward_time": 0.11400699615478516,
      "step": 41880
    },
    {
      "epoch": 0.000255615234375,
      "step": 41880,
      "training_step_time": 0.396986722946167
    },
    {
      "epoch": 0.000255621337890625,
      "model_forward_time": 0.11491799354553223,
      "step": 41881
    },
    {
      "epoch": 0.000255621337890625,
      "step": 41881,
      "training_step_time": 0.39447021484375
    },
    {
      "epoch": 0.00025562744140625,
      "model_forward_time": 0.11421632766723633,
      "step": 41882
    },
    {
      "epoch": 0.00025562744140625,
      "step": 41882,
      "training_step_time": 0.48958778381347656
    },
    {
      "epoch": 0.000255633544921875,
      "model_forward_time": 0.11433911323547363,
      "step": 41883
    },
    {
      "epoch": 0.000255633544921875,
      "step": 41883,
      "training_step_time": 0.4455118179321289
    },
    {
      "epoch": 0.0002556396484375,
      "model_forward_time": 0.11490583419799805,
      "step": 41884
    },
    {
      "epoch": 0.0002556396484375,
      "step": 41884,
      "training_step_time": 0.4707350730895996
    },
    {
      "epoch": 0.000255645751953125,
      "model_forward_time": 0.11469745635986328,
      "step": 41885
    },
    {
      "epoch": 0.000255645751953125,
      "step": 41885,
      "training_step_time": 0.423234224319458
    },
    {
      "epoch": 0.00025565185546875,
      "model_forward_time": 0.11570310592651367,
      "step": 41886
    },
    {
      "epoch": 0.00025565185546875,
      "step": 41886,
      "training_step_time": 0.4060218334197998
    },
    {
      "epoch": 0.000255657958984375,
      "model_forward_time": 0.11527299880981445,
      "step": 41887
    },
    {
      "epoch": 0.000255657958984375,
      "step": 41887,
      "training_step_time": 0.3950216770172119
    },
    {
      "epoch": 0.0002556640625,
      "model_forward_time": 0.11472749710083008,
      "step": 41888
    },
    {
      "epoch": 0.0002556640625,
      "step": 41888,
      "training_step_time": 0.40662050247192383
    },
    {
      "epoch": 0.000255670166015625,
      "model_forward_time": 0.11530017852783203,
      "step": 41889
    },
    {
      "epoch": 0.000255670166015625,
      "step": 41889,
      "training_step_time": 0.4265420436859131
    },
    {
      "epoch": 0.00025567626953125,
      "grad_norm": 0.08329152315855026,
      "learning_rate": 2.2906868545738102e-05,
      "loss": 0.0403,
      "step": 41890
    },
    {
      "epoch": 0.00025567626953125,
      "model_forward_time": 0.11580371856689453,
      "step": 41890
    },
    {
      "epoch": 0.00025567626953125,
      "step": 41890,
      "training_step_time": 0.4846913814544678
    },
    {
      "epoch": 0.000255682373046875,
      "model_forward_time": 0.11484694480895996,
      "step": 41891
    },
    {
      "epoch": 0.000255682373046875,
      "step": 41891,
      "training_step_time": 0.42566537857055664
    },
    {
      "epoch": 0.0002556884765625,
      "model_forward_time": 0.11510896682739258,
      "step": 41892
    },
    {
      "epoch": 0.0002556884765625,
      "step": 41892,
      "training_step_time": 0.39540767669677734
    },
    {
      "epoch": 0.000255694580078125,
      "model_forward_time": 0.11497902870178223,
      "step": 41893
    },
    {
      "epoch": 0.000255694580078125,
      "step": 41893,
      "training_step_time": 0.3899836540222168
    },
    {
      "epoch": 0.00025570068359375,
      "model_forward_time": 0.11489605903625488,
      "step": 41894
    },
    {
      "epoch": 0.00025570068359375,
      "step": 41894,
      "training_step_time": 0.38722944259643555
    },
    {
      "epoch": 0.000255706787109375,
      "model_forward_time": 0.11437678337097168,
      "step": 41895
    },
    {
      "epoch": 0.000255706787109375,
      "step": 41895,
      "training_step_time": 0.38286852836608887
    },
    {
      "epoch": 0.000255712890625,
      "model_forward_time": 0.1150510311126709,
      "step": 41896
    },
    {
      "epoch": 0.000255712890625,
      "step": 41896,
      "training_step_time": 0.4080190658569336
    },
    {
      "epoch": 0.000255718994140625,
      "model_forward_time": 0.11510634422302246,
      "step": 41897
    },
    {
      "epoch": 0.000255718994140625,
      "step": 41897,
      "training_step_time": 0.41672277450561523
    },
    {
      "epoch": 0.00025572509765625,
      "model_forward_time": 0.11528921127319336,
      "step": 41898
    },
    {
      "epoch": 0.00025572509765625,
      "step": 41898,
      "training_step_time": 0.45324277877807617
    },
    {
      "epoch": 0.000255731201171875,
      "model_forward_time": 0.1148993968963623,
      "step": 41899
    },
    {
      "epoch": 0.000255731201171875,
      "step": 41899,
      "training_step_time": 0.47287583351135254
    },
    {
      "epoch": 0.0002557373046875,
      "grad_norm": 0.10376047343015671,
      "learning_rate": 2.288371121319109e-05,
      "loss": 0.0356,
      "step": 41900
    },
    {
      "epoch": 0.0002557373046875,
      "model_forward_time": 0.11542463302612305,
      "step": 41900
    },
    {
      "epoch": 0.0002557373046875,
      "step": 41900,
      "training_step_time": 0.3909263610839844
    },
    {
      "epoch": 0.000255743408203125,
      "model_forward_time": 0.11513948440551758,
      "step": 41901
    },
    {
      "epoch": 0.000255743408203125,
      "step": 41901,
      "training_step_time": 0.42191195487976074
    },
    {
      "epoch": 0.00025574951171875,
      "model_forward_time": 0.11440825462341309,
      "step": 41902
    },
    {
      "epoch": 0.00025574951171875,
      "step": 41902,
      "training_step_time": 0.3963510990142822
    },
    {
      "epoch": 0.000255755615234375,
      "model_forward_time": 0.11514902114868164,
      "step": 41903
    },
    {
      "epoch": 0.000255755615234375,
      "step": 41903,
      "training_step_time": 0.4032456874847412
    },
    {
      "epoch": 0.00025576171875,
      "model_forward_time": 0.11459660530090332,
      "step": 41904
    },
    {
      "epoch": 0.00025576171875,
      "step": 41904,
      "training_step_time": 0.45305657386779785
    },
    {
      "epoch": 0.000255767822265625,
      "model_forward_time": 0.11496329307556152,
      "step": 41905
    },
    {
      "epoch": 0.000255767822265625,
      "step": 41905,
      "training_step_time": 0.42011523246765137
    },
    {
      "epoch": 0.00025577392578125,
      "model_forward_time": 0.11451029777526855,
      "step": 41906
    },
    {
      "epoch": 0.00025577392578125,
      "step": 41906,
      "training_step_time": 0.4640939235687256
    },
    {
      "epoch": 0.000255780029296875,
      "model_forward_time": 0.11492657661437988,
      "step": 41907
    },
    {
      "epoch": 0.000255780029296875,
      "step": 41907,
      "training_step_time": 0.40649867057800293
    },
    {
      "epoch": 0.0002557861328125,
      "model_forward_time": 0.11596322059631348,
      "step": 41908
    },
    {
      "epoch": 0.0002557861328125,
      "step": 41908,
      "training_step_time": 0.40267491340637207
    },
    {
      "epoch": 0.000255792236328125,
      "model_forward_time": 0.11475634574890137,
      "step": 41909
    },
    {
      "epoch": 0.000255792236328125,
      "step": 41909,
      "training_step_time": 0.40210604667663574
    },
    {
      "epoch": 0.00025579833984375,
      "grad_norm": 0.1145579069852829,
      "learning_rate": 2.2860562117856647e-05,
      "loss": 0.0365,
      "step": 41910
    },
    {
      "epoch": 0.00025579833984375,
      "model_forward_time": 0.11440300941467285,
      "step": 41910
    },
    {
      "epoch": 0.00025579833984375,
      "step": 41910,
      "training_step_time": 0.39260244369506836
    },
    {
      "epoch": 0.000255804443359375,
      "model_forward_time": 0.11487555503845215,
      "step": 41911
    },
    {
      "epoch": 0.000255804443359375,
      "step": 41911,
      "training_step_time": 0.3984498977661133
    },
    {
      "epoch": 0.000255810546875,
      "model_forward_time": 0.11500167846679688,
      "step": 41912
    },
    {
      "epoch": 0.000255810546875,
      "step": 41912,
      "training_step_time": 0.46246886253356934
    },
    {
      "epoch": 0.000255816650390625,
      "model_forward_time": 0.11478018760681152,
      "step": 41913
    },
    {
      "epoch": 0.000255816650390625,
      "step": 41913,
      "training_step_time": 0.4938197135925293
    },
    {
      "epoch": 0.00025582275390625,
      "model_forward_time": 0.11499953269958496,
      "step": 41914
    },
    {
      "epoch": 0.00025582275390625,
      "step": 41914,
      "training_step_time": 0.4005870819091797
    },
    {
      "epoch": 0.000255828857421875,
      "model_forward_time": 0.11498475074768066,
      "step": 41915
    },
    {
      "epoch": 0.000255828857421875,
      "step": 41915,
      "training_step_time": 0.453230619430542
    },
    {
      "epoch": 0.0002558349609375,
      "model_forward_time": 0.1145932674407959,
      "step": 41916
    },
    {
      "epoch": 0.0002558349609375,
      "step": 41916,
      "training_step_time": 0.3936464786529541
    },
    {
      "epoch": 0.000255841064453125,
      "model_forward_time": 0.11498260498046875,
      "step": 41917
    },
    {
      "epoch": 0.000255841064453125,
      "step": 41917,
      "training_step_time": 0.4261133670806885
    },
    {
      "epoch": 0.00025584716796875,
      "model_forward_time": 0.11467814445495605,
      "step": 41918
    },
    {
      "epoch": 0.00025584716796875,
      "step": 41918,
      "training_step_time": 0.41428256034851074
    },
    {
      "epoch": 0.000255853271484375,
      "model_forward_time": 0.11428618431091309,
      "step": 41919
    },
    {
      "epoch": 0.000255853271484375,
      "step": 41919,
      "training_step_time": 0.5191285610198975
    },
    {
      "epoch": 0.000255859375,
      "grad_norm": 0.10030271112918854,
      "learning_rate": 2.2837421266766857e-05,
      "loss": 0.0452,
      "step": 41920
    },
    {
      "epoch": 0.000255859375,
      "model_forward_time": 0.11459636688232422,
      "step": 41920
    },
    {
      "epoch": 0.000255859375,
      "step": 41920,
      "training_step_time": 0.3869667053222656
    },
    {
      "epoch": 0.000255865478515625,
      "model_forward_time": 0.11448073387145996,
      "step": 41921
    },
    {
      "epoch": 0.000255865478515625,
      "step": 41921,
      "training_step_time": 0.3911876678466797
    },
    {
      "epoch": 0.00025587158203125,
      "model_forward_time": 0.11547017097473145,
      "step": 41922
    },
    {
      "epoch": 0.00025587158203125,
      "step": 41922,
      "training_step_time": 0.40734076499938965
    },
    {
      "epoch": 0.000255877685546875,
      "model_forward_time": 0.11479806900024414,
      "step": 41923
    },
    {
      "epoch": 0.000255877685546875,
      "step": 41923,
      "training_step_time": 0.39111328125
    },
    {
      "epoch": 0.0002558837890625,
      "model_forward_time": 0.11514568328857422,
      "step": 41924
    },
    {
      "epoch": 0.0002558837890625,
      "step": 41924,
      "training_step_time": 0.3948171138763428
    },
    {
      "epoch": 0.000255889892578125,
      "model_forward_time": 0.11500978469848633,
      "step": 41925
    },
    {
      "epoch": 0.000255889892578125,
      "step": 41925,
      "training_step_time": 0.40132737159729004
    },
    {
      "epoch": 0.00025589599609375,
      "model_forward_time": 0.11602020263671875,
      "step": 41926
    },
    {
      "epoch": 0.00025589599609375,
      "step": 41926,
      "training_step_time": 0.4911022186279297
    },
    {
      "epoch": 0.000255902099609375,
      "model_forward_time": 0.11524510383605957,
      "step": 41927
    },
    {
      "epoch": 0.000255902099609375,
      "step": 41927,
      "training_step_time": 0.4106168746948242
    },
    {
      "epoch": 0.000255908203125,
      "model_forward_time": 0.11459875106811523,
      "step": 41928
    },
    {
      "epoch": 0.000255908203125,
      "step": 41928,
      "training_step_time": 0.36598920822143555
    },
    {
      "epoch": 0.000255914306640625,
      "model_forward_time": 0.11501884460449219,
      "step": 41929
    },
    {
      "epoch": 0.000255914306640625,
      "step": 41929,
      "training_step_time": 0.45784616470336914
    },
    {
      "epoch": 0.00025592041015625,
      "grad_norm": 0.10410252958536148,
      "learning_rate": 2.281428866695128e-05,
      "loss": 0.0402,
      "step": 41930
    },
    {
      "epoch": 0.00025592041015625,
      "model_forward_time": 0.11521029472351074,
      "step": 41930
    },
    {
      "epoch": 0.00025592041015625,
      "step": 41930,
      "training_step_time": 0.4157571792602539
    },
    {
      "epoch": 0.000255926513671875,
      "model_forward_time": 0.11494231224060059,
      "step": 41931
    },
    {
      "epoch": 0.000255926513671875,
      "step": 41931,
      "training_step_time": 0.4206383228302002
    },
    {
      "epoch": 0.0002559326171875,
      "model_forward_time": 0.11562681198120117,
      "step": 41932
    },
    {
      "epoch": 0.0002559326171875,
      "step": 41932,
      "training_step_time": 0.3880476951599121
    },
    {
      "epoch": 0.000255938720703125,
      "model_forward_time": 0.11559391021728516,
      "step": 41933
    },
    {
      "epoch": 0.000255938720703125,
      "step": 41933,
      "training_step_time": 0.4470953941345215
    },
    {
      "epoch": 0.00025594482421875,
      "model_forward_time": 0.11522054672241211,
      "step": 41934
    },
    {
      "epoch": 0.00025594482421875,
      "step": 41934,
      "training_step_time": 0.4386916160583496
    },
    {
      "epoch": 0.000255950927734375,
      "model_forward_time": 0.11584281921386719,
      "step": 41935
    },
    {
      "epoch": 0.000255950927734375,
      "step": 41935,
      "training_step_time": 0.39624500274658203
    },
    {
      "epoch": 0.00025595703125,
      "model_forward_time": 0.11529016494750977,
      "step": 41936
    },
    {
      "epoch": 0.00025595703125,
      "step": 41936,
      "training_step_time": 0.3955841064453125
    },
    {
      "epoch": 0.000255963134765625,
      "model_forward_time": 0.11516857147216797,
      "step": 41937
    },
    {
      "epoch": 0.000255963134765625,
      "step": 41937,
      "training_step_time": 0.4030468463897705
    },
    {
      "epoch": 0.00025596923828125,
      "model_forward_time": 0.11580085754394531,
      "step": 41938
    },
    {
      "epoch": 0.00025596923828125,
      "step": 41938,
      "training_step_time": 0.387768030166626
    },
    {
      "epoch": 0.000255975341796875,
      "model_forward_time": 0.11480021476745605,
      "step": 41939
    },
    {
      "epoch": 0.000255975341796875,
      "step": 41939,
      "training_step_time": 0.3964266777038574
    },
    {
      "epoch": 0.0002559814453125,
      "grad_norm": 0.13113699853420258,
      "learning_rate": 2.279116432543705e-05,
      "loss": 0.0366,
      "step": 41940
    },
    {
      "epoch": 0.0002559814453125,
      "model_forward_time": 0.1151731014251709,
      "step": 41940
    },
    {
      "epoch": 0.0002559814453125,
      "step": 41940,
      "training_step_time": 0.4042692184448242
    },
    {
      "epoch": 0.000255987548828125,
      "model_forward_time": 0.11512637138366699,
      "step": 41941
    },
    {
      "epoch": 0.000255987548828125,
      "step": 41941,
      "training_step_time": 0.4006636142730713
    },
    {
      "epoch": 0.00025599365234375,
      "model_forward_time": 0.11529755592346191,
      "step": 41942
    },
    {
      "epoch": 0.00025599365234375,
      "step": 41942,
      "training_step_time": 0.40447211265563965
    },
    {
      "epoch": 0.000255999755859375,
      "model_forward_time": 0.11629581451416016,
      "step": 41943
    },
    {
      "epoch": 0.000255999755859375,
      "step": 41943,
      "training_step_time": 0.44126152992248535
    },
    {
      "epoch": 0.000256005859375,
      "model_forward_time": 0.11446976661682129,
      "step": 41944
    },
    {
      "epoch": 0.000256005859375,
      "step": 41944,
      "training_step_time": 0.3987429141998291
    },
    {
      "epoch": 0.000256011962890625,
      "model_forward_time": 0.11506867408752441,
      "step": 41945
    },
    {
      "epoch": 0.000256011962890625,
      "step": 41945,
      "training_step_time": 0.44202542304992676
    },
    {
      "epoch": 0.00025601806640625,
      "model_forward_time": 0.11518383026123047,
      "step": 41946
    },
    {
      "epoch": 0.00025601806640625,
      "step": 41946,
      "training_step_time": 0.5077338218688965
    },
    {
      "epoch": 0.000256024169921875,
      "model_forward_time": 0.11516094207763672,
      "step": 41947
    },
    {
      "epoch": 0.000256024169921875,
      "step": 41947,
      "training_step_time": 0.3932623863220215
    },
    {
      "epoch": 0.0002560302734375,
      "model_forward_time": 0.11495184898376465,
      "step": 41948
    },
    {
      "epoch": 0.0002560302734375,
      "step": 41948,
      "training_step_time": 0.385120153427124
    },
    {
      "epoch": 0.000256036376953125,
      "model_forward_time": 0.11472368240356445,
      "step": 41949
    },
    {
      "epoch": 0.000256036376953125,
      "step": 41949,
      "training_step_time": 0.3867471218109131
    },
    {
      "epoch": 0.00025604248046875,
      "grad_norm": 0.12747834622859955,
      "learning_rate": 2.2768048249248648e-05,
      "loss": 0.0396,
      "step": 41950
    },
    {
      "epoch": 0.00025604248046875,
      "model_forward_time": 0.11569929122924805,
      "step": 41950
    },
    {
      "epoch": 0.00025604248046875,
      "step": 41950,
      "training_step_time": 0.39487433433532715
    },
    {
      "epoch": 0.000256048583984375,
      "model_forward_time": 0.11468338966369629,
      "step": 41951
    },
    {
      "epoch": 0.000256048583984375,
      "step": 41951,
      "training_step_time": 0.4002058506011963
    },
    {
      "epoch": 0.0002560546875,
      "model_forward_time": 0.11516261100769043,
      "step": 41952
    },
    {
      "epoch": 0.0002560546875,
      "step": 41952,
      "training_step_time": 0.39841771125793457
    },
    {
      "epoch": 0.000256060791015625,
      "model_forward_time": 0.11520624160766602,
      "step": 41953
    },
    {
      "epoch": 0.000256060791015625,
      "step": 41953,
      "training_step_time": 0.39510154724121094
    },
    {
      "epoch": 0.00025606689453125,
      "model_forward_time": 0.1157233715057373,
      "step": 41954
    },
    {
      "epoch": 0.00025606689453125,
      "step": 41954,
      "training_step_time": 0.40123581886291504
    },
    {
      "epoch": 0.000256072998046875,
      "model_forward_time": 0.11519908905029297,
      "step": 41955
    },
    {
      "epoch": 0.000256072998046875,
      "step": 41955,
      "training_step_time": 0.3937973976135254
    },
    {
      "epoch": 0.0002560791015625,
      "model_forward_time": 0.11614155769348145,
      "step": 41956
    },
    {
      "epoch": 0.0002560791015625,
      "step": 41956,
      "training_step_time": 0.44201135635375977
    },
    {
      "epoch": 0.000256085205078125,
      "model_forward_time": 0.11565661430358887,
      "step": 41957
    },
    {
      "epoch": 0.000256085205078125,
      "step": 41957,
      "training_step_time": 0.40972208976745605
    },
    {
      "epoch": 0.00025609130859375,
      "model_forward_time": 0.1156153678894043,
      "step": 41958
    },
    {
      "epoch": 0.00025609130859375,
      "step": 41958,
      "training_step_time": 0.45359277725219727
    },
    {
      "epoch": 0.000256097412109375,
      "model_forward_time": 0.11554265022277832,
      "step": 41959
    },
    {
      "epoch": 0.000256097412109375,
      "step": 41959,
      "training_step_time": 0.47638821601867676
    },
    {
      "epoch": 0.000256103515625,
      "grad_norm": 0.09379544109106064,
      "learning_rate": 2.2744940445408202e-05,
      "loss": 0.0382,
      "step": 41960
    },
    {
      "epoch": 0.000256103515625,
      "model_forward_time": 0.1149148941040039,
      "step": 41960
    },
    {
      "epoch": 0.000256103515625,
      "step": 41960,
      "training_step_time": 0.4190492630004883
    },
    {
      "epoch": 0.000256109619140625,
      "model_forward_time": 0.11504411697387695,
      "step": 41961
    },
    {
      "epoch": 0.000256109619140625,
      "step": 41961,
      "training_step_time": 0.505699872970581
    },
    {
      "epoch": 0.00025611572265625,
      "model_forward_time": 0.11510562896728516,
      "step": 41962
    },
    {
      "epoch": 0.00025611572265625,
      "step": 41962,
      "training_step_time": 0.4599440097808838
    },
    {
      "epoch": 0.000256121826171875,
      "model_forward_time": 0.11489605903625488,
      "step": 41963
    },
    {
      "epoch": 0.000256121826171875,
      "step": 41963,
      "training_step_time": 0.5121939182281494
    },
    {
      "epoch": 0.0002561279296875,
      "model_forward_time": 0.1146860122680664,
      "step": 41964
    },
    {
      "epoch": 0.0002561279296875,
      "step": 41964,
      "training_step_time": 0.39615416526794434
    },
    {
      "epoch": 0.000256134033203125,
      "model_forward_time": 0.11487221717834473,
      "step": 41965
    },
    {
      "epoch": 0.000256134033203125,
      "step": 41965,
      "training_step_time": 0.39257216453552246
    },
    {
      "epoch": 0.00025614013671875,
      "model_forward_time": 0.11458611488342285,
      "step": 41966
    },
    {
      "epoch": 0.00025614013671875,
      "step": 41966,
      "training_step_time": 0.3881709575653076
    },
    {
      "epoch": 0.000256146240234375,
      "model_forward_time": 0.1152186393737793,
      "step": 41967
    },
    {
      "epoch": 0.000256146240234375,
      "step": 41967,
      "training_step_time": 0.4430725574493408
    },
    {
      "epoch": 0.00025615234375,
      "model_forward_time": 0.11509466171264648,
      "step": 41968
    },
    {
      "epoch": 0.00025615234375,
      "step": 41968,
      "training_step_time": 0.3961925506591797
    },
    {
      "epoch": 0.000256158447265625,
      "model_forward_time": 0.11562538146972656,
      "step": 41969
    },
    {
      "epoch": 0.000256158447265625,
      "step": 41969,
      "training_step_time": 0.41309285163879395
    },
    {
      "epoch": 0.00025616455078125,
      "grad_norm": 0.08710911870002747,
      "learning_rate": 2.2721840920935196e-05,
      "loss": 0.0367,
      "step": 41970
    },
    {
      "epoch": 0.00025616455078125,
      "model_forward_time": 0.11506772041320801,
      "step": 41970
    },
    {
      "epoch": 0.00025616455078125,
      "step": 41970,
      "training_step_time": 0.39577627182006836
    },
    {
      "epoch": 0.000256170654296875,
      "model_forward_time": 0.11579561233520508,
      "step": 41971
    },
    {
      "epoch": 0.000256170654296875,
      "step": 41971,
      "training_step_time": 0.41047096252441406
    },
    {
      "epoch": 0.0002561767578125,
      "model_forward_time": 0.11478424072265625,
      "step": 41972
    },
    {
      "epoch": 0.0002561767578125,
      "step": 41972,
      "training_step_time": 0.41346144676208496
    },
    {
      "epoch": 0.000256182861328125,
      "model_forward_time": 0.11486530303955078,
      "step": 41973
    },
    {
      "epoch": 0.000256182861328125,
      "step": 41973,
      "training_step_time": 0.4937417507171631
    },
    {
      "epoch": 0.00025618896484375,
      "model_forward_time": 0.11495280265808105,
      "step": 41974
    },
    {
      "epoch": 0.00025618896484375,
      "step": 41974,
      "training_step_time": 0.4541285037994385
    },
    {
      "epoch": 0.000256195068359375,
      "model_forward_time": 0.11467456817626953,
      "step": 41975
    },
    {
      "epoch": 0.000256195068359375,
      "step": 41975,
      "training_step_time": 0.5106592178344727
    },
    {
      "epoch": 0.000256201171875,
      "model_forward_time": 0.11481428146362305,
      "step": 41976
    },
    {
      "epoch": 0.000256201171875,
      "step": 41976,
      "training_step_time": 0.4182300567626953
    },
    {
      "epoch": 0.000256207275390625,
      "model_forward_time": 0.11371064186096191,
      "step": 41977
    },
    {
      "epoch": 0.000256207275390625,
      "step": 41977,
      "training_step_time": 0.49030375480651855
    },
    {
      "epoch": 0.00025621337890625,
      "model_forward_time": 0.11455535888671875,
      "step": 41978
    },
    {
      "epoch": 0.00025621337890625,
      "step": 41978,
      "training_step_time": 0.40694689750671387
    },
    {
      "epoch": 0.000256219482421875,
      "model_forward_time": 0.11427927017211914,
      "step": 41979
    },
    {
      "epoch": 0.000256219482421875,
      "step": 41979,
      "training_step_time": 0.3947110176086426
    },
    {
      "epoch": 0.0002562255859375,
      "grad_norm": 0.11345867812633514,
      "learning_rate": 2.2698749682846687e-05,
      "loss": 0.0364,
      "step": 41980
    },
    {
      "epoch": 0.0002562255859375,
      "model_forward_time": 0.11520242691040039,
      "step": 41980
    },
    {
      "epoch": 0.0002562255859375,
      "step": 41980,
      "training_step_time": 0.3898630142211914
    },
    {
      "epoch": 0.000256231689453125,
      "model_forward_time": 0.11416888236999512,
      "step": 41981
    },
    {
      "epoch": 0.000256231689453125,
      "step": 41981,
      "training_step_time": 0.40099501609802246
    },
    {
      "epoch": 0.00025623779296875,
      "model_forward_time": 0.11514115333557129,
      "step": 41982
    },
    {
      "epoch": 0.00025623779296875,
      "step": 41982,
      "training_step_time": 0.4078254699707031
    },
    {
      "epoch": 0.000256243896484375,
      "model_forward_time": 0.11576390266418457,
      "step": 41983
    },
    {
      "epoch": 0.000256243896484375,
      "step": 41983,
      "training_step_time": 0.39531779289245605
    },
    {
      "epoch": 0.00025625,
      "model_forward_time": 0.1152031421661377,
      "step": 41984
    },
    {
      "epoch": 0.00025625,
      "step": 41984,
      "training_step_time": 0.42804908752441406
    },
    {
      "epoch": 0.000256256103515625,
      "model_forward_time": 0.11531424522399902,
      "step": 41985
    },
    {
      "epoch": 0.000256256103515625,
      "step": 41985,
      "training_step_time": 0.48797178268432617
    },
    {
      "epoch": 0.00025626220703125,
      "model_forward_time": 0.11506915092468262,
      "step": 41986
    },
    {
      "epoch": 0.00025626220703125,
      "step": 41986,
      "training_step_time": 0.37650632858276367
    },
    {
      "epoch": 0.000256268310546875,
      "model_forward_time": 0.11458373069763184,
      "step": 41987
    },
    {
      "epoch": 0.000256268310546875,
      "step": 41987,
      "training_step_time": 0.4462087154388428
    },
    {
      "epoch": 0.0002562744140625,
      "model_forward_time": 0.11482977867126465,
      "step": 41988
    },
    {
      "epoch": 0.0002562744140625,
      "step": 41988,
      "training_step_time": 0.44898056983947754
    },
    {
      "epoch": 0.000256280517578125,
      "model_forward_time": 0.11441540718078613,
      "step": 41989
    },
    {
      "epoch": 0.000256280517578125,
      "step": 41989,
      "training_step_time": 0.4983634948730469
    },
    {
      "epoch": 0.00025628662109375,
      "grad_norm": 0.10075235366821289,
      "learning_rate": 2.2675666738157186e-05,
      "loss": 0.0365,
      "step": 41990
    },
    {
      "epoch": 0.00025628662109375,
      "model_forward_time": 0.11460208892822266,
      "step": 41990
    },
    {
      "epoch": 0.00025628662109375,
      "step": 41990,
      "training_step_time": 0.4073054790496826
    },
    {
      "epoch": 0.000256292724609375,
      "model_forward_time": 0.11544561386108398,
      "step": 41991
    },
    {
      "epoch": 0.000256292724609375,
      "step": 41991,
      "training_step_time": 0.4735708236694336
    },
    {
      "epoch": 0.000256298828125,
      "model_forward_time": 0.1145484447479248,
      "step": 41992
    },
    {
      "epoch": 0.000256298828125,
      "step": 41992,
      "training_step_time": 0.3893599510192871
    },
    {
      "epoch": 0.000256304931640625,
      "model_forward_time": 0.11426973342895508,
      "step": 41993
    },
    {
      "epoch": 0.000256304931640625,
      "step": 41993,
      "training_step_time": 0.38707780838012695
    },
    {
      "epoch": 0.00025631103515625,
      "model_forward_time": 0.11475968360900879,
      "step": 41994
    },
    {
      "epoch": 0.00025631103515625,
      "step": 41994,
      "training_step_time": 0.3986539840698242
    },
    {
      "epoch": 0.000256317138671875,
      "model_forward_time": 0.11504912376403809,
      "step": 41995
    },
    {
      "epoch": 0.000256317138671875,
      "step": 41995,
      "training_step_time": 0.4196476936340332
    },
    {
      "epoch": 0.0002563232421875,
      "model_forward_time": 0.1152496337890625,
      "step": 41996
    },
    {
      "epoch": 0.0002563232421875,
      "step": 41996,
      "training_step_time": 0.4002842903137207
    },
    {
      "epoch": 0.000256329345703125,
      "model_forward_time": 0.11506056785583496,
      "step": 41997
    },
    {
      "epoch": 0.000256329345703125,
      "step": 41997,
      "training_step_time": 0.3957860469818115
    },
    {
      "epoch": 0.00025633544921875,
      "model_forward_time": 0.11561274528503418,
      "step": 41998
    },
    {
      "epoch": 0.00025633544921875,
      "step": 41998,
      "training_step_time": 0.3968770503997803
    },
    {
      "epoch": 0.000256341552734375,
      "model_forward_time": 0.11534452438354492,
      "step": 41999
    },
    {
      "epoch": 0.000256341552734375,
      "step": 41999,
      "training_step_time": 0.42734193801879883
    },
    {
      "epoch": 0.00025634765625,
      "grad_norm": 0.09084592759609222,
      "learning_rate": 2.2652592093878666e-05,
      "loss": 0.0351,
      "step": 42000
    },
    {
      "epoch": 0.00025634765625,
      "model_forward_time": 0.11307597160339355,
      "step": 42000
    },
    {
      "epoch": 0.00025634765625,
      "step": 42000,
      "training_step_time": 0.35524535179138184
    },
    {
      "epoch": 0.000256353759765625,
      "model_forward_time": 0.11330199241638184,
      "step": 42001
    },
    {
      "epoch": 0.000256353759765625,
      "step": 42001,
      "training_step_time": 0.39395833015441895
    },
    {
      "epoch": 0.00025635986328125,
      "model_forward_time": 0.11342358589172363,
      "step": 42002
    },
    {
      "epoch": 0.00025635986328125,
      "step": 42002,
      "training_step_time": 0.4182770252227783
    },
    {
      "epoch": 0.000256365966796875,
      "model_forward_time": 0.11372613906860352,
      "step": 42003
    },
    {
      "epoch": 0.000256365966796875,
      "step": 42003,
      "training_step_time": 0.40975499153137207
    },
    {
      "epoch": 0.0002563720703125,
      "model_forward_time": 0.11420965194702148,
      "step": 42004
    },
    {
      "epoch": 0.0002563720703125,
      "step": 42004,
      "training_step_time": 0.4217653274536133
    },
    {
      "epoch": 0.000256378173828125,
      "model_forward_time": 0.11422395706176758,
      "step": 42005
    },
    {
      "epoch": 0.000256378173828125,
      "step": 42005,
      "training_step_time": 0.3873116970062256
    },
    {
      "epoch": 0.00025638427734375,
      "model_forward_time": 0.11390185356140137,
      "step": 42006
    },
    {
      "epoch": 0.00025638427734375,
      "step": 42006,
      "training_step_time": 0.39664292335510254
    },
    {
      "epoch": 0.000256390380859375,
      "model_forward_time": 0.11479830741882324,
      "step": 42007
    },
    {
      "epoch": 0.000256390380859375,
      "step": 42007,
      "training_step_time": 0.389817476272583
    },
    {
      "epoch": 0.000256396484375,
      "model_forward_time": 0.11468219757080078,
      "step": 42008
    },
    {
      "epoch": 0.000256396484375,
      "step": 42008,
      "training_step_time": 0.39615631103515625
    },
    {
      "epoch": 0.000256402587890625,
      "model_forward_time": 0.1150808334350586,
      "step": 42009
    },
    {
      "epoch": 0.000256402587890625,
      "step": 42009,
      "training_step_time": 0.4024777412414551
    },
    {
      "epoch": 0.00025640869140625,
      "grad_norm": 0.18625476956367493,
      "learning_rate": 2.26295257570206e-05,
      "loss": 0.0412,
      "step": 42010
    },
    {
      "epoch": 0.00025640869140625,
      "model_forward_time": 0.11489725112915039,
      "step": 42010
    },
    {
      "epoch": 0.00025640869140625,
      "step": 42010,
      "training_step_time": 0.40529966354370117
    },
    {
      "epoch": 0.000256414794921875,
      "model_forward_time": 0.11544609069824219,
      "step": 42011
    },
    {
      "epoch": 0.000256414794921875,
      "step": 42011,
      "training_step_time": 0.40935730934143066
    },
    {
      "epoch": 0.0002564208984375,
      "model_forward_time": 0.11542367935180664,
      "step": 42012
    },
    {
      "epoch": 0.0002564208984375,
      "step": 42012,
      "training_step_time": 0.4022798538208008
    },
    {
      "epoch": 0.000256427001953125,
      "model_forward_time": 0.11464405059814453,
      "step": 42013
    },
    {
      "epoch": 0.000256427001953125,
      "step": 42013,
      "training_step_time": 0.4091477394104004
    },
    {
      "epoch": 0.00025643310546875,
      "model_forward_time": 0.11513018608093262,
      "step": 42014
    },
    {
      "epoch": 0.00025643310546875,
      "step": 42014,
      "training_step_time": 0.4249124526977539
    },
    {
      "epoch": 0.000256439208984375,
      "model_forward_time": 0.1150517463684082,
      "step": 42015
    },
    {
      "epoch": 0.000256439208984375,
      "step": 42015,
      "training_step_time": 0.474367618560791
    },
    {
      "epoch": 0.0002564453125,
      "model_forward_time": 0.11584711074829102,
      "step": 42016
    },
    {
      "epoch": 0.0002564453125,
      "step": 42016,
      "training_step_time": 0.3790457248687744
    },
    {
      "epoch": 0.000256451416015625,
      "model_forward_time": 0.11536073684692383,
      "step": 42017
    },
    {
      "epoch": 0.000256451416015625,
      "step": 42017,
      "training_step_time": 0.44489336013793945
    },
    {
      "epoch": 0.00025645751953125,
      "model_forward_time": 0.11523175239562988,
      "step": 42018
    },
    {
      "epoch": 0.00025645751953125,
      "step": 42018,
      "training_step_time": 0.485013484954834
    },
    {
      "epoch": 0.000256463623046875,
      "model_forward_time": 0.11499214172363281,
      "step": 42019
    },
    {
      "epoch": 0.000256463623046875,
      "step": 42019,
      "training_step_time": 0.43392133712768555
    },
    {
      "epoch": 0.0002564697265625,
      "grad_norm": 0.0861777812242508,
      "learning_rate": 2.2606467734589924e-05,
      "loss": 0.0402,
      "step": 42020
    },
    {
      "epoch": 0.0002564697265625,
      "model_forward_time": 0.11511039733886719,
      "step": 42020
    },
    {
      "epoch": 0.0002564697265625,
      "step": 42020,
      "training_step_time": 0.4762609004974365
    },
    {
      "epoch": 0.000256475830078125,
      "model_forward_time": 0.11538386344909668,
      "step": 42021
    },
    {
      "epoch": 0.000256475830078125,
      "step": 42021,
      "training_step_time": 0.3977503776550293
    },
    {
      "epoch": 0.00025648193359375,
      "model_forward_time": 0.11581110954284668,
      "step": 42022
    },
    {
      "epoch": 0.00025648193359375,
      "step": 42022,
      "training_step_time": 0.3995325565338135
    },
    {
      "epoch": 0.000256488037109375,
      "model_forward_time": 0.11518168449401855,
      "step": 42023
    },
    {
      "epoch": 0.000256488037109375,
      "step": 42023,
      "training_step_time": 0.3911435604095459
    },
    {
      "epoch": 0.000256494140625,
      "model_forward_time": 0.11509156227111816,
      "step": 42024
    },
    {
      "epoch": 0.000256494140625,
      "step": 42024,
      "training_step_time": 0.4000711441040039
    },
    {
      "epoch": 0.000256500244140625,
      "model_forward_time": 0.1154322624206543,
      "step": 42025
    },
    {
      "epoch": 0.000256500244140625,
      "step": 42025,
      "training_step_time": 0.393963098526001
    },
    {
      "epoch": 0.00025650634765625,
      "model_forward_time": 0.1153268814086914,
      "step": 42026
    },
    {
      "epoch": 0.00025650634765625,
      "step": 42026,
      "training_step_time": 0.3959534168243408
    },
    {
      "epoch": 0.000256512451171875,
      "model_forward_time": 0.11536812782287598,
      "step": 42027
    },
    {
      "epoch": 0.000256512451171875,
      "step": 42027,
      "training_step_time": 0.4279463291168213
    },
    {
      "epoch": 0.0002565185546875,
      "model_forward_time": 0.1148524284362793,
      "step": 42028
    },
    {
      "epoch": 0.0002565185546875,
      "step": 42028,
      "training_step_time": 0.4319722652435303
    },
    {
      "epoch": 0.000256524658203125,
      "model_forward_time": 0.11478877067565918,
      "step": 42029
    },
    {
      "epoch": 0.000256524658203125,
      "step": 42029,
      "training_step_time": 0.43866729736328125
    },
    {
      "epoch": 0.00025653076171875,
      "grad_norm": 0.10330255329608917,
      "learning_rate": 2.258341803359108e-05,
      "loss": 0.0375,
      "step": 42030
    },
    {
      "epoch": 0.00025653076171875,
      "model_forward_time": 0.11477208137512207,
      "step": 42030
    },
    {
      "epoch": 0.00025653076171875,
      "step": 42030,
      "training_step_time": 0.4224512577056885
    },
    {
      "epoch": 0.000256536865234375,
      "model_forward_time": 0.11447024345397949,
      "step": 42031
    },
    {
      "epoch": 0.000256536865234375,
      "step": 42031,
      "training_step_time": 0.5128092765808105
    },
    {
      "epoch": 0.00025654296875,
      "model_forward_time": 0.11488151550292969,
      "step": 42032
    },
    {
      "epoch": 0.00025654296875,
      "step": 42032,
      "training_step_time": 0.4959893226623535
    },
    {
      "epoch": 0.000256549072265625,
      "model_forward_time": 0.11464047431945801,
      "step": 42033
    },
    {
      "epoch": 0.000256549072265625,
      "step": 42033,
      "training_step_time": 0.45150256156921387
    },
    {
      "epoch": 0.00025655517578125,
      "model_forward_time": 0.11538100242614746,
      "step": 42034
    },
    {
      "epoch": 0.00025655517578125,
      "step": 42034,
      "training_step_time": 0.48885345458984375
    },
    {
      "epoch": 0.000256561279296875,
      "model_forward_time": 0.11451125144958496,
      "step": 42035
    },
    {
      "epoch": 0.000256561279296875,
      "step": 42035,
      "training_step_time": 0.3949739933013916
    },
    {
      "epoch": 0.0002565673828125,
      "model_forward_time": 0.1142587661743164,
      "step": 42036
    },
    {
      "epoch": 0.0002565673828125,
      "step": 42036,
      "training_step_time": 0.3911263942718506
    },
    {
      "epoch": 0.000256573486328125,
      "model_forward_time": 0.11469006538391113,
      "step": 42037
    },
    {
      "epoch": 0.000256573486328125,
      "step": 42037,
      "training_step_time": 0.39710569381713867
    },
    {
      "epoch": 0.00025657958984375,
      "model_forward_time": 0.1152641773223877,
      "step": 42038
    },
    {
      "epoch": 0.00025657958984375,
      "step": 42038,
      "training_step_time": 0.39365100860595703
    },
    {
      "epoch": 0.000256585693359375,
      "model_forward_time": 0.11541128158569336,
      "step": 42039
    },
    {
      "epoch": 0.000256585693359375,
      "step": 42039,
      "training_step_time": 0.3992171287536621
    },
    {
      "epoch": 0.000256591796875,
      "grad_norm": 0.12878581881523132,
      "learning_rate": 2.2560376661025945e-05,
      "loss": 0.0412,
      "step": 42040
    },
    {
      "epoch": 0.000256591796875,
      "model_forward_time": 0.11509346961975098,
      "step": 42040
    },
    {
      "epoch": 0.000256591796875,
      "step": 42040,
      "training_step_time": 0.4000728130340576
    },
    {
      "epoch": 0.000256597900390625,
      "model_forward_time": 0.11559772491455078,
      "step": 42041
    },
    {
      "epoch": 0.000256597900390625,
      "step": 42041,
      "training_step_time": 0.3975982666015625
    },
    {
      "epoch": 0.00025660400390625,
      "model_forward_time": 0.11506104469299316,
      "step": 42042
    },
    {
      "epoch": 0.00025660400390625,
      "step": 42042,
      "training_step_time": 0.4089977741241455
    },
    {
      "epoch": 0.000256610107421875,
      "model_forward_time": 0.11510276794433594,
      "step": 42043
    },
    {
      "epoch": 0.000256610107421875,
      "step": 42043,
      "training_step_time": 0.39293718338012695
    },
    {
      "epoch": 0.0002566162109375,
      "model_forward_time": 0.11513566970825195,
      "step": 42044
    },
    {
      "epoch": 0.0002566162109375,
      "step": 42044,
      "training_step_time": 0.4427967071533203
    },
    {
      "epoch": 0.000256622314453125,
      "model_forward_time": 0.11529898643493652,
      "step": 42045
    },
    {
      "epoch": 0.000256622314453125,
      "step": 42045,
      "training_step_time": 0.37018775939941406
    },
    {
      "epoch": 0.00025662841796875,
      "model_forward_time": 0.11507058143615723,
      "step": 42046
    },
    {
      "epoch": 0.00025662841796875,
      "step": 42046,
      "training_step_time": 0.4490017890930176
    },
    {
      "epoch": 0.000256634521484375,
      "model_forward_time": 0.11523199081420898,
      "step": 42047
    },
    {
      "epoch": 0.000256634521484375,
      "step": 42047,
      "training_step_time": 0.4848794937133789
    },
    {
      "epoch": 0.000256640625,
      "model_forward_time": 0.11472392082214355,
      "step": 42048
    },
    {
      "epoch": 0.000256640625,
      "step": 42048,
      "training_step_time": 0.4084906578063965
    },
    {
      "epoch": 0.000256646728515625,
      "model_forward_time": 0.1151583194732666,
      "step": 42049
    },
    {
      "epoch": 0.000256646728515625,
      "step": 42049,
      "training_step_time": 0.47965216636657715
    },
    {
      "epoch": 0.00025665283203125,
      "grad_norm": 0.12258205562829971,
      "learning_rate": 2.2537343623893874e-05,
      "loss": 0.0357,
      "step": 42050
    },
    {
      "epoch": 0.00025665283203125,
      "model_forward_time": 0.1147613525390625,
      "step": 42050
    },
    {
      "epoch": 0.00025665283203125,
      "step": 42050,
      "training_step_time": 0.4015529155731201
    },
    {
      "epoch": 0.000256658935546875,
      "model_forward_time": 0.11414480209350586,
      "step": 42051
    },
    {
      "epoch": 0.000256658935546875,
      "step": 42051,
      "training_step_time": 0.3965437412261963
    },
    {
      "epoch": 0.0002566650390625,
      "model_forward_time": 0.11539292335510254,
      "step": 42052
    },
    {
      "epoch": 0.0002566650390625,
      "step": 42052,
      "training_step_time": 0.3884925842285156
    },
    {
      "epoch": 0.000256671142578125,
      "model_forward_time": 0.11447668075561523,
      "step": 42053
    },
    {
      "epoch": 0.000256671142578125,
      "step": 42053,
      "training_step_time": 0.39679455757141113
    },
    {
      "epoch": 0.00025667724609375,
      "model_forward_time": 0.11494660377502441,
      "step": 42054
    },
    {
      "epoch": 0.00025667724609375,
      "step": 42054,
      "training_step_time": 0.4063558578491211
    },
    {
      "epoch": 0.000256683349609375,
      "model_forward_time": 0.11499762535095215,
      "step": 42055
    },
    {
      "epoch": 0.000256683349609375,
      "step": 42055,
      "training_step_time": 0.4327867031097412
    },
    {
      "epoch": 0.000256689453125,
      "model_forward_time": 0.11485886573791504,
      "step": 42056
    },
    {
      "epoch": 0.000256689453125,
      "step": 42056,
      "training_step_time": 0.4545145034790039
    },
    {
      "epoch": 0.000256695556640625,
      "model_forward_time": 0.11506366729736328,
      "step": 42057
    },
    {
      "epoch": 0.000256695556640625,
      "step": 42057,
      "training_step_time": 0.3916466236114502
    },
    {
      "epoch": 0.00025670166015625,
      "model_forward_time": 0.11617350578308105,
      "step": 42058
    },
    {
      "epoch": 0.00025670166015625,
      "step": 42058,
      "training_step_time": 0.40565013885498047
    },
    {
      "epoch": 0.000256707763671875,
      "model_forward_time": 0.11498522758483887,
      "step": 42059
    },
    {
      "epoch": 0.000256707763671875,
      "step": 42059,
      "training_step_time": 0.46359872817993164
    },
    {
      "epoch": 0.0002567138671875,
      "grad_norm": 0.10713482648134232,
      "learning_rate": 2.251431892919171e-05,
      "loss": 0.0348,
      "step": 42060
    },
    {
      "epoch": 0.0002567138671875,
      "model_forward_time": 0.11536169052124023,
      "step": 42060
    },
    {
      "epoch": 0.0002567138671875,
      "step": 42060,
      "training_step_time": 0.4792792797088623
    },
    {
      "epoch": 0.000256719970703125,
      "model_forward_time": 0.11583614349365234,
      "step": 42061
    },
    {
      "epoch": 0.000256719970703125,
      "step": 42061,
      "training_step_time": 0.49670886993408203
    },
    {
      "epoch": 0.00025672607421875,
      "model_forward_time": 0.1145012378692627,
      "step": 42062
    },
    {
      "epoch": 0.00025672607421875,
      "step": 42062,
      "training_step_time": 0.42493462562561035
    },
    {
      "epoch": 0.000256732177734375,
      "model_forward_time": 0.11481833457946777,
      "step": 42063
    },
    {
      "epoch": 0.000256732177734375,
      "step": 42063,
      "training_step_time": 0.498903751373291
    },
    {
      "epoch": 0.00025673828125,
      "model_forward_time": 0.11474061012268066,
      "step": 42064
    },
    {
      "epoch": 0.00025673828125,
      "step": 42064,
      "training_step_time": 0.3951692581176758
    },
    {
      "epoch": 0.000256744384765625,
      "model_forward_time": 0.1150214672088623,
      "step": 42065
    },
    {
      "epoch": 0.000256744384765625,
      "step": 42065,
      "training_step_time": 0.38992953300476074
    },
    {
      "epoch": 0.00025675048828125,
      "model_forward_time": 0.1150968074798584,
      "step": 42066
    },
    {
      "epoch": 0.00025675048828125,
      "step": 42066,
      "training_step_time": 0.39658164978027344
    },
    {
      "epoch": 0.000256756591796875,
      "model_forward_time": 0.11517548561096191,
      "step": 42067
    },
    {
      "epoch": 0.000256756591796875,
      "step": 42067,
      "training_step_time": 0.39253783226013184
    },
    {
      "epoch": 0.0002567626953125,
      "model_forward_time": 0.11486625671386719,
      "step": 42068
    },
    {
      "epoch": 0.0002567626953125,
      "step": 42068,
      "training_step_time": 0.4346354007720947
    },
    {
      "epoch": 0.000256768798828125,
      "model_forward_time": 0.11419320106506348,
      "step": 42069
    },
    {
      "epoch": 0.000256768798828125,
      "step": 42069,
      "training_step_time": 0.3941512107849121
    },
    {
      "epoch": 0.00025677490234375,
      "grad_norm": 0.10794701427221298,
      "learning_rate": 2.249130258391373e-05,
      "loss": 0.0418,
      "step": 42070
    },
    {
      "epoch": 0.00025677490234375,
      "model_forward_time": 0.11546468734741211,
      "step": 42070
    },
    {
      "epoch": 0.00025677490234375,
      "step": 42070,
      "training_step_time": 0.39227867126464844
    },
    {
      "epoch": 0.000256781005859375,
      "model_forward_time": 0.11509084701538086,
      "step": 42071
    },
    {
      "epoch": 0.000256781005859375,
      "step": 42071,
      "training_step_time": 0.40450525283813477
    },
    {
      "epoch": 0.000256787109375,
      "model_forward_time": 0.11549663543701172,
      "step": 42072
    },
    {
      "epoch": 0.000256787109375,
      "step": 42072,
      "training_step_time": 0.3961617946624756
    },
    {
      "epoch": 0.000256793212890625,
      "model_forward_time": 0.11462211608886719,
      "step": 42073
    },
    {
      "epoch": 0.000256793212890625,
      "step": 42073,
      "training_step_time": 0.4252643585205078
    },
    {
      "epoch": 0.00025679931640625,
      "model_forward_time": 0.11456608772277832,
      "step": 42074
    },
    {
      "epoch": 0.00025679931640625,
      "step": 42074,
      "training_step_time": 0.3663918972015381
    },
    {
      "epoch": 0.000256805419921875,
      "model_forward_time": 0.11475706100463867,
      "step": 42075
    },
    {
      "epoch": 0.000256805419921875,
      "step": 42075,
      "training_step_time": 0.4550485610961914
    },
    {
      "epoch": 0.0002568115234375,
      "model_forward_time": 0.11537289619445801,
      "step": 42076
    },
    {
      "epoch": 0.0002568115234375,
      "step": 42076,
      "training_step_time": 0.45060133934020996
    },
    {
      "epoch": 0.000256817626953125,
      "model_forward_time": 0.11564493179321289,
      "step": 42077
    },
    {
      "epoch": 0.000256817626953125,
      "step": 42077,
      "training_step_time": 0.38308143615722656
    },
    {
      "epoch": 0.00025682373046875,
      "model_forward_time": 0.1153104305267334,
      "step": 42078
    },
    {
      "epoch": 0.00025682373046875,
      "step": 42078,
      "training_step_time": 0.4049246311187744
    },
    {
      "epoch": 0.000256829833984375,
      "model_forward_time": 0.11450028419494629,
      "step": 42079
    },
    {
      "epoch": 0.000256829833984375,
      "step": 42079,
      "training_step_time": 0.38139891624450684
    },
    {
      "epoch": 0.0002568359375,
      "grad_norm": 0.09173199534416199,
      "learning_rate": 2.2468294595051687e-05,
      "loss": 0.0349,
      "step": 42080
    },
    {
      "epoch": 0.0002568359375,
      "model_forward_time": 0.11508584022521973,
      "step": 42080
    },
    {
      "epoch": 0.0002568359375,
      "step": 42080,
      "training_step_time": 0.38348889350891113
    },
    {
      "epoch": 0.000256842041015625,
      "model_forward_time": 0.11576962471008301,
      "step": 42081
    },
    {
      "epoch": 0.000256842041015625,
      "step": 42081,
      "training_step_time": 0.40136003494262695
    },
    {
      "epoch": 0.00025684814453125,
      "model_forward_time": 0.11527228355407715,
      "step": 42082
    },
    {
      "epoch": 0.00025684814453125,
      "step": 42082,
      "training_step_time": 0.41623687744140625
    },
    {
      "epoch": 0.000256854248046875,
      "model_forward_time": 0.11545658111572266,
      "step": 42083
    },
    {
      "epoch": 0.000256854248046875,
      "step": 42083,
      "training_step_time": 0.414914608001709
    },
    {
      "epoch": 0.0002568603515625,
      "model_forward_time": 0.11586499214172363,
      "step": 42084
    },
    {
      "epoch": 0.0002568603515625,
      "step": 42084,
      "training_step_time": 0.4059779644012451
    },
    {
      "epoch": 0.000256866455078125,
      "model_forward_time": 0.11544346809387207,
      "step": 42085
    },
    {
      "epoch": 0.000256866455078125,
      "step": 42085,
      "training_step_time": 0.39989399909973145
    },
    {
      "epoch": 0.00025687255859375,
      "model_forward_time": 0.11587238311767578,
      "step": 42086
    },
    {
      "epoch": 0.00025687255859375,
      "step": 42086,
      "training_step_time": 0.4181849956512451
    },
    {
      "epoch": 0.000256878662109375,
      "model_forward_time": 0.1149439811706543,
      "step": 42087
    },
    {
      "epoch": 0.000256878662109375,
      "step": 42087,
      "training_step_time": 0.399949312210083
    },
    {
      "epoch": 0.000256884765625,
      "model_forward_time": 0.11690521240234375,
      "step": 42088
    },
    {
      "epoch": 0.000256884765625,
      "step": 42088,
      "training_step_time": 0.43767833709716797
    },
    {
      "epoch": 0.000256890869140625,
      "model_forward_time": 0.11620521545410156,
      "step": 42089
    },
    {
      "epoch": 0.000256890869140625,
      "step": 42089,
      "training_step_time": 0.41259193420410156
    },
    {
      "epoch": 0.00025689697265625,
      "grad_norm": 0.13698910176753998,
      "learning_rate": 2.2445294969594844e-05,
      "loss": 0.0378,
      "step": 42090
    },
    {
      "epoch": 0.00025689697265625,
      "model_forward_time": 0.11484527587890625,
      "step": 42090
    },
    {
      "epoch": 0.00025689697265625,
      "step": 42090,
      "training_step_time": 0.43660926818847656
    },
    {
      "epoch": 0.000256903076171875,
      "model_forward_time": 0.11578798294067383,
      "step": 42091
    },
    {
      "epoch": 0.000256903076171875,
      "step": 42091,
      "training_step_time": 0.46146535873413086
    },
    {
      "epoch": 0.0002569091796875,
      "model_forward_time": 0.11612129211425781,
      "step": 42092
    },
    {
      "epoch": 0.0002569091796875,
      "step": 42092,
      "training_step_time": 0.4012582302093506
    },
    {
      "epoch": 0.000256915283203125,
      "model_forward_time": 0.1152043342590332,
      "step": 42093
    },
    {
      "epoch": 0.000256915283203125,
      "step": 42093,
      "training_step_time": 0.381911039352417
    },
    {
      "epoch": 0.00025692138671875,
      "model_forward_time": 0.11552095413208008,
      "step": 42094
    },
    {
      "epoch": 0.00025692138671875,
      "step": 42094,
      "training_step_time": 0.38676953315734863
    },
    {
      "epoch": 0.000256927490234375,
      "model_forward_time": 0.11465740203857422,
      "step": 42095
    },
    {
      "epoch": 0.000256927490234375,
      "step": 42095,
      "training_step_time": 0.396991491317749
    },
    {
      "epoch": 0.00025693359375,
      "model_forward_time": 0.11538839340209961,
      "step": 42096
    },
    {
      "epoch": 0.00025693359375,
      "step": 42096,
      "training_step_time": 0.4775962829589844
    },
    {
      "epoch": 0.000256939697265625,
      "model_forward_time": 0.11509132385253906,
      "step": 42097
    },
    {
      "epoch": 0.000256939697265625,
      "step": 42097,
      "training_step_time": 0.39583587646484375
    },
    {
      "epoch": 0.00025694580078125,
      "model_forward_time": 0.11494159698486328,
      "step": 42098
    },
    {
      "epoch": 0.00025694580078125,
      "step": 42098,
      "training_step_time": 0.41336703300476074
    },
    {
      "epoch": 0.000256951904296875,
      "model_forward_time": 0.11583399772644043,
      "step": 42099
    },
    {
      "epoch": 0.000256951904296875,
      "step": 42099,
      "training_step_time": 0.3941929340362549
    },
    {
      "epoch": 0.0002569580078125,
      "grad_norm": 0.16011326014995575,
      "learning_rate": 2.242230371452982e-05,
      "loss": 0.037,
      "step": 42100
    },
    {
      "epoch": 0.0002569580078125,
      "model_forward_time": 0.11522412300109863,
      "step": 42100
    },
    {
      "epoch": 0.0002569580078125,
      "step": 42100,
      "training_step_time": 0.3939549922943115
    },
    {
      "epoch": 0.000256964111328125,
      "model_forward_time": 0.11580777168273926,
      "step": 42101
    },
    {
      "epoch": 0.000256964111328125,
      "step": 42101,
      "training_step_time": 0.3989250659942627
    },
    {
      "epoch": 0.00025697021484375,
      "model_forward_time": 0.11503982543945312,
      "step": 42102
    },
    {
      "epoch": 0.00025697021484375,
      "step": 42102,
      "training_step_time": 0.3865926265716553
    },
    {
      "epoch": 0.000256976318359375,
      "model_forward_time": 0.11523270606994629,
      "step": 42103
    },
    {
      "epoch": 0.000256976318359375,
      "step": 42103,
      "training_step_time": 0.41969847679138184
    },
    {
      "epoch": 0.000256982421875,
      "model_forward_time": 0.1153402328491211,
      "step": 42104
    },
    {
      "epoch": 0.000256982421875,
      "step": 42104,
      "training_step_time": 0.43652963638305664
    },
    {
      "epoch": 0.000256988525390625,
      "model_forward_time": 0.11584877967834473,
      "step": 42105
    },
    {
      "epoch": 0.000256988525390625,
      "step": 42105,
      "training_step_time": 0.522650957107544
    },
    {
      "epoch": 0.00025699462890625,
      "model_forward_time": 0.1153414249420166,
      "step": 42106
    },
    {
      "epoch": 0.00025699462890625,
      "step": 42106,
      "training_step_time": 0.4118225574493408
    },
    {
      "epoch": 0.000257000732421875,
      "model_forward_time": 0.11514401435852051,
      "step": 42107
    },
    {
      "epoch": 0.000257000732421875,
      "step": 42107,
      "training_step_time": 0.41758179664611816
    },
    {
      "epoch": 0.0002570068359375,
      "model_forward_time": 0.11530041694641113,
      "step": 42108
    },
    {
      "epoch": 0.0002570068359375,
      "step": 42108,
      "training_step_time": 0.3957216739654541
    },
    {
      "epoch": 0.000257012939453125,
      "model_forward_time": 0.11527585983276367,
      "step": 42109
    },
    {
      "epoch": 0.000257012939453125,
      "step": 42109,
      "training_step_time": 0.3936319351196289
    },
    {
      "epoch": 0.00025701904296875,
      "grad_norm": 0.0970778539776802,
      "learning_rate": 2.239932083684081e-05,
      "loss": 0.0341,
      "step": 42110
    },
    {
      "epoch": 0.00025701904296875,
      "model_forward_time": 0.11483025550842285,
      "step": 42110
    },
    {
      "epoch": 0.00025701904296875,
      "step": 42110,
      "training_step_time": 0.4377467632293701
    },
    {
      "epoch": 0.000257025146484375,
      "model_forward_time": 0.11517596244812012,
      "step": 42111
    },
    {
      "epoch": 0.000257025146484375,
      "step": 42111,
      "training_step_time": 0.4735145568847656
    },
    {
      "epoch": 0.00025703125,
      "model_forward_time": 0.1155252456665039,
      "step": 42112
    },
    {
      "epoch": 0.00025703125,
      "step": 42112,
      "training_step_time": 0.3898298740386963
    },
    {
      "epoch": 0.000257037353515625,
      "model_forward_time": 0.11511850357055664,
      "step": 42113
    },
    {
      "epoch": 0.000257037353515625,
      "step": 42113,
      "training_step_time": 0.40108704566955566
    },
    {
      "epoch": 0.00025704345703125,
      "model_forward_time": 0.11538529396057129,
      "step": 42114
    },
    {
      "epoch": 0.00025704345703125,
      "step": 42114,
      "training_step_time": 0.3852696418762207
    },
    {
      "epoch": 0.000257049560546875,
      "model_forward_time": 0.11477327346801758,
      "step": 42115
    },
    {
      "epoch": 0.000257049560546875,
      "step": 42115,
      "training_step_time": 0.394010066986084
    },
    {
      "epoch": 0.0002570556640625,
      "model_forward_time": 0.11493635177612305,
      "step": 42116
    },
    {
      "epoch": 0.0002570556640625,
      "step": 42116,
      "training_step_time": 0.4034249782562256
    },
    {
      "epoch": 0.000257061767578125,
      "model_forward_time": 0.11560606956481934,
      "step": 42117
    },
    {
      "epoch": 0.000257061767578125,
      "step": 42117,
      "training_step_time": 0.5154016017913818
    },
    {
      "epoch": 0.00025706787109375,
      "model_forward_time": 0.11528587341308594,
      "step": 42118
    },
    {
      "epoch": 0.00025706787109375,
      "step": 42118,
      "training_step_time": 0.38353776931762695
    },
    {
      "epoch": 0.000257073974609375,
      "model_forward_time": 0.11532974243164062,
      "step": 42119
    },
    {
      "epoch": 0.000257073974609375,
      "step": 42119,
      "training_step_time": 0.5227737426757812
    },
    {
      "epoch": 0.000257080078125,
      "grad_norm": 0.12688976526260376,
      "learning_rate": 2.237634634350934e-05,
      "loss": 0.0369,
      "step": 42120
    },
    {
      "epoch": 0.000257080078125,
      "model_forward_time": 0.11445975303649902,
      "step": 42120
    },
    {
      "epoch": 0.000257080078125,
      "step": 42120,
      "training_step_time": 0.44611525535583496
    },
    {
      "epoch": 0.000257086181640625,
      "model_forward_time": 0.11518216133117676,
      "step": 42121
    },
    {
      "epoch": 0.000257086181640625,
      "step": 42121,
      "training_step_time": 0.49298667907714844
    },
    {
      "epoch": 0.00025709228515625,
      "model_forward_time": 0.11478805541992188,
      "step": 42122
    },
    {
      "epoch": 0.00025709228515625,
      "step": 42122,
      "training_step_time": 0.39625000953674316
    },
    {
      "epoch": 0.000257098388671875,
      "model_forward_time": 0.11462736129760742,
      "step": 42123
    },
    {
      "epoch": 0.000257098388671875,
      "step": 42123,
      "training_step_time": 0.4167006015777588
    },
    {
      "epoch": 0.0002571044921875,
      "model_forward_time": 0.11504149436950684,
      "step": 42124
    },
    {
      "epoch": 0.0002571044921875,
      "step": 42124,
      "training_step_time": 0.41671204566955566
    },
    {
      "epoch": 0.000257110595703125,
      "model_forward_time": 0.11561393737792969,
      "step": 42125
    },
    {
      "epoch": 0.000257110595703125,
      "step": 42125,
      "training_step_time": 0.39091968536376953
    },
    {
      "epoch": 0.00025711669921875,
      "model_forward_time": 0.1152498722076416,
      "step": 42126
    },
    {
      "epoch": 0.00025711669921875,
      "step": 42126,
      "training_step_time": 0.40789103507995605
    },
    {
      "epoch": 0.000257122802734375,
      "model_forward_time": 0.11566781997680664,
      "step": 42127
    },
    {
      "epoch": 0.000257122802734375,
      "step": 42127,
      "training_step_time": 0.39821791648864746
    },
    {
      "epoch": 0.00025712890625,
      "model_forward_time": 0.1150062084197998,
      "step": 42128
    },
    {
      "epoch": 0.00025712890625,
      "step": 42128,
      "training_step_time": 0.408128023147583
    },
    {
      "epoch": 0.000257135009765625,
      "model_forward_time": 0.11592602729797363,
      "step": 42129
    },
    {
      "epoch": 0.000257135009765625,
      "step": 42129,
      "training_step_time": 0.40611863136291504
    },
    {
      "epoch": 0.00025714111328125,
      "grad_norm": 0.11145681887865067,
      "learning_rate": 2.2353380241514515e-05,
      "loss": 0.0319,
      "step": 42130
    },
    {
      "epoch": 0.00025714111328125,
      "model_forward_time": 0.11576008796691895,
      "step": 42130
    },
    {
      "epoch": 0.00025714111328125,
      "step": 42130,
      "training_step_time": 0.4041929244995117
    },
    {
      "epoch": 0.000257147216796875,
      "model_forward_time": 0.11594700813293457,
      "step": 42131
    },
    {
      "epoch": 0.000257147216796875,
      "step": 42131,
      "training_step_time": 0.410137414932251
    },
    {
      "epoch": 0.0002571533203125,
      "model_forward_time": 0.11542391777038574,
      "step": 42132
    },
    {
      "epoch": 0.0002571533203125,
      "step": 42132,
      "training_step_time": 0.3975331783294678
    },
    {
      "epoch": 0.000257159423828125,
      "model_forward_time": 0.1153249740600586,
      "step": 42133
    },
    {
      "epoch": 0.000257159423828125,
      "step": 42133,
      "training_step_time": 0.42253828048706055
    },
    {
      "epoch": 0.00025716552734375,
      "model_forward_time": 0.11582350730895996,
      "step": 42134
    },
    {
      "epoch": 0.00025716552734375,
      "step": 42134,
      "training_step_time": 0.48878002166748047
    },
    {
      "epoch": 0.000257171630859375,
      "model_forward_time": 0.11491775512695312,
      "step": 42135
    },
    {
      "epoch": 0.000257171630859375,
      "step": 42135,
      "training_step_time": 0.4687044620513916
    },
    {
      "epoch": 0.000257177734375,
      "model_forward_time": 0.1156010627746582,
      "step": 42136
    },
    {
      "epoch": 0.000257177734375,
      "step": 42136,
      "training_step_time": 0.42542266845703125
    },
    {
      "epoch": 0.000257183837890625,
      "model_forward_time": 0.11533737182617188,
      "step": 42137
    },
    {
      "epoch": 0.000257183837890625,
      "step": 42137,
      "training_step_time": 0.41500306129455566
    },
    {
      "epoch": 0.00025718994140625,
      "model_forward_time": 0.11501884460449219,
      "step": 42138
    },
    {
      "epoch": 0.00025718994140625,
      "step": 42138,
      "training_step_time": 0.4100005626678467
    },
    {
      "epoch": 0.000257196044921875,
      "model_forward_time": 0.11522173881530762,
      "step": 42139
    },
    {
      "epoch": 0.000257196044921875,
      "step": 42139,
      "training_step_time": 0.4064500331878662
    },
    {
      "epoch": 0.0002572021484375,
      "grad_norm": 0.08925213664770126,
      "learning_rate": 2.23304225378328e-05,
      "loss": 0.0334,
      "step": 42140
    },
    {
      "epoch": 0.0002572021484375,
      "model_forward_time": 0.11458206176757812,
      "step": 42140
    },
    {
      "epoch": 0.0002572021484375,
      "step": 42140,
      "training_step_time": 0.39684200286865234
    },
    {
      "epoch": 0.000257208251953125,
      "model_forward_time": 0.11545085906982422,
      "step": 42141
    },
    {
      "epoch": 0.000257208251953125,
      "step": 42141,
      "training_step_time": 0.48145031929016113
    },
    {
      "epoch": 0.00025721435546875,
      "model_forward_time": 0.11621952056884766,
      "step": 42142
    },
    {
      "epoch": 0.00025721435546875,
      "step": 42142,
      "training_step_time": 0.39987707138061523
    },
    {
      "epoch": 0.000257220458984375,
      "model_forward_time": 0.11529803276062012,
      "step": 42143
    },
    {
      "epoch": 0.000257220458984375,
      "step": 42143,
      "training_step_time": 0.39737939834594727
    },
    {
      "epoch": 0.0002572265625,
      "model_forward_time": 0.11451148986816406,
      "step": 42144
    },
    {
      "epoch": 0.0002572265625,
      "step": 42144,
      "training_step_time": 0.41274046897888184
    },
    {
      "epoch": 0.000257232666015625,
      "model_forward_time": 0.11534452438354492,
      "step": 42145
    },
    {
      "epoch": 0.000257232666015625,
      "step": 42145,
      "training_step_time": 0.39449644088745117
    },
    {
      "epoch": 0.00025723876953125,
      "model_forward_time": 0.11484956741333008,
      "step": 42146
    },
    {
      "epoch": 0.00025723876953125,
      "step": 42146,
      "training_step_time": 0.3954188823699951
    },
    {
      "epoch": 0.000257244873046875,
      "model_forward_time": 0.11548209190368652,
      "step": 42147
    },
    {
      "epoch": 0.000257244873046875,
      "step": 42147,
      "training_step_time": 0.48346519470214844
    },
    {
      "epoch": 0.0002572509765625,
      "model_forward_time": 0.11522936820983887,
      "step": 42148
    },
    {
      "epoch": 0.0002572509765625,
      "step": 42148,
      "training_step_time": 0.4547443389892578
    },
    {
      "epoch": 0.000257257080078125,
      "model_forward_time": 0.11583852767944336,
      "step": 42149
    },
    {
      "epoch": 0.000257257080078125,
      "step": 42149,
      "training_step_time": 0.46947240829467773
    },
    {
      "epoch": 0.00025726318359375,
      "grad_norm": 0.09780331701040268,
      "learning_rate": 2.2307473239438154e-05,
      "loss": 0.0401,
      "step": 42150
    },
    {
      "epoch": 0.00025726318359375,
      "model_forward_time": 0.11522555351257324,
      "step": 42150
    },
    {
      "epoch": 0.00025726318359375,
      "step": 42150,
      "training_step_time": 0.46701598167419434
    },
    {
      "epoch": 0.000257269287109375,
      "model_forward_time": 0.11453723907470703,
      "step": 42151
    },
    {
      "epoch": 0.000257269287109375,
      "step": 42151,
      "training_step_time": 0.4110453128814697
    },
    {
      "epoch": 0.000257275390625,
      "model_forward_time": 0.11502361297607422,
      "step": 42152
    },
    {
      "epoch": 0.000257275390625,
      "step": 42152,
      "training_step_time": 0.3961918354034424
    },
    {
      "epoch": 0.000257281494140625,
      "model_forward_time": 0.11443781852722168,
      "step": 42153
    },
    {
      "epoch": 0.000257281494140625,
      "step": 42153,
      "training_step_time": 0.3843519687652588
    },
    {
      "epoch": 0.00025728759765625,
      "model_forward_time": 0.11548805236816406,
      "step": 42154
    },
    {
      "epoch": 0.00025728759765625,
      "step": 42154,
      "training_step_time": 0.38989949226379395
    },
    {
      "epoch": 0.000257293701171875,
      "model_forward_time": 0.1165475845336914,
      "step": 42155
    },
    {
      "epoch": 0.000257293701171875,
      "step": 42155,
      "training_step_time": 0.38897037506103516
    },
    {
      "epoch": 0.0002572998046875,
      "model_forward_time": 0.11588501930236816,
      "step": 42156
    },
    {
      "epoch": 0.0002572998046875,
      "step": 42156,
      "training_step_time": 0.4013998508453369
    },
    {
      "epoch": 0.000257305908203125,
      "model_forward_time": 0.11490440368652344,
      "step": 42157
    },
    {
      "epoch": 0.000257305908203125,
      "step": 42157,
      "training_step_time": 0.3929927349090576
    },
    {
      "epoch": 0.00025731201171875,
      "model_forward_time": 0.11528944969177246,
      "step": 42158
    },
    {
      "epoch": 0.00025731201171875,
      "step": 42158,
      "training_step_time": 0.3934931755065918
    },
    {
      "epoch": 0.000257318115234375,
      "model_forward_time": 0.1154947280883789,
      "step": 42159
    },
    {
      "epoch": 0.000257318115234375,
      "step": 42159,
      "training_step_time": 0.5934531688690186
    },
    {
      "epoch": 0.00025732421875,
      "grad_norm": 0.1389898657798767,
      "learning_rate": 2.2284532353301953e-05,
      "loss": 0.0344,
      "step": 42160
    },
    {
      "epoch": 0.00025732421875,
      "model_forward_time": 0.11505293846130371,
      "step": 42160
    },
    {
      "epoch": 0.00025732421875,
      "step": 42160,
      "training_step_time": 0.3975682258605957
    },
    {
      "epoch": 0.000257330322265625,
      "model_forward_time": 0.11470317840576172,
      "step": 42161
    },
    {
      "epoch": 0.000257330322265625,
      "step": 42161,
      "training_step_time": 0.36560940742492676
    },
    {
      "epoch": 0.00025733642578125,
      "model_forward_time": 0.11515259742736816,
      "step": 42162
    },
    {
      "epoch": 0.00025733642578125,
      "step": 42162,
      "training_step_time": 0.45820093154907227
    },
    {
      "epoch": 0.000257342529296875,
      "model_forward_time": 0.1153252124786377,
      "step": 42163
    },
    {
      "epoch": 0.000257342529296875,
      "step": 42163,
      "training_step_time": 0.4874117374420166
    },
    {
      "epoch": 0.0002573486328125,
      "model_forward_time": 0.11464977264404297,
      "step": 42164
    },
    {
      "epoch": 0.0002573486328125,
      "step": 42164,
      "training_step_time": 0.4105057716369629
    },
    {
      "epoch": 0.000257354736328125,
      "model_forward_time": 0.11533117294311523,
      "step": 42165
    },
    {
      "epoch": 0.000257354736328125,
      "step": 42165,
      "training_step_time": 0.4116809368133545
    },
    {
      "epoch": 0.00025736083984375,
      "model_forward_time": 0.11514067649841309,
      "step": 42166
    },
    {
      "epoch": 0.00025736083984375,
      "step": 42166,
      "training_step_time": 0.3876936435699463
    },
    {
      "epoch": 0.000257366943359375,
      "model_forward_time": 0.11499881744384766,
      "step": 42167
    },
    {
      "epoch": 0.000257366943359375,
      "step": 42167,
      "training_step_time": 0.3785223960876465
    },
    {
      "epoch": 0.000257373046875,
      "model_forward_time": 0.11506438255310059,
      "step": 42168
    },
    {
      "epoch": 0.000257373046875,
      "step": 42168,
      "training_step_time": 0.40038299560546875
    },
    {
      "epoch": 0.000257379150390625,
      "model_forward_time": 0.11576247215270996,
      "step": 42169
    },
    {
      "epoch": 0.000257379150390625,
      "step": 42169,
      "training_step_time": 0.38710451126098633
    },
    {
      "epoch": 0.00025738525390625,
      "grad_norm": 0.11411947757005692,
      "learning_rate": 2.2261599886393014e-05,
      "loss": 0.0346,
      "step": 42170
    },
    {
      "epoch": 0.00025738525390625,
      "model_forward_time": 0.11582159996032715,
      "step": 42170
    },
    {
      "epoch": 0.00025738525390625,
      "step": 42170,
      "training_step_time": 0.3843700885772705
    },
    {
      "epoch": 0.000257391357421875,
      "model_forward_time": 0.11617302894592285,
      "step": 42171
    },
    {
      "epoch": 0.000257391357421875,
      "step": 42171,
      "training_step_time": 0.39855122566223145
    },
    {
      "epoch": 0.0002573974609375,
      "model_forward_time": 0.11583805084228516,
      "step": 42172
    },
    {
      "epoch": 0.0002573974609375,
      "step": 42172,
      "training_step_time": 0.3927955627441406
    },
    {
      "epoch": 0.000257403564453125,
      "model_forward_time": 0.11524844169616699,
      "step": 42173
    },
    {
      "epoch": 0.000257403564453125,
      "step": 42173,
      "training_step_time": 0.39662790298461914
    },
    {
      "epoch": 0.00025740966796875,
      "model_forward_time": 0.11519122123718262,
      "step": 42174
    },
    {
      "epoch": 0.00025740966796875,
      "step": 42174,
      "training_step_time": 0.3938472270965576
    },
    {
      "epoch": 0.000257415771484375,
      "model_forward_time": 0.11534237861633301,
      "step": 42175
    },
    {
      "epoch": 0.000257415771484375,
      "step": 42175,
      "training_step_time": 0.39981627464294434
    },
    {
      "epoch": 0.000257421875,
      "model_forward_time": 0.11683464050292969,
      "step": 42176
    },
    {
      "epoch": 0.000257421875,
      "step": 42176,
      "training_step_time": 0.6131076812744141
    },
    {
      "epoch": 0.000257427978515625,
      "model_forward_time": 0.1179037094116211,
      "step": 42177
    },
    {
      "epoch": 0.000257427978515625,
      "step": 42177,
      "training_step_time": 0.6869258880615234
    },
    {
      "epoch": 0.00025743408203125,
      "model_forward_time": 0.11819028854370117,
      "step": 42178
    },
    {
      "epoch": 0.00025743408203125,
      "step": 42178,
      "training_step_time": 0.7085988521575928
    },
    {
      "epoch": 0.000257440185546875,
      "model_forward_time": 0.1192162036895752,
      "step": 42179
    },
    {
      "epoch": 0.000257440185546875,
      "step": 42179,
      "training_step_time": 0.6801457405090332
    },
    {
      "epoch": 0.0002574462890625,
      "grad_norm": 0.1301405131816864,
      "learning_rate": 2.2238675845677663e-05,
      "loss": 0.0345,
      "step": 42180
    },
    {
      "epoch": 0.0002574462890625,
      "model_forward_time": 0.11844778060913086,
      "step": 42180
    },
    {
      "epoch": 0.0002574462890625,
      "step": 42180,
      "training_step_time": 0.663806676864624
    },
    {
      "epoch": 0.000257452392578125,
      "model_forward_time": 0.11822319030761719,
      "step": 42181
    },
    {
      "epoch": 0.000257452392578125,
      "step": 42181,
      "training_step_time": 0.686392068862915
    },
    {
      "epoch": 0.00025745849609375,
      "model_forward_time": 0.1453685760498047,
      "step": 42182
    },
    {
      "epoch": 0.00025745849609375,
      "step": 42182,
      "training_step_time": 0.7194864749908447
    },
    {
      "epoch": 0.000257464599609375,
      "model_forward_time": 0.1167917251586914,
      "step": 42183
    },
    {
      "epoch": 0.000257464599609375,
      "step": 42183,
      "training_step_time": 0.6267716884613037
    },
    {
      "epoch": 0.000257470703125,
      "model_forward_time": 0.11764001846313477,
      "step": 42184
    },
    {
      "epoch": 0.000257470703125,
      "step": 42184,
      "training_step_time": 0.6508114337921143
    },
    {
      "epoch": 0.000257476806640625,
      "model_forward_time": 0.1247549057006836,
      "step": 42185
    },
    {
      "epoch": 0.000257476806640625,
      "step": 42185,
      "training_step_time": 0.6814103126525879
    },
    {
      "epoch": 0.00025748291015625,
      "model_forward_time": 0.11751627922058105,
      "step": 42186
    },
    {
      "epoch": 0.00025748291015625,
      "step": 42186,
      "training_step_time": 0.7282559871673584
    },
    {
      "epoch": 0.000257489013671875,
      "model_forward_time": 0.12353992462158203,
      "step": 42187
    },
    {
      "epoch": 0.000257489013671875,
      "step": 42187,
      "training_step_time": 0.8614037036895752
    },
    {
      "epoch": 0.0002574951171875,
      "model_forward_time": 0.12139725685119629,
      "step": 42188
    },
    {
      "epoch": 0.0002574951171875,
      "step": 42188,
      "training_step_time": 0.6848297119140625
    },
    {
      "epoch": 0.000257501220703125,
      "model_forward_time": 0.11913657188415527,
      "step": 42189
    },
    {
      "epoch": 0.000257501220703125,
      "step": 42189,
      "training_step_time": 0.6622958183288574
    },
    {
      "epoch": 0.00025750732421875,
      "grad_norm": 0.11072543263435364,
      "learning_rate": 2.22157602381196e-05,
      "loss": 0.0362,
      "step": 42190
    },
    {
      "epoch": 0.00025750732421875,
      "model_forward_time": 0.11618423461914062,
      "step": 42190
    },
    {
      "epoch": 0.00025750732421875,
      "step": 42190,
      "training_step_time": 0.6721804141998291
    },
    {
      "epoch": 0.000257513427734375,
      "model_forward_time": 0.11634278297424316,
      "step": 42191
    },
    {
      "epoch": 0.000257513427734375,
      "step": 42191,
      "training_step_time": 0.6519901752471924
    },
    {
      "epoch": 0.00025751953125,
      "model_forward_time": 0.12206768989562988,
      "step": 42192
    },
    {
      "epoch": 0.00025751953125,
      "step": 42192,
      "training_step_time": 0.6782543659210205
    },
    {
      "epoch": 0.000257525634765625,
      "model_forward_time": 0.11980295181274414,
      "step": 42193
    },
    {
      "epoch": 0.000257525634765625,
      "step": 42193,
      "training_step_time": 0.6546134948730469
    },
    {
      "epoch": 0.00025753173828125,
      "model_forward_time": 0.12066769599914551,
      "step": 42194
    },
    {
      "epoch": 0.00025753173828125,
      "step": 42194,
      "training_step_time": 0.6385586261749268
    },
    {
      "epoch": 0.000257537841796875,
      "model_forward_time": 0.11785721778869629,
      "step": 42195
    },
    {
      "epoch": 0.000257537841796875,
      "step": 42195,
      "training_step_time": 0.7395293712615967
    },
    {
      "epoch": 0.0002575439453125,
      "model_forward_time": 0.11971402168273926,
      "step": 42196
    },
    {
      "epoch": 0.0002575439453125,
      "step": 42196,
      "training_step_time": 0.7042403221130371
    },
    {
      "epoch": 0.000257550048828125,
      "model_forward_time": 0.13946962356567383,
      "step": 42197
    },
    {
      "epoch": 0.000257550048828125,
      "step": 42197,
      "training_step_time": 0.5782160758972168
    },
    {
      "epoch": 0.00025755615234375,
      "model_forward_time": 0.11621952056884766,
      "step": 42198
    },
    {
      "epoch": 0.00025755615234375,
      "step": 42198,
      "training_step_time": 0.7165720462799072
    },
    {
      "epoch": 0.000257562255859375,
      "model_forward_time": 0.11925220489501953,
      "step": 42199
    },
    {
      "epoch": 0.000257562255859375,
      "step": 42199,
      "training_step_time": 0.5978398323059082
    },
    {
      "epoch": 0.000257568359375,
      "grad_norm": 0.11045775562524796,
      "learning_rate": 2.219285307067997e-05,
      "loss": 0.0423,
      "step": 42200
    },
    {
      "epoch": 0.000257568359375,
      "model_forward_time": 0.12169623374938965,
      "step": 42200
    },
    {
      "epoch": 0.000257568359375,
      "step": 42200,
      "training_step_time": 0.6668424606323242
    },
    {
      "epoch": 0.000257574462890625,
      "model_forward_time": 0.11754345893859863,
      "step": 42201
    },
    {
      "epoch": 0.000257574462890625,
      "step": 42201,
      "training_step_time": 0.625993013381958
    },
    {
      "epoch": 0.00025758056640625,
      "model_forward_time": 0.11799764633178711,
      "step": 42202
    },
    {
      "epoch": 0.00025758056640625,
      "step": 42202,
      "training_step_time": 0.6590781211853027
    },
    {
      "epoch": 0.000257586669921875,
      "model_forward_time": 0.12086677551269531,
      "step": 42203
    },
    {
      "epoch": 0.000257586669921875,
      "step": 42203,
      "training_step_time": 0.700580358505249
    },
    {
      "epoch": 0.0002575927734375,
      "model_forward_time": 0.1306605339050293,
      "step": 42204
    },
    {
      "epoch": 0.0002575927734375,
      "step": 42204,
      "training_step_time": 0.7402892112731934
    },
    {
      "epoch": 0.000257598876953125,
      "model_forward_time": 0.12197446823120117,
      "step": 42205
    },
    {
      "epoch": 0.000257598876953125,
      "step": 42205,
      "training_step_time": 0.6728410720825195
    },
    {
      "epoch": 0.00025760498046875,
      "model_forward_time": 0.1169743537902832,
      "step": 42206
    },
    {
      "epoch": 0.00025760498046875,
      "step": 42206,
      "training_step_time": 0.6839990615844727
    },
    {
      "epoch": 0.000257611083984375,
      "model_forward_time": 0.11748361587524414,
      "step": 42207
    },
    {
      "epoch": 0.000257611083984375,
      "step": 42207,
      "training_step_time": 0.6280930042266846
    },
    {
      "epoch": 0.0002576171875,
      "model_forward_time": 0.12472772598266602,
      "step": 42208
    },
    {
      "epoch": 0.0002576171875,
      "step": 42208,
      "training_step_time": 0.6139955520629883
    },
    {
      "epoch": 0.000257623291015625,
      "model_forward_time": 0.11817741394042969,
      "step": 42209
    },
    {
      "epoch": 0.000257623291015625,
      "step": 42209,
      "training_step_time": 0.6552505493164062
    },
    {
      "epoch": 0.00025762939453125,
      "grad_norm": 0.14293172955513,
      "learning_rate": 2.2169954350317374e-05,
      "loss": 0.0406,
      "step": 42210
    },
    {
      "epoch": 0.00025762939453125,
      "model_forward_time": 0.11620330810546875,
      "step": 42210
    },
    {
      "epoch": 0.00025762939453125,
      "step": 42210,
      "training_step_time": 0.6313755512237549
    },
    {
      "epoch": 0.000257635498046875,
      "model_forward_time": 0.11703276634216309,
      "step": 42211
    },
    {
      "epoch": 0.000257635498046875,
      "step": 42211,
      "training_step_time": 0.6779782772064209
    },
    {
      "epoch": 0.0002576416015625,
      "model_forward_time": 0.12177920341491699,
      "step": 42212
    },
    {
      "epoch": 0.0002576416015625,
      "step": 42212,
      "training_step_time": 0.6818311214447021
    },
    {
      "epoch": 0.000257647705078125,
      "model_forward_time": 0.11977767944335938,
      "step": 42213
    },
    {
      "epoch": 0.000257647705078125,
      "step": 42213,
      "training_step_time": 0.6903765201568604
    },
    {
      "epoch": 0.00025765380859375,
      "model_forward_time": 0.11871194839477539,
      "step": 42214
    },
    {
      "epoch": 0.00025765380859375,
      "step": 42214,
      "training_step_time": 0.5629076957702637
    },
    {
      "epoch": 0.000257659912109375,
      "model_forward_time": 0.12264060974121094,
      "step": 42215
    },
    {
      "epoch": 0.000257659912109375,
      "step": 42215,
      "training_step_time": 0.7608609199523926
    },
    {
      "epoch": 0.000257666015625,
      "model_forward_time": 0.11666560173034668,
      "step": 42216
    },
    {
      "epoch": 0.000257666015625,
      "step": 42216,
      "training_step_time": 0.6393201351165771
    },
    {
      "epoch": 0.000257672119140625,
      "model_forward_time": 0.11643099784851074,
      "step": 42217
    },
    {
      "epoch": 0.000257672119140625,
      "step": 42217,
      "training_step_time": 0.7047204971313477
    },
    {
      "epoch": 0.00025767822265625,
      "model_forward_time": 0.11702156066894531,
      "step": 42218
    },
    {
      "epoch": 0.00025767822265625,
      "step": 42218,
      "training_step_time": 0.6067061424255371
    },
    {
      "epoch": 0.000257684326171875,
      "model_forward_time": 0.1196293830871582,
      "step": 42219
    },
    {
      "epoch": 0.000257684326171875,
      "step": 42219,
      "training_step_time": 0.6376194953918457
    },
    {
      "epoch": 0.0002576904296875,
      "grad_norm": 0.12782151997089386,
      "learning_rate": 2.2147064083987838e-05,
      "loss": 0.0382,
      "step": 42220
    },
    {
      "epoch": 0.0002576904296875,
      "model_forward_time": 0.12204504013061523,
      "step": 42220
    },
    {
      "epoch": 0.0002576904296875,
      "step": 42220,
      "training_step_time": 0.6412756443023682
    },
    {
      "epoch": 0.000257696533203125,
      "model_forward_time": 0.11684942245483398,
      "step": 42221
    },
    {
      "epoch": 0.000257696533203125,
      "step": 42221,
      "training_step_time": 0.6710879802703857
    },
    {
      "epoch": 0.00025770263671875,
      "model_forward_time": 0.11638832092285156,
      "step": 42222
    },
    {
      "epoch": 0.00025770263671875,
      "step": 42222,
      "training_step_time": 0.7075436115264893
    },
    {
      "epoch": 0.000257708740234375,
      "model_forward_time": 0.12040853500366211,
      "step": 42223
    },
    {
      "epoch": 0.000257708740234375,
      "step": 42223,
      "training_step_time": 0.6843042373657227
    },
    {
      "epoch": 0.00025771484375,
      "model_forward_time": 0.11936235427856445,
      "step": 42224
    },
    {
      "epoch": 0.00025771484375,
      "step": 42224,
      "training_step_time": 0.7443203926086426
    },
    {
      "epoch": 0.000257720947265625,
      "model_forward_time": 0.12042880058288574,
      "step": 42225
    },
    {
      "epoch": 0.000257720947265625,
      "step": 42225,
      "training_step_time": 0.747288703918457
    },
    {
      "epoch": 0.00025772705078125,
      "model_forward_time": 0.12094497680664062,
      "step": 42226
    },
    {
      "epoch": 0.00025772705078125,
      "step": 42226,
      "training_step_time": 0.673820972442627
    },
    {
      "epoch": 0.000257733154296875,
      "model_forward_time": 0.12003707885742188,
      "step": 42227
    },
    {
      "epoch": 0.000257733154296875,
      "step": 42227,
      "training_step_time": 0.684197187423706
    },
    {
      "epoch": 0.0002577392578125,
      "model_forward_time": 0.1177682876586914,
      "step": 42228
    },
    {
      "epoch": 0.0002577392578125,
      "step": 42228,
      "training_step_time": 0.6134843826293945
    },
    {
      "epoch": 0.000257745361328125,
      "model_forward_time": 0.11673760414123535,
      "step": 42229
    },
    {
      "epoch": 0.000257745361328125,
      "step": 42229,
      "training_step_time": 0.7151174545288086
    },
    {
      "epoch": 0.00025775146484375,
      "grad_norm": 0.10998304188251495,
      "learning_rate": 2.21241822786448e-05,
      "loss": 0.037,
      "step": 42230
    },
    {
      "epoch": 0.00025775146484375,
      "model_forward_time": 0.11894059181213379,
      "step": 42230
    },
    {
      "epoch": 0.00025775146484375,
      "step": 42230,
      "training_step_time": 0.6359741687774658
    },
    {
      "epoch": 0.000257757568359375,
      "model_forward_time": 0.12150382995605469,
      "step": 42231
    },
    {
      "epoch": 0.000257757568359375,
      "step": 42231,
      "training_step_time": 0.6032447814941406
    },
    {
      "epoch": 0.000257763671875,
      "model_forward_time": 0.12324714660644531,
      "step": 42232
    },
    {
      "epoch": 0.000257763671875,
      "step": 42232,
      "training_step_time": 0.6142544746398926
    },
    {
      "epoch": 0.000257769775390625,
      "model_forward_time": 0.12079977989196777,
      "step": 42233
    },
    {
      "epoch": 0.000257769775390625,
      "step": 42233,
      "training_step_time": 0.6910650730133057
    },
    {
      "epoch": 0.00025777587890625,
      "model_forward_time": 0.12215757369995117,
      "step": 42234
    },
    {
      "epoch": 0.00025777587890625,
      "step": 42234,
      "training_step_time": 0.7171258926391602
    },
    {
      "epoch": 0.000257781982421875,
      "model_forward_time": 0.1285860538482666,
      "step": 42235
    },
    {
      "epoch": 0.000257781982421875,
      "step": 42235,
      "training_step_time": 0.6940953731536865
    },
    {
      "epoch": 0.0002577880859375,
      "model_forward_time": 0.12559819221496582,
      "step": 42236
    },
    {
      "epoch": 0.0002577880859375,
      "step": 42236,
      "training_step_time": 0.6405160427093506
    },
    {
      "epoch": 0.000257794189453125,
      "model_forward_time": 0.11637353897094727,
      "step": 42237
    },
    {
      "epoch": 0.000257794189453125,
      "step": 42237,
      "training_step_time": 0.6740913391113281
    },
    {
      "epoch": 0.00025780029296875,
      "model_forward_time": 0.12961792945861816,
      "step": 42238
    },
    {
      "epoch": 0.00025780029296875,
      "step": 42238,
      "training_step_time": 0.755016565322876
    },
    {
      "epoch": 0.000257806396484375,
      "model_forward_time": 0.1251528263092041,
      "step": 42239
    },
    {
      "epoch": 0.000257806396484375,
      "step": 42239,
      "training_step_time": 0.6498010158538818
    },
    {
      "epoch": 0.0002578125,
      "grad_norm": 0.09837929904460907,
      "learning_rate": 2.2101308941239203e-05,
      "loss": 0.042,
      "step": 42240
    },
    {
      "epoch": 0.0002578125,
      "model_forward_time": 0.1204218864440918,
      "step": 42240
    },
    {
      "epoch": 0.0002578125,
      "step": 42240,
      "training_step_time": 0.6443624496459961
    },
    {
      "epoch": 0.000257818603515625,
      "model_forward_time": 0.11705279350280762,
      "step": 42241
    },
    {
      "epoch": 0.000257818603515625,
      "step": 42241,
      "training_step_time": 0.6077678203582764
    },
    {
      "epoch": 0.00025782470703125,
      "model_forward_time": 0.11917281150817871,
      "step": 42242
    },
    {
      "epoch": 0.00025782470703125,
      "step": 42242,
      "training_step_time": 0.6803441047668457
    },
    {
      "epoch": 0.000257830810546875,
      "model_forward_time": 0.11974000930786133,
      "step": 42243
    },
    {
      "epoch": 0.000257830810546875,
      "step": 42243,
      "training_step_time": 0.6649212837219238
    },
    {
      "epoch": 0.0002578369140625,
      "model_forward_time": 0.12080717086791992,
      "step": 42244
    },
    {
      "epoch": 0.0002578369140625,
      "step": 42244,
      "training_step_time": 0.7374136447906494
    },
    {
      "epoch": 0.000257843017578125,
      "model_forward_time": 0.1231684684753418,
      "step": 42245
    },
    {
      "epoch": 0.000257843017578125,
      "step": 42245,
      "training_step_time": 0.6737029552459717
    },
    {
      "epoch": 0.00025784912109375,
      "model_forward_time": 0.12003803253173828,
      "step": 42246
    },
    {
      "epoch": 0.00025784912109375,
      "step": 42246,
      "training_step_time": 0.5355191230773926
    },
    {
      "epoch": 0.000257855224609375,
      "model_forward_time": 0.12340831756591797,
      "step": 42247
    },
    {
      "epoch": 0.000257855224609375,
      "step": 42247,
      "training_step_time": 0.49568796157836914
    },
    {
      "epoch": 0.000257861328125,
      "model_forward_time": 0.11873865127563477,
      "step": 42248
    },
    {
      "epoch": 0.000257861328125,
      "step": 42248,
      "training_step_time": 0.4819304943084717
    },
    {
      "epoch": 0.000257867431640625,
      "model_forward_time": 0.11767125129699707,
      "step": 42249
    },
    {
      "epoch": 0.000257867431640625,
      "step": 42249,
      "training_step_time": 0.5521159172058105
    },
    {
      "epoch": 0.00025787353515625,
      "grad_norm": 0.14430350065231323,
      "learning_rate": 2.207844407871929e-05,
      "loss": 0.0456,
      "step": 42250
    },
    {
      "epoch": 0.00025787353515625,
      "model_forward_time": 0.11728119850158691,
      "step": 42250
    },
    {
      "epoch": 0.00025787353515625,
      "step": 42250,
      "training_step_time": 0.44663190841674805
    },
    {
      "epoch": 0.000257879638671875,
      "model_forward_time": 0.11696171760559082,
      "step": 42251
    },
    {
      "epoch": 0.000257879638671875,
      "step": 42251,
      "training_step_time": 0.47211313247680664
    },
    {
      "epoch": 0.0002578857421875,
      "model_forward_time": 0.1166231632232666,
      "step": 42252
    },
    {
      "epoch": 0.0002578857421875,
      "step": 42252,
      "training_step_time": 0.42257022857666016
    },
    {
      "epoch": 0.000257891845703125,
      "model_forward_time": 0.11646580696105957,
      "step": 42253
    },
    {
      "epoch": 0.000257891845703125,
      "step": 42253,
      "training_step_time": 0.41278576850891113
    },
    {
      "epoch": 0.00025789794921875,
      "model_forward_time": 0.11549949645996094,
      "step": 42254
    },
    {
      "epoch": 0.00025789794921875,
      "step": 42254,
      "training_step_time": 0.44532155990600586
    },
    {
      "epoch": 0.000257904052734375,
      "model_forward_time": 0.1146688461303711,
      "step": 42255
    },
    {
      "epoch": 0.000257904052734375,
      "step": 42255,
      "training_step_time": 0.4280822277069092
    },
    {
      "epoch": 0.00025791015625,
      "model_forward_time": 0.11555147171020508,
      "step": 42256
    },
    {
      "epoch": 0.00025791015625,
      "step": 42256,
      "training_step_time": 0.4249415397644043
    },
    {
      "epoch": 0.000257916259765625,
      "model_forward_time": 0.11480236053466797,
      "step": 42257
    },
    {
      "epoch": 0.000257916259765625,
      "step": 42257,
      "training_step_time": 0.428818941116333
    },
    {
      "epoch": 0.00025792236328125,
      "model_forward_time": 0.11505270004272461,
      "step": 42258
    },
    {
      "epoch": 0.00025792236328125,
      "step": 42258,
      "training_step_time": 0.4974019527435303
    },
    {
      "epoch": 0.000257928466796875,
      "model_forward_time": 0.11621689796447754,
      "step": 42259
    },
    {
      "epoch": 0.000257928466796875,
      "step": 42259,
      "training_step_time": 0.40866804122924805
    },
    {
      "epoch": 0.0002579345703125,
      "grad_norm": 0.12326124310493469,
      "learning_rate": 2.2055587698030877e-05,
      "loss": 0.041,
      "step": 42260
    },
    {
      "epoch": 0.0002579345703125,
      "model_forward_time": 0.11415624618530273,
      "step": 42260
    },
    {
      "epoch": 0.0002579345703125,
      "step": 42260,
      "training_step_time": 0.4089062213897705
    },
    {
      "epoch": 0.000257940673828125,
      "model_forward_time": 0.11546802520751953,
      "step": 42261
    },
    {
      "epoch": 0.000257940673828125,
      "step": 42261,
      "training_step_time": 0.40105581283569336
    },
    {
      "epoch": 0.00025794677734375,
      "model_forward_time": 0.11514091491699219,
      "step": 42262
    },
    {
      "epoch": 0.00025794677734375,
      "step": 42262,
      "training_step_time": 0.4084634780883789
    },
    {
      "epoch": 0.000257952880859375,
      "model_forward_time": 0.11518311500549316,
      "step": 42263
    },
    {
      "epoch": 0.000257952880859375,
      "step": 42263,
      "training_step_time": 0.4165010452270508
    },
    {
      "epoch": 0.000257958984375,
      "model_forward_time": 0.11489486694335938,
      "step": 42264
    },
    {
      "epoch": 0.000257958984375,
      "step": 42264,
      "training_step_time": 0.37517809867858887
    },
    {
      "epoch": 0.000257965087890625,
      "model_forward_time": 0.11536574363708496,
      "step": 42265
    },
    {
      "epoch": 0.000257965087890625,
      "step": 42265,
      "training_step_time": 0.39584898948669434
    },
    {
      "epoch": 0.00025797119140625,
      "model_forward_time": 0.11621904373168945,
      "step": 42266
    },
    {
      "epoch": 0.00025797119140625,
      "step": 42266,
      "training_step_time": 0.3919708728790283
    },
    {
      "epoch": 0.000257977294921875,
      "model_forward_time": 0.11499762535095215,
      "step": 42267
    },
    {
      "epoch": 0.000257977294921875,
      "step": 42267,
      "training_step_time": 0.3890101909637451
    },
    {
      "epoch": 0.0002579833984375,
      "model_forward_time": 0.11521434783935547,
      "step": 42268
    },
    {
      "epoch": 0.0002579833984375,
      "step": 42268,
      "training_step_time": 0.38761448860168457
    },
    {
      "epoch": 0.000257989501953125,
      "model_forward_time": 0.11529684066772461,
      "step": 42269
    },
    {
      "epoch": 0.000257989501953125,
      "step": 42269,
      "training_step_time": 0.46871256828308105
    },
    {
      "epoch": 0.00025799560546875,
      "grad_norm": 0.09103766083717346,
      "learning_rate": 2.2032739806117058e-05,
      "loss": 0.0369,
      "step": 42270
    },
    {
      "epoch": 0.00025799560546875,
      "model_forward_time": 0.11568522453308105,
      "step": 42270
    },
    {
      "epoch": 0.00025799560546875,
      "step": 42270,
      "training_step_time": 0.37478041648864746
    },
    {
      "epoch": 0.000258001708984375,
      "model_forward_time": 0.1150355339050293,
      "step": 42271
    },
    {
      "epoch": 0.000258001708984375,
      "step": 42271,
      "training_step_time": 0.5006287097930908
    },
    {
      "epoch": 0.0002580078125,
      "model_forward_time": 0.11449575424194336,
      "step": 42272
    },
    {
      "epoch": 0.0002580078125,
      "step": 42272,
      "training_step_time": 0.5053820610046387
    },
    {
      "epoch": 0.000258013916015625,
      "model_forward_time": 0.1143336296081543,
      "step": 42273
    },
    {
      "epoch": 0.000258013916015625,
      "step": 42273,
      "training_step_time": 0.47130799293518066
    },
    {
      "epoch": 0.00025802001953125,
      "model_forward_time": 0.11471700668334961,
      "step": 42274
    },
    {
      "epoch": 0.00025802001953125,
      "step": 42274,
      "training_step_time": 0.3948543071746826
    },
    {
      "epoch": 0.000258026123046875,
      "model_forward_time": 0.11449074745178223,
      "step": 42275
    },
    {
      "epoch": 0.000258026123046875,
      "step": 42275,
      "training_step_time": 0.3854820728302002
    },
    {
      "epoch": 0.0002580322265625,
      "model_forward_time": 0.11523604393005371,
      "step": 42276
    },
    {
      "epoch": 0.0002580322265625,
      "step": 42276,
      "training_step_time": 0.37259435653686523
    },
    {
      "epoch": 0.000258038330078125,
      "model_forward_time": 0.11456465721130371,
      "step": 42277
    },
    {
      "epoch": 0.000258038330078125,
      "step": 42277,
      "training_step_time": 0.44964599609375
    },
    {
      "epoch": 0.00025804443359375,
      "model_forward_time": 0.1178896427154541,
      "step": 42278
    },
    {
      "epoch": 0.00025804443359375,
      "step": 42278,
      "training_step_time": 0.3962092399597168
    },
    {
      "epoch": 0.000258050537109375,
      "model_forward_time": 0.11514830589294434,
      "step": 42279
    },
    {
      "epoch": 0.000258050537109375,
      "step": 42279,
      "training_step_time": 0.4004542827606201
    },
    {
      "epoch": 0.000258056640625,
      "grad_norm": 0.08555637300014496,
      "learning_rate": 2.2009900409918465e-05,
      "loss": 0.0461,
      "step": 42280
    },
    {
      "epoch": 0.000258056640625,
      "model_forward_time": 0.11505341529846191,
      "step": 42280
    },
    {
      "epoch": 0.000258056640625,
      "step": 42280,
      "training_step_time": 0.3961062431335449
    },
    {
      "epoch": 0.000258062744140625,
      "model_forward_time": 0.11618709564208984,
      "step": 42281
    },
    {
      "epoch": 0.000258062744140625,
      "step": 42281,
      "training_step_time": 0.39453744888305664
    },
    {
      "epoch": 0.00025806884765625,
      "model_forward_time": 0.11532211303710938,
      "step": 42282
    },
    {
      "epoch": 0.00025806884765625,
      "step": 42282,
      "training_step_time": 0.37713193893432617
    },
    {
      "epoch": 0.000258074951171875,
      "model_forward_time": 0.11487078666687012,
      "step": 42283
    },
    {
      "epoch": 0.000258074951171875,
      "step": 42283,
      "training_step_time": 0.4566686153411865
    },
    {
      "epoch": 0.0002580810546875,
      "model_forward_time": 0.11520981788635254,
      "step": 42284
    },
    {
      "epoch": 0.0002580810546875,
      "step": 42284,
      "training_step_time": 0.3920609951019287
    },
    {
      "epoch": 0.000258087158203125,
      "model_forward_time": 0.11574506759643555,
      "step": 42285
    },
    {
      "epoch": 0.000258087158203125,
      "step": 42285,
      "training_step_time": 0.45849132537841797
    },
    {
      "epoch": 0.00025809326171875,
      "model_forward_time": 0.11666440963745117,
      "step": 42286
    },
    {
      "epoch": 0.00025809326171875,
      "step": 42286,
      "training_step_time": 0.4404106140136719
    },
    {
      "epoch": 0.000258099365234375,
      "model_forward_time": 0.11568880081176758,
      "step": 42287
    },
    {
      "epoch": 0.000258099365234375,
      "step": 42287,
      "training_step_time": 0.42125844955444336
    },
    {
      "epoch": 0.00025810546875,
      "model_forward_time": 0.11546874046325684,
      "step": 42288
    },
    {
      "epoch": 0.00025810546875,
      "step": 42288,
      "training_step_time": 0.3742671012878418
    },
    {
      "epoch": 0.000258111572265625,
      "model_forward_time": 0.11555838584899902,
      "step": 42289
    },
    {
      "epoch": 0.000258111572265625,
      "step": 42289,
      "training_step_time": 0.40800929069519043
    },
    {
      "epoch": 0.00025811767578125,
      "grad_norm": 0.08240849524736404,
      "learning_rate": 2.1987069516373098e-05,
      "loss": 0.0391,
      "step": 42290
    },
    {
      "epoch": 0.00025811767578125,
      "model_forward_time": 0.11513090133666992,
      "step": 42290
    },
    {
      "epoch": 0.00025811767578125,
      "step": 42290,
      "training_step_time": 0.41229820251464844
    },
    {
      "epoch": 0.000258123779296875,
      "model_forward_time": 0.11456441879272461,
      "step": 42291
    },
    {
      "epoch": 0.000258123779296875,
      "step": 42291,
      "training_step_time": 0.4218931198120117
    },
    {
      "epoch": 0.0002581298828125,
      "model_forward_time": 0.11510396003723145,
      "step": 42292
    },
    {
      "epoch": 0.0002581298828125,
      "step": 42292,
      "training_step_time": 0.3906106948852539
    },
    {
      "epoch": 0.000258135986328125,
      "model_forward_time": 0.11464858055114746,
      "step": 42293
    },
    {
      "epoch": 0.000258135986328125,
      "step": 42293,
      "training_step_time": 0.39702463150024414
    },
    {
      "epoch": 0.00025814208984375,
      "model_forward_time": 0.11510682106018066,
      "step": 42294
    },
    {
      "epoch": 0.00025814208984375,
      "step": 42294,
      "training_step_time": 0.3810083866119385
    },
    {
      "epoch": 0.000258148193359375,
      "model_forward_time": 0.11466693878173828,
      "step": 42295
    },
    {
      "epoch": 0.000258148193359375,
      "step": 42295,
      "training_step_time": 0.39798521995544434
    },
    {
      "epoch": 0.000258154296875,
      "model_forward_time": 0.11570191383361816,
      "step": 42296
    },
    {
      "epoch": 0.000258154296875,
      "step": 42296,
      "training_step_time": 0.38921618461608887
    },
    {
      "epoch": 0.000258160400390625,
      "model_forward_time": 0.11528921127319336,
      "step": 42297
    },
    {
      "epoch": 0.000258160400390625,
      "step": 42297,
      "training_step_time": 0.39899730682373047
    },
    {
      "epoch": 0.00025816650390625,
      "model_forward_time": 0.11533117294311523,
      "step": 42298
    },
    {
      "epoch": 0.00025816650390625,
      "step": 42298,
      "training_step_time": 0.41822171211242676
    },
    {
      "epoch": 0.000258172607421875,
      "model_forward_time": 0.11505532264709473,
      "step": 42299
    },
    {
      "epoch": 0.000258172607421875,
      "step": 42299,
      "training_step_time": 0.36892008781433105
    },
    {
      "epoch": 0.0002581787109375,
      "grad_norm": 0.11278021335601807,
      "learning_rate": 2.196424713241637e-05,
      "loss": 0.0399,
      "step": 42300
    },
    {
      "epoch": 0.0002581787109375,
      "model_forward_time": 0.11515545845031738,
      "step": 42300
    },
    {
      "epoch": 0.0002581787109375,
      "step": 42300,
      "training_step_time": 0.46799397468566895
    },
    {
      "epoch": 0.000258184814453125,
      "model_forward_time": 0.11466217041015625,
      "step": 42301
    },
    {
      "epoch": 0.000258184814453125,
      "step": 42301,
      "training_step_time": 0.4101073741912842
    },
    {
      "epoch": 0.00025819091796875,
      "model_forward_time": 0.11519002914428711,
      "step": 42302
    },
    {
      "epoch": 0.00025819091796875,
      "step": 42302,
      "training_step_time": 0.4096395969390869
    },
    {
      "epoch": 0.000258197021484375,
      "model_forward_time": 0.11623668670654297,
      "step": 42303
    },
    {
      "epoch": 0.000258197021484375,
      "step": 42303,
      "training_step_time": 0.5001256465911865
    },
    {
      "epoch": 0.000258203125,
      "model_forward_time": 0.11474323272705078,
      "step": 42304
    },
    {
      "epoch": 0.000258203125,
      "step": 42304,
      "training_step_time": 0.43128299713134766
    },
    {
      "epoch": 0.000258209228515625,
      "model_forward_time": 0.11505508422851562,
      "step": 42305
    },
    {
      "epoch": 0.000258209228515625,
      "step": 42305,
      "training_step_time": 0.38468122482299805
    },
    {
      "epoch": 0.00025821533203125,
      "model_forward_time": 0.1154332160949707,
      "step": 42306
    },
    {
      "epoch": 0.00025821533203125,
      "step": 42306,
      "training_step_time": 0.38721752166748047
    },
    {
      "epoch": 0.000258221435546875,
      "model_forward_time": 0.11487698554992676,
      "step": 42307
    },
    {
      "epoch": 0.000258221435546875,
      "step": 42307,
      "training_step_time": 0.40792322158813477
    },
    {
      "epoch": 0.0002582275390625,
      "model_forward_time": 0.11496090888977051,
      "step": 42308
    },
    {
      "epoch": 0.0002582275390625,
      "step": 42308,
      "training_step_time": 0.3884084224700928
    },
    {
      "epoch": 0.000258233642578125,
      "model_forward_time": 0.11553478240966797,
      "step": 42309
    },
    {
      "epoch": 0.000258233642578125,
      "step": 42309,
      "training_step_time": 0.39270973205566406
    },
    {
      "epoch": 0.00025823974609375,
      "grad_norm": 0.10583183914422989,
      "learning_rate": 2.1941433264981125e-05,
      "loss": 0.0375,
      "step": 42310
    },
    {
      "epoch": 0.00025823974609375,
      "model_forward_time": 0.11710238456726074,
      "step": 42310
    },
    {
      "epoch": 0.00025823974609375,
      "step": 42310,
      "training_step_time": 0.3998572826385498
    },
    {
      "epoch": 0.000258245849609375,
      "model_forward_time": 0.11536097526550293,
      "step": 42311
    },
    {
      "epoch": 0.000258245849609375,
      "step": 42311,
      "training_step_time": 0.3818492889404297
    },
    {
      "epoch": 0.000258251953125,
      "model_forward_time": 0.11590361595153809,
      "step": 42312
    },
    {
      "epoch": 0.000258251953125,
      "step": 42312,
      "training_step_time": 0.38522815704345703
    },
    {
      "epoch": 0.000258258056640625,
      "model_forward_time": 0.11524391174316406,
      "step": 42313
    },
    {
      "epoch": 0.000258258056640625,
      "step": 42313,
      "training_step_time": 0.4993903636932373
    },
    {
      "epoch": 0.00025826416015625,
      "model_forward_time": 0.11497879028320312,
      "step": 42314
    },
    {
      "epoch": 0.00025826416015625,
      "step": 42314,
      "training_step_time": 0.5227005481719971
    },
    {
      "epoch": 0.000258270263671875,
      "model_forward_time": 0.11472082138061523,
      "step": 42315
    },
    {
      "epoch": 0.000258270263671875,
      "step": 42315,
      "training_step_time": 0.5047094821929932
    },
    {
      "epoch": 0.0002582763671875,
      "model_forward_time": 0.11497974395751953,
      "step": 42316
    },
    {
      "epoch": 0.0002582763671875,
      "step": 42316,
      "training_step_time": 0.41715049743652344
    },
    {
      "epoch": 0.000258282470703125,
      "model_forward_time": 0.1144399642944336,
      "step": 42317
    },
    {
      "epoch": 0.000258282470703125,
      "step": 42317,
      "training_step_time": 0.4718291759490967
    },
    {
      "epoch": 0.00025828857421875,
      "model_forward_time": 0.11508989334106445,
      "step": 42318
    },
    {
      "epoch": 0.00025828857421875,
      "step": 42318,
      "training_step_time": 0.38698458671569824
    },
    {
      "epoch": 0.000258294677734375,
      "model_forward_time": 0.11502623558044434,
      "step": 42319
    },
    {
      "epoch": 0.000258294677734375,
      "step": 42319,
      "training_step_time": 0.4036409854888916
    },
    {
      "epoch": 0.00025830078125,
      "grad_norm": 0.12175052613019943,
      "learning_rate": 2.1918627920997593e-05,
      "loss": 0.0402,
      "step": 42320
    },
    {
      "epoch": 0.00025830078125,
      "model_forward_time": 0.11448478698730469,
      "step": 42320
    },
    {
      "epoch": 0.00025830078125,
      "step": 42320,
      "training_step_time": 0.3980560302734375
    },
    {
      "epoch": 0.000258306884765625,
      "model_forward_time": 0.11491560935974121,
      "step": 42321
    },
    {
      "epoch": 0.000258306884765625,
      "step": 42321,
      "training_step_time": 0.3932986259460449
    },
    {
      "epoch": 0.00025831298828125,
      "model_forward_time": 0.11563348770141602,
      "step": 42322
    },
    {
      "epoch": 0.00025831298828125,
      "step": 42322,
      "training_step_time": 0.3907890319824219
    },
    {
      "epoch": 0.000258319091796875,
      "model_forward_time": 0.11509847640991211,
      "step": 42323
    },
    {
      "epoch": 0.000258319091796875,
      "step": 42323,
      "training_step_time": 0.4064207077026367
    },
    {
      "epoch": 0.0002583251953125,
      "model_forward_time": 0.11586260795593262,
      "step": 42324
    },
    {
      "epoch": 0.0002583251953125,
      "step": 42324,
      "training_step_time": 0.3853473663330078
    },
    {
      "epoch": 0.000258331298828125,
      "model_forward_time": 0.11550331115722656,
      "step": 42325
    },
    {
      "epoch": 0.000258331298828125,
      "step": 42325,
      "training_step_time": 0.398425817489624
    },
    {
      "epoch": 0.00025833740234375,
      "model_forward_time": 0.11533284187316895,
      "step": 42326
    },
    {
      "epoch": 0.00025833740234375,
      "step": 42326,
      "training_step_time": 0.3911442756652832
    },
    {
      "epoch": 0.000258343505859375,
      "model_forward_time": 0.11517119407653809,
      "step": 42327
    },
    {
      "epoch": 0.000258343505859375,
      "step": 42327,
      "training_step_time": 0.4393599033355713
    },
    {
      "epoch": 0.000258349609375,
      "model_forward_time": 0.11498332023620605,
      "step": 42328
    },
    {
      "epoch": 0.000258349609375,
      "step": 42328,
      "training_step_time": 0.36541247367858887
    },
    {
      "epoch": 0.000258355712890625,
      "model_forward_time": 0.11590409278869629,
      "step": 42329
    },
    {
      "epoch": 0.000258355712890625,
      "step": 42329,
      "training_step_time": 0.39931535720825195
    },
    {
      "epoch": 0.00025836181640625,
      "grad_norm": 0.1248398870229721,
      "learning_rate": 2.1895831107393484e-05,
      "loss": 0.0411,
      "step": 42330
    },
    {
      "epoch": 0.00025836181640625,
      "model_forward_time": 0.11525917053222656,
      "step": 42330
    },
    {
      "epoch": 0.00025836181640625,
      "step": 42330,
      "training_step_time": 0.48624539375305176
    },
    {
      "epoch": 0.000258367919921875,
      "model_forward_time": 0.11493778228759766,
      "step": 42331
    },
    {
      "epoch": 0.000258367919921875,
      "step": 42331,
      "training_step_time": 0.41579341888427734
    },
    {
      "epoch": 0.0002583740234375,
      "model_forward_time": 0.11499977111816406,
      "step": 42332
    },
    {
      "epoch": 0.0002583740234375,
      "step": 42332,
      "training_step_time": 0.441561222076416
    },
    {
      "epoch": 0.000258380126953125,
      "model_forward_time": 0.11534619331359863,
      "step": 42333
    },
    {
      "epoch": 0.000258380126953125,
      "step": 42333,
      "training_step_time": 0.43217921257019043
    },
    {
      "epoch": 0.00025838623046875,
      "model_forward_time": 0.1147918701171875,
      "step": 42334
    },
    {
      "epoch": 0.00025838623046875,
      "step": 42334,
      "training_step_time": 0.39460158348083496
    },
    {
      "epoch": 0.000258392333984375,
      "model_forward_time": 0.11526155471801758,
      "step": 42335
    },
    {
      "epoch": 0.000258392333984375,
      "step": 42335,
      "training_step_time": 0.39299941062927246
    },
    {
      "epoch": 0.0002583984375,
      "model_forward_time": 0.11542916297912598,
      "step": 42336
    },
    {
      "epoch": 0.0002583984375,
      "step": 42336,
      "training_step_time": 0.38121891021728516
    },
    {
      "epoch": 0.000258404541015625,
      "model_forward_time": 0.11452269554138184,
      "step": 42337
    },
    {
      "epoch": 0.000258404541015625,
      "step": 42337,
      "training_step_time": 0.39670228958129883
    },
    {
      "epoch": 0.00025841064453125,
      "model_forward_time": 0.11531877517700195,
      "step": 42338
    },
    {
      "epoch": 0.00025841064453125,
      "step": 42338,
      "training_step_time": 0.4061269760131836
    },
    {
      "epoch": 0.000258416748046875,
      "model_forward_time": 0.11507534980773926,
      "step": 42339
    },
    {
      "epoch": 0.000258416748046875,
      "step": 42339,
      "training_step_time": 0.39139723777770996
    },
    {
      "epoch": 0.0002584228515625,
      "grad_norm": 0.10052474588155746,
      "learning_rate": 2.1873042831093803e-05,
      "loss": 0.0428,
      "step": 42340
    },
    {
      "epoch": 0.0002584228515625,
      "model_forward_time": 0.11551070213317871,
      "step": 42340
    },
    {
      "epoch": 0.0002584228515625,
      "step": 42340,
      "training_step_time": 0.39626407623291016
    },
    {
      "epoch": 0.000258428955078125,
      "model_forward_time": 0.11583852767944336,
      "step": 42341
    },
    {
      "epoch": 0.000258428955078125,
      "step": 42341,
      "training_step_time": 0.3995687961578369
    },
    {
      "epoch": 0.00025843505859375,
      "model_forward_time": 0.11547350883483887,
      "step": 42342
    },
    {
      "epoch": 0.00025843505859375,
      "step": 42342,
      "training_step_time": 0.4657175540924072
    },
    {
      "epoch": 0.000258441162109375,
      "model_forward_time": 0.11479401588439941,
      "step": 42343
    },
    {
      "epoch": 0.000258441162109375,
      "step": 42343,
      "training_step_time": 0.5062475204467773
    },
    {
      "epoch": 0.000258447265625,
      "model_forward_time": 0.11518526077270508,
      "step": 42344
    },
    {
      "epoch": 0.000258447265625,
      "step": 42344,
      "training_step_time": 0.429854154586792
    },
    {
      "epoch": 0.000258453369140625,
      "model_forward_time": 0.11531591415405273,
      "step": 42345
    },
    {
      "epoch": 0.000258453369140625,
      "step": 42345,
      "training_step_time": 0.4378972053527832
    },
    {
      "epoch": 0.00025845947265625,
      "model_forward_time": 0.11424922943115234,
      "step": 42346
    },
    {
      "epoch": 0.00025845947265625,
      "step": 42346,
      "training_step_time": 0.4169795513153076
    },
    {
      "epoch": 0.000258465576171875,
      "model_forward_time": 0.11458826065063477,
      "step": 42347
    },
    {
      "epoch": 0.000258465576171875,
      "step": 42347,
      "training_step_time": 0.42769718170166016
    },
    {
      "epoch": 0.0002584716796875,
      "model_forward_time": 0.1161496639251709,
      "step": 42348
    },
    {
      "epoch": 0.0002584716796875,
      "step": 42348,
      "training_step_time": 0.38339972496032715
    },
    {
      "epoch": 0.000258477783203125,
      "model_forward_time": 0.11514973640441895,
      "step": 42349
    },
    {
      "epoch": 0.000258477783203125,
      "step": 42349,
      "training_step_time": 0.4015078544616699
    },
    {
      "epoch": 0.00025848388671875,
      "grad_norm": 0.10617878288030624,
      "learning_rate": 2.1850263099021077e-05,
      "loss": 0.0361,
      "step": 42350
    },
    {
      "epoch": 0.00025848388671875,
      "model_forward_time": 0.11612606048583984,
      "step": 42350
    },
    {
      "epoch": 0.00025848388671875,
      "step": 42350,
      "training_step_time": 0.3916447162628174
    },
    {
      "epoch": 0.000258489990234375,
      "model_forward_time": 0.11419844627380371,
      "step": 42351
    },
    {
      "epoch": 0.000258489990234375,
      "step": 42351,
      "training_step_time": 0.4006640911102295
    },
    {
      "epoch": 0.00025849609375,
      "model_forward_time": 0.11524748802185059,
      "step": 42352
    },
    {
      "epoch": 0.00025849609375,
      "step": 42352,
      "training_step_time": 0.39542675018310547
    },
    {
      "epoch": 0.000258502197265625,
      "model_forward_time": 0.11512017250061035,
      "step": 42353
    },
    {
      "epoch": 0.000258502197265625,
      "step": 42353,
      "training_step_time": 0.40700292587280273
    },
    {
      "epoch": 0.00025850830078125,
      "model_forward_time": 0.1151430606842041,
      "step": 42354
    },
    {
      "epoch": 0.00025850830078125,
      "step": 42354,
      "training_step_time": 0.3802049160003662
    },
    {
      "epoch": 0.000258514404296875,
      "model_forward_time": 0.11490726470947266,
      "step": 42355
    },
    {
      "epoch": 0.000258514404296875,
      "step": 42355,
      "training_step_time": 0.3943212032318115
    },
    {
      "epoch": 0.0002585205078125,
      "model_forward_time": 0.11682701110839844,
      "step": 42356
    },
    {
      "epoch": 0.0002585205078125,
      "step": 42356,
      "training_step_time": 0.4285895824432373
    },
    {
      "epoch": 0.000258526611328125,
      "model_forward_time": 0.11477255821228027,
      "step": 42357
    },
    {
      "epoch": 0.000258526611328125,
      "step": 42357,
      "training_step_time": 0.4114508628845215
    },
    {
      "epoch": 0.00025853271484375,
      "model_forward_time": 0.11570382118225098,
      "step": 42358
    },
    {
      "epoch": 0.00025853271484375,
      "step": 42358,
      "training_step_time": 0.4527914524078369
    },
    {
      "epoch": 0.000258538818359375,
      "model_forward_time": 0.11533856391906738,
      "step": 42359
    },
    {
      "epoch": 0.000258538818359375,
      "step": 42359,
      "training_step_time": 0.5106208324432373
    },
    {
      "epoch": 0.000258544921875,
      "grad_norm": 0.0972096398472786,
      "learning_rate": 2.182749191809518e-05,
      "loss": 0.0406,
      "step": 42360
    },
    {
      "epoch": 0.000258544921875,
      "model_forward_time": 0.11534810066223145,
      "step": 42360
    },
    {
      "epoch": 0.000258544921875,
      "step": 42360,
      "training_step_time": 0.4365832805633545
    },
    {
      "epoch": 0.000258551025390625,
      "model_forward_time": 0.11478924751281738,
      "step": 42361
    },
    {
      "epoch": 0.000258551025390625,
      "step": 42361,
      "training_step_time": 0.47961854934692383
    },
    {
      "epoch": 0.00025855712890625,
      "model_forward_time": 0.11462640762329102,
      "step": 42362
    },
    {
      "epoch": 0.00025855712890625,
      "step": 42362,
      "training_step_time": 0.39370131492614746
    },
    {
      "epoch": 0.000258563232421875,
      "model_forward_time": 0.11538052558898926,
      "step": 42363
    },
    {
      "epoch": 0.000258563232421875,
      "step": 42363,
      "training_step_time": 0.3720376491546631
    },
    {
      "epoch": 0.0002585693359375,
      "model_forward_time": 0.11446070671081543,
      "step": 42364
    },
    {
      "epoch": 0.0002585693359375,
      "step": 42364,
      "training_step_time": 0.4260261058807373
    },
    {
      "epoch": 0.000258575439453125,
      "model_forward_time": 0.11507511138916016,
      "step": 42365
    },
    {
      "epoch": 0.000258575439453125,
      "step": 42365,
      "training_step_time": 0.38890814781188965
    },
    {
      "epoch": 0.00025858154296875,
      "model_forward_time": 0.1149599552154541,
      "step": 42366
    },
    {
      "epoch": 0.00025858154296875,
      "step": 42366,
      "training_step_time": 0.368269681930542
    },
    {
      "epoch": 0.000258587646484375,
      "model_forward_time": 0.11493372917175293,
      "step": 42367
    },
    {
      "epoch": 0.000258587646484375,
      "step": 42367,
      "training_step_time": 0.40172696113586426
    },
    {
      "epoch": 0.00025859375,
      "model_forward_time": 0.11584997177124023,
      "step": 42368
    },
    {
      "epoch": 0.00025859375,
      "step": 42368,
      "training_step_time": 0.41368818283081055
    },
    {
      "epoch": 0.000258599853515625,
      "model_forward_time": 0.11469841003417969,
      "step": 42369
    },
    {
      "epoch": 0.000258599853515625,
      "step": 42369,
      "training_step_time": 0.3962578773498535
    },
    {
      "epoch": 0.00025860595703125,
      "grad_norm": 0.11408668011426926,
      "learning_rate": 2.180472929523338e-05,
      "loss": 0.0372,
      "step": 42370
    },
    {
      "epoch": 0.00025860595703125,
      "model_forward_time": 0.11520648002624512,
      "step": 42370
    },
    {
      "epoch": 0.00025860595703125,
      "step": 42370,
      "training_step_time": 0.4348156452178955
    },
    {
      "epoch": 0.000258612060546875,
      "model_forward_time": 0.11499142646789551,
      "step": 42371
    },
    {
      "epoch": 0.000258612060546875,
      "step": 42371,
      "training_step_time": 0.437114953994751
    },
    {
      "epoch": 0.0002586181640625,
      "model_forward_time": 0.11440086364746094,
      "step": 42372
    },
    {
      "epoch": 0.0002586181640625,
      "step": 42372,
      "training_step_time": 0.3672919273376465
    },
    {
      "epoch": 0.000258624267578125,
      "model_forward_time": 0.11444377899169922,
      "step": 42373
    },
    {
      "epoch": 0.000258624267578125,
      "step": 42373,
      "training_step_time": 0.4716777801513672
    },
    {
      "epoch": 0.00025863037109375,
      "model_forward_time": 0.11542844772338867,
      "step": 42374
    },
    {
      "epoch": 0.00025863037109375,
      "step": 42374,
      "training_step_time": 0.4108588695526123
    },
    {
      "epoch": 0.000258636474609375,
      "model_forward_time": 0.11477828025817871,
      "step": 42375
    },
    {
      "epoch": 0.000258636474609375,
      "step": 42375,
      "training_step_time": 0.45864129066467285
    },
    {
      "epoch": 0.000258642578125,
      "model_forward_time": 0.11466526985168457,
      "step": 42376
    },
    {
      "epoch": 0.000258642578125,
      "step": 42376,
      "training_step_time": 0.3911099433898926
    },
    {
      "epoch": 0.000258648681640625,
      "model_forward_time": 0.11490035057067871,
      "step": 42377
    },
    {
      "epoch": 0.000258648681640625,
      "step": 42377,
      "training_step_time": 0.38968968391418457
    },
    {
      "epoch": 0.00025865478515625,
      "model_forward_time": 0.11525702476501465,
      "step": 42378
    },
    {
      "epoch": 0.00025865478515625,
      "step": 42378,
      "training_step_time": 0.37173986434936523
    },
    {
      "epoch": 0.000258660888671875,
      "model_forward_time": 0.11545467376708984,
      "step": 42379
    },
    {
      "epoch": 0.000258660888671875,
      "step": 42379,
      "training_step_time": 0.398939847946167
    },
    {
      "epoch": 0.0002586669921875,
      "grad_norm": 0.12012521922588348,
      "learning_rate": 2.1781975237350366e-05,
      "loss": 0.0458,
      "step": 42380
    },
    {
      "epoch": 0.0002586669921875,
      "model_forward_time": 0.11499810218811035,
      "step": 42380
    },
    {
      "epoch": 0.0002586669921875,
      "step": 42380,
      "training_step_time": 0.39830517768859863
    },
    {
      "epoch": 0.000258673095703125,
      "model_forward_time": 0.11689352989196777,
      "step": 42381
    },
    {
      "epoch": 0.000258673095703125,
      "step": 42381,
      "training_step_time": 0.39429163932800293
    },
    {
      "epoch": 0.00025867919921875,
      "model_forward_time": 0.11518263816833496,
      "step": 42382
    },
    {
      "epoch": 0.00025867919921875,
      "step": 42382,
      "training_step_time": 0.41240811347961426
    },
    {
      "epoch": 0.000258685302734375,
      "model_forward_time": 0.11517667770385742,
      "step": 42383
    },
    {
      "epoch": 0.000258685302734375,
      "step": 42383,
      "training_step_time": 0.3928391933441162
    },
    {
      "epoch": 0.00025869140625,
      "model_forward_time": 0.1146845817565918,
      "step": 42384
    },
    {
      "epoch": 0.00025869140625,
      "step": 42384,
      "training_step_time": 0.3670480251312256
    },
    {
      "epoch": 0.000258697509765625,
      "model_forward_time": 0.11436939239501953,
      "step": 42385
    },
    {
      "epoch": 0.000258697509765625,
      "step": 42385,
      "training_step_time": 0.44756388664245605
    },
    {
      "epoch": 0.00025870361328125,
      "model_forward_time": 0.1153104305267334,
      "step": 42386
    },
    {
      "epoch": 0.00025870361328125,
      "step": 42386,
      "training_step_time": 0.46577906608581543
    },
    {
      "epoch": 0.000258709716796875,
      "model_forward_time": 0.1145479679107666,
      "step": 42387
    },
    {
      "epoch": 0.000258709716796875,
      "step": 42387,
      "training_step_time": 0.39977455139160156
    },
    {
      "epoch": 0.0002587158203125,
      "model_forward_time": 0.11455535888671875,
      "step": 42388
    },
    {
      "epoch": 0.0002587158203125,
      "step": 42388,
      "training_step_time": 0.41298913955688477
    },
    {
      "epoch": 0.000258721923828125,
      "model_forward_time": 0.11492514610290527,
      "step": 42389
    },
    {
      "epoch": 0.000258721923828125,
      "step": 42389,
      "training_step_time": 0.40592002868652344
    },
    {
      "epoch": 0.00025872802734375,
      "grad_norm": 0.09315475821495056,
      "learning_rate": 2.1759229751358217e-05,
      "loss": 0.0377,
      "step": 42390
    },
    {
      "epoch": 0.00025872802734375,
      "model_forward_time": 0.11522579193115234,
      "step": 42390
    },
    {
      "epoch": 0.00025872802734375,
      "step": 42390,
      "training_step_time": 0.366840124130249
    },
    {
      "epoch": 0.000258734130859375,
      "model_forward_time": 0.11415767669677734,
      "step": 42391
    },
    {
      "epoch": 0.000258734130859375,
      "step": 42391,
      "training_step_time": 0.40224695205688477
    },
    {
      "epoch": 0.000258740234375,
      "model_forward_time": 0.11602902412414551,
      "step": 42392
    },
    {
      "epoch": 0.000258740234375,
      "step": 42392,
      "training_step_time": 0.39697933197021484
    },
    {
      "epoch": 0.000258746337890625,
      "model_forward_time": 0.11478900909423828,
      "step": 42393
    },
    {
      "epoch": 0.000258746337890625,
      "step": 42393,
      "training_step_time": 0.411975622177124
    },
    {
      "epoch": 0.00025875244140625,
      "model_forward_time": 0.11432647705078125,
      "step": 42394
    },
    {
      "epoch": 0.00025875244140625,
      "step": 42394,
      "training_step_time": 0.404665470123291
    },
    {
      "epoch": 0.000258758544921875,
      "model_forward_time": 0.11446332931518555,
      "step": 42395
    },
    {
      "epoch": 0.000258758544921875,
      "step": 42395,
      "training_step_time": 0.4005274772644043
    },
    {
      "epoch": 0.0002587646484375,
      "model_forward_time": 0.11469650268554688,
      "step": 42396
    },
    {
      "epoch": 0.0002587646484375,
      "step": 42396,
      "training_step_time": 0.3660454750061035
    },
    {
      "epoch": 0.000258770751953125,
      "model_forward_time": 0.1158454418182373,
      "step": 42397
    },
    {
      "epoch": 0.000258770751953125,
      "step": 42397,
      "training_step_time": 0.40764594078063965
    },
    {
      "epoch": 0.00025877685546875,
      "model_forward_time": 0.11439275741577148,
      "step": 42398
    },
    {
      "epoch": 0.00025877685546875,
      "step": 42398,
      "training_step_time": 0.39891505241394043
    },
    {
      "epoch": 0.000258782958984375,
      "model_forward_time": 0.11418771743774414,
      "step": 42399
    },
    {
      "epoch": 0.000258782958984375,
      "step": 42399,
      "training_step_time": 0.4662766456604004
    },
    {
      "epoch": 0.0002587890625,
      "grad_norm": 0.16397391259670258,
      "learning_rate": 2.1736492844166407e-05,
      "loss": 0.0347,
      "step": 42400
    },
    {
      "epoch": 0.0002587890625,
      "model_forward_time": 0.11503076553344727,
      "step": 42400
    },
    {
      "epoch": 0.0002587890625,
      "step": 42400,
      "training_step_time": 0.468656063079834
    },
    {
      "epoch": 0.000258795166015625,
      "model_forward_time": 0.11500835418701172,
      "step": 42401
    },
    {
      "epoch": 0.000258795166015625,
      "step": 42401,
      "training_step_time": 0.40099167823791504
    },
    {
      "epoch": 0.00025880126953125,
      "model_forward_time": 0.11564016342163086,
      "step": 42402
    },
    {
      "epoch": 0.00025880126953125,
      "step": 42402,
      "training_step_time": 0.4664735794067383
    },
    {
      "epoch": 0.000258807373046875,
      "model_forward_time": 0.11448097229003906,
      "step": 42403
    },
    {
      "epoch": 0.000258807373046875,
      "step": 42403,
      "training_step_time": 0.39870238304138184
    },
    {
      "epoch": 0.0002588134765625,
      "model_forward_time": 0.11484646797180176,
      "step": 42404
    },
    {
      "epoch": 0.0002588134765625,
      "step": 42404,
      "training_step_time": 0.4187657833099365
    },
    {
      "epoch": 0.000258819580078125,
      "model_forward_time": 0.11469221115112305,
      "step": 42405
    },
    {
      "epoch": 0.000258819580078125,
      "step": 42405,
      "training_step_time": 0.3943343162536621
    },
    {
      "epoch": 0.00025882568359375,
      "model_forward_time": 0.11529135704040527,
      "step": 42406
    },
    {
      "epoch": 0.00025882568359375,
      "step": 42406,
      "training_step_time": 0.4008476734161377
    },
    {
      "epoch": 0.000258831787109375,
      "model_forward_time": 0.11471939086914062,
      "step": 42407
    },
    {
      "epoch": 0.000258831787109375,
      "step": 42407,
      "training_step_time": 0.38456249237060547
    },
    {
      "epoch": 0.000258837890625,
      "model_forward_time": 0.11465167999267578,
      "step": 42408
    },
    {
      "epoch": 0.000258837890625,
      "step": 42408,
      "training_step_time": 0.36619043350219727
    },
    {
      "epoch": 0.000258843994140625,
      "model_forward_time": 0.11455535888671875,
      "step": 42409
    },
    {
      "epoch": 0.000258843994140625,
      "step": 42409,
      "training_step_time": 0.39855051040649414
    },
    {
      "epoch": 0.00025885009765625,
      "grad_norm": 0.10767550766468048,
      "learning_rate": 2.1713764522681846e-05,
      "loss": 0.0358,
      "step": 42410
    },
    {
      "epoch": 0.00025885009765625,
      "model_forward_time": 0.11514115333557129,
      "step": 42410
    },
    {
      "epoch": 0.00025885009765625,
      "step": 42410,
      "training_step_time": 0.4019136428833008
    },
    {
      "epoch": 0.000258856201171875,
      "model_forward_time": 0.11597251892089844,
      "step": 42411
    },
    {
      "epoch": 0.000258856201171875,
      "step": 42411,
      "training_step_time": 0.40035581588745117
    },
    {
      "epoch": 0.0002588623046875,
      "model_forward_time": 0.11470627784729004,
      "step": 42412
    },
    {
      "epoch": 0.0002588623046875,
      "step": 42412,
      "training_step_time": 0.39238476753234863
    },
    {
      "epoch": 0.000258868408203125,
      "model_forward_time": 0.1155548095703125,
      "step": 42413
    },
    {
      "epoch": 0.000258868408203125,
      "step": 42413,
      "training_step_time": 0.47333669662475586
    },
    {
      "epoch": 0.00025887451171875,
      "model_forward_time": 0.11439943313598633,
      "step": 42414
    },
    {
      "epoch": 0.00025887451171875,
      "step": 42414,
      "training_step_time": 0.37181830406188965
    },
    {
      "epoch": 0.000258880615234375,
      "model_forward_time": 0.11528372764587402,
      "step": 42415
    },
    {
      "epoch": 0.000258880615234375,
      "step": 42415,
      "training_step_time": 0.4405629634857178
    },
    {
      "epoch": 0.00025888671875,
      "model_forward_time": 0.11436128616333008,
      "step": 42416
    },
    {
      "epoch": 0.00025888671875,
      "step": 42416,
      "training_step_time": 0.4286034107208252
    },
    {
      "epoch": 0.000258892822265625,
      "model_forward_time": 0.11433172225952148,
      "step": 42417
    },
    {
      "epoch": 0.000258892822265625,
      "step": 42417,
      "training_step_time": 0.3991811275482178
    },
    {
      "epoch": 0.00025889892578125,
      "model_forward_time": 0.11415696144104004,
      "step": 42418
    },
    {
      "epoch": 0.00025889892578125,
      "step": 42418,
      "training_step_time": 0.44153594970703125
    },
    {
      "epoch": 0.000258905029296875,
      "model_forward_time": 0.11439299583435059,
      "step": 42419
    },
    {
      "epoch": 0.000258905029296875,
      "step": 42419,
      "training_step_time": 0.3870518207550049
    },
    {
      "epoch": 0.0002589111328125,
      "grad_norm": 0.12176018208265305,
      "learning_rate": 2.1691044793808734e-05,
      "loss": 0.0397,
      "step": 42420
    },
    {
      "epoch": 0.0002589111328125,
      "model_forward_time": 0.11553788185119629,
      "step": 42420
    },
    {
      "epoch": 0.0002589111328125,
      "step": 42420,
      "training_step_time": 0.3811206817626953
    },
    {
      "epoch": 0.000258917236328125,
      "model_forward_time": 0.11484813690185547,
      "step": 42421
    },
    {
      "epoch": 0.000258917236328125,
      "step": 42421,
      "training_step_time": 0.39557385444641113
    },
    {
      "epoch": 0.00025892333984375,
      "model_forward_time": 0.11470460891723633,
      "step": 42422
    },
    {
      "epoch": 0.00025892333984375,
      "step": 42422,
      "training_step_time": 0.4087493419647217
    },
    {
      "epoch": 0.000258929443359375,
      "model_forward_time": 0.11496138572692871,
      "step": 42423
    },
    {
      "epoch": 0.000258929443359375,
      "step": 42423,
      "training_step_time": 0.3875417709350586
    },
    {
      "epoch": 0.000258935546875,
      "model_forward_time": 0.11457061767578125,
      "step": 42424
    },
    {
      "epoch": 0.000258935546875,
      "step": 42424,
      "training_step_time": 0.3917975425720215
    },
    {
      "epoch": 0.000258941650390625,
      "model_forward_time": 0.11555600166320801,
      "step": 42425
    },
    {
      "epoch": 0.000258941650390625,
      "step": 42425,
      "training_step_time": 0.4013786315917969
    },
    {
      "epoch": 0.00025894775390625,
      "model_forward_time": 0.11444950103759766,
      "step": 42426
    },
    {
      "epoch": 0.00025894775390625,
      "step": 42426,
      "training_step_time": 0.36160850524902344
    },
    {
      "epoch": 0.000258953857421875,
      "model_forward_time": 0.1154017448425293,
      "step": 42427
    },
    {
      "epoch": 0.000258953857421875,
      "step": 42427,
      "training_step_time": 0.4393184185028076
    },
    {
      "epoch": 0.0002589599609375,
      "model_forward_time": 0.1146390438079834,
      "step": 42428
    },
    {
      "epoch": 0.0002589599609375,
      "step": 42428,
      "training_step_time": 0.4069514274597168
    },
    {
      "epoch": 0.000258966064453125,
      "model_forward_time": 0.11559224128723145,
      "step": 42429
    },
    {
      "epoch": 0.000258966064453125,
      "step": 42429,
      "training_step_time": 0.43256449699401855
    },
    {
      "epoch": 0.00025897216796875,
      "grad_norm": 0.11675557494163513,
      "learning_rate": 2.1668333664448776e-05,
      "loss": 0.0363,
      "step": 42430
    },
    {
      "epoch": 0.00025897216796875,
      "model_forward_time": 0.11460328102111816,
      "step": 42430
    },
    {
      "epoch": 0.00025897216796875,
      "step": 42430,
      "training_step_time": 0.5053796768188477
    },
    {
      "epoch": 0.000258978271484375,
      "model_forward_time": 0.11482810974121094,
      "step": 42431
    },
    {
      "epoch": 0.000258978271484375,
      "step": 42431,
      "training_step_time": 0.46291279792785645
    },
    {
      "epoch": 0.000258984375,
      "model_forward_time": 0.11561822891235352,
      "step": 42432
    },
    {
      "epoch": 0.000258984375,
      "step": 42432,
      "training_step_time": 0.4606759548187256
    },
    {
      "epoch": 0.000258990478515625,
      "model_forward_time": 0.11425590515136719,
      "step": 42433
    },
    {
      "epoch": 0.000258990478515625,
      "step": 42433,
      "training_step_time": 0.3940122127532959
    },
    {
      "epoch": 0.00025899658203125,
      "model_forward_time": 0.11462903022766113,
      "step": 42434
    },
    {
      "epoch": 0.00025899658203125,
      "step": 42434,
      "training_step_time": 0.39398741722106934
    },
    {
      "epoch": 0.000259002685546875,
      "model_forward_time": 0.11445379257202148,
      "step": 42435
    },
    {
      "epoch": 0.000259002685546875,
      "step": 42435,
      "training_step_time": 0.3903355598449707
    },
    {
      "epoch": 0.0002590087890625,
      "model_forward_time": 0.11458182334899902,
      "step": 42436
    },
    {
      "epoch": 0.0002590087890625,
      "step": 42436,
      "training_step_time": 0.38674306869506836
    },
    {
      "epoch": 0.000259014892578125,
      "model_forward_time": 0.11478710174560547,
      "step": 42437
    },
    {
      "epoch": 0.000259014892578125,
      "step": 42437,
      "training_step_time": 0.3979966640472412
    },
    {
      "epoch": 0.00025902099609375,
      "model_forward_time": 0.11556172370910645,
      "step": 42438
    },
    {
      "epoch": 0.00025902099609375,
      "step": 42438,
      "training_step_time": 0.3859982490539551
    },
    {
      "epoch": 0.000259027099609375,
      "model_forward_time": 0.11553788185119629,
      "step": 42439
    },
    {
      "epoch": 0.000259027099609375,
      "step": 42439,
      "training_step_time": 0.3970191478729248
    },
    {
      "epoch": 0.000259033203125,
      "grad_norm": 0.1490819901227951,
      "learning_rate": 2.1645631141500994e-05,
      "loss": 0.0409,
      "step": 42440
    },
    {
      "epoch": 0.000259033203125,
      "model_forward_time": 0.1156473159790039,
      "step": 42440
    },
    {
      "epoch": 0.000259033203125,
      "step": 42440,
      "training_step_time": 0.4496898651123047
    },
    {
      "epoch": 0.000259039306640625,
      "model_forward_time": 0.1150820255279541,
      "step": 42441
    },
    {
      "epoch": 0.000259039306640625,
      "step": 42441,
      "training_step_time": 0.43913912773132324
    },
    {
      "epoch": 0.00025904541015625,
      "model_forward_time": 0.1149749755859375,
      "step": 42442
    },
    {
      "epoch": 0.00025904541015625,
      "step": 42442,
      "training_step_time": 0.39667391777038574
    },
    {
      "epoch": 0.000259051513671875,
      "model_forward_time": 0.11533808708190918,
      "step": 42443
    },
    {
      "epoch": 0.000259051513671875,
      "step": 42443,
      "training_step_time": 0.36849236488342285
    },
    {
      "epoch": 0.0002590576171875,
      "model_forward_time": 0.11531496047973633,
      "step": 42444
    },
    {
      "epoch": 0.0002590576171875,
      "step": 42444,
      "training_step_time": 0.42900991439819336
    },
    {
      "epoch": 0.000259063720703125,
      "model_forward_time": 0.11531853675842285,
      "step": 42445
    },
    {
      "epoch": 0.000259063720703125,
      "step": 42445,
      "training_step_time": 0.48716115951538086
    },
    {
      "epoch": 0.00025906982421875,
      "model_forward_time": 0.11465930938720703,
      "step": 42446
    },
    {
      "epoch": 0.00025906982421875,
      "step": 42446,
      "training_step_time": 0.406846284866333
    },
    {
      "epoch": 0.000259075927734375,
      "model_forward_time": 0.11466002464294434,
      "step": 42447
    },
    {
      "epoch": 0.000259075927734375,
      "step": 42447,
      "training_step_time": 0.4601280689239502
    },
    {
      "epoch": 0.00025908203125,
      "model_forward_time": 0.11466240882873535,
      "step": 42448
    },
    {
      "epoch": 0.00025908203125,
      "step": 42448,
      "training_step_time": 0.3853421211242676
    },
    {
      "epoch": 0.000259088134765625,
      "model_forward_time": 0.11477255821228027,
      "step": 42449
    },
    {
      "epoch": 0.000259088134765625,
      "step": 42449,
      "training_step_time": 0.41059017181396484
    },
    {
      "epoch": 0.00025909423828125,
      "grad_norm": 0.08287352323532104,
      "learning_rate": 2.1622937231861822e-05,
      "loss": 0.0364,
      "step": 42450
    },
    {
      "epoch": 0.00025909423828125,
      "model_forward_time": 0.1147768497467041,
      "step": 42450
    },
    {
      "epoch": 0.00025909423828125,
      "step": 42450,
      "training_step_time": 0.3649458885192871
    },
    {
      "epoch": 0.000259100341796875,
      "model_forward_time": 0.11476635932922363,
      "step": 42451
    },
    {
      "epoch": 0.000259100341796875,
      "step": 42451,
      "training_step_time": 0.3990054130554199
    },
    {
      "epoch": 0.0002591064453125,
      "model_forward_time": 0.11577653884887695,
      "step": 42452
    },
    {
      "epoch": 0.0002591064453125,
      "step": 42452,
      "training_step_time": 0.39957737922668457
    },
    {
      "epoch": 0.000259112548828125,
      "model_forward_time": 0.11603188514709473,
      "step": 42453
    },
    {
      "epoch": 0.000259112548828125,
      "step": 42453,
      "training_step_time": 0.4363701343536377
    },
    {
      "epoch": 0.00025911865234375,
      "model_forward_time": 0.11509251594543457,
      "step": 42454
    },
    {
      "epoch": 0.00025911865234375,
      "step": 42454,
      "training_step_time": 0.41060829162597656
    },
    {
      "epoch": 0.000259124755859375,
      "model_forward_time": 0.11515092849731445,
      "step": 42455
    },
    {
      "epoch": 0.000259124755859375,
      "step": 42455,
      "training_step_time": 0.3966963291168213
    },
    {
      "epoch": 0.000259130859375,
      "model_forward_time": 0.11424851417541504,
      "step": 42456
    },
    {
      "epoch": 0.000259130859375,
      "step": 42456,
      "training_step_time": 0.36660170555114746
    },
    {
      "epoch": 0.000259136962890625,
      "model_forward_time": 0.11467289924621582,
      "step": 42457
    },
    {
      "epoch": 0.000259136962890625,
      "step": 42457,
      "training_step_time": 0.4154481887817383
    },
    {
      "epoch": 0.00025914306640625,
      "model_forward_time": 0.11484169960021973,
      "step": 42458
    },
    {
      "epoch": 0.00025914306640625,
      "step": 42458,
      "training_step_time": 0.41164278984069824
    },
    {
      "epoch": 0.000259149169921875,
      "model_forward_time": 0.1147620677947998,
      "step": 42459
    },
    {
      "epoch": 0.000259149169921875,
      "step": 42459,
      "training_step_time": 0.46899890899658203
    },
    {
      "epoch": 0.0002591552734375,
      "grad_norm": 0.13820932805538177,
      "learning_rate": 2.1600251942425066e-05,
      "loss": 0.0408,
      "step": 42460
    },
    {
      "epoch": 0.0002591552734375,
      "model_forward_time": 0.11515522003173828,
      "step": 42460
    },
    {
      "epoch": 0.0002591552734375,
      "step": 42460,
      "training_step_time": 0.4134531021118164
    },
    {
      "epoch": 0.000259161376953125,
      "model_forward_time": 0.11487340927124023,
      "step": 42461
    },
    {
      "epoch": 0.000259161376953125,
      "step": 42461,
      "training_step_time": 0.4592924118041992
    },
    {
      "epoch": 0.00025916748046875,
      "model_forward_time": 0.11497139930725098,
      "step": 42462
    },
    {
      "epoch": 0.00025916748046875,
      "step": 42462,
      "training_step_time": 0.38530445098876953
    },
    {
      "epoch": 0.000259173583984375,
      "model_forward_time": 0.11492347717285156,
      "step": 42463
    },
    {
      "epoch": 0.000259173583984375,
      "step": 42463,
      "training_step_time": 0.4017598628997803
    },
    {
      "epoch": 0.0002591796875,
      "model_forward_time": 0.11562442779541016,
      "step": 42464
    },
    {
      "epoch": 0.0002591796875,
      "step": 42464,
      "training_step_time": 0.4185645580291748
    },
    {
      "epoch": 0.000259185791015625,
      "model_forward_time": 0.11543083190917969,
      "step": 42465
    },
    {
      "epoch": 0.000259185791015625,
      "step": 42465,
      "training_step_time": 0.39026880264282227
    },
    {
      "epoch": 0.00025919189453125,
      "model_forward_time": 0.11477446556091309,
      "step": 42466
    },
    {
      "epoch": 0.00025919189453125,
      "step": 42466,
      "training_step_time": 0.3874838352203369
    },
    {
      "epoch": 0.000259197998046875,
      "model_forward_time": 0.11495733261108398,
      "step": 42467
    },
    {
      "epoch": 0.000259197998046875,
      "step": 42467,
      "training_step_time": 0.4086728096008301
    },
    {
      "epoch": 0.0002592041015625,
      "model_forward_time": 0.11587810516357422,
      "step": 42468
    },
    {
      "epoch": 0.0002592041015625,
      "step": 42468,
      "training_step_time": 0.3978440761566162
    },
    {
      "epoch": 0.000259210205078125,
      "model_forward_time": 0.11565279960632324,
      "step": 42469
    },
    {
      "epoch": 0.000259210205078125,
      "step": 42469,
      "training_step_time": 0.4106423854827881
    },
    {
      "epoch": 0.00025921630859375,
      "grad_norm": 0.11770796030759811,
      "learning_rate": 2.1577575280081896e-05,
      "loss": 0.0387,
      "step": 42470
    },
    {
      "epoch": 0.00025921630859375,
      "model_forward_time": 0.11490583419799805,
      "step": 42470
    },
    {
      "epoch": 0.00025921630859375,
      "step": 42470,
      "training_step_time": 0.4015533924102783
    },
    {
      "epoch": 0.000259222412109375,
      "model_forward_time": 0.11568784713745117,
      "step": 42471
    },
    {
      "epoch": 0.000259222412109375,
      "step": 42471,
      "training_step_time": 0.3976261615753174
    },
    {
      "epoch": 0.000259228515625,
      "model_forward_time": 0.11557984352111816,
      "step": 42472
    },
    {
      "epoch": 0.000259228515625,
      "step": 42472,
      "training_step_time": 0.44351768493652344
    },
    {
      "epoch": 0.000259234619140625,
      "model_forward_time": 0.11468362808227539,
      "step": 42473
    },
    {
      "epoch": 0.000259234619140625,
      "step": 42473,
      "training_step_time": 0.5151782035827637
    },
    {
      "epoch": 0.00025924072265625,
      "model_forward_time": 0.11529302597045898,
      "step": 42474
    },
    {
      "epoch": 0.00025924072265625,
      "step": 42474,
      "training_step_time": 0.4909825325012207
    },
    {
      "epoch": 0.000259246826171875,
      "model_forward_time": 0.11449265480041504,
      "step": 42475
    },
    {
      "epoch": 0.000259246826171875,
      "step": 42475,
      "training_step_time": 0.5164191722869873
    },
    {
      "epoch": 0.0002592529296875,
      "model_forward_time": 0.11424589157104492,
      "step": 42476
    },
    {
      "epoch": 0.0002592529296875,
      "step": 42476,
      "training_step_time": 0.3950786590576172
    },
    {
      "epoch": 0.000259259033203125,
      "model_forward_time": 0.11417865753173828,
      "step": 42477
    },
    {
      "epoch": 0.000259259033203125,
      "step": 42477,
      "training_step_time": 0.3854391574859619
    },
    {
      "epoch": 0.00025926513671875,
      "model_forward_time": 0.11440014839172363,
      "step": 42478
    },
    {
      "epoch": 0.00025926513671875,
      "step": 42478,
      "training_step_time": 0.38623046875
    },
    {
      "epoch": 0.000259271240234375,
      "model_forward_time": 0.11540555953979492,
      "step": 42479
    },
    {
      "epoch": 0.000259271240234375,
      "step": 42479,
      "training_step_time": 0.3947336673736572
    },
    {
      "epoch": 0.00025927734375,
      "grad_norm": 0.15074509382247925,
      "learning_rate": 2.1554907251720945e-05,
      "loss": 0.0421,
      "step": 42480
    },
    {
      "epoch": 0.00025927734375,
      "model_forward_time": 0.11523652076721191,
      "step": 42480
    },
    {
      "epoch": 0.00025927734375,
      "step": 42480,
      "training_step_time": 0.375
    },
    {
      "epoch": 0.000259283447265625,
      "model_forward_time": 0.11449790000915527,
      "step": 42481
    },
    {
      "epoch": 0.000259283447265625,
      "step": 42481,
      "training_step_time": 0.4078686237335205
    },
    {
      "epoch": 0.00025928955078125,
      "model_forward_time": 0.11470341682434082,
      "step": 42482
    },
    {
      "epoch": 0.00025928955078125,
      "step": 42482,
      "training_step_time": 0.39751243591308594
    },
    {
      "epoch": 0.000259295654296875,
      "model_forward_time": 0.11509537696838379,
      "step": 42483
    },
    {
      "epoch": 0.000259295654296875,
      "step": 42483,
      "training_step_time": 0.3912472724914551
    },
    {
      "epoch": 0.0002593017578125,
      "model_forward_time": 0.11591958999633789,
      "step": 42484
    },
    {
      "epoch": 0.0002593017578125,
      "step": 42484,
      "training_step_time": 0.3914337158203125
    },
    {
      "epoch": 0.000259307861328125,
      "model_forward_time": 0.11503386497497559,
      "step": 42485
    },
    {
      "epoch": 0.000259307861328125,
      "step": 42485,
      "training_step_time": 0.45269083976745605
    },
    {
      "epoch": 0.00025931396484375,
      "model_forward_time": 0.11515474319458008,
      "step": 42486
    },
    {
      "epoch": 0.00025931396484375,
      "step": 42486,
      "training_step_time": 0.3881957530975342
    },
    {
      "epoch": 0.000259320068359375,
      "model_forward_time": 0.11519217491149902,
      "step": 42487
    },
    {
      "epoch": 0.000259320068359375,
      "step": 42487,
      "training_step_time": 0.5088129043579102
    },
    {
      "epoch": 0.000259326171875,
      "model_forward_time": 0.11487889289855957,
      "step": 42488
    },
    {
      "epoch": 0.000259326171875,
      "step": 42488,
      "training_step_time": 0.44603919982910156
    },
    {
      "epoch": 0.000259332275390625,
      "model_forward_time": 0.11473250389099121,
      "step": 42489
    },
    {
      "epoch": 0.000259332275390625,
      "step": 42489,
      "training_step_time": 0.42564940452575684
    },
    {
      "epoch": 0.00025933837890625,
      "grad_norm": 0.13384291529655457,
      "learning_rate": 2.1532247864228084e-05,
      "loss": 0.0377,
      "step": 42490
    },
    {
      "epoch": 0.00025933837890625,
      "model_forward_time": 0.11501383781433105,
      "step": 42490
    },
    {
      "epoch": 0.00025933837890625,
      "step": 42490,
      "training_step_time": 0.46387410163879395
    },
    {
      "epoch": 0.000259344482421875,
      "model_forward_time": 0.11492347717285156,
      "step": 42491
    },
    {
      "epoch": 0.000259344482421875,
      "step": 42491,
      "training_step_time": 0.39725327491760254
    },
    {
      "epoch": 0.0002593505859375,
      "model_forward_time": 0.1151125431060791,
      "step": 42492
    },
    {
      "epoch": 0.0002593505859375,
      "step": 42492,
      "training_step_time": 0.3690354824066162
    },
    {
      "epoch": 0.000259356689453125,
      "model_forward_time": 0.11544466018676758,
      "step": 42493
    },
    {
      "epoch": 0.000259356689453125,
      "step": 42493,
      "training_step_time": 0.400531530380249
    },
    {
      "epoch": 0.00025936279296875,
      "model_forward_time": 0.11504244804382324,
      "step": 42494
    },
    {
      "epoch": 0.00025936279296875,
      "step": 42494,
      "training_step_time": 0.42299604415893555
    },
    {
      "epoch": 0.000259368896484375,
      "model_forward_time": 0.11466670036315918,
      "step": 42495
    },
    {
      "epoch": 0.000259368896484375,
      "step": 42495,
      "training_step_time": 0.42592597007751465
    },
    {
      "epoch": 0.000259375,
      "model_forward_time": 0.11529684066772461,
      "step": 42496
    },
    {
      "epoch": 0.000259375,
      "step": 42496,
      "training_step_time": 0.3906404972076416
    },
    {
      "epoch": 0.000259381103515625,
      "model_forward_time": 0.11502838134765625,
      "step": 42497
    },
    {
      "epoch": 0.000259381103515625,
      "step": 42497,
      "training_step_time": 0.40482115745544434
    },
    {
      "epoch": 0.00025938720703125,
      "model_forward_time": 0.11523580551147461,
      "step": 42498
    },
    {
      "epoch": 0.00025938720703125,
      "step": 42498,
      "training_step_time": 0.36981725692749023
    },
    {
      "epoch": 0.000259393310546875,
      "model_forward_time": 0.1148061752319336,
      "step": 42499
    },
    {
      "epoch": 0.000259393310546875,
      "step": 42499,
      "training_step_time": 0.40997815132141113
    },
    {
      "epoch": 0.0002593994140625,
      "grad_norm": 0.09042766690254211,
      "learning_rate": 2.150959712448669e-05,
      "loss": 0.0349,
      "step": 42500
    },
    {
      "epoch": 0.0002593994140625,
      "model_forward_time": 0.11478972434997559,
      "step": 42500
    },
    {
      "epoch": 0.0002593994140625,
      "step": 42500,
      "training_step_time": 0.40733814239501953
    },
    {
      "epoch": 0.000259405517578125,
      "model_forward_time": 0.11640644073486328,
      "step": 42501
    },
    {
      "epoch": 0.000259405517578125,
      "step": 42501,
      "training_step_time": 0.5356433391571045
    },
    {
      "epoch": 0.00025941162109375,
      "model_forward_time": 0.11519646644592285,
      "step": 42502
    },
    {
      "epoch": 0.00025941162109375,
      "step": 42502,
      "training_step_time": 0.4112062454223633
    },
    {
      "epoch": 0.000259417724609375,
      "model_forward_time": 0.1143639087677002,
      "step": 42503
    },
    {
      "epoch": 0.000259417724609375,
      "step": 42503,
      "training_step_time": 0.4574873447418213
    },
    {
      "epoch": 0.000259423828125,
      "model_forward_time": 0.11566925048828125,
      "step": 42504
    },
    {
      "epoch": 0.000259423828125,
      "step": 42504,
      "training_step_time": 0.4274907112121582
    },
    {
      "epoch": 0.000259429931640625,
      "model_forward_time": 0.11447262763977051,
      "step": 42505
    },
    {
      "epoch": 0.000259429931640625,
      "step": 42505,
      "training_step_time": 0.4673137664794922
    },
    {
      "epoch": 0.00025943603515625,
      "model_forward_time": 0.11482644081115723,
      "step": 42506
    },
    {
      "epoch": 0.00025943603515625,
      "step": 42506,
      "training_step_time": 0.4028804302215576
    },
    {
      "epoch": 0.000259442138671875,
      "model_forward_time": 0.11480426788330078,
      "step": 42507
    },
    {
      "epoch": 0.000259442138671875,
      "step": 42507,
      "training_step_time": 0.4154696464538574
    },
    {
      "epoch": 0.0002594482421875,
      "model_forward_time": 0.11601114273071289,
      "step": 42508
    },
    {
      "epoch": 0.0002594482421875,
      "step": 42508,
      "training_step_time": 0.4738438129425049
    },
    {
      "epoch": 0.000259454345703125,
      "model_forward_time": 0.11455392837524414,
      "step": 42509
    },
    {
      "epoch": 0.000259454345703125,
      "step": 42509,
      "training_step_time": 0.4047565460205078
    },
    {
      "epoch": 0.00025946044921875,
      "grad_norm": 0.12363211065530777,
      "learning_rate": 2.148695503937745e-05,
      "loss": 0.0366,
      "step": 42510
    },
    {
      "epoch": 0.00025946044921875,
      "model_forward_time": 0.11621308326721191,
      "step": 42510
    },
    {
      "epoch": 0.00025946044921875,
      "step": 42510,
      "training_step_time": 0.379622220993042
    },
    {
      "epoch": 0.000259466552734375,
      "model_forward_time": 0.11554765701293945,
      "step": 42511
    },
    {
      "epoch": 0.000259466552734375,
      "step": 42511,
      "training_step_time": 0.3930068016052246
    },
    {
      "epoch": 0.00025947265625,
      "model_forward_time": 0.11477828025817871,
      "step": 42512
    },
    {
      "epoch": 0.00025947265625,
      "step": 42512,
      "training_step_time": 0.40192127227783203
    },
    {
      "epoch": 0.000259478759765625,
      "model_forward_time": 0.11487650871276855,
      "step": 42513
    },
    {
      "epoch": 0.000259478759765625,
      "step": 42513,
      "training_step_time": 0.3965585231781006
    },
    {
      "epoch": 0.00025948486328125,
      "model_forward_time": 0.11473894119262695,
      "step": 42514
    },
    {
      "epoch": 0.00025948486328125,
      "step": 42514,
      "training_step_time": 0.39699411392211914
    },
    {
      "epoch": 0.000259490966796875,
      "model_forward_time": 0.11545801162719727,
      "step": 42515
    },
    {
      "epoch": 0.000259490966796875,
      "step": 42515,
      "training_step_time": 0.4089956283569336
    },
    {
      "epoch": 0.0002594970703125,
      "model_forward_time": 0.11481118202209473,
      "step": 42516
    },
    {
      "epoch": 0.0002594970703125,
      "step": 42516,
      "training_step_time": 0.36779212951660156
    },
    {
      "epoch": 0.000259503173828125,
      "model_forward_time": 0.11473703384399414,
      "step": 42517
    },
    {
      "epoch": 0.000259503173828125,
      "step": 42517,
      "training_step_time": 0.43137550354003906
    },
    {
      "epoch": 0.00025950927734375,
      "model_forward_time": 0.1149599552154541,
      "step": 42518
    },
    {
      "epoch": 0.00025950927734375,
      "step": 42518,
      "training_step_time": 0.41727399826049805
    },
    {
      "epoch": 0.000259515380859375,
      "model_forward_time": 0.11527538299560547,
      "step": 42519
    },
    {
      "epoch": 0.000259515380859375,
      "step": 42519,
      "training_step_time": 0.4565420150756836
    },
    {
      "epoch": 0.000259521484375,
      "grad_norm": 0.08277396112680435,
      "learning_rate": 2.1464321615778422e-05,
      "loss": 0.0342,
      "step": 42520
    },
    {
      "epoch": 0.000259521484375,
      "model_forward_time": 0.11470603942871094,
      "step": 42520
    },
    {
      "epoch": 0.000259521484375,
      "step": 42520,
      "training_step_time": 0.3892369270324707
    },
    {
      "epoch": 0.000259527587890625,
      "model_forward_time": 0.11466121673583984,
      "step": 42521
    },
    {
      "epoch": 0.000259527587890625,
      "step": 42521,
      "training_step_time": 0.41882872581481934
    },
    {
      "epoch": 0.00025953369140625,
      "model_forward_time": 0.1149907112121582,
      "step": 42522
    },
    {
      "epoch": 0.00025953369140625,
      "step": 42522,
      "training_step_time": 0.368131160736084
    },
    {
      "epoch": 0.000259539794921875,
      "model_forward_time": 0.11563968658447266,
      "step": 42523
    },
    {
      "epoch": 0.000259539794921875,
      "step": 42523,
      "training_step_time": 0.39197397232055664
    },
    {
      "epoch": 0.0002595458984375,
      "model_forward_time": 0.11538314819335938,
      "step": 42524
    },
    {
      "epoch": 0.0002595458984375,
      "step": 42524,
      "training_step_time": 0.40913891792297363
    },
    {
      "epoch": 0.000259552001953125,
      "model_forward_time": 0.11476302146911621,
      "step": 42525
    },
    {
      "epoch": 0.000259552001953125,
      "step": 42525,
      "training_step_time": 0.41023826599121094
    },
    {
      "epoch": 0.00025955810546875,
      "model_forward_time": 0.11461377143859863,
      "step": 42526
    },
    {
      "epoch": 0.00025955810546875,
      "step": 42526,
      "training_step_time": 0.38800597190856934
    },
    {
      "epoch": 0.000259564208984375,
      "model_forward_time": 0.11502480506896973,
      "step": 42527
    },
    {
      "epoch": 0.000259564208984375,
      "step": 42527,
      "training_step_time": 0.4486262798309326
    },
    {
      "epoch": 0.0002595703125,
      "model_forward_time": 0.11450886726379395,
      "step": 42528
    },
    {
      "epoch": 0.0002595703125,
      "step": 42528,
      "training_step_time": 0.3692049980163574
    },
    {
      "epoch": 0.000259576416015625,
      "model_forward_time": 0.11525535583496094,
      "step": 42529
    },
    {
      "epoch": 0.000259576416015625,
      "step": 42529,
      "training_step_time": 0.39623594284057617
    },
    {
      "epoch": 0.00025958251953125,
      "grad_norm": 0.10066699981689453,
      "learning_rate": 2.1441696860565048e-05,
      "loss": 0.041,
      "step": 42530
    },
    {
      "epoch": 0.00025958251953125,
      "model_forward_time": 0.11512422561645508,
      "step": 42530
    },
    {
      "epoch": 0.00025958251953125,
      "step": 42530,
      "training_step_time": 0.3749532699584961
    },
    {
      "epoch": 0.000259588623046875,
      "model_forward_time": 0.11522603034973145,
      "step": 42531
    },
    {
      "epoch": 0.000259588623046875,
      "step": 42531,
      "training_step_time": 0.46593475341796875
    },
    {
      "epoch": 0.0002595947265625,
      "model_forward_time": 0.11444783210754395,
      "step": 42532
    },
    {
      "epoch": 0.0002595947265625,
      "step": 42532,
      "training_step_time": 0.48047471046447754
    },
    {
      "epoch": 0.000259600830078125,
      "model_forward_time": 0.11469197273254395,
      "step": 42533
    },
    {
      "epoch": 0.000259600830078125,
      "step": 42533,
      "training_step_time": 0.4791090488433838
    },
    {
      "epoch": 0.00025960693359375,
      "model_forward_time": 0.11486387252807617,
      "step": 42534
    },
    {
      "epoch": 0.00025960693359375,
      "step": 42534,
      "training_step_time": 0.3671269416809082
    },
    {
      "epoch": 0.000259613037109375,
      "model_forward_time": 0.11528563499450684,
      "step": 42535
    },
    {
      "epoch": 0.000259613037109375,
      "step": 42535,
      "training_step_time": 0.41733741760253906
    },
    {
      "epoch": 0.000259619140625,
      "model_forward_time": 0.1145331859588623,
      "step": 42536
    },
    {
      "epoch": 0.000259619140625,
      "step": 42536,
      "training_step_time": 0.3918766975402832
    },
    {
      "epoch": 0.000259625244140625,
      "model_forward_time": 0.1142127513885498,
      "step": 42537
    },
    {
      "epoch": 0.000259625244140625,
      "step": 42537,
      "training_step_time": 0.4028027057647705
    },
    {
      "epoch": 0.00025963134765625,
      "model_forward_time": 0.1156911849975586,
      "step": 42538
    },
    {
      "epoch": 0.00025963134765625,
      "step": 42538,
      "training_step_time": 0.38884782791137695
    },
    {
      "epoch": 0.000259637451171875,
      "model_forward_time": 0.1145944595336914,
      "step": 42539
    },
    {
      "epoch": 0.000259637451171875,
      "step": 42539,
      "training_step_time": 0.404538631439209
    },
    {
      "epoch": 0.0002596435546875,
      "grad_norm": 0.1238744854927063,
      "learning_rate": 2.1419080780610123e-05,
      "loss": 0.0342,
      "step": 42540
    },
    {
      "epoch": 0.0002596435546875,
      "model_forward_time": 0.11524295806884766,
      "step": 42540
    },
    {
      "epoch": 0.0002596435546875,
      "step": 42540,
      "training_step_time": 0.3788723945617676
    },
    {
      "epoch": 0.000259649658203125,
      "model_forward_time": 0.1149446964263916,
      "step": 42541
    },
    {
      "epoch": 0.000259649658203125,
      "step": 42541,
      "training_step_time": 0.4283440113067627
    },
    {
      "epoch": 0.00025965576171875,
      "model_forward_time": 0.11495208740234375,
      "step": 42542
    },
    {
      "epoch": 0.00025965576171875,
      "step": 42542,
      "training_step_time": 0.40624523162841797
    },
    {
      "epoch": 0.000259661865234375,
      "model_forward_time": 0.11468791961669922,
      "step": 42543
    },
    {
      "epoch": 0.000259661865234375,
      "step": 42543,
      "training_step_time": 0.4008474349975586
    },
    {
      "epoch": 0.00025966796875,
      "model_forward_time": 0.11514139175415039,
      "step": 42544
    },
    {
      "epoch": 0.00025966796875,
      "step": 42544,
      "training_step_time": 0.3643522262573242
    },
    {
      "epoch": 0.000259674072265625,
      "model_forward_time": 0.11498665809631348,
      "step": 42545
    },
    {
      "epoch": 0.000259674072265625,
      "step": 42545,
      "training_step_time": 0.40031862258911133
    },
    {
      "epoch": 0.00025968017578125,
      "model_forward_time": 0.11493420600891113,
      "step": 42546
    },
    {
      "epoch": 0.00025968017578125,
      "step": 42546,
      "training_step_time": 0.3656165599822998
    },
    {
      "epoch": 0.000259686279296875,
      "model_forward_time": 0.1155550479888916,
      "step": 42547
    },
    {
      "epoch": 0.000259686279296875,
      "step": 42547,
      "training_step_time": 0.41240692138671875
    },
    {
      "epoch": 0.0002596923828125,
      "model_forward_time": 0.1144862174987793,
      "step": 42548
    },
    {
      "epoch": 0.0002596923828125,
      "step": 42548,
      "training_step_time": 0.45828962326049805
    },
    {
      "epoch": 0.000259698486328125,
      "model_forward_time": 0.1146082878112793,
      "step": 42549
    },
    {
      "epoch": 0.000259698486328125,
      "step": 42549,
      "training_step_time": 0.4410572052001953
    },
    {
      "epoch": 0.00025970458984375,
      "grad_norm": 0.10182449966669083,
      "learning_rate": 2.139647338278381e-05,
      "loss": 0.037,
      "step": 42550
    },
    {
      "epoch": 0.00025970458984375,
      "model_forward_time": 0.11514568328857422,
      "step": 42550
    },
    {
      "epoch": 0.00025970458984375,
      "step": 42550,
      "training_step_time": 0.38681697845458984
    },
    {
      "epoch": 0.000259710693359375,
      "model_forward_time": 0.11516547203063965,
      "step": 42551
    },
    {
      "epoch": 0.000259710693359375,
      "step": 42551,
      "training_step_time": 0.3957393169403076
    },
    {
      "epoch": 0.000259716796875,
      "model_forward_time": 0.11476635932922363,
      "step": 42552
    },
    {
      "epoch": 0.000259716796875,
      "step": 42552,
      "training_step_time": 0.3767359256744385
    },
    {
      "epoch": 0.000259722900390625,
      "model_forward_time": 0.11501765251159668,
      "step": 42553
    },
    {
      "epoch": 0.000259722900390625,
      "step": 42553,
      "training_step_time": 0.40512609481811523
    },
    {
      "epoch": 0.00025972900390625,
      "model_forward_time": 0.11481261253356934,
      "step": 42554
    },
    {
      "epoch": 0.00025972900390625,
      "step": 42554,
      "training_step_time": 0.3892967700958252
    },
    {
      "epoch": 0.000259735107421875,
      "model_forward_time": 0.11557626724243164,
      "step": 42555
    },
    {
      "epoch": 0.000259735107421875,
      "step": 42555,
      "training_step_time": 0.44306230545043945
    },
    {
      "epoch": 0.0002597412109375,
      "model_forward_time": 0.11651253700256348,
      "step": 42556
    },
    {
      "epoch": 0.0002597412109375,
      "step": 42556,
      "training_step_time": 0.40328097343444824
    },
    {
      "epoch": 0.000259747314453125,
      "model_forward_time": 0.11493444442749023,
      "step": 42557
    },
    {
      "epoch": 0.000259747314453125,
      "step": 42557,
      "training_step_time": 0.4085047245025635
    },
    {
      "epoch": 0.00025975341796875,
      "model_forward_time": 0.11567163467407227,
      "step": 42558
    },
    {
      "epoch": 0.00025975341796875,
      "step": 42558,
      "training_step_time": 0.38126659393310547
    },
    {
      "epoch": 0.000259759521484375,
      "model_forward_time": 0.11513805389404297,
      "step": 42559
    },
    {
      "epoch": 0.000259759521484375,
      "step": 42559,
      "training_step_time": 0.4586963653564453
    },
    {
      "epoch": 0.000259765625,
      "grad_norm": 0.10146139562129974,
      "learning_rate": 2.1373874673953685e-05,
      "loss": 0.0372,
      "step": 42560
    },
    {
      "epoch": 0.000259765625,
      "model_forward_time": 0.11585664749145508,
      "step": 42560
    },
    {
      "epoch": 0.000259765625,
      "step": 42560,
      "training_step_time": 0.5054435729980469
    },
    {
      "epoch": 0.000259771728515625,
      "model_forward_time": 0.11555814743041992,
      "step": 42561
    },
    {
      "epoch": 0.000259771728515625,
      "step": 42561,
      "training_step_time": 0.41950297355651855
    },
    {
      "epoch": 0.00025977783203125,
      "model_forward_time": 0.11556267738342285,
      "step": 42562
    },
    {
      "epoch": 0.00025977783203125,
      "step": 42562,
      "training_step_time": 0.45102930068969727
    },
    {
      "epoch": 0.000259783935546875,
      "model_forward_time": 0.11447882652282715,
      "step": 42563
    },
    {
      "epoch": 0.000259783935546875,
      "step": 42563,
      "training_step_time": 0.4193880558013916
    },
    {
      "epoch": 0.0002597900390625,
      "model_forward_time": 0.11522388458251953,
      "step": 42564
    },
    {
      "epoch": 0.0002597900390625,
      "step": 42564,
      "training_step_time": 0.37717676162719727
    },
    {
      "epoch": 0.000259796142578125,
      "model_forward_time": 0.11529707908630371,
      "step": 42565
    },
    {
      "epoch": 0.000259796142578125,
      "step": 42565,
      "training_step_time": 0.3955862522125244
    },
    {
      "epoch": 0.00025980224609375,
      "model_forward_time": 0.11482977867126465,
      "step": 42566
    },
    {
      "epoch": 0.00025980224609375,
      "step": 42566,
      "training_step_time": 0.3984529972076416
    },
    {
      "epoch": 0.000259808349609375,
      "model_forward_time": 0.11556792259216309,
      "step": 42567
    },
    {
      "epoch": 0.000259808349609375,
      "step": 42567,
      "training_step_time": 0.3958559036254883
    },
    {
      "epoch": 0.000259814453125,
      "model_forward_time": 0.11536836624145508,
      "step": 42568
    },
    {
      "epoch": 0.000259814453125,
      "step": 42568,
      "training_step_time": 0.39150118827819824
    },
    {
      "epoch": 0.000259820556640625,
      "model_forward_time": 0.11511492729187012,
      "step": 42569
    },
    {
      "epoch": 0.000259820556640625,
      "step": 42569,
      "training_step_time": 0.41900157928466797
    },
    {
      "epoch": 0.00025982666015625,
      "grad_norm": 0.09322454780340195,
      "learning_rate": 2.1351284660984572e-05,
      "loss": 0.0359,
      "step": 42570
    },
    {
      "epoch": 0.00025982666015625,
      "model_forward_time": 0.11501598358154297,
      "step": 42570
    },
    {
      "epoch": 0.00025982666015625,
      "step": 42570,
      "training_step_time": 0.3748667240142822
    },
    {
      "epoch": 0.000259832763671875,
      "model_forward_time": 0.11515069007873535,
      "step": 42571
    },
    {
      "epoch": 0.000259832763671875,
      "step": 42571,
      "training_step_time": 0.38948559761047363
    },
    {
      "epoch": 0.0002598388671875,
      "model_forward_time": 0.11471843719482422,
      "step": 42572
    },
    {
      "epoch": 0.0002598388671875,
      "step": 42572,
      "training_step_time": 0.3879988193511963
    },
    {
      "epoch": 0.000259844970703125,
      "model_forward_time": 0.11498665809631348,
      "step": 42573
    },
    {
      "epoch": 0.000259844970703125,
      "step": 42573,
      "training_step_time": 0.37644457817077637
    },
    {
      "epoch": 0.00025985107421875,
      "model_forward_time": 0.11558151245117188,
      "step": 42574
    },
    {
      "epoch": 0.00025985107421875,
      "step": 42574,
      "training_step_time": 0.4531233310699463
    },
    {
      "epoch": 0.000259857177734375,
      "model_forward_time": 0.1190328598022461,
      "step": 42575
    },
    {
      "epoch": 0.000259857177734375,
      "step": 42575,
      "training_step_time": 0.4020957946777344
    },
    {
      "epoch": 0.00025986328125,
      "model_forward_time": 0.1147000789642334,
      "step": 42576
    },
    {
      "epoch": 0.00025986328125,
      "step": 42576,
      "training_step_time": 0.3676018714904785
    },
    {
      "epoch": 0.000259869384765625,
      "model_forward_time": 0.11469149589538574,
      "step": 42577
    },
    {
      "epoch": 0.000259869384765625,
      "step": 42577,
      "training_step_time": 0.44878125190734863
    },
    {
      "epoch": 0.00025987548828125,
      "model_forward_time": 0.1142263412475586,
      "step": 42578
    },
    {
      "epoch": 0.00025987548828125,
      "step": 42578,
      "training_step_time": 0.3883073329925537
    },
    {
      "epoch": 0.000259881591796875,
      "model_forward_time": 0.11525249481201172,
      "step": 42579
    },
    {
      "epoch": 0.000259881591796875,
      "step": 42579,
      "training_step_time": 0.3849327564239502
    },
    {
      "epoch": 0.0002598876953125,
      "grad_norm": 0.09736040234565735,
      "learning_rate": 2.1328703350738765e-05,
      "loss": 0.0379,
      "step": 42580
    },
    {
      "epoch": 0.0002598876953125,
      "model_forward_time": 0.11484432220458984,
      "step": 42580
    },
    {
      "epoch": 0.0002598876953125,
      "step": 42580,
      "training_step_time": 0.3831617832183838
    },
    {
      "epoch": 0.000259893798828125,
      "model_forward_time": 0.11444807052612305,
      "step": 42581
    },
    {
      "epoch": 0.000259893798828125,
      "step": 42581,
      "training_step_time": 0.4081592559814453
    },
    {
      "epoch": 0.00025989990234375,
      "model_forward_time": 0.11480069160461426,
      "step": 42582
    },
    {
      "epoch": 0.00025989990234375,
      "step": 42582,
      "training_step_time": 0.366865873336792
    },
    {
      "epoch": 0.000259906005859375,
      "model_forward_time": 0.11476683616638184,
      "step": 42583
    },
    {
      "epoch": 0.000259906005859375,
      "step": 42583,
      "training_step_time": 0.40017056465148926
    },
    {
      "epoch": 0.000259912109375,
      "model_forward_time": 0.11435675621032715,
      "step": 42584
    },
    {
      "epoch": 0.000259912109375,
      "step": 42584,
      "training_step_time": 0.4708387851715088
    },
    {
      "epoch": 0.000259918212890625,
      "model_forward_time": 0.11484551429748535,
      "step": 42585
    },
    {
      "epoch": 0.000259918212890625,
      "step": 42585,
      "training_step_time": 0.39651060104370117
    },
    {
      "epoch": 0.00025992431640625,
      "model_forward_time": 0.11488151550292969,
      "step": 42586
    },
    {
      "epoch": 0.00025992431640625,
      "step": 42586,
      "training_step_time": 0.38553738594055176
    },
    {
      "epoch": 0.000259930419921875,
      "model_forward_time": 0.1152505874633789,
      "step": 42587
    },
    {
      "epoch": 0.000259930419921875,
      "step": 42587,
      "training_step_time": 0.3749580383300781
    },
    {
      "epoch": 0.0002599365234375,
      "model_forward_time": 0.11420965194702148,
      "step": 42588
    },
    {
      "epoch": 0.0002599365234375,
      "step": 42588,
      "training_step_time": 0.3661503791809082
    },
    {
      "epoch": 0.000259942626953125,
      "model_forward_time": 0.11519002914428711,
      "step": 42589
    },
    {
      "epoch": 0.000259942626953125,
      "step": 42589,
      "training_step_time": 0.4046945571899414
    },
    {
      "epoch": 0.00025994873046875,
      "grad_norm": 0.11316408962011337,
      "learning_rate": 2.1306130750075865e-05,
      "loss": 0.0399,
      "step": 42590
    },
    {
      "epoch": 0.00025994873046875,
      "model_forward_time": 0.11490535736083984,
      "step": 42590
    },
    {
      "epoch": 0.00025994873046875,
      "step": 42590,
      "training_step_time": 0.40132713317871094
    },
    {
      "epoch": 0.000259954833984375,
      "model_forward_time": 0.11435174942016602,
      "step": 42591
    },
    {
      "epoch": 0.000259954833984375,
      "step": 42591,
      "training_step_time": 0.42245006561279297
    },
    {
      "epoch": 0.0002599609375,
      "model_forward_time": 0.11465024948120117,
      "step": 42592
    },
    {
      "epoch": 0.0002599609375,
      "step": 42592,
      "training_step_time": 0.3860445022583008
    },
    {
      "epoch": 0.000259967041015625,
      "model_forward_time": 0.11478781700134277,
      "step": 42593
    },
    {
      "epoch": 0.000259967041015625,
      "step": 42593,
      "training_step_time": 0.38761234283447266
    },
    {
      "epoch": 0.00025997314453125,
      "model_forward_time": 0.11521124839782715,
      "step": 42594
    },
    {
      "epoch": 0.00025997314453125,
      "step": 42594,
      "training_step_time": 0.3653721809387207
    },
    {
      "epoch": 0.000259979248046875,
      "model_forward_time": 0.12630963325500488,
      "step": 42595
    },
    {
      "epoch": 0.000259979248046875,
      "step": 42595,
      "training_step_time": 0.3992009162902832
    },
    {
      "epoch": 0.0002599853515625,
      "model_forward_time": 0.11456799507141113,
      "step": 42596
    },
    {
      "epoch": 0.0002599853515625,
      "step": 42596,
      "training_step_time": 0.40160632133483887
    },
    {
      "epoch": 0.000259991455078125,
      "model_forward_time": 0.11474275588989258,
      "step": 42597
    },
    {
      "epoch": 0.000259991455078125,
      "step": 42597,
      "training_step_time": 0.39346766471862793
    },
    {
      "epoch": 0.00025999755859375,
      "model_forward_time": 0.11539268493652344,
      "step": 42598
    },
    {
      "epoch": 0.00025999755859375,
      "step": 42598,
      "training_step_time": 0.43688058853149414
    },
    {
      "epoch": 0.000260003662109375,
      "model_forward_time": 0.11527347564697266,
      "step": 42599
    },
    {
      "epoch": 0.000260003662109375,
      "step": 42599,
      "training_step_time": 0.3908212184906006
    },
    {
      "epoch": 0.000260009765625,
      "grad_norm": 0.11986958235502243,
      "learning_rate": 2.128356686585282e-05,
      "loss": 0.037,
      "step": 42600
    },
    {
      "epoch": 0.000260009765625,
      "model_forward_time": 0.11410212516784668,
      "step": 42600
    },
    {
      "epoch": 0.000260009765625,
      "step": 42600,
      "training_step_time": 0.36321043968200684
    },
    {
      "epoch": 0.000260015869140625,
      "model_forward_time": 0.1159977912902832,
      "step": 42601
    },
    {
      "epoch": 0.000260015869140625,
      "step": 42601,
      "training_step_time": 0.36539435386657715
    },
    {
      "epoch": 0.00026002197265625,
      "model_forward_time": 0.11489319801330566,
      "step": 42602
    },
    {
      "epoch": 0.00026002197265625,
      "step": 42602,
      "training_step_time": 0.4296436309814453
    },
    {
      "epoch": 0.000260028076171875,
      "model_forward_time": 0.11477255821228027,
      "step": 42603
    },
    {
      "epoch": 0.000260028076171875,
      "step": 42603,
      "training_step_time": 0.49773192405700684
    },
    {
      "epoch": 0.0002600341796875,
      "model_forward_time": 0.11564111709594727,
      "step": 42604
    },
    {
      "epoch": 0.0002600341796875,
      "step": 42604,
      "training_step_time": 0.436767578125
    },
    {
      "epoch": 0.000260040283203125,
      "model_forward_time": 0.11478281021118164,
      "step": 42605
    },
    {
      "epoch": 0.000260040283203125,
      "step": 42605,
      "training_step_time": 0.4936542510986328
    },
    {
      "epoch": 0.00026004638671875,
      "model_forward_time": 0.11488199234008789,
      "step": 42606
    },
    {
      "epoch": 0.00026004638671875,
      "step": 42606,
      "training_step_time": 0.3721613883972168
    },
    {
      "epoch": 0.000260052490234375,
      "model_forward_time": 0.11453771591186523,
      "step": 42607
    },
    {
      "epoch": 0.000260052490234375,
      "step": 42607,
      "training_step_time": 0.40157198905944824
    },
    {
      "epoch": 0.00026005859375,
      "model_forward_time": 0.1148691177368164,
      "step": 42608
    },
    {
      "epoch": 0.00026005859375,
      "step": 42608,
      "training_step_time": 0.3904087543487549
    },
    {
      "epoch": 0.000260064697265625,
      "model_forward_time": 0.11493086814880371,
      "step": 42609
    },
    {
      "epoch": 0.000260064697265625,
      "step": 42609,
      "training_step_time": 0.38345789909362793
    },
    {
      "epoch": 0.00026007080078125,
      "grad_norm": 0.11255399882793427,
      "learning_rate": 2.1261011704923955e-05,
      "loss": 0.0419,
      "step": 42610
    },
    {
      "epoch": 0.00026007080078125,
      "model_forward_time": 0.11560344696044922,
      "step": 42610
    },
    {
      "epoch": 0.00026007080078125,
      "step": 42610,
      "training_step_time": 0.462538480758667
    },
    {
      "epoch": 0.000260076904296875,
      "model_forward_time": 0.11466765403747559,
      "step": 42611
    },
    {
      "epoch": 0.000260076904296875,
      "step": 42611,
      "training_step_time": 0.44484686851501465
    },
    {
      "epoch": 0.0002600830078125,
      "model_forward_time": 0.11572098731994629,
      "step": 42612
    },
    {
      "epoch": 0.0002600830078125,
      "step": 42612,
      "training_step_time": 0.39339685440063477
    },
    {
      "epoch": 0.000260089111328125,
      "model_forward_time": 0.11519145965576172,
      "step": 42613
    },
    {
      "epoch": 0.000260089111328125,
      "step": 42613,
      "training_step_time": 0.40244030952453613
    },
    {
      "epoch": 0.00026009521484375,
      "model_forward_time": 0.1152811050415039,
      "step": 42614
    },
    {
      "epoch": 0.00026009521484375,
      "step": 42614,
      "training_step_time": 0.3940756320953369
    },
    {
      "epoch": 0.000260101318359375,
      "model_forward_time": 0.11490440368652344,
      "step": 42615
    },
    {
      "epoch": 0.000260101318359375,
      "step": 42615,
      "training_step_time": 0.40632033348083496
    },
    {
      "epoch": 0.000260107421875,
      "model_forward_time": 0.11534523963928223,
      "step": 42616
    },
    {
      "epoch": 0.000260107421875,
      "step": 42616,
      "training_step_time": 0.4925353527069092
    },
    {
      "epoch": 0.000260113525390625,
      "model_forward_time": 0.11540627479553223,
      "step": 42617
    },
    {
      "epoch": 0.000260113525390625,
      "step": 42617,
      "training_step_time": 0.5020034313201904
    },
    {
      "epoch": 0.00026011962890625,
      "model_forward_time": 0.11501860618591309,
      "step": 42618
    },
    {
      "epoch": 0.00026011962890625,
      "step": 42618,
      "training_step_time": 0.408524751663208
    },
    {
      "epoch": 0.000260125732421875,
      "model_forward_time": 0.11552238464355469,
      "step": 42619
    },
    {
      "epoch": 0.000260125732421875,
      "step": 42619,
      "training_step_time": 0.44382452964782715
    },
    {
      "epoch": 0.0002601318359375,
      "grad_norm": 0.09152308106422424,
      "learning_rate": 2.12384652741409e-05,
      "loss": 0.0401,
      "step": 42620
    },
    {
      "epoch": 0.0002601318359375,
      "model_forward_time": 0.11436080932617188,
      "step": 42620
    },
    {
      "epoch": 0.0002601318359375,
      "step": 42620,
      "training_step_time": 0.47919130325317383
    },
    {
      "epoch": 0.000260137939453125,
      "model_forward_time": 0.11437416076660156,
      "step": 42621
    },
    {
      "epoch": 0.000260137939453125,
      "step": 42621,
      "training_step_time": 0.3868143558502197
    },
    {
      "epoch": 0.00026014404296875,
      "model_forward_time": 0.11517477035522461,
      "step": 42622
    },
    {
      "epoch": 0.00026014404296875,
      "step": 42622,
      "training_step_time": 0.38614988327026367
    },
    {
      "epoch": 0.000260150146484375,
      "model_forward_time": 0.11510348320007324,
      "step": 42623
    },
    {
      "epoch": 0.000260150146484375,
      "step": 42623,
      "training_step_time": 0.38197898864746094
    },
    {
      "epoch": 0.00026015625,
      "model_forward_time": 0.1164393424987793,
      "step": 42624
    },
    {
      "epoch": 0.00026015625,
      "step": 42624,
      "training_step_time": 0.4042038917541504
    },
    {
      "epoch": 0.000260162353515625,
      "model_forward_time": 0.11562299728393555,
      "step": 42625
    },
    {
      "epoch": 0.000260162353515625,
      "step": 42625,
      "training_step_time": 0.4196622371673584
    },
    {
      "epoch": 0.00026016845703125,
      "model_forward_time": 0.115325927734375,
      "step": 42626
    },
    {
      "epoch": 0.00026016845703125,
      "step": 42626,
      "training_step_time": 0.3939230442047119
    },
    {
      "epoch": 0.000260174560546875,
      "model_forward_time": 0.11554193496704102,
      "step": 42627
    },
    {
      "epoch": 0.000260174560546875,
      "step": 42627,
      "training_step_time": 0.38993406295776367
    },
    {
      "epoch": 0.0002601806640625,
      "model_forward_time": 0.11545705795288086,
      "step": 42628
    },
    {
      "epoch": 0.0002601806640625,
      "step": 42628,
      "training_step_time": 0.3896195888519287
    },
    {
      "epoch": 0.000260186767578125,
      "model_forward_time": 0.11539411544799805,
      "step": 42629
    },
    {
      "epoch": 0.000260186767578125,
      "step": 42629,
      "training_step_time": 0.4149770736694336
    },
    {
      "epoch": 0.00026019287109375,
      "grad_norm": 0.0781148225069046,
      "learning_rate": 2.121592758035273e-05,
      "loss": 0.0343,
      "step": 42630
    },
    {
      "epoch": 0.00026019287109375,
      "model_forward_time": 0.11446571350097656,
      "step": 42630
    },
    {
      "epoch": 0.00026019287109375,
      "step": 42630,
      "training_step_time": 0.3695528507232666
    },
    {
      "epoch": 0.000260198974609375,
      "model_forward_time": 0.11535954475402832,
      "step": 42631
    },
    {
      "epoch": 0.000260198974609375,
      "step": 42631,
      "training_step_time": 0.4234800338745117
    },
    {
      "epoch": 0.000260205078125,
      "model_forward_time": 0.11555051803588867,
      "step": 42632
    },
    {
      "epoch": 0.000260205078125,
      "step": 42632,
      "training_step_time": 0.41251254081726074
    },
    {
      "epoch": 0.000260211181640625,
      "model_forward_time": 0.11537766456604004,
      "step": 42633
    },
    {
      "epoch": 0.000260211181640625,
      "step": 42633,
      "training_step_time": 0.469820499420166
    },
    {
      "epoch": 0.00026021728515625,
      "model_forward_time": 0.11574292182922363,
      "step": 42634
    },
    {
      "epoch": 0.00026021728515625,
      "step": 42634,
      "training_step_time": 0.39421677589416504
    },
    {
      "epoch": 0.000260223388671875,
      "model_forward_time": 0.11481118202209473,
      "step": 42635
    },
    {
      "epoch": 0.000260223388671875,
      "step": 42635,
      "training_step_time": 0.447343111038208
    },
    {
      "epoch": 0.0002602294921875,
      "model_forward_time": 0.11629247665405273,
      "step": 42636
    },
    {
      "epoch": 0.0002602294921875,
      "step": 42636,
      "training_step_time": 0.37502622604370117
    },
    {
      "epoch": 0.000260235595703125,
      "model_forward_time": 0.115142822265625,
      "step": 42637
    },
    {
      "epoch": 0.000260235595703125,
      "step": 42637,
      "training_step_time": 0.39671802520751953
    },
    {
      "epoch": 0.00026024169921875,
      "model_forward_time": 0.11464357376098633,
      "step": 42638
    },
    {
      "epoch": 0.00026024169921875,
      "step": 42638,
      "training_step_time": 0.39614224433898926
    },
    {
      "epoch": 0.000260247802734375,
      "model_forward_time": 0.11547374725341797,
      "step": 42639
    },
    {
      "epoch": 0.000260247802734375,
      "step": 42639,
      "training_step_time": 0.5999205112457275
    },
    {
      "epoch": 0.00026025390625,
      "grad_norm": 0.09734342992305756,
      "learning_rate": 2.1193398630405725e-05,
      "loss": 0.0364,
      "step": 42640
    },
    {
      "epoch": 0.00026025390625,
      "model_forward_time": 0.11487674713134766,
      "step": 42640
    },
    {
      "epoch": 0.00026025390625,
      "step": 42640,
      "training_step_time": 0.39319896697998047
    },
    {
      "epoch": 0.000260260009765625,
      "model_forward_time": 0.11527419090270996,
      "step": 42641
    },
    {
      "epoch": 0.000260260009765625,
      "step": 42641,
      "training_step_time": 0.4035370349884033
    },
    {
      "epoch": 0.00026026611328125,
      "model_forward_time": 0.11507296562194824,
      "step": 42642
    },
    {
      "epoch": 0.00026026611328125,
      "step": 42642,
      "training_step_time": 0.39665842056274414
    },
    {
      "epoch": 0.000260272216796875,
      "model_forward_time": 0.11833405494689941,
      "step": 42643
    },
    {
      "epoch": 0.000260272216796875,
      "step": 42643,
      "training_step_time": 0.42559027671813965
    },
    {
      "epoch": 0.0002602783203125,
      "model_forward_time": 0.11855101585388184,
      "step": 42644
    },
    {
      "epoch": 0.0002602783203125,
      "step": 42644,
      "training_step_time": 0.37644314765930176
    },
    {
      "epoch": 0.000260284423828125,
      "model_forward_time": 0.11795997619628906,
      "step": 42645
    },
    {
      "epoch": 0.000260284423828125,
      "step": 42645,
      "training_step_time": 0.7631969451904297
    },
    {
      "epoch": 0.00026029052734375,
      "model_forward_time": 0.11781525611877441,
      "step": 42646
    },
    {
      "epoch": 0.00026029052734375,
      "step": 42646,
      "training_step_time": 0.38755369186401367
    },
    {
      "epoch": 0.000260296630859375,
      "model_forward_time": 0.11706781387329102,
      "step": 42647
    },
    {
      "epoch": 0.000260296630859375,
      "step": 42647,
      "training_step_time": 0.4210817813873291
    },
    {
      "epoch": 0.000260302734375,
      "model_forward_time": 0.1156609058380127,
      "step": 42648
    },
    {
      "epoch": 0.000260302734375,
      "step": 42648,
      "training_step_time": 0.3747560977935791
    },
    {
      "epoch": 0.000260308837890625,
      "model_forward_time": 0.11474275588989258,
      "step": 42649
    },
    {
      "epoch": 0.000260308837890625,
      "step": 42649,
      "training_step_time": 0.4214818477630615
    },
    {
      "epoch": 0.00026031494140625,
      "grad_norm": 0.10612384974956512,
      "learning_rate": 2.1170878431143637e-05,
      "loss": 0.0386,
      "step": 42650
    },
    {
      "epoch": 0.00026031494140625,
      "model_forward_time": 0.1140890121459961,
      "step": 42650
    },
    {
      "epoch": 0.00026031494140625,
      "step": 42650,
      "training_step_time": 0.39118337631225586
    },
    {
      "epoch": 0.000260321044921875,
      "model_forward_time": 0.11474466323852539,
      "step": 42651
    },
    {
      "epoch": 0.000260321044921875,
      "step": 42651,
      "training_step_time": 0.3920917510986328
    },
    {
      "epoch": 0.0002603271484375,
      "model_forward_time": 0.11631894111633301,
      "step": 42652
    },
    {
      "epoch": 0.0002603271484375,
      "step": 42652,
      "training_step_time": 0.43350839614868164
    },
    {
      "epoch": 0.000260333251953125,
      "model_forward_time": 0.11496305465698242,
      "step": 42653
    },
    {
      "epoch": 0.000260333251953125,
      "step": 42653,
      "training_step_time": 0.3961169719696045
    },
    {
      "epoch": 0.00026033935546875,
      "model_forward_time": 0.1156001091003418,
      "step": 42654
    },
    {
      "epoch": 0.00026033935546875,
      "step": 42654,
      "training_step_time": 0.40500664710998535
    },
    {
      "epoch": 0.000260345458984375,
      "model_forward_time": 0.11525177955627441,
      "step": 42655
    },
    {
      "epoch": 0.000260345458984375,
      "step": 42655,
      "training_step_time": 0.40094542503356934
    },
    {
      "epoch": 0.0002603515625,
      "model_forward_time": 0.11512613296508789,
      "step": 42656
    },
    {
      "epoch": 0.0002603515625,
      "step": 42656,
      "training_step_time": 0.3998696804046631
    },
    {
      "epoch": 0.000260357666015625,
      "model_forward_time": 0.11533308029174805,
      "step": 42657
    },
    {
      "epoch": 0.000260357666015625,
      "step": 42657,
      "training_step_time": 0.4485049247741699
    },
    {
      "epoch": 0.00026036376953125,
      "model_forward_time": 0.11555266380310059,
      "step": 42658
    },
    {
      "epoch": 0.00026036376953125,
      "step": 42658,
      "training_step_time": 0.36866188049316406
    },
    {
      "epoch": 0.000260369873046875,
      "model_forward_time": 0.11615276336669922,
      "step": 42659
    },
    {
      "epoch": 0.000260369873046875,
      "step": 42659,
      "training_step_time": 0.4126284122467041
    },
    {
      "epoch": 0.0002603759765625,
      "grad_norm": 0.10356815159320831,
      "learning_rate": 2.1148366989407496e-05,
      "loss": 0.037,
      "step": 42660
    },
    {
      "epoch": 0.0002603759765625,
      "model_forward_time": 0.11516141891479492,
      "step": 42660
    },
    {
      "epoch": 0.0002603759765625,
      "step": 42660,
      "training_step_time": 0.4429893493652344
    },
    {
      "epoch": 0.000260382080078125,
      "model_forward_time": 0.115631103515625,
      "step": 42661
    },
    {
      "epoch": 0.000260382080078125,
      "step": 42661,
      "training_step_time": 0.429760217666626
    },
    {
      "epoch": 0.00026038818359375,
      "model_forward_time": 0.11465907096862793,
      "step": 42662
    },
    {
      "epoch": 0.00026038818359375,
      "step": 42662,
      "training_step_time": 0.4568614959716797
    },
    {
      "epoch": 0.000260394287109375,
      "model_forward_time": 0.11547589302062988,
      "step": 42663
    },
    {
      "epoch": 0.000260394287109375,
      "step": 42663,
      "training_step_time": 0.4143095016479492
    },
    {
      "epoch": 0.000260400390625,
      "model_forward_time": 0.11522650718688965,
      "step": 42664
    },
    {
      "epoch": 0.000260400390625,
      "step": 42664,
      "training_step_time": 0.4006352424621582
    },
    {
      "epoch": 0.000260406494140625,
      "model_forward_time": 0.11542987823486328,
      "step": 42665
    },
    {
      "epoch": 0.000260406494140625,
      "step": 42665,
      "training_step_time": 0.4013686180114746
    },
    {
      "epoch": 0.00026041259765625,
      "model_forward_time": 0.11523127555847168,
      "step": 42666
    },
    {
      "epoch": 0.00026041259765625,
      "step": 42666,
      "training_step_time": 0.3786585330963135
    },
    {
      "epoch": 0.000260418701171875,
      "model_forward_time": 0.11488580703735352,
      "step": 42667
    },
    {
      "epoch": 0.000260418701171875,
      "step": 42667,
      "training_step_time": 0.47822999954223633
    },
    {
      "epoch": 0.0002604248046875,
      "model_forward_time": 0.11512327194213867,
      "step": 42668
    },
    {
      "epoch": 0.0002604248046875,
      "step": 42668,
      "training_step_time": 0.38918304443359375
    },
    {
      "epoch": 0.000260430908203125,
      "model_forward_time": 0.1144709587097168,
      "step": 42669
    },
    {
      "epoch": 0.000260430908203125,
      "step": 42669,
      "training_step_time": 0.8860478401184082
    },
    {
      "epoch": 0.00026043701171875,
      "grad_norm": 0.10997994244098663,
      "learning_rate": 2.112586431203568e-05,
      "loss": 0.0393,
      "step": 42670
    },
    {
      "epoch": 0.00026043701171875,
      "model_forward_time": 0.11432671546936035,
      "step": 42670
    },
    {
      "epoch": 0.00026043701171875,
      "step": 42670,
      "training_step_time": 0.3918933868408203
    },
    {
      "epoch": 0.000260443115234375,
      "model_forward_time": 0.11407732963562012,
      "step": 42671
    },
    {
      "epoch": 0.000260443115234375,
      "step": 42671,
      "training_step_time": 0.39002180099487305
    },
    {
      "epoch": 0.00026044921875,
      "model_forward_time": 0.1143953800201416,
      "step": 42672
    },
    {
      "epoch": 0.00026044921875,
      "step": 42672,
      "training_step_time": 0.36457252502441406
    },
    {
      "epoch": 0.000260455322265625,
      "model_forward_time": 0.11533236503601074,
      "step": 42673
    },
    {
      "epoch": 0.000260455322265625,
      "step": 42673,
      "training_step_time": 0.41618776321411133
    },
    {
      "epoch": 0.00026046142578125,
      "model_forward_time": 0.11404109001159668,
      "step": 42674
    },
    {
      "epoch": 0.00026046142578125,
      "step": 42674,
      "training_step_time": 0.4807260036468506
    },
    {
      "epoch": 0.000260467529296875,
      "model_forward_time": 0.11644387245178223,
      "step": 42675
    },
    {
      "epoch": 0.000260467529296875,
      "step": 42675,
      "training_step_time": 1.0658752918243408
    },
    {
      "epoch": 0.0002604736328125,
      "model_forward_time": 0.11387419700622559,
      "step": 42676
    },
    {
      "epoch": 0.0002604736328125,
      "step": 42676,
      "training_step_time": 0.41780877113342285
    },
    {
      "epoch": 0.000260479736328125,
      "model_forward_time": 0.11416816711425781,
      "step": 42677
    },
    {
      "epoch": 0.000260479736328125,
      "step": 42677,
      "training_step_time": 0.37178969383239746
    },
    {
      "epoch": 0.00026048583984375,
      "model_forward_time": 0.1142582893371582,
      "step": 42678
    },
    {
      "epoch": 0.00026048583984375,
      "step": 42678,
      "training_step_time": 0.36868882179260254
    },
    {
      "epoch": 0.000260491943359375,
      "model_forward_time": 0.11404991149902344,
      "step": 42679
    },
    {
      "epoch": 0.000260491943359375,
      "step": 42679,
      "training_step_time": 0.4979274272918701
    },
    {
      "epoch": 0.000260498046875,
      "grad_norm": 0.1078328862786293,
      "learning_rate": 2.110337040586391e-05,
      "loss": 0.0398,
      "step": 42680
    },
    {
      "epoch": 0.000260498046875,
      "model_forward_time": 0.11406469345092773,
      "step": 42680
    },
    {
      "epoch": 0.000260498046875,
      "step": 42680,
      "training_step_time": 0.4581105709075928
    },
    {
      "epoch": 0.000260504150390625,
      "model_forward_time": 0.11553120613098145,
      "step": 42681
    },
    {
      "epoch": 0.000260504150390625,
      "step": 42681,
      "training_step_time": 0.4100353717803955
    },
    {
      "epoch": 0.00026051025390625,
      "model_forward_time": 0.11453914642333984,
      "step": 42682
    },
    {
      "epoch": 0.00026051025390625,
      "step": 42682,
      "training_step_time": 0.3957865238189697
    },
    {
      "epoch": 0.000260516357421875,
      "model_forward_time": 0.1148374080657959,
      "step": 42683
    },
    {
      "epoch": 0.000260516357421875,
      "step": 42683,
      "training_step_time": 0.4009082317352295
    },
    {
      "epoch": 0.0002605224609375,
      "model_forward_time": 0.1148674488067627,
      "step": 42684
    },
    {
      "epoch": 0.0002605224609375,
      "step": 42684,
      "training_step_time": 0.3985097408294678
    },
    {
      "epoch": 0.000260528564453125,
      "model_forward_time": 0.11511087417602539,
      "step": 42685
    },
    {
      "epoch": 0.000260528564453125,
      "step": 42685,
      "training_step_time": 0.39386630058288574
    },
    {
      "epoch": 0.00026053466796875,
      "model_forward_time": 0.11462545394897461,
      "step": 42686
    },
    {
      "epoch": 0.00026053466796875,
      "step": 42686,
      "training_step_time": 0.45734310150146484
    },
    {
      "epoch": 0.000260540771484375,
      "model_forward_time": 0.11565113067626953,
      "step": 42687
    },
    {
      "epoch": 0.000260540771484375,
      "step": 42687,
      "training_step_time": 0.6196944713592529
    },
    {
      "epoch": 0.000260546875,
      "model_forward_time": 0.11669445037841797,
      "step": 42688
    },
    {
      "epoch": 0.000260546875,
      "step": 42688,
      "training_step_time": 0.4205927848815918
    },
    {
      "epoch": 0.000260552978515625,
      "model_forward_time": 0.1147618293762207,
      "step": 42689
    },
    {
      "epoch": 0.000260552978515625,
      "step": 42689,
      "training_step_time": 0.444659948348999
    },
    {
      "epoch": 0.00026055908203125,
      "grad_norm": 0.1118675097823143,
      "learning_rate": 2.1080885277725236e-05,
      "loss": 0.0386,
      "step": 42690
    },
    {
      "epoch": 0.00026055908203125,
      "model_forward_time": 0.11561965942382812,
      "step": 42690
    },
    {
      "epoch": 0.00026055908203125,
      "step": 42690,
      "training_step_time": 0.4509270191192627
    },
    {
      "epoch": 0.000260565185546875,
      "model_forward_time": 0.11461353302001953,
      "step": 42691
    },
    {
      "epoch": 0.000260565185546875,
      "step": 42691,
      "training_step_time": 0.38809847831726074
    },
    {
      "epoch": 0.0002605712890625,
      "model_forward_time": 0.11451911926269531,
      "step": 42692
    },
    {
      "epoch": 0.0002605712890625,
      "step": 42692,
      "training_step_time": 0.3929939270019531
    },
    {
      "epoch": 0.000260577392578125,
      "model_forward_time": 0.11502242088317871,
      "step": 42693
    },
    {
      "epoch": 0.000260577392578125,
      "step": 42693,
      "training_step_time": 0.39720749855041504
    },
    {
      "epoch": 0.00026058349609375,
      "model_forward_time": 0.11449098587036133,
      "step": 42694
    },
    {
      "epoch": 0.00026058349609375,
      "step": 42694,
      "training_step_time": 0.4782085418701172
    },
    {
      "epoch": 0.000260589599609375,
      "model_forward_time": 0.11482548713684082,
      "step": 42695
    },
    {
      "epoch": 0.000260589599609375,
      "step": 42695,
      "training_step_time": 0.38527488708496094
    },
    {
      "epoch": 0.000260595703125,
      "model_forward_time": 0.11512184143066406,
      "step": 42696
    },
    {
      "epoch": 0.000260595703125,
      "step": 42696,
      "training_step_time": 0.3845794200897217
    },
    {
      "epoch": 0.000260601806640625,
      "model_forward_time": 0.11577415466308594,
      "step": 42697
    },
    {
      "epoch": 0.000260601806640625,
      "step": 42697,
      "training_step_time": 0.39714884757995605
    },
    {
      "epoch": 0.00026060791015625,
      "model_forward_time": 0.11589431762695312,
      "step": 42698
    },
    {
      "epoch": 0.00026060791015625,
      "step": 42698,
      "training_step_time": 0.40058398246765137
    },
    {
      "epoch": 0.000260614013671875,
      "model_forward_time": 0.11455440521240234,
      "step": 42699
    },
    {
      "epoch": 0.000260614013671875,
      "step": 42699,
      "training_step_time": 0.4667775630950928
    },
    {
      "epoch": 0.0002606201171875,
      "grad_norm": 0.14286430180072784,
      "learning_rate": 2.105840893445005e-05,
      "loss": 0.0395,
      "step": 42700
    },
    {
      "epoch": 0.0002606201171875,
      "model_forward_time": 0.1149144172668457,
      "step": 42700
    },
    {
      "epoch": 0.0002606201171875,
      "step": 42700,
      "training_step_time": 0.5156753063201904
    },
    {
      "epoch": 0.000260626220703125,
      "model_forward_time": 0.11493706703186035,
      "step": 42701
    },
    {
      "epoch": 0.000260626220703125,
      "step": 42701,
      "training_step_time": 0.503532886505127
    },
    {
      "epoch": 0.00026063232421875,
      "model_forward_time": 0.11496996879577637,
      "step": 42702
    },
    {
      "epoch": 0.00026063232421875,
      "step": 42702,
      "training_step_time": 0.48795270919799805
    },
    {
      "epoch": 0.000260638427734375,
      "model_forward_time": 0.11515283584594727,
      "step": 42703
    },
    {
      "epoch": 0.000260638427734375,
      "step": 42703,
      "training_step_time": 0.4339301586151123
    },
    {
      "epoch": 0.00026064453125,
      "model_forward_time": 0.11427617073059082,
      "step": 42704
    },
    {
      "epoch": 0.00026064453125,
      "step": 42704,
      "training_step_time": 0.48447513580322266
    },
    {
      "epoch": 0.000260650634765625,
      "model_forward_time": 0.11443781852722168,
      "step": 42705
    },
    {
      "epoch": 0.000260650634765625,
      "step": 42705,
      "training_step_time": 0.39037084579467773
    },
    {
      "epoch": 0.00026065673828125,
      "model_forward_time": 0.11416149139404297,
      "step": 42706
    },
    {
      "epoch": 0.00026065673828125,
      "step": 42706,
      "training_step_time": 0.37859272956848145
    },
    {
      "epoch": 0.000260662841796875,
      "model_forward_time": 0.11468887329101562,
      "step": 42707
    },
    {
      "epoch": 0.000260662841796875,
      "step": 42707,
      "training_step_time": 0.4100306034088135
    },
    {
      "epoch": 0.0002606689453125,
      "model_forward_time": 0.11501550674438477,
      "step": 42708
    },
    {
      "epoch": 0.0002606689453125,
      "step": 42708,
      "training_step_time": 0.4345967769622803
    },
    {
      "epoch": 0.000260675048828125,
      "model_forward_time": 0.11527371406555176,
      "step": 42709
    },
    {
      "epoch": 0.000260675048828125,
      "step": 42709,
      "training_step_time": 0.4007844924926758
    },
    {
      "epoch": 0.00026068115234375,
      "grad_norm": 0.08762440085411072,
      "learning_rate": 2.103594138286607e-05,
      "loss": 0.035,
      "step": 42710
    },
    {
      "epoch": 0.00026068115234375,
      "model_forward_time": 0.11479711532592773,
      "step": 42710
    },
    {
      "epoch": 0.00026068115234375,
      "step": 42710,
      "training_step_time": 0.40349721908569336
    },
    {
      "epoch": 0.000260687255859375,
      "model_forward_time": 0.11578536033630371,
      "step": 42711
    },
    {
      "epoch": 0.000260687255859375,
      "step": 42711,
      "training_step_time": 0.41786646842956543
    },
    {
      "epoch": 0.000260693359375,
      "model_forward_time": 0.11503291130065918,
      "step": 42712
    },
    {
      "epoch": 0.000260693359375,
      "step": 42712,
      "training_step_time": 0.3824915885925293
    },
    {
      "epoch": 0.000260699462890625,
      "model_forward_time": 0.11623573303222656,
      "step": 42713
    },
    {
      "epoch": 0.000260699462890625,
      "step": 42713,
      "training_step_time": 0.4044473171234131
    },
    {
      "epoch": 0.00026070556640625,
      "model_forward_time": 0.11519455909729004,
      "step": 42714
    },
    {
      "epoch": 0.00026070556640625,
      "step": 42714,
      "training_step_time": 0.36779117584228516
    },
    {
      "epoch": 0.000260711669921875,
      "model_forward_time": 0.11826276779174805,
      "step": 42715
    },
    {
      "epoch": 0.000260711669921875,
      "step": 42715,
      "training_step_time": 0.4298973083496094
    },
    {
      "epoch": 0.0002607177734375,
      "model_forward_time": 0.1185445785522461,
      "step": 42716
    },
    {
      "epoch": 0.0002607177734375,
      "step": 42716,
      "training_step_time": 0.40473103523254395
    },
    {
      "epoch": 0.000260723876953125,
      "model_forward_time": 0.1179819107055664,
      "step": 42717
    },
    {
      "epoch": 0.000260723876953125,
      "step": 42717,
      "training_step_time": 0.5205643177032471
    },
    {
      "epoch": 0.00026072998046875,
      "model_forward_time": 0.11795854568481445,
      "step": 42718
    },
    {
      "epoch": 0.00026072998046875,
      "step": 42718,
      "training_step_time": 0.4408912658691406
    },
    {
      "epoch": 0.000260736083984375,
      "model_forward_time": 0.11791872978210449,
      "step": 42719
    },
    {
      "epoch": 0.000260736083984375,
      "step": 42719,
      "training_step_time": 0.3842442035675049
    },
    {
      "epoch": 0.0002607421875,
      "grad_norm": 0.11392097175121307,
      "learning_rate": 2.1013482629798333e-05,
      "loss": 0.0437,
      "step": 42720
    },
    {
      "epoch": 0.0002607421875,
      "model_forward_time": 0.11785626411437988,
      "step": 42720
    },
    {
      "epoch": 0.0002607421875,
      "step": 42720,
      "training_step_time": 0.3846299648284912
    },
    {
      "epoch": 0.000260748291015625,
      "model_forward_time": 0.11593055725097656,
      "step": 42721
    },
    {
      "epoch": 0.000260748291015625,
      "step": 42721,
      "training_step_time": 0.4024941921234131
    },
    {
      "epoch": 0.00026075439453125,
      "model_forward_time": 0.11468744277954102,
      "step": 42722
    },
    {
      "epoch": 0.00026075439453125,
      "step": 42722,
      "training_step_time": 0.41048407554626465
    },
    {
      "epoch": 0.000260760498046875,
      "model_forward_time": 0.1183633804321289,
      "step": 42723
    },
    {
      "epoch": 0.000260760498046875,
      "step": 42723,
      "training_step_time": 0.43837499618530273
    },
    {
      "epoch": 0.0002607666015625,
      "model_forward_time": 0.11799216270446777,
      "step": 42724
    },
    {
      "epoch": 0.0002607666015625,
      "step": 42724,
      "training_step_time": 0.39182400703430176
    },
    {
      "epoch": 0.000260772705078125,
      "model_forward_time": 0.11797833442687988,
      "step": 42725
    },
    {
      "epoch": 0.000260772705078125,
      "step": 42725,
      "training_step_time": 0.4082462787628174
    },
    {
      "epoch": 0.00026077880859375,
      "model_forward_time": 0.11871600151062012,
      "step": 42726
    },
    {
      "epoch": 0.00026077880859375,
      "step": 42726,
      "training_step_time": 0.3838047981262207
    },
    {
      "epoch": 0.000260784912109375,
      "model_forward_time": 0.11792635917663574,
      "step": 42727
    },
    {
      "epoch": 0.000260784912109375,
      "step": 42727,
      "training_step_time": 0.38561224937438965
    },
    {
      "epoch": 0.000260791015625,
      "model_forward_time": 0.11810493469238281,
      "step": 42728
    },
    {
      "epoch": 0.000260791015625,
      "step": 42728,
      "training_step_time": 0.374744176864624
    },
    {
      "epoch": 0.000260797119140625,
      "model_forward_time": 0.11783695220947266,
      "step": 42729
    },
    {
      "epoch": 0.000260797119140625,
      "step": 42729,
      "training_step_time": 0.4074251651763916
    },
    {
      "epoch": 0.00026080322265625,
      "grad_norm": 0.14231929183006287,
      "learning_rate": 2.0991032682069246e-05,
      "loss": 0.0317,
      "step": 42730
    },
    {
      "epoch": 0.00026080322265625,
      "model_forward_time": 0.11825060844421387,
      "step": 42730
    },
    {
      "epoch": 0.00026080322265625,
      "step": 42730,
      "training_step_time": 0.42888426780700684
    },
    {
      "epoch": 0.000260809326171875,
      "model_forward_time": 0.11824679374694824,
      "step": 42731
    },
    {
      "epoch": 0.000260809326171875,
      "step": 42731,
      "training_step_time": 0.4093942642211914
    },
    {
      "epoch": 0.0002608154296875,
      "model_forward_time": 0.11568045616149902,
      "step": 42732
    },
    {
      "epoch": 0.0002608154296875,
      "step": 42732,
      "training_step_time": 0.44117164611816406
    },
    {
      "epoch": 0.000260821533203125,
      "model_forward_time": 0.11576628684997559,
      "step": 42733
    },
    {
      "epoch": 0.000260821533203125,
      "step": 42733,
      "training_step_time": 0.41344308853149414
    },
    {
      "epoch": 0.00026082763671875,
      "model_forward_time": 0.11497735977172852,
      "step": 42734
    },
    {
      "epoch": 0.00026082763671875,
      "step": 42734,
      "training_step_time": 0.44847631454467773
    },
    {
      "epoch": 0.000260833740234375,
      "model_forward_time": 0.11461830139160156,
      "step": 42735
    },
    {
      "epoch": 0.000260833740234375,
      "step": 42735,
      "training_step_time": 0.5129604339599609
    },
    {
      "epoch": 0.00026083984375,
      "model_forward_time": 0.11478829383850098,
      "step": 42736
    },
    {
      "epoch": 0.00026083984375,
      "step": 42736,
      "training_step_time": 0.48068737983703613
    },
    {
      "epoch": 0.000260845947265625,
      "model_forward_time": 0.11455798149108887,
      "step": 42737
    },
    {
      "epoch": 0.000260845947265625,
      "step": 42737,
      "training_step_time": 0.3935079574584961
    },
    {
      "epoch": 0.00026085205078125,
      "model_forward_time": 0.115692138671875,
      "step": 42738
    },
    {
      "epoch": 0.00026085205078125,
      "step": 42738,
      "training_step_time": 0.38580751419067383
    },
    {
      "epoch": 0.000260858154296875,
      "model_forward_time": 0.11746001243591309,
      "step": 42739
    },
    {
      "epoch": 0.000260858154296875,
      "step": 42739,
      "training_step_time": 0.39420056343078613
    },
    {
      "epoch": 0.0002608642578125,
      "grad_norm": 0.10550425201654434,
      "learning_rate": 2.0968591546498488e-05,
      "loss": 0.0368,
      "step": 42740
    },
    {
      "epoch": 0.0002608642578125,
      "model_forward_time": 0.11815094947814941,
      "step": 42740
    },
    {
      "epoch": 0.0002608642578125,
      "step": 42740,
      "training_step_time": 0.3837704658508301
    },
    {
      "epoch": 0.000260870361328125,
      "model_forward_time": 0.11750149726867676,
      "step": 42741
    },
    {
      "epoch": 0.000260870361328125,
      "step": 42741,
      "training_step_time": 0.37956857681274414
    },
    {
      "epoch": 0.00026087646484375,
      "model_forward_time": 0.11791348457336426,
      "step": 42742
    },
    {
      "epoch": 0.00026087646484375,
      "step": 42742,
      "training_step_time": 0.390155553817749
    },
    {
      "epoch": 0.000260882568359375,
      "model_forward_time": 0.11601853370666504,
      "step": 42743
    },
    {
      "epoch": 0.000260882568359375,
      "step": 42743,
      "training_step_time": 0.4435689449310303
    },
    {
      "epoch": 0.000260888671875,
      "model_forward_time": 0.1155252456665039,
      "step": 42744
    },
    {
      "epoch": 0.000260888671875,
      "step": 42744,
      "training_step_time": 0.396838903427124
    },
    {
      "epoch": 0.000260894775390625,
      "model_forward_time": 0.11658859252929688,
      "step": 42745
    },
    {
      "epoch": 0.000260894775390625,
      "step": 42745,
      "training_step_time": 0.40642499923706055
    },
    {
      "epoch": 0.00026090087890625,
      "model_forward_time": 0.1148386001586914,
      "step": 42746
    },
    {
      "epoch": 0.00026090087890625,
      "step": 42746,
      "training_step_time": 0.4734077453613281
    },
    {
      "epoch": 0.000260906982421875,
      "model_forward_time": 0.11501717567443848,
      "step": 42747
    },
    {
      "epoch": 0.000260906982421875,
      "step": 42747,
      "training_step_time": 0.4689490795135498
    },
    {
      "epoch": 0.0002609130859375,
      "model_forward_time": 0.11448311805725098,
      "step": 42748
    },
    {
      "epoch": 0.0002609130859375,
      "step": 42748,
      "training_step_time": 0.41261744499206543
    },
    {
      "epoch": 0.000260919189453125,
      "model_forward_time": 0.11510276794433594,
      "step": 42749
    },
    {
      "epoch": 0.000260919189453125,
      "step": 42749,
      "training_step_time": 0.45675015449523926
    },
    {
      "epoch": 0.00026092529296875,
      "grad_norm": 0.17819760739803314,
      "learning_rate": 2.094615922990309e-05,
      "loss": 0.0376,
      "step": 42750
    },
    {
      "epoch": 0.00026092529296875,
      "model_forward_time": 0.11477994918823242,
      "step": 42750
    },
    {
      "epoch": 0.00026092529296875,
      "step": 42750,
      "training_step_time": 0.40117669105529785
    },
    {
      "epoch": 0.000260931396484375,
      "model_forward_time": 0.1148993968963623,
      "step": 42751
    },
    {
      "epoch": 0.000260931396484375,
      "step": 42751,
      "training_step_time": 0.4879150390625
    },
    {
      "epoch": 0.0002609375,
      "model_forward_time": 0.11605000495910645,
      "step": 42752
    },
    {
      "epoch": 0.0002609375,
      "step": 42752,
      "training_step_time": 0.3892078399658203
    },
    {
      "epoch": 0.000260943603515625,
      "model_forward_time": 0.11579394340515137,
      "step": 42753
    },
    {
      "epoch": 0.000260943603515625,
      "step": 42753,
      "training_step_time": 0.7603416442871094
    },
    {
      "epoch": 0.00026094970703125,
      "model_forward_time": 0.11527371406555176,
      "step": 42754
    },
    {
      "epoch": 0.00026094970703125,
      "step": 42754,
      "training_step_time": 0.39380383491516113
    },
    {
      "epoch": 0.000260955810546875,
      "model_forward_time": 0.11407780647277832,
      "step": 42755
    },
    {
      "epoch": 0.000260955810546875,
      "step": 42755,
      "training_step_time": 0.3910715579986572
    },
    {
      "epoch": 0.0002609619140625,
      "model_forward_time": 0.11436152458190918,
      "step": 42756
    },
    {
      "epoch": 0.0002609619140625,
      "step": 42756,
      "training_step_time": 0.3625147342681885
    },
    {
      "epoch": 0.000260968017578125,
      "model_forward_time": 0.11446142196655273,
      "step": 42757
    },
    {
      "epoch": 0.000260968017578125,
      "step": 42757,
      "training_step_time": 0.4009130001068115
    },
    {
      "epoch": 0.00026097412109375,
      "model_forward_time": 0.1150970458984375,
      "step": 42758
    },
    {
      "epoch": 0.00026097412109375,
      "step": 42758,
      "training_step_time": 0.4800553321838379
    },
    {
      "epoch": 0.000260980224609375,
      "model_forward_time": 0.11472630500793457,
      "step": 42759
    },
    {
      "epoch": 0.000260980224609375,
      "step": 42759,
      "training_step_time": 0.8793342113494873
    },
    {
      "epoch": 0.000260986328125,
      "grad_norm": 0.11508451402187347,
      "learning_rate": 2.09237357390974e-05,
      "loss": 0.034,
      "step": 42760
    },
    {
      "epoch": 0.000260986328125,
      "model_forward_time": 0.11442232131958008,
      "step": 42760
    },
    {
      "epoch": 0.000260986328125,
      "step": 42760,
      "training_step_time": 0.4755527973175049
    },
    {
      "epoch": 0.000260992431640625,
      "model_forward_time": 0.11388349533081055,
      "step": 42761
    },
    {
      "epoch": 0.000260992431640625,
      "step": 42761,
      "training_step_time": 0.38326096534729004
    },
    {
      "epoch": 0.00026099853515625,
      "model_forward_time": 0.11415672302246094,
      "step": 42762
    },
    {
      "epoch": 0.00026099853515625,
      "step": 42762,
      "training_step_time": 0.3816044330596924
    },
    {
      "epoch": 0.000261004638671875,
      "model_forward_time": 0.11417245864868164,
      "step": 42763
    },
    {
      "epoch": 0.000261004638671875,
      "step": 42763,
      "training_step_time": 0.450214147567749
    },
    {
      "epoch": 0.0002610107421875,
      "model_forward_time": 0.11455559730529785,
      "step": 42764
    },
    {
      "epoch": 0.0002610107421875,
      "step": 42764,
      "training_step_time": 0.3857302665710449
    },
    {
      "epoch": 0.000261016845703125,
      "model_forward_time": 0.1149137020111084,
      "step": 42765
    },
    {
      "epoch": 0.000261016845703125,
      "step": 42765,
      "training_step_time": 0.9094884395599365
    },
    {
      "epoch": 0.00026102294921875,
      "model_forward_time": 0.11425590515136719,
      "step": 42766
    },
    {
      "epoch": 0.00026102294921875,
      "step": 42766,
      "training_step_time": 0.38876986503601074
    },
    {
      "epoch": 0.000261029052734375,
      "model_forward_time": 0.11405682563781738,
      "step": 42767
    },
    {
      "epoch": 0.000261029052734375,
      "step": 42767,
      "training_step_time": 0.3855702877044678
    },
    {
      "epoch": 0.00026103515625,
      "model_forward_time": 0.11410856246948242,
      "step": 42768
    },
    {
      "epoch": 0.00026103515625,
      "step": 42768,
      "training_step_time": 0.3937571048736572
    },
    {
      "epoch": 0.000261041259765625,
      "model_forward_time": 0.1147160530090332,
      "step": 42769
    },
    {
      "epoch": 0.000261041259765625,
      "step": 42769,
      "training_step_time": 0.4211125373840332
    },
    {
      "epoch": 0.00026104736328125,
      "grad_norm": 0.0988665223121643,
      "learning_rate": 2.0901321080893056e-05,
      "loss": 0.0336,
      "step": 42770
    },
    {
      "epoch": 0.00026104736328125,
      "model_forward_time": 0.11463308334350586,
      "step": 42770
    },
    {
      "epoch": 0.00026104736328125,
      "step": 42770,
      "training_step_time": 0.4787156581878662
    },
    {
      "epoch": 0.000261053466796875,
      "model_forward_time": 0.11541271209716797,
      "step": 42771
    },
    {
      "epoch": 0.000261053466796875,
      "step": 42771,
      "training_step_time": 0.41712355613708496
    },
    {
      "epoch": 0.0002610595703125,
      "model_forward_time": 0.11499857902526855,
      "step": 42772
    },
    {
      "epoch": 0.0002610595703125,
      "step": 42772,
      "training_step_time": 0.4255688190460205
    },
    {
      "epoch": 0.000261065673828125,
      "model_forward_time": 0.11522221565246582,
      "step": 42773
    },
    {
      "epoch": 0.000261065673828125,
      "step": 42773,
      "training_step_time": 0.40331220626831055
    },
    {
      "epoch": 0.00026107177734375,
      "model_forward_time": 0.1149444580078125,
      "step": 42774
    },
    {
      "epoch": 0.00026107177734375,
      "step": 42774,
      "training_step_time": 0.4437887668609619
    },
    {
      "epoch": 0.000261077880859375,
      "model_forward_time": 0.11466050148010254,
      "step": 42775
    },
    {
      "epoch": 0.000261077880859375,
      "step": 42775,
      "training_step_time": 0.4971127510070801
    },
    {
      "epoch": 0.000261083984375,
      "model_forward_time": 0.11455535888671875,
      "step": 42776
    },
    {
      "epoch": 0.000261083984375,
      "step": 42776,
      "training_step_time": 0.4087502956390381
    },
    {
      "epoch": 0.000261090087890625,
      "model_forward_time": 0.1140756607055664,
      "step": 42777
    },
    {
      "epoch": 0.000261090087890625,
      "step": 42777,
      "training_step_time": 0.7586228847503662
    },
    {
      "epoch": 0.00026109619140625,
      "model_forward_time": 0.11386775970458984,
      "step": 42778
    },
    {
      "epoch": 0.00026109619140625,
      "step": 42778,
      "training_step_time": 0.38784050941467285
    },
    {
      "epoch": 0.000261102294921875,
      "model_forward_time": 0.11664748191833496,
      "step": 42779
    },
    {
      "epoch": 0.000261102294921875,
      "step": 42779,
      "training_step_time": 0.396014928817749
    },
    {
      "epoch": 0.0002611083984375,
      "grad_norm": 0.11344080418348312,
      "learning_rate": 2.0878915262099098e-05,
      "loss": 0.0334,
      "step": 42780
    },
    {
      "epoch": 0.0002611083984375,
      "model_forward_time": 0.11425280570983887,
      "step": 42780
    },
    {
      "epoch": 0.0002611083984375,
      "step": 42780,
      "training_step_time": 0.3948080539703369
    },
    {
      "epoch": 0.000261114501953125,
      "model_forward_time": 0.11460065841674805,
      "step": 42781
    },
    {
      "epoch": 0.000261114501953125,
      "step": 42781,
      "training_step_time": 0.39348840713500977
    },
    {
      "epoch": 0.00026112060546875,
      "model_forward_time": 0.11428380012512207,
      "step": 42782
    },
    {
      "epoch": 0.00026112060546875,
      "step": 42782,
      "training_step_time": 0.39301586151123047
    },
    {
      "epoch": 0.000261126708984375,
      "model_forward_time": 0.11837387084960938,
      "step": 42783
    },
    {
      "epoch": 0.000261126708984375,
      "step": 42783,
      "training_step_time": 1.0500130653381348
    },
    {
      "epoch": 0.0002611328125,
      "model_forward_time": 0.11795282363891602,
      "step": 42784
    },
    {
      "epoch": 0.0002611328125,
      "step": 42784,
      "training_step_time": 0.42455315589904785
    },
    {
      "epoch": 0.000261138916015625,
      "model_forward_time": 0.11660099029541016,
      "step": 42785
    },
    {
      "epoch": 0.000261138916015625,
      "step": 42785,
      "training_step_time": 0.40238285064697266
    },
    {
      "epoch": 0.00026114501953125,
      "model_forward_time": 0.11417794227600098,
      "step": 42786
    },
    {
      "epoch": 0.00026114501953125,
      "step": 42786,
      "training_step_time": 0.440185546875
    },
    {
      "epoch": 0.000261151123046875,
      "model_forward_time": 0.11396241188049316,
      "step": 42787
    },
    {
      "epoch": 0.000261151123046875,
      "step": 42787,
      "training_step_time": 0.38633251190185547
    },
    {
      "epoch": 0.0002611572265625,
      "model_forward_time": 0.11431717872619629,
      "step": 42788
    },
    {
      "epoch": 0.0002611572265625,
      "step": 42788,
      "training_step_time": 0.40630578994750977
    },
    {
      "epoch": 0.000261163330078125,
      "model_forward_time": 0.11496162414550781,
      "step": 42789
    },
    {
      "epoch": 0.000261163330078125,
      "step": 42789,
      "training_step_time": 0.5252971649169922
    },
    {
      "epoch": 0.00026116943359375,
      "grad_norm": 0.1431674212217331,
      "learning_rate": 2.085651828952175e-05,
      "loss": 0.0395,
      "step": 42790
    },
    {
      "epoch": 0.00026116943359375,
      "model_forward_time": 0.11587190628051758,
      "step": 42790
    },
    {
      "epoch": 0.00026116943359375,
      "step": 42790,
      "training_step_time": 0.39173102378845215
    },
    {
      "epoch": 0.000261175537109375,
      "model_forward_time": 0.11471366882324219,
      "step": 42791
    },
    {
      "epoch": 0.000261175537109375,
      "step": 42791,
      "training_step_time": 0.4018852710723877
    },
    {
      "epoch": 0.000261181640625,
      "model_forward_time": 0.1150672435760498,
      "step": 42792
    },
    {
      "epoch": 0.000261181640625,
      "step": 42792,
      "training_step_time": 0.39456987380981445
    },
    {
      "epoch": 0.000261187744140625,
      "model_forward_time": 0.11550092697143555,
      "step": 42793
    },
    {
      "epoch": 0.000261187744140625,
      "step": 42793,
      "training_step_time": 0.39566493034362793
    },
    {
      "epoch": 0.00026119384765625,
      "model_forward_time": 0.1143033504486084,
      "step": 42794
    },
    {
      "epoch": 0.00026119384765625,
      "step": 42794,
      "training_step_time": 0.3911585807800293
    },
    {
      "epoch": 0.000261199951171875,
      "model_forward_time": 0.11472630500793457,
      "step": 42795
    },
    {
      "epoch": 0.000261199951171875,
      "step": 42795,
      "training_step_time": 0.4064066410064697
    },
    {
      "epoch": 0.0002612060546875,
      "model_forward_time": 0.11559557914733887,
      "step": 42796
    },
    {
      "epoch": 0.0002612060546875,
      "step": 42796,
      "training_step_time": 0.36881184577941895
    },
    {
      "epoch": 0.000261212158203125,
      "model_forward_time": 0.11478495597839355,
      "step": 42797
    },
    {
      "epoch": 0.000261212158203125,
      "step": 42797,
      "training_step_time": 0.4618055820465088
    },
    {
      "epoch": 0.00026121826171875,
      "model_forward_time": 0.11481785774230957,
      "step": 42798
    },
    {
      "epoch": 0.00026121826171875,
      "step": 42798,
      "training_step_time": 0.4575812816619873
    },
    {
      "epoch": 0.000261224365234375,
      "model_forward_time": 0.11473846435546875,
      "step": 42799
    },
    {
      "epoch": 0.000261224365234375,
      "step": 42799,
      "training_step_time": 0.45665621757507324
    },
    {
      "epoch": 0.00026123046875,
      "grad_norm": 0.10246357321739197,
      "learning_rate": 2.0834130169964692e-05,
      "loss": 0.0358,
      "step": 42800
    },
    {
      "epoch": 0.00026123046875,
      "model_forward_time": 0.11466741561889648,
      "step": 42800
    },
    {
      "epoch": 0.00026123046875,
      "step": 42800,
      "training_step_time": 0.4527251720428467
    },
    {
      "epoch": 0.000261236572265625,
      "model_forward_time": 0.11484169960021973,
      "step": 42801
    },
    {
      "epoch": 0.000261236572265625,
      "step": 42801,
      "training_step_time": 0.38610386848449707
    },
    {
      "epoch": 0.00026124267578125,
      "model_forward_time": 0.11404013633728027,
      "step": 42802
    },
    {
      "epoch": 0.00026124267578125,
      "step": 42802,
      "training_step_time": 0.42555713653564453
    },
    {
      "epoch": 0.000261248779296875,
      "model_forward_time": 0.11439681053161621,
      "step": 42803
    },
    {
      "epoch": 0.000261248779296875,
      "step": 42803,
      "training_step_time": 0.4038851261138916
    },
    {
      "epoch": 0.0002612548828125,
      "model_forward_time": 0.11504387855529785,
      "step": 42804
    },
    {
      "epoch": 0.0002612548828125,
      "step": 42804,
      "training_step_time": 0.45490145683288574
    },
    {
      "epoch": 0.000261260986328125,
      "model_forward_time": 0.11506438255310059,
      "step": 42805
    },
    {
      "epoch": 0.000261260986328125,
      "step": 42805,
      "training_step_time": 0.39957571029663086
    },
    {
      "epoch": 0.00026126708984375,
      "model_forward_time": 0.11483144760131836,
      "step": 42806
    },
    {
      "epoch": 0.00026126708984375,
      "step": 42806,
      "training_step_time": 0.3973262310028076
    },
    {
      "epoch": 0.000261273193359375,
      "model_forward_time": 0.1151888370513916,
      "step": 42807
    },
    {
      "epoch": 0.000261273193359375,
      "step": 42807,
      "training_step_time": 0.3885841369628906
    },
    {
      "epoch": 0.000261279296875,
      "model_forward_time": 0.11493873596191406,
      "step": 42808
    },
    {
      "epoch": 0.000261279296875,
      "step": 42808,
      "training_step_time": 0.3808932304382324
    },
    {
      "epoch": 0.000261285400390625,
      "model_forward_time": 0.11468219757080078,
      "step": 42809
    },
    {
      "epoch": 0.000261285400390625,
      "step": 42809,
      "training_step_time": 0.3914515972137451
    },
    {
      "epoch": 0.00026129150390625,
      "grad_norm": 0.14734181761741638,
      "learning_rate": 2.0811750910228774e-05,
      "loss": 0.0366,
      "step": 42810
    },
    {
      "epoch": 0.00026129150390625,
      "model_forward_time": 0.11616778373718262,
      "step": 42810
    },
    {
      "epoch": 0.00026129150390625,
      "step": 42810,
      "training_step_time": 0.4280056953430176
    },
    {
      "epoch": 0.000261297607421875,
      "model_forward_time": 0.11513352394104004,
      "step": 42811
    },
    {
      "epoch": 0.000261297607421875,
      "step": 42811,
      "training_step_time": 0.40926027297973633
    },
    {
      "epoch": 0.0002613037109375,
      "model_forward_time": 0.11571836471557617,
      "step": 42812
    },
    {
      "epoch": 0.0002613037109375,
      "step": 42812,
      "training_step_time": 0.4512455463409424
    },
    {
      "epoch": 0.000261309814453125,
      "model_forward_time": 0.11671161651611328,
      "step": 42813
    },
    {
      "epoch": 0.000261309814453125,
      "step": 42813,
      "training_step_time": 0.41662073135375977
    },
    {
      "epoch": 0.00026131591796875,
      "model_forward_time": 0.1152195930480957,
      "step": 42814
    },
    {
      "epoch": 0.00026131591796875,
      "step": 42814,
      "training_step_time": 0.40432190895080566
    },
    {
      "epoch": 0.000261322021484375,
      "model_forward_time": 0.11455011367797852,
      "step": 42815
    },
    {
      "epoch": 0.000261322021484375,
      "step": 42815,
      "training_step_time": 0.5216293334960938
    },
    {
      "epoch": 0.000261328125,
      "model_forward_time": 0.11537861824035645,
      "step": 42816
    },
    {
      "epoch": 0.000261328125,
      "step": 42816,
      "training_step_time": 0.39626312255859375
    },
    {
      "epoch": 0.000261334228515625,
      "model_forward_time": 0.11466574668884277,
      "step": 42817
    },
    {
      "epoch": 0.000261334228515625,
      "step": 42817,
      "training_step_time": 0.39527344703674316
    },
    {
      "epoch": 0.00026134033203125,
      "model_forward_time": 0.11589479446411133,
      "step": 42818
    },
    {
      "epoch": 0.00026134033203125,
      "step": 42818,
      "training_step_time": 0.39084315299987793
    },
    {
      "epoch": 0.000261346435546875,
      "model_forward_time": 0.11467576026916504,
      "step": 42819
    },
    {
      "epoch": 0.000261346435546875,
      "step": 42819,
      "training_step_time": 0.39739489555358887
    },
    {
      "epoch": 0.0002613525390625,
      "grad_norm": 0.10146918147802353,
      "learning_rate": 2.0789380517112272e-05,
      "loss": 0.0391,
      "step": 42820
    },
    {
      "epoch": 0.0002613525390625,
      "model_forward_time": 0.11522889137268066,
      "step": 42820
    },
    {
      "epoch": 0.0002613525390625,
      "step": 42820,
      "training_step_time": 0.38196611404418945
    },
    {
      "epoch": 0.000261358642578125,
      "model_forward_time": 0.11447000503540039,
      "step": 42821
    },
    {
      "epoch": 0.000261358642578125,
      "step": 42821,
      "training_step_time": 0.40049028396606445
    },
    {
      "epoch": 0.00026136474609375,
      "model_forward_time": 0.11533522605895996,
      "step": 42822
    },
    {
      "epoch": 0.00026136474609375,
      "step": 42822,
      "training_step_time": 0.39148926734924316
    },
    {
      "epoch": 0.000261370849609375,
      "model_forward_time": 0.11566686630249023,
      "step": 42823
    },
    {
      "epoch": 0.000261370849609375,
      "step": 42823,
      "training_step_time": 0.39324116706848145
    },
    {
      "epoch": 0.000261376953125,
      "model_forward_time": 0.115966796875,
      "step": 42824
    },
    {
      "epoch": 0.000261376953125,
      "step": 42824,
      "training_step_time": 0.43459510803222656
    },
    {
      "epoch": 0.000261383056640625,
      "model_forward_time": 0.11553382873535156,
      "step": 42825
    },
    {
      "epoch": 0.000261383056640625,
      "step": 42825,
      "training_step_time": 0.591015100479126
    },
    {
      "epoch": 0.00026138916015625,
      "model_forward_time": 0.12507033348083496,
      "step": 42826
    },
    {
      "epoch": 0.00026138916015625,
      "step": 42826,
      "training_step_time": 0.41167545318603516
    },
    {
      "epoch": 0.000261395263671875,
      "model_forward_time": 0.11760902404785156,
      "step": 42827
    },
    {
      "epoch": 0.000261395263671875,
      "step": 42827,
      "training_step_time": 0.4842495918273926
    },
    {
      "epoch": 0.0002614013671875,
      "model_forward_time": 0.11775708198547363,
      "step": 42828
    },
    {
      "epoch": 0.0002614013671875,
      "step": 42828,
      "training_step_time": 0.4288785457611084
    },
    {
      "epoch": 0.000261407470703125,
      "model_forward_time": 0.11781907081604004,
      "step": 42829
    },
    {
      "epoch": 0.000261407470703125,
      "step": 42829,
      "training_step_time": 0.38988709449768066
    },
    {
      "epoch": 0.00026141357421875,
      "grad_norm": 0.11104472726583481,
      "learning_rate": 2.0767018997410713e-05,
      "loss": 0.0358,
      "step": 42830
    },
    {
      "epoch": 0.00026141357421875,
      "model_forward_time": 0.11536741256713867,
      "step": 42830
    },
    {
      "epoch": 0.00026141357421875,
      "step": 42830,
      "training_step_time": 0.4040541648864746
    },
    {
      "epoch": 0.000261419677734375,
      "model_forward_time": 0.11488842964172363,
      "step": 42831
    },
    {
      "epoch": 0.000261419677734375,
      "step": 42831,
      "training_step_time": 0.42943286895751953
    },
    {
      "epoch": 0.00026142578125,
      "model_forward_time": 0.11485981941223145,
      "step": 42832
    },
    {
      "epoch": 0.00026142578125,
      "step": 42832,
      "training_step_time": 0.435793399810791
    },
    {
      "epoch": 0.000261431884765625,
      "model_forward_time": 0.1148824691772461,
      "step": 42833
    },
    {
      "epoch": 0.000261431884765625,
      "step": 42833,
      "training_step_time": 0.39597153663635254
    },
    {
      "epoch": 0.00026143798828125,
      "model_forward_time": 0.1151876449584961,
      "step": 42834
    },
    {
      "epoch": 0.00026143798828125,
      "step": 42834,
      "training_step_time": 0.3803901672363281
    },
    {
      "epoch": 0.000261444091796875,
      "model_forward_time": 0.11539411544799805,
      "step": 42835
    },
    {
      "epoch": 0.000261444091796875,
      "step": 42835,
      "training_step_time": 0.40064263343811035
    },
    {
      "epoch": 0.0002614501953125,
      "model_forward_time": 0.11560750007629395,
      "step": 42836
    },
    {
      "epoch": 0.0002614501953125,
      "step": 42836,
      "training_step_time": 0.39446473121643066
    },
    {
      "epoch": 0.000261456298828125,
      "model_forward_time": 0.1148838996887207,
      "step": 42837
    },
    {
      "epoch": 0.000261456298828125,
      "step": 42837,
      "training_step_time": 0.392411470413208
    },
    {
      "epoch": 0.00026146240234375,
      "model_forward_time": 0.11531329154968262,
      "step": 42838
    },
    {
      "epoch": 0.00026146240234375,
      "step": 42838,
      "training_step_time": 0.3820338249206543
    },
    {
      "epoch": 0.000261468505859375,
      "model_forward_time": 0.11557149887084961,
      "step": 42839
    },
    {
      "epoch": 0.000261468505859375,
      "step": 42839,
      "training_step_time": 0.42010951042175293
    },
    {
      "epoch": 0.000261474609375,
      "grad_norm": 0.11853911727666855,
      "learning_rate": 2.0744666357916925e-05,
      "loss": 0.0378,
      "step": 42840
    },
    {
      "epoch": 0.000261474609375,
      "model_forward_time": 0.11549639701843262,
      "step": 42840
    },
    {
      "epoch": 0.000261474609375,
      "step": 42840,
      "training_step_time": 0.4401562213897705
    },
    {
      "epoch": 0.000261480712890625,
      "model_forward_time": 0.11519885063171387,
      "step": 42841
    },
    {
      "epoch": 0.000261480712890625,
      "step": 42841,
      "training_step_time": 0.4066169261932373
    },
    {
      "epoch": 0.00026148681640625,
      "model_forward_time": 0.11622190475463867,
      "step": 42842
    },
    {
      "epoch": 0.00026148681640625,
      "step": 42842,
      "training_step_time": 0.4195077419281006
    },
    {
      "epoch": 0.000261492919921875,
      "model_forward_time": 0.11512303352355957,
      "step": 42843
    },
    {
      "epoch": 0.000261492919921875,
      "step": 42843,
      "training_step_time": 0.43959808349609375
    },
    {
      "epoch": 0.0002614990234375,
      "model_forward_time": 0.11494088172912598,
      "step": 42844
    },
    {
      "epoch": 0.0002614990234375,
      "step": 42844,
      "training_step_time": 0.3873128890991211
    },
    {
      "epoch": 0.000261505126953125,
      "model_forward_time": 0.11512422561645508,
      "step": 42845
    },
    {
      "epoch": 0.000261505126953125,
      "step": 42845,
      "training_step_time": 0.4341564178466797
    },
    {
      "epoch": 0.00026151123046875,
      "model_forward_time": 0.1147923469543457,
      "step": 42846
    },
    {
      "epoch": 0.00026151123046875,
      "step": 42846,
      "training_step_time": 0.4215707778930664
    },
    {
      "epoch": 0.000261517333984375,
      "model_forward_time": 0.1155233383178711,
      "step": 42847
    },
    {
      "epoch": 0.000261517333984375,
      "step": 42847,
      "training_step_time": 0.4291560649871826
    },
    {
      "epoch": 0.0002615234375,
      "model_forward_time": 0.11516809463500977,
      "step": 42848
    },
    {
      "epoch": 0.0002615234375,
      "step": 42848,
      "training_step_time": 0.39708948135375977
    },
    {
      "epoch": 0.000261529541015625,
      "model_forward_time": 0.11493539810180664,
      "step": 42849
    },
    {
      "epoch": 0.000261529541015625,
      "step": 42849,
      "training_step_time": 0.5007879734039307
    },
    {
      "epoch": 0.00026153564453125,
      "grad_norm": 0.11947350949048996,
      "learning_rate": 2.072232260542106e-05,
      "loss": 0.0344,
      "step": 42850
    },
    {
      "epoch": 0.00026153564453125,
      "model_forward_time": 0.11493730545043945,
      "step": 42850
    },
    {
      "epoch": 0.00026153564453125,
      "step": 42850,
      "training_step_time": 0.3984189033508301
    },
    {
      "epoch": 0.000261541748046875,
      "model_forward_time": 0.11556339263916016,
      "step": 42851
    },
    {
      "epoch": 0.000261541748046875,
      "step": 42851,
      "training_step_time": 0.4019961357116699
    },
    {
      "epoch": 0.0002615478515625,
      "model_forward_time": 0.11529874801635742,
      "step": 42852
    },
    {
      "epoch": 0.0002615478515625,
      "step": 42852,
      "training_step_time": 0.40253353118896484
    },
    {
      "epoch": 0.000261553955078125,
      "model_forward_time": 0.11483287811279297,
      "step": 42853
    },
    {
      "epoch": 0.000261553955078125,
      "step": 42853,
      "training_step_time": 0.396639347076416
    },
    {
      "epoch": 0.00026156005859375,
      "model_forward_time": 0.1150979995727539,
      "step": 42854
    },
    {
      "epoch": 0.00026156005859375,
      "step": 42854,
      "training_step_time": 0.42421507835388184
    },
    {
      "epoch": 0.000261566162109375,
      "model_forward_time": 0.11483335494995117,
      "step": 42855
    },
    {
      "epoch": 0.000261566162109375,
      "step": 42855,
      "training_step_time": 0.6761445999145508
    },
    {
      "epoch": 0.000261572265625,
      "model_forward_time": 0.11506032943725586,
      "step": 42856
    },
    {
      "epoch": 0.000261572265625,
      "step": 42856,
      "training_step_time": 0.4256596565246582
    },
    {
      "epoch": 0.000261578369140625,
      "model_forward_time": 0.11434793472290039,
      "step": 42857
    },
    {
      "epoch": 0.000261578369140625,
      "step": 42857,
      "training_step_time": 0.40210413932800293
    },
    {
      "epoch": 0.00026158447265625,
      "model_forward_time": 0.11481380462646484,
      "step": 42858
    },
    {
      "epoch": 0.00026158447265625,
      "step": 42858,
      "training_step_time": 0.4122188091278076
    },
    {
      "epoch": 0.000261590576171875,
      "model_forward_time": 0.11406540870666504,
      "step": 42859
    },
    {
      "epoch": 0.000261590576171875,
      "step": 42859,
      "training_step_time": 0.46070098876953125
    },
    {
      "epoch": 0.0002615966796875,
      "grad_norm": 0.1355125904083252,
      "learning_rate": 2.0699987746710554e-05,
      "loss": 0.0374,
      "step": 42860
    },
    {
      "epoch": 0.0002615966796875,
      "model_forward_time": 0.11434698104858398,
      "step": 42860
    },
    {
      "epoch": 0.0002615966796875,
      "step": 42860,
      "training_step_time": 0.4380338191986084
    },
    {
      "epoch": 0.000261602783203125,
      "model_forward_time": 0.11513805389404297,
      "step": 42861
    },
    {
      "epoch": 0.000261602783203125,
      "step": 42861,
      "training_step_time": 0.4698665142059326
    },
    {
      "epoch": 0.00026160888671875,
      "model_forward_time": 0.11485815048217773,
      "step": 42862
    },
    {
      "epoch": 0.00026160888671875,
      "step": 42862,
      "training_step_time": 0.3870813846588135
    },
    {
      "epoch": 0.000261614990234375,
      "model_forward_time": 0.11475491523742676,
      "step": 42863
    },
    {
      "epoch": 0.000261614990234375,
      "step": 42863,
      "training_step_time": 0.38980746269226074
    },
    {
      "epoch": 0.00026162109375,
      "model_forward_time": 0.1149296760559082,
      "step": 42864
    },
    {
      "epoch": 0.00026162109375,
      "step": 42864,
      "training_step_time": 0.387545108795166
    },
    {
      "epoch": 0.000261627197265625,
      "model_forward_time": 0.11518239974975586,
      "step": 42865
    },
    {
      "epoch": 0.000261627197265625,
      "step": 42865,
      "training_step_time": 0.39254236221313477
    },
    {
      "epoch": 0.00026163330078125,
      "model_forward_time": 0.11451911926269531,
      "step": 42866
    },
    {
      "epoch": 0.00026163330078125,
      "step": 42866,
      "training_step_time": 0.3921012878417969
    },
    {
      "epoch": 0.000261639404296875,
      "model_forward_time": 0.11572098731994629,
      "step": 42867
    },
    {
      "epoch": 0.000261639404296875,
      "step": 42867,
      "training_step_time": 0.4344162940979004
    },
    {
      "epoch": 0.0002616455078125,
      "model_forward_time": 0.11563515663146973,
      "step": 42868
    },
    {
      "epoch": 0.0002616455078125,
      "step": 42868,
      "training_step_time": 0.40771937370300293
    },
    {
      "epoch": 0.000261651611328125,
      "model_forward_time": 0.11516642570495605,
      "step": 42869
    },
    {
      "epoch": 0.000261651611328125,
      "step": 42869,
      "training_step_time": 0.3867528438568115
    },
    {
      "epoch": 0.00026165771484375,
      "grad_norm": 0.12327486276626587,
      "learning_rate": 2.067766178857013e-05,
      "loss": 0.0339,
      "step": 42870
    },
    {
      "epoch": 0.00026165771484375,
      "model_forward_time": 0.11575675010681152,
      "step": 42870
    },
    {
      "epoch": 0.00026165771484375,
      "step": 42870,
      "training_step_time": 0.43642163276672363
    },
    {
      "epoch": 0.000261663818359375,
      "model_forward_time": 0.11485576629638672,
      "step": 42871
    },
    {
      "epoch": 0.000261663818359375,
      "step": 42871,
      "training_step_time": 0.4191474914550781
    },
    {
      "epoch": 0.000261669921875,
      "model_forward_time": 0.11531543731689453,
      "step": 42872
    },
    {
      "epoch": 0.000261669921875,
      "step": 42872,
      "training_step_time": 0.4463198184967041
    },
    {
      "epoch": 0.000261676025390625,
      "model_forward_time": 0.11459922790527344,
      "step": 42873
    },
    {
      "epoch": 0.000261676025390625,
      "step": 42873,
      "training_step_time": 0.4925343990325928
    },
    {
      "epoch": 0.00026168212890625,
      "model_forward_time": 0.11513686180114746,
      "step": 42874
    },
    {
      "epoch": 0.00026168212890625,
      "step": 42874,
      "training_step_time": 0.47898173332214355
    },
    {
      "epoch": 0.000261688232421875,
      "model_forward_time": 0.11513781547546387,
      "step": 42875
    },
    {
      "epoch": 0.000261688232421875,
      "step": 42875,
      "training_step_time": 0.38768792152404785
    },
    {
      "epoch": 0.0002616943359375,
      "model_forward_time": 0.11456418037414551,
      "step": 42876
    },
    {
      "epoch": 0.0002616943359375,
      "step": 42876,
      "training_step_time": 0.41672301292419434
    },
    {
      "epoch": 0.000261700439453125,
      "model_forward_time": 0.11592435836791992,
      "step": 42877
    },
    {
      "epoch": 0.000261700439453125,
      "step": 42877,
      "training_step_time": 0.38629674911499023
    },
    {
      "epoch": 0.00026170654296875,
      "model_forward_time": 0.11522483825683594,
      "step": 42878
    },
    {
      "epoch": 0.00026170654296875,
      "step": 42878,
      "training_step_time": 0.3974590301513672
    },
    {
      "epoch": 0.000261712646484375,
      "model_forward_time": 0.11516523361206055,
      "step": 42879
    },
    {
      "epoch": 0.000261712646484375,
      "step": 42879,
      "training_step_time": 0.6734588146209717
    },
    {
      "epoch": 0.00026171875,
      "grad_norm": 0.12138596177101135,
      "learning_rate": 2.065534473778186e-05,
      "loss": 0.036,
      "step": 42880
    },
    {
      "epoch": 0.00026171875,
      "model_forward_time": 0.11529302597045898,
      "step": 42880
    },
    {
      "epoch": 0.00026171875,
      "step": 42880,
      "training_step_time": 0.4188501834869385
    },
    {
      "epoch": 0.000261724853515625,
      "model_forward_time": 0.1142418384552002,
      "step": 42881
    },
    {
      "epoch": 0.000261724853515625,
      "step": 42881,
      "training_step_time": 0.4098069667816162
    },
    {
      "epoch": 0.00026173095703125,
      "model_forward_time": 0.11474394798278809,
      "step": 42882
    },
    {
      "epoch": 0.00026173095703125,
      "step": 42882,
      "training_step_time": 0.396237850189209
    },
    {
      "epoch": 0.000261737060546875,
      "model_forward_time": 0.11503839492797852,
      "step": 42883
    },
    {
      "epoch": 0.000261737060546875,
      "step": 42883,
      "training_step_time": 0.38017964363098145
    },
    {
      "epoch": 0.0002617431640625,
      "model_forward_time": 0.11471772193908691,
      "step": 42884
    },
    {
      "epoch": 0.0002617431640625,
      "step": 42884,
      "training_step_time": 0.433408260345459
    },
    {
      "epoch": 0.000261749267578125,
      "model_forward_time": 0.1147301197052002,
      "step": 42885
    },
    {
      "epoch": 0.000261749267578125,
      "step": 42885,
      "training_step_time": 0.5175189971923828
    },
    {
      "epoch": 0.00026175537109375,
      "model_forward_time": 0.11766648292541504,
      "step": 42886
    },
    {
      "epoch": 0.00026175537109375,
      "step": 42886,
      "training_step_time": 0.398390531539917
    },
    {
      "epoch": 0.000261761474609375,
      "model_forward_time": 0.11597466468811035,
      "step": 42887
    },
    {
      "epoch": 0.000261761474609375,
      "step": 42887,
      "training_step_time": 0.4124596118927002
    },
    {
      "epoch": 0.000261767578125,
      "model_forward_time": 0.11526036262512207,
      "step": 42888
    },
    {
      "epoch": 0.000261767578125,
      "step": 42888,
      "training_step_time": 0.46491503715515137
    },
    {
      "epoch": 0.000261773681640625,
      "model_forward_time": 0.11525225639343262,
      "step": 42889
    },
    {
      "epoch": 0.000261773681640625,
      "step": 42889,
      "training_step_time": 0.49370431900024414
    },
    {
      "epoch": 0.00026177978515625,
      "grad_norm": 0.10678135603666306,
      "learning_rate": 2.063303660112506e-05,
      "loss": 0.0342,
      "step": 42890
    },
    {
      "epoch": 0.00026177978515625,
      "model_forward_time": 0.11435937881469727,
      "step": 42890
    },
    {
      "epoch": 0.00026177978515625,
      "step": 42890,
      "training_step_time": 0.4029812812805176
    },
    {
      "epoch": 0.000261785888671875,
      "model_forward_time": 0.11450862884521484,
      "step": 42891
    },
    {
      "epoch": 0.000261785888671875,
      "step": 42891,
      "training_step_time": 0.3877890110015869
    },
    {
      "epoch": 0.0002617919921875,
      "model_forward_time": 0.11503219604492188,
      "step": 42892
    },
    {
      "epoch": 0.0002617919921875,
      "step": 42892,
      "training_step_time": 0.4026792049407959
    },
    {
      "epoch": 0.000261798095703125,
      "model_forward_time": 0.11423516273498535,
      "step": 42893
    },
    {
      "epoch": 0.000261798095703125,
      "step": 42893,
      "training_step_time": 0.3964715003967285
    },
    {
      "epoch": 0.00026180419921875,
      "model_forward_time": 0.11527109146118164,
      "step": 42894
    },
    {
      "epoch": 0.00026180419921875,
      "step": 42894,
      "training_step_time": 0.41599249839782715
    },
    {
      "epoch": 0.000261810302734375,
      "model_forward_time": 0.11496686935424805,
      "step": 42895
    },
    {
      "epoch": 0.000261810302734375,
      "step": 42895,
      "training_step_time": 0.4258458614349365
    },
    {
      "epoch": 0.00026181640625,
      "model_forward_time": 0.11505770683288574,
      "step": 42896
    },
    {
      "epoch": 0.00026181640625,
      "step": 42896,
      "training_step_time": 0.38578081130981445
    },
    {
      "epoch": 0.000261822509765625,
      "model_forward_time": 0.1150503158569336,
      "step": 42897
    },
    {
      "epoch": 0.000261822509765625,
      "step": 42897,
      "training_step_time": 0.3987772464752197
    },
    {
      "epoch": 0.00026182861328125,
      "model_forward_time": 0.1148369312286377,
      "step": 42898
    },
    {
      "epoch": 0.00026182861328125,
      "step": 42898,
      "training_step_time": 0.5120341777801514
    },
    {
      "epoch": 0.000261834716796875,
      "model_forward_time": 0.11547112464904785,
      "step": 42899
    },
    {
      "epoch": 0.000261834716796875,
      "step": 42899,
      "training_step_time": 0.505629301071167
    },
    {
      "epoch": 0.0002618408203125,
      "grad_norm": 0.09645458310842514,
      "learning_rate": 2.061073738537635e-05,
      "loss": 0.0363,
      "step": 42900
    },
    {
      "epoch": 0.0002618408203125,
      "model_forward_time": 0.11500811576843262,
      "step": 42900
    },
    {
      "epoch": 0.0002618408203125,
      "step": 42900,
      "training_step_time": 0.3995649814605713
    },
    {
      "epoch": 0.000261846923828125,
      "model_forward_time": 0.114990234375,
      "step": 42901
    },
    {
      "epoch": 0.000261846923828125,
      "step": 42901,
      "training_step_time": 0.42191338539123535
    },
    {
      "epoch": 0.00026185302734375,
      "model_forward_time": 0.11396241188049316,
      "step": 42902
    },
    {
      "epoch": 0.00026185302734375,
      "step": 42902,
      "training_step_time": 0.49101686477661133
    },
    {
      "epoch": 0.000261859130859375,
      "model_forward_time": 0.11447978019714355,
      "step": 42903
    },
    {
      "epoch": 0.000261859130859375,
      "step": 42903,
      "training_step_time": 0.6944246292114258
    },
    {
      "epoch": 0.000261865234375,
      "model_forward_time": 0.11394381523132324,
      "step": 42904
    },
    {
      "epoch": 0.000261865234375,
      "step": 42904,
      "training_step_time": 0.37917304039001465
    },
    {
      "epoch": 0.000261871337890625,
      "model_forward_time": 0.11415743827819824,
      "step": 42905
    },
    {
      "epoch": 0.000261871337890625,
      "step": 42905,
      "training_step_time": 0.38982224464416504
    },
    {
      "epoch": 0.00026187744140625,
      "model_forward_time": 0.1143646240234375,
      "step": 42906
    },
    {
      "epoch": 0.00026187744140625,
      "step": 42906,
      "training_step_time": 0.39174461364746094
    },
    {
      "epoch": 0.000261883544921875,
      "model_forward_time": 0.11536002159118652,
      "step": 42907
    },
    {
      "epoch": 0.000261883544921875,
      "step": 42907,
      "training_step_time": 0.4093515872955322
    },
    {
      "epoch": 0.0002618896484375,
      "model_forward_time": 0.11421418190002441,
      "step": 42908
    },
    {
      "epoch": 0.0002618896484375,
      "step": 42908,
      "training_step_time": 0.4701225757598877
    },
    {
      "epoch": 0.000261895751953125,
      "model_forward_time": 0.11565041542053223,
      "step": 42909
    },
    {
      "epoch": 0.000261895751953125,
      "step": 42909,
      "training_step_time": 0.8474442958831787
    },
    {
      "epoch": 0.00026190185546875,
      "grad_norm": 0.11146623641252518,
      "learning_rate": 2.0588447097309645e-05,
      "loss": 0.033,
      "step": 42910
    },
    {
      "epoch": 0.00026190185546875,
      "model_forward_time": 0.11479806900024414,
      "step": 42910
    },
    {
      "epoch": 0.00026190185546875,
      "step": 42910,
      "training_step_time": 0.3640580177307129
    },
    {
      "epoch": 0.000261907958984375,
      "model_forward_time": 0.11402535438537598,
      "step": 42911
    },
    {
      "epoch": 0.000261907958984375,
      "step": 42911,
      "training_step_time": 0.4322941303253174
    },
    {
      "epoch": 0.0002619140625,
      "model_forward_time": 0.1145792007446289,
      "step": 42912
    },
    {
      "epoch": 0.0002619140625,
      "step": 42912,
      "training_step_time": 0.4157843589782715
    },
    {
      "epoch": 0.000261920166015625,
      "model_forward_time": 0.11448550224304199,
      "step": 42913
    },
    {
      "epoch": 0.000261920166015625,
      "step": 42913,
      "training_step_time": 0.41978883743286133
    },
    {
      "epoch": 0.00026192626953125,
      "model_forward_time": 0.11475062370300293,
      "step": 42914
    },
    {
      "epoch": 0.00026192626953125,
      "step": 42914,
      "training_step_time": 0.41959524154663086
    },
    {
      "epoch": 0.000261932373046875,
      "model_forward_time": 0.11454105377197266,
      "step": 42915
    },
    {
      "epoch": 0.000261932373046875,
      "step": 42915,
      "training_step_time": 0.5075571537017822
    },
    {
      "epoch": 0.0002619384765625,
      "model_forward_time": 0.11524367332458496,
      "step": 42916
    },
    {
      "epoch": 0.0002619384765625,
      "step": 42916,
      "training_step_time": 0.43471503257751465
    },
    {
      "epoch": 0.000261944580078125,
      "model_forward_time": 0.11536860466003418,
      "step": 42917
    },
    {
      "epoch": 0.000261944580078125,
      "step": 42917,
      "training_step_time": 0.4014260768890381
    },
    {
      "epoch": 0.00026195068359375,
      "model_forward_time": 0.11472082138061523,
      "step": 42918
    },
    {
      "epoch": 0.00026195068359375,
      "step": 42918,
      "training_step_time": 0.3982126712799072
    },
    {
      "epoch": 0.000261956787109375,
      "model_forward_time": 0.11530709266662598,
      "step": 42919
    },
    {
      "epoch": 0.000261956787109375,
      "step": 42919,
      "training_step_time": 0.39461231231689453
    },
    {
      "epoch": 0.000261962890625,
      "grad_norm": 0.13154010474681854,
      "learning_rate": 2.056616574369612e-05,
      "loss": 0.0403,
      "step": 42920
    },
    {
      "epoch": 0.000261962890625,
      "model_forward_time": 0.11545777320861816,
      "step": 42920
    },
    {
      "epoch": 0.000261962890625,
      "step": 42920,
      "training_step_time": 0.39652085304260254
    },
    {
      "epoch": 0.000261968994140625,
      "model_forward_time": 0.11603927612304688,
      "step": 42921
    },
    {
      "epoch": 0.000261968994140625,
      "step": 42921,
      "training_step_time": 0.8780672550201416
    },
    {
      "epoch": 0.00026197509765625,
      "model_forward_time": 0.11444687843322754,
      "step": 42922
    },
    {
      "epoch": 0.00026197509765625,
      "step": 42922,
      "training_step_time": 0.39157724380493164
    },
    {
      "epoch": 0.000261981201171875,
      "model_forward_time": 0.11504411697387695,
      "step": 42923
    },
    {
      "epoch": 0.000261981201171875,
      "step": 42923,
      "training_step_time": 0.39123058319091797
    },
    {
      "epoch": 0.0002619873046875,
      "model_forward_time": 0.11456298828125,
      "step": 42924
    },
    {
      "epoch": 0.0002619873046875,
      "step": 42924,
      "training_step_time": 0.4166851043701172
    },
    {
      "epoch": 0.000261993408203125,
      "model_forward_time": 0.11435556411743164,
      "step": 42925
    },
    {
      "epoch": 0.000261993408203125,
      "step": 42925,
      "training_step_time": 0.4061160087585449
    },
    {
      "epoch": 0.00026199951171875,
      "model_forward_time": 0.11440134048461914,
      "step": 42926
    },
    {
      "epoch": 0.00026199951171875,
      "step": 42926,
      "training_step_time": 0.3833591938018799
    },
    {
      "epoch": 0.000262005615234375,
      "model_forward_time": 0.11541509628295898,
      "step": 42927
    },
    {
      "epoch": 0.000262005615234375,
      "step": 42927,
      "training_step_time": 0.6879453659057617
    },
    {
      "epoch": 0.00026201171875,
      "model_forward_time": 0.11448240280151367,
      "step": 42928
    },
    {
      "epoch": 0.00026201171875,
      "step": 42928,
      "training_step_time": 0.45389580726623535
    },
    {
      "epoch": 0.000262017822265625,
      "model_forward_time": 0.11461234092712402,
      "step": 42929
    },
    {
      "epoch": 0.000262017822265625,
      "step": 42929,
      "training_step_time": 0.3971717357635498
    },
    {
      "epoch": 0.00026202392578125,
      "grad_norm": 0.11210591346025467,
      "learning_rate": 2.0543893331304333e-05,
      "loss": 0.0316,
      "step": 42930
    },
    {
      "epoch": 0.00026202392578125,
      "model_forward_time": 0.11458110809326172,
      "step": 42930
    },
    {
      "epoch": 0.00026202392578125,
      "step": 42930,
      "training_step_time": 0.3880021572113037
    },
    {
      "epoch": 0.000262030029296875,
      "model_forward_time": 0.11418724060058594,
      "step": 42931
    },
    {
      "epoch": 0.000262030029296875,
      "step": 42931,
      "training_step_time": 0.38625478744506836
    },
    {
      "epoch": 0.0002620361328125,
      "model_forward_time": 0.1149287223815918,
      "step": 42932
    },
    {
      "epoch": 0.0002620361328125,
      "step": 42932,
      "training_step_time": 0.42318010330200195
    },
    {
      "epoch": 0.000262042236328125,
      "model_forward_time": 0.11501932144165039,
      "step": 42933
    },
    {
      "epoch": 0.000262042236328125,
      "step": 42933,
      "training_step_time": 0.4617195129394531
    },
    {
      "epoch": 0.00026204833984375,
      "model_forward_time": 0.11581659317016602,
      "step": 42934
    },
    {
      "epoch": 0.00026204833984375,
      "step": 42934,
      "training_step_time": 0.39766383171081543
    },
    {
      "epoch": 0.000262054443359375,
      "model_forward_time": 0.11477231979370117,
      "step": 42935
    },
    {
      "epoch": 0.000262054443359375,
      "step": 42935,
      "training_step_time": 0.40073084831237793
    },
    {
      "epoch": 0.000262060546875,
      "model_forward_time": 0.11597609519958496,
      "step": 42936
    },
    {
      "epoch": 0.000262060546875,
      "step": 42936,
      "training_step_time": 0.397139310836792
    },
    {
      "epoch": 0.000262066650390625,
      "model_forward_time": 0.11500382423400879,
      "step": 42937
    },
    {
      "epoch": 0.000262066650390625,
      "step": 42937,
      "training_step_time": 0.39699506759643555
    },
    {
      "epoch": 0.00026207275390625,
      "model_forward_time": 0.1158151626586914,
      "step": 42938
    },
    {
      "epoch": 0.00026207275390625,
      "step": 42938,
      "training_step_time": 0.37369632720947266
    },
    {
      "epoch": 0.000262078857421875,
      "model_forward_time": 0.11557936668395996,
      "step": 42939
    },
    {
      "epoch": 0.000262078857421875,
      "step": 42939,
      "training_step_time": 0.4497098922729492
    },
    {
      "epoch": 0.0002620849609375,
      "grad_norm": 0.14717265963554382,
      "learning_rate": 2.0521629866899966e-05,
      "loss": 0.0426,
      "step": 42940
    },
    {
      "epoch": 0.0002620849609375,
      "model_forward_time": 0.11481523513793945,
      "step": 42940
    },
    {
      "epoch": 0.0002620849609375,
      "step": 42940,
      "training_step_time": 0.406296968460083
    },
    {
      "epoch": 0.000262091064453125,
      "model_forward_time": 0.11548852920532227,
      "step": 42941
    },
    {
      "epoch": 0.000262091064453125,
      "step": 42941,
      "training_step_time": 0.487323522567749
    },
    {
      "epoch": 0.00026209716796875,
      "model_forward_time": 0.1154015064239502,
      "step": 42942
    },
    {
      "epoch": 0.00026209716796875,
      "step": 42942,
      "training_step_time": 0.46848177909851074
    },
    {
      "epoch": 0.000262103271484375,
      "model_forward_time": 0.11487674713134766,
      "step": 42943
    },
    {
      "epoch": 0.000262103271484375,
      "step": 42943,
      "training_step_time": 0.4955480098724365
    },
    {
      "epoch": 0.000262109375,
      "model_forward_time": 0.11474609375,
      "step": 42944
    },
    {
      "epoch": 0.000262109375,
      "step": 42944,
      "training_step_time": 0.4352564811706543
    },
    {
      "epoch": 0.000262115478515625,
      "model_forward_time": 0.11469578742980957,
      "step": 42945
    },
    {
      "epoch": 0.000262115478515625,
      "step": 42945,
      "training_step_time": 0.45346665382385254
    },
    {
      "epoch": 0.00026212158203125,
      "model_forward_time": 0.11493968963623047,
      "step": 42946
    },
    {
      "epoch": 0.00026212158203125,
      "step": 42946,
      "training_step_time": 0.39601755142211914
    },
    {
      "epoch": 0.000262127685546875,
      "model_forward_time": 0.11492371559143066,
      "step": 42947
    },
    {
      "epoch": 0.000262127685546875,
      "step": 42947,
      "training_step_time": 0.3901333808898926
    },
    {
      "epoch": 0.0002621337890625,
      "model_forward_time": 0.1150672435760498,
      "step": 42948
    },
    {
      "epoch": 0.0002621337890625,
      "step": 42948,
      "training_step_time": 0.3980844020843506
    },
    {
      "epoch": 0.000262139892578125,
      "model_forward_time": 0.11551976203918457,
      "step": 42949
    },
    {
      "epoch": 0.000262139892578125,
      "step": 42949,
      "training_step_time": 0.39203667640686035
    },
    {
      "epoch": 0.00026214599609375,
      "grad_norm": 0.07427849620580673,
      "learning_rate": 2.0499375357246148e-05,
      "loss": 0.0337,
      "step": 42950
    },
    {
      "epoch": 0.00026214599609375,
      "model_forward_time": 0.11578083038330078,
      "step": 42950
    },
    {
      "epoch": 0.00026214599609375,
      "step": 42950,
      "training_step_time": 0.3897225856781006
    },
    {
      "epoch": 0.000262152099609375,
      "model_forward_time": 0.11491823196411133,
      "step": 42951
    },
    {
      "epoch": 0.000262152099609375,
      "step": 42951,
      "training_step_time": 0.9230005741119385
    },
    {
      "epoch": 0.000262158203125,
      "model_forward_time": 0.11464166641235352,
      "step": 42952
    },
    {
      "epoch": 0.000262158203125,
      "step": 42952,
      "training_step_time": 0.45021820068359375
    },
    {
      "epoch": 0.000262164306640625,
      "model_forward_time": 0.11435127258300781,
      "step": 42953
    },
    {
      "epoch": 0.000262164306640625,
      "step": 42953,
      "training_step_time": 0.4701061248779297
    },
    {
      "epoch": 0.00026217041015625,
      "model_forward_time": 0.11469602584838867,
      "step": 42954
    },
    {
      "epoch": 0.00026217041015625,
      "step": 42954,
      "training_step_time": 0.38458251953125
    },
    {
      "epoch": 0.000262176513671875,
      "model_forward_time": 0.11419916152954102,
      "step": 42955
    },
    {
      "epoch": 0.000262176513671875,
      "step": 42955,
      "training_step_time": 0.4842641353607178
    },
    {
      "epoch": 0.0002621826171875,
      "model_forward_time": 0.11445450782775879,
      "step": 42956
    },
    {
      "epoch": 0.0002621826171875,
      "step": 42956,
      "training_step_time": 0.46251440048217773
    },
    {
      "epoch": 0.000262188720703125,
      "model_forward_time": 0.11493682861328125,
      "step": 42957
    },
    {
      "epoch": 0.000262188720703125,
      "step": 42957,
      "training_step_time": 0.4157724380493164
    },
    {
      "epoch": 0.00026219482421875,
      "model_forward_time": 0.11536097526550293,
      "step": 42958
    },
    {
      "epoch": 0.00026219482421875,
      "step": 42958,
      "training_step_time": 0.43401384353637695
    },
    {
      "epoch": 0.000262200927734375,
      "model_forward_time": 0.11461472511291504,
      "step": 42959
    },
    {
      "epoch": 0.000262200927734375,
      "step": 42959,
      "training_step_time": 0.38393068313598633
    },
    {
      "epoch": 0.00026220703125,
      "grad_norm": 0.1047809049487114,
      "learning_rate": 2.0477129809103147e-05,
      "loss": 0.0361,
      "step": 42960
    },
    {
      "epoch": 0.00026220703125,
      "model_forward_time": 0.11441516876220703,
      "step": 42960
    },
    {
      "epoch": 0.00026220703125,
      "step": 42960,
      "training_step_time": 0.39992690086364746
    },
    {
      "epoch": 0.000262213134765625,
      "model_forward_time": 0.11543679237365723,
      "step": 42961
    },
    {
      "epoch": 0.000262213134765625,
      "step": 42961,
      "training_step_time": 0.3882317543029785
    },
    {
      "epoch": 0.00026221923828125,
      "model_forward_time": 0.1152503490447998,
      "step": 42962
    },
    {
      "epoch": 0.00026221923828125,
      "step": 42962,
      "training_step_time": 0.38414835929870605
    },
    {
      "epoch": 0.000262225341796875,
      "model_forward_time": 0.11498427391052246,
      "step": 42963
    },
    {
      "epoch": 0.000262225341796875,
      "step": 42963,
      "training_step_time": 0.8233804702758789
    },
    {
      "epoch": 0.0002622314453125,
      "model_forward_time": 0.11570215225219727,
      "step": 42964
    },
    {
      "epoch": 0.0002622314453125,
      "step": 42964,
      "training_step_time": 0.36553192138671875
    },
    {
      "epoch": 0.000262237548828125,
      "model_forward_time": 0.11508607864379883,
      "step": 42965
    },
    {
      "epoch": 0.000262237548828125,
      "step": 42965,
      "training_step_time": 0.4225282669067383
    },
    {
      "epoch": 0.00026224365234375,
      "model_forward_time": 0.11527323722839355,
      "step": 42966
    },
    {
      "epoch": 0.00026224365234375,
      "step": 42966,
      "training_step_time": 0.4836127758026123
    },
    {
      "epoch": 0.000262249755859375,
      "model_forward_time": 0.11440730094909668,
      "step": 42967
    },
    {
      "epoch": 0.000262249755859375,
      "step": 42967,
      "training_step_time": 0.4028756618499756
    },
    {
      "epoch": 0.000262255859375,
      "model_forward_time": 0.1208500862121582,
      "step": 42968
    },
    {
      "epoch": 0.000262255859375,
      "step": 42968,
      "training_step_time": 0.4604966640472412
    },
    {
      "epoch": 0.000262261962890625,
      "model_forward_time": 0.11558794975280762,
      "step": 42969
    },
    {
      "epoch": 0.000262261962890625,
      "step": 42969,
      "training_step_time": 0.6225013732910156
    },
    {
      "epoch": 0.00026226806640625,
      "grad_norm": 0.11181841045618057,
      "learning_rate": 2.0454893229228617e-05,
      "loss": 0.0383,
      "step": 42970
    },
    {
      "epoch": 0.00026226806640625,
      "model_forward_time": 0.11495018005371094,
      "step": 42970
    },
    {
      "epoch": 0.00026226806640625,
      "step": 42970,
      "training_step_time": 0.44997286796569824
    },
    {
      "epoch": 0.000262274169921875,
      "model_forward_time": 0.11419343948364258,
      "step": 42971
    },
    {
      "epoch": 0.000262274169921875,
      "step": 42971,
      "training_step_time": 0.38765931129455566
    },
    {
      "epoch": 0.0002622802734375,
      "model_forward_time": 0.11531901359558105,
      "step": 42972
    },
    {
      "epoch": 0.0002622802734375,
      "step": 42972,
      "training_step_time": 0.3973088264465332
    },
    {
      "epoch": 0.000262286376953125,
      "model_forward_time": 0.1149144172668457,
      "step": 42973
    },
    {
      "epoch": 0.000262286376953125,
      "step": 42973,
      "training_step_time": 0.3876523971557617
    },
    {
      "epoch": 0.00026229248046875,
      "model_forward_time": 0.11544060707092285,
      "step": 42974
    },
    {
      "epoch": 0.00026229248046875,
      "step": 42974,
      "training_step_time": 0.3918495178222656
    },
    {
      "epoch": 0.000262298583984375,
      "model_forward_time": 0.11544251441955566,
      "step": 42975
    },
    {
      "epoch": 0.000262298583984375,
      "step": 42975,
      "training_step_time": 0.5653483867645264
    },
    {
      "epoch": 0.0002623046875,
      "model_forward_time": 0.11576461791992188,
      "step": 42976
    },
    {
      "epoch": 0.0002623046875,
      "step": 42976,
      "training_step_time": 0.38738107681274414
    },
    {
      "epoch": 0.000262310791015625,
      "model_forward_time": 0.11523222923278809,
      "step": 42977
    },
    {
      "epoch": 0.000262310791015625,
      "step": 42977,
      "training_step_time": 0.38486623764038086
    },
    {
      "epoch": 0.00026231689453125,
      "model_forward_time": 0.11549520492553711,
      "step": 42978
    },
    {
      "epoch": 0.00026231689453125,
      "step": 42978,
      "training_step_time": 0.39403700828552246
    },
    {
      "epoch": 0.000262322998046875,
      "model_forward_time": 0.11653637886047363,
      "step": 42979
    },
    {
      "epoch": 0.000262322998046875,
      "step": 42979,
      "training_step_time": 0.4331831932067871
    },
    {
      "epoch": 0.0002623291015625,
      "grad_norm": 0.10258528590202332,
      "learning_rate": 2.0432665624377434e-05,
      "loss": 0.0356,
      "step": 42980
    },
    {
      "epoch": 0.0002623291015625,
      "model_forward_time": 0.11435794830322266,
      "step": 42980
    },
    {
      "epoch": 0.0002623291015625,
      "step": 42980,
      "training_step_time": 0.40599846839904785
    },
    {
      "epoch": 0.000262335205078125,
      "model_forward_time": 0.11512041091918945,
      "step": 42981
    },
    {
      "epoch": 0.000262335205078125,
      "step": 42981,
      "training_step_time": 0.50836181640625
    },
    {
      "epoch": 0.00026234130859375,
      "model_forward_time": 0.11548185348510742,
      "step": 42982
    },
    {
      "epoch": 0.00026234130859375,
      "step": 42982,
      "training_step_time": 0.44948649406433105
    },
    {
      "epoch": 0.000262347412109375,
      "model_forward_time": 0.11442065238952637,
      "step": 42983
    },
    {
      "epoch": 0.000262347412109375,
      "step": 42983,
      "training_step_time": 0.4084494113922119
    },
    {
      "epoch": 0.000262353515625,
      "model_forward_time": 0.11503982543945312,
      "step": 42984
    },
    {
      "epoch": 0.000262353515625,
      "step": 42984,
      "training_step_time": 0.44185614585876465
    },
    {
      "epoch": 0.000262359619140625,
      "model_forward_time": 0.11457681655883789,
      "step": 42985
    },
    {
      "epoch": 0.000262359619140625,
      "step": 42985,
      "training_step_time": 0.45197629928588867
    },
    {
      "epoch": 0.00026236572265625,
      "model_forward_time": 0.11543035507202148,
      "step": 42986
    },
    {
      "epoch": 0.00026236572265625,
      "step": 42986,
      "training_step_time": 0.3922770023345947
    },
    {
      "epoch": 0.000262371826171875,
      "model_forward_time": 0.11501789093017578,
      "step": 42987
    },
    {
      "epoch": 0.000262371826171875,
      "step": 42987,
      "training_step_time": 0.9364802837371826
    },
    {
      "epoch": 0.0002623779296875,
      "model_forward_time": 0.11451053619384766,
      "step": 42988
    },
    {
      "epoch": 0.0002623779296875,
      "step": 42988,
      "training_step_time": 0.3793783187866211
    },
    {
      "epoch": 0.000262384033203125,
      "model_forward_time": 0.11443424224853516,
      "step": 42989
    },
    {
      "epoch": 0.000262384033203125,
      "step": 42989,
      "training_step_time": 0.3865318298339844
    },
    {
      "epoch": 0.00026239013671875,
      "grad_norm": 0.10510958731174469,
      "learning_rate": 2.0410447001301753e-05,
      "loss": 0.0348,
      "step": 42990
    },
    {
      "epoch": 0.00026239013671875,
      "model_forward_time": 0.11436891555786133,
      "step": 42990
    },
    {
      "epoch": 0.00026239013671875,
      "step": 42990,
      "training_step_time": 0.3902285099029541
    },
    {
      "epoch": 0.000262396240234375,
      "model_forward_time": 0.11413931846618652,
      "step": 42991
    },
    {
      "epoch": 0.000262396240234375,
      "step": 42991,
      "training_step_time": 0.3835480213165283
    },
    {
      "epoch": 0.00026240234375,
      "model_forward_time": 0.11522579193115234,
      "step": 42992
    },
    {
      "epoch": 0.00026240234375,
      "step": 42992,
      "training_step_time": 0.3859982490539551
    },
    {
      "epoch": 0.000262408447265625,
      "model_forward_time": 0.11484956741333008,
      "step": 42993
    },
    {
      "epoch": 0.000262408447265625,
      "step": 42993,
      "training_step_time": 0.8861594200134277
    },
    {
      "epoch": 0.00026241455078125,
      "model_forward_time": 0.11481451988220215,
      "step": 42994
    },
    {
      "epoch": 0.00026241455078125,
      "step": 42994,
      "training_step_time": 0.4477415084838867
    },
    {
      "epoch": 0.000262420654296875,
      "model_forward_time": 0.11430025100708008,
      "step": 42995
    },
    {
      "epoch": 0.000262420654296875,
      "step": 42995,
      "training_step_time": 0.420609712600708
    },
    {
      "epoch": 0.0002624267578125,
      "model_forward_time": 0.11538553237915039,
      "step": 42996
    },
    {
      "epoch": 0.0002624267578125,
      "step": 42996,
      "training_step_time": 0.4704866409301758
    },
    {
      "epoch": 0.000262432861328125,
      "model_forward_time": 0.11402750015258789,
      "step": 42997
    },
    {
      "epoch": 0.000262432861328125,
      "step": 42997,
      "training_step_time": 0.39663219451904297
    },
    {
      "epoch": 0.00026243896484375,
      "model_forward_time": 0.11441779136657715,
      "step": 42998
    },
    {
      "epoch": 0.00026243896484375,
      "step": 42998,
      "training_step_time": 0.38562750816345215
    },
    {
      "epoch": 0.000262445068359375,
      "model_forward_time": 0.11586928367614746,
      "step": 42999
    },
    {
      "epoch": 0.000262445068359375,
      "step": 42999,
      "training_step_time": 0.4122929573059082
    },
    {
      "epoch": 0.000262451171875,
      "grad_norm": 0.11680608242750168,
      "learning_rate": 2.0388237366751006e-05,
      "loss": 0.033,
      "step": 43000
    },
    {
      "epoch": 0.000262451171875,
      "model_forward_time": 0.113037109375,
      "step": 43000
    },
    {
      "epoch": 0.000262451171875,
      "step": 43000,
      "training_step_time": 0.354658842086792
    },
    {
      "epoch": 0.000262457275390625,
      "model_forward_time": 0.11240363121032715,
      "step": 43001
    },
    {
      "epoch": 0.000262457275390625,
      "step": 43001,
      "training_step_time": 0.36273932456970215
    },
    {
      "epoch": 0.00026246337890625,
      "model_forward_time": 0.11333632469177246,
      "step": 43002
    },
    {
      "epoch": 0.00026246337890625,
      "step": 43002,
      "training_step_time": 0.38383960723876953
    },
    {
      "epoch": 0.000262469482421875,
      "model_forward_time": 0.11321163177490234,
      "step": 43003
    },
    {
      "epoch": 0.000262469482421875,
      "step": 43003,
      "training_step_time": 0.3850879669189453
    },
    {
      "epoch": 0.0002624755859375,
      "model_forward_time": 0.11459708213806152,
      "step": 43004
    },
    {
      "epoch": 0.0002624755859375,
      "step": 43004,
      "training_step_time": 0.3853428363800049
    },
    {
      "epoch": 0.000262481689453125,
      "model_forward_time": 0.11444783210754395,
      "step": 43005
    },
    {
      "epoch": 0.000262481689453125,
      "step": 43005,
      "training_step_time": 0.37576723098754883
    },
    {
      "epoch": 0.00026248779296875,
      "model_forward_time": 0.11488127708435059,
      "step": 43006
    },
    {
      "epoch": 0.00026248779296875,
      "step": 43006,
      "training_step_time": 0.3820788860321045
    },
    {
      "epoch": 0.000262493896484375,
      "model_forward_time": 0.11502957344055176,
      "step": 43007
    },
    {
      "epoch": 0.000262493896484375,
      "step": 43007,
      "training_step_time": 0.3786811828613281
    },
    {
      "epoch": 0.0002625,
      "model_forward_time": 0.1153104305267334,
      "step": 43008
    },
    {
      "epoch": 0.0002625,
      "step": 43008,
      "training_step_time": 0.4199564456939697
    },
    {
      "epoch": 0.000262506103515625,
      "model_forward_time": 0.11582708358764648,
      "step": 43009
    },
    {
      "epoch": 0.000262506103515625,
      "step": 43009,
      "training_step_time": 0.5044457912445068
    },
    {
      "epoch": 0.00026251220703125,
      "grad_norm": 0.10054361820220947,
      "learning_rate": 2.03660367274719e-05,
      "loss": 0.0366,
      "step": 43010
    },
    {
      "epoch": 0.00026251220703125,
      "model_forward_time": 0.11510229110717773,
      "step": 43010
    },
    {
      "epoch": 0.00026251220703125,
      "step": 43010,
      "training_step_time": 0.4216766357421875
    },
    {
      "epoch": 0.000262518310546875,
      "model_forward_time": 0.11611366271972656,
      "step": 43011
    },
    {
      "epoch": 0.000262518310546875,
      "step": 43011,
      "training_step_time": 0.5030620098114014
    },
    {
      "epoch": 0.0002625244140625,
      "model_forward_time": 0.1151890754699707,
      "step": 43012
    },
    {
      "epoch": 0.0002625244140625,
      "step": 43012,
      "training_step_time": 0.49669599533081055
    },
    {
      "epoch": 0.000262530517578125,
      "model_forward_time": 0.11421990394592285,
      "step": 43013
    },
    {
      "epoch": 0.000262530517578125,
      "step": 43013,
      "training_step_time": 0.445850133895874
    },
    {
      "epoch": 0.00026253662109375,
      "model_forward_time": 0.11527729034423828,
      "step": 43014
    },
    {
      "epoch": 0.00026253662109375,
      "step": 43014,
      "training_step_time": 0.3871629238128662
    },
    {
      "epoch": 0.000262542724609375,
      "model_forward_time": 0.11437010765075684,
      "step": 43015
    },
    {
      "epoch": 0.000262542724609375,
      "step": 43015,
      "training_step_time": 0.3941500186920166
    },
    {
      "epoch": 0.000262548828125,
      "model_forward_time": 0.11480307579040527,
      "step": 43016
    },
    {
      "epoch": 0.000262548828125,
      "step": 43016,
      "training_step_time": 0.3875880241394043
    },
    {
      "epoch": 0.000262554931640625,
      "model_forward_time": 0.11521410942077637,
      "step": 43017
    },
    {
      "epoch": 0.000262554931640625,
      "step": 43017,
      "training_step_time": 0.3925611972808838
    },
    {
      "epoch": 0.00026256103515625,
      "model_forward_time": 0.11566400527954102,
      "step": 43018
    },
    {
      "epoch": 0.00026256103515625,
      "step": 43018,
      "training_step_time": 0.3892033100128174
    },
    {
      "epoch": 0.000262567138671875,
      "model_forward_time": 0.11532974243164062,
      "step": 43019
    },
    {
      "epoch": 0.000262567138671875,
      "step": 43019,
      "training_step_time": 0.3897726535797119
    },
    {
      "epoch": 0.0002625732421875,
      "grad_norm": 0.09990431368350983,
      "learning_rate": 2.0343845090208368e-05,
      "loss": 0.0363,
      "step": 43020
    },
    {
      "epoch": 0.0002625732421875,
      "model_forward_time": 0.11498618125915527,
      "step": 43020
    },
    {
      "epoch": 0.0002625732421875,
      "step": 43020,
      "training_step_time": 0.39828062057495117
    },
    {
      "epoch": 0.000262579345703125,
      "model_forward_time": 0.11521172523498535,
      "step": 43021
    },
    {
      "epoch": 0.000262579345703125,
      "step": 43021,
      "training_step_time": 0.3971133232116699
    },
    {
      "epoch": 0.00026258544921875,
      "model_forward_time": 0.11591124534606934,
      "step": 43022
    },
    {
      "epoch": 0.00026258544921875,
      "step": 43022,
      "training_step_time": 0.4475057125091553
    },
    {
      "epoch": 0.000262591552734375,
      "model_forward_time": 0.11541509628295898,
      "step": 43023
    },
    {
      "epoch": 0.000262591552734375,
      "step": 43023,
      "training_step_time": 0.46645379066467285
    },
    {
      "epoch": 0.00026259765625,
      "model_forward_time": 0.1153407096862793,
      "step": 43024
    },
    {
      "epoch": 0.00026259765625,
      "step": 43024,
      "training_step_time": 0.5112366676330566
    },
    {
      "epoch": 0.000262603759765625,
      "model_forward_time": 0.11518239974975586,
      "step": 43025
    },
    {
      "epoch": 0.000262603759765625,
      "step": 43025,
      "training_step_time": 0.39105749130249023
    },
    {
      "epoch": 0.00026260986328125,
      "model_forward_time": 0.1152644157409668,
      "step": 43026
    },
    {
      "epoch": 0.00026260986328125,
      "step": 43026,
      "training_step_time": 0.5251498222351074
    },
    {
      "epoch": 0.000262615966796875,
      "model_forward_time": 0.11525273323059082,
      "step": 43027
    },
    {
      "epoch": 0.000262615966796875,
      "step": 43027,
      "training_step_time": 0.4867265224456787
    },
    {
      "epoch": 0.0002626220703125,
      "model_forward_time": 0.11442399024963379,
      "step": 43028
    },
    {
      "epoch": 0.0002626220703125,
      "step": 43028,
      "training_step_time": 0.3969583511352539
    },
    {
      "epoch": 0.000262628173828125,
      "model_forward_time": 0.11493062973022461,
      "step": 43029
    },
    {
      "epoch": 0.000262628173828125,
      "step": 43029,
      "training_step_time": 0.382155179977417
    },
    {
      "epoch": 0.00026263427734375,
      "grad_norm": 0.10233855247497559,
      "learning_rate": 2.0321662461701696e-05,
      "loss": 0.036,
      "step": 43030
    },
    {
      "epoch": 0.00026263427734375,
      "model_forward_time": 0.1156003475189209,
      "step": 43030
    },
    {
      "epoch": 0.00026263427734375,
      "step": 43030,
      "training_step_time": 0.38405585289001465
    },
    {
      "epoch": 0.000262640380859375,
      "model_forward_time": 0.11512160301208496,
      "step": 43031
    },
    {
      "epoch": 0.000262640380859375,
      "step": 43031,
      "training_step_time": 0.39362406730651855
    },
    {
      "epoch": 0.000262646484375,
      "model_forward_time": 0.11504507064819336,
      "step": 43032
    },
    {
      "epoch": 0.000262646484375,
      "step": 43032,
      "training_step_time": 0.4073808193206787
    },
    {
      "epoch": 0.000262652587890625,
      "model_forward_time": 0.11533689498901367,
      "step": 43033
    },
    {
      "epoch": 0.000262652587890625,
      "step": 43033,
      "training_step_time": 0.3961021900177002
    },
    {
      "epoch": 0.00026265869140625,
      "model_forward_time": 0.11558747291564941,
      "step": 43034
    },
    {
      "epoch": 0.00026265869140625,
      "step": 43034,
      "training_step_time": 0.3905050754547119
    },
    {
      "epoch": 0.000262664794921875,
      "model_forward_time": 0.11513447761535645,
      "step": 43035
    },
    {
      "epoch": 0.000262664794921875,
      "step": 43035,
      "training_step_time": 0.39285898208618164
    },
    {
      "epoch": 0.0002626708984375,
      "model_forward_time": 0.11516642570495605,
      "step": 43036
    },
    {
      "epoch": 0.0002626708984375,
      "step": 43036,
      "training_step_time": 0.39359498023986816
    },
    {
      "epoch": 0.000262677001953125,
      "model_forward_time": 0.11537957191467285,
      "step": 43037
    },
    {
      "epoch": 0.000262677001953125,
      "step": 43037,
      "training_step_time": 0.43843698501586914
    },
    {
      "epoch": 0.00026268310546875,
      "model_forward_time": 0.11586403846740723,
      "step": 43038
    },
    {
      "epoch": 0.00026268310546875,
      "step": 43038,
      "training_step_time": 0.46510839462280273
    },
    {
      "epoch": 0.000262689208984375,
      "model_forward_time": 0.1154794692993164,
      "step": 43039
    },
    {
      "epoch": 0.000262689208984375,
      "step": 43039,
      "training_step_time": 0.4655177593231201
    },
    {
      "epoch": 0.0002626953125,
      "grad_norm": 0.10272540152072906,
      "learning_rate": 2.0299488848690355e-05,
      "loss": 0.0412,
      "step": 43040
    },
    {
      "epoch": 0.0002626953125,
      "model_forward_time": 0.1151571273803711,
      "step": 43040
    },
    {
      "epoch": 0.0002626953125,
      "step": 43040,
      "training_step_time": 0.48783302307128906
    },
    {
      "epoch": 0.000262701416015625,
      "model_forward_time": 0.11522078514099121,
      "step": 43041
    },
    {
      "epoch": 0.000262701416015625,
      "step": 43041,
      "training_step_time": 0.42247605323791504
    },
    {
      "epoch": 0.00026270751953125,
      "model_forward_time": 0.11486577987670898,
      "step": 43042
    },
    {
      "epoch": 0.00026270751953125,
      "step": 43042,
      "training_step_time": 0.4813268184661865
    },
    {
      "epoch": 0.000262713623046875,
      "model_forward_time": 0.11524415016174316,
      "step": 43043
    },
    {
      "epoch": 0.000262713623046875,
      "step": 43043,
      "training_step_time": 0.4033050537109375
    },
    {
      "epoch": 0.0002627197265625,
      "model_forward_time": 0.11521220207214355,
      "step": 43044
    },
    {
      "epoch": 0.0002627197265625,
      "step": 43044,
      "training_step_time": 0.40000438690185547
    },
    {
      "epoch": 0.000262725830078125,
      "model_forward_time": 0.11499333381652832,
      "step": 43045
    },
    {
      "epoch": 0.000262725830078125,
      "step": 43045,
      "training_step_time": 0.3921990394592285
    },
    {
      "epoch": 0.00026273193359375,
      "model_forward_time": 0.11574292182922363,
      "step": 43046
    },
    {
      "epoch": 0.00026273193359375,
      "step": 43046,
      "training_step_time": 0.39247894287109375
    },
    {
      "epoch": 0.000262738037109375,
      "model_forward_time": 0.11499381065368652,
      "step": 43047
    },
    {
      "epoch": 0.000262738037109375,
      "step": 43047,
      "training_step_time": 0.39006781578063965
    },
    {
      "epoch": 0.000262744140625,
      "model_forward_time": 0.11515426635742188,
      "step": 43048
    },
    {
      "epoch": 0.000262744140625,
      "step": 43048,
      "training_step_time": 0.3846888542175293
    },
    {
      "epoch": 0.000262750244140625,
      "model_forward_time": 0.11500382423400879,
      "step": 43049
    },
    {
      "epoch": 0.000262750244140625,
      "step": 43049,
      "training_step_time": 0.3920145034790039
    },
    {
      "epoch": 0.00026275634765625,
      "grad_norm": 0.10573896765708923,
      "learning_rate": 2.0277324257910106e-05,
      "loss": 0.0328,
      "step": 43050
    },
    {
      "epoch": 0.00026275634765625,
      "model_forward_time": 0.11502194404602051,
      "step": 43050
    },
    {
      "epoch": 0.00026275634765625,
      "step": 43050,
      "training_step_time": 0.4013247489929199
    },
    {
      "epoch": 0.000262762451171875,
      "model_forward_time": 0.11579370498657227,
      "step": 43051
    },
    {
      "epoch": 0.000262762451171875,
      "step": 43051,
      "training_step_time": 0.3976912498474121
    },
    {
      "epoch": 0.0002627685546875,
      "model_forward_time": 0.11588430404663086,
      "step": 43052
    },
    {
      "epoch": 0.0002627685546875,
      "step": 43052,
      "training_step_time": 0.44887733459472656
    },
    {
      "epoch": 0.000262774658203125,
      "model_forward_time": 0.11539292335510254,
      "step": 43053
    },
    {
      "epoch": 0.000262774658203125,
      "step": 43053,
      "training_step_time": 0.46514225006103516
    },
    {
      "epoch": 0.00026278076171875,
      "model_forward_time": 0.11529946327209473,
      "step": 43054
    },
    {
      "epoch": 0.00026278076171875,
      "step": 43054,
      "training_step_time": 0.4136652946472168
    },
    {
      "epoch": 0.000262786865234375,
      "model_forward_time": 0.1163022518157959,
      "step": 43055
    },
    {
      "epoch": 0.000262786865234375,
      "step": 43055,
      "training_step_time": 0.48610854148864746
    },
    {
      "epoch": 0.00026279296875,
      "model_forward_time": 0.11597371101379395,
      "step": 43056
    },
    {
      "epoch": 0.00026279296875,
      "step": 43056,
      "training_step_time": 0.4081120491027832
    },
    {
      "epoch": 0.000262799072265625,
      "model_forward_time": 0.11510586738586426,
      "step": 43057
    },
    {
      "epoch": 0.000262799072265625,
      "step": 43057,
      "training_step_time": 0.4322359561920166
    },
    {
      "epoch": 0.00026280517578125,
      "model_forward_time": 0.11527490615844727,
      "step": 43058
    },
    {
      "epoch": 0.00026280517578125,
      "step": 43058,
      "training_step_time": 0.3930633068084717
    },
    {
      "epoch": 0.000262811279296875,
      "model_forward_time": 0.11495542526245117,
      "step": 43059
    },
    {
      "epoch": 0.000262811279296875,
      "step": 43059,
      "training_step_time": 0.38491272926330566
    },
    {
      "epoch": 0.0002628173828125,
      "grad_norm": 0.10950358211994171,
      "learning_rate": 2.0255168696093968e-05,
      "loss": 0.0366,
      "step": 43060
    },
    {
      "epoch": 0.0002628173828125,
      "model_forward_time": 0.11575126647949219,
      "step": 43060
    },
    {
      "epoch": 0.0002628173828125,
      "step": 43060,
      "training_step_time": 0.3841104507446289
    },
    {
      "epoch": 0.000262823486328125,
      "model_forward_time": 0.11532044410705566,
      "step": 43061
    },
    {
      "epoch": 0.000262823486328125,
      "step": 43061,
      "training_step_time": 0.4037203788757324
    },
    {
      "epoch": 0.00026282958984375,
      "model_forward_time": 0.11555314064025879,
      "step": 43062
    },
    {
      "epoch": 0.00026282958984375,
      "step": 43062,
      "training_step_time": 0.40447378158569336
    },
    {
      "epoch": 0.000262835693359375,
      "model_forward_time": 0.11501860618591309,
      "step": 43063
    },
    {
      "epoch": 0.000262835693359375,
      "step": 43063,
      "training_step_time": 0.39350080490112305
    },
    {
      "epoch": 0.000262841796875,
      "model_forward_time": 0.11525368690490723,
      "step": 43064
    },
    {
      "epoch": 0.000262841796875,
      "step": 43064,
      "training_step_time": 0.396960973739624
    },
    {
      "epoch": 0.000262847900390625,
      "model_forward_time": 0.11707258224487305,
      "step": 43065
    },
    {
      "epoch": 0.000262847900390625,
      "step": 43065,
      "training_step_time": 0.4417576789855957
    },
    {
      "epoch": 0.00026285400390625,
      "model_forward_time": 0.11585855484008789,
      "step": 43066
    },
    {
      "epoch": 0.00026285400390625,
      "step": 43066,
      "training_step_time": 0.4294593334197998
    },
    {
      "epoch": 0.000262860107421875,
      "model_forward_time": 0.11562395095825195,
      "step": 43067
    },
    {
      "epoch": 0.000262860107421875,
      "step": 43067,
      "training_step_time": 0.45751404762268066
    },
    {
      "epoch": 0.0002628662109375,
      "model_forward_time": 0.11546540260314941,
      "step": 43068
    },
    {
      "epoch": 0.0002628662109375,
      "step": 43068,
      "training_step_time": 0.5022318363189697
    },
    {
      "epoch": 0.000262872314453125,
      "model_forward_time": 0.11516928672790527,
      "step": 43069
    },
    {
      "epoch": 0.000262872314453125,
      "step": 43069,
      "training_step_time": 0.40709590911865234
    },
    {
      "epoch": 0.00026287841796875,
      "grad_norm": 0.1176912933588028,
      "learning_rate": 2.0233022169972192e-05,
      "loss": 0.0349,
      "step": 43070
    },
    {
      "epoch": 0.00026287841796875,
      "model_forward_time": 0.1153860092163086,
      "step": 43070
    },
    {
      "epoch": 0.00026287841796875,
      "step": 43070,
      "training_step_time": 0.4039757251739502
    },
    {
      "epoch": 0.000262884521484375,
      "model_forward_time": 0.11491990089416504,
      "step": 43071
    },
    {
      "epoch": 0.000262884521484375,
      "step": 43071,
      "training_step_time": 0.48174405097961426
    },
    {
      "epoch": 0.000262890625,
      "model_forward_time": 0.11491131782531738,
      "step": 43072
    },
    {
      "epoch": 0.000262890625,
      "step": 43072,
      "training_step_time": 0.41927313804626465
    },
    {
      "epoch": 0.000262896728515625,
      "model_forward_time": 0.11473894119262695,
      "step": 43073
    },
    {
      "epoch": 0.000262896728515625,
      "step": 43073,
      "training_step_time": 0.38797521591186523
    },
    {
      "epoch": 0.00026290283203125,
      "model_forward_time": 0.11566376686096191,
      "step": 43074
    },
    {
      "epoch": 0.00026290283203125,
      "step": 43074,
      "training_step_time": 0.38759922981262207
    },
    {
      "epoch": 0.000262908935546875,
      "model_forward_time": 0.11499357223510742,
      "step": 43075
    },
    {
      "epoch": 0.000262908935546875,
      "step": 43075,
      "training_step_time": 0.3875741958618164
    },
    {
      "epoch": 0.0002629150390625,
      "model_forward_time": 0.11496400833129883,
      "step": 43076
    },
    {
      "epoch": 0.0002629150390625,
      "step": 43076,
      "training_step_time": 0.3980062007904053
    },
    {
      "epoch": 0.000262921142578125,
      "model_forward_time": 0.11597585678100586,
      "step": 43077
    },
    {
      "epoch": 0.000262921142578125,
      "step": 43077,
      "training_step_time": 0.3916642665863037
    },
    {
      "epoch": 0.00026292724609375,
      "model_forward_time": 0.11590409278869629,
      "step": 43078
    },
    {
      "epoch": 0.00026292724609375,
      "step": 43078,
      "training_step_time": 0.403822660446167
    },
    {
      "epoch": 0.000262933349609375,
      "model_forward_time": 0.11677289009094238,
      "step": 43079
    },
    {
      "epoch": 0.000262933349609375,
      "step": 43079,
      "training_step_time": 0.41670870780944824
    },
    {
      "epoch": 0.000262939453125,
      "grad_norm": 0.09489846974611282,
      "learning_rate": 2.0210884686272368e-05,
      "loss": 0.0327,
      "step": 43080
    },
    {
      "epoch": 0.000262939453125,
      "model_forward_time": 0.11539936065673828,
      "step": 43080
    },
    {
      "epoch": 0.000262939453125,
      "step": 43080,
      "training_step_time": 0.4390525817871094
    },
    {
      "epoch": 0.000262945556640625,
      "model_forward_time": 0.1153111457824707,
      "step": 43081
    },
    {
      "epoch": 0.000262945556640625,
      "step": 43081,
      "training_step_time": 0.36737823486328125
    },
    {
      "epoch": 0.00026295166015625,
      "model_forward_time": 0.11485028266906738,
      "step": 43082
    },
    {
      "epoch": 0.00026295166015625,
      "step": 43082,
      "training_step_time": 0.4524965286254883
    },
    {
      "epoch": 0.000262957763671875,
      "model_forward_time": 0.11532855033874512,
      "step": 43083
    },
    {
      "epoch": 0.000262957763671875,
      "step": 43083,
      "training_step_time": 0.49489426612854004
    },
    {
      "epoch": 0.0002629638671875,
      "model_forward_time": 0.11568045616149902,
      "step": 43084
    },
    {
      "epoch": 0.0002629638671875,
      "step": 43084,
      "training_step_time": 0.47855448722839355
    },
    {
      "epoch": 0.000262969970703125,
      "model_forward_time": 0.11538147926330566,
      "step": 43085
    },
    {
      "epoch": 0.000262969970703125,
      "step": 43085,
      "training_step_time": 0.43580126762390137
    },
    {
      "epoch": 0.00026297607421875,
      "model_forward_time": 0.11536455154418945,
      "step": 43086
    },
    {
      "epoch": 0.00026297607421875,
      "step": 43086,
      "training_step_time": 0.45996737480163574
    },
    {
      "epoch": 0.000262982177734375,
      "model_forward_time": 0.11501145362854004,
      "step": 43087
    },
    {
      "epoch": 0.000262982177734375,
      "step": 43087,
      "training_step_time": 0.3921689987182617
    },
    {
      "epoch": 0.00026298828125,
      "model_forward_time": 0.1150827407836914,
      "step": 43088
    },
    {
      "epoch": 0.00026298828125,
      "step": 43088,
      "training_step_time": 0.3956146240234375
    },
    {
      "epoch": 0.000262994384765625,
      "model_forward_time": 0.11493086814880371,
      "step": 43089
    },
    {
      "epoch": 0.000262994384765625,
      "step": 43089,
      "training_step_time": 0.3877875804901123
    },
    {
      "epoch": 0.00026300048828125,
      "grad_norm": 0.11295361071825027,
      "learning_rate": 2.0188756251719203e-05,
      "loss": 0.0363,
      "step": 43090
    },
    {
      "epoch": 0.00026300048828125,
      "model_forward_time": 0.11579155921936035,
      "step": 43090
    },
    {
      "epoch": 0.00026300048828125,
      "step": 43090,
      "training_step_time": 0.3880274295806885
    },
    {
      "epoch": 0.000263006591796875,
      "model_forward_time": 0.11512494087219238,
      "step": 43091
    },
    {
      "epoch": 0.000263006591796875,
      "step": 43091,
      "training_step_time": 0.3952338695526123
    },
    {
      "epoch": 0.0002630126953125,
      "model_forward_time": 0.11516094207763672,
      "step": 43092
    },
    {
      "epoch": 0.0002630126953125,
      "step": 43092,
      "training_step_time": 0.39798879623413086
    },
    {
      "epoch": 0.000263018798828125,
      "model_forward_time": 0.11533546447753906,
      "step": 43093
    },
    {
      "epoch": 0.000263018798828125,
      "step": 43093,
      "training_step_time": 0.42287731170654297
    },
    {
      "epoch": 0.00026302490234375,
      "model_forward_time": 0.11571455001831055,
      "step": 43094
    },
    {
      "epoch": 0.00026302490234375,
      "step": 43094,
      "training_step_time": 0.4137730598449707
    },
    {
      "epoch": 0.000263031005859375,
      "model_forward_time": 0.1149590015411377,
      "step": 43095
    },
    {
      "epoch": 0.000263031005859375,
      "step": 43095,
      "training_step_time": 0.389941930770874
    },
    {
      "epoch": 0.000263037109375,
      "model_forward_time": 0.1157846450805664,
      "step": 43096
    },
    {
      "epoch": 0.000263037109375,
      "step": 43096,
      "training_step_time": 0.45470380783081055
    },
    {
      "epoch": 0.000263043212890625,
      "model_forward_time": 0.11581826210021973,
      "step": 43097
    },
    {
      "epoch": 0.000263043212890625,
      "step": 43097,
      "training_step_time": 0.5038185119628906
    },
    {
      "epoch": 0.00026304931640625,
      "model_forward_time": 0.11597037315368652,
      "step": 43098
    },
    {
      "epoch": 0.00026304931640625,
      "step": 43098,
      "training_step_time": 0.41085219383239746
    },
    {
      "epoch": 0.000263055419921875,
      "model_forward_time": 0.11528968811035156,
      "step": 43099
    },
    {
      "epoch": 0.000263055419921875,
      "step": 43099,
      "training_step_time": 0.39737582206726074
    },
    {
      "epoch": 0.0002630615234375,
      "grad_norm": 0.1127026230096817,
      "learning_rate": 2.0166636873034805e-05,
      "loss": 0.0336,
      "step": 43100
    },
    {
      "epoch": 0.0002630615234375,
      "model_forward_time": 0.11518263816833496,
      "step": 43100
    },
    {
      "epoch": 0.0002630615234375,
      "step": 43100,
      "training_step_time": 0.39823198318481445
    },
    {
      "epoch": 0.000263067626953125,
      "model_forward_time": 0.11555194854736328,
      "step": 43101
    },
    {
      "epoch": 0.000263067626953125,
      "step": 43101,
      "training_step_time": 1.1625702381134033
    },
    {
      "epoch": 0.00026307373046875,
      "model_forward_time": 0.11400628089904785,
      "step": 43102
    },
    {
      "epoch": 0.00026307373046875,
      "step": 43102,
      "training_step_time": 0.3774073123931885
    },
    {
      "epoch": 0.000263079833984375,
      "model_forward_time": 0.11420226097106934,
      "step": 43103
    },
    {
      "epoch": 0.000263079833984375,
      "step": 43103,
      "training_step_time": 0.38321518898010254
    },
    {
      "epoch": 0.0002630859375,
      "model_forward_time": 0.11425495147705078,
      "step": 43104
    },
    {
      "epoch": 0.0002630859375,
      "step": 43104,
      "training_step_time": 0.3936762809753418
    },
    {
      "epoch": 0.000263092041015625,
      "model_forward_time": 0.11387014389038086,
      "step": 43105
    },
    {
      "epoch": 0.000263092041015625,
      "step": 43105,
      "training_step_time": 0.38303303718566895
    },
    {
      "epoch": 0.00026309814453125,
      "model_forward_time": 0.11467933654785156,
      "step": 43106
    },
    {
      "epoch": 0.00026309814453125,
      "step": 43106,
      "training_step_time": 0.43508243560791016
    },
    {
      "epoch": 0.000263104248046875,
      "model_forward_time": 0.11515116691589355,
      "step": 43107
    },
    {
      "epoch": 0.000263104248046875,
      "step": 43107,
      "training_step_time": 0.8447895050048828
    },
    {
      "epoch": 0.0002631103515625,
      "model_forward_time": 0.1141045093536377,
      "step": 43108
    },
    {
      "epoch": 0.0002631103515625,
      "step": 43108,
      "training_step_time": 0.3616502285003662
    },
    {
      "epoch": 0.000263116455078125,
      "model_forward_time": 0.11470484733581543,
      "step": 43109
    },
    {
      "epoch": 0.000263116455078125,
      "step": 43109,
      "training_step_time": 0.42665696144104004
    },
    {
      "epoch": 0.00026312255859375,
      "grad_norm": 0.08767115324735641,
      "learning_rate": 2.0144526556938387e-05,
      "loss": 0.0348,
      "step": 43110
    },
    {
      "epoch": 0.00026312255859375,
      "model_forward_time": 0.1147451400756836,
      "step": 43110
    },
    {
      "epoch": 0.00026312255859375,
      "step": 43110,
      "training_step_time": 0.4250218868255615
    },
    {
      "epoch": 0.000263128662109375,
      "model_forward_time": 0.11425185203552246,
      "step": 43111
    },
    {
      "epoch": 0.000263128662109375,
      "step": 43111,
      "training_step_time": 0.40294861793518066
    },
    {
      "epoch": 0.000263134765625,
      "model_forward_time": 0.11459493637084961,
      "step": 43112
    },
    {
      "epoch": 0.000263134765625,
      "step": 43112,
      "training_step_time": 0.3863680362701416
    },
    {
      "epoch": 0.000263140869140625,
      "model_forward_time": 0.11480951309204102,
      "step": 43113
    },
    {
      "epoch": 0.000263140869140625,
      "step": 43113,
      "training_step_time": 1.0996196269989014
    },
    {
      "epoch": 0.00026314697265625,
      "model_forward_time": 0.1172792911529541,
      "step": 43114
    },
    {
      "epoch": 0.00026314697265625,
      "step": 43114,
      "training_step_time": 0.6163816452026367
    },
    {
      "epoch": 0.000263153076171875,
      "model_forward_time": 0.1275768280029297,
      "step": 43115
    },
    {
      "epoch": 0.000263153076171875,
      "step": 43115,
      "training_step_time": 0.601708173751831
    },
    {
      "epoch": 0.0002631591796875,
      "model_forward_time": 0.11544656753540039,
      "step": 43116
    },
    {
      "epoch": 0.0002631591796875,
      "step": 43116,
      "training_step_time": 0.6075360774993896
    },
    {
      "epoch": 0.000263165283203125,
      "model_forward_time": 0.11626839637756348,
      "step": 43117
    },
    {
      "epoch": 0.000263165283203125,
      "step": 43117,
      "training_step_time": 0.7036070823669434
    },
    {
      "epoch": 0.00026317138671875,
      "model_forward_time": 0.11836910247802734,
      "step": 43118
    },
    {
      "epoch": 0.00026317138671875,
      "step": 43118,
      "training_step_time": 0.6541755199432373
    },
    {
      "epoch": 0.000263177490234375,
      "model_forward_time": 0.1169729232788086,
      "step": 43119
    },
    {
      "epoch": 0.000263177490234375,
      "step": 43119,
      "training_step_time": 0.7245211601257324
    },
    {
      "epoch": 0.00026318359375,
      "grad_norm": 0.0883786529302597,
      "learning_rate": 2.0122425310146542e-05,
      "loss": 0.0375,
      "step": 43120
    },
    {
      "epoch": 0.00026318359375,
      "model_forward_time": 0.12148785591125488,
      "step": 43120
    },
    {
      "epoch": 0.00026318359375,
      "step": 43120,
      "training_step_time": 0.7266731262207031
    },
    {
      "epoch": 0.000263189697265625,
      "model_forward_time": 0.12253355979919434,
      "step": 43121
    },
    {
      "epoch": 0.000263189697265625,
      "step": 43121,
      "training_step_time": 0.6336674690246582
    },
    {
      "epoch": 0.00026319580078125,
      "model_forward_time": 0.11888670921325684,
      "step": 43122
    },
    {
      "epoch": 0.00026319580078125,
      "step": 43122,
      "training_step_time": 0.6390330791473389
    },
    {
      "epoch": 0.000263201904296875,
      "model_forward_time": 0.11654973030090332,
      "step": 43123
    },
    {
      "epoch": 0.000263201904296875,
      "step": 43123,
      "training_step_time": 0.6861867904663086
    },
    {
      "epoch": 0.0002632080078125,
      "model_forward_time": 0.12893223762512207,
      "step": 43124
    },
    {
      "epoch": 0.0002632080078125,
      "step": 43124,
      "training_step_time": 0.6915140151977539
    },
    {
      "epoch": 0.000263214111328125,
      "model_forward_time": 0.11905574798583984,
      "step": 43125
    },
    {
      "epoch": 0.000263214111328125,
      "step": 43125,
      "training_step_time": 0.6536726951599121
    },
    {
      "epoch": 0.00026322021484375,
      "model_forward_time": 0.11758232116699219,
      "step": 43126
    },
    {
      "epoch": 0.00026322021484375,
      "step": 43126,
      "training_step_time": 0.7112922668457031
    },
    {
      "epoch": 0.000263226318359375,
      "model_forward_time": 0.11683082580566406,
      "step": 43127
    },
    {
      "epoch": 0.000263226318359375,
      "step": 43127,
      "training_step_time": 0.6393566131591797
    },
    {
      "epoch": 0.000263232421875,
      "model_forward_time": 0.12071013450622559,
      "step": 43128
    },
    {
      "epoch": 0.000263232421875,
      "step": 43128,
      "training_step_time": 0.6206111907958984
    },
    {
      "epoch": 0.000263238525390625,
      "model_forward_time": 0.11862945556640625,
      "step": 43129
    },
    {
      "epoch": 0.000263238525390625,
      "step": 43129,
      "training_step_time": 0.6805064678192139
    },
    {
      "epoch": 0.00026324462890625,
      "grad_norm": 0.08961141109466553,
      "learning_rate": 2.0100333139372985e-05,
      "loss": 0.039,
      "step": 43130
    },
    {
      "epoch": 0.00026324462890625,
      "model_forward_time": 0.12913823127746582,
      "step": 43130
    },
    {
      "epoch": 0.00026324462890625,
      "step": 43130,
      "training_step_time": 0.7306880950927734
    },
    {
      "epoch": 0.000263250732421875,
      "model_forward_time": 0.12117719650268555,
      "step": 43131
    },
    {
      "epoch": 0.000263250732421875,
      "step": 43131,
      "training_step_time": 0.7117273807525635
    },
    {
      "epoch": 0.0002632568359375,
      "model_forward_time": 0.12416696548461914,
      "step": 43132
    },
    {
      "epoch": 0.0002632568359375,
      "step": 43132,
      "training_step_time": 0.7006492614746094
    },
    {
      "epoch": 0.000263262939453125,
      "model_forward_time": 0.13156390190124512,
      "step": 43133
    },
    {
      "epoch": 0.000263262939453125,
      "step": 43133,
      "training_step_time": 0.6769516468048096
    },
    {
      "epoch": 0.00026326904296875,
      "model_forward_time": 0.11792182922363281,
      "step": 43134
    },
    {
      "epoch": 0.00026326904296875,
      "step": 43134,
      "training_step_time": 0.7203481197357178
    },
    {
      "epoch": 0.000263275146484375,
      "model_forward_time": 0.1194603443145752,
      "step": 43135
    },
    {
      "epoch": 0.000263275146484375,
      "step": 43135,
      "training_step_time": 0.6500236988067627
    },
    {
      "epoch": 0.00026328125,
      "model_forward_time": 0.12256884574890137,
      "step": 43136
    },
    {
      "epoch": 0.00026328125,
      "step": 43136,
      "training_step_time": 0.7067315578460693
    },
    {
      "epoch": 0.000263287353515625,
      "model_forward_time": 0.12436318397521973,
      "step": 43137
    },
    {
      "epoch": 0.000263287353515625,
      "step": 43137,
      "training_step_time": 0.6691305637359619
    },
    {
      "epoch": 0.00026329345703125,
      "model_forward_time": 0.11936593055725098,
      "step": 43138
    },
    {
      "epoch": 0.00026329345703125,
      "step": 43138,
      "training_step_time": 0.6550703048706055
    },
    {
      "epoch": 0.000263299560546875,
      "model_forward_time": 0.11880898475646973,
      "step": 43139
    },
    {
      "epoch": 0.000263299560546875,
      "step": 43139,
      "training_step_time": 0.6812987327575684
    },
    {
      "epoch": 0.0002633056640625,
      "grad_norm": 0.11486773192882538,
      "learning_rate": 2.0078250051328784e-05,
      "loss": 0.0399,
      "step": 43140
    },
    {
      "epoch": 0.0002633056640625,
      "model_forward_time": 0.1239156723022461,
      "step": 43140
    },
    {
      "epoch": 0.0002633056640625,
      "step": 43140,
      "training_step_time": 0.6666555404663086
    },
    {
      "epoch": 0.000263311767578125,
      "model_forward_time": 0.12760138511657715,
      "step": 43141
    },
    {
      "epoch": 0.000263311767578125,
      "step": 43141,
      "training_step_time": 0.7043139934539795
    },
    {
      "epoch": 0.00026331787109375,
      "model_forward_time": 0.12018322944641113,
      "step": 43142
    },
    {
      "epoch": 0.00026331787109375,
      "step": 43142,
      "training_step_time": 0.644589900970459
    },
    {
      "epoch": 0.000263323974609375,
      "model_forward_time": 0.12060999870300293,
      "step": 43143
    },
    {
      "epoch": 0.000263323974609375,
      "step": 43143,
      "training_step_time": 0.6932363510131836
    },
    {
      "epoch": 0.000263330078125,
      "model_forward_time": 0.11958670616149902,
      "step": 43144
    },
    {
      "epoch": 0.000263330078125,
      "step": 43144,
      "training_step_time": 0.6919562816619873
    },
    {
      "epoch": 0.000263336181640625,
      "model_forward_time": 0.12328338623046875,
      "step": 43145
    },
    {
      "epoch": 0.000263336181640625,
      "step": 43145,
      "training_step_time": 0.6174149513244629
    },
    {
      "epoch": 0.00026334228515625,
      "model_forward_time": 0.1162419319152832,
      "step": 43146
    },
    {
      "epoch": 0.00026334228515625,
      "step": 43146,
      "training_step_time": 0.6219301223754883
    },
    {
      "epoch": 0.000263348388671875,
      "model_forward_time": 0.12051796913146973,
      "step": 43147
    },
    {
      "epoch": 0.000263348388671875,
      "step": 43147,
      "training_step_time": 0.7281341552734375
    },
    {
      "epoch": 0.0002633544921875,
      "model_forward_time": 0.1167593002319336,
      "step": 43148
    },
    {
      "epoch": 0.0002633544921875,
      "step": 43148,
      "training_step_time": 0.737602710723877
    },
    {
      "epoch": 0.000263360595703125,
      "model_forward_time": 0.1207113265991211,
      "step": 43149
    },
    {
      "epoch": 0.000263360595703125,
      "step": 43149,
      "training_step_time": 0.6438605785369873
    },
    {
      "epoch": 0.00026336669921875,
      "grad_norm": 0.09838179498910904,
      "learning_rate": 2.0056176052722174e-05,
      "loss": 0.0433,
      "step": 43150
    },
    {
      "epoch": 0.00026336669921875,
      "model_forward_time": 0.11824512481689453,
      "step": 43150
    },
    {
      "epoch": 0.00026336669921875,
      "step": 43150,
      "training_step_time": 0.6060574054718018
    },
    {
      "epoch": 0.000263372802734375,
      "model_forward_time": 0.13037967681884766,
      "step": 43151
    },
    {
      "epoch": 0.000263372802734375,
      "step": 43151,
      "training_step_time": 0.6411106586456299
    },
    {
      "epoch": 0.00026337890625,
      "model_forward_time": 0.1211698055267334,
      "step": 43152
    },
    {
      "epoch": 0.00026337890625,
      "step": 43152,
      "training_step_time": 0.6836133003234863
    },
    {
      "epoch": 0.000263385009765625,
      "model_forward_time": 0.12079095840454102,
      "step": 43153
    },
    {
      "epoch": 0.000263385009765625,
      "step": 43153,
      "training_step_time": 0.6682391166687012
    },
    {
      "epoch": 0.00026339111328125,
      "model_forward_time": 0.12129855155944824,
      "step": 43154
    },
    {
      "epoch": 0.00026339111328125,
      "step": 43154,
      "training_step_time": 0.7007031440734863
    },
    {
      "epoch": 0.000263397216796875,
      "model_forward_time": 0.12042784690856934,
      "step": 43155
    },
    {
      "epoch": 0.000263397216796875,
      "step": 43155,
      "training_step_time": 0.731494665145874
    },
    {
      "epoch": 0.0002634033203125,
      "model_forward_time": 0.11931562423706055,
      "step": 43156
    },
    {
      "epoch": 0.0002634033203125,
      "step": 43156,
      "training_step_time": 0.7674908638000488
    },
    {
      "epoch": 0.000263409423828125,
      "model_forward_time": 0.11896061897277832,
      "step": 43157
    },
    {
      "epoch": 0.000263409423828125,
      "step": 43157,
      "training_step_time": 0.6566483974456787
    },
    {
      "epoch": 0.00026341552734375,
      "model_forward_time": 0.1244056224822998,
      "step": 43158
    },
    {
      "epoch": 0.00026341552734375,
      "step": 43158,
      "training_step_time": 0.7347936630249023
    },
    {
      "epoch": 0.000263421630859375,
      "model_forward_time": 0.11855840682983398,
      "step": 43159
    },
    {
      "epoch": 0.000263421630859375,
      "step": 43159,
      "training_step_time": 0.6997835636138916
    },
    {
      "epoch": 0.000263427734375,
      "grad_norm": 0.08676928281784058,
      "learning_rate": 2.0034111150258666e-05,
      "loss": 0.04,
      "step": 43160
    },
    {
      "epoch": 0.000263427734375,
      "model_forward_time": 0.12201380729675293,
      "step": 43160
    },
    {
      "epoch": 0.000263427734375,
      "step": 43160,
      "training_step_time": 0.6657004356384277
    },
    {
      "epoch": 0.000263433837890625,
      "model_forward_time": 0.11881208419799805,
      "step": 43161
    },
    {
      "epoch": 0.000263433837890625,
      "step": 43161,
      "training_step_time": 0.6531829833984375
    },
    {
      "epoch": 0.00026343994140625,
      "model_forward_time": 0.11818671226501465,
      "step": 43162
    },
    {
      "epoch": 0.00026343994140625,
      "step": 43162,
      "training_step_time": 0.6125729084014893
    },
    {
      "epoch": 0.000263446044921875,
      "model_forward_time": 0.11930108070373535,
      "step": 43163
    },
    {
      "epoch": 0.000263446044921875,
      "step": 43163,
      "training_step_time": 0.6407227516174316
    },
    {
      "epoch": 0.0002634521484375,
      "model_forward_time": 0.12183451652526855,
      "step": 43164
    },
    {
      "epoch": 0.0002634521484375,
      "step": 43164,
      "training_step_time": 0.6030440330505371
    },
    {
      "epoch": 0.000263458251953125,
      "model_forward_time": 0.12491083145141602,
      "step": 43165
    },
    {
      "epoch": 0.000263458251953125,
      "step": 43165,
      "training_step_time": 0.7583522796630859
    },
    {
      "epoch": 0.00026346435546875,
      "model_forward_time": 0.11714720726013184,
      "step": 43166
    },
    {
      "epoch": 0.00026346435546875,
      "step": 43166,
      "training_step_time": 0.6910881996154785
    },
    {
      "epoch": 0.000263470458984375,
      "model_forward_time": 0.12147736549377441,
      "step": 43167
    },
    {
      "epoch": 0.000263470458984375,
      "step": 43167,
      "training_step_time": 0.6736283302307129
    },
    {
      "epoch": 0.0002634765625,
      "model_forward_time": 0.12412118911743164,
      "step": 43168
    },
    {
      "epoch": 0.0002634765625,
      "step": 43168,
      "training_step_time": 0.7081375122070312
    },
    {
      "epoch": 0.000263482666015625,
      "model_forward_time": 0.11744189262390137,
      "step": 43169
    },
    {
      "epoch": 0.000263482666015625,
      "step": 43169,
      "training_step_time": 0.6777431964874268
    },
    {
      "epoch": 0.00026348876953125,
      "grad_norm": 0.10702859610319138,
      "learning_rate": 2.0012055350640986e-05,
      "loss": 0.0411,
      "step": 43170
    },
    {
      "epoch": 0.00026348876953125,
      "model_forward_time": 0.12218832969665527,
      "step": 43170
    },
    {
      "epoch": 0.00026348876953125,
      "step": 43170,
      "training_step_time": 0.6845273971557617
    },
    {
      "epoch": 0.000263494873046875,
      "model_forward_time": 0.11859893798828125,
      "step": 43171
    },
    {
      "epoch": 0.000263494873046875,
      "step": 43171,
      "training_step_time": 0.5961651802062988
    },
    {
      "epoch": 0.0002635009765625,
      "model_forward_time": 0.12107086181640625,
      "step": 43172
    },
    {
      "epoch": 0.0002635009765625,
      "step": 43172,
      "training_step_time": 0.6183648109436035
    },
    {
      "epoch": 0.000263507080078125,
      "model_forward_time": 0.11887073516845703,
      "step": 43173
    },
    {
      "epoch": 0.000263507080078125,
      "step": 43173,
      "training_step_time": 0.6586282253265381
    },
    {
      "epoch": 0.00026351318359375,
      "model_forward_time": 0.11896109580993652,
      "step": 43174
    },
    {
      "epoch": 0.00026351318359375,
      "step": 43174,
      "training_step_time": 0.6439380645751953
    },
    {
      "epoch": 0.000263519287109375,
      "model_forward_time": 0.12298822402954102,
      "step": 43175
    },
    {
      "epoch": 0.000263519287109375,
      "step": 43175,
      "training_step_time": 0.6800885200500488
    },
    {
      "epoch": 0.000263525390625,
      "model_forward_time": 0.12497162818908691,
      "step": 43176
    },
    {
      "epoch": 0.000263525390625,
      "step": 43176,
      "training_step_time": 0.6851768493652344
    },
    {
      "epoch": 0.000263531494140625,
      "model_forward_time": 0.12946629524230957,
      "step": 43177
    },
    {
      "epoch": 0.000263531494140625,
      "step": 43177,
      "training_step_time": 0.7593655586242676
    },
    {
      "epoch": 0.00026353759765625,
      "model_forward_time": 0.11843347549438477,
      "step": 43178
    },
    {
      "epoch": 0.00026353759765625,
      "step": 43178,
      "training_step_time": 0.7019636631011963
    },
    {
      "epoch": 0.000263543701171875,
      "model_forward_time": 0.12053823471069336,
      "step": 43179
    },
    {
      "epoch": 0.000263543701171875,
      "step": 43179,
      "training_step_time": 0.6735055446624756
    },
    {
      "epoch": 0.0002635498046875,
      "grad_norm": 0.12648999691009521,
      "learning_rate": 1.999000866056908e-05,
      "loss": 0.0394,
      "step": 43180
    },
    {
      "epoch": 0.0002635498046875,
      "model_forward_time": 0.11718487739562988,
      "step": 43180
    },
    {
      "epoch": 0.0002635498046875,
      "step": 43180,
      "training_step_time": 0.6028316020965576
    },
    {
      "epoch": 0.000263555908203125,
      "model_forward_time": 0.11583113670349121,
      "step": 43181
    },
    {
      "epoch": 0.000263555908203125,
      "step": 43181,
      "training_step_time": 0.5388414859771729
    },
    {
      "epoch": 0.00026356201171875,
      "model_forward_time": 0.11656928062438965,
      "step": 43182
    },
    {
      "epoch": 0.00026356201171875,
      "step": 43182,
      "training_step_time": 0.5748176574707031
    },
    {
      "epoch": 0.000263568115234375,
      "model_forward_time": 0.11682534217834473,
      "step": 43183
    },
    {
      "epoch": 0.000263568115234375,
      "step": 43183,
      "training_step_time": 0.5236234664916992
    },
    {
      "epoch": 0.00026357421875,
      "model_forward_time": 0.11736536026000977,
      "step": 43184
    },
    {
      "epoch": 0.00026357421875,
      "step": 43184,
      "training_step_time": 0.5070524215698242
    },
    {
      "epoch": 0.000263580322265625,
      "model_forward_time": 0.11681723594665527,
      "step": 43185
    },
    {
      "epoch": 0.000263580322265625,
      "step": 43185,
      "training_step_time": 0.5073139667510986
    },
    {
      "epoch": 0.00026358642578125,
      "model_forward_time": 0.11691093444824219,
      "step": 43186
    },
    {
      "epoch": 0.00026358642578125,
      "step": 43186,
      "training_step_time": 0.4007902145385742
    },
    {
      "epoch": 0.000263592529296875,
      "model_forward_time": 0.11552166938781738,
      "step": 43187
    },
    {
      "epoch": 0.000263592529296875,
      "step": 43187,
      "training_step_time": 0.5100564956665039
    },
    {
      "epoch": 0.0002635986328125,
      "model_forward_time": 0.11703085899353027,
      "step": 43188
    },
    {
      "epoch": 0.0002635986328125,
      "step": 43188,
      "training_step_time": 0.5529239177703857
    },
    {
      "epoch": 0.000263604736328125,
      "model_forward_time": 0.11505770683288574,
      "step": 43189
    },
    {
      "epoch": 0.000263604736328125,
      "step": 43189,
      "training_step_time": 0.5192697048187256
    },
    {
      "epoch": 0.00026361083984375,
      "grad_norm": 0.11387138068675995,
      "learning_rate": 1.9967971086740195e-05,
      "loss": 0.0371,
      "step": 43190
    },
    {
      "epoch": 0.00026361083984375,
      "model_forward_time": 0.11452484130859375,
      "step": 43190
    },
    {
      "epoch": 0.00026361083984375,
      "step": 43190,
      "training_step_time": 0.43295884132385254
    },
    {
      "epoch": 0.000263616943359375,
      "model_forward_time": 0.11475467681884766,
      "step": 43191
    },
    {
      "epoch": 0.000263616943359375,
      "step": 43191,
      "training_step_time": 0.4615292549133301
    },
    {
      "epoch": 0.000263623046875,
      "model_forward_time": 0.11454010009765625,
      "step": 43192
    },
    {
      "epoch": 0.000263623046875,
      "step": 43192,
      "training_step_time": 0.39969491958618164
    },
    {
      "epoch": 0.000263629150390625,
      "model_forward_time": 0.11438846588134766,
      "step": 43193
    },
    {
      "epoch": 0.000263629150390625,
      "step": 43193,
      "training_step_time": 0.3860955238342285
    },
    {
      "epoch": 0.00026363525390625,
      "model_forward_time": 0.11513972282409668,
      "step": 43194
    },
    {
      "epoch": 0.00026363525390625,
      "step": 43194,
      "training_step_time": 0.4001483917236328
    },
    {
      "epoch": 0.000263641357421875,
      "model_forward_time": 0.11448550224304199,
      "step": 43195
    },
    {
      "epoch": 0.000263641357421875,
      "step": 43195,
      "training_step_time": 0.40198802947998047
    },
    {
      "epoch": 0.0002636474609375,
      "model_forward_time": 0.11497259140014648,
      "step": 43196
    },
    {
      "epoch": 0.0002636474609375,
      "step": 43196,
      "training_step_time": 0.4055449962615967
    },
    {
      "epoch": 0.000263653564453125,
      "model_forward_time": 0.11415219306945801,
      "step": 43197
    },
    {
      "epoch": 0.000263653564453125,
      "step": 43197,
      "training_step_time": 0.3828403949737549
    },
    {
      "epoch": 0.00026365966796875,
      "model_forward_time": 0.11520814895629883,
      "step": 43198
    },
    {
      "epoch": 0.00026365966796875,
      "step": 43198,
      "training_step_time": 0.38943934440612793
    },
    {
      "epoch": 0.000263665771484375,
      "model_forward_time": 0.1152338981628418,
      "step": 43199
    },
    {
      "epoch": 0.000263665771484375,
      "step": 43199,
      "training_step_time": 0.4069499969482422
    },
    {
      "epoch": 0.000263671875,
      "grad_norm": 0.08705580979585648,
      "learning_rate": 1.9945942635848748e-05,
      "loss": 0.0402,
      "step": 43200
    },
    {
      "epoch": 0.000263671875,
      "model_forward_time": 0.11501026153564453,
      "step": 43200
    },
    {
      "epoch": 0.000263671875,
      "step": 43200,
      "training_step_time": 0.3986833095550537
    },
    {
      "epoch": 0.000263677978515625,
      "model_forward_time": 0.11530447006225586,
      "step": 43201
    },
    {
      "epoch": 0.000263677978515625,
      "step": 43201,
      "training_step_time": 0.4968070983886719
    },
    {
      "epoch": 0.00026368408203125,
      "model_forward_time": 0.11497664451599121,
      "step": 43202
    },
    {
      "epoch": 0.00026368408203125,
      "step": 43202,
      "training_step_time": 0.4492793083190918
    },
    {
      "epoch": 0.000263690185546875,
      "model_forward_time": 0.11540865898132324,
      "step": 43203
    },
    {
      "epoch": 0.000263690185546875,
      "step": 43203,
      "training_step_time": 0.4298288822174072
    },
    {
      "epoch": 0.0002636962890625,
      "model_forward_time": 0.11453032493591309,
      "step": 43204
    },
    {
      "epoch": 0.0002636962890625,
      "step": 43204,
      "training_step_time": 0.44505786895751953
    },
    {
      "epoch": 0.000263702392578125,
      "model_forward_time": 0.11532902717590332,
      "step": 43205
    },
    {
      "epoch": 0.000263702392578125,
      "step": 43205,
      "training_step_time": 0.49246978759765625
    },
    {
      "epoch": 0.00026370849609375,
      "model_forward_time": 0.11618185043334961,
      "step": 43206
    },
    {
      "epoch": 0.00026370849609375,
      "step": 43206,
      "training_step_time": 0.40284061431884766
    },
    {
      "epoch": 0.000263714599609375,
      "model_forward_time": 0.11492633819580078,
      "step": 43207
    },
    {
      "epoch": 0.000263714599609375,
      "step": 43207,
      "training_step_time": 0.3993544578552246
    },
    {
      "epoch": 0.000263720703125,
      "model_forward_time": 0.11455249786376953,
      "step": 43208
    },
    {
      "epoch": 0.000263720703125,
      "step": 43208,
      "training_step_time": 0.390880823135376
    },
    {
      "epoch": 0.000263726806640625,
      "model_forward_time": 0.11528778076171875,
      "step": 43209
    },
    {
      "epoch": 0.000263726806640625,
      "step": 43209,
      "training_step_time": 0.38727331161499023
    },
    {
      "epoch": 0.00026373291015625,
      "grad_norm": 0.1704169064760208,
      "learning_rate": 1.9923923314586397e-05,
      "loss": 0.0396,
      "step": 43210
    },
    {
      "epoch": 0.00026373291015625,
      "model_forward_time": 0.11617064476013184,
      "step": 43210
    },
    {
      "epoch": 0.00026373291015625,
      "step": 43210,
      "training_step_time": 0.3852062225341797
    },
    {
      "epoch": 0.000263739013671875,
      "model_forward_time": 0.1155393123626709,
      "step": 43211
    },
    {
      "epoch": 0.000263739013671875,
      "step": 43211,
      "training_step_time": 0.38254880905151367
    },
    {
      "epoch": 0.0002637451171875,
      "model_forward_time": 0.11515069007873535,
      "step": 43212
    },
    {
      "epoch": 0.0002637451171875,
      "step": 43212,
      "training_step_time": 0.3945326805114746
    },
    {
      "epoch": 0.000263751220703125,
      "model_forward_time": 0.1155860424041748,
      "step": 43213
    },
    {
      "epoch": 0.000263751220703125,
      "step": 43213,
      "training_step_time": 0.3878211975097656
    },
    {
      "epoch": 0.00026375732421875,
      "model_forward_time": 0.11554169654846191,
      "step": 43214
    },
    {
      "epoch": 0.00026375732421875,
      "step": 43214,
      "training_step_time": 0.45029711723327637
    },
    {
      "epoch": 0.000263763427734375,
      "model_forward_time": 0.11445140838623047,
      "step": 43215
    },
    {
      "epoch": 0.000263763427734375,
      "step": 43215,
      "training_step_time": 0.4011547565460205
    },
    {
      "epoch": 0.00026376953125,
      "model_forward_time": 0.11527395248413086,
      "step": 43216
    },
    {
      "epoch": 0.00026376953125,
      "step": 43216,
      "training_step_time": 0.4995462894439697
    },
    {
      "epoch": 0.000263775634765625,
      "model_forward_time": 0.11441278457641602,
      "step": 43217
    },
    {
      "epoch": 0.000263775634765625,
      "step": 43217,
      "training_step_time": 0.47713518142700195
    },
    {
      "epoch": 0.00026378173828125,
      "model_forward_time": 0.11583685874938965,
      "step": 43218
    },
    {
      "epoch": 0.00026378173828125,
      "step": 43218,
      "training_step_time": 0.42249560356140137
    },
    {
      "epoch": 0.000263787841796875,
      "model_forward_time": 0.11500072479248047,
      "step": 43219
    },
    {
      "epoch": 0.000263787841796875,
      "step": 43219,
      "training_step_time": 0.3975839614868164
    },
    {
      "epoch": 0.0002637939453125,
      "grad_norm": 0.10326637327671051,
      "learning_rate": 1.9901913129642024e-05,
      "loss": 0.046,
      "step": 43220
    },
    {
      "epoch": 0.0002637939453125,
      "model_forward_time": 0.11521387100219727,
      "step": 43220
    },
    {
      "epoch": 0.0002637939453125,
      "step": 43220,
      "training_step_time": 0.43328261375427246
    },
    {
      "epoch": 0.000263800048828125,
      "model_forward_time": 0.11530303955078125,
      "step": 43221
    },
    {
      "epoch": 0.000263800048828125,
      "step": 43221,
      "training_step_time": 0.38761234283447266
    },
    {
      "epoch": 0.00026380615234375,
      "model_forward_time": 0.11537384986877441,
      "step": 43222
    },
    {
      "epoch": 0.00026380615234375,
      "step": 43222,
      "training_step_time": 0.391141414642334
    },
    {
      "epoch": 0.000263812255859375,
      "model_forward_time": 0.11515569686889648,
      "step": 43223
    },
    {
      "epoch": 0.000263812255859375,
      "step": 43223,
      "training_step_time": 0.3866922855377197
    },
    {
      "epoch": 0.000263818359375,
      "model_forward_time": 0.11527323722839355,
      "step": 43224
    },
    {
      "epoch": 0.000263818359375,
      "step": 43224,
      "training_step_time": 0.3882479667663574
    },
    {
      "epoch": 0.000263824462890625,
      "model_forward_time": 0.11676692962646484,
      "step": 43225
    },
    {
      "epoch": 0.000263824462890625,
      "step": 43225,
      "training_step_time": 0.3912844657897949
    },
    {
      "epoch": 0.00026383056640625,
      "model_forward_time": 0.11503386497497559,
      "step": 43226
    },
    {
      "epoch": 0.00026383056640625,
      "step": 43226,
      "training_step_time": 0.38918519020080566
    },
    {
      "epoch": 0.000263836669921875,
      "model_forward_time": 0.11625432968139648,
      "step": 43227
    },
    {
      "epoch": 0.000263836669921875,
      "step": 43227,
      "training_step_time": 0.38915348052978516
    },
    {
      "epoch": 0.0002638427734375,
      "model_forward_time": 0.11534929275512695,
      "step": 43228
    },
    {
      "epoch": 0.0002638427734375,
      "step": 43228,
      "training_step_time": 0.46744441986083984
    },
    {
      "epoch": 0.000263848876953125,
      "model_forward_time": 0.11555218696594238,
      "step": 43229
    },
    {
      "epoch": 0.000263848876953125,
      "step": 43229,
      "training_step_time": 0.40579795837402344
    },
    {
      "epoch": 0.00026385498046875,
      "grad_norm": 0.10796283930540085,
      "learning_rate": 1.9879912087701753e-05,
      "loss": 0.0423,
      "step": 43230
    },
    {
      "epoch": 0.00026385498046875,
      "model_forward_time": 0.1162264347076416,
      "step": 43230
    },
    {
      "epoch": 0.00026385498046875,
      "step": 43230,
      "training_step_time": 0.3663442134857178
    },
    {
      "epoch": 0.000263861083984375,
      "model_forward_time": 0.11559700965881348,
      "step": 43231
    },
    {
      "epoch": 0.000263861083984375,
      "step": 43231,
      "training_step_time": 0.40627384185791016
    },
    {
      "epoch": 0.0002638671875,
      "model_forward_time": 0.11513805389404297,
      "step": 43232
    },
    {
      "epoch": 0.0002638671875,
      "step": 43232,
      "training_step_time": 0.48258376121520996
    },
    {
      "epoch": 0.000263873291015625,
      "model_forward_time": 0.1153566837310791,
      "step": 43233
    },
    {
      "epoch": 0.000263873291015625,
      "step": 43233,
      "training_step_time": 0.4011046886444092
    },
    {
      "epoch": 0.00026387939453125,
      "model_forward_time": 0.11557888984680176,
      "step": 43234
    },
    {
      "epoch": 0.00026387939453125,
      "step": 43234,
      "training_step_time": 0.41983556747436523
    },
    {
      "epoch": 0.000263885498046875,
      "model_forward_time": 0.11515164375305176,
      "step": 43235
    },
    {
      "epoch": 0.000263885498046875,
      "step": 43235,
      "training_step_time": 0.4435732364654541
    },
    {
      "epoch": 0.0002638916015625,
      "model_forward_time": 0.11519837379455566,
      "step": 43236
    },
    {
      "epoch": 0.0002638916015625,
      "step": 43236,
      "training_step_time": 0.4028439521789551
    },
    {
      "epoch": 0.000263897705078125,
      "model_forward_time": 0.11574888229370117,
      "step": 43237
    },
    {
      "epoch": 0.000263897705078125,
      "step": 43237,
      "training_step_time": 0.3973677158355713
    },
    {
      "epoch": 0.00026390380859375,
      "model_forward_time": 0.11460423469543457,
      "step": 43238
    },
    {
      "epoch": 0.00026390380859375,
      "step": 43238,
      "training_step_time": 0.39142870903015137
    },
    {
      "epoch": 0.000263909912109375,
      "model_forward_time": 0.11511540412902832,
      "step": 43239
    },
    {
      "epoch": 0.000263909912109375,
      "step": 43239,
      "training_step_time": 0.39051270484924316
    },
    {
      "epoch": 0.000263916015625,
      "grad_norm": 0.09775012731552124,
      "learning_rate": 1.98579201954489e-05,
      "loss": 0.0364,
      "step": 43240
    },
    {
      "epoch": 0.000263916015625,
      "model_forward_time": 0.11524271965026855,
      "step": 43240
    },
    {
      "epoch": 0.000263916015625,
      "step": 43240,
      "training_step_time": 0.4041433334350586
    },
    {
      "epoch": 0.000263922119140625,
      "model_forward_time": 0.11530089378356934,
      "step": 43241
    },
    {
      "epoch": 0.000263922119140625,
      "step": 43241,
      "training_step_time": 0.38281941413879395
    },
    {
      "epoch": 0.00026392822265625,
      "model_forward_time": 0.1163172721862793,
      "step": 43242
    },
    {
      "epoch": 0.00026392822265625,
      "step": 43242,
      "training_step_time": 0.396442174911499
    },
    {
      "epoch": 0.000263934326171875,
      "model_forward_time": 0.1150972843170166,
      "step": 43243
    },
    {
      "epoch": 0.000263934326171875,
      "step": 43243,
      "training_step_time": 0.45916271209716797
    },
    {
      "epoch": 0.0002639404296875,
      "model_forward_time": 0.11514019966125488,
      "step": 43244
    },
    {
      "epoch": 0.0002639404296875,
      "step": 43244,
      "training_step_time": 0.4214339256286621
    },
    {
      "epoch": 0.000263946533203125,
      "model_forward_time": 0.11478900909423828,
      "step": 43245
    },
    {
      "epoch": 0.000263946533203125,
      "step": 43245,
      "training_step_time": 0.361677885055542
    },
    {
      "epoch": 0.00026395263671875,
      "model_forward_time": 0.11560273170471191,
      "step": 43246
    },
    {
      "epoch": 0.00026395263671875,
      "step": 43246,
      "training_step_time": 0.45906829833984375
    },
    {
      "epoch": 0.000263958740234375,
      "model_forward_time": 0.11497831344604492,
      "step": 43247
    },
    {
      "epoch": 0.000263958740234375,
      "step": 43247,
      "training_step_time": 0.41303443908691406
    },
    {
      "epoch": 0.00026396484375,
      "model_forward_time": 0.11417198181152344,
      "step": 43248
    },
    {
      "epoch": 0.00026396484375,
      "step": 43248,
      "training_step_time": 0.4151611328125
    },
    {
      "epoch": 0.000263970947265625,
      "model_forward_time": 0.11469006538391113,
      "step": 43249
    },
    {
      "epoch": 0.000263970947265625,
      "step": 43249,
      "training_step_time": 0.49354982376098633
    },
    {
      "epoch": 0.00026397705078125,
      "grad_norm": 0.11999707669019699,
      "learning_rate": 1.9835937459564064e-05,
      "loss": 0.0398,
      "step": 43250
    },
    {
      "epoch": 0.00026397705078125,
      "model_forward_time": 0.11489057540893555,
      "step": 43250
    },
    {
      "epoch": 0.00026397705078125,
      "step": 43250,
      "training_step_time": 0.3887817859649658
    },
    {
      "epoch": 0.000263983154296875,
      "model_forward_time": 0.11514067649841309,
      "step": 43251
    },
    {
      "epoch": 0.000263983154296875,
      "step": 43251,
      "training_step_time": 0.41080498695373535
    },
    {
      "epoch": 0.0002639892578125,
      "model_forward_time": 0.11533284187316895,
      "step": 43252
    },
    {
      "epoch": 0.0002639892578125,
      "step": 43252,
      "training_step_time": 0.39183712005615234
    },
    {
      "epoch": 0.000263995361328125,
      "model_forward_time": 0.11452198028564453,
      "step": 43253
    },
    {
      "epoch": 0.000263995361328125,
      "step": 43253,
      "training_step_time": 0.38076329231262207
    },
    {
      "epoch": 0.00026400146484375,
      "model_forward_time": 0.11554265022277832,
      "step": 43254
    },
    {
      "epoch": 0.00026400146484375,
      "step": 43254,
      "training_step_time": 0.3867671489715576
    },
    {
      "epoch": 0.000264007568359375,
      "model_forward_time": 0.11590242385864258,
      "step": 43255
    },
    {
      "epoch": 0.000264007568359375,
      "step": 43255,
      "training_step_time": 0.39266324043273926
    },
    {
      "epoch": 0.000264013671875,
      "model_forward_time": 0.11517715454101562,
      "step": 43256
    },
    {
      "epoch": 0.000264013671875,
      "step": 43256,
      "training_step_time": 0.39235448837280273
    },
    {
      "epoch": 0.000264019775390625,
      "model_forward_time": 0.11531782150268555,
      "step": 43257
    },
    {
      "epoch": 0.000264019775390625,
      "step": 43257,
      "training_step_time": 0.3908228874206543
    },
    {
      "epoch": 0.00026402587890625,
      "model_forward_time": 0.11538195610046387,
      "step": 43258
    },
    {
      "epoch": 0.00026402587890625,
      "step": 43258,
      "training_step_time": 0.450791597366333
    },
    {
      "epoch": 0.000264031982421875,
      "model_forward_time": 0.11439824104309082,
      "step": 43259
    },
    {
      "epoch": 0.000264031982421875,
      "step": 43259,
      "training_step_time": 0.5166542530059814
    },
    {
      "epoch": 0.0002640380859375,
      "grad_norm": 0.124635711312294,
      "learning_rate": 1.981396388672496e-05,
      "loss": 0.0381,
      "step": 43260
    },
    {
      "epoch": 0.0002640380859375,
      "model_forward_time": 0.11445474624633789,
      "step": 43260
    },
    {
      "epoch": 0.0002640380859375,
      "step": 43260,
      "training_step_time": 0.4484071731567383
    },
    {
      "epoch": 0.000264044189453125,
      "model_forward_time": 0.11491227149963379,
      "step": 43261
    },
    {
      "epoch": 0.000264044189453125,
      "step": 43261,
      "training_step_time": 0.4940605163574219
    },
    {
      "epoch": 0.00026405029296875,
      "model_forward_time": 0.11430859565734863,
      "step": 43262
    },
    {
      "epoch": 0.00026405029296875,
      "step": 43262,
      "training_step_time": 0.4664113521575928
    },
    {
      "epoch": 0.000264056396484375,
      "model_forward_time": 0.11491608619689941,
      "step": 43263
    },
    {
      "epoch": 0.000264056396484375,
      "step": 43263,
      "training_step_time": 0.4155690670013428
    },
    {
      "epoch": 0.0002640625,
      "model_forward_time": 0.11392545700073242,
      "step": 43264
    },
    {
      "epoch": 0.0002640625,
      "step": 43264,
      "training_step_time": 0.4263327121734619
    },
    {
      "epoch": 0.000264068603515625,
      "model_forward_time": 0.1149437427520752,
      "step": 43265
    },
    {
      "epoch": 0.000264068603515625,
      "step": 43265,
      "training_step_time": 0.38054394721984863
    },
    {
      "epoch": 0.00026407470703125,
      "model_forward_time": 0.1146852970123291,
      "step": 43266
    },
    {
      "epoch": 0.00026407470703125,
      "step": 43266,
      "training_step_time": 0.42559313774108887
    },
    {
      "epoch": 0.000264080810546875,
      "model_forward_time": 0.11484408378601074,
      "step": 43267
    },
    {
      "epoch": 0.000264080810546875,
      "step": 43267,
      "training_step_time": 0.3950328826904297
    },
    {
      "epoch": 0.0002640869140625,
      "model_forward_time": 0.11489462852478027,
      "step": 43268
    },
    {
      "epoch": 0.0002640869140625,
      "step": 43268,
      "training_step_time": 0.3912785053253174
    },
    {
      "epoch": 0.000264093017578125,
      "model_forward_time": 0.11510825157165527,
      "step": 43269
    },
    {
      "epoch": 0.000264093017578125,
      "step": 43269,
      "training_step_time": 0.38251304626464844
    },
    {
      "epoch": 0.00026409912109375,
      "grad_norm": 0.11316560208797455,
      "learning_rate": 1.9791999483606648e-05,
      "loss": 0.038,
      "step": 43270
    },
    {
      "epoch": 0.00026409912109375,
      "model_forward_time": 0.11517071723937988,
      "step": 43270
    },
    {
      "epoch": 0.00026409912109375,
      "step": 43270,
      "training_step_time": 0.39018750190734863
    },
    {
      "epoch": 0.000264105224609375,
      "model_forward_time": 0.11630749702453613,
      "step": 43271
    },
    {
      "epoch": 0.000264105224609375,
      "step": 43271,
      "training_step_time": 0.39512038230895996
    },
    {
      "epoch": 0.000264111328125,
      "model_forward_time": 0.11550211906433105,
      "step": 43272
    },
    {
      "epoch": 0.000264111328125,
      "step": 43272,
      "training_step_time": 0.4162018299102783
    },
    {
      "epoch": 0.000264117431640625,
      "model_forward_time": 0.11500096321105957,
      "step": 43273
    },
    {
      "epoch": 0.000264117431640625,
      "step": 43273,
      "training_step_time": 0.503422737121582
    },
    {
      "epoch": 0.00026412353515625,
      "model_forward_time": 0.11525702476501465,
      "step": 43274
    },
    {
      "epoch": 0.00026412353515625,
      "step": 43274,
      "training_step_time": 0.40267443656921387
    },
    {
      "epoch": 0.000264129638671875,
      "model_forward_time": 0.11517810821533203,
      "step": 43275
    },
    {
      "epoch": 0.000264129638671875,
      "step": 43275,
      "training_step_time": 0.519829511642456
    },
    {
      "epoch": 0.0002641357421875,
      "model_forward_time": 0.11533975601196289,
      "step": 43276
    },
    {
      "epoch": 0.0002641357421875,
      "step": 43276,
      "training_step_time": 0.504188060760498
    },
    {
      "epoch": 0.000264141845703125,
      "model_forward_time": 0.11554288864135742,
      "step": 43277
    },
    {
      "epoch": 0.000264141845703125,
      "step": 43277,
      "training_step_time": 0.3876795768737793
    },
    {
      "epoch": 0.00026414794921875,
      "model_forward_time": 0.11461377143859863,
      "step": 43278
    },
    {
      "epoch": 0.00026414794921875,
      "step": 43278,
      "training_step_time": 0.4834935665130615
    },
    {
      "epoch": 0.000264154052734375,
      "model_forward_time": 0.11571955680847168,
      "step": 43279
    },
    {
      "epoch": 0.000264154052734375,
      "step": 43279,
      "training_step_time": 0.3950352668762207
    },
    {
      "epoch": 0.00026416015625,
      "grad_norm": 0.1038692444562912,
      "learning_rate": 1.977004425688126e-05,
      "loss": 0.0397,
      "step": 43280
    },
    {
      "epoch": 0.00026416015625,
      "model_forward_time": 0.11430048942565918,
      "step": 43280
    },
    {
      "epoch": 0.00026416015625,
      "step": 43280,
      "training_step_time": 0.3952505588531494
    },
    {
      "epoch": 0.000264166259765625,
      "model_forward_time": 0.11487030982971191,
      "step": 43281
    },
    {
      "epoch": 0.000264166259765625,
      "step": 43281,
      "training_step_time": 0.39043235778808594
    },
    {
      "epoch": 0.00026417236328125,
      "model_forward_time": 0.11484169960021973,
      "step": 43282
    },
    {
      "epoch": 0.00026417236328125,
      "step": 43282,
      "training_step_time": 0.3909473419189453
    },
    {
      "epoch": 0.000264178466796875,
      "model_forward_time": 0.1149442195892334,
      "step": 43283
    },
    {
      "epoch": 0.000264178466796875,
      "step": 43283,
      "training_step_time": 0.3727233409881592
    },
    {
      "epoch": 0.0002641845703125,
      "model_forward_time": 0.11536788940429688,
      "step": 43284
    },
    {
      "epoch": 0.0002641845703125,
      "step": 43284,
      "training_step_time": 0.39009547233581543
    },
    {
      "epoch": 0.000264190673828125,
      "model_forward_time": 0.1156148910522461,
      "step": 43285
    },
    {
      "epoch": 0.000264190673828125,
      "step": 43285,
      "training_step_time": 0.3999619483947754
    },
    {
      "epoch": 0.00026419677734375,
      "model_forward_time": 0.11559152603149414,
      "step": 43286
    },
    {
      "epoch": 0.00026419677734375,
      "step": 43286,
      "training_step_time": 0.48805785179138184
    },
    {
      "epoch": 0.000264202880859375,
      "model_forward_time": 0.11498546600341797,
      "step": 43287
    },
    {
      "epoch": 0.000264202880859375,
      "step": 43287,
      "training_step_time": 0.435823917388916
    },
    {
      "epoch": 0.000264208984375,
      "model_forward_time": 0.11466217041015625,
      "step": 43288
    },
    {
      "epoch": 0.000264208984375,
      "step": 43288,
      "training_step_time": 0.46158480644226074
    },
    {
      "epoch": 0.000264215087890625,
      "model_forward_time": 0.11496973037719727,
      "step": 43289
    },
    {
      "epoch": 0.000264215087890625,
      "step": 43289,
      "training_step_time": 0.36657285690307617
    },
    {
      "epoch": 0.00026422119140625,
      "grad_norm": 0.09754446148872375,
      "learning_rate": 1.974809821321827e-05,
      "loss": 0.0413,
      "step": 43290
    },
    {
      "epoch": 0.00026422119140625,
      "model_forward_time": 0.11458659172058105,
      "step": 43290
    },
    {
      "epoch": 0.00026422119140625,
      "step": 43290,
      "training_step_time": 0.43198680877685547
    },
    {
      "epoch": 0.000264227294921875,
      "model_forward_time": 0.11453938484191895,
      "step": 43291
    },
    {
      "epoch": 0.000264227294921875,
      "step": 43291,
      "training_step_time": 0.4474222660064697
    },
    {
      "epoch": 0.0002642333984375,
      "model_forward_time": 0.11556816101074219,
      "step": 43292
    },
    {
      "epoch": 0.0002642333984375,
      "step": 43292,
      "training_step_time": 0.44513797760009766
    },
    {
      "epoch": 0.000264239501953125,
      "model_forward_time": 0.11483907699584961,
      "step": 43293
    },
    {
      "epoch": 0.000264239501953125,
      "step": 43293,
      "training_step_time": 0.40315747261047363
    },
    {
      "epoch": 0.00026424560546875,
      "model_forward_time": 0.11506223678588867,
      "step": 43294
    },
    {
      "epoch": 0.00026424560546875,
      "step": 43294,
      "training_step_time": 0.39251160621643066
    },
    {
      "epoch": 0.000264251708984375,
      "model_forward_time": 0.1154167652130127,
      "step": 43295
    },
    {
      "epoch": 0.000264251708984375,
      "step": 43295,
      "training_step_time": 0.38671398162841797
    },
    {
      "epoch": 0.0002642578125,
      "model_forward_time": 0.11459755897521973,
      "step": 43296
    },
    {
      "epoch": 0.0002642578125,
      "step": 43296,
      "training_step_time": 0.4032437801361084
    },
    {
      "epoch": 0.000264263916015625,
      "model_forward_time": 0.11501121520996094,
      "step": 43297
    },
    {
      "epoch": 0.000264263916015625,
      "step": 43297,
      "training_step_time": 0.3951129913330078
    },
    {
      "epoch": 0.00026427001953125,
      "model_forward_time": 0.11587166786193848,
      "step": 43298
    },
    {
      "epoch": 0.00026427001953125,
      "step": 43298,
      "training_step_time": 0.3943150043487549
    },
    {
      "epoch": 0.000264276123046875,
      "model_forward_time": 0.11497831344604492,
      "step": 43299
    },
    {
      "epoch": 0.000264276123046875,
      "step": 43299,
      "training_step_time": 0.3923354148864746
    },
    {
      "epoch": 0.0002642822265625,
      "grad_norm": 0.0648893490433693,
      "learning_rate": 1.9726161359284286e-05,
      "loss": 0.0377,
      "step": 43300
    },
    {
      "epoch": 0.0002642822265625,
      "model_forward_time": 0.11544132232666016,
      "step": 43300
    },
    {
      "epoch": 0.0002642822265625,
      "step": 43300,
      "training_step_time": 0.42658185958862305
    },
    {
      "epoch": 0.000264288330078125,
      "model_forward_time": 0.11551022529602051,
      "step": 43301
    },
    {
      "epoch": 0.000264288330078125,
      "step": 43301,
      "training_step_time": 0.3976097106933594
    },
    {
      "epoch": 0.00026429443359375,
      "model_forward_time": 0.11510324478149414,
      "step": 43302
    },
    {
      "epoch": 0.00026429443359375,
      "step": 43302,
      "training_step_time": 0.4624016284942627
    },
    {
      "epoch": 0.000264300537109375,
      "model_forward_time": 0.11535787582397461,
      "step": 43303
    },
    {
      "epoch": 0.000264300537109375,
      "step": 43303,
      "training_step_time": 0.398252010345459
    },
    {
      "epoch": 0.000264306640625,
      "model_forward_time": 0.11537647247314453,
      "step": 43304
    },
    {
      "epoch": 0.000264306640625,
      "step": 43304,
      "training_step_time": 0.4326300621032715
    },
    {
      "epoch": 0.000264312744140625,
      "model_forward_time": 0.11554837226867676,
      "step": 43305
    },
    {
      "epoch": 0.000264312744140625,
      "step": 43305,
      "training_step_time": 0.5134103298187256
    },
    {
      "epoch": 0.00026431884765625,
      "model_forward_time": 0.11556029319763184,
      "step": 43306
    },
    {
      "epoch": 0.00026431884765625,
      "step": 43306,
      "training_step_time": 0.4212324619293213
    },
    {
      "epoch": 0.000264324951171875,
      "model_forward_time": 0.11587405204772949,
      "step": 43307
    },
    {
      "epoch": 0.000264324951171875,
      "step": 43307,
      "training_step_time": 0.3946192264556885
    },
    {
      "epoch": 0.0002643310546875,
      "model_forward_time": 0.11468505859375,
      "step": 43308
    },
    {
      "epoch": 0.0002643310546875,
      "step": 43308,
      "training_step_time": 0.42667698860168457
    },
    {
      "epoch": 0.000264337158203125,
      "model_forward_time": 0.11510086059570312,
      "step": 43309
    },
    {
      "epoch": 0.000264337158203125,
      "step": 43309,
      "training_step_time": 0.3943946361541748
    },
    {
      "epoch": 0.00026434326171875,
      "grad_norm": 0.1069587841629982,
      "learning_rate": 1.9704233701743148e-05,
      "loss": 0.0406,
      "step": 43310
    },
    {
      "epoch": 0.00026434326171875,
      "model_forward_time": 0.11466479301452637,
      "step": 43310
    },
    {
      "epoch": 0.00026434326171875,
      "step": 43310,
      "training_step_time": 0.39145636558532715
    },
    {
      "epoch": 0.000264349365234375,
      "model_forward_time": 0.11620855331420898,
      "step": 43311
    },
    {
      "epoch": 0.000264349365234375,
      "step": 43311,
      "training_step_time": 0.48212170600891113
    },
    {
      "epoch": 0.00026435546875,
      "model_forward_time": 0.11484313011169434,
      "step": 43312
    },
    {
      "epoch": 0.00026435546875,
      "step": 43312,
      "training_step_time": 0.39527297019958496
    },
    {
      "epoch": 0.000264361572265625,
      "model_forward_time": 0.11507081985473633,
      "step": 43313
    },
    {
      "epoch": 0.000264361572265625,
      "step": 43313,
      "training_step_time": 0.37528443336486816
    },
    {
      "epoch": 0.00026436767578125,
      "model_forward_time": 0.11547589302062988,
      "step": 43314
    },
    {
      "epoch": 0.00026436767578125,
      "step": 43314,
      "training_step_time": 0.4937551021575928
    },
    {
      "epoch": 0.000264373779296875,
      "model_forward_time": 0.11438465118408203,
      "step": 43315
    },
    {
      "epoch": 0.000264373779296875,
      "step": 43315,
      "training_step_time": 0.4430983066558838
    },
    {
      "epoch": 0.0002643798828125,
      "model_forward_time": 0.11592912673950195,
      "step": 43316
    },
    {
      "epoch": 0.0002643798828125,
      "step": 43316,
      "training_step_time": 0.44437432289123535
    },
    {
      "epoch": 0.000264385986328125,
      "model_forward_time": 0.11530160903930664,
      "step": 43317
    },
    {
      "epoch": 0.000264385986328125,
      "step": 43317,
      "training_step_time": 0.5185258388519287
    },
    {
      "epoch": 0.00026439208984375,
      "model_forward_time": 0.11508941650390625,
      "step": 43318
    },
    {
      "epoch": 0.00026439208984375,
      "step": 43318,
      "training_step_time": 0.3621714115142822
    },
    {
      "epoch": 0.000264398193359375,
      "model_forward_time": 0.1147775650024414,
      "step": 43319
    },
    {
      "epoch": 0.000264398193359375,
      "step": 43319,
      "training_step_time": 0.44230079650878906
    },
    {
      "epoch": 0.000264404296875,
      "grad_norm": 0.11476723104715347,
      "learning_rate": 1.9682315247255894e-05,
      "loss": 0.0353,
      "step": 43320
    },
    {
      "epoch": 0.000264404296875,
      "model_forward_time": 0.11463689804077148,
      "step": 43320
    },
    {
      "epoch": 0.000264404296875,
      "step": 43320,
      "training_step_time": 0.42818522453308105
    },
    {
      "epoch": 0.000264410400390625,
      "model_forward_time": 0.11500906944274902,
      "step": 43321
    },
    {
      "epoch": 0.000264410400390625,
      "step": 43321,
      "training_step_time": 0.3891136646270752
    },
    {
      "epoch": 0.00026441650390625,
      "model_forward_time": 0.11452889442443848,
      "step": 43322
    },
    {
      "epoch": 0.00026441650390625,
      "step": 43322,
      "training_step_time": 0.39874839782714844
    },
    {
      "epoch": 0.000264422607421875,
      "model_forward_time": 0.11479878425598145,
      "step": 43323
    },
    {
      "epoch": 0.000264422607421875,
      "step": 43323,
      "training_step_time": 0.38698410987854004
    },
    {
      "epoch": 0.0002644287109375,
      "model_forward_time": 0.11448311805725098,
      "step": 43324
    },
    {
      "epoch": 0.0002644287109375,
      "step": 43324,
      "training_step_time": 0.3834824562072754
    },
    {
      "epoch": 0.000264434814453125,
      "model_forward_time": 0.11527681350708008,
      "step": 43325
    },
    {
      "epoch": 0.000264434814453125,
      "step": 43325,
      "training_step_time": 0.37950778007507324
    },
    {
      "epoch": 0.00026444091796875,
      "model_forward_time": 0.11484956741333008,
      "step": 43326
    },
    {
      "epoch": 0.00026444091796875,
      "step": 43326,
      "training_step_time": 0.39437079429626465
    },
    {
      "epoch": 0.000264447021484375,
      "model_forward_time": 0.1166386604309082,
      "step": 43327
    },
    {
      "epoch": 0.000264447021484375,
      "step": 43327,
      "training_step_time": 0.45470261573791504
    },
    {
      "epoch": 0.000264453125,
      "model_forward_time": 0.11540389060974121,
      "step": 43328
    },
    {
      "epoch": 0.000264453125,
      "step": 43328,
      "training_step_time": 0.42232728004455566
    },
    {
      "epoch": 0.000264459228515625,
      "model_forward_time": 0.1154029369354248,
      "step": 43329
    },
    {
      "epoch": 0.000264459228515625,
      "step": 43329,
      "training_step_time": 0.4400906562805176
    },
    {
      "epoch": 0.00026446533203125,
      "grad_norm": 0.08500636368989944,
      "learning_rate": 1.9660406002480765e-05,
      "loss": 0.0368,
      "step": 43330
    },
    {
      "epoch": 0.00026446533203125,
      "model_forward_time": 0.11543440818786621,
      "step": 43330
    },
    {
      "epoch": 0.00026446533203125,
      "step": 43330,
      "training_step_time": 0.4886045455932617
    },
    {
      "epoch": 0.000264471435546875,
      "model_forward_time": 0.11499691009521484,
      "step": 43331
    },
    {
      "epoch": 0.000264471435546875,
      "step": 43331,
      "training_step_time": 0.38112926483154297
    },
    {
      "epoch": 0.0002644775390625,
      "model_forward_time": 0.11534237861633301,
      "step": 43332
    },
    {
      "epoch": 0.0002644775390625,
      "step": 43332,
      "training_step_time": 0.39618754386901855
    },
    {
      "epoch": 0.000264483642578125,
      "model_forward_time": 0.11508011817932129,
      "step": 43333
    },
    {
      "epoch": 0.000264483642578125,
      "step": 43333,
      "training_step_time": 0.4246337413787842
    },
    {
      "epoch": 0.00026448974609375,
      "model_forward_time": 0.11473298072814941,
      "step": 43334
    },
    {
      "epoch": 0.00026448974609375,
      "step": 43334,
      "training_step_time": 0.3872852325439453
    },
    {
      "epoch": 0.000264495849609375,
      "model_forward_time": 0.11464762687683105,
      "step": 43335
    },
    {
      "epoch": 0.000264495849609375,
      "step": 43335,
      "training_step_time": 0.4191441535949707
    },
    {
      "epoch": 0.000264501953125,
      "model_forward_time": 0.1151418685913086,
      "step": 43336
    },
    {
      "epoch": 0.000264501953125,
      "step": 43336,
      "training_step_time": 0.44187188148498535
    },
    {
      "epoch": 0.000264508056640625,
      "model_forward_time": 0.11656522750854492,
      "step": 43337
    },
    {
      "epoch": 0.000264508056640625,
      "step": 43337,
      "training_step_time": 0.46700549125671387
    },
    {
      "epoch": 0.00026451416015625,
      "model_forward_time": 0.11542344093322754,
      "step": 43338
    },
    {
      "epoch": 0.00026451416015625,
      "step": 43338,
      "training_step_time": 0.39496779441833496
    },
    {
      "epoch": 0.000264520263671875,
      "model_forward_time": 0.11443185806274414,
      "step": 43339
    },
    {
      "epoch": 0.000264520263671875,
      "step": 43339,
      "training_step_time": 0.39762091636657715
    },
    {
      "epoch": 0.0002645263671875,
      "grad_norm": 0.1229788288474083,
      "learning_rate": 1.9638505974073234e-05,
      "loss": 0.0408,
      "step": 43340
    },
    {
      "epoch": 0.0002645263671875,
      "model_forward_time": 0.11495113372802734,
      "step": 43340
    },
    {
      "epoch": 0.0002645263671875,
      "step": 43340,
      "training_step_time": 0.3996701240539551
    },
    {
      "epoch": 0.000264532470703125,
      "model_forward_time": 0.11501073837280273,
      "step": 43341
    },
    {
      "epoch": 0.000264532470703125,
      "step": 43341,
      "training_step_time": 0.5223114490509033
    },
    {
      "epoch": 0.00026453857421875,
      "model_forward_time": 0.1153569221496582,
      "step": 43342
    },
    {
      "epoch": 0.00026453857421875,
      "step": 43342,
      "training_step_time": 0.4006979465484619
    },
    {
      "epoch": 0.000264544677734375,
      "model_forward_time": 0.11514902114868164,
      "step": 43343
    },
    {
      "epoch": 0.000264544677734375,
      "step": 43343,
      "training_step_time": 0.4840524196624756
    },
    {
      "epoch": 0.00026455078125,
      "model_forward_time": 0.11486434936523438,
      "step": 43344
    },
    {
      "epoch": 0.00026455078125,
      "step": 43344,
      "training_step_time": 0.40567612648010254
    },
    {
      "epoch": 0.000264556884765625,
      "model_forward_time": 0.11510229110717773,
      "step": 43345
    },
    {
      "epoch": 0.000264556884765625,
      "step": 43345,
      "training_step_time": 0.40830206871032715
    },
    {
      "epoch": 0.00026456298828125,
      "model_forward_time": 0.1140742301940918,
      "step": 43346
    },
    {
      "epoch": 0.00026456298828125,
      "step": 43346,
      "training_step_time": 0.3919034004211426
    },
    {
      "epoch": 0.000264569091796875,
      "model_forward_time": 0.11480975151062012,
      "step": 43347
    },
    {
      "epoch": 0.000264569091796875,
      "step": 43347,
      "training_step_time": 0.3663146495819092
    },
    {
      "epoch": 0.0002645751953125,
      "model_forward_time": 0.11563444137573242,
      "step": 43348
    },
    {
      "epoch": 0.0002645751953125,
      "step": 43348,
      "training_step_time": 0.45378661155700684
    },
    {
      "epoch": 0.000264581298828125,
      "model_forward_time": 0.11555743217468262,
      "step": 43349
    },
    {
      "epoch": 0.000264581298828125,
      "step": 43349,
      "training_step_time": 0.48108696937561035
    },
    {
      "epoch": 0.00026458740234375,
      "grad_norm": 0.12517639994621277,
      "learning_rate": 1.9616615168685943e-05,
      "loss": 0.0408,
      "step": 43350
    },
    {
      "epoch": 0.00026458740234375,
      "model_forward_time": 0.11460137367248535,
      "step": 43350
    },
    {
      "epoch": 0.00026458740234375,
      "step": 43350,
      "training_step_time": 0.4093472957611084
    },
    {
      "epoch": 0.000264593505859375,
      "model_forward_time": 0.11590218544006348,
      "step": 43351
    },
    {
      "epoch": 0.000264593505859375,
      "step": 43351,
      "training_step_time": 0.4524075984954834
    },
    {
      "epoch": 0.000264599609375,
      "model_forward_time": 0.11422991752624512,
      "step": 43352
    },
    {
      "epoch": 0.000264599609375,
      "step": 43352,
      "training_step_time": 0.3962090015411377
    },
    {
      "epoch": 0.000264605712890625,
      "model_forward_time": 0.11501359939575195,
      "step": 43353
    },
    {
      "epoch": 0.000264605712890625,
      "step": 43353,
      "training_step_time": 0.3939528465270996
    },
    {
      "epoch": 0.00026461181640625,
      "model_forward_time": 0.11498379707336426,
      "step": 43354
    },
    {
      "epoch": 0.00026461181640625,
      "step": 43354,
      "training_step_time": 0.45348215103149414
    },
    {
      "epoch": 0.000264617919921875,
      "model_forward_time": 0.11572122573852539,
      "step": 43355
    },
    {
      "epoch": 0.000264617919921875,
      "step": 43355,
      "training_step_time": 0.38878774642944336
    },
    {
      "epoch": 0.0002646240234375,
      "model_forward_time": 0.11554455757141113,
      "step": 43356
    },
    {
      "epoch": 0.0002646240234375,
      "step": 43356,
      "training_step_time": 0.3990905284881592
    },
    {
      "epoch": 0.000264630126953125,
      "model_forward_time": 0.11446571350097656,
      "step": 43357
    },
    {
      "epoch": 0.000264630126953125,
      "step": 43357,
      "training_step_time": 0.46242284774780273
    },
    {
      "epoch": 0.00026463623046875,
      "model_forward_time": 0.11662769317626953,
      "step": 43358
    },
    {
      "epoch": 0.00026463623046875,
      "step": 43358,
      "training_step_time": 0.41994380950927734
    },
    {
      "epoch": 0.000264642333984375,
      "model_forward_time": 0.11629390716552734,
      "step": 43359
    },
    {
      "epoch": 0.000264642333984375,
      "step": 43359,
      "training_step_time": 0.4124629497528076
    },
    {
      "epoch": 0.0002646484375,
      "grad_norm": 0.11688236147165298,
      "learning_rate": 1.9594733592968733e-05,
      "loss": 0.0377,
      "step": 43360
    },
    {
      "epoch": 0.0002646484375,
      "model_forward_time": 0.11560177803039551,
      "step": 43360
    },
    {
      "epoch": 0.0002646484375,
      "step": 43360,
      "training_step_time": 0.3941915035247803
    },
    {
      "epoch": 0.000264654541015625,
      "model_forward_time": 0.11564350128173828,
      "step": 43361
    },
    {
      "epoch": 0.000264654541015625,
      "step": 43361,
      "training_step_time": 0.3896777629852295
    },
    {
      "epoch": 0.00026466064453125,
      "model_forward_time": 0.11627626419067383,
      "step": 43362
    },
    {
      "epoch": 0.00026466064453125,
      "step": 43362,
      "training_step_time": 0.4078366756439209
    },
    {
      "epoch": 0.000264666748046875,
      "model_forward_time": 0.11583757400512695,
      "step": 43363
    },
    {
      "epoch": 0.000264666748046875,
      "step": 43363,
      "training_step_time": 0.4809541702270508
    },
    {
      "epoch": 0.0002646728515625,
      "model_forward_time": 0.1148231029510498,
      "step": 43364
    },
    {
      "epoch": 0.0002646728515625,
      "step": 43364,
      "training_step_time": 0.41927433013916016
    },
    {
      "epoch": 0.000264678955078125,
      "model_forward_time": 0.11469388008117676,
      "step": 43365
    },
    {
      "epoch": 0.000264678955078125,
      "step": 43365,
      "training_step_time": 0.48166513442993164
    },
    {
      "epoch": 0.00026468505859375,
      "model_forward_time": 0.1147303581237793,
      "step": 43366
    },
    {
      "epoch": 0.00026468505859375,
      "step": 43366,
      "training_step_time": 0.46147608757019043
    },
    {
      "epoch": 0.000264691162109375,
      "model_forward_time": 0.1153559684753418,
      "step": 43367
    },
    {
      "epoch": 0.000264691162109375,
      "step": 43367,
      "training_step_time": 0.38403773307800293
    },
    {
      "epoch": 0.000264697265625,
      "model_forward_time": 0.11505675315856934,
      "step": 43368
    },
    {
      "epoch": 0.000264697265625,
      "step": 43368,
      "training_step_time": 0.469010591506958
    },
    {
      "epoch": 0.000264703369140625,
      "model_forward_time": 0.11447691917419434,
      "step": 43369
    },
    {
      "epoch": 0.000264703369140625,
      "step": 43369,
      "training_step_time": 0.42029309272766113
    },
    {
      "epoch": 0.00026470947265625,
      "grad_norm": 0.09975118190050125,
      "learning_rate": 1.9572861253568657e-05,
      "loss": 0.0349,
      "step": 43370
    },
    {
      "epoch": 0.00026470947265625,
      "model_forward_time": 0.11406660079956055,
      "step": 43370
    },
    {
      "epoch": 0.00026470947265625,
      "step": 43370,
      "training_step_time": 0.39679837226867676
    },
    {
      "epoch": 0.000264715576171875,
      "model_forward_time": 0.11511731147766113,
      "step": 43371
    },
    {
      "epoch": 0.000264715576171875,
      "step": 43371,
      "training_step_time": 0.4959392547607422
    },
    {
      "epoch": 0.0002647216796875,
      "model_forward_time": 0.11509060859680176,
      "step": 43372
    },
    {
      "epoch": 0.0002647216796875,
      "step": 43372,
      "training_step_time": 0.45675230026245117
    },
    {
      "epoch": 0.000264727783203125,
      "model_forward_time": 0.11530160903930664,
      "step": 43373
    },
    {
      "epoch": 0.000264727783203125,
      "step": 43373,
      "training_step_time": 0.40439319610595703
    },
    {
      "epoch": 0.00026473388671875,
      "model_forward_time": 0.11474037170410156,
      "step": 43374
    },
    {
      "epoch": 0.00026473388671875,
      "step": 43374,
      "training_step_time": 0.4047963619232178
    },
    {
      "epoch": 0.000264739990234375,
      "model_forward_time": 0.11489701271057129,
      "step": 43375
    },
    {
      "epoch": 0.000264739990234375,
      "step": 43375,
      "training_step_time": 0.3990020751953125
    },
    {
      "epoch": 0.00026474609375,
      "model_forward_time": 0.11514830589294434,
      "step": 43376
    },
    {
      "epoch": 0.00026474609375,
      "step": 43376,
      "training_step_time": 0.38764166831970215
    },
    {
      "epoch": 0.000264752197265625,
      "model_forward_time": 0.11533069610595703,
      "step": 43377
    },
    {
      "epoch": 0.000264752197265625,
      "step": 43377,
      "training_step_time": 0.5448029041290283
    },
    {
      "epoch": 0.00026475830078125,
      "model_forward_time": 0.11531543731689453,
      "step": 43378
    },
    {
      "epoch": 0.00026475830078125,
      "step": 43378,
      "training_step_time": 0.3940715789794922
    },
    {
      "epoch": 0.000264764404296875,
      "model_forward_time": 0.1151123046875,
      "step": 43379
    },
    {
      "epoch": 0.000264764404296875,
      "step": 43379,
      "training_step_time": 0.4320197105407715
    },
    {
      "epoch": 0.0002647705078125,
      "grad_norm": 0.07701732218265533,
      "learning_rate": 1.9550998157129946e-05,
      "loss": 0.035,
      "step": 43380
    },
    {
      "epoch": 0.0002647705078125,
      "model_forward_time": 0.11537337303161621,
      "step": 43380
    },
    {
      "epoch": 0.0002647705078125,
      "step": 43380,
      "training_step_time": 0.4040334224700928
    },
    {
      "epoch": 0.000264776611328125,
      "model_forward_time": 0.11512422561645508,
      "step": 43381
    },
    {
      "epoch": 0.000264776611328125,
      "step": 43381,
      "training_step_time": 0.40175795555114746
    },
    {
      "epoch": 0.00026478271484375,
      "model_forward_time": 0.11461234092712402,
      "step": 43382
    },
    {
      "epoch": 0.00026478271484375,
      "step": 43382,
      "training_step_time": 0.3912475109100342
    },
    {
      "epoch": 0.000264788818359375,
      "model_forward_time": 0.1149294376373291,
      "step": 43383
    },
    {
      "epoch": 0.000264788818359375,
      "step": 43383,
      "training_step_time": 0.48618221282958984
    },
    {
      "epoch": 0.000264794921875,
      "model_forward_time": 0.11462092399597168,
      "step": 43384
    },
    {
      "epoch": 0.000264794921875,
      "step": 43384,
      "training_step_time": 0.393970251083374
    },
    {
      "epoch": 0.000264801025390625,
      "model_forward_time": 0.1156611442565918,
      "step": 43385
    },
    {
      "epoch": 0.000264801025390625,
      "step": 43385,
      "training_step_time": 0.4058341979980469
    },
    {
      "epoch": 0.00026480712890625,
      "model_forward_time": 0.11503148078918457,
      "step": 43386
    },
    {
      "epoch": 0.00026480712890625,
      "step": 43386,
      "training_step_time": 0.39591407775878906
    },
    {
      "epoch": 0.000264813232421875,
      "model_forward_time": 0.11527562141418457,
      "step": 43387
    },
    {
      "epoch": 0.000264813232421875,
      "step": 43387,
      "training_step_time": 0.5011153221130371
    },
    {
      "epoch": 0.0002648193359375,
      "model_forward_time": 0.11530661582946777,
      "step": 43388
    },
    {
      "epoch": 0.0002648193359375,
      "step": 43388,
      "training_step_time": 0.41187310218811035
    },
    {
      "epoch": 0.000264825439453125,
      "model_forward_time": 0.11696553230285645,
      "step": 43389
    },
    {
      "epoch": 0.000264825439453125,
      "step": 43389,
      "training_step_time": 0.4820675849914551
    },
    {
      "epoch": 0.00026483154296875,
      "grad_norm": 0.1431189775466919,
      "learning_rate": 1.9529144310294023e-05,
      "loss": 0.0417,
      "step": 43390
    },
    {
      "epoch": 0.00026483154296875,
      "model_forward_time": 0.11461186408996582,
      "step": 43390
    },
    {
      "epoch": 0.00026483154296875,
      "step": 43390,
      "training_step_time": 0.38414502143859863
    },
    {
      "epoch": 0.000264837646484375,
      "model_forward_time": 0.11523962020874023,
      "step": 43391
    },
    {
      "epoch": 0.000264837646484375,
      "step": 43391,
      "training_step_time": 0.36546969413757324
    },
    {
      "epoch": 0.00026484375,
      "model_forward_time": 0.11561465263366699,
      "step": 43392
    },
    {
      "epoch": 0.00026484375,
      "step": 43392,
      "training_step_time": 0.454756498336792
    },
    {
      "epoch": 0.000264849853515625,
      "model_forward_time": 0.11455965042114258,
      "step": 43393
    },
    {
      "epoch": 0.000264849853515625,
      "step": 43393,
      "training_step_time": 0.3994104862213135
    },
    {
      "epoch": 0.00026485595703125,
      "model_forward_time": 0.11523151397705078,
      "step": 43394
    },
    {
      "epoch": 0.00026485595703125,
      "step": 43394,
      "training_step_time": 0.4107391834259033
    },
    {
      "epoch": 0.000264862060546875,
      "model_forward_time": 0.11478638648986816,
      "step": 43395
    },
    {
      "epoch": 0.000264862060546875,
      "step": 43395,
      "training_step_time": 0.46085691452026367
    },
    {
      "epoch": 0.0002648681640625,
      "model_forward_time": 0.11528921127319336,
      "step": 43396
    },
    {
      "epoch": 0.0002648681640625,
      "step": 43396,
      "training_step_time": 0.40314483642578125
    },
    {
      "epoch": 0.000264874267578125,
      "model_forward_time": 0.11511445045471191,
      "step": 43397
    },
    {
      "epoch": 0.000264874267578125,
      "step": 43397,
      "training_step_time": 0.39180612564086914
    },
    {
      "epoch": 0.00026488037109375,
      "model_forward_time": 0.11528277397155762,
      "step": 43398
    },
    {
      "epoch": 0.00026488037109375,
      "step": 43398,
      "training_step_time": 0.389265775680542
    },
    {
      "epoch": 0.000264886474609375,
      "model_forward_time": 0.11490702629089355,
      "step": 43399
    },
    {
      "epoch": 0.000264886474609375,
      "step": 43399,
      "training_step_time": 0.3883185386657715
    },
    {
      "epoch": 0.000264892578125,
      "grad_norm": 0.1212235689163208,
      "learning_rate": 1.950729971969955e-05,
      "loss": 0.0385,
      "step": 43400
    },
    {
      "epoch": 0.000264892578125,
      "model_forward_time": 0.11531472206115723,
      "step": 43400
    },
    {
      "epoch": 0.000264892578125,
      "step": 43400,
      "training_step_time": 0.47693300247192383
    },
    {
      "epoch": 0.000264898681640625,
      "model_forward_time": 0.11511397361755371,
      "step": 43401
    },
    {
      "epoch": 0.000264898681640625,
      "step": 43401,
      "training_step_time": 0.5160458087921143
    },
    {
      "epoch": 0.00026490478515625,
      "model_forward_time": 0.11533904075622559,
      "step": 43402
    },
    {
      "epoch": 0.00026490478515625,
      "step": 43402,
      "training_step_time": 0.38243985176086426
    },
    {
      "epoch": 0.000264910888671875,
      "model_forward_time": 0.11468768119812012,
      "step": 43403
    },
    {
      "epoch": 0.000264910888671875,
      "step": 43403,
      "training_step_time": 0.46163129806518555
    },
    {
      "epoch": 0.0002649169921875,
      "model_forward_time": 0.11529040336608887,
      "step": 43404
    },
    {
      "epoch": 0.0002649169921875,
      "step": 43404,
      "training_step_time": 0.3886690139770508
    },
    {
      "epoch": 0.000264923095703125,
      "model_forward_time": 0.11513137817382812,
      "step": 43405
    },
    {
      "epoch": 0.000264923095703125,
      "step": 43405,
      "training_step_time": 0.3932309150695801
    },
    {
      "epoch": 0.00026492919921875,
      "model_forward_time": 0.1150364875793457,
      "step": 43406
    },
    {
      "epoch": 0.00026492919921875,
      "step": 43406,
      "training_step_time": 0.49233293533325195
    },
    {
      "epoch": 0.000264935302734375,
      "model_forward_time": 0.11502790451049805,
      "step": 43407
    },
    {
      "epoch": 0.000264935302734375,
      "step": 43407,
      "training_step_time": 0.5041027069091797
    },
    {
      "epoch": 0.00026494140625,
      "model_forward_time": 0.11483621597290039,
      "step": 43408
    },
    {
      "epoch": 0.00026494140625,
      "step": 43408,
      "training_step_time": 0.4994521141052246
    },
    {
      "epoch": 0.000264947509765625,
      "model_forward_time": 0.11480975151062012,
      "step": 43409
    },
    {
      "epoch": 0.000264947509765625,
      "step": 43409,
      "training_step_time": 0.6013689041137695
    },
    {
      "epoch": 0.00026495361328125,
      "grad_norm": 0.07015363872051239,
      "learning_rate": 1.9485464391982284e-05,
      "loss": 0.0341,
      "step": 43410
    },
    {
      "epoch": 0.00026495361328125,
      "model_forward_time": 0.11484217643737793,
      "step": 43410
    },
    {
      "epoch": 0.00026495361328125,
      "step": 43410,
      "training_step_time": 0.38973522186279297
    },
    {
      "epoch": 0.000264959716796875,
      "model_forward_time": 0.11445474624633789,
      "step": 43411
    },
    {
      "epoch": 0.000264959716796875,
      "step": 43411,
      "training_step_time": 0.3970358371734619
    },
    {
      "epoch": 0.0002649658203125,
      "model_forward_time": 0.11586165428161621,
      "step": 43412
    },
    {
      "epoch": 0.0002649658203125,
      "step": 43412,
      "training_step_time": 0.38467836380004883
    },
    {
      "epoch": 0.000264971923828125,
      "model_forward_time": 0.1142728328704834,
      "step": 43413
    },
    {
      "epoch": 0.000264971923828125,
      "step": 43413,
      "training_step_time": 0.4581458568572998
    },
    {
      "epoch": 0.00026497802734375,
      "model_forward_time": 0.11493563652038574,
      "step": 43414
    },
    {
      "epoch": 0.00026497802734375,
      "step": 43414,
      "training_step_time": 0.4197554588317871
    },
    {
      "epoch": 0.000264984130859375,
      "model_forward_time": 0.11449313163757324,
      "step": 43415
    },
    {
      "epoch": 0.000264984130859375,
      "step": 43415,
      "training_step_time": 0.6003744602203369
    },
    {
      "epoch": 0.000264990234375,
      "model_forward_time": 0.1153717041015625,
      "step": 43416
    },
    {
      "epoch": 0.000264990234375,
      "step": 43416,
      "training_step_time": 0.3950991630554199
    },
    {
      "epoch": 0.000264996337890625,
      "model_forward_time": 0.1148378849029541,
      "step": 43417
    },
    {
      "epoch": 0.000264996337890625,
      "step": 43417,
      "training_step_time": 0.3964998722076416
    },
    {
      "epoch": 0.00026500244140625,
      "model_forward_time": 0.11455917358398438,
      "step": 43418
    },
    {
      "epoch": 0.00026500244140625,
      "step": 43418,
      "training_step_time": 0.3968822956085205
    },
    {
      "epoch": 0.000265008544921875,
      "model_forward_time": 0.11621546745300293,
      "step": 43419
    },
    {
      "epoch": 0.000265008544921875,
      "step": 43419,
      "training_step_time": 0.3801436424255371
    },
    {
      "epoch": 0.0002650146484375,
      "grad_norm": 0.09654732793569565,
      "learning_rate": 1.9463638333775276e-05,
      "loss": 0.0387,
      "step": 43420
    },
    {
      "epoch": 0.0002650146484375,
      "model_forward_time": 0.11574149131774902,
      "step": 43420
    },
    {
      "epoch": 0.0002650146484375,
      "step": 43420,
      "training_step_time": 0.4341237545013428
    },
    {
      "epoch": 0.000265020751953125,
      "model_forward_time": 0.11574554443359375,
      "step": 43421
    },
    {
      "epoch": 0.000265020751953125,
      "step": 43421,
      "training_step_time": 0.6356885433197021
    },
    {
      "epoch": 0.00026502685546875,
      "model_forward_time": 0.11528754234313965,
      "step": 43422
    },
    {
      "epoch": 0.00026502685546875,
      "step": 43422,
      "training_step_time": 0.40720248222351074
    },
    {
      "epoch": 0.000265032958984375,
      "model_forward_time": 0.11509251594543457,
      "step": 43423
    },
    {
      "epoch": 0.000265032958984375,
      "step": 43423,
      "training_step_time": 0.39347171783447266
    },
    {
      "epoch": 0.0002650390625,
      "model_forward_time": 0.11478185653686523,
      "step": 43424
    },
    {
      "epoch": 0.0002650390625,
      "step": 43424,
      "training_step_time": 0.4015495777130127
    },
    {
      "epoch": 0.000265045166015625,
      "model_forward_time": 0.11475968360900879,
      "step": 43425
    },
    {
      "epoch": 0.000265045166015625,
      "step": 43425,
      "training_step_time": 0.38524913787841797
    },
    {
      "epoch": 0.00026505126953125,
      "model_forward_time": 0.1149759292602539,
      "step": 43426
    },
    {
      "epoch": 0.00026505126953125,
      "step": 43426,
      "training_step_time": 0.389725923538208
    },
    {
      "epoch": 0.000265057373046875,
      "model_forward_time": 0.11612820625305176,
      "step": 43427
    },
    {
      "epoch": 0.000265057373046875,
      "step": 43427,
      "training_step_time": 0.5643472671508789
    },
    {
      "epoch": 0.0002650634765625,
      "model_forward_time": 0.11725139617919922,
      "step": 43428
    },
    {
      "epoch": 0.0002650634765625,
      "step": 43428,
      "training_step_time": 0.45283007621765137
    },
    {
      "epoch": 0.000265069580078125,
      "model_forward_time": 0.11636495590209961,
      "step": 43429
    },
    {
      "epoch": 0.000265069580078125,
      "step": 43429,
      "training_step_time": 0.5002250671386719
    },
    {
      "epoch": 0.00026507568359375,
      "grad_norm": 0.08898117393255234,
      "learning_rate": 1.944182155170864e-05,
      "loss": 0.0398,
      "step": 43430
    },
    {
      "epoch": 0.00026507568359375,
      "model_forward_time": 0.11472129821777344,
      "step": 43430
    },
    {
      "epoch": 0.00026507568359375,
      "step": 43430,
      "training_step_time": 0.39589476585388184
    },
    {
      "epoch": 0.000265081787109375,
      "model_forward_time": 0.11451101303100586,
      "step": 43431
    },
    {
      "epoch": 0.000265081787109375,
      "step": 43431,
      "training_step_time": 0.3942131996154785
    },
    {
      "epoch": 0.000265087890625,
      "model_forward_time": 0.11506319046020508,
      "step": 43432
    },
    {
      "epoch": 0.000265087890625,
      "step": 43432,
      "training_step_time": 0.3878173828125
    },
    {
      "epoch": 0.000265093994140625,
      "model_forward_time": 0.11557292938232422,
      "step": 43433
    },
    {
      "epoch": 0.000265093994140625,
      "step": 43433,
      "training_step_time": 0.5130887031555176
    },
    {
      "epoch": 0.00026510009765625,
      "model_forward_time": 0.11454510688781738,
      "step": 43434
    },
    {
      "epoch": 0.00026510009765625,
      "step": 43434,
      "training_step_time": 0.49936461448669434
    },
    {
      "epoch": 0.000265106201171875,
      "model_forward_time": 0.11555719375610352,
      "step": 43435
    },
    {
      "epoch": 0.000265106201171875,
      "step": 43435,
      "training_step_time": 0.4360020160675049
    },
    {
      "epoch": 0.0002651123046875,
      "model_forward_time": 0.11467504501342773,
      "step": 43436
    },
    {
      "epoch": 0.0002651123046875,
      "step": 43436,
      "training_step_time": 0.4123189449310303
    },
    {
      "epoch": 0.000265118408203125,
      "model_forward_time": 0.11444091796875,
      "step": 43437
    },
    {
      "epoch": 0.000265118408203125,
      "step": 43437,
      "training_step_time": 0.39570116996765137
    },
    {
      "epoch": 0.00026512451171875,
      "model_forward_time": 0.1149592399597168,
      "step": 43438
    },
    {
      "epoch": 0.00026512451171875,
      "step": 43438,
      "training_step_time": 0.392472505569458
    },
    {
      "epoch": 0.000265130615234375,
      "model_forward_time": 0.11440587043762207,
      "step": 43439
    },
    {
      "epoch": 0.000265130615234375,
      "step": 43439,
      "training_step_time": 0.9681110382080078
    },
    {
      "epoch": 0.00026513671875,
      "grad_norm": 0.10388456284999847,
      "learning_rate": 1.942001405240979e-05,
      "loss": 0.0397,
      "step": 43440
    },
    {
      "epoch": 0.00026513671875,
      "model_forward_time": 0.11410403251647949,
      "step": 43440
    },
    {
      "epoch": 0.00026513671875,
      "step": 43440,
      "training_step_time": 0.4485762119293213
    },
    {
      "epoch": 0.000265142822265625,
      "model_forward_time": 0.11471319198608398,
      "step": 43441
    },
    {
      "epoch": 0.000265142822265625,
      "step": 43441,
      "training_step_time": 0.39371204376220703
    },
    {
      "epoch": 0.00026514892578125,
      "model_forward_time": 0.11374878883361816,
      "step": 43442
    },
    {
      "epoch": 0.00026514892578125,
      "step": 43442,
      "training_step_time": 0.46089625358581543
    },
    {
      "epoch": 0.000265155029296875,
      "model_forward_time": 0.11467361450195312,
      "step": 43443
    },
    {
      "epoch": 0.000265155029296875,
      "step": 43443,
      "training_step_time": 0.378096342086792
    },
    {
      "epoch": 0.0002651611328125,
      "model_forward_time": 0.11515140533447266,
      "step": 43444
    },
    {
      "epoch": 0.0002651611328125,
      "step": 43444,
      "training_step_time": 0.3949453830718994
    },
    {
      "epoch": 0.000265167236328125,
      "model_forward_time": 0.11587119102478027,
      "step": 43445
    },
    {
      "epoch": 0.000265167236328125,
      "step": 43445,
      "training_step_time": 0.6904275417327881
    },
    {
      "epoch": 0.00026517333984375,
      "model_forward_time": 0.11451578140258789,
      "step": 43446
    },
    {
      "epoch": 0.00026517333984375,
      "step": 43446,
      "training_step_time": 0.41313886642456055
    },
    {
      "epoch": 0.000265179443359375,
      "model_forward_time": 0.11444211006164551,
      "step": 43447
    },
    {
      "epoch": 0.000265179443359375,
      "step": 43447,
      "training_step_time": 0.46257805824279785
    },
    {
      "epoch": 0.000265185546875,
      "model_forward_time": 0.11497259140014648,
      "step": 43448
    },
    {
      "epoch": 0.000265185546875,
      "step": 43448,
      "training_step_time": 0.4068107604980469
    },
    {
      "epoch": 0.000265191650390625,
      "model_forward_time": 0.11407732963562012,
      "step": 43449
    },
    {
      "epoch": 0.000265191650390625,
      "step": 43449,
      "training_step_time": 0.4677705764770508
    },
    {
      "epoch": 0.00026519775390625,
      "grad_norm": 0.12222223728895187,
      "learning_rate": 1.939821584250326e-05,
      "loss": 0.0409,
      "step": 43450
    },
    {
      "epoch": 0.00026519775390625,
      "model_forward_time": 0.1146383285522461,
      "step": 43450
    },
    {
      "epoch": 0.00026519775390625,
      "step": 43450,
      "training_step_time": 0.3959941864013672
    },
    {
      "epoch": 0.000265203857421875,
      "model_forward_time": 0.11477494239807129,
      "step": 43451
    },
    {
      "epoch": 0.000265203857421875,
      "step": 43451,
      "training_step_time": 0.7303030490875244
    },
    {
      "epoch": 0.0002652099609375,
      "model_forward_time": 0.11404299736022949,
      "step": 43452
    },
    {
      "epoch": 0.0002652099609375,
      "step": 43452,
      "training_step_time": 0.3850524425506592
    },
    {
      "epoch": 0.000265216064453125,
      "model_forward_time": 0.11468195915222168,
      "step": 43453
    },
    {
      "epoch": 0.000265216064453125,
      "step": 43453,
      "training_step_time": 0.42082858085632324
    },
    {
      "epoch": 0.00026522216796875,
      "model_forward_time": 0.1141214370727539,
      "step": 43454
    },
    {
      "epoch": 0.00026522216796875,
      "step": 43454,
      "training_step_time": 0.40559840202331543
    },
    {
      "epoch": 0.000265228271484375,
      "model_forward_time": 0.11423134803771973,
      "step": 43455
    },
    {
      "epoch": 0.000265228271484375,
      "step": 43455,
      "training_step_time": 0.3954792022705078
    },
    {
      "epoch": 0.000265234375,
      "model_forward_time": 0.11416172981262207,
      "step": 43456
    },
    {
      "epoch": 0.000265234375,
      "step": 43456,
      "training_step_time": 0.3984205722808838
    },
    {
      "epoch": 0.000265240478515625,
      "model_forward_time": 0.11488151550292969,
      "step": 43457
    },
    {
      "epoch": 0.000265240478515625,
      "step": 43457,
      "training_step_time": 0.6259891986846924
    },
    {
      "epoch": 0.00026524658203125,
      "model_forward_time": 0.11476922035217285,
      "step": 43458
    },
    {
      "epoch": 0.00026524658203125,
      "step": 43458,
      "training_step_time": 0.4014780521392822
    },
    {
      "epoch": 0.000265252685546875,
      "model_forward_time": 0.11516261100769043,
      "step": 43459
    },
    {
      "epoch": 0.000265252685546875,
      "step": 43459,
      "training_step_time": 0.46824121475219727
    },
    {
      "epoch": 0.0002652587890625,
      "grad_norm": 0.1270478516817093,
      "learning_rate": 1.937642692861076e-05,
      "loss": 0.0393,
      "step": 43460
    },
    {
      "epoch": 0.0002652587890625,
      "model_forward_time": 0.11457371711730957,
      "step": 43460
    },
    {
      "epoch": 0.0002652587890625,
      "step": 43460,
      "training_step_time": 0.4184443950653076
    },
    {
      "epoch": 0.000265264892578125,
      "model_forward_time": 0.11520838737487793,
      "step": 43461
    },
    {
      "epoch": 0.000265264892578125,
      "step": 43461,
      "training_step_time": 0.48187708854675293
    },
    {
      "epoch": 0.00026527099609375,
      "model_forward_time": 0.11472415924072266,
      "step": 43462
    },
    {
      "epoch": 0.00026527099609375,
      "step": 43462,
      "training_step_time": 0.4513986110687256
    },
    {
      "epoch": 0.000265277099609375,
      "model_forward_time": 0.11547374725341797,
      "step": 43463
    },
    {
      "epoch": 0.000265277099609375,
      "step": 43463,
      "training_step_time": 0.3965110778808594
    },
    {
      "epoch": 0.000265283203125,
      "model_forward_time": 0.11415958404541016,
      "step": 43464
    },
    {
      "epoch": 0.000265283203125,
      "step": 43464,
      "training_step_time": 0.3963611125946045
    },
    {
      "epoch": 0.000265289306640625,
      "model_forward_time": 0.11504030227661133,
      "step": 43465
    },
    {
      "epoch": 0.000265289306640625,
      "step": 43465,
      "training_step_time": 0.397580623626709
    },
    {
      "epoch": 0.00026529541015625,
      "model_forward_time": 0.1149284839630127,
      "step": 43466
    },
    {
      "epoch": 0.00026529541015625,
      "step": 43466,
      "training_step_time": 0.40026116371154785
    },
    {
      "epoch": 0.000265301513671875,
      "model_forward_time": 0.11578083038330078,
      "step": 43467
    },
    {
      "epoch": 0.000265301513671875,
      "step": 43467,
      "training_step_time": 0.41428375244140625
    },
    {
      "epoch": 0.0002653076171875,
      "model_forward_time": 0.11464929580688477,
      "step": 43468
    },
    {
      "epoch": 0.0002653076171875,
      "step": 43468,
      "training_step_time": 0.4278099536895752
    },
    {
      "epoch": 0.000265313720703125,
      "model_forward_time": 0.11592411994934082,
      "step": 43469
    },
    {
      "epoch": 0.000265313720703125,
      "step": 43469,
      "training_step_time": 0.5511569976806641
    },
    {
      "epoch": 0.00026531982421875,
      "grad_norm": 0.11276349425315857,
      "learning_rate": 1.9354647317351188e-05,
      "loss": 0.0388,
      "step": 43470
    },
    {
      "epoch": 0.00026531982421875,
      "model_forward_time": 0.11504936218261719,
      "step": 43470
    },
    {
      "epoch": 0.00026531982421875,
      "step": 43470,
      "training_step_time": 0.39791417121887207
    },
    {
      "epoch": 0.000265325927734375,
      "model_forward_time": 0.11535978317260742,
      "step": 43471
    },
    {
      "epoch": 0.000265325927734375,
      "step": 43471,
      "training_step_time": 0.39722442626953125
    },
    {
      "epoch": 0.00026533203125,
      "model_forward_time": 0.12967896461486816,
      "step": 43472
    },
    {
      "epoch": 0.00026533203125,
      "step": 43472,
      "training_step_time": 0.4217972755432129
    },
    {
      "epoch": 0.000265338134765625,
      "model_forward_time": 0.11517620086669922,
      "step": 43473
    },
    {
      "epoch": 0.000265338134765625,
      "step": 43473,
      "training_step_time": 0.4176614284515381
    },
    {
      "epoch": 0.00026534423828125,
      "model_forward_time": 0.11510133743286133,
      "step": 43474
    },
    {
      "epoch": 0.00026534423828125,
      "step": 43474,
      "training_step_time": 0.4653055667877197
    },
    {
      "epoch": 0.000265350341796875,
      "model_forward_time": 0.11527705192565918,
      "step": 43475
    },
    {
      "epoch": 0.000265350341796875,
      "step": 43475,
      "training_step_time": 0.5039081573486328
    },
    {
      "epoch": 0.0002653564453125,
      "model_forward_time": 0.114959716796875,
      "step": 43476
    },
    {
      "epoch": 0.0002653564453125,
      "step": 43476,
      "training_step_time": 0.4056355953216553
    },
    {
      "epoch": 0.000265362548828125,
      "model_forward_time": 0.11546826362609863,
      "step": 43477
    },
    {
      "epoch": 0.000265362548828125,
      "step": 43477,
      "training_step_time": 0.3934469223022461
    },
    {
      "epoch": 0.00026536865234375,
      "model_forward_time": 0.11486935615539551,
      "step": 43478
    },
    {
      "epoch": 0.00026536865234375,
      "step": 43478,
      "training_step_time": 0.39617443084716797
    },
    {
      "epoch": 0.000265374755859375,
      "model_forward_time": 0.11499166488647461,
      "step": 43479
    },
    {
      "epoch": 0.000265374755859375,
      "step": 43479,
      "training_step_time": 0.391721248626709
    },
    {
      "epoch": 0.000265380859375,
      "grad_norm": 0.10155145823955536,
      "learning_rate": 1.93328770153406e-05,
      "loss": 0.0369,
      "step": 43480
    },
    {
      "epoch": 0.000265380859375,
      "model_forward_time": 0.1154935359954834,
      "step": 43480
    },
    {
      "epoch": 0.000265380859375,
      "step": 43480,
      "training_step_time": 0.39943981170654297
    },
    {
      "epoch": 0.000265386962890625,
      "model_forward_time": 0.11582517623901367,
      "step": 43481
    },
    {
      "epoch": 0.000265386962890625,
      "step": 43481,
      "training_step_time": 0.5548017024993896
    },
    {
      "epoch": 0.00026539306640625,
      "model_forward_time": 0.11472129821777344,
      "step": 43482
    },
    {
      "epoch": 0.00026539306640625,
      "step": 43482,
      "training_step_time": 0.4261782169342041
    },
    {
      "epoch": 0.000265399169921875,
      "model_forward_time": 0.11531877517700195,
      "step": 43483
    },
    {
      "epoch": 0.000265399169921875,
      "step": 43483,
      "training_step_time": 0.41492486000061035
    },
    {
      "epoch": 0.0002654052734375,
      "model_forward_time": 0.11533451080322266,
      "step": 43484
    },
    {
      "epoch": 0.0002654052734375,
      "step": 43484,
      "training_step_time": 0.3955111503601074
    },
    {
      "epoch": 0.000265411376953125,
      "model_forward_time": 0.1153717041015625,
      "step": 43485
    },
    {
      "epoch": 0.000265411376953125,
      "step": 43485,
      "training_step_time": 0.391221284866333
    },
    {
      "epoch": 0.00026541748046875,
      "model_forward_time": 0.11672449111938477,
      "step": 43486
    },
    {
      "epoch": 0.00026541748046875,
      "step": 43486,
      "training_step_time": 0.4071159362792969
    },
    {
      "epoch": 0.000265423583984375,
      "model_forward_time": 0.11563754081726074,
      "step": 43487
    },
    {
      "epoch": 0.000265423583984375,
      "step": 43487,
      "training_step_time": 0.4228355884552002
    },
    {
      "epoch": 0.0002654296875,
      "model_forward_time": 0.11476969718933105,
      "step": 43488
    },
    {
      "epoch": 0.0002654296875,
      "step": 43488,
      "training_step_time": 0.43743181228637695
    },
    {
      "epoch": 0.000265435791015625,
      "model_forward_time": 0.11535763740539551,
      "step": 43489
    },
    {
      "epoch": 0.000265435791015625,
      "step": 43489,
      "training_step_time": 0.439666748046875
    },
    {
      "epoch": 0.00026544189453125,
      "grad_norm": 0.13208232820034027,
      "learning_rate": 1.9311116029192278e-05,
      "loss": 0.0445,
      "step": 43490
    },
    {
      "epoch": 0.00026544189453125,
      "model_forward_time": 0.115997314453125,
      "step": 43490
    },
    {
      "epoch": 0.00026544189453125,
      "step": 43490,
      "training_step_time": 0.4333484172821045
    },
    {
      "epoch": 0.000265447998046875,
      "model_forward_time": 0.11469149589538574,
      "step": 43491
    },
    {
      "epoch": 0.000265447998046875,
      "step": 43491,
      "training_step_time": 0.47256016731262207
    },
    {
      "epoch": 0.0002654541015625,
      "model_forward_time": 0.11679482460021973,
      "step": 43492
    },
    {
      "epoch": 0.0002654541015625,
      "step": 43492,
      "training_step_time": 0.3857405185699463
    },
    {
      "epoch": 0.000265460205078125,
      "model_forward_time": 0.11496424674987793,
      "step": 43493
    },
    {
      "epoch": 0.000265460205078125,
      "step": 43493,
      "training_step_time": 0.6484620571136475
    },
    {
      "epoch": 0.00026546630859375,
      "model_forward_time": 0.11453485488891602,
      "step": 43494
    },
    {
      "epoch": 0.00026546630859375,
      "step": 43494,
      "training_step_time": 0.3869011402130127
    },
    {
      "epoch": 0.000265472412109375,
      "model_forward_time": 0.1146233081817627,
      "step": 43495
    },
    {
      "epoch": 0.000265472412109375,
      "step": 43495,
      "training_step_time": 0.4548149108886719
    },
    {
      "epoch": 0.000265478515625,
      "model_forward_time": 0.11501741409301758,
      "step": 43496
    },
    {
      "epoch": 0.000265478515625,
      "step": 43496,
      "training_step_time": 0.4401991367340088
    },
    {
      "epoch": 0.000265484619140625,
      "model_forward_time": 0.11433267593383789,
      "step": 43497
    },
    {
      "epoch": 0.000265484619140625,
      "step": 43497,
      "training_step_time": 0.3973250389099121
    },
    {
      "epoch": 0.00026549072265625,
      "model_forward_time": 0.11457490921020508,
      "step": 43498
    },
    {
      "epoch": 0.00026549072265625,
      "step": 43498,
      "training_step_time": 0.39256882667541504
    },
    {
      "epoch": 0.000265496826171875,
      "model_forward_time": 0.11543941497802734,
      "step": 43499
    },
    {
      "epoch": 0.000265496826171875,
      "step": 43499,
      "training_step_time": 0.3967173099517822
    },
    {
      "epoch": 0.0002655029296875,
      "grad_norm": 0.13827399909496307,
      "learning_rate": 1.928936436551661e-05,
      "loss": 0.0394,
      "step": 43500
    },
    {
      "epoch": 0.0002655029296875,
      "model_forward_time": 0.11542797088623047,
      "step": 43500
    },
    {
      "epoch": 0.0002655029296875,
      "step": 43500,
      "training_step_time": 0.41748881340026855
    },
    {
      "epoch": 0.000265509033203125,
      "model_forward_time": 0.11476898193359375,
      "step": 43501
    },
    {
      "epoch": 0.000265509033203125,
      "step": 43501,
      "training_step_time": 0.3959341049194336
    },
    {
      "epoch": 0.00026551513671875,
      "model_forward_time": 0.11574292182922363,
      "step": 43502
    },
    {
      "epoch": 0.00026551513671875,
      "step": 43502,
      "training_step_time": 0.4232299327850342
    },
    {
      "epoch": 0.000265521240234375,
      "model_forward_time": 0.11496090888977051,
      "step": 43503
    },
    {
      "epoch": 0.000265521240234375,
      "step": 43503,
      "training_step_time": 0.39650964736938477
    },
    {
      "epoch": 0.00026552734375,
      "model_forward_time": 0.11534547805786133,
      "step": 43504
    },
    {
      "epoch": 0.00026552734375,
      "step": 43504,
      "training_step_time": 0.42351794242858887
    },
    {
      "epoch": 0.000265533447265625,
      "model_forward_time": 0.1152040958404541,
      "step": 43505
    },
    {
      "epoch": 0.000265533447265625,
      "step": 43505,
      "training_step_time": 0.6704702377319336
    },
    {
      "epoch": 0.00026553955078125,
      "model_forward_time": 0.11446785926818848,
      "step": 43506
    },
    {
      "epoch": 0.00026553955078125,
      "step": 43506,
      "training_step_time": 0.39336705207824707
    },
    {
      "epoch": 0.000265545654296875,
      "model_forward_time": 0.11481308937072754,
      "step": 43507
    },
    {
      "epoch": 0.000265545654296875,
      "step": 43507,
      "training_step_time": 0.39572787284851074
    },
    {
      "epoch": 0.0002655517578125,
      "model_forward_time": 0.11433720588684082,
      "step": 43508
    },
    {
      "epoch": 0.0002655517578125,
      "step": 43508,
      "training_step_time": 0.38808131217956543
    },
    {
      "epoch": 0.000265557861328125,
      "model_forward_time": 0.11560463905334473,
      "step": 43509
    },
    {
      "epoch": 0.000265557861328125,
      "step": 43509,
      "training_step_time": 0.40315842628479004
    },
    {
      "epoch": 0.00026556396484375,
      "grad_norm": 0.09799941629171371,
      "learning_rate": 1.9267622030921184e-05,
      "loss": 0.0365,
      "step": 43510
    },
    {
      "epoch": 0.00026556396484375,
      "model_forward_time": 0.11480903625488281,
      "step": 43510
    },
    {
      "epoch": 0.00026556396484375,
      "step": 43510,
      "training_step_time": 0.42452406883239746
    },
    {
      "epoch": 0.000265570068359375,
      "model_forward_time": 0.11498641967773438,
      "step": 43511
    },
    {
      "epoch": 0.000265570068359375,
      "step": 43511,
      "training_step_time": 1.1894772052764893
    },
    {
      "epoch": 0.000265576171875,
      "model_forward_time": 0.11342191696166992,
      "step": 43512
    },
    {
      "epoch": 0.000265576171875,
      "step": 43512,
      "training_step_time": 0.3963046073913574
    },
    {
      "epoch": 0.000265582275390625,
      "model_forward_time": 0.11361932754516602,
      "step": 43513
    },
    {
      "epoch": 0.000265582275390625,
      "step": 43513,
      "training_step_time": 0.3879096508026123
    },
    {
      "epoch": 0.00026558837890625,
      "model_forward_time": 0.11398100852966309,
      "step": 43514
    },
    {
      "epoch": 0.00026558837890625,
      "step": 43514,
      "training_step_time": 0.38649868965148926
    },
    {
      "epoch": 0.000265594482421875,
      "model_forward_time": 0.11366724967956543,
      "step": 43515
    },
    {
      "epoch": 0.000265594482421875,
      "step": 43515,
      "training_step_time": 0.385866641998291
    },
    {
      "epoch": 0.0002656005859375,
      "model_forward_time": 0.11425566673278809,
      "step": 43516
    },
    {
      "epoch": 0.0002656005859375,
      "step": 43516,
      "training_step_time": 0.4092519283294678
    },
    {
      "epoch": 0.000265606689453125,
      "model_forward_time": 0.11607956886291504,
      "step": 43517
    },
    {
      "epoch": 0.000265606689453125,
      "step": 43517,
      "training_step_time": 0.6022045612335205
    },
    {
      "epoch": 0.00026561279296875,
      "model_forward_time": 0.11444830894470215,
      "step": 43518
    },
    {
      "epoch": 0.00026561279296875,
      "step": 43518,
      "training_step_time": 0.43329286575317383
    },
    {
      "epoch": 0.000265618896484375,
      "model_forward_time": 0.11443424224853516,
      "step": 43519
    },
    {
      "epoch": 0.000265618896484375,
      "step": 43519,
      "training_step_time": 0.38779306411743164
    },
    {
      "epoch": 0.000265625,
      "grad_norm": 0.08866847306489944,
      "learning_rate": 1.924588903201074e-05,
      "loss": 0.0397,
      "step": 43520
    },
    {
      "epoch": 0.000265625,
      "model_forward_time": 0.11461758613586426,
      "step": 43520
    },
    {
      "epoch": 0.000265625,
      "step": 43520,
      "training_step_time": 0.38695192337036133
    },
    {
      "epoch": 0.000265631103515625,
      "model_forward_time": 0.1148078441619873,
      "step": 43521
    },
    {
      "epoch": 0.000265631103515625,
      "step": 43521,
      "training_step_time": 0.4088616371154785
    },
    {
      "epoch": 0.00026563720703125,
      "model_forward_time": 0.11536526679992676,
      "step": 43522
    },
    {
      "epoch": 0.00026563720703125,
      "step": 43522,
      "training_step_time": 0.44983887672424316
    },
    {
      "epoch": 0.000265643310546875,
      "model_forward_time": 0.11513686180114746,
      "step": 43523
    },
    {
      "epoch": 0.000265643310546875,
      "step": 43523,
      "training_step_time": 0.7385697364807129
    },
    {
      "epoch": 0.0002656494140625,
      "model_forward_time": 0.11511993408203125,
      "step": 43524
    },
    {
      "epoch": 0.0002656494140625,
      "step": 43524,
      "training_step_time": 0.40972208976745605
    },
    {
      "epoch": 0.000265655517578125,
      "model_forward_time": 0.11475515365600586,
      "step": 43525
    },
    {
      "epoch": 0.000265655517578125,
      "step": 43525,
      "training_step_time": 0.37918972969055176
    },
    {
      "epoch": 0.00026566162109375,
      "model_forward_time": 0.11439871788024902,
      "step": 43526
    },
    {
      "epoch": 0.00026566162109375,
      "step": 43526,
      "training_step_time": 0.38591504096984863
    },
    {
      "epoch": 0.000265667724609375,
      "model_forward_time": 0.11477136611938477,
      "step": 43527
    },
    {
      "epoch": 0.000265667724609375,
      "step": 43527,
      "training_step_time": 0.40546178817749023
    },
    {
      "epoch": 0.000265673828125,
      "model_forward_time": 0.11467337608337402,
      "step": 43528
    },
    {
      "epoch": 0.000265673828125,
      "step": 43528,
      "training_step_time": 0.3868286609649658
    },
    {
      "epoch": 0.000265679931640625,
      "model_forward_time": 0.11554932594299316,
      "step": 43529
    },
    {
      "epoch": 0.000265679931640625,
      "step": 43529,
      "training_step_time": 0.6567268371582031
    },
    {
      "epoch": 0.00026568603515625,
      "grad_norm": 0.08195383101701736,
      "learning_rate": 1.9224165375387193e-05,
      "loss": 0.0361,
      "step": 43530
    },
    {
      "epoch": 0.00026568603515625,
      "model_forward_time": 0.1146235466003418,
      "step": 43530
    },
    {
      "epoch": 0.00026568603515625,
      "step": 43530,
      "training_step_time": 0.3993065357208252
    },
    {
      "epoch": 0.000265692138671875,
      "model_forward_time": 0.11520147323608398,
      "step": 43531
    },
    {
      "epoch": 0.000265692138671875,
      "step": 43531,
      "training_step_time": 0.3835885524749756
    },
    {
      "epoch": 0.0002656982421875,
      "model_forward_time": 0.11427640914916992,
      "step": 43532
    },
    {
      "epoch": 0.0002656982421875,
      "step": 43532,
      "training_step_time": 0.4285898208618164
    },
    {
      "epoch": 0.000265704345703125,
      "model_forward_time": 0.11464214324951172,
      "step": 43533
    },
    {
      "epoch": 0.000265704345703125,
      "step": 43533,
      "training_step_time": 0.3992915153503418
    },
    {
      "epoch": 0.00026571044921875,
      "model_forward_time": 0.11431670188903809,
      "step": 43534
    },
    {
      "epoch": 0.00026571044921875,
      "step": 43534,
      "training_step_time": 0.3890087604522705
    },
    {
      "epoch": 0.000265716552734375,
      "model_forward_time": 0.11580300331115723,
      "step": 43535
    },
    {
      "epoch": 0.000265716552734375,
      "step": 43535,
      "training_step_time": 0.7361950874328613
    },
    {
      "epoch": 0.00026572265625,
      "model_forward_time": 0.11521410942077637,
      "step": 43536
    },
    {
      "epoch": 0.00026572265625,
      "step": 43536,
      "training_step_time": 0.47578930854797363
    },
    {
      "epoch": 0.000265728759765625,
      "model_forward_time": 0.11464238166809082,
      "step": 43537
    },
    {
      "epoch": 0.000265728759765625,
      "step": 43537,
      "training_step_time": 0.4206125736236572
    },
    {
      "epoch": 0.00026573486328125,
      "model_forward_time": 0.1149444580078125,
      "step": 43538
    },
    {
      "epoch": 0.00026573486328125,
      "step": 43538,
      "training_step_time": 0.4086036682128906
    },
    {
      "epoch": 0.000265740966796875,
      "model_forward_time": 0.11465883255004883,
      "step": 43539
    },
    {
      "epoch": 0.000265740966796875,
      "step": 43539,
      "training_step_time": 0.3758811950683594
    },
    {
      "epoch": 0.0002657470703125,
      "grad_norm": 0.0883241519331932,
      "learning_rate": 1.920245106764962e-05,
      "loss": 0.0389,
      "step": 43540
    },
    {
      "epoch": 0.0002657470703125,
      "model_forward_time": 0.11443948745727539,
      "step": 43540
    },
    {
      "epoch": 0.0002657470703125,
      "step": 43540,
      "training_step_time": 0.3828012943267822
    },
    {
      "epoch": 0.000265753173828125,
      "model_forward_time": 0.11633443832397461,
      "step": 43541
    },
    {
      "epoch": 0.000265753173828125,
      "step": 43541,
      "training_step_time": 0.5020365715026855
    },
    {
      "epoch": 0.00026575927734375,
      "model_forward_time": 0.11498355865478516,
      "step": 43542
    },
    {
      "epoch": 0.00026575927734375,
      "step": 43542,
      "training_step_time": 0.3757205009460449
    },
    {
      "epoch": 0.000265765380859375,
      "model_forward_time": 0.11567521095275879,
      "step": 43543
    },
    {
      "epoch": 0.000265765380859375,
      "step": 43543,
      "training_step_time": 0.44547438621520996
    },
    {
      "epoch": 0.000265771484375,
      "model_forward_time": 0.11376476287841797,
      "step": 43544
    },
    {
      "epoch": 0.000265771484375,
      "step": 43544,
      "training_step_time": 0.43893861770629883
    },
    {
      "epoch": 0.000265777587890625,
      "model_forward_time": 0.11482763290405273,
      "step": 43545
    },
    {
      "epoch": 0.000265777587890625,
      "step": 43545,
      "training_step_time": 0.3923330307006836
    },
    {
      "epoch": 0.00026578369140625,
      "model_forward_time": 0.11628198623657227,
      "step": 43546
    },
    {
      "epoch": 0.00026578369140625,
      "step": 43546,
      "training_step_time": 0.38397955894470215
    },
    {
      "epoch": 0.000265789794921875,
      "model_forward_time": 0.11537051200866699,
      "step": 43547
    },
    {
      "epoch": 0.000265789794921875,
      "step": 43547,
      "training_step_time": 0.3884303569793701
    },
    {
      "epoch": 0.0002657958984375,
      "model_forward_time": 0.11537814140319824,
      "step": 43548
    },
    {
      "epoch": 0.0002657958984375,
      "step": 43548,
      "training_step_time": 0.39591073989868164
    },
    {
      "epoch": 0.000265802001953125,
      "model_forward_time": 0.11700987815856934,
      "step": 43549
    },
    {
      "epoch": 0.000265802001953125,
      "step": 43549,
      "training_step_time": 0.38228464126586914
    },
    {
      "epoch": 0.00026580810546875,
      "grad_norm": 0.11017156392335892,
      "learning_rate": 1.9180746115394243e-05,
      "loss": 0.037,
      "step": 43550
    },
    {
      "epoch": 0.00026580810546875,
      "model_forward_time": 0.1155087947845459,
      "step": 43550
    },
    {
      "epoch": 0.00026580810546875,
      "step": 43550,
      "training_step_time": 0.45420169830322266
    },
    {
      "epoch": 0.000265814208984375,
      "model_forward_time": 0.11634397506713867,
      "step": 43551
    },
    {
      "epoch": 0.000265814208984375,
      "step": 43551,
      "training_step_time": 0.4538137912750244
    },
    {
      "epoch": 0.0002658203125,
      "model_forward_time": 0.11614203453063965,
      "step": 43552
    },
    {
      "epoch": 0.0002658203125,
      "step": 43552,
      "training_step_time": 0.41770195960998535
    },
    {
      "epoch": 0.000265826416015625,
      "model_forward_time": 0.11570262908935547,
      "step": 43553
    },
    {
      "epoch": 0.000265826416015625,
      "step": 43553,
      "training_step_time": 0.3969721794128418
    },
    {
      "epoch": 0.00026583251953125,
      "model_forward_time": 0.11522746086120605,
      "step": 43554
    },
    {
      "epoch": 0.00026583251953125,
      "step": 43554,
      "training_step_time": 0.3989889621734619
    },
    {
      "epoch": 0.000265838623046875,
      "model_forward_time": 0.11470484733581543,
      "step": 43555
    },
    {
      "epoch": 0.000265838623046875,
      "step": 43555,
      "training_step_time": 0.377957820892334
    },
    {
      "epoch": 0.0002658447265625,
      "model_forward_time": 0.11541008949279785,
      "step": 43556
    },
    {
      "epoch": 0.0002658447265625,
      "step": 43556,
      "training_step_time": 0.39328575134277344
    },
    {
      "epoch": 0.000265850830078125,
      "model_forward_time": 0.11557888984680176,
      "step": 43557
    },
    {
      "epoch": 0.000265850830078125,
      "step": 43557,
      "training_step_time": 0.4115118980407715
    },
    {
      "epoch": 0.00026585693359375,
      "model_forward_time": 0.11546015739440918,
      "step": 43558
    },
    {
      "epoch": 0.00026585693359375,
      "step": 43558,
      "training_step_time": 0.47279953956604004
    },
    {
      "epoch": 0.000265863037109375,
      "model_forward_time": 0.11513710021972656,
      "step": 43559
    },
    {
      "epoch": 0.000265863037109375,
      "step": 43559,
      "training_step_time": 0.4612574577331543
    },
    {
      "epoch": 0.000265869140625,
      "grad_norm": 0.1033404991030693,
      "learning_rate": 1.9159050525214452e-05,
      "loss": 0.0391,
      "step": 43560
    },
    {
      "epoch": 0.000265869140625,
      "model_forward_time": 0.11551046371459961,
      "step": 43560
    },
    {
      "epoch": 0.000265869140625,
      "step": 43560,
      "training_step_time": 0.460770845413208
    },
    {
      "epoch": 0.000265875244140625,
      "model_forward_time": 0.1151738166809082,
      "step": 43561
    },
    {
      "epoch": 0.000265875244140625,
      "step": 43561,
      "training_step_time": 0.387340784072876
    },
    {
      "epoch": 0.00026588134765625,
      "model_forward_time": 0.11594486236572266,
      "step": 43562
    },
    {
      "epoch": 0.00026588134765625,
      "step": 43562,
      "training_step_time": 0.38866710662841797
    },
    {
      "epoch": 0.000265887451171875,
      "model_forward_time": 0.11499857902526855,
      "step": 43563
    },
    {
      "epoch": 0.000265887451171875,
      "step": 43563,
      "training_step_time": 0.3923671245574951
    },
    {
      "epoch": 0.0002658935546875,
      "model_forward_time": 0.11455941200256348,
      "step": 43564
    },
    {
      "epoch": 0.0002658935546875,
      "step": 43564,
      "training_step_time": 0.4810373783111572
    },
    {
      "epoch": 0.000265899658203125,
      "model_forward_time": 0.11556124687194824,
      "step": 43565
    },
    {
      "epoch": 0.000265899658203125,
      "step": 43565,
      "training_step_time": 0.44936180114746094
    },
    {
      "epoch": 0.00026590576171875,
      "model_forward_time": 0.11504340171813965,
      "step": 43566
    },
    {
      "epoch": 0.00026590576171875,
      "step": 43566,
      "training_step_time": 0.4538595676422119
    },
    {
      "epoch": 0.000265911865234375,
      "model_forward_time": 0.11462593078613281,
      "step": 43567
    },
    {
      "epoch": 0.000265911865234375,
      "step": 43567,
      "training_step_time": 0.37690138816833496
    },
    {
      "epoch": 0.00026591796875,
      "model_forward_time": 0.11533474922180176,
      "step": 43568
    },
    {
      "epoch": 0.00026591796875,
      "step": 43568,
      "training_step_time": 0.4077799320220947
    },
    {
      "epoch": 0.000265924072265625,
      "model_forward_time": 0.11532258987426758,
      "step": 43569
    },
    {
      "epoch": 0.000265924072265625,
      "step": 43569,
      "training_step_time": 0.3988499641418457
    },
    {
      "epoch": 0.00026593017578125,
      "grad_norm": 0.10960251837968826,
      "learning_rate": 1.9137364303700833e-05,
      "loss": 0.0334,
      "step": 43570
    },
    {
      "epoch": 0.00026593017578125,
      "model_forward_time": 0.11492633819580078,
      "step": 43570
    },
    {
      "epoch": 0.00026593017578125,
      "step": 43570,
      "training_step_time": 0.4007608890533447
    },
    {
      "epoch": 0.000265936279296875,
      "model_forward_time": 0.11472797393798828,
      "step": 43571
    },
    {
      "epoch": 0.000265936279296875,
      "step": 43571,
      "training_step_time": 0.4050633907318115
    },
    {
      "epoch": 0.0002659423828125,
      "model_forward_time": 0.11689305305480957,
      "step": 43572
    },
    {
      "epoch": 0.0002659423828125,
      "step": 43572,
      "training_step_time": 0.5165896415710449
    },
    {
      "epoch": 0.000265948486328125,
      "model_forward_time": 0.11484575271606445,
      "step": 43573
    },
    {
      "epoch": 0.000265948486328125,
      "step": 43573,
      "training_step_time": 0.5102746486663818
    },
    {
      "epoch": 0.00026595458984375,
      "model_forward_time": 0.11491250991821289,
      "step": 43574
    },
    {
      "epoch": 0.00026595458984375,
      "step": 43574,
      "training_step_time": 0.520174503326416
    },
    {
      "epoch": 0.000265960693359375,
      "model_forward_time": 0.1150655746459961,
      "step": 43575
    },
    {
      "epoch": 0.000265960693359375,
      "step": 43575,
      "training_step_time": 0.38138294219970703
    },
    {
      "epoch": 0.000265966796875,
      "model_forward_time": 0.11509418487548828,
      "step": 43576
    },
    {
      "epoch": 0.000265966796875,
      "step": 43576,
      "training_step_time": 0.3845829963684082
    },
    {
      "epoch": 0.000265972900390625,
      "model_forward_time": 0.11507415771484375,
      "step": 43577
    },
    {
      "epoch": 0.000265972900390625,
      "step": 43577,
      "training_step_time": 0.37914133071899414
    },
    {
      "epoch": 0.00026597900390625,
      "model_forward_time": 0.1148674488067627,
      "step": 43578
    },
    {
      "epoch": 0.00026597900390625,
      "step": 43578,
      "training_step_time": 0.41780972480773926
    },
    {
      "epoch": 0.000265985107421875,
      "model_forward_time": 0.11462831497192383,
      "step": 43579
    },
    {
      "epoch": 0.000265985107421875,
      "step": 43579,
      "training_step_time": 0.4072682857513428
    },
    {
      "epoch": 0.0002659912109375,
      "grad_norm": 0.0830303207039833,
      "learning_rate": 1.9115687457441022e-05,
      "loss": 0.0343,
      "step": 43580
    },
    {
      "epoch": 0.0002659912109375,
      "model_forward_time": 0.11532950401306152,
      "step": 43580
    },
    {
      "epoch": 0.0002659912109375,
      "step": 43580,
      "training_step_time": 0.4892091751098633
    },
    {
      "epoch": 0.000265997314453125,
      "model_forward_time": 0.11639642715454102,
      "step": 43581
    },
    {
      "epoch": 0.000265997314453125,
      "step": 43581,
      "training_step_time": 0.3995082378387451
    },
    {
      "epoch": 0.00026600341796875,
      "model_forward_time": 0.1145787239074707,
      "step": 43582
    },
    {
      "epoch": 0.00026600341796875,
      "step": 43582,
      "training_step_time": 0.38369131088256836
    },
    {
      "epoch": 0.000266009521484375,
      "model_forward_time": 0.11481666564941406,
      "step": 43583
    },
    {
      "epoch": 0.000266009521484375,
      "step": 43583,
      "training_step_time": 0.39241576194763184
    },
    {
      "epoch": 0.000266015625,
      "model_forward_time": 0.11495661735534668,
      "step": 43584
    },
    {
      "epoch": 0.000266015625,
      "step": 43584,
      "training_step_time": 0.39068126678466797
    },
    {
      "epoch": 0.000266021728515625,
      "model_forward_time": 0.11485052108764648,
      "step": 43585
    },
    {
      "epoch": 0.000266021728515625,
      "step": 43585,
      "training_step_time": 0.40042710304260254
    },
    {
      "epoch": 0.00026602783203125,
      "model_forward_time": 0.11513638496398926,
      "step": 43586
    },
    {
      "epoch": 0.00026602783203125,
      "step": 43586,
      "training_step_time": 0.3972434997558594
    },
    {
      "epoch": 0.000266033935546875,
      "model_forward_time": 0.11551690101623535,
      "step": 43587
    },
    {
      "epoch": 0.000266033935546875,
      "step": 43587,
      "training_step_time": 0.4125978946685791
    },
    {
      "epoch": 0.0002660400390625,
      "model_forward_time": 0.11486124992370605,
      "step": 43588
    },
    {
      "epoch": 0.0002660400390625,
      "step": 43588,
      "training_step_time": 0.47351598739624023
    },
    {
      "epoch": 0.000266046142578125,
      "model_forward_time": 0.11575055122375488,
      "step": 43589
    },
    {
      "epoch": 0.000266046142578125,
      "step": 43589,
      "training_step_time": 0.42589807510375977
    },
    {
      "epoch": 0.00026605224609375,
      "grad_norm": 0.09547100216150284,
      "learning_rate": 1.909401999301993e-05,
      "loss": 0.0377,
      "step": 43590
    },
    {
      "epoch": 0.00026605224609375,
      "model_forward_time": 0.11522507667541504,
      "step": 43590
    },
    {
      "epoch": 0.00026605224609375,
      "step": 43590,
      "training_step_time": 0.46187567710876465
    },
    {
      "epoch": 0.000266058349609375,
      "model_forward_time": 0.11521267890930176,
      "step": 43591
    },
    {
      "epoch": 0.000266058349609375,
      "step": 43591,
      "training_step_time": 0.3977649211883545
    },
    {
      "epoch": 0.000266064453125,
      "model_forward_time": 0.11529183387756348,
      "step": 43592
    },
    {
      "epoch": 0.000266064453125,
      "step": 43592,
      "training_step_time": 0.3899810314178467
    },
    {
      "epoch": 0.000266070556640625,
      "model_forward_time": 0.1150047779083252,
      "step": 43593
    },
    {
      "epoch": 0.000266070556640625,
      "step": 43593,
      "training_step_time": 0.4134187698364258
    },
    {
      "epoch": 0.00026607666015625,
      "model_forward_time": 0.11516880989074707,
      "step": 43594
    },
    {
      "epoch": 0.00026607666015625,
      "step": 43594,
      "training_step_time": 0.44442296028137207
    },
    {
      "epoch": 0.000266082763671875,
      "model_forward_time": 0.11469817161560059,
      "step": 43595
    },
    {
      "epoch": 0.000266082763671875,
      "step": 43595,
      "training_step_time": 0.39242053031921387
    },
    {
      "epoch": 0.0002660888671875,
      "model_forward_time": 0.11559367179870605,
      "step": 43596
    },
    {
      "epoch": 0.0002660888671875,
      "step": 43596,
      "training_step_time": 0.4015483856201172
    },
    {
      "epoch": 0.000266094970703125,
      "model_forward_time": 0.11587333679199219,
      "step": 43597
    },
    {
      "epoch": 0.000266094970703125,
      "step": 43597,
      "training_step_time": 0.4089548587799072
    },
    {
      "epoch": 0.00026610107421875,
      "model_forward_time": 0.11474752426147461,
      "step": 43598
    },
    {
      "epoch": 0.00026610107421875,
      "step": 43598,
      "training_step_time": 0.38822412490844727
    },
    {
      "epoch": 0.000266107177734375,
      "model_forward_time": 0.11529111862182617,
      "step": 43599
    },
    {
      "epoch": 0.000266107177734375,
      "step": 43599,
      "training_step_time": 0.393310546875
    },
    {
      "epoch": 0.00026611328125,
      "grad_norm": 0.10031858831644058,
      "learning_rate": 1.9072361917019536e-05,
      "loss": 0.0381,
      "step": 43600
    },
    {
      "epoch": 0.00026611328125,
      "model_forward_time": 0.11492037773132324,
      "step": 43600
    },
    {
      "epoch": 0.00026611328125,
      "step": 43600,
      "training_step_time": 0.39667296409606934
    },
    {
      "epoch": 0.000266119384765625,
      "model_forward_time": 0.11600732803344727,
      "step": 43601
    },
    {
      "epoch": 0.000266119384765625,
      "step": 43601,
      "training_step_time": 0.4326972961425781
    },
    {
      "epoch": 0.00026612548828125,
      "model_forward_time": 0.11532258987426758,
      "step": 43602
    },
    {
      "epoch": 0.00026612548828125,
      "step": 43602,
      "training_step_time": 0.4503962993621826
    },
    {
      "epoch": 0.000266131591796875,
      "model_forward_time": 0.11551332473754883,
      "step": 43603
    },
    {
      "epoch": 0.000266131591796875,
      "step": 43603,
      "training_step_time": 0.43215036392211914
    },
    {
      "epoch": 0.0002661376953125,
      "model_forward_time": 0.11597061157226562,
      "step": 43604
    },
    {
      "epoch": 0.0002661376953125,
      "step": 43604,
      "training_step_time": 0.48794078826904297
    },
    {
      "epoch": 0.000266143798828125,
      "model_forward_time": 0.11540508270263672,
      "step": 43605
    },
    {
      "epoch": 0.000266143798828125,
      "step": 43605,
      "training_step_time": 0.4735584259033203
    },
    {
      "epoch": 0.00026614990234375,
      "model_forward_time": 0.11482882499694824,
      "step": 43606
    },
    {
      "epoch": 0.00026614990234375,
      "step": 43606,
      "training_step_time": 0.45258116722106934
    },
    {
      "epoch": 0.000266156005859375,
      "model_forward_time": 0.11548233032226562,
      "step": 43607
    },
    {
      "epoch": 0.000266156005859375,
      "step": 43607,
      "training_step_time": 0.4213395118713379
    },
    {
      "epoch": 0.000266162109375,
      "model_forward_time": 0.11436104774475098,
      "step": 43608
    },
    {
      "epoch": 0.000266162109375,
      "step": 43608,
      "training_step_time": 0.3928642272949219
    },
    {
      "epoch": 0.000266168212890625,
      "model_forward_time": 0.11502933502197266,
      "step": 43609
    },
    {
      "epoch": 0.000266168212890625,
      "step": 43609,
      "training_step_time": 0.41232967376708984
    },
    {
      "epoch": 0.00026617431640625,
      "grad_norm": 0.1821177452802658,
      "learning_rate": 1.9050713236018998e-05,
      "loss": 0.0397,
      "step": 43610
    },
    {
      "epoch": 0.00026617431640625,
      "model_forward_time": 0.11572861671447754,
      "step": 43610
    },
    {
      "epoch": 0.00026617431640625,
      "step": 43610,
      "training_step_time": 0.38843846321105957
    },
    {
      "epoch": 0.000266180419921875,
      "model_forward_time": 0.11510992050170898,
      "step": 43611
    },
    {
      "epoch": 0.000266180419921875,
      "step": 43611,
      "training_step_time": 0.38898205757141113
    },
    {
      "epoch": 0.0002661865234375,
      "model_forward_time": 0.11539912223815918,
      "step": 43612
    },
    {
      "epoch": 0.0002661865234375,
      "step": 43612,
      "training_step_time": 0.4059102535247803
    },
    {
      "epoch": 0.000266192626953125,
      "model_forward_time": 0.11657524108886719,
      "step": 43613
    },
    {
      "epoch": 0.000266192626953125,
      "step": 43613,
      "training_step_time": 0.3960726261138916
    },
    {
      "epoch": 0.00026619873046875,
      "model_forward_time": 0.11466526985168457,
      "step": 43614
    },
    {
      "epoch": 0.00026619873046875,
      "step": 43614,
      "training_step_time": 0.39572763442993164
    },
    {
      "epoch": 0.000266204833984375,
      "model_forward_time": 0.11483955383300781,
      "step": 43615
    },
    {
      "epoch": 0.000266204833984375,
      "step": 43615,
      "training_step_time": 0.36968255043029785
    },
    {
      "epoch": 0.0002662109375,
      "model_forward_time": 0.11565971374511719,
      "step": 43616
    },
    {
      "epoch": 0.0002662109375,
      "step": 43616,
      "training_step_time": 0.384047269821167
    },
    {
      "epoch": 0.000266217041015625,
      "model_forward_time": 0.11607217788696289,
      "step": 43617
    },
    {
      "epoch": 0.000266217041015625,
      "step": 43617,
      "training_step_time": 0.4963705539703369
    },
    {
      "epoch": 0.00026622314453125,
      "model_forward_time": 0.1156926155090332,
      "step": 43618
    },
    {
      "epoch": 0.00026622314453125,
      "step": 43618,
      "training_step_time": 0.4009881019592285
    },
    {
      "epoch": 0.000266229248046875,
      "model_forward_time": 0.11537313461303711,
      "step": 43619
    },
    {
      "epoch": 0.000266229248046875,
      "step": 43619,
      "training_step_time": 0.4984152317047119
    },
    {
      "epoch": 0.0002662353515625,
      "grad_norm": 0.12773802876472473,
      "learning_rate": 1.9029073956594606e-05,
      "loss": 0.0385,
      "step": 43620
    },
    {
      "epoch": 0.0002662353515625,
      "model_forward_time": 0.11529231071472168,
      "step": 43620
    },
    {
      "epoch": 0.0002662353515625,
      "step": 43620,
      "training_step_time": 0.4109020233154297
    },
    {
      "epoch": 0.000266241455078125,
      "model_forward_time": 0.11487054824829102,
      "step": 43621
    },
    {
      "epoch": 0.000266241455078125,
      "step": 43621,
      "training_step_time": 0.457108736038208
    },
    {
      "epoch": 0.00026624755859375,
      "model_forward_time": 0.11482715606689453,
      "step": 43622
    },
    {
      "epoch": 0.00026624755859375,
      "step": 43622,
      "training_step_time": 0.4283638000488281
    },
    {
      "epoch": 0.000266253662109375,
      "model_forward_time": 0.11584234237670898,
      "step": 43623
    },
    {
      "epoch": 0.000266253662109375,
      "step": 43623,
      "training_step_time": 0.40102505683898926
    },
    {
      "epoch": 0.000266259765625,
      "model_forward_time": 0.11461615562438965,
      "step": 43624
    },
    {
      "epoch": 0.000266259765625,
      "step": 43624,
      "training_step_time": 0.3903815746307373
    },
    {
      "epoch": 0.000266265869140625,
      "model_forward_time": 0.11483025550842285,
      "step": 43625
    },
    {
      "epoch": 0.000266265869140625,
      "step": 43625,
      "training_step_time": 0.3943161964416504
    },
    {
      "epoch": 0.00026627197265625,
      "model_forward_time": 0.11553001403808594,
      "step": 43626
    },
    {
      "epoch": 0.00026627197265625,
      "step": 43626,
      "training_step_time": 0.40561985969543457
    },
    {
      "epoch": 0.000266278076171875,
      "model_forward_time": 0.11556053161621094,
      "step": 43627
    },
    {
      "epoch": 0.000266278076171875,
      "step": 43627,
      "training_step_time": 0.3975801467895508
    },
    {
      "epoch": 0.0002662841796875,
      "model_forward_time": 0.11568927764892578,
      "step": 43628
    },
    {
      "epoch": 0.0002662841796875,
      "step": 43628,
      "training_step_time": 0.40055060386657715
    },
    {
      "epoch": 0.000266290283203125,
      "model_forward_time": 0.11588287353515625,
      "step": 43629
    },
    {
      "epoch": 0.000266290283203125,
      "step": 43629,
      "training_step_time": 0.39780664443969727
    },
    {
      "epoch": 0.00026629638671875,
      "grad_norm": 0.1391347348690033,
      "learning_rate": 1.9007444085319786e-05,
      "loss": 0.0365,
      "step": 43630
    },
    {
      "epoch": 0.00026629638671875,
      "model_forward_time": 0.11633443832397461,
      "step": 43630
    },
    {
      "epoch": 0.00026629638671875,
      "step": 43630,
      "training_step_time": 0.41937851905822754
    },
    {
      "epoch": 0.000266302490234375,
      "model_forward_time": 0.11701512336730957,
      "step": 43631
    },
    {
      "epoch": 0.000266302490234375,
      "step": 43631,
      "training_step_time": 0.4946422576904297
    },
    {
      "epoch": 0.00026630859375,
      "model_forward_time": 0.11559700965881348,
      "step": 43632
    },
    {
      "epoch": 0.00026630859375,
      "step": 43632,
      "training_step_time": 0.41045689582824707
    },
    {
      "epoch": 0.000266314697265625,
      "model_forward_time": 0.11623859405517578,
      "step": 43633
    },
    {
      "epoch": 0.000266314697265625,
      "step": 43633,
      "training_step_time": 0.4242541790008545
    },
    {
      "epoch": 0.00026632080078125,
      "model_forward_time": 0.11508584022521973,
      "step": 43634
    },
    {
      "epoch": 0.00026632080078125,
      "step": 43634,
      "training_step_time": 0.47132182121276855
    },
    {
      "epoch": 0.000266326904296875,
      "model_forward_time": 0.11460304260253906,
      "step": 43635
    },
    {
      "epoch": 0.000266326904296875,
      "step": 43635,
      "training_step_time": 0.38518857955932617
    },
    {
      "epoch": 0.0002663330078125,
      "model_forward_time": 0.11472249031066895,
      "step": 43636
    },
    {
      "epoch": 0.0002663330078125,
      "step": 43636,
      "training_step_time": 0.4665219783782959
    },
    {
      "epoch": 0.000266339111328125,
      "model_forward_time": 0.11595845222473145,
      "step": 43637
    },
    {
      "epoch": 0.000266339111328125,
      "step": 43637,
      "training_step_time": 0.43776702880859375
    },
    {
      "epoch": 0.00026634521484375,
      "model_forward_time": 0.11567902565002441,
      "step": 43638
    },
    {
      "epoch": 0.00026634521484375,
      "step": 43638,
      "training_step_time": 0.39559197425842285
    },
    {
      "epoch": 0.000266351318359375,
      "model_forward_time": 0.11472296714782715,
      "step": 43639
    },
    {
      "epoch": 0.000266351318359375,
      "step": 43639,
      "training_step_time": 0.40367579460144043
    },
    {
      "epoch": 0.000266357421875,
      "grad_norm": 0.07942826300859451,
      "learning_rate": 1.8985823628765188e-05,
      "loss": 0.0379,
      "step": 43640
    },
    {
      "epoch": 0.000266357421875,
      "model_forward_time": 0.11565041542053223,
      "step": 43640
    },
    {
      "epoch": 0.000266357421875,
      "step": 43640,
      "training_step_time": 0.39268922805786133
    },
    {
      "epoch": 0.000266363525390625,
      "model_forward_time": 0.11546730995178223,
      "step": 43641
    },
    {
      "epoch": 0.000266363525390625,
      "step": 43641,
      "training_step_time": 0.3937644958496094
    },
    {
      "epoch": 0.00026636962890625,
      "model_forward_time": 0.11519908905029297,
      "step": 43642
    },
    {
      "epoch": 0.00026636962890625,
      "step": 43642,
      "training_step_time": 0.3896021842956543
    },
    {
      "epoch": 0.000266375732421875,
      "model_forward_time": 0.11621928215026855,
      "step": 43643
    },
    {
      "epoch": 0.000266375732421875,
      "step": 43643,
      "training_step_time": 0.3938486576080322
    },
    {
      "epoch": 0.0002663818359375,
      "model_forward_time": 0.11491680145263672,
      "step": 43644
    },
    {
      "epoch": 0.0002663818359375,
      "step": 43644,
      "training_step_time": 0.4026319980621338
    },
    {
      "epoch": 0.000266387939453125,
      "model_forward_time": 0.11652183532714844,
      "step": 43645
    },
    {
      "epoch": 0.000266387939453125,
      "step": 43645,
      "training_step_time": 0.40137147903442383
    },
    {
      "epoch": 0.00026639404296875,
      "model_forward_time": 0.11515021324157715,
      "step": 43646
    },
    {
      "epoch": 0.00026639404296875,
      "step": 43646,
      "training_step_time": 0.39660000801086426
    },
    {
      "epoch": 0.000266400146484375,
      "model_forward_time": 0.11572957038879395,
      "step": 43647
    },
    {
      "epoch": 0.000266400146484375,
      "step": 43647,
      "training_step_time": 0.41167116165161133
    },
    {
      "epoch": 0.00026640625,
      "model_forward_time": 0.11524772644042969,
      "step": 43648
    },
    {
      "epoch": 0.00026640625,
      "step": 43648,
      "training_step_time": 0.42264890670776367
    },
    {
      "epoch": 0.000266412353515625,
      "model_forward_time": 0.11522841453552246,
      "step": 43649
    },
    {
      "epoch": 0.000266412353515625,
      "step": 43649,
      "training_step_time": 0.3989412784576416
    },
    {
      "epoch": 0.00026641845703125,
      "grad_norm": 0.1381165087223053,
      "learning_rate": 1.8964212593498442e-05,
      "loss": 0.0381,
      "step": 43650
    },
    {
      "epoch": 0.00026641845703125,
      "model_forward_time": 0.1151282787322998,
      "step": 43650
    },
    {
      "epoch": 0.00026641845703125,
      "step": 43650,
      "training_step_time": 0.5415871143341064
    },
    {
      "epoch": 0.000266424560546875,
      "model_forward_time": 0.11452651023864746,
      "step": 43651
    },
    {
      "epoch": 0.000266424560546875,
      "step": 43651,
      "training_step_time": 0.42946338653564453
    },
    {
      "epoch": 0.0002664306640625,
      "model_forward_time": 0.11461710929870605,
      "step": 43652
    },
    {
      "epoch": 0.0002664306640625,
      "step": 43652,
      "training_step_time": 0.39316391944885254
    },
    {
      "epoch": 0.000266436767578125,
      "model_forward_time": 0.11551713943481445,
      "step": 43653
    },
    {
      "epoch": 0.000266436767578125,
      "step": 43653,
      "training_step_time": 0.38466310501098633
    },
    {
      "epoch": 0.00026644287109375,
      "model_forward_time": 0.11603021621704102,
      "step": 43654
    },
    {
      "epoch": 0.00026644287109375,
      "step": 43654,
      "training_step_time": 0.38980722427368164
    },
    {
      "epoch": 0.000266448974609375,
      "model_forward_time": 0.11459732055664062,
      "step": 43655
    },
    {
      "epoch": 0.000266448974609375,
      "step": 43655,
      "training_step_time": 0.37958312034606934
    },
    {
      "epoch": 0.000266455078125,
      "model_forward_time": 0.11514997482299805,
      "step": 43656
    },
    {
      "epoch": 0.000266455078125,
      "step": 43656,
      "training_step_time": 0.8976099491119385
    },
    {
      "epoch": 0.000266461181640625,
      "model_forward_time": 0.11481475830078125,
      "step": 43657
    },
    {
      "epoch": 0.000266461181640625,
      "step": 43657,
      "training_step_time": 0.3896675109863281
    },
    {
      "epoch": 0.00026646728515625,
      "model_forward_time": 0.11452102661132812,
      "step": 43658
    },
    {
      "epoch": 0.00026646728515625,
      "step": 43658,
      "training_step_time": 0.3836033344268799
    },
    {
      "epoch": 0.000266473388671875,
      "model_forward_time": 0.11638879776000977,
      "step": 43659
    },
    {
      "epoch": 0.000266473388671875,
      "step": 43659,
      "training_step_time": 0.4144628047943115
    },
    {
      "epoch": 0.0002664794921875,
      "grad_norm": 0.11326928436756134,
      "learning_rate": 1.8942610986084486e-05,
      "loss": 0.0382,
      "step": 43660
    },
    {
      "epoch": 0.0002664794921875,
      "model_forward_time": 0.11435294151306152,
      "step": 43660
    },
    {
      "epoch": 0.0002664794921875,
      "step": 43660,
      "training_step_time": 0.45259904861450195
    },
    {
      "epoch": 0.000266485595703125,
      "model_forward_time": 0.11510682106018066,
      "step": 43661
    },
    {
      "epoch": 0.000266485595703125,
      "step": 43661,
      "training_step_time": 0.49692845344543457
    },
    {
      "epoch": 0.00026649169921875,
      "model_forward_time": 0.11555719375610352,
      "step": 43662
    },
    {
      "epoch": 0.00026649169921875,
      "step": 43662,
      "training_step_time": 0.5118975639343262
    },
    {
      "epoch": 0.000266497802734375,
      "model_forward_time": 0.11498618125915527,
      "step": 43663
    },
    {
      "epoch": 0.000266497802734375,
      "step": 43663,
      "training_step_time": 0.45888757705688477
    },
    {
      "epoch": 0.00026650390625,
      "model_forward_time": 0.11491799354553223,
      "step": 43664
    },
    {
      "epoch": 0.00026650390625,
      "step": 43664,
      "training_step_time": 0.3915293216705322
    },
    {
      "epoch": 0.000266510009765625,
      "model_forward_time": 0.11487507820129395,
      "step": 43665
    },
    {
      "epoch": 0.000266510009765625,
      "step": 43665,
      "training_step_time": 0.41057920455932617
    },
    {
      "epoch": 0.00026651611328125,
      "model_forward_time": 0.11479592323303223,
      "step": 43666
    },
    {
      "epoch": 0.00026651611328125,
      "step": 43666,
      "training_step_time": 0.4085690975189209
    },
    {
      "epoch": 0.000266522216796875,
      "model_forward_time": 0.1158452033996582,
      "step": 43667
    },
    {
      "epoch": 0.000266522216796875,
      "step": 43667,
      "training_step_time": 0.3780176639556885
    },
    {
      "epoch": 0.0002665283203125,
      "model_forward_time": 0.11483621597290039,
      "step": 43668
    },
    {
      "epoch": 0.0002665283203125,
      "step": 43668,
      "training_step_time": 0.45752954483032227
    },
    {
      "epoch": 0.000266534423828125,
      "model_forward_time": 0.11475086212158203,
      "step": 43669
    },
    {
      "epoch": 0.000266534423828125,
      "step": 43669,
      "training_step_time": 0.39705467224121094
    },
    {
      "epoch": 0.00026654052734375,
      "grad_norm": 0.1323222815990448,
      "learning_rate": 1.8921018813085283e-05,
      "loss": 0.0332,
      "step": 43670
    },
    {
      "epoch": 0.00026654052734375,
      "model_forward_time": 0.11510086059570312,
      "step": 43670
    },
    {
      "epoch": 0.00026654052734375,
      "step": 43670,
      "training_step_time": 0.3955519199371338
    },
    {
      "epoch": 0.000266546630859375,
      "model_forward_time": 0.11522340774536133,
      "step": 43671
    },
    {
      "epoch": 0.000266546630859375,
      "step": 43671,
      "training_step_time": 0.3991374969482422
    },
    {
      "epoch": 0.000266552734375,
      "model_forward_time": 0.11550164222717285,
      "step": 43672
    },
    {
      "epoch": 0.000266552734375,
      "step": 43672,
      "training_step_time": 0.3998076915740967
    },
    {
      "epoch": 0.000266558837890625,
      "model_forward_time": 0.11454367637634277,
      "step": 43673
    },
    {
      "epoch": 0.000266558837890625,
      "step": 43673,
      "training_step_time": 0.4021124839782715
    },
    {
      "epoch": 0.00026656494140625,
      "model_forward_time": 0.11495327949523926,
      "step": 43674
    },
    {
      "epoch": 0.00026656494140625,
      "step": 43674,
      "training_step_time": 0.573732852935791
    },
    {
      "epoch": 0.000266571044921875,
      "model_forward_time": 0.11613178253173828,
      "step": 43675
    },
    {
      "epoch": 0.000266571044921875,
      "step": 43675,
      "training_step_time": 0.4249861240386963
    },
    {
      "epoch": 0.0002665771484375,
      "model_forward_time": 0.1142880916595459,
      "step": 43676
    },
    {
      "epoch": 0.0002665771484375,
      "step": 43676,
      "training_step_time": 0.49142980575561523
    },
    {
      "epoch": 0.000266583251953125,
      "model_forward_time": 0.11486554145812988,
      "step": 43677
    },
    {
      "epoch": 0.000266583251953125,
      "step": 43677,
      "training_step_time": 0.4473121166229248
    },
    {
      "epoch": 0.00026658935546875,
      "model_forward_time": 0.115386962890625,
      "step": 43678
    },
    {
      "epoch": 0.00026658935546875,
      "step": 43678,
      "training_step_time": 0.41991353034973145
    },
    {
      "epoch": 0.000266595458984375,
      "model_forward_time": 0.11459732055664062,
      "step": 43679
    },
    {
      "epoch": 0.000266595458984375,
      "step": 43679,
      "training_step_time": 0.3893709182739258
    },
    {
      "epoch": 0.0002666015625,
      "grad_norm": 0.136795774102211,
      "learning_rate": 1.8899436081059975e-05,
      "loss": 0.0399,
      "step": 43680
    },
    {
      "epoch": 0.0002666015625,
      "model_forward_time": 0.11424732208251953,
      "step": 43680
    },
    {
      "epoch": 0.0002666015625,
      "step": 43680,
      "training_step_time": 0.5363571643829346
    },
    {
      "epoch": 0.000266607666015625,
      "model_forward_time": 0.11487102508544922,
      "step": 43681
    },
    {
      "epoch": 0.000266607666015625,
      "step": 43681,
      "training_step_time": 0.39586639404296875
    },
    {
      "epoch": 0.00026661376953125,
      "model_forward_time": 0.11488747596740723,
      "step": 43682
    },
    {
      "epoch": 0.00026661376953125,
      "step": 43682,
      "training_step_time": 0.39612865447998047
    },
    {
      "epoch": 0.000266619873046875,
      "model_forward_time": 0.11504316329956055,
      "step": 43683
    },
    {
      "epoch": 0.000266619873046875,
      "step": 43683,
      "training_step_time": 0.38812971115112305
    },
    {
      "epoch": 0.0002666259765625,
      "model_forward_time": 0.11536383628845215,
      "step": 43684
    },
    {
      "epoch": 0.0002666259765625,
      "step": 43684,
      "training_step_time": 0.3886394500732422
    },
    {
      "epoch": 0.000266632080078125,
      "model_forward_time": 0.11545634269714355,
      "step": 43685
    },
    {
      "epoch": 0.000266632080078125,
      "step": 43685,
      "training_step_time": 0.3939781188964844
    },
    {
      "epoch": 0.00026663818359375,
      "model_forward_time": 0.11537384986877441,
      "step": 43686
    },
    {
      "epoch": 0.00026663818359375,
      "step": 43686,
      "training_step_time": 0.5733306407928467
    },
    {
      "epoch": 0.000266644287109375,
      "model_forward_time": 0.11544942855834961,
      "step": 43687
    },
    {
      "epoch": 0.000266644287109375,
      "step": 43687,
      "training_step_time": 0.3803255558013916
    },
    {
      "epoch": 0.000266650390625,
      "model_forward_time": 0.11499142646789551,
      "step": 43688
    },
    {
      "epoch": 0.000266650390625,
      "step": 43688,
      "training_step_time": 0.41002917289733887
    },
    {
      "epoch": 0.000266656494140625,
      "model_forward_time": 0.11538839340209961,
      "step": 43689
    },
    {
      "epoch": 0.000266656494140625,
      "step": 43689,
      "training_step_time": 0.4450867176055908
    },
    {
      "epoch": 0.00026666259765625,
      "grad_norm": 0.08563889563083649,
      "learning_rate": 1.887786279656482e-05,
      "loss": 0.0366,
      "step": 43690
    },
    {
      "epoch": 0.00026666259765625,
      "model_forward_time": 0.1154026985168457,
      "step": 43690
    },
    {
      "epoch": 0.00026666259765625,
      "step": 43690,
      "training_step_time": 0.4058825969696045
    },
    {
      "epoch": 0.000266668701171875,
      "model_forward_time": 0.1153111457824707,
      "step": 43691
    },
    {
      "epoch": 0.000266668701171875,
      "step": 43691,
      "training_step_time": 0.4300496578216553
    },
    {
      "epoch": 0.0002666748046875,
      "model_forward_time": 0.11480522155761719,
      "step": 43692
    },
    {
      "epoch": 0.0002666748046875,
      "step": 43692,
      "training_step_time": 0.47440099716186523
    },
    {
      "epoch": 0.000266680908203125,
      "model_forward_time": 0.11511111259460449,
      "step": 43693
    },
    {
      "epoch": 0.000266680908203125,
      "step": 43693,
      "training_step_time": 0.42104268074035645
    },
    {
      "epoch": 0.00026668701171875,
      "model_forward_time": 0.11477208137512207,
      "step": 43694
    },
    {
      "epoch": 0.00026668701171875,
      "step": 43694,
      "training_step_time": 0.3992745876312256
    },
    {
      "epoch": 0.000266693115234375,
      "model_forward_time": 0.11575436592102051,
      "step": 43695
    },
    {
      "epoch": 0.000266693115234375,
      "step": 43695,
      "training_step_time": 0.41118431091308594
    },
    {
      "epoch": 0.00026669921875,
      "model_forward_time": 0.1148827075958252,
      "step": 43696
    },
    {
      "epoch": 0.00026669921875,
      "step": 43696,
      "training_step_time": 0.39935779571533203
    },
    {
      "epoch": 0.000266705322265625,
      "model_forward_time": 0.11541008949279785,
      "step": 43697
    },
    {
      "epoch": 0.000266705322265625,
      "step": 43697,
      "training_step_time": 0.4003598690032959
    },
    {
      "epoch": 0.00026671142578125,
      "model_forward_time": 0.11492657661437988,
      "step": 43698
    },
    {
      "epoch": 0.00026671142578125,
      "step": 43698,
      "training_step_time": 0.45290684700012207
    },
    {
      "epoch": 0.000266717529296875,
      "model_forward_time": 0.11471176147460938,
      "step": 43699
    },
    {
      "epoch": 0.000266717529296875,
      "step": 43699,
      "training_step_time": 0.40044212341308594
    },
    {
      "epoch": 0.0002667236328125,
      "grad_norm": 0.08848258852958679,
      "learning_rate": 1.8856298966153212e-05,
      "loss": 0.0388,
      "step": 43700
    },
    {
      "epoch": 0.0002667236328125,
      "model_forward_time": 0.1148080825805664,
      "step": 43700
    },
    {
      "epoch": 0.0002667236328125,
      "step": 43700,
      "training_step_time": 0.434551477432251
    },
    {
      "epoch": 0.000266729736328125,
      "model_forward_time": 0.11492657661437988,
      "step": 43701
    },
    {
      "epoch": 0.000266729736328125,
      "step": 43701,
      "training_step_time": 0.4077575206756592
    },
    {
      "epoch": 0.00026673583984375,
      "model_forward_time": 0.1150660514831543,
      "step": 43702
    },
    {
      "epoch": 0.00026673583984375,
      "step": 43702,
      "training_step_time": 0.4258296489715576
    },
    {
      "epoch": 0.000266741943359375,
      "model_forward_time": 0.11554956436157227,
      "step": 43703
    },
    {
      "epoch": 0.000266741943359375,
      "step": 43703,
      "training_step_time": 0.4296441078186035
    },
    {
      "epoch": 0.000266748046875,
      "model_forward_time": 0.11431002616882324,
      "step": 43704
    },
    {
      "epoch": 0.000266748046875,
      "step": 43704,
      "training_step_time": 0.5768780708312988
    },
    {
      "epoch": 0.000266754150390625,
      "model_forward_time": 0.11466360092163086,
      "step": 43705
    },
    {
      "epoch": 0.000266754150390625,
      "step": 43705,
      "training_step_time": 0.48784756660461426
    },
    {
      "epoch": 0.00026676025390625,
      "model_forward_time": 0.11570310592651367,
      "step": 43706
    },
    {
      "epoch": 0.00026676025390625,
      "step": 43706,
      "training_step_time": 0.390488862991333
    },
    {
      "epoch": 0.000266766357421875,
      "model_forward_time": 0.11498212814331055,
      "step": 43707
    },
    {
      "epoch": 0.000266766357421875,
      "step": 43707,
      "training_step_time": 0.40394067764282227
    },
    {
      "epoch": 0.0002667724609375,
      "model_forward_time": 0.11467933654785156,
      "step": 43708
    },
    {
      "epoch": 0.0002667724609375,
      "step": 43708,
      "training_step_time": 0.38741517066955566
    },
    {
      "epoch": 0.000266778564453125,
      "model_forward_time": 0.11519074440002441,
      "step": 43709
    },
    {
      "epoch": 0.000266778564453125,
      "step": 43709,
      "training_step_time": 0.3770456314086914
    },
    {
      "epoch": 0.00026678466796875,
      "grad_norm": 0.08966994285583496,
      "learning_rate": 1.8834744596375666e-05,
      "loss": 0.0378,
      "step": 43710
    },
    {
      "epoch": 0.00026678466796875,
      "model_forward_time": 0.1156919002532959,
      "step": 43710
    },
    {
      "epoch": 0.00026678466796875,
      "step": 43710,
      "training_step_time": 0.3927125930786133
    },
    {
      "epoch": 0.000266790771484375,
      "model_forward_time": 0.1166830062866211,
      "step": 43711
    },
    {
      "epoch": 0.000266790771484375,
      "step": 43711,
      "training_step_time": 0.3982057571411133
    },
    {
      "epoch": 0.000266796875,
      "model_forward_time": 0.11575126647949219,
      "step": 43712
    },
    {
      "epoch": 0.000266796875,
      "step": 43712,
      "training_step_time": 0.3934636116027832
    },
    {
      "epoch": 0.000266802978515625,
      "model_forward_time": 0.11532235145568848,
      "step": 43713
    },
    {
      "epoch": 0.000266802978515625,
      "step": 43713,
      "training_step_time": 0.43765687942504883
    },
    {
      "epoch": 0.00026680908203125,
      "model_forward_time": 0.11555075645446777,
      "step": 43714
    },
    {
      "epoch": 0.00026680908203125,
      "step": 43714,
      "training_step_time": 0.43593764305114746
    },
    {
      "epoch": 0.000266815185546875,
      "model_forward_time": 0.11559271812438965,
      "step": 43715
    },
    {
      "epoch": 0.000266815185546875,
      "step": 43715,
      "training_step_time": 0.38207221031188965
    },
    {
      "epoch": 0.0002668212890625,
      "model_forward_time": 0.11577272415161133,
      "step": 43716
    },
    {
      "epoch": 0.0002668212890625,
      "step": 43716,
      "training_step_time": 0.4372367858886719
    },
    {
      "epoch": 0.000266827392578125,
      "model_forward_time": 0.11491847038269043,
      "step": 43717
    },
    {
      "epoch": 0.000266827392578125,
      "step": 43717,
      "training_step_time": 0.3952367305755615
    },
    {
      "epoch": 0.00026683349609375,
      "model_forward_time": 0.11693000793457031,
      "step": 43718
    },
    {
      "epoch": 0.00026683349609375,
      "step": 43718,
      "training_step_time": 0.42675161361694336
    },
    {
      "epoch": 0.000266839599609375,
      "model_forward_time": 0.11466789245605469,
      "step": 43719
    },
    {
      "epoch": 0.000266839599609375,
      "step": 43719,
      "training_step_time": 0.40784215927124023
    },
    {
      "epoch": 0.000266845703125,
      "grad_norm": 0.07742457836866379,
      "learning_rate": 1.881319969377987e-05,
      "loss": 0.0374,
      "step": 43720
    },
    {
      "epoch": 0.000266845703125,
      "model_forward_time": 0.11418008804321289,
      "step": 43720
    },
    {
      "epoch": 0.000266845703125,
      "step": 43720,
      "training_step_time": 0.4598958492279053
    },
    {
      "epoch": 0.000266851806640625,
      "model_forward_time": 0.11495184898376465,
      "step": 43721
    },
    {
      "epoch": 0.000266851806640625,
      "step": 43721,
      "training_step_time": 0.3821241855621338
    },
    {
      "epoch": 0.00026685791015625,
      "model_forward_time": 0.11605978012084961,
      "step": 43722
    },
    {
      "epoch": 0.00026685791015625,
      "step": 43722,
      "training_step_time": 0.4707179069519043
    },
    {
      "epoch": 0.000266864013671875,
      "model_forward_time": 0.11537861824035645,
      "step": 43723
    },
    {
      "epoch": 0.000266864013671875,
      "step": 43723,
      "training_step_time": 0.3932013511657715
    },
    {
      "epoch": 0.0002668701171875,
      "model_forward_time": 0.11564016342163086,
      "step": 43724
    },
    {
      "epoch": 0.0002668701171875,
      "step": 43724,
      "training_step_time": 0.3957526683807373
    },
    {
      "epoch": 0.000266876220703125,
      "model_forward_time": 0.11531567573547363,
      "step": 43725
    },
    {
      "epoch": 0.000266876220703125,
      "step": 43725,
      "training_step_time": 0.3966200351715088
    },
    {
      "epoch": 0.00026688232421875,
      "model_forward_time": 0.11494064331054688,
      "step": 43726
    },
    {
      "epoch": 0.00026688232421875,
      "step": 43726,
      "training_step_time": 0.39137911796569824
    },
    {
      "epoch": 0.000266888427734375,
      "model_forward_time": 0.11467885971069336,
      "step": 43727
    },
    {
      "epoch": 0.000266888427734375,
      "step": 43727,
      "training_step_time": 0.4026217460632324
    },
    {
      "epoch": 0.00026689453125,
      "model_forward_time": 0.11556124687194824,
      "step": 43728
    },
    {
      "epoch": 0.00026689453125,
      "step": 43728,
      "training_step_time": 0.4196758270263672
    },
    {
      "epoch": 0.000266900634765625,
      "model_forward_time": 0.11731934547424316,
      "step": 43729
    },
    {
      "epoch": 0.000266900634765625,
      "step": 43729,
      "training_step_time": 0.43615055084228516
    },
    {
      "epoch": 0.00026690673828125,
      "grad_norm": 0.10555489361286163,
      "learning_rate": 1.8791664264910537e-05,
      "loss": 0.0391,
      "step": 43730
    },
    {
      "epoch": 0.00026690673828125,
      "model_forward_time": 0.11550450325012207,
      "step": 43730
    },
    {
      "epoch": 0.00026690673828125,
      "step": 43730,
      "training_step_time": 0.40224647521972656
    },
    {
      "epoch": 0.000266912841796875,
      "model_forward_time": 0.11552095413208008,
      "step": 43731
    },
    {
      "epoch": 0.000266912841796875,
      "step": 43731,
      "training_step_time": 0.41629624366760254
    },
    {
      "epoch": 0.0002669189453125,
      "model_forward_time": 0.11542940139770508,
      "step": 43732
    },
    {
      "epoch": 0.0002669189453125,
      "step": 43732,
      "training_step_time": 0.5027918815612793
    },
    {
      "epoch": 0.000266925048828125,
      "model_forward_time": 0.11532759666442871,
      "step": 43733
    },
    {
      "epoch": 0.000266925048828125,
      "step": 43733,
      "training_step_time": 0.42049527168273926
    },
    {
      "epoch": 0.00026693115234375,
      "model_forward_time": 0.11476278305053711,
      "step": 43734
    },
    {
      "epoch": 0.00026693115234375,
      "step": 43734,
      "training_step_time": 0.4075753688812256
    },
    {
      "epoch": 0.000266937255859375,
      "model_forward_time": 0.11525106430053711,
      "step": 43735
    },
    {
      "epoch": 0.000266937255859375,
      "step": 43735,
      "training_step_time": 0.42420172691345215
    },
    {
      "epoch": 0.000266943359375,
      "model_forward_time": 0.11478376388549805,
      "step": 43736
    },
    {
      "epoch": 0.000266943359375,
      "step": 43736,
      "training_step_time": 0.48363447189331055
    },
    {
      "epoch": 0.000266949462890625,
      "model_forward_time": 0.11479616165161133,
      "step": 43737
    },
    {
      "epoch": 0.000266949462890625,
      "step": 43737,
      "training_step_time": 0.3877124786376953
    },
    {
      "epoch": 0.00026695556640625,
      "model_forward_time": 0.11556100845336914,
      "step": 43738
    },
    {
      "epoch": 0.00026695556640625,
      "step": 43738,
      "training_step_time": 0.3916189670562744
    },
    {
      "epoch": 0.000266961669921875,
      "model_forward_time": 0.11553835868835449,
      "step": 43739
    },
    {
      "epoch": 0.000266961669921875,
      "step": 43739,
      "training_step_time": 0.37964558601379395
    },
    {
      "epoch": 0.0002669677734375,
      "grad_norm": 0.08143419027328491,
      "learning_rate": 1.877013831630961e-05,
      "loss": 0.0385,
      "step": 43740
    },
    {
      "epoch": 0.0002669677734375,
      "model_forward_time": 0.11520242691040039,
      "step": 43740
    },
    {
      "epoch": 0.0002669677734375,
      "step": 43740,
      "training_step_time": 0.39743947982788086
    },
    {
      "epoch": 0.000266973876953125,
      "model_forward_time": 0.11523199081420898,
      "step": 43741
    },
    {
      "epoch": 0.000266973876953125,
      "step": 43741,
      "training_step_time": 0.3993527889251709
    },
    {
      "epoch": 0.00026697998046875,
      "model_forward_time": 0.11519312858581543,
      "step": 43742
    },
    {
      "epoch": 0.00026697998046875,
      "step": 43742,
      "training_step_time": 0.40950751304626465
    },
    {
      "epoch": 0.000266986083984375,
      "model_forward_time": 0.11492919921875,
      "step": 43743
    },
    {
      "epoch": 0.000266986083984375,
      "step": 43743,
      "training_step_time": 0.4412558078765869
    },
    {
      "epoch": 0.0002669921875,
      "model_forward_time": 0.11489748954772949,
      "step": 43744
    },
    {
      "epoch": 0.0002669921875,
      "step": 43744,
      "training_step_time": 0.3939082622528076
    },
    {
      "epoch": 0.000266998291015625,
      "model_forward_time": 0.11567115783691406,
      "step": 43745
    },
    {
      "epoch": 0.000266998291015625,
      "step": 43745,
      "training_step_time": 0.3858826160430908
    },
    {
      "epoch": 0.00026700439453125,
      "model_forward_time": 0.1154322624206543,
      "step": 43746
    },
    {
      "epoch": 0.00026700439453125,
      "step": 43746,
      "training_step_time": 0.47547292709350586
    },
    {
      "epoch": 0.000267010498046875,
      "model_forward_time": 0.11842775344848633,
      "step": 43747
    },
    {
      "epoch": 0.000267010498046875,
      "step": 43747,
      "training_step_time": 0.45816898345947266
    },
    {
      "epoch": 0.0002670166015625,
      "model_forward_time": 0.1191554069519043,
      "step": 43748
    },
    {
      "epoch": 0.0002670166015625,
      "step": 43748,
      "training_step_time": 0.4331846237182617
    },
    {
      "epoch": 0.000267022705078125,
      "model_forward_time": 0.11794519424438477,
      "step": 43749
    },
    {
      "epoch": 0.000267022705078125,
      "step": 43749,
      "training_step_time": 0.4226679801940918
    },
    {
      "epoch": 0.00026702880859375,
      "grad_norm": 0.10127520561218262,
      "learning_rate": 1.874862185451608e-05,
      "loss": 0.0386,
      "step": 43750
    },
    {
      "epoch": 0.00026702880859375,
      "model_forward_time": 0.11536169052124023,
      "step": 43750
    },
    {
      "epoch": 0.00026702880859375,
      "step": 43750,
      "training_step_time": 0.3885762691497803
    },
    {
      "epoch": 0.000267034912109375,
      "model_forward_time": 0.11484575271606445,
      "step": 43751
    },
    {
      "epoch": 0.000267034912109375,
      "step": 43751,
      "training_step_time": 0.37429189682006836
    },
    {
      "epoch": 0.000267041015625,
      "model_forward_time": 0.11527681350708008,
      "step": 43752
    },
    {
      "epoch": 0.000267041015625,
      "step": 43752,
      "training_step_time": 0.392031192779541
    },
    {
      "epoch": 0.000267047119140625,
      "model_forward_time": 0.11431574821472168,
      "step": 43753
    },
    {
      "epoch": 0.000267047119140625,
      "step": 43753,
      "training_step_time": 0.4048311710357666
    },
    {
      "epoch": 0.00026705322265625,
      "model_forward_time": 0.11460256576538086,
      "step": 43754
    },
    {
      "epoch": 0.00026705322265625,
      "step": 43754,
      "training_step_time": 0.39644336700439453
    },
    {
      "epoch": 0.000267059326171875,
      "model_forward_time": 0.11524057388305664,
      "step": 43755
    },
    {
      "epoch": 0.000267059326171875,
      "step": 43755,
      "training_step_time": 0.8963668346405029
    },
    {
      "epoch": 0.0002670654296875,
      "model_forward_time": 0.11548304557800293,
      "step": 43756
    },
    {
      "epoch": 0.0002670654296875,
      "step": 43756,
      "training_step_time": 0.43883800506591797
    },
    {
      "epoch": 0.000267071533203125,
      "model_forward_time": 0.11488723754882812,
      "step": 43757
    },
    {
      "epoch": 0.000267071533203125,
      "step": 43757,
      "training_step_time": 0.3847687244415283
    },
    {
      "epoch": 0.00026707763671875,
      "model_forward_time": 0.11377763748168945,
      "step": 43758
    },
    {
      "epoch": 0.00026707763671875,
      "step": 43758,
      "training_step_time": 0.3935983180999756
    },
    {
      "epoch": 0.000267083740234375,
      "model_forward_time": 0.11471366882324219,
      "step": 43759
    },
    {
      "epoch": 0.000267083740234375,
      "step": 43759,
      "training_step_time": 0.3881223201751709
    },
    {
      "epoch": 0.00026708984375,
      "grad_norm": 0.14372485876083374,
      "learning_rate": 1.872711488606609e-05,
      "loss": 0.0331,
      "step": 43760
    },
    {
      "epoch": 0.00026708984375,
      "model_forward_time": 0.11656975746154785,
      "step": 43760
    },
    {
      "epoch": 0.00026708984375,
      "step": 43760,
      "training_step_time": 0.39681363105773926
    },
    {
      "epoch": 0.000267095947265625,
      "model_forward_time": 0.11502766609191895,
      "step": 43761
    },
    {
      "epoch": 0.000267095947265625,
      "step": 43761,
      "training_step_time": 0.48024654388427734
    },
    {
      "epoch": 0.00026710205078125,
      "model_forward_time": 0.11507415771484375,
      "step": 43762
    },
    {
      "epoch": 0.00026710205078125,
      "step": 43762,
      "training_step_time": 0.46979236602783203
    },
    {
      "epoch": 0.000267108154296875,
      "model_forward_time": 0.11536908149719238,
      "step": 43763
    },
    {
      "epoch": 0.000267108154296875,
      "step": 43763,
      "training_step_time": 0.45735907554626465
    },
    {
      "epoch": 0.0002671142578125,
      "model_forward_time": 0.11512875556945801,
      "step": 43764
    },
    {
      "epoch": 0.0002671142578125,
      "step": 43764,
      "training_step_time": 0.4253511428833008
    },
    {
      "epoch": 0.000267120361328125,
      "model_forward_time": 0.11545920372009277,
      "step": 43765
    },
    {
      "epoch": 0.000267120361328125,
      "step": 43765,
      "training_step_time": 0.3966550827026367
    },
    {
      "epoch": 0.00026712646484375,
      "model_forward_time": 0.11487674713134766,
      "step": 43766
    },
    {
      "epoch": 0.00026712646484375,
      "step": 43766,
      "training_step_time": 0.3907322883605957
    },
    {
      "epoch": 0.000267132568359375,
      "model_forward_time": 0.11462783813476562,
      "step": 43767
    },
    {
      "epoch": 0.000267132568359375,
      "step": 43767,
      "training_step_time": 0.39533543586730957
    },
    {
      "epoch": 0.000267138671875,
      "model_forward_time": 0.1156778335571289,
      "step": 43768
    },
    {
      "epoch": 0.000267138671875,
      "step": 43768,
      "training_step_time": 0.406421422958374
    },
    {
      "epoch": 0.000267144775390625,
      "model_forward_time": 0.11528539657592773,
      "step": 43769
    },
    {
      "epoch": 0.000267144775390625,
      "step": 43769,
      "training_step_time": 0.41741204261779785
    },
    {
      "epoch": 0.00026715087890625,
      "grad_norm": 0.09141314029693604,
      "learning_rate": 1.8705617417492883e-05,
      "loss": 0.0366,
      "step": 43770
    },
    {
      "epoch": 0.00026715087890625,
      "model_forward_time": 0.1163473129272461,
      "step": 43770
    },
    {
      "epoch": 0.00026715087890625,
      "step": 43770,
      "training_step_time": 0.40647244453430176
    },
    {
      "epoch": 0.000267156982421875,
      "model_forward_time": 0.11675214767456055,
      "step": 43771
    },
    {
      "epoch": 0.000267156982421875,
      "step": 43771,
      "training_step_time": 0.40261173248291016
    },
    {
      "epoch": 0.0002671630859375,
      "model_forward_time": 0.11544299125671387,
      "step": 43772
    },
    {
      "epoch": 0.0002671630859375,
      "step": 43772,
      "training_step_time": 0.3888077735900879
    },
    {
      "epoch": 0.000267169189453125,
      "model_forward_time": 0.11586308479309082,
      "step": 43773
    },
    {
      "epoch": 0.000267169189453125,
      "step": 43773,
      "training_step_time": 0.3849940299987793
    },
    {
      "epoch": 0.00026717529296875,
      "model_forward_time": 0.11552619934082031,
      "step": 43774
    },
    {
      "epoch": 0.00026717529296875,
      "step": 43774,
      "training_step_time": 0.3854329586029053
    },
    {
      "epoch": 0.000267181396484375,
      "model_forward_time": 0.1158597469329834,
      "step": 43775
    },
    {
      "epoch": 0.000267181396484375,
      "step": 43775,
      "training_step_time": 0.4294123649597168
    },
    {
      "epoch": 0.0002671875,
      "model_forward_time": 0.11530470848083496,
      "step": 43776
    },
    {
      "epoch": 0.0002671875,
      "step": 43776,
      "training_step_time": 0.4720323085784912
    },
    {
      "epoch": 0.000267193603515625,
      "model_forward_time": 0.11533808708190918,
      "step": 43777
    },
    {
      "epoch": 0.000267193603515625,
      "step": 43777,
      "training_step_time": 0.4565713405609131
    },
    {
      "epoch": 0.00026719970703125,
      "model_forward_time": 0.11533069610595703,
      "step": 43778
    },
    {
      "epoch": 0.00026719970703125,
      "step": 43778,
      "training_step_time": 0.41432738304138184
    },
    {
      "epoch": 0.000267205810546875,
      "model_forward_time": 0.1148688793182373,
      "step": 43779
    },
    {
      "epoch": 0.000267205810546875,
      "step": 43779,
      "training_step_time": 0.43851304054260254
    },
    {
      "epoch": 0.0002672119140625,
      "grad_norm": 0.1316787749528885,
      "learning_rate": 1.868412945532681e-05,
      "loss": 0.0382,
      "step": 43780
    },
    {
      "epoch": 0.0002672119140625,
      "model_forward_time": 0.11517477035522461,
      "step": 43780
    },
    {
      "epoch": 0.0002672119140625,
      "step": 43780,
      "training_step_time": 0.38663196563720703
    },
    {
      "epoch": 0.000267218017578125,
      "model_forward_time": 0.11513733863830566,
      "step": 43781
    },
    {
      "epoch": 0.000267218017578125,
      "step": 43781,
      "training_step_time": 0.3894007205963135
    },
    {
      "epoch": 0.00026722412109375,
      "model_forward_time": 0.1151740550994873,
      "step": 43782
    },
    {
      "epoch": 0.00026722412109375,
      "step": 43782,
      "training_step_time": 0.405454158782959
    },
    {
      "epoch": 0.000267230224609375,
      "model_forward_time": 0.1159965991973877,
      "step": 43783
    },
    {
      "epoch": 0.000267230224609375,
      "step": 43783,
      "training_step_time": 0.4415464401245117
    },
    {
      "epoch": 0.000267236328125,
      "model_forward_time": 0.11463618278503418,
      "step": 43784
    },
    {
      "epoch": 0.000267236328125,
      "step": 43784,
      "training_step_time": 0.3892555236816406
    },
    {
      "epoch": 0.000267242431640625,
      "model_forward_time": 0.11583900451660156,
      "step": 43785
    },
    {
      "epoch": 0.000267242431640625,
      "step": 43785,
      "training_step_time": 0.39173102378845215
    },
    {
      "epoch": 0.00026724853515625,
      "model_forward_time": 0.1152963638305664,
      "step": 43786
    },
    {
      "epoch": 0.00026724853515625,
      "step": 43786,
      "training_step_time": 0.3878192901611328
    },
    {
      "epoch": 0.000267254638671875,
      "model_forward_time": 0.1158602237701416,
      "step": 43787
    },
    {
      "epoch": 0.000267254638671875,
      "step": 43787,
      "training_step_time": 0.3936924934387207
    },
    {
      "epoch": 0.0002672607421875,
      "model_forward_time": 0.11510205268859863,
      "step": 43788
    },
    {
      "epoch": 0.0002672607421875,
      "step": 43788,
      "training_step_time": 0.3970208168029785
    },
    {
      "epoch": 0.000267266845703125,
      "model_forward_time": 0.11556553840637207,
      "step": 43789
    },
    {
      "epoch": 0.000267266845703125,
      "step": 43789,
      "training_step_time": 0.3961763381958008
    },
    {
      "epoch": 0.00026727294921875,
      "grad_norm": 0.0979338139295578,
      "learning_rate": 1.8662651006095387e-05,
      "loss": 0.0334,
      "step": 43790
    },
    {
      "epoch": 0.00026727294921875,
      "model_forward_time": 0.11580705642700195,
      "step": 43790
    },
    {
      "epoch": 0.00026727294921875,
      "step": 43790,
      "training_step_time": 0.4367847442626953
    },
    {
      "epoch": 0.000267279052734375,
      "model_forward_time": 0.11466717720031738,
      "step": 43791
    },
    {
      "epoch": 0.000267279052734375,
      "step": 43791,
      "training_step_time": 0.442950963973999
    },
    {
      "epoch": 0.00026728515625,
      "model_forward_time": 0.1146543025970459,
      "step": 43792
    },
    {
      "epoch": 0.00026728515625,
      "step": 43792,
      "training_step_time": 0.49323511123657227
    },
    {
      "epoch": 0.000267291259765625,
      "model_forward_time": 0.11818695068359375,
      "step": 43793
    },
    {
      "epoch": 0.000267291259765625,
      "step": 43793,
      "training_step_time": 0.4952657222747803
    },
    {
      "epoch": 0.00026729736328125,
      "model_forward_time": 0.11525511741638184,
      "step": 43794
    },
    {
      "epoch": 0.00026729736328125,
      "step": 43794,
      "training_step_time": 0.4000067710876465
    },
    {
      "epoch": 0.000267303466796875,
      "model_forward_time": 0.11566805839538574,
      "step": 43795
    },
    {
      "epoch": 0.000267303466796875,
      "step": 43795,
      "training_step_time": 0.4043288230895996
    },
    {
      "epoch": 0.0002673095703125,
      "model_forward_time": 0.11543965339660645,
      "step": 43796
    },
    {
      "epoch": 0.0002673095703125,
      "step": 43796,
      "training_step_time": 0.4345369338989258
    },
    {
      "epoch": 0.000267315673828125,
      "model_forward_time": 0.1148374080657959,
      "step": 43797
    },
    {
      "epoch": 0.000267315673828125,
      "step": 43797,
      "training_step_time": 0.41693949699401855
    },
    {
      "epoch": 0.00026732177734375,
      "model_forward_time": 0.11534905433654785,
      "step": 43798
    },
    {
      "epoch": 0.00026732177734375,
      "step": 43798,
      "training_step_time": 0.39081287384033203
    },
    {
      "epoch": 0.000267327880859375,
      "model_forward_time": 0.11593866348266602,
      "step": 43799
    },
    {
      "epoch": 0.000267327880859375,
      "step": 43799,
      "training_step_time": 0.38927292823791504
    },
    {
      "epoch": 0.000267333984375,
      "grad_norm": 0.10063480585813522,
      "learning_rate": 1.8641182076323148e-05,
      "loss": 0.0366,
      "step": 43800
    },
    {
      "epoch": 0.000267333984375,
      "model_forward_time": 0.11464333534240723,
      "step": 43800
    },
    {
      "epoch": 0.000267333984375,
      "step": 43800,
      "training_step_time": 0.41278791427612305
    },
    {
      "epoch": 0.000267340087890625,
      "model_forward_time": 0.11488056182861328,
      "step": 43801
    },
    {
      "epoch": 0.000267340087890625,
      "step": 43801,
      "training_step_time": 0.3963043689727783
    },
    {
      "epoch": 0.00026734619140625,
      "model_forward_time": 0.11477899551391602,
      "step": 43802
    },
    {
      "epoch": 0.00026734619140625,
      "step": 43802,
      "training_step_time": 0.3987114429473877
    },
    {
      "epoch": 0.000267352294921875,
      "model_forward_time": 0.11485552787780762,
      "step": 43803
    },
    {
      "epoch": 0.000267352294921875,
      "step": 43803,
      "training_step_time": 0.39035654067993164
    },
    {
      "epoch": 0.0002673583984375,
      "model_forward_time": 0.11561226844787598,
      "step": 43804
    },
    {
      "epoch": 0.0002673583984375,
      "step": 43804,
      "training_step_time": 0.3941628932952881
    },
    {
      "epoch": 0.000267364501953125,
      "model_forward_time": 0.11571931838989258,
      "step": 43805
    },
    {
      "epoch": 0.000267364501953125,
      "step": 43805,
      "training_step_time": 0.5108625888824463
    },
    {
      "epoch": 0.00026737060546875,
      "model_forward_time": 0.11513614654541016,
      "step": 43806
    },
    {
      "epoch": 0.00026737060546875,
      "step": 43806,
      "training_step_time": 0.5035507678985596
    },
    {
      "epoch": 0.000267376708984375,
      "model_forward_time": 0.11578154563903809,
      "step": 43807
    },
    {
      "epoch": 0.000267376708984375,
      "step": 43807,
      "training_step_time": 0.46950483322143555
    },
    {
      "epoch": 0.0002673828125,
      "model_forward_time": 0.11466741561889648,
      "step": 43808
    },
    {
      "epoch": 0.0002673828125,
      "step": 43808,
      "training_step_time": 0.3891420364379883
    },
    {
      "epoch": 0.000267388916015625,
      "model_forward_time": 0.11530828475952148,
      "step": 43809
    },
    {
      "epoch": 0.000267388916015625,
      "step": 43809,
      "training_step_time": 0.48665499687194824
    },
    {
      "epoch": 0.00026739501953125,
      "grad_norm": 0.09380869567394257,
      "learning_rate": 1.8619722672531825e-05,
      "loss": 0.0348,
      "step": 43810
    },
    {
      "epoch": 0.00026739501953125,
      "model_forward_time": 0.11467409133911133,
      "step": 43810
    },
    {
      "epoch": 0.00026739501953125,
      "step": 43810,
      "training_step_time": 0.38932037353515625
    },
    {
      "epoch": 0.000267401123046875,
      "model_forward_time": 0.11565899848937988,
      "step": 43811
    },
    {
      "epoch": 0.000267401123046875,
      "step": 43811,
      "training_step_time": 0.41901612281799316
    },
    {
      "epoch": 0.0002674072265625,
      "model_forward_time": 0.11530160903930664,
      "step": 43812
    },
    {
      "epoch": 0.0002674072265625,
      "step": 43812,
      "training_step_time": 0.41036438941955566
    },
    {
      "epoch": 0.000267413330078125,
      "model_forward_time": 0.11541962623596191,
      "step": 43813
    },
    {
      "epoch": 0.000267413330078125,
      "step": 43813,
      "training_step_time": 0.3985152244567871
    },
    {
      "epoch": 0.00026741943359375,
      "model_forward_time": 0.11622405052185059,
      "step": 43814
    },
    {
      "epoch": 0.00026741943359375,
      "step": 43814,
      "training_step_time": 0.3936593532562256
    },
    {
      "epoch": 0.000267425537109375,
      "model_forward_time": 0.1157083511352539,
      "step": 43815
    },
    {
      "epoch": 0.000267425537109375,
      "step": 43815,
      "training_step_time": 0.7795908451080322
    },
    {
      "epoch": 0.000267431640625,
      "model_forward_time": 0.11446714401245117,
      "step": 43816
    },
    {
      "epoch": 0.000267431640625,
      "step": 43816,
      "training_step_time": 0.38762831687927246
    },
    {
      "epoch": 0.000267437744140625,
      "model_forward_time": 0.11479544639587402,
      "step": 43817
    },
    {
      "epoch": 0.000267437744140625,
      "step": 43817,
      "training_step_time": 0.3784646987915039
    },
    {
      "epoch": 0.00026744384765625,
      "model_forward_time": 0.11528515815734863,
      "step": 43818
    },
    {
      "epoch": 0.00026744384765625,
      "step": 43818,
      "training_step_time": 0.3876020908355713
    },
    {
      "epoch": 0.000267449951171875,
      "model_forward_time": 0.11472249031066895,
      "step": 43819
    },
    {
      "epoch": 0.000267449951171875,
      "step": 43819,
      "training_step_time": 0.4320805072784424
    },
    {
      "epoch": 0.0002674560546875,
      "grad_norm": 0.10774502903223038,
      "learning_rate": 1.8598272801240213e-05,
      "loss": 0.0395,
      "step": 43820
    },
    {
      "epoch": 0.0002674560546875,
      "model_forward_time": 0.1146383285522461,
      "step": 43820
    },
    {
      "epoch": 0.0002674560546875,
      "step": 43820,
      "training_step_time": 0.46459269523620605
    },
    {
      "epoch": 0.000267462158203125,
      "model_forward_time": 0.12333989143371582,
      "step": 43821
    },
    {
      "epoch": 0.000267462158203125,
      "step": 43821,
      "training_step_time": 0.49747538566589355
    },
    {
      "epoch": 0.00026746826171875,
      "model_forward_time": 0.11443805694580078,
      "step": 43822
    },
    {
      "epoch": 0.00026746826171875,
      "step": 43822,
      "training_step_time": 0.39458703994750977
    },
    {
      "epoch": 0.000267474365234375,
      "model_forward_time": 0.11541175842285156,
      "step": 43823
    },
    {
      "epoch": 0.000267474365234375,
      "step": 43823,
      "training_step_time": 0.456662654876709
    },
    {
      "epoch": 0.00026748046875,
      "model_forward_time": 0.11489605903625488,
      "step": 43824
    },
    {
      "epoch": 0.00026748046875,
      "step": 43824,
      "training_step_time": 0.4087715148925781
    },
    {
      "epoch": 0.000267486572265625,
      "model_forward_time": 0.11625266075134277,
      "step": 43825
    },
    {
      "epoch": 0.000267486572265625,
      "step": 43825,
      "training_step_time": 0.39310622215270996
    },
    {
      "epoch": 0.00026749267578125,
      "model_forward_time": 0.11487150192260742,
      "step": 43826
    },
    {
      "epoch": 0.00026749267578125,
      "step": 43826,
      "training_step_time": 0.3903205394744873
    },
    {
      "epoch": 0.000267498779296875,
      "model_forward_time": 0.11461400985717773,
      "step": 43827
    },
    {
      "epoch": 0.000267498779296875,
      "step": 43827,
      "training_step_time": 0.4021925926208496
    },
    {
      "epoch": 0.0002675048828125,
      "model_forward_time": 0.1152641773223877,
      "step": 43828
    },
    {
      "epoch": 0.0002675048828125,
      "step": 43828,
      "training_step_time": 0.39154934883117676
    },
    {
      "epoch": 0.000267510986328125,
      "model_forward_time": 0.11559677124023438,
      "step": 43829
    },
    {
      "epoch": 0.000267510986328125,
      "step": 43829,
      "training_step_time": 0.4008162021636963
    },
    {
      "epoch": 0.00026751708984375,
      "grad_norm": 0.11686928570270538,
      "learning_rate": 1.85768324689642e-05,
      "loss": 0.0337,
      "step": 43830
    },
    {
      "epoch": 0.00026751708984375,
      "model_forward_time": 0.11613798141479492,
      "step": 43830
    },
    {
      "epoch": 0.00026751708984375,
      "step": 43830,
      "training_step_time": 0.41784238815307617
    },
    {
      "epoch": 0.000267523193359375,
      "model_forward_time": 0.11492228507995605,
      "step": 43831
    },
    {
      "epoch": 0.000267523193359375,
      "step": 43831,
      "training_step_time": 0.4029693603515625
    },
    {
      "epoch": 0.000267529296875,
      "model_forward_time": 0.11520814895629883,
      "step": 43832
    },
    {
      "epoch": 0.000267529296875,
      "step": 43832,
      "training_step_time": 0.3924410343170166
    },
    {
      "epoch": 0.000267535400390625,
      "model_forward_time": 0.11510300636291504,
      "step": 43833
    },
    {
      "epoch": 0.000267535400390625,
      "step": 43833,
      "training_step_time": 0.4116513729095459
    },
    {
      "epoch": 0.00026754150390625,
      "model_forward_time": 0.11483168601989746,
      "step": 43834
    },
    {
      "epoch": 0.00026754150390625,
      "step": 43834,
      "training_step_time": 0.4370255470275879
    },
    {
      "epoch": 0.000267547607421875,
      "model_forward_time": 0.11510920524597168,
      "step": 43835
    },
    {
      "epoch": 0.000267547607421875,
      "step": 43835,
      "training_step_time": 0.4856734275817871
    },
    {
      "epoch": 0.0002675537109375,
      "model_forward_time": 0.11483955383300781,
      "step": 43836
    },
    {
      "epoch": 0.0002675537109375,
      "step": 43836,
      "training_step_time": 0.39672088623046875
    },
    {
      "epoch": 0.000267559814453125,
      "model_forward_time": 0.12109017372131348,
      "step": 43837
    },
    {
      "epoch": 0.000267559814453125,
      "step": 43837,
      "training_step_time": 0.4409823417663574
    },
    {
      "epoch": 0.00026756591796875,
      "model_forward_time": 0.11905622482299805,
      "step": 43838
    },
    {
      "epoch": 0.00026756591796875,
      "step": 43838,
      "training_step_time": 0.40726137161254883
    },
    {
      "epoch": 0.000267572021484375,
      "model_forward_time": 0.11507725715637207,
      "step": 43839
    },
    {
      "epoch": 0.000267572021484375,
      "step": 43839,
      "training_step_time": 0.3880946636199951
    },
    {
      "epoch": 0.000267578125,
      "grad_norm": 0.0952889695763588,
      "learning_rate": 1.855540168221681e-05,
      "loss": 0.0375,
      "step": 43840
    },
    {
      "epoch": 0.000267578125,
      "model_forward_time": 0.11587834358215332,
      "step": 43840
    },
    {
      "epoch": 0.000267578125,
      "step": 43840,
      "training_step_time": 0.38989877700805664
    },
    {
      "epoch": 0.000267584228515625,
      "model_forward_time": 0.1154623031616211,
      "step": 43841
    },
    {
      "epoch": 0.000267584228515625,
      "step": 43841,
      "training_step_time": 0.3942737579345703
    },
    {
      "epoch": 0.00026759033203125,
      "model_forward_time": 0.11462068557739258,
      "step": 43842
    },
    {
      "epoch": 0.00026759033203125,
      "step": 43842,
      "training_step_time": 0.3999288082122803
    },
    {
      "epoch": 0.000267596435546875,
      "model_forward_time": 0.1166074275970459,
      "step": 43843
    },
    {
      "epoch": 0.000267596435546875,
      "step": 43843,
      "training_step_time": 0.3966078758239746
    },
    {
      "epoch": 0.0002676025390625,
      "model_forward_time": 0.11586427688598633,
      "step": 43844
    },
    {
      "epoch": 0.0002676025390625,
      "step": 43844,
      "training_step_time": 0.41242480278015137
    },
    {
      "epoch": 0.000267608642578125,
      "model_forward_time": 0.11568355560302734,
      "step": 43845
    },
    {
      "epoch": 0.000267608642578125,
      "step": 43845,
      "training_step_time": 0.39681291580200195
    },
    {
      "epoch": 0.00026761474609375,
      "model_forward_time": 0.11503791809082031,
      "step": 43846
    },
    {
      "epoch": 0.00026761474609375,
      "step": 43846,
      "training_step_time": 0.4163503646850586
    },
    {
      "epoch": 0.000267620849609375,
      "model_forward_time": 0.1155848503112793,
      "step": 43847
    },
    {
      "epoch": 0.000267620849609375,
      "step": 43847,
      "training_step_time": 0.37771105766296387
    },
    {
      "epoch": 0.000267626953125,
      "model_forward_time": 0.11555147171020508,
      "step": 43848
    },
    {
      "epoch": 0.000267626953125,
      "step": 43848,
      "training_step_time": 0.4497244358062744
    },
    {
      "epoch": 0.000267633056640625,
      "model_forward_time": 0.1154332160949707,
      "step": 43849
    },
    {
      "epoch": 0.000267633056640625,
      "step": 43849,
      "training_step_time": 0.4600238800048828
    },
    {
      "epoch": 0.00026763916015625,
      "grad_norm": 0.09048379212617874,
      "learning_rate": 1.8533980447508137e-05,
      "loss": 0.0402,
      "step": 43850
    },
    {
      "epoch": 0.00026763916015625,
      "model_forward_time": 0.11574268341064453,
      "step": 43850
    },
    {
      "epoch": 0.00026763916015625,
      "step": 43850,
      "training_step_time": 0.45007824897766113
    },
    {
      "epoch": 0.000267645263671875,
      "model_forward_time": 0.1158447265625,
      "step": 43851
    },
    {
      "epoch": 0.000267645263671875,
      "step": 43851,
      "training_step_time": 0.42778468132019043
    },
    {
      "epoch": 0.0002676513671875,
      "model_forward_time": 0.11497020721435547,
      "step": 43852
    },
    {
      "epoch": 0.0002676513671875,
      "step": 43852,
      "training_step_time": 0.40709543228149414
    },
    {
      "epoch": 0.000267657470703125,
      "model_forward_time": 0.11527609825134277,
      "step": 43853
    },
    {
      "epoch": 0.000267657470703125,
      "step": 43853,
      "training_step_time": 0.38153958320617676
    },
    {
      "epoch": 0.00026766357421875,
      "model_forward_time": 0.11552906036376953,
      "step": 43854
    },
    {
      "epoch": 0.00026766357421875,
      "step": 43854,
      "training_step_time": 0.4109015464782715
    },
    {
      "epoch": 0.000267669677734375,
      "model_forward_time": 0.11606740951538086,
      "step": 43855
    },
    {
      "epoch": 0.000267669677734375,
      "step": 43855,
      "training_step_time": 0.39713358879089355
    },
    {
      "epoch": 0.00026767578125,
      "model_forward_time": 0.11575508117675781,
      "step": 43856
    },
    {
      "epoch": 0.00026767578125,
      "step": 43856,
      "training_step_time": 0.3989694118499756
    },
    {
      "epoch": 0.000267681884765625,
      "model_forward_time": 0.11577701568603516,
      "step": 43857
    },
    {
      "epoch": 0.000267681884765625,
      "step": 43857,
      "training_step_time": 0.3867783546447754
    },
    {
      "epoch": 0.00026768798828125,
      "model_forward_time": 0.11635780334472656,
      "step": 43858
    },
    {
      "epoch": 0.00026768798828125,
      "step": 43858,
      "training_step_time": 0.38831448554992676
    },
    {
      "epoch": 0.000267694091796875,
      "model_forward_time": 0.11569547653198242,
      "step": 43859
    },
    {
      "epoch": 0.000267694091796875,
      "step": 43859,
      "training_step_time": 0.3911292552947998
    },
    {
      "epoch": 0.0002677001953125,
      "grad_norm": 0.1233004778623581,
      "learning_rate": 1.851256877134538e-05,
      "loss": 0.0414,
      "step": 43860
    },
    {
      "epoch": 0.0002677001953125,
      "model_forward_time": 0.11525368690490723,
      "step": 43860
    },
    {
      "epoch": 0.0002677001953125,
      "step": 43860,
      "training_step_time": 0.40925145149230957
    },
    {
      "epoch": 0.000267706298828125,
      "model_forward_time": 0.11609268188476562,
      "step": 43861
    },
    {
      "epoch": 0.000267706298828125,
      "step": 43861,
      "training_step_time": 0.39966368675231934
    },
    {
      "epoch": 0.00026771240234375,
      "model_forward_time": 0.1151437759399414,
      "step": 43862
    },
    {
      "epoch": 0.00026771240234375,
      "step": 43862,
      "training_step_time": 0.39869117736816406
    },
    {
      "epoch": 0.000267718505859375,
      "model_forward_time": 0.11590147018432617,
      "step": 43863
    },
    {
      "epoch": 0.000267718505859375,
      "step": 43863,
      "training_step_time": 0.496854305267334
    },
    {
      "epoch": 0.000267724609375,
      "model_forward_time": 0.11536169052124023,
      "step": 43864
    },
    {
      "epoch": 0.000267724609375,
      "step": 43864,
      "training_step_time": 0.47101259231567383
    },
    {
      "epoch": 0.000267730712890625,
      "model_forward_time": 0.11562180519104004,
      "step": 43865
    },
    {
      "epoch": 0.000267730712890625,
      "step": 43865,
      "training_step_time": 0.5076174736022949
    },
    {
      "epoch": 0.00026773681640625,
      "model_forward_time": 0.11543917655944824,
      "step": 43866
    },
    {
      "epoch": 0.00026773681640625,
      "step": 43866,
      "training_step_time": 0.4271998405456543
    },
    {
      "epoch": 0.000267742919921875,
      "model_forward_time": 0.11552262306213379,
      "step": 43867
    },
    {
      "epoch": 0.000267742919921875,
      "step": 43867,
      "training_step_time": 0.39630842208862305
    },
    {
      "epoch": 0.0002677490234375,
      "model_forward_time": 0.11455154418945312,
      "step": 43868
    },
    {
      "epoch": 0.0002677490234375,
      "step": 43868,
      "training_step_time": 0.4041128158569336
    },
    {
      "epoch": 0.000267755126953125,
      "model_forward_time": 0.11461424827575684,
      "step": 43869
    },
    {
      "epoch": 0.000267755126953125,
      "step": 43869,
      "training_step_time": 0.3926527500152588
    },
    {
      "epoch": 0.00026776123046875,
      "grad_norm": 0.09130825102329254,
      "learning_rate": 1.849116666023288e-05,
      "loss": 0.038,
      "step": 43870
    },
    {
      "epoch": 0.00026776123046875,
      "model_forward_time": 0.11474370956420898,
      "step": 43870
    },
    {
      "epoch": 0.00026776123046875,
      "step": 43870,
      "training_step_time": 0.3973674774169922
    },
    {
      "epoch": 0.000267767333984375,
      "model_forward_time": 0.1158437728881836,
      "step": 43871
    },
    {
      "epoch": 0.000267767333984375,
      "step": 43871,
      "training_step_time": 0.38609933853149414
    },
    {
      "epoch": 0.0002677734375,
      "model_forward_time": 0.1155550479888916,
      "step": 43872
    },
    {
      "epoch": 0.0002677734375,
      "step": 43872,
      "training_step_time": 0.40315699577331543
    },
    {
      "epoch": 0.000267779541015625,
      "model_forward_time": 0.11574769020080566,
      "step": 43873
    },
    {
      "epoch": 0.000267779541015625,
      "step": 43873,
      "training_step_time": 0.4061861038208008
    },
    {
      "epoch": 0.00026778564453125,
      "model_forward_time": 0.11615800857543945,
      "step": 43874
    },
    {
      "epoch": 0.00026778564453125,
      "step": 43874,
      "training_step_time": 0.407102108001709
    },
    {
      "epoch": 0.000267791748046875,
      "model_forward_time": 0.1159815788269043,
      "step": 43875
    },
    {
      "epoch": 0.000267791748046875,
      "step": 43875,
      "training_step_time": 0.4097597599029541
    },
    {
      "epoch": 0.0002677978515625,
      "model_forward_time": 0.11670732498168945,
      "step": 43876
    },
    {
      "epoch": 0.0002677978515625,
      "step": 43876,
      "training_step_time": 0.38662099838256836
    },
    {
      "epoch": 0.000267803955078125,
      "model_forward_time": 0.11580348014831543,
      "step": 43877
    },
    {
      "epoch": 0.000267803955078125,
      "step": 43877,
      "training_step_time": 0.3873152732849121
    },
    {
      "epoch": 0.00026781005859375,
      "model_forward_time": 0.11616683006286621,
      "step": 43878
    },
    {
      "epoch": 0.00026781005859375,
      "step": 43878,
      "training_step_time": 0.39766502380371094
    },
    {
      "epoch": 0.000267816162109375,
      "model_forward_time": 0.11490249633789062,
      "step": 43879
    },
    {
      "epoch": 0.000267816162109375,
      "step": 43879,
      "training_step_time": 0.4323544502258301
    },
    {
      "epoch": 0.000267822265625,
      "grad_norm": 0.09380028396844864,
      "learning_rate": 1.846977412067198e-05,
      "loss": 0.0339,
      "step": 43880
    },
    {
      "epoch": 0.000267822265625,
      "model_forward_time": 0.1155385971069336,
      "step": 43880
    },
    {
      "epoch": 0.000267822265625,
      "step": 43880,
      "training_step_time": 0.5091550350189209
    },
    {
      "epoch": 0.000267828369140625,
      "model_forward_time": 0.11594390869140625,
      "step": 43881
    },
    {
      "epoch": 0.000267828369140625,
      "step": 43881,
      "training_step_time": 0.397721529006958
    },
    {
      "epoch": 0.00026783447265625,
      "model_forward_time": 0.11518430709838867,
      "step": 43882
    },
    {
      "epoch": 0.00026783447265625,
      "step": 43882,
      "training_step_time": 0.40598034858703613
    },
    {
      "epoch": 0.000267840576171875,
      "model_forward_time": 0.11531543731689453,
      "step": 43883
    },
    {
      "epoch": 0.000267840576171875,
      "step": 43883,
      "training_step_time": 0.3700740337371826
    },
    {
      "epoch": 0.0002678466796875,
      "model_forward_time": 0.11469817161560059,
      "step": 43884
    },
    {
      "epoch": 0.0002678466796875,
      "step": 43884,
      "training_step_time": 0.42316460609436035
    },
    {
      "epoch": 0.000267852783203125,
      "model_forward_time": 0.11529803276062012,
      "step": 43885
    },
    {
      "epoch": 0.000267852783203125,
      "step": 43885,
      "training_step_time": 0.39949536323547363
    },
    {
      "epoch": 0.00026785888671875,
      "model_forward_time": 0.11521673202514648,
      "step": 43886
    },
    {
      "epoch": 0.00026785888671875,
      "step": 43886,
      "training_step_time": 0.39573240280151367
    },
    {
      "epoch": 0.000267864990234375,
      "model_forward_time": 0.11532330513000488,
      "step": 43887
    },
    {
      "epoch": 0.000267864990234375,
      "step": 43887,
      "training_step_time": 0.393308162689209
    },
    {
      "epoch": 0.00026787109375,
      "model_forward_time": 0.115234375,
      "step": 43888
    },
    {
      "epoch": 0.00026787109375,
      "step": 43888,
      "training_step_time": 0.3955528736114502
    },
    {
      "epoch": 0.000267877197265625,
      "model_forward_time": 0.11606168746948242,
      "step": 43889
    },
    {
      "epoch": 0.000267877197265625,
      "step": 43889,
      "training_step_time": 0.38319873809814453
    },
    {
      "epoch": 0.00026788330078125,
      "grad_norm": 0.09059420973062515,
      "learning_rate": 1.8448391159161204e-05,
      "loss": 0.034,
      "step": 43890
    },
    {
      "epoch": 0.00026788330078125,
      "model_forward_time": 0.11577987670898438,
      "step": 43890
    },
    {
      "epoch": 0.00026788330078125,
      "step": 43890,
      "training_step_time": 0.4083695411682129
    },
    {
      "epoch": 0.000267889404296875,
      "model_forward_time": 0.11652421951293945,
      "step": 43891
    },
    {
      "epoch": 0.000267889404296875,
      "step": 43891,
      "training_step_time": 0.4140510559082031
    },
    {
      "epoch": 0.0002678955078125,
      "model_forward_time": 0.11620807647705078,
      "step": 43892
    },
    {
      "epoch": 0.0002678955078125,
      "step": 43892,
      "training_step_time": 0.4653465747833252
    },
    {
      "epoch": 0.000267901611328125,
      "model_forward_time": 0.1150674819946289,
      "step": 43893
    },
    {
      "epoch": 0.000267901611328125,
      "step": 43893,
      "training_step_time": 0.4037480354309082
    },
    {
      "epoch": 0.00026790771484375,
      "model_forward_time": 0.11500978469848633,
      "step": 43894
    },
    {
      "epoch": 0.00026790771484375,
      "step": 43894,
      "training_step_time": 0.4173884391784668
    },
    {
      "epoch": 0.000267913818359375,
      "model_forward_time": 0.11534690856933594,
      "step": 43895
    },
    {
      "epoch": 0.000267913818359375,
      "step": 43895,
      "training_step_time": 0.4794316291809082
    },
    {
      "epoch": 0.000267919921875,
      "model_forward_time": 0.1153867244720459,
      "step": 43896
    },
    {
      "epoch": 0.000267919921875,
      "step": 43896,
      "training_step_time": 0.3975675106048584
    },
    {
      "epoch": 0.000267926025390625,
      "model_forward_time": 0.11479520797729492,
      "step": 43897
    },
    {
      "epoch": 0.000267926025390625,
      "step": 43897,
      "training_step_time": 0.39801812171936035
    },
    {
      "epoch": 0.00026793212890625,
      "model_forward_time": 0.11602902412414551,
      "step": 43898
    },
    {
      "epoch": 0.00026793212890625,
      "step": 43898,
      "training_step_time": 0.3923523426055908
    },
    {
      "epoch": 0.000267938232421875,
      "model_forward_time": 0.1149606704711914,
      "step": 43899
    },
    {
      "epoch": 0.000267938232421875,
      "step": 43899,
      "training_step_time": 0.38929247856140137
    },
    {
      "epoch": 0.0002679443359375,
      "grad_norm": 0.0888628140091896,
      "learning_rate": 1.8427017782196127e-05,
      "loss": 0.0347,
      "step": 43900
    },
    {
      "epoch": 0.0002679443359375,
      "model_forward_time": 0.11526703834533691,
      "step": 43900
    },
    {
      "epoch": 0.0002679443359375,
      "step": 43900,
      "training_step_time": 0.391129732131958
    },
    {
      "epoch": 0.000267950439453125,
      "model_forward_time": 0.11588692665100098,
      "step": 43901
    },
    {
      "epoch": 0.000267950439453125,
      "step": 43901,
      "training_step_time": 0.37726783752441406
    },
    {
      "epoch": 0.00026795654296875,
      "model_forward_time": 0.1165003776550293,
      "step": 43902
    },
    {
      "epoch": 0.00026795654296875,
      "step": 43902,
      "training_step_time": 0.39732956886291504
    },
    {
      "epoch": 0.000267962646484375,
      "model_forward_time": 0.11541056632995605,
      "step": 43903
    },
    {
      "epoch": 0.000267962646484375,
      "step": 43903,
      "training_step_time": 0.40004754066467285
    },
    {
      "epoch": 0.00026796875,
      "model_forward_time": 0.11558198928833008,
      "step": 43904
    },
    {
      "epoch": 0.00026796875,
      "step": 43904,
      "training_step_time": 0.4009683132171631
    },
    {
      "epoch": 0.000267974853515625,
      "model_forward_time": 0.11520886421203613,
      "step": 43905
    },
    {
      "epoch": 0.000267974853515625,
      "step": 43905,
      "training_step_time": 0.3881247043609619
    },
    {
      "epoch": 0.00026798095703125,
      "model_forward_time": 0.11566352844238281,
      "step": 43906
    },
    {
      "epoch": 0.00026798095703125,
      "step": 43906,
      "training_step_time": 0.3877246379852295
    },
    {
      "epoch": 0.000267987060546875,
      "model_forward_time": 0.11629796028137207,
      "step": 43907
    },
    {
      "epoch": 0.000267987060546875,
      "step": 43907,
      "training_step_time": 0.43140411376953125
    },
    {
      "epoch": 0.0002679931640625,
      "model_forward_time": 0.11507749557495117,
      "step": 43908
    },
    {
      "epoch": 0.0002679931640625,
      "step": 43908,
      "training_step_time": 0.36786413192749023
    },
    {
      "epoch": 0.000267999267578125,
      "model_forward_time": 0.11602473258972168,
      "step": 43909
    },
    {
      "epoch": 0.000267999267578125,
      "step": 43909,
      "training_step_time": 0.4021470546722412
    },
    {
      "epoch": 0.00026800537109375,
      "grad_norm": 0.13624528050422668,
      "learning_rate": 1.840565399626939e-05,
      "loss": 0.0312,
      "step": 43910
    },
    {
      "epoch": 0.00026800537109375,
      "model_forward_time": 0.11564326286315918,
      "step": 43910
    },
    {
      "epoch": 0.00026800537109375,
      "step": 43910,
      "training_step_time": 0.5435233116149902
    },
    {
      "epoch": 0.000268011474609375,
      "model_forward_time": 0.11471152305603027,
      "step": 43911
    },
    {
      "epoch": 0.000268011474609375,
      "step": 43911,
      "training_step_time": 0.37961626052856445
    },
    {
      "epoch": 0.000268017578125,
      "model_forward_time": 0.11532306671142578,
      "step": 43912
    },
    {
      "epoch": 0.000268017578125,
      "step": 43912,
      "training_step_time": 0.381162166595459
    },
    {
      "epoch": 0.000268023681640625,
      "model_forward_time": 0.11484646797180176,
      "step": 43913
    },
    {
      "epoch": 0.000268023681640625,
      "step": 43913,
      "training_step_time": 0.3704111576080322
    },
    {
      "epoch": 0.00026802978515625,
      "model_forward_time": 0.11517643928527832,
      "step": 43914
    },
    {
      "epoch": 0.00026802978515625,
      "step": 43914,
      "training_step_time": 0.38166284561157227
    },
    {
      "epoch": 0.000268035888671875,
      "model_forward_time": 0.1152961254119873,
      "step": 43915
    },
    {
      "epoch": 0.000268035888671875,
      "step": 43915,
      "training_step_time": 0.3916609287261963
    },
    {
      "epoch": 0.0002680419921875,
      "model_forward_time": 0.11622905731201172,
      "step": 43916
    },
    {
      "epoch": 0.0002680419921875,
      "step": 43916,
      "training_step_time": 0.6254897117614746
    },
    {
      "epoch": 0.000268048095703125,
      "model_forward_time": 0.11520504951477051,
      "step": 43917
    },
    {
      "epoch": 0.000268048095703125,
      "step": 43917,
      "training_step_time": 0.38937926292419434
    },
    {
      "epoch": 0.00026805419921875,
      "model_forward_time": 0.11514639854431152,
      "step": 43918
    },
    {
      "epoch": 0.00026805419921875,
      "step": 43918,
      "training_step_time": 0.4086778163909912
    },
    {
      "epoch": 0.000268060302734375,
      "model_forward_time": 0.1151585578918457,
      "step": 43919
    },
    {
      "epoch": 0.000268060302734375,
      "step": 43919,
      "training_step_time": 0.382277250289917
    },
    {
      "epoch": 0.00026806640625,
      "grad_norm": 0.09804975986480713,
      "learning_rate": 1.838429980787081e-05,
      "loss": 0.0355,
      "step": 43920
    },
    {
      "epoch": 0.00026806640625,
      "model_forward_time": 0.11503982543945312,
      "step": 43920
    },
    {
      "epoch": 0.00026806640625,
      "step": 43920,
      "training_step_time": 0.4593687057495117
    },
    {
      "epoch": 0.000268072509765625,
      "model_forward_time": 0.11490440368652344,
      "step": 43921
    },
    {
      "epoch": 0.000268072509765625,
      "step": 43921,
      "training_step_time": 0.45461249351501465
    },
    {
      "epoch": 0.00026807861328125,
      "model_forward_time": 0.11499357223510742,
      "step": 43922
    },
    {
      "epoch": 0.00026807861328125,
      "step": 43922,
      "training_step_time": 0.57733154296875
    },
    {
      "epoch": 0.000268084716796875,
      "model_forward_time": 0.11552309989929199,
      "step": 43923
    },
    {
      "epoch": 0.000268084716796875,
      "step": 43923,
      "training_step_time": 0.4127614498138428
    },
    {
      "epoch": 0.0002680908203125,
      "model_forward_time": 0.11492276191711426,
      "step": 43924
    },
    {
      "epoch": 0.0002680908203125,
      "step": 43924,
      "training_step_time": 0.427992582321167
    },
    {
      "epoch": 0.000268096923828125,
      "model_forward_time": 0.1150205135345459,
      "step": 43925
    },
    {
      "epoch": 0.000268096923828125,
      "step": 43925,
      "training_step_time": 0.3677701950073242
    },
    {
      "epoch": 0.00026810302734375,
      "model_forward_time": 0.11491203308105469,
      "step": 43926
    },
    {
      "epoch": 0.00026810302734375,
      "step": 43926,
      "training_step_time": 0.38190269470214844
    },
    {
      "epoch": 0.000268109130859375,
      "model_forward_time": 0.1151731014251709,
      "step": 43927
    },
    {
      "epoch": 0.000268109130859375,
      "step": 43927,
      "training_step_time": 0.4014456272125244
    },
    {
      "epoch": 0.000268115234375,
      "model_forward_time": 0.11505842208862305,
      "step": 43928
    },
    {
      "epoch": 0.000268115234375,
      "step": 43928,
      "training_step_time": 0.4809558391571045
    },
    {
      "epoch": 0.000268121337890625,
      "model_forward_time": 0.11538386344909668,
      "step": 43929
    },
    {
      "epoch": 0.000268121337890625,
      "step": 43929,
      "training_step_time": 0.37961673736572266
    },
    {
      "epoch": 0.00026812744140625,
      "grad_norm": 0.1357090175151825,
      "learning_rate": 1.8362955223487143e-05,
      "loss": 0.0351,
      "step": 43930
    },
    {
      "epoch": 0.00026812744140625,
      "model_forward_time": 0.1151115894317627,
      "step": 43930
    },
    {
      "epoch": 0.00026812744140625,
      "step": 43930,
      "training_step_time": 0.3898942470550537
    },
    {
      "epoch": 0.000268133544921875,
      "model_forward_time": 0.11528277397155762,
      "step": 43931
    },
    {
      "epoch": 0.000268133544921875,
      "step": 43931,
      "training_step_time": 0.3846399784088135
    },
    {
      "epoch": 0.0002681396484375,
      "model_forward_time": 0.11504507064819336,
      "step": 43932
    },
    {
      "epoch": 0.0002681396484375,
      "step": 43932,
      "training_step_time": 0.41233038902282715
    },
    {
      "epoch": 0.000268145751953125,
      "model_forward_time": 0.1143958568572998,
      "step": 43933
    },
    {
      "epoch": 0.000268145751953125,
      "step": 43933,
      "training_step_time": 0.3973047733306885
    },
    {
      "epoch": 0.00026815185546875,
      "model_forward_time": 0.1153872013092041,
      "step": 43934
    },
    {
      "epoch": 0.00026815185546875,
      "step": 43934,
      "training_step_time": 0.6576411724090576
    },
    {
      "epoch": 0.000268157958984375,
      "model_forward_time": 0.11449766159057617,
      "step": 43935
    },
    {
      "epoch": 0.000268157958984375,
      "step": 43935,
      "training_step_time": 0.41033244132995605
    },
    {
      "epoch": 0.0002681640625,
      "model_forward_time": 0.11559605598449707,
      "step": 43936
    },
    {
      "epoch": 0.0002681640625,
      "step": 43936,
      "training_step_time": 0.45185232162475586
    },
    {
      "epoch": 0.000268170166015625,
      "model_forward_time": 0.11535072326660156,
      "step": 43937
    },
    {
      "epoch": 0.000268170166015625,
      "step": 43937,
      "training_step_time": 0.4343118667602539
    },
    {
      "epoch": 0.00026817626953125,
      "model_forward_time": 0.11486411094665527,
      "step": 43938
    },
    {
      "epoch": 0.00026817626953125,
      "step": 43938,
      "training_step_time": 0.43473362922668457
    },
    {
      "epoch": 0.000268182373046875,
      "model_forward_time": 0.114959716796875,
      "step": 43939
    },
    {
      "epoch": 0.000268182373046875,
      "step": 43939,
      "training_step_time": 0.458237886428833
    },
    {
      "epoch": 0.0002681884765625,
      "grad_norm": 0.06997696310281754,
      "learning_rate": 1.8341620249602387e-05,
      "loss": 0.0367,
      "step": 43940
    },
    {
      "epoch": 0.0002681884765625,
      "model_forward_time": 0.11490273475646973,
      "step": 43940
    },
    {
      "epoch": 0.0002681884765625,
      "step": 43940,
      "training_step_time": 0.39618802070617676
    },
    {
      "epoch": 0.000268194580078125,
      "model_forward_time": 0.11472463607788086,
      "step": 43941
    },
    {
      "epoch": 0.000268194580078125,
      "step": 43941,
      "training_step_time": 0.39284324645996094
    },
    {
      "epoch": 0.00026820068359375,
      "model_forward_time": 0.11625838279724121,
      "step": 43942
    },
    {
      "epoch": 0.00026820068359375,
      "step": 43942,
      "training_step_time": 0.3891754150390625
    },
    {
      "epoch": 0.000268206787109375,
      "model_forward_time": 0.11525988578796387,
      "step": 43943
    },
    {
      "epoch": 0.000268206787109375,
      "step": 43943,
      "training_step_time": 0.381563663482666
    },
    {
      "epoch": 0.000268212890625,
      "model_forward_time": 0.11530685424804688,
      "step": 43944
    },
    {
      "epoch": 0.000268212890625,
      "step": 43944,
      "training_step_time": 0.39981985092163086
    },
    {
      "epoch": 0.000268218994140625,
      "model_forward_time": 0.11625981330871582,
      "step": 43945
    },
    {
      "epoch": 0.000268218994140625,
      "step": 43945,
      "training_step_time": 0.4066040515899658
    },
    {
      "epoch": 0.00026822509765625,
      "model_forward_time": 0.11672258377075195,
      "step": 43946
    },
    {
      "epoch": 0.00026822509765625,
      "step": 43946,
      "training_step_time": 0.631103515625
    },
    {
      "epoch": 0.000268231201171875,
      "model_forward_time": 0.11526346206665039,
      "step": 43947
    },
    {
      "epoch": 0.000268231201171875,
      "step": 43947,
      "training_step_time": 0.4047412872314453
    },
    {
      "epoch": 0.0002682373046875,
      "model_forward_time": 0.11499643325805664,
      "step": 43948
    },
    {
      "epoch": 0.0002682373046875,
      "step": 43948,
      "training_step_time": 0.40758800506591797
    },
    {
      "epoch": 0.000268243408203125,
      "model_forward_time": 0.11483192443847656,
      "step": 43949
    },
    {
      "epoch": 0.000268243408203125,
      "step": 43949,
      "training_step_time": 0.44760584831237793
    },
    {
      "epoch": 0.00026824951171875,
      "grad_norm": 0.10217972099781036,
      "learning_rate": 1.8320294892697478e-05,
      "loss": 0.0394,
      "step": 43950
    },
    {
      "epoch": 0.00026824951171875,
      "model_forward_time": 0.11479735374450684,
      "step": 43950
    },
    {
      "epoch": 0.00026824951171875,
      "step": 43950,
      "training_step_time": 0.47852420806884766
    },
    {
      "epoch": 0.000268255615234375,
      "model_forward_time": 0.11504578590393066,
      "step": 43951
    },
    {
      "epoch": 0.000268255615234375,
      "step": 43951,
      "training_step_time": 0.4025838375091553
    },
    {
      "epoch": 0.00026826171875,
      "model_forward_time": 0.11525249481201172,
      "step": 43952
    },
    {
      "epoch": 0.00026826171875,
      "step": 43952,
      "training_step_time": 0.5093579292297363
    },
    {
      "epoch": 0.000268267822265625,
      "model_forward_time": 0.11521196365356445,
      "step": 43953
    },
    {
      "epoch": 0.000268267822265625,
      "step": 43953,
      "training_step_time": 0.4668300151824951
    },
    {
      "epoch": 0.00026827392578125,
      "model_forward_time": 0.11569499969482422,
      "step": 43954
    },
    {
      "epoch": 0.00026827392578125,
      "step": 43954,
      "training_step_time": 0.38245558738708496
    },
    {
      "epoch": 0.000268280029296875,
      "model_forward_time": 0.11459493637084961,
      "step": 43955
    },
    {
      "epoch": 0.000268280029296875,
      "step": 43955,
      "training_step_time": 0.3794567584991455
    },
    {
      "epoch": 0.0002682861328125,
      "model_forward_time": 0.11529731750488281,
      "step": 43956
    },
    {
      "epoch": 0.0002682861328125,
      "step": 43956,
      "training_step_time": 0.3909022808074951
    },
    {
      "epoch": 0.000268292236328125,
      "model_forward_time": 0.11488914489746094,
      "step": 43957
    },
    {
      "epoch": 0.000268292236328125,
      "step": 43957,
      "training_step_time": 0.3982830047607422
    },
    {
      "epoch": 0.00026829833984375,
      "model_forward_time": 0.11513090133666992,
      "step": 43958
    },
    {
      "epoch": 0.00026829833984375,
      "step": 43958,
      "training_step_time": 0.3942570686340332
    },
    {
      "epoch": 0.000268304443359375,
      "model_forward_time": 0.11512637138366699,
      "step": 43959
    },
    {
      "epoch": 0.000268304443359375,
      "step": 43959,
      "training_step_time": 0.38631105422973633
    },
    {
      "epoch": 0.000268310546875,
      "grad_norm": 0.12634235620498657,
      "learning_rate": 1.8298979159250557e-05,
      "loss": 0.0343,
      "step": 43960
    },
    {
      "epoch": 0.000268310546875,
      "model_forward_time": 0.11551475524902344,
      "step": 43960
    },
    {
      "epoch": 0.000268310546875,
      "step": 43960,
      "training_step_time": 0.39222168922424316
    },
    {
      "epoch": 0.000268316650390625,
      "model_forward_time": 0.11544966697692871,
      "step": 43961
    },
    {
      "epoch": 0.000268316650390625,
      "step": 43961,
      "training_step_time": 0.4067668914794922
    },
    {
      "epoch": 0.00026832275390625,
      "model_forward_time": 0.11526370048522949,
      "step": 43962
    },
    {
      "epoch": 0.00026832275390625,
      "step": 43962,
      "training_step_time": 0.4216010570526123
    },
    {
      "epoch": 0.000268328857421875,
      "model_forward_time": 0.11504960060119629,
      "step": 43963
    },
    {
      "epoch": 0.000268328857421875,
      "step": 43963,
      "training_step_time": 0.3988792896270752
    },
    {
      "epoch": 0.0002683349609375,
      "model_forward_time": 0.11539530754089355,
      "step": 43964
    },
    {
      "epoch": 0.0002683349609375,
      "step": 43964,
      "training_step_time": 0.5581133365631104
    },
    {
      "epoch": 0.000268341064453125,
      "model_forward_time": 0.1148066520690918,
      "step": 43965
    },
    {
      "epoch": 0.000268341064453125,
      "step": 43965,
      "training_step_time": 0.36650562286376953
    },
    {
      "epoch": 0.00026834716796875,
      "model_forward_time": 0.11480426788330078,
      "step": 43966
    },
    {
      "epoch": 0.00026834716796875,
      "step": 43966,
      "training_step_time": 0.4620957374572754
    },
    {
      "epoch": 0.000268353271484375,
      "model_forward_time": 0.11484503746032715,
      "step": 43967
    },
    {
      "epoch": 0.000268353271484375,
      "step": 43967,
      "training_step_time": 0.4552958011627197
    },
    {
      "epoch": 0.000268359375,
      "model_forward_time": 0.11443972587585449,
      "step": 43968
    },
    {
      "epoch": 0.000268359375,
      "step": 43968,
      "training_step_time": 0.39314842224121094
    },
    {
      "epoch": 0.000268365478515625,
      "model_forward_time": 0.11498594284057617,
      "step": 43969
    },
    {
      "epoch": 0.000268365478515625,
      "step": 43969,
      "training_step_time": 0.396636962890625
    },
    {
      "epoch": 0.00026837158203125,
      "grad_norm": 0.13435502350330353,
      "learning_rate": 1.8277673055736715e-05,
      "loss": 0.0349,
      "step": 43970
    },
    {
      "epoch": 0.00026837158203125,
      "model_forward_time": 0.1157996654510498,
      "step": 43970
    },
    {
      "epoch": 0.00026837158203125,
      "step": 43970,
      "training_step_time": 0.4519634246826172
    },
    {
      "epoch": 0.000268377685546875,
      "model_forward_time": 0.11441469192504883,
      "step": 43971
    },
    {
      "epoch": 0.000268377685546875,
      "step": 43971,
      "training_step_time": 0.4025418758392334
    },
    {
      "epoch": 0.0002683837890625,
      "model_forward_time": 0.11520028114318848,
      "step": 43972
    },
    {
      "epoch": 0.0002683837890625,
      "step": 43972,
      "training_step_time": 0.3900289535522461
    },
    {
      "epoch": 0.000268389892578125,
      "model_forward_time": 0.11509346961975098,
      "step": 43973
    },
    {
      "epoch": 0.000268389892578125,
      "step": 43973,
      "training_step_time": 0.3781111240386963
    },
    {
      "epoch": 0.00026839599609375,
      "model_forward_time": 0.11586880683898926,
      "step": 43974
    },
    {
      "epoch": 0.00026839599609375,
      "step": 43974,
      "training_step_time": 0.4011077880859375
    },
    {
      "epoch": 0.000268402099609375,
      "model_forward_time": 0.11513161659240723,
      "step": 43975
    },
    {
      "epoch": 0.000268402099609375,
      "step": 43975,
      "training_step_time": 0.44878339767456055
    },
    {
      "epoch": 0.000268408203125,
      "model_forward_time": 0.11477804183959961,
      "step": 43976
    },
    {
      "epoch": 0.000268408203125,
      "step": 43976,
      "training_step_time": 0.5509681701660156
    },
    {
      "epoch": 0.000268414306640625,
      "model_forward_time": 0.11487174034118652,
      "step": 43977
    },
    {
      "epoch": 0.000268414306640625,
      "step": 43977,
      "training_step_time": 0.5163638591766357
    },
    {
      "epoch": 0.00026842041015625,
      "model_forward_time": 0.11519169807434082,
      "step": 43978
    },
    {
      "epoch": 0.00026842041015625,
      "step": 43978,
      "training_step_time": 0.4252645969390869
    },
    {
      "epoch": 0.000268426513671875,
      "model_forward_time": 0.11514902114868164,
      "step": 43979
    },
    {
      "epoch": 0.000268426513671875,
      "step": 43979,
      "training_step_time": 0.42799925804138184
    },
    {
      "epoch": 0.0002684326171875,
      "grad_norm": 0.11106665432453156,
      "learning_rate": 1.8256376588628238e-05,
      "loss": 0.0351,
      "step": 43980
    },
    {
      "epoch": 0.0002684326171875,
      "model_forward_time": 0.11467599868774414,
      "step": 43980
    },
    {
      "epoch": 0.0002684326171875,
      "step": 43980,
      "training_step_time": 0.488633394241333
    },
    {
      "epoch": 0.000268438720703125,
      "model_forward_time": 0.11541199684143066,
      "step": 43981
    },
    {
      "epoch": 0.000268438720703125,
      "step": 43981,
      "training_step_time": 0.49482202529907227
    },
    {
      "epoch": 0.00026844482421875,
      "model_forward_time": 0.11424970626831055,
      "step": 43982
    },
    {
      "epoch": 0.00026844482421875,
      "step": 43982,
      "training_step_time": 0.39983057975769043
    },
    {
      "epoch": 0.000268450927734375,
      "model_forward_time": 0.11453795433044434,
      "step": 43983
    },
    {
      "epoch": 0.000268450927734375,
      "step": 43983,
      "training_step_time": 0.3986396789550781
    },
    {
      "epoch": 0.00026845703125,
      "model_forward_time": 0.11543536186218262,
      "step": 43984
    },
    {
      "epoch": 0.00026845703125,
      "step": 43984,
      "training_step_time": 0.3902590274810791
    },
    {
      "epoch": 0.000268463134765625,
      "model_forward_time": 0.11673593521118164,
      "step": 43985
    },
    {
      "epoch": 0.000268463134765625,
      "step": 43985,
      "training_step_time": 0.39493250846862793
    },
    {
      "epoch": 0.00026846923828125,
      "model_forward_time": 0.11496567726135254,
      "step": 43986
    },
    {
      "epoch": 0.00026846923828125,
      "step": 43986,
      "training_step_time": 0.4069690704345703
    },
    {
      "epoch": 0.000268475341796875,
      "model_forward_time": 0.11536526679992676,
      "step": 43987
    },
    {
      "epoch": 0.000268475341796875,
      "step": 43987,
      "training_step_time": 0.4154324531555176
    },
    {
      "epoch": 0.0002684814453125,
      "model_forward_time": 0.11618685722351074,
      "step": 43988
    },
    {
      "epoch": 0.0002684814453125,
      "step": 43988,
      "training_step_time": 0.4405050277709961
    },
    {
      "epoch": 0.000268487548828125,
      "model_forward_time": 0.11496329307556152,
      "step": 43989
    },
    {
      "epoch": 0.000268487548828125,
      "step": 43989,
      "training_step_time": 0.3903965950012207
    },
    {
      "epoch": 0.00026849365234375,
      "grad_norm": 0.09716411679983139,
      "learning_rate": 1.8235089764394408e-05,
      "loss": 0.0383,
      "step": 43990
    },
    {
      "epoch": 0.00026849365234375,
      "model_forward_time": 0.11517214775085449,
      "step": 43990
    },
    {
      "epoch": 0.00026849365234375,
      "step": 43990,
      "training_step_time": 0.39505434036254883
    },
    {
      "epoch": 0.000268499755859375,
      "model_forward_time": 0.11539626121520996,
      "step": 43991
    },
    {
      "epoch": 0.000268499755859375,
      "step": 43991,
      "training_step_time": 0.38738203048706055
    },
    {
      "epoch": 0.000268505859375,
      "model_forward_time": 0.1156468391418457,
      "step": 43992
    },
    {
      "epoch": 0.000268505859375,
      "step": 43992,
      "training_step_time": 0.41263556480407715
    },
    {
      "epoch": 0.000268511962890625,
      "model_forward_time": 0.11510872840881348,
      "step": 43993
    },
    {
      "epoch": 0.000268511962890625,
      "step": 43993,
      "training_step_time": 0.4971926212310791
    },
    {
      "epoch": 0.00026851806640625,
      "model_forward_time": 0.11496853828430176,
      "step": 43994
    },
    {
      "epoch": 0.00026851806640625,
      "step": 43994,
      "training_step_time": 0.3723323345184326
    },
    {
      "epoch": 0.000268524169921875,
      "model_forward_time": 0.11504292488098145,
      "step": 43995
    },
    {
      "epoch": 0.000268524169921875,
      "step": 43995,
      "training_step_time": 0.5501682758331299
    },
    {
      "epoch": 0.0002685302734375,
      "model_forward_time": 0.11463785171508789,
      "step": 43996
    },
    {
      "epoch": 0.0002685302734375,
      "step": 43996,
      "training_step_time": 0.4286472797393799
    },
    {
      "epoch": 0.000268536376953125,
      "model_forward_time": 0.11478471755981445,
      "step": 43997
    },
    {
      "epoch": 0.000268536376953125,
      "step": 43997,
      "training_step_time": 0.38460731506347656
    },
    {
      "epoch": 0.00026854248046875,
      "model_forward_time": 0.11476469039916992,
      "step": 43998
    },
    {
      "epoch": 0.00026854248046875,
      "step": 43998,
      "training_step_time": 0.4104645252227783
    },
    {
      "epoch": 0.000268548583984375,
      "model_forward_time": 0.1150965690612793,
      "step": 43999
    },
    {
      "epoch": 0.000268548583984375,
      "step": 43999,
      "training_step_time": 0.39882469177246094
    },
    {
      "epoch": 0.0002685546875,
      "grad_norm": 0.1255466490983963,
      "learning_rate": 1.821381258950161e-05,
      "loss": 0.0344,
      "step": 44000
    },
    {
      "epoch": 0.0002685546875,
      "model_forward_time": 0.11279034614562988,
      "step": 44000
    },
    {
      "epoch": 0.0002685546875,
      "step": 44000,
      "training_step_time": 0.3524928092956543
    },
    {
      "epoch": 0.000268560791015625,
      "model_forward_time": 0.11269330978393555,
      "step": 44001
    },
    {
      "epoch": 0.000268560791015625,
      "step": 44001,
      "training_step_time": 0.4181253910064697
    },
    {
      "epoch": 0.00026856689453125,
      "model_forward_time": 0.11312198638916016,
      "step": 44002
    },
    {
      "epoch": 0.00026856689453125,
      "step": 44002,
      "training_step_time": 0.42167186737060547
    },
    {
      "epoch": 0.000268572998046875,
      "model_forward_time": 0.1135413646697998,
      "step": 44003
    },
    {
      "epoch": 0.000268572998046875,
      "step": 44003,
      "training_step_time": 0.3779733180999756
    },
    {
      "epoch": 0.0002685791015625,
      "model_forward_time": 0.11387109756469727,
      "step": 44004
    },
    {
      "epoch": 0.0002685791015625,
      "step": 44004,
      "training_step_time": 0.3832416534423828
    },
    {
      "epoch": 0.000268585205078125,
      "model_forward_time": 0.11488580703735352,
      "step": 44005
    },
    {
      "epoch": 0.000268585205078125,
      "step": 44005,
      "training_step_time": 0.38849449157714844
    },
    {
      "epoch": 0.00026859130859375,
      "model_forward_time": 0.11397433280944824,
      "step": 44006
    },
    {
      "epoch": 0.00026859130859375,
      "step": 44006,
      "training_step_time": 0.39269399642944336
    },
    {
      "epoch": 0.000268597412109375,
      "model_forward_time": 0.1147775650024414,
      "step": 44007
    },
    {
      "epoch": 0.000268597412109375,
      "step": 44007,
      "training_step_time": 0.42447996139526367
    },
    {
      "epoch": 0.000268603515625,
      "model_forward_time": 0.11514043807983398,
      "step": 44008
    },
    {
      "epoch": 0.000268603515625,
      "step": 44008,
      "training_step_time": 0.42504239082336426
    },
    {
      "epoch": 0.000268609619140625,
      "model_forward_time": 0.11567521095275879,
      "step": 44009
    },
    {
      "epoch": 0.000268609619140625,
      "step": 44009,
      "training_step_time": 0.39034533500671387
    },
    {
      "epoch": 0.00026861572265625,
      "grad_norm": 0.07705395668745041,
      "learning_rate": 1.8192545070413282e-05,
      "loss": 0.0309,
      "step": 44010
    },
    {
      "epoch": 0.00026861572265625,
      "model_forward_time": 0.1154634952545166,
      "step": 44010
    },
    {
      "epoch": 0.00026861572265625,
      "step": 44010,
      "training_step_time": 0.3943290710449219
    },
    {
      "epoch": 0.000268621826171875,
      "model_forward_time": 0.11467480659484863,
      "step": 44011
    },
    {
      "epoch": 0.000268621826171875,
      "step": 44011,
      "training_step_time": 0.4595777988433838
    },
    {
      "epoch": 0.0002686279296875,
      "model_forward_time": 0.11536240577697754,
      "step": 44012
    },
    {
      "epoch": 0.0002686279296875,
      "step": 44012,
      "training_step_time": 0.4225461483001709
    },
    {
      "epoch": 0.000268634033203125,
      "model_forward_time": 0.11498641967773438,
      "step": 44013
    },
    {
      "epoch": 0.000268634033203125,
      "step": 44013,
      "training_step_time": 0.39837074279785156
    },
    {
      "epoch": 0.00026864013671875,
      "model_forward_time": 0.11497044563293457,
      "step": 44014
    },
    {
      "epoch": 0.00026864013671875,
      "step": 44014,
      "training_step_time": 0.388805627822876
    },
    {
      "epoch": 0.000268646240234375,
      "model_forward_time": 0.11551403999328613,
      "step": 44015
    },
    {
      "epoch": 0.000268646240234375,
      "step": 44015,
      "training_step_time": 0.42479872703552246
    },
    {
      "epoch": 0.00026865234375,
      "model_forward_time": 0.11520528793334961,
      "step": 44016
    },
    {
      "epoch": 0.00026865234375,
      "step": 44016,
      "training_step_time": 0.4082362651824951
    },
    {
      "epoch": 0.000268658447265625,
      "model_forward_time": 0.11545705795288086,
      "step": 44017
    },
    {
      "epoch": 0.000268658447265625,
      "step": 44017,
      "training_step_time": 0.3939790725708008
    },
    {
      "epoch": 0.00026866455078125,
      "model_forward_time": 0.1153097152709961,
      "step": 44018
    },
    {
      "epoch": 0.00026866455078125,
      "step": 44018,
      "training_step_time": 0.3910799026489258
    },
    {
      "epoch": 0.000268670654296875,
      "model_forward_time": 0.11464452743530273,
      "step": 44019
    },
    {
      "epoch": 0.000268670654296875,
      "step": 44019,
      "training_step_time": 0.39269399642944336
    },
    {
      "epoch": 0.0002686767578125,
      "grad_norm": 0.12112849205732346,
      "learning_rate": 1.817128721358991e-05,
      "loss": 0.0389,
      "step": 44020
    },
    {
      "epoch": 0.0002686767578125,
      "model_forward_time": 0.1147012710571289,
      "step": 44020
    },
    {
      "epoch": 0.0002686767578125,
      "step": 44020,
      "training_step_time": 0.3918497562408447
    },
    {
      "epoch": 0.000268682861328125,
      "model_forward_time": 0.11556816101074219,
      "step": 44021
    },
    {
      "epoch": 0.000268682861328125,
      "step": 44021,
      "training_step_time": 0.4195101261138916
    },
    {
      "epoch": 0.00026868896484375,
      "model_forward_time": 0.11555099487304688,
      "step": 44022
    },
    {
      "epoch": 0.00026868896484375,
      "step": 44022,
      "training_step_time": 0.44087767601013184
    },
    {
      "epoch": 0.000268695068359375,
      "model_forward_time": 0.1159825325012207,
      "step": 44023
    },
    {
      "epoch": 0.000268695068359375,
      "step": 44023,
      "training_step_time": 0.49235033988952637
    },
    {
      "epoch": 0.000268701171875,
      "model_forward_time": 0.11499881744384766,
      "step": 44024
    },
    {
      "epoch": 0.000268701171875,
      "step": 44024,
      "training_step_time": 0.4469475746154785
    },
    {
      "epoch": 0.000268707275390625,
      "model_forward_time": 0.1154472827911377,
      "step": 44025
    },
    {
      "epoch": 0.000268707275390625,
      "step": 44025,
      "training_step_time": 0.47702836990356445
    },
    {
      "epoch": 0.00026871337890625,
      "model_forward_time": 0.11506867408752441,
      "step": 44026
    },
    {
      "epoch": 0.00026871337890625,
      "step": 44026,
      "training_step_time": 0.49429893493652344
    },
    {
      "epoch": 0.000268719482421875,
      "model_forward_time": 0.11584329605102539,
      "step": 44027
    },
    {
      "epoch": 0.000268719482421875,
      "step": 44027,
      "training_step_time": 0.37857866287231445
    },
    {
      "epoch": 0.0002687255859375,
      "model_forward_time": 0.1147928237915039,
      "step": 44028
    },
    {
      "epoch": 0.0002687255859375,
      "step": 44028,
      "training_step_time": 0.39760494232177734
    },
    {
      "epoch": 0.000268731689453125,
      "model_forward_time": 0.1159522533416748,
      "step": 44029
    },
    {
      "epoch": 0.000268731689453125,
      "step": 44029,
      "training_step_time": 0.407088041305542
    },
    {
      "epoch": 0.00026873779296875,
      "grad_norm": 0.09111790359020233,
      "learning_rate": 1.8150039025489113e-05,
      "loss": 0.0354,
      "step": 44030
    },
    {
      "epoch": 0.00026873779296875,
      "model_forward_time": 0.11458253860473633,
      "step": 44030
    },
    {
      "epoch": 0.00026873779296875,
      "step": 44030,
      "training_step_time": 0.4205646514892578
    },
    {
      "epoch": 0.000268743896484375,
      "model_forward_time": 0.11515498161315918,
      "step": 44031
    },
    {
      "epoch": 0.000268743896484375,
      "step": 44031,
      "training_step_time": 0.39045000076293945
    },
    {
      "epoch": 0.00026875,
      "model_forward_time": 0.11529874801635742,
      "step": 44032
    },
    {
      "epoch": 0.00026875,
      "step": 44032,
      "training_step_time": 0.3861067295074463
    },
    {
      "epoch": 0.000268756103515625,
      "model_forward_time": 0.11589646339416504,
      "step": 44033
    },
    {
      "epoch": 0.000268756103515625,
      "step": 44033,
      "training_step_time": 0.4019601345062256
    },
    {
      "epoch": 0.00026876220703125,
      "model_forward_time": 0.11498522758483887,
      "step": 44034
    },
    {
      "epoch": 0.00026876220703125,
      "step": 44034,
      "training_step_time": 0.40694355964660645
    },
    {
      "epoch": 0.000268768310546875,
      "model_forward_time": 0.11510539054870605,
      "step": 44035
    },
    {
      "epoch": 0.000268768310546875,
      "step": 44035,
      "training_step_time": 0.426072359085083
    },
    {
      "epoch": 0.0002687744140625,
      "model_forward_time": 0.11505770683288574,
      "step": 44036
    },
    {
      "epoch": 0.0002687744140625,
      "step": 44036,
      "training_step_time": 0.3927795886993408
    },
    {
      "epoch": 0.000268780517578125,
      "model_forward_time": 0.11466813087463379,
      "step": 44037
    },
    {
      "epoch": 0.000268780517578125,
      "step": 44037,
      "training_step_time": 0.441788911819458
    },
    {
      "epoch": 0.00026878662109375,
      "model_forward_time": 0.11507391929626465,
      "step": 44038
    },
    {
      "epoch": 0.00026878662109375,
      "step": 44038,
      "training_step_time": 0.4135596752166748
    },
    {
      "epoch": 0.000268792724609375,
      "model_forward_time": 0.11478590965270996,
      "step": 44039
    },
    {
      "epoch": 0.000268792724609375,
      "step": 44039,
      "training_step_time": 0.412564754486084
    },
    {
      "epoch": 0.000268798828125,
      "grad_norm": 0.12106920033693314,
      "learning_rate": 1.8128800512565513e-05,
      "loss": 0.0385,
      "step": 44040
    },
    {
      "epoch": 0.000268798828125,
      "model_forward_time": 0.1149449348449707,
      "step": 44040
    },
    {
      "epoch": 0.000268798828125,
      "step": 44040,
      "training_step_time": 0.4919397830963135
    },
    {
      "epoch": 0.000268804931640625,
      "model_forward_time": 0.11475849151611328,
      "step": 44041
    },
    {
      "epoch": 0.000268804931640625,
      "step": 44041,
      "training_step_time": 0.4518611431121826
    },
    {
      "epoch": 0.00026881103515625,
      "model_forward_time": 0.11507058143615723,
      "step": 44042
    },
    {
      "epoch": 0.00026881103515625,
      "step": 44042,
      "training_step_time": 0.4087512493133545
    },
    {
      "epoch": 0.000268817138671875,
      "model_forward_time": 0.11569690704345703,
      "step": 44043
    },
    {
      "epoch": 0.000268817138671875,
      "step": 44043,
      "training_step_time": 0.4327085018157959
    },
    {
      "epoch": 0.0002688232421875,
      "model_forward_time": 0.11501622200012207,
      "step": 44044
    },
    {
      "epoch": 0.0002688232421875,
      "step": 44044,
      "training_step_time": 0.3874332904815674
    },
    {
      "epoch": 0.000268829345703125,
      "model_forward_time": 0.11629843711853027,
      "step": 44045
    },
    {
      "epoch": 0.000268829345703125,
      "step": 44045,
      "training_step_time": 0.3817920684814453
    },
    {
      "epoch": 0.00026883544921875,
      "model_forward_time": 0.11519336700439453,
      "step": 44046
    },
    {
      "epoch": 0.00026883544921875,
      "step": 44046,
      "training_step_time": 0.3956947326660156
    },
    {
      "epoch": 0.000268841552734375,
      "model_forward_time": 0.11536192893981934,
      "step": 44047
    },
    {
      "epoch": 0.000268841552734375,
      "step": 44047,
      "training_step_time": 0.3971517086029053
    },
    {
      "epoch": 0.00026884765625,
      "model_forward_time": 0.1156301498413086,
      "step": 44048
    },
    {
      "epoch": 0.00026884765625,
      "step": 44048,
      "training_step_time": 0.3939650058746338
    },
    {
      "epoch": 0.000268853759765625,
      "model_forward_time": 0.11495208740234375,
      "step": 44049
    },
    {
      "epoch": 0.000268853759765625,
      "step": 44049,
      "training_step_time": 0.3888366222381592
    },
    {
      "epoch": 0.00026885986328125,
      "grad_norm": 0.12573495507240295,
      "learning_rate": 1.810757168127081e-05,
      "loss": 0.0365,
      "step": 44050
    },
    {
      "epoch": 0.00026885986328125,
      "model_forward_time": 0.11550235748291016,
      "step": 44050
    },
    {
      "epoch": 0.00026885986328125,
      "step": 44050,
      "training_step_time": 0.49172091484069824
    },
    {
      "epoch": 0.000268865966796875,
      "model_forward_time": 0.11702775955200195,
      "step": 44051
    },
    {
      "epoch": 0.000268865966796875,
      "step": 44051,
      "training_step_time": 0.46979832649230957
    },
    {
      "epoch": 0.0002688720703125,
      "model_forward_time": 0.11935234069824219,
      "step": 44052
    },
    {
      "epoch": 0.0002688720703125,
      "step": 44052,
      "training_step_time": 0.6126315593719482
    },
    {
      "epoch": 0.000268878173828125,
      "model_forward_time": 0.12552499771118164,
      "step": 44053
    },
    {
      "epoch": 0.000268878173828125,
      "step": 44053,
      "training_step_time": 0.664379358291626
    },
    {
      "epoch": 0.00026888427734375,
      "model_forward_time": 0.11677980422973633,
      "step": 44054
    },
    {
      "epoch": 0.00026888427734375,
      "step": 44054,
      "training_step_time": 0.7531044483184814
    },
    {
      "epoch": 0.000268890380859375,
      "model_forward_time": 0.11865091323852539,
      "step": 44055
    },
    {
      "epoch": 0.000268890380859375,
      "step": 44055,
      "training_step_time": 0.6553173065185547
    },
    {
      "epoch": 0.000268896484375,
      "model_forward_time": 0.11764407157897949,
      "step": 44056
    },
    {
      "epoch": 0.000268896484375,
      "step": 44056,
      "training_step_time": 0.6888887882232666
    },
    {
      "epoch": 0.000268902587890625,
      "model_forward_time": 0.12605929374694824,
      "step": 44057
    },
    {
      "epoch": 0.000268902587890625,
      "step": 44057,
      "training_step_time": 0.672278881072998
    },
    {
      "epoch": 0.00026890869140625,
      "model_forward_time": 0.12456154823303223,
      "step": 44058
    },
    {
      "epoch": 0.00026890869140625,
      "step": 44058,
      "training_step_time": 0.6893997192382812
    },
    {
      "epoch": 0.000268914794921875,
      "model_forward_time": 0.12485003471374512,
      "step": 44059
    },
    {
      "epoch": 0.000268914794921875,
      "step": 44059,
      "training_step_time": 0.6730358600616455
    },
    {
      "epoch": 0.0002689208984375,
      "grad_norm": 0.09065650403499603,
      "learning_rate": 1.808635253805376e-05,
      "loss": 0.0353,
      "step": 44060
    },
    {
      "epoch": 0.0002689208984375,
      "model_forward_time": 0.12037897109985352,
      "step": 44060
    },
    {
      "epoch": 0.0002689208984375,
      "step": 44060,
      "training_step_time": 0.7001268863677979
    },
    {
      "epoch": 0.000268927001953125,
      "model_forward_time": 0.11867475509643555,
      "step": 44061
    },
    {
      "epoch": 0.000268927001953125,
      "step": 44061,
      "training_step_time": 0.6876270771026611
    },
    {
      "epoch": 0.00026893310546875,
      "model_forward_time": 0.11574769020080566,
      "step": 44062
    },
    {
      "epoch": 0.00026893310546875,
      "step": 44062,
      "training_step_time": 0.6895051002502441
    },
    {
      "epoch": 0.000268939208984375,
      "model_forward_time": 0.13013172149658203,
      "step": 44063
    },
    {
      "epoch": 0.000268939208984375,
      "step": 44063,
      "training_step_time": 0.7079823017120361
    },
    {
      "epoch": 0.0002689453125,
      "model_forward_time": 0.12356185913085938,
      "step": 44064
    },
    {
      "epoch": 0.0002689453125,
      "step": 44064,
      "training_step_time": 0.6657609939575195
    },
    {
      "epoch": 0.000268951416015625,
      "model_forward_time": 0.11656785011291504,
      "step": 44065
    },
    {
      "epoch": 0.000268951416015625,
      "step": 44065,
      "training_step_time": 0.5894410610198975
    },
    {
      "epoch": 0.00026895751953125,
      "model_forward_time": 0.13098978996276855,
      "step": 44066
    },
    {
      "epoch": 0.00026895751953125,
      "step": 44066,
      "training_step_time": 0.6811094284057617
    },
    {
      "epoch": 0.000268963623046875,
      "model_forward_time": 0.12874102592468262,
      "step": 44067
    },
    {
      "epoch": 0.000268963623046875,
      "step": 44067,
      "training_step_time": 0.6499676704406738
    },
    {
      "epoch": 0.0002689697265625,
      "model_forward_time": 0.11735010147094727,
      "step": 44068
    },
    {
      "epoch": 0.0002689697265625,
      "step": 44068,
      "training_step_time": 0.6764211654663086
    },
    {
      "epoch": 0.000268975830078125,
      "model_forward_time": 0.1164083480834961,
      "step": 44069
    },
    {
      "epoch": 0.000268975830078125,
      "step": 44069,
      "training_step_time": 0.7010786533355713
    },
    {
      "epoch": 0.00026898193359375,
      "grad_norm": 0.10294836759567261,
      "learning_rate": 1.8065143089360172e-05,
      "loss": 0.0408,
      "step": 44070
    },
    {
      "epoch": 0.00026898193359375,
      "model_forward_time": 0.12081789970397949,
      "step": 44070
    },
    {
      "epoch": 0.00026898193359375,
      "step": 44070,
      "training_step_time": 0.7364194393157959
    },
    {
      "epoch": 0.000268988037109375,
      "model_forward_time": 0.12292671203613281,
      "step": 44071
    },
    {
      "epoch": 0.000268988037109375,
      "step": 44071,
      "training_step_time": 0.6526975631713867
    },
    {
      "epoch": 0.000268994140625,
      "model_forward_time": 0.12369585037231445,
      "step": 44072
    },
    {
      "epoch": 0.000268994140625,
      "step": 44072,
      "training_step_time": 0.7015254497528076
    },
    {
      "epoch": 0.000269000244140625,
      "model_forward_time": 0.12795615196228027,
      "step": 44073
    },
    {
      "epoch": 0.000269000244140625,
      "step": 44073,
      "training_step_time": 0.7198164463043213
    },
    {
      "epoch": 0.00026900634765625,
      "model_forward_time": 0.12032485008239746,
      "step": 44074
    },
    {
      "epoch": 0.00026900634765625,
      "step": 44074,
      "training_step_time": 0.6159272193908691
    },
    {
      "epoch": 0.000269012451171875,
      "model_forward_time": 0.12226486206054688,
      "step": 44075
    },
    {
      "epoch": 0.000269012451171875,
      "step": 44075,
      "training_step_time": 0.67189621925354
    },
    {
      "epoch": 0.0002690185546875,
      "model_forward_time": 0.1196589469909668,
      "step": 44076
    },
    {
      "epoch": 0.0002690185546875,
      "step": 44076,
      "training_step_time": 0.6655926704406738
    },
    {
      "epoch": 0.000269024658203125,
      "model_forward_time": 0.12008881568908691,
      "step": 44077
    },
    {
      "epoch": 0.000269024658203125,
      "step": 44077,
      "training_step_time": 0.7011728286743164
    },
    {
      "epoch": 0.00026903076171875,
      "model_forward_time": 0.12346100807189941,
      "step": 44078
    },
    {
      "epoch": 0.00026903076171875,
      "step": 44078,
      "training_step_time": 0.6625411510467529
    },
    {
      "epoch": 0.000269036865234375,
      "model_forward_time": 0.12172889709472656,
      "step": 44079
    },
    {
      "epoch": 0.000269036865234375,
      "step": 44079,
      "training_step_time": 0.7576577663421631
    },
    {
      "epoch": 0.00026904296875,
      "grad_norm": 0.14445684850215912,
      "learning_rate": 1.8043943341632907e-05,
      "loss": 0.0402,
      "step": 44080
    },
    {
      "epoch": 0.00026904296875,
      "model_forward_time": 0.12103676795959473,
      "step": 44080
    },
    {
      "epoch": 0.00026904296875,
      "step": 44080,
      "training_step_time": 0.7594404220581055
    },
    {
      "epoch": 0.000269049072265625,
      "model_forward_time": 0.11623454093933105,
      "step": 44081
    },
    {
      "epoch": 0.000269049072265625,
      "step": 44081,
      "training_step_time": 0.568427324295044
    },
    {
      "epoch": 0.00026905517578125,
      "model_forward_time": 0.11841320991516113,
      "step": 44082
    },
    {
      "epoch": 0.00026905517578125,
      "step": 44082,
      "training_step_time": 0.7385311126708984
    },
    {
      "epoch": 0.000269061279296875,
      "model_forward_time": 0.11714673042297363,
      "step": 44083
    },
    {
      "epoch": 0.000269061279296875,
      "step": 44083,
      "training_step_time": 0.6637051105499268
    },
    {
      "epoch": 0.0002690673828125,
      "model_forward_time": 0.12707757949829102,
      "step": 44084
    },
    {
      "epoch": 0.0002690673828125,
      "step": 44084,
      "training_step_time": 0.6764521598815918
    },
    {
      "epoch": 0.000269073486328125,
      "model_forward_time": 0.13001680374145508,
      "step": 44085
    },
    {
      "epoch": 0.000269073486328125,
      "step": 44085,
      "training_step_time": 0.686394214630127
    },
    {
      "epoch": 0.00026907958984375,
      "model_forward_time": 0.12114715576171875,
      "step": 44086
    },
    {
      "epoch": 0.00026907958984375,
      "step": 44086,
      "training_step_time": 0.6076269149780273
    },
    {
      "epoch": 0.000269085693359375,
      "model_forward_time": 0.12411689758300781,
      "step": 44087
    },
    {
      "epoch": 0.000269085693359375,
      "step": 44087,
      "training_step_time": 0.7227272987365723
    },
    {
      "epoch": 0.000269091796875,
      "model_forward_time": 0.13222098350524902,
      "step": 44088
    },
    {
      "epoch": 0.000269091796875,
      "step": 44088,
      "training_step_time": 0.6916224956512451
    },
    {
      "epoch": 0.000269097900390625,
      "model_forward_time": 0.11900043487548828,
      "step": 44089
    },
    {
      "epoch": 0.000269097900390625,
      "step": 44089,
      "training_step_time": 0.665428638458252
    },
    {
      "epoch": 0.00026910400390625,
      "grad_norm": 0.11837557703256607,
      "learning_rate": 1.8022753301311935e-05,
      "loss": 0.0417,
      "step": 44090
    },
    {
      "epoch": 0.00026910400390625,
      "model_forward_time": 0.11997604370117188,
      "step": 44090
    },
    {
      "epoch": 0.00026910400390625,
      "step": 44090,
      "training_step_time": 0.7538297176361084
    },
    {
      "epoch": 0.000269110107421875,
      "model_forward_time": 0.12063026428222656,
      "step": 44091
    },
    {
      "epoch": 0.000269110107421875,
      "step": 44091,
      "training_step_time": 0.6987247467041016
    },
    {
      "epoch": 0.0002691162109375,
      "model_forward_time": 0.11644673347473145,
      "step": 44092
    },
    {
      "epoch": 0.0002691162109375,
      "step": 44092,
      "training_step_time": 0.65281081199646
    },
    {
      "epoch": 0.000269122314453125,
      "model_forward_time": 0.11842131614685059,
      "step": 44093
    },
    {
      "epoch": 0.000269122314453125,
      "step": 44093,
      "training_step_time": 0.6646993160247803
    },
    {
      "epoch": 0.00026912841796875,
      "model_forward_time": 0.1408231258392334,
      "step": 44094
    },
    {
      "epoch": 0.00026912841796875,
      "step": 44094,
      "training_step_time": 0.6294291019439697
    },
    {
      "epoch": 0.000269134521484375,
      "model_forward_time": 0.12221002578735352,
      "step": 44095
    },
    {
      "epoch": 0.000269134521484375,
      "step": 44095,
      "training_step_time": 0.7015109062194824
    },
    {
      "epoch": 0.000269140625,
      "model_forward_time": 0.11771893501281738,
      "step": 44096
    },
    {
      "epoch": 0.000269140625,
      "step": 44096,
      "training_step_time": 0.6487760543823242
    },
    {
      "epoch": 0.000269146728515625,
      "model_forward_time": 0.11777138710021973,
      "step": 44097
    },
    {
      "epoch": 0.000269146728515625,
      "step": 44097,
      "training_step_time": 0.7320277690887451
    },
    {
      "epoch": 0.00026915283203125,
      "model_forward_time": 0.11754608154296875,
      "step": 44098
    },
    {
      "epoch": 0.00026915283203125,
      "step": 44098,
      "training_step_time": 0.7111506462097168
    },
    {
      "epoch": 0.000269158935546875,
      "model_forward_time": 0.12121081352233887,
      "step": 44099
    },
    {
      "epoch": 0.000269158935546875,
      "step": 44099,
      "training_step_time": 0.7305445671081543
    },
    {
      "epoch": 0.0002691650390625,
      "grad_norm": 0.15212400257587433,
      "learning_rate": 1.800157297483417e-05,
      "loss": 0.0412,
      "step": 44100
    },
    {
      "epoch": 0.0002691650390625,
      "model_forward_time": 0.11771416664123535,
      "step": 44100
    },
    {
      "epoch": 0.0002691650390625,
      "step": 44100,
      "training_step_time": 0.7461061477661133
    },
    {
      "epoch": 0.000269171142578125,
      "model_forward_time": 0.11903786659240723,
      "step": 44101
    },
    {
      "epoch": 0.000269171142578125,
      "step": 44101,
      "training_step_time": 0.7194194793701172
    },
    {
      "epoch": 0.00026917724609375,
      "model_forward_time": 0.11852216720581055,
      "step": 44102
    },
    {
      "epoch": 0.00026917724609375,
      "step": 44102,
      "training_step_time": 0.6756949424743652
    },
    {
      "epoch": 0.000269183349609375,
      "model_forward_time": 0.12390398979187012,
      "step": 44103
    },
    {
      "epoch": 0.000269183349609375,
      "step": 44103,
      "training_step_time": 0.7531929016113281
    },
    {
      "epoch": 0.000269189453125,
      "model_forward_time": 0.11955475807189941,
      "step": 44104
    },
    {
      "epoch": 0.000269189453125,
      "step": 44104,
      "training_step_time": 0.7278833389282227
    },
    {
      "epoch": 0.000269195556640625,
      "model_forward_time": 0.12639117240905762,
      "step": 44105
    },
    {
      "epoch": 0.000269195556640625,
      "step": 44105,
      "training_step_time": 0.6717486381530762
    },
    {
      "epoch": 0.00026920166015625,
      "model_forward_time": 0.11963796615600586,
      "step": 44106
    },
    {
      "epoch": 0.00026920166015625,
      "step": 44106,
      "training_step_time": 0.7252576351165771
    },
    {
      "epoch": 0.000269207763671875,
      "model_forward_time": 0.12262606620788574,
      "step": 44107
    },
    {
      "epoch": 0.000269207763671875,
      "step": 44107,
      "training_step_time": 0.6963598728179932
    },
    {
      "epoch": 0.0002692138671875,
      "model_forward_time": 0.1175844669342041,
      "step": 44108
    },
    {
      "epoch": 0.0002692138671875,
      "step": 44108,
      "training_step_time": 0.6307694911956787
    },
    {
      "epoch": 0.000269219970703125,
      "model_forward_time": 0.11991643905639648,
      "step": 44109
    },
    {
      "epoch": 0.000269219970703125,
      "step": 44109,
      "training_step_time": 0.5750837326049805
    },
    {
      "epoch": 0.00026922607421875,
      "grad_norm": 0.10962317883968353,
      "learning_rate": 1.7980402368633703e-05,
      "loss": 0.0364,
      "step": 44110
    },
    {
      "epoch": 0.00026922607421875,
      "model_forward_time": 0.12697577476501465,
      "step": 44110
    },
    {
      "epoch": 0.00026922607421875,
      "step": 44110,
      "training_step_time": 0.7310218811035156
    },
    {
      "epoch": 0.000269232177734375,
      "model_forward_time": 0.11778926849365234,
      "step": 44111
    },
    {
      "epoch": 0.000269232177734375,
      "step": 44111,
      "training_step_time": 0.6797847747802734
    },
    {
      "epoch": 0.00026923828125,
      "model_forward_time": 0.12163329124450684,
      "step": 44112
    },
    {
      "epoch": 0.00026923828125,
      "step": 44112,
      "training_step_time": 0.6838111877441406
    },
    {
      "epoch": 0.000269244384765625,
      "model_forward_time": 0.1189723014831543,
      "step": 44113
    },
    {
      "epoch": 0.000269244384765625,
      "step": 44113,
      "training_step_time": 0.6353869438171387
    },
    {
      "epoch": 0.00026925048828125,
      "model_forward_time": 0.12368106842041016,
      "step": 44114
    },
    {
      "epoch": 0.00026925048828125,
      "step": 44114,
      "training_step_time": 0.6395807266235352
    },
    {
      "epoch": 0.000269256591796875,
      "model_forward_time": 0.12307858467102051,
      "step": 44115
    },
    {
      "epoch": 0.000269256591796875,
      "step": 44115,
      "training_step_time": 0.5982911586761475
    },
    {
      "epoch": 0.0002692626953125,
      "model_forward_time": 0.12085938453674316,
      "step": 44116
    },
    {
      "epoch": 0.0002692626953125,
      "step": 44116,
      "training_step_time": 0.6618072986602783
    },
    {
      "epoch": 0.000269268798828125,
      "model_forward_time": 0.1219334602355957,
      "step": 44117
    },
    {
      "epoch": 0.000269268798828125,
      "step": 44117,
      "training_step_time": 0.6216182708740234
    },
    {
      "epoch": 0.00026927490234375,
      "model_forward_time": 0.1179049015045166,
      "step": 44118
    },
    {
      "epoch": 0.00026927490234375,
      "step": 44118,
      "training_step_time": 0.7117996215820312
    },
    {
      "epoch": 0.000269281005859375,
      "model_forward_time": 0.11946368217468262,
      "step": 44119
    },
    {
      "epoch": 0.000269281005859375,
      "step": 44119,
      "training_step_time": 0.5804431438446045
    },
    {
      "epoch": 0.000269287109375,
      "grad_norm": 0.07903735339641571,
      "learning_rate": 1.7959241489141525e-05,
      "loss": 0.0363,
      "step": 44120
    },
    {
      "epoch": 0.000269287109375,
      "model_forward_time": 0.11837100982666016,
      "step": 44120
    },
    {
      "epoch": 0.000269287109375,
      "step": 44120,
      "training_step_time": 0.5637519359588623
    },
    {
      "epoch": 0.000269293212890625,
      "model_forward_time": 0.11793732643127441,
      "step": 44121
    },
    {
      "epoch": 0.000269293212890625,
      "step": 44121,
      "training_step_time": 0.4946022033691406
    },
    {
      "epoch": 0.00026929931640625,
      "model_forward_time": 0.11870837211608887,
      "step": 44122
    },
    {
      "epoch": 0.00026929931640625,
      "step": 44122,
      "training_step_time": 0.4868128299713135
    },
    {
      "epoch": 0.000269305419921875,
      "model_forward_time": 0.11843323707580566,
      "step": 44123
    },
    {
      "epoch": 0.000269305419921875,
      "step": 44123,
      "training_step_time": 0.4336998462677002
    },
    {
      "epoch": 0.0002693115234375,
      "model_forward_time": 0.11677742004394531,
      "step": 44124
    },
    {
      "epoch": 0.0002693115234375,
      "step": 44124,
      "training_step_time": 0.4308140277862549
    },
    {
      "epoch": 0.000269317626953125,
      "model_forward_time": 0.1163942813873291,
      "step": 44125
    },
    {
      "epoch": 0.000269317626953125,
      "step": 44125,
      "training_step_time": 0.42105722427368164
    },
    {
      "epoch": 0.00026932373046875,
      "model_forward_time": 0.11600470542907715,
      "step": 44126
    },
    {
      "epoch": 0.00026932373046875,
      "step": 44126,
      "training_step_time": 0.40042567253112793
    },
    {
      "epoch": 0.000269329833984375,
      "model_forward_time": 0.1158297061920166,
      "step": 44127
    },
    {
      "epoch": 0.000269329833984375,
      "step": 44127,
      "training_step_time": 0.48792004585266113
    },
    {
      "epoch": 0.0002693359375,
      "model_forward_time": 0.1155705451965332,
      "step": 44128
    },
    {
      "epoch": 0.0002693359375,
      "step": 44128,
      "training_step_time": 0.41136741638183594
    },
    {
      "epoch": 0.000269342041015625,
      "model_forward_time": 0.11517047882080078,
      "step": 44129
    },
    {
      "epoch": 0.000269342041015625,
      "step": 44129,
      "training_step_time": 0.5348203182220459
    },
    {
      "epoch": 0.00026934814453125,
      "grad_norm": 0.09129849076271057,
      "learning_rate": 1.7938090342785817e-05,
      "loss": 0.04,
      "step": 44130
    },
    {
      "epoch": 0.00026934814453125,
      "model_forward_time": 0.11501264572143555,
      "step": 44130
    },
    {
      "epoch": 0.00026934814453125,
      "step": 44130,
      "training_step_time": 0.4006533622741699
    },
    {
      "epoch": 0.000269354248046875,
      "model_forward_time": 0.1152658462524414,
      "step": 44131
    },
    {
      "epoch": 0.000269354248046875,
      "step": 44131,
      "training_step_time": 0.4056992530822754
    },
    {
      "epoch": 0.0002693603515625,
      "model_forward_time": 0.11499261856079102,
      "step": 44132
    },
    {
      "epoch": 0.0002693603515625,
      "step": 44132,
      "training_step_time": 0.4200010299682617
    },
    {
      "epoch": 0.000269366455078125,
      "model_forward_time": 0.11477017402648926,
      "step": 44133
    },
    {
      "epoch": 0.000269366455078125,
      "step": 44133,
      "training_step_time": 0.40869736671447754
    },
    {
      "epoch": 0.00026937255859375,
      "model_forward_time": 0.11637449264526367,
      "step": 44134
    },
    {
      "epoch": 0.00026937255859375,
      "step": 44134,
      "training_step_time": 0.38700413703918457
    },
    {
      "epoch": 0.000269378662109375,
      "model_forward_time": 0.11484861373901367,
      "step": 44135
    },
    {
      "epoch": 0.000269378662109375,
      "step": 44135,
      "training_step_time": 0.511770486831665
    },
    {
      "epoch": 0.000269384765625,
      "model_forward_time": 0.11555147171020508,
      "step": 44136
    },
    {
      "epoch": 0.000269384765625,
      "step": 44136,
      "training_step_time": 0.4154672622680664
    },
    {
      "epoch": 0.000269390869140625,
      "model_forward_time": 0.11534667015075684,
      "step": 44137
    },
    {
      "epoch": 0.000269390869140625,
      "step": 44137,
      "training_step_time": 0.3978095054626465
    },
    {
      "epoch": 0.00026939697265625,
      "model_forward_time": 0.11512565612792969,
      "step": 44138
    },
    {
      "epoch": 0.00026939697265625,
      "step": 44138,
      "training_step_time": 0.40232348442077637
    },
    {
      "epoch": 0.000269403076171875,
      "model_forward_time": 0.11512875556945801,
      "step": 44139
    },
    {
      "epoch": 0.000269403076171875,
      "step": 44139,
      "training_step_time": 0.38565516471862793
    },
    {
      "epoch": 0.0002694091796875,
      "grad_norm": 0.07478811591863632,
      "learning_rate": 1.7916948935991718e-05,
      "loss": 0.0373,
      "step": 44140
    },
    {
      "epoch": 0.0002694091796875,
      "model_forward_time": 0.11531519889831543,
      "step": 44140
    },
    {
      "epoch": 0.0002694091796875,
      "step": 44140,
      "training_step_time": 0.406278133392334
    },
    {
      "epoch": 0.000269415283203125,
      "model_forward_time": 0.11499714851379395,
      "step": 44141
    },
    {
      "epoch": 0.000269415283203125,
      "step": 44141,
      "training_step_time": 0.4405834674835205
    },
    {
      "epoch": 0.00026942138671875,
      "model_forward_time": 0.11505627632141113,
      "step": 44142
    },
    {
      "epoch": 0.00026942138671875,
      "step": 44142,
      "training_step_time": 0.428957462310791
    },
    {
      "epoch": 0.000269427490234375,
      "model_forward_time": 0.11534523963928223,
      "step": 44143
    },
    {
      "epoch": 0.000269427490234375,
      "step": 44143,
      "training_step_time": 0.4991617202758789
    },
    {
      "epoch": 0.00026943359375,
      "model_forward_time": 0.11522197723388672,
      "step": 44144
    },
    {
      "epoch": 0.00026943359375,
      "step": 44144,
      "training_step_time": 0.3993189334869385
    },
    {
      "epoch": 0.000269439697265625,
      "model_forward_time": 0.11517953872680664,
      "step": 44145
    },
    {
      "epoch": 0.000269439697265625,
      "step": 44145,
      "training_step_time": 0.4030003547668457
    },
    {
      "epoch": 0.00026944580078125,
      "model_forward_time": 0.11484742164611816,
      "step": 44146
    },
    {
      "epoch": 0.00026944580078125,
      "step": 44146,
      "training_step_time": 0.4210796356201172
    },
    {
      "epoch": 0.000269451904296875,
      "model_forward_time": 0.11548566818237305,
      "step": 44147
    },
    {
      "epoch": 0.000269451904296875,
      "step": 44147,
      "training_step_time": 0.7377870082855225
    },
    {
      "epoch": 0.0002694580078125,
      "model_forward_time": 0.11423993110656738,
      "step": 44148
    },
    {
      "epoch": 0.0002694580078125,
      "step": 44148,
      "training_step_time": 0.37083983421325684
    },
    {
      "epoch": 0.000269464111328125,
      "model_forward_time": 0.1144723892211914,
      "step": 44149
    },
    {
      "epoch": 0.000269464111328125,
      "step": 44149,
      "training_step_time": 0.4443669319152832
    },
    {
      "epoch": 0.00026947021484375,
      "grad_norm": 0.07771662622690201,
      "learning_rate": 1.789581727518143e-05,
      "loss": 0.038,
      "step": 44150
    },
    {
      "epoch": 0.00026947021484375,
      "model_forward_time": 0.1145777702331543,
      "step": 44150
    },
    {
      "epoch": 0.00026947021484375,
      "step": 44150,
      "training_step_time": 0.5335605144500732
    },
    {
      "epoch": 0.000269476318359375,
      "model_forward_time": 0.1137552261352539,
      "step": 44151
    },
    {
      "epoch": 0.000269476318359375,
      "step": 44151,
      "training_step_time": 0.38558387756347656
    },
    {
      "epoch": 0.000269482421875,
      "model_forward_time": 0.1147768497467041,
      "step": 44152
    },
    {
      "epoch": 0.000269482421875,
      "step": 44152,
      "training_step_time": 0.38390135765075684
    },
    {
      "epoch": 0.000269488525390625,
      "model_forward_time": 0.11538529396057129,
      "step": 44153
    },
    {
      "epoch": 0.000269488525390625,
      "step": 44153,
      "training_step_time": 0.9584357738494873
    },
    {
      "epoch": 0.00026949462890625,
      "model_forward_time": 0.11437034606933594,
      "step": 44154
    },
    {
      "epoch": 0.00026949462890625,
      "step": 44154,
      "training_step_time": 0.42227673530578613
    },
    {
      "epoch": 0.000269500732421875,
      "model_forward_time": 0.11456704139709473,
      "step": 44155
    },
    {
      "epoch": 0.000269500732421875,
      "step": 44155,
      "training_step_time": 0.40335750579833984
    },
    {
      "epoch": 0.0002695068359375,
      "model_forward_time": 0.11412262916564941,
      "step": 44156
    },
    {
      "epoch": 0.0002695068359375,
      "step": 44156,
      "training_step_time": 0.39400696754455566
    },
    {
      "epoch": 0.000269512939453125,
      "model_forward_time": 0.1142425537109375,
      "step": 44157
    },
    {
      "epoch": 0.000269512939453125,
      "step": 44157,
      "training_step_time": 0.38227033615112305
    },
    {
      "epoch": 0.00026951904296875,
      "model_forward_time": 0.11507558822631836,
      "step": 44158
    },
    {
      "epoch": 0.00026951904296875,
      "step": 44158,
      "training_step_time": 0.48868370056152344
    },
    {
      "epoch": 0.000269525146484375,
      "model_forward_time": 0.11483120918273926,
      "step": 44159
    },
    {
      "epoch": 0.000269525146484375,
      "step": 44159,
      "training_step_time": 1.018721580505371
    },
    {
      "epoch": 0.00026953125,
      "grad_norm": 0.091043621301651,
      "learning_rate": 1.787469536677419e-05,
      "loss": 0.0384,
      "step": 44160
    },
    {
      "epoch": 0.00026953125,
      "model_forward_time": 0.11424589157104492,
      "step": 44160
    },
    {
      "epoch": 0.00026953125,
      "step": 44160,
      "training_step_time": 0.363018274307251
    },
    {
      "epoch": 0.000269537353515625,
      "model_forward_time": 0.11480998992919922,
      "step": 44161
    },
    {
      "epoch": 0.000269537353515625,
      "step": 44161,
      "training_step_time": 0.4212679862976074
    },
    {
      "epoch": 0.00026954345703125,
      "model_forward_time": 0.11452484130859375,
      "step": 44162
    },
    {
      "epoch": 0.00026954345703125,
      "step": 44162,
      "training_step_time": 0.5297033786773682
    },
    {
      "epoch": 0.000269549560546875,
      "model_forward_time": 0.11424469947814941,
      "step": 44163
    },
    {
      "epoch": 0.000269549560546875,
      "step": 44163,
      "training_step_time": 0.37893223762512207
    },
    {
      "epoch": 0.0002695556640625,
      "model_forward_time": 0.1145467758178711,
      "step": 44164
    },
    {
      "epoch": 0.0002695556640625,
      "step": 44164,
      "training_step_time": 0.390134334564209
    },
    {
      "epoch": 0.000269561767578125,
      "model_forward_time": 0.1147623062133789,
      "step": 44165
    },
    {
      "epoch": 0.000269561767578125,
      "step": 44165,
      "training_step_time": 0.40575146675109863
    },
    {
      "epoch": 0.00026956787109375,
      "model_forward_time": 0.11598634719848633,
      "step": 44166
    },
    {
      "epoch": 0.00026956787109375,
      "step": 44166,
      "training_step_time": 0.3998868465423584
    },
    {
      "epoch": 0.000269573974609375,
      "model_forward_time": 0.11579537391662598,
      "step": 44167
    },
    {
      "epoch": 0.000269573974609375,
      "step": 44167,
      "training_step_time": 0.45905447006225586
    },
    {
      "epoch": 0.000269580078125,
      "model_forward_time": 0.1154627799987793,
      "step": 44168
    },
    {
      "epoch": 0.000269580078125,
      "step": 44168,
      "training_step_time": 0.46628451347351074
    },
    {
      "epoch": 0.000269586181640625,
      "model_forward_time": 0.11483001708984375,
      "step": 44169
    },
    {
      "epoch": 0.000269586181640625,
      "step": 44169,
      "training_step_time": 0.5053403377532959
    },
    {
      "epoch": 0.00026959228515625,
      "grad_norm": 0.1137586236000061,
      "learning_rate": 1.785358321718626e-05,
      "loss": 0.0403,
      "step": 44170
    },
    {
      "epoch": 0.00026959228515625,
      "model_forward_time": 0.11584806442260742,
      "step": 44170
    },
    {
      "epoch": 0.00026959228515625,
      "step": 44170,
      "training_step_time": 0.38851284980773926
    },
    {
      "epoch": 0.000269598388671875,
      "model_forward_time": 0.1143791675567627,
      "step": 44171
    },
    {
      "epoch": 0.000269598388671875,
      "step": 44171,
      "training_step_time": 0.38988733291625977
    },
    {
      "epoch": 0.0002696044921875,
      "model_forward_time": 0.11479473114013672,
      "step": 44172
    },
    {
      "epoch": 0.0002696044921875,
      "step": 44172,
      "training_step_time": 0.4493563175201416
    },
    {
      "epoch": 0.000269610595703125,
      "model_forward_time": 0.11508011817932129,
      "step": 44173
    },
    {
      "epoch": 0.000269610595703125,
      "step": 44173,
      "training_step_time": 0.4067342281341553
    },
    {
      "epoch": 0.00026961669921875,
      "model_forward_time": 0.11474013328552246,
      "step": 44174
    },
    {
      "epoch": 0.00026961669921875,
      "step": 44174,
      "training_step_time": 0.3671717643737793
    },
    {
      "epoch": 0.000269622802734375,
      "model_forward_time": 0.11468887329101562,
      "step": 44175
    },
    {
      "epoch": 0.000269622802734375,
      "step": 44175,
      "training_step_time": 0.45303916931152344
    },
    {
      "epoch": 0.00026962890625,
      "model_forward_time": 0.11481928825378418,
      "step": 44176
    },
    {
      "epoch": 0.00026962890625,
      "step": 44176,
      "training_step_time": 0.4113168716430664
    },
    {
      "epoch": 0.000269635009765625,
      "model_forward_time": 0.11482381820678711,
      "step": 44177
    },
    {
      "epoch": 0.000269635009765625,
      "step": 44177,
      "training_step_time": 0.3920431137084961
    },
    {
      "epoch": 0.00026964111328125,
      "model_forward_time": 0.11481785774230957,
      "step": 44178
    },
    {
      "epoch": 0.00026964111328125,
      "step": 44178,
      "training_step_time": 0.4319016933441162
    },
    {
      "epoch": 0.000269647216796875,
      "model_forward_time": 0.11464238166809082,
      "step": 44179
    },
    {
      "epoch": 0.000269647216796875,
      "step": 44179,
      "training_step_time": 0.44452643394470215
    },
    {
      "epoch": 0.0002696533203125,
      "grad_norm": 0.10013147443532944,
      "learning_rate": 1.7832480832830987e-05,
      "loss": 0.0398,
      "step": 44180
    },
    {
      "epoch": 0.0002696533203125,
      "model_forward_time": 0.11508059501647949,
      "step": 44180
    },
    {
      "epoch": 0.0002696533203125,
      "step": 44180,
      "training_step_time": 0.39288902282714844
    },
    {
      "epoch": 0.000269659423828125,
      "model_forward_time": 0.11478877067565918,
      "step": 44181
    },
    {
      "epoch": 0.000269659423828125,
      "step": 44181,
      "training_step_time": 0.45566487312316895
    },
    {
      "epoch": 0.00026966552734375,
      "model_forward_time": 0.11538457870483398,
      "step": 44182
    },
    {
      "epoch": 0.00026966552734375,
      "step": 44182,
      "training_step_time": 0.40300726890563965
    },
    {
      "epoch": 0.000269671630859375,
      "model_forward_time": 0.11508822441101074,
      "step": 44183
    },
    {
      "epoch": 0.000269671630859375,
      "step": 44183,
      "training_step_time": 0.44690489768981934
    },
    {
      "epoch": 0.000269677734375,
      "model_forward_time": 0.11471700668334961,
      "step": 44184
    },
    {
      "epoch": 0.000269677734375,
      "step": 44184,
      "training_step_time": 0.39453887939453125
    },
    {
      "epoch": 0.000269683837890625,
      "model_forward_time": 0.11520075798034668,
      "step": 44185
    },
    {
      "epoch": 0.000269683837890625,
      "step": 44185,
      "training_step_time": 0.3971743583679199
    },
    {
      "epoch": 0.00026968994140625,
      "model_forward_time": 0.11541271209716797,
      "step": 44186
    },
    {
      "epoch": 0.00026968994140625,
      "step": 44186,
      "training_step_time": 0.4536261558532715
    },
    {
      "epoch": 0.000269696044921875,
      "model_forward_time": 0.11481094360351562,
      "step": 44187
    },
    {
      "epoch": 0.000269696044921875,
      "step": 44187,
      "training_step_time": 0.4691343307495117
    },
    {
      "epoch": 0.0002697021484375,
      "model_forward_time": 0.11446094512939453,
      "step": 44188
    },
    {
      "epoch": 0.0002697021484375,
      "step": 44188,
      "training_step_time": 0.3934354782104492
    },
    {
      "epoch": 0.000269708251953125,
      "model_forward_time": 0.11520051956176758,
      "step": 44189
    },
    {
      "epoch": 0.000269708251953125,
      "step": 44189,
      "training_step_time": 0.46978092193603516
    },
    {
      "epoch": 0.00026971435546875,
      "grad_norm": 0.12362022697925568,
      "learning_rate": 1.7811388220118707e-05,
      "loss": 0.0384,
      "step": 44190
    },
    {
      "epoch": 0.00026971435546875,
      "model_forward_time": 0.11547994613647461,
      "step": 44190
    },
    {
      "epoch": 0.00026971435546875,
      "step": 44190,
      "training_step_time": 0.4944894313812256
    },
    {
      "epoch": 0.000269720458984375,
      "model_forward_time": 0.11470365524291992,
      "step": 44191
    },
    {
      "epoch": 0.000269720458984375,
      "step": 44191,
      "training_step_time": 0.3871462345123291
    },
    {
      "epoch": 0.0002697265625,
      "model_forward_time": 0.11511588096618652,
      "step": 44192
    },
    {
      "epoch": 0.0002697265625,
      "step": 44192,
      "training_step_time": 0.4221346378326416
    },
    {
      "epoch": 0.000269732666015625,
      "model_forward_time": 0.11529064178466797,
      "step": 44193
    },
    {
      "epoch": 0.000269732666015625,
      "step": 44193,
      "training_step_time": 0.4144148826599121
    },
    {
      "epoch": 0.00026973876953125,
      "model_forward_time": 0.11613225936889648,
      "step": 44194
    },
    {
      "epoch": 0.00026973876953125,
      "step": 44194,
      "training_step_time": 0.38261914253234863
    },
    {
      "epoch": 0.000269744873046875,
      "model_forward_time": 0.1149907112121582,
      "step": 44195
    },
    {
      "epoch": 0.000269744873046875,
      "step": 44195,
      "training_step_time": 0.404801607131958
    },
    {
      "epoch": 0.0002697509765625,
      "model_forward_time": 0.11504983901977539,
      "step": 44196
    },
    {
      "epoch": 0.0002697509765625,
      "step": 44196,
      "training_step_time": 0.4089851379394531
    },
    {
      "epoch": 0.000269757080078125,
      "model_forward_time": 0.11612772941589355,
      "step": 44197
    },
    {
      "epoch": 0.000269757080078125,
      "step": 44197,
      "training_step_time": 0.4217643737792969
    },
    {
      "epoch": 0.00026976318359375,
      "model_forward_time": 0.11451196670532227,
      "step": 44198
    },
    {
      "epoch": 0.00026976318359375,
      "step": 44198,
      "training_step_time": 0.40361928939819336
    },
    {
      "epoch": 0.000269769287109375,
      "model_forward_time": 0.1152191162109375,
      "step": 44199
    },
    {
      "epoch": 0.000269769287109375,
      "step": 44199,
      "training_step_time": 0.392719030380249
    },
    {
      "epoch": 0.000269775390625,
      "grad_norm": 0.13811881840229034,
      "learning_rate": 1.7790305385456795e-05,
      "loss": 0.0353,
      "step": 44200
    },
    {
      "epoch": 0.000269775390625,
      "model_forward_time": 0.11516642570495605,
      "step": 44200
    },
    {
      "epoch": 0.000269775390625,
      "step": 44200,
      "training_step_time": 0.40007996559143066
    },
    {
      "epoch": 0.000269781494140625,
      "model_forward_time": 0.11448884010314941,
      "step": 44201
    },
    {
      "epoch": 0.000269781494140625,
      "step": 44201,
      "training_step_time": 0.44879150390625
    },
    {
      "epoch": 0.00026978759765625,
      "model_forward_time": 0.1148068904876709,
      "step": 44202
    },
    {
      "epoch": 0.00026978759765625,
      "step": 44202,
      "training_step_time": 0.42823052406311035
    },
    {
      "epoch": 0.000269793701171875,
      "model_forward_time": 0.11668634414672852,
      "step": 44203
    },
    {
      "epoch": 0.000269793701171875,
      "step": 44203,
      "training_step_time": 0.44504690170288086
    },
    {
      "epoch": 0.0002697998046875,
      "model_forward_time": 0.11551380157470703,
      "step": 44204
    },
    {
      "epoch": 0.0002697998046875,
      "step": 44204,
      "training_step_time": 0.4184751510620117
    },
    {
      "epoch": 0.000269805908203125,
      "model_forward_time": 0.11500978469848633,
      "step": 44205
    },
    {
      "epoch": 0.000269805908203125,
      "step": 44205,
      "training_step_time": 0.42043185234069824
    },
    {
      "epoch": 0.00026981201171875,
      "model_forward_time": 0.11561346054077148,
      "step": 44206
    },
    {
      "epoch": 0.00026981201171875,
      "step": 44206,
      "training_step_time": 0.42691540718078613
    },
    {
      "epoch": 0.000269818115234375,
      "model_forward_time": 0.11451148986816406,
      "step": 44207
    },
    {
      "epoch": 0.000269818115234375,
      "step": 44207,
      "training_step_time": 0.4694514274597168
    },
    {
      "epoch": 0.00026982421875,
      "model_forward_time": 0.1151125431060791,
      "step": 44208
    },
    {
      "epoch": 0.00026982421875,
      "step": 44208,
      "training_step_time": 0.39391279220581055
    },
    {
      "epoch": 0.000269830322265625,
      "model_forward_time": 0.11494970321655273,
      "step": 44209
    },
    {
      "epoch": 0.000269830322265625,
      "step": 44209,
      "training_step_time": 0.4057579040527344
    },
    {
      "epoch": 0.00026983642578125,
      "grad_norm": 0.09862882643938065,
      "learning_rate": 1.7769232335249657e-05,
      "loss": 0.0428,
      "step": 44210
    },
    {
      "epoch": 0.00026983642578125,
      "model_forward_time": 0.11511611938476562,
      "step": 44210
    },
    {
      "epoch": 0.00026983642578125,
      "step": 44210,
      "training_step_time": 0.6082377433776855
    },
    {
      "epoch": 0.000269842529296875,
      "model_forward_time": 0.11533856391906738,
      "step": 44211
    },
    {
      "epoch": 0.000269842529296875,
      "step": 44211,
      "training_step_time": 0.3885343074798584
    },
    {
      "epoch": 0.0002698486328125,
      "model_forward_time": 0.1143653392791748,
      "step": 44212
    },
    {
      "epoch": 0.0002698486328125,
      "step": 44212,
      "training_step_time": 0.40460777282714844
    },
    {
      "epoch": 0.000269854736328125,
      "model_forward_time": 0.11540651321411133,
      "step": 44213
    },
    {
      "epoch": 0.000269854736328125,
      "step": 44213,
      "training_step_time": 0.38936400413513184
    },
    {
      "epoch": 0.00026986083984375,
      "model_forward_time": 0.11475396156311035,
      "step": 44214
    },
    {
      "epoch": 0.00026986083984375,
      "step": 44214,
      "training_step_time": 0.39811277389526367
    },
    {
      "epoch": 0.000269866943359375,
      "model_forward_time": 0.11473560333251953,
      "step": 44215
    },
    {
      "epoch": 0.000269866943359375,
      "step": 44215,
      "training_step_time": 0.4648163318634033
    },
    {
      "epoch": 0.000269873046875,
      "model_forward_time": 0.11507058143615723,
      "step": 44216
    },
    {
      "epoch": 0.000269873046875,
      "step": 44216,
      "training_step_time": 0.5827138423919678
    },
    {
      "epoch": 0.000269879150390625,
      "model_forward_time": 0.11502838134765625,
      "step": 44217
    },
    {
      "epoch": 0.000269879150390625,
      "step": 44217,
      "training_step_time": 0.4229578971862793
    },
    {
      "epoch": 0.00026988525390625,
      "model_forward_time": 0.11560630798339844,
      "step": 44218
    },
    {
      "epoch": 0.00026988525390625,
      "step": 44218,
      "training_step_time": 0.49370527267456055
    },
    {
      "epoch": 0.000269891357421875,
      "model_forward_time": 0.11466050148010254,
      "step": 44219
    },
    {
      "epoch": 0.000269891357421875,
      "step": 44219,
      "training_step_time": 0.4081094264984131
    },
    {
      "epoch": 0.0002698974609375,
      "grad_norm": 0.13092713057994843,
      "learning_rate": 1.774816907589873e-05,
      "loss": 0.042,
      "step": 44220
    },
    {
      "epoch": 0.0002698974609375,
      "model_forward_time": 0.11461758613586426,
      "step": 44220
    },
    {
      "epoch": 0.0002698974609375,
      "step": 44220,
      "training_step_time": 0.3894386291503906
    },
    {
      "epoch": 0.000269903564453125,
      "model_forward_time": 0.11480569839477539,
      "step": 44221
    },
    {
      "epoch": 0.000269903564453125,
      "step": 44221,
      "training_step_time": 0.3883213996887207
    },
    {
      "epoch": 0.00026990966796875,
      "model_forward_time": 0.11613965034484863,
      "step": 44222
    },
    {
      "epoch": 0.00026990966796875,
      "step": 44222,
      "training_step_time": 0.3928649425506592
    },
    {
      "epoch": 0.000269915771484375,
      "model_forward_time": 0.11601376533508301,
      "step": 44223
    },
    {
      "epoch": 0.000269915771484375,
      "step": 44223,
      "training_step_time": 0.3912312984466553
    },
    {
      "epoch": 0.000269921875,
      "model_forward_time": 0.11555051803588867,
      "step": 44224
    },
    {
      "epoch": 0.000269921875,
      "step": 44224,
      "training_step_time": 0.3925483226776123
    },
    {
      "epoch": 0.000269927978515625,
      "model_forward_time": 0.11541604995727539,
      "step": 44225
    },
    {
      "epoch": 0.000269927978515625,
      "step": 44225,
      "training_step_time": 0.40796875953674316
    },
    {
      "epoch": 0.00026993408203125,
      "model_forward_time": 0.11647152900695801,
      "step": 44226
    },
    {
      "epoch": 0.00026993408203125,
      "step": 44226,
      "training_step_time": 0.44394755363464355
    },
    {
      "epoch": 0.000269940185546875,
      "model_forward_time": 0.11527442932128906,
      "step": 44227
    },
    {
      "epoch": 0.000269940185546875,
      "step": 44227,
      "training_step_time": 0.4197390079498291
    },
    {
      "epoch": 0.0002699462890625,
      "model_forward_time": 0.11560893058776855,
      "step": 44228
    },
    {
      "epoch": 0.0002699462890625,
      "step": 44228,
      "training_step_time": 0.5524344444274902
    },
    {
      "epoch": 0.000269952392578125,
      "model_forward_time": 0.11594581604003906,
      "step": 44229
    },
    {
      "epoch": 0.000269952392578125,
      "step": 44229,
      "training_step_time": 0.4133491516113281
    },
    {
      "epoch": 0.00026995849609375,
      "grad_norm": 0.1341789960861206,
      "learning_rate": 1.7727115613802465e-05,
      "loss": 0.035,
      "step": 44230
    },
    {
      "epoch": 0.00026995849609375,
      "model_forward_time": 0.11468720436096191,
      "step": 44230
    },
    {
      "epoch": 0.00026995849609375,
      "step": 44230,
      "training_step_time": 0.4633207321166992
    },
    {
      "epoch": 0.000269964599609375,
      "model_forward_time": 0.11475300788879395,
      "step": 44231
    },
    {
      "epoch": 0.000269964599609375,
      "step": 44231,
      "training_step_time": 0.36551547050476074
    },
    {
      "epoch": 0.000269970703125,
      "model_forward_time": 0.11513543128967285,
      "step": 44232
    },
    {
      "epoch": 0.000269970703125,
      "step": 44232,
      "training_step_time": 0.44675588607788086
    },
    {
      "epoch": 0.000269976806640625,
      "model_forward_time": 0.11483263969421387,
      "step": 44233
    },
    {
      "epoch": 0.000269976806640625,
      "step": 44233,
      "training_step_time": 0.42775774002075195
    },
    {
      "epoch": 0.00026998291015625,
      "model_forward_time": 0.11527037620544434,
      "step": 44234
    },
    {
      "epoch": 0.00026998291015625,
      "step": 44234,
      "training_step_time": 0.46205663681030273
    },
    {
      "epoch": 0.000269989013671875,
      "model_forward_time": 0.11502528190612793,
      "step": 44235
    },
    {
      "epoch": 0.000269989013671875,
      "step": 44235,
      "training_step_time": 0.3842332363128662
    },
    {
      "epoch": 0.0002699951171875,
      "model_forward_time": 0.11486077308654785,
      "step": 44236
    },
    {
      "epoch": 0.0002699951171875,
      "step": 44236,
      "training_step_time": 0.37859463691711426
    },
    {
      "epoch": 0.000270001220703125,
      "model_forward_time": 0.11495828628540039,
      "step": 44237
    },
    {
      "epoch": 0.000270001220703125,
      "step": 44237,
      "training_step_time": 0.3931574821472168
    },
    {
      "epoch": 0.00027000732421875,
      "model_forward_time": 0.11462092399597168,
      "step": 44238
    },
    {
      "epoch": 0.00027000732421875,
      "step": 44238,
      "training_step_time": 0.3795466423034668
    },
    {
      "epoch": 0.000270013427734375,
      "model_forward_time": 0.11501860618591309,
      "step": 44239
    },
    {
      "epoch": 0.000270013427734375,
      "step": 44239,
      "training_step_time": 0.43224644660949707
    },
    {
      "epoch": 0.00027001953125,
      "grad_norm": 0.10933005064725876,
      "learning_rate": 1.770607195535639e-05,
      "loss": 0.0376,
      "step": 44240
    },
    {
      "epoch": 0.00027001953125,
      "model_forward_time": 0.11459231376647949,
      "step": 44240
    },
    {
      "epoch": 0.00027001953125,
      "step": 44240,
      "training_step_time": 0.5966277122497559
    },
    {
      "epoch": 0.000270025634765625,
      "model_forward_time": 0.11492228507995605,
      "step": 44241
    },
    {
      "epoch": 0.000270025634765625,
      "step": 44241,
      "training_step_time": 0.48809194564819336
    },
    {
      "epoch": 0.00027003173828125,
      "model_forward_time": 0.11365532875061035,
      "step": 44242
    },
    {
      "epoch": 0.00027003173828125,
      "step": 44242,
      "training_step_time": 0.38221049308776855
    },
    {
      "epoch": 0.000270037841796875,
      "model_forward_time": 0.11469364166259766,
      "step": 44243
    },
    {
      "epoch": 0.000270037841796875,
      "step": 44243,
      "training_step_time": 0.3956477642059326
    },
    {
      "epoch": 0.0002700439453125,
      "model_forward_time": 0.1142890453338623,
      "step": 44244
    },
    {
      "epoch": 0.0002700439453125,
      "step": 44244,
      "training_step_time": 0.41625165939331055
    },
    {
      "epoch": 0.000270050048828125,
      "model_forward_time": 0.11446475982666016,
      "step": 44245
    },
    {
      "epoch": 0.000270050048828125,
      "step": 44245,
      "training_step_time": 0.3890690803527832
    },
    {
      "epoch": 0.00027005615234375,
      "model_forward_time": 0.11525559425354004,
      "step": 44246
    },
    {
      "epoch": 0.00027005615234375,
      "step": 44246,
      "training_step_time": 0.6013522148132324
    },
    {
      "epoch": 0.000270062255859375,
      "model_forward_time": 0.11491775512695312,
      "step": 44247
    },
    {
      "epoch": 0.000270062255859375,
      "step": 44247,
      "training_step_time": 0.4056968688964844
    },
    {
      "epoch": 0.000270068359375,
      "model_forward_time": 0.11484026908874512,
      "step": 44248
    },
    {
      "epoch": 0.000270068359375,
      "step": 44248,
      "training_step_time": 0.38501548767089844
    },
    {
      "epoch": 0.000270074462890625,
      "model_forward_time": 0.11522459983825684,
      "step": 44249
    },
    {
      "epoch": 0.000270074462890625,
      "step": 44249,
      "training_step_time": 0.40044188499450684
    },
    {
      "epoch": 0.00027008056640625,
      "grad_norm": 0.09927311539649963,
      "learning_rate": 1.768503810695295e-05,
      "loss": 0.04,
      "step": 44250
    },
    {
      "epoch": 0.00027008056640625,
      "model_forward_time": 0.11498236656188965,
      "step": 44250
    },
    {
      "epoch": 0.00027008056640625,
      "step": 44250,
      "training_step_time": 0.38474202156066895
    },
    {
      "epoch": 0.000270086669921875,
      "model_forward_time": 0.11530017852783203,
      "step": 44251
    },
    {
      "epoch": 0.000270086669921875,
      "step": 44251,
      "training_step_time": 0.3947896957397461
    },
    {
      "epoch": 0.0002700927734375,
      "model_forward_time": 0.11463403701782227,
      "step": 44252
    },
    {
      "epoch": 0.0002700927734375,
      "step": 44252,
      "training_step_time": 0.7351269721984863
    },
    {
      "epoch": 0.000270098876953125,
      "model_forward_time": 0.11456942558288574,
      "step": 44253
    },
    {
      "epoch": 0.000270098876953125,
      "step": 44253,
      "training_step_time": 0.4555807113647461
    },
    {
      "epoch": 0.00027010498046875,
      "model_forward_time": 0.1142282485961914,
      "step": 44254
    },
    {
      "epoch": 0.00027010498046875,
      "step": 44254,
      "training_step_time": 0.489469051361084
    },
    {
      "epoch": 0.000270111083984375,
      "model_forward_time": 0.11376667022705078,
      "step": 44255
    },
    {
      "epoch": 0.000270111083984375,
      "step": 44255,
      "training_step_time": 0.4031989574432373
    },
    {
      "epoch": 0.0002701171875,
      "model_forward_time": 0.11473894119262695,
      "step": 44256
    },
    {
      "epoch": 0.0002701171875,
      "step": 44256,
      "training_step_time": 0.3942723274230957
    },
    {
      "epoch": 0.000270123291015625,
      "model_forward_time": 0.11364912986755371,
      "step": 44257
    },
    {
      "epoch": 0.000270123291015625,
      "step": 44257,
      "training_step_time": 0.43711352348327637
    },
    {
      "epoch": 0.00027012939453125,
      "model_forward_time": 0.11480140686035156,
      "step": 44258
    },
    {
      "epoch": 0.00027012939453125,
      "step": 44258,
      "training_step_time": 0.4710993766784668
    },
    {
      "epoch": 0.000270135498046875,
      "model_forward_time": 0.11505293846130371,
      "step": 44259
    },
    {
      "epoch": 0.000270135498046875,
      "step": 44259,
      "training_step_time": 0.3958780765533447
    },
    {
      "epoch": 0.0002701416015625,
      "grad_norm": 0.09708474576473236,
      "learning_rate": 1.7664014074981742e-05,
      "loss": 0.0415,
      "step": 44260
    },
    {
      "epoch": 0.0002701416015625,
      "model_forward_time": 0.11895203590393066,
      "step": 44260
    },
    {
      "epoch": 0.0002701416015625,
      "step": 44260,
      "training_step_time": 0.4124891757965088
    },
    {
      "epoch": 0.000270147705078125,
      "model_forward_time": 0.11616992950439453,
      "step": 44261
    },
    {
      "epoch": 0.000270147705078125,
      "step": 44261,
      "training_step_time": 0.4085533618927002
    },
    {
      "epoch": 0.00027015380859375,
      "model_forward_time": 0.1147456169128418,
      "step": 44262
    },
    {
      "epoch": 0.00027015380859375,
      "step": 44262,
      "training_step_time": 0.39057469367980957
    },
    {
      "epoch": 0.000270159912109375,
      "model_forward_time": 0.11474204063415527,
      "step": 44263
    },
    {
      "epoch": 0.000270159912109375,
      "step": 44263,
      "training_step_time": 0.39416003227233887
    },
    {
      "epoch": 0.000270166015625,
      "model_forward_time": 0.11521530151367188,
      "step": 44264
    },
    {
      "epoch": 0.000270166015625,
      "step": 44264,
      "training_step_time": 0.4077279567718506
    },
    {
      "epoch": 0.000270172119140625,
      "model_forward_time": 0.11593008041381836,
      "step": 44265
    },
    {
      "epoch": 0.000270172119140625,
      "step": 44265,
      "training_step_time": 0.39285969734191895
    },
    {
      "epoch": 0.00027017822265625,
      "model_forward_time": 0.11484003067016602,
      "step": 44266
    },
    {
      "epoch": 0.00027017822265625,
      "step": 44266,
      "training_step_time": 0.3911168575286865
    },
    {
      "epoch": 0.000270184326171875,
      "model_forward_time": 0.11676216125488281,
      "step": 44267
    },
    {
      "epoch": 0.000270184326171875,
      "step": 44267,
      "training_step_time": 0.418731689453125
    },
    {
      "epoch": 0.0002701904296875,
      "model_forward_time": 0.11475014686584473,
      "step": 44268
    },
    {
      "epoch": 0.0002701904296875,
      "step": 44268,
      "training_step_time": 0.4190714359283447
    },
    {
      "epoch": 0.000270196533203125,
      "model_forward_time": 0.1158604621887207,
      "step": 44269
    },
    {
      "epoch": 0.000270196533203125,
      "step": 44269,
      "training_step_time": 0.5101521015167236
    },
    {
      "epoch": 0.00027020263671875,
      "grad_norm": 0.10593996196985245,
      "learning_rate": 1.764299986582924e-05,
      "loss": 0.0372,
      "step": 44270
    },
    {
      "epoch": 0.00027020263671875,
      "model_forward_time": 0.11525440216064453,
      "step": 44270
    },
    {
      "epoch": 0.00027020263671875,
      "step": 44270,
      "training_step_time": 0.5308017730712891
    },
    {
      "epoch": 0.000270208740234375,
      "model_forward_time": 0.11511564254760742,
      "step": 44271
    },
    {
      "epoch": 0.000270208740234375,
      "step": 44271,
      "training_step_time": 0.4168236255645752
    },
    {
      "epoch": 0.00027021484375,
      "model_forward_time": 0.11501193046569824,
      "step": 44272
    },
    {
      "epoch": 0.00027021484375,
      "step": 44272,
      "training_step_time": 0.4581761360168457
    },
    {
      "epoch": 0.000270220947265625,
      "model_forward_time": 0.11439204216003418,
      "step": 44273
    },
    {
      "epoch": 0.000270220947265625,
      "step": 44273,
      "training_step_time": 0.37570810317993164
    },
    {
      "epoch": 0.00027022705078125,
      "model_forward_time": 0.11468172073364258,
      "step": 44274
    },
    {
      "epoch": 0.00027022705078125,
      "step": 44274,
      "training_step_time": 0.418926477432251
    },
    {
      "epoch": 0.000270233154296875,
      "model_forward_time": 0.11458039283752441,
      "step": 44275
    },
    {
      "epoch": 0.000270233154296875,
      "step": 44275,
      "training_step_time": 0.4056236743927002
    },
    {
      "epoch": 0.0002702392578125,
      "model_forward_time": 0.11860466003417969,
      "step": 44276
    },
    {
      "epoch": 0.0002702392578125,
      "step": 44276,
      "training_step_time": 0.37996363639831543
    },
    {
      "epoch": 0.000270245361328125,
      "model_forward_time": 0.1186513900756836,
      "step": 44277
    },
    {
      "epoch": 0.000270245361328125,
      "step": 44277,
      "training_step_time": 0.3813190460205078
    },
    {
      "epoch": 0.00027025146484375,
      "model_forward_time": 0.11563372611999512,
      "step": 44278
    },
    {
      "epoch": 0.00027025146484375,
      "step": 44278,
      "training_step_time": 0.388106107711792
    },
    {
      "epoch": 0.000270257568359375,
      "model_forward_time": 0.11640691757202148,
      "step": 44279
    },
    {
      "epoch": 0.000270257568359375,
      "step": 44279,
      "training_step_time": 0.40256309509277344
    },
    {
      "epoch": 0.000270263671875,
      "grad_norm": 0.17130324244499207,
      "learning_rate": 1.7621995485879062e-05,
      "loss": 0.04,
      "step": 44280
    },
    {
      "epoch": 0.000270263671875,
      "model_forward_time": 0.11471128463745117,
      "step": 44280
    },
    {
      "epoch": 0.000270263671875,
      "step": 44280,
      "training_step_time": 0.4012720584869385
    },
    {
      "epoch": 0.000270269775390625,
      "model_forward_time": 0.11542272567749023,
      "step": 44281
    },
    {
      "epoch": 0.000270269775390625,
      "step": 44281,
      "training_step_time": 0.448819637298584
    },
    {
      "epoch": 0.00027027587890625,
      "model_forward_time": 0.11661195755004883,
      "step": 44282
    },
    {
      "epoch": 0.00027027587890625,
      "step": 44282,
      "training_step_time": 0.4981369972229004
    },
    {
      "epoch": 0.000270281982421875,
      "model_forward_time": 0.11566758155822754,
      "step": 44283
    },
    {
      "epoch": 0.000270281982421875,
      "step": 44283,
      "training_step_time": 0.4226522445678711
    },
    {
      "epoch": 0.0002702880859375,
      "model_forward_time": 0.11434340476989746,
      "step": 44284
    },
    {
      "epoch": 0.0002702880859375,
      "step": 44284,
      "training_step_time": 0.3869442939758301
    },
    {
      "epoch": 0.000270294189453125,
      "model_forward_time": 0.11558318138122559,
      "step": 44285
    },
    {
      "epoch": 0.000270294189453125,
      "step": 44285,
      "training_step_time": 0.4115111827850342
    },
    {
      "epoch": 0.00027030029296875,
      "model_forward_time": 0.11511945724487305,
      "step": 44286
    },
    {
      "epoch": 0.00027030029296875,
      "step": 44286,
      "training_step_time": 0.44374823570251465
    },
    {
      "epoch": 0.000270306396484375,
      "model_forward_time": 0.11494779586791992,
      "step": 44287
    },
    {
      "epoch": 0.000270306396484375,
      "step": 44287,
      "training_step_time": 0.4229295253753662
    },
    {
      "epoch": 0.0002703125,
      "model_forward_time": 0.1158444881439209,
      "step": 44288
    },
    {
      "epoch": 0.0002703125,
      "step": 44288,
      "training_step_time": 0.6577410697937012
    },
    {
      "epoch": 0.000270318603515625,
      "model_forward_time": 0.11532402038574219,
      "step": 44289
    },
    {
      "epoch": 0.000270318603515625,
      "step": 44289,
      "training_step_time": 0.46736621856689453
    },
    {
      "epoch": 0.00027032470703125,
      "grad_norm": 0.10228488594293594,
      "learning_rate": 1.7601000941511757e-05,
      "loss": 0.0373,
      "step": 44290
    },
    {
      "epoch": 0.00027032470703125,
      "model_forward_time": 0.1146233081817627,
      "step": 44290
    },
    {
      "epoch": 0.00027032470703125,
      "step": 44290,
      "training_step_time": 0.41307902336120605
    },
    {
      "epoch": 0.000270330810546875,
      "model_forward_time": 0.11519289016723633,
      "step": 44291
    },
    {
      "epoch": 0.000270330810546875,
      "step": 44291,
      "training_step_time": 0.39202356338500977
    },
    {
      "epoch": 0.0002703369140625,
      "model_forward_time": 0.11469841003417969,
      "step": 44292
    },
    {
      "epoch": 0.0002703369140625,
      "step": 44292,
      "training_step_time": 0.3917734622955322
    },
    {
      "epoch": 0.000270343017578125,
      "model_forward_time": 0.1147160530090332,
      "step": 44293
    },
    {
      "epoch": 0.000270343017578125,
      "step": 44293,
      "training_step_time": 0.39338254928588867
    },
    {
      "epoch": 0.00027034912109375,
      "model_forward_time": 0.11559224128723145,
      "step": 44294
    },
    {
      "epoch": 0.00027034912109375,
      "step": 44294,
      "training_step_time": 0.5158653259277344
    },
    {
      "epoch": 0.000270355224609375,
      "model_forward_time": 0.11489558219909668,
      "step": 44295
    },
    {
      "epoch": 0.000270355224609375,
      "step": 44295,
      "training_step_time": 0.4664173126220703
    },
    {
      "epoch": 0.000270361328125,
      "model_forward_time": 0.11641502380371094,
      "step": 44296
    },
    {
      "epoch": 0.000270361328125,
      "step": 44296,
      "training_step_time": 0.48456788063049316
    },
    {
      "epoch": 0.000270367431640625,
      "model_forward_time": 0.11506843566894531,
      "step": 44297
    },
    {
      "epoch": 0.000270367431640625,
      "step": 44297,
      "training_step_time": 0.4186103343963623
    },
    {
      "epoch": 0.00027037353515625,
      "model_forward_time": 0.11413240432739258,
      "step": 44298
    },
    {
      "epoch": 0.00027037353515625,
      "step": 44298,
      "training_step_time": 0.38581132888793945
    },
    {
      "epoch": 0.000270379638671875,
      "model_forward_time": 0.11461567878723145,
      "step": 44299
    },
    {
      "epoch": 0.000270379638671875,
      "step": 44299,
      "training_step_time": 0.41547679901123047
    },
    {
      "epoch": 0.0002703857421875,
      "grad_norm": 0.10309750586748123,
      "learning_rate": 1.7580016239104924e-05,
      "loss": 0.0374,
      "step": 44300
    },
    {
      "epoch": 0.0002703857421875,
      "model_forward_time": 0.11493730545043945,
      "step": 44300
    },
    {
      "epoch": 0.0002703857421875,
      "step": 44300,
      "training_step_time": 0.44751405715942383
    },
    {
      "epoch": 0.000270391845703125,
      "model_forward_time": 0.11501049995422363,
      "step": 44301
    },
    {
      "epoch": 0.000270391845703125,
      "step": 44301,
      "training_step_time": 0.39760637283325195
    },
    {
      "epoch": 0.00027039794921875,
      "model_forward_time": 0.11664605140686035,
      "step": 44302
    },
    {
      "epoch": 0.00027039794921875,
      "step": 44302,
      "training_step_time": 0.3995537757873535
    },
    {
      "epoch": 0.000270404052734375,
      "model_forward_time": 0.11520123481750488,
      "step": 44303
    },
    {
      "epoch": 0.000270404052734375,
      "step": 44303,
      "training_step_time": 0.4373049736022949
    },
    {
      "epoch": 0.00027041015625,
      "model_forward_time": 0.1158914566040039,
      "step": 44304
    },
    {
      "epoch": 0.00027041015625,
      "step": 44304,
      "training_step_time": 0.4042210578918457
    },
    {
      "epoch": 0.000270416259765625,
      "model_forward_time": 0.11625838279724121,
      "step": 44305
    },
    {
      "epoch": 0.000270416259765625,
      "step": 44305,
      "training_step_time": 0.3901231288909912
    },
    {
      "epoch": 0.00027042236328125,
      "model_forward_time": 0.11492347717285156,
      "step": 44306
    },
    {
      "epoch": 0.00027042236328125,
      "step": 44306,
      "training_step_time": 0.39901280403137207
    },
    {
      "epoch": 0.000270428466796875,
      "model_forward_time": 0.11502480506896973,
      "step": 44307
    },
    {
      "epoch": 0.000270428466796875,
      "step": 44307,
      "training_step_time": 0.38721704483032227
    },
    {
      "epoch": 0.0002704345703125,
      "model_forward_time": 0.1160283088684082,
      "step": 44308
    },
    {
      "epoch": 0.0002704345703125,
      "step": 44308,
      "training_step_time": 0.3907008171081543
    },
    {
      "epoch": 0.000270440673828125,
      "model_forward_time": 0.11455583572387695,
      "step": 44309
    },
    {
      "epoch": 0.000270440673828125,
      "step": 44309,
      "training_step_time": 0.5106375217437744
    },
    {
      "epoch": 0.00027044677734375,
      "grad_norm": 0.11454371362924576,
      "learning_rate": 1.755904138503316e-05,
      "loss": 0.0443,
      "step": 44310
    },
    {
      "epoch": 0.00027044677734375,
      "model_forward_time": 0.11459136009216309,
      "step": 44310
    },
    {
      "epoch": 0.00027044677734375,
      "step": 44310,
      "training_step_time": 0.4204561710357666
    },
    {
      "epoch": 0.000270452880859375,
      "model_forward_time": 0.11520242691040039,
      "step": 44311
    },
    {
      "epoch": 0.000270452880859375,
      "step": 44311,
      "training_step_time": 0.49471235275268555
    },
    {
      "epoch": 0.000270458984375,
      "model_forward_time": 0.11690163612365723,
      "step": 44312
    },
    {
      "epoch": 0.000270458984375,
      "step": 44312,
      "training_step_time": 0.48248934745788574
    },
    {
      "epoch": 0.000270465087890625,
      "model_forward_time": 0.11564326286315918,
      "step": 44313
    },
    {
      "epoch": 0.000270465087890625,
      "step": 44313,
      "training_step_time": 0.41769933700561523
    },
    {
      "epoch": 0.00027047119140625,
      "model_forward_time": 0.11633872985839844,
      "step": 44314
    },
    {
      "epoch": 0.00027047119140625,
      "step": 44314,
      "training_step_time": 0.41300225257873535
    },
    {
      "epoch": 0.000270477294921875,
      "model_forward_time": 0.1150214672088623,
      "step": 44315
    },
    {
      "epoch": 0.000270477294921875,
      "step": 44315,
      "training_step_time": 0.44380640983581543
    },
    {
      "epoch": 0.0002704833984375,
      "model_forward_time": 0.11616754531860352,
      "step": 44316
    },
    {
      "epoch": 0.0002704833984375,
      "step": 44316,
      "training_step_time": 0.46956586837768555
    },
    {
      "epoch": 0.000270489501953125,
      "model_forward_time": 0.11680078506469727,
      "step": 44317
    },
    {
      "epoch": 0.000270489501953125,
      "step": 44317,
      "training_step_time": 0.494234561920166
    },
    {
      "epoch": 0.00027049560546875,
      "model_forward_time": 0.11521196365356445,
      "step": 44318
    },
    {
      "epoch": 0.00027049560546875,
      "step": 44318,
      "training_step_time": 0.3905501365661621
    },
    {
      "epoch": 0.000270501708984375,
      "model_forward_time": 0.11513209342956543,
      "step": 44319
    },
    {
      "epoch": 0.000270501708984375,
      "step": 44319,
      "training_step_time": 0.3932521343231201
    },
    {
      "epoch": 0.0002705078125,
      "grad_norm": 0.11111774295568466,
      "learning_rate": 1.753807638566805e-05,
      "loss": 0.0371,
      "step": 44320
    },
    {
      "epoch": 0.0002705078125,
      "model_forward_time": 0.11499619483947754,
      "step": 44320
    },
    {
      "epoch": 0.0002705078125,
      "step": 44320,
      "training_step_time": 0.3887758255004883
    },
    {
      "epoch": 0.000270513916015625,
      "model_forward_time": 0.1154015064239502,
      "step": 44321
    },
    {
      "epoch": 0.000270513916015625,
      "step": 44321,
      "training_step_time": 0.4001157283782959
    },
    {
      "epoch": 0.00027052001953125,
      "model_forward_time": 0.11478519439697266,
      "step": 44322
    },
    {
      "epoch": 0.00027052001953125,
      "step": 44322,
      "training_step_time": 0.39457273483276367
    },
    {
      "epoch": 0.000270526123046875,
      "model_forward_time": 0.11501479148864746,
      "step": 44323
    },
    {
      "epoch": 0.000270526123046875,
      "step": 44323,
      "training_step_time": 0.39920568466186523
    },
    {
      "epoch": 0.0002705322265625,
      "model_forward_time": 0.11559081077575684,
      "step": 44324
    },
    {
      "epoch": 0.0002705322265625,
      "step": 44324,
      "training_step_time": 0.6578176021575928
    },
    {
      "epoch": 0.000270538330078125,
      "model_forward_time": 0.11453127861022949,
      "step": 44325
    },
    {
      "epoch": 0.000270538330078125,
      "step": 44325,
      "training_step_time": 0.42866015434265137
    },
    {
      "epoch": 0.00027054443359375,
      "model_forward_time": 0.11465191841125488,
      "step": 44326
    },
    {
      "epoch": 0.00027054443359375,
      "step": 44326,
      "training_step_time": 0.46851158142089844
    },
    {
      "epoch": 0.000270550537109375,
      "model_forward_time": 0.11461091041564941,
      "step": 44327
    },
    {
      "epoch": 0.000270550537109375,
      "step": 44327,
      "training_step_time": 0.39827442169189453
    },
    {
      "epoch": 0.000270556640625,
      "model_forward_time": 0.11501550674438477,
      "step": 44328
    },
    {
      "epoch": 0.000270556640625,
      "step": 44328,
      "training_step_time": 0.4050774574279785
    },
    {
      "epoch": 0.000270562744140625,
      "model_forward_time": 0.11611461639404297,
      "step": 44329
    },
    {
      "epoch": 0.000270562744140625,
      "step": 44329,
      "training_step_time": 0.4824211597442627
    },
    {
      "epoch": 0.00027056884765625,
      "grad_norm": 0.122504822909832,
      "learning_rate": 1.751712124737826e-05,
      "loss": 0.0386,
      "step": 44330
    },
    {
      "epoch": 0.00027056884765625,
      "model_forward_time": 0.11931872367858887,
      "step": 44330
    },
    {
      "epoch": 0.00027056884765625,
      "step": 44330,
      "training_step_time": 0.3836252689361572
    },
    {
      "epoch": 0.000270574951171875,
      "model_forward_time": 0.11852264404296875,
      "step": 44331
    },
    {
      "epoch": 0.000270574951171875,
      "step": 44331,
      "training_step_time": 0.4591500759124756
    },
    {
      "epoch": 0.0002705810546875,
      "model_forward_time": 0.11899566650390625,
      "step": 44332
    },
    {
      "epoch": 0.0002705810546875,
      "step": 44332,
      "training_step_time": 0.39487147331237793
    },
    {
      "epoch": 0.000270587158203125,
      "model_forward_time": 0.11825942993164062,
      "step": 44333
    },
    {
      "epoch": 0.000270587158203125,
      "step": 44333,
      "training_step_time": 0.38512253761291504
    },
    {
      "epoch": 0.00027059326171875,
      "model_forward_time": 0.11922192573547363,
      "step": 44334
    },
    {
      "epoch": 0.00027059326171875,
      "step": 44334,
      "training_step_time": 0.3810102939605713
    },
    {
      "epoch": 0.000270599365234375,
      "model_forward_time": 0.11757612228393555,
      "step": 44335
    },
    {
      "epoch": 0.000270599365234375,
      "step": 44335,
      "training_step_time": 0.3861083984375
    },
    {
      "epoch": 0.00027060546875,
      "model_forward_time": 0.11886191368103027,
      "step": 44336
    },
    {
      "epoch": 0.00027060546875,
      "step": 44336,
      "training_step_time": 0.5019004344940186
    },
    {
      "epoch": 0.000270611572265625,
      "model_forward_time": 0.1207876205444336,
      "step": 44337
    },
    {
      "epoch": 0.000270611572265625,
      "step": 44337,
      "training_step_time": 0.4473588466644287
    },
    {
      "epoch": 0.00027061767578125,
      "model_forward_time": 0.11774921417236328,
      "step": 44338
    },
    {
      "epoch": 0.00027061767578125,
      "step": 44338,
      "training_step_time": 0.4037191867828369
    },
    {
      "epoch": 0.000270623779296875,
      "model_forward_time": 0.11984109878540039,
      "step": 44339
    },
    {
      "epoch": 0.000270623779296875,
      "step": 44339,
      "training_step_time": 0.4813230037689209
    },
    {
      "epoch": 0.0002706298828125,
      "grad_norm": 0.10601862519979477,
      "learning_rate": 1.749617597652934e-05,
      "loss": 0.0414,
      "step": 44340
    },
    {
      "epoch": 0.0002706298828125,
      "model_forward_time": 0.11942648887634277,
      "step": 44340
    },
    {
      "epoch": 0.0002706298828125,
      "step": 44340,
      "training_step_time": 0.388486385345459
    },
    {
      "epoch": 0.000270635986328125,
      "model_forward_time": 0.11600399017333984,
      "step": 44341
    },
    {
      "epoch": 0.000270635986328125,
      "step": 44341,
      "training_step_time": 0.39153265953063965
    },
    {
      "epoch": 0.00027064208984375,
      "model_forward_time": 0.11551308631896973,
      "step": 44342
    },
    {
      "epoch": 0.00027064208984375,
      "step": 44342,
      "training_step_time": 0.4476628303527832
    },
    {
      "epoch": 0.000270648193359375,
      "model_forward_time": 0.11500239372253418,
      "step": 44343
    },
    {
      "epoch": 0.000270648193359375,
      "step": 44343,
      "training_step_time": 0.41434574127197266
    },
    {
      "epoch": 0.000270654296875,
      "model_forward_time": 0.11501121520996094,
      "step": 44344
    },
    {
      "epoch": 0.000270654296875,
      "step": 44344,
      "training_step_time": 0.43826794624328613
    },
    {
      "epoch": 0.000270660400390625,
      "model_forward_time": 0.11692142486572266,
      "step": 44345
    },
    {
      "epoch": 0.000270660400390625,
      "step": 44345,
      "training_step_time": 0.49054694175720215
    },
    {
      "epoch": 0.00027066650390625,
      "model_forward_time": 0.11515974998474121,
      "step": 44346
    },
    {
      "epoch": 0.00027066650390625,
      "step": 44346,
      "training_step_time": 0.40541625022888184
    },
    {
      "epoch": 0.000270672607421875,
      "model_forward_time": 0.11598539352416992,
      "step": 44347
    },
    {
      "epoch": 0.000270672607421875,
      "step": 44347,
      "training_step_time": 0.39573097229003906
    },
    {
      "epoch": 0.0002706787109375,
      "model_forward_time": 0.11469268798828125,
      "step": 44348
    },
    {
      "epoch": 0.0002706787109375,
      "step": 44348,
      "training_step_time": 0.4555816650390625
    },
    {
      "epoch": 0.000270684814453125,
      "model_forward_time": 0.1157538890838623,
      "step": 44349
    },
    {
      "epoch": 0.000270684814453125,
      "step": 44349,
      "training_step_time": 0.4146084785461426
    },
    {
      "epoch": 0.00027069091796875,
      "grad_norm": 0.09471037983894348,
      "learning_rate": 1.7475240579483958e-05,
      "loss": 0.0357,
      "step": 44350
    },
    {
      "epoch": 0.00027069091796875,
      "model_forward_time": 0.11531758308410645,
      "step": 44350
    },
    {
      "epoch": 0.00027069091796875,
      "step": 44350,
      "training_step_time": 0.3952779769897461
    },
    {
      "epoch": 0.000270697021484375,
      "model_forward_time": 0.11562299728393555,
      "step": 44351
    },
    {
      "epoch": 0.000270697021484375,
      "step": 44351,
      "training_step_time": 0.3937833309173584
    },
    {
      "epoch": 0.000270703125,
      "model_forward_time": 0.11473631858825684,
      "step": 44352
    },
    {
      "epoch": 0.000270703125,
      "step": 44352,
      "training_step_time": 0.44911861419677734
    },
    {
      "epoch": 0.000270709228515625,
      "model_forward_time": 0.1148676872253418,
      "step": 44353
    },
    {
      "epoch": 0.000270709228515625,
      "step": 44353,
      "training_step_time": 0.44075632095336914
    },
    {
      "epoch": 0.00027071533203125,
      "model_forward_time": 0.11605429649353027,
      "step": 44354
    },
    {
      "epoch": 0.00027071533203125,
      "step": 44354,
      "training_step_time": 0.5594692230224609
    },
    {
      "epoch": 0.000270721435546875,
      "model_forward_time": 0.11499357223510742,
      "step": 44355
    },
    {
      "epoch": 0.000270721435546875,
      "step": 44355,
      "training_step_time": 0.3901546001434326
    },
    {
      "epoch": 0.0002707275390625,
      "model_forward_time": 0.11516785621643066,
      "step": 44356
    },
    {
      "epoch": 0.0002707275390625,
      "step": 44356,
      "training_step_time": 0.41432619094848633
    },
    {
      "epoch": 0.000270733642578125,
      "model_forward_time": 0.11499333381652832,
      "step": 44357
    },
    {
      "epoch": 0.000270733642578125,
      "step": 44357,
      "training_step_time": 0.4141848087310791
    },
    {
      "epoch": 0.00027073974609375,
      "model_forward_time": 0.11542201042175293,
      "step": 44358
    },
    {
      "epoch": 0.00027073974609375,
      "step": 44358,
      "training_step_time": 0.3690204620361328
    },
    {
      "epoch": 0.000270745849609375,
      "model_forward_time": 0.11502218246459961,
      "step": 44359
    },
    {
      "epoch": 0.000270745849609375,
      "step": 44359,
      "training_step_time": 0.45598530769348145
    },
    {
      "epoch": 0.000270751953125,
      "grad_norm": 0.11563859134912491,
      "learning_rate": 1.745431506260173e-05,
      "loss": 0.0399,
      "step": 44360
    },
    {
      "epoch": 0.000270751953125,
      "model_forward_time": 0.11483621597290039,
      "step": 44360
    },
    {
      "epoch": 0.000270751953125,
      "step": 44360,
      "training_step_time": 0.55289626121521
    },
    {
      "epoch": 0.000270758056640625,
      "model_forward_time": 0.11501383781433105,
      "step": 44361
    },
    {
      "epoch": 0.000270758056640625,
      "step": 44361,
      "training_step_time": 0.39245152473449707
    },
    {
      "epoch": 0.00027076416015625,
      "model_forward_time": 0.11461019515991211,
      "step": 44362
    },
    {
      "epoch": 0.00027076416015625,
      "step": 44362,
      "training_step_time": 0.39043354988098145
    },
    {
      "epoch": 0.000270770263671875,
      "model_forward_time": 0.11569476127624512,
      "step": 44363
    },
    {
      "epoch": 0.000270770263671875,
      "step": 44363,
      "training_step_time": 0.38723158836364746
    },
    {
      "epoch": 0.0002707763671875,
      "model_forward_time": 0.11479473114013672,
      "step": 44364
    },
    {
      "epoch": 0.0002707763671875,
      "step": 44364,
      "training_step_time": 0.393047571182251
    },
    {
      "epoch": 0.000270782470703125,
      "model_forward_time": 0.11479616165161133,
      "step": 44365
    },
    {
      "epoch": 0.000270782470703125,
      "step": 44365,
      "training_step_time": 0.4021189212799072
    },
    {
      "epoch": 0.00027078857421875,
      "model_forward_time": 0.11638975143432617,
      "step": 44366
    },
    {
      "epoch": 0.00027078857421875,
      "step": 44366,
      "training_step_time": 0.6865606307983398
    },
    {
      "epoch": 0.000270794677734375,
      "model_forward_time": 0.11475181579589844,
      "step": 44367
    },
    {
      "epoch": 0.000270794677734375,
      "step": 44367,
      "training_step_time": 0.4374871253967285
    },
    {
      "epoch": 0.00027080078125,
      "model_forward_time": 0.11432766914367676,
      "step": 44368
    },
    {
      "epoch": 0.00027080078125,
      "step": 44368,
      "training_step_time": 0.3880441188812256
    },
    {
      "epoch": 0.000270806884765625,
      "model_forward_time": 0.11517715454101562,
      "step": 44369
    },
    {
      "epoch": 0.000270806884765625,
      "step": 44369,
      "training_step_time": 0.3947889804840088
    },
    {
      "epoch": 0.00027081298828125,
      "grad_norm": 0.07964486628770828,
      "learning_rate": 1.743339943223926e-05,
      "loss": 0.033,
      "step": 44370
    },
    {
      "epoch": 0.00027081298828125,
      "model_forward_time": 0.11517930030822754,
      "step": 44370
    },
    {
      "epoch": 0.00027081298828125,
      "step": 44370,
      "training_step_time": 0.4093308448791504
    },
    {
      "epoch": 0.000270819091796875,
      "model_forward_time": 0.11479425430297852,
      "step": 44371
    },
    {
      "epoch": 0.000270819091796875,
      "step": 44371,
      "training_step_time": 0.40831732749938965
    },
    {
      "epoch": 0.0002708251953125,
      "model_forward_time": 0.11526298522949219,
      "step": 44372
    },
    {
      "epoch": 0.0002708251953125,
      "step": 44372,
      "training_step_time": 0.40288519859313965
    },
    {
      "epoch": 0.000270831298828125,
      "model_forward_time": 0.11460018157958984,
      "step": 44373
    },
    {
      "epoch": 0.000270831298828125,
      "step": 44373,
      "training_step_time": 0.42629265785217285
    },
    {
      "epoch": 0.00027083740234375,
      "model_forward_time": 0.11428403854370117,
      "step": 44374
    },
    {
      "epoch": 0.00027083740234375,
      "step": 44374,
      "training_step_time": 0.40178370475769043
    },
    {
      "epoch": 0.000270843505859375,
      "model_forward_time": 0.11499619483947754,
      "step": 44375
    },
    {
      "epoch": 0.000270843505859375,
      "step": 44375,
      "training_step_time": 0.3910949230194092
    },
    {
      "epoch": 0.000270849609375,
      "model_forward_time": 0.11540079116821289,
      "step": 44376
    },
    {
      "epoch": 0.000270849609375,
      "step": 44376,
      "training_step_time": 0.38851428031921387
    },
    {
      "epoch": 0.000270855712890625,
      "model_forward_time": 0.11451339721679688,
      "step": 44377
    },
    {
      "epoch": 0.000270855712890625,
      "step": 44377,
      "training_step_time": 0.40133190155029297
    },
    {
      "epoch": 0.00027086181640625,
      "model_forward_time": 0.11557912826538086,
      "step": 44378
    },
    {
      "epoch": 0.00027086181640625,
      "step": 44378,
      "training_step_time": 0.4920783042907715
    },
    {
      "epoch": 0.000270867919921875,
      "model_forward_time": 0.11538457870483398,
      "step": 44379
    },
    {
      "epoch": 0.000270867919921875,
      "step": 44379,
      "training_step_time": 0.3986027240753174
    },
    {
      "epoch": 0.0002708740234375,
      "grad_norm": 0.09044411033391953,
      "learning_rate": 1.7412493694750176e-05,
      "loss": 0.0367,
      "step": 44380
    },
    {
      "epoch": 0.0002708740234375,
      "model_forward_time": 0.11472797393798828,
      "step": 44380
    },
    {
      "epoch": 0.0002708740234375,
      "step": 44380,
      "training_step_time": 0.44119811058044434
    },
    {
      "epoch": 0.000270880126953125,
      "model_forward_time": 0.11504864692687988,
      "step": 44381
    },
    {
      "epoch": 0.000270880126953125,
      "step": 44381,
      "training_step_time": 0.4027838706970215
    },
    {
      "epoch": 0.00027088623046875,
      "model_forward_time": 0.1148519515991211,
      "step": 44382
    },
    {
      "epoch": 0.00027088623046875,
      "step": 44382,
      "training_step_time": 0.41692066192626953
    },
    {
      "epoch": 0.000270892333984375,
      "model_forward_time": 0.11477518081665039,
      "step": 44383
    },
    {
      "epoch": 0.000270892333984375,
      "step": 44383,
      "training_step_time": 0.399064302444458
    },
    {
      "epoch": 0.0002708984375,
      "model_forward_time": 0.11502981185913086,
      "step": 44384
    },
    {
      "epoch": 0.0002708984375,
      "step": 44384,
      "training_step_time": 0.61934494972229
    },
    {
      "epoch": 0.000270904541015625,
      "model_forward_time": 0.1149144172668457,
      "step": 44385
    },
    {
      "epoch": 0.000270904541015625,
      "step": 44385,
      "training_step_time": 0.4066638946533203
    },
    {
      "epoch": 0.00027091064453125,
      "model_forward_time": 0.1147909164428711,
      "step": 44386
    },
    {
      "epoch": 0.00027091064453125,
      "step": 44386,
      "training_step_time": 0.3679473400115967
    },
    {
      "epoch": 0.000270916748046875,
      "model_forward_time": 0.1145639419555664,
      "step": 44387
    },
    {
      "epoch": 0.000270916748046875,
      "step": 44387,
      "training_step_time": 0.44295358657836914
    },
    {
      "epoch": 0.0002709228515625,
      "model_forward_time": 0.11749958992004395,
      "step": 44388
    },
    {
      "epoch": 0.0002709228515625,
      "step": 44388,
      "training_step_time": 0.4255383014678955
    },
    {
      "epoch": 0.000270928955078125,
      "model_forward_time": 0.11804819107055664,
      "step": 44389
    },
    {
      "epoch": 0.000270928955078125,
      "step": 44389,
      "training_step_time": 0.3787102699279785
    },
    {
      "epoch": 0.00027093505859375,
      "grad_norm": 0.08775024861097336,
      "learning_rate": 1.7391597856485083e-05,
      "loss": 0.0366,
      "step": 44390
    },
    {
      "epoch": 0.00027093505859375,
      "model_forward_time": 0.11725568771362305,
      "step": 44390
    },
    {
      "epoch": 0.00027093505859375,
      "step": 44390,
      "training_step_time": 0.39670515060424805
    },
    {
      "epoch": 0.000270941162109375,
      "model_forward_time": 0.11504626274108887,
      "step": 44391
    },
    {
      "epoch": 0.000270941162109375,
      "step": 44391,
      "training_step_time": 0.4041931629180908
    },
    {
      "epoch": 0.000270947265625,
      "model_forward_time": 0.11475944519042969,
      "step": 44392
    },
    {
      "epoch": 0.000270947265625,
      "step": 44392,
      "training_step_time": 0.38582444190979004
    },
    {
      "epoch": 0.000270953369140625,
      "model_forward_time": 0.11525821685791016,
      "step": 44393
    },
    {
      "epoch": 0.000270953369140625,
      "step": 44393,
      "training_step_time": 0.41932106018066406
    },
    {
      "epoch": 0.00027095947265625,
      "model_forward_time": 0.11469602584838867,
      "step": 44394
    },
    {
      "epoch": 0.00027095947265625,
      "step": 44394,
      "training_step_time": 0.45590949058532715
    },
    {
      "epoch": 0.000270965576171875,
      "model_forward_time": 0.1148989200592041,
      "step": 44395
    },
    {
      "epoch": 0.000270965576171875,
      "step": 44395,
      "training_step_time": 0.40080714225769043
    },
    {
      "epoch": 0.0002709716796875,
      "model_forward_time": 0.11482834815979004,
      "step": 44396
    },
    {
      "epoch": 0.0002709716796875,
      "step": 44396,
      "training_step_time": 0.4458341598510742
    },
    {
      "epoch": 0.000270977783203125,
      "model_forward_time": 0.11469912528991699,
      "step": 44397
    },
    {
      "epoch": 0.000270977783203125,
      "step": 44397,
      "training_step_time": 0.3877549171447754
    },
    {
      "epoch": 0.00027098388671875,
      "model_forward_time": 0.11493134498596191,
      "step": 44398
    },
    {
      "epoch": 0.00027098388671875,
      "step": 44398,
      "training_step_time": 0.4001271724700928
    },
    {
      "epoch": 0.000270989990234375,
      "model_forward_time": 0.11518573760986328,
      "step": 44399
    },
    {
      "epoch": 0.000270989990234375,
      "step": 44399,
      "training_step_time": 0.5851140022277832
    },
    {
      "epoch": 0.00027099609375,
      "grad_norm": 0.13661625981330872,
      "learning_rate": 1.7370711923791567e-05,
      "loss": 0.0386,
      "step": 44400
    },
    {
      "epoch": 0.00027099609375,
      "model_forward_time": 0.11499738693237305,
      "step": 44400
    },
    {
      "epoch": 0.00027099609375,
      "step": 44400,
      "training_step_time": 0.40907716751098633
    },
    {
      "epoch": 0.000271002197265625,
      "model_forward_time": 0.11544179916381836,
      "step": 44401
    },
    {
      "epoch": 0.000271002197265625,
      "step": 44401,
      "training_step_time": 0.4581336975097656
    },
    {
      "epoch": 0.00027100830078125,
      "model_forward_time": 0.11458230018615723,
      "step": 44402
    },
    {
      "epoch": 0.00027100830078125,
      "step": 44402,
      "training_step_time": 0.43718886375427246
    },
    {
      "epoch": 0.000271014404296875,
      "model_forward_time": 0.1230919361114502,
      "step": 44403
    },
    {
      "epoch": 0.000271014404296875,
      "step": 44403,
      "training_step_time": 0.41656041145324707
    },
    {
      "epoch": 0.0002710205078125,
      "model_forward_time": 0.1185300350189209,
      "step": 44404
    },
    {
      "epoch": 0.0002710205078125,
      "step": 44404,
      "training_step_time": 0.3831799030303955
    },
    {
      "epoch": 0.000271026611328125,
      "model_forward_time": 0.11606359481811523,
      "step": 44405
    },
    {
      "epoch": 0.000271026611328125,
      "step": 44405,
      "training_step_time": 0.7289676666259766
    },
    {
      "epoch": 0.00027103271484375,
      "model_forward_time": 0.1144721508026123,
      "step": 44406
    },
    {
      "epoch": 0.00027103271484375,
      "step": 44406,
      "training_step_time": 0.4286789894104004
    },
    {
      "epoch": 0.000271038818359375,
      "model_forward_time": 0.1145169734954834,
      "step": 44407
    },
    {
      "epoch": 0.000271038818359375,
      "step": 44407,
      "training_step_time": 0.41488218307495117
    },
    {
      "epoch": 0.000271044921875,
      "model_forward_time": 0.11540794372558594,
      "step": 44408
    },
    {
      "epoch": 0.000271044921875,
      "step": 44408,
      "training_step_time": 0.3865480422973633
    },
    {
      "epoch": 0.000271051025390625,
      "model_forward_time": 0.11411070823669434,
      "step": 44409
    },
    {
      "epoch": 0.000271051025390625,
      "step": 44409,
      "training_step_time": 0.4756958484649658
    },
    {
      "epoch": 0.00027105712890625,
      "grad_norm": 0.11514563858509064,
      "learning_rate": 1.7349835903014277e-05,
      "loss": 0.0369,
      "step": 44410
    },
    {
      "epoch": 0.00027105712890625,
      "model_forward_time": 0.11507248878479004,
      "step": 44410
    },
    {
      "epoch": 0.00027105712890625,
      "step": 44410,
      "training_step_time": 0.38723158836364746
    },
    {
      "epoch": 0.000271063232421875,
      "model_forward_time": 0.11519026756286621,
      "step": 44411
    },
    {
      "epoch": 0.000271063232421875,
      "step": 44411,
      "training_step_time": 0.49628376960754395
    },
    {
      "epoch": 0.0002710693359375,
      "model_forward_time": 0.11534667015075684,
      "step": 44412
    },
    {
      "epoch": 0.0002710693359375,
      "step": 44412,
      "training_step_time": 0.3957200050354004
    },
    {
      "epoch": 0.000271075439453125,
      "model_forward_time": 0.11710119247436523,
      "step": 44413
    },
    {
      "epoch": 0.000271075439453125,
      "step": 44413,
      "training_step_time": 0.43594980239868164
    },
    {
      "epoch": 0.00027108154296875,
      "model_forward_time": 0.11536478996276855,
      "step": 44414
    },
    {
      "epoch": 0.00027108154296875,
      "step": 44414,
      "training_step_time": 0.46852827072143555
    },
    {
      "epoch": 0.000271087646484375,
      "model_forward_time": 0.11522245407104492,
      "step": 44415
    },
    {
      "epoch": 0.000271087646484375,
      "step": 44415,
      "training_step_time": 0.4305274486541748
    },
    {
      "epoch": 0.00027109375,
      "model_forward_time": 0.11631011962890625,
      "step": 44416
    },
    {
      "epoch": 0.00027109375,
      "step": 44416,
      "training_step_time": 0.500349760055542
    },
    {
      "epoch": 0.000271099853515625,
      "model_forward_time": 0.11517834663391113,
      "step": 44417
    },
    {
      "epoch": 0.000271099853515625,
      "step": 44417,
      "training_step_time": 0.3940448760986328
    },
    {
      "epoch": 0.00027110595703125,
      "model_forward_time": 0.11487793922424316,
      "step": 44418
    },
    {
      "epoch": 0.00027110595703125,
      "step": 44418,
      "training_step_time": 0.3879733085632324
    },
    {
      "epoch": 0.000271112060546875,
      "model_forward_time": 0.11511111259460449,
      "step": 44419
    },
    {
      "epoch": 0.000271112060546875,
      "step": 44419,
      "training_step_time": 0.4239614009857178
    },
    {
      "epoch": 0.0002711181640625,
      "grad_norm": 0.10936689376831055,
      "learning_rate": 1.7328969800494726e-05,
      "loss": 0.0453,
      "step": 44420
    },
    {
      "epoch": 0.0002711181640625,
      "model_forward_time": 0.11625480651855469,
      "step": 44420
    },
    {
      "epoch": 0.0002711181640625,
      "step": 44420,
      "training_step_time": 0.4213685989379883
    },
    {
      "epoch": 0.000271124267578125,
      "model_forward_time": 0.1164846420288086,
      "step": 44421
    },
    {
      "epoch": 0.000271124267578125,
      "step": 44421,
      "training_step_time": 0.3921036720275879
    },
    {
      "epoch": 0.00027113037109375,
      "model_forward_time": 0.1152794361114502,
      "step": 44422
    },
    {
      "epoch": 0.00027113037109375,
      "step": 44422,
      "training_step_time": 0.3985130786895752
    },
    {
      "epoch": 0.000271136474609375,
      "model_forward_time": 0.11584806442260742,
      "step": 44423
    },
    {
      "epoch": 0.000271136474609375,
      "step": 44423,
      "training_step_time": 0.4039652347564697
    },
    {
      "epoch": 0.000271142578125,
      "model_forward_time": 0.1156916618347168,
      "step": 44424
    },
    {
      "epoch": 0.000271142578125,
      "step": 44424,
      "training_step_time": 0.40312790870666504
    },
    {
      "epoch": 0.000271148681640625,
      "model_forward_time": 0.11559391021728516,
      "step": 44425
    },
    {
      "epoch": 0.000271148681640625,
      "step": 44425,
      "training_step_time": 0.4102005958557129
    },
    {
      "epoch": 0.00027115478515625,
      "model_forward_time": 0.1151890754699707,
      "step": 44426
    },
    {
      "epoch": 0.00027115478515625,
      "step": 44426,
      "training_step_time": 0.4038832187652588
    },
    {
      "epoch": 0.000271160888671875,
      "model_forward_time": 0.1158745288848877,
      "step": 44427
    },
    {
      "epoch": 0.000271160888671875,
      "step": 44427,
      "training_step_time": 0.4508693218231201
    },
    {
      "epoch": 0.0002711669921875,
      "model_forward_time": 0.11562085151672363,
      "step": 44428
    },
    {
      "epoch": 0.0002711669921875,
      "step": 44428,
      "training_step_time": 0.4113280773162842
    },
    {
      "epoch": 0.000271173095703125,
      "model_forward_time": 0.11778044700622559,
      "step": 44429
    },
    {
      "epoch": 0.000271173095703125,
      "step": 44429,
      "training_step_time": 0.7715969085693359
    },
    {
      "epoch": 0.00027117919921875,
      "grad_norm": 0.09852210432291031,
      "learning_rate": 1.7308113622571544e-05,
      "loss": 0.0387,
      "step": 44430
    },
    {
      "epoch": 0.00027117919921875,
      "model_forward_time": 0.11571264266967773,
      "step": 44430
    },
    {
      "epoch": 0.00027117919921875,
      "step": 44430,
      "training_step_time": 0.4220409393310547
    },
    {
      "epoch": 0.000271185302734375,
      "model_forward_time": 0.11548852920532227,
      "step": 44431
    },
    {
      "epoch": 0.000271185302734375,
      "step": 44431,
      "training_step_time": 0.39401960372924805
    },
    {
      "epoch": 0.00027119140625,
      "model_forward_time": 0.11464262008666992,
      "step": 44432
    },
    {
      "epoch": 0.00027119140625,
      "step": 44432,
      "training_step_time": 0.3875434398651123
    },
    {
      "epoch": 0.000271197509765625,
      "model_forward_time": 0.11445116996765137,
      "step": 44433
    },
    {
      "epoch": 0.000271197509765625,
      "step": 44433,
      "training_step_time": 0.3860170841217041
    },
    {
      "epoch": 0.00027120361328125,
      "model_forward_time": 0.11499691009521484,
      "step": 44434
    },
    {
      "epoch": 0.00027120361328125,
      "step": 44434,
      "training_step_time": 0.37983036041259766
    },
    {
      "epoch": 0.000271209716796875,
      "model_forward_time": 0.11624431610107422,
      "step": 44435
    },
    {
      "epoch": 0.000271209716796875,
      "step": 44435,
      "training_step_time": 0.6087813377380371
    },
    {
      "epoch": 0.0002712158203125,
      "model_forward_time": 0.1156151294708252,
      "step": 44436
    },
    {
      "epoch": 0.0002712158203125,
      "step": 44436,
      "training_step_time": 0.413938045501709
    },
    {
      "epoch": 0.000271221923828125,
      "model_forward_time": 0.11534547805786133,
      "step": 44437
    },
    {
      "epoch": 0.000271221923828125,
      "step": 44437,
      "training_step_time": 0.5053174495697021
    },
    {
      "epoch": 0.00027122802734375,
      "model_forward_time": 0.11564850807189941,
      "step": 44438
    },
    {
      "epoch": 0.00027122802734375,
      "step": 44438,
      "training_step_time": 0.3994910717010498
    },
    {
      "epoch": 0.000271234130859375,
      "model_forward_time": 0.11510252952575684,
      "step": 44439
    },
    {
      "epoch": 0.000271234130859375,
      "step": 44439,
      "training_step_time": 0.3931009769439697
    },
    {
      "epoch": 0.000271240234375,
      "grad_norm": 0.1086299791932106,
      "learning_rate": 1.7287267375580256e-05,
      "loss": 0.0342,
      "step": 44440
    },
    {
      "epoch": 0.000271240234375,
      "model_forward_time": 0.11521363258361816,
      "step": 44440
    },
    {
      "epoch": 0.000271240234375,
      "step": 44440,
      "training_step_time": 0.40407657623291016
    },
    {
      "epoch": 0.000271246337890625,
      "model_forward_time": 0.1161651611328125,
      "step": 44441
    },
    {
      "epoch": 0.000271246337890625,
      "step": 44441,
      "training_step_time": 0.8140499591827393
    },
    {
      "epoch": 0.00027125244140625,
      "model_forward_time": 0.11528849601745605,
      "step": 44442
    },
    {
      "epoch": 0.00027125244140625,
      "step": 44442,
      "training_step_time": 0.4377269744873047
    },
    {
      "epoch": 0.000271258544921875,
      "model_forward_time": 0.11512637138366699,
      "step": 44443
    },
    {
      "epoch": 0.000271258544921875,
      "step": 44443,
      "training_step_time": 0.4802091121673584
    },
    {
      "epoch": 0.0002712646484375,
      "model_forward_time": 0.11495757102966309,
      "step": 44444
    },
    {
      "epoch": 0.0002712646484375,
      "step": 44444,
      "training_step_time": 0.41623497009277344
    },
    {
      "epoch": 0.000271270751953125,
      "model_forward_time": 0.11457991600036621,
      "step": 44445
    },
    {
      "epoch": 0.000271270751953125,
      "step": 44445,
      "training_step_time": 0.3964409828186035
    },
    {
      "epoch": 0.00027127685546875,
      "model_forward_time": 0.1146693229675293,
      "step": 44446
    },
    {
      "epoch": 0.00027127685546875,
      "step": 44446,
      "training_step_time": 0.3864610195159912
    },
    {
      "epoch": 0.000271282958984375,
      "model_forward_time": 0.1154024600982666,
      "step": 44447
    },
    {
      "epoch": 0.000271282958984375,
      "step": 44447,
      "training_step_time": 0.39592576026916504
    },
    {
      "epoch": 0.0002712890625,
      "model_forward_time": 0.11684846878051758,
      "step": 44448
    },
    {
      "epoch": 0.0002712890625,
      "step": 44448,
      "training_step_time": 0.4045085906982422
    },
    {
      "epoch": 0.000271295166015625,
      "model_forward_time": 0.11554384231567383,
      "step": 44449
    },
    {
      "epoch": 0.000271295166015625,
      "step": 44449,
      "training_step_time": 0.43535470962524414
    },
    {
      "epoch": 0.00027130126953125,
      "grad_norm": 0.07931596785783768,
      "learning_rate": 1.7266431065853416e-05,
      "loss": 0.0369,
      "step": 44450
    },
    {
      "epoch": 0.00027130126953125,
      "model_forward_time": 0.11522245407104492,
      "step": 44450
    },
    {
      "epoch": 0.00027130126953125,
      "step": 44450,
      "training_step_time": 0.4082939624786377
    },
    {
      "epoch": 0.000271307373046875,
      "model_forward_time": 0.1153249740600586,
      "step": 44451
    },
    {
      "epoch": 0.000271307373046875,
      "step": 44451,
      "training_step_time": 0.4569551944732666
    },
    {
      "epoch": 0.0002713134765625,
      "model_forward_time": 0.11610531806945801,
      "step": 44452
    },
    {
      "epoch": 0.0002713134765625,
      "step": 44452,
      "training_step_time": 0.4015235900878906
    },
    {
      "epoch": 0.000271319580078125,
      "model_forward_time": 0.11546063423156738,
      "step": 44453
    },
    {
      "epoch": 0.000271319580078125,
      "step": 44453,
      "training_step_time": 0.40113186836242676
    },
    {
      "epoch": 0.00027132568359375,
      "model_forward_time": 0.11539006233215332,
      "step": 44454
    },
    {
      "epoch": 0.00027132568359375,
      "step": 44454,
      "training_step_time": 0.3959078788757324
    },
    {
      "epoch": 0.000271331787109375,
      "model_forward_time": 0.11573314666748047,
      "step": 44455
    },
    {
      "epoch": 0.000271331787109375,
      "step": 44455,
      "training_step_time": 0.44533681869506836
    },
    {
      "epoch": 0.000271337890625,
      "model_forward_time": 0.11612963676452637,
      "step": 44456
    },
    {
      "epoch": 0.000271337890625,
      "step": 44456,
      "training_step_time": 0.4786949157714844
    },
    {
      "epoch": 0.000271343994140625,
      "model_forward_time": 0.11840009689331055,
      "step": 44457
    },
    {
      "epoch": 0.000271343994140625,
      "step": 44457,
      "training_step_time": 0.39307379722595215
    },
    {
      "epoch": 0.00027135009765625,
      "model_forward_time": 0.11884808540344238,
      "step": 44458
    },
    {
      "epoch": 0.00027135009765625,
      "step": 44458,
      "training_step_time": 0.4879496097564697
    },
    {
      "epoch": 0.000271356201171875,
      "model_forward_time": 0.11893415451049805,
      "step": 44459
    },
    {
      "epoch": 0.000271356201171875,
      "step": 44459,
      "training_step_time": 0.4134845733642578
    },
    {
      "epoch": 0.0002713623046875,
      "grad_norm": 0.09261117875576019,
      "learning_rate": 1.7245604699720535e-05,
      "loss": 0.0367,
      "step": 44460
    },
    {
      "epoch": 0.0002713623046875,
      "model_forward_time": 0.1178138256072998,
      "step": 44460
    },
    {
      "epoch": 0.0002713623046875,
      "step": 44460,
      "training_step_time": 0.4027411937713623
    },
    {
      "epoch": 0.000271368408203125,
      "model_forward_time": 0.11546754837036133,
      "step": 44461
    },
    {
      "epoch": 0.000271368408203125,
      "step": 44461,
      "training_step_time": 0.38932085037231445
    },
    {
      "epoch": 0.00027137451171875,
      "model_forward_time": 0.11571693420410156,
      "step": 44462
    },
    {
      "epoch": 0.00027137451171875,
      "step": 44462,
      "training_step_time": 0.39401865005493164
    },
    {
      "epoch": 0.000271380615234375,
      "model_forward_time": 0.11685776710510254,
      "step": 44463
    },
    {
      "epoch": 0.000271380615234375,
      "step": 44463,
      "training_step_time": 0.474839448928833
    },
    {
      "epoch": 0.00027138671875,
      "model_forward_time": 0.11685562133789062,
      "step": 44464
    },
    {
      "epoch": 0.00027138671875,
      "step": 44464,
      "training_step_time": 0.43375396728515625
    },
    {
      "epoch": 0.000271392822265625,
      "model_forward_time": 0.11550068855285645,
      "step": 44465
    },
    {
      "epoch": 0.000271392822265625,
      "step": 44465,
      "training_step_time": 0.3968954086303711
    },
    {
      "epoch": 0.00027139892578125,
      "model_forward_time": 0.11499404907226562,
      "step": 44466
    },
    {
      "epoch": 0.00027139892578125,
      "step": 44466,
      "training_step_time": 0.39856910705566406
    },
    {
      "epoch": 0.000271405029296875,
      "model_forward_time": 0.11611318588256836,
      "step": 44467
    },
    {
      "epoch": 0.000271405029296875,
      "step": 44467,
      "training_step_time": 0.3941667079925537
    },
    {
      "epoch": 0.0002714111328125,
      "model_forward_time": 0.11717605590820312,
      "step": 44468
    },
    {
      "epoch": 0.0002714111328125,
      "step": 44468,
      "training_step_time": 0.41092848777770996
    },
    {
      "epoch": 0.000271417236328125,
      "model_forward_time": 0.11498236656188965,
      "step": 44469
    },
    {
      "epoch": 0.000271417236328125,
      "step": 44469,
      "training_step_time": 0.4476892948150635
    },
    {
      "epoch": 0.00027142333984375,
      "grad_norm": 0.09984356164932251,
      "learning_rate": 1.7224788283508103e-05,
      "loss": 0.0369,
      "step": 44470
    },
    {
      "epoch": 0.00027142333984375,
      "model_forward_time": 0.1152031421661377,
      "step": 44470
    },
    {
      "epoch": 0.00027142333984375,
      "step": 44470,
      "training_step_time": 0.49344921112060547
    },
    {
      "epoch": 0.000271429443359375,
      "model_forward_time": 0.12044167518615723,
      "step": 44471
    },
    {
      "epoch": 0.000271429443359375,
      "step": 44471,
      "training_step_time": 0.4335339069366455
    },
    {
      "epoch": 0.000271435546875,
      "model_forward_time": 0.11723971366882324,
      "step": 44472
    },
    {
      "epoch": 0.000271435546875,
      "step": 44472,
      "training_step_time": 0.42815399169921875
    },
    {
      "epoch": 0.000271441650390625,
      "model_forward_time": 0.11492133140563965,
      "step": 44473
    },
    {
      "epoch": 0.000271441650390625,
      "step": 44473,
      "training_step_time": 0.42229223251342773
    },
    {
      "epoch": 0.00027144775390625,
      "model_forward_time": 0.11572885513305664,
      "step": 44474
    },
    {
      "epoch": 0.00027144775390625,
      "step": 44474,
      "training_step_time": 0.3877601623535156
    },
    {
      "epoch": 0.000271453857421875,
      "model_forward_time": 0.11568164825439453,
      "step": 44475
    },
    {
      "epoch": 0.000271453857421875,
      "step": 44475,
      "training_step_time": 0.3978841304779053
    },
    {
      "epoch": 0.0002714599609375,
      "model_forward_time": 0.11520767211914062,
      "step": 44476
    },
    {
      "epoch": 0.0002714599609375,
      "step": 44476,
      "training_step_time": 0.39678215980529785
    },
    {
      "epoch": 0.000271466064453125,
      "model_forward_time": 0.11535072326660156,
      "step": 44477
    },
    {
      "epoch": 0.000271466064453125,
      "step": 44477,
      "training_step_time": 0.3983776569366455
    },
    {
      "epoch": 0.00027147216796875,
      "model_forward_time": 0.11498522758483887,
      "step": 44478
    },
    {
      "epoch": 0.00027147216796875,
      "step": 44478,
      "training_step_time": 0.45216870307922363
    },
    {
      "epoch": 0.000271478271484375,
      "model_forward_time": 0.1155862808227539,
      "step": 44479
    },
    {
      "epoch": 0.000271478271484375,
      "step": 44479,
      "training_step_time": 0.44074344635009766
    },
    {
      "epoch": 0.000271484375,
      "grad_norm": 0.09338352829217911,
      "learning_rate": 1.7203981823539643e-05,
      "loss": 0.0332,
      "step": 44480
    },
    {
      "epoch": 0.000271484375,
      "model_forward_time": 0.1158440113067627,
      "step": 44480
    },
    {
      "epoch": 0.000271484375,
      "step": 44480,
      "training_step_time": 0.39577174186706543
    },
    {
      "epoch": 0.000271490478515625,
      "model_forward_time": 0.11502957344055176,
      "step": 44481
    },
    {
      "epoch": 0.000271490478515625,
      "step": 44481,
      "training_step_time": 0.3907003402709961
    },
    {
      "epoch": 0.00027149658203125,
      "model_forward_time": 0.11558175086975098,
      "step": 44482
    },
    {
      "epoch": 0.00027149658203125,
      "step": 44482,
      "training_step_time": 0.39649271965026855
    },
    {
      "epoch": 0.000271502685546875,
      "model_forward_time": 0.1150355339050293,
      "step": 44483
    },
    {
      "epoch": 0.000271502685546875,
      "step": 44483,
      "training_step_time": 0.4301612377166748
    },
    {
      "epoch": 0.0002715087890625,
      "model_forward_time": 0.11553072929382324,
      "step": 44484
    },
    {
      "epoch": 0.0002715087890625,
      "step": 44484,
      "training_step_time": 0.42031002044677734
    },
    {
      "epoch": 0.000271514892578125,
      "model_forward_time": 0.11521577835083008,
      "step": 44485
    },
    {
      "epoch": 0.000271514892578125,
      "step": 44485,
      "training_step_time": 0.37399888038635254
    },
    {
      "epoch": 0.00027152099609375,
      "model_forward_time": 0.1157236099243164,
      "step": 44486
    },
    {
      "epoch": 0.00027152099609375,
      "step": 44486,
      "training_step_time": 0.4098801612854004
    },
    {
      "epoch": 0.000271527099609375,
      "model_forward_time": 0.11507940292358398,
      "step": 44487
    },
    {
      "epoch": 0.000271527099609375,
      "step": 44487,
      "training_step_time": 0.4790792465209961
    },
    {
      "epoch": 0.000271533203125,
      "model_forward_time": 0.11544084548950195,
      "step": 44488
    },
    {
      "epoch": 0.000271533203125,
      "step": 44488,
      "training_step_time": 0.3897740840911865
    },
    {
      "epoch": 0.000271539306640625,
      "model_forward_time": 0.11571836471557617,
      "step": 44489
    },
    {
      "epoch": 0.000271539306640625,
      "step": 44489,
      "training_step_time": 0.3919374942779541
    },
    {
      "epoch": 0.00027154541015625,
      "grad_norm": 0.10870649665594101,
      "learning_rate": 1.7183185326135543e-05,
      "loss": 0.0375,
      "step": 44490
    },
    {
      "epoch": 0.00027154541015625,
      "model_forward_time": 0.11544227600097656,
      "step": 44490
    },
    {
      "epoch": 0.00027154541015625,
      "step": 44490,
      "training_step_time": 0.3902928829193115
    },
    {
      "epoch": 0.000271551513671875,
      "model_forward_time": 0.11590242385864258,
      "step": 44491
    },
    {
      "epoch": 0.000271551513671875,
      "step": 44491,
      "training_step_time": 0.38755226135253906
    },
    {
      "epoch": 0.0002715576171875,
      "model_forward_time": 0.11568236351013184,
      "step": 44492
    },
    {
      "epoch": 0.0002715576171875,
      "step": 44492,
      "training_step_time": 0.38379454612731934
    },
    {
      "epoch": 0.000271563720703125,
      "model_forward_time": 0.1157083511352539,
      "step": 44493
    },
    {
      "epoch": 0.000271563720703125,
      "step": 44493,
      "training_step_time": 0.43334126472473145
    },
    {
      "epoch": 0.00027156982421875,
      "model_forward_time": 0.1156003475189209,
      "step": 44494
    },
    {
      "epoch": 0.00027156982421875,
      "step": 44494,
      "training_step_time": 0.386613130569458
    },
    {
      "epoch": 0.000271575927734375,
      "model_forward_time": 0.11491155624389648,
      "step": 44495
    },
    {
      "epoch": 0.000271575927734375,
      "step": 44495,
      "training_step_time": 0.4216740131378174
    },
    {
      "epoch": 0.00027158203125,
      "model_forward_time": 0.1152334213256836,
      "step": 44496
    },
    {
      "epoch": 0.00027158203125,
      "step": 44496,
      "training_step_time": 0.39730000495910645
    },
    {
      "epoch": 0.000271588134765625,
      "model_forward_time": 0.11533594131469727,
      "step": 44497
    },
    {
      "epoch": 0.000271588134765625,
      "step": 44497,
      "training_step_time": 0.39122676849365234
    },
    {
      "epoch": 0.00027159423828125,
      "model_forward_time": 0.11467242240905762,
      "step": 44498
    },
    {
      "epoch": 0.00027159423828125,
      "step": 44498,
      "training_step_time": 0.4013049602508545
    },
    {
      "epoch": 0.000271600341796875,
      "model_forward_time": 0.1148831844329834,
      "step": 44499
    },
    {
      "epoch": 0.000271600341796875,
      "step": 44499,
      "training_step_time": 0.4395632743835449
    },
    {
      "epoch": 0.0002716064453125,
      "grad_norm": 0.09584234654903412,
      "learning_rate": 1.7162398797613282e-05,
      "loss": 0.0375,
      "step": 44500
    },
    {
      "epoch": 0.0002716064453125,
      "model_forward_time": 0.1149756908416748,
      "step": 44500
    },
    {
      "epoch": 0.0002716064453125,
      "step": 44500,
      "training_step_time": 0.4594388008117676
    },
    {
      "epoch": 0.000271612548828125,
      "model_forward_time": 0.11564135551452637,
      "step": 44501
    },
    {
      "epoch": 0.000271612548828125,
      "step": 44501,
      "training_step_time": 0.40418386459350586
    },
    {
      "epoch": 0.00027161865234375,
      "model_forward_time": 0.1152338981628418,
      "step": 44502
    },
    {
      "epoch": 0.00027161865234375,
      "step": 44502,
      "training_step_time": 0.42891383171081543
    },
    {
      "epoch": 0.000271624755859375,
      "model_forward_time": 0.11506795883178711,
      "step": 44503
    },
    {
      "epoch": 0.000271624755859375,
      "step": 44503,
      "training_step_time": 0.3966240882873535
    },
    {
      "epoch": 0.000271630859375,
      "model_forward_time": 0.11562156677246094,
      "step": 44504
    },
    {
      "epoch": 0.000271630859375,
      "step": 44504,
      "training_step_time": 0.3902318477630615
    },
    {
      "epoch": 0.000271636962890625,
      "model_forward_time": 0.11546039581298828,
      "step": 44505
    },
    {
      "epoch": 0.000271636962890625,
      "step": 44505,
      "training_step_time": 0.40075111389160156
    },
    {
      "epoch": 0.00027164306640625,
      "model_forward_time": 0.11597347259521484,
      "step": 44506
    },
    {
      "epoch": 0.00027164306640625,
      "step": 44506,
      "training_step_time": 0.39760589599609375
    },
    {
      "epoch": 0.000271649169921875,
      "model_forward_time": 0.11611485481262207,
      "step": 44507
    },
    {
      "epoch": 0.000271649169921875,
      "step": 44507,
      "training_step_time": 0.3941926956176758
    },
    {
      "epoch": 0.0002716552734375,
      "model_forward_time": 0.11519360542297363,
      "step": 44508
    },
    {
      "epoch": 0.0002716552734375,
      "step": 44508,
      "training_step_time": 0.4046037197113037
    },
    {
      "epoch": 0.000271661376953125,
      "model_forward_time": 0.11538529396057129,
      "step": 44509
    },
    {
      "epoch": 0.000271661376953125,
      "step": 44509,
      "training_step_time": 0.5120317935943604
    },
    {
      "epoch": 0.00027166748046875,
      "grad_norm": 0.08678626269102097,
      "learning_rate": 1.7141622244287235e-05,
      "loss": 0.032,
      "step": 44510
    },
    {
      "epoch": 0.00027166748046875,
      "model_forward_time": 0.11481904983520508,
      "step": 44510
    },
    {
      "epoch": 0.00027166748046875,
      "step": 44510,
      "training_step_time": 0.40050721168518066
    },
    {
      "epoch": 0.000271673583984375,
      "model_forward_time": 0.11510133743286133,
      "step": 44511
    },
    {
      "epoch": 0.000271673583984375,
      "step": 44511,
      "training_step_time": 0.3856494426727295
    },
    {
      "epoch": 0.0002716796875,
      "model_forward_time": 0.11551952362060547,
      "step": 44512
    },
    {
      "epoch": 0.0002716796875,
      "step": 44512,
      "training_step_time": 0.3915562629699707
    },
    {
      "epoch": 0.000271685791015625,
      "model_forward_time": 0.11499190330505371,
      "step": 44513
    },
    {
      "epoch": 0.000271685791015625,
      "step": 44513,
      "training_step_time": 0.42548608779907227
    },
    {
      "epoch": 0.00027169189453125,
      "model_forward_time": 0.11502361297607422,
      "step": 44514
    },
    {
      "epoch": 0.00027169189453125,
      "step": 44514,
      "training_step_time": 0.4302246570587158
    },
    {
      "epoch": 0.000271697998046875,
      "model_forward_time": 0.11598086357116699,
      "step": 44515
    },
    {
      "epoch": 0.000271697998046875,
      "step": 44515,
      "training_step_time": 0.43908214569091797
    },
    {
      "epoch": 0.0002717041015625,
      "model_forward_time": 0.1156916618347168,
      "step": 44516
    },
    {
      "epoch": 0.0002717041015625,
      "step": 44516,
      "training_step_time": 0.3993709087371826
    },
    {
      "epoch": 0.000271710205078125,
      "model_forward_time": 0.11517691612243652,
      "step": 44517
    },
    {
      "epoch": 0.000271710205078125,
      "step": 44517,
      "training_step_time": 0.41564297676086426
    },
    {
      "epoch": 0.00027171630859375,
      "model_forward_time": 0.11917495727539062,
      "step": 44518
    },
    {
      "epoch": 0.00027171630859375,
      "step": 44518,
      "training_step_time": 0.38671374320983887
    },
    {
      "epoch": 0.000271722412109375,
      "model_forward_time": 0.11560821533203125,
      "step": 44519
    },
    {
      "epoch": 0.000271722412109375,
      "step": 44519,
      "training_step_time": 0.3681447505950928
    },
    {
      "epoch": 0.000271728515625,
      "grad_norm": 0.1023394837975502,
      "learning_rate": 1.712085567246878e-05,
      "loss": 0.0357,
      "step": 44520
    },
    {
      "epoch": 0.000271728515625,
      "model_forward_time": 0.1161046028137207,
      "step": 44520
    },
    {
      "epoch": 0.000271728515625,
      "step": 44520,
      "training_step_time": 0.39745259284973145
    },
    {
      "epoch": 0.000271734619140625,
      "model_forward_time": 0.11518621444702148,
      "step": 44521
    },
    {
      "epoch": 0.000271734619140625,
      "step": 44521,
      "training_step_time": 0.39484643936157227
    },
    {
      "epoch": 0.00027174072265625,
      "model_forward_time": 0.1152951717376709,
      "step": 44522
    },
    {
      "epoch": 0.00027174072265625,
      "step": 44522,
      "training_step_time": 0.5001318454742432
    },
    {
      "epoch": 0.000271746826171875,
      "model_forward_time": 0.11524176597595215,
      "step": 44523
    },
    {
      "epoch": 0.000271746826171875,
      "step": 44523,
      "training_step_time": 0.3921189308166504
    },
    {
      "epoch": 0.0002717529296875,
      "model_forward_time": 0.11557650566101074,
      "step": 44524
    },
    {
      "epoch": 0.0002717529296875,
      "step": 44524,
      "training_step_time": 0.4128987789154053
    },
    {
      "epoch": 0.000271759033203125,
      "model_forward_time": 0.11518740653991699,
      "step": 44525
    },
    {
      "epoch": 0.000271759033203125,
      "step": 44525,
      "training_step_time": 0.38446831703186035
    },
    {
      "epoch": 0.00027176513671875,
      "model_forward_time": 0.1155092716217041,
      "step": 44526
    },
    {
      "epoch": 0.00027176513671875,
      "step": 44526,
      "training_step_time": 0.39651966094970703
    },
    {
      "epoch": 0.000271771240234375,
      "model_forward_time": 0.11534261703491211,
      "step": 44527
    },
    {
      "epoch": 0.000271771240234375,
      "step": 44527,
      "training_step_time": 0.4400813579559326
    },
    {
      "epoch": 0.00027177734375,
      "model_forward_time": 0.1146550178527832,
      "step": 44528
    },
    {
      "epoch": 0.00027177734375,
      "step": 44528,
      "training_step_time": 0.4599275588989258
    },
    {
      "epoch": 0.000271783447265625,
      "model_forward_time": 0.11491632461547852,
      "step": 44529
    },
    {
      "epoch": 0.000271783447265625,
      "step": 44529,
      "training_step_time": 0.47815680503845215
    },
    {
      "epoch": 0.00027178955078125,
      "grad_norm": 0.10103927552700043,
      "learning_rate": 1.7100099088466242e-05,
      "loss": 0.0366,
      "step": 44530
    },
    {
      "epoch": 0.00027178955078125,
      "model_forward_time": 0.11531233787536621,
      "step": 44530
    },
    {
      "epoch": 0.00027178955078125,
      "step": 44530,
      "training_step_time": 0.42694807052612305
    },
    {
      "epoch": 0.000271795654296875,
      "model_forward_time": 0.11475157737731934,
      "step": 44531
    },
    {
      "epoch": 0.000271795654296875,
      "step": 44531,
      "training_step_time": 0.49623632431030273
    },
    {
      "epoch": 0.0002718017578125,
      "model_forward_time": 0.11497759819030762,
      "step": 44532
    },
    {
      "epoch": 0.0002718017578125,
      "step": 44532,
      "training_step_time": 0.3924074172973633
    },
    {
      "epoch": 0.000271807861328125,
      "model_forward_time": 0.1147165298461914,
      "step": 44533
    },
    {
      "epoch": 0.000271807861328125,
      "step": 44533,
      "training_step_time": 0.3901326656341553
    },
    {
      "epoch": 0.00027181396484375,
      "model_forward_time": 0.11447811126708984,
      "step": 44534
    },
    {
      "epoch": 0.00027181396484375,
      "step": 44534,
      "training_step_time": 0.39401721954345703
    },
    {
      "epoch": 0.000271820068359375,
      "model_forward_time": 0.11548423767089844,
      "step": 44535
    },
    {
      "epoch": 0.000271820068359375,
      "step": 44535,
      "training_step_time": 0.3940589427947998
    },
    {
      "epoch": 0.000271826171875,
      "model_forward_time": 0.11476254463195801,
      "step": 44536
    },
    {
      "epoch": 0.000271826171875,
      "step": 44536,
      "training_step_time": 0.3965630531311035
    },
    {
      "epoch": 0.000271832275390625,
      "model_forward_time": 0.1165008544921875,
      "step": 44537
    },
    {
      "epoch": 0.000271832275390625,
      "step": 44537,
      "training_step_time": 0.43065881729125977
    },
    {
      "epoch": 0.00027183837890625,
      "model_forward_time": 0.11500954627990723,
      "step": 44538
    },
    {
      "epoch": 0.00027183837890625,
      "step": 44538,
      "training_step_time": 0.50360107421875
    },
    {
      "epoch": 0.000271844482421875,
      "model_forward_time": 0.1139211654663086,
      "step": 44539
    },
    {
      "epoch": 0.000271844482421875,
      "step": 44539,
      "training_step_time": 0.3953258991241455
    },
    {
      "epoch": 0.0002718505859375,
      "grad_norm": 0.12055633217096329,
      "learning_rate": 1.7079352498584934e-05,
      "loss": 0.0385,
      "step": 44540
    },
    {
      "epoch": 0.0002718505859375,
      "model_forward_time": 0.11516332626342773,
      "step": 44540
    },
    {
      "epoch": 0.0002718505859375,
      "step": 44540,
      "training_step_time": 0.4527595043182373
    },
    {
      "epoch": 0.000271856689453125,
      "model_forward_time": 0.11548113822937012,
      "step": 44541
    },
    {
      "epoch": 0.000271856689453125,
      "step": 44541,
      "training_step_time": 0.3910248279571533
    },
    {
      "epoch": 0.00027186279296875,
      "model_forward_time": 0.11619067192077637,
      "step": 44542
    },
    {
      "epoch": 0.00027186279296875,
      "step": 44542,
      "training_step_time": 0.46516919136047363
    },
    {
      "epoch": 0.000271868896484375,
      "model_forward_time": 0.11587047576904297,
      "step": 44543
    },
    {
      "epoch": 0.000271868896484375,
      "step": 44543,
      "training_step_time": 0.43285417556762695
    },
    {
      "epoch": 0.000271875,
      "model_forward_time": 0.1159367561340332,
      "step": 44544
    },
    {
      "epoch": 0.000271875,
      "step": 44544,
      "training_step_time": 0.43808531761169434
    },
    {
      "epoch": 0.000271881103515625,
      "model_forward_time": 0.11583280563354492,
      "step": 44545
    },
    {
      "epoch": 0.000271881103515625,
      "step": 44545,
      "training_step_time": 0.4924609661102295
    },
    {
      "epoch": 0.00027188720703125,
      "model_forward_time": 0.11500167846679688,
      "step": 44546
    },
    {
      "epoch": 0.00027188720703125,
      "step": 44546,
      "training_step_time": 0.4151582717895508
    },
    {
      "epoch": 0.000271893310546875,
      "model_forward_time": 0.11616754531860352,
      "step": 44547
    },
    {
      "epoch": 0.000271893310546875,
      "step": 44547,
      "training_step_time": 0.3917984962463379
    },
    {
      "epoch": 0.0002718994140625,
      "model_forward_time": 0.11447858810424805,
      "step": 44548
    },
    {
      "epoch": 0.0002718994140625,
      "step": 44548,
      "training_step_time": 0.39693140983581543
    },
    {
      "epoch": 0.000271905517578125,
      "model_forward_time": 0.11531925201416016,
      "step": 44549
    },
    {
      "epoch": 0.000271905517578125,
      "step": 44549,
      "training_step_time": 0.39245057106018066
    },
    {
      "epoch": 0.00027191162109375,
      "grad_norm": 0.14130723476409912,
      "learning_rate": 1.70586159091271e-05,
      "loss": 0.038,
      "step": 44550
    },
    {
      "epoch": 0.00027191162109375,
      "model_forward_time": 0.11516427993774414,
      "step": 44550
    },
    {
      "epoch": 0.00027191162109375,
      "step": 44550,
      "training_step_time": 0.42993664741516113
    },
    {
      "epoch": 0.000271917724609375,
      "model_forward_time": 0.11492180824279785,
      "step": 44551
    },
    {
      "epoch": 0.000271917724609375,
      "step": 44551,
      "training_step_time": 0.3961751461029053
    },
    {
      "epoch": 0.000271923828125,
      "model_forward_time": 0.11574363708496094,
      "step": 44552
    },
    {
      "epoch": 0.000271923828125,
      "step": 44552,
      "training_step_time": 0.3988358974456787
    },
    {
      "epoch": 0.000271929931640625,
      "model_forward_time": 0.11526131629943848,
      "step": 44553
    },
    {
      "epoch": 0.000271929931640625,
      "step": 44553,
      "training_step_time": 0.3938102722167969
    },
    {
      "epoch": 0.00027193603515625,
      "model_forward_time": 0.11530351638793945,
      "step": 44554
    },
    {
      "epoch": 0.00027193603515625,
      "step": 44554,
      "training_step_time": 0.4073350429534912
    },
    {
      "epoch": 0.000271942138671875,
      "model_forward_time": 0.11492729187011719,
      "step": 44555
    },
    {
      "epoch": 0.000271942138671875,
      "step": 44555,
      "training_step_time": 0.43884968757629395
    },
    {
      "epoch": 0.0002719482421875,
      "model_forward_time": 0.11510872840881348,
      "step": 44556
    },
    {
      "epoch": 0.0002719482421875,
      "step": 44556,
      "training_step_time": 0.40874266624450684
    },
    {
      "epoch": 0.000271954345703125,
      "model_forward_time": 0.11503934860229492,
      "step": 44557
    },
    {
      "epoch": 0.000271954345703125,
      "step": 44557,
      "training_step_time": 0.418656587600708
    },
    {
      "epoch": 0.00027196044921875,
      "model_forward_time": 0.11510610580444336,
      "step": 44558
    },
    {
      "epoch": 0.00027196044921875,
      "step": 44558,
      "training_step_time": 0.5512502193450928
    },
    {
      "epoch": 0.000271966552734375,
      "model_forward_time": 0.11484384536743164,
      "step": 44559
    },
    {
      "epoch": 0.000271966552734375,
      "step": 44559,
      "training_step_time": 0.43763136863708496
    },
    {
      "epoch": 0.00027197265625,
      "grad_norm": 0.10133227705955505,
      "learning_rate": 1.703788932639202e-05,
      "loss": 0.0384,
      "step": 44560
    },
    {
      "epoch": 0.00027197265625,
      "model_forward_time": 0.11529803276062012,
      "step": 44560
    },
    {
      "epoch": 0.00027197265625,
      "step": 44560,
      "training_step_time": 0.4023702144622803
    },
    {
      "epoch": 0.000271978759765625,
      "model_forward_time": 0.11546993255615234,
      "step": 44561
    },
    {
      "epoch": 0.000271978759765625,
      "step": 44561,
      "training_step_time": 0.47917914390563965
    },
    {
      "epoch": 0.00027198486328125,
      "model_forward_time": 0.11440658569335938,
      "step": 44562
    },
    {
      "epoch": 0.00027198486328125,
      "step": 44562,
      "training_step_time": 0.3975701332092285
    },
    {
      "epoch": 0.000271990966796875,
      "model_forward_time": 0.11472082138061523,
      "step": 44563
    },
    {
      "epoch": 0.000271990966796875,
      "step": 44563,
      "training_step_time": 0.4027833938598633
    },
    {
      "epoch": 0.0002719970703125,
      "model_forward_time": 0.11514711380004883,
      "step": 44564
    },
    {
      "epoch": 0.0002719970703125,
      "step": 44564,
      "training_step_time": 0.4120340347290039
    },
    {
      "epoch": 0.000272003173828125,
      "model_forward_time": 0.11559653282165527,
      "step": 44565
    },
    {
      "epoch": 0.000272003173828125,
      "step": 44565,
      "training_step_time": 0.4543795585632324
    },
    {
      "epoch": 0.00027200927734375,
      "model_forward_time": 0.11530089378356934,
      "step": 44566
    },
    {
      "epoch": 0.00027200927734375,
      "step": 44566,
      "training_step_time": 0.4525747299194336
    },
    {
      "epoch": 0.000272015380859375,
      "model_forward_time": 0.11509442329406738,
      "step": 44567
    },
    {
      "epoch": 0.000272015380859375,
      "step": 44567,
      "training_step_time": 0.3960421085357666
    },
    {
      "epoch": 0.000272021484375,
      "model_forward_time": 0.11498451232910156,
      "step": 44568
    },
    {
      "epoch": 0.000272021484375,
      "step": 44568,
      "training_step_time": 0.40938329696655273
    },
    {
      "epoch": 0.000272027587890625,
      "model_forward_time": 0.11486029624938965,
      "step": 44569
    },
    {
      "epoch": 0.000272027587890625,
      "step": 44569,
      "training_step_time": 0.39835643768310547
    },
    {
      "epoch": 0.00027203369140625,
      "grad_norm": 0.08533589541912079,
      "learning_rate": 1.7017172756675813e-05,
      "loss": 0.0331,
      "step": 44570
    },
    {
      "epoch": 0.00027203369140625,
      "model_forward_time": 0.11514544486999512,
      "step": 44570
    },
    {
      "epoch": 0.00027203369140625,
      "step": 44570,
      "training_step_time": 0.4207589626312256
    },
    {
      "epoch": 0.000272039794921875,
      "model_forward_time": 0.1146087646484375,
      "step": 44571
    },
    {
      "epoch": 0.000272039794921875,
      "step": 44571,
      "training_step_time": 0.44310760498046875
    },
    {
      "epoch": 0.0002720458984375,
      "model_forward_time": 0.11476898193359375,
      "step": 44572
    },
    {
      "epoch": 0.0002720458984375,
      "step": 44572,
      "training_step_time": 0.39604830741882324
    },
    {
      "epoch": 0.000272052001953125,
      "model_forward_time": 0.11463284492492676,
      "step": 44573
    },
    {
      "epoch": 0.000272052001953125,
      "step": 44573,
      "training_step_time": 0.36435723304748535
    },
    {
      "epoch": 0.00027205810546875,
      "model_forward_time": 0.11542630195617676,
      "step": 44574
    },
    {
      "epoch": 0.00027205810546875,
      "step": 44574,
      "training_step_time": 0.4498276710510254
    },
    {
      "epoch": 0.000272064208984375,
      "model_forward_time": 0.1153566837310791,
      "step": 44575
    },
    {
      "epoch": 0.000272064208984375,
      "step": 44575,
      "training_step_time": 0.4182932376861572
    },
    {
      "epoch": 0.0002720703125,
      "model_forward_time": 0.11430835723876953,
      "step": 44576
    },
    {
      "epoch": 0.0002720703125,
      "step": 44576,
      "training_step_time": 0.5020661354064941
    },
    {
      "epoch": 0.000272076416015625,
      "model_forward_time": 0.11479949951171875,
      "step": 44577
    },
    {
      "epoch": 0.000272076416015625,
      "step": 44577,
      "training_step_time": 0.39269590377807617
    },
    {
      "epoch": 0.00027208251953125,
      "model_forward_time": 0.11473631858825684,
      "step": 44578
    },
    {
      "epoch": 0.00027208251953125,
      "step": 44578,
      "training_step_time": 0.3878173828125
    },
    {
      "epoch": 0.000272088623046875,
      "model_forward_time": 0.11507868766784668,
      "step": 44579
    },
    {
      "epoch": 0.000272088623046875,
      "step": 44579,
      "training_step_time": 0.41761183738708496
    },
    {
      "epoch": 0.0002720947265625,
      "grad_norm": 0.08760229498147964,
      "learning_rate": 1.699646620627168e-05,
      "loss": 0.0388,
      "step": 44580
    },
    {
      "epoch": 0.0002720947265625,
      "model_forward_time": 0.11680793762207031,
      "step": 44580
    },
    {
      "epoch": 0.0002720947265625,
      "step": 44580,
      "training_step_time": 0.44626331329345703
    },
    {
      "epoch": 0.000272100830078125,
      "model_forward_time": 0.11542892456054688,
      "step": 44581
    },
    {
      "epoch": 0.000272100830078125,
      "step": 44581,
      "training_step_time": 0.3989415168762207
    },
    {
      "epoch": 0.00027210693359375,
      "model_forward_time": 0.11514735221862793,
      "step": 44582
    },
    {
      "epoch": 0.00027210693359375,
      "step": 44582,
      "training_step_time": 0.6728515625
    },
    {
      "epoch": 0.000272113037109375,
      "model_forward_time": 0.1149587631225586,
      "step": 44583
    },
    {
      "epoch": 0.000272113037109375,
      "step": 44583,
      "training_step_time": 0.3838999271392822
    },
    {
      "epoch": 0.000272119140625,
      "model_forward_time": 0.11467385292053223,
      "step": 44584
    },
    {
      "epoch": 0.000272119140625,
      "step": 44584,
      "training_step_time": 0.4707522392272949
    },
    {
      "epoch": 0.000272125244140625,
      "model_forward_time": 0.11512303352355957,
      "step": 44585
    },
    {
      "epoch": 0.000272125244140625,
      "step": 44585,
      "training_step_time": 0.42566418647766113
    },
    {
      "epoch": 0.00027213134765625,
      "model_forward_time": 0.11502480506896973,
      "step": 44586
    },
    {
      "epoch": 0.00027213134765625,
      "step": 44586,
      "training_step_time": 0.4106628894805908
    },
    {
      "epoch": 0.000272137451171875,
      "model_forward_time": 0.11438941955566406,
      "step": 44587
    },
    {
      "epoch": 0.000272137451171875,
      "step": 44587,
      "training_step_time": 0.36446070671081543
    },
    {
      "epoch": 0.0002721435546875,
      "model_forward_time": 0.11477780342102051,
      "step": 44588
    },
    {
      "epoch": 0.0002721435546875,
      "step": 44588,
      "training_step_time": 0.48735475540161133
    },
    {
      "epoch": 0.000272149658203125,
      "model_forward_time": 0.11483526229858398,
      "step": 44589
    },
    {
      "epoch": 0.000272149658203125,
      "step": 44589,
      "training_step_time": 0.38866114616394043
    },
    {
      "epoch": 0.00027215576171875,
      "grad_norm": 0.09721938520669937,
      "learning_rate": 1.6975769681469705e-05,
      "loss": 0.0441,
      "step": 44590
    },
    {
      "epoch": 0.00027215576171875,
      "model_forward_time": 0.11611080169677734,
      "step": 44590
    },
    {
      "epoch": 0.00027215576171875,
      "step": 44590,
      "training_step_time": 0.38480710983276367
    },
    {
      "epoch": 0.000272161865234375,
      "model_forward_time": 0.11519503593444824,
      "step": 44591
    },
    {
      "epoch": 0.000272161865234375,
      "step": 44591,
      "training_step_time": 0.3865501880645752
    },
    {
      "epoch": 0.00027216796875,
      "model_forward_time": 0.11544060707092285,
      "step": 44592
    },
    {
      "epoch": 0.00027216796875,
      "step": 44592,
      "training_step_time": 0.4448587894439697
    },
    {
      "epoch": 0.000272174072265625,
      "model_forward_time": 0.11469459533691406,
      "step": 44593
    },
    {
      "epoch": 0.000272174072265625,
      "step": 44593,
      "training_step_time": 0.394240140914917
    },
    {
      "epoch": 0.00027218017578125,
      "model_forward_time": 0.11515641212463379,
      "step": 44594
    },
    {
      "epoch": 0.00027218017578125,
      "step": 44594,
      "training_step_time": 0.5670132637023926
    },
    {
      "epoch": 0.000272186279296875,
      "model_forward_time": 0.11537027359008789,
      "step": 44595
    },
    {
      "epoch": 0.000272186279296875,
      "step": 44595,
      "training_step_time": 0.4183692932128906
    },
    {
      "epoch": 0.0002721923828125,
      "model_forward_time": 0.1147613525390625,
      "step": 44596
    },
    {
      "epoch": 0.0002721923828125,
      "step": 44596,
      "training_step_time": 0.39972448348999023
    },
    {
      "epoch": 0.000272198486328125,
      "model_forward_time": 0.11490988731384277,
      "step": 44597
    },
    {
      "epoch": 0.000272198486328125,
      "step": 44597,
      "training_step_time": 0.389401912689209
    },
    {
      "epoch": 0.00027220458984375,
      "model_forward_time": 0.11506032943725586,
      "step": 44598
    },
    {
      "epoch": 0.00027220458984375,
      "step": 44598,
      "training_step_time": 0.43253374099731445
    },
    {
      "epoch": 0.000272210693359375,
      "model_forward_time": 0.11455106735229492,
      "step": 44599
    },
    {
      "epoch": 0.000272210693359375,
      "step": 44599,
      "training_step_time": 0.43548130989074707
    },
    {
      "epoch": 0.000272216796875,
      "grad_norm": 0.10251446813344955,
      "learning_rate": 1.6955083188556947e-05,
      "loss": 0.0321,
      "step": 44600
    },
    {
      "epoch": 0.000272216796875,
      "model_forward_time": 0.1158442497253418,
      "step": 44600
    },
    {
      "epoch": 0.000272216796875,
      "step": 44600,
      "training_step_time": 0.4782593250274658
    },
    {
      "epoch": 0.000272222900390625,
      "model_forward_time": 0.11462688446044922,
      "step": 44601
    },
    {
      "epoch": 0.000272222900390625,
      "step": 44601,
      "training_step_time": 0.36565256118774414
    },
    {
      "epoch": 0.00027222900390625,
      "model_forward_time": 0.11478614807128906,
      "step": 44602
    },
    {
      "epoch": 0.00027222900390625,
      "step": 44602,
      "training_step_time": 0.45955419540405273
    },
    {
      "epoch": 0.000272235107421875,
      "model_forward_time": 0.1142888069152832,
      "step": 44603
    },
    {
      "epoch": 0.000272235107421875,
      "step": 44603,
      "training_step_time": 0.43482446670532227
    },
    {
      "epoch": 0.0002722412109375,
      "model_forward_time": 0.11451196670532227,
      "step": 44604
    },
    {
      "epoch": 0.0002722412109375,
      "step": 44604,
      "training_step_time": 0.4002828598022461
    },
    {
      "epoch": 0.000272247314453125,
      "model_forward_time": 0.11447787284851074,
      "step": 44605
    },
    {
      "epoch": 0.000272247314453125,
      "step": 44605,
      "training_step_time": 0.39130234718322754
    },
    {
      "epoch": 0.00027225341796875,
      "model_forward_time": 0.11507034301757812,
      "step": 44606
    },
    {
      "epoch": 0.00027225341796875,
      "step": 44606,
      "training_step_time": 0.3993954658508301
    },
    {
      "epoch": 0.000272259521484375,
      "model_forward_time": 0.1154487133026123,
      "step": 44607
    },
    {
      "epoch": 0.000272259521484375,
      "step": 44607,
      "training_step_time": 0.4046199321746826
    },
    {
      "epoch": 0.000272265625,
      "model_forward_time": 0.1145932674407959,
      "step": 44608
    },
    {
      "epoch": 0.000272265625,
      "step": 44608,
      "training_step_time": 0.4265618324279785
    },
    {
      "epoch": 0.000272271728515625,
      "model_forward_time": 0.11480951309204102,
      "step": 44609
    },
    {
      "epoch": 0.000272271728515625,
      "step": 44609,
      "training_step_time": 0.525285005569458
    },
    {
      "epoch": 0.00027227783203125,
      "grad_norm": 0.12091292440891266,
      "learning_rate": 1.6934406733817414e-05,
      "loss": 0.0348,
      "step": 44610
    },
    {
      "epoch": 0.00027227783203125,
      "model_forward_time": 0.11515951156616211,
      "step": 44610
    },
    {
      "epoch": 0.00027227783203125,
      "step": 44610,
      "training_step_time": 0.4026355743408203
    },
    {
      "epoch": 0.000272283935546875,
      "model_forward_time": 0.11519408226013184,
      "step": 44611
    },
    {
      "epoch": 0.000272283935546875,
      "step": 44611,
      "training_step_time": 0.3903007507324219
    },
    {
      "epoch": 0.0002722900390625,
      "model_forward_time": 0.11519408226013184,
      "step": 44612
    },
    {
      "epoch": 0.0002722900390625,
      "step": 44612,
      "training_step_time": 0.4039185047149658
    },
    {
      "epoch": 0.000272296142578125,
      "model_forward_time": 0.11482954025268555,
      "step": 44613
    },
    {
      "epoch": 0.000272296142578125,
      "step": 44613,
      "training_step_time": 0.46760010719299316
    },
    {
      "epoch": 0.00027230224609375,
      "model_forward_time": 0.11458921432495117,
      "step": 44614
    },
    {
      "epoch": 0.00027230224609375,
      "step": 44614,
      "training_step_time": 0.4160153865814209
    },
    {
      "epoch": 0.000272308349609375,
      "model_forward_time": 0.11522746086120605,
      "step": 44615
    },
    {
      "epoch": 0.000272308349609375,
      "step": 44615,
      "training_step_time": 0.44899797439575195
    },
    {
      "epoch": 0.000272314453125,
      "model_forward_time": 0.1154024600982666,
      "step": 44616
    },
    {
      "epoch": 0.000272314453125,
      "step": 44616,
      "training_step_time": 0.44124794006347656
    },
    {
      "epoch": 0.000272320556640625,
      "model_forward_time": 0.1152040958404541,
      "step": 44617
    },
    {
      "epoch": 0.000272320556640625,
      "step": 44617,
      "training_step_time": 0.4087650775909424
    },
    {
      "epoch": 0.00027232666015625,
      "model_forward_time": 0.11514520645141602,
      "step": 44618
    },
    {
      "epoch": 0.00027232666015625,
      "step": 44618,
      "training_step_time": 0.45330238342285156
    },
    {
      "epoch": 0.000272332763671875,
      "model_forward_time": 0.11543774604797363,
      "step": 44619
    },
    {
      "epoch": 0.000272332763671875,
      "step": 44619,
      "training_step_time": 0.391857385635376
    },
    {
      "epoch": 0.0002723388671875,
      "grad_norm": 0.12488885968923569,
      "learning_rate": 1.691374032353205e-05,
      "loss": 0.0362,
      "step": 44620
    },
    {
      "epoch": 0.0002723388671875,
      "model_forward_time": 0.11568498611450195,
      "step": 44620
    },
    {
      "epoch": 0.0002723388671875,
      "step": 44620,
      "training_step_time": 0.39475464820861816
    },
    {
      "epoch": 0.000272344970703125,
      "model_forward_time": 0.11541962623596191,
      "step": 44621
    },
    {
      "epoch": 0.000272344970703125,
      "step": 44621,
      "training_step_time": 0.41020917892456055
    },
    {
      "epoch": 0.00027235107421875,
      "model_forward_time": 0.11402773857116699,
      "step": 44622
    },
    {
      "epoch": 0.00027235107421875,
      "step": 44622,
      "training_step_time": 0.4114832878112793
    },
    {
      "epoch": 0.000272357177734375,
      "model_forward_time": 0.1162419319152832,
      "step": 44623
    },
    {
      "epoch": 0.000272357177734375,
      "step": 44623,
      "training_step_time": 0.41831183433532715
    },
    {
      "epoch": 0.00027236328125,
      "model_forward_time": 0.11561393737792969,
      "step": 44624
    },
    {
      "epoch": 0.00027236328125,
      "step": 44624,
      "training_step_time": 0.39105892181396484
    },
    {
      "epoch": 0.000272369384765625,
      "model_forward_time": 0.11658287048339844,
      "step": 44625
    },
    {
      "epoch": 0.000272369384765625,
      "step": 44625,
      "training_step_time": 0.39421939849853516
    },
    {
      "epoch": 0.00027237548828125,
      "model_forward_time": 0.11542129516601562,
      "step": 44626
    },
    {
      "epoch": 0.00027237548828125,
      "step": 44626,
      "training_step_time": 0.39249491691589355
    },
    {
      "epoch": 0.000272381591796875,
      "model_forward_time": 0.11541008949279785,
      "step": 44627
    },
    {
      "epoch": 0.000272381591796875,
      "step": 44627,
      "training_step_time": 0.40454792976379395
    },
    {
      "epoch": 0.0002723876953125,
      "model_forward_time": 0.11470866203308105,
      "step": 44628
    },
    {
      "epoch": 0.0002723876953125,
      "step": 44628,
      "training_step_time": 0.4175558090209961
    },
    {
      "epoch": 0.000272393798828125,
      "model_forward_time": 0.11507463455200195,
      "step": 44629
    },
    {
      "epoch": 0.000272393798828125,
      "step": 44629,
      "training_step_time": 0.458207368850708
    },
    {
      "epoch": 0.00027239990234375,
      "grad_norm": 0.0913214460015297,
      "learning_rate": 1.689308396397882e-05,
      "loss": 0.0344,
      "step": 44630
    },
    {
      "epoch": 0.00027239990234375,
      "model_forward_time": 0.11589241027832031,
      "step": 44630
    },
    {
      "epoch": 0.00027239990234375,
      "step": 44630,
      "training_step_time": 0.45844173431396484
    },
    {
      "epoch": 0.000272406005859375,
      "model_forward_time": 0.11532306671142578,
      "step": 44631
    },
    {
      "epoch": 0.000272406005859375,
      "step": 44631,
      "training_step_time": 0.4291229248046875
    },
    {
      "epoch": 0.000272412109375,
      "model_forward_time": 0.11553382873535156,
      "step": 44632
    },
    {
      "epoch": 0.000272412109375,
      "step": 44632,
      "training_step_time": 0.391249418258667
    },
    {
      "epoch": 0.000272418212890625,
      "model_forward_time": 0.1149740219116211,
      "step": 44633
    },
    {
      "epoch": 0.000272418212890625,
      "step": 44633,
      "training_step_time": 0.40850400924682617
    },
    {
      "epoch": 0.00027242431640625,
      "model_forward_time": 0.1155693531036377,
      "step": 44634
    },
    {
      "epoch": 0.00027242431640625,
      "step": 44634,
      "training_step_time": 0.39629554748535156
    },
    {
      "epoch": 0.000272430419921875,
      "model_forward_time": 0.11519241333007812,
      "step": 44635
    },
    {
      "epoch": 0.000272430419921875,
      "step": 44635,
      "training_step_time": 0.4115886688232422
    },
    {
      "epoch": 0.0002724365234375,
      "model_forward_time": 0.11530017852783203,
      "step": 44636
    },
    {
      "epoch": 0.0002724365234375,
      "step": 44636,
      "training_step_time": 0.4201054573059082
    },
    {
      "epoch": 0.000272442626953125,
      "model_forward_time": 0.11481475830078125,
      "step": 44637
    },
    {
      "epoch": 0.000272442626953125,
      "step": 44637,
      "training_step_time": 0.4745149612426758
    },
    {
      "epoch": 0.00027244873046875,
      "model_forward_time": 0.1160440444946289,
      "step": 44638
    },
    {
      "epoch": 0.00027244873046875,
      "step": 44638,
      "training_step_time": 0.4194915294647217
    },
    {
      "epoch": 0.000272454833984375,
      "model_forward_time": 0.11493897438049316,
      "step": 44639
    },
    {
      "epoch": 0.000272454833984375,
      "step": 44639,
      "training_step_time": 0.39983201026916504
    },
    {
      "epoch": 0.0002724609375,
      "grad_norm": 0.10782075673341751,
      "learning_rate": 1.6872437661432517e-05,
      "loss": 0.037,
      "step": 44640
    },
    {
      "epoch": 0.0002724609375,
      "model_forward_time": 0.11545014381408691,
      "step": 44640
    },
    {
      "epoch": 0.0002724609375,
      "step": 44640,
      "training_step_time": 0.39957308769226074
    },
    {
      "epoch": 0.000272467041015625,
      "model_forward_time": 0.11580634117126465,
      "step": 44641
    },
    {
      "epoch": 0.000272467041015625,
      "step": 44641,
      "training_step_time": 0.4064643383026123
    },
    {
      "epoch": 0.00027247314453125,
      "model_forward_time": 0.1150212287902832,
      "step": 44642
    },
    {
      "epoch": 0.00027247314453125,
      "step": 44642,
      "training_step_time": 0.49716687202453613
    },
    {
      "epoch": 0.000272479248046875,
      "model_forward_time": 0.11499810218811035,
      "step": 44643
    },
    {
      "epoch": 0.000272479248046875,
      "step": 44643,
      "training_step_time": 0.44100213050842285
    },
    {
      "epoch": 0.0002724853515625,
      "model_forward_time": 0.11477947235107422,
      "step": 44644
    },
    {
      "epoch": 0.0002724853515625,
      "step": 44644,
      "training_step_time": 0.511183500289917
    },
    {
      "epoch": 0.000272491455078125,
      "model_forward_time": 0.11461091041564941,
      "step": 44645
    },
    {
      "epoch": 0.000272491455078125,
      "step": 44645,
      "training_step_time": 0.36519598960876465
    },
    {
      "epoch": 0.00027249755859375,
      "model_forward_time": 0.1150965690612793,
      "step": 44646
    },
    {
      "epoch": 0.00027249755859375,
      "step": 44646,
      "training_step_time": 0.45781993865966797
    },
    {
      "epoch": 0.000272503662109375,
      "model_forward_time": 0.11453366279602051,
      "step": 44647
    },
    {
      "epoch": 0.000272503662109375,
      "step": 44647,
      "training_step_time": 0.45455217361450195
    },
    {
      "epoch": 0.000272509765625,
      "model_forward_time": 0.11486649513244629,
      "step": 44648
    },
    {
      "epoch": 0.000272509765625,
      "step": 44648,
      "training_step_time": 0.3964848518371582
    },
    {
      "epoch": 0.000272515869140625,
      "model_forward_time": 0.11510157585144043,
      "step": 44649
    },
    {
      "epoch": 0.000272515869140625,
      "step": 44649,
      "training_step_time": 0.45247864723205566
    },
    {
      "epoch": 0.00027252197265625,
      "grad_norm": 0.14299072325229645,
      "learning_rate": 1.685180142216498e-05,
      "loss": 0.0424,
      "step": 44650
    },
    {
      "epoch": 0.00027252197265625,
      "model_forward_time": 0.11515164375305176,
      "step": 44650
    },
    {
      "epoch": 0.00027252197265625,
      "step": 44650,
      "training_step_time": 0.47513747215270996
    },
    {
      "epoch": 0.000272528076171875,
      "model_forward_time": 0.11524438858032227,
      "step": 44651
    },
    {
      "epoch": 0.000272528076171875,
      "step": 44651,
      "training_step_time": 0.5081877708435059
    },
    {
      "epoch": 0.0002725341796875,
      "model_forward_time": 0.11527705192565918,
      "step": 44652
    },
    {
      "epoch": 0.0002725341796875,
      "step": 44652,
      "training_step_time": 0.40817832946777344
    },
    {
      "epoch": 0.000272540283203125,
      "model_forward_time": 0.11449193954467773,
      "step": 44653
    },
    {
      "epoch": 0.000272540283203125,
      "step": 44653,
      "training_step_time": 0.39386630058288574
    },
    {
      "epoch": 0.00027254638671875,
      "model_forward_time": 0.1151895523071289,
      "step": 44654
    },
    {
      "epoch": 0.00027254638671875,
      "step": 44654,
      "training_step_time": 0.39601707458496094
    },
    {
      "epoch": 0.000272552490234375,
      "model_forward_time": 0.11517167091369629,
      "step": 44655
    },
    {
      "epoch": 0.000272552490234375,
      "step": 44655,
      "training_step_time": 0.39490842819213867
    },
    {
      "epoch": 0.00027255859375,
      "model_forward_time": 0.11513161659240723,
      "step": 44656
    },
    {
      "epoch": 0.00027255859375,
      "step": 44656,
      "training_step_time": 0.4219958782196045
    },
    {
      "epoch": 0.000272564697265625,
      "model_forward_time": 0.11420798301696777,
      "step": 44657
    },
    {
      "epoch": 0.000272564697265625,
      "step": 44657,
      "training_step_time": 0.4141674041748047
    },
    {
      "epoch": 0.00027257080078125,
      "model_forward_time": 0.11588096618652344,
      "step": 44658
    },
    {
      "epoch": 0.00027257080078125,
      "step": 44658,
      "training_step_time": 0.46350717544555664
    },
    {
      "epoch": 0.000272576904296875,
      "model_forward_time": 0.11476659774780273,
      "step": 44659
    },
    {
      "epoch": 0.000272576904296875,
      "step": 44659,
      "training_step_time": 0.3671865463256836
    },
    {
      "epoch": 0.0002725830078125,
      "grad_norm": 0.10966785252094269,
      "learning_rate": 1.6831175252444943e-05,
      "loss": 0.0361,
      "step": 44660
    },
    {
      "epoch": 0.0002725830078125,
      "model_forward_time": 0.11525845527648926,
      "step": 44660
    },
    {
      "epoch": 0.0002725830078125,
      "step": 44660,
      "training_step_time": 0.46120285987854004
    },
    {
      "epoch": 0.000272589111328125,
      "model_forward_time": 0.1161191463470459,
      "step": 44661
    },
    {
      "epoch": 0.000272589111328125,
      "step": 44661,
      "training_step_time": 0.3813188076019287
    },
    {
      "epoch": 0.00027259521484375,
      "model_forward_time": 0.11479854583740234,
      "step": 44662
    },
    {
      "epoch": 0.00027259521484375,
      "step": 44662,
      "training_step_time": 0.3894479274749756
    },
    {
      "epoch": 0.000272601318359375,
      "model_forward_time": 0.11514568328857422,
      "step": 44663
    },
    {
      "epoch": 0.000272601318359375,
      "step": 44663,
      "training_step_time": 0.42934083938598633
    },
    {
      "epoch": 0.000272607421875,
      "model_forward_time": 0.11585092544555664,
      "step": 44664
    },
    {
      "epoch": 0.000272607421875,
      "step": 44664,
      "training_step_time": 0.43564772605895996
    },
    {
      "epoch": 0.000272613525390625,
      "model_forward_time": 0.11510825157165527,
      "step": 44665
    },
    {
      "epoch": 0.000272613525390625,
      "step": 44665,
      "training_step_time": 0.4339120388031006
    },
    {
      "epoch": 0.00027261962890625,
      "model_forward_time": 0.11496543884277344,
      "step": 44666
    },
    {
      "epoch": 0.00027261962890625,
      "step": 44666,
      "training_step_time": 0.3897271156311035
    },
    {
      "epoch": 0.000272625732421875,
      "model_forward_time": 0.11493825912475586,
      "step": 44667
    },
    {
      "epoch": 0.000272625732421875,
      "step": 44667,
      "training_step_time": 0.39637136459350586
    },
    {
      "epoch": 0.0002726318359375,
      "model_forward_time": 0.11450338363647461,
      "step": 44668
    },
    {
      "epoch": 0.0002726318359375,
      "step": 44668,
      "training_step_time": 0.3935511112213135
    },
    {
      "epoch": 0.000272637939453125,
      "model_forward_time": 0.11675858497619629,
      "step": 44669
    },
    {
      "epoch": 0.000272637939453125,
      "step": 44669,
      "training_step_time": 0.4021313190460205
    },
    {
      "epoch": 0.00027264404296875,
      "grad_norm": 0.09213912487030029,
      "learning_rate": 1.6810559158538092e-05,
      "loss": 0.0355,
      "step": 44670
    },
    {
      "epoch": 0.00027264404296875,
      "model_forward_time": 0.11484694480895996,
      "step": 44670
    },
    {
      "epoch": 0.00027264404296875,
      "step": 44670,
      "training_step_time": 0.39803242683410645
    },
    {
      "epoch": 0.000272650146484375,
      "model_forward_time": 0.1145784854888916,
      "step": 44671
    },
    {
      "epoch": 0.000272650146484375,
      "step": 44671,
      "training_step_time": 0.40849900245666504
    },
    {
      "epoch": 0.00027265625,
      "model_forward_time": 0.11496376991271973,
      "step": 44672
    },
    {
      "epoch": 0.00027265625,
      "step": 44672,
      "training_step_time": 0.4046292304992676
    },
    {
      "epoch": 0.000272662353515625,
      "model_forward_time": 0.11537766456604004,
      "step": 44673
    },
    {
      "epoch": 0.000272662353515625,
      "step": 44673,
      "training_step_time": 0.47556591033935547
    },
    {
      "epoch": 0.00027266845703125,
      "model_forward_time": 0.1157529354095459,
      "step": 44674
    },
    {
      "epoch": 0.00027266845703125,
      "step": 44674,
      "training_step_time": 0.3730199337005615
    },
    {
      "epoch": 0.000272674560546875,
      "model_forward_time": 0.11489105224609375,
      "step": 44675
    },
    {
      "epoch": 0.000272674560546875,
      "step": 44675,
      "training_step_time": 0.4466252326965332
    },
    {
      "epoch": 0.0002726806640625,
      "model_forward_time": 0.11484837532043457,
      "step": 44676
    },
    {
      "epoch": 0.0002726806640625,
      "step": 44676,
      "training_step_time": 0.4534778594970703
    },
    {
      "epoch": 0.000272686767578125,
      "model_forward_time": 0.11499786376953125,
      "step": 44677
    },
    {
      "epoch": 0.000272686767578125,
      "step": 44677,
      "training_step_time": 0.455751895904541
    },
    {
      "epoch": 0.00027269287109375,
      "model_forward_time": 0.11506319046020508,
      "step": 44678
    },
    {
      "epoch": 0.00027269287109375,
      "step": 44678,
      "training_step_time": 0.39928460121154785
    },
    {
      "epoch": 0.000272698974609375,
      "model_forward_time": 0.11541080474853516,
      "step": 44679
    },
    {
      "epoch": 0.000272698974609375,
      "step": 44679,
      "training_step_time": 0.40402674674987793
    },
    {
      "epoch": 0.000272705078125,
      "grad_norm": 0.08928638696670532,
      "learning_rate": 1.6789953146707053e-05,
      "loss": 0.0369,
      "step": 44680
    },
    {
      "epoch": 0.000272705078125,
      "model_forward_time": 0.11506509780883789,
      "step": 44680
    },
    {
      "epoch": 0.000272705078125,
      "step": 44680,
      "training_step_time": 0.44773077964782715
    },
    {
      "epoch": 0.000272711181640625,
      "model_forward_time": 0.11570501327514648,
      "step": 44681
    },
    {
      "epoch": 0.000272711181640625,
      "step": 44681,
      "training_step_time": 0.39716458320617676
    },
    {
      "epoch": 0.00027271728515625,
      "model_forward_time": 0.11507678031921387,
      "step": 44682
    },
    {
      "epoch": 0.00027271728515625,
      "step": 44682,
      "training_step_time": 0.39507532119750977
    },
    {
      "epoch": 0.000272723388671875,
      "model_forward_time": 0.11705541610717773,
      "step": 44683
    },
    {
      "epoch": 0.000272723388671875,
      "step": 44683,
      "training_step_time": 0.3982052803039551
    },
    {
      "epoch": 0.0002727294921875,
      "model_forward_time": 0.11446428298950195,
      "step": 44684
    },
    {
      "epoch": 0.0002727294921875,
      "step": 44684,
      "training_step_time": 0.3925356864929199
    },
    {
      "epoch": 0.000272735595703125,
      "model_forward_time": 0.11653542518615723,
      "step": 44685
    },
    {
      "epoch": 0.000272735595703125,
      "step": 44685,
      "training_step_time": 0.3990514278411865
    },
    {
      "epoch": 0.00027274169921875,
      "model_forward_time": 0.11466121673583984,
      "step": 44686
    },
    {
      "epoch": 0.00027274169921875,
      "step": 44686,
      "training_step_time": 0.4023714065551758
    },
    {
      "epoch": 0.000272747802734375,
      "model_forward_time": 0.11535286903381348,
      "step": 44687
    },
    {
      "epoch": 0.000272747802734375,
      "step": 44687,
      "training_step_time": 0.41164612770080566
    },
    {
      "epoch": 0.00027275390625,
      "model_forward_time": 0.1152040958404541,
      "step": 44688
    },
    {
      "epoch": 0.00027275390625,
      "step": 44688,
      "training_step_time": 0.4992527961730957
    },
    {
      "epoch": 0.000272760009765625,
      "model_forward_time": 0.11494779586791992,
      "step": 44689
    },
    {
      "epoch": 0.000272760009765625,
      "step": 44689,
      "training_step_time": 0.46321654319763184
    },
    {
      "epoch": 0.00027276611328125,
      "grad_norm": 0.12432211637496948,
      "learning_rate": 1.676935722321139e-05,
      "loss": 0.0353,
      "step": 44690
    },
    {
      "epoch": 0.00027276611328125,
      "model_forward_time": 0.11611628532409668,
      "step": 44690
    },
    {
      "epoch": 0.00027276611328125,
      "step": 44690,
      "training_step_time": 0.4010598659515381
    },
    {
      "epoch": 0.000272772216796875,
      "model_forward_time": 0.11460757255554199,
      "step": 44691
    },
    {
      "epoch": 0.000272772216796875,
      "step": 44691,
      "training_step_time": 0.4710514545440674
    },
    {
      "epoch": 0.0002727783203125,
      "model_forward_time": 0.11534738540649414,
      "step": 44692
    },
    {
      "epoch": 0.0002727783203125,
      "step": 44692,
      "training_step_time": 0.39562368392944336
    },
    {
      "epoch": 0.000272784423828125,
      "model_forward_time": 0.11486625671386719,
      "step": 44693
    },
    {
      "epoch": 0.000272784423828125,
      "step": 44693,
      "training_step_time": 0.44563722610473633
    },
    {
      "epoch": 0.00027279052734375,
      "model_forward_time": 0.11499857902526855,
      "step": 44694
    },
    {
      "epoch": 0.00027279052734375,
      "step": 44694,
      "training_step_time": 0.4548180103302002
    },
    {
      "epoch": 0.000272796630859375,
      "model_forward_time": 0.1154325008392334,
      "step": 44695
    },
    {
      "epoch": 0.000272796630859375,
      "step": 44695,
      "training_step_time": 0.41640782356262207
    },
    {
      "epoch": 0.000272802734375,
      "model_forward_time": 0.11484050750732422,
      "step": 44696
    },
    {
      "epoch": 0.000272802734375,
      "step": 44696,
      "training_step_time": 0.3983137607574463
    },
    {
      "epoch": 0.000272808837890625,
      "model_forward_time": 0.11529994010925293,
      "step": 44697
    },
    {
      "epoch": 0.000272808837890625,
      "step": 44697,
      "training_step_time": 0.387892484664917
    },
    {
      "epoch": 0.00027281494140625,
      "model_forward_time": 0.11548185348510742,
      "step": 44698
    },
    {
      "epoch": 0.00027281494140625,
      "step": 44698,
      "training_step_time": 0.395477294921875
    },
    {
      "epoch": 0.000272821044921875,
      "model_forward_time": 0.1148989200592041,
      "step": 44699
    },
    {
      "epoch": 0.000272821044921875,
      "step": 44699,
      "training_step_time": 0.42483043670654297
    },
    {
      "epoch": 0.0002728271484375,
      "grad_norm": 0.11592478305101395,
      "learning_rate": 1.6748771394307585e-05,
      "loss": 0.0413,
      "step": 44700
    },
    {
      "epoch": 0.0002728271484375,
      "model_forward_time": 0.11481714248657227,
      "step": 44700
    },
    {
      "epoch": 0.0002728271484375,
      "step": 44700,
      "training_step_time": 0.44930338859558105
    },
    {
      "epoch": 0.000272833251953125,
      "model_forward_time": 0.11593961715698242,
      "step": 44701
    },
    {
      "epoch": 0.000272833251953125,
      "step": 44701,
      "training_step_time": 0.4069967269897461
    },
    {
      "epoch": 0.00027283935546875,
      "model_forward_time": 0.11444234848022461,
      "step": 44702
    },
    {
      "epoch": 0.00027283935546875,
      "step": 44702,
      "training_step_time": 0.4876847267150879
    },
    {
      "epoch": 0.000272845458984375,
      "model_forward_time": 0.11498069763183594,
      "step": 44703
    },
    {
      "epoch": 0.000272845458984375,
      "step": 44703,
      "training_step_time": 0.3654954433441162
    },
    {
      "epoch": 0.0002728515625,
      "model_forward_time": 0.11534929275512695,
      "step": 44704
    },
    {
      "epoch": 0.0002728515625,
      "step": 44704,
      "training_step_time": 0.45523524284362793
    },
    {
      "epoch": 0.000272857666015625,
      "model_forward_time": 0.11423182487487793,
      "step": 44705
    },
    {
      "epoch": 0.000272857666015625,
      "step": 44705,
      "training_step_time": 0.39264369010925293
    },
    {
      "epoch": 0.00027286376953125,
      "model_forward_time": 0.11456298828125,
      "step": 44706
    },
    {
      "epoch": 0.00027286376953125,
      "step": 44706,
      "training_step_time": 0.40376782417297363
    },
    {
      "epoch": 0.000272869873046875,
      "model_forward_time": 0.11465764045715332,
      "step": 44707
    },
    {
      "epoch": 0.000272869873046875,
      "step": 44707,
      "training_step_time": 0.4977121353149414
    },
    {
      "epoch": 0.0002728759765625,
      "model_forward_time": 0.1151432991027832,
      "step": 44708
    },
    {
      "epoch": 0.0002728759765625,
      "step": 44708,
      "training_step_time": 0.42216992378234863
    },
    {
      "epoch": 0.000272882080078125,
      "model_forward_time": 0.11546182632446289,
      "step": 44709
    },
    {
      "epoch": 0.000272882080078125,
      "step": 44709,
      "training_step_time": 0.47963595390319824
    },
    {
      "epoch": 0.00027288818359375,
      "grad_norm": 0.07306225597858429,
      "learning_rate": 1.672819566624911e-05,
      "loss": 0.033,
      "step": 44710
    },
    {
      "epoch": 0.00027288818359375,
      "model_forward_time": 0.11463665962219238,
      "step": 44710
    },
    {
      "epoch": 0.00027288818359375,
      "step": 44710,
      "training_step_time": 0.38390254974365234
    },
    {
      "epoch": 0.000272894287109375,
      "model_forward_time": 0.11558055877685547,
      "step": 44711
    },
    {
      "epoch": 0.000272894287109375,
      "step": 44711,
      "training_step_time": 0.406052827835083
    },
    {
      "epoch": 0.000272900390625,
      "model_forward_time": 0.1155405044555664,
      "step": 44712
    },
    {
      "epoch": 0.000272900390625,
      "step": 44712,
      "training_step_time": 0.40200281143188477
    },
    {
      "epoch": 0.000272906494140625,
      "model_forward_time": 0.11519241333007812,
      "step": 44713
    },
    {
      "epoch": 0.000272906494140625,
      "step": 44713,
      "training_step_time": 0.3951387405395508
    },
    {
      "epoch": 0.00027291259765625,
      "model_forward_time": 0.11558365821838379,
      "step": 44714
    },
    {
      "epoch": 0.00027291259765625,
      "step": 44714,
      "training_step_time": 0.39426541328430176
    },
    {
      "epoch": 0.000272918701171875,
      "model_forward_time": 0.11447501182556152,
      "step": 44715
    },
    {
      "epoch": 0.000272918701171875,
      "step": 44715,
      "training_step_time": 0.3975255489349365
    },
    {
      "epoch": 0.0002729248046875,
      "model_forward_time": 0.11570882797241211,
      "step": 44716
    },
    {
      "epoch": 0.0002729248046875,
      "step": 44716,
      "training_step_time": 0.41138505935668945
    },
    {
      "epoch": 0.000272930908203125,
      "model_forward_time": 0.11585736274719238,
      "step": 44717
    },
    {
      "epoch": 0.000272930908203125,
      "step": 44717,
      "training_step_time": 0.40439677238464355
    },
    {
      "epoch": 0.00027293701171875,
      "model_forward_time": 0.11525297164916992,
      "step": 44718
    },
    {
      "epoch": 0.00027293701171875,
      "step": 44718,
      "training_step_time": 0.49220848083496094
    },
    {
      "epoch": 0.000272943115234375,
      "model_forward_time": 0.11465859413146973,
      "step": 44719
    },
    {
      "epoch": 0.000272943115234375,
      "step": 44719,
      "training_step_time": 0.49646568298339844
    },
    {
      "epoch": 0.00027294921875,
      "grad_norm": 0.10262758284807205,
      "learning_rate": 1.6707630045286265e-05,
      "loss": 0.0342,
      "step": 44720
    },
    {
      "epoch": 0.00027294921875,
      "model_forward_time": 0.1148076057434082,
      "step": 44720
    },
    {
      "epoch": 0.00027294921875,
      "step": 44720,
      "training_step_time": 0.39563584327697754
    },
    {
      "epoch": 0.000272955322265625,
      "model_forward_time": 0.11504077911376953,
      "step": 44721
    },
    {
      "epoch": 0.000272955322265625,
      "step": 44721,
      "training_step_time": 0.44240760803222656
    },
    {
      "epoch": 0.00027296142578125,
      "model_forward_time": 0.11426448822021484,
      "step": 44722
    },
    {
      "epoch": 0.00027296142578125,
      "step": 44722,
      "training_step_time": 0.3924064636230469
    },
    {
      "epoch": 0.000272967529296875,
      "model_forward_time": 0.11449480056762695,
      "step": 44723
    },
    {
      "epoch": 0.000272967529296875,
      "step": 44723,
      "training_step_time": 0.43496155738830566
    },
    {
      "epoch": 0.0002729736328125,
      "model_forward_time": 0.11516523361206055,
      "step": 44724
    },
    {
      "epoch": 0.0002729736328125,
      "step": 44724,
      "training_step_time": 0.4049949645996094
    },
    {
      "epoch": 0.000272979736328125,
      "model_forward_time": 0.1146543025970459,
      "step": 44725
    },
    {
      "epoch": 0.000272979736328125,
      "step": 44725,
      "training_step_time": 0.40044689178466797
    },
    {
      "epoch": 0.00027298583984375,
      "model_forward_time": 0.11484622955322266,
      "step": 44726
    },
    {
      "epoch": 0.00027298583984375,
      "step": 44726,
      "training_step_time": 0.40023064613342285
    },
    {
      "epoch": 0.000272991943359375,
      "model_forward_time": 0.1159358024597168,
      "step": 44727
    },
    {
      "epoch": 0.000272991943359375,
      "step": 44727,
      "training_step_time": 0.40595412254333496
    },
    {
      "epoch": 0.000272998046875,
      "model_forward_time": 0.11507821083068848,
      "step": 44728
    },
    {
      "epoch": 0.000272998046875,
      "step": 44728,
      "training_step_time": 0.3854331970214844
    },
    {
      "epoch": 0.000273004150390625,
      "model_forward_time": 0.11536431312561035,
      "step": 44729
    },
    {
      "epoch": 0.000273004150390625,
      "step": 44729,
      "training_step_time": 0.3849790096282959
    },
    {
      "epoch": 0.00027301025390625,
      "grad_norm": 0.10491456836462021,
      "learning_rate": 1.6687074537666398e-05,
      "loss": 0.0356,
      "step": 44730
    },
    {
      "epoch": 0.00027301025390625,
      "model_forward_time": 0.11517953872680664,
      "step": 44730
    },
    {
      "epoch": 0.00027301025390625,
      "step": 44730,
      "training_step_time": 0.40013599395751953
    },
    {
      "epoch": 0.000273016357421875,
      "model_forward_time": 0.11502695083618164,
      "step": 44731
    },
    {
      "epoch": 0.000273016357421875,
      "step": 44731,
      "training_step_time": 0.4555964469909668
    },
    {
      "epoch": 0.0002730224609375,
      "model_forward_time": 0.11598873138427734,
      "step": 44732
    },
    {
      "epoch": 0.0002730224609375,
      "step": 44732,
      "training_step_time": 0.46059322357177734
    },
    {
      "epoch": 0.000273028564453125,
      "model_forward_time": 0.11539101600646973,
      "step": 44733
    },
    {
      "epoch": 0.000273028564453125,
      "step": 44733,
      "training_step_time": 0.4323122501373291
    },
    {
      "epoch": 0.00027303466796875,
      "model_forward_time": 0.11514139175415039,
      "step": 44734
    },
    {
      "epoch": 0.00027303466796875,
      "step": 44734,
      "training_step_time": 0.4303915500640869
    },
    {
      "epoch": 0.000273040771484375,
      "model_forward_time": 0.1157979965209961,
      "step": 44735
    },
    {
      "epoch": 0.000273040771484375,
      "step": 44735,
      "training_step_time": 0.43146777153015137
    },
    {
      "epoch": 0.000273046875,
      "model_forward_time": 0.1151270866394043,
      "step": 44736
    },
    {
      "epoch": 0.000273046875,
      "step": 44736,
      "training_step_time": 0.44643282890319824
    },
    {
      "epoch": 0.000273052978515625,
      "model_forward_time": 0.11529779434204102,
      "step": 44737
    },
    {
      "epoch": 0.000273052978515625,
      "step": 44737,
      "training_step_time": 0.4390380382537842
    },
    {
      "epoch": 0.00027305908203125,
      "model_forward_time": 0.11612844467163086,
      "step": 44738
    },
    {
      "epoch": 0.00027305908203125,
      "step": 44738,
      "training_step_time": 0.391024112701416
    },
    {
      "epoch": 0.000273065185546875,
      "model_forward_time": 0.11617541313171387,
      "step": 44739
    },
    {
      "epoch": 0.000273065185546875,
      "step": 44739,
      "training_step_time": 0.3883976936340332
    },
    {
      "epoch": 0.0002730712890625,
      "grad_norm": 0.09687993675470352,
      "learning_rate": 1.666652914963371e-05,
      "loss": 0.0394,
      "step": 44740
    },
    {
      "epoch": 0.0002730712890625,
      "model_forward_time": 0.11493420600891113,
      "step": 44740
    },
    {
      "epoch": 0.0002730712890625,
      "step": 44740,
      "training_step_time": 0.40105199813842773
    },
    {
      "epoch": 0.000273077392578125,
      "model_forward_time": 0.11502671241760254,
      "step": 44741
    },
    {
      "epoch": 0.000273077392578125,
      "step": 44741,
      "training_step_time": 0.393557071685791
    },
    {
      "epoch": 0.00027308349609375,
      "model_forward_time": 0.11467885971069336,
      "step": 44742
    },
    {
      "epoch": 0.00027308349609375,
      "step": 44742,
      "training_step_time": 0.39347195625305176
    },
    {
      "epoch": 0.000273089599609375,
      "model_forward_time": 0.11588644981384277,
      "step": 44743
    },
    {
      "epoch": 0.000273089599609375,
      "step": 44743,
      "training_step_time": 0.39593052864074707
    },
    {
      "epoch": 0.000273095703125,
      "model_forward_time": 0.11571073532104492,
      "step": 44744
    },
    {
      "epoch": 0.000273095703125,
      "step": 44744,
      "training_step_time": 0.4570920467376709
    },
    {
      "epoch": 0.000273101806640625,
      "model_forward_time": 0.11590456962585449,
      "step": 44745
    },
    {
      "epoch": 0.000273101806640625,
      "step": 44745,
      "training_step_time": 0.409412145614624
    },
    {
      "epoch": 0.00027310791015625,
      "model_forward_time": 0.11513447761535645,
      "step": 44746
    },
    {
      "epoch": 0.00027310791015625,
      "step": 44746,
      "training_step_time": 0.42586660385131836
    },
    {
      "epoch": 0.000273114013671875,
      "model_forward_time": 0.1148691177368164,
      "step": 44747
    },
    {
      "epoch": 0.000273114013671875,
      "step": 44747,
      "training_step_time": 0.5056490898132324
    },
    {
      "epoch": 0.0002731201171875,
      "model_forward_time": 0.11511778831481934,
      "step": 44748
    },
    {
      "epoch": 0.0002731201171875,
      "step": 44748,
      "training_step_time": 0.5069575309753418
    },
    {
      "epoch": 0.000273126220703125,
      "model_forward_time": 0.11500263214111328,
      "step": 44749
    },
    {
      "epoch": 0.000273126220703125,
      "step": 44749,
      "training_step_time": 0.4014706611633301
    },
    {
      "epoch": 0.00027313232421875,
      "grad_norm": 0.15191318094730377,
      "learning_rate": 1.6645993887429345e-05,
      "loss": 0.0354,
      "step": 44750
    },
    {
      "epoch": 0.00027313232421875,
      "model_forward_time": 0.11483526229858398,
      "step": 44750
    },
    {
      "epoch": 0.00027313232421875,
      "step": 44750,
      "training_step_time": 0.40961551666259766
    },
    {
      "epoch": 0.000273138427734375,
      "model_forward_time": 0.11449646949768066,
      "step": 44751
    },
    {
      "epoch": 0.000273138427734375,
      "step": 44751,
      "training_step_time": 0.40867161750793457
    },
    {
      "epoch": 0.00027314453125,
      "model_forward_time": 0.11481237411499023,
      "step": 44752
    },
    {
      "epoch": 0.00027314453125,
      "step": 44752,
      "training_step_time": 0.4070882797241211
    },
    {
      "epoch": 0.000273150634765625,
      "model_forward_time": 0.11519384384155273,
      "step": 44753
    },
    {
      "epoch": 0.000273150634765625,
      "step": 44753,
      "training_step_time": 0.39882612228393555
    },
    {
      "epoch": 0.00027315673828125,
      "model_forward_time": 0.1152181625366211,
      "step": 44754
    },
    {
      "epoch": 0.00027315673828125,
      "step": 44754,
      "training_step_time": 0.3933568000793457
    },
    {
      "epoch": 0.000273162841796875,
      "model_forward_time": 0.11513543128967285,
      "step": 44755
    },
    {
      "epoch": 0.000273162841796875,
      "step": 44755,
      "training_step_time": 0.40155458450317383
    },
    {
      "epoch": 0.0002731689453125,
      "model_forward_time": 0.11522221565246582,
      "step": 44756
    },
    {
      "epoch": 0.0002731689453125,
      "step": 44756,
      "training_step_time": 0.39168763160705566
    },
    {
      "epoch": 0.000273175048828125,
      "model_forward_time": 0.11516261100769043,
      "step": 44757
    },
    {
      "epoch": 0.000273175048828125,
      "step": 44757,
      "training_step_time": 0.3970818519592285
    },
    {
      "epoch": 0.00027318115234375,
      "model_forward_time": 0.11521220207214355,
      "step": 44758
    },
    {
      "epoch": 0.00027318115234375,
      "step": 44758,
      "training_step_time": 0.38921499252319336
    },
    {
      "epoch": 0.000273187255859375,
      "model_forward_time": 0.11549854278564453,
      "step": 44759
    },
    {
      "epoch": 0.000273187255859375,
      "step": 44759,
      "training_step_time": 0.4254024028778076
    },
    {
      "epoch": 0.000273193359375,
      "grad_norm": 0.12868690490722656,
      "learning_rate": 1.662546875729138e-05,
      "loss": 0.0353,
      "step": 44760
    },
    {
      "epoch": 0.000273193359375,
      "model_forward_time": 0.11497640609741211,
      "step": 44760
    },
    {
      "epoch": 0.000273193359375,
      "step": 44760,
      "training_step_time": 0.45313358306884766
    },
    {
      "epoch": 0.000273199462890625,
      "model_forward_time": 0.1159219741821289,
      "step": 44761
    },
    {
      "epoch": 0.000273199462890625,
      "step": 44761,
      "training_step_time": 0.49518465995788574
    },
    {
      "epoch": 0.00027320556640625,
      "model_forward_time": 0.1151587963104248,
      "step": 44762
    },
    {
      "epoch": 0.00027320556640625,
      "step": 44762,
      "training_step_time": 0.44883155822753906
    },
    {
      "epoch": 0.000273211669921875,
      "model_forward_time": 0.1159067153930664,
      "step": 44763
    },
    {
      "epoch": 0.000273211669921875,
      "step": 44763,
      "training_step_time": 0.42624473571777344
    },
    {
      "epoch": 0.0002732177734375,
      "model_forward_time": 0.11440634727478027,
      "step": 44764
    },
    {
      "epoch": 0.0002732177734375,
      "step": 44764,
      "training_step_time": 0.4119727611541748
    },
    {
      "epoch": 0.000273223876953125,
      "model_forward_time": 0.11513113975524902,
      "step": 44765
    },
    {
      "epoch": 0.000273223876953125,
      "step": 44765,
      "training_step_time": 0.408740758895874
    },
    {
      "epoch": 0.00027322998046875,
      "model_forward_time": 0.11537432670593262,
      "step": 44766
    },
    {
      "epoch": 0.00027322998046875,
      "step": 44766,
      "training_step_time": 0.4261128902435303
    },
    {
      "epoch": 0.000273236083984375,
      "model_forward_time": 0.11532068252563477,
      "step": 44767
    },
    {
      "epoch": 0.000273236083984375,
      "step": 44767,
      "training_step_time": 0.40909385681152344
    },
    {
      "epoch": 0.0002732421875,
      "model_forward_time": 0.11449074745178223,
      "step": 44768
    },
    {
      "epoch": 0.0002732421875,
      "step": 44768,
      "training_step_time": 0.3845791816711426
    },
    {
      "epoch": 0.000273248291015625,
      "model_forward_time": 0.1155691146850586,
      "step": 44769
    },
    {
      "epoch": 0.000273248291015625,
      "step": 44769,
      "training_step_time": 0.38625359535217285
    },
    {
      "epoch": 0.00027325439453125,
      "grad_norm": 0.16060850024223328,
      "learning_rate": 1.660495376545478e-05,
      "loss": 0.0366,
      "step": 44770
    },
    {
      "epoch": 0.00027325439453125,
      "model_forward_time": 0.11531972885131836,
      "step": 44770
    },
    {
      "epoch": 0.00027325439453125,
      "step": 44770,
      "training_step_time": 0.3915832042694092
    },
    {
      "epoch": 0.000273260498046875,
      "model_forward_time": 0.1152036190032959,
      "step": 44771
    },
    {
      "epoch": 0.000273260498046875,
      "step": 44771,
      "training_step_time": 0.38234519958496094
    },
    {
      "epoch": 0.0002732666015625,
      "model_forward_time": 0.11557435989379883,
      "step": 44772
    },
    {
      "epoch": 0.0002732666015625,
      "step": 44772,
      "training_step_time": 0.4050755500793457
    },
    {
      "epoch": 0.000273272705078125,
      "model_forward_time": 0.11525344848632812,
      "step": 44773
    },
    {
      "epoch": 0.000273272705078125,
      "step": 44773,
      "training_step_time": 0.4793834686279297
    },
    {
      "epoch": 0.00027327880859375,
      "model_forward_time": 0.11542963981628418,
      "step": 44774
    },
    {
      "epoch": 0.00027327880859375,
      "step": 44774,
      "training_step_time": 0.42009520530700684
    },
    {
      "epoch": 0.000273284912109375,
      "model_forward_time": 0.11584782600402832,
      "step": 44775
    },
    {
      "epoch": 0.000273284912109375,
      "step": 44775,
      "training_step_time": 0.4124898910522461
    },
    {
      "epoch": 0.000273291015625,
      "model_forward_time": 0.11526274681091309,
      "step": 44776
    },
    {
      "epoch": 0.000273291015625,
      "step": 44776,
      "training_step_time": 0.4975574016571045
    },
    {
      "epoch": 0.000273297119140625,
      "model_forward_time": 0.11505317687988281,
      "step": 44777
    },
    {
      "epoch": 0.000273297119140625,
      "step": 44777,
      "training_step_time": 0.5109376907348633
    },
    {
      "epoch": 0.00027330322265625,
      "model_forward_time": 0.11462235450744629,
      "step": 44778
    },
    {
      "epoch": 0.00027330322265625,
      "step": 44778,
      "training_step_time": 0.43929100036621094
    },
    {
      "epoch": 0.000273309326171875,
      "model_forward_time": 0.1148068904876709,
      "step": 44779
    },
    {
      "epoch": 0.000273309326171875,
      "step": 44779,
      "training_step_time": 0.3975412845611572
    },
    {
      "epoch": 0.0002733154296875,
      "grad_norm": 0.12334558367729187,
      "learning_rate": 1.658444891815152e-05,
      "loss": 0.0384,
      "step": 44780
    },
    {
      "epoch": 0.0002733154296875,
      "model_forward_time": 0.11473250389099121,
      "step": 44780
    },
    {
      "epoch": 0.0002733154296875,
      "step": 44780,
      "training_step_time": 0.4053795337677002
    },
    {
      "epoch": 0.000273321533203125,
      "model_forward_time": 0.1147468090057373,
      "step": 44781
    },
    {
      "epoch": 0.000273321533203125,
      "step": 44781,
      "training_step_time": 0.4852461814880371
    },
    {
      "epoch": 0.00027332763671875,
      "model_forward_time": 0.11508584022521973,
      "step": 44782
    },
    {
      "epoch": 0.00027332763671875,
      "step": 44782,
      "training_step_time": 0.3858788013458252
    },
    {
      "epoch": 0.000273333740234375,
      "model_forward_time": 0.1147317886352539,
      "step": 44783
    },
    {
      "epoch": 0.000273333740234375,
      "step": 44783,
      "training_step_time": 0.3884904384613037
    },
    {
      "epoch": 0.00027333984375,
      "model_forward_time": 0.11521196365356445,
      "step": 44784
    },
    {
      "epoch": 0.00027333984375,
      "step": 44784,
      "training_step_time": 0.38887524604797363
    },
    {
      "epoch": 0.000273345947265625,
      "model_forward_time": 0.11516523361206055,
      "step": 44785
    },
    {
      "epoch": 0.000273345947265625,
      "step": 44785,
      "training_step_time": 0.39453649520874023
    },
    {
      "epoch": 0.00027335205078125,
      "model_forward_time": 0.11510705947875977,
      "step": 44786
    },
    {
      "epoch": 0.00027335205078125,
      "step": 44786,
      "training_step_time": 0.4345676898956299
    },
    {
      "epoch": 0.000273358154296875,
      "model_forward_time": 0.11529898643493652,
      "step": 44787
    },
    {
      "epoch": 0.000273358154296875,
      "step": 44787,
      "training_step_time": 0.3997044563293457
    },
    {
      "epoch": 0.0002733642578125,
      "model_forward_time": 0.11574316024780273,
      "step": 44788
    },
    {
      "epoch": 0.0002733642578125,
      "step": 44788,
      "training_step_time": 0.4600973129272461
    },
    {
      "epoch": 0.000273370361328125,
      "model_forward_time": 0.11518359184265137,
      "step": 44789
    },
    {
      "epoch": 0.000273370361328125,
      "step": 44789,
      "training_step_time": 0.49623847007751465
    },
    {
      "epoch": 0.00027337646484375,
      "grad_norm": 0.10742921382188797,
      "learning_rate": 1.6563954221610355e-05,
      "loss": 0.036,
      "step": 44790
    },
    {
      "epoch": 0.00027337646484375,
      "model_forward_time": 0.1148536205291748,
      "step": 44790
    },
    {
      "epoch": 0.00027337646484375,
      "step": 44790,
      "training_step_time": 0.4533553123474121
    },
    {
      "epoch": 0.000273382568359375,
      "model_forward_time": 0.11510777473449707,
      "step": 44791
    },
    {
      "epoch": 0.000273382568359375,
      "step": 44791,
      "training_step_time": 0.49543046951293945
    },
    {
      "epoch": 0.000273388671875,
      "model_forward_time": 0.11470603942871094,
      "step": 44792
    },
    {
      "epoch": 0.000273388671875,
      "step": 44792,
      "training_step_time": 0.4252007007598877
    },
    {
      "epoch": 0.000273394775390625,
      "model_forward_time": 0.11456990242004395,
      "step": 44793
    },
    {
      "epoch": 0.000273394775390625,
      "step": 44793,
      "training_step_time": 0.4191262722015381
    },
    {
      "epoch": 0.00027340087890625,
      "model_forward_time": 0.11490106582641602,
      "step": 44794
    },
    {
      "epoch": 0.00027340087890625,
      "step": 44794,
      "training_step_time": 0.4130396842956543
    },
    {
      "epoch": 0.000273406982421875,
      "model_forward_time": 0.11488223075866699,
      "step": 44795
    },
    {
      "epoch": 0.000273406982421875,
      "step": 44795,
      "training_step_time": 0.4734160900115967
    },
    {
      "epoch": 0.0002734130859375,
      "model_forward_time": 0.11551165580749512,
      "step": 44796
    },
    {
      "epoch": 0.0002734130859375,
      "step": 44796,
      "training_step_time": 0.42668747901916504
    },
    {
      "epoch": 0.000273419189453125,
      "model_forward_time": 0.11491918563842773,
      "step": 44797
    },
    {
      "epoch": 0.000273419189453125,
      "step": 44797,
      "training_step_time": 0.4004497528076172
    },
    {
      "epoch": 0.00027342529296875,
      "model_forward_time": 0.11530089378356934,
      "step": 44798
    },
    {
      "epoch": 0.00027342529296875,
      "step": 44798,
      "training_step_time": 0.3959078788757324
    },
    {
      "epoch": 0.000273431396484375,
      "model_forward_time": 0.11516833305358887,
      "step": 44799
    },
    {
      "epoch": 0.000273431396484375,
      "step": 44799,
      "training_step_time": 0.3846168518066406
    },
    {
      "epoch": 0.0002734375,
      "grad_norm": 0.09425890445709229,
      "learning_rate": 1.6543469682057106e-05,
      "loss": 0.0365,
      "step": 44800
    },
    {
      "epoch": 0.0002734375,
      "model_forward_time": 0.11545062065124512,
      "step": 44800
    },
    {
      "epoch": 0.0002734375,
      "step": 44800,
      "training_step_time": 0.41401076316833496
    },
    {
      "epoch": 0.000273443603515625,
      "model_forward_time": 0.11512160301208496,
      "step": 44801
    },
    {
      "epoch": 0.000273443603515625,
      "step": 44801,
      "training_step_time": 0.4125945568084717
    },
    {
      "epoch": 0.00027344970703125,
      "model_forward_time": 0.11491656303405762,
      "step": 44802
    },
    {
      "epoch": 0.00027344970703125,
      "step": 44802,
      "training_step_time": 0.45340609550476074
    },
    {
      "epoch": 0.000273455810546875,
      "model_forward_time": 0.11539840698242188,
      "step": 44803
    },
    {
      "epoch": 0.000273455810546875,
      "step": 44803,
      "training_step_time": 0.4147210121154785
    },
    {
      "epoch": 0.0002734619140625,
      "model_forward_time": 0.11483907699584961,
      "step": 44804
    },
    {
      "epoch": 0.0002734619140625,
      "step": 44804,
      "training_step_time": 0.3659627437591553
    },
    {
      "epoch": 0.000273468017578125,
      "model_forward_time": 0.11556792259216309,
      "step": 44805
    },
    {
      "epoch": 0.000273468017578125,
      "step": 44805,
      "training_step_time": 0.38764357566833496
    },
    {
      "epoch": 0.00027347412109375,
      "model_forward_time": 0.11544466018676758,
      "step": 44806
    },
    {
      "epoch": 0.00027347412109375,
      "step": 44806,
      "training_step_time": 0.49655938148498535
    },
    {
      "epoch": 0.000273480224609375,
      "model_forward_time": 0.11447811126708984,
      "step": 44807
    },
    {
      "epoch": 0.000273480224609375,
      "step": 44807,
      "training_step_time": 0.3933706283569336
    },
    {
      "epoch": 0.000273486328125,
      "model_forward_time": 0.11486577987670898,
      "step": 44808
    },
    {
      "epoch": 0.000273486328125,
      "step": 44808,
      "training_step_time": 0.4586362838745117
    },
    {
      "epoch": 0.000273492431640625,
      "model_forward_time": 0.11478805541992188,
      "step": 44809
    },
    {
      "epoch": 0.000273492431640625,
      "step": 44809,
      "training_step_time": 0.44649386405944824
    },
    {
      "epoch": 0.00027349853515625,
      "grad_norm": 0.11065033823251724,
      "learning_rate": 1.652299530571436e-05,
      "loss": 0.0359,
      "step": 44810
    },
    {
      "epoch": 0.00027349853515625,
      "model_forward_time": 0.11494636535644531,
      "step": 44810
    },
    {
      "epoch": 0.00027349853515625,
      "step": 44810,
      "training_step_time": 0.385469913482666
    },
    {
      "epoch": 0.000273504638671875,
      "model_forward_time": 0.11452722549438477,
      "step": 44811
    },
    {
      "epoch": 0.000273504638671875,
      "step": 44811,
      "training_step_time": 0.39278221130371094
    },
    {
      "epoch": 0.0002735107421875,
      "model_forward_time": 0.11533713340759277,
      "step": 44812
    },
    {
      "epoch": 0.0002735107421875,
      "step": 44812,
      "training_step_time": 0.3925797939300537
    },
    {
      "epoch": 0.000273516845703125,
      "model_forward_time": 0.11488199234008789,
      "step": 44813
    },
    {
      "epoch": 0.000273516845703125,
      "step": 44813,
      "training_step_time": 0.40235233306884766
    },
    {
      "epoch": 0.00027352294921875,
      "model_forward_time": 0.11481881141662598,
      "step": 44814
    },
    {
      "epoch": 0.00027352294921875,
      "step": 44814,
      "training_step_time": 0.43680787086486816
    },
    {
      "epoch": 0.000273529052734375,
      "model_forward_time": 0.115203857421875,
      "step": 44815
    },
    {
      "epoch": 0.000273529052734375,
      "step": 44815,
      "training_step_time": 0.4146125316619873
    },
    {
      "epoch": 0.00027353515625,
      "model_forward_time": 0.11532711982727051,
      "step": 44816
    },
    {
      "epoch": 0.00027353515625,
      "step": 44816,
      "training_step_time": 0.44895029067993164
    },
    {
      "epoch": 0.000273541259765625,
      "model_forward_time": 0.11490845680236816,
      "step": 44817
    },
    {
      "epoch": 0.000273541259765625,
      "step": 44817,
      "training_step_time": 0.4024543762207031
    },
    {
      "epoch": 0.00027354736328125,
      "model_forward_time": 0.1150662899017334,
      "step": 44818
    },
    {
      "epoch": 0.00027354736328125,
      "step": 44818,
      "training_step_time": 0.4728209972381592
    },
    {
      "epoch": 0.000273553466796875,
      "model_forward_time": 0.11478686332702637,
      "step": 44819
    },
    {
      "epoch": 0.000273553466796875,
      "step": 44819,
      "training_step_time": 0.45559048652648926
    },
    {
      "epoch": 0.0002735595703125,
      "grad_norm": 0.09748362749814987,
      "learning_rate": 1.6502531098801753e-05,
      "loss": 0.0389,
      "step": 44820
    },
    {
      "epoch": 0.0002735595703125,
      "model_forward_time": 0.11417555809020996,
      "step": 44820
    },
    {
      "epoch": 0.0002735595703125,
      "step": 44820,
      "training_step_time": 0.3900165557861328
    },
    {
      "epoch": 0.000273565673828125,
      "model_forward_time": 0.11520600318908691,
      "step": 44821
    },
    {
      "epoch": 0.000273565673828125,
      "step": 44821,
      "training_step_time": 0.41310787200927734
    },
    {
      "epoch": 0.00027357177734375,
      "model_forward_time": 0.11547017097473145,
      "step": 44822
    },
    {
      "epoch": 0.00027357177734375,
      "step": 44822,
      "training_step_time": 0.44446730613708496
    },
    {
      "epoch": 0.000273577880859375,
      "model_forward_time": 0.11461615562438965,
      "step": 44823
    },
    {
      "epoch": 0.000273577880859375,
      "step": 44823,
      "training_step_time": 0.49318766593933105
    },
    {
      "epoch": 0.000273583984375,
      "model_forward_time": 0.11521148681640625,
      "step": 44824
    },
    {
      "epoch": 0.000273583984375,
      "step": 44824,
      "training_step_time": 0.384718656539917
    },
    {
      "epoch": 0.000273590087890625,
      "model_forward_time": 0.11566352844238281,
      "step": 44825
    },
    {
      "epoch": 0.000273590087890625,
      "step": 44825,
      "training_step_time": 0.3986358642578125
    },
    {
      "epoch": 0.00027359619140625,
      "model_forward_time": 0.1156456470489502,
      "step": 44826
    },
    {
      "epoch": 0.00027359619140625,
      "step": 44826,
      "training_step_time": 0.40328145027160645
    },
    {
      "epoch": 0.000273602294921875,
      "model_forward_time": 0.11481833457946777,
      "step": 44827
    },
    {
      "epoch": 0.000273602294921875,
      "step": 44827,
      "training_step_time": 0.48060011863708496
    },
    {
      "epoch": 0.0002736083984375,
      "model_forward_time": 0.11479902267456055,
      "step": 44828
    },
    {
      "epoch": 0.0002736083984375,
      "step": 44828,
      "training_step_time": 0.424304723739624
    },
    {
      "epoch": 0.000273614501953125,
      "model_forward_time": 0.11469864845275879,
      "step": 44829
    },
    {
      "epoch": 0.000273614501953125,
      "step": 44829,
      "training_step_time": 0.39159059524536133
    },
    {
      "epoch": 0.00027362060546875,
      "grad_norm": 0.12857016921043396,
      "learning_rate": 1.648207706753575e-05,
      "loss": 0.0424,
      "step": 44830
    },
    {
      "epoch": 0.00027362060546875,
      "model_forward_time": 0.11480021476745605,
      "step": 44830
    },
    {
      "epoch": 0.00027362060546875,
      "step": 44830,
      "training_step_time": 0.39432811737060547
    },
    {
      "epoch": 0.000273626708984375,
      "model_forward_time": 0.11469483375549316,
      "step": 44831
    },
    {
      "epoch": 0.000273626708984375,
      "step": 44831,
      "training_step_time": 0.4077889919281006
    },
    {
      "epoch": 0.0002736328125,
      "model_forward_time": 0.11564350128173828,
      "step": 44832
    },
    {
      "epoch": 0.0002736328125,
      "step": 44832,
      "training_step_time": 0.45998644828796387
    },
    {
      "epoch": 0.000273638916015625,
      "model_forward_time": 0.11550664901733398,
      "step": 44833
    },
    {
      "epoch": 0.000273638916015625,
      "step": 44833,
      "training_step_time": 0.4115755558013916
    },
    {
      "epoch": 0.00027364501953125,
      "model_forward_time": 0.11467480659484863,
      "step": 44834
    },
    {
      "epoch": 0.00027364501953125,
      "step": 44834,
      "training_step_time": 0.4039425849914551
    },
    {
      "epoch": 0.000273651123046875,
      "model_forward_time": 0.11650204658508301,
      "step": 44835
    },
    {
      "epoch": 0.000273651123046875,
      "step": 44835,
      "training_step_time": 0.444263219833374
    },
    {
      "epoch": 0.0002736572265625,
      "model_forward_time": 0.1157681941986084,
      "step": 44836
    },
    {
      "epoch": 0.0002736572265625,
      "step": 44836,
      "training_step_time": 0.4552326202392578
    },
    {
      "epoch": 0.000273663330078125,
      "model_forward_time": 0.11613845825195312,
      "step": 44837
    },
    {
      "epoch": 0.000273663330078125,
      "step": 44837,
      "training_step_time": 0.43900346755981445
    },
    {
      "epoch": 0.00027366943359375,
      "model_forward_time": 0.11529111862182617,
      "step": 44838
    },
    {
      "epoch": 0.00027366943359375,
      "step": 44838,
      "training_step_time": 0.4008326530456543
    },
    {
      "epoch": 0.000273675537109375,
      "model_forward_time": 0.11557555198669434,
      "step": 44839
    },
    {
      "epoch": 0.000273675537109375,
      "step": 44839,
      "training_step_time": 0.3960909843444824
    },
    {
      "epoch": 0.000273681640625,
      "grad_norm": 0.10304279625415802,
      "learning_rate": 1.646163321812974e-05,
      "loss": 0.0371,
      "step": 44840
    },
    {
      "epoch": 0.000273681640625,
      "model_forward_time": 0.11522054672241211,
      "step": 44840
    },
    {
      "epoch": 0.000273681640625,
      "step": 44840,
      "training_step_time": 0.3989243507385254
    },
    {
      "epoch": 0.000273687744140625,
      "model_forward_time": 0.11559557914733887,
      "step": 44841
    },
    {
      "epoch": 0.000273687744140625,
      "step": 44841,
      "training_step_time": 0.40382814407348633
    },
    {
      "epoch": 0.00027369384765625,
      "model_forward_time": 0.11419510841369629,
      "step": 44842
    },
    {
      "epoch": 0.00027369384765625,
      "step": 44842,
      "training_step_time": 0.4453611373901367
    },
    {
      "epoch": 0.000273699951171875,
      "model_forward_time": 0.11479997634887695,
      "step": 44843
    },
    {
      "epoch": 0.000273699951171875,
      "step": 44843,
      "training_step_time": 0.41956281661987305
    },
    {
      "epoch": 0.0002737060546875,
      "model_forward_time": 0.1143643856048584,
      "step": 44844
    },
    {
      "epoch": 0.0002737060546875,
      "step": 44844,
      "training_step_time": 0.4021427631378174
    },
    {
      "epoch": 0.000273712158203125,
      "model_forward_time": 0.1154778003692627,
      "step": 44845
    },
    {
      "epoch": 0.000273712158203125,
      "step": 44845,
      "training_step_time": 0.4166123867034912
    },
    {
      "epoch": 0.00027371826171875,
      "model_forward_time": 0.11494827270507812,
      "step": 44846
    },
    {
      "epoch": 0.00027371826171875,
      "step": 44846,
      "training_step_time": 0.44357728958129883
    },
    {
      "epoch": 0.000273724365234375,
      "model_forward_time": 0.11554312705993652,
      "step": 44847
    },
    {
      "epoch": 0.000273724365234375,
      "step": 44847,
      "training_step_time": 0.49755859375
    },
    {
      "epoch": 0.00027373046875,
      "model_forward_time": 0.11441683769226074,
      "step": 44848
    },
    {
      "epoch": 0.00027373046875,
      "step": 44848,
      "training_step_time": 0.4484095573425293
    },
    {
      "epoch": 0.000273736572265625,
      "model_forward_time": 0.11474061012268066,
      "step": 44849
    },
    {
      "epoch": 0.000273736572265625,
      "step": 44849,
      "training_step_time": 0.4927351474761963
    },
    {
      "epoch": 0.00027374267578125,
      "grad_norm": 0.10679007321596146,
      "learning_rate": 1.6441199556794033e-05,
      "loss": 0.0361,
      "step": 44850
    },
    {
      "epoch": 0.00027374267578125,
      "model_forward_time": 0.11426711082458496,
      "step": 44850
    },
    {
      "epoch": 0.00027374267578125,
      "step": 44850,
      "training_step_time": 0.4557499885559082
    },
    {
      "epoch": 0.000273748779296875,
      "model_forward_time": 0.11497259140014648,
      "step": 44851
    },
    {
      "epoch": 0.000273748779296875,
      "step": 44851,
      "training_step_time": 0.41540956497192383
    },
    {
      "epoch": 0.0002737548828125,
      "model_forward_time": 0.11502861976623535,
      "step": 44852
    },
    {
      "epoch": 0.0002737548828125,
      "step": 44852,
      "training_step_time": 0.4053330421447754
    },
    {
      "epoch": 0.000273760986328125,
      "model_forward_time": 0.11470890045166016,
      "step": 44853
    },
    {
      "epoch": 0.000273760986328125,
      "step": 44853,
      "training_step_time": 0.387007474899292
    },
    {
      "epoch": 0.00027376708984375,
      "model_forward_time": 0.11552596092224121,
      "step": 44854
    },
    {
      "epoch": 0.00027376708984375,
      "step": 44854,
      "training_step_time": 0.44942140579223633
    },
    {
      "epoch": 0.000273773193359375,
      "model_forward_time": 0.11491751670837402,
      "step": 44855
    },
    {
      "epoch": 0.000273773193359375,
      "step": 44855,
      "training_step_time": 0.3997042179107666
    },
    {
      "epoch": 0.000273779296875,
      "model_forward_time": 0.11515498161315918,
      "step": 44856
    },
    {
      "epoch": 0.000273779296875,
      "step": 44856,
      "training_step_time": 0.3929908275604248
    },
    {
      "epoch": 0.000273785400390625,
      "model_forward_time": 0.11507534980773926,
      "step": 44857
    },
    {
      "epoch": 0.000273785400390625,
      "step": 44857,
      "training_step_time": 0.3968930244445801
    },
    {
      "epoch": 0.00027379150390625,
      "model_forward_time": 0.11506056785583496,
      "step": 44858
    },
    {
      "epoch": 0.00027379150390625,
      "step": 44858,
      "training_step_time": 0.3905909061431885
    },
    {
      "epoch": 0.000273797607421875,
      "model_forward_time": 0.11524486541748047,
      "step": 44859
    },
    {
      "epoch": 0.000273797607421875,
      "step": 44859,
      "training_step_time": 0.39018797874450684
    },
    {
      "epoch": 0.0002738037109375,
      "grad_norm": 0.11630856990814209,
      "learning_rate": 1.6420776089735827e-05,
      "loss": 0.0331,
      "step": 44860
    },
    {
      "epoch": 0.0002738037109375,
      "model_forward_time": 0.1148991584777832,
      "step": 44860
    },
    {
      "epoch": 0.0002738037109375,
      "step": 44860,
      "training_step_time": 0.41417479515075684
    },
    {
      "epoch": 0.000273809814453125,
      "model_forward_time": 0.11525607109069824,
      "step": 44861
    },
    {
      "epoch": 0.000273809814453125,
      "step": 44861,
      "training_step_time": 0.41201210021972656
    },
    {
      "epoch": 0.00027381591796875,
      "model_forward_time": 0.11509108543395996,
      "step": 44862
    },
    {
      "epoch": 0.00027381591796875,
      "step": 44862,
      "training_step_time": 0.4843316078186035
    },
    {
      "epoch": 0.000273822021484375,
      "model_forward_time": 0.11548209190368652,
      "step": 44863
    },
    {
      "epoch": 0.000273822021484375,
      "step": 44863,
      "training_step_time": 0.3992602825164795
    },
    {
      "epoch": 0.000273828125,
      "model_forward_time": 0.11412644386291504,
      "step": 44864
    },
    {
      "epoch": 0.000273828125,
      "step": 44864,
      "training_step_time": 0.45344972610473633
    },
    {
      "epoch": 0.000273834228515625,
      "model_forward_time": 0.11475157737731934,
      "step": 44865
    },
    {
      "epoch": 0.000273834228515625,
      "step": 44865,
      "training_step_time": 0.4703707695007324
    },
    {
      "epoch": 0.00027384033203125,
      "model_forward_time": 0.11454892158508301,
      "step": 44866
    },
    {
      "epoch": 0.00027384033203125,
      "step": 44866,
      "training_step_time": 0.4351768493652344
    },
    {
      "epoch": 0.000273846435546875,
      "model_forward_time": 0.11492300033569336,
      "step": 44867
    },
    {
      "epoch": 0.000273846435546875,
      "step": 44867,
      "training_step_time": 0.4349639415740967
    },
    {
      "epoch": 0.0002738525390625,
      "model_forward_time": 0.11494684219360352,
      "step": 44868
    },
    {
      "epoch": 0.0002738525390625,
      "step": 44868,
      "training_step_time": 0.4261353015899658
    },
    {
      "epoch": 0.000273858642578125,
      "model_forward_time": 0.11501431465148926,
      "step": 44869
    },
    {
      "epoch": 0.000273858642578125,
      "step": 44869,
      "training_step_time": 0.4062066078186035
    },
    {
      "epoch": 0.00027386474609375,
      "grad_norm": 0.10890018939971924,
      "learning_rate": 1.6400362823159228e-05,
      "loss": 0.0403,
      "step": 44870
    },
    {
      "epoch": 0.00027386474609375,
      "model_forward_time": 0.11484408378601074,
      "step": 44870
    },
    {
      "epoch": 0.00027386474609375,
      "step": 44870,
      "training_step_time": 0.404219388961792
    },
    {
      "epoch": 0.000273870849609375,
      "model_forward_time": 0.11481499671936035,
      "step": 44871
    },
    {
      "epoch": 0.000273870849609375,
      "step": 44871,
      "training_step_time": 0.39867520332336426
    },
    {
      "epoch": 0.000273876953125,
      "model_forward_time": 0.11501622200012207,
      "step": 44872
    },
    {
      "epoch": 0.000273876953125,
      "step": 44872,
      "training_step_time": 0.38362669944763184
    },
    {
      "epoch": 0.000273883056640625,
      "model_forward_time": 0.11570525169372559,
      "step": 44873
    },
    {
      "epoch": 0.000273883056640625,
      "step": 44873,
      "training_step_time": 0.39660024642944336
    },
    {
      "epoch": 0.00027388916015625,
      "model_forward_time": 0.11533021926879883,
      "step": 44874
    },
    {
      "epoch": 0.00027388916015625,
      "step": 44874,
      "training_step_time": 0.39222049713134766
    },
    {
      "epoch": 0.000273895263671875,
      "model_forward_time": 0.11472368240356445,
      "step": 44875
    },
    {
      "epoch": 0.000273895263671875,
      "step": 44875,
      "training_step_time": 0.4902069568634033
    },
    {
      "epoch": 0.0002739013671875,
      "model_forward_time": 0.11513280868530273,
      "step": 44876
    },
    {
      "epoch": 0.0002739013671875,
      "step": 44876,
      "training_step_time": 0.4619579315185547
    },
    {
      "epoch": 0.000273907470703125,
      "model_forward_time": 0.11507010459899902,
      "step": 44877
    },
    {
      "epoch": 0.000273907470703125,
      "step": 44877,
      "training_step_time": 0.4698631763458252
    },
    {
      "epoch": 0.00027391357421875,
      "model_forward_time": 0.11531448364257812,
      "step": 44878
    },
    {
      "epoch": 0.00027391357421875,
      "step": 44878,
      "training_step_time": 0.4040262699127197
    },
    {
      "epoch": 0.000273919677734375,
      "model_forward_time": 0.11565041542053223,
      "step": 44879
    },
    {
      "epoch": 0.000273919677734375,
      "step": 44879,
      "training_step_time": 0.49904823303222656
    },
    {
      "epoch": 0.00027392578125,
      "grad_norm": 0.08666640520095825,
      "learning_rate": 1.637995976326527e-05,
      "loss": 0.0331,
      "step": 44880
    },
    {
      "epoch": 0.00027392578125,
      "model_forward_time": 0.11439251899719238,
      "step": 44880
    },
    {
      "epoch": 0.00027392578125,
      "step": 44880,
      "training_step_time": 0.40681886672973633
    },
    {
      "epoch": 0.000273931884765625,
      "model_forward_time": 0.11457252502441406,
      "step": 44881
    },
    {
      "epoch": 0.000273931884765625,
      "step": 44881,
      "training_step_time": 0.42649173736572266
    },
    {
      "epoch": 0.00027393798828125,
      "model_forward_time": 0.11463308334350586,
      "step": 44882
    },
    {
      "epoch": 0.00027393798828125,
      "step": 44882,
      "training_step_time": 0.4189469814300537
    },
    {
      "epoch": 0.000273944091796875,
      "model_forward_time": 0.1154167652130127,
      "step": 44883
    },
    {
      "epoch": 0.000273944091796875,
      "step": 44883,
      "training_step_time": 0.38724684715270996
    },
    {
      "epoch": 0.0002739501953125,
      "model_forward_time": 0.11435842514038086,
      "step": 44884
    },
    {
      "epoch": 0.0002739501953125,
      "step": 44884,
      "training_step_time": 0.4038681983947754
    },
    {
      "epoch": 0.000273956298828125,
      "model_forward_time": 0.11494827270507812,
      "step": 44885
    },
    {
      "epoch": 0.000273956298828125,
      "step": 44885,
      "training_step_time": 0.4006235599517822
    },
    {
      "epoch": 0.00027396240234375,
      "model_forward_time": 0.11604690551757812,
      "step": 44886
    },
    {
      "epoch": 0.00027396240234375,
      "step": 44886,
      "training_step_time": 0.39790916442871094
    },
    {
      "epoch": 0.000273968505859375,
      "model_forward_time": 0.11517596244812012,
      "step": 44887
    },
    {
      "epoch": 0.000273968505859375,
      "step": 44887,
      "training_step_time": 0.3925936222076416
    },
    {
      "epoch": 0.000273974609375,
      "model_forward_time": 0.11530208587646484,
      "step": 44888
    },
    {
      "epoch": 0.000273974609375,
      "step": 44888,
      "training_step_time": 0.40357422828674316
    },
    {
      "epoch": 0.000273980712890625,
      "model_forward_time": 0.1156315803527832,
      "step": 44889
    },
    {
      "epoch": 0.000273980712890625,
      "step": 44889,
      "training_step_time": 0.4079272747039795
    },
    {
      "epoch": 0.00027398681640625,
      "grad_norm": 0.10605917125940323,
      "learning_rate": 1.6359566916251845e-05,
      "loss": 0.0366,
      "step": 44890
    },
    {
      "epoch": 0.00027398681640625,
      "model_forward_time": 0.11515927314758301,
      "step": 44890
    },
    {
      "epoch": 0.00027398681640625,
      "step": 44890,
      "training_step_time": 0.4088423252105713
    },
    {
      "epoch": 0.000273992919921875,
      "model_forward_time": 0.11532282829284668,
      "step": 44891
    },
    {
      "epoch": 0.000273992919921875,
      "step": 44891,
      "training_step_time": 0.45603084564208984
    },
    {
      "epoch": 0.0002739990234375,
      "model_forward_time": 0.11492395401000977,
      "step": 44892
    },
    {
      "epoch": 0.0002739990234375,
      "step": 44892,
      "training_step_time": 0.48277854919433594
    },
    {
      "epoch": 0.000274005126953125,
      "model_forward_time": 0.11496257781982422,
      "step": 44893
    },
    {
      "epoch": 0.000274005126953125,
      "step": 44893,
      "training_step_time": 0.5079255104064941
    },
    {
      "epoch": 0.00027401123046875,
      "model_forward_time": 0.11497020721435547,
      "step": 44894
    },
    {
      "epoch": 0.00027401123046875,
      "step": 44894,
      "training_step_time": 0.4557638168334961
    },
    {
      "epoch": 0.000274017333984375,
      "model_forward_time": 0.11495542526245117,
      "step": 44895
    },
    {
      "epoch": 0.000274017333984375,
      "step": 44895,
      "training_step_time": 0.4594399929046631
    },
    {
      "epoch": 0.0002740234375,
      "model_forward_time": 0.11513710021972656,
      "step": 44896
    },
    {
      "epoch": 0.0002740234375,
      "step": 44896,
      "training_step_time": 0.4041438102722168
    },
    {
      "epoch": 0.000274029541015625,
      "model_forward_time": 0.11443305015563965,
      "step": 44897
    },
    {
      "epoch": 0.000274029541015625,
      "step": 44897,
      "training_step_time": 0.39124155044555664
    },
    {
      "epoch": 0.00027403564453125,
      "model_forward_time": 0.11505866050720215,
      "step": 44898
    },
    {
      "epoch": 0.00027403564453125,
      "step": 44898,
      "training_step_time": 0.39046645164489746
    },
    {
      "epoch": 0.000274041748046875,
      "model_forward_time": 0.11481380462646484,
      "step": 44899
    },
    {
      "epoch": 0.000274041748046875,
      "step": 44899,
      "training_step_time": 0.3952443599700928
    },
    {
      "epoch": 0.0002740478515625,
      "grad_norm": 0.10527694225311279,
      "learning_rate": 1.633918428831377e-05,
      "loss": 0.0391,
      "step": 44900
    },
    {
      "epoch": 0.0002740478515625,
      "model_forward_time": 0.11492729187011719,
      "step": 44900
    },
    {
      "epoch": 0.0002740478515625,
      "step": 44900,
      "training_step_time": 0.39348888397216797
    },
    {
      "epoch": 0.000274053955078125,
      "model_forward_time": 0.11571002006530762,
      "step": 44901
    },
    {
      "epoch": 0.000274053955078125,
      "step": 44901,
      "training_step_time": 0.39215803146362305
    },
    {
      "epoch": 0.00027406005859375,
      "model_forward_time": 0.11507225036621094,
      "step": 44902
    },
    {
      "epoch": 0.00027406005859375,
      "step": 44902,
      "training_step_time": 0.3959641456604004
    },
    {
      "epoch": 0.000274066162109375,
      "model_forward_time": 0.11532115936279297,
      "step": 44903
    },
    {
      "epoch": 0.000274066162109375,
      "step": 44903,
      "training_step_time": 0.39407920837402344
    },
    {
      "epoch": 0.000274072265625,
      "model_forward_time": 0.11480498313903809,
      "step": 44904
    },
    {
      "epoch": 0.000274072265625,
      "step": 44904,
      "training_step_time": 0.4088714122772217
    },
    {
      "epoch": 0.000274078369140625,
      "model_forward_time": 0.11589956283569336,
      "step": 44905
    },
    {
      "epoch": 0.000274078369140625,
      "step": 44905,
      "training_step_time": 0.41748976707458496
    },
    {
      "epoch": 0.00027408447265625,
      "model_forward_time": 0.11495137214660645,
      "step": 44906
    },
    {
      "epoch": 0.00027408447265625,
      "step": 44906,
      "training_step_time": 0.46488261222839355
    },
    {
      "epoch": 0.000274090576171875,
      "model_forward_time": 0.11535811424255371,
      "step": 44907
    },
    {
      "epoch": 0.000274090576171875,
      "step": 44907,
      "training_step_time": 0.3963639736175537
    },
    {
      "epoch": 0.0002740966796875,
      "model_forward_time": 0.11466503143310547,
      "step": 44908
    },
    {
      "epoch": 0.0002740966796875,
      "step": 44908,
      "training_step_time": 0.39209842681884766
    },
    {
      "epoch": 0.000274102783203125,
      "model_forward_time": 0.11493921279907227,
      "step": 44909
    },
    {
      "epoch": 0.000274102783203125,
      "step": 44909,
      "training_step_time": 0.5049412250518799
    },
    {
      "epoch": 0.00027410888671875,
      "grad_norm": 0.07734417915344238,
      "learning_rate": 1.631881188564275e-05,
      "loss": 0.0344,
      "step": 44910
    },
    {
      "epoch": 0.00027410888671875,
      "model_forward_time": 0.11459064483642578,
      "step": 44910
    },
    {
      "epoch": 0.00027410888671875,
      "step": 44910,
      "training_step_time": 0.3989756107330322
    },
    {
      "epoch": 0.000274114990234375,
      "model_forward_time": 0.11473894119262695,
      "step": 44911
    },
    {
      "epoch": 0.000274114990234375,
      "step": 44911,
      "training_step_time": 0.3844563961029053
    },
    {
      "epoch": 0.00027412109375,
      "model_forward_time": 0.11513900756835938,
      "step": 44912
    },
    {
      "epoch": 0.00027412109375,
      "step": 44912,
      "training_step_time": 0.398853063583374
    },
    {
      "epoch": 0.000274127197265625,
      "model_forward_time": 0.11484146118164062,
      "step": 44913
    },
    {
      "epoch": 0.000274127197265625,
      "step": 44913,
      "training_step_time": 0.39337611198425293
    },
    {
      "epoch": 0.00027413330078125,
      "model_forward_time": 0.1147148609161377,
      "step": 44914
    },
    {
      "epoch": 0.00027413330078125,
      "step": 44914,
      "training_step_time": 0.40213465690612793
    },
    {
      "epoch": 0.000274139404296875,
      "model_forward_time": 0.11582040786743164,
      "step": 44915
    },
    {
      "epoch": 0.000274139404296875,
      "step": 44915,
      "training_step_time": 0.39129042625427246
    },
    {
      "epoch": 0.0002741455078125,
      "model_forward_time": 0.1155538558959961,
      "step": 44916
    },
    {
      "epoch": 0.0002741455078125,
      "step": 44916,
      "training_step_time": 0.39751243591308594
    },
    {
      "epoch": 0.000274151611328125,
      "model_forward_time": 0.11535000801086426,
      "step": 44917
    },
    {
      "epoch": 0.000274151611328125,
      "step": 44917,
      "training_step_time": 0.3990333080291748
    },
    {
      "epoch": 0.00027415771484375,
      "model_forward_time": 0.11560487747192383,
      "step": 44918
    },
    {
      "epoch": 0.00027415771484375,
      "step": 44918,
      "training_step_time": 0.40096497535705566
    },
    {
      "epoch": 0.000274163818359375,
      "model_forward_time": 0.11507391929626465,
      "step": 44919
    },
    {
      "epoch": 0.000274163818359375,
      "step": 44919,
      "training_step_time": 0.4911363124847412
    },
    {
      "epoch": 0.000274169921875,
      "grad_norm": 0.08168430626392365,
      "learning_rate": 1.6298449714427355e-05,
      "loss": 0.037,
      "step": 44920
    },
    {
      "epoch": 0.000274169921875,
      "model_forward_time": 0.11504054069519043,
      "step": 44920
    },
    {
      "epoch": 0.000274169921875,
      "step": 44920,
      "training_step_time": 0.3975226879119873
    },
    {
      "epoch": 0.000274176025390625,
      "model_forward_time": 0.11521601676940918,
      "step": 44921
    },
    {
      "epoch": 0.000274176025390625,
      "step": 44921,
      "training_step_time": 0.44803905487060547
    },
    {
      "epoch": 0.00027418212890625,
      "model_forward_time": 0.11506080627441406,
      "step": 44922
    },
    {
      "epoch": 0.00027418212890625,
      "step": 44922,
      "training_step_time": 0.4548964500427246
    },
    {
      "epoch": 0.000274188232421875,
      "model_forward_time": 0.11556053161621094,
      "step": 44923
    },
    {
      "epoch": 0.000274188232421875,
      "step": 44923,
      "training_step_time": 0.4962136745452881
    },
    {
      "epoch": 0.0002741943359375,
      "model_forward_time": 0.11482429504394531,
      "step": 44924
    },
    {
      "epoch": 0.0002741943359375,
      "step": 44924,
      "training_step_time": 0.4176173210144043
    },
    {
      "epoch": 0.000274200439453125,
      "model_forward_time": 0.1151437759399414,
      "step": 44925
    },
    {
      "epoch": 0.000274200439453125,
      "step": 44925,
      "training_step_time": 0.41539859771728516
    },
    {
      "epoch": 0.00027420654296875,
      "model_forward_time": 0.11522579193115234,
      "step": 44926
    },
    {
      "epoch": 0.00027420654296875,
      "step": 44926,
      "training_step_time": 0.3859105110168457
    },
    {
      "epoch": 0.000274212646484375,
      "model_forward_time": 0.11530613899230957,
      "step": 44927
    },
    {
      "epoch": 0.000274212646484375,
      "step": 44927,
      "training_step_time": 0.39009857177734375
    },
    {
      "epoch": 0.00027421875,
      "model_forward_time": 0.11526346206665039,
      "step": 44928
    },
    {
      "epoch": 0.00027421875,
      "step": 44928,
      "training_step_time": 0.4035017490386963
    },
    {
      "epoch": 0.000274224853515625,
      "model_forward_time": 0.11607623100280762,
      "step": 44929
    },
    {
      "epoch": 0.000274224853515625,
      "step": 44929,
      "training_step_time": 0.39353466033935547
    },
    {
      "epoch": 0.00027423095703125,
      "grad_norm": 0.08687330782413483,
      "learning_rate": 1.6278097780853136e-05,
      "loss": 0.0341,
      "step": 44930
    },
    {
      "epoch": 0.00027423095703125,
      "model_forward_time": 0.11563324928283691,
      "step": 44930
    },
    {
      "epoch": 0.00027423095703125,
      "step": 44930,
      "training_step_time": 0.3932650089263916
    },
    {
      "epoch": 0.000274237060546875,
      "model_forward_time": 0.11467242240905762,
      "step": 44931
    },
    {
      "epoch": 0.000274237060546875,
      "step": 44931,
      "training_step_time": 0.4024183750152588
    },
    {
      "epoch": 0.0002742431640625,
      "model_forward_time": 0.11547088623046875,
      "step": 44932
    },
    {
      "epoch": 0.0002742431640625,
      "step": 44932,
      "training_step_time": 0.39000844955444336
    },
    {
      "epoch": 0.000274249267578125,
      "model_forward_time": 0.1149439811706543,
      "step": 44933
    },
    {
      "epoch": 0.000274249267578125,
      "step": 44933,
      "training_step_time": 0.3939535617828369
    },
    {
      "epoch": 0.00027425537109375,
      "model_forward_time": 0.11559104919433594,
      "step": 44934
    },
    {
      "epoch": 0.00027425537109375,
      "step": 44934,
      "training_step_time": 0.48439884185791016
    },
    {
      "epoch": 0.000274261474609375,
      "model_forward_time": 0.11505985260009766,
      "step": 44935
    },
    {
      "epoch": 0.000274261474609375,
      "step": 44935,
      "training_step_time": 0.42520976066589355
    },
    {
      "epoch": 0.000274267578125,
      "model_forward_time": 0.11498451232910156,
      "step": 44936
    },
    {
      "epoch": 0.000274267578125,
      "step": 44936,
      "training_step_time": 0.4400479793548584
    },
    {
      "epoch": 0.000274273681640625,
      "model_forward_time": 0.11490345001220703,
      "step": 44937
    },
    {
      "epoch": 0.000274273681640625,
      "step": 44937,
      "training_step_time": 0.4052467346191406
    },
    {
      "epoch": 0.00027427978515625,
      "model_forward_time": 0.1150660514831543,
      "step": 44938
    },
    {
      "epoch": 0.00027427978515625,
      "step": 44938,
      "training_step_time": 0.4904754161834717
    },
    {
      "epoch": 0.000274285888671875,
      "model_forward_time": 0.11503243446350098,
      "step": 44939
    },
    {
      "epoch": 0.000274285888671875,
      "step": 44939,
      "training_step_time": 0.39875221252441406
    },
    {
      "epoch": 0.0002742919921875,
      "grad_norm": 0.07768527418375015,
      "learning_rate": 1.62577560911024e-05,
      "loss": 0.0361,
      "step": 44940
    },
    {
      "epoch": 0.0002742919921875,
      "model_forward_time": 0.11491990089416504,
      "step": 44940
    },
    {
      "epoch": 0.0002742919921875,
      "step": 44940,
      "training_step_time": 0.4641749858856201
    },
    {
      "epoch": 0.000274298095703125,
      "model_forward_time": 0.11484694480895996,
      "step": 44941
    },
    {
      "epoch": 0.000274298095703125,
      "step": 44941,
      "training_step_time": 0.4012486934661865
    },
    {
      "epoch": 0.00027430419921875,
      "model_forward_time": 0.11538243293762207,
      "step": 44942
    },
    {
      "epoch": 0.00027430419921875,
      "step": 44942,
      "training_step_time": 0.3920009136199951
    },
    {
      "epoch": 0.000274310302734375,
      "model_forward_time": 0.11559081077575684,
      "step": 44943
    },
    {
      "epoch": 0.000274310302734375,
      "step": 44943,
      "training_step_time": 0.395139217376709
    },
    {
      "epoch": 0.00027431640625,
      "model_forward_time": 0.11508488655090332,
      "step": 44944
    },
    {
      "epoch": 0.00027431640625,
      "step": 44944,
      "training_step_time": 0.3949708938598633
    },
    {
      "epoch": 0.000274322509765625,
      "model_forward_time": 0.11505842208862305,
      "step": 44945
    },
    {
      "epoch": 0.000274322509765625,
      "step": 44945,
      "training_step_time": 0.39826011657714844
    },
    {
      "epoch": 0.00027432861328125,
      "model_forward_time": 0.11496591567993164,
      "step": 44946
    },
    {
      "epoch": 0.00027432861328125,
      "step": 44946,
      "training_step_time": 0.39479780197143555
    },
    {
      "epoch": 0.000274334716796875,
      "model_forward_time": 0.11534571647644043,
      "step": 44947
    },
    {
      "epoch": 0.000274334716796875,
      "step": 44947,
      "training_step_time": 0.3953819274902344
    },
    {
      "epoch": 0.0002743408203125,
      "model_forward_time": 0.11498665809631348,
      "step": 44948
    },
    {
      "epoch": 0.0002743408203125,
      "step": 44948,
      "training_step_time": 0.39980292320251465
    },
    {
      "epoch": 0.000274346923828125,
      "model_forward_time": 0.11569714546203613,
      "step": 44949
    },
    {
      "epoch": 0.000274346923828125,
      "step": 44949,
      "training_step_time": 0.4812612533569336
    },
    {
      "epoch": 0.00027435302734375,
      "grad_norm": 0.08850108087062836,
      "learning_rate": 1.623742465135449e-05,
      "loss": 0.0339,
      "step": 44950
    },
    {
      "epoch": 0.00027435302734375,
      "model_forward_time": 0.11470198631286621,
      "step": 44950
    },
    {
      "epoch": 0.00027435302734375,
      "step": 44950,
      "training_step_time": 0.4461829662322998
    },
    {
      "epoch": 0.000274359130859375,
      "model_forward_time": 0.1149289608001709,
      "step": 44951
    },
    {
      "epoch": 0.000274359130859375,
      "step": 44951,
      "training_step_time": 0.4192967414855957
    },
    {
      "epoch": 0.000274365234375,
      "model_forward_time": 0.11513233184814453,
      "step": 44952
    },
    {
      "epoch": 0.000274365234375,
      "step": 44952,
      "training_step_time": 0.4069802761077881
    },
    {
      "epoch": 0.000274371337890625,
      "model_forward_time": 0.1154470443725586,
      "step": 44953
    },
    {
      "epoch": 0.000274371337890625,
      "step": 44953,
      "training_step_time": 0.410433292388916
    },
    {
      "epoch": 0.00027437744140625,
      "model_forward_time": 0.1145181655883789,
      "step": 44954
    },
    {
      "epoch": 0.00027437744140625,
      "step": 44954,
      "training_step_time": 0.4101862907409668
    },
    {
      "epoch": 0.000274383544921875,
      "model_forward_time": 0.11554503440856934,
      "step": 44955
    },
    {
      "epoch": 0.000274383544921875,
      "step": 44955,
      "training_step_time": 0.38815832138061523
    },
    {
      "epoch": 0.0002743896484375,
      "model_forward_time": 0.11554384231567383,
      "step": 44956
    },
    {
      "epoch": 0.0002743896484375,
      "step": 44956,
      "training_step_time": 0.3882296085357666
    },
    {
      "epoch": 0.000274395751953125,
      "model_forward_time": 0.11516213417053223,
      "step": 44957
    },
    {
      "epoch": 0.000274395751953125,
      "step": 44957,
      "training_step_time": 0.40644001960754395
    },
    {
      "epoch": 0.00027440185546875,
      "model_forward_time": 0.11496758460998535,
      "step": 44958
    },
    {
      "epoch": 0.00027440185546875,
      "step": 44958,
      "training_step_time": 0.3922750949859619
    },
    {
      "epoch": 0.000274407958984375,
      "model_forward_time": 0.1154940128326416,
      "step": 44959
    },
    {
      "epoch": 0.000274407958984375,
      "step": 44959,
      "training_step_time": 0.39275288581848145
    },
    {
      "epoch": 0.0002744140625,
      "grad_norm": 0.12161029875278473,
      "learning_rate": 1.6217103467785484e-05,
      "loss": 0.0336,
      "step": 44960
    },
    {
      "epoch": 0.0002744140625,
      "model_forward_time": 0.11575722694396973,
      "step": 44960
    },
    {
      "epoch": 0.0002744140625,
      "step": 44960,
      "training_step_time": 0.39566731452941895
    },
    {
      "epoch": 0.000274420166015625,
      "model_forward_time": 0.11587858200073242,
      "step": 44961
    },
    {
      "epoch": 0.000274420166015625,
      "step": 44961,
      "training_step_time": 0.4010181427001953
    },
    {
      "epoch": 0.00027442626953125,
      "model_forward_time": 0.11512637138366699,
      "step": 44962
    },
    {
      "epoch": 0.00027442626953125,
      "step": 44962,
      "training_step_time": 0.40229249000549316
    },
    {
      "epoch": 0.000274432373046875,
      "model_forward_time": 0.11578941345214844,
      "step": 44963
    },
    {
      "epoch": 0.000274432373046875,
      "step": 44963,
      "training_step_time": 0.4170236587524414
    },
    {
      "epoch": 0.0002744384765625,
      "model_forward_time": 0.11505579948425293,
      "step": 44964
    },
    {
      "epoch": 0.0002744384765625,
      "step": 44964,
      "training_step_time": 0.4498298168182373
    },
    {
      "epoch": 0.000274444580078125,
      "model_forward_time": 0.11492085456848145,
      "step": 44965
    },
    {
      "epoch": 0.000274444580078125,
      "step": 44965,
      "training_step_time": 0.41856837272644043
    },
    {
      "epoch": 0.00027445068359375,
      "model_forward_time": 0.11515164375305176,
      "step": 44966
    },
    {
      "epoch": 0.00027445068359375,
      "step": 44966,
      "training_step_time": 0.43924427032470703
    },
    {
      "epoch": 0.000274456787109375,
      "model_forward_time": 0.11519026756286621,
      "step": 44967
    },
    {
      "epoch": 0.000274456787109375,
      "step": 44967,
      "training_step_time": 0.44663166999816895
    },
    {
      "epoch": 0.000274462890625,
      "model_forward_time": 0.1147468090057373,
      "step": 44968
    },
    {
      "epoch": 0.000274462890625,
      "step": 44968,
      "training_step_time": 0.4882469177246094
    },
    {
      "epoch": 0.000274468994140625,
      "model_forward_time": 0.11488008499145508,
      "step": 44969
    },
    {
      "epoch": 0.000274468994140625,
      "step": 44969,
      "training_step_time": 0.4227261543273926
    },
    {
      "epoch": 0.00027447509765625,
      "grad_norm": 0.10837066918611526,
      "learning_rate": 1.6196792546568472e-05,
      "loss": 0.0326,
      "step": 44970
    },
    {
      "epoch": 0.00027447509765625,
      "model_forward_time": 0.11453771591186523,
      "step": 44970
    },
    {
      "epoch": 0.00027447509765625,
      "step": 44970,
      "training_step_time": 0.39131879806518555
    },
    {
      "epoch": 0.000274481201171875,
      "model_forward_time": 0.11523008346557617,
      "step": 44971
    },
    {
      "epoch": 0.000274481201171875,
      "step": 44971,
      "training_step_time": 0.4003312587738037
    },
    {
      "epoch": 0.0002744873046875,
      "model_forward_time": 0.11529278755187988,
      "step": 44972
    },
    {
      "epoch": 0.0002744873046875,
      "step": 44972,
      "training_step_time": 0.39362263679504395
    },
    {
      "epoch": 0.000274493408203125,
      "model_forward_time": 0.11437463760375977,
      "step": 44973
    },
    {
      "epoch": 0.000274493408203125,
      "step": 44973,
      "training_step_time": 0.382354736328125
    },
    {
      "epoch": 0.00027449951171875,
      "model_forward_time": 0.1146547794342041,
      "step": 44974
    },
    {
      "epoch": 0.00027449951171875,
      "step": 44974,
      "training_step_time": 0.40435171127319336
    },
    {
      "epoch": 0.000274505615234375,
      "model_forward_time": 0.11531734466552734,
      "step": 44975
    },
    {
      "epoch": 0.000274505615234375,
      "step": 44975,
      "training_step_time": 0.3873417377471924
    },
    {
      "epoch": 0.00027451171875,
      "model_forward_time": 0.1155240535736084,
      "step": 44976
    },
    {
      "epoch": 0.00027451171875,
      "step": 44976,
      "training_step_time": 0.4198434352874756
    },
    {
      "epoch": 0.000274517822265625,
      "model_forward_time": 0.11508011817932129,
      "step": 44977
    },
    {
      "epoch": 0.000274517822265625,
      "step": 44977,
      "training_step_time": 0.4104030132293701
    },
    {
      "epoch": 0.00027452392578125,
      "model_forward_time": 0.11661696434020996,
      "step": 44978
    },
    {
      "epoch": 0.00027452392578125,
      "step": 44978,
      "training_step_time": 0.3957827091217041
    },
    {
      "epoch": 0.000274530029296875,
      "model_forward_time": 0.11528921127319336,
      "step": 44979
    },
    {
      "epoch": 0.000274530029296875,
      "step": 44979,
      "training_step_time": 0.39896726608276367
    },
    {
      "epoch": 0.0002745361328125,
      "grad_norm": 0.13494417071342468,
      "learning_rate": 1.617649189387337e-05,
      "loss": 0.0317,
      "step": 44980
    },
    {
      "epoch": 0.0002745361328125,
      "model_forward_time": 0.11495757102966309,
      "step": 44980
    },
    {
      "epoch": 0.0002745361328125,
      "step": 44980,
      "training_step_time": 0.49186182022094727
    },
    {
      "epoch": 0.000274542236328125,
      "model_forward_time": 0.11542868614196777,
      "step": 44981
    },
    {
      "epoch": 0.000274542236328125,
      "step": 44981,
      "training_step_time": 0.4470839500427246
    },
    {
      "epoch": 0.00027454833984375,
      "model_forward_time": 0.11515522003173828,
      "step": 44982
    },
    {
      "epoch": 0.00027454833984375,
      "step": 44982,
      "training_step_time": 0.4929063320159912
    },
    {
      "epoch": 0.000274554443359375,
      "model_forward_time": 0.11578989028930664,
      "step": 44983
    },
    {
      "epoch": 0.000274554443359375,
      "step": 44983,
      "training_step_time": 0.5042333602905273
    },
    {
      "epoch": 0.000274560546875,
      "model_forward_time": 0.1141960620880127,
      "step": 44984
    },
    {
      "epoch": 0.000274560546875,
      "step": 44984,
      "training_step_time": 0.4322068691253662
    },
    {
      "epoch": 0.000274566650390625,
      "model_forward_time": 0.11438465118408203,
      "step": 44985
    },
    {
      "epoch": 0.000274566650390625,
      "step": 44985,
      "training_step_time": 0.387498140335083
    },
    {
      "epoch": 0.00027457275390625,
      "model_forward_time": 0.11543464660644531,
      "step": 44986
    },
    {
      "epoch": 0.00027457275390625,
      "step": 44986,
      "training_step_time": 0.39850378036499023
    },
    {
      "epoch": 0.000274578857421875,
      "model_forward_time": 0.11489081382751465,
      "step": 44987
    },
    {
      "epoch": 0.000274578857421875,
      "step": 44987,
      "training_step_time": 0.3957810401916504
    },
    {
      "epoch": 0.0002745849609375,
      "model_forward_time": 0.11575436592102051,
      "step": 44988
    },
    {
      "epoch": 0.0002745849609375,
      "step": 44988,
      "training_step_time": 0.3960425853729248
    },
    {
      "epoch": 0.000274591064453125,
      "model_forward_time": 0.12314200401306152,
      "step": 44989
    },
    {
      "epoch": 0.000274591064453125,
      "step": 44989,
      "training_step_time": 0.47647714614868164
    },
    {
      "epoch": 0.00027459716796875,
      "grad_norm": 0.08952059596776962,
      "learning_rate": 1.615620151586697e-05,
      "loss": 0.0349,
      "step": 44990
    },
    {
      "epoch": 0.00027459716796875,
      "model_forward_time": 0.11983847618103027,
      "step": 44990
    },
    {
      "epoch": 0.00027459716796875,
      "step": 44990,
      "training_step_time": 0.554898738861084
    },
    {
      "epoch": 0.000274603271484375,
      "model_forward_time": 0.11760258674621582,
      "step": 44991
    },
    {
      "epoch": 0.000274603271484375,
      "step": 44991,
      "training_step_time": 0.6221940517425537
    },
    {
      "epoch": 0.000274609375,
      "model_forward_time": 0.11902809143066406,
      "step": 44992
    },
    {
      "epoch": 0.000274609375,
      "step": 44992,
      "training_step_time": 0.7018101215362549
    },
    {
      "epoch": 0.000274615478515625,
      "model_forward_time": 0.12235236167907715,
      "step": 44993
    },
    {
      "epoch": 0.000274615478515625,
      "step": 44993,
      "training_step_time": 0.7673351764678955
    },
    {
      "epoch": 0.00027462158203125,
      "model_forward_time": 0.11760616302490234,
      "step": 44994
    },
    {
      "epoch": 0.00027462158203125,
      "step": 44994,
      "training_step_time": 0.6982977390289307
    },
    {
      "epoch": 0.000274627685546875,
      "model_forward_time": 0.11989378929138184,
      "step": 44995
    },
    {
      "epoch": 0.000274627685546875,
      "step": 44995,
      "training_step_time": 0.6773903369903564
    },
    {
      "epoch": 0.0002746337890625,
      "model_forward_time": 0.1302781105041504,
      "step": 44996
    },
    {
      "epoch": 0.0002746337890625,
      "step": 44996,
      "training_step_time": 0.6394612789154053
    },
    {
      "epoch": 0.000274639892578125,
      "model_forward_time": 0.11740851402282715,
      "step": 44997
    },
    {
      "epoch": 0.000274639892578125,
      "step": 44997,
      "training_step_time": 0.6569464206695557
    },
    {
      "epoch": 0.00027464599609375,
      "model_forward_time": 0.11850118637084961,
      "step": 44998
    },
    {
      "epoch": 0.00027464599609375,
      "step": 44998,
      "training_step_time": 0.6361589431762695
    },
    {
      "epoch": 0.000274652099609375,
      "model_forward_time": 0.11627817153930664,
      "step": 44999
    },
    {
      "epoch": 0.000274652099609375,
      "step": 44999,
      "training_step_time": 0.7456257343292236
    },
    {
      "epoch": 0.000274658203125,
      "grad_norm": 0.10006517171859741,
      "learning_rate": 1.6135921418712956e-05,
      "loss": 0.0369,
      "step": 45000
    },
    {
      "epoch": 0.000274658203125,
      "model_forward_time": 0.11660933494567871,
      "step": 45000
    },
    {
      "epoch": 0.000274658203125,
      "step": 45000,
      "training_step_time": 0.4478321075439453
    },
    {
      "epoch": 0.000274664306640625,
      "model_forward_time": 0.11665678024291992,
      "step": 45001
    },
    {
      "epoch": 0.000274664306640625,
      "step": 45001,
      "training_step_time": 0.6466529369354248
    },
    {
      "epoch": 0.00027467041015625,
      "model_forward_time": 0.1174933910369873,
      "step": 45002
    },
    {
      "epoch": 0.00027467041015625,
      "step": 45002,
      "training_step_time": 0.659186601638794
    },
    {
      "epoch": 0.000274676513671875,
      "model_forward_time": 0.12169241905212402,
      "step": 45003
    },
    {
      "epoch": 0.000274676513671875,
      "step": 45003,
      "training_step_time": 0.6792099475860596
    },
    {
      "epoch": 0.0002746826171875,
      "model_forward_time": 0.11775612831115723,
      "step": 45004
    },
    {
      "epoch": 0.0002746826171875,
      "step": 45004,
      "training_step_time": 0.734438419342041
    },
    {
      "epoch": 0.000274688720703125,
      "model_forward_time": 0.1301710605621338,
      "step": 45005
    },
    {
      "epoch": 0.000274688720703125,
      "step": 45005,
      "training_step_time": 0.6347494125366211
    },
    {
      "epoch": 0.00027469482421875,
      "model_forward_time": 0.1195363998413086,
      "step": 45006
    },
    {
      "epoch": 0.00027469482421875,
      "step": 45006,
      "training_step_time": 0.6783909797668457
    },
    {
      "epoch": 0.000274700927734375,
      "model_forward_time": 0.11967587471008301,
      "step": 45007
    },
    {
      "epoch": 0.000274700927734375,
      "step": 45007,
      "training_step_time": 0.6740050315856934
    },
    {
      "epoch": 0.00027470703125,
      "model_forward_time": 0.11909008026123047,
      "step": 45008
    },
    {
      "epoch": 0.00027470703125,
      "step": 45008,
      "training_step_time": 0.6481485366821289
    },
    {
      "epoch": 0.000274713134765625,
      "model_forward_time": 0.11807107925415039,
      "step": 45009
    },
    {
      "epoch": 0.000274713134765625,
      "step": 45009,
      "training_step_time": 0.6707069873809814
    },
    {
      "epoch": 0.00027471923828125,
      "grad_norm": 0.10897723585367203,
      "learning_rate": 1.6115651608571887e-05,
      "loss": 0.0391,
      "step": 45010
    },
    {
      "epoch": 0.00027471923828125,
      "model_forward_time": 0.11747145652770996,
      "step": 45010
    },
    {
      "epoch": 0.00027471923828125,
      "step": 45010,
      "training_step_time": 0.7268517017364502
    },
    {
      "epoch": 0.000274725341796875,
      "model_forward_time": 0.11906576156616211,
      "step": 45011
    },
    {
      "epoch": 0.000274725341796875,
      "step": 45011,
      "training_step_time": 0.7457752227783203
    },
    {
      "epoch": 0.0002747314453125,
      "model_forward_time": 0.12141895294189453,
      "step": 45012
    },
    {
      "epoch": 0.0002747314453125,
      "step": 45012,
      "training_step_time": 0.7309386730194092
    },
    {
      "epoch": 0.000274737548828125,
      "model_forward_time": 0.11975955963134766,
      "step": 45013
    },
    {
      "epoch": 0.000274737548828125,
      "step": 45013,
      "training_step_time": 0.7532436847686768
    },
    {
      "epoch": 0.00027474365234375,
      "model_forward_time": 0.1324753761291504,
      "step": 45014
    },
    {
      "epoch": 0.00027474365234375,
      "step": 45014,
      "training_step_time": 0.6628060340881348
    },
    {
      "epoch": 0.000274749755859375,
      "model_forward_time": 0.1289806365966797,
      "step": 45015
    },
    {
      "epoch": 0.000274749755859375,
      "step": 45015,
      "training_step_time": 0.646784782409668
    },
    {
      "epoch": 0.000274755859375,
      "model_forward_time": 0.11846160888671875,
      "step": 45016
    },
    {
      "epoch": 0.000274755859375,
      "step": 45016,
      "training_step_time": 0.6490938663482666
    },
    {
      "epoch": 0.000274761962890625,
      "model_forward_time": 0.11917853355407715,
      "step": 45017
    },
    {
      "epoch": 0.000274761962890625,
      "step": 45017,
      "training_step_time": 0.6395056247711182
    },
    {
      "epoch": 0.00027476806640625,
      "model_forward_time": 0.11605381965637207,
      "step": 45018
    },
    {
      "epoch": 0.00027476806640625,
      "step": 45018,
      "training_step_time": 0.6569938659667969
    },
    {
      "epoch": 0.000274774169921875,
      "model_forward_time": 0.11774635314941406,
      "step": 45019
    },
    {
      "epoch": 0.000274774169921875,
      "step": 45019,
      "training_step_time": 0.6412169933319092
    },
    {
      "epoch": 0.0002747802734375,
      "grad_norm": 0.10370822995901108,
      "learning_rate": 1.6095392091601175e-05,
      "loss": 0.0425,
      "step": 45020
    },
    {
      "epoch": 0.0002747802734375,
      "model_forward_time": 0.1217656135559082,
      "step": 45020
    },
    {
      "epoch": 0.0002747802734375,
      "step": 45020,
      "training_step_time": 0.6862587928771973
    },
    {
      "epoch": 0.000274786376953125,
      "model_forward_time": 0.12202334403991699,
      "step": 45021
    },
    {
      "epoch": 0.000274786376953125,
      "step": 45021,
      "training_step_time": 0.7023851871490479
    },
    {
      "epoch": 0.00027479248046875,
      "model_forward_time": 0.11742734909057617,
      "step": 45022
    },
    {
      "epoch": 0.00027479248046875,
      "step": 45022,
      "training_step_time": 0.666346549987793
    },
    {
      "epoch": 0.000274798583984375,
      "model_forward_time": 0.11792826652526855,
      "step": 45023
    },
    {
      "epoch": 0.000274798583984375,
      "step": 45023,
      "training_step_time": 0.675156831741333
    },
    {
      "epoch": 0.0002748046875,
      "model_forward_time": 0.11838197708129883,
      "step": 45024
    },
    {
      "epoch": 0.0002748046875,
      "step": 45024,
      "training_step_time": 0.6634507179260254
    },
    {
      "epoch": 0.000274810791015625,
      "model_forward_time": 0.11804676055908203,
      "step": 45025
    },
    {
      "epoch": 0.000274810791015625,
      "step": 45025,
      "training_step_time": 0.6600775718688965
    },
    {
      "epoch": 0.00027481689453125,
      "model_forward_time": 0.12245345115661621,
      "step": 45026
    },
    {
      "epoch": 0.00027481689453125,
      "step": 45026,
      "training_step_time": 0.6111164093017578
    },
    {
      "epoch": 0.000274822998046875,
      "model_forward_time": 0.1341092586517334,
      "step": 45027
    },
    {
      "epoch": 0.000274822998046875,
      "step": 45027,
      "training_step_time": 0.6883947849273682
    },
    {
      "epoch": 0.0002748291015625,
      "model_forward_time": 0.11867833137512207,
      "step": 45028
    },
    {
      "epoch": 0.0002748291015625,
      "step": 45028,
      "training_step_time": 0.620361328125
    },
    {
      "epoch": 0.000274835205078125,
      "model_forward_time": 0.11749672889709473,
      "step": 45029
    },
    {
      "epoch": 0.000274835205078125,
      "step": 45029,
      "training_step_time": 0.68825364112854
    },
    {
      "epoch": 0.00027484130859375,
      "grad_norm": 0.07902612537145615,
      "learning_rate": 1.6075142873955164e-05,
      "loss": 0.0415,
      "step": 45030
    },
    {
      "epoch": 0.00027484130859375,
      "model_forward_time": 0.1199643611907959,
      "step": 45030
    },
    {
      "epoch": 0.00027484130859375,
      "step": 45030,
      "training_step_time": 0.7786405086517334
    },
    {
      "epoch": 0.000274847412109375,
      "model_forward_time": 0.11599087715148926,
      "step": 45031
    },
    {
      "epoch": 0.000274847412109375,
      "step": 45031,
      "training_step_time": 0.7363979816436768
    },
    {
      "epoch": 0.000274853515625,
      "model_forward_time": 0.12073230743408203,
      "step": 45032
    },
    {
      "epoch": 0.000274853515625,
      "step": 45032,
      "training_step_time": 0.717010498046875
    },
    {
      "epoch": 0.000274859619140625,
      "model_forward_time": 0.11937761306762695,
      "step": 45033
    },
    {
      "epoch": 0.000274859619140625,
      "step": 45033,
      "training_step_time": 0.6340091228485107
    },
    {
      "epoch": 0.00027486572265625,
      "model_forward_time": 0.12130141258239746,
      "step": 45034
    },
    {
      "epoch": 0.00027486572265625,
      "step": 45034,
      "training_step_time": 0.694586992263794
    },
    {
      "epoch": 0.000274871826171875,
      "model_forward_time": 0.12041020393371582,
      "step": 45035
    },
    {
      "epoch": 0.000274871826171875,
      "step": 45035,
      "training_step_time": 0.6360876560211182
    },
    {
      "epoch": 0.0002748779296875,
      "model_forward_time": 0.1187894344329834,
      "step": 45036
    },
    {
      "epoch": 0.0002748779296875,
      "step": 45036,
      "training_step_time": 0.6093878746032715
    },
    {
      "epoch": 0.000274884033203125,
      "model_forward_time": 0.11879372596740723,
      "step": 45037
    },
    {
      "epoch": 0.000274884033203125,
      "step": 45037,
      "training_step_time": 0.6938493251800537
    },
    {
      "epoch": 0.00027489013671875,
      "model_forward_time": 0.11953902244567871,
      "step": 45038
    },
    {
      "epoch": 0.00027489013671875,
      "step": 45038,
      "training_step_time": 0.6747219562530518
    },
    {
      "epoch": 0.000274896240234375,
      "model_forward_time": 0.12264847755432129,
      "step": 45039
    },
    {
      "epoch": 0.000274896240234375,
      "step": 45039,
      "training_step_time": 0.602107048034668
    },
    {
      "epoch": 0.00027490234375,
      "grad_norm": 0.1181241124868393,
      "learning_rate": 1.6054903961785013e-05,
      "loss": 0.0434,
      "step": 45040
    },
    {
      "epoch": 0.00027490234375,
      "model_forward_time": 0.11931562423706055,
      "step": 45040
    },
    {
      "epoch": 0.00027490234375,
      "step": 45040,
      "training_step_time": 0.7108094692230225
    },
    {
      "epoch": 0.000274908447265625,
      "model_forward_time": 0.13116073608398438,
      "step": 45041
    },
    {
      "epoch": 0.000274908447265625,
      "step": 45041,
      "training_step_time": 0.6988875865936279
    },
    {
      "epoch": 0.00027491455078125,
      "model_forward_time": 0.12218070030212402,
      "step": 45042
    },
    {
      "epoch": 0.00027491455078125,
      "step": 45042,
      "training_step_time": 0.6908934116363525
    },
    {
      "epoch": 0.000274920654296875,
      "model_forward_time": 0.12145256996154785,
      "step": 45043
    },
    {
      "epoch": 0.000274920654296875,
      "step": 45043,
      "training_step_time": 0.5320689678192139
    },
    {
      "epoch": 0.0002749267578125,
      "model_forward_time": 0.12280535697937012,
      "step": 45044
    },
    {
      "epoch": 0.0002749267578125,
      "step": 45044,
      "training_step_time": 0.5954313278198242
    },
    {
      "epoch": 0.000274932861328125,
      "model_forward_time": 0.119781494140625,
      "step": 45045
    },
    {
      "epoch": 0.000274932861328125,
      "step": 45045,
      "training_step_time": 0.6363339424133301
    },
    {
      "epoch": 0.00027493896484375,
      "model_forward_time": 0.11717557907104492,
      "step": 45046
    },
    {
      "epoch": 0.00027493896484375,
      "step": 45046,
      "training_step_time": 0.48986220359802246
    },
    {
      "epoch": 0.000274945068359375,
      "model_forward_time": 0.11993694305419922,
      "step": 45047
    },
    {
      "epoch": 0.000274945068359375,
      "step": 45047,
      "training_step_time": 0.45102405548095703
    },
    {
      "epoch": 0.000274951171875,
      "model_forward_time": 0.11691021919250488,
      "step": 45048
    },
    {
      "epoch": 0.000274951171875,
      "step": 45048,
      "training_step_time": 0.4318077564239502
    },
    {
      "epoch": 0.000274957275390625,
      "model_forward_time": 0.11751842498779297,
      "step": 45049
    },
    {
      "epoch": 0.000274957275390625,
      "step": 45049,
      "training_step_time": 0.5231678485870361
    },
    {
      "epoch": 0.00027496337890625,
      "grad_norm": 0.09396672248840332,
      "learning_rate": 1.603467536123877e-05,
      "loss": 0.0383,
      "step": 45050
    },
    {
      "epoch": 0.00027496337890625,
      "model_forward_time": 0.11712360382080078,
      "step": 45050
    },
    {
      "epoch": 0.00027496337890625,
      "step": 45050,
      "training_step_time": 0.43833470344543457
    },
    {
      "epoch": 0.000274969482421875,
      "model_forward_time": 0.11626482009887695,
      "step": 45051
    },
    {
      "epoch": 0.000274969482421875,
      "step": 45051,
      "training_step_time": 0.5426836013793945
    },
    {
      "epoch": 0.0002749755859375,
      "model_forward_time": 0.11572480201721191,
      "step": 45052
    },
    {
      "epoch": 0.0002749755859375,
      "step": 45052,
      "training_step_time": 0.44179439544677734
    },
    {
      "epoch": 0.000274981689453125,
      "model_forward_time": 0.11566710472106934,
      "step": 45053
    },
    {
      "epoch": 0.000274981689453125,
      "step": 45053,
      "training_step_time": 0.4823160171508789
    },
    {
      "epoch": 0.00027498779296875,
      "model_forward_time": 0.11556744575500488,
      "step": 45054
    },
    {
      "epoch": 0.00027498779296875,
      "step": 45054,
      "training_step_time": 0.5114829540252686
    },
    {
      "epoch": 0.000274993896484375,
      "model_forward_time": 0.11475920677185059,
      "step": 45055
    },
    {
      "epoch": 0.000274993896484375,
      "step": 45055,
      "training_step_time": 0.3807942867279053
    },
    {
      "epoch": 0.000275,
      "model_forward_time": 0.11567020416259766,
      "step": 45056
    },
    {
      "epoch": 0.000275,
      "step": 45056,
      "training_step_time": 0.39971137046813965
    },
    {
      "epoch": 0.000275006103515625,
      "model_forward_time": 0.11423015594482422,
      "step": 45057
    },
    {
      "epoch": 0.000275006103515625,
      "step": 45057,
      "training_step_time": 0.39239978790283203
    },
    {
      "epoch": 0.00027501220703125,
      "model_forward_time": 0.11508035659790039,
      "step": 45058
    },
    {
      "epoch": 0.00027501220703125,
      "step": 45058,
      "training_step_time": 0.3800194263458252
    },
    {
      "epoch": 0.000275018310546875,
      "model_forward_time": 0.11513280868530273,
      "step": 45059
    },
    {
      "epoch": 0.000275018310546875,
      "step": 45059,
      "training_step_time": 0.39052367210388184
    },
    {
      "epoch": 0.0002750244140625,
      "grad_norm": 0.1008220762014389,
      "learning_rate": 1.6014457078461353e-05,
      "loss": 0.0368,
      "step": 45060
    },
    {
      "epoch": 0.0002750244140625,
      "model_forward_time": 0.11638069152832031,
      "step": 45060
    },
    {
      "epoch": 0.0002750244140625,
      "step": 45060,
      "training_step_time": 0.40305280685424805
    },
    {
      "epoch": 0.000275030517578125,
      "model_forward_time": 0.11535525321960449,
      "step": 45061
    },
    {
      "epoch": 0.000275030517578125,
      "step": 45061,
      "training_step_time": 0.37875843048095703
    },
    {
      "epoch": 0.00027503662109375,
      "model_forward_time": 0.11567139625549316,
      "step": 45062
    },
    {
      "epoch": 0.00027503662109375,
      "step": 45062,
      "training_step_time": 0.39769840240478516
    },
    {
      "epoch": 0.000275042724609375,
      "model_forward_time": 0.11559772491455078,
      "step": 45063
    },
    {
      "epoch": 0.000275042724609375,
      "step": 45063,
      "training_step_time": 0.3929455280303955
    },
    {
      "epoch": 0.000275048828125,
      "model_forward_time": 0.11547446250915527,
      "step": 45064
    },
    {
      "epoch": 0.000275048828125,
      "step": 45064,
      "training_step_time": 0.43221163749694824
    },
    {
      "epoch": 0.000275054931640625,
      "model_forward_time": 0.11475229263305664,
      "step": 45065
    },
    {
      "epoch": 0.000275054931640625,
      "step": 45065,
      "training_step_time": 0.4804661273956299
    },
    {
      "epoch": 0.00027506103515625,
      "model_forward_time": 0.1146245002746582,
      "step": 45066
    },
    {
      "epoch": 0.00027506103515625,
      "step": 45066,
      "training_step_time": 0.40759992599487305
    },
    {
      "epoch": 0.000275067138671875,
      "model_forward_time": 0.11505126953125,
      "step": 45067
    },
    {
      "epoch": 0.000275067138671875,
      "step": 45067,
      "training_step_time": 0.3855316638946533
    },
    {
      "epoch": 0.0002750732421875,
      "model_forward_time": 0.11707615852355957,
      "step": 45068
    },
    {
      "epoch": 0.0002750732421875,
      "step": 45068,
      "training_step_time": 0.44425344467163086
    },
    {
      "epoch": 0.000275079345703125,
      "model_forward_time": 0.11490440368652344,
      "step": 45069
    },
    {
      "epoch": 0.000275079345703125,
      "step": 45069,
      "training_step_time": 0.42119717597961426
    },
    {
      "epoch": 0.00027508544921875,
      "grad_norm": 0.1154860332608223,
      "learning_rate": 1.5994249119594517e-05,
      "loss": 0.0448,
      "step": 45070
    },
    {
      "epoch": 0.00027508544921875,
      "model_forward_time": 0.11506986618041992,
      "step": 45070
    },
    {
      "epoch": 0.00027508544921875,
      "step": 45070,
      "training_step_time": 0.40706753730773926
    },
    {
      "epoch": 0.000275091552734375,
      "model_forward_time": 0.11566710472106934,
      "step": 45071
    },
    {
      "epoch": 0.000275091552734375,
      "step": 45071,
      "training_step_time": 0.3869459629058838
    },
    {
      "epoch": 0.00027509765625,
      "model_forward_time": 0.1151878833770752,
      "step": 45072
    },
    {
      "epoch": 0.00027509765625,
      "step": 45072,
      "training_step_time": 0.4060094356536865
    },
    {
      "epoch": 0.000275103759765625,
      "model_forward_time": 0.11465144157409668,
      "step": 45073
    },
    {
      "epoch": 0.000275103759765625,
      "step": 45073,
      "training_step_time": 0.40822792053222656
    },
    {
      "epoch": 0.00027510986328125,
      "model_forward_time": 0.1145482063293457,
      "step": 45074
    },
    {
      "epoch": 0.00027510986328125,
      "step": 45074,
      "training_step_time": 0.39325690269470215
    },
    {
      "epoch": 0.000275115966796875,
      "model_forward_time": 0.11692142486572266,
      "step": 45075
    },
    {
      "epoch": 0.000275115966796875,
      "step": 45075,
      "training_step_time": 0.38210272789001465
    },
    {
      "epoch": 0.0002751220703125,
      "model_forward_time": 0.11577343940734863,
      "step": 45076
    },
    {
      "epoch": 0.0002751220703125,
      "step": 45076,
      "training_step_time": 0.39293670654296875
    },
    {
      "epoch": 0.000275128173828125,
      "model_forward_time": 0.1153264045715332,
      "step": 45077
    },
    {
      "epoch": 0.000275128173828125,
      "step": 45077,
      "training_step_time": 0.3863983154296875
    },
    {
      "epoch": 0.00027513427734375,
      "model_forward_time": 0.11558938026428223,
      "step": 45078
    },
    {
      "epoch": 0.00027513427734375,
      "step": 45078,
      "training_step_time": 0.3897883892059326
    },
    {
      "epoch": 0.000275140380859375,
      "model_forward_time": 0.11553502082824707,
      "step": 45079
    },
    {
      "epoch": 0.000275140380859375,
      "step": 45079,
      "training_step_time": 0.4561152458190918
    },
    {
      "epoch": 0.000275146484375,
      "grad_norm": 0.11024130880832672,
      "learning_rate": 1.597405149077697e-05,
      "loss": 0.0378,
      "step": 45080
    },
    {
      "epoch": 0.000275146484375,
      "model_forward_time": 0.11527323722839355,
      "step": 45080
    },
    {
      "epoch": 0.000275146484375,
      "step": 45080,
      "training_step_time": 0.4266054630279541
    },
    {
      "epoch": 0.000275152587890625,
      "model_forward_time": 0.11469411849975586,
      "step": 45081
    },
    {
      "epoch": 0.000275152587890625,
      "step": 45081,
      "training_step_time": 0.501765251159668
    },
    {
      "epoch": 0.00027515869140625,
      "model_forward_time": 0.1153409481048584,
      "step": 45082
    },
    {
      "epoch": 0.00027515869140625,
      "step": 45082,
      "training_step_time": 0.42587995529174805
    },
    {
      "epoch": 0.000275164794921875,
      "model_forward_time": 0.1158900260925293,
      "step": 45083
    },
    {
      "epoch": 0.000275164794921875,
      "step": 45083,
      "training_step_time": 0.4952974319458008
    },
    {
      "epoch": 0.0002751708984375,
      "model_forward_time": 0.11467957496643066,
      "step": 45084
    },
    {
      "epoch": 0.0002751708984375,
      "step": 45084,
      "training_step_time": 0.4296121597290039
    },
    {
      "epoch": 0.000275177001953125,
      "model_forward_time": 0.1148686408996582,
      "step": 45085
    },
    {
      "epoch": 0.000275177001953125,
      "step": 45085,
      "training_step_time": 0.392333984375
    },
    {
      "epoch": 0.00027518310546875,
      "model_forward_time": 0.11514067649841309,
      "step": 45086
    },
    {
      "epoch": 0.00027518310546875,
      "step": 45086,
      "training_step_time": 0.39678192138671875
    },
    {
      "epoch": 0.000275189208984375,
      "model_forward_time": 0.11650419235229492,
      "step": 45087
    },
    {
      "epoch": 0.000275189208984375,
      "step": 45087,
      "training_step_time": 0.38460588455200195
    },
    {
      "epoch": 0.0002751953125,
      "model_forward_time": 0.11478972434997559,
      "step": 45088
    },
    {
      "epoch": 0.0002751953125,
      "step": 45088,
      "training_step_time": 0.38280248641967773
    },
    {
      "epoch": 0.000275201416015625,
      "model_forward_time": 0.11570906639099121,
      "step": 45089
    },
    {
      "epoch": 0.000275201416015625,
      "step": 45089,
      "training_step_time": 0.39034104347229004
    },
    {
      "epoch": 0.00027520751953125,
      "grad_norm": 0.1133994460105896,
      "learning_rate": 1.5953864198144135e-05,
      "loss": 0.0417,
      "step": 45090
    },
    {
      "epoch": 0.00027520751953125,
      "model_forward_time": 0.1158440113067627,
      "step": 45090
    },
    {
      "epoch": 0.00027520751953125,
      "step": 45090,
      "training_step_time": 0.4059579372406006
    },
    {
      "epoch": 0.000275213623046875,
      "model_forward_time": 0.11480975151062012,
      "step": 45091
    },
    {
      "epoch": 0.000275213623046875,
      "step": 45091,
      "training_step_time": 0.40328192710876465
    },
    {
      "epoch": 0.0002752197265625,
      "model_forward_time": 0.11509013175964355,
      "step": 45092
    },
    {
      "epoch": 0.0002752197265625,
      "step": 45092,
      "training_step_time": 0.40014123916625977
    },
    {
      "epoch": 0.000275225830078125,
      "model_forward_time": 0.11600756645202637,
      "step": 45093
    },
    {
      "epoch": 0.000275225830078125,
      "step": 45093,
      "training_step_time": 0.39218831062316895
    },
    {
      "epoch": 0.00027523193359375,
      "model_forward_time": 0.11550021171569824,
      "step": 45094
    },
    {
      "epoch": 0.00027523193359375,
      "step": 45094,
      "training_step_time": 0.45912623405456543
    },
    {
      "epoch": 0.000275238037109375,
      "model_forward_time": 0.11505532264709473,
      "step": 45095
    },
    {
      "epoch": 0.000275238037109375,
      "step": 45095,
      "training_step_time": 0.40612196922302246
    },
    {
      "epoch": 0.000275244140625,
      "model_forward_time": 0.11490988731384277,
      "step": 45096
    },
    {
      "epoch": 0.000275244140625,
      "step": 45096,
      "training_step_time": 0.46106576919555664
    },
    {
      "epoch": 0.000275250244140625,
      "model_forward_time": 0.1150960922241211,
      "step": 45097
    },
    {
      "epoch": 0.000275250244140625,
      "step": 45097,
      "training_step_time": 0.4972200393676758
    },
    {
      "epoch": 0.00027525634765625,
      "model_forward_time": 0.1152801513671875,
      "step": 45098
    },
    {
      "epoch": 0.00027525634765625,
      "step": 45098,
      "training_step_time": 0.4427196979522705
    },
    {
      "epoch": 0.000275262451171875,
      "model_forward_time": 0.11518239974975586,
      "step": 45099
    },
    {
      "epoch": 0.000275262451171875,
      "step": 45099,
      "training_step_time": 0.4170856475830078
    },
    {
      "epoch": 0.0002752685546875,
      "grad_norm": 0.08626748621463776,
      "learning_rate": 1.593368724782846e-05,
      "loss": 0.0379,
      "step": 45100
    },
    {
      "epoch": 0.0002752685546875,
      "model_forward_time": 0.11512041091918945,
      "step": 45100
    },
    {
      "epoch": 0.0002752685546875,
      "step": 45100,
      "training_step_time": 0.40224790573120117
    },
    {
      "epoch": 0.000275274658203125,
      "model_forward_time": 0.11484837532043457,
      "step": 45101
    },
    {
      "epoch": 0.000275274658203125,
      "step": 45101,
      "training_step_time": 0.37487006187438965
    },
    {
      "epoch": 0.00027528076171875,
      "model_forward_time": 0.11455106735229492,
      "step": 45102
    },
    {
      "epoch": 0.00027528076171875,
      "step": 45102,
      "training_step_time": 0.39711618423461914
    },
    {
      "epoch": 0.000275286865234375,
      "model_forward_time": 0.11598038673400879,
      "step": 45103
    },
    {
      "epoch": 0.000275286865234375,
      "step": 45103,
      "training_step_time": 0.39928722381591797
    },
    {
      "epoch": 0.00027529296875,
      "model_forward_time": 0.11524415016174316,
      "step": 45104
    },
    {
      "epoch": 0.00027529296875,
      "step": 45104,
      "training_step_time": 0.3982720375061035
    },
    {
      "epoch": 0.000275299072265625,
      "model_forward_time": 0.11571455001831055,
      "step": 45105
    },
    {
      "epoch": 0.000275299072265625,
      "step": 45105,
      "training_step_time": 0.7060725688934326
    },
    {
      "epoch": 0.00027530517578125,
      "model_forward_time": 0.11448884010314941,
      "step": 45106
    },
    {
      "epoch": 0.00027530517578125,
      "step": 45106,
      "training_step_time": 0.3923823833465576
    },
    {
      "epoch": 0.000275311279296875,
      "model_forward_time": 0.11559772491455078,
      "step": 45107
    },
    {
      "epoch": 0.000275311279296875,
      "step": 45107,
      "training_step_time": 0.3758397102355957
    },
    {
      "epoch": 0.0002753173828125,
      "model_forward_time": 0.11441564559936523,
      "step": 45108
    },
    {
      "epoch": 0.0002753173828125,
      "step": 45108,
      "training_step_time": 0.45058393478393555
    },
    {
      "epoch": 0.000275323486328125,
      "model_forward_time": 0.11546492576599121,
      "step": 45109
    },
    {
      "epoch": 0.000275323486328125,
      "step": 45109,
      "training_step_time": 0.4400675296783447
    },
    {
      "epoch": 0.00027532958984375,
      "grad_norm": 0.10633430629968643,
      "learning_rate": 1.5913520645959097e-05,
      "loss": 0.0368,
      "step": 45110
    },
    {
      "epoch": 0.00027532958984375,
      "model_forward_time": 0.11506938934326172,
      "step": 45110
    },
    {
      "epoch": 0.00027532958984375,
      "step": 45110,
      "training_step_time": 0.4426999092102051
    },
    {
      "epoch": 0.000275335693359375,
      "model_forward_time": 0.11455273628234863,
      "step": 45111
    },
    {
      "epoch": 0.000275335693359375,
      "step": 45111,
      "training_step_time": 0.465343713760376
    },
    {
      "epoch": 0.000275341796875,
      "model_forward_time": 0.11557531356811523,
      "step": 45112
    },
    {
      "epoch": 0.000275341796875,
      "step": 45112,
      "training_step_time": 0.39377665519714355
    },
    {
      "epoch": 0.000275347900390625,
      "model_forward_time": 0.11589717864990234,
      "step": 45113
    },
    {
      "epoch": 0.000275347900390625,
      "step": 45113,
      "training_step_time": 0.4159281253814697
    },
    {
      "epoch": 0.00027535400390625,
      "model_forward_time": 0.11584043502807617,
      "step": 45114
    },
    {
      "epoch": 0.00027535400390625,
      "step": 45114,
      "training_step_time": 0.3869311809539795
    },
    {
      "epoch": 0.000275360107421875,
      "model_forward_time": 0.11452865600585938,
      "step": 45115
    },
    {
      "epoch": 0.000275360107421875,
      "step": 45115,
      "training_step_time": 0.394895076751709
    },
    {
      "epoch": 0.0002753662109375,
      "model_forward_time": 0.11488723754882812,
      "step": 45116
    },
    {
      "epoch": 0.0002753662109375,
      "step": 45116,
      "training_step_time": 0.38523101806640625
    },
    {
      "epoch": 0.000275372314453125,
      "model_forward_time": 0.11535453796386719,
      "step": 45117
    },
    {
      "epoch": 0.000275372314453125,
      "step": 45117,
      "training_step_time": 0.3987269401550293
    },
    {
      "epoch": 0.00027537841796875,
      "model_forward_time": 0.11536741256713867,
      "step": 45118
    },
    {
      "epoch": 0.00027537841796875,
      "step": 45118,
      "training_step_time": 0.4012269973754883
    },
    {
      "epoch": 0.000275384521484375,
      "model_forward_time": 0.11623024940490723,
      "step": 45119
    },
    {
      "epoch": 0.000275384521484375,
      "step": 45119,
      "training_step_time": 0.391357421875
    },
    {
      "epoch": 0.000275390625,
      "grad_norm": 0.08149727433919907,
      "learning_rate": 1.5893364398662176e-05,
      "loss": 0.0356,
      "step": 45120
    },
    {
      "epoch": 0.000275390625,
      "model_forward_time": 0.11463236808776855,
      "step": 45120
    },
    {
      "epoch": 0.000275390625,
      "step": 45120,
      "training_step_time": 0.3934152126312256
    },
    {
      "epoch": 0.000275396728515625,
      "model_forward_time": 0.1153566837310791,
      "step": 45121
    },
    {
      "epoch": 0.000275396728515625,
      "step": 45121,
      "training_step_time": 0.3938007354736328
    },
    {
      "epoch": 0.00027540283203125,
      "model_forward_time": 0.11533427238464355,
      "step": 45122
    },
    {
      "epoch": 0.00027540283203125,
      "step": 45122,
      "training_step_time": 0.4570794105529785
    },
    {
      "epoch": 0.000275408935546875,
      "model_forward_time": 0.1150510311126709,
      "step": 45123
    },
    {
      "epoch": 0.000275408935546875,
      "step": 45123,
      "training_step_time": 0.7231042385101318
    },
    {
      "epoch": 0.0002754150390625,
      "model_forward_time": 0.11460542678833008,
      "step": 45124
    },
    {
      "epoch": 0.0002754150390625,
      "step": 45124,
      "training_step_time": 0.419126033782959
    },
    {
      "epoch": 0.000275421142578125,
      "model_forward_time": 0.11542320251464844,
      "step": 45125
    },
    {
      "epoch": 0.000275421142578125,
      "step": 45125,
      "training_step_time": 0.4286074638366699
    },
    {
      "epoch": 0.00027542724609375,
      "model_forward_time": 0.11482024192810059,
      "step": 45126
    },
    {
      "epoch": 0.00027542724609375,
      "step": 45126,
      "training_step_time": 0.38913536071777344
    },
    {
      "epoch": 0.000275433349609375,
      "model_forward_time": 0.11423254013061523,
      "step": 45127
    },
    {
      "epoch": 0.000275433349609375,
      "step": 45127,
      "training_step_time": 0.40386390686035156
    },
    {
      "epoch": 0.000275439453125,
      "model_forward_time": 0.11441969871520996,
      "step": 45128
    },
    {
      "epoch": 0.000275439453125,
      "step": 45128,
      "training_step_time": 0.3955390453338623
    },
    {
      "epoch": 0.000275445556640625,
      "model_forward_time": 0.11556863784790039,
      "step": 45129
    },
    {
      "epoch": 0.000275445556640625,
      "step": 45129,
      "training_step_time": 0.4080643653869629
    },
    {
      "epoch": 0.00027545166015625,
      "grad_norm": 0.1117791086435318,
      "learning_rate": 1.587321851206061e-05,
      "loss": 0.0385,
      "step": 45130
    },
    {
      "epoch": 0.00027545166015625,
      "model_forward_time": 0.11522650718688965,
      "step": 45130
    },
    {
      "epoch": 0.00027545166015625,
      "step": 45130,
      "training_step_time": 0.3928072452545166
    },
    {
      "epoch": 0.000275457763671875,
      "model_forward_time": 0.11513853073120117,
      "step": 45131
    },
    {
      "epoch": 0.000275457763671875,
      "step": 45131,
      "training_step_time": 0.3923299312591553
    },
    {
      "epoch": 0.0002754638671875,
      "model_forward_time": 0.11488747596740723,
      "step": 45132
    },
    {
      "epoch": 0.0002754638671875,
      "step": 45132,
      "training_step_time": 0.3999354839324951
    },
    {
      "epoch": 0.000275469970703125,
      "model_forward_time": 0.11485648155212402,
      "step": 45133
    },
    {
      "epoch": 0.000275469970703125,
      "step": 45133,
      "training_step_time": 0.39137935638427734
    },
    {
      "epoch": 0.00027547607421875,
      "model_forward_time": 0.11528372764587402,
      "step": 45134
    },
    {
      "epoch": 0.00027547607421875,
      "step": 45134,
      "training_step_time": 0.3974266052246094
    },
    {
      "epoch": 0.000275482177734375,
      "model_forward_time": 0.11508822441101074,
      "step": 45135
    },
    {
      "epoch": 0.000275482177734375,
      "step": 45135,
      "training_step_time": 0.6794683933258057
    },
    {
      "epoch": 0.00027548828125,
      "model_forward_time": 0.11569046974182129,
      "step": 45136
    },
    {
      "epoch": 0.00027548828125,
      "step": 45136,
      "training_step_time": 0.47919487953186035
    },
    {
      "epoch": 0.000275494384765625,
      "model_forward_time": 0.1145026683807373,
      "step": 45137
    },
    {
      "epoch": 0.000275494384765625,
      "step": 45137,
      "training_step_time": 0.42718076705932617
    },
    {
      "epoch": 0.00027550048828125,
      "model_forward_time": 0.11444234848022461,
      "step": 45138
    },
    {
      "epoch": 0.00027550048828125,
      "step": 45138,
      "training_step_time": 0.47760629653930664
    },
    {
      "epoch": 0.000275506591796875,
      "model_forward_time": 0.1150064468383789,
      "step": 45139
    },
    {
      "epoch": 0.000275506591796875,
      "step": 45139,
      "training_step_time": 0.3677692413330078
    },
    {
      "epoch": 0.0002755126953125,
      "grad_norm": 0.11403838545084,
      "learning_rate": 1.5853082992274205e-05,
      "loss": 0.0388,
      "step": 45140
    },
    {
      "epoch": 0.0002755126953125,
      "model_forward_time": 0.11436986923217773,
      "step": 45140
    },
    {
      "epoch": 0.0002755126953125,
      "step": 45140,
      "training_step_time": 0.4019629955291748
    },
    {
      "epoch": 0.000275518798828125,
      "model_forward_time": 0.11507153511047363,
      "step": 45141
    },
    {
      "epoch": 0.000275518798828125,
      "step": 45141,
      "training_step_time": 0.9287204742431641
    },
    {
      "epoch": 0.00027552490234375,
      "model_forward_time": 0.11512541770935059,
      "step": 45142
    },
    {
      "epoch": 0.00027552490234375,
      "step": 45142,
      "training_step_time": 0.3758409023284912
    },
    {
      "epoch": 0.000275531005859375,
      "model_forward_time": 0.11422038078308105,
      "step": 45143
    },
    {
      "epoch": 0.000275531005859375,
      "step": 45143,
      "training_step_time": 0.39055347442626953
    },
    {
      "epoch": 0.000275537109375,
      "model_forward_time": 0.11458683013916016,
      "step": 45144
    },
    {
      "epoch": 0.000275537109375,
      "step": 45144,
      "training_step_time": 0.3715970516204834
    },
    {
      "epoch": 0.000275543212890625,
      "model_forward_time": 0.11440491676330566,
      "step": 45145
    },
    {
      "epoch": 0.000275543212890625,
      "step": 45145,
      "training_step_time": 0.36905837059020996
    },
    {
      "epoch": 0.00027554931640625,
      "model_forward_time": 0.11446166038513184,
      "step": 45146
    },
    {
      "epoch": 0.00027554931640625,
      "step": 45146,
      "training_step_time": 0.36919665336608887
    },
    {
      "epoch": 0.000275555419921875,
      "model_forward_time": 0.11500024795532227,
      "step": 45147
    },
    {
      "epoch": 0.000275555419921875,
      "step": 45147,
      "training_step_time": 0.6031653881072998
    },
    {
      "epoch": 0.0002755615234375,
      "model_forward_time": 0.11499547958374023,
      "step": 45148
    },
    {
      "epoch": 0.0002755615234375,
      "step": 45148,
      "training_step_time": 0.39847302436828613
    },
    {
      "epoch": 0.000275567626953125,
      "model_forward_time": 0.11505341529846191,
      "step": 45149
    },
    {
      "epoch": 0.000275567626953125,
      "step": 45149,
      "training_step_time": 0.4031662940979004
    },
    {
      "epoch": 0.00027557373046875,
      "grad_norm": 0.10440238565206528,
      "learning_rate": 1.583295784541958e-05,
      "loss": 0.0414,
      "step": 45150
    },
    {
      "epoch": 0.00027557373046875,
      "model_forward_time": 0.11467528343200684,
      "step": 45150
    },
    {
      "epoch": 0.00027557373046875,
      "step": 45150,
      "training_step_time": 0.4299800395965576
    },
    {
      "epoch": 0.000275579833984375,
      "model_forward_time": 0.11502194404602051,
      "step": 45151
    },
    {
      "epoch": 0.000275579833984375,
      "step": 45151,
      "training_step_time": 0.4198944568634033
    },
    {
      "epoch": 0.0002755859375,
      "model_forward_time": 0.11461901664733887,
      "step": 45152
    },
    {
      "epoch": 0.0002755859375,
      "step": 45152,
      "training_step_time": 0.36682820320129395
    },
    {
      "epoch": 0.000275592041015625,
      "model_forward_time": 0.1143500804901123,
      "step": 45153
    },
    {
      "epoch": 0.000275592041015625,
      "step": 45153,
      "training_step_time": 0.7656855583190918
    },
    {
      "epoch": 0.00027559814453125,
      "model_forward_time": 0.11461806297302246,
      "step": 45154
    },
    {
      "epoch": 0.00027559814453125,
      "step": 45154,
      "training_step_time": 0.3940622806549072
    },
    {
      "epoch": 0.000275604248046875,
      "model_forward_time": 0.11453485488891602,
      "step": 45155
    },
    {
      "epoch": 0.000275604248046875,
      "step": 45155,
      "training_step_time": 0.3892972469329834
    },
    {
      "epoch": 0.0002756103515625,
      "model_forward_time": 0.11497306823730469,
      "step": 45156
    },
    {
      "epoch": 0.0002756103515625,
      "step": 45156,
      "training_step_time": 0.3830389976501465
    },
    {
      "epoch": 0.000275616455078125,
      "model_forward_time": 0.11393308639526367,
      "step": 45157
    },
    {
      "epoch": 0.000275616455078125,
      "step": 45157,
      "training_step_time": 0.38955020904541016
    },
    {
      "epoch": 0.00027562255859375,
      "model_forward_time": 0.11448121070861816,
      "step": 45158
    },
    {
      "epoch": 0.00027562255859375,
      "step": 45158,
      "training_step_time": 0.39180564880371094
    },
    {
      "epoch": 0.000275628662109375,
      "model_forward_time": 0.1147158145904541,
      "step": 45159
    },
    {
      "epoch": 0.000275628662109375,
      "step": 45159,
      "training_step_time": 0.9647567272186279
    },
    {
      "epoch": 0.000275634765625,
      "grad_norm": 0.10027021914720535,
      "learning_rate": 1.581284307761024e-05,
      "loss": 0.0381,
      "step": 45160
    },
    {
      "epoch": 0.000275634765625,
      "model_forward_time": 0.11411690711975098,
      "step": 45160
    },
    {
      "epoch": 0.000275634765625,
      "step": 45160,
      "training_step_time": 0.41602158546447754
    },
    {
      "epoch": 0.000275640869140625,
      "model_forward_time": 0.11472368240356445,
      "step": 45161
    },
    {
      "epoch": 0.000275640869140625,
      "step": 45161,
      "training_step_time": 0.3850717544555664
    },
    {
      "epoch": 0.00027564697265625,
      "model_forward_time": 0.11397886276245117,
      "step": 45162
    },
    {
      "epoch": 0.00027564697265625,
      "step": 45162,
      "training_step_time": 0.3963627815246582
    },
    {
      "epoch": 0.000275653076171875,
      "model_forward_time": 0.11380887031555176,
      "step": 45163
    },
    {
      "epoch": 0.000275653076171875,
      "step": 45163,
      "training_step_time": 0.38739633560180664
    },
    {
      "epoch": 0.0002756591796875,
      "model_forward_time": 0.11343240737915039,
      "step": 45164
    },
    {
      "epoch": 0.0002756591796875,
      "step": 45164,
      "training_step_time": 0.4168086051940918
    },
    {
      "epoch": 0.000275665283203125,
      "model_forward_time": 0.1153557300567627,
      "step": 45165
    },
    {
      "epoch": 0.000275665283203125,
      "step": 45165,
      "training_step_time": 0.537628173828125
    },
    {
      "epoch": 0.00027567138671875,
      "model_forward_time": 0.11481904983520508,
      "step": 45166
    },
    {
      "epoch": 0.00027567138671875,
      "step": 45166,
      "training_step_time": 0.47672080993652344
    },
    {
      "epoch": 0.000275677490234375,
      "model_forward_time": 0.11455821990966797,
      "step": 45167
    },
    {
      "epoch": 0.000275677490234375,
      "step": 45167,
      "training_step_time": 0.40477705001831055
    },
    {
      "epoch": 0.00027568359375,
      "model_forward_time": 0.1146688461303711,
      "step": 45168
    },
    {
      "epoch": 0.00027568359375,
      "step": 45168,
      "training_step_time": 0.38565564155578613
    },
    {
      "epoch": 0.000275689697265625,
      "model_forward_time": 0.11505293846130371,
      "step": 45169
    },
    {
      "epoch": 0.000275689697265625,
      "step": 45169,
      "training_step_time": 0.3945186138153076
    },
    {
      "epoch": 0.00027569580078125,
      "grad_norm": 0.11489498615264893,
      "learning_rate": 1.5792738694956516e-05,
      "loss": 0.0378,
      "step": 45170
    },
    {
      "epoch": 0.00027569580078125,
      "model_forward_time": 0.11458611488342285,
      "step": 45170
    },
    {
      "epoch": 0.00027569580078125,
      "step": 45170,
      "training_step_time": 0.3928530216217041
    },
    {
      "epoch": 0.000275701904296875,
      "model_forward_time": 0.11496639251708984,
      "step": 45171
    },
    {
      "epoch": 0.000275701904296875,
      "step": 45171,
      "training_step_time": 1.287205696105957
    },
    {
      "epoch": 0.0002757080078125,
      "model_forward_time": 0.11397385597229004,
      "step": 45172
    },
    {
      "epoch": 0.0002757080078125,
      "step": 45172,
      "training_step_time": 0.38105082511901855
    },
    {
      "epoch": 0.000275714111328125,
      "model_forward_time": 0.11367988586425781,
      "step": 45173
    },
    {
      "epoch": 0.000275714111328125,
      "step": 45173,
      "training_step_time": 0.3931567668914795
    },
    {
      "epoch": 0.00027572021484375,
      "model_forward_time": 0.11375117301940918,
      "step": 45174
    },
    {
      "epoch": 0.00027572021484375,
      "step": 45174,
      "training_step_time": 0.396759033203125
    },
    {
      "epoch": 0.000275726318359375,
      "model_forward_time": 0.11380958557128906,
      "step": 45175
    },
    {
      "epoch": 0.000275726318359375,
      "step": 45175,
      "training_step_time": 0.46769213676452637
    },
    {
      "epoch": 0.000275732421875,
      "model_forward_time": 0.11415910720825195,
      "step": 45176
    },
    {
      "epoch": 0.000275732421875,
      "step": 45176,
      "training_step_time": 0.4253816604614258
    },
    {
      "epoch": 0.000275738525390625,
      "model_forward_time": 0.11443614959716797,
      "step": 45177
    },
    {
      "epoch": 0.000275738525390625,
      "step": 45177,
      "training_step_time": 0.524522066116333
    },
    {
      "epoch": 0.00027574462890625,
      "model_forward_time": 0.1152658462524414,
      "step": 45178
    },
    {
      "epoch": 0.00027574462890625,
      "step": 45178,
      "training_step_time": 0.43735599517822266
    },
    {
      "epoch": 0.000275750732421875,
      "model_forward_time": 0.11519598960876465,
      "step": 45179
    },
    {
      "epoch": 0.000275750732421875,
      "step": 45179,
      "training_step_time": 0.4841575622558594
    },
    {
      "epoch": 0.0002757568359375,
      "grad_norm": 0.100569948554039,
      "learning_rate": 1.5772644703565565e-05,
      "loss": 0.039,
      "step": 45180
    },
    {
      "epoch": 0.0002757568359375,
      "model_forward_time": 0.11436295509338379,
      "step": 45180
    },
    {
      "epoch": 0.0002757568359375,
      "step": 45180,
      "training_step_time": 0.4010930061340332
    },
    {
      "epoch": 0.000275762939453125,
      "model_forward_time": 0.11490535736083984,
      "step": 45181
    },
    {
      "epoch": 0.000275762939453125,
      "step": 45181,
      "training_step_time": 0.3859081268310547
    },
    {
      "epoch": 0.00027576904296875,
      "model_forward_time": 0.11515116691589355,
      "step": 45182
    },
    {
      "epoch": 0.00027576904296875,
      "step": 45182,
      "training_step_time": 0.4199256896972656
    },
    {
      "epoch": 0.000275775146484375,
      "model_forward_time": 0.11546063423156738,
      "step": 45183
    },
    {
      "epoch": 0.000275775146484375,
      "step": 45183,
      "training_step_time": 0.6311182975769043
    },
    {
      "epoch": 0.00027578125,
      "model_forward_time": 0.11512899398803711,
      "step": 45184
    },
    {
      "epoch": 0.00027578125,
      "step": 45184,
      "training_step_time": 0.40308117866516113
    },
    {
      "epoch": 0.000275787353515625,
      "model_forward_time": 0.11498689651489258,
      "step": 45185
    },
    {
      "epoch": 0.000275787353515625,
      "step": 45185,
      "training_step_time": 0.3985707759857178
    },
    {
      "epoch": 0.00027579345703125,
      "model_forward_time": 0.11443305015563965,
      "step": 45186
    },
    {
      "epoch": 0.00027579345703125,
      "step": 45186,
      "training_step_time": 0.41014862060546875
    },
    {
      "epoch": 0.000275799560546875,
      "model_forward_time": 0.11468315124511719,
      "step": 45187
    },
    {
      "epoch": 0.000275799560546875,
      "step": 45187,
      "training_step_time": 0.39412641525268555
    },
    {
      "epoch": 0.0002758056640625,
      "model_forward_time": 0.11475062370300293,
      "step": 45188
    },
    {
      "epoch": 0.0002758056640625,
      "step": 45188,
      "training_step_time": 0.4696164131164551
    },
    {
      "epoch": 0.000275811767578125,
      "model_forward_time": 0.11451339721679688,
      "step": 45189
    },
    {
      "epoch": 0.000275811767578125,
      "step": 45189,
      "training_step_time": 0.8310389518737793
    },
    {
      "epoch": 0.00027581787109375,
      "grad_norm": 0.1396055817604065,
      "learning_rate": 1.5752561109541447e-05,
      "loss": 0.0369,
      "step": 45190
    },
    {
      "epoch": 0.00027581787109375,
      "model_forward_time": 0.11459827423095703,
      "step": 45190
    },
    {
      "epoch": 0.00027581787109375,
      "step": 45190,
      "training_step_time": 0.3882739543914795
    },
    {
      "epoch": 0.000275823974609375,
      "model_forward_time": 0.11480093002319336,
      "step": 45191
    },
    {
      "epoch": 0.000275823974609375,
      "step": 45191,
      "training_step_time": 0.4020729064941406
    },
    {
      "epoch": 0.000275830078125,
      "model_forward_time": 0.11501955986022949,
      "step": 45192
    },
    {
      "epoch": 0.000275830078125,
      "step": 45192,
      "training_step_time": 0.452070951461792
    },
    {
      "epoch": 0.000275836181640625,
      "model_forward_time": 0.11642742156982422,
      "step": 45193
    },
    {
      "epoch": 0.000275836181640625,
      "step": 45193,
      "training_step_time": 0.4875166416168213
    },
    {
      "epoch": 0.00027584228515625,
      "model_forward_time": 0.11487936973571777,
      "step": 45194
    },
    {
      "epoch": 0.00027584228515625,
      "step": 45194,
      "training_step_time": 0.37320756912231445
    },
    {
      "epoch": 0.000275848388671875,
      "model_forward_time": 0.11478805541992188,
      "step": 45195
    },
    {
      "epoch": 0.000275848388671875,
      "step": 45195,
      "training_step_time": 0.39032959938049316
    },
    {
      "epoch": 0.0002758544921875,
      "model_forward_time": 0.11503291130065918,
      "step": 45196
    },
    {
      "epoch": 0.0002758544921875,
      "step": 45196,
      "training_step_time": 0.3939375877380371
    },
    {
      "epoch": 0.000275860595703125,
      "model_forward_time": 0.11455774307250977,
      "step": 45197
    },
    {
      "epoch": 0.000275860595703125,
      "step": 45197,
      "training_step_time": 0.44234728813171387
    },
    {
      "epoch": 0.00027586669921875,
      "model_forward_time": 0.11525249481201172,
      "step": 45198
    },
    {
      "epoch": 0.00027586669921875,
      "step": 45198,
      "training_step_time": 0.42011547088623047
    },
    {
      "epoch": 0.000275872802734375,
      "model_forward_time": 0.11467146873474121,
      "step": 45199
    },
    {
      "epoch": 0.000275872802734375,
      "step": 45199,
      "training_step_time": 0.3953063488006592
    },
    {
      "epoch": 0.00027587890625,
      "grad_norm": 0.11096086353063583,
      "learning_rate": 1.5732487918985018e-05,
      "loss": 0.0379,
      "step": 45200
    },
    {
      "epoch": 0.00027587890625,
      "model_forward_time": 0.115081787109375,
      "step": 45200
    },
    {
      "epoch": 0.00027587890625,
      "step": 45200,
      "training_step_time": 0.39601850509643555
    },
    {
      "epoch": 0.000275885009765625,
      "model_forward_time": 0.11462783813476562,
      "step": 45201
    },
    {
      "epoch": 0.000275885009765625,
      "step": 45201,
      "training_step_time": 0.7156858444213867
    },
    {
      "epoch": 0.00027589111328125,
      "model_forward_time": 0.1144871711730957,
      "step": 45202
    },
    {
      "epoch": 0.00027589111328125,
      "step": 45202,
      "training_step_time": 0.47245311737060547
    },
    {
      "epoch": 0.000275897216796875,
      "model_forward_time": 0.11472439765930176,
      "step": 45203
    },
    {
      "epoch": 0.000275897216796875,
      "step": 45203,
      "training_step_time": 0.4678621292114258
    },
    {
      "epoch": 0.0002759033203125,
      "model_forward_time": 0.1142892837524414,
      "step": 45204
    },
    {
      "epoch": 0.0002759033203125,
      "step": 45204,
      "training_step_time": 0.3972129821777344
    },
    {
      "epoch": 0.000275909423828125,
      "model_forward_time": 0.11477518081665039,
      "step": 45205
    },
    {
      "epoch": 0.000275909423828125,
      "step": 45205,
      "training_step_time": 0.3926115036010742
    },
    {
      "epoch": 0.00027591552734375,
      "model_forward_time": 0.11455202102661133,
      "step": 45206
    },
    {
      "epoch": 0.00027591552734375,
      "step": 45206,
      "training_step_time": 0.42671680450439453
    },
    {
      "epoch": 0.000275921630859375,
      "model_forward_time": 0.11638760566711426,
      "step": 45207
    },
    {
      "epoch": 0.000275921630859375,
      "step": 45207,
      "training_step_time": 0.8065288066864014
    },
    {
      "epoch": 0.000275927734375,
      "model_forward_time": 0.11489415168762207,
      "step": 45208
    },
    {
      "epoch": 0.000275927734375,
      "step": 45208,
      "training_step_time": 0.3832831382751465
    },
    {
      "epoch": 0.000275933837890625,
      "model_forward_time": 0.1143043041229248,
      "step": 45209
    },
    {
      "epoch": 0.000275933837890625,
      "step": 45209,
      "training_step_time": 0.3867039680480957
    },
    {
      "epoch": 0.00027593994140625,
      "grad_norm": 0.09792259335517883,
      "learning_rate": 1.5712425137993973e-05,
      "loss": 0.0422,
      "step": 45210
    },
    {
      "epoch": 0.00027593994140625,
      "model_forward_time": 0.11447286605834961,
      "step": 45210
    },
    {
      "epoch": 0.00027593994140625,
      "step": 45210,
      "training_step_time": 0.46958065032958984
    },
    {
      "epoch": 0.000275946044921875,
      "model_forward_time": 0.11421799659729004,
      "step": 45211
    },
    {
      "epoch": 0.000275946044921875,
      "step": 45211,
      "training_step_time": 0.3954164981842041
    },
    {
      "epoch": 0.0002759521484375,
      "model_forward_time": 0.11436724662780762,
      "step": 45212
    },
    {
      "epoch": 0.0002759521484375,
      "step": 45212,
      "training_step_time": 0.39801692962646484
    },
    {
      "epoch": 0.000275958251953125,
      "model_forward_time": 0.1146390438079834,
      "step": 45213
    },
    {
      "epoch": 0.000275958251953125,
      "step": 45213,
      "training_step_time": 0.9750711917877197
    },
    {
      "epoch": 0.00027596435546875,
      "model_forward_time": 0.11448383331298828,
      "step": 45214
    },
    {
      "epoch": 0.00027596435546875,
      "step": 45214,
      "training_step_time": 0.450268030166626
    },
    {
      "epoch": 0.000275970458984375,
      "model_forward_time": 0.11470365524291992,
      "step": 45215
    },
    {
      "epoch": 0.000275970458984375,
      "step": 45215,
      "training_step_time": 0.3979930877685547
    },
    {
      "epoch": 0.0002759765625,
      "model_forward_time": 0.11394691467285156,
      "step": 45216
    },
    {
      "epoch": 0.0002759765625,
      "step": 45216,
      "training_step_time": 0.45148348808288574
    },
    {
      "epoch": 0.000275982666015625,
      "model_forward_time": 0.1141824722290039,
      "step": 45217
    },
    {
      "epoch": 0.000275982666015625,
      "step": 45217,
      "training_step_time": 0.4502243995666504
    },
    {
      "epoch": 0.00027598876953125,
      "model_forward_time": 0.11433577537536621,
      "step": 45218
    },
    {
      "epoch": 0.00027598876953125,
      "step": 45218,
      "training_step_time": 0.4713726043701172
    },
    {
      "epoch": 0.000275994873046875,
      "model_forward_time": 0.11476373672485352,
      "step": 45219
    },
    {
      "epoch": 0.000275994873046875,
      "step": 45219,
      "training_step_time": 0.5256540775299072
    },
    {
      "epoch": 0.0002760009765625,
      "grad_norm": 0.10587065666913986,
      "learning_rate": 1.569237277266286e-05,
      "loss": 0.037,
      "step": 45220
    },
    {
      "epoch": 0.0002760009765625,
      "model_forward_time": 0.11419224739074707,
      "step": 45220
    },
    {
      "epoch": 0.0002760009765625,
      "step": 45220,
      "training_step_time": 0.3830246925354004
    },
    {
      "epoch": 0.000276007080078125,
      "model_forward_time": 0.11527466773986816,
      "step": 45221
    },
    {
      "epoch": 0.000276007080078125,
      "step": 45221,
      "training_step_time": 0.38452863693237305
    },
    {
      "epoch": 0.00027601318359375,
      "model_forward_time": 0.11501717567443848,
      "step": 45222
    },
    {
      "epoch": 0.00027601318359375,
      "step": 45222,
      "training_step_time": 0.40378355979919434
    },
    {
      "epoch": 0.000276019287109375,
      "model_forward_time": 0.11459779739379883,
      "step": 45223
    },
    {
      "epoch": 0.000276019287109375,
      "step": 45223,
      "training_step_time": 0.40326380729675293
    },
    {
      "epoch": 0.000276025390625,
      "model_forward_time": 0.11509990692138672,
      "step": 45224
    },
    {
      "epoch": 0.000276025390625,
      "step": 45224,
      "training_step_time": 0.3926525115966797
    },
    {
      "epoch": 0.000276031494140625,
      "model_forward_time": 0.11451554298400879,
      "step": 45225
    },
    {
      "epoch": 0.000276031494140625,
      "step": 45225,
      "training_step_time": 0.38679027557373047
    },
    {
      "epoch": 0.00027603759765625,
      "model_forward_time": 0.1150510311126709,
      "step": 45226
    },
    {
      "epoch": 0.00027603759765625,
      "step": 45226,
      "training_step_time": 0.3887603282928467
    },
    {
      "epoch": 0.000276043701171875,
      "model_forward_time": 0.1151280403137207,
      "step": 45227
    },
    {
      "epoch": 0.000276043701171875,
      "step": 45227,
      "training_step_time": 0.3858814239501953
    },
    {
      "epoch": 0.0002760498046875,
      "model_forward_time": 0.11559462547302246,
      "step": 45228
    },
    {
      "epoch": 0.0002760498046875,
      "step": 45228,
      "training_step_time": 0.3997766971588135
    },
    {
      "epoch": 0.000276055908203125,
      "model_forward_time": 0.11556506156921387,
      "step": 45229
    },
    {
      "epoch": 0.000276055908203125,
      "step": 45229,
      "training_step_time": 0.39916110038757324
    },
    {
      "epoch": 0.00027606201171875,
      "grad_norm": 0.10288331657648087,
      "learning_rate": 1.567233082908306e-05,
      "loss": 0.0388,
      "step": 45230
    },
    {
      "epoch": 0.00027606201171875,
      "model_forward_time": 0.11458683013916016,
      "step": 45230
    },
    {
      "epoch": 0.00027606201171875,
      "step": 45230,
      "training_step_time": 0.4547576904296875
    },
    {
      "epoch": 0.000276068115234375,
      "model_forward_time": 0.11534786224365234,
      "step": 45231
    },
    {
      "epoch": 0.000276068115234375,
      "step": 45231,
      "training_step_time": 0.5915579795837402
    },
    {
      "epoch": 0.00027607421875,
      "model_forward_time": 0.11531925201416016,
      "step": 45232
    },
    {
      "epoch": 0.00027607421875,
      "step": 45232,
      "training_step_time": 0.4672722816467285
    },
    {
      "epoch": 0.000276080322265625,
      "model_forward_time": 0.11456656455993652,
      "step": 45233
    },
    {
      "epoch": 0.000276080322265625,
      "step": 45233,
      "training_step_time": 0.3776850700378418
    },
    {
      "epoch": 0.00027608642578125,
      "model_forward_time": 0.11440634727478027,
      "step": 45234
    },
    {
      "epoch": 0.00027608642578125,
      "step": 45234,
      "training_step_time": 0.46665430068969727
    },
    {
      "epoch": 0.000276092529296875,
      "model_forward_time": 0.11468648910522461,
      "step": 45235
    },
    {
      "epoch": 0.000276092529296875,
      "step": 45235,
      "training_step_time": 0.39270949363708496
    },
    {
      "epoch": 0.0002760986328125,
      "model_forward_time": 0.11456179618835449,
      "step": 45236
    },
    {
      "epoch": 0.0002760986328125,
      "step": 45236,
      "training_step_time": 0.44457197189331055
    },
    {
      "epoch": 0.000276104736328125,
      "model_forward_time": 0.11499857902526855,
      "step": 45237
    },
    {
      "epoch": 0.000276104736328125,
      "step": 45237,
      "training_step_time": 0.39410948753356934
    },
    {
      "epoch": 0.00027611083984375,
      "model_forward_time": 0.11507773399353027,
      "step": 45238
    },
    {
      "epoch": 0.00027611083984375,
      "step": 45238,
      "training_step_time": 0.3873755931854248
    },
    {
      "epoch": 0.000276116943359375,
      "model_forward_time": 0.11474895477294922,
      "step": 45239
    },
    {
      "epoch": 0.000276116943359375,
      "step": 45239,
      "training_step_time": 0.37618041038513184
    },
    {
      "epoch": 0.000276123046875,
      "grad_norm": 0.12469884008169174,
      "learning_rate": 1.5652299313342773e-05,
      "loss": 0.0413,
      "step": 45240
    },
    {
      "epoch": 0.000276123046875,
      "model_forward_time": 0.11437797546386719,
      "step": 45240
    },
    {
      "epoch": 0.000276123046875,
      "step": 45240,
      "training_step_time": 0.3951847553253174
    },
    {
      "epoch": 0.000276129150390625,
      "model_forward_time": 0.1149134635925293,
      "step": 45241
    },
    {
      "epoch": 0.000276129150390625,
      "step": 45241,
      "training_step_time": 0.3977663516998291
    },
    {
      "epoch": 0.00027613525390625,
      "model_forward_time": 0.11516523361206055,
      "step": 45242
    },
    {
      "epoch": 0.00027613525390625,
      "step": 45242,
      "training_step_time": 0.39456915855407715
    },
    {
      "epoch": 0.000276141357421875,
      "model_forward_time": 0.11538100242614746,
      "step": 45243
    },
    {
      "epoch": 0.000276141357421875,
      "step": 45243,
      "training_step_time": 0.45487308502197266
    },
    {
      "epoch": 0.0002761474609375,
      "model_forward_time": 0.11546063423156738,
      "step": 45244
    },
    {
      "epoch": 0.0002761474609375,
      "step": 45244,
      "training_step_time": 0.49788713455200195
    },
    {
      "epoch": 0.000276153564453125,
      "model_forward_time": 0.11540055274963379,
      "step": 45245
    },
    {
      "epoch": 0.000276153564453125,
      "step": 45245,
      "training_step_time": 0.3802962303161621
    },
    {
      "epoch": 0.00027615966796875,
      "model_forward_time": 0.11448907852172852,
      "step": 45246
    },
    {
      "epoch": 0.00027615966796875,
      "step": 45246,
      "training_step_time": 0.3799562454223633
    },
    {
      "epoch": 0.000276165771484375,
      "model_forward_time": 0.11558246612548828,
      "step": 45247
    },
    {
      "epoch": 0.000276165771484375,
      "step": 45247,
      "training_step_time": 0.4364197254180908
    },
    {
      "epoch": 0.000276171875,
      "model_forward_time": 0.1145014762878418,
      "step": 45248
    },
    {
      "epoch": 0.000276171875,
      "step": 45248,
      "training_step_time": 0.40750598907470703
    },
    {
      "epoch": 0.000276177978515625,
      "model_forward_time": 0.11530399322509766,
      "step": 45249
    },
    {
      "epoch": 0.000276177978515625,
      "step": 45249,
      "training_step_time": 0.607170820236206
    },
    {
      "epoch": 0.00027618408203125,
      "grad_norm": 0.14690116047859192,
      "learning_rate": 1.563227823152708e-05,
      "loss": 0.04,
      "step": 45250
    },
    {
      "epoch": 0.00027618408203125,
      "model_forward_time": 0.11407709121704102,
      "step": 45250
    },
    {
      "epoch": 0.00027618408203125,
      "step": 45250,
      "training_step_time": 0.38996171951293945
    },
    {
      "epoch": 0.000276190185546875,
      "model_forward_time": 0.11416411399841309,
      "step": 45251
    },
    {
      "epoch": 0.000276190185546875,
      "step": 45251,
      "training_step_time": 0.37534260749816895
    },
    {
      "epoch": 0.0002761962890625,
      "model_forward_time": 0.1149742603302002,
      "step": 45252
    },
    {
      "epoch": 0.0002761962890625,
      "step": 45252,
      "training_step_time": 0.390439510345459
    },
    {
      "epoch": 0.000276202392578125,
      "model_forward_time": 0.1148996353149414,
      "step": 45253
    },
    {
      "epoch": 0.000276202392578125,
      "step": 45253,
      "training_step_time": 0.39492225646972656
    },
    {
      "epoch": 0.00027620849609375,
      "model_forward_time": 0.11481618881225586,
      "step": 45254
    },
    {
      "epoch": 0.00027620849609375,
      "step": 45254,
      "training_step_time": 0.39412593841552734
    },
    {
      "epoch": 0.000276214599609375,
      "model_forward_time": 0.1151130199432373,
      "step": 45255
    },
    {
      "epoch": 0.000276214599609375,
      "step": 45255,
      "training_step_time": 0.5548806190490723
    },
    {
      "epoch": 0.000276220703125,
      "model_forward_time": 0.11487746238708496,
      "step": 45256
    },
    {
      "epoch": 0.000276220703125,
      "step": 45256,
      "training_step_time": 0.9097423553466797
    },
    {
      "epoch": 0.000276226806640625,
      "model_forward_time": 0.11454200744628906,
      "step": 45257
    },
    {
      "epoch": 0.000276226806640625,
      "step": 45257,
      "training_step_time": 0.39557671546936035
    },
    {
      "epoch": 0.00027623291015625,
      "model_forward_time": 0.11450767517089844,
      "step": 45258
    },
    {
      "epoch": 0.00027623291015625,
      "step": 45258,
      "training_step_time": 0.47484683990478516
    },
    {
      "epoch": 0.000276239013671875,
      "model_forward_time": 0.1143956184387207,
      "step": 45259
    },
    {
      "epoch": 0.000276239013671875,
      "step": 45259,
      "training_step_time": 0.36226749420166016
    },
    {
      "epoch": 0.0002762451171875,
      "grad_norm": 0.0827944278717041,
      "learning_rate": 1.5612267589717805e-05,
      "loss": 0.0397,
      "step": 45260
    },
    {
      "epoch": 0.0002762451171875,
      "model_forward_time": 0.11411547660827637,
      "step": 45260
    },
    {
      "epoch": 0.0002762451171875,
      "step": 45260,
      "training_step_time": 0.46707844734191895
    },
    {
      "epoch": 0.000276251220703125,
      "model_forward_time": 0.11430859565734863,
      "step": 45261
    },
    {
      "epoch": 0.000276251220703125,
      "step": 45261,
      "training_step_time": 0.45288705825805664
    },
    {
      "epoch": 0.00027625732421875,
      "model_forward_time": 0.11430168151855469,
      "step": 45262
    },
    {
      "epoch": 0.00027625732421875,
      "step": 45262,
      "training_step_time": 0.40773868560791016
    },
    {
      "epoch": 0.000276263427734375,
      "model_forward_time": 0.11423230171203613,
      "step": 45263
    },
    {
      "epoch": 0.000276263427734375,
      "step": 45263,
      "training_step_time": 0.38112354278564453
    },
    {
      "epoch": 0.00027626953125,
      "model_forward_time": 0.1142416000366211,
      "step": 45264
    },
    {
      "epoch": 0.00027626953125,
      "step": 45264,
      "training_step_time": 0.38669514656066895
    },
    {
      "epoch": 0.000276275634765625,
      "model_forward_time": 0.11477065086364746,
      "step": 45265
    },
    {
      "epoch": 0.000276275634765625,
      "step": 45265,
      "training_step_time": 0.3877098560333252
    },
    {
      "epoch": 0.00027628173828125,
      "model_forward_time": 0.1151268482208252,
      "step": 45266
    },
    {
      "epoch": 0.00027628173828125,
      "step": 45266,
      "training_step_time": 0.3919258117675781
    },
    {
      "epoch": 0.000276287841796875,
      "model_forward_time": 0.11530900001525879,
      "step": 45267
    },
    {
      "epoch": 0.000276287841796875,
      "step": 45267,
      "training_step_time": 0.38239526748657227
    },
    {
      "epoch": 0.0002762939453125,
      "model_forward_time": 0.11488199234008789,
      "step": 45268
    },
    {
      "epoch": 0.0002762939453125,
      "step": 45268,
      "training_step_time": 0.6806795597076416
    },
    {
      "epoch": 0.000276300048828125,
      "model_forward_time": 0.1148378849029541,
      "step": 45269
    },
    {
      "epoch": 0.000276300048828125,
      "step": 45269,
      "training_step_time": 0.4582812786102295
    },
    {
      "epoch": 0.00027630615234375,
      "grad_norm": 0.10847795009613037,
      "learning_rate": 1.5592267393993716e-05,
      "loss": 0.038,
      "step": 45270
    },
    {
      "epoch": 0.00027630615234375,
      "model_forward_time": 0.11428356170654297,
      "step": 45270
    },
    {
      "epoch": 0.00027630615234375,
      "step": 45270,
      "training_step_time": 0.40167713165283203
    },
    {
      "epoch": 0.000276312255859375,
      "model_forward_time": 0.11476778984069824,
      "step": 45271
    },
    {
      "epoch": 0.000276312255859375,
      "step": 45271,
      "training_step_time": 0.478818416595459
    },
    {
      "epoch": 0.000276318359375,
      "model_forward_time": 0.11507177352905273,
      "step": 45272
    },
    {
      "epoch": 0.000276318359375,
      "step": 45272,
      "training_step_time": 0.39125776290893555
    },
    {
      "epoch": 0.000276324462890625,
      "model_forward_time": 0.11456680297851562,
      "step": 45273
    },
    {
      "epoch": 0.000276324462890625,
      "step": 45273,
      "training_step_time": 0.36285996437072754
    },
    {
      "epoch": 0.00027633056640625,
      "model_forward_time": 0.11443209648132324,
      "step": 45274
    },
    {
      "epoch": 0.00027633056640625,
      "step": 45274,
      "training_step_time": 0.6931626796722412
    },
    {
      "epoch": 0.000276336669921875,
      "model_forward_time": 0.11406278610229492,
      "step": 45275
    },
    {
      "epoch": 0.000276336669921875,
      "step": 45275,
      "training_step_time": 0.41287994384765625
    },
    {
      "epoch": 0.0002763427734375,
      "model_forward_time": 0.11497306823730469,
      "step": 45276
    },
    {
      "epoch": 0.0002763427734375,
      "step": 45276,
      "training_step_time": 0.3882594108581543
    },
    {
      "epoch": 0.000276348876953125,
      "model_forward_time": 0.11482381820678711,
      "step": 45277
    },
    {
      "epoch": 0.000276348876953125,
      "step": 45277,
      "training_step_time": 0.38840484619140625
    },
    {
      "epoch": 0.00027635498046875,
      "model_forward_time": 0.11517047882080078,
      "step": 45278
    },
    {
      "epoch": 0.00027635498046875,
      "step": 45278,
      "training_step_time": 0.38773632049560547
    },
    {
      "epoch": 0.000276361083984375,
      "model_forward_time": 0.1151285171508789,
      "step": 45279
    },
    {
      "epoch": 0.000276361083984375,
      "step": 45279,
      "training_step_time": 0.38350534439086914
    },
    {
      "epoch": 0.0002763671875,
      "grad_norm": 0.10856185108423233,
      "learning_rate": 1.557227765043027e-05,
      "loss": 0.034,
      "step": 45280
    },
    {
      "epoch": 0.0002763671875,
      "model_forward_time": 0.1149747371673584,
      "step": 45280
    },
    {
      "epoch": 0.0002763671875,
      "step": 45280,
      "training_step_time": 0.9123208522796631
    },
    {
      "epoch": 0.000276373291015625,
      "model_forward_time": 0.1151123046875,
      "step": 45281
    },
    {
      "epoch": 0.000276373291015625,
      "step": 45281,
      "training_step_time": 0.3842341899871826
    },
    {
      "epoch": 0.00027637939453125,
      "model_forward_time": 0.11412930488586426,
      "step": 45282
    },
    {
      "epoch": 0.00027637939453125,
      "step": 45282,
      "training_step_time": 0.4386742115020752
    },
    {
      "epoch": 0.000276385498046875,
      "model_forward_time": 0.11427712440490723,
      "step": 45283
    },
    {
      "epoch": 0.000276385498046875,
      "step": 45283,
      "training_step_time": 0.3963935375213623
    },
    {
      "epoch": 0.0002763916015625,
      "model_forward_time": 0.11454987525939941,
      "step": 45284
    },
    {
      "epoch": 0.0002763916015625,
      "step": 45284,
      "training_step_time": 0.4485337734222412
    },
    {
      "epoch": 0.000276397705078125,
      "model_forward_time": 0.11499905586242676,
      "step": 45285
    },
    {
      "epoch": 0.000276397705078125,
      "step": 45285,
      "training_step_time": 0.47711920738220215
    },
    {
      "epoch": 0.00027640380859375,
      "model_forward_time": 0.11512160301208496,
      "step": 45286
    },
    {
      "epoch": 0.00027640380859375,
      "step": 45286,
      "training_step_time": 0.6504542827606201
    },
    {
      "epoch": 0.000276409912109375,
      "model_forward_time": 0.11517739295959473,
      "step": 45287
    },
    {
      "epoch": 0.000276409912109375,
      "step": 45287,
      "training_step_time": 0.42574644088745117
    },
    {
      "epoch": 0.000276416015625,
      "model_forward_time": 0.11430501937866211,
      "step": 45288
    },
    {
      "epoch": 0.000276416015625,
      "step": 45288,
      "training_step_time": 0.45395946502685547
    },
    {
      "epoch": 0.000276422119140625,
      "model_forward_time": 0.11435270309448242,
      "step": 45289
    },
    {
      "epoch": 0.000276422119140625,
      "step": 45289,
      "training_step_time": 0.3786280155181885
    },
    {
      "epoch": 0.00027642822265625,
      "grad_norm": 0.11865692585706711,
      "learning_rate": 1.5552298365099882e-05,
      "loss": 0.041,
      "step": 45290
    },
    {
      "epoch": 0.00027642822265625,
      "model_forward_time": 0.11458611488342285,
      "step": 45290
    },
    {
      "epoch": 0.00027642822265625,
      "step": 45290,
      "training_step_time": 0.40492725372314453
    },
    {
      "epoch": 0.000276434326171875,
      "model_forward_time": 0.1153719425201416,
      "step": 45291
    },
    {
      "epoch": 0.000276434326171875,
      "step": 45291,
      "training_step_time": 0.3865664005279541
    },
    {
      "epoch": 0.0002764404296875,
      "model_forward_time": 0.11530470848083496,
      "step": 45292
    },
    {
      "epoch": 0.0002764404296875,
      "step": 45292,
      "training_step_time": 0.7951614856719971
    },
    {
      "epoch": 0.000276446533203125,
      "model_forward_time": 0.11375665664672852,
      "step": 45293
    },
    {
      "epoch": 0.000276446533203125,
      "step": 45293,
      "training_step_time": 0.3753788471221924
    },
    {
      "epoch": 0.00027645263671875,
      "model_forward_time": 0.11452889442443848,
      "step": 45294
    },
    {
      "epoch": 0.00027645263671875,
      "step": 45294,
      "training_step_time": 0.39353322982788086
    },
    {
      "epoch": 0.000276458740234375,
      "model_forward_time": 0.1144571304321289,
      "step": 45295
    },
    {
      "epoch": 0.000276458740234375,
      "step": 45295,
      "training_step_time": 0.3946816921234131
    },
    {
      "epoch": 0.00027646484375,
      "model_forward_time": 0.11461424827575684,
      "step": 45296
    },
    {
      "epoch": 0.00027646484375,
      "step": 45296,
      "training_step_time": 0.386721134185791
    },
    {
      "epoch": 0.000276470947265625,
      "model_forward_time": 0.11429262161254883,
      "step": 45297
    },
    {
      "epoch": 0.000276470947265625,
      "step": 45297,
      "training_step_time": 0.47432613372802734
    },
    {
      "epoch": 0.00027647705078125,
      "model_forward_time": 0.11519503593444824,
      "step": 45298
    },
    {
      "epoch": 0.00027647705078125,
      "step": 45298,
      "training_step_time": 0.7477552890777588
    },
    {
      "epoch": 0.000276483154296875,
      "model_forward_time": 0.1150977611541748,
      "step": 45299
    },
    {
      "epoch": 0.000276483154296875,
      "step": 45299,
      "training_step_time": 0.36777234077453613
    },
    {
      "epoch": 0.0002764892578125,
      "grad_norm": 0.0925348550081253,
      "learning_rate": 1.553232954407171e-05,
      "loss": 0.0379,
      "step": 45300
    },
    {
      "epoch": 0.0002764892578125,
      "model_forward_time": 0.11504483222961426,
      "step": 45300
    },
    {
      "epoch": 0.0002764892578125,
      "step": 45300,
      "training_step_time": 0.44094324111938477
    },
    {
      "epoch": 0.000276495361328125,
      "model_forward_time": 0.11399030685424805,
      "step": 45301
    },
    {
      "epoch": 0.000276495361328125,
      "step": 45301,
      "training_step_time": 0.41358113288879395
    },
    {
      "epoch": 0.00027650146484375,
      "model_forward_time": 0.11460447311401367,
      "step": 45302
    },
    {
      "epoch": 0.00027650146484375,
      "step": 45302,
      "training_step_time": 0.3738210201263428
    },
    {
      "epoch": 0.000276507568359375,
      "model_forward_time": 0.11439180374145508,
      "step": 45303
    },
    {
      "epoch": 0.000276507568359375,
      "step": 45303,
      "training_step_time": 0.39354968070983887
    },
    {
      "epoch": 0.000276513671875,
      "model_forward_time": 0.11623382568359375,
      "step": 45304
    },
    {
      "epoch": 0.000276513671875,
      "step": 45304,
      "training_step_time": 0.7147724628448486
    },
    {
      "epoch": 0.000276519775390625,
      "model_forward_time": 0.11448812484741211,
      "step": 45305
    },
    {
      "epoch": 0.000276519775390625,
      "step": 45305,
      "training_step_time": 0.39008307456970215
    },
    {
      "epoch": 0.00027652587890625,
      "model_forward_time": 0.11448454856872559,
      "step": 45306
    },
    {
      "epoch": 0.00027652587890625,
      "step": 45306,
      "training_step_time": 0.38930416107177734
    },
    {
      "epoch": 0.000276531982421875,
      "model_forward_time": 0.1143803596496582,
      "step": 45307
    },
    {
      "epoch": 0.000276531982421875,
      "step": 45307,
      "training_step_time": 0.396395206451416
    },
    {
      "epoch": 0.0002765380859375,
      "model_forward_time": 0.11510348320007324,
      "step": 45308
    },
    {
      "epoch": 0.0002765380859375,
      "step": 45308,
      "training_step_time": 0.4011085033416748
    },
    {
      "epoch": 0.000276544189453125,
      "model_forward_time": 0.11417961120605469,
      "step": 45309
    },
    {
      "epoch": 0.000276544189453125,
      "step": 45309,
      "training_step_time": 0.4358181953430176
    },
    {
      "epoch": 0.00027655029296875,
      "grad_norm": 0.0958179458975792,
      "learning_rate": 1.5512371193411762e-05,
      "loss": 0.0405,
      "step": 45310
    },
    {
      "epoch": 0.00027655029296875,
      "model_forward_time": 0.11494874954223633,
      "step": 45310
    },
    {
      "epoch": 0.00027655029296875,
      "step": 45310,
      "training_step_time": 0.6462230682373047
    },
    {
      "epoch": 0.000276556396484375,
      "model_forward_time": 0.11474132537841797,
      "step": 45311
    },
    {
      "epoch": 0.000276556396484375,
      "step": 45311,
      "training_step_time": 0.3873720169067383
    },
    {
      "epoch": 0.0002765625,
      "model_forward_time": 0.1141805648803711,
      "step": 45312
    },
    {
      "epoch": 0.0002765625,
      "step": 45312,
      "training_step_time": 0.44260382652282715
    },
    {
      "epoch": 0.000276568603515625,
      "model_forward_time": 0.11523818969726562,
      "step": 45313
    },
    {
      "epoch": 0.000276568603515625,
      "step": 45313,
      "training_step_time": 0.4707322120666504
    },
    {
      "epoch": 0.00027657470703125,
      "model_forward_time": 0.11570262908935547,
      "step": 45314
    },
    {
      "epoch": 0.00027657470703125,
      "step": 45314,
      "training_step_time": 0.4265577793121338
    },
    {
      "epoch": 0.000276580810546875,
      "model_forward_time": 0.11440920829772949,
      "step": 45315
    },
    {
      "epoch": 0.000276580810546875,
      "step": 45315,
      "training_step_time": 0.404111385345459
    },
    {
      "epoch": 0.0002765869140625,
      "model_forward_time": 0.1151125431060791,
      "step": 45316
    },
    {
      "epoch": 0.0002765869140625,
      "step": 45316,
      "training_step_time": 0.3914601802825928
    },
    {
      "epoch": 0.000276593017578125,
      "model_forward_time": 0.1148066520690918,
      "step": 45317
    },
    {
      "epoch": 0.000276593017578125,
      "step": 45317,
      "training_step_time": 0.3856062889099121
    },
    {
      "epoch": 0.00027659912109375,
      "model_forward_time": 0.1152961254119873,
      "step": 45318
    },
    {
      "epoch": 0.00027659912109375,
      "step": 45318,
      "training_step_time": 0.4066009521484375
    },
    {
      "epoch": 0.000276605224609375,
      "model_forward_time": 0.11502313613891602,
      "step": 45319
    },
    {
      "epoch": 0.000276605224609375,
      "step": 45319,
      "training_step_time": 0.3920400142669678
    },
    {
      "epoch": 0.000276611328125,
      "grad_norm": 0.08409705013036728,
      "learning_rate": 1.549242331918285e-05,
      "loss": 0.0361,
      "step": 45320
    },
    {
      "epoch": 0.000276611328125,
      "model_forward_time": 0.11596894264221191,
      "step": 45320
    },
    {
      "epoch": 0.000276611328125,
      "step": 45320,
      "training_step_time": 0.39104676246643066
    },
    {
      "epoch": 0.000276617431640625,
      "model_forward_time": 0.11505722999572754,
      "step": 45321
    },
    {
      "epoch": 0.000276617431640625,
      "step": 45321,
      "training_step_time": 0.3863821029663086
    },
    {
      "epoch": 0.00027662353515625,
      "model_forward_time": 0.11532998085021973,
      "step": 45322
    },
    {
      "epoch": 0.00027662353515625,
      "step": 45322,
      "training_step_time": 0.4682776927947998
    },
    {
      "epoch": 0.000276629638671875,
      "model_forward_time": 0.11538910865783691,
      "step": 45323
    },
    {
      "epoch": 0.000276629638671875,
      "step": 45323,
      "training_step_time": 0.4640493392944336
    },
    {
      "epoch": 0.0002766357421875,
      "model_forward_time": 0.11493706703186035,
      "step": 45324
    },
    {
      "epoch": 0.0002766357421875,
      "step": 45324,
      "training_step_time": 0.48681640625
    },
    {
      "epoch": 0.000276641845703125,
      "model_forward_time": 0.11453080177307129,
      "step": 45325
    },
    {
      "epoch": 0.000276641845703125,
      "step": 45325,
      "training_step_time": 0.42785096168518066
    },
    {
      "epoch": 0.00027664794921875,
      "model_forward_time": 0.1148216724395752,
      "step": 45326
    },
    {
      "epoch": 0.00027664794921875,
      "step": 45326,
      "training_step_time": 0.40102291107177734
    },
    {
      "epoch": 0.000276654052734375,
      "model_forward_time": 0.11453843116760254,
      "step": 45327
    },
    {
      "epoch": 0.000276654052734375,
      "step": 45327,
      "training_step_time": 0.38018131256103516
    },
    {
      "epoch": 0.00027666015625,
      "model_forward_time": 0.11449408531188965,
      "step": 45328
    },
    {
      "epoch": 0.00027666015625,
      "step": 45328,
      "training_step_time": 0.4351365566253662
    },
    {
      "epoch": 0.000276666259765625,
      "model_forward_time": 0.11493968963623047,
      "step": 45329
    },
    {
      "epoch": 0.000276666259765625,
      "step": 45329,
      "training_step_time": 0.4097158908843994
    },
    {
      "epoch": 0.00027667236328125,
      "grad_norm": 0.09058485925197601,
      "learning_rate": 1.5472485927444597e-05,
      "loss": 0.0402,
      "step": 45330
    },
    {
      "epoch": 0.00027667236328125,
      "model_forward_time": 0.1152353286743164,
      "step": 45330
    },
    {
      "epoch": 0.00027667236328125,
      "step": 45330,
      "training_step_time": 0.3904702663421631
    },
    {
      "epoch": 0.000276678466796875,
      "model_forward_time": 0.11513280868530273,
      "step": 45331
    },
    {
      "epoch": 0.000276678466796875,
      "step": 45331,
      "training_step_time": 0.3993031978607178
    },
    {
      "epoch": 0.0002766845703125,
      "model_forward_time": 0.11493802070617676,
      "step": 45332
    },
    {
      "epoch": 0.0002766845703125,
      "step": 45332,
      "training_step_time": 0.3883962631225586
    },
    {
      "epoch": 0.000276690673828125,
      "model_forward_time": 0.11490058898925781,
      "step": 45333
    },
    {
      "epoch": 0.000276690673828125,
      "step": 45333,
      "training_step_time": 0.3898298740386963
    },
    {
      "epoch": 0.00027669677734375,
      "model_forward_time": 0.11538982391357422,
      "step": 45334
    },
    {
      "epoch": 0.00027669677734375,
      "step": 45334,
      "training_step_time": 0.46677088737487793
    },
    {
      "epoch": 0.000276702880859375,
      "model_forward_time": 0.11539554595947266,
      "step": 45335
    },
    {
      "epoch": 0.000276702880859375,
      "step": 45335,
      "training_step_time": 0.38982558250427246
    },
    {
      "epoch": 0.000276708984375,
      "model_forward_time": 0.11534476280212402,
      "step": 45336
    },
    {
      "epoch": 0.000276708984375,
      "step": 45336,
      "training_step_time": 0.3966856002807617
    },
    {
      "epoch": 0.000276715087890625,
      "model_forward_time": 0.11502838134765625,
      "step": 45337
    },
    {
      "epoch": 0.000276715087890625,
      "step": 45337,
      "training_step_time": 0.4465630054473877
    },
    {
      "epoch": 0.00027672119140625,
      "model_forward_time": 0.11511397361755371,
      "step": 45338
    },
    {
      "epoch": 0.00027672119140625,
      "step": 45338,
      "training_step_time": 0.4236783981323242
    },
    {
      "epoch": 0.000276727294921875,
      "model_forward_time": 0.11478877067565918,
      "step": 45339
    },
    {
      "epoch": 0.000276727294921875,
      "step": 45339,
      "training_step_time": 0.4770224094390869
    },
    {
      "epoch": 0.0002767333984375,
      "grad_norm": 0.08594084531068802,
      "learning_rate": 1.5452559024253487e-05,
      "loss": 0.0355,
      "step": 45340
    },
    {
      "epoch": 0.0002767333984375,
      "model_forward_time": 0.11478638648986816,
      "step": 45340
    },
    {
      "epoch": 0.0002767333984375,
      "step": 45340,
      "training_step_time": 0.4533112049102783
    },
    {
      "epoch": 0.000276739501953125,
      "model_forward_time": 0.11510729789733887,
      "step": 45341
    },
    {
      "epoch": 0.000276739501953125,
      "step": 45341,
      "training_step_time": 0.39637136459350586
    },
    {
      "epoch": 0.00027674560546875,
      "model_forward_time": 0.11559629440307617,
      "step": 45342
    },
    {
      "epoch": 0.00027674560546875,
      "step": 45342,
      "training_step_time": 0.4274253845214844
    },
    {
      "epoch": 0.000276751708984375,
      "model_forward_time": 0.11540699005126953,
      "step": 45343
    },
    {
      "epoch": 0.000276751708984375,
      "step": 45343,
      "training_step_time": 0.5302555561065674
    },
    {
      "epoch": 0.0002767578125,
      "model_forward_time": 0.11516475677490234,
      "step": 45344
    },
    {
      "epoch": 0.0002767578125,
      "step": 45344,
      "training_step_time": 0.3959381580352783
    },
    {
      "epoch": 0.000276763916015625,
      "model_forward_time": 0.1157071590423584,
      "step": 45345
    },
    {
      "epoch": 0.000276763916015625,
      "step": 45345,
      "training_step_time": 0.38817262649536133
    },
    {
      "epoch": 0.00027677001953125,
      "model_forward_time": 0.1161794662475586,
      "step": 45346
    },
    {
      "epoch": 0.00027677001953125,
      "step": 45346,
      "training_step_time": 0.42519044876098633
    },
    {
      "epoch": 0.000276776123046875,
      "model_forward_time": 0.11497092247009277,
      "step": 45347
    },
    {
      "epoch": 0.000276776123046875,
      "step": 45347,
      "training_step_time": 0.39781880378723145
    },
    {
      "epoch": 0.0002767822265625,
      "model_forward_time": 0.1144101619720459,
      "step": 45348
    },
    {
      "epoch": 0.0002767822265625,
      "step": 45348,
      "training_step_time": 0.39893269538879395
    },
    {
      "epoch": 0.000276788330078125,
      "model_forward_time": 0.11524677276611328,
      "step": 45349
    },
    {
      "epoch": 0.000276788330078125,
      "step": 45349,
      "training_step_time": 0.39256882667541504
    },
    {
      "epoch": 0.00027679443359375,
      "grad_norm": 0.08586549758911133,
      "learning_rate": 1.5432642615662774e-05,
      "loss": 0.0333,
      "step": 45350
    },
    {
      "epoch": 0.00027679443359375,
      "model_forward_time": 0.11498403549194336,
      "step": 45350
    },
    {
      "epoch": 0.00027679443359375,
      "step": 45350,
      "training_step_time": 0.3959476947784424
    },
    {
      "epoch": 0.000276800537109375,
      "model_forward_time": 0.11531686782836914,
      "step": 45351
    },
    {
      "epoch": 0.000276800537109375,
      "step": 45351,
      "training_step_time": 0.3878903388977051
    },
    {
      "epoch": 0.000276806640625,
      "model_forward_time": 0.11510753631591797,
      "step": 45352
    },
    {
      "epoch": 0.000276806640625,
      "step": 45352,
      "training_step_time": 0.5078451633453369
    },
    {
      "epoch": 0.000276812744140625,
      "model_forward_time": 0.11511778831481934,
      "step": 45353
    },
    {
      "epoch": 0.000276812744140625,
      "step": 45353,
      "training_step_time": 0.43723368644714355
    },
    {
      "epoch": 0.00027681884765625,
      "model_forward_time": 0.11508059501647949,
      "step": 45354
    },
    {
      "epoch": 0.00027681884765625,
      "step": 45354,
      "training_step_time": 0.40558862686157227
    },
    {
      "epoch": 0.000276824951171875,
      "model_forward_time": 0.11505794525146484,
      "step": 45355
    },
    {
      "epoch": 0.000276824951171875,
      "step": 45355,
      "training_step_time": 0.39546704292297363
    },
    {
      "epoch": 0.0002768310546875,
      "model_forward_time": 0.11539673805236816,
      "step": 45356
    },
    {
      "epoch": 0.0002768310546875,
      "step": 45356,
      "training_step_time": 0.39179515838623047
    },
    {
      "epoch": 0.000276837158203125,
      "model_forward_time": 0.11502528190612793,
      "step": 45357
    },
    {
      "epoch": 0.000276837158203125,
      "step": 45357,
      "training_step_time": 0.4531252384185791
    },
    {
      "epoch": 0.00027684326171875,
      "model_forward_time": 0.1154935359954834,
      "step": 45358
    },
    {
      "epoch": 0.00027684326171875,
      "step": 45358,
      "training_step_time": 0.5087344646453857
    },
    {
      "epoch": 0.000276849365234375,
      "model_forward_time": 0.11506032943725586,
      "step": 45359
    },
    {
      "epoch": 0.000276849365234375,
      "step": 45359,
      "training_step_time": 0.38439512252807617
    },
    {
      "epoch": 0.00027685546875,
      "grad_norm": 0.10421150922775269,
      "learning_rate": 1.5412736707722537e-05,
      "loss": 0.0426,
      "step": 45360
    },
    {
      "epoch": 0.00027685546875,
      "model_forward_time": 0.11547398567199707,
      "step": 45360
    },
    {
      "epoch": 0.00027685546875,
      "step": 45360,
      "training_step_time": 0.4124884605407715
    },
    {
      "epoch": 0.000276861572265625,
      "model_forward_time": 0.11458420753479004,
      "step": 45361
    },
    {
      "epoch": 0.000276861572265625,
      "step": 45361,
      "training_step_time": 0.3903481960296631
    },
    {
      "epoch": 0.00027686767578125,
      "model_forward_time": 0.1150057315826416,
      "step": 45362
    },
    {
      "epoch": 0.00027686767578125,
      "step": 45362,
      "training_step_time": 0.38593244552612305
    },
    {
      "epoch": 0.000276873779296875,
      "model_forward_time": 0.11539268493652344,
      "step": 45363
    },
    {
      "epoch": 0.000276873779296875,
      "step": 45363,
      "training_step_time": 0.39539265632629395
    },
    {
      "epoch": 0.0002768798828125,
      "model_forward_time": 0.11460137367248535,
      "step": 45364
    },
    {
      "epoch": 0.0002768798828125,
      "step": 45364,
      "training_step_time": 0.5109212398529053
    },
    {
      "epoch": 0.000276885986328125,
      "model_forward_time": 0.11503195762634277,
      "step": 45365
    },
    {
      "epoch": 0.000276885986328125,
      "step": 45365,
      "training_step_time": 0.38686513900756836
    },
    {
      "epoch": 0.00027689208984375,
      "model_forward_time": 0.11486101150512695,
      "step": 45366
    },
    {
      "epoch": 0.00027689208984375,
      "step": 45366,
      "training_step_time": 0.4039881229400635
    },
    {
      "epoch": 0.000276898193359375,
      "model_forward_time": 0.11424946784973145,
      "step": 45367
    },
    {
      "epoch": 0.000276898193359375,
      "step": 45367,
      "training_step_time": 0.4540743827819824
    },
    {
      "epoch": 0.000276904296875,
      "model_forward_time": 0.11525654792785645,
      "step": 45368
    },
    {
      "epoch": 0.000276904296875,
      "step": 45368,
      "training_step_time": 0.39357638359069824
    },
    {
      "epoch": 0.000276910400390625,
      "model_forward_time": 0.1152350902557373,
      "step": 45369
    },
    {
      "epoch": 0.000276910400390625,
      "step": 45369,
      "training_step_time": 0.3838644027709961
    },
    {
      "epoch": 0.00027691650390625,
      "grad_norm": 0.17930330336093903,
      "learning_rate": 1.5392841306479666e-05,
      "loss": 0.0404,
      "step": 45370
    },
    {
      "epoch": 0.00027691650390625,
      "model_forward_time": 0.11465072631835938,
      "step": 45370
    },
    {
      "epoch": 0.00027691650390625,
      "step": 45370,
      "training_step_time": 0.4423692226409912
    },
    {
      "epoch": 0.000276922607421875,
      "model_forward_time": 0.1146860122680664,
      "step": 45371
    },
    {
      "epoch": 0.000276922607421875,
      "step": 45371,
      "training_step_time": 0.36733365058898926
    },
    {
      "epoch": 0.0002769287109375,
      "model_forward_time": 0.1154928207397461,
      "step": 45372
    },
    {
      "epoch": 0.0002769287109375,
      "step": 45372,
      "training_step_time": 0.4581944942474365
    },
    {
      "epoch": 0.000276934814453125,
      "model_forward_time": 0.11469602584838867,
      "step": 45373
    },
    {
      "epoch": 0.000276934814453125,
      "step": 45373,
      "training_step_time": 0.446352481842041
    },
    {
      "epoch": 0.00027694091796875,
      "model_forward_time": 0.11527633666992188,
      "step": 45374
    },
    {
      "epoch": 0.00027694091796875,
      "step": 45374,
      "training_step_time": 0.3880727291107178
    },
    {
      "epoch": 0.000276947021484375,
      "model_forward_time": 0.11482024192810059,
      "step": 45375
    },
    {
      "epoch": 0.000276947021484375,
      "step": 45375,
      "training_step_time": 0.4021451473236084
    },
    {
      "epoch": 0.000276953125,
      "model_forward_time": 0.1157982349395752,
      "step": 45376
    },
    {
      "epoch": 0.000276953125,
      "step": 45376,
      "training_step_time": 0.3917245864868164
    },
    {
      "epoch": 0.000276959228515625,
      "model_forward_time": 0.11464500427246094,
      "step": 45377
    },
    {
      "epoch": 0.000276959228515625,
      "step": 45377,
      "training_step_time": 0.39338088035583496
    },
    {
      "epoch": 0.00027696533203125,
      "model_forward_time": 0.1149301528930664,
      "step": 45378
    },
    {
      "epoch": 0.00027696533203125,
      "step": 45378,
      "training_step_time": 0.38701772689819336
    },
    {
      "epoch": 0.000276971435546875,
      "model_forward_time": 0.11490821838378906,
      "step": 45379
    },
    {
      "epoch": 0.000276971435546875,
      "step": 45379,
      "training_step_time": 0.39701128005981445
    },
    {
      "epoch": 0.0002769775390625,
      "grad_norm": 0.07497429847717285,
      "learning_rate": 1.537295641797785e-05,
      "loss": 0.0385,
      "step": 45380
    },
    {
      "epoch": 0.0002769775390625,
      "model_forward_time": 0.11526155471801758,
      "step": 45380
    },
    {
      "epoch": 0.0002769775390625,
      "step": 45380,
      "training_step_time": 0.47251272201538086
    },
    {
      "epoch": 0.000276983642578125,
      "model_forward_time": 0.11583662033081055,
      "step": 45381
    },
    {
      "epoch": 0.000276983642578125,
      "step": 45381,
      "training_step_time": 0.42192578315734863
    },
    {
      "epoch": 0.00027698974609375,
      "model_forward_time": 0.11445498466491699,
      "step": 45382
    },
    {
      "epoch": 0.00027698974609375,
      "step": 45382,
      "training_step_time": 0.4908456802368164
    },
    {
      "epoch": 0.000276995849609375,
      "model_forward_time": 0.11496496200561523,
      "step": 45383
    },
    {
      "epoch": 0.000276995849609375,
      "step": 45383,
      "training_step_time": 0.39510440826416016
    },
    {
      "epoch": 0.000277001953125,
      "model_forward_time": 0.11502981185913086,
      "step": 45384
    },
    {
      "epoch": 0.000277001953125,
      "step": 45384,
      "training_step_time": 0.40106964111328125
    },
    {
      "epoch": 0.000277008056640625,
      "model_forward_time": 0.11508059501647949,
      "step": 45385
    },
    {
      "epoch": 0.000277008056640625,
      "step": 45385,
      "training_step_time": 0.3913412094116211
    },
    {
      "epoch": 0.00027701416015625,
      "model_forward_time": 0.11522197723388672,
      "step": 45386
    },
    {
      "epoch": 0.00027701416015625,
      "step": 45386,
      "training_step_time": 0.4717893600463867
    },
    {
      "epoch": 0.000277020263671875,
      "model_forward_time": 0.11549901962280273,
      "step": 45387
    },
    {
      "epoch": 0.000277020263671875,
      "step": 45387,
      "training_step_time": 0.47369909286499023
    },
    {
      "epoch": 0.0002770263671875,
      "model_forward_time": 0.11492753028869629,
      "step": 45388
    },
    {
      "epoch": 0.0002770263671875,
      "step": 45388,
      "training_step_time": 0.5432054996490479
    },
    {
      "epoch": 0.000277032470703125,
      "model_forward_time": 0.11514806747436523,
      "step": 45389
    },
    {
      "epoch": 0.000277032470703125,
      "step": 45389,
      "training_step_time": 0.3947904109954834
    },
    {
      "epoch": 0.00027703857421875,
      "grad_norm": 0.11350462585687637,
      "learning_rate": 1.5353082048257596e-05,
      "loss": 0.0402,
      "step": 45390
    },
    {
      "epoch": 0.00027703857421875,
      "model_forward_time": 0.11459064483642578,
      "step": 45390
    },
    {
      "epoch": 0.00027703857421875,
      "step": 45390,
      "training_step_time": 0.3982114791870117
    },
    {
      "epoch": 0.000277044677734375,
      "model_forward_time": 0.11467361450195312,
      "step": 45391
    },
    {
      "epoch": 0.000277044677734375,
      "step": 45391,
      "training_step_time": 0.391002893447876
    },
    {
      "epoch": 0.00027705078125,
      "model_forward_time": 0.1153414249420166,
      "step": 45392
    },
    {
      "epoch": 0.00027705078125,
      "step": 45392,
      "training_step_time": 0.39812684059143066
    },
    {
      "epoch": 0.000277056884765625,
      "model_forward_time": 0.11593484878540039,
      "step": 45393
    },
    {
      "epoch": 0.000277056884765625,
      "step": 45393,
      "training_step_time": 0.427093505859375
    },
    {
      "epoch": 0.00027706298828125,
      "model_forward_time": 0.11542344093322754,
      "step": 45394
    },
    {
      "epoch": 0.00027706298828125,
      "step": 45394,
      "training_step_time": 0.39751720428466797
    },
    {
      "epoch": 0.000277069091796875,
      "model_forward_time": 0.11496782302856445,
      "step": 45395
    },
    {
      "epoch": 0.000277069091796875,
      "step": 45395,
      "training_step_time": 0.47852563858032227
    },
    {
      "epoch": 0.0002770751953125,
      "model_forward_time": 0.11471199989318848,
      "step": 45396
    },
    {
      "epoch": 0.0002770751953125,
      "step": 45396,
      "training_step_time": 0.45593714714050293
    },
    {
      "epoch": 0.000277081298828125,
      "model_forward_time": 0.11624407768249512,
      "step": 45397
    },
    {
      "epoch": 0.000277081298828125,
      "step": 45397,
      "training_step_time": 0.4038078784942627
    },
    {
      "epoch": 0.00027708740234375,
      "model_forward_time": 0.11492681503295898,
      "step": 45398
    },
    {
      "epoch": 0.00027708740234375,
      "step": 45398,
      "training_step_time": 0.3912982940673828
    },
    {
      "epoch": 0.000277093505859375,
      "model_forward_time": 0.1149287223815918,
      "step": 45399
    },
    {
      "epoch": 0.000277093505859375,
      "step": 45399,
      "training_step_time": 0.3915119171142578
    },
    {
      "epoch": 0.000277099609375,
      "grad_norm": 0.1036829724907875,
      "learning_rate": 1.5333218203356243e-05,
      "loss": 0.0371,
      "step": 45400
    },
    {
      "epoch": 0.000277099609375,
      "model_forward_time": 0.11483907699584961,
      "step": 45400
    },
    {
      "epoch": 0.000277099609375,
      "step": 45400,
      "training_step_time": 0.37258052825927734
    },
    {
      "epoch": 0.000277105712890625,
      "model_forward_time": 0.11578488349914551,
      "step": 45401
    },
    {
      "epoch": 0.000277105712890625,
      "step": 45401,
      "training_step_time": 0.4495561122894287
    },
    {
      "epoch": 0.00027711181640625,
      "model_forward_time": 0.11543989181518555,
      "step": 45402
    },
    {
      "epoch": 0.00027711181640625,
      "step": 45402,
      "training_step_time": 0.46268582344055176
    },
    {
      "epoch": 0.000277117919921875,
      "model_forward_time": 0.11513614654541016,
      "step": 45403
    },
    {
      "epoch": 0.000277117919921875,
      "step": 45403,
      "training_step_time": 0.39942359924316406
    },
    {
      "epoch": 0.0002771240234375,
      "model_forward_time": 0.11500072479248047,
      "step": 45404
    },
    {
      "epoch": 0.0002771240234375,
      "step": 45404,
      "training_step_time": 0.406785249710083
    },
    {
      "epoch": 0.000277130126953125,
      "model_forward_time": 0.11487579345703125,
      "step": 45405
    },
    {
      "epoch": 0.000277130126953125,
      "step": 45405,
      "training_step_time": 0.391326904296875
    },
    {
      "epoch": 0.00027713623046875,
      "model_forward_time": 0.11554837226867676,
      "step": 45406
    },
    {
      "epoch": 0.00027713623046875,
      "step": 45406,
      "training_step_time": 0.49037837982177734
    },
    {
      "epoch": 0.000277142333984375,
      "model_forward_time": 0.11532855033874512,
      "step": 45407
    },
    {
      "epoch": 0.000277142333984375,
      "step": 45407,
      "training_step_time": 0.4277524948120117
    },
    {
      "epoch": 0.0002771484375,
      "model_forward_time": 0.11485457420349121,
      "step": 45408
    },
    {
      "epoch": 0.0002771484375,
      "step": 45408,
      "training_step_time": 0.3911595344543457
    },
    {
      "epoch": 0.000277154541015625,
      "model_forward_time": 0.11603784561157227,
      "step": 45409
    },
    {
      "epoch": 0.000277154541015625,
      "step": 45409,
      "training_step_time": 0.41622233390808105
    },
    {
      "epoch": 0.00027716064453125,
      "grad_norm": 0.14297211170196533,
      "learning_rate": 1.531336488930784e-05,
      "loss": 0.0388,
      "step": 45410
    },
    {
      "epoch": 0.00027716064453125,
      "model_forward_time": 0.11578130722045898,
      "step": 45410
    },
    {
      "epoch": 0.00027716064453125,
      "step": 45410,
      "training_step_time": 0.4078028202056885
    },
    {
      "epoch": 0.000277166748046875,
      "model_forward_time": 0.11459898948669434,
      "step": 45411
    },
    {
      "epoch": 0.000277166748046875,
      "step": 45411,
      "training_step_time": 0.44026637077331543
    },
    {
      "epoch": 0.0002771728515625,
      "model_forward_time": 0.11508369445800781,
      "step": 45412
    },
    {
      "epoch": 0.0002771728515625,
      "step": 45412,
      "training_step_time": 0.47396206855773926
    },
    {
      "epoch": 0.000277178955078125,
      "model_forward_time": 0.11475181579589844,
      "step": 45413
    },
    {
      "epoch": 0.000277178955078125,
      "step": 45413,
      "training_step_time": 0.39112067222595215
    },
    {
      "epoch": 0.00027718505859375,
      "model_forward_time": 0.11522984504699707,
      "step": 45414
    },
    {
      "epoch": 0.00027718505859375,
      "step": 45414,
      "training_step_time": 0.3914468288421631
    },
    {
      "epoch": 0.000277191162109375,
      "model_forward_time": 0.11532711982727051,
      "step": 45415
    },
    {
      "epoch": 0.000277191162109375,
      "step": 45415,
      "training_step_time": 0.4642806053161621
    },
    {
      "epoch": 0.000277197265625,
      "model_forward_time": 0.11578917503356934,
      "step": 45416
    },
    {
      "epoch": 0.000277197265625,
      "step": 45416,
      "training_step_time": 0.49054837226867676
    },
    {
      "epoch": 0.000277203369140625,
      "model_forward_time": 0.11504387855529785,
      "step": 45417
    },
    {
      "epoch": 0.000277203369140625,
      "step": 45417,
      "training_step_time": 0.37762975692749023
    },
    {
      "epoch": 0.00027720947265625,
      "model_forward_time": 0.11613774299621582,
      "step": 45418
    },
    {
      "epoch": 0.00027720947265625,
      "step": 45418,
      "training_step_time": 0.55637526512146
    },
    {
      "epoch": 0.000277215576171875,
      "model_forward_time": 0.11409568786621094,
      "step": 45419
    },
    {
      "epoch": 0.000277215576171875,
      "step": 45419,
      "training_step_time": 0.3764193058013916
    },
    {
      "epoch": 0.0002772216796875,
      "grad_norm": 0.1760345846414566,
      "learning_rate": 1.5293522112143373e-05,
      "loss": 0.0396,
      "step": 45420
    },
    {
      "epoch": 0.0002772216796875,
      "model_forward_time": 0.11442065238952637,
      "step": 45420
    },
    {
      "epoch": 0.0002772216796875,
      "step": 45420,
      "training_step_time": 0.386122465133667
    },
    {
      "epoch": 0.000277227783203125,
      "model_forward_time": 0.11469769477844238,
      "step": 45421
    },
    {
      "epoch": 0.000277227783203125,
      "step": 45421,
      "training_step_time": 0.4221179485321045
    },
    {
      "epoch": 0.00027723388671875,
      "model_forward_time": 0.11472153663635254,
      "step": 45422
    },
    {
      "epoch": 0.00027723388671875,
      "step": 45422,
      "training_step_time": 0.3935282230377197
    },
    {
      "epoch": 0.000277239990234375,
      "model_forward_time": 0.11484909057617188,
      "step": 45423
    },
    {
      "epoch": 0.000277239990234375,
      "step": 45423,
      "training_step_time": 0.436190128326416
    },
    {
      "epoch": 0.00027724609375,
      "model_forward_time": 0.11483883857727051,
      "step": 45424
    },
    {
      "epoch": 0.00027724609375,
      "step": 45424,
      "training_step_time": 0.5772404670715332
    },
    {
      "epoch": 0.000277252197265625,
      "model_forward_time": 0.11475706100463867,
      "step": 45425
    },
    {
      "epoch": 0.000277252197265625,
      "step": 45425,
      "training_step_time": 0.38837742805480957
    },
    {
      "epoch": 0.00027725830078125,
      "model_forward_time": 0.1152658462524414,
      "step": 45426
    },
    {
      "epoch": 0.00027725830078125,
      "step": 45426,
      "training_step_time": 0.3904449939727783
    },
    {
      "epoch": 0.000277264404296875,
      "model_forward_time": 0.11474466323852539,
      "step": 45427
    },
    {
      "epoch": 0.000277264404296875,
      "step": 45427,
      "training_step_time": 0.3877749443054199
    },
    {
      "epoch": 0.0002772705078125,
      "model_forward_time": 0.11507081985473633,
      "step": 45428
    },
    {
      "epoch": 0.0002772705078125,
      "step": 45428,
      "training_step_time": 0.39487290382385254
    },
    {
      "epoch": 0.000277276611328125,
      "model_forward_time": 0.11505627632141113,
      "step": 45429
    },
    {
      "epoch": 0.000277276611328125,
      "step": 45429,
      "training_step_time": 0.41979384422302246
    },
    {
      "epoch": 0.00027728271484375,
      "grad_norm": 0.1187586858868599,
      "learning_rate": 1.5273689877890485e-05,
      "loss": 0.0372,
      "step": 45430
    },
    {
      "epoch": 0.00027728271484375,
      "model_forward_time": 0.11522245407104492,
      "step": 45430
    },
    {
      "epoch": 0.00027728271484375,
      "step": 45430,
      "training_step_time": 0.9791848659515381
    },
    {
      "epoch": 0.000277288818359375,
      "model_forward_time": 0.11373043060302734,
      "step": 45431
    },
    {
      "epoch": 0.000277288818359375,
      "step": 45431,
      "training_step_time": 0.3794381618499756
    },
    {
      "epoch": 0.000277294921875,
      "model_forward_time": 0.11509227752685547,
      "step": 45432
    },
    {
      "epoch": 0.000277294921875,
      "step": 45432,
      "training_step_time": 0.3779103755950928
    },
    {
      "epoch": 0.000277301025390625,
      "model_forward_time": 0.11442947387695312,
      "step": 45433
    },
    {
      "epoch": 0.000277301025390625,
      "step": 45433,
      "training_step_time": 0.4201960563659668
    },
    {
      "epoch": 0.00027730712890625,
      "model_forward_time": 0.11478996276855469,
      "step": 45434
    },
    {
      "epoch": 0.00027730712890625,
      "step": 45434,
      "training_step_time": 0.3820972442626953
    },
    {
      "epoch": 0.000277313232421875,
      "model_forward_time": 0.11509585380554199,
      "step": 45435
    },
    {
      "epoch": 0.000277313232421875,
      "step": 45435,
      "training_step_time": 0.38671016693115234
    },
    {
      "epoch": 0.0002773193359375,
      "model_forward_time": 0.1149435043334961,
      "step": 45436
    },
    {
      "epoch": 0.0002773193359375,
      "step": 45436,
      "training_step_time": 0.7005767822265625
    },
    {
      "epoch": 0.000277325439453125,
      "model_forward_time": 0.11424899101257324,
      "step": 45437
    },
    {
      "epoch": 0.000277325439453125,
      "step": 45437,
      "training_step_time": 0.40767788887023926
    },
    {
      "epoch": 0.00027733154296875,
      "model_forward_time": 0.11533546447753906,
      "step": 45438
    },
    {
      "epoch": 0.00027733154296875,
      "step": 45438,
      "training_step_time": 0.39783644676208496
    },
    {
      "epoch": 0.000277337646484375,
      "model_forward_time": 0.11461591720581055,
      "step": 45439
    },
    {
      "epoch": 0.000277337646484375,
      "step": 45439,
      "training_step_time": 0.38718676567077637
    },
    {
      "epoch": 0.00027734375,
      "grad_norm": 0.08965804427862167,
      "learning_rate": 1.5253868192573729e-05,
      "loss": 0.0377,
      "step": 45440
    },
    {
      "epoch": 0.00027734375,
      "model_forward_time": 0.11501550674438477,
      "step": 45440
    },
    {
      "epoch": 0.00027734375,
      "step": 45440,
      "training_step_time": 0.39403510093688965
    },
    {
      "epoch": 0.000277349853515625,
      "model_forward_time": 0.11466550827026367,
      "step": 45441
    },
    {
      "epoch": 0.000277349853515625,
      "step": 45441,
      "training_step_time": 0.3911721706390381
    },
    {
      "epoch": 0.00027735595703125,
      "model_forward_time": 0.1155693531036377,
      "step": 45442
    },
    {
      "epoch": 0.00027735595703125,
      "step": 45442,
      "training_step_time": 1.0376789569854736
    },
    {
      "epoch": 0.000277362060546875,
      "model_forward_time": 0.11521792411804199,
      "step": 45443
    },
    {
      "epoch": 0.000277362060546875,
      "step": 45443,
      "training_step_time": 0.4297823905944824
    },
    {
      "epoch": 0.0002773681640625,
      "model_forward_time": 0.11472439765930176,
      "step": 45444
    },
    {
      "epoch": 0.0002773681640625,
      "step": 45444,
      "training_step_time": 0.37181997299194336
    },
    {
      "epoch": 0.000277374267578125,
      "model_forward_time": 0.11411714553833008,
      "step": 45445
    },
    {
      "epoch": 0.000277374267578125,
      "step": 45445,
      "training_step_time": 0.42545008659362793
    },
    {
      "epoch": 0.00027738037109375,
      "model_forward_time": 0.11424469947814941,
      "step": 45446
    },
    {
      "epoch": 0.00027738037109375,
      "step": 45446,
      "training_step_time": 0.4012002944946289
    },
    {
      "epoch": 0.000277386474609375,
      "model_forward_time": 0.1137387752532959,
      "step": 45447
    },
    {
      "epoch": 0.000277386474609375,
      "step": 45447,
      "training_step_time": 0.37724995613098145
    },
    {
      "epoch": 0.000277392578125,
      "model_forward_time": 0.11436820030212402,
      "step": 45448
    },
    {
      "epoch": 0.000277392578125,
      "step": 45448,
      "training_step_time": 0.7527651786804199
    },
    {
      "epoch": 0.000277398681640625,
      "model_forward_time": 0.11459708213806152,
      "step": 45449
    },
    {
      "epoch": 0.000277398681640625,
      "step": 45449,
      "training_step_time": 0.49309635162353516
    },
    {
      "epoch": 0.00027740478515625,
      "grad_norm": 0.13312587141990662,
      "learning_rate": 1.5234057062214402e-05,
      "loss": 0.0365,
      "step": 45450
    },
    {
      "epoch": 0.00027740478515625,
      "model_forward_time": 0.11420226097106934,
      "step": 45450
    },
    {
      "epoch": 0.00027740478515625,
      "step": 45450,
      "training_step_time": 0.38312602043151855
    },
    {
      "epoch": 0.000277410888671875,
      "model_forward_time": 0.11421751976013184,
      "step": 45451
    },
    {
      "epoch": 0.000277410888671875,
      "step": 45451,
      "training_step_time": 0.46315526962280273
    },
    {
      "epoch": 0.0002774169921875,
      "model_forward_time": 0.11442255973815918,
      "step": 45452
    },
    {
      "epoch": 0.0002774169921875,
      "step": 45452,
      "training_step_time": 0.3820788860321045
    },
    {
      "epoch": 0.000277423095703125,
      "model_forward_time": 0.11464929580688477,
      "step": 45453
    },
    {
      "epoch": 0.000277423095703125,
      "step": 45453,
      "training_step_time": 0.3989400863647461
    },
    {
      "epoch": 0.00027742919921875,
      "model_forward_time": 0.11482787132263184,
      "step": 45454
    },
    {
      "epoch": 0.00027742919921875,
      "step": 45454,
      "training_step_time": 0.7667908668518066
    },
    {
      "epoch": 0.000277435302734375,
      "model_forward_time": 0.11406826972961426,
      "step": 45455
    },
    {
      "epoch": 0.000277435302734375,
      "step": 45455,
      "training_step_time": 0.427065372467041
    },
    {
      "epoch": 0.00027744140625,
      "model_forward_time": 0.11554837226867676,
      "step": 45456
    },
    {
      "epoch": 0.00027744140625,
      "step": 45456,
      "training_step_time": 0.45502686500549316
    },
    {
      "epoch": 0.000277447509765625,
      "model_forward_time": 0.11558866500854492,
      "step": 45457
    },
    {
      "epoch": 0.000277447509765625,
      "step": 45457,
      "training_step_time": 0.4269533157348633
    },
    {
      "epoch": 0.00027745361328125,
      "model_forward_time": 0.11436867713928223,
      "step": 45458
    },
    {
      "epoch": 0.00027745361328125,
      "step": 45458,
      "training_step_time": 0.4001941680908203
    },
    {
      "epoch": 0.000277459716796875,
      "model_forward_time": 0.11394166946411133,
      "step": 45459
    },
    {
      "epoch": 0.000277459716796875,
      "step": 45459,
      "training_step_time": 0.3849151134490967
    },
    {
      "epoch": 0.0002774658203125,
      "grad_norm": 0.1150214746594429,
      "learning_rate": 1.5214256492830598e-05,
      "loss": 0.0321,
      "step": 45460
    },
    {
      "epoch": 0.0002774658203125,
      "model_forward_time": 0.11463379859924316,
      "step": 45460
    },
    {
      "epoch": 0.0002774658203125,
      "step": 45460,
      "training_step_time": 1.0650789737701416
    },
    {
      "epoch": 0.000277471923828125,
      "model_forward_time": 0.11413979530334473,
      "step": 45461
    },
    {
      "epoch": 0.000277471923828125,
      "step": 45461,
      "training_step_time": 0.38399767875671387
    },
    {
      "epoch": 0.00027747802734375,
      "model_forward_time": 0.11394119262695312,
      "step": 45462
    },
    {
      "epoch": 0.00027747802734375,
      "step": 45462,
      "training_step_time": 0.447587251663208
    },
    {
      "epoch": 0.000277484130859375,
      "model_forward_time": 0.11433076858520508,
      "step": 45463
    },
    {
      "epoch": 0.000277484130859375,
      "step": 45463,
      "training_step_time": 0.4048655033111572
    },
    {
      "epoch": 0.000277490234375,
      "model_forward_time": 0.11422324180603027,
      "step": 45464
    },
    {
      "epoch": 0.000277490234375,
      "step": 45464,
      "training_step_time": 0.37810707092285156
    },
    {
      "epoch": 0.000277496337890625,
      "model_forward_time": 0.11433672904968262,
      "step": 45465
    },
    {
      "epoch": 0.000277496337890625,
      "step": 45465,
      "training_step_time": 0.3871166706085205
    },
    {
      "epoch": 0.00027750244140625,
      "model_forward_time": 0.11473536491394043,
      "step": 45466
    },
    {
      "epoch": 0.00027750244140625,
      "step": 45466,
      "training_step_time": 0.6610970497131348
    },
    {
      "epoch": 0.000277508544921875,
      "model_forward_time": 0.11464953422546387,
      "step": 45467
    },
    {
      "epoch": 0.000277508544921875,
      "step": 45467,
      "training_step_time": 0.4062469005584717
    },
    {
      "epoch": 0.0002775146484375,
      "model_forward_time": 0.11484146118164062,
      "step": 45468
    },
    {
      "epoch": 0.0002775146484375,
      "step": 45468,
      "training_step_time": 0.47624778747558594
    },
    {
      "epoch": 0.000277520751953125,
      "model_forward_time": 0.11498427391052246,
      "step": 45469
    },
    {
      "epoch": 0.000277520751953125,
      "step": 45469,
      "training_step_time": 0.41804027557373047
    },
    {
      "epoch": 0.00027752685546875,
      "grad_norm": 0.11299300193786621,
      "learning_rate": 1.5194466490437203e-05,
      "loss": 0.0356,
      "step": 45470
    },
    {
      "epoch": 0.00027752685546875,
      "model_forward_time": 0.11474299430847168,
      "step": 45470
    },
    {
      "epoch": 0.00027752685546875,
      "step": 45470,
      "training_step_time": 0.43995070457458496
    },
    {
      "epoch": 0.000277532958984375,
      "model_forward_time": 0.11447978019714355,
      "step": 45471
    },
    {
      "epoch": 0.000277532958984375,
      "step": 45471,
      "training_step_time": 0.3833045959472656
    },
    {
      "epoch": 0.0002775390625,
      "model_forward_time": 0.11491799354553223,
      "step": 45472
    },
    {
      "epoch": 0.0002775390625,
      "step": 45472,
      "training_step_time": 0.8142733573913574
    },
    {
      "epoch": 0.000277545166015625,
      "model_forward_time": 0.11445832252502441,
      "step": 45473
    },
    {
      "epoch": 0.000277545166015625,
      "step": 45473,
      "training_step_time": 0.3866913318634033
    },
    {
      "epoch": 0.00027755126953125,
      "model_forward_time": 0.1142425537109375,
      "step": 45474
    },
    {
      "epoch": 0.00027755126953125,
      "step": 45474,
      "training_step_time": 0.4109926223754883
    },
    {
      "epoch": 0.000277557373046875,
      "model_forward_time": 0.11450839042663574,
      "step": 45475
    },
    {
      "epoch": 0.000277557373046875,
      "step": 45475,
      "training_step_time": 0.4087986946105957
    },
    {
      "epoch": 0.0002775634765625,
      "model_forward_time": 0.11463689804077148,
      "step": 45476
    },
    {
      "epoch": 0.0002775634765625,
      "step": 45476,
      "training_step_time": 0.3820981979370117
    },
    {
      "epoch": 0.000277569580078125,
      "model_forward_time": 0.11424636840820312,
      "step": 45477
    },
    {
      "epoch": 0.000277569580078125,
      "step": 45477,
      "training_step_time": 0.37917041778564453
    },
    {
      "epoch": 0.00027757568359375,
      "model_forward_time": 0.11494708061218262,
      "step": 45478
    },
    {
      "epoch": 0.00027757568359375,
      "step": 45478,
      "training_step_time": 0.4315659999847412
    },
    {
      "epoch": 0.000277581787109375,
      "model_forward_time": 0.1148228645324707,
      "step": 45479
    },
    {
      "epoch": 0.000277581787109375,
      "step": 45479,
      "training_step_time": 0.39004969596862793
    },
    {
      "epoch": 0.000277587890625,
      "grad_norm": 0.09682901948690414,
      "learning_rate": 1.517468706104589e-05,
      "loss": 0.0388,
      "step": 45480
    },
    {
      "epoch": 0.000277587890625,
      "model_forward_time": 0.11478996276855469,
      "step": 45480
    },
    {
      "epoch": 0.000277587890625,
      "step": 45480,
      "training_step_time": 0.39588379859924316
    },
    {
      "epoch": 0.000277593994140625,
      "model_forward_time": 0.11570096015930176,
      "step": 45481
    },
    {
      "epoch": 0.000277593994140625,
      "step": 45481,
      "training_step_time": 0.38445496559143066
    },
    {
      "epoch": 0.00027760009765625,
      "model_forward_time": 0.11587762832641602,
      "step": 45482
    },
    {
      "epoch": 0.00027760009765625,
      "step": 45482,
      "training_step_time": 0.452944278717041
    },
    {
      "epoch": 0.000277606201171875,
      "model_forward_time": 0.11587262153625488,
      "step": 45483
    },
    {
      "epoch": 0.000277606201171875,
      "step": 45483,
      "training_step_time": 0.49808359146118164
    },
    {
      "epoch": 0.0002776123046875,
      "model_forward_time": 0.11524581909179688,
      "step": 45484
    },
    {
      "epoch": 0.0002776123046875,
      "step": 45484,
      "training_step_time": 0.5188500881195068
    },
    {
      "epoch": 0.000277618408203125,
      "model_forward_time": 0.11479854583740234,
      "step": 45485
    },
    {
      "epoch": 0.000277618408203125,
      "step": 45485,
      "training_step_time": 0.39447951316833496
    },
    {
      "epoch": 0.00027762451171875,
      "model_forward_time": 0.11453104019165039,
      "step": 45486
    },
    {
      "epoch": 0.00027762451171875,
      "step": 45486,
      "training_step_time": 0.3870081901550293
    },
    {
      "epoch": 0.000277630615234375,
      "model_forward_time": 0.11453485488891602,
      "step": 45487
    },
    {
      "epoch": 0.000277630615234375,
      "step": 45487,
      "training_step_time": 0.3964557647705078
    },
    {
      "epoch": 0.00027763671875,
      "model_forward_time": 0.11498379707336426,
      "step": 45488
    },
    {
      "epoch": 0.00027763671875,
      "step": 45488,
      "training_step_time": 0.3896188735961914
    },
    {
      "epoch": 0.000277642822265625,
      "model_forward_time": 0.1152491569519043,
      "step": 45489
    },
    {
      "epoch": 0.000277642822265625,
      "step": 45489,
      "training_step_time": 0.3954460620880127
    },
    {
      "epoch": 0.00027764892578125,
      "grad_norm": 0.10123848170042038,
      "learning_rate": 1.5154918210665148e-05,
      "loss": 0.0364,
      "step": 45490
    },
    {
      "epoch": 0.00027764892578125,
      "model_forward_time": 0.11486196517944336,
      "step": 45490
    },
    {
      "epoch": 0.00027764892578125,
      "step": 45490,
      "training_step_time": 0.6519918441772461
    },
    {
      "epoch": 0.000277655029296875,
      "model_forward_time": 0.11492013931274414,
      "step": 45491
    },
    {
      "epoch": 0.000277655029296875,
      "step": 45491,
      "training_step_time": 0.38822197914123535
    },
    {
      "epoch": 0.0002776611328125,
      "model_forward_time": 0.11461091041564941,
      "step": 45492
    },
    {
      "epoch": 0.0002776611328125,
      "step": 45492,
      "training_step_time": 0.3919672966003418
    },
    {
      "epoch": 0.000277667236328125,
      "model_forward_time": 0.1146547794342041,
      "step": 45493
    },
    {
      "epoch": 0.000277667236328125,
      "step": 45493,
      "training_step_time": 0.42183423042297363
    },
    {
      "epoch": 0.00027767333984375,
      "model_forward_time": 0.11530375480651855,
      "step": 45494
    },
    {
      "epoch": 0.00027767333984375,
      "step": 45494,
      "training_step_time": 0.3907766342163086
    },
    {
      "epoch": 0.000277679443359375,
      "model_forward_time": 0.11532831192016602,
      "step": 45495
    },
    {
      "epoch": 0.000277679443359375,
      "step": 45495,
      "training_step_time": 0.4560563564300537
    },
    {
      "epoch": 0.000277685546875,
      "model_forward_time": 0.11490917205810547,
      "step": 45496
    },
    {
      "epoch": 0.000277685546875,
      "step": 45496,
      "training_step_time": 0.5623812675476074
    },
    {
      "epoch": 0.000277691650390625,
      "model_forward_time": 0.11550498008728027,
      "step": 45497
    },
    {
      "epoch": 0.000277691650390625,
      "step": 45497,
      "training_step_time": 0.42317867279052734
    },
    {
      "epoch": 0.00027769775390625,
      "model_forward_time": 0.11479687690734863,
      "step": 45498
    },
    {
      "epoch": 0.00027769775390625,
      "step": 45498,
      "training_step_time": 0.3983452320098877
    },
    {
      "epoch": 0.000277703857421875,
      "model_forward_time": 0.11448097229003906,
      "step": 45499
    },
    {
      "epoch": 0.000277703857421875,
      "step": 45499,
      "training_step_time": 0.40015602111816406
    },
    {
      "epoch": 0.0002777099609375,
      "grad_norm": 0.14406134188175201,
      "learning_rate": 1.5135159945300231e-05,
      "loss": 0.037,
      "step": 45500
    },
    {
      "epoch": 0.0002777099609375,
      "model_forward_time": 0.1159675121307373,
      "step": 45500
    },
    {
      "epoch": 0.0002777099609375,
      "step": 45500,
      "training_step_time": 0.39225077629089355
    },
    {
      "epoch": 0.000277716064453125,
      "model_forward_time": 0.1150655746459961,
      "step": 45501
    },
    {
      "epoch": 0.000277716064453125,
      "step": 45501,
      "training_step_time": 0.3981804847717285
    },
    {
      "epoch": 0.00027772216796875,
      "model_forward_time": 0.11577200889587402,
      "step": 45502
    },
    {
      "epoch": 0.00027772216796875,
      "step": 45502,
      "training_step_time": 0.442659854888916
    },
    {
      "epoch": 0.000277728271484375,
      "model_forward_time": 0.11563706398010254,
      "step": 45503
    },
    {
      "epoch": 0.000277728271484375,
      "step": 45503,
      "training_step_time": 0.4734992980957031
    },
    {
      "epoch": 0.000277734375,
      "model_forward_time": 0.11547374725341797,
      "step": 45504
    },
    {
      "epoch": 0.000277734375,
      "step": 45504,
      "training_step_time": 0.3938899040222168
    },
    {
      "epoch": 0.000277740478515625,
      "model_forward_time": 0.1154177188873291,
      "step": 45505
    },
    {
      "epoch": 0.000277740478515625,
      "step": 45505,
      "training_step_time": 0.39130640029907227
    },
    {
      "epoch": 0.00027774658203125,
      "model_forward_time": 0.1148064136505127,
      "step": 45506
    },
    {
      "epoch": 0.00027774658203125,
      "step": 45506,
      "training_step_time": 0.3902134895324707
    },
    {
      "epoch": 0.000277752685546875,
      "model_forward_time": 0.1155400276184082,
      "step": 45507
    },
    {
      "epoch": 0.000277752685546875,
      "step": 45507,
      "training_step_time": 0.3847188949584961
    },
    {
      "epoch": 0.0002777587890625,
      "model_forward_time": 0.11470246315002441,
      "step": 45508
    },
    {
      "epoch": 0.0002777587890625,
      "step": 45508,
      "training_step_time": 1.0133724212646484
    },
    {
      "epoch": 0.000277764892578125,
      "model_forward_time": 0.11474013328552246,
      "step": 45509
    },
    {
      "epoch": 0.000277764892578125,
      "step": 45509,
      "training_step_time": 0.3619389533996582
    },
    {
      "epoch": 0.00027777099609375,
      "grad_norm": 0.09553897380828857,
      "learning_rate": 1.5115412270953167e-05,
      "loss": 0.0399,
      "step": 45510
    },
    {
      "epoch": 0.00027777099609375,
      "model_forward_time": 0.11399245262145996,
      "step": 45510
    },
    {
      "epoch": 0.00027777099609375,
      "step": 45510,
      "training_step_time": 0.42173290252685547
    },
    {
      "epoch": 0.000277777099609375,
      "model_forward_time": 0.11416363716125488,
      "step": 45511
    },
    {
      "epoch": 0.000277777099609375,
      "step": 45511,
      "training_step_time": 0.39394640922546387
    },
    {
      "epoch": 0.000277783203125,
      "model_forward_time": 0.11383962631225586,
      "step": 45512
    },
    {
      "epoch": 0.000277783203125,
      "step": 45512,
      "training_step_time": 0.38704824447631836
    },
    {
      "epoch": 0.000277789306640625,
      "model_forward_time": 0.11382341384887695,
      "step": 45513
    },
    {
      "epoch": 0.000277789306640625,
      "step": 45513,
      "training_step_time": 0.38056421279907227
    },
    {
      "epoch": 0.00027779541015625,
      "model_forward_time": 0.1152348518371582,
      "step": 45514
    },
    {
      "epoch": 0.00027779541015625,
      "step": 45514,
      "training_step_time": 0.5483295917510986
    },
    {
      "epoch": 0.000277801513671875,
      "model_forward_time": 0.11482644081115723,
      "step": 45515
    },
    {
      "epoch": 0.000277801513671875,
      "step": 45515,
      "training_step_time": 0.4030733108520508
    },
    {
      "epoch": 0.0002778076171875,
      "model_forward_time": 0.11500740051269531,
      "step": 45516
    },
    {
      "epoch": 0.0002778076171875,
      "step": 45516,
      "training_step_time": 0.4764087200164795
    },
    {
      "epoch": 0.000277813720703125,
      "model_forward_time": 0.11455464363098145,
      "step": 45517
    },
    {
      "epoch": 0.000277813720703125,
      "step": 45517,
      "training_step_time": 0.3926868438720703
    },
    {
      "epoch": 0.00027781982421875,
      "model_forward_time": 0.11529254913330078,
      "step": 45518
    },
    {
      "epoch": 0.00027781982421875,
      "step": 45518,
      "training_step_time": 0.3878970146179199
    },
    {
      "epoch": 0.000277825927734375,
      "model_forward_time": 0.11475419998168945,
      "step": 45519
    },
    {
      "epoch": 0.000277825927734375,
      "step": 45519,
      "training_step_time": 0.3865783214569092
    },
    {
      "epoch": 0.00027783203125,
      "grad_norm": 0.12439306080341339,
      "learning_rate": 1.5095675193622777e-05,
      "loss": 0.0349,
      "step": 45520
    },
    {
      "epoch": 0.00027783203125,
      "model_forward_time": 0.1146399974822998,
      "step": 45520
    },
    {
      "epoch": 0.00027783203125,
      "step": 45520,
      "training_step_time": 0.7806382179260254
    },
    {
      "epoch": 0.000277838134765625,
      "model_forward_time": 0.11409950256347656,
      "step": 45521
    },
    {
      "epoch": 0.000277838134765625,
      "step": 45521,
      "training_step_time": 0.4307699203491211
    },
    {
      "epoch": 0.00027784423828125,
      "model_forward_time": 0.11436200141906738,
      "step": 45522
    },
    {
      "epoch": 0.00027784423828125,
      "step": 45522,
      "training_step_time": 0.4284532070159912
    },
    {
      "epoch": 0.000277850341796875,
      "model_forward_time": 0.11458826065063477,
      "step": 45523
    },
    {
      "epoch": 0.000277850341796875,
      "step": 45523,
      "training_step_time": 0.43987464904785156
    },
    {
      "epoch": 0.0002778564453125,
      "model_forward_time": 0.11504673957824707,
      "step": 45524
    },
    {
      "epoch": 0.0002778564453125,
      "step": 45524,
      "training_step_time": 0.44530320167541504
    },
    {
      "epoch": 0.000277862548828125,
      "model_forward_time": 0.11466717720031738,
      "step": 45525
    },
    {
      "epoch": 0.000277862548828125,
      "step": 45525,
      "training_step_time": 0.38955092430114746
    },
    {
      "epoch": 0.00027786865234375,
      "model_forward_time": 0.11466789245605469,
      "step": 45526
    },
    {
      "epoch": 0.00027786865234375,
      "step": 45526,
      "training_step_time": 0.47034239768981934
    },
    {
      "epoch": 0.000277874755859375,
      "model_forward_time": 0.11460542678833008,
      "step": 45527
    },
    {
      "epoch": 0.000277874755859375,
      "step": 45527,
      "training_step_time": 0.39417052268981934
    },
    {
      "epoch": 0.000277880859375,
      "model_forward_time": 0.11562991142272949,
      "step": 45528
    },
    {
      "epoch": 0.000277880859375,
      "step": 45528,
      "training_step_time": 0.39617061614990234
    },
    {
      "epoch": 0.000277886962890625,
      "model_forward_time": 0.11464333534240723,
      "step": 45529
    },
    {
      "epoch": 0.000277886962890625,
      "step": 45529,
      "training_step_time": 0.4033362865447998
    },
    {
      "epoch": 0.00027789306640625,
      "grad_norm": 0.0983094796538353,
      "learning_rate": 1.5075948719304672e-05,
      "loss": 0.0369,
      "step": 45530
    },
    {
      "epoch": 0.00027789306640625,
      "model_forward_time": 0.11479306221008301,
      "step": 45530
    },
    {
      "epoch": 0.00027789306640625,
      "step": 45530,
      "training_step_time": 0.44902634620666504
    },
    {
      "epoch": 0.000277899169921875,
      "model_forward_time": 0.11466693878173828,
      "step": 45531
    },
    {
      "epoch": 0.000277899169921875,
      "step": 45531,
      "training_step_time": 0.3844006061553955
    },
    {
      "epoch": 0.0002779052734375,
      "model_forward_time": 0.11547422409057617,
      "step": 45532
    },
    {
      "epoch": 0.0002779052734375,
      "step": 45532,
      "training_step_time": 0.6957447528839111
    },
    {
      "epoch": 0.000277911376953125,
      "model_forward_time": 0.11402058601379395,
      "step": 45533
    },
    {
      "epoch": 0.000277911376953125,
      "step": 45533,
      "training_step_time": 0.3912498950958252
    },
    {
      "epoch": 0.00027791748046875,
      "model_forward_time": 0.11453700065612793,
      "step": 45534
    },
    {
      "epoch": 0.00027791748046875,
      "step": 45534,
      "training_step_time": 0.386293888092041
    },
    {
      "epoch": 0.000277923583984375,
      "model_forward_time": 0.11514401435852051,
      "step": 45535
    },
    {
      "epoch": 0.000277923583984375,
      "step": 45535,
      "training_step_time": 0.41260719299316406
    },
    {
      "epoch": 0.0002779296875,
      "model_forward_time": 0.11476254463195801,
      "step": 45536
    },
    {
      "epoch": 0.0002779296875,
      "step": 45536,
      "training_step_time": 0.41748690605163574
    },
    {
      "epoch": 0.000277935791015625,
      "model_forward_time": 0.11489081382751465,
      "step": 45537
    },
    {
      "epoch": 0.000277935791015625,
      "step": 45537,
      "training_step_time": 0.3644130229949951
    },
    {
      "epoch": 0.00027794189453125,
      "model_forward_time": 0.11493849754333496,
      "step": 45538
    },
    {
      "epoch": 0.00027794189453125,
      "step": 45538,
      "training_step_time": 0.982231616973877
    },
    {
      "epoch": 0.000277947998046875,
      "model_forward_time": 0.11413049697875977,
      "step": 45539
    },
    {
      "epoch": 0.000277947998046875,
      "step": 45539,
      "training_step_time": 0.3735239505767822
    },
    {
      "epoch": 0.0002779541015625,
      "grad_norm": 0.10500549525022507,
      "learning_rate": 1.5056232853991209e-05,
      "loss": 0.0358,
      "step": 45540
    },
    {
      "epoch": 0.0002779541015625,
      "model_forward_time": 0.1137700080871582,
      "step": 45540
    },
    {
      "epoch": 0.0002779541015625,
      "step": 45540,
      "training_step_time": 0.3840906620025635
    },
    {
      "epoch": 0.000277960205078125,
      "model_forward_time": 0.11393475532531738,
      "step": 45541
    },
    {
      "epoch": 0.000277960205078125,
      "step": 45541,
      "training_step_time": 0.38930249214172363
    },
    {
      "epoch": 0.00027796630859375,
      "model_forward_time": 0.11431193351745605,
      "step": 45542
    },
    {
      "epoch": 0.00027796630859375,
      "step": 45542,
      "training_step_time": 0.398181676864624
    },
    {
      "epoch": 0.000277972412109375,
      "model_forward_time": 0.11412954330444336,
      "step": 45543
    },
    {
      "epoch": 0.000277972412109375,
      "step": 45543,
      "training_step_time": 0.5147488117218018
    },
    {
      "epoch": 0.000277978515625,
      "model_forward_time": 0.11560630798339844,
      "step": 45544
    },
    {
      "epoch": 0.000277978515625,
      "step": 45544,
      "training_step_time": 0.7723362445831299
    },
    {
      "epoch": 0.000277984619140625,
      "model_forward_time": 0.11464738845825195,
      "step": 45545
    },
    {
      "epoch": 0.000277984619140625,
      "step": 45545,
      "training_step_time": 0.3866891860961914
    },
    {
      "epoch": 0.00027799072265625,
      "model_forward_time": 0.11411428451538086,
      "step": 45546
    },
    {
      "epoch": 0.00027799072265625,
      "step": 45546,
      "training_step_time": 0.39879584312438965
    },
    {
      "epoch": 0.000277996826171875,
      "model_forward_time": 0.11436724662780762,
      "step": 45547
    },
    {
      "epoch": 0.000277996826171875,
      "step": 45547,
      "training_step_time": 0.4168589115142822
    },
    {
      "epoch": 0.0002780029296875,
      "model_forward_time": 0.1145479679107666,
      "step": 45548
    },
    {
      "epoch": 0.0002780029296875,
      "step": 45548,
      "training_step_time": 0.4532454013824463
    },
    {
      "epoch": 0.000278009033203125,
      "model_forward_time": 0.11410355567932129,
      "step": 45549
    },
    {
      "epoch": 0.000278009033203125,
      "step": 45549,
      "training_step_time": 0.4215538501739502
    },
    {
      "epoch": 0.00027801513671875,
      "grad_norm": 0.10331828892230988,
      "learning_rate": 1.5036527603671608e-05,
      "loss": 0.0352,
      "step": 45550
    },
    {
      "epoch": 0.00027801513671875,
      "model_forward_time": 0.1153872013092041,
      "step": 45550
    },
    {
      "epoch": 0.00027801513671875,
      "step": 45550,
      "training_step_time": 0.8129465579986572
    },
    {
      "epoch": 0.000278021240234375,
      "model_forward_time": 0.11435222625732422,
      "step": 45551
    },
    {
      "epoch": 0.000278021240234375,
      "step": 45551,
      "training_step_time": 0.4640624523162842
    },
    {
      "epoch": 0.00027802734375,
      "model_forward_time": 0.11400127410888672,
      "step": 45552
    },
    {
      "epoch": 0.00027802734375,
      "step": 45552,
      "training_step_time": 0.38733339309692383
    },
    {
      "epoch": 0.000278033447265625,
      "model_forward_time": 0.11364340782165527,
      "step": 45553
    },
    {
      "epoch": 0.000278033447265625,
      "step": 45553,
      "training_step_time": 0.38875627517700195
    },
    {
      "epoch": 0.00027803955078125,
      "model_forward_time": 0.1140754222869873,
      "step": 45554
    },
    {
      "epoch": 0.00027803955078125,
      "step": 45554,
      "training_step_time": 0.44153332710266113
    },
    {
      "epoch": 0.000278045654296875,
      "model_forward_time": 0.11408638954162598,
      "step": 45555
    },
    {
      "epoch": 0.000278045654296875,
      "step": 45555,
      "training_step_time": 0.40022826194763184
    },
    {
      "epoch": 0.0002780517578125,
      "model_forward_time": 0.11525559425354004,
      "step": 45556
    },
    {
      "epoch": 0.0002780517578125,
      "step": 45556,
      "training_step_time": 0.6506557464599609
    },
    {
      "epoch": 0.000278057861328125,
      "model_forward_time": 0.11420178413391113,
      "step": 45557
    },
    {
      "epoch": 0.000278057861328125,
      "step": 45557,
      "training_step_time": 0.402998685836792
    },
    {
      "epoch": 0.00027806396484375,
      "model_forward_time": 0.11409425735473633,
      "step": 45558
    },
    {
      "epoch": 0.00027806396484375,
      "step": 45558,
      "training_step_time": 0.39095592498779297
    },
    {
      "epoch": 0.000278070068359375,
      "model_forward_time": 0.11505722999572754,
      "step": 45559
    },
    {
      "epoch": 0.000278070068359375,
      "step": 45559,
      "training_step_time": 0.43199682235717773
    },
    {
      "epoch": 0.000278076171875,
      "grad_norm": 0.09899826347827911,
      "learning_rate": 1.5016832974331724e-05,
      "loss": 0.0371,
      "step": 45560
    },
    {
      "epoch": 0.000278076171875,
      "model_forward_time": 0.11435365676879883,
      "step": 45560
    },
    {
      "epoch": 0.000278076171875,
      "step": 45560,
      "training_step_time": 0.3959379196166992
    },
    {
      "epoch": 0.000278082275390625,
      "model_forward_time": 0.11449813842773438,
      "step": 45561
    },
    {
      "epoch": 0.000278082275390625,
      "step": 45561,
      "training_step_time": 0.39481568336486816
    },
    {
      "epoch": 0.00027808837890625,
      "model_forward_time": 0.11468124389648438,
      "step": 45562
    },
    {
      "epoch": 0.00027808837890625,
      "step": 45562,
      "training_step_time": 0.7524068355560303
    },
    {
      "epoch": 0.000278094482421875,
      "model_forward_time": 0.11466217041015625,
      "step": 45563
    },
    {
      "epoch": 0.000278094482421875,
      "step": 45563,
      "training_step_time": 0.47057533264160156
    },
    {
      "epoch": 0.0002781005859375,
      "model_forward_time": 0.1140599250793457,
      "step": 45564
    },
    {
      "epoch": 0.0002781005859375,
      "step": 45564,
      "training_step_time": 0.4882049560546875
    },
    {
      "epoch": 0.000278106689453125,
      "model_forward_time": 0.11459660530090332,
      "step": 45565
    },
    {
      "epoch": 0.000278106689453125,
      "step": 45565,
      "training_step_time": 0.3864459991455078
    },
    {
      "epoch": 0.00027811279296875,
      "model_forward_time": 0.11885285377502441,
      "step": 45566
    },
    {
      "epoch": 0.00027811279296875,
      "step": 45566,
      "training_step_time": 0.3771240711212158
    },
    {
      "epoch": 0.000278118896484375,
      "model_forward_time": 0.12001895904541016,
      "step": 45567
    },
    {
      "epoch": 0.000278118896484375,
      "step": 45567,
      "training_step_time": 0.4697701930999756
    },
    {
      "epoch": 0.000278125,
      "model_forward_time": 0.11852717399597168,
      "step": 45568
    },
    {
      "epoch": 0.000278125,
      "step": 45568,
      "training_step_time": 0.5742208957672119
    },
    {
      "epoch": 0.000278131103515625,
      "model_forward_time": 0.11861562728881836,
      "step": 45569
    },
    {
      "epoch": 0.000278131103515625,
      "step": 45569,
      "training_step_time": 0.4191286563873291
    },
    {
      "epoch": 0.00027813720703125,
      "grad_norm": 0.0836099311709404,
      "learning_rate": 1.4997148971954344e-05,
      "loss": 0.0366,
      "step": 45570
    },
    {
      "epoch": 0.00027813720703125,
      "model_forward_time": 0.11827325820922852,
      "step": 45570
    },
    {
      "epoch": 0.00027813720703125,
      "step": 45570,
      "training_step_time": 0.37967586517333984
    },
    {
      "epoch": 0.000278143310546875,
      "model_forward_time": 0.11543464660644531,
      "step": 45571
    },
    {
      "epoch": 0.000278143310546875,
      "step": 45571,
      "training_step_time": 0.38482189178466797
    },
    {
      "epoch": 0.0002781494140625,
      "model_forward_time": 0.11471867561340332,
      "step": 45572
    },
    {
      "epoch": 0.0002781494140625,
      "step": 45572,
      "training_step_time": 0.39822888374328613
    },
    {
      "epoch": 0.000278155517578125,
      "model_forward_time": 0.11497282981872559,
      "step": 45573
    },
    {
      "epoch": 0.000278155517578125,
      "step": 45573,
      "training_step_time": 0.39890122413635254
    },
    {
      "epoch": 0.00027816162109375,
      "model_forward_time": 0.11523985862731934,
      "step": 45574
    },
    {
      "epoch": 0.00027816162109375,
      "step": 45574,
      "training_step_time": 0.5643794536590576
    },
    {
      "epoch": 0.000278167724609375,
      "model_forward_time": 0.11477947235107422,
      "step": 45575
    },
    {
      "epoch": 0.000278167724609375,
      "step": 45575,
      "training_step_time": 0.38966917991638184
    },
    {
      "epoch": 0.000278173828125,
      "model_forward_time": 0.11455154418945312,
      "step": 45576
    },
    {
      "epoch": 0.000278173828125,
      "step": 45576,
      "training_step_time": 0.3648974895477295
    },
    {
      "epoch": 0.000278179931640625,
      "model_forward_time": 0.11459612846374512,
      "step": 45577
    },
    {
      "epoch": 0.000278179931640625,
      "step": 45577,
      "training_step_time": 0.44156527519226074
    },
    {
      "epoch": 0.00027818603515625,
      "model_forward_time": 0.11490249633789062,
      "step": 45578
    },
    {
      "epoch": 0.00027818603515625,
      "step": 45578,
      "training_step_time": 0.39799070358276367
    },
    {
      "epoch": 0.000278192138671875,
      "model_forward_time": 0.11436820030212402,
      "step": 45579
    },
    {
      "epoch": 0.000278192138671875,
      "step": 45579,
      "training_step_time": 0.3837242126464844
    },
    {
      "epoch": 0.0002781982421875,
      "grad_norm": 0.12896780669689178,
      "learning_rate": 1.4977475602518876e-05,
      "loss": 0.0386,
      "step": 45580
    },
    {
      "epoch": 0.0002781982421875,
      "model_forward_time": 0.11457133293151855,
      "step": 45580
    },
    {
      "epoch": 0.0002781982421875,
      "step": 45580,
      "training_step_time": 0.7296502590179443
    },
    {
      "epoch": 0.000278204345703125,
      "model_forward_time": 0.11440014839172363,
      "step": 45581
    },
    {
      "epoch": 0.000278204345703125,
      "step": 45581,
      "training_step_time": 0.44223666191101074
    },
    {
      "epoch": 0.00027821044921875,
      "model_forward_time": 0.11491990089416504,
      "step": 45582
    },
    {
      "epoch": 0.00027821044921875,
      "step": 45582,
      "training_step_time": 0.45732760429382324
    },
    {
      "epoch": 0.000278216552734375,
      "model_forward_time": 0.11440491676330566,
      "step": 45583
    },
    {
      "epoch": 0.000278216552734375,
      "step": 45583,
      "training_step_time": 0.38771748542785645
    },
    {
      "epoch": 0.00027822265625,
      "model_forward_time": 0.1143338680267334,
      "step": 45584
    },
    {
      "epoch": 0.00027822265625,
      "step": 45584,
      "training_step_time": 0.41750383377075195
    },
    {
      "epoch": 0.000278228759765625,
      "model_forward_time": 0.1141672134399414,
      "step": 45585
    },
    {
      "epoch": 0.000278228759765625,
      "step": 45585,
      "training_step_time": 0.3932304382324219
    },
    {
      "epoch": 0.00027823486328125,
      "model_forward_time": 0.11446142196655273,
      "step": 45586
    },
    {
      "epoch": 0.00027823486328125,
      "step": 45586,
      "training_step_time": 0.6746511459350586
    },
    {
      "epoch": 0.000278240966796875,
      "model_forward_time": 0.11414527893066406,
      "step": 45587
    },
    {
      "epoch": 0.000278240966796875,
      "step": 45587,
      "training_step_time": 0.39986109733581543
    },
    {
      "epoch": 0.0002782470703125,
      "model_forward_time": 0.11405062675476074,
      "step": 45588
    },
    {
      "epoch": 0.0002782470703125,
      "step": 45588,
      "training_step_time": 0.38904881477355957
    },
    {
      "epoch": 0.000278253173828125,
      "model_forward_time": 0.11432051658630371,
      "step": 45589
    },
    {
      "epoch": 0.000278253173828125,
      "step": 45589,
      "training_step_time": 0.40772438049316406
    },
    {
      "epoch": 0.00027825927734375,
      "grad_norm": 0.11268766224384308,
      "learning_rate": 1.4957812872001614e-05,
      "loss": 0.0331,
      "step": 45590
    },
    {
      "epoch": 0.00027825927734375,
      "model_forward_time": 0.11536502838134766,
      "step": 45590
    },
    {
      "epoch": 0.00027825927734375,
      "step": 45590,
      "training_step_time": 0.47417378425598145
    },
    {
      "epoch": 0.000278265380859375,
      "model_forward_time": 0.11459064483642578,
      "step": 45591
    },
    {
      "epoch": 0.000278265380859375,
      "step": 45591,
      "training_step_time": 0.4900825023651123
    },
    {
      "epoch": 0.000278271484375,
      "model_forward_time": 0.11541271209716797,
      "step": 45592
    },
    {
      "epoch": 0.000278271484375,
      "step": 45592,
      "training_step_time": 1.0298473834991455
    },
    {
      "epoch": 0.000278277587890625,
      "model_forward_time": 0.11360836029052734,
      "step": 45593
    },
    {
      "epoch": 0.000278277587890625,
      "step": 45593,
      "training_step_time": 0.40433764457702637
    },
    {
      "epoch": 0.00027828369140625,
      "model_forward_time": 0.11443901062011719,
      "step": 45594
    },
    {
      "epoch": 0.00027828369140625,
      "step": 45594,
      "training_step_time": 0.4522545337677002
    },
    {
      "epoch": 0.000278289794921875,
      "model_forward_time": 0.11347603797912598,
      "step": 45595
    },
    {
      "epoch": 0.000278289794921875,
      "step": 45595,
      "training_step_time": 0.38402509689331055
    },
    {
      "epoch": 0.0002782958984375,
      "model_forward_time": 0.11399626731872559,
      "step": 45596
    },
    {
      "epoch": 0.0002782958984375,
      "step": 45596,
      "training_step_time": 0.4096674919128418
    },
    {
      "epoch": 0.000278302001953125,
      "model_forward_time": 0.11432743072509766,
      "step": 45597
    },
    {
      "epoch": 0.000278302001953125,
      "step": 45597,
      "training_step_time": 0.4209778308868408
    },
    {
      "epoch": 0.00027830810546875,
      "model_forward_time": 0.11489653587341309,
      "step": 45598
    },
    {
      "epoch": 0.00027830810546875,
      "step": 45598,
      "training_step_time": 0.5036535263061523
    },
    {
      "epoch": 0.000278314208984375,
      "model_forward_time": 0.11477494239807129,
      "step": 45599
    },
    {
      "epoch": 0.000278314208984375,
      "step": 45599,
      "training_step_time": 0.3890113830566406
    },
    {
      "epoch": 0.0002783203125,
      "grad_norm": 0.0967230573296547,
      "learning_rate": 1.4938160786375572e-05,
      "loss": 0.0328,
      "step": 45600
    },
    {
      "epoch": 0.0002783203125,
      "model_forward_time": 0.11447262763977051,
      "step": 45600
    },
    {
      "epoch": 0.0002783203125,
      "step": 45600,
      "training_step_time": 0.3943674564361572
    },
    {
      "epoch": 0.000278326416015625,
      "model_forward_time": 0.11614060401916504,
      "step": 45601
    },
    {
      "epoch": 0.000278326416015625,
      "step": 45601,
      "training_step_time": 0.3969900608062744
    },
    {
      "epoch": 0.00027833251953125,
      "model_forward_time": 0.11571145057678223,
      "step": 45602
    },
    {
      "epoch": 0.00027833251953125,
      "step": 45602,
      "training_step_time": 0.4302847385406494
    },
    {
      "epoch": 0.000278338623046875,
      "model_forward_time": 0.11524105072021484,
      "step": 45603
    },
    {
      "epoch": 0.000278338623046875,
      "step": 45603,
      "training_step_time": 0.49717187881469727
    },
    {
      "epoch": 0.0002783447265625,
      "model_forward_time": 0.11497306823730469,
      "step": 45604
    },
    {
      "epoch": 0.0002783447265625,
      "step": 45604,
      "training_step_time": 0.6484940052032471
    },
    {
      "epoch": 0.000278350830078125,
      "model_forward_time": 0.11407184600830078,
      "step": 45605
    },
    {
      "epoch": 0.000278350830078125,
      "step": 45605,
      "training_step_time": 0.3799774646759033
    },
    {
      "epoch": 0.00027835693359375,
      "model_forward_time": 0.11449790000915527,
      "step": 45606
    },
    {
      "epoch": 0.00027835693359375,
      "step": 45606,
      "training_step_time": 0.3960151672363281
    },
    {
      "epoch": 0.000278363037109375,
      "model_forward_time": 0.11489224433898926,
      "step": 45607
    },
    {
      "epoch": 0.000278363037109375,
      "step": 45607,
      "training_step_time": 0.39975595474243164
    },
    {
      "epoch": 0.000278369140625,
      "model_forward_time": 0.1150505542755127,
      "step": 45608
    },
    {
      "epoch": 0.000278369140625,
      "step": 45608,
      "training_step_time": 0.4856226444244385
    },
    {
      "epoch": 0.000278375244140625,
      "model_forward_time": 0.11448550224304199,
      "step": 45609
    },
    {
      "epoch": 0.000278375244140625,
      "step": 45609,
      "training_step_time": 0.4514191150665283
    },
    {
      "epoch": 0.00027838134765625,
      "grad_norm": 0.0740712359547615,
      "learning_rate": 1.4918519351610527e-05,
      "loss": 0.0357,
      "step": 45610
    },
    {
      "epoch": 0.00027838134765625,
      "model_forward_time": 0.11512613296508789,
      "step": 45610
    },
    {
      "epoch": 0.00027838134765625,
      "step": 45610,
      "training_step_time": 0.6114809513092041
    },
    {
      "epoch": 0.000278387451171875,
      "model_forward_time": 0.11423444747924805,
      "step": 45611
    },
    {
      "epoch": 0.000278387451171875,
      "step": 45611,
      "training_step_time": 0.37786078453063965
    },
    {
      "epoch": 0.0002783935546875,
      "model_forward_time": 0.11430168151855469,
      "step": 45612
    },
    {
      "epoch": 0.0002783935546875,
      "step": 45612,
      "training_step_time": 0.3901188373565674
    },
    {
      "epoch": 0.000278399658203125,
      "model_forward_time": 0.11425948143005371,
      "step": 45613
    },
    {
      "epoch": 0.000278399658203125,
      "step": 45613,
      "training_step_time": 0.3912925720214844
    },
    {
      "epoch": 0.00027840576171875,
      "model_forward_time": 0.11429405212402344,
      "step": 45614
    },
    {
      "epoch": 0.00027840576171875,
      "step": 45614,
      "training_step_time": 0.3900418281555176
    },
    {
      "epoch": 0.000278411865234375,
      "model_forward_time": 0.11484813690185547,
      "step": 45615
    },
    {
      "epoch": 0.000278411865234375,
      "step": 45615,
      "training_step_time": 0.3982703685760498
    },
    {
      "epoch": 0.00027841796875,
      "model_forward_time": 0.11579561233520508,
      "step": 45616
    },
    {
      "epoch": 0.00027841796875,
      "step": 45616,
      "training_step_time": 1.1379055976867676
    },
    {
      "epoch": 0.000278424072265625,
      "model_forward_time": 0.11430072784423828,
      "step": 45617
    },
    {
      "epoch": 0.000278424072265625,
      "step": 45617,
      "training_step_time": 0.4393885135650635
    },
    {
      "epoch": 0.00027843017578125,
      "model_forward_time": 0.11414456367492676,
      "step": 45618
    },
    {
      "epoch": 0.00027843017578125,
      "step": 45618,
      "training_step_time": 0.37711620330810547
    },
    {
      "epoch": 0.000278436279296875,
      "model_forward_time": 0.11418867111206055,
      "step": 45619
    },
    {
      "epoch": 0.000278436279296875,
      "step": 45619,
      "training_step_time": 0.48773670196533203
    },
    {
      "epoch": 0.0002784423828125,
      "grad_norm": 0.08847527205944061,
      "learning_rate": 1.4898888573673031e-05,
      "loss": 0.0371,
      "step": 45620
    },
    {
      "epoch": 0.0002784423828125,
      "model_forward_time": 0.11391878128051758,
      "step": 45620
    },
    {
      "epoch": 0.0002784423828125,
      "step": 45620,
      "training_step_time": 0.41500091552734375
    },
    {
      "epoch": 0.000278448486328125,
      "model_forward_time": 0.11424398422241211,
      "step": 45621
    },
    {
      "epoch": 0.000278448486328125,
      "step": 45621,
      "training_step_time": 0.4594137668609619
    },
    {
      "epoch": 0.00027845458984375,
      "model_forward_time": 0.11483454704284668,
      "step": 45622
    },
    {
      "epoch": 0.00027845458984375,
      "step": 45622,
      "training_step_time": 0.8917844295501709
    },
    {
      "epoch": 0.000278460693359375,
      "model_forward_time": 0.11407589912414551,
      "step": 45623
    },
    {
      "epoch": 0.000278460693359375,
      "step": 45623,
      "training_step_time": 0.38202595710754395
    },
    {
      "epoch": 0.000278466796875,
      "model_forward_time": 0.11346292495727539,
      "step": 45624
    },
    {
      "epoch": 0.000278466796875,
      "step": 45624,
      "training_step_time": 0.3806779384613037
    },
    {
      "epoch": 0.000278472900390625,
      "model_forward_time": 0.11452245712280273,
      "step": 45625
    },
    {
      "epoch": 0.000278472900390625,
      "step": 45625,
      "training_step_time": 0.38802218437194824
    },
    {
      "epoch": 0.00027847900390625,
      "model_forward_time": 0.11430048942565918,
      "step": 45626
    },
    {
      "epoch": 0.00027847900390625,
      "step": 45626,
      "training_step_time": 0.38394761085510254
    },
    {
      "epoch": 0.000278485107421875,
      "model_forward_time": 0.1141209602355957,
      "step": 45627
    },
    {
      "epoch": 0.000278485107421875,
      "step": 45627,
      "training_step_time": 0.3811924457550049
    },
    {
      "epoch": 0.0002784912109375,
      "model_forward_time": 0.11485505104064941,
      "step": 45628
    },
    {
      "epoch": 0.0002784912109375,
      "step": 45628,
      "training_step_time": 0.8421630859375
    },
    {
      "epoch": 0.000278497314453125,
      "model_forward_time": 0.11467099189758301,
      "step": 45629
    },
    {
      "epoch": 0.000278497314453125,
      "step": 45629,
      "training_step_time": 0.4717583656311035
    },
    {
      "epoch": 0.00027850341796875,
      "grad_norm": 0.11539136618375778,
      "learning_rate": 1.4879268458526379e-05,
      "loss": 0.0373,
      "step": 45630
    },
    {
      "epoch": 0.00027850341796875,
      "model_forward_time": 0.11444568634033203,
      "step": 45630
    },
    {
      "epoch": 0.00027850341796875,
      "step": 45630,
      "training_step_time": 0.402904748916626
    },
    {
      "epoch": 0.000278509521484375,
      "model_forward_time": 0.11408138275146484,
      "step": 45631
    },
    {
      "epoch": 0.000278509521484375,
      "step": 45631,
      "training_step_time": 0.39336705207824707
    },
    {
      "epoch": 0.000278515625,
      "model_forward_time": 0.11460232734680176,
      "step": 45632
    },
    {
      "epoch": 0.000278515625,
      "step": 45632,
      "training_step_time": 0.3813784122467041
    },
    {
      "epoch": 0.000278521728515625,
      "model_forward_time": 0.11446309089660645,
      "step": 45633
    },
    {
      "epoch": 0.000278521728515625,
      "step": 45633,
      "training_step_time": 0.5014021396636963
    },
    {
      "epoch": 0.00027852783203125,
      "model_forward_time": 0.11483383178710938,
      "step": 45634
    },
    {
      "epoch": 0.00027852783203125,
      "step": 45634,
      "training_step_time": 0.9321110248565674
    },
    {
      "epoch": 0.000278533935546875,
      "model_forward_time": 0.11395049095153809,
      "step": 45635
    },
    {
      "epoch": 0.000278533935546875,
      "step": 45635,
      "training_step_time": 0.3812851905822754
    },
    {
      "epoch": 0.0002785400390625,
      "model_forward_time": 0.1141045093536377,
      "step": 45636
    },
    {
      "epoch": 0.0002785400390625,
      "step": 45636,
      "training_step_time": 0.39302921295166016
    },
    {
      "epoch": 0.000278546142578125,
      "model_forward_time": 0.11502766609191895,
      "step": 45637
    },
    {
      "epoch": 0.000278546142578125,
      "step": 45637,
      "training_step_time": 0.38860511779785156
    },
    {
      "epoch": 0.00027855224609375,
      "model_forward_time": 0.11437296867370605,
      "step": 45638
    },
    {
      "epoch": 0.00027855224609375,
      "step": 45638,
      "training_step_time": 0.38138389587402344
    },
    {
      "epoch": 0.000278558349609375,
      "model_forward_time": 0.1141364574432373,
      "step": 45639
    },
    {
      "epoch": 0.000278558349609375,
      "step": 45639,
      "training_step_time": 0.38446640968322754
    },
    {
      "epoch": 0.000278564453125,
      "grad_norm": 0.07393242418766022,
      "learning_rate": 1.4859659012130695e-05,
      "loss": 0.0306,
      "step": 45640
    },
    {
      "epoch": 0.000278564453125,
      "model_forward_time": 0.1147165298461914,
      "step": 45640
    },
    {
      "epoch": 0.000278564453125,
      "step": 45640,
      "training_step_time": 1.0154874324798584
    },
    {
      "epoch": 0.000278570556640625,
      "model_forward_time": 0.11413860321044922,
      "step": 45641
    },
    {
      "epoch": 0.000278570556640625,
      "step": 45641,
      "training_step_time": 0.40093231201171875
    },
    {
      "epoch": 0.00027857666015625,
      "model_forward_time": 0.1145777702331543,
      "step": 45642
    },
    {
      "epoch": 0.00027857666015625,
      "step": 45642,
      "training_step_time": 0.4518764019012451
    },
    {
      "epoch": 0.000278582763671875,
      "model_forward_time": 0.11443376541137695,
      "step": 45643
    },
    {
      "epoch": 0.000278582763671875,
      "step": 45643,
      "training_step_time": 0.385317325592041
    },
    {
      "epoch": 0.0002785888671875,
      "model_forward_time": 0.11409521102905273,
      "step": 45644
    },
    {
      "epoch": 0.0002785888671875,
      "step": 45644,
      "training_step_time": 0.4392890930175781
    },
    {
      "epoch": 0.000278594970703125,
      "model_forward_time": 0.11396408081054688,
      "step": 45645
    },
    {
      "epoch": 0.000278594970703125,
      "step": 45645,
      "training_step_time": 0.45688557624816895
    },
    {
      "epoch": 0.00027860107421875,
      "model_forward_time": 0.11436319351196289,
      "step": 45646
    },
    {
      "epoch": 0.00027860107421875,
      "step": 45646,
      "training_step_time": 1.0048604011535645
    },
    {
      "epoch": 0.000278607177734375,
      "model_forward_time": 0.11397790908813477,
      "step": 45647
    },
    {
      "epoch": 0.000278607177734375,
      "step": 45647,
      "training_step_time": 0.39118504524230957
    },
    {
      "epoch": 0.00027861328125,
      "model_forward_time": 0.11393594741821289,
      "step": 45648
    },
    {
      "epoch": 0.00027861328125,
      "step": 45648,
      "training_step_time": 0.3890821933746338
    },
    {
      "epoch": 0.000278619384765625,
      "model_forward_time": 0.11403107643127441,
      "step": 45649
    },
    {
      "epoch": 0.000278619384765625,
      "step": 45649,
      "training_step_time": 0.388012170791626
    },
    {
      "epoch": 0.00027862548828125,
      "grad_norm": 0.10860420763492584,
      "learning_rate": 1.4840060240442738e-05,
      "loss": 0.0298,
      "step": 45650
    },
    {
      "epoch": 0.00027862548828125,
      "model_forward_time": 0.11399245262145996,
      "step": 45650
    },
    {
      "epoch": 0.00027862548828125,
      "step": 45650,
      "training_step_time": 0.38329124450683594
    },
    {
      "epoch": 0.000278631591796875,
      "model_forward_time": 0.11425065994262695,
      "step": 45651
    },
    {
      "epoch": 0.000278631591796875,
      "step": 45651,
      "training_step_time": 0.40668416023254395
    },
    {
      "epoch": 0.0002786376953125,
      "model_forward_time": 0.1143641471862793,
      "step": 45652
    },
    {
      "epoch": 0.0002786376953125,
      "step": 45652,
      "training_step_time": 0.6093714237213135
    },
    {
      "epoch": 0.000278643798828125,
      "model_forward_time": 0.11467313766479492,
      "step": 45653
    },
    {
      "epoch": 0.000278643798828125,
      "step": 45653,
      "training_step_time": 0.3952498435974121
    },
    {
      "epoch": 0.00027864990234375,
      "model_forward_time": 0.11450433731079102,
      "step": 45654
    },
    {
      "epoch": 0.00027864990234375,
      "step": 45654,
      "training_step_time": 0.4928557872772217
    },
    {
      "epoch": 0.000278656005859375,
      "model_forward_time": 0.1151888370513916,
      "step": 45655
    },
    {
      "epoch": 0.000278656005859375,
      "step": 45655,
      "training_step_time": 0.4875679016113281
    },
    {
      "epoch": 0.000278662109375,
      "model_forward_time": 0.11475539207458496,
      "step": 45656
    },
    {
      "epoch": 0.000278662109375,
      "step": 45656,
      "training_step_time": 0.4153017997741699
    },
    {
      "epoch": 0.000278668212890625,
      "model_forward_time": 0.1146998405456543,
      "step": 45657
    },
    {
      "epoch": 0.000278668212890625,
      "step": 45657,
      "training_step_time": 0.41397690773010254
    },
    {
      "epoch": 0.00027867431640625,
      "model_forward_time": 0.1152799129486084,
      "step": 45658
    },
    {
      "epoch": 0.00027867431640625,
      "step": 45658,
      "training_step_time": 0.7260661125183105
    },
    {
      "epoch": 0.000278680419921875,
      "model_forward_time": 0.11367940902709961,
      "step": 45659
    },
    {
      "epoch": 0.000278680419921875,
      "step": 45659,
      "training_step_time": 0.3811795711517334
    },
    {
      "epoch": 0.0002786865234375,
      "grad_norm": 0.08297327905893326,
      "learning_rate": 1.4820472149416154e-05,
      "loss": 0.0362,
      "step": 45660
    },
    {
      "epoch": 0.0002786865234375,
      "model_forward_time": 0.11396503448486328,
      "step": 45660
    },
    {
      "epoch": 0.0002786865234375,
      "step": 45660,
      "training_step_time": 0.39272332191467285
    },
    {
      "epoch": 0.000278692626953125,
      "model_forward_time": 0.11452555656433105,
      "step": 45661
    },
    {
      "epoch": 0.000278692626953125,
      "step": 45661,
      "training_step_time": 0.3893885612487793
    },
    {
      "epoch": 0.00027869873046875,
      "model_forward_time": 0.11477923393249512,
      "step": 45662
    },
    {
      "epoch": 0.00027869873046875,
      "step": 45662,
      "training_step_time": 0.3885974884033203
    },
    {
      "epoch": 0.000278704833984375,
      "model_forward_time": 0.1145637035369873,
      "step": 45663
    },
    {
      "epoch": 0.000278704833984375,
      "step": 45663,
      "training_step_time": 0.389143705368042
    },
    {
      "epoch": 0.0002787109375,
      "model_forward_time": 0.1152961254119873,
      "step": 45664
    },
    {
      "epoch": 0.0002787109375,
      "step": 45664,
      "training_step_time": 0.9382638931274414
    },
    {
      "epoch": 0.000278717041015625,
      "model_forward_time": 0.11462688446044922,
      "step": 45665
    },
    {
      "epoch": 0.000278717041015625,
      "step": 45665,
      "training_step_time": 0.3983016014099121
    },
    {
      "epoch": 0.00027872314453125,
      "model_forward_time": 0.1142568588256836,
      "step": 45666
    },
    {
      "epoch": 0.00027872314453125,
      "step": 45666,
      "training_step_time": 0.3592240810394287
    },
    {
      "epoch": 0.000278729248046875,
      "model_forward_time": 0.11435890197753906,
      "step": 45667
    },
    {
      "epoch": 0.000278729248046875,
      "step": 45667,
      "training_step_time": 0.42605066299438477
    },
    {
      "epoch": 0.0002787353515625,
      "model_forward_time": 0.11418271064758301,
      "step": 45668
    },
    {
      "epoch": 0.0002787353515625,
      "step": 45668,
      "training_step_time": 0.4511885643005371
    },
    {
      "epoch": 0.000278741455078125,
      "model_forward_time": 0.11485075950622559,
      "step": 45669
    },
    {
      "epoch": 0.000278741455078125,
      "step": 45669,
      "training_step_time": 0.4427671432495117
    },
    {
      "epoch": 0.00027874755859375,
      "grad_norm": 0.12718094885349274,
      "learning_rate": 1.480089474500127e-05,
      "loss": 0.0365,
      "step": 45670
    },
    {
      "epoch": 0.00027874755859375,
      "model_forward_time": 0.11469197273254395,
      "step": 45670
    },
    {
      "epoch": 0.00027874755859375,
      "step": 45670,
      "training_step_time": 0.46649956703186035
    },
    {
      "epoch": 0.000278753662109375,
      "model_forward_time": 0.11441802978515625,
      "step": 45671
    },
    {
      "epoch": 0.000278753662109375,
      "step": 45671,
      "training_step_time": 0.43597412109375
    },
    {
      "epoch": 0.000278759765625,
      "model_forward_time": 0.1147456169128418,
      "step": 45672
    },
    {
      "epoch": 0.000278759765625,
      "step": 45672,
      "training_step_time": 0.38590502738952637
    },
    {
      "epoch": 0.000278765869140625,
      "model_forward_time": 0.11437201499938965,
      "step": 45673
    },
    {
      "epoch": 0.000278765869140625,
      "step": 45673,
      "training_step_time": 0.38974499702453613
    },
    {
      "epoch": 0.00027877197265625,
      "model_forward_time": 0.11534762382507324,
      "step": 45674
    },
    {
      "epoch": 0.00027877197265625,
      "step": 45674,
      "training_step_time": 0.3959689140319824
    },
    {
      "epoch": 0.000278778076171875,
      "model_forward_time": 0.11499691009521484,
      "step": 45675
    },
    {
      "epoch": 0.000278778076171875,
      "step": 45675,
      "training_step_time": 0.3873119354248047
    },
    {
      "epoch": 0.0002787841796875,
      "model_forward_time": 0.11529231071472168,
      "step": 45676
    },
    {
      "epoch": 0.0002787841796875,
      "step": 45676,
      "training_step_time": 0.5757534503936768
    },
    {
      "epoch": 0.000278790283203125,
      "model_forward_time": 0.11478972434997559,
      "step": 45677
    },
    {
      "epoch": 0.000278790283203125,
      "step": 45677,
      "training_step_time": 0.38698387145996094
    },
    {
      "epoch": 0.00027879638671875,
      "model_forward_time": 0.11587762832641602,
      "step": 45678
    },
    {
      "epoch": 0.00027879638671875,
      "step": 45678,
      "training_step_time": 0.39656591415405273
    },
    {
      "epoch": 0.000278802490234375,
      "model_forward_time": 0.11514663696289062,
      "step": 45679
    },
    {
      "epoch": 0.000278802490234375,
      "step": 45679,
      "training_step_time": 0.399564266204834
    },
    {
      "epoch": 0.00027880859375,
      "grad_norm": 0.11460716277360916,
      "learning_rate": 1.4781328033145187e-05,
      "loss": 0.0336,
      "step": 45680
    },
    {
      "epoch": 0.00027880859375,
      "model_forward_time": 0.11484193801879883,
      "step": 45680
    },
    {
      "epoch": 0.00027880859375,
      "step": 45680,
      "training_step_time": 0.36699748039245605
    },
    {
      "epoch": 0.000278814697265625,
      "model_forward_time": 0.11462020874023438,
      "step": 45681
    },
    {
      "epoch": 0.000278814697265625,
      "step": 45681,
      "training_step_time": 0.458437442779541
    },
    {
      "epoch": 0.00027882080078125,
      "model_forward_time": 0.11501026153564453,
      "step": 45682
    },
    {
      "epoch": 0.00027882080078125,
      "step": 45682,
      "training_step_time": 0.5117001533508301
    },
    {
      "epoch": 0.000278826904296875,
      "model_forward_time": 0.11552166938781738,
      "step": 45683
    },
    {
      "epoch": 0.000278826904296875,
      "step": 45683,
      "training_step_time": 0.4423086643218994
    },
    {
      "epoch": 0.0002788330078125,
      "model_forward_time": 0.11383485794067383,
      "step": 45684
    },
    {
      "epoch": 0.0002788330078125,
      "step": 45684,
      "training_step_time": 0.4851398468017578
    },
    {
      "epoch": 0.000278839111328125,
      "model_forward_time": 0.11511397361755371,
      "step": 45685
    },
    {
      "epoch": 0.000278839111328125,
      "step": 45685,
      "training_step_time": 0.39464473724365234
    },
    {
      "epoch": 0.00027884521484375,
      "model_forward_time": 0.11449599266052246,
      "step": 45686
    },
    {
      "epoch": 0.00027884521484375,
      "step": 45686,
      "training_step_time": 0.38811779022216797
    },
    {
      "epoch": 0.000278851318359375,
      "model_forward_time": 0.11481499671936035,
      "step": 45687
    },
    {
      "epoch": 0.000278851318359375,
      "step": 45687,
      "training_step_time": 0.39298248291015625
    },
    {
      "epoch": 0.000278857421875,
      "model_forward_time": 0.11524128913879395,
      "step": 45688
    },
    {
      "epoch": 0.000278857421875,
      "step": 45688,
      "training_step_time": 0.6635911464691162
    },
    {
      "epoch": 0.000278863525390625,
      "model_forward_time": 0.11472916603088379,
      "step": 45689
    },
    {
      "epoch": 0.000278863525390625,
      "step": 45689,
      "training_step_time": 0.3918330669403076
    },
    {
      "epoch": 0.00027886962890625,
      "grad_norm": 0.10957710444927216,
      "learning_rate": 1.4761772019791748e-05,
      "loss": 0.0344,
      "step": 45690
    },
    {
      "epoch": 0.00027886962890625,
      "model_forward_time": 0.11498451232910156,
      "step": 45690
    },
    {
      "epoch": 0.00027886962890625,
      "step": 45690,
      "training_step_time": 0.3889024257659912
    },
    {
      "epoch": 0.000278875732421875,
      "model_forward_time": 0.11472606658935547,
      "step": 45691
    },
    {
      "epoch": 0.000278875732421875,
      "step": 45691,
      "training_step_time": 0.3904092311859131
    },
    {
      "epoch": 0.0002788818359375,
      "model_forward_time": 0.11518168449401855,
      "step": 45692
    },
    {
      "epoch": 0.0002788818359375,
      "step": 45692,
      "training_step_time": 0.3937091827392578
    },
    {
      "epoch": 0.000278887939453125,
      "model_forward_time": 0.11433196067810059,
      "step": 45693
    },
    {
      "epoch": 0.000278887939453125,
      "step": 45693,
      "training_step_time": 0.3889317512512207
    },
    {
      "epoch": 0.00027889404296875,
      "model_forward_time": 0.11466717720031738,
      "step": 45694
    },
    {
      "epoch": 0.00027889404296875,
      "step": 45694,
      "training_step_time": 0.8191394805908203
    },
    {
      "epoch": 0.000278900146484375,
      "model_forward_time": 0.11436963081359863,
      "step": 45695
    },
    {
      "epoch": 0.000278900146484375,
      "step": 45695,
      "training_step_time": 0.42757439613342285
    },
    {
      "epoch": 0.00027890625,
      "model_forward_time": 0.11467814445495605,
      "step": 45696
    },
    {
      "epoch": 0.00027890625,
      "step": 45696,
      "training_step_time": 0.4375896453857422
    },
    {
      "epoch": 0.000278912353515625,
      "model_forward_time": 0.11488056182861328,
      "step": 45697
    },
    {
      "epoch": 0.000278912353515625,
      "step": 45697,
      "training_step_time": 0.3909189701080322
    },
    {
      "epoch": 0.00027891845703125,
      "model_forward_time": 0.11542582511901855,
      "step": 45698
    },
    {
      "epoch": 0.00027891845703125,
      "step": 45698,
      "training_step_time": 0.4556467533111572
    },
    {
      "epoch": 0.000278924560546875,
      "model_forward_time": 0.11410832405090332,
      "step": 45699
    },
    {
      "epoch": 0.000278924560546875,
      "step": 45699,
      "training_step_time": 0.38069677352905273
    },
    {
      "epoch": 0.0002789306640625,
      "grad_norm": 0.09874119609594345,
      "learning_rate": 1.4742226710881558e-05,
      "loss": 0.0348,
      "step": 45700
    },
    {
      "epoch": 0.0002789306640625,
      "model_forward_time": 0.11462903022766113,
      "step": 45700
    },
    {
      "epoch": 0.0002789306640625,
      "step": 45700,
      "training_step_time": 0.7086412906646729
    },
    {
      "epoch": 0.000278936767578125,
      "model_forward_time": 0.11426258087158203,
      "step": 45701
    },
    {
      "epoch": 0.000278936767578125,
      "step": 45701,
      "training_step_time": 0.3878748416900635
    },
    {
      "epoch": 0.00027894287109375,
      "model_forward_time": 0.11437559127807617,
      "step": 45702
    },
    {
      "epoch": 0.00027894287109375,
      "step": 45702,
      "training_step_time": 0.3881113529205322
    },
    {
      "epoch": 0.000278948974609375,
      "model_forward_time": 0.11463642120361328,
      "step": 45703
    },
    {
      "epoch": 0.000278948974609375,
      "step": 45703,
      "training_step_time": 0.38390588760375977
    },
    {
      "epoch": 0.000278955078125,
      "model_forward_time": 0.11522412300109863,
      "step": 45704
    },
    {
      "epoch": 0.000278955078125,
      "step": 45704,
      "training_step_time": 0.38497376441955566
    },
    {
      "epoch": 0.000278961181640625,
      "model_forward_time": 0.11562848091125488,
      "step": 45705
    },
    {
      "epoch": 0.000278961181640625,
      "step": 45705,
      "training_step_time": 0.38483095169067383
    },
    {
      "epoch": 0.00027896728515625,
      "model_forward_time": 0.1150503158569336,
      "step": 45706
    },
    {
      "epoch": 0.00027896728515625,
      "step": 45706,
      "training_step_time": 1.007232666015625
    },
    {
      "epoch": 0.000278973388671875,
      "model_forward_time": 0.11531615257263184,
      "step": 45707
    },
    {
      "epoch": 0.000278973388671875,
      "step": 45707,
      "training_step_time": 0.4327695369720459
    },
    {
      "epoch": 0.0002789794921875,
      "model_forward_time": 0.11433219909667969,
      "step": 45708
    },
    {
      "epoch": 0.0002789794921875,
      "step": 45708,
      "training_step_time": 0.44785022735595703
    },
    {
      "epoch": 0.000278985595703125,
      "model_forward_time": 0.11453580856323242,
      "step": 45709
    },
    {
      "epoch": 0.000278985595703125,
      "step": 45709,
      "training_step_time": 0.48872804641723633
    },
    {
      "epoch": 0.00027899169921875,
      "grad_norm": 0.0919366106390953,
      "learning_rate": 1.472269211235195e-05,
      "loss": 0.0382,
      "step": 45710
    },
    {
      "epoch": 0.00027899169921875,
      "model_forward_time": 0.113861083984375,
      "step": 45710
    },
    {
      "epoch": 0.00027899169921875,
      "step": 45710,
      "training_step_time": 0.4482390880584717
    },
    {
      "epoch": 0.000278997802734375,
      "model_forward_time": 0.11417746543884277,
      "step": 45711
    },
    {
      "epoch": 0.000278997802734375,
      "step": 45711,
      "training_step_time": 0.3819551467895508
    },
    {
      "epoch": 0.00027900390625,
      "model_forward_time": 0.11443591117858887,
      "step": 45712
    },
    {
      "epoch": 0.00027900390625,
      "step": 45712,
      "training_step_time": 0.3986492156982422
    },
    {
      "epoch": 0.000279010009765625,
      "model_forward_time": 0.11489653587341309,
      "step": 45713
    },
    {
      "epoch": 0.000279010009765625,
      "step": 45713,
      "training_step_time": 0.39306092262268066
    },
    {
      "epoch": 0.00027901611328125,
      "model_forward_time": 0.11490964889526367,
      "step": 45714
    },
    {
      "epoch": 0.00027901611328125,
      "step": 45714,
      "training_step_time": 0.3887617588043213
    },
    {
      "epoch": 0.000279022216796875,
      "model_forward_time": 0.1154320240020752,
      "step": 45715
    },
    {
      "epoch": 0.000279022216796875,
      "step": 45715,
      "training_step_time": 0.394589900970459
    },
    {
      "epoch": 0.0002790283203125,
      "model_forward_time": 0.11508703231811523,
      "step": 45716
    },
    {
      "epoch": 0.0002790283203125,
      "step": 45716,
      "training_step_time": 0.3950991630554199
    },
    {
      "epoch": 0.000279034423828125,
      "model_forward_time": 0.11622929573059082,
      "step": 45717
    },
    {
      "epoch": 0.000279034423828125,
      "step": 45717,
      "training_step_time": 0.3843824863433838
    },
    {
      "epoch": 0.00027904052734375,
      "model_forward_time": 0.11574435234069824,
      "step": 45718
    },
    {
      "epoch": 0.00027904052734375,
      "step": 45718,
      "training_step_time": 0.8024153709411621
    },
    {
      "epoch": 0.000279046630859375,
      "model_forward_time": 0.11446356773376465,
      "step": 45719
    },
    {
      "epoch": 0.000279046630859375,
      "step": 45719,
      "training_step_time": 0.4052314758300781
    },
    {
      "epoch": 0.000279052734375,
      "grad_norm": 0.0881907269358635,
      "learning_rate": 1.470316823013707e-05,
      "loss": 0.0325,
      "step": 45720
    },
    {
      "epoch": 0.000279052734375,
      "model_forward_time": 0.11471796035766602,
      "step": 45720
    },
    {
      "epoch": 0.000279052734375,
      "step": 45720,
      "training_step_time": 0.39124274253845215
    },
    {
      "epoch": 0.000279058837890625,
      "model_forward_time": 0.11460280418395996,
      "step": 45721
    },
    {
      "epoch": 0.000279058837890625,
      "step": 45721,
      "training_step_time": 0.4295990467071533
    },
    {
      "epoch": 0.00027906494140625,
      "model_forward_time": 0.11574578285217285,
      "step": 45722
    },
    {
      "epoch": 0.00027906494140625,
      "step": 45722,
      "training_step_time": 0.4010920524597168
    },
    {
      "epoch": 0.000279071044921875,
      "model_forward_time": 0.1150972843170166,
      "step": 45723
    },
    {
      "epoch": 0.000279071044921875,
      "step": 45723,
      "training_step_time": 0.4572618007659912
    },
    {
      "epoch": 0.0002790771484375,
      "model_forward_time": 0.11568069458007812,
      "step": 45724
    },
    {
      "epoch": 0.0002790771484375,
      "step": 45724,
      "training_step_time": 0.7242085933685303
    },
    {
      "epoch": 0.000279083251953125,
      "model_forward_time": 0.11423635482788086,
      "step": 45725
    },
    {
      "epoch": 0.000279083251953125,
      "step": 45725,
      "training_step_time": 0.38493967056274414
    },
    {
      "epoch": 0.00027908935546875,
      "model_forward_time": 0.11400723457336426,
      "step": 45726
    },
    {
      "epoch": 0.00027908935546875,
      "step": 45726,
      "training_step_time": 0.3854634761810303
    },
    {
      "epoch": 0.000279095458984375,
      "model_forward_time": 0.11465978622436523,
      "step": 45727
    },
    {
      "epoch": 0.000279095458984375,
      "step": 45727,
      "training_step_time": 0.3860297203063965
    },
    {
      "epoch": 0.0002791015625,
      "model_forward_time": 0.11481261253356934,
      "step": 45728
    },
    {
      "epoch": 0.0002791015625,
      "step": 45728,
      "training_step_time": 0.3870863914489746
    },
    {
      "epoch": 0.000279107666015625,
      "model_forward_time": 0.11549067497253418,
      "step": 45729
    },
    {
      "epoch": 0.000279107666015625,
      "step": 45729,
      "training_step_time": 0.3808562755584717
    },
    {
      "epoch": 0.00027911376953125,
      "grad_norm": 0.14046356081962585,
      "learning_rate": 1.468365507016769e-05,
      "loss": 0.0373,
      "step": 45730
    },
    {
      "epoch": 0.00027911376953125,
      "model_forward_time": 0.11486554145812988,
      "step": 45730
    },
    {
      "epoch": 0.00027911376953125,
      "step": 45730,
      "training_step_time": 0.8334121704101562
    },
    {
      "epoch": 0.000279119873046875,
      "model_forward_time": 0.11440372467041016,
      "step": 45731
    },
    {
      "epoch": 0.000279119873046875,
      "step": 45731,
      "training_step_time": 0.3871171474456787
    },
    {
      "epoch": 0.0002791259765625,
      "model_forward_time": 0.11408019065856934,
      "step": 45732
    },
    {
      "epoch": 0.0002791259765625,
      "step": 45732,
      "training_step_time": 0.438460111618042
    },
    {
      "epoch": 0.000279132080078125,
      "model_forward_time": 0.11442422866821289,
      "step": 45733
    },
    {
      "epoch": 0.000279132080078125,
      "step": 45733,
      "training_step_time": 0.42520928382873535
    },
    {
      "epoch": 0.00027913818359375,
      "model_forward_time": 0.11513113975524902,
      "step": 45734
    },
    {
      "epoch": 0.00027913818359375,
      "step": 45734,
      "training_step_time": 0.4390373229980469
    },
    {
      "epoch": 0.000279144287109375,
      "model_forward_time": 0.11482048034667969,
      "step": 45735
    },
    {
      "epoch": 0.000279144287109375,
      "step": 45735,
      "training_step_time": 0.42760753631591797
    },
    {
      "epoch": 0.000279150390625,
      "model_forward_time": 0.11478924751281738,
      "step": 45736
    },
    {
      "epoch": 0.000279150390625,
      "step": 45736,
      "training_step_time": 0.6576929092407227
    },
    {
      "epoch": 0.000279156494140625,
      "model_forward_time": 0.11391520500183105,
      "step": 45737
    },
    {
      "epoch": 0.000279156494140625,
      "step": 45737,
      "training_step_time": 0.40833258628845215
    },
    {
      "epoch": 0.00027916259765625,
      "model_forward_time": 0.11428141593933105,
      "step": 45738
    },
    {
      "epoch": 0.00027916259765625,
      "step": 45738,
      "training_step_time": 0.39402341842651367
    },
    {
      "epoch": 0.000279168701171875,
      "model_forward_time": 0.11446785926818848,
      "step": 45739
    },
    {
      "epoch": 0.000279168701171875,
      "step": 45739,
      "training_step_time": 0.39116573333740234
    },
    {
      "epoch": 0.0002791748046875,
      "grad_norm": 0.0899687334895134,
      "learning_rate": 1.4664152638371437e-05,
      "loss": 0.039,
      "step": 45740
    },
    {
      "epoch": 0.0002791748046875,
      "model_forward_time": 0.11460995674133301,
      "step": 45740
    },
    {
      "epoch": 0.0002791748046875,
      "step": 45740,
      "training_step_time": 0.3931293487548828
    },
    {
      "epoch": 0.000279180908203125,
      "model_forward_time": 0.11554551124572754,
      "step": 45741
    },
    {
      "epoch": 0.000279180908203125,
      "step": 45741,
      "training_step_time": 0.38311004638671875
    },
    {
      "epoch": 0.00027918701171875,
      "model_forward_time": 0.11527442932128906,
      "step": 45742
    },
    {
      "epoch": 0.00027918701171875,
      "step": 45742,
      "training_step_time": 0.9992568492889404
    },
    {
      "epoch": 0.000279193115234375,
      "model_forward_time": 0.11506319046020508,
      "step": 45743
    },
    {
      "epoch": 0.000279193115234375,
      "step": 45743,
      "training_step_time": 0.3795156478881836
    },
    {
      "epoch": 0.00027919921875,
      "model_forward_time": 0.11426377296447754,
      "step": 45744
    },
    {
      "epoch": 0.00027919921875,
      "step": 45744,
      "training_step_time": 0.45229578018188477
    },
    {
      "epoch": 0.000279205322265625,
      "model_forward_time": 0.11356806755065918,
      "step": 45745
    },
    {
      "epoch": 0.000279205322265625,
      "step": 45745,
      "training_step_time": 0.3869154453277588
    },
    {
      "epoch": 0.00027921142578125,
      "model_forward_time": 0.11410284042358398,
      "step": 45746
    },
    {
      "epoch": 0.00027921142578125,
      "step": 45746,
      "training_step_time": 0.38433051109313965
    },
    {
      "epoch": 0.000279217529296875,
      "model_forward_time": 0.11444687843322754,
      "step": 45747
    },
    {
      "epoch": 0.000279217529296875,
      "step": 45747,
      "training_step_time": 0.4410560131072998
    },
    {
      "epoch": 0.0002792236328125,
      "model_forward_time": 0.11485958099365234,
      "step": 45748
    },
    {
      "epoch": 0.0002792236328125,
      "step": 45748,
      "training_step_time": 1.1358602046966553
    },
    {
      "epoch": 0.000279229736328125,
      "model_forward_time": 0.11421942710876465,
      "step": 45749
    },
    {
      "epoch": 0.000279229736328125,
      "step": 45749,
      "training_step_time": 0.43529582023620605
    },
    {
      "epoch": 0.00027923583984375,
      "grad_norm": 0.0949663519859314,
      "learning_rate": 1.4644660940672627e-05,
      "loss": 0.036,
      "step": 45750
    },
    {
      "epoch": 0.00027923583984375,
      "model_forward_time": 0.11375117301940918,
      "step": 45750
    },
    {
      "epoch": 0.00027923583984375,
      "step": 45750,
      "training_step_time": 0.3823854923248291
    },
    {
      "epoch": 0.000279241943359375,
      "model_forward_time": 0.11415386199951172,
      "step": 45751
    },
    {
      "epoch": 0.000279241943359375,
      "step": 45751,
      "training_step_time": 0.3947579860687256
    },
    {
      "epoch": 0.000279248046875,
      "model_forward_time": 0.11433982849121094,
      "step": 45752
    },
    {
      "epoch": 0.000279248046875,
      "step": 45752,
      "training_step_time": 0.3845484256744385
    },
    {
      "epoch": 0.000279254150390625,
      "model_forward_time": 0.11426162719726562,
      "step": 45753
    },
    {
      "epoch": 0.000279254150390625,
      "step": 45753,
      "training_step_time": 0.3902554512023926
    },
    {
      "epoch": 0.00027926025390625,
      "model_forward_time": 0.11508822441101074,
      "step": 45754
    },
    {
      "epoch": 0.00027926025390625,
      "step": 45754,
      "training_step_time": 0.7472689151763916
    },
    {
      "epoch": 0.000279266357421875,
      "model_forward_time": 0.11417269706726074,
      "step": 45755
    },
    {
      "epoch": 0.000279266357421875,
      "step": 45755,
      "training_step_time": 0.38730812072753906
    },
    {
      "epoch": 0.0002792724609375,
      "model_forward_time": 0.1142725944519043,
      "step": 45756
    },
    {
      "epoch": 0.0002792724609375,
      "step": 45756,
      "training_step_time": 0.4074282646179199
    },
    {
      "epoch": 0.000279278564453125,
      "model_forward_time": 0.11446833610534668,
      "step": 45757
    },
    {
      "epoch": 0.000279278564453125,
      "step": 45757,
      "training_step_time": 0.4032928943634033
    },
    {
      "epoch": 0.00027928466796875,
      "model_forward_time": 0.11423707008361816,
      "step": 45758
    },
    {
      "epoch": 0.00027928466796875,
      "step": 45758,
      "training_step_time": 0.3871040344238281
    },
    {
      "epoch": 0.000279290771484375,
      "model_forward_time": 0.11442875862121582,
      "step": 45759
    },
    {
      "epoch": 0.000279290771484375,
      "step": 45759,
      "training_step_time": 0.3623826503753662
    },
    {
      "epoch": 0.000279296875,
      "grad_norm": 0.07021547853946686,
      "learning_rate": 1.4625179982992321e-05,
      "loss": 0.0363,
      "step": 45760
    },
    {
      "epoch": 0.000279296875,
      "model_forward_time": 0.11557221412658691,
      "step": 45760
    },
    {
      "epoch": 0.000279296875,
      "step": 45760,
      "training_step_time": 0.953601598739624
    },
    {
      "epoch": 0.000279302978515625,
      "model_forward_time": 0.11400914192199707,
      "step": 45761
    },
    {
      "epoch": 0.000279302978515625,
      "step": 45761,
      "training_step_time": 0.3842647075653076
    },
    {
      "epoch": 0.00027930908203125,
      "model_forward_time": 0.11475920677185059,
      "step": 45762
    },
    {
      "epoch": 0.00027930908203125,
      "step": 45762,
      "training_step_time": 0.4360215663909912
    },
    {
      "epoch": 0.000279315185546875,
      "model_forward_time": 0.11459231376647949,
      "step": 45763
    },
    {
      "epoch": 0.000279315185546875,
      "step": 45763,
      "training_step_time": 0.3923778533935547
    },
    {
      "epoch": 0.0002793212890625,
      "model_forward_time": 0.11424446105957031,
      "step": 45764
    },
    {
      "epoch": 0.0002793212890625,
      "step": 45764,
      "training_step_time": 0.3831062316894531
    },
    {
      "epoch": 0.000279327392578125,
      "model_forward_time": 0.11438155174255371,
      "step": 45765
    },
    {
      "epoch": 0.000279327392578125,
      "step": 45765,
      "training_step_time": 0.38902902603149414
    },
    {
      "epoch": 0.00027933349609375,
      "model_forward_time": 0.11469054222106934,
      "step": 45766
    },
    {
      "epoch": 0.00027933349609375,
      "step": 45766,
      "training_step_time": 0.7883415222167969
    },
    {
      "epoch": 0.000279339599609375,
      "model_forward_time": 0.11410832405090332,
      "step": 45767
    },
    {
      "epoch": 0.000279339599609375,
      "step": 45767,
      "training_step_time": 0.38760995864868164
    },
    {
      "epoch": 0.000279345703125,
      "model_forward_time": 0.11436128616333008,
      "step": 45768
    },
    {
      "epoch": 0.000279345703125,
      "step": 45768,
      "training_step_time": 0.40352559089660645
    },
    {
      "epoch": 0.000279351806640625,
      "model_forward_time": 0.11451005935668945,
      "step": 45769
    },
    {
      "epoch": 0.000279351806640625,
      "step": 45769,
      "training_step_time": 0.3879556655883789
    },
    {
      "epoch": 0.00027935791015625,
      "grad_norm": 0.09885197877883911,
      "learning_rate": 1.4605709771248316e-05,
      "loss": 0.0351,
      "step": 45770
    },
    {
      "epoch": 0.00027935791015625,
      "model_forward_time": 0.1148383617401123,
      "step": 45770
    },
    {
      "epoch": 0.00027935791015625,
      "step": 45770,
      "training_step_time": 0.38354969024658203
    },
    {
      "epoch": 0.000279364013671875,
      "model_forward_time": 0.11488056182861328,
      "step": 45771
    },
    {
      "epoch": 0.000279364013671875,
      "step": 45771,
      "training_step_time": 0.3925282955169678
    },
    {
      "epoch": 0.0002793701171875,
      "model_forward_time": 0.11449003219604492,
      "step": 45772
    },
    {
      "epoch": 0.0002793701171875,
      "step": 45772,
      "training_step_time": 0.8333685398101807
    },
    {
      "epoch": 0.000279376220703125,
      "model_forward_time": 0.11497735977172852,
      "step": 45773
    },
    {
      "epoch": 0.000279376220703125,
      "step": 45773,
      "training_step_time": 0.48081398010253906
    },
    {
      "epoch": 0.00027938232421875,
      "model_forward_time": 0.11418581008911133,
      "step": 45774
    },
    {
      "epoch": 0.00027938232421875,
      "step": 45774,
      "training_step_time": 0.4044315814971924
    },
    {
      "epoch": 0.000279388427734375,
      "model_forward_time": 0.11445999145507812,
      "step": 45775
    },
    {
      "epoch": 0.000279388427734375,
      "step": 45775,
      "training_step_time": 0.46433281898498535
    },
    {
      "epoch": 0.00027939453125,
      "model_forward_time": 0.1138300895690918,
      "step": 45776
    },
    {
      "epoch": 0.00027939453125,
      "step": 45776,
      "training_step_time": 0.3857746124267578
    },
    {
      "epoch": 0.000279400634765625,
      "model_forward_time": 0.11403465270996094,
      "step": 45777
    },
    {
      "epoch": 0.000279400634765625,
      "step": 45777,
      "training_step_time": 0.3799624443054199
    },
    {
      "epoch": 0.00027940673828125,
      "model_forward_time": 0.11460423469543457,
      "step": 45778
    },
    {
      "epoch": 0.00027940673828125,
      "step": 45778,
      "training_step_time": 0.6934454441070557
    },
    {
      "epoch": 0.000279412841796875,
      "model_forward_time": 0.11449718475341797,
      "step": 45779
    },
    {
      "epoch": 0.000279412841796875,
      "step": 45779,
      "training_step_time": 0.388782262802124
    },
    {
      "epoch": 0.0002794189453125,
      "grad_norm": 0.12288784980773926,
      "learning_rate": 1.4586250311355132e-05,
      "loss": 0.0328,
      "step": 45780
    },
    {
      "epoch": 0.0002794189453125,
      "model_forward_time": 0.11462092399597168,
      "step": 45780
    },
    {
      "epoch": 0.0002794189453125,
      "step": 45780,
      "training_step_time": 0.40459322929382324
    },
    {
      "epoch": 0.000279425048828125,
      "model_forward_time": 0.11474990844726562,
      "step": 45781
    },
    {
      "epoch": 0.000279425048828125,
      "step": 45781,
      "training_step_time": 0.4136488437652588
    },
    {
      "epoch": 0.00027943115234375,
      "model_forward_time": 0.1149892807006836,
      "step": 45782
    },
    {
      "epoch": 0.00027943115234375,
      "step": 45782,
      "training_step_time": 0.38828325271606445
    },
    {
      "epoch": 0.000279437255859375,
      "model_forward_time": 0.11460685729980469,
      "step": 45783
    },
    {
      "epoch": 0.000279437255859375,
      "step": 45783,
      "training_step_time": 0.3774139881134033
    },
    {
      "epoch": 0.000279443359375,
      "model_forward_time": 0.11470794677734375,
      "step": 45784
    },
    {
      "epoch": 0.000279443359375,
      "step": 45784,
      "training_step_time": 0.9680020809173584
    },
    {
      "epoch": 0.000279449462890625,
      "model_forward_time": 0.1140589714050293,
      "step": 45785
    },
    {
      "epoch": 0.000279449462890625,
      "step": 45785,
      "training_step_time": 0.4230520725250244
    },
    {
      "epoch": 0.00027945556640625,
      "model_forward_time": 0.11512160301208496,
      "step": 45786
    },
    {
      "epoch": 0.00027945556640625,
      "step": 45786,
      "training_step_time": 0.48349618911743164
    },
    {
      "epoch": 0.000279461669921875,
      "model_forward_time": 0.11430811882019043,
      "step": 45787
    },
    {
      "epoch": 0.000279461669921875,
      "step": 45787,
      "training_step_time": 0.40426111221313477
    },
    {
      "epoch": 0.0002794677734375,
      "model_forward_time": 0.11382246017456055,
      "step": 45788
    },
    {
      "epoch": 0.0002794677734375,
      "step": 45788,
      "training_step_time": 0.39446568489074707
    },
    {
      "epoch": 0.000279473876953125,
      "model_forward_time": 0.11480426788330078,
      "step": 45789
    },
    {
      "epoch": 0.000279473876953125,
      "step": 45789,
      "training_step_time": 0.37406420707702637
    },
    {
      "epoch": 0.00027947998046875,
      "grad_norm": 0.1307893544435501,
      "learning_rate": 1.4566801609224096e-05,
      "loss": 0.0343,
      "step": 45790
    },
    {
      "epoch": 0.00027947998046875,
      "model_forward_time": 0.11470842361450195,
      "step": 45790
    },
    {
      "epoch": 0.00027947998046875,
      "step": 45790,
      "training_step_time": 0.47765111923217773
    },
    {
      "epoch": 0.000279486083984375,
      "model_forward_time": 0.11427998542785645,
      "step": 45791
    },
    {
      "epoch": 0.000279486083984375,
      "step": 45791,
      "training_step_time": 0.37587928771972656
    },
    {
      "epoch": 0.0002794921875,
      "model_forward_time": 0.11503195762634277,
      "step": 45792
    },
    {
      "epoch": 0.0002794921875,
      "step": 45792,
      "training_step_time": 0.4017815589904785
    },
    {
      "epoch": 0.000279498291015625,
      "model_forward_time": 0.11466813087463379,
      "step": 45793
    },
    {
      "epoch": 0.000279498291015625,
      "step": 45793,
      "training_step_time": 0.4709329605102539
    },
    {
      "epoch": 0.00027950439453125,
      "model_forward_time": 0.11502408981323242,
      "step": 45794
    },
    {
      "epoch": 0.00027950439453125,
      "step": 45794,
      "training_step_time": 0.41583919525146484
    },
    {
      "epoch": 0.000279510498046875,
      "model_forward_time": 0.11482763290405273,
      "step": 45795
    },
    {
      "epoch": 0.000279510498046875,
      "step": 45795,
      "training_step_time": 0.38223934173583984
    },
    {
      "epoch": 0.0002795166015625,
      "model_forward_time": 0.11551594734191895,
      "step": 45796
    },
    {
      "epoch": 0.0002795166015625,
      "step": 45796,
      "training_step_time": 0.930640697479248
    },
    {
      "epoch": 0.000279522705078125,
      "model_forward_time": 0.11352682113647461,
      "step": 45797
    },
    {
      "epoch": 0.000279522705078125,
      "step": 45797,
      "training_step_time": 0.3776850700378418
    },
    {
      "epoch": 0.00027952880859375,
      "model_forward_time": 0.11459612846374512,
      "step": 45798
    },
    {
      "epoch": 0.00027952880859375,
      "step": 45798,
      "training_step_time": 0.3845329284667969
    },
    {
      "epoch": 0.000279534912109375,
      "model_forward_time": 0.11491703987121582,
      "step": 45799
    },
    {
      "epoch": 0.000279534912109375,
      "step": 45799,
      "training_step_time": 0.5017640590667725
    },
    {
      "epoch": 0.000279541015625,
      "grad_norm": 0.10307633876800537,
      "learning_rate": 1.4547363670763137e-05,
      "loss": 0.036,
      "step": 45800
    },
    {
      "epoch": 0.000279541015625,
      "model_forward_time": 0.1146998405456543,
      "step": 45800
    },
    {
      "epoch": 0.000279541015625,
      "step": 45800,
      "training_step_time": 0.46723508834838867
    },
    {
      "epoch": 0.000279547119140625,
      "model_forward_time": 0.11450338363647461,
      "step": 45801
    },
    {
      "epoch": 0.000279547119140625,
      "step": 45801,
      "training_step_time": 0.4322826862335205
    },
    {
      "epoch": 0.00027955322265625,
      "model_forward_time": 0.11480879783630371,
      "step": 45802
    },
    {
      "epoch": 0.00027955322265625,
      "step": 45802,
      "training_step_time": 0.4844975471496582
    },
    {
      "epoch": 0.000279559326171875,
      "model_forward_time": 0.11428689956665039,
      "step": 45803
    },
    {
      "epoch": 0.000279559326171875,
      "step": 45803,
      "training_step_time": 0.38846278190612793
    },
    {
      "epoch": 0.0002795654296875,
      "model_forward_time": 0.11483597755432129,
      "step": 45804
    },
    {
      "epoch": 0.0002795654296875,
      "step": 45804,
      "training_step_time": 0.39269256591796875
    },
    {
      "epoch": 0.000279571533203125,
      "model_forward_time": 0.11576986312866211,
      "step": 45805
    },
    {
      "epoch": 0.000279571533203125,
      "step": 45805,
      "training_step_time": 0.3969109058380127
    },
    {
      "epoch": 0.00027957763671875,
      "model_forward_time": 0.11491131782531738,
      "step": 45806
    },
    {
      "epoch": 0.00027957763671875,
      "step": 45806,
      "training_step_time": 0.4577972888946533
    },
    {
      "epoch": 0.000279583740234375,
      "model_forward_time": 0.11514663696289062,
      "step": 45807
    },
    {
      "epoch": 0.000279583740234375,
      "step": 45807,
      "training_step_time": 0.39501237869262695
    },
    {
      "epoch": 0.00027958984375,
      "model_forward_time": 0.1147620677947998,
      "step": 45808
    },
    {
      "epoch": 0.00027958984375,
      "step": 45808,
      "training_step_time": 0.6479101181030273
    },
    {
      "epoch": 0.000279595947265625,
      "model_forward_time": 0.11526370048522949,
      "step": 45809
    },
    {
      "epoch": 0.000279595947265625,
      "step": 45809,
      "training_step_time": 0.3879737854003906
    },
    {
      "epoch": 0.00027960205078125,
      "grad_norm": 0.09627805650234222,
      "learning_rate": 1.4527936501877032e-05,
      "loss": 0.0375,
      "step": 45810
    },
    {
      "epoch": 0.00027960205078125,
      "model_forward_time": 0.11456942558288574,
      "step": 45810
    },
    {
      "epoch": 0.00027960205078125,
      "step": 45810,
      "training_step_time": 0.3899857997894287
    },
    {
      "epoch": 0.000279608154296875,
      "model_forward_time": 0.11475825309753418,
      "step": 45811
    },
    {
      "epoch": 0.000279608154296875,
      "step": 45811,
      "training_step_time": 0.3918344974517822
    },
    {
      "epoch": 0.0002796142578125,
      "model_forward_time": 0.11492347717285156,
      "step": 45812
    },
    {
      "epoch": 0.0002796142578125,
      "step": 45812,
      "training_step_time": 0.3615868091583252
    },
    {
      "epoch": 0.000279620361328125,
      "model_forward_time": 0.11507630348205566,
      "step": 45813
    },
    {
      "epoch": 0.000279620361328125,
      "step": 45813,
      "training_step_time": 0.4344205856323242
    },
    {
      "epoch": 0.00027962646484375,
      "model_forward_time": 0.11458325386047363,
      "step": 45814
    },
    {
      "epoch": 0.00027962646484375,
      "step": 45814,
      "training_step_time": 0.5277633666992188
    },
    {
      "epoch": 0.000279632568359375,
      "model_forward_time": 0.11452317237854004,
      "step": 45815
    },
    {
      "epoch": 0.000279632568359375,
      "step": 45815,
      "training_step_time": 0.4094526767730713
    },
    {
      "epoch": 0.000279638671875,
      "model_forward_time": 0.11497187614440918,
      "step": 45816
    },
    {
      "epoch": 0.000279638671875,
      "step": 45816,
      "training_step_time": 0.4013674259185791
    },
    {
      "epoch": 0.000279644775390625,
      "model_forward_time": 0.11460089683532715,
      "step": 45817
    },
    {
      "epoch": 0.000279644775390625,
      "step": 45817,
      "training_step_time": 0.3879714012145996
    },
    {
      "epoch": 0.00027965087890625,
      "model_forward_time": 0.11479043960571289,
      "step": 45818
    },
    {
      "epoch": 0.00027965087890625,
      "step": 45818,
      "training_step_time": 0.4378345012664795
    },
    {
      "epoch": 0.000279656982421875,
      "model_forward_time": 0.1146249771118164,
      "step": 45819
    },
    {
      "epoch": 0.000279656982421875,
      "step": 45819,
      "training_step_time": 0.38843822479248047
    },
    {
      "epoch": 0.0002796630859375,
      "grad_norm": 0.11841995269060135,
      "learning_rate": 1.450852010846724e-05,
      "loss": 0.0329,
      "step": 45820
    },
    {
      "epoch": 0.0002796630859375,
      "model_forward_time": 0.11455702781677246,
      "step": 45820
    },
    {
      "epoch": 0.0002796630859375,
      "step": 45820,
      "training_step_time": 0.836137056350708
    },
    {
      "epoch": 0.000279669189453125,
      "model_forward_time": 0.11488509178161621,
      "step": 45821
    },
    {
      "epoch": 0.000279669189453125,
      "step": 45821,
      "training_step_time": 0.3938322067260742
    },
    {
      "epoch": 0.00027967529296875,
      "model_forward_time": 0.1141512393951416,
      "step": 45822
    },
    {
      "epoch": 0.00027967529296875,
      "step": 45822,
      "training_step_time": 0.394794225692749
    },
    {
      "epoch": 0.000279681396484375,
      "model_forward_time": 0.11467313766479492,
      "step": 45823
    },
    {
      "epoch": 0.000279681396484375,
      "step": 45823,
      "training_step_time": 0.386324405670166
    },
    {
      "epoch": 0.0002796875,
      "model_forward_time": 0.11432218551635742,
      "step": 45824
    },
    {
      "epoch": 0.0002796875,
      "step": 45824,
      "training_step_time": 0.38736724853515625
    },
    {
      "epoch": 0.000279693603515625,
      "model_forward_time": 0.1144859790802002,
      "step": 45825
    },
    {
      "epoch": 0.000279693603515625,
      "step": 45825,
      "training_step_time": 0.38254809379577637
    },
    {
      "epoch": 0.00027969970703125,
      "model_forward_time": 0.11501216888427734,
      "step": 45826
    },
    {
      "epoch": 0.00027969970703125,
      "step": 45826,
      "training_step_time": 0.8141489028930664
    },
    {
      "epoch": 0.000279705810546875,
      "model_forward_time": 0.11429405212402344,
      "step": 45827
    },
    {
      "epoch": 0.000279705810546875,
      "step": 45827,
      "training_step_time": 0.48168134689331055
    },
    {
      "epoch": 0.0002797119140625,
      "model_forward_time": 0.11429142951965332,
      "step": 45828
    },
    {
      "epoch": 0.0002797119140625,
      "step": 45828,
      "training_step_time": 0.42092061042785645
    },
    {
      "epoch": 0.000279718017578125,
      "model_forward_time": 0.11484146118164062,
      "step": 45829
    },
    {
      "epoch": 0.000279718017578125,
      "step": 45829,
      "training_step_time": 0.38133764266967773
    },
    {
      "epoch": 0.00027972412109375,
      "grad_norm": 0.13053448498249054,
      "learning_rate": 1.4489114496431938e-05,
      "loss": 0.035,
      "step": 45830
    },
    {
      "epoch": 0.00027972412109375,
      "model_forward_time": 0.11406946182250977,
      "step": 45830
    },
    {
      "epoch": 0.00027972412109375,
      "step": 45830,
      "training_step_time": 0.3929877281188965
    },
    {
      "epoch": 0.000279730224609375,
      "model_forward_time": 0.1142416000366211,
      "step": 45831
    },
    {
      "epoch": 0.000279730224609375,
      "step": 45831,
      "training_step_time": 0.3912796974182129
    },
    {
      "epoch": 0.000279736328125,
      "model_forward_time": 0.1149892807006836,
      "step": 45832
    },
    {
      "epoch": 0.000279736328125,
      "step": 45832,
      "training_step_time": 0.558978796005249
    },
    {
      "epoch": 0.000279742431640625,
      "model_forward_time": 0.11479544639587402,
      "step": 45833
    },
    {
      "epoch": 0.000279742431640625,
      "step": 45833,
      "training_step_time": 0.3794066905975342
    },
    {
      "epoch": 0.00027974853515625,
      "model_forward_time": 0.11521148681640625,
      "step": 45834
    },
    {
      "epoch": 0.00027974853515625,
      "step": 45834,
      "training_step_time": 0.38840699195861816
    },
    {
      "epoch": 0.000279754638671875,
      "model_forward_time": 0.11517047882080078,
      "step": 45835
    },
    {
      "epoch": 0.000279754638671875,
      "step": 45835,
      "training_step_time": 0.3924522399902344
    },
    {
      "epoch": 0.0002797607421875,
      "model_forward_time": 0.11599135398864746,
      "step": 45836
    },
    {
      "epoch": 0.0002797607421875,
      "step": 45836,
      "training_step_time": 0.39504027366638184
    },
    {
      "epoch": 0.000279766845703125,
      "model_forward_time": 0.11529159545898438,
      "step": 45837
    },
    {
      "epoch": 0.000279766845703125,
      "step": 45837,
      "training_step_time": 0.3849620819091797
    },
    {
      "epoch": 0.00027977294921875,
      "model_forward_time": 0.1149911880493164,
      "step": 45838
    },
    {
      "epoch": 0.00027977294921875,
      "step": 45838,
      "training_step_time": 0.5843842029571533
    },
    {
      "epoch": 0.000279779052734375,
      "model_forward_time": 0.11689519882202148,
      "step": 45839
    },
    {
      "epoch": 0.000279779052734375,
      "step": 45839,
      "training_step_time": 0.4838898181915283
    },
    {
      "epoch": 0.00027978515625,
      "grad_norm": 0.1383221298456192,
      "learning_rate": 1.4469719671666043e-05,
      "loss": 0.0418,
      "step": 45840
    },
    {
      "epoch": 0.00027978515625,
      "model_forward_time": 0.11496186256408691,
      "step": 45840
    },
    {
      "epoch": 0.00027978515625,
      "step": 45840,
      "training_step_time": 0.4417736530303955
    },
    {
      "epoch": 0.000279791259765625,
      "model_forward_time": 0.11547303199768066,
      "step": 45841
    },
    {
      "epoch": 0.000279791259765625,
      "step": 45841,
      "training_step_time": 0.47660326957702637
    },
    {
      "epoch": 0.00027979736328125,
      "model_forward_time": 0.11416482925415039,
      "step": 45842
    },
    {
      "epoch": 0.00027979736328125,
      "step": 45842,
      "training_step_time": 0.3980066776275635
    },
    {
      "epoch": 0.000279803466796875,
      "model_forward_time": 0.1136012077331543,
      "step": 45843
    },
    {
      "epoch": 0.000279803466796875,
      "step": 45843,
      "training_step_time": 0.3807804584503174
    },
    {
      "epoch": 0.0002798095703125,
      "model_forward_time": 0.11455821990966797,
      "step": 45844
    },
    {
      "epoch": 0.0002798095703125,
      "step": 45844,
      "training_step_time": 0.8062319755554199
    },
    {
      "epoch": 0.000279815673828125,
      "model_forward_time": 0.11442089080810547,
      "step": 45845
    },
    {
      "epoch": 0.000279815673828125,
      "step": 45845,
      "training_step_time": 0.3785576820373535
    },
    {
      "epoch": 0.00027982177734375,
      "model_forward_time": 0.11494684219360352,
      "step": 45846
    },
    {
      "epoch": 0.00027982177734375,
      "step": 45846,
      "training_step_time": 0.39554834365844727
    },
    {
      "epoch": 0.000279827880859375,
      "model_forward_time": 0.11384439468383789,
      "step": 45847
    },
    {
      "epoch": 0.000279827880859375,
      "step": 45847,
      "training_step_time": 0.393204927444458
    },
    {
      "epoch": 0.000279833984375,
      "model_forward_time": 0.1149141788482666,
      "step": 45848
    },
    {
      "epoch": 0.000279833984375,
      "step": 45848,
      "training_step_time": 0.3791322708129883
    },
    {
      "epoch": 0.000279840087890625,
      "model_forward_time": 0.11428713798522949,
      "step": 45849
    },
    {
      "epoch": 0.000279840087890625,
      "step": 45849,
      "training_step_time": 0.3856658935546875
    },
    {
      "epoch": 0.00027984619140625,
      "grad_norm": 0.11787951737642288,
      "learning_rate": 1.4450335640061185e-05,
      "loss": 0.0374,
      "step": 45850
    },
    {
      "epoch": 0.00027984619140625,
      "model_forward_time": 0.11514115333557129,
      "step": 45850
    },
    {
      "epoch": 0.00027984619140625,
      "step": 45850,
      "training_step_time": 0.7709348201751709
    },
    {
      "epoch": 0.000279852294921875,
      "model_forward_time": 0.11433577537536621,
      "step": 45851
    },
    {
      "epoch": 0.000279852294921875,
      "step": 45851,
      "training_step_time": 0.3790466785430908
    },
    {
      "epoch": 0.0002798583984375,
      "model_forward_time": 0.11462807655334473,
      "step": 45852
    },
    {
      "epoch": 0.0002798583984375,
      "step": 45852,
      "training_step_time": 0.3909428119659424
    },
    {
      "epoch": 0.000279864501953125,
      "model_forward_time": 0.11470270156860352,
      "step": 45853
    },
    {
      "epoch": 0.000279864501953125,
      "step": 45853,
      "training_step_time": 0.4652564525604248
    },
    {
      "epoch": 0.00027987060546875,
      "model_forward_time": 0.1148672103881836,
      "step": 45854
    },
    {
      "epoch": 0.00027987060546875,
      "step": 45854,
      "training_step_time": 0.5160613059997559
    },
    {
      "epoch": 0.000279876708984375,
      "model_forward_time": 0.11446928977966309,
      "step": 45855
    },
    {
      "epoch": 0.000279876708984375,
      "step": 45855,
      "training_step_time": 0.3858158588409424
    },
    {
      "epoch": 0.0002798828125,
      "model_forward_time": 0.11527872085571289,
      "step": 45856
    },
    {
      "epoch": 0.0002798828125,
      "step": 45856,
      "training_step_time": 0.974186897277832
    },
    {
      "epoch": 0.000279888916015625,
      "model_forward_time": 0.11394190788269043,
      "step": 45857
    },
    {
      "epoch": 0.000279888916015625,
      "step": 45857,
      "training_step_time": 0.3854043483734131
    },
    {
      "epoch": 0.00027989501953125,
      "model_forward_time": 0.11423110961914062,
      "step": 45858
    },
    {
      "epoch": 0.00027989501953125,
      "step": 45858,
      "training_step_time": 0.3863492012023926
    },
    {
      "epoch": 0.000279901123046875,
      "model_forward_time": 0.11382198333740234,
      "step": 45859
    },
    {
      "epoch": 0.000279901123046875,
      "step": 45859,
      "training_step_time": 0.3867678642272949
    },
    {
      "epoch": 0.0002799072265625,
      "grad_norm": 0.08197511732578278,
      "learning_rate": 1.443096240750571e-05,
      "loss": 0.0353,
      "step": 45860
    },
    {
      "epoch": 0.0002799072265625,
      "model_forward_time": 0.11380195617675781,
      "step": 45860
    },
    {
      "epoch": 0.0002799072265625,
      "step": 45860,
      "training_step_time": 0.3847024440765381
    },
    {
      "epoch": 0.000279913330078125,
      "model_forward_time": 0.11577677726745605,
      "step": 45861
    },
    {
      "epoch": 0.000279913330078125,
      "step": 45861,
      "training_step_time": 0.38466930389404297
    },
    {
      "epoch": 0.00027991943359375,
      "model_forward_time": 0.1149437427520752,
      "step": 45862
    },
    {
      "epoch": 0.00027991943359375,
      "step": 45862,
      "training_step_time": 0.6747958660125732
    },
    {
      "epoch": 0.000279925537109375,
      "model_forward_time": 0.11456036567687988,
      "step": 45863
    },
    {
      "epoch": 0.000279925537109375,
      "step": 45863,
      "training_step_time": 0.3923768997192383
    },
    {
      "epoch": 0.000279931640625,
      "model_forward_time": 0.11424922943115234,
      "step": 45864
    },
    {
      "epoch": 0.000279931640625,
      "step": 45864,
      "training_step_time": 0.38942980766296387
    },
    {
      "epoch": 0.000279937744140625,
      "model_forward_time": 0.11533665657043457,
      "step": 45865
    },
    {
      "epoch": 0.000279937744140625,
      "step": 45865,
      "training_step_time": 0.38712477684020996
    },
    {
      "epoch": 0.00027994384765625,
      "model_forward_time": 0.11464214324951172,
      "step": 45866
    },
    {
      "epoch": 0.00027994384765625,
      "step": 45866,
      "training_step_time": 0.4656190872192383
    },
    {
      "epoch": 0.000279949951171875,
      "model_forward_time": 0.11501383781433105,
      "step": 45867
    },
    {
      "epoch": 0.000279949951171875,
      "step": 45867,
      "training_step_time": 0.47756218910217285
    },
    {
      "epoch": 0.0002799560546875,
      "model_forward_time": 0.11505341529846191,
      "step": 45868
    },
    {
      "epoch": 0.0002799560546875,
      "step": 45868,
      "training_step_time": 0.45991969108581543
    },
    {
      "epoch": 0.000279962158203125,
      "model_forward_time": 0.11538529396057129,
      "step": 45869
    },
    {
      "epoch": 0.000279962158203125,
      "step": 45869,
      "training_step_time": 0.3852875232696533
    },
    {
      "epoch": 0.00027996826171875,
      "grad_norm": 0.10063562542200089,
      "learning_rate": 1.4411599979884744e-05,
      "loss": 0.0393,
      "step": 45870
    },
    {
      "epoch": 0.00027996826171875,
      "model_forward_time": 0.11548280715942383,
      "step": 45870
    },
    {
      "epoch": 0.00027996826171875,
      "step": 45870,
      "training_step_time": 0.3919947147369385
    },
    {
      "epoch": 0.000279974365234375,
      "model_forward_time": 0.11469411849975586,
      "step": 45871
    },
    {
      "epoch": 0.000279974365234375,
      "step": 45871,
      "training_step_time": 0.39884066581726074
    },
    {
      "epoch": 0.00027998046875,
      "model_forward_time": 0.11507678031921387,
      "step": 45872
    },
    {
      "epoch": 0.00027998046875,
      "step": 45872,
      "training_step_time": 0.39275360107421875
    },
    {
      "epoch": 0.000279986572265625,
      "model_forward_time": 0.11468911170959473,
      "step": 45873
    },
    {
      "epoch": 0.000279986572265625,
      "step": 45873,
      "training_step_time": 0.396618127822876
    },
    {
      "epoch": 0.00027999267578125,
      "model_forward_time": 0.11523962020874023,
      "step": 45874
    },
    {
      "epoch": 0.00027999267578125,
      "step": 45874,
      "training_step_time": 0.9916670322418213
    },
    {
      "epoch": 0.000279998779296875,
      "model_forward_time": 0.11456918716430664,
      "step": 45875
    },
    {
      "epoch": 0.000279998779296875,
      "step": 45875,
      "training_step_time": 0.37656354904174805
    },
    {
      "epoch": 0.0002800048828125,
      "model_forward_time": 0.11412858963012695,
      "step": 45876
    },
    {
      "epoch": 0.0002800048828125,
      "step": 45876,
      "training_step_time": 0.38736653327941895
    },
    {
      "epoch": 0.000280010986328125,
      "model_forward_time": 0.1142892837524414,
      "step": 45877
    },
    {
      "epoch": 0.000280010986328125,
      "step": 45877,
      "training_step_time": 0.3885376453399658
    },
    {
      "epoch": 0.00028001708984375,
      "model_forward_time": 0.1142117977142334,
      "step": 45878
    },
    {
      "epoch": 0.00028001708984375,
      "step": 45878,
      "training_step_time": 0.3851606845855713
    },
    {
      "epoch": 0.000280023193359375,
      "model_forward_time": 0.11468148231506348,
      "step": 45879
    },
    {
      "epoch": 0.000280023193359375,
      "step": 45879,
      "training_step_time": 0.36482882499694824
    },
    {
      "epoch": 0.000280029296875,
      "grad_norm": 0.0839407816529274,
      "learning_rate": 1.439224836308002e-05,
      "loss": 0.033,
      "step": 45880
    },
    {
      "epoch": 0.000280029296875,
      "model_forward_time": 0.11591768264770508,
      "step": 45880
    },
    {
      "epoch": 0.000280029296875,
      "step": 45880,
      "training_step_time": 0.7914314270019531
    },
    {
      "epoch": 0.000280035400390625,
      "model_forward_time": 0.11486101150512695,
      "step": 45881
    },
    {
      "epoch": 0.000280035400390625,
      "step": 45881,
      "training_step_time": 0.41805601119995117
    },
    {
      "epoch": 0.00028004150390625,
      "model_forward_time": 0.11494660377502441,
      "step": 45882
    },
    {
      "epoch": 0.00028004150390625,
      "step": 45882,
      "training_step_time": 0.36960935592651367
    },
    {
      "epoch": 0.000280047607421875,
      "model_forward_time": 0.11493539810180664,
      "step": 45883
    },
    {
      "epoch": 0.000280047607421875,
      "step": 45883,
      "training_step_time": 0.38873958587646484
    },
    {
      "epoch": 0.0002800537109375,
      "model_forward_time": 0.1139981746673584,
      "step": 45884
    },
    {
      "epoch": 0.0002800537109375,
      "step": 45884,
      "training_step_time": 0.3766171932220459
    },
    {
      "epoch": 0.000280059814453125,
      "model_forward_time": 0.11420345306396484,
      "step": 45885
    },
    {
      "epoch": 0.000280059814453125,
      "step": 45885,
      "training_step_time": 0.3939549922943115
    },
    {
      "epoch": 0.00028006591796875,
      "model_forward_time": 0.11475324630737305,
      "step": 45886
    },
    {
      "epoch": 0.00028006591796875,
      "step": 45886,
      "training_step_time": 0.7528002262115479
    },
    {
      "epoch": 0.000280072021484375,
      "model_forward_time": 0.11442399024963379,
      "step": 45887
    },
    {
      "epoch": 0.000280072021484375,
      "step": 45887,
      "training_step_time": 0.3850100040435791
    },
    {
      "epoch": 0.000280078125,
      "model_forward_time": 0.11394476890563965,
      "step": 45888
    },
    {
      "epoch": 0.000280078125,
      "step": 45888,
      "training_step_time": 0.38675975799560547
    },
    {
      "epoch": 0.000280084228515625,
      "model_forward_time": 0.11476397514343262,
      "step": 45889
    },
    {
      "epoch": 0.000280084228515625,
      "step": 45889,
      "training_step_time": 0.3898341655731201
    },
    {
      "epoch": 0.00028009033203125,
      "grad_norm": 0.11186420172452927,
      "learning_rate": 1.4372907562970079e-05,
      "loss": 0.0331,
      "step": 45890
    },
    {
      "epoch": 0.00028009033203125,
      "model_forward_time": 0.11480998992919922,
      "step": 45890
    },
    {
      "epoch": 0.00028009033203125,
      "step": 45890,
      "training_step_time": 0.3916151523590088
    },
    {
      "epoch": 0.000280096435546875,
      "model_forward_time": 0.11445188522338867,
      "step": 45891
    },
    {
      "epoch": 0.000280096435546875,
      "step": 45891,
      "training_step_time": 0.38210320472717285
    },
    {
      "epoch": 0.0002801025390625,
      "model_forward_time": 0.11552858352661133,
      "step": 45892
    },
    {
      "epoch": 0.0002801025390625,
      "step": 45892,
      "training_step_time": 1.1562423706054688
    },
    {
      "epoch": 0.000280108642578125,
      "model_forward_time": 0.11384963989257812,
      "step": 45893
    },
    {
      "epoch": 0.000280108642578125,
      "step": 45893,
      "training_step_time": 0.47263288497924805
    },
    {
      "epoch": 0.00028011474609375,
      "model_forward_time": 0.11400985717773438,
      "step": 45894
    },
    {
      "epoch": 0.00028011474609375,
      "step": 45894,
      "training_step_time": 0.4145843982696533
    },
    {
      "epoch": 0.000280120849609375,
      "model_forward_time": 0.11407279968261719,
      "step": 45895
    },
    {
      "epoch": 0.000280120849609375,
      "step": 45895,
      "training_step_time": 0.38506007194519043
    },
    {
      "epoch": 0.000280126953125,
      "model_forward_time": 0.11386585235595703,
      "step": 45896
    },
    {
      "epoch": 0.000280126953125,
      "step": 45896,
      "training_step_time": 0.37984228134155273
    },
    {
      "epoch": 0.000280133056640625,
      "model_forward_time": 0.11354351043701172,
      "step": 45897
    },
    {
      "epoch": 0.000280133056640625,
      "step": 45897,
      "training_step_time": 0.3916623592376709
    },
    {
      "epoch": 0.00028013916015625,
      "model_forward_time": 0.11481332778930664,
      "step": 45898
    },
    {
      "epoch": 0.00028013916015625,
      "step": 45898,
      "training_step_time": 0.9406805038452148
    },
    {
      "epoch": 0.000280145263671875,
      "model_forward_time": 0.11383652687072754,
      "step": 45899
    },
    {
      "epoch": 0.000280145263671875,
      "step": 45899,
      "training_step_time": 0.37514424324035645
    },
    {
      "epoch": 0.0002801513671875,
      "grad_norm": 0.08436354994773865,
      "learning_rate": 1.435357758543015e-05,
      "loss": 0.0347,
      "step": 45900
    },
    {
      "epoch": 0.0002801513671875,
      "model_forward_time": 0.11436676979064941,
      "step": 45900
    },
    {
      "epoch": 0.0002801513671875,
      "step": 45900,
      "training_step_time": 0.38592004776000977
    },
    {
      "epoch": 0.000280157470703125,
      "model_forward_time": 0.11467576026916504,
      "step": 45901
    },
    {
      "epoch": 0.000280157470703125,
      "step": 45901,
      "training_step_time": 0.38544607162475586
    },
    {
      "epoch": 0.00028016357421875,
      "model_forward_time": 0.11442804336547852,
      "step": 45902
    },
    {
      "epoch": 0.00028016357421875,
      "step": 45902,
      "training_step_time": 0.3796827793121338
    },
    {
      "epoch": 0.000280169677734375,
      "model_forward_time": 0.11431288719177246,
      "step": 45903
    },
    {
      "epoch": 0.000280169677734375,
      "step": 45903,
      "training_step_time": 0.3811914920806885
    },
    {
      "epoch": 0.00028017578125,
      "model_forward_time": 0.11467933654785156,
      "step": 45904
    },
    {
      "epoch": 0.00028017578125,
      "step": 45904,
      "training_step_time": 0.680351972579956
    },
    {
      "epoch": 0.000280181884765625,
      "model_forward_time": 0.11478734016418457,
      "step": 45905
    },
    {
      "epoch": 0.000280181884765625,
      "step": 45905,
      "training_step_time": 0.4159073829650879
    },
    {
      "epoch": 0.00028018798828125,
      "model_forward_time": 0.11507940292358398,
      "step": 45906
    },
    {
      "epoch": 0.00028018798828125,
      "step": 45906,
      "training_step_time": 0.4882957935333252
    },
    {
      "epoch": 0.000280194091796875,
      "model_forward_time": 0.11440658569335938,
      "step": 45907
    },
    {
      "epoch": 0.000280194091796875,
      "step": 45907,
      "training_step_time": 0.43122076988220215
    },
    {
      "epoch": 0.0002802001953125,
      "model_forward_time": 0.1175687313079834,
      "step": 45908
    },
    {
      "epoch": 0.0002802001953125,
      "step": 45908,
      "training_step_time": 0.39059996604919434
    },
    {
      "epoch": 0.000280206298828125,
      "model_forward_time": 0.11451101303100586,
      "step": 45909
    },
    {
      "epoch": 0.000280206298828125,
      "step": 45909,
      "training_step_time": 0.39011168479919434
    },
    {
      "epoch": 0.00028021240234375,
      "grad_norm": 0.08894003927707672,
      "learning_rate": 1.433425843633216e-05,
      "loss": 0.0322,
      "step": 45910
    },
    {
      "epoch": 0.00028021240234375,
      "model_forward_time": 0.11527490615844727,
      "step": 45910
    },
    {
      "epoch": 0.00028021240234375,
      "step": 45910,
      "training_step_time": 0.8282814025878906
    },
    {
      "epoch": 0.000280218505859375,
      "model_forward_time": 0.11361336708068848,
      "step": 45911
    },
    {
      "epoch": 0.000280218505859375,
      "step": 45911,
      "training_step_time": 0.39186644554138184
    },
    {
      "epoch": 0.000280224609375,
      "model_forward_time": 0.11480426788330078,
      "step": 45912
    },
    {
      "epoch": 0.000280224609375,
      "step": 45912,
      "training_step_time": 0.38500213623046875
    },
    {
      "epoch": 0.000280230712890625,
      "model_forward_time": 0.11376953125,
      "step": 45913
    },
    {
      "epoch": 0.000280230712890625,
      "step": 45913,
      "training_step_time": 0.3890724182128906
    },
    {
      "epoch": 0.00028023681640625,
      "model_forward_time": 0.11446881294250488,
      "step": 45914
    },
    {
      "epoch": 0.00028023681640625,
      "step": 45914,
      "training_step_time": 0.38474082946777344
    },
    {
      "epoch": 0.000280242919921875,
      "model_forward_time": 0.1141350269317627,
      "step": 45915
    },
    {
      "epoch": 0.000280242919921875,
      "step": 45915,
      "training_step_time": 0.38419628143310547
    },
    {
      "epoch": 0.0002802490234375,
      "model_forward_time": 0.11482906341552734,
      "step": 45916
    },
    {
      "epoch": 0.0002802490234375,
      "step": 45916,
      "training_step_time": 0.8283092975616455
    },
    {
      "epoch": 0.000280255126953125,
      "model_forward_time": 0.11516380310058594,
      "step": 45917
    },
    {
      "epoch": 0.000280255126953125,
      "step": 45917,
      "training_step_time": 0.3961606025695801
    },
    {
      "epoch": 0.00028026123046875,
      "model_forward_time": 0.11482858657836914,
      "step": 45918
    },
    {
      "epoch": 0.00028026123046875,
      "step": 45918,
      "training_step_time": 0.36410093307495117
    },
    {
      "epoch": 0.000280267333984375,
      "model_forward_time": 0.11452627182006836,
      "step": 45919
    },
    {
      "epoch": 0.000280267333984375,
      "step": 45919,
      "training_step_time": 0.46616435050964355
    },
    {
      "epoch": 0.0002802734375,
      "grad_norm": 0.1169709637761116,
      "learning_rate": 1.4314950121544756e-05,
      "loss": 0.0328,
      "step": 45920
    },
    {
      "epoch": 0.0002802734375,
      "model_forward_time": 0.11907672882080078,
      "step": 45920
    },
    {
      "epoch": 0.0002802734375,
      "step": 45920,
      "training_step_time": 0.5210480690002441
    },
    {
      "epoch": 0.000280279541015625,
      "model_forward_time": 0.11541891098022461,
      "step": 45921
    },
    {
      "epoch": 0.000280279541015625,
      "step": 45921,
      "training_step_time": 0.3746631145477295
    },
    {
      "epoch": 0.00028028564453125,
      "model_forward_time": 0.11461544036865234,
      "step": 45922
    },
    {
      "epoch": 0.00028028564453125,
      "step": 45922,
      "training_step_time": 0.407853364944458
    },
    {
      "epoch": 0.000280291748046875,
      "model_forward_time": 0.11537313461303711,
      "step": 45923
    },
    {
      "epoch": 0.000280291748046875,
      "step": 45923,
      "training_step_time": 0.37398314476013184
    },
    {
      "epoch": 0.0002802978515625,
      "model_forward_time": 0.11444830894470215,
      "step": 45924
    },
    {
      "epoch": 0.0002802978515625,
      "step": 45924,
      "training_step_time": 0.39051008224487305
    },
    {
      "epoch": 0.000280303955078125,
      "model_forward_time": 0.11502909660339355,
      "step": 45925
    },
    {
      "epoch": 0.000280303955078125,
      "step": 45925,
      "training_step_time": 0.38384103775024414
    },
    {
      "epoch": 0.00028031005859375,
      "model_forward_time": 0.11603617668151855,
      "step": 45926
    },
    {
      "epoch": 0.00028031005859375,
      "step": 45926,
      "training_step_time": 0.45100831985473633
    },
    {
      "epoch": 0.000280316162109375,
      "model_forward_time": 0.11764049530029297,
      "step": 45927
    },
    {
      "epoch": 0.000280316162109375,
      "step": 45927,
      "training_step_time": 0.577172040939331
    },
    {
      "epoch": 0.000280322265625,
      "model_forward_time": 0.12015223503112793,
      "step": 45928
    },
    {
      "epoch": 0.000280322265625,
      "step": 45928,
      "training_step_time": 0.8605351448059082
    },
    {
      "epoch": 0.000280328369140625,
      "model_forward_time": 0.11844325065612793,
      "step": 45929
    },
    {
      "epoch": 0.000280328369140625,
      "step": 45929,
      "training_step_time": 0.680389404296875
    },
    {
      "epoch": 0.00028033447265625,
      "grad_norm": 0.11830224841833115,
      "learning_rate": 1.4295652646933277e-05,
      "loss": 0.0322,
      "step": 45930
    },
    {
      "epoch": 0.00028033447265625,
      "model_forward_time": 0.12512850761413574,
      "step": 45930
    },
    {
      "epoch": 0.00028033447265625,
      "step": 45930,
      "training_step_time": 0.6034586429595947
    },
    {
      "epoch": 0.000280340576171875,
      "model_forward_time": 0.12476253509521484,
      "step": 45931
    },
    {
      "epoch": 0.000280340576171875,
      "step": 45931,
      "training_step_time": 0.6958820819854736
    },
    {
      "epoch": 0.0002803466796875,
      "model_forward_time": 0.11986541748046875,
      "step": 45932
    },
    {
      "epoch": 0.0002803466796875,
      "step": 45932,
      "training_step_time": 0.6877231597900391
    },
    {
      "epoch": 0.000280352783203125,
      "model_forward_time": 0.12224936485290527,
      "step": 45933
    },
    {
      "epoch": 0.000280352783203125,
      "step": 45933,
      "training_step_time": 0.6575350761413574
    },
    {
      "epoch": 0.00028035888671875,
      "model_forward_time": 0.11846804618835449,
      "step": 45934
    },
    {
      "epoch": 0.00028035888671875,
      "step": 45934,
      "training_step_time": 0.7087178230285645
    },
    {
      "epoch": 0.000280364990234375,
      "model_forward_time": 0.11681413650512695,
      "step": 45935
    },
    {
      "epoch": 0.000280364990234375,
      "step": 45935,
      "training_step_time": 0.6330070495605469
    },
    {
      "epoch": 0.00028037109375,
      "model_forward_time": 0.11812758445739746,
      "step": 45936
    },
    {
      "epoch": 0.00028037109375,
      "step": 45936,
      "training_step_time": 0.7289752960205078
    },
    {
      "epoch": 0.000280377197265625,
      "model_forward_time": 0.11754941940307617,
      "step": 45937
    },
    {
      "epoch": 0.000280377197265625,
      "step": 45937,
      "training_step_time": 0.655827522277832
    },
    {
      "epoch": 0.00028038330078125,
      "model_forward_time": 0.11619234085083008,
      "step": 45938
    },
    {
      "epoch": 0.00028038330078125,
      "step": 45938,
      "training_step_time": 0.6491243839263916
    },
    {
      "epoch": 0.000280389404296875,
      "model_forward_time": 0.12108659744262695,
      "step": 45939
    },
    {
      "epoch": 0.000280389404296875,
      "step": 45939,
      "training_step_time": 0.7235836982727051
    },
    {
      "epoch": 0.0002803955078125,
      "grad_norm": 0.11216089129447937,
      "learning_rate": 1.4276366018359844e-05,
      "loss": 0.0361,
      "step": 45940
    },
    {
      "epoch": 0.0002803955078125,
      "model_forward_time": 0.1163945198059082,
      "step": 45940
    },
    {
      "epoch": 0.0002803955078125,
      "step": 45940,
      "training_step_time": 0.7320563793182373
    },
    {
      "epoch": 0.000280401611328125,
      "model_forward_time": 0.1262376308441162,
      "step": 45941
    },
    {
      "epoch": 0.000280401611328125,
      "step": 45941,
      "training_step_time": 0.8233723640441895
    },
    {
      "epoch": 0.00028040771484375,
      "model_forward_time": 0.12101292610168457,
      "step": 45942
    },
    {
      "epoch": 0.00028040771484375,
      "step": 45942,
      "training_step_time": 0.6879103183746338
    },
    {
      "epoch": 0.000280413818359375,
      "model_forward_time": 0.11899518966674805,
      "step": 45943
    },
    {
      "epoch": 0.000280413818359375,
      "step": 45943,
      "training_step_time": 0.6683318614959717
    },
    {
      "epoch": 0.000280419921875,
      "model_forward_time": 0.1247556209564209,
      "step": 45944
    },
    {
      "epoch": 0.000280419921875,
      "step": 45944,
      "training_step_time": 0.6609237194061279
    },
    {
      "epoch": 0.000280426025390625,
      "model_forward_time": 0.11645030975341797,
      "step": 45945
    },
    {
      "epoch": 0.000280426025390625,
      "step": 45945,
      "training_step_time": 0.6919350624084473
    },
    {
      "epoch": 0.00028043212890625,
      "model_forward_time": 0.11608624458312988,
      "step": 45946
    },
    {
      "epoch": 0.00028043212890625,
      "step": 45946,
      "training_step_time": 0.679283618927002
    },
    {
      "epoch": 0.000280438232421875,
      "model_forward_time": 0.12023663520812988,
      "step": 45947
    },
    {
      "epoch": 0.000280438232421875,
      "step": 45947,
      "training_step_time": 0.6473016738891602
    },
    {
      "epoch": 0.0002804443359375,
      "model_forward_time": 0.1191861629486084,
      "step": 45948
    },
    {
      "epoch": 0.0002804443359375,
      "step": 45948,
      "training_step_time": 0.7346951961517334
    },
    {
      "epoch": 0.000280450439453125,
      "model_forward_time": 0.11625981330871582,
      "step": 45949
    },
    {
      "epoch": 0.000280450439453125,
      "step": 45949,
      "training_step_time": 0.7379956245422363
    },
    {
      "epoch": 0.00028045654296875,
      "grad_norm": 0.14972873032093048,
      "learning_rate": 1.4257090241683152e-05,
      "loss": 0.0395,
      "step": 45950
    },
    {
      "epoch": 0.00028045654296875,
      "model_forward_time": 0.11704134941101074,
      "step": 45950
    },
    {
      "epoch": 0.00028045654296875,
      "step": 45950,
      "training_step_time": 0.6548666954040527
    },
    {
      "epoch": 0.000280462646484375,
      "model_forward_time": 0.11713075637817383,
      "step": 45951
    },
    {
      "epoch": 0.000280462646484375,
      "step": 45951,
      "training_step_time": 0.6549510955810547
    },
    {
      "epoch": 0.00028046875,
      "model_forward_time": 0.11726927757263184,
      "step": 45952
    },
    {
      "epoch": 0.00028046875,
      "step": 45952,
      "training_step_time": 0.6561193466186523
    },
    {
      "epoch": 0.000280474853515625,
      "model_forward_time": 0.11706137657165527,
      "step": 45953
    },
    {
      "epoch": 0.000280474853515625,
      "step": 45953,
      "training_step_time": 0.6301934719085693
    },
    {
      "epoch": 0.00028048095703125,
      "model_forward_time": 0.12153053283691406,
      "step": 45954
    },
    {
      "epoch": 0.00028048095703125,
      "step": 45954,
      "training_step_time": 0.6474840641021729
    },
    {
      "epoch": 0.000280487060546875,
      "model_forward_time": 0.1233522891998291,
      "step": 45955
    },
    {
      "epoch": 0.000280487060546875,
      "step": 45955,
      "training_step_time": 0.6500966548919678
    },
    {
      "epoch": 0.0002804931640625,
      "model_forward_time": 0.11674118041992188,
      "step": 45956
    },
    {
      "epoch": 0.0002804931640625,
      "step": 45956,
      "training_step_time": 0.6312539577484131
    },
    {
      "epoch": 0.000280499267578125,
      "model_forward_time": 0.11983919143676758,
      "step": 45957
    },
    {
      "epoch": 0.000280499267578125,
      "step": 45957,
      "training_step_time": 0.6624183654785156
    },
    {
      "epoch": 0.00028050537109375,
      "model_forward_time": 0.13231539726257324,
      "step": 45958
    },
    {
      "epoch": 0.00028050537109375,
      "step": 45958,
      "training_step_time": 0.6973228454589844
    },
    {
      "epoch": 0.000280511474609375,
      "model_forward_time": 0.1187601089477539,
      "step": 45959
    },
    {
      "epoch": 0.000280511474609375,
      "step": 45959,
      "training_step_time": 0.7585070133209229
    },
    {
      "epoch": 0.000280517578125,
      "grad_norm": 0.11569319665431976,
      "learning_rate": 1.4237825322758736e-05,
      "loss": 0.043,
      "step": 45960
    },
    {
      "epoch": 0.000280517578125,
      "model_forward_time": 0.12054967880249023,
      "step": 45960
    },
    {
      "epoch": 0.000280517578125,
      "step": 45960,
      "training_step_time": 0.7160978317260742
    },
    {
      "epoch": 0.000280523681640625,
      "model_forward_time": 0.12240290641784668,
      "step": 45961
    },
    {
      "epoch": 0.000280523681640625,
      "step": 45961,
      "training_step_time": 0.6096451282501221
    },
    {
      "epoch": 0.00028052978515625,
      "model_forward_time": 0.12522625923156738,
      "step": 45962
    },
    {
      "epoch": 0.00028052978515625,
      "step": 45962,
      "training_step_time": 0.7182843685150146
    },
    {
      "epoch": 0.000280535888671875,
      "model_forward_time": 0.12335896492004395,
      "step": 45963
    },
    {
      "epoch": 0.000280535888671875,
      "step": 45963,
      "training_step_time": 0.6521189212799072
    },
    {
      "epoch": 0.0002805419921875,
      "model_forward_time": 0.1190946102142334,
      "step": 45964
    },
    {
      "epoch": 0.0002805419921875,
      "step": 45964,
      "training_step_time": 0.653449535369873
    },
    {
      "epoch": 0.000280548095703125,
      "model_forward_time": 0.11889863014221191,
      "step": 45965
    },
    {
      "epoch": 0.000280548095703125,
      "step": 45965,
      "training_step_time": 0.6723074913024902
    },
    {
      "epoch": 0.00028055419921875,
      "model_forward_time": 0.11822319030761719,
      "step": 45966
    },
    {
      "epoch": 0.00028055419921875,
      "step": 45966,
      "training_step_time": 0.6748430728912354
    },
    {
      "epoch": 0.000280560302734375,
      "model_forward_time": 0.12018609046936035,
      "step": 45967
    },
    {
      "epoch": 0.000280560302734375,
      "step": 45967,
      "training_step_time": 0.7129850387573242
    },
    {
      "epoch": 0.00028056640625,
      "model_forward_time": 0.12000226974487305,
      "step": 45968
    },
    {
      "epoch": 0.00028056640625,
      "step": 45968,
      "training_step_time": 0.6842014789581299
    },
    {
      "epoch": 0.000280572509765625,
      "model_forward_time": 0.12071967124938965,
      "step": 45969
    },
    {
      "epoch": 0.000280572509765625,
      "step": 45969,
      "training_step_time": 0.6166980266571045
    },
    {
      "epoch": 0.00028057861328125,
      "grad_norm": 0.0957324355840683,
      "learning_rate": 1.4218571267438712e-05,
      "loss": 0.0428,
      "step": 45970
    },
    {
      "epoch": 0.00028057861328125,
      "model_forward_time": 0.12451982498168945,
      "step": 45970
    },
    {
      "epoch": 0.00028057861328125,
      "step": 45970,
      "training_step_time": 0.6271710395812988
    },
    {
      "epoch": 0.000280584716796875,
      "model_forward_time": 0.11848950386047363,
      "step": 45971
    },
    {
      "epoch": 0.000280584716796875,
      "step": 45971,
      "training_step_time": 0.6289010047912598
    },
    {
      "epoch": 0.0002805908203125,
      "model_forward_time": 0.12011265754699707,
      "step": 45972
    },
    {
      "epoch": 0.0002805908203125,
      "step": 45972,
      "training_step_time": 0.7048842906951904
    },
    {
      "epoch": 0.000280596923828125,
      "model_forward_time": 0.12196850776672363,
      "step": 45973
    },
    {
      "epoch": 0.000280596923828125,
      "step": 45973,
      "training_step_time": 0.6807699203491211
    },
    {
      "epoch": 0.00028060302734375,
      "model_forward_time": 0.13052821159362793,
      "step": 45974
    },
    {
      "epoch": 0.00028060302734375,
      "step": 45974,
      "training_step_time": 0.6794087886810303
    },
    {
      "epoch": 0.000280609130859375,
      "model_forward_time": 0.11796307563781738,
      "step": 45975
    },
    {
      "epoch": 0.000280609130859375,
      "step": 45975,
      "training_step_time": 0.7344698905944824
    },
    {
      "epoch": 0.000280615234375,
      "model_forward_time": 0.11599135398864746,
      "step": 45976
    },
    {
      "epoch": 0.000280615234375,
      "step": 45976,
      "training_step_time": 0.6290690898895264
    },
    {
      "epoch": 0.000280621337890625,
      "model_forward_time": 0.12381672859191895,
      "step": 45977
    },
    {
      "epoch": 0.000280621337890625,
      "step": 45977,
      "training_step_time": 0.6497576236724854
    },
    {
      "epoch": 0.00028062744140625,
      "model_forward_time": 0.12104463577270508,
      "step": 45978
    },
    {
      "epoch": 0.00028062744140625,
      "step": 45978,
      "training_step_time": 0.7631068229675293
    },
    {
      "epoch": 0.000280633544921875,
      "model_forward_time": 0.12270069122314453,
      "step": 45979
    },
    {
      "epoch": 0.000280633544921875,
      "step": 45979,
      "training_step_time": 0.6977560520172119
    },
    {
      "epoch": 0.0002806396484375,
      "grad_norm": 0.10888515412807465,
      "learning_rate": 1.4199328081572e-05,
      "loss": 0.0379,
      "step": 45980
    },
    {
      "epoch": 0.0002806396484375,
      "model_forward_time": 0.11773014068603516,
      "step": 45980
    },
    {
      "epoch": 0.0002806396484375,
      "step": 45980,
      "training_step_time": 0.6505067348480225
    },
    {
      "epoch": 0.000280645751953125,
      "model_forward_time": 0.12031745910644531,
      "step": 45981
    },
    {
      "epoch": 0.000280645751953125,
      "step": 45981,
      "training_step_time": 0.6479465961456299
    },
    {
      "epoch": 0.00028065185546875,
      "model_forward_time": 0.11716341972351074,
      "step": 45982
    },
    {
      "epoch": 0.00028065185546875,
      "step": 45982,
      "training_step_time": 0.6006417274475098
    },
    {
      "epoch": 0.000280657958984375,
      "model_forward_time": 0.11717796325683594,
      "step": 45983
    },
    {
      "epoch": 0.000280657958984375,
      "step": 45983,
      "training_step_time": 0.6298255920410156
    },
    {
      "epoch": 0.0002806640625,
      "model_forward_time": 0.13979792594909668,
      "step": 45984
    },
    {
      "epoch": 0.0002806640625,
      "step": 45984,
      "training_step_time": 0.6996943950653076
    },
    {
      "epoch": 0.000280670166015625,
      "model_forward_time": 0.11650705337524414,
      "step": 45985
    },
    {
      "epoch": 0.000280670166015625,
      "step": 45985,
      "training_step_time": 0.6427221298217773
    },
    {
      "epoch": 0.00028067626953125,
      "model_forward_time": 0.11744022369384766,
      "step": 45986
    },
    {
      "epoch": 0.00028067626953125,
      "step": 45986,
      "training_step_time": 0.6210241317749023
    },
    {
      "epoch": 0.000280682373046875,
      "model_forward_time": 0.1233224868774414,
      "step": 45987
    },
    {
      "epoch": 0.000280682373046875,
      "step": 45987,
      "training_step_time": 0.6846270561218262
    },
    {
      "epoch": 0.0002806884765625,
      "model_forward_time": 0.12289929389953613,
      "step": 45988
    },
    {
      "epoch": 0.0002806884765625,
      "step": 45988,
      "training_step_time": 0.725804328918457
    },
    {
      "epoch": 0.000280694580078125,
      "model_forward_time": 0.11750650405883789,
      "step": 45989
    },
    {
      "epoch": 0.000280694580078125,
      "step": 45989,
      "training_step_time": 0.6820363998413086
    },
    {
      "epoch": 0.00028070068359375,
      "grad_norm": 0.11517566442489624,
      "learning_rate": 1.4180095771004154e-05,
      "loss": 0.0391,
      "step": 45990
    },
    {
      "epoch": 0.00028070068359375,
      "model_forward_time": 0.11862564086914062,
      "step": 45990
    },
    {
      "epoch": 0.00028070068359375,
      "step": 45990,
      "training_step_time": 0.6226606369018555
    },
    {
      "epoch": 0.000280706787109375,
      "model_forward_time": 0.11698102951049805,
      "step": 45991
    },
    {
      "epoch": 0.000280706787109375,
      "step": 45991,
      "training_step_time": 0.6381003856658936
    },
    {
      "epoch": 0.000280712890625,
      "model_forward_time": 0.12398838996887207,
      "step": 45992
    },
    {
      "epoch": 0.000280712890625,
      "step": 45992,
      "training_step_time": 0.6181914806365967
    },
    {
      "epoch": 0.000280718994140625,
      "model_forward_time": 0.1300191879272461,
      "step": 45993
    },
    {
      "epoch": 0.000280718994140625,
      "step": 45993,
      "training_step_time": 0.608497142791748
    },
    {
      "epoch": 0.00028072509765625,
      "model_forward_time": 0.11884045600891113,
      "step": 45994
    },
    {
      "epoch": 0.00028072509765625,
      "step": 45994,
      "training_step_time": 0.5454733371734619
    },
    {
      "epoch": 0.000280731201171875,
      "model_forward_time": 0.12970328330993652,
      "step": 45995
    },
    {
      "epoch": 0.000280731201171875,
      "step": 45995,
      "training_step_time": 0.5908136367797852
    },
    {
      "epoch": 0.0002807373046875,
      "model_forward_time": 0.12601566314697266,
      "step": 45996
    },
    {
      "epoch": 0.0002807373046875,
      "step": 45996,
      "training_step_time": 0.6581079959869385
    },
    {
      "epoch": 0.000280743408203125,
      "model_forward_time": 0.12069439888000488,
      "step": 45997
    },
    {
      "epoch": 0.000280743408203125,
      "step": 45997,
      "training_step_time": 0.6641955375671387
    },
    {
      "epoch": 0.00028074951171875,
      "model_forward_time": 0.11895370483398438,
      "step": 45998
    },
    {
      "epoch": 0.00028074951171875,
      "step": 45998,
      "training_step_time": 0.7077431678771973
    },
    {
      "epoch": 0.000280755615234375,
      "model_forward_time": 0.11755657196044922,
      "step": 45999
    },
    {
      "epoch": 0.000280755615234375,
      "step": 45999,
      "training_step_time": 0.6234285831451416
    },
    {
      "epoch": 0.00028076171875,
      "grad_norm": 0.13045787811279297,
      "learning_rate": 1.4160874341577446e-05,
      "loss": 0.0371,
      "step": 46000
    },
    {
      "epoch": 0.00028076171875,
      "model_forward_time": 0.1133890151977539,
      "step": 46000
    },
    {
      "epoch": 0.00028076171875,
      "step": 46000,
      "training_step_time": 0.35529303550720215
    },
    {
      "epoch": 0.000280767822265625,
      "model_forward_time": 0.11304163932800293,
      "step": 46001
    },
    {
      "epoch": 0.000280767822265625,
      "step": 46001,
      "training_step_time": 0.39637112617492676
    },
    {
      "epoch": 0.00028077392578125,
      "model_forward_time": 0.11211943626403809,
      "step": 46002
    },
    {
      "epoch": 0.00028077392578125,
      "step": 46002,
      "training_step_time": 0.36637091636657715
    },
    {
      "epoch": 0.000280780029296875,
      "model_forward_time": 0.11277508735656738,
      "step": 46003
    },
    {
      "epoch": 0.000280780029296875,
      "step": 46003,
      "training_step_time": 0.378154993057251
    },
    {
      "epoch": 0.0002807861328125,
      "model_forward_time": 0.11359858512878418,
      "step": 46004
    },
    {
      "epoch": 0.0002807861328125,
      "step": 46004,
      "training_step_time": 0.4076685905456543
    },
    {
      "epoch": 0.000280792236328125,
      "model_forward_time": 0.1149301528930664,
      "step": 46005
    },
    {
      "epoch": 0.000280792236328125,
      "step": 46005,
      "training_step_time": 0.37348127365112305
    },
    {
      "epoch": 0.00028079833984375,
      "model_forward_time": 0.11549210548400879,
      "step": 46006
    },
    {
      "epoch": 0.00028079833984375,
      "step": 46006,
      "training_step_time": 0.3768594264984131
    },
    {
      "epoch": 0.000280804443359375,
      "model_forward_time": 0.11537551879882812,
      "step": 46007
    },
    {
      "epoch": 0.000280804443359375,
      "step": 46007,
      "training_step_time": 0.39122891426086426
    },
    {
      "epoch": 0.000280810546875,
      "model_forward_time": 0.11410832405090332,
      "step": 46008
    },
    {
      "epoch": 0.000280810546875,
      "step": 46008,
      "training_step_time": 0.39073967933654785
    },
    {
      "epoch": 0.000280816650390625,
      "model_forward_time": 0.11471366882324219,
      "step": 46009
    },
    {
      "epoch": 0.000280816650390625,
      "step": 46009,
      "training_step_time": 0.3940553665161133
    },
    {
      "epoch": 0.00028082275390625,
      "grad_norm": 0.11349909752607346,
      "learning_rate": 1.4141663799130833e-05,
      "loss": 0.0386,
      "step": 46010
    },
    {
      "epoch": 0.00028082275390625,
      "model_forward_time": 0.11463212966918945,
      "step": 46010
    },
    {
      "epoch": 0.00028082275390625,
      "step": 46010,
      "training_step_time": 0.3922109603881836
    },
    {
      "epoch": 0.000280828857421875,
      "model_forward_time": 0.11524629592895508,
      "step": 46011
    },
    {
      "epoch": 0.000280828857421875,
      "step": 46011,
      "training_step_time": 0.39742588996887207
    },
    {
      "epoch": 0.0002808349609375,
      "model_forward_time": 0.11511850357055664,
      "step": 46012
    },
    {
      "epoch": 0.0002808349609375,
      "step": 46012,
      "training_step_time": 0.42650794982910156
    },
    {
      "epoch": 0.000280841064453125,
      "model_forward_time": 0.11550211906433105,
      "step": 46013
    },
    {
      "epoch": 0.000280841064453125,
      "step": 46013,
      "training_step_time": 0.5210320949554443
    },
    {
      "epoch": 0.00028084716796875,
      "model_forward_time": 0.11568570137023926,
      "step": 46014
    },
    {
      "epoch": 0.00028084716796875,
      "step": 46014,
      "training_step_time": 0.46471118927001953
    },
    {
      "epoch": 0.000280853271484375,
      "model_forward_time": 0.11496925354003906,
      "step": 46015
    },
    {
      "epoch": 0.000280853271484375,
      "step": 46015,
      "training_step_time": 0.4965939521789551
    },
    {
      "epoch": 0.000280859375,
      "model_forward_time": 0.11450433731079102,
      "step": 46016
    },
    {
      "epoch": 0.000280859375,
      "step": 46016,
      "training_step_time": 0.38373684883117676
    },
    {
      "epoch": 0.000280865478515625,
      "model_forward_time": 0.11458468437194824,
      "step": 46017
    },
    {
      "epoch": 0.000280865478515625,
      "step": 46017,
      "training_step_time": 0.4113790988922119
    },
    {
      "epoch": 0.00028087158203125,
      "model_forward_time": 0.11426067352294922,
      "step": 46018
    },
    {
      "epoch": 0.00028087158203125,
      "step": 46018,
      "training_step_time": 0.4144439697265625
    },
    {
      "epoch": 0.000280877685546875,
      "model_forward_time": 0.11486244201660156,
      "step": 46019
    },
    {
      "epoch": 0.000280877685546875,
      "step": 46019,
      "training_step_time": 0.4000370502471924
    },
    {
      "epoch": 0.0002808837890625,
      "grad_norm": 0.10183265060186386,
      "learning_rate": 1.412246414949997e-05,
      "loss": 0.0395,
      "step": 46020
    },
    {
      "epoch": 0.0002808837890625,
      "model_forward_time": 0.11458873748779297,
      "step": 46020
    },
    {
      "epoch": 0.0002808837890625,
      "step": 46020,
      "training_step_time": 0.3969435691833496
    },
    {
      "epoch": 0.000280889892578125,
      "model_forward_time": 0.11507892608642578,
      "step": 46021
    },
    {
      "epoch": 0.000280889892578125,
      "step": 46021,
      "training_step_time": 0.39671993255615234
    },
    {
      "epoch": 0.00028089599609375,
      "model_forward_time": 0.11497759819030762,
      "step": 46022
    },
    {
      "epoch": 0.00028089599609375,
      "step": 46022,
      "training_step_time": 0.38085460662841797
    },
    {
      "epoch": 0.000280902099609375,
      "model_forward_time": 0.11567211151123047,
      "step": 46023
    },
    {
      "epoch": 0.000280902099609375,
      "step": 46023,
      "training_step_time": 0.3903927803039551
    },
    {
      "epoch": 0.000280908203125,
      "model_forward_time": 0.11500835418701172,
      "step": 46024
    },
    {
      "epoch": 0.000280908203125,
      "step": 46024,
      "training_step_time": 0.39078211784362793
    },
    {
      "epoch": 0.000280914306640625,
      "model_forward_time": 0.11552000045776367,
      "step": 46025
    },
    {
      "epoch": 0.000280914306640625,
      "step": 46025,
      "training_step_time": 0.4928162097930908
    },
    {
      "epoch": 0.00028092041015625,
      "model_forward_time": 0.11490511894226074,
      "step": 46026
    },
    {
      "epoch": 0.00028092041015625,
      "step": 46026,
      "training_step_time": 0.45022130012512207
    },
    {
      "epoch": 0.000280926513671875,
      "model_forward_time": 0.11488461494445801,
      "step": 46027
    },
    {
      "epoch": 0.000280926513671875,
      "step": 46027,
      "training_step_time": 0.4962296485900879
    },
    {
      "epoch": 0.0002809326171875,
      "model_forward_time": 0.11477899551391602,
      "step": 46028
    },
    {
      "epoch": 0.0002809326171875,
      "step": 46028,
      "training_step_time": 0.37990355491638184
    },
    {
      "epoch": 0.000280938720703125,
      "model_forward_time": 0.11806011199951172,
      "step": 46029
    },
    {
      "epoch": 0.000280938720703125,
      "step": 46029,
      "training_step_time": 0.3964686393737793
    },
    {
      "epoch": 0.00028094482421875,
      "grad_norm": 0.10760138928890228,
      "learning_rate": 1.4103275398517197e-05,
      "loss": 0.0402,
      "step": 46030
    },
    {
      "epoch": 0.00028094482421875,
      "model_forward_time": 0.11591982841491699,
      "step": 46030
    },
    {
      "epoch": 0.00028094482421875,
      "step": 46030,
      "training_step_time": 0.49036335945129395
    },
    {
      "epoch": 0.000280950927734375,
      "model_forward_time": 0.11443686485290527,
      "step": 46031
    },
    {
      "epoch": 0.000280950927734375,
      "step": 46031,
      "training_step_time": 0.41869282722473145
    },
    {
      "epoch": 0.00028095703125,
      "model_forward_time": 0.11462664604187012,
      "step": 46032
    },
    {
      "epoch": 0.00028095703125,
      "step": 46032,
      "training_step_time": 0.39235973358154297
    },
    {
      "epoch": 0.000280963134765625,
      "model_forward_time": 0.11498260498046875,
      "step": 46033
    },
    {
      "epoch": 0.000280963134765625,
      "step": 46033,
      "training_step_time": 0.3933241367340088
    },
    {
      "epoch": 0.00028096923828125,
      "model_forward_time": 0.11515164375305176,
      "step": 46034
    },
    {
      "epoch": 0.00028096923828125,
      "step": 46034,
      "training_step_time": 0.38985490798950195
    },
    {
      "epoch": 0.000280975341796875,
      "model_forward_time": 0.11486554145812988,
      "step": 46035
    },
    {
      "epoch": 0.000280975341796875,
      "step": 46035,
      "training_step_time": 0.3856995105743408
    },
    {
      "epoch": 0.0002809814453125,
      "model_forward_time": 0.11543631553649902,
      "step": 46036
    },
    {
      "epoch": 0.0002809814453125,
      "step": 46036,
      "training_step_time": 0.4080665111541748
    },
    {
      "epoch": 0.000280987548828125,
      "model_forward_time": 0.11545157432556152,
      "step": 46037
    },
    {
      "epoch": 0.000280987548828125,
      "step": 46037,
      "training_step_time": 0.3887450695037842
    },
    {
      "epoch": 0.00028099365234375,
      "model_forward_time": 0.11514067649841309,
      "step": 46038
    },
    {
      "epoch": 0.00028099365234375,
      "step": 46038,
      "training_step_time": 0.4002196788787842
    },
    {
      "epoch": 0.000280999755859375,
      "model_forward_time": 0.11527204513549805,
      "step": 46039
    },
    {
      "epoch": 0.000280999755859375,
      "step": 46039,
      "training_step_time": 0.39708733558654785
    },
    {
      "epoch": 0.000281005859375,
      "grad_norm": 0.0707377940416336,
      "learning_rate": 1.4084097552011571e-05,
      "loss": 0.0355,
      "step": 46040
    },
    {
      "epoch": 0.000281005859375,
      "model_forward_time": 0.11490058898925781,
      "step": 46040
    },
    {
      "epoch": 0.000281005859375,
      "step": 46040,
      "training_step_time": 0.5089068412780762
    },
    {
      "epoch": 0.000281011962890625,
      "model_forward_time": 0.11636805534362793,
      "step": 46041
    },
    {
      "epoch": 0.000281011962890625,
      "step": 46041,
      "training_step_time": 0.41936445236206055
    },
    {
      "epoch": 0.00028101806640625,
      "model_forward_time": 0.11515045166015625,
      "step": 46042
    },
    {
      "epoch": 0.00028101806640625,
      "step": 46042,
      "training_step_time": 0.4799344539642334
    },
    {
      "epoch": 0.000281024169921875,
      "model_forward_time": 0.11487054824829102,
      "step": 46043
    },
    {
      "epoch": 0.000281024169921875,
      "step": 46043,
      "training_step_time": 0.3853178024291992
    },
    {
      "epoch": 0.0002810302734375,
      "model_forward_time": 0.11554074287414551,
      "step": 46044
    },
    {
      "epoch": 0.0002810302734375,
      "step": 46044,
      "training_step_time": 0.5012285709381104
    },
    {
      "epoch": 0.000281036376953125,
      "model_forward_time": 0.11487817764282227,
      "step": 46045
    },
    {
      "epoch": 0.000281036376953125,
      "step": 46045,
      "training_step_time": 0.49709296226501465
    },
    {
      "epoch": 0.00028104248046875,
      "model_forward_time": 0.11568474769592285,
      "step": 46046
    },
    {
      "epoch": 0.00028104248046875,
      "step": 46046,
      "training_step_time": 0.4348773956298828
    },
    {
      "epoch": 0.000281048583984375,
      "model_forward_time": 0.11482644081115723,
      "step": 46047
    },
    {
      "epoch": 0.000281048583984375,
      "step": 46047,
      "training_step_time": 0.4002518653869629
    },
    {
      "epoch": 0.0002810546875,
      "model_forward_time": 0.11406707763671875,
      "step": 46048
    },
    {
      "epoch": 0.0002810546875,
      "step": 46048,
      "training_step_time": 0.38593339920043945
    },
    {
      "epoch": 0.000281060791015625,
      "model_forward_time": 0.11527514457702637,
      "step": 46049
    },
    {
      "epoch": 0.000281060791015625,
      "step": 46049,
      "training_step_time": 0.3970003128051758
    },
    {
      "epoch": 0.00028106689453125,
      "grad_norm": 0.10566800087690353,
      "learning_rate": 1.4064930615808808e-05,
      "loss": 0.0416,
      "step": 46050
    },
    {
      "epoch": 0.00028106689453125,
      "model_forward_time": 0.11548304557800293,
      "step": 46050
    },
    {
      "epoch": 0.00028106689453125,
      "step": 46050,
      "training_step_time": 0.3907127380371094
    },
    {
      "epoch": 0.000281072998046875,
      "model_forward_time": 0.11453557014465332,
      "step": 46051
    },
    {
      "epoch": 0.000281072998046875,
      "step": 46051,
      "training_step_time": 0.3942110538482666
    },
    {
      "epoch": 0.0002810791015625,
      "model_forward_time": 0.11523604393005371,
      "step": 46052
    },
    {
      "epoch": 0.0002810791015625,
      "step": 46052,
      "training_step_time": 0.3899071216583252
    },
    {
      "epoch": 0.000281085205078125,
      "model_forward_time": 0.11528706550598145,
      "step": 46053
    },
    {
      "epoch": 0.000281085205078125,
      "step": 46053,
      "training_step_time": 0.38864827156066895
    },
    {
      "epoch": 0.00028109130859375,
      "model_forward_time": 0.11513781547546387,
      "step": 46054
    },
    {
      "epoch": 0.00028109130859375,
      "step": 46054,
      "training_step_time": 0.39406657218933105
    },
    {
      "epoch": 0.000281097412109375,
      "model_forward_time": 0.11556077003479004,
      "step": 46055
    },
    {
      "epoch": 0.000281097412109375,
      "step": 46055,
      "training_step_time": 0.46327733993530273
    },
    {
      "epoch": 0.000281103515625,
      "model_forward_time": 0.1149451732635498,
      "step": 46056
    },
    {
      "epoch": 0.000281103515625,
      "step": 46056,
      "training_step_time": 0.404491662979126
    },
    {
      "epoch": 0.000281109619140625,
      "model_forward_time": 0.1148076057434082,
      "step": 46057
    },
    {
      "epoch": 0.000281109619140625,
      "step": 46057,
      "training_step_time": 0.40141940116882324
    },
    {
      "epoch": 0.00028111572265625,
      "model_forward_time": 0.11519765853881836,
      "step": 46058
    },
    {
      "epoch": 0.00028111572265625,
      "step": 46058,
      "training_step_time": 0.37598729133605957
    },
    {
      "epoch": 0.000281121826171875,
      "model_forward_time": 0.1151883602142334,
      "step": 46059
    },
    {
      "epoch": 0.000281121826171875,
      "step": 46059,
      "training_step_time": 0.45469212532043457
    },
    {
      "epoch": 0.0002811279296875,
      "grad_norm": 0.09183251857757568,
      "learning_rate": 1.4045774595731315e-05,
      "loss": 0.04,
      "step": 46060
    },
    {
      "epoch": 0.0002811279296875,
      "model_forward_time": 0.11508560180664062,
      "step": 46060
    },
    {
      "epoch": 0.0002811279296875,
      "step": 46060,
      "training_step_time": 0.4068312644958496
    },
    {
      "epoch": 0.000281134033203125,
      "model_forward_time": 0.11508011817932129,
      "step": 46061
    },
    {
      "epoch": 0.000281134033203125,
      "step": 46061,
      "training_step_time": 0.39365243911743164
    },
    {
      "epoch": 0.00028114013671875,
      "model_forward_time": 0.11536908149719238,
      "step": 46062
    },
    {
      "epoch": 0.00028114013671875,
      "step": 46062,
      "training_step_time": 0.3934788703918457
    },
    {
      "epoch": 0.000281146240234375,
      "model_forward_time": 0.11544942855834961,
      "step": 46063
    },
    {
      "epoch": 0.000281146240234375,
      "step": 46063,
      "training_step_time": 0.42380380630493164
    },
    {
      "epoch": 0.00028115234375,
      "model_forward_time": 0.11566042900085449,
      "step": 46064
    },
    {
      "epoch": 0.00028115234375,
      "step": 46064,
      "training_step_time": 0.40393495559692383
    },
    {
      "epoch": 0.000281158447265625,
      "model_forward_time": 0.11587309837341309,
      "step": 46065
    },
    {
      "epoch": 0.000281158447265625,
      "step": 46065,
      "training_step_time": 0.38744401931762695
    },
    {
      "epoch": 0.00028116455078125,
      "model_forward_time": 0.11508345603942871,
      "step": 46066
    },
    {
      "epoch": 0.00028116455078125,
      "step": 46066,
      "training_step_time": 0.39031195640563965
    },
    {
      "epoch": 0.000281170654296875,
      "model_forward_time": 0.11594557762145996,
      "step": 46067
    },
    {
      "epoch": 0.000281170654296875,
      "step": 46067,
      "training_step_time": 0.39981961250305176
    },
    {
      "epoch": 0.0002811767578125,
      "model_forward_time": 0.11518406867980957,
      "step": 46068
    },
    {
      "epoch": 0.0002811767578125,
      "step": 46068,
      "training_step_time": 0.3914012908935547
    },
    {
      "epoch": 0.000281182861328125,
      "model_forward_time": 0.11600446701049805,
      "step": 46069
    },
    {
      "epoch": 0.000281182861328125,
      "step": 46069,
      "training_step_time": 0.4675872325897217
    },
    {
      "epoch": 0.00028118896484375,
      "grad_norm": 0.10089917480945587,
      "learning_rate": 1.4026629497598177e-05,
      "loss": 0.0379,
      "step": 46070
    },
    {
      "epoch": 0.00028118896484375,
      "model_forward_time": 0.1158151626586914,
      "step": 46070
    },
    {
      "epoch": 0.00028118896484375,
      "step": 46070,
      "training_step_time": 0.39881086349487305
    },
    {
      "epoch": 0.000281195068359375,
      "model_forward_time": 0.11435413360595703,
      "step": 46071
    },
    {
      "epoch": 0.000281195068359375,
      "step": 46071,
      "training_step_time": 0.4210624694824219
    },
    {
      "epoch": 0.000281201171875,
      "model_forward_time": 0.11628198623657227,
      "step": 46072
    },
    {
      "epoch": 0.000281201171875,
      "step": 46072,
      "training_step_time": 0.4232978820800781
    },
    {
      "epoch": 0.000281207275390625,
      "model_forward_time": 0.11522579193115234,
      "step": 46073
    },
    {
      "epoch": 0.000281207275390625,
      "step": 46073,
      "training_step_time": 0.4575674533843994
    },
    {
      "epoch": 0.00028121337890625,
      "model_forward_time": 0.11529922485351562,
      "step": 46074
    },
    {
      "epoch": 0.00028121337890625,
      "step": 46074,
      "training_step_time": 0.4433736801147461
    },
    {
      "epoch": 0.000281219482421875,
      "model_forward_time": 0.1164999008178711,
      "step": 46075
    },
    {
      "epoch": 0.000281219482421875,
      "step": 46075,
      "training_step_time": 0.41802263259887695
    },
    {
      "epoch": 0.0002812255859375,
      "model_forward_time": 0.1150960922241211,
      "step": 46076
    },
    {
      "epoch": 0.0002812255859375,
      "step": 46076,
      "training_step_time": 0.3934040069580078
    },
    {
      "epoch": 0.000281231689453125,
      "model_forward_time": 0.11453413963317871,
      "step": 46077
    },
    {
      "epoch": 0.000281231689453125,
      "step": 46077,
      "training_step_time": 0.4040079116821289
    },
    {
      "epoch": 0.00028123779296875,
      "model_forward_time": 0.11514425277709961,
      "step": 46078
    },
    {
      "epoch": 0.00028123779296875,
      "step": 46078,
      "training_step_time": 0.4154953956604004
    },
    {
      "epoch": 0.000281243896484375,
      "model_forward_time": 0.11499524116516113,
      "step": 46079
    },
    {
      "epoch": 0.000281243896484375,
      "step": 46079,
      "training_step_time": 0.7902688980102539
    },
    {
      "epoch": 0.00028125,
      "grad_norm": 0.11104506254196167,
      "learning_rate": 1.4007495327225162e-05,
      "loss": 0.0398,
      "step": 46080
    },
    {
      "epoch": 0.00028125,
      "model_forward_time": 0.11465120315551758,
      "step": 46080
    },
    {
      "epoch": 0.00028125,
      "step": 46080,
      "training_step_time": 0.38198041915893555
    },
    {
      "epoch": 0.000281256103515625,
      "model_forward_time": 0.11484813690185547,
      "step": 46081
    },
    {
      "epoch": 0.000281256103515625,
      "step": 46081,
      "training_step_time": 0.39533567428588867
    },
    {
      "epoch": 0.00028126220703125,
      "model_forward_time": 0.11450672149658203,
      "step": 46082
    },
    {
      "epoch": 0.00028126220703125,
      "step": 46082,
      "training_step_time": 0.38774967193603516
    },
    {
      "epoch": 0.000281268310546875,
      "model_forward_time": 0.11482691764831543,
      "step": 46083
    },
    {
      "epoch": 0.000281268310546875,
      "step": 46083,
      "training_step_time": 0.38860034942626953
    },
    {
      "epoch": 0.0002812744140625,
      "model_forward_time": 0.11423730850219727,
      "step": 46084
    },
    {
      "epoch": 0.0002812744140625,
      "step": 46084,
      "training_step_time": 0.44807863235473633
    },
    {
      "epoch": 0.000281280517578125,
      "model_forward_time": 0.11514139175415039,
      "step": 46085
    },
    {
      "epoch": 0.000281280517578125,
      "step": 46085,
      "training_step_time": 0.8740792274475098
    },
    {
      "epoch": 0.00028128662109375,
      "model_forward_time": 0.11376237869262695,
      "step": 46086
    },
    {
      "epoch": 0.00028128662109375,
      "step": 46086,
      "training_step_time": 0.44693732261657715
    },
    {
      "epoch": 0.000281292724609375,
      "model_forward_time": 0.11512899398803711,
      "step": 46087
    },
    {
      "epoch": 0.000281292724609375,
      "step": 46087,
      "training_step_time": 0.4810309410095215
    },
    {
      "epoch": 0.000281298828125,
      "model_forward_time": 0.11454916000366211,
      "step": 46088
    },
    {
      "epoch": 0.000281298828125,
      "step": 46088,
      "training_step_time": 0.4079709053039551
    },
    {
      "epoch": 0.000281304931640625,
      "model_forward_time": 0.11386322975158691,
      "step": 46089
    },
    {
      "epoch": 0.000281304931640625,
      "step": 46089,
      "training_step_time": 0.3706941604614258
    },
    {
      "epoch": 0.00028131103515625,
      "grad_norm": 0.09867416322231293,
      "learning_rate": 1.3988372090424773e-05,
      "loss": 0.0448,
      "step": 46090
    },
    {
      "epoch": 0.00028131103515625,
      "model_forward_time": 0.11420154571533203,
      "step": 46090
    },
    {
      "epoch": 0.00028131103515625,
      "step": 46090,
      "training_step_time": 0.37828850746154785
    },
    {
      "epoch": 0.000281317138671875,
      "model_forward_time": 0.1147468090057373,
      "step": 46091
    },
    {
      "epoch": 0.000281317138671875,
      "step": 46091,
      "training_step_time": 0.5524430274963379
    },
    {
      "epoch": 0.0002813232421875,
      "model_forward_time": 0.11501622200012207,
      "step": 46092
    },
    {
      "epoch": 0.0002813232421875,
      "step": 46092,
      "training_step_time": 0.38791346549987793
    },
    {
      "epoch": 0.000281329345703125,
      "model_forward_time": 0.11463809013366699,
      "step": 46093
    },
    {
      "epoch": 0.000281329345703125,
      "step": 46093,
      "training_step_time": 0.3878505229949951
    },
    {
      "epoch": 0.00028133544921875,
      "model_forward_time": 0.11492538452148438,
      "step": 46094
    },
    {
      "epoch": 0.00028133544921875,
      "step": 46094,
      "training_step_time": 0.39109349250793457
    },
    {
      "epoch": 0.000281341552734375,
      "model_forward_time": 0.11507081985473633,
      "step": 46095
    },
    {
      "epoch": 0.000281341552734375,
      "step": 46095,
      "training_step_time": 0.37981677055358887
    },
    {
      "epoch": 0.00028134765625,
      "model_forward_time": 0.11451435089111328,
      "step": 46096
    },
    {
      "epoch": 0.00028134765625,
      "step": 46096,
      "training_step_time": 0.49417662620544434
    },
    {
      "epoch": 0.000281353759765625,
      "model_forward_time": 0.11408448219299316,
      "step": 46097
    },
    {
      "epoch": 0.000281353759765625,
      "step": 46097,
      "training_step_time": 1.13319730758667
    },
    {
      "epoch": 0.00028135986328125,
      "model_forward_time": 0.11363673210144043,
      "step": 46098
    },
    {
      "epoch": 0.00028135986328125,
      "step": 46098,
      "training_step_time": 0.4363255500793457
    },
    {
      "epoch": 0.000281365966796875,
      "model_forward_time": 0.11372232437133789,
      "step": 46099
    },
    {
      "epoch": 0.000281365966796875,
      "step": 46099,
      "training_step_time": 0.4048745632171631
    },
    {
      "epoch": 0.0002813720703125,
      "grad_norm": 0.1026371493935585,
      "learning_rate": 1.3969259793006079e-05,
      "loss": 0.0398,
      "step": 46100
    },
    {
      "epoch": 0.0002813720703125,
      "model_forward_time": 0.1138758659362793,
      "step": 46100
    },
    {
      "epoch": 0.0002813720703125,
      "step": 46100,
      "training_step_time": 0.46945810317993164
    },
    {
      "epoch": 0.000281378173828125,
      "model_forward_time": 0.11403012275695801,
      "step": 46101
    },
    {
      "epoch": 0.000281378173828125,
      "step": 46101,
      "training_step_time": 0.4321155548095703
    },
    {
      "epoch": 0.00028138427734375,
      "model_forward_time": 0.11424660682678223,
      "step": 46102
    },
    {
      "epoch": 0.00028138427734375,
      "step": 46102,
      "training_step_time": 0.3831191062927246
    },
    {
      "epoch": 0.000281390380859375,
      "model_forward_time": 0.11449980735778809,
      "step": 46103
    },
    {
      "epoch": 0.000281390380859375,
      "step": 46103,
      "training_step_time": 0.5720705986022949
    },
    {
      "epoch": 0.000281396484375,
      "model_forward_time": 0.1147158145904541,
      "step": 46104
    },
    {
      "epoch": 0.000281396484375,
      "step": 46104,
      "training_step_time": 0.38835787773132324
    },
    {
      "epoch": 0.000281402587890625,
      "model_forward_time": 0.11427974700927734,
      "step": 46105
    },
    {
      "epoch": 0.000281402587890625,
      "step": 46105,
      "training_step_time": 0.3897061347961426
    },
    {
      "epoch": 0.00028140869140625,
      "model_forward_time": 0.11520218849182129,
      "step": 46106
    },
    {
      "epoch": 0.00028140869140625,
      "step": 46106,
      "training_step_time": 0.39078354835510254
    },
    {
      "epoch": 0.000281414794921875,
      "model_forward_time": 0.11412978172302246,
      "step": 46107
    },
    {
      "epoch": 0.000281414794921875,
      "step": 46107,
      "training_step_time": 0.37589144706726074
    },
    {
      "epoch": 0.0002814208984375,
      "model_forward_time": 0.11463570594787598,
      "step": 46108
    },
    {
      "epoch": 0.0002814208984375,
      "step": 46108,
      "training_step_time": 0.3879659175872803
    },
    {
      "epoch": 0.000281427001953125,
      "model_forward_time": 0.11475205421447754,
      "step": 46109
    },
    {
      "epoch": 0.000281427001953125,
      "step": 46109,
      "training_step_time": 1.0271143913269043
    },
    {
      "epoch": 0.00028143310546875,
      "grad_norm": 0.10630113631486893,
      "learning_rate": 1.3950158440774957e-05,
      "loss": 0.0396,
      "step": 46110
    },
    {
      "epoch": 0.00028143310546875,
      "model_forward_time": 0.11406230926513672,
      "step": 46110
    },
    {
      "epoch": 0.00028143310546875,
      "step": 46110,
      "training_step_time": 0.44624853134155273
    },
    {
      "epoch": 0.000281439208984375,
      "model_forward_time": 0.11377263069152832,
      "step": 46111
    },
    {
      "epoch": 0.000281439208984375,
      "step": 46111,
      "training_step_time": 0.46365809440612793
    },
    {
      "epoch": 0.0002814453125,
      "model_forward_time": 0.11379265785217285,
      "step": 46112
    },
    {
      "epoch": 0.0002814453125,
      "step": 46112,
      "training_step_time": 0.42145681381225586
    },
    {
      "epoch": 0.000281451416015625,
      "model_forward_time": 0.11446166038513184,
      "step": 46113
    },
    {
      "epoch": 0.000281451416015625,
      "step": 46113,
      "training_step_time": 0.3963778018951416
    },
    {
      "epoch": 0.00028145751953125,
      "model_forward_time": 0.11470603942871094,
      "step": 46114
    },
    {
      "epoch": 0.00028145751953125,
      "step": 46114,
      "training_step_time": 0.40562891960144043
    },
    {
      "epoch": 0.000281463623046875,
      "model_forward_time": 0.11450386047363281,
      "step": 46115
    },
    {
      "epoch": 0.000281463623046875,
      "step": 46115,
      "training_step_time": 0.8980226516723633
    },
    {
      "epoch": 0.0002814697265625,
      "model_forward_time": 0.11439204216003418,
      "step": 46116
    },
    {
      "epoch": 0.0002814697265625,
      "step": 46116,
      "training_step_time": 0.38871288299560547
    },
    {
      "epoch": 0.000281475830078125,
      "model_forward_time": 0.11390972137451172,
      "step": 46117
    },
    {
      "epoch": 0.000281475830078125,
      "step": 46117,
      "training_step_time": 0.3953676223754883
    },
    {
      "epoch": 0.00028148193359375,
      "model_forward_time": 0.11416411399841309,
      "step": 46118
    },
    {
      "epoch": 0.00028148193359375,
      "step": 46118,
      "training_step_time": 0.38473057746887207
    },
    {
      "epoch": 0.000281488037109375,
      "model_forward_time": 0.11399078369140625,
      "step": 46119
    },
    {
      "epoch": 0.000281488037109375,
      "step": 46119,
      "training_step_time": 0.38250732421875
    },
    {
      "epoch": 0.000281494140625,
      "grad_norm": 0.10781927406787872,
      "learning_rate": 1.3931068039533823e-05,
      "loss": 0.0354,
      "step": 46120
    },
    {
      "epoch": 0.000281494140625,
      "model_forward_time": 0.11459493637084961,
      "step": 46120
    },
    {
      "epoch": 0.000281494140625,
      "step": 46120,
      "training_step_time": 0.3863670825958252
    },
    {
      "epoch": 0.000281500244140625,
      "model_forward_time": 0.11408305168151855,
      "step": 46121
    },
    {
      "epoch": 0.000281500244140625,
      "step": 46121,
      "training_step_time": 1.101898193359375
    },
    {
      "epoch": 0.00028150634765625,
      "model_forward_time": 0.11455845832824707,
      "step": 46122
    },
    {
      "epoch": 0.00028150634765625,
      "step": 46122,
      "training_step_time": 0.41422247886657715
    },
    {
      "epoch": 0.000281512451171875,
      "model_forward_time": 0.11355018615722656,
      "step": 46123
    },
    {
      "epoch": 0.000281512451171875,
      "step": 46123,
      "training_step_time": 0.44704747200012207
    },
    {
      "epoch": 0.0002815185546875,
      "model_forward_time": 0.11377882957458496,
      "step": 46124
    },
    {
      "epoch": 0.0002815185546875,
      "step": 46124,
      "training_step_time": 0.3581404685974121
    },
    {
      "epoch": 0.000281524658203125,
      "model_forward_time": 0.11376571655273438,
      "step": 46125
    },
    {
      "epoch": 0.000281524658203125,
      "step": 46125,
      "training_step_time": 0.4282362461090088
    },
    {
      "epoch": 0.00028153076171875,
      "model_forward_time": 0.11365222930908203,
      "step": 46126
    },
    {
      "epoch": 0.00028153076171875,
      "step": 46126,
      "training_step_time": 0.41580629348754883
    },
    {
      "epoch": 0.000281536865234375,
      "model_forward_time": 0.11475992202758789,
      "step": 46127
    },
    {
      "epoch": 0.000281536865234375,
      "step": 46127,
      "training_step_time": 0.6935765743255615
    },
    {
      "epoch": 0.00028154296875,
      "model_forward_time": 0.11406779289245605,
      "step": 46128
    },
    {
      "epoch": 0.00028154296875,
      "step": 46128,
      "training_step_time": 0.3914351463317871
    },
    {
      "epoch": 0.000281549072265625,
      "model_forward_time": 0.11569738388061523,
      "step": 46129
    },
    {
      "epoch": 0.000281549072265625,
      "step": 46129,
      "training_step_time": 0.3854982852935791
    },
    {
      "epoch": 0.00028155517578125,
      "grad_norm": 0.11970212310552597,
      "learning_rate": 1.3911988595081893e-05,
      "loss": 0.0354,
      "step": 46130
    },
    {
      "epoch": 0.00028155517578125,
      "model_forward_time": 0.11439085006713867,
      "step": 46130
    },
    {
      "epoch": 0.00028155517578125,
      "step": 46130,
      "training_step_time": 0.3883359432220459
    },
    {
      "epoch": 0.000281561279296875,
      "model_forward_time": 0.11425304412841797,
      "step": 46131
    },
    {
      "epoch": 0.000281561279296875,
      "step": 46131,
      "training_step_time": 0.4000875949859619
    },
    {
      "epoch": 0.0002815673828125,
      "model_forward_time": 0.11445450782775879,
      "step": 46132
    },
    {
      "epoch": 0.0002815673828125,
      "step": 46132,
      "training_step_time": 0.3854811191558838
    },
    {
      "epoch": 0.000281573486328125,
      "model_forward_time": 0.11447620391845703,
      "step": 46133
    },
    {
      "epoch": 0.000281573486328125,
      "step": 46133,
      "training_step_time": 0.7887163162231445
    },
    {
      "epoch": 0.00028157958984375,
      "model_forward_time": 0.1138758659362793,
      "step": 46134
    },
    {
      "epoch": 0.00028157958984375,
      "step": 46134,
      "training_step_time": 0.430342435836792
    },
    {
      "epoch": 0.000281585693359375,
      "model_forward_time": 0.11453080177307129,
      "step": 46135
    },
    {
      "epoch": 0.000281585693359375,
      "step": 46135,
      "training_step_time": 0.4794185161590576
    },
    {
      "epoch": 0.000281591796875,
      "model_forward_time": 0.11413764953613281,
      "step": 46136
    },
    {
      "epoch": 0.000281591796875,
      "step": 46136,
      "training_step_time": 0.4206819534301758
    },
    {
      "epoch": 0.000281597900390625,
      "model_forward_time": 0.11410665512084961,
      "step": 46137
    },
    {
      "epoch": 0.000281597900390625,
      "step": 46137,
      "training_step_time": 0.4524688720703125
    },
    {
      "epoch": 0.00028160400390625,
      "model_forward_time": 0.11482000350952148,
      "step": 46138
    },
    {
      "epoch": 0.00028160400390625,
      "step": 46138,
      "training_step_time": 0.3976094722747803
    },
    {
      "epoch": 0.000281610107421875,
      "model_forward_time": 0.11495494842529297,
      "step": 46139
    },
    {
      "epoch": 0.000281610107421875,
      "step": 46139,
      "training_step_time": 0.4717850685119629
    },
    {
      "epoch": 0.0002816162109375,
      "grad_norm": 0.10823286324739456,
      "learning_rate": 1.389292011321498e-05,
      "loss": 0.0358,
      "step": 46140
    },
    {
      "epoch": 0.0002816162109375,
      "model_forward_time": 0.11439180374145508,
      "step": 46140
    },
    {
      "epoch": 0.0002816162109375,
      "step": 46140,
      "training_step_time": 0.4022817611694336
    },
    {
      "epoch": 0.000281622314453125,
      "model_forward_time": 0.1150975227355957,
      "step": 46141
    },
    {
      "epoch": 0.000281622314453125,
      "step": 46141,
      "training_step_time": 0.3990633487701416
    },
    {
      "epoch": 0.00028162841796875,
      "model_forward_time": 0.11506891250610352,
      "step": 46142
    },
    {
      "epoch": 0.00028162841796875,
      "step": 46142,
      "training_step_time": 0.3977015018463135
    },
    {
      "epoch": 0.000281634521484375,
      "model_forward_time": 0.11525893211364746,
      "step": 46143
    },
    {
      "epoch": 0.000281634521484375,
      "step": 46143,
      "training_step_time": 0.39668774604797363
    },
    {
      "epoch": 0.000281640625,
      "model_forward_time": 0.11482667922973633,
      "step": 46144
    },
    {
      "epoch": 0.000281640625,
      "step": 46144,
      "training_step_time": 0.3966846466064453
    },
    {
      "epoch": 0.000281646728515625,
      "model_forward_time": 0.11485743522644043,
      "step": 46145
    },
    {
      "epoch": 0.000281646728515625,
      "step": 46145,
      "training_step_time": 0.8201637268066406
    },
    {
      "epoch": 0.00028165283203125,
      "model_forward_time": 0.11402750015258789,
      "step": 46146
    },
    {
      "epoch": 0.00028165283203125,
      "step": 46146,
      "training_step_time": 0.39461755752563477
    },
    {
      "epoch": 0.000281658935546875,
      "model_forward_time": 0.1141061782836914,
      "step": 46147
    },
    {
      "epoch": 0.000281658935546875,
      "step": 46147,
      "training_step_time": 0.4456605911254883
    },
    {
      "epoch": 0.0002816650390625,
      "model_forward_time": 0.11458945274353027,
      "step": 46148
    },
    {
      "epoch": 0.0002816650390625,
      "step": 46148,
      "training_step_time": 0.474759578704834
    },
    {
      "epoch": 0.000281671142578125,
      "model_forward_time": 0.1147918701171875,
      "step": 46149
    },
    {
      "epoch": 0.000281671142578125,
      "step": 46149,
      "training_step_time": 0.44388341903686523
    },
    {
      "epoch": 0.00028167724609375,
      "grad_norm": 0.12245107442140579,
      "learning_rate": 1.3873862599725578e-05,
      "loss": 0.0356,
      "step": 46150
    },
    {
      "epoch": 0.00028167724609375,
      "model_forward_time": 0.11429452896118164,
      "step": 46150
    },
    {
      "epoch": 0.00028167724609375,
      "step": 46150,
      "training_step_time": 0.4538891315460205
    },
    {
      "epoch": 0.000281683349609375,
      "model_forward_time": 0.11430811882019043,
      "step": 46151
    },
    {
      "epoch": 0.000281683349609375,
      "step": 46151,
      "training_step_time": 0.45803093910217285
    },
    {
      "epoch": 0.000281689453125,
      "model_forward_time": 0.11458921432495117,
      "step": 46152
    },
    {
      "epoch": 0.000281689453125,
      "step": 46152,
      "training_step_time": 0.3984818458557129
    },
    {
      "epoch": 0.000281695556640625,
      "model_forward_time": 0.11426782608032227,
      "step": 46153
    },
    {
      "epoch": 0.000281695556640625,
      "step": 46153,
      "training_step_time": 0.46440911293029785
    },
    {
      "epoch": 0.00028170166015625,
      "model_forward_time": 0.11481237411499023,
      "step": 46154
    },
    {
      "epoch": 0.00028170166015625,
      "step": 46154,
      "training_step_time": 0.38429927825927734
    },
    {
      "epoch": 0.000281707763671875,
      "model_forward_time": 0.11476302146911621,
      "step": 46155
    },
    {
      "epoch": 0.000281707763671875,
      "step": 46155,
      "training_step_time": 0.38138771057128906
    },
    {
      "epoch": 0.0002817138671875,
      "model_forward_time": 0.11506986618041992,
      "step": 46156
    },
    {
      "epoch": 0.0002817138671875,
      "step": 46156,
      "training_step_time": 0.3949770927429199
    },
    {
      "epoch": 0.000281719970703125,
      "model_forward_time": 0.11509037017822266,
      "step": 46157
    },
    {
      "epoch": 0.000281719970703125,
      "step": 46157,
      "training_step_time": 0.5790321826934814
    },
    {
      "epoch": 0.00028172607421875,
      "model_forward_time": 0.11505460739135742,
      "step": 46158
    },
    {
      "epoch": 0.00028172607421875,
      "step": 46158,
      "training_step_time": 0.38964152336120605
    },
    {
      "epoch": 0.000281732177734375,
      "model_forward_time": 0.11500430107116699,
      "step": 46159
    },
    {
      "epoch": 0.000281732177734375,
      "step": 46159,
      "training_step_time": 0.3918178081512451
    },
    {
      "epoch": 0.00028173828125,
      "grad_norm": 0.12936590611934662,
      "learning_rate": 1.385481606040287e-05,
      "loss": 0.0414,
      "step": 46160
    },
    {
      "epoch": 0.00028173828125,
      "model_forward_time": 0.11476469039916992,
      "step": 46160
    },
    {
      "epoch": 0.00028173828125,
      "step": 46160,
      "training_step_time": 0.5014410018920898
    },
    {
      "epoch": 0.000281744384765625,
      "model_forward_time": 0.11480498313903809,
      "step": 46161
    },
    {
      "epoch": 0.000281744384765625,
      "step": 46161,
      "training_step_time": 0.4484982490539551
    },
    {
      "epoch": 0.00028175048828125,
      "model_forward_time": 0.11429524421691895,
      "step": 46162
    },
    {
      "epoch": 0.00028175048828125,
      "step": 46162,
      "training_step_time": 0.49910640716552734
    },
    {
      "epoch": 0.000281756591796875,
      "model_forward_time": 0.11424493789672852,
      "step": 46163
    },
    {
      "epoch": 0.000281756591796875,
      "step": 46163,
      "training_step_time": 0.48175859451293945
    },
    {
      "epoch": 0.0002817626953125,
      "model_forward_time": 0.11444735527038574,
      "step": 46164
    },
    {
      "epoch": 0.0002817626953125,
      "step": 46164,
      "training_step_time": 0.39975976943969727
    },
    {
      "epoch": 0.000281768798828125,
      "model_forward_time": 0.11463570594787598,
      "step": 46165
    },
    {
      "epoch": 0.000281768798828125,
      "step": 46165,
      "training_step_time": 0.46885108947753906
    },
    {
      "epoch": 0.00028177490234375,
      "model_forward_time": 0.11515140533447266,
      "step": 46166
    },
    {
      "epoch": 0.00028177490234375,
      "step": 46166,
      "training_step_time": 0.4815399646759033
    },
    {
      "epoch": 0.000281781005859375,
      "model_forward_time": 0.11430907249450684,
      "step": 46167
    },
    {
      "epoch": 0.000281781005859375,
      "step": 46167,
      "training_step_time": 0.4062318801879883
    },
    {
      "epoch": 0.000281787109375,
      "model_forward_time": 0.11425638198852539,
      "step": 46168
    },
    {
      "epoch": 0.000281787109375,
      "step": 46168,
      "training_step_time": 0.38904619216918945
    },
    {
      "epoch": 0.000281793212890625,
      "model_forward_time": 0.11576461791992188,
      "step": 46169
    },
    {
      "epoch": 0.000281793212890625,
      "step": 46169,
      "training_step_time": 0.39894914627075195
    },
    {
      "epoch": 0.00028179931640625,
      "grad_norm": 0.10838077962398529,
      "learning_rate": 1.383578050103268e-05,
      "loss": 0.0403,
      "step": 46170
    },
    {
      "epoch": 0.00028179931640625,
      "model_forward_time": 0.11517524719238281,
      "step": 46170
    },
    {
      "epoch": 0.00028179931640625,
      "step": 46170,
      "training_step_time": 0.3865163326263428
    },
    {
      "epoch": 0.000281805419921875,
      "model_forward_time": 0.1149134635925293,
      "step": 46171
    },
    {
      "epoch": 0.000281805419921875,
      "step": 46171,
      "training_step_time": 0.39200901985168457
    },
    {
      "epoch": 0.0002818115234375,
      "model_forward_time": 0.11527514457702637,
      "step": 46172
    },
    {
      "epoch": 0.0002818115234375,
      "step": 46172,
      "training_step_time": 0.39779233932495117
    },
    {
      "epoch": 0.000281817626953125,
      "model_forward_time": 0.11521553993225098,
      "step": 46173
    },
    {
      "epoch": 0.000281817626953125,
      "step": 46173,
      "training_step_time": 0.4192311763763428
    },
    {
      "epoch": 0.00028182373046875,
      "model_forward_time": 0.11557650566101074,
      "step": 46174
    },
    {
      "epoch": 0.00028182373046875,
      "step": 46174,
      "training_step_time": 0.47325706481933594
    },
    {
      "epoch": 0.000281829833984375,
      "model_forward_time": 0.11479043960571289,
      "step": 46175
    },
    {
      "epoch": 0.000281829833984375,
      "step": 46175,
      "training_step_time": 0.6687893867492676
    },
    {
      "epoch": 0.0002818359375,
      "model_forward_time": 0.11451959609985352,
      "step": 46176
    },
    {
      "epoch": 0.0002818359375,
      "step": 46176,
      "training_step_time": 0.4712038040161133
    },
    {
      "epoch": 0.000281842041015625,
      "model_forward_time": 0.11459803581237793,
      "step": 46177
    },
    {
      "epoch": 0.000281842041015625,
      "step": 46177,
      "training_step_time": 0.43617868423461914
    },
    {
      "epoch": 0.00028184814453125,
      "model_forward_time": 0.11478734016418457,
      "step": 46178
    },
    {
      "epoch": 0.00028184814453125,
      "step": 46178,
      "training_step_time": 0.37581586837768555
    },
    {
      "epoch": 0.000281854248046875,
      "model_forward_time": 0.11437010765075684,
      "step": 46179
    },
    {
      "epoch": 0.000281854248046875,
      "step": 46179,
      "training_step_time": 0.43333864212036133
    },
    {
      "epoch": 0.0002818603515625,
      "grad_norm": 0.13394363224506378,
      "learning_rate": 1.3816755927397502e-05,
      "loss": 0.0343,
      "step": 46180
    },
    {
      "epoch": 0.0002818603515625,
      "model_forward_time": 0.11472916603088379,
      "step": 46180
    },
    {
      "epoch": 0.0002818603515625,
      "step": 46180,
      "training_step_time": 0.4154846668243408
    },
    {
      "epoch": 0.000281866455078125,
      "model_forward_time": 0.11511778831481934,
      "step": 46181
    },
    {
      "epoch": 0.000281866455078125,
      "step": 46181,
      "training_step_time": 0.40726161003112793
    },
    {
      "epoch": 0.00028187255859375,
      "model_forward_time": 0.11507296562194824,
      "step": 46182
    },
    {
      "epoch": 0.00028187255859375,
      "step": 46182,
      "training_step_time": 0.39306163787841797
    },
    {
      "epoch": 0.000281878662109375,
      "model_forward_time": 0.11511921882629395,
      "step": 46183
    },
    {
      "epoch": 0.000281878662109375,
      "step": 46183,
      "training_step_time": 0.39536023139953613
    },
    {
      "epoch": 0.000281884765625,
      "model_forward_time": 0.11509537696838379,
      "step": 46184
    },
    {
      "epoch": 0.000281884765625,
      "step": 46184,
      "training_step_time": 0.39151883125305176
    },
    {
      "epoch": 0.000281890869140625,
      "model_forward_time": 0.11537671089172363,
      "step": 46185
    },
    {
      "epoch": 0.000281890869140625,
      "step": 46185,
      "training_step_time": 0.3941788673400879
    },
    {
      "epoch": 0.00028189697265625,
      "model_forward_time": 0.11455035209655762,
      "step": 46186
    },
    {
      "epoch": 0.00028189697265625,
      "step": 46186,
      "training_step_time": 0.4619026184082031
    },
    {
      "epoch": 0.000281903076171875,
      "model_forward_time": 0.11477017402648926,
      "step": 46187
    },
    {
      "epoch": 0.000281903076171875,
      "step": 46187,
      "training_step_time": 0.5860435962677002
    },
    {
      "epoch": 0.0002819091796875,
      "model_forward_time": 0.11439776420593262,
      "step": 46188
    },
    {
      "epoch": 0.0002819091796875,
      "step": 46188,
      "training_step_time": 0.4030466079711914
    },
    {
      "epoch": 0.000281915283203125,
      "model_forward_time": 0.11467671394348145,
      "step": 46189
    },
    {
      "epoch": 0.000281915283203125,
      "step": 46189,
      "training_step_time": 0.48749279975891113
    },
    {
      "epoch": 0.00028192138671875,
      "grad_norm": 0.16002976894378662,
      "learning_rate": 1.3797742345276521e-05,
      "loss": 0.0373,
      "step": 46190
    },
    {
      "epoch": 0.00028192138671875,
      "model_forward_time": 0.11438202857971191,
      "step": 46190
    },
    {
      "epoch": 0.00028192138671875,
      "step": 46190,
      "training_step_time": 0.4405977725982666
    },
    {
      "epoch": 0.000281927490234375,
      "model_forward_time": 0.11418271064758301,
      "step": 46191
    },
    {
      "epoch": 0.000281927490234375,
      "step": 46191,
      "training_step_time": 0.45787620544433594
    },
    {
      "epoch": 0.00028193359375,
      "model_forward_time": 0.11479830741882324,
      "step": 46192
    },
    {
      "epoch": 0.00028193359375,
      "step": 46192,
      "training_step_time": 0.36490559577941895
    },
    {
      "epoch": 0.000281939697265625,
      "model_forward_time": 0.11460018157958984,
      "step": 46193
    },
    {
      "epoch": 0.000281939697265625,
      "step": 46193,
      "training_step_time": 0.43244051933288574
    },
    {
      "epoch": 0.00028194580078125,
      "model_forward_time": 0.11468076705932617,
      "step": 46194
    },
    {
      "epoch": 0.00028194580078125,
      "step": 46194,
      "training_step_time": 0.39966511726379395
    },
    {
      "epoch": 0.000281951904296875,
      "model_forward_time": 0.11414504051208496,
      "step": 46195
    },
    {
      "epoch": 0.000281951904296875,
      "step": 46195,
      "training_step_time": 0.39002132415771484
    },
    {
      "epoch": 0.0002819580078125,
      "model_forward_time": 0.1158452033996582,
      "step": 46196
    },
    {
      "epoch": 0.0002819580078125,
      "step": 46196,
      "training_step_time": 0.3889036178588867
    },
    {
      "epoch": 0.000281964111328125,
      "model_forward_time": 0.1151270866394043,
      "step": 46197
    },
    {
      "epoch": 0.000281964111328125,
      "step": 46197,
      "training_step_time": 0.38927745819091797
    },
    {
      "epoch": 0.00028197021484375,
      "model_forward_time": 0.1151435375213623,
      "step": 46198
    },
    {
      "epoch": 0.00028197021484375,
      "step": 46198,
      "training_step_time": 0.38669896125793457
    },
    {
      "epoch": 0.000281976318359375,
      "model_forward_time": 0.11503839492797852,
      "step": 46199
    },
    {
      "epoch": 0.000281976318359375,
      "step": 46199,
      "training_step_time": 0.6981797218322754
    },
    {
      "epoch": 0.000281982421875,
      "grad_norm": 0.11941175162792206,
      "learning_rate": 1.3778739760445552e-05,
      "loss": 0.0424,
      "step": 46200
    },
    {
      "epoch": 0.000281982421875,
      "model_forward_time": 0.11496400833129883,
      "step": 46200
    },
    {
      "epoch": 0.000281982421875,
      "step": 46200,
      "training_step_time": 0.39429497718811035
    },
    {
      "epoch": 0.000281988525390625,
      "model_forward_time": 0.11532425880432129,
      "step": 46201
    },
    {
      "epoch": 0.000281988525390625,
      "step": 46201,
      "training_step_time": 0.38626861572265625
    },
    {
      "epoch": 0.00028199462890625,
      "model_forward_time": 0.11488461494445801,
      "step": 46202
    },
    {
      "epoch": 0.00028199462890625,
      "step": 46202,
      "training_step_time": 0.422670841217041
    },
    {
      "epoch": 0.000282000732421875,
      "model_forward_time": 0.1147775650024414,
      "step": 46203
    },
    {
      "epoch": 0.000282000732421875,
      "step": 46203,
      "training_step_time": 0.5118510723114014
    },
    {
      "epoch": 0.0002820068359375,
      "model_forward_time": 0.11414861679077148,
      "step": 46204
    },
    {
      "epoch": 0.0002820068359375,
      "step": 46204,
      "training_step_time": 0.46073460578918457
    },
    {
      "epoch": 0.000282012939453125,
      "model_forward_time": 0.11539101600646973,
      "step": 46205
    },
    {
      "epoch": 0.000282012939453125,
      "step": 46205,
      "training_step_time": 0.42194414138793945
    },
    {
      "epoch": 0.00028201904296875,
      "model_forward_time": 0.11458158493041992,
      "step": 46206
    },
    {
      "epoch": 0.00028201904296875,
      "step": 46206,
      "training_step_time": 0.463407039642334
    },
    {
      "epoch": 0.000282025146484375,
      "model_forward_time": 0.11432123184204102,
      "step": 46207
    },
    {
      "epoch": 0.000282025146484375,
      "step": 46207,
      "training_step_time": 0.41395092010498047
    },
    {
      "epoch": 0.00028203125,
      "model_forward_time": 0.11488986015319824,
      "step": 46208
    },
    {
      "epoch": 0.00028203125,
      "step": 46208,
      "training_step_time": 0.4502713680267334
    },
    {
      "epoch": 0.000282037353515625,
      "model_forward_time": 0.1146087646484375,
      "step": 46209
    },
    {
      "epoch": 0.000282037353515625,
      "step": 46209,
      "training_step_time": 0.38611674308776855
    },
    {
      "epoch": 0.00028204345703125,
      "grad_norm": 0.13484294712543488,
      "learning_rate": 1.3759748178677078e-05,
      "loss": 0.0419,
      "step": 46210
    },
    {
      "epoch": 0.00028204345703125,
      "model_forward_time": 0.11451268196105957,
      "step": 46210
    },
    {
      "epoch": 0.00028204345703125,
      "step": 46210,
      "training_step_time": 0.3935232162475586
    },
    {
      "epoch": 0.000282049560546875,
      "model_forward_time": 0.11525917053222656,
      "step": 46211
    },
    {
      "epoch": 0.000282049560546875,
      "step": 46211,
      "training_step_time": 0.38587498664855957
    },
    {
      "epoch": 0.0002820556640625,
      "model_forward_time": 0.11653423309326172,
      "step": 46212
    },
    {
      "epoch": 0.0002820556640625,
      "step": 46212,
      "training_step_time": 0.3961496353149414
    },
    {
      "epoch": 0.000282061767578125,
      "model_forward_time": 0.11520075798034668,
      "step": 46213
    },
    {
      "epoch": 0.000282061767578125,
      "step": 46213,
      "training_step_time": 0.40629053115844727
    },
    {
      "epoch": 0.00028206787109375,
      "model_forward_time": 0.11523866653442383,
      "step": 46214
    },
    {
      "epoch": 0.00028206787109375,
      "step": 46214,
      "training_step_time": 0.3869504928588867
    },
    {
      "epoch": 0.000282073974609375,
      "model_forward_time": 0.11508417129516602,
      "step": 46215
    },
    {
      "epoch": 0.000282073974609375,
      "step": 46215,
      "training_step_time": 0.3959014415740967
    },
    {
      "epoch": 0.000282080078125,
      "model_forward_time": 0.11635804176330566,
      "step": 46216
    },
    {
      "epoch": 0.000282080078125,
      "step": 46216,
      "training_step_time": 0.44773006439208984
    },
    {
      "epoch": 0.000282086181640625,
      "model_forward_time": 0.1148674488067627,
      "step": 46217
    },
    {
      "epoch": 0.000282086181640625,
      "step": 46217,
      "training_step_time": 0.7292366027832031
    },
    {
      "epoch": 0.00028209228515625,
      "model_forward_time": 0.11394214630126953,
      "step": 46218
    },
    {
      "epoch": 0.00028209228515625,
      "step": 46218,
      "training_step_time": 0.443636417388916
    },
    {
      "epoch": 0.000282098388671875,
      "model_forward_time": 0.1139986515045166,
      "step": 46219
    },
    {
      "epoch": 0.000282098388671875,
      "step": 46219,
      "training_step_time": 0.42394042015075684
    },
    {
      "epoch": 0.0002821044921875,
      "grad_norm": 0.10112913697957993,
      "learning_rate": 1.374076760574024e-05,
      "loss": 0.0379,
      "step": 46220
    },
    {
      "epoch": 0.0002821044921875,
      "model_forward_time": 0.11440205574035645,
      "step": 46220
    },
    {
      "epoch": 0.0002821044921875,
      "step": 46220,
      "training_step_time": 0.46488213539123535
    },
    {
      "epoch": 0.000282110595703125,
      "model_forward_time": 0.11482548713684082,
      "step": 46221
    },
    {
      "epoch": 0.000282110595703125,
      "step": 46221,
      "training_step_time": 0.48512816429138184
    },
    {
      "epoch": 0.00028211669921875,
      "model_forward_time": 0.11433839797973633,
      "step": 46222
    },
    {
      "epoch": 0.00028211669921875,
      "step": 46222,
      "training_step_time": 0.3850984573364258
    },
    {
      "epoch": 0.000282122802734375,
      "model_forward_time": 0.11443519592285156,
      "step": 46223
    },
    {
      "epoch": 0.000282122802734375,
      "step": 46223,
      "training_step_time": 0.3903834819793701
    },
    {
      "epoch": 0.00028212890625,
      "model_forward_time": 0.1148982048034668,
      "step": 46224
    },
    {
      "epoch": 0.00028212890625,
      "step": 46224,
      "training_step_time": 0.39908576011657715
    },
    {
      "epoch": 0.000282135009765625,
      "model_forward_time": 0.11484217643737793,
      "step": 46225
    },
    {
      "epoch": 0.000282135009765625,
      "step": 46225,
      "training_step_time": 0.392350435256958
    },
    {
      "epoch": 0.00028214111328125,
      "model_forward_time": 0.11501359939575195,
      "step": 46226
    },
    {
      "epoch": 0.00028214111328125,
      "step": 46226,
      "training_step_time": 0.4259939193725586
    },
    {
      "epoch": 0.000282147216796875,
      "model_forward_time": 0.11560535430908203,
      "step": 46227
    },
    {
      "epoch": 0.000282147216796875,
      "step": 46227,
      "training_step_time": 0.3849809169769287
    },
    {
      "epoch": 0.0002821533203125,
      "model_forward_time": 0.11471915245056152,
      "step": 46228
    },
    {
      "epoch": 0.0002821533203125,
      "step": 46228,
      "training_step_time": 0.3981754779815674
    },
    {
      "epoch": 0.000282159423828125,
      "model_forward_time": 0.11472344398498535,
      "step": 46229
    },
    {
      "epoch": 0.000282159423828125,
      "step": 46229,
      "training_step_time": 0.47249913215637207
    },
    {
      "epoch": 0.00028216552734375,
      "grad_norm": 0.1381121575832367,
      "learning_rate": 1.3721798047400813e-05,
      "loss": 0.0421,
      "step": 46230
    },
    {
      "epoch": 0.00028216552734375,
      "model_forward_time": 0.11573934555053711,
      "step": 46230
    },
    {
      "epoch": 0.00028216552734375,
      "step": 46230,
      "training_step_time": 0.4113607406616211
    },
    {
      "epoch": 0.000282171630859375,
      "model_forward_time": 0.11487126350402832,
      "step": 46231
    },
    {
      "epoch": 0.000282171630859375,
      "step": 46231,
      "training_step_time": 0.41369009017944336
    },
    {
      "epoch": 0.000282177734375,
      "model_forward_time": 0.1148076057434082,
      "step": 46232
    },
    {
      "epoch": 0.000282177734375,
      "step": 46232,
      "training_step_time": 0.39804816246032715
    },
    {
      "epoch": 0.000282183837890625,
      "model_forward_time": 0.11487364768981934,
      "step": 46233
    },
    {
      "epoch": 0.000282183837890625,
      "step": 46233,
      "training_step_time": 0.4188089370727539
    },
    {
      "epoch": 0.00028218994140625,
      "model_forward_time": 0.1148080825805664,
      "step": 46234
    },
    {
      "epoch": 0.00028218994140625,
      "step": 46234,
      "training_step_time": 0.44327449798583984
    },
    {
      "epoch": 0.000282196044921875,
      "model_forward_time": 0.11509871482849121,
      "step": 46235
    },
    {
      "epoch": 0.000282196044921875,
      "step": 46235,
      "training_step_time": 0.6189608573913574
    },
    {
      "epoch": 0.0002822021484375,
      "model_forward_time": 0.11458826065063477,
      "step": 46236
    },
    {
      "epoch": 0.0002822021484375,
      "step": 46236,
      "training_step_time": 0.38704371452331543
    },
    {
      "epoch": 0.000282208251953125,
      "model_forward_time": 0.11515641212463379,
      "step": 46237
    },
    {
      "epoch": 0.000282208251953125,
      "step": 46237,
      "training_step_time": 0.3929147720336914
    },
    {
      "epoch": 0.00028221435546875,
      "model_forward_time": 0.11478829383850098,
      "step": 46238
    },
    {
      "epoch": 0.00028221435546875,
      "step": 46238,
      "training_step_time": 0.3855125904083252
    },
    {
      "epoch": 0.000282220458984375,
      "model_forward_time": 0.11453962326049805,
      "step": 46239
    },
    {
      "epoch": 0.000282220458984375,
      "step": 46239,
      "training_step_time": 0.4555068016052246
    },
    {
      "epoch": 0.0002822265625,
      "grad_norm": 0.10028795897960663,
      "learning_rate": 1.3702839509421305e-05,
      "loss": 0.0354,
      "step": 46240
    },
    {
      "epoch": 0.0002822265625,
      "model_forward_time": 0.1147148609161377,
      "step": 46240
    },
    {
      "epoch": 0.0002822265625,
      "step": 46240,
      "training_step_time": 0.38691091537475586
    },
    {
      "epoch": 0.000282232666015625,
      "model_forward_time": 0.11476469039916992,
      "step": 46241
    },
    {
      "epoch": 0.000282232666015625,
      "step": 46241,
      "training_step_time": 0.6219038963317871
    },
    {
      "epoch": 0.00028223876953125,
      "model_forward_time": 0.11458039283752441,
      "step": 46242
    },
    {
      "epoch": 0.00028223876953125,
      "step": 46242,
      "training_step_time": 0.39159417152404785
    },
    {
      "epoch": 0.000282244873046875,
      "model_forward_time": 0.11441326141357422,
      "step": 46243
    },
    {
      "epoch": 0.000282244873046875,
      "step": 46243,
      "training_step_time": 0.48644280433654785
    },
    {
      "epoch": 0.0002822509765625,
      "model_forward_time": 0.11444377899169922,
      "step": 46244
    },
    {
      "epoch": 0.0002822509765625,
      "step": 46244,
      "training_step_time": 0.4686732292175293
    },
    {
      "epoch": 0.000282257080078125,
      "model_forward_time": 0.11425256729125977,
      "step": 46245
    },
    {
      "epoch": 0.000282257080078125,
      "step": 46245,
      "training_step_time": 0.38221263885498047
    },
    {
      "epoch": 0.00028226318359375,
      "model_forward_time": 0.11381673812866211,
      "step": 46246
    },
    {
      "epoch": 0.00028226318359375,
      "step": 46246,
      "training_step_time": 0.4893147945404053
    },
    {
      "epoch": 0.000282269287109375,
      "model_forward_time": 0.11455416679382324,
      "step": 46247
    },
    {
      "epoch": 0.000282269287109375,
      "step": 46247,
      "training_step_time": 0.42536020278930664
    },
    {
      "epoch": 0.000282275390625,
      "model_forward_time": 0.11466431617736816,
      "step": 46248
    },
    {
      "epoch": 0.000282275390625,
      "step": 46248,
      "training_step_time": 0.48430967330932617
    },
    {
      "epoch": 0.000282281494140625,
      "model_forward_time": 0.11441445350646973,
      "step": 46249
    },
    {
      "epoch": 0.000282281494140625,
      "step": 46249,
      "training_step_time": 0.49782490730285645
    },
    {
      "epoch": 0.00028228759765625,
      "grad_norm": 0.09517005831003189,
      "learning_rate": 1.368389199756075e-05,
      "loss": 0.0384,
      "step": 46250
    },
    {
      "epoch": 0.00028228759765625,
      "model_forward_time": 0.11489009857177734,
      "step": 46250
    },
    {
      "epoch": 0.00028228759765625,
      "step": 46250,
      "training_step_time": 0.3970332145690918
    },
    {
      "epoch": 0.000282293701171875,
      "model_forward_time": 0.11410760879516602,
      "step": 46251
    },
    {
      "epoch": 0.000282293701171875,
      "step": 46251,
      "training_step_time": 0.40648603439331055
    },
    {
      "epoch": 0.0002822998046875,
      "model_forward_time": 0.1143953800201416,
      "step": 46252
    },
    {
      "epoch": 0.0002822998046875,
      "step": 46252,
      "training_step_time": 0.4321434497833252
    },
    {
      "epoch": 0.000282305908203125,
      "model_forward_time": 0.11396503448486328,
      "step": 46253
    },
    {
      "epoch": 0.000282305908203125,
      "step": 46253,
      "training_step_time": 0.41692686080932617
    },
    {
      "epoch": 0.00028231201171875,
      "model_forward_time": 0.1156466007232666,
      "step": 46254
    },
    {
      "epoch": 0.00028231201171875,
      "step": 46254,
      "training_step_time": 0.39890360832214355
    },
    {
      "epoch": 0.000282318115234375,
      "model_forward_time": 0.11535024642944336,
      "step": 46255
    },
    {
      "epoch": 0.000282318115234375,
      "step": 46255,
      "training_step_time": 0.39623379707336426
    },
    {
      "epoch": 0.00028232421875,
      "model_forward_time": 0.11522626876831055,
      "step": 46256
    },
    {
      "epoch": 0.00028232421875,
      "step": 46256,
      "training_step_time": 0.3920927047729492
    },
    {
      "epoch": 0.000282330322265625,
      "model_forward_time": 0.11475801467895508,
      "step": 46257
    },
    {
      "epoch": 0.000282330322265625,
      "step": 46257,
      "training_step_time": 0.3959174156188965
    },
    {
      "epoch": 0.00028233642578125,
      "model_forward_time": 0.1149907112121582,
      "step": 46258
    },
    {
      "epoch": 0.00028233642578125,
      "step": 46258,
      "training_step_time": 0.4567108154296875
    },
    {
      "epoch": 0.000282342529296875,
      "model_forward_time": 0.11463379859924316,
      "step": 46259
    },
    {
      "epoch": 0.000282342529296875,
      "step": 46259,
      "training_step_time": 0.39424824714660645
    },
    {
      "epoch": 0.0002823486328125,
      "grad_norm": 0.12693870067596436,
      "learning_rate": 1.3664955517574968e-05,
      "loss": 0.0354,
      "step": 46260
    },
    {
      "epoch": 0.0002823486328125,
      "model_forward_time": 0.11594319343566895,
      "step": 46260
    },
    {
      "epoch": 0.0002823486328125,
      "step": 46260,
      "training_step_time": 0.40703892707824707
    },
    {
      "epoch": 0.000282354736328125,
      "model_forward_time": 0.11479425430297852,
      "step": 46261
    },
    {
      "epoch": 0.000282354736328125,
      "step": 46261,
      "training_step_time": 0.3936798572540283
    },
    {
      "epoch": 0.00028236083984375,
      "model_forward_time": 0.11483573913574219,
      "step": 46262
    },
    {
      "epoch": 0.00028236083984375,
      "step": 46262,
      "training_step_time": 0.39344072341918945
    },
    {
      "epoch": 0.000282366943359375,
      "model_forward_time": 0.11479783058166504,
      "step": 46263
    },
    {
      "epoch": 0.000282366943359375,
      "step": 46263,
      "training_step_time": 0.5011851787567139
    },
    {
      "epoch": 0.000282373046875,
      "model_forward_time": 0.11406683921813965,
      "step": 46264
    },
    {
      "epoch": 0.000282373046875,
      "step": 46264,
      "training_step_time": 0.39378809928894043
    },
    {
      "epoch": 0.000282379150390625,
      "model_forward_time": 0.115203857421875,
      "step": 46265
    },
    {
      "epoch": 0.000282379150390625,
      "step": 46265,
      "training_step_time": 0.42426371574401855
    },
    {
      "epoch": 0.00028238525390625,
      "model_forward_time": 0.11511540412902832,
      "step": 46266
    },
    {
      "epoch": 0.00028238525390625,
      "step": 46266,
      "training_step_time": 0.4126725196838379
    },
    {
      "epoch": 0.000282391357421875,
      "model_forward_time": 0.11508059501647949,
      "step": 46267
    },
    {
      "epoch": 0.000282391357421875,
      "step": 46267,
      "training_step_time": 0.4077150821685791
    },
    {
      "epoch": 0.0002823974609375,
      "model_forward_time": 0.11483025550842285,
      "step": 46268
    },
    {
      "epoch": 0.0002823974609375,
      "step": 46268,
      "training_step_time": 0.3810305595397949
    },
    {
      "epoch": 0.000282403564453125,
      "model_forward_time": 0.1144552230834961,
      "step": 46269
    },
    {
      "epoch": 0.000282403564453125,
      "step": 46269,
      "training_step_time": 0.38568663597106934
    },
    {
      "epoch": 0.00028240966796875,
      "grad_norm": 0.08996180444955826,
      "learning_rate": 1.3646030075216293e-05,
      "loss": 0.0357,
      "step": 46270
    },
    {
      "epoch": 0.00028240966796875,
      "model_forward_time": 0.1163177490234375,
      "step": 46270
    },
    {
      "epoch": 0.00028240966796875,
      "step": 46270,
      "training_step_time": 0.39726686477661133
    },
    {
      "epoch": 0.000282415771484375,
      "model_forward_time": 0.11482548713684082,
      "step": 46271
    },
    {
      "epoch": 0.000282415771484375,
      "step": 46271,
      "training_step_time": 0.6767511367797852
    },
    {
      "epoch": 0.000282421875,
      "model_forward_time": 0.11516094207763672,
      "step": 46272
    },
    {
      "epoch": 0.000282421875,
      "step": 46272,
      "training_step_time": 0.4609382152557373
    },
    {
      "epoch": 0.000282427978515625,
      "model_forward_time": 0.11470341682434082,
      "step": 46273
    },
    {
      "epoch": 0.000282427978515625,
      "step": 46273,
      "training_step_time": 0.39386987686157227
    },
    {
      "epoch": 0.00028243408203125,
      "model_forward_time": 0.11471676826477051,
      "step": 46274
    },
    {
      "epoch": 0.00028243408203125,
      "step": 46274,
      "training_step_time": 0.39112019538879395
    },
    {
      "epoch": 0.000282440185546875,
      "model_forward_time": 0.11518573760986328,
      "step": 46275
    },
    {
      "epoch": 0.000282440185546875,
      "step": 46275,
      "training_step_time": 0.4309682846069336
    },
    {
      "epoch": 0.0002824462890625,
      "model_forward_time": 0.11471986770629883,
      "step": 46276
    },
    {
      "epoch": 0.0002824462890625,
      "step": 46276,
      "training_step_time": 0.3994331359863281
    },
    {
      "epoch": 0.000282452392578125,
      "model_forward_time": 0.1146845817565918,
      "step": 46277
    },
    {
      "epoch": 0.000282452392578125,
      "step": 46277,
      "training_step_time": 0.363635778427124
    },
    {
      "epoch": 0.00028245849609375,
      "model_forward_time": 0.11429715156555176,
      "step": 46278
    },
    {
      "epoch": 0.00028245849609375,
      "step": 46278,
      "training_step_time": 0.45500779151916504
    },
    {
      "epoch": 0.000282464599609375,
      "model_forward_time": 0.1150963306427002,
      "step": 46279
    },
    {
      "epoch": 0.000282464599609375,
      "step": 46279,
      "training_step_time": 0.4428291320800781
    },
    {
      "epoch": 0.000282470703125,
      "grad_norm": 0.11084502190351486,
      "learning_rate": 1.3627115676233832e-05,
      "loss": 0.034,
      "step": 46280
    },
    {
      "epoch": 0.000282470703125,
      "model_forward_time": 0.11516761779785156,
      "step": 46280
    },
    {
      "epoch": 0.000282470703125,
      "step": 46280,
      "training_step_time": 0.3969612121582031
    },
    {
      "epoch": 0.000282476806640625,
      "model_forward_time": 0.11444687843322754,
      "step": 46281
    },
    {
      "epoch": 0.000282476806640625,
      "step": 46281,
      "training_step_time": 0.38460588455200195
    },
    {
      "epoch": 0.00028248291015625,
      "model_forward_time": 0.11580324172973633,
      "step": 46282
    },
    {
      "epoch": 0.00028248291015625,
      "step": 46282,
      "training_step_time": 0.3881387710571289
    },
    {
      "epoch": 0.000282489013671875,
      "model_forward_time": 0.11556029319763184,
      "step": 46283
    },
    {
      "epoch": 0.000282489013671875,
      "step": 46283,
      "training_step_time": 0.39873170852661133
    },
    {
      "epoch": 0.0002824951171875,
      "model_forward_time": 0.11466097831726074,
      "step": 46284
    },
    {
      "epoch": 0.0002824951171875,
      "step": 46284,
      "training_step_time": 0.40317225456237793
    },
    {
      "epoch": 0.000282501220703125,
      "model_forward_time": 0.11450934410095215,
      "step": 46285
    },
    {
      "epoch": 0.000282501220703125,
      "step": 46285,
      "training_step_time": 0.410111665725708
    },
    {
      "epoch": 0.00028250732421875,
      "model_forward_time": 0.11527752876281738,
      "step": 46286
    },
    {
      "epoch": 0.00028250732421875,
      "step": 46286,
      "training_step_time": 0.4198727607727051
    },
    {
      "epoch": 0.000282513427734375,
      "model_forward_time": 0.11528182029724121,
      "step": 46287
    },
    {
      "epoch": 0.000282513427734375,
      "step": 46287,
      "training_step_time": 0.4997875690460205
    },
    {
      "epoch": 0.00028251953125,
      "model_forward_time": 0.11505317687988281,
      "step": 46288
    },
    {
      "epoch": 0.00028251953125,
      "step": 46288,
      "training_step_time": 0.4026167392730713
    },
    {
      "epoch": 0.000282525634765625,
      "model_forward_time": 0.11556720733642578,
      "step": 46289
    },
    {
      "epoch": 0.000282525634765625,
      "step": 46289,
      "training_step_time": 0.40354108810424805
    },
    {
      "epoch": 0.00028253173828125,
      "grad_norm": 0.11259496212005615,
      "learning_rate": 1.3608212326373249e-05,
      "loss": 0.0401,
      "step": 46290
    },
    {
      "epoch": 0.00028253173828125,
      "model_forward_time": 0.11495685577392578,
      "step": 46290
    },
    {
      "epoch": 0.00028253173828125,
      "step": 46290,
      "training_step_time": 0.4059922695159912
    },
    {
      "epoch": 0.000282537841796875,
      "model_forward_time": 0.11484694480895996,
      "step": 46291
    },
    {
      "epoch": 0.000282537841796875,
      "step": 46291,
      "training_step_time": 0.4381411075592041
    },
    {
      "epoch": 0.0002825439453125,
      "model_forward_time": 0.11513185501098633,
      "step": 46292
    },
    {
      "epoch": 0.0002825439453125,
      "step": 46292,
      "training_step_time": 0.47356367111206055
    },
    {
      "epoch": 0.000282550048828125,
      "model_forward_time": 0.11453104019165039,
      "step": 46293
    },
    {
      "epoch": 0.000282550048828125,
      "step": 46293,
      "training_step_time": 0.4492921829223633
    },
    {
      "epoch": 0.00028255615234375,
      "model_forward_time": 0.11642861366271973,
      "step": 46294
    },
    {
      "epoch": 0.00028255615234375,
      "step": 46294,
      "training_step_time": 0.4225940704345703
    },
    {
      "epoch": 0.000282562255859375,
      "model_forward_time": 0.114776611328125,
      "step": 46295
    },
    {
      "epoch": 0.000282562255859375,
      "step": 46295,
      "training_step_time": 0.4721195697784424
    },
    {
      "epoch": 0.000282568359375,
      "model_forward_time": 0.1146547794342041,
      "step": 46296
    },
    {
      "epoch": 0.000282568359375,
      "step": 46296,
      "training_step_time": 0.39631032943725586
    },
    {
      "epoch": 0.000282574462890625,
      "model_forward_time": 0.11455512046813965,
      "step": 46297
    },
    {
      "epoch": 0.000282574462890625,
      "step": 46297,
      "training_step_time": 0.3958601951599121
    },
    {
      "epoch": 0.00028258056640625,
      "model_forward_time": 0.11452865600585938,
      "step": 46298
    },
    {
      "epoch": 0.00028258056640625,
      "step": 46298,
      "training_step_time": 0.3945736885070801
    },
    {
      "epoch": 0.000282586669921875,
      "model_forward_time": 0.11472749710083008,
      "step": 46299
    },
    {
      "epoch": 0.000282586669921875,
      "step": 46299,
      "training_step_time": 0.4147794246673584
    },
    {
      "epoch": 0.0002825927734375,
      "grad_norm": 0.14502669870853424,
      "learning_rate": 1.3589320031376901e-05,
      "loss": 0.0404,
      "step": 46300
    },
    {
      "epoch": 0.0002825927734375,
      "model_forward_time": 0.11433196067810059,
      "step": 46300
    },
    {
      "epoch": 0.0002825927734375,
      "step": 46300,
      "training_step_time": 0.4320979118347168
    },
    {
      "epoch": 0.000282598876953125,
      "model_forward_time": 0.11484646797180176,
      "step": 46301
    },
    {
      "epoch": 0.000282598876953125,
      "step": 46301,
      "training_step_time": 0.6097862720489502
    },
    {
      "epoch": 0.00028260498046875,
      "model_forward_time": 0.11451315879821777,
      "step": 46302
    },
    {
      "epoch": 0.00028260498046875,
      "step": 46302,
      "training_step_time": 0.3999969959259033
    },
    {
      "epoch": 0.000282611083984375,
      "model_forward_time": 0.11456084251403809,
      "step": 46303
    },
    {
      "epoch": 0.000282611083984375,
      "step": 46303,
      "training_step_time": 0.39548659324645996
    },
    {
      "epoch": 0.0002826171875,
      "model_forward_time": 0.11459231376647949,
      "step": 46304
    },
    {
      "epoch": 0.0002826171875,
      "step": 46304,
      "training_step_time": 0.4512481689453125
    },
    {
      "epoch": 0.000282623291015625,
      "model_forward_time": 0.11432862281799316,
      "step": 46305
    },
    {
      "epoch": 0.000282623291015625,
      "step": 46305,
      "training_step_time": 0.4095950126647949
    },
    {
      "epoch": 0.00028262939453125,
      "model_forward_time": 0.11465048789978027,
      "step": 46306
    },
    {
      "epoch": 0.00028262939453125,
      "step": 46306,
      "training_step_time": 0.4232814311981201
    },
    {
      "epoch": 0.000282635498046875,
      "model_forward_time": 0.11447668075561523,
      "step": 46307
    },
    {
      "epoch": 0.000282635498046875,
      "step": 46307,
      "training_step_time": 0.43151378631591797
    },
    {
      "epoch": 0.0002826416015625,
      "model_forward_time": 0.11431455612182617,
      "step": 46308
    },
    {
      "epoch": 0.0002826416015625,
      "step": 46308,
      "training_step_time": 0.49651145935058594
    },
    {
      "epoch": 0.000282647705078125,
      "model_forward_time": 0.11455321311950684,
      "step": 46309
    },
    {
      "epoch": 0.000282647705078125,
      "step": 46309,
      "training_step_time": 0.3937408924102783
    },
    {
      "epoch": 0.00028265380859375,
      "grad_norm": 0.11598491668701172,
      "learning_rate": 1.3570438796983753e-05,
      "loss": 0.0447,
      "step": 46310
    },
    {
      "epoch": 0.00028265380859375,
      "model_forward_time": 0.11545395851135254,
      "step": 46310
    },
    {
      "epoch": 0.00028265380859375,
      "step": 46310,
      "training_step_time": 0.3973057270050049
    },
    {
      "epoch": 0.000282659912109375,
      "model_forward_time": 0.11477923393249512,
      "step": 46311
    },
    {
      "epoch": 0.000282659912109375,
      "step": 46311,
      "training_step_time": 0.401613712310791
    },
    {
      "epoch": 0.000282666015625,
      "model_forward_time": 0.11543083190917969,
      "step": 46312
    },
    {
      "epoch": 0.000282666015625,
      "step": 46312,
      "training_step_time": 0.393277645111084
    },
    {
      "epoch": 0.000282672119140625,
      "model_forward_time": 0.11497640609741211,
      "step": 46313
    },
    {
      "epoch": 0.000282672119140625,
      "step": 46313,
      "training_step_time": 0.5294458866119385
    },
    {
      "epoch": 0.00028267822265625,
      "model_forward_time": 0.11423754692077637,
      "step": 46314
    },
    {
      "epoch": 0.00028267822265625,
      "step": 46314,
      "training_step_time": 0.4060380458831787
    },
    {
      "epoch": 0.000282684326171875,
      "model_forward_time": 0.11476612091064453,
      "step": 46315
    },
    {
      "epoch": 0.000282684326171875,
      "step": 46315,
      "training_step_time": 0.4414229393005371
    },
    {
      "epoch": 0.0002826904296875,
      "model_forward_time": 0.1145944595336914,
      "step": 46316
    },
    {
      "epoch": 0.0002826904296875,
      "step": 46316,
      "training_step_time": 0.38715457916259766
    },
    {
      "epoch": 0.000282696533203125,
      "model_forward_time": 0.11486315727233887,
      "step": 46317
    },
    {
      "epoch": 0.000282696533203125,
      "step": 46317,
      "training_step_time": 0.39104676246643066
    },
    {
      "epoch": 0.00028270263671875,
      "model_forward_time": 0.1146397590637207,
      "step": 46318
    },
    {
      "epoch": 0.00028270263671875,
      "step": 46318,
      "training_step_time": 0.4090919494628906
    },
    {
      "epoch": 0.000282708740234375,
      "model_forward_time": 0.11472773551940918,
      "step": 46319
    },
    {
      "epoch": 0.000282708740234375,
      "step": 46319,
      "training_step_time": 0.5211336612701416
    },
    {
      "epoch": 0.00028271484375,
      "grad_norm": 0.11346901953220367,
      "learning_rate": 1.3551568628929434e-05,
      "loss": 0.038,
      "step": 46320
    },
    {
      "epoch": 0.00028271484375,
      "model_forward_time": 0.11515450477600098,
      "step": 46320
    },
    {
      "epoch": 0.00028271484375,
      "step": 46320,
      "training_step_time": 0.4143650531768799
    },
    {
      "epoch": 0.000282720947265625,
      "model_forward_time": 0.11501955986022949,
      "step": 46321
    },
    {
      "epoch": 0.000282720947265625,
      "step": 46321,
      "training_step_time": 0.4714233875274658
    },
    {
      "epoch": 0.00028272705078125,
      "model_forward_time": 0.11522531509399414,
      "step": 46322
    },
    {
      "epoch": 0.00028272705078125,
      "step": 46322,
      "training_step_time": 0.4879734516143799
    },
    {
      "epoch": 0.000282733154296875,
      "model_forward_time": 0.11511588096618652,
      "step": 46323
    },
    {
      "epoch": 0.000282733154296875,
      "step": 46323,
      "training_step_time": 0.38249969482421875
    },
    {
      "epoch": 0.0002827392578125,
      "model_forward_time": 0.1147451400756836,
      "step": 46324
    },
    {
      "epoch": 0.0002827392578125,
      "step": 46324,
      "training_step_time": 0.38524627685546875
    },
    {
      "epoch": 0.000282745361328125,
      "model_forward_time": 0.115081787109375,
      "step": 46325
    },
    {
      "epoch": 0.000282745361328125,
      "step": 46325,
      "training_step_time": 0.5639889240264893
    },
    {
      "epoch": 0.00028275146484375,
      "model_forward_time": 0.1143503189086914,
      "step": 46326
    },
    {
      "epoch": 0.00028275146484375,
      "step": 46326,
      "training_step_time": 0.3988628387451172
    },
    {
      "epoch": 0.000282757568359375,
      "model_forward_time": 0.11460733413696289,
      "step": 46327
    },
    {
      "epoch": 0.000282757568359375,
      "step": 46327,
      "training_step_time": 0.4013073444366455
    },
    {
      "epoch": 0.000282763671875,
      "model_forward_time": 0.11579537391662598,
      "step": 46328
    },
    {
      "epoch": 0.000282763671875,
      "step": 46328,
      "training_step_time": 0.44210243225097656
    },
    {
      "epoch": 0.000282769775390625,
      "model_forward_time": 0.11479377746582031,
      "step": 46329
    },
    {
      "epoch": 0.000282769775390625,
      "step": 46329,
      "training_step_time": 0.44918012619018555
    },
    {
      "epoch": 0.00028277587890625,
      "grad_norm": 0.10919611901044846,
      "learning_rate": 1.3532709532946186e-05,
      "loss": 0.0348,
      "step": 46330
    },
    {
      "epoch": 0.00028277587890625,
      "model_forward_time": 0.11482548713684082,
      "step": 46330
    },
    {
      "epoch": 0.00028277587890625,
      "step": 46330,
      "training_step_time": 0.3990051746368408
    },
    {
      "epoch": 0.000282781982421875,
      "model_forward_time": 0.11450457572937012,
      "step": 46331
    },
    {
      "epoch": 0.000282781982421875,
      "step": 46331,
      "training_step_time": 0.5249302387237549
    },
    {
      "epoch": 0.0002827880859375,
      "model_forward_time": 0.11569094657897949,
      "step": 46332
    },
    {
      "epoch": 0.0002827880859375,
      "step": 46332,
      "training_step_time": 0.48760533332824707
    },
    {
      "epoch": 0.000282794189453125,
      "model_forward_time": 0.11488604545593262,
      "step": 46333
    },
    {
      "epoch": 0.000282794189453125,
      "step": 46333,
      "training_step_time": 0.4441366195678711
    },
    {
      "epoch": 0.00028280029296875,
      "model_forward_time": 0.11486005783081055,
      "step": 46334
    },
    {
      "epoch": 0.00028280029296875,
      "step": 46334,
      "training_step_time": 0.43855762481689453
    },
    {
      "epoch": 0.000282806396484375,
      "model_forward_time": 0.11435604095458984,
      "step": 46335
    },
    {
      "epoch": 0.000282806396484375,
      "step": 46335,
      "training_step_time": 0.3998260498046875
    },
    {
      "epoch": 0.0002828125,
      "model_forward_time": 0.1148378849029541,
      "step": 46336
    },
    {
      "epoch": 0.0002828125,
      "step": 46336,
      "training_step_time": 0.4513564109802246
    },
    {
      "epoch": 0.000282818603515625,
      "model_forward_time": 0.11471390724182129,
      "step": 46337
    },
    {
      "epoch": 0.000282818603515625,
      "step": 46337,
      "training_step_time": 0.39923620223999023
    },
    {
      "epoch": 0.00028282470703125,
      "model_forward_time": 0.11508822441101074,
      "step": 46338
    },
    {
      "epoch": 0.00028282470703125,
      "step": 46338,
      "training_step_time": 0.40097904205322266
    },
    {
      "epoch": 0.000282830810546875,
      "model_forward_time": 0.11502504348754883,
      "step": 46339
    },
    {
      "epoch": 0.000282830810546875,
      "step": 46339,
      "training_step_time": 0.3950071334838867
    },
    {
      "epoch": 0.0002828369140625,
      "grad_norm": 0.08848267793655396,
      "learning_rate": 1.3513861514762933e-05,
      "loss": 0.0368,
      "step": 46340
    },
    {
      "epoch": 0.0002828369140625,
      "model_forward_time": 0.11437368392944336,
      "step": 46340
    },
    {
      "epoch": 0.0002828369140625,
      "step": 46340,
      "training_step_time": 0.3933117389678955
    },
    {
      "epoch": 0.000282843017578125,
      "model_forward_time": 0.1148989200592041,
      "step": 46341
    },
    {
      "epoch": 0.000282843017578125,
      "step": 46341,
      "training_step_time": 0.3867051601409912
    },
    {
      "epoch": 0.00028284912109375,
      "model_forward_time": 0.11566805839538574,
      "step": 46342
    },
    {
      "epoch": 0.00028284912109375,
      "step": 46342,
      "training_step_time": 0.41863346099853516
    },
    {
      "epoch": 0.000282855224609375,
      "model_forward_time": 0.11564993858337402,
      "step": 46343
    },
    {
      "epoch": 0.000282855224609375,
      "step": 46343,
      "training_step_time": 0.4891824722290039
    },
    {
      "epoch": 0.000282861328125,
      "model_forward_time": 0.11472940444946289,
      "step": 46344
    },
    {
      "epoch": 0.000282861328125,
      "step": 46344,
      "training_step_time": 0.4216175079345703
    },
    {
      "epoch": 0.000282867431640625,
      "model_forward_time": 0.11503005027770996,
      "step": 46345
    },
    {
      "epoch": 0.000282867431640625,
      "step": 46345,
      "training_step_time": 0.39591455459594727
    },
    {
      "epoch": 0.00028287353515625,
      "model_forward_time": 0.11436057090759277,
      "step": 46346
    },
    {
      "epoch": 0.00028287353515625,
      "step": 46346,
      "training_step_time": 0.41225337982177734
    },
    {
      "epoch": 0.000282879638671875,
      "model_forward_time": 0.11575651168823242,
      "step": 46347
    },
    {
      "epoch": 0.000282879638671875,
      "step": 46347,
      "training_step_time": 0.41484832763671875
    },
    {
      "epoch": 0.0002828857421875,
      "model_forward_time": 0.11572766304016113,
      "step": 46348
    },
    {
      "epoch": 0.0002828857421875,
      "step": 46348,
      "training_step_time": 0.3952507972717285
    },
    {
      "epoch": 0.000282891845703125,
      "model_forward_time": 0.1156775951385498,
      "step": 46349
    },
    {
      "epoch": 0.000282891845703125,
      "step": 46349,
      "training_step_time": 0.9628376960754395
    },
    {
      "epoch": 0.00028289794921875,
      "grad_norm": 0.11144841462373734,
      "learning_rate": 1.3495024580105192e-05,
      "loss": 0.0403,
      "step": 46350
    },
    {
      "epoch": 0.00028289794921875,
      "model_forward_time": 0.1144413948059082,
      "step": 46350
    },
    {
      "epoch": 0.00028289794921875,
      "step": 46350,
      "training_step_time": 0.40839695930480957
    },
    {
      "epoch": 0.000282904052734375,
      "model_forward_time": 0.1143956184387207,
      "step": 46351
    },
    {
      "epoch": 0.000282904052734375,
      "step": 46351,
      "training_step_time": 0.4051387310028076
    },
    {
      "epoch": 0.00028291015625,
      "model_forward_time": 0.11410808563232422,
      "step": 46352
    },
    {
      "epoch": 0.00028291015625,
      "step": 46352,
      "training_step_time": 0.3882255554199219
    },
    {
      "epoch": 0.000282916259765625,
      "model_forward_time": 0.11399102210998535,
      "step": 46353
    },
    {
      "epoch": 0.000282916259765625,
      "step": 46353,
      "training_step_time": 0.3899068832397461
    },
    {
      "epoch": 0.00028292236328125,
      "model_forward_time": 0.1136925220489502,
      "step": 46354
    },
    {
      "epoch": 0.00028292236328125,
      "step": 46354,
      "training_step_time": 0.390289306640625
    },
    {
      "epoch": 0.000282928466796875,
      "model_forward_time": 0.11475014686584473,
      "step": 46355
    },
    {
      "epoch": 0.000282928466796875,
      "step": 46355,
      "training_step_time": 0.5401389598846436
    },
    {
      "epoch": 0.0002829345703125,
      "model_forward_time": 0.11587786674499512,
      "step": 46356
    },
    {
      "epoch": 0.0002829345703125,
      "step": 46356,
      "training_step_time": 0.4885988235473633
    },
    {
      "epoch": 0.000282940673828125,
      "model_forward_time": 0.11435961723327637,
      "step": 46357
    },
    {
      "epoch": 0.000282940673828125,
      "step": 46357,
      "training_step_time": 0.390580415725708
    },
    {
      "epoch": 0.00028294677734375,
      "model_forward_time": 0.11492371559143066,
      "step": 46358
    },
    {
      "epoch": 0.00028294677734375,
      "step": 46358,
      "training_step_time": 0.4033985137939453
    },
    {
      "epoch": 0.000282952880859375,
      "model_forward_time": 0.11647486686706543,
      "step": 46359
    },
    {
      "epoch": 0.000282952880859375,
      "step": 46359,
      "training_step_time": 0.4019777774810791
    },
    {
      "epoch": 0.000282958984375,
      "grad_norm": 0.11863214522600174,
      "learning_rate": 1.3476198734695122e-05,
      "loss": 0.0338,
      "step": 46360
    },
    {
      "epoch": 0.000282958984375,
      "model_forward_time": 0.1145486831665039,
      "step": 46360
    },
    {
      "epoch": 0.000282958984375,
      "step": 46360,
      "training_step_time": 0.4807248115539551
    },
    {
      "epoch": 0.000282965087890625,
      "model_forward_time": 0.11466050148010254,
      "step": 46361
    },
    {
      "epoch": 0.000282965087890625,
      "step": 46361,
      "training_step_time": 0.4577140808105469
    },
    {
      "epoch": 0.00028297119140625,
      "model_forward_time": 0.11427688598632812,
      "step": 46362
    },
    {
      "epoch": 0.00028297119140625,
      "step": 46362,
      "training_step_time": 0.3660249710083008
    },
    {
      "epoch": 0.000282977294921875,
      "model_forward_time": 0.11552095413208008,
      "step": 46363
    },
    {
      "epoch": 0.000282977294921875,
      "step": 46363,
      "training_step_time": 0.4334695339202881
    },
    {
      "epoch": 0.0002829833984375,
      "model_forward_time": 0.11484217643737793,
      "step": 46364
    },
    {
      "epoch": 0.0002829833984375,
      "step": 46364,
      "training_step_time": 0.40129542350769043
    },
    {
      "epoch": 0.000282989501953125,
      "model_forward_time": 0.11493539810180664,
      "step": 46365
    },
    {
      "epoch": 0.000282989501953125,
      "step": 46365,
      "training_step_time": 0.39727139472961426
    },
    {
      "epoch": 0.00028299560546875,
      "model_forward_time": 0.11592245101928711,
      "step": 46366
    },
    {
      "epoch": 0.00028299560546875,
      "step": 46366,
      "training_step_time": 0.3826165199279785
    },
    {
      "epoch": 0.000283001708984375,
      "model_forward_time": 0.11561417579650879,
      "step": 46367
    },
    {
      "epoch": 0.000283001708984375,
      "step": 46367,
      "training_step_time": 0.38602757453918457
    },
    {
      "epoch": 0.0002830078125,
      "model_forward_time": 0.11506152153015137,
      "step": 46368
    },
    {
      "epoch": 0.0002830078125,
      "step": 46368,
      "training_step_time": 0.3850569725036621
    },
    {
      "epoch": 0.000283013916015625,
      "model_forward_time": 0.11549854278564453,
      "step": 46369
    },
    {
      "epoch": 0.000283013916015625,
      "step": 46369,
      "training_step_time": 0.48005056381225586
    },
    {
      "epoch": 0.00028302001953125,
      "grad_norm": 0.08815466612577438,
      "learning_rate": 1.3457383984251525e-05,
      "loss": 0.037,
      "step": 46370
    },
    {
      "epoch": 0.00028302001953125,
      "model_forward_time": 0.11542868614196777,
      "step": 46370
    },
    {
      "epoch": 0.00028302001953125,
      "step": 46370,
      "training_step_time": 0.42720985412597656
    },
    {
      "epoch": 0.000283026123046875,
      "model_forward_time": 0.11533594131469727,
      "step": 46371
    },
    {
      "epoch": 0.000283026123046875,
      "step": 46371,
      "training_step_time": 0.48694801330566406
    },
    {
      "epoch": 0.0002830322265625,
      "model_forward_time": 0.11517739295959473,
      "step": 46372
    },
    {
      "epoch": 0.0002830322265625,
      "step": 46372,
      "training_step_time": 0.44109225273132324
    },
    {
      "epoch": 0.000283038330078125,
      "model_forward_time": 0.11558032035827637,
      "step": 46373
    },
    {
      "epoch": 0.000283038330078125,
      "step": 46373,
      "training_step_time": 0.4008605480194092
    },
    {
      "epoch": 0.00028304443359375,
      "model_forward_time": 0.11477255821228027,
      "step": 46374
    },
    {
      "epoch": 0.00028304443359375,
      "step": 46374,
      "training_step_time": 0.41170501708984375
    },
    {
      "epoch": 0.000283050537109375,
      "model_forward_time": 0.11477231979370117,
      "step": 46375
    },
    {
      "epoch": 0.000283050537109375,
      "step": 46375,
      "training_step_time": 0.4043426513671875
    },
    {
      "epoch": 0.000283056640625,
      "model_forward_time": 0.11589241027832031,
      "step": 46376
    },
    {
      "epoch": 0.000283056640625,
      "step": 46376,
      "training_step_time": 0.49816346168518066
    },
    {
      "epoch": 0.000283062744140625,
      "model_forward_time": 0.11604118347167969,
      "step": 46377
    },
    {
      "epoch": 0.000283062744140625,
      "step": 46377,
      "training_step_time": 0.4414784908294678
    },
    {
      "epoch": 0.00028306884765625,
      "model_forward_time": 0.11478424072265625,
      "step": 46378
    },
    {
      "epoch": 0.00028306884765625,
      "step": 46378,
      "training_step_time": 0.49917125701904297
    },
    {
      "epoch": 0.000283074951171875,
      "model_forward_time": 0.11453008651733398,
      "step": 46379
    },
    {
      "epoch": 0.000283074951171875,
      "step": 46379,
      "training_step_time": 0.4009871482849121
    },
    {
      "epoch": 0.0002830810546875,
      "grad_norm": 0.09527681022882462,
      "learning_rate": 1.343858033448982e-05,
      "loss": 0.0381,
      "step": 46380
    },
    {
      "epoch": 0.0002830810546875,
      "model_forward_time": 0.11477994918823242,
      "step": 46380
    },
    {
      "epoch": 0.0002830810546875,
      "step": 46380,
      "training_step_time": 0.39420175552368164
    },
    {
      "epoch": 0.000283087158203125,
      "model_forward_time": 0.11429381370544434,
      "step": 46381
    },
    {
      "epoch": 0.000283087158203125,
      "step": 46381,
      "training_step_time": 0.39387941360473633
    },
    {
      "epoch": 0.00028309326171875,
      "model_forward_time": 0.11530089378356934,
      "step": 46382
    },
    {
      "epoch": 0.00028309326171875,
      "step": 46382,
      "training_step_time": 0.38863563537597656
    },
    {
      "epoch": 0.000283099365234375,
      "model_forward_time": 0.11475896835327148,
      "step": 46383
    },
    {
      "epoch": 0.000283099365234375,
      "step": 46383,
      "training_step_time": 0.38872575759887695
    },
    {
      "epoch": 0.00028310546875,
      "model_forward_time": 0.11526107788085938,
      "step": 46384
    },
    {
      "epoch": 0.00028310546875,
      "step": 46384,
      "training_step_time": 0.41461920738220215
    },
    {
      "epoch": 0.000283111572265625,
      "model_forward_time": 0.11551547050476074,
      "step": 46385
    },
    {
      "epoch": 0.000283111572265625,
      "step": 46385,
      "training_step_time": 0.3961312770843506
    },
    {
      "epoch": 0.00028311767578125,
      "model_forward_time": 0.11627912521362305,
      "step": 46386
    },
    {
      "epoch": 0.00028311767578125,
      "step": 46386,
      "training_step_time": 0.47043871879577637
    },
    {
      "epoch": 0.000283123779296875,
      "model_forward_time": 0.11592841148376465,
      "step": 46387
    },
    {
      "epoch": 0.000283123779296875,
      "step": 46387,
      "training_step_time": 0.42424631118774414
    },
    {
      "epoch": 0.0002831298828125,
      "model_forward_time": 0.11680150032043457,
      "step": 46388
    },
    {
      "epoch": 0.0002831298828125,
      "step": 46388,
      "training_step_time": 0.38951945304870605
    },
    {
      "epoch": 0.000283135986328125,
      "model_forward_time": 0.1156008243560791,
      "step": 46389
    },
    {
      "epoch": 0.000283135986328125,
      "step": 46389,
      "training_step_time": 0.41768550872802734
    },
    {
      "epoch": 0.00028314208984375,
      "grad_norm": 0.07553431391716003,
      "learning_rate": 1.3419787791122062e-05,
      "loss": 0.0362,
      "step": 46390
    },
    {
      "epoch": 0.00028314208984375,
      "model_forward_time": 0.11548733711242676,
      "step": 46390
    },
    {
      "epoch": 0.00028314208984375,
      "step": 46390,
      "training_step_time": 0.5127153396606445
    },
    {
      "epoch": 0.000283148193359375,
      "model_forward_time": 0.11489343643188477,
      "step": 46391
    },
    {
      "epoch": 0.000283148193359375,
      "step": 46391,
      "training_step_time": 0.44260454177856445
    },
    {
      "epoch": 0.000283154296875,
      "model_forward_time": 0.11569428443908691,
      "step": 46392
    },
    {
      "epoch": 0.000283154296875,
      "step": 46392,
      "training_step_time": 0.5061359405517578
    },
    {
      "epoch": 0.000283160400390625,
      "model_forward_time": 0.11550664901733398,
      "step": 46393
    },
    {
      "epoch": 0.000283160400390625,
      "step": 46393,
      "training_step_time": 0.3933908939361572
    },
    {
      "epoch": 0.00028316650390625,
      "model_forward_time": 0.11493730545043945,
      "step": 46394
    },
    {
      "epoch": 0.00028316650390625,
      "step": 46394,
      "training_step_time": 0.3846757411956787
    },
    {
      "epoch": 0.000283172607421875,
      "model_forward_time": 0.11532378196716309,
      "step": 46395
    },
    {
      "epoch": 0.000283172607421875,
      "step": 46395,
      "training_step_time": 0.38350653648376465
    },
    {
      "epoch": 0.0002831787109375,
      "model_forward_time": 0.1157081127166748,
      "step": 46396
    },
    {
      "epoch": 0.0002831787109375,
      "step": 46396,
      "training_step_time": 0.39113473892211914
    },
    {
      "epoch": 0.000283184814453125,
      "model_forward_time": 0.11470890045166016,
      "step": 46397
    },
    {
      "epoch": 0.000283184814453125,
      "step": 46397,
      "training_step_time": 0.40058469772338867
    },
    {
      "epoch": 0.00028319091796875,
      "model_forward_time": 0.11568307876586914,
      "step": 46398
    },
    {
      "epoch": 0.00028319091796875,
      "step": 46398,
      "training_step_time": 0.4701211452484131
    },
    {
      "epoch": 0.000283197021484375,
      "model_forward_time": 0.11576986312866211,
      "step": 46399
    },
    {
      "epoch": 0.000283197021484375,
      "step": 46399,
      "training_step_time": 0.4281949996948242
    },
    {
      "epoch": 0.000283203125,
      "grad_norm": 0.13088048994541168,
      "learning_rate": 1.3401006359856915e-05,
      "loss": 0.0341,
      "step": 46400
    },
    {
      "epoch": 0.000283203125,
      "model_forward_time": 0.11530470848083496,
      "step": 46400
    },
    {
      "epoch": 0.000283203125,
      "step": 46400,
      "training_step_time": 0.411038875579834
    },
    {
      "epoch": 0.000283209228515625,
      "model_forward_time": 0.11549592018127441,
      "step": 46401
    },
    {
      "epoch": 0.000283209228515625,
      "step": 46401,
      "training_step_time": 0.38092899322509766
    },
    {
      "epoch": 0.00028321533203125,
      "model_forward_time": 0.11511349678039551,
      "step": 46402
    },
    {
      "epoch": 0.00028321533203125,
      "step": 46402,
      "training_step_time": 0.3862936496734619
    },
    {
      "epoch": 0.000283221435546875,
      "model_forward_time": 0.11560559272766113,
      "step": 46403
    },
    {
      "epoch": 0.000283221435546875,
      "step": 46403,
      "training_step_time": 0.44228100776672363
    },
    {
      "epoch": 0.0002832275390625,
      "model_forward_time": 0.11548066139221191,
      "step": 46404
    },
    {
      "epoch": 0.0002832275390625,
      "step": 46404,
      "training_step_time": 0.4469137191772461
    },
    {
      "epoch": 0.000283233642578125,
      "model_forward_time": 0.11460089683532715,
      "step": 46405
    },
    {
      "epoch": 0.000283233642578125,
      "step": 46405,
      "training_step_time": 0.5051724910736084
    },
    {
      "epoch": 0.00028323974609375,
      "model_forward_time": 0.1148064136505127,
      "step": 46406
    },
    {
      "epoch": 0.00028323974609375,
      "step": 46406,
      "training_step_time": 0.39583659172058105
    },
    {
      "epoch": 0.000283245849609375,
      "model_forward_time": 0.11521387100219727,
      "step": 46407
    },
    {
      "epoch": 0.000283245849609375,
      "step": 46407,
      "training_step_time": 0.5033392906188965
    },
    {
      "epoch": 0.000283251953125,
      "model_forward_time": 0.11535954475402832,
      "step": 46408
    },
    {
      "epoch": 0.000283251953125,
      "step": 46408,
      "training_step_time": 0.3837471008300781
    },
    {
      "epoch": 0.000283258056640625,
      "model_forward_time": 0.11537027359008789,
      "step": 46409
    },
    {
      "epoch": 0.000283258056640625,
      "step": 46409,
      "training_step_time": 0.3953566551208496
    },
    {
      "epoch": 0.00028326416015625,
      "grad_norm": 0.12444295734167099,
      "learning_rate": 1.3382236046399722e-05,
      "loss": 0.0351,
      "step": 46410
    },
    {
      "epoch": 0.00028326416015625,
      "model_forward_time": 0.11461424827575684,
      "step": 46410
    },
    {
      "epoch": 0.00028326416015625,
      "step": 46410,
      "training_step_time": 0.4048492908477783
    },
    {
      "epoch": 0.000283270263671875,
      "model_forward_time": 0.11477947235107422,
      "step": 46411
    },
    {
      "epoch": 0.000283270263671875,
      "step": 46411,
      "training_step_time": 0.39136600494384766
    },
    {
      "epoch": 0.0002832763671875,
      "model_forward_time": 0.11556053161621094,
      "step": 46412
    },
    {
      "epoch": 0.0002832763671875,
      "step": 46412,
      "training_step_time": 0.42758893966674805
    },
    {
      "epoch": 0.000283282470703125,
      "model_forward_time": 0.11500000953674316,
      "step": 46413
    },
    {
      "epoch": 0.000283282470703125,
      "step": 46413,
      "training_step_time": 0.42430996894836426
    },
    {
      "epoch": 0.00028328857421875,
      "model_forward_time": 0.11500883102416992,
      "step": 46414
    },
    {
      "epoch": 0.00028328857421875,
      "step": 46414,
      "training_step_time": 0.45490217208862305
    },
    {
      "epoch": 0.000283294677734375,
      "model_forward_time": 0.11509275436401367,
      "step": 46415
    },
    {
      "epoch": 0.000283294677734375,
      "step": 46415,
      "training_step_time": 0.4114556312561035
    },
    {
      "epoch": 0.00028330078125,
      "model_forward_time": 0.11508584022521973,
      "step": 46416
    },
    {
      "epoch": 0.00028330078125,
      "step": 46416,
      "training_step_time": 0.39136362075805664
    },
    {
      "epoch": 0.000283306884765625,
      "model_forward_time": 0.11503791809082031,
      "step": 46417
    },
    {
      "epoch": 0.000283306884765625,
      "step": 46417,
      "training_step_time": 0.3925323486328125
    },
    {
      "epoch": 0.00028331298828125,
      "model_forward_time": 0.11528348922729492,
      "step": 46418
    },
    {
      "epoch": 0.00028331298828125,
      "step": 46418,
      "training_step_time": 0.4554157257080078
    },
    {
      "epoch": 0.000283319091796875,
      "model_forward_time": 0.1148834228515625,
      "step": 46419
    },
    {
      "epoch": 0.000283319091796875,
      "step": 46419,
      "training_step_time": 0.40125203132629395
    },
    {
      "epoch": 0.0002833251953125,
      "grad_norm": 0.10529785603284836,
      "learning_rate": 1.3363476856452356e-05,
      "loss": 0.0324,
      "step": 46420
    },
    {
      "epoch": 0.0002833251953125,
      "model_forward_time": 0.11550712585449219,
      "step": 46420
    },
    {
      "epoch": 0.0002833251953125,
      "step": 46420,
      "training_step_time": 0.4300391674041748
    },
    {
      "epoch": 0.000283331298828125,
      "model_forward_time": 0.11530351638793945,
      "step": 46421
    },
    {
      "epoch": 0.000283331298828125,
      "step": 46421,
      "training_step_time": 0.5104379653930664
    },
    {
      "epoch": 0.00028333740234375,
      "model_forward_time": 0.1149444580078125,
      "step": 46422
    },
    {
      "epoch": 0.00028333740234375,
      "step": 46422,
      "training_step_time": 0.4211745262145996
    },
    {
      "epoch": 0.000283343505859375,
      "model_forward_time": 0.1147623062133789,
      "step": 46423
    },
    {
      "epoch": 0.000283343505859375,
      "step": 46423,
      "training_step_time": 0.3993101119995117
    },
    {
      "epoch": 0.000283349609375,
      "model_forward_time": 0.1152029037475586,
      "step": 46424
    },
    {
      "epoch": 0.000283349609375,
      "step": 46424,
      "training_step_time": 0.4037799835205078
    },
    {
      "epoch": 0.000283355712890625,
      "model_forward_time": 0.1145334243774414,
      "step": 46425
    },
    {
      "epoch": 0.000283355712890625,
      "step": 46425,
      "training_step_time": 0.3918342590332031
    },
    {
      "epoch": 0.00028336181640625,
      "model_forward_time": 0.11554479598999023,
      "step": 46426
    },
    {
      "epoch": 0.00028336181640625,
      "step": 46426,
      "training_step_time": 0.389235258102417
    },
    {
      "epoch": 0.000283367919921875,
      "model_forward_time": 0.11547279357910156,
      "step": 46427
    },
    {
      "epoch": 0.000283367919921875,
      "step": 46427,
      "training_step_time": 0.502251148223877
    },
    {
      "epoch": 0.0002833740234375,
      "model_forward_time": 0.11539459228515625,
      "step": 46428
    },
    {
      "epoch": 0.0002833740234375,
      "step": 46428,
      "training_step_time": 0.4257380962371826
    },
    {
      "epoch": 0.000283380126953125,
      "model_forward_time": 0.11527681350708008,
      "step": 46429
    },
    {
      "epoch": 0.000283380126953125,
      "step": 46429,
      "training_step_time": 0.41962385177612305
    },
    {
      "epoch": 0.00028338623046875,
      "grad_norm": 0.1261577308177948,
      "learning_rate": 1.3344728795713413e-05,
      "loss": 0.0352,
      "step": 46430
    },
    {
      "epoch": 0.00028338623046875,
      "model_forward_time": 0.1148979663848877,
      "step": 46430
    },
    {
      "epoch": 0.00028338623046875,
      "step": 46430,
      "training_step_time": 0.3896770477294922
    },
    {
      "epoch": 0.000283392333984375,
      "model_forward_time": 0.11516594886779785,
      "step": 46431
    },
    {
      "epoch": 0.000283392333984375,
      "step": 46431,
      "training_step_time": 0.3775017261505127
    },
    {
      "epoch": 0.0002833984375,
      "model_forward_time": 0.1148688793182373,
      "step": 46432
    },
    {
      "epoch": 0.0002833984375,
      "step": 46432,
      "training_step_time": 0.4294617176055908
    },
    {
      "epoch": 0.000283404541015625,
      "model_forward_time": 0.11517572402954102,
      "step": 46433
    },
    {
      "epoch": 0.000283404541015625,
      "step": 46433,
      "training_step_time": 0.3958406448364258
    },
    {
      "epoch": 0.00028341064453125,
      "model_forward_time": 0.11464214324951172,
      "step": 46434
    },
    {
      "epoch": 0.00028341064453125,
      "step": 46434,
      "training_step_time": 0.47834205627441406
    },
    {
      "epoch": 0.000283416748046875,
      "model_forward_time": 0.11479687690734863,
      "step": 46435
    },
    {
      "epoch": 0.000283416748046875,
      "step": 46435,
      "training_step_time": 0.41609883308410645
    },
    {
      "epoch": 0.0002834228515625,
      "model_forward_time": 0.11481690406799316,
      "step": 46436
    },
    {
      "epoch": 0.0002834228515625,
      "step": 46436,
      "training_step_time": 0.5057611465454102
    },
    {
      "epoch": 0.000283428955078125,
      "model_forward_time": 0.11487460136413574,
      "step": 46437
    },
    {
      "epoch": 0.000283428955078125,
      "step": 46437,
      "training_step_time": 0.38735413551330566
    },
    {
      "epoch": 0.00028343505859375,
      "model_forward_time": 0.11468076705932617,
      "step": 46438
    },
    {
      "epoch": 0.00028343505859375,
      "step": 46438,
      "training_step_time": 0.37186622619628906
    },
    {
      "epoch": 0.000283441162109375,
      "model_forward_time": 0.11461615562438965,
      "step": 46439
    },
    {
      "epoch": 0.000283441162109375,
      "step": 46439,
      "training_step_time": 0.40709519386291504
    },
    {
      "epoch": 0.000283447265625,
      "grad_norm": 0.10047906637191772,
      "learning_rate": 1.3325991869878013e-05,
      "loss": 0.0379,
      "step": 46440
    },
    {
      "epoch": 0.000283447265625,
      "model_forward_time": 0.11500716209411621,
      "step": 46440
    },
    {
      "epoch": 0.000283447265625,
      "step": 46440,
      "training_step_time": 0.39571285247802734
    },
    {
      "epoch": 0.000283453369140625,
      "model_forward_time": 0.11510014533996582,
      "step": 46441
    },
    {
      "epoch": 0.000283453369140625,
      "step": 46441,
      "training_step_time": 0.4612741470336914
    },
    {
      "epoch": 0.00028345947265625,
      "model_forward_time": 0.11481976509094238,
      "step": 46442
    },
    {
      "epoch": 0.00028345947265625,
      "step": 46442,
      "training_step_time": 0.4285552501678467
    },
    {
      "epoch": 0.000283465576171875,
      "model_forward_time": 0.11525845527648926,
      "step": 46443
    },
    {
      "epoch": 0.000283465576171875,
      "step": 46443,
      "training_step_time": 0.46179819107055664
    },
    {
      "epoch": 0.0002834716796875,
      "model_forward_time": 0.11469101905822754,
      "step": 46444
    },
    {
      "epoch": 0.0002834716796875,
      "step": 46444,
      "training_step_time": 0.381192684173584
    },
    {
      "epoch": 0.000283477783203125,
      "model_forward_time": 0.11506986618041992,
      "step": 46445
    },
    {
      "epoch": 0.000283477783203125,
      "step": 46445,
      "training_step_time": 0.39519500732421875
    },
    {
      "epoch": 0.00028348388671875,
      "model_forward_time": 0.11465907096862793,
      "step": 46446
    },
    {
      "epoch": 0.00028348388671875,
      "step": 46446,
      "training_step_time": 0.3907954692840576
    },
    {
      "epoch": 0.000283489990234375,
      "model_forward_time": 0.11508989334106445,
      "step": 46447
    },
    {
      "epoch": 0.000283489990234375,
      "step": 46447,
      "training_step_time": 0.4888284206390381
    },
    {
      "epoch": 0.00028349609375,
      "model_forward_time": 0.11418485641479492,
      "step": 46448
    },
    {
      "epoch": 0.00028349609375,
      "step": 46448,
      "training_step_time": 0.4398524761199951
    },
    {
      "epoch": 0.000283502197265625,
      "model_forward_time": 0.11491727828979492,
      "step": 46449
    },
    {
      "epoch": 0.000283502197265625,
      "step": 46449,
      "training_step_time": 0.5146956443786621
    },
    {
      "epoch": 0.00028350830078125,
      "grad_norm": 0.09313669055700302,
      "learning_rate": 1.330726608463797e-05,
      "loss": 0.0327,
      "step": 46450
    },
    {
      "epoch": 0.00028350830078125,
      "model_forward_time": 0.11499881744384766,
      "step": 46450
    },
    {
      "epoch": 0.00028350830078125,
      "step": 46450,
      "training_step_time": 0.4499204158782959
    },
    {
      "epoch": 0.000283514404296875,
      "model_forward_time": 0.11490774154663086,
      "step": 46451
    },
    {
      "epoch": 0.000283514404296875,
      "step": 46451,
      "training_step_time": 0.379361629486084
    },
    {
      "epoch": 0.0002835205078125,
      "model_forward_time": 0.1143336296081543,
      "step": 46452
    },
    {
      "epoch": 0.0002835205078125,
      "step": 46452,
      "training_step_time": 0.38878345489501953
    },
    {
      "epoch": 0.000283526611328125,
      "model_forward_time": 0.11454343795776367,
      "step": 46453
    },
    {
      "epoch": 0.000283526611328125,
      "step": 46453,
      "training_step_time": 0.3890380859375
    },
    {
      "epoch": 0.00028353271484375,
      "model_forward_time": 0.11487960815429688,
      "step": 46454
    },
    {
      "epoch": 0.00028353271484375,
      "step": 46454,
      "training_step_time": 0.412761926651001
    },
    {
      "epoch": 0.000283538818359375,
      "model_forward_time": 0.11502337455749512,
      "step": 46455
    },
    {
      "epoch": 0.000283538818359375,
      "step": 46455,
      "training_step_time": 0.3964879512786865
    },
    {
      "epoch": 0.000283544921875,
      "model_forward_time": 0.1145009994506836,
      "step": 46456
    },
    {
      "epoch": 0.000283544921875,
      "step": 46456,
      "training_step_time": 0.385739803314209
    },
    {
      "epoch": 0.000283551025390625,
      "model_forward_time": 0.1148223876953125,
      "step": 46457
    },
    {
      "epoch": 0.000283551025390625,
      "step": 46457,
      "training_step_time": 0.4540436267852783
    },
    {
      "epoch": 0.00028355712890625,
      "model_forward_time": 0.11441326141357422,
      "step": 46458
    },
    {
      "epoch": 0.00028355712890625,
      "step": 46458,
      "training_step_time": 0.40364575386047363
    },
    {
      "epoch": 0.000283563232421875,
      "model_forward_time": 0.11478209495544434,
      "step": 46459
    },
    {
      "epoch": 0.000283563232421875,
      "step": 46459,
      "training_step_time": 0.38917970657348633
    },
    {
      "epoch": 0.0002835693359375,
      "grad_norm": 0.1498652994632721,
      "learning_rate": 1.328855144568168e-05,
      "loss": 0.0381,
      "step": 46460
    },
    {
      "epoch": 0.0002835693359375,
      "model_forward_time": 0.1154327392578125,
      "step": 46460
    },
    {
      "epoch": 0.0002835693359375,
      "step": 46460,
      "training_step_time": 0.3975555896759033
    },
    {
      "epoch": 0.000283575439453125,
      "model_forward_time": 0.11534428596496582,
      "step": 46461
    },
    {
      "epoch": 0.000283575439453125,
      "step": 46461,
      "training_step_time": 0.38936948776245117
    },
    {
      "epoch": 0.00028358154296875,
      "model_forward_time": 0.11526226997375488,
      "step": 46462
    },
    {
      "epoch": 0.00028358154296875,
      "step": 46462,
      "training_step_time": 0.427609920501709
    },
    {
      "epoch": 0.000283587646484375,
      "model_forward_time": 0.11513829231262207,
      "step": 46463
    },
    {
      "epoch": 0.000283587646484375,
      "step": 46463,
      "training_step_time": 0.6108317375183105
    },
    {
      "epoch": 0.00028359375,
      "model_forward_time": 0.11443686485290527,
      "step": 46464
    },
    {
      "epoch": 0.00028359375,
      "step": 46464,
      "training_step_time": 0.4021186828613281
    },
    {
      "epoch": 0.000283599853515625,
      "model_forward_time": 0.11513447761535645,
      "step": 46465
    },
    {
      "epoch": 0.000283599853515625,
      "step": 46465,
      "training_step_time": 0.3916347026824951
    },
    {
      "epoch": 0.00028360595703125,
      "model_forward_time": 0.11423158645629883,
      "step": 46466
    },
    {
      "epoch": 0.00028360595703125,
      "step": 46466,
      "training_step_time": 0.3877542018890381
    },
    {
      "epoch": 0.000283612060546875,
      "model_forward_time": 0.11477994918823242,
      "step": 46467
    },
    {
      "epoch": 0.000283612060546875,
      "step": 46467,
      "training_step_time": 0.39000415802001953
    },
    {
      "epoch": 0.0002836181640625,
      "model_forward_time": 0.11434555053710938,
      "step": 46468
    },
    {
      "epoch": 0.0002836181640625,
      "step": 46468,
      "training_step_time": 0.393993616104126
    },
    {
      "epoch": 0.000283624267578125,
      "model_forward_time": 0.11438322067260742,
      "step": 46469
    },
    {
      "epoch": 0.000283624267578125,
      "step": 46469,
      "training_step_time": 0.6470434665679932
    },
    {
      "epoch": 0.00028363037109375,
      "grad_norm": 0.11952764540910721,
      "learning_rate": 1.3269847958694148e-05,
      "loss": 0.0354,
      "step": 46470
    },
    {
      "epoch": 0.00028363037109375,
      "model_forward_time": 0.11484766006469727,
      "step": 46470
    },
    {
      "epoch": 0.00028363037109375,
      "step": 46470,
      "training_step_time": 0.4194517135620117
    },
    {
      "epoch": 0.000283636474609375,
      "model_forward_time": 0.11493563652038574,
      "step": 46471
    },
    {
      "epoch": 0.000283636474609375,
      "step": 46471,
      "training_step_time": 0.45959019660949707
    },
    {
      "epoch": 0.000283642578125,
      "model_forward_time": 0.11501955986022949,
      "step": 46472
    },
    {
      "epoch": 0.000283642578125,
      "step": 46472,
      "training_step_time": 0.3868870735168457
    },
    {
      "epoch": 0.000283648681640625,
      "model_forward_time": 0.11532807350158691,
      "step": 46473
    },
    {
      "epoch": 0.000283648681640625,
      "step": 46473,
      "training_step_time": 0.3901047706604004
    },
    {
      "epoch": 0.00028365478515625,
      "model_forward_time": 0.11438202857971191,
      "step": 46474
    },
    {
      "epoch": 0.00028365478515625,
      "step": 46474,
      "training_step_time": 0.3867971897125244
    },
    {
      "epoch": 0.000283660888671875,
      "model_forward_time": 0.11548399925231934,
      "step": 46475
    },
    {
      "epoch": 0.000283660888671875,
      "step": 46475,
      "training_step_time": 0.3978853225708008
    },
    {
      "epoch": 0.0002836669921875,
      "model_forward_time": 0.11529016494750977,
      "step": 46476
    },
    {
      "epoch": 0.0002836669921875,
      "step": 46476,
      "training_step_time": 0.438122034072876
    },
    {
      "epoch": 0.000283673095703125,
      "model_forward_time": 0.11554169654846191,
      "step": 46477
    },
    {
      "epoch": 0.000283673095703125,
      "step": 46477,
      "training_step_time": 0.38759803771972656
    },
    {
      "epoch": 0.00028367919921875,
      "model_forward_time": 0.11527299880981445,
      "step": 46478
    },
    {
      "epoch": 0.00028367919921875,
      "step": 46478,
      "training_step_time": 0.43381714820861816
    },
    {
      "epoch": 0.000283685302734375,
      "model_forward_time": 0.11556053161621094,
      "step": 46479
    },
    {
      "epoch": 0.000283685302734375,
      "step": 46479,
      "training_step_time": 0.42818140983581543
    },
    {
      "epoch": 0.00028369140625,
      "grad_norm": 0.0919942557811737,
      "learning_rate": 1.325115562935701e-05,
      "loss": 0.0407,
      "step": 46480
    },
    {
      "epoch": 0.00028369140625,
      "model_forward_time": 0.11586952209472656,
      "step": 46480
    },
    {
      "epoch": 0.00028369140625,
      "step": 46480,
      "training_step_time": 0.37653207778930664
    },
    {
      "epoch": 0.000283697509765625,
      "model_forward_time": 0.11595726013183594,
      "step": 46481
    },
    {
      "epoch": 0.000283697509765625,
      "step": 46481,
      "training_step_time": 0.41134095191955566
    },
    {
      "epoch": 0.00028370361328125,
      "model_forward_time": 0.1153111457824707,
      "step": 46482
    },
    {
      "epoch": 0.00028370361328125,
      "step": 46482,
      "training_step_time": 0.40253496170043945
    },
    {
      "epoch": 0.000283709716796875,
      "model_forward_time": 0.11539363861083984,
      "step": 46483
    },
    {
      "epoch": 0.000283709716796875,
      "step": 46483,
      "training_step_time": 0.3954479694366455
    },
    {
      "epoch": 0.0002837158203125,
      "model_forward_time": 0.1150505542755127,
      "step": 46484
    },
    {
      "epoch": 0.0002837158203125,
      "step": 46484,
      "training_step_time": 0.43709301948547363
    },
    {
      "epoch": 0.000283721923828125,
      "model_forward_time": 0.11509537696838379,
      "step": 46485
    },
    {
      "epoch": 0.000283721923828125,
      "step": 46485,
      "training_step_time": 0.399242639541626
    },
    {
      "epoch": 0.00028372802734375,
      "model_forward_time": 0.11513090133666992,
      "step": 46486
    },
    {
      "epoch": 0.00028372802734375,
      "step": 46486,
      "training_step_time": 0.4231531620025635
    },
    {
      "epoch": 0.000283734130859375,
      "model_forward_time": 0.11507296562194824,
      "step": 46487
    },
    {
      "epoch": 0.000283734130859375,
      "step": 46487,
      "training_step_time": 0.4510159492492676
    },
    {
      "epoch": 0.000283740234375,
      "model_forward_time": 0.11547589302062988,
      "step": 46488
    },
    {
      "epoch": 0.000283740234375,
      "step": 46488,
      "training_step_time": 0.4227478504180908
    },
    {
      "epoch": 0.000283746337890625,
      "model_forward_time": 0.1148827075958252,
      "step": 46489
    },
    {
      "epoch": 0.000283746337890625,
      "step": 46489,
      "training_step_time": 0.40868663787841797
    },
    {
      "epoch": 0.00028375244140625,
      "grad_norm": 0.06817111372947693,
      "learning_rate": 1.323247446334847e-05,
      "loss": 0.0365,
      "step": 46490
    },
    {
      "epoch": 0.00028375244140625,
      "model_forward_time": 0.11500859260559082,
      "step": 46490
    },
    {
      "epoch": 0.00028375244140625,
      "step": 46490,
      "training_step_time": 0.3926725387573242
    },
    {
      "epoch": 0.000283758544921875,
      "model_forward_time": 0.1152794361114502,
      "step": 46491
    },
    {
      "epoch": 0.000283758544921875,
      "step": 46491,
      "training_step_time": 0.40125465393066406
    },
    {
      "epoch": 0.0002837646484375,
      "model_forward_time": 0.11540651321411133,
      "step": 46492
    },
    {
      "epoch": 0.0002837646484375,
      "step": 46492,
      "training_step_time": 0.39660072326660156
    },
    {
      "epoch": 0.000283770751953125,
      "model_forward_time": 0.11518120765686035,
      "step": 46493
    },
    {
      "epoch": 0.000283770751953125,
      "step": 46493,
      "training_step_time": 0.762866735458374
    },
    {
      "epoch": 0.00028377685546875,
      "model_forward_time": 0.11463046073913574,
      "step": 46494
    },
    {
      "epoch": 0.00028377685546875,
      "step": 46494,
      "training_step_time": 0.4077465534210205
    },
    {
      "epoch": 0.000283782958984375,
      "model_forward_time": 0.11460137367248535,
      "step": 46495
    },
    {
      "epoch": 0.000283782958984375,
      "step": 46495,
      "training_step_time": 0.39377760887145996
    },
    {
      "epoch": 0.0002837890625,
      "model_forward_time": 0.11441493034362793,
      "step": 46496
    },
    {
      "epoch": 0.0002837890625,
      "step": 46496,
      "training_step_time": 0.38190627098083496
    },
    {
      "epoch": 0.000283795166015625,
      "model_forward_time": 0.11372923851013184,
      "step": 46497
    },
    {
      "epoch": 0.000283795166015625,
      "step": 46497,
      "training_step_time": 0.37933349609375
    },
    {
      "epoch": 0.00028380126953125,
      "model_forward_time": 0.11415958404541016,
      "step": 46498
    },
    {
      "epoch": 0.00028380126953125,
      "step": 46498,
      "training_step_time": 0.42735719680786133
    },
    {
      "epoch": 0.000283807373046875,
      "model_forward_time": 0.1149606704711914,
      "step": 46499
    },
    {
      "epoch": 0.000283807373046875,
      "step": 46499,
      "training_step_time": 0.5789883136749268
    },
    {
      "epoch": 0.0002838134765625,
      "grad_norm": 0.09737502038478851,
      "learning_rate": 1.3213804466343421e-05,
      "loss": 0.0353,
      "step": 46500
    },
    {
      "epoch": 0.0002838134765625,
      "model_forward_time": 0.11472916603088379,
      "step": 46500
    },
    {
      "epoch": 0.0002838134765625,
      "step": 46500,
      "training_step_time": 0.4620053768157959
    },
    {
      "epoch": 0.000283819580078125,
      "model_forward_time": 0.11429643630981445,
      "step": 46501
    },
    {
      "epoch": 0.000283819580078125,
      "step": 46501,
      "training_step_time": 0.3917102813720703
    },
    {
      "epoch": 0.00028382568359375,
      "model_forward_time": 0.11504220962524414,
      "step": 46502
    },
    {
      "epoch": 0.00028382568359375,
      "step": 46502,
      "training_step_time": 0.3965611457824707
    },
    {
      "epoch": 0.000283831787109375,
      "model_forward_time": 0.11445426940917969,
      "step": 46503
    },
    {
      "epoch": 0.000283831787109375,
      "step": 46503,
      "training_step_time": 0.38133907318115234
    },
    {
      "epoch": 0.000283837890625,
      "model_forward_time": 0.11521267890930176,
      "step": 46504
    },
    {
      "epoch": 0.000283837890625,
      "step": 46504,
      "training_step_time": 0.39786767959594727
    },
    {
      "epoch": 0.000283843994140625,
      "model_forward_time": 0.11495018005371094,
      "step": 46505
    },
    {
      "epoch": 0.000283843994140625,
      "step": 46505,
      "training_step_time": 0.6990966796875
    },
    {
      "epoch": 0.00028385009765625,
      "model_forward_time": 0.11471748352050781,
      "step": 46506
    },
    {
      "epoch": 0.00028385009765625,
      "step": 46506,
      "training_step_time": 0.5106368064880371
    },
    {
      "epoch": 0.000283856201171875,
      "model_forward_time": 0.11492228507995605,
      "step": 46507
    },
    {
      "epoch": 0.000283856201171875,
      "step": 46507,
      "training_step_time": 0.47919511795043945
    },
    {
      "epoch": 0.0002838623046875,
      "model_forward_time": 0.11461424827575684,
      "step": 46508
    },
    {
      "epoch": 0.0002838623046875,
      "step": 46508,
      "training_step_time": 0.4171750545501709
    },
    {
      "epoch": 0.000283868408203125,
      "model_forward_time": 0.1148993968963623,
      "step": 46509
    },
    {
      "epoch": 0.000283868408203125,
      "step": 46509,
      "training_step_time": 0.3931276798248291
    },
    {
      "epoch": 0.00028387451171875,
      "grad_norm": 0.09440454095602036,
      "learning_rate": 1.3195145644013285e-05,
      "loss": 0.0382,
      "step": 46510
    },
    {
      "epoch": 0.00028387451171875,
      "model_forward_time": 0.11440730094909668,
      "step": 46510
    },
    {
      "epoch": 0.00028387451171875,
      "step": 46510,
      "training_step_time": 0.38475537300109863
    },
    {
      "epoch": 0.000283880615234375,
      "model_forward_time": 0.11490440368652344,
      "step": 46511
    },
    {
      "epoch": 0.000283880615234375,
      "step": 46511,
      "training_step_time": 0.3832433223724365
    },
    {
      "epoch": 0.00028388671875,
      "model_forward_time": 0.11486148834228516,
      "step": 46512
    },
    {
      "epoch": 0.00028388671875,
      "step": 46512,
      "training_step_time": 0.39319562911987305
    },
    {
      "epoch": 0.000283892822265625,
      "model_forward_time": 0.1143639087677002,
      "step": 46513
    },
    {
      "epoch": 0.000283892822265625,
      "step": 46513,
      "training_step_time": 0.45754551887512207
    },
    {
      "epoch": 0.00028389892578125,
      "model_forward_time": 0.11521339416503906,
      "step": 46514
    },
    {
      "epoch": 0.00028389892578125,
      "step": 46514,
      "training_step_time": 0.39493370056152344
    },
    {
      "epoch": 0.000283905029296875,
      "model_forward_time": 0.11624741554260254,
      "step": 46515
    },
    {
      "epoch": 0.000283905029296875,
      "step": 46515,
      "training_step_time": 0.39646196365356445
    },
    {
      "epoch": 0.0002839111328125,
      "model_forward_time": 0.11504530906677246,
      "step": 46516
    },
    {
      "epoch": 0.0002839111328125,
      "step": 46516,
      "training_step_time": 0.37804746627807617
    },
    {
      "epoch": 0.000283917236328125,
      "model_forward_time": 0.11573529243469238,
      "step": 46517
    },
    {
      "epoch": 0.000283917236328125,
      "step": 46517,
      "training_step_time": 0.4479660987854004
    },
    {
      "epoch": 0.00028392333984375,
      "model_forward_time": 0.11509490013122559,
      "step": 46518
    },
    {
      "epoch": 0.00028392333984375,
      "step": 46518,
      "training_step_time": 0.4451735019683838
    },
    {
      "epoch": 0.000283929443359375,
      "model_forward_time": 0.11475086212158203,
      "step": 46519
    },
    {
      "epoch": 0.000283929443359375,
      "step": 46519,
      "training_step_time": 0.43542909622192383
    },
    {
      "epoch": 0.000283935546875,
      "grad_norm": 0.08308405429124832,
      "learning_rate": 1.3176498002026128e-05,
      "loss": 0.0395,
      "step": 46520
    },
    {
      "epoch": 0.000283935546875,
      "model_forward_time": 0.11508631706237793,
      "step": 46520
    },
    {
      "epoch": 0.000283935546875,
      "step": 46520,
      "training_step_time": 0.47545313835144043
    },
    {
      "epoch": 0.000283941650390625,
      "model_forward_time": 0.11617088317871094,
      "step": 46521
    },
    {
      "epoch": 0.000283941650390625,
      "step": 46521,
      "training_step_time": 0.4938185214996338
    },
    {
      "epoch": 0.00028394775390625,
      "model_forward_time": 0.11486649513244629,
      "step": 46522
    },
    {
      "epoch": 0.00028394775390625,
      "step": 46522,
      "training_step_time": 0.42467594146728516
    },
    {
      "epoch": 0.000283953857421875,
      "model_forward_time": 0.11439013481140137,
      "step": 46523
    },
    {
      "epoch": 0.000283953857421875,
      "step": 46523,
      "training_step_time": 0.3934016227722168
    },
    {
      "epoch": 0.0002839599609375,
      "model_forward_time": 0.11506938934326172,
      "step": 46524
    },
    {
      "epoch": 0.0002839599609375,
      "step": 46524,
      "training_step_time": 0.4012107849121094
    },
    {
      "epoch": 0.000283966064453125,
      "model_forward_time": 0.11487221717834473,
      "step": 46525
    },
    {
      "epoch": 0.000283966064453125,
      "step": 46525,
      "training_step_time": 0.39370179176330566
    },
    {
      "epoch": 0.00028397216796875,
      "model_forward_time": 0.11456418037414551,
      "step": 46526
    },
    {
      "epoch": 0.00028397216796875,
      "step": 46526,
      "training_step_time": 0.45794081687927246
    },
    {
      "epoch": 0.000283978271484375,
      "model_forward_time": 0.11454963684082031,
      "step": 46527
    },
    {
      "epoch": 0.000283978271484375,
      "step": 46527,
      "training_step_time": 0.38475918769836426
    },
    {
      "epoch": 0.000283984375,
      "model_forward_time": 0.11448478698730469,
      "step": 46528
    },
    {
      "epoch": 0.000283984375,
      "step": 46528,
      "training_step_time": 0.40290307998657227
    },
    {
      "epoch": 0.000283990478515625,
      "model_forward_time": 0.11516332626342773,
      "step": 46529
    },
    {
      "epoch": 0.000283990478515625,
      "step": 46529,
      "training_step_time": 0.49602842330932617
    },
    {
      "epoch": 0.00028399658203125,
      "grad_norm": 0.08625304698944092,
      "learning_rate": 1.3157861546046613e-05,
      "loss": 0.0377,
      "step": 46530
    },
    {
      "epoch": 0.00028399658203125,
      "model_forward_time": 0.11438703536987305,
      "step": 46530
    },
    {
      "epoch": 0.00028399658203125,
      "step": 46530,
      "training_step_time": 0.3964364528656006
    },
    {
      "epoch": 0.000284002685546875,
      "model_forward_time": 0.11456131935119629,
      "step": 46531
    },
    {
      "epoch": 0.000284002685546875,
      "step": 46531,
      "training_step_time": 0.39418768882751465
    },
    {
      "epoch": 0.0002840087890625,
      "model_forward_time": 0.11582636833190918,
      "step": 46532
    },
    {
      "epoch": 0.0002840087890625,
      "step": 46532,
      "training_step_time": 0.3888368606567383
    },
    {
      "epoch": 0.000284014892578125,
      "model_forward_time": 0.11526107788085938,
      "step": 46533
    },
    {
      "epoch": 0.000284014892578125,
      "step": 46533,
      "training_step_time": 0.41991138458251953
    },
    {
      "epoch": 0.00028402099609375,
      "model_forward_time": 0.11515402793884277,
      "step": 46534
    },
    {
      "epoch": 0.00028402099609375,
      "step": 46534,
      "training_step_time": 0.4504115581512451
    },
    {
      "epoch": 0.000284027099609375,
      "model_forward_time": 0.11473870277404785,
      "step": 46535
    },
    {
      "epoch": 0.000284027099609375,
      "step": 46535,
      "training_step_time": 0.5910978317260742
    },
    {
      "epoch": 0.000284033203125,
      "model_forward_time": 0.11479640007019043,
      "step": 46536
    },
    {
      "epoch": 0.000284033203125,
      "step": 46536,
      "training_step_time": 0.40215110778808594
    },
    {
      "epoch": 0.000284039306640625,
      "model_forward_time": 0.11415910720825195,
      "step": 46537
    },
    {
      "epoch": 0.000284039306640625,
      "step": 46537,
      "training_step_time": 0.3982431888580322
    },
    {
      "epoch": 0.00028404541015625,
      "model_forward_time": 0.11511588096618652,
      "step": 46538
    },
    {
      "epoch": 0.00028404541015625,
      "step": 46538,
      "training_step_time": 0.3865845203399658
    },
    {
      "epoch": 0.000284051513671875,
      "model_forward_time": 0.11515164375305176,
      "step": 46539
    },
    {
      "epoch": 0.000284051513671875,
      "step": 46539,
      "training_step_time": 0.3901534080505371
    },
    {
      "epoch": 0.0002840576171875,
      "grad_norm": 0.06875777244567871,
      "learning_rate": 1.3139236281736e-05,
      "loss": 0.0331,
      "step": 46540
    },
    {
      "epoch": 0.0002840576171875,
      "model_forward_time": 0.1143801212310791,
      "step": 46540
    },
    {
      "epoch": 0.0002840576171875,
      "step": 46540,
      "training_step_time": 0.40082669258117676
    },
    {
      "epoch": 0.000284063720703125,
      "model_forward_time": 0.11523771286010742,
      "step": 46541
    },
    {
      "epoch": 0.000284063720703125,
      "step": 46541,
      "training_step_time": 0.5446643829345703
    },
    {
      "epoch": 0.00028406982421875,
      "model_forward_time": 0.11478924751281738,
      "step": 46542
    },
    {
      "epoch": 0.00028406982421875,
      "step": 46542,
      "training_step_time": 0.4050912857055664
    },
    {
      "epoch": 0.000284075927734375,
      "model_forward_time": 0.11426067352294922,
      "step": 46543
    },
    {
      "epoch": 0.000284075927734375,
      "step": 46543,
      "training_step_time": 0.3873744010925293
    },
    {
      "epoch": 0.00028408203125,
      "model_forward_time": 0.1145639419555664,
      "step": 46544
    },
    {
      "epoch": 0.00028408203125,
      "step": 46544,
      "training_step_time": 0.3855621814727783
    },
    {
      "epoch": 0.000284088134765625,
      "model_forward_time": 0.11487054824829102,
      "step": 46545
    },
    {
      "epoch": 0.000284088134765625,
      "step": 46545,
      "training_step_time": 0.3960092067718506
    },
    {
      "epoch": 0.00028409423828125,
      "model_forward_time": 0.11574482917785645,
      "step": 46546
    },
    {
      "epoch": 0.00028409423828125,
      "step": 46546,
      "training_step_time": 0.3921785354614258
    },
    {
      "epoch": 0.000284100341796875,
      "model_forward_time": 0.11490821838378906,
      "step": 46547
    },
    {
      "epoch": 0.000284100341796875,
      "step": 46547,
      "training_step_time": 0.758608341217041
    },
    {
      "epoch": 0.0002841064453125,
      "model_forward_time": 0.11448311805725098,
      "step": 46548
    },
    {
      "epoch": 0.0002841064453125,
      "step": 46548,
      "training_step_time": 0.3612022399902344
    },
    {
      "epoch": 0.000284112548828125,
      "model_forward_time": 0.11417627334594727,
      "step": 46549
    },
    {
      "epoch": 0.000284112548828125,
      "step": 46549,
      "training_step_time": 0.43541693687438965
    },
    {
      "epoch": 0.00028411865234375,
      "grad_norm": 0.0806182324886322,
      "learning_rate": 1.3120622214752132e-05,
      "loss": 0.0339,
      "step": 46550
    },
    {
      "epoch": 0.00028411865234375,
      "model_forward_time": 0.11479544639587402,
      "step": 46550
    },
    {
      "epoch": 0.00028411865234375,
      "step": 46550,
      "training_step_time": 0.38485074043273926
    },
    {
      "epoch": 0.000284124755859375,
      "model_forward_time": 0.11479544639587402,
      "step": 46551
    },
    {
      "epoch": 0.000284124755859375,
      "step": 46551,
      "training_step_time": 0.3774912357330322
    },
    {
      "epoch": 0.000284130859375,
      "model_forward_time": 0.11487150192260742,
      "step": 46552
    },
    {
      "epoch": 0.000284130859375,
      "step": 46552,
      "training_step_time": 0.388852596282959
    },
    {
      "epoch": 0.000284136962890625,
      "model_forward_time": 0.11475992202758789,
      "step": 46553
    },
    {
      "epoch": 0.000284136962890625,
      "step": 46553,
      "training_step_time": 0.3935720920562744
    },
    {
      "epoch": 0.00028414306640625,
      "model_forward_time": 0.11506152153015137,
      "step": 46554
    },
    {
      "epoch": 0.00028414306640625,
      "step": 46554,
      "training_step_time": 0.4624018669128418
    },
    {
      "epoch": 0.000284149169921875,
      "model_forward_time": 0.11505842208862305,
      "step": 46555
    },
    {
      "epoch": 0.000284149169921875,
      "step": 46555,
      "training_step_time": 0.38341856002807617
    },
    {
      "epoch": 0.0002841552734375,
      "model_forward_time": 0.11478304862976074,
      "step": 46556
    },
    {
      "epoch": 0.0002841552734375,
      "step": 46556,
      "training_step_time": 0.41449999809265137
    },
    {
      "epoch": 0.000284161376953125,
      "model_forward_time": 0.1148366928100586,
      "step": 46557
    },
    {
      "epoch": 0.000284161376953125,
      "step": 46557,
      "training_step_time": 0.3784058094024658
    },
    {
      "epoch": 0.00028416748046875,
      "model_forward_time": 0.11485505104064941,
      "step": 46558
    },
    {
      "epoch": 0.00028416748046875,
      "step": 46558,
      "training_step_time": 0.38787078857421875
    },
    {
      "epoch": 0.000284173583984375,
      "model_forward_time": 0.11467599868774414,
      "step": 46559
    },
    {
      "epoch": 0.000284173583984375,
      "step": 46559,
      "training_step_time": 0.5860843658447266
    },
    {
      "epoch": 0.0002841796875,
      "grad_norm": 0.1292593628168106,
      "learning_rate": 1.3102019350749528e-05,
      "loss": 0.0389,
      "step": 46560
    },
    {
      "epoch": 0.0002841796875,
      "model_forward_time": 0.11480545997619629,
      "step": 46560
    },
    {
      "epoch": 0.0002841796875,
      "step": 46560,
      "training_step_time": 0.4005405902862549
    },
    {
      "epoch": 0.000284185791015625,
      "model_forward_time": 0.11452007293701172,
      "step": 46561
    },
    {
      "epoch": 0.000284185791015625,
      "step": 46561,
      "training_step_time": 0.4172477722167969
    },
    {
      "epoch": 0.00028419189453125,
      "model_forward_time": 0.11502552032470703,
      "step": 46562
    },
    {
      "epoch": 0.00028419189453125,
      "step": 46562,
      "training_step_time": 0.41000938415527344
    },
    {
      "epoch": 0.000284197998046875,
      "model_forward_time": 0.1143181324005127,
      "step": 46563
    },
    {
      "epoch": 0.000284197998046875,
      "step": 46563,
      "training_step_time": 0.47579216957092285
    },
    {
      "epoch": 0.0002842041015625,
      "model_forward_time": 0.11485886573791504,
      "step": 46564
    },
    {
      "epoch": 0.0002842041015625,
      "step": 46564,
      "training_step_time": 0.4944751262664795
    },
    {
      "epoch": 0.000284210205078125,
      "model_forward_time": 0.11426520347595215,
      "step": 46565
    },
    {
      "epoch": 0.000284210205078125,
      "step": 46565,
      "training_step_time": 0.3844895362854004
    },
    {
      "epoch": 0.00028421630859375,
      "model_forward_time": 0.11494183540344238,
      "step": 46566
    },
    {
      "epoch": 0.00028421630859375,
      "step": 46566,
      "training_step_time": 0.39502763748168945
    },
    {
      "epoch": 0.000284222412109375,
      "model_forward_time": 0.11466193199157715,
      "step": 46567
    },
    {
      "epoch": 0.000284222412109375,
      "step": 46567,
      "training_step_time": 0.3914036750793457
    },
    {
      "epoch": 0.000284228515625,
      "model_forward_time": 0.1147611141204834,
      "step": 46568
    },
    {
      "epoch": 0.000284228515625,
      "step": 46568,
      "training_step_time": 0.3967304229736328
    },
    {
      "epoch": 0.000284234619140625,
      "model_forward_time": 0.11471366882324219,
      "step": 46569
    },
    {
      "epoch": 0.000284234619140625,
      "step": 46569,
      "training_step_time": 0.42627501487731934
    },
    {
      "epoch": 0.00028424072265625,
      "grad_norm": 0.10595005005598068,
      "learning_rate": 1.3083427695379175e-05,
      "loss": 0.0361,
      "step": 46570
    },
    {
      "epoch": 0.00028424072265625,
      "model_forward_time": 0.11444759368896484,
      "step": 46570
    },
    {
      "epoch": 0.00028424072265625,
      "step": 46570,
      "training_step_time": 0.48532938957214355
    },
    {
      "epoch": 0.000284246826171875,
      "model_forward_time": 0.11512613296508789,
      "step": 46571
    },
    {
      "epoch": 0.000284246826171875,
      "step": 46571,
      "training_step_time": 0.4803030490875244
    },
    {
      "epoch": 0.0002842529296875,
      "model_forward_time": 0.11506462097167969,
      "step": 46572
    },
    {
      "epoch": 0.0002842529296875,
      "step": 46572,
      "training_step_time": 0.3908376693725586
    },
    {
      "epoch": 0.000284259033203125,
      "model_forward_time": 0.11621809005737305,
      "step": 46573
    },
    {
      "epoch": 0.000284259033203125,
      "step": 46573,
      "training_step_time": 0.4057183265686035
    },
    {
      "epoch": 0.00028426513671875,
      "model_forward_time": 0.1152200698852539,
      "step": 46574
    },
    {
      "epoch": 0.00028426513671875,
      "step": 46574,
      "training_step_time": 0.4064524173736572
    },
    {
      "epoch": 0.000284271240234375,
      "model_forward_time": 0.11503410339355469,
      "step": 46575
    },
    {
      "epoch": 0.000284271240234375,
      "step": 46575,
      "training_step_time": 0.47026586532592773
    },
    {
      "epoch": 0.00028427734375,
      "model_forward_time": 0.1152801513671875,
      "step": 46576
    },
    {
      "epoch": 0.00028427734375,
      "step": 46576,
      "training_step_time": 0.42438268661499023
    },
    {
      "epoch": 0.000284283447265625,
      "model_forward_time": 0.11454129219055176,
      "step": 46577
    },
    {
      "epoch": 0.000284283447265625,
      "step": 46577,
      "training_step_time": 0.5071039199829102
    },
    {
      "epoch": 0.00028428955078125,
      "model_forward_time": 0.11624693870544434,
      "step": 46578
    },
    {
      "epoch": 0.00028428955078125,
      "step": 46578,
      "training_step_time": 0.48619508743286133
    },
    {
      "epoch": 0.000284295654296875,
      "model_forward_time": 0.1143641471862793,
      "step": 46579
    },
    {
      "epoch": 0.000284295654296875,
      "step": 46579,
      "training_step_time": 0.39173221588134766
    },
    {
      "epoch": 0.0002843017578125,
      "grad_norm": 0.10911240428686142,
      "learning_rate": 1.3064847254288797e-05,
      "loss": 0.0351,
      "step": 46580
    },
    {
      "epoch": 0.0002843017578125,
      "model_forward_time": 0.11456966400146484,
      "step": 46580
    },
    {
      "epoch": 0.0002843017578125,
      "step": 46580,
      "training_step_time": 0.38002490997314453
    },
    {
      "epoch": 0.000284307861328125,
      "model_forward_time": 0.11464047431945801,
      "step": 46581
    },
    {
      "epoch": 0.000284307861328125,
      "step": 46581,
      "training_step_time": 0.4106874465942383
    },
    {
      "epoch": 0.00028431396484375,
      "model_forward_time": 0.11447596549987793,
      "step": 46582
    },
    {
      "epoch": 0.00028431396484375,
      "step": 46582,
      "training_step_time": 0.3986210823059082
    },
    {
      "epoch": 0.000284320068359375,
      "model_forward_time": 0.11524558067321777,
      "step": 46583
    },
    {
      "epoch": 0.000284320068359375,
      "step": 46583,
      "training_step_time": 0.5084302425384521
    },
    {
      "epoch": 0.000284326171875,
      "model_forward_time": 0.11546516418457031,
      "step": 46584
    },
    {
      "epoch": 0.000284326171875,
      "step": 46584,
      "training_step_time": 0.41478466987609863
    },
    {
      "epoch": 0.000284332275390625,
      "model_forward_time": 0.11523318290710449,
      "step": 46585
    },
    {
      "epoch": 0.000284332275390625,
      "step": 46585,
      "training_step_time": 0.424304723739624
    },
    {
      "epoch": 0.00028433837890625,
      "model_forward_time": 0.11430573463439941,
      "step": 46586
    },
    {
      "epoch": 0.00028433837890625,
      "step": 46586,
      "training_step_time": 0.39086198806762695
    },
    {
      "epoch": 0.000284344482421875,
      "model_forward_time": 0.11504101753234863,
      "step": 46587
    },
    {
      "epoch": 0.000284344482421875,
      "step": 46587,
      "training_step_time": 0.39391374588012695
    },
    {
      "epoch": 0.0002843505859375,
      "model_forward_time": 0.11515188217163086,
      "step": 46588
    },
    {
      "epoch": 0.0002843505859375,
      "step": 46588,
      "training_step_time": 0.4158494472503662
    },
    {
      "epoch": 0.000284356689453125,
      "model_forward_time": 0.11501121520996094,
      "step": 46589
    },
    {
      "epoch": 0.000284356689453125,
      "step": 46589,
      "training_step_time": 0.5681362152099609
    },
    {
      "epoch": 0.00028436279296875,
      "grad_norm": 0.08177314698696136,
      "learning_rate": 1.3046278033122577e-05,
      "loss": 0.0334,
      "step": 46590
    },
    {
      "epoch": 0.00028436279296875,
      "model_forward_time": 0.11571669578552246,
      "step": 46590
    },
    {
      "epoch": 0.00028436279296875,
      "step": 46590,
      "training_step_time": 0.4004476070404053
    },
    {
      "epoch": 0.000284368896484375,
      "model_forward_time": 0.11487817764282227,
      "step": 46591
    },
    {
      "epoch": 0.000284368896484375,
      "step": 46591,
      "training_step_time": 0.4933609962463379
    },
    {
      "epoch": 0.000284375,
      "model_forward_time": 0.11495137214660645,
      "step": 46592
    },
    {
      "epoch": 0.000284375,
      "step": 46592,
      "training_step_time": 0.5024547576904297
    },
    {
      "epoch": 0.000284381103515625,
      "model_forward_time": 0.11452150344848633,
      "step": 46593
    },
    {
      "epoch": 0.000284381103515625,
      "step": 46593,
      "training_step_time": 0.3869795799255371
    },
    {
      "epoch": 0.00028438720703125,
      "model_forward_time": 0.11513280868530273,
      "step": 46594
    },
    {
      "epoch": 0.00028438720703125,
      "step": 46594,
      "training_step_time": 0.3879687786102295
    },
    {
      "epoch": 0.000284393310546875,
      "model_forward_time": 0.11506152153015137,
      "step": 46595
    },
    {
      "epoch": 0.000284393310546875,
      "step": 46595,
      "training_step_time": 0.40999555587768555
    },
    {
      "epoch": 0.0002843994140625,
      "model_forward_time": 0.11450481414794922,
      "step": 46596
    },
    {
      "epoch": 0.0002843994140625,
      "step": 46596,
      "training_step_time": 0.4206521511077881
    },
    {
      "epoch": 0.000284405517578125,
      "model_forward_time": 0.11447739601135254,
      "step": 46597
    },
    {
      "epoch": 0.000284405517578125,
      "step": 46597,
      "training_step_time": 0.42691779136657715
    },
    {
      "epoch": 0.00028441162109375,
      "model_forward_time": 0.11498045921325684,
      "step": 46598
    },
    {
      "epoch": 0.00028441162109375,
      "step": 46598,
      "training_step_time": 0.4762442111968994
    },
    {
      "epoch": 0.000284417724609375,
      "model_forward_time": 0.11481976509094238,
      "step": 46599
    },
    {
      "epoch": 0.000284417724609375,
      "step": 46599,
      "training_step_time": 0.39241552352905273
    },
    {
      "epoch": 0.000284423828125,
      "grad_norm": 0.09531647711992264,
      "learning_rate": 1.3027720037521397e-05,
      "loss": 0.0347,
      "step": 46600
    },
    {
      "epoch": 0.000284423828125,
      "model_forward_time": 0.11489319801330566,
      "step": 46600
    },
    {
      "epoch": 0.000284423828125,
      "step": 46600,
      "training_step_time": 0.38793087005615234
    },
    {
      "epoch": 0.000284429931640625,
      "model_forward_time": 0.11546993255615234,
      "step": 46601
    },
    {
      "epoch": 0.000284429931640625,
      "step": 46601,
      "training_step_time": 0.41809558868408203
    },
    {
      "epoch": 0.00028443603515625,
      "model_forward_time": 0.11491870880126953,
      "step": 46602
    },
    {
      "epoch": 0.00028443603515625,
      "step": 46602,
      "training_step_time": 0.4074573516845703
    },
    {
      "epoch": 0.000284442138671875,
      "model_forward_time": 0.11446690559387207,
      "step": 46603
    },
    {
      "epoch": 0.000284442138671875,
      "step": 46603,
      "training_step_time": 0.40233588218688965
    },
    {
      "epoch": 0.0002844482421875,
      "model_forward_time": 0.11492753028869629,
      "step": 46604
    },
    {
      "epoch": 0.0002844482421875,
      "step": 46604,
      "training_step_time": 0.4028623104095459
    },
    {
      "epoch": 0.000284454345703125,
      "model_forward_time": 0.11509037017822266,
      "step": 46605
    },
    {
      "epoch": 0.000284454345703125,
      "step": 46605,
      "training_step_time": 0.36577630043029785
    },
    {
      "epoch": 0.00028446044921875,
      "model_forward_time": 0.11465001106262207,
      "step": 46606
    },
    {
      "epoch": 0.00028446044921875,
      "step": 46606,
      "training_step_time": 0.4386308193206787
    },
    {
      "epoch": 0.000284466552734375,
      "model_forward_time": 0.11468744277954102,
      "step": 46607
    },
    {
      "epoch": 0.000284466552734375,
      "step": 46607,
      "training_step_time": 0.4643135070800781
    },
    {
      "epoch": 0.00028447265625,
      "model_forward_time": 0.11514663696289062,
      "step": 46608
    },
    {
      "epoch": 0.00028447265625,
      "step": 46608,
      "training_step_time": 0.3951573371887207
    },
    {
      "epoch": 0.000284478759765625,
      "model_forward_time": 0.11542034149169922,
      "step": 46609
    },
    {
      "epoch": 0.000284478759765625,
      "step": 46609,
      "training_step_time": 0.3988528251647949
    },
    {
      "epoch": 0.00028448486328125,
      "grad_norm": 0.09359160810709,
      "learning_rate": 1.300917327312267e-05,
      "loss": 0.0353,
      "step": 46610
    },
    {
      "epoch": 0.00028448486328125,
      "model_forward_time": 0.11488461494445801,
      "step": 46610
    },
    {
      "epoch": 0.00028448486328125,
      "step": 46610,
      "training_step_time": 0.3856205940246582
    },
    {
      "epoch": 0.000284490966796875,
      "model_forward_time": 0.11611223220825195,
      "step": 46611
    },
    {
      "epoch": 0.000284490966796875,
      "step": 46611,
      "training_step_time": 0.4121739864349365
    },
    {
      "epoch": 0.0002844970703125,
      "model_forward_time": 0.11454415321350098,
      "step": 46612
    },
    {
      "epoch": 0.0002844970703125,
      "step": 46612,
      "training_step_time": 0.5053999423980713
    },
    {
      "epoch": 0.000284503173828125,
      "model_forward_time": 0.11473703384399414,
      "step": 46613
    },
    {
      "epoch": 0.000284503173828125,
      "step": 46613,
      "training_step_time": 0.3947765827178955
    },
    {
      "epoch": 0.00028450927734375,
      "model_forward_time": 0.11471843719482422,
      "step": 46614
    },
    {
      "epoch": 0.00028450927734375,
      "step": 46614,
      "training_step_time": 0.39449524879455566
    },
    {
      "epoch": 0.000284515380859375,
      "model_forward_time": 0.1148684024810791,
      "step": 46615
    },
    {
      "epoch": 0.000284515380859375,
      "step": 46615,
      "training_step_time": 0.41426897048950195
    },
    {
      "epoch": 0.000284521484375,
      "model_forward_time": 0.11474609375,
      "step": 46616
    },
    {
      "epoch": 0.000284521484375,
      "step": 46616,
      "training_step_time": 0.4161067008972168
    },
    {
      "epoch": 0.000284527587890625,
      "model_forward_time": 0.11518144607543945,
      "step": 46617
    },
    {
      "epoch": 0.000284527587890625,
      "step": 46617,
      "training_step_time": 0.37927699089050293
    },
    {
      "epoch": 0.00028453369140625,
      "model_forward_time": 0.11459541320800781,
      "step": 46618
    },
    {
      "epoch": 0.00028453369140625,
      "step": 46618,
      "training_step_time": 0.4070291519165039
    },
    {
      "epoch": 0.000284539794921875,
      "model_forward_time": 0.11503767967224121,
      "step": 46619
    },
    {
      "epoch": 0.000284539794921875,
      "step": 46619,
      "training_step_time": 0.5661444664001465
    },
    {
      "epoch": 0.0002845458984375,
      "grad_norm": 0.09193915128707886,
      "learning_rate": 1.299063774556042e-05,
      "loss": 0.0367,
      "step": 46620
    },
    {
      "epoch": 0.0002845458984375,
      "model_forward_time": 0.11453723907470703,
      "step": 46620
    },
    {
      "epoch": 0.0002845458984375,
      "step": 46620,
      "training_step_time": 0.37808847427368164
    },
    {
      "epoch": 0.000284552001953125,
      "model_forward_time": 0.11499834060668945,
      "step": 46621
    },
    {
      "epoch": 0.000284552001953125,
      "step": 46621,
      "training_step_time": 0.4342963695526123
    },
    {
      "epoch": 0.00028455810546875,
      "model_forward_time": 0.11428117752075195,
      "step": 46622
    },
    {
      "epoch": 0.00028455810546875,
      "step": 46622,
      "training_step_time": 0.398423433303833
    },
    {
      "epoch": 0.000284564208984375,
      "model_forward_time": 0.11460733413696289,
      "step": 46623
    },
    {
      "epoch": 0.000284564208984375,
      "step": 46623,
      "training_step_time": 0.40918874740600586
    },
    {
      "epoch": 0.0002845703125,
      "model_forward_time": 0.11461901664733887,
      "step": 46624
    },
    {
      "epoch": 0.0002845703125,
      "step": 46624,
      "training_step_time": 0.390627384185791
    },
    {
      "epoch": 0.000284576416015625,
      "model_forward_time": 0.11544561386108398,
      "step": 46625
    },
    {
      "epoch": 0.000284576416015625,
      "step": 46625,
      "training_step_time": 0.4762420654296875
    },
    {
      "epoch": 0.00028458251953125,
      "model_forward_time": 0.11475467681884766,
      "step": 46626
    },
    {
      "epoch": 0.00028458251953125,
      "step": 46626,
      "training_step_time": 0.4502096176147461
    },
    {
      "epoch": 0.000284588623046875,
      "model_forward_time": 0.11516404151916504,
      "step": 46627
    },
    {
      "epoch": 0.000284588623046875,
      "step": 46627,
      "training_step_time": 0.39498281478881836
    },
    {
      "epoch": 0.0002845947265625,
      "model_forward_time": 0.11407470703125,
      "step": 46628
    },
    {
      "epoch": 0.0002845947265625,
      "step": 46628,
      "training_step_time": 0.42164111137390137
    },
    {
      "epoch": 0.000284600830078125,
      "model_forward_time": 0.11493420600891113,
      "step": 46629
    },
    {
      "epoch": 0.000284600830078125,
      "step": 46629,
      "training_step_time": 0.4215974807739258
    },
    {
      "epoch": 0.00028460693359375,
      "grad_norm": 0.0762239620089531,
      "learning_rate": 1.2972113460465246e-05,
      "loss": 0.0314,
      "step": 46630
    },
    {
      "epoch": 0.00028460693359375,
      "model_forward_time": 0.11461520195007324,
      "step": 46630
    },
    {
      "epoch": 0.00028460693359375,
      "step": 46630,
      "training_step_time": 0.3863227367401123
    },
    {
      "epoch": 0.000284613037109375,
      "model_forward_time": 0.11542701721191406,
      "step": 46631
    },
    {
      "epoch": 0.000284613037109375,
      "step": 46631,
      "training_step_time": 0.6008574962615967
    },
    {
      "epoch": 0.000284619140625,
      "model_forward_time": 0.11421751976013184,
      "step": 46632
    },
    {
      "epoch": 0.000284619140625,
      "step": 46632,
      "training_step_time": 0.41579484939575195
    },
    {
      "epoch": 0.000284625244140625,
      "model_forward_time": 0.11519217491149902,
      "step": 46633
    },
    {
      "epoch": 0.000284625244140625,
      "step": 46633,
      "training_step_time": 0.39571309089660645
    },
    {
      "epoch": 0.00028463134765625,
      "model_forward_time": 0.11516761779785156,
      "step": 46634
    },
    {
      "epoch": 0.00028463134765625,
      "step": 46634,
      "training_step_time": 0.36710047721862793
    },
    {
      "epoch": 0.000284637451171875,
      "model_forward_time": 0.11487364768981934,
      "step": 46635
    },
    {
      "epoch": 0.000284637451171875,
      "step": 46635,
      "training_step_time": 0.3993527889251709
    },
    {
      "epoch": 0.0002846435546875,
      "model_forward_time": 0.11438751220703125,
      "step": 46636
    },
    {
      "epoch": 0.0002846435546875,
      "step": 46636,
      "training_step_time": 0.45908570289611816
    },
    {
      "epoch": 0.000284649658203125,
      "model_forward_time": 0.11476421356201172,
      "step": 46637
    },
    {
      "epoch": 0.000284649658203125,
      "step": 46637,
      "training_step_time": 0.3853034973144531
    },
    {
      "epoch": 0.00028465576171875,
      "model_forward_time": 0.11606597900390625,
      "step": 46638
    },
    {
      "epoch": 0.00028465576171875,
      "step": 46638,
      "training_step_time": 0.3763163089752197
    },
    {
      "epoch": 0.000284661865234375,
      "model_forward_time": 0.11456894874572754,
      "step": 46639
    },
    {
      "epoch": 0.000284661865234375,
      "step": 46639,
      "training_step_time": 0.44142627716064453
    },
    {
      "epoch": 0.00028466796875,
      "grad_norm": 0.08946765959262848,
      "learning_rate": 1.2953600423464324e-05,
      "loss": 0.0371,
      "step": 46640
    },
    {
      "epoch": 0.00028466796875,
      "model_forward_time": 0.11526083946228027,
      "step": 46640
    },
    {
      "epoch": 0.00028466796875,
      "step": 46640,
      "training_step_time": 0.4005589485168457
    },
    {
      "epoch": 0.000284674072265625,
      "model_forward_time": 0.11535358428955078,
      "step": 46641
    },
    {
      "epoch": 0.000284674072265625,
      "step": 46641,
      "training_step_time": 0.4493887424468994
    },
    {
      "epoch": 0.00028468017578125,
      "model_forward_time": 0.11760091781616211,
      "step": 46642
    },
    {
      "epoch": 0.00028468017578125,
      "step": 46642,
      "training_step_time": 0.39888930320739746
    },
    {
      "epoch": 0.000284686279296875,
      "model_forward_time": 0.11529040336608887,
      "step": 46643
    },
    {
      "epoch": 0.000284686279296875,
      "step": 46643,
      "training_step_time": 0.40220212936401367
    },
    {
      "epoch": 0.0002846923828125,
      "model_forward_time": 0.1153419017791748,
      "step": 46644
    },
    {
      "epoch": 0.0002846923828125,
      "step": 46644,
      "training_step_time": 0.39798879623413086
    },
    {
      "epoch": 0.000284698486328125,
      "model_forward_time": 0.11542057991027832,
      "step": 46645
    },
    {
      "epoch": 0.000284698486328125,
      "step": 46645,
      "training_step_time": 0.3970499038696289
    },
    {
      "epoch": 0.00028470458984375,
      "model_forward_time": 0.11459493637084961,
      "step": 46646
    },
    {
      "epoch": 0.00028470458984375,
      "step": 46646,
      "training_step_time": 0.39589715003967285
    },
    {
      "epoch": 0.000284710693359375,
      "model_forward_time": 0.11563801765441895,
      "step": 46647
    },
    {
      "epoch": 0.000284710693359375,
      "step": 46647,
      "training_step_time": 0.4362664222717285
    },
    {
      "epoch": 0.000284716796875,
      "model_forward_time": 0.11565566062927246,
      "step": 46648
    },
    {
      "epoch": 0.000284716796875,
      "step": 46648,
      "training_step_time": 0.4437687397003174
    },
    {
      "epoch": 0.000284722900390625,
      "model_forward_time": 0.11567473411560059,
      "step": 46649
    },
    {
      "epoch": 0.000284722900390625,
      "step": 46649,
      "training_step_time": 0.589439868927002
    },
    {
      "epoch": 0.00028472900390625,
      "grad_norm": 0.0859563946723938,
      "learning_rate": 1.293509864018146e-05,
      "loss": 0.0348,
      "step": 46650
    },
    {
      "epoch": 0.00028472900390625,
      "model_forward_time": 0.1154477596282959,
      "step": 46650
    },
    {
      "epoch": 0.00028472900390625,
      "step": 46650,
      "training_step_time": 0.41864967346191406
    },
    {
      "epoch": 0.000284735107421875,
      "model_forward_time": 0.11471390724182129,
      "step": 46651
    },
    {
      "epoch": 0.000284735107421875,
      "step": 46651,
      "training_step_time": 0.39748096466064453
    },
    {
      "epoch": 0.0002847412109375,
      "model_forward_time": 0.11430931091308594,
      "step": 46652
    },
    {
      "epoch": 0.0002847412109375,
      "step": 46652,
      "training_step_time": 0.4001026153564453
    },
    {
      "epoch": 0.000284747314453125,
      "model_forward_time": 0.11514616012573242,
      "step": 46653
    },
    {
      "epoch": 0.000284747314453125,
      "step": 46653,
      "training_step_time": 0.4503898620605469
    },
    {
      "epoch": 0.00028475341796875,
      "model_forward_time": 0.11521172523498535,
      "step": 46654
    },
    {
      "epoch": 0.00028475341796875,
      "step": 46654,
      "training_step_time": 0.42331981658935547
    },
    {
      "epoch": 0.000284759521484375,
      "model_forward_time": 0.11464500427246094,
      "step": 46655
    },
    {
      "epoch": 0.000284759521484375,
      "step": 46655,
      "training_step_time": 0.433851957321167
    },
    {
      "epoch": 0.000284765625,
      "model_forward_time": 0.11460566520690918,
      "step": 46656
    },
    {
      "epoch": 0.000284765625,
      "step": 46656,
      "training_step_time": 0.39076948165893555
    },
    {
      "epoch": 0.000284771728515625,
      "model_forward_time": 0.11543393135070801,
      "step": 46657
    },
    {
      "epoch": 0.000284771728515625,
      "step": 46657,
      "training_step_time": 0.43083858489990234
    },
    {
      "epoch": 0.00028477783203125,
      "model_forward_time": 0.11541008949279785,
      "step": 46658
    },
    {
      "epoch": 0.00028477783203125,
      "step": 46658,
      "training_step_time": 0.39579272270202637
    },
    {
      "epoch": 0.000284783935546875,
      "model_forward_time": 0.11518287658691406,
      "step": 46659
    },
    {
      "epoch": 0.000284783935546875,
      "step": 46659,
      "training_step_time": 0.4002504348754883
    },
    {
      "epoch": 0.0002847900390625,
      "grad_norm": 0.09992045909166336,
      "learning_rate": 1.2916608116236977e-05,
      "loss": 0.0324,
      "step": 46660
    },
    {
      "epoch": 0.0002847900390625,
      "model_forward_time": 0.1153867244720459,
      "step": 46660
    },
    {
      "epoch": 0.0002847900390625,
      "step": 46660,
      "training_step_time": 0.39542436599731445
    },
    {
      "epoch": 0.000284796142578125,
      "model_forward_time": 0.11509513854980469,
      "step": 46661
    },
    {
      "epoch": 0.000284796142578125,
      "step": 46661,
      "training_step_time": 0.7173357009887695
    },
    {
      "epoch": 0.00028480224609375,
      "model_forward_time": 0.1144866943359375,
      "step": 46662
    },
    {
      "epoch": 0.00028480224609375,
      "step": 46662,
      "training_step_time": 0.4006078243255615
    },
    {
      "epoch": 0.000284808349609375,
      "model_forward_time": 0.11496996879577637,
      "step": 46663
    },
    {
      "epoch": 0.000284808349609375,
      "step": 46663,
      "training_step_time": 0.36609506607055664
    },
    {
      "epoch": 0.000284814453125,
      "model_forward_time": 0.11447000503540039,
      "step": 46664
    },
    {
      "epoch": 0.000284814453125,
      "step": 46664,
      "training_step_time": 0.4345409870147705
    },
    {
      "epoch": 0.000284820556640625,
      "model_forward_time": 0.11493229866027832,
      "step": 46665
    },
    {
      "epoch": 0.000284820556640625,
      "step": 46665,
      "training_step_time": 0.40237879753112793
    },
    {
      "epoch": 0.00028482666015625,
      "model_forward_time": 0.11432909965515137,
      "step": 46666
    },
    {
      "epoch": 0.00028482666015625,
      "step": 46666,
      "training_step_time": 0.3931422233581543
    },
    {
      "epoch": 0.000284832763671875,
      "model_forward_time": 0.11463499069213867,
      "step": 46667
    },
    {
      "epoch": 0.000284832763671875,
      "step": 46667,
      "training_step_time": 0.47144627571105957
    },
    {
      "epoch": 0.0002848388671875,
      "model_forward_time": 0.11514091491699219,
      "step": 46668
    },
    {
      "epoch": 0.0002848388671875,
      "step": 46668,
      "training_step_time": 0.4164860248565674
    },
    {
      "epoch": 0.000284844970703125,
      "model_forward_time": 0.11516380310058594,
      "step": 46669
    },
    {
      "epoch": 0.000284844970703125,
      "step": 46669,
      "training_step_time": 0.4481773376464844
    },
    {
      "epoch": 0.00028485107421875,
      "grad_norm": 0.08633337169885635,
      "learning_rate": 1.2898128857247827e-05,
      "loss": 0.0401,
      "step": 46670
    },
    {
      "epoch": 0.00028485107421875,
      "model_forward_time": 0.1148073673248291,
      "step": 46670
    },
    {
      "epoch": 0.00028485107421875,
      "step": 46670,
      "training_step_time": 0.41617679595947266
    },
    {
      "epoch": 0.000284857177734375,
      "model_forward_time": 0.11598086357116699,
      "step": 46671
    },
    {
      "epoch": 0.000284857177734375,
      "step": 46671,
      "training_step_time": 0.3929431438446045
    },
    {
      "epoch": 0.00028486328125,
      "model_forward_time": 0.11494183540344238,
      "step": 46672
    },
    {
      "epoch": 0.00028486328125,
      "step": 46672,
      "training_step_time": 0.3942840099334717
    },
    {
      "epoch": 0.000284869384765625,
      "model_forward_time": 0.11529922485351562,
      "step": 46673
    },
    {
      "epoch": 0.000284869384765625,
      "step": 46673,
      "training_step_time": 0.3938581943511963
    },
    {
      "epoch": 0.00028487548828125,
      "model_forward_time": 0.11724376678466797,
      "step": 46674
    },
    {
      "epoch": 0.00028487548828125,
      "step": 46674,
      "training_step_time": 0.3795149326324463
    },
    {
      "epoch": 0.000284881591796875,
      "model_forward_time": 0.11469674110412598,
      "step": 46675
    },
    {
      "epoch": 0.000284881591796875,
      "step": 46675,
      "training_step_time": 0.39794278144836426
    },
    {
      "epoch": 0.0002848876953125,
      "model_forward_time": 0.1159672737121582,
      "step": 46676
    },
    {
      "epoch": 0.0002848876953125,
      "step": 46676,
      "training_step_time": 0.4530010223388672
    },
    {
      "epoch": 0.000284893798828125,
      "model_forward_time": 0.11510968208312988,
      "step": 46677
    },
    {
      "epoch": 0.000284893798828125,
      "step": 46677,
      "training_step_time": 0.49027585983276367
    },
    {
      "epoch": 0.00028489990234375,
      "model_forward_time": 0.11594605445861816,
      "step": 46678
    },
    {
      "epoch": 0.00028489990234375,
      "step": 46678,
      "training_step_time": 0.4629058837890625
    },
    {
      "epoch": 0.000284906005859375,
      "model_forward_time": 0.11528944969177246,
      "step": 46679
    },
    {
      "epoch": 0.000284906005859375,
      "step": 46679,
      "training_step_time": 0.554842472076416
    },
    {
      "epoch": 0.000284912109375,
      "grad_norm": 0.10491038113832474,
      "learning_rate": 1.2879660868827508e-05,
      "loss": 0.0363,
      "step": 46680
    },
    {
      "epoch": 0.000284912109375,
      "model_forward_time": 0.11572504043579102,
      "step": 46680
    },
    {
      "epoch": 0.000284912109375,
      "step": 46680,
      "training_step_time": 0.3803391456604004
    },
    {
      "epoch": 0.000284918212890625,
      "model_forward_time": 0.11513161659240723,
      "step": 46681
    },
    {
      "epoch": 0.000284918212890625,
      "step": 46681,
      "training_step_time": 0.4844632148742676
    },
    {
      "epoch": 0.00028492431640625,
      "model_forward_time": 0.11420512199401855,
      "step": 46682
    },
    {
      "epoch": 0.00028492431640625,
      "step": 46682,
      "training_step_time": 0.4332113265991211
    },
    {
      "epoch": 0.000284930419921875,
      "model_forward_time": 0.11433291435241699,
      "step": 46683
    },
    {
      "epoch": 0.000284930419921875,
      "step": 46683,
      "training_step_time": 0.40407800674438477
    },
    {
      "epoch": 0.0002849365234375,
      "model_forward_time": 0.11451244354248047,
      "step": 46684
    },
    {
      "epoch": 0.0002849365234375,
      "step": 46684,
      "training_step_time": 0.3934755325317383
    },
    {
      "epoch": 0.000284942626953125,
      "model_forward_time": 0.11505842208862305,
      "step": 46685
    },
    {
      "epoch": 0.000284942626953125,
      "step": 46685,
      "training_step_time": 0.4186697006225586
    },
    {
      "epoch": 0.00028494873046875,
      "model_forward_time": 0.11490678787231445,
      "step": 46686
    },
    {
      "epoch": 0.00028494873046875,
      "step": 46686,
      "training_step_time": 0.38831090927124023
    },
    {
      "epoch": 0.000284954833984375,
      "model_forward_time": 0.11571669578552246,
      "step": 46687
    },
    {
      "epoch": 0.000284954833984375,
      "step": 46687,
      "training_step_time": 0.39107227325439453
    },
    {
      "epoch": 0.0002849609375,
      "model_forward_time": 0.11538553237915039,
      "step": 46688
    },
    {
      "epoch": 0.0002849609375,
      "step": 46688,
      "training_step_time": 0.38403964042663574
    },
    {
      "epoch": 0.000284967041015625,
      "model_forward_time": 0.11623740196228027,
      "step": 46689
    },
    {
      "epoch": 0.000284967041015625,
      "step": 46689,
      "training_step_time": 0.38804006576538086
    },
    {
      "epoch": 0.00028497314453125,
      "grad_norm": 0.09729297459125519,
      "learning_rate": 1.286120415658611e-05,
      "loss": 0.0356,
      "step": 46690
    },
    {
      "epoch": 0.00028497314453125,
      "model_forward_time": 0.11524629592895508,
      "step": 46690
    },
    {
      "epoch": 0.00028497314453125,
      "step": 46690,
      "training_step_time": 0.46461057662963867
    },
    {
      "epoch": 0.000284979248046875,
      "model_forward_time": 0.11482667922973633,
      "step": 46691
    },
    {
      "epoch": 0.000284979248046875,
      "step": 46691,
      "training_step_time": 0.4627540111541748
    },
    {
      "epoch": 0.0002849853515625,
      "model_forward_time": 0.1156165599822998,
      "step": 46692
    },
    {
      "epoch": 0.0002849853515625,
      "step": 46692,
      "training_step_time": 0.4089484214782715
    },
    {
      "epoch": 0.000284991455078125,
      "model_forward_time": 0.11488175392150879,
      "step": 46693
    },
    {
      "epoch": 0.000284991455078125,
      "step": 46693,
      "training_step_time": 0.5083858966827393
    },
    {
      "epoch": 0.00028499755859375,
      "model_forward_time": 0.11480998992919922,
      "step": 46694
    },
    {
      "epoch": 0.00028499755859375,
      "step": 46694,
      "training_step_time": 0.38816213607788086
    },
    {
      "epoch": 0.000285003662109375,
      "model_forward_time": 0.11471056938171387,
      "step": 46695
    },
    {
      "epoch": 0.000285003662109375,
      "step": 46695,
      "training_step_time": 0.3964967727661133
    },
    {
      "epoch": 0.000285009765625,
      "model_forward_time": 0.1150519847869873,
      "step": 46696
    },
    {
      "epoch": 0.000285009765625,
      "step": 46696,
      "training_step_time": 0.45935773849487305
    },
    {
      "epoch": 0.000285015869140625,
      "model_forward_time": 0.11515331268310547,
      "step": 46697
    },
    {
      "epoch": 0.000285015869140625,
      "step": 46697,
      "training_step_time": 0.4410731792449951
    },
    {
      "epoch": 0.00028502197265625,
      "model_forward_time": 0.11515593528747559,
      "step": 46698
    },
    {
      "epoch": 0.00028502197265625,
      "step": 46698,
      "training_step_time": 0.39986205101013184
    },
    {
      "epoch": 0.000285028076171875,
      "model_forward_time": 0.11499929428100586,
      "step": 46699
    },
    {
      "epoch": 0.000285028076171875,
      "step": 46699,
      "training_step_time": 0.3920905590057373
    },
    {
      "epoch": 0.0002850341796875,
      "grad_norm": 0.0769890546798706,
      "learning_rate": 1.2842758726130283e-05,
      "loss": 0.0316,
      "step": 46700
    },
    {
      "epoch": 0.0002850341796875,
      "model_forward_time": 0.11510396003723145,
      "step": 46700
    },
    {
      "epoch": 0.0002850341796875,
      "step": 46700,
      "training_step_time": 0.3884415626525879
    },
    {
      "epoch": 0.000285040283203125,
      "model_forward_time": 0.1165311336517334,
      "step": 46701
    },
    {
      "epoch": 0.000285040283203125,
      "step": 46701,
      "training_step_time": 0.3874471187591553
    },
    {
      "epoch": 0.00028504638671875,
      "model_forward_time": 0.11470150947570801,
      "step": 46702
    },
    {
      "epoch": 0.00028504638671875,
      "step": 46702,
      "training_step_time": 0.3974475860595703
    },
    {
      "epoch": 0.000285052490234375,
      "model_forward_time": 0.11533236503601074,
      "step": 46703
    },
    {
      "epoch": 0.000285052490234375,
      "step": 46703,
      "training_step_time": 0.39270615577697754
    },
    {
      "epoch": 0.00028505859375,
      "model_forward_time": 0.11554789543151855,
      "step": 46704
    },
    {
      "epoch": 0.00028505859375,
      "step": 46704,
      "training_step_time": 0.40198278427124023
    },
    {
      "epoch": 0.000285064697265625,
      "model_forward_time": 0.11459088325500488,
      "step": 46705
    },
    {
      "epoch": 0.000285064697265625,
      "step": 46705,
      "training_step_time": 0.4004371166229248
    },
    {
      "epoch": 0.00028507080078125,
      "model_forward_time": 0.11511397361755371,
      "step": 46706
    },
    {
      "epoch": 0.00028507080078125,
      "step": 46706,
      "training_step_time": 0.4008469581604004
    },
    {
      "epoch": 0.000285076904296875,
      "model_forward_time": 0.11540436744689941,
      "step": 46707
    },
    {
      "epoch": 0.000285076904296875,
      "step": 46707,
      "training_step_time": 0.4423065185546875
    },
    {
      "epoch": 0.0002850830078125,
      "model_forward_time": 0.11462235450744629,
      "step": 46708
    },
    {
      "epoch": 0.0002850830078125,
      "step": 46708,
      "training_step_time": 0.514580249786377
    },
    {
      "epoch": 0.000285089111328125,
      "model_forward_time": 0.11460542678833008,
      "step": 46709
    },
    {
      "epoch": 0.000285089111328125,
      "step": 46709,
      "training_step_time": 0.4180605411529541
    },
    {
      "epoch": 0.00028509521484375,
      "grad_norm": 0.12197001278400421,
      "learning_rate": 1.2824324583063302e-05,
      "loss": 0.0371,
      "step": 46710
    },
    {
      "epoch": 0.00028509521484375,
      "model_forward_time": 0.11482644081115723,
      "step": 46710
    },
    {
      "epoch": 0.00028509521484375,
      "step": 46710,
      "training_step_time": 0.4336557388305664
    },
    {
      "epoch": 0.000285101318359375,
      "model_forward_time": 0.11530685424804688,
      "step": 46711
    },
    {
      "epoch": 0.000285101318359375,
      "step": 46711,
      "training_step_time": 0.4822523593902588
    },
    {
      "epoch": 0.000285107421875,
      "model_forward_time": 0.11495447158813477,
      "step": 46712
    },
    {
      "epoch": 0.000285107421875,
      "step": 46712,
      "training_step_time": 0.39505529403686523
    },
    {
      "epoch": 0.000285113525390625,
      "model_forward_time": 0.11455631256103516,
      "step": 46713
    },
    {
      "epoch": 0.000285113525390625,
      "step": 46713,
      "training_step_time": 0.386852502822876
    },
    {
      "epoch": 0.00028511962890625,
      "model_forward_time": 0.11530256271362305,
      "step": 46714
    },
    {
      "epoch": 0.00028511962890625,
      "step": 46714,
      "training_step_time": 0.38530802726745605
    },
    {
      "epoch": 0.000285125732421875,
      "model_forward_time": 0.11466026306152344,
      "step": 46715
    },
    {
      "epoch": 0.000285125732421875,
      "step": 46715,
      "training_step_time": 0.6214747428894043
    },
    {
      "epoch": 0.0002851318359375,
      "model_forward_time": 0.11446976661682129,
      "step": 46716
    },
    {
      "epoch": 0.0002851318359375,
      "step": 46716,
      "training_step_time": 0.3904836177825928
    },
    {
      "epoch": 0.000285137939453125,
      "model_forward_time": 0.11518073081970215,
      "step": 46717
    },
    {
      "epoch": 0.000285137939453125,
      "step": 46717,
      "training_step_time": 0.39238405227661133
    },
    {
      "epoch": 0.00028514404296875,
      "model_forward_time": 0.11544513702392578,
      "step": 46718
    },
    {
      "epoch": 0.00028514404296875,
      "step": 46718,
      "training_step_time": 0.3818838596343994
    },
    {
      "epoch": 0.000285150146484375,
      "model_forward_time": 0.11453700065612793,
      "step": 46719
    },
    {
      "epoch": 0.000285150146484375,
      "step": 46719,
      "training_step_time": 0.4705233573913574
    },
    {
      "epoch": 0.00028515625,
      "grad_norm": 0.10106078535318375,
      "learning_rate": 1.2805901732984921e-05,
      "loss": 0.0334,
      "step": 46720
    },
    {
      "epoch": 0.00028515625,
      "model_forward_time": 0.1152029037475586,
      "step": 46720
    },
    {
      "epoch": 0.00028515625,
      "step": 46720,
      "training_step_time": 0.42935776710510254
    },
    {
      "epoch": 0.000285162353515625,
      "model_forward_time": 0.11499643325805664,
      "step": 46721
    },
    {
      "epoch": 0.000285162353515625,
      "step": 46721,
      "training_step_time": 1.0460357666015625
    },
    {
      "epoch": 0.00028516845703125,
      "model_forward_time": 0.1137537956237793,
      "step": 46722
    },
    {
      "epoch": 0.00028516845703125,
      "step": 46722,
      "training_step_time": 0.4264497756958008
    },
    {
      "epoch": 0.000285174560546875,
      "model_forward_time": 0.11389899253845215,
      "step": 46723
    },
    {
      "epoch": 0.000285174560546875,
      "step": 46723,
      "training_step_time": 0.42966485023498535
    },
    {
      "epoch": 0.0002851806640625,
      "model_forward_time": 0.11348271369934082,
      "step": 46724
    },
    {
      "epoch": 0.0002851806640625,
      "step": 46724,
      "training_step_time": 0.3873612880706787
    },
    {
      "epoch": 0.000285186767578125,
      "model_forward_time": 0.11410951614379883,
      "step": 46725
    },
    {
      "epoch": 0.000285186767578125,
      "step": 46725,
      "training_step_time": 0.3879401683807373
    },
    {
      "epoch": 0.00028519287109375,
      "model_forward_time": 0.11400413513183594,
      "step": 46726
    },
    {
      "epoch": 0.00028519287109375,
      "step": 46726,
      "training_step_time": 0.39705729484558105
    },
    {
      "epoch": 0.000285198974609375,
      "model_forward_time": 0.11507201194763184,
      "step": 46727
    },
    {
      "epoch": 0.000285198974609375,
      "step": 46727,
      "training_step_time": 0.6269142627716064
    },
    {
      "epoch": 0.000285205078125,
      "model_forward_time": 0.11476802825927734,
      "step": 46728
    },
    {
      "epoch": 0.000285205078125,
      "step": 46728,
      "training_step_time": 0.3901093006134033
    },
    {
      "epoch": 0.000285211181640625,
      "model_forward_time": 0.11449527740478516,
      "step": 46729
    },
    {
      "epoch": 0.000285211181640625,
      "step": 46729,
      "training_step_time": 0.38294363021850586
    },
    {
      "epoch": 0.00028521728515625,
      "grad_norm": 0.12049190700054169,
      "learning_rate": 1.2787490181491568e-05,
      "loss": 0.0371,
      "step": 46730
    },
    {
      "epoch": 0.00028521728515625,
      "model_forward_time": 0.11510348320007324,
      "step": 46730
    },
    {
      "epoch": 0.00028521728515625,
      "step": 46730,
      "training_step_time": 0.3906588554382324
    },
    {
      "epoch": 0.000285223388671875,
      "model_forward_time": 0.1140894889831543,
      "step": 46731
    },
    {
      "epoch": 0.000285223388671875,
      "step": 46731,
      "training_step_time": 0.39285969734191895
    },
    {
      "epoch": 0.0002852294921875,
      "model_forward_time": 0.1145181655883789,
      "step": 46732
    },
    {
      "epoch": 0.0002852294921875,
      "step": 46732,
      "training_step_time": 0.4061272144317627
    },
    {
      "epoch": 0.000285235595703125,
      "model_forward_time": 0.11507534980773926,
      "step": 46733
    },
    {
      "epoch": 0.000285235595703125,
      "step": 46733,
      "training_step_time": 0.7277445793151855
    },
    {
      "epoch": 0.00028524169921875,
      "model_forward_time": 0.1147911548614502,
      "step": 46734
    },
    {
      "epoch": 0.00028524169921875,
      "step": 46734,
      "training_step_time": 0.4190378189086914
    },
    {
      "epoch": 0.000285247802734375,
      "model_forward_time": 0.11418318748474121,
      "step": 46735
    },
    {
      "epoch": 0.000285247802734375,
      "step": 46735,
      "training_step_time": 0.4858100414276123
    },
    {
      "epoch": 0.00028525390625,
      "model_forward_time": 0.1141805648803711,
      "step": 46736
    },
    {
      "epoch": 0.00028525390625,
      "step": 46736,
      "training_step_time": 0.40212154388427734
    },
    {
      "epoch": 0.000285260009765625,
      "model_forward_time": 0.11412692070007324,
      "step": 46737
    },
    {
      "epoch": 0.000285260009765625,
      "step": 46737,
      "training_step_time": 0.46473169326782227
    },
    {
      "epoch": 0.00028526611328125,
      "model_forward_time": 0.11381912231445312,
      "step": 46738
    },
    {
      "epoch": 0.00028526611328125,
      "step": 46738,
      "training_step_time": 0.40023231506347656
    },
    {
      "epoch": 0.000285272216796875,
      "model_forward_time": 0.11500024795532227,
      "step": 46739
    },
    {
      "epoch": 0.000285272216796875,
      "step": 46739,
      "training_step_time": 0.4117748737335205
    },
    {
      "epoch": 0.0002852783203125,
      "grad_norm": 0.11316493153572083,
      "learning_rate": 1.2769089934176126e-05,
      "loss": 0.0365,
      "step": 46740
    },
    {
      "epoch": 0.0002852783203125,
      "model_forward_time": 0.11487030982971191,
      "step": 46740
    },
    {
      "epoch": 0.0002852783203125,
      "step": 46740,
      "training_step_time": 0.39939260482788086
    },
    {
      "epoch": 0.000285284423828125,
      "model_forward_time": 0.11505603790283203,
      "step": 46741
    },
    {
      "epoch": 0.000285284423828125,
      "step": 46741,
      "training_step_time": 0.38831520080566406
    },
    {
      "epoch": 0.00028529052734375,
      "model_forward_time": 0.1143348217010498,
      "step": 46742
    },
    {
      "epoch": 0.00028529052734375,
      "step": 46742,
      "training_step_time": 0.40248656272888184
    },
    {
      "epoch": 0.000285296630859375,
      "model_forward_time": 0.1152181625366211,
      "step": 46743
    },
    {
      "epoch": 0.000285296630859375,
      "step": 46743,
      "training_step_time": 0.39472222328186035
    },
    {
      "epoch": 0.000285302734375,
      "model_forward_time": 0.11509370803833008,
      "step": 46744
    },
    {
      "epoch": 0.000285302734375,
      "step": 46744,
      "training_step_time": 0.39665722846984863
    },
    {
      "epoch": 0.000285308837890625,
      "model_forward_time": 0.11557555198669434,
      "step": 46745
    },
    {
      "epoch": 0.000285308837890625,
      "step": 46745,
      "training_step_time": 0.793703556060791
    },
    {
      "epoch": 0.00028531494140625,
      "model_forward_time": 0.11488795280456543,
      "step": 46746
    },
    {
      "epoch": 0.00028531494140625,
      "step": 46746,
      "training_step_time": 0.4288959503173828
    },
    {
      "epoch": 0.000285321044921875,
      "model_forward_time": 0.11467790603637695,
      "step": 46747
    },
    {
      "epoch": 0.000285321044921875,
      "step": 46747,
      "training_step_time": 0.3621397018432617
    },
    {
      "epoch": 0.0002853271484375,
      "model_forward_time": 0.11457967758178711,
      "step": 46748
    },
    {
      "epoch": 0.0002853271484375,
      "step": 46748,
      "training_step_time": 0.44546055793762207
    },
    {
      "epoch": 0.000285333251953125,
      "model_forward_time": 0.1143193244934082,
      "step": 46749
    },
    {
      "epoch": 0.000285333251953125,
      "step": 46749,
      "training_step_time": 0.4564239978790283
    },
    {
      "epoch": 0.00028533935546875,
      "grad_norm": 0.11509369313716888,
      "learning_rate": 1.275070099662815e-05,
      "loss": 0.0375,
      "step": 46750
    },
    {
      "epoch": 0.00028533935546875,
      "model_forward_time": 0.11434030532836914,
      "step": 46750
    },
    {
      "epoch": 0.00028533935546875,
      "step": 46750,
      "training_step_time": 0.391892671585083
    },
    {
      "epoch": 0.000285345458984375,
      "model_forward_time": 0.11447858810424805,
      "step": 46751
    },
    {
      "epoch": 0.000285345458984375,
      "step": 46751,
      "training_step_time": 0.47014927864074707
    },
    {
      "epoch": 0.0002853515625,
      "model_forward_time": 0.11458873748779297,
      "step": 46752
    },
    {
      "epoch": 0.0002853515625,
      "step": 46752,
      "training_step_time": 0.3962132930755615
    },
    {
      "epoch": 0.000285357666015625,
      "model_forward_time": 0.1142585277557373,
      "step": 46753
    },
    {
      "epoch": 0.000285357666015625,
      "step": 46753,
      "training_step_time": 0.39053988456726074
    },
    {
      "epoch": 0.00028536376953125,
      "model_forward_time": 0.11491703987121582,
      "step": 46754
    },
    {
      "epoch": 0.00028536376953125,
      "step": 46754,
      "training_step_time": 0.3977346420288086
    },
    {
      "epoch": 0.000285369873046875,
      "model_forward_time": 0.11517500877380371,
      "step": 46755
    },
    {
      "epoch": 0.000285369873046875,
      "step": 46755,
      "training_step_time": 0.3960444927215576
    },
    {
      "epoch": 0.0002853759765625,
      "model_forward_time": 0.11502337455749512,
      "step": 46756
    },
    {
      "epoch": 0.0002853759765625,
      "step": 46756,
      "training_step_time": 0.40163516998291016
    },
    {
      "epoch": 0.000285382080078125,
      "model_forward_time": 0.11558771133422852,
      "step": 46757
    },
    {
      "epoch": 0.000285382080078125,
      "step": 46757,
      "training_step_time": 0.3983588218688965
    },
    {
      "epoch": 0.00028538818359375,
      "model_forward_time": 0.11510014533996582,
      "step": 46758
    },
    {
      "epoch": 0.00028538818359375,
      "step": 46758,
      "training_step_time": 0.4020986557006836
    },
    {
      "epoch": 0.000285394287109375,
      "model_forward_time": 0.11488676071166992,
      "step": 46759
    },
    {
      "epoch": 0.000285394287109375,
      "step": 46759,
      "training_step_time": 0.40006589889526367
    },
    {
      "epoch": 0.000285400390625,
      "grad_norm": 0.10273399204015732,
      "learning_rate": 1.2732323374433707e-05,
      "loss": 0.0337,
      "step": 46760
    },
    {
      "epoch": 0.000285400390625,
      "model_forward_time": 0.11520695686340332,
      "step": 46760
    },
    {
      "epoch": 0.000285400390625,
      "step": 46760,
      "training_step_time": 0.4381852149963379
    },
    {
      "epoch": 0.000285406494140625,
      "model_forward_time": 0.1146693229675293,
      "step": 46761
    },
    {
      "epoch": 0.000285406494140625,
      "step": 46761,
      "training_step_time": 0.4392566680908203
    },
    {
      "epoch": 0.00028541259765625,
      "model_forward_time": 0.11478257179260254,
      "step": 46762
    },
    {
      "epoch": 0.00028541259765625,
      "step": 46762,
      "training_step_time": 0.49515342712402344
    },
    {
      "epoch": 0.000285418701171875,
      "model_forward_time": 0.11549830436706543,
      "step": 46763
    },
    {
      "epoch": 0.000285418701171875,
      "step": 46763,
      "training_step_time": 0.5049879550933838
    },
    {
      "epoch": 0.0002854248046875,
      "model_forward_time": 0.11502313613891602,
      "step": 46764
    },
    {
      "epoch": 0.0002854248046875,
      "step": 46764,
      "training_step_time": 0.43027305603027344
    },
    {
      "epoch": 0.000285430908203125,
      "model_forward_time": 0.11542415618896484,
      "step": 46765
    },
    {
      "epoch": 0.000285430908203125,
      "step": 46765,
      "training_step_time": 0.4233677387237549
    },
    {
      "epoch": 0.00028543701171875,
      "model_forward_time": 0.11447262763977051,
      "step": 46766
    },
    {
      "epoch": 0.00028543701171875,
      "step": 46766,
      "training_step_time": 0.39383840560913086
    },
    {
      "epoch": 0.000285443115234375,
      "model_forward_time": 0.11536431312561035,
      "step": 46767
    },
    {
      "epoch": 0.000285443115234375,
      "step": 46767,
      "training_step_time": 0.3999500274658203
    },
    {
      "epoch": 0.00028544921875,
      "model_forward_time": 0.11477470397949219,
      "step": 46768
    },
    {
      "epoch": 0.00028544921875,
      "step": 46768,
      "training_step_time": 0.39919400215148926
    },
    {
      "epoch": 0.000285455322265625,
      "model_forward_time": 0.11535906791687012,
      "step": 46769
    },
    {
      "epoch": 0.000285455322265625,
      "step": 46769,
      "training_step_time": 0.5289983749389648
    },
    {
      "epoch": 0.00028546142578125,
      "grad_norm": 0.1046626940369606,
      "learning_rate": 1.2713957073175425e-05,
      "loss": 0.0362,
      "step": 46770
    },
    {
      "epoch": 0.00028546142578125,
      "model_forward_time": 0.11458587646484375,
      "step": 46770
    },
    {
      "epoch": 0.00028546142578125,
      "step": 46770,
      "training_step_time": 0.40479516983032227
    },
    {
      "epoch": 0.000285467529296875,
      "model_forward_time": 0.11490321159362793,
      "step": 46771
    },
    {
      "epoch": 0.000285467529296875,
      "step": 46771,
      "training_step_time": 0.40161919593811035
    },
    {
      "epoch": 0.0002854736328125,
      "model_forward_time": 0.11538314819335938,
      "step": 46772
    },
    {
      "epoch": 0.0002854736328125,
      "step": 46772,
      "training_step_time": 0.40939974784851074
    },
    {
      "epoch": 0.000285479736328125,
      "model_forward_time": 0.11492156982421875,
      "step": 46773
    },
    {
      "epoch": 0.000285479736328125,
      "step": 46773,
      "training_step_time": 0.38900113105773926
    },
    {
      "epoch": 0.00028548583984375,
      "model_forward_time": 0.11472773551940918,
      "step": 46774
    },
    {
      "epoch": 0.00028548583984375,
      "step": 46774,
      "training_step_time": 0.4006993770599365
    },
    {
      "epoch": 0.000285491943359375,
      "model_forward_time": 0.1161048412322998,
      "step": 46775
    },
    {
      "epoch": 0.000285491943359375,
      "step": 46775,
      "training_step_time": 1.030358076095581
    },
    {
      "epoch": 0.000285498046875,
      "model_forward_time": 0.11448287963867188,
      "step": 46776
    },
    {
      "epoch": 0.000285498046875,
      "step": 46776,
      "training_step_time": 0.4203798770904541
    },
    {
      "epoch": 0.000285504150390625,
      "model_forward_time": 0.11466598510742188,
      "step": 46777
    },
    {
      "epoch": 0.000285504150390625,
      "step": 46777,
      "training_step_time": 0.43392062187194824
    },
    {
      "epoch": 0.00028551025390625,
      "model_forward_time": 0.11516070365905762,
      "step": 46778
    },
    {
      "epoch": 0.00028551025390625,
      "step": 46778,
      "training_step_time": 0.4691293239593506
    },
    {
      "epoch": 0.000285516357421875,
      "model_forward_time": 0.11397647857666016,
      "step": 46779
    },
    {
      "epoch": 0.000285516357421875,
      "step": 46779,
      "training_step_time": 0.36863112449645996
    },
    {
      "epoch": 0.0002855224609375,
      "grad_norm": 0.09672216325998306,
      "learning_rate": 1.2695602098432502e-05,
      "loss": 0.0362,
      "step": 46780
    },
    {
      "epoch": 0.0002855224609375,
      "model_forward_time": 0.11489200592041016,
      "step": 46780
    },
    {
      "epoch": 0.0002855224609375,
      "step": 46780,
      "training_step_time": 0.3767237663269043
    },
    {
      "epoch": 0.000285528564453125,
      "model_forward_time": 0.11509561538696289,
      "step": 46781
    },
    {
      "epoch": 0.000285528564453125,
      "step": 46781,
      "training_step_time": 0.5954480171203613
    },
    {
      "epoch": 0.00028553466796875,
      "model_forward_time": 0.11412787437438965,
      "step": 46782
    },
    {
      "epoch": 0.00028553466796875,
      "step": 46782,
      "training_step_time": 0.3954644203186035
    },
    {
      "epoch": 0.000285540771484375,
      "model_forward_time": 0.11410188674926758,
      "step": 46783
    },
    {
      "epoch": 0.000285540771484375,
      "step": 46783,
      "training_step_time": 0.3899092674255371
    },
    {
      "epoch": 0.000285546875,
      "model_forward_time": 0.11424612998962402,
      "step": 46784
    },
    {
      "epoch": 0.000285546875,
      "step": 46784,
      "training_step_time": 0.38650941848754883
    },
    {
      "epoch": 0.000285552978515625,
      "model_forward_time": 0.11547255516052246,
      "step": 46785
    },
    {
      "epoch": 0.000285552978515625,
      "step": 46785,
      "training_step_time": 0.3830680847167969
    },
    {
      "epoch": 0.00028555908203125,
      "model_forward_time": 0.11434435844421387,
      "step": 46786
    },
    {
      "epoch": 0.00028555908203125,
      "step": 46786,
      "training_step_time": 0.38897085189819336
    },
    {
      "epoch": 0.000285565185546875,
      "model_forward_time": 0.11467337608337402,
      "step": 46787
    },
    {
      "epoch": 0.000285565185546875,
      "step": 46787,
      "training_step_time": 1.1252307891845703
    },
    {
      "epoch": 0.0002855712890625,
      "model_forward_time": 0.11366462707519531,
      "step": 46788
    },
    {
      "epoch": 0.0002855712890625,
      "step": 46788,
      "training_step_time": 0.44475603103637695
    },
    {
      "epoch": 0.000285577392578125,
      "model_forward_time": 0.11357593536376953,
      "step": 46789
    },
    {
      "epoch": 0.000285577392578125,
      "step": 46789,
      "training_step_time": 0.44635486602783203
    },
    {
      "epoch": 0.00028558349609375,
      "grad_norm": 0.09602846950292587,
      "learning_rate": 1.2677258455780683e-05,
      "loss": 0.0344,
      "step": 46790
    },
    {
      "epoch": 0.00028558349609375,
      "model_forward_time": 0.1132206916809082,
      "step": 46790
    },
    {
      "epoch": 0.00028558349609375,
      "step": 46790,
      "training_step_time": 0.47057414054870605
    },
    {
      "epoch": 0.000285589599609375,
      "model_forward_time": 0.11364889144897461,
      "step": 46791
    },
    {
      "epoch": 0.000285589599609375,
      "step": 46791,
      "training_step_time": 0.3800020217895508
    },
    {
      "epoch": 0.000285595703125,
      "model_forward_time": 0.11406087875366211,
      "step": 46792
    },
    {
      "epoch": 0.000285595703125,
      "step": 46792,
      "training_step_time": 0.38255929946899414
    },
    {
      "epoch": 0.000285601806640625,
      "model_forward_time": 0.1141517162322998,
      "step": 46793
    },
    {
      "epoch": 0.000285601806640625,
      "step": 46793,
      "training_step_time": 0.70937180519104
    },
    {
      "epoch": 0.00028560791015625,
      "model_forward_time": 0.1141810417175293,
      "step": 46794
    },
    {
      "epoch": 0.00028560791015625,
      "step": 46794,
      "training_step_time": 0.386824369430542
    },
    {
      "epoch": 0.000285614013671875,
      "model_forward_time": 0.11377406120300293,
      "step": 46795
    },
    {
      "epoch": 0.000285614013671875,
      "step": 46795,
      "training_step_time": 0.3863091468811035
    },
    {
      "epoch": 0.0002856201171875,
      "model_forward_time": 0.1149129867553711,
      "step": 46796
    },
    {
      "epoch": 0.0002856201171875,
      "step": 46796,
      "training_step_time": 0.39496779441833496
    },
    {
      "epoch": 0.000285626220703125,
      "model_forward_time": 0.11401796340942383,
      "step": 46797
    },
    {
      "epoch": 0.000285626220703125,
      "step": 46797,
      "training_step_time": 0.3915128707885742
    },
    {
      "epoch": 0.00028563232421875,
      "model_forward_time": 0.11483931541442871,
      "step": 46798
    },
    {
      "epoch": 0.00028563232421875,
      "step": 46798,
      "training_step_time": 0.3857455253601074
    },
    {
      "epoch": 0.000285638427734375,
      "model_forward_time": 0.11510014533996582,
      "step": 46799
    },
    {
      "epoch": 0.000285638427734375,
      "step": 46799,
      "training_step_time": 0.5250890254974365
    },
    {
      "epoch": 0.00028564453125,
      "grad_norm": 0.12223093211650848,
      "learning_rate": 1.2658926150792322e-05,
      "loss": 0.0362,
      "step": 46800
    },
    {
      "epoch": 0.00028564453125,
      "model_forward_time": 0.11502265930175781,
      "step": 46800
    },
    {
      "epoch": 0.00028564453125,
      "step": 46800,
      "training_step_time": 0.4463379383087158
    },
    {
      "epoch": 0.000285650634765625,
      "model_forward_time": 0.11457157135009766,
      "step": 46801
    },
    {
      "epoch": 0.000285650634765625,
      "step": 46801,
      "training_step_time": 0.4347209930419922
    },
    {
      "epoch": 0.00028565673828125,
      "model_forward_time": 0.115020751953125,
      "step": 46802
    },
    {
      "epoch": 0.00028565673828125,
      "step": 46802,
      "training_step_time": 0.3673708438873291
    },
    {
      "epoch": 0.000285662841796875,
      "model_forward_time": 0.11614322662353516,
      "step": 46803
    },
    {
      "epoch": 0.000285662841796875,
      "step": 46803,
      "training_step_time": 0.4720759391784668
    },
    {
      "epoch": 0.0002856689453125,
      "model_forward_time": 0.11414313316345215,
      "step": 46804
    },
    {
      "epoch": 0.0002856689453125,
      "step": 46804,
      "training_step_time": 0.4023306369781494
    },
    {
      "epoch": 0.000285675048828125,
      "model_forward_time": 0.11480951309204102,
      "step": 46805
    },
    {
      "epoch": 0.000285675048828125,
      "step": 46805,
      "training_step_time": 0.519538402557373
    },
    {
      "epoch": 0.00028568115234375,
      "model_forward_time": 0.1142268180847168,
      "step": 46806
    },
    {
      "epoch": 0.00028568115234375,
      "step": 46806,
      "training_step_time": 0.3789510726928711
    },
    {
      "epoch": 0.000285687255859375,
      "model_forward_time": 0.1147160530090332,
      "step": 46807
    },
    {
      "epoch": 0.000285687255859375,
      "step": 46807,
      "training_step_time": 0.38287806510925293
    },
    {
      "epoch": 0.000285693359375,
      "model_forward_time": 0.11509275436401367,
      "step": 46808
    },
    {
      "epoch": 0.000285693359375,
      "step": 46808,
      "training_step_time": 0.37630200386047363
    },
    {
      "epoch": 0.000285699462890625,
      "model_forward_time": 0.11571216583251953,
      "step": 46809
    },
    {
      "epoch": 0.000285699462890625,
      "step": 46809,
      "training_step_time": 0.3867807388305664
    },
    {
      "epoch": 0.00028570556640625,
      "grad_norm": 0.121140256524086,
      "learning_rate": 1.2640605189036232e-05,
      "loss": 0.0369,
      "step": 46810
    },
    {
      "epoch": 0.00028570556640625,
      "model_forward_time": 0.11567854881286621,
      "step": 46810
    },
    {
      "epoch": 0.00028570556640625,
      "step": 46810,
      "training_step_time": 0.39981818199157715
    },
    {
      "epoch": 0.000285711669921875,
      "model_forward_time": 0.11481070518493652,
      "step": 46811
    },
    {
      "epoch": 0.000285711669921875,
      "step": 46811,
      "training_step_time": 0.5602762699127197
    },
    {
      "epoch": 0.0002857177734375,
      "model_forward_time": 0.11607551574707031,
      "step": 46812
    },
    {
      "epoch": 0.0002857177734375,
      "step": 46812,
      "training_step_time": 0.4122781753540039
    },
    {
      "epoch": 0.000285723876953125,
      "model_forward_time": 0.11477398872375488,
      "step": 46813
    },
    {
      "epoch": 0.000285723876953125,
      "step": 46813,
      "training_step_time": 0.39541125297546387
    },
    {
      "epoch": 0.00028572998046875,
      "model_forward_time": 0.11514973640441895,
      "step": 46814
    },
    {
      "epoch": 0.00028572998046875,
      "step": 46814,
      "training_step_time": 0.3921527862548828
    },
    {
      "epoch": 0.000285736083984375,
      "model_forward_time": 0.1143028736114502,
      "step": 46815
    },
    {
      "epoch": 0.000285736083984375,
      "step": 46815,
      "training_step_time": 0.45120835304260254
    },
    {
      "epoch": 0.0002857421875,
      "model_forward_time": 0.1146996021270752,
      "step": 46816
    },
    {
      "epoch": 0.0002857421875,
      "step": 46816,
      "training_step_time": 0.3975558280944824
    },
    {
      "epoch": 0.000285748291015625,
      "model_forward_time": 0.1149451732635498,
      "step": 46817
    },
    {
      "epoch": 0.000285748291015625,
      "step": 46817,
      "training_step_time": 0.4329185485839844
    },
    {
      "epoch": 0.00028575439453125,
      "model_forward_time": 0.11447930335998535,
      "step": 46818
    },
    {
      "epoch": 0.00028575439453125,
      "step": 46818,
      "training_step_time": 0.45421481132507324
    },
    {
      "epoch": 0.000285760498046875,
      "model_forward_time": 0.11533689498901367,
      "step": 46819
    },
    {
      "epoch": 0.000285760498046875,
      "step": 46819,
      "training_step_time": 0.38953685760498047
    },
    {
      "epoch": 0.0002857666015625,
      "grad_norm": 0.07535265386104584,
      "learning_rate": 1.2622295576077886e-05,
      "loss": 0.0324,
      "step": 46820
    },
    {
      "epoch": 0.0002857666015625,
      "model_forward_time": 0.11458086967468262,
      "step": 46820
    },
    {
      "epoch": 0.0002857666015625,
      "step": 46820,
      "training_step_time": 0.37746262550354004
    },
    {
      "epoch": 0.000285772705078125,
      "model_forward_time": 0.11503839492797852,
      "step": 46821
    },
    {
      "epoch": 0.000285772705078125,
      "step": 46821,
      "training_step_time": 0.3949456214904785
    },
    {
      "epoch": 0.00028577880859375,
      "model_forward_time": 0.11511540412902832,
      "step": 46822
    },
    {
      "epoch": 0.00028577880859375,
      "step": 46822,
      "training_step_time": 0.39754581451416016
    },
    {
      "epoch": 0.000285784912109375,
      "model_forward_time": 0.11483359336853027,
      "step": 46823
    },
    {
      "epoch": 0.000285784912109375,
      "step": 46823,
      "training_step_time": 0.5392043590545654
    },
    {
      "epoch": 0.000285791015625,
      "model_forward_time": 0.1147928237915039,
      "step": 46824
    },
    {
      "epoch": 0.000285791015625,
      "step": 46824,
      "training_step_time": 0.3952157497406006
    },
    {
      "epoch": 0.000285797119140625,
      "model_forward_time": 0.11519336700439453,
      "step": 46825
    },
    {
      "epoch": 0.000285797119140625,
      "step": 46825,
      "training_step_time": 0.4191758632659912
    },
    {
      "epoch": 0.00028580322265625,
      "model_forward_time": 0.11468672752380371,
      "step": 46826
    },
    {
      "epoch": 0.00028580322265625,
      "step": 46826,
      "training_step_time": 0.40804576873779297
    },
    {
      "epoch": 0.000285809326171875,
      "model_forward_time": 0.1154019832611084,
      "step": 46827
    },
    {
      "epoch": 0.000285809326171875,
      "step": 46827,
      "training_step_time": 0.3908252716064453
    },
    {
      "epoch": 0.0002858154296875,
      "model_forward_time": 0.1147146224975586,
      "step": 46828
    },
    {
      "epoch": 0.0002858154296875,
      "step": 46828,
      "training_step_time": 0.39711618423461914
    },
    {
      "epoch": 0.000285821533203125,
      "model_forward_time": 0.11455321311950684,
      "step": 46829
    },
    {
      "epoch": 0.000285821533203125,
      "step": 46829,
      "training_step_time": 0.5122098922729492
    },
    {
      "epoch": 0.00028582763671875,
      "grad_norm": 0.13018307089805603,
      "learning_rate": 1.2603997317479238e-05,
      "loss": 0.0399,
      "step": 46830
    },
    {
      "epoch": 0.00028582763671875,
      "model_forward_time": 0.11528682708740234,
      "step": 46830
    },
    {
      "epoch": 0.00028582763671875,
      "step": 46830,
      "training_step_time": 0.4241616725921631
    },
    {
      "epoch": 0.000285833740234375,
      "model_forward_time": 0.11478447914123535,
      "step": 46831
    },
    {
      "epoch": 0.000285833740234375,
      "step": 46831,
      "training_step_time": 0.45195865631103516
    },
    {
      "epoch": 0.00028583984375,
      "model_forward_time": 0.11528587341308594,
      "step": 46832
    },
    {
      "epoch": 0.00028583984375,
      "step": 46832,
      "training_step_time": 0.478804349899292
    },
    {
      "epoch": 0.000285845947265625,
      "model_forward_time": 0.11496591567993164,
      "step": 46833
    },
    {
      "epoch": 0.000285845947265625,
      "step": 46833,
      "training_step_time": 0.4828922748565674
    },
    {
      "epoch": 0.00028585205078125,
      "model_forward_time": 0.11504340171813965,
      "step": 46834
    },
    {
      "epoch": 0.00028585205078125,
      "step": 46834,
      "training_step_time": 0.3898775577545166
    },
    {
      "epoch": 0.000285858154296875,
      "model_forward_time": 0.11458897590637207,
      "step": 46835
    },
    {
      "epoch": 0.000285858154296875,
      "step": 46835,
      "training_step_time": 0.42406487464904785
    },
    {
      "epoch": 0.0002858642578125,
      "model_forward_time": 0.11462783813476562,
      "step": 46836
    },
    {
      "epoch": 0.0002858642578125,
      "step": 46836,
      "training_step_time": 0.3892202377319336
    },
    {
      "epoch": 0.000285870361328125,
      "model_forward_time": 0.11504340171813965,
      "step": 46837
    },
    {
      "epoch": 0.000285870361328125,
      "step": 46837,
      "training_step_time": 0.3951301574707031
    },
    {
      "epoch": 0.00028587646484375,
      "model_forward_time": 0.11524033546447754,
      "step": 46838
    },
    {
      "epoch": 0.00028587646484375,
      "step": 46838,
      "training_step_time": 0.402010440826416
    },
    {
      "epoch": 0.000285882568359375,
      "model_forward_time": 0.1159210205078125,
      "step": 46839
    },
    {
      "epoch": 0.000285882568359375,
      "step": 46839,
      "training_step_time": 0.4048941135406494
    },
    {
      "epoch": 0.000285888671875,
      "grad_norm": 0.09046506881713867,
      "learning_rate": 1.2585710418798823e-05,
      "loss": 0.0342,
      "step": 46840
    },
    {
      "epoch": 0.000285888671875,
      "model_forward_time": 0.11484289169311523,
      "step": 46840
    },
    {
      "epoch": 0.000285888671875,
      "step": 46840,
      "training_step_time": 0.47805023193359375
    },
    {
      "epoch": 0.000285894775390625,
      "model_forward_time": 0.11546730995178223,
      "step": 46841
    },
    {
      "epoch": 0.000285894775390625,
      "step": 46841,
      "training_step_time": 0.4602549076080322
    },
    {
      "epoch": 0.00028590087890625,
      "model_forward_time": 0.11502575874328613,
      "step": 46842
    },
    {
      "epoch": 0.00028590087890625,
      "step": 46842,
      "training_step_time": 0.3940896987915039
    },
    {
      "epoch": 0.000285906982421875,
      "model_forward_time": 0.11557674407958984,
      "step": 46843
    },
    {
      "epoch": 0.000285906982421875,
      "step": 46843,
      "training_step_time": 0.39818668365478516
    },
    {
      "epoch": 0.0002859130859375,
      "model_forward_time": 0.11510586738586426,
      "step": 46844
    },
    {
      "epoch": 0.0002859130859375,
      "step": 46844,
      "training_step_time": 0.44031572341918945
    },
    {
      "epoch": 0.000285919189453125,
      "model_forward_time": 0.1149909496307373,
      "step": 46845
    },
    {
      "epoch": 0.000285919189453125,
      "step": 46845,
      "training_step_time": 0.435344934463501
    },
    {
      "epoch": 0.00028592529296875,
      "model_forward_time": 0.11477494239807129,
      "step": 46846
    },
    {
      "epoch": 0.00028592529296875,
      "step": 46846,
      "training_step_time": 0.4023001194000244
    },
    {
      "epoch": 0.000285931396484375,
      "model_forward_time": 0.11499667167663574,
      "step": 46847
    },
    {
      "epoch": 0.000285931396484375,
      "step": 46847,
      "training_step_time": 0.5449929237365723
    },
    {
      "epoch": 0.0002859375,
      "model_forward_time": 0.11563491821289062,
      "step": 46848
    },
    {
      "epoch": 0.0002859375,
      "step": 46848,
      "training_step_time": 0.3915135860443115
    },
    {
      "epoch": 0.000285943603515625,
      "model_forward_time": 0.11452603340148926,
      "step": 46849
    },
    {
      "epoch": 0.000285943603515625,
      "step": 46849,
      "training_step_time": 0.39565134048461914
    },
    {
      "epoch": 0.00028594970703125,
      "grad_norm": 0.09252053499221802,
      "learning_rate": 1.2567434885591722e-05,
      "loss": 0.0345,
      "step": 46850
    },
    {
      "epoch": 0.00028594970703125,
      "model_forward_time": 0.11443448066711426,
      "step": 46850
    },
    {
      "epoch": 0.00028594970703125,
      "step": 46850,
      "training_step_time": 0.3997161388397217
    },
    {
      "epoch": 0.000285955810546875,
      "model_forward_time": 0.11497306823730469,
      "step": 46851
    },
    {
      "epoch": 0.000285955810546875,
      "step": 46851,
      "training_step_time": 0.3895854949951172
    },
    {
      "epoch": 0.0002859619140625,
      "model_forward_time": 0.11539530754089355,
      "step": 46852
    },
    {
      "epoch": 0.0002859619140625,
      "step": 46852,
      "training_step_time": 0.3905456066131592
    },
    {
      "epoch": 0.000285968017578125,
      "model_forward_time": 0.11483907699584961,
      "step": 46853
    },
    {
      "epoch": 0.000285968017578125,
      "step": 46853,
      "training_step_time": 0.5580408573150635
    },
    {
      "epoch": 0.00028597412109375,
      "model_forward_time": 0.11516523361206055,
      "step": 46854
    },
    {
      "epoch": 0.00028597412109375,
      "step": 46854,
      "training_step_time": 0.399669885635376
    },
    {
      "epoch": 0.000285980224609375,
      "model_forward_time": 0.11498856544494629,
      "step": 46855
    },
    {
      "epoch": 0.000285980224609375,
      "step": 46855,
      "training_step_time": 0.39728498458862305
    },
    {
      "epoch": 0.000285986328125,
      "model_forward_time": 0.11523962020874023,
      "step": 46856
    },
    {
      "epoch": 0.000285986328125,
      "step": 46856,
      "training_step_time": 0.40589237213134766
    },
    {
      "epoch": 0.000285992431640625,
      "model_forward_time": 0.11500763893127441,
      "step": 46857
    },
    {
      "epoch": 0.000285992431640625,
      "step": 46857,
      "training_step_time": 0.38879895210266113
    },
    {
      "epoch": 0.00028599853515625,
      "model_forward_time": 0.11526608467102051,
      "step": 46858
    },
    {
      "epoch": 0.00028599853515625,
      "step": 46858,
      "training_step_time": 0.43744421005249023
    },
    {
      "epoch": 0.000286004638671875,
      "model_forward_time": 0.1146543025970459,
      "step": 46859
    },
    {
      "epoch": 0.000286004638671875,
      "step": 46859,
      "training_step_time": 0.8119244575500488
    },
    {
      "epoch": 0.0002860107421875,
      "grad_norm": 0.0756116583943367,
      "learning_rate": 1.2549170723409549e-05,
      "loss": 0.0362,
      "step": 46860
    },
    {
      "epoch": 0.0002860107421875,
      "model_forward_time": 0.11484146118164062,
      "step": 46860
    },
    {
      "epoch": 0.0002860107421875,
      "step": 46860,
      "training_step_time": 0.4107198715209961
    },
    {
      "epoch": 0.000286016845703125,
      "model_forward_time": 0.11527013778686523,
      "step": 46861
    },
    {
      "epoch": 0.000286016845703125,
      "step": 46861,
      "training_step_time": 0.42928385734558105
    },
    {
      "epoch": 0.00028602294921875,
      "model_forward_time": 0.11496472358703613,
      "step": 46862
    },
    {
      "epoch": 0.00028602294921875,
      "step": 46862,
      "training_step_time": 0.3753325939178467
    },
    {
      "epoch": 0.000286029052734375,
      "model_forward_time": 0.1157233715057373,
      "step": 46863
    },
    {
      "epoch": 0.000286029052734375,
      "step": 46863,
      "training_step_time": 0.41623473167419434
    },
    {
      "epoch": 0.00028603515625,
      "model_forward_time": 0.11575078964233398,
      "step": 46864
    },
    {
      "epoch": 0.00028603515625,
      "step": 46864,
      "training_step_time": 0.44928407669067383
    },
    {
      "epoch": 0.000286041259765625,
      "model_forward_time": 0.1173710823059082,
      "step": 46865
    },
    {
      "epoch": 0.000286041259765625,
      "step": 46865,
      "training_step_time": 0.6767182350158691
    },
    {
      "epoch": 0.00028604736328125,
      "model_forward_time": 0.11595010757446289,
      "step": 46866
    },
    {
      "epoch": 0.00028604736328125,
      "step": 46866,
      "training_step_time": 0.6842293739318848
    },
    {
      "epoch": 0.000286053466796875,
      "model_forward_time": 0.12295866012573242,
      "step": 46867
    },
    {
      "epoch": 0.000286053466796875,
      "step": 46867,
      "training_step_time": 0.7818965911865234
    },
    {
      "epoch": 0.0002860595703125,
      "model_forward_time": 0.11709332466125488,
      "step": 46868
    },
    {
      "epoch": 0.0002860595703125,
      "step": 46868,
      "training_step_time": 0.7071700096130371
    },
    {
      "epoch": 0.000286065673828125,
      "model_forward_time": 0.1188511848449707,
      "step": 46869
    },
    {
      "epoch": 0.000286065673828125,
      "step": 46869,
      "training_step_time": 0.7184402942657471
    },
    {
      "epoch": 0.00028607177734375,
      "grad_norm": 0.08263949304819107,
      "learning_rate": 1.2530917937800463e-05,
      "loss": 0.0338,
      "step": 46870
    },
    {
      "epoch": 0.00028607177734375,
      "model_forward_time": 0.12020421028137207,
      "step": 46870
    },
    {
      "epoch": 0.00028607177734375,
      "step": 46870,
      "training_step_time": 0.5863125324249268
    },
    {
      "epoch": 0.000286077880859375,
      "model_forward_time": 0.11740589141845703,
      "step": 46871
    },
    {
      "epoch": 0.000286077880859375,
      "step": 46871,
      "training_step_time": 0.6641762256622314
    },
    {
      "epoch": 0.000286083984375,
      "model_forward_time": 0.13088726997375488,
      "step": 46872
    },
    {
      "epoch": 0.000286083984375,
      "step": 46872,
      "training_step_time": 0.6660470962524414
    },
    {
      "epoch": 0.000286090087890625,
      "model_forward_time": 0.1160128116607666,
      "step": 46873
    },
    {
      "epoch": 0.000286090087890625,
      "step": 46873,
      "training_step_time": 0.7069947719573975
    },
    {
      "epoch": 0.00028609619140625,
      "model_forward_time": 0.11583185195922852,
      "step": 46874
    },
    {
      "epoch": 0.00028609619140625,
      "step": 46874,
      "training_step_time": 0.690718412399292
    },
    {
      "epoch": 0.000286102294921875,
      "model_forward_time": 0.12182998657226562,
      "step": 46875
    },
    {
      "epoch": 0.000286102294921875,
      "step": 46875,
      "training_step_time": 0.657329797744751
    },
    {
      "epoch": 0.0002861083984375,
      "model_forward_time": 0.11989903450012207,
      "step": 46876
    },
    {
      "epoch": 0.0002861083984375,
      "step": 46876,
      "training_step_time": 0.5834789276123047
    },
    {
      "epoch": 0.000286114501953125,
      "model_forward_time": 0.12247323989868164,
      "step": 46877
    },
    {
      "epoch": 0.000286114501953125,
      "step": 46877,
      "training_step_time": 0.6760296821594238
    },
    {
      "epoch": 0.00028612060546875,
      "model_forward_time": 0.12057375907897949,
      "step": 46878
    },
    {
      "epoch": 0.00028612060546875,
      "step": 46878,
      "training_step_time": 0.730048656463623
    },
    {
      "epoch": 0.000286126708984375,
      "model_forward_time": 0.12878847122192383,
      "step": 46879
    },
    {
      "epoch": 0.000286126708984375,
      "step": 46879,
      "training_step_time": 0.6664164066314697
    },
    {
      "epoch": 0.0002861328125,
      "grad_norm": 0.08553595095872879,
      "learning_rate": 1.251267653430922e-05,
      "loss": 0.0387,
      "step": 46880
    },
    {
      "epoch": 0.0002861328125,
      "model_forward_time": 0.11906266212463379,
      "step": 46880
    },
    {
      "epoch": 0.0002861328125,
      "step": 46880,
      "training_step_time": 0.7392439842224121
    },
    {
      "epoch": 0.000286138916015625,
      "model_forward_time": 0.11859965324401855,
      "step": 46881
    },
    {
      "epoch": 0.000286138916015625,
      "step": 46881,
      "training_step_time": 0.564450740814209
    },
    {
      "epoch": 0.00028614501953125,
      "model_forward_time": 0.11963582038879395,
      "step": 46882
    },
    {
      "epoch": 0.00028614501953125,
      "step": 46882,
      "training_step_time": 0.6365678310394287
    },
    {
      "epoch": 0.000286151123046875,
      "model_forward_time": 0.12162971496582031,
      "step": 46883
    },
    {
      "epoch": 0.000286151123046875,
      "step": 46883,
      "training_step_time": 0.7314608097076416
    },
    {
      "epoch": 0.0002861572265625,
      "model_forward_time": 0.11580634117126465,
      "step": 46884
    },
    {
      "epoch": 0.0002861572265625,
      "step": 46884,
      "training_step_time": 0.6465928554534912
    },
    {
      "epoch": 0.000286163330078125,
      "model_forward_time": 0.11892485618591309,
      "step": 46885
    },
    {
      "epoch": 0.000286163330078125,
      "step": 46885,
      "training_step_time": 0.7525310516357422
    },
    {
      "epoch": 0.00028616943359375,
      "model_forward_time": 0.12019634246826172,
      "step": 46886
    },
    {
      "epoch": 0.00028616943359375,
      "step": 46886,
      "training_step_time": 0.6299340724945068
    },
    {
      "epoch": 0.000286175537109375,
      "model_forward_time": 0.11763405799865723,
      "step": 46887
    },
    {
      "epoch": 0.000286175537109375,
      "step": 46887,
      "training_step_time": 0.6814610958099365
    },
    {
      "epoch": 0.000286181640625,
      "model_forward_time": 0.1193544864654541,
      "step": 46888
    },
    {
      "epoch": 0.000286181640625,
      "step": 46888,
      "training_step_time": 0.6383867263793945
    },
    {
      "epoch": 0.000286187744140625,
      "model_forward_time": 0.11780428886413574,
      "step": 46889
    },
    {
      "epoch": 0.000286187744140625,
      "step": 46889,
      "training_step_time": 0.7216155529022217
    },
    {
      "epoch": 0.00028619384765625,
      "grad_norm": 0.11078961193561554,
      "learning_rate": 1.2494446518477022e-05,
      "loss": 0.0353,
      "step": 46890
    },
    {
      "epoch": 0.00028619384765625,
      "model_forward_time": 0.11922836303710938,
      "step": 46890
    },
    {
      "epoch": 0.00028619384765625,
      "step": 46890,
      "training_step_time": 0.660980224609375
    },
    {
      "epoch": 0.000286199951171875,
      "model_forward_time": 0.12041997909545898,
      "step": 46891
    },
    {
      "epoch": 0.000286199951171875,
      "step": 46891,
      "training_step_time": 0.6728007793426514
    },
    {
      "epoch": 0.0002862060546875,
      "model_forward_time": 0.1193094253540039,
      "step": 46892
    },
    {
      "epoch": 0.0002862060546875,
      "step": 46892,
      "training_step_time": 0.6378631591796875
    },
    {
      "epoch": 0.000286212158203125,
      "model_forward_time": 0.11607742309570312,
      "step": 46893
    },
    {
      "epoch": 0.000286212158203125,
      "step": 46893,
      "training_step_time": 0.6572391986846924
    },
    {
      "epoch": 0.00028621826171875,
      "model_forward_time": 0.12165260314941406,
      "step": 46894
    },
    {
      "epoch": 0.00028621826171875,
      "step": 46894,
      "training_step_time": 0.6705749034881592
    },
    {
      "epoch": 0.000286224365234375,
      "model_forward_time": 0.11628556251525879,
      "step": 46895
    },
    {
      "epoch": 0.000286224365234375,
      "step": 46895,
      "training_step_time": 0.6496293544769287
    },
    {
      "epoch": 0.00028623046875,
      "model_forward_time": 0.12182736396789551,
      "step": 46896
    },
    {
      "epoch": 0.00028623046875,
      "step": 46896,
      "training_step_time": 0.6326427459716797
    },
    {
      "epoch": 0.000286236572265625,
      "model_forward_time": 0.11619067192077637,
      "step": 46897
    },
    {
      "epoch": 0.000286236572265625,
      "step": 46897,
      "training_step_time": 0.7127025127410889
    },
    {
      "epoch": 0.00028624267578125,
      "model_forward_time": 0.1255354881286621,
      "step": 46898
    },
    {
      "epoch": 0.00028624267578125,
      "step": 46898,
      "training_step_time": 0.7403302192687988
    },
    {
      "epoch": 0.000286248779296875,
      "model_forward_time": 0.1190643310546875,
      "step": 46899
    },
    {
      "epoch": 0.000286248779296875,
      "step": 46899,
      "training_step_time": 0.6395823955535889
    },
    {
      "epoch": 0.0002862548828125,
      "grad_norm": 0.11501464247703552,
      "learning_rate": 1.2476227895841713e-05,
      "loss": 0.0375,
      "step": 46900
    },
    {
      "epoch": 0.0002862548828125,
      "model_forward_time": 0.11679363250732422,
      "step": 46900
    },
    {
      "epoch": 0.0002862548828125,
      "step": 46900,
      "training_step_time": 0.6342775821685791
    },
    {
      "epoch": 0.000286260986328125,
      "model_forward_time": 0.11847448348999023,
      "step": 46901
    },
    {
      "epoch": 0.000286260986328125,
      "step": 46901,
      "training_step_time": 0.7197074890136719
    },
    {
      "epoch": 0.00028626708984375,
      "model_forward_time": 0.11795306205749512,
      "step": 46902
    },
    {
      "epoch": 0.00028626708984375,
      "step": 46902,
      "training_step_time": 0.6564981937408447
    },
    {
      "epoch": 0.000286273193359375,
      "model_forward_time": 0.11923336982727051,
      "step": 46903
    },
    {
      "epoch": 0.000286273193359375,
      "step": 46903,
      "training_step_time": 0.669191837310791
    },
    {
      "epoch": 0.000286279296875,
      "model_forward_time": 0.11818337440490723,
      "step": 46904
    },
    {
      "epoch": 0.000286279296875,
      "step": 46904,
      "training_step_time": 0.7384862899780273
    },
    {
      "epoch": 0.000286285400390625,
      "model_forward_time": 0.12058687210083008,
      "step": 46905
    },
    {
      "epoch": 0.000286285400390625,
      "step": 46905,
      "training_step_time": 0.6950197219848633
    },
    {
      "epoch": 0.00028629150390625,
      "model_forward_time": 0.1193537712097168,
      "step": 46906
    },
    {
      "epoch": 0.00028629150390625,
      "step": 46906,
      "training_step_time": 0.6763496398925781
    },
    {
      "epoch": 0.000286297607421875,
      "model_forward_time": 0.12814760208129883,
      "step": 46907
    },
    {
      "epoch": 0.000286297607421875,
      "step": 46907,
      "training_step_time": 0.7099449634552002
    },
    {
      "epoch": 0.0002863037109375,
      "model_forward_time": 0.12119317054748535,
      "step": 46908
    },
    {
      "epoch": 0.0002863037109375,
      "step": 46908,
      "training_step_time": 0.7722263336181641
    },
    {
      "epoch": 0.000286309814453125,
      "model_forward_time": 0.11726522445678711,
      "step": 46909
    },
    {
      "epoch": 0.000286309814453125,
      "step": 46909,
      "training_step_time": 0.7427070140838623
    },
    {
      "epoch": 0.00028631591796875,
      "grad_norm": 0.1111207976937294,
      "learning_rate": 1.245802067193761e-05,
      "loss": 0.0393,
      "step": 46910
    },
    {
      "epoch": 0.00028631591796875,
      "model_forward_time": 0.11605954170227051,
      "step": 46910
    },
    {
      "epoch": 0.00028631591796875,
      "step": 46910,
      "training_step_time": 0.6367096900939941
    },
    {
      "epoch": 0.000286322021484375,
      "model_forward_time": 0.11792278289794922,
      "step": 46911
    },
    {
      "epoch": 0.000286322021484375,
      "step": 46911,
      "training_step_time": 0.6654167175292969
    },
    {
      "epoch": 0.000286328125,
      "model_forward_time": 0.11841654777526855,
      "step": 46912
    },
    {
      "epoch": 0.000286328125,
      "step": 46912,
      "training_step_time": 0.8255865573883057
    },
    {
      "epoch": 0.000286334228515625,
      "model_forward_time": 0.1164705753326416,
      "step": 46913
    },
    {
      "epoch": 0.000286334228515625,
      "step": 46913,
      "training_step_time": 0.6530280113220215
    },
    {
      "epoch": 0.00028634033203125,
      "model_forward_time": 0.14395356178283691,
      "step": 46914
    },
    {
      "epoch": 0.00028634033203125,
      "step": 46914,
      "training_step_time": 0.6278274059295654
    },
    {
      "epoch": 0.000286346435546875,
      "model_forward_time": 0.11877226829528809,
      "step": 46915
    },
    {
      "epoch": 0.000286346435546875,
      "step": 46915,
      "training_step_time": 0.6048552989959717
    },
    {
      "epoch": 0.0002863525390625,
      "model_forward_time": 0.12375998497009277,
      "step": 46916
    },
    {
      "epoch": 0.0002863525390625,
      "step": 46916,
      "training_step_time": 0.6799972057342529
    },
    {
      "epoch": 0.000286358642578125,
      "model_forward_time": 0.1164100170135498,
      "step": 46917
    },
    {
      "epoch": 0.000286358642578125,
      "step": 46917,
      "training_step_time": 0.727907657623291
    },
    {
      "epoch": 0.00028636474609375,
      "model_forward_time": 0.11626434326171875,
      "step": 46918
    },
    {
      "epoch": 0.00028636474609375,
      "step": 46918,
      "training_step_time": 0.5908493995666504
    },
    {
      "epoch": 0.000286370849609375,
      "model_forward_time": 0.12104630470275879,
      "step": 46919
    },
    {
      "epoch": 0.000286370849609375,
      "step": 46919,
      "training_step_time": 0.7945694923400879
    },
    {
      "epoch": 0.000286376953125,
      "grad_norm": 0.1097882017493248,
      "learning_rate": 1.243982485229559e-05,
      "loss": 0.0413,
      "step": 46920
    },
    {
      "epoch": 0.000286376953125,
      "model_forward_time": 0.12008905410766602,
      "step": 46920
    },
    {
      "epoch": 0.000286376953125,
      "step": 46920,
      "training_step_time": 0.7486803531646729
    },
    {
      "epoch": 0.000286383056640625,
      "model_forward_time": 0.11725354194641113,
      "step": 46921
    },
    {
      "epoch": 0.000286383056640625,
      "step": 46921,
      "training_step_time": 0.6808371543884277
    },
    {
      "epoch": 0.00028638916015625,
      "model_forward_time": 0.12116193771362305,
      "step": 46922
    },
    {
      "epoch": 0.00028638916015625,
      "step": 46922,
      "training_step_time": 0.682823896408081
    },
    {
      "epoch": 0.000286395263671875,
      "model_forward_time": 0.13168883323669434,
      "step": 46923
    },
    {
      "epoch": 0.000286395263671875,
      "step": 46923,
      "training_step_time": 0.667320966720581
    },
    {
      "epoch": 0.0002864013671875,
      "model_forward_time": 0.11620020866394043,
      "step": 46924
    },
    {
      "epoch": 0.0002864013671875,
      "step": 46924,
      "training_step_time": 0.6700193881988525
    },
    {
      "epoch": 0.000286407470703125,
      "model_forward_time": 0.11970639228820801,
      "step": 46925
    },
    {
      "epoch": 0.000286407470703125,
      "step": 46925,
      "training_step_time": 0.6166121959686279
    },
    {
      "epoch": 0.00028641357421875,
      "model_forward_time": 0.11788511276245117,
      "step": 46926
    },
    {
      "epoch": 0.00028641357421875,
      "step": 46926,
      "training_step_time": 0.6909127235412598
    },
    {
      "epoch": 0.000286419677734375,
      "model_forward_time": 0.11950063705444336,
      "step": 46927
    },
    {
      "epoch": 0.000286419677734375,
      "step": 46927,
      "training_step_time": 0.8156790733337402
    },
    {
      "epoch": 0.00028642578125,
      "model_forward_time": 0.11844658851623535,
      "step": 46928
    },
    {
      "epoch": 0.00028642578125,
      "step": 46928,
      "training_step_time": 0.624077320098877
    },
    {
      "epoch": 0.000286431884765625,
      "model_forward_time": 0.11918115615844727,
      "step": 46929
    },
    {
      "epoch": 0.000286431884765625,
      "step": 46929,
      "training_step_time": 0.6607275009155273
    },
    {
      "epoch": 0.00028643798828125,
      "grad_norm": 0.09767087548971176,
      "learning_rate": 1.2421640442443055e-05,
      "loss": 0.0405,
      "step": 46930
    },
    {
      "epoch": 0.00028643798828125,
      "model_forward_time": 0.12097954750061035,
      "step": 46930
    },
    {
      "epoch": 0.00028643798828125,
      "step": 46930,
      "training_step_time": 0.5591914653778076
    },
    {
      "epoch": 0.000286444091796875,
      "model_forward_time": 0.12562274932861328,
      "step": 46931
    },
    {
      "epoch": 0.000286444091796875,
      "step": 46931,
      "training_step_time": 0.5575683116912842
    },
    {
      "epoch": 0.0002864501953125,
      "model_forward_time": 0.12101578712463379,
      "step": 46932
    },
    {
      "epoch": 0.0002864501953125,
      "step": 46932,
      "training_step_time": 0.5533506870269775
    },
    {
      "epoch": 0.000286456298828125,
      "model_forward_time": 0.11958718299865723,
      "step": 46933
    },
    {
      "epoch": 0.000286456298828125,
      "step": 46933,
      "training_step_time": 0.6027166843414307
    },
    {
      "epoch": 0.00028646240234375,
      "model_forward_time": 0.11935210227966309,
      "step": 46934
    },
    {
      "epoch": 0.00028646240234375,
      "step": 46934,
      "training_step_time": 0.5599520206451416
    },
    {
      "epoch": 0.000286468505859375,
      "model_forward_time": 0.1178140640258789,
      "step": 46935
    },
    {
      "epoch": 0.000286468505859375,
      "step": 46935,
      "training_step_time": 0.5826249122619629
    },
    {
      "epoch": 0.000286474609375,
      "model_forward_time": 0.11709475517272949,
      "step": 46936
    },
    {
      "epoch": 0.000286474609375,
      "step": 46936,
      "training_step_time": 0.6439969539642334
    },
    {
      "epoch": 0.000286480712890625,
      "model_forward_time": 0.11690759658813477,
      "step": 46937
    },
    {
      "epoch": 0.000286480712890625,
      "step": 46937,
      "training_step_time": 0.5111343860626221
    },
    {
      "epoch": 0.00028648681640625,
      "model_forward_time": 0.11588883399963379,
      "step": 46938
    },
    {
      "epoch": 0.00028648681640625,
      "step": 46938,
      "training_step_time": 0.37804079055786133
    },
    {
      "epoch": 0.000286492919921875,
      "model_forward_time": 0.11543035507202148,
      "step": 46939
    },
    {
      "epoch": 0.000286492919921875,
      "step": 46939,
      "training_step_time": 0.47217249870300293
    },
    {
      "epoch": 0.0002864990234375,
      "grad_norm": 0.10958612710237503,
      "learning_rate": 1.2403467447903943e-05,
      "loss": 0.0438,
      "step": 46940
    },
    {
      "epoch": 0.0002864990234375,
      "model_forward_time": 0.1149301528930664,
      "step": 46940
    },
    {
      "epoch": 0.0002864990234375,
      "step": 46940,
      "training_step_time": 0.42948174476623535
    },
    {
      "epoch": 0.000286505126953125,
      "model_forward_time": 0.11463809013366699,
      "step": 46941
    },
    {
      "epoch": 0.000286505126953125,
      "step": 46941,
      "training_step_time": 0.41850972175598145
    },
    {
      "epoch": 0.00028651123046875,
      "model_forward_time": 0.11557579040527344,
      "step": 46942
    },
    {
      "epoch": 0.00028651123046875,
      "step": 46942,
      "training_step_time": 0.40126895904541016
    },
    {
      "epoch": 0.000286517333984375,
      "model_forward_time": 0.11542868614196777,
      "step": 46943
    },
    {
      "epoch": 0.000286517333984375,
      "step": 46943,
      "training_step_time": 0.3770167827606201
    },
    {
      "epoch": 0.0002865234375,
      "model_forward_time": 0.11529421806335449,
      "step": 46944
    },
    {
      "epoch": 0.0002865234375,
      "step": 46944,
      "training_step_time": 0.404407262802124
    },
    {
      "epoch": 0.000286529541015625,
      "model_forward_time": 0.11602640151977539,
      "step": 46945
    },
    {
      "epoch": 0.000286529541015625,
      "step": 46945,
      "training_step_time": 0.3893725872039795
    },
    {
      "epoch": 0.00028653564453125,
      "model_forward_time": 0.11516952514648438,
      "step": 46946
    },
    {
      "epoch": 0.00028653564453125,
      "step": 46946,
      "training_step_time": 0.3797166347503662
    },
    {
      "epoch": 0.000286541748046875,
      "model_forward_time": 0.11541485786437988,
      "step": 46947
    },
    {
      "epoch": 0.000286541748046875,
      "step": 46947,
      "training_step_time": 0.3961167335510254
    },
    {
      "epoch": 0.0002865478515625,
      "model_forward_time": 0.1153569221496582,
      "step": 46948
    },
    {
      "epoch": 0.0002865478515625,
      "step": 46948,
      "training_step_time": 0.3916807174682617
    },
    {
      "epoch": 0.000286553955078125,
      "model_forward_time": 0.11484861373901367,
      "step": 46949
    },
    {
      "epoch": 0.000286553955078125,
      "step": 46949,
      "training_step_time": 0.3811380863189697
    },
    {
      "epoch": 0.00028656005859375,
      "grad_norm": 0.16069598495960236,
      "learning_rate": 1.2385305874198776e-05,
      "loss": 0.0421,
      "step": 46950
    },
    {
      "epoch": 0.00028656005859375,
      "model_forward_time": 0.11556553840637207,
      "step": 46950
    },
    {
      "epoch": 0.00028656005859375,
      "step": 46950,
      "training_step_time": 0.4257059097290039
    },
    {
      "epoch": 0.000286566162109375,
      "model_forward_time": 0.11527490615844727,
      "step": 46951
    },
    {
      "epoch": 0.000286566162109375,
      "step": 46951,
      "training_step_time": 0.46448779106140137
    },
    {
      "epoch": 0.000286572265625,
      "model_forward_time": 0.11504030227661133,
      "step": 46952
    },
    {
      "epoch": 0.000286572265625,
      "step": 46952,
      "training_step_time": 0.389467716217041
    },
    {
      "epoch": 0.000286578369140625,
      "model_forward_time": 0.11523008346557617,
      "step": 46953
    },
    {
      "epoch": 0.000286578369140625,
      "step": 46953,
      "training_step_time": 0.4004340171813965
    },
    {
      "epoch": 0.00028658447265625,
      "model_forward_time": 0.116058349609375,
      "step": 46954
    },
    {
      "epoch": 0.00028658447265625,
      "step": 46954,
      "training_step_time": 0.42774391174316406
    },
    {
      "epoch": 0.000286590576171875,
      "model_forward_time": 0.115814208984375,
      "step": 46955
    },
    {
      "epoch": 0.000286590576171875,
      "step": 46955,
      "training_step_time": 0.4388301372528076
    },
    {
      "epoch": 0.0002865966796875,
      "model_forward_time": 0.11511039733886719,
      "step": 46956
    },
    {
      "epoch": 0.0002865966796875,
      "step": 46956,
      "training_step_time": 0.3922438621520996
    },
    {
      "epoch": 0.000286602783203125,
      "model_forward_time": 0.1150054931640625,
      "step": 46957
    },
    {
      "epoch": 0.000286602783203125,
      "step": 46957,
      "training_step_time": 0.39536404609680176
    },
    {
      "epoch": 0.00028660888671875,
      "model_forward_time": 0.11457681655883789,
      "step": 46958
    },
    {
      "epoch": 0.00028660888671875,
      "step": 46958,
      "training_step_time": 0.39989614486694336
    },
    {
      "epoch": 0.000286614990234375,
      "model_forward_time": 0.11477231979370117,
      "step": 46959
    },
    {
      "epoch": 0.000286614990234375,
      "step": 46959,
      "training_step_time": 0.3868892192840576
    },
    {
      "epoch": 0.00028662109375,
      "grad_norm": 0.09317785501480103,
      "learning_rate": 1.2367155726844492e-05,
      "loss": 0.0379,
      "step": 46960
    },
    {
      "epoch": 0.00028662109375,
      "model_forward_time": 0.11658692359924316,
      "step": 46960
    },
    {
      "epoch": 0.00028662109375,
      "step": 46960,
      "training_step_time": 0.3861539363861084
    },
    {
      "epoch": 0.000286627197265625,
      "model_forward_time": 0.11553359031677246,
      "step": 46961
    },
    {
      "epoch": 0.000286627197265625,
      "step": 46961,
      "training_step_time": 0.3823580741882324
    },
    {
      "epoch": 0.00028663330078125,
      "model_forward_time": 0.11482810974121094,
      "step": 46962
    },
    {
      "epoch": 0.00028663330078125,
      "step": 46962,
      "training_step_time": 0.39400696754455566
    },
    {
      "epoch": 0.000286639404296875,
      "model_forward_time": 0.11656570434570312,
      "step": 46963
    },
    {
      "epoch": 0.000286639404296875,
      "step": 46963,
      "training_step_time": 0.39861321449279785
    },
    {
      "epoch": 0.0002866455078125,
      "model_forward_time": 0.11567449569702148,
      "step": 46964
    },
    {
      "epoch": 0.0002866455078125,
      "step": 46964,
      "training_step_time": 0.4053969383239746
    },
    {
      "epoch": 0.000286651611328125,
      "model_forward_time": 0.11541390419006348,
      "step": 46965
    },
    {
      "epoch": 0.000286651611328125,
      "step": 46965,
      "training_step_time": 0.415341854095459
    },
    {
      "epoch": 0.00028665771484375,
      "model_forward_time": 0.1158456802368164,
      "step": 46966
    },
    {
      "epoch": 0.00028665771484375,
      "step": 46966,
      "training_step_time": 0.402254581451416
    },
    {
      "epoch": 0.000286663818359375,
      "model_forward_time": 0.11497211456298828,
      "step": 46967
    },
    {
      "epoch": 0.000286663818359375,
      "step": 46967,
      "training_step_time": 0.469881534576416
    },
    {
      "epoch": 0.000286669921875,
      "model_forward_time": 0.11631417274475098,
      "step": 46968
    },
    {
      "epoch": 0.000286669921875,
      "step": 46968,
      "training_step_time": 0.4571201801300049
    },
    {
      "epoch": 0.000286676025390625,
      "model_forward_time": 0.11668252944946289,
      "step": 46969
    },
    {
      "epoch": 0.000286676025390625,
      "step": 46969,
      "training_step_time": 0.5236063003540039
    },
    {
      "epoch": 0.00028668212890625,
      "grad_norm": 0.08941449224948883,
      "learning_rate": 1.2349017011354675e-05,
      "loss": 0.0339,
      "step": 46970
    },
    {
      "epoch": 0.00028668212890625,
      "model_forward_time": 0.11661505699157715,
      "step": 46970
    },
    {
      "epoch": 0.00028668212890625,
      "step": 46970,
      "training_step_time": 0.4206089973449707
    },
    {
      "epoch": 0.000286688232421875,
      "model_forward_time": 0.11602377891540527,
      "step": 46971
    },
    {
      "epoch": 0.000286688232421875,
      "step": 46971,
      "training_step_time": 0.39261817932128906
    },
    {
      "epoch": 0.0002866943359375,
      "model_forward_time": 0.11596298217773438,
      "step": 46972
    },
    {
      "epoch": 0.0002866943359375,
      "step": 46972,
      "training_step_time": 0.3767123222351074
    },
    {
      "epoch": 0.000286700439453125,
      "model_forward_time": 0.11511063575744629,
      "step": 46973
    },
    {
      "epoch": 0.000286700439453125,
      "step": 46973,
      "training_step_time": 0.37731432914733887
    },
    {
      "epoch": 0.00028670654296875,
      "model_forward_time": 0.11472940444946289,
      "step": 46974
    },
    {
      "epoch": 0.00028670654296875,
      "step": 46974,
      "training_step_time": 0.38022613525390625
    },
    {
      "epoch": 0.000286712646484375,
      "model_forward_time": 0.11492681503295898,
      "step": 46975
    },
    {
      "epoch": 0.000286712646484375,
      "step": 46975,
      "training_step_time": 0.3921928405761719
    },
    {
      "epoch": 0.00028671875,
      "model_forward_time": 0.11567544937133789,
      "step": 46976
    },
    {
      "epoch": 0.00028671875,
      "step": 46976,
      "training_step_time": 0.3729071617126465
    },
    {
      "epoch": 0.000286724853515625,
      "model_forward_time": 0.11502695083618164,
      "step": 46977
    },
    {
      "epoch": 0.000286724853515625,
      "step": 46977,
      "training_step_time": 0.4511582851409912
    },
    {
      "epoch": 0.00028673095703125,
      "model_forward_time": 0.11571192741394043,
      "step": 46978
    },
    {
      "epoch": 0.00028673095703125,
      "step": 46978,
      "training_step_time": 0.3911750316619873
    },
    {
      "epoch": 0.000286737060546875,
      "model_forward_time": 0.11489415168762207,
      "step": 46979
    },
    {
      "epoch": 0.000286737060546875,
      "step": 46979,
      "training_step_time": 0.39148521423339844
    },
    {
      "epoch": 0.0002867431640625,
      "grad_norm": 0.10068026185035706,
      "learning_rate": 1.233088973323937e-05,
      "loss": 0.0375,
      "step": 46980
    },
    {
      "epoch": 0.0002867431640625,
      "model_forward_time": 0.11555886268615723,
      "step": 46980
    },
    {
      "epoch": 0.0002867431640625,
      "step": 46980,
      "training_step_time": 0.46297359466552734
    },
    {
      "epoch": 0.000286749267578125,
      "model_forward_time": 0.1150979995727539,
      "step": 46981
    },
    {
      "epoch": 0.000286749267578125,
      "step": 46981,
      "training_step_time": 0.45436954498291016
    },
    {
      "epoch": 0.00028675537109375,
      "model_forward_time": 0.11558675765991211,
      "step": 46982
    },
    {
      "epoch": 0.00028675537109375,
      "step": 46982,
      "training_step_time": 0.40921878814697266
    },
    {
      "epoch": 0.000286761474609375,
      "model_forward_time": 0.11557888984680176,
      "step": 46983
    },
    {
      "epoch": 0.000286761474609375,
      "step": 46983,
      "training_step_time": 0.4563901424407959
    },
    {
      "epoch": 0.000286767578125,
      "model_forward_time": 0.11601924896240234,
      "step": 46984
    },
    {
      "epoch": 0.000286767578125,
      "step": 46984,
      "training_step_time": 0.4246666431427002
    },
    {
      "epoch": 0.000286773681640625,
      "model_forward_time": 0.11601734161376953,
      "step": 46985
    },
    {
      "epoch": 0.000286773681640625,
      "step": 46985,
      "training_step_time": 0.38823437690734863
    },
    {
      "epoch": 0.00028677978515625,
      "model_forward_time": 0.11548256874084473,
      "step": 46986
    },
    {
      "epoch": 0.00028677978515625,
      "step": 46986,
      "training_step_time": 0.3855581283569336
    },
    {
      "epoch": 0.000286785888671875,
      "model_forward_time": 0.11528825759887695,
      "step": 46987
    },
    {
      "epoch": 0.000286785888671875,
      "step": 46987,
      "training_step_time": 0.40117645263671875
    },
    {
      "epoch": 0.0002867919921875,
      "model_forward_time": 0.11513352394104004,
      "step": 46988
    },
    {
      "epoch": 0.0002867919921875,
      "step": 46988,
      "training_step_time": 0.3856315612792969
    },
    {
      "epoch": 0.000286798095703125,
      "model_forward_time": 0.11567187309265137,
      "step": 46989
    },
    {
      "epoch": 0.000286798095703125,
      "step": 46989,
      "training_step_time": 0.386080265045166
    },
    {
      "epoch": 0.00028680419921875,
      "grad_norm": 0.1001281812787056,
      "learning_rate": 1.2312773898005175e-05,
      "loss": 0.0378,
      "step": 46990
    },
    {
      "epoch": 0.00028680419921875,
      "model_forward_time": 0.11554241180419922,
      "step": 46990
    },
    {
      "epoch": 0.00028680419921875,
      "step": 46990,
      "training_step_time": 0.38457489013671875
    },
    {
      "epoch": 0.000286810302734375,
      "model_forward_time": 0.11552572250366211,
      "step": 46991
    },
    {
      "epoch": 0.000286810302734375,
      "step": 46991,
      "training_step_time": 0.3973734378814697
    },
    {
      "epoch": 0.00028681640625,
      "model_forward_time": 0.11525416374206543,
      "step": 46992
    },
    {
      "epoch": 0.00028681640625,
      "step": 46992,
      "training_step_time": 0.4170992374420166
    },
    {
      "epoch": 0.000286822509765625,
      "model_forward_time": 0.11585545539855957,
      "step": 46993
    },
    {
      "epoch": 0.000286822509765625,
      "step": 46993,
      "training_step_time": 0.4428102970123291
    },
    {
      "epoch": 0.00028682861328125,
      "model_forward_time": 0.11524701118469238,
      "step": 46994
    },
    {
      "epoch": 0.00028682861328125,
      "step": 46994,
      "training_step_time": 0.39409589767456055
    },
    {
      "epoch": 0.000286834716796875,
      "model_forward_time": 0.11502385139465332,
      "step": 46995
    },
    {
      "epoch": 0.000286834716796875,
      "step": 46995,
      "training_step_time": 0.4068591594696045
    },
    {
      "epoch": 0.0002868408203125,
      "model_forward_time": 0.11548185348510742,
      "step": 46996
    },
    {
      "epoch": 0.0002868408203125,
      "step": 46996,
      "training_step_time": 0.5025355815887451
    },
    {
      "epoch": 0.000286846923828125,
      "model_forward_time": 0.11610913276672363,
      "step": 46997
    },
    {
      "epoch": 0.000286846923828125,
      "step": 46997,
      "training_step_time": 0.4357278347015381
    },
    {
      "epoch": 0.00028685302734375,
      "model_forward_time": 0.11482787132263184,
      "step": 46998
    },
    {
      "epoch": 0.00028685302734375,
      "step": 46998,
      "training_step_time": 0.41834521293640137
    },
    {
      "epoch": 0.000286859130859375,
      "model_forward_time": 0.11539053916931152,
      "step": 46999
    },
    {
      "epoch": 0.000286859130859375,
      "step": 46999,
      "training_step_time": 0.4779493808746338
    },
    {
      "epoch": 0.000286865234375,
      "grad_norm": 0.09973540157079697,
      "learning_rate": 1.2294669511155193e-05,
      "loss": 0.0406,
      "step": 47000
    },
    {
      "epoch": 0.000286865234375,
      "model_forward_time": 0.11379289627075195,
      "step": 47000
    },
    {
      "epoch": 0.000286865234375,
      "step": 47000,
      "training_step_time": 0.35759401321411133
    },
    {
      "epoch": 0.000286871337890625,
      "model_forward_time": 0.11317753791809082,
      "step": 47001
    },
    {
      "epoch": 0.000286871337890625,
      "step": 47001,
      "training_step_time": 0.36873292922973633
    },
    {
      "epoch": 0.00028687744140625,
      "model_forward_time": 0.11299943923950195,
      "step": 47002
    },
    {
      "epoch": 0.00028687744140625,
      "step": 47002,
      "training_step_time": 0.3944103717803955
    },
    {
      "epoch": 0.000286883544921875,
      "model_forward_time": 0.11348533630371094,
      "step": 47003
    },
    {
      "epoch": 0.000286883544921875,
      "step": 47003,
      "training_step_time": 0.36339688301086426
    },
    {
      "epoch": 0.0002868896484375,
      "model_forward_time": 0.11362600326538086,
      "step": 47004
    },
    {
      "epoch": 0.0002868896484375,
      "step": 47004,
      "training_step_time": 0.3982534408569336
    },
    {
      "epoch": 0.000286895751953125,
      "model_forward_time": 0.11429953575134277,
      "step": 47005
    },
    {
      "epoch": 0.000286895751953125,
      "step": 47005,
      "training_step_time": 0.38518571853637695
    },
    {
      "epoch": 0.00028690185546875,
      "model_forward_time": 0.11450695991516113,
      "step": 47006
    },
    {
      "epoch": 0.00028690185546875,
      "step": 47006,
      "training_step_time": 0.3922464847564697
    },
    {
      "epoch": 0.000286907958984375,
      "model_forward_time": 0.11492156982421875,
      "step": 47007
    },
    {
      "epoch": 0.000286907958984375,
      "step": 47007,
      "training_step_time": 0.3824896812438965
    },
    {
      "epoch": 0.0002869140625,
      "model_forward_time": 0.11516833305358887,
      "step": 47008
    },
    {
      "epoch": 0.0002869140625,
      "step": 47008,
      "training_step_time": 0.4217357635498047
    },
    {
      "epoch": 0.000286920166015625,
      "model_forward_time": 0.1154325008392334,
      "step": 47009
    },
    {
      "epoch": 0.000286920166015625,
      "step": 47009,
      "training_step_time": 0.4909336566925049
    },
    {
      "epoch": 0.00028692626953125,
      "grad_norm": 0.09701112657785416,
      "learning_rate": 1.2276576578189064e-05,
      "loss": 0.0371,
      "step": 47010
    },
    {
      "epoch": 0.00028692626953125,
      "model_forward_time": 0.11570239067077637,
      "step": 47010
    },
    {
      "epoch": 0.00028692626953125,
      "step": 47010,
      "training_step_time": 0.44605088233947754
    },
    {
      "epoch": 0.000286932373046875,
      "model_forward_time": 0.11623167991638184,
      "step": 47011
    },
    {
      "epoch": 0.000286932373046875,
      "step": 47011,
      "training_step_time": 0.509547233581543
    },
    {
      "epoch": 0.0002869384765625,
      "model_forward_time": 0.11481213569641113,
      "step": 47012
    },
    {
      "epoch": 0.0002869384765625,
      "step": 47012,
      "training_step_time": 0.46330904960632324
    },
    {
      "epoch": 0.000286944580078125,
      "model_forward_time": 0.11443614959716797,
      "step": 47013
    },
    {
      "epoch": 0.000286944580078125,
      "step": 47013,
      "training_step_time": 0.380342960357666
    },
    {
      "epoch": 0.00028695068359375,
      "model_forward_time": 0.1166524887084961,
      "step": 47014
    },
    {
      "epoch": 0.00028695068359375,
      "step": 47014,
      "training_step_time": 0.39060425758361816
    },
    {
      "epoch": 0.000286956787109375,
      "model_forward_time": 0.11461925506591797,
      "step": 47015
    },
    {
      "epoch": 0.000286956787109375,
      "step": 47015,
      "training_step_time": 0.3749275207519531
    },
    {
      "epoch": 0.000286962890625,
      "model_forward_time": 0.11582708358764648,
      "step": 47016
    },
    {
      "epoch": 0.000286962890625,
      "step": 47016,
      "training_step_time": 0.39633893966674805
    },
    {
      "epoch": 0.000286968994140625,
      "model_forward_time": 0.11533617973327637,
      "step": 47017
    },
    {
      "epoch": 0.000286968994140625,
      "step": 47017,
      "training_step_time": 0.4359884262084961
    },
    {
      "epoch": 0.00028697509765625,
      "model_forward_time": 0.11539983749389648,
      "step": 47018
    },
    {
      "epoch": 0.00028697509765625,
      "step": 47018,
      "training_step_time": 0.4585855007171631
    },
    {
      "epoch": 0.000286981201171875,
      "model_forward_time": 0.11512446403503418,
      "step": 47019
    },
    {
      "epoch": 0.000286981201171875,
      "step": 47019,
      "training_step_time": 0.3777754306793213
    },
    {
      "epoch": 0.0002869873046875,
      "grad_norm": 0.10292495042085648,
      "learning_rate": 1.2258495104602924e-05,
      "loss": 0.0391,
      "step": 47020
    },
    {
      "epoch": 0.0002869873046875,
      "model_forward_time": 0.11503100395202637,
      "step": 47020
    },
    {
      "epoch": 0.0002869873046875,
      "step": 47020,
      "training_step_time": 0.40711426734924316
    },
    {
      "epoch": 0.000286993408203125,
      "model_forward_time": 0.11472439765930176,
      "step": 47021
    },
    {
      "epoch": 0.000286993408203125,
      "step": 47021,
      "training_step_time": 0.4504811763763428
    },
    {
      "epoch": 0.00028699951171875,
      "model_forward_time": 0.11482620239257812,
      "step": 47022
    },
    {
      "epoch": 0.00028699951171875,
      "step": 47022,
      "training_step_time": 0.43563318252563477
    },
    {
      "epoch": 0.000287005615234375,
      "model_forward_time": 0.11429786682128906,
      "step": 47023
    },
    {
      "epoch": 0.000287005615234375,
      "step": 47023,
      "training_step_time": 0.4996156692504883
    },
    {
      "epoch": 0.00028701171875,
      "model_forward_time": 0.11540746688842773,
      "step": 47024
    },
    {
      "epoch": 0.00028701171875,
      "step": 47024,
      "training_step_time": 0.4558897018432617
    },
    {
      "epoch": 0.000287017822265625,
      "model_forward_time": 0.11529898643493652,
      "step": 47025
    },
    {
      "epoch": 0.000287017822265625,
      "step": 47025,
      "training_step_time": 0.4669535160064697
    },
    {
      "epoch": 0.00028702392578125,
      "model_forward_time": 0.11451101303100586,
      "step": 47026
    },
    {
      "epoch": 0.00028702392578125,
      "step": 47026,
      "training_step_time": 0.39325499534606934
    },
    {
      "epoch": 0.000287030029296875,
      "model_forward_time": 0.11500334739685059,
      "step": 47027
    },
    {
      "epoch": 0.000287030029296875,
      "step": 47027,
      "training_step_time": 0.4500138759613037
    },
    {
      "epoch": 0.0002870361328125,
      "model_forward_time": 0.1151278018951416,
      "step": 47028
    },
    {
      "epoch": 0.0002870361328125,
      "step": 47028,
      "training_step_time": 0.394838809967041
    },
    {
      "epoch": 0.000287042236328125,
      "model_forward_time": 0.1149742603302002,
      "step": 47029
    },
    {
      "epoch": 0.000287042236328125,
      "step": 47029,
      "training_step_time": 0.39040541648864746
    },
    {
      "epoch": 0.00028704833984375,
      "grad_norm": 0.08227468281984329,
      "learning_rate": 1.2240425095889495e-05,
      "loss": 0.039,
      "step": 47030
    },
    {
      "epoch": 0.00028704833984375,
      "model_forward_time": 0.11499214172363281,
      "step": 47030
    },
    {
      "epoch": 0.00028704833984375,
      "step": 47030,
      "training_step_time": 0.407895565032959
    },
    {
      "epoch": 0.000287054443359375,
      "model_forward_time": 0.11451554298400879,
      "step": 47031
    },
    {
      "epoch": 0.000287054443359375,
      "step": 47031,
      "training_step_time": 0.43437695503234863
    },
    {
      "epoch": 0.000287060546875,
      "model_forward_time": 0.1149742603302002,
      "step": 47032
    },
    {
      "epoch": 0.000287060546875,
      "step": 47032,
      "training_step_time": 0.38883137702941895
    },
    {
      "epoch": 0.000287066650390625,
      "model_forward_time": 0.11574530601501465,
      "step": 47033
    },
    {
      "epoch": 0.000287066650390625,
      "step": 47033,
      "training_step_time": 0.37816834449768066
    },
    {
      "epoch": 0.00028707275390625,
      "model_forward_time": 0.11523222923278809,
      "step": 47034
    },
    {
      "epoch": 0.00028707275390625,
      "step": 47034,
      "training_step_time": 0.3985574245452881
    },
    {
      "epoch": 0.000287078857421875,
      "model_forward_time": 0.11519384384155273,
      "step": 47035
    },
    {
      "epoch": 0.000287078857421875,
      "step": 47035,
      "training_step_time": 0.39377903938293457
    },
    {
      "epoch": 0.0002870849609375,
      "model_forward_time": 0.11506104469299316,
      "step": 47036
    },
    {
      "epoch": 0.0002870849609375,
      "step": 47036,
      "training_step_time": 0.48509883880615234
    },
    {
      "epoch": 0.000287091064453125,
      "model_forward_time": 0.11518383026123047,
      "step": 47037
    },
    {
      "epoch": 0.000287091064453125,
      "step": 47037,
      "training_step_time": 0.3988971710205078
    },
    {
      "epoch": 0.00028709716796875,
      "model_forward_time": 0.11509895324707031,
      "step": 47038
    },
    {
      "epoch": 0.00028709716796875,
      "step": 47038,
      "training_step_time": 0.41216564178466797
    },
    {
      "epoch": 0.000287103271484375,
      "model_forward_time": 0.11487531661987305,
      "step": 47039
    },
    {
      "epoch": 0.000287103271484375,
      "step": 47039,
      "training_step_time": 0.4149322509765625
    },
    {
      "epoch": 0.000287109375,
      "grad_norm": 0.10935378819704056,
      "learning_rate": 1.2222366557537911e-05,
      "loss": 0.0404,
      "step": 47040
    },
    {
      "epoch": 0.000287109375,
      "model_forward_time": 0.11548686027526855,
      "step": 47040
    },
    {
      "epoch": 0.000287109375,
      "step": 47040,
      "training_step_time": 0.490584135055542
    },
    {
      "epoch": 0.000287115478515625,
      "model_forward_time": 0.11514759063720703,
      "step": 47041
    },
    {
      "epoch": 0.000287115478515625,
      "step": 47041,
      "training_step_time": 0.5085372924804688
    },
    {
      "epoch": 0.00028712158203125,
      "model_forward_time": 0.11472654342651367,
      "step": 47042
    },
    {
      "epoch": 0.00028712158203125,
      "step": 47042,
      "training_step_time": 0.4609715938568115
    },
    {
      "epoch": 0.000287127685546875,
      "model_forward_time": 0.11485791206359863,
      "step": 47043
    },
    {
      "epoch": 0.000287127685546875,
      "step": 47043,
      "training_step_time": 0.39777493476867676
    },
    {
      "epoch": 0.0002871337890625,
      "model_forward_time": 0.11479067802429199,
      "step": 47044
    },
    {
      "epoch": 0.0002871337890625,
      "step": 47044,
      "training_step_time": 0.4353504180908203
    },
    {
      "epoch": 0.000287139892578125,
      "model_forward_time": 0.11447644233703613,
      "step": 47045
    },
    {
      "epoch": 0.000287139892578125,
      "step": 47045,
      "training_step_time": 0.4095609188079834
    },
    {
      "epoch": 0.00028714599609375,
      "model_forward_time": 0.11425161361694336,
      "step": 47046
    },
    {
      "epoch": 0.00028714599609375,
      "step": 47046,
      "training_step_time": 0.4654369354248047
    },
    {
      "epoch": 0.000287152099609375,
      "model_forward_time": 0.11491227149963379,
      "step": 47047
    },
    {
      "epoch": 0.000287152099609375,
      "step": 47047,
      "training_step_time": 0.41048407554626465
    },
    {
      "epoch": 0.000287158203125,
      "model_forward_time": 0.1141977310180664,
      "step": 47048
    },
    {
      "epoch": 0.000287158203125,
      "step": 47048,
      "training_step_time": 0.3863105773925781
    },
    {
      "epoch": 0.000287164306640625,
      "model_forward_time": 0.11581683158874512,
      "step": 47049
    },
    {
      "epoch": 0.000287164306640625,
      "step": 47049,
      "training_step_time": 0.3917698860168457
    },
    {
      "epoch": 0.00028717041015625,
      "grad_norm": 0.1320524364709854,
      "learning_rate": 1.2204319495033916e-05,
      "loss": 0.0413,
      "step": 47050
    },
    {
      "epoch": 0.00028717041015625,
      "model_forward_time": 0.1150522232055664,
      "step": 47050
    },
    {
      "epoch": 0.00028717041015625,
      "step": 47050,
      "training_step_time": 0.5029244422912598
    },
    {
      "epoch": 0.000287176513671875,
      "model_forward_time": 0.11445951461791992,
      "step": 47051
    },
    {
      "epoch": 0.000287176513671875,
      "step": 47051,
      "training_step_time": 0.3736128807067871
    },
    {
      "epoch": 0.0002871826171875,
      "model_forward_time": 0.11450743675231934,
      "step": 47052
    },
    {
      "epoch": 0.0002871826171875,
      "step": 47052,
      "training_step_time": 0.43921947479248047
    },
    {
      "epoch": 0.000287188720703125,
      "model_forward_time": 0.11494684219360352,
      "step": 47053
    },
    {
      "epoch": 0.000287188720703125,
      "step": 47053,
      "training_step_time": 0.41271281242370605
    },
    {
      "epoch": 0.00028719482421875,
      "model_forward_time": 0.11568021774291992,
      "step": 47054
    },
    {
      "epoch": 0.00028719482421875,
      "step": 47054,
      "training_step_time": 0.42484593391418457
    },
    {
      "epoch": 0.000287200927734375,
      "model_forward_time": 0.11456179618835449,
      "step": 47055
    },
    {
      "epoch": 0.000287200927734375,
      "step": 47055,
      "training_step_time": 0.4081592559814453
    },
    {
      "epoch": 0.00028720703125,
      "model_forward_time": 0.11563968658447266,
      "step": 47056
    },
    {
      "epoch": 0.00028720703125,
      "step": 47056,
      "training_step_time": 0.4031665325164795
    },
    {
      "epoch": 0.000287213134765625,
      "model_forward_time": 0.11483573913574219,
      "step": 47057
    },
    {
      "epoch": 0.000287213134765625,
      "step": 47057,
      "training_step_time": 0.3897871971130371
    },
    {
      "epoch": 0.00028721923828125,
      "model_forward_time": 0.11578726768493652,
      "step": 47058
    },
    {
      "epoch": 0.00028721923828125,
      "step": 47058,
      "training_step_time": 0.44794154167175293
    },
    {
      "epoch": 0.000287225341796875,
      "model_forward_time": 0.11511826515197754,
      "step": 47059
    },
    {
      "epoch": 0.000287225341796875,
      "step": 47059,
      "training_step_time": 0.4292004108428955
    },
    {
      "epoch": 0.0002872314453125,
      "grad_norm": 0.11329913884401321,
      "learning_rate": 1.2186283913859726e-05,
      "loss": 0.0348,
      "step": 47060
    },
    {
      "epoch": 0.0002872314453125,
      "model_forward_time": 0.11462974548339844,
      "step": 47060
    },
    {
      "epoch": 0.0002872314453125,
      "step": 47060,
      "training_step_time": 0.4023430347442627
    },
    {
      "epoch": 0.000287237548828125,
      "model_forward_time": 0.11577057838439941,
      "step": 47061
    },
    {
      "epoch": 0.000287237548828125,
      "step": 47061,
      "training_step_time": 0.3917553424835205
    },
    {
      "epoch": 0.00028724365234375,
      "model_forward_time": 0.11518692970275879,
      "step": 47062
    },
    {
      "epoch": 0.00028724365234375,
      "step": 47062,
      "training_step_time": 0.39279770851135254
    },
    {
      "epoch": 0.000287249755859375,
      "model_forward_time": 0.11493420600891113,
      "step": 47063
    },
    {
      "epoch": 0.000287249755859375,
      "step": 47063,
      "training_step_time": 0.39873290061950684
    },
    {
      "epoch": 0.000287255859375,
      "model_forward_time": 0.1152184009552002,
      "step": 47064
    },
    {
      "epoch": 0.000287255859375,
      "step": 47064,
      "training_step_time": 0.40369367599487305
    },
    {
      "epoch": 0.000287261962890625,
      "model_forward_time": 0.11480522155761719,
      "step": 47065
    },
    {
      "epoch": 0.000287261962890625,
      "step": 47065,
      "training_step_time": 0.4485158920288086
    },
    {
      "epoch": 0.00028726806640625,
      "model_forward_time": 0.11527609825134277,
      "step": 47066
    },
    {
      "epoch": 0.00028726806640625,
      "step": 47066,
      "training_step_time": 0.46447300910949707
    },
    {
      "epoch": 0.000287274169921875,
      "model_forward_time": 0.1156923770904541,
      "step": 47067
    },
    {
      "epoch": 0.000287274169921875,
      "step": 47067,
      "training_step_time": 0.40265727043151855
    },
    {
      "epoch": 0.0002872802734375,
      "model_forward_time": 0.11501836776733398,
      "step": 47068
    },
    {
      "epoch": 0.0002872802734375,
      "step": 47068,
      "training_step_time": 0.3967123031616211
    },
    {
      "epoch": 0.000287286376953125,
      "model_forward_time": 0.1162259578704834,
      "step": 47069
    },
    {
      "epoch": 0.000287286376953125,
      "step": 47069,
      "training_step_time": 0.49602746963500977
    },
    {
      "epoch": 0.00028729248046875,
      "grad_norm": 0.0990820825099945,
      "learning_rate": 1.2168259819494066e-05,
      "loss": 0.0391,
      "step": 47070
    },
    {
      "epoch": 0.00028729248046875,
      "model_forward_time": 0.1152644157409668,
      "step": 47070
    },
    {
      "epoch": 0.00028729248046875,
      "step": 47070,
      "training_step_time": 0.5015456676483154
    },
    {
      "epoch": 0.000287298583984375,
      "model_forward_time": 0.11559510231018066,
      "step": 47071
    },
    {
      "epoch": 0.000287298583984375,
      "step": 47071,
      "training_step_time": 0.4369471073150635
    },
    {
      "epoch": 0.0002873046875,
      "model_forward_time": 0.11544156074523926,
      "step": 47072
    },
    {
      "epoch": 0.0002873046875,
      "step": 47072,
      "training_step_time": 0.40382862091064453
    },
    {
      "epoch": 0.000287310791015625,
      "model_forward_time": 0.11473798751831055,
      "step": 47073
    },
    {
      "epoch": 0.000287310791015625,
      "step": 47073,
      "training_step_time": 0.3810882568359375
    },
    {
      "epoch": 0.00028731689453125,
      "model_forward_time": 0.11510467529296875,
      "step": 47074
    },
    {
      "epoch": 0.00028731689453125,
      "step": 47074,
      "training_step_time": 0.3769407272338867
    },
    {
      "epoch": 0.000287322998046875,
      "model_forward_time": 0.11626100540161133,
      "step": 47075
    },
    {
      "epoch": 0.000287322998046875,
      "step": 47075,
      "training_step_time": 0.5907595157623291
    },
    {
      "epoch": 0.0002873291015625,
      "model_forward_time": 0.11450052261352539,
      "step": 47076
    },
    {
      "epoch": 0.0002873291015625,
      "step": 47076,
      "training_step_time": 0.3829324245452881
    },
    {
      "epoch": 0.000287335205078125,
      "model_forward_time": 0.11481690406799316,
      "step": 47077
    },
    {
      "epoch": 0.000287335205078125,
      "step": 47077,
      "training_step_time": 0.3907794952392578
    },
    {
      "epoch": 0.00028734130859375,
      "model_forward_time": 0.11517500877380371,
      "step": 47078
    },
    {
      "epoch": 0.00028734130859375,
      "step": 47078,
      "training_step_time": 0.418255090713501
    },
    {
      "epoch": 0.000287347412109375,
      "model_forward_time": 0.11507010459899902,
      "step": 47079
    },
    {
      "epoch": 0.000287347412109375,
      "step": 47079,
      "training_step_time": 0.3813910484313965
    },
    {
      "epoch": 0.000287353515625,
      "grad_norm": 0.11900753527879715,
      "learning_rate": 1.2150247217412186e-05,
      "loss": 0.0404,
      "step": 47080
    },
    {
      "epoch": 0.000287353515625,
      "model_forward_time": 0.11556196212768555,
      "step": 47080
    },
    {
      "epoch": 0.000287353515625,
      "step": 47080,
      "training_step_time": 0.4235227108001709
    },
    {
      "epoch": 0.000287359619140625,
      "model_forward_time": 0.11505866050720215,
      "step": 47081
    },
    {
      "epoch": 0.000287359619140625,
      "step": 47081,
      "training_step_time": 1.0973811149597168
    },
    {
      "epoch": 0.00028736572265625,
      "model_forward_time": 0.11440896987915039,
      "step": 47082
    },
    {
      "epoch": 0.00028736572265625,
      "step": 47082,
      "training_step_time": 0.4657139778137207
    },
    {
      "epoch": 0.000287371826171875,
      "model_forward_time": 0.11427474021911621,
      "step": 47083
    },
    {
      "epoch": 0.000287371826171875,
      "step": 47083,
      "training_step_time": 0.46369504928588867
    },
    {
      "epoch": 0.0002873779296875,
      "model_forward_time": 0.1140131950378418,
      "step": 47084
    },
    {
      "epoch": 0.0002873779296875,
      "step": 47084,
      "training_step_time": 0.4005625247955322
    },
    {
      "epoch": 0.000287384033203125,
      "model_forward_time": 0.1143655776977539,
      "step": 47085
    },
    {
      "epoch": 0.000287384033203125,
      "step": 47085,
      "training_step_time": 0.3788132667541504
    },
    {
      "epoch": 0.00028739013671875,
      "model_forward_time": 0.11531352996826172,
      "step": 47086
    },
    {
      "epoch": 0.00028739013671875,
      "step": 47086,
      "training_step_time": 0.37959861755371094
    },
    {
      "epoch": 0.000287396240234375,
      "model_forward_time": 0.11466312408447266,
      "step": 47087
    },
    {
      "epoch": 0.000287396240234375,
      "step": 47087,
      "training_step_time": 0.5927231311798096
    },
    {
      "epoch": 0.00028740234375,
      "model_forward_time": 0.11478185653686523,
      "step": 47088
    },
    {
      "epoch": 0.00028740234375,
      "step": 47088,
      "training_step_time": 0.3942267894744873
    },
    {
      "epoch": 0.000287408447265625,
      "model_forward_time": 0.11535024642944336,
      "step": 47089
    },
    {
      "epoch": 0.000287408447265625,
      "step": 47089,
      "training_step_time": 0.3872084617614746
    },
    {
      "epoch": 0.00028741455078125,
      "grad_norm": 0.11787671595811844,
      "learning_rate": 1.2132246113085822e-05,
      "loss": 0.0413,
      "step": 47090
    },
    {
      "epoch": 0.00028741455078125,
      "model_forward_time": 0.11531305313110352,
      "step": 47090
    },
    {
      "epoch": 0.00028741455078125,
      "step": 47090,
      "training_step_time": 0.3860504627227783
    },
    {
      "epoch": 0.000287420654296875,
      "model_forward_time": 0.11472582817077637,
      "step": 47091
    },
    {
      "epoch": 0.000287420654296875,
      "step": 47091,
      "training_step_time": 0.4847438335418701
    },
    {
      "epoch": 0.0002874267578125,
      "model_forward_time": 0.11567139625549316,
      "step": 47092
    },
    {
      "epoch": 0.0002874267578125,
      "step": 47092,
      "training_step_time": 0.48024749755859375
    },
    {
      "epoch": 0.000287432861328125,
      "model_forward_time": 0.11446022987365723,
      "step": 47093
    },
    {
      "epoch": 0.000287432861328125,
      "step": 47093,
      "training_step_time": 0.869964599609375
    },
    {
      "epoch": 0.00028743896484375,
      "model_forward_time": 0.11436319351196289,
      "step": 47094
    },
    {
      "epoch": 0.00028743896484375,
      "step": 47094,
      "training_step_time": 0.4204978942871094
    },
    {
      "epoch": 0.000287445068359375,
      "model_forward_time": 0.11407637596130371,
      "step": 47095
    },
    {
      "epoch": 0.000287445068359375,
      "step": 47095,
      "training_step_time": 0.45404911041259766
    },
    {
      "epoch": 0.000287451171875,
      "model_forward_time": 0.11398029327392578,
      "step": 47096
    },
    {
      "epoch": 0.000287451171875,
      "step": 47096,
      "training_step_time": 0.433943510055542
    },
    {
      "epoch": 0.000287457275390625,
      "model_forward_time": 0.11416482925415039,
      "step": 47097
    },
    {
      "epoch": 0.000287457275390625,
      "step": 47097,
      "training_step_time": 0.4393198490142822
    },
    {
      "epoch": 0.00028746337890625,
      "model_forward_time": 0.11467576026916504,
      "step": 47098
    },
    {
      "epoch": 0.00028746337890625,
      "step": 47098,
      "training_step_time": 0.399660587310791
    },
    {
      "epoch": 0.000287469482421875,
      "model_forward_time": 0.11426138877868652,
      "step": 47099
    },
    {
      "epoch": 0.000287469482421875,
      "step": 47099,
      "training_step_time": 0.8873147964477539
    },
    {
      "epoch": 0.0002874755859375,
      "grad_norm": 0.08334169536828995,
      "learning_rate": 1.2114256511983274e-05,
      "loss": 0.0375,
      "step": 47100
    },
    {
      "epoch": 0.0002874755859375,
      "model_forward_time": 0.11430978775024414,
      "step": 47100
    },
    {
      "epoch": 0.0002874755859375,
      "step": 47100,
      "training_step_time": 0.39041614532470703
    },
    {
      "epoch": 0.000287481689453125,
      "model_forward_time": 0.11432814598083496,
      "step": 47101
    },
    {
      "epoch": 0.000287481689453125,
      "step": 47101,
      "training_step_time": 0.3877081871032715
    },
    {
      "epoch": 0.00028748779296875,
      "model_forward_time": 0.1143808364868164,
      "step": 47102
    },
    {
      "epoch": 0.00028748779296875,
      "step": 47102,
      "training_step_time": 0.3913309574127197
    },
    {
      "epoch": 0.000287493896484375,
      "model_forward_time": 0.11391305923461914,
      "step": 47103
    },
    {
      "epoch": 0.000287493896484375,
      "step": 47103,
      "training_step_time": 0.39205145835876465
    },
    {
      "epoch": 0.0002875,
      "model_forward_time": 0.1142425537109375,
      "step": 47104
    },
    {
      "epoch": 0.0002875,
      "step": 47104,
      "training_step_time": 0.4506490230560303
    },
    {
      "epoch": 0.000287506103515625,
      "model_forward_time": 0.11492800712585449,
      "step": 47105
    },
    {
      "epoch": 0.000287506103515625,
      "step": 47105,
      "training_step_time": 0.48337507247924805
    },
    {
      "epoch": 0.00028751220703125,
      "model_forward_time": 0.11466312408447266,
      "step": 47106
    },
    {
      "epoch": 0.00028751220703125,
      "step": 47106,
      "training_step_time": 0.417217493057251
    },
    {
      "epoch": 0.000287518310546875,
      "model_forward_time": 0.11485481262207031,
      "step": 47107
    },
    {
      "epoch": 0.000287518310546875,
      "step": 47107,
      "training_step_time": 0.45692873001098633
    },
    {
      "epoch": 0.0002875244140625,
      "model_forward_time": 0.11470770835876465,
      "step": 47108
    },
    {
      "epoch": 0.0002875244140625,
      "step": 47108,
      "training_step_time": 0.4864847660064697
    },
    {
      "epoch": 0.000287530517578125,
      "model_forward_time": 0.11429262161254883,
      "step": 47109
    },
    {
      "epoch": 0.000287530517578125,
      "step": 47109,
      "training_step_time": 0.5057764053344727
    },
    {
      "epoch": 0.00028753662109375,
      "grad_norm": 0.21373239159584045,
      "learning_rate": 1.2096278419569257e-05,
      "loss": 0.04,
      "step": 47110
    },
    {
      "epoch": 0.00028753662109375,
      "model_forward_time": 0.11444687843322754,
      "step": 47110
    },
    {
      "epoch": 0.00028753662109375,
      "step": 47110,
      "training_step_time": 0.4158906936645508
    },
    {
      "epoch": 0.000287542724609375,
      "model_forward_time": 0.1151578426361084,
      "step": 47111
    },
    {
      "epoch": 0.000287542724609375,
      "step": 47111,
      "training_step_time": 0.6691145896911621
    },
    {
      "epoch": 0.000287548828125,
      "model_forward_time": 0.11429023742675781,
      "step": 47112
    },
    {
      "epoch": 0.000287548828125,
      "step": 47112,
      "training_step_time": 0.3935675621032715
    },
    {
      "epoch": 0.000287554931640625,
      "model_forward_time": 0.11365413665771484,
      "step": 47113
    },
    {
      "epoch": 0.000287554931640625,
      "step": 47113,
      "training_step_time": 0.38669466972351074
    },
    {
      "epoch": 0.00028756103515625,
      "model_forward_time": 0.1143646240234375,
      "step": 47114
    },
    {
      "epoch": 0.00028756103515625,
      "step": 47114,
      "training_step_time": 0.38124966621398926
    },
    {
      "epoch": 0.000287567138671875,
      "model_forward_time": 0.11442685127258301,
      "step": 47115
    },
    {
      "epoch": 0.000287567138671875,
      "step": 47115,
      "training_step_time": 0.3892529010772705
    },
    {
      "epoch": 0.0002875732421875,
      "model_forward_time": 0.1144249439239502,
      "step": 47116
    },
    {
      "epoch": 0.0002875732421875,
      "step": 47116,
      "training_step_time": 0.38643360137939453
    },
    {
      "epoch": 0.000287579345703125,
      "model_forward_time": 0.11476993560791016,
      "step": 47117
    },
    {
      "epoch": 0.000287579345703125,
      "step": 47117,
      "training_step_time": 1.1098408699035645
    },
    {
      "epoch": 0.00028758544921875,
      "model_forward_time": 0.11541748046875,
      "step": 47118
    },
    {
      "epoch": 0.00028758544921875,
      "step": 47118,
      "training_step_time": 0.3942253589630127
    },
    {
      "epoch": 0.000287591552734375,
      "model_forward_time": 0.1144552230834961,
      "step": 47119
    },
    {
      "epoch": 0.000287591552734375,
      "step": 47119,
      "training_step_time": 0.44731688499450684
    },
    {
      "epoch": 0.00028759765625,
      "grad_norm": 0.14313271641731262,
      "learning_rate": 1.2078311841305084e-05,
      "loss": 0.0375,
      "step": 47120
    },
    {
      "epoch": 0.00028759765625,
      "model_forward_time": 0.11362624168395996,
      "step": 47120
    },
    {
      "epoch": 0.00028759765625,
      "step": 47120,
      "training_step_time": 0.39699888229370117
    },
    {
      "epoch": 0.000287603759765625,
      "model_forward_time": 0.11474347114562988,
      "step": 47121
    },
    {
      "epoch": 0.000287603759765625,
      "step": 47121,
      "training_step_time": 0.36045193672180176
    },
    {
      "epoch": 0.00028760986328125,
      "model_forward_time": 0.11415314674377441,
      "step": 47122
    },
    {
      "epoch": 0.00028760986328125,
      "step": 47122,
      "training_step_time": 0.4378383159637451
    },
    {
      "epoch": 0.000287615966796875,
      "model_forward_time": 0.1146237850189209,
      "step": 47123
    },
    {
      "epoch": 0.000287615966796875,
      "step": 47123,
      "training_step_time": 0.6515898704528809
    },
    {
      "epoch": 0.0002876220703125,
      "model_forward_time": 0.11429858207702637,
      "step": 47124
    },
    {
      "epoch": 0.0002876220703125,
      "step": 47124,
      "training_step_time": 0.3948957920074463
    },
    {
      "epoch": 0.000287628173828125,
      "model_forward_time": 0.11381888389587402,
      "step": 47125
    },
    {
      "epoch": 0.000287628173828125,
      "step": 47125,
      "training_step_time": 0.3965771198272705
    },
    {
      "epoch": 0.00028763427734375,
      "model_forward_time": 0.1147165298461914,
      "step": 47126
    },
    {
      "epoch": 0.00028763427734375,
      "step": 47126,
      "training_step_time": 0.3869035243988037
    },
    {
      "epoch": 0.000287640380859375,
      "model_forward_time": 0.1147162914276123,
      "step": 47127
    },
    {
      "epoch": 0.000287640380859375,
      "step": 47127,
      "training_step_time": 0.37894344329833984
    },
    {
      "epoch": 0.000287646484375,
      "model_forward_time": 0.11481022834777832,
      "step": 47128
    },
    {
      "epoch": 0.000287646484375,
      "step": 47128,
      "training_step_time": 0.3854091167449951
    },
    {
      "epoch": 0.000287652587890625,
      "model_forward_time": 0.11493492126464844,
      "step": 47129
    },
    {
      "epoch": 0.000287652587890625,
      "step": 47129,
      "training_step_time": 0.7418539524078369
    },
    {
      "epoch": 0.00028765869140625,
      "grad_norm": 0.14350970089435577,
      "learning_rate": 1.2060356782648503e-05,
      "loss": 0.04,
      "step": 47130
    },
    {
      "epoch": 0.00028765869140625,
      "model_forward_time": 0.11501073837280273,
      "step": 47130
    },
    {
      "epoch": 0.00028765869140625,
      "step": 47130,
      "training_step_time": 0.40625667572021484
    },
    {
      "epoch": 0.000287664794921875,
      "model_forward_time": 0.11396479606628418,
      "step": 47131
    },
    {
      "epoch": 0.000287664794921875,
      "step": 47131,
      "training_step_time": 0.4073331356048584
    },
    {
      "epoch": 0.0002876708984375,
      "model_forward_time": 0.1155848503112793,
      "step": 47132
    },
    {
      "epoch": 0.0002876708984375,
      "step": 47132,
      "training_step_time": 0.38806581497192383
    },
    {
      "epoch": 0.000287677001953125,
      "model_forward_time": 0.11427474021911621,
      "step": 47133
    },
    {
      "epoch": 0.000287677001953125,
      "step": 47133,
      "training_step_time": 0.42739391326904297
    },
    {
      "epoch": 0.00028768310546875,
      "model_forward_time": 0.11500668525695801,
      "step": 47134
    },
    {
      "epoch": 0.00028768310546875,
      "step": 47134,
      "training_step_time": 0.43494629859924316
    },
    {
      "epoch": 0.000287689208984375,
      "model_forward_time": 0.11507868766784668,
      "step": 47135
    },
    {
      "epoch": 0.000287689208984375,
      "step": 47135,
      "training_step_time": 0.8941171169281006
    },
    {
      "epoch": 0.0002876953125,
      "model_forward_time": 0.11564397811889648,
      "step": 47136
    },
    {
      "epoch": 0.0002876953125,
      "step": 47136,
      "training_step_time": 0.45026111602783203
    },
    {
      "epoch": 0.000287701416015625,
      "model_forward_time": 0.11399602890014648,
      "step": 47137
    },
    {
      "epoch": 0.000287701416015625,
      "step": 47137,
      "training_step_time": 0.3843357563018799
    },
    {
      "epoch": 0.00028770751953125,
      "model_forward_time": 0.11370015144348145,
      "step": 47138
    },
    {
      "epoch": 0.00028770751953125,
      "step": 47138,
      "training_step_time": 0.3838338851928711
    },
    {
      "epoch": 0.000287713623046875,
      "model_forward_time": 0.11501336097717285,
      "step": 47139
    },
    {
      "epoch": 0.000287713623046875,
      "step": 47139,
      "training_step_time": 0.38382387161254883
    },
    {
      "epoch": 0.0002877197265625,
      "grad_norm": 0.10475369542837143,
      "learning_rate": 1.2042413249053796e-05,
      "loss": 0.0361,
      "step": 47140
    },
    {
      "epoch": 0.0002877197265625,
      "model_forward_time": 0.11370635032653809,
      "step": 47140
    },
    {
      "epoch": 0.0002877197265625,
      "step": 47140,
      "training_step_time": 0.38301944732666016
    },
    {
      "epoch": 0.000287725830078125,
      "model_forward_time": 0.11521410942077637,
      "step": 47141
    },
    {
      "epoch": 0.000287725830078125,
      "step": 47141,
      "training_step_time": 0.3996760845184326
    },
    {
      "epoch": 0.00028773193359375,
      "model_forward_time": 0.11506342887878418,
      "step": 47142
    },
    {
      "epoch": 0.00028773193359375,
      "step": 47142,
      "training_step_time": 0.3966867923736572
    },
    {
      "epoch": 0.000287738037109375,
      "model_forward_time": 0.1146554946899414,
      "step": 47143
    },
    {
      "epoch": 0.000287738037109375,
      "step": 47143,
      "training_step_time": 0.40372347831726074
    },
    {
      "epoch": 0.000287744140625,
      "model_forward_time": 0.11515402793884277,
      "step": 47144
    },
    {
      "epoch": 0.000287744140625,
      "step": 47144,
      "training_step_time": 0.46242570877075195
    },
    {
      "epoch": 0.000287750244140625,
      "model_forward_time": 0.11502599716186523,
      "step": 47145
    },
    {
      "epoch": 0.000287750244140625,
      "step": 47145,
      "training_step_time": 0.3872358798980713
    },
    {
      "epoch": 0.00028775634765625,
      "model_forward_time": 0.11538839340209961,
      "step": 47146
    },
    {
      "epoch": 0.00028775634765625,
      "step": 47146,
      "training_step_time": 0.4229304790496826
    },
    {
      "epoch": 0.000287762451171875,
      "model_forward_time": 0.11518144607543945,
      "step": 47147
    },
    {
      "epoch": 0.000287762451171875,
      "step": 47147,
      "training_step_time": 0.8983726501464844
    },
    {
      "epoch": 0.0002877685546875,
      "model_forward_time": 0.11488533020019531,
      "step": 47148
    },
    {
      "epoch": 0.0002877685546875,
      "step": 47148,
      "training_step_time": 0.4282207489013672
    },
    {
      "epoch": 0.000287774658203125,
      "model_forward_time": 0.1149446964263916,
      "step": 47149
    },
    {
      "epoch": 0.000287774658203125,
      "step": 47149,
      "training_step_time": 0.47510313987731934
    },
    {
      "epoch": 0.00028778076171875,
      "grad_norm": 0.07623335719108582,
      "learning_rate": 1.202448124597173e-05,
      "loss": 0.0337,
      "step": 47150
    },
    {
      "epoch": 0.00028778076171875,
      "model_forward_time": 0.11409950256347656,
      "step": 47150
    },
    {
      "epoch": 0.00028778076171875,
      "step": 47150,
      "training_step_time": 0.38811612129211426
    },
    {
      "epoch": 0.000287786865234375,
      "model_forward_time": 0.11414146423339844,
      "step": 47151
    },
    {
      "epoch": 0.000287786865234375,
      "step": 47151,
      "training_step_time": 0.37693357467651367
    },
    {
      "epoch": 0.00028779296875,
      "model_forward_time": 0.11443758010864258,
      "step": 47152
    },
    {
      "epoch": 0.00028779296875,
      "step": 47152,
      "training_step_time": 0.3785262107849121
    },
    {
      "epoch": 0.000287799072265625,
      "model_forward_time": 0.1152811050415039,
      "step": 47153
    },
    {
      "epoch": 0.000287799072265625,
      "step": 47153,
      "training_step_time": 0.4986848831176758
    },
    {
      "epoch": 0.00028780517578125,
      "model_forward_time": 0.11500978469848633,
      "step": 47154
    },
    {
      "epoch": 0.00028780517578125,
      "step": 47154,
      "training_step_time": 0.39546656608581543
    },
    {
      "epoch": 0.000287811279296875,
      "model_forward_time": 0.11377787590026855,
      "step": 47155
    },
    {
      "epoch": 0.000287811279296875,
      "step": 47155,
      "training_step_time": 0.4126856327056885
    },
    {
      "epoch": 0.0002878173828125,
      "model_forward_time": 0.11560773849487305,
      "step": 47156
    },
    {
      "epoch": 0.0002878173828125,
      "step": 47156,
      "training_step_time": 0.43697428703308105
    },
    {
      "epoch": 0.000287823486328125,
      "model_forward_time": 0.11509537696838379,
      "step": 47157
    },
    {
      "epoch": 0.000287823486328125,
      "step": 47157,
      "training_step_time": 0.41561460494995117
    },
    {
      "epoch": 0.00028782958984375,
      "model_forward_time": 0.11478424072265625,
      "step": 47158
    },
    {
      "epoch": 0.00028782958984375,
      "step": 47158,
      "training_step_time": 0.47898077964782715
    },
    {
      "epoch": 0.000287835693359375,
      "model_forward_time": 0.11550545692443848,
      "step": 47159
    },
    {
      "epoch": 0.000287835693359375,
      "step": 47159,
      "training_step_time": 0.5232605934143066
    },
    {
      "epoch": 0.000287841796875,
      "grad_norm": 0.12828329205513,
      "learning_rate": 1.2006560778849578e-05,
      "loss": 0.0403,
      "step": 47160
    },
    {
      "epoch": 0.000287841796875,
      "model_forward_time": 0.11395430564880371,
      "step": 47160
    },
    {
      "epoch": 0.000287841796875,
      "step": 47160,
      "training_step_time": 0.3895411491394043
    },
    {
      "epoch": 0.000287847900390625,
      "model_forward_time": 0.11442065238952637,
      "step": 47161
    },
    {
      "epoch": 0.000287847900390625,
      "step": 47161,
      "training_step_time": 0.36484622955322266
    },
    {
      "epoch": 0.00028785400390625,
      "model_forward_time": 0.1143035888671875,
      "step": 47162
    },
    {
      "epoch": 0.00028785400390625,
      "step": 47162,
      "training_step_time": 0.4633815288543701
    },
    {
      "epoch": 0.000287860107421875,
      "model_forward_time": 0.11439204216003418,
      "step": 47163
    },
    {
      "epoch": 0.000287860107421875,
      "step": 47163,
      "training_step_time": 0.45296525955200195
    },
    {
      "epoch": 0.0002878662109375,
      "model_forward_time": 0.1151893138885498,
      "step": 47164
    },
    {
      "epoch": 0.0002878662109375,
      "step": 47164,
      "training_step_time": 0.3837747573852539
    },
    {
      "epoch": 0.000287872314453125,
      "model_forward_time": 0.11436295509338379,
      "step": 47165
    },
    {
      "epoch": 0.000287872314453125,
      "step": 47165,
      "training_step_time": 0.38149118423461914
    },
    {
      "epoch": 0.00028787841796875,
      "model_forward_time": 0.11496210098266602,
      "step": 47166
    },
    {
      "epoch": 0.00028787841796875,
      "step": 47166,
      "training_step_time": 0.3908846378326416
    },
    {
      "epoch": 0.000287884521484375,
      "model_forward_time": 0.11508822441101074,
      "step": 47167
    },
    {
      "epoch": 0.000287884521484375,
      "step": 47167,
      "training_step_time": 0.3985443115234375
    },
    {
      "epoch": 0.000287890625,
      "model_forward_time": 0.1147160530090332,
      "step": 47168
    },
    {
      "epoch": 0.000287890625,
      "step": 47168,
      "training_step_time": 0.3912525177001953
    },
    {
      "epoch": 0.000287896728515625,
      "model_forward_time": 0.11507678031921387,
      "step": 47169
    },
    {
      "epoch": 0.000287896728515625,
      "step": 47169,
      "training_step_time": 0.39985084533691406
    },
    {
      "epoch": 0.00028790283203125,
      "grad_norm": 0.10186849534511566,
      "learning_rate": 1.1988651853131088e-05,
      "loss": 0.0359,
      "step": 47170
    },
    {
      "epoch": 0.00028790283203125,
      "model_forward_time": 0.11479663848876953,
      "step": 47170
    },
    {
      "epoch": 0.00028790283203125,
      "step": 47170,
      "training_step_time": 0.41331934928894043
    },
    {
      "epoch": 0.000287908935546875,
      "model_forward_time": 0.11531424522399902,
      "step": 47171
    },
    {
      "epoch": 0.000287908935546875,
      "step": 47171,
      "training_step_time": 1.0837786197662354
    },
    {
      "epoch": 0.0002879150390625,
      "model_forward_time": 0.11439895629882812,
      "step": 47172
    },
    {
      "epoch": 0.0002879150390625,
      "step": 47172,
      "training_step_time": 0.3924691677093506
    },
    {
      "epoch": 0.000287921142578125,
      "model_forward_time": 0.11393237113952637,
      "step": 47173
    },
    {
      "epoch": 0.000287921142578125,
      "step": 47173,
      "training_step_time": 0.388629674911499
    },
    {
      "epoch": 0.00028792724609375,
      "model_forward_time": 0.11389684677124023,
      "step": 47174
    },
    {
      "epoch": 0.00028792724609375,
      "step": 47174,
      "training_step_time": 0.3953273296356201
    },
    {
      "epoch": 0.000287933349609375,
      "model_forward_time": 0.11435103416442871,
      "step": 47175
    },
    {
      "epoch": 0.000287933349609375,
      "step": 47175,
      "training_step_time": 0.4400341510772705
    },
    {
      "epoch": 0.000287939453125,
      "model_forward_time": 0.1140284538269043,
      "step": 47176
    },
    {
      "epoch": 0.000287939453125,
      "step": 47176,
      "training_step_time": 0.4955637454986572
    },
    {
      "epoch": 0.000287945556640625,
      "model_forward_time": 0.11485481262207031,
      "step": 47177
    },
    {
      "epoch": 0.000287945556640625,
      "step": 47177,
      "training_step_time": 0.6254925727844238
    },
    {
      "epoch": 0.00028795166015625,
      "model_forward_time": 0.11441206932067871,
      "step": 47178
    },
    {
      "epoch": 0.00028795166015625,
      "step": 47178,
      "training_step_time": 0.39250946044921875
    },
    {
      "epoch": 0.000287957763671875,
      "model_forward_time": 0.11469435691833496,
      "step": 47179
    },
    {
      "epoch": 0.000287957763671875,
      "step": 47179,
      "training_step_time": 0.39279913902282715
    },
    {
      "epoch": 0.0002879638671875,
      "grad_norm": 0.10657720267772675,
      "learning_rate": 1.1970754474256563e-05,
      "loss": 0.0356,
      "step": 47180
    },
    {
      "epoch": 0.0002879638671875,
      "model_forward_time": 0.1144256591796875,
      "step": 47180
    },
    {
      "epoch": 0.0002879638671875,
      "step": 47180,
      "training_step_time": 0.38495755195617676
    },
    {
      "epoch": 0.000287969970703125,
      "model_forward_time": 0.11505317687988281,
      "step": 47181
    },
    {
      "epoch": 0.000287969970703125,
      "step": 47181,
      "training_step_time": 0.3831367492675781
    },
    {
      "epoch": 0.00028797607421875,
      "model_forward_time": 0.11499738693237305,
      "step": 47182
    },
    {
      "epoch": 0.00028797607421875,
      "step": 47182,
      "training_step_time": 0.3877396583557129
    },
    {
      "epoch": 0.000287982177734375,
      "model_forward_time": 0.11515474319458008,
      "step": 47183
    },
    {
      "epoch": 0.000287982177734375,
      "step": 47183,
      "training_step_time": 0.8770906925201416
    },
    {
      "epoch": 0.00028798828125,
      "model_forward_time": 0.1141824722290039,
      "step": 47184
    },
    {
      "epoch": 0.00028798828125,
      "step": 47184,
      "training_step_time": 0.4131777286529541
    },
    {
      "epoch": 0.000287994384765625,
      "model_forward_time": 0.11426734924316406,
      "step": 47185
    },
    {
      "epoch": 0.000287994384765625,
      "step": 47185,
      "training_step_time": 0.3888072967529297
    },
    {
      "epoch": 0.00028800048828125,
      "model_forward_time": 0.11415576934814453,
      "step": 47186
    },
    {
      "epoch": 0.00028800048828125,
      "step": 47186,
      "training_step_time": 0.38456058502197266
    },
    {
      "epoch": 0.000288006591796875,
      "model_forward_time": 0.11435961723327637,
      "step": 47187
    },
    {
      "epoch": 0.000288006591796875,
      "step": 47187,
      "training_step_time": 0.4774899482727051
    },
    {
      "epoch": 0.0002880126953125,
      "model_forward_time": 0.11518549919128418,
      "step": 47188
    },
    {
      "epoch": 0.0002880126953125,
      "step": 47188,
      "training_step_time": 0.4131584167480469
    },
    {
      "epoch": 0.000288018798828125,
      "model_forward_time": 0.11498045921325684,
      "step": 47189
    },
    {
      "epoch": 0.000288018798828125,
      "step": 47189,
      "training_step_time": 0.8570575714111328
    },
    {
      "epoch": 0.00028802490234375,
      "grad_norm": 0.12612739205360413,
      "learning_rate": 1.1952868647662696e-05,
      "loss": 0.0398,
      "step": 47190
    },
    {
      "epoch": 0.00028802490234375,
      "model_forward_time": 0.11415457725524902,
      "step": 47190
    },
    {
      "epoch": 0.00028802490234375,
      "step": 47190,
      "training_step_time": 0.39368271827697754
    },
    {
      "epoch": 0.000288031005859375,
      "model_forward_time": 0.11405754089355469,
      "step": 47191
    },
    {
      "epoch": 0.000288031005859375,
      "step": 47191,
      "training_step_time": 0.4072859287261963
    },
    {
      "epoch": 0.000288037109375,
      "model_forward_time": 0.11497116088867188,
      "step": 47192
    },
    {
      "epoch": 0.000288037109375,
      "step": 47192,
      "training_step_time": 0.3911008834838867
    },
    {
      "epoch": 0.000288043212890625,
      "model_forward_time": 0.11392545700073242,
      "step": 47193
    },
    {
      "epoch": 0.000288043212890625,
      "step": 47193,
      "training_step_time": 0.3885223865509033
    },
    {
      "epoch": 0.00028804931640625,
      "model_forward_time": 0.11406898498535156,
      "step": 47194
    },
    {
      "epoch": 0.00028804931640625,
      "step": 47194,
      "training_step_time": 0.39792442321777344
    },
    {
      "epoch": 0.000288055419921875,
      "model_forward_time": 0.1157228946685791,
      "step": 47195
    },
    {
      "epoch": 0.000288055419921875,
      "step": 47195,
      "training_step_time": 1.0502240657806396
    },
    {
      "epoch": 0.0002880615234375,
      "model_forward_time": 0.1152496337890625,
      "step": 47196
    },
    {
      "epoch": 0.0002880615234375,
      "step": 47196,
      "training_step_time": 0.4395596981048584
    },
    {
      "epoch": 0.000288067626953125,
      "model_forward_time": 0.11511683464050293,
      "step": 47197
    },
    {
      "epoch": 0.000288067626953125,
      "step": 47197,
      "training_step_time": 0.38742971420288086
    },
    {
      "epoch": 0.00028807373046875,
      "model_forward_time": 0.11367988586425781,
      "step": 47198
    },
    {
      "epoch": 0.00028807373046875,
      "step": 47198,
      "training_step_time": 0.39290738105773926
    },
    {
      "epoch": 0.000288079833984375,
      "model_forward_time": 0.11426258087158203,
      "step": 47199
    },
    {
      "epoch": 0.000288079833984375,
      "step": 47199,
      "training_step_time": 0.4169886112213135
    },
    {
      "epoch": 0.0002880859375,
      "grad_norm": 0.12808924913406372,
      "learning_rate": 1.1934994378782772e-05,
      "loss": 0.0354,
      "step": 47200
    },
    {
      "epoch": 0.0002880859375,
      "model_forward_time": 0.1147615909576416,
      "step": 47200
    },
    {
      "epoch": 0.0002880859375,
      "step": 47200,
      "training_step_time": 0.41342711448669434
    },
    {
      "epoch": 0.000288092041015625,
      "model_forward_time": 0.11483097076416016,
      "step": 47201
    },
    {
      "epoch": 0.000288092041015625,
      "step": 47201,
      "training_step_time": 0.7479219436645508
    },
    {
      "epoch": 0.00028809814453125,
      "model_forward_time": 0.11411762237548828,
      "step": 47202
    },
    {
      "epoch": 0.00028809814453125,
      "step": 47202,
      "training_step_time": 0.39481139183044434
    },
    {
      "epoch": 0.000288104248046875,
      "model_forward_time": 0.11459755897521973,
      "step": 47203
    },
    {
      "epoch": 0.000288104248046875,
      "step": 47203,
      "training_step_time": 0.3931162357330322
    },
    {
      "epoch": 0.0002881103515625,
      "model_forward_time": 0.11438226699829102,
      "step": 47204
    },
    {
      "epoch": 0.0002881103515625,
      "step": 47204,
      "training_step_time": 0.37520909309387207
    },
    {
      "epoch": 0.000288116455078125,
      "model_forward_time": 0.11463260650634766,
      "step": 47205
    },
    {
      "epoch": 0.000288116455078125,
      "step": 47205,
      "training_step_time": 0.3750333786010742
    },
    {
      "epoch": 0.00028812255859375,
      "model_forward_time": 0.11432838439941406,
      "step": 47206
    },
    {
      "epoch": 0.00028812255859375,
      "step": 47206,
      "training_step_time": 0.37738966941833496
    },
    {
      "epoch": 0.000288128662109375,
      "model_forward_time": 0.11538982391357422,
      "step": 47207
    },
    {
      "epoch": 0.000288128662109375,
      "step": 47207,
      "training_step_time": 1.0723052024841309
    },
    {
      "epoch": 0.000288134765625,
      "model_forward_time": 0.11437129974365234,
      "step": 47208
    },
    {
      "epoch": 0.000288134765625,
      "step": 47208,
      "training_step_time": 0.45627641677856445
    },
    {
      "epoch": 0.000288140869140625,
      "model_forward_time": 0.11426281929016113,
      "step": 47209
    },
    {
      "epoch": 0.000288140869140625,
      "step": 47209,
      "training_step_time": 0.44062256813049316
    },
    {
      "epoch": 0.00028814697265625,
      "grad_norm": 0.09425389021635056,
      "learning_rate": 1.1917131673046506e-05,
      "loss": 0.0365,
      "step": 47210
    },
    {
      "epoch": 0.00028814697265625,
      "model_forward_time": 0.11351513862609863,
      "step": 47210
    },
    {
      "epoch": 0.00028814697265625,
      "step": 47210,
      "training_step_time": 0.3901398181915283
    },
    {
      "epoch": 0.000288153076171875,
      "model_forward_time": 0.1131441593170166,
      "step": 47211
    },
    {
      "epoch": 0.000288153076171875,
      "step": 47211,
      "training_step_time": 0.38059425354003906
    },
    {
      "epoch": 0.0002881591796875,
      "model_forward_time": 0.1141810417175293,
      "step": 47212
    },
    {
      "epoch": 0.0002881591796875,
      "step": 47212,
      "training_step_time": 0.3994605541229248
    },
    {
      "epoch": 0.000288165283203125,
      "model_forward_time": 0.11481118202209473,
      "step": 47213
    },
    {
      "epoch": 0.000288165283203125,
      "step": 47213,
      "training_step_time": 0.3959810733795166
    },
    {
      "epoch": 0.00028817138671875,
      "model_forward_time": 0.11474132537841797,
      "step": 47214
    },
    {
      "epoch": 0.00028817138671875,
      "step": 47214,
      "training_step_time": 0.49747300148010254
    },
    {
      "epoch": 0.000288177490234375,
      "model_forward_time": 0.11481213569641113,
      "step": 47215
    },
    {
      "epoch": 0.000288177490234375,
      "step": 47215,
      "training_step_time": 0.4149491786956787
    },
    {
      "epoch": 0.00028818359375,
      "model_forward_time": 0.11632800102233887,
      "step": 47216
    },
    {
      "epoch": 0.00028818359375,
      "step": 47216,
      "training_step_time": 0.4154012203216553
    },
    {
      "epoch": 0.000288189697265625,
      "model_forward_time": 0.11497354507446289,
      "step": 47217
    },
    {
      "epoch": 0.000288189697265625,
      "step": 47217,
      "training_step_time": 0.39060211181640625
    },
    {
      "epoch": 0.00028819580078125,
      "model_forward_time": 0.11536860466003418,
      "step": 47218
    },
    {
      "epoch": 0.00028819580078125,
      "step": 47218,
      "training_step_time": 0.37889933586120605
    },
    {
      "epoch": 0.000288201904296875,
      "model_forward_time": 0.11466431617736816,
      "step": 47219
    },
    {
      "epoch": 0.000288201904296875,
      "step": 47219,
      "training_step_time": 0.7801182270050049
    },
    {
      "epoch": 0.0002882080078125,
      "grad_norm": 0.09041054546833038,
      "learning_rate": 1.1899280535880119e-05,
      "loss": 0.0351,
      "step": 47220
    },
    {
      "epoch": 0.0002882080078125,
      "model_forward_time": 0.11450362205505371,
      "step": 47220
    },
    {
      "epoch": 0.0002882080078125,
      "step": 47220,
      "training_step_time": 0.3928334712982178
    },
    {
      "epoch": 0.000288214111328125,
      "model_forward_time": 0.11419034004211426,
      "step": 47221
    },
    {
      "epoch": 0.000288214111328125,
      "step": 47221,
      "training_step_time": 0.4317135810852051
    },
    {
      "epoch": 0.00028822021484375,
      "model_forward_time": 0.1143796443939209,
      "step": 47222
    },
    {
      "epoch": 0.00028822021484375,
      "step": 47222,
      "training_step_time": 0.42569899559020996
    },
    {
      "epoch": 0.000288226318359375,
      "model_forward_time": 0.11442828178405762,
      "step": 47223
    },
    {
      "epoch": 0.000288226318359375,
      "step": 47223,
      "training_step_time": 0.38317155838012695
    },
    {
      "epoch": 0.000288232421875,
      "model_forward_time": 0.11441326141357422,
      "step": 47224
    },
    {
      "epoch": 0.000288232421875,
      "step": 47224,
      "training_step_time": 0.3830432891845703
    },
    {
      "epoch": 0.000288238525390625,
      "model_forward_time": 0.11469864845275879,
      "step": 47225
    },
    {
      "epoch": 0.000288238525390625,
      "step": 47225,
      "training_step_time": 0.6054532527923584
    },
    {
      "epoch": 0.00028824462890625,
      "model_forward_time": 0.11431431770324707,
      "step": 47226
    },
    {
      "epoch": 0.00028824462890625,
      "step": 47226,
      "training_step_time": 0.4472370147705078
    },
    {
      "epoch": 0.000288250732421875,
      "model_forward_time": 0.11425900459289551,
      "step": 47227
    },
    {
      "epoch": 0.000288250732421875,
      "step": 47227,
      "training_step_time": 0.524587869644165
    },
    {
      "epoch": 0.0002882568359375,
      "model_forward_time": 0.11492252349853516,
      "step": 47228
    },
    {
      "epoch": 0.0002882568359375,
      "step": 47228,
      "training_step_time": 0.39800381660461426
    },
    {
      "epoch": 0.000288262939453125,
      "model_forward_time": 0.11486124992370605,
      "step": 47229
    },
    {
      "epoch": 0.000288262939453125,
      "step": 47229,
      "training_step_time": 0.48979902267456055
    },
    {
      "epoch": 0.00028826904296875,
      "grad_norm": 0.11246204376220703,
      "learning_rate": 1.1881440972706315e-05,
      "loss": 0.0418,
      "step": 47230
    },
    {
      "epoch": 0.00028826904296875,
      "model_forward_time": 0.11533546447753906,
      "step": 47230
    },
    {
      "epoch": 0.00028826904296875,
      "step": 47230,
      "training_step_time": 0.40791893005371094
    },
    {
      "epoch": 0.000288275146484375,
      "model_forward_time": 0.11551403999328613,
      "step": 47231
    },
    {
      "epoch": 0.000288275146484375,
      "step": 47231,
      "training_step_time": 0.37363171577453613
    },
    {
      "epoch": 0.00028828125,
      "model_forward_time": 0.11543965339660645,
      "step": 47232
    },
    {
      "epoch": 0.00028828125,
      "step": 47232,
      "training_step_time": 0.3857250213623047
    },
    {
      "epoch": 0.000288287353515625,
      "model_forward_time": 0.11511969566345215,
      "step": 47233
    },
    {
      "epoch": 0.000288287353515625,
      "step": 47233,
      "training_step_time": 0.38622426986694336
    },
    {
      "epoch": 0.00028829345703125,
      "model_forward_time": 0.11523032188415527,
      "step": 47234
    },
    {
      "epoch": 0.00028829345703125,
      "step": 47234,
      "training_step_time": 0.386991024017334
    },
    {
      "epoch": 0.000288299560546875,
      "model_forward_time": 0.1165761947631836,
      "step": 47235
    },
    {
      "epoch": 0.000288299560546875,
      "step": 47235,
      "training_step_time": 0.3795032501220703
    },
    {
      "epoch": 0.0002883056640625,
      "model_forward_time": 0.1154017448425293,
      "step": 47236
    },
    {
      "epoch": 0.0002883056640625,
      "step": 47236,
      "training_step_time": 0.39736104011535645
    },
    {
      "epoch": 0.000288311767578125,
      "model_forward_time": 0.11475896835327148,
      "step": 47237
    },
    {
      "epoch": 0.000288311767578125,
      "step": 47237,
      "training_step_time": 0.5576822757720947
    },
    {
      "epoch": 0.00028831787109375,
      "model_forward_time": 0.11485934257507324,
      "step": 47238
    },
    {
      "epoch": 0.00028831787109375,
      "step": 47238,
      "training_step_time": 0.4031987190246582
    },
    {
      "epoch": 0.000288323974609375,
      "model_forward_time": 0.11502623558044434,
      "step": 47239
    },
    {
      "epoch": 0.000288323974609375,
      "step": 47239,
      "training_step_time": 0.39471936225891113
    },
    {
      "epoch": 0.000288330078125,
      "grad_norm": 0.09725400060415268,
      "learning_rate": 1.1863612988944267e-05,
      "loss": 0.0347,
      "step": 47240
    },
    {
      "epoch": 0.000288330078125,
      "model_forward_time": 0.11568307876586914,
      "step": 47240
    },
    {
      "epoch": 0.000288330078125,
      "step": 47240,
      "training_step_time": 0.41844654083251953
    },
    {
      "epoch": 0.000288336181640625,
      "model_forward_time": 0.1150672435760498,
      "step": 47241
    },
    {
      "epoch": 0.000288336181640625,
      "step": 47241,
      "training_step_time": 0.39307570457458496
    },
    {
      "epoch": 0.00028834228515625,
      "model_forward_time": 0.11462688446044922,
      "step": 47242
    },
    {
      "epoch": 0.00028834228515625,
      "step": 47242,
      "training_step_time": 0.366621732711792
    },
    {
      "epoch": 0.000288348388671875,
      "model_forward_time": 0.11486649513244629,
      "step": 47243
    },
    {
      "epoch": 0.000288348388671875,
      "step": 47243,
      "training_step_time": 0.5815548896789551
    },
    {
      "epoch": 0.0002883544921875,
      "model_forward_time": 0.11491274833679199,
      "step": 47244
    },
    {
      "epoch": 0.0002883544921875,
      "step": 47244,
      "training_step_time": 0.4515676498413086
    },
    {
      "epoch": 0.000288360595703125,
      "model_forward_time": 0.11449408531188965,
      "step": 47245
    },
    {
      "epoch": 0.000288360595703125,
      "step": 47245,
      "training_step_time": 0.3845479488372803
    },
    {
      "epoch": 0.00028836669921875,
      "model_forward_time": 0.11535310745239258,
      "step": 47246
    },
    {
      "epoch": 0.00028836669921875,
      "step": 47246,
      "training_step_time": 0.40202856063842773
    },
    {
      "epoch": 0.000288372802734375,
      "model_forward_time": 0.11455893516540527,
      "step": 47247
    },
    {
      "epoch": 0.000288372802734375,
      "step": 47247,
      "training_step_time": 0.41938090324401855
    },
    {
      "epoch": 0.00028837890625,
      "model_forward_time": 0.11450791358947754,
      "step": 47248
    },
    {
      "epoch": 0.00028837890625,
      "step": 47248,
      "training_step_time": 0.41330766677856445
    },
    {
      "epoch": 0.000288385009765625,
      "model_forward_time": 0.11508965492248535,
      "step": 47249
    },
    {
      "epoch": 0.000288385009765625,
      "step": 47249,
      "training_step_time": 0.47088623046875
    },
    {
      "epoch": 0.00028839111328125,
      "grad_norm": 0.12636615335941315,
      "learning_rate": 1.1845796590009683e-05,
      "loss": 0.0372,
      "step": 47250
    },
    {
      "epoch": 0.00028839111328125,
      "model_forward_time": 0.11481690406799316,
      "step": 47250
    },
    {
      "epoch": 0.00028839111328125,
      "step": 47250,
      "training_step_time": 0.4407966136932373
    },
    {
      "epoch": 0.000288397216796875,
      "model_forward_time": 0.11500215530395508,
      "step": 47251
    },
    {
      "epoch": 0.000288397216796875,
      "step": 47251,
      "training_step_time": 0.39560508728027344
    },
    {
      "epoch": 0.0002884033203125,
      "model_forward_time": 0.11596536636352539,
      "step": 47252
    },
    {
      "epoch": 0.0002884033203125,
      "step": 47252,
      "training_step_time": 0.3914756774902344
    },
    {
      "epoch": 0.000288409423828125,
      "model_forward_time": 0.11494803428649902,
      "step": 47253
    },
    {
      "epoch": 0.000288409423828125,
      "step": 47253,
      "training_step_time": 0.39342188835144043
    },
    {
      "epoch": 0.00028841552734375,
      "model_forward_time": 0.1154778003692627,
      "step": 47254
    },
    {
      "epoch": 0.00028841552734375,
      "step": 47254,
      "training_step_time": 0.3984801769256592
    },
    {
      "epoch": 0.000288421630859375,
      "model_forward_time": 0.11465954780578613,
      "step": 47255
    },
    {
      "epoch": 0.000288421630859375,
      "step": 47255,
      "training_step_time": 0.4030015468597412
    },
    {
      "epoch": 0.000288427734375,
      "model_forward_time": 0.11641502380371094,
      "step": 47256
    },
    {
      "epoch": 0.000288427734375,
      "step": 47256,
      "training_step_time": 0.4000816345214844
    },
    {
      "epoch": 0.000288433837890625,
      "model_forward_time": 0.11497879028320312,
      "step": 47257
    },
    {
      "epoch": 0.000288433837890625,
      "step": 47257,
      "training_step_time": 0.43656110763549805
    },
    {
      "epoch": 0.00028843994140625,
      "model_forward_time": 0.11503791809082031,
      "step": 47258
    },
    {
      "epoch": 0.00028843994140625,
      "step": 47258,
      "training_step_time": 0.4369330406188965
    },
    {
      "epoch": 0.000288446044921875,
      "model_forward_time": 0.11519336700439453,
      "step": 47259
    },
    {
      "epoch": 0.000288446044921875,
      "step": 47259,
      "training_step_time": 0.4244508743286133
    },
    {
      "epoch": 0.0002884521484375,
      "grad_norm": 0.13539491593837738,
      "learning_rate": 1.1827991781314667e-05,
      "loss": 0.0339,
      "step": 47260
    },
    {
      "epoch": 0.0002884521484375,
      "model_forward_time": 0.11489343643188477,
      "step": 47260
    },
    {
      "epoch": 0.0002884521484375,
      "step": 47260,
      "training_step_time": 0.46808767318725586
    },
    {
      "epoch": 0.000288458251953125,
      "model_forward_time": 0.11464810371398926,
      "step": 47261
    },
    {
      "epoch": 0.000288458251953125,
      "step": 47261,
      "training_step_time": 0.42444467544555664
    },
    {
      "epoch": 0.00028846435546875,
      "model_forward_time": 0.11517667770385742,
      "step": 47262
    },
    {
      "epoch": 0.00028846435546875,
      "step": 47262,
      "training_step_time": 0.44997215270996094
    },
    {
      "epoch": 0.000288470458984375,
      "model_forward_time": 0.11479616165161133,
      "step": 47263
    },
    {
      "epoch": 0.000288470458984375,
      "step": 47263,
      "training_step_time": 0.38791751861572266
    },
    {
      "epoch": 0.0002884765625,
      "model_forward_time": 0.11429357528686523,
      "step": 47264
    },
    {
      "epoch": 0.0002884765625,
      "step": 47264,
      "training_step_time": 0.4382295608520508
    },
    {
      "epoch": 0.000288482666015625,
      "model_forward_time": 0.11511754989624023,
      "step": 47265
    },
    {
      "epoch": 0.000288482666015625,
      "step": 47265,
      "training_step_time": 0.3856387138366699
    },
    {
      "epoch": 0.00028848876953125,
      "model_forward_time": 0.1146996021270752,
      "step": 47266
    },
    {
      "epoch": 0.00028848876953125,
      "step": 47266,
      "training_step_time": 0.39969658851623535
    },
    {
      "epoch": 0.000288494873046875,
      "model_forward_time": 0.11466073989868164,
      "step": 47267
    },
    {
      "epoch": 0.000288494873046875,
      "step": 47267,
      "training_step_time": 0.8141450881958008
    },
    {
      "epoch": 0.0002885009765625,
      "model_forward_time": 0.11429810523986816,
      "step": 47268
    },
    {
      "epoch": 0.0002885009765625,
      "step": 47268,
      "training_step_time": 0.3909883499145508
    },
    {
      "epoch": 0.000288507080078125,
      "model_forward_time": 0.11478519439697266,
      "step": 47269
    },
    {
      "epoch": 0.000288507080078125,
      "step": 47269,
      "training_step_time": 0.4360072612762451
    },
    {
      "epoch": 0.00028851318359375,
      "grad_norm": 0.08960358798503876,
      "learning_rate": 1.1810198568267905e-05,
      "loss": 0.0328,
      "step": 47270
    },
    {
      "epoch": 0.00028851318359375,
      "model_forward_time": 0.11492013931274414,
      "step": 47270
    },
    {
      "epoch": 0.00028851318359375,
      "step": 47270,
      "training_step_time": 0.45795202255249023
    },
    {
      "epoch": 0.000288519287109375,
      "model_forward_time": 0.11419486999511719,
      "step": 47271
    },
    {
      "epoch": 0.000288519287109375,
      "step": 47271,
      "training_step_time": 0.46193456649780273
    },
    {
      "epoch": 0.000288525390625,
      "model_forward_time": 0.11461758613586426,
      "step": 47272
    },
    {
      "epoch": 0.000288525390625,
      "step": 47272,
      "training_step_time": 0.3982675075531006
    },
    {
      "epoch": 0.000288531494140625,
      "model_forward_time": 0.11504125595092773,
      "step": 47273
    },
    {
      "epoch": 0.000288531494140625,
      "step": 47273,
      "training_step_time": 0.4774210453033447
    },
    {
      "epoch": 0.00028853759765625,
      "model_forward_time": 0.11577939987182617,
      "step": 47274
    },
    {
      "epoch": 0.00028853759765625,
      "step": 47274,
      "training_step_time": 0.4535665512084961
    },
    {
      "epoch": 0.000288543701171875,
      "model_forward_time": 0.11614537239074707,
      "step": 47275
    },
    {
      "epoch": 0.000288543701171875,
      "step": 47275,
      "training_step_time": 0.39063262939453125
    },
    {
      "epoch": 0.0002885498046875,
      "model_forward_time": 0.1151580810546875,
      "step": 47276
    },
    {
      "epoch": 0.0002885498046875,
      "step": 47276,
      "training_step_time": 0.3888876438140869
    },
    {
      "epoch": 0.000288555908203125,
      "model_forward_time": 0.11459660530090332,
      "step": 47277
    },
    {
      "epoch": 0.000288555908203125,
      "step": 47277,
      "training_step_time": 0.47404980659484863
    },
    {
      "epoch": 0.00028856201171875,
      "model_forward_time": 0.11508989334106445,
      "step": 47278
    },
    {
      "epoch": 0.00028856201171875,
      "step": 47278,
      "training_step_time": 0.3978421688079834
    },
    {
      "epoch": 0.000288568115234375,
      "model_forward_time": 0.1147308349609375,
      "step": 47279
    },
    {
      "epoch": 0.000288568115234375,
      "step": 47279,
      "training_step_time": 0.8333890438079834
    },
    {
      "epoch": 0.00028857421875,
      "grad_norm": 0.10872157663106918,
      "learning_rate": 1.1792416956274444e-05,
      "loss": 0.0369,
      "step": 47280
    },
    {
      "epoch": 0.00028857421875,
      "model_forward_time": 0.11451339721679688,
      "step": 47280
    },
    {
      "epoch": 0.00028857421875,
      "step": 47280,
      "training_step_time": 0.39769935607910156
    },
    {
      "epoch": 0.000288580322265625,
      "model_forward_time": 0.11430788040161133,
      "step": 47281
    },
    {
      "epoch": 0.000288580322265625,
      "step": 47281,
      "training_step_time": 0.3922007083892822
    },
    {
      "epoch": 0.00028858642578125,
      "model_forward_time": 0.11446332931518555,
      "step": 47282
    },
    {
      "epoch": 0.00028858642578125,
      "step": 47282,
      "training_step_time": 0.49449634552001953
    },
    {
      "epoch": 0.000288592529296875,
      "model_forward_time": 0.11417579650878906,
      "step": 47283
    },
    {
      "epoch": 0.000288592529296875,
      "step": 47283,
      "training_step_time": 0.4179842472076416
    },
    {
      "epoch": 0.0002885986328125,
      "model_forward_time": 0.11465311050415039,
      "step": 47284
    },
    {
      "epoch": 0.0002885986328125,
      "step": 47284,
      "training_step_time": 0.45891809463500977
    },
    {
      "epoch": 0.000288604736328125,
      "model_forward_time": 0.11502265930175781,
      "step": 47285
    },
    {
      "epoch": 0.000288604736328125,
      "step": 47285,
      "training_step_time": 0.5585842132568359
    },
    {
      "epoch": 0.00028861083984375,
      "model_forward_time": 0.11468291282653809,
      "step": 47286
    },
    {
      "epoch": 0.00028861083984375,
      "step": 47286,
      "training_step_time": 0.442028284072876
    },
    {
      "epoch": 0.000288616943359375,
      "model_forward_time": 0.11456036567687988,
      "step": 47287
    },
    {
      "epoch": 0.000288616943359375,
      "step": 47287,
      "training_step_time": 0.39564061164855957
    },
    {
      "epoch": 0.000288623046875,
      "model_forward_time": 0.1150214672088623,
      "step": 47288
    },
    {
      "epoch": 0.000288623046875,
      "step": 47288,
      "training_step_time": 0.39960622787475586
    },
    {
      "epoch": 0.000288629150390625,
      "model_forward_time": 0.11564397811889648,
      "step": 47289
    },
    {
      "epoch": 0.000288629150390625,
      "step": 47289,
      "training_step_time": 0.4420337677001953
    },
    {
      "epoch": 0.00028863525390625,
      "grad_norm": 0.1418095827102661,
      "learning_rate": 1.1774646950735913e-05,
      "loss": 0.0382,
      "step": 47290
    },
    {
      "epoch": 0.00028863525390625,
      "model_forward_time": 0.11460542678833008,
      "step": 47290
    },
    {
      "epoch": 0.00028863525390625,
      "step": 47290,
      "training_step_time": 0.4780709743499756
    },
    {
      "epoch": 0.000288641357421875,
      "model_forward_time": 0.11478495597839355,
      "step": 47291
    },
    {
      "epoch": 0.000288641357421875,
      "step": 47291,
      "training_step_time": 1.1710691452026367
    },
    {
      "epoch": 0.0002886474609375,
      "model_forward_time": 0.11398005485534668,
      "step": 47292
    },
    {
      "epoch": 0.0002886474609375,
      "step": 47292,
      "training_step_time": 0.3927936553955078
    },
    {
      "epoch": 0.000288653564453125,
      "model_forward_time": 0.11428260803222656,
      "step": 47293
    },
    {
      "epoch": 0.000288653564453125,
      "step": 47293,
      "training_step_time": 0.3857910633087158
    },
    {
      "epoch": 0.00028865966796875,
      "model_forward_time": 0.11381292343139648,
      "step": 47294
    },
    {
      "epoch": 0.00028865966796875,
      "step": 47294,
      "training_step_time": 0.3823435306549072
    },
    {
      "epoch": 0.000288665771484375,
      "model_forward_time": 0.1145174503326416,
      "step": 47295
    },
    {
      "epoch": 0.000288665771484375,
      "step": 47295,
      "training_step_time": 0.42124295234680176
    },
    {
      "epoch": 0.000288671875,
      "model_forward_time": 0.11474442481994629,
      "step": 47296
    },
    {
      "epoch": 0.000288671875,
      "step": 47296,
      "training_step_time": 0.4034440517425537
    },
    {
      "epoch": 0.000288677978515625,
      "model_forward_time": 0.11701703071594238,
      "step": 47297
    },
    {
      "epoch": 0.000288677978515625,
      "step": 47297,
      "training_step_time": 0.6991543769836426
    },
    {
      "epoch": 0.00028868408203125,
      "model_forward_time": 0.11455273628234863,
      "step": 47298
    },
    {
      "epoch": 0.00028868408203125,
      "step": 47298,
      "training_step_time": 0.4062356948852539
    },
    {
      "epoch": 0.000288690185546875,
      "model_forward_time": 0.11473608016967773,
      "step": 47299
    },
    {
      "epoch": 0.000288690185546875,
      "step": 47299,
      "training_step_time": 0.3882575035095215
    },
    {
      "epoch": 0.0002886962890625,
      "grad_norm": 0.14064043760299683,
      "learning_rate": 1.1756888557050355e-05,
      "loss": 0.04,
      "step": 47300
    },
    {
      "epoch": 0.0002886962890625,
      "model_forward_time": 0.11450409889221191,
      "step": 47300
    },
    {
      "epoch": 0.0002886962890625,
      "step": 47300,
      "training_step_time": 0.38265061378479004
    },
    {
      "epoch": 0.000288702392578125,
      "model_forward_time": 0.11491894721984863,
      "step": 47301
    },
    {
      "epoch": 0.000288702392578125,
      "step": 47301,
      "training_step_time": 0.3915431499481201
    },
    {
      "epoch": 0.00028870849609375,
      "model_forward_time": 0.11465001106262207,
      "step": 47302
    },
    {
      "epoch": 0.00028870849609375,
      "step": 47302,
      "training_step_time": 0.43727755546569824
    },
    {
      "epoch": 0.000288714599609375,
      "model_forward_time": 0.1151268482208252,
      "step": 47303
    },
    {
      "epoch": 0.000288714599609375,
      "step": 47303,
      "training_step_time": 0.7496731281280518
    },
    {
      "epoch": 0.000288720703125,
      "model_forward_time": 0.11427712440490723,
      "step": 47304
    },
    {
      "epoch": 0.000288720703125,
      "step": 47304,
      "training_step_time": 0.38933682441711426
    },
    {
      "epoch": 0.000288726806640625,
      "model_forward_time": 0.11465048789978027,
      "step": 47305
    },
    {
      "epoch": 0.000288726806640625,
      "step": 47305,
      "training_step_time": 0.39079999923706055
    },
    {
      "epoch": 0.00028873291015625,
      "model_forward_time": 0.11420941352844238,
      "step": 47306
    },
    {
      "epoch": 0.00028873291015625,
      "step": 47306,
      "training_step_time": 0.38741278648376465
    },
    {
      "epoch": 0.000288739013671875,
      "model_forward_time": 0.11481213569641113,
      "step": 47307
    },
    {
      "epoch": 0.000288739013671875,
      "step": 47307,
      "training_step_time": 0.3890531063079834
    },
    {
      "epoch": 0.0002887451171875,
      "model_forward_time": 0.11481833457946777,
      "step": 47308
    },
    {
      "epoch": 0.0002887451171875,
      "step": 47308,
      "training_step_time": 0.43455004692077637
    },
    {
      "epoch": 0.000288751220703125,
      "model_forward_time": 0.11562871932983398,
      "step": 47309
    },
    {
      "epoch": 0.000288751220703125,
      "step": 47309,
      "training_step_time": 0.9434185028076172
    },
    {
      "epoch": 0.00028875732421875,
      "grad_norm": 0.1010897308588028,
      "learning_rate": 1.1739141780612306e-05,
      "loss": 0.0398,
      "step": 47310
    },
    {
      "epoch": 0.00028875732421875,
      "model_forward_time": 0.11407685279846191,
      "step": 47310
    },
    {
      "epoch": 0.00028875732421875,
      "step": 47310,
      "training_step_time": 0.4665982723236084
    },
    {
      "epoch": 0.000288763427734375,
      "model_forward_time": 0.1138918399810791,
      "step": 47311
    },
    {
      "epoch": 0.000288763427734375,
      "step": 47311,
      "training_step_time": 0.392528772354126
    },
    {
      "epoch": 0.00028876953125,
      "model_forward_time": 0.11421656608581543,
      "step": 47312
    },
    {
      "epoch": 0.00028876953125,
      "step": 47312,
      "training_step_time": 0.38202929496765137
    },
    {
      "epoch": 0.000288775634765625,
      "model_forward_time": 0.11407113075256348,
      "step": 47313
    },
    {
      "epoch": 0.000288775634765625,
      "step": 47313,
      "training_step_time": 0.37758588790893555
    },
    {
      "epoch": 0.00028878173828125,
      "model_forward_time": 0.1145484447479248,
      "step": 47314
    },
    {
      "epoch": 0.00028878173828125,
      "step": 47314,
      "training_step_time": 0.41712403297424316
    },
    {
      "epoch": 0.000288787841796875,
      "model_forward_time": 0.11498570442199707,
      "step": 47315
    },
    {
      "epoch": 0.000288787841796875,
      "step": 47315,
      "training_step_time": 1.0907204151153564
    },
    {
      "epoch": 0.0002887939453125,
      "model_forward_time": 0.1140890121459961,
      "step": 47316
    },
    {
      "epoch": 0.0002887939453125,
      "step": 47316,
      "training_step_time": 0.39224863052368164
    },
    {
      "epoch": 0.000288800048828125,
      "model_forward_time": 0.1140906810760498,
      "step": 47317
    },
    {
      "epoch": 0.000288800048828125,
      "step": 47317,
      "training_step_time": 0.39095520973205566
    },
    {
      "epoch": 0.00028880615234375,
      "model_forward_time": 0.11438703536987305,
      "step": 47318
    },
    {
      "epoch": 0.00028880615234375,
      "step": 47318,
      "training_step_time": 0.38147759437561035
    },
    {
      "epoch": 0.000288812255859375,
      "model_forward_time": 0.11451148986816406,
      "step": 47319
    },
    {
      "epoch": 0.000288812255859375,
      "step": 47319,
      "training_step_time": 0.37755703926086426
    },
    {
      "epoch": 0.000288818359375,
      "grad_norm": 0.1101091280579567,
      "learning_rate": 1.1721406626812764e-05,
      "loss": 0.0424,
      "step": 47320
    },
    {
      "epoch": 0.000288818359375,
      "model_forward_time": 0.11466598510742188,
      "step": 47320
    },
    {
      "epoch": 0.000288818359375,
      "step": 47320,
      "training_step_time": 0.3706188201904297
    },
    {
      "epoch": 0.000288824462890625,
      "model_forward_time": 0.11509847640991211,
      "step": 47321
    },
    {
      "epoch": 0.000288824462890625,
      "step": 47321,
      "training_step_time": 1.029738187789917
    },
    {
      "epoch": 0.00028883056640625,
      "model_forward_time": 0.1141667366027832,
      "step": 47322
    },
    {
      "epoch": 0.00028883056640625,
      "step": 47322,
      "training_step_time": 0.39176177978515625
    },
    {
      "epoch": 0.000288836669921875,
      "model_forward_time": 0.11425971984863281,
      "step": 47323
    },
    {
      "epoch": 0.000288836669921875,
      "step": 47323,
      "training_step_time": 0.4610273838043213
    },
    {
      "epoch": 0.0002888427734375,
      "model_forward_time": 0.11492443084716797,
      "step": 47324
    },
    {
      "epoch": 0.0002888427734375,
      "step": 47324,
      "training_step_time": 0.43909406661987305
    },
    {
      "epoch": 0.000288848876953125,
      "model_forward_time": 0.11426877975463867,
      "step": 47325
    },
    {
      "epoch": 0.000288848876953125,
      "step": 47325,
      "training_step_time": 0.40067195892333984
    },
    {
      "epoch": 0.00028885498046875,
      "model_forward_time": 0.1150519847869873,
      "step": 47326
    },
    {
      "epoch": 0.00028885498046875,
      "step": 47326,
      "training_step_time": 0.3990933895111084
    },
    {
      "epoch": 0.000288861083984375,
      "model_forward_time": 0.11472392082214355,
      "step": 47327
    },
    {
      "epoch": 0.000288861083984375,
      "step": 47327,
      "training_step_time": 0.6061708927154541
    },
    {
      "epoch": 0.0002888671875,
      "model_forward_time": 0.1159217357635498,
      "step": 47328
    },
    {
      "epoch": 0.0002888671875,
      "step": 47328,
      "training_step_time": 0.3906984329223633
    },
    {
      "epoch": 0.000288873291015625,
      "model_forward_time": 0.11478304862976074,
      "step": 47329
    },
    {
      "epoch": 0.000288873291015625,
      "step": 47329,
      "training_step_time": 0.39166975021362305
    },
    {
      "epoch": 0.00028887939453125,
      "grad_norm": 0.12528444826602936,
      "learning_rate": 1.1703683101039197e-05,
      "loss": 0.0379,
      "step": 47330
    },
    {
      "epoch": 0.00028887939453125,
      "model_forward_time": 0.11454033851623535,
      "step": 47330
    },
    {
      "epoch": 0.00028887939453125,
      "step": 47330,
      "training_step_time": 0.39150333404541016
    },
    {
      "epoch": 0.000288885498046875,
      "model_forward_time": 0.11473321914672852,
      "step": 47331
    },
    {
      "epoch": 0.000288885498046875,
      "step": 47331,
      "training_step_time": 0.3828587532043457
    },
    {
      "epoch": 0.0002888916015625,
      "model_forward_time": 0.11484718322753906,
      "step": 47332
    },
    {
      "epoch": 0.0002888916015625,
      "step": 47332,
      "training_step_time": 0.38640689849853516
    },
    {
      "epoch": 0.000288897705078125,
      "model_forward_time": 0.11479353904724121,
      "step": 47333
    },
    {
      "epoch": 0.000288897705078125,
      "step": 47333,
      "training_step_time": 0.9847774505615234
    },
    {
      "epoch": 0.00028890380859375,
      "model_forward_time": 0.11488509178161621,
      "step": 47334
    },
    {
      "epoch": 0.00028890380859375,
      "step": 47334,
      "training_step_time": 0.4204854965209961
    },
    {
      "epoch": 0.000288909912109375,
      "model_forward_time": 0.1138002872467041,
      "step": 47335
    },
    {
      "epoch": 0.000288909912109375,
      "step": 47335,
      "training_step_time": 0.44850587844848633
    },
    {
      "epoch": 0.000288916015625,
      "model_forward_time": 0.11437296867370605,
      "step": 47336
    },
    {
      "epoch": 0.000288916015625,
      "step": 47336,
      "training_step_time": 0.45786070823669434
    },
    {
      "epoch": 0.000288922119140625,
      "model_forward_time": 0.11426591873168945,
      "step": 47337
    },
    {
      "epoch": 0.000288922119140625,
      "step": 47337,
      "training_step_time": 0.4929935932159424
    },
    {
      "epoch": 0.00028892822265625,
      "model_forward_time": 0.11434721946716309,
      "step": 47338
    },
    {
      "epoch": 0.00028892822265625,
      "step": 47338,
      "training_step_time": 0.47733497619628906
    },
    {
      "epoch": 0.000288934326171875,
      "model_forward_time": 0.11394786834716797,
      "step": 47339
    },
    {
      "epoch": 0.000288934326171875,
      "step": 47339,
      "training_step_time": 0.400754451751709
    },
    {
      "epoch": 0.0002889404296875,
      "grad_norm": 0.11834180355072021,
      "learning_rate": 1.1685971208675539e-05,
      "loss": 0.0358,
      "step": 47340
    },
    {
      "epoch": 0.0002889404296875,
      "model_forward_time": 0.1141054630279541,
      "step": 47340
    },
    {
      "epoch": 0.0002889404296875,
      "step": 47340,
      "training_step_time": 0.4514603614807129
    },
    {
      "epoch": 0.000288946533203125,
      "model_forward_time": 0.11432361602783203,
      "step": 47341
    },
    {
      "epoch": 0.000288946533203125,
      "step": 47341,
      "training_step_time": 0.39353442192077637
    },
    {
      "epoch": 0.00028895263671875,
      "model_forward_time": 0.11430668830871582,
      "step": 47342
    },
    {
      "epoch": 0.00028895263671875,
      "step": 47342,
      "training_step_time": 0.3873298168182373
    },
    {
      "epoch": 0.000288958740234375,
      "model_forward_time": 0.1149449348449707,
      "step": 47343
    },
    {
      "epoch": 0.000288958740234375,
      "step": 47343,
      "training_step_time": 0.3833615779876709
    },
    {
      "epoch": 0.00028896484375,
      "model_forward_time": 0.11497092247009277,
      "step": 47344
    },
    {
      "epoch": 0.00028896484375,
      "step": 47344,
      "training_step_time": 0.3877713680267334
    },
    {
      "epoch": 0.000288970947265625,
      "model_forward_time": 0.1149129867553711,
      "step": 47345
    },
    {
      "epoch": 0.000288970947265625,
      "step": 47345,
      "training_step_time": 0.48712921142578125
    },
    {
      "epoch": 0.00028897705078125,
      "model_forward_time": 0.11502599716186523,
      "step": 47346
    },
    {
      "epoch": 0.00028897705078125,
      "step": 47346,
      "training_step_time": 0.39949512481689453
    },
    {
      "epoch": 0.000288983154296875,
      "model_forward_time": 0.11559581756591797,
      "step": 47347
    },
    {
      "epoch": 0.000288983154296875,
      "step": 47347,
      "training_step_time": 0.4585740566253662
    },
    {
      "epoch": 0.0002889892578125,
      "model_forward_time": 0.1150369644165039,
      "step": 47348
    },
    {
      "epoch": 0.0002889892578125,
      "step": 47348,
      "training_step_time": 0.44795703887939453
    },
    {
      "epoch": 0.000288995361328125,
      "model_forward_time": 0.11550569534301758,
      "step": 47349
    },
    {
      "epoch": 0.000288995361328125,
      "step": 47349,
      "training_step_time": 0.5070123672485352
    },
    {
      "epoch": 0.00028900146484375,
      "grad_norm": 0.10617433488368988,
      "learning_rate": 1.1668270955102218e-05,
      "loss": 0.0304,
      "step": 47350
    },
    {
      "epoch": 0.00028900146484375,
      "model_forward_time": 0.11495709419250488,
      "step": 47350
    },
    {
      "epoch": 0.00028900146484375,
      "step": 47350,
      "training_step_time": 0.4042549133300781
    },
    {
      "epoch": 0.000289007568359375,
      "model_forward_time": 0.11437034606933594,
      "step": 47351
    },
    {
      "epoch": 0.000289007568359375,
      "step": 47351,
      "training_step_time": 0.45380473136901855
    },
    {
      "epoch": 0.000289013671875,
      "model_forward_time": 0.11489653587341309,
      "step": 47352
    },
    {
      "epoch": 0.000289013671875,
      "step": 47352,
      "training_step_time": 0.46141934394836426
    },
    {
      "epoch": 0.000289019775390625,
      "model_forward_time": 0.11409115791320801,
      "step": 47353
    },
    {
      "epoch": 0.000289019775390625,
      "step": 47353,
      "training_step_time": 0.4862794876098633
    },
    {
      "epoch": 0.00028902587890625,
      "model_forward_time": 0.11465978622436523,
      "step": 47354
    },
    {
      "epoch": 0.00028902587890625,
      "step": 47354,
      "training_step_time": 0.39049720764160156
    },
    {
      "epoch": 0.000289031982421875,
      "model_forward_time": 0.11539888381958008,
      "step": 47355
    },
    {
      "epoch": 0.000289031982421875,
      "step": 47355,
      "training_step_time": 0.3903195858001709
    },
    {
      "epoch": 0.0002890380859375,
      "model_forward_time": 0.11579656600952148,
      "step": 47356
    },
    {
      "epoch": 0.0002890380859375,
      "step": 47356,
      "training_step_time": 0.38753294944763184
    },
    {
      "epoch": 0.000289044189453125,
      "model_forward_time": 0.11490559577941895,
      "step": 47357
    },
    {
      "epoch": 0.000289044189453125,
      "step": 47357,
      "training_step_time": 0.463911771774292
    },
    {
      "epoch": 0.00028905029296875,
      "model_forward_time": 0.11518120765686035,
      "step": 47358
    },
    {
      "epoch": 0.00028905029296875,
      "step": 47358,
      "training_step_time": 0.3980271816253662
    },
    {
      "epoch": 0.000289056396484375,
      "model_forward_time": 0.1153411865234375,
      "step": 47359
    },
    {
      "epoch": 0.000289056396484375,
      "step": 47359,
      "training_step_time": 0.44315648078918457
    },
    {
      "epoch": 0.0002890625,
      "grad_norm": 0.11031647771596909,
      "learning_rate": 1.1650582345696088e-05,
      "loss": 0.0358,
      "step": 47360
    },
    {
      "epoch": 0.0002890625,
      "model_forward_time": 0.11490559577941895,
      "step": 47360
    },
    {
      "epoch": 0.0002890625,
      "step": 47360,
      "training_step_time": 0.4074137210845947
    },
    {
      "epoch": 0.000289068603515625,
      "model_forward_time": 0.1149594783782959,
      "step": 47361
    },
    {
      "epoch": 0.000289068603515625,
      "step": 47361,
      "training_step_time": 0.42949581146240234
    },
    {
      "epoch": 0.00028907470703125,
      "model_forward_time": 0.11507630348205566,
      "step": 47362
    },
    {
      "epoch": 0.00028907470703125,
      "step": 47362,
      "training_step_time": 0.3988792896270752
    },
    {
      "epoch": 0.000289080810546875,
      "model_forward_time": 0.11500787734985352,
      "step": 47363
    },
    {
      "epoch": 0.000289080810546875,
      "step": 47363,
      "training_step_time": 0.4796628952026367
    },
    {
      "epoch": 0.0002890869140625,
      "model_forward_time": 0.11468148231506348,
      "step": 47364
    },
    {
      "epoch": 0.0002890869140625,
      "step": 47364,
      "training_step_time": 0.44580745697021484
    },
    {
      "epoch": 0.000289093017578125,
      "model_forward_time": 0.11527705192565918,
      "step": 47365
    },
    {
      "epoch": 0.000289093017578125,
      "step": 47365,
      "training_step_time": 0.39966583251953125
    },
    {
      "epoch": 0.00028909912109375,
      "model_forward_time": 0.11548304557800293,
      "step": 47366
    },
    {
      "epoch": 0.00028909912109375,
      "step": 47366,
      "training_step_time": 0.4233572483062744
    },
    {
      "epoch": 0.000289105224609375,
      "model_forward_time": 0.11580109596252441,
      "step": 47367
    },
    {
      "epoch": 0.000289105224609375,
      "step": 47367,
      "training_step_time": 0.45685553550720215
    },
    {
      "epoch": 0.000289111328125,
      "model_forward_time": 0.11483979225158691,
      "step": 47368
    },
    {
      "epoch": 0.000289111328125,
      "step": 47368,
      "training_step_time": 0.48133087158203125
    },
    {
      "epoch": 0.000289117431640625,
      "model_forward_time": 0.11517453193664551,
      "step": 47369
    },
    {
      "epoch": 0.000289117431640625,
      "step": 47369,
      "training_step_time": 0.6963179111480713
    },
    {
      "epoch": 0.00028912353515625,
      "grad_norm": 0.10775162279605865,
      "learning_rate": 1.1632905385830484e-05,
      "loss": 0.0329,
      "step": 47370
    },
    {
      "epoch": 0.00028912353515625,
      "model_forward_time": 0.11409306526184082,
      "step": 47370
    },
    {
      "epoch": 0.00028912353515625,
      "step": 47370,
      "training_step_time": 0.39943408966064453
    },
    {
      "epoch": 0.000289129638671875,
      "model_forward_time": 0.1139378547668457,
      "step": 47371
    },
    {
      "epoch": 0.000289129638671875,
      "step": 47371,
      "training_step_time": 0.3898322582244873
    },
    {
      "epoch": 0.0002891357421875,
      "model_forward_time": 0.11401510238647461,
      "step": 47372
    },
    {
      "epoch": 0.0002891357421875,
      "step": 47372,
      "training_step_time": 0.38132762908935547
    },
    {
      "epoch": 0.000289141845703125,
      "model_forward_time": 0.1146700382232666,
      "step": 47373
    },
    {
      "epoch": 0.000289141845703125,
      "step": 47373,
      "training_step_time": 0.38201904296875
    },
    {
      "epoch": 0.00028914794921875,
      "model_forward_time": 0.11446952819824219,
      "step": 47374
    },
    {
      "epoch": 0.00028914794921875,
      "step": 47374,
      "training_step_time": 0.43540525436401367
    },
    {
      "epoch": 0.000289154052734375,
      "model_forward_time": 0.11508560180664062,
      "step": 47375
    },
    {
      "epoch": 0.000289154052734375,
      "step": 47375,
      "training_step_time": 0.39072704315185547
    },
    {
      "epoch": 0.00028916015625,
      "model_forward_time": 0.1151740550994873,
      "step": 47376
    },
    {
      "epoch": 0.00028916015625,
      "step": 47376,
      "training_step_time": 0.4024531841278076
    },
    {
      "epoch": 0.000289166259765625,
      "model_forward_time": 0.11495018005371094,
      "step": 47377
    },
    {
      "epoch": 0.000289166259765625,
      "step": 47377,
      "training_step_time": 0.45202112197875977
    },
    {
      "epoch": 0.00028917236328125,
      "model_forward_time": 0.11603379249572754,
      "step": 47378
    },
    {
      "epoch": 0.00028917236328125,
      "step": 47378,
      "training_step_time": 0.4729440212249756
    },
    {
      "epoch": 0.000289178466796875,
      "model_forward_time": 0.11492466926574707,
      "step": 47379
    },
    {
      "epoch": 0.000289178466796875,
      "step": 47379,
      "training_step_time": 0.503272533416748
    },
    {
      "epoch": 0.0002891845703125,
      "grad_norm": 0.10435733944177628,
      "learning_rate": 1.16152400808752e-05,
      "loss": 0.0363,
      "step": 47380
    },
    {
      "epoch": 0.0002891845703125,
      "model_forward_time": 0.11544156074523926,
      "step": 47380
    },
    {
      "epoch": 0.0002891845703125,
      "step": 47380,
      "training_step_time": 0.4557468891143799
    },
    {
      "epoch": 0.000289190673828125,
      "model_forward_time": 0.11466073989868164,
      "step": 47381
    },
    {
      "epoch": 0.000289190673828125,
      "step": 47381,
      "training_step_time": 0.395143985748291
    },
    {
      "epoch": 0.00028919677734375,
      "model_forward_time": 0.11500978469848633,
      "step": 47382
    },
    {
      "epoch": 0.00028919677734375,
      "step": 47382,
      "training_step_time": 0.44342899322509766
    },
    {
      "epoch": 0.000289202880859375,
      "model_forward_time": 0.11467957496643066,
      "step": 47383
    },
    {
      "epoch": 0.000289202880859375,
      "step": 47383,
      "training_step_time": 0.39319467544555664
    },
    {
      "epoch": 0.000289208984375,
      "model_forward_time": 0.11541914939880371,
      "step": 47384
    },
    {
      "epoch": 0.000289208984375,
      "step": 47384,
      "training_step_time": 0.39267468452453613
    },
    {
      "epoch": 0.000289215087890625,
      "model_forward_time": 0.11463379859924316,
      "step": 47385
    },
    {
      "epoch": 0.000289215087890625,
      "step": 47385,
      "training_step_time": 0.39078593254089355
    },
    {
      "epoch": 0.00028922119140625,
      "model_forward_time": 0.11475944519042969,
      "step": 47386
    },
    {
      "epoch": 0.00028922119140625,
      "step": 47386,
      "training_step_time": 0.41580772399902344
    },
    {
      "epoch": 0.000289227294921875,
      "model_forward_time": 0.11501049995422363,
      "step": 47387
    },
    {
      "epoch": 0.000289227294921875,
      "step": 47387,
      "training_step_time": 0.3869132995605469
    },
    {
      "epoch": 0.0002892333984375,
      "model_forward_time": 0.1152188777923584,
      "step": 47388
    },
    {
      "epoch": 0.0002892333984375,
      "step": 47388,
      "training_step_time": 0.3974754810333252
    },
    {
      "epoch": 0.000289239501953125,
      "model_forward_time": 0.11525392532348633,
      "step": 47389
    },
    {
      "epoch": 0.000289239501953125,
      "step": 47389,
      "training_step_time": 0.3918430805206299
    },
    {
      "epoch": 0.00028924560546875,
      "grad_norm": 0.1067969799041748,
      "learning_rate": 1.1597586436196473e-05,
      "loss": 0.0348,
      "step": 47390
    },
    {
      "epoch": 0.00028924560546875,
      "model_forward_time": 0.1156005859375,
      "step": 47390
    },
    {
      "epoch": 0.00028924560546875,
      "step": 47390,
      "training_step_time": 0.49721312522888184
    },
    {
      "epoch": 0.000289251708984375,
      "model_forward_time": 0.11482477188110352,
      "step": 47391
    },
    {
      "epoch": 0.000289251708984375,
      "step": 47391,
      "training_step_time": 0.4125797748565674
    },
    {
      "epoch": 0.0002892578125,
      "model_forward_time": 0.11499404907226562,
      "step": 47392
    },
    {
      "epoch": 0.0002892578125,
      "step": 47392,
      "training_step_time": 0.43015384674072266
    },
    {
      "epoch": 0.000289263916015625,
      "model_forward_time": 0.11585760116577148,
      "step": 47393
    },
    {
      "epoch": 0.000289263916015625,
      "step": 47393,
      "training_step_time": 0.5205280780792236
    },
    {
      "epoch": 0.00028927001953125,
      "model_forward_time": 0.11502718925476074,
      "step": 47394
    },
    {
      "epoch": 0.00028927001953125,
      "step": 47394,
      "training_step_time": 0.505040168762207
    },
    {
      "epoch": 0.000289276123046875,
      "model_forward_time": 0.11449599266052246,
      "step": 47395
    },
    {
      "epoch": 0.000289276123046875,
      "step": 47395,
      "training_step_time": 0.404740571975708
    },
    {
      "epoch": 0.0002892822265625,
      "model_forward_time": 0.11485886573791504,
      "step": 47396
    },
    {
      "epoch": 0.0002892822265625,
      "step": 47396,
      "training_step_time": 0.4764888286590576
    },
    {
      "epoch": 0.000289288330078125,
      "model_forward_time": 0.11396551132202148,
      "step": 47397
    },
    {
      "epoch": 0.000289288330078125,
      "step": 47397,
      "training_step_time": 0.39537811279296875
    },
    {
      "epoch": 0.00028929443359375,
      "model_forward_time": 0.11439371109008789,
      "step": 47398
    },
    {
      "epoch": 0.00028929443359375,
      "step": 47398,
      "training_step_time": 0.3928248882293701
    },
    {
      "epoch": 0.000289300537109375,
      "model_forward_time": 0.11498332023620605,
      "step": 47399
    },
    {
      "epoch": 0.000289300537109375,
      "step": 47399,
      "training_step_time": 1.1374919414520264
    },
    {
      "epoch": 0.000289306640625,
      "grad_norm": 0.13180674612522125,
      "learning_rate": 1.157994445715706e-05,
      "loss": 0.0362,
      "step": 47400
    },
    {
      "epoch": 0.000289306640625,
      "model_forward_time": 0.11341190338134766,
      "step": 47400
    },
    {
      "epoch": 0.000289306640625,
      "step": 47400,
      "training_step_time": 0.39124226570129395
    },
    {
      "epoch": 0.000289312744140625,
      "model_forward_time": 0.11435651779174805,
      "step": 47401
    },
    {
      "epoch": 0.000289312744140625,
      "step": 47401,
      "training_step_time": 0.3968634605407715
    },
    {
      "epoch": 0.00028931884765625,
      "model_forward_time": 0.11420774459838867,
      "step": 47402
    },
    {
      "epoch": 0.00028931884765625,
      "step": 47402,
      "training_step_time": 0.3775012493133545
    },
    {
      "epoch": 0.000289324951171875,
      "model_forward_time": 0.11399960517883301,
      "step": 47403
    },
    {
      "epoch": 0.000289324951171875,
      "step": 47403,
      "training_step_time": 0.4507887363433838
    },
    {
      "epoch": 0.0002893310546875,
      "model_forward_time": 0.11401963233947754,
      "step": 47404
    },
    {
      "epoch": 0.0002893310546875,
      "step": 47404,
      "training_step_time": 0.4076378345489502
    },
    {
      "epoch": 0.000289337158203125,
      "model_forward_time": 0.11525130271911621,
      "step": 47405
    },
    {
      "epoch": 0.000289337158203125,
      "step": 47405,
      "training_step_time": 0.688554048538208
    },
    {
      "epoch": 0.00028934326171875,
      "model_forward_time": 0.11424398422241211,
      "step": 47406
    },
    {
      "epoch": 0.00028934326171875,
      "step": 47406,
      "training_step_time": 0.46107959747314453
    },
    {
      "epoch": 0.000289349365234375,
      "model_forward_time": 0.11452412605285645,
      "step": 47407
    },
    {
      "epoch": 0.000289349365234375,
      "step": 47407,
      "training_step_time": 0.48026275634765625
    },
    {
      "epoch": 0.00028935546875,
      "model_forward_time": 0.11381888389587402,
      "step": 47408
    },
    {
      "epoch": 0.00028935546875,
      "step": 47408,
      "training_step_time": 0.40374255180358887
    },
    {
      "epoch": 0.000289361572265625,
      "model_forward_time": 0.1143028736114502,
      "step": 47409
    },
    {
      "epoch": 0.000289361572265625,
      "step": 47409,
      "training_step_time": 0.3819577693939209
    },
    {
      "epoch": 0.00028936767578125,
      "grad_norm": 0.09254392236471176,
      "learning_rate": 1.1562314149116065e-05,
      "loss": 0.0353,
      "step": 47410
    },
    {
      "epoch": 0.00028936767578125,
      "model_forward_time": 0.11388325691223145,
      "step": 47410
    },
    {
      "epoch": 0.00028936767578125,
      "step": 47410,
      "training_step_time": 0.38599133491516113
    },
    {
      "epoch": 0.000289373779296875,
      "model_forward_time": 0.11445951461791992,
      "step": 47411
    },
    {
      "epoch": 0.000289373779296875,
      "step": 47411,
      "training_step_time": 0.7542531490325928
    },
    {
      "epoch": 0.0002893798828125,
      "model_forward_time": 0.11478853225708008,
      "step": 47412
    },
    {
      "epoch": 0.0002893798828125,
      "step": 47412,
      "training_step_time": 0.39842820167541504
    },
    {
      "epoch": 0.000289385986328125,
      "model_forward_time": 0.1152036190032959,
      "step": 47413
    },
    {
      "epoch": 0.000289385986328125,
      "step": 47413,
      "training_step_time": 0.39187097549438477
    },
    {
      "epoch": 0.00028939208984375,
      "model_forward_time": 0.11429381370544434,
      "step": 47414
    },
    {
      "epoch": 0.00028939208984375,
      "step": 47414,
      "training_step_time": 0.38573646545410156
    },
    {
      "epoch": 0.000289398193359375,
      "model_forward_time": 0.11404109001159668,
      "step": 47415
    },
    {
      "epoch": 0.000289398193359375,
      "step": 47415,
      "training_step_time": 0.3999972343444824
    },
    {
      "epoch": 0.000289404296875,
      "model_forward_time": 0.11485695838928223,
      "step": 47416
    },
    {
      "epoch": 0.000289404296875,
      "step": 47416,
      "training_step_time": 0.3875768184661865
    },
    {
      "epoch": 0.000289410400390625,
      "model_forward_time": 0.11463499069213867,
      "step": 47417
    },
    {
      "epoch": 0.000289410400390625,
      "step": 47417,
      "training_step_time": 0.8082880973815918
    },
    {
      "epoch": 0.00028941650390625,
      "model_forward_time": 0.11442208290100098,
      "step": 47418
    },
    {
      "epoch": 0.00028941650390625,
      "step": 47418,
      "training_step_time": 0.36453700065612793
    },
    {
      "epoch": 0.000289422607421875,
      "model_forward_time": 0.11362028121948242,
      "step": 47419
    },
    {
      "epoch": 0.000289422607421875,
      "step": 47419,
      "training_step_time": 0.44054365158081055
    },
    {
      "epoch": 0.0002894287109375,
      "grad_norm": 0.09960256516933441,
      "learning_rate": 1.1544695517429178e-05,
      "loss": 0.04,
      "step": 47420
    },
    {
      "epoch": 0.0002894287109375,
      "model_forward_time": 0.11508035659790039,
      "step": 47420
    },
    {
      "epoch": 0.0002894287109375,
      "step": 47420,
      "training_step_time": 0.45036768913269043
    },
    {
      "epoch": 0.000289434814453125,
      "model_forward_time": 0.1140596866607666,
      "step": 47421
    },
    {
      "epoch": 0.000289434814453125,
      "step": 47421,
      "training_step_time": 0.49961233139038086
    },
    {
      "epoch": 0.00028944091796875,
      "model_forward_time": 0.11385250091552734,
      "step": 47422
    },
    {
      "epoch": 0.00028944091796875,
      "step": 47422,
      "training_step_time": 0.3862569332122803
    },
    {
      "epoch": 0.000289447021484375,
      "model_forward_time": 0.11425113677978516,
      "step": 47423
    },
    {
      "epoch": 0.000289447021484375,
      "step": 47423,
      "training_step_time": 0.7694056034088135
    },
    {
      "epoch": 0.000289453125,
      "model_forward_time": 0.11423182487487793,
      "step": 47424
    },
    {
      "epoch": 0.000289453125,
      "step": 47424,
      "training_step_time": 0.40035080909729004
    },
    {
      "epoch": 0.000289459228515625,
      "model_forward_time": 0.11426973342895508,
      "step": 47425
    },
    {
      "epoch": 0.000289459228515625,
      "step": 47425,
      "training_step_time": 0.3940389156341553
    },
    {
      "epoch": 0.00028946533203125,
      "model_forward_time": 0.11459875106811523,
      "step": 47426
    },
    {
      "epoch": 0.00028946533203125,
      "step": 47426,
      "training_step_time": 0.38058972358703613
    },
    {
      "epoch": 0.000289471435546875,
      "model_forward_time": 0.11432313919067383,
      "step": 47427
    },
    {
      "epoch": 0.000289471435546875,
      "step": 47427,
      "training_step_time": 0.38295960426330566
    },
    {
      "epoch": 0.0002894775390625,
      "model_forward_time": 0.11448216438293457,
      "step": 47428
    },
    {
      "epoch": 0.0002894775390625,
      "step": 47428,
      "training_step_time": 0.3839452266693115
    },
    {
      "epoch": 0.000289483642578125,
      "model_forward_time": 0.11508440971374512,
      "step": 47429
    },
    {
      "epoch": 0.000289483642578125,
      "step": 47429,
      "training_step_time": 0.9405848979949951
    },
    {
      "epoch": 0.00028948974609375,
      "grad_norm": 0.13529635965824127,
      "learning_rate": 1.1527088567448407e-05,
      "loss": 0.0396,
      "step": 47430
    },
    {
      "epoch": 0.00028948974609375,
      "model_forward_time": 0.11407923698425293,
      "step": 47430
    },
    {
      "epoch": 0.00028948974609375,
      "step": 47430,
      "training_step_time": 0.4416005611419678
    },
    {
      "epoch": 0.000289495849609375,
      "model_forward_time": 0.11424541473388672,
      "step": 47431
    },
    {
      "epoch": 0.000289495849609375,
      "step": 47431,
      "training_step_time": 0.3888816833496094
    },
    {
      "epoch": 0.000289501953125,
      "model_forward_time": 0.11414432525634766,
      "step": 47432
    },
    {
      "epoch": 0.000289501953125,
      "step": 47432,
      "training_step_time": 0.4692881107330322
    },
    {
      "epoch": 0.000289508056640625,
      "model_forward_time": 0.11442923545837402,
      "step": 47433
    },
    {
      "epoch": 0.000289508056640625,
      "step": 47433,
      "training_step_time": 0.4760568141937256
    },
    {
      "epoch": 0.00028951416015625,
      "model_forward_time": 0.11405277252197266,
      "step": 47434
    },
    {
      "epoch": 0.00028951416015625,
      "step": 47434,
      "training_step_time": 0.3831446170806885
    },
    {
      "epoch": 0.000289520263671875,
      "model_forward_time": 0.11461734771728516,
      "step": 47435
    },
    {
      "epoch": 0.000289520263671875,
      "step": 47435,
      "training_step_time": 0.5278012752532959
    },
    {
      "epoch": 0.0002895263671875,
      "model_forward_time": 0.11541867256164551,
      "step": 47436
    },
    {
      "epoch": 0.0002895263671875,
      "step": 47436,
      "training_step_time": 0.4089772701263428
    },
    {
      "epoch": 0.000289532470703125,
      "model_forward_time": 0.11497116088867188,
      "step": 47437
    },
    {
      "epoch": 0.000289532470703125,
      "step": 47437,
      "training_step_time": 0.39076757431030273
    },
    {
      "epoch": 0.00028953857421875,
      "model_forward_time": 0.11504960060119629,
      "step": 47438
    },
    {
      "epoch": 0.00028953857421875,
      "step": 47438,
      "training_step_time": 0.38562917709350586
    },
    {
      "epoch": 0.000289544677734375,
      "model_forward_time": 0.11503934860229492,
      "step": 47439
    },
    {
      "epoch": 0.000289544677734375,
      "step": 47439,
      "training_step_time": 0.3851158618927002
    },
    {
      "epoch": 0.00028955078125,
      "grad_norm": 0.09293877333402634,
      "learning_rate": 1.1509493304522329e-05,
      "loss": 0.0302,
      "step": 47440
    },
    {
      "epoch": 0.00028955078125,
      "model_forward_time": 0.11485815048217773,
      "step": 47440
    },
    {
      "epoch": 0.00028955078125,
      "step": 47440,
      "training_step_time": 0.3863961696624756
    },
    {
      "epoch": 0.000289556884765625,
      "model_forward_time": 0.11481738090515137,
      "step": 47441
    },
    {
      "epoch": 0.000289556884765625,
      "step": 47441,
      "training_step_time": 1.06270432472229
    },
    {
      "epoch": 0.00028956298828125,
      "model_forward_time": 0.11395764350891113,
      "step": 47442
    },
    {
      "epoch": 0.00028956298828125,
      "step": 47442,
      "training_step_time": 0.459439754486084
    },
    {
      "epoch": 0.000289569091796875,
      "model_forward_time": 0.11425447463989258,
      "step": 47443
    },
    {
      "epoch": 0.000289569091796875,
      "step": 47443,
      "training_step_time": 0.44788312911987305
    },
    {
      "epoch": 0.0002895751953125,
      "model_forward_time": 0.11412310600280762,
      "step": 47444
    },
    {
      "epoch": 0.0002895751953125,
      "step": 47444,
      "training_step_time": 0.40849876403808594
    },
    {
      "epoch": 0.000289581298828125,
      "model_forward_time": 0.11404132843017578,
      "step": 47445
    },
    {
      "epoch": 0.000289581298828125,
      "step": 47445,
      "training_step_time": 0.4418520927429199
    },
    {
      "epoch": 0.00028958740234375,
      "model_forward_time": 0.11451387405395508,
      "step": 47446
    },
    {
      "epoch": 0.00028958740234375,
      "step": 47446,
      "training_step_time": 0.46657729148864746
    },
    {
      "epoch": 0.000289593505859375,
      "model_forward_time": 0.11428594589233398,
      "step": 47447
    },
    {
      "epoch": 0.000289593505859375,
      "step": 47447,
      "training_step_time": 0.393801212310791
    },
    {
      "epoch": 0.000289599609375,
      "model_forward_time": 0.11448431015014648,
      "step": 47448
    },
    {
      "epoch": 0.000289599609375,
      "step": 47448,
      "training_step_time": 0.41212940216064453
    },
    {
      "epoch": 0.000289605712890625,
      "model_forward_time": 0.11454391479492188,
      "step": 47449
    },
    {
      "epoch": 0.000289605712890625,
      "step": 47449,
      "training_step_time": 0.392974853515625
    },
    {
      "epoch": 0.00028961181640625,
      "grad_norm": 0.08974934369325638,
      "learning_rate": 1.1491909733995898e-05,
      "loss": 0.0348,
      "step": 47450
    },
    {
      "epoch": 0.00028961181640625,
      "model_forward_time": 0.11447525024414062,
      "step": 47450
    },
    {
      "epoch": 0.00028961181640625,
      "step": 47450,
      "training_step_time": 0.3888833522796631
    },
    {
      "epoch": 0.000289617919921875,
      "model_forward_time": 0.11465716361999512,
      "step": 47451
    },
    {
      "epoch": 0.000289617919921875,
      "step": 47451,
      "training_step_time": 0.40024590492248535
    },
    {
      "epoch": 0.0002896240234375,
      "model_forward_time": 0.11486530303955078,
      "step": 47452
    },
    {
      "epoch": 0.0002896240234375,
      "step": 47452,
      "training_step_time": 0.385761022567749
    },
    {
      "epoch": 0.000289630126953125,
      "model_forward_time": 0.1157076358795166,
      "step": 47453
    },
    {
      "epoch": 0.000289630126953125,
      "step": 47453,
      "training_step_time": 0.6499998569488525
    },
    {
      "epoch": 0.00028963623046875,
      "model_forward_time": 0.11490607261657715,
      "step": 47454
    },
    {
      "epoch": 0.00028963623046875,
      "step": 47454,
      "training_step_time": 0.39831089973449707
    },
    {
      "epoch": 0.000289642333984375,
      "model_forward_time": 0.11604022979736328,
      "step": 47455
    },
    {
      "epoch": 0.000289642333984375,
      "step": 47455,
      "training_step_time": 0.3913557529449463
    },
    {
      "epoch": 0.0002896484375,
      "model_forward_time": 0.11524248123168945,
      "step": 47456
    },
    {
      "epoch": 0.0002896484375,
      "step": 47456,
      "training_step_time": 0.4047422409057617
    },
    {
      "epoch": 0.000289654541015625,
      "model_forward_time": 0.11458516120910645,
      "step": 47457
    },
    {
      "epoch": 0.000289654541015625,
      "step": 47457,
      "training_step_time": 0.44832611083984375
    },
    {
      "epoch": 0.00028966064453125,
      "model_forward_time": 0.11665868759155273,
      "step": 47458
    },
    {
      "epoch": 0.00028966064453125,
      "step": 47458,
      "training_step_time": 0.46837735176086426
    },
    {
      "epoch": 0.000289666748046875,
      "model_forward_time": 0.1141347885131836,
      "step": 47459
    },
    {
      "epoch": 0.000289666748046875,
      "step": 47459,
      "training_step_time": 0.6926379203796387
    },
    {
      "epoch": 0.0002896728515625,
      "grad_norm": 0.10588882118463516,
      "learning_rate": 1.1474337861210543e-05,
      "loss": 0.0367,
      "step": 47460
    },
    {
      "epoch": 0.0002896728515625,
      "model_forward_time": 0.11423134803771973,
      "step": 47460
    },
    {
      "epoch": 0.0002896728515625,
      "step": 47460,
      "training_step_time": 0.41219139099121094
    },
    {
      "epoch": 0.000289678955078125,
      "model_forward_time": 0.11550664901733398,
      "step": 47461
    },
    {
      "epoch": 0.000289678955078125,
      "step": 47461,
      "training_step_time": 0.4370880126953125
    },
    {
      "epoch": 0.00028968505859375,
      "model_forward_time": 0.11413359642028809,
      "step": 47462
    },
    {
      "epoch": 0.00028968505859375,
      "step": 47462,
      "training_step_time": 0.41044139862060547
    },
    {
      "epoch": 0.000289691162109375,
      "model_forward_time": 0.11469674110412598,
      "step": 47463
    },
    {
      "epoch": 0.000289691162109375,
      "step": 47463,
      "training_step_time": 0.3796241283416748
    },
    {
      "epoch": 0.000289697265625,
      "model_forward_time": 0.11449837684631348,
      "step": 47464
    },
    {
      "epoch": 0.000289697265625,
      "step": 47464,
      "training_step_time": 0.3858306407928467
    },
    {
      "epoch": 0.000289703369140625,
      "model_forward_time": 0.11519503593444824,
      "step": 47465
    },
    {
      "epoch": 0.000289703369140625,
      "step": 47465,
      "training_step_time": 0.6742525100708008
    },
    {
      "epoch": 0.00028970947265625,
      "model_forward_time": 0.11434674263000488,
      "step": 47466
    },
    {
      "epoch": 0.00028970947265625,
      "step": 47466,
      "training_step_time": 0.3995022773742676
    },
    {
      "epoch": 0.000289715576171875,
      "model_forward_time": 0.11463642120361328,
      "step": 47467
    },
    {
      "epoch": 0.000289715576171875,
      "step": 47467,
      "training_step_time": 0.38893556594848633
    },
    {
      "epoch": 0.0002897216796875,
      "model_forward_time": 0.11556458473205566,
      "step": 47468
    },
    {
      "epoch": 0.0002897216796875,
      "step": 47468,
      "training_step_time": 0.38724756240844727
    },
    {
      "epoch": 0.000289727783203125,
      "model_forward_time": 0.11500382423400879,
      "step": 47469
    },
    {
      "epoch": 0.000289727783203125,
      "step": 47469,
      "training_step_time": 0.4742460250854492
    },
    {
      "epoch": 0.00028973388671875,
      "grad_norm": 0.1031007319688797,
      "learning_rate": 1.145677769150414e-05,
      "loss": 0.0373,
      "step": 47470
    },
    {
      "epoch": 0.00028973388671875,
      "model_forward_time": 0.11477446556091309,
      "step": 47470
    },
    {
      "epoch": 0.00028973388671875,
      "step": 47470,
      "training_step_time": 0.42853617668151855
    },
    {
      "epoch": 0.000289739990234375,
      "model_forward_time": 0.11471390724182129,
      "step": 47471
    },
    {
      "epoch": 0.000289739990234375,
      "step": 47471,
      "training_step_time": 0.48592281341552734
    },
    {
      "epoch": 0.00028974609375,
      "model_forward_time": 0.1146247386932373,
      "step": 47472
    },
    {
      "epoch": 0.00028974609375,
      "step": 47472,
      "training_step_time": 0.41483354568481445
    },
    {
      "epoch": 0.000289752197265625,
      "model_forward_time": 0.11421823501586914,
      "step": 47473
    },
    {
      "epoch": 0.000289752197265625,
      "step": 47473,
      "training_step_time": 0.48123717308044434
    },
    {
      "epoch": 0.00028975830078125,
      "model_forward_time": 0.11516475677490234,
      "step": 47474
    },
    {
      "epoch": 0.00028975830078125,
      "step": 47474,
      "training_step_time": 0.44195055961608887
    },
    {
      "epoch": 0.000289764404296875,
      "model_forward_time": 0.11423635482788086,
      "step": 47475
    },
    {
      "epoch": 0.000289764404296875,
      "step": 47475,
      "training_step_time": 0.4018096923828125
    },
    {
      "epoch": 0.0002897705078125,
      "model_forward_time": 0.1150822639465332,
      "step": 47476
    },
    {
      "epoch": 0.0002897705078125,
      "step": 47476,
      "training_step_time": 0.3850545883178711
    },
    {
      "epoch": 0.000289776611328125,
      "model_forward_time": 0.1160123348236084,
      "step": 47477
    },
    {
      "epoch": 0.000289776611328125,
      "step": 47477,
      "training_step_time": 0.4036829471588135
    },
    {
      "epoch": 0.00028978271484375,
      "model_forward_time": 0.11461925506591797,
      "step": 47478
    },
    {
      "epoch": 0.00028978271484375,
      "step": 47478,
      "training_step_time": 0.3926863670349121
    },
    {
      "epoch": 0.000289788818359375,
      "model_forward_time": 0.11476922035217285,
      "step": 47479
    },
    {
      "epoch": 0.000289788818359375,
      "step": 47479,
      "training_step_time": 0.3940098285675049
    },
    {
      "epoch": 0.000289794921875,
      "grad_norm": 0.0874738022685051,
      "learning_rate": 1.143922923021099e-05,
      "loss": 0.0353,
      "step": 47480
    },
    {
      "epoch": 0.000289794921875,
      "model_forward_time": 0.11400032043457031,
      "step": 47480
    },
    {
      "epoch": 0.000289794921875,
      "step": 47480,
      "training_step_time": 0.39582300186157227
    },
    {
      "epoch": 0.000289801025390625,
      "model_forward_time": 0.1156768798828125,
      "step": 47481
    },
    {
      "epoch": 0.000289801025390625,
      "step": 47481,
      "training_step_time": 0.38571619987487793
    },
    {
      "epoch": 0.00028980712890625,
      "model_forward_time": 0.1153712272644043,
      "step": 47482
    },
    {
      "epoch": 0.00028980712890625,
      "step": 47482,
      "training_step_time": 0.39176130294799805
    },
    {
      "epoch": 0.000289813232421875,
      "model_forward_time": 0.11553621292114258,
      "step": 47483
    },
    {
      "epoch": 0.000289813232421875,
      "step": 47483,
      "training_step_time": 0.7064883708953857
    },
    {
      "epoch": 0.0002898193359375,
      "model_forward_time": 0.11468052864074707,
      "step": 47484
    },
    {
      "epoch": 0.0002898193359375,
      "step": 47484,
      "training_step_time": 0.41986966133117676
    },
    {
      "epoch": 0.000289825439453125,
      "model_forward_time": 0.11461544036865234,
      "step": 47485
    },
    {
      "epoch": 0.000289825439453125,
      "step": 47485,
      "training_step_time": 0.4847280979156494
    },
    {
      "epoch": 0.00028983154296875,
      "model_forward_time": 0.11429905891418457,
      "step": 47486
    },
    {
      "epoch": 0.00028983154296875,
      "step": 47486,
      "training_step_time": 0.4742453098297119
    },
    {
      "epoch": 0.000289837646484375,
      "model_forward_time": 0.11496758460998535,
      "step": 47487
    },
    {
      "epoch": 0.000289837646484375,
      "step": 47487,
      "training_step_time": 0.4180593490600586
    },
    {
      "epoch": 0.00028984375,
      "model_forward_time": 0.11425018310546875,
      "step": 47488
    },
    {
      "epoch": 0.00028984375,
      "step": 47488,
      "training_step_time": 0.4308149814605713
    },
    {
      "epoch": 0.000289849853515625,
      "model_forward_time": 0.1153559684753418,
      "step": 47489
    },
    {
      "epoch": 0.000289849853515625,
      "step": 47489,
      "training_step_time": 0.6103720664978027
    },
    {
      "epoch": 0.00028985595703125,
      "grad_norm": 0.09086926281452179,
      "learning_rate": 1.1421692482661856e-05,
      "loss": 0.039,
      "step": 47490
    },
    {
      "epoch": 0.00028985595703125,
      "model_forward_time": 0.11513185501098633,
      "step": 47490
    },
    {
      "epoch": 0.00028985595703125,
      "step": 47490,
      "training_step_time": 0.3933086395263672
    },
    {
      "epoch": 0.000289862060546875,
      "model_forward_time": 0.11475515365600586,
      "step": 47491
    },
    {
      "epoch": 0.000289862060546875,
      "step": 47491,
      "training_step_time": 0.3913388252258301
    },
    {
      "epoch": 0.0002898681640625,
      "model_forward_time": 0.11442971229553223,
      "step": 47492
    },
    {
      "epoch": 0.0002898681640625,
      "step": 47492,
      "training_step_time": 0.3962423801422119
    },
    {
      "epoch": 0.000289874267578125,
      "model_forward_time": 0.11443901062011719,
      "step": 47493
    },
    {
      "epoch": 0.000289874267578125,
      "step": 47493,
      "training_step_time": 0.38970136642456055
    },
    {
      "epoch": 0.00028988037109375,
      "model_forward_time": 0.11525297164916992,
      "step": 47494
    },
    {
      "epoch": 0.00028988037109375,
      "step": 47494,
      "training_step_time": 0.3864138126373291
    },
    {
      "epoch": 0.000289886474609375,
      "model_forward_time": 0.11466765403747559,
      "step": 47495
    },
    {
      "epoch": 0.000289886474609375,
      "step": 47495,
      "training_step_time": 1.038776159286499
    },
    {
      "epoch": 0.000289892578125,
      "model_forward_time": 0.11402559280395508,
      "step": 47496
    },
    {
      "epoch": 0.000289892578125,
      "step": 47496,
      "training_step_time": 0.40531420707702637
    },
    {
      "epoch": 0.000289898681640625,
      "model_forward_time": 0.11394214630126953,
      "step": 47497
    },
    {
      "epoch": 0.000289898681640625,
      "step": 47497,
      "training_step_time": 0.38837456703186035
    },
    {
      "epoch": 0.00028990478515625,
      "model_forward_time": 0.11469721794128418,
      "step": 47498
    },
    {
      "epoch": 0.00028990478515625,
      "step": 47498,
      "training_step_time": 0.4583911895751953
    },
    {
      "epoch": 0.000289910888671875,
      "model_forward_time": 0.11373400688171387,
      "step": 47499
    },
    {
      "epoch": 0.000289910888671875,
      "step": 47499,
      "training_step_time": 0.4589536190032959
    },
    {
      "epoch": 0.0002899169921875,
      "grad_norm": 0.0993843823671341,
      "learning_rate": 1.1404167454183957e-05,
      "loss": 0.0339,
      "step": 47500
    },
    {
      "epoch": 0.0002899169921875,
      "model_forward_time": 0.1144716739654541,
      "step": 47500
    },
    {
      "epoch": 0.0002899169921875,
      "step": 47500,
      "training_step_time": 0.4334681034088135
    },
    {
      "epoch": 0.000289923095703125,
      "model_forward_time": 0.1161041259765625,
      "step": 47501
    },
    {
      "epoch": 0.000289923095703125,
      "step": 47501,
      "training_step_time": 1.048424243927002
    },
    {
      "epoch": 0.00028992919921875,
      "model_forward_time": 0.11405491828918457,
      "step": 47502
    },
    {
      "epoch": 0.00028992919921875,
      "step": 47502,
      "training_step_time": 0.3843376636505127
    },
    {
      "epoch": 0.000289935302734375,
      "model_forward_time": 0.11376166343688965,
      "step": 47503
    },
    {
      "epoch": 0.000289935302734375,
      "step": 47503,
      "training_step_time": 0.391986608505249
    },
    {
      "epoch": 0.00028994140625,
      "model_forward_time": 0.11380243301391602,
      "step": 47504
    },
    {
      "epoch": 0.00028994140625,
      "step": 47504,
      "training_step_time": 0.3942263126373291
    },
    {
      "epoch": 0.000289947509765625,
      "model_forward_time": 0.11421608924865723,
      "step": 47505
    },
    {
      "epoch": 0.000289947509765625,
      "step": 47505,
      "training_step_time": 0.3836946487426758
    },
    {
      "epoch": 0.00028995361328125,
      "model_forward_time": 0.11399388313293457,
      "step": 47506
    },
    {
      "epoch": 0.00028995361328125,
      "step": 47506,
      "training_step_time": 0.38766908645629883
    },
    {
      "epoch": 0.000289959716796875,
      "model_forward_time": 0.11474347114562988,
      "step": 47507
    },
    {
      "epoch": 0.000289959716796875,
      "step": 47507,
      "training_step_time": 0.7034821510314941
    },
    {
      "epoch": 0.0002899658203125,
      "model_forward_time": 0.11465144157409668,
      "step": 47508
    },
    {
      "epoch": 0.0002899658203125,
      "step": 47508,
      "training_step_time": 0.3942546844482422
    },
    {
      "epoch": 0.000289971923828125,
      "model_forward_time": 0.11505436897277832,
      "step": 47509
    },
    {
      "epoch": 0.000289971923828125,
      "step": 47509,
      "training_step_time": 0.4468955993652344
    },
    {
      "epoch": 0.00028997802734375,
      "grad_norm": 0.08221503347158432,
      "learning_rate": 1.1386654150100918e-05,
      "loss": 0.0346,
      "step": 47510
    },
    {
      "epoch": 0.00028997802734375,
      "model_forward_time": 0.11466860771179199,
      "step": 47510
    },
    {
      "epoch": 0.00028997802734375,
      "step": 47510,
      "training_step_time": 0.4070878028869629
    },
    {
      "epoch": 0.000289984130859375,
      "model_forward_time": 0.11530876159667969,
      "step": 47511
    },
    {
      "epoch": 0.000289984130859375,
      "step": 47511,
      "training_step_time": 0.4545772075653076
    },
    {
      "epoch": 0.000289990234375,
      "model_forward_time": 0.11439943313598633,
      "step": 47512
    },
    {
      "epoch": 0.000289990234375,
      "step": 47512,
      "training_step_time": 0.43603062629699707
    },
    {
      "epoch": 0.000289996337890625,
      "model_forward_time": 0.11490750312805176,
      "step": 47513
    },
    {
      "epoch": 0.000289996337890625,
      "step": 47513,
      "training_step_time": 0.8492004871368408
    },
    {
      "epoch": 0.00029000244140625,
      "model_forward_time": 0.11403775215148926,
      "step": 47514
    },
    {
      "epoch": 0.00029000244140625,
      "step": 47514,
      "training_step_time": 0.3897688388824463
    },
    {
      "epoch": 0.000290008544921875,
      "model_forward_time": 0.11554455757141113,
      "step": 47515
    },
    {
      "epoch": 0.000290008544921875,
      "step": 47515,
      "training_step_time": 0.3858301639556885
    },
    {
      "epoch": 0.0002900146484375,
      "model_forward_time": 0.11428952217102051,
      "step": 47516
    },
    {
      "epoch": 0.0002900146484375,
      "step": 47516,
      "training_step_time": 0.3799121379852295
    },
    {
      "epoch": 0.000290020751953125,
      "model_forward_time": 0.11423158645629883,
      "step": 47517
    },
    {
      "epoch": 0.000290020751953125,
      "step": 47517,
      "training_step_time": 0.37844276428222656
    },
    {
      "epoch": 0.00029002685546875,
      "model_forward_time": 0.11545872688293457,
      "step": 47518
    },
    {
      "epoch": 0.00029002685546875,
      "step": 47518,
      "training_step_time": 0.3789098262786865
    },
    {
      "epoch": 0.000290032958984375,
      "model_forward_time": 0.11425447463989258,
      "step": 47519
    },
    {
      "epoch": 0.000290032958984375,
      "step": 47519,
      "training_step_time": 0.9322249889373779
    },
    {
      "epoch": 0.0002900390625,
      "grad_norm": 0.07898213714361191,
      "learning_rate": 1.1369152575732822e-05,
      "loss": 0.0409,
      "step": 47520
    },
    {
      "epoch": 0.0002900390625,
      "model_forward_time": 0.11426329612731934,
      "step": 47520
    },
    {
      "epoch": 0.0002900390625,
      "step": 47520,
      "training_step_time": 0.38832545280456543
    },
    {
      "epoch": 0.000290045166015625,
      "model_forward_time": 0.11534857749938965,
      "step": 47521
    },
    {
      "epoch": 0.000290045166015625,
      "step": 47521,
      "training_step_time": 0.3871469497680664
    },
    {
      "epoch": 0.00029005126953125,
      "model_forward_time": 0.11447596549987793,
      "step": 47522
    },
    {
      "epoch": 0.00029005126953125,
      "step": 47522,
      "training_step_time": 0.4597291946411133
    },
    {
      "epoch": 0.000290057373046875,
      "model_forward_time": 0.11401820182800293,
      "step": 47523
    },
    {
      "epoch": 0.000290057373046875,
      "step": 47523,
      "training_step_time": 0.40952181816101074
    },
    {
      "epoch": 0.0002900634765625,
      "model_forward_time": 0.11474967002868652,
      "step": 47524
    },
    {
      "epoch": 0.0002900634765625,
      "step": 47524,
      "training_step_time": 0.4958479404449463
    },
    {
      "epoch": 0.000290069580078125,
      "model_forward_time": 0.11493468284606934,
      "step": 47525
    },
    {
      "epoch": 0.000290069580078125,
      "step": 47525,
      "training_step_time": 0.7628319263458252
    },
    {
      "epoch": 0.00029007568359375,
      "model_forward_time": 0.11422491073608398,
      "step": 47526
    },
    {
      "epoch": 0.00029007568359375,
      "step": 47526,
      "training_step_time": 0.3767080307006836
    },
    {
      "epoch": 0.000290081787109375,
      "model_forward_time": 0.11467885971069336,
      "step": 47527
    },
    {
      "epoch": 0.000290081787109375,
      "step": 47527,
      "training_step_time": 0.3840925693511963
    },
    {
      "epoch": 0.000290087890625,
      "model_forward_time": 0.11425042152404785,
      "step": 47528
    },
    {
      "epoch": 0.000290087890625,
      "step": 47528,
      "training_step_time": 0.3860170841217041
    },
    {
      "epoch": 0.000290093994140625,
      "model_forward_time": 0.1140592098236084,
      "step": 47529
    },
    {
      "epoch": 0.000290093994140625,
      "step": 47529,
      "training_step_time": 0.38541364669799805
    },
    {
      "epoch": 0.00029010009765625,
      "grad_norm": 0.08460050821304321,
      "learning_rate": 1.135166273639619e-05,
      "loss": 0.0318,
      "step": 47530
    },
    {
      "epoch": 0.00029010009765625,
      "model_forward_time": 0.11404299736022949,
      "step": 47530
    },
    {
      "epoch": 0.00029010009765625,
      "step": 47530,
      "training_step_time": 0.38366198539733887
    },
    {
      "epoch": 0.000290106201171875,
      "model_forward_time": 0.11506414413452148,
      "step": 47531
    },
    {
      "epoch": 0.000290106201171875,
      "step": 47531,
      "training_step_time": 1.2252609729766846
    },
    {
      "epoch": 0.0002901123046875,
      "model_forward_time": 0.11379575729370117,
      "step": 47532
    },
    {
      "epoch": 0.0002901123046875,
      "step": 47532,
      "training_step_time": 0.40456676483154297
    },
    {
      "epoch": 0.000290118408203125,
      "model_forward_time": 0.11510181427001953,
      "step": 47533
    },
    {
      "epoch": 0.000290118408203125,
      "step": 47533,
      "training_step_time": 0.3816792964935303
    },
    {
      "epoch": 0.00029012451171875,
      "model_forward_time": 0.11434054374694824,
      "step": 47534
    },
    {
      "epoch": 0.00029012451171875,
      "step": 47534,
      "training_step_time": 0.39394330978393555
    },
    {
      "epoch": 0.000290130615234375,
      "model_forward_time": 0.11384868621826172,
      "step": 47535
    },
    {
      "epoch": 0.000290130615234375,
      "step": 47535,
      "training_step_time": 0.43279170989990234
    },
    {
      "epoch": 0.00029013671875,
      "model_forward_time": 0.11413359642028809,
      "step": 47536
    },
    {
      "epoch": 0.00029013671875,
      "step": 47536,
      "training_step_time": 0.4406273365020752
    },
    {
      "epoch": 0.000290142822265625,
      "model_forward_time": 0.11482930183410645,
      "step": 47537
    },
    {
      "epoch": 0.000290142822265625,
      "step": 47537,
      "training_step_time": 0.8515543937683105
    },
    {
      "epoch": 0.00029014892578125,
      "model_forward_time": 0.11458635330200195,
      "step": 47538
    },
    {
      "epoch": 0.00029014892578125,
      "step": 47538,
      "training_step_time": 0.3852977752685547
    },
    {
      "epoch": 0.000290155029296875,
      "model_forward_time": 0.1143496036529541,
      "step": 47539
    },
    {
      "epoch": 0.000290155029296875,
      "step": 47539,
      "training_step_time": 0.38249778747558594
    },
    {
      "epoch": 0.0002901611328125,
      "grad_norm": 0.08166239410638809,
      "learning_rate": 1.133418463740395e-05,
      "loss": 0.0374,
      "step": 47540
    },
    {
      "epoch": 0.0002901611328125,
      "model_forward_time": 0.1146845817565918,
      "step": 47540
    },
    {
      "epoch": 0.0002901611328125,
      "step": 47540,
      "training_step_time": 0.378476619720459
    },
    {
      "epoch": 0.000290167236328125,
      "model_forward_time": 0.11396265029907227,
      "step": 47541
    },
    {
      "epoch": 0.000290167236328125,
      "step": 47541,
      "training_step_time": 0.38697242736816406
    },
    {
      "epoch": 0.00029017333984375,
      "model_forward_time": 0.11446976661682129,
      "step": 47542
    },
    {
      "epoch": 0.00029017333984375,
      "step": 47542,
      "training_step_time": 0.38004279136657715
    },
    {
      "epoch": 0.000290179443359375,
      "model_forward_time": 0.1153876781463623,
      "step": 47543
    },
    {
      "epoch": 0.000290179443359375,
      "step": 47543,
      "training_step_time": 0.5997793674468994
    },
    {
      "epoch": 0.000290185546875,
      "model_forward_time": 0.11494731903076172,
      "step": 47544
    },
    {
      "epoch": 0.000290185546875,
      "step": 47544,
      "training_step_time": 0.3945777416229248
    },
    {
      "epoch": 0.000290191650390625,
      "model_forward_time": 0.11555647850036621,
      "step": 47545
    },
    {
      "epoch": 0.000290191650390625,
      "step": 47545,
      "training_step_time": 0.392380952835083
    },
    {
      "epoch": 0.00029019775390625,
      "model_forward_time": 0.11500239372253418,
      "step": 47546
    },
    {
      "epoch": 0.00029019775390625,
      "step": 47546,
      "training_step_time": 0.38496947288513184
    },
    {
      "epoch": 0.000290203857421875,
      "model_forward_time": 0.11532258987426758,
      "step": 47547
    },
    {
      "epoch": 0.000290203857421875,
      "step": 47547,
      "training_step_time": 0.39116382598876953
    },
    {
      "epoch": 0.0002902099609375,
      "model_forward_time": 0.1147928237915039,
      "step": 47548
    },
    {
      "epoch": 0.0002902099609375,
      "step": 47548,
      "training_step_time": 0.4770374298095703
    },
    {
      "epoch": 0.000290216064453125,
      "model_forward_time": 0.11464142799377441,
      "step": 47549
    },
    {
      "epoch": 0.000290216064453125,
      "step": 47549,
      "training_step_time": 0.5046629905700684
    },
    {
      "epoch": 0.00029022216796875,
      "grad_norm": 0.08849111944437027,
      "learning_rate": 1.1316718284065537e-05,
      "loss": 0.0393,
      "step": 47550
    },
    {
      "epoch": 0.00029022216796875,
      "model_forward_time": 0.11612987518310547,
      "step": 47550
    },
    {
      "epoch": 0.00029022216796875,
      "step": 47550,
      "training_step_time": 0.49799489974975586
    },
    {
      "epoch": 0.000290228271484375,
      "model_forward_time": 0.1146395206451416,
      "step": 47551
    },
    {
      "epoch": 0.000290228271484375,
      "step": 47551,
      "training_step_time": 0.3906855583190918
    },
    {
      "epoch": 0.000290234375,
      "model_forward_time": 0.11475253105163574,
      "step": 47552
    },
    {
      "epoch": 0.000290234375,
      "step": 47552,
      "training_step_time": 0.38510656356811523
    },
    {
      "epoch": 0.000290240478515625,
      "model_forward_time": 0.11479496955871582,
      "step": 47553
    },
    {
      "epoch": 0.000290240478515625,
      "step": 47553,
      "training_step_time": 0.3869163990020752
    },
    {
      "epoch": 0.00029024658203125,
      "model_forward_time": 0.11480069160461426,
      "step": 47554
    },
    {
      "epoch": 0.00029024658203125,
      "step": 47554,
      "training_step_time": 0.398435115814209
    },
    {
      "epoch": 0.000290252685546875,
      "model_forward_time": 0.11506509780883789,
      "step": 47555
    },
    {
      "epoch": 0.000290252685546875,
      "step": 47555,
      "training_step_time": 0.5693008899688721
    },
    {
      "epoch": 0.0002902587890625,
      "model_forward_time": 0.11544370651245117,
      "step": 47556
    },
    {
      "epoch": 0.0002902587890625,
      "step": 47556,
      "training_step_time": 0.40291404724121094
    },
    {
      "epoch": 0.000290264892578125,
      "model_forward_time": 0.11548519134521484,
      "step": 47557
    },
    {
      "epoch": 0.000290264892578125,
      "step": 47557,
      "training_step_time": 0.3953256607055664
    },
    {
      "epoch": 0.00029027099609375,
      "model_forward_time": 0.1152031421661377,
      "step": 47558
    },
    {
      "epoch": 0.00029027099609375,
      "step": 47558,
      "training_step_time": 0.3799874782562256
    },
    {
      "epoch": 0.000290277099609375,
      "model_forward_time": 0.11536836624145508,
      "step": 47559
    },
    {
      "epoch": 0.000290277099609375,
      "step": 47559,
      "training_step_time": 0.392195463180542
    },
    {
      "epoch": 0.000290283203125,
      "grad_norm": 0.09033012390136719,
      "learning_rate": 1.1299263681686706e-05,
      "loss": 0.0337,
      "step": 47560
    },
    {
      "epoch": 0.000290283203125,
      "model_forward_time": 0.11470580101013184,
      "step": 47560
    },
    {
      "epoch": 0.000290283203125,
      "step": 47560,
      "training_step_time": 0.3853762149810791
    },
    {
      "epoch": 0.000290289306640625,
      "model_forward_time": 0.11515998840332031,
      "step": 47561
    },
    {
      "epoch": 0.000290289306640625,
      "step": 47561,
      "training_step_time": 0.5303575992584229
    },
    {
      "epoch": 0.00029029541015625,
      "model_forward_time": 0.11453604698181152,
      "step": 47562
    },
    {
      "epoch": 0.00029029541015625,
      "step": 47562,
      "training_step_time": 0.41144371032714844
    },
    {
      "epoch": 0.000290301513671875,
      "model_forward_time": 0.11468291282653809,
      "step": 47563
    },
    {
      "epoch": 0.000290301513671875,
      "step": 47563,
      "training_step_time": 0.4243321418762207
    },
    {
      "epoch": 0.0002903076171875,
      "model_forward_time": 0.11469864845275879,
      "step": 47564
    },
    {
      "epoch": 0.0002903076171875,
      "step": 47564,
      "training_step_time": 0.47864508628845215
    },
    {
      "epoch": 0.000290313720703125,
      "model_forward_time": 0.11516189575195312,
      "step": 47565
    },
    {
      "epoch": 0.000290313720703125,
      "step": 47565,
      "training_step_time": 0.4454071521759033
    },
    {
      "epoch": 0.00029031982421875,
      "model_forward_time": 0.1143941879272461,
      "step": 47566
    },
    {
      "epoch": 0.00029031982421875,
      "step": 47566,
      "training_step_time": 0.39623475074768066
    },
    {
      "epoch": 0.000290325927734375,
      "model_forward_time": 0.11502695083618164,
      "step": 47567
    },
    {
      "epoch": 0.000290325927734375,
      "step": 47567,
      "training_step_time": 0.3817121982574463
    },
    {
      "epoch": 0.00029033203125,
      "model_forward_time": 0.11488986015319824,
      "step": 47568
    },
    {
      "epoch": 0.00029033203125,
      "step": 47568,
      "training_step_time": 0.39338254928588867
    },
    {
      "epoch": 0.000290338134765625,
      "model_forward_time": 0.11514568328857422,
      "step": 47569
    },
    {
      "epoch": 0.000290338134765625,
      "step": 47569,
      "training_step_time": 0.39532995223999023
    },
    {
      "epoch": 0.00029034423828125,
      "grad_norm": 0.08608405292034149,
      "learning_rate": 1.1281820835569761e-05,
      "loss": 0.037,
      "step": 47570
    },
    {
      "epoch": 0.00029034423828125,
      "model_forward_time": 0.11427545547485352,
      "step": 47570
    },
    {
      "epoch": 0.00029034423828125,
      "step": 47570,
      "training_step_time": 0.3963596820831299
    },
    {
      "epoch": 0.000290350341796875,
      "model_forward_time": 0.11505246162414551,
      "step": 47571
    },
    {
      "epoch": 0.000290350341796875,
      "step": 47571,
      "training_step_time": 0.39906930923461914
    },
    {
      "epoch": 0.0002903564453125,
      "model_forward_time": 0.11568093299865723,
      "step": 47572
    },
    {
      "epoch": 0.0002903564453125,
      "step": 47572,
      "training_step_time": 0.40582942962646484
    },
    {
      "epoch": 0.000290362548828125,
      "model_forward_time": 0.11566781997680664,
      "step": 47573
    },
    {
      "epoch": 0.000290362548828125,
      "step": 47573,
      "training_step_time": 0.3983466625213623
    },
    {
      "epoch": 0.00029036865234375,
      "model_forward_time": 0.11523008346557617,
      "step": 47574
    },
    {
      "epoch": 0.00029036865234375,
      "step": 47574,
      "training_step_time": 0.41285157203674316
    },
    {
      "epoch": 0.000290374755859375,
      "model_forward_time": 0.11579489707946777,
      "step": 47575
    },
    {
      "epoch": 0.000290374755859375,
      "step": 47575,
      "training_step_time": 0.41781163215637207
    },
    {
      "epoch": 0.000290380859375,
      "model_forward_time": 0.11609840393066406,
      "step": 47576
    },
    {
      "epoch": 0.000290380859375,
      "step": 47576,
      "training_step_time": 0.43810081481933594
    },
    {
      "epoch": 0.000290386962890625,
      "model_forward_time": 0.11569786071777344,
      "step": 47577
    },
    {
      "epoch": 0.000290386962890625,
      "step": 47577,
      "training_step_time": 0.46520400047302246
    },
    {
      "epoch": 0.00029039306640625,
      "model_forward_time": 0.11501002311706543,
      "step": 47578
    },
    {
      "epoch": 0.00029039306640625,
      "step": 47578,
      "training_step_time": 0.46744370460510254
    },
    {
      "epoch": 0.000290399169921875,
      "model_forward_time": 0.11500835418701172,
      "step": 47579
    },
    {
      "epoch": 0.000290399169921875,
      "step": 47579,
      "training_step_time": 0.49868297576904297
    },
    {
      "epoch": 0.0002904052734375,
      "grad_norm": 0.10632723569869995,
      "learning_rate": 1.1264389751013326e-05,
      "loss": 0.0333,
      "step": 47580
    },
    {
      "epoch": 0.0002904052734375,
      "model_forward_time": 0.11433887481689453,
      "step": 47580
    },
    {
      "epoch": 0.0002904052734375,
      "step": 47580,
      "training_step_time": 0.394777774810791
    },
    {
      "epoch": 0.000290411376953125,
      "model_forward_time": 0.11478114128112793,
      "step": 47581
    },
    {
      "epoch": 0.000290411376953125,
      "step": 47581,
      "training_step_time": 0.3920412063598633
    },
    {
      "epoch": 0.00029041748046875,
      "model_forward_time": 0.11502218246459961,
      "step": 47582
    },
    {
      "epoch": 0.00029041748046875,
      "step": 47582,
      "training_step_time": 0.40453267097473145
    },
    {
      "epoch": 0.000290423583984375,
      "model_forward_time": 0.11538243293762207,
      "step": 47583
    },
    {
      "epoch": 0.000290423583984375,
      "step": 47583,
      "training_step_time": 0.3944826126098633
    },
    {
      "epoch": 0.0002904296875,
      "model_forward_time": 0.11580777168273926,
      "step": 47584
    },
    {
      "epoch": 0.0002904296875,
      "step": 47584,
      "training_step_time": 0.3832879066467285
    },
    {
      "epoch": 0.000290435791015625,
      "model_forward_time": 0.1152791976928711,
      "step": 47585
    },
    {
      "epoch": 0.000290435791015625,
      "step": 47585,
      "training_step_time": 0.419877290725708
    },
    {
      "epoch": 0.00029044189453125,
      "model_forward_time": 0.11486077308654785,
      "step": 47586
    },
    {
      "epoch": 0.00029044189453125,
      "step": 47586,
      "training_step_time": 0.3971109390258789
    },
    {
      "epoch": 0.000290447998046875,
      "model_forward_time": 0.1154937744140625,
      "step": 47587
    },
    {
      "epoch": 0.000290447998046875,
      "step": 47587,
      "training_step_time": 0.3867495059967041
    },
    {
      "epoch": 0.0002904541015625,
      "model_forward_time": 0.11601734161376953,
      "step": 47588
    },
    {
      "epoch": 0.0002904541015625,
      "step": 47588,
      "training_step_time": 0.3972184658050537
    },
    {
      "epoch": 0.000290460205078125,
      "model_forward_time": 0.11544632911682129,
      "step": 47589
    },
    {
      "epoch": 0.000290460205078125,
      "step": 47589,
      "training_step_time": 0.4169158935546875
    },
    {
      "epoch": 0.00029046630859375,
      "grad_norm": 0.10113867372274399,
      "learning_rate": 1.124697043331256e-05,
      "loss": 0.0329,
      "step": 47590
    },
    {
      "epoch": 0.00029046630859375,
      "model_forward_time": 0.11493825912475586,
      "step": 47590
    },
    {
      "epoch": 0.00029046630859375,
      "step": 47590,
      "training_step_time": 0.38296079635620117
    },
    {
      "epoch": 0.000290472412109375,
      "model_forward_time": 0.11499810218811035,
      "step": 47591
    },
    {
      "epoch": 0.000290472412109375,
      "step": 47591,
      "training_step_time": 0.386746883392334
    },
    {
      "epoch": 0.000290478515625,
      "model_forward_time": 0.11555099487304688,
      "step": 47592
    },
    {
      "epoch": 0.000290478515625,
      "step": 47592,
      "training_step_time": 0.36681652069091797
    },
    {
      "epoch": 0.000290484619140625,
      "model_forward_time": 0.11485481262207031,
      "step": 47593
    },
    {
      "epoch": 0.000290484619140625,
      "step": 47593,
      "training_step_time": 0.4631502628326416
    },
    {
      "epoch": 0.00029049072265625,
      "model_forward_time": 0.11483025550842285,
      "step": 47594
    },
    {
      "epoch": 0.00029049072265625,
      "step": 47594,
      "training_step_time": 0.4521005153656006
    },
    {
      "epoch": 0.000290496826171875,
      "model_forward_time": 0.1149444580078125,
      "step": 47595
    },
    {
      "epoch": 0.000290496826171875,
      "step": 47595,
      "training_step_time": 0.38497471809387207
    },
    {
      "epoch": 0.0002905029296875,
      "model_forward_time": 0.11511945724487305,
      "step": 47596
    },
    {
      "epoch": 0.0002905029296875,
      "step": 47596,
      "training_step_time": 0.3872239589691162
    },
    {
      "epoch": 0.000290509033203125,
      "model_forward_time": 0.11476421356201172,
      "step": 47597
    },
    {
      "epoch": 0.000290509033203125,
      "step": 47597,
      "training_step_time": 0.39171910285949707
    },
    {
      "epoch": 0.00029051513671875,
      "model_forward_time": 0.11511588096618652,
      "step": 47598
    },
    {
      "epoch": 0.00029051513671875,
      "step": 47598,
      "training_step_time": 0.39608192443847656
    },
    {
      "epoch": 0.000290521240234375,
      "model_forward_time": 0.11567974090576172,
      "step": 47599
    },
    {
      "epoch": 0.000290521240234375,
      "step": 47599,
      "training_step_time": 0.3909766674041748
    },
    {
      "epoch": 0.00029052734375,
      "grad_norm": 0.0926932692527771,
      "learning_rate": 1.1229562887758926e-05,
      "loss": 0.0381,
      "step": 47600
    },
    {
      "epoch": 0.00029052734375,
      "model_forward_time": 0.1154787540435791,
      "step": 47600
    },
    {
      "epoch": 0.00029052734375,
      "step": 47600,
      "training_step_time": 0.3936755657196045
    },
    {
      "epoch": 0.000290533447265625,
      "model_forward_time": 0.11606216430664062,
      "step": 47601
    },
    {
      "epoch": 0.000290533447265625,
      "step": 47601,
      "training_step_time": 0.3946964740753174
    },
    {
      "epoch": 0.00029053955078125,
      "model_forward_time": 0.11520957946777344,
      "step": 47602
    },
    {
      "epoch": 0.00029053955078125,
      "step": 47602,
      "training_step_time": 0.3957676887512207
    },
    {
      "epoch": 0.000290545654296875,
      "model_forward_time": 0.11597728729248047,
      "step": 47603
    },
    {
      "epoch": 0.000290545654296875,
      "step": 47603,
      "training_step_time": 0.46682071685791016
    },
    {
      "epoch": 0.0002905517578125,
      "model_forward_time": 0.11524343490600586,
      "step": 47604
    },
    {
      "epoch": 0.0002905517578125,
      "step": 47604,
      "training_step_time": 0.4587082862854004
    },
    {
      "epoch": 0.000290557861328125,
      "model_forward_time": 0.11444354057312012,
      "step": 47605
    },
    {
      "epoch": 0.000290557861328125,
      "step": 47605,
      "training_step_time": 0.40898704528808594
    },
    {
      "epoch": 0.00029056396484375,
      "model_forward_time": 0.11590170860290527,
      "step": 47606
    },
    {
      "epoch": 0.00029056396484375,
      "step": 47606,
      "training_step_time": 0.38626575469970703
    },
    {
      "epoch": 0.000290570068359375,
      "model_forward_time": 0.11537694931030273,
      "step": 47607
    },
    {
      "epoch": 0.000290570068359375,
      "step": 47607,
      "training_step_time": 0.41693592071533203
    },
    {
      "epoch": 0.000290576171875,
      "model_forward_time": 0.11585307121276855,
      "step": 47608
    },
    {
      "epoch": 0.000290576171875,
      "step": 47608,
      "training_step_time": 0.4294545650482178
    },
    {
      "epoch": 0.000290582275390625,
      "model_forward_time": 0.11550283432006836,
      "step": 47609
    },
    {
      "epoch": 0.000290582275390625,
      "step": 47609,
      "training_step_time": 0.46820926666259766
    },
    {
      "epoch": 0.00029058837890625,
      "grad_norm": 0.0763627141714096,
      "learning_rate": 1.1212167119640438e-05,
      "loss": 0.0373,
      "step": 47610
    },
    {
      "epoch": 0.00029058837890625,
      "model_forward_time": 0.11504006385803223,
      "step": 47610
    },
    {
      "epoch": 0.00029058837890625,
      "step": 47610,
      "training_step_time": 0.426501989364624
    },
    {
      "epoch": 0.000290594482421875,
      "model_forward_time": 0.11558175086975098,
      "step": 47611
    },
    {
      "epoch": 0.000290594482421875,
      "step": 47611,
      "training_step_time": 0.3947722911834717
    },
    {
      "epoch": 0.0002906005859375,
      "model_forward_time": 0.11487698554992676,
      "step": 47612
    },
    {
      "epoch": 0.0002906005859375,
      "step": 47612,
      "training_step_time": 0.38887834548950195
    },
    {
      "epoch": 0.000290606689453125,
      "model_forward_time": 0.11497211456298828,
      "step": 47613
    },
    {
      "epoch": 0.000290606689453125,
      "step": 47613,
      "training_step_time": 0.38013410568237305
    },
    {
      "epoch": 0.00029061279296875,
      "model_forward_time": 0.1157369613647461,
      "step": 47614
    },
    {
      "epoch": 0.00029061279296875,
      "step": 47614,
      "training_step_time": 0.39269518852233887
    },
    {
      "epoch": 0.000290618896484375,
      "model_forward_time": 0.11567139625549316,
      "step": 47615
    },
    {
      "epoch": 0.000290618896484375,
      "step": 47615,
      "training_step_time": 0.6385815143585205
    },
    {
      "epoch": 0.000290625,
      "model_forward_time": 0.11504220962524414,
      "step": 47616
    },
    {
      "epoch": 0.000290625,
      "step": 47616,
      "training_step_time": 0.39015722274780273
    },
    {
      "epoch": 0.000290631103515625,
      "model_forward_time": 0.1149446964263916,
      "step": 47617
    },
    {
      "epoch": 0.000290631103515625,
      "step": 47617,
      "training_step_time": 0.42064785957336426
    },
    {
      "epoch": 0.00029063720703125,
      "model_forward_time": 0.11460614204406738,
      "step": 47618
    },
    {
      "epoch": 0.00029063720703125,
      "step": 47618,
      "training_step_time": 0.3858768939971924
    },
    {
      "epoch": 0.000290643310546875,
      "model_forward_time": 0.11609172821044922,
      "step": 47619
    },
    {
      "epoch": 0.000290643310546875,
      "step": 47619,
      "training_step_time": 0.3910865783691406
    },
    {
      "epoch": 0.0002906494140625,
      "grad_norm": 0.09039784222841263,
      "learning_rate": 1.1194783134241437e-05,
      "loss": 0.0383,
      "step": 47620
    },
    {
      "epoch": 0.0002906494140625,
      "model_forward_time": 0.1149449348449707,
      "step": 47620
    },
    {
      "epoch": 0.0002906494140625,
      "step": 47620,
      "training_step_time": 0.38632750511169434
    },
    {
      "epoch": 0.000290655517578125,
      "model_forward_time": 0.1156473159790039,
      "step": 47621
    },
    {
      "epoch": 0.000290655517578125,
      "step": 47621,
      "training_step_time": 0.6303660869598389
    },
    {
      "epoch": 0.00029066162109375,
      "model_forward_time": 0.11533498764038086,
      "step": 47622
    },
    {
      "epoch": 0.00029066162109375,
      "step": 47622,
      "training_step_time": 0.4582226276397705
    },
    {
      "epoch": 0.000290667724609375,
      "model_forward_time": 0.11548018455505371,
      "step": 47623
    },
    {
      "epoch": 0.000290667724609375,
      "step": 47623,
      "training_step_time": 0.48691534996032715
    },
    {
      "epoch": 0.000290673828125,
      "model_forward_time": 0.11494755744934082,
      "step": 47624
    },
    {
      "epoch": 0.000290673828125,
      "step": 47624,
      "training_step_time": 0.42272067070007324
    },
    {
      "epoch": 0.000290679931640625,
      "model_forward_time": 0.11476755142211914,
      "step": 47625
    },
    {
      "epoch": 0.000290679931640625,
      "step": 47625,
      "training_step_time": 0.39092469215393066
    },
    {
      "epoch": 0.00029068603515625,
      "model_forward_time": 0.11439323425292969,
      "step": 47626
    },
    {
      "epoch": 0.00029068603515625,
      "step": 47626,
      "training_step_time": 0.3916056156158447
    },
    {
      "epoch": 0.000290692138671875,
      "model_forward_time": 0.11575698852539062,
      "step": 47627
    },
    {
      "epoch": 0.000290692138671875,
      "step": 47627,
      "training_step_time": 0.39629244804382324
    },
    {
      "epoch": 0.0002906982421875,
      "model_forward_time": 0.11541318893432617,
      "step": 47628
    },
    {
      "epoch": 0.0002906982421875,
      "step": 47628,
      "training_step_time": 0.39328694343566895
    },
    {
      "epoch": 0.000290704345703125,
      "model_forward_time": 0.11571311950683594,
      "step": 47629
    },
    {
      "epoch": 0.000290704345703125,
      "step": 47629,
      "training_step_time": 0.447251558303833
    },
    {
      "epoch": 0.00029071044921875,
      "grad_norm": 0.09095809608697891,
      "learning_rate": 1.1177410936842719e-05,
      "loss": 0.0383,
      "step": 47630
    },
    {
      "epoch": 0.00029071044921875,
      "model_forward_time": 0.11583733558654785,
      "step": 47630
    },
    {
      "epoch": 0.00029071044921875,
      "step": 47630,
      "training_step_time": 0.42368221282958984
    },
    {
      "epoch": 0.000290716552734375,
      "model_forward_time": 0.11477780342102051,
      "step": 47631
    },
    {
      "epoch": 0.000290716552734375,
      "step": 47631,
      "training_step_time": 0.40662145614624023
    },
    {
      "epoch": 0.00029072265625,
      "model_forward_time": 0.11537313461303711,
      "step": 47632
    },
    {
      "epoch": 0.00029072265625,
      "step": 47632,
      "training_step_time": 0.3948390483856201
    },
    {
      "epoch": 0.000290728759765625,
      "model_forward_time": 0.11554813385009766,
      "step": 47633
    },
    {
      "epoch": 0.000290728759765625,
      "step": 47633,
      "training_step_time": 0.3950917720794678
    },
    {
      "epoch": 0.00029073486328125,
      "model_forward_time": 0.11473608016967773,
      "step": 47634
    },
    {
      "epoch": 0.00029073486328125,
      "step": 47634,
      "training_step_time": 0.3991720676422119
    },
    {
      "epoch": 0.000290740966796875,
      "model_forward_time": 0.11455535888671875,
      "step": 47635
    },
    {
      "epoch": 0.000290740966796875,
      "step": 47635,
      "training_step_time": 0.3953549861907959
    },
    {
      "epoch": 0.0002907470703125,
      "model_forward_time": 0.11598324775695801,
      "step": 47636
    },
    {
      "epoch": 0.0002907470703125,
      "step": 47636,
      "training_step_time": 0.4423706531524658
    },
    {
      "epoch": 0.000290753173828125,
      "model_forward_time": 0.11455726623535156,
      "step": 47637
    },
    {
      "epoch": 0.000290753173828125,
      "step": 47637,
      "training_step_time": 0.5729410648345947
    },
    {
      "epoch": 0.00029075927734375,
      "model_forward_time": 0.11513996124267578,
      "step": 47638
    },
    {
      "epoch": 0.00029075927734375,
      "step": 47638,
      "training_step_time": 0.4315977096557617
    },
    {
      "epoch": 0.000290765380859375,
      "model_forward_time": 0.11501240730285645,
      "step": 47639
    },
    {
      "epoch": 0.000290765380859375,
      "step": 47639,
      "training_step_time": 0.39237523078918457
    },
    {
      "epoch": 0.000290771484375,
      "grad_norm": 0.12433689832687378,
      "learning_rate": 1.1160050532721528e-05,
      "loss": 0.036,
      "step": 47640
    },
    {
      "epoch": 0.000290771484375,
      "model_forward_time": 0.11465811729431152,
      "step": 47640
    },
    {
      "epoch": 0.000290771484375,
      "step": 47640,
      "training_step_time": 0.3877217769622803
    },
    {
      "epoch": 0.000290777587890625,
      "model_forward_time": 0.11458349227905273,
      "step": 47641
    },
    {
      "epoch": 0.000290777587890625,
      "step": 47641,
      "training_step_time": 0.38718748092651367
    },
    {
      "epoch": 0.00029078369140625,
      "model_forward_time": 0.11481761932373047,
      "step": 47642
    },
    {
      "epoch": 0.00029078369140625,
      "step": 47642,
      "training_step_time": 0.3843240737915039
    },
    {
      "epoch": 0.000290789794921875,
      "model_forward_time": 0.11515951156616211,
      "step": 47643
    },
    {
      "epoch": 0.000290789794921875,
      "step": 47643,
      "training_step_time": 0.5357286930084229
    },
    {
      "epoch": 0.0002907958984375,
      "model_forward_time": 0.11541247367858887,
      "step": 47644
    },
    {
      "epoch": 0.0002907958984375,
      "step": 47644,
      "training_step_time": 0.4418776035308838
    },
    {
      "epoch": 0.000290802001953125,
      "model_forward_time": 0.11489629745483398,
      "step": 47645
    },
    {
      "epoch": 0.000290802001953125,
      "step": 47645,
      "training_step_time": 0.3871333599090576
    },
    {
      "epoch": 0.00029080810546875,
      "model_forward_time": 0.11468982696533203,
      "step": 47646
    },
    {
      "epoch": 0.00029080810546875,
      "step": 47646,
      "training_step_time": 0.3856239318847656
    },
    {
      "epoch": 0.000290814208984375,
      "model_forward_time": 0.11546635627746582,
      "step": 47647
    },
    {
      "epoch": 0.000290814208984375,
      "step": 47647,
      "training_step_time": 0.3924422264099121
    },
    {
      "epoch": 0.0002908203125,
      "model_forward_time": 0.11497974395751953,
      "step": 47648
    },
    {
      "epoch": 0.0002908203125,
      "step": 47648,
      "training_step_time": 0.4359612464904785
    },
    {
      "epoch": 0.000290826416015625,
      "model_forward_time": 0.11570215225219727,
      "step": 47649
    },
    {
      "epoch": 0.000290826416015625,
      "step": 47649,
      "training_step_time": 0.4534435272216797
    },
    {
      "epoch": 0.00029083251953125,
      "grad_norm": 0.11215486377477646,
      "learning_rate": 1.1142701927151456e-05,
      "loss": 0.0386,
      "step": 47650
    },
    {
      "epoch": 0.00029083251953125,
      "model_forward_time": 0.11524224281311035,
      "step": 47650
    },
    {
      "epoch": 0.00029083251953125,
      "step": 47650,
      "training_step_time": 0.46085095405578613
    },
    {
      "epoch": 0.000290838623046875,
      "model_forward_time": 0.11486935615539551,
      "step": 47651
    },
    {
      "epoch": 0.000290838623046875,
      "step": 47651,
      "training_step_time": 0.4747128486633301
    },
    {
      "epoch": 0.0002908447265625,
      "model_forward_time": 0.11500191688537598,
      "step": 47652
    },
    {
      "epoch": 0.0002908447265625,
      "step": 47652,
      "training_step_time": 0.4706308841705322
    },
    {
      "epoch": 0.000290850830078125,
      "model_forward_time": 0.11461162567138672,
      "step": 47653
    },
    {
      "epoch": 0.000290850830078125,
      "step": 47653,
      "training_step_time": 0.4187798500061035
    },
    {
      "epoch": 0.00029085693359375,
      "model_forward_time": 0.11517024040222168,
      "step": 47654
    },
    {
      "epoch": 0.00029085693359375,
      "step": 47654,
      "training_step_time": 0.38705897331237793
    },
    {
      "epoch": 0.000290863037109375,
      "model_forward_time": 0.114715576171875,
      "step": 47655
    },
    {
      "epoch": 0.000290863037109375,
      "step": 47655,
      "training_step_time": 0.38483500480651855
    },
    {
      "epoch": 0.000290869140625,
      "model_forward_time": 0.11553835868835449,
      "step": 47656
    },
    {
      "epoch": 0.000290869140625,
      "step": 47656,
      "training_step_time": 0.39823317527770996
    },
    {
      "epoch": 0.000290875244140625,
      "model_forward_time": 0.11606049537658691,
      "step": 47657
    },
    {
      "epoch": 0.000290875244140625,
      "step": 47657,
      "training_step_time": 0.3928103446960449
    },
    {
      "epoch": 0.00029088134765625,
      "model_forward_time": 0.11511754989624023,
      "step": 47658
    },
    {
      "epoch": 0.00029088134765625,
      "step": 47658,
      "training_step_time": 0.40156126022338867
    },
    {
      "epoch": 0.000290887451171875,
      "model_forward_time": 0.11534500122070312,
      "step": 47659
    },
    {
      "epoch": 0.000290887451171875,
      "step": 47659,
      "training_step_time": 0.39227890968322754
    },
    {
      "epoch": 0.0002908935546875,
      "grad_norm": 0.11016236245632172,
      "learning_rate": 1.1125365125402582e-05,
      "loss": 0.036,
      "step": 47660
    },
    {
      "epoch": 0.0002908935546875,
      "model_forward_time": 0.11541199684143066,
      "step": 47660
    },
    {
      "epoch": 0.0002908935546875,
      "step": 47660,
      "training_step_time": 0.3953869342803955
    },
    {
      "epoch": 0.000290899658203125,
      "model_forward_time": 0.11528134346008301,
      "step": 47661
    },
    {
      "epoch": 0.000290899658203125,
      "step": 47661,
      "training_step_time": 0.45631933212280273
    },
    {
      "epoch": 0.00029090576171875,
      "model_forward_time": 0.11489319801330566,
      "step": 47662
    },
    {
      "epoch": 0.00029090576171875,
      "step": 47662,
      "training_step_time": 0.39353084564208984
    },
    {
      "epoch": 0.000290911865234375,
      "model_forward_time": 0.11523079872131348,
      "step": 47663
    },
    {
      "epoch": 0.000290911865234375,
      "step": 47663,
      "training_step_time": 0.3941643238067627
    },
    {
      "epoch": 0.00029091796875,
      "model_forward_time": 0.11524081230163574,
      "step": 47664
    },
    {
      "epoch": 0.00029091796875,
      "step": 47664,
      "training_step_time": 0.4449317455291748
    },
    {
      "epoch": 0.000290924072265625,
      "model_forward_time": 0.11563706398010254,
      "step": 47665
    },
    {
      "epoch": 0.000290924072265625,
      "step": 47665,
      "training_step_time": 0.3980271816253662
    },
    {
      "epoch": 0.00029093017578125,
      "model_forward_time": 0.11545896530151367,
      "step": 47666
    },
    {
      "epoch": 0.00029093017578125,
      "step": 47666,
      "training_step_time": 0.45415401458740234
    },
    {
      "epoch": 0.000290936279296875,
      "model_forward_time": 0.11489653587341309,
      "step": 47667
    },
    {
      "epoch": 0.000290936279296875,
      "step": 47667,
      "training_step_time": 0.6840269565582275
    },
    {
      "epoch": 0.0002909423828125,
      "model_forward_time": 0.11435127258300781,
      "step": 47668
    },
    {
      "epoch": 0.0002909423828125,
      "step": 47668,
      "training_step_time": 0.38800501823425293
    },
    {
      "epoch": 0.000290948486328125,
      "model_forward_time": 0.11500048637390137,
      "step": 47669
    },
    {
      "epoch": 0.000290948486328125,
      "step": 47669,
      "training_step_time": 0.3945882320404053
    },
    {
      "epoch": 0.00029095458984375,
      "grad_norm": 0.10711236298084259,
      "learning_rate": 1.1108040132741354e-05,
      "loss": 0.034,
      "step": 47670
    },
    {
      "epoch": 0.00029095458984375,
      "model_forward_time": 0.11486458778381348,
      "step": 47670
    },
    {
      "epoch": 0.00029095458984375,
      "step": 47670,
      "training_step_time": 0.3951430320739746
    },
    {
      "epoch": 0.000290960693359375,
      "model_forward_time": 0.11434483528137207,
      "step": 47671
    },
    {
      "epoch": 0.000290960693359375,
      "step": 47671,
      "training_step_time": 0.41069769859313965
    },
    {
      "epoch": 0.000290966796875,
      "model_forward_time": 0.11493086814880371,
      "step": 47672
    },
    {
      "epoch": 0.000290966796875,
      "step": 47672,
      "training_step_time": 0.386751651763916
    },
    {
      "epoch": 0.000290972900390625,
      "model_forward_time": 0.11510324478149414,
      "step": 47673
    },
    {
      "epoch": 0.000290972900390625,
      "step": 47673,
      "training_step_time": 0.6863231658935547
    },
    {
      "epoch": 0.00029097900390625,
      "model_forward_time": 0.11525487899780273,
      "step": 47674
    },
    {
      "epoch": 0.00029097900390625,
      "step": 47674,
      "training_step_time": 0.3832864761352539
    },
    {
      "epoch": 0.000290985107421875,
      "model_forward_time": 0.11426472663879395,
      "step": 47675
    },
    {
      "epoch": 0.000290985107421875,
      "step": 47675,
      "training_step_time": 0.39077043533325195
    },
    {
      "epoch": 0.0002909912109375,
      "model_forward_time": 0.11539173126220703,
      "step": 47676
    },
    {
      "epoch": 0.0002909912109375,
      "step": 47676,
      "training_step_time": 0.39613914489746094
    },
    {
      "epoch": 0.000290997314453125,
      "model_forward_time": 0.11464238166809082,
      "step": 47677
    },
    {
      "epoch": 0.000290997314453125,
      "step": 47677,
      "training_step_time": 0.3916471004486084
    },
    {
      "epoch": 0.00029100341796875,
      "model_forward_time": 0.11484622955322266,
      "step": 47678
    },
    {
      "epoch": 0.00029100341796875,
      "step": 47678,
      "training_step_time": 0.3996589183807373
    },
    {
      "epoch": 0.000291009521484375,
      "model_forward_time": 0.11482691764831543,
      "step": 47679
    },
    {
      "epoch": 0.000291009521484375,
      "step": 47679,
      "training_step_time": 0.5872015953063965
    },
    {
      "epoch": 0.000291015625,
      "grad_norm": 0.08647000789642334,
      "learning_rate": 1.1090726954430658e-05,
      "loss": 0.0341,
      "step": 47680
    },
    {
      "epoch": 0.000291015625,
      "model_forward_time": 0.11513352394104004,
      "step": 47680
    },
    {
      "epoch": 0.000291015625,
      "step": 47680,
      "training_step_time": 0.5063292980194092
    },
    {
      "epoch": 0.000291021728515625,
      "model_forward_time": 0.11461257934570312,
      "step": 47681
    },
    {
      "epoch": 0.000291021728515625,
      "step": 47681,
      "training_step_time": 0.49953651428222656
    },
    {
      "epoch": 0.00029102783203125,
      "model_forward_time": 0.11419558525085449,
      "step": 47682
    },
    {
      "epoch": 0.00029102783203125,
      "step": 47682,
      "training_step_time": 0.40164780616760254
    },
    {
      "epoch": 0.000291033935546875,
      "model_forward_time": 0.11394691467285156,
      "step": 47683
    },
    {
      "epoch": 0.000291033935546875,
      "step": 47683,
      "training_step_time": 0.40779805183410645
    },
    {
      "epoch": 0.0002910400390625,
      "model_forward_time": 0.11497855186462402,
      "step": 47684
    },
    {
      "epoch": 0.0002910400390625,
      "step": 47684,
      "training_step_time": 0.40932583808898926
    },
    {
      "epoch": 0.000291046142578125,
      "model_forward_time": 0.11474800109863281,
      "step": 47685
    },
    {
      "epoch": 0.000291046142578125,
      "step": 47685,
      "training_step_time": 0.4001030921936035
    },
    {
      "epoch": 0.00029105224609375,
      "model_forward_time": 0.1149134635925293,
      "step": 47686
    },
    {
      "epoch": 0.00029105224609375,
      "step": 47686,
      "training_step_time": 0.39319419860839844
    },
    {
      "epoch": 0.000291058349609375,
      "model_forward_time": 0.11480283737182617,
      "step": 47687
    },
    {
      "epoch": 0.000291058349609375,
      "step": 47687,
      "training_step_time": 0.3930373191833496
    },
    {
      "epoch": 0.000291064453125,
      "model_forward_time": 0.11515402793884277,
      "step": 47688
    },
    {
      "epoch": 0.000291064453125,
      "step": 47688,
      "training_step_time": 0.4086325168609619
    },
    {
      "epoch": 0.000291070556640625,
      "model_forward_time": 0.11548161506652832,
      "step": 47689
    },
    {
      "epoch": 0.000291070556640625,
      "step": 47689,
      "training_step_time": 0.3933379650115967
    },
    {
      "epoch": 0.00029107666015625,
      "grad_norm": 0.07094711065292358,
      "learning_rate": 1.107342559572977e-05,
      "loss": 0.0372,
      "step": 47690
    },
    {
      "epoch": 0.00029107666015625,
      "model_forward_time": 0.11551761627197266,
      "step": 47690
    },
    {
      "epoch": 0.00029107666015625,
      "step": 47690,
      "training_step_time": 0.3927955627441406
    },
    {
      "epoch": 0.000291082763671875,
      "model_forward_time": 0.11525559425354004,
      "step": 47691
    },
    {
      "epoch": 0.000291082763671875,
      "step": 47691,
      "training_step_time": 0.6216716766357422
    },
    {
      "epoch": 0.0002910888671875,
      "model_forward_time": 0.11542177200317383,
      "step": 47692
    },
    {
      "epoch": 0.0002910888671875,
      "step": 47692,
      "training_step_time": 0.4299449920654297
    },
    {
      "epoch": 0.000291094970703125,
      "model_forward_time": 0.11460280418395996,
      "step": 47693
    },
    {
      "epoch": 0.000291094970703125,
      "step": 47693,
      "training_step_time": 0.38289761543273926
    },
    {
      "epoch": 0.00029110107421875,
      "model_forward_time": 0.11481142044067383,
      "step": 47694
    },
    {
      "epoch": 0.00029110107421875,
      "step": 47694,
      "training_step_time": 0.4430985450744629
    },
    {
      "epoch": 0.000291107177734375,
      "model_forward_time": 0.11470413208007812,
      "step": 47695
    },
    {
      "epoch": 0.000291107177734375,
      "step": 47695,
      "training_step_time": 0.4355347156524658
    },
    {
      "epoch": 0.00029111328125,
      "model_forward_time": 0.11575865745544434,
      "step": 47696
    },
    {
      "epoch": 0.00029111328125,
      "step": 47696,
      "training_step_time": 0.43932628631591797
    },
    {
      "epoch": 0.000291119384765625,
      "model_forward_time": 0.115234375,
      "step": 47697
    },
    {
      "epoch": 0.000291119384765625,
      "step": 47697,
      "training_step_time": 0.4561889171600342
    },
    {
      "epoch": 0.00029112548828125,
      "model_forward_time": 0.11521363258361816,
      "step": 47698
    },
    {
      "epoch": 0.00029112548828125,
      "step": 47698,
      "training_step_time": 0.38483357429504395
    },
    {
      "epoch": 0.000291131591796875,
      "model_forward_time": 0.1146700382232666,
      "step": 47699
    },
    {
      "epoch": 0.000291131591796875,
      "step": 47699,
      "training_step_time": 0.3919384479522705
    },
    {
      "epoch": 0.0002911376953125,
      "grad_norm": 0.10748258233070374,
      "learning_rate": 1.1056136061894384e-05,
      "loss": 0.0391,
      "step": 47700
    },
    {
      "epoch": 0.0002911376953125,
      "model_forward_time": 0.11537551879882812,
      "step": 47700
    },
    {
      "epoch": 0.0002911376953125,
      "step": 47700,
      "training_step_time": 0.3966066837310791
    },
    {
      "epoch": 0.000291143798828125,
      "model_forward_time": 0.11498403549194336,
      "step": 47701
    },
    {
      "epoch": 0.000291143798828125,
      "step": 47701,
      "training_step_time": 0.38909125328063965
    },
    {
      "epoch": 0.00029114990234375,
      "model_forward_time": 0.11500430107116699,
      "step": 47702
    },
    {
      "epoch": 0.00029114990234375,
      "step": 47702,
      "training_step_time": 0.3910233974456787
    },
    {
      "epoch": 0.000291156005859375,
      "model_forward_time": 0.11568284034729004,
      "step": 47703
    },
    {
      "epoch": 0.000291156005859375,
      "step": 47703,
      "training_step_time": 0.7511599063873291
    },
    {
      "epoch": 0.000291162109375,
      "model_forward_time": 0.11468696594238281,
      "step": 47704
    },
    {
      "epoch": 0.000291162109375,
      "step": 47704,
      "training_step_time": 0.382659912109375
    },
    {
      "epoch": 0.000291168212890625,
      "model_forward_time": 0.11557674407958984,
      "step": 47705
    },
    {
      "epoch": 0.000291168212890625,
      "step": 47705,
      "training_step_time": 0.39978766441345215
    },
    {
      "epoch": 0.00029117431640625,
      "model_forward_time": 0.11551117897033691,
      "step": 47706
    },
    {
      "epoch": 0.00029117431640625,
      "step": 47706,
      "training_step_time": 0.39794158935546875
    },
    {
      "epoch": 0.000291180419921875,
      "model_forward_time": 0.11554193496704102,
      "step": 47707
    },
    {
      "epoch": 0.000291180419921875,
      "step": 47707,
      "training_step_time": 0.5007388591766357
    },
    {
      "epoch": 0.0002911865234375,
      "model_forward_time": 0.11496114730834961,
      "step": 47708
    },
    {
      "epoch": 0.0002911865234375,
      "step": 47708,
      "training_step_time": 0.4131052494049072
    },
    {
      "epoch": 0.000291192626953125,
      "model_forward_time": 0.1152031421661377,
      "step": 47709
    },
    {
      "epoch": 0.000291192626953125,
      "step": 47709,
      "training_step_time": 0.5475418567657471
    },
    {
      "epoch": 0.00029119873046875,
      "grad_norm": 0.08263937383890152,
      "learning_rate": 1.1038858358176602e-05,
      "loss": 0.0367,
      "step": 47710
    },
    {
      "epoch": 0.00029119873046875,
      "model_forward_time": 0.11494135856628418,
      "step": 47710
    },
    {
      "epoch": 0.00029119873046875,
      "step": 47710,
      "training_step_time": 0.45095276832580566
    },
    {
      "epoch": 0.000291204833984375,
      "model_forward_time": 0.11441278457641602,
      "step": 47711
    },
    {
      "epoch": 0.000291204833984375,
      "step": 47711,
      "training_step_time": 0.3879833221435547
    },
    {
      "epoch": 0.0002912109375,
      "model_forward_time": 0.11494970321655273,
      "step": 47712
    },
    {
      "epoch": 0.0002912109375,
      "step": 47712,
      "training_step_time": 0.3905150890350342
    },
    {
      "epoch": 0.000291217041015625,
      "model_forward_time": 0.1145021915435791,
      "step": 47713
    },
    {
      "epoch": 0.000291217041015625,
      "step": 47713,
      "training_step_time": 0.3871614933013916
    },
    {
      "epoch": 0.00029122314453125,
      "model_forward_time": 0.11493039131164551,
      "step": 47714
    },
    {
      "epoch": 0.00029122314453125,
      "step": 47714,
      "training_step_time": 0.3904266357421875
    },
    {
      "epoch": 0.000291229248046875,
      "model_forward_time": 0.11498618125915527,
      "step": 47715
    },
    {
      "epoch": 0.000291229248046875,
      "step": 47715,
      "training_step_time": 0.5345158576965332
    },
    {
      "epoch": 0.0002912353515625,
      "model_forward_time": 0.1149439811706543,
      "step": 47716
    },
    {
      "epoch": 0.0002912353515625,
      "step": 47716,
      "training_step_time": 0.38468313217163086
    },
    {
      "epoch": 0.000291241455078125,
      "model_forward_time": 0.1150970458984375,
      "step": 47717
    },
    {
      "epoch": 0.000291241455078125,
      "step": 47717,
      "training_step_time": 0.40651559829711914
    },
    {
      "epoch": 0.00029124755859375,
      "model_forward_time": 0.11556291580200195,
      "step": 47718
    },
    {
      "epoch": 0.00029124755859375,
      "step": 47718,
      "training_step_time": 0.3957226276397705
    },
    {
      "epoch": 0.000291253662109375,
      "model_forward_time": 0.11488199234008789,
      "step": 47719
    },
    {
      "epoch": 0.000291253662109375,
      "step": 47719,
      "training_step_time": 0.3933289051055908
    },
    {
      "epoch": 0.000291259765625,
      "grad_norm": 0.11433710902929306,
      "learning_rate": 1.1021592489824967e-05,
      "loss": 0.0347,
      "step": 47720
    },
    {
      "epoch": 0.000291259765625,
      "model_forward_time": 0.11497378349304199,
      "step": 47720
    },
    {
      "epoch": 0.000291259765625,
      "step": 47720,
      "training_step_time": 0.39966440200805664
    },
    {
      "epoch": 0.000291265869140625,
      "model_forward_time": 0.11542439460754395,
      "step": 47721
    },
    {
      "epoch": 0.000291265869140625,
      "step": 47721,
      "training_step_time": 0.5456752777099609
    },
    {
      "epoch": 0.00029127197265625,
      "model_forward_time": 0.11545395851135254,
      "step": 47722
    },
    {
      "epoch": 0.00029127197265625,
      "step": 47722,
      "training_step_time": 0.4712347984313965
    },
    {
      "epoch": 0.000291278076171875,
      "model_forward_time": 0.11481642723083496,
      "step": 47723
    },
    {
      "epoch": 0.000291278076171875,
      "step": 47723,
      "training_step_time": 0.4613311290740967
    },
    {
      "epoch": 0.0002912841796875,
      "model_forward_time": 0.11495590209960938,
      "step": 47724
    },
    {
      "epoch": 0.0002912841796875,
      "step": 47724,
      "training_step_time": 0.3931539058685303
    },
    {
      "epoch": 0.000291290283203125,
      "model_forward_time": 0.11494565010070801,
      "step": 47725
    },
    {
      "epoch": 0.000291290283203125,
      "step": 47725,
      "training_step_time": 0.39807605743408203
    },
    {
      "epoch": 0.00029129638671875,
      "model_forward_time": 0.11481213569641113,
      "step": 47726
    },
    {
      "epoch": 0.00029129638671875,
      "step": 47726,
      "training_step_time": 0.3868114948272705
    },
    {
      "epoch": 0.000291302490234375,
      "model_forward_time": 0.11478400230407715,
      "step": 47727
    },
    {
      "epoch": 0.000291302490234375,
      "step": 47727,
      "training_step_time": 0.541412353515625
    },
    {
      "epoch": 0.00029130859375,
      "model_forward_time": 0.11515951156616211,
      "step": 47728
    },
    {
      "epoch": 0.00029130859375,
      "step": 47728,
      "training_step_time": 0.3986396789550781
    },
    {
      "epoch": 0.000291314697265625,
      "model_forward_time": 0.11542105674743652,
      "step": 47729
    },
    {
      "epoch": 0.000291314697265625,
      "step": 47729,
      "training_step_time": 0.3989739418029785
    },
    {
      "epoch": 0.00029132080078125,
      "grad_norm": 0.08505991846323013,
      "learning_rate": 1.100433846208434e-05,
      "loss": 0.0326,
      "step": 47730
    },
    {
      "epoch": 0.00029132080078125,
      "model_forward_time": 0.11484122276306152,
      "step": 47730
    },
    {
      "epoch": 0.00029132080078125,
      "step": 47730,
      "training_step_time": 0.3968193531036377
    },
    {
      "epoch": 0.000291326904296875,
      "model_forward_time": 0.11543726921081543,
      "step": 47731
    },
    {
      "epoch": 0.000291326904296875,
      "step": 47731,
      "training_step_time": 0.3878517150878906
    },
    {
      "epoch": 0.0002913330078125,
      "model_forward_time": 0.11502790451049805,
      "step": 47732
    },
    {
      "epoch": 0.0002913330078125,
      "step": 47732,
      "training_step_time": 0.39376401901245117
    },
    {
      "epoch": 0.000291339111328125,
      "model_forward_time": 0.11498236656188965,
      "step": 47733
    },
    {
      "epoch": 0.000291339111328125,
      "step": 47733,
      "training_step_time": 0.5833680629730225
    },
    {
      "epoch": 0.00029134521484375,
      "model_forward_time": 0.11480975151062012,
      "step": 47734
    },
    {
      "epoch": 0.00029134521484375,
      "step": 47734,
      "training_step_time": 0.40538787841796875
    },
    {
      "epoch": 0.000291351318359375,
      "model_forward_time": 0.11466145515441895,
      "step": 47735
    },
    {
      "epoch": 0.000291351318359375,
      "step": 47735,
      "training_step_time": 0.444044828414917
    },
    {
      "epoch": 0.000291357421875,
      "model_forward_time": 0.11533045768737793,
      "step": 47736
    },
    {
      "epoch": 0.000291357421875,
      "step": 47736,
      "training_step_time": 0.4416956901550293
    },
    {
      "epoch": 0.000291363525390625,
      "model_forward_time": 0.11483931541442871,
      "step": 47737
    },
    {
      "epoch": 0.000291363525390625,
      "step": 47737,
      "training_step_time": 0.44561767578125
    },
    {
      "epoch": 0.00029136962890625,
      "model_forward_time": 0.11492609977722168,
      "step": 47738
    },
    {
      "epoch": 0.00029136962890625,
      "step": 47738,
      "training_step_time": 0.40260791778564453
    },
    {
      "epoch": 0.000291375732421875,
      "model_forward_time": 0.11483168601989746,
      "step": 47739
    },
    {
      "epoch": 0.000291375732421875,
      "step": 47739,
      "training_step_time": 0.4176299571990967
    },
    {
      "epoch": 0.0002913818359375,
      "grad_norm": 0.10082937031984329,
      "learning_rate": 1.0987096280196086e-05,
      "loss": 0.0365,
      "step": 47740
    },
    {
      "epoch": 0.0002913818359375,
      "model_forward_time": 0.11638355255126953,
      "step": 47740
    },
    {
      "epoch": 0.0002913818359375,
      "step": 47740,
      "training_step_time": 0.38254308700561523
    },
    {
      "epoch": 0.000291387939453125,
      "model_forward_time": 0.11598563194274902,
      "step": 47741
    },
    {
      "epoch": 0.000291387939453125,
      "step": 47741,
      "training_step_time": 0.3945469856262207
    },
    {
      "epoch": 0.00029139404296875,
      "model_forward_time": 0.11558699607849121,
      "step": 47742
    },
    {
      "epoch": 0.00029139404296875,
      "step": 47742,
      "training_step_time": 0.3957357406616211
    },
    {
      "epoch": 0.000291400146484375,
      "model_forward_time": 0.11584830284118652,
      "step": 47743
    },
    {
      "epoch": 0.000291400146484375,
      "step": 47743,
      "training_step_time": 0.3931102752685547
    },
    {
      "epoch": 0.00029140625,
      "model_forward_time": 0.11540627479553223,
      "step": 47744
    },
    {
      "epoch": 0.00029140625,
      "step": 47744,
      "training_step_time": 0.38390088081359863
    },
    {
      "epoch": 0.000291412353515625,
      "model_forward_time": 0.11566424369812012,
      "step": 47745
    },
    {
      "epoch": 0.000291412353515625,
      "step": 47745,
      "training_step_time": 0.6142199039459229
    },
    {
      "epoch": 0.00029141845703125,
      "model_forward_time": 0.11503911018371582,
      "step": 47746
    },
    {
      "epoch": 0.00029141845703125,
      "step": 47746,
      "training_step_time": 0.3815934658050537
    },
    {
      "epoch": 0.000291424560546875,
      "model_forward_time": 0.11539530754089355,
      "step": 47747
    },
    {
      "epoch": 0.000291424560546875,
      "step": 47747,
      "training_step_time": 0.386202335357666
    },
    {
      "epoch": 0.0002914306640625,
      "model_forward_time": 0.11493945121765137,
      "step": 47748
    },
    {
      "epoch": 0.0002914306640625,
      "step": 47748,
      "training_step_time": 0.400158166885376
    },
    {
      "epoch": 0.000291436767578125,
      "model_forward_time": 0.11514711380004883,
      "step": 47749
    },
    {
      "epoch": 0.000291436767578125,
      "step": 47749,
      "training_step_time": 0.4869556427001953
    },
    {
      "epoch": 0.00029144287109375,
      "grad_norm": 0.10563930869102478,
      "learning_rate": 1.0969865949397901e-05,
      "loss": 0.0313,
      "step": 47750
    },
    {
      "epoch": 0.00029144287109375,
      "model_forward_time": 0.11449790000915527,
      "step": 47750
    },
    {
      "epoch": 0.00029144287109375,
      "step": 47750,
      "training_step_time": 0.3632364273071289
    },
    {
      "epoch": 0.000291448974609375,
      "model_forward_time": 0.11507678031921387,
      "step": 47751
    },
    {
      "epoch": 0.000291448974609375,
      "step": 47751,
      "training_step_time": 0.4409923553466797
    },
    {
      "epoch": 0.000291455078125,
      "model_forward_time": 0.11530017852783203,
      "step": 47752
    },
    {
      "epoch": 0.000291455078125,
      "step": 47752,
      "training_step_time": 0.49127650260925293
    },
    {
      "epoch": 0.000291461181640625,
      "model_forward_time": 0.11461448669433594,
      "step": 47753
    },
    {
      "epoch": 0.000291461181640625,
      "step": 47753,
      "training_step_time": 0.38855862617492676
    },
    {
      "epoch": 0.00029146728515625,
      "model_forward_time": 0.11543774604797363,
      "step": 47754
    },
    {
      "epoch": 0.00029146728515625,
      "step": 47754,
      "training_step_time": 0.38659214973449707
    },
    {
      "epoch": 0.000291473388671875,
      "model_forward_time": 0.11450433731079102,
      "step": 47755
    },
    {
      "epoch": 0.000291473388671875,
      "step": 47755,
      "training_step_time": 0.39218950271606445
    },
    {
      "epoch": 0.0002914794921875,
      "model_forward_time": 0.11435246467590332,
      "step": 47756
    },
    {
      "epoch": 0.0002914794921875,
      "step": 47756,
      "training_step_time": 0.3964877128601074
    },
    {
      "epoch": 0.000291485595703125,
      "model_forward_time": 0.11483907699584961,
      "step": 47757
    },
    {
      "epoch": 0.000291485595703125,
      "step": 47757,
      "training_step_time": 0.5016233921051025
    },
    {
      "epoch": 0.00029149169921875,
      "model_forward_time": 0.11464595794677734,
      "step": 47758
    },
    {
      "epoch": 0.00029149169921875,
      "step": 47758,
      "training_step_time": 0.41797637939453125
    },
    {
      "epoch": 0.000291497802734375,
      "model_forward_time": 0.11520075798034668,
      "step": 47759
    },
    {
      "epoch": 0.000291497802734375,
      "step": 47759,
      "training_step_time": 0.4025275707244873
    },
    {
      "epoch": 0.00029150390625,
      "grad_norm": 0.10873989760875702,
      "learning_rate": 1.095264747492391e-05,
      "loss": 0.0329,
      "step": 47760
    },
    {
      "epoch": 0.00029150390625,
      "model_forward_time": 0.11500263214111328,
      "step": 47760
    },
    {
      "epoch": 0.00029150390625,
      "step": 47760,
      "training_step_time": 0.40032529830932617
    },
    {
      "epoch": 0.000291510009765625,
      "model_forward_time": 0.11513137817382812,
      "step": 47761
    },
    {
      "epoch": 0.000291510009765625,
      "step": 47761,
      "training_step_time": 0.395143985748291
    },
    {
      "epoch": 0.00029151611328125,
      "model_forward_time": 0.11494755744934082,
      "step": 47762
    },
    {
      "epoch": 0.00029151611328125,
      "step": 47762,
      "training_step_time": 0.4450528621673584
    },
    {
      "epoch": 0.000291522216796875,
      "model_forward_time": 0.11589550971984863,
      "step": 47763
    },
    {
      "epoch": 0.000291522216796875,
      "step": 47763,
      "training_step_time": 0.6276376247406006
    },
    {
      "epoch": 0.0002915283203125,
      "model_forward_time": 0.114715576171875,
      "step": 47764
    },
    {
      "epoch": 0.0002915283203125,
      "step": 47764,
      "training_step_time": 0.4175577163696289
    },
    {
      "epoch": 0.000291534423828125,
      "model_forward_time": 0.11442708969116211,
      "step": 47765
    },
    {
      "epoch": 0.000291534423828125,
      "step": 47765,
      "training_step_time": 0.4396350383758545
    },
    {
      "epoch": 0.00029154052734375,
      "model_forward_time": 0.1147925853729248,
      "step": 47766
    },
    {
      "epoch": 0.00029154052734375,
      "step": 47766,
      "training_step_time": 0.41782283782958984
    },
    {
      "epoch": 0.000291546630859375,
      "model_forward_time": 0.1147153377532959,
      "step": 47767
    },
    {
      "epoch": 0.000291546630859375,
      "step": 47767,
      "training_step_time": 0.43660521507263184
    },
    {
      "epoch": 0.000291552734375,
      "model_forward_time": 0.1147146224975586,
      "step": 47768
    },
    {
      "epoch": 0.000291552734375,
      "step": 47768,
      "training_step_time": 0.3837883472442627
    },
    {
      "epoch": 0.000291558837890625,
      "model_forward_time": 0.11483025550842285,
      "step": 47769
    },
    {
      "epoch": 0.000291558837890625,
      "step": 47769,
      "training_step_time": 0.5266125202178955
    },
    {
      "epoch": 0.00029156494140625,
      "grad_norm": 0.10674098134040833,
      "learning_rate": 1.093544086200463e-05,
      "loss": 0.0332,
      "step": 47770
    },
    {
      "epoch": 0.00029156494140625,
      "model_forward_time": 0.11481809616088867,
      "step": 47770
    },
    {
      "epoch": 0.00029156494140625,
      "step": 47770,
      "training_step_time": 0.3967247009277344
    },
    {
      "epoch": 0.000291571044921875,
      "model_forward_time": 0.11514782905578613,
      "step": 47771
    },
    {
      "epoch": 0.000291571044921875,
      "step": 47771,
      "training_step_time": 0.3978917598724365
    },
    {
      "epoch": 0.0002915771484375,
      "model_forward_time": 0.11512112617492676,
      "step": 47772
    },
    {
      "epoch": 0.0002915771484375,
      "step": 47772,
      "training_step_time": 0.3976316452026367
    },
    {
      "epoch": 0.000291583251953125,
      "model_forward_time": 0.11479520797729492,
      "step": 47773
    },
    {
      "epoch": 0.000291583251953125,
      "step": 47773,
      "training_step_time": 0.3998904228210449
    },
    {
      "epoch": 0.00029158935546875,
      "model_forward_time": 0.1146705150604248,
      "step": 47774
    },
    {
      "epoch": 0.00029158935546875,
      "step": 47774,
      "training_step_time": 0.38709402084350586
    },
    {
      "epoch": 0.000291595458984375,
      "model_forward_time": 0.11514759063720703,
      "step": 47775
    },
    {
      "epoch": 0.000291595458984375,
      "step": 47775,
      "training_step_time": 0.6799821853637695
    },
    {
      "epoch": 0.0002916015625,
      "model_forward_time": 0.11575770378112793,
      "step": 47776
    },
    {
      "epoch": 0.0002916015625,
      "step": 47776,
      "training_step_time": 0.44219136238098145
    },
    {
      "epoch": 0.000291607666015625,
      "model_forward_time": 0.11421728134155273,
      "step": 47777
    },
    {
      "epoch": 0.000291607666015625,
      "step": 47777,
      "training_step_time": 0.4331059455871582
    },
    {
      "epoch": 0.00029161376953125,
      "model_forward_time": 0.11473822593688965,
      "step": 47778
    },
    {
      "epoch": 0.00029161376953125,
      "step": 47778,
      "training_step_time": 0.39560723304748535
    },
    {
      "epoch": 0.000291619873046875,
      "model_forward_time": 0.11506223678588867,
      "step": 47779
    },
    {
      "epoch": 0.000291619873046875,
      "step": 47779,
      "training_step_time": 0.45219850540161133
    },
    {
      "epoch": 0.0002916259765625,
      "grad_norm": 0.084809310734272,
      "learning_rate": 1.0918246115866964e-05,
      "loss": 0.0332,
      "step": 47780
    },
    {
      "epoch": 0.0002916259765625,
      "model_forward_time": 0.11497950553894043,
      "step": 47780
    },
    {
      "epoch": 0.0002916259765625,
      "step": 47780,
      "training_step_time": 0.473895788192749
    },
    {
      "epoch": 0.000291632080078125,
      "model_forward_time": 0.11504864692687988,
      "step": 47781
    },
    {
      "epoch": 0.000291632080078125,
      "step": 47781,
      "training_step_time": 0.47641658782958984
    },
    {
      "epoch": 0.00029163818359375,
      "model_forward_time": 0.11505603790283203,
      "step": 47782
    },
    {
      "epoch": 0.00029163818359375,
      "step": 47782,
      "training_step_time": 0.38816404342651367
    },
    {
      "epoch": 0.000291644287109375,
      "model_forward_time": 0.11460232734680176,
      "step": 47783
    },
    {
      "epoch": 0.000291644287109375,
      "step": 47783,
      "training_step_time": 0.3981482982635498
    },
    {
      "epoch": 0.000291650390625,
      "model_forward_time": 0.11517453193664551,
      "step": 47784
    },
    {
      "epoch": 0.000291650390625,
      "step": 47784,
      "training_step_time": 0.4094820022583008
    },
    {
      "epoch": 0.000291656494140625,
      "model_forward_time": 0.11549115180969238,
      "step": 47785
    },
    {
      "epoch": 0.000291656494140625,
      "step": 47785,
      "training_step_time": 0.3929157257080078
    },
    {
      "epoch": 0.00029166259765625,
      "model_forward_time": 0.11521697044372559,
      "step": 47786
    },
    {
      "epoch": 0.00029166259765625,
      "step": 47786,
      "training_step_time": 0.39151740074157715
    },
    {
      "epoch": 0.000291668701171875,
      "model_forward_time": 0.11545753479003906,
      "step": 47787
    },
    {
      "epoch": 0.000291668701171875,
      "step": 47787,
      "training_step_time": 0.7158620357513428
    },
    {
      "epoch": 0.0002916748046875,
      "model_forward_time": 0.11520910263061523,
      "step": 47788
    },
    {
      "epoch": 0.0002916748046875,
      "step": 47788,
      "training_step_time": 0.38420939445495605
    },
    {
      "epoch": 0.000291680908203125,
      "model_forward_time": 0.11436629295349121,
      "step": 47789
    },
    {
      "epoch": 0.000291680908203125,
      "step": 47789,
      "training_step_time": 0.5115330219268799
    },
    {
      "epoch": 0.00029168701171875,
      "grad_norm": 0.13112607598304749,
      "learning_rate": 1.090106324173426e-05,
      "loss": 0.0403,
      "step": 47790
    },
    {
      "epoch": 0.00029168701171875,
      "model_forward_time": 0.11479926109313965,
      "step": 47790
    },
    {
      "epoch": 0.00029168701171875,
      "step": 47790,
      "training_step_time": 0.4586625099182129
    },
    {
      "epoch": 0.000291693115234375,
      "model_forward_time": 0.11407232284545898,
      "step": 47791
    },
    {
      "epoch": 0.000291693115234375,
      "step": 47791,
      "training_step_time": 0.41379523277282715
    },
    {
      "epoch": 0.00029169921875,
      "model_forward_time": 0.11482906341552734,
      "step": 47792
    },
    {
      "epoch": 0.00029169921875,
      "step": 47792,
      "training_step_time": 0.42011070251464844
    },
    {
      "epoch": 0.000291705322265625,
      "model_forward_time": 0.11510992050170898,
      "step": 47793
    },
    {
      "epoch": 0.000291705322265625,
      "step": 47793,
      "training_step_time": 0.4378018379211426
    },
    {
      "epoch": 0.00029171142578125,
      "model_forward_time": 0.11492633819580078,
      "step": 47794
    },
    {
      "epoch": 0.00029171142578125,
      "step": 47794,
      "training_step_time": 0.45050621032714844
    },
    {
      "epoch": 0.000291717529296875,
      "model_forward_time": 0.11465167999267578,
      "step": 47795
    },
    {
      "epoch": 0.000291717529296875,
      "step": 47795,
      "training_step_time": 0.4308156967163086
    },
    {
      "epoch": 0.0002917236328125,
      "model_forward_time": 0.11504316329956055,
      "step": 47796
    },
    {
      "epoch": 0.0002917236328125,
      "step": 47796,
      "training_step_time": 0.39081668853759766
    },
    {
      "epoch": 0.000291729736328125,
      "model_forward_time": 0.11534643173217773,
      "step": 47797
    },
    {
      "epoch": 0.000291729736328125,
      "step": 47797,
      "training_step_time": 0.39551615715026855
    },
    {
      "epoch": 0.00029173583984375,
      "model_forward_time": 0.11488127708435059,
      "step": 47798
    },
    {
      "epoch": 0.00029173583984375,
      "step": 47798,
      "training_step_time": 0.40405750274658203
    },
    {
      "epoch": 0.000291741943359375,
      "model_forward_time": 0.11466407775878906,
      "step": 47799
    },
    {
      "epoch": 0.000291741943359375,
      "step": 47799,
      "training_step_time": 0.7209014892578125
    },
    {
      "epoch": 0.000291748046875,
      "grad_norm": 0.12557433545589447,
      "learning_rate": 1.0883892244826172e-05,
      "loss": 0.0368,
      "step": 47800
    },
    {
      "epoch": 0.000291748046875,
      "model_forward_time": 0.11596226692199707,
      "step": 47800
    },
    {
      "epoch": 0.000291748046875,
      "step": 47800,
      "training_step_time": 0.40769124031066895
    },
    {
      "epoch": 0.000291754150390625,
      "model_forward_time": 0.11853551864624023,
      "step": 47801
    },
    {
      "epoch": 0.000291754150390625,
      "step": 47801,
      "training_step_time": 0.4886898994445801
    },
    {
      "epoch": 0.00029176025390625,
      "model_forward_time": 0.11945009231567383,
      "step": 47802
    },
    {
      "epoch": 0.00029176025390625,
      "step": 47802,
      "training_step_time": 0.6286447048187256
    },
    {
      "epoch": 0.000291766357421875,
      "model_forward_time": 0.12291693687438965,
      "step": 47803
    },
    {
      "epoch": 0.000291766357421875,
      "step": 47803,
      "training_step_time": 0.636603832244873
    },
    {
      "epoch": 0.0002917724609375,
      "model_forward_time": 0.13943219184875488,
      "step": 47804
    },
    {
      "epoch": 0.0002917724609375,
      "step": 47804,
      "training_step_time": 0.719599723815918
    },
    {
      "epoch": 0.000291778564453125,
      "model_forward_time": 0.12300968170166016,
      "step": 47805
    },
    {
      "epoch": 0.000291778564453125,
      "step": 47805,
      "training_step_time": 0.733104944229126
    },
    {
      "epoch": 0.00029178466796875,
      "model_forward_time": 0.12278532981872559,
      "step": 47806
    },
    {
      "epoch": 0.00029178466796875,
      "step": 47806,
      "training_step_time": 0.653465747833252
    },
    {
      "epoch": 0.000291790771484375,
      "model_forward_time": 0.12317728996276855,
      "step": 47807
    },
    {
      "epoch": 0.000291790771484375,
      "step": 47807,
      "training_step_time": 0.6765265464782715
    },
    {
      "epoch": 0.000291796875,
      "model_forward_time": 0.11869502067565918,
      "step": 47808
    },
    {
      "epoch": 0.000291796875,
      "step": 47808,
      "training_step_time": 0.6161801815032959
    },
    {
      "epoch": 0.000291802978515625,
      "model_forward_time": 0.1188969612121582,
      "step": 47809
    },
    {
      "epoch": 0.000291802978515625,
      "step": 47809,
      "training_step_time": 0.6799850463867188
    },
    {
      "epoch": 0.00029180908203125,
      "grad_norm": 0.09874983131885529,
      "learning_rate": 1.086673313035883e-05,
      "loss": 0.0345,
      "step": 47810
    },
    {
      "epoch": 0.00029180908203125,
      "model_forward_time": 0.1239473819732666,
      "step": 47810
    },
    {
      "epoch": 0.00029180908203125,
      "step": 47810,
      "training_step_time": 0.6959562301635742
    },
    {
      "epoch": 0.000291815185546875,
      "model_forward_time": 0.12420344352722168,
      "step": 47811
    },
    {
      "epoch": 0.000291815185546875,
      "step": 47811,
      "training_step_time": 0.7646183967590332
    },
    {
      "epoch": 0.0002918212890625,
      "model_forward_time": 0.13749146461486816,
      "step": 47812
    },
    {
      "epoch": 0.0002918212890625,
      "step": 47812,
      "training_step_time": 0.7058730125427246
    },
    {
      "epoch": 0.000291827392578125,
      "model_forward_time": 0.11925816535949707,
      "step": 47813
    },
    {
      "epoch": 0.000291827392578125,
      "step": 47813,
      "training_step_time": 0.8706450462341309
    },
    {
      "epoch": 0.00029183349609375,
      "model_forward_time": 0.11751484870910645,
      "step": 47814
    },
    {
      "epoch": 0.00029183349609375,
      "step": 47814,
      "training_step_time": 0.6833422183990479
    },
    {
      "epoch": 0.000291839599609375,
      "model_forward_time": 0.12137079238891602,
      "step": 47815
    },
    {
      "epoch": 0.000291839599609375,
      "step": 47815,
      "training_step_time": 0.7098677158355713
    },
    {
      "epoch": 0.000291845703125,
      "model_forward_time": 0.1224973201751709,
      "step": 47816
    },
    {
      "epoch": 0.000291845703125,
      "step": 47816,
      "training_step_time": 0.6247942447662354
    },
    {
      "epoch": 0.000291851806640625,
      "model_forward_time": 0.1261425018310547,
      "step": 47817
    },
    {
      "epoch": 0.000291851806640625,
      "step": 47817,
      "training_step_time": 0.6878552436828613
    },
    {
      "epoch": 0.00029185791015625,
      "model_forward_time": 0.12172746658325195,
      "step": 47818
    },
    {
      "epoch": 0.00029185791015625,
      "step": 47818,
      "training_step_time": 0.6764061450958252
    },
    {
      "epoch": 0.000291864013671875,
      "model_forward_time": 0.11896181106567383,
      "step": 47819
    },
    {
      "epoch": 0.000291864013671875,
      "step": 47819,
      "training_step_time": 0.6631782054901123
    },
    {
      "epoch": 0.0002918701171875,
      "grad_norm": 0.10917755961418152,
      "learning_rate": 1.0849585903544706e-05,
      "loss": 0.0388,
      "step": 47820
    },
    {
      "epoch": 0.0002918701171875,
      "model_forward_time": 0.1184854507446289,
      "step": 47820
    },
    {
      "epoch": 0.0002918701171875,
      "step": 47820,
      "training_step_time": 0.6522631645202637
    },
    {
      "epoch": 0.000291876220703125,
      "model_forward_time": 0.11584138870239258,
      "step": 47821
    },
    {
      "epoch": 0.000291876220703125,
      "step": 47821,
      "training_step_time": 0.6893444061279297
    },
    {
      "epoch": 0.00029188232421875,
      "model_forward_time": 0.12312936782836914,
      "step": 47822
    },
    {
      "epoch": 0.00029188232421875,
      "step": 47822,
      "training_step_time": 0.7507379055023193
    },
    {
      "epoch": 0.000291888427734375,
      "model_forward_time": 0.12158489227294922,
      "step": 47823
    },
    {
      "epoch": 0.000291888427734375,
      "step": 47823,
      "training_step_time": 0.6417942047119141
    },
    {
      "epoch": 0.00029189453125,
      "model_forward_time": 0.11866164207458496,
      "step": 47824
    },
    {
      "epoch": 0.00029189453125,
      "step": 47824,
      "training_step_time": 0.705920934677124
    },
    {
      "epoch": 0.000291900634765625,
      "model_forward_time": 0.12152218818664551,
      "step": 47825
    },
    {
      "epoch": 0.000291900634765625,
      "step": 47825,
      "training_step_time": 0.6619963645935059
    },
    {
      "epoch": 0.00029190673828125,
      "model_forward_time": 0.11810445785522461,
      "step": 47826
    },
    {
      "epoch": 0.00029190673828125,
      "step": 47826,
      "training_step_time": 0.6588232517242432
    },
    {
      "epoch": 0.000291912841796875,
      "model_forward_time": 0.11893391609191895,
      "step": 47827
    },
    {
      "epoch": 0.000291912841796875,
      "step": 47827,
      "training_step_time": 0.689624547958374
    },
    {
      "epoch": 0.0002919189453125,
      "model_forward_time": 0.11664891242980957,
      "step": 47828
    },
    {
      "epoch": 0.0002919189453125,
      "step": 47828,
      "training_step_time": 0.6687450408935547
    },
    {
      "epoch": 0.000291925048828125,
      "model_forward_time": 0.12071776390075684,
      "step": 47829
    },
    {
      "epoch": 0.000291925048828125,
      "step": 47829,
      "training_step_time": 0.6683096885681152
    },
    {
      "epoch": 0.00029193115234375,
      "grad_norm": 0.10600335896015167,
      "learning_rate": 1.0832450569592684e-05,
      "loss": 0.0358,
      "step": 47830
    },
    {
      "epoch": 0.00029193115234375,
      "model_forward_time": 0.11825156211853027,
      "step": 47830
    },
    {
      "epoch": 0.00029193115234375,
      "step": 47830,
      "training_step_time": 0.667985200881958
    },
    {
      "epoch": 0.000291937255859375,
      "model_forward_time": 0.1200869083404541,
      "step": 47831
    },
    {
      "epoch": 0.000291937255859375,
      "step": 47831,
      "training_step_time": 0.6740624904632568
    },
    {
      "epoch": 0.000291943359375,
      "model_forward_time": 0.11596345901489258,
      "step": 47832
    },
    {
      "epoch": 0.000291943359375,
      "step": 47832,
      "training_step_time": 0.7217071056365967
    },
    {
      "epoch": 0.000291949462890625,
      "model_forward_time": 0.12451386451721191,
      "step": 47833
    },
    {
      "epoch": 0.000291949462890625,
      "step": 47833,
      "training_step_time": 0.6442632675170898
    },
    {
      "epoch": 0.00029195556640625,
      "model_forward_time": 0.11884093284606934,
      "step": 47834
    },
    {
      "epoch": 0.00029195556640625,
      "step": 47834,
      "training_step_time": 0.7454817295074463
    },
    {
      "epoch": 0.000291961669921875,
      "model_forward_time": 0.12309503555297852,
      "step": 47835
    },
    {
      "epoch": 0.000291961669921875,
      "step": 47835,
      "training_step_time": 0.6619546413421631
    },
    {
      "epoch": 0.0002919677734375,
      "model_forward_time": 0.11973166465759277,
      "step": 47836
    },
    {
      "epoch": 0.0002919677734375,
      "step": 47836,
      "training_step_time": 0.6491489410400391
    },
    {
      "epoch": 0.000291973876953125,
      "model_forward_time": 0.11767816543579102,
      "step": 47837
    },
    {
      "epoch": 0.000291973876953125,
      "step": 47837,
      "training_step_time": 0.6995048522949219
    },
    {
      "epoch": 0.00029197998046875,
      "model_forward_time": 0.11842918395996094,
      "step": 47838
    },
    {
      "epoch": 0.00029197998046875,
      "step": 47838,
      "training_step_time": 0.651517391204834
    },
    {
      "epoch": 0.000291986083984375,
      "model_forward_time": 0.12087655067443848,
      "step": 47839
    },
    {
      "epoch": 0.000291986083984375,
      "step": 47839,
      "training_step_time": 0.7237191200256348
    },
    {
      "epoch": 0.0002919921875,
      "grad_norm": 0.09603210538625717,
      "learning_rate": 1.0815327133708015e-05,
      "loss": 0.0381,
      "step": 47840
    },
    {
      "epoch": 0.0002919921875,
      "model_forward_time": 0.1187129020690918,
      "step": 47840
    },
    {
      "epoch": 0.0002919921875,
      "step": 47840,
      "training_step_time": 0.7164819240570068
    },
    {
      "epoch": 0.000291998291015625,
      "model_forward_time": 0.11663007736206055,
      "step": 47841
    },
    {
      "epoch": 0.000291998291015625,
      "step": 47841,
      "training_step_time": 0.6605515480041504
    },
    {
      "epoch": 0.00029200439453125,
      "model_forward_time": 0.12627053260803223,
      "step": 47842
    },
    {
      "epoch": 0.00029200439453125,
      "step": 47842,
      "training_step_time": 0.708763599395752
    },
    {
      "epoch": 0.000292010498046875,
      "model_forward_time": 0.12329983711242676,
      "step": 47843
    },
    {
      "epoch": 0.000292010498046875,
      "step": 47843,
      "training_step_time": 0.6984279155731201
    },
    {
      "epoch": 0.0002920166015625,
      "model_forward_time": 0.1198883056640625,
      "step": 47844
    },
    {
      "epoch": 0.0002920166015625,
      "step": 47844,
      "training_step_time": 0.6646647453308105
    },
    {
      "epoch": 0.000292022705078125,
      "model_forward_time": 0.11776399612426758,
      "step": 47845
    },
    {
      "epoch": 0.000292022705078125,
      "step": 47845,
      "training_step_time": 0.658329963684082
    },
    {
      "epoch": 0.00029202880859375,
      "model_forward_time": 0.12084364891052246,
      "step": 47846
    },
    {
      "epoch": 0.00029202880859375,
      "step": 47846,
      "training_step_time": 0.7316141128540039
    },
    {
      "epoch": 0.000292034912109375,
      "model_forward_time": 0.12031817436218262,
      "step": 47847
    },
    {
      "epoch": 0.000292034912109375,
      "step": 47847,
      "training_step_time": 0.6751394271850586
    },
    {
      "epoch": 0.000292041015625,
      "model_forward_time": 0.11879920959472656,
      "step": 47848
    },
    {
      "epoch": 0.000292041015625,
      "step": 47848,
      "training_step_time": 0.68280029296875
    },
    {
      "epoch": 0.000292047119140625,
      "model_forward_time": 0.12012553215026855,
      "step": 47849
    },
    {
      "epoch": 0.000292047119140625,
      "step": 47849,
      "training_step_time": 0.6624007225036621
    },
    {
      "epoch": 0.00029205322265625,
      "grad_norm": 0.09507432579994202,
      "learning_rate": 1.0798215601092354e-05,
      "loss": 0.0382,
      "step": 47850
    },
    {
      "epoch": 0.00029205322265625,
      "model_forward_time": 0.12133312225341797,
      "step": 47850
    },
    {
      "epoch": 0.00029205322265625,
      "step": 47850,
      "training_step_time": 0.6397957801818848
    },
    {
      "epoch": 0.000292059326171875,
      "model_forward_time": 0.11720991134643555,
      "step": 47851
    },
    {
      "epoch": 0.000292059326171875,
      "step": 47851,
      "training_step_time": 0.7200560569763184
    },
    {
      "epoch": 0.0002920654296875,
      "model_forward_time": 0.11870408058166504,
      "step": 47852
    },
    {
      "epoch": 0.0002920654296875,
      "step": 47852,
      "training_step_time": 0.6712822914123535
    },
    {
      "epoch": 0.000292071533203125,
      "model_forward_time": 0.12352848052978516,
      "step": 47853
    },
    {
      "epoch": 0.000292071533203125,
      "step": 47853,
      "training_step_time": 0.6603636741638184
    },
    {
      "epoch": 0.00029207763671875,
      "model_forward_time": 0.11506772041320801,
      "step": 47854
    },
    {
      "epoch": 0.00029207763671875,
      "step": 47854,
      "training_step_time": 0.6391134262084961
    },
    {
      "epoch": 0.000292083740234375,
      "model_forward_time": 0.1175541877746582,
      "step": 47855
    },
    {
      "epoch": 0.000292083740234375,
      "step": 47855,
      "training_step_time": 0.6202981472015381
    },
    {
      "epoch": 0.00029208984375,
      "model_forward_time": 0.13794708251953125,
      "step": 47856
    },
    {
      "epoch": 0.00029208984375,
      "step": 47856,
      "training_step_time": 0.6734716892242432
    },
    {
      "epoch": 0.000292095947265625,
      "model_forward_time": 0.12439441680908203,
      "step": 47857
    },
    {
      "epoch": 0.000292095947265625,
      "step": 47857,
      "training_step_time": 0.6941280364990234
    },
    {
      "epoch": 0.00029210205078125,
      "model_forward_time": 0.11765670776367188,
      "step": 47858
    },
    {
      "epoch": 0.00029210205078125,
      "step": 47858,
      "training_step_time": 0.748772382736206
    },
    {
      "epoch": 0.000292108154296875,
      "model_forward_time": 0.13425636291503906,
      "step": 47859
    },
    {
      "epoch": 0.000292108154296875,
      "step": 47859,
      "training_step_time": 0.6524991989135742
    },
    {
      "epoch": 0.0002921142578125,
      "grad_norm": 0.10500288009643555,
      "learning_rate": 1.0781115976943717e-05,
      "loss": 0.041,
      "step": 47860
    },
    {
      "epoch": 0.0002921142578125,
      "model_forward_time": 0.11919498443603516,
      "step": 47860
    },
    {
      "epoch": 0.0002921142578125,
      "step": 47860,
      "training_step_time": 0.7012546062469482
    },
    {
      "epoch": 0.000292120361328125,
      "model_forward_time": 0.11744523048400879,
      "step": 47861
    },
    {
      "epoch": 0.000292120361328125,
      "step": 47861,
      "training_step_time": 0.7002832889556885
    },
    {
      "epoch": 0.00029212646484375,
      "model_forward_time": 0.122039794921875,
      "step": 47862
    },
    {
      "epoch": 0.00029212646484375,
      "step": 47862,
      "training_step_time": 0.8006312847137451
    },
    {
      "epoch": 0.000292132568359375,
      "model_forward_time": 0.1166534423828125,
      "step": 47863
    },
    {
      "epoch": 0.000292132568359375,
      "step": 47863,
      "training_step_time": 0.5825281143188477
    },
    {
      "epoch": 0.000292138671875,
      "model_forward_time": 0.11703228950500488,
      "step": 47864
    },
    {
      "epoch": 0.000292138671875,
      "step": 47864,
      "training_step_time": 0.623075008392334
    },
    {
      "epoch": 0.000292144775390625,
      "model_forward_time": 0.11889934539794922,
      "step": 47865
    },
    {
      "epoch": 0.000292144775390625,
      "step": 47865,
      "training_step_time": 0.6225042343139648
    },
    {
      "epoch": 0.00029215087890625,
      "model_forward_time": 0.11814260482788086,
      "step": 47866
    },
    {
      "epoch": 0.00029215087890625,
      "step": 47866,
      "training_step_time": 0.5753183364868164
    },
    {
      "epoch": 0.000292156982421875,
      "model_forward_time": 0.1192476749420166,
      "step": 47867
    },
    {
      "epoch": 0.000292156982421875,
      "step": 47867,
      "training_step_time": 0.5327053070068359
    },
    {
      "epoch": 0.0002921630859375,
      "model_forward_time": 0.1345076560974121,
      "step": 47868
    },
    {
      "epoch": 0.0002921630859375,
      "step": 47868,
      "training_step_time": 0.5779144763946533
    },
    {
      "epoch": 0.000292169189453125,
      "model_forward_time": 0.11787247657775879,
      "step": 47869
    },
    {
      "epoch": 0.000292169189453125,
      "step": 47869,
      "training_step_time": 0.6059682369232178
    },
    {
      "epoch": 0.00029217529296875,
      "grad_norm": 0.08403603732585907,
      "learning_rate": 1.0764028266456555e-05,
      "loss": 0.0382,
      "step": 47870
    },
    {
      "epoch": 0.00029217529296875,
      "model_forward_time": 0.1241903305053711,
      "step": 47870
    },
    {
      "epoch": 0.00029217529296875,
      "step": 47870,
      "training_step_time": 0.5065591335296631
    },
    {
      "epoch": 0.000292181396484375,
      "model_forward_time": 0.11879491806030273,
      "step": 47871
    },
    {
      "epoch": 0.000292181396484375,
      "step": 47871,
      "training_step_time": 0.45177578926086426
    },
    {
      "epoch": 0.0002921875,
      "model_forward_time": 0.11917757987976074,
      "step": 47872
    },
    {
      "epoch": 0.0002921875,
      "step": 47872,
      "training_step_time": 0.4998509883880615
    },
    {
      "epoch": 0.000292193603515625,
      "model_forward_time": 0.11757135391235352,
      "step": 47873
    },
    {
      "epoch": 0.000292193603515625,
      "step": 47873,
      "training_step_time": 0.6041374206542969
    },
    {
      "epoch": 0.00029219970703125,
      "model_forward_time": 0.11738252639770508,
      "step": 47874
    },
    {
      "epoch": 0.00029219970703125,
      "step": 47874,
      "training_step_time": 0.46479058265686035
    },
    {
      "epoch": 0.000292205810546875,
      "model_forward_time": 0.11622476577758789,
      "step": 47875
    },
    {
      "epoch": 0.000292205810546875,
      "step": 47875,
      "training_step_time": 0.43695902824401855
    },
    {
      "epoch": 0.0002922119140625,
      "model_forward_time": 0.1167142391204834,
      "step": 47876
    },
    {
      "epoch": 0.0002922119140625,
      "step": 47876,
      "training_step_time": 0.4290275573730469
    },
    {
      "epoch": 0.000292218017578125,
      "model_forward_time": 0.11651897430419922,
      "step": 47877
    },
    {
      "epoch": 0.000292218017578125,
      "step": 47877,
      "training_step_time": 0.44606494903564453
    },
    {
      "epoch": 0.00029222412109375,
      "model_forward_time": 0.11673307418823242,
      "step": 47878
    },
    {
      "epoch": 0.00029222412109375,
      "step": 47878,
      "training_step_time": 0.42284631729125977
    },
    {
      "epoch": 0.000292230224609375,
      "model_forward_time": 0.11606717109680176,
      "step": 47879
    },
    {
      "epoch": 0.000292230224609375,
      "step": 47879,
      "training_step_time": 0.4009699821472168
    },
    {
      "epoch": 0.000292236328125,
      "grad_norm": 0.1058242917060852,
      "learning_rate": 1.0746952474821614e-05,
      "loss": 0.0396,
      "step": 47880
    },
    {
      "epoch": 0.000292236328125,
      "model_forward_time": 0.11496233940124512,
      "step": 47880
    },
    {
      "epoch": 0.000292236328125,
      "step": 47880,
      "training_step_time": 0.3864123821258545
    },
    {
      "epoch": 0.000292242431640625,
      "model_forward_time": 0.11482095718383789,
      "step": 47881
    },
    {
      "epoch": 0.000292242431640625,
      "step": 47881,
      "training_step_time": 0.4588761329650879
    },
    {
      "epoch": 0.00029224853515625,
      "model_forward_time": 0.11453032493591309,
      "step": 47882
    },
    {
      "epoch": 0.00029224853515625,
      "step": 47882,
      "training_step_time": 0.4417705535888672
    },
    {
      "epoch": 0.000292254638671875,
      "model_forward_time": 0.11495089530944824,
      "step": 47883
    },
    {
      "epoch": 0.000292254638671875,
      "step": 47883,
      "training_step_time": 0.49653100967407227
    },
    {
      "epoch": 0.0002922607421875,
      "model_forward_time": 0.11506009101867676,
      "step": 47884
    },
    {
      "epoch": 0.0002922607421875,
      "step": 47884,
      "training_step_time": 0.42640066146850586
    },
    {
      "epoch": 0.000292266845703125,
      "model_forward_time": 0.11507701873779297,
      "step": 47885
    },
    {
      "epoch": 0.000292266845703125,
      "step": 47885,
      "training_step_time": 0.47580456733703613
    },
    {
      "epoch": 0.00029227294921875,
      "model_forward_time": 0.1150658130645752,
      "step": 47886
    },
    {
      "epoch": 0.00029227294921875,
      "step": 47886,
      "training_step_time": 0.42472290992736816
    },
    {
      "epoch": 0.000292279052734375,
      "model_forward_time": 0.11534333229064941,
      "step": 47887
    },
    {
      "epoch": 0.000292279052734375,
      "step": 47887,
      "training_step_time": 0.5003175735473633
    },
    {
      "epoch": 0.00029228515625,
      "model_forward_time": 0.11524391174316406,
      "step": 47888
    },
    {
      "epoch": 0.00029228515625,
      "step": 47888,
      "training_step_time": 0.4273064136505127
    },
    {
      "epoch": 0.000292291259765625,
      "model_forward_time": 0.11503982543945312,
      "step": 47889
    },
    {
      "epoch": 0.000292291259765625,
      "step": 47889,
      "training_step_time": 0.39629101753234863
    },
    {
      "epoch": 0.00029229736328125,
      "grad_norm": 0.09857366979122162,
      "learning_rate": 1.0729888607226113e-05,
      "loss": 0.0397,
      "step": 47890
    },
    {
      "epoch": 0.00029229736328125,
      "model_forward_time": 0.11535263061523438,
      "step": 47890
    },
    {
      "epoch": 0.00029229736328125,
      "step": 47890,
      "training_step_time": 0.38800621032714844
    },
    {
      "epoch": 0.000292303466796875,
      "model_forward_time": 0.11452007293701172,
      "step": 47891
    },
    {
      "epoch": 0.000292303466796875,
      "step": 47891,
      "training_step_time": 0.40053272247314453
    },
    {
      "epoch": 0.0002923095703125,
      "model_forward_time": 0.1149139404296875,
      "step": 47892
    },
    {
      "epoch": 0.0002923095703125,
      "step": 47892,
      "training_step_time": 0.4044837951660156
    },
    {
      "epoch": 0.000292315673828125,
      "model_forward_time": 0.11509275436401367,
      "step": 47893
    },
    {
      "epoch": 0.000292315673828125,
      "step": 47893,
      "training_step_time": 0.3961360454559326
    },
    {
      "epoch": 0.00029232177734375,
      "model_forward_time": 0.11499404907226562,
      "step": 47894
    },
    {
      "epoch": 0.00029232177734375,
      "step": 47894,
      "training_step_time": 0.3894035816192627
    },
    {
      "epoch": 0.000292327880859375,
      "model_forward_time": 0.1152048110961914,
      "step": 47895
    },
    {
      "epoch": 0.000292327880859375,
      "step": 47895,
      "training_step_time": 0.49692368507385254
    },
    {
      "epoch": 0.000292333984375,
      "model_forward_time": 0.1152963638305664,
      "step": 47896
    },
    {
      "epoch": 0.000292333984375,
      "step": 47896,
      "training_step_time": 0.4054241180419922
    },
    {
      "epoch": 0.000292340087890625,
      "model_forward_time": 0.11603188514709473,
      "step": 47897
    },
    {
      "epoch": 0.000292340087890625,
      "step": 47897,
      "training_step_time": 0.5008385181427002
    },
    {
      "epoch": 0.00029234619140625,
      "model_forward_time": 0.11517643928527832,
      "step": 47898
    },
    {
      "epoch": 0.00029234619140625,
      "step": 47898,
      "training_step_time": 0.4269578456878662
    },
    {
      "epoch": 0.000292352294921875,
      "model_forward_time": 0.11456918716430664,
      "step": 47899
    },
    {
      "epoch": 0.000292352294921875,
      "step": 47899,
      "training_step_time": 0.3974721431732178
    },
    {
      "epoch": 0.0002923583984375,
      "grad_norm": 0.106041319668293,
      "learning_rate": 1.0712836668853582e-05,
      "loss": 0.0413,
      "step": 47900
    },
    {
      "epoch": 0.0002923583984375,
      "model_forward_time": 0.11479425430297852,
      "step": 47900
    },
    {
      "epoch": 0.0002923583984375,
      "step": 47900,
      "training_step_time": 0.42594242095947266
    },
    {
      "epoch": 0.000292364501953125,
      "model_forward_time": 0.11528968811035156,
      "step": 47901
    },
    {
      "epoch": 0.000292364501953125,
      "step": 47901,
      "training_step_time": 0.4981386661529541
    },
    {
      "epoch": 0.00029237060546875,
      "model_forward_time": 0.11449193954467773,
      "step": 47902
    },
    {
      "epoch": 0.00029237060546875,
      "step": 47902,
      "training_step_time": 0.4330627918243408
    },
    {
      "epoch": 0.000292376708984375,
      "model_forward_time": 0.11556863784790039,
      "step": 47903
    },
    {
      "epoch": 0.000292376708984375,
      "step": 47903,
      "training_step_time": 0.4042093753814697
    },
    {
      "epoch": 0.0002923828125,
      "model_forward_time": 0.11430740356445312,
      "step": 47904
    },
    {
      "epoch": 0.0002923828125,
      "step": 47904,
      "training_step_time": 0.3966062068939209
    },
    {
      "epoch": 0.000292388916015625,
      "model_forward_time": 0.11591005325317383,
      "step": 47905
    },
    {
      "epoch": 0.000292388916015625,
      "step": 47905,
      "training_step_time": 0.4082322120666504
    },
    {
      "epoch": 0.00029239501953125,
      "model_forward_time": 0.1154320240020752,
      "step": 47906
    },
    {
      "epoch": 0.00029239501953125,
      "step": 47906,
      "training_step_time": 0.3958866596221924
    },
    {
      "epoch": 0.000292401123046875,
      "model_forward_time": 0.11468625068664551,
      "step": 47907
    },
    {
      "epoch": 0.000292401123046875,
      "step": 47907,
      "training_step_time": 0.3913726806640625
    },
    {
      "epoch": 0.0002924072265625,
      "model_forward_time": 0.11483025550842285,
      "step": 47908
    },
    {
      "epoch": 0.0002924072265625,
      "step": 47908,
      "training_step_time": 0.37903690338134766
    },
    {
      "epoch": 0.000292413330078125,
      "model_forward_time": 0.11519885063171387,
      "step": 47909
    },
    {
      "epoch": 0.000292413330078125,
      "step": 47909,
      "training_step_time": 0.3944211006164551
    },
    {
      "epoch": 0.00029241943359375,
      "grad_norm": 0.09432648867368698,
      "learning_rate": 1.069579666488395e-05,
      "loss": 0.0429,
      "step": 47910
    },
    {
      "epoch": 0.00029241943359375,
      "model_forward_time": 0.11423707008361816,
      "step": 47910
    },
    {
      "epoch": 0.00029241943359375,
      "step": 47910,
      "training_step_time": 0.47083258628845215
    },
    {
      "epoch": 0.000292425537109375,
      "model_forward_time": 0.11461520195007324,
      "step": 47911
    },
    {
      "epoch": 0.000292425537109375,
      "step": 47911,
      "training_step_time": 0.4482736587524414
    },
    {
      "epoch": 0.000292431640625,
      "model_forward_time": 0.11451506614685059,
      "step": 47912
    },
    {
      "epoch": 0.000292431640625,
      "step": 47912,
      "training_step_time": 0.4397261142730713
    },
    {
      "epoch": 0.000292437744140625,
      "model_forward_time": 0.11540508270263672,
      "step": 47913
    },
    {
      "epoch": 0.000292437744140625,
      "step": 47913,
      "training_step_time": 0.4313197135925293
    },
    {
      "epoch": 0.00029244384765625,
      "model_forward_time": 0.11492609977722168,
      "step": 47914
    },
    {
      "epoch": 0.00029244384765625,
      "step": 47914,
      "training_step_time": 0.3876368999481201
    },
    {
      "epoch": 0.000292449951171875,
      "model_forward_time": 0.11489534378051758,
      "step": 47915
    },
    {
      "epoch": 0.000292449951171875,
      "step": 47915,
      "training_step_time": 0.3681011199951172
    },
    {
      "epoch": 0.0002924560546875,
      "model_forward_time": 0.1157987117767334,
      "step": 47916
    },
    {
      "epoch": 0.0002924560546875,
      "step": 47916,
      "training_step_time": 0.4585134983062744
    },
    {
      "epoch": 0.000292462158203125,
      "model_forward_time": 0.11556553840637207,
      "step": 47917
    },
    {
      "epoch": 0.000292462158203125,
      "step": 47917,
      "training_step_time": 0.45235276222229004
    },
    {
      "epoch": 0.00029246826171875,
      "model_forward_time": 0.11520004272460938,
      "step": 47918
    },
    {
      "epoch": 0.00029246826171875,
      "step": 47918,
      "training_step_time": 0.3998410701751709
    },
    {
      "epoch": 0.000292474365234375,
      "model_forward_time": 0.11490750312805176,
      "step": 47919
    },
    {
      "epoch": 0.000292474365234375,
      "step": 47919,
      "training_step_time": 0.4002072811126709
    },
    {
      "epoch": 0.00029248046875,
      "grad_norm": 0.1833888292312622,
      "learning_rate": 1.0678768600493528e-05,
      "loss": 0.0411,
      "step": 47920
    },
    {
      "epoch": 0.00029248046875,
      "model_forward_time": 0.11475014686584473,
      "step": 47920
    },
    {
      "epoch": 0.00029248046875,
      "step": 47920,
      "training_step_time": 0.39085888862609863
    },
    {
      "epoch": 0.000292486572265625,
      "model_forward_time": 0.11536026000976562,
      "step": 47921
    },
    {
      "epoch": 0.000292486572265625,
      "step": 47921,
      "training_step_time": 0.39737558364868164
    },
    {
      "epoch": 0.00029249267578125,
      "model_forward_time": 0.11428713798522949,
      "step": 47922
    },
    {
      "epoch": 0.00029249267578125,
      "step": 47922,
      "training_step_time": 0.4094669818878174
    },
    {
      "epoch": 0.000292498779296875,
      "model_forward_time": 0.11577177047729492,
      "step": 47923
    },
    {
      "epoch": 0.000292498779296875,
      "step": 47923,
      "training_step_time": 0.39557409286499023
    },
    {
      "epoch": 0.0002925048828125,
      "model_forward_time": 0.1152806282043457,
      "step": 47924
    },
    {
      "epoch": 0.0002925048828125,
      "step": 47924,
      "training_step_time": 0.4270360469818115
    },
    {
      "epoch": 0.000292510986328125,
      "model_forward_time": 0.11462688446044922,
      "step": 47925
    },
    {
      "epoch": 0.000292510986328125,
      "step": 47925,
      "training_step_time": 0.39124131202697754
    },
    {
      "epoch": 0.00029251708984375,
      "model_forward_time": 0.11537003517150879,
      "step": 47926
    },
    {
      "epoch": 0.00029251708984375,
      "step": 47926,
      "training_step_time": 0.4181346893310547
    },
    {
      "epoch": 0.000292523193359375,
      "model_forward_time": 0.11507582664489746,
      "step": 47927
    },
    {
      "epoch": 0.000292523193359375,
      "step": 47927,
      "training_step_time": 0.421586275100708
    },
    {
      "epoch": 0.000292529296875,
      "model_forward_time": 0.11455321311950684,
      "step": 47928
    },
    {
      "epoch": 0.000292529296875,
      "step": 47928,
      "training_step_time": 0.4933493137359619
    },
    {
      "epoch": 0.000292535400390625,
      "model_forward_time": 0.11466073989868164,
      "step": 47929
    },
    {
      "epoch": 0.000292535400390625,
      "step": 47929,
      "training_step_time": 0.4556722640991211
    },
    {
      "epoch": 0.00029254150390625,
      "grad_norm": 0.11151497066020966,
      "learning_rate": 1.0661752480854975e-05,
      "loss": 0.0432,
      "step": 47930
    },
    {
      "epoch": 0.00029254150390625,
      "model_forward_time": 0.11429929733276367,
      "step": 47930
    },
    {
      "epoch": 0.00029254150390625,
      "step": 47930,
      "training_step_time": 0.41558122634887695
    },
    {
      "epoch": 0.000292547607421875,
      "model_forward_time": 0.1150655746459961,
      "step": 47931
    },
    {
      "epoch": 0.000292547607421875,
      "step": 47931,
      "training_step_time": 0.4184229373931885
    },
    {
      "epoch": 0.0002925537109375,
      "model_forward_time": 0.11497664451599121,
      "step": 47932
    },
    {
      "epoch": 0.0002925537109375,
      "step": 47932,
      "training_step_time": 0.41991209983825684
    },
    {
      "epoch": 0.000292559814453125,
      "model_forward_time": 0.11471986770629883,
      "step": 47933
    },
    {
      "epoch": 0.000292559814453125,
      "step": 47933,
      "training_step_time": 0.38536620140075684
    },
    {
      "epoch": 0.00029256591796875,
      "model_forward_time": 0.11527490615844727,
      "step": 47934
    },
    {
      "epoch": 0.00029256591796875,
      "step": 47934,
      "training_step_time": 0.40306711196899414
    },
    {
      "epoch": 0.000292572021484375,
      "model_forward_time": 0.11522698402404785,
      "step": 47935
    },
    {
      "epoch": 0.000292572021484375,
      "step": 47935,
      "training_step_time": 0.39630651473999023
    },
    {
      "epoch": 0.000292578125,
      "model_forward_time": 0.11489295959472656,
      "step": 47936
    },
    {
      "epoch": 0.000292578125,
      "step": 47936,
      "training_step_time": 0.385272741317749
    },
    {
      "epoch": 0.000292584228515625,
      "model_forward_time": 0.11550498008728027,
      "step": 47937
    },
    {
      "epoch": 0.000292584228515625,
      "step": 47937,
      "training_step_time": 0.39817142486572266
    },
    {
      "epoch": 0.00029259033203125,
      "model_forward_time": 0.11501240730285645,
      "step": 47938
    },
    {
      "epoch": 0.00029259033203125,
      "step": 47938,
      "training_step_time": 0.40030694007873535
    },
    {
      "epoch": 0.000292596435546875,
      "model_forward_time": 0.11543774604797363,
      "step": 47939
    },
    {
      "epoch": 0.000292596435546875,
      "step": 47939,
      "training_step_time": 0.41384387016296387
    },
    {
      "epoch": 0.0002926025390625,
      "grad_norm": 0.12574107944965363,
      "learning_rate": 1.0644748311137376e-05,
      "loss": 0.0394,
      "step": 47940
    },
    {
      "epoch": 0.0002926025390625,
      "model_forward_time": 0.11473560333251953,
      "step": 47940
    },
    {
      "epoch": 0.0002926025390625,
      "step": 47940,
      "training_step_time": 0.5090129375457764
    },
    {
      "epoch": 0.000292608642578125,
      "model_forward_time": 0.11580348014831543,
      "step": 47941
    },
    {
      "epoch": 0.000292608642578125,
      "step": 47941,
      "training_step_time": 0.47688984870910645
    },
    {
      "epoch": 0.00029261474609375,
      "model_forward_time": 0.11503195762634277,
      "step": 47942
    },
    {
      "epoch": 0.00029261474609375,
      "step": 47942,
      "training_step_time": 0.48171067237854004
    },
    {
      "epoch": 0.000292620849609375,
      "model_forward_time": 0.11519622802734375,
      "step": 47943
    },
    {
      "epoch": 0.000292620849609375,
      "step": 47943,
      "training_step_time": 0.3955385684967041
    },
    {
      "epoch": 0.000292626953125,
      "model_forward_time": 0.1146392822265625,
      "step": 47944
    },
    {
      "epoch": 0.000292626953125,
      "step": 47944,
      "training_step_time": 0.4114713668823242
    },
    {
      "epoch": 0.000292633056640625,
      "model_forward_time": 0.11485004425048828,
      "step": 47945
    },
    {
      "epoch": 0.000292633056640625,
      "step": 47945,
      "training_step_time": 0.39759206771850586
    },
    {
      "epoch": 0.00029263916015625,
      "model_forward_time": 0.11448955535888672,
      "step": 47946
    },
    {
      "epoch": 0.00029263916015625,
      "step": 47946,
      "training_step_time": 0.4355759620666504
    },
    {
      "epoch": 0.000292645263671875,
      "model_forward_time": 0.11524605751037598,
      "step": 47947
    },
    {
      "epoch": 0.000292645263671875,
      "step": 47947,
      "training_step_time": 0.42212891578674316
    },
    {
      "epoch": 0.0002926513671875,
      "model_forward_time": 0.1148991584777832,
      "step": 47948
    },
    {
      "epoch": 0.0002926513671875,
      "step": 47948,
      "training_step_time": 0.3876230716705322
    },
    {
      "epoch": 0.000292657470703125,
      "model_forward_time": 0.1150357723236084,
      "step": 47949
    },
    {
      "epoch": 0.000292657470703125,
      "step": 47949,
      "training_step_time": 0.3928852081298828
    },
    {
      "epoch": 0.00029266357421875,
      "grad_norm": 0.10631139576435089,
      "learning_rate": 1.0627756096506102e-05,
      "loss": 0.0384,
      "step": 47950
    },
    {
      "epoch": 0.00029266357421875,
      "model_forward_time": 0.11559200286865234,
      "step": 47950
    },
    {
      "epoch": 0.00029266357421875,
      "step": 47950,
      "training_step_time": 0.3878922462463379
    },
    {
      "epoch": 0.000292669677734375,
      "model_forward_time": 0.11504149436950684,
      "step": 47951
    },
    {
      "epoch": 0.000292669677734375,
      "step": 47951,
      "training_step_time": 0.3961217403411865
    },
    {
      "epoch": 0.00029267578125,
      "model_forward_time": 0.11512565612792969,
      "step": 47952
    },
    {
      "epoch": 0.00029267578125,
      "step": 47952,
      "training_step_time": 0.39829039573669434
    },
    {
      "epoch": 0.000292681884765625,
      "model_forward_time": 0.11482405662536621,
      "step": 47953
    },
    {
      "epoch": 0.000292681884765625,
      "step": 47953,
      "training_step_time": 0.4922633171081543
    },
    {
      "epoch": 0.00029268798828125,
      "model_forward_time": 0.11537766456604004,
      "step": 47954
    },
    {
      "epoch": 0.00029268798828125,
      "step": 47954,
      "training_step_time": 0.3917989730834961
    },
    {
      "epoch": 0.000292694091796875,
      "model_forward_time": 0.11514854431152344,
      "step": 47955
    },
    {
      "epoch": 0.000292694091796875,
      "step": 47955,
      "training_step_time": 0.4499170780181885
    },
    {
      "epoch": 0.0002927001953125,
      "model_forward_time": 0.11494278907775879,
      "step": 47956
    },
    {
      "epoch": 0.0002927001953125,
      "step": 47956,
      "training_step_time": 0.4518585205078125
    },
    {
      "epoch": 0.000292706298828125,
      "model_forward_time": 0.11494684219360352,
      "step": 47957
    },
    {
      "epoch": 0.000292706298828125,
      "step": 47957,
      "training_step_time": 0.49183082580566406
    },
    {
      "epoch": 0.00029271240234375,
      "model_forward_time": 0.11455607414245605,
      "step": 47958
    },
    {
      "epoch": 0.00029271240234375,
      "step": 47958,
      "training_step_time": 0.39641547203063965
    },
    {
      "epoch": 0.000292718505859375,
      "model_forward_time": 0.11533761024475098,
      "step": 47959
    },
    {
      "epoch": 0.000292718505859375,
      "step": 47959,
      "training_step_time": 0.4190027713775635
    },
    {
      "epoch": 0.000292724609375,
      "grad_norm": 0.10016725212335587,
      "learning_rate": 1.0610775842122972e-05,
      "loss": 0.0388,
      "step": 47960
    },
    {
      "epoch": 0.000292724609375,
      "model_forward_time": 0.11434364318847656,
      "step": 47960
    },
    {
      "epoch": 0.000292724609375,
      "step": 47960,
      "training_step_time": 0.398212194442749
    },
    {
      "epoch": 0.000292730712890625,
      "model_forward_time": 0.1152944564819336,
      "step": 47961
    },
    {
      "epoch": 0.000292730712890625,
      "step": 47961,
      "training_step_time": 0.48567676544189453
    },
    {
      "epoch": 0.00029273681640625,
      "model_forward_time": 0.11501097679138184,
      "step": 47962
    },
    {
      "epoch": 0.00029273681640625,
      "step": 47962,
      "training_step_time": 2.78149676322937
    },
    {
      "epoch": 0.000292742919921875,
      "model_forward_time": 0.11220335960388184,
      "step": 47963
    },
    {
      "epoch": 0.000292742919921875,
      "step": 47963,
      "training_step_time": 0.4218165874481201
    },
    {
      "epoch": 0.0002927490234375,
      "model_forward_time": 0.11201786994934082,
      "step": 47964
    },
    {
      "epoch": 0.0002927490234375,
      "step": 47964,
      "training_step_time": 0.38140106201171875
    },
    {
      "epoch": 0.000292755126953125,
      "model_forward_time": 0.1132357120513916,
      "step": 47965
    },
    {
      "epoch": 0.000292755126953125,
      "step": 47965,
      "training_step_time": 0.4168519973754883
    },
    {
      "epoch": 0.00029276123046875,
      "model_forward_time": 0.11349010467529297,
      "step": 47966
    },
    {
      "epoch": 0.00029276123046875,
      "step": 47966,
      "training_step_time": 0.4319794178009033
    },
    {
      "epoch": 0.000292767333984375,
      "model_forward_time": 0.11464405059814453,
      "step": 47967
    },
    {
      "epoch": 0.000292767333984375,
      "step": 47967,
      "training_step_time": 0.38184380531311035
    },
    {
      "epoch": 0.0002927734375,
      "model_forward_time": 0.11429905891418457,
      "step": 47968
    },
    {
      "epoch": 0.0002927734375,
      "step": 47968,
      "training_step_time": 0.3868124485015869
    },
    {
      "epoch": 0.000292779541015625,
      "model_forward_time": 0.11514496803283691,
      "step": 47969
    },
    {
      "epoch": 0.000292779541015625,
      "step": 47969,
      "training_step_time": 0.41865015029907227
    },
    {
      "epoch": 0.00029278564453125,
      "grad_norm": 0.09954383224248886,
      "learning_rate": 1.059380755314613e-05,
      "loss": 0.0383,
      "step": 47970
    },
    {
      "epoch": 0.00029278564453125,
      "model_forward_time": 0.11531662940979004,
      "step": 47970
    },
    {
      "epoch": 0.00029278564453125,
      "step": 47970,
      "training_step_time": 0.43358445167541504
    },
    {
      "epoch": 0.000292791748046875,
      "model_forward_time": 0.1152496337890625,
      "step": 47971
    },
    {
      "epoch": 0.000292791748046875,
      "step": 47971,
      "training_step_time": 0.46120309829711914
    },
    {
      "epoch": 0.0002927978515625,
      "model_forward_time": 0.11465215682983398,
      "step": 47972
    },
    {
      "epoch": 0.0002927978515625,
      "step": 47972,
      "training_step_time": 0.3743298053741455
    },
    {
      "epoch": 0.000292803955078125,
      "model_forward_time": 0.11595010757446289,
      "step": 47973
    },
    {
      "epoch": 0.000292803955078125,
      "step": 47973,
      "training_step_time": 0.37950921058654785
    },
    {
      "epoch": 0.00029281005859375,
      "model_forward_time": 0.11544299125671387,
      "step": 47974
    },
    {
      "epoch": 0.00029281005859375,
      "step": 47974,
      "training_step_time": 0.3806498050689697
    },
    {
      "epoch": 0.000292816162109375,
      "model_forward_time": 0.11505889892578125,
      "step": 47975
    },
    {
      "epoch": 0.000292816162109375,
      "step": 47975,
      "training_step_time": 0.3791027069091797
    },
    {
      "epoch": 0.000292822265625,
      "model_forward_time": 0.11493039131164551,
      "step": 47976
    },
    {
      "epoch": 0.000292822265625,
      "step": 47976,
      "training_step_time": 0.4528160095214844
    },
    {
      "epoch": 0.000292828369140625,
      "model_forward_time": 0.11449575424194336,
      "step": 47977
    },
    {
      "epoch": 0.000292828369140625,
      "step": 47977,
      "training_step_time": 0.4095454216003418
    },
    {
      "epoch": 0.00029283447265625,
      "model_forward_time": 0.11561083793640137,
      "step": 47978
    },
    {
      "epoch": 0.00029283447265625,
      "step": 47978,
      "training_step_time": 0.48410654067993164
    },
    {
      "epoch": 0.000292840576171875,
      "model_forward_time": 0.11469531059265137,
      "step": 47979
    },
    {
      "epoch": 0.000292840576171875,
      "step": 47979,
      "training_step_time": 0.3949007987976074
    },
    {
      "epoch": 0.0002928466796875,
      "grad_norm": 0.10263831168413162,
      "learning_rate": 1.0576851234730095e-05,
      "loss": 0.0345,
      "step": 47980
    },
    {
      "epoch": 0.0002928466796875,
      "model_forward_time": 0.1149299144744873,
      "step": 47980
    },
    {
      "epoch": 0.0002928466796875,
      "step": 47980,
      "training_step_time": 0.41848063468933105
    },
    {
      "epoch": 0.000292852783203125,
      "model_forward_time": 0.11455392837524414,
      "step": 47981
    },
    {
      "epoch": 0.000292852783203125,
      "step": 47981,
      "training_step_time": 0.43718862533569336
    },
    {
      "epoch": 0.00029285888671875,
      "model_forward_time": 0.11499357223510742,
      "step": 47982
    },
    {
      "epoch": 0.00029285888671875,
      "step": 47982,
      "training_step_time": 0.3951115608215332
    },
    {
      "epoch": 0.000292864990234375,
      "model_forward_time": 0.1146707534790039,
      "step": 47983
    },
    {
      "epoch": 0.000292864990234375,
      "step": 47983,
      "training_step_time": 0.3638908863067627
    },
    {
      "epoch": 0.00029287109375,
      "model_forward_time": 0.11476874351501465,
      "step": 47984
    },
    {
      "epoch": 0.00029287109375,
      "step": 47984,
      "training_step_time": 0.4629676342010498
    },
    {
      "epoch": 0.000292877197265625,
      "model_forward_time": 0.11465167999267578,
      "step": 47985
    },
    {
      "epoch": 0.000292877197265625,
      "step": 47985,
      "training_step_time": 0.4457426071166992
    },
    {
      "epoch": 0.00029288330078125,
      "model_forward_time": 0.11512970924377441,
      "step": 47986
    },
    {
      "epoch": 0.00029288330078125,
      "step": 47986,
      "training_step_time": 0.38551831245422363
    },
    {
      "epoch": 0.000292889404296875,
      "model_forward_time": 0.11531209945678711,
      "step": 47987
    },
    {
      "epoch": 0.000292889404296875,
      "step": 47987,
      "training_step_time": 0.3922762870788574
    },
    {
      "epoch": 0.0002928955078125,
      "model_forward_time": 0.1151571273803711,
      "step": 47988
    },
    {
      "epoch": 0.0002928955078125,
      "step": 47988,
      "training_step_time": 0.4043600559234619
    },
    {
      "epoch": 0.000292901611328125,
      "model_forward_time": 0.11495208740234375,
      "step": 47989
    },
    {
      "epoch": 0.000292901611328125,
      "step": 47989,
      "training_step_time": 0.3929860591888428
    },
    {
      "epoch": 0.00029290771484375,
      "grad_norm": 0.08120349794626236,
      "learning_rate": 1.0559906892025745e-05,
      "loss": 0.0348,
      "step": 47990
    },
    {
      "epoch": 0.00029290771484375,
      "model_forward_time": 0.11533451080322266,
      "step": 47990
    },
    {
      "epoch": 0.00029290771484375,
      "step": 47990,
      "training_step_time": 0.38876914978027344
    },
    {
      "epoch": 0.000292913818359375,
      "model_forward_time": 0.11517190933227539,
      "step": 47991
    },
    {
      "epoch": 0.000292913818359375,
      "step": 47991,
      "training_step_time": 0.4176299571990967
    },
    {
      "epoch": 0.000292919921875,
      "model_forward_time": 0.11537528038024902,
      "step": 47992
    },
    {
      "epoch": 0.000292919921875,
      "step": 47992,
      "training_step_time": 0.49931764602661133
    },
    {
      "epoch": 0.000292926025390625,
      "model_forward_time": 0.11498713493347168,
      "step": 47993
    },
    {
      "epoch": 0.000292926025390625,
      "step": 47993,
      "training_step_time": 0.4677152633666992
    },
    {
      "epoch": 0.00029293212890625,
      "model_forward_time": 0.11632442474365234,
      "step": 47994
    },
    {
      "epoch": 0.00029293212890625,
      "step": 47994,
      "training_step_time": 0.3982577323913574
    },
    {
      "epoch": 0.000292938232421875,
      "model_forward_time": 0.11502552032470703,
      "step": 47995
    },
    {
      "epoch": 0.000292938232421875,
      "step": 47995,
      "training_step_time": 0.48145008087158203
    },
    {
      "epoch": 0.0002929443359375,
      "model_forward_time": 0.11483168601989746,
      "step": 47996
    },
    {
      "epoch": 0.0002929443359375,
      "step": 47996,
      "training_step_time": 0.41217923164367676
    },
    {
      "epoch": 0.000292950439453125,
      "model_forward_time": 0.11530542373657227,
      "step": 47997
    },
    {
      "epoch": 0.000292950439453125,
      "step": 47997,
      "training_step_time": 0.41942334175109863
    },
    {
      "epoch": 0.00029295654296875,
      "model_forward_time": 0.11890482902526855,
      "step": 47998
    },
    {
      "epoch": 0.00029295654296875,
      "step": 47998,
      "training_step_time": 0.4577810764312744
    },
    {
      "epoch": 0.000292962646484375,
      "model_forward_time": 0.11572813987731934,
      "step": 47999
    },
    {
      "epoch": 0.000292962646484375,
      "step": 47999,
      "training_step_time": 0.45899438858032227
    },
    {
      "epoch": 0.00029296875,
      "grad_norm": 0.11922606825828552,
      "learning_rate": 1.0542974530180327e-05,
      "loss": 0.0368,
      "step": 48000
    },
    {
      "epoch": 0.00029296875,
      "model_forward_time": 0.11512970924377441,
      "step": 48000
    },
    {
      "epoch": 0.00029296875,
      "step": 48000,
      "training_step_time": 0.3595917224884033
    },
    {
      "epoch": 0.000292974853515625,
      "model_forward_time": 0.11356210708618164,
      "step": 48001
    },
    {
      "epoch": 0.000292974853515625,
      "step": 48001,
      "training_step_time": 0.37743639945983887
    },
    {
      "epoch": 0.00029298095703125,
      "model_forward_time": 0.11288189888000488,
      "step": 48002
    },
    {
      "epoch": 0.00029298095703125,
      "step": 48002,
      "training_step_time": 0.36885881423950195
    },
    {
      "epoch": 0.000292987060546875,
      "model_forward_time": 0.11388134956359863,
      "step": 48003
    },
    {
      "epoch": 0.000292987060546875,
      "step": 48003,
      "training_step_time": 0.37761378288269043
    },
    {
      "epoch": 0.0002929931640625,
      "model_forward_time": 0.11456632614135742,
      "step": 48004
    },
    {
      "epoch": 0.0002929931640625,
      "step": 48004,
      "training_step_time": 0.3656606674194336
    },
    {
      "epoch": 0.000292999267578125,
      "model_forward_time": 0.11480522155761719,
      "step": 48005
    },
    {
      "epoch": 0.000292999267578125,
      "step": 48005,
      "training_step_time": 0.384249210357666
    },
    {
      "epoch": 0.00029300537109375,
      "model_forward_time": 0.11549520492553711,
      "step": 48006
    },
    {
      "epoch": 0.00029300537109375,
      "step": 48006,
      "training_step_time": 0.40276384353637695
    },
    {
      "epoch": 0.000293011474609375,
      "model_forward_time": 0.11530351638793945,
      "step": 48007
    },
    {
      "epoch": 0.000293011474609375,
      "step": 48007,
      "training_step_time": 0.4898803234100342
    },
    {
      "epoch": 0.000293017578125,
      "model_forward_time": 0.11587715148925781,
      "step": 48008
    },
    {
      "epoch": 0.000293017578125,
      "step": 48008,
      "training_step_time": 0.3846876621246338
    },
    {
      "epoch": 0.000293023681640625,
      "model_forward_time": 0.1150205135345459,
      "step": 48009
    },
    {
      "epoch": 0.000293023681640625,
      "step": 48009,
      "training_step_time": 0.4555997848510742
    },
    {
      "epoch": 0.00029302978515625,
      "grad_norm": 0.11796402186155319,
      "learning_rate": 1.0526054154337445e-05,
      "loss": 0.0394,
      "step": 48010
    },
    {
      "epoch": 0.00029302978515625,
      "model_forward_time": 0.11443519592285156,
      "step": 48010
    },
    {
      "epoch": 0.00029302978515625,
      "step": 48010,
      "training_step_time": 0.3897073268890381
    },
    {
      "epoch": 0.000293035888671875,
      "model_forward_time": 0.11510753631591797,
      "step": 48011
    },
    {
      "epoch": 0.000293035888671875,
      "step": 48011,
      "training_step_time": 0.40491700172424316
    },
    {
      "epoch": 0.0002930419921875,
      "model_forward_time": 0.11508870124816895,
      "step": 48012
    },
    {
      "epoch": 0.0002930419921875,
      "step": 48012,
      "training_step_time": 0.3954927921295166
    },
    {
      "epoch": 0.000293048095703125,
      "model_forward_time": 0.11499500274658203,
      "step": 48013
    },
    {
      "epoch": 0.000293048095703125,
      "step": 48013,
      "training_step_time": 0.36844968795776367
    },
    {
      "epoch": 0.00029305419921875,
      "model_forward_time": 0.1151573657989502,
      "step": 48014
    },
    {
      "epoch": 0.00029305419921875,
      "step": 48014,
      "training_step_time": 0.45470380783081055
    },
    {
      "epoch": 0.000293060302734375,
      "model_forward_time": 0.11582040786743164,
      "step": 48015
    },
    {
      "epoch": 0.000293060302734375,
      "step": 48015,
      "training_step_time": 0.41579318046569824
    },
    {
      "epoch": 0.00029306640625,
      "model_forward_time": 0.11502599716186523,
      "step": 48016
    },
    {
      "epoch": 0.00029306640625,
      "step": 48016,
      "training_step_time": 0.3929436206817627
    },
    {
      "epoch": 0.000293072509765625,
      "model_forward_time": 0.11503100395202637,
      "step": 48017
    },
    {
      "epoch": 0.000293072509765625,
      "step": 48017,
      "training_step_time": 0.4024477005004883
    },
    {
      "epoch": 0.00029307861328125,
      "model_forward_time": 0.11486625671386719,
      "step": 48018
    },
    {
      "epoch": 0.00029307861328125,
      "step": 48018,
      "training_step_time": 0.41570401191711426
    },
    {
      "epoch": 0.000293084716796875,
      "model_forward_time": 0.11508297920227051,
      "step": 48019
    },
    {
      "epoch": 0.000293084716796875,
      "step": 48019,
      "training_step_time": 0.3958423137664795
    },
    {
      "epoch": 0.0002930908203125,
      "grad_norm": 0.07439753413200378,
      "learning_rate": 1.0509145769637057e-05,
      "loss": 0.0393,
      "step": 48020
    },
    {
      "epoch": 0.0002930908203125,
      "model_forward_time": 0.11474108695983887,
      "step": 48020
    },
    {
      "epoch": 0.0002930908203125,
      "step": 48020,
      "training_step_time": 0.46691441535949707
    },
    {
      "epoch": 0.000293096923828125,
      "model_forward_time": 0.11495852470397949,
      "step": 48021
    },
    {
      "epoch": 0.000293096923828125,
      "step": 48021,
      "training_step_time": 0.5143401622772217
    },
    {
      "epoch": 0.00029310302734375,
      "model_forward_time": 0.11540794372558594,
      "step": 48022
    },
    {
      "epoch": 0.00029310302734375,
      "step": 48022,
      "training_step_time": 0.3856198787689209
    },
    {
      "epoch": 0.000293109130859375,
      "model_forward_time": 0.11483168601989746,
      "step": 48023
    },
    {
      "epoch": 0.000293109130859375,
      "step": 48023,
      "training_step_time": 0.497272253036499
    },
    {
      "epoch": 0.000293115234375,
      "model_forward_time": 0.11567234992980957,
      "step": 48024
    },
    {
      "epoch": 0.000293115234375,
      "step": 48024,
      "training_step_time": 0.4487874507904053
    },
    {
      "epoch": 0.000293121337890625,
      "model_forward_time": 0.11428427696228027,
      "step": 48025
    },
    {
      "epoch": 0.000293121337890625,
      "step": 48025,
      "training_step_time": 0.4735105037689209
    },
    {
      "epoch": 0.00029312744140625,
      "model_forward_time": 0.11426424980163574,
      "step": 48026
    },
    {
      "epoch": 0.00029312744140625,
      "step": 48026,
      "training_step_time": 0.39604616165161133
    },
    {
      "epoch": 0.000293133544921875,
      "model_forward_time": 0.1145167350769043,
      "step": 48027
    },
    {
      "epoch": 0.000293133544921875,
      "step": 48027,
      "training_step_time": 0.3688161373138428
    },
    {
      "epoch": 0.0002931396484375,
      "model_forward_time": 0.114959716796875,
      "step": 48028
    },
    {
      "epoch": 0.0002931396484375,
      "step": 48028,
      "training_step_time": 0.43579745292663574
    },
    {
      "epoch": 0.000293145751953125,
      "model_forward_time": 0.11484670639038086,
      "step": 48029
    },
    {
      "epoch": 0.000293145751953125,
      "step": 48029,
      "training_step_time": 0.39680957794189453
    },
    {
      "epoch": 0.00029315185546875,
      "grad_norm": 0.11118239909410477,
      "learning_rate": 1.049224938121548e-05,
      "loss": 0.0383,
      "step": 48030
    },
    {
      "epoch": 0.00029315185546875,
      "model_forward_time": 0.11496138572692871,
      "step": 48030
    },
    {
      "epoch": 0.00029315185546875,
      "step": 48030,
      "training_step_time": 0.4000697135925293
    },
    {
      "epoch": 0.000293157958984375,
      "model_forward_time": 0.11455941200256348,
      "step": 48031
    },
    {
      "epoch": 0.000293157958984375,
      "step": 48031,
      "training_step_time": 0.40340447425842285
    },
    {
      "epoch": 0.0002931640625,
      "model_forward_time": 0.11595559120178223,
      "step": 48032
    },
    {
      "epoch": 0.0002931640625,
      "step": 48032,
      "training_step_time": 0.38018178939819336
    },
    {
      "epoch": 0.000293170166015625,
      "model_forward_time": 0.11519503593444824,
      "step": 48033
    },
    {
      "epoch": 0.000293170166015625,
      "step": 48033,
      "training_step_time": 0.4191102981567383
    },
    {
      "epoch": 0.00029317626953125,
      "model_forward_time": 0.11513137817382812,
      "step": 48034
    },
    {
      "epoch": 0.00029317626953125,
      "step": 48034,
      "training_step_time": 0.4042642116546631
    },
    {
      "epoch": 0.000293182373046875,
      "model_forward_time": 0.11485767364501953,
      "step": 48035
    },
    {
      "epoch": 0.000293182373046875,
      "step": 48035,
      "training_step_time": 0.39337897300720215
    },
    {
      "epoch": 0.0002931884765625,
      "model_forward_time": 0.11505985260009766,
      "step": 48036
    },
    {
      "epoch": 0.0002931884765625,
      "step": 48036,
      "training_step_time": 0.4065079689025879
    },
    {
      "epoch": 0.000293194580078125,
      "model_forward_time": 0.11482620239257812,
      "step": 48037
    },
    {
      "epoch": 0.000293194580078125,
      "step": 48037,
      "training_step_time": 0.3937997817993164
    },
    {
      "epoch": 0.00029320068359375,
      "model_forward_time": 0.11540913581848145,
      "step": 48038
    },
    {
      "epoch": 0.00029320068359375,
      "step": 48038,
      "training_step_time": 0.40271615982055664
    },
    {
      "epoch": 0.000293206787109375,
      "model_forward_time": 0.11485815048217773,
      "step": 48039
    },
    {
      "epoch": 0.000293206787109375,
      "step": 48039,
      "training_step_time": 0.41130518913269043
    },
    {
      "epoch": 0.000293212890625,
      "grad_norm": 0.08745148032903671,
      "learning_rate": 1.0475364994205411e-05,
      "loss": 0.036,
      "step": 48040
    },
    {
      "epoch": 0.000293212890625,
      "model_forward_time": 0.11560821533203125,
      "step": 48040
    },
    {
      "epoch": 0.000293212890625,
      "step": 48040,
      "training_step_time": 0.42185163497924805
    },
    {
      "epoch": 0.000293218994140625,
      "model_forward_time": 0.11484265327453613,
      "step": 48041
    },
    {
      "epoch": 0.000293218994140625,
      "step": 48041,
      "training_step_time": 0.39128947257995605
    },
    {
      "epoch": 0.00029322509765625,
      "model_forward_time": 0.11577367782592773,
      "step": 48042
    },
    {
      "epoch": 0.00029322509765625,
      "step": 48042,
      "training_step_time": 0.39741063117980957
    },
    {
      "epoch": 0.000293231201171875,
      "model_forward_time": 0.11451315879821777,
      "step": 48043
    },
    {
      "epoch": 0.000293231201171875,
      "step": 48043,
      "training_step_time": 0.41823840141296387
    },
    {
      "epoch": 0.0002932373046875,
      "model_forward_time": 0.11419916152954102,
      "step": 48044
    },
    {
      "epoch": 0.0002932373046875,
      "step": 48044,
      "training_step_time": 0.4551725387573242
    },
    {
      "epoch": 0.000293243408203125,
      "model_forward_time": 0.11516261100769043,
      "step": 48045
    },
    {
      "epoch": 0.000293243408203125,
      "step": 48045,
      "training_step_time": 0.3851583003997803
    },
    {
      "epoch": 0.00029324951171875,
      "model_forward_time": 0.11528277397155762,
      "step": 48046
    },
    {
      "epoch": 0.00029324951171875,
      "step": 48046,
      "training_step_time": 0.39330244064331055
    },
    {
      "epoch": 0.000293255615234375,
      "model_forward_time": 0.11577773094177246,
      "step": 48047
    },
    {
      "epoch": 0.000293255615234375,
      "step": 48047,
      "training_step_time": 0.3811640739440918
    },
    {
      "epoch": 0.00029326171875,
      "model_forward_time": 0.11534309387207031,
      "step": 48048
    },
    {
      "epoch": 0.00029326171875,
      "step": 48048,
      "training_step_time": 0.461273193359375
    },
    {
      "epoch": 0.000293267822265625,
      "model_forward_time": 0.1146388053894043,
      "step": 48049
    },
    {
      "epoch": 0.000293267822265625,
      "step": 48049,
      "training_step_time": 0.42537689208984375
    },
    {
      "epoch": 0.00029327392578125,
      "grad_norm": 0.11653630435466766,
      "learning_rate": 1.045849261373587e-05,
      "loss": 0.0376,
      "step": 48050
    },
    {
      "epoch": 0.00029327392578125,
      "model_forward_time": 0.11448073387145996,
      "step": 48050
    },
    {
      "epoch": 0.00029327392578125,
      "step": 48050,
      "training_step_time": 0.4826948642730713
    },
    {
      "epoch": 0.000293280029296875,
      "model_forward_time": 0.1149594783782959,
      "step": 48051
    },
    {
      "epoch": 0.000293280029296875,
      "step": 48051,
      "training_step_time": 0.3936002254486084
    },
    {
      "epoch": 0.0002932861328125,
      "model_forward_time": 0.11491084098815918,
      "step": 48052
    },
    {
      "epoch": 0.0002932861328125,
      "step": 48052,
      "training_step_time": 0.39166975021362305
    },
    {
      "epoch": 0.000293292236328125,
      "model_forward_time": 0.11474370956420898,
      "step": 48053
    },
    {
      "epoch": 0.000293292236328125,
      "step": 48053,
      "training_step_time": 0.38916587829589844
    },
    {
      "epoch": 0.00029329833984375,
      "model_forward_time": 0.11503982543945312,
      "step": 48054
    },
    {
      "epoch": 0.00029329833984375,
      "step": 48054,
      "training_step_time": 0.387376070022583
    },
    {
      "epoch": 0.000293304443359375,
      "model_forward_time": 0.1149604320526123,
      "step": 48055
    },
    {
      "epoch": 0.000293304443359375,
      "step": 48055,
      "training_step_time": 0.44492197036743164
    },
    {
      "epoch": 0.000293310546875,
      "model_forward_time": 0.11580085754394531,
      "step": 48056
    },
    {
      "epoch": 0.000293310546875,
      "step": 48056,
      "training_step_time": 0.397310733795166
    },
    {
      "epoch": 0.000293316650390625,
      "model_forward_time": 0.11482930183410645,
      "step": 48057
    },
    {
      "epoch": 0.000293316650390625,
      "step": 48057,
      "training_step_time": 0.4150090217590332
    },
    {
      "epoch": 0.00029332275390625,
      "model_forward_time": 0.11507368087768555,
      "step": 48058
    },
    {
      "epoch": 0.00029332275390625,
      "step": 48058,
      "training_step_time": 0.44504642486572266
    },
    {
      "epoch": 0.000293328857421875,
      "model_forward_time": 0.11495065689086914,
      "step": 48059
    },
    {
      "epoch": 0.000293328857421875,
      "step": 48059,
      "training_step_time": 0.4226076602935791
    },
    {
      "epoch": 0.0002933349609375,
      "grad_norm": 0.08240246027708054,
      "learning_rate": 1.0441632244932237e-05,
      "loss": 0.0361,
      "step": 48060
    },
    {
      "epoch": 0.0002933349609375,
      "model_forward_time": 0.11482858657836914,
      "step": 48060
    },
    {
      "epoch": 0.0002933349609375,
      "step": 48060,
      "training_step_time": 0.3987722396850586
    },
    {
      "epoch": 0.000293341064453125,
      "model_forward_time": 0.11460733413696289,
      "step": 48061
    },
    {
      "epoch": 0.000293341064453125,
      "step": 48061,
      "training_step_time": 0.39196181297302246
    },
    {
      "epoch": 0.00029334716796875,
      "model_forward_time": 0.1160116195678711,
      "step": 48062
    },
    {
      "epoch": 0.00029334716796875,
      "step": 48062,
      "training_step_time": 0.3924281597137451
    },
    {
      "epoch": 0.000293353271484375,
      "model_forward_time": 0.11536908149719238,
      "step": 48063
    },
    {
      "epoch": 0.000293353271484375,
      "step": 48063,
      "training_step_time": 0.40575242042541504
    },
    {
      "epoch": 0.000293359375,
      "model_forward_time": 0.11532163619995117,
      "step": 48064
    },
    {
      "epoch": 0.000293359375,
      "step": 48064,
      "training_step_time": 0.38655734062194824
    },
    {
      "epoch": 0.000293365478515625,
      "model_forward_time": 0.11502909660339355,
      "step": 48065
    },
    {
      "epoch": 0.000293365478515625,
      "step": 48065,
      "training_step_time": 0.3838224411010742
    },
    {
      "epoch": 0.00029337158203125,
      "model_forward_time": 0.11494970321655273,
      "step": 48066
    },
    {
      "epoch": 0.00029337158203125,
      "step": 48066,
      "training_step_time": 0.40120673179626465
    },
    {
      "epoch": 0.000293377685546875,
      "model_forward_time": 0.11518383026123047,
      "step": 48067
    },
    {
      "epoch": 0.000293377685546875,
      "step": 48067,
      "training_step_time": 0.3820617198944092
    },
    {
      "epoch": 0.0002933837890625,
      "model_forward_time": 0.11530256271362305,
      "step": 48068
    },
    {
      "epoch": 0.0002933837890625,
      "step": 48068,
      "training_step_time": 0.3968381881713867
    },
    {
      "epoch": 0.000293389892578125,
      "model_forward_time": 0.11603331565856934,
      "step": 48069
    },
    {
      "epoch": 0.000293389892578125,
      "step": 48069,
      "training_step_time": 0.4253687858581543
    },
    {
      "epoch": 0.00029339599609375,
      "grad_norm": 0.07467834651470184,
      "learning_rate": 1.0424783892916257e-05,
      "loss": 0.0344,
      "step": 48070
    },
    {
      "epoch": 0.00029339599609375,
      "model_forward_time": 0.11485505104064941,
      "step": 48070
    },
    {
      "epoch": 0.00029339599609375,
      "step": 48070,
      "training_step_time": 0.43572425842285156
    },
    {
      "epoch": 0.000293402099609375,
      "model_forward_time": 0.11562752723693848,
      "step": 48071
    },
    {
      "epoch": 0.000293402099609375,
      "step": 48071,
      "training_step_time": 0.3861680030822754
    },
    {
      "epoch": 0.000293408203125,
      "model_forward_time": 0.11601662635803223,
      "step": 48072
    },
    {
      "epoch": 0.000293408203125,
      "step": 48072,
      "training_step_time": 0.404496431350708
    },
    {
      "epoch": 0.000293414306640625,
      "model_forward_time": 0.11560297012329102,
      "step": 48073
    },
    {
      "epoch": 0.000293414306640625,
      "step": 48073,
      "training_step_time": 0.4765775203704834
    },
    {
      "epoch": 0.00029342041015625,
      "model_forward_time": 0.11526608467102051,
      "step": 48074
    },
    {
      "epoch": 0.00029342041015625,
      "step": 48074,
      "training_step_time": 0.4170963764190674
    },
    {
      "epoch": 0.000293426513671875,
      "model_forward_time": 0.11503720283508301,
      "step": 48075
    },
    {
      "epoch": 0.000293426513671875,
      "step": 48075,
      "training_step_time": 0.42499828338623047
    },
    {
      "epoch": 0.0002934326171875,
      "model_forward_time": 0.11527895927429199,
      "step": 48076
    },
    {
      "epoch": 0.0002934326171875,
      "step": 48076,
      "training_step_time": 0.42092418670654297
    },
    {
      "epoch": 0.000293438720703125,
      "model_forward_time": 0.11523318290710449,
      "step": 48077
    },
    {
      "epoch": 0.000293438720703125,
      "step": 48077,
      "training_step_time": 0.530949592590332
    },
    {
      "epoch": 0.00029344482421875,
      "model_forward_time": 0.1152811050415039,
      "step": 48078
    },
    {
      "epoch": 0.00029344482421875,
      "step": 48078,
      "training_step_time": 0.40619945526123047
    },
    {
      "epoch": 0.000293450927734375,
      "model_forward_time": 0.11468076705932617,
      "step": 48079
    },
    {
      "epoch": 0.000293450927734375,
      "step": 48079,
      "training_step_time": 0.49169397354125977
    },
    {
      "epoch": 0.00029345703125,
      "grad_norm": 0.09955496340990067,
      "learning_rate": 1.0407947562805986e-05,
      "loss": 0.0342,
      "step": 48080
    },
    {
      "epoch": 0.00029345703125,
      "model_forward_time": 0.11501812934875488,
      "step": 48080
    },
    {
      "epoch": 0.00029345703125,
      "step": 48080,
      "training_step_time": 0.38796091079711914
    },
    {
      "epoch": 0.000293463134765625,
      "model_forward_time": 0.11501932144165039,
      "step": 48081
    },
    {
      "epoch": 0.000293463134765625,
      "step": 48081,
      "training_step_time": 0.38420820236206055
    },
    {
      "epoch": 0.00029346923828125,
      "model_forward_time": 0.11451029777526855,
      "step": 48082
    },
    {
      "epoch": 0.00029346923828125,
      "step": 48082,
      "training_step_time": 0.4993095397949219
    },
    {
      "epoch": 0.000293475341796875,
      "model_forward_time": 0.11464500427246094,
      "step": 48083
    },
    {
      "epoch": 0.000293475341796875,
      "step": 48083,
      "training_step_time": 0.43988656997680664
    },
    {
      "epoch": 0.0002934814453125,
      "model_forward_time": 0.11476874351501465,
      "step": 48084
    },
    {
      "epoch": 0.0002934814453125,
      "step": 48084,
      "training_step_time": 0.5025362968444824
    },
    {
      "epoch": 0.000293487548828125,
      "model_forward_time": 0.11568164825439453,
      "step": 48085
    },
    {
      "epoch": 0.000293487548828125,
      "step": 48085,
      "training_step_time": 0.39664554595947266
    },
    {
      "epoch": 0.00029349365234375,
      "model_forward_time": 0.11511611938476562,
      "step": 48086
    },
    {
      "epoch": 0.00029349365234375,
      "step": 48086,
      "training_step_time": 0.3772256374359131
    },
    {
      "epoch": 0.000293499755859375,
      "model_forward_time": 0.11641097068786621,
      "step": 48087
    },
    {
      "epoch": 0.000293499755859375,
      "step": 48087,
      "training_step_time": 0.44356513023376465
    },
    {
      "epoch": 0.000293505859375,
      "model_forward_time": 0.11599516868591309,
      "step": 48088
    },
    {
      "epoch": 0.000293505859375,
      "step": 48088,
      "training_step_time": 0.41286635398864746
    },
    {
      "epoch": 0.000293511962890625,
      "model_forward_time": 0.11477875709533691,
      "step": 48089
    },
    {
      "epoch": 0.000293511962890625,
      "step": 48089,
      "training_step_time": 0.41387486457824707
    },
    {
      "epoch": 0.00029351806640625,
      "grad_norm": 0.08462638407945633,
      "learning_rate": 1.0391123259715906e-05,
      "loss": 0.0394,
      "step": 48090
    },
    {
      "epoch": 0.00029351806640625,
      "model_forward_time": 0.11586809158325195,
      "step": 48090
    },
    {
      "epoch": 0.00029351806640625,
      "step": 48090,
      "training_step_time": 0.38700294494628906
    },
    {
      "epoch": 0.000293524169921875,
      "model_forward_time": 0.11439037322998047,
      "step": 48091
    },
    {
      "epoch": 0.000293524169921875,
      "step": 48091,
      "training_step_time": 0.48476338386535645
    },
    {
      "epoch": 0.0002935302734375,
      "model_forward_time": 0.1150670051574707,
      "step": 48092
    },
    {
      "epoch": 0.0002935302734375,
      "step": 48092,
      "training_step_time": 0.4158468246459961
    },
    {
      "epoch": 0.000293536376953125,
      "model_forward_time": 0.11530876159667969,
      "step": 48093
    },
    {
      "epoch": 0.000293536376953125,
      "step": 48093,
      "training_step_time": 0.49086594581604004
    },
    {
      "epoch": 0.00029354248046875,
      "model_forward_time": 0.11528158187866211,
      "step": 48094
    },
    {
      "epoch": 0.00029354248046875,
      "step": 48094,
      "training_step_time": 0.39730095863342285
    },
    {
      "epoch": 0.000293548583984375,
      "model_forward_time": 0.11454916000366211,
      "step": 48095
    },
    {
      "epoch": 0.000293548583984375,
      "step": 48095,
      "training_step_time": 0.39141082763671875
    },
    {
      "epoch": 0.0002935546875,
      "model_forward_time": 0.11513018608093262,
      "step": 48096
    },
    {
      "epoch": 0.0002935546875,
      "step": 48096,
      "training_step_time": 0.4030013084411621
    },
    {
      "epoch": 0.000293560791015625,
      "model_forward_time": 0.11524629592895508,
      "step": 48097
    },
    {
      "epoch": 0.000293560791015625,
      "step": 48097,
      "training_step_time": 0.40909767150878906
    },
    {
      "epoch": 0.00029356689453125,
      "model_forward_time": 0.11492657661437988,
      "step": 48098
    },
    {
      "epoch": 0.00029356689453125,
      "step": 48098,
      "training_step_time": 0.42969775199890137
    },
    {
      "epoch": 0.000293572998046875,
      "model_forward_time": 0.11493062973022461,
      "step": 48099
    },
    {
      "epoch": 0.000293572998046875,
      "step": 48099,
      "training_step_time": 0.4728708267211914
    },
    {
      "epoch": 0.0002935791015625,
      "grad_norm": 0.09139011800289154,
      "learning_rate": 1.0374310988756747e-05,
      "loss": 0.034,
      "step": 48100
    },
    {
      "epoch": 0.0002935791015625,
      "model_forward_time": 0.1154029369354248,
      "step": 48100
    },
    {
      "epoch": 0.0002935791015625,
      "step": 48100,
      "training_step_time": 0.4138646125793457
    },
    {
      "epoch": 0.000293585205078125,
      "model_forward_time": 0.1154184341430664,
      "step": 48101
    },
    {
      "epoch": 0.000293585205078125,
      "step": 48101,
      "training_step_time": 0.48133325576782227
    },
    {
      "epoch": 0.00029359130859375,
      "model_forward_time": 0.11536312103271484,
      "step": 48102
    },
    {
      "epoch": 0.00029359130859375,
      "step": 48102,
      "training_step_time": 0.4817821979522705
    },
    {
      "epoch": 0.000293597412109375,
      "model_forward_time": 0.11475896835327148,
      "step": 48103
    },
    {
      "epoch": 0.000293597412109375,
      "step": 48103,
      "training_step_time": 0.3965606689453125
    },
    {
      "epoch": 0.000293603515625,
      "model_forward_time": 0.11551141738891602,
      "step": 48104
    },
    {
      "epoch": 0.000293603515625,
      "step": 48104,
      "training_step_time": 0.3866429328918457
    },
    {
      "epoch": 0.000293609619140625,
      "model_forward_time": 0.11570215225219727,
      "step": 48105
    },
    {
      "epoch": 0.000293609619140625,
      "step": 48105,
      "training_step_time": 0.37642431259155273
    },
    {
      "epoch": 0.00029361572265625,
      "model_forward_time": 0.11488485336303711,
      "step": 48106
    },
    {
      "epoch": 0.00029361572265625,
      "step": 48106,
      "training_step_time": 0.4548368453979492
    },
    {
      "epoch": 0.000293621826171875,
      "model_forward_time": 0.11533308029174805,
      "step": 48107
    },
    {
      "epoch": 0.000293621826171875,
      "step": 48107,
      "training_step_time": 0.39835476875305176
    },
    {
      "epoch": 0.0002936279296875,
      "model_forward_time": 0.11516237258911133,
      "step": 48108
    },
    {
      "epoch": 0.0002936279296875,
      "step": 48108,
      "training_step_time": 0.3847818374633789
    },
    {
      "epoch": 0.000293634033203125,
      "model_forward_time": 0.11550331115722656,
      "step": 48109
    },
    {
      "epoch": 0.000293634033203125,
      "step": 48109,
      "training_step_time": 0.3901219367980957
    },
    {
      "epoch": 0.00029364013671875,
      "grad_norm": 0.09824930131435394,
      "learning_rate": 1.0357510755035677e-05,
      "loss": 0.0355,
      "step": 48110
    },
    {
      "epoch": 0.00029364013671875,
      "model_forward_time": 0.1152651309967041,
      "step": 48110
    },
    {
      "epoch": 0.00029364013671875,
      "step": 48110,
      "training_step_time": 0.3895425796508789
    },
    {
      "epoch": 0.000293646240234375,
      "model_forward_time": 0.1146860122680664,
      "step": 48111
    },
    {
      "epoch": 0.000293646240234375,
      "step": 48111,
      "training_step_time": 0.42592310905456543
    },
    {
      "epoch": 0.00029365234375,
      "model_forward_time": 0.11503481864929199,
      "step": 48112
    },
    {
      "epoch": 0.00029365234375,
      "step": 48112,
      "training_step_time": 0.40085673332214355
    },
    {
      "epoch": 0.000293658447265625,
      "model_forward_time": 0.11533284187316895,
      "step": 48113
    },
    {
      "epoch": 0.000293658447265625,
      "step": 48113,
      "training_step_time": 0.5085744857788086
    },
    {
      "epoch": 0.00029366455078125,
      "model_forward_time": 0.11558151245117188,
      "step": 48114
    },
    {
      "epoch": 0.00029366455078125,
      "step": 48114,
      "training_step_time": 0.40170717239379883
    },
    {
      "epoch": 0.000293670654296875,
      "model_forward_time": 0.11511611938476562,
      "step": 48115
    },
    {
      "epoch": 0.000293670654296875,
      "step": 48115,
      "training_step_time": 0.4164257049560547
    },
    {
      "epoch": 0.0002936767578125,
      "model_forward_time": 0.11542844772338867,
      "step": 48116
    },
    {
      "epoch": 0.0002936767578125,
      "step": 48116,
      "training_step_time": 0.3972656726837158
    },
    {
      "epoch": 0.000293682861328125,
      "model_forward_time": 0.11529421806335449,
      "step": 48117
    },
    {
      "epoch": 0.000293682861328125,
      "step": 48117,
      "training_step_time": 0.39617276191711426
    },
    {
      "epoch": 0.00029368896484375,
      "model_forward_time": 0.11530661582946777,
      "step": 48118
    },
    {
      "epoch": 0.00029368896484375,
      "step": 48118,
      "training_step_time": 0.3824028968811035
    },
    {
      "epoch": 0.000293695068359375,
      "model_forward_time": 0.11509442329406738,
      "step": 48119
    },
    {
      "epoch": 0.000293695068359375,
      "step": 48119,
      "training_step_time": 0.38536596298217773
    },
    {
      "epoch": 0.000293701171875,
      "grad_norm": 0.12864598631858826,
      "learning_rate": 1.0340722563656107e-05,
      "loss": 0.0399,
      "step": 48120
    },
    {
      "epoch": 0.000293701171875,
      "model_forward_time": 0.114471435546875,
      "step": 48120
    },
    {
      "epoch": 0.000293701171875,
      "step": 48120,
      "training_step_time": 0.46801114082336426
    },
    {
      "epoch": 0.000293707275390625,
      "model_forward_time": 0.11495661735534668,
      "step": 48121
    },
    {
      "epoch": 0.000293707275390625,
      "step": 48121,
      "training_step_time": 0.424236536026001
    },
    {
      "epoch": 0.00029371337890625,
      "model_forward_time": 0.11412930488586426,
      "step": 48122
    },
    {
      "epoch": 0.00029371337890625,
      "step": 48122,
      "training_step_time": 0.42064428329467773
    },
    {
      "epoch": 0.000293719482421875,
      "model_forward_time": 0.1155083179473877,
      "step": 48123
    },
    {
      "epoch": 0.000293719482421875,
      "step": 48123,
      "training_step_time": 0.38613462448120117
    },
    {
      "epoch": 0.0002937255859375,
      "model_forward_time": 0.11573290824890137,
      "step": 48124
    },
    {
      "epoch": 0.0002937255859375,
      "step": 48124,
      "training_step_time": 0.38978147506713867
    },
    {
      "epoch": 0.000293731689453125,
      "model_forward_time": 0.11468744277954102,
      "step": 48125
    },
    {
      "epoch": 0.000293731689453125,
      "step": 48125,
      "training_step_time": 0.3837871551513672
    },
    {
      "epoch": 0.00029373779296875,
      "model_forward_time": 0.11513805389404297,
      "step": 48126
    },
    {
      "epoch": 0.00029373779296875,
      "step": 48126,
      "training_step_time": 0.4801313877105713
    },
    {
      "epoch": 0.000293743896484375,
      "model_forward_time": 0.11497902870178223,
      "step": 48127
    },
    {
      "epoch": 0.000293743896484375,
      "step": 48127,
      "training_step_time": 0.4140655994415283
    },
    {
      "epoch": 0.00029375,
      "model_forward_time": 0.1151127815246582,
      "step": 48128
    },
    {
      "epoch": 0.00029375,
      "step": 48128,
      "training_step_time": 0.4490635395050049
    },
    {
      "epoch": 0.000293756103515625,
      "model_forward_time": 0.1148519515991211,
      "step": 48129
    },
    {
      "epoch": 0.000293756103515625,
      "step": 48129,
      "training_step_time": 0.3957853317260742
    },
    {
      "epoch": 0.00029376220703125,
      "grad_norm": 0.09476396441459656,
      "learning_rate": 1.03239464197179e-05,
      "loss": 0.0409,
      "step": 48130
    },
    {
      "epoch": 0.00029376220703125,
      "model_forward_time": 0.1150810718536377,
      "step": 48130
    },
    {
      "epoch": 0.00029376220703125,
      "step": 48130,
      "training_step_time": 0.4284069538116455
    },
    {
      "epoch": 0.000293768310546875,
      "model_forward_time": 0.11525440216064453,
      "step": 48131
    },
    {
      "epoch": 0.000293768310546875,
      "step": 48131,
      "training_step_time": 0.4105672836303711
    },
    {
      "epoch": 0.0002937744140625,
      "model_forward_time": 0.11471939086914062,
      "step": 48132
    },
    {
      "epoch": 0.0002937744140625,
      "step": 48132,
      "training_step_time": 0.456204891204834
    },
    {
      "epoch": 0.000293780517578125,
      "model_forward_time": 0.11526846885681152,
      "step": 48133
    },
    {
      "epoch": 0.000293780517578125,
      "step": 48133,
      "training_step_time": 0.38072896003723145
    },
    {
      "epoch": 0.00029378662109375,
      "model_forward_time": 0.11513185501098633,
      "step": 48134
    },
    {
      "epoch": 0.00029378662109375,
      "step": 48134,
      "training_step_time": 0.3858070373535156
    },
    {
      "epoch": 0.000293792724609375,
      "model_forward_time": 0.11462593078613281,
      "step": 48135
    },
    {
      "epoch": 0.000293792724609375,
      "step": 48135,
      "training_step_time": 0.38634467124938965
    },
    {
      "epoch": 0.000293798828125,
      "model_forward_time": 0.11518383026123047,
      "step": 48136
    },
    {
      "epoch": 0.000293798828125,
      "step": 48136,
      "training_step_time": 0.4833261966705322
    },
    {
      "epoch": 0.000293804931640625,
      "model_forward_time": 0.11545658111572266,
      "step": 48137
    },
    {
      "epoch": 0.000293804931640625,
      "step": 48137,
      "training_step_time": 0.3824036121368408
    },
    {
      "epoch": 0.00029381103515625,
      "model_forward_time": 0.11478471755981445,
      "step": 48138
    },
    {
      "epoch": 0.00029381103515625,
      "step": 48138,
      "training_step_time": 0.39275693893432617
    },
    {
      "epoch": 0.000293817138671875,
      "model_forward_time": 0.1151585578918457,
      "step": 48139
    },
    {
      "epoch": 0.000293817138671875,
      "step": 48139,
      "training_step_time": 0.3974490165710449
    },
    {
      "epoch": 0.0002938232421875,
      "grad_norm": 0.10028041154146194,
      "learning_rate": 1.0307182328317188e-05,
      "loss": 0.0338,
      "step": 48140
    },
    {
      "epoch": 0.0002938232421875,
      "model_forward_time": 0.11556577682495117,
      "step": 48140
    },
    {
      "epoch": 0.0002938232421875,
      "step": 48140,
      "training_step_time": 0.4014594554901123
    },
    {
      "epoch": 0.000293829345703125,
      "model_forward_time": 0.11542582511901855,
      "step": 48141
    },
    {
      "epoch": 0.000293829345703125,
      "step": 48141,
      "training_step_time": 0.443300724029541
    },
    {
      "epoch": 0.00029383544921875,
      "model_forward_time": 0.11504292488098145,
      "step": 48142
    },
    {
      "epoch": 0.00029383544921875,
      "step": 48142,
      "training_step_time": 0.40813279151916504
    },
    {
      "epoch": 0.000293841552734375,
      "model_forward_time": 0.1145317554473877,
      "step": 48143
    },
    {
      "epoch": 0.000293841552734375,
      "step": 48143,
      "training_step_time": 0.46820616722106934
    },
    {
      "epoch": 0.00029384765625,
      "model_forward_time": 0.11578774452209473,
      "step": 48144
    },
    {
      "epoch": 0.00029384765625,
      "step": 48144,
      "training_step_time": 0.4214053153991699
    },
    {
      "epoch": 0.000293853759765625,
      "model_forward_time": 0.1156015396118164,
      "step": 48145
    },
    {
      "epoch": 0.000293853759765625,
      "step": 48145,
      "training_step_time": 0.3940877914428711
    },
    {
      "epoch": 0.00029385986328125,
      "model_forward_time": 0.11500048637390137,
      "step": 48146
    },
    {
      "epoch": 0.00029385986328125,
      "step": 48146,
      "training_step_time": 0.3954031467437744
    },
    {
      "epoch": 0.000293865966796875,
      "model_forward_time": 0.11548876762390137,
      "step": 48147
    },
    {
      "epoch": 0.000293865966796875,
      "step": 48147,
      "training_step_time": 0.44985175132751465
    },
    {
      "epoch": 0.0002938720703125,
      "model_forward_time": 0.11553001403808594,
      "step": 48148
    },
    {
      "epoch": 0.0002938720703125,
      "step": 48148,
      "training_step_time": 0.3991556167602539
    },
    {
      "epoch": 0.000293878173828125,
      "model_forward_time": 0.1158454418182373,
      "step": 48149
    },
    {
      "epoch": 0.000293878173828125,
      "step": 48149,
      "training_step_time": 0.48262691497802734
    },
    {
      "epoch": 0.00029388427734375,
      "grad_norm": 0.11561199277639389,
      "learning_rate": 1.0290430294546449e-05,
      "loss": 0.0391,
      "step": 48150
    },
    {
      "epoch": 0.00029388427734375,
      "model_forward_time": 0.1149146556854248,
      "step": 48150
    },
    {
      "epoch": 0.00029388427734375,
      "step": 48150,
      "training_step_time": 0.41376638412475586
    },
    {
      "epoch": 0.000293890380859375,
      "model_forward_time": 0.1150813102722168,
      "step": 48151
    },
    {
      "epoch": 0.000293890380859375,
      "step": 48151,
      "training_step_time": 0.41907763481140137
    },
    {
      "epoch": 0.000293896484375,
      "model_forward_time": 0.11466741561889648,
      "step": 48152
    },
    {
      "epoch": 0.000293896484375,
      "step": 48152,
      "training_step_time": 0.38453102111816406
    },
    {
      "epoch": 0.000293902587890625,
      "model_forward_time": 0.11464571952819824,
      "step": 48153
    },
    {
      "epoch": 0.000293902587890625,
      "step": 48153,
      "training_step_time": 0.38318657875061035
    },
    {
      "epoch": 0.00029390869140625,
      "model_forward_time": 0.1149749755859375,
      "step": 48154
    },
    {
      "epoch": 0.00029390869140625,
      "step": 48154,
      "training_step_time": 0.39186882972717285
    },
    {
      "epoch": 0.000293914794921875,
      "model_forward_time": 0.11488580703735352,
      "step": 48155
    },
    {
      "epoch": 0.000293914794921875,
      "step": 48155,
      "training_step_time": 0.38773369789123535
    },
    {
      "epoch": 0.0002939208984375,
      "model_forward_time": 0.11475586891174316,
      "step": 48156
    },
    {
      "epoch": 0.0002939208984375,
      "step": 48156,
      "training_step_time": 0.5041549205780029
    },
    {
      "epoch": 0.000293927001953125,
      "model_forward_time": 0.11471319198608398,
      "step": 48157
    },
    {
      "epoch": 0.000293927001953125,
      "step": 48157,
      "training_step_time": 0.4361763000488281
    },
    {
      "epoch": 0.00029393310546875,
      "model_forward_time": 0.11493921279907227,
      "step": 48158
    },
    {
      "epoch": 0.00029393310546875,
      "step": 48158,
      "training_step_time": 0.4684333801269531
    },
    {
      "epoch": 0.000293939208984375,
      "model_forward_time": 0.11551618576049805,
      "step": 48159
    },
    {
      "epoch": 0.000293939208984375,
      "step": 48159,
      "training_step_time": 0.3650333881378174
    },
    {
      "epoch": 0.0002939453125,
      "grad_norm": 0.10104399919509888,
      "learning_rate": 1.0273690323494523e-05,
      "loss": 0.0371,
      "step": 48160
    },
    {
      "epoch": 0.0002939453125,
      "model_forward_time": 0.11530542373657227,
      "step": 48160
    },
    {
      "epoch": 0.0002939453125,
      "step": 48160,
      "training_step_time": 0.4465017318725586
    },
    {
      "epoch": 0.000293951416015625,
      "model_forward_time": 0.11501789093017578,
      "step": 48161
    },
    {
      "epoch": 0.000293951416015625,
      "step": 48161,
      "training_step_time": 0.4381980895996094
    },
    {
      "epoch": 0.00029395751953125,
      "model_forward_time": 0.11436700820922852,
      "step": 48162
    },
    {
      "epoch": 0.00029395751953125,
      "step": 48162,
      "training_step_time": 0.40199756622314453
    },
    {
      "epoch": 0.000293963623046875,
      "model_forward_time": 0.11451888084411621,
      "step": 48163
    },
    {
      "epoch": 0.000293963623046875,
      "step": 48163,
      "training_step_time": 0.42692065238952637
    },
    {
      "epoch": 0.0002939697265625,
      "model_forward_time": 0.11454415321350098,
      "step": 48164
    },
    {
      "epoch": 0.0002939697265625,
      "step": 48164,
      "training_step_time": 0.38571715354919434
    },
    {
      "epoch": 0.000293975830078125,
      "model_forward_time": 0.11565423011779785,
      "step": 48165
    },
    {
      "epoch": 0.000293975830078125,
      "step": 48165,
      "training_step_time": 0.4425826072692871
    },
    {
      "epoch": 0.00029398193359375,
      "model_forward_time": 0.1151423454284668,
      "step": 48166
    },
    {
      "epoch": 0.00029398193359375,
      "step": 48166,
      "training_step_time": 0.4000587463378906
    },
    {
      "epoch": 0.000293988037109375,
      "model_forward_time": 0.11456799507141113,
      "step": 48167
    },
    {
      "epoch": 0.000293988037109375,
      "step": 48167,
      "training_step_time": 0.39528369903564453
    },
    {
      "epoch": 0.000293994140625,
      "model_forward_time": 0.11519360542297363,
      "step": 48168
    },
    {
      "epoch": 0.000293994140625,
      "step": 48168,
      "training_step_time": 0.5884897708892822
    },
    {
      "epoch": 0.000294000244140625,
      "model_forward_time": 0.11559104919433594,
      "step": 48169
    },
    {
      "epoch": 0.000294000244140625,
      "step": 48169,
      "training_step_time": 0.3884856700897217
    },
    {
      "epoch": 0.00029400634765625,
      "grad_norm": 0.11659897118806839,
      "learning_rate": 1.0256962420246557e-05,
      "loss": 0.0364,
      "step": 48170
    },
    {
      "epoch": 0.00029400634765625,
      "model_forward_time": 0.1149437427520752,
      "step": 48170
    },
    {
      "epoch": 0.00029400634765625,
      "step": 48170,
      "training_step_time": 0.4486255645751953
    },
    {
      "epoch": 0.000294012451171875,
      "model_forward_time": 0.11516594886779785,
      "step": 48171
    },
    {
      "epoch": 0.000294012451171875,
      "step": 48171,
      "training_step_time": 0.4128684997558594
    },
    {
      "epoch": 0.0002940185546875,
      "model_forward_time": 0.11455345153808594,
      "step": 48172
    },
    {
      "epoch": 0.0002940185546875,
      "step": 48172,
      "training_step_time": 0.46993517875671387
    },
    {
      "epoch": 0.000294024658203125,
      "model_forward_time": 0.11434674263000488,
      "step": 48173
    },
    {
      "epoch": 0.000294024658203125,
      "step": 48173,
      "training_step_time": 0.361112117767334
    },
    {
      "epoch": 0.00029403076171875,
      "model_forward_time": 0.114715576171875,
      "step": 48174
    },
    {
      "epoch": 0.00029403076171875,
      "step": 48174,
      "training_step_time": 0.43977880477905273
    },
    {
      "epoch": 0.000294036865234375,
      "model_forward_time": 0.114990234375,
      "step": 48175
    },
    {
      "epoch": 0.000294036865234375,
      "step": 48175,
      "training_step_time": 0.4120464324951172
    },
    {
      "epoch": 0.00029404296875,
      "model_forward_time": 0.11464834213256836,
      "step": 48176
    },
    {
      "epoch": 0.00029404296875,
      "step": 48176,
      "training_step_time": 0.39721155166625977
    },
    {
      "epoch": 0.000294049072265625,
      "model_forward_time": 0.1149601936340332,
      "step": 48177
    },
    {
      "epoch": 0.000294049072265625,
      "step": 48177,
      "training_step_time": 0.4228661060333252
    },
    {
      "epoch": 0.00029405517578125,
      "model_forward_time": 0.11436152458190918,
      "step": 48178
    },
    {
      "epoch": 0.00029405517578125,
      "step": 48178,
      "training_step_time": 0.4197216033935547
    },
    {
      "epoch": 0.000294061279296875,
      "model_forward_time": 0.11445379257202148,
      "step": 48179
    },
    {
      "epoch": 0.000294061279296875,
      "step": 48179,
      "training_step_time": 0.4891674518585205
    },
    {
      "epoch": 0.0002940673828125,
      "grad_norm": 0.07914839684963226,
      "learning_rate": 1.0240246589884044e-05,
      "loss": 0.0353,
      "step": 48180
    },
    {
      "epoch": 0.0002940673828125,
      "model_forward_time": 0.11503386497497559,
      "step": 48180
    },
    {
      "epoch": 0.0002940673828125,
      "step": 48180,
      "training_step_time": 0.4557769298553467
    },
    {
      "epoch": 0.000294073486328125,
      "model_forward_time": 0.11501526832580566,
      "step": 48181
    },
    {
      "epoch": 0.000294073486328125,
      "step": 48181,
      "training_step_time": 0.3922913074493408
    },
    {
      "epoch": 0.00029407958984375,
      "model_forward_time": 0.1146087646484375,
      "step": 48182
    },
    {
      "epoch": 0.00029407958984375,
      "step": 48182,
      "training_step_time": 0.3937389850616455
    },
    {
      "epoch": 0.000294085693359375,
      "model_forward_time": 0.1151118278503418,
      "step": 48183
    },
    {
      "epoch": 0.000294085693359375,
      "step": 48183,
      "training_step_time": 0.3868417739868164
    },
    {
      "epoch": 0.000294091796875,
      "model_forward_time": 0.11502814292907715,
      "step": 48184
    },
    {
      "epoch": 0.000294091796875,
      "step": 48184,
      "training_step_time": 0.4156181812286377
    },
    {
      "epoch": 0.000294097900390625,
      "model_forward_time": 0.11496877670288086,
      "step": 48185
    },
    {
      "epoch": 0.000294097900390625,
      "step": 48185,
      "training_step_time": 0.408951997756958
    },
    {
      "epoch": 0.00029410400390625,
      "model_forward_time": 0.11490082740783691,
      "step": 48186
    },
    {
      "epoch": 0.00029410400390625,
      "step": 48186,
      "training_step_time": 0.5802404880523682
    },
    {
      "epoch": 0.000294110107421875,
      "model_forward_time": 0.1148679256439209,
      "step": 48187
    },
    {
      "epoch": 0.000294110107421875,
      "step": 48187,
      "training_step_time": 0.3657076358795166
    },
    {
      "epoch": 0.0002941162109375,
      "model_forward_time": 0.11435604095458984,
      "step": 48188
    },
    {
      "epoch": 0.0002941162109375,
      "step": 48188,
      "training_step_time": 0.40913939476013184
    },
    {
      "epoch": 0.000294122314453125,
      "model_forward_time": 0.11503052711486816,
      "step": 48189
    },
    {
      "epoch": 0.000294122314453125,
      "step": 48189,
      "training_step_time": 0.43458056449890137
    },
    {
      "epoch": 0.00029412841796875,
      "grad_norm": 0.11059177666902542,
      "learning_rate": 1.0223542837484839e-05,
      "loss": 0.0402,
      "step": 48190
    },
    {
      "epoch": 0.00029412841796875,
      "model_forward_time": 0.11467742919921875,
      "step": 48190
    },
    {
      "epoch": 0.00029412841796875,
      "step": 48190,
      "training_step_time": 0.3800981044769287
    },
    {
      "epoch": 0.000294134521484375,
      "model_forward_time": 0.11461687088012695,
      "step": 48191
    },
    {
      "epoch": 0.000294134521484375,
      "step": 48191,
      "training_step_time": 0.3881380558013916
    },
    {
      "epoch": 0.000294140625,
      "model_forward_time": 0.11511826515197754,
      "step": 48192
    },
    {
      "epoch": 0.000294140625,
      "step": 48192,
      "training_step_time": 0.5672438144683838
    },
    {
      "epoch": 0.000294146728515625,
      "model_forward_time": 0.11500239372253418,
      "step": 48193
    },
    {
      "epoch": 0.000294146728515625,
      "step": 48193,
      "training_step_time": 0.40436720848083496
    },
    {
      "epoch": 0.00029415283203125,
      "model_forward_time": 0.11463475227355957,
      "step": 48194
    },
    {
      "epoch": 0.00029415283203125,
      "step": 48194,
      "training_step_time": 0.3869919776916504
    },
    {
      "epoch": 0.000294158935546875,
      "model_forward_time": 0.1148383617401123,
      "step": 48195
    },
    {
      "epoch": 0.000294158935546875,
      "step": 48195,
      "training_step_time": 0.38460683822631836
    },
    {
      "epoch": 0.0002941650390625,
      "model_forward_time": 0.11464834213256836,
      "step": 48196
    },
    {
      "epoch": 0.0002941650390625,
      "step": 48196,
      "training_step_time": 0.38620972633361816
    },
    {
      "epoch": 0.000294171142578125,
      "model_forward_time": 0.11522126197814941,
      "step": 48197
    },
    {
      "epoch": 0.000294171142578125,
      "step": 48197,
      "training_step_time": 0.3855562210083008
    },
    {
      "epoch": 0.00029417724609375,
      "model_forward_time": 0.11493492126464844,
      "step": 48198
    },
    {
      "epoch": 0.00029417724609375,
      "step": 48198,
      "training_step_time": 0.657822847366333
    },
    {
      "epoch": 0.000294183349609375,
      "model_forward_time": 0.11471009254455566,
      "step": 48199
    },
    {
      "epoch": 0.000294183349609375,
      "step": 48199,
      "training_step_time": 0.4221808910369873
    },
    {
      "epoch": 0.000294189453125,
      "grad_norm": 0.12121264636516571,
      "learning_rate": 1.0206851168123077e-05,
      "loss": 0.0379,
      "step": 48200
    },
    {
      "epoch": 0.000294189453125,
      "model_forward_time": 0.11480283737182617,
      "step": 48200
    },
    {
      "epoch": 0.000294189453125,
      "step": 48200,
      "training_step_time": 0.45894408226013184
    },
    {
      "epoch": 0.000294195556640625,
      "model_forward_time": 0.11494803428649902,
      "step": 48201
    },
    {
      "epoch": 0.000294195556640625,
      "step": 48201,
      "training_step_time": 0.37602782249450684
    },
    {
      "epoch": 0.00029420166015625,
      "model_forward_time": 0.11413431167602539,
      "step": 48202
    },
    {
      "epoch": 0.00029420166015625,
      "step": 48202,
      "training_step_time": 0.40660762786865234
    },
    {
      "epoch": 0.000294207763671875,
      "model_forward_time": 0.11460137367248535,
      "step": 48203
    },
    {
      "epoch": 0.000294207763671875,
      "step": 48203,
      "training_step_time": 0.4231681823730469
    },
    {
      "epoch": 0.0002942138671875,
      "model_forward_time": 0.11487412452697754,
      "step": 48204
    },
    {
      "epoch": 0.0002942138671875,
      "step": 48204,
      "training_step_time": 0.40043187141418457
    },
    {
      "epoch": 0.000294219970703125,
      "model_forward_time": 0.11487126350402832,
      "step": 48205
    },
    {
      "epoch": 0.000294219970703125,
      "step": 48205,
      "training_step_time": 0.44234228134155273
    },
    {
      "epoch": 0.00029422607421875,
      "model_forward_time": 0.11532878875732422,
      "step": 48206
    },
    {
      "epoch": 0.00029422607421875,
      "step": 48206,
      "training_step_time": 0.38840818405151367
    },
    {
      "epoch": 0.000294232177734375,
      "model_forward_time": 0.11454606056213379,
      "step": 48207
    },
    {
      "epoch": 0.000294232177734375,
      "step": 48207,
      "training_step_time": 0.4614078998565674
    },
    {
      "epoch": 0.00029423828125,
      "model_forward_time": 0.11492133140563965,
      "step": 48208
    },
    {
      "epoch": 0.00029423828125,
      "step": 48208,
      "training_step_time": 0.3870081901550293
    },
    {
      "epoch": 0.000294244384765625,
      "model_forward_time": 0.11543965339660645,
      "step": 48209
    },
    {
      "epoch": 0.000294244384765625,
      "step": 48209,
      "training_step_time": 0.3890669345855713
    },
    {
      "epoch": 0.00029425048828125,
      "grad_norm": 0.07428821921348572,
      "learning_rate": 1.0190171586869258e-05,
      "loss": 0.0326,
      "step": 48210
    },
    {
      "epoch": 0.00029425048828125,
      "model_forward_time": 0.11536836624145508,
      "step": 48210
    },
    {
      "epoch": 0.00029425048828125,
      "step": 48210,
      "training_step_time": 0.5391101837158203
    },
    {
      "epoch": 0.000294256591796875,
      "model_forward_time": 0.1150510311126709,
      "step": 48211
    },
    {
      "epoch": 0.000294256591796875,
      "step": 48211,
      "training_step_time": 0.41225242614746094
    },
    {
      "epoch": 0.0002942626953125,
      "model_forward_time": 0.11517167091369629,
      "step": 48212
    },
    {
      "epoch": 0.0002942626953125,
      "step": 48212,
      "training_step_time": 0.4018063545227051
    },
    {
      "epoch": 0.000294268798828125,
      "model_forward_time": 0.11483478546142578,
      "step": 48213
    },
    {
      "epoch": 0.000294268798828125,
      "step": 48213,
      "training_step_time": 0.4343585968017578
    },
    {
      "epoch": 0.00029427490234375,
      "model_forward_time": 0.11468839645385742,
      "step": 48214
    },
    {
      "epoch": 0.00029427490234375,
      "step": 48214,
      "training_step_time": 0.43816447257995605
    },
    {
      "epoch": 0.000294281005859375,
      "model_forward_time": 0.11512613296508789,
      "step": 48215
    },
    {
      "epoch": 0.000294281005859375,
      "step": 48215,
      "training_step_time": 0.46433210372924805
    },
    {
      "epoch": 0.000294287109375,
      "model_forward_time": 0.11488127708435059,
      "step": 48216
    },
    {
      "epoch": 0.000294287109375,
      "step": 48216,
      "training_step_time": 0.5305380821228027
    },
    {
      "epoch": 0.000294293212890625,
      "model_forward_time": 0.11428332328796387,
      "step": 48217
    },
    {
      "epoch": 0.000294293212890625,
      "step": 48217,
      "training_step_time": 0.4445841312408447
    },
    {
      "epoch": 0.00029429931640625,
      "model_forward_time": 0.11456418037414551,
      "step": 48218
    },
    {
      "epoch": 0.00029429931640625,
      "step": 48218,
      "training_step_time": 0.3836517333984375
    },
    {
      "epoch": 0.000294305419921875,
      "model_forward_time": 0.11448454856872559,
      "step": 48219
    },
    {
      "epoch": 0.000294305419921875,
      "step": 48219,
      "training_step_time": 0.4077272415161133
    },
    {
      "epoch": 0.0002943115234375,
      "grad_norm": 0.095107302069664,
      "learning_rate": 1.0173504098790187e-05,
      "loss": 0.039,
      "step": 48220
    },
    {
      "epoch": 0.0002943115234375,
      "model_forward_time": 0.11475801467895508,
      "step": 48220
    },
    {
      "epoch": 0.0002943115234375,
      "step": 48220,
      "training_step_time": 0.4591495990753174
    },
    {
      "epoch": 0.000294317626953125,
      "model_forward_time": 0.11456131935119629,
      "step": 48221
    },
    {
      "epoch": 0.000294317626953125,
      "step": 48221,
      "training_step_time": 0.4042398929595947
    },
    {
      "epoch": 0.00029432373046875,
      "model_forward_time": 0.11452865600585938,
      "step": 48222
    },
    {
      "epoch": 0.00029432373046875,
      "step": 48222,
      "training_step_time": 0.5046882629394531
    },
    {
      "epoch": 0.000294329833984375,
      "model_forward_time": 0.1150510311126709,
      "step": 48223
    },
    {
      "epoch": 0.000294329833984375,
      "step": 48223,
      "training_step_time": 0.4010159969329834
    },
    {
      "epoch": 0.0002943359375,
      "model_forward_time": 0.11479878425598145,
      "step": 48224
    },
    {
      "epoch": 0.0002943359375,
      "step": 48224,
      "training_step_time": 0.3840479850769043
    },
    {
      "epoch": 0.000294342041015625,
      "model_forward_time": 0.11441564559936523,
      "step": 48225
    },
    {
      "epoch": 0.000294342041015625,
      "step": 48225,
      "training_step_time": 0.38213467597961426
    },
    {
      "epoch": 0.00029434814453125,
      "model_forward_time": 0.11521553993225098,
      "step": 48226
    },
    {
      "epoch": 0.00029434814453125,
      "step": 48226,
      "training_step_time": 0.3805546760559082
    },
    {
      "epoch": 0.000294354248046875,
      "model_forward_time": 0.11513566970825195,
      "step": 48227
    },
    {
      "epoch": 0.000294354248046875,
      "step": 48227,
      "training_step_time": 0.48081541061401367
    },
    {
      "epoch": 0.0002943603515625,
      "model_forward_time": 0.11460471153259277,
      "step": 48228
    },
    {
      "epoch": 0.0002943603515625,
      "step": 48228,
      "training_step_time": 0.5499396324157715
    },
    {
      "epoch": 0.000294366455078125,
      "model_forward_time": 0.11453819274902344,
      "step": 48229
    },
    {
      "epoch": 0.000294366455078125,
      "step": 48229,
      "training_step_time": 0.36583423614501953
    },
    {
      "epoch": 0.00029437255859375,
      "grad_norm": 0.1469218134880066,
      "learning_rate": 1.0156848708949006e-05,
      "loss": 0.0343,
      "step": 48230
    },
    {
      "epoch": 0.00029437255859375,
      "model_forward_time": 0.11414456367492676,
      "step": 48230
    },
    {
      "epoch": 0.00029437255859375,
      "step": 48230,
      "training_step_time": 0.3859729766845703
    },
    {
      "epoch": 0.000294378662109375,
      "model_forward_time": 0.11480903625488281,
      "step": 48231
    },
    {
      "epoch": 0.000294378662109375,
      "step": 48231,
      "training_step_time": 0.4610886573791504
    },
    {
      "epoch": 0.000294384765625,
      "model_forward_time": 0.11473250389099121,
      "step": 48232
    },
    {
      "epoch": 0.000294384765625,
      "step": 48232,
      "training_step_time": 0.39215540885925293
    },
    {
      "epoch": 0.000294390869140625,
      "model_forward_time": 0.11417293548583984,
      "step": 48233
    },
    {
      "epoch": 0.000294390869140625,
      "step": 48233,
      "training_step_time": 0.4298410415649414
    },
    {
      "epoch": 0.00029439697265625,
      "model_forward_time": 0.11541938781738281,
      "step": 48234
    },
    {
      "epoch": 0.00029439697265625,
      "step": 48234,
      "training_step_time": 0.5025730133056641
    },
    {
      "epoch": 0.000294403076171875,
      "model_forward_time": 0.11484026908874512,
      "step": 48235
    },
    {
      "epoch": 0.000294403076171875,
      "step": 48235,
      "training_step_time": 0.4002649784088135
    },
    {
      "epoch": 0.0002944091796875,
      "model_forward_time": 0.11471080780029297,
      "step": 48236
    },
    {
      "epoch": 0.0002944091796875,
      "step": 48236,
      "training_step_time": 0.4292902946472168
    },
    {
      "epoch": 0.000294415283203125,
      "model_forward_time": 0.11549782752990723,
      "step": 48237
    },
    {
      "epoch": 0.000294415283203125,
      "step": 48237,
      "training_step_time": 0.3994131088256836
    },
    {
      "epoch": 0.00029442138671875,
      "model_forward_time": 0.11553502082824707,
      "step": 48238
    },
    {
      "epoch": 0.00029442138671875,
      "step": 48238,
      "training_step_time": 0.38010549545288086
    },
    {
      "epoch": 0.000294427490234375,
      "model_forward_time": 0.11459469795227051,
      "step": 48239
    },
    {
      "epoch": 0.000294427490234375,
      "step": 48239,
      "training_step_time": 0.3950798511505127
    },
    {
      "epoch": 0.00029443359375,
      "grad_norm": 0.10546459257602692,
      "learning_rate": 1.0140205422405214e-05,
      "loss": 0.036,
      "step": 48240
    },
    {
      "epoch": 0.00029443359375,
      "model_forward_time": 0.11449742317199707,
      "step": 48240
    },
    {
      "epoch": 0.00029443359375,
      "step": 48240,
      "training_step_time": 0.5670733451843262
    },
    {
      "epoch": 0.000294439697265625,
      "model_forward_time": 0.1143486499786377,
      "step": 48241
    },
    {
      "epoch": 0.000294439697265625,
      "step": 48241,
      "training_step_time": 0.419968843460083
    },
    {
      "epoch": 0.00029444580078125,
      "model_forward_time": 0.11474156379699707,
      "step": 48242
    },
    {
      "epoch": 0.00029444580078125,
      "step": 48242,
      "training_step_time": 0.41566038131713867
    },
    {
      "epoch": 0.000294451904296875,
      "model_forward_time": 0.11449742317199707,
      "step": 48243
    },
    {
      "epoch": 0.000294451904296875,
      "step": 48243,
      "training_step_time": 0.40194082260131836
    },
    {
      "epoch": 0.0002944580078125,
      "model_forward_time": 0.11560797691345215,
      "step": 48244
    },
    {
      "epoch": 0.0002944580078125,
      "step": 48244,
      "training_step_time": 0.43636417388916016
    },
    {
      "epoch": 0.000294464111328125,
      "model_forward_time": 0.11494636535644531,
      "step": 48245
    },
    {
      "epoch": 0.000294464111328125,
      "step": 48245,
      "training_step_time": 0.39165782928466797
    },
    {
      "epoch": 0.00029447021484375,
      "model_forward_time": 0.11525774002075195,
      "step": 48246
    },
    {
      "epoch": 0.00029447021484375,
      "step": 48246,
      "training_step_time": 0.5311121940612793
    },
    {
      "epoch": 0.000294476318359375,
      "model_forward_time": 0.1155691146850586,
      "step": 48247
    },
    {
      "epoch": 0.000294476318359375,
      "step": 48247,
      "training_step_time": 0.3930065631866455
    },
    {
      "epoch": 0.000294482421875,
      "model_forward_time": 0.11587643623352051,
      "step": 48248
    },
    {
      "epoch": 0.000294482421875,
      "step": 48248,
      "training_step_time": 0.3847346305847168
    },
    {
      "epoch": 0.000294488525390625,
      "model_forward_time": 0.11586380004882812,
      "step": 48249
    },
    {
      "epoch": 0.000294488525390625,
      "step": 48249,
      "training_step_time": 0.39531517028808594
    },
    {
      "epoch": 0.00029449462890625,
      "grad_norm": 0.09905614703893661,
      "learning_rate": 1.012357424421455e-05,
      "loss": 0.0353,
      "step": 48250
    },
    {
      "epoch": 0.00029449462890625,
      "model_forward_time": 0.11578869819641113,
      "step": 48250
    },
    {
      "epoch": 0.00029449462890625,
      "step": 48250,
      "training_step_time": 0.3891603946685791
    },
    {
      "epoch": 0.000294500732421875,
      "model_forward_time": 0.11552715301513672,
      "step": 48251
    },
    {
      "epoch": 0.000294500732421875,
      "step": 48251,
      "training_step_time": 0.4320998191833496
    },
    {
      "epoch": 0.0002945068359375,
      "model_forward_time": 0.11536812782287598,
      "step": 48252
    },
    {
      "epoch": 0.0002945068359375,
      "step": 48252,
      "training_step_time": 1.0315287113189697
    },
    {
      "epoch": 0.000294512939453125,
      "model_forward_time": 0.11392641067504883,
      "step": 48253
    },
    {
      "epoch": 0.000294512939453125,
      "step": 48253,
      "training_step_time": 0.38632702827453613
    },
    {
      "epoch": 0.00029451904296875,
      "model_forward_time": 0.11362409591674805,
      "step": 48254
    },
    {
      "epoch": 0.00029451904296875,
      "step": 48254,
      "training_step_time": 0.3819084167480469
    },
    {
      "epoch": 0.000294525146484375,
      "model_forward_time": 0.11386775970458984,
      "step": 48255
    },
    {
      "epoch": 0.000294525146484375,
      "step": 48255,
      "training_step_time": 0.3801865577697754
    },
    {
      "epoch": 0.00029453125,
      "model_forward_time": 0.11398959159851074,
      "step": 48256
    },
    {
      "epoch": 0.00029453125,
      "step": 48256,
      "training_step_time": 0.4085056781768799
    },
    {
      "epoch": 0.000294537353515625,
      "model_forward_time": 0.1142425537109375,
      "step": 48257
    },
    {
      "epoch": 0.000294537353515625,
      "step": 48257,
      "training_step_time": 0.4437837600708008
    },
    {
      "epoch": 0.00029454345703125,
      "model_forward_time": 0.11540555953979492,
      "step": 48258
    },
    {
      "epoch": 0.00029454345703125,
      "step": 48258,
      "training_step_time": 0.602245569229126
    },
    {
      "epoch": 0.000294549560546875,
      "model_forward_time": 0.11553788185119629,
      "step": 48259
    },
    {
      "epoch": 0.000294549560546875,
      "step": 48259,
      "training_step_time": 0.4031558036804199
    },
    {
      "epoch": 0.0002945556640625,
      "grad_norm": 0.08535050600767136,
      "learning_rate": 1.0106955179429183e-05,
      "loss": 0.037,
      "step": 48260
    },
    {
      "epoch": 0.0002945556640625,
      "model_forward_time": 0.1177358627319336,
      "step": 48260
    },
    {
      "epoch": 0.0002945556640625,
      "step": 48260,
      "training_step_time": 0.3782968521118164
    },
    {
      "epoch": 0.000294561767578125,
      "model_forward_time": 0.11530685424804688,
      "step": 48261
    },
    {
      "epoch": 0.000294561767578125,
      "step": 48261,
      "training_step_time": 0.4537315368652344
    },
    {
      "epoch": 0.00029456787109375,
      "model_forward_time": 0.11452841758728027,
      "step": 48262
    },
    {
      "epoch": 0.00029456787109375,
      "step": 48262,
      "training_step_time": 0.4410972595214844
    },
    {
      "epoch": 0.000294573974609375,
      "model_forward_time": 0.11477947235107422,
      "step": 48263
    },
    {
      "epoch": 0.000294573974609375,
      "step": 48263,
      "training_step_time": 0.4181826114654541
    },
    {
      "epoch": 0.000294580078125,
      "model_forward_time": 0.11587023735046387,
      "step": 48264
    },
    {
      "epoch": 0.000294580078125,
      "step": 48264,
      "training_step_time": 0.5612471103668213
    },
    {
      "epoch": 0.000294586181640625,
      "model_forward_time": 0.11475753784179688,
      "step": 48265
    },
    {
      "epoch": 0.000294586181640625,
      "step": 48265,
      "training_step_time": 0.40128207206726074
    },
    {
      "epoch": 0.00029459228515625,
      "model_forward_time": 0.11501097679138184,
      "step": 48266
    },
    {
      "epoch": 0.00029459228515625,
      "step": 48266,
      "training_step_time": 0.3962571620941162
    },
    {
      "epoch": 0.000294598388671875,
      "model_forward_time": 0.11481404304504395,
      "step": 48267
    },
    {
      "epoch": 0.000294598388671875,
      "step": 48267,
      "training_step_time": 0.4182779788970947
    },
    {
      "epoch": 0.0002946044921875,
      "model_forward_time": 0.11571145057678223,
      "step": 48268
    },
    {
      "epoch": 0.0002946044921875,
      "step": 48268,
      "training_step_time": 0.3955345153808594
    },
    {
      "epoch": 0.000294610595703125,
      "model_forward_time": 0.11453700065612793,
      "step": 48269
    },
    {
      "epoch": 0.000294610595703125,
      "step": 48269,
      "training_step_time": 0.469606876373291
    },
    {
      "epoch": 0.00029461669921875,
      "grad_norm": 0.11081652343273163,
      "learning_rate": 1.009034823309749e-05,
      "loss": 0.0378,
      "step": 48270
    },
    {
      "epoch": 0.00029461669921875,
      "model_forward_time": 0.11572813987731934,
      "step": 48270
    },
    {
      "epoch": 0.00029461669921875,
      "step": 48270,
      "training_step_time": 0.6585311889648438
    },
    {
      "epoch": 0.000294622802734375,
      "model_forward_time": 0.11476707458496094,
      "step": 48271
    },
    {
      "epoch": 0.000294622802734375,
      "step": 48271,
      "training_step_time": 0.4756324291229248
    },
    {
      "epoch": 0.00029462890625,
      "model_forward_time": 0.11479306221008301,
      "step": 48272
    },
    {
      "epoch": 0.00029462890625,
      "step": 48272,
      "training_step_time": 0.4020073413848877
    },
    {
      "epoch": 0.000294635009765625,
      "model_forward_time": 0.11482763290405273,
      "step": 48273
    },
    {
      "epoch": 0.000294635009765625,
      "step": 48273,
      "training_step_time": 0.4028773307800293
    },
    {
      "epoch": 0.00029464111328125,
      "model_forward_time": 0.11437511444091797,
      "step": 48274
    },
    {
      "epoch": 0.00029464111328125,
      "step": 48274,
      "training_step_time": 0.46305394172668457
    },
    {
      "epoch": 0.000294647216796875,
      "model_forward_time": 0.11427783966064453,
      "step": 48275
    },
    {
      "epoch": 0.000294647216796875,
      "step": 48275,
      "training_step_time": 0.40894436836242676
    },
    {
      "epoch": 0.0002946533203125,
      "model_forward_time": 0.11494064331054688,
      "step": 48276
    },
    {
      "epoch": 0.0002946533203125,
      "step": 48276,
      "training_step_time": 0.5079195499420166
    },
    {
      "epoch": 0.000294659423828125,
      "model_forward_time": 0.11489629745483398,
      "step": 48277
    },
    {
      "epoch": 0.000294659423828125,
      "step": 48277,
      "training_step_time": 0.3899717330932617
    },
    {
      "epoch": 0.00029466552734375,
      "model_forward_time": 0.11488533020019531,
      "step": 48278
    },
    {
      "epoch": 0.00029466552734375,
      "step": 48278,
      "training_step_time": 0.4058089256286621
    },
    {
      "epoch": 0.000294671630859375,
      "model_forward_time": 0.1149897575378418,
      "step": 48279
    },
    {
      "epoch": 0.000294671630859375,
      "step": 48279,
      "training_step_time": 0.4025111198425293
    },
    {
      "epoch": 0.000294677734375,
      "grad_norm": 0.08553601801395416,
      "learning_rate": 1.0073753410264263e-05,
      "loss": 0.0321,
      "step": 48280
    },
    {
      "epoch": 0.000294677734375,
      "model_forward_time": 0.11449050903320312,
      "step": 48280
    },
    {
      "epoch": 0.000294677734375,
      "step": 48280,
      "training_step_time": 0.3839566707611084
    },
    {
      "epoch": 0.000294683837890625,
      "model_forward_time": 0.11548662185668945,
      "step": 48281
    },
    {
      "epoch": 0.000294683837890625,
      "step": 48281,
      "training_step_time": 0.3934948444366455
    },
    {
      "epoch": 0.00029468994140625,
      "model_forward_time": 0.11513876914978027,
      "step": 48282
    },
    {
      "epoch": 0.00029468994140625,
      "step": 48282,
      "training_step_time": 0.6619393825531006
    },
    {
      "epoch": 0.000294696044921875,
      "model_forward_time": 0.11486649513244629,
      "step": 48283
    },
    {
      "epoch": 0.000294696044921875,
      "step": 48283,
      "training_step_time": 0.3999197483062744
    },
    {
      "epoch": 0.0002947021484375,
      "model_forward_time": 0.11538839340209961,
      "step": 48284
    },
    {
      "epoch": 0.0002947021484375,
      "step": 48284,
      "training_step_time": 0.4217822551727295
    },
    {
      "epoch": 0.000294708251953125,
      "model_forward_time": 0.11517977714538574,
      "step": 48285
    },
    {
      "epoch": 0.000294708251953125,
      "step": 48285,
      "training_step_time": 0.46383047103881836
    },
    {
      "epoch": 0.00029471435546875,
      "model_forward_time": 0.11540699005126953,
      "step": 48286
    },
    {
      "epoch": 0.00029471435546875,
      "step": 48286,
      "training_step_time": 0.49524474143981934
    },
    {
      "epoch": 0.000294720458984375,
      "model_forward_time": 0.11477804183959961,
      "step": 48287
    },
    {
      "epoch": 0.000294720458984375,
      "step": 48287,
      "training_step_time": 0.45200467109680176
    },
    {
      "epoch": 0.0002947265625,
      "model_forward_time": 0.11482906341552734,
      "step": 48288
    },
    {
      "epoch": 0.0002947265625,
      "step": 48288,
      "training_step_time": 0.4598581790924072
    },
    {
      "epoch": 0.000294732666015625,
      "model_forward_time": 0.11632466316223145,
      "step": 48289
    },
    {
      "epoch": 0.000294732666015625,
      "step": 48289,
      "training_step_time": 0.4213747978210449
    },
    {
      "epoch": 0.00029473876953125,
      "grad_norm": 0.12087196856737137,
      "learning_rate": 1.0057170715970559e-05,
      "loss": 0.0333,
      "step": 48290
    },
    {
      "epoch": 0.00029473876953125,
      "model_forward_time": 0.11552095413208008,
      "step": 48290
    },
    {
      "epoch": 0.00029473876953125,
      "step": 48290,
      "training_step_time": 0.39386677742004395
    },
    {
      "epoch": 0.000294744873046875,
      "model_forward_time": 0.1147928237915039,
      "step": 48291
    },
    {
      "epoch": 0.000294744873046875,
      "step": 48291,
      "training_step_time": 0.38294005393981934
    },
    {
      "epoch": 0.0002947509765625,
      "model_forward_time": 0.1152334213256836,
      "step": 48292
    },
    {
      "epoch": 0.0002947509765625,
      "step": 48292,
      "training_step_time": 0.3829934597015381
    },
    {
      "epoch": 0.000294757080078125,
      "model_forward_time": 0.11507391929626465,
      "step": 48293
    },
    {
      "epoch": 0.000294757080078125,
      "step": 48293,
      "training_step_time": 0.38538646697998047
    },
    {
      "epoch": 0.00029476318359375,
      "model_forward_time": 0.1154470443725586,
      "step": 48294
    },
    {
      "epoch": 0.00029476318359375,
      "step": 48294,
      "training_step_time": 0.5653948783874512
    },
    {
      "epoch": 0.000294769287109375,
      "model_forward_time": 0.11536645889282227,
      "step": 48295
    },
    {
      "epoch": 0.000294769287109375,
      "step": 48295,
      "training_step_time": 0.4079928398132324
    },
    {
      "epoch": 0.000294775390625,
      "model_forward_time": 0.13236117362976074,
      "step": 48296
    },
    {
      "epoch": 0.000294775390625,
      "step": 48296,
      "training_step_time": 0.38944435119628906
    },
    {
      "epoch": 0.000294781494140625,
      "model_forward_time": 0.11510348320007324,
      "step": 48297
    },
    {
      "epoch": 0.000294781494140625,
      "step": 48297,
      "training_step_time": 0.4224581718444824
    },
    {
      "epoch": 0.00029478759765625,
      "model_forward_time": 0.11516642570495605,
      "step": 48298
    },
    {
      "epoch": 0.00029478759765625,
      "step": 48298,
      "training_step_time": 0.39258265495300293
    },
    {
      "epoch": 0.000294793701171875,
      "model_forward_time": 0.11603569984436035,
      "step": 48299
    },
    {
      "epoch": 0.000294793701171875,
      "step": 48299,
      "training_step_time": 0.43337249755859375
    },
    {
      "epoch": 0.0002947998046875,
      "grad_norm": 0.08448461443185806,
      "learning_rate": 1.0040600155253765e-05,
      "loss": 0.0332,
      "step": 48300
    },
    {
      "epoch": 0.0002947998046875,
      "model_forward_time": 0.11463522911071777,
      "step": 48300
    },
    {
      "epoch": 0.0002947998046875,
      "step": 48300,
      "training_step_time": 0.46089935302734375
    },
    {
      "epoch": 0.000294805908203125,
      "model_forward_time": 0.11818432807922363,
      "step": 48301
    },
    {
      "epoch": 0.000294805908203125,
      "step": 48301,
      "training_step_time": 0.45676612854003906
    },
    {
      "epoch": 0.00029481201171875,
      "model_forward_time": 0.11514925956726074,
      "step": 48302
    },
    {
      "epoch": 0.00029481201171875,
      "step": 48302,
      "training_step_time": 0.4456217288970947
    },
    {
      "epoch": 0.000294818115234375,
      "model_forward_time": 0.11513185501098633,
      "step": 48303
    },
    {
      "epoch": 0.000294818115234375,
      "step": 48303,
      "training_step_time": 0.3961505889892578
    },
    {
      "epoch": 0.00029482421875,
      "model_forward_time": 0.11417531967163086,
      "step": 48304
    },
    {
      "epoch": 0.00029482421875,
      "step": 48304,
      "training_step_time": 0.3971717357635498
    },
    {
      "epoch": 0.000294830322265625,
      "model_forward_time": 0.11495566368103027,
      "step": 48305
    },
    {
      "epoch": 0.000294830322265625,
      "step": 48305,
      "training_step_time": 0.4065709114074707
    },
    {
      "epoch": 0.00029483642578125,
      "model_forward_time": 0.11426639556884766,
      "step": 48306
    },
    {
      "epoch": 0.00029483642578125,
      "step": 48306,
      "training_step_time": 0.5372843742370605
    },
    {
      "epoch": 0.000294842529296875,
      "model_forward_time": 0.11486482620239258,
      "step": 48307
    },
    {
      "epoch": 0.000294842529296875,
      "step": 48307,
      "training_step_time": 0.3856372833251953
    },
    {
      "epoch": 0.0002948486328125,
      "model_forward_time": 0.1141200065612793,
      "step": 48308
    },
    {
      "epoch": 0.0002948486328125,
      "step": 48308,
      "training_step_time": 0.4017481803894043
    },
    {
      "epoch": 0.000294854736328125,
      "model_forward_time": 0.1156764030456543,
      "step": 48309
    },
    {
      "epoch": 0.000294854736328125,
      "step": 48309,
      "training_step_time": 0.3901939392089844
    },
    {
      "epoch": 0.00029486083984375,
      "grad_norm": 0.09106171876192093,
      "learning_rate": 1.0024041733147577e-05,
      "loss": 0.0372,
      "step": 48310
    },
    {
      "epoch": 0.00029486083984375,
      "model_forward_time": 0.11666274070739746,
      "step": 48310
    },
    {
      "epoch": 0.00029486083984375,
      "step": 48310,
      "training_step_time": 0.39665913581848145
    },
    {
      "epoch": 0.000294866943359375,
      "model_forward_time": 0.1145925521850586,
      "step": 48311
    },
    {
      "epoch": 0.000294866943359375,
      "step": 48311,
      "training_step_time": 0.39407825469970703
    },
    {
      "epoch": 0.000294873046875,
      "model_forward_time": 0.11511731147766113,
      "step": 48312
    },
    {
      "epoch": 0.000294873046875,
      "step": 48312,
      "training_step_time": 0.8382112979888916
    },
    {
      "epoch": 0.000294879150390625,
      "model_forward_time": 0.11411547660827637,
      "step": 48313
    },
    {
      "epoch": 0.000294879150390625,
      "step": 48313,
      "training_step_time": 0.47425198554992676
    },
    {
      "epoch": 0.00029488525390625,
      "model_forward_time": 0.11570978164672852,
      "step": 48314
    },
    {
      "epoch": 0.00029488525390625,
      "step": 48314,
      "training_step_time": 0.39536404609680176
    },
    {
      "epoch": 0.000294891357421875,
      "model_forward_time": 0.11418962478637695,
      "step": 48315
    },
    {
      "epoch": 0.000294891357421875,
      "step": 48315,
      "training_step_time": 0.4501516819000244
    },
    {
      "epoch": 0.0002948974609375,
      "model_forward_time": 0.11403799057006836,
      "step": 48316
    },
    {
      "epoch": 0.0002948974609375,
      "step": 48316,
      "training_step_time": 0.405658483505249
    },
    {
      "epoch": 0.000294903564453125,
      "model_forward_time": 0.11532187461853027,
      "step": 48317
    },
    {
      "epoch": 0.000294903564453125,
      "step": 48317,
      "training_step_time": 0.39309072494506836
    },
    {
      "epoch": 0.00029490966796875,
      "model_forward_time": 0.11484003067016602,
      "step": 48318
    },
    {
      "epoch": 0.00029490966796875,
      "step": 48318,
      "training_step_time": 0.3934469223022461
    },
    {
      "epoch": 0.000294915771484375,
      "model_forward_time": 0.11517667770385742,
      "step": 48319
    },
    {
      "epoch": 0.000294915771484375,
      "step": 48319,
      "training_step_time": 0.39469122886657715
    },
    {
      "epoch": 0.000294921875,
      "grad_norm": 0.08375171571969986,
      "learning_rate": 1.0007495454682004e-05,
      "loss": 0.0369,
      "step": 48320
    },
    {
      "epoch": 0.000294921875,
      "model_forward_time": 0.11510777473449707,
      "step": 48320
    },
    {
      "epoch": 0.000294921875,
      "step": 48320,
      "training_step_time": 0.40102434158325195
    },
    {
      "epoch": 0.000294927978515625,
      "model_forward_time": 0.11539864540100098,
      "step": 48321
    },
    {
      "epoch": 0.000294927978515625,
      "step": 48321,
      "training_step_time": 0.39112234115600586
    },
    {
      "epoch": 0.00029493408203125,
      "model_forward_time": 0.11552667617797852,
      "step": 48322
    },
    {
      "epoch": 0.00029493408203125,
      "step": 48322,
      "training_step_time": 0.39491915702819824
    },
    {
      "epoch": 0.000294940185546875,
      "model_forward_time": 0.11519861221313477,
      "step": 48323
    },
    {
      "epoch": 0.000294940185546875,
      "step": 48323,
      "training_step_time": 0.3902099132537842
    },
    {
      "epoch": 0.0002949462890625,
      "model_forward_time": 0.11569738388061523,
      "step": 48324
    },
    {
      "epoch": 0.0002949462890625,
      "step": 48324,
      "training_step_time": 0.5653841495513916
    },
    {
      "epoch": 0.000294952392578125,
      "model_forward_time": 0.11516070365905762,
      "step": 48325
    },
    {
      "epoch": 0.000294952392578125,
      "step": 48325,
      "training_step_time": 0.38983845710754395
    },
    {
      "epoch": 0.00029495849609375,
      "model_forward_time": 0.11496782302856445,
      "step": 48326
    },
    {
      "epoch": 0.00029495849609375,
      "step": 48326,
      "training_step_time": 0.45094776153564453
    },
    {
      "epoch": 0.000294964599609375,
      "model_forward_time": 0.11589765548706055,
      "step": 48327
    },
    {
      "epoch": 0.000294964599609375,
      "step": 48327,
      "training_step_time": 0.47612547874450684
    },
    {
      "epoch": 0.000294970703125,
      "model_forward_time": 0.11452889442443848,
      "step": 48328
    },
    {
      "epoch": 0.000294970703125,
      "step": 48328,
      "training_step_time": 0.46753907203674316
    },
    {
      "epoch": 0.000294976806640625,
      "model_forward_time": 0.11487245559692383,
      "step": 48329
    },
    {
      "epoch": 0.000294976806640625,
      "step": 48329,
      "training_step_time": 0.40816378593444824
    },
    {
      "epoch": 0.00029498291015625,
      "grad_norm": 0.11405020952224731,
      "learning_rate": 9.990961324883358e-06,
      "loss": 0.0372,
      "step": 48330
    },
    {
      "epoch": 0.00029498291015625,
      "model_forward_time": 0.11428117752075195,
      "step": 48330
    },
    {
      "epoch": 0.00029498291015625,
      "step": 48330,
      "training_step_time": 0.46240830421447754
    },
    {
      "epoch": 0.000294989013671875,
      "model_forward_time": 0.11423778533935547,
      "step": 48331
    },
    {
      "epoch": 0.000294989013671875,
      "step": 48331,
      "training_step_time": 0.39740633964538574
    },
    {
      "epoch": 0.0002949951171875,
      "model_forward_time": 0.11435747146606445,
      "step": 48332
    },
    {
      "epoch": 0.0002949951171875,
      "step": 48332,
      "training_step_time": 0.3954811096191406
    },
    {
      "epoch": 0.000295001220703125,
      "model_forward_time": 0.11542201042175293,
      "step": 48333
    },
    {
      "epoch": 0.000295001220703125,
      "step": 48333,
      "training_step_time": 0.39683103561401367
    },
    {
      "epoch": 0.00029500732421875,
      "model_forward_time": 0.11443901062011719,
      "step": 48334
    },
    {
      "epoch": 0.00029500732421875,
      "step": 48334,
      "training_step_time": 0.399442195892334
    },
    {
      "epoch": 0.000295013427734375,
      "model_forward_time": 0.11573076248168945,
      "step": 48335
    },
    {
      "epoch": 0.000295013427734375,
      "step": 48335,
      "training_step_time": 0.3926515579223633
    },
    {
      "epoch": 0.00029501953125,
      "model_forward_time": 0.11488485336303711,
      "step": 48336
    },
    {
      "epoch": 0.00029501953125,
      "step": 48336,
      "training_step_time": 0.4902510643005371
    },
    {
      "epoch": 0.000295025634765625,
      "model_forward_time": 0.11504936218261719,
      "step": 48337
    },
    {
      "epoch": 0.000295025634765625,
      "step": 48337,
      "training_step_time": 0.38634181022644043
    },
    {
      "epoch": 0.00029503173828125,
      "model_forward_time": 0.11455655097961426,
      "step": 48338
    },
    {
      "epoch": 0.00029503173828125,
      "step": 48338,
      "training_step_time": 0.39826416969299316
    },
    {
      "epoch": 0.000295037841796875,
      "model_forward_time": 0.11520147323608398,
      "step": 48339
    },
    {
      "epoch": 0.000295037841796875,
      "step": 48339,
      "training_step_time": 0.4065265655517578
    },
    {
      "epoch": 0.0002950439453125,
      "grad_norm": 0.10117799788713455,
      "learning_rate": 9.974439348774295e-06,
      "loss": 0.0401,
      "step": 48340
    },
    {
      "epoch": 0.0002950439453125,
      "model_forward_time": 0.11497187614440918,
      "step": 48340
    },
    {
      "epoch": 0.0002950439453125,
      "step": 48340,
      "training_step_time": 0.39789366722106934
    },
    {
      "epoch": 0.000295050048828125,
      "model_forward_time": 0.11505961418151855,
      "step": 48341
    },
    {
      "epoch": 0.000295050048828125,
      "step": 48341,
      "training_step_time": 0.4458284378051758
    },
    {
      "epoch": 0.00029505615234375,
      "model_forward_time": 0.11565232276916504,
      "step": 48342
    },
    {
      "epoch": 0.00029505615234375,
      "step": 48342,
      "training_step_time": 0.6917076110839844
    },
    {
      "epoch": 0.000295062255859375,
      "model_forward_time": 0.11549162864685059,
      "step": 48343
    },
    {
      "epoch": 0.000295062255859375,
      "step": 48343,
      "training_step_time": 0.4174947738647461
    },
    {
      "epoch": 0.000295068359375,
      "model_forward_time": 0.11455273628234863,
      "step": 48344
    },
    {
      "epoch": 0.000295068359375,
      "step": 48344,
      "training_step_time": 0.380748987197876
    },
    {
      "epoch": 0.000295074462890625,
      "model_forward_time": 0.11450481414794922,
      "step": 48345
    },
    {
      "epoch": 0.000295074462890625,
      "step": 48345,
      "training_step_time": 0.40022730827331543
    },
    {
      "epoch": 0.00029508056640625,
      "model_forward_time": 0.11452484130859375,
      "step": 48346
    },
    {
      "epoch": 0.00029508056640625,
      "step": 48346,
      "training_step_time": 0.38721585273742676
    },
    {
      "epoch": 0.000295086669921875,
      "model_forward_time": 0.11410021781921387,
      "step": 48347
    },
    {
      "epoch": 0.000295086669921875,
      "step": 48347,
      "training_step_time": 0.3871936798095703
    },
    {
      "epoch": 0.0002950927734375,
      "model_forward_time": 0.11530256271362305,
      "step": 48348
    },
    {
      "epoch": 0.0002950927734375,
      "step": 48348,
      "training_step_time": 0.7905545234680176
    },
    {
      "epoch": 0.000295098876953125,
      "model_forward_time": 0.11487936973571777,
      "step": 48349
    },
    {
      "epoch": 0.000295098876953125,
      "step": 48349,
      "training_step_time": 0.3983938694000244
    },
    {
      "epoch": 0.00029510498046875,
      "grad_norm": 0.08131396025419235,
      "learning_rate": 9.95792953137375e-06,
      "loss": 0.0348,
      "step": 48350
    },
    {
      "epoch": 0.00029510498046875,
      "model_forward_time": 0.11463451385498047,
      "step": 48350
    },
    {
      "epoch": 0.00029510498046875,
      "step": 48350,
      "training_step_time": 0.3851966857910156
    },
    {
      "epoch": 0.000295111083984375,
      "model_forward_time": 0.11442685127258301,
      "step": 48351
    },
    {
      "epoch": 0.000295111083984375,
      "step": 48351,
      "training_step_time": 0.41452884674072266
    },
    {
      "epoch": 0.0002951171875,
      "model_forward_time": 0.11455917358398438,
      "step": 48352
    },
    {
      "epoch": 0.0002951171875,
      "step": 48352,
      "training_step_time": 0.38902711868286133
    },
    {
      "epoch": 0.000295123291015625,
      "model_forward_time": 0.11408138275146484,
      "step": 48353
    },
    {
      "epoch": 0.000295123291015625,
      "step": 48353,
      "training_step_time": 0.44362688064575195
    },
    {
      "epoch": 0.00029512939453125,
      "model_forward_time": 0.11952757835388184,
      "step": 48354
    },
    {
      "epoch": 0.00029512939453125,
      "step": 48354,
      "training_step_time": 0.5458390712738037
    },
    {
      "epoch": 0.000295135498046875,
      "model_forward_time": 0.11493635177612305,
      "step": 48355
    },
    {
      "epoch": 0.000295135498046875,
      "step": 48355,
      "training_step_time": 0.37354159355163574
    },
    {
      "epoch": 0.0002951416015625,
      "model_forward_time": 0.11550617218017578,
      "step": 48356
    },
    {
      "epoch": 0.0002951416015625,
      "step": 48356,
      "training_step_time": 0.43368983268737793
    },
    {
      "epoch": 0.000295147705078125,
      "model_forward_time": 0.11525774002075195,
      "step": 48357
    },
    {
      "epoch": 0.000295147705078125,
      "step": 48357,
      "training_step_time": 0.5154728889465332
    },
    {
      "epoch": 0.00029515380859375,
      "model_forward_time": 0.11552906036376953,
      "step": 48358
    },
    {
      "epoch": 0.00029515380859375,
      "step": 48358,
      "training_step_time": 0.40027332305908203
    },
    {
      "epoch": 0.000295159912109375,
      "model_forward_time": 0.11481809616088867,
      "step": 48359
    },
    {
      "epoch": 0.000295159912109375,
      "step": 48359,
      "training_step_time": 0.3882319927215576
    },
    {
      "epoch": 0.000295166015625,
      "grad_norm": 0.1358201950788498,
      "learning_rate": 9.941431877696955e-06,
      "loss": 0.0401,
      "step": 48360
    },
    {
      "epoch": 0.000295166015625,
      "model_forward_time": 0.11546850204467773,
      "step": 48360
    },
    {
      "epoch": 0.000295166015625,
      "step": 48360,
      "training_step_time": 0.40349555015563965
    },
    {
      "epoch": 0.000295172119140625,
      "model_forward_time": 0.1154012680053711,
      "step": 48361
    },
    {
      "epoch": 0.000295172119140625,
      "step": 48361,
      "training_step_time": 0.3913881778717041
    },
    {
      "epoch": 0.00029517822265625,
      "model_forward_time": 0.11472034454345703,
      "step": 48362
    },
    {
      "epoch": 0.00029517822265625,
      "step": 48362,
      "training_step_time": 0.3808469772338867
    },
    {
      "epoch": 0.000295184326171875,
      "model_forward_time": 0.1149294376373291,
      "step": 48363
    },
    {
      "epoch": 0.000295184326171875,
      "step": 48363,
      "training_step_time": 0.3826582431793213
    },
    {
      "epoch": 0.0002951904296875,
      "model_forward_time": 0.11449122428894043,
      "step": 48364
    },
    {
      "epoch": 0.0002951904296875,
      "step": 48364,
      "training_step_time": 0.3887667655944824
    },
    {
      "epoch": 0.000295196533203125,
      "model_forward_time": 0.11644244194030762,
      "step": 48365
    },
    {
      "epoch": 0.000295196533203125,
      "step": 48365,
      "training_step_time": 0.39101624488830566
    },
    {
      "epoch": 0.00029520263671875,
      "model_forward_time": 0.11513090133666992,
      "step": 48366
    },
    {
      "epoch": 0.00029520263671875,
      "step": 48366,
      "training_step_time": 0.7004132270812988
    },
    {
      "epoch": 0.000295208740234375,
      "model_forward_time": 0.11477351188659668,
      "step": 48367
    },
    {
      "epoch": 0.000295208740234375,
      "step": 48367,
      "training_step_time": 0.39094114303588867
    },
    {
      "epoch": 0.00029521484375,
      "model_forward_time": 0.11415410041809082,
      "step": 48368
    },
    {
      "epoch": 0.00029521484375,
      "step": 48368,
      "training_step_time": 0.39766407012939453
    },
    {
      "epoch": 0.000295220947265625,
      "model_forward_time": 0.11476659774780273,
      "step": 48369
    },
    {
      "epoch": 0.000295220947265625,
      "step": 48369,
      "training_step_time": 0.5083751678466797
    },
    {
      "epoch": 0.00029522705078125,
      "grad_norm": 0.09456069022417068,
      "learning_rate": 9.924946392755479e-06,
      "loss": 0.0294,
      "step": 48370
    },
    {
      "epoch": 0.00029522705078125,
      "model_forward_time": 0.11476516723632812,
      "step": 48370
    },
    {
      "epoch": 0.00029522705078125,
      "step": 48370,
      "training_step_time": 0.4023582935333252
    },
    {
      "epoch": 0.000295233154296875,
      "model_forward_time": 0.11459684371948242,
      "step": 48371
    },
    {
      "epoch": 0.000295233154296875,
      "step": 48371,
      "training_step_time": 0.4877779483795166
    },
    {
      "epoch": 0.0002952392578125,
      "model_forward_time": 0.11500144004821777,
      "step": 48372
    },
    {
      "epoch": 0.0002952392578125,
      "step": 48372,
      "training_step_time": 0.4858269691467285
    },
    {
      "epoch": 0.000295245361328125,
      "model_forward_time": 0.11452722549438477,
      "step": 48373
    },
    {
      "epoch": 0.000295245361328125,
      "step": 48373,
      "training_step_time": 0.3869967460632324
    },
    {
      "epoch": 0.00029525146484375,
      "model_forward_time": 0.11479353904724121,
      "step": 48374
    },
    {
      "epoch": 0.00029525146484375,
      "step": 48374,
      "training_step_time": 0.37607645988464355
    },
    {
      "epoch": 0.000295257568359375,
      "model_forward_time": 0.11478304862976074,
      "step": 48375
    },
    {
      "epoch": 0.000295257568359375,
      "step": 48375,
      "training_step_time": 0.39479851722717285
    },
    {
      "epoch": 0.000295263671875,
      "model_forward_time": 0.11527466773986816,
      "step": 48376
    },
    {
      "epoch": 0.000295263671875,
      "step": 48376,
      "training_step_time": 0.3976776599884033
    },
    {
      "epoch": 0.000295269775390625,
      "model_forward_time": 0.11465239524841309,
      "step": 48377
    },
    {
      "epoch": 0.000295269775390625,
      "step": 48377,
      "training_step_time": 0.3950352668762207
    },
    {
      "epoch": 0.00029527587890625,
      "model_forward_time": 0.11493110656738281,
      "step": 48378
    },
    {
      "epoch": 0.00029527587890625,
      "step": 48378,
      "training_step_time": 0.691657304763794
    },
    {
      "epoch": 0.000295281982421875,
      "model_forward_time": 0.11562418937683105,
      "step": 48379
    },
    {
      "epoch": 0.000295281982421875,
      "step": 48379,
      "training_step_time": 0.3960545063018799
    },
    {
      "epoch": 0.0002952880859375,
      "grad_norm": 0.08628474920988083,
      "learning_rate": 9.908473081557151e-06,
      "loss": 0.0349,
      "step": 48380
    },
    {
      "epoch": 0.0002952880859375,
      "model_forward_time": 0.11447644233703613,
      "step": 48380
    },
    {
      "epoch": 0.0002952880859375,
      "step": 48380,
      "training_step_time": 0.3934144973754883
    },
    {
      "epoch": 0.000295294189453125,
      "model_forward_time": 0.11461520195007324,
      "step": 48381
    },
    {
      "epoch": 0.000295294189453125,
      "step": 48381,
      "training_step_time": 0.38704633712768555
    },
    {
      "epoch": 0.00029530029296875,
      "model_forward_time": 0.1145482063293457,
      "step": 48382
    },
    {
      "epoch": 0.00029530029296875,
      "step": 48382,
      "training_step_time": 0.3865973949432373
    },
    {
      "epoch": 0.000295306396484375,
      "model_forward_time": 0.11500239372253418,
      "step": 48383
    },
    {
      "epoch": 0.000295306396484375,
      "step": 48383,
      "training_step_time": 0.4571387767791748
    },
    {
      "epoch": 0.0002953125,
      "model_forward_time": 0.11508035659790039,
      "step": 48384
    },
    {
      "epoch": 0.0002953125,
      "step": 48384,
      "training_step_time": 0.6612145900726318
    },
    {
      "epoch": 0.000295318603515625,
      "model_forward_time": 0.11388039588928223,
      "step": 48385
    },
    {
      "epoch": 0.000295318603515625,
      "step": 48385,
      "training_step_time": 0.4760556221008301
    },
    {
      "epoch": 0.00029532470703125,
      "model_forward_time": 0.11632943153381348,
      "step": 48386
    },
    {
      "epoch": 0.00029532470703125,
      "step": 48386,
      "training_step_time": 0.3717222213745117
    },
    {
      "epoch": 0.000295330810546875,
      "model_forward_time": 0.11427593231201172,
      "step": 48387
    },
    {
      "epoch": 0.000295330810546875,
      "step": 48387,
      "training_step_time": 0.40190577507019043
    },
    {
      "epoch": 0.0002953369140625,
      "model_forward_time": 0.11472606658935547,
      "step": 48388
    },
    {
      "epoch": 0.0002953369140625,
      "step": 48388,
      "training_step_time": 0.3699991703033447
    },
    {
      "epoch": 0.000295343017578125,
      "model_forward_time": 0.11442232131958008,
      "step": 48389
    },
    {
      "epoch": 0.000295343017578125,
      "step": 48389,
      "training_step_time": 0.38848161697387695
    },
    {
      "epoch": 0.00029534912109375,
      "grad_norm": 0.10866949707269669,
      "learning_rate": 9.892011949106172e-06,
      "loss": 0.0399,
      "step": 48390
    },
    {
      "epoch": 0.00029534912109375,
      "model_forward_time": 0.11506271362304688,
      "step": 48390
    },
    {
      "epoch": 0.00029534912109375,
      "step": 48390,
      "training_step_time": 0.5183894634246826
    },
    {
      "epoch": 0.000295355224609375,
      "model_forward_time": 0.1157827377319336,
      "step": 48391
    },
    {
      "epoch": 0.000295355224609375,
      "step": 48391,
      "training_step_time": 0.39059925079345703
    },
    {
      "epoch": 0.000295361328125,
      "model_forward_time": 0.11464881896972656,
      "step": 48392
    },
    {
      "epoch": 0.000295361328125,
      "step": 48392,
      "training_step_time": 0.412036657333374
    },
    {
      "epoch": 0.000295367431640625,
      "model_forward_time": 0.11526203155517578,
      "step": 48393
    },
    {
      "epoch": 0.000295367431640625,
      "step": 48393,
      "training_step_time": 0.4255661964416504
    },
    {
      "epoch": 0.00029537353515625,
      "model_forward_time": 0.11513638496398926,
      "step": 48394
    },
    {
      "epoch": 0.00029537353515625,
      "step": 48394,
      "training_step_time": 0.39000868797302246
    },
    {
      "epoch": 0.000295379638671875,
      "model_forward_time": 0.11559724807739258,
      "step": 48395
    },
    {
      "epoch": 0.000295379638671875,
      "step": 48395,
      "training_step_time": 0.38950300216674805
    },
    {
      "epoch": 0.0002953857421875,
      "model_forward_time": 0.11517977714538574,
      "step": 48396
    },
    {
      "epoch": 0.0002953857421875,
      "step": 48396,
      "training_step_time": 0.6970500946044922
    },
    {
      "epoch": 0.000295391845703125,
      "model_forward_time": 0.1153109073638916,
      "step": 48397
    },
    {
      "epoch": 0.000295391845703125,
      "step": 48397,
      "training_step_time": 0.40001988410949707
    },
    {
      "epoch": 0.00029539794921875,
      "model_forward_time": 0.11484289169311523,
      "step": 48398
    },
    {
      "epoch": 0.00029539794921875,
      "step": 48398,
      "training_step_time": 0.4032464027404785
    },
    {
      "epoch": 0.000295404052734375,
      "model_forward_time": 0.11452698707580566,
      "step": 48399
    },
    {
      "epoch": 0.000295404052734375,
      "step": 48399,
      "training_step_time": 0.5267889499664307
    },
    {
      "epoch": 0.00029541015625,
      "grad_norm": 0.08533649146556854,
      "learning_rate": 9.875563000402948e-06,
      "loss": 0.0334,
      "step": 48400
    },
    {
      "epoch": 0.00029541015625,
      "model_forward_time": 0.11451959609985352,
      "step": 48400
    },
    {
      "epoch": 0.00029541015625,
      "step": 48400,
      "training_step_time": 0.3858797550201416
    },
    {
      "epoch": 0.000295416259765625,
      "model_forward_time": 0.11390018463134766,
      "step": 48401
    },
    {
      "epoch": 0.000295416259765625,
      "step": 48401,
      "training_step_time": 0.3906068801879883
    },
    {
      "epoch": 0.00029542236328125,
      "model_forward_time": 0.11481690406799316,
      "step": 48402
    },
    {
      "epoch": 0.00029542236328125,
      "step": 48402,
      "training_step_time": 0.5433664321899414
    },
    {
      "epoch": 0.000295428466796875,
      "model_forward_time": 0.11454916000366211,
      "step": 48403
    },
    {
      "epoch": 0.000295428466796875,
      "step": 48403,
      "training_step_time": 0.38607096672058105
    },
    {
      "epoch": 0.0002954345703125,
      "model_forward_time": 0.11462855339050293,
      "step": 48404
    },
    {
      "epoch": 0.0002954345703125,
      "step": 48404,
      "training_step_time": 0.38605809211730957
    },
    {
      "epoch": 0.000295440673828125,
      "model_forward_time": 0.11527490615844727,
      "step": 48405
    },
    {
      "epoch": 0.000295440673828125,
      "step": 48405,
      "training_step_time": 0.4401893615722656
    },
    {
      "epoch": 0.00029544677734375,
      "model_forward_time": 0.11488580703735352,
      "step": 48406
    },
    {
      "epoch": 0.00029544677734375,
      "step": 48406,
      "training_step_time": 0.42281413078308105
    },
    {
      "epoch": 0.000295452880859375,
      "model_forward_time": 0.1154177188873291,
      "step": 48407
    },
    {
      "epoch": 0.000295452880859375,
      "step": 48407,
      "training_step_time": 0.3936753273010254
    },
    {
      "epoch": 0.000295458984375,
      "model_forward_time": 0.11499738693237305,
      "step": 48408
    },
    {
      "epoch": 0.000295458984375,
      "step": 48408,
      "training_step_time": 0.4953920841217041
    },
    {
      "epoch": 0.000295465087890625,
      "model_forward_time": 0.11485481262207031,
      "step": 48409
    },
    {
      "epoch": 0.000295465087890625,
      "step": 48409,
      "training_step_time": 0.5004110336303711
    },
    {
      "epoch": 0.00029547119140625,
      "grad_norm": 0.09832935780286789,
      "learning_rate": 9.859126240444283e-06,
      "loss": 0.0339,
      "step": 48410
    },
    {
      "epoch": 0.00029547119140625,
      "model_forward_time": 0.11473274230957031,
      "step": 48410
    },
    {
      "epoch": 0.00029547119140625,
      "step": 48410,
      "training_step_time": 0.39856863021850586
    },
    {
      "epoch": 0.000295477294921875,
      "model_forward_time": 0.11535334587097168,
      "step": 48411
    },
    {
      "epoch": 0.000295477294921875,
      "step": 48411,
      "training_step_time": 0.3688924312591553
    },
    {
      "epoch": 0.0002954833984375,
      "model_forward_time": 0.11469769477844238,
      "step": 48412
    },
    {
      "epoch": 0.0002954833984375,
      "step": 48412,
      "training_step_time": 0.43758273124694824
    },
    {
      "epoch": 0.000295489501953125,
      "model_forward_time": 0.11459898948669434,
      "step": 48413
    },
    {
      "epoch": 0.000295489501953125,
      "step": 48413,
      "training_step_time": 0.45419955253601074
    },
    {
      "epoch": 0.00029549560546875,
      "model_forward_time": 0.11539268493652344,
      "step": 48414
    },
    {
      "epoch": 0.00029549560546875,
      "step": 48414,
      "training_step_time": 0.4317145347595215
    },
    {
      "epoch": 0.000295501708984375,
      "model_forward_time": 0.11567234992980957,
      "step": 48415
    },
    {
      "epoch": 0.000295501708984375,
      "step": 48415,
      "training_step_time": 0.39115262031555176
    },
    {
      "epoch": 0.0002955078125,
      "model_forward_time": 0.11507058143615723,
      "step": 48416
    },
    {
      "epoch": 0.0002955078125,
      "step": 48416,
      "training_step_time": 0.40026021003723145
    },
    {
      "epoch": 0.000295513916015625,
      "model_forward_time": 0.11488556861877441,
      "step": 48417
    },
    {
      "epoch": 0.000295513916015625,
      "step": 48417,
      "training_step_time": 0.39336585998535156
    },
    {
      "epoch": 0.00029552001953125,
      "model_forward_time": 0.11487603187561035,
      "step": 48418
    },
    {
      "epoch": 0.00029552001953125,
      "step": 48418,
      "training_step_time": 0.3838777542114258
    },
    {
      "epoch": 0.000295526123046875,
      "model_forward_time": 0.11538100242614746,
      "step": 48419
    },
    {
      "epoch": 0.000295526123046875,
      "step": 48419,
      "training_step_time": 0.4467780590057373
    },
    {
      "epoch": 0.0002955322265625,
      "grad_norm": 0.1110137552022934,
      "learning_rate": 9.842701674223187e-06,
      "loss": 0.0392,
      "step": 48420
    },
    {
      "epoch": 0.0002955322265625,
      "model_forward_time": 0.11527276039123535,
      "step": 48420
    },
    {
      "epoch": 0.0002955322265625,
      "step": 48420,
      "training_step_time": 0.5996167659759521
    },
    {
      "epoch": 0.000295538330078125,
      "model_forward_time": 0.11557221412658691,
      "step": 48421
    },
    {
      "epoch": 0.000295538330078125,
      "step": 48421,
      "training_step_time": 0.3917398452758789
    },
    {
      "epoch": 0.00029554443359375,
      "model_forward_time": 0.11487650871276855,
      "step": 48422
    },
    {
      "epoch": 0.00029554443359375,
      "step": 48422,
      "training_step_time": 0.3843269348144531
    },
    {
      "epoch": 0.000295550537109375,
      "model_forward_time": 0.11520671844482422,
      "step": 48423
    },
    {
      "epoch": 0.000295550537109375,
      "step": 48423,
      "training_step_time": 0.4188532829284668
    },
    {
      "epoch": 0.000295556640625,
      "model_forward_time": 0.11433672904968262,
      "step": 48424
    },
    {
      "epoch": 0.000295556640625,
      "step": 48424,
      "training_step_time": 0.39291930198669434
    },
    {
      "epoch": 0.000295562744140625,
      "model_forward_time": 0.1150047779083252,
      "step": 48425
    },
    {
      "epoch": 0.000295562744140625,
      "step": 48425,
      "training_step_time": 0.47506165504455566
    },
    {
      "epoch": 0.00029556884765625,
      "model_forward_time": 0.11512303352355957,
      "step": 48426
    },
    {
      "epoch": 0.00029556884765625,
      "step": 48426,
      "training_step_time": 0.6152467727661133
    },
    {
      "epoch": 0.000295574951171875,
      "model_forward_time": 0.11523628234863281,
      "step": 48427
    },
    {
      "epoch": 0.000295574951171875,
      "step": 48427,
      "training_step_time": 0.417400598526001
    },
    {
      "epoch": 0.0002955810546875,
      "model_forward_time": 0.11456990242004395,
      "step": 48428
    },
    {
      "epoch": 0.0002955810546875,
      "step": 48428,
      "training_step_time": 0.3826570510864258
    },
    {
      "epoch": 0.000295587158203125,
      "model_forward_time": 0.11526966094970703,
      "step": 48429
    },
    {
      "epoch": 0.000295587158203125,
      "step": 48429,
      "training_step_time": 0.3931424617767334
    },
    {
      "epoch": 0.00029559326171875,
      "grad_norm": 0.10300005227327347,
      "learning_rate": 9.826289306729052e-06,
      "loss": 0.0349,
      "step": 48430
    },
    {
      "epoch": 0.00029559326171875,
      "model_forward_time": 0.11480283737182617,
      "step": 48430
    },
    {
      "epoch": 0.00029559326171875,
      "step": 48430,
      "training_step_time": 0.38846516609191895
    },
    {
      "epoch": 0.000295599365234375,
      "model_forward_time": 0.1148219108581543,
      "step": 48431
    },
    {
      "epoch": 0.000295599365234375,
      "step": 48431,
      "training_step_time": 0.39211344718933105
    },
    {
      "epoch": 0.00029560546875,
      "model_forward_time": 0.11548280715942383,
      "step": 48432
    },
    {
      "epoch": 0.00029560546875,
      "step": 48432,
      "training_step_time": 0.5826258659362793
    },
    {
      "epoch": 0.000295611572265625,
      "model_forward_time": 0.1145625114440918,
      "step": 48433
    },
    {
      "epoch": 0.000295611572265625,
      "step": 48433,
      "training_step_time": 0.4208695888519287
    },
    {
      "epoch": 0.00029561767578125,
      "model_forward_time": 0.11641836166381836,
      "step": 48434
    },
    {
      "epoch": 0.00029561767578125,
      "step": 48434,
      "training_step_time": 0.38562607765197754
    },
    {
      "epoch": 0.000295623779296875,
      "model_forward_time": 0.11550283432006836,
      "step": 48435
    },
    {
      "epoch": 0.000295623779296875,
      "step": 48435,
      "training_step_time": 0.38997960090637207
    },
    {
      "epoch": 0.0002956298828125,
      "model_forward_time": 0.11509847640991211,
      "step": 48436
    },
    {
      "epoch": 0.0002956298828125,
      "step": 48436,
      "training_step_time": 0.38442063331604004
    },
    {
      "epoch": 0.000295635986328125,
      "model_forward_time": 0.1147303581237793,
      "step": 48437
    },
    {
      "epoch": 0.000295635986328125,
      "step": 48437,
      "training_step_time": 0.39628100395202637
    },
    {
      "epoch": 0.00029564208984375,
      "model_forward_time": 0.11489677429199219,
      "step": 48438
    },
    {
      "epoch": 0.00029564208984375,
      "step": 48438,
      "training_step_time": 0.6739239692687988
    },
    {
      "epoch": 0.000295648193359375,
      "model_forward_time": 0.11463093757629395,
      "step": 48439
    },
    {
      "epoch": 0.000295648193359375,
      "step": 48439,
      "training_step_time": 0.36516714096069336
    },
    {
      "epoch": 0.000295654296875,
      "grad_norm": 0.09595108032226562,
      "learning_rate": 9.809889142947476e-06,
      "loss": 0.0361,
      "step": 48440
    },
    {
      "epoch": 0.000295654296875,
      "model_forward_time": 0.11489582061767578,
      "step": 48440
    },
    {
      "epoch": 0.000295654296875,
      "step": 48440,
      "training_step_time": 0.45475196838378906
    },
    {
      "epoch": 0.000295660400390625,
      "model_forward_time": 0.11446762084960938,
      "step": 48441
    },
    {
      "epoch": 0.000295660400390625,
      "step": 48441,
      "training_step_time": 0.47778820991516113
    },
    {
      "epoch": 0.00029566650390625,
      "model_forward_time": 0.11431407928466797,
      "step": 48442
    },
    {
      "epoch": 0.00029566650390625,
      "step": 48442,
      "training_step_time": 0.3792147636413574
    },
    {
      "epoch": 0.000295672607421875,
      "model_forward_time": 0.11480212211608887,
      "step": 48443
    },
    {
      "epoch": 0.000295672607421875,
      "step": 48443,
      "training_step_time": 0.38017940521240234
    },
    {
      "epoch": 0.0002956787109375,
      "model_forward_time": 0.11465835571289062,
      "step": 48444
    },
    {
      "epoch": 0.0002956787109375,
      "step": 48444,
      "training_step_time": 0.4007585048675537
    },
    {
      "epoch": 0.000295684814453125,
      "model_forward_time": 0.11479616165161133,
      "step": 48445
    },
    {
      "epoch": 0.000295684814453125,
      "step": 48445,
      "training_step_time": 0.3917245864868164
    },
    {
      "epoch": 0.00029569091796875,
      "model_forward_time": 0.11504697799682617,
      "step": 48446
    },
    {
      "epoch": 0.00029569091796875,
      "step": 48446,
      "training_step_time": 0.4036216735839844
    },
    {
      "epoch": 0.000295697021484375,
      "model_forward_time": 0.11470603942871094,
      "step": 48447
    },
    {
      "epoch": 0.000295697021484375,
      "step": 48447,
      "training_step_time": 0.386918306350708
    },
    {
      "epoch": 0.000295703125,
      "model_forward_time": 0.11492538452148438,
      "step": 48448
    },
    {
      "epoch": 0.000295703125,
      "step": 48448,
      "training_step_time": 0.3886868953704834
    },
    {
      "epoch": 0.000295709228515625,
      "model_forward_time": 0.11524772644042969,
      "step": 48449
    },
    {
      "epoch": 0.000295709228515625,
      "step": 48449,
      "training_step_time": 0.4131743907928467
    },
    {
      "epoch": 0.00029571533203125,
      "grad_norm": 0.10157117247581482,
      "learning_rate": 9.793501187860432e-06,
      "loss": 0.0375,
      "step": 48450
    },
    {
      "epoch": 0.00029571533203125,
      "model_forward_time": 0.11472845077514648,
      "step": 48450
    },
    {
      "epoch": 0.00029571533203125,
      "step": 48450,
      "training_step_time": 0.5504000186920166
    },
    {
      "epoch": 0.000295721435546875,
      "model_forward_time": 0.11513781547546387,
      "step": 48451
    },
    {
      "epoch": 0.000295721435546875,
      "step": 48451,
      "training_step_time": 0.3990747928619385
    },
    {
      "epoch": 0.0002957275390625,
      "model_forward_time": 0.1151280403137207,
      "step": 48452
    },
    {
      "epoch": 0.0002957275390625,
      "step": 48452,
      "training_step_time": 0.4461398124694824
    },
    {
      "epoch": 0.000295733642578125,
      "model_forward_time": 0.11549711227416992,
      "step": 48453
    },
    {
      "epoch": 0.000295733642578125,
      "step": 48453,
      "training_step_time": 0.3909120559692383
    },
    {
      "epoch": 0.00029573974609375,
      "model_forward_time": 0.115264892578125,
      "step": 48454
    },
    {
      "epoch": 0.00029573974609375,
      "step": 48454,
      "training_step_time": 0.4766402244567871
    },
    {
      "epoch": 0.000295745849609375,
      "model_forward_time": 0.11427760124206543,
      "step": 48455
    },
    {
      "epoch": 0.000295745849609375,
      "step": 48455,
      "training_step_time": 0.48587679862976074
    },
    {
      "epoch": 0.000295751953125,
      "model_forward_time": 0.11466622352600098,
      "step": 48456
    },
    {
      "epoch": 0.000295751953125,
      "step": 48456,
      "training_step_time": 0.5056157112121582
    },
    {
      "epoch": 0.000295758056640625,
      "model_forward_time": 0.11554479598999023,
      "step": 48457
    },
    {
      "epoch": 0.000295758056640625,
      "step": 48457,
      "training_step_time": 0.39099955558776855
    },
    {
      "epoch": 0.00029576416015625,
      "model_forward_time": 0.11479997634887695,
      "step": 48458
    },
    {
      "epoch": 0.00029576416015625,
      "step": 48458,
      "training_step_time": 0.3886892795562744
    },
    {
      "epoch": 0.000295770263671875,
      "model_forward_time": 0.11484003067016602,
      "step": 48459
    },
    {
      "epoch": 0.000295770263671875,
      "step": 48459,
      "training_step_time": 0.4459569454193115
    },
    {
      "epoch": 0.0002957763671875,
      "grad_norm": 0.0976489707827568,
      "learning_rate": 9.777125446446133e-06,
      "loss": 0.0321,
      "step": 48460
    },
    {
      "epoch": 0.0002957763671875,
      "model_forward_time": 0.11456537246704102,
      "step": 48460
    },
    {
      "epoch": 0.0002957763671875,
      "step": 48460,
      "training_step_time": 0.4068176746368408
    },
    {
      "epoch": 0.000295782470703125,
      "model_forward_time": 0.11521553993225098,
      "step": 48461
    },
    {
      "epoch": 0.000295782470703125,
      "step": 48461,
      "training_step_time": 0.4041907787322998
    },
    {
      "epoch": 0.00029578857421875,
      "model_forward_time": 0.11465716361999512,
      "step": 48462
    },
    {
      "epoch": 0.00029578857421875,
      "step": 48462,
      "training_step_time": 0.5172238349914551
    },
    {
      "epoch": 0.000295794677734375,
      "model_forward_time": 0.11532211303710938,
      "step": 48463
    },
    {
      "epoch": 0.000295794677734375,
      "step": 48463,
      "training_step_time": 0.3948335647583008
    },
    {
      "epoch": 0.00029580078125,
      "model_forward_time": 0.11402225494384766,
      "step": 48464
    },
    {
      "epoch": 0.00029580078125,
      "step": 48464,
      "training_step_time": 0.40349888801574707
    },
    {
      "epoch": 0.000295806884765625,
      "model_forward_time": 0.11589789390563965,
      "step": 48465
    },
    {
      "epoch": 0.000295806884765625,
      "step": 48465,
      "training_step_time": 0.4250905513763428
    },
    {
      "epoch": 0.00029581298828125,
      "model_forward_time": 0.1146237850189209,
      "step": 48466
    },
    {
      "epoch": 0.00029581298828125,
      "step": 48466,
      "training_step_time": 0.3761277198791504
    },
    {
      "epoch": 0.000295819091796875,
      "model_forward_time": 0.1147611141204834,
      "step": 48467
    },
    {
      "epoch": 0.000295819091796875,
      "step": 48467,
      "training_step_time": 0.4886133670806885
    },
    {
      "epoch": 0.0002958251953125,
      "model_forward_time": 0.11568522453308105,
      "step": 48468
    },
    {
      "epoch": 0.0002958251953125,
      "step": 48468,
      "training_step_time": 0.7104172706604004
    },
    {
      "epoch": 0.000295831298828125,
      "model_forward_time": 0.11401534080505371,
      "step": 48469
    },
    {
      "epoch": 0.000295831298828125,
      "step": 48469,
      "training_step_time": 0.48153209686279297
    },
    {
      "epoch": 0.00029583740234375,
      "grad_norm": 0.06510607153177261,
      "learning_rate": 9.760761923679107e-06,
      "loss": 0.0351,
      "step": 48470
    },
    {
      "epoch": 0.00029583740234375,
      "model_forward_time": 0.11509513854980469,
      "step": 48470
    },
    {
      "epoch": 0.00029583740234375,
      "step": 48470,
      "training_step_time": 0.3869478702545166
    },
    {
      "epoch": 0.000295843505859375,
      "model_forward_time": 0.1140904426574707,
      "step": 48471
    },
    {
      "epoch": 0.000295843505859375,
      "step": 48471,
      "training_step_time": 0.39821720123291016
    },
    {
      "epoch": 0.000295849609375,
      "model_forward_time": 0.1143345832824707,
      "step": 48472
    },
    {
      "epoch": 0.000295849609375,
      "step": 48472,
      "training_step_time": 0.41811633110046387
    },
    {
      "epoch": 0.000295855712890625,
      "model_forward_time": 0.11448788642883301,
      "step": 48473
    },
    {
      "epoch": 0.000295855712890625,
      "step": 48473,
      "training_step_time": 0.4139890670776367
    },
    {
      "epoch": 0.00029586181640625,
      "model_forward_time": 0.11499357223510742,
      "step": 48474
    },
    {
      "epoch": 0.00029586181640625,
      "step": 48474,
      "training_step_time": 0.4425344467163086
    },
    {
      "epoch": 0.000295867919921875,
      "model_forward_time": 0.11534762382507324,
      "step": 48475
    },
    {
      "epoch": 0.000295867919921875,
      "step": 48475,
      "training_step_time": 0.3923685550689697
    },
    {
      "epoch": 0.0002958740234375,
      "model_forward_time": 0.11549758911132812,
      "step": 48476
    },
    {
      "epoch": 0.0002958740234375,
      "step": 48476,
      "training_step_time": 0.38895177841186523
    },
    {
      "epoch": 0.000295880126953125,
      "model_forward_time": 0.1156167984008789,
      "step": 48477
    },
    {
      "epoch": 0.000295880126953125,
      "step": 48477,
      "training_step_time": 0.39310717582702637
    },
    {
      "epoch": 0.00029588623046875,
      "model_forward_time": 0.11537861824035645,
      "step": 48478
    },
    {
      "epoch": 0.00029588623046875,
      "step": 48478,
      "training_step_time": 0.3800809383392334
    },
    {
      "epoch": 0.000295892333984375,
      "model_forward_time": 0.11525201797485352,
      "step": 48479
    },
    {
      "epoch": 0.000295892333984375,
      "step": 48479,
      "training_step_time": 0.3926365375518799
    },
    {
      "epoch": 0.0002958984375,
      "grad_norm": 0.06975120306015015,
      "learning_rate": 9.744410624530148e-06,
      "loss": 0.0331,
      "step": 48480
    },
    {
      "epoch": 0.0002958984375,
      "model_forward_time": 0.1146845817565918,
      "step": 48480
    },
    {
      "epoch": 0.0002958984375,
      "step": 48480,
      "training_step_time": 0.5644583702087402
    },
    {
      "epoch": 0.000295904541015625,
      "model_forward_time": 0.11535167694091797,
      "step": 48481
    },
    {
      "epoch": 0.000295904541015625,
      "step": 48481,
      "training_step_time": 0.44764018058776855
    },
    {
      "epoch": 0.00029591064453125,
      "model_forward_time": 0.11603164672851562,
      "step": 48482
    },
    {
      "epoch": 0.00029591064453125,
      "step": 48482,
      "training_step_time": 0.43862414360046387
    },
    {
      "epoch": 0.000295916748046875,
      "model_forward_time": 0.11515545845031738,
      "step": 48483
    },
    {
      "epoch": 0.000295916748046875,
      "step": 48483,
      "training_step_time": 0.4949376583099365
    },
    {
      "epoch": 0.0002959228515625,
      "model_forward_time": 0.11595869064331055,
      "step": 48484
    },
    {
      "epoch": 0.0002959228515625,
      "step": 48484,
      "training_step_time": 0.4079470634460449
    },
    {
      "epoch": 0.000295928955078125,
      "model_forward_time": 0.11478400230407715,
      "step": 48485
    },
    {
      "epoch": 0.000295928955078125,
      "step": 48485,
      "training_step_time": 0.38952040672302246
    },
    {
      "epoch": 0.00029593505859375,
      "model_forward_time": 0.11521029472351074,
      "step": 48486
    },
    {
      "epoch": 0.00029593505859375,
      "step": 48486,
      "training_step_time": 0.3854329586029053
    },
    {
      "epoch": 0.000295941162109375,
      "model_forward_time": 0.11509346961975098,
      "step": 48487
    },
    {
      "epoch": 0.000295941162109375,
      "step": 48487,
      "training_step_time": 0.3803706169128418
    },
    {
      "epoch": 0.000295947265625,
      "model_forward_time": 0.11533927917480469,
      "step": 48488
    },
    {
      "epoch": 0.000295947265625,
      "step": 48488,
      "training_step_time": 0.3751821517944336
    },
    {
      "epoch": 0.000295953369140625,
      "model_forward_time": 0.11549592018127441,
      "step": 48489
    },
    {
      "epoch": 0.000295953369140625,
      "step": 48489,
      "training_step_time": 0.38105344772338867
    },
    {
      "epoch": 0.00029595947265625,
      "grad_norm": 0.1313282549381256,
      "learning_rate": 9.728071553966339e-06,
      "loss": 0.0361,
      "step": 48490
    },
    {
      "epoch": 0.00029595947265625,
      "model_forward_time": 0.1154026985168457,
      "step": 48490
    },
    {
      "epoch": 0.00029595947265625,
      "step": 48490,
      "training_step_time": 0.38757848739624023
    },
    {
      "epoch": 0.000295965576171875,
      "model_forward_time": 0.11572408676147461,
      "step": 48491
    },
    {
      "epoch": 0.000295965576171875,
      "step": 48491,
      "training_step_time": 0.3773026466369629
    },
    {
      "epoch": 0.0002959716796875,
      "model_forward_time": 0.11486554145812988,
      "step": 48492
    },
    {
      "epoch": 0.0002959716796875,
      "step": 48492,
      "training_step_time": 0.6954293251037598
    },
    {
      "epoch": 0.000295977783203125,
      "model_forward_time": 0.11479806900024414,
      "step": 48493
    },
    {
      "epoch": 0.000295977783203125,
      "step": 48493,
      "training_step_time": 0.39849042892456055
    },
    {
      "epoch": 0.00029598388671875,
      "model_forward_time": 0.1151280403137207,
      "step": 48494
    },
    {
      "epoch": 0.00029598388671875,
      "step": 48494,
      "training_step_time": 0.38345861434936523
    },
    {
      "epoch": 0.000295989990234375,
      "model_forward_time": 0.11537027359008789,
      "step": 48495
    },
    {
      "epoch": 0.000295989990234375,
      "step": 48495,
      "training_step_time": 0.42044591903686523
    },
    {
      "epoch": 0.00029599609375,
      "model_forward_time": 0.11485767364501953,
      "step": 48496
    },
    {
      "epoch": 0.00029599609375,
      "step": 48496,
      "training_step_time": 0.36667704582214355
    },
    {
      "epoch": 0.000296002197265625,
      "model_forward_time": 0.1147453784942627,
      "step": 48497
    },
    {
      "epoch": 0.000296002197265625,
      "step": 48497,
      "training_step_time": 0.4418962001800537
    },
    {
      "epoch": 0.00029600830078125,
      "model_forward_time": 0.11516356468200684,
      "step": 48498
    },
    {
      "epoch": 0.00029600830078125,
      "step": 48498,
      "training_step_time": 0.5277547836303711
    },
    {
      "epoch": 0.000296014404296875,
      "model_forward_time": 0.11494636535644531,
      "step": 48499
    },
    {
      "epoch": 0.000296014404296875,
      "step": 48499,
      "training_step_time": 0.42365217208862305
    },
    {
      "epoch": 0.0002960205078125,
      "grad_norm": 0.1107926145195961,
      "learning_rate": 9.711744716951093e-06,
      "loss": 0.0379,
      "step": 48500
    },
    {
      "epoch": 0.0002960205078125,
      "model_forward_time": 0.11573266983032227,
      "step": 48500
    },
    {
      "epoch": 0.0002960205078125,
      "step": 48500,
      "training_step_time": 0.39968085289001465
    },
    {
      "epoch": 0.000296026611328125,
      "model_forward_time": 0.1145620346069336,
      "step": 48501
    },
    {
      "epoch": 0.000296026611328125,
      "step": 48501,
      "training_step_time": 0.4097001552581787
    },
    {
      "epoch": 0.00029603271484375,
      "model_forward_time": 0.1146383285522461,
      "step": 48502
    },
    {
      "epoch": 0.00029603271484375,
      "step": 48502,
      "training_step_time": 0.384566068649292
    },
    {
      "epoch": 0.000296038818359375,
      "model_forward_time": 0.11488103866577148,
      "step": 48503
    },
    {
      "epoch": 0.000296038818359375,
      "step": 48503,
      "training_step_time": 0.3928515911102295
    },
    {
      "epoch": 0.000296044921875,
      "model_forward_time": 0.11522698402404785,
      "step": 48504
    },
    {
      "epoch": 0.000296044921875,
      "step": 48504,
      "training_step_time": 0.6563382148742676
    },
    {
      "epoch": 0.000296051025390625,
      "model_forward_time": 0.11503458023071289,
      "step": 48505
    },
    {
      "epoch": 0.000296051025390625,
      "step": 48505,
      "training_step_time": 0.3858942985534668
    },
    {
      "epoch": 0.00029605712890625,
      "model_forward_time": 0.11448454856872559,
      "step": 48506
    },
    {
      "epoch": 0.00029605712890625,
      "step": 48506,
      "training_step_time": 0.39835596084594727
    },
    {
      "epoch": 0.000296063232421875,
      "model_forward_time": 0.11550784111022949,
      "step": 48507
    },
    {
      "epoch": 0.000296063232421875,
      "step": 48507,
      "training_step_time": 0.3903827667236328
    },
    {
      "epoch": 0.0002960693359375,
      "model_forward_time": 0.11515951156616211,
      "step": 48508
    },
    {
      "epoch": 0.0002960693359375,
      "step": 48508,
      "training_step_time": 0.38181424140930176
    },
    {
      "epoch": 0.000296075439453125,
      "model_forward_time": 0.11443376541137695,
      "step": 48509
    },
    {
      "epoch": 0.000296075439453125,
      "step": 48509,
      "training_step_time": 0.4571051597595215
    },
    {
      "epoch": 0.00029608154296875,
      "grad_norm": 0.0971231460571289,
      "learning_rate": 9.695430118444048e-06,
      "loss": 0.0362,
      "step": 48510
    },
    {
      "epoch": 0.00029608154296875,
      "model_forward_time": 0.11495304107666016,
      "step": 48510
    },
    {
      "epoch": 0.00029608154296875,
      "step": 48510,
      "training_step_time": 0.41169214248657227
    },
    {
      "epoch": 0.000296087646484375,
      "model_forward_time": 0.11591386795043945,
      "step": 48511
    },
    {
      "epoch": 0.000296087646484375,
      "step": 48511,
      "training_step_time": 0.46832966804504395
    },
    {
      "epoch": 0.00029609375,
      "model_forward_time": 0.11497116088867188,
      "step": 48512
    },
    {
      "epoch": 0.00029609375,
      "step": 48512,
      "training_step_time": 0.44180750846862793
    },
    {
      "epoch": 0.000296099853515625,
      "model_forward_time": 0.11511063575744629,
      "step": 48513
    },
    {
      "epoch": 0.000296099853515625,
      "step": 48513,
      "training_step_time": 0.38420915603637695
    },
    {
      "epoch": 0.00029610595703125,
      "model_forward_time": 0.11566591262817383,
      "step": 48514
    },
    {
      "epoch": 0.00029610595703125,
      "step": 48514,
      "training_step_time": 0.4147019386291504
    },
    {
      "epoch": 0.000296112060546875,
      "model_forward_time": 0.11496901512145996,
      "step": 48515
    },
    {
      "epoch": 0.000296112060546875,
      "step": 48515,
      "training_step_time": 0.40387701988220215
    },
    {
      "epoch": 0.0002961181640625,
      "model_forward_time": 0.11491775512695312,
      "step": 48516
    },
    {
      "epoch": 0.0002961181640625,
      "step": 48516,
      "training_step_time": 0.5093863010406494
    },
    {
      "epoch": 0.000296124267578125,
      "model_forward_time": 0.11486577987670898,
      "step": 48517
    },
    {
      "epoch": 0.000296124267578125,
      "step": 48517,
      "training_step_time": 0.40105581283569336
    },
    {
      "epoch": 0.00029613037109375,
      "model_forward_time": 0.11462259292602539,
      "step": 48518
    },
    {
      "epoch": 0.00029613037109375,
      "step": 48518,
      "training_step_time": 0.38945722579956055
    },
    {
      "epoch": 0.000296136474609375,
      "model_forward_time": 0.1149601936340332,
      "step": 48519
    },
    {
      "epoch": 0.000296136474609375,
      "step": 48519,
      "training_step_time": 0.39456820487976074
    },
    {
      "epoch": 0.000296142578125,
      "grad_norm": 0.1240183487534523,
      "learning_rate": 9.679127763401152e-06,
      "loss": 0.0339,
      "step": 48520
    },
    {
      "epoch": 0.000296142578125,
      "model_forward_time": 0.11438894271850586,
      "step": 48520
    },
    {
      "epoch": 0.000296142578125,
      "step": 48520,
      "training_step_time": 0.40201902389526367
    },
    {
      "epoch": 0.000296148681640625,
      "model_forward_time": 0.11513519287109375,
      "step": 48521
    },
    {
      "epoch": 0.000296148681640625,
      "step": 48521,
      "training_step_time": 0.4158761501312256
    },
    {
      "epoch": 0.00029615478515625,
      "model_forward_time": 0.11546516418457031,
      "step": 48522
    },
    {
      "epoch": 0.00029615478515625,
      "step": 48522,
      "training_step_time": 0.6089510917663574
    },
    {
      "epoch": 0.000296160888671875,
      "model_forward_time": 0.11513686180114746,
      "step": 48523
    },
    {
      "epoch": 0.000296160888671875,
      "step": 48523,
      "training_step_time": 0.4478144645690918
    },
    {
      "epoch": 0.0002961669921875,
      "model_forward_time": 0.11440587043762207,
      "step": 48524
    },
    {
      "epoch": 0.0002961669921875,
      "step": 48524,
      "training_step_time": 0.36547303199768066
    },
    {
      "epoch": 0.000296173095703125,
      "model_forward_time": 0.11585807800292969,
      "step": 48525
    },
    {
      "epoch": 0.000296173095703125,
      "step": 48525,
      "training_step_time": 0.45235490798950195
    },
    {
      "epoch": 0.00029617919921875,
      "model_forward_time": 0.11489748954772949,
      "step": 48526
    },
    {
      "epoch": 0.00029617919921875,
      "step": 48526,
      "training_step_time": 0.3884902000427246
    },
    {
      "epoch": 0.000296185302734375,
      "model_forward_time": 0.1144869327545166,
      "step": 48527
    },
    {
      "epoch": 0.000296185302734375,
      "step": 48527,
      "training_step_time": 0.3985133171081543
    },
    {
      "epoch": 0.00029619140625,
      "model_forward_time": 0.11516928672790527,
      "step": 48528
    },
    {
      "epoch": 0.00029619140625,
      "step": 48528,
      "training_step_time": 0.4520838260650635
    },
    {
      "epoch": 0.000296197509765625,
      "model_forward_time": 0.11491918563842773,
      "step": 48529
    },
    {
      "epoch": 0.000296197509765625,
      "step": 48529,
      "training_step_time": 0.4132723808288574
    },
    {
      "epoch": 0.00029620361328125,
      "grad_norm": 0.10023541748523712,
      "learning_rate": 9.662837656774632e-06,
      "loss": 0.0342,
      "step": 48530
    },
    {
      "epoch": 0.00029620361328125,
      "model_forward_time": 0.11480259895324707,
      "step": 48530
    },
    {
      "epoch": 0.00029620361328125,
      "step": 48530,
      "training_step_time": 0.39736509323120117
    },
    {
      "epoch": 0.000296209716796875,
      "model_forward_time": 0.11491513252258301,
      "step": 48531
    },
    {
      "epoch": 0.000296209716796875,
      "step": 48531,
      "training_step_time": 0.3924834728240967
    },
    {
      "epoch": 0.0002962158203125,
      "model_forward_time": 0.11566758155822754,
      "step": 48532
    },
    {
      "epoch": 0.0002962158203125,
      "step": 48532,
      "training_step_time": 0.3925466537475586
    },
    {
      "epoch": 0.000296221923828125,
      "model_forward_time": 0.11487555503845215,
      "step": 48533
    },
    {
      "epoch": 0.000296221923828125,
      "step": 48533,
      "training_step_time": 0.39064908027648926
    },
    {
      "epoch": 0.00029622802734375,
      "model_forward_time": 0.11500358581542969,
      "step": 48534
    },
    {
      "epoch": 0.00029622802734375,
      "step": 48534,
      "training_step_time": 0.5715553760528564
    },
    {
      "epoch": 0.000296234130859375,
      "model_forward_time": 0.11495113372802734,
      "step": 48535
    },
    {
      "epoch": 0.000296234130859375,
      "step": 48535,
      "training_step_time": 0.39209461212158203
    },
    {
      "epoch": 0.000296240234375,
      "model_forward_time": 0.11499452590942383,
      "step": 48536
    },
    {
      "epoch": 0.000296240234375,
      "step": 48536,
      "training_step_time": 0.38740110397338867
    },
    {
      "epoch": 0.000296246337890625,
      "model_forward_time": 0.11496686935424805,
      "step": 48537
    },
    {
      "epoch": 0.000296246337890625,
      "step": 48537,
      "training_step_time": 0.47878599166870117
    },
    {
      "epoch": 0.00029625244140625,
      "model_forward_time": 0.11530780792236328,
      "step": 48538
    },
    {
      "epoch": 0.00029625244140625,
      "step": 48538,
      "training_step_time": 0.44889187812805176
    },
    {
      "epoch": 0.000296258544921875,
      "model_forward_time": 0.11460185050964355,
      "step": 48539
    },
    {
      "epoch": 0.000296258544921875,
      "step": 48539,
      "training_step_time": 0.4122645854949951
    },
    {
      "epoch": 0.0002962646484375,
      "grad_norm": 0.10192177444696426,
      "learning_rate": 9.646559803512994e-06,
      "loss": 0.0364,
      "step": 48540
    },
    {
      "epoch": 0.0002962646484375,
      "model_forward_time": 0.11486577987670898,
      "step": 48540
    },
    {
      "epoch": 0.0002962646484375,
      "step": 48540,
      "training_step_time": 0.5036463737487793
    },
    {
      "epoch": 0.000296270751953125,
      "model_forward_time": 0.11507010459899902,
      "step": 48541
    },
    {
      "epoch": 0.000296270751953125,
      "step": 48541,
      "training_step_time": 0.3883490562438965
    },
    {
      "epoch": 0.00029627685546875,
      "model_forward_time": 0.11520648002624512,
      "step": 48542
    },
    {
      "epoch": 0.00029627685546875,
      "step": 48542,
      "training_step_time": 0.3827486038208008
    },
    {
      "epoch": 0.000296282958984375,
      "model_forward_time": 0.11550068855285645,
      "step": 48543
    },
    {
      "epoch": 0.000296282958984375,
      "step": 48543,
      "training_step_time": 0.3908541202545166
    },
    {
      "epoch": 0.0002962890625,
      "model_forward_time": 0.11502552032470703,
      "step": 48544
    },
    {
      "epoch": 0.0002962890625,
      "step": 48544,
      "training_step_time": 0.3880040645599365
    },
    {
      "epoch": 0.000296295166015625,
      "model_forward_time": 0.11616182327270508,
      "step": 48545
    },
    {
      "epoch": 0.000296295166015625,
      "step": 48545,
      "training_step_time": 0.38964271545410156
    },
    {
      "epoch": 0.00029630126953125,
      "model_forward_time": 0.11572051048278809,
      "step": 48546
    },
    {
      "epoch": 0.00029630126953125,
      "step": 48546,
      "training_step_time": 0.678483247756958
    },
    {
      "epoch": 0.000296307373046875,
      "model_forward_time": 0.11566495895385742,
      "step": 48547
    },
    {
      "epoch": 0.000296307373046875,
      "step": 48547,
      "training_step_time": 0.39490199089050293
    },
    {
      "epoch": 0.0002963134765625,
      "model_forward_time": 0.11566948890686035,
      "step": 48548
    },
    {
      "epoch": 0.0002963134765625,
      "step": 48548,
      "training_step_time": 0.39696335792541504
    },
    {
      "epoch": 0.000296319580078125,
      "model_forward_time": 0.11530303955078125,
      "step": 48549
    },
    {
      "epoch": 0.000296319580078125,
      "step": 48549,
      "training_step_time": 0.38460588455200195
    },
    {
      "epoch": 0.00029632568359375,
      "grad_norm": 0.09508448094129562,
      "learning_rate": 9.630294208560998e-06,
      "loss": 0.0358,
      "step": 48550
    },
    {
      "epoch": 0.00029632568359375,
      "model_forward_time": 0.1153874397277832,
      "step": 48550
    },
    {
      "epoch": 0.00029632568359375,
      "step": 48550,
      "training_step_time": 0.38546276092529297
    },
    {
      "epoch": 0.000296331787109375,
      "model_forward_time": 0.11522221565246582,
      "step": 48551
    },
    {
      "epoch": 0.000296331787109375,
      "step": 48551,
      "training_step_time": 0.4762887954711914
    },
    {
      "epoch": 0.000296337890625,
      "model_forward_time": 0.11516451835632324,
      "step": 48552
    },
    {
      "epoch": 0.000296337890625,
      "step": 48552,
      "training_step_time": 0.4664444923400879
    },
    {
      "epoch": 0.000296343994140625,
      "model_forward_time": 0.11519265174865723,
      "step": 48553
    },
    {
      "epoch": 0.000296343994140625,
      "step": 48553,
      "training_step_time": 0.4692103862762451
    },
    {
      "epoch": 0.00029635009765625,
      "model_forward_time": 0.11451053619384766,
      "step": 48554
    },
    {
      "epoch": 0.00029635009765625,
      "step": 48554,
      "training_step_time": 0.40674281120300293
    },
    {
      "epoch": 0.000296356201171875,
      "model_forward_time": 0.11438822746276855,
      "step": 48555
    },
    {
      "epoch": 0.000296356201171875,
      "step": 48555,
      "training_step_time": 0.3838353157043457
    },
    {
      "epoch": 0.0002963623046875,
      "model_forward_time": 0.11496496200561523,
      "step": 48556
    },
    {
      "epoch": 0.0002963623046875,
      "step": 48556,
      "training_step_time": 0.38472676277160645
    },
    {
      "epoch": 0.000296368408203125,
      "model_forward_time": 0.11487531661987305,
      "step": 48557
    },
    {
      "epoch": 0.000296368408203125,
      "step": 48557,
      "training_step_time": 0.3864600658416748
    },
    {
      "epoch": 0.00029637451171875,
      "model_forward_time": 0.11477184295654297,
      "step": 48558
    },
    {
      "epoch": 0.00029637451171875,
      "step": 48558,
      "training_step_time": 0.5408995151519775
    },
    {
      "epoch": 0.000296380615234375,
      "model_forward_time": 0.11501932144165039,
      "step": 48559
    },
    {
      "epoch": 0.000296380615234375,
      "step": 48559,
      "training_step_time": 0.39206600189208984
    },
    {
      "epoch": 0.00029638671875,
      "grad_norm": 0.10477177053689957,
      "learning_rate": 9.614040876859748e-06,
      "loss": 0.035,
      "step": 48560
    },
    {
      "epoch": 0.00029638671875,
      "model_forward_time": 0.11474776268005371,
      "step": 48560
    },
    {
      "epoch": 0.00029638671875,
      "step": 48560,
      "training_step_time": 0.3846571445465088
    },
    {
      "epoch": 0.000296392822265625,
      "model_forward_time": 0.11568903923034668,
      "step": 48561
    },
    {
      "epoch": 0.000296392822265625,
      "step": 48561,
      "training_step_time": 0.3887474536895752
    },
    {
      "epoch": 0.00029639892578125,
      "model_forward_time": 0.11549806594848633,
      "step": 48562
    },
    {
      "epoch": 0.00029639892578125,
      "step": 48562,
      "training_step_time": 0.39157795906066895
    },
    {
      "epoch": 0.000296405029296875,
      "model_forward_time": 0.11499357223510742,
      "step": 48563
    },
    {
      "epoch": 0.000296405029296875,
      "step": 48563,
      "training_step_time": 0.4002656936645508
    },
    {
      "epoch": 0.0002964111328125,
      "model_forward_time": 0.1151430606842041,
      "step": 48564
    },
    {
      "epoch": 0.0002964111328125,
      "step": 48564,
      "training_step_time": 0.7297611236572266
    },
    {
      "epoch": 0.000296417236328125,
      "model_forward_time": 0.11500406265258789,
      "step": 48565
    },
    {
      "epoch": 0.000296417236328125,
      "step": 48565,
      "training_step_time": 0.4220125675201416
    },
    {
      "epoch": 0.00029642333984375,
      "model_forward_time": 0.11464905738830566,
      "step": 48566
    },
    {
      "epoch": 0.00029642333984375,
      "step": 48566,
      "training_step_time": 0.3757927417755127
    },
    {
      "epoch": 0.000296429443359375,
      "model_forward_time": 0.11439967155456543,
      "step": 48567
    },
    {
      "epoch": 0.000296429443359375,
      "step": 48567,
      "training_step_time": 0.42926812171936035
    },
    {
      "epoch": 0.000296435546875,
      "model_forward_time": 0.11437630653381348,
      "step": 48568
    },
    {
      "epoch": 0.000296435546875,
      "step": 48568,
      "training_step_time": 0.4538304805755615
    },
    {
      "epoch": 0.000296441650390625,
      "model_forward_time": 0.11425971984863281,
      "step": 48569
    },
    {
      "epoch": 0.000296441650390625,
      "step": 48569,
      "training_step_time": 0.3836977481842041
    },
    {
      "epoch": 0.00029644775390625,
      "grad_norm": 0.09158483892679214,
      "learning_rate": 9.597799813346525e-06,
      "loss": 0.0352,
      "step": 48570
    },
    {
      "epoch": 0.00029644775390625,
      "model_forward_time": 0.11487936973571777,
      "step": 48570
    },
    {
      "epoch": 0.00029644775390625,
      "step": 48570,
      "training_step_time": 0.4608891010284424
    },
    {
      "epoch": 0.000296453857421875,
      "model_forward_time": 0.1147921085357666,
      "step": 48571
    },
    {
      "epoch": 0.000296453857421875,
      "step": 48571,
      "training_step_time": 0.39054274559020996
    },
    {
      "epoch": 0.0002964599609375,
      "model_forward_time": 0.11470961570739746,
      "step": 48572
    },
    {
      "epoch": 0.0002964599609375,
      "step": 48572,
      "training_step_time": 0.40018343925476074
    },
    {
      "epoch": 0.000296466064453125,
      "model_forward_time": 0.1153714656829834,
      "step": 48573
    },
    {
      "epoch": 0.000296466064453125,
      "step": 48573,
      "training_step_time": 0.401233434677124
    },
    {
      "epoch": 0.00029647216796875,
      "model_forward_time": 0.11606311798095703,
      "step": 48574
    },
    {
      "epoch": 0.00029647216796875,
      "step": 48574,
      "training_step_time": 0.3931865692138672
    },
    {
      "epoch": 0.000296478271484375,
      "model_forward_time": 0.11473679542541504,
      "step": 48575
    },
    {
      "epoch": 0.000296478271484375,
      "step": 48575,
      "training_step_time": 0.40090179443359375
    },
    {
      "epoch": 0.000296484375,
      "model_forward_time": 0.11455893516540527,
      "step": 48576
    },
    {
      "epoch": 0.000296484375,
      "step": 48576,
      "training_step_time": 0.5690486431121826
    },
    {
      "epoch": 0.000296490478515625,
      "model_forward_time": 0.11510062217712402,
      "step": 48577
    },
    {
      "epoch": 0.000296490478515625,
      "step": 48577,
      "training_step_time": 0.3951437473297119
    },
    {
      "epoch": 0.00029649658203125,
      "model_forward_time": 0.11512112617492676,
      "step": 48578
    },
    {
      "epoch": 0.00029649658203125,
      "step": 48578,
      "training_step_time": 0.4221615791320801
    },
    {
      "epoch": 0.000296502685546875,
      "model_forward_time": 0.11469912528991699,
      "step": 48579
    },
    {
      "epoch": 0.000296502685546875,
      "step": 48579,
      "training_step_time": 0.46919918060302734
    },
    {
      "epoch": 0.0002965087890625,
      "grad_norm": 0.10742666572332382,
      "learning_rate": 9.581571022954988e-06,
      "loss": 0.0386,
      "step": 48580
    },
    {
      "epoch": 0.0002965087890625,
      "model_forward_time": 0.11460494995117188,
      "step": 48580
    },
    {
      "epoch": 0.0002965087890625,
      "step": 48580,
      "training_step_time": 0.41523027420043945
    },
    {
      "epoch": 0.000296514892578125,
      "model_forward_time": 0.11414599418640137,
      "step": 48581
    },
    {
      "epoch": 0.000296514892578125,
      "step": 48581,
      "training_step_time": 0.4845118522644043
    },
    {
      "epoch": 0.00029652099609375,
      "model_forward_time": 0.11461281776428223,
      "step": 48582
    },
    {
      "epoch": 0.00029652099609375,
      "step": 48582,
      "training_step_time": 0.43854188919067383
    },
    {
      "epoch": 0.000296527099609375,
      "model_forward_time": 0.11464858055114746,
      "step": 48583
    },
    {
      "epoch": 0.000296527099609375,
      "step": 48583,
      "training_step_time": 0.38974666595458984
    },
    {
      "epoch": 0.000296533203125,
      "model_forward_time": 0.11469149589538574,
      "step": 48584
    },
    {
      "epoch": 0.000296533203125,
      "step": 48584,
      "training_step_time": 0.3930470943450928
    },
    {
      "epoch": 0.000296539306640625,
      "model_forward_time": 0.11525082588195801,
      "step": 48585
    },
    {
      "epoch": 0.000296539306640625,
      "step": 48585,
      "training_step_time": 0.391190767288208
    },
    {
      "epoch": 0.00029654541015625,
      "model_forward_time": 0.11498284339904785,
      "step": 48586
    },
    {
      "epoch": 0.00029654541015625,
      "step": 48586,
      "training_step_time": 0.40216803550720215
    },
    {
      "epoch": 0.000296551513671875,
      "model_forward_time": 0.11547684669494629,
      "step": 48587
    },
    {
      "epoch": 0.000296551513671875,
      "step": 48587,
      "training_step_time": 0.39176154136657715
    },
    {
      "epoch": 0.0002965576171875,
      "model_forward_time": 0.1154170036315918,
      "step": 48588
    },
    {
      "epoch": 0.0002965576171875,
      "step": 48588,
      "training_step_time": 0.6371643543243408
    },
    {
      "epoch": 0.000296563720703125,
      "model_forward_time": 0.11481404304504395,
      "step": 48589
    },
    {
      "epoch": 0.000296563720703125,
      "step": 48589,
      "training_step_time": 0.39959716796875
    },
    {
      "epoch": 0.00029656982421875,
      "grad_norm": 0.134565532207489,
      "learning_rate": 9.56535451061496e-06,
      "loss": 0.0376,
      "step": 48590
    },
    {
      "epoch": 0.00029656982421875,
      "model_forward_time": 0.1146707534790039,
      "step": 48590
    },
    {
      "epoch": 0.00029656982421875,
      "step": 48590,
      "training_step_time": 0.38262271881103516
    },
    {
      "epoch": 0.000296575927734375,
      "model_forward_time": 0.11465215682983398,
      "step": 48591
    },
    {
      "epoch": 0.000296575927734375,
      "step": 48591,
      "training_step_time": 0.4054727554321289
    },
    {
      "epoch": 0.00029658203125,
      "model_forward_time": 0.11502814292907715,
      "step": 48592
    },
    {
      "epoch": 0.00029658203125,
      "step": 48592,
      "training_step_time": 0.4038698673248291
    },
    {
      "epoch": 0.000296588134765625,
      "model_forward_time": 0.1144402027130127,
      "step": 48593
    },
    {
      "epoch": 0.000296588134765625,
      "step": 48593,
      "training_step_time": 0.42158007621765137
    },
    {
      "epoch": 0.00029659423828125,
      "model_forward_time": 0.11550259590148926,
      "step": 48594
    },
    {
      "epoch": 0.00029659423828125,
      "step": 48594,
      "training_step_time": 0.4748814105987549
    },
    {
      "epoch": 0.000296600341796875,
      "model_forward_time": 0.11520767211914062,
      "step": 48595
    },
    {
      "epoch": 0.000296600341796875,
      "step": 48595,
      "training_step_time": 0.45970797538757324
    },
    {
      "epoch": 0.0002966064453125,
      "model_forward_time": 0.11511540412902832,
      "step": 48596
    },
    {
      "epoch": 0.0002966064453125,
      "step": 48596,
      "training_step_time": 0.40996551513671875
    },
    {
      "epoch": 0.000296612548828125,
      "model_forward_time": 0.1151268482208252,
      "step": 48597
    },
    {
      "epoch": 0.000296612548828125,
      "step": 48597,
      "training_step_time": 0.4366457462310791
    },
    {
      "epoch": 0.00029661865234375,
      "model_forward_time": 0.1155843734741211,
      "step": 48598
    },
    {
      "epoch": 0.00029661865234375,
      "step": 48598,
      "training_step_time": 0.3839719295501709
    },
    {
      "epoch": 0.000296624755859375,
      "model_forward_time": 0.11471199989318848,
      "step": 48599
    },
    {
      "epoch": 0.000296624755859375,
      "step": 48599,
      "training_step_time": 0.3763465881347656
    },
    {
      "epoch": 0.000296630859375,
      "grad_norm": 0.11888428032398224,
      "learning_rate": 9.549150281252633e-06,
      "loss": 0.0376,
      "step": 48600
    },
    {
      "epoch": 0.000296630859375,
      "model_forward_time": 0.11566376686096191,
      "step": 48600
    },
    {
      "epoch": 0.000296630859375,
      "step": 48600,
      "training_step_time": 0.5097324848175049
    },
    {
      "epoch": 0.000296636962890625,
      "model_forward_time": 0.11594891548156738,
      "step": 48601
    },
    {
      "epoch": 0.000296636962890625,
      "step": 48601,
      "training_step_time": 0.38896822929382324
    },
    {
      "epoch": 0.00029664306640625,
      "model_forward_time": 0.11569762229919434,
      "step": 48602
    },
    {
      "epoch": 0.00029664306640625,
      "step": 48602,
      "training_step_time": 0.38379859924316406
    },
    {
      "epoch": 0.000296649169921875,
      "model_forward_time": 0.11503458023071289,
      "step": 48603
    },
    {
      "epoch": 0.000296649169921875,
      "step": 48603,
      "training_step_time": 0.38808584213256836
    },
    {
      "epoch": 0.0002966552734375,
      "model_forward_time": 0.11525416374206543,
      "step": 48604
    },
    {
      "epoch": 0.0002966552734375,
      "step": 48604,
      "training_step_time": 0.3981800079345703
    },
    {
      "epoch": 0.000296661376953125,
      "model_forward_time": 0.11562776565551758,
      "step": 48605
    },
    {
      "epoch": 0.000296661376953125,
      "step": 48605,
      "training_step_time": 0.3913578987121582
    },
    {
      "epoch": 0.00029666748046875,
      "model_forward_time": 0.11455917358398438,
      "step": 48606
    },
    {
      "epoch": 0.00029666748046875,
      "step": 48606,
      "training_step_time": 0.7979981899261475
    },
    {
      "epoch": 0.000296673583984375,
      "model_forward_time": 0.11488008499145508,
      "step": 48607
    },
    {
      "epoch": 0.000296673583984375,
      "step": 48607,
      "training_step_time": 0.4014322757720947
    },
    {
      "epoch": 0.0002966796875,
      "model_forward_time": 0.11476898193359375,
      "step": 48608
    },
    {
      "epoch": 0.0002966796875,
      "step": 48608,
      "training_step_time": 0.42385411262512207
    },
    {
      "epoch": 0.000296685791015625,
      "model_forward_time": 0.11441516876220703,
      "step": 48609
    },
    {
      "epoch": 0.000296685791015625,
      "step": 48609,
      "training_step_time": 0.49788928031921387
    },
    {
      "epoch": 0.00029669189453125,
      "grad_norm": 0.11561247706413269,
      "learning_rate": 9.532958339790404e-06,
      "loss": 0.0405,
      "step": 48610
    },
    {
      "epoch": 0.00029669189453125,
      "model_forward_time": 0.11501288414001465,
      "step": 48610
    },
    {
      "epoch": 0.00029669189453125,
      "step": 48610,
      "training_step_time": 0.48029470443725586
    },
    {
      "epoch": 0.000296697998046875,
      "model_forward_time": 0.11418914794921875,
      "step": 48611
    },
    {
      "epoch": 0.000296697998046875,
      "step": 48611,
      "training_step_time": 0.40268373489379883
    },
    {
      "epoch": 0.0002967041015625,
      "model_forward_time": 0.11510300636291504,
      "step": 48612
    },
    {
      "epoch": 0.0002967041015625,
      "step": 48612,
      "training_step_time": 0.39351534843444824
    },
    {
      "epoch": 0.000296710205078125,
      "model_forward_time": 0.11566448211669922,
      "step": 48613
    },
    {
      "epoch": 0.000296710205078125,
      "step": 48613,
      "training_step_time": 0.3804340362548828
    },
    {
      "epoch": 0.00029671630859375,
      "model_forward_time": 0.11447930335998535,
      "step": 48614
    },
    {
      "epoch": 0.00029671630859375,
      "step": 48614,
      "training_step_time": 0.3914504051208496
    },
    {
      "epoch": 0.000296722412109375,
      "model_forward_time": 0.11510205268859863,
      "step": 48615
    },
    {
      "epoch": 0.000296722412109375,
      "step": 48615,
      "training_step_time": 0.39275670051574707
    },
    {
      "epoch": 0.000296728515625,
      "model_forward_time": 0.11504673957824707,
      "step": 48616
    },
    {
      "epoch": 0.000296728515625,
      "step": 48616,
      "training_step_time": 0.39118099212646484
    },
    {
      "epoch": 0.000296734619140625,
      "model_forward_time": 0.11583256721496582,
      "step": 48617
    },
    {
      "epoch": 0.000296734619140625,
      "step": 48617,
      "training_step_time": 0.9174182415008545
    },
    {
      "epoch": 0.00029674072265625,
      "model_forward_time": 0.11471247673034668,
      "step": 48618
    },
    {
      "epoch": 0.00029674072265625,
      "step": 48618,
      "training_step_time": 0.3873767852783203
    },
    {
      "epoch": 0.000296746826171875,
      "model_forward_time": 0.11462163925170898,
      "step": 48619
    },
    {
      "epoch": 0.000296746826171875,
      "step": 48619,
      "training_step_time": 0.38576674461364746
    },
    {
      "epoch": 0.0002967529296875,
      "grad_norm": 0.10553807765245438,
      "learning_rate": 9.51677869114696e-06,
      "loss": 0.036,
      "step": 48620
    },
    {
      "epoch": 0.0002967529296875,
      "model_forward_time": 0.11498188972473145,
      "step": 48620
    },
    {
      "epoch": 0.0002967529296875,
      "step": 48620,
      "training_step_time": 0.4393792152404785
    },
    {
      "epoch": 0.000296759033203125,
      "model_forward_time": 0.11474776268005371,
      "step": 48621
    },
    {
      "epoch": 0.000296759033203125,
      "step": 48621,
      "training_step_time": 0.4201791286468506
    },
    {
      "epoch": 0.00029676513671875,
      "model_forward_time": 0.1146857738494873,
      "step": 48622
    },
    {
      "epoch": 0.00029676513671875,
      "step": 48622,
      "training_step_time": 0.43761181831359863
    },
    {
      "epoch": 0.000296771240234375,
      "model_forward_time": 0.1145319938659668,
      "step": 48623
    },
    {
      "epoch": 0.000296771240234375,
      "step": 48623,
      "training_step_time": 0.6475775241851807
    },
    {
      "epoch": 0.00029677734375,
      "model_forward_time": 0.11461853981018066,
      "step": 48624
    },
    {
      "epoch": 0.00029677734375,
      "step": 48624,
      "training_step_time": 0.4039597511291504
    },
    {
      "epoch": 0.000296783447265625,
      "model_forward_time": 0.11505460739135742,
      "step": 48625
    },
    {
      "epoch": 0.000296783447265625,
      "step": 48625,
      "training_step_time": 0.3837738037109375
    },
    {
      "epoch": 0.00029678955078125,
      "model_forward_time": 0.11472702026367188,
      "step": 48626
    },
    {
      "epoch": 0.00029678955078125,
      "step": 48626,
      "training_step_time": 0.3840522766113281
    },
    {
      "epoch": 0.000296795654296875,
      "model_forward_time": 0.11497616767883301,
      "step": 48627
    },
    {
      "epoch": 0.000296795654296875,
      "step": 48627,
      "training_step_time": 0.38976550102233887
    },
    {
      "epoch": 0.0002968017578125,
      "model_forward_time": 0.11455678939819336,
      "step": 48628
    },
    {
      "epoch": 0.0002968017578125,
      "step": 48628,
      "training_step_time": 0.3847968578338623
    },
    {
      "epoch": 0.000296807861328125,
      "model_forward_time": 0.11519598960876465,
      "step": 48629
    },
    {
      "epoch": 0.000296807861328125,
      "step": 48629,
      "training_step_time": 0.8113367557525635
    },
    {
      "epoch": 0.00029681396484375,
      "grad_norm": 0.12154976278543472,
      "learning_rate": 9.500611340237258e-06,
      "loss": 0.0391,
      "step": 48630
    },
    {
      "epoch": 0.00029681396484375,
      "model_forward_time": 0.11476302146911621,
      "step": 48630
    },
    {
      "epoch": 0.00029681396484375,
      "step": 48630,
      "training_step_time": 0.40623974800109863
    },
    {
      "epoch": 0.000296820068359375,
      "model_forward_time": 0.11455559730529785,
      "step": 48631
    },
    {
      "epoch": 0.000296820068359375,
      "step": 48631,
      "training_step_time": 0.3896026611328125
    },
    {
      "epoch": 0.000296826171875,
      "model_forward_time": 0.11476349830627441,
      "step": 48632
    },
    {
      "epoch": 0.000296826171875,
      "step": 48632,
      "training_step_time": 0.3779444694519043
    },
    {
      "epoch": 0.000296832275390625,
      "model_forward_time": 0.11429715156555176,
      "step": 48633
    },
    {
      "epoch": 0.000296832275390625,
      "step": 48633,
      "training_step_time": 0.45057177543640137
    },
    {
      "epoch": 0.00029683837890625,
      "model_forward_time": 0.1142730712890625,
      "step": 48634
    },
    {
      "epoch": 0.00029683837890625,
      "step": 48634,
      "training_step_time": 0.4328620433807373
    },
    {
      "epoch": 0.000296844482421875,
      "model_forward_time": 0.11503338813781738,
      "step": 48635
    },
    {
      "epoch": 0.000296844482421875,
      "step": 48635,
      "training_step_time": 0.43372011184692383
    },
    {
      "epoch": 0.0002968505859375,
      "model_forward_time": 0.1158597469329834,
      "step": 48636
    },
    {
      "epoch": 0.0002968505859375,
      "step": 48636,
      "training_step_time": 0.5005536079406738
    },
    {
      "epoch": 0.000296856689453125,
      "model_forward_time": 0.11612248420715332,
      "step": 48637
    },
    {
      "epoch": 0.000296856689453125,
      "step": 48637,
      "training_step_time": 0.4249134063720703
    },
    {
      "epoch": 0.00029686279296875,
      "model_forward_time": 0.11610102653503418,
      "step": 48638
    },
    {
      "epoch": 0.00029686279296875,
      "step": 48638,
      "training_step_time": 0.4307565689086914
    },
    {
      "epoch": 0.000296868896484375,
      "model_forward_time": 0.11510324478149414,
      "step": 48639
    },
    {
      "epoch": 0.000296868896484375,
      "step": 48639,
      "training_step_time": 0.394606351852417
    },
    {
      "epoch": 0.000296875,
      "grad_norm": 0.1326887458562851,
      "learning_rate": 9.484456291972487e-06,
      "loss": 0.0345,
      "step": 48640
    },
    {
      "epoch": 0.000296875,
      "model_forward_time": 0.1148080825805664,
      "step": 48640
    },
    {
      "epoch": 0.000296875,
      "step": 48640,
      "training_step_time": 0.3830139636993408
    },
    {
      "epoch": 0.000296881103515625,
      "model_forward_time": 0.1150665283203125,
      "step": 48641
    },
    {
      "epoch": 0.000296881103515625,
      "step": 48641,
      "training_step_time": 0.6835882663726807
    },
    {
      "epoch": 0.00029688720703125,
      "model_forward_time": 0.11456131935119629,
      "step": 48642
    },
    {
      "epoch": 0.00029688720703125,
      "step": 48642,
      "training_step_time": 0.3923768997192383
    },
    {
      "epoch": 0.000296893310546875,
      "model_forward_time": 0.11490058898925781,
      "step": 48643
    },
    {
      "epoch": 0.000296893310546875,
      "step": 48643,
      "training_step_time": 0.4170651435852051
    },
    {
      "epoch": 0.0002968994140625,
      "model_forward_time": 0.11504697799682617,
      "step": 48644
    },
    {
      "epoch": 0.0002968994140625,
      "step": 48644,
      "training_step_time": 0.381544828414917
    },
    {
      "epoch": 0.000296905517578125,
      "model_forward_time": 0.11472439765930176,
      "step": 48645
    },
    {
      "epoch": 0.000296905517578125,
      "step": 48645,
      "training_step_time": 0.38997530937194824
    },
    {
      "epoch": 0.00029691162109375,
      "model_forward_time": 0.11493897438049316,
      "step": 48646
    },
    {
      "epoch": 0.00029691162109375,
      "step": 48646,
      "training_step_time": 0.397327184677124
    },
    {
      "epoch": 0.000296917724609375,
      "model_forward_time": 0.11473917961120605,
      "step": 48647
    },
    {
      "epoch": 0.000296917724609375,
      "step": 48647,
      "training_step_time": 0.4413902759552002
    },
    {
      "epoch": 0.000296923828125,
      "model_forward_time": 0.1151728630065918,
      "step": 48648
    },
    {
      "epoch": 0.000296923828125,
      "step": 48648,
      "training_step_time": 0.4974229335784912
    },
    {
      "epoch": 0.000296929931640625,
      "model_forward_time": 0.11525201797485352,
      "step": 48649
    },
    {
      "epoch": 0.000296929931640625,
      "step": 48649,
      "training_step_time": 0.39316415786743164
    },
    {
      "epoch": 0.00029693603515625,
      "grad_norm": 0.08735667169094086,
      "learning_rate": 9.468313551260161e-06,
      "loss": 0.0317,
      "step": 48650
    },
    {
      "epoch": 0.00029693603515625,
      "model_forward_time": 0.1155853271484375,
      "step": 48650
    },
    {
      "epoch": 0.00029693603515625,
      "step": 48650,
      "training_step_time": 0.4309849739074707
    },
    {
      "epoch": 0.000296942138671875,
      "model_forward_time": 0.11507177352905273,
      "step": 48651
    },
    {
      "epoch": 0.000296942138671875,
      "step": 48651,
      "training_step_time": 0.44147372245788574
    },
    {
      "epoch": 0.0002969482421875,
      "model_forward_time": 0.11526632308959961,
      "step": 48652
    },
    {
      "epoch": 0.0002969482421875,
      "step": 48652,
      "training_step_time": 0.4906442165374756
    },
    {
      "epoch": 0.000296954345703125,
      "model_forward_time": 0.11441683769226074,
      "step": 48653
    },
    {
      "epoch": 0.000296954345703125,
      "step": 48653,
      "training_step_time": 0.3908576965332031
    },
    {
      "epoch": 0.00029696044921875,
      "model_forward_time": 0.11543798446655273,
      "step": 48654
    },
    {
      "epoch": 0.00029696044921875,
      "step": 48654,
      "training_step_time": 0.3936271667480469
    },
    {
      "epoch": 0.000296966552734375,
      "model_forward_time": 0.11496186256408691,
      "step": 48655
    },
    {
      "epoch": 0.000296966552734375,
      "step": 48655,
      "training_step_time": 0.38976073265075684
    },
    {
      "epoch": 0.00029697265625,
      "model_forward_time": 0.11532330513000488,
      "step": 48656
    },
    {
      "epoch": 0.00029697265625,
      "step": 48656,
      "training_step_time": 0.3962070941925049
    },
    {
      "epoch": 0.000296978759765625,
      "model_forward_time": 0.11756730079650879,
      "step": 48657
    },
    {
      "epoch": 0.000296978759765625,
      "step": 48657,
      "training_step_time": 0.4141814708709717
    },
    {
      "epoch": 0.00029698486328125,
      "model_forward_time": 0.11592674255371094,
      "step": 48658
    },
    {
      "epoch": 0.00029698486328125,
      "step": 48658,
      "training_step_time": 0.38051795959472656
    },
    {
      "epoch": 0.000296990966796875,
      "model_forward_time": 0.11908912658691406,
      "step": 48659
    },
    {
      "epoch": 0.000296990966796875,
      "step": 48659,
      "training_step_time": 0.39897871017456055
    },
    {
      "epoch": 0.0002969970703125,
      "grad_norm": 0.1088559702038765,
      "learning_rate": 9.452183123004e-06,
      "loss": 0.0359,
      "step": 48660
    },
    {
      "epoch": 0.0002969970703125,
      "model_forward_time": 0.11480355262756348,
      "step": 48660
    },
    {
      "epoch": 0.0002969970703125,
      "step": 48660,
      "training_step_time": 0.40878772735595703
    },
    {
      "epoch": 0.000297003173828125,
      "model_forward_time": 0.11548423767089844,
      "step": 48661
    },
    {
      "epoch": 0.000297003173828125,
      "step": 48661,
      "training_step_time": 0.3937861919403076
    },
    {
      "epoch": 0.00029700927734375,
      "model_forward_time": 0.11462140083312988,
      "step": 48662
    },
    {
      "epoch": 0.00029700927734375,
      "step": 48662,
      "training_step_time": 0.44419169425964355
    },
    {
      "epoch": 0.000297015380859375,
      "model_forward_time": 0.11476373672485352,
      "step": 48663
    },
    {
      "epoch": 0.000297015380859375,
      "step": 48663,
      "training_step_time": 0.3771216869354248
    },
    {
      "epoch": 0.000297021484375,
      "model_forward_time": 0.11536073684692383,
      "step": 48664
    },
    {
      "epoch": 0.000297021484375,
      "step": 48664,
      "training_step_time": 0.4438471794128418
    },
    {
      "epoch": 0.000297027587890625,
      "model_forward_time": 0.11527109146118164,
      "step": 48665
    },
    {
      "epoch": 0.000297027587890625,
      "step": 48665,
      "training_step_time": 0.46309900283813477
    },
    {
      "epoch": 0.00029703369140625,
      "model_forward_time": 0.11605143547058105,
      "step": 48666
    },
    {
      "epoch": 0.00029703369140625,
      "step": 48666,
      "training_step_time": 0.4427211284637451
    },
    {
      "epoch": 0.000297039794921875,
      "model_forward_time": 0.11503291130065918,
      "step": 48667
    },
    {
      "epoch": 0.000297039794921875,
      "step": 48667,
      "training_step_time": 0.4545326232910156
    },
    {
      "epoch": 0.0002970458984375,
      "model_forward_time": 0.1145477294921875,
      "step": 48668
    },
    {
      "epoch": 0.0002970458984375,
      "step": 48668,
      "training_step_time": 0.3923153877258301
    },
    {
      "epoch": 0.000297052001953125,
      "model_forward_time": 0.11478233337402344,
      "step": 48669
    },
    {
      "epoch": 0.000297052001953125,
      "step": 48669,
      "training_step_time": 0.3828442096710205
    },
    {
      "epoch": 0.00029705810546875,
      "grad_norm": 0.1022518053650856,
      "learning_rate": 9.436065012104001e-06,
      "loss": 0.0357,
      "step": 48670
    },
    {
      "epoch": 0.00029705810546875,
      "model_forward_time": 0.11542177200317383,
      "step": 48670
    },
    {
      "epoch": 0.00029705810546875,
      "step": 48670,
      "training_step_time": 0.4024045467376709
    },
    {
      "epoch": 0.000297064208984375,
      "model_forward_time": 0.11623358726501465,
      "step": 48671
    },
    {
      "epoch": 0.000297064208984375,
      "step": 48671,
      "training_step_time": 0.40737056732177734
    },
    {
      "epoch": 0.0002970703125,
      "model_forward_time": 0.11501049995422363,
      "step": 48672
    },
    {
      "epoch": 0.0002970703125,
      "step": 48672,
      "training_step_time": 0.40366244316101074
    },
    {
      "epoch": 0.000297076416015625,
      "model_forward_time": 0.11557888984680176,
      "step": 48673
    },
    {
      "epoch": 0.000297076416015625,
      "step": 48673,
      "training_step_time": 0.40230369567871094
    },
    {
      "epoch": 0.00029708251953125,
      "model_forward_time": 0.11567926406860352,
      "step": 48674
    },
    {
      "epoch": 0.00029708251953125,
      "step": 48674,
      "training_step_time": 0.39486145973205566
    },
    {
      "epoch": 0.000297088623046875,
      "model_forward_time": 0.1144111156463623,
      "step": 48675
    },
    {
      "epoch": 0.000297088623046875,
      "step": 48675,
      "training_step_time": 0.38506174087524414
    },
    {
      "epoch": 0.0002970947265625,
      "model_forward_time": 0.1148831844329834,
      "step": 48676
    },
    {
      "epoch": 0.0002970947265625,
      "step": 48676,
      "training_step_time": 0.3980751037597656
    },
    {
      "epoch": 0.000297100830078125,
      "model_forward_time": 0.11581277847290039,
      "step": 48677
    },
    {
      "epoch": 0.000297100830078125,
      "step": 48677,
      "training_step_time": 0.45750927925109863
    },
    {
      "epoch": 0.00029710693359375,
      "model_forward_time": 0.11619400978088379,
      "step": 48678
    },
    {
      "epoch": 0.00029710693359375,
      "step": 48678,
      "training_step_time": 0.6906709671020508
    },
    {
      "epoch": 0.000297113037109375,
      "model_forward_time": 0.11481046676635742,
      "step": 48679
    },
    {
      "epoch": 0.000297113037109375,
      "step": 48679,
      "training_step_time": 0.49256134033203125
    },
    {
      "epoch": 0.000297119140625,
      "grad_norm": 0.11681334674358368,
      "learning_rate": 9.41995922345642e-06,
      "loss": 0.0395,
      "step": 48680
    },
    {
      "epoch": 0.000297119140625,
      "model_forward_time": 0.11464715003967285,
      "step": 48680
    },
    {
      "epoch": 0.000297119140625,
      "step": 48680,
      "training_step_time": 0.5050160884857178
    },
    {
      "epoch": 0.000297125244140625,
      "model_forward_time": 0.1145322322845459,
      "step": 48681
    },
    {
      "epoch": 0.000297125244140625,
      "step": 48681,
      "training_step_time": 0.3959803581237793
    },
    {
      "epoch": 0.00029713134765625,
      "model_forward_time": 0.1147458553314209,
      "step": 48682
    },
    {
      "epoch": 0.00029713134765625,
      "step": 48682,
      "training_step_time": 0.38420963287353516
    },
    {
      "epoch": 0.000297137451171875,
      "model_forward_time": 0.11458826065063477,
      "step": 48683
    },
    {
      "epoch": 0.000297137451171875,
      "step": 48683,
      "training_step_time": 0.4548509120941162
    },
    {
      "epoch": 0.0002971435546875,
      "model_forward_time": 0.11510753631591797,
      "step": 48684
    },
    {
      "epoch": 0.0002971435546875,
      "step": 48684,
      "training_step_time": 0.4121849536895752
    },
    {
      "epoch": 0.000297149658203125,
      "model_forward_time": 0.11492490768432617,
      "step": 48685
    },
    {
      "epoch": 0.000297149658203125,
      "step": 48685,
      "training_step_time": 0.3919990062713623
    },
    {
      "epoch": 0.00029715576171875,
      "model_forward_time": 0.11514735221862793,
      "step": 48686
    },
    {
      "epoch": 0.00029715576171875,
      "step": 48686,
      "training_step_time": 0.3881251811981201
    },
    {
      "epoch": 0.000297161865234375,
      "model_forward_time": 0.11536979675292969,
      "step": 48687
    },
    {
      "epoch": 0.000297161865234375,
      "step": 48687,
      "training_step_time": 0.3973352909088135
    },
    {
      "epoch": 0.00029716796875,
      "model_forward_time": 0.1155538558959961,
      "step": 48688
    },
    {
      "epoch": 0.00029716796875,
      "step": 48688,
      "training_step_time": 0.39321398735046387
    },
    {
      "epoch": 0.000297174072265625,
      "model_forward_time": 0.11602306365966797,
      "step": 48689
    },
    {
      "epoch": 0.000297174072265625,
      "step": 48689,
      "training_step_time": 1.1321609020233154
    },
    {
      "epoch": 0.00029718017578125,
      "grad_norm": 0.10340680927038193,
      "learning_rate": 9.403865761953779e-06,
      "loss": 0.036,
      "step": 48690
    },
    {
      "epoch": 0.00029718017578125,
      "model_forward_time": 0.11443781852722168,
      "step": 48690
    },
    {
      "epoch": 0.00029718017578125,
      "step": 48690,
      "training_step_time": 0.41734933853149414
    },
    {
      "epoch": 0.000297186279296875,
      "model_forward_time": 0.11408329010009766,
      "step": 48691
    },
    {
      "epoch": 0.000297186279296875,
      "step": 48691,
      "training_step_time": 0.385387659072876
    },
    {
      "epoch": 0.0002971923828125,
      "model_forward_time": 0.11444568634033203,
      "step": 48692
    },
    {
      "epoch": 0.0002971923828125,
      "step": 48692,
      "training_step_time": 0.450162410736084
    },
    {
      "epoch": 0.000297198486328125,
      "model_forward_time": 0.11455440521240234,
      "step": 48693
    },
    {
      "epoch": 0.000297198486328125,
      "step": 48693,
      "training_step_time": 0.4589681625366211
    },
    {
      "epoch": 0.00029720458984375,
      "model_forward_time": 0.11478614807128906,
      "step": 48694
    },
    {
      "epoch": 0.00029720458984375,
      "step": 48694,
      "training_step_time": 0.3832559585571289
    },
    {
      "epoch": 0.000297210693359375,
      "model_forward_time": 0.1152334213256836,
      "step": 48695
    },
    {
      "epoch": 0.000297210693359375,
      "step": 48695,
      "training_step_time": 0.4730041027069092
    },
    {
      "epoch": 0.000297216796875,
      "model_forward_time": 0.11514091491699219,
      "step": 48696
    },
    {
      "epoch": 0.000297216796875,
      "step": 48696,
      "training_step_time": 0.39052700996398926
    },
    {
      "epoch": 0.000297222900390625,
      "model_forward_time": 0.11564040184020996,
      "step": 48697
    },
    {
      "epoch": 0.000297222900390625,
      "step": 48697,
      "training_step_time": 0.38413548469543457
    },
    {
      "epoch": 0.00029722900390625,
      "model_forward_time": 0.11539745330810547,
      "step": 48698
    },
    {
      "epoch": 0.00029722900390625,
      "step": 48698,
      "training_step_time": 0.38818955421447754
    },
    {
      "epoch": 0.000297235107421875,
      "model_forward_time": 0.11472225189208984,
      "step": 48699
    },
    {
      "epoch": 0.000297235107421875,
      "step": 48699,
      "training_step_time": 0.3777644634246826
    },
    {
      "epoch": 0.0002972412109375,
      "grad_norm": 0.0956568568944931,
      "learning_rate": 9.387784632484826e-06,
      "loss": 0.0374,
      "step": 48700
    },
    {
      "epoch": 0.0002972412109375,
      "model_forward_time": 0.11524224281311035,
      "step": 48700
    },
    {
      "epoch": 0.0002972412109375,
      "step": 48700,
      "training_step_time": 0.38234877586364746
    },
    {
      "epoch": 0.000297247314453125,
      "model_forward_time": 0.11497735977172852,
      "step": 48701
    },
    {
      "epoch": 0.000297247314453125,
      "step": 48701,
      "training_step_time": 0.8662159442901611
    },
    {
      "epoch": 0.00029725341796875,
      "model_forward_time": 0.11427831649780273,
      "step": 48702
    },
    {
      "epoch": 0.00029725341796875,
      "step": 48702,
      "training_step_time": 0.41975951194763184
    },
    {
      "epoch": 0.000297259521484375,
      "model_forward_time": 0.11546683311462402,
      "step": 48703
    },
    {
      "epoch": 0.000297259521484375,
      "step": 48703,
      "training_step_time": 0.45356273651123047
    },
    {
      "epoch": 0.000297265625,
      "model_forward_time": 0.11450028419494629,
      "step": 48704
    },
    {
      "epoch": 0.000297265625,
      "step": 48704,
      "training_step_time": 0.4169590473175049
    },
    {
      "epoch": 0.000297271728515625,
      "model_forward_time": 0.11466574668884277,
      "step": 48705
    },
    {
      "epoch": 0.000297271728515625,
      "step": 48705,
      "training_step_time": 0.41072797775268555
    },
    {
      "epoch": 0.00029727783203125,
      "model_forward_time": 0.11490035057067871,
      "step": 48706
    },
    {
      "epoch": 0.00029727783203125,
      "step": 48706,
      "training_step_time": 0.39864087104797363
    },
    {
      "epoch": 0.000297283935546875,
      "model_forward_time": 0.1146249771118164,
      "step": 48707
    },
    {
      "epoch": 0.000297283935546875,
      "step": 48707,
      "training_step_time": 0.47956085205078125
    },
    {
      "epoch": 0.0002972900390625,
      "model_forward_time": 0.1146535873413086,
      "step": 48708
    },
    {
      "epoch": 0.0002972900390625,
      "step": 48708,
      "training_step_time": 0.4259312152862549
    },
    {
      "epoch": 0.000297296142578125,
      "model_forward_time": 0.11638617515563965,
      "step": 48709
    },
    {
      "epoch": 0.000297296142578125,
      "step": 48709,
      "training_step_time": 0.3945956230163574
    },
    {
      "epoch": 0.00029730224609375,
      "grad_norm": 0.0969846248626709,
      "learning_rate": 9.371715839934625e-06,
      "loss": 0.0384,
      "step": 48710
    },
    {
      "epoch": 0.00029730224609375,
      "model_forward_time": 0.11492729187011719,
      "step": 48710
    },
    {
      "epoch": 0.00029730224609375,
      "step": 48710,
      "training_step_time": 0.40058255195617676
    },
    {
      "epoch": 0.000297308349609375,
      "model_forward_time": 0.11609435081481934,
      "step": 48711
    },
    {
      "epoch": 0.000297308349609375,
      "step": 48711,
      "training_step_time": 0.3963747024536133
    },
    {
      "epoch": 0.000297314453125,
      "model_forward_time": 0.11541581153869629,
      "step": 48712
    },
    {
      "epoch": 0.000297314453125,
      "step": 48712,
      "training_step_time": 0.3961789608001709
    },
    {
      "epoch": 0.000297320556640625,
      "model_forward_time": 0.11550188064575195,
      "step": 48713
    },
    {
      "epoch": 0.000297320556640625,
      "step": 48713,
      "training_step_time": 0.7807872295379639
    },
    {
      "epoch": 0.00029732666015625,
      "model_forward_time": 0.1150977611541748,
      "step": 48714
    },
    {
      "epoch": 0.00029732666015625,
      "step": 48714,
      "training_step_time": 0.3940279483795166
    },
    {
      "epoch": 0.000297332763671875,
      "model_forward_time": 0.11473703384399414,
      "step": 48715
    },
    {
      "epoch": 0.000297332763671875,
      "step": 48715,
      "training_step_time": 0.4805338382720947
    },
    {
      "epoch": 0.0002973388671875,
      "model_forward_time": 0.1143956184387207,
      "step": 48716
    },
    {
      "epoch": 0.0002973388671875,
      "step": 48716,
      "training_step_time": 0.43727970123291016
    },
    {
      "epoch": 0.000297344970703125,
      "model_forward_time": 0.11507463455200195,
      "step": 48717
    },
    {
      "epoch": 0.000297344970703125,
      "step": 48717,
      "training_step_time": 0.3858020305633545
    },
    {
      "epoch": 0.00029735107421875,
      "model_forward_time": 0.11472964286804199,
      "step": 48718
    },
    {
      "epoch": 0.00029735107421875,
      "step": 48718,
      "training_step_time": 0.4822971820831299
    },
    {
      "epoch": 0.000297357177734375,
      "model_forward_time": 0.11467123031616211,
      "step": 48719
    },
    {
      "epoch": 0.000297357177734375,
      "step": 48719,
      "training_step_time": 0.49788403511047363
    },
    {
      "epoch": 0.00029736328125,
      "grad_norm": 0.07690149545669556,
      "learning_rate": 9.355659389184396e-06,
      "loss": 0.0358,
      "step": 48720
    },
    {
      "epoch": 0.00029736328125,
      "model_forward_time": 0.1146852970123291,
      "step": 48720
    },
    {
      "epoch": 0.00029736328125,
      "step": 48720,
      "training_step_time": 0.44043946266174316
    },
    {
      "epoch": 0.000297369384765625,
      "model_forward_time": 0.11480951309204102,
      "step": 48721
    },
    {
      "epoch": 0.000297369384765625,
      "step": 48721,
      "training_step_time": 0.4620482921600342
    },
    {
      "epoch": 0.00029737548828125,
      "model_forward_time": 0.11521172523498535,
      "step": 48722
    },
    {
      "epoch": 0.00029737548828125,
      "step": 48722,
      "training_step_time": 0.4099712371826172
    },
    {
      "epoch": 0.000297381591796875,
      "model_forward_time": 0.11467885971069336,
      "step": 48723
    },
    {
      "epoch": 0.000297381591796875,
      "step": 48723,
      "training_step_time": 0.38488101959228516
    },
    {
      "epoch": 0.0002973876953125,
      "model_forward_time": 0.11542677879333496,
      "step": 48724
    },
    {
      "epoch": 0.0002973876953125,
      "step": 48724,
      "training_step_time": 0.3853011131286621
    },
    {
      "epoch": 0.000297393798828125,
      "model_forward_time": 0.11476349830627441,
      "step": 48725
    },
    {
      "epoch": 0.000297393798828125,
      "step": 48725,
      "training_step_time": 0.3957674503326416
    },
    {
      "epoch": 0.00029739990234375,
      "model_forward_time": 0.1145162582397461,
      "step": 48726
    },
    {
      "epoch": 0.00029739990234375,
      "step": 48726,
      "training_step_time": 0.41853952407836914
    },
    {
      "epoch": 0.000297406005859375,
      "model_forward_time": 0.11527132987976074,
      "step": 48727
    },
    {
      "epoch": 0.000297406005859375,
      "step": 48727,
      "training_step_time": 0.39073848724365234
    },
    {
      "epoch": 0.000297412109375,
      "model_forward_time": 0.11548018455505371,
      "step": 48728
    },
    {
      "epoch": 0.000297412109375,
      "step": 48728,
      "training_step_time": 0.3995687961578369
    },
    {
      "epoch": 0.000297418212890625,
      "model_forward_time": 0.11601424217224121,
      "step": 48729
    },
    {
      "epoch": 0.000297418212890625,
      "step": 48729,
      "training_step_time": 0.4337804317474365
    },
    {
      "epoch": 0.00029742431640625,
      "grad_norm": 0.11604578047990799,
      "learning_rate": 9.33961528511172e-06,
      "loss": 0.0364,
      "step": 48730
    },
    {
      "epoch": 0.00029742431640625,
      "model_forward_time": 0.11532092094421387,
      "step": 48730
    },
    {
      "epoch": 0.00029742431640625,
      "step": 48730,
      "training_step_time": 0.45203685760498047
    },
    {
      "epoch": 0.000297430419921875,
      "model_forward_time": 0.11526989936828613,
      "step": 48731
    },
    {
      "epoch": 0.000297430419921875,
      "step": 48731,
      "training_step_time": 0.5146725177764893
    },
    {
      "epoch": 0.0002974365234375,
      "model_forward_time": 0.11535143852233887,
      "step": 48732
    },
    {
      "epoch": 0.0002974365234375,
      "step": 48732,
      "training_step_time": 0.4457113742828369
    },
    {
      "epoch": 0.000297442626953125,
      "model_forward_time": 0.11528611183166504,
      "step": 48733
    },
    {
      "epoch": 0.000297442626953125,
      "step": 48733,
      "training_step_time": 0.4972670078277588
    },
    {
      "epoch": 0.00029744873046875,
      "model_forward_time": 0.11584234237670898,
      "step": 48734
    },
    {
      "epoch": 0.00029744873046875,
      "step": 48734,
      "training_step_time": 0.416827917098999
    },
    {
      "epoch": 0.000297454833984375,
      "model_forward_time": 0.11542463302612305,
      "step": 48735
    },
    {
      "epoch": 0.000297454833984375,
      "step": 48735,
      "training_step_time": 0.42139124870300293
    },
    {
      "epoch": 0.0002974609375,
      "model_forward_time": 0.11492729187011719,
      "step": 48736
    },
    {
      "epoch": 0.0002974609375,
      "step": 48736,
      "training_step_time": 0.3966352939605713
    },
    {
      "epoch": 0.000297467041015625,
      "model_forward_time": 0.11449885368347168,
      "step": 48737
    },
    {
      "epoch": 0.000297467041015625,
      "step": 48737,
      "training_step_time": 0.6490607261657715
    },
    {
      "epoch": 0.00029747314453125,
      "model_forward_time": 0.11623835563659668,
      "step": 48738
    },
    {
      "epoch": 0.00029747314453125,
      "step": 48738,
      "training_step_time": 0.4163494110107422
    },
    {
      "epoch": 0.000297479248046875,
      "model_forward_time": 0.11842179298400879,
      "step": 48739
    },
    {
      "epoch": 0.000297479248046875,
      "step": 48739,
      "training_step_time": 0.44820618629455566
    },
    {
      "epoch": 0.0002974853515625,
      "grad_norm": 0.09559701383113861,
      "learning_rate": 9.32358353259032e-06,
      "loss": 0.0335,
      "step": 48740
    },
    {
      "epoch": 0.0002974853515625,
      "model_forward_time": 0.12014269828796387,
      "step": 48740
    },
    {
      "epoch": 0.0002974853515625,
      "step": 48740,
      "training_step_time": 0.5609667301177979
    },
    {
      "epoch": 0.000297491455078125,
      "model_forward_time": 0.1204524040222168,
      "step": 48741
    },
    {
      "epoch": 0.000297491455078125,
      "step": 48741,
      "training_step_time": 0.5868422985076904
    },
    {
      "epoch": 0.00029749755859375,
      "model_forward_time": 0.118133544921875,
      "step": 48742
    },
    {
      "epoch": 0.00029749755859375,
      "step": 48742,
      "training_step_time": 0.7164812088012695
    },
    {
      "epoch": 0.000297503662109375,
      "model_forward_time": 0.11731600761413574,
      "step": 48743
    },
    {
      "epoch": 0.000297503662109375,
      "step": 48743,
      "training_step_time": 0.7917070388793945
    },
    {
      "epoch": 0.000297509765625,
      "model_forward_time": 0.12745165824890137,
      "step": 48744
    },
    {
      "epoch": 0.000297509765625,
      "step": 48744,
      "training_step_time": 0.6939389705657959
    },
    {
      "epoch": 0.000297515869140625,
      "model_forward_time": 0.11993622779846191,
      "step": 48745
    },
    {
      "epoch": 0.000297515869140625,
      "step": 48745,
      "training_step_time": 0.7415685653686523
    },
    {
      "epoch": 0.00029752197265625,
      "model_forward_time": 0.11738896369934082,
      "step": 48746
    },
    {
      "epoch": 0.00029752197265625,
      "step": 48746,
      "training_step_time": 0.6899044513702393
    },
    {
      "epoch": 0.000297528076171875,
      "model_forward_time": 0.1269845962524414,
      "step": 48747
    },
    {
      "epoch": 0.000297528076171875,
      "step": 48747,
      "training_step_time": 0.6950099468231201
    },
    {
      "epoch": 0.0002975341796875,
      "model_forward_time": 0.11979532241821289,
      "step": 48748
    },
    {
      "epoch": 0.0002975341796875,
      "step": 48748,
      "training_step_time": 0.6406669616699219
    },
    {
      "epoch": 0.000297540283203125,
      "model_forward_time": 0.11676549911499023,
      "step": 48749
    },
    {
      "epoch": 0.000297540283203125,
      "step": 48749,
      "training_step_time": 0.6506741046905518
    },
    {
      "epoch": 0.00029754638671875,
      "grad_norm": 0.10035450756549835,
      "learning_rate": 9.307564136490254e-06,
      "loss": 0.0334,
      "step": 48750
    },
    {
      "epoch": 0.00029754638671875,
      "model_forward_time": 0.12091875076293945,
      "step": 48750
    },
    {
      "epoch": 0.00029754638671875,
      "step": 48750,
      "training_step_time": 0.657214879989624
    },
    {
      "epoch": 0.000297552490234375,
      "model_forward_time": 0.1216428279876709,
      "step": 48751
    },
    {
      "epoch": 0.000297552490234375,
      "step": 48751,
      "training_step_time": 0.7531986236572266
    },
    {
      "epoch": 0.00029755859375,
      "model_forward_time": 0.11841678619384766,
      "step": 48752
    },
    {
      "epoch": 0.00029755859375,
      "step": 48752,
      "training_step_time": 0.7428989410400391
    },
    {
      "epoch": 0.000297564697265625,
      "model_forward_time": 0.12768840789794922,
      "step": 48753
    },
    {
      "epoch": 0.000297564697265625,
      "step": 48753,
      "training_step_time": 0.6959166526794434
    },
    {
      "epoch": 0.00029757080078125,
      "model_forward_time": 0.11684131622314453,
      "step": 48754
    },
    {
      "epoch": 0.00029757080078125,
      "step": 48754,
      "training_step_time": 0.7322835922241211
    },
    {
      "epoch": 0.000297576904296875,
      "model_forward_time": 0.11886191368103027,
      "step": 48755
    },
    {
      "epoch": 0.000297576904296875,
      "step": 48755,
      "training_step_time": 0.6728019714355469
    },
    {
      "epoch": 0.0002975830078125,
      "model_forward_time": 0.12507319450378418,
      "step": 48756
    },
    {
      "epoch": 0.0002975830078125,
      "step": 48756,
      "training_step_time": 0.7256855964660645
    },
    {
      "epoch": 0.000297589111328125,
      "model_forward_time": 0.11686944961547852,
      "step": 48757
    },
    {
      "epoch": 0.000297589111328125,
      "step": 48757,
      "training_step_time": 0.6809060573577881
    },
    {
      "epoch": 0.00029759521484375,
      "model_forward_time": 0.12109088897705078,
      "step": 48758
    },
    {
      "epoch": 0.00029759521484375,
      "step": 48758,
      "training_step_time": 0.6853060722351074
    },
    {
      "epoch": 0.000297601318359375,
      "model_forward_time": 0.11979913711547852,
      "step": 48759
    },
    {
      "epoch": 0.000297601318359375,
      "step": 48759,
      "training_step_time": 0.6983358860015869
    },
    {
      "epoch": 0.000297607421875,
      "grad_norm": 0.11200558394193649,
      "learning_rate": 9.291557101677784e-06,
      "loss": 0.0393,
      "step": 48760
    },
    {
      "epoch": 0.000297607421875,
      "model_forward_time": 0.12294125556945801,
      "step": 48760
    },
    {
      "epoch": 0.000297607421875,
      "step": 48760,
      "training_step_time": 0.7212245464324951
    },
    {
      "epoch": 0.000297613525390625,
      "model_forward_time": 0.13384175300598145,
      "step": 48761
    },
    {
      "epoch": 0.000297613525390625,
      "step": 48761,
      "training_step_time": 0.7012238502502441
    },
    {
      "epoch": 0.00029761962890625,
      "model_forward_time": 0.11792516708374023,
      "step": 48762
    },
    {
      "epoch": 0.00029761962890625,
      "step": 48762,
      "training_step_time": 0.6028058528900146
    },
    {
      "epoch": 0.000297625732421875,
      "model_forward_time": 0.12060856819152832,
      "step": 48763
    },
    {
      "epoch": 0.000297625732421875,
      "step": 48763,
      "training_step_time": 0.6845829486846924
    },
    {
      "epoch": 0.0002976318359375,
      "model_forward_time": 0.12009787559509277,
      "step": 48764
    },
    {
      "epoch": 0.0002976318359375,
      "step": 48764,
      "training_step_time": 0.6879911422729492
    },
    {
      "epoch": 0.000297637939453125,
      "model_forward_time": 0.12092113494873047,
      "step": 48765
    },
    {
      "epoch": 0.000297637939453125,
      "step": 48765,
      "training_step_time": 0.6179497241973877
    },
    {
      "epoch": 0.00029764404296875,
      "model_forward_time": 0.12368226051330566,
      "step": 48766
    },
    {
      "epoch": 0.00029764404296875,
      "step": 48766,
      "training_step_time": 0.5862472057342529
    },
    {
      "epoch": 0.000297650146484375,
      "model_forward_time": 0.12517213821411133,
      "step": 48767
    },
    {
      "epoch": 0.000297650146484375,
      "step": 48767,
      "training_step_time": 0.6992542743682861
    },
    {
      "epoch": 0.00029765625,
      "model_forward_time": 0.11851644515991211,
      "step": 48768
    },
    {
      "epoch": 0.00029765625,
      "step": 48768,
      "training_step_time": 0.6492421627044678
    },
    {
      "epoch": 0.000297662353515625,
      "model_forward_time": 0.11792731285095215,
      "step": 48769
    },
    {
      "epoch": 0.000297662353515625,
      "step": 48769,
      "training_step_time": 0.7135403156280518
    },
    {
      "epoch": 0.00029766845703125,
      "grad_norm": 0.07773886620998383,
      "learning_rate": 9.275562433015422e-06,
      "loss": 0.037,
      "step": 48770
    },
    {
      "epoch": 0.00029766845703125,
      "model_forward_time": 0.11673378944396973,
      "step": 48770
    },
    {
      "epoch": 0.00029766845703125,
      "step": 48770,
      "training_step_time": 0.7383856773376465
    },
    {
      "epoch": 0.000297674560546875,
      "model_forward_time": 0.11838364601135254,
      "step": 48771
    },
    {
      "epoch": 0.000297674560546875,
      "step": 48771,
      "training_step_time": 0.6627612113952637
    },
    {
      "epoch": 0.0002976806640625,
      "model_forward_time": 0.11848902702331543,
      "step": 48772
    },
    {
      "epoch": 0.0002976806640625,
      "step": 48772,
      "training_step_time": 0.7513902187347412
    },
    {
      "epoch": 0.000297686767578125,
      "model_forward_time": 0.11933708190917969,
      "step": 48773
    },
    {
      "epoch": 0.000297686767578125,
      "step": 48773,
      "training_step_time": 0.6819577217102051
    },
    {
      "epoch": 0.00029769287109375,
      "model_forward_time": 0.11850094795227051,
      "step": 48774
    },
    {
      "epoch": 0.00029769287109375,
      "step": 48774,
      "training_step_time": 0.7056818008422852
    },
    {
      "epoch": 0.000297698974609375,
      "model_forward_time": 0.12056946754455566,
      "step": 48775
    },
    {
      "epoch": 0.000297698974609375,
      "step": 48775,
      "training_step_time": 0.713982105255127
    },
    {
      "epoch": 0.000297705078125,
      "model_forward_time": 0.12373042106628418,
      "step": 48776
    },
    {
      "epoch": 0.000297705078125,
      "step": 48776,
      "training_step_time": 0.6278142929077148
    },
    {
      "epoch": 0.000297711181640625,
      "model_forward_time": 0.11784601211547852,
      "step": 48777
    },
    {
      "epoch": 0.000297711181640625,
      "step": 48777,
      "training_step_time": 0.6673519611358643
    },
    {
      "epoch": 0.00029771728515625,
      "model_forward_time": 0.11797952651977539,
      "step": 48778
    },
    {
      "epoch": 0.00029771728515625,
      "step": 48778,
      "training_step_time": 0.6582996845245361
    },
    {
      "epoch": 0.000297723388671875,
      "model_forward_time": 0.12126612663269043,
      "step": 48779
    },
    {
      "epoch": 0.000297723388671875,
      "step": 48779,
      "training_step_time": 0.654019832611084
    },
    {
      "epoch": 0.0002977294921875,
      "grad_norm": 0.17041613161563873,
      "learning_rate": 9.259580135361929e-06,
      "loss": 0.0403,
      "step": 48780
    },
    {
      "epoch": 0.0002977294921875,
      "model_forward_time": 0.11710047721862793,
      "step": 48780
    },
    {
      "epoch": 0.0002977294921875,
      "step": 48780,
      "training_step_time": 0.7299928665161133
    },
    {
      "epoch": 0.000297735595703125,
      "model_forward_time": 0.11735963821411133,
      "step": 48781
    },
    {
      "epoch": 0.000297735595703125,
      "step": 48781,
      "training_step_time": 0.666240930557251
    },
    {
      "epoch": 0.00029774169921875,
      "model_forward_time": 0.12325382232666016,
      "step": 48782
    },
    {
      "epoch": 0.00029774169921875,
      "step": 48782,
      "training_step_time": 0.7306942939758301
    },
    {
      "epoch": 0.000297747802734375,
      "model_forward_time": 0.11862945556640625,
      "step": 48783
    },
    {
      "epoch": 0.000297747802734375,
      "step": 48783,
      "training_step_time": 0.6590161323547363
    },
    {
      "epoch": 0.00029775390625,
      "model_forward_time": 0.12493467330932617,
      "step": 48784
    },
    {
      "epoch": 0.00029775390625,
      "step": 48784,
      "training_step_time": 0.6521971225738525
    },
    {
      "epoch": 0.000297760009765625,
      "model_forward_time": 0.1164698600769043,
      "step": 48785
    },
    {
      "epoch": 0.000297760009765625,
      "step": 48785,
      "training_step_time": 0.6465785503387451
    },
    {
      "epoch": 0.00029776611328125,
      "model_forward_time": 0.11679482460021973,
      "step": 48786
    },
    {
      "epoch": 0.00029776611328125,
      "step": 48786,
      "training_step_time": 0.6639797687530518
    },
    {
      "epoch": 0.000297772216796875,
      "model_forward_time": 0.12081360816955566,
      "step": 48787
    },
    {
      "epoch": 0.000297772216796875,
      "step": 48787,
      "training_step_time": 0.6806516647338867
    },
    {
      "epoch": 0.0002977783203125,
      "model_forward_time": 0.11610031127929688,
      "step": 48788
    },
    {
      "epoch": 0.0002977783203125,
      "step": 48788,
      "training_step_time": 0.7079024314880371
    },
    {
      "epoch": 0.000297784423828125,
      "model_forward_time": 0.12208771705627441,
      "step": 48789
    },
    {
      "epoch": 0.000297784423828125,
      "step": 48789,
      "training_step_time": 0.7925472259521484
    },
    {
      "epoch": 0.00029779052734375,
      "grad_norm": 0.08908569067716599,
      "learning_rate": 9.243610213572285e-06,
      "loss": 0.0382,
      "step": 48790
    },
    {
      "epoch": 0.00029779052734375,
      "model_forward_time": 0.11927199363708496,
      "step": 48790
    },
    {
      "epoch": 0.00029779052734375,
      "step": 48790,
      "training_step_time": 0.6494076251983643
    },
    {
      "epoch": 0.000297796630859375,
      "model_forward_time": 0.1169588565826416,
      "step": 48791
    },
    {
      "epoch": 0.000297796630859375,
      "step": 48791,
      "training_step_time": 0.6813335418701172
    },
    {
      "epoch": 0.000297802734375,
      "model_forward_time": 0.11990571022033691,
      "step": 48792
    },
    {
      "epoch": 0.000297802734375,
      "step": 48792,
      "training_step_time": 0.6639225482940674
    },
    {
      "epoch": 0.000297808837890625,
      "model_forward_time": 0.11559510231018066,
      "step": 48793
    },
    {
      "epoch": 0.000297808837890625,
      "step": 48793,
      "training_step_time": 0.6774201393127441
    },
    {
      "epoch": 0.00029781494140625,
      "model_forward_time": 0.11719942092895508,
      "step": 48794
    },
    {
      "epoch": 0.00029781494140625,
      "step": 48794,
      "training_step_time": 0.7081210613250732
    },
    {
      "epoch": 0.000297821044921875,
      "model_forward_time": 0.12190866470336914,
      "step": 48795
    },
    {
      "epoch": 0.000297821044921875,
      "step": 48795,
      "training_step_time": 0.6510252952575684
    },
    {
      "epoch": 0.0002978271484375,
      "model_forward_time": 0.11901021003723145,
      "step": 48796
    },
    {
      "epoch": 0.0002978271484375,
      "step": 48796,
      "training_step_time": 0.6296308040618896
    },
    {
      "epoch": 0.000297833251953125,
      "model_forward_time": 0.12291741371154785,
      "step": 48797
    },
    {
      "epoch": 0.000297833251953125,
      "step": 48797,
      "training_step_time": 0.6996519565582275
    },
    {
      "epoch": 0.00029783935546875,
      "model_forward_time": 0.11789941787719727,
      "step": 48798
    },
    {
      "epoch": 0.00029783935546875,
      "step": 48798,
      "training_step_time": 0.6564862728118896
    },
    {
      "epoch": 0.000297845458984375,
      "model_forward_time": 0.11710882186889648,
      "step": 48799
    },
    {
      "epoch": 0.000297845458984375,
      "step": 48799,
      "training_step_time": 0.6441788673400879
    },
    {
      "epoch": 0.0002978515625,
      "grad_norm": 0.0950460210442543,
      "learning_rate": 9.227652672497761e-06,
      "loss": 0.0411,
      "step": 48800
    },
    {
      "epoch": 0.0002978515625,
      "model_forward_time": 0.12175941467285156,
      "step": 48800
    },
    {
      "epoch": 0.0002978515625,
      "step": 48800,
      "training_step_time": 0.6820964813232422
    },
    {
      "epoch": 0.000297857666015625,
      "model_forward_time": 0.11658000946044922,
      "step": 48801
    },
    {
      "epoch": 0.000297857666015625,
      "step": 48801,
      "training_step_time": 0.7019393444061279
    },
    {
      "epoch": 0.00029786376953125,
      "model_forward_time": 0.12049055099487305,
      "step": 48802
    },
    {
      "epoch": 0.00029786376953125,
      "step": 48802,
      "training_step_time": 0.6894006729125977
    },
    {
      "epoch": 0.000297869873046875,
      "model_forward_time": 0.12462401390075684,
      "step": 48803
    },
    {
      "epoch": 0.000297869873046875,
      "step": 48803,
      "training_step_time": 0.6401462554931641
    },
    {
      "epoch": 0.0002978759765625,
      "model_forward_time": 0.11969828605651855,
      "step": 48804
    },
    {
      "epoch": 0.0002978759765625,
      "step": 48804,
      "training_step_time": 0.61712646484375
    },
    {
      "epoch": 0.000297882080078125,
      "model_forward_time": 0.11821103096008301,
      "step": 48805
    },
    {
      "epoch": 0.000297882080078125,
      "step": 48805,
      "training_step_time": 0.6203949451446533
    },
    {
      "epoch": 0.00029788818359375,
      "model_forward_time": 0.12027144432067871,
      "step": 48806
    },
    {
      "epoch": 0.00029788818359375,
      "step": 48806,
      "training_step_time": 0.5936665534973145
    },
    {
      "epoch": 0.000297894287109375,
      "model_forward_time": 0.11931228637695312,
      "step": 48807
    },
    {
      "epoch": 0.000297894287109375,
      "step": 48807,
      "training_step_time": 0.6057908535003662
    },
    {
      "epoch": 0.000297900390625,
      "model_forward_time": 0.11804842948913574,
      "step": 48808
    },
    {
      "epoch": 0.000297900390625,
      "step": 48808,
      "training_step_time": 0.6297564506530762
    },
    {
      "epoch": 0.000297906494140625,
      "model_forward_time": 0.12915635108947754,
      "step": 48809
    },
    {
      "epoch": 0.000297906494140625,
      "step": 48809,
      "training_step_time": 0.5756328105926514
    },
    {
      "epoch": 0.00029791259765625,
      "grad_norm": 0.18487296998500824,
      "learning_rate": 9.211707516985829e-06,
      "loss": 0.0456,
      "step": 48810
    },
    {
      "epoch": 0.00029791259765625,
      "model_forward_time": 0.11846804618835449,
      "step": 48810
    },
    {
      "epoch": 0.00029791259765625,
      "step": 48810,
      "training_step_time": 0.4238626956939697
    },
    {
      "epoch": 0.000297918701171875,
      "model_forward_time": 0.11673545837402344,
      "step": 48811
    },
    {
      "epoch": 0.000297918701171875,
      "step": 48811,
      "training_step_time": 0.5658748149871826
    },
    {
      "epoch": 0.0002979248046875,
      "model_forward_time": 0.1191258430480957,
      "step": 48812
    },
    {
      "epoch": 0.0002979248046875,
      "step": 48812,
      "training_step_time": 0.6101233959197998
    },
    {
      "epoch": 0.000297930908203125,
      "model_forward_time": 0.11810684204101562,
      "step": 48813
    },
    {
      "epoch": 0.000297930908203125,
      "step": 48813,
      "training_step_time": 0.5491690635681152
    },
    {
      "epoch": 0.00029793701171875,
      "model_forward_time": 0.11692118644714355,
      "step": 48814
    },
    {
      "epoch": 0.00029793701171875,
      "step": 48814,
      "training_step_time": 0.44079017639160156
    },
    {
      "epoch": 0.000297943115234375,
      "model_forward_time": 0.11559605598449707,
      "step": 48815
    },
    {
      "epoch": 0.000297943115234375,
      "step": 48815,
      "training_step_time": 0.4207439422607422
    },
    {
      "epoch": 0.00029794921875,
      "model_forward_time": 0.1160130500793457,
      "step": 48816
    },
    {
      "epoch": 0.00029794921875,
      "step": 48816,
      "training_step_time": 0.4167928695678711
    },
    {
      "epoch": 0.000297955322265625,
      "model_forward_time": 0.11757111549377441,
      "step": 48817
    },
    {
      "epoch": 0.000297955322265625,
      "step": 48817,
      "training_step_time": 0.4114370346069336
    },
    {
      "epoch": 0.00029796142578125,
      "model_forward_time": 0.11617660522460938,
      "step": 48818
    },
    {
      "epoch": 0.00029796142578125,
      "step": 48818,
      "training_step_time": 0.403519868850708
    },
    {
      "epoch": 0.000297967529296875,
      "model_forward_time": 0.11456418037414551,
      "step": 48819
    },
    {
      "epoch": 0.000297967529296875,
      "step": 48819,
      "training_step_time": 0.39848971366882324
    },
    {
      "epoch": 0.0002979736328125,
      "grad_norm": 0.11422465741634369,
      "learning_rate": 9.195774751880198e-06,
      "loss": 0.0381,
      "step": 48820
    },
    {
      "epoch": 0.0002979736328125,
      "model_forward_time": 0.11519718170166016,
      "step": 48820
    },
    {
      "epoch": 0.0002979736328125,
      "step": 48820,
      "training_step_time": 0.38071537017822266
    },
    {
      "epoch": 0.000297979736328125,
      "model_forward_time": 0.11496233940124512,
      "step": 48821
    },
    {
      "epoch": 0.000297979736328125,
      "step": 48821,
      "training_step_time": 0.4359700679779053
    },
    {
      "epoch": 0.00029798583984375,
      "model_forward_time": 0.11482691764831543,
      "step": 48822
    },
    {
      "epoch": 0.00029798583984375,
      "step": 48822,
      "training_step_time": 0.43445348739624023
    },
    {
      "epoch": 0.000297991943359375,
      "model_forward_time": 0.11495208740234375,
      "step": 48823
    },
    {
      "epoch": 0.000297991943359375,
      "step": 48823,
      "training_step_time": 0.4243285655975342
    },
    {
      "epoch": 0.000297998046875,
      "model_forward_time": 0.11561965942382812,
      "step": 48824
    },
    {
      "epoch": 0.000297998046875,
      "step": 48824,
      "training_step_time": 0.3935079574584961
    },
    {
      "epoch": 0.000298004150390625,
      "model_forward_time": 0.11530756950378418,
      "step": 48825
    },
    {
      "epoch": 0.000298004150390625,
      "step": 48825,
      "training_step_time": 0.39967823028564453
    },
    {
      "epoch": 0.00029801025390625,
      "model_forward_time": 0.11576080322265625,
      "step": 48826
    },
    {
      "epoch": 0.00029801025390625,
      "step": 48826,
      "training_step_time": 0.5012874603271484
    },
    {
      "epoch": 0.000298016357421875,
      "model_forward_time": 0.1152963638305664,
      "step": 48827
    },
    {
      "epoch": 0.000298016357421875,
      "step": 48827,
      "training_step_time": 0.4303131103515625
    },
    {
      "epoch": 0.0002980224609375,
      "model_forward_time": 0.11542773246765137,
      "step": 48828
    },
    {
      "epoch": 0.0002980224609375,
      "step": 48828,
      "training_step_time": 0.44663381576538086
    },
    {
      "epoch": 0.000298028564453125,
      "model_forward_time": 0.11536121368408203,
      "step": 48829
    },
    {
      "epoch": 0.000298028564453125,
      "step": 48829,
      "training_step_time": 0.38947343826293945
    },
    {
      "epoch": 0.00029803466796875,
      "grad_norm": 0.08200609683990479,
      "learning_rate": 9.17985438202082e-06,
      "loss": 0.0348,
      "step": 48830
    },
    {
      "epoch": 0.00029803466796875,
      "model_forward_time": 0.11516046524047852,
      "step": 48830
    },
    {
      "epoch": 0.00029803466796875,
      "step": 48830,
      "training_step_time": 0.40453290939331055
    },
    {
      "epoch": 0.000298040771484375,
      "model_forward_time": 0.11490631103515625,
      "step": 48831
    },
    {
      "epoch": 0.000298040771484375,
      "step": 48831,
      "training_step_time": 0.392240047454834
    },
    {
      "epoch": 0.000298046875,
      "model_forward_time": 0.11526370048522949,
      "step": 48832
    },
    {
      "epoch": 0.000298046875,
      "step": 48832,
      "training_step_time": 0.4231429100036621
    },
    {
      "epoch": 0.000298052978515625,
      "model_forward_time": 0.11487150192260742,
      "step": 48833
    },
    {
      "epoch": 0.000298052978515625,
      "step": 48833,
      "training_step_time": 0.41750574111938477
    },
    {
      "epoch": 0.00029805908203125,
      "model_forward_time": 0.11501336097717285,
      "step": 48834
    },
    {
      "epoch": 0.00029805908203125,
      "step": 48834,
      "training_step_time": 0.3988020420074463
    },
    {
      "epoch": 0.000298065185546875,
      "model_forward_time": 0.11532378196716309,
      "step": 48835
    },
    {
      "epoch": 0.000298065185546875,
      "step": 48835,
      "training_step_time": 0.42876482009887695
    },
    {
      "epoch": 0.0002980712890625,
      "model_forward_time": 0.1142430305480957,
      "step": 48836
    },
    {
      "epoch": 0.0002980712890625,
      "step": 48836,
      "training_step_time": 0.39095401763916016
    },
    {
      "epoch": 0.000298077392578125,
      "model_forward_time": 0.1154487133026123,
      "step": 48837
    },
    {
      "epoch": 0.000298077392578125,
      "step": 48837,
      "training_step_time": 0.4559051990509033
    },
    {
      "epoch": 0.00029808349609375,
      "model_forward_time": 0.11459994316101074,
      "step": 48838
    },
    {
      "epoch": 0.00029808349609375,
      "step": 48838,
      "training_step_time": 0.378978967666626
    },
    {
      "epoch": 0.000298089599609375,
      "model_forward_time": 0.11509132385253906,
      "step": 48839
    },
    {
      "epoch": 0.000298089599609375,
      "step": 48839,
      "training_step_time": 0.3934767246246338
    },
    {
      "epoch": 0.000298095703125,
      "grad_norm": 0.08129305392503738,
      "learning_rate": 9.163946412243896e-06,
      "loss": 0.0367,
      "step": 48840
    },
    {
      "epoch": 0.000298095703125,
      "model_forward_time": 0.11485624313354492,
      "step": 48840
    },
    {
      "epoch": 0.000298095703125,
      "step": 48840,
      "training_step_time": 0.4735291004180908
    },
    {
      "epoch": 0.000298101806640625,
      "model_forward_time": 0.11639738082885742,
      "step": 48841
    },
    {
      "epoch": 0.000298101806640625,
      "step": 48841,
      "training_step_time": 0.473008394241333
    },
    {
      "epoch": 0.00029810791015625,
      "model_forward_time": 0.11494612693786621,
      "step": 48842
    },
    {
      "epoch": 0.00029810791015625,
      "step": 48842,
      "training_step_time": 0.4723033905029297
    },
    {
      "epoch": 0.000298114013671875,
      "model_forward_time": 0.11551833152770996,
      "step": 48843
    },
    {
      "epoch": 0.000298114013671875,
      "step": 48843,
      "training_step_time": 0.388735294342041
    },
    {
      "epoch": 0.0002981201171875,
      "model_forward_time": 0.1152029037475586,
      "step": 48844
    },
    {
      "epoch": 0.0002981201171875,
      "step": 48844,
      "training_step_time": 0.3856692314147949
    },
    {
      "epoch": 0.000298126220703125,
      "model_forward_time": 0.11553478240966797,
      "step": 48845
    },
    {
      "epoch": 0.000298126220703125,
      "step": 48845,
      "training_step_time": 0.4274582862854004
    },
    {
      "epoch": 0.00029813232421875,
      "model_forward_time": 0.11499428749084473,
      "step": 48846
    },
    {
      "epoch": 0.00029813232421875,
      "step": 48846,
      "training_step_time": 0.3992619514465332
    },
    {
      "epoch": 0.000298138427734375,
      "model_forward_time": 0.11615276336669922,
      "step": 48847
    },
    {
      "epoch": 0.000298138427734375,
      "step": 48847,
      "training_step_time": 0.45226597785949707
    },
    {
      "epoch": 0.00029814453125,
      "model_forward_time": 0.11464309692382812,
      "step": 48848
    },
    {
      "epoch": 0.00029814453125,
      "step": 48848,
      "training_step_time": 0.3944435119628906
    },
    {
      "epoch": 0.000298150634765625,
      "model_forward_time": 0.11531400680541992,
      "step": 48849
    },
    {
      "epoch": 0.000298150634765625,
      "step": 48849,
      "training_step_time": 0.39292335510253906
    },
    {
      "epoch": 0.00029815673828125,
      "grad_norm": 0.12126754969358444,
      "learning_rate": 9.148050847381828e-06,
      "loss": 0.0381,
      "step": 48850
    },
    {
      "epoch": 0.00029815673828125,
      "model_forward_time": 0.11492609977722168,
      "step": 48850
    },
    {
      "epoch": 0.00029815673828125,
      "step": 48850,
      "training_step_time": 0.4752476215362549
    },
    {
      "epoch": 0.000298162841796875,
      "model_forward_time": 0.11473608016967773,
      "step": 48851
    },
    {
      "epoch": 0.000298162841796875,
      "step": 48851,
      "training_step_time": 0.3825860023498535
    },
    {
      "epoch": 0.0002981689453125,
      "model_forward_time": 0.11489677429199219,
      "step": 48852
    },
    {
      "epoch": 0.0002981689453125,
      "step": 48852,
      "training_step_time": 0.40027356147766113
    },
    {
      "epoch": 0.000298175048828125,
      "model_forward_time": 0.11508464813232422,
      "step": 48853
    },
    {
      "epoch": 0.000298175048828125,
      "step": 48853,
      "training_step_time": 0.39588022232055664
    },
    {
      "epoch": 0.00029818115234375,
      "model_forward_time": 0.11503314971923828,
      "step": 48854
    },
    {
      "epoch": 0.00029818115234375,
      "step": 48854,
      "training_step_time": 0.36818599700927734
    },
    {
      "epoch": 0.000298187255859375,
      "model_forward_time": 0.11573100090026855,
      "step": 48855
    },
    {
      "epoch": 0.000298187255859375,
      "step": 48855,
      "training_step_time": 0.4096260070800781
    },
    {
      "epoch": 0.000298193359375,
      "model_forward_time": 0.11498308181762695,
      "step": 48856
    },
    {
      "epoch": 0.000298193359375,
      "step": 48856,
      "training_step_time": 0.5347754955291748
    },
    {
      "epoch": 0.000298199462890625,
      "model_forward_time": 0.11508607864379883,
      "step": 48857
    },
    {
      "epoch": 0.000298199462890625,
      "step": 48857,
      "training_step_time": 0.421708345413208
    },
    {
      "epoch": 0.00029820556640625,
      "model_forward_time": 0.11613774299621582,
      "step": 48858
    },
    {
      "epoch": 0.00029820556640625,
      "step": 48858,
      "training_step_time": 0.3831472396850586
    },
    {
      "epoch": 0.000298211669921875,
      "model_forward_time": 0.11511087417602539,
      "step": 48859
    },
    {
      "epoch": 0.000298211669921875,
      "step": 48859,
      "training_step_time": 0.4303627014160156
    },
    {
      "epoch": 0.0002982177734375,
      "grad_norm": 0.14842082560062408,
      "learning_rate": 9.132167692263289e-06,
      "loss": 0.0395,
      "step": 48860
    },
    {
      "epoch": 0.0002982177734375,
      "model_forward_time": 0.11506271362304688,
      "step": 48860
    },
    {
      "epoch": 0.0002982177734375,
      "step": 48860,
      "training_step_time": 0.4009668827056885
    },
    {
      "epoch": 0.000298223876953125,
      "model_forward_time": 0.11568665504455566,
      "step": 48861
    },
    {
      "epoch": 0.000298223876953125,
      "step": 48861,
      "training_step_time": 0.3838019371032715
    },
    {
      "epoch": 0.00029822998046875,
      "model_forward_time": 0.11473703384399414,
      "step": 48862
    },
    {
      "epoch": 0.00029822998046875,
      "step": 48862,
      "training_step_time": 0.3848574161529541
    },
    {
      "epoch": 0.000298236083984375,
      "model_forward_time": 0.11497092247009277,
      "step": 48863
    },
    {
      "epoch": 0.000298236083984375,
      "step": 48863,
      "training_step_time": 0.38165760040283203
    },
    {
      "epoch": 0.0002982421875,
      "model_forward_time": 0.11514711380004883,
      "step": 48864
    },
    {
      "epoch": 0.0002982421875,
      "step": 48864,
      "training_step_time": 0.49770498275756836
    },
    {
      "epoch": 0.000298248291015625,
      "model_forward_time": 0.11474370956420898,
      "step": 48865
    },
    {
      "epoch": 0.000298248291015625,
      "step": 48865,
      "training_step_time": 0.41493916511535645
    },
    {
      "epoch": 0.00029825439453125,
      "model_forward_time": 0.11517930030822754,
      "step": 48866
    },
    {
      "epoch": 0.00029825439453125,
      "step": 48866,
      "training_step_time": 0.4267888069152832
    },
    {
      "epoch": 0.000298260498046875,
      "model_forward_time": 0.11534929275512695,
      "step": 48867
    },
    {
      "epoch": 0.000298260498046875,
      "step": 48867,
      "training_step_time": 0.3909425735473633
    },
    {
      "epoch": 0.0002982666015625,
      "model_forward_time": 0.11605286598205566,
      "step": 48868
    },
    {
      "epoch": 0.0002982666015625,
      "step": 48868,
      "training_step_time": 0.9916930198669434
    },
    {
      "epoch": 0.000298272705078125,
      "model_forward_time": 0.11430907249450684,
      "step": 48869
    },
    {
      "epoch": 0.000298272705078125,
      "step": 48869,
      "training_step_time": 0.4847230911254883
    },
    {
      "epoch": 0.00029827880859375,
      "grad_norm": 0.16025172173976898,
      "learning_rate": 9.116296951713133e-06,
      "loss": 0.0428,
      "step": 48870
    },
    {
      "epoch": 0.00029827880859375,
      "model_forward_time": 0.11401033401489258,
      "step": 48870
    },
    {
      "epoch": 0.00029827880859375,
      "step": 48870,
      "training_step_time": 0.39430761337280273
    },
    {
      "epoch": 0.000298284912109375,
      "model_forward_time": 0.11407136917114258,
      "step": 48871
    },
    {
      "epoch": 0.000298284912109375,
      "step": 48871,
      "training_step_time": 0.4048025608062744
    },
    {
      "epoch": 0.000298291015625,
      "model_forward_time": 0.11377978324890137,
      "step": 48872
    },
    {
      "epoch": 0.000298291015625,
      "step": 48872,
      "training_step_time": 0.38775062561035156
    },
    {
      "epoch": 0.000298297119140625,
      "model_forward_time": 0.11441445350646973,
      "step": 48873
    },
    {
      "epoch": 0.000298297119140625,
      "step": 48873,
      "training_step_time": 0.40766406059265137
    },
    {
      "epoch": 0.00029830322265625,
      "model_forward_time": 0.11453485488891602,
      "step": 48874
    },
    {
      "epoch": 0.00029830322265625,
      "step": 48874,
      "training_step_time": 0.39363932609558105
    },
    {
      "epoch": 0.000298309326171875,
      "model_forward_time": 0.1154627799987793,
      "step": 48875
    },
    {
      "epoch": 0.000298309326171875,
      "step": 48875,
      "training_step_time": 0.39211297035217285
    },
    {
      "epoch": 0.0002983154296875,
      "model_forward_time": 0.11556005477905273,
      "step": 48876
    },
    {
      "epoch": 0.0002983154296875,
      "step": 48876,
      "training_step_time": 0.4026613235473633
    },
    {
      "epoch": 0.000298321533203125,
      "model_forward_time": 0.11434030532836914,
      "step": 48877
    },
    {
      "epoch": 0.000298321533203125,
      "step": 48877,
      "training_step_time": 0.3938322067260742
    },
    {
      "epoch": 0.00029832763671875,
      "model_forward_time": 0.11609482765197754,
      "step": 48878
    },
    {
      "epoch": 0.00029832763671875,
      "step": 48878,
      "training_step_time": 0.48590707778930664
    },
    {
      "epoch": 0.000298333740234375,
      "model_forward_time": 0.11495804786682129,
      "step": 48879
    },
    {
      "epoch": 0.000298333740234375,
      "step": 48879,
      "training_step_time": 0.5048274993896484
    },
    {
      "epoch": 0.00029833984375,
      "grad_norm": 0.1191122978925705,
      "learning_rate": 9.100438630552521e-06,
      "loss": 0.0373,
      "step": 48880
    },
    {
      "epoch": 0.00029833984375,
      "model_forward_time": 0.11483502388000488,
      "step": 48880
    },
    {
      "epoch": 0.00029833984375,
      "step": 48880,
      "training_step_time": 0.5873813629150391
    },
    {
      "epoch": 0.000298345947265625,
      "model_forward_time": 0.1148824691772461,
      "step": 48881
    },
    {
      "epoch": 0.000298345947265625,
      "step": 48881,
      "training_step_time": 0.3878951072692871
    },
    {
      "epoch": 0.00029835205078125,
      "model_forward_time": 0.1145477294921875,
      "step": 48882
    },
    {
      "epoch": 0.00029835205078125,
      "step": 48882,
      "training_step_time": 0.43301987648010254
    },
    {
      "epoch": 0.000298358154296875,
      "model_forward_time": 0.11568713188171387,
      "step": 48883
    },
    {
      "epoch": 0.000298358154296875,
      "step": 48883,
      "training_step_time": 0.5011043548583984
    },
    {
      "epoch": 0.0002983642578125,
      "model_forward_time": 0.1153559684753418,
      "step": 48884
    },
    {
      "epoch": 0.0002983642578125,
      "step": 48884,
      "training_step_time": 0.42814207077026367
    },
    {
      "epoch": 0.000298370361328125,
      "model_forward_time": 0.1155235767364502,
      "step": 48885
    },
    {
      "epoch": 0.000298370361328125,
      "step": 48885,
      "training_step_time": 0.46211957931518555
    },
    {
      "epoch": 0.00029837646484375,
      "model_forward_time": 0.11482429504394531,
      "step": 48886
    },
    {
      "epoch": 0.00029837646484375,
      "step": 48886,
      "training_step_time": 0.4208090305328369
    },
    {
      "epoch": 0.000298382568359375,
      "model_forward_time": 0.11479520797729492,
      "step": 48887
    },
    {
      "epoch": 0.000298382568359375,
      "step": 48887,
      "training_step_time": 0.38201069831848145
    },
    {
      "epoch": 0.000298388671875,
      "model_forward_time": 0.11585307121276855,
      "step": 48888
    },
    {
      "epoch": 0.000298388671875,
      "step": 48888,
      "training_step_time": 0.39604735374450684
    },
    {
      "epoch": 0.000298394775390625,
      "model_forward_time": 0.11541175842285156,
      "step": 48889
    },
    {
      "epoch": 0.000298394775390625,
      "step": 48889,
      "training_step_time": 0.3912625312805176
    },
    {
      "epoch": 0.00029840087890625,
      "grad_norm": 0.18414248526096344,
      "learning_rate": 9.084592733598735e-06,
      "loss": 0.0434,
      "step": 48890
    },
    {
      "epoch": 0.00029840087890625,
      "model_forward_time": 0.11526823043823242,
      "step": 48890
    },
    {
      "epoch": 0.00029840087890625,
      "step": 48890,
      "training_step_time": 0.39503979682922363
    },
    {
      "epoch": 0.000298406982421875,
      "model_forward_time": 0.11595320701599121,
      "step": 48891
    },
    {
      "epoch": 0.000298406982421875,
      "step": 48891,
      "training_step_time": 0.3938922882080078
    },
    {
      "epoch": 0.0002984130859375,
      "model_forward_time": 0.11607599258422852,
      "step": 48892
    },
    {
      "epoch": 0.0002984130859375,
      "step": 48892,
      "training_step_time": 0.648749828338623
    },
    {
      "epoch": 0.000298419189453125,
      "model_forward_time": 0.11479067802429199,
      "step": 48893
    },
    {
      "epoch": 0.000298419189453125,
      "step": 48893,
      "training_step_time": 0.4053008556365967
    },
    {
      "epoch": 0.00029842529296875,
      "model_forward_time": 0.11503195762634277,
      "step": 48894
    },
    {
      "epoch": 0.00029842529296875,
      "step": 48894,
      "training_step_time": 0.3857612609863281
    },
    {
      "epoch": 0.000298431396484375,
      "model_forward_time": 0.11522912979125977,
      "step": 48895
    },
    {
      "epoch": 0.000298431396484375,
      "step": 48895,
      "training_step_time": 0.3974418640136719
    },
    {
      "epoch": 0.0002984375,
      "model_forward_time": 0.1151130199432373,
      "step": 48896
    },
    {
      "epoch": 0.0002984375,
      "step": 48896,
      "training_step_time": 0.3675069808959961
    },
    {
      "epoch": 0.000298443603515625,
      "model_forward_time": 0.11532258987426758,
      "step": 48897
    },
    {
      "epoch": 0.000298443603515625,
      "step": 48897,
      "training_step_time": 0.4692573547363281
    },
    {
      "epoch": 0.00029844970703125,
      "model_forward_time": 0.1152796745300293,
      "step": 48898
    },
    {
      "epoch": 0.00029844970703125,
      "step": 48898,
      "training_step_time": 0.44866251945495605
    },
    {
      "epoch": 0.000298455810546875,
      "model_forward_time": 0.11579203605651855,
      "step": 48899
    },
    {
      "epoch": 0.000298455810546875,
      "step": 48899,
      "training_step_time": 0.39223623275756836
    },
    {
      "epoch": 0.0002984619140625,
      "grad_norm": 0.12108880281448364,
      "learning_rate": 9.068759265665384e-06,
      "loss": 0.0422,
      "step": 48900
    },
    {
      "epoch": 0.0002984619140625,
      "model_forward_time": 0.11455154418945312,
      "step": 48900
    },
    {
      "epoch": 0.0002984619140625,
      "step": 48900,
      "training_step_time": 0.3942275047302246
    },
    {
      "epoch": 0.000298468017578125,
      "model_forward_time": 0.11528944969177246,
      "step": 48901
    },
    {
      "epoch": 0.000298468017578125,
      "step": 48901,
      "training_step_time": 0.4086894989013672
    },
    {
      "epoch": 0.00029847412109375,
      "model_forward_time": 0.11544656753540039,
      "step": 48902
    },
    {
      "epoch": 0.00029847412109375,
      "step": 48902,
      "training_step_time": 0.38994765281677246
    },
    {
      "epoch": 0.000298480224609375,
      "model_forward_time": 0.11588072776794434,
      "step": 48903
    },
    {
      "epoch": 0.000298480224609375,
      "step": 48903,
      "training_step_time": 0.3960990905761719
    },
    {
      "epoch": 0.000298486328125,
      "model_forward_time": 0.11543035507202148,
      "step": 48904
    },
    {
      "epoch": 0.000298486328125,
      "step": 48904,
      "training_step_time": 0.3905982971191406
    },
    {
      "epoch": 0.000298492431640625,
      "model_forward_time": 0.11565685272216797,
      "step": 48905
    },
    {
      "epoch": 0.000298492431640625,
      "step": 48905,
      "training_step_time": 0.3916921615600586
    },
    {
      "epoch": 0.00029849853515625,
      "model_forward_time": 0.11533212661743164,
      "step": 48906
    },
    {
      "epoch": 0.00029849853515625,
      "step": 48906,
      "training_step_time": 0.4217979907989502
    },
    {
      "epoch": 0.000298504638671875,
      "model_forward_time": 0.11500859260559082,
      "step": 48907
    },
    {
      "epoch": 0.000298504638671875,
      "step": 48907,
      "training_step_time": 0.4956393241882324
    },
    {
      "epoch": 0.0002985107421875,
      "model_forward_time": 0.11534500122070312,
      "step": 48908
    },
    {
      "epoch": 0.0002985107421875,
      "step": 48908,
      "training_step_time": 0.39455604553222656
    },
    {
      "epoch": 0.000298516845703125,
      "model_forward_time": 0.11519646644592285,
      "step": 48909
    },
    {
      "epoch": 0.000298516845703125,
      "step": 48909,
      "training_step_time": 0.38484907150268555
    },
    {
      "epoch": 0.00029852294921875,
      "grad_norm": 0.12509235739707947,
      "learning_rate": 9.052938231562236e-06,
      "loss": 0.0347,
      "step": 48910
    },
    {
      "epoch": 0.00029852294921875,
      "model_forward_time": 0.11575055122375488,
      "step": 48910
    },
    {
      "epoch": 0.00029852294921875,
      "step": 48910,
      "training_step_time": 0.6634769439697266
    },
    {
      "epoch": 0.000298529052734375,
      "model_forward_time": 0.11382460594177246,
      "step": 48911
    },
    {
      "epoch": 0.000298529052734375,
      "step": 48911,
      "training_step_time": 0.8207290172576904
    },
    {
      "epoch": 0.00029853515625,
      "model_forward_time": 0.11448979377746582,
      "step": 48912
    },
    {
      "epoch": 0.00029853515625,
      "step": 48912,
      "training_step_time": 0.41464781761169434
    },
    {
      "epoch": 0.000298541259765625,
      "model_forward_time": 0.11396956443786621,
      "step": 48913
    },
    {
      "epoch": 0.000298541259765625,
      "step": 48913,
      "training_step_time": 0.38112354278564453
    },
    {
      "epoch": 0.00029854736328125,
      "model_forward_time": 0.11421871185302734,
      "step": 48914
    },
    {
      "epoch": 0.00029854736328125,
      "step": 48914,
      "training_step_time": 0.38982129096984863
    },
    {
      "epoch": 0.000298553466796875,
      "model_forward_time": 0.11503171920776367,
      "step": 48915
    },
    {
      "epoch": 0.000298553466796875,
      "step": 48915,
      "training_step_time": 0.3768305778503418
    },
    {
      "epoch": 0.0002985595703125,
      "model_forward_time": 0.11417365074157715,
      "step": 48916
    },
    {
      "epoch": 0.0002985595703125,
      "step": 48916,
      "training_step_time": 0.3825187683105469
    },
    {
      "epoch": 0.000298565673828125,
      "model_forward_time": 0.11470508575439453,
      "step": 48917
    },
    {
      "epoch": 0.000298565673828125,
      "step": 48917,
      "training_step_time": 0.6278233528137207
    },
    {
      "epoch": 0.00029857177734375,
      "model_forward_time": 0.11501383781433105,
      "step": 48918
    },
    {
      "epoch": 0.00029857177734375,
      "step": 48918,
      "training_step_time": 0.3889636993408203
    },
    {
      "epoch": 0.000298577880859375,
      "model_forward_time": 0.11512613296508789,
      "step": 48919
    },
    {
      "epoch": 0.000298577880859375,
      "step": 48919,
      "training_step_time": 0.38812828063964844
    },
    {
      "epoch": 0.000298583984375,
      "grad_norm": 0.08366581797599792,
      "learning_rate": 9.037129636095309e-06,
      "loss": 0.0351,
      "step": 48920
    },
    {
      "epoch": 0.000298583984375,
      "model_forward_time": 0.11468625068664551,
      "step": 48920
    },
    {
      "epoch": 0.000298583984375,
      "step": 48920,
      "training_step_time": 0.3870220184326172
    },
    {
      "epoch": 0.000298590087890625,
      "model_forward_time": 0.1153569221496582,
      "step": 48921
    },
    {
      "epoch": 0.000298590087890625,
      "step": 48921,
      "training_step_time": 0.4513070583343506
    },
    {
      "epoch": 0.00029859619140625,
      "model_forward_time": 0.11454105377197266,
      "step": 48922
    },
    {
      "epoch": 0.00029859619140625,
      "step": 48922,
      "training_step_time": 0.7455604076385498
    },
    {
      "epoch": 0.000298602294921875,
      "model_forward_time": 0.11472392082214355,
      "step": 48923
    },
    {
      "epoch": 0.000298602294921875,
      "step": 48923,
      "training_step_time": 0.3991823196411133
    },
    {
      "epoch": 0.0002986083984375,
      "model_forward_time": 0.11495852470397949,
      "step": 48924
    },
    {
      "epoch": 0.0002986083984375,
      "step": 48924,
      "training_step_time": 0.47105908393859863
    },
    {
      "epoch": 0.000298614501953125,
      "model_forward_time": 0.11416149139404297,
      "step": 48925
    },
    {
      "epoch": 0.000298614501953125,
      "step": 48925,
      "training_step_time": 0.43721938133239746
    },
    {
      "epoch": 0.00029862060546875,
      "model_forward_time": 0.11428189277648926,
      "step": 48926
    },
    {
      "epoch": 0.00029862060546875,
      "step": 48926,
      "training_step_time": 0.4523472785949707
    },
    {
      "epoch": 0.000298626708984375,
      "model_forward_time": 0.11479616165161133,
      "step": 48927
    },
    {
      "epoch": 0.000298626708984375,
      "step": 48927,
      "training_step_time": 0.3745293617248535
    },
    {
      "epoch": 0.0002986328125,
      "model_forward_time": 0.11521267890930176,
      "step": 48928
    },
    {
      "epoch": 0.0002986328125,
      "step": 48928,
      "training_step_time": 0.3881371021270752
    },
    {
      "epoch": 0.000298638916015625,
      "model_forward_time": 0.11598753929138184,
      "step": 48929
    },
    {
      "epoch": 0.000298638916015625,
      "step": 48929,
      "training_step_time": 0.38237857818603516
    },
    {
      "epoch": 0.00029864501953125,
      "grad_norm": 0.09187513589859009,
      "learning_rate": 9.02133348406684e-06,
      "loss": 0.0314,
      "step": 48930
    },
    {
      "epoch": 0.00029864501953125,
      "model_forward_time": 0.1148374080657959,
      "step": 48930
    },
    {
      "epoch": 0.00029864501953125,
      "step": 48930,
      "training_step_time": 0.3945164680480957
    },
    {
      "epoch": 0.000298651123046875,
      "model_forward_time": 0.11513781547546387,
      "step": 48931
    },
    {
      "epoch": 0.000298651123046875,
      "step": 48931,
      "training_step_time": 0.3871476650238037
    },
    {
      "epoch": 0.0002986572265625,
      "model_forward_time": 0.11453104019165039,
      "step": 48932
    },
    {
      "epoch": 0.0002986572265625,
      "step": 48932,
      "training_step_time": 0.4011819362640381
    },
    {
      "epoch": 0.000298663330078125,
      "model_forward_time": 0.11597752571105957,
      "step": 48933
    },
    {
      "epoch": 0.000298663330078125,
      "step": 48933,
      "training_step_time": 0.4832324981689453
    },
    {
      "epoch": 0.00029866943359375,
      "model_forward_time": 0.11492180824279785,
      "step": 48934
    },
    {
      "epoch": 0.00029866943359375,
      "step": 48934,
      "training_step_time": 0.7934236526489258
    },
    {
      "epoch": 0.000298675537109375,
      "model_forward_time": 0.11407208442687988,
      "step": 48935
    },
    {
      "epoch": 0.000298675537109375,
      "step": 48935,
      "training_step_time": 0.37622570991516113
    },
    {
      "epoch": 0.000298681640625,
      "model_forward_time": 0.11460733413696289,
      "step": 48936
    },
    {
      "epoch": 0.000298681640625,
      "step": 48936,
      "training_step_time": 0.39131712913513184
    },
    {
      "epoch": 0.000298687744140625,
      "model_forward_time": 0.11414980888366699,
      "step": 48937
    },
    {
      "epoch": 0.000298687744140625,
      "step": 48937,
      "training_step_time": 0.3938431739807129
    },
    {
      "epoch": 0.00029869384765625,
      "model_forward_time": 0.11604189872741699,
      "step": 48938
    },
    {
      "epoch": 0.00029869384765625,
      "step": 48938,
      "training_step_time": 0.4156627655029297
    },
    {
      "epoch": 0.000298699951171875,
      "model_forward_time": 0.11456513404846191,
      "step": 48939
    },
    {
      "epoch": 0.000298699951171875,
      "step": 48939,
      "training_step_time": 0.41318750381469727
    },
    {
      "epoch": 0.0002987060546875,
      "grad_norm": 0.09128793329000473,
      "learning_rate": 9.005549780275263e-06,
      "loss": 0.0369,
      "step": 48940
    },
    {
      "epoch": 0.0002987060546875,
      "model_forward_time": 0.11431169509887695,
      "step": 48940
    },
    {
      "epoch": 0.0002987060546875,
      "step": 48940,
      "training_step_time": 0.8521912097930908
    },
    {
      "epoch": 0.000298712158203125,
      "model_forward_time": 0.11486053466796875,
      "step": 48941
    },
    {
      "epoch": 0.000298712158203125,
      "step": 48941,
      "training_step_time": 0.38044214248657227
    },
    {
      "epoch": 0.00029871826171875,
      "model_forward_time": 0.11433100700378418,
      "step": 48942
    },
    {
      "epoch": 0.00029871826171875,
      "step": 48942,
      "training_step_time": 0.3881795406341553
    },
    {
      "epoch": 0.000298724365234375,
      "model_forward_time": 0.1143636703491211,
      "step": 48943
    },
    {
      "epoch": 0.000298724365234375,
      "step": 48943,
      "training_step_time": 0.38443899154663086
    },
    {
      "epoch": 0.00029873046875,
      "model_forward_time": 0.11429190635681152,
      "step": 48944
    },
    {
      "epoch": 0.00029873046875,
      "step": 48944,
      "training_step_time": 0.38448405265808105
    },
    {
      "epoch": 0.000298736572265625,
      "model_forward_time": 0.11413359642028809,
      "step": 48945
    },
    {
      "epoch": 0.000298736572265625,
      "step": 48945,
      "training_step_time": 0.3857693672180176
    },
    {
      "epoch": 0.00029874267578125,
      "model_forward_time": 0.11421966552734375,
      "step": 48946
    },
    {
      "epoch": 0.00029874267578125,
      "step": 48946,
      "training_step_time": 0.7973241806030273
    },
    {
      "epoch": 0.000298748779296875,
      "model_forward_time": 0.11533141136169434,
      "step": 48947
    },
    {
      "epoch": 0.000298748779296875,
      "step": 48947,
      "training_step_time": 0.4068152904510498
    },
    {
      "epoch": 0.0002987548828125,
      "model_forward_time": 0.11481285095214844,
      "step": 48948
    },
    {
      "epoch": 0.0002987548828125,
      "step": 48948,
      "training_step_time": 0.3807656764984131
    },
    {
      "epoch": 0.000298760986328125,
      "model_forward_time": 0.11472582817077637,
      "step": 48949
    },
    {
      "epoch": 0.000298760986328125,
      "step": 48949,
      "training_step_time": 0.4405858516693115
    },
    {
      "epoch": 0.00029876708984375,
      "grad_norm": 0.08195189386606216,
      "learning_rate": 8.989778529515298e-06,
      "loss": 0.041,
      "step": 48950
    },
    {
      "epoch": 0.00029876708984375,
      "model_forward_time": 0.1148984432220459,
      "step": 48950
    },
    {
      "epoch": 0.00029876708984375,
      "step": 48950,
      "training_step_time": 0.36408281326293945
    },
    {
      "epoch": 0.000298773193359375,
      "model_forward_time": 0.11500978469848633,
      "step": 48951
    },
    {
      "epoch": 0.000298773193359375,
      "step": 48951,
      "training_step_time": 0.42992520332336426
    },
    {
      "epoch": 0.000298779296875,
      "model_forward_time": 0.11449432373046875,
      "step": 48952
    },
    {
      "epoch": 0.000298779296875,
      "step": 48952,
      "training_step_time": 0.5156881809234619
    },
    {
      "epoch": 0.000298785400390625,
      "model_forward_time": 0.11662602424621582,
      "step": 48953
    },
    {
      "epoch": 0.000298785400390625,
      "step": 48953,
      "training_step_time": 0.3846457004547119
    },
    {
      "epoch": 0.00029879150390625,
      "model_forward_time": 0.11452603340148926,
      "step": 48954
    },
    {
      "epoch": 0.00029879150390625,
      "step": 48954,
      "training_step_time": 0.38323235511779785
    },
    {
      "epoch": 0.000298797607421875,
      "model_forward_time": 0.11482477188110352,
      "step": 48955
    },
    {
      "epoch": 0.000298797607421875,
      "step": 48955,
      "training_step_time": 0.39124226570129395
    },
    {
      "epoch": 0.0002988037109375,
      "model_forward_time": 0.11501884460449219,
      "step": 48956
    },
    {
      "epoch": 0.0002988037109375,
      "step": 48956,
      "training_step_time": 0.3862578868865967
    },
    {
      "epoch": 0.000298809814453125,
      "model_forward_time": 0.11490297317504883,
      "step": 48957
    },
    {
      "epoch": 0.000298809814453125,
      "step": 48957,
      "training_step_time": 0.383317232131958
    },
    {
      "epoch": 0.00029881591796875,
      "model_forward_time": 0.11563324928283691,
      "step": 48958
    },
    {
      "epoch": 0.00029881591796875,
      "step": 48958,
      "training_step_time": 0.9955668449401855
    },
    {
      "epoch": 0.000298822021484375,
      "model_forward_time": 0.11436796188354492,
      "step": 48959
    },
    {
      "epoch": 0.000298822021484375,
      "step": 48959,
      "training_step_time": 0.3891425132751465
    },
    {
      "epoch": 0.000298828125,
      "grad_norm": 0.09434966742992401,
      "learning_rate": 8.974019736577777e-06,
      "loss": 0.0361,
      "step": 48960
    },
    {
      "epoch": 0.000298828125,
      "model_forward_time": 0.11407303810119629,
      "step": 48960
    },
    {
      "epoch": 0.000298828125,
      "step": 48960,
      "training_step_time": 0.40706944465637207
    },
    {
      "epoch": 0.000298834228515625,
      "model_forward_time": 0.11492252349853516,
      "step": 48961
    },
    {
      "epoch": 0.000298834228515625,
      "step": 48961,
      "training_step_time": 0.38883519172668457
    },
    {
      "epoch": 0.00029884033203125,
      "model_forward_time": 0.11425209045410156,
      "step": 48962
    },
    {
      "epoch": 0.00029884033203125,
      "step": 48962,
      "training_step_time": 0.3834269046783447
    },
    {
      "epoch": 0.000298846435546875,
      "model_forward_time": 0.11473321914672852,
      "step": 48963
    },
    {
      "epoch": 0.000298846435546875,
      "step": 48963,
      "training_step_time": 0.38373851776123047
    },
    {
      "epoch": 0.0002988525390625,
      "model_forward_time": 0.11708450317382812,
      "step": 48964
    },
    {
      "epoch": 0.0002988525390625,
      "step": 48964,
      "training_step_time": 0.8181943893432617
    },
    {
      "epoch": 0.000298858642578125,
      "model_forward_time": 0.11428475379943848,
      "step": 48965
    },
    {
      "epoch": 0.000298858642578125,
      "step": 48965,
      "training_step_time": 0.39252352714538574
    },
    {
      "epoch": 0.00029886474609375,
      "model_forward_time": 0.11429738998413086,
      "step": 48966
    },
    {
      "epoch": 0.00029886474609375,
      "step": 48966,
      "training_step_time": 0.3941488265991211
    },
    {
      "epoch": 0.000298870849609375,
      "model_forward_time": 0.11451220512390137,
      "step": 48967
    },
    {
      "epoch": 0.000298870849609375,
      "step": 48967,
      "training_step_time": 0.3841879367828369
    },
    {
      "epoch": 0.000298876953125,
      "model_forward_time": 0.1144871711730957,
      "step": 48968
    },
    {
      "epoch": 0.000298876953125,
      "step": 48968,
      "training_step_time": 0.3837850093841553
    },
    {
      "epoch": 0.000298883056640625,
      "model_forward_time": 0.11459994316101074,
      "step": 48969
    },
    {
      "epoch": 0.000298883056640625,
      "step": 48969,
      "training_step_time": 0.3991246223449707
    },
    {
      "epoch": 0.00029888916015625,
      "grad_norm": 0.09095155447721481,
      "learning_rate": 8.958273406249839e-06,
      "loss": 0.0325,
      "step": 48970
    },
    {
      "epoch": 0.00029888916015625,
      "model_forward_time": 0.11477136611938477,
      "step": 48970
    },
    {
      "epoch": 0.00029888916015625,
      "step": 48970,
      "training_step_time": 0.44904160499572754
    },
    {
      "epoch": 0.000298895263671875,
      "model_forward_time": 0.11513304710388184,
      "step": 48971
    },
    {
      "epoch": 0.000298895263671875,
      "step": 48971,
      "training_step_time": 0.3974640369415283
    },
    {
      "epoch": 0.0002989013671875,
      "model_forward_time": 0.11506271362304688,
      "step": 48972
    },
    {
      "epoch": 0.0002989013671875,
      "step": 48972,
      "training_step_time": 0.5059812068939209
    },
    {
      "epoch": 0.000298907470703125,
      "model_forward_time": 0.1156008243560791,
      "step": 48973
    },
    {
      "epoch": 0.000298907470703125,
      "step": 48973,
      "training_step_time": 0.4207165241241455
    },
    {
      "epoch": 0.00029891357421875,
      "model_forward_time": 0.11504054069519043,
      "step": 48974
    },
    {
      "epoch": 0.00029891357421875,
      "step": 48974,
      "training_step_time": 0.4950997829437256
    },
    {
      "epoch": 0.000298919677734375,
      "model_forward_time": 0.11498594284057617,
      "step": 48975
    },
    {
      "epoch": 0.000298919677734375,
      "step": 48975,
      "training_step_time": 0.3914318084716797
    },
    {
      "epoch": 0.00029892578125,
      "model_forward_time": 0.11482739448547363,
      "step": 48976
    },
    {
      "epoch": 0.00029892578125,
      "step": 48976,
      "training_step_time": 0.3883535861968994
    },
    {
      "epoch": 0.000298931884765625,
      "model_forward_time": 0.11571836471557617,
      "step": 48977
    },
    {
      "epoch": 0.000298931884765625,
      "step": 48977,
      "training_step_time": 0.393310546875
    },
    {
      "epoch": 0.00029893798828125,
      "model_forward_time": 0.11671924591064453,
      "step": 48978
    },
    {
      "epoch": 0.00029893798828125,
      "step": 48978,
      "training_step_time": 0.4391300678253174
    },
    {
      "epoch": 0.000298944091796875,
      "model_forward_time": 0.11516714096069336,
      "step": 48979
    },
    {
      "epoch": 0.000298944091796875,
      "step": 48979,
      "training_step_time": 0.474837064743042
    },
    {
      "epoch": 0.0002989501953125,
      "grad_norm": 0.09144923835992813,
      "learning_rate": 8.9425395433148e-06,
      "loss": 0.0368,
      "step": 48980
    },
    {
      "epoch": 0.0002989501953125,
      "model_forward_time": 0.1153712272644043,
      "step": 48980
    },
    {
      "epoch": 0.0002989501953125,
      "step": 48980,
      "training_step_time": 0.4622659683227539
    },
    {
      "epoch": 0.000298956298828125,
      "model_forward_time": 0.11531591415405273,
      "step": 48981
    },
    {
      "epoch": 0.000298956298828125,
      "step": 48981,
      "training_step_time": 0.38924741744995117
    },
    {
      "epoch": 0.00029896240234375,
      "model_forward_time": 0.1153252124786377,
      "step": 48982
    },
    {
      "epoch": 0.00029896240234375,
      "step": 48982,
      "training_step_time": 0.3884084224700928
    },
    {
      "epoch": 0.000298968505859375,
      "model_forward_time": 0.11522436141967773,
      "step": 48983
    },
    {
      "epoch": 0.000298968505859375,
      "step": 48983,
      "training_step_time": 0.38320088386535645
    },
    {
      "epoch": 0.000298974609375,
      "model_forward_time": 0.11546206474304199,
      "step": 48984
    },
    {
      "epoch": 0.000298974609375,
      "step": 48984,
      "training_step_time": 0.4012465476989746
    },
    {
      "epoch": 0.000298980712890625,
      "model_forward_time": 0.1152486801147461,
      "step": 48985
    },
    {
      "epoch": 0.000298980712890625,
      "step": 48985,
      "training_step_time": 0.3925149440765381
    },
    {
      "epoch": 0.00029898681640625,
      "model_forward_time": 0.11614203453063965,
      "step": 48986
    },
    {
      "epoch": 0.00029898681640625,
      "step": 48986,
      "training_step_time": 0.400435209274292
    },
    {
      "epoch": 0.000298992919921875,
      "model_forward_time": 0.11650204658508301,
      "step": 48987
    },
    {
      "epoch": 0.000298992919921875,
      "step": 48987,
      "training_step_time": 0.43488478660583496
    },
    {
      "epoch": 0.0002989990234375,
      "model_forward_time": 0.11529374122619629,
      "step": 48988
    },
    {
      "epoch": 0.0002989990234375,
      "step": 48988,
      "training_step_time": 0.4810149669647217
    },
    {
      "epoch": 0.000299005126953125,
      "model_forward_time": 0.11601901054382324,
      "step": 48989
    },
    {
      "epoch": 0.000299005126953125,
      "step": 48989,
      "training_step_time": 0.41988086700439453
    },
    {
      "epoch": 0.00029901123046875,
      "grad_norm": 0.1062108725309372,
      "learning_rate": 8.92681815255219e-06,
      "loss": 0.0389,
      "step": 48990
    },
    {
      "epoch": 0.00029901123046875,
      "model_forward_time": 0.11517691612243652,
      "step": 48990
    },
    {
      "epoch": 0.00029901123046875,
      "step": 48990,
      "training_step_time": 0.4186859130859375
    },
    {
      "epoch": 0.000299017333984375,
      "model_forward_time": 0.11464500427246094,
      "step": 48991
    },
    {
      "epoch": 0.000299017333984375,
      "step": 48991,
      "training_step_time": 0.38962340354919434
    },
    {
      "epoch": 0.0002990234375,
      "model_forward_time": 0.11538100242614746,
      "step": 48992
    },
    {
      "epoch": 0.0002990234375,
      "step": 48992,
      "training_step_time": 0.39055943489074707
    },
    {
      "epoch": 0.000299029541015625,
      "model_forward_time": 0.11559033393859863,
      "step": 48993
    },
    {
      "epoch": 0.000299029541015625,
      "step": 48993,
      "training_step_time": 0.5081806182861328
    },
    {
      "epoch": 0.00029903564453125,
      "model_forward_time": 0.11577272415161133,
      "step": 48994
    },
    {
      "epoch": 0.00029903564453125,
      "step": 48994,
      "training_step_time": 0.6621546745300293
    },
    {
      "epoch": 0.000299041748046875,
      "model_forward_time": 0.1141970157623291,
      "step": 48995
    },
    {
      "epoch": 0.000299041748046875,
      "step": 48995,
      "training_step_time": 0.3746652603149414
    },
    {
      "epoch": 0.0002990478515625,
      "model_forward_time": 0.11540365219116211,
      "step": 48996
    },
    {
      "epoch": 0.0002990478515625,
      "step": 48996,
      "training_step_time": 0.38938021659851074
    },
    {
      "epoch": 0.000299053955078125,
      "model_forward_time": 0.11422419548034668,
      "step": 48997
    },
    {
      "epoch": 0.000299053955078125,
      "step": 48997,
      "training_step_time": 0.3893287181854248
    },
    {
      "epoch": 0.00029906005859375,
      "model_forward_time": 0.11449074745178223,
      "step": 48998
    },
    {
      "epoch": 0.00029906005859375,
      "step": 48998,
      "training_step_time": 0.3903524875640869
    },
    {
      "epoch": 0.000299066162109375,
      "model_forward_time": 0.11484646797180176,
      "step": 48999
    },
    {
      "epoch": 0.000299066162109375,
      "step": 48999,
      "training_step_time": 0.38823986053466797
    },
    {
      "epoch": 0.000299072265625,
      "grad_norm": 0.12187421321868896,
      "learning_rate": 8.911109238737747e-06,
      "loss": 0.0348,
      "step": 49000
    },
    {
      "epoch": 0.000299072265625,
      "model_forward_time": 0.11356711387634277,
      "step": 49000
    },
    {
      "epoch": 0.000299072265625,
      "step": 49000,
      "training_step_time": 0.35633063316345215
    },
    {
      "epoch": 0.000299078369140625,
      "model_forward_time": 0.11179852485656738,
      "step": 49001
    },
    {
      "epoch": 0.000299078369140625,
      "step": 49001,
      "training_step_time": 0.4505503177642822
    },
    {
      "epoch": 0.00029908447265625,
      "model_forward_time": 0.11225581169128418,
      "step": 49002
    },
    {
      "epoch": 0.00029908447265625,
      "step": 49002,
      "training_step_time": 0.40694403648376465
    },
    {
      "epoch": 0.000299090576171875,
      "model_forward_time": 0.11337685585021973,
      "step": 49003
    },
    {
      "epoch": 0.000299090576171875,
      "step": 49003,
      "training_step_time": 0.4007856845855713
    },
    {
      "epoch": 0.0002990966796875,
      "model_forward_time": 0.1132504940032959,
      "step": 49004
    },
    {
      "epoch": 0.0002990966796875,
      "step": 49004,
      "training_step_time": 0.3803136348724365
    },
    {
      "epoch": 0.000299102783203125,
      "model_forward_time": 0.11362457275390625,
      "step": 49005
    },
    {
      "epoch": 0.000299102783203125,
      "step": 49005,
      "training_step_time": 0.38120174407958984
    },
    {
      "epoch": 0.00029910888671875,
      "model_forward_time": 0.11486625671386719,
      "step": 49006
    },
    {
      "epoch": 0.00029910888671875,
      "step": 49006,
      "training_step_time": 0.38721489906311035
    },
    {
      "epoch": 0.000299114990234375,
      "model_forward_time": 0.11501550674438477,
      "step": 49007
    },
    {
      "epoch": 0.000299114990234375,
      "step": 49007,
      "training_step_time": 0.39619946479797363
    },
    {
      "epoch": 0.00029912109375,
      "model_forward_time": 0.11486005783081055,
      "step": 49008
    },
    {
      "epoch": 0.00029912109375,
      "step": 49008,
      "training_step_time": 0.4899773597717285
    },
    {
      "epoch": 0.000299127197265625,
      "model_forward_time": 0.11483120918273926,
      "step": 49009
    },
    {
      "epoch": 0.000299127197265625,
      "step": 49009,
      "training_step_time": 0.5168542861938477
    },
    {
      "epoch": 0.00029913330078125,
      "grad_norm": 0.13190363347530365,
      "learning_rate": 8.895412806643427e-06,
      "loss": 0.0396,
      "step": 49010
    },
    {
      "epoch": 0.00029913330078125,
      "model_forward_time": 0.11464762687683105,
      "step": 49010
    },
    {
      "epoch": 0.00029913330078125,
      "step": 49010,
      "training_step_time": 0.39321136474609375
    },
    {
      "epoch": 0.000299139404296875,
      "model_forward_time": 0.11503219604492188,
      "step": 49011
    },
    {
      "epoch": 0.000299139404296875,
      "step": 49011,
      "training_step_time": 0.3908345699310303
    },
    {
      "epoch": 0.0002991455078125,
      "model_forward_time": 0.11509847640991211,
      "step": 49012
    },
    {
      "epoch": 0.0002991455078125,
      "step": 49012,
      "training_step_time": 0.3842637538909912
    },
    {
      "epoch": 0.000299151611328125,
      "model_forward_time": 0.11451482772827148,
      "step": 49013
    },
    {
      "epoch": 0.000299151611328125,
      "step": 49013,
      "training_step_time": 0.39162755012512207
    },
    {
      "epoch": 0.00029915771484375,
      "model_forward_time": 0.11435818672180176,
      "step": 49014
    },
    {
      "epoch": 0.00029915771484375,
      "step": 49014,
      "training_step_time": 0.3942441940307617
    },
    {
      "epoch": 0.000299163818359375,
      "model_forward_time": 0.1151435375213623,
      "step": 49015
    },
    {
      "epoch": 0.000299163818359375,
      "step": 49015,
      "training_step_time": 0.4317634105682373
    },
    {
      "epoch": 0.000299169921875,
      "model_forward_time": 0.11587095260620117,
      "step": 49016
    },
    {
      "epoch": 0.000299169921875,
      "step": 49016,
      "training_step_time": 0.41694021224975586
    },
    {
      "epoch": 0.000299176025390625,
      "model_forward_time": 0.11748504638671875,
      "step": 49017
    },
    {
      "epoch": 0.000299176025390625,
      "step": 49017,
      "training_step_time": 0.44835519790649414
    },
    {
      "epoch": 0.00029918212890625,
      "model_forward_time": 0.11525225639343262,
      "step": 49018
    },
    {
      "epoch": 0.00029918212890625,
      "step": 49018,
      "training_step_time": 0.40343761444091797
    },
    {
      "epoch": 0.000299188232421875,
      "model_forward_time": 0.1150355339050293,
      "step": 49019
    },
    {
      "epoch": 0.000299188232421875,
      "step": 49019,
      "training_step_time": 0.3938906192779541
    },
    {
      "epoch": 0.0002991943359375,
      "grad_norm": 0.09511201828718185,
      "learning_rate": 8.879728861037384e-06,
      "loss": 0.0366,
      "step": 49020
    },
    {
      "epoch": 0.0002991943359375,
      "model_forward_time": 0.1147761344909668,
      "step": 49020
    },
    {
      "epoch": 0.0002991943359375,
      "step": 49020,
      "training_step_time": 0.4004807472229004
    },
    {
      "epoch": 0.000299200439453125,
      "model_forward_time": 0.11588740348815918,
      "step": 49021
    },
    {
      "epoch": 0.000299200439453125,
      "step": 49021,
      "training_step_time": 0.39049625396728516
    },
    {
      "epoch": 0.00029920654296875,
      "model_forward_time": 0.1157996654510498,
      "step": 49022
    },
    {
      "epoch": 0.00029920654296875,
      "step": 49022,
      "training_step_time": 0.4505653381347656
    },
    {
      "epoch": 0.000299212646484375,
      "model_forward_time": 0.11633801460266113,
      "step": 49023
    },
    {
      "epoch": 0.000299212646484375,
      "step": 49023,
      "training_step_time": 0.45953941345214844
    },
    {
      "epoch": 0.00029921875,
      "model_forward_time": 0.11498284339904785,
      "step": 49024
    },
    {
      "epoch": 0.00029921875,
      "step": 49024,
      "training_step_time": 0.4957747459411621
    },
    {
      "epoch": 0.000299224853515625,
      "model_forward_time": 0.11655664443969727,
      "step": 49025
    },
    {
      "epoch": 0.000299224853515625,
      "step": 49025,
      "training_step_time": 0.38433337211608887
    },
    {
      "epoch": 0.00029923095703125,
      "model_forward_time": 0.11470627784729004,
      "step": 49026
    },
    {
      "epoch": 0.00029923095703125,
      "step": 49026,
      "training_step_time": 0.3952019214630127
    },
    {
      "epoch": 0.000299237060546875,
      "model_forward_time": 0.11494278907775879,
      "step": 49027
    },
    {
      "epoch": 0.000299237060546875,
      "step": 49027,
      "training_step_time": 0.40271615982055664
    },
    {
      "epoch": 0.0002992431640625,
      "model_forward_time": 0.11513900756835938,
      "step": 49028
    },
    {
      "epoch": 0.0002992431640625,
      "step": 49028,
      "training_step_time": 0.39403343200683594
    },
    {
      "epoch": 0.000299249267578125,
      "model_forward_time": 0.11519455909729004,
      "step": 49029
    },
    {
      "epoch": 0.000299249267578125,
      "step": 49029,
      "training_step_time": 0.4396395683288574
    },
    {
      "epoch": 0.00029925537109375,
      "grad_norm": 0.08356130868196487,
      "learning_rate": 8.864057406684023e-06,
      "loss": 0.0375,
      "step": 49030
    },
    {
      "epoch": 0.00029925537109375,
      "model_forward_time": 0.11472558975219727,
      "step": 49030
    },
    {
      "epoch": 0.00029925537109375,
      "step": 49030,
      "training_step_time": 0.44492244720458984
    },
    {
      "epoch": 0.000299261474609375,
      "model_forward_time": 0.11503863334655762,
      "step": 49031
    },
    {
      "epoch": 0.000299261474609375,
      "step": 49031,
      "training_step_time": 0.4015984535217285
    },
    {
      "epoch": 0.000299267578125,
      "model_forward_time": 0.11486673355102539,
      "step": 49032
    },
    {
      "epoch": 0.000299267578125,
      "step": 49032,
      "training_step_time": 0.47705698013305664
    },
    {
      "epoch": 0.000299273681640625,
      "model_forward_time": 0.11458039283752441,
      "step": 49033
    },
    {
      "epoch": 0.000299273681640625,
      "step": 49033,
      "training_step_time": 0.3884272575378418
    },
    {
      "epoch": 0.00029927978515625,
      "model_forward_time": 0.1150050163269043,
      "step": 49034
    },
    {
      "epoch": 0.00029927978515625,
      "step": 49034,
      "training_step_time": 0.39639949798583984
    },
    {
      "epoch": 0.000299285888671875,
      "model_forward_time": 0.11504840850830078,
      "step": 49035
    },
    {
      "epoch": 0.000299285888671875,
      "step": 49035,
      "training_step_time": 0.39026689529418945
    },
    {
      "epoch": 0.0002992919921875,
      "model_forward_time": 0.11540365219116211,
      "step": 49036
    },
    {
      "epoch": 0.0002992919921875,
      "step": 49036,
      "training_step_time": 0.3879849910736084
    },
    {
      "epoch": 0.000299298095703125,
      "model_forward_time": 0.1154778003692627,
      "step": 49037
    },
    {
      "epoch": 0.000299298095703125,
      "step": 49037,
      "training_step_time": 0.4663233757019043
    },
    {
      "epoch": 0.00029930419921875,
      "model_forward_time": 0.1152489185333252,
      "step": 49038
    },
    {
      "epoch": 0.00029930419921875,
      "step": 49038,
      "training_step_time": 0.4825592041015625
    },
    {
      "epoch": 0.000299310302734375,
      "model_forward_time": 0.11429524421691895,
      "step": 49039
    },
    {
      "epoch": 0.000299310302734375,
      "step": 49039,
      "training_step_time": 0.5395665168762207
    },
    {
      "epoch": 0.00029931640625,
      "grad_norm": 0.11434103548526764,
      "learning_rate": 8.848398448343859e-06,
      "loss": 0.0343,
      "step": 49040
    },
    {
      "epoch": 0.00029931640625,
      "model_forward_time": 0.11451220512390137,
      "step": 49040
    },
    {
      "epoch": 0.00029931640625,
      "step": 49040,
      "training_step_time": 0.39418649673461914
    },
    {
      "epoch": 0.000299322509765625,
      "model_forward_time": 0.11460518836975098,
      "step": 49041
    },
    {
      "epoch": 0.000299322509765625,
      "step": 49041,
      "training_step_time": 0.3853600025177002
    },
    {
      "epoch": 0.00029932861328125,
      "model_forward_time": 0.11493754386901855,
      "step": 49042
    },
    {
      "epoch": 0.00029932861328125,
      "step": 49042,
      "training_step_time": 0.3849189281463623
    },
    {
      "epoch": 0.000299334716796875,
      "model_forward_time": 0.11462235450744629,
      "step": 49043
    },
    {
      "epoch": 0.000299334716796875,
      "step": 49043,
      "training_step_time": 0.4106721878051758
    },
    {
      "epoch": 0.0002993408203125,
      "model_forward_time": 0.11503124237060547,
      "step": 49044
    },
    {
      "epoch": 0.0002993408203125,
      "step": 49044,
      "training_step_time": 0.486752986907959
    },
    {
      "epoch": 0.000299346923828125,
      "model_forward_time": 0.1145012378692627,
      "step": 49045
    },
    {
      "epoch": 0.000299346923828125,
      "step": 49045,
      "training_step_time": 0.4414503574371338
    },
    {
      "epoch": 0.00029935302734375,
      "model_forward_time": 0.11498069763183594,
      "step": 49046
    },
    {
      "epoch": 0.00029935302734375,
      "step": 49046,
      "training_step_time": 0.3978283405303955
    },
    {
      "epoch": 0.000299359130859375,
      "model_forward_time": 0.1154627799987793,
      "step": 49047
    },
    {
      "epoch": 0.000299359130859375,
      "step": 49047,
      "training_step_time": 0.38439106941223145
    },
    {
      "epoch": 0.000299365234375,
      "model_forward_time": 0.11534762382507324,
      "step": 49048
    },
    {
      "epoch": 0.000299365234375,
      "step": 49048,
      "training_step_time": 0.38643860816955566
    },
    {
      "epoch": 0.000299371337890625,
      "model_forward_time": 0.11513352394104004,
      "step": 49049
    },
    {
      "epoch": 0.000299371337890625,
      "step": 49049,
      "training_step_time": 0.38789963722229004
    },
    {
      "epoch": 0.00029937744140625,
      "grad_norm": 0.1001303419470787,
      "learning_rate": 8.832751990773714e-06,
      "loss": 0.0389,
      "step": 49050
    },
    {
      "epoch": 0.00029937744140625,
      "model_forward_time": 0.11469006538391113,
      "step": 49050
    },
    {
      "epoch": 0.00029937744140625,
      "step": 49050,
      "training_step_time": 0.3984370231628418
    },
    {
      "epoch": 0.000299383544921875,
      "model_forward_time": 0.11530160903930664,
      "step": 49051
    },
    {
      "epoch": 0.000299383544921875,
      "step": 49051,
      "training_step_time": 0.3688502311706543
    },
    {
      "epoch": 0.0002993896484375,
      "model_forward_time": 0.11524248123168945,
      "step": 49052
    },
    {
      "epoch": 0.0002993896484375,
      "step": 49052,
      "training_step_time": 0.44760942459106445
    },
    {
      "epoch": 0.000299395751953125,
      "model_forward_time": 0.11534810066223145,
      "step": 49053
    },
    {
      "epoch": 0.000299395751953125,
      "step": 49053,
      "training_step_time": 0.4409470558166504
    },
    {
      "epoch": 0.00029940185546875,
      "model_forward_time": 0.11475992202758789,
      "step": 49054
    },
    {
      "epoch": 0.00029940185546875,
      "step": 49054,
      "training_step_time": 0.39285826683044434
    },
    {
      "epoch": 0.000299407958984375,
      "model_forward_time": 0.11554837226867676,
      "step": 49055
    },
    {
      "epoch": 0.000299407958984375,
      "step": 49055,
      "training_step_time": 0.39609265327453613
    },
    {
      "epoch": 0.0002994140625,
      "model_forward_time": 0.1156466007232666,
      "step": 49056
    },
    {
      "epoch": 0.0002994140625,
      "step": 49056,
      "training_step_time": 0.4402635097503662
    },
    {
      "epoch": 0.000299420166015625,
      "model_forward_time": 0.11496376991271973,
      "step": 49057
    },
    {
      "epoch": 0.000299420166015625,
      "step": 49057,
      "training_step_time": 0.4130740165710449
    },
    {
      "epoch": 0.00029942626953125,
      "model_forward_time": 0.11551308631896973,
      "step": 49058
    },
    {
      "epoch": 0.00029942626953125,
      "step": 49058,
      "training_step_time": 0.42313385009765625
    },
    {
      "epoch": 0.000299432373046875,
      "model_forward_time": 0.11440587043762207,
      "step": 49059
    },
    {
      "epoch": 0.000299432373046875,
      "step": 49059,
      "training_step_time": 0.41436171531677246
    },
    {
      "epoch": 0.0002994384765625,
      "grad_norm": 0.09699883311986923,
      "learning_rate": 8.817118038726558e-06,
      "loss": 0.034,
      "step": 49060
    },
    {
      "epoch": 0.0002994384765625,
      "model_forward_time": 0.11499333381652832,
      "step": 49060
    },
    {
      "epoch": 0.0002994384765625,
      "step": 49060,
      "training_step_time": 0.49428749084472656
    },
    {
      "epoch": 0.000299444580078125,
      "model_forward_time": 0.11588406562805176,
      "step": 49061
    },
    {
      "epoch": 0.000299444580078125,
      "step": 49061,
      "training_step_time": 0.40071582794189453
    },
    {
      "epoch": 0.00029945068359375,
      "model_forward_time": 0.11584615707397461,
      "step": 49062
    },
    {
      "epoch": 0.00029945068359375,
      "step": 49062,
      "training_step_time": 0.3929412364959717
    },
    {
      "epoch": 0.000299456787109375,
      "model_forward_time": 0.11512303352355957,
      "step": 49063
    },
    {
      "epoch": 0.000299456787109375,
      "step": 49063,
      "training_step_time": 0.3941926956176758
    },
    {
      "epoch": 0.000299462890625,
      "model_forward_time": 0.11451554298400879,
      "step": 49064
    },
    {
      "epoch": 0.000299462890625,
      "step": 49064,
      "training_step_time": 0.39221954345703125
    },
    {
      "epoch": 0.000299468994140625,
      "model_forward_time": 0.11552071571350098,
      "step": 49065
    },
    {
      "epoch": 0.000299468994140625,
      "step": 49065,
      "training_step_time": 0.387866735458374
    },
    {
      "epoch": 0.00029947509765625,
      "model_forward_time": 0.1158745288848877,
      "step": 49066
    },
    {
      "epoch": 0.00029947509765625,
      "step": 49066,
      "training_step_time": 0.4423809051513672
    },
    {
      "epoch": 0.000299481201171875,
      "model_forward_time": 0.11570262908935547,
      "step": 49067
    },
    {
      "epoch": 0.000299481201171875,
      "step": 49067,
      "training_step_time": 0.4609487056732178
    },
    {
      "epoch": 0.0002994873046875,
      "model_forward_time": 0.1163334846496582,
      "step": 49068
    },
    {
      "epoch": 0.0002994873046875,
      "step": 49068,
      "training_step_time": 0.5003809928894043
    },
    {
      "epoch": 0.000299493408203125,
      "model_forward_time": 0.11477017402648926,
      "step": 49069
    },
    {
      "epoch": 0.000299493408203125,
      "step": 49069,
      "training_step_time": 0.39168763160705566
    },
    {
      "epoch": 0.00029949951171875,
      "grad_norm": 0.10506104677915573,
      "learning_rate": 8.801496596951575e-06,
      "loss": 0.0361,
      "step": 49070
    },
    {
      "epoch": 0.00029949951171875,
      "model_forward_time": 0.11519455909729004,
      "step": 49070
    },
    {
      "epoch": 0.00029949951171875,
      "step": 49070,
      "training_step_time": 0.4129607677459717
    },
    {
      "epoch": 0.000299505615234375,
      "model_forward_time": 0.11509895324707031,
      "step": 49071
    },
    {
      "epoch": 0.000299505615234375,
      "step": 49071,
      "training_step_time": 0.4108092784881592
    },
    {
      "epoch": 0.00029951171875,
      "model_forward_time": 0.1143960952758789,
      "step": 49072
    },
    {
      "epoch": 0.00029951171875,
      "step": 49072,
      "training_step_time": 0.3889174461364746
    },
    {
      "epoch": 0.000299517822265625,
      "model_forward_time": 0.1155085563659668,
      "step": 49073
    },
    {
      "epoch": 0.000299517822265625,
      "step": 49073,
      "training_step_time": 0.45804929733276367
    },
    {
      "epoch": 0.00029952392578125,
      "model_forward_time": 0.1153564453125,
      "step": 49074
    },
    {
      "epoch": 0.00029952392578125,
      "step": 49074,
      "training_step_time": 0.42183899879455566
    },
    {
      "epoch": 0.000299530029296875,
      "model_forward_time": 0.1147909164428711,
      "step": 49075
    },
    {
      "epoch": 0.000299530029296875,
      "step": 49075,
      "training_step_time": 0.5150063037872314
    },
    {
      "epoch": 0.0002995361328125,
      "model_forward_time": 0.11476564407348633,
      "step": 49076
    },
    {
      "epoch": 0.0002995361328125,
      "step": 49076,
      "training_step_time": 0.3989126682281494
    },
    {
      "epoch": 0.000299542236328125,
      "model_forward_time": 0.11595582962036133,
      "step": 49077
    },
    {
      "epoch": 0.000299542236328125,
      "step": 49077,
      "training_step_time": 0.38872218132019043
    },
    {
      "epoch": 0.00029954833984375,
      "model_forward_time": 0.11542582511901855,
      "step": 49078
    },
    {
      "epoch": 0.00029954833984375,
      "step": 49078,
      "training_step_time": 0.3893108367919922
    },
    {
      "epoch": 0.000299554443359375,
      "model_forward_time": 0.11467361450195312,
      "step": 49079
    },
    {
      "epoch": 0.000299554443359375,
      "step": 49079,
      "training_step_time": 0.418900728225708
    },
    {
      "epoch": 0.000299560546875,
      "grad_norm": 0.10576397180557251,
      "learning_rate": 8.785887670194138e-06,
      "loss": 0.0389,
      "step": 49080
    },
    {
      "epoch": 0.000299560546875,
      "model_forward_time": 0.1156759262084961,
      "step": 49080
    },
    {
      "epoch": 0.000299560546875,
      "step": 49080,
      "training_step_time": 0.40616726875305176
    },
    {
      "epoch": 0.000299566650390625,
      "model_forward_time": 0.11434769630432129,
      "step": 49081
    },
    {
      "epoch": 0.000299566650390625,
      "step": 49081,
      "training_step_time": 0.3984816074371338
    },
    {
      "epoch": 0.00029957275390625,
      "model_forward_time": 0.11570525169372559,
      "step": 49082
    },
    {
      "epoch": 0.00029957275390625,
      "step": 49082,
      "training_step_time": 0.3939383029937744
    },
    {
      "epoch": 0.000299578857421875,
      "model_forward_time": 0.11553239822387695,
      "step": 49083
    },
    {
      "epoch": 0.000299578857421875,
      "step": 49083,
      "training_step_time": 0.416379451751709
    },
    {
      "epoch": 0.0002995849609375,
      "model_forward_time": 0.11533737182617188,
      "step": 49084
    },
    {
      "epoch": 0.0002995849609375,
      "step": 49084,
      "training_step_time": 0.39130234718322754
    },
    {
      "epoch": 0.000299591064453125,
      "model_forward_time": 0.11511397361755371,
      "step": 49085
    },
    {
      "epoch": 0.000299591064453125,
      "step": 49085,
      "training_step_time": 0.4065091609954834
    },
    {
      "epoch": 0.00029959716796875,
      "model_forward_time": 0.11502671241760254,
      "step": 49086
    },
    {
      "epoch": 0.00029959716796875,
      "step": 49086,
      "training_step_time": 0.41753101348876953
    },
    {
      "epoch": 0.000299603271484375,
      "model_forward_time": 0.11541223526000977,
      "step": 49087
    },
    {
      "epoch": 0.000299603271484375,
      "step": 49087,
      "training_step_time": 0.39344143867492676
    },
    {
      "epoch": 0.000299609375,
      "model_forward_time": 0.11520266532897949,
      "step": 49088
    },
    {
      "epoch": 0.000299609375,
      "step": 49088,
      "training_step_time": 0.4165654182434082
    },
    {
      "epoch": 0.000299615478515625,
      "model_forward_time": 0.11496615409851074,
      "step": 49089
    },
    {
      "epoch": 0.000299615478515625,
      "step": 49089,
      "training_step_time": 0.3816652297973633
    },
    {
      "epoch": 0.00029962158203125,
      "grad_norm": 0.09686014801263809,
      "learning_rate": 8.770291263195819e-06,
      "loss": 0.0354,
      "step": 49090
    },
    {
      "epoch": 0.00029962158203125,
      "model_forward_time": 0.1151585578918457,
      "step": 49090
    },
    {
      "epoch": 0.00029962158203125,
      "step": 49090,
      "training_step_time": 0.4150209426879883
    },
    {
      "epoch": 0.000299627685546875,
      "model_forward_time": 0.1154177188873291,
      "step": 49091
    },
    {
      "epoch": 0.000299627685546875,
      "step": 49091,
      "training_step_time": 0.38321518898010254
    },
    {
      "epoch": 0.0002996337890625,
      "model_forward_time": 0.11479020118713379,
      "step": 49092
    },
    {
      "epoch": 0.0002996337890625,
      "step": 49092,
      "training_step_time": 0.39936351776123047
    },
    {
      "epoch": 0.000299639892578125,
      "model_forward_time": 0.11539578437805176,
      "step": 49093
    },
    {
      "epoch": 0.000299639892578125,
      "step": 49093,
      "training_step_time": 0.39138126373291016
    },
    {
      "epoch": 0.00029964599609375,
      "model_forward_time": 0.11528658866882324,
      "step": 49094
    },
    {
      "epoch": 0.00029964599609375,
      "step": 49094,
      "training_step_time": 0.39822816848754883
    },
    {
      "epoch": 0.000299652099609375,
      "model_forward_time": 0.11591029167175293,
      "step": 49095
    },
    {
      "epoch": 0.000299652099609375,
      "step": 49095,
      "training_step_time": 0.4189314842224121
    },
    {
      "epoch": 0.000299658203125,
      "model_forward_time": 0.11592268943786621,
      "step": 49096
    },
    {
      "epoch": 0.000299658203125,
      "step": 49096,
      "training_step_time": 0.47978782653808594
    },
    {
      "epoch": 0.000299664306640625,
      "model_forward_time": 0.11532258987426758,
      "step": 49097
    },
    {
      "epoch": 0.000299664306640625,
      "step": 49097,
      "training_step_time": 0.463817834854126
    },
    {
      "epoch": 0.00029967041015625,
      "model_forward_time": 0.1150665283203125,
      "step": 49098
    },
    {
      "epoch": 0.00029967041015625,
      "step": 49098,
      "training_step_time": 0.4725959300994873
    },
    {
      "epoch": 0.000299676513671875,
      "model_forward_time": 0.11495113372802734,
      "step": 49099
    },
    {
      "epoch": 0.000299676513671875,
      "step": 49099,
      "training_step_time": 0.3958742618560791
    },
    {
      "epoch": 0.0002996826171875,
      "grad_norm": 0.09167348593473434,
      "learning_rate": 8.754707380694427e-06,
      "loss": 0.0353,
      "step": 49100
    },
    {
      "epoch": 0.0002996826171875,
      "model_forward_time": 0.11439132690429688,
      "step": 49100
    },
    {
      "epoch": 0.0002996826171875,
      "step": 49100,
      "training_step_time": 0.3926856517791748
    },
    {
      "epoch": 0.000299688720703125,
      "model_forward_time": 0.11499738693237305,
      "step": 49101
    },
    {
      "epoch": 0.000299688720703125,
      "step": 49101,
      "training_step_time": 0.4005463123321533
    },
    {
      "epoch": 0.00029969482421875,
      "model_forward_time": 0.11508607864379883,
      "step": 49102
    },
    {
      "epoch": 0.00029969482421875,
      "step": 49102,
      "training_step_time": 0.3908219337463379
    },
    {
      "epoch": 0.000299700927734375,
      "model_forward_time": 0.11482977867126465,
      "step": 49103
    },
    {
      "epoch": 0.000299700927734375,
      "step": 49103,
      "training_step_time": 0.40151429176330566
    },
    {
      "epoch": 0.00029970703125,
      "model_forward_time": 0.1148526668548584,
      "step": 49104
    },
    {
      "epoch": 0.00029970703125,
      "step": 49104,
      "training_step_time": 0.5186476707458496
    },
    {
      "epoch": 0.000299713134765625,
      "model_forward_time": 0.1149895191192627,
      "step": 49105
    },
    {
      "epoch": 0.000299713134765625,
      "step": 49105,
      "training_step_time": 0.39145708084106445
    },
    {
      "epoch": 0.00029971923828125,
      "model_forward_time": 0.1155555248260498,
      "step": 49106
    },
    {
      "epoch": 0.00029971923828125,
      "step": 49106,
      "training_step_time": 0.3936271667480469
    },
    {
      "epoch": 0.000299725341796875,
      "model_forward_time": 0.11531615257263184,
      "step": 49107
    },
    {
      "epoch": 0.000299725341796875,
      "step": 49107,
      "training_step_time": 0.3958120346069336
    },
    {
      "epoch": 0.0002997314453125,
      "model_forward_time": 0.1152958869934082,
      "step": 49108
    },
    {
      "epoch": 0.0002997314453125,
      "step": 49108,
      "training_step_time": 0.3954153060913086
    },
    {
      "epoch": 0.000299737548828125,
      "model_forward_time": 0.11521792411804199,
      "step": 49109
    },
    {
      "epoch": 0.000299737548828125,
      "step": 49109,
      "training_step_time": 0.39151668548583984
    },
    {
      "epoch": 0.00029974365234375,
      "grad_norm": 0.07813193649053574,
      "learning_rate": 8.739136027423894e-06,
      "loss": 0.0344,
      "step": 49110
    },
    {
      "epoch": 0.00029974365234375,
      "model_forward_time": 0.11492776870727539,
      "step": 49110
    },
    {
      "epoch": 0.00029974365234375,
      "step": 49110,
      "training_step_time": 0.4443047046661377
    },
    {
      "epoch": 0.000299749755859375,
      "model_forward_time": 0.11677122116088867,
      "step": 49111
    },
    {
      "epoch": 0.000299749755859375,
      "step": 49111,
      "training_step_time": 0.45082712173461914
    },
    {
      "epoch": 0.000299755859375,
      "model_forward_time": 0.11685967445373535,
      "step": 49112
    },
    {
      "epoch": 0.000299755859375,
      "step": 49112,
      "training_step_time": 0.46047234535217285
    },
    {
      "epoch": 0.000299761962890625,
      "model_forward_time": 0.11519050598144531,
      "step": 49113
    },
    {
      "epoch": 0.000299761962890625,
      "step": 49113,
      "training_step_time": 0.4190676212310791
    },
    {
      "epoch": 0.00029976806640625,
      "model_forward_time": 0.11502361297607422,
      "step": 49114
    },
    {
      "epoch": 0.00029976806640625,
      "step": 49114,
      "training_step_time": 0.37773823738098145
    },
    {
      "epoch": 0.000299774169921875,
      "model_forward_time": 0.11452174186706543,
      "step": 49115
    },
    {
      "epoch": 0.000299774169921875,
      "step": 49115,
      "training_step_time": 0.38124632835388184
    },
    {
      "epoch": 0.0002997802734375,
      "model_forward_time": 0.11592292785644531,
      "step": 49116
    },
    {
      "epoch": 0.0002997802734375,
      "step": 49116,
      "training_step_time": 0.41376805305480957
    },
    {
      "epoch": 0.000299786376953125,
      "model_forward_time": 0.1151130199432373,
      "step": 49117
    },
    {
      "epoch": 0.000299786376953125,
      "step": 49117,
      "training_step_time": 0.4243032932281494
    },
    {
      "epoch": 0.00029979248046875,
      "model_forward_time": 0.1151878833770752,
      "step": 49118
    },
    {
      "epoch": 0.00029979248046875,
      "step": 49118,
      "training_step_time": 0.4359407424926758
    },
    {
      "epoch": 0.000299798583984375,
      "model_forward_time": 0.11489224433898926,
      "step": 49119
    },
    {
      "epoch": 0.000299798583984375,
      "step": 49119,
      "training_step_time": 0.3861699104309082
    },
    {
      "epoch": 0.0002998046875,
      "grad_norm": 0.09359343349933624,
      "learning_rate": 8.723577208114419e-06,
      "loss": 0.0385,
      "step": 49120
    },
    {
      "epoch": 0.0002998046875,
      "model_forward_time": 0.11511850357055664,
      "step": 49120
    },
    {
      "epoch": 0.0002998046875,
      "step": 49120,
      "training_step_time": 0.3964567184448242
    },
    {
      "epoch": 0.000299810791015625,
      "model_forward_time": 0.11553621292114258,
      "step": 49121
    },
    {
      "epoch": 0.000299810791015625,
      "step": 49121,
      "training_step_time": 0.7263050079345703
    },
    {
      "epoch": 0.00029981689453125,
      "model_forward_time": 0.11419439315795898,
      "step": 49122
    },
    {
      "epoch": 0.00029981689453125,
      "step": 49122,
      "training_step_time": 0.3995683193206787
    },
    {
      "epoch": 0.000299822998046875,
      "model_forward_time": 0.1144096851348877,
      "step": 49123
    },
    {
      "epoch": 0.000299822998046875,
      "step": 49123,
      "training_step_time": 0.39078712463378906
    },
    {
      "epoch": 0.0002998291015625,
      "model_forward_time": 0.11521363258361816,
      "step": 49124
    },
    {
      "epoch": 0.0002998291015625,
      "step": 49124,
      "training_step_time": 0.5073790550231934
    },
    {
      "epoch": 0.000299835205078125,
      "model_forward_time": 0.11500668525695801,
      "step": 49125
    },
    {
      "epoch": 0.000299835205078125,
      "step": 49125,
      "training_step_time": 0.49993371963500977
    },
    {
      "epoch": 0.00029984130859375,
      "model_forward_time": 0.11470317840576172,
      "step": 49126
    },
    {
      "epoch": 0.00029984130859375,
      "step": 49126,
      "training_step_time": 0.40843963623046875
    },
    {
      "epoch": 0.000299847412109375,
      "model_forward_time": 0.11451077461242676,
      "step": 49127
    },
    {
      "epoch": 0.000299847412109375,
      "step": 49127,
      "training_step_time": 0.3808631896972656
    },
    {
      "epoch": 0.000299853515625,
      "model_forward_time": 0.11533665657043457,
      "step": 49128
    },
    {
      "epoch": 0.000299853515625,
      "step": 49128,
      "training_step_time": 0.3889179229736328
    },
    {
      "epoch": 0.000299859619140625,
      "model_forward_time": 0.11518073081970215,
      "step": 49129
    },
    {
      "epoch": 0.000299859619140625,
      "step": 49129,
      "training_step_time": 0.39027953147888184
    },
    {
      "epoch": 0.00029986572265625,
      "grad_norm": 0.07016768306493759,
      "learning_rate": 8.708030927492345e-06,
      "loss": 0.0336,
      "step": 49130
    },
    {
      "epoch": 0.00029986572265625,
      "model_forward_time": 0.1153419017791748,
      "step": 49130
    },
    {
      "epoch": 0.00029986572265625,
      "step": 49130,
      "training_step_time": 0.5110704898834229
    },
    {
      "epoch": 0.000299871826171875,
      "model_forward_time": 0.11635065078735352,
      "step": 49131
    },
    {
      "epoch": 0.000299871826171875,
      "step": 49131,
      "training_step_time": 0.39804959297180176
    },
    {
      "epoch": 0.0002998779296875,
      "model_forward_time": 0.11610722541809082,
      "step": 49132
    },
    {
      "epoch": 0.0002998779296875,
      "step": 49132,
      "training_step_time": 0.3950960636138916
    },
    {
      "epoch": 0.000299884033203125,
      "model_forward_time": 0.11476492881774902,
      "step": 49133
    },
    {
      "epoch": 0.000299884033203125,
      "step": 49133,
      "training_step_time": 0.4447059631347656
    },
    {
      "epoch": 0.00029989013671875,
      "model_forward_time": 0.11519837379455566,
      "step": 49134
    },
    {
      "epoch": 0.00029989013671875,
      "step": 49134,
      "training_step_time": 0.3912668228149414
    },
    {
      "epoch": 0.000299896240234375,
      "model_forward_time": 0.1158132553100586,
      "step": 49135
    },
    {
      "epoch": 0.000299896240234375,
      "step": 49135,
      "training_step_time": 0.39138364791870117
    },
    {
      "epoch": 0.00029990234375,
      "model_forward_time": 0.11521506309509277,
      "step": 49136
    },
    {
      "epoch": 0.00029990234375,
      "step": 49136,
      "training_step_time": 0.3942434787750244
    },
    {
      "epoch": 0.000299908447265625,
      "model_forward_time": 0.11552786827087402,
      "step": 49137
    },
    {
      "epoch": 0.000299908447265625,
      "step": 49137,
      "training_step_time": 0.3902881145477295
    },
    {
      "epoch": 0.00029991455078125,
      "model_forward_time": 0.11557173728942871,
      "step": 49138
    },
    {
      "epoch": 0.00029991455078125,
      "step": 49138,
      "training_step_time": 0.3637580871582031
    },
    {
      "epoch": 0.000299920654296875,
      "model_forward_time": 0.1154947280883789,
      "step": 49139
    },
    {
      "epoch": 0.000299920654296875,
      "step": 49139,
      "training_step_time": 0.5205338001251221
    },
    {
      "epoch": 0.0002999267578125,
      "grad_norm": 0.11028074473142624,
      "learning_rate": 8.692497190280224e-06,
      "loss": 0.039,
      "step": 49140
    },
    {
      "epoch": 0.0002999267578125,
      "model_forward_time": 0.11461925506591797,
      "step": 49140
    },
    {
      "epoch": 0.0002999267578125,
      "step": 49140,
      "training_step_time": 0.48033738136291504
    },
    {
      "epoch": 0.000299932861328125,
      "model_forward_time": 0.11487722396850586,
      "step": 49141
    },
    {
      "epoch": 0.000299932861328125,
      "step": 49141,
      "training_step_time": 0.4150810241699219
    },
    {
      "epoch": 0.00029993896484375,
      "model_forward_time": 0.1148672103881836,
      "step": 49142
    },
    {
      "epoch": 0.00029993896484375,
      "step": 49142,
      "training_step_time": 0.39941883087158203
    },
    {
      "epoch": 0.000299945068359375,
      "model_forward_time": 0.11522650718688965,
      "step": 49143
    },
    {
      "epoch": 0.000299945068359375,
      "step": 49143,
      "training_step_time": 0.3917849063873291
    },
    {
      "epoch": 0.000299951171875,
      "model_forward_time": 0.11483573913574219,
      "step": 49144
    },
    {
      "epoch": 0.000299951171875,
      "step": 49144,
      "training_step_time": 0.4517223834991455
    },
    {
      "epoch": 0.000299957275390625,
      "model_forward_time": 0.11530041694641113,
      "step": 49145
    },
    {
      "epoch": 0.000299957275390625,
      "step": 49145,
      "training_step_time": 0.4200148582458496
    },
    {
      "epoch": 0.00029996337890625,
      "model_forward_time": 0.11470866203308105,
      "step": 49146
    },
    {
      "epoch": 0.00029996337890625,
      "step": 49146,
      "training_step_time": 0.48993515968322754
    },
    {
      "epoch": 0.000299969482421875,
      "model_forward_time": 0.11464738845825195,
      "step": 49147
    },
    {
      "epoch": 0.000299969482421875,
      "step": 49147,
      "training_step_time": 0.39966869354248047
    },
    {
      "epoch": 0.0002999755859375,
      "model_forward_time": 0.11527442932128906,
      "step": 49148
    },
    {
      "epoch": 0.0002999755859375,
      "step": 49148,
      "training_step_time": 0.38871049880981445
    },
    {
      "epoch": 0.000299981689453125,
      "model_forward_time": 0.1152799129486084,
      "step": 49149
    },
    {
      "epoch": 0.000299981689453125,
      "step": 49149,
      "training_step_time": 0.39101123809814453
    },
    {
      "epoch": 0.00029998779296875,
      "grad_norm": 0.14921173453330994,
      "learning_rate": 8.676976001196801e-06,
      "loss": 0.0347,
      "step": 49150
    },
    {
      "epoch": 0.00029998779296875,
      "model_forward_time": 0.11494278907775879,
      "step": 49150
    },
    {
      "epoch": 0.00029998779296875,
      "step": 49150,
      "training_step_time": 0.39894819259643555
    },
    {
      "epoch": 0.000299993896484375,
      "model_forward_time": 0.11503005027770996,
      "step": 49151
    },
    {
      "epoch": 0.000299993896484375,
      "step": 49151,
      "training_step_time": 0.6934289932250977
    },
    {
      "epoch": 0.0003,
      "model_forward_time": 0.11494278907775879,
      "step": 49152
    },
    {
      "epoch": 0.0003,
      "step": 49152,
      "training_step_time": 0.36386966705322266
    },
    {
      "epoch": 0.000300006103515625,
      "model_forward_time": 0.11497807502746582,
      "step": 49153
    },
    {
      "epoch": 0.000300006103515625,
      "step": 49153,
      "training_step_time": 0.46131038665771484
    },
    {
      "epoch": 0.00030001220703125,
      "model_forward_time": 0.11578059196472168,
      "step": 49154
    },
    {
      "epoch": 0.00030001220703125,
      "step": 49154,
      "training_step_time": 0.43823790550231934
    },
    {
      "epoch": 0.000300018310546875,
      "model_forward_time": 0.11406064033508301,
      "step": 49155
    },
    {
      "epoch": 0.000300018310546875,
      "step": 49155,
      "training_step_time": 0.41712236404418945
    },
    {
      "epoch": 0.0003000244140625,
      "model_forward_time": 0.11422562599182129,
      "step": 49156
    },
    {
      "epoch": 0.0003000244140625,
      "step": 49156,
      "training_step_time": 0.3843984603881836
    },
    {
      "epoch": 0.000300030517578125,
      "model_forward_time": 0.11562824249267578,
      "step": 49157
    },
    {
      "epoch": 0.000300030517578125,
      "step": 49157,
      "training_step_time": 0.622105598449707
    },
    {
      "epoch": 0.00030003662109375,
      "model_forward_time": 0.11502242088317871,
      "step": 49158
    },
    {
      "epoch": 0.00030003662109375,
      "step": 49158,
      "training_step_time": 0.40526509284973145
    },
    {
      "epoch": 0.000300042724609375,
      "model_forward_time": 0.114654541015625,
      "step": 49159
    },
    {
      "epoch": 0.000300042724609375,
      "step": 49159,
      "training_step_time": 0.4707801342010498
    },
    {
      "epoch": 0.000300048828125,
      "grad_norm": 0.08298638463020325,
      "learning_rate": 8.661467364956993e-06,
      "loss": 0.0346,
      "step": 49160
    },
    {
      "epoch": 0.000300048828125,
      "model_forward_time": 0.11493325233459473,
      "step": 49160
    },
    {
      "epoch": 0.000300048828125,
      "step": 49160,
      "training_step_time": 0.38335084915161133
    },
    {
      "epoch": 0.000300054931640625,
      "model_forward_time": 0.11472392082214355,
      "step": 49161
    },
    {
      "epoch": 0.000300054931640625,
      "step": 49161,
      "training_step_time": 0.3871312141418457
    },
    {
      "epoch": 0.00030006103515625,
      "model_forward_time": 0.11445736885070801,
      "step": 49162
    },
    {
      "epoch": 0.00030006103515625,
      "step": 49162,
      "training_step_time": 0.39650869369506836
    },
    {
      "epoch": 0.000300067138671875,
      "model_forward_time": 0.1149907112121582,
      "step": 49163
    },
    {
      "epoch": 0.000300067138671875,
      "step": 49163,
      "training_step_time": 0.6032469272613525
    },
    {
      "epoch": 0.0003000732421875,
      "model_forward_time": 0.11406159400939941,
      "step": 49164
    },
    {
      "epoch": 0.0003000732421875,
      "step": 49164,
      "training_step_time": 0.39411282539367676
    },
    {
      "epoch": 0.000300079345703125,
      "model_forward_time": 0.11526346206665039,
      "step": 49165
    },
    {
      "epoch": 0.000300079345703125,
      "step": 49165,
      "training_step_time": 0.39214038848876953
    },
    {
      "epoch": 0.00030008544921875,
      "model_forward_time": 0.11492609977722168,
      "step": 49166
    },
    {
      "epoch": 0.00030008544921875,
      "step": 49166,
      "training_step_time": 0.45286083221435547
    },
    {
      "epoch": 0.000300091552734375,
      "model_forward_time": 0.11507844924926758,
      "step": 49167
    },
    {
      "epoch": 0.000300091552734375,
      "step": 49167,
      "training_step_time": 0.5014946460723877
    },
    {
      "epoch": 0.00030009765625,
      "model_forward_time": 0.1146841049194336,
      "step": 49168
    },
    {
      "epoch": 0.00030009765625,
      "step": 49168,
      "training_step_time": 0.40529322624206543
    },
    {
      "epoch": 0.000300103759765625,
      "model_forward_time": 0.11439251899719238,
      "step": 49169
    },
    {
      "epoch": 0.000300103759765625,
      "step": 49169,
      "training_step_time": 0.6037371158599854
    },
    {
      "epoch": 0.00030010986328125,
      "grad_norm": 0.07939710468053818,
      "learning_rate": 8.645971286271904e-06,
      "loss": 0.0373,
      "step": 49170
    },
    {
      "epoch": 0.00030010986328125,
      "model_forward_time": 0.11501908302307129,
      "step": 49170
    },
    {
      "epoch": 0.00030010986328125,
      "step": 49170,
      "training_step_time": 0.3909013271331787
    },
    {
      "epoch": 0.000300115966796875,
      "model_forward_time": 0.11437392234802246,
      "step": 49171
    },
    {
      "epoch": 0.000300115966796875,
      "step": 49171,
      "training_step_time": 0.39225053787231445
    },
    {
      "epoch": 0.0003001220703125,
      "model_forward_time": 0.11481547355651855,
      "step": 49172
    },
    {
      "epoch": 0.0003001220703125,
      "step": 49172,
      "training_step_time": 0.4029238224029541
    },
    {
      "epoch": 0.000300128173828125,
      "model_forward_time": 0.11458039283752441,
      "step": 49173
    },
    {
      "epoch": 0.000300128173828125,
      "step": 49173,
      "training_step_time": 0.45690035820007324
    },
    {
      "epoch": 0.00030013427734375,
      "model_forward_time": 0.11485981941223145,
      "step": 49174
    },
    {
      "epoch": 0.00030013427734375,
      "step": 49174,
      "training_step_time": 0.3838202953338623
    },
    {
      "epoch": 0.000300140380859375,
      "model_forward_time": 0.11530113220214844,
      "step": 49175
    },
    {
      "epoch": 0.000300140380859375,
      "step": 49175,
      "training_step_time": 0.5407595634460449
    },
    {
      "epoch": 0.000300146484375,
      "model_forward_time": 0.11511731147766113,
      "step": 49176
    },
    {
      "epoch": 0.000300146484375,
      "step": 49176,
      "training_step_time": 0.4019763469696045
    },
    {
      "epoch": 0.000300152587890625,
      "model_forward_time": 0.11489582061767578,
      "step": 49177
    },
    {
      "epoch": 0.000300152587890625,
      "step": 49177,
      "training_step_time": 0.3941483497619629
    },
    {
      "epoch": 0.00030015869140625,
      "model_forward_time": 0.11516785621643066,
      "step": 49178
    },
    {
      "epoch": 0.00030015869140625,
      "step": 49178,
      "training_step_time": 0.3960609436035156
    },
    {
      "epoch": 0.000300164794921875,
      "model_forward_time": 0.11507344245910645,
      "step": 49179
    },
    {
      "epoch": 0.000300164794921875,
      "step": 49179,
      "training_step_time": 0.408921480178833
    },
    {
      "epoch": 0.0003001708984375,
      "grad_norm": 0.09106609225273132,
      "learning_rate": 8.630487769848877e-06,
      "loss": 0.0327,
      "step": 49180
    },
    {
      "epoch": 0.0003001708984375,
      "model_forward_time": 0.11484599113464355,
      "step": 49180
    },
    {
      "epoch": 0.0003001708984375,
      "step": 49180,
      "training_step_time": 0.43016791343688965
    },
    {
      "epoch": 0.000300177001953125,
      "model_forward_time": 0.11532258987426758,
      "step": 49181
    },
    {
      "epoch": 0.000300177001953125,
      "step": 49181,
      "training_step_time": 0.49248552322387695
    },
    {
      "epoch": 0.00030018310546875,
      "model_forward_time": 0.11701726913452148,
      "step": 49182
    },
    {
      "epoch": 0.00030018310546875,
      "step": 49182,
      "training_step_time": 0.42354273796081543
    },
    {
      "epoch": 0.000300189208984375,
      "model_forward_time": 0.11488223075866699,
      "step": 49183
    },
    {
      "epoch": 0.000300189208984375,
      "step": 49183,
      "training_step_time": 0.41362524032592773
    },
    {
      "epoch": 0.0003001953125,
      "model_forward_time": 0.11488723754882812,
      "step": 49184
    },
    {
      "epoch": 0.0003001953125,
      "step": 49184,
      "training_step_time": 0.38968944549560547
    },
    {
      "epoch": 0.000300201416015625,
      "model_forward_time": 0.11554670333862305,
      "step": 49185
    },
    {
      "epoch": 0.000300201416015625,
      "step": 49185,
      "training_step_time": 0.40113162994384766
    },
    {
      "epoch": 0.00030020751953125,
      "model_forward_time": 0.11520004272460938,
      "step": 49186
    },
    {
      "epoch": 0.00030020751953125,
      "step": 49186,
      "training_step_time": 0.45416975021362305
    },
    {
      "epoch": 0.000300213623046875,
      "model_forward_time": 0.11504292488098145,
      "step": 49187
    },
    {
      "epoch": 0.000300213623046875,
      "step": 49187,
      "training_step_time": 0.3930346965789795
    },
    {
      "epoch": 0.0003002197265625,
      "model_forward_time": 0.11526322364807129,
      "step": 49188
    },
    {
      "epoch": 0.0003002197265625,
      "step": 49188,
      "training_step_time": 0.3973503112792969
    },
    {
      "epoch": 0.000300225830078125,
      "model_forward_time": 0.11543631553649902,
      "step": 49189
    },
    {
      "epoch": 0.000300225830078125,
      "step": 49189,
      "training_step_time": 0.3949863910675049
    },
    {
      "epoch": 0.00030023193359375,
      "grad_norm": 0.10306954383850098,
      "learning_rate": 8.615016820391342e-06,
      "loss": 0.0348,
      "step": 49190
    },
    {
      "epoch": 0.00030023193359375,
      "model_forward_time": 0.11569595336914062,
      "step": 49190
    },
    {
      "epoch": 0.00030023193359375,
      "step": 49190,
      "training_step_time": 0.39267921447753906
    },
    {
      "epoch": 0.000300238037109375,
      "model_forward_time": 0.11502265930175781,
      "step": 49191
    },
    {
      "epoch": 0.000300238037109375,
      "step": 49191,
      "training_step_time": 0.39809203147888184
    },
    {
      "epoch": 0.000300244140625,
      "model_forward_time": 0.11458301544189453,
      "step": 49192
    },
    {
      "epoch": 0.000300244140625,
      "step": 49192,
      "training_step_time": 0.39359283447265625
    },
    {
      "epoch": 0.000300250244140625,
      "model_forward_time": 0.11498427391052246,
      "step": 49193
    },
    {
      "epoch": 0.000300250244140625,
      "step": 49193,
      "training_step_time": 0.9104337692260742
    },
    {
      "epoch": 0.00030025634765625,
      "model_forward_time": 0.11479663848876953,
      "step": 49194
    },
    {
      "epoch": 0.00030025634765625,
      "step": 49194,
      "training_step_time": 0.47222471237182617
    },
    {
      "epoch": 0.000300262451171875,
      "model_forward_time": 0.11462640762329102,
      "step": 49195
    },
    {
      "epoch": 0.000300262451171875,
      "step": 49195,
      "training_step_time": 0.47487401962280273
    },
    {
      "epoch": 0.0003002685546875,
      "model_forward_time": 0.11466598510742188,
      "step": 49196
    },
    {
      "epoch": 0.0003002685546875,
      "step": 49196,
      "training_step_time": 0.511322021484375
    },
    {
      "epoch": 0.000300274658203125,
      "model_forward_time": 0.11356019973754883,
      "step": 49197
    },
    {
      "epoch": 0.000300274658203125,
      "step": 49197,
      "training_step_time": 0.38907909393310547
    },
    {
      "epoch": 0.00030028076171875,
      "model_forward_time": 0.1141664981842041,
      "step": 49198
    },
    {
      "epoch": 0.00030028076171875,
      "step": 49198,
      "training_step_time": 0.3908085823059082
    },
    {
      "epoch": 0.000300286865234375,
      "model_forward_time": 0.11404967308044434,
      "step": 49199
    },
    {
      "epoch": 0.000300286865234375,
      "step": 49199,
      "training_step_time": 0.4575841426849365
    },
    {
      "epoch": 0.00030029296875,
      "grad_norm": 0.12390587478876114,
      "learning_rate": 8.599558442598998e-06,
      "loss": 0.0381,
      "step": 49200
    },
    {
      "epoch": 0.00030029296875,
      "model_forward_time": 0.11462020874023438,
      "step": 49200
    },
    {
      "epoch": 0.00030029296875,
      "step": 49200,
      "training_step_time": 0.406083345413208
    },
    {
      "epoch": 0.000300299072265625,
      "model_forward_time": 0.1149146556854248,
      "step": 49201
    },
    {
      "epoch": 0.000300299072265625,
      "step": 49201,
      "training_step_time": 0.48825669288635254
    },
    {
      "epoch": 0.00030030517578125,
      "model_forward_time": 0.1152639389038086,
      "step": 49202
    },
    {
      "epoch": 0.00030030517578125,
      "step": 49202,
      "training_step_time": 0.37833380699157715
    },
    {
      "epoch": 0.000300311279296875,
      "model_forward_time": 0.11509537696838379,
      "step": 49203
    },
    {
      "epoch": 0.000300311279296875,
      "step": 49203,
      "training_step_time": 0.39317917823791504
    },
    {
      "epoch": 0.0003003173828125,
      "model_forward_time": 0.11471033096313477,
      "step": 49204
    },
    {
      "epoch": 0.0003003173828125,
      "step": 49204,
      "training_step_time": 0.45260119438171387
    },
    {
      "epoch": 0.000300323486328125,
      "model_forward_time": 0.11417865753173828,
      "step": 49205
    },
    {
      "epoch": 0.000300323486328125,
      "step": 49205,
      "training_step_time": 0.8532392978668213
    },
    {
      "epoch": 0.00030032958984375,
      "model_forward_time": 0.1136176586151123,
      "step": 49206
    },
    {
      "epoch": 0.00030032958984375,
      "step": 49206,
      "training_step_time": 0.38010597229003906
    },
    {
      "epoch": 0.000300335693359375,
      "model_forward_time": 0.1155698299407959,
      "step": 49207
    },
    {
      "epoch": 0.000300335693359375,
      "step": 49207,
      "training_step_time": 0.3984489440917969
    },
    {
      "epoch": 0.000300341796875,
      "model_forward_time": 0.11457586288452148,
      "step": 49208
    },
    {
      "epoch": 0.000300341796875,
      "step": 49208,
      "training_step_time": 0.3997974395751953
    },
    {
      "epoch": 0.000300347900390625,
      "model_forward_time": 0.11451840400695801,
      "step": 49209
    },
    {
      "epoch": 0.000300347900390625,
      "step": 49209,
      "training_step_time": 0.48378896713256836
    },
    {
      "epoch": 0.00030035400390625,
      "grad_norm": 0.11609627306461334,
      "learning_rate": 8.584112641167686e-06,
      "loss": 0.0328,
      "step": 49210
    },
    {
      "epoch": 0.00030035400390625,
      "model_forward_time": 0.11440229415893555,
      "step": 49210
    },
    {
      "epoch": 0.00030035400390625,
      "step": 49210,
      "training_step_time": 0.41746091842651367
    },
    {
      "epoch": 0.000300360107421875,
      "model_forward_time": 0.1144251823425293,
      "step": 49211
    },
    {
      "epoch": 0.000300360107421875,
      "step": 49211,
      "training_step_time": 0.7526099681854248
    },
    {
      "epoch": 0.0003003662109375,
      "model_forward_time": 0.11420297622680664,
      "step": 49212
    },
    {
      "epoch": 0.0003003662109375,
      "step": 49212,
      "training_step_time": 0.47908711433410645
    },
    {
      "epoch": 0.000300372314453125,
      "model_forward_time": 0.11409687995910645,
      "step": 49213
    },
    {
      "epoch": 0.000300372314453125,
      "step": 49213,
      "training_step_time": 0.4727005958557129
    },
    {
      "epoch": 0.00030037841796875,
      "model_forward_time": 0.11509394645690918,
      "step": 49214
    },
    {
      "epoch": 0.00030037841796875,
      "step": 49214,
      "training_step_time": 0.4015021324157715
    },
    {
      "epoch": 0.000300384521484375,
      "model_forward_time": 0.11393356323242188,
      "step": 49215
    },
    {
      "epoch": 0.000300384521484375,
      "step": 49215,
      "training_step_time": 0.39997053146362305
    },
    {
      "epoch": 0.000300390625,
      "model_forward_time": 0.1141357421875,
      "step": 49216
    },
    {
      "epoch": 0.000300390625,
      "step": 49216,
      "training_step_time": 0.4282960891723633
    },
    {
      "epoch": 0.000300396728515625,
      "model_forward_time": 0.1143791675567627,
      "step": 49217
    },
    {
      "epoch": 0.000300396728515625,
      "step": 49217,
      "training_step_time": 0.4255223274230957
    },
    {
      "epoch": 0.00030040283203125,
      "model_forward_time": 0.1150369644165039,
      "step": 49218
    },
    {
      "epoch": 0.00030040283203125,
      "step": 49218,
      "training_step_time": 0.3988649845123291
    },
    {
      "epoch": 0.000300408935546875,
      "model_forward_time": 0.11419034004211426,
      "step": 49219
    },
    {
      "epoch": 0.000300408935546875,
      "step": 49219,
      "training_step_time": 0.3876020908355713
    },
    {
      "epoch": 0.0003004150390625,
      "grad_norm": 0.08827706426382065,
      "learning_rate": 8.568679420789433e-06,
      "loss": 0.0363,
      "step": 49220
    },
    {
      "epoch": 0.0003004150390625,
      "model_forward_time": 0.11518645286560059,
      "step": 49220
    },
    {
      "epoch": 0.0003004150390625,
      "step": 49220,
      "training_step_time": 0.38640666007995605
    },
    {
      "epoch": 0.000300421142578125,
      "model_forward_time": 0.11512303352355957,
      "step": 49221
    },
    {
      "epoch": 0.000300421142578125,
      "step": 49221,
      "training_step_time": 0.3923006057739258
    },
    {
      "epoch": 0.00030042724609375,
      "model_forward_time": 0.11528849601745605,
      "step": 49222
    },
    {
      "epoch": 0.00030042724609375,
      "step": 49222,
      "training_step_time": 0.48470067977905273
    },
    {
      "epoch": 0.000300433349609375,
      "model_forward_time": 0.11658263206481934,
      "step": 49223
    },
    {
      "epoch": 0.000300433349609375,
      "step": 49223,
      "training_step_time": 0.9826028347015381
    },
    {
      "epoch": 0.000300439453125,
      "model_forward_time": 0.1143960952758789,
      "step": 49224
    },
    {
      "epoch": 0.000300439453125,
      "step": 49224,
      "training_step_time": 0.4004380702972412
    },
    {
      "epoch": 0.000300445556640625,
      "model_forward_time": 0.11429810523986816,
      "step": 49225
    },
    {
      "epoch": 0.000300445556640625,
      "step": 49225,
      "training_step_time": 0.43871450424194336
    },
    {
      "epoch": 0.00030045166015625,
      "model_forward_time": 0.11397504806518555,
      "step": 49226
    },
    {
      "epoch": 0.00030045166015625,
      "step": 49226,
      "training_step_time": 0.4291839599609375
    },
    {
      "epoch": 0.000300457763671875,
      "model_forward_time": 0.11404824256896973,
      "step": 49227
    },
    {
      "epoch": 0.000300457763671875,
      "step": 49227,
      "training_step_time": 0.38602113723754883
    },
    {
      "epoch": 0.0003004638671875,
      "model_forward_time": 0.11430573463439941,
      "step": 49228
    },
    {
      "epoch": 0.0003004638671875,
      "step": 49228,
      "training_step_time": 0.4160280227661133
    },
    {
      "epoch": 0.000300469970703125,
      "model_forward_time": 0.11460447311401367,
      "step": 49229
    },
    {
      "epoch": 0.000300469970703125,
      "step": 49229,
      "training_step_time": 0.8528571128845215
    },
    {
      "epoch": 0.00030047607421875,
      "grad_norm": 0.08626149594783783,
      "learning_rate": 8.55325878615244e-06,
      "loss": 0.0296,
      "step": 49230
    },
    {
      "epoch": 0.00030047607421875,
      "model_forward_time": 0.11437630653381348,
      "step": 49230
    },
    {
      "epoch": 0.00030047607421875,
      "step": 49230,
      "training_step_time": 0.4000256061553955
    },
    {
      "epoch": 0.000300482177734375,
      "model_forward_time": 0.11377692222595215,
      "step": 49231
    },
    {
      "epoch": 0.000300482177734375,
      "step": 49231,
      "training_step_time": 0.38353800773620605
    },
    {
      "epoch": 0.00030048828125,
      "model_forward_time": 0.11432957649230957,
      "step": 49232
    },
    {
      "epoch": 0.00030048828125,
      "step": 49232,
      "training_step_time": 0.3788008689880371
    },
    {
      "epoch": 0.000300494384765625,
      "model_forward_time": 0.1142585277557373,
      "step": 49233
    },
    {
      "epoch": 0.000300494384765625,
      "step": 49233,
      "training_step_time": 0.3795201778411865
    },
    {
      "epoch": 0.00030050048828125,
      "model_forward_time": 0.11465764045715332,
      "step": 49234
    },
    {
      "epoch": 0.00030050048828125,
      "step": 49234,
      "training_step_time": 0.3630099296569824
    },
    {
      "epoch": 0.000300506591796875,
      "model_forward_time": 0.11530303955078125,
      "step": 49235
    },
    {
      "epoch": 0.000300506591796875,
      "step": 49235,
      "training_step_time": 0.547365665435791
    },
    {
      "epoch": 0.0003005126953125,
      "model_forward_time": 0.11485934257507324,
      "step": 49236
    },
    {
      "epoch": 0.0003005126953125,
      "step": 49236,
      "training_step_time": 0.42103052139282227
    },
    {
      "epoch": 0.000300518798828125,
      "model_forward_time": 0.11460304260253906,
      "step": 49237
    },
    {
      "epoch": 0.000300518798828125,
      "step": 49237,
      "training_step_time": 0.3896787166595459
    },
    {
      "epoch": 0.00030052490234375,
      "model_forward_time": 0.11519050598144531,
      "step": 49238
    },
    {
      "epoch": 0.00030052490234375,
      "step": 49238,
      "training_step_time": 0.3801395893096924
    },
    {
      "epoch": 0.000300531005859375,
      "model_forward_time": 0.11450576782226562,
      "step": 49239
    },
    {
      "epoch": 0.000300531005859375,
      "step": 49239,
      "training_step_time": 0.3987903594970703
    },
    {
      "epoch": 0.000300537109375,
      "grad_norm": 0.07399789243936539,
      "learning_rate": 8.537850741941073e-06,
      "loss": 0.0392,
      "step": 49240
    },
    {
      "epoch": 0.000300537109375,
      "model_forward_time": 0.11548852920532227,
      "step": 49240
    },
    {
      "epoch": 0.000300537109375,
      "step": 49240,
      "training_step_time": 0.40003132820129395
    },
    {
      "epoch": 0.000300543212890625,
      "model_forward_time": 0.11512374877929688,
      "step": 49241
    },
    {
      "epoch": 0.000300543212890625,
      "step": 49241,
      "training_step_time": 0.6514320373535156
    },
    {
      "epoch": 0.00030054931640625,
      "model_forward_time": 0.11498689651489258,
      "step": 49242
    },
    {
      "epoch": 0.00030054931640625,
      "step": 49242,
      "training_step_time": 0.39299678802490234
    },
    {
      "epoch": 0.000300555419921875,
      "model_forward_time": 0.11491751670837402,
      "step": 49243
    },
    {
      "epoch": 0.000300555419921875,
      "step": 49243,
      "training_step_time": 0.39119935035705566
    },
    {
      "epoch": 0.0003005615234375,
      "model_forward_time": 0.1144859790802002,
      "step": 49244
    },
    {
      "epoch": 0.0003005615234375,
      "step": 49244,
      "training_step_time": 0.3844590187072754
    },
    {
      "epoch": 0.000300567626953125,
      "model_forward_time": 0.11521530151367188,
      "step": 49245
    },
    {
      "epoch": 0.000300567626953125,
      "step": 49245,
      "training_step_time": 0.3924086093902588
    },
    {
      "epoch": 0.00030057373046875,
      "model_forward_time": 0.11521601676940918,
      "step": 49246
    },
    {
      "epoch": 0.00030057373046875,
      "step": 49246,
      "training_step_time": 0.39583420753479004
    },
    {
      "epoch": 0.000300579833984375,
      "model_forward_time": 0.11533284187316895,
      "step": 49247
    },
    {
      "epoch": 0.000300579833984375,
      "step": 49247,
      "training_step_time": 0.9146015644073486
    },
    {
      "epoch": 0.0003005859375,
      "model_forward_time": 0.11479592323303223,
      "step": 49248
    },
    {
      "epoch": 0.0003005859375,
      "step": 49248,
      "training_step_time": 0.5066182613372803
    },
    {
      "epoch": 0.000300592041015625,
      "model_forward_time": 0.11397457122802734,
      "step": 49249
    },
    {
      "epoch": 0.000300592041015625,
      "step": 49249,
      "training_step_time": 0.48740673065185547
    },
    {
      "epoch": 0.00030059814453125,
      "grad_norm": 0.11243453621864319,
      "learning_rate": 8.522455292835934e-06,
      "loss": 0.0367,
      "step": 49250
    },
    {
      "epoch": 0.00030059814453125,
      "model_forward_time": 0.11472797393798828,
      "step": 49250
    },
    {
      "epoch": 0.00030059814453125,
      "step": 49250,
      "training_step_time": 0.3876347541809082
    },
    {
      "epoch": 0.000300604248046875,
      "model_forward_time": 0.11392545700073242,
      "step": 49251
    },
    {
      "epoch": 0.000300604248046875,
      "step": 49251,
      "training_step_time": 0.37494325637817383
    },
    {
      "epoch": 0.0003006103515625,
      "model_forward_time": 0.11376643180847168,
      "step": 49252
    },
    {
      "epoch": 0.0003006103515625,
      "step": 49252,
      "training_step_time": 0.47817111015319824
    },
    {
      "epoch": 0.000300616455078125,
      "model_forward_time": 0.11455440521240234,
      "step": 49253
    },
    {
      "epoch": 0.000300616455078125,
      "step": 49253,
      "training_step_time": 0.5420901775360107
    },
    {
      "epoch": 0.00030062255859375,
      "model_forward_time": 0.11461544036865234,
      "step": 49254
    },
    {
      "epoch": 0.00030062255859375,
      "step": 49254,
      "training_step_time": 0.3940248489379883
    },
    {
      "epoch": 0.000300628662109375,
      "model_forward_time": 0.11549782752990723,
      "step": 49255
    },
    {
      "epoch": 0.000300628662109375,
      "step": 49255,
      "training_step_time": 0.3907473087310791
    },
    {
      "epoch": 0.000300634765625,
      "model_forward_time": 0.11476325988769531,
      "step": 49256
    },
    {
      "epoch": 0.000300634765625,
      "step": 49256,
      "training_step_time": 0.3903992176055908
    },
    {
      "epoch": 0.000300640869140625,
      "model_forward_time": 0.11464428901672363,
      "step": 49257
    },
    {
      "epoch": 0.000300640869140625,
      "step": 49257,
      "training_step_time": 0.4016435146331787
    },
    {
      "epoch": 0.00030064697265625,
      "model_forward_time": 0.11556529998779297,
      "step": 49258
    },
    {
      "epoch": 0.00030064697265625,
      "step": 49258,
      "training_step_time": 0.3986806869506836
    },
    {
      "epoch": 0.000300653076171875,
      "model_forward_time": 0.11482048034667969,
      "step": 49259
    },
    {
      "epoch": 0.000300653076171875,
      "step": 49259,
      "training_step_time": 1.0480520725250244
    },
    {
      "epoch": 0.0003006591796875,
      "grad_norm": 0.10306686908006668,
      "learning_rate": 8.507072443513702e-06,
      "loss": 0.0348,
      "step": 49260
    },
    {
      "epoch": 0.0003006591796875,
      "model_forward_time": 0.11427664756774902,
      "step": 49260
    },
    {
      "epoch": 0.0003006591796875,
      "step": 49260,
      "training_step_time": 0.4298820495605469
    },
    {
      "epoch": 0.000300665283203125,
      "model_forward_time": 0.11448359489440918,
      "step": 49261
    },
    {
      "epoch": 0.000300665283203125,
      "step": 49261,
      "training_step_time": 0.4280674457550049
    },
    {
      "epoch": 0.00030067138671875,
      "model_forward_time": 0.11393237113952637,
      "step": 49262
    },
    {
      "epoch": 0.00030067138671875,
      "step": 49262,
      "training_step_time": 0.4564788341522217
    },
    {
      "epoch": 0.000300677490234375,
      "model_forward_time": 0.1140739917755127,
      "step": 49263
    },
    {
      "epoch": 0.000300677490234375,
      "step": 49263,
      "training_step_time": 0.3750765323638916
    },
    {
      "epoch": 0.00030068359375,
      "model_forward_time": 0.11418700218200684,
      "step": 49264
    },
    {
      "epoch": 0.00030068359375,
      "step": 49264,
      "training_step_time": 0.3749427795410156
    },
    {
      "epoch": 0.000300689697265625,
      "model_forward_time": 0.11472582817077637,
      "step": 49265
    },
    {
      "epoch": 0.000300689697265625,
      "step": 49265,
      "training_step_time": 0.579667329788208
    },
    {
      "epoch": 0.00030069580078125,
      "model_forward_time": 0.11414074897766113,
      "step": 49266
    },
    {
      "epoch": 0.00030069580078125,
      "step": 49266,
      "training_step_time": 0.4698774814605713
    },
    {
      "epoch": 0.000300701904296875,
      "model_forward_time": 0.11500072479248047,
      "step": 49267
    },
    {
      "epoch": 0.000300701904296875,
      "step": 49267,
      "training_step_time": 0.41736841201782227
    },
    {
      "epoch": 0.0003007080078125,
      "model_forward_time": 0.11412572860717773,
      "step": 49268
    },
    {
      "epoch": 0.0003007080078125,
      "step": 49268,
      "training_step_time": 0.3923342227935791
    },
    {
      "epoch": 0.000300714111328125,
      "model_forward_time": 0.1152198314666748,
      "step": 49269
    },
    {
      "epoch": 0.000300714111328125,
      "step": 49269,
      "training_step_time": 0.38358163833618164
    },
    {
      "epoch": 0.00030072021484375,
      "grad_norm": 0.12193005532026291,
      "learning_rate": 8.491702198647333e-06,
      "loss": 0.034,
      "step": 49270
    },
    {
      "epoch": 0.00030072021484375,
      "model_forward_time": 0.11466383934020996,
      "step": 49270
    },
    {
      "epoch": 0.00030072021484375,
      "step": 49270,
      "training_step_time": 0.40782618522644043
    },
    {
      "epoch": 0.000300726318359375,
      "model_forward_time": 0.11465597152709961,
      "step": 49271
    },
    {
      "epoch": 0.000300726318359375,
      "step": 49271,
      "training_step_time": 0.7092227935791016
    },
    {
      "epoch": 0.000300732421875,
      "model_forward_time": 0.11440515518188477,
      "step": 49272
    },
    {
      "epoch": 0.000300732421875,
      "step": 49272,
      "training_step_time": 0.40091657638549805
    },
    {
      "epoch": 0.000300738525390625,
      "model_forward_time": 0.11407923698425293,
      "step": 49273
    },
    {
      "epoch": 0.000300738525390625,
      "step": 49273,
      "training_step_time": 0.3890411853790283
    },
    {
      "epoch": 0.00030074462890625,
      "model_forward_time": 0.11493134498596191,
      "step": 49274
    },
    {
      "epoch": 0.00030074462890625,
      "step": 49274,
      "training_step_time": 0.36922144889831543
    },
    {
      "epoch": 0.000300750732421875,
      "model_forward_time": 0.11482787132263184,
      "step": 49275
    },
    {
      "epoch": 0.000300750732421875,
      "step": 49275,
      "training_step_time": 0.4280259609222412
    },
    {
      "epoch": 0.0003007568359375,
      "model_forward_time": 0.11538219451904297,
      "step": 49276
    },
    {
      "epoch": 0.0003007568359375,
      "step": 49276,
      "training_step_time": 0.45306968688964844
    },
    {
      "epoch": 0.000300762939453125,
      "model_forward_time": 0.11518216133117676,
      "step": 49277
    },
    {
      "epoch": 0.000300762939453125,
      "step": 49277,
      "training_step_time": 0.7006895542144775
    },
    {
      "epoch": 0.00030076904296875,
      "model_forward_time": 0.11433553695678711,
      "step": 49278
    },
    {
      "epoch": 0.00030076904296875,
      "step": 49278,
      "training_step_time": 0.4352843761444092
    },
    {
      "epoch": 0.000300775146484375,
      "model_forward_time": 0.11454653739929199,
      "step": 49279
    },
    {
      "epoch": 0.000300775146484375,
      "step": 49279,
      "training_step_time": 0.41080236434936523
    },
    {
      "epoch": 0.00030078125,
      "grad_norm": 0.08125300705432892,
      "learning_rate": 8.476344562905841e-06,
      "loss": 0.0334,
      "step": 49280
    },
    {
      "epoch": 0.00030078125,
      "model_forward_time": 0.1145327091217041,
      "step": 49280
    },
    {
      "epoch": 0.00030078125,
      "step": 49280,
      "training_step_time": 0.4069812297821045
    },
    {
      "epoch": 0.000300787353515625,
      "model_forward_time": 0.11436152458190918,
      "step": 49281
    },
    {
      "epoch": 0.000300787353515625,
      "step": 49281,
      "training_step_time": 0.3866724967956543
    },
    {
      "epoch": 0.00030079345703125,
      "model_forward_time": 0.11472845077514648,
      "step": 49282
    },
    {
      "epoch": 0.00030079345703125,
      "step": 49282,
      "training_step_time": 0.3904383182525635
    },
    {
      "epoch": 0.000300799560546875,
      "model_forward_time": 0.11490440368652344,
      "step": 49283
    },
    {
      "epoch": 0.000300799560546875,
      "step": 49283,
      "training_step_time": 0.5706400871276855
    },
    {
      "epoch": 0.0003008056640625,
      "model_forward_time": 0.11556625366210938,
      "step": 49284
    },
    {
      "epoch": 0.0003008056640625,
      "step": 49284,
      "training_step_time": 0.3912694454193115
    },
    {
      "epoch": 0.000300811767578125,
      "model_forward_time": 0.11534285545349121,
      "step": 49285
    },
    {
      "epoch": 0.000300811767578125,
      "step": 49285,
      "training_step_time": 0.39043354988098145
    },
    {
      "epoch": 0.00030081787109375,
      "model_forward_time": 0.11489987373352051,
      "step": 49286
    },
    {
      "epoch": 0.00030081787109375,
      "step": 49286,
      "training_step_time": 0.3867635726928711
    },
    {
      "epoch": 0.000300823974609375,
      "model_forward_time": 0.11563897132873535,
      "step": 49287
    },
    {
      "epoch": 0.000300823974609375,
      "step": 49287,
      "training_step_time": 0.4032866954803467
    },
    {
      "epoch": 0.000300830078125,
      "model_forward_time": 0.11479663848876953,
      "step": 49288
    },
    {
      "epoch": 0.000300830078125,
      "step": 49288,
      "training_step_time": 0.38233232498168945
    },
    {
      "epoch": 0.000300836181640625,
      "model_forward_time": 0.1149759292602539,
      "step": 49289
    },
    {
      "epoch": 0.000300836181640625,
      "step": 49289,
      "training_step_time": 0.5008082389831543
    },
    {
      "epoch": 0.00030084228515625,
      "grad_norm": 0.12691839039325714,
      "learning_rate": 8.460999540954517e-06,
      "loss": 0.0315,
      "step": 49290
    },
    {
      "epoch": 0.00030084228515625,
      "model_forward_time": 0.11458158493041992,
      "step": 49290
    },
    {
      "epoch": 0.00030084228515625,
      "step": 49290,
      "training_step_time": 0.4848620891571045
    },
    {
      "epoch": 0.000300848388671875,
      "model_forward_time": 0.11464214324951172,
      "step": 49291
    },
    {
      "epoch": 0.000300848388671875,
      "step": 49291,
      "training_step_time": 0.40138792991638184
    },
    {
      "epoch": 0.0003008544921875,
      "model_forward_time": 0.11494112014770508,
      "step": 49292
    },
    {
      "epoch": 0.0003008544921875,
      "step": 49292,
      "training_step_time": 0.44316649436950684
    },
    {
      "epoch": 0.000300860595703125,
      "model_forward_time": 0.11473846435546875,
      "step": 49293
    },
    {
      "epoch": 0.000300860595703125,
      "step": 49293,
      "training_step_time": 0.4184894561767578
    },
    {
      "epoch": 0.00030086669921875,
      "model_forward_time": 0.1146690845489502,
      "step": 49294
    },
    {
      "epoch": 0.00030086669921875,
      "step": 49294,
      "training_step_time": 0.4756932258605957
    },
    {
      "epoch": 0.000300872802734375,
      "model_forward_time": 0.11493659019470215,
      "step": 49295
    },
    {
      "epoch": 0.000300872802734375,
      "step": 49295,
      "training_step_time": 0.39906907081604004
    },
    {
      "epoch": 0.00030087890625,
      "model_forward_time": 0.11450958251953125,
      "step": 49296
    },
    {
      "epoch": 0.00030087890625,
      "step": 49296,
      "training_step_time": 0.3994140625
    },
    {
      "epoch": 0.000300885009765625,
      "model_forward_time": 0.11464095115661621,
      "step": 49297
    },
    {
      "epoch": 0.000300885009765625,
      "step": 49297,
      "training_step_time": 0.39103150367736816
    },
    {
      "epoch": 0.00030089111328125,
      "model_forward_time": 0.11486649513244629,
      "step": 49298
    },
    {
      "epoch": 0.00030089111328125,
      "step": 49298,
      "training_step_time": 0.3855617046356201
    },
    {
      "epoch": 0.000300897216796875,
      "model_forward_time": 0.1149451732635498,
      "step": 49299
    },
    {
      "epoch": 0.000300897216796875,
      "step": 49299,
      "training_step_time": 0.39734840393066406
    },
    {
      "epoch": 0.0003009033203125,
      "grad_norm": 0.10265498608350754,
      "learning_rate": 8.445667137454761e-06,
      "loss": 0.0345,
      "step": 49300
    },
    {
      "epoch": 0.0003009033203125,
      "model_forward_time": 0.11573910713195801,
      "step": 49300
    },
    {
      "epoch": 0.0003009033203125,
      "step": 49300,
      "training_step_time": 0.40590786933898926
    },
    {
      "epoch": 0.000300909423828125,
      "model_forward_time": 0.11549592018127441,
      "step": 49301
    },
    {
      "epoch": 0.000300909423828125,
      "step": 49301,
      "training_step_time": 0.4104311466217041
    },
    {
      "epoch": 0.00030091552734375,
      "model_forward_time": 0.11467361450195312,
      "step": 49302
    },
    {
      "epoch": 0.00030091552734375,
      "step": 49302,
      "training_step_time": 0.4302074909210205
    },
    {
      "epoch": 0.000300921630859375,
      "model_forward_time": 0.11522769927978516,
      "step": 49303
    },
    {
      "epoch": 0.000300921630859375,
      "step": 49303,
      "training_step_time": 0.40376782417297363
    },
    {
      "epoch": 0.000300927734375,
      "model_forward_time": 0.11515522003173828,
      "step": 49304
    },
    {
      "epoch": 0.000300927734375,
      "step": 49304,
      "training_step_time": 0.5067694187164307
    },
    {
      "epoch": 0.000300933837890625,
      "model_forward_time": 0.11553096771240234,
      "step": 49305
    },
    {
      "epoch": 0.000300933837890625,
      "step": 49305,
      "training_step_time": 0.4205608367919922
    },
    {
      "epoch": 0.00030093994140625,
      "model_forward_time": 0.11442279815673828,
      "step": 49306
    },
    {
      "epoch": 0.00030093994140625,
      "step": 49306,
      "training_step_time": 0.43705177307128906
    },
    {
      "epoch": 0.000300946044921875,
      "model_forward_time": 0.11422252655029297,
      "step": 49307
    },
    {
      "epoch": 0.000300946044921875,
      "step": 49307,
      "training_step_time": 0.3964388370513916
    },
    {
      "epoch": 0.0003009521484375,
      "model_forward_time": 0.1153862476348877,
      "step": 49308
    },
    {
      "epoch": 0.0003009521484375,
      "step": 49308,
      "training_step_time": 0.502708911895752
    },
    {
      "epoch": 0.000300958251953125,
      "model_forward_time": 0.11472821235656738,
      "step": 49309
    },
    {
      "epoch": 0.000300958251953125,
      "step": 49309,
      "training_step_time": 0.3954315185546875
    },
    {
      "epoch": 0.00030096435546875,
      "grad_norm": 0.09503259509801865,
      "learning_rate": 8.43034735706415e-06,
      "loss": 0.0385,
      "step": 49310
    },
    {
      "epoch": 0.00030096435546875,
      "model_forward_time": 0.11505484580993652,
      "step": 49310
    },
    {
      "epoch": 0.00030096435546875,
      "step": 49310,
      "training_step_time": 0.3906056880950928
    },
    {
      "epoch": 0.000300970458984375,
      "model_forward_time": 0.11489510536193848,
      "step": 49311
    },
    {
      "epoch": 0.000300970458984375,
      "step": 49311,
      "training_step_time": 0.37792539596557617
    },
    {
      "epoch": 0.0003009765625,
      "model_forward_time": 0.11517977714538574,
      "step": 49312
    },
    {
      "epoch": 0.0003009765625,
      "step": 49312,
      "training_step_time": 0.39329004287719727
    },
    {
      "epoch": 0.000300982666015625,
      "model_forward_time": 0.11465597152709961,
      "step": 49313
    },
    {
      "epoch": 0.000300982666015625,
      "step": 49313,
      "training_step_time": 0.8411076068878174
    },
    {
      "epoch": 0.00030098876953125,
      "model_forward_time": 0.11502957344055176,
      "step": 49314
    },
    {
      "epoch": 0.00030098876953125,
      "step": 49314,
      "training_step_time": 0.3926663398742676
    },
    {
      "epoch": 0.000300994873046875,
      "model_forward_time": 0.11417651176452637,
      "step": 49315
    },
    {
      "epoch": 0.000300994873046875,
      "step": 49315,
      "training_step_time": 0.39781856536865234
    },
    {
      "epoch": 0.0003010009765625,
      "model_forward_time": 0.11496901512145996,
      "step": 49316
    },
    {
      "epoch": 0.0003010009765625,
      "step": 49316,
      "training_step_time": 0.4242987632751465
    },
    {
      "epoch": 0.000301007080078125,
      "model_forward_time": 0.11493062973022461,
      "step": 49317
    },
    {
      "epoch": 0.000301007080078125,
      "step": 49317,
      "training_step_time": 0.4102139472961426
    },
    {
      "epoch": 0.00030101318359375,
      "model_forward_time": 0.11442208290100098,
      "step": 49318
    },
    {
      "epoch": 0.00030101318359375,
      "step": 49318,
      "training_step_time": 0.47250866889953613
    },
    {
      "epoch": 0.000301019287109375,
      "model_forward_time": 0.11442112922668457,
      "step": 49319
    },
    {
      "epoch": 0.000301019287109375,
      "step": 49319,
      "training_step_time": 0.4353771209716797
    },
    {
      "epoch": 0.000301025390625,
      "grad_norm": 0.07689306139945984,
      "learning_rate": 8.415040204436426e-06,
      "loss": 0.0357,
      "step": 49320
    },
    {
      "epoch": 0.000301025390625,
      "model_forward_time": 0.11512446403503418,
      "step": 49320
    },
    {
      "epoch": 0.000301025390625,
      "step": 49320,
      "training_step_time": 0.49245667457580566
    },
    {
      "epoch": 0.000301031494140625,
      "model_forward_time": 0.11457276344299316,
      "step": 49321
    },
    {
      "epoch": 0.000301031494140625,
      "step": 49321,
      "training_step_time": 0.4044675827026367
    },
    {
      "epoch": 0.00030103759765625,
      "model_forward_time": 0.11413168907165527,
      "step": 49322
    },
    {
      "epoch": 0.00030103759765625,
      "step": 49322,
      "training_step_time": 0.406846284866333
    },
    {
      "epoch": 0.000301043701171875,
      "model_forward_time": 0.11576271057128906,
      "step": 49323
    },
    {
      "epoch": 0.000301043701171875,
      "step": 49323,
      "training_step_time": 0.3937342166900635
    },
    {
      "epoch": 0.0003010498046875,
      "model_forward_time": 0.11448884010314941,
      "step": 49324
    },
    {
      "epoch": 0.0003010498046875,
      "step": 49324,
      "training_step_time": 0.3836245536804199
    },
    {
      "epoch": 0.000301055908203125,
      "model_forward_time": 0.11561346054077148,
      "step": 49325
    },
    {
      "epoch": 0.000301055908203125,
      "step": 49325,
      "training_step_time": 0.808035135269165
    },
    {
      "epoch": 0.00030106201171875,
      "model_forward_time": 0.11461472511291504,
      "step": 49326
    },
    {
      "epoch": 0.00030106201171875,
      "step": 49326,
      "training_step_time": 0.38808536529541016
    },
    {
      "epoch": 0.000301068115234375,
      "model_forward_time": 0.11417865753173828,
      "step": 49327
    },
    {
      "epoch": 0.000301068115234375,
      "step": 49327,
      "training_step_time": 0.3949413299560547
    },
    {
      "epoch": 0.00030107421875,
      "model_forward_time": 0.11481690406799316,
      "step": 49328
    },
    {
      "epoch": 0.00030107421875,
      "step": 49328,
      "training_step_time": 0.39133572578430176
    },
    {
      "epoch": 0.000301080322265625,
      "model_forward_time": 0.11520075798034668,
      "step": 49329
    },
    {
      "epoch": 0.000301080322265625,
      "step": 49329,
      "training_step_time": 0.4102020263671875
    },
    {
      "epoch": 0.00030108642578125,
      "grad_norm": 0.11401694267988205,
      "learning_rate": 8.399745684221499e-06,
      "loss": 0.034,
      "step": 49330
    },
    {
      "epoch": 0.00030108642578125,
      "model_forward_time": 0.11441230773925781,
      "step": 49330
    },
    {
      "epoch": 0.00030108642578125,
      "step": 49330,
      "training_step_time": 0.4568779468536377
    },
    {
      "epoch": 0.000301092529296875,
      "model_forward_time": 0.11598420143127441,
      "step": 49331
    },
    {
      "epoch": 0.000301092529296875,
      "step": 49331,
      "training_step_time": 0.5144171714782715
    },
    {
      "epoch": 0.0003010986328125,
      "model_forward_time": 0.1149146556854248,
      "step": 49332
    },
    {
      "epoch": 0.0003010986328125,
      "step": 49332,
      "training_step_time": 0.47184228897094727
    },
    {
      "epoch": 0.000301104736328125,
      "model_forward_time": 0.11432743072509766,
      "step": 49333
    },
    {
      "epoch": 0.000301104736328125,
      "step": 49333,
      "training_step_time": 0.39546751976013184
    },
    {
      "epoch": 0.00030111083984375,
      "model_forward_time": 0.11459541320800781,
      "step": 49334
    },
    {
      "epoch": 0.00030111083984375,
      "step": 49334,
      "training_step_time": 0.47513914108276367
    },
    {
      "epoch": 0.000301116943359375,
      "model_forward_time": 0.11417508125305176,
      "step": 49335
    },
    {
      "epoch": 0.000301116943359375,
      "step": 49335,
      "training_step_time": 0.48326730728149414
    },
    {
      "epoch": 0.000301123046875,
      "model_forward_time": 0.11435198783874512,
      "step": 49336
    },
    {
      "epoch": 0.000301123046875,
      "step": 49336,
      "training_step_time": 0.3930168151855469
    },
    {
      "epoch": 0.000301129150390625,
      "model_forward_time": 0.11461925506591797,
      "step": 49337
    },
    {
      "epoch": 0.000301129150390625,
      "step": 49337,
      "training_step_time": 0.43447208404541016
    },
    {
      "epoch": 0.00030113525390625,
      "model_forward_time": 0.11476802825927734,
      "step": 49338
    },
    {
      "epoch": 0.00030113525390625,
      "step": 49338,
      "training_step_time": 0.39636850357055664
    },
    {
      "epoch": 0.000301141357421875,
      "model_forward_time": 0.11479425430297852,
      "step": 49339
    },
    {
      "epoch": 0.000301141357421875,
      "step": 49339,
      "training_step_time": 0.3931541442871094
    },
    {
      "epoch": 0.0003011474609375,
      "grad_norm": 0.0991746336221695,
      "learning_rate": 8.384463801065434e-06,
      "loss": 0.0311,
      "step": 49340
    },
    {
      "epoch": 0.0003011474609375,
      "model_forward_time": 0.11457943916320801,
      "step": 49340
    },
    {
      "epoch": 0.0003011474609375,
      "step": 49340,
      "training_step_time": 0.39997100830078125
    },
    {
      "epoch": 0.000301153564453125,
      "model_forward_time": 0.11515665054321289,
      "step": 49341
    },
    {
      "epoch": 0.000301153564453125,
      "step": 49341,
      "training_step_time": 0.39305830001831055
    },
    {
      "epoch": 0.00030115966796875,
      "model_forward_time": 0.11466526985168457,
      "step": 49342
    },
    {
      "epoch": 0.00030115966796875,
      "step": 49342,
      "training_step_time": 0.40022921562194824
    },
    {
      "epoch": 0.000301165771484375,
      "model_forward_time": 0.11538100242614746,
      "step": 49343
    },
    {
      "epoch": 0.000301165771484375,
      "step": 49343,
      "training_step_time": 1.2208967208862305
    },
    {
      "epoch": 0.000301171875,
      "model_forward_time": 0.11408042907714844,
      "step": 49344
    },
    {
      "epoch": 0.000301171875,
      "step": 49344,
      "training_step_time": 0.4662942886352539
    },
    {
      "epoch": 0.000301177978515625,
      "model_forward_time": 0.11412644386291504,
      "step": 49345
    },
    {
      "epoch": 0.000301177978515625,
      "step": 49345,
      "training_step_time": 0.39259982109069824
    },
    {
      "epoch": 0.00030118408203125,
      "model_forward_time": 0.11409902572631836,
      "step": 49346
    },
    {
      "epoch": 0.00030118408203125,
      "step": 49346,
      "training_step_time": 0.38002467155456543
    },
    {
      "epoch": 0.000301190185546875,
      "model_forward_time": 0.11393308639526367,
      "step": 49347
    },
    {
      "epoch": 0.000301190185546875,
      "step": 49347,
      "training_step_time": 0.4321761131286621
    },
    {
      "epoch": 0.0003011962890625,
      "model_forward_time": 0.1146693229675293,
      "step": 49348
    },
    {
      "epoch": 0.0003011962890625,
      "step": 49348,
      "training_step_time": 0.41623759269714355
    },
    {
      "epoch": 0.000301202392578125,
      "model_forward_time": 0.11512279510498047,
      "step": 49349
    },
    {
      "epoch": 0.000301202392578125,
      "step": 49349,
      "training_step_time": 0.38678956031799316
    },
    {
      "epoch": 0.00030120849609375,
      "grad_norm": 0.11357560008764267,
      "learning_rate": 8.369194559610482e-06,
      "loss": 0.037,
      "step": 49350
    },
    {
      "epoch": 0.00030120849609375,
      "model_forward_time": 0.11491084098815918,
      "step": 49350
    },
    {
      "epoch": 0.00030120849609375,
      "step": 49350,
      "training_step_time": 0.41436147689819336
    },
    {
      "epoch": 0.000301214599609375,
      "model_forward_time": 0.11505508422851562,
      "step": 49351
    },
    {
      "epoch": 0.000301214599609375,
      "step": 49351,
      "training_step_time": 0.38454222679138184
    },
    {
      "epoch": 0.000301220703125,
      "model_forward_time": 0.11519980430603027,
      "step": 49352
    },
    {
      "epoch": 0.000301220703125,
      "step": 49352,
      "training_step_time": 0.3872051239013672
    },
    {
      "epoch": 0.000301226806640625,
      "model_forward_time": 0.11488533020019531,
      "step": 49353
    },
    {
      "epoch": 0.000301226806640625,
      "step": 49353,
      "training_step_time": 0.3784596920013428
    },
    {
      "epoch": 0.00030123291015625,
      "model_forward_time": 0.11533045768737793,
      "step": 49354
    },
    {
      "epoch": 0.00030123291015625,
      "step": 49354,
      "training_step_time": 0.3988797664642334
    },
    {
      "epoch": 0.000301239013671875,
      "model_forward_time": 0.11525559425354004,
      "step": 49355
    },
    {
      "epoch": 0.000301239013671875,
      "step": 49355,
      "training_step_time": 0.7651286125183105
    },
    {
      "epoch": 0.0003012451171875,
      "model_forward_time": 0.11509203910827637,
      "step": 49356
    },
    {
      "epoch": 0.0003012451171875,
      "step": 49356,
      "training_step_time": 0.4420309066772461
    },
    {
      "epoch": 0.000301251220703125,
      "model_forward_time": 0.1144108772277832,
      "step": 49357
    },
    {
      "epoch": 0.000301251220703125,
      "step": 49357,
      "training_step_time": 0.40548276901245117
    },
    {
      "epoch": 0.00030125732421875,
      "model_forward_time": 0.11404204368591309,
      "step": 49358
    },
    {
      "epoch": 0.00030125732421875,
      "step": 49358,
      "training_step_time": 0.44663095474243164
    },
    {
      "epoch": 0.000301263427734375,
      "model_forward_time": 0.1142127513885498,
      "step": 49359
    },
    {
      "epoch": 0.000301263427734375,
      "step": 49359,
      "training_step_time": 0.4817471504211426
    },
    {
      "epoch": 0.00030126953125,
      "grad_norm": 0.08507706224918365,
      "learning_rate": 8.353937964495029e-06,
      "loss": 0.0306,
      "step": 49360
    },
    {
      "epoch": 0.00030126953125,
      "model_forward_time": 0.11415529251098633,
      "step": 49360
    },
    {
      "epoch": 0.00030126953125,
      "step": 49360,
      "training_step_time": 0.49775123596191406
    },
    {
      "epoch": 0.000301275634765625,
      "model_forward_time": 0.11426997184753418,
      "step": 49361
    },
    {
      "epoch": 0.000301275634765625,
      "step": 49361,
      "training_step_time": 0.38030433654785156
    },
    {
      "epoch": 0.00030128173828125,
      "model_forward_time": 0.11432909965515137,
      "step": 49362
    },
    {
      "epoch": 0.00030128173828125,
      "step": 49362,
      "training_step_time": 0.3950064182281494
    },
    {
      "epoch": 0.000301287841796875,
      "model_forward_time": 0.11453080177307129,
      "step": 49363
    },
    {
      "epoch": 0.000301287841796875,
      "step": 49363,
      "training_step_time": 0.39086413383483887
    },
    {
      "epoch": 0.0003012939453125,
      "model_forward_time": 0.11460733413696289,
      "step": 49364
    },
    {
      "epoch": 0.0003012939453125,
      "step": 49364,
      "training_step_time": 0.3843686580657959
    },
    {
      "epoch": 0.000301300048828125,
      "model_forward_time": 0.11487960815429688,
      "step": 49365
    },
    {
      "epoch": 0.000301300048828125,
      "step": 49365,
      "training_step_time": 0.3995335102081299
    },
    {
      "epoch": 0.00030130615234375,
      "model_forward_time": 0.11494922637939453,
      "step": 49366
    },
    {
      "epoch": 0.00030130615234375,
      "step": 49366,
      "training_step_time": 0.3981595039367676
    },
    {
      "epoch": 0.000301312255859375,
      "model_forward_time": 0.1145029067993164,
      "step": 49367
    },
    {
      "epoch": 0.000301312255859375,
      "step": 49367,
      "training_step_time": 0.81614089012146
    },
    {
      "epoch": 0.000301318359375,
      "model_forward_time": 0.11463189125061035,
      "step": 49368
    },
    {
      "epoch": 0.000301318359375,
      "step": 49368,
      "training_step_time": 0.38955163955688477
    },
    {
      "epoch": 0.000301324462890625,
      "model_forward_time": 0.11403346061706543,
      "step": 49369
    },
    {
      "epoch": 0.000301324462890625,
      "step": 49369,
      "training_step_time": 0.388927698135376
    },
    {
      "epoch": 0.00030133056640625,
      "grad_norm": 0.10425620526075363,
      "learning_rate": 8.338694020353616e-06,
      "loss": 0.0353,
      "step": 49370
    },
    {
      "epoch": 0.00030133056640625,
      "model_forward_time": 0.11472725868225098,
      "step": 49370
    },
    {
      "epoch": 0.00030133056640625,
      "step": 49370,
      "training_step_time": 0.43770766258239746
    },
    {
      "epoch": 0.000301336669921875,
      "model_forward_time": 0.11430501937866211,
      "step": 49371
    },
    {
      "epoch": 0.000301336669921875,
      "step": 49371,
      "training_step_time": 0.4026644229888916
    },
    {
      "epoch": 0.0003013427734375,
      "model_forward_time": 0.11501240730285645,
      "step": 49372
    },
    {
      "epoch": 0.0003013427734375,
      "step": 49372,
      "training_step_time": 0.47602152824401855
    },
    {
      "epoch": 0.000301348876953125,
      "model_forward_time": 0.11412549018859863,
      "step": 49373
    },
    {
      "epoch": 0.000301348876953125,
      "step": 49373,
      "training_step_time": 0.4884934425354004
    },
    {
      "epoch": 0.00030135498046875,
      "model_forward_time": 0.11514449119567871,
      "step": 49374
    },
    {
      "epoch": 0.00030135498046875,
      "step": 49374,
      "training_step_time": 0.40233564376831055
    },
    {
      "epoch": 0.000301361083984375,
      "model_forward_time": 0.11430168151855469,
      "step": 49375
    },
    {
      "epoch": 0.000301361083984375,
      "step": 49375,
      "training_step_time": 0.4881014823913574
    },
    {
      "epoch": 0.0003013671875,
      "model_forward_time": 0.11429119110107422,
      "step": 49376
    },
    {
      "epoch": 0.0003013671875,
      "step": 49376,
      "training_step_time": 0.38796067237854004
    },
    {
      "epoch": 0.000301373291015625,
      "model_forward_time": 0.11467981338500977,
      "step": 49377
    },
    {
      "epoch": 0.000301373291015625,
      "step": 49377,
      "training_step_time": 0.3819758892059326
    },
    {
      "epoch": 0.00030137939453125,
      "model_forward_time": 0.11566877365112305,
      "step": 49378
    },
    {
      "epoch": 0.00030137939453125,
      "step": 49378,
      "training_step_time": 0.3967170715332031
    },
    {
      "epoch": 0.000301385498046875,
      "model_forward_time": 0.11440753936767578,
      "step": 49379
    },
    {
      "epoch": 0.000301385498046875,
      "step": 49379,
      "training_step_time": 0.5194799900054932
    },
    {
      "epoch": 0.0003013916015625,
      "grad_norm": 0.09100260585546494,
      "learning_rate": 8.323462731816961e-06,
      "loss": 0.0369,
      "step": 49380
    },
    {
      "epoch": 0.0003013916015625,
      "model_forward_time": 0.11423969268798828,
      "step": 49380
    },
    {
      "epoch": 0.0003013916015625,
      "step": 49380,
      "training_step_time": 0.4011266231536865
    },
    {
      "epoch": 0.000301397705078125,
      "model_forward_time": 0.11459851264953613,
      "step": 49381
    },
    {
      "epoch": 0.000301397705078125,
      "step": 49381,
      "training_step_time": 0.4016454219818115
    },
    {
      "epoch": 0.00030140380859375,
      "model_forward_time": 0.11474442481994629,
      "step": 49382
    },
    {
      "epoch": 0.00030140380859375,
      "step": 49382,
      "training_step_time": 0.3872509002685547
    },
    {
      "epoch": 0.000301409912109375,
      "model_forward_time": 0.11499142646789551,
      "step": 49383
    },
    {
      "epoch": 0.000301409912109375,
      "step": 49383,
      "training_step_time": 0.4148094654083252
    },
    {
      "epoch": 0.000301416015625,
      "model_forward_time": 0.11513924598693848,
      "step": 49384
    },
    {
      "epoch": 0.000301416015625,
      "step": 49384,
      "training_step_time": 0.4125814437866211
    },
    {
      "epoch": 0.000301422119140625,
      "model_forward_time": 0.11535215377807617,
      "step": 49385
    },
    {
      "epoch": 0.000301422119140625,
      "step": 49385,
      "training_step_time": 0.5818309783935547
    },
    {
      "epoch": 0.00030142822265625,
      "model_forward_time": 0.11500883102416992,
      "step": 49386
    },
    {
      "epoch": 0.00030142822265625,
      "step": 49386,
      "training_step_time": 0.4487795829772949
    },
    {
      "epoch": 0.000301434326171875,
      "model_forward_time": 0.11502432823181152,
      "step": 49387
    },
    {
      "epoch": 0.000301434326171875,
      "step": 49387,
      "training_step_time": 0.49576854705810547
    },
    {
      "epoch": 0.0003014404296875,
      "model_forward_time": 0.11438918113708496,
      "step": 49388
    },
    {
      "epoch": 0.0003014404296875,
      "step": 49388,
      "training_step_time": 0.39087367057800293
    },
    {
      "epoch": 0.000301446533203125,
      "model_forward_time": 0.11406087875366211,
      "step": 49389
    },
    {
      "epoch": 0.000301446533203125,
      "step": 49389,
      "training_step_time": 0.4206857681274414
    },
    {
      "epoch": 0.00030145263671875,
      "grad_norm": 0.09194450825452805,
      "learning_rate": 8.308244103511909e-06,
      "loss": 0.0348,
      "step": 49390
    },
    {
      "epoch": 0.00030145263671875,
      "model_forward_time": 0.11461639404296875,
      "step": 49390
    },
    {
      "epoch": 0.00030145263671875,
      "step": 49390,
      "training_step_time": 0.39762353897094727
    },
    {
      "epoch": 0.000301458740234375,
      "model_forward_time": 0.1143953800201416,
      "step": 49391
    },
    {
      "epoch": 0.000301458740234375,
      "step": 49391,
      "training_step_time": 0.40392613410949707
    },
    {
      "epoch": 0.00030146484375,
      "model_forward_time": 0.11533331871032715,
      "step": 49392
    },
    {
      "epoch": 0.00030146484375,
      "step": 49392,
      "training_step_time": 0.39623022079467773
    },
    {
      "epoch": 0.000301470947265625,
      "model_forward_time": 0.11467432975769043,
      "step": 49393
    },
    {
      "epoch": 0.000301470947265625,
      "step": 49393,
      "training_step_time": 0.39755964279174805
    },
    {
      "epoch": 0.00030147705078125,
      "model_forward_time": 0.11535024642944336,
      "step": 49394
    },
    {
      "epoch": 0.00030147705078125,
      "step": 49394,
      "training_step_time": 0.39143896102905273
    },
    {
      "epoch": 0.000301483154296875,
      "model_forward_time": 0.11509346961975098,
      "step": 49395
    },
    {
      "epoch": 0.000301483154296875,
      "step": 49395,
      "training_step_time": 0.39116859436035156
    },
    {
      "epoch": 0.0003014892578125,
      "model_forward_time": 0.11581611633300781,
      "step": 49396
    },
    {
      "epoch": 0.0003014892578125,
      "step": 49396,
      "training_step_time": 0.3989908695220947
    },
    {
      "epoch": 0.000301495361328125,
      "model_forward_time": 0.11478495597839355,
      "step": 49397
    },
    {
      "epoch": 0.000301495361328125,
      "step": 49397,
      "training_step_time": 0.5367848873138428
    },
    {
      "epoch": 0.00030150146484375,
      "model_forward_time": 0.11542224884033203,
      "step": 49398
    },
    {
      "epoch": 0.00030150146484375,
      "step": 49398,
      "training_step_time": 0.42061448097229004
    },
    {
      "epoch": 0.000301507568359375,
      "model_forward_time": 0.11540675163269043,
      "step": 49399
    },
    {
      "epoch": 0.000301507568359375,
      "step": 49399,
      "training_step_time": 0.4405243396759033
    },
    {
      "epoch": 0.000301513671875,
      "grad_norm": 0.11367026716470718,
      "learning_rate": 8.293038140061515e-06,
      "loss": 0.0346,
      "step": 49400
    },
    {
      "epoch": 0.000301513671875,
      "model_forward_time": 0.11542868614196777,
      "step": 49400
    },
    {
      "epoch": 0.000301513671875,
      "step": 49400,
      "training_step_time": 0.48235607147216797
    },
    {
      "epoch": 0.000301519775390625,
      "model_forward_time": 0.11436939239501953,
      "step": 49401
    },
    {
      "epoch": 0.000301519775390625,
      "step": 49401,
      "training_step_time": 0.46631336212158203
    },
    {
      "epoch": 0.00030152587890625,
      "model_forward_time": 0.11506128311157227,
      "step": 49402
    },
    {
      "epoch": 0.00030152587890625,
      "step": 49402,
      "training_step_time": 0.4536128044128418
    },
    {
      "epoch": 0.000301531982421875,
      "model_forward_time": 0.11458086967468262,
      "step": 49403
    },
    {
      "epoch": 0.000301531982421875,
      "step": 49403,
      "training_step_time": 0.44318389892578125
    },
    {
      "epoch": 0.0003015380859375,
      "model_forward_time": 0.11464977264404297,
      "step": 49404
    },
    {
      "epoch": 0.0003015380859375,
      "step": 49404,
      "training_step_time": 0.39561891555786133
    },
    {
      "epoch": 0.000301544189453125,
      "model_forward_time": 0.11480474472045898,
      "step": 49405
    },
    {
      "epoch": 0.000301544189453125,
      "step": 49405,
      "training_step_time": 0.41288137435913086
    },
    {
      "epoch": 0.00030155029296875,
      "model_forward_time": 0.11475086212158203,
      "step": 49406
    },
    {
      "epoch": 0.00030155029296875,
      "step": 49406,
      "training_step_time": 0.39606475830078125
    },
    {
      "epoch": 0.000301556396484375,
      "model_forward_time": 0.11489248275756836,
      "step": 49407
    },
    {
      "epoch": 0.000301556396484375,
      "step": 49407,
      "training_step_time": 0.3871152400970459
    },
    {
      "epoch": 0.0003015625,
      "model_forward_time": 0.11496329307556152,
      "step": 49408
    },
    {
      "epoch": 0.0003015625,
      "step": 49408,
      "training_step_time": 0.3939347267150879
    },
    {
      "epoch": 0.000301568603515625,
      "model_forward_time": 0.11538386344909668,
      "step": 49409
    },
    {
      "epoch": 0.000301568603515625,
      "step": 49409,
      "training_step_time": 0.4030749797821045
    },
    {
      "epoch": 0.00030157470703125,
      "grad_norm": 0.10955218225717545,
      "learning_rate": 8.277844846084898e-06,
      "loss": 0.0378,
      "step": 49410
    },
    {
      "epoch": 0.00030157470703125,
      "model_forward_time": 0.1152498722076416,
      "step": 49410
    },
    {
      "epoch": 0.00030157470703125,
      "step": 49410,
      "training_step_time": 0.43230199813842773
    },
    {
      "epoch": 0.000301580810546875,
      "model_forward_time": 0.11491656303405762,
      "step": 49411
    },
    {
      "epoch": 0.000301580810546875,
      "step": 49411,
      "training_step_time": 0.42206859588623047
    },
    {
      "epoch": 0.0003015869140625,
      "model_forward_time": 0.11482787132263184,
      "step": 49412
    },
    {
      "epoch": 0.0003015869140625,
      "step": 49412,
      "training_step_time": 0.3955566883087158
    },
    {
      "epoch": 0.000301593017578125,
      "model_forward_time": 0.11539864540100098,
      "step": 49413
    },
    {
      "epoch": 0.000301593017578125,
      "step": 49413,
      "training_step_time": 0.3929171562194824
    },
    {
      "epoch": 0.00030159912109375,
      "model_forward_time": 0.11489987373352051,
      "step": 49414
    },
    {
      "epoch": 0.00030159912109375,
      "step": 49414,
      "training_step_time": 0.3666219711303711
    },
    {
      "epoch": 0.000301605224609375,
      "model_forward_time": 0.11466550827026367,
      "step": 49415
    },
    {
      "epoch": 0.000301605224609375,
      "step": 49415,
      "training_step_time": 0.4191555976867676
    },
    {
      "epoch": 0.000301611328125,
      "model_forward_time": 0.11494731903076172,
      "step": 49416
    },
    {
      "epoch": 0.000301611328125,
      "step": 49416,
      "training_step_time": 0.47177863121032715
    },
    {
      "epoch": 0.000301617431640625,
      "model_forward_time": 0.1157829761505127,
      "step": 49417
    },
    {
      "epoch": 0.000301617431640625,
      "step": 49417,
      "training_step_time": 0.4309554100036621
    },
    {
      "epoch": 0.00030162353515625,
      "model_forward_time": 0.11540818214416504,
      "step": 49418
    },
    {
      "epoch": 0.00030162353515625,
      "step": 49418,
      "training_step_time": 0.4038355350494385
    },
    {
      "epoch": 0.000301629638671875,
      "model_forward_time": 0.1152639389038086,
      "step": 49419
    },
    {
      "epoch": 0.000301629638671875,
      "step": 49419,
      "training_step_time": 0.37816381454467773
    },
    {
      "epoch": 0.0003016357421875,
      "grad_norm": 0.1111208125948906,
      "learning_rate": 8.262664226197436e-06,
      "loss": 0.0352,
      "step": 49420
    },
    {
      "epoch": 0.0003016357421875,
      "model_forward_time": 0.11418390274047852,
      "step": 49420
    },
    {
      "epoch": 0.0003016357421875,
      "step": 49420,
      "training_step_time": 0.3848133087158203
    },
    {
      "epoch": 0.000301641845703125,
      "model_forward_time": 0.1149139404296875,
      "step": 49421
    },
    {
      "epoch": 0.000301641845703125,
      "step": 49421,
      "training_step_time": 0.3929026126861572
    },
    {
      "epoch": 0.00030164794921875,
      "model_forward_time": 0.11528444290161133,
      "step": 49422
    },
    {
      "epoch": 0.00030164794921875,
      "step": 49422,
      "training_step_time": 0.39464831352233887
    },
    {
      "epoch": 0.000301654052734375,
      "model_forward_time": 0.11555051803588867,
      "step": 49423
    },
    {
      "epoch": 0.000301654052734375,
      "step": 49423,
      "training_step_time": 0.40106844902038574
    },
    {
      "epoch": 0.00030166015625,
      "model_forward_time": 0.11506915092468262,
      "step": 49424
    },
    {
      "epoch": 0.00030166015625,
      "step": 49424,
      "training_step_time": 0.40090346336364746
    },
    {
      "epoch": 0.000301666259765625,
      "model_forward_time": 0.11524271965026855,
      "step": 49425
    },
    {
      "epoch": 0.000301666259765625,
      "step": 49425,
      "training_step_time": 0.4221994876861572
    },
    {
      "epoch": 0.00030167236328125,
      "model_forward_time": 0.11531782150268555,
      "step": 49426
    },
    {
      "epoch": 0.00030167236328125,
      "step": 49426,
      "training_step_time": 0.4210216999053955
    },
    {
      "epoch": 0.000301678466796875,
      "model_forward_time": 0.11596465110778809,
      "step": 49427
    },
    {
      "epoch": 0.000301678466796875,
      "step": 49427,
      "training_step_time": 0.3888571262359619
    },
    {
      "epoch": 0.0003016845703125,
      "model_forward_time": 0.11522984504699707,
      "step": 49428
    },
    {
      "epoch": 0.0003016845703125,
      "step": 49428,
      "training_step_time": 0.39055323600769043
    },
    {
      "epoch": 0.000301690673828125,
      "model_forward_time": 0.11558151245117188,
      "step": 49429
    },
    {
      "epoch": 0.000301690673828125,
      "step": 49429,
      "training_step_time": 0.39232873916625977
    },
    {
      "epoch": 0.00030169677734375,
      "grad_norm": 0.1253771185874939,
      "learning_rate": 8.247496285010548e-06,
      "loss": 0.0346,
      "step": 49430
    },
    {
      "epoch": 0.00030169677734375,
      "model_forward_time": 0.1148684024810791,
      "step": 49430
    },
    {
      "epoch": 0.00030169677734375,
      "step": 49430,
      "training_step_time": 0.41191554069519043
    },
    {
      "epoch": 0.000301702880859375,
      "model_forward_time": 0.11532807350158691,
      "step": 49431
    },
    {
      "epoch": 0.000301702880859375,
      "step": 49431,
      "training_step_time": 0.49225306510925293
    },
    {
      "epoch": 0.000301708984375,
      "model_forward_time": 0.11469244956970215,
      "step": 49432
    },
    {
      "epoch": 0.000301708984375,
      "step": 49432,
      "training_step_time": 0.4732701778411865
    },
    {
      "epoch": 0.000301715087890625,
      "model_forward_time": 0.11461257934570312,
      "step": 49433
    },
    {
      "epoch": 0.000301715087890625,
      "step": 49433,
      "training_step_time": 0.4019300937652588
    },
    {
      "epoch": 0.00030172119140625,
      "model_forward_time": 0.1144571304321289,
      "step": 49434
    },
    {
      "epoch": 0.00030172119140625,
      "step": 49434,
      "training_step_time": 0.39810991287231445
    },
    {
      "epoch": 0.000301727294921875,
      "model_forward_time": 0.11449694633483887,
      "step": 49435
    },
    {
      "epoch": 0.000301727294921875,
      "step": 49435,
      "training_step_time": 0.3981163501739502
    },
    {
      "epoch": 0.0003017333984375,
      "model_forward_time": 0.11467456817626953,
      "step": 49436
    },
    {
      "epoch": 0.0003017333984375,
      "step": 49436,
      "training_step_time": 0.4063751697540283
    },
    {
      "epoch": 0.000301739501953125,
      "model_forward_time": 0.11472940444946289,
      "step": 49437
    },
    {
      "epoch": 0.000301739501953125,
      "step": 49437,
      "training_step_time": 0.39241862297058105
    },
    {
      "epoch": 0.00030174560546875,
      "model_forward_time": 0.11427521705627441,
      "step": 49438
    },
    {
      "epoch": 0.00030174560546875,
      "step": 49438,
      "training_step_time": 0.4006044864654541
    },
    {
      "epoch": 0.000301751708984375,
      "model_forward_time": 0.11522746086120605,
      "step": 49439
    },
    {
      "epoch": 0.000301751708984375,
      "step": 49439,
      "training_step_time": 0.8989899158477783
    },
    {
      "epoch": 0.0003017578125,
      "grad_norm": 0.11354032903909683,
      "learning_rate": 8.232341027131885e-06,
      "loss": 0.0302,
      "step": 49440
    },
    {
      "epoch": 0.0003017578125,
      "model_forward_time": 0.11471056938171387,
      "step": 49440
    },
    {
      "epoch": 0.0003017578125,
      "step": 49440,
      "training_step_time": 0.3920323848724365
    },
    {
      "epoch": 0.000301763916015625,
      "model_forward_time": 0.11443161964416504,
      "step": 49441
    },
    {
      "epoch": 0.000301763916015625,
      "step": 49441,
      "training_step_time": 0.3874032497406006
    },
    {
      "epoch": 0.00030177001953125,
      "model_forward_time": 0.11385130882263184,
      "step": 49442
    },
    {
      "epoch": 0.00030177001953125,
      "step": 49442,
      "training_step_time": 0.38405346870422363
    },
    {
      "epoch": 0.000301776123046875,
      "model_forward_time": 0.11484026908874512,
      "step": 49443
    },
    {
      "epoch": 0.000301776123046875,
      "step": 49443,
      "training_step_time": 0.3615274429321289
    },
    {
      "epoch": 0.0003017822265625,
      "model_forward_time": 0.11414289474487305,
      "step": 49444
    },
    {
      "epoch": 0.0003017822265625,
      "step": 49444,
      "training_step_time": 0.4497206211090088
    },
    {
      "epoch": 0.000301788330078125,
      "model_forward_time": 0.11455392837524414,
      "step": 49445
    },
    {
      "epoch": 0.000301788330078125,
      "step": 49445,
      "training_step_time": 0.4281198978424072
    },
    {
      "epoch": 0.00030179443359375,
      "model_forward_time": 0.11486172676086426,
      "step": 49446
    },
    {
      "epoch": 0.00030179443359375,
      "step": 49446,
      "training_step_time": 0.39264416694641113
    },
    {
      "epoch": 0.000301800537109375,
      "model_forward_time": 0.11529707908630371,
      "step": 49447
    },
    {
      "epoch": 0.000301800537109375,
      "step": 49447,
      "training_step_time": 0.3964502811431885
    },
    {
      "epoch": 0.000301806640625,
      "model_forward_time": 0.11413240432739258,
      "step": 49448
    },
    {
      "epoch": 0.000301806640625,
      "step": 49448,
      "training_step_time": 0.3923022747039795
    },
    {
      "epoch": 0.000301812744140625,
      "model_forward_time": 0.11526823043823242,
      "step": 49449
    },
    {
      "epoch": 0.000301812744140625,
      "step": 49449,
      "training_step_time": 0.38642358779907227
    },
    {
      "epoch": 0.00030181884765625,
      "grad_norm": 0.10084249824285507,
      "learning_rate": 8.21719845716521e-06,
      "loss": 0.0307,
      "step": 49450
    },
    {
      "epoch": 0.00030181884765625,
      "model_forward_time": 0.11450982093811035,
      "step": 49450
    },
    {
      "epoch": 0.00030181884765625,
      "step": 49450,
      "training_step_time": 0.4029214382171631
    },
    {
      "epoch": 0.000301824951171875,
      "model_forward_time": 0.1151430606842041,
      "step": 49451
    },
    {
      "epoch": 0.000301824951171875,
      "step": 49451,
      "training_step_time": 0.48011279106140137
    },
    {
      "epoch": 0.0003018310546875,
      "model_forward_time": 0.1145029067993164,
      "step": 49452
    },
    {
      "epoch": 0.0003018310546875,
      "step": 49452,
      "training_step_time": 0.405092716217041
    },
    {
      "epoch": 0.000301837158203125,
      "model_forward_time": 0.11505413055419922,
      "step": 49453
    },
    {
      "epoch": 0.000301837158203125,
      "step": 49453,
      "training_step_time": 0.3995647430419922
    },
    {
      "epoch": 0.00030184326171875,
      "model_forward_time": 0.11504435539245605,
      "step": 49454
    },
    {
      "epoch": 0.00030184326171875,
      "step": 49454,
      "training_step_time": 0.38874173164367676
    },
    {
      "epoch": 0.000301849365234375,
      "model_forward_time": 0.1147000789642334,
      "step": 49455
    },
    {
      "epoch": 0.000301849365234375,
      "step": 49455,
      "training_step_time": 0.39723658561706543
    },
    {
      "epoch": 0.00030185546875,
      "model_forward_time": 0.11468029022216797,
      "step": 49456
    },
    {
      "epoch": 0.00030185546875,
      "step": 49456,
      "training_step_time": 0.4126601219177246
    },
    {
      "epoch": 0.000301861572265625,
      "model_forward_time": 0.11506175994873047,
      "step": 49457
    },
    {
      "epoch": 0.000301861572265625,
      "step": 49457,
      "training_step_time": 0.5064117908477783
    },
    {
      "epoch": 0.00030186767578125,
      "model_forward_time": 0.11493873596191406,
      "step": 49458
    },
    {
      "epoch": 0.00030186767578125,
      "step": 49458,
      "training_step_time": 0.5166971683502197
    },
    {
      "epoch": 0.000301873779296875,
      "model_forward_time": 0.11511468887329102,
      "step": 49459
    },
    {
      "epoch": 0.000301873779296875,
      "step": 49459,
      "training_step_time": 0.5347678661346436
    },
    {
      "epoch": 0.0003018798828125,
      "grad_norm": 0.10814218968153,
      "learning_rate": 8.202068579710431e-06,
      "loss": 0.0357,
      "step": 49460
    },
    {
      "epoch": 0.0003018798828125,
      "model_forward_time": 0.11442375183105469,
      "step": 49460
    },
    {
      "epoch": 0.0003018798828125,
      "step": 49460,
      "training_step_time": 0.44570207595825195
    },
    {
      "epoch": 0.000301885986328125,
      "model_forward_time": 0.1146242618560791,
      "step": 49461
    },
    {
      "epoch": 0.000301885986328125,
      "step": 49461,
      "training_step_time": 0.3878748416900635
    },
    {
      "epoch": 0.00030189208984375,
      "model_forward_time": 0.11450648307800293,
      "step": 49462
    },
    {
      "epoch": 0.00030189208984375,
      "step": 49462,
      "training_step_time": 0.39026665687561035
    },
    {
      "epoch": 0.000301898193359375,
      "model_forward_time": 0.11423134803771973,
      "step": 49463
    },
    {
      "epoch": 0.000301898193359375,
      "step": 49463,
      "training_step_time": 0.39699745178222656
    },
    {
      "epoch": 0.000301904296875,
      "model_forward_time": 0.11461114883422852,
      "step": 49464
    },
    {
      "epoch": 0.000301904296875,
      "step": 49464,
      "training_step_time": 0.4013488292694092
    },
    {
      "epoch": 0.000301910400390625,
      "model_forward_time": 0.11528205871582031,
      "step": 49465
    },
    {
      "epoch": 0.000301910400390625,
      "step": 49465,
      "training_step_time": 0.4407768249511719
    },
    {
      "epoch": 0.00030191650390625,
      "model_forward_time": 0.11418724060058594,
      "step": 49466
    },
    {
      "epoch": 0.00030191650390625,
      "step": 49466,
      "training_step_time": 0.41661977767944336
    },
    {
      "epoch": 0.000301922607421875,
      "model_forward_time": 0.11498665809631348,
      "step": 49467
    },
    {
      "epoch": 0.000301922607421875,
      "step": 49467,
      "training_step_time": 0.39356017112731934
    },
    {
      "epoch": 0.0003019287109375,
      "model_forward_time": 0.11514520645141602,
      "step": 49468
    },
    {
      "epoch": 0.0003019287109375,
      "step": 49468,
      "training_step_time": 0.39788174629211426
    },
    {
      "epoch": 0.000301934814453125,
      "model_forward_time": 0.11507105827331543,
      "step": 49469
    },
    {
      "epoch": 0.000301934814453125,
      "step": 49469,
      "training_step_time": 0.3953118324279785
    },
    {
      "epoch": 0.00030194091796875,
      "grad_norm": 0.11997707933187485,
      "learning_rate": 8.186951399363613e-06,
      "loss": 0.0361,
      "step": 49470
    },
    {
      "epoch": 0.00030194091796875,
      "model_forward_time": 0.1147768497467041,
      "step": 49470
    },
    {
      "epoch": 0.00030194091796875,
      "step": 49470,
      "training_step_time": 0.40313267707824707
    },
    {
      "epoch": 0.000301947021484375,
      "model_forward_time": 0.1151132583618164,
      "step": 49471
    },
    {
      "epoch": 0.000301947021484375,
      "step": 49471,
      "training_step_time": 0.3994779586791992
    },
    {
      "epoch": 0.000301953125,
      "model_forward_time": 0.11484575271606445,
      "step": 49472
    },
    {
      "epoch": 0.000301953125,
      "step": 49472,
      "training_step_time": 0.3671610355377197
    },
    {
      "epoch": 0.000301959228515625,
      "model_forward_time": 0.11560487747192383,
      "step": 49473
    },
    {
      "epoch": 0.000301959228515625,
      "step": 49473,
      "training_step_time": 0.4229452610015869
    },
    {
      "epoch": 0.00030196533203125,
      "model_forward_time": 0.11566948890686035,
      "step": 49474
    },
    {
      "epoch": 0.00030196533203125,
      "step": 49474,
      "training_step_time": 0.4571821689605713
    },
    {
      "epoch": 0.000301971435546875,
      "model_forward_time": 0.11554503440856934,
      "step": 49475
    },
    {
      "epoch": 0.000301971435546875,
      "step": 49475,
      "training_step_time": 0.4764370918273926
    },
    {
      "epoch": 0.0003019775390625,
      "model_forward_time": 0.11536884307861328,
      "step": 49476
    },
    {
      "epoch": 0.0003019775390625,
      "step": 49476,
      "training_step_time": 0.3816101551055908
    },
    {
      "epoch": 0.000301983642578125,
      "model_forward_time": 0.11488866806030273,
      "step": 49477
    },
    {
      "epoch": 0.000301983642578125,
      "step": 49477,
      "training_step_time": 0.3811335563659668
    },
    {
      "epoch": 0.00030198974609375,
      "model_forward_time": 0.115875244140625,
      "step": 49478
    },
    {
      "epoch": 0.00030198974609375,
      "step": 49478,
      "training_step_time": 0.44175100326538086
    },
    {
      "epoch": 0.000301995849609375,
      "model_forward_time": 0.11594629287719727,
      "step": 49479
    },
    {
      "epoch": 0.000301995849609375,
      "step": 49479,
      "training_step_time": 0.42358946800231934
    },
    {
      "epoch": 0.000302001953125,
      "grad_norm": 0.08256366103887558,
      "learning_rate": 8.17184692071694e-06,
      "loss": 0.0352,
      "step": 49480
    },
    {
      "epoch": 0.000302001953125,
      "model_forward_time": 0.11498141288757324,
      "step": 49480
    },
    {
      "epoch": 0.000302001953125,
      "step": 49480,
      "training_step_time": 0.3853316307067871
    },
    {
      "epoch": 0.000302008056640625,
      "model_forward_time": 0.11466026306152344,
      "step": 49481
    },
    {
      "epoch": 0.000302008056640625,
      "step": 49481,
      "training_step_time": 0.4340212345123291
    },
    {
      "epoch": 0.00030201416015625,
      "model_forward_time": 0.11483001708984375,
      "step": 49482
    },
    {
      "epoch": 0.00030201416015625,
      "step": 49482,
      "training_step_time": 0.400524377822876
    },
    {
      "epoch": 0.000302020263671875,
      "model_forward_time": 0.11486029624938965,
      "step": 49483
    },
    {
      "epoch": 0.000302020263671875,
      "step": 49483,
      "training_step_time": 0.3967587947845459
    },
    {
      "epoch": 0.0003020263671875,
      "model_forward_time": 0.11540412902832031,
      "step": 49484
    },
    {
      "epoch": 0.0003020263671875,
      "step": 49484,
      "training_step_time": 0.3856935501098633
    },
    {
      "epoch": 0.000302032470703125,
      "model_forward_time": 0.11517572402954102,
      "step": 49485
    },
    {
      "epoch": 0.000302032470703125,
      "step": 49485,
      "training_step_time": 0.39315271377563477
    },
    {
      "epoch": 0.00030203857421875,
      "model_forward_time": 0.11501216888427734,
      "step": 49486
    },
    {
      "epoch": 0.00030203857421875,
      "step": 49486,
      "training_step_time": 0.3941805362701416
    },
    {
      "epoch": 0.000302044677734375,
      "model_forward_time": 0.11555981636047363,
      "step": 49487
    },
    {
      "epoch": 0.000302044677734375,
      "step": 49487,
      "training_step_time": 0.7574737071990967
    },
    {
      "epoch": 0.00030205078125,
      "model_forward_time": 0.11509060859680176,
      "step": 49488
    },
    {
      "epoch": 0.00030205078125,
      "step": 49488,
      "training_step_time": 0.4959588050842285
    },
    {
      "epoch": 0.000302056884765625,
      "model_forward_time": 0.11508584022521973,
      "step": 49489
    },
    {
      "epoch": 0.000302056884765625,
      "step": 49489,
      "training_step_time": 0.4008004665374756
    },
    {
      "epoch": 0.00030206298828125,
      "grad_norm": 0.07936178892850876,
      "learning_rate": 8.156755148358764e-06,
      "loss": 0.0346,
      "step": 49490
    },
    {
      "epoch": 0.00030206298828125,
      "model_forward_time": 0.11421775817871094,
      "step": 49490
    },
    {
      "epoch": 0.00030206298828125,
      "step": 49490,
      "training_step_time": 0.3956749439239502
    },
    {
      "epoch": 0.000302069091796875,
      "model_forward_time": 0.11375021934509277,
      "step": 49491
    },
    {
      "epoch": 0.000302069091796875,
      "step": 49491,
      "training_step_time": 0.398449182510376
    },
    {
      "epoch": 0.0003020751953125,
      "model_forward_time": 0.11516451835632324,
      "step": 49492
    },
    {
      "epoch": 0.0003020751953125,
      "step": 49492,
      "training_step_time": 0.3816337585449219
    },
    {
      "epoch": 0.000302081298828125,
      "model_forward_time": 0.1156318187713623,
      "step": 49493
    },
    {
      "epoch": 0.000302081298828125,
      "step": 49493,
      "training_step_time": 0.42752790451049805
    },
    {
      "epoch": 0.00030208740234375,
      "model_forward_time": 0.11496472358703613,
      "step": 49494
    },
    {
      "epoch": 0.00030208740234375,
      "step": 49494,
      "training_step_time": 0.39130067825317383
    },
    {
      "epoch": 0.000302093505859375,
      "model_forward_time": 0.11455178260803223,
      "step": 49495
    },
    {
      "epoch": 0.000302093505859375,
      "step": 49495,
      "training_step_time": 0.3837299346923828
    },
    {
      "epoch": 0.000302099609375,
      "model_forward_time": 0.11562013626098633,
      "step": 49496
    },
    {
      "epoch": 0.000302099609375,
      "step": 49496,
      "training_step_time": 0.38684797286987305
    },
    {
      "epoch": 0.000302105712890625,
      "model_forward_time": 0.11519289016723633,
      "step": 49497
    },
    {
      "epoch": 0.000302105712890625,
      "step": 49497,
      "training_step_time": 0.3871946334838867
    },
    {
      "epoch": 0.00030211181640625,
      "model_forward_time": 0.11492395401000977,
      "step": 49498
    },
    {
      "epoch": 0.00030211181640625,
      "step": 49498,
      "training_step_time": 0.39360713958740234
    },
    {
      "epoch": 0.000302117919921875,
      "model_forward_time": 0.11513042449951172,
      "step": 49499
    },
    {
      "epoch": 0.000302117919921875,
      "step": 49499,
      "training_step_time": 0.5711350440979004
    },
    {
      "epoch": 0.0003021240234375,
      "grad_norm": 0.08651334792375565,
      "learning_rate": 8.141676086873572e-06,
      "loss": 0.0351,
      "step": 49500
    },
    {
      "epoch": 0.0003021240234375,
      "model_forward_time": 0.11513304710388184,
      "step": 49500
    },
    {
      "epoch": 0.0003021240234375,
      "step": 49500,
      "training_step_time": 0.40209317207336426
    },
    {
      "epoch": 0.000302130126953125,
      "model_forward_time": 0.11510109901428223,
      "step": 49501
    },
    {
      "epoch": 0.000302130126953125,
      "step": 49501,
      "training_step_time": 0.4073796272277832
    },
    {
      "epoch": 0.00030213623046875,
      "model_forward_time": 0.11523294448852539,
      "step": 49502
    },
    {
      "epoch": 0.00030213623046875,
      "step": 49502,
      "training_step_time": 0.48021602630615234
    },
    {
      "epoch": 0.000302142333984375,
      "model_forward_time": 0.11559867858886719,
      "step": 49503
    },
    {
      "epoch": 0.000302142333984375,
      "step": 49503,
      "training_step_time": 0.4736769199371338
    },
    {
      "epoch": 0.0003021484375,
      "model_forward_time": 0.1146388053894043,
      "step": 49504
    },
    {
      "epoch": 0.0003021484375,
      "step": 49504,
      "training_step_time": 0.39461183547973633
    },
    {
      "epoch": 0.000302154541015625,
      "model_forward_time": 0.11467528343200684,
      "step": 49505
    },
    {
      "epoch": 0.000302154541015625,
      "step": 49505,
      "training_step_time": 0.6334636211395264
    },
    {
      "epoch": 0.00030216064453125,
      "model_forward_time": 0.11360836029052734,
      "step": 49506
    },
    {
      "epoch": 0.00030216064453125,
      "step": 49506,
      "training_step_time": 0.4159691333770752
    },
    {
      "epoch": 0.000302166748046875,
      "model_forward_time": 0.11433291435241699,
      "step": 49507
    },
    {
      "epoch": 0.000302166748046875,
      "step": 49507,
      "training_step_time": 0.3848114013671875
    },
    {
      "epoch": 0.0003021728515625,
      "model_forward_time": 0.11429405212402344,
      "step": 49508
    },
    {
      "epoch": 0.0003021728515625,
      "step": 49508,
      "training_step_time": 0.40309977531433105
    },
    {
      "epoch": 0.000302178955078125,
      "model_forward_time": 0.11477303504943848,
      "step": 49509
    },
    {
      "epoch": 0.000302178955078125,
      "step": 49509,
      "training_step_time": 0.3867146968841553
    },
    {
      "epoch": 0.00030218505859375,
      "grad_norm": 0.09468330442905426,
      "learning_rate": 8.126609740841979e-06,
      "loss": 0.031,
      "step": 49510
    },
    {
      "epoch": 0.00030218505859375,
      "model_forward_time": 0.11432051658630371,
      "step": 49510
    },
    {
      "epoch": 0.00030218505859375,
      "step": 49510,
      "training_step_time": 0.38736915588378906
    },
    {
      "epoch": 0.000302191162109375,
      "model_forward_time": 0.1152350902557373,
      "step": 49511
    },
    {
      "epoch": 0.000302191162109375,
      "step": 49511,
      "training_step_time": 0.5808672904968262
    },
    {
      "epoch": 0.000302197265625,
      "model_forward_time": 0.11498904228210449,
      "step": 49512
    },
    {
      "epoch": 0.000302197265625,
      "step": 49512,
      "training_step_time": 0.3995974063873291
    },
    {
      "epoch": 0.000302203369140625,
      "model_forward_time": 0.11420321464538574,
      "step": 49513
    },
    {
      "epoch": 0.000302203369140625,
      "step": 49513,
      "training_step_time": 0.3958752155303955
    },
    {
      "epoch": 0.00030220947265625,
      "model_forward_time": 0.11474204063415527,
      "step": 49514
    },
    {
      "epoch": 0.00030220947265625,
      "step": 49514,
      "training_step_time": 0.38732266426086426
    },
    {
      "epoch": 0.000302215576171875,
      "model_forward_time": 0.11493682861328125,
      "step": 49515
    },
    {
      "epoch": 0.000302215576171875,
      "step": 49515,
      "training_step_time": 0.47038936614990234
    },
    {
      "epoch": 0.0003022216796875,
      "model_forward_time": 0.1152811050415039,
      "step": 49516
    },
    {
      "epoch": 0.0003022216796875,
      "step": 49516,
      "training_step_time": 0.5062153339385986
    },
    {
      "epoch": 0.000302227783203125,
      "model_forward_time": 0.11531758308410645,
      "step": 49517
    },
    {
      "epoch": 0.000302227783203125,
      "step": 49517,
      "training_step_time": 0.47823190689086914
    },
    {
      "epoch": 0.00030223388671875,
      "model_forward_time": 0.11497879028320312,
      "step": 49518
    },
    {
      "epoch": 0.00030223388671875,
      "step": 49518,
      "training_step_time": 0.39158010482788086
    },
    {
      "epoch": 0.000302239990234375,
      "model_forward_time": 0.11448836326599121,
      "step": 49519
    },
    {
      "epoch": 0.000302239990234375,
      "step": 49519,
      "training_step_time": 0.4189260005950928
    },
    {
      "epoch": 0.00030224609375,
      "grad_norm": 0.09341752529144287,
      "learning_rate": 8.111556114840746e-06,
      "loss": 0.0352,
      "step": 49520
    },
    {
      "epoch": 0.00030224609375,
      "model_forward_time": 0.11538100242614746,
      "step": 49520
    },
    {
      "epoch": 0.00030224609375,
      "step": 49520,
      "training_step_time": 0.3877580165863037
    },
    {
      "epoch": 0.000302252197265625,
      "model_forward_time": 0.11481952667236328,
      "step": 49521
    },
    {
      "epoch": 0.000302252197265625,
      "step": 49521,
      "training_step_time": 0.38505005836486816
    },
    {
      "epoch": 0.00030225830078125,
      "model_forward_time": 0.11502313613891602,
      "step": 49522
    },
    {
      "epoch": 0.00030225830078125,
      "step": 49522,
      "training_step_time": 0.38140439987182617
    },
    {
      "epoch": 0.000302264404296875,
      "model_forward_time": 0.1148686408996582,
      "step": 49523
    },
    {
      "epoch": 0.000302264404296875,
      "step": 49523,
      "training_step_time": 0.4297487735748291
    },
    {
      "epoch": 0.0003022705078125,
      "model_forward_time": 0.11483955383300781,
      "step": 49524
    },
    {
      "epoch": 0.0003022705078125,
      "step": 49524,
      "training_step_time": 0.4163196086883545
    },
    {
      "epoch": 0.000302276611328125,
      "model_forward_time": 0.11442089080810547,
      "step": 49525
    },
    {
      "epoch": 0.000302276611328125,
      "step": 49525,
      "training_step_time": 0.395418643951416
    },
    {
      "epoch": 0.00030228271484375,
      "model_forward_time": 0.11572933197021484,
      "step": 49526
    },
    {
      "epoch": 0.00030228271484375,
      "step": 49526,
      "training_step_time": 0.39954161643981934
    },
    {
      "epoch": 0.000302288818359375,
      "model_forward_time": 0.11470556259155273,
      "step": 49527
    },
    {
      "epoch": 0.000302288818359375,
      "step": 49527,
      "training_step_time": 0.39278602600097656
    },
    {
      "epoch": 0.000302294921875,
      "model_forward_time": 0.11550140380859375,
      "step": 49528
    },
    {
      "epoch": 0.000302294921875,
      "step": 49528,
      "training_step_time": 0.40471959114074707
    },
    {
      "epoch": 0.000302301025390625,
      "model_forward_time": 0.11503791809082031,
      "step": 49529
    },
    {
      "epoch": 0.000302301025390625,
      "step": 49529,
      "training_step_time": 0.6541333198547363
    },
    {
      "epoch": 0.00030230712890625,
      "grad_norm": 0.10209301859140396,
      "learning_rate": 8.096515213442762e-06,
      "loss": 0.0358,
      "step": 49530
    },
    {
      "epoch": 0.00030230712890625,
      "model_forward_time": 0.11430954933166504,
      "step": 49530
    },
    {
      "epoch": 0.00030230712890625,
      "step": 49530,
      "training_step_time": 0.5933125019073486
    },
    {
      "epoch": 0.000302313232421875,
      "model_forward_time": 0.1149442195892334,
      "step": 49531
    },
    {
      "epoch": 0.000302313232421875,
      "step": 49531,
      "training_step_time": 0.4953906536102295
    },
    {
      "epoch": 0.0003023193359375,
      "model_forward_time": 0.11430621147155762,
      "step": 49532
    },
    {
      "epoch": 0.0003023193359375,
      "step": 49532,
      "training_step_time": 0.4268653392791748
    },
    {
      "epoch": 0.000302325439453125,
      "model_forward_time": 0.1137540340423584,
      "step": 49533
    },
    {
      "epoch": 0.000302325439453125,
      "step": 49533,
      "training_step_time": 0.38281822204589844
    },
    {
      "epoch": 0.00030233154296875,
      "model_forward_time": 0.11377239227294922,
      "step": 49534
    },
    {
      "epoch": 0.00030233154296875,
      "step": 49534,
      "training_step_time": 0.3848133087158203
    },
    {
      "epoch": 0.000302337646484375,
      "model_forward_time": 0.11484050750732422,
      "step": 49535
    },
    {
      "epoch": 0.000302337646484375,
      "step": 49535,
      "training_step_time": 0.40477871894836426
    },
    {
      "epoch": 0.00030234375,
      "model_forward_time": 0.11516404151916504,
      "step": 49536
    },
    {
      "epoch": 0.00030234375,
      "step": 49536,
      "training_step_time": 0.7904093265533447
    },
    {
      "epoch": 0.000302349853515625,
      "model_forward_time": 0.11386895179748535,
      "step": 49537
    },
    {
      "epoch": 0.000302349853515625,
      "step": 49537,
      "training_step_time": 0.3870549201965332
    },
    {
      "epoch": 0.00030235595703125,
      "model_forward_time": 0.11466670036315918,
      "step": 49538
    },
    {
      "epoch": 0.00030235595703125,
      "step": 49538,
      "training_step_time": 0.3853909969329834
    },
    {
      "epoch": 0.000302362060546875,
      "model_forward_time": 0.11348819732666016,
      "step": 49539
    },
    {
      "epoch": 0.000302362060546875,
      "step": 49539,
      "training_step_time": 0.3925211429595947
    },
    {
      "epoch": 0.0003023681640625,
      "grad_norm": 0.12671151757240295,
      "learning_rate": 8.08148704121705e-06,
      "loss": 0.0387,
      "step": 49540
    },
    {
      "epoch": 0.0003023681640625,
      "model_forward_time": 0.11402583122253418,
      "step": 49540
    },
    {
      "epoch": 0.0003023681640625,
      "step": 49540,
      "training_step_time": 0.39824390411376953
    },
    {
      "epoch": 0.000302374267578125,
      "model_forward_time": 0.1168820858001709,
      "step": 49541
    },
    {
      "epoch": 0.000302374267578125,
      "step": 49541,
      "training_step_time": 0.3944549560546875
    },
    {
      "epoch": 0.00030238037109375,
      "model_forward_time": 0.11553430557250977,
      "step": 49542
    },
    {
      "epoch": 0.00030238037109375,
      "step": 49542,
      "training_step_time": 0.6294665336608887
    },
    {
      "epoch": 0.000302386474609375,
      "model_forward_time": 0.11492037773132324,
      "step": 49543
    },
    {
      "epoch": 0.000302386474609375,
      "step": 49543,
      "training_step_time": 0.4633939266204834
    },
    {
      "epoch": 0.000302392578125,
      "model_forward_time": 0.11472582817077637,
      "step": 49544
    },
    {
      "epoch": 0.000302392578125,
      "step": 49544,
      "training_step_time": 0.47278738021850586
    },
    {
      "epoch": 0.000302398681640625,
      "model_forward_time": 0.11539769172668457,
      "step": 49545
    },
    {
      "epoch": 0.000302398681640625,
      "step": 49545,
      "training_step_time": 0.45841288566589355
    },
    {
      "epoch": 0.00030240478515625,
      "model_forward_time": 0.1142416000366211,
      "step": 49546
    },
    {
      "epoch": 0.00030240478515625,
      "step": 49546,
      "training_step_time": 0.3963742256164551
    },
    {
      "epoch": 0.000302410888671875,
      "model_forward_time": 0.11453676223754883,
      "step": 49547
    },
    {
      "epoch": 0.000302410888671875,
      "step": 49547,
      "training_step_time": 0.39247655868530273
    },
    {
      "epoch": 0.0003024169921875,
      "model_forward_time": 0.11483383178710938,
      "step": 49548
    },
    {
      "epoch": 0.0003024169921875,
      "step": 49548,
      "training_step_time": 0.39383649826049805
    },
    {
      "epoch": 0.000302423095703125,
      "model_forward_time": 0.11458516120910645,
      "step": 49549
    },
    {
      "epoch": 0.000302423095703125,
      "step": 49549,
      "training_step_time": 0.39191341400146484
    },
    {
      "epoch": 0.00030242919921875,
      "grad_norm": 0.0755336582660675,
      "learning_rate": 8.066471602728803e-06,
      "loss": 0.0324,
      "step": 49550
    },
    {
      "epoch": 0.00030242919921875,
      "model_forward_time": 0.11530494689941406,
      "step": 49550
    },
    {
      "epoch": 0.00030242919921875,
      "step": 49550,
      "training_step_time": 0.38847899436950684
    },
    {
      "epoch": 0.000302435302734375,
      "model_forward_time": 0.11577272415161133,
      "step": 49551
    },
    {
      "epoch": 0.000302435302734375,
      "step": 49551,
      "training_step_time": 0.39004993438720703
    },
    {
      "epoch": 0.00030244140625,
      "model_forward_time": 0.11489725112915039,
      "step": 49552
    },
    {
      "epoch": 0.00030244140625,
      "step": 49552,
      "training_step_time": 0.40168333053588867
    },
    {
      "epoch": 0.000302447509765625,
      "model_forward_time": 0.11527204513549805,
      "step": 49553
    },
    {
      "epoch": 0.000302447509765625,
      "step": 49553,
      "training_step_time": 0.38650059700012207
    },
    {
      "epoch": 0.00030245361328125,
      "model_forward_time": 0.1150975227355957,
      "step": 49554
    },
    {
      "epoch": 0.00030245361328125,
      "step": 49554,
      "training_step_time": 0.41910576820373535
    },
    {
      "epoch": 0.000302459716796875,
      "model_forward_time": 0.11474752426147461,
      "step": 49555
    },
    {
      "epoch": 0.000302459716796875,
      "step": 49555,
      "training_step_time": 0.40116000175476074
    },
    {
      "epoch": 0.0003024658203125,
      "model_forward_time": 0.11568999290466309,
      "step": 49556
    },
    {
      "epoch": 0.0003024658203125,
      "step": 49556,
      "training_step_time": 0.39194655418395996
    },
    {
      "epoch": 0.000302471923828125,
      "model_forward_time": 0.11553430557250977,
      "step": 49557
    },
    {
      "epoch": 0.000302471923828125,
      "step": 49557,
      "training_step_time": 0.494922399520874
    },
    {
      "epoch": 0.00030247802734375,
      "model_forward_time": 0.11580538749694824,
      "step": 49558
    },
    {
      "epoch": 0.00030247802734375,
      "step": 49558,
      "training_step_time": 0.4505927562713623
    },
    {
      "epoch": 0.000302484130859375,
      "model_forward_time": 0.11542582511901855,
      "step": 49559
    },
    {
      "epoch": 0.000302484130859375,
      "step": 49559,
      "training_step_time": 0.5039224624633789
    },
    {
      "epoch": 0.000302490234375,
      "grad_norm": 0.07365559786558151,
      "learning_rate": 8.051468902539272e-06,
      "loss": 0.0351,
      "step": 49560
    },
    {
      "epoch": 0.000302490234375,
      "model_forward_time": 0.11583328247070312,
      "step": 49560
    },
    {
      "epoch": 0.000302490234375,
      "step": 49560,
      "training_step_time": 0.5884382724761963
    },
    {
      "epoch": 0.000302496337890625,
      "model_forward_time": 0.11495542526245117,
      "step": 49561
    },
    {
      "epoch": 0.000302496337890625,
      "step": 49561,
      "training_step_time": 0.3853738307952881
    },
    {
      "epoch": 0.00030250244140625,
      "model_forward_time": 0.11427164077758789,
      "step": 49562
    },
    {
      "epoch": 0.00030250244140625,
      "step": 49562,
      "training_step_time": 0.390239953994751
    },
    {
      "epoch": 0.000302508544921875,
      "model_forward_time": 0.11501860618591309,
      "step": 49563
    },
    {
      "epoch": 0.000302508544921875,
      "step": 49563,
      "training_step_time": 0.3969557285308838
    },
    {
      "epoch": 0.0003025146484375,
      "model_forward_time": 0.11519455909729004,
      "step": 49564
    },
    {
      "epoch": 0.0003025146484375,
      "step": 49564,
      "training_step_time": 0.39359140396118164
    },
    {
      "epoch": 0.000302520751953125,
      "model_forward_time": 0.11549830436706543,
      "step": 49565
    },
    {
      "epoch": 0.000302520751953125,
      "step": 49565,
      "training_step_time": 0.3878045082092285
    },
    {
      "epoch": 0.00030252685546875,
      "model_forward_time": 0.11523056030273438,
      "step": 49566
    },
    {
      "epoch": 0.00030252685546875,
      "step": 49566,
      "training_step_time": 0.5598406791687012
    },
    {
      "epoch": 0.000302532958984375,
      "model_forward_time": 0.11469912528991699,
      "step": 49567
    },
    {
      "epoch": 0.000302532958984375,
      "step": 49567,
      "training_step_time": 0.4033467769622803
    },
    {
      "epoch": 0.0003025390625,
      "model_forward_time": 0.11487793922424316,
      "step": 49568
    },
    {
      "epoch": 0.0003025390625,
      "step": 49568,
      "training_step_time": 0.398953914642334
    },
    {
      "epoch": 0.000302545166015625,
      "model_forward_time": 0.11491632461547852,
      "step": 49569
    },
    {
      "epoch": 0.000302545166015625,
      "step": 49569,
      "training_step_time": 0.39409637451171875
    },
    {
      "epoch": 0.00030255126953125,
      "grad_norm": 0.07139060646295547,
      "learning_rate": 8.036478945205922e-06,
      "loss": 0.0333,
      "step": 49570
    },
    {
      "epoch": 0.00030255126953125,
      "model_forward_time": 0.1149594783782959,
      "step": 49570
    },
    {
      "epoch": 0.00030255126953125,
      "step": 49570,
      "training_step_time": 0.39514899253845215
    },
    {
      "epoch": 0.000302557373046875,
      "model_forward_time": 0.11527752876281738,
      "step": 49571
    },
    {
      "epoch": 0.000302557373046875,
      "step": 49571,
      "training_step_time": 0.4187171459197998
    },
    {
      "epoch": 0.0003025634765625,
      "model_forward_time": 0.11556124687194824,
      "step": 49572
    },
    {
      "epoch": 0.0003025634765625,
      "step": 49572,
      "training_step_time": 0.5185434818267822
    },
    {
      "epoch": 0.000302569580078125,
      "model_forward_time": 0.1146087646484375,
      "step": 49573
    },
    {
      "epoch": 0.000302569580078125,
      "step": 49573,
      "training_step_time": 0.4806063175201416
    },
    {
      "epoch": 0.00030257568359375,
      "model_forward_time": 0.11538457870483398,
      "step": 49574
    },
    {
      "epoch": 0.00030257568359375,
      "step": 49574,
      "training_step_time": 0.39418673515319824
    },
    {
      "epoch": 0.000302581787109375,
      "model_forward_time": 0.11536145210266113,
      "step": 49575
    },
    {
      "epoch": 0.000302581787109375,
      "step": 49575,
      "training_step_time": 0.3817880153656006
    },
    {
      "epoch": 0.000302587890625,
      "model_forward_time": 0.1148684024810791,
      "step": 49576
    },
    {
      "epoch": 0.000302587890625,
      "step": 49576,
      "training_step_time": 0.3917551040649414
    },
    {
      "epoch": 0.000302593994140625,
      "model_forward_time": 0.11511731147766113,
      "step": 49577
    },
    {
      "epoch": 0.000302593994140625,
      "step": 49577,
      "training_step_time": 0.38981008529663086
    },
    {
      "epoch": 0.00030260009765625,
      "model_forward_time": 0.11497092247009277,
      "step": 49578
    },
    {
      "epoch": 0.00030260009765625,
      "step": 49578,
      "training_step_time": 0.4538545608520508
    },
    {
      "epoch": 0.000302606201171875,
      "model_forward_time": 0.11490893363952637,
      "step": 49579
    },
    {
      "epoch": 0.000302606201171875,
      "step": 49579,
      "training_step_time": 0.3957176208496094
    },
    {
      "epoch": 0.0003026123046875,
      "grad_norm": 0.07385606318712234,
      "learning_rate": 8.021501735282266e-06,
      "loss": 0.0347,
      "step": 49580
    },
    {
      "epoch": 0.0003026123046875,
      "model_forward_time": 0.11564302444458008,
      "step": 49580
    },
    {
      "epoch": 0.0003026123046875,
      "step": 49580,
      "training_step_time": 0.40264463424682617
    },
    {
      "epoch": 0.000302618408203125,
      "model_forward_time": 0.11535334587097168,
      "step": 49581
    },
    {
      "epoch": 0.000302618408203125,
      "step": 49581,
      "training_step_time": 0.3927175998687744
    },
    {
      "epoch": 0.00030262451171875,
      "model_forward_time": 0.1150674819946289,
      "step": 49582
    },
    {
      "epoch": 0.00030262451171875,
      "step": 49582,
      "training_step_time": 0.39641547203063965
    },
    {
      "epoch": 0.000302630615234375,
      "model_forward_time": 0.11573505401611328,
      "step": 49583
    },
    {
      "epoch": 0.000302630615234375,
      "step": 49583,
      "training_step_time": 0.38805484771728516
    },
    {
      "epoch": 0.00030263671875,
      "model_forward_time": 0.11756157875061035,
      "step": 49584
    },
    {
      "epoch": 0.00030263671875,
      "step": 49584,
      "training_step_time": 0.6106319427490234
    },
    {
      "epoch": 0.000302642822265625,
      "model_forward_time": 0.11510777473449707,
      "step": 49585
    },
    {
      "epoch": 0.000302642822265625,
      "step": 49585,
      "training_step_time": 0.47423768043518066
    },
    {
      "epoch": 0.00030264892578125,
      "model_forward_time": 0.11491537094116211,
      "step": 49586
    },
    {
      "epoch": 0.00030264892578125,
      "step": 49586,
      "training_step_time": 0.4745361804962158
    },
    {
      "epoch": 0.000302655029296875,
      "model_forward_time": 0.11522150039672852,
      "step": 49587
    },
    {
      "epoch": 0.000302655029296875,
      "step": 49587,
      "training_step_time": 0.46940135955810547
    },
    {
      "epoch": 0.0003026611328125,
      "model_forward_time": 0.11393046379089355,
      "step": 49588
    },
    {
      "epoch": 0.0003026611328125,
      "step": 49588,
      "training_step_time": 0.45638561248779297
    },
    {
      "epoch": 0.000302667236328125,
      "model_forward_time": 0.11406111717224121,
      "step": 49589
    },
    {
      "epoch": 0.000302667236328125,
      "step": 49589,
      "training_step_time": 0.3823862075805664
    },
    {
      "epoch": 0.00030267333984375,
      "grad_norm": 0.08356788754463196,
      "learning_rate": 8.00653727731801e-06,
      "loss": 0.0378,
      "step": 49590
    },
    {
      "epoch": 0.00030267333984375,
      "model_forward_time": 0.11479854583740234,
      "step": 49590
    },
    {
      "epoch": 0.00030267333984375,
      "step": 49590,
      "training_step_time": 0.39698076248168945
    },
    {
      "epoch": 0.000302679443359375,
      "model_forward_time": 0.1144857406616211,
      "step": 49591
    },
    {
      "epoch": 0.000302679443359375,
      "step": 49591,
      "training_step_time": 0.40007972717285156
    },
    {
      "epoch": 0.000302685546875,
      "model_forward_time": 0.11566829681396484,
      "step": 49592
    },
    {
      "epoch": 0.000302685546875,
      "step": 49592,
      "training_step_time": 0.39994287490844727
    },
    {
      "epoch": 0.000302691650390625,
      "model_forward_time": 0.11477518081665039,
      "step": 49593
    },
    {
      "epoch": 0.000302691650390625,
      "step": 49593,
      "training_step_time": 0.3894772529602051
    },
    {
      "epoch": 0.00030269775390625,
      "model_forward_time": 0.11569070816040039,
      "step": 49594
    },
    {
      "epoch": 0.00030269775390625,
      "step": 49594,
      "training_step_time": 0.404803991317749
    },
    {
      "epoch": 0.000302703857421875,
      "model_forward_time": 0.11495780944824219,
      "step": 49595
    },
    {
      "epoch": 0.000302703857421875,
      "step": 49595,
      "training_step_time": 0.39464759826660156
    },
    {
      "epoch": 0.0003027099609375,
      "model_forward_time": 0.11546087265014648,
      "step": 49596
    },
    {
      "epoch": 0.0003027099609375,
      "step": 49596,
      "training_step_time": 0.4041922092437744
    },
    {
      "epoch": 0.000302716064453125,
      "model_forward_time": 0.11539006233215332,
      "step": 49597
    },
    {
      "epoch": 0.000302716064453125,
      "step": 49597,
      "training_step_time": 0.3965599536895752
    },
    {
      "epoch": 0.00030272216796875,
      "model_forward_time": 0.11540699005126953,
      "step": 49598
    },
    {
      "epoch": 0.00030272216796875,
      "step": 49598,
      "training_step_time": 0.39220714569091797
    },
    {
      "epoch": 0.000302728271484375,
      "model_forward_time": 0.11529707908630371,
      "step": 49599
    },
    {
      "epoch": 0.000302728271484375,
      "step": 49599,
      "training_step_time": 0.48365330696105957
    },
    {
      "epoch": 0.000302734375,
      "grad_norm": 0.09826943278312683,
      "learning_rate": 7.991585575858961e-06,
      "loss": 0.0368,
      "step": 49600
    },
    {
      "epoch": 0.000302734375,
      "model_forward_time": 0.1154487133026123,
      "step": 49600
    },
    {
      "epoch": 0.000302734375,
      "step": 49600,
      "training_step_time": 0.46506786346435547
    },
    {
      "epoch": 0.000302740478515625,
      "model_forward_time": 0.1152036190032959,
      "step": 49601
    },
    {
      "epoch": 0.000302740478515625,
      "step": 49601,
      "training_step_time": 0.5068647861480713
    },
    {
      "epoch": 0.00030274658203125,
      "model_forward_time": 0.11502671241760254,
      "step": 49602
    },
    {
      "epoch": 0.00030274658203125,
      "step": 49602,
      "training_step_time": 0.4597151279449463
    },
    {
      "epoch": 0.000302752685546875,
      "model_forward_time": 0.11508631706237793,
      "step": 49603
    },
    {
      "epoch": 0.000302752685546875,
      "step": 49603,
      "training_step_time": 0.39373016357421875
    },
    {
      "epoch": 0.0003027587890625,
      "model_forward_time": 0.11480212211608887,
      "step": 49604
    },
    {
      "epoch": 0.0003027587890625,
      "step": 49604,
      "training_step_time": 0.38625168800354004
    },
    {
      "epoch": 0.000302764892578125,
      "model_forward_time": 0.11506772041320801,
      "step": 49605
    },
    {
      "epoch": 0.000302764892578125,
      "step": 49605,
      "training_step_time": 0.4019138813018799
    },
    {
      "epoch": 0.00030277099609375,
      "model_forward_time": 0.11540365219116211,
      "step": 49606
    },
    {
      "epoch": 0.00030277099609375,
      "step": 49606,
      "training_step_time": 0.39415526390075684
    },
    {
      "epoch": 0.000302777099609375,
      "model_forward_time": 0.1149749755859375,
      "step": 49607
    },
    {
      "epoch": 0.000302777099609375,
      "step": 49607,
      "training_step_time": 0.4017951488494873
    },
    {
      "epoch": 0.000302783203125,
      "model_forward_time": 0.11457610130310059,
      "step": 49608
    },
    {
      "epoch": 0.000302783203125,
      "step": 49608,
      "training_step_time": 0.40209174156188965
    },
    {
      "epoch": 0.000302789306640625,
      "model_forward_time": 0.11515092849731445,
      "step": 49609
    },
    {
      "epoch": 0.000302789306640625,
      "step": 49609,
      "training_step_time": 0.4057807922363281
    },
    {
      "epoch": 0.00030279541015625,
      "grad_norm": 0.07883510738611221,
      "learning_rate": 7.976646635447044e-06,
      "loss": 0.0333,
      "step": 49610
    },
    {
      "epoch": 0.00030279541015625,
      "model_forward_time": 0.1151113510131836,
      "step": 49610
    },
    {
      "epoch": 0.00030279541015625,
      "step": 49610,
      "training_step_time": 0.3946247100830078
    },
    {
      "epoch": 0.000302801513671875,
      "model_forward_time": 0.11527705192565918,
      "step": 49611
    },
    {
      "epoch": 0.000302801513671875,
      "step": 49611,
      "training_step_time": 0.3918619155883789
    },
    {
      "epoch": 0.0003028076171875,
      "model_forward_time": 0.11472892761230469,
      "step": 49612
    },
    {
      "epoch": 0.0003028076171875,
      "step": 49612,
      "training_step_time": 0.3873476982116699
    },
    {
      "epoch": 0.000302813720703125,
      "model_forward_time": 0.11547684669494629,
      "step": 49613
    },
    {
      "epoch": 0.000302813720703125,
      "step": 49613,
      "training_step_time": 0.39638853073120117
    },
    {
      "epoch": 0.00030281982421875,
      "model_forward_time": 0.11540627479553223,
      "step": 49614
    },
    {
      "epoch": 0.00030281982421875,
      "step": 49614,
      "training_step_time": 0.4819037914276123
    },
    {
      "epoch": 0.000302825927734375,
      "model_forward_time": 0.11534667015075684,
      "step": 49615
    },
    {
      "epoch": 0.000302825927734375,
      "step": 49615,
      "training_step_time": 0.3880290985107422
    },
    {
      "epoch": 0.00030283203125,
      "model_forward_time": 0.11439919471740723,
      "step": 49616
    },
    {
      "epoch": 0.00030283203125,
      "step": 49616,
      "training_step_time": 0.42743849754333496
    },
    {
      "epoch": 0.000302838134765625,
      "model_forward_time": 0.11499881744384766,
      "step": 49617
    },
    {
      "epoch": 0.000302838134765625,
      "step": 49617,
      "training_step_time": 0.48392295837402344
    },
    {
      "epoch": 0.00030284423828125,
      "model_forward_time": 0.11628961563110352,
      "step": 49618
    },
    {
      "epoch": 0.00030284423828125,
      "step": 49618,
      "training_step_time": 0.39217185974121094
    },
    {
      "epoch": 0.000302850341796875,
      "model_forward_time": 0.11479020118713379,
      "step": 49619
    },
    {
      "epoch": 0.000302850341796875,
      "step": 49619,
      "training_step_time": 0.38295674324035645
    },
    {
      "epoch": 0.0003028564453125,
      "grad_norm": 0.07551700621843338,
      "learning_rate": 7.96172046062032e-06,
      "loss": 0.038,
      "step": 49620
    },
    {
      "epoch": 0.0003028564453125,
      "model_forward_time": 0.11466765403747559,
      "step": 49620
    },
    {
      "epoch": 0.0003028564453125,
      "step": 49620,
      "training_step_time": 0.41304922103881836
    },
    {
      "epoch": 0.000302862548828125,
      "model_forward_time": 0.11517572402954102,
      "step": 49621
    },
    {
      "epoch": 0.000302862548828125,
      "step": 49621,
      "training_step_time": 0.7107415199279785
    },
    {
      "epoch": 0.00030286865234375,
      "model_forward_time": 0.11433100700378418,
      "step": 49622
    },
    {
      "epoch": 0.00030286865234375,
      "step": 49622,
      "training_step_time": 0.36954355239868164
    },
    {
      "epoch": 0.000302874755859375,
      "model_forward_time": 0.11474251747131348,
      "step": 49623
    },
    {
      "epoch": 0.000302874755859375,
      "step": 49623,
      "training_step_time": 0.37826037406921387
    },
    {
      "epoch": 0.000302880859375,
      "model_forward_time": 0.11469364166259766,
      "step": 49624
    },
    {
      "epoch": 0.000302880859375,
      "step": 49624,
      "training_step_time": 0.38965559005737305
    },
    {
      "epoch": 0.000302886962890625,
      "model_forward_time": 0.11387443542480469,
      "step": 49625
    },
    {
      "epoch": 0.000302886962890625,
      "step": 49625,
      "training_step_time": 0.395305871963501
    },
    {
      "epoch": 0.00030289306640625,
      "model_forward_time": 0.11525225639343262,
      "step": 49626
    },
    {
      "epoch": 0.00030289306640625,
      "step": 49626,
      "training_step_time": 0.40346765518188477
    },
    {
      "epoch": 0.000302899169921875,
      "model_forward_time": 0.11492657661437988,
      "step": 49627
    },
    {
      "epoch": 0.000302899169921875,
      "step": 49627,
      "training_step_time": 0.4854440689086914
    },
    {
      "epoch": 0.0003029052734375,
      "model_forward_time": 0.11524295806884766,
      "step": 49628
    },
    {
      "epoch": 0.0003029052734375,
      "step": 49628,
      "training_step_time": 0.43167591094970703
    },
    {
      "epoch": 0.000302911376953125,
      "model_forward_time": 0.11483025550842285,
      "step": 49629
    },
    {
      "epoch": 0.000302911376953125,
      "step": 49629,
      "training_step_time": 0.3822746276855469
    },
    {
      "epoch": 0.00030291748046875,
      "grad_norm": 0.09452207386493683,
      "learning_rate": 7.946807055912959e-06,
      "loss": 0.0355,
      "step": 49630
    },
    {
      "epoch": 0.00030291748046875,
      "model_forward_time": 0.11506938934326172,
      "step": 49630
    },
    {
      "epoch": 0.00030291748046875,
      "step": 49630,
      "training_step_time": 0.45762181282043457
    },
    {
      "epoch": 0.000302923583984375,
      "model_forward_time": 0.11450839042663574,
      "step": 49631
    },
    {
      "epoch": 0.000302923583984375,
      "step": 49631,
      "training_step_time": 0.4758913516998291
    },
    {
      "epoch": 0.0003029296875,
      "model_forward_time": 0.11513257026672363,
      "step": 49632
    },
    {
      "epoch": 0.0003029296875,
      "step": 49632,
      "training_step_time": 0.3869016170501709
    },
    {
      "epoch": 0.000302935791015625,
      "model_forward_time": 0.11455726623535156,
      "step": 49633
    },
    {
      "epoch": 0.000302935791015625,
      "step": 49633,
      "training_step_time": 0.3879826068878174
    },
    {
      "epoch": 0.00030294189453125,
      "model_forward_time": 0.1147453784942627,
      "step": 49634
    },
    {
      "epoch": 0.00030294189453125,
      "step": 49634,
      "training_step_time": 0.38465237617492676
    },
    {
      "epoch": 0.000302947998046875,
      "model_forward_time": 0.1144874095916748,
      "step": 49635
    },
    {
      "epoch": 0.000302947998046875,
      "step": 49635,
      "training_step_time": 0.37785911560058594
    },
    {
      "epoch": 0.0003029541015625,
      "model_forward_time": 0.11535501480102539,
      "step": 49636
    },
    {
      "epoch": 0.0003029541015625,
      "step": 49636,
      "training_step_time": 0.3846621513366699
    },
    {
      "epoch": 0.000302960205078125,
      "model_forward_time": 0.11519384384155273,
      "step": 49637
    },
    {
      "epoch": 0.000302960205078125,
      "step": 49637,
      "training_step_time": 0.40029239654541016
    },
    {
      "epoch": 0.00030296630859375,
      "model_forward_time": 0.11530709266662598,
      "step": 49638
    },
    {
      "epoch": 0.00030296630859375,
      "step": 49638,
      "training_step_time": 0.3930971622467041
    },
    {
      "epoch": 0.000302972412109375,
      "model_forward_time": 0.1155545711517334,
      "step": 49639
    },
    {
      "epoch": 0.000302972412109375,
      "step": 49639,
      "training_step_time": 0.9514784812927246
    },
    {
      "epoch": 0.000302978515625,
      "grad_norm": 0.1051454022526741,
      "learning_rate": 7.931906425855268e-06,
      "loss": 0.037,
      "step": 49640
    },
    {
      "epoch": 0.000302978515625,
      "model_forward_time": 0.11426877975463867,
      "step": 49640
    },
    {
      "epoch": 0.000302978515625,
      "step": 49640,
      "training_step_time": 0.41713762283325195
    },
    {
      "epoch": 0.000302984619140625,
      "model_forward_time": 0.11443471908569336,
      "step": 49641
    },
    {
      "epoch": 0.000302984619140625,
      "step": 49641,
      "training_step_time": 0.3757960796356201
    },
    {
      "epoch": 0.00030299072265625,
      "model_forward_time": 0.11487102508544922,
      "step": 49642
    },
    {
      "epoch": 0.00030299072265625,
      "step": 49642,
      "training_step_time": 0.38382911682128906
    },
    {
      "epoch": 0.000302996826171875,
      "model_forward_time": 0.11455082893371582,
      "step": 49643
    },
    {
      "epoch": 0.000302996826171875,
      "step": 49643,
      "training_step_time": 0.5107996463775635
    },
    {
      "epoch": 0.0003030029296875,
      "model_forward_time": 0.11454200744628906,
      "step": 49644
    },
    {
      "epoch": 0.0003030029296875,
      "step": 49644,
      "training_step_time": 0.4393460750579834
    },
    {
      "epoch": 0.000303009033203125,
      "model_forward_time": 0.11523985862731934,
      "step": 49645
    },
    {
      "epoch": 0.000303009033203125,
      "step": 49645,
      "training_step_time": 0.514655590057373
    },
    {
      "epoch": 0.00030301513671875,
      "model_forward_time": 0.11508464813232422,
      "step": 49646
    },
    {
      "epoch": 0.00030301513671875,
      "step": 49646,
      "training_step_time": 0.3759315013885498
    },
    {
      "epoch": 0.000303021240234375,
      "model_forward_time": 0.11460137367248535,
      "step": 49647
    },
    {
      "epoch": 0.000303021240234375,
      "step": 49647,
      "training_step_time": 0.38506221771240234
    },
    {
      "epoch": 0.00030302734375,
      "model_forward_time": 0.11438322067260742,
      "step": 49648
    },
    {
      "epoch": 0.00030302734375,
      "step": 49648,
      "training_step_time": 0.3963470458984375
    },
    {
      "epoch": 0.000303033447265625,
      "model_forward_time": 0.11487531661987305,
      "step": 49649
    },
    {
      "epoch": 0.000303033447265625,
      "step": 49649,
      "training_step_time": 0.3895540237426758
    },
    {
      "epoch": 0.00030303955078125,
      "grad_norm": 0.08909213542938232,
      "learning_rate": 7.917018574973645e-06,
      "loss": 0.0362,
      "step": 49650
    },
    {
      "epoch": 0.00030303955078125,
      "model_forward_time": 0.11502408981323242,
      "step": 49650
    },
    {
      "epoch": 0.00030303955078125,
      "step": 49650,
      "training_step_time": 0.39750123023986816
    },
    {
      "epoch": 0.000303045654296875,
      "model_forward_time": 0.11547660827636719,
      "step": 49651
    },
    {
      "epoch": 0.000303045654296875,
      "step": 49651,
      "training_step_time": 0.3889434337615967
    },
    {
      "epoch": 0.0003030517578125,
      "model_forward_time": 0.11510443687438965,
      "step": 49652
    },
    {
      "epoch": 0.0003030517578125,
      "step": 49652,
      "training_step_time": 0.4097626209259033
    },
    {
      "epoch": 0.000303057861328125,
      "model_forward_time": 0.11563801765441895,
      "step": 49653
    },
    {
      "epoch": 0.000303057861328125,
      "step": 49653,
      "training_step_time": 0.4411284923553467
    },
    {
      "epoch": 0.00030306396484375,
      "model_forward_time": 0.11491966247558594,
      "step": 49654
    },
    {
      "epoch": 0.00030306396484375,
      "step": 49654,
      "training_step_time": 0.39103078842163086
    },
    {
      "epoch": 0.000303070068359375,
      "model_forward_time": 0.11498403549194336,
      "step": 49655
    },
    {
      "epoch": 0.000303070068359375,
      "step": 49655,
      "training_step_time": 0.39890503883361816
    },
    {
      "epoch": 0.000303076171875,
      "model_forward_time": 0.11462783813476562,
      "step": 49656
    },
    {
      "epoch": 0.000303076171875,
      "step": 49656,
      "training_step_time": 0.49664855003356934
    },
    {
      "epoch": 0.000303082275390625,
      "model_forward_time": 0.11462831497192383,
      "step": 49657
    },
    {
      "epoch": 0.000303082275390625,
      "step": 49657,
      "training_step_time": 0.46079063415527344
    },
    {
      "epoch": 0.00030308837890625,
      "model_forward_time": 0.11453771591186523,
      "step": 49658
    },
    {
      "epoch": 0.00030308837890625,
      "step": 49658,
      "training_step_time": 0.43302178382873535
    },
    {
      "epoch": 0.000303094482421875,
      "model_forward_time": 0.11466073989868164,
      "step": 49659
    },
    {
      "epoch": 0.000303094482421875,
      "step": 49659,
      "training_step_time": 0.46721959114074707
    },
    {
      "epoch": 0.0003031005859375,
      "grad_norm": 0.09938838332891464,
      "learning_rate": 7.902143507790661e-06,
      "loss": 0.0356,
      "step": 49660
    },
    {
      "epoch": 0.0003031005859375,
      "model_forward_time": 0.11432909965515137,
      "step": 49660
    },
    {
      "epoch": 0.0003031005859375,
      "step": 49660,
      "training_step_time": 0.3842902183532715
    },
    {
      "epoch": 0.000303106689453125,
      "model_forward_time": 0.11458063125610352,
      "step": 49661
    },
    {
      "epoch": 0.000303106689453125,
      "step": 49661,
      "training_step_time": 0.388629674911499
    },
    {
      "epoch": 0.00030311279296875,
      "model_forward_time": 0.11446547508239746,
      "step": 49662
    },
    {
      "epoch": 0.00030311279296875,
      "step": 49662,
      "training_step_time": 0.39292335510253906
    },
    {
      "epoch": 0.000303118896484375,
      "model_forward_time": 0.11490631103515625,
      "step": 49663
    },
    {
      "epoch": 0.000303118896484375,
      "step": 49663,
      "training_step_time": 0.39154505729675293
    },
    {
      "epoch": 0.000303125,
      "model_forward_time": 0.11506867408752441,
      "step": 49664
    },
    {
      "epoch": 0.000303125,
      "step": 49664,
      "training_step_time": 0.3960437774658203
    },
    {
      "epoch": 0.000303131103515625,
      "model_forward_time": 0.11575126647949219,
      "step": 49665
    },
    {
      "epoch": 0.000303131103515625,
      "step": 49665,
      "training_step_time": 0.3849949836730957
    },
    {
      "epoch": 0.00030313720703125,
      "model_forward_time": 0.11521029472351074,
      "step": 49666
    },
    {
      "epoch": 0.00030313720703125,
      "step": 49666,
      "training_step_time": 0.409559965133667
    },
    {
      "epoch": 0.000303143310546875,
      "model_forward_time": 0.11525678634643555,
      "step": 49667
    },
    {
      "epoch": 0.000303143310546875,
      "step": 49667,
      "training_step_time": 0.41783881187438965
    },
    {
      "epoch": 0.0003031494140625,
      "model_forward_time": 0.11499810218811035,
      "step": 49668
    },
    {
      "epoch": 0.0003031494140625,
      "step": 49668,
      "training_step_time": 0.4190042018890381
    },
    {
      "epoch": 0.000303155517578125,
      "model_forward_time": 0.11484336853027344,
      "step": 49669
    },
    {
      "epoch": 0.000303155517578125,
      "step": 49669,
      "training_step_time": 0.3955855369567871
    },
    {
      "epoch": 0.00030316162109375,
      "grad_norm": 0.08313338458538055,
      "learning_rate": 7.887281228824944e-06,
      "loss": 0.0324,
      "step": 49670
    },
    {
      "epoch": 0.00030316162109375,
      "model_forward_time": 0.11572432518005371,
      "step": 49670
    },
    {
      "epoch": 0.00030316162109375,
      "step": 49670,
      "training_step_time": 0.46993446350097656
    },
    {
      "epoch": 0.000303167724609375,
      "model_forward_time": 0.11478519439697266,
      "step": 49671
    },
    {
      "epoch": 0.000303167724609375,
      "step": 49671,
      "training_step_time": 0.4163651466369629
    },
    {
      "epoch": 0.000303173828125,
      "model_forward_time": 0.11530184745788574,
      "step": 49672
    },
    {
      "epoch": 0.000303173828125,
      "step": 49672,
      "training_step_time": 0.49097394943237305
    },
    {
      "epoch": 0.000303179931640625,
      "model_forward_time": 0.11564970016479492,
      "step": 49673
    },
    {
      "epoch": 0.000303179931640625,
      "step": 49673,
      "training_step_time": 0.4506208896636963
    },
    {
      "epoch": 0.00030318603515625,
      "model_forward_time": 0.11533236503601074,
      "step": 49674
    },
    {
      "epoch": 0.00030318603515625,
      "step": 49674,
      "training_step_time": 0.4524726867675781
    },
    {
      "epoch": 0.000303192138671875,
      "model_forward_time": 0.11449599266052246,
      "step": 49675
    },
    {
      "epoch": 0.000303192138671875,
      "step": 49675,
      "training_step_time": 0.43120384216308594
    },
    {
      "epoch": 0.0003031982421875,
      "model_forward_time": 0.1152348518371582,
      "step": 49676
    },
    {
      "epoch": 0.0003031982421875,
      "step": 49676,
      "training_step_time": 0.4328877925872803
    },
    {
      "epoch": 0.000303204345703125,
      "model_forward_time": 0.11648440361022949,
      "step": 49677
    },
    {
      "epoch": 0.000303204345703125,
      "step": 49677,
      "training_step_time": 0.5617480278015137
    },
    {
      "epoch": 0.00030321044921875,
      "model_forward_time": 0.12042856216430664,
      "step": 49678
    },
    {
      "epoch": 0.00030321044921875,
      "step": 49678,
      "training_step_time": 0.5868005752563477
    },
    {
      "epoch": 0.000303216552734375,
      "model_forward_time": 0.11512112617492676,
      "step": 49679
    },
    {
      "epoch": 0.000303216552734375,
      "step": 49679,
      "training_step_time": 0.6732847690582275
    },
    {
      "epoch": 0.00030322265625,
      "grad_norm": 0.08176841586828232,
      "learning_rate": 7.872431742591268e-06,
      "loss": 0.0345,
      "step": 49680
    },
    {
      "epoch": 0.00030322265625,
      "model_forward_time": 0.12083959579467773,
      "step": 49680
    },
    {
      "epoch": 0.00030322265625,
      "step": 49680,
      "training_step_time": 0.6842126846313477
    },
    {
      "epoch": 0.000303228759765625,
      "model_forward_time": 0.11985564231872559,
      "step": 49681
    },
    {
      "epoch": 0.000303228759765625,
      "step": 49681,
      "training_step_time": 0.6298775672912598
    },
    {
      "epoch": 0.00030323486328125,
      "model_forward_time": 0.11549019813537598,
      "step": 49682
    },
    {
      "epoch": 0.00030323486328125,
      "step": 49682,
      "training_step_time": 0.7377479076385498
    },
    {
      "epoch": 0.000303240966796875,
      "model_forward_time": 0.11916756629943848,
      "step": 49683
    },
    {
      "epoch": 0.000303240966796875,
      "step": 49683,
      "training_step_time": 0.8421318531036377
    },
    {
      "epoch": 0.0003032470703125,
      "model_forward_time": 0.1228184700012207,
      "step": 49684
    },
    {
      "epoch": 0.0003032470703125,
      "step": 49684,
      "training_step_time": 0.7783069610595703
    },
    {
      "epoch": 0.000303253173828125,
      "model_forward_time": 0.11873912811279297,
      "step": 49685
    },
    {
      "epoch": 0.000303253173828125,
      "step": 49685,
      "training_step_time": 0.6361191272735596
    },
    {
      "epoch": 0.00030325927734375,
      "model_forward_time": 0.11947345733642578,
      "step": 49686
    },
    {
      "epoch": 0.00030325927734375,
      "step": 49686,
      "training_step_time": 0.6980245113372803
    },
    {
      "epoch": 0.000303265380859375,
      "model_forward_time": 0.11673617362976074,
      "step": 49687
    },
    {
      "epoch": 0.000303265380859375,
      "step": 49687,
      "training_step_time": 0.6665334701538086
    },
    {
      "epoch": 0.000303271484375,
      "model_forward_time": 0.11982154846191406,
      "step": 49688
    },
    {
      "epoch": 0.000303271484375,
      "step": 49688,
      "training_step_time": 0.6223247051239014
    },
    {
      "epoch": 0.000303277587890625,
      "model_forward_time": 0.11530327796936035,
      "step": 49689
    },
    {
      "epoch": 0.000303277587890625,
      "step": 49689,
      "training_step_time": 0.6821575164794922
    },
    {
      "epoch": 0.00030328369140625,
      "grad_norm": 0.09046562016010284,
      "learning_rate": 7.857595053600513e-06,
      "loss": 0.0362,
      "step": 49690
    },
    {
      "epoch": 0.00030328369140625,
      "model_forward_time": 0.11792850494384766,
      "step": 49690
    },
    {
      "epoch": 0.00030328369140625,
      "step": 49690,
      "training_step_time": 0.7009828090667725
    },
    {
      "epoch": 0.000303289794921875,
      "model_forward_time": 0.12291979789733887,
      "step": 49691
    },
    {
      "epoch": 0.000303289794921875,
      "step": 49691,
      "training_step_time": 0.7283658981323242
    },
    {
      "epoch": 0.0003032958984375,
      "model_forward_time": 0.11915349960327148,
      "step": 49692
    },
    {
      "epoch": 0.0003032958984375,
      "step": 49692,
      "training_step_time": 0.7392239570617676
    },
    {
      "epoch": 0.000303302001953125,
      "model_forward_time": 0.11832785606384277,
      "step": 49693
    },
    {
      "epoch": 0.000303302001953125,
      "step": 49693,
      "training_step_time": 0.7045943737030029
    },
    {
      "epoch": 0.00030330810546875,
      "model_forward_time": 0.11966180801391602,
      "step": 49694
    },
    {
      "epoch": 0.00030330810546875,
      "step": 49694,
      "training_step_time": 0.7126498222351074
    },
    {
      "epoch": 0.000303314208984375,
      "model_forward_time": 0.11822223663330078,
      "step": 49695
    },
    {
      "epoch": 0.000303314208984375,
      "step": 49695,
      "training_step_time": 0.741649866104126
    },
    {
      "epoch": 0.0003033203125,
      "model_forward_time": 0.11822676658630371,
      "step": 49696
    },
    {
      "epoch": 0.0003033203125,
      "step": 49696,
      "training_step_time": 0.6521809101104736
    },
    {
      "epoch": 0.000303326416015625,
      "model_forward_time": 0.11841893196105957,
      "step": 49697
    },
    {
      "epoch": 0.000303326416015625,
      "step": 49697,
      "training_step_time": 0.6451137065887451
    },
    {
      "epoch": 0.00030333251953125,
      "model_forward_time": 0.12312102317810059,
      "step": 49698
    },
    {
      "epoch": 0.00030333251953125,
      "step": 49698,
      "training_step_time": 0.7086935043334961
    },
    {
      "epoch": 0.000303338623046875,
      "model_forward_time": 0.12050414085388184,
      "step": 49699
    },
    {
      "epoch": 0.000303338623046875,
      "step": 49699,
      "training_step_time": 0.5988588333129883
    },
    {
      "epoch": 0.0003033447265625,
      "grad_norm": 0.10358501970767975,
      "learning_rate": 7.842771166359681e-06,
      "loss": 0.0355,
      "step": 49700
    },
    {
      "epoch": 0.0003033447265625,
      "model_forward_time": 0.12230706214904785,
      "step": 49700
    },
    {
      "epoch": 0.0003033447265625,
      "step": 49700,
      "training_step_time": 0.7258179187774658
    },
    {
      "epoch": 0.000303350830078125,
      "model_forward_time": 0.11831355094909668,
      "step": 49701
    },
    {
      "epoch": 0.000303350830078125,
      "step": 49701,
      "training_step_time": 0.6812775135040283
    },
    {
      "epoch": 0.00030335693359375,
      "model_forward_time": 0.12134218215942383,
      "step": 49702
    },
    {
      "epoch": 0.00030335693359375,
      "step": 49702,
      "training_step_time": 0.6967720985412598
    },
    {
      "epoch": 0.000303363037109375,
      "model_forward_time": 0.12044763565063477,
      "step": 49703
    },
    {
      "epoch": 0.000303363037109375,
      "step": 49703,
      "training_step_time": 0.6756851673126221
    },
    {
      "epoch": 0.000303369140625,
      "model_forward_time": 0.12532949447631836,
      "step": 49704
    },
    {
      "epoch": 0.000303369140625,
      "step": 49704,
      "training_step_time": 0.6494998931884766
    },
    {
      "epoch": 0.000303375244140625,
      "model_forward_time": 0.11768746376037598,
      "step": 49705
    },
    {
      "epoch": 0.000303375244140625,
      "step": 49705,
      "training_step_time": 0.7000656127929688
    },
    {
      "epoch": 0.00030338134765625,
      "model_forward_time": 0.12001752853393555,
      "step": 49706
    },
    {
      "epoch": 0.00030338134765625,
      "step": 49706,
      "training_step_time": 0.6789765357971191
    },
    {
      "epoch": 0.000303387451171875,
      "model_forward_time": 0.12038421630859375,
      "step": 49707
    },
    {
      "epoch": 0.000303387451171875,
      "step": 49707,
      "training_step_time": 0.6470084190368652
    },
    {
      "epoch": 0.0003033935546875,
      "model_forward_time": 0.11816024780273438,
      "step": 49708
    },
    {
      "epoch": 0.0003033935546875,
      "step": 49708,
      "training_step_time": 0.7034907341003418
    },
    {
      "epoch": 0.000303399658203125,
      "model_forward_time": 0.12263345718383789,
      "step": 49709
    },
    {
      "epoch": 0.000303399658203125,
      "step": 49709,
      "training_step_time": 0.6314258575439453
    },
    {
      "epoch": 0.00030340576171875,
      "grad_norm": 0.1327282190322876,
      "learning_rate": 7.827960085371855e-06,
      "loss": 0.0473,
      "step": 49710
    },
    {
      "epoch": 0.00030340576171875,
      "model_forward_time": 0.12414097785949707,
      "step": 49710
    },
    {
      "epoch": 0.00030340576171875,
      "step": 49710,
      "training_step_time": 0.7051935195922852
    },
    {
      "epoch": 0.000303411865234375,
      "model_forward_time": 0.11950087547302246,
      "step": 49711
    },
    {
      "epoch": 0.000303411865234375,
      "step": 49711,
      "training_step_time": 0.7635974884033203
    },
    {
      "epoch": 0.00030341796875,
      "model_forward_time": 0.1350264549255371,
      "step": 49712
    },
    {
      "epoch": 0.00030341796875,
      "step": 49712,
      "training_step_time": 0.6742517948150635
    },
    {
      "epoch": 0.000303424072265625,
      "model_forward_time": 0.12825798988342285,
      "step": 49713
    },
    {
      "epoch": 0.000303424072265625,
      "step": 49713,
      "training_step_time": 0.823847770690918
    },
    {
      "epoch": 0.00030343017578125,
      "model_forward_time": 0.11808061599731445,
      "step": 49714
    },
    {
      "epoch": 0.00030343017578125,
      "step": 49714,
      "training_step_time": 0.6429743766784668
    },
    {
      "epoch": 0.000303436279296875,
      "model_forward_time": 0.11803674697875977,
      "step": 49715
    },
    {
      "epoch": 0.000303436279296875,
      "step": 49715,
      "training_step_time": 0.6318564414978027
    },
    {
      "epoch": 0.0003034423828125,
      "model_forward_time": 0.11994719505310059,
      "step": 49716
    },
    {
      "epoch": 0.0003034423828125,
      "step": 49716,
      "training_step_time": 0.6253550052642822
    },
    {
      "epoch": 0.000303448486328125,
      "model_forward_time": 0.12311387062072754,
      "step": 49717
    },
    {
      "epoch": 0.000303448486328125,
      "step": 49717,
      "training_step_time": 0.6413695812225342
    },
    {
      "epoch": 0.00030345458984375,
      "model_forward_time": 0.11842989921569824,
      "step": 49718
    },
    {
      "epoch": 0.00030345458984375,
      "step": 49718,
      "training_step_time": 0.7123732566833496
    },
    {
      "epoch": 0.000303460693359375,
      "model_forward_time": 0.12160539627075195,
      "step": 49719
    },
    {
      "epoch": 0.000303460693359375,
      "step": 49719,
      "training_step_time": 0.6483745574951172
    },
    {
      "epoch": 0.000303466796875,
      "grad_norm": 0.1499730795621872,
      "learning_rate": 7.813161815136294e-06,
      "loss": 0.0409,
      "step": 49720
    },
    {
      "epoch": 0.000303466796875,
      "model_forward_time": 0.11950516700744629,
      "step": 49720
    },
    {
      "epoch": 0.000303466796875,
      "step": 49720,
      "training_step_time": 0.7804219722747803
    },
    {
      "epoch": 0.000303472900390625,
      "model_forward_time": 0.11718964576721191,
      "step": 49721
    },
    {
      "epoch": 0.000303472900390625,
      "step": 49721,
      "training_step_time": 0.6860017776489258
    },
    {
      "epoch": 0.00030347900390625,
      "model_forward_time": 0.11792516708374023,
      "step": 49722
    },
    {
      "epoch": 0.00030347900390625,
      "step": 49722,
      "training_step_time": 0.7829620838165283
    },
    {
      "epoch": 0.000303485107421875,
      "model_forward_time": 0.121826171875,
      "step": 49723
    },
    {
      "epoch": 0.000303485107421875,
      "step": 49723,
      "training_step_time": 0.7869091033935547
    },
    {
      "epoch": 0.0003034912109375,
      "model_forward_time": 0.1171255111694336,
      "step": 49724
    },
    {
      "epoch": 0.0003034912109375,
      "step": 49724,
      "training_step_time": 0.589195966720581
    },
    {
      "epoch": 0.000303497314453125,
      "model_forward_time": 0.1156773567199707,
      "step": 49725
    },
    {
      "epoch": 0.000303497314453125,
      "step": 49725,
      "training_step_time": 0.6872296333312988
    },
    {
      "epoch": 0.00030350341796875,
      "model_forward_time": 0.11697196960449219,
      "step": 49726
    },
    {
      "epoch": 0.00030350341796875,
      "step": 49726,
      "training_step_time": 0.7019500732421875
    },
    {
      "epoch": 0.000303509521484375,
      "model_forward_time": 0.11947798728942871,
      "step": 49727
    },
    {
      "epoch": 0.000303509521484375,
      "step": 49727,
      "training_step_time": 0.6352105140686035
    },
    {
      "epoch": 0.000303515625,
      "model_forward_time": 0.11878299713134766,
      "step": 49728
    },
    {
      "epoch": 0.000303515625,
      "step": 49728,
      "training_step_time": 0.6700351238250732
    },
    {
      "epoch": 0.000303521728515625,
      "model_forward_time": 0.11639833450317383,
      "step": 49729
    },
    {
      "epoch": 0.000303521728515625,
      "step": 49729,
      "training_step_time": 0.6562356948852539
    },
    {
      "epoch": 0.00030352783203125,
      "grad_norm": 0.08912692964076996,
      "learning_rate": 7.79837636014827e-06,
      "loss": 0.0341,
      "step": 49730
    },
    {
      "epoch": 0.00030352783203125,
      "model_forward_time": 0.11778926849365234,
      "step": 49730
    },
    {
      "epoch": 0.00030352783203125,
      "step": 49730,
      "training_step_time": 0.650620698928833
    },
    {
      "epoch": 0.000303533935546875,
      "model_forward_time": 0.12101960182189941,
      "step": 49731
    },
    {
      "epoch": 0.000303533935546875,
      "step": 49731,
      "training_step_time": 0.8074111938476562
    },
    {
      "epoch": 0.0003035400390625,
      "model_forward_time": 0.11619925498962402,
      "step": 49732
    },
    {
      "epoch": 0.0003035400390625,
      "step": 49732,
      "training_step_time": 0.6239914894104004
    },
    {
      "epoch": 0.000303546142578125,
      "model_forward_time": 0.11960148811340332,
      "step": 49733
    },
    {
      "epoch": 0.000303546142578125,
      "step": 49733,
      "training_step_time": 0.647529125213623
    },
    {
      "epoch": 0.00030355224609375,
      "model_forward_time": 0.1186361312866211,
      "step": 49734
    },
    {
      "epoch": 0.00030355224609375,
      "step": 49734,
      "training_step_time": 0.6412889957427979
    },
    {
      "epoch": 0.000303558349609375,
      "model_forward_time": 0.11726045608520508,
      "step": 49735
    },
    {
      "epoch": 0.000303558349609375,
      "step": 49735,
      "training_step_time": 0.6667735576629639
    },
    {
      "epoch": 0.000303564453125,
      "model_forward_time": 0.1199030876159668,
      "step": 49736
    },
    {
      "epoch": 0.000303564453125,
      "step": 49736,
      "training_step_time": 0.6380434036254883
    },
    {
      "epoch": 0.000303570556640625,
      "model_forward_time": 0.12493133544921875,
      "step": 49737
    },
    {
      "epoch": 0.000303570556640625,
      "step": 49737,
      "training_step_time": 0.6807429790496826
    },
    {
      "epoch": 0.00030357666015625,
      "model_forward_time": 0.11820697784423828,
      "step": 49738
    },
    {
      "epoch": 0.00030357666015625,
      "step": 49738,
      "training_step_time": 0.6552631855010986
    },
    {
      "epoch": 0.000303582763671875,
      "model_forward_time": 0.11848115921020508,
      "step": 49739
    },
    {
      "epoch": 0.000303582763671875,
      "step": 49739,
      "training_step_time": 0.6454932689666748
    },
    {
      "epoch": 0.0003035888671875,
      "grad_norm": 0.12955701351165771,
      "learning_rate": 7.783603724899257e-06,
      "loss": 0.0399,
      "step": 49740
    },
    {
      "epoch": 0.0003035888671875,
      "model_forward_time": 0.14485573768615723,
      "step": 49740
    },
    {
      "epoch": 0.0003035888671875,
      "step": 49740,
      "training_step_time": 0.6649065017700195
    },
    {
      "epoch": 0.000303594970703125,
      "model_forward_time": 0.11940765380859375,
      "step": 49741
    },
    {
      "epoch": 0.000303594970703125,
      "step": 49741,
      "training_step_time": 0.7023141384124756
    },
    {
      "epoch": 0.00030360107421875,
      "model_forward_time": 0.11843585968017578,
      "step": 49742
    },
    {
      "epoch": 0.00030360107421875,
      "step": 49742,
      "training_step_time": 0.5892362594604492
    },
    {
      "epoch": 0.000303607177734375,
      "model_forward_time": 0.11865735054016113,
      "step": 49743
    },
    {
      "epoch": 0.000303607177734375,
      "step": 49743,
      "training_step_time": 0.613335371017456
    },
    {
      "epoch": 0.00030361328125,
      "model_forward_time": 0.12003946304321289,
      "step": 49744
    },
    {
      "epoch": 0.00030361328125,
      "step": 49744,
      "training_step_time": 0.5904152393341064
    },
    {
      "epoch": 0.000303619384765625,
      "model_forward_time": 0.12033987045288086,
      "step": 49745
    },
    {
      "epoch": 0.000303619384765625,
      "step": 49745,
      "training_step_time": 0.5738303661346436
    },
    {
      "epoch": 0.00030362548828125,
      "model_forward_time": 0.12181735038757324,
      "step": 49746
    },
    {
      "epoch": 0.00030362548828125,
      "step": 49746,
      "training_step_time": 0.571941614151001
    },
    {
      "epoch": 0.000303631591796875,
      "model_forward_time": 0.1306626796722412,
      "step": 49747
    },
    {
      "epoch": 0.000303631591796875,
      "step": 49747,
      "training_step_time": 0.5426023006439209
    },
    {
      "epoch": 0.0003036376953125,
      "model_forward_time": 0.11973309516906738,
      "step": 49748
    },
    {
      "epoch": 0.0003036376953125,
      "step": 49748,
      "training_step_time": 0.5877141952514648
    },
    {
      "epoch": 0.000303643798828125,
      "model_forward_time": 0.12108325958251953,
      "step": 49749
    },
    {
      "epoch": 0.000303643798828125,
      "step": 49749,
      "training_step_time": 0.5468344688415527
    },
    {
      "epoch": 0.00030364990234375,
      "grad_norm": 0.10882950574159622,
      "learning_rate": 7.768843913876756e-06,
      "loss": 0.034,
      "step": 49750
    },
    {
      "epoch": 0.00030364990234375,
      "model_forward_time": 0.11714291572570801,
      "step": 49750
    },
    {
      "epoch": 0.00030364990234375,
      "step": 49750,
      "training_step_time": 0.414719820022583
    },
    {
      "epoch": 0.000303656005859375,
      "model_forward_time": 0.11666321754455566,
      "step": 49751
    },
    {
      "epoch": 0.000303656005859375,
      "step": 49751,
      "training_step_time": 0.5163021087646484
    },
    {
      "epoch": 0.000303662109375,
      "model_forward_time": 0.11697053909301758,
      "step": 49752
    },
    {
      "epoch": 0.000303662109375,
      "step": 49752,
      "training_step_time": 0.579167366027832
    },
    {
      "epoch": 0.000303668212890625,
      "model_forward_time": 0.11553812026977539,
      "step": 49753
    },
    {
      "epoch": 0.000303668212890625,
      "step": 49753,
      "training_step_time": 0.3876359462738037
    },
    {
      "epoch": 0.00030367431640625,
      "model_forward_time": 0.11557555198669434,
      "step": 49754
    },
    {
      "epoch": 0.00030367431640625,
      "step": 49754,
      "training_step_time": 0.4188423156738281
    },
    {
      "epoch": 0.000303680419921875,
      "model_forward_time": 0.1158745288848877,
      "step": 49755
    },
    {
      "epoch": 0.000303680419921875,
      "step": 49755,
      "training_step_time": 0.4091153144836426
    },
    {
      "epoch": 0.0003036865234375,
      "model_forward_time": 0.11521005630493164,
      "step": 49756
    },
    {
      "epoch": 0.0003036865234375,
      "step": 49756,
      "training_step_time": 0.40954113006591797
    },
    {
      "epoch": 0.000303692626953125,
      "model_forward_time": 0.11535835266113281,
      "step": 49757
    },
    {
      "epoch": 0.000303692626953125,
      "step": 49757,
      "training_step_time": 0.4001924991607666
    },
    {
      "epoch": 0.00030369873046875,
      "model_forward_time": 0.11497783660888672,
      "step": 49758
    },
    {
      "epoch": 0.00030369873046875,
      "step": 49758,
      "training_step_time": 0.4003889560699463
    },
    {
      "epoch": 0.000303704833984375,
      "model_forward_time": 0.11514759063720703,
      "step": 49759
    },
    {
      "epoch": 0.000303704833984375,
      "step": 49759,
      "training_step_time": 0.39276599884033203
    },
    {
      "epoch": 0.0003037109375,
      "grad_norm": 0.11598099023103714,
      "learning_rate": 7.754096931564431e-06,
      "loss": 0.0406,
      "step": 49760
    },
    {
      "epoch": 0.0003037109375,
      "model_forward_time": 0.1149749755859375,
      "step": 49760
    },
    {
      "epoch": 0.0003037109375,
      "step": 49760,
      "training_step_time": 0.4051089286804199
    },
    {
      "epoch": 0.000303717041015625,
      "model_forward_time": 0.11485123634338379,
      "step": 49761
    },
    {
      "epoch": 0.000303717041015625,
      "step": 49761,
      "training_step_time": 0.4101991653442383
    },
    {
      "epoch": 0.00030372314453125,
      "model_forward_time": 0.11511969566345215,
      "step": 49762
    },
    {
      "epoch": 0.00030372314453125,
      "step": 49762,
      "training_step_time": 0.4197249412536621
    },
    {
      "epoch": 0.000303729248046875,
      "model_forward_time": 0.11503911018371582,
      "step": 49763
    },
    {
      "epoch": 0.000303729248046875,
      "step": 49763,
      "training_step_time": 0.4563412666320801
    },
    {
      "epoch": 0.0003037353515625,
      "model_forward_time": 0.11520171165466309,
      "step": 49764
    },
    {
      "epoch": 0.0003037353515625,
      "step": 49764,
      "training_step_time": 0.4104654788970947
    },
    {
      "epoch": 0.000303741455078125,
      "model_forward_time": 0.11538863182067871,
      "step": 49765
    },
    {
      "epoch": 0.000303741455078125,
      "step": 49765,
      "training_step_time": 0.42012858390808105
    },
    {
      "epoch": 0.00030374755859375,
      "model_forward_time": 0.11496400833129883,
      "step": 49766
    },
    {
      "epoch": 0.00030374755859375,
      "step": 49766,
      "training_step_time": 0.45337510108947754
    },
    {
      "epoch": 0.000303753662109375,
      "model_forward_time": 0.11558318138122559,
      "step": 49767
    },
    {
      "epoch": 0.000303753662109375,
      "step": 49767,
      "training_step_time": 0.49990081787109375
    },
    {
      "epoch": 0.000303759765625,
      "model_forward_time": 0.11505818367004395,
      "step": 49768
    },
    {
      "epoch": 0.000303759765625,
      "step": 49768,
      "training_step_time": 0.3883223533630371
    },
    {
      "epoch": 0.000303765869140625,
      "model_forward_time": 0.11464214324951172,
      "step": 49769
    },
    {
      "epoch": 0.000303765869140625,
      "step": 49769,
      "training_step_time": 0.38542699813842773
    },
    {
      "epoch": 0.00030377197265625,
      "grad_norm": 0.09892979264259338,
      "learning_rate": 7.739362782442021e-06,
      "loss": 0.0397,
      "step": 49770
    },
    {
      "epoch": 0.00030377197265625,
      "model_forward_time": 0.1151590347290039,
      "step": 49770
    },
    {
      "epoch": 0.00030377197265625,
      "step": 49770,
      "training_step_time": 0.40331315994262695
    },
    {
      "epoch": 0.000303778076171875,
      "model_forward_time": 0.11503100395202637,
      "step": 49771
    },
    {
      "epoch": 0.000303778076171875,
      "step": 49771,
      "training_step_time": 0.39940690994262695
    },
    {
      "epoch": 0.0003037841796875,
      "model_forward_time": 0.11476826667785645,
      "step": 49772
    },
    {
      "epoch": 0.0003037841796875,
      "step": 49772,
      "training_step_time": 0.3843870162963867
    },
    {
      "epoch": 0.000303790283203125,
      "model_forward_time": 0.1161189079284668,
      "step": 49773
    },
    {
      "epoch": 0.000303790283203125,
      "step": 49773,
      "training_step_time": 0.40193819999694824
    },
    {
      "epoch": 0.00030379638671875,
      "model_forward_time": 0.11549854278564453,
      "step": 49774
    },
    {
      "epoch": 0.00030379638671875,
      "step": 49774,
      "training_step_time": 0.39580202102661133
    },
    {
      "epoch": 0.000303802490234375,
      "model_forward_time": 0.1162712574005127,
      "step": 49775
    },
    {
      "epoch": 0.000303802490234375,
      "step": 49775,
      "training_step_time": 0.39705514907836914
    },
    {
      "epoch": 0.00030380859375,
      "model_forward_time": 0.11608338356018066,
      "step": 49776
    },
    {
      "epoch": 0.00030380859375,
      "step": 49776,
      "training_step_time": 0.41138768196105957
    },
    {
      "epoch": 0.000303814697265625,
      "model_forward_time": 0.11562728881835938,
      "step": 49777
    },
    {
      "epoch": 0.000303814697265625,
      "step": 49777,
      "training_step_time": 0.44361400604248047
    },
    {
      "epoch": 0.00030382080078125,
      "model_forward_time": 0.11520504951477051,
      "step": 49778
    },
    {
      "epoch": 0.00030382080078125,
      "step": 49778,
      "training_step_time": 0.4962313175201416
    },
    {
      "epoch": 0.000303826904296875,
      "model_forward_time": 0.11530852317810059,
      "step": 49779
    },
    {
      "epoch": 0.000303826904296875,
      "step": 49779,
      "training_step_time": 0.4335143566131592
    },
    {
      "epoch": 0.0003038330078125,
      "grad_norm": 0.11549411714076996,
      "learning_rate": 7.724641470985378e-06,
      "loss": 0.0347,
      "step": 49780
    },
    {
      "epoch": 0.0003038330078125,
      "model_forward_time": 0.11748385429382324,
      "step": 49780
    },
    {
      "epoch": 0.0003038330078125,
      "step": 49780,
      "training_step_time": 0.5145351886749268
    },
    {
      "epoch": 0.000303839111328125,
      "model_forward_time": 0.11523866653442383,
      "step": 49781
    },
    {
      "epoch": 0.000303839111328125,
      "step": 49781,
      "training_step_time": 0.4894123077392578
    },
    {
      "epoch": 0.00030384521484375,
      "model_forward_time": 0.11476016044616699,
      "step": 49782
    },
    {
      "epoch": 0.00030384521484375,
      "step": 49782,
      "training_step_time": 0.474393367767334
    },
    {
      "epoch": 0.000303851318359375,
      "model_forward_time": 0.11529064178466797,
      "step": 49783
    },
    {
      "epoch": 0.000303851318359375,
      "step": 49783,
      "training_step_time": 0.38716864585876465
    },
    {
      "epoch": 0.000303857421875,
      "model_forward_time": 0.11447834968566895,
      "step": 49784
    },
    {
      "epoch": 0.000303857421875,
      "step": 49784,
      "training_step_time": 0.380448579788208
    },
    {
      "epoch": 0.000303863525390625,
      "model_forward_time": 0.11523318290710449,
      "step": 49785
    },
    {
      "epoch": 0.000303863525390625,
      "step": 49785,
      "training_step_time": 0.3767385482788086
    },
    {
      "epoch": 0.00030386962890625,
      "model_forward_time": 0.11512041091918945,
      "step": 49786
    },
    {
      "epoch": 0.00030386962890625,
      "step": 49786,
      "training_step_time": 0.3911592960357666
    },
    {
      "epoch": 0.000303875732421875,
      "model_forward_time": 0.11536216735839844,
      "step": 49787
    },
    {
      "epoch": 0.000303875732421875,
      "step": 49787,
      "training_step_time": 0.3983497619628906
    },
    {
      "epoch": 0.0003038818359375,
      "model_forward_time": 0.11538195610046387,
      "step": 49788
    },
    {
      "epoch": 0.0003038818359375,
      "step": 49788,
      "training_step_time": 0.40175628662109375
    },
    {
      "epoch": 0.000303887939453125,
      "model_forward_time": 0.11527490615844727,
      "step": 49789
    },
    {
      "epoch": 0.000303887939453125,
      "step": 49789,
      "training_step_time": 0.39725327491760254
    },
    {
      "epoch": 0.00030389404296875,
      "grad_norm": 0.12345770746469498,
      "learning_rate": 7.709933001666431e-06,
      "loss": 0.0365,
      "step": 49790
    },
    {
      "epoch": 0.00030389404296875,
      "model_forward_time": 0.11642742156982422,
      "step": 49790
    },
    {
      "epoch": 0.00030389404296875,
      "step": 49790,
      "training_step_time": 0.39051008224487305
    },
    {
      "epoch": 0.000303900146484375,
      "model_forward_time": 0.11511349678039551,
      "step": 49791
    },
    {
      "epoch": 0.000303900146484375,
      "step": 49791,
      "training_step_time": 0.45613837242126465
    },
    {
      "epoch": 0.00030390625,
      "model_forward_time": 0.11536598205566406,
      "step": 49792
    },
    {
      "epoch": 0.00030390625,
      "step": 49792,
      "training_step_time": 0.4192965030670166
    },
    {
      "epoch": 0.000303912353515625,
      "model_forward_time": 0.11543035507202148,
      "step": 49793
    },
    {
      "epoch": 0.000303912353515625,
      "step": 49793,
      "training_step_time": 0.4104428291320801
    },
    {
      "epoch": 0.00030391845703125,
      "model_forward_time": 0.11486577987670898,
      "step": 49794
    },
    {
      "epoch": 0.00030391845703125,
      "step": 49794,
      "training_step_time": 0.44743943214416504
    },
    {
      "epoch": 0.000303924560546875,
      "model_forward_time": 0.11518740653991699,
      "step": 49795
    },
    {
      "epoch": 0.000303924560546875,
      "step": 49795,
      "training_step_time": 0.5250470638275146
    },
    {
      "epoch": 0.0003039306640625,
      "model_forward_time": 0.11538863182067871,
      "step": 49796
    },
    {
      "epoch": 0.0003039306640625,
      "step": 49796,
      "training_step_time": 0.4221343994140625
    },
    {
      "epoch": 0.000303936767578125,
      "model_forward_time": 0.11481380462646484,
      "step": 49797
    },
    {
      "epoch": 0.000303936767578125,
      "step": 49797,
      "training_step_time": 0.555680513381958
    },
    {
      "epoch": 0.00030394287109375,
      "model_forward_time": 0.11523866653442383,
      "step": 49798
    },
    {
      "epoch": 0.00030394287109375,
      "step": 49798,
      "training_step_time": 0.3853631019592285
    },
    {
      "epoch": 0.000303948974609375,
      "model_forward_time": 0.11532211303710938,
      "step": 49799
    },
    {
      "epoch": 0.000303948974609375,
      "step": 49799,
      "training_step_time": 0.38817644119262695
    },
    {
      "epoch": 0.000303955078125,
      "grad_norm": 0.10522003471851349,
      "learning_rate": 7.695237378953223e-06,
      "loss": 0.0349,
      "step": 49800
    },
    {
      "epoch": 0.000303955078125,
      "model_forward_time": 0.11469292640686035,
      "step": 49800
    },
    {
      "epoch": 0.000303955078125,
      "step": 49800,
      "training_step_time": 0.42319440841674805
    },
    {
      "epoch": 0.000303961181640625,
      "model_forward_time": 0.11478137969970703,
      "step": 49801
    },
    {
      "epoch": 0.000303961181640625,
      "step": 49801,
      "training_step_time": 0.39219045639038086
    },
    {
      "epoch": 0.00030396728515625,
      "model_forward_time": 0.11493825912475586,
      "step": 49802
    },
    {
      "epoch": 0.00030396728515625,
      "step": 49802,
      "training_step_time": 0.38380956649780273
    },
    {
      "epoch": 0.000303973388671875,
      "model_forward_time": 0.11462187767028809,
      "step": 49803
    },
    {
      "epoch": 0.000303973388671875,
      "step": 49803,
      "training_step_time": 0.7940676212310791
    },
    {
      "epoch": 0.0003039794921875,
      "model_forward_time": 0.11417675018310547,
      "step": 49804
    },
    {
      "epoch": 0.0003039794921875,
      "step": 49804,
      "training_step_time": 0.45786046981811523
    },
    {
      "epoch": 0.000303985595703125,
      "model_forward_time": 0.11443448066711426,
      "step": 49805
    },
    {
      "epoch": 0.000303985595703125,
      "step": 49805,
      "training_step_time": 0.4000074863433838
    },
    {
      "epoch": 0.00030399169921875,
      "model_forward_time": 0.11412620544433594,
      "step": 49806
    },
    {
      "epoch": 0.00030399169921875,
      "step": 49806,
      "training_step_time": 0.44860029220581055
    },
    {
      "epoch": 0.000303997802734375,
      "model_forward_time": 0.11445236206054688,
      "step": 49807
    },
    {
      "epoch": 0.000303997802734375,
      "step": 49807,
      "training_step_time": 0.3631324768066406
    },
    {
      "epoch": 0.00030400390625,
      "model_forward_time": 0.11437201499938965,
      "step": 49808
    },
    {
      "epoch": 0.00030400390625,
      "step": 49808,
      "training_step_time": 0.432175874710083
    },
    {
      "epoch": 0.000304010009765625,
      "model_forward_time": 0.11510133743286133,
      "step": 49809
    },
    {
      "epoch": 0.000304010009765625,
      "step": 49809,
      "training_step_time": 0.46561503410339355
    },
    {
      "epoch": 0.00030401611328125,
      "grad_norm": 0.10366673022508621,
      "learning_rate": 7.680554607309926e-06,
      "loss": 0.0433,
      "step": 49810
    },
    {
      "epoch": 0.00030401611328125,
      "model_forward_time": 0.11527299880981445,
      "step": 49810
    },
    {
      "epoch": 0.00030401611328125,
      "step": 49810,
      "training_step_time": 0.3992629051208496
    },
    {
      "epoch": 0.000304022216796875,
      "model_forward_time": 0.11467647552490234,
      "step": 49811
    },
    {
      "epoch": 0.000304022216796875,
      "step": 49811,
      "training_step_time": 0.3921627998352051
    },
    {
      "epoch": 0.0003040283203125,
      "model_forward_time": 0.11566877365112305,
      "step": 49812
    },
    {
      "epoch": 0.0003040283203125,
      "step": 49812,
      "training_step_time": 0.3974471092224121
    },
    {
      "epoch": 0.000304034423828125,
      "model_forward_time": 0.11465764045715332,
      "step": 49813
    },
    {
      "epoch": 0.000304034423828125,
      "step": 49813,
      "training_step_time": 0.3942680358886719
    },
    {
      "epoch": 0.00030404052734375,
      "model_forward_time": 0.11464357376098633,
      "step": 49814
    },
    {
      "epoch": 0.00030404052734375,
      "step": 49814,
      "training_step_time": 0.39435744285583496
    },
    {
      "epoch": 0.000304046630859375,
      "model_forward_time": 0.11525559425354004,
      "step": 49815
    },
    {
      "epoch": 0.000304046630859375,
      "step": 49815,
      "training_step_time": 0.5143749713897705
    },
    {
      "epoch": 0.000304052734375,
      "model_forward_time": 0.1153860092163086,
      "step": 49816
    },
    {
      "epoch": 0.000304052734375,
      "step": 49816,
      "training_step_time": 0.3903312683105469
    },
    {
      "epoch": 0.000304058837890625,
      "model_forward_time": 0.11456871032714844,
      "step": 49817
    },
    {
      "epoch": 0.000304058837890625,
      "step": 49817,
      "training_step_time": 0.3993699550628662
    },
    {
      "epoch": 0.00030406494140625,
      "model_forward_time": 0.11520051956176758,
      "step": 49818
    },
    {
      "epoch": 0.00030406494140625,
      "step": 49818,
      "training_step_time": 0.42366600036621094
    },
    {
      "epoch": 0.000304071044921875,
      "model_forward_time": 0.11454510688781738,
      "step": 49819
    },
    {
      "epoch": 0.000304071044921875,
      "step": 49819,
      "training_step_time": 0.4068450927734375
    },
    {
      "epoch": 0.0003040771484375,
      "grad_norm": 0.11762441694736481,
      "learning_rate": 7.66588469119675e-06,
      "loss": 0.0366,
      "step": 49820
    },
    {
      "epoch": 0.0003040771484375,
      "model_forward_time": 0.11475157737731934,
      "step": 49820
    },
    {
      "epoch": 0.0003040771484375,
      "step": 49820,
      "training_step_time": 0.40628671646118164
    },
    {
      "epoch": 0.000304083251953125,
      "model_forward_time": 0.11507511138916016,
      "step": 49821
    },
    {
      "epoch": 0.000304083251953125,
      "step": 49821,
      "training_step_time": 0.6349434852600098
    },
    {
      "epoch": 0.00030408935546875,
      "model_forward_time": 0.11467838287353516,
      "step": 49822
    },
    {
      "epoch": 0.00030408935546875,
      "step": 49822,
      "training_step_time": 0.41560935974121094
    },
    {
      "epoch": 0.000304095458984375,
      "model_forward_time": 0.11441349983215332,
      "step": 49823
    },
    {
      "epoch": 0.000304095458984375,
      "step": 49823,
      "training_step_time": 0.4688842296600342
    },
    {
      "epoch": 0.0003041015625,
      "model_forward_time": 0.11461091041564941,
      "step": 49824
    },
    {
      "epoch": 0.0003041015625,
      "step": 49824,
      "training_step_time": 0.41698408126831055
    },
    {
      "epoch": 0.000304107666015625,
      "model_forward_time": 0.11451172828674316,
      "step": 49825
    },
    {
      "epoch": 0.000304107666015625,
      "step": 49825,
      "training_step_time": 0.42444539070129395
    },
    {
      "epoch": 0.00030411376953125,
      "model_forward_time": 0.11380290985107422,
      "step": 49826
    },
    {
      "epoch": 0.00030411376953125,
      "step": 49826,
      "training_step_time": 0.3988974094390869
    },
    {
      "epoch": 0.000304119873046875,
      "model_forward_time": 0.11582565307617188,
      "step": 49827
    },
    {
      "epoch": 0.000304119873046875,
      "step": 49827,
      "training_step_time": 0.5310328006744385
    },
    {
      "epoch": 0.0003041259765625,
      "model_forward_time": 0.11480283737182617,
      "step": 49828
    },
    {
      "epoch": 0.0003041259765625,
      "step": 49828,
      "training_step_time": 0.3897666931152344
    },
    {
      "epoch": 0.000304132080078125,
      "model_forward_time": 0.11477279663085938,
      "step": 49829
    },
    {
      "epoch": 0.000304132080078125,
      "step": 49829,
      "training_step_time": 0.39145898818969727
    },
    {
      "epoch": 0.00030413818359375,
      "grad_norm": 0.09806697070598602,
      "learning_rate": 7.651227635070041e-06,
      "loss": 0.0398,
      "step": 49830
    },
    {
      "epoch": 0.00030413818359375,
      "model_forward_time": 0.11556077003479004,
      "step": 49830
    },
    {
      "epoch": 0.00030413818359375,
      "step": 49830,
      "training_step_time": 0.3934478759765625
    },
    {
      "epoch": 0.000304144287109375,
      "model_forward_time": 0.11540603637695312,
      "step": 49831
    },
    {
      "epoch": 0.000304144287109375,
      "step": 49831,
      "training_step_time": 0.3945770263671875
    },
    {
      "epoch": 0.000304150390625,
      "model_forward_time": 0.11504554748535156,
      "step": 49832
    },
    {
      "epoch": 0.000304150390625,
      "step": 49832,
      "training_step_time": 0.42825913429260254
    },
    {
      "epoch": 0.000304156494140625,
      "model_forward_time": 0.11535477638244629,
      "step": 49833
    },
    {
      "epoch": 0.000304156494140625,
      "step": 49833,
      "training_step_time": 0.7524282932281494
    },
    {
      "epoch": 0.00030416259765625,
      "model_forward_time": 0.11396384239196777,
      "step": 49834
    },
    {
      "epoch": 0.00030416259765625,
      "step": 49834,
      "training_step_time": 0.3806188106536865
    },
    {
      "epoch": 0.000304168701171875,
      "model_forward_time": 0.11484122276306152,
      "step": 49835
    },
    {
      "epoch": 0.000304168701171875,
      "step": 49835,
      "training_step_time": 0.3718712329864502
    },
    {
      "epoch": 0.0003041748046875,
      "model_forward_time": 0.11553549766540527,
      "step": 49836
    },
    {
      "epoch": 0.0003041748046875,
      "step": 49836,
      "training_step_time": 0.4395112991333008
    },
    {
      "epoch": 0.000304180908203125,
      "model_forward_time": 0.11453056335449219,
      "step": 49837
    },
    {
      "epoch": 0.000304180908203125,
      "step": 49837,
      "training_step_time": 0.4023258686065674
    },
    {
      "epoch": 0.00030418701171875,
      "model_forward_time": 0.1147470474243164,
      "step": 49838
    },
    {
      "epoch": 0.00030418701171875,
      "step": 49838,
      "training_step_time": 0.400287389755249
    },
    {
      "epoch": 0.000304193115234375,
      "model_forward_time": 0.11561059951782227,
      "step": 49839
    },
    {
      "epoch": 0.000304193115234375,
      "step": 49839,
      "training_step_time": 0.7015798091888428
    },
    {
      "epoch": 0.00030419921875,
      "grad_norm": 0.08804843574762344,
      "learning_rate": 7.636583443382223e-06,
      "loss": 0.0401,
      "step": 49840
    },
    {
      "epoch": 0.00030419921875,
      "model_forward_time": 0.11518645286560059,
      "step": 49840
    },
    {
      "epoch": 0.00030419921875,
      "step": 49840,
      "training_step_time": 0.38402628898620605
    },
    {
      "epoch": 0.000304205322265625,
      "model_forward_time": 0.1143491268157959,
      "step": 49841
    },
    {
      "epoch": 0.000304205322265625,
      "step": 49841,
      "training_step_time": 0.3771495819091797
    },
    {
      "epoch": 0.00030421142578125,
      "model_forward_time": 0.11460471153259277,
      "step": 49842
    },
    {
      "epoch": 0.00030421142578125,
      "step": 49842,
      "training_step_time": 0.38420796394348145
    },
    {
      "epoch": 0.000304217529296875,
      "model_forward_time": 0.11488938331604004,
      "step": 49843
    },
    {
      "epoch": 0.000304217529296875,
      "step": 49843,
      "training_step_time": 0.38977694511413574
    },
    {
      "epoch": 0.0003042236328125,
      "model_forward_time": 0.11397886276245117,
      "step": 49844
    },
    {
      "epoch": 0.0003042236328125,
      "step": 49844,
      "training_step_time": 0.44245433807373047
    },
    {
      "epoch": 0.000304229736328125,
      "model_forward_time": 0.11478161811828613,
      "step": 49845
    },
    {
      "epoch": 0.000304229736328125,
      "step": 49845,
      "training_step_time": 0.5877130031585693
    },
    {
      "epoch": 0.00030423583984375,
      "model_forward_time": 0.11423134803771973,
      "step": 49846
    },
    {
      "epoch": 0.00030423583984375,
      "step": 49846,
      "training_step_time": 0.43201684951782227
    },
    {
      "epoch": 0.000304241943359375,
      "model_forward_time": 0.1152653694152832,
      "step": 49847
    },
    {
      "epoch": 0.000304241943359375,
      "step": 49847,
      "training_step_time": 0.40714573860168457
    },
    {
      "epoch": 0.000304248046875,
      "model_forward_time": 0.11501431465148926,
      "step": 49848
    },
    {
      "epoch": 0.000304248046875,
      "step": 49848,
      "training_step_time": 0.38442277908325195
    },
    {
      "epoch": 0.000304254150390625,
      "model_forward_time": 0.11490106582641602,
      "step": 49849
    },
    {
      "epoch": 0.000304254150390625,
      "step": 49849,
      "training_step_time": 0.36573290824890137
    },
    {
      "epoch": 0.00030426025390625,
      "grad_norm": 0.10272479057312012,
      "learning_rate": 7.62195212058181e-06,
      "loss": 0.0401,
      "step": 49850
    },
    {
      "epoch": 0.00030426025390625,
      "model_forward_time": 0.11455273628234863,
      "step": 49850
    },
    {
      "epoch": 0.00030426025390625,
      "step": 49850,
      "training_step_time": 0.47083139419555664
    },
    {
      "epoch": 0.000304266357421875,
      "model_forward_time": 0.1155703067779541,
      "step": 49851
    },
    {
      "epoch": 0.000304266357421875,
      "step": 49851,
      "training_step_time": 0.48017406463623047
    },
    {
      "epoch": 0.0003042724609375,
      "model_forward_time": 0.11794734001159668,
      "step": 49852
    },
    {
      "epoch": 0.0003042724609375,
      "step": 49852,
      "training_step_time": 0.4386303424835205
    },
    {
      "epoch": 0.000304278564453125,
      "model_forward_time": 0.11597871780395508,
      "step": 49853
    },
    {
      "epoch": 0.000304278564453125,
      "step": 49853,
      "training_step_time": 0.4005117416381836
    },
    {
      "epoch": 0.00030428466796875,
      "model_forward_time": 0.11466312408447266,
      "step": 49854
    },
    {
      "epoch": 0.00030428466796875,
      "step": 49854,
      "training_step_time": 0.3939640522003174
    },
    {
      "epoch": 0.000304290771484375,
      "model_forward_time": 0.11476778984069824,
      "step": 49855
    },
    {
      "epoch": 0.000304290771484375,
      "step": 49855,
      "training_step_time": 0.3930480480194092
    },
    {
      "epoch": 0.000304296875,
      "model_forward_time": 0.1145772933959961,
      "step": 49856
    },
    {
      "epoch": 0.000304296875,
      "step": 49856,
      "training_step_time": 0.38280177116394043
    },
    {
      "epoch": 0.000304302978515625,
      "model_forward_time": 0.11512875556945801,
      "step": 49857
    },
    {
      "epoch": 0.000304302978515625,
      "step": 49857,
      "training_step_time": 0.4555201530456543
    },
    {
      "epoch": 0.00030430908203125,
      "model_forward_time": 0.11533594131469727,
      "step": 49858
    },
    {
      "epoch": 0.00030430908203125,
      "step": 49858,
      "training_step_time": 0.4035964012145996
    },
    {
      "epoch": 0.000304315185546875,
      "model_forward_time": 0.11507868766784668,
      "step": 49859
    },
    {
      "epoch": 0.000304315185546875,
      "step": 49859,
      "training_step_time": 0.43747401237487793
    },
    {
      "epoch": 0.0003043212890625,
      "grad_norm": 0.11336570233106613,
      "learning_rate": 7.607333671113409e-06,
      "loss": 0.0353,
      "step": 49860
    },
    {
      "epoch": 0.0003043212890625,
      "model_forward_time": 0.11477017402648926,
      "step": 49860
    },
    {
      "epoch": 0.0003043212890625,
      "step": 49860,
      "training_step_time": 0.39890313148498535
    },
    {
      "epoch": 0.000304327392578125,
      "model_forward_time": 0.11523103713989258,
      "step": 49861
    },
    {
      "epoch": 0.000304327392578125,
      "step": 49861,
      "training_step_time": 0.4233088493347168
    },
    {
      "epoch": 0.00030433349609375,
      "model_forward_time": 0.11553263664245605,
      "step": 49862
    },
    {
      "epoch": 0.00030433349609375,
      "step": 49862,
      "training_step_time": 0.38843393325805664
    },
    {
      "epoch": 0.000304339599609375,
      "model_forward_time": 0.11541867256164551,
      "step": 49863
    },
    {
      "epoch": 0.000304339599609375,
      "step": 49863,
      "training_step_time": 0.6885049343109131
    },
    {
      "epoch": 0.000304345703125,
      "model_forward_time": 0.11488199234008789,
      "step": 49864
    },
    {
      "epoch": 0.000304345703125,
      "step": 49864,
      "training_step_time": 0.48013901710510254
    },
    {
      "epoch": 0.000304351806640625,
      "model_forward_time": 0.11444926261901855,
      "step": 49865
    },
    {
      "epoch": 0.000304351806640625,
      "step": 49865,
      "training_step_time": 0.4362027645111084
    },
    {
      "epoch": 0.00030435791015625,
      "model_forward_time": 0.11454963684082031,
      "step": 49866
    },
    {
      "epoch": 0.00030435791015625,
      "step": 49866,
      "training_step_time": 0.47728776931762695
    },
    {
      "epoch": 0.000304364013671875,
      "model_forward_time": 0.11460041999816895,
      "step": 49867
    },
    {
      "epoch": 0.000304364013671875,
      "step": 49867,
      "training_step_time": 0.3883373737335205
    },
    {
      "epoch": 0.0003043701171875,
      "model_forward_time": 0.11380767822265625,
      "step": 49868
    },
    {
      "epoch": 0.0003043701171875,
      "step": 49868,
      "training_step_time": 0.3789801597595215
    },
    {
      "epoch": 0.000304376220703125,
      "model_forward_time": 0.11554336547851562,
      "step": 49869
    },
    {
      "epoch": 0.000304376220703125,
      "step": 49869,
      "training_step_time": 0.46567797660827637
    },
    {
      "epoch": 0.00030438232421875,
      "grad_norm": 0.136227548122406,
      "learning_rate": 7.592728099417745e-06,
      "loss": 0.0393,
      "step": 49870
    },
    {
      "epoch": 0.00030438232421875,
      "model_forward_time": 0.11559724807739258,
      "step": 49870
    },
    {
      "epoch": 0.00030438232421875,
      "step": 49870,
      "training_step_time": 0.4090726375579834
    },
    {
      "epoch": 0.000304388427734375,
      "model_forward_time": 0.11522173881530762,
      "step": 49871
    },
    {
      "epoch": 0.000304388427734375,
      "step": 49871,
      "training_step_time": 0.4059572219848633
    },
    {
      "epoch": 0.00030439453125,
      "model_forward_time": 0.1152641773223877,
      "step": 49872
    },
    {
      "epoch": 0.00030439453125,
      "step": 49872,
      "training_step_time": 0.40984559059143066
    },
    {
      "epoch": 0.000304400634765625,
      "model_forward_time": 0.11564326286315918,
      "step": 49873
    },
    {
      "epoch": 0.000304400634765625,
      "step": 49873,
      "training_step_time": 0.4046471118927002
    },
    {
      "epoch": 0.00030440673828125,
      "model_forward_time": 0.11559462547302246,
      "step": 49874
    },
    {
      "epoch": 0.00030440673828125,
      "step": 49874,
      "training_step_time": 0.38905858993530273
    },
    {
      "epoch": 0.000304412841796875,
      "model_forward_time": 0.11519980430603027,
      "step": 49875
    },
    {
      "epoch": 0.000304412841796875,
      "step": 49875,
      "training_step_time": 0.6532018184661865
    },
    {
      "epoch": 0.0003044189453125,
      "model_forward_time": 0.11558938026428223,
      "step": 49876
    },
    {
      "epoch": 0.0003044189453125,
      "step": 49876,
      "training_step_time": 0.38362765312194824
    },
    {
      "epoch": 0.000304425048828125,
      "model_forward_time": 0.11453056335449219,
      "step": 49877
    },
    {
      "epoch": 0.000304425048828125,
      "step": 49877,
      "training_step_time": 0.36362719535827637
    },
    {
      "epoch": 0.00030443115234375,
      "model_forward_time": 0.11451125144958496,
      "step": 49878
    },
    {
      "epoch": 0.00030443115234375,
      "step": 49878,
      "training_step_time": 0.4418962001800537
    },
    {
      "epoch": 0.000304437255859375,
      "model_forward_time": 0.1147165298461914,
      "step": 49879
    },
    {
      "epoch": 0.000304437255859375,
      "step": 49879,
      "training_step_time": 0.41373348236083984
    },
    {
      "epoch": 0.000304443359375,
      "grad_norm": 0.11367610841989517,
      "learning_rate": 7.578135409931558e-06,
      "loss": 0.0393,
      "step": 49880
    },
    {
      "epoch": 0.000304443359375,
      "model_forward_time": 0.11542654037475586,
      "step": 49880
    },
    {
      "epoch": 0.000304443359375,
      "step": 49880,
      "training_step_time": 0.47191834449768066
    },
    {
      "epoch": 0.000304449462890625,
      "model_forward_time": 0.11471414566040039,
      "step": 49881
    },
    {
      "epoch": 0.000304449462890625,
      "step": 49881,
      "training_step_time": 0.5260188579559326
    },
    {
      "epoch": 0.00030445556640625,
      "model_forward_time": 0.11435604095458984,
      "step": 49882
    },
    {
      "epoch": 0.00030445556640625,
      "step": 49882,
      "training_step_time": 0.3929469585418701
    },
    {
      "epoch": 0.000304461669921875,
      "model_forward_time": 0.11443424224853516,
      "step": 49883
    },
    {
      "epoch": 0.000304461669921875,
      "step": 49883,
      "training_step_time": 0.4339940547943115
    },
    {
      "epoch": 0.0003044677734375,
      "model_forward_time": 0.11447358131408691,
      "step": 49884
    },
    {
      "epoch": 0.0003044677734375,
      "step": 49884,
      "training_step_time": 0.39409756660461426
    },
    {
      "epoch": 0.000304473876953125,
      "model_forward_time": 0.11472320556640625,
      "step": 49885
    },
    {
      "epoch": 0.000304473876953125,
      "step": 49885,
      "training_step_time": 0.38483619689941406
    },
    {
      "epoch": 0.00030447998046875,
      "model_forward_time": 0.11465072631835938,
      "step": 49886
    },
    {
      "epoch": 0.00030447998046875,
      "step": 49886,
      "training_step_time": 0.38182663917541504
    },
    {
      "epoch": 0.000304486083984375,
      "model_forward_time": 0.11511540412902832,
      "step": 49887
    },
    {
      "epoch": 0.000304486083984375,
      "step": 49887,
      "training_step_time": 0.7419440746307373
    },
    {
      "epoch": 0.0003044921875,
      "model_forward_time": 0.11522197723388672,
      "step": 49888
    },
    {
      "epoch": 0.0003044921875,
      "step": 49888,
      "training_step_time": 0.4126765727996826
    },
    {
      "epoch": 0.000304498291015625,
      "model_forward_time": 0.11498594284057617,
      "step": 49889
    },
    {
      "epoch": 0.000304498291015625,
      "step": 49889,
      "training_step_time": 0.4949932098388672
    },
    {
      "epoch": 0.00030450439453125,
      "grad_norm": 0.08495073765516281,
      "learning_rate": 7.56355560708778e-06,
      "loss": 0.0358,
      "step": 49890
    },
    {
      "epoch": 0.00030450439453125,
      "model_forward_time": 0.11438941955566406,
      "step": 49890
    },
    {
      "epoch": 0.00030450439453125,
      "step": 49890,
      "training_step_time": 0.36362290382385254
    },
    {
      "epoch": 0.000304510498046875,
      "model_forward_time": 0.11544132232666016,
      "step": 49891
    },
    {
      "epoch": 0.000304510498046875,
      "step": 49891,
      "training_step_time": 0.3798658847808838
    },
    {
      "epoch": 0.0003045166015625,
      "model_forward_time": 0.11455249786376953,
      "step": 49892
    },
    {
      "epoch": 0.0003045166015625,
      "step": 49892,
      "training_step_time": 0.45581531524658203
    },
    {
      "epoch": 0.000304522705078125,
      "model_forward_time": 0.11494827270507812,
      "step": 49893
    },
    {
      "epoch": 0.000304522705078125,
      "step": 49893,
      "training_step_time": 0.46167588233947754
    },
    {
      "epoch": 0.00030452880859375,
      "model_forward_time": 0.11465263366699219,
      "step": 49894
    },
    {
      "epoch": 0.00030452880859375,
      "step": 49894,
      "training_step_time": 0.3849208354949951
    },
    {
      "epoch": 0.000304534912109375,
      "model_forward_time": 0.11441659927368164,
      "step": 49895
    },
    {
      "epoch": 0.000304534912109375,
      "step": 49895,
      "training_step_time": 0.39395976066589355
    },
    {
      "epoch": 0.000304541015625,
      "model_forward_time": 0.1150362491607666,
      "step": 49896
    },
    {
      "epoch": 0.000304541015625,
      "step": 49896,
      "training_step_time": 0.4471447467803955
    },
    {
      "epoch": 0.000304547119140625,
      "model_forward_time": 0.11453890800476074,
      "step": 49897
    },
    {
      "epoch": 0.000304547119140625,
      "step": 49897,
      "training_step_time": 0.40013813972473145
    },
    {
      "epoch": 0.00030455322265625,
      "model_forward_time": 0.11494112014770508,
      "step": 49898
    },
    {
      "epoch": 0.00030455322265625,
      "step": 49898,
      "training_step_time": 0.4005904197692871
    },
    {
      "epoch": 0.000304559326171875,
      "model_forward_time": 0.11504292488098145,
      "step": 49899
    },
    {
      "epoch": 0.000304559326171875,
      "step": 49899,
      "training_step_time": 0.572045087814331
    },
    {
      "epoch": 0.0003045654296875,
      "grad_norm": 0.1264958679676056,
      "learning_rate": 7.5489886953153125e-06,
      "loss": 0.0303,
      "step": 49900
    },
    {
      "epoch": 0.0003045654296875,
      "model_forward_time": 0.11432600021362305,
      "step": 49900
    },
    {
      "epoch": 0.0003045654296875,
      "step": 49900,
      "training_step_time": 0.3836531639099121
    },
    {
      "epoch": 0.000304571533203125,
      "model_forward_time": 0.11536741256713867,
      "step": 49901
    },
    {
      "epoch": 0.000304571533203125,
      "step": 49901,
      "training_step_time": 0.42491793632507324
    },
    {
      "epoch": 0.00030457763671875,
      "model_forward_time": 0.11500787734985352,
      "step": 49902
    },
    {
      "epoch": 0.00030457763671875,
      "step": 49902,
      "training_step_time": 0.4189765453338623
    },
    {
      "epoch": 0.000304583740234375,
      "model_forward_time": 0.11543941497802734,
      "step": 49903
    },
    {
      "epoch": 0.000304583740234375,
      "step": 49903,
      "training_step_time": 0.43016624450683594
    },
    {
      "epoch": 0.00030458984375,
      "model_forward_time": 0.11510443687438965,
      "step": 49904
    },
    {
      "epoch": 0.00030458984375,
      "step": 49904,
      "training_step_time": 0.39312744140625
    },
    {
      "epoch": 0.000304595947265625,
      "model_forward_time": 0.11456060409545898,
      "step": 49905
    },
    {
      "epoch": 0.000304595947265625,
      "step": 49905,
      "training_step_time": 0.7011420726776123
    },
    {
      "epoch": 0.00030460205078125,
      "model_forward_time": 0.11442446708679199,
      "step": 49906
    },
    {
      "epoch": 0.00030460205078125,
      "step": 49906,
      "training_step_time": 0.4532155990600586
    },
    {
      "epoch": 0.000304608154296875,
      "model_forward_time": 0.11495256423950195,
      "step": 49907
    },
    {
      "epoch": 0.000304608154296875,
      "step": 49907,
      "training_step_time": 0.4690408706665039
    },
    {
      "epoch": 0.0003046142578125,
      "model_forward_time": 0.11410737037658691,
      "step": 49908
    },
    {
      "epoch": 0.0003046142578125,
      "step": 49908,
      "training_step_time": 0.39199376106262207
    },
    {
      "epoch": 0.000304620361328125,
      "model_forward_time": 0.11444306373596191,
      "step": 49909
    },
    {
      "epoch": 0.000304620361328125,
      "step": 49909,
      "training_step_time": 0.4436302185058594
    },
    {
      "epoch": 0.00030462646484375,
      "grad_norm": 0.09142783284187317,
      "learning_rate": 7.5344346790392375e-06,
      "loss": 0.0388,
      "step": 49910
    },
    {
      "epoch": 0.00030462646484375,
      "model_forward_time": 0.11461758613586426,
      "step": 49910
    },
    {
      "epoch": 0.00030462646484375,
      "step": 49910,
      "training_step_time": 0.44716930389404297
    },
    {
      "epoch": 0.000304632568359375,
      "model_forward_time": 0.11454272270202637,
      "step": 49911
    },
    {
      "epoch": 0.000304632568359375,
      "step": 49911,
      "training_step_time": 0.38663411140441895
    },
    {
      "epoch": 0.000304638671875,
      "model_forward_time": 0.11535143852233887,
      "step": 49912
    },
    {
      "epoch": 0.000304638671875,
      "step": 49912,
      "training_step_time": 0.3892955780029297
    },
    {
      "epoch": 0.000304644775390625,
      "model_forward_time": 0.1155843734741211,
      "step": 49913
    },
    {
      "epoch": 0.000304644775390625,
      "step": 49913,
      "training_step_time": 0.38994669914245605
    },
    {
      "epoch": 0.00030465087890625,
      "model_forward_time": 0.11531686782836914,
      "step": 49914
    },
    {
      "epoch": 0.00030465087890625,
      "step": 49914,
      "training_step_time": 0.3927347660064697
    },
    {
      "epoch": 0.000304656982421875,
      "model_forward_time": 0.11512541770935059,
      "step": 49915
    },
    {
      "epoch": 0.000304656982421875,
      "step": 49915,
      "training_step_time": 0.44826674461364746
    },
    {
      "epoch": 0.0003046630859375,
      "model_forward_time": 0.11490797996520996,
      "step": 49916
    },
    {
      "epoch": 0.0003046630859375,
      "step": 49916,
      "training_step_time": 0.42127466201782227
    },
    {
      "epoch": 0.000304669189453125,
      "model_forward_time": 0.1154177188873291,
      "step": 49917
    },
    {
      "epoch": 0.000304669189453125,
      "step": 49917,
      "training_step_time": 0.45166587829589844
    },
    {
      "epoch": 0.00030467529296875,
      "model_forward_time": 0.11576342582702637,
      "step": 49918
    },
    {
      "epoch": 0.00030467529296875,
      "step": 49918,
      "training_step_time": 0.3904867172241211
    },
    {
      "epoch": 0.000304681396484375,
      "model_forward_time": 0.1153104305267334,
      "step": 49919
    },
    {
      "epoch": 0.000304681396484375,
      "step": 49919,
      "training_step_time": 0.4427974224090576
    },
    {
      "epoch": 0.0003046875,
      "grad_norm": 0.08473709970712662,
      "learning_rate": 7.519893562680663e-06,
      "loss": 0.0405,
      "step": 49920
    },
    {
      "epoch": 0.0003046875,
      "model_forward_time": 0.11495327949523926,
      "step": 49920
    },
    {
      "epoch": 0.0003046875,
      "step": 49920,
      "training_step_time": 0.40035581588745117
    },
    {
      "epoch": 0.000304693603515625,
      "model_forward_time": 0.11558032035827637,
      "step": 49921
    },
    {
      "epoch": 0.000304693603515625,
      "step": 49921,
      "training_step_time": 0.466869592666626
    },
    {
      "epoch": 0.00030469970703125,
      "model_forward_time": 0.11484885215759277,
      "step": 49922
    },
    {
      "epoch": 0.00030469970703125,
      "step": 49922,
      "training_step_time": 0.4626331329345703
    },
    {
      "epoch": 0.000304705810546875,
      "model_forward_time": 0.11489105224609375,
      "step": 49923
    },
    {
      "epoch": 0.000304705810546875,
      "step": 49923,
      "training_step_time": 0.621469259262085
    },
    {
      "epoch": 0.0003047119140625,
      "model_forward_time": 0.11467599868774414,
      "step": 49924
    },
    {
      "epoch": 0.0003047119140625,
      "step": 49924,
      "training_step_time": 0.38583898544311523
    },
    {
      "epoch": 0.000304718017578125,
      "model_forward_time": 0.11478567123413086,
      "step": 49925
    },
    {
      "epoch": 0.000304718017578125,
      "step": 49925,
      "training_step_time": 0.38818836212158203
    },
    {
      "epoch": 0.00030472412109375,
      "model_forward_time": 0.11494565010070801,
      "step": 49926
    },
    {
      "epoch": 0.00030472412109375,
      "step": 49926,
      "training_step_time": 0.3895089626312256
    },
    {
      "epoch": 0.000304730224609375,
      "model_forward_time": 0.11475825309753418,
      "step": 49927
    },
    {
      "epoch": 0.000304730224609375,
      "step": 49927,
      "training_step_time": 0.39493608474731445
    },
    {
      "epoch": 0.000304736328125,
      "model_forward_time": 0.11570596694946289,
      "step": 49928
    },
    {
      "epoch": 0.000304736328125,
      "step": 49928,
      "training_step_time": 0.3837730884552002
    },
    {
      "epoch": 0.000304742431640625,
      "model_forward_time": 0.11536741256713867,
      "step": 49929
    },
    {
      "epoch": 0.000304742431640625,
      "step": 49929,
      "training_step_time": 0.6223855018615723
    },
    {
      "epoch": 0.00030474853515625,
      "grad_norm": 0.1167021095752716,
      "learning_rate": 7.505365350656812e-06,
      "loss": 0.0368,
      "step": 49930
    },
    {
      "epoch": 0.00030474853515625,
      "model_forward_time": 0.1155097484588623,
      "step": 49930
    },
    {
      "epoch": 0.00030474853515625,
      "step": 49930,
      "training_step_time": 0.41390514373779297
    },
    {
      "epoch": 0.000304754638671875,
      "model_forward_time": 0.11460471153259277,
      "step": 49931
    },
    {
      "epoch": 0.000304754638671875,
      "step": 49931,
      "training_step_time": 0.4949533939361572
    },
    {
      "epoch": 0.0003047607421875,
      "model_forward_time": 0.11478829383850098,
      "step": 49932
    },
    {
      "epoch": 0.0003047607421875,
      "step": 49932,
      "training_step_time": 0.39649152755737305
    },
    {
      "epoch": 0.000304766845703125,
      "model_forward_time": 0.11514043807983398,
      "step": 49933
    },
    {
      "epoch": 0.000304766845703125,
      "step": 49933,
      "training_step_time": 0.475924015045166
    },
    {
      "epoch": 0.00030477294921875,
      "model_forward_time": 0.11449027061462402,
      "step": 49934
    },
    {
      "epoch": 0.00030477294921875,
      "step": 49934,
      "training_step_time": 0.4885542392730713
    },
    {
      "epoch": 0.000304779052734375,
      "model_forward_time": 0.11473584175109863,
      "step": 49935
    },
    {
      "epoch": 0.000304779052734375,
      "step": 49935,
      "training_step_time": 0.4550764560699463
    },
    {
      "epoch": 0.00030478515625,
      "model_forward_time": 0.11490607261657715,
      "step": 49936
    },
    {
      "epoch": 0.00030478515625,
      "step": 49936,
      "training_step_time": 0.47208428382873535
    },
    {
      "epoch": 0.000304791259765625,
      "model_forward_time": 0.11501097679138184,
      "step": 49937
    },
    {
      "epoch": 0.000304791259765625,
      "step": 49937,
      "training_step_time": 0.37999558448791504
    },
    {
      "epoch": 0.00030479736328125,
      "model_forward_time": 0.11585021018981934,
      "step": 49938
    },
    {
      "epoch": 0.00030479736328125,
      "step": 49938,
      "training_step_time": 0.3992879390716553
    },
    {
      "epoch": 0.000304803466796875,
      "model_forward_time": 0.11497855186462402,
      "step": 49939
    },
    {
      "epoch": 0.000304803466796875,
      "step": 49939,
      "training_step_time": 0.3931710720062256
    },
    {
      "epoch": 0.0003048095703125,
      "grad_norm": 0.09235228598117828,
      "learning_rate": 7.490850047380954e-06,
      "loss": 0.0364,
      "step": 49940
    },
    {
      "epoch": 0.0003048095703125,
      "model_forward_time": 0.1149442195892334,
      "step": 49940
    },
    {
      "epoch": 0.0003048095703125,
      "step": 49940,
      "training_step_time": 0.4035518169403076
    },
    {
      "epoch": 0.000304815673828125,
      "model_forward_time": 0.11483311653137207,
      "step": 49941
    },
    {
      "epoch": 0.000304815673828125,
      "step": 49941,
      "training_step_time": 0.47842979431152344
    },
    {
      "epoch": 0.00030482177734375,
      "model_forward_time": 0.1153099536895752,
      "step": 49942
    },
    {
      "epoch": 0.00030482177734375,
      "step": 49942,
      "training_step_time": 0.3833630084991455
    },
    {
      "epoch": 0.000304827880859375,
      "model_forward_time": 0.11467528343200684,
      "step": 49943
    },
    {
      "epoch": 0.000304827880859375,
      "step": 49943,
      "training_step_time": 0.415236234664917
    },
    {
      "epoch": 0.000304833984375,
      "model_forward_time": 0.11507034301757812,
      "step": 49944
    },
    {
      "epoch": 0.000304833984375,
      "step": 49944,
      "training_step_time": 0.41600918769836426
    },
    {
      "epoch": 0.000304840087890625,
      "model_forward_time": 0.11512446403503418,
      "step": 49945
    },
    {
      "epoch": 0.000304840087890625,
      "step": 49945,
      "training_step_time": 0.4351634979248047
    },
    {
      "epoch": 0.00030484619140625,
      "model_forward_time": 0.11474275588989258,
      "step": 49946
    },
    {
      "epoch": 0.00030484619140625,
      "step": 49946,
      "training_step_time": 0.3879718780517578
    },
    {
      "epoch": 0.000304852294921875,
      "model_forward_time": 0.11476898193359375,
      "step": 49947
    },
    {
      "epoch": 0.000304852294921875,
      "step": 49947,
      "training_step_time": 0.6256718635559082
    },
    {
      "epoch": 0.0003048583984375,
      "model_forward_time": 0.11491894721984863,
      "step": 49948
    },
    {
      "epoch": 0.0003048583984375,
      "step": 49948,
      "training_step_time": 0.4504811763763428
    },
    {
      "epoch": 0.000304864501953125,
      "model_forward_time": 0.11459684371948242,
      "step": 49949
    },
    {
      "epoch": 0.000304864501953125,
      "step": 49949,
      "training_step_time": 0.43756961822509766
    },
    {
      "epoch": 0.00030487060546875,
      "grad_norm": 0.07819526642560959,
      "learning_rate": 7.476347657262456e-06,
      "loss": 0.0337,
      "step": 49950
    },
    {
      "epoch": 0.00030487060546875,
      "model_forward_time": 0.11462783813476562,
      "step": 49950
    },
    {
      "epoch": 0.00030487060546875,
      "step": 49950,
      "training_step_time": 0.44878363609313965
    },
    {
      "epoch": 0.000304876708984375,
      "model_forward_time": 0.1148684024810791,
      "step": 49951
    },
    {
      "epoch": 0.000304876708984375,
      "step": 49951,
      "training_step_time": 0.39742541313171387
    },
    {
      "epoch": 0.0003048828125,
      "model_forward_time": 0.1166372299194336,
      "step": 49952
    },
    {
      "epoch": 0.0003048828125,
      "step": 49952,
      "training_step_time": 0.39011192321777344
    },
    {
      "epoch": 0.000304888916015625,
      "model_forward_time": 0.1149592399597168,
      "step": 49953
    },
    {
      "epoch": 0.000304888916015625,
      "step": 49953,
      "training_step_time": 0.5324513912200928
    },
    {
      "epoch": 0.00030489501953125,
      "model_forward_time": 0.11506247520446777,
      "step": 49954
    },
    {
      "epoch": 0.00030489501953125,
      "step": 49954,
      "training_step_time": 0.3908839225769043
    },
    {
      "epoch": 0.000304901123046875,
      "model_forward_time": 0.11606931686401367,
      "step": 49955
    },
    {
      "epoch": 0.000304901123046875,
      "step": 49955,
      "training_step_time": 0.3963949680328369
    },
    {
      "epoch": 0.0003049072265625,
      "model_forward_time": 0.11693859100341797,
      "step": 49956
    },
    {
      "epoch": 0.0003049072265625,
      "step": 49956,
      "training_step_time": 0.3995494842529297
    },
    {
      "epoch": 0.000304913330078125,
      "model_forward_time": 0.11487865447998047,
      "step": 49957
    },
    {
      "epoch": 0.000304913330078125,
      "step": 49957,
      "training_step_time": 0.4072296619415283
    },
    {
      "epoch": 0.00030491943359375,
      "model_forward_time": 0.11522102355957031,
      "step": 49958
    },
    {
      "epoch": 0.00030491943359375,
      "step": 49958,
      "training_step_time": 0.40062689781188965
    },
    {
      "epoch": 0.000304925537109375,
      "model_forward_time": 0.11481571197509766,
      "step": 49959
    },
    {
      "epoch": 0.000304925537109375,
      "step": 49959,
      "training_step_time": 0.6175687313079834
    },
    {
      "epoch": 0.000304931640625,
      "grad_norm": 0.10470673441886902,
      "learning_rate": 7.461858184706777e-06,
      "loss": 0.039,
      "step": 49960
    },
    {
      "epoch": 0.000304931640625,
      "model_forward_time": 0.11586689949035645,
      "step": 49960
    },
    {
      "epoch": 0.000304931640625,
      "step": 49960,
      "training_step_time": 0.4037492275238037
    },
    {
      "epoch": 0.000304937744140625,
      "model_forward_time": 0.11467933654785156,
      "step": 49961
    },
    {
      "epoch": 0.000304937744140625,
      "step": 49961,
      "training_step_time": 0.4303915500640869
    },
    {
      "epoch": 0.00030494384765625,
      "model_forward_time": 0.11517572402954102,
      "step": 49962
    },
    {
      "epoch": 0.00030494384765625,
      "step": 49962,
      "training_step_time": 0.4299142360687256
    },
    {
      "epoch": 0.000304949951171875,
      "model_forward_time": 0.11491107940673828,
      "step": 49963
    },
    {
      "epoch": 0.000304949951171875,
      "step": 49963,
      "training_step_time": 0.4231705665588379
    },
    {
      "epoch": 0.0003049560546875,
      "model_forward_time": 0.11445188522338867,
      "step": 49964
    },
    {
      "epoch": 0.0003049560546875,
      "step": 49964,
      "training_step_time": 0.4123497009277344
    },
    {
      "epoch": 0.000304962158203125,
      "model_forward_time": 0.1146860122680664,
      "step": 49965
    },
    {
      "epoch": 0.000304962158203125,
      "step": 49965,
      "training_step_time": 0.5601322650909424
    },
    {
      "epoch": 0.00030496826171875,
      "model_forward_time": 0.11442971229553223,
      "step": 49966
    },
    {
      "epoch": 0.00030496826171875,
      "step": 49966,
      "training_step_time": 0.3844339847564697
    },
    {
      "epoch": 0.000304974365234375,
      "model_forward_time": 0.1151123046875,
      "step": 49967
    },
    {
      "epoch": 0.000304974365234375,
      "step": 49967,
      "training_step_time": 0.38605284690856934
    },
    {
      "epoch": 0.00030498046875,
      "model_forward_time": 0.1145017147064209,
      "step": 49968
    },
    {
      "epoch": 0.00030498046875,
      "step": 49968,
      "training_step_time": 0.4085540771484375
    },
    {
      "epoch": 0.000304986572265625,
      "model_forward_time": 0.11529874801635742,
      "step": 49969
    },
    {
      "epoch": 0.000304986572265625,
      "step": 49969,
      "training_step_time": 0.38947391510009766
    },
    {
      "epoch": 0.00030499267578125,
      "grad_norm": 0.0825272873044014,
      "learning_rate": 7.4473816341154245e-06,
      "loss": 0.0355,
      "step": 49970
    },
    {
      "epoch": 0.00030499267578125,
      "model_forward_time": 0.11473488807678223,
      "step": 49970
    },
    {
      "epoch": 0.00030499267578125,
      "step": 49970,
      "training_step_time": 0.3833622932434082
    },
    {
      "epoch": 0.000304998779296875,
      "model_forward_time": 0.11579751968383789,
      "step": 49971
    },
    {
      "epoch": 0.000304998779296875,
      "step": 49971,
      "training_step_time": 0.707749605178833
    },
    {
      "epoch": 0.0003050048828125,
      "model_forward_time": 0.11443257331848145,
      "step": 49972
    },
    {
      "epoch": 0.0003050048828125,
      "step": 49972,
      "training_step_time": 0.49162721633911133
    },
    {
      "epoch": 0.000305010986328125,
      "model_forward_time": 0.11455440521240234,
      "step": 49973
    },
    {
      "epoch": 0.000305010986328125,
      "step": 49973,
      "training_step_time": 0.38765573501586914
    },
    {
      "epoch": 0.00030501708984375,
      "model_forward_time": 0.11489391326904297,
      "step": 49974
    },
    {
      "epoch": 0.00030501708984375,
      "step": 49974,
      "training_step_time": 0.39008164405822754
    },
    {
      "epoch": 0.000305023193359375,
      "model_forward_time": 0.11523175239562988,
      "step": 49975
    },
    {
      "epoch": 0.000305023193359375,
      "step": 49975,
      "training_step_time": 0.3960421085357666
    },
    {
      "epoch": 0.000305029296875,
      "model_forward_time": 0.11440110206604004,
      "step": 49976
    },
    {
      "epoch": 0.000305029296875,
      "step": 49976,
      "training_step_time": 0.469315767288208
    },
    {
      "epoch": 0.000305035400390625,
      "model_forward_time": 0.11520218849182129,
      "step": 49977
    },
    {
      "epoch": 0.000305035400390625,
      "step": 49977,
      "training_step_time": 0.4717898368835449
    },
    {
      "epoch": 0.00030504150390625,
      "model_forward_time": 0.11535191535949707,
      "step": 49978
    },
    {
      "epoch": 0.00030504150390625,
      "step": 49978,
      "training_step_time": 0.4264256954193115
    },
    {
      "epoch": 0.000305047607421875,
      "model_forward_time": 0.1154329776763916,
      "step": 49979
    },
    {
      "epoch": 0.000305047607421875,
      "step": 49979,
      "training_step_time": 0.39692187309265137
    },
    {
      "epoch": 0.0003050537109375,
      "grad_norm": 0.1132182776927948,
      "learning_rate": 7.432918009885997e-06,
      "loss": 0.0325,
      "step": 49980
    },
    {
      "epoch": 0.0003050537109375,
      "model_forward_time": 0.11535501480102539,
      "step": 49980
    },
    {
      "epoch": 0.0003050537109375,
      "step": 49980,
      "training_step_time": 0.4006979465484619
    },
    {
      "epoch": 0.000305059814453125,
      "model_forward_time": 0.11483359336853027,
      "step": 49981
    },
    {
      "epoch": 0.000305059814453125,
      "step": 49981,
      "training_step_time": 0.399111270904541
    },
    {
      "epoch": 0.00030506591796875,
      "model_forward_time": 0.11465024948120117,
      "step": 49982
    },
    {
      "epoch": 0.00030506591796875,
      "step": 49982,
      "training_step_time": 0.398059606552124
    },
    {
      "epoch": 0.000305072021484375,
      "model_forward_time": 0.11516547203063965,
      "step": 49983
    },
    {
      "epoch": 0.000305072021484375,
      "step": 49983,
      "training_step_time": 0.6847081184387207
    },
    {
      "epoch": 0.000305078125,
      "model_forward_time": 0.1153557300567627,
      "step": 49984
    },
    {
      "epoch": 0.000305078125,
      "step": 49984,
      "training_step_time": 0.390427827835083
    },
    {
      "epoch": 0.000305084228515625,
      "model_forward_time": 0.11451935768127441,
      "step": 49985
    },
    {
      "epoch": 0.000305084228515625,
      "step": 49985,
      "training_step_time": 0.4843173027038574
    },
    {
      "epoch": 0.00030509033203125,
      "model_forward_time": 0.11479687690734863,
      "step": 49986
    },
    {
      "epoch": 0.00030509033203125,
      "step": 49986,
      "training_step_time": 0.4336421489715576
    },
    {
      "epoch": 0.000305096435546875,
      "model_forward_time": 0.11462116241455078,
      "step": 49987
    },
    {
      "epoch": 0.000305096435546875,
      "step": 49987,
      "training_step_time": 0.39264988899230957
    },
    {
      "epoch": 0.0003051025390625,
      "model_forward_time": 0.11385989189147949,
      "step": 49988
    },
    {
      "epoch": 0.0003051025390625,
      "step": 49988,
      "training_step_time": 0.38658642768859863
    },
    {
      "epoch": 0.000305108642578125,
      "model_forward_time": 0.11519908905029297,
      "step": 49989
    },
    {
      "epoch": 0.000305108642578125,
      "step": 49989,
      "training_step_time": 0.498150110244751
    },
    {
      "epoch": 0.00030511474609375,
      "grad_norm": 0.13378594815731049,
      "learning_rate": 7.418467316412158e-06,
      "loss": 0.0424,
      "step": 49990
    },
    {
      "epoch": 0.00030511474609375,
      "model_forward_time": 0.11485838890075684,
      "step": 49990
    },
    {
      "epoch": 0.00030511474609375,
      "step": 49990,
      "training_step_time": 0.5099923610687256
    },
    {
      "epoch": 0.000305120849609375,
      "model_forward_time": 0.11409568786621094,
      "step": 49991
    },
    {
      "epoch": 0.000305120849609375,
      "step": 49991,
      "training_step_time": 0.44773197174072266
    },
    {
      "epoch": 0.000305126953125,
      "model_forward_time": 0.11481308937072754,
      "step": 49992
    },
    {
      "epoch": 0.000305126953125,
      "step": 49992,
      "training_step_time": 0.46016764640808105
    },
    {
      "epoch": 0.000305133056640625,
      "model_forward_time": 0.11451911926269531,
      "step": 49993
    },
    {
      "epoch": 0.000305133056640625,
      "step": 49993,
      "training_step_time": 0.3911709785461426
    },
    {
      "epoch": 0.00030513916015625,
      "model_forward_time": 0.11466741561889648,
      "step": 49994
    },
    {
      "epoch": 0.00030513916015625,
      "step": 49994,
      "training_step_time": 0.39474987983703613
    },
    {
      "epoch": 0.000305145263671875,
      "model_forward_time": 0.11490082740783691,
      "step": 49995
    },
    {
      "epoch": 0.000305145263671875,
      "step": 49995,
      "training_step_time": 0.408231258392334
    },
    {
      "epoch": 0.0003051513671875,
      "model_forward_time": 0.11438632011413574,
      "step": 49996
    },
    {
      "epoch": 0.0003051513671875,
      "step": 49996,
      "training_step_time": 0.3845031261444092
    },
    {
      "epoch": 0.000305157470703125,
      "model_forward_time": 0.11563277244567871,
      "step": 49997
    },
    {
      "epoch": 0.000305157470703125,
      "step": 49997,
      "training_step_time": 0.4026803970336914
    },
    {
      "epoch": 0.00030516357421875,
      "model_forward_time": 0.11478281021118164,
      "step": 49998
    },
    {
      "epoch": 0.00030516357421875,
      "step": 49998,
      "training_step_time": 0.49297046661376953
    },
    {
      "epoch": 0.000305169677734375,
      "model_forward_time": 0.11578750610351562,
      "step": 49999
    },
    {
      "epoch": 0.000305169677734375,
      "step": 49999,
      "training_step_time": 0.42092370986938477
    },
    {
      "epoch": 0.00030517578125,
      "grad_norm": 0.10993663221597672,
      "learning_rate": 7.404029558083653e-06,
      "loss": 0.0398,
      "step": 50000
    },
    {
      "epoch": 0.00030517578125,
      "model_forward_time": 0.11308646202087402,
      "step": 50000
    },
    {
      "epoch": 0.00030517578125,
      "step": 50000,
      "training_step_time": 0.3570413589477539
    },
    {
      "epoch": 0.000305181884765625,
      "model_forward_time": 0.1125953197479248,
      "step": 50001
    },
    {
      "epoch": 0.000305181884765625,
      "step": 50001,
      "training_step_time": 0.46317410469055176
    },
    {
      "epoch": 0.00030518798828125,
      "model_forward_time": 0.11277174949645996,
      "step": 50002
    },
    {
      "epoch": 0.00030518798828125,
      "step": 50002,
      "training_step_time": 0.3812577724456787
    },
    {
      "epoch": 0.000305194091796875,
      "model_forward_time": 0.11420059204101562,
      "step": 50003
    },
    {
      "epoch": 0.000305194091796875,
      "step": 50003,
      "training_step_time": 0.36681532859802246
    },
    {
      "epoch": 0.0003052001953125,
      "model_forward_time": 0.11424589157104492,
      "step": 50004
    },
    {
      "epoch": 0.0003052001953125,
      "step": 50004,
      "training_step_time": 0.39360523223876953
    },
    {
      "epoch": 0.000305206298828125,
      "model_forward_time": 0.11426687240600586,
      "step": 50005
    },
    {
      "epoch": 0.000305206298828125,
      "step": 50005,
      "training_step_time": 0.44272613525390625
    },
    {
      "epoch": 0.00030521240234375,
      "model_forward_time": 0.11471033096313477,
      "step": 50006
    },
    {
      "epoch": 0.00030521240234375,
      "step": 50006,
      "training_step_time": 0.4658851623535156
    },
    {
      "epoch": 0.000305218505859375,
      "model_forward_time": 0.11455345153808594,
      "step": 50007
    },
    {
      "epoch": 0.000305218505859375,
      "step": 50007,
      "training_step_time": 0.5211372375488281
    },
    {
      "epoch": 0.000305224609375,
      "model_forward_time": 0.1145334243774414,
      "step": 50008
    },
    {
      "epoch": 0.000305224609375,
      "step": 50008,
      "training_step_time": 0.38699769973754883
    },
    {
      "epoch": 0.000305230712890625,
      "model_forward_time": 0.11510634422302246,
      "step": 50009
    },
    {
      "epoch": 0.000305230712890625,
      "step": 50009,
      "training_step_time": 0.38400697708129883
    },
    {
      "epoch": 0.00030523681640625,
      "grad_norm": 0.10663838684558868,
      "learning_rate": 7.389604739286271e-06,
      "loss": 0.037,
      "step": 50010
    },
    {
      "epoch": 0.00030523681640625,
      "model_forward_time": 0.11462998390197754,
      "step": 50010
    },
    {
      "epoch": 0.00030523681640625,
      "step": 50010,
      "training_step_time": 0.393934965133667
    },
    {
      "epoch": 0.000305242919921875,
      "model_forward_time": 0.11467552185058594,
      "step": 50011
    },
    {
      "epoch": 0.000305242919921875,
      "step": 50011,
      "training_step_time": 0.3912334442138672
    },
    {
      "epoch": 0.0003052490234375,
      "model_forward_time": 0.11488080024719238,
      "step": 50012
    },
    {
      "epoch": 0.0003052490234375,
      "step": 50012,
      "training_step_time": 0.38837695121765137
    },
    {
      "epoch": 0.000305255126953125,
      "model_forward_time": 0.11436104774475098,
      "step": 50013
    },
    {
      "epoch": 0.000305255126953125,
      "step": 50013,
      "training_step_time": 0.40886759757995605
    },
    {
      "epoch": 0.00030526123046875,
      "model_forward_time": 0.11541414260864258,
      "step": 50014
    },
    {
      "epoch": 0.00030526123046875,
      "step": 50014,
      "training_step_time": 0.4419543743133545
    },
    {
      "epoch": 0.000305267333984375,
      "model_forward_time": 0.11464595794677734,
      "step": 50015
    },
    {
      "epoch": 0.000305267333984375,
      "step": 50015,
      "training_step_time": 0.4528350830078125
    },
    {
      "epoch": 0.0003052734375,
      "model_forward_time": 0.11490678787231445,
      "step": 50016
    },
    {
      "epoch": 0.0003052734375,
      "step": 50016,
      "training_step_time": 0.4270787239074707
    },
    {
      "epoch": 0.000305279541015625,
      "model_forward_time": 0.11514711380004883,
      "step": 50017
    },
    {
      "epoch": 0.000305279541015625,
      "step": 50017,
      "training_step_time": 0.40020155906677246
    },
    {
      "epoch": 0.00030528564453125,
      "model_forward_time": 0.11631584167480469,
      "step": 50018
    },
    {
      "epoch": 0.00030528564453125,
      "step": 50018,
      "training_step_time": 0.4088923931121826
    },
    {
      "epoch": 0.000305291748046875,
      "model_forward_time": 0.1150350570678711,
      "step": 50019
    },
    {
      "epoch": 0.000305291748046875,
      "step": 50019,
      "training_step_time": 0.5097005367279053
    },
    {
      "epoch": 0.0003052978515625,
      "grad_norm": 0.13321055471897125,
      "learning_rate": 7.375192864401931e-06,
      "loss": 0.0347,
      "step": 50020
    },
    {
      "epoch": 0.0003052978515625,
      "model_forward_time": 0.11518430709838867,
      "step": 50020
    },
    {
      "epoch": 0.0003052978515625,
      "step": 50020,
      "training_step_time": 0.4365708827972412
    },
    {
      "epoch": 0.000305303955078125,
      "model_forward_time": 0.11537337303161621,
      "step": 50021
    },
    {
      "epoch": 0.000305303955078125,
      "step": 50021,
      "training_step_time": 0.42635369300842285
    },
    {
      "epoch": 0.00030531005859375,
      "model_forward_time": 0.11484718322753906,
      "step": 50022
    },
    {
      "epoch": 0.00030531005859375,
      "step": 50022,
      "training_step_time": 0.42464637756347656
    },
    {
      "epoch": 0.000305316162109375,
      "model_forward_time": 0.11449313163757324,
      "step": 50023
    },
    {
      "epoch": 0.000305316162109375,
      "step": 50023,
      "training_step_time": 0.40064334869384766
    },
    {
      "epoch": 0.000305322265625,
      "model_forward_time": 0.11484122276306152,
      "step": 50024
    },
    {
      "epoch": 0.000305322265625,
      "step": 50024,
      "training_step_time": 0.38460564613342285
    },
    {
      "epoch": 0.000305328369140625,
      "model_forward_time": 0.11487722396850586,
      "step": 50025
    },
    {
      "epoch": 0.000305328369140625,
      "step": 50025,
      "training_step_time": 0.3986694812774658
    },
    {
      "epoch": 0.00030533447265625,
      "model_forward_time": 0.11489343643188477,
      "step": 50026
    },
    {
      "epoch": 0.00030533447265625,
      "step": 50026,
      "training_step_time": 0.400179386138916
    },
    {
      "epoch": 0.000305340576171875,
      "model_forward_time": 0.11530351638793945,
      "step": 50027
    },
    {
      "epoch": 0.000305340576171875,
      "step": 50027,
      "training_step_time": 0.40241241455078125
    },
    {
      "epoch": 0.0003053466796875,
      "model_forward_time": 0.1153254508972168,
      "step": 50028
    },
    {
      "epoch": 0.0003053466796875,
      "step": 50028,
      "training_step_time": 0.40062546730041504
    },
    {
      "epoch": 0.000305352783203125,
      "model_forward_time": 0.11581778526306152,
      "step": 50029
    },
    {
      "epoch": 0.000305352783203125,
      "step": 50029,
      "training_step_time": 0.4085094928741455
    },
    {
      "epoch": 0.00030535888671875,
      "grad_norm": 0.09720815718173981,
      "learning_rate": 7.36079393780853e-06,
      "loss": 0.0365,
      "step": 50030
    },
    {
      "epoch": 0.00030535888671875,
      "model_forward_time": 0.11505842208862305,
      "step": 50030
    },
    {
      "epoch": 0.00030535888671875,
      "step": 50030,
      "training_step_time": 0.42693138122558594
    },
    {
      "epoch": 0.000305364990234375,
      "model_forward_time": 0.11463546752929688,
      "step": 50031
    },
    {
      "epoch": 0.000305364990234375,
      "step": 50031,
      "training_step_time": 0.44086742401123047
    },
    {
      "epoch": 0.00030537109375,
      "model_forward_time": 0.11535239219665527,
      "step": 50032
    },
    {
      "epoch": 0.00030537109375,
      "step": 50032,
      "training_step_time": 0.39998769760131836
    },
    {
      "epoch": 0.000305377197265625,
      "model_forward_time": 0.1144716739654541,
      "step": 50033
    },
    {
      "epoch": 0.000305377197265625,
      "step": 50033,
      "training_step_time": 0.43132758140563965
    },
    {
      "epoch": 0.00030538330078125,
      "model_forward_time": 0.11532711982727051,
      "step": 50034
    },
    {
      "epoch": 0.00030538330078125,
      "step": 50034,
      "training_step_time": 0.41005635261535645
    },
    {
      "epoch": 0.000305389404296875,
      "model_forward_time": 0.11566805839538574,
      "step": 50035
    },
    {
      "epoch": 0.000305389404296875,
      "step": 50035,
      "training_step_time": 0.401885986328125
    },
    {
      "epoch": 0.0003053955078125,
      "model_forward_time": 0.1152944564819336,
      "step": 50036
    },
    {
      "epoch": 0.0003053955078125,
      "step": 50036,
      "training_step_time": 0.4052426815032959
    },
    {
      "epoch": 0.000305401611328125,
      "model_forward_time": 0.11505722999572754,
      "step": 50037
    },
    {
      "epoch": 0.000305401611328125,
      "step": 50037,
      "training_step_time": 0.43939661979675293
    },
    {
      "epoch": 0.00030540771484375,
      "model_forward_time": 0.11505293846130371,
      "step": 50038
    },
    {
      "epoch": 0.00030540771484375,
      "step": 50038,
      "training_step_time": 0.4103269577026367
    },
    {
      "epoch": 0.000305413818359375,
      "model_forward_time": 0.1145939826965332,
      "step": 50039
    },
    {
      "epoch": 0.000305413818359375,
      "step": 50039,
      "training_step_time": 0.4047079086303711
    },
    {
      "epoch": 0.000305419921875,
      "grad_norm": 0.14341139793395996,
      "learning_rate": 7.3464079638801365e-06,
      "loss": 0.0356,
      "step": 50040
    },
    {
      "epoch": 0.000305419921875,
      "model_forward_time": 0.11461734771728516,
      "step": 50040
    },
    {
      "epoch": 0.000305419921875,
      "step": 50040,
      "training_step_time": 0.384509801864624
    },
    {
      "epoch": 0.000305426025390625,
      "model_forward_time": 0.1148061752319336,
      "step": 50041
    },
    {
      "epoch": 0.000305426025390625,
      "step": 50041,
      "training_step_time": 0.40274596214294434
    },
    {
      "epoch": 0.00030543212890625,
      "model_forward_time": 0.11508965492248535,
      "step": 50042
    },
    {
      "epoch": 0.00030543212890625,
      "step": 50042,
      "training_step_time": 0.40146613121032715
    },
    {
      "epoch": 0.000305438232421875,
      "model_forward_time": 0.11509490013122559,
      "step": 50043
    },
    {
      "epoch": 0.000305438232421875,
      "step": 50043,
      "training_step_time": 0.42367982864379883
    },
    {
      "epoch": 0.0003054443359375,
      "model_forward_time": 0.1150665283203125,
      "step": 50044
    },
    {
      "epoch": 0.0003054443359375,
      "step": 50044,
      "training_step_time": 0.39292407035827637
    },
    {
      "epoch": 0.000305450439453125,
      "model_forward_time": 0.11591172218322754,
      "step": 50045
    },
    {
      "epoch": 0.000305450439453125,
      "step": 50045,
      "training_step_time": 0.4081244468688965
    },
    {
      "epoch": 0.00030545654296875,
      "model_forward_time": 0.11500954627990723,
      "step": 50046
    },
    {
      "epoch": 0.00030545654296875,
      "step": 50046,
      "training_step_time": 0.39220237731933594
    },
    {
      "epoch": 0.000305462646484375,
      "model_forward_time": 0.11521339416503906,
      "step": 50047
    },
    {
      "epoch": 0.000305462646484375,
      "step": 50047,
      "training_step_time": 0.40076160430908203
    },
    {
      "epoch": 0.00030546875,
      "model_forward_time": 0.11535525321960449,
      "step": 50048
    },
    {
      "epoch": 0.00030546875,
      "step": 50048,
      "training_step_time": 0.5088388919830322
    },
    {
      "epoch": 0.000305474853515625,
      "model_forward_time": 0.1155400276184082,
      "step": 50049
    },
    {
      "epoch": 0.000305474853515625,
      "step": 50049,
      "training_step_time": 0.5070116519927979
    },
    {
      "epoch": 0.00030548095703125,
      "grad_norm": 0.07801621407270432,
      "learning_rate": 7.332034946986771e-06,
      "loss": 0.0378,
      "step": 50050
    },
    {
      "epoch": 0.00030548095703125,
      "model_forward_time": 0.1149590015411377,
      "step": 50050
    },
    {
      "epoch": 0.00030548095703125,
      "step": 50050,
      "training_step_time": 0.3913288116455078
    },
    {
      "epoch": 0.000305487060546875,
      "model_forward_time": 0.11501312255859375,
      "step": 50051
    },
    {
      "epoch": 0.000305487060546875,
      "step": 50051,
      "training_step_time": 0.46791934967041016
    },
    {
      "epoch": 0.0003054931640625,
      "model_forward_time": 0.11466217041015625,
      "step": 50052
    },
    {
      "epoch": 0.0003054931640625,
      "step": 50052,
      "training_step_time": 0.45763540267944336
    },
    {
      "epoch": 0.000305499267578125,
      "model_forward_time": 0.1143944263458252,
      "step": 50053
    },
    {
      "epoch": 0.000305499267578125,
      "step": 50053,
      "training_step_time": 0.38997364044189453
    },
    {
      "epoch": 0.00030550537109375,
      "model_forward_time": 0.1147007942199707,
      "step": 50054
    },
    {
      "epoch": 0.00030550537109375,
      "step": 50054,
      "training_step_time": 0.3926515579223633
    },
    {
      "epoch": 0.000305511474609375,
      "model_forward_time": 0.11449456214904785,
      "step": 50055
    },
    {
      "epoch": 0.000305511474609375,
      "step": 50055,
      "training_step_time": 0.38488173484802246
    },
    {
      "epoch": 0.000305517578125,
      "model_forward_time": 0.11504530906677246,
      "step": 50056
    },
    {
      "epoch": 0.000305517578125,
      "step": 50056,
      "training_step_time": 0.3943636417388916
    },
    {
      "epoch": 0.000305523681640625,
      "model_forward_time": 0.11533689498901367,
      "step": 50057
    },
    {
      "epoch": 0.000305523681640625,
      "step": 50057,
      "training_step_time": 0.4106762409210205
    },
    {
      "epoch": 0.00030552978515625,
      "model_forward_time": 0.11554098129272461,
      "step": 50058
    },
    {
      "epoch": 0.00030552978515625,
      "step": 50058,
      "training_step_time": 0.4317142963409424
    },
    {
      "epoch": 0.000305535888671875,
      "model_forward_time": 0.115509033203125,
      "step": 50059
    },
    {
      "epoch": 0.000305535888671875,
      "step": 50059,
      "training_step_time": 0.4029805660247803
    },
    {
      "epoch": 0.0003055419921875,
      "grad_norm": 0.08682224899530411,
      "learning_rate": 7.317674891494625e-06,
      "loss": 0.036,
      "step": 50060
    },
    {
      "epoch": 0.0003055419921875,
      "model_forward_time": 0.11496758460998535,
      "step": 50060
    },
    {
      "epoch": 0.0003055419921875,
      "step": 50060,
      "training_step_time": 0.38916969299316406
    },
    {
      "epoch": 0.000305548095703125,
      "model_forward_time": 0.11484718322753906,
      "step": 50061
    },
    {
      "epoch": 0.000305548095703125,
      "step": 50061,
      "training_step_time": 0.3867354393005371
    },
    {
      "epoch": 0.00030555419921875,
      "model_forward_time": 0.11454200744628906,
      "step": 50062
    },
    {
      "epoch": 0.00030555419921875,
      "step": 50062,
      "training_step_time": 0.3678629398345947
    },
    {
      "epoch": 0.000305560302734375,
      "model_forward_time": 0.1149454116821289,
      "step": 50063
    },
    {
      "epoch": 0.000305560302734375,
      "step": 50063,
      "training_step_time": 0.42905664443969727
    },
    {
      "epoch": 0.00030556640625,
      "model_forward_time": 0.11489081382751465,
      "step": 50064
    },
    {
      "epoch": 0.00030556640625,
      "step": 50064,
      "training_step_time": 0.45205259323120117
    },
    {
      "epoch": 0.000305572509765625,
      "model_forward_time": 0.11516475677490234,
      "step": 50065
    },
    {
      "epoch": 0.000305572509765625,
      "step": 50065,
      "training_step_time": 0.43843793869018555
    },
    {
      "epoch": 0.00030557861328125,
      "model_forward_time": 0.1143636703491211,
      "step": 50066
    },
    {
      "epoch": 0.00030557861328125,
      "step": 50066,
      "training_step_time": 0.4672205448150635
    },
    {
      "epoch": 0.000305584716796875,
      "model_forward_time": 0.11508488655090332,
      "step": 50067
    },
    {
      "epoch": 0.000305584716796875,
      "step": 50067,
      "training_step_time": 0.4085252285003662
    },
    {
      "epoch": 0.0003055908203125,
      "model_forward_time": 0.11457109451293945,
      "step": 50068
    },
    {
      "epoch": 0.0003055908203125,
      "step": 50068,
      "training_step_time": 0.39661216735839844
    },
    {
      "epoch": 0.000305596923828125,
      "model_forward_time": 0.11438775062561035,
      "step": 50069
    },
    {
      "epoch": 0.000305596923828125,
      "step": 50069,
      "training_step_time": 0.3986847400665283
    },
    {
      "epoch": 0.00030560302734375,
      "grad_norm": 0.09479870647192001,
      "learning_rate": 7.30332780176588e-06,
      "loss": 0.0358,
      "step": 50070
    },
    {
      "epoch": 0.00030560302734375,
      "model_forward_time": 0.11451172828674316,
      "step": 50070
    },
    {
      "epoch": 0.00030560302734375,
      "step": 50070,
      "training_step_time": 0.3942127227783203
    },
    {
      "epoch": 0.000305609130859375,
      "model_forward_time": 0.1146399974822998,
      "step": 50071
    },
    {
      "epoch": 0.000305609130859375,
      "step": 50071,
      "training_step_time": 0.4534115791320801
    },
    {
      "epoch": 0.000305615234375,
      "model_forward_time": 0.11467933654785156,
      "step": 50072
    },
    {
      "epoch": 0.000305615234375,
      "step": 50072,
      "training_step_time": 0.44434475898742676
    },
    {
      "epoch": 0.000305621337890625,
      "model_forward_time": 0.11482620239257812,
      "step": 50073
    },
    {
      "epoch": 0.000305621337890625,
      "step": 50073,
      "training_step_time": 0.4492018222808838
    },
    {
      "epoch": 0.00030562744140625,
      "model_forward_time": 0.11533284187316895,
      "step": 50074
    },
    {
      "epoch": 0.00030562744140625,
      "step": 50074,
      "training_step_time": 0.40306663513183594
    },
    {
      "epoch": 0.000305633544921875,
      "model_forward_time": 0.114593505859375,
      "step": 50075
    },
    {
      "epoch": 0.000305633544921875,
      "step": 50075,
      "training_step_time": 0.38727498054504395
    },
    {
      "epoch": 0.0003056396484375,
      "model_forward_time": 0.11516952514648438,
      "step": 50076
    },
    {
      "epoch": 0.0003056396484375,
      "step": 50076,
      "training_step_time": 0.4019649028778076
    },
    {
      "epoch": 0.000305645751953125,
      "model_forward_time": 0.1150667667388916,
      "step": 50077
    },
    {
      "epoch": 0.000305645751953125,
      "step": 50077,
      "training_step_time": 0.3859443664550781
    },
    {
      "epoch": 0.00030565185546875,
      "model_forward_time": 0.11480832099914551,
      "step": 50078
    },
    {
      "epoch": 0.00030565185546875,
      "step": 50078,
      "training_step_time": 0.47307252883911133
    },
    {
      "epoch": 0.000305657958984375,
      "model_forward_time": 0.11403226852416992,
      "step": 50079
    },
    {
      "epoch": 0.000305657958984375,
      "step": 50079,
      "training_step_time": 0.7121841907501221
    },
    {
      "epoch": 0.0003056640625,
      "grad_norm": 0.10272778570652008,
      "learning_rate": 7.2889936821588125e-06,
      "loss": 0.0389,
      "step": 50080
    },
    {
      "epoch": 0.0003056640625,
      "model_forward_time": 0.11439728736877441,
      "step": 50080
    },
    {
      "epoch": 0.0003056640625,
      "step": 50080,
      "training_step_time": 0.4090743064880371
    },
    {
      "epoch": 0.000305670166015625,
      "model_forward_time": 0.11414313316345215,
      "step": 50081
    },
    {
      "epoch": 0.000305670166015625,
      "step": 50081,
      "training_step_time": 0.4982304573059082
    },
    {
      "epoch": 0.00030567626953125,
      "model_forward_time": 0.1135857105255127,
      "step": 50082
    },
    {
      "epoch": 0.00030567626953125,
      "step": 50082,
      "training_step_time": 0.3884563446044922
    },
    {
      "epoch": 0.000305682373046875,
      "model_forward_time": 0.11435198783874512,
      "step": 50083
    },
    {
      "epoch": 0.000305682373046875,
      "step": 50083,
      "training_step_time": 0.3910682201385498
    },
    {
      "epoch": 0.0003056884765625,
      "model_forward_time": 0.11411929130554199,
      "step": 50084
    },
    {
      "epoch": 0.0003056884765625,
      "step": 50084,
      "training_step_time": 0.3994414806365967
    },
    {
      "epoch": 0.000305694580078125,
      "model_forward_time": 0.11565089225769043,
      "step": 50085
    },
    {
      "epoch": 0.000305694580078125,
      "step": 50085,
      "training_step_time": 0.696542501449585
    },
    {
      "epoch": 0.00030570068359375,
      "model_forward_time": 0.11419320106506348,
      "step": 50086
    },
    {
      "epoch": 0.00030570068359375,
      "step": 50086,
      "training_step_time": 0.4299437999725342
    },
    {
      "epoch": 0.000305706787109375,
      "model_forward_time": 0.11458468437194824,
      "step": 50087
    },
    {
      "epoch": 0.000305706787109375,
      "step": 50087,
      "training_step_time": 0.46267247200012207
    },
    {
      "epoch": 0.000305712890625,
      "model_forward_time": 0.11470532417297363,
      "step": 50088
    },
    {
      "epoch": 0.000305712890625,
      "step": 50088,
      "training_step_time": 0.387087345123291
    },
    {
      "epoch": 0.000305718994140625,
      "model_forward_time": 0.11444258689880371,
      "step": 50089
    },
    {
      "epoch": 0.000305718994140625,
      "step": 50089,
      "training_step_time": 0.40172791481018066
    },
    {
      "epoch": 0.00030572509765625,
      "grad_norm": 0.07987115532159805,
      "learning_rate": 7.2746725370277435e-06,
      "loss": 0.0331,
      "step": 50090
    },
    {
      "epoch": 0.00030572509765625,
      "model_forward_time": 0.11392974853515625,
      "step": 50090
    },
    {
      "epoch": 0.00030572509765625,
      "step": 50090,
      "training_step_time": 0.40831875801086426
    },
    {
      "epoch": 0.000305731201171875,
      "model_forward_time": 0.11492252349853516,
      "step": 50091
    },
    {
      "epoch": 0.000305731201171875,
      "step": 50091,
      "training_step_time": 1.1578190326690674
    },
    {
      "epoch": 0.0003057373046875,
      "model_forward_time": 0.11397266387939453,
      "step": 50092
    },
    {
      "epoch": 0.0003057373046875,
      "step": 50092,
      "training_step_time": 0.458477258682251
    },
    {
      "epoch": 0.000305743408203125,
      "model_forward_time": 0.11375808715820312,
      "step": 50093
    },
    {
      "epoch": 0.000305743408203125,
      "step": 50093,
      "training_step_time": 0.46178388595581055
    },
    {
      "epoch": 0.00030574951171875,
      "model_forward_time": 0.11345887184143066,
      "step": 50094
    },
    {
      "epoch": 0.00030574951171875,
      "step": 50094,
      "training_step_time": 0.38422560691833496
    },
    {
      "epoch": 0.000305755615234375,
      "model_forward_time": 0.11383628845214844,
      "step": 50095
    },
    {
      "epoch": 0.000305755615234375,
      "step": 50095,
      "training_step_time": 0.4026012420654297
    },
    {
      "epoch": 0.00030576171875,
      "model_forward_time": 0.11410927772521973,
      "step": 50096
    },
    {
      "epoch": 0.00030576171875,
      "step": 50096,
      "training_step_time": 0.39666032791137695
    },
    {
      "epoch": 0.000305767822265625,
      "model_forward_time": 0.11393857002258301,
      "step": 50097
    },
    {
      "epoch": 0.000305767822265625,
      "step": 50097,
      "training_step_time": 1.0404045581817627
    },
    {
      "epoch": 0.00030577392578125,
      "model_forward_time": 0.113616943359375,
      "step": 50098
    },
    {
      "epoch": 0.00030577392578125,
      "step": 50098,
      "training_step_time": 0.4153463840484619
    },
    {
      "epoch": 0.000305780029296875,
      "model_forward_time": 0.11354541778564453,
      "step": 50099
    },
    {
      "epoch": 0.000305780029296875,
      "step": 50099,
      "training_step_time": 0.38350439071655273
    },
    {
      "epoch": 0.0003057861328125,
      "grad_norm": 0.09140656888484955,
      "learning_rate": 7.260364370723044e-06,
      "loss": 0.033,
      "step": 50100
    },
    {
      "epoch": 0.0003057861328125,
      "model_forward_time": 0.11440086364746094,
      "step": 50100
    },
    {
      "epoch": 0.0003057861328125,
      "step": 50100,
      "training_step_time": 0.38933682441711426
    },
    {
      "epoch": 0.000305792236328125,
      "model_forward_time": 0.11383271217346191,
      "step": 50101
    },
    {
      "epoch": 0.000305792236328125,
      "step": 50101,
      "training_step_time": 0.3908576965332031
    },
    {
      "epoch": 0.00030579833984375,
      "model_forward_time": 0.11426687240600586,
      "step": 50102
    },
    {
      "epoch": 0.00030579833984375,
      "step": 50102,
      "training_step_time": 0.39415621757507324
    },
    {
      "epoch": 0.000305804443359375,
      "model_forward_time": 0.11472439765930176,
      "step": 50103
    },
    {
      "epoch": 0.000305804443359375,
      "step": 50103,
      "training_step_time": 0.9009714126586914
    },
    {
      "epoch": 0.000305810546875,
      "model_forward_time": 0.11454248428344727,
      "step": 50104
    },
    {
      "epoch": 0.000305810546875,
      "step": 50104,
      "training_step_time": 0.39305758476257324
    },
    {
      "epoch": 0.000305816650390625,
      "model_forward_time": 0.11371231079101562,
      "step": 50105
    },
    {
      "epoch": 0.000305816650390625,
      "step": 50105,
      "training_step_time": 0.4268791675567627
    },
    {
      "epoch": 0.00030582275390625,
      "model_forward_time": 0.11404109001159668,
      "step": 50106
    },
    {
      "epoch": 0.00030582275390625,
      "step": 50106,
      "training_step_time": 0.4678182601928711
    },
    {
      "epoch": 0.000305828857421875,
      "model_forward_time": 0.11429309844970703,
      "step": 50107
    },
    {
      "epoch": 0.000305828857421875,
      "step": 50107,
      "training_step_time": 0.3965635299682617
    },
    {
      "epoch": 0.0003058349609375,
      "model_forward_time": 0.11417078971862793,
      "step": 50108
    },
    {
      "epoch": 0.0003058349609375,
      "step": 50108,
      "training_step_time": 0.4096338748931885
    },
    {
      "epoch": 0.000305841064453125,
      "model_forward_time": 0.11509418487548828,
      "step": 50109
    },
    {
      "epoch": 0.000305841064453125,
      "step": 50109,
      "training_step_time": 0.9702177047729492
    },
    {
      "epoch": 0.00030584716796875,
      "grad_norm": 0.10448186099529266,
      "learning_rate": 7.246069187591204e-06,
      "loss": 0.0388,
      "step": 50110
    },
    {
      "epoch": 0.00030584716796875,
      "model_forward_time": 0.11398863792419434,
      "step": 50110
    },
    {
      "epoch": 0.00030584716796875,
      "step": 50110,
      "training_step_time": 0.3980271816253662
    },
    {
      "epoch": 0.000305853271484375,
      "model_forward_time": 0.1148991584777832,
      "step": 50111
    },
    {
      "epoch": 0.000305853271484375,
      "step": 50111,
      "training_step_time": 0.4323105812072754
    },
    {
      "epoch": 0.000305859375,
      "model_forward_time": 0.1139988899230957,
      "step": 50112
    },
    {
      "epoch": 0.000305859375,
      "step": 50112,
      "training_step_time": 0.39232635498046875
    },
    {
      "epoch": 0.000305865478515625,
      "model_forward_time": 0.1138308048248291,
      "step": 50113
    },
    {
      "epoch": 0.000305865478515625,
      "step": 50113,
      "training_step_time": 0.3890087604522705
    },
    {
      "epoch": 0.00030587158203125,
      "model_forward_time": 0.11407756805419922,
      "step": 50114
    },
    {
      "epoch": 0.00030587158203125,
      "step": 50114,
      "training_step_time": 0.38907647132873535
    },
    {
      "epoch": 0.000305877685546875,
      "model_forward_time": 0.11445140838623047,
      "step": 50115
    },
    {
      "epoch": 0.000305877685546875,
      "step": 50115,
      "training_step_time": 0.9214656352996826
    },
    {
      "epoch": 0.0003058837890625,
      "model_forward_time": 0.11476993560791016,
      "step": 50116
    },
    {
      "epoch": 0.0003058837890625,
      "step": 50116,
      "training_step_time": 0.39157962799072266
    },
    {
      "epoch": 0.000305889892578125,
      "model_forward_time": 0.11431431770324707,
      "step": 50117
    },
    {
      "epoch": 0.000305889892578125,
      "step": 50117,
      "training_step_time": 0.4552016258239746
    },
    {
      "epoch": 0.00030589599609375,
      "model_forward_time": 0.11452293395996094,
      "step": 50118
    },
    {
      "epoch": 0.00030589599609375,
      "step": 50118,
      "training_step_time": 0.41465044021606445
    },
    {
      "epoch": 0.000305902099609375,
      "model_forward_time": 0.1136484146118164,
      "step": 50119
    },
    {
      "epoch": 0.000305902099609375,
      "step": 50119,
      "training_step_time": 0.4574930667877197
    },
    {
      "epoch": 0.000305908203125,
      "grad_norm": 0.1055067852139473,
      "learning_rate": 7.2317869919746705e-06,
      "loss": 0.0369,
      "step": 50120
    },
    {
      "epoch": 0.000305908203125,
      "model_forward_time": 0.11412835121154785,
      "step": 50120
    },
    {
      "epoch": 0.000305908203125,
      "step": 50120,
      "training_step_time": 0.3896169662475586
    },
    {
      "epoch": 0.000305914306640625,
      "model_forward_time": 0.11463785171508789,
      "step": 50121
    },
    {
      "epoch": 0.000305914306640625,
      "step": 50121,
      "training_step_time": 1.1367149353027344
    },
    {
      "epoch": 0.00030592041015625,
      "model_forward_time": 0.11358928680419922,
      "step": 50122
    },
    {
      "epoch": 0.00030592041015625,
      "step": 50122,
      "training_step_time": 0.3783607482910156
    },
    {
      "epoch": 0.000305926513671875,
      "model_forward_time": 0.11349701881408691,
      "step": 50123
    },
    {
      "epoch": 0.000305926513671875,
      "step": 50123,
      "training_step_time": 0.40262722969055176
    },
    {
      "epoch": 0.0003059326171875,
      "model_forward_time": 0.11497831344604492,
      "step": 50124
    },
    {
      "epoch": 0.0003059326171875,
      "step": 50124,
      "training_step_time": 0.3901975154876709
    },
    {
      "epoch": 0.000305938720703125,
      "model_forward_time": 0.11362957954406738,
      "step": 50125
    },
    {
      "epoch": 0.000305938720703125,
      "step": 50125,
      "training_step_time": 0.3831665515899658
    },
    {
      "epoch": 0.00030594482421875,
      "model_forward_time": 0.11352252960205078,
      "step": 50126
    },
    {
      "epoch": 0.00030594482421875,
      "step": 50126,
      "training_step_time": 0.38518357276916504
    },
    {
      "epoch": 0.000305950927734375,
      "model_forward_time": 0.11506056785583496,
      "step": 50127
    },
    {
      "epoch": 0.000305950927734375,
      "step": 50127,
      "training_step_time": 0.5449340343475342
    },
    {
      "epoch": 0.00030595703125,
      "model_forward_time": 0.11444902420043945,
      "step": 50128
    },
    {
      "epoch": 0.00030595703125,
      "step": 50128,
      "training_step_time": 0.42778730392456055
    },
    {
      "epoch": 0.000305963134765625,
      "model_forward_time": 0.11444091796875,
      "step": 50129
    },
    {
      "epoch": 0.000305963134765625,
      "step": 50129,
      "training_step_time": 0.4309396743774414
    },
    {
      "epoch": 0.00030596923828125,
      "grad_norm": 0.0978771448135376,
      "learning_rate": 7.217517788212025e-06,
      "loss": 0.0322,
      "step": 50130
    },
    {
      "epoch": 0.00030596923828125,
      "model_forward_time": 0.11494970321655273,
      "step": 50130
    },
    {
      "epoch": 0.00030596923828125,
      "step": 50130,
      "training_step_time": 0.467667818069458
    },
    {
      "epoch": 0.000305975341796875,
      "model_forward_time": 0.11461782455444336,
      "step": 50131
    },
    {
      "epoch": 0.000305975341796875,
      "step": 50131,
      "training_step_time": 0.43157076835632324
    },
    {
      "epoch": 0.0003059814453125,
      "model_forward_time": 0.11493110656738281,
      "step": 50132
    },
    {
      "epoch": 0.0003059814453125,
      "step": 50132,
      "training_step_time": 0.43413257598876953
    },
    {
      "epoch": 0.000305987548828125,
      "model_forward_time": 0.11502885818481445,
      "step": 50133
    },
    {
      "epoch": 0.000305987548828125,
      "step": 50133,
      "training_step_time": 0.4839029312133789
    },
    {
      "epoch": 0.00030599365234375,
      "model_forward_time": 0.11469602584838867,
      "step": 50134
    },
    {
      "epoch": 0.00030599365234375,
      "step": 50134,
      "training_step_time": 0.3967413902282715
    },
    {
      "epoch": 0.000305999755859375,
      "model_forward_time": 0.11473369598388672,
      "step": 50135
    },
    {
      "epoch": 0.000305999755859375,
      "step": 50135,
      "training_step_time": 0.4237534999847412
    },
    {
      "epoch": 0.000306005859375,
      "model_forward_time": 0.11500668525695801,
      "step": 50136
    },
    {
      "epoch": 0.000306005859375,
      "step": 50136,
      "training_step_time": 0.4219491481781006
    },
    {
      "epoch": 0.000306011962890625,
      "model_forward_time": 0.11394453048706055,
      "step": 50137
    },
    {
      "epoch": 0.000306011962890625,
      "step": 50137,
      "training_step_time": 0.4826631546020508
    },
    {
      "epoch": 0.00030601806640625,
      "model_forward_time": 0.11452770233154297,
      "step": 50138
    },
    {
      "epoch": 0.00030601806640625,
      "step": 50138,
      "training_step_time": 0.3883795738220215
    },
    {
      "epoch": 0.000306024169921875,
      "model_forward_time": 0.11473488807678223,
      "step": 50139
    },
    {
      "epoch": 0.000306024169921875,
      "step": 50139,
      "training_step_time": 0.5190403461456299
    },
    {
      "epoch": 0.0003060302734375,
      "grad_norm": 0.08307375758886337,
      "learning_rate": 7.203261580637877e-06,
      "loss": 0.0351,
      "step": 50140
    },
    {
      "epoch": 0.0003060302734375,
      "model_forward_time": 0.11534333229064941,
      "step": 50140
    },
    {
      "epoch": 0.0003060302734375,
      "step": 50140,
      "training_step_time": 0.41027331352233887
    },
    {
      "epoch": 0.000306036376953125,
      "model_forward_time": 0.11481642723083496,
      "step": 50141
    },
    {
      "epoch": 0.000306036376953125,
      "step": 50141,
      "training_step_time": 0.3995785713195801
    },
    {
      "epoch": 0.00030604248046875,
      "model_forward_time": 0.1147608757019043,
      "step": 50142
    },
    {
      "epoch": 0.00030604248046875,
      "step": 50142,
      "training_step_time": 0.43268680572509766
    },
    {
      "epoch": 0.000306048583984375,
      "model_forward_time": 0.11555814743041992,
      "step": 50143
    },
    {
      "epoch": 0.000306048583984375,
      "step": 50143,
      "training_step_time": 0.42934513092041016
    },
    {
      "epoch": 0.0003060546875,
      "model_forward_time": 0.11424851417541504,
      "step": 50144
    },
    {
      "epoch": 0.0003060546875,
      "step": 50144,
      "training_step_time": 0.4746541976928711
    },
    {
      "epoch": 0.000306060791015625,
      "model_forward_time": 0.11532068252563477,
      "step": 50145
    },
    {
      "epoch": 0.000306060791015625,
      "step": 50145,
      "training_step_time": 0.7073137760162354
    },
    {
      "epoch": 0.00030606689453125,
      "model_forward_time": 0.11411046981811523,
      "step": 50146
    },
    {
      "epoch": 0.00030606689453125,
      "step": 50146,
      "training_step_time": 0.3906548023223877
    },
    {
      "epoch": 0.000306072998046875,
      "model_forward_time": 0.1139371395111084,
      "step": 50147
    },
    {
      "epoch": 0.000306072998046875,
      "step": 50147,
      "training_step_time": 0.3790607452392578
    },
    {
      "epoch": 0.0003060791015625,
      "model_forward_time": 0.11461901664733887,
      "step": 50148
    },
    {
      "epoch": 0.0003060791015625,
      "step": 50148,
      "training_step_time": 0.38877296447753906
    },
    {
      "epoch": 0.000306085205078125,
      "model_forward_time": 0.11431097984313965,
      "step": 50149
    },
    {
      "epoch": 0.000306085205078125,
      "step": 50149,
      "training_step_time": 0.48712921142578125
    },
    {
      "epoch": 0.00030609130859375,
      "grad_norm": 0.09141196310520172,
      "learning_rate": 7.189018373582873e-06,
      "loss": 0.0392,
      "step": 50150
    },
    {
      "epoch": 0.00030609130859375,
      "model_forward_time": 0.11460208892822266,
      "step": 50150
    },
    {
      "epoch": 0.00030609130859375,
      "step": 50150,
      "training_step_time": 0.38900160789489746
    },
    {
      "epoch": 0.000306097412109375,
      "model_forward_time": 0.11493229866027832,
      "step": 50151
    },
    {
      "epoch": 0.000306097412109375,
      "step": 50151,
      "training_step_time": 0.5603270530700684
    },
    {
      "epoch": 0.000306103515625,
      "model_forward_time": 0.11507344245910645,
      "step": 50152
    },
    {
      "epoch": 0.000306103515625,
      "step": 50152,
      "training_step_time": 0.3894782066345215
    },
    {
      "epoch": 0.000306109619140625,
      "model_forward_time": 0.11461925506591797,
      "step": 50153
    },
    {
      "epoch": 0.000306109619140625,
      "step": 50153,
      "training_step_time": 0.3925955295562744
    },
    {
      "epoch": 0.00030611572265625,
      "model_forward_time": 0.1148066520690918,
      "step": 50154
    },
    {
      "epoch": 0.00030611572265625,
      "step": 50154,
      "training_step_time": 0.39717817306518555
    },
    {
      "epoch": 0.000306121826171875,
      "model_forward_time": 0.11542916297912598,
      "step": 50155
    },
    {
      "epoch": 0.000306121826171875,
      "step": 50155,
      "training_step_time": 0.3995227813720703
    },
    {
      "epoch": 0.0003061279296875,
      "model_forward_time": 0.1158449649810791,
      "step": 50156
    },
    {
      "epoch": 0.0003061279296875,
      "step": 50156,
      "training_step_time": 0.44781064987182617
    },
    {
      "epoch": 0.000306134033203125,
      "model_forward_time": 0.11571717262268066,
      "step": 50157
    },
    {
      "epoch": 0.000306134033203125,
      "step": 50157,
      "training_step_time": 0.6890382766723633
    },
    {
      "epoch": 0.00030614013671875,
      "model_forward_time": 0.11481118202209473,
      "step": 50158
    },
    {
      "epoch": 0.00030614013671875,
      "step": 50158,
      "training_step_time": 0.4768495559692383
    },
    {
      "epoch": 0.000306146240234375,
      "model_forward_time": 0.11399698257446289,
      "step": 50159
    },
    {
      "epoch": 0.000306146240234375,
      "step": 50159,
      "training_step_time": 0.48190975189208984
    },
    {
      "epoch": 0.00030615234375,
      "grad_norm": 0.1460551768541336,
      "learning_rate": 7.174788171373731e-06,
      "loss": 0.0371,
      "step": 50160
    },
    {
      "epoch": 0.00030615234375,
      "model_forward_time": 0.11490511894226074,
      "step": 50160
    },
    {
      "epoch": 0.00030615234375,
      "step": 50160,
      "training_step_time": 0.3921489715576172
    },
    {
      "epoch": 0.000306158447265625,
      "model_forward_time": 0.11423635482788086,
      "step": 50161
    },
    {
      "epoch": 0.000306158447265625,
      "step": 50161,
      "training_step_time": 0.38828134536743164
    },
    {
      "epoch": 0.00030616455078125,
      "model_forward_time": 0.11417484283447266,
      "step": 50162
    },
    {
      "epoch": 0.00030616455078125,
      "step": 50162,
      "training_step_time": 0.40442824363708496
    },
    {
      "epoch": 0.000306170654296875,
      "model_forward_time": 0.11526155471801758,
      "step": 50163
    },
    {
      "epoch": 0.000306170654296875,
      "step": 50163,
      "training_step_time": 0.4172382354736328
    },
    {
      "epoch": 0.0003061767578125,
      "model_forward_time": 0.11496305465698242,
      "step": 50164
    },
    {
      "epoch": 0.0003061767578125,
      "step": 50164,
      "training_step_time": 0.39478516578674316
    },
    {
      "epoch": 0.000306182861328125,
      "model_forward_time": 0.11524009704589844,
      "step": 50165
    },
    {
      "epoch": 0.000306182861328125,
      "step": 50165,
      "training_step_time": 0.418226957321167
    },
    {
      "epoch": 0.00030618896484375,
      "model_forward_time": 0.11494135856628418,
      "step": 50166
    },
    {
      "epoch": 0.00030618896484375,
      "step": 50166,
      "training_step_time": 0.4036216735839844
    },
    {
      "epoch": 0.000306195068359375,
      "model_forward_time": 0.11511850357055664,
      "step": 50167
    },
    {
      "epoch": 0.000306195068359375,
      "step": 50167,
      "training_step_time": 0.39183545112609863
    },
    {
      "epoch": 0.000306201171875,
      "model_forward_time": 0.11555099487304688,
      "step": 50168
    },
    {
      "epoch": 0.000306201171875,
      "step": 50168,
      "training_step_time": 0.4013848304748535
    },
    {
      "epoch": 0.000306207275390625,
      "model_forward_time": 0.11493396759033203,
      "step": 50169
    },
    {
      "epoch": 0.000306207275390625,
      "step": 50169,
      "training_step_time": 0.660149097442627
    },
    {
      "epoch": 0.00030621337890625,
      "grad_norm": 0.09233680367469788,
      "learning_rate": 7.160570978333203e-06,
      "loss": 0.035,
      "step": 50170
    },
    {
      "epoch": 0.00030621337890625,
      "model_forward_time": 0.11519265174865723,
      "step": 50170
    },
    {
      "epoch": 0.00030621337890625,
      "step": 50170,
      "training_step_time": 0.4355125427246094
    },
    {
      "epoch": 0.000306219482421875,
      "model_forward_time": 0.11551570892333984,
      "step": 50171
    },
    {
      "epoch": 0.000306219482421875,
      "step": 50171,
      "training_step_time": 0.5034778118133545
    },
    {
      "epoch": 0.0003062255859375,
      "model_forward_time": 0.11502218246459961,
      "step": 50172
    },
    {
      "epoch": 0.0003062255859375,
      "step": 50172,
      "training_step_time": 0.43091487884521484
    },
    {
      "epoch": 0.000306231689453125,
      "model_forward_time": 0.11451053619384766,
      "step": 50173
    },
    {
      "epoch": 0.000306231689453125,
      "step": 50173,
      "training_step_time": 0.4771888256072998
    },
    {
      "epoch": 0.00030623779296875,
      "model_forward_time": 0.11396050453186035,
      "step": 50174
    },
    {
      "epoch": 0.00030623779296875,
      "step": 50174,
      "training_step_time": 0.3933548927307129
    },
    {
      "epoch": 0.000306243896484375,
      "model_forward_time": 0.11456918716430664,
      "step": 50175
    },
    {
      "epoch": 0.000306243896484375,
      "step": 50175,
      "training_step_time": 0.5279152393341064
    },
    {
      "epoch": 0.00030625,
      "model_forward_time": 0.1150963306427002,
      "step": 50176
    },
    {
      "epoch": 0.00030625,
      "step": 50176,
      "training_step_time": 0.42804789543151855
    },
    {
      "epoch": 0.000306256103515625,
      "model_forward_time": 0.11441779136657715,
      "step": 50177
    },
    {
      "epoch": 0.000306256103515625,
      "step": 50177,
      "training_step_time": 0.45331454277038574
    },
    {
      "epoch": 0.00030626220703125,
      "model_forward_time": 0.11498737335205078,
      "step": 50178
    },
    {
      "epoch": 0.00030626220703125,
      "step": 50178,
      "training_step_time": 0.46236109733581543
    },
    {
      "epoch": 0.000306268310546875,
      "model_forward_time": 0.11479067802429199,
      "step": 50179
    },
    {
      "epoch": 0.000306268310546875,
      "step": 50179,
      "training_step_time": 0.3982675075531006
    },
    {
      "epoch": 0.0003062744140625,
      "grad_norm": 0.10586714744567871,
      "learning_rate": 7.146366798780096e-06,
      "loss": 0.0353,
      "step": 50180
    },
    {
      "epoch": 0.0003062744140625,
      "model_forward_time": 0.11551785469055176,
      "step": 50180
    },
    {
      "epoch": 0.0003062744140625,
      "step": 50180,
      "training_step_time": 0.3973832130432129
    },
    {
      "epoch": 0.000306280517578125,
      "model_forward_time": 0.1150510311126709,
      "step": 50181
    },
    {
      "epoch": 0.000306280517578125,
      "step": 50181,
      "training_step_time": 0.594130277633667
    },
    {
      "epoch": 0.00030628662109375,
      "model_forward_time": 0.1147012710571289,
      "step": 50182
    },
    {
      "epoch": 0.00030628662109375,
      "step": 50182,
      "training_step_time": 0.3914213180541992
    },
    {
      "epoch": 0.000306292724609375,
      "model_forward_time": 0.11480236053466797,
      "step": 50183
    },
    {
      "epoch": 0.000306292724609375,
      "step": 50183,
      "training_step_time": 0.3908061981201172
    },
    {
      "epoch": 0.000306298828125,
      "model_forward_time": 0.11524200439453125,
      "step": 50184
    },
    {
      "epoch": 0.000306298828125,
      "step": 50184,
      "training_step_time": 0.42672133445739746
    },
    {
      "epoch": 0.000306304931640625,
      "model_forward_time": 0.1165306568145752,
      "step": 50185
    },
    {
      "epoch": 0.000306304931640625,
      "step": 50185,
      "training_step_time": 0.48924899101257324
    },
    {
      "epoch": 0.00030631103515625,
      "model_forward_time": 0.11448240280151367,
      "step": 50186
    },
    {
      "epoch": 0.00030631103515625,
      "step": 50186,
      "training_step_time": 0.4130847454071045
    },
    {
      "epoch": 0.000306317138671875,
      "model_forward_time": 0.11600494384765625,
      "step": 50187
    },
    {
      "epoch": 0.000306317138671875,
      "step": 50187,
      "training_step_time": 0.47875213623046875
    },
    {
      "epoch": 0.0003063232421875,
      "model_forward_time": 0.11453819274902344,
      "step": 50188
    },
    {
      "epoch": 0.0003063232421875,
      "step": 50188,
      "training_step_time": 0.38214111328125
    },
    {
      "epoch": 0.000306329345703125,
      "model_forward_time": 0.11492109298706055,
      "step": 50189
    },
    {
      "epoch": 0.000306329345703125,
      "step": 50189,
      "training_step_time": 0.38793301582336426
    },
    {
      "epoch": 0.00030633544921875,
      "grad_norm": 0.10976121574640274,
      "learning_rate": 7.132175637029293e-06,
      "loss": 0.032,
      "step": 50190
    },
    {
      "epoch": 0.00030633544921875,
      "model_forward_time": 0.11573934555053711,
      "step": 50190
    },
    {
      "epoch": 0.00030633544921875,
      "step": 50190,
      "training_step_time": 0.39879560470581055
    },
    {
      "epoch": 0.000306341552734375,
      "model_forward_time": 0.11466217041015625,
      "step": 50191
    },
    {
      "epoch": 0.000306341552734375,
      "step": 50191,
      "training_step_time": 0.4062154293060303
    },
    {
      "epoch": 0.00030634765625,
      "model_forward_time": 0.11451840400695801,
      "step": 50192
    },
    {
      "epoch": 0.00030634765625,
      "step": 50192,
      "training_step_time": 0.4171106815338135
    },
    {
      "epoch": 0.000306353759765625,
      "model_forward_time": 0.11489534378051758,
      "step": 50193
    },
    {
      "epoch": 0.000306353759765625,
      "step": 50193,
      "training_step_time": 0.6755080223083496
    },
    {
      "epoch": 0.00030635986328125,
      "model_forward_time": 0.11472225189208984,
      "step": 50194
    },
    {
      "epoch": 0.00030635986328125,
      "step": 50194,
      "training_step_time": 0.38380980491638184
    },
    {
      "epoch": 0.000306365966796875,
      "model_forward_time": 0.11641955375671387,
      "step": 50195
    },
    {
      "epoch": 0.000306365966796875,
      "step": 50195,
      "training_step_time": 0.4304192066192627
    },
    {
      "epoch": 0.0003063720703125,
      "model_forward_time": 0.11456465721130371,
      "step": 50196
    },
    {
      "epoch": 0.0003063720703125,
      "step": 50196,
      "training_step_time": 0.40305209159851074
    },
    {
      "epoch": 0.000306378173828125,
      "model_forward_time": 0.11507797241210938,
      "step": 50197
    },
    {
      "epoch": 0.000306378173828125,
      "step": 50197,
      "training_step_time": 0.39898014068603516
    },
    {
      "epoch": 0.00030638427734375,
      "model_forward_time": 0.11459183692932129,
      "step": 50198
    },
    {
      "epoch": 0.00030638427734375,
      "step": 50198,
      "training_step_time": 0.36522889137268066
    },
    {
      "epoch": 0.000306390380859375,
      "model_forward_time": 0.11504006385803223,
      "step": 50199
    },
    {
      "epoch": 0.000306390380859375,
      "step": 50199,
      "training_step_time": 0.7243332862854004
    },
    {
      "epoch": 0.000306396484375,
      "grad_norm": 0.09135865420103073,
      "learning_rate": 7.1179974973916486e-06,
      "loss": 0.0382,
      "step": 50200
    },
    {
      "epoch": 0.000306396484375,
      "model_forward_time": 0.11507487297058105,
      "step": 50200
    },
    {
      "epoch": 0.000306396484375,
      "step": 50200,
      "training_step_time": 0.43860292434692383
    },
    {
      "epoch": 0.000306402587890625,
      "model_forward_time": 0.11492037773132324,
      "step": 50201
    },
    {
      "epoch": 0.000306402587890625,
      "step": 50201,
      "training_step_time": 0.4777841567993164
    },
    {
      "epoch": 0.00030640869140625,
      "model_forward_time": 0.11397027969360352,
      "step": 50202
    },
    {
      "epoch": 0.00030640869140625,
      "step": 50202,
      "training_step_time": 0.41869473457336426
    },
    {
      "epoch": 0.000306414794921875,
      "model_forward_time": 0.11452794075012207,
      "step": 50203
    },
    {
      "epoch": 0.000306414794921875,
      "step": 50203,
      "training_step_time": 0.3909285068511963
    },
    {
      "epoch": 0.0003064208984375,
      "model_forward_time": 0.11380457878112793,
      "step": 50204
    },
    {
      "epoch": 0.0003064208984375,
      "step": 50204,
      "training_step_time": 0.42116713523864746
    },
    {
      "epoch": 0.000306427001953125,
      "model_forward_time": 0.11504197120666504,
      "step": 50205
    },
    {
      "epoch": 0.000306427001953125,
      "step": 50205,
      "training_step_time": 0.4089357852935791
    },
    {
      "epoch": 0.00030643310546875,
      "model_forward_time": 0.1145334243774414,
      "step": 50206
    },
    {
      "epoch": 0.00030643310546875,
      "step": 50206,
      "training_step_time": 0.500032901763916
    },
    {
      "epoch": 0.000306439208984375,
      "model_forward_time": 0.1147606372833252,
      "step": 50207
    },
    {
      "epoch": 0.000306439208984375,
      "step": 50207,
      "training_step_time": 0.39501404762268066
    },
    {
      "epoch": 0.0003064453125,
      "model_forward_time": 0.11457443237304688,
      "step": 50208
    },
    {
      "epoch": 0.0003064453125,
      "step": 50208,
      "training_step_time": 0.4594759941101074
    },
    {
      "epoch": 0.000306451416015625,
      "model_forward_time": 0.11535143852233887,
      "step": 50209
    },
    {
      "epoch": 0.000306451416015625,
      "step": 50209,
      "training_step_time": 0.3998706340789795
    },
    {
      "epoch": 0.00030645751953125,
      "grad_norm": 0.113785520195961,
      "learning_rate": 7.103832384174142e-06,
      "loss": 0.0391,
      "step": 50210
    },
    {
      "epoch": 0.00030645751953125,
      "model_forward_time": 0.11490106582641602,
      "step": 50210
    },
    {
      "epoch": 0.00030645751953125,
      "step": 50210,
      "training_step_time": 0.40203309059143066
    },
    {
      "epoch": 0.000306463623046875,
      "model_forward_time": 0.1147603988647461,
      "step": 50211
    },
    {
      "epoch": 0.000306463623046875,
      "step": 50211,
      "training_step_time": 0.5180151462554932
    },
    {
      "epoch": 0.0003064697265625,
      "model_forward_time": 0.11494684219360352,
      "step": 50212
    },
    {
      "epoch": 0.0003064697265625,
      "step": 50212,
      "training_step_time": 0.41297268867492676
    },
    {
      "epoch": 0.000306475830078125,
      "model_forward_time": 0.11579179763793945,
      "step": 50213
    },
    {
      "epoch": 0.000306475830078125,
      "step": 50213,
      "training_step_time": 0.44024658203125
    },
    {
      "epoch": 0.00030648193359375,
      "model_forward_time": 0.11526823043823242,
      "step": 50214
    },
    {
      "epoch": 0.00030648193359375,
      "step": 50214,
      "training_step_time": 0.47062110900878906
    },
    {
      "epoch": 0.000306488037109375,
      "model_forward_time": 0.11497020721435547,
      "step": 50215
    },
    {
      "epoch": 0.000306488037109375,
      "step": 50215,
      "training_step_time": 0.4055652618408203
    },
    {
      "epoch": 0.000306494140625,
      "model_forward_time": 0.11478638648986816,
      "step": 50216
    },
    {
      "epoch": 0.000306494140625,
      "step": 50216,
      "training_step_time": 0.41256213188171387
    },
    {
      "epoch": 0.000306500244140625,
      "model_forward_time": 0.11466002464294434,
      "step": 50217
    },
    {
      "epoch": 0.000306500244140625,
      "step": 50217,
      "training_step_time": 0.5236301422119141
    },
    {
      "epoch": 0.00030650634765625,
      "model_forward_time": 0.11463046073913574,
      "step": 50218
    },
    {
      "epoch": 0.00030650634765625,
      "step": 50218,
      "training_step_time": 0.40261077880859375
    },
    {
      "epoch": 0.000306512451171875,
      "model_forward_time": 0.11461162567138672,
      "step": 50219
    },
    {
      "epoch": 0.000306512451171875,
      "step": 50219,
      "training_step_time": 0.3976137638092041
    },
    {
      "epoch": 0.0003065185546875,
      "grad_norm": 0.10479728132486343,
      "learning_rate": 7.089680301679752e-06,
      "loss": 0.0351,
      "step": 50220
    },
    {
      "epoch": 0.0003065185546875,
      "model_forward_time": 0.11474728584289551,
      "step": 50220
    },
    {
      "epoch": 0.0003065185546875,
      "step": 50220,
      "training_step_time": 0.40163421630859375
    },
    {
      "epoch": 0.000306524658203125,
      "model_forward_time": 0.11534476280212402,
      "step": 50221
    },
    {
      "epoch": 0.000306524658203125,
      "step": 50221,
      "training_step_time": 0.40282535552978516
    },
    {
      "epoch": 0.00030653076171875,
      "model_forward_time": 0.11513018608093262,
      "step": 50222
    },
    {
      "epoch": 0.00030653076171875,
      "step": 50222,
      "training_step_time": 0.4142439365386963
    },
    {
      "epoch": 0.000306536865234375,
      "model_forward_time": 0.11573243141174316,
      "step": 50223
    },
    {
      "epoch": 0.000306536865234375,
      "step": 50223,
      "training_step_time": 0.6835734844207764
    },
    {
      "epoch": 0.00030654296875,
      "model_forward_time": 0.11472511291503906,
      "step": 50224
    },
    {
      "epoch": 0.00030654296875,
      "step": 50224,
      "training_step_time": 0.3958740234375
    },
    {
      "epoch": 0.000306549072265625,
      "model_forward_time": 0.11527395248413086,
      "step": 50225
    },
    {
      "epoch": 0.000306549072265625,
      "step": 50225,
      "training_step_time": 0.39985179901123047
    },
    {
      "epoch": 0.00030655517578125,
      "model_forward_time": 0.11452364921569824,
      "step": 50226
    },
    {
      "epoch": 0.00030655517578125,
      "step": 50226,
      "training_step_time": 0.36133623123168945
    },
    {
      "epoch": 0.000306561279296875,
      "model_forward_time": 0.11486959457397461,
      "step": 50227
    },
    {
      "epoch": 0.000306561279296875,
      "step": 50227,
      "training_step_time": 0.4376034736633301
    },
    {
      "epoch": 0.0003065673828125,
      "model_forward_time": 0.11425232887268066,
      "step": 50228
    },
    {
      "epoch": 0.0003065673828125,
      "step": 50228,
      "training_step_time": 0.4254639148712158
    },
    {
      "epoch": 0.000306573486328125,
      "model_forward_time": 0.11548256874084473,
      "step": 50229
    },
    {
      "epoch": 0.000306573486328125,
      "step": 50229,
      "training_step_time": 0.5735864639282227
    },
    {
      "epoch": 0.00030657958984375,
      "grad_norm": 0.08690948784351349,
      "learning_rate": 7.075541254207502e-06,
      "loss": 0.0353,
      "step": 50230
    },
    {
      "epoch": 0.00030657958984375,
      "model_forward_time": 0.1147768497467041,
      "step": 50230
    },
    {
      "epoch": 0.00030657958984375,
      "step": 50230,
      "training_step_time": 0.3819875717163086
    },
    {
      "epoch": 0.000306585693359375,
      "model_forward_time": 0.11453652381896973,
      "step": 50231
    },
    {
      "epoch": 0.000306585693359375,
      "step": 50231,
      "training_step_time": 0.38349246978759766
    },
    {
      "epoch": 0.000306591796875,
      "model_forward_time": 0.11513614654541016,
      "step": 50232
    },
    {
      "epoch": 0.000306591796875,
      "step": 50232,
      "training_step_time": 0.45696282386779785
    },
    {
      "epoch": 0.000306597900390625,
      "model_forward_time": 0.11489033699035645,
      "step": 50233
    },
    {
      "epoch": 0.000306597900390625,
      "step": 50233,
      "training_step_time": 0.3949306011199951
    },
    {
      "epoch": 0.00030660400390625,
      "model_forward_time": 0.11513066291809082,
      "step": 50234
    },
    {
      "epoch": 0.00030660400390625,
      "step": 50234,
      "training_step_time": 0.4534294605255127
    },
    {
      "epoch": 0.000306610107421875,
      "model_forward_time": 0.11550593376159668,
      "step": 50235
    },
    {
      "epoch": 0.000306610107421875,
      "step": 50235,
      "training_step_time": 0.4715309143066406
    },
    {
      "epoch": 0.0003066162109375,
      "model_forward_time": 0.11492657661437988,
      "step": 50236
    },
    {
      "epoch": 0.0003066162109375,
      "step": 50236,
      "training_step_time": 0.38459134101867676
    },
    {
      "epoch": 0.000306622314453125,
      "model_forward_time": 0.11495327949523926,
      "step": 50237
    },
    {
      "epoch": 0.000306622314453125,
      "step": 50237,
      "training_step_time": 0.39083242416381836
    },
    {
      "epoch": 0.00030662841796875,
      "model_forward_time": 0.11486053466796875,
      "step": 50238
    },
    {
      "epoch": 0.00030662841796875,
      "step": 50238,
      "training_step_time": 0.40004420280456543
    },
    {
      "epoch": 0.000306634521484375,
      "model_forward_time": 0.11436295509338379,
      "step": 50239
    },
    {
      "epoch": 0.000306634521484375,
      "step": 50239,
      "training_step_time": 0.414276123046875
    },
    {
      "epoch": 0.000306640625,
      "grad_norm": 0.07719939947128296,
      "learning_rate": 7.061415246052466e-06,
      "loss": 0.0326,
      "step": 50240
    },
    {
      "epoch": 0.000306640625,
      "model_forward_time": 0.11532831192016602,
      "step": 50240
    },
    {
      "epoch": 0.000306640625,
      "step": 50240,
      "training_step_time": 0.39965105056762695
    },
    {
      "epoch": 0.000306646728515625,
      "model_forward_time": 0.11521530151367188,
      "step": 50241
    },
    {
      "epoch": 0.000306646728515625,
      "step": 50241,
      "training_step_time": 0.8113570213317871
    },
    {
      "epoch": 0.00030665283203125,
      "model_forward_time": 0.1144876480102539,
      "step": 50242
    },
    {
      "epoch": 0.00030665283203125,
      "step": 50242,
      "training_step_time": 0.39483141899108887
    },
    {
      "epoch": 0.000306658935546875,
      "model_forward_time": 0.1150655746459961,
      "step": 50243
    },
    {
      "epoch": 0.000306658935546875,
      "step": 50243,
      "training_step_time": 0.39333581924438477
    },
    {
      "epoch": 0.0003066650390625,
      "model_forward_time": 0.11438179016113281,
      "step": 50244
    },
    {
      "epoch": 0.0003066650390625,
      "step": 50244,
      "training_step_time": 0.41597795486450195
    },
    {
      "epoch": 0.000306671142578125,
      "model_forward_time": 0.11446619033813477,
      "step": 50245
    },
    {
      "epoch": 0.000306671142578125,
      "step": 50245,
      "training_step_time": 0.40311384201049805
    },
    {
      "epoch": 0.00030667724609375,
      "model_forward_time": 0.11559891700744629,
      "step": 50246
    },
    {
      "epoch": 0.00030667724609375,
      "step": 50246,
      "training_step_time": 0.40087056159973145
    },
    {
      "epoch": 0.000306683349609375,
      "model_forward_time": 0.11487793922424316,
      "step": 50247
    },
    {
      "epoch": 0.000306683349609375,
      "step": 50247,
      "training_step_time": 0.6363589763641357
    },
    {
      "epoch": 0.000306689453125,
      "model_forward_time": 0.11468338966369629,
      "step": 50248
    },
    {
      "epoch": 0.000306689453125,
      "step": 50248,
      "training_step_time": 0.4953920841217041
    },
    {
      "epoch": 0.000306695556640625,
      "model_forward_time": 0.11458444595336914,
      "step": 50249
    },
    {
      "epoch": 0.000306695556640625,
      "step": 50249,
      "training_step_time": 0.3923218250274658
    },
    {
      "epoch": 0.00030670166015625,
      "grad_norm": 0.08420660346746445,
      "learning_rate": 7.047302281505736e-06,
      "loss": 0.0363,
      "step": 50250
    },
    {
      "epoch": 0.00030670166015625,
      "model_forward_time": 0.11403298377990723,
      "step": 50250
    },
    {
      "epoch": 0.00030670166015625,
      "step": 50250,
      "training_step_time": 0.38320326805114746
    },
    {
      "epoch": 0.000306707763671875,
      "model_forward_time": 0.1140286922454834,
      "step": 50251
    },
    {
      "epoch": 0.000306707763671875,
      "step": 50251,
      "training_step_time": 0.3927643299102783
    },
    {
      "epoch": 0.0003067138671875,
      "model_forward_time": 0.11426639556884766,
      "step": 50252
    },
    {
      "epoch": 0.0003067138671875,
      "step": 50252,
      "training_step_time": 0.39983057975769043
    },
    {
      "epoch": 0.000306719970703125,
      "model_forward_time": 0.11524558067321777,
      "step": 50253
    },
    {
      "epoch": 0.000306719970703125,
      "step": 50253,
      "training_step_time": 0.5186247825622559
    },
    {
      "epoch": 0.00030672607421875,
      "model_forward_time": 0.11524748802185059,
      "step": 50254
    },
    {
      "epoch": 0.00030672607421875,
      "step": 50254,
      "training_step_time": 0.46521902084350586
    },
    {
      "epoch": 0.000306732177734375,
      "model_forward_time": 0.11484599113464355,
      "step": 50255
    },
    {
      "epoch": 0.000306732177734375,
      "step": 50255,
      "training_step_time": 0.5003230571746826
    },
    {
      "epoch": 0.00030673828125,
      "model_forward_time": 0.11525344848632812,
      "step": 50256
    },
    {
      "epoch": 0.00030673828125,
      "step": 50256,
      "training_step_time": 0.4208109378814697
    },
    {
      "epoch": 0.000306744384765625,
      "model_forward_time": 0.11471128463745117,
      "step": 50257
    },
    {
      "epoch": 0.000306744384765625,
      "step": 50257,
      "training_step_time": 0.39307260513305664
    },
    {
      "epoch": 0.00030675048828125,
      "model_forward_time": 0.11495780944824219,
      "step": 50258
    },
    {
      "epoch": 0.00030675048828125,
      "step": 50258,
      "training_step_time": 0.4222099781036377
    },
    {
      "epoch": 0.000306756591796875,
      "model_forward_time": 0.11497282981872559,
      "step": 50259
    },
    {
      "epoch": 0.000306756591796875,
      "step": 50259,
      "training_step_time": 0.5292785167694092
    },
    {
      "epoch": 0.0003067626953125,
      "grad_norm": 0.07864607870578766,
      "learning_rate": 7.0332023648544965e-06,
      "loss": 0.0377,
      "step": 50260
    },
    {
      "epoch": 0.0003067626953125,
      "model_forward_time": 0.11398482322692871,
      "step": 50260
    },
    {
      "epoch": 0.0003067626953125,
      "step": 50260,
      "training_step_time": 0.3942286968231201
    },
    {
      "epoch": 0.000306768798828125,
      "model_forward_time": 0.11504578590393066,
      "step": 50261
    },
    {
      "epoch": 0.000306768798828125,
      "step": 50261,
      "training_step_time": 0.46883225440979004
    },
    {
      "epoch": 0.00030677490234375,
      "model_forward_time": 0.11528515815734863,
      "step": 50262
    },
    {
      "epoch": 0.00030677490234375,
      "step": 50262,
      "training_step_time": 0.4937713146209717
    },
    {
      "epoch": 0.000306781005859375,
      "model_forward_time": 0.11494803428649902,
      "step": 50263
    },
    {
      "epoch": 0.000306781005859375,
      "step": 50263,
      "training_step_time": 0.3867340087890625
    },
    {
      "epoch": 0.000306787109375,
      "model_forward_time": 0.11492705345153809,
      "step": 50264
    },
    {
      "epoch": 0.000306787109375,
      "step": 50264,
      "training_step_time": 0.3896355628967285
    },
    {
      "epoch": 0.000306793212890625,
      "model_forward_time": 0.11512970924377441,
      "step": 50265
    },
    {
      "epoch": 0.000306793212890625,
      "step": 50265,
      "training_step_time": 0.49672937393188477
    },
    {
      "epoch": 0.00030679931640625,
      "model_forward_time": 0.11444354057312012,
      "step": 50266
    },
    {
      "epoch": 0.00030679931640625,
      "step": 50266,
      "training_step_time": 0.38315463066101074
    },
    {
      "epoch": 0.000306805419921875,
      "model_forward_time": 0.11489009857177734,
      "step": 50267
    },
    {
      "epoch": 0.000306805419921875,
      "step": 50267,
      "training_step_time": 0.40378665924072266
    },
    {
      "epoch": 0.0003068115234375,
      "model_forward_time": 0.1150059700012207,
      "step": 50268
    },
    {
      "epoch": 0.0003068115234375,
      "step": 50268,
      "training_step_time": 0.38324975967407227
    },
    {
      "epoch": 0.000306817626953125,
      "model_forward_time": 0.11551022529602051,
      "step": 50269
    },
    {
      "epoch": 0.000306817626953125,
      "step": 50269,
      "training_step_time": 0.43793272972106934
    },
    {
      "epoch": 0.00030682373046875,
      "grad_norm": 0.10799261182546616,
      "learning_rate": 7.01911550038189e-06,
      "loss": 0.0385,
      "step": 50270
    },
    {
      "epoch": 0.00030682373046875,
      "model_forward_time": 0.11453485488891602,
      "step": 50270
    },
    {
      "epoch": 0.00030682373046875,
      "step": 50270,
      "training_step_time": 0.4246511459350586
    },
    {
      "epoch": 0.000306829833984375,
      "model_forward_time": 0.11626696586608887,
      "step": 50271
    },
    {
      "epoch": 0.000306829833984375,
      "step": 50271,
      "training_step_time": 0.532198429107666
    },
    {
      "epoch": 0.0003068359375,
      "model_forward_time": 0.1143033504486084,
      "step": 50272
    },
    {
      "epoch": 0.0003068359375,
      "step": 50272,
      "training_step_time": 0.3948402404785156
    },
    {
      "epoch": 0.000306842041015625,
      "model_forward_time": 0.1140749454498291,
      "step": 50273
    },
    {
      "epoch": 0.000306842041015625,
      "step": 50273,
      "training_step_time": 0.38959264755249023
    },
    {
      "epoch": 0.00030684814453125,
      "model_forward_time": 0.11459136009216309,
      "step": 50274
    },
    {
      "epoch": 0.00030684814453125,
      "step": 50274,
      "training_step_time": 0.4878885746002197
    },
    {
      "epoch": 0.000306854248046875,
      "model_forward_time": 0.11463236808776855,
      "step": 50275
    },
    {
      "epoch": 0.000306854248046875,
      "step": 50275,
      "training_step_time": 0.4652445316314697
    },
    {
      "epoch": 0.0003068603515625,
      "model_forward_time": 0.11411213874816895,
      "step": 50276
    },
    {
      "epoch": 0.0003068603515625,
      "step": 50276,
      "training_step_time": 0.4671823978424072
    },
    {
      "epoch": 0.000306866455078125,
      "model_forward_time": 0.1142892837524414,
      "step": 50277
    },
    {
      "epoch": 0.000306866455078125,
      "step": 50277,
      "training_step_time": 0.5177428722381592
    },
    {
      "epoch": 0.00030687255859375,
      "model_forward_time": 0.11470937728881836,
      "step": 50278
    },
    {
      "epoch": 0.00030687255859375,
      "step": 50278,
      "training_step_time": 0.40743255615234375
    },
    {
      "epoch": 0.000306878662109375,
      "model_forward_time": 0.11397218704223633,
      "step": 50279
    },
    {
      "epoch": 0.000306878662109375,
      "step": 50279,
      "training_step_time": 0.39210939407348633
    },
    {
      "epoch": 0.000306884765625,
      "grad_norm": 0.08502613008022308,
      "learning_rate": 7.005041692367154e-06,
      "loss": 0.0346,
      "step": 50280
    },
    {
      "epoch": 0.000306884765625,
      "model_forward_time": 0.11461305618286133,
      "step": 50280
    },
    {
      "epoch": 0.000306884765625,
      "step": 50280,
      "training_step_time": 0.3956418037414551
    },
    {
      "epoch": 0.000306890869140625,
      "model_forward_time": 0.11497068405151367,
      "step": 50281
    },
    {
      "epoch": 0.000306890869140625,
      "step": 50281,
      "training_step_time": 0.39933347702026367
    },
    {
      "epoch": 0.00030689697265625,
      "model_forward_time": 0.11504006385803223,
      "step": 50282
    },
    {
      "epoch": 0.00030689697265625,
      "step": 50282,
      "training_step_time": 0.41782593727111816
    },
    {
      "epoch": 0.000306903076171875,
      "model_forward_time": 0.11559200286865234,
      "step": 50283
    },
    {
      "epoch": 0.000306903076171875,
      "step": 50283,
      "training_step_time": 0.6268150806427002
    },
    {
      "epoch": 0.0003069091796875,
      "model_forward_time": 0.11498093605041504,
      "step": 50284
    },
    {
      "epoch": 0.0003069091796875,
      "step": 50284,
      "training_step_time": 0.4687035083770752
    },
    {
      "epoch": 0.000306915283203125,
      "model_forward_time": 0.11462688446044922,
      "step": 50285
    },
    {
      "epoch": 0.000306915283203125,
      "step": 50285,
      "training_step_time": 0.48856115341186523
    },
    {
      "epoch": 0.00030692138671875,
      "model_forward_time": 0.11417269706726074,
      "step": 50286
    },
    {
      "epoch": 0.00030692138671875,
      "step": 50286,
      "training_step_time": 0.4109783172607422
    },
    {
      "epoch": 0.000306927490234375,
      "model_forward_time": 0.11443066596984863,
      "step": 50287
    },
    {
      "epoch": 0.000306927490234375,
      "step": 50287,
      "training_step_time": 0.3876328468322754
    },
    {
      "epoch": 0.00030693359375,
      "model_forward_time": 0.11435747146606445,
      "step": 50288
    },
    {
      "epoch": 0.00030693359375,
      "step": 50288,
      "training_step_time": 0.41736316680908203
    },
    {
      "epoch": 0.000306939697265625,
      "model_forward_time": 0.11471223831176758,
      "step": 50289
    },
    {
      "epoch": 0.000306939697265625,
      "step": 50289,
      "training_step_time": 0.534998893737793
    },
    {
      "epoch": 0.00030694580078125,
      "grad_norm": 0.12300410866737366,
      "learning_rate": 6.9909809450855345e-06,
      "loss": 0.0392,
      "step": 50290
    },
    {
      "epoch": 0.00030694580078125,
      "model_forward_time": 0.11489033699035645,
      "step": 50290
    },
    {
      "epoch": 0.00030694580078125,
      "step": 50290,
      "training_step_time": 0.4861874580383301
    },
    {
      "epoch": 0.000306951904296875,
      "model_forward_time": 0.11507415771484375,
      "step": 50291
    },
    {
      "epoch": 0.000306951904296875,
      "step": 50291,
      "training_step_time": 0.3912346363067627
    },
    {
      "epoch": 0.0003069580078125,
      "model_forward_time": 0.11412525177001953,
      "step": 50292
    },
    {
      "epoch": 0.0003069580078125,
      "step": 50292,
      "training_step_time": 0.3905322551727295
    },
    {
      "epoch": 0.000306964111328125,
      "model_forward_time": 0.11446881294250488,
      "step": 50293
    },
    {
      "epoch": 0.000306964111328125,
      "step": 50293,
      "training_step_time": 0.4061298370361328
    },
    {
      "epoch": 0.00030697021484375,
      "model_forward_time": 0.11438179016113281,
      "step": 50294
    },
    {
      "epoch": 0.00030697021484375,
      "step": 50294,
      "training_step_time": 0.39186716079711914
    },
    {
      "epoch": 0.000306976318359375,
      "model_forward_time": 0.1151893138885498,
      "step": 50295
    },
    {
      "epoch": 0.000306976318359375,
      "step": 50295,
      "training_step_time": 0.7261741161346436
    },
    {
      "epoch": 0.000306982421875,
      "model_forward_time": 0.11439275741577148,
      "step": 50296
    },
    {
      "epoch": 0.000306982421875,
      "step": 50296,
      "training_step_time": 0.43762993812561035
    },
    {
      "epoch": 0.000306988525390625,
      "model_forward_time": 0.1149439811706543,
      "step": 50297
    },
    {
      "epoch": 0.000306988525390625,
      "step": 50297,
      "training_step_time": 0.48704075813293457
    },
    {
      "epoch": 0.00030699462890625,
      "model_forward_time": 0.11475181579589844,
      "step": 50298
    },
    {
      "epoch": 0.00030699462890625,
      "step": 50298,
      "training_step_time": 0.4140348434448242
    },
    {
      "epoch": 0.000307000732421875,
      "model_forward_time": 0.11423659324645996,
      "step": 50299
    },
    {
      "epoch": 0.000307000732421875,
      "step": 50299,
      "training_step_time": 0.4729180335998535
    },
    {
      "epoch": 0.0003070068359375,
      "grad_norm": 0.06527246534824371,
      "learning_rate": 6.976933262808322e-06,
      "loss": 0.0299,
      "step": 50300
    },
    {
      "epoch": 0.0003070068359375,
      "model_forward_time": 0.11404037475585938,
      "step": 50300
    },
    {
      "epoch": 0.0003070068359375,
      "step": 50300,
      "training_step_time": 0.38761329650878906
    },
    {
      "epoch": 0.000307012939453125,
      "model_forward_time": 0.11524152755737305,
      "step": 50301
    },
    {
      "epoch": 0.000307012939453125,
      "step": 50301,
      "training_step_time": 0.38686156272888184
    },
    {
      "epoch": 0.00030701904296875,
      "model_forward_time": 0.11481189727783203,
      "step": 50302
    },
    {
      "epoch": 0.00030701904296875,
      "step": 50302,
      "training_step_time": 0.45018887519836426
    },
    {
      "epoch": 0.000307025146484375,
      "model_forward_time": 0.11485719680786133,
      "step": 50303
    },
    {
      "epoch": 0.000307025146484375,
      "step": 50303,
      "training_step_time": 0.4066195487976074
    },
    {
      "epoch": 0.00030703125,
      "model_forward_time": 0.1145789623260498,
      "step": 50304
    },
    {
      "epoch": 0.00030703125,
      "step": 50304,
      "training_step_time": 0.4200899600982666
    },
    {
      "epoch": 0.000307037353515625,
      "model_forward_time": 0.11478209495544434,
      "step": 50305
    },
    {
      "epoch": 0.000307037353515625,
      "step": 50305,
      "training_step_time": 0.40007996559143066
    },
    {
      "epoch": 0.00030704345703125,
      "model_forward_time": 0.11483883857727051,
      "step": 50306
    },
    {
      "epoch": 0.00030704345703125,
      "step": 50306,
      "training_step_time": 0.398484468460083
    },
    {
      "epoch": 0.000307049560546875,
      "model_forward_time": 0.11478996276855469,
      "step": 50307
    },
    {
      "epoch": 0.000307049560546875,
      "step": 50307,
      "training_step_time": 0.5582883358001709
    },
    {
      "epoch": 0.0003070556640625,
      "model_forward_time": 0.11503863334655762,
      "step": 50308
    },
    {
      "epoch": 0.0003070556640625,
      "step": 50308,
      "training_step_time": 0.40398669242858887
    },
    {
      "epoch": 0.000307061767578125,
      "model_forward_time": 0.11472535133361816,
      "step": 50309
    },
    {
      "epoch": 0.000307061767578125,
      "step": 50309,
      "training_step_time": 0.4081447124481201
    },
    {
      "epoch": 0.00030706787109375,
      "grad_norm": 0.08873609453439713,
      "learning_rate": 6.962898649802823e-06,
      "loss": 0.0361,
      "step": 50310
    },
    {
      "epoch": 0.00030706787109375,
      "model_forward_time": 0.1149892807006836,
      "step": 50310
    },
    {
      "epoch": 0.00030706787109375,
      "step": 50310,
      "training_step_time": 0.43078088760375977
    },
    {
      "epoch": 0.000307073974609375,
      "model_forward_time": 0.11587405204772949,
      "step": 50311
    },
    {
      "epoch": 0.000307073974609375,
      "step": 50311,
      "training_step_time": 0.4335753917694092
    },
    {
      "epoch": 0.000307080078125,
      "model_forward_time": 0.11516261100769043,
      "step": 50312
    },
    {
      "epoch": 0.000307080078125,
      "step": 50312,
      "training_step_time": 0.46089911460876465
    },
    {
      "epoch": 0.000307086181640625,
      "model_forward_time": 0.11501550674438477,
      "step": 50313
    },
    {
      "epoch": 0.000307086181640625,
      "step": 50313,
      "training_step_time": 0.48276305198669434
    },
    {
      "epoch": 0.00030709228515625,
      "model_forward_time": 0.11454510688781738,
      "step": 50314
    },
    {
      "epoch": 0.00030709228515625,
      "step": 50314,
      "training_step_time": 0.3874952793121338
    },
    {
      "epoch": 0.000307098388671875,
      "model_forward_time": 0.11507987976074219,
      "step": 50315
    },
    {
      "epoch": 0.000307098388671875,
      "step": 50315,
      "training_step_time": 0.3792080879211426
    },
    {
      "epoch": 0.0003071044921875,
      "model_forward_time": 0.11501073837280273,
      "step": 50316
    },
    {
      "epoch": 0.0003071044921875,
      "step": 50316,
      "training_step_time": 0.411968469619751
    },
    {
      "epoch": 0.000307110595703125,
      "model_forward_time": 0.1145486831665039,
      "step": 50317
    },
    {
      "epoch": 0.000307110595703125,
      "step": 50317,
      "training_step_time": 0.42109012603759766
    },
    {
      "epoch": 0.00030711669921875,
      "model_forward_time": 0.11433959007263184,
      "step": 50318
    },
    {
      "epoch": 0.00030711669921875,
      "step": 50318,
      "training_step_time": 0.39187097549438477
    },
    {
      "epoch": 0.000307122802734375,
      "model_forward_time": 0.11510634422302246,
      "step": 50319
    },
    {
      "epoch": 0.000307122802734375,
      "step": 50319,
      "training_step_time": 0.6554079055786133
    },
    {
      "epoch": 0.00030712890625,
      "grad_norm": 0.09618597477674484,
      "learning_rate": 6.948877110332386e-06,
      "loss": 0.0367,
      "step": 50320
    },
    {
      "epoch": 0.00030712890625,
      "model_forward_time": 0.1142435073852539,
      "step": 50320
    },
    {
      "epoch": 0.00030712890625,
      "step": 50320,
      "training_step_time": 0.39437103271484375
    },
    {
      "epoch": 0.000307135009765625,
      "model_forward_time": 0.11483192443847656,
      "step": 50321
    },
    {
      "epoch": 0.000307135009765625,
      "step": 50321,
      "training_step_time": 0.3965427875518799
    },
    {
      "epoch": 0.00030714111328125,
      "model_forward_time": 0.11485123634338379,
      "step": 50322
    },
    {
      "epoch": 0.00030714111328125,
      "step": 50322,
      "training_step_time": 0.3865032196044922
    },
    {
      "epoch": 0.000307147216796875,
      "model_forward_time": 0.1147770881652832,
      "step": 50323
    },
    {
      "epoch": 0.000307147216796875,
      "step": 50323,
      "training_step_time": 0.38953495025634766
    },
    {
      "epoch": 0.0003071533203125,
      "model_forward_time": 0.1151735782623291,
      "step": 50324
    },
    {
      "epoch": 0.0003071533203125,
      "step": 50324,
      "training_step_time": 0.4402601718902588
    },
    {
      "epoch": 0.000307159423828125,
      "model_forward_time": 0.11479043960571289,
      "step": 50325
    },
    {
      "epoch": 0.000307159423828125,
      "step": 50325,
      "training_step_time": 0.546151876449585
    },
    {
      "epoch": 0.00030716552734375,
      "model_forward_time": 0.1146996021270752,
      "step": 50326
    },
    {
      "epoch": 0.00030716552734375,
      "step": 50326,
      "training_step_time": 0.4335024356842041
    },
    {
      "epoch": 0.000307171630859375,
      "model_forward_time": 0.11509943008422852,
      "step": 50327
    },
    {
      "epoch": 0.000307171630859375,
      "step": 50327,
      "training_step_time": 0.483997106552124
    },
    {
      "epoch": 0.000307177734375,
      "model_forward_time": 0.11461400985717773,
      "step": 50328
    },
    {
      "epoch": 0.000307177734375,
      "step": 50328,
      "training_step_time": 0.40815258026123047
    },
    {
      "epoch": 0.000307183837890625,
      "model_forward_time": 0.11506366729736328,
      "step": 50329
    },
    {
      "epoch": 0.000307183837890625,
      "step": 50329,
      "training_step_time": 0.3968672752380371
    },
    {
      "epoch": 0.00030718994140625,
      "grad_norm": 0.08005260676145554,
      "learning_rate": 6.934868648656373e-06,
      "loss": 0.0345,
      "step": 50330
    },
    {
      "epoch": 0.00030718994140625,
      "model_forward_time": 0.11477541923522949,
      "step": 50330
    },
    {
      "epoch": 0.00030718994140625,
      "step": 50330,
      "training_step_time": 0.4321451187133789
    },
    {
      "epoch": 0.000307196044921875,
      "model_forward_time": 0.11538195610046387,
      "step": 50331
    },
    {
      "epoch": 0.000307196044921875,
      "step": 50331,
      "training_step_time": 0.6274054050445557
    },
    {
      "epoch": 0.0003072021484375,
      "model_forward_time": 0.11451411247253418,
      "step": 50332
    },
    {
      "epoch": 0.0003072021484375,
      "step": 50332,
      "training_step_time": 0.3868377208709717
    },
    {
      "epoch": 0.000307208251953125,
      "model_forward_time": 0.11467623710632324,
      "step": 50333
    },
    {
      "epoch": 0.000307208251953125,
      "step": 50333,
      "training_step_time": 0.3817775249481201
    },
    {
      "epoch": 0.00030721435546875,
      "model_forward_time": 0.11455583572387695,
      "step": 50334
    },
    {
      "epoch": 0.00030721435546875,
      "step": 50334,
      "training_step_time": 0.3868527412414551
    },
    {
      "epoch": 0.000307220458984375,
      "model_forward_time": 0.11433911323547363,
      "step": 50335
    },
    {
      "epoch": 0.000307220458984375,
      "step": 50335,
      "training_step_time": 0.39040446281433105
    },
    {
      "epoch": 0.0003072265625,
      "model_forward_time": 0.11515164375305176,
      "step": 50336
    },
    {
      "epoch": 0.0003072265625,
      "step": 50336,
      "training_step_time": 0.39008021354675293
    },
    {
      "epoch": 0.000307232666015625,
      "model_forward_time": 0.11526727676391602,
      "step": 50337
    },
    {
      "epoch": 0.000307232666015625,
      "step": 50337,
      "training_step_time": 0.6287603378295898
    },
    {
      "epoch": 0.00030723876953125,
      "model_forward_time": 0.11495137214660645,
      "step": 50338
    },
    {
      "epoch": 0.00030723876953125,
      "step": 50338,
      "training_step_time": 0.446535587310791
    },
    {
      "epoch": 0.000307244873046875,
      "model_forward_time": 0.11468958854675293,
      "step": 50339
    },
    {
      "epoch": 0.000307244873046875,
      "step": 50339,
      "training_step_time": 0.4406449794769287
    },
    {
      "epoch": 0.0003072509765625,
      "grad_norm": 0.08659744262695312,
      "learning_rate": 6.92087326903022e-06,
      "loss": 0.0356,
      "step": 50340
    },
    {
      "epoch": 0.0003072509765625,
      "model_forward_time": 0.11511540412902832,
      "step": 50340
    },
    {
      "epoch": 0.0003072509765625,
      "step": 50340,
      "training_step_time": 0.41185569763183594
    },
    {
      "epoch": 0.000307257080078125,
      "model_forward_time": 0.11519885063171387,
      "step": 50341
    },
    {
      "epoch": 0.000307257080078125,
      "step": 50341,
      "training_step_time": 0.5097253322601318
    },
    {
      "epoch": 0.00030726318359375,
      "model_forward_time": 0.11453413963317871,
      "step": 50342
    },
    {
      "epoch": 0.00030726318359375,
      "step": 50342,
      "training_step_time": 0.38666796684265137
    },
    {
      "epoch": 0.000307269287109375,
      "model_forward_time": 0.11520528793334961,
      "step": 50343
    },
    {
      "epoch": 0.000307269287109375,
      "step": 50343,
      "training_step_time": 0.44498562812805176
    },
    {
      "epoch": 0.000307275390625,
      "model_forward_time": 0.11552309989929199,
      "step": 50344
    },
    {
      "epoch": 0.000307275390625,
      "step": 50344,
      "training_step_time": 0.4859433174133301
    },
    {
      "epoch": 0.000307281494140625,
      "model_forward_time": 0.11473393440246582,
      "step": 50345
    },
    {
      "epoch": 0.000307281494140625,
      "step": 50345,
      "training_step_time": 0.4579439163208008
    },
    {
      "epoch": 0.00030728759765625,
      "model_forward_time": 0.11469316482543945,
      "step": 50346
    },
    {
      "epoch": 0.00030728759765625,
      "step": 50346,
      "training_step_time": 0.5035703182220459
    },
    {
      "epoch": 0.000307293701171875,
      "model_forward_time": 0.11652326583862305,
      "step": 50347
    },
    {
      "epoch": 0.000307293701171875,
      "step": 50347,
      "training_step_time": 0.3867030143737793
    },
    {
      "epoch": 0.0003072998046875,
      "model_forward_time": 0.11438870429992676,
      "step": 50348
    },
    {
      "epoch": 0.0003072998046875,
      "step": 50348,
      "training_step_time": 0.3893404006958008
    },
    {
      "epoch": 0.000307305908203125,
      "model_forward_time": 0.1147007942199707,
      "step": 50349
    },
    {
      "epoch": 0.000307305908203125,
      "step": 50349,
      "training_step_time": 0.4503049850463867
    },
    {
      "epoch": 0.00030731201171875,
      "grad_norm": 0.13612057268619537,
      "learning_rate": 6.906890975705305e-06,
      "loss": 0.0326,
      "step": 50350
    },
    {
      "epoch": 0.00030731201171875,
      "model_forward_time": 0.11551022529602051,
      "step": 50350
    },
    {
      "epoch": 0.00030731201171875,
      "step": 50350,
      "training_step_time": 0.3896827697753906
    },
    {
      "epoch": 0.000307318115234375,
      "model_forward_time": 0.11475419998168945,
      "step": 50351
    },
    {
      "epoch": 0.000307318115234375,
      "step": 50351,
      "training_step_time": 0.4378845691680908
    },
    {
      "epoch": 0.00030732421875,
      "model_forward_time": 0.11551189422607422,
      "step": 50352
    },
    {
      "epoch": 0.00030732421875,
      "step": 50352,
      "training_step_time": 0.4085559844970703
    },
    {
      "epoch": 0.000307330322265625,
      "model_forward_time": 0.11554217338562012,
      "step": 50353
    },
    {
      "epoch": 0.000307330322265625,
      "step": 50353,
      "training_step_time": 0.39786219596862793
    },
    {
      "epoch": 0.00030733642578125,
      "model_forward_time": 0.11635136604309082,
      "step": 50354
    },
    {
      "epoch": 0.00030733642578125,
      "step": 50354,
      "training_step_time": 0.4395461082458496
    },
    {
      "epoch": 0.000307342529296875,
      "model_forward_time": 0.11559510231018066,
      "step": 50355
    },
    {
      "epoch": 0.000307342529296875,
      "step": 50355,
      "training_step_time": 0.6920850276947021
    },
    {
      "epoch": 0.0003073486328125,
      "model_forward_time": 0.11539411544799805,
      "step": 50356
    },
    {
      "epoch": 0.0003073486328125,
      "step": 50356,
      "training_step_time": 0.37476539611816406
    },
    {
      "epoch": 0.000307354736328125,
      "model_forward_time": 0.11523652076721191,
      "step": 50357
    },
    {
      "epoch": 0.000307354736328125,
      "step": 50357,
      "training_step_time": 0.39154791831970215
    },
    {
      "epoch": 0.00030736083984375,
      "model_forward_time": 0.1148221492767334,
      "step": 50358
    },
    {
      "epoch": 0.00030736083984375,
      "step": 50358,
      "training_step_time": 0.4683797359466553
    },
    {
      "epoch": 0.000307366943359375,
      "model_forward_time": 0.1139986515045166,
      "step": 50359
    },
    {
      "epoch": 0.000307366943359375,
      "step": 50359,
      "training_step_time": 0.390270471572876
    },
    {
      "epoch": 0.000307373046875,
      "grad_norm": 0.08180057257413864,
      "learning_rate": 6.892921772929112e-06,
      "loss": 0.0405,
      "step": 50360
    },
    {
      "epoch": 0.000307373046875,
      "model_forward_time": 0.11397600173950195,
      "step": 50360
    },
    {
      "epoch": 0.000307373046875,
      "step": 50360,
      "training_step_time": 0.4502561092376709
    },
    {
      "epoch": 0.000307379150390625,
      "model_forward_time": 0.11510133743286133,
      "step": 50361
    },
    {
      "epoch": 0.000307379150390625,
      "step": 50361,
      "training_step_time": 0.49706101417541504
    },
    {
      "epoch": 0.00030738525390625,
      "model_forward_time": 0.1149590015411377,
      "step": 50362
    },
    {
      "epoch": 0.00030738525390625,
      "step": 50362,
      "training_step_time": 0.38770055770874023
    },
    {
      "epoch": 0.000307391357421875,
      "model_forward_time": 0.11464667320251465,
      "step": 50363
    },
    {
      "epoch": 0.000307391357421875,
      "step": 50363,
      "training_step_time": 0.38930821418762207
    },
    {
      "epoch": 0.0003073974609375,
      "model_forward_time": 0.11464953422546387,
      "step": 50364
    },
    {
      "epoch": 0.0003073974609375,
      "step": 50364,
      "training_step_time": 0.43694257736206055
    },
    {
      "epoch": 0.000307403564453125,
      "model_forward_time": 0.11478996276855469,
      "step": 50365
    },
    {
      "epoch": 0.000307403564453125,
      "step": 50365,
      "training_step_time": 0.39850401878356934
    },
    {
      "epoch": 0.00030740966796875,
      "model_forward_time": 0.11507844924926758,
      "step": 50366
    },
    {
      "epoch": 0.00030740966796875,
      "step": 50366,
      "training_step_time": 0.3634805679321289
    },
    {
      "epoch": 0.000307415771484375,
      "model_forward_time": 0.11534786224365234,
      "step": 50367
    },
    {
      "epoch": 0.000307415771484375,
      "step": 50367,
      "training_step_time": 0.5575041770935059
    },
    {
      "epoch": 0.000307421875,
      "model_forward_time": 0.11447572708129883,
      "step": 50368
    },
    {
      "epoch": 0.000307421875,
      "step": 50368,
      "training_step_time": 0.4432487487792969
    },
    {
      "epoch": 0.000307427978515625,
      "model_forward_time": 0.11461639404296875,
      "step": 50369
    },
    {
      "epoch": 0.000307427978515625,
      "step": 50369,
      "training_step_time": 0.46518468856811523
    },
    {
      "epoch": 0.00030743408203125,
      "grad_norm": 0.08996116369962692,
      "learning_rate": 6.878965664945108e-06,
      "loss": 0.0358,
      "step": 50370
    },
    {
      "epoch": 0.00030743408203125,
      "model_forward_time": 0.11492633819580078,
      "step": 50370
    },
    {
      "epoch": 0.00030743408203125,
      "step": 50370,
      "training_step_time": 0.4032011032104492
    },
    {
      "epoch": 0.000307440185546875,
      "model_forward_time": 0.11536026000976562,
      "step": 50371
    },
    {
      "epoch": 0.000307440185546875,
      "step": 50371,
      "training_step_time": 0.39313673973083496
    },
    {
      "epoch": 0.0003074462890625,
      "model_forward_time": 0.11405754089355469,
      "step": 50372
    },
    {
      "epoch": 0.0003074462890625,
      "step": 50372,
      "training_step_time": 0.3988475799560547
    },
    {
      "epoch": 0.000307452392578125,
      "model_forward_time": 0.11451172828674316,
      "step": 50373
    },
    {
      "epoch": 0.000307452392578125,
      "step": 50373,
      "training_step_time": 0.5057108402252197
    },
    {
      "epoch": 0.00030745849609375,
      "model_forward_time": 0.11449122428894043,
      "step": 50374
    },
    {
      "epoch": 0.00030745849609375,
      "step": 50374,
      "training_step_time": 0.40048813819885254
    },
    {
      "epoch": 0.000307464599609375,
      "model_forward_time": 0.1149604320526123,
      "step": 50375
    },
    {
      "epoch": 0.000307464599609375,
      "step": 50375,
      "training_step_time": 0.39981818199157715
    },
    {
      "epoch": 0.000307470703125,
      "model_forward_time": 0.11514735221862793,
      "step": 50376
    },
    {
      "epoch": 0.000307470703125,
      "step": 50376,
      "training_step_time": 0.40119409561157227
    },
    {
      "epoch": 0.000307476806640625,
      "model_forward_time": 0.11453080177307129,
      "step": 50377
    },
    {
      "epoch": 0.000307476806640625,
      "step": 50377,
      "training_step_time": 0.4411125183105469
    },
    {
      "epoch": 0.00030748291015625,
      "model_forward_time": 0.1151268482208252,
      "step": 50378
    },
    {
      "epoch": 0.00030748291015625,
      "step": 50378,
      "training_step_time": 0.39778590202331543
    },
    {
      "epoch": 0.000307489013671875,
      "model_forward_time": 0.11461877822875977,
      "step": 50379
    },
    {
      "epoch": 0.000307489013671875,
      "step": 50379,
      "training_step_time": 0.5732088088989258
    },
    {
      "epoch": 0.0003074951171875,
      "grad_norm": 0.08081954717636108,
      "learning_rate": 6.865022655992798e-06,
      "loss": 0.035,
      "step": 50380
    },
    {
      "epoch": 0.0003074951171875,
      "model_forward_time": 0.1155245304107666,
      "step": 50380
    },
    {
      "epoch": 0.0003074951171875,
      "step": 50380,
      "training_step_time": 0.3651454448699951
    },
    {
      "epoch": 0.000307501220703125,
      "model_forward_time": 0.1151742935180664,
      "step": 50381
    },
    {
      "epoch": 0.000307501220703125,
      "step": 50381,
      "training_step_time": 0.46178460121154785
    },
    {
      "epoch": 0.00030750732421875,
      "model_forward_time": 0.1151125431060791,
      "step": 50382
    },
    {
      "epoch": 0.00030750732421875,
      "step": 50382,
      "training_step_time": 0.4152212142944336
    },
    {
      "epoch": 0.000307513427734375,
      "model_forward_time": 0.1148228645324707,
      "step": 50383
    },
    {
      "epoch": 0.000307513427734375,
      "step": 50383,
      "training_step_time": 0.4431748390197754
    },
    {
      "epoch": 0.00030751953125,
      "model_forward_time": 0.11475968360900879,
      "step": 50384
    },
    {
      "epoch": 0.00030751953125,
      "step": 50384,
      "training_step_time": 0.41179561614990234
    },
    {
      "epoch": 0.000307525634765625,
      "model_forward_time": 0.11552548408508301,
      "step": 50385
    },
    {
      "epoch": 0.000307525634765625,
      "step": 50385,
      "training_step_time": 0.6001889705657959
    },
    {
      "epoch": 0.00030753173828125,
      "model_forward_time": 0.11457419395446777,
      "step": 50386
    },
    {
      "epoch": 0.00030753173828125,
      "step": 50386,
      "training_step_time": 0.47458887100219727
    },
    {
      "epoch": 0.000307537841796875,
      "model_forward_time": 0.11522555351257324,
      "step": 50387
    },
    {
      "epoch": 0.000307537841796875,
      "step": 50387,
      "training_step_time": 0.4362154006958008
    },
    {
      "epoch": 0.0003075439453125,
      "model_forward_time": 0.11423635482788086,
      "step": 50388
    },
    {
      "epoch": 0.0003075439453125,
      "step": 50388,
      "training_step_time": 0.3988368511199951
    },
    {
      "epoch": 0.000307550048828125,
      "model_forward_time": 0.11438202857971191,
      "step": 50389
    },
    {
      "epoch": 0.000307550048828125,
      "step": 50389,
      "training_step_time": 0.3980879783630371
    },
    {
      "epoch": 0.00030755615234375,
      "grad_norm": 0.10791142284870148,
      "learning_rate": 6.851092750307686e-06,
      "loss": 0.0375,
      "step": 50390
    },
    {
      "epoch": 0.00030755615234375,
      "model_forward_time": 0.11453008651733398,
      "step": 50390
    },
    {
      "epoch": 0.00030755615234375,
      "step": 50390,
      "training_step_time": 0.39639711380004883
    },
    {
      "epoch": 0.000307562255859375,
      "model_forward_time": 0.11497974395751953,
      "step": 50391
    },
    {
      "epoch": 0.000307562255859375,
      "step": 50391,
      "training_step_time": 0.5785751342773438
    },
    {
      "epoch": 0.000307568359375,
      "model_forward_time": 0.11608600616455078,
      "step": 50392
    },
    {
      "epoch": 0.000307568359375,
      "step": 50392,
      "training_step_time": 0.3863978385925293
    },
    {
      "epoch": 0.000307574462890625,
      "model_forward_time": 0.11531829833984375,
      "step": 50393
    },
    {
      "epoch": 0.000307574462890625,
      "step": 50393,
      "training_step_time": 0.3993496894836426
    },
    {
      "epoch": 0.00030758056640625,
      "model_forward_time": 0.1160426139831543,
      "step": 50394
    },
    {
      "epoch": 0.00030758056640625,
      "step": 50394,
      "training_step_time": 0.39805006980895996
    },
    {
      "epoch": 0.000307586669921875,
      "model_forward_time": 0.11444258689880371,
      "step": 50395
    },
    {
      "epoch": 0.000307586669921875,
      "step": 50395,
      "training_step_time": 0.399738073348999
    },
    {
      "epoch": 0.0003075927734375,
      "model_forward_time": 0.11496353149414062,
      "step": 50396
    },
    {
      "epoch": 0.0003075927734375,
      "step": 50396,
      "training_step_time": 0.4429969787597656
    },
    {
      "epoch": 0.000307598876953125,
      "model_forward_time": 0.11528253555297852,
      "step": 50397
    },
    {
      "epoch": 0.000307598876953125,
      "step": 50397,
      "training_step_time": 0.6871528625488281
    },
    {
      "epoch": 0.00030760498046875,
      "model_forward_time": 0.11451244354248047,
      "step": 50398
    },
    {
      "epoch": 0.00030760498046875,
      "step": 50398,
      "training_step_time": 0.4045748710632324
    },
    {
      "epoch": 0.000307611083984375,
      "model_forward_time": 0.11462593078613281,
      "step": 50399
    },
    {
      "epoch": 0.000307611083984375,
      "step": 50399,
      "training_step_time": 0.3958292007446289
    },
    {
      "epoch": 0.0003076171875,
      "grad_norm": 0.08568736910820007,
      "learning_rate": 6.837175952121306e-06,
      "loss": 0.031,
      "step": 50400
    },
    {
      "epoch": 0.0003076171875,
      "model_forward_time": 0.11446404457092285,
      "step": 50400
    },
    {
      "epoch": 0.0003076171875,
      "step": 50400,
      "training_step_time": 0.3931255340576172
    },
    {
      "epoch": 0.000307623291015625,
      "model_forward_time": 0.11522054672241211,
      "step": 50401
    },
    {
      "epoch": 0.000307623291015625,
      "step": 50401,
      "training_step_time": 0.4071986675262451
    },
    {
      "epoch": 0.00030762939453125,
      "model_forward_time": 0.11420035362243652,
      "step": 50402
    },
    {
      "epoch": 0.00030762939453125,
      "step": 50402,
      "training_step_time": 0.41236448287963867
    },
    {
      "epoch": 0.000307635498046875,
      "model_forward_time": 0.11504030227661133,
      "step": 50403
    },
    {
      "epoch": 0.000307635498046875,
      "step": 50403,
      "training_step_time": 0.6268751621246338
    },
    {
      "epoch": 0.0003076416015625,
      "model_forward_time": 0.11455202102661133,
      "step": 50404
    },
    {
      "epoch": 0.0003076416015625,
      "step": 50404,
      "training_step_time": 0.41774559020996094
    },
    {
      "epoch": 0.000307647705078125,
      "model_forward_time": 0.1142423152923584,
      "step": 50405
    },
    {
      "epoch": 0.000307647705078125,
      "step": 50405,
      "training_step_time": 0.382265567779541
    },
    {
      "epoch": 0.00030765380859375,
      "model_forward_time": 0.11451601982116699,
      "step": 50406
    },
    {
      "epoch": 0.00030765380859375,
      "step": 50406,
      "training_step_time": 0.39286303520202637
    },
    {
      "epoch": 0.000307659912109375,
      "model_forward_time": 0.11495685577392578,
      "step": 50407
    },
    {
      "epoch": 0.000307659912109375,
      "step": 50407,
      "training_step_time": 0.40322303771972656
    },
    {
      "epoch": 0.000307666015625,
      "model_forward_time": 0.11513209342956543,
      "step": 50408
    },
    {
      "epoch": 0.000307666015625,
      "step": 50408,
      "training_step_time": 0.3977501392364502
    },
    {
      "epoch": 0.000307672119140625,
      "model_forward_time": 0.1151127815246582,
      "step": 50409
    },
    {
      "epoch": 0.000307672119140625,
      "step": 50409,
      "training_step_time": 0.5758571624755859
    },
    {
      "epoch": 0.00030767822265625,
      "grad_norm": 0.09748746454715729,
      "learning_rate": 6.8232722656612405e-06,
      "loss": 0.0369,
      "step": 50410
    },
    {
      "epoch": 0.00030767822265625,
      "model_forward_time": 0.11468005180358887,
      "step": 50410
    },
    {
      "epoch": 0.00030767822265625,
      "step": 50410,
      "training_step_time": 0.4232344627380371
    },
    {
      "epoch": 0.000307684326171875,
      "model_forward_time": 0.1147012710571289,
      "step": 50411
    },
    {
      "epoch": 0.000307684326171875,
      "step": 50411,
      "training_step_time": 0.40898942947387695
    },
    {
      "epoch": 0.0003076904296875,
      "model_forward_time": 0.11447405815124512,
      "step": 50412
    },
    {
      "epoch": 0.0003076904296875,
      "step": 50412,
      "training_step_time": 0.511204719543457
    },
    {
      "epoch": 0.000307696533203125,
      "model_forward_time": 0.11468815803527832,
      "step": 50413
    },
    {
      "epoch": 0.000307696533203125,
      "step": 50413,
      "training_step_time": 0.38980603218078613
    },
    {
      "epoch": 0.00030770263671875,
      "model_forward_time": 0.11501622200012207,
      "step": 50414
    },
    {
      "epoch": 0.00030770263671875,
      "step": 50414,
      "training_step_time": 0.47451305389404297
    },
    {
      "epoch": 0.000307708740234375,
      "model_forward_time": 0.11520552635192871,
      "step": 50415
    },
    {
      "epoch": 0.000307708740234375,
      "step": 50415,
      "training_step_time": 0.5756301879882812
    },
    {
      "epoch": 0.00030771484375,
      "model_forward_time": 0.11438298225402832,
      "step": 50416
    },
    {
      "epoch": 0.00030771484375,
      "step": 50416,
      "training_step_time": 0.3999001979827881
    },
    {
      "epoch": 0.000307720947265625,
      "model_forward_time": 0.1148221492767334,
      "step": 50417
    },
    {
      "epoch": 0.000307720947265625,
      "step": 50417,
      "training_step_time": 0.3971226215362549
    },
    {
      "epoch": 0.00030772705078125,
      "model_forward_time": 0.11420273780822754,
      "step": 50418
    },
    {
      "epoch": 0.00030772705078125,
      "step": 50418,
      "training_step_time": 0.39806485176086426
    },
    {
      "epoch": 0.000307733154296875,
      "model_forward_time": 0.11527800559997559,
      "step": 50419
    },
    {
      "epoch": 0.000307733154296875,
      "step": 50419,
      "training_step_time": 0.39075660705566406
    },
    {
      "epoch": 0.0003077392578125,
      "grad_norm": 0.07765155285596848,
      "learning_rate": 6.809381695151029e-06,
      "loss": 0.0343,
      "step": 50420
    },
    {
      "epoch": 0.0003077392578125,
      "model_forward_time": 0.11442947387695312,
      "step": 50420
    },
    {
      "epoch": 0.0003077392578125,
      "step": 50420,
      "training_step_time": 0.39340734481811523
    },
    {
      "epoch": 0.000307745361328125,
      "model_forward_time": 0.1150522232055664,
      "step": 50421
    },
    {
      "epoch": 0.000307745361328125,
      "step": 50421,
      "training_step_time": 0.4057958126068115
    },
    {
      "epoch": 0.00030775146484375,
      "model_forward_time": 0.11495256423950195,
      "step": 50422
    },
    {
      "epoch": 0.00030775146484375,
      "step": 50422,
      "training_step_time": 0.3646540641784668
    },
    {
      "epoch": 0.000307757568359375,
      "model_forward_time": 0.11588644981384277,
      "step": 50423
    },
    {
      "epoch": 0.000307757568359375,
      "step": 50423,
      "training_step_time": 0.46750497817993164
    },
    {
      "epoch": 0.000307763671875,
      "model_forward_time": 0.1150972843170166,
      "step": 50424
    },
    {
      "epoch": 0.000307763671875,
      "step": 50424,
      "training_step_time": 0.4255404472351074
    },
    {
      "epoch": 0.000307769775390625,
      "model_forward_time": 0.11502647399902344,
      "step": 50425
    },
    {
      "epoch": 0.000307769775390625,
      "step": 50425,
      "training_step_time": 0.3991737365722656
    },
    {
      "epoch": 0.00030777587890625,
      "model_forward_time": 0.11538410186767578,
      "step": 50426
    },
    {
      "epoch": 0.00030777587890625,
      "step": 50426,
      "training_step_time": 0.4758436679840088
    },
    {
      "epoch": 0.000307781982421875,
      "model_forward_time": 0.11445808410644531,
      "step": 50427
    },
    {
      "epoch": 0.000307781982421875,
      "step": 50427,
      "training_step_time": 0.40209102630615234
    },
    {
      "epoch": 0.0003077880859375,
      "model_forward_time": 0.11500883102416992,
      "step": 50428
    },
    {
      "epoch": 0.0003077880859375,
      "step": 50428,
      "training_step_time": 0.49389147758483887
    },
    {
      "epoch": 0.000307794189453125,
      "model_forward_time": 0.11509418487548828,
      "step": 50429
    },
    {
      "epoch": 0.000307794189453125,
      "step": 50429,
      "training_step_time": 0.4497239589691162
    },
    {
      "epoch": 0.00030780029296875,
      "grad_norm": 0.08629300445318222,
      "learning_rate": 6.795504244810285e-06,
      "loss": 0.0325,
      "step": 50430
    },
    {
      "epoch": 0.00030780029296875,
      "model_forward_time": 0.11506438255310059,
      "step": 50430
    },
    {
      "epoch": 0.00030780029296875,
      "step": 50430,
      "training_step_time": 0.40961194038391113
    },
    {
      "epoch": 0.000307806396484375,
      "model_forward_time": 0.11487221717834473,
      "step": 50431
    },
    {
      "epoch": 0.000307806396484375,
      "step": 50431,
      "training_step_time": 0.3936915397644043
    },
    {
      "epoch": 0.0003078125,
      "model_forward_time": 0.11473464965820312,
      "step": 50432
    },
    {
      "epoch": 0.0003078125,
      "step": 50432,
      "training_step_time": 0.3938007354736328
    },
    {
      "epoch": 0.000307818603515625,
      "model_forward_time": 0.11554408073425293,
      "step": 50433
    },
    {
      "epoch": 0.000307818603515625,
      "step": 50433,
      "training_step_time": 0.39937591552734375
    },
    {
      "epoch": 0.00030782470703125,
      "model_forward_time": 0.11462569236755371,
      "step": 50434
    },
    {
      "epoch": 0.00030782470703125,
      "step": 50434,
      "training_step_time": 0.39797401428222656
    },
    {
      "epoch": 0.000307830810546875,
      "model_forward_time": 0.1151125431060791,
      "step": 50435
    },
    {
      "epoch": 0.000307830810546875,
      "step": 50435,
      "training_step_time": 0.4024477005004883
    },
    {
      "epoch": 0.0003078369140625,
      "model_forward_time": 0.11568641662597656,
      "step": 50436
    },
    {
      "epoch": 0.0003078369140625,
      "step": 50436,
      "training_step_time": 0.39906883239746094
    },
    {
      "epoch": 0.000307843017578125,
      "model_forward_time": 0.11479330062866211,
      "step": 50437
    },
    {
      "epoch": 0.000307843017578125,
      "step": 50437,
      "training_step_time": 0.5001306533813477
    },
    {
      "epoch": 0.00030784912109375,
      "model_forward_time": 0.11531925201416016,
      "step": 50438
    },
    {
      "epoch": 0.00030784912109375,
      "step": 50438,
      "training_step_time": 0.5054299831390381
    },
    {
      "epoch": 0.000307855224609375,
      "model_forward_time": 0.11480236053466797,
      "step": 50439
    },
    {
      "epoch": 0.000307855224609375,
      "step": 50439,
      "training_step_time": 0.4209117889404297
    },
    {
      "epoch": 0.000307861328125,
      "grad_norm": 0.09411763399839401,
      "learning_rate": 6.781639918854604e-06,
      "loss": 0.0375,
      "step": 50440
    },
    {
      "epoch": 0.000307861328125,
      "model_forward_time": 0.11488485336303711,
      "step": 50440
    },
    {
      "epoch": 0.000307861328125,
      "step": 50440,
      "training_step_time": 0.43929171562194824
    },
    {
      "epoch": 0.000307867431640625,
      "model_forward_time": 0.11546182632446289,
      "step": 50441
    },
    {
      "epoch": 0.000307867431640625,
      "step": 50441,
      "training_step_time": 0.504915714263916
    },
    {
      "epoch": 0.00030787353515625,
      "model_forward_time": 0.11484193801879883,
      "step": 50442
    },
    {
      "epoch": 0.00030787353515625,
      "step": 50442,
      "training_step_time": 0.43442678451538086
    },
    {
      "epoch": 0.000307879638671875,
      "model_forward_time": 0.11477470397949219,
      "step": 50443
    },
    {
      "epoch": 0.000307879638671875,
      "step": 50443,
      "training_step_time": 0.4200859069824219
    },
    {
      "epoch": 0.0003078857421875,
      "model_forward_time": 0.11447787284851074,
      "step": 50444
    },
    {
      "epoch": 0.0003078857421875,
      "step": 50444,
      "training_step_time": 0.40811872482299805
    },
    {
      "epoch": 0.000307891845703125,
      "model_forward_time": 0.11500811576843262,
      "step": 50445
    },
    {
      "epoch": 0.000307891845703125,
      "step": 50445,
      "training_step_time": 0.39931273460388184
    },
    {
      "epoch": 0.00030789794921875,
      "model_forward_time": 0.11486959457397461,
      "step": 50446
    },
    {
      "epoch": 0.00030789794921875,
      "step": 50446,
      "training_step_time": 0.3914952278137207
    },
    {
      "epoch": 0.000307904052734375,
      "model_forward_time": 0.11565518379211426,
      "step": 50447
    },
    {
      "epoch": 0.000307904052734375,
      "step": 50447,
      "training_step_time": 0.3929440975189209
    },
    {
      "epoch": 0.00030791015625,
      "model_forward_time": 0.11581206321716309,
      "step": 50448
    },
    {
      "epoch": 0.00030791015625,
      "step": 50448,
      "training_step_time": 0.40488529205322266
    },
    {
      "epoch": 0.000307916259765625,
      "model_forward_time": 0.11590743064880371,
      "step": 50449
    },
    {
      "epoch": 0.000307916259765625,
      "step": 50449,
      "training_step_time": 0.3970215320587158
    },
    {
      "epoch": 0.00030792236328125,
      "grad_norm": 0.08675703406333923,
      "learning_rate": 6.7677887214956e-06,
      "loss": 0.0326,
      "step": 50450
    },
    {
      "epoch": 0.00030792236328125,
      "model_forward_time": 0.1151895523071289,
      "step": 50450
    },
    {
      "epoch": 0.00030792236328125,
      "step": 50450,
      "training_step_time": 0.39865946769714355
    },
    {
      "epoch": 0.000307928466796875,
      "model_forward_time": 0.11510324478149414,
      "step": 50451
    },
    {
      "epoch": 0.000307928466796875,
      "step": 50451,
      "training_step_time": 0.45571303367614746
    },
    {
      "epoch": 0.0003079345703125,
      "model_forward_time": 0.1157371997833252,
      "step": 50452
    },
    {
      "epoch": 0.0003079345703125,
      "step": 50452,
      "training_step_time": 0.5124728679656982
    },
    {
      "epoch": 0.000307940673828125,
      "model_forward_time": 0.11516594886779785,
      "step": 50453
    },
    {
      "epoch": 0.000307940673828125,
      "step": 50453,
      "training_step_time": 0.4279971122741699
    },
    {
      "epoch": 0.00030794677734375,
      "model_forward_time": 0.11525177955627441,
      "step": 50454
    },
    {
      "epoch": 0.00030794677734375,
      "step": 50454,
      "training_step_time": 0.39728832244873047
    },
    {
      "epoch": 0.000307952880859375,
      "model_forward_time": 0.11490249633789062,
      "step": 50455
    },
    {
      "epoch": 0.000307952880859375,
      "step": 50455,
      "training_step_time": 0.47810888290405273
    },
    {
      "epoch": 0.000307958984375,
      "model_forward_time": 0.1151576042175293,
      "step": 50456
    },
    {
      "epoch": 0.000307958984375,
      "step": 50456,
      "training_step_time": 0.481259822845459
    },
    {
      "epoch": 0.000307965087890625,
      "model_forward_time": 0.11453008651733398,
      "step": 50457
    },
    {
      "epoch": 0.000307965087890625,
      "step": 50457,
      "training_step_time": 0.4810473918914795
    },
    {
      "epoch": 0.00030797119140625,
      "model_forward_time": 0.11527848243713379,
      "step": 50458
    },
    {
      "epoch": 0.00030797119140625,
      "step": 50458,
      "training_step_time": 0.4438140392303467
    },
    {
      "epoch": 0.000307977294921875,
      "model_forward_time": 0.11460566520690918,
      "step": 50459
    },
    {
      "epoch": 0.000307977294921875,
      "step": 50459,
      "training_step_time": 0.3912198543548584
    },
    {
      "epoch": 0.0003079833984375,
      "grad_norm": 0.0676722377538681,
      "learning_rate": 6.753950656940905e-06,
      "loss": 0.0363,
      "step": 50460
    },
    {
      "epoch": 0.0003079833984375,
      "model_forward_time": 0.11402130126953125,
      "step": 50460
    },
    {
      "epoch": 0.0003079833984375,
      "step": 50460,
      "training_step_time": 0.3985707759857178
    },
    {
      "epoch": 0.000307989501953125,
      "model_forward_time": 0.1153876781463623,
      "step": 50461
    },
    {
      "epoch": 0.000307989501953125,
      "step": 50461,
      "training_step_time": 0.3872253894805908
    },
    {
      "epoch": 0.00030799560546875,
      "model_forward_time": 0.1151723861694336,
      "step": 50462
    },
    {
      "epoch": 0.00030799560546875,
      "step": 50462,
      "training_step_time": 0.3875908851623535
    },
    {
      "epoch": 0.000308001708984375,
      "model_forward_time": 0.11454033851623535,
      "step": 50463
    },
    {
      "epoch": 0.000308001708984375,
      "step": 50463,
      "training_step_time": 0.3933582305908203
    },
    {
      "epoch": 0.0003080078125,
      "model_forward_time": 0.11479520797729492,
      "step": 50464
    },
    {
      "epoch": 0.0003080078125,
      "step": 50464,
      "training_step_time": 0.5406167507171631
    },
    {
      "epoch": 0.000308013916015625,
      "model_forward_time": 0.11455321311950684,
      "step": 50465
    },
    {
      "epoch": 0.000308013916015625,
      "step": 50465,
      "training_step_time": 0.38759684562683105
    },
    {
      "epoch": 0.00030802001953125,
      "model_forward_time": 0.11440515518188477,
      "step": 50466
    },
    {
      "epoch": 0.00030802001953125,
      "step": 50466,
      "training_step_time": 0.39910221099853516
    },
    {
      "epoch": 0.000308026123046875,
      "model_forward_time": 0.11514997482299805,
      "step": 50467
    },
    {
      "epoch": 0.000308026123046875,
      "step": 50467,
      "training_step_time": 0.4401888847351074
    },
    {
      "epoch": 0.0003080322265625,
      "model_forward_time": 0.11509108543395996,
      "step": 50468
    },
    {
      "epoch": 0.0003080322265625,
      "step": 50468,
      "training_step_time": 0.45534539222717285
    },
    {
      "epoch": 0.000308038330078125,
      "model_forward_time": 0.11497855186462402,
      "step": 50469
    },
    {
      "epoch": 0.000308038330078125,
      "step": 50469,
      "training_step_time": 0.42476916313171387
    },
    {
      "epoch": 0.00030804443359375,
      "grad_norm": 0.0876101702451706,
      "learning_rate": 6.740125729394148e-06,
      "loss": 0.0359,
      "step": 50470
    },
    {
      "epoch": 0.00030804443359375,
      "model_forward_time": 0.11544919013977051,
      "step": 50470
    },
    {
      "epoch": 0.00030804443359375,
      "step": 50470,
      "training_step_time": 0.5346572399139404
    },
    {
      "epoch": 0.000308050537109375,
      "model_forward_time": 0.11437249183654785,
      "step": 50471
    },
    {
      "epoch": 0.000308050537109375,
      "step": 50471,
      "training_step_time": 0.4153785705566406
    },
    {
      "epoch": 0.000308056640625,
      "model_forward_time": 0.11489272117614746,
      "step": 50472
    },
    {
      "epoch": 0.000308056640625,
      "step": 50472,
      "training_step_time": 0.42926597595214844
    },
    {
      "epoch": 0.000308062744140625,
      "model_forward_time": 0.11502385139465332,
      "step": 50473
    },
    {
      "epoch": 0.000308062744140625,
      "step": 50473,
      "training_step_time": 0.4010956287384033
    },
    {
      "epoch": 0.00030806884765625,
      "model_forward_time": 0.11469221115112305,
      "step": 50474
    },
    {
      "epoch": 0.00030806884765625,
      "step": 50474,
      "training_step_time": 0.39470744132995605
    },
    {
      "epoch": 0.000308074951171875,
      "model_forward_time": 0.11514663696289062,
      "step": 50475
    },
    {
      "epoch": 0.000308074951171875,
      "step": 50475,
      "training_step_time": 0.39746594429016113
    },
    {
      "epoch": 0.0003080810546875,
      "model_forward_time": 0.11426496505737305,
      "step": 50476
    },
    {
      "epoch": 0.0003080810546875,
      "step": 50476,
      "training_step_time": 0.39588212966918945
    },
    {
      "epoch": 0.000308087158203125,
      "model_forward_time": 0.11500144004821777,
      "step": 50477
    },
    {
      "epoch": 0.000308087158203125,
      "step": 50477,
      "training_step_time": 0.39691638946533203
    },
    {
      "epoch": 0.00030809326171875,
      "model_forward_time": 0.11469459533691406,
      "step": 50478
    },
    {
      "epoch": 0.00030809326171875,
      "step": 50478,
      "training_step_time": 0.40288662910461426
    },
    {
      "epoch": 0.000308099365234375,
      "model_forward_time": 0.11515116691589355,
      "step": 50479
    },
    {
      "epoch": 0.000308099365234375,
      "step": 50479,
      "training_step_time": 0.40294337272644043
    },
    {
      "epoch": 0.00030810546875,
      "grad_norm": 0.06022714823484421,
      "learning_rate": 6.726313943054991e-06,
      "loss": 0.0325,
      "step": 50480
    },
    {
      "epoch": 0.00030810546875,
      "model_forward_time": 0.11481738090515137,
      "step": 50480
    },
    {
      "epoch": 0.00030810546875,
      "step": 50480,
      "training_step_time": 0.36791467666625977
    },
    {
      "epoch": 0.000308111572265625,
      "model_forward_time": 0.11566162109375,
      "step": 50481
    },
    {
      "epoch": 0.000308111572265625,
      "step": 50481,
      "training_step_time": 0.4543125629425049
    },
    {
      "epoch": 0.00030811767578125,
      "model_forward_time": 0.11519289016723633,
      "step": 50482
    },
    {
      "epoch": 0.00030811767578125,
      "step": 50482,
      "training_step_time": 0.5042309761047363
    },
    {
      "epoch": 0.000308123779296875,
      "model_forward_time": 0.11533665657043457,
      "step": 50483
    },
    {
      "epoch": 0.000308123779296875,
      "step": 50483,
      "training_step_time": 0.4501152038574219
    },
    {
      "epoch": 0.0003081298828125,
      "model_forward_time": 0.11592912673950195,
      "step": 50484
    },
    {
      "epoch": 0.0003081298828125,
      "step": 50484,
      "training_step_time": 0.4961075782775879
    },
    {
      "epoch": 0.000308135986328125,
      "model_forward_time": 0.11492681503295898,
      "step": 50485
    },
    {
      "epoch": 0.000308135986328125,
      "step": 50485,
      "training_step_time": 0.4964919090270996
    },
    {
      "epoch": 0.00030814208984375,
      "model_forward_time": 0.1141667366027832,
      "step": 50486
    },
    {
      "epoch": 0.00030814208984375,
      "step": 50486,
      "training_step_time": 0.44148802757263184
    },
    {
      "epoch": 0.000308148193359375,
      "model_forward_time": 0.11606597900390625,
      "step": 50487
    },
    {
      "epoch": 0.000308148193359375,
      "step": 50487,
      "training_step_time": 0.39940547943115234
    },
    {
      "epoch": 0.000308154296875,
      "model_forward_time": 0.11460995674133301,
      "step": 50488
    },
    {
      "epoch": 0.000308154296875,
      "step": 50488,
      "training_step_time": 0.39227890968322754
    },
    {
      "epoch": 0.000308160400390625,
      "model_forward_time": 0.11514520645141602,
      "step": 50489
    },
    {
      "epoch": 0.000308160400390625,
      "step": 50489,
      "training_step_time": 0.40325140953063965
    },
    {
      "epoch": 0.00030816650390625,
      "grad_norm": 0.11840686947107315,
      "learning_rate": 6.712515302119077e-06,
      "loss": 0.0397,
      "step": 50490
    },
    {
      "epoch": 0.00030816650390625,
      "model_forward_time": 0.11463212966918945,
      "step": 50490
    },
    {
      "epoch": 0.00030816650390625,
      "step": 50490,
      "training_step_time": 0.3988337516784668
    },
    {
      "epoch": 0.000308172607421875,
      "model_forward_time": 0.11518311500549316,
      "step": 50491
    },
    {
      "epoch": 0.000308172607421875,
      "step": 50491,
      "training_step_time": 0.40489649772644043
    },
    {
      "epoch": 0.0003081787109375,
      "model_forward_time": 0.11494088172912598,
      "step": 50492
    },
    {
      "epoch": 0.0003081787109375,
      "step": 50492,
      "training_step_time": 0.393657922744751
    },
    {
      "epoch": 0.000308184814453125,
      "model_forward_time": 0.1146993637084961,
      "step": 50493
    },
    {
      "epoch": 0.000308184814453125,
      "step": 50493,
      "training_step_time": 0.4004557132720947
    },
    {
      "epoch": 0.00030819091796875,
      "model_forward_time": 0.11516833305358887,
      "step": 50494
    },
    {
      "epoch": 0.00030819091796875,
      "step": 50494,
      "training_step_time": 0.41195082664489746
    },
    {
      "epoch": 0.000308197021484375,
      "model_forward_time": 0.11529731750488281,
      "step": 50495
    },
    {
      "epoch": 0.000308197021484375,
      "step": 50495,
      "training_step_time": 0.473935604095459
    },
    {
      "epoch": 0.000308203125,
      "model_forward_time": 0.11522340774536133,
      "step": 50496
    },
    {
      "epoch": 0.000308203125,
      "step": 50496,
      "training_step_time": 0.4811079502105713
    },
    {
      "epoch": 0.000308209228515625,
      "model_forward_time": 0.11614322662353516,
      "step": 50497
    },
    {
      "epoch": 0.000308209228515625,
      "step": 50497,
      "training_step_time": 0.4342324733734131
    },
    {
      "epoch": 0.00030821533203125,
      "model_forward_time": 0.11497902870178223,
      "step": 50498
    },
    {
      "epoch": 0.00030821533203125,
      "step": 50498,
      "training_step_time": 0.4564681053161621
    },
    {
      "epoch": 0.000308221435546875,
      "model_forward_time": 0.11500048637390137,
      "step": 50499
    },
    {
      "epoch": 0.000308221435546875,
      "step": 50499,
      "training_step_time": 0.3899674415588379
    },
    {
      "epoch": 0.0003082275390625,
      "grad_norm": 0.08964019268751144,
      "learning_rate": 6.698729810778065e-06,
      "loss": 0.0325,
      "step": 50500
    },
    {
      "epoch": 0.0003082275390625,
      "model_forward_time": 0.11423134803771973,
      "step": 50500
    },
    {
      "epoch": 0.0003082275390625,
      "step": 50500,
      "training_step_time": 0.4745655059814453
    },
    {
      "epoch": 0.000308233642578125,
      "model_forward_time": 0.11463379859924316,
      "step": 50501
    },
    {
      "epoch": 0.000308233642578125,
      "step": 50501,
      "training_step_time": 0.3935554027557373
    },
    {
      "epoch": 0.00030823974609375,
      "model_forward_time": 0.11479926109313965,
      "step": 50502
    },
    {
      "epoch": 0.00030823974609375,
      "step": 50502,
      "training_step_time": 0.48996901512145996
    },
    {
      "epoch": 0.000308245849609375,
      "model_forward_time": 0.11520147323608398,
      "step": 50503
    },
    {
      "epoch": 0.000308245849609375,
      "step": 50503,
      "training_step_time": 0.39010190963745117
    },
    {
      "epoch": 0.000308251953125,
      "model_forward_time": 0.11534667015075684,
      "step": 50504
    },
    {
      "epoch": 0.000308251953125,
      "step": 50504,
      "training_step_time": 0.39676880836486816
    },
    {
      "epoch": 0.000308258056640625,
      "model_forward_time": 0.11516141891479492,
      "step": 50505
    },
    {
      "epoch": 0.000308258056640625,
      "step": 50505,
      "training_step_time": 0.3977515697479248
    },
    {
      "epoch": 0.00030826416015625,
      "model_forward_time": 0.11507129669189453,
      "step": 50506
    },
    {
      "epoch": 0.00030826416015625,
      "step": 50506,
      "training_step_time": 0.398101806640625
    },
    {
      "epoch": 0.000308270263671875,
      "model_forward_time": 0.11518740653991699,
      "step": 50507
    },
    {
      "epoch": 0.000308270263671875,
      "step": 50507,
      "training_step_time": 0.40160465240478516
    },
    {
      "epoch": 0.0003082763671875,
      "model_forward_time": 0.11513876914978027,
      "step": 50508
    },
    {
      "epoch": 0.0003082763671875,
      "step": 50508,
      "training_step_time": 0.39470767974853516
    },
    {
      "epoch": 0.000308282470703125,
      "model_forward_time": 0.11582040786743164,
      "step": 50509
    },
    {
      "epoch": 0.000308282470703125,
      "step": 50509,
      "training_step_time": 0.46944308280944824
    },
    {
      "epoch": 0.00030828857421875,
      "grad_norm": 0.07135941088199615,
      "learning_rate": 6.684957473219644e-06,
      "loss": 0.0341,
      "step": 50510
    },
    {
      "epoch": 0.00030828857421875,
      "model_forward_time": 0.11551570892333984,
      "step": 50510
    },
    {
      "epoch": 0.00030828857421875,
      "step": 50510,
      "training_step_time": 0.4978957176208496
    },
    {
      "epoch": 0.000308294677734375,
      "model_forward_time": 0.11662673950195312,
      "step": 50511
    },
    {
      "epoch": 0.000308294677734375,
      "step": 50511,
      "training_step_time": 0.4463822841644287
    },
    {
      "epoch": 0.00030830078125,
      "model_forward_time": 0.11522603034973145,
      "step": 50512
    },
    {
      "epoch": 0.00030830078125,
      "step": 50512,
      "training_step_time": 0.40149569511413574
    },
    {
      "epoch": 0.000308306884765625,
      "model_forward_time": 0.1157066822052002,
      "step": 50513
    },
    {
      "epoch": 0.000308306884765625,
      "step": 50513,
      "training_step_time": 0.4613800048828125
    },
    {
      "epoch": 0.00030831298828125,
      "model_forward_time": 0.11587977409362793,
      "step": 50514
    },
    {
      "epoch": 0.00030831298828125,
      "step": 50514,
      "training_step_time": 0.4312591552734375
    },
    {
      "epoch": 0.000308319091796875,
      "model_forward_time": 0.1152186393737793,
      "step": 50515
    },
    {
      "epoch": 0.000308319091796875,
      "step": 50515,
      "training_step_time": 0.4102785587310791
    },
    {
      "epoch": 0.0003083251953125,
      "model_forward_time": 0.11565637588500977,
      "step": 50516
    },
    {
      "epoch": 0.0003083251953125,
      "step": 50516,
      "training_step_time": 0.45149660110473633
    },
    {
      "epoch": 0.000308331298828125,
      "model_forward_time": 0.11539793014526367,
      "step": 50517
    },
    {
      "epoch": 0.000308331298828125,
      "step": 50517,
      "training_step_time": 0.3881821632385254
    },
    {
      "epoch": 0.00030833740234375,
      "model_forward_time": 0.11563944816589355,
      "step": 50518
    },
    {
      "epoch": 0.00030833740234375,
      "step": 50518,
      "training_step_time": 0.39327573776245117
    },
    {
      "epoch": 0.000308343505859375,
      "model_forward_time": 0.11436343193054199,
      "step": 50519
    },
    {
      "epoch": 0.000308343505859375,
      "step": 50519,
      "training_step_time": 0.3888270854949951
    },
    {
      "epoch": 0.000308349609375,
      "grad_norm": 0.07298555225133896,
      "learning_rate": 6.671198293627479e-06,
      "loss": 0.0338,
      "step": 50520
    },
    {
      "epoch": 0.000308349609375,
      "model_forward_time": 0.11506485939025879,
      "step": 50520
    },
    {
      "epoch": 0.000308349609375,
      "step": 50520,
      "training_step_time": 0.40078234672546387
    },
    {
      "epoch": 0.000308355712890625,
      "model_forward_time": 0.11497759819030762,
      "step": 50521
    },
    {
      "epoch": 0.000308355712890625,
      "step": 50521,
      "training_step_time": 0.3976171016693115
    },
    {
      "epoch": 0.00030836181640625,
      "model_forward_time": 0.1150825023651123,
      "step": 50522
    },
    {
      "epoch": 0.00030836181640625,
      "step": 50522,
      "training_step_time": 0.38542890548706055
    },
    {
      "epoch": 0.000308367919921875,
      "model_forward_time": 0.11565828323364258,
      "step": 50523
    },
    {
      "epoch": 0.000308367919921875,
      "step": 50523,
      "training_step_time": 0.3968627452850342
    },
    {
      "epoch": 0.0003083740234375,
      "model_forward_time": 0.11569428443908691,
      "step": 50524
    },
    {
      "epoch": 0.0003083740234375,
      "step": 50524,
      "training_step_time": 0.5035688877105713
    },
    {
      "epoch": 0.000308380126953125,
      "model_forward_time": 0.11589741706848145,
      "step": 50525
    },
    {
      "epoch": 0.000308380126953125,
      "step": 50525,
      "training_step_time": 0.5081503391265869
    },
    {
      "epoch": 0.00030838623046875,
      "model_forward_time": 0.11513614654541016,
      "step": 50526
    },
    {
      "epoch": 0.00030838623046875,
      "step": 50526,
      "training_step_time": 0.39498400688171387
    },
    {
      "epoch": 0.000308392333984375,
      "model_forward_time": 0.1155705451965332,
      "step": 50527
    },
    {
      "epoch": 0.000308392333984375,
      "step": 50527,
      "training_step_time": 0.4772450923919678
    },
    {
      "epoch": 0.0003083984375,
      "model_forward_time": 0.1145017147064209,
      "step": 50528
    },
    {
      "epoch": 0.0003083984375,
      "step": 50528,
      "training_step_time": 0.39013051986694336
    },
    {
      "epoch": 0.000308404541015625,
      "model_forward_time": 0.11502861976623535,
      "step": 50529
    },
    {
      "epoch": 0.000308404541015625,
      "step": 50529,
      "training_step_time": 0.39287614822387695
    },
    {
      "epoch": 0.00030841064453125,
      "grad_norm": 0.11181247979402542,
      "learning_rate": 6.6574522761812366e-06,
      "loss": 0.0375,
      "step": 50530
    },
    {
      "epoch": 0.00030841064453125,
      "model_forward_time": 0.11480259895324707,
      "step": 50530
    },
    {
      "epoch": 0.00030841064453125,
      "step": 50530,
      "training_step_time": 0.4927196502685547
    },
    {
      "epoch": 0.000308416748046875,
      "model_forward_time": 0.11477231979370117,
      "step": 50531
    },
    {
      "epoch": 0.000308416748046875,
      "step": 50531,
      "training_step_time": 0.40322375297546387
    },
    {
      "epoch": 0.0003084228515625,
      "model_forward_time": 0.11528420448303223,
      "step": 50532
    },
    {
      "epoch": 0.0003084228515625,
      "step": 50532,
      "training_step_time": 0.39495134353637695
    },
    {
      "epoch": 0.000308428955078125,
      "model_forward_time": 0.11500763893127441,
      "step": 50533
    },
    {
      "epoch": 0.000308428955078125,
      "step": 50533,
      "training_step_time": 0.39492130279541016
    },
    {
      "epoch": 0.00030843505859375,
      "model_forward_time": 0.11481142044067383,
      "step": 50534
    },
    {
      "epoch": 0.00030843505859375,
      "step": 50534,
      "training_step_time": 0.39350318908691406
    },
    {
      "epoch": 0.000308441162109375,
      "model_forward_time": 0.11553096771240234,
      "step": 50535
    },
    {
      "epoch": 0.000308441162109375,
      "step": 50535,
      "training_step_time": 0.39049863815307617
    },
    {
      "epoch": 0.000308447265625,
      "model_forward_time": 0.11545038223266602,
      "step": 50536
    },
    {
      "epoch": 0.000308447265625,
      "step": 50536,
      "training_step_time": 0.7835144996643066
    },
    {
      "epoch": 0.000308453369140625,
      "model_forward_time": 0.1139366626739502,
      "step": 50537
    },
    {
      "epoch": 0.000308453369140625,
      "step": 50537,
      "training_step_time": 0.3640553951263428
    },
    {
      "epoch": 0.00030845947265625,
      "model_forward_time": 0.11481595039367676,
      "step": 50538
    },
    {
      "epoch": 0.00030845947265625,
      "step": 50538,
      "training_step_time": 0.43057870864868164
    },
    {
      "epoch": 0.000308465576171875,
      "model_forward_time": 0.11510992050170898,
      "step": 50539
    },
    {
      "epoch": 0.000308465576171875,
      "step": 50539,
      "training_step_time": 0.40439796447753906
    },
    {
      "epoch": 0.0003084716796875,
      "grad_norm": 0.09297414869070053,
      "learning_rate": 6.6437194250566e-06,
      "loss": 0.0347,
      "step": 50540
    },
    {
      "epoch": 0.0003084716796875,
      "model_forward_time": 0.11474132537841797,
      "step": 50540
    },
    {
      "epoch": 0.0003084716796875,
      "step": 50540,
      "training_step_time": 0.45721888542175293
    },
    {
      "epoch": 0.000308477783203125,
      "model_forward_time": 0.11416006088256836,
      "step": 50541
    },
    {
      "epoch": 0.000308477783203125,
      "step": 50541,
      "training_step_time": 0.4203147888183594
    },
    {
      "epoch": 0.00030848388671875,
      "model_forward_time": 0.1143038272857666,
      "step": 50542
    },
    {
      "epoch": 0.00030848388671875,
      "step": 50542,
      "training_step_time": 0.4888489246368408
    },
    {
      "epoch": 0.000308489990234375,
      "model_forward_time": 0.11557769775390625,
      "step": 50543
    },
    {
      "epoch": 0.000308489990234375,
      "step": 50543,
      "training_step_time": 0.4727015495300293
    },
    {
      "epoch": 0.00030849609375,
      "model_forward_time": 0.11491537094116211,
      "step": 50544
    },
    {
      "epoch": 0.00030849609375,
      "step": 50544,
      "training_step_time": 0.4151425361633301
    },
    {
      "epoch": 0.000308502197265625,
      "model_forward_time": 0.11515069007873535,
      "step": 50545
    },
    {
      "epoch": 0.000308502197265625,
      "step": 50545,
      "training_step_time": 0.3917579650878906
    },
    {
      "epoch": 0.00030850830078125,
      "model_forward_time": 0.11455416679382324,
      "step": 50546
    },
    {
      "epoch": 0.00030850830078125,
      "step": 50546,
      "training_step_time": 0.39255785942077637
    },
    {
      "epoch": 0.000308514404296875,
      "model_forward_time": 0.11436867713928223,
      "step": 50547
    },
    {
      "epoch": 0.000308514404296875,
      "step": 50547,
      "training_step_time": 0.39778876304626465
    },
    {
      "epoch": 0.0003085205078125,
      "model_forward_time": 0.11428618431091309,
      "step": 50548
    },
    {
      "epoch": 0.0003085205078125,
      "step": 50548,
      "training_step_time": 0.5737178325653076
    },
    {
      "epoch": 0.000308526611328125,
      "model_forward_time": 0.11482787132263184,
      "step": 50549
    },
    {
      "epoch": 0.000308526611328125,
      "step": 50549,
      "training_step_time": 0.39518046379089355
    },
    {
      "epoch": 0.00030853271484375,
      "grad_norm": 0.07314002513885498,
      "learning_rate": 6.629999744425236e-06,
      "loss": 0.0288,
      "step": 50550
    },
    {
      "epoch": 0.00030853271484375,
      "model_forward_time": 0.11645245552062988,
      "step": 50550
    },
    {
      "epoch": 0.00030853271484375,
      "step": 50550,
      "training_step_time": 0.4070608615875244
    },
    {
      "epoch": 0.000308538818359375,
      "model_forward_time": 0.11466145515441895,
      "step": 50551
    },
    {
      "epoch": 0.000308538818359375,
      "step": 50551,
      "training_step_time": 0.362351655960083
    },
    {
      "epoch": 0.000308544921875,
      "model_forward_time": 0.11517453193664551,
      "step": 50552
    },
    {
      "epoch": 0.000308544921875,
      "step": 50552,
      "training_step_time": 0.4547746181488037
    },
    {
      "epoch": 0.000308551025390625,
      "model_forward_time": 0.11467289924621582,
      "step": 50553
    },
    {
      "epoch": 0.000308551025390625,
      "step": 50553,
      "training_step_time": 0.40174269676208496
    },
    {
      "epoch": 0.00030855712890625,
      "model_forward_time": 0.11533999443054199,
      "step": 50554
    },
    {
      "epoch": 0.00030855712890625,
      "step": 50554,
      "training_step_time": 0.5584824085235596
    },
    {
      "epoch": 0.000308563232421875,
      "model_forward_time": 0.11438226699829102,
      "step": 50555
    },
    {
      "epoch": 0.000308563232421875,
      "step": 50555,
      "training_step_time": 0.4033396244049072
    },
    {
      "epoch": 0.0003085693359375,
      "model_forward_time": 0.11487174034118652,
      "step": 50556
    },
    {
      "epoch": 0.0003085693359375,
      "step": 50556,
      "training_step_time": 0.4314427375793457
    },
    {
      "epoch": 0.000308575439453125,
      "model_forward_time": 0.11472606658935547,
      "step": 50557
    },
    {
      "epoch": 0.000308575439453125,
      "step": 50557,
      "training_step_time": 0.39136600494384766
    },
    {
      "epoch": 0.00030858154296875,
      "model_forward_time": 0.11457681655883789,
      "step": 50558
    },
    {
      "epoch": 0.00030858154296875,
      "step": 50558,
      "training_step_time": 0.4475569725036621
    },
    {
      "epoch": 0.000308587646484375,
      "model_forward_time": 0.11519241333007812,
      "step": 50559
    },
    {
      "epoch": 0.000308587646484375,
      "step": 50559,
      "training_step_time": 0.41507887840270996
    },
    {
      "epoch": 0.00030859375,
      "grad_norm": 0.08296061307191849,
      "learning_rate": 6.6162932384548515e-06,
      "loss": 0.0339,
      "step": 50560
    },
    {
      "epoch": 0.00030859375,
      "model_forward_time": 0.11502504348754883,
      "step": 50560
    },
    {
      "epoch": 0.00030859375,
      "step": 50560,
      "training_step_time": 0.6318352222442627
    },
    {
      "epoch": 0.000308599853515625,
      "model_forward_time": 0.11422181129455566,
      "step": 50561
    },
    {
      "epoch": 0.000308599853515625,
      "step": 50561,
      "training_step_time": 0.3860461711883545
    },
    {
      "epoch": 0.00030860595703125,
      "model_forward_time": 0.1146090030670166,
      "step": 50562
    },
    {
      "epoch": 0.00030860595703125,
      "step": 50562,
      "training_step_time": 0.39063549041748047
    },
    {
      "epoch": 0.000308612060546875,
      "model_forward_time": 0.11496210098266602,
      "step": 50563
    },
    {
      "epoch": 0.000308612060546875,
      "step": 50563,
      "training_step_time": 0.39557623863220215
    },
    {
      "epoch": 0.0003086181640625,
      "model_forward_time": 0.11451077461242676,
      "step": 50564
    },
    {
      "epoch": 0.0003086181640625,
      "step": 50564,
      "training_step_time": 0.40641140937805176
    },
    {
      "epoch": 0.000308624267578125,
      "model_forward_time": 0.11489987373352051,
      "step": 50565
    },
    {
      "epoch": 0.000308624267578125,
      "step": 50565,
      "training_step_time": 0.42260146141052246
    },
    {
      "epoch": 0.00030863037109375,
      "model_forward_time": 0.11495018005371094,
      "step": 50566
    },
    {
      "epoch": 0.00030863037109375,
      "step": 50566,
      "training_step_time": 0.5717971324920654
    },
    {
      "epoch": 0.000308636474609375,
      "model_forward_time": 0.11514401435852051,
      "step": 50567
    },
    {
      "epoch": 0.000308636474609375,
      "step": 50567,
      "training_step_time": 0.4196586608886719
    },
    {
      "epoch": 0.000308642578125,
      "model_forward_time": 0.11544227600097656,
      "step": 50568
    },
    {
      "epoch": 0.000308642578125,
      "step": 50568,
      "training_step_time": 0.4304037094116211
    },
    {
      "epoch": 0.000308648681640625,
      "model_forward_time": 0.11482048034667969,
      "step": 50569
    },
    {
      "epoch": 0.000308648681640625,
      "step": 50569,
      "training_step_time": 0.44579315185546875
    },
    {
      "epoch": 0.00030865478515625,
      "grad_norm": 0.07834534347057343,
      "learning_rate": 6.602599911309082e-06,
      "loss": 0.0345,
      "step": 50570
    },
    {
      "epoch": 0.00030865478515625,
      "model_forward_time": 0.11461544036865234,
      "step": 50570
    },
    {
      "epoch": 0.00030865478515625,
      "step": 50570,
      "training_step_time": 0.4794199466705322
    },
    {
      "epoch": 0.000308660888671875,
      "model_forward_time": 0.11428403854370117,
      "step": 50571
    },
    {
      "epoch": 0.000308660888671875,
      "step": 50571,
      "training_step_time": 0.42854857444763184
    },
    {
      "epoch": 0.0003086669921875,
      "model_forward_time": 0.11527895927429199,
      "step": 50572
    },
    {
      "epoch": 0.0003086669921875,
      "step": 50572,
      "training_step_time": 0.44103550910949707
    },
    {
      "epoch": 0.000308673095703125,
      "model_forward_time": 0.11521673202514648,
      "step": 50573
    },
    {
      "epoch": 0.000308673095703125,
      "step": 50573,
      "training_step_time": 0.3899972438812256
    },
    {
      "epoch": 0.00030867919921875,
      "model_forward_time": 0.11532258987426758,
      "step": 50574
    },
    {
      "epoch": 0.00030867919921875,
      "step": 50574,
      "training_step_time": 0.4017651081085205
    },
    {
      "epoch": 0.000308685302734375,
      "model_forward_time": 0.11470580101013184,
      "step": 50575
    },
    {
      "epoch": 0.000308685302734375,
      "step": 50575,
      "training_step_time": 0.4159226417541504
    },
    {
      "epoch": 0.00030869140625,
      "model_forward_time": 0.11465907096862793,
      "step": 50576
    },
    {
      "epoch": 0.00030869140625,
      "step": 50576,
      "training_step_time": 0.3988966941833496
    },
    {
      "epoch": 0.000308697509765625,
      "model_forward_time": 0.11492729187011719,
      "step": 50577
    },
    {
      "epoch": 0.000308697509765625,
      "step": 50577,
      "training_step_time": 0.4183359146118164
    },
    {
      "epoch": 0.00030870361328125,
      "model_forward_time": 0.11504602432250977,
      "step": 50578
    },
    {
      "epoch": 0.00030870361328125,
      "step": 50578,
      "training_step_time": 0.6028242111206055
    },
    {
      "epoch": 0.000308709716796875,
      "model_forward_time": 0.11614608764648438,
      "step": 50579
    },
    {
      "epoch": 0.000308709716796875,
      "step": 50579,
      "training_step_time": 0.40217065811157227
    },
    {
      "epoch": 0.0003087158203125,
      "grad_norm": 0.06823599338531494,
      "learning_rate": 6.588919767147639e-06,
      "loss": 0.0362,
      "step": 50580
    },
    {
      "epoch": 0.0003087158203125,
      "model_forward_time": 0.1147608757019043,
      "step": 50580
    },
    {
      "epoch": 0.0003087158203125,
      "step": 50580,
      "training_step_time": 0.4054539203643799
    },
    {
      "epoch": 0.000308721923828125,
      "model_forward_time": 0.11568689346313477,
      "step": 50581
    },
    {
      "epoch": 0.000308721923828125,
      "step": 50581,
      "training_step_time": 0.43356919288635254
    },
    {
      "epoch": 0.00030872802734375,
      "model_forward_time": 0.11561107635498047,
      "step": 50582
    },
    {
      "epoch": 0.00030872802734375,
      "step": 50582,
      "training_step_time": 0.49988245964050293
    },
    {
      "epoch": 0.000308734130859375,
      "model_forward_time": 0.11507511138916016,
      "step": 50583
    },
    {
      "epoch": 0.000308734130859375,
      "step": 50583,
      "training_step_time": 0.43694257736206055
    },
    {
      "epoch": 0.000308740234375,
      "model_forward_time": 0.11530208587646484,
      "step": 50584
    },
    {
      "epoch": 0.000308740234375,
      "step": 50584,
      "training_step_time": 0.5804173946380615
    },
    {
      "epoch": 0.000308746337890625,
      "model_forward_time": 0.11501932144165039,
      "step": 50585
    },
    {
      "epoch": 0.000308746337890625,
      "step": 50585,
      "training_step_time": 0.4290943145751953
    },
    {
      "epoch": 0.00030875244140625,
      "model_forward_time": 0.11528658866882324,
      "step": 50586
    },
    {
      "epoch": 0.00030875244140625,
      "step": 50586,
      "training_step_time": 0.46935510635375977
    },
    {
      "epoch": 0.000308758544921875,
      "model_forward_time": 0.11465692520141602,
      "step": 50587
    },
    {
      "epoch": 0.000308758544921875,
      "step": 50587,
      "training_step_time": 0.389540433883667
    },
    {
      "epoch": 0.0003087646484375,
      "model_forward_time": 0.11475324630737305,
      "step": 50588
    },
    {
      "epoch": 0.0003087646484375,
      "step": 50588,
      "training_step_time": 0.39009833335876465
    },
    {
      "epoch": 0.000308770751953125,
      "model_forward_time": 0.11522889137268066,
      "step": 50589
    },
    {
      "epoch": 0.000308770751953125,
      "step": 50589,
      "training_step_time": 0.39612722396850586
    },
    {
      "epoch": 0.00030877685546875,
      "grad_norm": 0.11179511249065399,
      "learning_rate": 6.575252810126143e-06,
      "loss": 0.0323,
      "step": 50590
    },
    {
      "epoch": 0.00030877685546875,
      "model_forward_time": 0.1143946647644043,
      "step": 50590
    },
    {
      "epoch": 0.00030877685546875,
      "step": 50590,
      "training_step_time": 0.485637903213501
    },
    {
      "epoch": 0.000308782958984375,
      "model_forward_time": 0.11471962928771973,
      "step": 50591
    },
    {
      "epoch": 0.000308782958984375,
      "step": 50591,
      "training_step_time": 0.4353036880493164
    },
    {
      "epoch": 0.0003087890625,
      "model_forward_time": 0.11440682411193848,
      "step": 50592
    },
    {
      "epoch": 0.0003087890625,
      "step": 50592,
      "training_step_time": 0.3994441032409668
    },
    {
      "epoch": 0.000308795166015625,
      "model_forward_time": 0.11463618278503418,
      "step": 50593
    },
    {
      "epoch": 0.000308795166015625,
      "step": 50593,
      "training_step_time": 0.3618636131286621
    },
    {
      "epoch": 0.00030880126953125,
      "model_forward_time": 0.11501002311706543,
      "step": 50594
    },
    {
      "epoch": 0.00030880126953125,
      "step": 50594,
      "training_step_time": 0.457918643951416
    },
    {
      "epoch": 0.000308807373046875,
      "model_forward_time": 0.11434483528137207,
      "step": 50595
    },
    {
      "epoch": 0.000308807373046875,
      "step": 50595,
      "training_step_time": 0.4380972385406494
    },
    {
      "epoch": 0.0003088134765625,
      "model_forward_time": 0.11514472961425781,
      "step": 50596
    },
    {
      "epoch": 0.0003088134765625,
      "step": 50596,
      "training_step_time": 0.5916869640350342
    },
    {
      "epoch": 0.000308819580078125,
      "model_forward_time": 0.11430239677429199,
      "step": 50597
    },
    {
      "epoch": 0.000308819580078125,
      "step": 50597,
      "training_step_time": 0.4186897277832031
    },
    {
      "epoch": 0.00030882568359375,
      "model_forward_time": 0.11454248428344727,
      "step": 50598
    },
    {
      "epoch": 0.00030882568359375,
      "step": 50598,
      "training_step_time": 0.43175244331359863
    },
    {
      "epoch": 0.000308831787109375,
      "model_forward_time": 0.11420798301696777,
      "step": 50599
    },
    {
      "epoch": 0.000308831787109375,
      "step": 50599,
      "training_step_time": 0.3990480899810791
    },
    {
      "epoch": 0.000308837890625,
      "grad_norm": 0.07875146716833115,
      "learning_rate": 6.561599044396288e-06,
      "loss": 0.031,
      "step": 50600
    },
    {
      "epoch": 0.000308837890625,
      "model_forward_time": 0.11439943313598633,
      "step": 50600
    },
    {
      "epoch": 0.000308837890625,
      "step": 50600,
      "training_step_time": 0.47878575325012207
    },
    {
      "epoch": 0.000308843994140625,
      "model_forward_time": 0.11442971229553223,
      "step": 50601
    },
    {
      "epoch": 0.000308843994140625,
      "step": 50601,
      "training_step_time": 0.3873872756958008
    },
    {
      "epoch": 0.00030885009765625,
      "model_forward_time": 0.11512207984924316,
      "step": 50602
    },
    {
      "epoch": 0.00030885009765625,
      "step": 50602,
      "training_step_time": 0.5549407005310059
    },
    {
      "epoch": 0.000308856201171875,
      "model_forward_time": 0.11458516120910645,
      "step": 50603
    },
    {
      "epoch": 0.000308856201171875,
      "step": 50603,
      "training_step_time": 0.4419572353363037
    },
    {
      "epoch": 0.0003088623046875,
      "model_forward_time": 0.11426424980163574,
      "step": 50604
    },
    {
      "epoch": 0.0003088623046875,
      "step": 50604,
      "training_step_time": 0.41578173637390137
    },
    {
      "epoch": 0.000308868408203125,
      "model_forward_time": 0.11516070365905762,
      "step": 50605
    },
    {
      "epoch": 0.000308868408203125,
      "step": 50605,
      "training_step_time": 0.3959167003631592
    },
    {
      "epoch": 0.00030887451171875,
      "model_forward_time": 0.11469864845275879,
      "step": 50606
    },
    {
      "epoch": 0.00030887451171875,
      "step": 50606,
      "training_step_time": 0.39742255210876465
    },
    {
      "epoch": 0.000308880615234375,
      "model_forward_time": 0.11593866348266602,
      "step": 50607
    },
    {
      "epoch": 0.000308880615234375,
      "step": 50607,
      "training_step_time": 0.37998008728027344
    },
    {
      "epoch": 0.00030888671875,
      "model_forward_time": 0.11487174034118652,
      "step": 50608
    },
    {
      "epoch": 0.00030888671875,
      "step": 50608,
      "training_step_time": 0.7095255851745605
    },
    {
      "epoch": 0.000308892822265625,
      "model_forward_time": 0.11464333534240723,
      "step": 50609
    },
    {
      "epoch": 0.000308892822265625,
      "step": 50609,
      "training_step_time": 0.41109204292297363
    },
    {
      "epoch": 0.00030889892578125,
      "grad_norm": 0.08124614506959915,
      "learning_rate": 6.5479584741057255e-06,
      "loss": 0.0354,
      "step": 50610
    },
    {
      "epoch": 0.00030889892578125,
      "model_forward_time": 0.1143038272857666,
      "step": 50610
    },
    {
      "epoch": 0.00030889892578125,
      "step": 50610,
      "training_step_time": 0.42296886444091797
    },
    {
      "epoch": 0.000308905029296875,
      "model_forward_time": 0.11418032646179199,
      "step": 50611
    },
    {
      "epoch": 0.000308905029296875,
      "step": 50611,
      "training_step_time": 0.5314884185791016
    },
    {
      "epoch": 0.0003089111328125,
      "model_forward_time": 0.11409759521484375,
      "step": 50612
    },
    {
      "epoch": 0.0003089111328125,
      "step": 50612,
      "training_step_time": 0.4285244941711426
    },
    {
      "epoch": 0.000308917236328125,
      "model_forward_time": 0.11499667167663574,
      "step": 50613
    },
    {
      "epoch": 0.000308917236328125,
      "step": 50613,
      "training_step_time": 0.41678857803344727
    },
    {
      "epoch": 0.00030892333984375,
      "model_forward_time": 0.11541008949279785,
      "step": 50614
    },
    {
      "epoch": 0.00030892333984375,
      "step": 50614,
      "training_step_time": 0.6285126209259033
    },
    {
      "epoch": 0.000308929443359375,
      "model_forward_time": 0.11758732795715332,
      "step": 50615
    },
    {
      "epoch": 0.000308929443359375,
      "step": 50615,
      "training_step_time": 0.634209394454956
    },
    {
      "epoch": 0.000308935546875,
      "model_forward_time": 0.11614179611206055,
      "step": 50616
    },
    {
      "epoch": 0.000308935546875,
      "step": 50616,
      "training_step_time": 0.6783583164215088
    },
    {
      "epoch": 0.000308941650390625,
      "model_forward_time": 0.13539767265319824,
      "step": 50617
    },
    {
      "epoch": 0.000308941650390625,
      "step": 50617,
      "training_step_time": 0.6617963314056396
    },
    {
      "epoch": 0.00030894775390625,
      "model_forward_time": 0.119049072265625,
      "step": 50618
    },
    {
      "epoch": 0.00030894775390625,
      "step": 50618,
      "training_step_time": 0.6311159133911133
    },
    {
      "epoch": 0.000308953857421875,
      "model_forward_time": 0.11615180969238281,
      "step": 50619
    },
    {
      "epoch": 0.000308953857421875,
      "step": 50619,
      "training_step_time": 0.6532936096191406
    },
    {
      "epoch": 0.0003089599609375,
      "grad_norm": 0.09865111112594604,
      "learning_rate": 6.5343311033980895e-06,
      "loss": 0.035,
      "step": 50620
    },
    {
      "epoch": 0.0003089599609375,
      "model_forward_time": 0.11886286735534668,
      "step": 50620
    },
    {
      "epoch": 0.0003089599609375,
      "step": 50620,
      "training_step_time": 0.6991150379180908
    },
    {
      "epoch": 0.000308966064453125,
      "model_forward_time": 0.12002110481262207,
      "step": 50621
    },
    {
      "epoch": 0.000308966064453125,
      "step": 50621,
      "training_step_time": 0.7584507465362549
    },
    {
      "epoch": 0.00030897216796875,
      "model_forward_time": 0.11869955062866211,
      "step": 50622
    },
    {
      "epoch": 0.00030897216796875,
      "step": 50622,
      "training_step_time": 0.6122133731842041
    },
    {
      "epoch": 0.000308978271484375,
      "model_forward_time": 0.1178591251373291,
      "step": 50623
    },
    {
      "epoch": 0.000308978271484375,
      "step": 50623,
      "training_step_time": 0.6912021636962891
    },
    {
      "epoch": 0.000308984375,
      "model_forward_time": 0.12204289436340332,
      "step": 50624
    },
    {
      "epoch": 0.000308984375,
      "step": 50624,
      "training_step_time": 0.6703202724456787
    },
    {
      "epoch": 0.000308990478515625,
      "model_forward_time": 0.12126684188842773,
      "step": 50625
    },
    {
      "epoch": 0.000308990478515625,
      "step": 50625,
      "training_step_time": 0.6734111309051514
    },
    {
      "epoch": 0.00030899658203125,
      "model_forward_time": 0.11842560768127441,
      "step": 50626
    },
    {
      "epoch": 0.00030899658203125,
      "step": 50626,
      "training_step_time": 0.6514890193939209
    },
    {
      "epoch": 0.000309002685546875,
      "model_forward_time": 0.1197819709777832,
      "step": 50627
    },
    {
      "epoch": 0.000309002685546875,
      "step": 50627,
      "training_step_time": 0.789743185043335
    },
    {
      "epoch": 0.0003090087890625,
      "model_forward_time": 0.12240862846374512,
      "step": 50628
    },
    {
      "epoch": 0.0003090087890625,
      "step": 50628,
      "training_step_time": 0.7153639793395996
    },
    {
      "epoch": 0.000309014892578125,
      "model_forward_time": 0.12201118469238281,
      "step": 50629
    },
    {
      "epoch": 0.000309014892578125,
      "step": 50629,
      "training_step_time": 0.6736266613006592
    },
    {
      "epoch": 0.00030902099609375,
      "grad_norm": 0.09213348478078842,
      "learning_rate": 6.520716936413018e-06,
      "loss": 0.0355,
      "step": 50630
    },
    {
      "epoch": 0.00030902099609375,
      "model_forward_time": 0.12211489677429199,
      "step": 50630
    },
    {
      "epoch": 0.00030902099609375,
      "step": 50630,
      "training_step_time": 0.7875454425811768
    },
    {
      "epoch": 0.000309027099609375,
      "model_forward_time": 0.1183617115020752,
      "step": 50631
    },
    {
      "epoch": 0.000309027099609375,
      "step": 50631,
      "training_step_time": 0.6869041919708252
    },
    {
      "epoch": 0.000309033203125,
      "model_forward_time": 0.11846208572387695,
      "step": 50632
    },
    {
      "epoch": 0.000309033203125,
      "step": 50632,
      "training_step_time": 0.7391612529754639
    },
    {
      "epoch": 0.000309039306640625,
      "model_forward_time": 0.12201046943664551,
      "step": 50633
    },
    {
      "epoch": 0.000309039306640625,
      "step": 50633,
      "training_step_time": 0.7302343845367432
    },
    {
      "epoch": 0.00030904541015625,
      "model_forward_time": 0.116180419921875,
      "step": 50634
    },
    {
      "epoch": 0.00030904541015625,
      "step": 50634,
      "training_step_time": 0.6966536045074463
    },
    {
      "epoch": 0.000309051513671875,
      "model_forward_time": 0.11753225326538086,
      "step": 50635
    },
    {
      "epoch": 0.000309051513671875,
      "step": 50635,
      "training_step_time": 0.7181165218353271
    },
    {
      "epoch": 0.0003090576171875,
      "model_forward_time": 0.11864709854125977,
      "step": 50636
    },
    {
      "epoch": 0.0003090576171875,
      "step": 50636,
      "training_step_time": 0.6762101650238037
    },
    {
      "epoch": 0.000309063720703125,
      "model_forward_time": 0.12272214889526367,
      "step": 50637
    },
    {
      "epoch": 0.000309063720703125,
      "step": 50637,
      "training_step_time": 0.6805496215820312
    },
    {
      "epoch": 0.00030906982421875,
      "model_forward_time": 0.11747074127197266,
      "step": 50638
    },
    {
      "epoch": 0.00030906982421875,
      "step": 50638,
      "training_step_time": 0.6777486801147461
    },
    {
      "epoch": 0.000309075927734375,
      "model_forward_time": 0.11810445785522461,
      "step": 50639
    },
    {
      "epoch": 0.000309075927734375,
      "step": 50639,
      "training_step_time": 0.7185218334197998
    },
    {
      "epoch": 0.00030908203125,
      "grad_norm": 0.09572876244783401,
      "learning_rate": 6.5071159772861436e-06,
      "loss": 0.0374,
      "step": 50640
    },
    {
      "epoch": 0.00030908203125,
      "model_forward_time": 0.12806200981140137,
      "step": 50640
    },
    {
      "epoch": 0.00030908203125,
      "step": 50640,
      "training_step_time": 0.653449535369873
    },
    {
      "epoch": 0.000309088134765625,
      "model_forward_time": 0.12136006355285645,
      "step": 50641
    },
    {
      "epoch": 0.000309088134765625,
      "step": 50641,
      "training_step_time": 0.6774790287017822
    },
    {
      "epoch": 0.00030909423828125,
      "model_forward_time": 0.11911320686340332,
      "step": 50642
    },
    {
      "epoch": 0.00030909423828125,
      "step": 50642,
      "training_step_time": 0.8213109970092773
    },
    {
      "epoch": 0.000309100341796875,
      "model_forward_time": 0.11626791954040527,
      "step": 50643
    },
    {
      "epoch": 0.000309100341796875,
      "step": 50643,
      "training_step_time": 0.6841371059417725
    },
    {
      "epoch": 0.0003091064453125,
      "model_forward_time": 0.12360596656799316,
      "step": 50644
    },
    {
      "epoch": 0.0003091064453125,
      "step": 50644,
      "training_step_time": 0.6362733840942383
    },
    {
      "epoch": 0.000309112548828125,
      "model_forward_time": 0.11953043937683105,
      "step": 50645
    },
    {
      "epoch": 0.000309112548828125,
      "step": 50645,
      "training_step_time": 0.6458966732025146
    },
    {
      "epoch": 0.00030911865234375,
      "model_forward_time": 0.1186530590057373,
      "step": 50646
    },
    {
      "epoch": 0.00030911865234375,
      "step": 50646,
      "training_step_time": 0.533665657043457
    },
    {
      "epoch": 0.000309124755859375,
      "model_forward_time": 0.12436556816101074,
      "step": 50647
    },
    {
      "epoch": 0.000309124755859375,
      "step": 50647,
      "training_step_time": 0.6591682434082031
    },
    {
      "epoch": 0.000309130859375,
      "model_forward_time": 0.11894011497497559,
      "step": 50648
    },
    {
      "epoch": 0.000309130859375,
      "step": 50648,
      "training_step_time": 0.6570453643798828
    },
    {
      "epoch": 0.000309136962890625,
      "model_forward_time": 0.11832809448242188,
      "step": 50649
    },
    {
      "epoch": 0.000309136962890625,
      "step": 50649,
      "training_step_time": 0.6695516109466553
    },
    {
      "epoch": 0.00030914306640625,
      "grad_norm": 0.10745972394943237,
      "learning_rate": 6.493528230149054e-06,
      "loss": 0.0387,
      "step": 50650
    },
    {
      "epoch": 0.00030914306640625,
      "model_forward_time": 0.11909294128417969,
      "step": 50650
    },
    {
      "epoch": 0.00030914306640625,
      "step": 50650,
      "training_step_time": 0.7385106086730957
    },
    {
      "epoch": 0.000309149169921875,
      "model_forward_time": 0.1192619800567627,
      "step": 50651
    },
    {
      "epoch": 0.000309149169921875,
      "step": 50651,
      "training_step_time": 0.5855147838592529
    },
    {
      "epoch": 0.0003091552734375,
      "model_forward_time": 0.11896872520446777,
      "step": 50652
    },
    {
      "epoch": 0.0003091552734375,
      "step": 50652,
      "training_step_time": 0.6360321044921875
    },
    {
      "epoch": 0.000309161376953125,
      "model_forward_time": 0.11841535568237305,
      "step": 50653
    },
    {
      "epoch": 0.000309161376953125,
      "step": 50653,
      "training_step_time": 0.6643898487091064
    },
    {
      "epoch": 0.00030916748046875,
      "model_forward_time": 0.13317131996154785,
      "step": 50654
    },
    {
      "epoch": 0.00030916748046875,
      "step": 50654,
      "training_step_time": 0.6220138072967529
    },
    {
      "epoch": 0.000309173583984375,
      "model_forward_time": 0.11659359931945801,
      "step": 50655
    },
    {
      "epoch": 0.000309173583984375,
      "step": 50655,
      "training_step_time": 0.6362216472625732
    },
    {
      "epoch": 0.0003091796875,
      "model_forward_time": 0.12124252319335938,
      "step": 50656
    },
    {
      "epoch": 0.0003091796875,
      "step": 50656,
      "training_step_time": 0.6841259002685547
    },
    {
      "epoch": 0.000309185791015625,
      "model_forward_time": 0.12082386016845703,
      "step": 50657
    },
    {
      "epoch": 0.000309185791015625,
      "step": 50657,
      "training_step_time": 0.6987338066101074
    },
    {
      "epoch": 0.00030919189453125,
      "model_forward_time": 0.12421798706054688,
      "step": 50658
    },
    {
      "epoch": 0.00030919189453125,
      "step": 50658,
      "training_step_time": 0.6079216003417969
    },
    {
      "epoch": 0.000309197998046875,
      "model_forward_time": 0.12169528007507324,
      "step": 50659
    },
    {
      "epoch": 0.000309197998046875,
      "step": 50659,
      "training_step_time": 0.7565529346466064
    },
    {
      "epoch": 0.0003092041015625,
      "grad_norm": 0.14409388601779938,
      "learning_rate": 6.479953699129382e-06,
      "loss": 0.0418,
      "step": 50660
    },
    {
      "epoch": 0.0003092041015625,
      "model_forward_time": 0.11621761322021484,
      "step": 50660
    },
    {
      "epoch": 0.0003092041015625,
      "step": 50660,
      "training_step_time": 0.723381519317627
    },
    {
      "epoch": 0.000309210205078125,
      "model_forward_time": 0.11906766891479492,
      "step": 50661
    },
    {
      "epoch": 0.000309210205078125,
      "step": 50661,
      "training_step_time": 0.671499490737915
    },
    {
      "epoch": 0.00030921630859375,
      "model_forward_time": 0.11602473258972168,
      "step": 50662
    },
    {
      "epoch": 0.00030921630859375,
      "step": 50662,
      "training_step_time": 0.6593017578125
    },
    {
      "epoch": 0.000309222412109375,
      "model_forward_time": 0.11747312545776367,
      "step": 50663
    },
    {
      "epoch": 0.000309222412109375,
      "step": 50663,
      "training_step_time": 0.6223630905151367
    },
    {
      "epoch": 0.000309228515625,
      "model_forward_time": 0.11855626106262207,
      "step": 50664
    },
    {
      "epoch": 0.000309228515625,
      "step": 50664,
      "training_step_time": 0.6169960498809814
    },
    {
      "epoch": 0.000309234619140625,
      "model_forward_time": 0.11763644218444824,
      "step": 50665
    },
    {
      "epoch": 0.000309234619140625,
      "step": 50665,
      "training_step_time": 0.6797173023223877
    },
    {
      "epoch": 0.00030924072265625,
      "model_forward_time": 0.11930966377258301,
      "step": 50666
    },
    {
      "epoch": 0.00030924072265625,
      "step": 50666,
      "training_step_time": 0.7507286071777344
    },
    {
      "epoch": 0.000309246826171875,
      "model_forward_time": 0.11853790283203125,
      "step": 50667
    },
    {
      "epoch": 0.000309246826171875,
      "step": 50667,
      "training_step_time": 0.733407735824585
    },
    {
      "epoch": 0.0003092529296875,
      "model_forward_time": 0.14635562896728516,
      "step": 50668
    },
    {
      "epoch": 0.0003092529296875,
      "step": 50668,
      "training_step_time": 0.6956491470336914
    },
    {
      "epoch": 0.000309259033203125,
      "model_forward_time": 0.11808204650878906,
      "step": 50669
    },
    {
      "epoch": 0.000309259033203125,
      "step": 50669,
      "training_step_time": 0.6895625591278076
    },
    {
      "epoch": 0.00030926513671875,
      "grad_norm": 0.10197579115629196,
      "learning_rate": 6.466392388350695e-06,
      "loss": 0.0392,
      "step": 50670
    },
    {
      "epoch": 0.00030926513671875,
      "model_forward_time": 0.11941051483154297,
      "step": 50670
    },
    {
      "epoch": 0.00030926513671875,
      "step": 50670,
      "training_step_time": 0.6793942451477051
    },
    {
      "epoch": 0.000309271240234375,
      "model_forward_time": 0.11669063568115234,
      "step": 50671
    },
    {
      "epoch": 0.000309271240234375,
      "step": 50671,
      "training_step_time": 0.6276812553405762
    },
    {
      "epoch": 0.00030927734375,
      "model_forward_time": 0.12157392501831055,
      "step": 50672
    },
    {
      "epoch": 0.00030927734375,
      "step": 50672,
      "training_step_time": 0.597369909286499
    },
    {
      "epoch": 0.000309283447265625,
      "model_forward_time": 0.11667346954345703,
      "step": 50673
    },
    {
      "epoch": 0.000309283447265625,
      "step": 50673,
      "training_step_time": 0.6493434906005859
    },
    {
      "epoch": 0.00030928955078125,
      "model_forward_time": 0.11866641044616699,
      "step": 50674
    },
    {
      "epoch": 0.00030928955078125,
      "step": 50674,
      "training_step_time": 0.6335947513580322
    },
    {
      "epoch": 0.000309295654296875,
      "model_forward_time": 0.12302708625793457,
      "step": 50675
    },
    {
      "epoch": 0.000309295654296875,
      "step": 50675,
      "training_step_time": 0.5765089988708496
    },
    {
      "epoch": 0.0003093017578125,
      "model_forward_time": 0.11903929710388184,
      "step": 50676
    },
    {
      "epoch": 0.0003093017578125,
      "step": 50676,
      "training_step_time": 0.6724121570587158
    },
    {
      "epoch": 0.000309307861328125,
      "model_forward_time": 0.11995220184326172,
      "step": 50677
    },
    {
      "epoch": 0.000309307861328125,
      "step": 50677,
      "training_step_time": 0.7156224250793457
    },
    {
      "epoch": 0.00030931396484375,
      "model_forward_time": 0.14108681678771973,
      "step": 50678
    },
    {
      "epoch": 0.00030931396484375,
      "step": 50678,
      "training_step_time": 0.6311600208282471
    },
    {
      "epoch": 0.000309320068359375,
      "model_forward_time": 0.11778974533081055,
      "step": 50679
    },
    {
      "epoch": 0.000309320068359375,
      "step": 50679,
      "training_step_time": 0.6690750122070312
    },
    {
      "epoch": 0.000309326171875,
      "grad_norm": 0.10819247364997864,
      "learning_rate": 6.452844301932559e-06,
      "loss": 0.037,
      "step": 50680
    },
    {
      "epoch": 0.000309326171875,
      "model_forward_time": 0.12080168724060059,
      "step": 50680
    },
    {
      "epoch": 0.000309326171875,
      "step": 50680,
      "training_step_time": 0.5818729400634766
    },
    {
      "epoch": 0.000309332275390625,
      "model_forward_time": 0.12037825584411621,
      "step": 50681
    },
    {
      "epoch": 0.000309332275390625,
      "step": 50681,
      "training_step_time": 0.5549814701080322
    },
    {
      "epoch": 0.00030933837890625,
      "model_forward_time": 0.12089920043945312,
      "step": 50682
    },
    {
      "epoch": 0.00030933837890625,
      "step": 50682,
      "training_step_time": 0.5229969024658203
    },
    {
      "epoch": 0.000309344482421875,
      "model_forward_time": 0.12076640129089355,
      "step": 50683
    },
    {
      "epoch": 0.000309344482421875,
      "step": 50683,
      "training_step_time": 0.4961411952972412
    },
    {
      "epoch": 0.0003093505859375,
      "model_forward_time": 0.12105512619018555,
      "step": 50684
    },
    {
      "epoch": 0.0003093505859375,
      "step": 50684,
      "training_step_time": 0.509913444519043
    },
    {
      "epoch": 0.000309356689453125,
      "model_forward_time": 0.12455368041992188,
      "step": 50685
    },
    {
      "epoch": 0.000309356689453125,
      "step": 50685,
      "training_step_time": 0.5028307437896729
    },
    {
      "epoch": 0.00030936279296875,
      "model_forward_time": 0.1190481185913086,
      "step": 50686
    },
    {
      "epoch": 0.00030936279296875,
      "step": 50686,
      "training_step_time": 0.5228304862976074
    },
    {
      "epoch": 0.000309368896484375,
      "model_forward_time": 0.11704349517822266,
      "step": 50687
    },
    {
      "epoch": 0.000309368896484375,
      "step": 50687,
      "training_step_time": 0.5839426517486572
    },
    {
      "epoch": 0.000309375,
      "model_forward_time": 0.11737585067749023,
      "step": 50688
    },
    {
      "epoch": 0.000309375,
      "step": 50688,
      "training_step_time": 0.4754221439361572
    },
    {
      "epoch": 0.000309381103515625,
      "model_forward_time": 0.1164097785949707,
      "step": 50689
    },
    {
      "epoch": 0.000309381103515625,
      "step": 50689,
      "training_step_time": 0.43056821823120117
    },
    {
      "epoch": 0.00030938720703125,
      "grad_norm": 0.09319398552179337,
      "learning_rate": 6.439309443990532e-06,
      "loss": 0.039,
      "step": 50690
    },
    {
      "epoch": 0.00030938720703125,
      "model_forward_time": 0.11642670631408691,
      "step": 50690
    },
    {
      "epoch": 0.00030938720703125,
      "step": 50690,
      "training_step_time": 0.44463276863098145
    },
    {
      "epoch": 0.000309393310546875,
      "model_forward_time": 0.116912841796875,
      "step": 50691
    },
    {
      "epoch": 0.000309393310546875,
      "step": 50691,
      "training_step_time": 0.4626121520996094
    },
    {
      "epoch": 0.0003093994140625,
      "model_forward_time": 0.11532878875732422,
      "step": 50692
    },
    {
      "epoch": 0.0003093994140625,
      "step": 50692,
      "training_step_time": 0.5139408111572266
    },
    {
      "epoch": 0.000309405517578125,
      "model_forward_time": 0.11611747741699219,
      "step": 50693
    },
    {
      "epoch": 0.000309405517578125,
      "step": 50693,
      "training_step_time": 0.3816194534301758
    },
    {
      "epoch": 0.00030941162109375,
      "model_forward_time": 0.11525583267211914,
      "step": 50694
    },
    {
      "epoch": 0.00030941162109375,
      "step": 50694,
      "training_step_time": 0.3957693576812744
    },
    {
      "epoch": 0.000309417724609375,
      "model_forward_time": 0.11487507820129395,
      "step": 50695
    },
    {
      "epoch": 0.000309417724609375,
      "step": 50695,
      "training_step_time": 0.3760263919830322
    },
    {
      "epoch": 0.000309423828125,
      "model_forward_time": 0.11507272720336914,
      "step": 50696
    },
    {
      "epoch": 0.000309423828125,
      "step": 50696,
      "training_step_time": 0.39632368087768555
    },
    {
      "epoch": 0.000309429931640625,
      "model_forward_time": 0.11647439002990723,
      "step": 50697
    },
    {
      "epoch": 0.000309429931640625,
      "step": 50697,
      "training_step_time": 0.405672550201416
    },
    {
      "epoch": 0.00030943603515625,
      "model_forward_time": 0.11517119407653809,
      "step": 50698
    },
    {
      "epoch": 0.00030943603515625,
      "step": 50698,
      "training_step_time": 0.43518662452697754
    },
    {
      "epoch": 0.000309442138671875,
      "model_forward_time": 0.11526894569396973,
      "step": 50699
    },
    {
      "epoch": 0.000309442138671875,
      "step": 50699,
      "training_step_time": 0.40752172470092773
    },
    {
      "epoch": 0.0003094482421875,
      "grad_norm": 0.07794512808322906,
      "learning_rate": 6.425787818636131e-06,
      "loss": 0.0381,
      "step": 50700
    },
    {
      "epoch": 0.0003094482421875,
      "model_forward_time": 0.11477160453796387,
      "step": 50700
    },
    {
      "epoch": 0.0003094482421875,
      "step": 50700,
      "training_step_time": 0.3957223892211914
    },
    {
      "epoch": 0.000309454345703125,
      "model_forward_time": 0.11664390563964844,
      "step": 50701
    },
    {
      "epoch": 0.000309454345703125,
      "step": 50701,
      "training_step_time": 0.3743009567260742
    },
    {
      "epoch": 0.00030946044921875,
      "model_forward_time": 0.11579322814941406,
      "step": 50702
    },
    {
      "epoch": 0.00030946044921875,
      "step": 50702,
      "training_step_time": 0.4524669647216797
    },
    {
      "epoch": 0.000309466552734375,
      "model_forward_time": 0.11524081230163574,
      "step": 50703
    },
    {
      "epoch": 0.000309466552734375,
      "step": 50703,
      "training_step_time": 0.4583160877227783
    },
    {
      "epoch": 0.00030947265625,
      "model_forward_time": 0.11508917808532715,
      "step": 50704
    },
    {
      "epoch": 0.00030947265625,
      "step": 50704,
      "training_step_time": 0.40370941162109375
    },
    {
      "epoch": 0.000309478759765625,
      "model_forward_time": 0.11529111862182617,
      "step": 50705
    },
    {
      "epoch": 0.000309478759765625,
      "step": 50705,
      "training_step_time": 0.438230037689209
    },
    {
      "epoch": 0.00030948486328125,
      "model_forward_time": 0.11542844772338867,
      "step": 50706
    },
    {
      "epoch": 0.00030948486328125,
      "step": 50706,
      "training_step_time": 0.49294543266296387
    },
    {
      "epoch": 0.000309490966796875,
      "model_forward_time": 0.11543893814086914,
      "step": 50707
    },
    {
      "epoch": 0.000309490966796875,
      "step": 50707,
      "training_step_time": 0.4389204978942871
    },
    {
      "epoch": 0.0003094970703125,
      "model_forward_time": 0.1150815486907959,
      "step": 50708
    },
    {
      "epoch": 0.0003094970703125,
      "step": 50708,
      "training_step_time": 0.394944429397583
    },
    {
      "epoch": 0.000309503173828125,
      "model_forward_time": 0.11492013931274414,
      "step": 50709
    },
    {
      "epoch": 0.000309503173828125,
      "step": 50709,
      "training_step_time": 0.39426326751708984
    },
    {
      "epoch": 0.00030950927734375,
      "grad_norm": 0.10897328704595566,
      "learning_rate": 6.412279429976903e-06,
      "loss": 0.0375,
      "step": 50710
    },
    {
      "epoch": 0.00030950927734375,
      "model_forward_time": 0.11714053153991699,
      "step": 50710
    },
    {
      "epoch": 0.00030950927734375,
      "step": 50710,
      "training_step_time": 0.3979175090789795
    },
    {
      "epoch": 0.000309515380859375,
      "model_forward_time": 0.11455488204956055,
      "step": 50711
    },
    {
      "epoch": 0.000309515380859375,
      "step": 50711,
      "training_step_time": 0.396045446395874
    },
    {
      "epoch": 0.000309521484375,
      "model_forward_time": 0.1153872013092041,
      "step": 50712
    },
    {
      "epoch": 0.000309521484375,
      "step": 50712,
      "training_step_time": 0.40372729301452637
    },
    {
      "epoch": 0.000309527587890625,
      "model_forward_time": 0.11492419242858887,
      "step": 50713
    },
    {
      "epoch": 0.000309527587890625,
      "step": 50713,
      "training_step_time": 0.4001600742340088
    },
    {
      "epoch": 0.00030953369140625,
      "model_forward_time": 0.11536073684692383,
      "step": 50714
    },
    {
      "epoch": 0.00030953369140625,
      "step": 50714,
      "training_step_time": 0.38880276679992676
    },
    {
      "epoch": 0.000309539794921875,
      "model_forward_time": 0.1149148941040039,
      "step": 50715
    },
    {
      "epoch": 0.000309539794921875,
      "step": 50715,
      "training_step_time": 0.39852404594421387
    },
    {
      "epoch": 0.0003095458984375,
      "model_forward_time": 0.11551856994628906,
      "step": 50716
    },
    {
      "epoch": 0.0003095458984375,
      "step": 50716,
      "training_step_time": 0.4437990188598633
    },
    {
      "epoch": 0.000309552001953125,
      "model_forward_time": 0.11612606048583984,
      "step": 50717
    },
    {
      "epoch": 0.000309552001953125,
      "step": 50717,
      "training_step_time": 0.501568078994751
    },
    {
      "epoch": 0.00030955810546875,
      "model_forward_time": 0.11667728424072266,
      "step": 50718
    },
    {
      "epoch": 0.00030955810546875,
      "step": 50718,
      "training_step_time": 0.41216063499450684
    },
    {
      "epoch": 0.000309564208984375,
      "model_forward_time": 0.1155860424041748,
      "step": 50719
    },
    {
      "epoch": 0.000309564208984375,
      "step": 50719,
      "training_step_time": 0.4557983875274658
    },
    {
      "epoch": 0.0003095703125,
      "grad_norm": 0.07604479044675827,
      "learning_rate": 6.398784282116293e-06,
      "loss": 0.0353,
      "step": 50720
    },
    {
      "epoch": 0.0003095703125,
      "model_forward_time": 0.11462211608886719,
      "step": 50720
    },
    {
      "epoch": 0.0003095703125,
      "step": 50720,
      "training_step_time": 0.44338250160217285
    },
    {
      "epoch": 0.000309576416015625,
      "model_forward_time": 0.1150665283203125,
      "step": 50721
    },
    {
      "epoch": 0.000309576416015625,
      "step": 50721,
      "training_step_time": 0.4814891815185547
    },
    {
      "epoch": 0.00030958251953125,
      "model_forward_time": 0.11472725868225098,
      "step": 50722
    },
    {
      "epoch": 0.00030958251953125,
      "step": 50722,
      "training_step_time": 0.38474011421203613
    },
    {
      "epoch": 0.000309588623046875,
      "model_forward_time": 0.11453485488891602,
      "step": 50723
    },
    {
      "epoch": 0.000309588623046875,
      "step": 50723,
      "training_step_time": 0.37833571434020996
    },
    {
      "epoch": 0.0003095947265625,
      "model_forward_time": 0.11534237861633301,
      "step": 50724
    },
    {
      "epoch": 0.0003095947265625,
      "step": 50724,
      "training_step_time": 0.3940894603729248
    },
    {
      "epoch": 0.000309600830078125,
      "model_forward_time": 0.11528825759887695,
      "step": 50725
    },
    {
      "epoch": 0.000309600830078125,
      "step": 50725,
      "training_step_time": 0.4012477397918701
    },
    {
      "epoch": 0.00030960693359375,
      "model_forward_time": 0.11464214324951172,
      "step": 50726
    },
    {
      "epoch": 0.00030960693359375,
      "step": 50726,
      "training_step_time": 0.39086413383483887
    },
    {
      "epoch": 0.000309613037109375,
      "model_forward_time": 0.11499857902526855,
      "step": 50727
    },
    {
      "epoch": 0.000309613037109375,
      "step": 50727,
      "training_step_time": 0.39479827880859375
    },
    {
      "epoch": 0.000309619140625,
      "model_forward_time": 0.11553144454956055,
      "step": 50728
    },
    {
      "epoch": 0.000309619140625,
      "step": 50728,
      "training_step_time": 0.39129018783569336
    },
    {
      "epoch": 0.000309625244140625,
      "model_forward_time": 0.11580300331115723,
      "step": 50729
    },
    {
      "epoch": 0.000309625244140625,
      "step": 50729,
      "training_step_time": 0.43123555183410645
    },
    {
      "epoch": 0.00030963134765625,
      "grad_norm": 0.1003279834985733,
      "learning_rate": 6.385302379153818e-06,
      "loss": 0.0346,
      "step": 50730
    },
    {
      "epoch": 0.00030963134765625,
      "model_forward_time": 0.11494827270507812,
      "step": 50730
    },
    {
      "epoch": 0.00030963134765625,
      "step": 50730,
      "training_step_time": 0.41876769065856934
    },
    {
      "epoch": 0.000309637451171875,
      "model_forward_time": 0.11461901664733887,
      "step": 50731
    },
    {
      "epoch": 0.000309637451171875,
      "step": 50731,
      "training_step_time": 0.48852062225341797
    },
    {
      "epoch": 0.0003096435546875,
      "model_forward_time": 0.11430072784423828,
      "step": 50732
    },
    {
      "epoch": 0.0003096435546875,
      "step": 50732,
      "training_step_time": 0.4868323802947998
    },
    {
      "epoch": 0.000309649658203125,
      "model_forward_time": 0.11475729942321777,
      "step": 50733
    },
    {
      "epoch": 0.000309649658203125,
      "step": 50733,
      "training_step_time": 0.39289331436157227
    },
    {
      "epoch": 0.00030965576171875,
      "model_forward_time": 0.11439681053161621,
      "step": 50734
    },
    {
      "epoch": 0.00030965576171875,
      "step": 50734,
      "training_step_time": 0.45667409896850586
    },
    {
      "epoch": 0.000309661865234375,
      "model_forward_time": 0.11471819877624512,
      "step": 50735
    },
    {
      "epoch": 0.000309661865234375,
      "step": 50735,
      "training_step_time": 0.4906351566314697
    },
    {
      "epoch": 0.00030966796875,
      "model_forward_time": 0.11511516571044922,
      "step": 50736
    },
    {
      "epoch": 0.00030966796875,
      "step": 50736,
      "training_step_time": 0.40808844566345215
    },
    {
      "epoch": 0.000309674072265625,
      "model_forward_time": 0.11446475982666016,
      "step": 50737
    },
    {
      "epoch": 0.000309674072265625,
      "step": 50737,
      "training_step_time": 0.39021921157836914
    },
    {
      "epoch": 0.00030968017578125,
      "model_forward_time": 0.11496615409851074,
      "step": 50738
    },
    {
      "epoch": 0.00030968017578125,
      "step": 50738,
      "training_step_time": 0.3918881416320801
    },
    {
      "epoch": 0.000309686279296875,
      "model_forward_time": 0.11490297317504883,
      "step": 50739
    },
    {
      "epoch": 0.000309686279296875,
      "step": 50739,
      "training_step_time": 0.3973526954650879
    },
    {
      "epoch": 0.0003096923828125,
      "grad_norm": 0.12850497663021088,
      "learning_rate": 6.3718337251848785e-06,
      "loss": 0.0422,
      "step": 50740
    },
    {
      "epoch": 0.0003096923828125,
      "model_forward_time": 0.1145167350769043,
      "step": 50740
    },
    {
      "epoch": 0.0003096923828125,
      "step": 50740,
      "training_step_time": 0.389096736907959
    },
    {
      "epoch": 0.000309698486328125,
      "model_forward_time": 0.11561012268066406,
      "step": 50741
    },
    {
      "epoch": 0.000309698486328125,
      "step": 50741,
      "training_step_time": 0.392505407333374
    },
    {
      "epoch": 0.00030970458984375,
      "model_forward_time": 0.1163487434387207,
      "step": 50742
    },
    {
      "epoch": 0.00030970458984375,
      "step": 50742,
      "training_step_time": 0.41281628608703613
    },
    {
      "epoch": 0.000309710693359375,
      "model_forward_time": 0.11562585830688477,
      "step": 50743
    },
    {
      "epoch": 0.000309710693359375,
      "step": 50743,
      "training_step_time": 0.40350794792175293
    },
    {
      "epoch": 0.000309716796875,
      "model_forward_time": 0.11548280715942383,
      "step": 50744
    },
    {
      "epoch": 0.000309716796875,
      "step": 50744,
      "training_step_time": 0.40479111671447754
    },
    {
      "epoch": 0.000309722900390625,
      "model_forward_time": 0.11516237258911133,
      "step": 50745
    },
    {
      "epoch": 0.000309722900390625,
      "step": 50745,
      "training_step_time": 0.3999049663543701
    },
    {
      "epoch": 0.00030972900390625,
      "model_forward_time": 0.1156761646270752,
      "step": 50746
    },
    {
      "epoch": 0.00030972900390625,
      "step": 50746,
      "training_step_time": 0.455639123916626
    },
    {
      "epoch": 0.000309735107421875,
      "model_forward_time": 0.11639046669006348,
      "step": 50747
    },
    {
      "epoch": 0.000309735107421875,
      "step": 50747,
      "training_step_time": 0.4146859645843506
    },
    {
      "epoch": 0.0003097412109375,
      "model_forward_time": 0.11567401885986328,
      "step": 50748
    },
    {
      "epoch": 0.0003097412109375,
      "step": 50748,
      "training_step_time": 0.4601731300354004
    },
    {
      "epoch": 0.000309747314453125,
      "model_forward_time": 0.1146097183227539,
      "step": 50749
    },
    {
      "epoch": 0.000309747314453125,
      "step": 50749,
      "training_step_time": 0.4077322483062744
    },
    {
      "epoch": 0.00030975341796875,
      "grad_norm": 0.11003102362155914,
      "learning_rate": 6.3583783243009285e-06,
      "loss": 0.0384,
      "step": 50750
    },
    {
      "epoch": 0.00030975341796875,
      "model_forward_time": 0.11534643173217773,
      "step": 50750
    },
    {
      "epoch": 0.00030975341796875,
      "step": 50750,
      "training_step_time": 0.42441797256469727
    },
    {
      "epoch": 0.000309759521484375,
      "model_forward_time": 0.11503195762634277,
      "step": 50751
    },
    {
      "epoch": 0.000309759521484375,
      "step": 50751,
      "training_step_time": 0.4220764636993408
    },
    {
      "epoch": 0.000309765625,
      "model_forward_time": 0.11608195304870605,
      "step": 50752
    },
    {
      "epoch": 0.000309765625,
      "step": 50752,
      "training_step_time": 0.39723873138427734
    },
    {
      "epoch": 0.000309771728515625,
      "model_forward_time": 0.11514425277709961,
      "step": 50753
    },
    {
      "epoch": 0.000309771728515625,
      "step": 50753,
      "training_step_time": 0.39035820960998535
    },
    {
      "epoch": 0.00030977783203125,
      "model_forward_time": 0.11531877517700195,
      "step": 50754
    },
    {
      "epoch": 0.00030977783203125,
      "step": 50754,
      "training_step_time": 0.40184521675109863
    },
    {
      "epoch": 0.000309783935546875,
      "model_forward_time": 0.1154317855834961,
      "step": 50755
    },
    {
      "epoch": 0.000309783935546875,
      "step": 50755,
      "training_step_time": 0.39440011978149414
    },
    {
      "epoch": 0.0003097900390625,
      "model_forward_time": 0.11528658866882324,
      "step": 50756
    },
    {
      "epoch": 0.0003097900390625,
      "step": 50756,
      "training_step_time": 0.40460968017578125
    },
    {
      "epoch": 0.000309796142578125,
      "model_forward_time": 0.11580896377563477,
      "step": 50757
    },
    {
      "epoch": 0.000309796142578125,
      "step": 50757,
      "training_step_time": 0.40061354637145996
    },
    {
      "epoch": 0.00030980224609375,
      "model_forward_time": 0.11558842658996582,
      "step": 50758
    },
    {
      "epoch": 0.00030980224609375,
      "step": 50758,
      "training_step_time": 0.411334753036499
    },
    {
      "epoch": 0.000309808349609375,
      "model_forward_time": 0.11558914184570312,
      "step": 50759
    },
    {
      "epoch": 0.000309808349609375,
      "step": 50759,
      "training_step_time": 0.40335750579833984
    },
    {
      "epoch": 0.000309814453125,
      "grad_norm": 0.09499399363994598,
      "learning_rate": 6.344936180589351e-06,
      "loss": 0.0346,
      "step": 50760
    },
    {
      "epoch": 0.000309814453125,
      "model_forward_time": 0.11524224281311035,
      "step": 50760
    },
    {
      "epoch": 0.000309814453125,
      "step": 50760,
      "training_step_time": 0.4393429756164551
    },
    {
      "epoch": 0.000309820556640625,
      "model_forward_time": 0.11424446105957031,
      "step": 50761
    },
    {
      "epoch": 0.000309820556640625,
      "step": 50761,
      "training_step_time": 0.3921513557434082
    },
    {
      "epoch": 0.00030982666015625,
      "model_forward_time": 0.11565995216369629,
      "step": 50762
    },
    {
      "epoch": 0.00030982666015625,
      "step": 50762,
      "training_step_time": 0.46114253997802734
    },
    {
      "epoch": 0.000309832763671875,
      "model_forward_time": 0.11531305313110352,
      "step": 50763
    },
    {
      "epoch": 0.000309832763671875,
      "step": 50763,
      "training_step_time": 0.3946263790130615
    },
    {
      "epoch": 0.0003098388671875,
      "model_forward_time": 0.11579370498657227,
      "step": 50764
    },
    {
      "epoch": 0.0003098388671875,
      "step": 50764,
      "training_step_time": 0.4588446617126465
    },
    {
      "epoch": 0.000309844970703125,
      "model_forward_time": 0.11538457870483398,
      "step": 50765
    },
    {
      "epoch": 0.000309844970703125,
      "step": 50765,
      "training_step_time": 0.4573934078216553
    },
    {
      "epoch": 0.00030985107421875,
      "model_forward_time": 0.11517119407653809,
      "step": 50766
    },
    {
      "epoch": 0.00030985107421875,
      "step": 50766,
      "training_step_time": 0.5225954055786133
    },
    {
      "epoch": 0.000309857177734375,
      "model_forward_time": 0.11479973793029785,
      "step": 50767
    },
    {
      "epoch": 0.000309857177734375,
      "step": 50767,
      "training_step_time": 0.39424633979797363
    },
    {
      "epoch": 0.00030986328125,
      "model_forward_time": 0.11533021926879883,
      "step": 50768
    },
    {
      "epoch": 0.00030986328125,
      "step": 50768,
      "training_step_time": 0.3891735076904297
    },
    {
      "epoch": 0.000309869384765625,
      "model_forward_time": 0.1151580810546875,
      "step": 50769
    },
    {
      "epoch": 0.000309869384765625,
      "step": 50769,
      "training_step_time": 0.38276219367980957
    },
    {
      "epoch": 0.00030987548828125,
      "grad_norm": 0.08219810575246811,
      "learning_rate": 6.3315072981335215e-06,
      "loss": 0.034,
      "step": 50770
    },
    {
      "epoch": 0.00030987548828125,
      "model_forward_time": 0.11548900604248047,
      "step": 50770
    },
    {
      "epoch": 0.00030987548828125,
      "step": 50770,
      "training_step_time": 0.40123915672302246
    },
    {
      "epoch": 0.000309881591796875,
      "model_forward_time": 0.115509033203125,
      "step": 50771
    },
    {
      "epoch": 0.000309881591796875,
      "step": 50771,
      "training_step_time": 0.43070435523986816
    },
    {
      "epoch": 0.0003098876953125,
      "model_forward_time": 0.11577248573303223,
      "step": 50772
    },
    {
      "epoch": 0.0003098876953125,
      "step": 50772,
      "training_step_time": 0.688784122467041
    },
    {
      "epoch": 0.000309893798828125,
      "model_forward_time": 0.11482548713684082,
      "step": 50773
    },
    {
      "epoch": 0.000309893798828125,
      "step": 50773,
      "training_step_time": 0.3987095355987549
    },
    {
      "epoch": 0.00030989990234375,
      "model_forward_time": 0.11453390121459961,
      "step": 50774
    },
    {
      "epoch": 0.00030989990234375,
      "step": 50774,
      "training_step_time": 0.36669349670410156
    },
    {
      "epoch": 0.000309906005859375,
      "model_forward_time": 0.11452913284301758,
      "step": 50775
    },
    {
      "epoch": 0.000309906005859375,
      "step": 50775,
      "training_step_time": 0.4164257049560547
    },
    {
      "epoch": 0.000309912109375,
      "model_forward_time": 0.11464834213256836,
      "step": 50776
    },
    {
      "epoch": 0.000309912109375,
      "step": 50776,
      "training_step_time": 0.38387179374694824
    },
    {
      "epoch": 0.000309918212890625,
      "model_forward_time": 0.11573338508605957,
      "step": 50777
    },
    {
      "epoch": 0.000309918212890625,
      "step": 50777,
      "training_step_time": 0.4094879627227783
    },
    {
      "epoch": 0.00030992431640625,
      "model_forward_time": 0.11544108390808105,
      "step": 50778
    },
    {
      "epoch": 0.00030992431640625,
      "step": 50778,
      "training_step_time": 0.5848219394683838
    },
    {
      "epoch": 0.000309930419921875,
      "model_forward_time": 0.11507606506347656,
      "step": 50779
    },
    {
      "epoch": 0.000309930419921875,
      "step": 50779,
      "training_step_time": 0.572965145111084
    },
    {
      "epoch": 0.0003099365234375,
      "grad_norm": 0.09096506237983704,
      "learning_rate": 6.318091681012772e-06,
      "loss": 0.0368,
      "step": 50780
    },
    {
      "epoch": 0.0003099365234375,
      "model_forward_time": 0.11442351341247559,
      "step": 50780
    },
    {
      "epoch": 0.0003099365234375,
      "step": 50780,
      "training_step_time": 0.38944244384765625
    },
    {
      "epoch": 0.000309942626953125,
      "model_forward_time": 0.11467885971069336,
      "step": 50781
    },
    {
      "epoch": 0.000309942626953125,
      "step": 50781,
      "training_step_time": 0.3963758945465088
    },
    {
      "epoch": 0.00030994873046875,
      "model_forward_time": 0.11487340927124023,
      "step": 50782
    },
    {
      "epoch": 0.00030994873046875,
      "step": 50782,
      "training_step_time": 0.39329099655151367
    },
    {
      "epoch": 0.000309954833984375,
      "model_forward_time": 0.1144556999206543,
      "step": 50783
    },
    {
      "epoch": 0.000309954833984375,
      "step": 50783,
      "training_step_time": 0.43569254875183105
    },
    {
      "epoch": 0.0003099609375,
      "model_forward_time": 0.1146092414855957,
      "step": 50784
    },
    {
      "epoch": 0.0003099609375,
      "step": 50784,
      "training_step_time": 0.4943203926086426
    },
    {
      "epoch": 0.000309967041015625,
      "model_forward_time": 0.1150815486907959,
      "step": 50785
    },
    {
      "epoch": 0.000309967041015625,
      "step": 50785,
      "training_step_time": 0.5987849235534668
    },
    {
      "epoch": 0.00030997314453125,
      "model_forward_time": 0.11421537399291992,
      "step": 50786
    },
    {
      "epoch": 0.00030997314453125,
      "step": 50786,
      "training_step_time": 0.38762569427490234
    },
    {
      "epoch": 0.000309979248046875,
      "model_forward_time": 0.11532306671142578,
      "step": 50787
    },
    {
      "epoch": 0.000309979248046875,
      "step": 50787,
      "training_step_time": 0.39653468132019043
    },
    {
      "epoch": 0.0003099853515625,
      "model_forward_time": 0.11490511894226074,
      "step": 50788
    },
    {
      "epoch": 0.0003099853515625,
      "step": 50788,
      "training_step_time": 0.4484739303588867
    },
    {
      "epoch": 0.000309991455078125,
      "model_forward_time": 0.11503744125366211,
      "step": 50789
    },
    {
      "epoch": 0.000309991455078125,
      "step": 50789,
      "training_step_time": 0.5076103210449219
    },
    {
      "epoch": 0.00030999755859375,
      "grad_norm": 0.07347326725721359,
      "learning_rate": 6.304689333302416e-06,
      "loss": 0.0329,
      "step": 50790
    },
    {
      "epoch": 0.00030999755859375,
      "model_forward_time": 0.11465764045715332,
      "step": 50790
    },
    {
      "epoch": 0.00030999755859375,
      "step": 50790,
      "training_step_time": 0.46001219749450684
    },
    {
      "epoch": 0.000310003662109375,
      "model_forward_time": 0.1148386001586914,
      "step": 50791
    },
    {
      "epoch": 0.000310003662109375,
      "step": 50791,
      "training_step_time": 0.5044147968292236
    },
    {
      "epoch": 0.000310009765625,
      "model_forward_time": 0.11476373672485352,
      "step": 50792
    },
    {
      "epoch": 0.000310009765625,
      "step": 50792,
      "training_step_time": 0.44327378273010254
    },
    {
      "epoch": 0.000310015869140625,
      "model_forward_time": 0.11432886123657227,
      "step": 50793
    },
    {
      "epoch": 0.000310015869140625,
      "step": 50793,
      "training_step_time": 0.4347543716430664
    },
    {
      "epoch": 0.00031002197265625,
      "model_forward_time": 0.11459112167358398,
      "step": 50794
    },
    {
      "epoch": 0.00031002197265625,
      "step": 50794,
      "training_step_time": 0.39203596115112305
    },
    {
      "epoch": 0.000310028076171875,
      "model_forward_time": 0.11427855491638184,
      "step": 50795
    },
    {
      "epoch": 0.000310028076171875,
      "step": 50795,
      "training_step_time": 0.3930695056915283
    },
    {
      "epoch": 0.0003100341796875,
      "model_forward_time": 0.11530685424804688,
      "step": 50796
    },
    {
      "epoch": 0.0003100341796875,
      "step": 50796,
      "training_step_time": 0.4317963123321533
    },
    {
      "epoch": 0.000310040283203125,
      "model_forward_time": 0.11491870880126953,
      "step": 50797
    },
    {
      "epoch": 0.000310040283203125,
      "step": 50797,
      "training_step_time": 0.5533933639526367
    },
    {
      "epoch": 0.00031004638671875,
      "model_forward_time": 0.1155848503112793,
      "step": 50798
    },
    {
      "epoch": 0.00031004638671875,
      "step": 50798,
      "training_step_time": 0.3917245864868164
    },
    {
      "epoch": 0.000310052490234375,
      "model_forward_time": 0.11507821083068848,
      "step": 50799
    },
    {
      "epoch": 0.000310052490234375,
      "step": 50799,
      "training_step_time": 0.39540600776672363
    },
    {
      "epoch": 0.00031005859375,
      "grad_norm": 0.0849454402923584,
      "learning_rate": 6.291300259073724e-06,
      "loss": 0.0356,
      "step": 50800
    },
    {
      "epoch": 0.00031005859375,
      "model_forward_time": 0.11429476737976074,
      "step": 50800
    },
    {
      "epoch": 0.00031005859375,
      "step": 50800,
      "training_step_time": 0.38540196418762207
    },
    {
      "epoch": 0.000310064697265625,
      "model_forward_time": 0.11488652229309082,
      "step": 50801
    },
    {
      "epoch": 0.000310064697265625,
      "step": 50801,
      "training_step_time": 0.39142322540283203
    },
    {
      "epoch": 0.00031007080078125,
      "model_forward_time": 0.11563348770141602,
      "step": 50802
    },
    {
      "epoch": 0.00031007080078125,
      "step": 50802,
      "training_step_time": 0.530909538269043
    },
    {
      "epoch": 0.000310076904296875,
      "model_forward_time": 0.11532402038574219,
      "step": 50803
    },
    {
      "epoch": 0.000310076904296875,
      "step": 50803,
      "training_step_time": 0.5810723304748535
    },
    {
      "epoch": 0.0003100830078125,
      "model_forward_time": 0.11460757255554199,
      "step": 50804
    },
    {
      "epoch": 0.0003100830078125,
      "step": 50804,
      "training_step_time": 0.41243672370910645
    },
    {
      "epoch": 0.000310089111328125,
      "model_forward_time": 0.11498236656188965,
      "step": 50805
    },
    {
      "epoch": 0.000310089111328125,
      "step": 50805,
      "training_step_time": 0.45896196365356445
    },
    {
      "epoch": 0.00031009521484375,
      "model_forward_time": 0.11484384536743164,
      "step": 50806
    },
    {
      "epoch": 0.00031009521484375,
      "step": 50806,
      "training_step_time": 0.4986152648925781
    },
    {
      "epoch": 0.000310101318359375,
      "model_forward_time": 0.11387085914611816,
      "step": 50807
    },
    {
      "epoch": 0.000310101318359375,
      "step": 50807,
      "training_step_time": 0.42293477058410645
    },
    {
      "epoch": 0.000310107421875,
      "model_forward_time": 0.11439704895019531,
      "step": 50808
    },
    {
      "epoch": 0.000310107421875,
      "step": 50808,
      "training_step_time": 0.3970303535461426
    },
    {
      "epoch": 0.000310113525390625,
      "model_forward_time": 0.11458754539489746,
      "step": 50809
    },
    {
      "epoch": 0.000310113525390625,
      "step": 50809,
      "training_step_time": 0.4694488048553467
    },
    {
      "epoch": 0.00031011962890625,
      "grad_norm": 0.09487858414649963,
      "learning_rate": 6.277924462393958e-06,
      "loss": 0.0399,
      "step": 50810
    },
    {
      "epoch": 0.00031011962890625,
      "model_forward_time": 0.11516070365905762,
      "step": 50810
    },
    {
      "epoch": 0.00031011962890625,
      "step": 50810,
      "training_step_time": 0.3842906951904297
    },
    {
      "epoch": 0.000310125732421875,
      "model_forward_time": 0.11490702629089355,
      "step": 50811
    },
    {
      "epoch": 0.000310125732421875,
      "step": 50811,
      "training_step_time": 0.39072275161743164
    },
    {
      "epoch": 0.0003101318359375,
      "model_forward_time": 0.11488127708435059,
      "step": 50812
    },
    {
      "epoch": 0.0003101318359375,
      "step": 50812,
      "training_step_time": 0.3913612365722656
    },
    {
      "epoch": 0.000310137939453125,
      "model_forward_time": 0.11503434181213379,
      "step": 50813
    },
    {
      "epoch": 0.000310137939453125,
      "step": 50813,
      "training_step_time": 0.3914680480957031
    },
    {
      "epoch": 0.00031014404296875,
      "model_forward_time": 0.11538338661193848,
      "step": 50814
    },
    {
      "epoch": 0.00031014404296875,
      "step": 50814,
      "training_step_time": 0.5121111869812012
    },
    {
      "epoch": 0.000310150146484375,
      "model_forward_time": 0.11482572555541992,
      "step": 50815
    },
    {
      "epoch": 0.000310150146484375,
      "step": 50815,
      "training_step_time": 0.6442489624023438
    },
    {
      "epoch": 0.00031015625,
      "model_forward_time": 0.11588764190673828,
      "step": 50816
    },
    {
      "epoch": 0.00031015625,
      "step": 50816,
      "training_step_time": 0.42788100242614746
    },
    {
      "epoch": 0.000310162353515625,
      "model_forward_time": 0.11468672752380371,
      "step": 50817
    },
    {
      "epoch": 0.000310162353515625,
      "step": 50817,
      "training_step_time": 0.42892026901245117
    },
    {
      "epoch": 0.00031016845703125,
      "model_forward_time": 0.11461782455444336,
      "step": 50818
    },
    {
      "epoch": 0.00031016845703125,
      "step": 50818,
      "training_step_time": 0.48229146003723145
    },
    {
      "epoch": 0.000310174560546875,
      "model_forward_time": 0.11460328102111816,
      "step": 50819
    },
    {
      "epoch": 0.000310174560546875,
      "step": 50819,
      "training_step_time": 0.3906095027923584
    },
    {
      "epoch": 0.0003101806640625,
      "grad_norm": 0.10443411022424698,
      "learning_rate": 6.264561947326331e-06,
      "loss": 0.0374,
      "step": 50820
    },
    {
      "epoch": 0.0003101806640625,
      "model_forward_time": 0.1152806282043457,
      "step": 50820
    },
    {
      "epoch": 0.0003101806640625,
      "step": 50820,
      "training_step_time": 0.4446074962615967
    },
    {
      "epoch": 0.000310186767578125,
      "model_forward_time": 0.11460089683532715,
      "step": 50821
    },
    {
      "epoch": 0.000310186767578125,
      "step": 50821,
      "training_step_time": 0.6485776901245117
    },
    {
      "epoch": 0.00031019287109375,
      "model_forward_time": 0.11406826972961426,
      "step": 50822
    },
    {
      "epoch": 0.00031019287109375,
      "step": 50822,
      "training_step_time": 0.4233112335205078
    },
    {
      "epoch": 0.000310198974609375,
      "model_forward_time": 0.11479640007019043,
      "step": 50823
    },
    {
      "epoch": 0.000310198974609375,
      "step": 50823,
      "training_step_time": 0.3904702663421631
    },
    {
      "epoch": 0.000310205078125,
      "model_forward_time": 0.1135396957397461,
      "step": 50824
    },
    {
      "epoch": 0.000310205078125,
      "step": 50824,
      "training_step_time": 0.40285348892211914
    },
    {
      "epoch": 0.000310211181640625,
      "model_forward_time": 0.11485934257507324,
      "step": 50825
    },
    {
      "epoch": 0.000310211181640625,
      "step": 50825,
      "training_step_time": 0.3905010223388672
    },
    {
      "epoch": 0.00031021728515625,
      "model_forward_time": 0.11508417129516602,
      "step": 50826
    },
    {
      "epoch": 0.00031021728515625,
      "step": 50826,
      "training_step_time": 0.4025886058807373
    },
    {
      "epoch": 0.000310223388671875,
      "model_forward_time": 0.11565995216369629,
      "step": 50827
    },
    {
      "epoch": 0.000310223388671875,
      "step": 50827,
      "training_step_time": 0.6259679794311523
    },
    {
      "epoch": 0.0003102294921875,
      "model_forward_time": 0.1137857437133789,
      "step": 50828
    },
    {
      "epoch": 0.0003102294921875,
      "step": 50828,
      "training_step_time": 0.3949739933013916
    },
    {
      "epoch": 0.000310235595703125,
      "model_forward_time": 0.11489605903625488,
      "step": 50829
    },
    {
      "epoch": 0.000310235595703125,
      "step": 50829,
      "training_step_time": 0.36995387077331543
    },
    {
      "epoch": 0.00031024169921875,
      "grad_norm": 0.13573172688484192,
      "learning_rate": 6.251212717930017e-06,
      "loss": 0.0415,
      "step": 50830
    },
    {
      "epoch": 0.00031024169921875,
      "model_forward_time": 0.11504340171813965,
      "step": 50830
    },
    {
      "epoch": 0.00031024169921875,
      "step": 50830,
      "training_step_time": 0.4421248435974121
    },
    {
      "epoch": 0.000310247802734375,
      "model_forward_time": 0.11482596397399902,
      "step": 50831
    },
    {
      "epoch": 0.000310247802734375,
      "step": 50831,
      "training_step_time": 0.40599894523620605
    },
    {
      "epoch": 0.00031025390625,
      "model_forward_time": 0.11449837684631348,
      "step": 50832
    },
    {
      "epoch": 0.00031025390625,
      "step": 50832,
      "training_step_time": 0.3979206085205078
    },
    {
      "epoch": 0.000310260009765625,
      "model_forward_time": 0.11468696594238281,
      "step": 50833
    },
    {
      "epoch": 0.000310260009765625,
      "step": 50833,
      "training_step_time": 0.6548192501068115
    },
    {
      "epoch": 0.00031026611328125,
      "model_forward_time": 0.11457633972167969,
      "step": 50834
    },
    {
      "epoch": 0.00031026611328125,
      "step": 50834,
      "training_step_time": 0.4532179832458496
    },
    {
      "epoch": 0.000310272216796875,
      "model_forward_time": 0.11471104621887207,
      "step": 50835
    },
    {
      "epoch": 0.000310272216796875,
      "step": 50835,
      "training_step_time": 0.3877556324005127
    },
    {
      "epoch": 0.0003102783203125,
      "model_forward_time": 0.11458730697631836,
      "step": 50836
    },
    {
      "epoch": 0.0003102783203125,
      "step": 50836,
      "training_step_time": 0.3973407745361328
    },
    {
      "epoch": 0.000310284423828125,
      "model_forward_time": 0.11418390274047852,
      "step": 50837
    },
    {
      "epoch": 0.000310284423828125,
      "step": 50837,
      "training_step_time": 0.3855605125427246
    },
    {
      "epoch": 0.00031029052734375,
      "model_forward_time": 0.11508584022521973,
      "step": 50838
    },
    {
      "epoch": 0.00031029052734375,
      "step": 50838,
      "training_step_time": 0.40111303329467773
    },
    {
      "epoch": 0.000310296630859375,
      "model_forward_time": 0.11527585983276367,
      "step": 50839
    },
    {
      "epoch": 0.000310296630859375,
      "step": 50839,
      "training_step_time": 0.5344421863555908
    },
    {
      "epoch": 0.000310302734375,
      "grad_norm": 0.08800885081291199,
      "learning_rate": 6.237876778260155e-06,
      "loss": 0.0398,
      "step": 50840
    },
    {
      "epoch": 0.000310302734375,
      "model_forward_time": 0.11439633369445801,
      "step": 50840
    },
    {
      "epoch": 0.000310302734375,
      "step": 50840,
      "training_step_time": 0.38796329498291016
    },
    {
      "epoch": 0.000310308837890625,
      "model_forward_time": 0.11531376838684082,
      "step": 50841
    },
    {
      "epoch": 0.000310308837890625,
      "step": 50841,
      "training_step_time": 0.3975234031677246
    },
    {
      "epoch": 0.00031031494140625,
      "model_forward_time": 0.11482429504394531,
      "step": 50842
    },
    {
      "epoch": 0.00031031494140625,
      "step": 50842,
      "training_step_time": 0.39818549156188965
    },
    {
      "epoch": 0.000310321044921875,
      "model_forward_time": 0.11499643325805664,
      "step": 50843
    },
    {
      "epoch": 0.000310321044921875,
      "step": 50843,
      "training_step_time": 0.3678169250488281
    },
    {
      "epoch": 0.0003103271484375,
      "model_forward_time": 0.11458444595336914,
      "step": 50844
    },
    {
      "epoch": 0.0003103271484375,
      "step": 50844,
      "training_step_time": 0.4541032314300537
    },
    {
      "epoch": 0.000310333251953125,
      "model_forward_time": 0.11508727073669434,
      "step": 50845
    },
    {
      "epoch": 0.000310333251953125,
      "step": 50845,
      "training_step_time": 0.7397222518920898
    },
    {
      "epoch": 0.00031033935546875,
      "model_forward_time": 0.1141805648803711,
      "step": 50846
    },
    {
      "epoch": 0.00031033935546875,
      "step": 50846,
      "training_step_time": 0.4551970958709717
    },
    {
      "epoch": 0.000310345458984375,
      "model_forward_time": 0.11410808563232422,
      "step": 50847
    },
    {
      "epoch": 0.000310345458984375,
      "step": 50847,
      "training_step_time": 0.41248178482055664
    },
    {
      "epoch": 0.0003103515625,
      "model_forward_time": 0.11447644233703613,
      "step": 50848
    },
    {
      "epoch": 0.0003103515625,
      "step": 50848,
      "training_step_time": 0.41211915016174316
    },
    {
      "epoch": 0.000310357666015625,
      "model_forward_time": 0.1143503189086914,
      "step": 50849
    },
    {
      "epoch": 0.000310357666015625,
      "step": 50849,
      "training_step_time": 0.3954453468322754
    },
    {
      "epoch": 0.00031036376953125,
      "grad_norm": 0.1090565025806427,
      "learning_rate": 6.22455413236786e-06,
      "loss": 0.0423,
      "step": 50850
    },
    {
      "epoch": 0.00031036376953125,
      "model_forward_time": 0.11388516426086426,
      "step": 50850
    },
    {
      "epoch": 0.00031036376953125,
      "step": 50850,
      "training_step_time": 0.3828144073486328
    },
    {
      "epoch": 0.000310369873046875,
      "model_forward_time": 0.11464142799377441,
      "step": 50851
    },
    {
      "epoch": 0.000310369873046875,
      "step": 50851,
      "training_step_time": 0.6348862648010254
    },
    {
      "epoch": 0.0003103759765625,
      "model_forward_time": 0.11479878425598145,
      "step": 50852
    },
    {
      "epoch": 0.0003103759765625,
      "step": 50852,
      "training_step_time": 0.4067118167877197
    },
    {
      "epoch": 0.000310382080078125,
      "model_forward_time": 0.11444854736328125,
      "step": 50853
    },
    {
      "epoch": 0.000310382080078125,
      "step": 50853,
      "training_step_time": 0.3990786075592041
    },
    {
      "epoch": 0.00031038818359375,
      "model_forward_time": 0.11480450630187988,
      "step": 50854
    },
    {
      "epoch": 0.00031038818359375,
      "step": 50854,
      "training_step_time": 0.40023040771484375
    },
    {
      "epoch": 0.000310394287109375,
      "model_forward_time": 0.11499166488647461,
      "step": 50855
    },
    {
      "epoch": 0.000310394287109375,
      "step": 50855,
      "training_step_time": 0.3925044536590576
    },
    {
      "epoch": 0.000310400390625,
      "model_forward_time": 0.1153104305267334,
      "step": 50856
    },
    {
      "epoch": 0.000310400390625,
      "step": 50856,
      "training_step_time": 0.3926525115966797
    },
    {
      "epoch": 0.000310406494140625,
      "model_forward_time": 0.11505722999572754,
      "step": 50857
    },
    {
      "epoch": 0.000310406494140625,
      "step": 50857,
      "training_step_time": 0.6480228900909424
    },
    {
      "epoch": 0.00031041259765625,
      "model_forward_time": 0.11530494689941406,
      "step": 50858
    },
    {
      "epoch": 0.00031041259765625,
      "step": 50858,
      "training_step_time": 0.40215325355529785
    },
    {
      "epoch": 0.000310418701171875,
      "model_forward_time": 0.11458969116210938,
      "step": 50859
    },
    {
      "epoch": 0.000310418701171875,
      "step": 50859,
      "training_step_time": 0.39071130752563477
    },
    {
      "epoch": 0.0003104248046875,
      "grad_norm": 0.11372480541467667,
      "learning_rate": 6.211244784300197e-06,
      "loss": 0.0305,
      "step": 50860
    },
    {
      "epoch": 0.0003104248046875,
      "model_forward_time": 0.11479711532592773,
      "step": 50860
    },
    {
      "epoch": 0.0003104248046875,
      "step": 50860,
      "training_step_time": 0.46938276290893555
    },
    {
      "epoch": 0.000310430908203125,
      "model_forward_time": 0.11455917358398438,
      "step": 50861
    },
    {
      "epoch": 0.000310430908203125,
      "step": 50861,
      "training_step_time": 0.41185450553894043
    },
    {
      "epoch": 0.00031043701171875,
      "model_forward_time": 0.11439657211303711,
      "step": 50862
    },
    {
      "epoch": 0.00031043701171875,
      "step": 50862,
      "training_step_time": 0.44220876693725586
    },
    {
      "epoch": 0.000310443115234375,
      "model_forward_time": 0.11465311050415039,
      "step": 50863
    },
    {
      "epoch": 0.000310443115234375,
      "step": 50863,
      "training_step_time": 0.5904989242553711
    },
    {
      "epoch": 0.00031044921875,
      "model_forward_time": 0.11450958251953125,
      "step": 50864
    },
    {
      "epoch": 0.00031044921875,
      "step": 50864,
      "training_step_time": 0.39024877548217773
    },
    {
      "epoch": 0.000310455322265625,
      "model_forward_time": 0.11454081535339355,
      "step": 50865
    },
    {
      "epoch": 0.000310455322265625,
      "step": 50865,
      "training_step_time": 0.3900718688964844
    },
    {
      "epoch": 0.00031046142578125,
      "model_forward_time": 0.1150815486907959,
      "step": 50866
    },
    {
      "epoch": 0.00031046142578125,
      "step": 50866,
      "training_step_time": 0.3970959186553955
    },
    {
      "epoch": 0.000310467529296875,
      "model_forward_time": 0.11464500427246094,
      "step": 50867
    },
    {
      "epoch": 0.000310467529296875,
      "step": 50867,
      "training_step_time": 0.39103031158447266
    },
    {
      "epoch": 0.0003104736328125,
      "model_forward_time": 0.11431336402893066,
      "step": 50868
    },
    {
      "epoch": 0.0003104736328125,
      "step": 50868,
      "training_step_time": 0.4004209041595459
    },
    {
      "epoch": 0.000310479736328125,
      "model_forward_time": 0.11475372314453125,
      "step": 50869
    },
    {
      "epoch": 0.000310479736328125,
      "step": 50869,
      "training_step_time": 0.7381877899169922
    },
    {
      "epoch": 0.00031048583984375,
      "grad_norm": 0.09971334785223007,
      "learning_rate": 6.1979487381001786e-06,
      "loss": 0.0329,
      "step": 50870
    },
    {
      "epoch": 0.00031048583984375,
      "model_forward_time": 0.11489486694335938,
      "step": 50870
    },
    {
      "epoch": 0.00031048583984375,
      "step": 50870,
      "training_step_time": 0.36299824714660645
    },
    {
      "epoch": 0.000310491943359375,
      "model_forward_time": 0.1145174503326416,
      "step": 50871
    },
    {
      "epoch": 0.000310491943359375,
      "step": 50871,
      "training_step_time": 0.44526052474975586
    },
    {
      "epoch": 0.000310498046875,
      "model_forward_time": 0.11458396911621094,
      "step": 50872
    },
    {
      "epoch": 0.000310498046875,
      "step": 50872,
      "training_step_time": 0.3921496868133545
    },
    {
      "epoch": 0.000310504150390625,
      "model_forward_time": 0.1142418384552002,
      "step": 50873
    },
    {
      "epoch": 0.000310504150390625,
      "step": 50873,
      "training_step_time": 0.4459202289581299
    },
    {
      "epoch": 0.00031051025390625,
      "model_forward_time": 0.11493325233459473,
      "step": 50874
    },
    {
      "epoch": 0.00031051025390625,
      "step": 50874,
      "training_step_time": 0.4585990905761719
    },
    {
      "epoch": 0.000310516357421875,
      "model_forward_time": 0.11469030380249023,
      "step": 50875
    },
    {
      "epoch": 0.000310516357421875,
      "step": 50875,
      "training_step_time": 0.4963569641113281
    },
    {
      "epoch": 0.0003105224609375,
      "model_forward_time": 0.11438560485839844,
      "step": 50876
    },
    {
      "epoch": 0.0003105224609375,
      "step": 50876,
      "training_step_time": 0.4530160427093506
    },
    {
      "epoch": 0.000310528564453125,
      "model_forward_time": 0.11487889289855957,
      "step": 50877
    },
    {
      "epoch": 0.000310528564453125,
      "step": 50877,
      "training_step_time": 0.38300108909606934
    },
    {
      "epoch": 0.00031053466796875,
      "model_forward_time": 0.11478066444396973,
      "step": 50878
    },
    {
      "epoch": 0.00031053466796875,
      "step": 50878,
      "training_step_time": 0.39647603034973145
    },
    {
      "epoch": 0.000310540771484375,
      "model_forward_time": 0.11484336853027344,
      "step": 50879
    },
    {
      "epoch": 0.000310540771484375,
      "step": 50879,
      "training_step_time": 0.3926084041595459
    },
    {
      "epoch": 0.000310546875,
      "grad_norm": 0.09296239912509918,
      "learning_rate": 6.184665997806832e-06,
      "loss": 0.0365,
      "step": 50880
    },
    {
      "epoch": 0.000310546875,
      "model_forward_time": 0.11476445198059082,
      "step": 50880
    },
    {
      "epoch": 0.000310546875,
      "step": 50880,
      "training_step_time": 0.40195298194885254
    },
    {
      "epoch": 0.000310552978515625,
      "model_forward_time": 0.11518621444702148,
      "step": 50881
    },
    {
      "epoch": 0.000310552978515625,
      "step": 50881,
      "training_step_time": 0.7560429573059082
    },
    {
      "epoch": 0.00031055908203125,
      "model_forward_time": 0.1145479679107666,
      "step": 50882
    },
    {
      "epoch": 0.00031055908203125,
      "step": 50882,
      "training_step_time": 0.39056825637817383
    },
    {
      "epoch": 0.000310565185546875,
      "model_forward_time": 0.11428117752075195,
      "step": 50883
    },
    {
      "epoch": 0.000310565185546875,
      "step": 50883,
      "training_step_time": 0.38463926315307617
    },
    {
      "epoch": 0.0003105712890625,
      "model_forward_time": 0.11494851112365723,
      "step": 50884
    },
    {
      "epoch": 0.0003105712890625,
      "step": 50884,
      "training_step_time": 0.4092094898223877
    },
    {
      "epoch": 0.000310577392578125,
      "model_forward_time": 0.11452507972717285,
      "step": 50885
    },
    {
      "epoch": 0.000310577392578125,
      "step": 50885,
      "training_step_time": 0.3875558376312256
    },
    {
      "epoch": 0.00031058349609375,
      "model_forward_time": 0.11478519439697266,
      "step": 50886
    },
    {
      "epoch": 0.00031058349609375,
      "step": 50886,
      "training_step_time": 0.4761323928833008
    },
    {
      "epoch": 0.000310589599609375,
      "model_forward_time": 0.11549639701843262,
      "step": 50887
    },
    {
      "epoch": 0.000310589599609375,
      "step": 50887,
      "training_step_time": 0.5371205806732178
    },
    {
      "epoch": 0.000310595703125,
      "model_forward_time": 0.11457061767578125,
      "step": 50888
    },
    {
      "epoch": 0.000310595703125,
      "step": 50888,
      "training_step_time": 0.3966214656829834
    },
    {
      "epoch": 0.000310601806640625,
      "model_forward_time": 0.11454534530639648,
      "step": 50889
    },
    {
      "epoch": 0.000310601806640625,
      "step": 50889,
      "training_step_time": 0.3973977565765381
    },
    {
      "epoch": 0.00031060791015625,
      "grad_norm": 0.09057623893022537,
      "learning_rate": 6.171396567455051e-06,
      "loss": 0.0415,
      "step": 50890
    },
    {
      "epoch": 0.00031060791015625,
      "model_forward_time": 0.11514043807983398,
      "step": 50890
    },
    {
      "epoch": 0.00031060791015625,
      "step": 50890,
      "training_step_time": 0.43147921562194824
    },
    {
      "epoch": 0.000310614013671875,
      "model_forward_time": 0.11441755294799805,
      "step": 50891
    },
    {
      "epoch": 0.000310614013671875,
      "step": 50891,
      "training_step_time": 0.40035343170166016
    },
    {
      "epoch": 0.0003106201171875,
      "model_forward_time": 0.11449193954467773,
      "step": 50892
    },
    {
      "epoch": 0.0003106201171875,
      "step": 50892,
      "training_step_time": 0.4115419387817383
    },
    {
      "epoch": 0.000310626220703125,
      "model_forward_time": 0.115234375,
      "step": 50893
    },
    {
      "epoch": 0.000310626220703125,
      "step": 50893,
      "training_step_time": 0.743488073348999
    },
    {
      "epoch": 0.00031063232421875,
      "model_forward_time": 0.11465620994567871,
      "step": 50894
    },
    {
      "epoch": 0.00031063232421875,
      "step": 50894,
      "training_step_time": 0.3862113952636719
    },
    {
      "epoch": 0.000310638427734375,
      "model_forward_time": 0.11432433128356934,
      "step": 50895
    },
    {
      "epoch": 0.000310638427734375,
      "step": 50895,
      "training_step_time": 0.3941771984100342
    },
    {
      "epoch": 0.00031064453125,
      "model_forward_time": 0.11462759971618652,
      "step": 50896
    },
    {
      "epoch": 0.00031064453125,
      "step": 50896,
      "training_step_time": 0.3970632553100586
    },
    {
      "epoch": 0.000310650634765625,
      "model_forward_time": 0.11442923545837402,
      "step": 50897
    },
    {
      "epoch": 0.000310650634765625,
      "step": 50897,
      "training_step_time": 0.38701963424682617
    },
    {
      "epoch": 0.00031065673828125,
      "model_forward_time": 0.11465096473693848,
      "step": 50898
    },
    {
      "epoch": 0.00031065673828125,
      "step": 50898,
      "training_step_time": 0.41756486892700195
    },
    {
      "epoch": 0.000310662841796875,
      "model_forward_time": 0.11551427841186523,
      "step": 50899
    },
    {
      "epoch": 0.000310662841796875,
      "step": 50899,
      "training_step_time": 0.692948579788208
    },
    {
      "epoch": 0.0003106689453125,
      "grad_norm": 0.08336759358644485,
      "learning_rate": 6.158140451075795e-06,
      "loss": 0.038,
      "step": 50900
    },
    {
      "epoch": 0.0003106689453125,
      "model_forward_time": 0.11419796943664551,
      "step": 50900
    },
    {
      "epoch": 0.0003106689453125,
      "step": 50900,
      "training_step_time": 0.400209903717041
    },
    {
      "epoch": 0.000310675048828125,
      "model_forward_time": 0.1145472526550293,
      "step": 50901
    },
    {
      "epoch": 0.000310675048828125,
      "step": 50901,
      "training_step_time": 0.48046159744262695
    },
    {
      "epoch": 0.00031068115234375,
      "model_forward_time": 0.11402535438537598,
      "step": 50902
    },
    {
      "epoch": 0.00031068115234375,
      "step": 50902,
      "training_step_time": 0.4362447261810303
    },
    {
      "epoch": 0.000310687255859375,
      "model_forward_time": 0.11499500274658203,
      "step": 50903
    },
    {
      "epoch": 0.000310687255859375,
      "step": 50903,
      "training_step_time": 0.4002046585083008
    },
    {
      "epoch": 0.000310693359375,
      "model_forward_time": 0.11419129371643066,
      "step": 50904
    },
    {
      "epoch": 0.000310693359375,
      "step": 50904,
      "training_step_time": 0.38521862030029297
    },
    {
      "epoch": 0.000310699462890625,
      "model_forward_time": 0.11487102508544922,
      "step": 50905
    },
    {
      "epoch": 0.000310699462890625,
      "step": 50905,
      "training_step_time": 0.6764078140258789
    },
    {
      "epoch": 0.00031070556640625,
      "model_forward_time": 0.11389780044555664,
      "step": 50906
    },
    {
      "epoch": 0.00031070556640625,
      "step": 50906,
      "training_step_time": 0.3857579231262207
    },
    {
      "epoch": 0.000310711669921875,
      "model_forward_time": 0.11469650268554688,
      "step": 50907
    },
    {
      "epoch": 0.000310711669921875,
      "step": 50907,
      "training_step_time": 0.38994288444519043
    },
    {
      "epoch": 0.0003107177734375,
      "model_forward_time": 0.11487817764282227,
      "step": 50908
    },
    {
      "epoch": 0.0003107177734375,
      "step": 50908,
      "training_step_time": 0.38422131538391113
    },
    {
      "epoch": 0.000310723876953125,
      "model_forward_time": 0.11458635330200195,
      "step": 50909
    },
    {
      "epoch": 0.000310723876953125,
      "step": 50909,
      "training_step_time": 0.3869175910949707
    },
    {
      "epoch": 0.00031072998046875,
      "grad_norm": 0.0867842584848404,
      "learning_rate": 6.144897652695864e-06,
      "loss": 0.0344,
      "step": 50910
    },
    {
      "epoch": 0.00031072998046875,
      "model_forward_time": 0.11487412452697754,
      "step": 50910
    },
    {
      "epoch": 0.00031072998046875,
      "step": 50910,
      "training_step_time": 0.4024813175201416
    },
    {
      "epoch": 0.000310736083984375,
      "model_forward_time": 0.11449623107910156,
      "step": 50911
    },
    {
      "epoch": 0.000310736083984375,
      "step": 50911,
      "training_step_time": 0.7370543479919434
    },
    {
      "epoch": 0.0003107421875,
      "model_forward_time": 0.11539745330810547,
      "step": 50912
    },
    {
      "epoch": 0.0003107421875,
      "step": 50912,
      "training_step_time": 0.41449522972106934
    },
    {
      "epoch": 0.000310748291015625,
      "model_forward_time": 0.11451077461242676,
      "step": 50913
    },
    {
      "epoch": 0.000310748291015625,
      "step": 50913,
      "training_step_time": 0.42715907096862793
    },
    {
      "epoch": 0.00031075439453125,
      "model_forward_time": 0.11435532569885254,
      "step": 50914
    },
    {
      "epoch": 0.00031075439453125,
      "step": 50914,
      "training_step_time": 0.4682340621948242
    },
    {
      "epoch": 0.000310760498046875,
      "model_forward_time": 0.11408138275146484,
      "step": 50915
    },
    {
      "epoch": 0.000310760498046875,
      "step": 50915,
      "training_step_time": 0.3910207748413086
    },
    {
      "epoch": 0.0003107666015625,
      "model_forward_time": 0.11460256576538086,
      "step": 50916
    },
    {
      "epoch": 0.0003107666015625,
      "step": 50916,
      "training_step_time": 0.45699501037597656
    },
    {
      "epoch": 0.000310772705078125,
      "model_forward_time": 0.11501264572143555,
      "step": 50917
    },
    {
      "epoch": 0.000310772705078125,
      "step": 50917,
      "training_step_time": 0.45295286178588867
    },
    {
      "epoch": 0.00031077880859375,
      "model_forward_time": 0.11528849601745605,
      "step": 50918
    },
    {
      "epoch": 0.00031077880859375,
      "step": 50918,
      "training_step_time": 0.38990020751953125
    },
    {
      "epoch": 0.000310784912109375,
      "model_forward_time": 0.11463141441345215,
      "step": 50919
    },
    {
      "epoch": 0.000310784912109375,
      "step": 50919,
      "training_step_time": 0.39179515838623047
    },
    {
      "epoch": 0.000310791015625,
      "grad_norm": 0.08952934294939041,
      "learning_rate": 6.131668176338118e-06,
      "loss": 0.0334,
      "step": 50920
    },
    {
      "epoch": 0.000310791015625,
      "model_forward_time": 0.11442852020263672,
      "step": 50920
    },
    {
      "epoch": 0.000310791015625,
      "step": 50920,
      "training_step_time": 0.39111995697021484
    },
    {
      "epoch": 0.000310797119140625,
      "model_forward_time": 0.11563920974731445,
      "step": 50921
    },
    {
      "epoch": 0.000310797119140625,
      "step": 50921,
      "training_step_time": 0.40990161895751953
    },
    {
      "epoch": 0.00031080322265625,
      "model_forward_time": 0.11504554748535156,
      "step": 50922
    },
    {
      "epoch": 0.00031080322265625,
      "step": 50922,
      "training_step_time": 0.40464305877685547
    },
    {
      "epoch": 0.000310809326171875,
      "model_forward_time": 0.11495447158813477,
      "step": 50923
    },
    {
      "epoch": 0.000310809326171875,
      "step": 50923,
      "training_step_time": 0.7701506614685059
    },
    {
      "epoch": 0.0003108154296875,
      "model_forward_time": 0.11414837837219238,
      "step": 50924
    },
    {
      "epoch": 0.0003108154296875,
      "step": 50924,
      "training_step_time": 0.4033067226409912
    },
    {
      "epoch": 0.000310821533203125,
      "model_forward_time": 0.11494994163513184,
      "step": 50925
    },
    {
      "epoch": 0.000310821533203125,
      "step": 50925,
      "training_step_time": 0.42229723930358887
    },
    {
      "epoch": 0.00031082763671875,
      "model_forward_time": 0.11448264122009277,
      "step": 50926
    },
    {
      "epoch": 0.00031082763671875,
      "step": 50926,
      "training_step_time": 0.41032910346984863
    },
    {
      "epoch": 0.000310833740234375,
      "model_forward_time": 0.1146855354309082,
      "step": 50927
    },
    {
      "epoch": 0.000310833740234375,
      "step": 50927,
      "training_step_time": 0.4282951354980469
    },
    {
      "epoch": 0.00031083984375,
      "model_forward_time": 0.11436319351196289,
      "step": 50928
    },
    {
      "epoch": 0.00031083984375,
      "step": 50928,
      "training_step_time": 0.4674828052520752
    },
    {
      "epoch": 0.000310845947265625,
      "model_forward_time": 0.11461567878723145,
      "step": 50929
    },
    {
      "epoch": 0.000310845947265625,
      "step": 50929,
      "training_step_time": 0.5839414596557617
    },
    {
      "epoch": 0.00031085205078125,
      "grad_norm": 0.09293490648269653,
      "learning_rate": 6.118452026021299e-06,
      "loss": 0.0411,
      "step": 50930
    },
    {
      "epoch": 0.00031085205078125,
      "model_forward_time": 0.11463212966918945,
      "step": 50930
    },
    {
      "epoch": 0.00031085205078125,
      "step": 50930,
      "training_step_time": 0.4034545421600342
    },
    {
      "epoch": 0.000310858154296875,
      "model_forward_time": 0.11399674415588379,
      "step": 50931
    },
    {
      "epoch": 0.000310858154296875,
      "step": 50931,
      "training_step_time": 0.4453294277191162
    },
    {
      "epoch": 0.0003108642578125,
      "model_forward_time": 0.11421442031860352,
      "step": 50932
    },
    {
      "epoch": 0.0003108642578125,
      "step": 50932,
      "training_step_time": 0.38378143310546875
    },
    {
      "epoch": 0.000310870361328125,
      "model_forward_time": 0.11444330215454102,
      "step": 50933
    },
    {
      "epoch": 0.000310870361328125,
      "step": 50933,
      "training_step_time": 0.37543678283691406
    },
    {
      "epoch": 0.00031087646484375,
      "model_forward_time": 0.11486315727233887,
      "step": 50934
    },
    {
      "epoch": 0.00031087646484375,
      "step": 50934,
      "training_step_time": 0.3948800563812256
    },
    {
      "epoch": 0.000310882568359375,
      "model_forward_time": 0.11507081985473633,
      "step": 50935
    },
    {
      "epoch": 0.000310882568359375,
      "step": 50935,
      "training_step_time": 0.6397855281829834
    },
    {
      "epoch": 0.000310888671875,
      "model_forward_time": 0.11431336402893066,
      "step": 50936
    },
    {
      "epoch": 0.000310888671875,
      "step": 50936,
      "training_step_time": 0.3924143314361572
    },
    {
      "epoch": 0.000310894775390625,
      "model_forward_time": 0.11470174789428711,
      "step": 50937
    },
    {
      "epoch": 0.000310894775390625,
      "step": 50937,
      "training_step_time": 0.41898465156555176
    },
    {
      "epoch": 0.00031090087890625,
      "model_forward_time": 0.11556291580200195,
      "step": 50938
    },
    {
      "epoch": 0.00031090087890625,
      "step": 50938,
      "training_step_time": 0.41509270668029785
    },
    {
      "epoch": 0.000310906982421875,
      "model_forward_time": 0.11440205574035645,
      "step": 50939
    },
    {
      "epoch": 0.000310906982421875,
      "step": 50939,
      "training_step_time": 0.459820032119751
    },
    {
      "epoch": 0.0003109130859375,
      "grad_norm": 0.09037046134471893,
      "learning_rate": 6.1052492057601275e-06,
      "loss": 0.0358,
      "step": 50940
    },
    {
      "epoch": 0.0003109130859375,
      "model_forward_time": 0.11500906944274902,
      "step": 50940
    },
    {
      "epoch": 0.0003109130859375,
      "step": 50940,
      "training_step_time": 0.48841214179992676
    },
    {
      "epoch": 0.000310919189453125,
      "model_forward_time": 0.11525630950927734,
      "step": 50941
    },
    {
      "epoch": 0.000310919189453125,
      "step": 50941,
      "training_step_time": 0.5023953914642334
    },
    {
      "epoch": 0.00031092529296875,
      "model_forward_time": 0.11471104621887207,
      "step": 50942
    },
    {
      "epoch": 0.00031092529296875,
      "step": 50942,
      "training_step_time": 0.38799476623535156
    },
    {
      "epoch": 0.000310931396484375,
      "model_forward_time": 0.11450839042663574,
      "step": 50943
    },
    {
      "epoch": 0.000310931396484375,
      "step": 50943,
      "training_step_time": 0.4364943504333496
    },
    {
      "epoch": 0.0003109375,
      "model_forward_time": 0.11454105377197266,
      "step": 50944
    },
    {
      "epoch": 0.0003109375,
      "step": 50944,
      "training_step_time": 0.387587308883667
    },
    {
      "epoch": 0.000310943603515625,
      "model_forward_time": 0.11439371109008789,
      "step": 50945
    },
    {
      "epoch": 0.000310943603515625,
      "step": 50945,
      "training_step_time": 0.46622204780578613
    },
    {
      "epoch": 0.00031094970703125,
      "model_forward_time": 0.11563658714294434,
      "step": 50946
    },
    {
      "epoch": 0.00031094970703125,
      "step": 50946,
      "training_step_time": 0.3987443447113037
    },
    {
      "epoch": 0.000310955810546875,
      "model_forward_time": 0.11476755142211914,
      "step": 50947
    },
    {
      "epoch": 0.000310955810546875,
      "step": 50947,
      "training_step_time": 0.6146490573883057
    },
    {
      "epoch": 0.0003109619140625,
      "model_forward_time": 0.11566996574401855,
      "step": 50948
    },
    {
      "epoch": 0.0003109619140625,
      "step": 50948,
      "training_step_time": 0.3940548896789551
    },
    {
      "epoch": 0.000310968017578125,
      "model_forward_time": 0.1144561767578125,
      "step": 50949
    },
    {
      "epoch": 0.000310968017578125,
      "step": 50949,
      "training_step_time": 0.39997005462646484
    },
    {
      "epoch": 0.00031097412109375,
      "grad_norm": 0.11758434027433395,
      "learning_rate": 6.092059719565274e-06,
      "loss": 0.0371,
      "step": 50950
    },
    {
      "epoch": 0.00031097412109375,
      "model_forward_time": 0.115234375,
      "step": 50950
    },
    {
      "epoch": 0.00031097412109375,
      "step": 50950,
      "training_step_time": 0.4227471351623535
    },
    {
      "epoch": 0.000310980224609375,
      "model_forward_time": 0.1143794059753418,
      "step": 50951
    },
    {
      "epoch": 0.000310980224609375,
      "step": 50951,
      "training_step_time": 0.3925349712371826
    },
    {
      "epoch": 0.000310986328125,
      "model_forward_time": 0.11472916603088379,
      "step": 50952
    },
    {
      "epoch": 0.000310986328125,
      "step": 50952,
      "training_step_time": 0.36419034004211426
    },
    {
      "epoch": 0.000310992431640625,
      "model_forward_time": 0.11481595039367676,
      "step": 50953
    },
    {
      "epoch": 0.000310992431640625,
      "step": 50953,
      "training_step_time": 0.6732544898986816
    },
    {
      "epoch": 0.00031099853515625,
      "model_forward_time": 0.11474132537841797,
      "step": 50954
    },
    {
      "epoch": 0.00031099853515625,
      "step": 50954,
      "training_step_time": 0.4546012878417969
    },
    {
      "epoch": 0.000311004638671875,
      "model_forward_time": 0.11497879028320312,
      "step": 50955
    },
    {
      "epoch": 0.000311004638671875,
      "step": 50955,
      "training_step_time": 0.45902061462402344
    },
    {
      "epoch": 0.0003110107421875,
      "model_forward_time": 0.11473369598388672,
      "step": 50956
    },
    {
      "epoch": 0.0003110107421875,
      "step": 50956,
      "training_step_time": 0.4107933044433594
    },
    {
      "epoch": 0.000311016845703125,
      "model_forward_time": 0.11486220359802246,
      "step": 50957
    },
    {
      "epoch": 0.000311016845703125,
      "step": 50957,
      "training_step_time": 0.39696526527404785
    },
    {
      "epoch": 0.00031102294921875,
      "model_forward_time": 0.11438703536987305,
      "step": 50958
    },
    {
      "epoch": 0.00031102294921875,
      "step": 50958,
      "training_step_time": 0.4406137466430664
    },
    {
      "epoch": 0.000311029052734375,
      "model_forward_time": 0.11451888084411621,
      "step": 50959
    },
    {
      "epoch": 0.000311029052734375,
      "step": 50959,
      "training_step_time": 0.4362633228302002
    },
    {
      "epoch": 0.00031103515625,
      "grad_norm": 0.11096367985010147,
      "learning_rate": 6.07888357144335e-06,
      "loss": 0.0373,
      "step": 50960
    },
    {
      "epoch": 0.00031103515625,
      "model_forward_time": 0.1158454418182373,
      "step": 50960
    },
    {
      "epoch": 0.00031103515625,
      "step": 50960,
      "training_step_time": 0.3833739757537842
    },
    {
      "epoch": 0.000311041259765625,
      "model_forward_time": 0.11490678787231445,
      "step": 50961
    },
    {
      "epoch": 0.000311041259765625,
      "step": 50961,
      "training_step_time": 0.3919103145599365
    },
    {
      "epoch": 0.00031104736328125,
      "model_forward_time": 0.11437177658081055,
      "step": 50962
    },
    {
      "epoch": 0.00031104736328125,
      "step": 50962,
      "training_step_time": 0.400083065032959
    },
    {
      "epoch": 0.000311053466796875,
      "model_forward_time": 0.1152181625366211,
      "step": 50963
    },
    {
      "epoch": 0.000311053466796875,
      "step": 50963,
      "training_step_time": 0.3987843990325928
    },
    {
      "epoch": 0.0003110595703125,
      "model_forward_time": 0.11478590965270996,
      "step": 50964
    },
    {
      "epoch": 0.0003110595703125,
      "step": 50964,
      "training_step_time": 0.4162938594818115
    },
    {
      "epoch": 0.000311065673828125,
      "model_forward_time": 0.11565637588500977,
      "step": 50965
    },
    {
      "epoch": 0.000311065673828125,
      "step": 50965,
      "training_step_time": 0.746985912322998
    },
    {
      "epoch": 0.00031107177734375,
      "model_forward_time": 0.11439728736877441,
      "step": 50966
    },
    {
      "epoch": 0.00031107177734375,
      "step": 50966,
      "training_step_time": 0.4339420795440674
    },
    {
      "epoch": 0.000311077880859375,
      "model_forward_time": 0.1146094799041748,
      "step": 50967
    },
    {
      "epoch": 0.000311077880859375,
      "step": 50967,
      "training_step_time": 0.4820897579193115
    },
    {
      "epoch": 0.000311083984375,
      "model_forward_time": 0.11424446105957031,
      "step": 50968
    },
    {
      "epoch": 0.000311083984375,
      "step": 50968,
      "training_step_time": 0.4141199588775635
    },
    {
      "epoch": 0.000311090087890625,
      "model_forward_time": 0.11338114738464355,
      "step": 50969
    },
    {
      "epoch": 0.000311090087890625,
      "step": 50969,
      "training_step_time": 0.3977072238922119
    },
    {
      "epoch": 0.00031109619140625,
      "grad_norm": 0.0876995250582695,
      "learning_rate": 6.0657207653969315e-06,
      "loss": 0.0377,
      "step": 50970
    },
    {
      "epoch": 0.00031109619140625,
      "model_forward_time": 0.1145777702331543,
      "step": 50970
    },
    {
      "epoch": 0.00031109619140625,
      "step": 50970,
      "training_step_time": 0.3958859443664551
    },
    {
      "epoch": 0.000311102294921875,
      "model_forward_time": 0.11461257934570312,
      "step": 50971
    },
    {
      "epoch": 0.000311102294921875,
      "step": 50971,
      "training_step_time": 0.6414294242858887
    },
    {
      "epoch": 0.0003111083984375,
      "model_forward_time": 0.11408638954162598,
      "step": 50972
    },
    {
      "epoch": 0.0003111083984375,
      "step": 50972,
      "training_step_time": 0.4684877395629883
    },
    {
      "epoch": 0.000311114501953125,
      "model_forward_time": 0.11434316635131836,
      "step": 50973
    },
    {
      "epoch": 0.000311114501953125,
      "step": 50973,
      "training_step_time": 0.3936448097229004
    },
    {
      "epoch": 0.00031112060546875,
      "model_forward_time": 0.11496281623840332,
      "step": 50974
    },
    {
      "epoch": 0.00031112060546875,
      "step": 50974,
      "training_step_time": 0.4000105857849121
    },
    {
      "epoch": 0.000311126708984375,
      "model_forward_time": 0.1151580810546875,
      "step": 50975
    },
    {
      "epoch": 0.000311126708984375,
      "step": 50975,
      "training_step_time": 0.3887801170349121
    },
    {
      "epoch": 0.0003111328125,
      "model_forward_time": 0.11525392532348633,
      "step": 50976
    },
    {
      "epoch": 0.0003111328125,
      "step": 50976,
      "training_step_time": 0.4039945602416992
    },
    {
      "epoch": 0.000311138916015625,
      "model_forward_time": 0.1146697998046875,
      "step": 50977
    },
    {
      "epoch": 0.000311138916015625,
      "step": 50977,
      "training_step_time": 0.6212639808654785
    },
    {
      "epoch": 0.00031114501953125,
      "model_forward_time": 0.11506223678588867,
      "step": 50978
    },
    {
      "epoch": 0.00031114501953125,
      "step": 50978,
      "training_step_time": 0.3899104595184326
    },
    {
      "epoch": 0.000311151123046875,
      "model_forward_time": 0.11583471298217773,
      "step": 50979
    },
    {
      "epoch": 0.000311151123046875,
      "step": 50979,
      "training_step_time": 0.45065903663635254
    },
    {
      "epoch": 0.0003111572265625,
      "grad_norm": 0.0754580870270729,
      "learning_rate": 6.052571305424531e-06,
      "loss": 0.037,
      "step": 50980
    },
    {
      "epoch": 0.0003111572265625,
      "model_forward_time": 0.11752915382385254,
      "step": 50980
    },
    {
      "epoch": 0.0003111572265625,
      "step": 50980,
      "training_step_time": 0.4917328357696533
    },
    {
      "epoch": 0.000311163330078125,
      "model_forward_time": 0.11518740653991699,
      "step": 50981
    },
    {
      "epoch": 0.000311163330078125,
      "step": 50981,
      "training_step_time": 0.4423792362213135
    },
    {
      "epoch": 0.00031116943359375,
      "model_forward_time": 0.11502909660339355,
      "step": 50982
    },
    {
      "epoch": 0.00031116943359375,
      "step": 50982,
      "training_step_time": 0.42898035049438477
    },
    {
      "epoch": 0.000311175537109375,
      "model_forward_time": 0.11475229263305664,
      "step": 50983
    },
    {
      "epoch": 0.000311175537109375,
      "step": 50983,
      "training_step_time": 0.4202115535736084
    },
    {
      "epoch": 0.000311181640625,
      "model_forward_time": 0.11508965492248535,
      "step": 50984
    },
    {
      "epoch": 0.000311181640625,
      "step": 50984,
      "training_step_time": 0.4742310047149658
    },
    {
      "epoch": 0.000311187744140625,
      "model_forward_time": 0.11459851264953613,
      "step": 50985
    },
    {
      "epoch": 0.000311187744140625,
      "step": 50985,
      "training_step_time": 0.41120314598083496
    },
    {
      "epoch": 0.00031119384765625,
      "model_forward_time": 0.11515188217163086,
      "step": 50986
    },
    {
      "epoch": 0.00031119384765625,
      "step": 50986,
      "training_step_time": 0.49774885177612305
    },
    {
      "epoch": 0.000311199951171875,
      "model_forward_time": 0.11447811126708984,
      "step": 50987
    },
    {
      "epoch": 0.000311199951171875,
      "step": 50987,
      "training_step_time": 0.39446067810058594
    },
    {
      "epoch": 0.0003112060546875,
      "model_forward_time": 0.11456727981567383,
      "step": 50988
    },
    {
      "epoch": 0.0003112060546875,
      "step": 50988,
      "training_step_time": 0.3982412815093994
    },
    {
      "epoch": 0.000311212158203125,
      "model_forward_time": 0.11470580101013184,
      "step": 50989
    },
    {
      "epoch": 0.000311212158203125,
      "step": 50989,
      "training_step_time": 0.547276496887207
    },
    {
      "epoch": 0.00031121826171875,
      "grad_norm": 0.08466869592666626,
      "learning_rate": 6.039435195520604e-06,
      "loss": 0.0398,
      "step": 50990
    },
    {
      "epoch": 0.00031121826171875,
      "model_forward_time": 0.11503863334655762,
      "step": 50990
    },
    {
      "epoch": 0.00031121826171875,
      "step": 50990,
      "training_step_time": 0.3986029624938965
    },
    {
      "epoch": 0.000311224365234375,
      "model_forward_time": 0.11490488052368164,
      "step": 50991
    },
    {
      "epoch": 0.000311224365234375,
      "step": 50991,
      "training_step_time": 0.3939025402069092
    },
    {
      "epoch": 0.00031123046875,
      "model_forward_time": 0.11559605598449707,
      "step": 50992
    },
    {
      "epoch": 0.00031123046875,
      "step": 50992,
      "training_step_time": 0.3893609046936035
    },
    {
      "epoch": 0.000311236572265625,
      "model_forward_time": 0.11561226844787598,
      "step": 50993
    },
    {
      "epoch": 0.000311236572265625,
      "step": 50993,
      "training_step_time": 0.41010499000549316
    },
    {
      "epoch": 0.00031124267578125,
      "model_forward_time": 0.11577534675598145,
      "step": 50994
    },
    {
      "epoch": 0.00031124267578125,
      "step": 50994,
      "training_step_time": 0.4085667133331299
    },
    {
      "epoch": 0.000311248779296875,
      "model_forward_time": 0.11561250686645508,
      "step": 50995
    },
    {
      "epoch": 0.000311248779296875,
      "step": 50995,
      "training_step_time": 0.6794283390045166
    },
    {
      "epoch": 0.0003112548828125,
      "model_forward_time": 0.11451935768127441,
      "step": 50996
    },
    {
      "epoch": 0.0003112548828125,
      "step": 50996,
      "training_step_time": 0.4956517219543457
    },
    {
      "epoch": 0.000311260986328125,
      "model_forward_time": 0.11398911476135254,
      "step": 50997
    },
    {
      "epoch": 0.000311260986328125,
      "step": 50997,
      "training_step_time": 0.394014835357666
    },
    {
      "epoch": 0.00031126708984375,
      "model_forward_time": 0.11436986923217773,
      "step": 50998
    },
    {
      "epoch": 0.00031126708984375,
      "step": 50998,
      "training_step_time": 0.4578533172607422
    },
    {
      "epoch": 0.000311273193359375,
      "model_forward_time": 0.11448335647583008,
      "step": 50999
    },
    {
      "epoch": 0.000311273193359375,
      "step": 50999,
      "training_step_time": 0.45429134368896484
    },
    {
      "epoch": 0.000311279296875,
      "grad_norm": 0.09155754745006561,
      "learning_rate": 6.026312439675552e-06,
      "loss": 0.0396,
      "step": 51000
    },
    {
      "epoch": 0.000311279296875,
      "model_forward_time": 0.11289000511169434,
      "step": 51000
    },
    {
      "epoch": 0.000311279296875,
      "step": 51000,
      "training_step_time": 0.360337495803833
    },
    {
      "epoch": 0.000311285400390625,
      "model_forward_time": 0.11233282089233398,
      "step": 51001
    },
    {
      "epoch": 0.000311285400390625,
      "step": 51001,
      "training_step_time": 0.37383389472961426
    },
    {
      "epoch": 0.00031129150390625,
      "model_forward_time": 0.11286067962646484,
      "step": 51002
    },
    {
      "epoch": 0.00031129150390625,
      "step": 51002,
      "training_step_time": 0.3778073787689209
    },
    {
      "epoch": 0.000311297607421875,
      "model_forward_time": 0.11293745040893555,
      "step": 51003
    },
    {
      "epoch": 0.000311297607421875,
      "step": 51003,
      "training_step_time": 0.3736281394958496
    },
    {
      "epoch": 0.0003113037109375,
      "model_forward_time": 0.11384940147399902,
      "step": 51004
    },
    {
      "epoch": 0.0003113037109375,
      "step": 51004,
      "training_step_time": 0.35765624046325684
    },
    {
      "epoch": 0.000311309814453125,
      "model_forward_time": 0.1138148307800293,
      "step": 51005
    },
    {
      "epoch": 0.000311309814453125,
      "step": 51005,
      "training_step_time": 0.41933298110961914
    },
    {
      "epoch": 0.00031131591796875,
      "model_forward_time": 0.11456751823425293,
      "step": 51006
    },
    {
      "epoch": 0.00031131591796875,
      "step": 51006,
      "training_step_time": 0.40715813636779785
    },
    {
      "epoch": 0.000311322021484375,
      "model_forward_time": 0.11394214630126953,
      "step": 51007
    },
    {
      "epoch": 0.000311322021484375,
      "step": 51007,
      "training_step_time": 0.44448161125183105
    },
    {
      "epoch": 0.000311328125,
      "model_forward_time": 0.11457252502441406,
      "step": 51008
    },
    {
      "epoch": 0.000311328125,
      "step": 51008,
      "training_step_time": 0.49575185775756836
    },
    {
      "epoch": 0.000311334228515625,
      "model_forward_time": 0.11416268348693848,
      "step": 51009
    },
    {
      "epoch": 0.000311334228515625,
      "step": 51009,
      "training_step_time": 0.38660573959350586
    },
    {
      "epoch": 0.00031134033203125,
      "grad_norm": 0.1006447896361351,
      "learning_rate": 6.01320304187572e-06,
      "loss": 0.0312,
      "step": 51010
    },
    {
      "epoch": 0.00031134033203125,
      "model_forward_time": 0.11494684219360352,
      "step": 51010
    },
    {
      "epoch": 0.00031134033203125,
      "step": 51010,
      "training_step_time": 0.41640305519104004
    },
    {
      "epoch": 0.000311346435546875,
      "model_forward_time": 0.11413145065307617,
      "step": 51011
    },
    {
      "epoch": 0.000311346435546875,
      "step": 51011,
      "training_step_time": 0.4539151191711426
    },
    {
      "epoch": 0.0003113525390625,
      "model_forward_time": 0.1153860092163086,
      "step": 51012
    },
    {
      "epoch": 0.0003113525390625,
      "step": 51012,
      "training_step_time": 0.46150875091552734
    },
    {
      "epoch": 0.000311358642578125,
      "model_forward_time": 0.11442184448242188,
      "step": 51013
    },
    {
      "epoch": 0.000311358642578125,
      "step": 51013,
      "training_step_time": 0.3950662612915039
    },
    {
      "epoch": 0.00031136474609375,
      "model_forward_time": 0.11489295959472656,
      "step": 51014
    },
    {
      "epoch": 0.00031136474609375,
      "step": 51014,
      "training_step_time": 0.3992476463317871
    },
    {
      "epoch": 0.000311370849609375,
      "model_forward_time": 0.11467647552490234,
      "step": 51015
    },
    {
      "epoch": 0.000311370849609375,
      "step": 51015,
      "training_step_time": 0.3978393077850342
    },
    {
      "epoch": 0.000311376953125,
      "model_forward_time": 0.11537027359008789,
      "step": 51016
    },
    {
      "epoch": 0.000311376953125,
      "step": 51016,
      "training_step_time": 0.3886563777923584
    },
    {
      "epoch": 0.000311383056640625,
      "model_forward_time": 0.1147308349609375,
      "step": 51017
    },
    {
      "epoch": 0.000311383056640625,
      "step": 51017,
      "training_step_time": 0.39418911933898926
    },
    {
      "epoch": 0.00031138916015625,
      "model_forward_time": 0.11474299430847168,
      "step": 51018
    },
    {
      "epoch": 0.00031138916015625,
      "step": 51018,
      "training_step_time": 0.3998396396636963
    },
    {
      "epoch": 0.000311395263671875,
      "model_forward_time": 0.11524796485900879,
      "step": 51019
    },
    {
      "epoch": 0.000311395263671875,
      "step": 51019,
      "training_step_time": 0.47895073890686035
    },
    {
      "epoch": 0.0003114013671875,
      "grad_norm": 0.10444698482751846,
      "learning_rate": 6.0001070061033945e-06,
      "loss": 0.0408,
      "step": 51020
    },
    {
      "epoch": 0.0003114013671875,
      "model_forward_time": 0.11503887176513672,
      "step": 51020
    },
    {
      "epoch": 0.0003114013671875,
      "step": 51020,
      "training_step_time": 0.40981388092041016
    },
    {
      "epoch": 0.000311407470703125,
      "model_forward_time": 0.11489987373352051,
      "step": 51021
    },
    {
      "epoch": 0.000311407470703125,
      "step": 51021,
      "training_step_time": 0.41785168647766113
    },
    {
      "epoch": 0.00031141357421875,
      "model_forward_time": 0.11792564392089844,
      "step": 51022
    },
    {
      "epoch": 0.00031141357421875,
      "step": 51022,
      "training_step_time": 0.4826672077178955
    },
    {
      "epoch": 0.000311419677734375,
      "model_forward_time": 0.1192469596862793,
      "step": 51023
    },
    {
      "epoch": 0.000311419677734375,
      "step": 51023,
      "training_step_time": 0.3821537494659424
    },
    {
      "epoch": 0.00031142578125,
      "model_forward_time": 0.11512041091918945,
      "step": 51024
    },
    {
      "epoch": 0.00031142578125,
      "step": 51024,
      "training_step_time": 0.47524380683898926
    },
    {
      "epoch": 0.000311431884765625,
      "model_forward_time": 0.11474013328552246,
      "step": 51025
    },
    {
      "epoch": 0.000311431884765625,
      "step": 51025,
      "training_step_time": 0.40503573417663574
    },
    {
      "epoch": 0.00031143798828125,
      "model_forward_time": 0.11481571197509766,
      "step": 51026
    },
    {
      "epoch": 0.00031143798828125,
      "step": 51026,
      "training_step_time": 0.40458178520202637
    },
    {
      "epoch": 0.000311444091796875,
      "model_forward_time": 0.11500358581542969,
      "step": 51027
    },
    {
      "epoch": 0.000311444091796875,
      "step": 51027,
      "training_step_time": 0.38134241104125977
    },
    {
      "epoch": 0.0003114501953125,
      "model_forward_time": 0.11487627029418945,
      "step": 51028
    },
    {
      "epoch": 0.0003114501953125,
      "step": 51028,
      "training_step_time": 0.40955328941345215
    },
    {
      "epoch": 0.000311456298828125,
      "model_forward_time": 0.11499452590942383,
      "step": 51029
    },
    {
      "epoch": 0.000311456298828125,
      "step": 51029,
      "training_step_time": 0.3920707702636719
    },
    {
      "epoch": 0.00031146240234375,
      "grad_norm": 0.08969981968402863,
      "learning_rate": 5.9870243363368275e-06,
      "loss": 0.0371,
      "step": 51030
    },
    {
      "epoch": 0.00031146240234375,
      "model_forward_time": 0.11475872993469238,
      "step": 51030
    },
    {
      "epoch": 0.00031146240234375,
      "step": 51030,
      "training_step_time": 0.4125251770019531
    },
    {
      "epoch": 0.000311468505859375,
      "model_forward_time": 0.11493229866027832,
      "step": 51031
    },
    {
      "epoch": 0.000311468505859375,
      "step": 51031,
      "training_step_time": 0.39405345916748047
    },
    {
      "epoch": 0.000311474609375,
      "model_forward_time": 0.11499190330505371,
      "step": 51032
    },
    {
      "epoch": 0.000311474609375,
      "step": 51032,
      "training_step_time": 0.39454030990600586
    },
    {
      "epoch": 0.000311480712890625,
      "model_forward_time": 0.11523199081420898,
      "step": 51033
    },
    {
      "epoch": 0.000311480712890625,
      "step": 51033,
      "training_step_time": 0.3850529193878174
    },
    {
      "epoch": 0.00031148681640625,
      "model_forward_time": 0.1153256893157959,
      "step": 51034
    },
    {
      "epoch": 0.00031148681640625,
      "step": 51034,
      "training_step_time": 0.517935037612915
    },
    {
      "epoch": 0.000311492919921875,
      "model_forward_time": 0.11478495597839355,
      "step": 51035
    },
    {
      "epoch": 0.000311492919921875,
      "step": 51035,
      "training_step_time": 0.5061032772064209
    },
    {
      "epoch": 0.0003114990234375,
      "model_forward_time": 0.11499786376953125,
      "step": 51036
    },
    {
      "epoch": 0.0003114990234375,
      "step": 51036,
      "training_step_time": 0.3981757164001465
    },
    {
      "epoch": 0.000311505126953125,
      "model_forward_time": 0.11462521553039551,
      "step": 51037
    },
    {
      "epoch": 0.000311505126953125,
      "step": 51037,
      "training_step_time": 0.4543912410736084
    },
    {
      "epoch": 0.00031151123046875,
      "model_forward_time": 0.1149749755859375,
      "step": 51038
    },
    {
      "epoch": 0.00031151123046875,
      "step": 51038,
      "training_step_time": 0.44699597358703613
    },
    {
      "epoch": 0.000311517333984375,
      "model_forward_time": 0.1144707202911377,
      "step": 51039
    },
    {
      "epoch": 0.000311517333984375,
      "step": 51039,
      "training_step_time": 0.3903496265411377
    },
    {
      "epoch": 0.0003115234375,
      "grad_norm": 0.08526071161031723,
      "learning_rate": 5.9739550365501494e-06,
      "loss": 0.0372,
      "step": 51040
    },
    {
      "epoch": 0.0003115234375,
      "model_forward_time": 0.1151118278503418,
      "step": 51040
    },
    {
      "epoch": 0.0003115234375,
      "step": 51040,
      "training_step_time": 0.46370792388916016
    },
    {
      "epoch": 0.000311529541015625,
      "model_forward_time": 0.11570572853088379,
      "step": 51041
    },
    {
      "epoch": 0.000311529541015625,
      "step": 51041,
      "training_step_time": 0.3862802982330322
    },
    {
      "epoch": 0.00031153564453125,
      "model_forward_time": 0.11444950103759766,
      "step": 51042
    },
    {
      "epoch": 0.00031153564453125,
      "step": 51042,
      "training_step_time": 0.3855457305908203
    },
    {
      "epoch": 0.000311541748046875,
      "model_forward_time": 0.1155097484588623,
      "step": 51043
    },
    {
      "epoch": 0.000311541748046875,
      "step": 51043,
      "training_step_time": 0.39370012283325195
    },
    {
      "epoch": 0.0003115478515625,
      "model_forward_time": 0.11482810974121094,
      "step": 51044
    },
    {
      "epoch": 0.0003115478515625,
      "step": 51044,
      "training_step_time": 0.39429664611816406
    },
    {
      "epoch": 0.000311553955078125,
      "model_forward_time": 0.11516356468200684,
      "step": 51045
    },
    {
      "epoch": 0.000311553955078125,
      "step": 51045,
      "training_step_time": 0.38167691230773926
    },
    {
      "epoch": 0.00031156005859375,
      "model_forward_time": 0.11505603790283203,
      "step": 51046
    },
    {
      "epoch": 0.00031156005859375,
      "step": 51046,
      "training_step_time": 0.3963277339935303
    },
    {
      "epoch": 0.000311566162109375,
      "model_forward_time": 0.11516618728637695,
      "step": 51047
    },
    {
      "epoch": 0.000311566162109375,
      "step": 51047,
      "training_step_time": 0.40500426292419434
    },
    {
      "epoch": 0.000311572265625,
      "model_forward_time": 0.1166691780090332,
      "step": 51048
    },
    {
      "epoch": 0.000311572265625,
      "step": 51048,
      "training_step_time": 0.6991214752197266
    },
    {
      "epoch": 0.000311578369140625,
      "model_forward_time": 0.11487221717834473,
      "step": 51049
    },
    {
      "epoch": 0.000311578369140625,
      "step": 51049,
      "training_step_time": 0.4755880832672119
    },
    {
      "epoch": 0.00031158447265625,
      "grad_norm": 0.084622323513031,
      "learning_rate": 5.960899110713519e-06,
      "loss": 0.0355,
      "step": 51050
    },
    {
      "epoch": 0.00031158447265625,
      "model_forward_time": 0.11426281929016113,
      "step": 51050
    },
    {
      "epoch": 0.00031158447265625,
      "step": 51050,
      "training_step_time": 0.3890199661254883
    },
    {
      "epoch": 0.000311590576171875,
      "model_forward_time": 0.11488723754882812,
      "step": 51051
    },
    {
      "epoch": 0.000311590576171875,
      "step": 51051,
      "training_step_time": 0.4921267032623291
    },
    {
      "epoch": 0.0003115966796875,
      "model_forward_time": 0.11455178260803223,
      "step": 51052
    },
    {
      "epoch": 0.0003115966796875,
      "step": 51052,
      "training_step_time": 0.38567543029785156
    },
    {
      "epoch": 0.000311602783203125,
      "model_forward_time": 0.11456680297851562,
      "step": 51053
    },
    {
      "epoch": 0.000311602783203125,
      "step": 51053,
      "training_step_time": 0.44524526596069336
    },
    {
      "epoch": 0.00031160888671875,
      "model_forward_time": 0.11549162864685059,
      "step": 51054
    },
    {
      "epoch": 0.00031160888671875,
      "step": 51054,
      "training_step_time": 0.4844200611114502
    },
    {
      "epoch": 0.000311614990234375,
      "model_forward_time": 0.11481833457946777,
      "step": 51055
    },
    {
      "epoch": 0.000311614990234375,
      "step": 51055,
      "training_step_time": 0.3916330337524414
    },
    {
      "epoch": 0.00031162109375,
      "model_forward_time": 0.11453413963317871,
      "step": 51056
    },
    {
      "epoch": 0.00031162109375,
      "step": 51056,
      "training_step_time": 0.39495301246643066
    },
    {
      "epoch": 0.000311627197265625,
      "model_forward_time": 0.11454963684082031,
      "step": 51057
    },
    {
      "epoch": 0.000311627197265625,
      "step": 51057,
      "training_step_time": 0.39850282669067383
    },
    {
      "epoch": 0.00031163330078125,
      "model_forward_time": 0.11587405204772949,
      "step": 51058
    },
    {
      "epoch": 0.00031163330078125,
      "step": 51058,
      "training_step_time": 0.3845083713531494
    },
    {
      "epoch": 0.000311639404296875,
      "model_forward_time": 0.11559081077575684,
      "step": 51059
    },
    {
      "epoch": 0.000311639404296875,
      "step": 51059,
      "training_step_time": 0.39865756034851074
    },
    {
      "epoch": 0.0003116455078125,
      "grad_norm": 0.09119004011154175,
      "learning_rate": 5.947856562792925e-06,
      "loss": 0.0356,
      "step": 51060
    },
    {
      "epoch": 0.0003116455078125,
      "model_forward_time": 0.11489009857177734,
      "step": 51060
    },
    {
      "epoch": 0.0003116455078125,
      "step": 51060,
      "training_step_time": 0.6296234130859375
    },
    {
      "epoch": 0.000311651611328125,
      "model_forward_time": 0.1143336296081543,
      "step": 51061
    },
    {
      "epoch": 0.000311651611328125,
      "step": 51061,
      "training_step_time": 0.3643798828125
    },
    {
      "epoch": 0.00031165771484375,
      "model_forward_time": 0.11496186256408691,
      "step": 51062
    },
    {
      "epoch": 0.00031165771484375,
      "step": 51062,
      "training_step_time": 0.4488530158996582
    },
    {
      "epoch": 0.000311663818359375,
      "model_forward_time": 0.11488509178161621,
      "step": 51063
    },
    {
      "epoch": 0.000311663818359375,
      "step": 51063,
      "training_step_time": 0.4798696041107178
    },
    {
      "epoch": 0.000311669921875,
      "model_forward_time": 0.11463236808776855,
      "step": 51064
    },
    {
      "epoch": 0.000311669921875,
      "step": 51064,
      "training_step_time": 0.47130703926086426
    },
    {
      "epoch": 0.000311676025390625,
      "model_forward_time": 0.1147606372833252,
      "step": 51065
    },
    {
      "epoch": 0.000311676025390625,
      "step": 51065,
      "training_step_time": 0.39528489112854004
    },
    {
      "epoch": 0.00031168212890625,
      "model_forward_time": 0.11480951309204102,
      "step": 51066
    },
    {
      "epoch": 0.00031168212890625,
      "step": 51066,
      "training_step_time": 0.4688262939453125
    },
    {
      "epoch": 0.000311688232421875,
      "model_forward_time": 0.11464095115661621,
      "step": 51067
    },
    {
      "epoch": 0.000311688232421875,
      "step": 51067,
      "training_step_time": 0.3978886604309082
    },
    {
      "epoch": 0.0003116943359375,
      "model_forward_time": 0.11388349533081055,
      "step": 51068
    },
    {
      "epoch": 0.0003116943359375,
      "step": 51068,
      "training_step_time": 0.3925936222076416
    },
    {
      "epoch": 0.000311700439453125,
      "model_forward_time": 0.11424636840820312,
      "step": 51069
    },
    {
      "epoch": 0.000311700439453125,
      "step": 51069,
      "training_step_time": 0.38768529891967773
    },
    {
      "epoch": 0.00031170654296875,
      "grad_norm": 0.0769534632563591,
      "learning_rate": 5.934827396750392e-06,
      "loss": 0.0383,
      "step": 51070
    },
    {
      "epoch": 0.00031170654296875,
      "model_forward_time": 0.11545395851135254,
      "step": 51070
    },
    {
      "epoch": 0.00031170654296875,
      "step": 51070,
      "training_step_time": 0.38415050506591797
    },
    {
      "epoch": 0.000311712646484375,
      "model_forward_time": 0.11509561538696289,
      "step": 51071
    },
    {
      "epoch": 0.000311712646484375,
      "step": 51071,
      "training_step_time": 0.385678768157959
    },
    {
      "epoch": 0.00031171875,
      "model_forward_time": 0.11518168449401855,
      "step": 51072
    },
    {
      "epoch": 0.00031171875,
      "step": 51072,
      "training_step_time": 0.5929427146911621
    },
    {
      "epoch": 0.000311724853515625,
      "model_forward_time": 0.11462998390197754,
      "step": 51073
    },
    {
      "epoch": 0.000311724853515625,
      "step": 51073,
      "training_step_time": 0.39118361473083496
    },
    {
      "epoch": 0.00031173095703125,
      "model_forward_time": 0.11518430709838867,
      "step": 51074
    },
    {
      "epoch": 0.00031173095703125,
      "step": 51074,
      "training_step_time": 0.39954280853271484
    },
    {
      "epoch": 0.000311737060546875,
      "model_forward_time": 0.1151881217956543,
      "step": 51075
    },
    {
      "epoch": 0.000311737060546875,
      "step": 51075,
      "training_step_time": 0.41646528244018555
    },
    {
      "epoch": 0.0003117431640625,
      "model_forward_time": 0.1148676872253418,
      "step": 51076
    },
    {
      "epoch": 0.0003117431640625,
      "step": 51076,
      "training_step_time": 0.3885974884033203
    },
    {
      "epoch": 0.000311749267578125,
      "model_forward_time": 0.11507391929626465,
      "step": 51077
    },
    {
      "epoch": 0.000311749267578125,
      "step": 51077,
      "training_step_time": 0.44094038009643555
    },
    {
      "epoch": 0.00031175537109375,
      "model_forward_time": 0.11712241172790527,
      "step": 51078
    },
    {
      "epoch": 0.00031175537109375,
      "step": 51078,
      "training_step_time": 0.63883376121521
    },
    {
      "epoch": 0.000311761474609375,
      "model_forward_time": 0.11422300338745117,
      "step": 51079
    },
    {
      "epoch": 0.000311761474609375,
      "step": 51079,
      "training_step_time": 0.40731000900268555
    },
    {
      "epoch": 0.000311767578125,
      "grad_norm": 0.119325190782547,
      "learning_rate": 5.921811616543821e-06,
      "loss": 0.0383,
      "step": 51080
    },
    {
      "epoch": 0.000311767578125,
      "model_forward_time": 0.11415314674377441,
      "step": 51080
    },
    {
      "epoch": 0.000311767578125,
      "step": 51080,
      "training_step_time": 0.41625165939331055
    },
    {
      "epoch": 0.000311773681640625,
      "model_forward_time": 0.11470937728881836,
      "step": 51081
    },
    {
      "epoch": 0.000311773681640625,
      "step": 51081,
      "training_step_time": 0.4118657112121582
    },
    {
      "epoch": 0.00031177978515625,
      "model_forward_time": 0.11441588401794434,
      "step": 51082
    },
    {
      "epoch": 0.00031177978515625,
      "step": 51082,
      "training_step_time": 0.48503994941711426
    },
    {
      "epoch": 0.000311785888671875,
      "model_forward_time": 0.11429071426391602,
      "step": 51083
    },
    {
      "epoch": 0.000311785888671875,
      "step": 51083,
      "training_step_time": 0.37921142578125
    },
    {
      "epoch": 0.0003117919921875,
      "model_forward_time": 0.11472654342651367,
      "step": 51084
    },
    {
      "epoch": 0.0003117919921875,
      "step": 51084,
      "training_step_time": 0.5368540287017822
    },
    {
      "epoch": 0.000311798095703125,
      "model_forward_time": 0.11451935768127441,
      "step": 51085
    },
    {
      "epoch": 0.000311798095703125,
      "step": 51085,
      "training_step_time": 0.3860156536102295
    },
    {
      "epoch": 0.00031180419921875,
      "model_forward_time": 0.11490321159362793,
      "step": 51086
    },
    {
      "epoch": 0.00031180419921875,
      "step": 51086,
      "training_step_time": 0.38910746574401855
    },
    {
      "epoch": 0.000311810302734375,
      "model_forward_time": 0.11522293090820312,
      "step": 51087
    },
    {
      "epoch": 0.000311810302734375,
      "step": 51087,
      "training_step_time": 0.38664937019348145
    },
    {
      "epoch": 0.00031181640625,
      "model_forward_time": 0.11500191688537598,
      "step": 51088
    },
    {
      "epoch": 0.00031181640625,
      "step": 51088,
      "training_step_time": 0.38289356231689453
    },
    {
      "epoch": 0.000311822509765625,
      "model_forward_time": 0.11532473564147949,
      "step": 51089
    },
    {
      "epoch": 0.000311822509765625,
      "step": 51089,
      "training_step_time": 0.3980400562286377
    },
    {
      "epoch": 0.00031182861328125,
      "grad_norm": 0.13140703737735748,
      "learning_rate": 5.908809226127054e-06,
      "loss": 0.0394,
      "step": 51090
    },
    {
      "epoch": 0.00031182861328125,
      "model_forward_time": 0.11554288864135742,
      "step": 51090
    },
    {
      "epoch": 0.00031182861328125,
      "step": 51090,
      "training_step_time": 0.7002062797546387
    },
    {
      "epoch": 0.000311834716796875,
      "model_forward_time": 0.11545872688293457,
      "step": 51091
    },
    {
      "epoch": 0.000311834716796875,
      "step": 51091,
      "training_step_time": 0.4313344955444336
    },
    {
      "epoch": 0.0003118408203125,
      "model_forward_time": 0.1145939826965332,
      "step": 51092
    },
    {
      "epoch": 0.0003118408203125,
      "step": 51092,
      "training_step_time": 0.408937931060791
    },
    {
      "epoch": 0.000311846923828125,
      "model_forward_time": 0.11431407928466797,
      "step": 51093
    },
    {
      "epoch": 0.000311846923828125,
      "step": 51093,
      "training_step_time": 0.3874835968017578
    },
    {
      "epoch": 0.00031185302734375,
      "model_forward_time": 0.11523938179016113,
      "step": 51094
    },
    {
      "epoch": 0.00031185302734375,
      "step": 51094,
      "training_step_time": 0.4252505302429199
    },
    {
      "epoch": 0.000311859130859375,
      "model_forward_time": 0.11347508430480957,
      "step": 51095
    },
    {
      "epoch": 0.000311859130859375,
      "step": 51095,
      "training_step_time": 0.3977973461151123
    },
    {
      "epoch": 0.000311865234375,
      "model_forward_time": 0.11550354957580566,
      "step": 51096
    },
    {
      "epoch": 0.000311865234375,
      "step": 51096,
      "training_step_time": 0.7132580280303955
    },
    {
      "epoch": 0.000311871337890625,
      "model_forward_time": 0.11441540718078613,
      "step": 51097
    },
    {
      "epoch": 0.000311871337890625,
      "step": 51097,
      "training_step_time": 0.3885025978088379
    },
    {
      "epoch": 0.00031187744140625,
      "model_forward_time": 0.11482071876525879,
      "step": 51098
    },
    {
      "epoch": 0.00031187744140625,
      "step": 51098,
      "training_step_time": 0.38341212272644043
    },
    {
      "epoch": 0.000311883544921875,
      "model_forward_time": 0.11342835426330566,
      "step": 51099
    },
    {
      "epoch": 0.000311883544921875,
      "step": 51099,
      "training_step_time": 0.38378047943115234
    },
    {
      "epoch": 0.0003118896484375,
      "grad_norm": 0.10814616084098816,
      "learning_rate": 5.895820229449906e-06,
      "loss": 0.0346,
      "step": 51100
    },
    {
      "epoch": 0.0003118896484375,
      "model_forward_time": 0.11454939842224121,
      "step": 51100
    },
    {
      "epoch": 0.0003118896484375,
      "step": 51100,
      "training_step_time": 0.3835272789001465
    },
    {
      "epoch": 0.000311895751953125,
      "model_forward_time": 0.1151421070098877,
      "step": 51101
    },
    {
      "epoch": 0.000311895751953125,
      "step": 51101,
      "training_step_time": 0.38649511337280273
    },
    {
      "epoch": 0.00031190185546875,
      "model_forward_time": 0.11517715454101562,
      "step": 51102
    },
    {
      "epoch": 0.00031190185546875,
      "step": 51102,
      "training_step_time": 0.6988208293914795
    },
    {
      "epoch": 0.000311907958984375,
      "model_forward_time": 0.1151888370513916,
      "step": 51103
    },
    {
      "epoch": 0.000311907958984375,
      "step": 51103,
      "training_step_time": 0.3956160545349121
    },
    {
      "epoch": 0.0003119140625,
      "model_forward_time": 0.1146245002746582,
      "step": 51104
    },
    {
      "epoch": 0.0003119140625,
      "step": 51104,
      "training_step_time": 0.4870171546936035
    },
    {
      "epoch": 0.000311920166015625,
      "model_forward_time": 0.11477780342102051,
      "step": 51105
    },
    {
      "epoch": 0.000311920166015625,
      "step": 51105,
      "training_step_time": 0.41932177543640137
    },
    {
      "epoch": 0.00031192626953125,
      "model_forward_time": 0.11543941497802734,
      "step": 51106
    },
    {
      "epoch": 0.00031192626953125,
      "step": 51106,
      "training_step_time": 0.41775035858154297
    },
    {
      "epoch": 0.000311932373046875,
      "model_forward_time": 0.1145482063293457,
      "step": 51107
    },
    {
      "epoch": 0.000311932373046875,
      "step": 51107,
      "training_step_time": 0.3920886516571045
    },
    {
      "epoch": 0.0003119384765625,
      "model_forward_time": 0.11489677429199219,
      "step": 51108
    },
    {
      "epoch": 0.0003119384765625,
      "step": 51108,
      "training_step_time": 0.47691845893859863
    },
    {
      "epoch": 0.000311944580078125,
      "model_forward_time": 0.11509346961975098,
      "step": 51109
    },
    {
      "epoch": 0.000311944580078125,
      "step": 51109,
      "training_step_time": 0.40817689895629883
    },
    {
      "epoch": 0.00031195068359375,
      "grad_norm": 0.10726378113031387,
      "learning_rate": 5.882844630458045e-06,
      "loss": 0.0413,
      "step": 51110
    },
    {
      "epoch": 0.00031195068359375,
      "model_forward_time": 0.11534357070922852,
      "step": 51110
    },
    {
      "epoch": 0.00031195068359375,
      "step": 51110,
      "training_step_time": 0.4094269275665283
    },
    {
      "epoch": 0.000311956787109375,
      "model_forward_time": 0.11479520797729492,
      "step": 51111
    },
    {
      "epoch": 0.000311956787109375,
      "step": 51111,
      "training_step_time": 0.38558077812194824
    },
    {
      "epoch": 0.000311962890625,
      "model_forward_time": 0.11506462097167969,
      "step": 51112
    },
    {
      "epoch": 0.000311962890625,
      "step": 51112,
      "training_step_time": 0.3849790096282959
    },
    {
      "epoch": 0.000311968994140625,
      "model_forward_time": 0.11525368690490723,
      "step": 51113
    },
    {
      "epoch": 0.000311968994140625,
      "step": 51113,
      "training_step_time": 0.3907132148742676
    },
    {
      "epoch": 0.00031197509765625,
      "model_forward_time": 0.11505603790283203,
      "step": 51114
    },
    {
      "epoch": 0.00031197509765625,
      "step": 51114,
      "training_step_time": 0.7325530052185059
    },
    {
      "epoch": 0.000311981201171875,
      "model_forward_time": 0.1143028736114502,
      "step": 51115
    },
    {
      "epoch": 0.000311981201171875,
      "step": 51115,
      "training_step_time": 0.38977837562561035
    },
    {
      "epoch": 0.0003119873046875,
      "model_forward_time": 0.11525678634643555,
      "step": 51116
    },
    {
      "epoch": 0.0003119873046875,
      "step": 51116,
      "training_step_time": 0.39040446281433105
    },
    {
      "epoch": 0.000311993408203125,
      "model_forward_time": 0.11447286605834961,
      "step": 51117
    },
    {
      "epoch": 0.000311993408203125,
      "step": 51117,
      "training_step_time": 0.36634135246276855
    },
    {
      "epoch": 0.00031199951171875,
      "model_forward_time": 0.11468815803527832,
      "step": 51118
    },
    {
      "epoch": 0.00031199951171875,
      "step": 51118,
      "training_step_time": 0.44186997413635254
    },
    {
      "epoch": 0.000312005615234375,
      "model_forward_time": 0.11472964286804199,
      "step": 51119
    },
    {
      "epoch": 0.000312005615234375,
      "step": 51119,
      "training_step_time": 0.43769192695617676
    },
    {
      "epoch": 0.00031201171875,
      "grad_norm": 0.08955242484807968,
      "learning_rate": 5.869882433093155e-06,
      "loss": 0.0333,
      "step": 51120
    },
    {
      "epoch": 0.00031201171875,
      "model_forward_time": 0.1149744987487793,
      "step": 51120
    },
    {
      "epoch": 0.00031201171875,
      "step": 51120,
      "training_step_time": 0.606600284576416
    },
    {
      "epoch": 0.000312017822265625,
      "model_forward_time": 0.11452722549438477,
      "step": 51121
    },
    {
      "epoch": 0.000312017822265625,
      "step": 51121,
      "training_step_time": 0.38848328590393066
    },
    {
      "epoch": 0.00031202392578125,
      "model_forward_time": 0.11443161964416504,
      "step": 51122
    },
    {
      "epoch": 0.00031202392578125,
      "step": 51122,
      "training_step_time": 0.43860530853271484
    },
    {
      "epoch": 0.000312030029296875,
      "model_forward_time": 0.11448502540588379,
      "step": 51123
    },
    {
      "epoch": 0.000312030029296875,
      "step": 51123,
      "training_step_time": 0.39426732063293457
    },
    {
      "epoch": 0.0003120361328125,
      "model_forward_time": 0.11519861221313477,
      "step": 51124
    },
    {
      "epoch": 0.0003120361328125,
      "step": 51124,
      "training_step_time": 0.4863705635070801
    },
    {
      "epoch": 0.000312042236328125,
      "model_forward_time": 0.11470198631286621,
      "step": 51125
    },
    {
      "epoch": 0.000312042236328125,
      "step": 51125,
      "training_step_time": 0.3880641460418701
    },
    {
      "epoch": 0.00031204833984375,
      "model_forward_time": 0.11499667167663574,
      "step": 51126
    },
    {
      "epoch": 0.00031204833984375,
      "step": 51126,
      "training_step_time": 0.534498929977417
    },
    {
      "epoch": 0.000312054443359375,
      "model_forward_time": 0.11538982391357422,
      "step": 51127
    },
    {
      "epoch": 0.000312054443359375,
      "step": 51127,
      "training_step_time": 0.39084935188293457
    },
    {
      "epoch": 0.000312060546875,
      "model_forward_time": 0.11443591117858887,
      "step": 51128
    },
    {
      "epoch": 0.000312060546875,
      "step": 51128,
      "training_step_time": 0.3873484134674072
    },
    {
      "epoch": 0.000312066650390625,
      "model_forward_time": 0.1148829460144043,
      "step": 51129
    },
    {
      "epoch": 0.000312066650390625,
      "step": 51129,
      "training_step_time": 0.40146517753601074
    },
    {
      "epoch": 0.00031207275390625,
      "grad_norm": 0.10524026304483414,
      "learning_rate": 5.856933641292789e-06,
      "loss": 0.0387,
      "step": 51130
    },
    {
      "epoch": 0.00031207275390625,
      "model_forward_time": 0.11626243591308594,
      "step": 51130
    },
    {
      "epoch": 0.00031207275390625,
      "step": 51130,
      "training_step_time": 0.3982579708099365
    },
    {
      "epoch": 0.000312078857421875,
      "model_forward_time": 0.1147150993347168,
      "step": 51131
    },
    {
      "epoch": 0.000312078857421875,
      "step": 51131,
      "training_step_time": 0.36571240425109863
    },
    {
      "epoch": 0.0003120849609375,
      "model_forward_time": 0.11541056632995605,
      "step": 51132
    },
    {
      "epoch": 0.0003120849609375,
      "step": 51132,
      "training_step_time": 0.64748215675354
    },
    {
      "epoch": 0.000312091064453125,
      "model_forward_time": 0.11453366279602051,
      "step": 51133
    },
    {
      "epoch": 0.000312091064453125,
      "step": 51133,
      "training_step_time": 0.4887278079986572
    },
    {
      "epoch": 0.00031209716796875,
      "model_forward_time": 0.1144266128540039,
      "step": 51134
    },
    {
      "epoch": 0.00031209716796875,
      "step": 51134,
      "training_step_time": 0.38419532775878906
    },
    {
      "epoch": 0.000312103271484375,
      "model_forward_time": 0.11427950859069824,
      "step": 51135
    },
    {
      "epoch": 0.000312103271484375,
      "step": 51135,
      "training_step_time": 0.39203763008117676
    },
    {
      "epoch": 0.000312109375,
      "model_forward_time": 0.11507773399353027,
      "step": 51136
    },
    {
      "epoch": 0.000312109375,
      "step": 51136,
      "training_step_time": 0.4385204315185547
    },
    {
      "epoch": 0.000312115478515625,
      "model_forward_time": 0.11425113677978516,
      "step": 51137
    },
    {
      "epoch": 0.000312115478515625,
      "step": 51137,
      "training_step_time": 0.3963174819946289
    },
    {
      "epoch": 0.00031212158203125,
      "model_forward_time": 0.11481356620788574,
      "step": 51138
    },
    {
      "epoch": 0.00031212158203125,
      "step": 51138,
      "training_step_time": 0.4925198554992676
    },
    {
      "epoch": 0.000312127685546875,
      "model_forward_time": 0.11484098434448242,
      "step": 51139
    },
    {
      "epoch": 0.000312127685546875,
      "step": 51139,
      "training_step_time": 0.3978710174560547
    },
    {
      "epoch": 0.0003121337890625,
      "grad_norm": 0.09508942812681198,
      "learning_rate": 5.843998258990452e-06,
      "loss": 0.036,
      "step": 51140
    },
    {
      "epoch": 0.0003121337890625,
      "model_forward_time": 0.1150519847869873,
      "step": 51140
    },
    {
      "epoch": 0.0003121337890625,
      "step": 51140,
      "training_step_time": 0.37550926208496094
    },
    {
      "epoch": 0.000312139892578125,
      "model_forward_time": 0.11495685577392578,
      "step": 51141
    },
    {
      "epoch": 0.000312139892578125,
      "step": 51141,
      "training_step_time": 0.38789892196655273
    },
    {
      "epoch": 0.00031214599609375,
      "model_forward_time": 0.11519312858581543,
      "step": 51142
    },
    {
      "epoch": 0.00031214599609375,
      "step": 51142,
      "training_step_time": 0.39309215545654297
    },
    {
      "epoch": 0.000312152099609375,
      "model_forward_time": 0.11546874046325684,
      "step": 51143
    },
    {
      "epoch": 0.000312152099609375,
      "step": 51143,
      "training_step_time": 0.40471506118774414
    },
    {
      "epoch": 0.000312158203125,
      "model_forward_time": 0.1146392822265625,
      "step": 51144
    },
    {
      "epoch": 0.000312158203125,
      "step": 51144,
      "training_step_time": 0.5639541149139404
    },
    {
      "epoch": 0.000312164306640625,
      "model_forward_time": 0.11491131782531738,
      "step": 51145
    },
    {
      "epoch": 0.000312164306640625,
      "step": 51145,
      "training_step_time": 0.43727755546569824
    },
    {
      "epoch": 0.00031217041015625,
      "model_forward_time": 0.11504006385803223,
      "step": 51146
    },
    {
      "epoch": 0.00031217041015625,
      "step": 51146,
      "training_step_time": 0.4304959774017334
    },
    {
      "epoch": 0.000312176513671875,
      "model_forward_time": 0.11523556709289551,
      "step": 51147
    },
    {
      "epoch": 0.000312176513671875,
      "step": 51147,
      "training_step_time": 0.48315930366516113
    },
    {
      "epoch": 0.0003121826171875,
      "model_forward_time": 0.11568927764892578,
      "step": 51148
    },
    {
      "epoch": 0.0003121826171875,
      "step": 51148,
      "training_step_time": 0.38750386238098145
    },
    {
      "epoch": 0.000312188720703125,
      "model_forward_time": 0.11501955986022949,
      "step": 51149
    },
    {
      "epoch": 0.000312188720703125,
      "step": 51149,
      "training_step_time": 0.38814544677734375
    },
    {
      "epoch": 0.00031219482421875,
      "grad_norm": 0.08000552654266357,
      "learning_rate": 5.831076290115573e-06,
      "loss": 0.034,
      "step": 51150
    },
    {
      "epoch": 0.00031219482421875,
      "model_forward_time": 0.11542010307312012,
      "step": 51150
    },
    {
      "epoch": 0.00031219482421875,
      "step": 51150,
      "training_step_time": 0.5366554260253906
    },
    {
      "epoch": 0.000312200927734375,
      "model_forward_time": 0.11507248878479004,
      "step": 51151
    },
    {
      "epoch": 0.000312200927734375,
      "step": 51151,
      "training_step_time": 0.39321398735046387
    },
    {
      "epoch": 0.00031220703125,
      "model_forward_time": 0.11470985412597656,
      "step": 51152
    },
    {
      "epoch": 0.00031220703125,
      "step": 51152,
      "training_step_time": 0.4668867588043213
    },
    {
      "epoch": 0.000312213134765625,
      "model_forward_time": 0.11484217643737793,
      "step": 51153
    },
    {
      "epoch": 0.000312213134765625,
      "step": 51153,
      "training_step_time": 0.3814268112182617
    },
    {
      "epoch": 0.00031221923828125,
      "model_forward_time": 0.11606788635253906,
      "step": 51154
    },
    {
      "epoch": 0.00031221923828125,
      "step": 51154,
      "training_step_time": 0.3967270851135254
    },
    {
      "epoch": 0.000312225341796875,
      "model_forward_time": 0.11493563652038574,
      "step": 51155
    },
    {
      "epoch": 0.000312225341796875,
      "step": 51155,
      "training_step_time": 0.4028332233428955
    },
    {
      "epoch": 0.0003122314453125,
      "model_forward_time": 0.11615967750549316,
      "step": 51156
    },
    {
      "epoch": 0.0003122314453125,
      "step": 51156,
      "training_step_time": 0.6484894752502441
    },
    {
      "epoch": 0.000312237548828125,
      "model_forward_time": 0.11490964889526367,
      "step": 51157
    },
    {
      "epoch": 0.000312237548828125,
      "step": 51157,
      "training_step_time": 0.3947162628173828
    },
    {
      "epoch": 0.00031224365234375,
      "model_forward_time": 0.11485815048217773,
      "step": 51158
    },
    {
      "epoch": 0.00031224365234375,
      "step": 51158,
      "training_step_time": 0.3901631832122803
    },
    {
      "epoch": 0.000312249755859375,
      "model_forward_time": 0.11484336853027344,
      "step": 51159
    },
    {
      "epoch": 0.000312249755859375,
      "step": 51159,
      "training_step_time": 0.37978363037109375
    },
    {
      "epoch": 0.000312255859375,
      "grad_norm": 0.12346978485584259,
      "learning_rate": 5.818167738593505e-06,
      "loss": 0.0359,
      "step": 51160
    },
    {
      "epoch": 0.000312255859375,
      "model_forward_time": 0.11417889595031738,
      "step": 51160
    },
    {
      "epoch": 0.000312255859375,
      "step": 51160,
      "training_step_time": 0.43028998374938965
    },
    {
      "epoch": 0.000312261962890625,
      "model_forward_time": 0.11476850509643555,
      "step": 51161
    },
    {
      "epoch": 0.000312261962890625,
      "step": 51161,
      "training_step_time": 0.47930359840393066
    },
    {
      "epoch": 0.00031226806640625,
      "model_forward_time": 0.11510276794433594,
      "step": 51162
    },
    {
      "epoch": 0.00031226806640625,
      "step": 51162,
      "training_step_time": 0.4856135845184326
    },
    {
      "epoch": 0.000312274169921875,
      "model_forward_time": 0.11543822288513184,
      "step": 51163
    },
    {
      "epoch": 0.000312274169921875,
      "step": 51163,
      "training_step_time": 0.39570069313049316
    },
    {
      "epoch": 0.0003122802734375,
      "model_forward_time": 0.11609768867492676,
      "step": 51164
    },
    {
      "epoch": 0.0003122802734375,
      "step": 51164,
      "training_step_time": 0.40030813217163086
    },
    {
      "epoch": 0.000312286376953125,
      "model_forward_time": 0.11504578590393066,
      "step": 51165
    },
    {
      "epoch": 0.000312286376953125,
      "step": 51165,
      "training_step_time": 0.39124441146850586
    },
    {
      "epoch": 0.00031229248046875,
      "model_forward_time": 0.11426520347595215,
      "step": 51166
    },
    {
      "epoch": 0.00031229248046875,
      "step": 51166,
      "training_step_time": 0.4058089256286621
    },
    {
      "epoch": 0.000312298583984375,
      "model_forward_time": 0.11510610580444336,
      "step": 51167
    },
    {
      "epoch": 0.000312298583984375,
      "step": 51167,
      "training_step_time": 0.40309906005859375
    },
    {
      "epoch": 0.0003123046875,
      "model_forward_time": 0.11443185806274414,
      "step": 51168
    },
    {
      "epoch": 0.0003123046875,
      "step": 51168,
      "training_step_time": 0.6529779434204102
    },
    {
      "epoch": 0.000312310791015625,
      "model_forward_time": 0.11460995674133301,
      "step": 51169
    },
    {
      "epoch": 0.000312310791015625,
      "step": 51169,
      "training_step_time": 0.43160057067871094
    },
    {
      "epoch": 0.00031231689453125,
      "grad_norm": 0.09379824995994568,
      "learning_rate": 5.805272608345513e-06,
      "loss": 0.04,
      "step": 51170
    },
    {
      "epoch": 0.00031231689453125,
      "model_forward_time": 0.1153106689453125,
      "step": 51170
    },
    {
      "epoch": 0.00031231689453125,
      "step": 51170,
      "training_step_time": 0.39815783500671387
    },
    {
      "epoch": 0.000312322998046875,
      "model_forward_time": 0.11420416831970215,
      "step": 51171
    },
    {
      "epoch": 0.000312322998046875,
      "step": 51171,
      "training_step_time": 0.3891587257385254
    },
    {
      "epoch": 0.0003123291015625,
      "model_forward_time": 0.1146399974822998,
      "step": 51172
    },
    {
      "epoch": 0.0003123291015625,
      "step": 51172,
      "training_step_time": 0.3958771228790283
    },
    {
      "epoch": 0.000312335205078125,
      "model_forward_time": 0.11435461044311523,
      "step": 51173
    },
    {
      "epoch": 0.000312335205078125,
      "step": 51173,
      "training_step_time": 0.36383557319641113
    },
    {
      "epoch": 0.00031234130859375,
      "model_forward_time": 0.11495494842529297,
      "step": 51174
    },
    {
      "epoch": 0.00031234130859375,
      "step": 51174,
      "training_step_time": 0.658062219619751
    },
    {
      "epoch": 0.000312347412109375,
      "model_forward_time": 0.11487793922424316,
      "step": 51175
    },
    {
      "epoch": 0.000312347412109375,
      "step": 51175,
      "training_step_time": 0.4759340286254883
    },
    {
      "epoch": 0.000312353515625,
      "model_forward_time": 0.11471939086914062,
      "step": 51176
    },
    {
      "epoch": 0.000312353515625,
      "step": 51176,
      "training_step_time": 0.43401670455932617
    },
    {
      "epoch": 0.000312359619140625,
      "model_forward_time": 0.11412763595581055,
      "step": 51177
    },
    {
      "epoch": 0.000312359619140625,
      "step": 51177,
      "training_step_time": 0.380932092666626
    },
    {
      "epoch": 0.00031236572265625,
      "model_forward_time": 0.11434412002563477,
      "step": 51178
    },
    {
      "epoch": 0.00031236572265625,
      "step": 51178,
      "training_step_time": 0.38303661346435547
    },
    {
      "epoch": 0.000312371826171875,
      "model_forward_time": 0.1152644157409668,
      "step": 51179
    },
    {
      "epoch": 0.000312371826171875,
      "step": 51179,
      "training_step_time": 0.38770532608032227
    },
    {
      "epoch": 0.0003123779296875,
      "grad_norm": 0.10774870216846466,
      "learning_rate": 5.79239090328883e-06,
      "loss": 0.0361,
      "step": 51180
    },
    {
      "epoch": 0.0003123779296875,
      "model_forward_time": 0.11485695838928223,
      "step": 51180
    },
    {
      "epoch": 0.0003123779296875,
      "step": 51180,
      "training_step_time": 0.5885131359100342
    },
    {
      "epoch": 0.000312384033203125,
      "model_forward_time": 0.11443781852722168,
      "step": 51181
    },
    {
      "epoch": 0.000312384033203125,
      "step": 51181,
      "training_step_time": 0.4089169502258301
    },
    {
      "epoch": 0.00031239013671875,
      "model_forward_time": 0.11493754386901855,
      "step": 51182
    },
    {
      "epoch": 0.00031239013671875,
      "step": 51182,
      "training_step_time": 0.4382343292236328
    },
    {
      "epoch": 0.000312396240234375,
      "model_forward_time": 0.11493825912475586,
      "step": 51183
    },
    {
      "epoch": 0.000312396240234375,
      "step": 51183,
      "training_step_time": 0.4057173728942871
    },
    {
      "epoch": 0.00031240234375,
      "model_forward_time": 0.11519551277160645,
      "step": 51184
    },
    {
      "epoch": 0.00031240234375,
      "step": 51184,
      "training_step_time": 0.38930416107177734
    },
    {
      "epoch": 0.000312408447265625,
      "model_forward_time": 0.11533069610595703,
      "step": 51185
    },
    {
      "epoch": 0.000312408447265625,
      "step": 51185,
      "training_step_time": 0.37849879264831543
    },
    {
      "epoch": 0.00031241455078125,
      "model_forward_time": 0.11481213569641113,
      "step": 51186
    },
    {
      "epoch": 0.00031241455078125,
      "step": 51186,
      "training_step_time": 0.6854867935180664
    },
    {
      "epoch": 0.000312420654296875,
      "model_forward_time": 0.11400842666625977,
      "step": 51187
    },
    {
      "epoch": 0.000312420654296875,
      "step": 51187,
      "training_step_time": 0.5027577877044678
    },
    {
      "epoch": 0.0003124267578125,
      "model_forward_time": 0.11438226699829102,
      "step": 51188
    },
    {
      "epoch": 0.0003124267578125,
      "step": 51188,
      "training_step_time": 0.48736572265625
    },
    {
      "epoch": 0.000312432861328125,
      "model_forward_time": 0.11444354057312012,
      "step": 51189
    },
    {
      "epoch": 0.000312432861328125,
      "step": 51189,
      "training_step_time": 0.44119739532470703
    },
    {
      "epoch": 0.00031243896484375,
      "grad_norm": 0.09292029589414597,
      "learning_rate": 5.779522627336537e-06,
      "loss": 0.0375,
      "step": 51190
    },
    {
      "epoch": 0.00031243896484375,
      "model_forward_time": 0.11464500427246094,
      "step": 51190
    },
    {
      "epoch": 0.00031243896484375,
      "step": 51190,
      "training_step_time": 0.3822672367095947
    },
    {
      "epoch": 0.000312445068359375,
      "model_forward_time": 0.11439371109008789,
      "step": 51191
    },
    {
      "epoch": 0.000312445068359375,
      "step": 51191,
      "training_step_time": 0.38745975494384766
    },
    {
      "epoch": 0.000312451171875,
      "model_forward_time": 0.11458706855773926,
      "step": 51192
    },
    {
      "epoch": 0.000312451171875,
      "step": 51192,
      "training_step_time": 0.47751784324645996
    },
    {
      "epoch": 0.000312457275390625,
      "model_forward_time": 0.11413264274597168,
      "step": 51193
    },
    {
      "epoch": 0.000312457275390625,
      "step": 51193,
      "training_step_time": 0.4051685333251953
    },
    {
      "epoch": 0.00031246337890625,
      "model_forward_time": 0.1146702766418457,
      "step": 51194
    },
    {
      "epoch": 0.00031246337890625,
      "step": 51194,
      "training_step_time": 0.4023427963256836
    },
    {
      "epoch": 0.000312469482421875,
      "model_forward_time": 0.11604857444763184,
      "step": 51195
    },
    {
      "epoch": 0.000312469482421875,
      "step": 51195,
      "training_step_time": 0.43412256240844727
    },
    {
      "epoch": 0.0003124755859375,
      "model_forward_time": 0.1153116226196289,
      "step": 51196
    },
    {
      "epoch": 0.0003124755859375,
      "step": 51196,
      "training_step_time": 0.41333556175231934
    },
    {
      "epoch": 0.000312481689453125,
      "model_forward_time": 0.1152639389038086,
      "step": 51197
    },
    {
      "epoch": 0.000312481689453125,
      "step": 51197,
      "training_step_time": 0.39563870429992676
    },
    {
      "epoch": 0.00031248779296875,
      "model_forward_time": 0.11458826065063477,
      "step": 51198
    },
    {
      "epoch": 0.00031248779296875,
      "step": 51198,
      "training_step_time": 0.4969029426574707
    },
    {
      "epoch": 0.000312493896484375,
      "model_forward_time": 0.11536264419555664,
      "step": 51199
    },
    {
      "epoch": 0.000312493896484375,
      "step": 51199,
      "training_step_time": 0.3883662223815918
    },
    {
      "epoch": 0.0003125,
      "grad_norm": 0.11560916900634766,
      "learning_rate": 5.766667784397706e-06,
      "loss": 0.0367,
      "step": 51200
    },
    {
      "epoch": 0.0003125,
      "model_forward_time": 0.11476278305053711,
      "step": 51200
    },
    {
      "epoch": 0.0003125,
      "step": 51200,
      "training_step_time": 0.3873577117919922
    },
    {
      "epoch": 0.000312506103515625,
      "model_forward_time": 0.11505675315856934,
      "step": 51201
    },
    {
      "epoch": 0.000312506103515625,
      "step": 51201,
      "training_step_time": 0.5154733657836914
    },
    {
      "epoch": 0.00031251220703125,
      "model_forward_time": 0.11513185501098633,
      "step": 51202
    },
    {
      "epoch": 0.00031251220703125,
      "step": 51202,
      "training_step_time": 0.4788036346435547
    },
    {
      "epoch": 0.000312518310546875,
      "model_forward_time": 0.11441946029663086,
      "step": 51203
    },
    {
      "epoch": 0.000312518310546875,
      "step": 51203,
      "training_step_time": 0.4519782066345215
    },
    {
      "epoch": 0.0003125244140625,
      "model_forward_time": 0.11539101600646973,
      "step": 51204
    },
    {
      "epoch": 0.0003125244140625,
      "step": 51204,
      "training_step_time": 0.5182664394378662
    },
    {
      "epoch": 0.000312530517578125,
      "model_forward_time": 0.11405563354492188,
      "step": 51205
    },
    {
      "epoch": 0.000312530517578125,
      "step": 51205,
      "training_step_time": 0.39410924911499023
    },
    {
      "epoch": 0.00031253662109375,
      "model_forward_time": 0.11451387405395508,
      "step": 51206
    },
    {
      "epoch": 0.00031253662109375,
      "step": 51206,
      "training_step_time": 0.4045710563659668
    },
    {
      "epoch": 0.000312542724609375,
      "model_forward_time": 0.11472821235656738,
      "step": 51207
    },
    {
      "epoch": 0.000312542724609375,
      "step": 51207,
      "training_step_time": 0.45180654525756836
    },
    {
      "epoch": 0.000312548828125,
      "model_forward_time": 0.1147913932800293,
      "step": 51208
    },
    {
      "epoch": 0.000312548828125,
      "step": 51208,
      "training_step_time": 0.43367552757263184
    },
    {
      "epoch": 0.000312554931640625,
      "model_forward_time": 0.11467313766479492,
      "step": 51209
    },
    {
      "epoch": 0.000312554931640625,
      "step": 51209,
      "training_step_time": 0.44914889335632324
    },
    {
      "epoch": 0.00031256103515625,
      "grad_norm": 0.09243708103895187,
      "learning_rate": 5.753826378377286e-06,
      "loss": 0.0328,
      "step": 51210
    },
    {
      "epoch": 0.00031256103515625,
      "model_forward_time": 0.1145162582397461,
      "step": 51210
    },
    {
      "epoch": 0.00031256103515625,
      "step": 51210,
      "training_step_time": 0.4640786647796631
    },
    {
      "epoch": 0.000312567138671875,
      "model_forward_time": 0.1144258975982666,
      "step": 51211
    },
    {
      "epoch": 0.000312567138671875,
      "step": 51211,
      "training_step_time": 0.398601770401001
    },
    {
      "epoch": 0.0003125732421875,
      "model_forward_time": 0.11527609825134277,
      "step": 51212
    },
    {
      "epoch": 0.0003125732421875,
      "step": 51212,
      "training_step_time": 0.3967278003692627
    },
    {
      "epoch": 0.000312579345703125,
      "model_forward_time": 0.11486554145812988,
      "step": 51213
    },
    {
      "epoch": 0.000312579345703125,
      "step": 51213,
      "training_step_time": 0.40201783180236816
    },
    {
      "epoch": 0.00031258544921875,
      "model_forward_time": 0.1147620677947998,
      "step": 51214
    },
    {
      "epoch": 0.00031258544921875,
      "step": 51214,
      "training_step_time": 0.3911397457122803
    },
    {
      "epoch": 0.000312591552734375,
      "model_forward_time": 0.1155250072479248,
      "step": 51215
    },
    {
      "epoch": 0.000312591552734375,
      "step": 51215,
      "training_step_time": 0.42159318923950195
    },
    {
      "epoch": 0.00031259765625,
      "model_forward_time": 0.1157999038696289,
      "step": 51216
    },
    {
      "epoch": 0.00031259765625,
      "step": 51216,
      "training_step_time": 0.6797325611114502
    },
    {
      "epoch": 0.000312603759765625,
      "model_forward_time": 0.11400818824768066,
      "step": 51217
    },
    {
      "epoch": 0.000312603759765625,
      "step": 51217,
      "training_step_time": 0.44904613494873047
    },
    {
      "epoch": 0.00031260986328125,
      "model_forward_time": 0.11440920829772949,
      "step": 51218
    },
    {
      "epoch": 0.00031260986328125,
      "step": 51218,
      "training_step_time": 0.4068777561187744
    },
    {
      "epoch": 0.000312615966796875,
      "model_forward_time": 0.11534500122070312,
      "step": 51219
    },
    {
      "epoch": 0.000312615966796875,
      "step": 51219,
      "training_step_time": 0.37952685356140137
    },
    {
      "epoch": 0.0003126220703125,
      "grad_norm": 0.1228109672665596,
      "learning_rate": 5.740998413176163e-06,
      "loss": 0.0371,
      "step": 51220
    },
    {
      "epoch": 0.0003126220703125,
      "model_forward_time": 0.11471843719482422,
      "step": 51220
    },
    {
      "epoch": 0.0003126220703125,
      "step": 51220,
      "training_step_time": 0.44343113899230957
    },
    {
      "epoch": 0.000312628173828125,
      "model_forward_time": 0.11504244804382324,
      "step": 51221
    },
    {
      "epoch": 0.000312628173828125,
      "step": 51221,
      "training_step_time": 0.4334867000579834
    },
    {
      "epoch": 0.00031263427734375,
      "model_forward_time": 0.11458945274353027,
      "step": 51222
    },
    {
      "epoch": 0.00031263427734375,
      "step": 51222,
      "training_step_time": 0.5273706912994385
    },
    {
      "epoch": 0.000312640380859375,
      "model_forward_time": 0.11524629592895508,
      "step": 51223
    },
    {
      "epoch": 0.000312640380859375,
      "step": 51223,
      "training_step_time": 0.3902873992919922
    },
    {
      "epoch": 0.000312646484375,
      "model_forward_time": 0.11465930938720703,
      "step": 51224
    },
    {
      "epoch": 0.000312646484375,
      "step": 51224,
      "training_step_time": 0.3953101634979248
    },
    {
      "epoch": 0.000312652587890625,
      "model_forward_time": 0.11472582817077637,
      "step": 51225
    },
    {
      "epoch": 0.000312652587890625,
      "step": 51225,
      "training_step_time": 0.39783453941345215
    },
    {
      "epoch": 0.00031265869140625,
      "model_forward_time": 0.11452221870422363,
      "step": 51226
    },
    {
      "epoch": 0.00031265869140625,
      "step": 51226,
      "training_step_time": 0.3928067684173584
    },
    {
      "epoch": 0.000312664794921875,
      "model_forward_time": 0.11490011215209961,
      "step": 51227
    },
    {
      "epoch": 0.000312664794921875,
      "step": 51227,
      "training_step_time": 0.3893928527832031
    },
    {
      "epoch": 0.0003126708984375,
      "model_forward_time": 0.11520028114318848,
      "step": 51228
    },
    {
      "epoch": 0.0003126708984375,
      "step": 51228,
      "training_step_time": 0.7241051197052002
    },
    {
      "epoch": 0.000312677001953125,
      "model_forward_time": 0.11476755142211914,
      "step": 51229
    },
    {
      "epoch": 0.000312677001953125,
      "step": 51229,
      "training_step_time": 0.48029303550720215
    },
    {
      "epoch": 0.00031268310546875,
      "grad_norm": 0.11584194004535675,
      "learning_rate": 5.72818389269113e-06,
      "loss": 0.0364,
      "step": 51230
    },
    {
      "epoch": 0.00031268310546875,
      "model_forward_time": 0.11476612091064453,
      "step": 51230
    },
    {
      "epoch": 0.00031268310546875,
      "step": 51230,
      "training_step_time": 0.4314448833465576
    },
    {
      "epoch": 0.000312689208984375,
      "model_forward_time": 0.11406064033508301,
      "step": 51231
    },
    {
      "epoch": 0.000312689208984375,
      "step": 51231,
      "training_step_time": 0.3927793502807617
    },
    {
      "epoch": 0.0003126953125,
      "model_forward_time": 0.1139838695526123,
      "step": 51232
    },
    {
      "epoch": 0.0003126953125,
      "step": 51232,
      "training_step_time": 0.3823866844177246
    },
    {
      "epoch": 0.000312701416015625,
      "model_forward_time": 0.11417698860168457,
      "step": 51233
    },
    {
      "epoch": 0.000312701416015625,
      "step": 51233,
      "training_step_time": 0.38158130645751953
    },
    {
      "epoch": 0.00031270751953125,
      "model_forward_time": 0.1152961254119873,
      "step": 51234
    },
    {
      "epoch": 0.00031270751953125,
      "step": 51234,
      "training_step_time": 0.5288536548614502
    },
    {
      "epoch": 0.000312713623046875,
      "model_forward_time": 0.11480545997619629,
      "step": 51235
    },
    {
      "epoch": 0.000312713623046875,
      "step": 51235,
      "training_step_time": 0.4209306240081787
    },
    {
      "epoch": 0.0003127197265625,
      "model_forward_time": 0.11474442481994629,
      "step": 51236
    },
    {
      "epoch": 0.0003127197265625,
      "step": 51236,
      "training_step_time": 0.4866671562194824
    },
    {
      "epoch": 0.000312725830078125,
      "model_forward_time": 0.11437201499938965,
      "step": 51237
    },
    {
      "epoch": 0.000312725830078125,
      "step": 51237,
      "training_step_time": 0.3945629596710205
    },
    {
      "epoch": 0.00031273193359375,
      "model_forward_time": 0.11478781700134277,
      "step": 51238
    },
    {
      "epoch": 0.00031273193359375,
      "step": 51238,
      "training_step_time": 0.3819453716278076
    },
    {
      "epoch": 0.000312738037109375,
      "model_forward_time": 0.11424589157104492,
      "step": 51239
    },
    {
      "epoch": 0.000312738037109375,
      "step": 51239,
      "training_step_time": 0.3833603858947754
    },
    {
      "epoch": 0.000312744140625,
      "grad_norm": 0.11013096570968628,
      "learning_rate": 5.715382820814885e-06,
      "loss": 0.0339,
      "step": 51240
    },
    {
      "epoch": 0.000312744140625,
      "model_forward_time": 0.11558294296264648,
      "step": 51240
    },
    {
      "epoch": 0.000312744140625,
      "step": 51240,
      "training_step_time": 0.6294717788696289
    },
    {
      "epoch": 0.000312750244140625,
      "model_forward_time": 0.11440563201904297,
      "step": 51241
    },
    {
      "epoch": 0.000312750244140625,
      "step": 51241,
      "training_step_time": 0.3973712921142578
    },
    {
      "epoch": 0.00031275634765625,
      "model_forward_time": 0.11498451232910156,
      "step": 51242
    },
    {
      "epoch": 0.00031275634765625,
      "step": 51242,
      "training_step_time": 0.38990044593811035
    },
    {
      "epoch": 0.000312762451171875,
      "model_forward_time": 0.11471390724182129,
      "step": 51243
    },
    {
      "epoch": 0.000312762451171875,
      "step": 51243,
      "training_step_time": 0.49204230308532715
    },
    {
      "epoch": 0.0003127685546875,
      "model_forward_time": 0.11432266235351562,
      "step": 51244
    },
    {
      "epoch": 0.0003127685546875,
      "step": 51244,
      "training_step_time": 0.48744654655456543
    },
    {
      "epoch": 0.000312774658203125,
      "model_forward_time": 0.11362767219543457,
      "step": 51245
    },
    {
      "epoch": 0.000312774658203125,
      "step": 51245,
      "training_step_time": 0.3926529884338379
    },
    {
      "epoch": 0.00031278076171875,
      "model_forward_time": 0.11623120307922363,
      "step": 51246
    },
    {
      "epoch": 0.00031278076171875,
      "step": 51246,
      "training_step_time": 0.48173046112060547
    },
    {
      "epoch": 0.000312786865234375,
      "model_forward_time": 0.11421465873718262,
      "step": 51247
    },
    {
      "epoch": 0.000312786865234375,
      "step": 51247,
      "training_step_time": 0.4035518169403076
    },
    {
      "epoch": 0.00031279296875,
      "model_forward_time": 0.1147768497467041,
      "step": 51248
    },
    {
      "epoch": 0.00031279296875,
      "step": 51248,
      "training_step_time": 0.43772101402282715
    },
    {
      "epoch": 0.000312799072265625,
      "model_forward_time": 0.11498427391052246,
      "step": 51249
    },
    {
      "epoch": 0.000312799072265625,
      "step": 51249,
      "training_step_time": 0.4076833724975586
    },
    {
      "epoch": 0.00031280517578125,
      "grad_norm": 0.09992226213216782,
      "learning_rate": 5.702595201436101e-06,
      "loss": 0.0348,
      "step": 51250
    },
    {
      "epoch": 0.00031280517578125,
      "model_forward_time": 0.1151132583618164,
      "step": 51250
    },
    {
      "epoch": 0.00031280517578125,
      "step": 51250,
      "training_step_time": 0.4918067455291748
    },
    {
      "epoch": 0.000312811279296875,
      "model_forward_time": 0.11441540718078613,
      "step": 51251
    },
    {
      "epoch": 0.000312811279296875,
      "step": 51251,
      "training_step_time": 0.388425350189209
    },
    {
      "epoch": 0.0003128173828125,
      "model_forward_time": 0.1146705150604248,
      "step": 51252
    },
    {
      "epoch": 0.0003128173828125,
      "step": 51252,
      "training_step_time": 0.5100662708282471
    },
    {
      "epoch": 0.000312823486328125,
      "model_forward_time": 0.11450815200805664,
      "step": 51253
    },
    {
      "epoch": 0.000312823486328125,
      "step": 51253,
      "training_step_time": 0.3912181854248047
    },
    {
      "epoch": 0.00031282958984375,
      "model_forward_time": 0.11493802070617676,
      "step": 51254
    },
    {
      "epoch": 0.00031282958984375,
      "step": 51254,
      "training_step_time": 0.3901395797729492
    },
    {
      "epoch": 0.000312835693359375,
      "model_forward_time": 0.1157999038696289,
      "step": 51255
    },
    {
      "epoch": 0.000312835693359375,
      "step": 51255,
      "training_step_time": 0.388643741607666
    },
    {
      "epoch": 0.000312841796875,
      "model_forward_time": 0.1157376766204834,
      "step": 51256
    },
    {
      "epoch": 0.000312841796875,
      "step": 51256,
      "training_step_time": 0.38791632652282715
    },
    {
      "epoch": 0.000312847900390625,
      "model_forward_time": 0.11507582664489746,
      "step": 51257
    },
    {
      "epoch": 0.000312847900390625,
      "step": 51257,
      "training_step_time": 0.45789480209350586
    },
    {
      "epoch": 0.00031285400390625,
      "model_forward_time": 0.11543035507202148,
      "step": 51258
    },
    {
      "epoch": 0.00031285400390625,
      "step": 51258,
      "training_step_time": 0.5961925983428955
    },
    {
      "epoch": 0.000312860107421875,
      "model_forward_time": 0.1149895191192627,
      "step": 51259
    },
    {
      "epoch": 0.000312860107421875,
      "step": 51259,
      "training_step_time": 0.394944429397583
    },
    {
      "epoch": 0.0003128662109375,
      "grad_norm": 0.08509114384651184,
      "learning_rate": 5.689821038439263e-06,
      "loss": 0.0371,
      "step": 51260
    },
    {
      "epoch": 0.0003128662109375,
      "model_forward_time": 0.11492466926574707,
      "step": 51260
    },
    {
      "epoch": 0.0003128662109375,
      "step": 51260,
      "training_step_time": 0.45123934745788574
    },
    {
      "epoch": 0.000312872314453125,
      "model_forward_time": 0.11449313163757324,
      "step": 51261
    },
    {
      "epoch": 0.000312872314453125,
      "step": 51261,
      "training_step_time": 0.42101597785949707
    },
    {
      "epoch": 0.00031287841796875,
      "model_forward_time": 0.11453723907470703,
      "step": 51262
    },
    {
      "epoch": 0.00031287841796875,
      "step": 51262,
      "training_step_time": 0.4080085754394531
    },
    {
      "epoch": 0.000312884521484375,
      "model_forward_time": 0.11489605903625488,
      "step": 51263
    },
    {
      "epoch": 0.000312884521484375,
      "step": 51263,
      "training_step_time": 0.40089988708496094
    },
    {
      "epoch": 0.000312890625,
      "model_forward_time": 0.11479830741882324,
      "step": 51264
    },
    {
      "epoch": 0.000312890625,
      "step": 51264,
      "training_step_time": 0.6150209903717041
    },
    {
      "epoch": 0.000312896728515625,
      "model_forward_time": 0.11453032493591309,
      "step": 51265
    },
    {
      "epoch": 0.000312896728515625,
      "step": 51265,
      "training_step_time": 0.3846156597137451
    },
    {
      "epoch": 0.00031290283203125,
      "model_forward_time": 0.11474990844726562,
      "step": 51266
    },
    {
      "epoch": 0.00031290283203125,
      "step": 51266,
      "training_step_time": 0.3930246829986572
    },
    {
      "epoch": 0.000312908935546875,
      "model_forward_time": 0.11481618881225586,
      "step": 51267
    },
    {
      "epoch": 0.000312908935546875,
      "step": 51267,
      "training_step_time": 0.3872671127319336
    },
    {
      "epoch": 0.0003129150390625,
      "model_forward_time": 0.11484193801879883,
      "step": 51268
    },
    {
      "epoch": 0.0003129150390625,
      "step": 51268,
      "training_step_time": 0.3936340808868408
    },
    {
      "epoch": 0.000312921142578125,
      "model_forward_time": 0.11490702629089355,
      "step": 51269
    },
    {
      "epoch": 0.000312921142578125,
      "step": 51269,
      "training_step_time": 0.4088020324707031
    },
    {
      "epoch": 0.00031292724609375,
      "grad_norm": 0.08390423655509949,
      "learning_rate": 5.67706033570487e-06,
      "loss": 0.0338,
      "step": 51270
    },
    {
      "epoch": 0.00031292724609375,
      "model_forward_time": 0.11483287811279297,
      "step": 51270
    },
    {
      "epoch": 0.00031292724609375,
      "step": 51270,
      "training_step_time": 0.6708922386169434
    },
    {
      "epoch": 0.000312933349609375,
      "model_forward_time": 0.11474418640136719,
      "step": 51271
    },
    {
      "epoch": 0.000312933349609375,
      "step": 51271,
      "training_step_time": 0.4215719699859619
    },
    {
      "epoch": 0.000312939453125,
      "model_forward_time": 0.11465954780578613,
      "step": 51272
    },
    {
      "epoch": 0.000312939453125,
      "step": 51272,
      "training_step_time": 0.45668935775756836
    },
    {
      "epoch": 0.000312945556640625,
      "model_forward_time": 0.11561179161071777,
      "step": 51273
    },
    {
      "epoch": 0.000312945556640625,
      "step": 51273,
      "training_step_time": 0.45265722274780273
    },
    {
      "epoch": 0.00031295166015625,
      "model_forward_time": 0.1145327091217041,
      "step": 51274
    },
    {
      "epoch": 0.00031295166015625,
      "step": 51274,
      "training_step_time": 0.4285128116607666
    },
    {
      "epoch": 0.000312957763671875,
      "model_forward_time": 0.11431717872619629,
      "step": 51275
    },
    {
      "epoch": 0.000312957763671875,
      "step": 51275,
      "training_step_time": 0.3842806816101074
    },
    {
      "epoch": 0.0003129638671875,
      "model_forward_time": 0.11441946029663086,
      "step": 51276
    },
    {
      "epoch": 0.0003129638671875,
      "step": 51276,
      "training_step_time": 0.4290320873260498
    },
    {
      "epoch": 0.000312969970703125,
      "model_forward_time": 0.11461281776428223,
      "step": 51277
    },
    {
      "epoch": 0.000312969970703125,
      "step": 51277,
      "training_step_time": 0.5062026977539062
    },
    {
      "epoch": 0.00031297607421875,
      "model_forward_time": 0.1146245002746582,
      "step": 51278
    },
    {
      "epoch": 0.00031297607421875,
      "step": 51278,
      "training_step_time": 0.38747239112854004
    },
    {
      "epoch": 0.000312982177734375,
      "model_forward_time": 0.1158144474029541,
      "step": 51279
    },
    {
      "epoch": 0.000312982177734375,
      "step": 51279,
      "training_step_time": 0.3853914737701416
    },
    {
      "epoch": 0.00031298828125,
      "grad_norm": 0.08736391365528107,
      "learning_rate": 5.6643130971092525e-06,
      "loss": 0.0341,
      "step": 51280
    },
    {
      "epoch": 0.00031298828125,
      "model_forward_time": 0.1144258975982666,
      "step": 51280
    },
    {
      "epoch": 0.00031298828125,
      "step": 51280,
      "training_step_time": 0.3870542049407959
    },
    {
      "epoch": 0.000312994384765625,
      "model_forward_time": 0.11448979377746582,
      "step": 51281
    },
    {
      "epoch": 0.000312994384765625,
      "step": 51281,
      "training_step_time": 0.3839743137359619
    },
    {
      "epoch": 0.00031300048828125,
      "model_forward_time": 0.11484026908874512,
      "step": 51282
    },
    {
      "epoch": 0.00031300048828125,
      "step": 51282,
      "training_step_time": 0.6523797512054443
    },
    {
      "epoch": 0.000313006591796875,
      "model_forward_time": 0.11519312858581543,
      "step": 51283
    },
    {
      "epoch": 0.000313006591796875,
      "step": 51283,
      "training_step_time": 0.3939633369445801
    },
    {
      "epoch": 0.0003130126953125,
      "model_forward_time": 0.11480927467346191,
      "step": 51284
    },
    {
      "epoch": 0.0003130126953125,
      "step": 51284,
      "training_step_time": 0.36232566833496094
    },
    {
      "epoch": 0.000313018798828125,
      "model_forward_time": 0.11516642570495605,
      "step": 51285
    },
    {
      "epoch": 0.000313018798828125,
      "step": 51285,
      "training_step_time": 0.4138336181640625
    },
    {
      "epoch": 0.00031302490234375,
      "model_forward_time": 0.11452388763427734,
      "step": 51286
    },
    {
      "epoch": 0.00031302490234375,
      "step": 51286,
      "training_step_time": 0.4868662357330322
    },
    {
      "epoch": 0.000313031005859375,
      "model_forward_time": 0.11477470397949219,
      "step": 51287
    },
    {
      "epoch": 0.000313031005859375,
      "step": 51287,
      "training_step_time": 0.4357340335845947
    },
    {
      "epoch": 0.000313037109375,
      "model_forward_time": 0.11471724510192871,
      "step": 51288
    },
    {
      "epoch": 0.000313037109375,
      "step": 51288,
      "training_step_time": 0.5741372108459473
    },
    {
      "epoch": 0.000313043212890625,
      "model_forward_time": 0.11393928527832031,
      "step": 51289
    },
    {
      "epoch": 0.000313043212890625,
      "step": 51289,
      "training_step_time": 0.38739705085754395
    },
    {
      "epoch": 0.00031304931640625,
      "grad_norm": 0.10657099634408951,
      "learning_rate": 5.651579326524709e-06,
      "loss": 0.0347,
      "step": 51290
    },
    {
      "epoch": 0.00031304931640625,
      "model_forward_time": 0.11558127403259277,
      "step": 51290
    },
    {
      "epoch": 0.00031304931640625,
      "step": 51290,
      "training_step_time": 0.4672670364379883
    },
    {
      "epoch": 0.000313055419921875,
      "model_forward_time": 0.1145319938659668,
      "step": 51291
    },
    {
      "epoch": 0.000313055419921875,
      "step": 51291,
      "training_step_time": 0.4298591613769531
    },
    {
      "epoch": 0.0003130615234375,
      "model_forward_time": 0.11448526382446289,
      "step": 51292
    },
    {
      "epoch": 0.0003130615234375,
      "step": 51292,
      "training_step_time": 0.38736653327941895
    },
    {
      "epoch": 0.000313067626953125,
      "model_forward_time": 0.1146080493927002,
      "step": 51293
    },
    {
      "epoch": 0.000313067626953125,
      "step": 51293,
      "training_step_time": 0.38198351860046387
    },
    {
      "epoch": 0.00031307373046875,
      "model_forward_time": 0.11522817611694336,
      "step": 51294
    },
    {
      "epoch": 0.00031307373046875,
      "step": 51294,
      "training_step_time": 0.6028668880462646
    },
    {
      "epoch": 0.000313079833984375,
      "model_forward_time": 0.11403822898864746,
      "step": 51295
    },
    {
      "epoch": 0.000313079833984375,
      "step": 51295,
      "training_step_time": 0.388871431350708
    },
    {
      "epoch": 0.0003130859375,
      "model_forward_time": 0.1149449348449707,
      "step": 51296
    },
    {
      "epoch": 0.0003130859375,
      "step": 51296,
      "training_step_time": 0.38620448112487793
    },
    {
      "epoch": 0.000313092041015625,
      "model_forward_time": 0.11496925354003906,
      "step": 51297
    },
    {
      "epoch": 0.000313092041015625,
      "step": 51297,
      "training_step_time": 0.3892250061035156
    },
    {
      "epoch": 0.00031309814453125,
      "model_forward_time": 0.11515235900878906,
      "step": 51298
    },
    {
      "epoch": 0.00031309814453125,
      "step": 51298,
      "training_step_time": 0.39440345764160156
    },
    {
      "epoch": 0.000313104248046875,
      "model_forward_time": 0.11452531814575195,
      "step": 51299
    },
    {
      "epoch": 0.000313104248046875,
      "step": 51299,
      "training_step_time": 0.4301149845123291
    },
    {
      "epoch": 0.0003131103515625,
      "grad_norm": 0.1358288824558258,
      "learning_rate": 5.6388590278194096e-06,
      "loss": 0.0389,
      "step": 51300
    },
    {
      "epoch": 0.0003131103515625,
      "model_forward_time": 0.1150813102722168,
      "step": 51300
    },
    {
      "epoch": 0.0003131103515625,
      "step": 51300,
      "training_step_time": 0.7353019714355469
    },
    {
      "epoch": 0.000313116455078125,
      "model_forward_time": 0.11484646797180176,
      "step": 51301
    },
    {
      "epoch": 0.000313116455078125,
      "step": 51301,
      "training_step_time": 0.3854382038116455
    },
    {
      "epoch": 0.00031312255859375,
      "model_forward_time": 0.11499476432800293,
      "step": 51302
    },
    {
      "epoch": 0.00031312255859375,
      "step": 51302,
      "training_step_time": 0.3884594440460205
    },
    {
      "epoch": 0.000313128662109375,
      "model_forward_time": 0.11432361602783203,
      "step": 51303
    },
    {
      "epoch": 0.000313128662109375,
      "step": 51303,
      "training_step_time": 0.3869750499725342
    },
    {
      "epoch": 0.000313134765625,
      "model_forward_time": 0.1149740219116211,
      "step": 51304
    },
    {
      "epoch": 0.000313134765625,
      "step": 51304,
      "training_step_time": 0.38765382766723633
    },
    {
      "epoch": 0.000313140869140625,
      "model_forward_time": 0.11469459533691406,
      "step": 51305
    },
    {
      "epoch": 0.000313140869140625,
      "step": 51305,
      "training_step_time": 0.48796868324279785
    },
    {
      "epoch": 0.00031314697265625,
      "model_forward_time": 0.11508607864379883,
      "step": 51306
    },
    {
      "epoch": 0.00031314697265625,
      "step": 51306,
      "training_step_time": 0.5490889549255371
    },
    {
      "epoch": 0.000313153076171875,
      "model_forward_time": 0.11467933654785156,
      "step": 51307
    },
    {
      "epoch": 0.000313153076171875,
      "step": 51307,
      "training_step_time": 0.39516139030456543
    },
    {
      "epoch": 0.0003131591796875,
      "model_forward_time": 0.11501955986022949,
      "step": 51308
    },
    {
      "epoch": 0.0003131591796875,
      "step": 51308,
      "training_step_time": 0.3818533420562744
    },
    {
      "epoch": 0.000313165283203125,
      "model_forward_time": 0.11471819877624512,
      "step": 51309
    },
    {
      "epoch": 0.000313165283203125,
      "step": 51309,
      "training_step_time": 0.3895440101623535
    },
    {
      "epoch": 0.00031317138671875,
      "grad_norm": 0.07882820814847946,
      "learning_rate": 5.626152204857454e-06,
      "loss": 0.034,
      "step": 51310
    },
    {
      "epoch": 0.00031317138671875,
      "model_forward_time": 0.11517977714538574,
      "step": 51310
    },
    {
      "epoch": 0.00031317138671875,
      "step": 51310,
      "training_step_time": 0.3863527774810791
    },
    {
      "epoch": 0.000313177490234375,
      "model_forward_time": 0.11535763740539551,
      "step": 51311
    },
    {
      "epoch": 0.000313177490234375,
      "step": 51311,
      "training_step_time": 0.3982422351837158
    },
    {
      "epoch": 0.00031318359375,
      "model_forward_time": 0.11523294448852539,
      "step": 51312
    },
    {
      "epoch": 0.00031318359375,
      "step": 51312,
      "training_step_time": 0.6898984909057617
    },
    {
      "epoch": 0.000313189697265625,
      "model_forward_time": 0.11501026153564453,
      "step": 51313
    },
    {
      "epoch": 0.000313189697265625,
      "step": 51313,
      "training_step_time": 0.4753847122192383
    },
    {
      "epoch": 0.00031319580078125,
      "model_forward_time": 0.11444902420043945,
      "step": 51314
    },
    {
      "epoch": 0.00031319580078125,
      "step": 51314,
      "training_step_time": 0.4643850326538086
    },
    {
      "epoch": 0.000313201904296875,
      "model_forward_time": 0.11475539207458496,
      "step": 51315
    },
    {
      "epoch": 0.000313201904296875,
      "step": 51315,
      "training_step_time": 0.41181421279907227
    },
    {
      "epoch": 0.0003132080078125,
      "model_forward_time": 0.11490821838378906,
      "step": 51316
    },
    {
      "epoch": 0.0003132080078125,
      "step": 51316,
      "training_step_time": 0.3899080753326416
    },
    {
      "epoch": 0.000313214111328125,
      "model_forward_time": 0.11408329010009766,
      "step": 51317
    },
    {
      "epoch": 0.000313214111328125,
      "step": 51317,
      "training_step_time": 0.3891623020172119
    },
    {
      "epoch": 0.00031322021484375,
      "model_forward_time": 0.11493706703186035,
      "step": 51318
    },
    {
      "epoch": 0.00031322021484375,
      "step": 51318,
      "training_step_time": 0.45882415771484375
    },
    {
      "epoch": 0.000313226318359375,
      "model_forward_time": 0.1152806282043457,
      "step": 51319
    },
    {
      "epoch": 0.000313226318359375,
      "step": 51319,
      "training_step_time": 0.4979383945465088
    },
    {
      "epoch": 0.000313232421875,
      "grad_norm": 0.11606092005968094,
      "learning_rate": 5.613458861498832e-06,
      "loss": 0.0329,
      "step": 51320
    },
    {
      "epoch": 0.000313232421875,
      "model_forward_time": 0.11499643325805664,
      "step": 51320
    },
    {
      "epoch": 0.000313232421875,
      "step": 51320,
      "training_step_time": 0.38820934295654297
    },
    {
      "epoch": 0.000313238525390625,
      "model_forward_time": 0.11538982391357422,
      "step": 51321
    },
    {
      "epoch": 0.000313238525390625,
      "step": 51321,
      "training_step_time": 0.39307570457458496
    },
    {
      "epoch": 0.00031324462890625,
      "model_forward_time": 0.11524844169616699,
      "step": 51322
    },
    {
      "epoch": 0.00031324462890625,
      "step": 51322,
      "training_step_time": 0.3877604007720947
    },
    {
      "epoch": 0.000313250732421875,
      "model_forward_time": 0.11494064331054688,
      "step": 51323
    },
    {
      "epoch": 0.000313250732421875,
      "step": 51323,
      "training_step_time": 0.39742231369018555
    },
    {
      "epoch": 0.0003132568359375,
      "model_forward_time": 0.11511468887329102,
      "step": 51324
    },
    {
      "epoch": 0.0003132568359375,
      "step": 51324,
      "training_step_time": 0.6541116237640381
    },
    {
      "epoch": 0.000313262939453125,
      "model_forward_time": 0.1148524284362793,
      "step": 51325
    },
    {
      "epoch": 0.000313262939453125,
      "step": 51325,
      "training_step_time": 0.4347238540649414
    },
    {
      "epoch": 0.00031326904296875,
      "model_forward_time": 0.11475086212158203,
      "step": 51326
    },
    {
      "epoch": 0.00031326904296875,
      "step": 51326,
      "training_step_time": 0.43145251274108887
    },
    {
      "epoch": 0.000313275146484375,
      "model_forward_time": 0.11499547958374023,
      "step": 51327
    },
    {
      "epoch": 0.000313275146484375,
      "step": 51327,
      "training_step_time": 0.4149951934814453
    },
    {
      "epoch": 0.00031328125,
      "model_forward_time": 0.1142585277557373,
      "step": 51328
    },
    {
      "epoch": 0.00031328125,
      "step": 51328,
      "training_step_time": 0.42857813835144043
    },
    {
      "epoch": 0.000313287353515625,
      "model_forward_time": 0.11428546905517578,
      "step": 51329
    },
    {
      "epoch": 0.000313287353515625,
      "step": 51329,
      "training_step_time": 0.4681823253631592
    },
    {
      "epoch": 0.00031329345703125,
      "grad_norm": 0.09825960546731949,
      "learning_rate": 5.600779001599455e-06,
      "loss": 0.0358,
      "step": 51330
    },
    {
      "epoch": 0.00031329345703125,
      "model_forward_time": 0.11497139930725098,
      "step": 51330
    },
    {
      "epoch": 0.00031329345703125,
      "step": 51330,
      "training_step_time": 0.49479126930236816
    },
    {
      "epoch": 0.000313299560546875,
      "model_forward_time": 0.11438584327697754,
      "step": 51331
    },
    {
      "epoch": 0.000313299560546875,
      "step": 51331,
      "training_step_time": 0.4249591827392578
    },
    {
      "epoch": 0.0003133056640625,
      "model_forward_time": 0.11505413055419922,
      "step": 51332
    },
    {
      "epoch": 0.0003133056640625,
      "step": 51332,
      "training_step_time": 0.4499082565307617
    },
    {
      "epoch": 0.000313311767578125,
      "model_forward_time": 0.11457991600036621,
      "step": 51333
    },
    {
      "epoch": 0.000313311767578125,
      "step": 51333,
      "training_step_time": 0.46594905853271484
    },
    {
      "epoch": 0.00031331787109375,
      "model_forward_time": 0.11468768119812012,
      "step": 51334
    },
    {
      "epoch": 0.00031331787109375,
      "step": 51334,
      "training_step_time": 0.39270448684692383
    },
    {
      "epoch": 0.000313323974609375,
      "model_forward_time": 0.11506080627441406,
      "step": 51335
    },
    {
      "epoch": 0.000313323974609375,
      "step": 51335,
      "training_step_time": 0.39299941062927246
    },
    {
      "epoch": 0.000313330078125,
      "model_forward_time": 0.11537861824035645,
      "step": 51336
    },
    {
      "epoch": 0.000313330078125,
      "step": 51336,
      "training_step_time": 0.5937764644622803
    },
    {
      "epoch": 0.000313336181640625,
      "model_forward_time": 0.11463117599487305,
      "step": 51337
    },
    {
      "epoch": 0.000313336181640625,
      "step": 51337,
      "training_step_time": 0.3923475742340088
    },
    {
      "epoch": 0.00031334228515625,
      "model_forward_time": 0.11460494995117188,
      "step": 51338
    },
    {
      "epoch": 0.00031334228515625,
      "step": 51338,
      "training_step_time": 0.39742374420166016
    },
    {
      "epoch": 0.000313348388671875,
      "model_forward_time": 0.1148223876953125,
      "step": 51339
    },
    {
      "epoch": 0.000313348388671875,
      "step": 51339,
      "training_step_time": 0.392780065536499
    },
    {
      "epoch": 0.0003133544921875,
      "grad_norm": 0.0800151601433754,
      "learning_rate": 5.58811262901111e-06,
      "loss": 0.037,
      "step": 51340
    },
    {
      "epoch": 0.0003133544921875,
      "model_forward_time": 0.11500954627990723,
      "step": 51340
    },
    {
      "epoch": 0.0003133544921875,
      "step": 51340,
      "training_step_time": 0.42469143867492676
    },
    {
      "epoch": 0.000313360595703125,
      "model_forward_time": 0.11512446403503418,
      "step": 51341
    },
    {
      "epoch": 0.000313360595703125,
      "step": 51341,
      "training_step_time": 0.38725781440734863
    },
    {
      "epoch": 0.00031336669921875,
      "model_forward_time": 0.11461544036865234,
      "step": 51342
    },
    {
      "epoch": 0.00031336669921875,
      "step": 51342,
      "training_step_time": 0.5763306617736816
    },
    {
      "epoch": 0.000313372802734375,
      "model_forward_time": 0.11463069915771484,
      "step": 51343
    },
    {
      "epoch": 0.000313372802734375,
      "step": 51343,
      "training_step_time": 0.39840030670166016
    },
    {
      "epoch": 0.00031337890625,
      "model_forward_time": 0.11493325233459473,
      "step": 51344
    },
    {
      "epoch": 0.00031337890625,
      "step": 51344,
      "training_step_time": 0.3876609802246094
    },
    {
      "epoch": 0.000313385009765625,
      "model_forward_time": 0.11535954475402832,
      "step": 51345
    },
    {
      "epoch": 0.000313385009765625,
      "step": 51345,
      "training_step_time": 0.5083301067352295
    },
    {
      "epoch": 0.00031339111328125,
      "model_forward_time": 0.11426639556884766,
      "step": 51346
    },
    {
      "epoch": 0.00031339111328125,
      "step": 51346,
      "training_step_time": 0.41657018661499023
    },
    {
      "epoch": 0.000313397216796875,
      "model_forward_time": 0.11496973037719727,
      "step": 51347
    },
    {
      "epoch": 0.000313397216796875,
      "step": 51347,
      "training_step_time": 0.40372443199157715
    },
    {
      "epoch": 0.0003134033203125,
      "model_forward_time": 0.11498475074768066,
      "step": 51348
    },
    {
      "epoch": 0.0003134033203125,
      "step": 51348,
      "training_step_time": 0.5442500114440918
    },
    {
      "epoch": 0.000313409423828125,
      "model_forward_time": 0.11466860771179199,
      "step": 51349
    },
    {
      "epoch": 0.000313409423828125,
      "step": 51349,
      "training_step_time": 0.3947618007659912
    },
    {
      "epoch": 0.00031341552734375,
      "grad_norm": 0.0785745307803154,
      "learning_rate": 5.57545974758153e-06,
      "loss": 0.0339,
      "step": 51350
    },
    {
      "epoch": 0.00031341552734375,
      "model_forward_time": 0.11484098434448242,
      "step": 51350
    },
    {
      "epoch": 0.00031341552734375,
      "step": 51350,
      "training_step_time": 0.39739370346069336
    },
    {
      "epoch": 0.000313421630859375,
      "model_forward_time": 0.11435174942016602,
      "step": 51351
    },
    {
      "epoch": 0.000313421630859375,
      "step": 51351,
      "training_step_time": 0.42717957496643066
    },
    {
      "epoch": 0.000313427734375,
      "model_forward_time": 0.11408734321594238,
      "step": 51352
    },
    {
      "epoch": 0.000313427734375,
      "step": 51352,
      "training_step_time": 0.4005722999572754
    },
    {
      "epoch": 0.000313433837890625,
      "model_forward_time": 0.1149146556854248,
      "step": 51353
    },
    {
      "epoch": 0.000313433837890625,
      "step": 51353,
      "training_step_time": 0.4117887020111084
    },
    {
      "epoch": 0.00031343994140625,
      "model_forward_time": 0.11454176902770996,
      "step": 51354
    },
    {
      "epoch": 0.00031343994140625,
      "step": 51354,
      "training_step_time": 0.6560981273651123
    },
    {
      "epoch": 0.000313446044921875,
      "model_forward_time": 0.11460089683532715,
      "step": 51355
    },
    {
      "epoch": 0.000313446044921875,
      "step": 51355,
      "training_step_time": 0.44999122619628906
    },
    {
      "epoch": 0.0003134521484375,
      "model_forward_time": 0.11462187767028809,
      "step": 51356
    },
    {
      "epoch": 0.0003134521484375,
      "step": 51356,
      "training_step_time": 0.46541786193847656
    },
    {
      "epoch": 0.000313458251953125,
      "model_forward_time": 0.11456799507141113,
      "step": 51357
    },
    {
      "epoch": 0.000313458251953125,
      "step": 51357,
      "training_step_time": 0.41275978088378906
    },
    {
      "epoch": 0.00031346435546875,
      "model_forward_time": 0.11425352096557617,
      "step": 51358
    },
    {
      "epoch": 0.00031346435546875,
      "step": 51358,
      "training_step_time": 0.3957993984222412
    },
    {
      "epoch": 0.000313470458984375,
      "model_forward_time": 0.11452531814575195,
      "step": 51359
    },
    {
      "epoch": 0.000313470458984375,
      "step": 51359,
      "training_step_time": 0.420011043548584
    },
    {
      "epoch": 0.0003134765625,
      "grad_norm": 0.11497700959444046,
      "learning_rate": 5.562820361154314e-06,
      "loss": 0.0402,
      "step": 51360
    },
    {
      "epoch": 0.0003134765625,
      "model_forward_time": 0.11454486846923828,
      "step": 51360
    },
    {
      "epoch": 0.0003134765625,
      "step": 51360,
      "training_step_time": 0.6630196571350098
    },
    {
      "epoch": 0.000313482666015625,
      "model_forward_time": 0.11449527740478516,
      "step": 51361
    },
    {
      "epoch": 0.000313482666015625,
      "step": 51361,
      "training_step_time": 0.39469170570373535
    },
    {
      "epoch": 0.00031348876953125,
      "model_forward_time": 0.11374616622924805,
      "step": 51362
    },
    {
      "epoch": 0.00031348876953125,
      "step": 51362,
      "training_step_time": 0.3993339538574219
    },
    {
      "epoch": 0.000313494873046875,
      "model_forward_time": 0.11484932899475098,
      "step": 51363
    },
    {
      "epoch": 0.000313494873046875,
      "step": 51363,
      "training_step_time": 0.400071382522583
    },
    {
      "epoch": 0.0003135009765625,
      "model_forward_time": 0.11434125900268555,
      "step": 51364
    },
    {
      "epoch": 0.0003135009765625,
      "step": 51364,
      "training_step_time": 0.4001622200012207
    },
    {
      "epoch": 0.000313507080078125,
      "model_forward_time": 0.11486029624938965,
      "step": 51365
    },
    {
      "epoch": 0.000313507080078125,
      "step": 51365,
      "training_step_time": 0.38864731788635254
    },
    {
      "epoch": 0.00031351318359375,
      "model_forward_time": 0.11502718925476074,
      "step": 51366
    },
    {
      "epoch": 0.00031351318359375,
      "step": 51366,
      "training_step_time": 0.8635904788970947
    },
    {
      "epoch": 0.000313519287109375,
      "model_forward_time": 0.11412572860717773,
      "step": 51367
    },
    {
      "epoch": 0.000313519287109375,
      "step": 51367,
      "training_step_time": 0.4453451633453369
    },
    {
      "epoch": 0.000313525390625,
      "model_forward_time": 0.11586499214172363,
      "step": 51368
    },
    {
      "epoch": 0.000313525390625,
      "step": 51368,
      "training_step_time": 0.48441100120544434
    },
    {
      "epoch": 0.000313531494140625,
      "model_forward_time": 0.11432480812072754,
      "step": 51369
    },
    {
      "epoch": 0.000313531494140625,
      "step": 51369,
      "training_step_time": 0.4661555290222168
    },
    {
      "epoch": 0.00031353759765625,
      "grad_norm": 0.08224128186702728,
      "learning_rate": 5.550194473568981e-06,
      "loss": 0.0379,
      "step": 51370
    },
    {
      "epoch": 0.00031353759765625,
      "model_forward_time": 0.11409139633178711,
      "step": 51370
    },
    {
      "epoch": 0.00031353759765625,
      "step": 51370,
      "training_step_time": 0.40102672576904297
    },
    {
      "epoch": 0.000313543701171875,
      "model_forward_time": 0.11573410034179688,
      "step": 51371
    },
    {
      "epoch": 0.000313543701171875,
      "step": 51371,
      "training_step_time": 0.38634490966796875
    },
    {
      "epoch": 0.0003135498046875,
      "model_forward_time": 0.11522579193115234,
      "step": 51372
    },
    {
      "epoch": 0.0003135498046875,
      "step": 51372,
      "training_step_time": 0.4284939765930176
    },
    {
      "epoch": 0.000313555908203125,
      "model_forward_time": 0.11462187767028809,
      "step": 51373
    },
    {
      "epoch": 0.000313555908203125,
      "step": 51373,
      "training_step_time": 0.3951098918914795
    },
    {
      "epoch": 0.00031356201171875,
      "model_forward_time": 0.11487340927124023,
      "step": 51374
    },
    {
      "epoch": 0.00031356201171875,
      "step": 51374,
      "training_step_time": 0.46122074127197266
    },
    {
      "epoch": 0.000313568115234375,
      "model_forward_time": 0.11466050148010254,
      "step": 51375
    },
    {
      "epoch": 0.000313568115234375,
      "step": 51375,
      "training_step_time": 0.39330554008483887
    },
    {
      "epoch": 0.00031357421875,
      "model_forward_time": 0.11526870727539062,
      "step": 51376
    },
    {
      "epoch": 0.00031357421875,
      "step": 51376,
      "training_step_time": 0.3937718868255615
    },
    {
      "epoch": 0.000313580322265625,
      "model_forward_time": 0.11461663246154785,
      "step": 51377
    },
    {
      "epoch": 0.000313580322265625,
      "step": 51377,
      "training_step_time": 0.4492487907409668
    },
    {
      "epoch": 0.00031358642578125,
      "model_forward_time": 0.11480975151062012,
      "step": 51378
    },
    {
      "epoch": 0.00031358642578125,
      "step": 51378,
      "training_step_time": 0.5815973281860352
    },
    {
      "epoch": 0.000313592529296875,
      "model_forward_time": 0.11459517478942871,
      "step": 51379
    },
    {
      "epoch": 0.000313592529296875,
      "step": 51379,
      "training_step_time": 0.3889908790588379
    },
    {
      "epoch": 0.0003135986328125,
      "grad_norm": 0.11099623143672943,
      "learning_rate": 5.537582088660937e-06,
      "loss": 0.0343,
      "step": 51380
    },
    {
      "epoch": 0.0003135986328125,
      "model_forward_time": 0.11455464363098145,
      "step": 51380
    },
    {
      "epoch": 0.0003135986328125,
      "step": 51380,
      "training_step_time": 0.39087986946105957
    },
    {
      "epoch": 0.000313604736328125,
      "model_forward_time": 0.11443185806274414,
      "step": 51381
    },
    {
      "epoch": 0.000313604736328125,
      "step": 51381,
      "training_step_time": 0.42818784713745117
    },
    {
      "epoch": 0.00031361083984375,
      "model_forward_time": 0.11466455459594727,
      "step": 51382
    },
    {
      "epoch": 0.00031361083984375,
      "step": 51382,
      "training_step_time": 0.36218976974487305
    },
    {
      "epoch": 0.000313616943359375,
      "model_forward_time": 0.11455202102661133,
      "step": 51383
    },
    {
      "epoch": 0.000313616943359375,
      "step": 51383,
      "training_step_time": 0.4611399173736572
    },
    {
      "epoch": 0.000313623046875,
      "model_forward_time": 0.11471080780029297,
      "step": 51384
    },
    {
      "epoch": 0.000313623046875,
      "step": 51384,
      "training_step_time": 0.48554253578186035
    },
    {
      "epoch": 0.000313629150390625,
      "model_forward_time": 0.11539316177368164,
      "step": 51385
    },
    {
      "epoch": 0.000313629150390625,
      "step": 51385,
      "training_step_time": 0.39896488189697266
    },
    {
      "epoch": 0.00031363525390625,
      "model_forward_time": 0.11424612998962402,
      "step": 51386
    },
    {
      "epoch": 0.00031363525390625,
      "step": 51386,
      "training_step_time": 0.3928987979888916
    },
    {
      "epoch": 0.000313641357421875,
      "model_forward_time": 0.11467361450195312,
      "step": 51387
    },
    {
      "epoch": 0.000313641357421875,
      "step": 51387,
      "training_step_time": 0.49985432624816895
    },
    {
      "epoch": 0.0003136474609375,
      "model_forward_time": 0.11469316482543945,
      "step": 51388
    },
    {
      "epoch": 0.0003136474609375,
      "step": 51388,
      "training_step_time": 0.47102832794189453
    },
    {
      "epoch": 0.000313653564453125,
      "model_forward_time": 0.11466717720031738,
      "step": 51389
    },
    {
      "epoch": 0.000313653564453125,
      "step": 51389,
      "training_step_time": 0.38953304290771484
    },
    {
      "epoch": 0.00031365966796875,
      "grad_norm": 0.10623281449079514,
      "learning_rate": 5.524983210261481e-06,
      "loss": 0.0378,
      "step": 51390
    },
    {
      "epoch": 0.00031365966796875,
      "model_forward_time": 0.11579179763793945,
      "step": 51390
    },
    {
      "epoch": 0.00031365966796875,
      "step": 51390,
      "training_step_time": 0.6160340309143066
    },
    {
      "epoch": 0.000313665771484375,
      "model_forward_time": 0.11473894119262695,
      "step": 51391
    },
    {
      "epoch": 0.000313665771484375,
      "step": 51391,
      "training_step_time": 0.38603830337524414
    },
    {
      "epoch": 0.000313671875,
      "model_forward_time": 0.11446690559387207,
      "step": 51392
    },
    {
      "epoch": 0.000313671875,
      "step": 51392,
      "training_step_time": 0.3865346908569336
    },
    {
      "epoch": 0.000313677978515625,
      "model_forward_time": 0.11467266082763672,
      "step": 51393
    },
    {
      "epoch": 0.000313677978515625,
      "step": 51393,
      "training_step_time": 0.40293264389038086
    },
    {
      "epoch": 0.00031368408203125,
      "model_forward_time": 0.11558413505554199,
      "step": 51394
    },
    {
      "epoch": 0.00031368408203125,
      "step": 51394,
      "training_step_time": 0.3926670551300049
    },
    {
      "epoch": 0.000313690185546875,
      "model_forward_time": 0.11507654190063477,
      "step": 51395
    },
    {
      "epoch": 0.000313690185546875,
      "step": 51395,
      "training_step_time": 0.43239498138427734
    },
    {
      "epoch": 0.0003136962890625,
      "model_forward_time": 0.11495828628540039,
      "step": 51396
    },
    {
      "epoch": 0.0003136962890625,
      "step": 51396,
      "training_step_time": 0.5932843685150146
    },
    {
      "epoch": 0.000313702392578125,
      "model_forward_time": 0.11477112770080566,
      "step": 51397
    },
    {
      "epoch": 0.000313702392578125,
      "step": 51397,
      "training_step_time": 0.48264288902282715
    },
    {
      "epoch": 0.00031370849609375,
      "model_forward_time": 0.11449170112609863,
      "step": 51398
    },
    {
      "epoch": 0.00031370849609375,
      "step": 51398,
      "training_step_time": 0.42795348167419434
    },
    {
      "epoch": 0.000313714599609375,
      "model_forward_time": 0.11444616317749023,
      "step": 51399
    },
    {
      "epoch": 0.000313714599609375,
      "step": 51399,
      "training_step_time": 0.38170695304870605
    },
    {
      "epoch": 0.000313720703125,
      "grad_norm": 0.0854182243347168,
      "learning_rate": 5.512397842197847e-06,
      "loss": 0.0351,
      "step": 51400
    },
    {
      "epoch": 0.000313720703125,
      "model_forward_time": 0.11470818519592285,
      "step": 51400
    },
    {
      "epoch": 0.000313720703125,
      "step": 51400,
      "training_step_time": 0.45330142974853516
    },
    {
      "epoch": 0.000313726806640625,
      "model_forward_time": 0.11433553695678711,
      "step": 51401
    },
    {
      "epoch": 0.000313726806640625,
      "step": 51401,
      "training_step_time": 0.39885616302490234
    },
    {
      "epoch": 0.00031373291015625,
      "model_forward_time": 0.1146402359008789,
      "step": 51402
    },
    {
      "epoch": 0.00031373291015625,
      "step": 51402,
      "training_step_time": 0.5541315078735352
    },
    {
      "epoch": 0.000313739013671875,
      "model_forward_time": 0.11499452590942383,
      "step": 51403
    },
    {
      "epoch": 0.000313739013671875,
      "step": 51403,
      "training_step_time": 0.41661548614501953
    },
    {
      "epoch": 0.0003137451171875,
      "model_forward_time": 0.11577033996582031,
      "step": 51404
    },
    {
      "epoch": 0.0003137451171875,
      "step": 51404,
      "training_step_time": 0.39860033988952637
    },
    {
      "epoch": 0.000313751220703125,
      "model_forward_time": 0.11571621894836426,
      "step": 51405
    },
    {
      "epoch": 0.000313751220703125,
      "step": 51405,
      "training_step_time": 0.3826603889465332
    },
    {
      "epoch": 0.00031375732421875,
      "model_forward_time": 0.11478209495544434,
      "step": 51406
    },
    {
      "epoch": 0.00031375732421875,
      "step": 51406,
      "training_step_time": 0.3890974521636963
    },
    {
      "epoch": 0.000313763427734375,
      "model_forward_time": 0.11458396911621094,
      "step": 51407
    },
    {
      "epoch": 0.000313763427734375,
      "step": 51407,
      "training_step_time": 0.38584017753601074
    },
    {
      "epoch": 0.00031376953125,
      "model_forward_time": 0.11558055877685547,
      "step": 51408
    },
    {
      "epoch": 0.00031376953125,
      "step": 51408,
      "training_step_time": 0.6972968578338623
    },
    {
      "epoch": 0.000313775634765625,
      "model_forward_time": 0.11484599113464355,
      "step": 51409
    },
    {
      "epoch": 0.000313775634765625,
      "step": 51409,
      "training_step_time": 0.4303579330444336
    },
    {
      "epoch": 0.00031378173828125,
      "grad_norm": 0.07819351553916931,
      "learning_rate": 5.499825988293106e-06,
      "loss": 0.035,
      "step": 51410
    },
    {
      "epoch": 0.00031378173828125,
      "model_forward_time": 0.11502623558044434,
      "step": 51410
    },
    {
      "epoch": 0.00031378173828125,
      "step": 51410,
      "training_step_time": 0.48415684700012207
    },
    {
      "epoch": 0.000313787841796875,
      "model_forward_time": 0.11419677734375,
      "step": 51411
    },
    {
      "epoch": 0.000313787841796875,
      "step": 51411,
      "training_step_time": 0.45754575729370117
    },
    {
      "epoch": 0.0003137939453125,
      "model_forward_time": 0.11522555351257324,
      "step": 51412
    },
    {
      "epoch": 0.0003137939453125,
      "step": 51412,
      "training_step_time": 0.39284491539001465
    },
    {
      "epoch": 0.000313800048828125,
      "model_forward_time": 0.11370849609375,
      "step": 51413
    },
    {
      "epoch": 0.000313800048828125,
      "step": 51413,
      "training_step_time": 0.3996095657348633
    },
    {
      "epoch": 0.00031380615234375,
      "model_forward_time": 0.11466598510742188,
      "step": 51414
    },
    {
      "epoch": 0.00031380615234375,
      "step": 51414,
      "training_step_time": 0.4489755630493164
    },
    {
      "epoch": 0.000313812255859375,
      "model_forward_time": 0.11635136604309082,
      "step": 51415
    },
    {
      "epoch": 0.000313812255859375,
      "step": 51415,
      "training_step_time": 0.43453359603881836
    },
    {
      "epoch": 0.000313818359375,
      "model_forward_time": 0.11580514907836914,
      "step": 51416
    },
    {
      "epoch": 0.000313818359375,
      "step": 51416,
      "training_step_time": 0.4735867977142334
    },
    {
      "epoch": 0.000313824462890625,
      "model_forward_time": 0.11519265174865723,
      "step": 51417
    },
    {
      "epoch": 0.000313824462890625,
      "step": 51417,
      "training_step_time": 0.40407776832580566
    },
    {
      "epoch": 0.00031383056640625,
      "model_forward_time": 0.11441683769226074,
      "step": 51418
    },
    {
      "epoch": 0.00031383056640625,
      "step": 51418,
      "training_step_time": 0.38599419593811035
    },
    {
      "epoch": 0.000313836669921875,
      "model_forward_time": 0.1148843765258789,
      "step": 51419
    },
    {
      "epoch": 0.000313836669921875,
      "step": 51419,
      "training_step_time": 0.3801288604736328
    },
    {
      "epoch": 0.0003138427734375,
      "grad_norm": 0.10104944556951523,
      "learning_rate": 5.48726765236629e-06,
      "loss": 0.0393,
      "step": 51420
    },
    {
      "epoch": 0.0003138427734375,
      "model_forward_time": 0.11530566215515137,
      "step": 51420
    },
    {
      "epoch": 0.0003138427734375,
      "step": 51420,
      "training_step_time": 0.5749585628509521
    },
    {
      "epoch": 0.000313848876953125,
      "model_forward_time": 0.11496376991271973,
      "step": 51421
    },
    {
      "epoch": 0.000313848876953125,
      "step": 51421,
      "training_step_time": 0.3954484462738037
    },
    {
      "epoch": 0.00031385498046875,
      "model_forward_time": 0.1150674819946289,
      "step": 51422
    },
    {
      "epoch": 0.00031385498046875,
      "step": 51422,
      "training_step_time": 0.39015936851501465
    },
    {
      "epoch": 0.000313861083984375,
      "model_forward_time": 0.11501002311706543,
      "step": 51423
    },
    {
      "epoch": 0.000313861083984375,
      "step": 51423,
      "training_step_time": 0.46480655670166016
    },
    {
      "epoch": 0.0003138671875,
      "model_forward_time": 0.11533236503601074,
      "step": 51424
    },
    {
      "epoch": 0.0003138671875,
      "step": 51424,
      "training_step_time": 0.49916982650756836
    },
    {
      "epoch": 0.000313873291015625,
      "model_forward_time": 0.11426734924316406,
      "step": 51425
    },
    {
      "epoch": 0.000313873291015625,
      "step": 51425,
      "training_step_time": 0.4912443161010742
    },
    {
      "epoch": 0.00031387939453125,
      "model_forward_time": 0.11474847793579102,
      "step": 51426
    },
    {
      "epoch": 0.00031387939453125,
      "step": 51426,
      "training_step_time": 0.5731701850891113
    },
    {
      "epoch": 0.000313885498046875,
      "model_forward_time": 0.11432242393493652,
      "step": 51427
    },
    {
      "epoch": 0.000313885498046875,
      "step": 51427,
      "training_step_time": 0.3837151527404785
    },
    {
      "epoch": 0.0003138916015625,
      "model_forward_time": 0.11501216888427734,
      "step": 51428
    },
    {
      "epoch": 0.0003138916015625,
      "step": 51428,
      "training_step_time": 0.4128117561340332
    },
    {
      "epoch": 0.000313897705078125,
      "model_forward_time": 0.11440348625183105,
      "step": 51429
    },
    {
      "epoch": 0.000313897705078125,
      "step": 51429,
      "training_step_time": 0.39783477783203125
    },
    {
      "epoch": 0.00031390380859375,
      "grad_norm": 0.09648768603801727,
      "learning_rate": 5.474722838232254e-06,
      "loss": 0.0342,
      "step": 51430
    },
    {
      "epoch": 0.00031390380859375,
      "model_forward_time": 0.11518549919128418,
      "step": 51430
    },
    {
      "epoch": 0.00031390380859375,
      "step": 51430,
      "training_step_time": 0.4536471366882324
    },
    {
      "epoch": 0.000313909912109375,
      "model_forward_time": 0.11543917655944824,
      "step": 51431
    },
    {
      "epoch": 0.000313909912109375,
      "step": 51431,
      "training_step_time": 0.39606356620788574
    },
    {
      "epoch": 0.000313916015625,
      "model_forward_time": 0.11490201950073242,
      "step": 51432
    },
    {
      "epoch": 0.000313916015625,
      "step": 51432,
      "training_step_time": 0.5912606716156006
    },
    {
      "epoch": 0.000313922119140625,
      "model_forward_time": 0.1143496036529541,
      "step": 51433
    },
    {
      "epoch": 0.000313922119140625,
      "step": 51433,
      "training_step_time": 0.39614200592041016
    },
    {
      "epoch": 0.00031392822265625,
      "model_forward_time": 0.11537885665893555,
      "step": 51434
    },
    {
      "epoch": 0.00031392822265625,
      "step": 51434,
      "training_step_time": 0.3909766674041748
    },
    {
      "epoch": 0.000313934326171875,
      "model_forward_time": 0.11532425880432129,
      "step": 51435
    },
    {
      "epoch": 0.000313934326171875,
      "step": 51435,
      "training_step_time": 0.3952000141143799
    },
    {
      "epoch": 0.0003139404296875,
      "model_forward_time": 0.11530733108520508,
      "step": 51436
    },
    {
      "epoch": 0.0003139404296875,
      "step": 51436,
      "training_step_time": 0.3910677433013916
    },
    {
      "epoch": 0.000313946533203125,
      "model_forward_time": 0.11415433883666992,
      "step": 51437
    },
    {
      "epoch": 0.000313946533203125,
      "step": 51437,
      "training_step_time": 0.36491966247558594
    },
    {
      "epoch": 0.00031395263671875,
      "model_forward_time": 0.11672329902648926,
      "step": 51438
    },
    {
      "epoch": 0.00031395263671875,
      "step": 51438,
      "training_step_time": 0.5172843933105469
    },
    {
      "epoch": 0.000313958740234375,
      "model_forward_time": 0.11477446556091309,
      "step": 51439
    },
    {
      "epoch": 0.000313958740234375,
      "step": 51439,
      "training_step_time": 0.46445512771606445
    },
    {
      "epoch": 0.00031396484375,
      "grad_norm": 0.07723883539438248,
      "learning_rate": 5.462191549701806e-06,
      "loss": 0.0355,
      "step": 51440
    },
    {
      "epoch": 0.00031396484375,
      "model_forward_time": 0.11427974700927734,
      "step": 51440
    },
    {
      "epoch": 0.00031396484375,
      "step": 51440,
      "training_step_time": 0.3923368453979492
    },
    {
      "epoch": 0.000313970947265625,
      "model_forward_time": 0.11498832702636719,
      "step": 51441
    },
    {
      "epoch": 0.000313970947265625,
      "step": 51441,
      "training_step_time": 0.38670849800109863
    },
    {
      "epoch": 0.00031397705078125,
      "model_forward_time": 0.1152951717376709,
      "step": 51442
    },
    {
      "epoch": 0.00031397705078125,
      "step": 51442,
      "training_step_time": 0.4467043876647949
    },
    {
      "epoch": 0.000313983154296875,
      "model_forward_time": 0.11481380462646484,
      "step": 51443
    },
    {
      "epoch": 0.000313983154296875,
      "step": 51443,
      "training_step_time": 0.45086216926574707
    },
    {
      "epoch": 0.0003139892578125,
      "model_forward_time": 0.11638808250427246,
      "step": 51444
    },
    {
      "epoch": 0.0003139892578125,
      "step": 51444,
      "training_step_time": 0.5303313732147217
    },
    {
      "epoch": 0.000313995361328125,
      "model_forward_time": 0.11469507217407227,
      "step": 51445
    },
    {
      "epoch": 0.000313995361328125,
      "step": 51445,
      "training_step_time": 0.39273691177368164
    },
    {
      "epoch": 0.00031400146484375,
      "model_forward_time": 0.11532735824584961,
      "step": 51446
    },
    {
      "epoch": 0.00031400146484375,
      "step": 51446,
      "training_step_time": 0.38484764099121094
    },
    {
      "epoch": 0.000314007568359375,
      "model_forward_time": 0.11520695686340332,
      "step": 51447
    },
    {
      "epoch": 0.000314007568359375,
      "step": 51447,
      "training_step_time": 0.38007044792175293
    },
    {
      "epoch": 0.000314013671875,
      "model_forward_time": 0.11540865898132324,
      "step": 51448
    },
    {
      "epoch": 0.000314013671875,
      "step": 51448,
      "training_step_time": 0.3889932632446289
    },
    {
      "epoch": 0.000314019775390625,
      "model_forward_time": 0.11459565162658691,
      "step": 51449
    },
    {
      "epoch": 0.000314019775390625,
      "step": 51449,
      "training_step_time": 0.39015793800354004
    },
    {
      "epoch": 0.00031402587890625,
      "grad_norm": 0.0730753093957901,
      "learning_rate": 5.449673790581611e-06,
      "loss": 0.0334,
      "step": 51450
    },
    {
      "epoch": 0.00031402587890625,
      "model_forward_time": 0.1153109073638916,
      "step": 51450
    },
    {
      "epoch": 0.00031402587890625,
      "step": 51450,
      "training_step_time": 0.7055778503417969
    },
    {
      "epoch": 0.000314031982421875,
      "model_forward_time": 0.11445403099060059,
      "step": 51451
    },
    {
      "epoch": 0.000314031982421875,
      "step": 51451,
      "training_step_time": 0.4183816909790039
    },
    {
      "epoch": 0.0003140380859375,
      "model_forward_time": 0.11523675918579102,
      "step": 51452
    },
    {
      "epoch": 0.0003140380859375,
      "step": 51452,
      "training_step_time": 0.43842005729675293
    },
    {
      "epoch": 0.000314044189453125,
      "model_forward_time": 0.11472082138061523,
      "step": 51453
    },
    {
      "epoch": 0.000314044189453125,
      "step": 51453,
      "training_step_time": 0.45286035537719727
    },
    {
      "epoch": 0.00031405029296875,
      "model_forward_time": 0.11449217796325684,
      "step": 51454
    },
    {
      "epoch": 0.00031405029296875,
      "step": 51454,
      "training_step_time": 0.3940556049346924
    },
    {
      "epoch": 0.000314056396484375,
      "model_forward_time": 0.11475205421447754,
      "step": 51455
    },
    {
      "epoch": 0.000314056396484375,
      "step": 51455,
      "training_step_time": 0.3764352798461914
    },
    {
      "epoch": 0.0003140625,
      "model_forward_time": 0.11562323570251465,
      "step": 51456
    },
    {
      "epoch": 0.0003140625,
      "step": 51456,
      "training_step_time": 0.43546509742736816
    },
    {
      "epoch": 0.000314068603515625,
      "model_forward_time": 0.11538171768188477,
      "step": 51457
    },
    {
      "epoch": 0.000314068603515625,
      "step": 51457,
      "training_step_time": 0.4078092575073242
    },
    {
      "epoch": 0.00031407470703125,
      "model_forward_time": 0.11520981788635254,
      "step": 51458
    },
    {
      "epoch": 0.00031407470703125,
      "step": 51458,
      "training_step_time": 0.4689218997955322
    },
    {
      "epoch": 0.000314080810546875,
      "model_forward_time": 0.11519098281860352,
      "step": 51459
    },
    {
      "epoch": 0.000314080810546875,
      "step": 51459,
      "training_step_time": 0.39507079124450684
    },
    {
      "epoch": 0.0003140869140625,
      "grad_norm": 0.0869048684835434,
      "learning_rate": 5.437169564674233e-06,
      "loss": 0.0329,
      "step": 51460
    },
    {
      "epoch": 0.0003140869140625,
      "model_forward_time": 0.11468148231506348,
      "step": 51460
    },
    {
      "epoch": 0.0003140869140625,
      "step": 51460,
      "training_step_time": 0.3956313133239746
    },
    {
      "epoch": 0.000314093017578125,
      "model_forward_time": 0.11474990844726562,
      "step": 51461
    },
    {
      "epoch": 0.000314093017578125,
      "step": 51461,
      "training_step_time": 0.3892476558685303
    },
    {
      "epoch": 0.00031409912109375,
      "model_forward_time": 0.11500000953674316,
      "step": 51462
    },
    {
      "epoch": 0.00031409912109375,
      "step": 51462,
      "training_step_time": 0.6178562641143799
    },
    {
      "epoch": 0.000314105224609375,
      "model_forward_time": 0.11441755294799805,
      "step": 51463
    },
    {
      "epoch": 0.000314105224609375,
      "step": 51463,
      "training_step_time": 0.39846229553222656
    },
    {
      "epoch": 0.000314111328125,
      "model_forward_time": 0.11578059196472168,
      "step": 51464
    },
    {
      "epoch": 0.000314111328125,
      "step": 51464,
      "training_step_time": 0.3926877975463867
    },
    {
      "epoch": 0.000314117431640625,
      "model_forward_time": 0.11484432220458984,
      "step": 51465
    },
    {
      "epoch": 0.000314117431640625,
      "step": 51465,
      "training_step_time": 0.425203800201416
    },
    {
      "epoch": 0.00031412353515625,
      "model_forward_time": 0.11461997032165527,
      "step": 51466
    },
    {
      "epoch": 0.00031412353515625,
      "step": 51466,
      "training_step_time": 0.3940255641937256
    },
    {
      "epoch": 0.000314129638671875,
      "model_forward_time": 0.11464405059814453,
      "step": 51467
    },
    {
      "epoch": 0.000314129638671875,
      "step": 51467,
      "training_step_time": 0.45049405097961426
    },
    {
      "epoch": 0.0003141357421875,
      "model_forward_time": 0.11449790000915527,
      "step": 51468
    },
    {
      "epoch": 0.0003141357421875,
      "step": 51468,
      "training_step_time": 0.5738530158996582
    },
    {
      "epoch": 0.000314141845703125,
      "model_forward_time": 0.11517024040222168,
      "step": 51469
    },
    {
      "epoch": 0.000314141845703125,
      "step": 51469,
      "training_step_time": 0.3961470127105713
    },
    {
      "epoch": 0.00031414794921875,
      "grad_norm": 0.10638997703790665,
      "learning_rate": 5.424678875778133e-06,
      "loss": 0.0364,
      "step": 51470
    },
    {
      "epoch": 0.00031414794921875,
      "model_forward_time": 0.11464214324951172,
      "step": 51470
    },
    {
      "epoch": 0.00031414794921875,
      "step": 51470,
      "training_step_time": 0.3898460865020752
    },
    {
      "epoch": 0.000314154052734375,
      "model_forward_time": 0.115142822265625,
      "step": 51471
    },
    {
      "epoch": 0.000314154052734375,
      "step": 51471,
      "training_step_time": 0.4539334774017334
    },
    {
      "epoch": 0.00031416015625,
      "model_forward_time": 0.11406731605529785,
      "step": 51472
    },
    {
      "epoch": 0.00031416015625,
      "step": 51472,
      "training_step_time": 0.4849216938018799
    },
    {
      "epoch": 0.000314166259765625,
      "model_forward_time": 0.11477947235107422,
      "step": 51473
    },
    {
      "epoch": 0.000314166259765625,
      "step": 51473,
      "training_step_time": 0.3870983123779297
    },
    {
      "epoch": 0.00031417236328125,
      "model_forward_time": 0.1174464225769043,
      "step": 51474
    },
    {
      "epoch": 0.00031417236328125,
      "step": 51474,
      "training_step_time": 0.5666570663452148
    },
    {
      "epoch": 0.000314178466796875,
      "model_forward_time": 0.11434531211853027,
      "step": 51475
    },
    {
      "epoch": 0.000314178466796875,
      "step": 51475,
      "training_step_time": 0.3960590362548828
    },
    {
      "epoch": 0.0003141845703125,
      "model_forward_time": 0.11418652534484863,
      "step": 51476
    },
    {
      "epoch": 0.0003141845703125,
      "step": 51476,
      "training_step_time": 0.4049520492553711
    },
    {
      "epoch": 0.000314190673828125,
      "model_forward_time": 0.11462807655334473,
      "step": 51477
    },
    {
      "epoch": 0.000314190673828125,
      "step": 51477,
      "training_step_time": 0.4006154537200928
    },
    {
      "epoch": 0.00031419677734375,
      "model_forward_time": 0.11605501174926758,
      "step": 51478
    },
    {
      "epoch": 0.00031419677734375,
      "step": 51478,
      "training_step_time": 0.39929914474487305
    },
    {
      "epoch": 0.000314202880859375,
      "model_forward_time": 0.11437010765075684,
      "step": 51479
    },
    {
      "epoch": 0.000314202880859375,
      "step": 51479,
      "training_step_time": 0.3664085865020752
    },
    {
      "epoch": 0.000314208984375,
      "grad_norm": 0.10065706819295883,
      "learning_rate": 5.412201727687644e-06,
      "loss": 0.0371,
      "step": 51480
    },
    {
      "epoch": 0.000314208984375,
      "model_forward_time": 0.11500692367553711,
      "step": 51480
    },
    {
      "epoch": 0.000314208984375,
      "step": 51480,
      "training_step_time": 0.6056764125823975
    },
    {
      "epoch": 0.000314215087890625,
      "model_forward_time": 0.11422109603881836,
      "step": 51481
    },
    {
      "epoch": 0.000314215087890625,
      "step": 51481,
      "training_step_time": 0.4516592025756836
    },
    {
      "epoch": 0.00031422119140625,
      "model_forward_time": 0.11449742317199707,
      "step": 51482
    },
    {
      "epoch": 0.00031422119140625,
      "step": 51482,
      "training_step_time": 0.41132092475891113
    },
    {
      "epoch": 0.000314227294921875,
      "model_forward_time": 0.11445307731628418,
      "step": 51483
    },
    {
      "epoch": 0.000314227294921875,
      "step": 51483,
      "training_step_time": 0.39565229415893555
    },
    {
      "epoch": 0.0003142333984375,
      "model_forward_time": 0.11482739448547363,
      "step": 51484
    },
    {
      "epoch": 0.0003142333984375,
      "step": 51484,
      "training_step_time": 0.396578311920166
    },
    {
      "epoch": 0.000314239501953125,
      "model_forward_time": 0.11429762840270996,
      "step": 51485
    },
    {
      "epoch": 0.000314239501953125,
      "step": 51485,
      "training_step_time": 0.4570937156677246
    },
    {
      "epoch": 0.00031424560546875,
      "model_forward_time": 0.11508631706237793,
      "step": 51486
    },
    {
      "epoch": 0.00031424560546875,
      "step": 51486,
      "training_step_time": 0.6298046112060547
    },
    {
      "epoch": 0.000314251708984375,
      "model_forward_time": 0.11468625068664551,
      "step": 51487
    },
    {
      "epoch": 0.000314251708984375,
      "step": 51487,
      "training_step_time": 0.39171290397644043
    },
    {
      "epoch": 0.0003142578125,
      "model_forward_time": 0.11513662338256836,
      "step": 51488
    },
    {
      "epoch": 0.0003142578125,
      "step": 51488,
      "training_step_time": 0.4021918773651123
    },
    {
      "epoch": 0.000314263916015625,
      "model_forward_time": 0.11507034301757812,
      "step": 51489
    },
    {
      "epoch": 0.000314263916015625,
      "step": 51489,
      "training_step_time": 0.39745616912841797
    },
    {
      "epoch": 0.00031427001953125,
      "grad_norm": 0.11706726253032684,
      "learning_rate": 5.399738124192988e-06,
      "loss": 0.0371,
      "step": 51490
    },
    {
      "epoch": 0.00031427001953125,
      "model_forward_time": 0.11463093757629395,
      "step": 51490
    },
    {
      "epoch": 0.00031427001953125,
      "step": 51490,
      "training_step_time": 0.3950357437133789
    },
    {
      "epoch": 0.000314276123046875,
      "model_forward_time": 0.11449718475341797,
      "step": 51491
    },
    {
      "epoch": 0.000314276123046875,
      "step": 51491,
      "training_step_time": 0.39667201042175293
    },
    {
      "epoch": 0.0003142822265625,
      "model_forward_time": 0.1148989200592041,
      "step": 51492
    },
    {
      "epoch": 0.0003142822265625,
      "step": 51492,
      "training_step_time": 0.6233096122741699
    },
    {
      "epoch": 0.000314288330078125,
      "model_forward_time": 0.1149137020111084,
      "step": 51493
    },
    {
      "epoch": 0.000314288330078125,
      "step": 51493,
      "training_step_time": 0.47460103034973145
    },
    {
      "epoch": 0.00031429443359375,
      "model_forward_time": 0.1146402359008789,
      "step": 51494
    },
    {
      "epoch": 0.00031429443359375,
      "step": 51494,
      "training_step_time": 0.4915950298309326
    },
    {
      "epoch": 0.000314300537109375,
      "model_forward_time": 0.1140906810760498,
      "step": 51495
    },
    {
      "epoch": 0.000314300537109375,
      "step": 51495,
      "training_step_time": 0.3997921943664551
    },
    {
      "epoch": 0.000314306640625,
      "model_forward_time": 0.11486077308654785,
      "step": 51496
    },
    {
      "epoch": 0.000314306640625,
      "step": 51496,
      "training_step_time": 0.3876631259918213
    },
    {
      "epoch": 0.000314312744140625,
      "model_forward_time": 0.11438751220703125,
      "step": 51497
    },
    {
      "epoch": 0.000314312744140625,
      "step": 51497,
      "training_step_time": 0.3950314521789551
    },
    {
      "epoch": 0.00031431884765625,
      "model_forward_time": 0.11450695991516113,
      "step": 51498
    },
    {
      "epoch": 0.00031431884765625,
      "step": 51498,
      "training_step_time": 0.47725868225097656
    },
    {
      "epoch": 0.000314324951171875,
      "model_forward_time": 0.11432242393493652,
      "step": 51499
    },
    {
      "epoch": 0.000314324951171875,
      "step": 51499,
      "training_step_time": 0.4089336395263672
    },
    {
      "epoch": 0.0003143310546875,
      "grad_norm": 0.0844893753528595,
      "learning_rate": 5.387288069080299e-06,
      "loss": 0.0308,
      "step": 51500
    },
    {
      "epoch": 0.0003143310546875,
      "model_forward_time": 0.1147315502166748,
      "step": 51500
    },
    {
      "epoch": 0.0003143310546875,
      "step": 51500,
      "training_step_time": 0.402172327041626
    },
    {
      "epoch": 0.000314337158203125,
      "model_forward_time": 0.11521124839782715,
      "step": 51501
    },
    {
      "epoch": 0.000314337158203125,
      "step": 51501,
      "training_step_time": 0.4072258472442627
    },
    {
      "epoch": 0.00031434326171875,
      "model_forward_time": 0.11432814598083496,
      "step": 51502
    },
    {
      "epoch": 0.00031434326171875,
      "step": 51502,
      "training_step_time": 0.3958873748779297
    },
    {
      "epoch": 0.000314349365234375,
      "model_forward_time": 0.11562466621398926,
      "step": 51503
    },
    {
      "epoch": 0.000314349365234375,
      "step": 51503,
      "training_step_time": 0.4025609493255615
    },
    {
      "epoch": 0.00031435546875,
      "model_forward_time": 0.1150810718536377,
      "step": 51504
    },
    {
      "epoch": 0.00031435546875,
      "step": 51504,
      "training_step_time": 0.6569714546203613
    },
    {
      "epoch": 0.000314361572265625,
      "model_forward_time": 0.11406874656677246,
      "step": 51505
    },
    {
      "epoch": 0.000314361572265625,
      "step": 51505,
      "training_step_time": 0.39847373962402344
    },
    {
      "epoch": 0.00031436767578125,
      "model_forward_time": 0.11472010612487793,
      "step": 51506
    },
    {
      "epoch": 0.00031436767578125,
      "step": 51506,
      "training_step_time": 0.39614343643188477
    },
    {
      "epoch": 0.000314373779296875,
      "model_forward_time": 0.11479997634887695,
      "step": 51507
    },
    {
      "epoch": 0.000314373779296875,
      "step": 51507,
      "training_step_time": 0.4341773986816406
    },
    {
      "epoch": 0.0003143798828125,
      "model_forward_time": 0.11637043952941895,
      "step": 51508
    },
    {
      "epoch": 0.0003143798828125,
      "step": 51508,
      "training_step_time": 0.4898183345794678
    },
    {
      "epoch": 0.000314385986328125,
      "model_forward_time": 0.11478638648986816,
      "step": 51509
    },
    {
      "epoch": 0.000314385986328125,
      "step": 51509,
      "training_step_time": 0.4136381149291992
    },
    {
      "epoch": 0.00031439208984375,
      "grad_norm": 0.07996238023042679,
      "learning_rate": 5.374851566131561e-06,
      "loss": 0.0332,
      "step": 51510
    },
    {
      "epoch": 0.00031439208984375,
      "model_forward_time": 0.11504769325256348,
      "step": 51510
    },
    {
      "epoch": 0.00031439208984375,
      "step": 51510,
      "training_step_time": 0.6120555400848389
    },
    {
      "epoch": 0.000314398193359375,
      "model_forward_time": 0.1143798828125,
      "step": 51511
    },
    {
      "epoch": 0.000314398193359375,
      "step": 51511,
      "training_step_time": 0.3965444564819336
    },
    {
      "epoch": 0.000314404296875,
      "model_forward_time": 0.11545014381408691,
      "step": 51512
    },
    {
      "epoch": 0.000314404296875,
      "step": 51512,
      "training_step_time": 0.4382622241973877
    },
    {
      "epoch": 0.000314410400390625,
      "model_forward_time": 0.11438250541687012,
      "step": 51513
    },
    {
      "epoch": 0.000314410400390625,
      "step": 51513,
      "training_step_time": 0.4048330783843994
    },
    {
      "epoch": 0.00031441650390625,
      "model_forward_time": 0.11484241485595703,
      "step": 51514
    },
    {
      "epoch": 0.00031441650390625,
      "step": 51514,
      "training_step_time": 0.4472026824951172
    },
    {
      "epoch": 0.000314422607421875,
      "model_forward_time": 0.11402297019958496,
      "step": 51515
    },
    {
      "epoch": 0.000314422607421875,
      "step": 51515,
      "training_step_time": 0.38641858100891113
    },
    {
      "epoch": 0.0003144287109375,
      "model_forward_time": 0.11507415771484375,
      "step": 51516
    },
    {
      "epoch": 0.0003144287109375,
      "step": 51516,
      "training_step_time": 0.5242700576782227
    },
    {
      "epoch": 0.000314434814453125,
      "model_forward_time": 0.11486649513244629,
      "step": 51517
    },
    {
      "epoch": 0.000314434814453125,
      "step": 51517,
      "training_step_time": 0.3958590030670166
    },
    {
      "epoch": 0.00031444091796875,
      "model_forward_time": 0.11484599113464355,
      "step": 51518
    },
    {
      "epoch": 0.00031444091796875,
      "step": 51518,
      "training_step_time": 0.39589524269104004
    },
    {
      "epoch": 0.000314447021484375,
      "model_forward_time": 0.11532211303710938,
      "step": 51519
    },
    {
      "epoch": 0.000314447021484375,
      "step": 51519,
      "training_step_time": 0.4009113311767578
    },
    {
      "epoch": 0.000314453125,
      "grad_norm": 0.10679854452610016,
      "learning_rate": 5.362428619124666e-06,
      "loss": 0.0354,
      "step": 51520
    },
    {
      "epoch": 0.000314453125,
      "model_forward_time": 0.1148841381072998,
      "step": 51520
    },
    {
      "epoch": 0.000314453125,
      "step": 51520,
      "training_step_time": 0.41633176803588867
    },
    {
      "epoch": 0.000314459228515625,
      "model_forward_time": 0.11883163452148438,
      "step": 51521
    },
    {
      "epoch": 0.000314459228515625,
      "step": 51521,
      "training_step_time": 0.39081597328186035
    },
    {
      "epoch": 0.00031446533203125,
      "model_forward_time": 0.11908698081970215,
      "step": 51522
    },
    {
      "epoch": 0.00031446533203125,
      "step": 51522,
      "training_step_time": 0.6682374477386475
    },
    {
      "epoch": 0.000314471435546875,
      "model_forward_time": 0.12141752243041992,
      "step": 51523
    },
    {
      "epoch": 0.000314471435546875,
      "step": 51523,
      "training_step_time": 0.4288618564605713
    },
    {
      "epoch": 0.0003144775390625,
      "model_forward_time": 0.1165018081665039,
      "step": 51524
    },
    {
      "epoch": 0.0003144775390625,
      "step": 51524,
      "training_step_time": 0.38501501083374023
    },
    {
      "epoch": 0.000314483642578125,
      "model_forward_time": 0.1152796745300293,
      "step": 51525
    },
    {
      "epoch": 0.000314483642578125,
      "step": 51525,
      "training_step_time": 0.3927924633026123
    },
    {
      "epoch": 0.00031448974609375,
      "model_forward_time": 0.11484646797180176,
      "step": 51526
    },
    {
      "epoch": 0.00031448974609375,
      "step": 51526,
      "training_step_time": 0.4599902629852295
    },
    {
      "epoch": 0.000314495849609375,
      "model_forward_time": 0.11430907249450684,
      "step": 51527
    },
    {
      "epoch": 0.000314495849609375,
      "step": 51527,
      "training_step_time": 0.4273521900177002
    },
    {
      "epoch": 0.000314501953125,
      "model_forward_time": 0.11523032188415527,
      "step": 51528
    },
    {
      "epoch": 0.000314501953125,
      "step": 51528,
      "training_step_time": 0.4987976551055908
    },
    {
      "epoch": 0.000314508056640625,
      "model_forward_time": 0.11507153511047363,
      "step": 51529
    },
    {
      "epoch": 0.000314508056640625,
      "step": 51529,
      "training_step_time": 0.4000728130340576
    },
    {
      "epoch": 0.00031451416015625,
      "grad_norm": 0.08304520696401596,
      "learning_rate": 5.350019231833364e-06,
      "loss": 0.0354,
      "step": 51530
    },
    {
      "epoch": 0.00031451416015625,
      "model_forward_time": 0.11445164680480957,
      "step": 51530
    },
    {
      "epoch": 0.00031451416015625,
      "step": 51530,
      "training_step_time": 0.3926386833190918
    },
    {
      "epoch": 0.000314520263671875,
      "model_forward_time": 0.11512112617492676,
      "step": 51531
    },
    {
      "epoch": 0.000314520263671875,
      "step": 51531,
      "training_step_time": 0.38306140899658203
    },
    {
      "epoch": 0.0003145263671875,
      "model_forward_time": 0.11506986618041992,
      "step": 51532
    },
    {
      "epoch": 0.0003145263671875,
      "step": 51532,
      "training_step_time": 0.39248013496398926
    },
    {
      "epoch": 0.000314532470703125,
      "model_forward_time": 0.11575126647949219,
      "step": 51533
    },
    {
      "epoch": 0.000314532470703125,
      "step": 51533,
      "training_step_time": 0.3854389190673828
    },
    {
      "epoch": 0.00031453857421875,
      "model_forward_time": 0.11562895774841309,
      "step": 51534
    },
    {
      "epoch": 0.00031453857421875,
      "step": 51534,
      "training_step_time": 0.6671042442321777
    },
    {
      "epoch": 0.000314544677734375,
      "model_forward_time": 0.11502313613891602,
      "step": 51535
    },
    {
      "epoch": 0.000314544677734375,
      "step": 51535,
      "training_step_time": 0.3693056106567383
    },
    {
      "epoch": 0.00031455078125,
      "model_forward_time": 0.11494660377502441,
      "step": 51536
    },
    {
      "epoch": 0.00031455078125,
      "step": 51536,
      "training_step_time": 0.46602416038513184
    },
    {
      "epoch": 0.000314556884765625,
      "model_forward_time": 0.11487174034118652,
      "step": 51537
    },
    {
      "epoch": 0.000314556884765625,
      "step": 51537,
      "training_step_time": 0.4253268241882324
    },
    {
      "epoch": 0.00031456298828125,
      "model_forward_time": 0.11458563804626465,
      "step": 51538
    },
    {
      "epoch": 0.00031456298828125,
      "step": 51538,
      "training_step_time": 0.38490867614746094
    },
    {
      "epoch": 0.000314569091796875,
      "model_forward_time": 0.11486363410949707,
      "step": 51539
    },
    {
      "epoch": 0.000314569091796875,
      "step": 51539,
      "training_step_time": 0.383181095123291
    },
    {
      "epoch": 0.0003145751953125,
      "grad_norm": 0.11228396743535995,
      "learning_rate": 5.337623408027293e-06,
      "loss": 0.0363,
      "step": 51540
    },
    {
      "epoch": 0.0003145751953125,
      "model_forward_time": 0.11524629592895508,
      "step": 51540
    },
    {
      "epoch": 0.0003145751953125,
      "step": 51540,
      "training_step_time": 0.6510353088378906
    },
    {
      "epoch": 0.000314581298828125,
      "model_forward_time": 0.11442756652832031,
      "step": 51541
    },
    {
      "epoch": 0.000314581298828125,
      "step": 51541,
      "training_step_time": 0.48340940475463867
    },
    {
      "epoch": 0.00031458740234375,
      "model_forward_time": 0.11440396308898926,
      "step": 51542
    },
    {
      "epoch": 0.00031458740234375,
      "step": 51542,
      "training_step_time": 0.3914070129394531
    },
    {
      "epoch": 0.000314593505859375,
      "model_forward_time": 0.11475539207458496,
      "step": 51543
    },
    {
      "epoch": 0.000314593505859375,
      "step": 51543,
      "training_step_time": 0.39556884765625
    },
    {
      "epoch": 0.000314599609375,
      "model_forward_time": 0.1142737865447998,
      "step": 51544
    },
    {
      "epoch": 0.000314599609375,
      "step": 51544,
      "training_step_time": 0.3782463073730469
    },
    {
      "epoch": 0.000314605712890625,
      "model_forward_time": 0.11472153663635254,
      "step": 51545
    },
    {
      "epoch": 0.000314605712890625,
      "step": 51545,
      "training_step_time": 0.38625359535217285
    },
    {
      "epoch": 0.00031461181640625,
      "model_forward_time": 0.11564183235168457,
      "step": 51546
    },
    {
      "epoch": 0.00031461181640625,
      "step": 51546,
      "training_step_time": 0.649634599685669
    },
    {
      "epoch": 0.000314617919921875,
      "model_forward_time": 0.11430191993713379,
      "step": 51547
    },
    {
      "epoch": 0.000314617919921875,
      "step": 51547,
      "training_step_time": 0.40667223930358887
    },
    {
      "epoch": 0.0003146240234375,
      "model_forward_time": 0.11539244651794434,
      "step": 51548
    },
    {
      "epoch": 0.0003146240234375,
      "step": 51548,
      "training_step_time": 0.4031834602355957
    },
    {
      "epoch": 0.000314630126953125,
      "model_forward_time": 0.11543059349060059,
      "step": 51549
    },
    {
      "epoch": 0.000314630126953125,
      "step": 51549,
      "training_step_time": 0.40753865242004395
    },
    {
      "epoch": 0.00031463623046875,
      "grad_norm": 0.12320857495069504,
      "learning_rate": 5.32524115147201e-06,
      "loss": 0.0347,
      "step": 51550
    },
    {
      "epoch": 0.00031463623046875,
      "model_forward_time": 0.11771798133850098,
      "step": 51550
    },
    {
      "epoch": 0.00031463623046875,
      "step": 51550,
      "training_step_time": 0.5218505859375
    },
    {
      "epoch": 0.000314642333984375,
      "model_forward_time": 0.11845207214355469,
      "step": 51551
    },
    {
      "epoch": 0.000314642333984375,
      "step": 51551,
      "training_step_time": 0.4773409366607666
    },
    {
      "epoch": 0.0003146484375,
      "model_forward_time": 0.11951828002929688,
      "step": 51552
    },
    {
      "epoch": 0.0003146484375,
      "step": 51552,
      "training_step_time": 0.6946067810058594
    },
    {
      "epoch": 0.000314654541015625,
      "model_forward_time": 0.1170499324798584,
      "step": 51553
    },
    {
      "epoch": 0.000314654541015625,
      "step": 51553,
      "training_step_time": 0.7219913005828857
    },
    {
      "epoch": 0.00031466064453125,
      "model_forward_time": 0.1280686855316162,
      "step": 51554
    },
    {
      "epoch": 0.00031466064453125,
      "step": 51554,
      "training_step_time": 0.7620480060577393
    },
    {
      "epoch": 0.000314666748046875,
      "model_forward_time": 0.11898159980773926,
      "step": 51555
    },
    {
      "epoch": 0.000314666748046875,
      "step": 51555,
      "training_step_time": 0.650688886642456
    },
    {
      "epoch": 0.0003146728515625,
      "model_forward_time": 0.11899185180664062,
      "step": 51556
    },
    {
      "epoch": 0.0003146728515625,
      "step": 51556,
      "training_step_time": 0.612159013748169
    },
    {
      "epoch": 0.000314678955078125,
      "model_forward_time": 0.1184701919555664,
      "step": 51557
    },
    {
      "epoch": 0.000314678955078125,
      "step": 51557,
      "training_step_time": 0.6470012664794922
    },
    {
      "epoch": 0.00031468505859375,
      "model_forward_time": 0.12237286567687988,
      "step": 51558
    },
    {
      "epoch": 0.00031468505859375,
      "step": 51558,
      "training_step_time": 0.7487139701843262
    },
    {
      "epoch": 0.000314691162109375,
      "model_forward_time": 0.11888766288757324,
      "step": 51559
    },
    {
      "epoch": 0.000314691162109375,
      "step": 51559,
      "training_step_time": 0.5857305526733398
    },
    {
      "epoch": 0.000314697265625,
      "grad_norm": 0.17102596163749695,
      "learning_rate": 5.312872465928881e-06,
      "loss": 0.037,
      "step": 51560
    },
    {
      "epoch": 0.000314697265625,
      "model_forward_time": 0.11881566047668457,
      "step": 51560
    },
    {
      "epoch": 0.000314697265625,
      "step": 51560,
      "training_step_time": 0.7169010639190674
    },
    {
      "epoch": 0.000314703369140625,
      "model_forward_time": 0.11724114418029785,
      "step": 51561
    },
    {
      "epoch": 0.000314703369140625,
      "step": 51561,
      "training_step_time": 0.6430618762969971
    },
    {
      "epoch": 0.00031470947265625,
      "model_forward_time": 0.11796975135803223,
      "step": 51562
    },
    {
      "epoch": 0.00031470947265625,
      "step": 51562,
      "training_step_time": 0.6604583263397217
    },
    {
      "epoch": 0.000314715576171875,
      "model_forward_time": 0.13026094436645508,
      "step": 51563
    },
    {
      "epoch": 0.000314715576171875,
      "step": 51563,
      "training_step_time": 0.6392652988433838
    },
    {
      "epoch": 0.0003147216796875,
      "model_forward_time": 0.12389969825744629,
      "step": 51564
    },
    {
      "epoch": 0.0003147216796875,
      "step": 51564,
      "training_step_time": 0.6903591156005859
    },
    {
      "epoch": 0.000314727783203125,
      "model_forward_time": 0.1201314926147461,
      "step": 51565
    },
    {
      "epoch": 0.000314727783203125,
      "step": 51565,
      "training_step_time": 0.6744909286499023
    },
    {
      "epoch": 0.00031473388671875,
      "model_forward_time": 0.1172332763671875,
      "step": 51566
    },
    {
      "epoch": 0.00031473388671875,
      "step": 51566,
      "training_step_time": 0.6951792240142822
    },
    {
      "epoch": 0.000314739990234375,
      "model_forward_time": 0.11977910995483398,
      "step": 51567
    },
    {
      "epoch": 0.000314739990234375,
      "step": 51567,
      "training_step_time": 0.7385380268096924
    },
    {
      "epoch": 0.00031474609375,
      "model_forward_time": 0.11984109878540039,
      "step": 51568
    },
    {
      "epoch": 0.00031474609375,
      "step": 51568,
      "training_step_time": 0.7247703075408936
    },
    {
      "epoch": 0.000314752197265625,
      "model_forward_time": 0.12466716766357422,
      "step": 51569
    },
    {
      "epoch": 0.000314752197265625,
      "step": 51569,
      "training_step_time": 0.684706449508667
    },
    {
      "epoch": 0.00031475830078125,
      "grad_norm": 0.09538518637418747,
      "learning_rate": 5.300517355155215e-06,
      "loss": 0.0395,
      "step": 51570
    },
    {
      "epoch": 0.00031475830078125,
      "model_forward_time": 0.12486982345581055,
      "step": 51570
    },
    {
      "epoch": 0.00031475830078125,
      "step": 51570,
      "training_step_time": 0.7612636089324951
    },
    {
      "epoch": 0.000314764404296875,
      "model_forward_time": 0.11863541603088379,
      "step": 51571
    },
    {
      "epoch": 0.000314764404296875,
      "step": 51571,
      "training_step_time": 0.685039758682251
    },
    {
      "epoch": 0.0003147705078125,
      "model_forward_time": 0.11957383155822754,
      "step": 51572
    },
    {
      "epoch": 0.0003147705078125,
      "step": 51572,
      "training_step_time": 0.6001112461090088
    },
    {
      "epoch": 0.000314776611328125,
      "model_forward_time": 0.11688375473022461,
      "step": 51573
    },
    {
      "epoch": 0.000314776611328125,
      "step": 51573,
      "training_step_time": 0.7414321899414062
    },
    {
      "epoch": 0.00031478271484375,
      "model_forward_time": 0.11700105667114258,
      "step": 51574
    },
    {
      "epoch": 0.00031478271484375,
      "step": 51574,
      "training_step_time": 0.6701784133911133
    },
    {
      "epoch": 0.000314788818359375,
      "model_forward_time": 0.11598515510559082,
      "step": 51575
    },
    {
      "epoch": 0.000314788818359375,
      "step": 51575,
      "training_step_time": 0.6520535945892334
    },
    {
      "epoch": 0.000314794921875,
      "model_forward_time": 0.11714982986450195,
      "step": 51576
    },
    {
      "epoch": 0.000314794921875,
      "step": 51576,
      "training_step_time": 0.6867034435272217
    },
    {
      "epoch": 0.000314801025390625,
      "model_forward_time": 0.11954832077026367,
      "step": 51577
    },
    {
      "epoch": 0.000314801025390625,
      "step": 51577,
      "training_step_time": 0.7193787097930908
    },
    {
      "epoch": 0.00031480712890625,
      "model_forward_time": 0.12449860572814941,
      "step": 51578
    },
    {
      "epoch": 0.00031480712890625,
      "step": 51578,
      "training_step_time": 0.6910324096679688
    },
    {
      "epoch": 0.000314813232421875,
      "model_forward_time": 0.11838245391845703,
      "step": 51579
    },
    {
      "epoch": 0.000314813232421875,
      "step": 51579,
      "training_step_time": 0.687535285949707
    },
    {
      "epoch": 0.0003148193359375,
      "grad_norm": 0.13766418397426605,
      "learning_rate": 5.28817582290414e-06,
      "loss": 0.0456,
      "step": 51580
    },
    {
      "epoch": 0.0003148193359375,
      "model_forward_time": 0.12167477607727051,
      "step": 51580
    },
    {
      "epoch": 0.0003148193359375,
      "step": 51580,
      "training_step_time": 0.6873617172241211
    },
    {
      "epoch": 0.000314825439453125,
      "model_forward_time": 0.1186516284942627,
      "step": 51581
    },
    {
      "epoch": 0.000314825439453125,
      "step": 51581,
      "training_step_time": 0.6639034748077393
    },
    {
      "epoch": 0.00031483154296875,
      "model_forward_time": 0.11767125129699707,
      "step": 51582
    },
    {
      "epoch": 0.00031483154296875,
      "step": 51582,
      "training_step_time": 0.7039308547973633
    },
    {
      "epoch": 0.000314837646484375,
      "model_forward_time": 0.11843681335449219,
      "step": 51583
    },
    {
      "epoch": 0.000314837646484375,
      "step": 51583,
      "training_step_time": 0.7308297157287598
    },
    {
      "epoch": 0.00031484375,
      "model_forward_time": 0.11941742897033691,
      "step": 51584
    },
    {
      "epoch": 0.00031484375,
      "step": 51584,
      "training_step_time": 0.6448268890380859
    },
    {
      "epoch": 0.000314849853515625,
      "model_forward_time": 0.11877560615539551,
      "step": 51585
    },
    {
      "epoch": 0.000314849853515625,
      "step": 51585,
      "training_step_time": 0.6760473251342773
    },
    {
      "epoch": 0.00031485595703125,
      "model_forward_time": 0.12164068222045898,
      "step": 51586
    },
    {
      "epoch": 0.00031485595703125,
      "step": 51586,
      "training_step_time": 0.7139706611633301
    },
    {
      "epoch": 0.000314862060546875,
      "model_forward_time": 0.1197209358215332,
      "step": 51587
    },
    {
      "epoch": 0.000314862060546875,
      "step": 51587,
      "training_step_time": 0.7221980094909668
    },
    {
      "epoch": 0.0003148681640625,
      "model_forward_time": 0.11646723747253418,
      "step": 51588
    },
    {
      "epoch": 0.0003148681640625,
      "step": 51588,
      "training_step_time": 0.6635653972625732
    },
    {
      "epoch": 0.000314874267578125,
      "model_forward_time": 0.1206052303314209,
      "step": 51589
    },
    {
      "epoch": 0.000314874267578125,
      "step": 51589,
      "training_step_time": 0.6030995845794678
    },
    {
      "epoch": 0.00031488037109375,
      "grad_norm": 0.08331765234470367,
      "learning_rate": 5.2758478729247164e-06,
      "loss": 0.039,
      "step": 51590
    },
    {
      "epoch": 0.00031488037109375,
      "model_forward_time": 0.1304764747619629,
      "step": 51590
    },
    {
      "epoch": 0.00031488037109375,
      "step": 51590,
      "training_step_time": 0.7190501689910889
    },
    {
      "epoch": 0.000314886474609375,
      "model_forward_time": 0.12060332298278809,
      "step": 51591
    },
    {
      "epoch": 0.000314886474609375,
      "step": 51591,
      "training_step_time": 0.6874978542327881
    },
    {
      "epoch": 0.000314892578125,
      "model_forward_time": 0.12000131607055664,
      "step": 51592
    },
    {
      "epoch": 0.000314892578125,
      "step": 51592,
      "training_step_time": 0.7042422294616699
    },
    {
      "epoch": 0.000314898681640625,
      "model_forward_time": 0.12749648094177246,
      "step": 51593
    },
    {
      "epoch": 0.000314898681640625,
      "step": 51593,
      "training_step_time": 0.6595146656036377
    },
    {
      "epoch": 0.00031490478515625,
      "model_forward_time": 0.11941409111022949,
      "step": 51594
    },
    {
      "epoch": 0.00031490478515625,
      "step": 51594,
      "training_step_time": 0.688805103302002
    },
    {
      "epoch": 0.000314910888671875,
      "model_forward_time": 0.11654186248779297,
      "step": 51595
    },
    {
      "epoch": 0.000314910888671875,
      "step": 51595,
      "training_step_time": 0.6549427509307861
    },
    {
      "epoch": 0.0003149169921875,
      "model_forward_time": 0.12398719787597656,
      "step": 51596
    },
    {
      "epoch": 0.0003149169921875,
      "step": 51596,
      "training_step_time": 0.5877401828765869
    },
    {
      "epoch": 0.000314923095703125,
      "model_forward_time": 0.12152743339538574,
      "step": 51597
    },
    {
      "epoch": 0.000314923095703125,
      "step": 51597,
      "training_step_time": 0.7156193256378174
    },
    {
      "epoch": 0.00031492919921875,
      "model_forward_time": 0.12208986282348633,
      "step": 51598
    },
    {
      "epoch": 0.00031492919921875,
      "step": 51598,
      "training_step_time": 0.7238039970397949
    },
    {
      "epoch": 0.000314935302734375,
      "model_forward_time": 0.12299418449401855,
      "step": 51599
    },
    {
      "epoch": 0.000314935302734375,
      "step": 51599,
      "training_step_time": 0.6571905612945557
    },
    {
      "epoch": 0.00031494140625,
      "grad_norm": 0.11337517946958542,
      "learning_rate": 5.263533508961827e-06,
      "loss": 0.0386,
      "step": 51600
    },
    {
      "epoch": 0.00031494140625,
      "model_forward_time": 0.1226806640625,
      "step": 51600
    },
    {
      "epoch": 0.00031494140625,
      "step": 51600,
      "training_step_time": 0.6589515209197998
    },
    {
      "epoch": 0.000314947509765625,
      "model_forward_time": 0.12805461883544922,
      "step": 51601
    },
    {
      "epoch": 0.000314947509765625,
      "step": 51601,
      "training_step_time": 0.7324423789978027
    },
    {
      "epoch": 0.00031495361328125,
      "model_forward_time": 0.11655664443969727,
      "step": 51602
    },
    {
      "epoch": 0.00031495361328125,
      "step": 51602,
      "training_step_time": 0.5870993137359619
    },
    {
      "epoch": 0.000314959716796875,
      "model_forward_time": 0.12284040451049805,
      "step": 51603
    },
    {
      "epoch": 0.000314959716796875,
      "step": 51603,
      "training_step_time": 0.6496374607086182
    },
    {
      "epoch": 0.0003149658203125,
      "model_forward_time": 0.12108731269836426,
      "step": 51604
    },
    {
      "epoch": 0.0003149658203125,
      "step": 51604,
      "training_step_time": 0.6252942085266113
    },
    {
      "epoch": 0.000314971923828125,
      "model_forward_time": 0.11690688133239746,
      "step": 51605
    },
    {
      "epoch": 0.000314971923828125,
      "step": 51605,
      "training_step_time": 0.6500587463378906
    },
    {
      "epoch": 0.00031497802734375,
      "model_forward_time": 0.11822795867919922,
      "step": 51606
    },
    {
      "epoch": 0.00031497802734375,
      "step": 51606,
      "training_step_time": 0.6979193687438965
    },
    {
      "epoch": 0.000314984130859375,
      "model_forward_time": 0.11872625350952148,
      "step": 51607
    },
    {
      "epoch": 0.000314984130859375,
      "step": 51607,
      "training_step_time": 0.6716055870056152
    },
    {
      "epoch": 0.000314990234375,
      "model_forward_time": 0.12079071998596191,
      "step": 51608
    },
    {
      "epoch": 0.000314990234375,
      "step": 51608,
      "training_step_time": 0.6340527534484863
    },
    {
      "epoch": 0.000314996337890625,
      "model_forward_time": 0.12138795852661133,
      "step": 51609
    },
    {
      "epoch": 0.000314996337890625,
      "step": 51609,
      "training_step_time": 0.7044196128845215
    },
    {
      "epoch": 0.00031500244140625,
      "grad_norm": 0.08600929379463196,
      "learning_rate": 5.251232734756268e-06,
      "loss": 0.0403,
      "step": 51610
    },
    {
      "epoch": 0.00031500244140625,
      "model_forward_time": 0.1187276840209961,
      "step": 51610
    },
    {
      "epoch": 0.00031500244140625,
      "step": 51610,
      "training_step_time": 0.6779565811157227
    },
    {
      "epoch": 0.000315008544921875,
      "model_forward_time": 0.12071371078491211,
      "step": 51611
    },
    {
      "epoch": 0.000315008544921875,
      "step": 51611,
      "training_step_time": 0.6703152656555176
    },
    {
      "epoch": 0.0003150146484375,
      "model_forward_time": 0.12016725540161133,
      "step": 51612
    },
    {
      "epoch": 0.0003150146484375,
      "step": 51612,
      "training_step_time": 0.6383841037750244
    },
    {
      "epoch": 0.000315020751953125,
      "model_forward_time": 0.12032508850097656,
      "step": 51613
    },
    {
      "epoch": 0.000315020751953125,
      "step": 51613,
      "training_step_time": 0.6860380172729492
    },
    {
      "epoch": 0.00031502685546875,
      "model_forward_time": 0.12793874740600586,
      "step": 51614
    },
    {
      "epoch": 0.00031502685546875,
      "step": 51614,
      "training_step_time": 0.6787939071655273
    },
    {
      "epoch": 0.000315032958984375,
      "model_forward_time": 0.12563371658325195,
      "step": 51615
    },
    {
      "epoch": 0.000315032958984375,
      "step": 51615,
      "training_step_time": 0.5966205596923828
    },
    {
      "epoch": 0.0003150390625,
      "model_forward_time": 0.1198129653930664,
      "step": 51616
    },
    {
      "epoch": 0.0003150390625,
      "step": 51616,
      "training_step_time": 0.6838102340698242
    },
    {
      "epoch": 0.000315045166015625,
      "model_forward_time": 0.12068605422973633,
      "step": 51617
    },
    {
      "epoch": 0.000315045166015625,
      "step": 51617,
      "training_step_time": 0.6073777675628662
    },
    {
      "epoch": 0.00031505126953125,
      "model_forward_time": 0.12245297431945801,
      "step": 51618
    },
    {
      "epoch": 0.00031505126953125,
      "step": 51618,
      "training_step_time": 0.5627589225769043
    },
    {
      "epoch": 0.000315057373046875,
      "model_forward_time": 0.11922001838684082,
      "step": 51619
    },
    {
      "epoch": 0.000315057373046875,
      "step": 51619,
      "training_step_time": 0.5788822174072266
    },
    {
      "epoch": 0.0003150634765625,
      "grad_norm": 0.07403764128684998,
      "learning_rate": 5.238945554044672e-06,
      "loss": 0.0326,
      "step": 51620
    },
    {
      "epoch": 0.0003150634765625,
      "model_forward_time": 0.1233668327331543,
      "step": 51620
    },
    {
      "epoch": 0.0003150634765625,
      "step": 51620,
      "training_step_time": 0.6172537803649902
    },
    {
      "epoch": 0.000315069580078125,
      "model_forward_time": 0.12010812759399414,
      "step": 51621
    },
    {
      "epoch": 0.000315069580078125,
      "step": 51621,
      "training_step_time": 0.6646168231964111
    },
    {
      "epoch": 0.00031507568359375,
      "model_forward_time": 0.11643147468566895,
      "step": 51622
    },
    {
      "epoch": 0.00031507568359375,
      "step": 51622,
      "training_step_time": 0.5829455852508545
    },
    {
      "epoch": 0.000315081787109375,
      "model_forward_time": 0.11655640602111816,
      "step": 51623
    },
    {
      "epoch": 0.000315081787109375,
      "step": 51623,
      "training_step_time": 0.4773979187011719
    },
    {
      "epoch": 0.000315087890625,
      "model_forward_time": 0.11474084854125977,
      "step": 51624
    },
    {
      "epoch": 0.000315087890625,
      "step": 51624,
      "training_step_time": 0.4590592384338379
    },
    {
      "epoch": 0.000315093994140625,
      "model_forward_time": 0.11484575271606445,
      "step": 51625
    },
    {
      "epoch": 0.000315093994140625,
      "step": 51625,
      "training_step_time": 0.5216100215911865
    },
    {
      "epoch": 0.00031510009765625,
      "model_forward_time": 0.11527705192565918,
      "step": 51626
    },
    {
      "epoch": 0.00031510009765625,
      "step": 51626,
      "training_step_time": 0.429363489151001
    },
    {
      "epoch": 0.000315106201171875,
      "model_forward_time": 0.11455368995666504,
      "step": 51627
    },
    {
      "epoch": 0.000315106201171875,
      "step": 51627,
      "training_step_time": 0.36942338943481445
    },
    {
      "epoch": 0.0003151123046875,
      "model_forward_time": 0.1148996353149414,
      "step": 51628
    },
    {
      "epoch": 0.0003151123046875,
      "step": 51628,
      "training_step_time": 0.4492008686065674
    },
    {
      "epoch": 0.000315118408203125,
      "model_forward_time": 0.11499738693237305,
      "step": 51629
    },
    {
      "epoch": 0.000315118408203125,
      "step": 51629,
      "training_step_time": 0.43201279640197754
    },
    {
      "epoch": 0.00031512451171875,
      "grad_norm": 0.08645808696746826,
      "learning_rate": 5.226671970559577e-06,
      "loss": 0.0349,
      "step": 51630
    },
    {
      "epoch": 0.00031512451171875,
      "model_forward_time": 0.1152944564819336,
      "step": 51630
    },
    {
      "epoch": 0.00031512451171875,
      "step": 51630,
      "training_step_time": 0.403475284576416
    },
    {
      "epoch": 0.000315130615234375,
      "model_forward_time": 0.11460137367248535,
      "step": 51631
    },
    {
      "epoch": 0.000315130615234375,
      "step": 51631,
      "training_step_time": 0.4080617427825928
    },
    {
      "epoch": 0.00031513671875,
      "model_forward_time": 0.11582803726196289,
      "step": 51632
    },
    {
      "epoch": 0.00031513671875,
      "step": 51632,
      "training_step_time": 0.3948493003845215
    },
    {
      "epoch": 0.000315142822265625,
      "model_forward_time": 0.11479806900024414,
      "step": 51633
    },
    {
      "epoch": 0.000315142822265625,
      "step": 51633,
      "training_step_time": 0.4031062126159668
    },
    {
      "epoch": 0.00031514892578125,
      "model_forward_time": 0.11628055572509766,
      "step": 51634
    },
    {
      "epoch": 0.00031514892578125,
      "step": 51634,
      "training_step_time": 0.4201364517211914
    },
    {
      "epoch": 0.000315155029296875,
      "model_forward_time": 0.11458802223205566,
      "step": 51635
    },
    {
      "epoch": 0.000315155029296875,
      "step": 51635,
      "training_step_time": 0.39592576026916504
    },
    {
      "epoch": 0.0003151611328125,
      "model_forward_time": 0.1162099838256836,
      "step": 51636
    },
    {
      "epoch": 0.0003151611328125,
      "step": 51636,
      "training_step_time": 0.4388141632080078
    },
    {
      "epoch": 0.000315167236328125,
      "model_forward_time": 0.11480140686035156,
      "step": 51637
    },
    {
      "epoch": 0.000315167236328125,
      "step": 51637,
      "training_step_time": 0.3917360305786133
    },
    {
      "epoch": 0.00031517333984375,
      "model_forward_time": 0.11461448669433594,
      "step": 51638
    },
    {
      "epoch": 0.00031517333984375,
      "step": 51638,
      "training_step_time": 0.40866589546203613
    },
    {
      "epoch": 0.000315179443359375,
      "model_forward_time": 0.11454105377197266,
      "step": 51639
    },
    {
      "epoch": 0.000315179443359375,
      "step": 51639,
      "training_step_time": 0.38799548149108887
    },
    {
      "epoch": 0.000315185546875,
      "grad_norm": 0.0839393213391304,
      "learning_rate": 5.214411988029355e-06,
      "loss": 0.0408,
      "step": 51640
    },
    {
      "epoch": 0.000315185546875,
      "model_forward_time": 0.11523604393005371,
      "step": 51640
    },
    {
      "epoch": 0.000315185546875,
      "step": 51640,
      "training_step_time": 0.4525175094604492
    },
    {
      "epoch": 0.000315191650390625,
      "model_forward_time": 0.11518311500549316,
      "step": 51641
    },
    {
      "epoch": 0.000315191650390625,
      "step": 51641,
      "training_step_time": 0.4237401485443115
    },
    {
      "epoch": 0.00031519775390625,
      "model_forward_time": 0.11400723457336426,
      "step": 51642
    },
    {
      "epoch": 0.00031519775390625,
      "step": 51642,
      "training_step_time": 0.47768306732177734
    },
    {
      "epoch": 0.000315203857421875,
      "model_forward_time": 0.11570310592651367,
      "step": 51643
    },
    {
      "epoch": 0.000315203857421875,
      "step": 51643,
      "training_step_time": 0.40530824661254883
    },
    {
      "epoch": 0.0003152099609375,
      "model_forward_time": 0.11471152305603027,
      "step": 51644
    },
    {
      "epoch": 0.0003152099609375,
      "step": 51644,
      "training_step_time": 0.4067566394805908
    },
    {
      "epoch": 0.000315216064453125,
      "model_forward_time": 0.11485576629638672,
      "step": 51645
    },
    {
      "epoch": 0.000315216064453125,
      "step": 51645,
      "training_step_time": 0.4355349540710449
    },
    {
      "epoch": 0.00031522216796875,
      "model_forward_time": 0.11486959457397461,
      "step": 51646
    },
    {
      "epoch": 0.00031522216796875,
      "step": 51646,
      "training_step_time": 0.4013810157775879
    },
    {
      "epoch": 0.000315228271484375,
      "model_forward_time": 0.11434674263000488,
      "step": 51647
    },
    {
      "epoch": 0.000315228271484375,
      "step": 51647,
      "training_step_time": 0.3929753303527832
    },
    {
      "epoch": 0.000315234375,
      "model_forward_time": 0.11525535583496094,
      "step": 51648
    },
    {
      "epoch": 0.000315234375,
      "step": 51648,
      "training_step_time": 0.39227890968322754
    },
    {
      "epoch": 0.000315240478515625,
      "model_forward_time": 0.1147770881652832,
      "step": 51649
    },
    {
      "epoch": 0.000315240478515625,
      "step": 51649,
      "training_step_time": 0.39318084716796875
    },
    {
      "epoch": 0.00031524658203125,
      "grad_norm": 0.08278723806142807,
      "learning_rate": 5.202165610178284e-06,
      "loss": 0.038,
      "step": 51650
    },
    {
      "epoch": 0.00031524658203125,
      "model_forward_time": 0.11538124084472656,
      "step": 51650
    },
    {
      "epoch": 0.00031524658203125,
      "step": 51650,
      "training_step_time": 0.40869617462158203
    },
    {
      "epoch": 0.000315252685546875,
      "model_forward_time": 0.11530566215515137,
      "step": 51651
    },
    {
      "epoch": 0.000315252685546875,
      "step": 51651,
      "training_step_time": 0.39730215072631836
    },
    {
      "epoch": 0.0003152587890625,
      "model_forward_time": 0.11558175086975098,
      "step": 51652
    },
    {
      "epoch": 0.0003152587890625,
      "step": 51652,
      "training_step_time": 0.38814711570739746
    },
    {
      "epoch": 0.000315264892578125,
      "model_forward_time": 0.11535477638244629,
      "step": 51653
    },
    {
      "epoch": 0.000315264892578125,
      "step": 51653,
      "training_step_time": 0.38918232917785645
    },
    {
      "epoch": 0.00031527099609375,
      "model_forward_time": 0.11579251289367676,
      "step": 51654
    },
    {
      "epoch": 0.00031527099609375,
      "step": 51654,
      "training_step_time": 0.39554858207702637
    },
    {
      "epoch": 0.000315277099609375,
      "model_forward_time": 0.11552882194519043,
      "step": 51655
    },
    {
      "epoch": 0.000315277099609375,
      "step": 51655,
      "training_step_time": 0.4519801139831543
    },
    {
      "epoch": 0.000315283203125,
      "model_forward_time": 0.11624741554260254,
      "step": 51656
    },
    {
      "epoch": 0.000315283203125,
      "step": 51656,
      "training_step_time": 0.4369943141937256
    },
    {
      "epoch": 0.000315289306640625,
      "model_forward_time": 0.11511945724487305,
      "step": 51657
    },
    {
      "epoch": 0.000315289306640625,
      "step": 51657,
      "training_step_time": 0.5109314918518066
    },
    {
      "epoch": 0.00031529541015625,
      "model_forward_time": 0.11541938781738281,
      "step": 51658
    },
    {
      "epoch": 0.00031529541015625,
      "step": 51658,
      "training_step_time": 0.5056617259979248
    },
    {
      "epoch": 0.000315301513671875,
      "model_forward_time": 0.11476492881774902,
      "step": 51659
    },
    {
      "epoch": 0.000315301513671875,
      "step": 51659,
      "training_step_time": 0.4381725788116455
    },
    {
      "epoch": 0.0003153076171875,
      "grad_norm": 0.08439329266548157,
      "learning_rate": 5.1899328407264855e-06,
      "loss": 0.0341,
      "step": 51660
    },
    {
      "epoch": 0.0003153076171875,
      "model_forward_time": 0.11457967758178711,
      "step": 51660
    },
    {
      "epoch": 0.0003153076171875,
      "step": 51660,
      "training_step_time": 0.4135925769805908
    },
    {
      "epoch": 0.000315313720703125,
      "model_forward_time": 0.1150047779083252,
      "step": 51661
    },
    {
      "epoch": 0.000315313720703125,
      "step": 51661,
      "training_step_time": 0.3920478820800781
    },
    {
      "epoch": 0.00031531982421875,
      "model_forward_time": 0.11475753784179688,
      "step": 51662
    },
    {
      "epoch": 0.00031531982421875,
      "step": 51662,
      "training_step_time": 0.3957509994506836
    },
    {
      "epoch": 0.000315325927734375,
      "model_forward_time": 0.11481189727783203,
      "step": 51663
    },
    {
      "epoch": 0.000315325927734375,
      "step": 51663,
      "training_step_time": 0.39565253257751465
    },
    {
      "epoch": 0.00031533203125,
      "model_forward_time": 0.11518526077270508,
      "step": 51664
    },
    {
      "epoch": 0.00031533203125,
      "step": 51664,
      "training_step_time": 0.4453103542327881
    },
    {
      "epoch": 0.000315338134765625,
      "model_forward_time": 0.11511754989624023,
      "step": 51665
    },
    {
      "epoch": 0.000315338134765625,
      "step": 51665,
      "training_step_time": 0.4982621669769287
    },
    {
      "epoch": 0.00031534423828125,
      "model_forward_time": 0.11480093002319336,
      "step": 51666
    },
    {
      "epoch": 0.00031534423828125,
      "step": 51666,
      "training_step_time": 0.4009072780609131
    },
    {
      "epoch": 0.000315350341796875,
      "model_forward_time": 0.11621499061584473,
      "step": 51667
    },
    {
      "epoch": 0.000315350341796875,
      "step": 51667,
      "training_step_time": 0.3970043659210205
    },
    {
      "epoch": 0.0003153564453125,
      "model_forward_time": 0.11480951309204102,
      "step": 51668
    },
    {
      "epoch": 0.0003153564453125,
      "step": 51668,
      "training_step_time": 0.3915369510650635
    },
    {
      "epoch": 0.000315362548828125,
      "model_forward_time": 0.11512041091918945,
      "step": 51669
    },
    {
      "epoch": 0.000315362548828125,
      "step": 51669,
      "training_step_time": 0.39006638526916504
    },
    {
      "epoch": 0.00031536865234375,
      "grad_norm": 0.1099155843257904,
      "learning_rate": 5.177713683389945e-06,
      "loss": 0.0407,
      "step": 51670
    },
    {
      "epoch": 0.00031536865234375,
      "model_forward_time": 0.11685442924499512,
      "step": 51670
    },
    {
      "epoch": 0.00031536865234375,
      "step": 51670,
      "training_step_time": 0.4089221954345703
    },
    {
      "epoch": 0.000315374755859375,
      "model_forward_time": 0.11554336547851562,
      "step": 51671
    },
    {
      "epoch": 0.000315374755859375,
      "step": 51671,
      "training_step_time": 0.47875332832336426
    },
    {
      "epoch": 0.000315380859375,
      "model_forward_time": 0.11625480651855469,
      "step": 51672
    },
    {
      "epoch": 0.000315380859375,
      "step": 51672,
      "training_step_time": 0.4009366035461426
    },
    {
      "epoch": 0.000315386962890625,
      "model_forward_time": 0.11584258079528809,
      "step": 51673
    },
    {
      "epoch": 0.000315386962890625,
      "step": 51673,
      "training_step_time": 0.43192338943481445
    },
    {
      "epoch": 0.00031539306640625,
      "model_forward_time": 0.11544013023376465,
      "step": 51674
    },
    {
      "epoch": 0.00031539306640625,
      "step": 51674,
      "training_step_time": 0.39401769638061523
    },
    {
      "epoch": 0.000315399169921875,
      "model_forward_time": 0.11514115333557129,
      "step": 51675
    },
    {
      "epoch": 0.000315399169921875,
      "step": 51675,
      "training_step_time": 0.39611291885375977
    },
    {
      "epoch": 0.0003154052734375,
      "model_forward_time": 0.11507415771484375,
      "step": 51676
    },
    {
      "epoch": 0.0003154052734375,
      "step": 51676,
      "training_step_time": 0.3897221088409424
    },
    {
      "epoch": 0.000315411376953125,
      "model_forward_time": 0.11602425575256348,
      "step": 51677
    },
    {
      "epoch": 0.000315411376953125,
      "step": 51677,
      "training_step_time": 0.3930330276489258
    },
    {
      "epoch": 0.00031541748046875,
      "model_forward_time": 0.11530590057373047,
      "step": 51678
    },
    {
      "epoch": 0.00031541748046875,
      "step": 51678,
      "training_step_time": 0.4159364700317383
    },
    {
      "epoch": 0.000315423583984375,
      "model_forward_time": 0.11499357223510742,
      "step": 51679
    },
    {
      "epoch": 0.000315423583984375,
      "step": 51679,
      "training_step_time": 0.42402148246765137
    },
    {
      "epoch": 0.0003154296875,
      "grad_norm": 0.07461179792881012,
      "learning_rate": 5.165508141880526e-06,
      "loss": 0.0404,
      "step": 51680
    },
    {
      "epoch": 0.0003154296875,
      "model_forward_time": 0.11527419090270996,
      "step": 51680
    },
    {
      "epoch": 0.0003154296875,
      "step": 51680,
      "training_step_time": 0.4349076747894287
    },
    {
      "epoch": 0.000315435791015625,
      "model_forward_time": 0.11559271812438965,
      "step": 51681
    },
    {
      "epoch": 0.000315435791015625,
      "step": 51681,
      "training_step_time": 0.3928544521331787
    },
    {
      "epoch": 0.00031544189453125,
      "model_forward_time": 0.11503744125366211,
      "step": 51682
    },
    {
      "epoch": 0.00031544189453125,
      "step": 51682,
      "training_step_time": 0.39626240730285645
    },
    {
      "epoch": 0.000315447998046875,
      "model_forward_time": 0.1152334213256836,
      "step": 51683
    },
    {
      "epoch": 0.000315447998046875,
      "step": 51683,
      "training_step_time": 0.39291977882385254
    },
    {
      "epoch": 0.0003154541015625,
      "model_forward_time": 0.11524128913879395,
      "step": 51684
    },
    {
      "epoch": 0.0003154541015625,
      "step": 51684,
      "training_step_time": 0.5131480693817139
    },
    {
      "epoch": 0.000315460205078125,
      "model_forward_time": 0.11533212661743164,
      "step": 51685
    },
    {
      "epoch": 0.000315460205078125,
      "step": 51685,
      "training_step_time": 0.36489248275756836
    },
    {
      "epoch": 0.00031546630859375,
      "model_forward_time": 0.11568832397460938,
      "step": 51686
    },
    {
      "epoch": 0.00031546630859375,
      "step": 51686,
      "training_step_time": 0.4817681312561035
    },
    {
      "epoch": 0.000315472412109375,
      "model_forward_time": 0.11495757102966309,
      "step": 51687
    },
    {
      "epoch": 0.000315472412109375,
      "step": 51687,
      "training_step_time": 0.45111846923828125
    },
    {
      "epoch": 0.000315478515625,
      "model_forward_time": 0.11479544639587402,
      "step": 51688
    },
    {
      "epoch": 0.000315478515625,
      "step": 51688,
      "training_step_time": 0.45572566986083984
    },
    {
      "epoch": 0.000315484619140625,
      "model_forward_time": 0.11537575721740723,
      "step": 51689
    },
    {
      "epoch": 0.000315484619140625,
      "step": 51689,
      "training_step_time": 0.39287447929382324
    },
    {
      "epoch": 0.00031549072265625,
      "grad_norm": 0.08344002813100815,
      "learning_rate": 5.153316219905946e-06,
      "loss": 0.0348,
      "step": 51690
    },
    {
      "epoch": 0.00031549072265625,
      "model_forward_time": 0.11442375183105469,
      "step": 51690
    },
    {
      "epoch": 0.00031549072265625,
      "step": 51690,
      "training_step_time": 0.3944523334503174
    },
    {
      "epoch": 0.000315496826171875,
      "model_forward_time": 0.11572480201721191,
      "step": 51691
    },
    {
      "epoch": 0.000315496826171875,
      "step": 51691,
      "training_step_time": 0.3852252960205078
    },
    {
      "epoch": 0.0003155029296875,
      "model_forward_time": 0.11512017250061035,
      "step": 51692
    },
    {
      "epoch": 0.0003155029296875,
      "step": 51692,
      "training_step_time": 0.5007545948028564
    },
    {
      "epoch": 0.000315509033203125,
      "model_forward_time": 0.11570453643798828,
      "step": 51693
    },
    {
      "epoch": 0.000315509033203125,
      "step": 51693,
      "training_step_time": 0.4395129680633545
    },
    {
      "epoch": 0.00031551513671875,
      "model_forward_time": 0.11537885665893555,
      "step": 51694
    },
    {
      "epoch": 0.00031551513671875,
      "step": 51694,
      "training_step_time": 0.5034592151641846
    },
    {
      "epoch": 0.000315521240234375,
      "model_forward_time": 0.11469721794128418,
      "step": 51695
    },
    {
      "epoch": 0.000315521240234375,
      "step": 51695,
      "training_step_time": 0.3858051300048828
    },
    {
      "epoch": 0.00031552734375,
      "model_forward_time": 0.11466598510742188,
      "step": 51696
    },
    {
      "epoch": 0.00031552734375,
      "step": 51696,
      "training_step_time": 0.38274478912353516
    },
    {
      "epoch": 0.000315533447265625,
      "model_forward_time": 0.11459827423095703,
      "step": 51697
    },
    {
      "epoch": 0.000315533447265625,
      "step": 51697,
      "training_step_time": 0.387847900390625
    },
    {
      "epoch": 0.00031553955078125,
      "model_forward_time": 0.11458158493041992,
      "step": 51698
    },
    {
      "epoch": 0.00031553955078125,
      "step": 51698,
      "training_step_time": 0.5518598556518555
    },
    {
      "epoch": 0.000315545654296875,
      "model_forward_time": 0.11465573310852051,
      "step": 51699
    },
    {
      "epoch": 0.000315545654296875,
      "step": 51699,
      "training_step_time": 0.4115867614746094
    },
    {
      "epoch": 0.0003155517578125,
      "grad_norm": 0.07663615792989731,
      "learning_rate": 5.141137921169792e-06,
      "loss": 0.037,
      "step": 51700
    },
    {
      "epoch": 0.0003155517578125,
      "model_forward_time": 0.11497616767883301,
      "step": 51700
    },
    {
      "epoch": 0.0003155517578125,
      "step": 51700,
      "training_step_time": 0.5086920261383057
    },
    {
      "epoch": 0.000315557861328125,
      "model_forward_time": 0.11473202705383301,
      "step": 51701
    },
    {
      "epoch": 0.000315557861328125,
      "step": 51701,
      "training_step_time": 0.49814343452453613
    },
    {
      "epoch": 0.00031556396484375,
      "model_forward_time": 0.11406517028808594,
      "step": 51702
    },
    {
      "epoch": 0.00031556396484375,
      "step": 51702,
      "training_step_time": 0.3815498352050781
    },
    {
      "epoch": 0.000315570068359375,
      "model_forward_time": 0.11498904228210449,
      "step": 51703
    },
    {
      "epoch": 0.000315570068359375,
      "step": 51703,
      "training_step_time": 0.3930661678314209
    },
    {
      "epoch": 0.000315576171875,
      "model_forward_time": 0.11383533477783203,
      "step": 51704
    },
    {
      "epoch": 0.000315576171875,
      "step": 51704,
      "training_step_time": 0.3983187675476074
    },
    {
      "epoch": 0.000315582275390625,
      "model_forward_time": 0.1148231029510498,
      "step": 51705
    },
    {
      "epoch": 0.000315582275390625,
      "step": 51705,
      "training_step_time": 0.44002223014831543
    },
    {
      "epoch": 0.00031558837890625,
      "model_forward_time": 0.11541271209716797,
      "step": 51706
    },
    {
      "epoch": 0.00031558837890625,
      "step": 51706,
      "training_step_time": 0.4268641471862793
    },
    {
      "epoch": 0.000315594482421875,
      "model_forward_time": 0.11500859260559082,
      "step": 51707
    },
    {
      "epoch": 0.000315594482421875,
      "step": 51707,
      "training_step_time": 0.43279600143432617
    },
    {
      "epoch": 0.0003156005859375,
      "model_forward_time": 0.11515212059020996,
      "step": 51708
    },
    {
      "epoch": 0.0003156005859375,
      "step": 51708,
      "training_step_time": 0.5066916942596436
    },
    {
      "epoch": 0.000315606689453125,
      "model_forward_time": 0.11531281471252441,
      "step": 51709
    },
    {
      "epoch": 0.000315606689453125,
      "step": 51709,
      "training_step_time": 0.39589810371398926
    },
    {
      "epoch": 0.00031561279296875,
      "grad_norm": 0.1333664059638977,
      "learning_rate": 5.128973249371494e-06,
      "loss": 0.0371,
      "step": 51710
    },
    {
      "epoch": 0.00031561279296875,
      "model_forward_time": 0.11433982849121094,
      "step": 51710
    },
    {
      "epoch": 0.00031561279296875,
      "step": 51710,
      "training_step_time": 0.3882410526275635
    },
    {
      "epoch": 0.000315618896484375,
      "model_forward_time": 0.1146245002746582,
      "step": 51711
    },
    {
      "epoch": 0.000315618896484375,
      "step": 51711,
      "training_step_time": 0.5168213844299316
    },
    {
      "epoch": 0.000315625,
      "model_forward_time": 0.11480021476745605,
      "step": 51712
    },
    {
      "epoch": 0.000315625,
      "step": 51712,
      "training_step_time": 0.4347412586212158
    },
    {
      "epoch": 0.000315631103515625,
      "model_forward_time": 0.11514639854431152,
      "step": 51713
    },
    {
      "epoch": 0.000315631103515625,
      "step": 51713,
      "training_step_time": 0.416456937789917
    },
    {
      "epoch": 0.00031563720703125,
      "model_forward_time": 0.11501431465148926,
      "step": 51714
    },
    {
      "epoch": 0.00031563720703125,
      "step": 51714,
      "training_step_time": 0.458479642868042
    },
    {
      "epoch": 0.000315643310546875,
      "model_forward_time": 0.11521410942077637,
      "step": 51715
    },
    {
      "epoch": 0.000315643310546875,
      "step": 51715,
      "training_step_time": 0.48724889755249023
    },
    {
      "epoch": 0.0003156494140625,
      "model_forward_time": 0.11578941345214844,
      "step": 51716
    },
    {
      "epoch": 0.0003156494140625,
      "step": 51716,
      "training_step_time": 0.39300537109375
    },
    {
      "epoch": 0.000315655517578125,
      "model_forward_time": 0.11476874351501465,
      "step": 51717
    },
    {
      "epoch": 0.000315655517578125,
      "step": 51717,
      "training_step_time": 0.41375112533569336
    },
    {
      "epoch": 0.00031566162109375,
      "model_forward_time": 0.11473727226257324,
      "step": 51718
    },
    {
      "epoch": 0.00031566162109375,
      "step": 51718,
      "training_step_time": 0.39052510261535645
    },
    {
      "epoch": 0.000315667724609375,
      "model_forward_time": 0.11530828475952148,
      "step": 51719
    },
    {
      "epoch": 0.000315667724609375,
      "step": 51719,
      "training_step_time": 0.38513612747192383
    },
    {
      "epoch": 0.000315673828125,
      "grad_norm": 0.114388607442379,
      "learning_rate": 5.116822208206396e-06,
      "loss": 0.0407,
      "step": 51720
    },
    {
      "epoch": 0.000315673828125,
      "model_forward_time": 0.1152493953704834,
      "step": 51720
    },
    {
      "epoch": 0.000315673828125,
      "step": 51720,
      "training_step_time": 0.39629268646240234
    },
    {
      "epoch": 0.000315679931640625,
      "model_forward_time": 0.11491918563842773,
      "step": 51721
    },
    {
      "epoch": 0.000315679931640625,
      "step": 51721,
      "training_step_time": 0.4040641784667969
    },
    {
      "epoch": 0.00031568603515625,
      "model_forward_time": 0.11523723602294922,
      "step": 51722
    },
    {
      "epoch": 0.00031568603515625,
      "step": 51722,
      "training_step_time": 0.39334583282470703
    },
    {
      "epoch": 0.000315692138671875,
      "model_forward_time": 0.11539912223815918,
      "step": 51723
    },
    {
      "epoch": 0.000315692138671875,
      "step": 51723,
      "training_step_time": 0.6996827125549316
    },
    {
      "epoch": 0.0003156982421875,
      "model_forward_time": 0.11471271514892578,
      "step": 51724
    },
    {
      "epoch": 0.0003156982421875,
      "step": 51724,
      "training_step_time": 0.3855757713317871
    },
    {
      "epoch": 0.000315704345703125,
      "model_forward_time": 0.11528301239013672,
      "step": 51725
    },
    {
      "epoch": 0.000315704345703125,
      "step": 51725,
      "training_step_time": 0.38761448860168457
    },
    {
      "epoch": 0.00031571044921875,
      "model_forward_time": 0.11480927467346191,
      "step": 51726
    },
    {
      "epoch": 0.00031571044921875,
      "step": 51726,
      "training_step_time": 0.4682126045227051
    },
    {
      "epoch": 0.000315716552734375,
      "model_forward_time": 0.1149899959564209,
      "step": 51727
    },
    {
      "epoch": 0.000315716552734375,
      "step": 51727,
      "training_step_time": 0.40320897102355957
    },
    {
      "epoch": 0.00031572265625,
      "model_forward_time": 0.1148996353149414,
      "step": 51728
    },
    {
      "epoch": 0.00031572265625,
      "step": 51728,
      "training_step_time": 0.3939204216003418
    },
    {
      "epoch": 0.000315728759765625,
      "model_forward_time": 0.11535191535949707,
      "step": 51729
    },
    {
      "epoch": 0.000315728759765625,
      "step": 51729,
      "training_step_time": 0.47614312171936035
    },
    {
      "epoch": 0.00031573486328125,
      "grad_norm": 0.09451719373464584,
      "learning_rate": 5.1046848013656165e-06,
      "loss": 0.0348,
      "step": 51730
    },
    {
      "epoch": 0.00031573486328125,
      "model_forward_time": 0.11525774002075195,
      "step": 51730
    },
    {
      "epoch": 0.00031573486328125,
      "step": 51730,
      "training_step_time": 0.3957352638244629
    },
    {
      "epoch": 0.000315740966796875,
      "model_forward_time": 0.11842870712280273,
      "step": 51731
    },
    {
      "epoch": 0.000315740966796875,
      "step": 51731,
      "training_step_time": 0.3773622512817383
    },
    {
      "epoch": 0.0003157470703125,
      "model_forward_time": 0.11851143836975098,
      "step": 51732
    },
    {
      "epoch": 0.0003157470703125,
      "step": 51732,
      "training_step_time": 0.3911893367767334
    },
    {
      "epoch": 0.000315753173828125,
      "model_forward_time": 0.11483502388000488,
      "step": 51733
    },
    {
      "epoch": 0.000315753173828125,
      "step": 51733,
      "training_step_time": 0.39422035217285156
    },
    {
      "epoch": 0.00031575927734375,
      "model_forward_time": 0.11482119560241699,
      "step": 51734
    },
    {
      "epoch": 0.00031575927734375,
      "step": 51734,
      "training_step_time": 0.4436378479003906
    },
    {
      "epoch": 0.000315765380859375,
      "model_forward_time": 0.11464047431945801,
      "step": 51735
    },
    {
      "epoch": 0.000315765380859375,
      "step": 51735,
      "training_step_time": 0.6840813159942627
    },
    {
      "epoch": 0.000315771484375,
      "model_forward_time": 0.11487293243408203,
      "step": 51736
    },
    {
      "epoch": 0.000315771484375,
      "step": 51736,
      "training_step_time": 0.3946259021759033
    },
    {
      "epoch": 0.000315777587890625,
      "model_forward_time": 0.11478781700134277,
      "step": 51737
    },
    {
      "epoch": 0.000315777587890625,
      "step": 51737,
      "training_step_time": 0.38771486282348633
    },
    {
      "epoch": 0.00031578369140625,
      "model_forward_time": 0.11472010612487793,
      "step": 51738
    },
    {
      "epoch": 0.00031578369140625,
      "step": 51738,
      "training_step_time": 0.39257049560546875
    },
    {
      "epoch": 0.000315789794921875,
      "model_forward_time": 0.11501789093017578,
      "step": 51739
    },
    {
      "epoch": 0.000315789794921875,
      "step": 51739,
      "training_step_time": 0.39084386825561523
    },
    {
      "epoch": 0.0003157958984375,
      "grad_norm": 0.13759711384773254,
      "learning_rate": 5.092561032536225e-06,
      "loss": 0.0418,
      "step": 51740
    },
    {
      "epoch": 0.0003157958984375,
      "model_forward_time": 0.1149744987487793,
      "step": 51740
    },
    {
      "epoch": 0.0003157958984375,
      "step": 51740,
      "training_step_time": 0.446624755859375
    },
    {
      "epoch": 0.000315802001953125,
      "model_forward_time": 0.11499714851379395,
      "step": 51741
    },
    {
      "epoch": 0.000315802001953125,
      "step": 51741,
      "training_step_time": 0.6641204357147217
    },
    {
      "epoch": 0.00031580810546875,
      "model_forward_time": 0.11454510688781738,
      "step": 51742
    },
    {
      "epoch": 0.00031580810546875,
      "step": 51742,
      "training_step_time": 0.44405508041381836
    },
    {
      "epoch": 0.000315814208984375,
      "model_forward_time": 0.11474823951721191,
      "step": 51743
    },
    {
      "epoch": 0.000315814208984375,
      "step": 51743,
      "training_step_time": 0.48487257957458496
    },
    {
      "epoch": 0.0003158203125,
      "model_forward_time": 0.11485552787780762,
      "step": 51744
    },
    {
      "epoch": 0.0003158203125,
      "step": 51744,
      "training_step_time": 0.39067578315734863
    },
    {
      "epoch": 0.000315826416015625,
      "model_forward_time": 0.11410784721374512,
      "step": 51745
    },
    {
      "epoch": 0.000315826416015625,
      "step": 51745,
      "training_step_time": 0.3919868469238281
    },
    {
      "epoch": 0.00031583251953125,
      "model_forward_time": 0.11537313461303711,
      "step": 51746
    },
    {
      "epoch": 0.00031583251953125,
      "step": 51746,
      "training_step_time": 0.3862583637237549
    },
    {
      "epoch": 0.000315838623046875,
      "model_forward_time": 0.11506843566894531,
      "step": 51747
    },
    {
      "epoch": 0.000315838623046875,
      "step": 51747,
      "training_step_time": 0.4086606502532959
    },
    {
      "epoch": 0.0003158447265625,
      "model_forward_time": 0.11484742164611816,
      "step": 51748
    },
    {
      "epoch": 0.0003158447265625,
      "step": 51748,
      "training_step_time": 0.41975927352905273
    },
    {
      "epoch": 0.000315850830078125,
      "model_forward_time": 0.1151888370513916,
      "step": 51749
    },
    {
      "epoch": 0.000315850830078125,
      "step": 51749,
      "training_step_time": 0.40616750717163086
    },
    {
      "epoch": 0.00031585693359375,
      "grad_norm": 0.1074955016374588,
      "learning_rate": 5.080450905401057e-06,
      "loss": 0.0345,
      "step": 51750
    },
    {
      "epoch": 0.00031585693359375,
      "model_forward_time": 0.11552548408508301,
      "step": 51750
    },
    {
      "epoch": 0.00031585693359375,
      "step": 51750,
      "training_step_time": 0.5001411437988281
    },
    {
      "epoch": 0.000315863037109375,
      "model_forward_time": 0.11542272567749023,
      "step": 51751
    },
    {
      "epoch": 0.000315863037109375,
      "step": 51751,
      "training_step_time": 0.38553929328918457
    },
    {
      "epoch": 0.000315869140625,
      "model_forward_time": 0.11556005477905273,
      "step": 51752
    },
    {
      "epoch": 0.000315869140625,
      "step": 51752,
      "training_step_time": 0.39416027069091797
    },
    {
      "epoch": 0.000315875244140625,
      "model_forward_time": 0.11440658569335938,
      "step": 51753
    },
    {
      "epoch": 0.000315875244140625,
      "step": 51753,
      "training_step_time": 0.3935391902923584
    },
    {
      "epoch": 0.00031588134765625,
      "model_forward_time": 0.11565399169921875,
      "step": 51754
    },
    {
      "epoch": 0.00031588134765625,
      "step": 51754,
      "training_step_time": 0.41369056701660156
    },
    {
      "epoch": 0.000315887451171875,
      "model_forward_time": 0.1154937744140625,
      "step": 51755
    },
    {
      "epoch": 0.000315887451171875,
      "step": 51755,
      "training_step_time": 0.41417908668518066
    },
    {
      "epoch": 0.0003158935546875,
      "model_forward_time": 0.11595964431762695,
      "step": 51756
    },
    {
      "epoch": 0.0003158935546875,
      "step": 51756,
      "training_step_time": 0.49344515800476074
    },
    {
      "epoch": 0.000315899658203125,
      "model_forward_time": 0.11490082740783691,
      "step": 51757
    },
    {
      "epoch": 0.000315899658203125,
      "step": 51757,
      "training_step_time": 0.46803832054138184
    },
    {
      "epoch": 0.00031590576171875,
      "model_forward_time": 0.11520075798034668,
      "step": 51758
    },
    {
      "epoch": 0.00031590576171875,
      "step": 51758,
      "training_step_time": 0.4035923480987549
    },
    {
      "epoch": 0.000315911865234375,
      "model_forward_time": 0.11539530754089355,
      "step": 51759
    },
    {
      "epoch": 0.000315911865234375,
      "step": 51759,
      "training_step_time": 0.39757847785949707
    },
    {
      "epoch": 0.00031591796875,
      "grad_norm": 0.08837010711431503,
      "learning_rate": 5.068354423638882e-06,
      "loss": 0.0383,
      "step": 51760
    },
    {
      "epoch": 0.00031591796875,
      "model_forward_time": 0.11536955833435059,
      "step": 51760
    },
    {
      "epoch": 0.00031591796875,
      "step": 51760,
      "training_step_time": 0.3870124816894531
    },
    {
      "epoch": 0.000315924072265625,
      "model_forward_time": 0.11511826515197754,
      "step": 51761
    },
    {
      "epoch": 0.000315924072265625,
      "step": 51761,
      "training_step_time": 0.39825940132141113
    },
    {
      "epoch": 0.00031593017578125,
      "model_forward_time": 0.11487221717834473,
      "step": 51762
    },
    {
      "epoch": 0.00031593017578125,
      "step": 51762,
      "training_step_time": 0.38588476181030273
    },
    {
      "epoch": 0.000315936279296875,
      "model_forward_time": 0.11510205268859863,
      "step": 51763
    },
    {
      "epoch": 0.000315936279296875,
      "step": 51763,
      "training_step_time": 0.44720935821533203
    },
    {
      "epoch": 0.0003159423828125,
      "model_forward_time": 0.11520147323608398,
      "step": 51764
    },
    {
      "epoch": 0.0003159423828125,
      "step": 51764,
      "training_step_time": 0.42034029960632324
    },
    {
      "epoch": 0.000315948486328125,
      "model_forward_time": 0.11530065536499023,
      "step": 51765
    },
    {
      "epoch": 0.000315948486328125,
      "step": 51765,
      "training_step_time": 0.3982248306274414
    },
    {
      "epoch": 0.00031595458984375,
      "model_forward_time": 0.11466407775878906,
      "step": 51766
    },
    {
      "epoch": 0.00031595458984375,
      "step": 51766,
      "training_step_time": 0.4017763137817383
    },
    {
      "epoch": 0.000315960693359375,
      "model_forward_time": 0.11538934707641602,
      "step": 51767
    },
    {
      "epoch": 0.000315960693359375,
      "step": 51767,
      "training_step_time": 0.4088413715362549
    },
    {
      "epoch": 0.000315966796875,
      "model_forward_time": 0.11443376541137695,
      "step": 51768
    },
    {
      "epoch": 0.000315966796875,
      "step": 51768,
      "training_step_time": 0.4428255558013916
    },
    {
      "epoch": 0.000315972900390625,
      "model_forward_time": 0.11467933654785156,
      "step": 51769
    },
    {
      "epoch": 0.000315972900390625,
      "step": 51769,
      "training_step_time": 0.43564486503601074
    },
    {
      "epoch": 0.00031597900390625,
      "grad_norm": 0.09635544568300247,
      "learning_rate": 5.056271590924283e-06,
      "loss": 0.0376,
      "step": 51770
    },
    {
      "epoch": 0.00031597900390625,
      "model_forward_time": 0.11532258987426758,
      "step": 51770
    },
    {
      "epoch": 0.00031597900390625,
      "step": 51770,
      "training_step_time": 0.3995678424835205
    },
    {
      "epoch": 0.000315985107421875,
      "model_forward_time": 0.1153862476348877,
      "step": 51771
    },
    {
      "epoch": 0.000315985107421875,
      "step": 51771,
      "training_step_time": 0.4718475341796875
    },
    {
      "epoch": 0.0003159912109375,
      "model_forward_time": 0.11543822288513184,
      "step": 51772
    },
    {
      "epoch": 0.0003159912109375,
      "step": 51772,
      "training_step_time": 0.4275994300842285
    },
    {
      "epoch": 0.000315997314453125,
      "model_forward_time": 0.11565661430358887,
      "step": 51773
    },
    {
      "epoch": 0.000315997314453125,
      "step": 51773,
      "training_step_time": 0.42226171493530273
    },
    {
      "epoch": 0.00031600341796875,
      "model_forward_time": 0.11573362350463867,
      "step": 51774
    },
    {
      "epoch": 0.00031600341796875,
      "step": 51774,
      "training_step_time": 0.4253554344177246
    },
    {
      "epoch": 0.000316009521484375,
      "model_forward_time": 0.11511445045471191,
      "step": 51775
    },
    {
      "epoch": 0.000316009521484375,
      "step": 51775,
      "training_step_time": 0.4012746810913086
    },
    {
      "epoch": 0.000316015625,
      "model_forward_time": 0.11570310592651367,
      "step": 51776
    },
    {
      "epoch": 0.000316015625,
      "step": 51776,
      "training_step_time": 0.39033007621765137
    },
    {
      "epoch": 0.000316021728515625,
      "model_forward_time": 0.11531591415405273,
      "step": 51777
    },
    {
      "epoch": 0.000316021728515625,
      "step": 51777,
      "training_step_time": 0.3898124694824219
    },
    {
      "epoch": 0.00031602783203125,
      "model_forward_time": 0.11545538902282715,
      "step": 51778
    },
    {
      "epoch": 0.00031602783203125,
      "step": 51778,
      "training_step_time": 0.3992123603820801
    },
    {
      "epoch": 0.000316033935546875,
      "model_forward_time": 0.11514735221862793,
      "step": 51779
    },
    {
      "epoch": 0.000316033935546875,
      "step": 51779,
      "training_step_time": 0.4176015853881836
    },
    {
      "epoch": 0.0003160400390625,
      "grad_norm": 0.10681463032960892,
      "learning_rate": 5.044202410927706e-06,
      "loss": 0.0407,
      "step": 51780
    },
    {
      "epoch": 0.0003160400390625,
      "model_forward_time": 0.11512899398803711,
      "step": 51780
    },
    {
      "epoch": 0.0003160400390625,
      "step": 51780,
      "training_step_time": 0.46634864807128906
    },
    {
      "epoch": 0.000316046142578125,
      "model_forward_time": 0.1156611442565918,
      "step": 51781
    },
    {
      "epoch": 0.000316046142578125,
      "step": 51781,
      "training_step_time": 0.44214510917663574
    },
    {
      "epoch": 0.00031605224609375,
      "model_forward_time": 0.11555981636047363,
      "step": 51782
    },
    {
      "epoch": 0.00031605224609375,
      "step": 51782,
      "training_step_time": 0.4936788082122803
    },
    {
      "epoch": 0.000316058349609375,
      "model_forward_time": 0.11500692367553711,
      "step": 51783
    },
    {
      "epoch": 0.000316058349609375,
      "step": 51783,
      "training_step_time": 0.43372583389282227
    },
    {
      "epoch": 0.000316064453125,
      "model_forward_time": 0.11440515518188477,
      "step": 51784
    },
    {
      "epoch": 0.000316064453125,
      "step": 51784,
      "training_step_time": 0.4342942237854004
    },
    {
      "epoch": 0.000316070556640625,
      "model_forward_time": 0.11468148231506348,
      "step": 51785
    },
    {
      "epoch": 0.000316070556640625,
      "step": 51785,
      "training_step_time": 0.3679652214050293
    },
    {
      "epoch": 0.00031607666015625,
      "model_forward_time": 0.11467099189758301,
      "step": 51786
    },
    {
      "epoch": 0.00031607666015625,
      "step": 51786,
      "training_step_time": 0.44295811653137207
    },
    {
      "epoch": 0.000316082763671875,
      "model_forward_time": 0.11522340774536133,
      "step": 51787
    },
    {
      "epoch": 0.000316082763671875,
      "step": 51787,
      "training_step_time": 0.427600622177124
    },
    {
      "epoch": 0.0003160888671875,
      "model_forward_time": 0.1149289608001709,
      "step": 51788
    },
    {
      "epoch": 0.0003160888671875,
      "step": 51788,
      "training_step_time": 0.39013075828552246
    },
    {
      "epoch": 0.000316094970703125,
      "model_forward_time": 0.11510252952575684,
      "step": 51789
    },
    {
      "epoch": 0.000316094970703125,
      "step": 51789,
      "training_step_time": 0.4055929183959961
    },
    {
      "epoch": 0.00031610107421875,
      "grad_norm": 0.08718124777078629,
      "learning_rate": 5.032146887315448e-06,
      "loss": 0.0376,
      "step": 51790
    },
    {
      "epoch": 0.00031610107421875,
      "model_forward_time": 0.11485505104064941,
      "step": 51790
    },
    {
      "epoch": 0.00031610107421875,
      "step": 51790,
      "training_step_time": 0.39460086822509766
    },
    {
      "epoch": 0.000316107177734375,
      "model_forward_time": 0.11530900001525879,
      "step": 51791
    },
    {
      "epoch": 0.000316107177734375,
      "step": 51791,
      "training_step_time": 0.3953840732574463
    },
    {
      "epoch": 0.00031611328125,
      "model_forward_time": 0.1149132251739502,
      "step": 51792
    },
    {
      "epoch": 0.00031611328125,
      "step": 51792,
      "training_step_time": 0.44049072265625
    },
    {
      "epoch": 0.000316119384765625,
      "model_forward_time": 0.1153419017791748,
      "step": 51793
    },
    {
      "epoch": 0.000316119384765625,
      "step": 51793,
      "training_step_time": 0.4188559055328369
    },
    {
      "epoch": 0.00031612548828125,
      "model_forward_time": 0.11530423164367676,
      "step": 51794
    },
    {
      "epoch": 0.00031612548828125,
      "step": 51794,
      "training_step_time": 0.43052124977111816
    },
    {
      "epoch": 0.000316131591796875,
      "model_forward_time": 0.11521053314208984,
      "step": 51795
    },
    {
      "epoch": 0.000316131591796875,
      "step": 51795,
      "training_step_time": 0.5357513427734375
    },
    {
      "epoch": 0.0003161376953125,
      "model_forward_time": 0.11533927917480469,
      "step": 51796
    },
    {
      "epoch": 0.0003161376953125,
      "step": 51796,
      "training_step_time": 0.45003819465637207
    },
    {
      "epoch": 0.000316143798828125,
      "model_forward_time": 0.11540865898132324,
      "step": 51797
    },
    {
      "epoch": 0.000316143798828125,
      "step": 51797,
      "training_step_time": 0.42091917991638184
    },
    {
      "epoch": 0.00031614990234375,
      "model_forward_time": 0.11516118049621582,
      "step": 51798
    },
    {
      "epoch": 0.00031614990234375,
      "step": 51798,
      "training_step_time": 0.4224379062652588
    },
    {
      "epoch": 0.000316156005859375,
      "model_forward_time": 0.11691546440124512,
      "step": 51799
    },
    {
      "epoch": 0.000316156005859375,
      "step": 51799,
      "training_step_time": 0.48630523681640625
    },
    {
      "epoch": 0.000316162109375,
      "grad_norm": 0.13292579352855682,
      "learning_rate": 5.020105023749644e-06,
      "loss": 0.0396,
      "step": 51800
    },
    {
      "epoch": 0.000316162109375,
      "model_forward_time": 0.11850452423095703,
      "step": 51800
    },
    {
      "epoch": 0.000316162109375,
      "step": 51800,
      "training_step_time": 0.3793637752532959
    },
    {
      "epoch": 0.000316168212890625,
      "model_forward_time": 0.11807894706726074,
      "step": 51801
    },
    {
      "epoch": 0.000316168212890625,
      "step": 51801,
      "training_step_time": 0.469743013381958
    },
    {
      "epoch": 0.00031617431640625,
      "model_forward_time": 0.11788034439086914,
      "step": 51802
    },
    {
      "epoch": 0.00031617431640625,
      "step": 51802,
      "training_step_time": 0.380584716796875
    },
    {
      "epoch": 0.000316180419921875,
      "model_forward_time": 0.11780023574829102,
      "step": 51803
    },
    {
      "epoch": 0.000316180419921875,
      "step": 51803,
      "training_step_time": 0.3869051933288574
    },
    {
      "epoch": 0.0003161865234375,
      "model_forward_time": 0.11515092849731445,
      "step": 51804
    },
    {
      "epoch": 0.0003161865234375,
      "step": 51804,
      "training_step_time": 0.3986351490020752
    },
    {
      "epoch": 0.000316192626953125,
      "model_forward_time": 0.11452960968017578,
      "step": 51805
    },
    {
      "epoch": 0.000316192626953125,
      "step": 51805,
      "training_step_time": 0.39077234268188477
    },
    {
      "epoch": 0.00031619873046875,
      "model_forward_time": 0.1148982048034668,
      "step": 51806
    },
    {
      "epoch": 0.00031619873046875,
      "step": 51806,
      "training_step_time": 0.3971104621887207
    },
    {
      "epoch": 0.000316204833984375,
      "model_forward_time": 0.1148684024810791,
      "step": 51807
    },
    {
      "epoch": 0.000316204833984375,
      "step": 51807,
      "training_step_time": 0.7260844707489014
    },
    {
      "epoch": 0.0003162109375,
      "model_forward_time": 0.11452364921569824,
      "step": 51808
    },
    {
      "epoch": 0.0003162109375,
      "step": 51808,
      "training_step_time": 0.4213526248931885
    },
    {
      "epoch": 0.000316217041015625,
      "model_forward_time": 0.1148841381072998,
      "step": 51809
    },
    {
      "epoch": 0.000316217041015625,
      "step": 51809,
      "training_step_time": 0.401775598526001
    },
    {
      "epoch": 0.00031622314453125,
      "grad_norm": 0.0999363586306572,
      "learning_rate": 5.008076823888319e-06,
      "loss": 0.035,
      "step": 51810
    },
    {
      "epoch": 0.00031622314453125,
      "model_forward_time": 0.11408114433288574,
      "step": 51810
    },
    {
      "epoch": 0.00031622314453125,
      "step": 51810,
      "training_step_time": 0.4119424819946289
    },
    {
      "epoch": 0.000316229248046875,
      "model_forward_time": 0.11508297920227051,
      "step": 51811
    },
    {
      "epoch": 0.000316229248046875,
      "step": 51811,
      "training_step_time": 0.4316132068634033
    },
    {
      "epoch": 0.0003162353515625,
      "model_forward_time": 0.11424756050109863,
      "step": 51812
    },
    {
      "epoch": 0.0003162353515625,
      "step": 51812,
      "training_step_time": 0.4357926845550537
    },
    {
      "epoch": 0.000316241455078125,
      "model_forward_time": 0.11431264877319336,
      "step": 51813
    },
    {
      "epoch": 0.000316241455078125,
      "step": 51813,
      "training_step_time": 0.4249589443206787
    },
    {
      "epoch": 0.00031624755859375,
      "model_forward_time": 0.11475491523742676,
      "step": 51814
    },
    {
      "epoch": 0.00031624755859375,
      "step": 51814,
      "training_step_time": 0.3943789005279541
    },
    {
      "epoch": 0.000316253662109375,
      "model_forward_time": 0.11502528190612793,
      "step": 51815
    },
    {
      "epoch": 0.000316253662109375,
      "step": 51815,
      "training_step_time": 0.44045257568359375
    },
    {
      "epoch": 0.000316259765625,
      "model_forward_time": 0.11517524719238281,
      "step": 51816
    },
    {
      "epoch": 0.000316259765625,
      "step": 51816,
      "training_step_time": 0.4023730754852295
    },
    {
      "epoch": 0.000316265869140625,
      "model_forward_time": 0.11742758750915527,
      "step": 51817
    },
    {
      "epoch": 0.000316265869140625,
      "step": 51817,
      "training_step_time": 0.3794703483581543
    },
    {
      "epoch": 0.00031627197265625,
      "model_forward_time": 0.11957287788391113,
      "step": 51818
    },
    {
      "epoch": 0.00031627197265625,
      "step": 51818,
      "training_step_time": 0.3858988285064697
    },
    {
      "epoch": 0.000316278076171875,
      "model_forward_time": 0.11769890785217285,
      "step": 51819
    },
    {
      "epoch": 0.000316278076171875,
      "step": 51819,
      "training_step_time": 0.5448110103607178
    },
    {
      "epoch": 0.0003162841796875,
      "grad_norm": 0.09143856167793274,
      "learning_rate": 4.996062291385317e-06,
      "loss": 0.037,
      "step": 51820
    },
    {
      "epoch": 0.0003162841796875,
      "model_forward_time": 0.11585187911987305,
      "step": 51820
    },
    {
      "epoch": 0.0003162841796875,
      "step": 51820,
      "training_step_time": 0.45720911026000977
    },
    {
      "epoch": 0.000316290283203125,
      "model_forward_time": 0.11510682106018066,
      "step": 51821
    },
    {
      "epoch": 0.000316290283203125,
      "step": 51821,
      "training_step_time": 0.423980712890625
    },
    {
      "epoch": 0.00031629638671875,
      "model_forward_time": 0.11515617370605469,
      "step": 51822
    },
    {
      "epoch": 0.00031629638671875,
      "step": 51822,
      "training_step_time": 0.4322342872619629
    },
    {
      "epoch": 0.000316302490234375,
      "model_forward_time": 0.11519074440002441,
      "step": 51823
    },
    {
      "epoch": 0.000316302490234375,
      "step": 51823,
      "training_step_time": 0.3944282531738281
    },
    {
      "epoch": 0.00031630859375,
      "model_forward_time": 0.11456656455993652,
      "step": 51824
    },
    {
      "epoch": 0.00031630859375,
      "step": 51824,
      "training_step_time": 0.3927292823791504
    },
    {
      "epoch": 0.000316314697265625,
      "model_forward_time": 0.11585640907287598,
      "step": 51825
    },
    {
      "epoch": 0.000316314697265625,
      "step": 51825,
      "training_step_time": 0.5096595287322998
    },
    {
      "epoch": 0.00031632080078125,
      "model_forward_time": 0.11551880836486816,
      "step": 51826
    },
    {
      "epoch": 0.00031632080078125,
      "step": 51826,
      "training_step_time": 0.5177464485168457
    },
    {
      "epoch": 0.000316326904296875,
      "model_forward_time": 0.11527657508850098,
      "step": 51827
    },
    {
      "epoch": 0.000316326904296875,
      "step": 51827,
      "training_step_time": 0.3768012523651123
    },
    {
      "epoch": 0.0003163330078125,
      "model_forward_time": 0.11513137817382812,
      "step": 51828
    },
    {
      "epoch": 0.0003163330078125,
      "step": 51828,
      "training_step_time": 0.43683385848999023
    },
    {
      "epoch": 0.000316339111328125,
      "model_forward_time": 0.11552834510803223,
      "step": 51829
    },
    {
      "epoch": 0.000316339111328125,
      "step": 51829,
      "training_step_time": 0.4030487537384033
    },
    {
      "epoch": 0.00031634521484375,
      "grad_norm": 0.0854043960571289,
      "learning_rate": 4.984061429890324e-06,
      "loss": 0.0291,
      "step": 51830
    },
    {
      "epoch": 0.00031634521484375,
      "model_forward_time": 0.11458444595336914,
      "step": 51830
    },
    {
      "epoch": 0.00031634521484375,
      "step": 51830,
      "training_step_time": 0.40068697929382324
    },
    {
      "epoch": 0.000316351318359375,
      "model_forward_time": 0.11517977714538574,
      "step": 51831
    },
    {
      "epoch": 0.000316351318359375,
      "step": 51831,
      "training_step_time": 0.4167673587799072
    },
    {
      "epoch": 0.000316357421875,
      "model_forward_time": 0.11492204666137695,
      "step": 51832
    },
    {
      "epoch": 0.000316357421875,
      "step": 51832,
      "training_step_time": 0.4006783962249756
    },
    {
      "epoch": 0.000316363525390625,
      "model_forward_time": 0.11523890495300293,
      "step": 51833
    },
    {
      "epoch": 0.000316363525390625,
      "step": 51833,
      "training_step_time": 0.3943166732788086
    },
    {
      "epoch": 0.00031636962890625,
      "model_forward_time": 0.11474442481994629,
      "step": 51834
    },
    {
      "epoch": 0.00031636962890625,
      "step": 51834,
      "training_step_time": 0.4237208366394043
    },
    {
      "epoch": 0.000316375732421875,
      "model_forward_time": 0.11474299430847168,
      "step": 51835
    },
    {
      "epoch": 0.000316375732421875,
      "step": 51835,
      "training_step_time": 0.44363880157470703
    },
    {
      "epoch": 0.0003163818359375,
      "model_forward_time": 0.11574888229370117,
      "step": 51836
    },
    {
      "epoch": 0.0003163818359375,
      "step": 51836,
      "training_step_time": 0.4458274841308594
    },
    {
      "epoch": 0.000316387939453125,
      "model_forward_time": 0.11513352394104004,
      "step": 51837
    },
    {
      "epoch": 0.000316387939453125,
      "step": 51837,
      "training_step_time": 0.5825700759887695
    },
    {
      "epoch": 0.00031639404296875,
      "model_forward_time": 0.11499166488647461,
      "step": 51838
    },
    {
      "epoch": 0.00031639404296875,
      "step": 51838,
      "training_step_time": 0.39202880859375
    },
    {
      "epoch": 0.000316400146484375,
      "model_forward_time": 0.11448812484741211,
      "step": 51839
    },
    {
      "epoch": 0.000316400146484375,
      "step": 51839,
      "training_step_time": 0.39082980155944824
    },
    {
      "epoch": 0.00031640625,
      "grad_norm": 0.12307092547416687,
      "learning_rate": 4.972074243048897e-06,
      "loss": 0.0375,
      "step": 51840
    },
    {
      "epoch": 0.00031640625,
      "model_forward_time": 0.1145021915435791,
      "step": 51840
    },
    {
      "epoch": 0.00031640625,
      "step": 51840,
      "training_step_time": 0.5144574642181396
    },
    {
      "epoch": 0.000316412353515625,
      "model_forward_time": 0.1143960952758789,
      "step": 51841
    },
    {
      "epoch": 0.000316412353515625,
      "step": 51841,
      "training_step_time": 0.38143491744995117
    },
    {
      "epoch": 0.00031641845703125,
      "model_forward_time": 0.1142735481262207,
      "step": 51842
    },
    {
      "epoch": 0.00031641845703125,
      "step": 51842,
      "training_step_time": 0.42575716972351074
    },
    {
      "epoch": 0.000316424560546875,
      "model_forward_time": 0.11476540565490723,
      "step": 51843
    },
    {
      "epoch": 0.000316424560546875,
      "step": 51843,
      "training_step_time": 0.4215061664581299
    },
    {
      "epoch": 0.0003164306640625,
      "model_forward_time": 0.11452078819274902,
      "step": 51844
    },
    {
      "epoch": 0.0003164306640625,
      "step": 51844,
      "training_step_time": 0.3845069408416748
    },
    {
      "epoch": 0.000316436767578125,
      "model_forward_time": 0.11502861976623535,
      "step": 51845
    },
    {
      "epoch": 0.000316436767578125,
      "step": 51845,
      "training_step_time": 0.38982152938842773
    },
    {
      "epoch": 0.00031644287109375,
      "model_forward_time": 0.11488795280456543,
      "step": 51846
    },
    {
      "epoch": 0.00031644287109375,
      "step": 51846,
      "training_step_time": 0.3972046375274658
    },
    {
      "epoch": 0.000316448974609375,
      "model_forward_time": 0.11563897132873535,
      "step": 51847
    },
    {
      "epoch": 0.000316448974609375,
      "step": 51847,
      "training_step_time": 0.41327714920043945
    },
    {
      "epoch": 0.000316455078125,
      "model_forward_time": 0.11596918106079102,
      "step": 51848
    },
    {
      "epoch": 0.000316455078125,
      "step": 51848,
      "training_step_time": 0.3892483711242676
    },
    {
      "epoch": 0.000316461181640625,
      "model_forward_time": 0.11510729789733887,
      "step": 51849
    },
    {
      "epoch": 0.000316461181640625,
      "step": 51849,
      "training_step_time": 0.7245533466339111
    },
    {
      "epoch": 0.00031646728515625,
      "grad_norm": 0.08054995536804199,
      "learning_rate": 4.960100734502421e-06,
      "loss": 0.0394,
      "step": 51850
    },
    {
      "epoch": 0.00031646728515625,
      "model_forward_time": 0.1148531436920166,
      "step": 51850
    },
    {
      "epoch": 0.00031646728515625,
      "step": 51850,
      "training_step_time": 0.48467111587524414
    },
    {
      "epoch": 0.000316473388671875,
      "model_forward_time": 0.11417627334594727,
      "step": 51851
    },
    {
      "epoch": 0.000316473388671875,
      "step": 51851,
      "training_step_time": 0.3780806064605713
    },
    {
      "epoch": 0.0003164794921875,
      "model_forward_time": 0.11441397666931152,
      "step": 51852
    },
    {
      "epoch": 0.0003164794921875,
      "step": 51852,
      "training_step_time": 0.47162365913391113
    },
    {
      "epoch": 0.000316485595703125,
      "model_forward_time": 0.11504721641540527,
      "step": 51853
    },
    {
      "epoch": 0.000316485595703125,
      "step": 51853,
      "training_step_time": 0.401064395904541
    },
    {
      "epoch": 0.00031649169921875,
      "model_forward_time": 0.11396908760070801,
      "step": 51854
    },
    {
      "epoch": 0.00031649169921875,
      "step": 51854,
      "training_step_time": 0.4675936698913574
    },
    {
      "epoch": 0.000316497802734375,
      "model_forward_time": 0.11497783660888672,
      "step": 51855
    },
    {
      "epoch": 0.000316497802734375,
      "step": 51855,
      "training_step_time": 0.42153096199035645
    },
    {
      "epoch": 0.00031650390625,
      "model_forward_time": 0.11501693725585938,
      "step": 51856
    },
    {
      "epoch": 0.00031650390625,
      "step": 51856,
      "training_step_time": 0.49306321144104004
    },
    {
      "epoch": 0.000316510009765625,
      "model_forward_time": 0.11579251289367676,
      "step": 51857
    },
    {
      "epoch": 0.000316510009765625,
      "step": 51857,
      "training_step_time": 0.43247199058532715
    },
    {
      "epoch": 0.00031651611328125,
      "model_forward_time": 0.11490225791931152,
      "step": 51858
    },
    {
      "epoch": 0.00031651611328125,
      "step": 51858,
      "training_step_time": 0.389941930770874
    },
    {
      "epoch": 0.000316522216796875,
      "model_forward_time": 0.11514425277709961,
      "step": 51859
    },
    {
      "epoch": 0.000316522216796875,
      "step": 51859,
      "training_step_time": 0.39592528343200684
    },
    {
      "epoch": 0.0003165283203125,
      "grad_norm": 0.10832155495882034,
      "learning_rate": 4.948140907888121e-06,
      "loss": 0.0352,
      "step": 51860
    },
    {
      "epoch": 0.0003165283203125,
      "model_forward_time": 0.11525774002075195,
      "step": 51860
    },
    {
      "epoch": 0.0003165283203125,
      "step": 51860,
      "training_step_time": 0.3868587017059326
    },
    {
      "epoch": 0.000316534423828125,
      "model_forward_time": 0.11540436744689941,
      "step": 51861
    },
    {
      "epoch": 0.000316534423828125,
      "step": 51861,
      "training_step_time": 0.4632604122161865
    },
    {
      "epoch": 0.00031654052734375,
      "model_forward_time": 0.11481738090515137,
      "step": 51862
    },
    {
      "epoch": 0.00031654052734375,
      "step": 51862,
      "training_step_time": 0.4275045394897461
    },
    {
      "epoch": 0.000316546630859375,
      "model_forward_time": 0.11487817764282227,
      "step": 51863
    },
    {
      "epoch": 0.000316546630859375,
      "step": 51863,
      "training_step_time": 0.40682339668273926
    },
    {
      "epoch": 0.000316552734375,
      "model_forward_time": 0.11528539657592773,
      "step": 51864
    },
    {
      "epoch": 0.000316552734375,
      "step": 51864,
      "training_step_time": 0.5202507972717285
    },
    {
      "epoch": 0.000316558837890625,
      "model_forward_time": 0.11583590507507324,
      "step": 51865
    },
    {
      "epoch": 0.000316558837890625,
      "step": 51865,
      "training_step_time": 0.39661550521850586
    },
    {
      "epoch": 0.00031656494140625,
      "model_forward_time": 0.11544299125671387,
      "step": 51866
    },
    {
      "epoch": 0.00031656494140625,
      "step": 51866,
      "training_step_time": 0.38187241554260254
    },
    {
      "epoch": 0.000316571044921875,
      "model_forward_time": 0.11551809310913086,
      "step": 51867
    },
    {
      "epoch": 0.000316571044921875,
      "step": 51867,
      "training_step_time": 0.5208125114440918
    },
    {
      "epoch": 0.0003165771484375,
      "model_forward_time": 0.11484646797180176,
      "step": 51868
    },
    {
      "epoch": 0.0003165771484375,
      "step": 51868,
      "training_step_time": 0.38365650177001953
    },
    {
      "epoch": 0.000316583251953125,
      "model_forward_time": 0.11551237106323242,
      "step": 51869
    },
    {
      "epoch": 0.000316583251953125,
      "step": 51869,
      "training_step_time": 0.36612558364868164
    },
    {
      "epoch": 0.00031658935546875,
      "grad_norm": 0.07182800024747849,
      "learning_rate": 4.936194766839103e-06,
      "loss": 0.038,
      "step": 51870
    },
    {
      "epoch": 0.00031658935546875,
      "model_forward_time": 0.11598610877990723,
      "step": 51870
    },
    {
      "epoch": 0.00031658935546875,
      "step": 51870,
      "training_step_time": 0.4386115074157715
    },
    {
      "epoch": 0.000316595458984375,
      "model_forward_time": 0.11574769020080566,
      "step": 51871
    },
    {
      "epoch": 0.000316595458984375,
      "step": 51871,
      "training_step_time": 0.408642053604126
    },
    {
      "epoch": 0.0003166015625,
      "model_forward_time": 0.1149604320526123,
      "step": 51872
    },
    {
      "epoch": 0.0003166015625,
      "step": 51872,
      "training_step_time": 0.3954925537109375
    },
    {
      "epoch": 0.000316607666015625,
      "model_forward_time": 0.11487507820129395,
      "step": 51873
    },
    {
      "epoch": 0.000316607666015625,
      "step": 51873,
      "training_step_time": 0.5505344867706299
    },
    {
      "epoch": 0.00031661376953125,
      "model_forward_time": 0.11458992958068848,
      "step": 51874
    },
    {
      "epoch": 0.00031661376953125,
      "step": 51874,
      "training_step_time": 0.3979451656341553
    },
    {
      "epoch": 0.000316619873046875,
      "model_forward_time": 0.11547303199768066,
      "step": 51875
    },
    {
      "epoch": 0.000316619873046875,
      "step": 51875,
      "training_step_time": 0.4536621570587158
    },
    {
      "epoch": 0.0003166259765625,
      "model_forward_time": 0.1151280403137207,
      "step": 51876
    },
    {
      "epoch": 0.0003166259765625,
      "step": 51876,
      "training_step_time": 0.41614341735839844
    },
    {
      "epoch": 0.000316632080078125,
      "model_forward_time": 0.11506938934326172,
      "step": 51877
    },
    {
      "epoch": 0.000316632080078125,
      "step": 51877,
      "training_step_time": 0.40729546546936035
    },
    {
      "epoch": 0.00031663818359375,
      "model_forward_time": 0.11528277397155762,
      "step": 51878
    },
    {
      "epoch": 0.00031663818359375,
      "step": 51878,
      "training_step_time": 0.41318559646606445
    },
    {
      "epoch": 0.000316644287109375,
      "model_forward_time": 0.11501598358154297,
      "step": 51879
    },
    {
      "epoch": 0.000316644287109375,
      "step": 51879,
      "training_step_time": 0.5628488063812256
    },
    {
      "epoch": 0.000316650390625,
      "grad_norm": 0.0902775228023529,
      "learning_rate": 4.924262314984262e-06,
      "loss": 0.0333,
      "step": 51880
    },
    {
      "epoch": 0.000316650390625,
      "model_forward_time": 0.11517047882080078,
      "step": 51880
    },
    {
      "epoch": 0.000316650390625,
      "step": 51880,
      "training_step_time": 0.5082821846008301
    },
    {
      "epoch": 0.000316656494140625,
      "model_forward_time": 0.11455440521240234,
      "step": 51881
    },
    {
      "epoch": 0.000316656494140625,
      "step": 51881,
      "training_step_time": 0.4115715026855469
    },
    {
      "epoch": 0.00031666259765625,
      "model_forward_time": 0.11471152305603027,
      "step": 51882
    },
    {
      "epoch": 0.00031666259765625,
      "step": 51882,
      "training_step_time": 0.4763679504394531
    },
    {
      "epoch": 0.000316668701171875,
      "model_forward_time": 0.11756634712219238,
      "step": 51883
    },
    {
      "epoch": 0.000316668701171875,
      "step": 51883,
      "training_step_time": 0.42301273345947266
    },
    {
      "epoch": 0.0003166748046875,
      "model_forward_time": 0.11693739891052246,
      "step": 51884
    },
    {
      "epoch": 0.0003166748046875,
      "step": 51884,
      "training_step_time": 0.4882042407989502
    },
    {
      "epoch": 0.000316680908203125,
      "model_forward_time": 0.1151418685913086,
      "step": 51885
    },
    {
      "epoch": 0.000316680908203125,
      "step": 51885,
      "training_step_time": 0.407975435256958
    },
    {
      "epoch": 0.00031668701171875,
      "model_forward_time": 0.11554265022277832,
      "step": 51886
    },
    {
      "epoch": 0.00031668701171875,
      "step": 51886,
      "training_step_time": 0.3991568088531494
    },
    {
      "epoch": 0.000316693115234375,
      "model_forward_time": 0.11448287963867188,
      "step": 51887
    },
    {
      "epoch": 0.000316693115234375,
      "step": 51887,
      "training_step_time": 0.39309120178222656
    },
    {
      "epoch": 0.00031669921875,
      "model_forward_time": 0.11473202705383301,
      "step": 51888
    },
    {
      "epoch": 0.00031669921875,
      "step": 51888,
      "training_step_time": 0.40518856048583984
    },
    {
      "epoch": 0.000316705322265625,
      "model_forward_time": 0.11519098281860352,
      "step": 51889
    },
    {
      "epoch": 0.000316705322265625,
      "step": 51889,
      "training_step_time": 0.41100454330444336
    },
    {
      "epoch": 0.00031671142578125,
      "grad_norm": 0.0681491419672966,
      "learning_rate": 4.91234355594839e-06,
      "loss": 0.0354,
      "step": 51890
    },
    {
      "epoch": 0.00031671142578125,
      "model_forward_time": 0.11659646034240723,
      "step": 51890
    },
    {
      "epoch": 0.00031671142578125,
      "step": 51890,
      "training_step_time": 0.4008669853210449
    },
    {
      "epoch": 0.000316717529296875,
      "model_forward_time": 0.11469459533691406,
      "step": 51891
    },
    {
      "epoch": 0.000316717529296875,
      "step": 51891,
      "training_step_time": 0.5272674560546875
    },
    {
      "epoch": 0.0003167236328125,
      "model_forward_time": 0.11508464813232422,
      "step": 51892
    },
    {
      "epoch": 0.0003167236328125,
      "step": 51892,
      "training_step_time": 0.41243410110473633
    },
    {
      "epoch": 0.000316729736328125,
      "model_forward_time": 0.1151885986328125,
      "step": 51893
    },
    {
      "epoch": 0.000316729736328125,
      "step": 51893,
      "training_step_time": 0.39323925971984863
    },
    {
      "epoch": 0.00031673583984375,
      "model_forward_time": 0.11576294898986816,
      "step": 51894
    },
    {
      "epoch": 0.00031673583984375,
      "step": 51894,
      "training_step_time": 0.4233744144439697
    },
    {
      "epoch": 0.000316741943359375,
      "model_forward_time": 0.11463379859924316,
      "step": 51895
    },
    {
      "epoch": 0.000316741943359375,
      "step": 51895,
      "training_step_time": 0.3966507911682129
    },
    {
      "epoch": 0.000316748046875,
      "model_forward_time": 0.11524105072021484,
      "step": 51896
    },
    {
      "epoch": 0.000316748046875,
      "step": 51896,
      "training_step_time": 0.40961647033691406
    },
    {
      "epoch": 0.000316754150390625,
      "model_forward_time": 0.11518192291259766,
      "step": 51897
    },
    {
      "epoch": 0.000316754150390625,
      "step": 51897,
      "training_step_time": 0.6642417907714844
    },
    {
      "epoch": 0.00031676025390625,
      "model_forward_time": 0.11533021926879883,
      "step": 51898
    },
    {
      "epoch": 0.00031676025390625,
      "step": 51898,
      "training_step_time": 0.42200255393981934
    },
    {
      "epoch": 0.000316766357421875,
      "model_forward_time": 0.11483216285705566,
      "step": 51899
    },
    {
      "epoch": 0.000316766357421875,
      "step": 51899,
      "training_step_time": 0.3969547748565674
    },
    {
      "epoch": 0.0003167724609375,
      "grad_norm": 0.10570403188467026,
      "learning_rate": 4.900438493352055e-06,
      "loss": 0.0361,
      "step": 51900
    },
    {
      "epoch": 0.0003167724609375,
      "model_forward_time": 0.11471366882324219,
      "step": 51900
    },
    {
      "epoch": 0.0003167724609375,
      "step": 51900,
      "training_step_time": 0.3905375003814697
    },
    {
      "epoch": 0.000316778564453125,
      "model_forward_time": 0.11520719528198242,
      "step": 51901
    },
    {
      "epoch": 0.000316778564453125,
      "step": 51901,
      "training_step_time": 0.42791056632995605
    },
    {
      "epoch": 0.00031678466796875,
      "model_forward_time": 0.11444902420043945,
      "step": 51902
    },
    {
      "epoch": 0.00031678466796875,
      "step": 51902,
      "training_step_time": 0.4036262035369873
    },
    {
      "epoch": 0.000316790771484375,
      "model_forward_time": 0.11516237258911133,
      "step": 51903
    },
    {
      "epoch": 0.000316790771484375,
      "step": 51903,
      "training_step_time": 0.43538379669189453
    },
    {
      "epoch": 0.000316796875,
      "model_forward_time": 0.11496853828430176,
      "step": 51904
    },
    {
      "epoch": 0.000316796875,
      "step": 51904,
      "training_step_time": 0.4921536445617676
    },
    {
      "epoch": 0.000316802978515625,
      "model_forward_time": 0.11533093452453613,
      "step": 51905
    },
    {
      "epoch": 0.000316802978515625,
      "step": 51905,
      "training_step_time": 0.457319974899292
    },
    {
      "epoch": 0.00031680908203125,
      "model_forward_time": 0.11525630950927734,
      "step": 51906
    },
    {
      "epoch": 0.00031680908203125,
      "step": 51906,
      "training_step_time": 0.4930846691131592
    },
    {
      "epoch": 0.000316815185546875,
      "model_forward_time": 0.11598443984985352,
      "step": 51907
    },
    {
      "epoch": 0.000316815185546875,
      "step": 51907,
      "training_step_time": 0.3884098529815674
    },
    {
      "epoch": 0.0003168212890625,
      "model_forward_time": 0.11452388763427734,
      "step": 51908
    },
    {
      "epoch": 0.0003168212890625,
      "step": 51908,
      "training_step_time": 0.4368910789489746
    },
    {
      "epoch": 0.000316827392578125,
      "model_forward_time": 0.11853408813476562,
      "step": 51909
    },
    {
      "epoch": 0.000316827392578125,
      "step": 51909,
      "training_step_time": 0.39871788024902344
    },
    {
      "epoch": 0.00031683349609375,
      "grad_norm": 0.09031098335981369,
      "learning_rate": 4.888547130811732e-06,
      "loss": 0.0358,
      "step": 51910
    },
    {
      "epoch": 0.00031683349609375,
      "model_forward_time": 0.11482667922973633,
      "step": 51910
    },
    {
      "epoch": 0.00031683349609375,
      "step": 51910,
      "training_step_time": 0.45861220359802246
    },
    {
      "epoch": 0.000316839599609375,
      "model_forward_time": 0.11379885673522949,
      "step": 51911
    },
    {
      "epoch": 0.000316839599609375,
      "step": 51911,
      "training_step_time": 0.3656771183013916
    },
    {
      "epoch": 0.000316845703125,
      "model_forward_time": 0.11472392082214355,
      "step": 51912
    },
    {
      "epoch": 0.000316845703125,
      "step": 51912,
      "training_step_time": 0.44278717041015625
    },
    {
      "epoch": 0.000316851806640625,
      "model_forward_time": 0.11444711685180664,
      "step": 51913
    },
    {
      "epoch": 0.000316851806640625,
      "step": 51913,
      "training_step_time": 0.3966844081878662
    },
    {
      "epoch": 0.00031685791015625,
      "model_forward_time": 0.11464881896972656,
      "step": 51914
    },
    {
      "epoch": 0.00031685791015625,
      "step": 51914,
      "training_step_time": 0.39058613777160645
    },
    {
      "epoch": 0.000316864013671875,
      "model_forward_time": 0.11471915245056152,
      "step": 51915
    },
    {
      "epoch": 0.000316864013671875,
      "step": 51915,
      "training_step_time": 0.7527022361755371
    },
    {
      "epoch": 0.0003168701171875,
      "model_forward_time": 0.1145777702331543,
      "step": 51916
    },
    {
      "epoch": 0.0003168701171875,
      "step": 51916,
      "training_step_time": 0.3798801898956299
    },
    {
      "epoch": 0.000316876220703125,
      "model_forward_time": 0.11544132232666016,
      "step": 51917
    },
    {
      "epoch": 0.000316876220703125,
      "step": 51917,
      "training_step_time": 0.3856961727142334
    },
    {
      "epoch": 0.00031688232421875,
      "model_forward_time": 0.11408758163452148,
      "step": 51918
    },
    {
      "epoch": 0.00031688232421875,
      "step": 51918,
      "training_step_time": 0.39522314071655273
    },
    {
      "epoch": 0.000316888427734375,
      "model_forward_time": 0.11496758460998535,
      "step": 51919
    },
    {
      "epoch": 0.000316888427734375,
      "step": 51919,
      "training_step_time": 0.3896181583404541
    },
    {
      "epoch": 0.00031689453125,
      "grad_norm": 0.09445817768573761,
      "learning_rate": 4.8766694719396875e-06,
      "loss": 0.0339,
      "step": 51920
    },
    {
      "epoch": 0.00031689453125,
      "model_forward_time": 0.11579108238220215,
      "step": 51920
    },
    {
      "epoch": 0.00031689453125,
      "step": 51920,
      "training_step_time": 0.4770493507385254
    },
    {
      "epoch": 0.000316900634765625,
      "model_forward_time": 0.11478495597839355,
      "step": 51921
    },
    {
      "epoch": 0.000316900634765625,
      "step": 51921,
      "training_step_time": 0.5335896015167236
    },
    {
      "epoch": 0.00031690673828125,
      "model_forward_time": 0.11417841911315918,
      "step": 51922
    },
    {
      "epoch": 0.00031690673828125,
      "step": 51922,
      "training_step_time": 0.46039676666259766
    },
    {
      "epoch": 0.000316912841796875,
      "model_forward_time": 0.11493420600891113,
      "step": 51923
    },
    {
      "epoch": 0.000316912841796875,
      "step": 51923,
      "training_step_time": 0.4064798355102539
    },
    {
      "epoch": 0.0003169189453125,
      "model_forward_time": 0.11485671997070312,
      "step": 51924
    },
    {
      "epoch": 0.0003169189453125,
      "step": 51924,
      "training_step_time": 0.3951706886291504
    },
    {
      "epoch": 0.000316925048828125,
      "model_forward_time": 0.11498665809631348,
      "step": 51925
    },
    {
      "epoch": 0.000316925048828125,
      "step": 51925,
      "training_step_time": 0.40528368949890137
    },
    {
      "epoch": 0.00031693115234375,
      "model_forward_time": 0.11454486846923828,
      "step": 51926
    },
    {
      "epoch": 0.00031693115234375,
      "step": 51926,
      "training_step_time": 0.3855476379394531
    },
    {
      "epoch": 0.000316937255859375,
      "model_forward_time": 0.11504316329956055,
      "step": 51927
    },
    {
      "epoch": 0.000316937255859375,
      "step": 51927,
      "training_step_time": 0.405900239944458
    },
    {
      "epoch": 0.000316943359375,
      "model_forward_time": 0.11507248878479004,
      "step": 51928
    },
    {
      "epoch": 0.000316943359375,
      "step": 51928,
      "training_step_time": 0.39857006072998047
    },
    {
      "epoch": 0.000316949462890625,
      "model_forward_time": 0.11538577079772949,
      "step": 51929
    },
    {
      "epoch": 0.000316949462890625,
      "step": 51929,
      "training_step_time": 0.39084625244140625
    },
    {
      "epoch": 0.00031695556640625,
      "grad_norm": 0.12169447541236877,
      "learning_rate": 4.864805520344051e-06,
      "loss": 0.0379,
      "step": 51930
    },
    {
      "epoch": 0.00031695556640625,
      "model_forward_time": 0.1155097484588623,
      "step": 51930
    },
    {
      "epoch": 0.00031695556640625,
      "step": 51930,
      "training_step_time": 0.39669203758239746
    },
    {
      "epoch": 0.000316961669921875,
      "model_forward_time": 0.11511063575744629,
      "step": 51931
    },
    {
      "epoch": 0.000316961669921875,
      "step": 51931,
      "training_step_time": 0.395418643951416
    },
    {
      "epoch": 0.0003169677734375,
      "model_forward_time": 0.11604142189025879,
      "step": 51932
    },
    {
      "epoch": 0.0003169677734375,
      "step": 51932,
      "training_step_time": 0.3930017948150635
    },
    {
      "epoch": 0.000316973876953125,
      "model_forward_time": 0.11526346206665039,
      "step": 51933
    },
    {
      "epoch": 0.000316973876953125,
      "step": 51933,
      "training_step_time": 0.6743168830871582
    },
    {
      "epoch": 0.00031697998046875,
      "model_forward_time": 0.11443567276000977,
      "step": 51934
    },
    {
      "epoch": 0.00031697998046875,
      "step": 51934,
      "training_step_time": 0.42153286933898926
    },
    {
      "epoch": 0.000316986083984375,
      "model_forward_time": 0.11444592475891113,
      "step": 51935
    },
    {
      "epoch": 0.000316986083984375,
      "step": 51935,
      "training_step_time": 0.4012877941131592
    },
    {
      "epoch": 0.0003169921875,
      "model_forward_time": 0.1152186393737793,
      "step": 51936
    },
    {
      "epoch": 0.0003169921875,
      "step": 51936,
      "training_step_time": 0.4196956157684326
    },
    {
      "epoch": 0.000316998291015625,
      "model_forward_time": 0.11551928520202637,
      "step": 51937
    },
    {
      "epoch": 0.000316998291015625,
      "step": 51937,
      "training_step_time": 0.413912296295166
    },
    {
      "epoch": 0.00031700439453125,
      "model_forward_time": 0.1150059700012207,
      "step": 51938
    },
    {
      "epoch": 0.00031700439453125,
      "step": 51938,
      "training_step_time": 0.3923201560974121
    },
    {
      "epoch": 0.000317010498046875,
      "model_forward_time": 0.11468935012817383,
      "step": 51939
    },
    {
      "epoch": 0.000317010498046875,
      "step": 51939,
      "training_step_time": 0.39901208877563477
    },
    {
      "epoch": 0.0003170166015625,
      "grad_norm": 0.06611355394124985,
      "learning_rate": 4.852955279628768e-06,
      "loss": 0.0343,
      "step": 51940
    },
    {
      "epoch": 0.0003170166015625,
      "model_forward_time": 0.11507582664489746,
      "step": 51940
    },
    {
      "epoch": 0.0003170166015625,
      "step": 51940,
      "training_step_time": 0.4288477897644043
    },
    {
      "epoch": 0.000317022705078125,
      "model_forward_time": 0.11462950706481934,
      "step": 51941
    },
    {
      "epoch": 0.000317022705078125,
      "step": 51941,
      "training_step_time": 0.4048731327056885
    },
    {
      "epoch": 0.00031702880859375,
      "model_forward_time": 0.11466670036315918,
      "step": 51942
    },
    {
      "epoch": 0.00031702880859375,
      "step": 51942,
      "training_step_time": 0.3960590362548828
    },
    {
      "epoch": 0.000317034912109375,
      "model_forward_time": 0.11507844924926758,
      "step": 51943
    },
    {
      "epoch": 0.000317034912109375,
      "step": 51943,
      "training_step_time": 0.3891775608062744
    },
    {
      "epoch": 0.000317041015625,
      "model_forward_time": 0.11464643478393555,
      "step": 51944
    },
    {
      "epoch": 0.000317041015625,
      "step": 51944,
      "training_step_time": 0.38565754890441895
    },
    {
      "epoch": 0.000317047119140625,
      "model_forward_time": 0.1164240837097168,
      "step": 51945
    },
    {
      "epoch": 0.000317047119140625,
      "step": 51945,
      "training_step_time": 0.5319738388061523
    },
    {
      "epoch": 0.00031705322265625,
      "model_forward_time": 0.11502480506896973,
      "step": 51946
    },
    {
      "epoch": 0.00031705322265625,
      "step": 51946,
      "training_step_time": 0.40070343017578125
    },
    {
      "epoch": 0.000317059326171875,
      "model_forward_time": 0.11494040489196777,
      "step": 51947
    },
    {
      "epoch": 0.000317059326171875,
      "step": 51947,
      "training_step_time": 0.4012017250061035
    },
    {
      "epoch": 0.0003170654296875,
      "model_forward_time": 0.11542367935180664,
      "step": 51948
    },
    {
      "epoch": 0.0003170654296875,
      "step": 51948,
      "training_step_time": 0.45993518829345703
    },
    {
      "epoch": 0.000317071533203125,
      "model_forward_time": 0.11541604995727539,
      "step": 51949
    },
    {
      "epoch": 0.000317071533203125,
      "step": 51949,
      "training_step_time": 0.46379899978637695
    },
    {
      "epoch": 0.00031707763671875,
      "grad_norm": 0.10968858748674393,
      "learning_rate": 4.8411187533936195e-06,
      "loss": 0.0406,
      "step": 51950
    },
    {
      "epoch": 0.00031707763671875,
      "model_forward_time": 0.11465001106262207,
      "step": 51950
    },
    {
      "epoch": 0.00031707763671875,
      "step": 51950,
      "training_step_time": 0.42659664154052734
    },
    {
      "epoch": 0.000317083740234375,
      "model_forward_time": 0.11513590812683105,
      "step": 51951
    },
    {
      "epoch": 0.000317083740234375,
      "step": 51951,
      "training_step_time": 0.4794166088104248
    },
    {
      "epoch": 0.00031708984375,
      "model_forward_time": 0.11519336700439453,
      "step": 51952
    },
    {
      "epoch": 0.00031708984375,
      "step": 51952,
      "training_step_time": 0.3803293704986572
    },
    {
      "epoch": 0.000317095947265625,
      "model_forward_time": 0.11521244049072266,
      "step": 51953
    },
    {
      "epoch": 0.000317095947265625,
      "step": 51953,
      "training_step_time": 0.3611791133880615
    },
    {
      "epoch": 0.00031710205078125,
      "model_forward_time": 0.11480569839477539,
      "step": 51954
    },
    {
      "epoch": 0.00031710205078125,
      "step": 51954,
      "training_step_time": 0.44501662254333496
    },
    {
      "epoch": 0.000317108154296875,
      "model_forward_time": 0.11452937126159668,
      "step": 51955
    },
    {
      "epoch": 0.000317108154296875,
      "step": 51955,
      "training_step_time": 0.40064263343811035
    },
    {
      "epoch": 0.0003171142578125,
      "model_forward_time": 0.11530375480651855,
      "step": 51956
    },
    {
      "epoch": 0.0003171142578125,
      "step": 51956,
      "training_step_time": 0.382845401763916
    },
    {
      "epoch": 0.000317120361328125,
      "model_forward_time": 0.11615562438964844,
      "step": 51957
    },
    {
      "epoch": 0.000317120361328125,
      "step": 51957,
      "training_step_time": 0.38974881172180176
    },
    {
      "epoch": 0.00031712646484375,
      "model_forward_time": 0.11551332473754883,
      "step": 51958
    },
    {
      "epoch": 0.00031712646484375,
      "step": 51958,
      "training_step_time": 0.4183330535888672
    },
    {
      "epoch": 0.000317132568359375,
      "model_forward_time": 0.11458253860473633,
      "step": 51959
    },
    {
      "epoch": 0.000317132568359375,
      "step": 51959,
      "training_step_time": 0.38811540603637695
    },
    {
      "epoch": 0.000317138671875,
      "grad_norm": 0.08091744780540466,
      "learning_rate": 4.829295945234258e-06,
      "loss": 0.0384,
      "step": 51960
    },
    {
      "epoch": 0.000317138671875,
      "model_forward_time": 0.11559581756591797,
      "step": 51960
    },
    {
      "epoch": 0.000317138671875,
      "step": 51960,
      "training_step_time": 0.411724328994751
    },
    {
      "epoch": 0.000317144775390625,
      "model_forward_time": 0.1153404712677002,
      "step": 51961
    },
    {
      "epoch": 0.000317144775390625,
      "step": 51961,
      "training_step_time": 0.4298744201660156
    },
    {
      "epoch": 0.00031715087890625,
      "model_forward_time": 0.11573266983032227,
      "step": 51962
    },
    {
      "epoch": 0.00031715087890625,
      "step": 51962,
      "training_step_time": 0.41855859756469727
    },
    {
      "epoch": 0.000317156982421875,
      "model_forward_time": 0.11493563652038574,
      "step": 51963
    },
    {
      "epoch": 0.000317156982421875,
      "step": 51963,
      "training_step_time": 0.439298152923584
    },
    {
      "epoch": 0.0003171630859375,
      "model_forward_time": 0.11510682106018066,
      "step": 51964
    },
    {
      "epoch": 0.0003171630859375,
      "step": 51964,
      "training_step_time": 0.4485335350036621
    },
    {
      "epoch": 0.000317169189453125,
      "model_forward_time": 0.11485075950622559,
      "step": 51965
    },
    {
      "epoch": 0.000317169189453125,
      "step": 51965,
      "training_step_time": 0.416748046875
    },
    {
      "epoch": 0.00031717529296875,
      "model_forward_time": 0.11468648910522461,
      "step": 51966
    },
    {
      "epoch": 0.00031717529296875,
      "step": 51966,
      "training_step_time": 0.48674464225769043
    },
    {
      "epoch": 0.000317181396484375,
      "model_forward_time": 0.11588335037231445,
      "step": 51967
    },
    {
      "epoch": 0.000317181396484375,
      "step": 51967,
      "training_step_time": 0.41172361373901367
    },
    {
      "epoch": 0.0003171875,
      "model_forward_time": 0.11493563652038574,
      "step": 51968
    },
    {
      "epoch": 0.0003171875,
      "step": 51968,
      "training_step_time": 0.4626014232635498
    },
    {
      "epoch": 0.000317193603515625,
      "model_forward_time": 0.11503148078918457,
      "step": 51969
    },
    {
      "epoch": 0.000317193603515625,
      "step": 51969,
      "training_step_time": 0.48131513595581055
    },
    {
      "epoch": 0.00031719970703125,
      "grad_norm": 0.09168606251478195,
      "learning_rate": 4.817486858742127e-06,
      "loss": 0.0396,
      "step": 51970
    },
    {
      "epoch": 0.00031719970703125,
      "model_forward_time": 0.11507606506347656,
      "step": 51970
    },
    {
      "epoch": 0.00031719970703125,
      "step": 51970,
      "training_step_time": 0.38600897789001465
    },
    {
      "epoch": 0.000317205810546875,
      "model_forward_time": 0.11508584022521973,
      "step": 51971
    },
    {
      "epoch": 0.000317205810546875,
      "step": 51971,
      "training_step_time": 0.39278101921081543
    },
    {
      "epoch": 0.0003172119140625,
      "model_forward_time": 0.11570024490356445,
      "step": 51972
    },
    {
      "epoch": 0.0003172119140625,
      "step": 51972,
      "training_step_time": 0.39975571632385254
    },
    {
      "epoch": 0.000317218017578125,
      "model_forward_time": 0.1149148941040039,
      "step": 51973
    },
    {
      "epoch": 0.000317218017578125,
      "step": 51973,
      "training_step_time": 0.3907036781311035
    },
    {
      "epoch": 0.00031722412109375,
      "model_forward_time": 0.11533069610595703,
      "step": 51974
    },
    {
      "epoch": 0.00031722412109375,
      "step": 51974,
      "training_step_time": 0.39621400833129883
    },
    {
      "epoch": 0.000317230224609375,
      "model_forward_time": 0.11554765701293945,
      "step": 51975
    },
    {
      "epoch": 0.000317230224609375,
      "step": 51975,
      "training_step_time": 0.5101258754730225
    },
    {
      "epoch": 0.000317236328125,
      "model_forward_time": 0.11527538299560547,
      "step": 51976
    },
    {
      "epoch": 0.000317236328125,
      "step": 51976,
      "training_step_time": 0.4490318298339844
    },
    {
      "epoch": 0.000317242431640625,
      "model_forward_time": 0.11542844772338867,
      "step": 51977
    },
    {
      "epoch": 0.000317242431640625,
      "step": 51977,
      "training_step_time": 0.5109364986419678
    },
    {
      "epoch": 0.00031724853515625,
      "model_forward_time": 0.11488199234008789,
      "step": 51978
    },
    {
      "epoch": 0.00031724853515625,
      "step": 51978,
      "training_step_time": 0.4307265281677246
    },
    {
      "epoch": 0.000317254638671875,
      "model_forward_time": 0.11513876914978027,
      "step": 51979
    },
    {
      "epoch": 0.000317254638671875,
      "step": 51979,
      "training_step_time": 0.4126894474029541
    },
    {
      "epoch": 0.0003172607421875,
      "grad_norm": 0.07070491462945938,
      "learning_rate": 4.805691497504505e-06,
      "loss": 0.0366,
      "step": 51980
    },
    {
      "epoch": 0.0003172607421875,
      "model_forward_time": 0.11512589454650879,
      "step": 51980
    },
    {
      "epoch": 0.0003172607421875,
      "step": 51980,
      "training_step_time": 0.47887182235717773
    },
    {
      "epoch": 0.000317266845703125,
      "model_forward_time": 0.1150217056274414,
      "step": 51981
    },
    {
      "epoch": 0.000317266845703125,
      "step": 51981,
      "training_step_time": 0.44059205055236816
    },
    {
      "epoch": 0.00031727294921875,
      "model_forward_time": 0.11530327796936035,
      "step": 51982
    },
    {
      "epoch": 0.00031727294921875,
      "step": 51982,
      "training_step_time": 0.4486660957336426
    },
    {
      "epoch": 0.000317279052734375,
      "model_forward_time": 0.11485123634338379,
      "step": 51983
    },
    {
      "epoch": 0.000317279052734375,
      "step": 51983,
      "training_step_time": 0.4906346797943115
    },
    {
      "epoch": 0.00031728515625,
      "model_forward_time": 0.11568164825439453,
      "step": 51984
    },
    {
      "epoch": 0.00031728515625,
      "step": 51984,
      "training_step_time": 0.38509225845336914
    },
    {
      "epoch": 0.000317291259765625,
      "model_forward_time": 0.11464500427246094,
      "step": 51985
    },
    {
      "epoch": 0.000317291259765625,
      "step": 51985,
      "training_step_time": 0.3932216167449951
    },
    {
      "epoch": 0.00031729736328125,
      "model_forward_time": 0.11407637596130371,
      "step": 51986
    },
    {
      "epoch": 0.00031729736328125,
      "step": 51986,
      "training_step_time": 0.3929274082183838
    },
    {
      "epoch": 0.000317303466796875,
      "model_forward_time": 0.11571526527404785,
      "step": 51987
    },
    {
      "epoch": 0.000317303466796875,
      "step": 51987,
      "training_step_time": 0.396636962890625
    },
    {
      "epoch": 0.0003173095703125,
      "model_forward_time": 0.114898681640625,
      "step": 51988
    },
    {
      "epoch": 0.0003173095703125,
      "step": 51988,
      "training_step_time": 0.40262484550476074
    },
    {
      "epoch": 0.000317315673828125,
      "model_forward_time": 0.11484003067016602,
      "step": 51989
    },
    {
      "epoch": 0.000317315673828125,
      "step": 51989,
      "training_step_time": 0.402132511138916
    },
    {
      "epoch": 0.00031732177734375,
      "grad_norm": 0.07300786674022675,
      "learning_rate": 4.7939098651045235e-06,
      "loss": 0.0363,
      "step": 51990
    },
    {
      "epoch": 0.00031732177734375,
      "model_forward_time": 0.11513829231262207,
      "step": 51990
    },
    {
      "epoch": 0.00031732177734375,
      "step": 51990,
      "training_step_time": 0.39975976943969727
    },
    {
      "epoch": 0.000317327880859375,
      "model_forward_time": 0.11484265327453613,
      "step": 51991
    },
    {
      "epoch": 0.000317327880859375,
      "step": 51991,
      "training_step_time": 0.39277005195617676
    },
    {
      "epoch": 0.000317333984375,
      "model_forward_time": 0.11550617218017578,
      "step": 51992
    },
    {
      "epoch": 0.000317333984375,
      "step": 51992,
      "training_step_time": 0.42826151847839355
    },
    {
      "epoch": 0.000317340087890625,
      "model_forward_time": 0.11484026908874512,
      "step": 51993
    },
    {
      "epoch": 0.000317340087890625,
      "step": 51993,
      "training_step_time": 0.39720916748046875
    },
    {
      "epoch": 0.00031734619140625,
      "model_forward_time": 0.11631321907043457,
      "step": 51994
    },
    {
      "epoch": 0.00031734619140625,
      "step": 51994,
      "training_step_time": 0.499896764755249
    },
    {
      "epoch": 0.000317352294921875,
      "model_forward_time": 0.11514973640441895,
      "step": 51995
    },
    {
      "epoch": 0.000317352294921875,
      "step": 51995,
      "training_step_time": 0.4086935520172119
    },
    {
      "epoch": 0.0003173583984375,
      "model_forward_time": 0.11582231521606445,
      "step": 51996
    },
    {
      "epoch": 0.0003173583984375,
      "step": 51996,
      "training_step_time": 0.3684117794036865
    },
    {
      "epoch": 0.000317364501953125,
      "model_forward_time": 0.11799216270446777,
      "step": 51997
    },
    {
      "epoch": 0.000317364501953125,
      "step": 51997,
      "training_step_time": 0.45563340187072754
    },
    {
      "epoch": 0.00031737060546875,
      "model_forward_time": 0.11512589454650879,
      "step": 51998
    },
    {
      "epoch": 0.00031737060546875,
      "step": 51998,
      "training_step_time": 0.41696929931640625
    },
    {
      "epoch": 0.000317376708984375,
      "model_forward_time": 0.11539793014526367,
      "step": 51999
    },
    {
      "epoch": 0.000317376708984375,
      "step": 51999,
      "training_step_time": 0.39877939224243164
    },
    {
      "epoch": 0.0003173828125,
      "grad_norm": 0.11421408504247665,
      "learning_rate": 4.782141965121128e-06,
      "loss": 0.0373,
      "step": 52000
    },
    {
      "epoch": 0.0003173828125,
      "model_forward_time": 0.11227679252624512,
      "step": 52000
    },
    {
      "epoch": 0.0003173828125,
      "step": 52000,
      "training_step_time": 0.3527095317840576
    },
    {
      "epoch": 0.000317388916015625,
      "model_forward_time": 0.11255049705505371,
      "step": 52001
    },
    {
      "epoch": 0.000317388916015625,
      "step": 52001,
      "training_step_time": 0.3747560977935791
    },
    {
      "epoch": 0.00031739501953125,
      "model_forward_time": 0.11305093765258789,
      "step": 52002
    },
    {
      "epoch": 0.00031739501953125,
      "step": 52002,
      "training_step_time": 0.38416433334350586
    },
    {
      "epoch": 0.000317401123046875,
      "model_forward_time": 0.11345887184143066,
      "step": 52003
    },
    {
      "epoch": 0.000317401123046875,
      "step": 52003,
      "training_step_time": 0.3802475929260254
    },
    {
      "epoch": 0.0003174072265625,
      "model_forward_time": 0.11412167549133301,
      "step": 52004
    },
    {
      "epoch": 0.0003174072265625,
      "step": 52004,
      "training_step_time": 0.406749963760376
    },
    {
      "epoch": 0.000317413330078125,
      "model_forward_time": 0.1139376163482666,
      "step": 52005
    },
    {
      "epoch": 0.000317413330078125,
      "step": 52005,
      "training_step_time": 0.39983630180358887
    },
    {
      "epoch": 0.00031741943359375,
      "model_forward_time": 0.11486959457397461,
      "step": 52006
    },
    {
      "epoch": 0.00031741943359375,
      "step": 52006,
      "training_step_time": 0.3937036991119385
    },
    {
      "epoch": 0.000317425537109375,
      "model_forward_time": 0.11463379859924316,
      "step": 52007
    },
    {
      "epoch": 0.000317425537109375,
      "step": 52007,
      "training_step_time": 0.44012451171875
    },
    {
      "epoch": 0.000317431640625,
      "model_forward_time": 0.11528563499450684,
      "step": 52008
    },
    {
      "epoch": 0.000317431640625,
      "step": 52008,
      "training_step_time": 0.40958404541015625
    },
    {
      "epoch": 0.000317437744140625,
      "model_forward_time": 0.11532115936279297,
      "step": 52009
    },
    {
      "epoch": 0.000317437744140625,
      "step": 52009,
      "training_step_time": 0.4911525249481201
    },
    {
      "epoch": 0.00031744384765625,
      "grad_norm": 0.11683880537748337,
      "learning_rate": 4.770387801129084e-06,
      "loss": 0.0332,
      "step": 52010
    },
    {
      "epoch": 0.00031744384765625,
      "model_forward_time": 0.11518669128417969,
      "step": 52010
    },
    {
      "epoch": 0.00031744384765625,
      "step": 52010,
      "training_step_time": 0.3870673179626465
    },
    {
      "epoch": 0.000317449951171875,
      "model_forward_time": 0.11535120010375977,
      "step": 52011
    },
    {
      "epoch": 0.000317449951171875,
      "step": 52011,
      "training_step_time": 0.3660750389099121
    },
    {
      "epoch": 0.0003174560546875,
      "model_forward_time": 0.11512947082519531,
      "step": 52012
    },
    {
      "epoch": 0.0003174560546875,
      "step": 52012,
      "training_step_time": 0.4488980770111084
    },
    {
      "epoch": 0.000317462158203125,
      "model_forward_time": 0.11513614654541016,
      "step": 52013
    },
    {
      "epoch": 0.000317462158203125,
      "step": 52013,
      "training_step_time": 0.4069089889526367
    },
    {
      "epoch": 0.00031746826171875,
      "model_forward_time": 0.11482667922973633,
      "step": 52014
    },
    {
      "epoch": 0.00031746826171875,
      "step": 52014,
      "training_step_time": 0.40590596199035645
    },
    {
      "epoch": 0.000317474365234375,
      "model_forward_time": 0.11590361595153809,
      "step": 52015
    },
    {
      "epoch": 0.000317474365234375,
      "step": 52015,
      "training_step_time": 0.39858555793762207
    },
    {
      "epoch": 0.00031748046875,
      "model_forward_time": 0.11571955680847168,
      "step": 52016
    },
    {
      "epoch": 0.00031748046875,
      "step": 52016,
      "training_step_time": 0.39284729957580566
    },
    {
      "epoch": 0.000317486572265625,
      "model_forward_time": 0.11477136611938477,
      "step": 52017
    },
    {
      "epoch": 0.000317486572265625,
      "step": 52017,
      "training_step_time": 0.4077315330505371
    },
    {
      "epoch": 0.00031749267578125,
      "model_forward_time": 0.11542201042175293,
      "step": 52018
    },
    {
      "epoch": 0.00031749267578125,
      "step": 52018,
      "training_step_time": 0.39909887313842773
    },
    {
      "epoch": 0.000317498779296875,
      "model_forward_time": 0.11538100242614746,
      "step": 52019
    },
    {
      "epoch": 0.000317498779296875,
      "step": 52019,
      "training_step_time": 0.4326801300048828
    },
    {
      "epoch": 0.0003175048828125,
      "grad_norm": 0.10090762376785278,
      "learning_rate": 4.758647376699032e-06,
      "loss": 0.0413,
      "step": 52020
    },
    {
      "epoch": 0.0003175048828125,
      "model_forward_time": 0.1150217056274414,
      "step": 52020
    },
    {
      "epoch": 0.0003175048828125,
      "step": 52020,
      "training_step_time": 0.46315979957580566
    },
    {
      "epoch": 0.000317510986328125,
      "model_forward_time": 0.11508727073669434,
      "step": 52021
    },
    {
      "epoch": 0.000317510986328125,
      "step": 52021,
      "training_step_time": 0.42212343215942383
    },
    {
      "epoch": 0.00031751708984375,
      "model_forward_time": 0.11517453193664551,
      "step": 52022
    },
    {
      "epoch": 0.00031751708984375,
      "step": 52022,
      "training_step_time": 0.4556705951690674
    },
    {
      "epoch": 0.000317523193359375,
      "model_forward_time": 0.11525225639343262,
      "step": 52023
    },
    {
      "epoch": 0.000317523193359375,
      "step": 52023,
      "training_step_time": 0.4654359817504883
    },
    {
      "epoch": 0.000317529296875,
      "model_forward_time": 0.11519336700439453,
      "step": 52024
    },
    {
      "epoch": 0.000317529296875,
      "step": 52024,
      "training_step_time": 0.38780879974365234
    },
    {
      "epoch": 0.000317535400390625,
      "model_forward_time": 0.11482501029968262,
      "step": 52025
    },
    {
      "epoch": 0.000317535400390625,
      "step": 52025,
      "training_step_time": 0.39952850341796875
    },
    {
      "epoch": 0.00031754150390625,
      "model_forward_time": 0.1206045150756836,
      "step": 52026
    },
    {
      "epoch": 0.00031754150390625,
      "step": 52026,
      "training_step_time": 0.4097590446472168
    },
    {
      "epoch": 0.000317547607421875,
      "model_forward_time": 0.1150062084197998,
      "step": 52027
    },
    {
      "epoch": 0.000317547607421875,
      "step": 52027,
      "training_step_time": 0.4815638065338135
    },
    {
      "epoch": 0.0003175537109375,
      "model_forward_time": 0.11545681953430176,
      "step": 52028
    },
    {
      "epoch": 0.0003175537109375,
      "step": 52028,
      "training_step_time": 0.3981955051422119
    },
    {
      "epoch": 0.000317559814453125,
      "model_forward_time": 0.11484837532043457,
      "step": 52029
    },
    {
      "epoch": 0.000317559814453125,
      "step": 52029,
      "training_step_time": 0.3838217258453369
    },
    {
      "epoch": 0.00031756591796875,
      "grad_norm": 0.08260694891214371,
      "learning_rate": 4.7469206953973495e-06,
      "loss": 0.0359,
      "step": 52030
    },
    {
      "epoch": 0.00031756591796875,
      "model_forward_time": 0.11472582817077637,
      "step": 52030
    },
    {
      "epoch": 0.00031756591796875,
      "step": 52030,
      "training_step_time": 0.40179920196533203
    },
    {
      "epoch": 0.000317572021484375,
      "model_forward_time": 0.11439847946166992,
      "step": 52031
    },
    {
      "epoch": 0.000317572021484375,
      "step": 52031,
      "training_step_time": 0.39874935150146484
    },
    {
      "epoch": 0.000317578125,
      "model_forward_time": 0.11544418334960938,
      "step": 52032
    },
    {
      "epoch": 0.000317578125,
      "step": 52032,
      "training_step_time": 0.39291810989379883
    },
    {
      "epoch": 0.000317584228515625,
      "model_forward_time": 0.11475586891174316,
      "step": 52033
    },
    {
      "epoch": 0.000317584228515625,
      "step": 52033,
      "training_step_time": 0.39715576171875
    },
    {
      "epoch": 0.00031759033203125,
      "model_forward_time": 0.11547684669494629,
      "step": 52034
    },
    {
      "epoch": 0.00031759033203125,
      "step": 52034,
      "training_step_time": 0.4540679454803467
    },
    {
      "epoch": 0.000317596435546875,
      "model_forward_time": 0.11458659172058105,
      "step": 52035
    },
    {
      "epoch": 0.000317596435546875,
      "step": 52035,
      "training_step_time": 0.4513070583343506
    },
    {
      "epoch": 0.0003176025390625,
      "model_forward_time": 0.11542129516601562,
      "step": 52036
    },
    {
      "epoch": 0.0003176025390625,
      "step": 52036,
      "training_step_time": 0.5228126049041748
    },
    {
      "epoch": 0.000317608642578125,
      "model_forward_time": 0.11483573913574219,
      "step": 52037
    },
    {
      "epoch": 0.000317608642578125,
      "step": 52037,
      "training_step_time": 0.44315576553344727
    },
    {
      "epoch": 0.00031761474609375,
      "model_forward_time": 0.1152794361114502,
      "step": 52038
    },
    {
      "epoch": 0.00031761474609375,
      "step": 52038,
      "training_step_time": 0.39694833755493164
    },
    {
      "epoch": 0.000317620849609375,
      "model_forward_time": 0.11473226547241211,
      "step": 52039
    },
    {
      "epoch": 0.000317620849609375,
      "step": 52039,
      "training_step_time": 0.3963963985443115
    },
    {
      "epoch": 0.000317626953125,
      "grad_norm": 0.11880729347467422,
      "learning_rate": 4.7352077607863475e-06,
      "loss": 0.0359,
      "step": 52040
    },
    {
      "epoch": 0.000317626953125,
      "model_forward_time": 0.11450386047363281,
      "step": 52040
    },
    {
      "epoch": 0.000317626953125,
      "step": 52040,
      "training_step_time": 0.36696767807006836
    },
    {
      "epoch": 0.000317633056640625,
      "model_forward_time": 0.11440682411193848,
      "step": 52041
    },
    {
      "epoch": 0.000317633056640625,
      "step": 52041,
      "training_step_time": 0.44327235221862793
    },
    {
      "epoch": 0.00031763916015625,
      "model_forward_time": 0.11469197273254395,
      "step": 52042
    },
    {
      "epoch": 0.00031763916015625,
      "step": 52042,
      "training_step_time": 0.42993807792663574
    },
    {
      "epoch": 0.000317645263671875,
      "model_forward_time": 0.11466693878173828,
      "step": 52043
    },
    {
      "epoch": 0.000317645263671875,
      "step": 52043,
      "training_step_time": 0.3991200923919678
    },
    {
      "epoch": 0.0003176513671875,
      "model_forward_time": 0.11701679229736328,
      "step": 52044
    },
    {
      "epoch": 0.0003176513671875,
      "step": 52044,
      "training_step_time": 0.4027888774871826
    },
    {
      "epoch": 0.000317657470703125,
      "model_forward_time": 0.11568188667297363,
      "step": 52045
    },
    {
      "epoch": 0.000317657470703125,
      "step": 52045,
      "training_step_time": 0.39695310592651367
    },
    {
      "epoch": 0.00031766357421875,
      "model_forward_time": 0.11498332023620605,
      "step": 52046
    },
    {
      "epoch": 0.00031766357421875,
      "step": 52046,
      "training_step_time": 0.3960700035095215
    },
    {
      "epoch": 0.000317669677734375,
      "model_forward_time": 0.11522674560546875,
      "step": 52047
    },
    {
      "epoch": 0.000317669677734375,
      "step": 52047,
      "training_step_time": 0.39789915084838867
    },
    {
      "epoch": 0.00031767578125,
      "model_forward_time": 0.11549520492553711,
      "step": 52048
    },
    {
      "epoch": 0.00031767578125,
      "step": 52048,
      "training_step_time": 0.39002466201782227
    },
    {
      "epoch": 0.000317681884765625,
      "model_forward_time": 0.1155390739440918,
      "step": 52049
    },
    {
      "epoch": 0.000317681884765625,
      "step": 52049,
      "training_step_time": 0.4449462890625
    },
    {
      "epoch": 0.00031768798828125,
      "grad_norm": 0.07742038369178772,
      "learning_rate": 4.723508576424062e-06,
      "loss": 0.0393,
      "step": 52050
    },
    {
      "epoch": 0.00031768798828125,
      "model_forward_time": 0.11522531509399414,
      "step": 52050
    },
    {
      "epoch": 0.00031768798828125,
      "step": 52050,
      "training_step_time": 0.4772038459777832
    },
    {
      "epoch": 0.000317694091796875,
      "model_forward_time": 0.11497616767883301,
      "step": 52051
    },
    {
      "epoch": 0.000317694091796875,
      "step": 52051,
      "training_step_time": 0.43283724784851074
    },
    {
      "epoch": 0.0003177001953125,
      "model_forward_time": 0.11605429649353027,
      "step": 52052
    },
    {
      "epoch": 0.0003177001953125,
      "step": 52052,
      "training_step_time": 0.483654260635376
    },
    {
      "epoch": 0.000317706298828125,
      "model_forward_time": 0.11472296714782715,
      "step": 52053
    },
    {
      "epoch": 0.000317706298828125,
      "step": 52053,
      "training_step_time": 0.38782715797424316
    },
    {
      "epoch": 0.00031771240234375,
      "model_forward_time": 0.11518478393554688,
      "step": 52054
    },
    {
      "epoch": 0.00031771240234375,
      "step": 52054,
      "training_step_time": 0.3978404998779297
    },
    {
      "epoch": 0.000317718505859375,
      "model_forward_time": 0.1145012378692627,
      "step": 52055
    },
    {
      "epoch": 0.000317718505859375,
      "step": 52055,
      "training_step_time": 0.38446784019470215
    },
    {
      "epoch": 0.000317724609375,
      "model_forward_time": 0.11537981033325195,
      "step": 52056
    },
    {
      "epoch": 0.000317724609375,
      "step": 52056,
      "training_step_time": 0.4874293804168701
    },
    {
      "epoch": 0.000317730712890625,
      "model_forward_time": 0.11483001708984375,
      "step": 52057
    },
    {
      "epoch": 0.000317730712890625,
      "step": 52057,
      "training_step_time": 0.4218862056732178
    },
    {
      "epoch": 0.00031773681640625,
      "model_forward_time": 0.11892461776733398,
      "step": 52058
    },
    {
      "epoch": 0.00031773681640625,
      "step": 52058,
      "training_step_time": 0.3957679271697998
    },
    {
      "epoch": 0.000317742919921875,
      "model_forward_time": 0.1186227798461914,
      "step": 52059
    },
    {
      "epoch": 0.000317742919921875,
      "step": 52059,
      "training_step_time": 0.5577704906463623
    },
    {
      "epoch": 0.0003177490234375,
      "grad_norm": 0.08091342449188232,
      "learning_rate": 4.711823145864419e-06,
      "loss": 0.031,
      "step": 52060
    },
    {
      "epoch": 0.0003177490234375,
      "model_forward_time": 0.11454939842224121,
      "step": 52060
    },
    {
      "epoch": 0.0003177490234375,
      "step": 52060,
      "training_step_time": 0.3898622989654541
    },
    {
      "epoch": 0.000317755126953125,
      "model_forward_time": 0.11698412895202637,
      "step": 52061
    },
    {
      "epoch": 0.000317755126953125,
      "step": 52061,
      "training_step_time": 0.42993736267089844
    },
    {
      "epoch": 0.00031776123046875,
      "model_forward_time": 0.11545467376708984,
      "step": 52062
    },
    {
      "epoch": 0.00031776123046875,
      "step": 52062,
      "training_step_time": 0.408372163772583
    },
    {
      "epoch": 0.000317767333984375,
      "model_forward_time": 0.11419200897216797,
      "step": 52063
    },
    {
      "epoch": 0.000317767333984375,
      "step": 52063,
      "training_step_time": 0.4443342685699463
    },
    {
      "epoch": 0.0003177734375,
      "model_forward_time": 0.11535239219665527,
      "step": 52064
    },
    {
      "epoch": 0.0003177734375,
      "step": 52064,
      "training_step_time": 0.48739147186279297
    },
    {
      "epoch": 0.000317779541015625,
      "model_forward_time": 0.11506152153015137,
      "step": 52065
    },
    {
      "epoch": 0.000317779541015625,
      "step": 52065,
      "training_step_time": 0.43770313262939453
    },
    {
      "epoch": 0.00031778564453125,
      "model_forward_time": 0.1148068904876709,
      "step": 52066
    },
    {
      "epoch": 0.00031778564453125,
      "step": 52066,
      "training_step_time": 0.4388105869293213
    },
    {
      "epoch": 0.000317791748046875,
      "model_forward_time": 0.11492300033569336,
      "step": 52067
    },
    {
      "epoch": 0.000317791748046875,
      "step": 52067,
      "training_step_time": 0.3866586685180664
    },
    {
      "epoch": 0.0003177978515625,
      "model_forward_time": 0.11506772041320801,
      "step": 52068
    },
    {
      "epoch": 0.0003177978515625,
      "step": 52068,
      "training_step_time": 0.39211535453796387
    },
    {
      "epoch": 0.000317803955078125,
      "model_forward_time": 0.1157996654510498,
      "step": 52069
    },
    {
      "epoch": 0.000317803955078125,
      "step": 52069,
      "training_step_time": 0.41440486907958984
    },
    {
      "epoch": 0.00031781005859375,
      "grad_norm": 0.09741143882274628,
      "learning_rate": 4.700151472657144e-06,
      "loss": 0.038,
      "step": 52070
    },
    {
      "epoch": 0.00031781005859375,
      "model_forward_time": 0.11545228958129883,
      "step": 52070
    },
    {
      "epoch": 0.00031781005859375,
      "step": 52070,
      "training_step_time": 0.392899751663208
    },
    {
      "epoch": 0.000317816162109375,
      "model_forward_time": 0.11522030830383301,
      "step": 52071
    },
    {
      "epoch": 0.000317816162109375,
      "step": 52071,
      "training_step_time": 0.7172126770019531
    },
    {
      "epoch": 0.000317822265625,
      "model_forward_time": 0.11489462852478027,
      "step": 52072
    },
    {
      "epoch": 0.000317822265625,
      "step": 52072,
      "training_step_time": 0.38788342475891113
    },
    {
      "epoch": 0.000317828369140625,
      "model_forward_time": 0.11452245712280273,
      "step": 52073
    },
    {
      "epoch": 0.000317828369140625,
      "step": 52073,
      "training_step_time": 0.3918004035949707
    },
    {
      "epoch": 0.00031783447265625,
      "model_forward_time": 0.11439990997314453,
      "step": 52074
    },
    {
      "epoch": 0.00031783447265625,
      "step": 52074,
      "training_step_time": 0.3716287612915039
    },
    {
      "epoch": 0.000317840576171875,
      "model_forward_time": 0.11541867256164551,
      "step": 52075
    },
    {
      "epoch": 0.000317840576171875,
      "step": 52075,
      "training_step_time": 0.41282200813293457
    },
    {
      "epoch": 0.0003178466796875,
      "model_forward_time": 0.11493921279907227,
      "step": 52076
    },
    {
      "epoch": 0.0003178466796875,
      "step": 52076,
      "training_step_time": 0.4616415500640869
    },
    {
      "epoch": 0.000317852783203125,
      "model_forward_time": 0.11458468437194824,
      "step": 52077
    },
    {
      "epoch": 0.000317852783203125,
      "step": 52077,
      "training_step_time": 0.8600587844848633
    },
    {
      "epoch": 0.00031785888671875,
      "model_forward_time": 0.1146392822265625,
      "step": 52078
    },
    {
      "epoch": 0.00031785888671875,
      "step": 52078,
      "training_step_time": 0.41628313064575195
    },
    {
      "epoch": 0.000317864990234375,
      "model_forward_time": 0.11514544486999512,
      "step": 52079
    },
    {
      "epoch": 0.000317864990234375,
      "step": 52079,
      "training_step_time": 0.4192073345184326
    },
    {
      "epoch": 0.00031787109375,
      "grad_norm": 0.14940854907035828,
      "learning_rate": 4.688493560347773e-06,
      "loss": 0.0354,
      "step": 52080
    },
    {
      "epoch": 0.00031787109375,
      "model_forward_time": 0.1148521900177002,
      "step": 52080
    },
    {
      "epoch": 0.00031787109375,
      "step": 52080,
      "training_step_time": 0.38782262802124023
    },
    {
      "epoch": 0.000317877197265625,
      "model_forward_time": 0.11477398872375488,
      "step": 52081
    },
    {
      "epoch": 0.000317877197265625,
      "step": 52081,
      "training_step_time": 0.38607096672058105
    },
    {
      "epoch": 0.00031788330078125,
      "model_forward_time": 0.11490845680236816,
      "step": 52082
    },
    {
      "epoch": 0.00031788330078125,
      "step": 52082,
      "training_step_time": 0.42781662940979004
    },
    {
      "epoch": 0.000317889404296875,
      "model_forward_time": 0.11501932144165039,
      "step": 52083
    },
    {
      "epoch": 0.000317889404296875,
      "step": 52083,
      "training_step_time": 0.48360157012939453
    },
    {
      "epoch": 0.0003178955078125,
      "model_forward_time": 0.11572742462158203,
      "step": 52084
    },
    {
      "epoch": 0.0003178955078125,
      "step": 52084,
      "training_step_time": 0.40984582901000977
    },
    {
      "epoch": 0.000317901611328125,
      "model_forward_time": 0.11500716209411621,
      "step": 52085
    },
    {
      "epoch": 0.000317901611328125,
      "step": 52085,
      "training_step_time": 0.40062570571899414
    },
    {
      "epoch": 0.00031790771484375,
      "model_forward_time": 0.1163642406463623,
      "step": 52086
    },
    {
      "epoch": 0.00031790771484375,
      "step": 52086,
      "training_step_time": 0.40144848823547363
    },
    {
      "epoch": 0.000317913818359375,
      "model_forward_time": 0.11454248428344727,
      "step": 52087
    },
    {
      "epoch": 0.000317913818359375,
      "step": 52087,
      "training_step_time": 0.4217345714569092
    },
    {
      "epoch": 0.000317919921875,
      "model_forward_time": 0.11646819114685059,
      "step": 52088
    },
    {
      "epoch": 0.000317919921875,
      "step": 52088,
      "training_step_time": 0.41153454780578613
    },
    {
      "epoch": 0.000317926025390625,
      "model_forward_time": 0.1158914566040039,
      "step": 52089
    },
    {
      "epoch": 0.000317926025390625,
      "step": 52089,
      "training_step_time": 0.5235462188720703
    },
    {
      "epoch": 0.00031793212890625,
      "grad_norm": 0.07890652120113373,
      "learning_rate": 4.67684941247768e-06,
      "loss": 0.0329,
      "step": 52090
    },
    {
      "epoch": 0.00031793212890625,
      "model_forward_time": 0.11548757553100586,
      "step": 52090
    },
    {
      "epoch": 0.00031793212890625,
      "step": 52090,
      "training_step_time": 0.383697509765625
    },
    {
      "epoch": 0.000317938232421875,
      "model_forward_time": 0.11522150039672852,
      "step": 52091
    },
    {
      "epoch": 0.000317938232421875,
      "step": 52091,
      "training_step_time": 0.4870481491088867
    },
    {
      "epoch": 0.0003179443359375,
      "model_forward_time": 0.11479496955871582,
      "step": 52092
    },
    {
      "epoch": 0.0003179443359375,
      "step": 52092,
      "training_step_time": 0.5078887939453125
    },
    {
      "epoch": 0.000317950439453125,
      "model_forward_time": 0.11474132537841797,
      "step": 52093
    },
    {
      "epoch": 0.000317950439453125,
      "step": 52093,
      "training_step_time": 0.4162256717681885
    },
    {
      "epoch": 0.00031795654296875,
      "model_forward_time": 0.11475586891174316,
      "step": 52094
    },
    {
      "epoch": 0.00031795654296875,
      "step": 52094,
      "training_step_time": 0.4005284309387207
    },
    {
      "epoch": 0.000317962646484375,
      "model_forward_time": 0.1144723892211914,
      "step": 52095
    },
    {
      "epoch": 0.000317962646484375,
      "step": 52095,
      "training_step_time": 0.5140318870544434
    },
    {
      "epoch": 0.00031796875,
      "model_forward_time": 0.11715817451477051,
      "step": 52096
    },
    {
      "epoch": 0.00031796875,
      "step": 52096,
      "training_step_time": 0.40157365798950195
    },
    {
      "epoch": 0.000317974853515625,
      "model_forward_time": 0.11850547790527344,
      "step": 52097
    },
    {
      "epoch": 0.000317974853515625,
      "step": 52097,
      "training_step_time": 0.389514684677124
    },
    {
      "epoch": 0.00031798095703125,
      "model_forward_time": 0.1182699203491211,
      "step": 52098
    },
    {
      "epoch": 0.00031798095703125,
      "step": 52098,
      "training_step_time": 0.3882310390472412
    },
    {
      "epoch": 0.000317987060546875,
      "model_forward_time": 0.1183929443359375,
      "step": 52099
    },
    {
      "epoch": 0.000317987060546875,
      "step": 52099,
      "training_step_time": 0.38214993476867676
    },
    {
      "epoch": 0.0003179931640625,
      "grad_norm": 0.09225483238697052,
      "learning_rate": 4.66521903258404e-06,
      "loss": 0.0349,
      "step": 52100
    },
    {
      "epoch": 0.0003179931640625,
      "model_forward_time": 0.11838507652282715,
      "step": 52100
    },
    {
      "epoch": 0.0003179931640625,
      "step": 52100,
      "training_step_time": 0.3871021270751953
    },
    {
      "epoch": 0.000317999267578125,
      "model_forward_time": 0.11569619178771973,
      "step": 52101
    },
    {
      "epoch": 0.000317999267578125,
      "step": 52101,
      "training_step_time": 0.48815083503723145
    },
    {
      "epoch": 0.00031800537109375,
      "model_forward_time": 0.11532068252563477,
      "step": 52102
    },
    {
      "epoch": 0.00031800537109375,
      "step": 52102,
      "training_step_time": 0.4687480926513672
    },
    {
      "epoch": 0.000318011474609375,
      "model_forward_time": 0.11524343490600586,
      "step": 52103
    },
    {
      "epoch": 0.000318011474609375,
      "step": 52103,
      "training_step_time": 0.3809647560119629
    },
    {
      "epoch": 0.000318017578125,
      "model_forward_time": 0.11632847785949707,
      "step": 52104
    },
    {
      "epoch": 0.000318017578125,
      "step": 52104,
      "training_step_time": 0.3893866539001465
    },
    {
      "epoch": 0.000318023681640625,
      "model_forward_time": 0.11679983139038086,
      "step": 52105
    },
    {
      "epoch": 0.000318023681640625,
      "step": 52105,
      "training_step_time": 0.5025796890258789
    },
    {
      "epoch": 0.00031802978515625,
      "model_forward_time": 0.11530709266662598,
      "step": 52106
    },
    {
      "epoch": 0.00031802978515625,
      "step": 52106,
      "training_step_time": 0.42065978050231934
    },
    {
      "epoch": 0.000318035888671875,
      "model_forward_time": 0.11542773246765137,
      "step": 52107
    },
    {
      "epoch": 0.000318035888671875,
      "step": 52107,
      "training_step_time": 0.4827878475189209
    },
    {
      "epoch": 0.0003180419921875,
      "model_forward_time": 0.11528348922729492,
      "step": 52108
    },
    {
      "epoch": 0.0003180419921875,
      "step": 52108,
      "training_step_time": 0.3882572650909424
    },
    {
      "epoch": 0.000318048095703125,
      "model_forward_time": 0.11496520042419434,
      "step": 52109
    },
    {
      "epoch": 0.000318048095703125,
      "step": 52109,
      "training_step_time": 0.38222265243530273
    },
    {
      "epoch": 0.00031805419921875,
      "grad_norm": 0.07396679371595383,
      "learning_rate": 4.653602424199876e-06,
      "loss": 0.0374,
      "step": 52110
    },
    {
      "epoch": 0.00031805419921875,
      "model_forward_time": 0.1150655746459961,
      "step": 52110
    },
    {
      "epoch": 0.00031805419921875,
      "step": 52110,
      "training_step_time": 0.36840319633483887
    },
    {
      "epoch": 0.000318060302734375,
      "model_forward_time": 0.11519598960876465,
      "step": 52111
    },
    {
      "epoch": 0.000318060302734375,
      "step": 52111,
      "training_step_time": 0.4113147258758545
    },
    {
      "epoch": 0.00031806640625,
      "model_forward_time": 0.11507678031921387,
      "step": 52112
    },
    {
      "epoch": 0.00031806640625,
      "step": 52112,
      "training_step_time": 0.4961686134338379
    },
    {
      "epoch": 0.000318072509765625,
      "model_forward_time": 0.11501026153564453,
      "step": 52113
    },
    {
      "epoch": 0.000318072509765625,
      "step": 52113,
      "training_step_time": 0.39269518852233887
    },
    {
      "epoch": 0.00031807861328125,
      "model_forward_time": 0.11570215225219727,
      "step": 52114
    },
    {
      "epoch": 0.00031807861328125,
      "step": 52114,
      "training_step_time": 0.4025702476501465
    },
    {
      "epoch": 0.000318084716796875,
      "model_forward_time": 0.11572790145874023,
      "step": 52115
    },
    {
      "epoch": 0.000318084716796875,
      "step": 52115,
      "training_step_time": 0.4166259765625
    },
    {
      "epoch": 0.0003180908203125,
      "model_forward_time": 0.11511397361755371,
      "step": 52116
    },
    {
      "epoch": 0.0003180908203125,
      "step": 52116,
      "training_step_time": 0.4035146236419678
    },
    {
      "epoch": 0.000318096923828125,
      "model_forward_time": 0.11500716209411621,
      "step": 52117
    },
    {
      "epoch": 0.000318096923828125,
      "step": 52117,
      "training_step_time": 0.39011263847351074
    },
    {
      "epoch": 0.00031810302734375,
      "model_forward_time": 0.11594843864440918,
      "step": 52118
    },
    {
      "epoch": 0.00031810302734375,
      "step": 52118,
      "training_step_time": 0.40166354179382324
    },
    {
      "epoch": 0.000318109130859375,
      "model_forward_time": 0.11553812026977539,
      "step": 52119
    },
    {
      "epoch": 0.000318109130859375,
      "step": 52119,
      "training_step_time": 0.3895230293273926
    },
    {
      "epoch": 0.000318115234375,
      "grad_norm": 0.10728587210178375,
      "learning_rate": 4.64199959085398e-06,
      "loss": 0.0418,
      "step": 52120
    },
    {
      "epoch": 0.000318115234375,
      "model_forward_time": 0.11680269241333008,
      "step": 52120
    },
    {
      "epoch": 0.000318115234375,
      "step": 52120,
      "training_step_time": 0.41361498832702637
    },
    {
      "epoch": 0.000318121337890625,
      "model_forward_time": 0.11549758911132812,
      "step": 52121
    },
    {
      "epoch": 0.000318121337890625,
      "step": 52121,
      "training_step_time": 0.45533108711242676
    },
    {
      "epoch": 0.00031812744140625,
      "model_forward_time": 0.11549091339111328,
      "step": 52122
    },
    {
      "epoch": 0.00031812744140625,
      "step": 52122,
      "training_step_time": 0.42609095573425293
    },
    {
      "epoch": 0.000318133544921875,
      "model_forward_time": 0.11507964134216309,
      "step": 52123
    },
    {
      "epoch": 0.000318133544921875,
      "step": 52123,
      "training_step_time": 0.3945765495300293
    },
    {
      "epoch": 0.0003181396484375,
      "model_forward_time": 0.11669921875,
      "step": 52124
    },
    {
      "epoch": 0.0003181396484375,
      "step": 52124,
      "training_step_time": 0.3907496929168701
    },
    {
      "epoch": 0.000318145751953125,
      "model_forward_time": 0.11510872840881348,
      "step": 52125
    },
    {
      "epoch": 0.000318145751953125,
      "step": 52125,
      "training_step_time": 0.831904411315918
    },
    {
      "epoch": 0.00031815185546875,
      "model_forward_time": 0.11503767967224121,
      "step": 52126
    },
    {
      "epoch": 0.00031815185546875,
      "step": 52126,
      "training_step_time": 0.3976173400878906
    },
    {
      "epoch": 0.000318157958984375,
      "model_forward_time": 0.11452817916870117,
      "step": 52127
    },
    {
      "epoch": 0.000318157958984375,
      "step": 52127,
      "training_step_time": 0.44513940811157227
    },
    {
      "epoch": 0.0003181640625,
      "model_forward_time": 0.11384773254394531,
      "step": 52128
    },
    {
      "epoch": 0.0003181640625,
      "step": 52128,
      "training_step_time": 0.4513273239135742
    },
    {
      "epoch": 0.000318170166015625,
      "model_forward_time": 0.11482119560241699,
      "step": 52129
    },
    {
      "epoch": 0.000318170166015625,
      "step": 52129,
      "training_step_time": 0.38761210441589355
    },
    {
      "epoch": 0.00031817626953125,
      "grad_norm": 0.13310448825359344,
      "learning_rate": 4.630410536071006e-06,
      "loss": 0.0348,
      "step": 52130
    },
    {
      "epoch": 0.00031817626953125,
      "model_forward_time": 0.11432147026062012,
      "step": 52130
    },
    {
      "epoch": 0.00031817626953125,
      "step": 52130,
      "training_step_time": 0.3889472484588623
    },
    {
      "epoch": 0.000318182373046875,
      "model_forward_time": 0.11569428443908691,
      "step": 52131
    },
    {
      "epoch": 0.000318182373046875,
      "step": 52131,
      "training_step_time": 0.8278787136077881
    },
    {
      "epoch": 0.0003181884765625,
      "model_forward_time": 0.11459088325500488,
      "step": 52132
    },
    {
      "epoch": 0.0003181884765625,
      "step": 52132,
      "training_step_time": 0.39310312271118164
    },
    {
      "epoch": 0.000318194580078125,
      "model_forward_time": 0.11423730850219727,
      "step": 52133
    },
    {
      "epoch": 0.000318194580078125,
      "step": 52133,
      "training_step_time": 0.4401693344116211
    },
    {
      "epoch": 0.00031820068359375,
      "model_forward_time": 0.11453485488891602,
      "step": 52134
    },
    {
      "epoch": 0.00031820068359375,
      "step": 52134,
      "training_step_time": 0.41623783111572266
    },
    {
      "epoch": 0.000318206787109375,
      "model_forward_time": 0.11431002616882324,
      "step": 52135
    },
    {
      "epoch": 0.000318206787109375,
      "step": 52135,
      "training_step_time": 0.38574814796447754
    },
    {
      "epoch": 0.000318212890625,
      "model_forward_time": 0.11417269706726074,
      "step": 52136
    },
    {
      "epoch": 0.000318212890625,
      "step": 52136,
      "training_step_time": 0.3883640766143799
    },
    {
      "epoch": 0.000318218994140625,
      "model_forward_time": 0.1151726245880127,
      "step": 52137
    },
    {
      "epoch": 0.000318218994140625,
      "step": 52137,
      "training_step_time": 0.5830938816070557
    },
    {
      "epoch": 0.00031822509765625,
      "model_forward_time": 0.11520051956176758,
      "step": 52138
    },
    {
      "epoch": 0.00031822509765625,
      "step": 52138,
      "training_step_time": 0.3903787136077881
    },
    {
      "epoch": 0.000318231201171875,
      "model_forward_time": 0.1148536205291748,
      "step": 52139
    },
    {
      "epoch": 0.000318231201171875,
      "step": 52139,
      "training_step_time": 0.48761582374572754
    },
    {
      "epoch": 0.0003182373046875,
      "grad_norm": 0.09364891797304153,
      "learning_rate": 4.618835263371396e-06,
      "loss": 0.0358,
      "step": 52140
    },
    {
      "epoch": 0.0003182373046875,
      "model_forward_time": 0.1150519847869873,
      "step": 52140
    },
    {
      "epoch": 0.0003182373046875,
      "step": 52140,
      "training_step_time": 0.43512701988220215
    },
    {
      "epoch": 0.000318243408203125,
      "model_forward_time": 0.11689090728759766,
      "step": 52141
    },
    {
      "epoch": 0.000318243408203125,
      "step": 52141,
      "training_step_time": 0.3898015022277832
    },
    {
      "epoch": 0.00031824951171875,
      "model_forward_time": 0.1148519515991211,
      "step": 52142
    },
    {
      "epoch": 0.00031824951171875,
      "step": 52142,
      "training_step_time": 0.3917500972747803
    },
    {
      "epoch": 0.000318255615234375,
      "model_forward_time": 0.11560654640197754,
      "step": 52143
    },
    {
      "epoch": 0.000318255615234375,
      "step": 52143,
      "training_step_time": 0.6946327686309814
    },
    {
      "epoch": 0.00031826171875,
      "model_forward_time": 0.11446595191955566,
      "step": 52144
    },
    {
      "epoch": 0.00031826171875,
      "step": 52144,
      "training_step_time": 0.39806222915649414
    },
    {
      "epoch": 0.000318267822265625,
      "model_forward_time": 0.11425924301147461,
      "step": 52145
    },
    {
      "epoch": 0.000318267822265625,
      "step": 52145,
      "training_step_time": 0.39028263092041016
    },
    {
      "epoch": 0.00031827392578125,
      "model_forward_time": 0.11453557014465332,
      "step": 52146
    },
    {
      "epoch": 0.00031827392578125,
      "step": 52146,
      "training_step_time": 0.44353151321411133
    },
    {
      "epoch": 0.000318280029296875,
      "model_forward_time": 0.11492538452148438,
      "step": 52147
    },
    {
      "epoch": 0.000318280029296875,
      "step": 52147,
      "training_step_time": 0.4234774112701416
    },
    {
      "epoch": 0.0003182861328125,
      "model_forward_time": 0.11460375785827637,
      "step": 52148
    },
    {
      "epoch": 0.0003182861328125,
      "step": 52148,
      "training_step_time": 0.4196450710296631
    },
    {
      "epoch": 0.000318292236328125,
      "model_forward_time": 0.1153256893157959,
      "step": 52149
    },
    {
      "epoch": 0.000318292236328125,
      "step": 52149,
      "training_step_time": 0.3877897262573242
    },
    {
      "epoch": 0.00031829833984375,
      "grad_norm": 0.09491204470396042,
      "learning_rate": 4.6072737762714144e-06,
      "loss": 0.0339,
      "step": 52150
    },
    {
      "epoch": 0.00031829833984375,
      "model_forward_time": 0.11477828025817871,
      "step": 52150
    },
    {
      "epoch": 0.00031829833984375,
      "step": 52150,
      "training_step_time": 0.39989161491394043
    },
    {
      "epoch": 0.000318304443359375,
      "model_forward_time": 0.11521315574645996,
      "step": 52151
    },
    {
      "epoch": 0.000318304443359375,
      "step": 52151,
      "training_step_time": 0.39249157905578613
    },
    {
      "epoch": 0.000318310546875,
      "model_forward_time": 0.11500263214111328,
      "step": 52152
    },
    {
      "epoch": 0.000318310546875,
      "step": 52152,
      "training_step_time": 0.38599276542663574
    },
    {
      "epoch": 0.000318316650390625,
      "model_forward_time": 0.11482596397399902,
      "step": 52153
    },
    {
      "epoch": 0.000318316650390625,
      "step": 52153,
      "training_step_time": 0.41983747482299805
    },
    {
      "epoch": 0.00031832275390625,
      "model_forward_time": 0.11579203605651855,
      "step": 52154
    },
    {
      "epoch": 0.00031832275390625,
      "step": 52154,
      "training_step_time": 0.49100756645202637
    },
    {
      "epoch": 0.000318328857421875,
      "model_forward_time": 0.11530232429504395,
      "step": 52155
    },
    {
      "epoch": 0.000318328857421875,
      "step": 52155,
      "training_step_time": 0.395871639251709
    },
    {
      "epoch": 0.0003183349609375,
      "model_forward_time": 0.11505770683288574,
      "step": 52156
    },
    {
      "epoch": 0.0003183349609375,
      "step": 52156,
      "training_step_time": 0.3941473960876465
    },
    {
      "epoch": 0.000318341064453125,
      "model_forward_time": 0.11557888984680176,
      "step": 52157
    },
    {
      "epoch": 0.000318341064453125,
      "step": 52157,
      "training_step_time": 0.40508532524108887
    },
    {
      "epoch": 0.00031834716796875,
      "model_forward_time": 0.11558175086975098,
      "step": 52158
    },
    {
      "epoch": 0.00031834716796875,
      "step": 52158,
      "training_step_time": 0.39368629455566406
    },
    {
      "epoch": 0.000318353271484375,
      "model_forward_time": 0.11444687843322754,
      "step": 52159
    },
    {
      "epoch": 0.000318353271484375,
      "step": 52159,
      "training_step_time": 0.39395928382873535
    },
    {
      "epoch": 0.000318359375,
      "grad_norm": 0.08478603512048721,
      "learning_rate": 4.595726078283136e-06,
      "loss": 0.0354,
      "step": 52160
    },
    {
      "epoch": 0.000318359375,
      "model_forward_time": 0.11583828926086426,
      "step": 52160
    },
    {
      "epoch": 0.000318359375,
      "step": 52160,
      "training_step_time": 0.39958834648132324
    },
    {
      "epoch": 0.000318365478515625,
      "model_forward_time": 0.11554384231567383,
      "step": 52161
    },
    {
      "epoch": 0.000318365478515625,
      "step": 52161,
      "training_step_time": 0.49945664405822754
    },
    {
      "epoch": 0.00031837158203125,
      "model_forward_time": 0.11479520797729492,
      "step": 52162
    },
    {
      "epoch": 0.00031837158203125,
      "step": 52162,
      "training_step_time": 0.5448884963989258
    },
    {
      "epoch": 0.000318377685546875,
      "model_forward_time": 0.11486172676086426,
      "step": 52163
    },
    {
      "epoch": 0.000318377685546875,
      "step": 52163,
      "training_step_time": 0.3891129493713379
    },
    {
      "epoch": 0.0003183837890625,
      "model_forward_time": 0.11493992805480957,
      "step": 52164
    },
    {
      "epoch": 0.0003183837890625,
      "step": 52164,
      "training_step_time": 0.38939881324768066
    },
    {
      "epoch": 0.000318389892578125,
      "model_forward_time": 0.11513304710388184,
      "step": 52165
    },
    {
      "epoch": 0.000318389892578125,
      "step": 52165,
      "training_step_time": 0.3886077404022217
    },
    {
      "epoch": 0.00031839599609375,
      "model_forward_time": 0.11492013931274414,
      "step": 52166
    },
    {
      "epoch": 0.00031839599609375,
      "step": 52166,
      "training_step_time": 0.3867173194885254
    },
    {
      "epoch": 0.000318402099609375,
      "model_forward_time": 0.1148228645324707,
      "step": 52167
    },
    {
      "epoch": 0.000318402099609375,
      "step": 52167,
      "training_step_time": 0.39559412002563477
    },
    {
      "epoch": 0.000318408203125,
      "model_forward_time": 0.11512351036071777,
      "step": 52168
    },
    {
      "epoch": 0.000318408203125,
      "step": 52168,
      "training_step_time": 0.5004866123199463
    },
    {
      "epoch": 0.000318414306640625,
      "model_forward_time": 0.11436009407043457,
      "step": 52169
    },
    {
      "epoch": 0.000318414306640625,
      "step": 52169,
      "training_step_time": 0.47917985916137695
    },
    {
      "epoch": 0.00031842041015625,
      "grad_norm": 0.09394225478172302,
      "learning_rate": 4.5841921729144424e-06,
      "loss": 0.0314,
      "step": 52170
    },
    {
      "epoch": 0.00031842041015625,
      "model_forward_time": 0.11425638198852539,
      "step": 52170
    },
    {
      "epoch": 0.00031842041015625,
      "step": 52170,
      "training_step_time": 0.3838613033294678
    },
    {
      "epoch": 0.000318426513671875,
      "model_forward_time": 0.11487889289855957,
      "step": 52171
    },
    {
      "epoch": 0.000318426513671875,
      "step": 52171,
      "training_step_time": 0.38776135444641113
    },
    {
      "epoch": 0.0003184326171875,
      "model_forward_time": 0.11491632461547852,
      "step": 52172
    },
    {
      "epoch": 0.0003184326171875,
      "step": 52172,
      "training_step_time": 0.4067859649658203
    },
    {
      "epoch": 0.000318438720703125,
      "model_forward_time": 0.11461472511291504,
      "step": 52173
    },
    {
      "epoch": 0.000318438720703125,
      "step": 52173,
      "training_step_time": 0.3886425495147705
    },
    {
      "epoch": 0.00031844482421875,
      "model_forward_time": 0.11432504653930664,
      "step": 52174
    },
    {
      "epoch": 0.00031844482421875,
      "step": 52174,
      "training_step_time": 0.3950364589691162
    },
    {
      "epoch": 0.000318450927734375,
      "model_forward_time": 0.11444950103759766,
      "step": 52175
    },
    {
      "epoch": 0.000318450927734375,
      "step": 52175,
      "training_step_time": 0.5029377937316895
    },
    {
      "epoch": 0.00031845703125,
      "model_forward_time": 0.11600852012634277,
      "step": 52176
    },
    {
      "epoch": 0.00031845703125,
      "step": 52176,
      "training_step_time": 0.4072995185852051
    },
    {
      "epoch": 0.000318463134765625,
      "model_forward_time": 0.11471843719482422,
      "step": 52177
    },
    {
      "epoch": 0.000318463134765625,
      "step": 52177,
      "training_step_time": 0.4684469699859619
    },
    {
      "epoch": 0.00031846923828125,
      "model_forward_time": 0.11535811424255371,
      "step": 52178
    },
    {
      "epoch": 0.00031846923828125,
      "step": 52178,
      "training_step_time": 0.3997199535369873
    },
    {
      "epoch": 0.000318475341796875,
      "model_forward_time": 0.11504721641540527,
      "step": 52179
    },
    {
      "epoch": 0.000318475341796875,
      "step": 52179,
      "training_step_time": 0.392197847366333
    },
    {
      "epoch": 0.0003184814453125,
      "grad_norm": 0.11597006022930145,
      "learning_rate": 4.57267206366902e-06,
      "loss": 0.0359,
      "step": 52180
    },
    {
      "epoch": 0.0003184814453125,
      "model_forward_time": 0.11543989181518555,
      "step": 52180
    },
    {
      "epoch": 0.0003184814453125,
      "step": 52180,
      "training_step_time": 0.3909921646118164
    },
    {
      "epoch": 0.000318487548828125,
      "model_forward_time": 0.11550331115722656,
      "step": 52181
    },
    {
      "epoch": 0.000318487548828125,
      "step": 52181,
      "training_step_time": 0.39329981803894043
    },
    {
      "epoch": 0.00031849365234375,
      "model_forward_time": 0.11504912376403809,
      "step": 52182
    },
    {
      "epoch": 0.00031849365234375,
      "step": 52182,
      "training_step_time": 0.3964569568634033
    },
    {
      "epoch": 0.000318499755859375,
      "model_forward_time": 0.11542153358459473,
      "step": 52183
    },
    {
      "epoch": 0.000318499755859375,
      "step": 52183,
      "training_step_time": 0.43485093116760254
    },
    {
      "epoch": 0.000318505859375,
      "model_forward_time": 0.11562633514404297,
      "step": 52184
    },
    {
      "epoch": 0.000318505859375,
      "step": 52184,
      "training_step_time": 0.41272592544555664
    },
    {
      "epoch": 0.000318511962890625,
      "model_forward_time": 0.11472916603088379,
      "step": 52185
    },
    {
      "epoch": 0.000318511962890625,
      "step": 52185,
      "training_step_time": 0.3978245258331299
    },
    {
      "epoch": 0.00031851806640625,
      "model_forward_time": 0.11544966697692871,
      "step": 52186
    },
    {
      "epoch": 0.00031851806640625,
      "step": 52186,
      "training_step_time": 0.3992135524749756
    },
    {
      "epoch": 0.000318524169921875,
      "model_forward_time": 0.115325927734375,
      "step": 52187
    },
    {
      "epoch": 0.000318524169921875,
      "step": 52187,
      "training_step_time": 0.39786624908447266
    },
    {
      "epoch": 0.0003185302734375,
      "model_forward_time": 0.11533451080322266,
      "step": 52188
    },
    {
      "epoch": 0.0003185302734375,
      "step": 52188,
      "training_step_time": 0.3985862731933594
    },
    {
      "epoch": 0.000318536376953125,
      "model_forward_time": 0.11493515968322754,
      "step": 52189
    },
    {
      "epoch": 0.000318536376953125,
      "step": 52189,
      "training_step_time": 0.39815449714660645
    },
    {
      "epoch": 0.00031854248046875,
      "grad_norm": 0.1282961666584015,
      "learning_rate": 4.5611657540464036e-06,
      "loss": 0.034,
      "step": 52190
    },
    {
      "epoch": 0.00031854248046875,
      "model_forward_time": 0.11514997482299805,
      "step": 52190
    },
    {
      "epoch": 0.00031854248046875,
      "step": 52190,
      "training_step_time": 0.46817755699157715
    },
    {
      "epoch": 0.000318548583984375,
      "model_forward_time": 0.1146693229675293,
      "step": 52191
    },
    {
      "epoch": 0.000318548583984375,
      "step": 52191,
      "training_step_time": 0.48984503746032715
    },
    {
      "epoch": 0.0003185546875,
      "model_forward_time": 0.11641597747802734,
      "step": 52192
    },
    {
      "epoch": 0.0003185546875,
      "step": 52192,
      "training_step_time": 0.4204747676849365
    },
    {
      "epoch": 0.000318560791015625,
      "model_forward_time": 0.1146843433380127,
      "step": 52193
    },
    {
      "epoch": 0.000318560791015625,
      "step": 52193,
      "training_step_time": 0.381854772567749
    },
    {
      "epoch": 0.00031856689453125,
      "model_forward_time": 0.11523699760437012,
      "step": 52194
    },
    {
      "epoch": 0.00031856689453125,
      "step": 52194,
      "training_step_time": 0.412609338760376
    },
    {
      "epoch": 0.000318572998046875,
      "model_forward_time": 0.11497068405151367,
      "step": 52195
    },
    {
      "epoch": 0.000318572998046875,
      "step": 52195,
      "training_step_time": 0.40809202194213867
    },
    {
      "epoch": 0.0003185791015625,
      "model_forward_time": 0.11514091491699219,
      "step": 52196
    },
    {
      "epoch": 0.0003185791015625,
      "step": 52196,
      "training_step_time": 0.42716073989868164
    },
    {
      "epoch": 0.000318585205078125,
      "model_forward_time": 0.11656570434570312,
      "step": 52197
    },
    {
      "epoch": 0.000318585205078125,
      "step": 52197,
      "training_step_time": 0.6227457523345947
    },
    {
      "epoch": 0.00031859130859375,
      "model_forward_time": 0.11480712890625,
      "step": 52198
    },
    {
      "epoch": 0.00031859130859375,
      "step": 52198,
      "training_step_time": 0.3952982425689697
    },
    {
      "epoch": 0.000318597412109375,
      "model_forward_time": 0.11454153060913086,
      "step": 52199
    },
    {
      "epoch": 0.000318597412109375,
      "step": 52199,
      "training_step_time": 0.38736391067504883
    },
    {
      "epoch": 0.000318603515625,
      "grad_norm": 0.07726601511240005,
      "learning_rate": 4.549673247541875e-06,
      "loss": 0.0353,
      "step": 52200
    },
    {
      "epoch": 0.000318603515625,
      "model_forward_time": 0.11506056785583496,
      "step": 52200
    },
    {
      "epoch": 0.000318603515625,
      "step": 52200,
      "training_step_time": 0.404865026473999
    },
    {
      "epoch": 0.000318609619140625,
      "model_forward_time": 0.11480236053466797,
      "step": 52201
    },
    {
      "epoch": 0.000318609619140625,
      "step": 52201,
      "training_step_time": 0.3934783935546875
    },
    {
      "epoch": 0.00031861572265625,
      "model_forward_time": 0.11549043655395508,
      "step": 52202
    },
    {
      "epoch": 0.00031861572265625,
      "step": 52202,
      "training_step_time": 0.40321850776672363
    },
    {
      "epoch": 0.000318621826171875,
      "model_forward_time": 0.11481738090515137,
      "step": 52203
    },
    {
      "epoch": 0.000318621826171875,
      "step": 52203,
      "training_step_time": 0.6551787853240967
    },
    {
      "epoch": 0.0003186279296875,
      "model_forward_time": 0.11455202102661133,
      "step": 52204
    },
    {
      "epoch": 0.0003186279296875,
      "step": 52204,
      "training_step_time": 0.4522249698638916
    },
    {
      "epoch": 0.000318634033203125,
      "model_forward_time": 0.11461353302001953,
      "step": 52205
    },
    {
      "epoch": 0.000318634033203125,
      "step": 52205,
      "training_step_time": 0.42317819595336914
    },
    {
      "epoch": 0.00031864013671875,
      "model_forward_time": 0.11453580856323242,
      "step": 52206
    },
    {
      "epoch": 0.00031864013671875,
      "step": 52206,
      "training_step_time": 0.40892887115478516
    },
    {
      "epoch": 0.000318646240234375,
      "model_forward_time": 0.11471033096313477,
      "step": 52207
    },
    {
      "epoch": 0.000318646240234375,
      "step": 52207,
      "training_step_time": 0.39159584045410156
    },
    {
      "epoch": 0.00031865234375,
      "model_forward_time": 0.11476731300354004,
      "step": 52208
    },
    {
      "epoch": 0.00031865234375,
      "step": 52208,
      "training_step_time": 0.41638636589050293
    },
    {
      "epoch": 0.000318658447265625,
      "model_forward_time": 0.1147615909576416,
      "step": 52209
    },
    {
      "epoch": 0.000318658447265625,
      "step": 52209,
      "training_step_time": 0.4910092353820801
    },
    {
      "epoch": 0.00031866455078125,
      "grad_norm": 0.10047363489866257,
      "learning_rate": 4.538194547646574e-06,
      "loss": 0.0413,
      "step": 52210
    },
    {
      "epoch": 0.00031866455078125,
      "model_forward_time": 0.11502528190612793,
      "step": 52210
    },
    {
      "epoch": 0.00031866455078125,
      "step": 52210,
      "training_step_time": 0.42880988121032715
    },
    {
      "epoch": 0.000318670654296875,
      "model_forward_time": 0.11824274063110352,
      "step": 52211
    },
    {
      "epoch": 0.000318670654296875,
      "step": 52211,
      "training_step_time": 0.43168044090270996
    },
    {
      "epoch": 0.0003186767578125,
      "model_forward_time": 0.11792945861816406,
      "step": 52212
    },
    {
      "epoch": 0.0003186767578125,
      "step": 52212,
      "training_step_time": 0.4333462715148926
    },
    {
      "epoch": 0.000318682861328125,
      "model_forward_time": 0.11791682243347168,
      "step": 52213
    },
    {
      "epoch": 0.000318682861328125,
      "step": 52213,
      "training_step_time": 0.38061046600341797
    },
    {
      "epoch": 0.00031868896484375,
      "model_forward_time": 0.11910557746887207,
      "step": 52214
    },
    {
      "epoch": 0.00031868896484375,
      "step": 52214,
      "training_step_time": 0.37941551208496094
    },
    {
      "epoch": 0.000318695068359375,
      "model_forward_time": 0.11814665794372559,
      "step": 52215
    },
    {
      "epoch": 0.000318695068359375,
      "step": 52215,
      "training_step_time": 0.4729800224304199
    },
    {
      "epoch": 0.000318701171875,
      "model_forward_time": 0.1165311336517334,
      "step": 52216
    },
    {
      "epoch": 0.000318701171875,
      "step": 52216,
      "training_step_time": 0.3949298858642578
    },
    {
      "epoch": 0.000318707275390625,
      "model_forward_time": 0.11503815650939941,
      "step": 52217
    },
    {
      "epoch": 0.000318707275390625,
      "step": 52217,
      "training_step_time": 0.4892721176147461
    },
    {
      "epoch": 0.00031871337890625,
      "model_forward_time": 0.11524772644042969,
      "step": 52218
    },
    {
      "epoch": 0.00031871337890625,
      "step": 52218,
      "training_step_time": 0.42397403717041016
    },
    {
      "epoch": 0.000318719482421875,
      "model_forward_time": 0.11509013175964355,
      "step": 52219
    },
    {
      "epoch": 0.000318719482421875,
      "step": 52219,
      "training_step_time": 0.4897000789642334
    },
    {
      "epoch": 0.0003187255859375,
      "grad_norm": 0.10411466658115387,
      "learning_rate": 4.526729657847423e-06,
      "loss": 0.0387,
      "step": 52220
    },
    {
      "epoch": 0.0003187255859375,
      "model_forward_time": 0.11471247673034668,
      "step": 52220
    },
    {
      "epoch": 0.0003187255859375,
      "step": 52220,
      "training_step_time": 0.418595552444458
    },
    {
      "epoch": 0.000318731689453125,
      "model_forward_time": 0.11554765701293945,
      "step": 52221
    },
    {
      "epoch": 0.000318731689453125,
      "step": 52221,
      "training_step_time": 0.4252963066101074
    },
    {
      "epoch": 0.00031873779296875,
      "model_forward_time": 0.11494159698486328,
      "step": 52222
    },
    {
      "epoch": 0.00031873779296875,
      "step": 52222,
      "training_step_time": 0.4530751705169678
    },
    {
      "epoch": 0.000318743896484375,
      "model_forward_time": 0.11591649055480957,
      "step": 52223
    },
    {
      "epoch": 0.000318743896484375,
      "step": 52223,
      "training_step_time": 0.3855562210083008
    },
    {
      "epoch": 0.00031875,
      "model_forward_time": 0.11501669883728027,
      "step": 52224
    },
    {
      "epoch": 0.00031875,
      "step": 52224,
      "training_step_time": 0.3624458312988281
    },
    {
      "epoch": 0.000318756103515625,
      "model_forward_time": 0.1148529052734375,
      "step": 52225
    },
    {
      "epoch": 0.000318756103515625,
      "step": 52225,
      "training_step_time": 0.44910550117492676
    },
    {
      "epoch": 0.00031876220703125,
      "model_forward_time": 0.11492204666137695,
      "step": 52226
    },
    {
      "epoch": 0.00031876220703125,
      "step": 52226,
      "training_step_time": 0.41537904739379883
    },
    {
      "epoch": 0.000318768310546875,
      "model_forward_time": 0.11548209190368652,
      "step": 52227
    },
    {
      "epoch": 0.000318768310546875,
      "step": 52227,
      "training_step_time": 0.3922755718231201
    },
    {
      "epoch": 0.0003187744140625,
      "model_forward_time": 0.11424088478088379,
      "step": 52228
    },
    {
      "epoch": 0.0003187744140625,
      "step": 52228,
      "training_step_time": 0.39736342430114746
    },
    {
      "epoch": 0.000318780517578125,
      "model_forward_time": 0.11574459075927734,
      "step": 52229
    },
    {
      "epoch": 0.000318780517578125,
      "step": 52229,
      "training_step_time": 0.39409828186035156
    },
    {
      "epoch": 0.00031878662109375,
      "grad_norm": 0.1192931979894638,
      "learning_rate": 4.515278581627141e-06,
      "loss": 0.0376,
      "step": 52230
    },
    {
      "epoch": 0.00031878662109375,
      "model_forward_time": 0.1145627498626709,
      "step": 52230
    },
    {
      "epoch": 0.00031878662109375,
      "step": 52230,
      "training_step_time": 0.401517391204834
    },
    {
      "epoch": 0.000318792724609375,
      "model_forward_time": 0.11473608016967773,
      "step": 52231
    },
    {
      "epoch": 0.000318792724609375,
      "step": 52231,
      "training_step_time": 0.396503210067749
    },
    {
      "epoch": 0.000318798828125,
      "model_forward_time": 0.11528229713439941,
      "step": 52232
    },
    {
      "epoch": 0.000318798828125,
      "step": 52232,
      "training_step_time": 0.41333937644958496
    },
    {
      "epoch": 0.000318804931640625,
      "model_forward_time": 0.11530089378356934,
      "step": 52233
    },
    {
      "epoch": 0.000318804931640625,
      "step": 52233,
      "training_step_time": 0.6447713375091553
    },
    {
      "epoch": 0.00031881103515625,
      "model_forward_time": 0.114776611328125,
      "step": 52234
    },
    {
      "epoch": 0.00031881103515625,
      "step": 52234,
      "training_step_time": 0.4221043586730957
    },
    {
      "epoch": 0.000318817138671875,
      "model_forward_time": 0.11528420448303223,
      "step": 52235
    },
    {
      "epoch": 0.000318817138671875,
      "step": 52235,
      "training_step_time": 0.43514013290405273
    },
    {
      "epoch": 0.0003188232421875,
      "model_forward_time": 0.11570096015930176,
      "step": 52236
    },
    {
      "epoch": 0.0003188232421875,
      "step": 52236,
      "training_step_time": 0.3917834758758545
    },
    {
      "epoch": 0.000318829345703125,
      "model_forward_time": 0.11470389366149902,
      "step": 52237
    },
    {
      "epoch": 0.000318829345703125,
      "step": 52237,
      "training_step_time": 0.3888106346130371
    },
    {
      "epoch": 0.00031883544921875,
      "model_forward_time": 0.11521410942077637,
      "step": 52238
    },
    {
      "epoch": 0.00031883544921875,
      "step": 52238,
      "training_step_time": 0.3769562244415283
    },
    {
      "epoch": 0.000318841552734375,
      "model_forward_time": 0.11575055122375488,
      "step": 52239
    },
    {
      "epoch": 0.000318841552734375,
      "step": 52239,
      "training_step_time": 0.709244966506958
    },
    {
      "epoch": 0.00031884765625,
      "grad_norm": 0.07683171331882477,
      "learning_rate": 4.5038413224642805e-06,
      "loss": 0.0406,
      "step": 52240
    },
    {
      "epoch": 0.00031884765625,
      "model_forward_time": 0.1142427921295166,
      "step": 52240
    },
    {
      "epoch": 0.00031884765625,
      "step": 52240,
      "training_step_time": 0.3918921947479248
    },
    {
      "epoch": 0.000318853759765625,
      "model_forward_time": 0.11479568481445312,
      "step": 52241
    },
    {
      "epoch": 0.000318853759765625,
      "step": 52241,
      "training_step_time": 0.3869633674621582
    },
    {
      "epoch": 0.00031885986328125,
      "model_forward_time": 0.1143639087677002,
      "step": 52242
    },
    {
      "epoch": 0.00031885986328125,
      "step": 52242,
      "training_step_time": 0.38980889320373535
    },
    {
      "epoch": 0.000318865966796875,
      "model_forward_time": 0.11522769927978516,
      "step": 52243
    },
    {
      "epoch": 0.000318865966796875,
      "step": 52243,
      "training_step_time": 0.3965332508087158
    },
    {
      "epoch": 0.0003188720703125,
      "model_forward_time": 0.11551713943481445,
      "step": 52244
    },
    {
      "epoch": 0.0003188720703125,
      "step": 52244,
      "training_step_time": 0.39049434661865234
    },
    {
      "epoch": 0.000318878173828125,
      "model_forward_time": 0.11490106582641602,
      "step": 52245
    },
    {
      "epoch": 0.000318878173828125,
      "step": 52245,
      "training_step_time": 0.7156202793121338
    },
    {
      "epoch": 0.00031888427734375,
      "model_forward_time": 0.1148061752319336,
      "step": 52246
    },
    {
      "epoch": 0.00031888427734375,
      "step": 52246,
      "training_step_time": 0.4874918460845947
    },
    {
      "epoch": 0.000318890380859375,
      "model_forward_time": 0.11435365676879883,
      "step": 52247
    },
    {
      "epoch": 0.000318890380859375,
      "step": 52247,
      "training_step_time": 0.40211915969848633
    },
    {
      "epoch": 0.000318896484375,
      "model_forward_time": 0.11452889442443848,
      "step": 52248
    },
    {
      "epoch": 0.000318896484375,
      "step": 52248,
      "training_step_time": 0.40765810012817383
    },
    {
      "epoch": 0.000318902587890625,
      "model_forward_time": 0.1137852668762207,
      "step": 52249
    },
    {
      "epoch": 0.000318902587890625,
      "step": 52249,
      "training_step_time": 0.44874095916748047
    },
    {
      "epoch": 0.00031890869140625,
      "grad_norm": 0.1048753634095192,
      "learning_rate": 4.492417883833155e-06,
      "loss": 0.0353,
      "step": 52250
    },
    {
      "epoch": 0.00031890869140625,
      "model_forward_time": 0.11427044868469238,
      "step": 52250
    },
    {
      "epoch": 0.00031890869140625,
      "step": 52250,
      "training_step_time": 0.3950960636138916
    },
    {
      "epoch": 0.000318914794921875,
      "model_forward_time": 0.11550474166870117,
      "step": 52251
    },
    {
      "epoch": 0.000318914794921875,
      "step": 52251,
      "training_step_time": 0.5182788372039795
    },
    {
      "epoch": 0.0003189208984375,
      "model_forward_time": 0.1143038272857666,
      "step": 52252
    },
    {
      "epoch": 0.0003189208984375,
      "step": 52252,
      "training_step_time": 0.3638036251068115
    },
    {
      "epoch": 0.000318927001953125,
      "model_forward_time": 0.11504888534545898,
      "step": 52253
    },
    {
      "epoch": 0.000318927001953125,
      "step": 52253,
      "training_step_time": 0.44017887115478516
    },
    {
      "epoch": 0.00031893310546875,
      "model_forward_time": 0.11451482772827148,
      "step": 52254
    },
    {
      "epoch": 0.00031893310546875,
      "step": 52254,
      "training_step_time": 0.39283061027526855
    },
    {
      "epoch": 0.000318939208984375,
      "model_forward_time": 0.1136932373046875,
      "step": 52255
    },
    {
      "epoch": 0.000318939208984375,
      "step": 52255,
      "training_step_time": 0.403928279876709
    },
    {
      "epoch": 0.0003189453125,
      "model_forward_time": 0.11505246162414551,
      "step": 52256
    },
    {
      "epoch": 0.0003189453125,
      "step": 52256,
      "training_step_time": 0.38665199279785156
    },
    {
      "epoch": 0.000318951416015625,
      "model_forward_time": 0.1145622730255127,
      "step": 52257
    },
    {
      "epoch": 0.000318951416015625,
      "step": 52257,
      "training_step_time": 0.5055153369903564
    },
    {
      "epoch": 0.00031895751953125,
      "model_forward_time": 0.1154320240020752,
      "step": 52258
    },
    {
      "epoch": 0.00031895751953125,
      "step": 52258,
      "training_step_time": 0.38397860527038574
    },
    {
      "epoch": 0.000318963623046875,
      "model_forward_time": 0.11502361297607422,
      "step": 52259
    },
    {
      "epoch": 0.000318963623046875,
      "step": 52259,
      "training_step_time": 0.4366490840911865
    },
    {
      "epoch": 0.0003189697265625,
      "grad_norm": 0.08838752657175064,
      "learning_rate": 4.48100826920394e-06,
      "loss": 0.0345,
      "step": 52260
    },
    {
      "epoch": 0.0003189697265625,
      "model_forward_time": 0.11577844619750977,
      "step": 52260
    },
    {
      "epoch": 0.0003189697265625,
      "step": 52260,
      "training_step_time": 0.417783260345459
    },
    {
      "epoch": 0.000318975830078125,
      "model_forward_time": 0.1151883602142334,
      "step": 52261
    },
    {
      "epoch": 0.000318975830078125,
      "step": 52261,
      "training_step_time": 0.4717750549316406
    },
    {
      "epoch": 0.00031898193359375,
      "model_forward_time": 0.11454367637634277,
      "step": 52262
    },
    {
      "epoch": 0.00031898193359375,
      "step": 52262,
      "training_step_time": 0.39654064178466797
    },
    {
      "epoch": 0.000318988037109375,
      "model_forward_time": 0.11519360542297363,
      "step": 52263
    },
    {
      "epoch": 0.000318988037109375,
      "step": 52263,
      "training_step_time": 0.4958953857421875
    },
    {
      "epoch": 0.000318994140625,
      "model_forward_time": 0.11614632606506348,
      "step": 52264
    },
    {
      "epoch": 0.000318994140625,
      "step": 52264,
      "training_step_time": 0.3818955421447754
    },
    {
      "epoch": 0.000319000244140625,
      "model_forward_time": 0.11478972434997559,
      "step": 52265
    },
    {
      "epoch": 0.000319000244140625,
      "step": 52265,
      "training_step_time": 0.38828611373901367
    },
    {
      "epoch": 0.00031900634765625,
      "model_forward_time": 0.11481094360351562,
      "step": 52266
    },
    {
      "epoch": 0.00031900634765625,
      "step": 52266,
      "training_step_time": 0.3989872932434082
    },
    {
      "epoch": 0.000319012451171875,
      "model_forward_time": 0.11540627479553223,
      "step": 52267
    },
    {
      "epoch": 0.000319012451171875,
      "step": 52267,
      "training_step_time": 0.5160682201385498
    },
    {
      "epoch": 0.0003190185546875,
      "model_forward_time": 0.11492395401000977,
      "step": 52268
    },
    {
      "epoch": 0.0003190185546875,
      "step": 52268,
      "training_step_time": 0.50537109375
    },
    {
      "epoch": 0.000319024658203125,
      "model_forward_time": 0.1158595085144043,
      "step": 52269
    },
    {
      "epoch": 0.000319024658203125,
      "step": 52269,
      "training_step_time": 0.40242481231689453
    },
    {
      "epoch": 0.00031903076171875,
      "grad_norm": 0.0992085337638855,
      "learning_rate": 4.469612482042534e-06,
      "loss": 0.0337,
      "step": 52270
    },
    {
      "epoch": 0.00031903076171875,
      "model_forward_time": 0.11457610130310059,
      "step": 52270
    },
    {
      "epoch": 0.00031903076171875,
      "step": 52270,
      "training_step_time": 0.3870868682861328
    },
    {
      "epoch": 0.000319036865234375,
      "model_forward_time": 0.11498904228210449,
      "step": 52271
    },
    {
      "epoch": 0.000319036865234375,
      "step": 52271,
      "training_step_time": 0.4031496047973633
    },
    {
      "epoch": 0.00031904296875,
      "model_forward_time": 0.11461019515991211,
      "step": 52272
    },
    {
      "epoch": 0.00031904296875,
      "step": 52272,
      "training_step_time": 0.39593052864074707
    },
    {
      "epoch": 0.000319049072265625,
      "model_forward_time": 0.11556005477905273,
      "step": 52273
    },
    {
      "epoch": 0.000319049072265625,
      "step": 52273,
      "training_step_time": 0.5118913650512695
    },
    {
      "epoch": 0.00031905517578125,
      "model_forward_time": 0.11561274528503418,
      "step": 52274
    },
    {
      "epoch": 0.00031905517578125,
      "step": 52274,
      "training_step_time": 0.4125669002532959
    },
    {
      "epoch": 0.000319061279296875,
      "model_forward_time": 0.11429452896118164,
      "step": 52275
    },
    {
      "epoch": 0.000319061279296875,
      "step": 52275,
      "training_step_time": 0.6564862728118896
    },
    {
      "epoch": 0.0003190673828125,
      "model_forward_time": 0.11477780342102051,
      "step": 52276
    },
    {
      "epoch": 0.0003190673828125,
      "step": 52276,
      "training_step_time": 0.4845733642578125
    },
    {
      "epoch": 0.000319073486328125,
      "model_forward_time": 0.11450791358947754,
      "step": 52277
    },
    {
      "epoch": 0.000319073486328125,
      "step": 52277,
      "training_step_time": 0.3912067413330078
    },
    {
      "epoch": 0.00031907958984375,
      "model_forward_time": 0.11407685279846191,
      "step": 52278
    },
    {
      "epoch": 0.00031907958984375,
      "step": 52278,
      "training_step_time": 0.39453911781311035
    },
    {
      "epoch": 0.000319085693359375,
      "model_forward_time": 0.11554932594299316,
      "step": 52279
    },
    {
      "epoch": 0.000319085693359375,
      "step": 52279,
      "training_step_time": 0.38657689094543457
    },
    {
      "epoch": 0.000319091796875,
      "grad_norm": 0.11736942827701569,
      "learning_rate": 4.458230525810708e-06,
      "loss": 0.035,
      "step": 52280
    },
    {
      "epoch": 0.000319091796875,
      "model_forward_time": 0.11423754692077637,
      "step": 52280
    },
    {
      "epoch": 0.000319091796875,
      "step": 52280,
      "training_step_time": 0.3889920711517334
    },
    {
      "epoch": 0.000319097900390625,
      "model_forward_time": 0.11574959754943848,
      "step": 52281
    },
    {
      "epoch": 0.000319097900390625,
      "step": 52281,
      "training_step_time": 0.494675874710083
    },
    {
      "epoch": 0.00031910400390625,
      "model_forward_time": 0.11484503746032715,
      "step": 52282
    },
    {
      "epoch": 0.00031910400390625,
      "step": 52282,
      "training_step_time": 0.49698638916015625
    },
    {
      "epoch": 0.000319110107421875,
      "model_forward_time": 0.11489605903625488,
      "step": 52283
    },
    {
      "epoch": 0.000319110107421875,
      "step": 52283,
      "training_step_time": 0.39807820320129395
    },
    {
      "epoch": 0.0003191162109375,
      "model_forward_time": 0.11519694328308105,
      "step": 52284
    },
    {
      "epoch": 0.0003191162109375,
      "step": 52284,
      "training_step_time": 0.3997056484222412
    },
    {
      "epoch": 0.000319122314453125,
      "model_forward_time": 0.11448502540588379,
      "step": 52285
    },
    {
      "epoch": 0.000319122314453125,
      "step": 52285,
      "training_step_time": 0.39244818687438965
    },
    {
      "epoch": 0.00031912841796875,
      "model_forward_time": 0.11479592323303223,
      "step": 52286
    },
    {
      "epoch": 0.00031912841796875,
      "step": 52286,
      "training_step_time": 0.3928053379058838
    },
    {
      "epoch": 0.000319134521484375,
      "model_forward_time": 0.11502552032470703,
      "step": 52287
    },
    {
      "epoch": 0.000319134521484375,
      "step": 52287,
      "training_step_time": 0.531409502029419
    },
    {
      "epoch": 0.000319140625,
      "model_forward_time": 0.11530566215515137,
      "step": 52288
    },
    {
      "epoch": 0.000319140625,
      "step": 52288,
      "training_step_time": 0.46024346351623535
    },
    {
      "epoch": 0.000319146728515625,
      "model_forward_time": 0.11508798599243164,
      "step": 52289
    },
    {
      "epoch": 0.000319146728515625,
      "step": 52289,
      "training_step_time": 0.388871431350708
    },
    {
      "epoch": 0.00031915283203125,
      "grad_norm": 0.08861568570137024,
      "learning_rate": 4.446862403965984e-06,
      "loss": 0.0302,
      "step": 52290
    },
    {
      "epoch": 0.00031915283203125,
      "model_forward_time": 0.11488628387451172,
      "step": 52290
    },
    {
      "epoch": 0.00031915283203125,
      "step": 52290,
      "training_step_time": 0.41954731941223145
    },
    {
      "epoch": 0.000319158935546875,
      "model_forward_time": 0.11467218399047852,
      "step": 52291
    },
    {
      "epoch": 0.000319158935546875,
      "step": 52291,
      "training_step_time": 0.4308793544769287
    },
    {
      "epoch": 0.0003191650390625,
      "model_forward_time": 0.11454105377197266,
      "step": 52292
    },
    {
      "epoch": 0.0003191650390625,
      "step": 52292,
      "training_step_time": 0.39251017570495605
    },
    {
      "epoch": 0.000319171142578125,
      "model_forward_time": 0.11539769172668457,
      "step": 52293
    },
    {
      "epoch": 0.000319171142578125,
      "step": 52293,
      "training_step_time": 0.5842680931091309
    },
    {
      "epoch": 0.00031917724609375,
      "model_forward_time": 0.1145181655883789,
      "step": 52294
    },
    {
      "epoch": 0.00031917724609375,
      "step": 52294,
      "training_step_time": 0.39823174476623535
    },
    {
      "epoch": 0.000319183349609375,
      "model_forward_time": 0.1153724193572998,
      "step": 52295
    },
    {
      "epoch": 0.000319183349609375,
      "step": 52295,
      "training_step_time": 0.4124715328216553
    },
    {
      "epoch": 0.000319189453125,
      "model_forward_time": 0.11528682708740234,
      "step": 52296
    },
    {
      "epoch": 0.000319189453125,
      "step": 52296,
      "training_step_time": 0.44245386123657227
    },
    {
      "epoch": 0.000319195556640625,
      "model_forward_time": 0.11814475059509277,
      "step": 52297
    },
    {
      "epoch": 0.000319195556640625,
      "step": 52297,
      "training_step_time": 0.47397351264953613
    },
    {
      "epoch": 0.00031920166015625,
      "model_forward_time": 0.11854958534240723,
      "step": 52298
    },
    {
      "epoch": 0.00031920166015625,
      "step": 52298,
      "training_step_time": 0.3990499973297119
    },
    {
      "epoch": 0.000319207763671875,
      "model_forward_time": 0.11737322807312012,
      "step": 52299
    },
    {
      "epoch": 0.000319207763671875,
      "step": 52299,
      "training_step_time": 0.5425369739532471
    },
    {
      "epoch": 0.0003192138671875,
      "grad_norm": 0.15930797159671783,
      "learning_rate": 4.435508119961701e-06,
      "loss": 0.0385,
      "step": 52300
    },
    {
      "epoch": 0.0003192138671875,
      "model_forward_time": 0.11457943916320801,
      "step": 52300
    },
    {
      "epoch": 0.0003192138671875,
      "step": 52300,
      "training_step_time": 0.394122838973999
    },
    {
      "epoch": 0.000319219970703125,
      "model_forward_time": 0.11441922187805176,
      "step": 52301
    },
    {
      "epoch": 0.000319219970703125,
      "step": 52301,
      "training_step_time": 0.42432641983032227
    },
    {
      "epoch": 0.00031922607421875,
      "model_forward_time": 0.11482667922973633,
      "step": 52302
    },
    {
      "epoch": 0.00031922607421875,
      "step": 52302,
      "training_step_time": 0.4898393154144287
    },
    {
      "epoch": 0.000319232177734375,
      "model_forward_time": 0.11482429504394531,
      "step": 52303
    },
    {
      "epoch": 0.000319232177734375,
      "step": 52303,
      "training_step_time": 0.4055824279785156
    },
    {
      "epoch": 0.00031923828125,
      "model_forward_time": 0.11433959007263184,
      "step": 52304
    },
    {
      "epoch": 0.00031923828125,
      "step": 52304,
      "training_step_time": 0.43834376335144043
    },
    {
      "epoch": 0.000319244384765625,
      "model_forward_time": 0.11507797241210938,
      "step": 52305
    },
    {
      "epoch": 0.000319244384765625,
      "step": 52305,
      "training_step_time": 0.5237462520599365
    },
    {
      "epoch": 0.00031925048828125,
      "model_forward_time": 0.11493945121765137,
      "step": 52306
    },
    {
      "epoch": 0.00031925048828125,
      "step": 52306,
      "training_step_time": 0.411557674407959
    },
    {
      "epoch": 0.000319256591796875,
      "model_forward_time": 0.11432671546936035,
      "step": 52307
    },
    {
      "epoch": 0.000319256591796875,
      "step": 52307,
      "training_step_time": 0.38172221183776855
    },
    {
      "epoch": 0.0003192626953125,
      "model_forward_time": 0.11464595794677734,
      "step": 52308
    },
    {
      "epoch": 0.0003192626953125,
      "step": 52308,
      "training_step_time": 0.3945653438568115
    },
    {
      "epoch": 0.000319268798828125,
      "model_forward_time": 0.11499285697937012,
      "step": 52309
    },
    {
      "epoch": 0.000319268798828125,
      "step": 52309,
      "training_step_time": 0.4616208076477051
    },
    {
      "epoch": 0.00031927490234375,
      "grad_norm": 0.06374839693307877,
      "learning_rate": 4.4241676772469875e-06,
      "loss": 0.0369,
      "step": 52310
    },
    {
      "epoch": 0.00031927490234375,
      "model_forward_time": 0.11477136611938477,
      "step": 52310
    },
    {
      "epoch": 0.00031927490234375,
      "step": 52310,
      "training_step_time": 0.44234132766723633
    },
    {
      "epoch": 0.000319281005859375,
      "model_forward_time": 0.11581277847290039,
      "step": 52311
    },
    {
      "epoch": 0.000319281005859375,
      "step": 52311,
      "training_step_time": 0.5828194618225098
    },
    {
      "epoch": 0.000319287109375,
      "model_forward_time": 0.11486554145812988,
      "step": 52312
    },
    {
      "epoch": 0.000319287109375,
      "step": 52312,
      "training_step_time": 0.40863990783691406
    },
    {
      "epoch": 0.000319293212890625,
      "model_forward_time": 0.11434698104858398,
      "step": 52313
    },
    {
      "epoch": 0.000319293212890625,
      "step": 52313,
      "training_step_time": 0.41959142684936523
    },
    {
      "epoch": 0.00031929931640625,
      "model_forward_time": 0.11625981330871582,
      "step": 52314
    },
    {
      "epoch": 0.00031929931640625,
      "step": 52314,
      "training_step_time": 0.4048600196838379
    },
    {
      "epoch": 0.000319305419921875,
      "model_forward_time": 0.11455702781677246,
      "step": 52315
    },
    {
      "epoch": 0.000319305419921875,
      "step": 52315,
      "training_step_time": 0.3984823226928711
    },
    {
      "epoch": 0.0003193115234375,
      "model_forward_time": 0.11534905433654785,
      "step": 52316
    },
    {
      "epoch": 0.0003193115234375,
      "step": 52316,
      "training_step_time": 0.489849328994751
    },
    {
      "epoch": 0.000319317626953125,
      "model_forward_time": 0.11517000198364258,
      "step": 52317
    },
    {
      "epoch": 0.000319317626953125,
      "step": 52317,
      "training_step_time": 0.43941330909729004
    },
    {
      "epoch": 0.00031932373046875,
      "model_forward_time": 0.11479067802429199,
      "step": 52318
    },
    {
      "epoch": 0.00031932373046875,
      "step": 52318,
      "training_step_time": 0.5833315849304199
    },
    {
      "epoch": 0.000319329833984375,
      "model_forward_time": 0.11472797393798828,
      "step": 52319
    },
    {
      "epoch": 0.000319329833984375,
      "step": 52319,
      "training_step_time": 0.3924107551574707
    },
    {
      "epoch": 0.0003193359375,
      "grad_norm": 0.08918667584657669,
      "learning_rate": 4.412841079266777e-06,
      "loss": 0.0325,
      "step": 52320
    },
    {
      "epoch": 0.0003193359375,
      "model_forward_time": 0.11489415168762207,
      "step": 52320
    },
    {
      "epoch": 0.0003193359375,
      "step": 52320,
      "training_step_time": 0.389998197555542
    },
    {
      "epoch": 0.000319342041015625,
      "model_forward_time": 0.11498260498046875,
      "step": 52321
    },
    {
      "epoch": 0.000319342041015625,
      "step": 52321,
      "training_step_time": 0.3865344524383545
    },
    {
      "epoch": 0.00031934814453125,
      "model_forward_time": 0.11471748352050781,
      "step": 52322
    },
    {
      "epoch": 0.00031934814453125,
      "step": 52322,
      "training_step_time": 0.38945794105529785
    },
    {
      "epoch": 0.000319354248046875,
      "model_forward_time": 0.11520147323608398,
      "step": 52323
    },
    {
      "epoch": 0.000319354248046875,
      "step": 52323,
      "training_step_time": 0.42019200325012207
    },
    {
      "epoch": 0.0003193603515625,
      "model_forward_time": 0.11537742614746094,
      "step": 52324
    },
    {
      "epoch": 0.0003193603515625,
      "step": 52324,
      "training_step_time": 0.5084154605865479
    },
    {
      "epoch": 0.000319366455078125,
      "model_forward_time": 0.11614775657653809,
      "step": 52325
    },
    {
      "epoch": 0.000319366455078125,
      "step": 52325,
      "training_step_time": 0.4813230037689209
    },
    {
      "epoch": 0.00031937255859375,
      "model_forward_time": 0.11482620239257812,
      "step": 52326
    },
    {
      "epoch": 0.00031937255859375,
      "step": 52326,
      "training_step_time": 0.47243213653564453
    },
    {
      "epoch": 0.000319378662109375,
      "model_forward_time": 0.11585330963134766,
      "step": 52327
    },
    {
      "epoch": 0.000319378662109375,
      "step": 52327,
      "training_step_time": 0.3802981376647949
    },
    {
      "epoch": 0.000319384765625,
      "model_forward_time": 0.11478471755981445,
      "step": 52328
    },
    {
      "epoch": 0.000319384765625,
      "step": 52328,
      "training_step_time": 0.4224984645843506
    },
    {
      "epoch": 0.000319390869140625,
      "model_forward_time": 0.11511468887329102,
      "step": 52329
    },
    {
      "epoch": 0.000319390869140625,
      "step": 52329,
      "training_step_time": 0.38054537773132324
    },
    {
      "epoch": 0.00031939697265625,
      "grad_norm": 0.08181148767471313,
      "learning_rate": 4.401528329461779e-06,
      "loss": 0.0375,
      "step": 52330
    },
    {
      "epoch": 0.00031939697265625,
      "model_forward_time": 0.11523318290710449,
      "step": 52330
    },
    {
      "epoch": 0.00031939697265625,
      "step": 52330,
      "training_step_time": 0.3816845417022705
    },
    {
      "epoch": 0.000319403076171875,
      "model_forward_time": 0.11501479148864746,
      "step": 52331
    },
    {
      "epoch": 0.000319403076171875,
      "step": 52331,
      "training_step_time": 0.40462350845336914
    },
    {
      "epoch": 0.0003194091796875,
      "model_forward_time": 0.11526679992675781,
      "step": 52332
    },
    {
      "epoch": 0.0003194091796875,
      "step": 52332,
      "training_step_time": 0.41982054710388184
    },
    {
      "epoch": 0.000319415283203125,
      "model_forward_time": 0.11423420906066895,
      "step": 52333
    },
    {
      "epoch": 0.000319415283203125,
      "step": 52333,
      "training_step_time": 0.43498778343200684
    },
    {
      "epoch": 0.00031942138671875,
      "model_forward_time": 0.11588907241821289,
      "step": 52334
    },
    {
      "epoch": 0.00031942138671875,
      "step": 52334,
      "training_step_time": 0.3924596309661865
    },
    {
      "epoch": 0.000319427490234375,
      "model_forward_time": 0.1156306266784668,
      "step": 52335
    },
    {
      "epoch": 0.000319427490234375,
      "step": 52335,
      "training_step_time": 0.4649832248687744
    },
    {
      "epoch": 0.00031943359375,
      "model_forward_time": 0.11518049240112305,
      "step": 52336
    },
    {
      "epoch": 0.00031943359375,
      "step": 52336,
      "training_step_time": 0.3974790573120117
    },
    {
      "epoch": 0.000319439697265625,
      "model_forward_time": 0.11590290069580078,
      "step": 52337
    },
    {
      "epoch": 0.000319439697265625,
      "step": 52337,
      "training_step_time": 0.38626933097839355
    },
    {
      "epoch": 0.00031944580078125,
      "model_forward_time": 0.11556696891784668,
      "step": 52338
    },
    {
      "epoch": 0.00031944580078125,
      "step": 52338,
      "training_step_time": 0.4439260959625244
    },
    {
      "epoch": 0.000319451904296875,
      "model_forward_time": 0.11491703987121582,
      "step": 52339
    },
    {
      "epoch": 0.000319451904296875,
      "step": 52339,
      "training_step_time": 0.49732327461242676
    },
    {
      "epoch": 0.0003194580078125,
      "grad_norm": 0.07040715217590332,
      "learning_rate": 4.390229431268534e-06,
      "loss": 0.0348,
      "step": 52340
    },
    {
      "epoch": 0.0003194580078125,
      "model_forward_time": 0.11510968208312988,
      "step": 52340
    },
    {
      "epoch": 0.0003194580078125,
      "step": 52340,
      "training_step_time": 0.4306211471557617
    },
    {
      "epoch": 0.000319464111328125,
      "model_forward_time": 0.1146230697631836,
      "step": 52341
    },
    {
      "epoch": 0.000319464111328125,
      "step": 52341,
      "training_step_time": 0.524115800857544
    },
    {
      "epoch": 0.00031947021484375,
      "model_forward_time": 0.1147606372833252,
      "step": 52342
    },
    {
      "epoch": 0.00031947021484375,
      "step": 52342,
      "training_step_time": 0.48305344581604004
    },
    {
      "epoch": 0.000319476318359375,
      "model_forward_time": 0.11584591865539551,
      "step": 52343
    },
    {
      "epoch": 0.000319476318359375,
      "step": 52343,
      "training_step_time": 0.41188502311706543
    },
    {
      "epoch": 0.000319482421875,
      "model_forward_time": 0.11497759819030762,
      "step": 52344
    },
    {
      "epoch": 0.000319482421875,
      "step": 52344,
      "training_step_time": 0.47573328018188477
    },
    {
      "epoch": 0.000319488525390625,
      "model_forward_time": 0.11454415321350098,
      "step": 52345
    },
    {
      "epoch": 0.000319488525390625,
      "step": 52345,
      "training_step_time": 0.3946061134338379
    },
    {
      "epoch": 0.00031949462890625,
      "model_forward_time": 0.11466813087463379,
      "step": 52346
    },
    {
      "epoch": 0.00031949462890625,
      "step": 52346,
      "training_step_time": 0.4091508388519287
    },
    {
      "epoch": 0.000319500732421875,
      "model_forward_time": 0.11528730392456055,
      "step": 52347
    },
    {
      "epoch": 0.000319500732421875,
      "step": 52347,
      "training_step_time": 0.4626593589782715
    },
    {
      "epoch": 0.0003195068359375,
      "model_forward_time": 0.11520528793334961,
      "step": 52348
    },
    {
      "epoch": 0.0003195068359375,
      "step": 52348,
      "training_step_time": 0.40152692794799805
    },
    {
      "epoch": 0.000319512939453125,
      "model_forward_time": 0.11455750465393066,
      "step": 52349
    },
    {
      "epoch": 0.000319512939453125,
      "step": 52349,
      "training_step_time": 0.3789370059967041
    },
    {
      "epoch": 0.00031951904296875,
      "grad_norm": 0.10087550431489944,
      "learning_rate": 4.378944388119311e-06,
      "loss": 0.0345,
      "step": 52350
    },
    {
      "epoch": 0.00031951904296875,
      "model_forward_time": 0.11463141441345215,
      "step": 52350
    },
    {
      "epoch": 0.00031951904296875,
      "step": 52350,
      "training_step_time": 0.3966965675354004
    },
    {
      "epoch": 0.000319525146484375,
      "model_forward_time": 0.11474227905273438,
      "step": 52351
    },
    {
      "epoch": 0.000319525146484375,
      "step": 52351,
      "training_step_time": 0.39206695556640625
    },
    {
      "epoch": 0.00031953125,
      "model_forward_time": 0.11574459075927734,
      "step": 52352
    },
    {
      "epoch": 0.00031953125,
      "step": 52352,
      "training_step_time": 0.4349994659423828
    },
    {
      "epoch": 0.000319537353515625,
      "model_forward_time": 0.11505579948425293,
      "step": 52353
    },
    {
      "epoch": 0.000319537353515625,
      "step": 52353,
      "training_step_time": 0.6209864616394043
    },
    {
      "epoch": 0.00031954345703125,
      "model_forward_time": 0.11509251594543457,
      "step": 52354
    },
    {
      "epoch": 0.00031954345703125,
      "step": 52354,
      "training_step_time": 0.44183945655822754
    },
    {
      "epoch": 0.000319549560546875,
      "model_forward_time": 0.11519956588745117,
      "step": 52355
    },
    {
      "epoch": 0.000319549560546875,
      "step": 52355,
      "training_step_time": 0.391650915145874
    },
    {
      "epoch": 0.0003195556640625,
      "model_forward_time": 0.11432456970214844,
      "step": 52356
    },
    {
      "epoch": 0.0003195556640625,
      "step": 52356,
      "training_step_time": 0.42025303840637207
    },
    {
      "epoch": 0.000319561767578125,
      "model_forward_time": 0.11520648002624512,
      "step": 52357
    },
    {
      "epoch": 0.000319561767578125,
      "step": 52357,
      "training_step_time": 0.4155573844909668
    },
    {
      "epoch": 0.00031956787109375,
      "model_forward_time": 0.11509561538696289,
      "step": 52358
    },
    {
      "epoch": 0.00031956787109375,
      "step": 52358,
      "training_step_time": 0.4730110168457031
    },
    {
      "epoch": 0.000319573974609375,
      "model_forward_time": 0.11458635330200195,
      "step": 52359
    },
    {
      "epoch": 0.000319573974609375,
      "step": 52359,
      "training_step_time": 0.5397379398345947
    },
    {
      "epoch": 0.000319580078125,
      "grad_norm": 0.09385828673839569,
      "learning_rate": 4.367673203442241e-06,
      "loss": 0.0329,
      "step": 52360
    },
    {
      "epoch": 0.000319580078125,
      "model_forward_time": 0.11525321006774902,
      "step": 52360
    },
    {
      "epoch": 0.000319580078125,
      "step": 52360,
      "training_step_time": 0.44313907623291016
    },
    {
      "epoch": 0.000319586181640625,
      "model_forward_time": 0.11459732055664062,
      "step": 52361
    },
    {
      "epoch": 0.000319586181640625,
      "step": 52361,
      "training_step_time": 0.4322350025177002
    },
    {
      "epoch": 0.00031959228515625,
      "model_forward_time": 0.11478352546691895,
      "step": 52362
    },
    {
      "epoch": 0.00031959228515625,
      "step": 52362,
      "training_step_time": 0.3921513557434082
    },
    {
      "epoch": 0.000319598388671875,
      "model_forward_time": 0.11472845077514648,
      "step": 52363
    },
    {
      "epoch": 0.000319598388671875,
      "step": 52363,
      "training_step_time": 0.3957545757293701
    },
    {
      "epoch": 0.0003196044921875,
      "model_forward_time": 0.11493277549743652,
      "step": 52364
    },
    {
      "epoch": 0.0003196044921875,
      "step": 52364,
      "training_step_time": 0.3984713554382324
    },
    {
      "epoch": 0.000319610595703125,
      "model_forward_time": 0.11507701873779297,
      "step": 52365
    },
    {
      "epoch": 0.000319610595703125,
      "step": 52365,
      "training_step_time": 0.5838534832000732
    },
    {
      "epoch": 0.00031961669921875,
      "model_forward_time": 0.11489987373352051,
      "step": 52366
    },
    {
      "epoch": 0.00031961669921875,
      "step": 52366,
      "training_step_time": 0.41663575172424316
    },
    {
      "epoch": 0.000319622802734375,
      "model_forward_time": 0.11600875854492188,
      "step": 52367
    },
    {
      "epoch": 0.000319622802734375,
      "step": 52367,
      "training_step_time": 0.4282655715942383
    },
    {
      "epoch": 0.00031962890625,
      "model_forward_time": 0.11527824401855469,
      "step": 52368
    },
    {
      "epoch": 0.00031962890625,
      "step": 52368,
      "training_step_time": 0.42049670219421387
    },
    {
      "epoch": 0.000319635009765625,
      "model_forward_time": 0.11510205268859863,
      "step": 52369
    },
    {
      "epoch": 0.000319635009765625,
      "step": 52369,
      "training_step_time": 0.3948543071746826
    },
    {
      "epoch": 0.00031964111328125,
      "grad_norm": 0.08591011166572571,
      "learning_rate": 4.356415880661197e-06,
      "loss": 0.0336,
      "step": 52370
    },
    {
      "epoch": 0.00031964111328125,
      "model_forward_time": 0.11484360694885254,
      "step": 52370
    },
    {
      "epoch": 0.00031964111328125,
      "step": 52370,
      "training_step_time": 0.4697091579437256
    },
    {
      "epoch": 0.000319647216796875,
      "model_forward_time": 0.11492395401000977,
      "step": 52371
    },
    {
      "epoch": 0.000319647216796875,
      "step": 52371,
      "training_step_time": 0.49875354766845703
    },
    {
      "epoch": 0.0003196533203125,
      "model_forward_time": 0.11457562446594238,
      "step": 52372
    },
    {
      "epoch": 0.0003196533203125,
      "step": 52372,
      "training_step_time": 0.43621158599853516
    },
    {
      "epoch": 0.000319659423828125,
      "model_forward_time": 0.1152801513671875,
      "step": 52373
    },
    {
      "epoch": 0.000319659423828125,
      "step": 52373,
      "training_step_time": 0.4559504985809326
    },
    {
      "epoch": 0.00031966552734375,
      "model_forward_time": 0.11521577835083008,
      "step": 52374
    },
    {
      "epoch": 0.00031966552734375,
      "step": 52374,
      "training_step_time": 0.42142534255981445
    },
    {
      "epoch": 0.000319671630859375,
      "model_forward_time": 0.11545467376708984,
      "step": 52375
    },
    {
      "epoch": 0.000319671630859375,
      "step": 52375,
      "training_step_time": 0.418670654296875
    },
    {
      "epoch": 0.000319677734375,
      "model_forward_time": 0.11596417427062988,
      "step": 52376
    },
    {
      "epoch": 0.000319677734375,
      "step": 52376,
      "training_step_time": 0.3975670337677002
    },
    {
      "epoch": 0.000319683837890625,
      "model_forward_time": 0.11482524871826172,
      "step": 52377
    },
    {
      "epoch": 0.000319683837890625,
      "step": 52377,
      "training_step_time": 0.4982004165649414
    },
    {
      "epoch": 0.00031968994140625,
      "model_forward_time": 0.1156473159790039,
      "step": 52378
    },
    {
      "epoch": 0.00031968994140625,
      "step": 52378,
      "training_step_time": 0.40613603591918945
    },
    {
      "epoch": 0.000319696044921875,
      "model_forward_time": 0.11509013175964355,
      "step": 52379
    },
    {
      "epoch": 0.000319696044921875,
      "step": 52379,
      "training_step_time": 0.391599178314209
    },
    {
      "epoch": 0.0003197021484375,
      "grad_norm": 0.10903850197792053,
      "learning_rate": 4.3451724231958644e-06,
      "loss": 0.0366,
      "step": 52380
    },
    {
      "epoch": 0.0003197021484375,
      "model_forward_time": 0.11504030227661133,
      "step": 52380
    },
    {
      "epoch": 0.0003197021484375,
      "step": 52380,
      "training_step_time": 0.377671480178833
    },
    {
      "epoch": 0.000319708251953125,
      "model_forward_time": 0.11515069007873535,
      "step": 52381
    },
    {
      "epoch": 0.000319708251953125,
      "step": 52381,
      "training_step_time": 0.4449031352996826
    },
    {
      "epoch": 0.00031971435546875,
      "model_forward_time": 0.11454319953918457,
      "step": 52382
    },
    {
      "epoch": 0.00031971435546875,
      "step": 52382,
      "training_step_time": 0.4122583866119385
    },
    {
      "epoch": 0.000319720458984375,
      "model_forward_time": 0.1147451400756836,
      "step": 52383
    },
    {
      "epoch": 0.000319720458984375,
      "step": 52383,
      "training_step_time": 0.590069055557251
    },
    {
      "epoch": 0.0003197265625,
      "model_forward_time": 0.11510896682739258,
      "step": 52384
    },
    {
      "epoch": 0.0003197265625,
      "step": 52384,
      "training_step_time": 0.4354383945465088
    },
    {
      "epoch": 0.000319732666015625,
      "model_forward_time": 0.11477518081665039,
      "step": 52385
    },
    {
      "epoch": 0.000319732666015625,
      "step": 52385,
      "training_step_time": 0.3975241184234619
    },
    {
      "epoch": 0.00031973876953125,
      "model_forward_time": 0.11479353904724121,
      "step": 52386
    },
    {
      "epoch": 0.00031973876953125,
      "step": 52386,
      "training_step_time": 0.4747018814086914
    },
    {
      "epoch": 0.000319744873046875,
      "model_forward_time": 0.114471435546875,
      "step": 52387
    },
    {
      "epoch": 0.000319744873046875,
      "step": 52387,
      "training_step_time": 0.478283166885376
    },
    {
      "epoch": 0.0003197509765625,
      "model_forward_time": 0.11406898498535156,
      "step": 52388
    },
    {
      "epoch": 0.0003197509765625,
      "step": 52388,
      "training_step_time": 0.40495729446411133
    },
    {
      "epoch": 0.000319757080078125,
      "model_forward_time": 0.11478662490844727,
      "step": 52389
    },
    {
      "epoch": 0.000319757080078125,
      "step": 52389,
      "training_step_time": 0.432964563369751
    },
    {
      "epoch": 0.00031976318359375,
      "grad_norm": 0.09087193757295609,
      "learning_rate": 4.333942834461702e-06,
      "loss": 0.0368,
      "step": 52390
    },
    {
      "epoch": 0.00031976318359375,
      "model_forward_time": 0.11525487899780273,
      "step": 52390
    },
    {
      "epoch": 0.00031976318359375,
      "step": 52390,
      "training_step_time": 0.39738917350769043
    },
    {
      "epoch": 0.000319769287109375,
      "model_forward_time": 0.11477327346801758,
      "step": 52391
    },
    {
      "epoch": 0.000319769287109375,
      "step": 52391,
      "training_step_time": 0.3895289897918701
    },
    {
      "epoch": 0.000319775390625,
      "model_forward_time": 0.11476945877075195,
      "step": 52392
    },
    {
      "epoch": 0.000319775390625,
      "step": 52392,
      "training_step_time": 0.3990962505340576
    },
    {
      "epoch": 0.000319781494140625,
      "model_forward_time": 0.11472368240356445,
      "step": 52393
    },
    {
      "epoch": 0.000319781494140625,
      "step": 52393,
      "training_step_time": 0.45455241203308105
    },
    {
      "epoch": 0.00031978759765625,
      "model_forward_time": 0.1150350570678711,
      "step": 52394
    },
    {
      "epoch": 0.00031978759765625,
      "step": 52394,
      "training_step_time": 0.3794422149658203
    },
    {
      "epoch": 0.000319793701171875,
      "model_forward_time": 0.11529231071472168,
      "step": 52395
    },
    {
      "epoch": 0.000319793701171875,
      "step": 52395,
      "training_step_time": 0.44896507263183594
    },
    {
      "epoch": 0.0003197998046875,
      "model_forward_time": 0.11551332473754883,
      "step": 52396
    },
    {
      "epoch": 0.0003197998046875,
      "step": 52396,
      "training_step_time": 0.4130392074584961
    },
    {
      "epoch": 0.000319805908203125,
      "model_forward_time": 0.11532330513000488,
      "step": 52397
    },
    {
      "epoch": 0.000319805908203125,
      "step": 52397,
      "training_step_time": 0.3888366222381592
    },
    {
      "epoch": 0.00031981201171875,
      "model_forward_time": 0.11561322212219238,
      "step": 52398
    },
    {
      "epoch": 0.00031981201171875,
      "step": 52398,
      "training_step_time": 0.44893670082092285
    },
    {
      "epoch": 0.000319818115234375,
      "model_forward_time": 0.11450719833374023,
      "step": 52399
    },
    {
      "epoch": 0.000319818115234375,
      "step": 52399,
      "training_step_time": 0.43080711364746094
    },
    {
      "epoch": 0.00031982421875,
      "grad_norm": 0.0997338816523552,
      "learning_rate": 4.322727117869951e-06,
      "loss": 0.0361,
      "step": 52400
    },
    {
      "epoch": 0.00031982421875,
      "model_forward_time": 0.11471819877624512,
      "step": 52400
    },
    {
      "epoch": 0.00031982421875,
      "step": 52400,
      "training_step_time": 0.3925039768218994
    },
    {
      "epoch": 0.000319830322265625,
      "model_forward_time": 0.11541295051574707,
      "step": 52401
    },
    {
      "epoch": 0.000319830322265625,
      "step": 52401,
      "training_step_time": 0.5318121910095215
    },
    {
      "epoch": 0.00031983642578125,
      "model_forward_time": 0.11468172073364258,
      "step": 52402
    },
    {
      "epoch": 0.00031983642578125,
      "step": 52402,
      "training_step_time": 0.3963632583618164
    },
    {
      "epoch": 0.000319842529296875,
      "model_forward_time": 0.11484146118164062,
      "step": 52403
    },
    {
      "epoch": 0.000319842529296875,
      "step": 52403,
      "training_step_time": 0.4179959297180176
    },
    {
      "epoch": 0.0003198486328125,
      "model_forward_time": 0.11492276191711426,
      "step": 52404
    },
    {
      "epoch": 0.0003198486328125,
      "step": 52404,
      "training_step_time": 0.40523767471313477
    },
    {
      "epoch": 0.000319854736328125,
      "model_forward_time": 0.11484599113464355,
      "step": 52405
    },
    {
      "epoch": 0.000319854736328125,
      "step": 52405,
      "training_step_time": 0.3928065299987793
    },
    {
      "epoch": 0.00031986083984375,
      "model_forward_time": 0.1149895191192627,
      "step": 52406
    },
    {
      "epoch": 0.00031986083984375,
      "step": 52406,
      "training_step_time": 0.39319467544555664
    },
    {
      "epoch": 0.000319866943359375,
      "model_forward_time": 0.11537313461303711,
      "step": 52407
    },
    {
      "epoch": 0.000319866943359375,
      "step": 52407,
      "training_step_time": 0.7183582782745361
    },
    {
      "epoch": 0.000319873046875,
      "model_forward_time": 0.11548948287963867,
      "step": 52408
    },
    {
      "epoch": 0.000319873046875,
      "step": 52408,
      "training_step_time": 0.41002607345581055
    },
    {
      "epoch": 0.000319879150390625,
      "model_forward_time": 0.11552810668945312,
      "step": 52409
    },
    {
      "epoch": 0.000319879150390625,
      "step": 52409,
      "training_step_time": 0.42334890365600586
    },
    {
      "epoch": 0.00031988525390625,
      "grad_norm": 0.07839152961969376,
      "learning_rate": 4.311525276827682e-06,
      "loss": 0.0345,
      "step": 52410
    },
    {
      "epoch": 0.00031988525390625,
      "model_forward_time": 0.11501049995422363,
      "step": 52410
    },
    {
      "epoch": 0.00031988525390625,
      "step": 52410,
      "training_step_time": 0.4320945739746094
    },
    {
      "epoch": 0.000319891357421875,
      "model_forward_time": 0.11439990997314453,
      "step": 52411
    },
    {
      "epoch": 0.000319891357421875,
      "step": 52411,
      "training_step_time": 0.3854842185974121
    },
    {
      "epoch": 0.0003198974609375,
      "model_forward_time": 0.11447930335998535,
      "step": 52412
    },
    {
      "epoch": 0.0003198974609375,
      "step": 52412,
      "training_step_time": 0.4234297275543213
    },
    {
      "epoch": 0.000319903564453125,
      "model_forward_time": 0.1151125431060791,
      "step": 52413
    },
    {
      "epoch": 0.000319903564453125,
      "step": 52413,
      "training_step_time": 0.5660960674285889
    },
    {
      "epoch": 0.00031990966796875,
      "model_forward_time": 0.11420774459838867,
      "step": 52414
    },
    {
      "epoch": 0.00031990966796875,
      "step": 52414,
      "training_step_time": 0.3817732334136963
    },
    {
      "epoch": 0.000319915771484375,
      "model_forward_time": 0.11510634422302246,
      "step": 52415
    },
    {
      "epoch": 0.000319915771484375,
      "step": 52415,
      "training_step_time": 0.38733363151550293
    },
    {
      "epoch": 0.000319921875,
      "model_forward_time": 0.11513185501098633,
      "step": 52416
    },
    {
      "epoch": 0.000319921875,
      "step": 52416,
      "training_step_time": 0.39894533157348633
    },
    {
      "epoch": 0.000319927978515625,
      "model_forward_time": 0.11597180366516113,
      "step": 52417
    },
    {
      "epoch": 0.000319927978515625,
      "step": 52417,
      "training_step_time": 0.4102447032928467
    },
    {
      "epoch": 0.00031993408203125,
      "model_forward_time": 0.11558008193969727,
      "step": 52418
    },
    {
      "epoch": 0.00031993408203125,
      "step": 52418,
      "training_step_time": 0.46756601333618164
    },
    {
      "epoch": 0.000319940185546875,
      "model_forward_time": 0.11518526077270508,
      "step": 52419
    },
    {
      "epoch": 0.000319940185546875,
      "step": 52419,
      "training_step_time": 0.6785688400268555
    },
    {
      "epoch": 0.0003199462890625,
      "grad_norm": 0.09742103517055511,
      "learning_rate": 4.300337314737685e-06,
      "loss": 0.034,
      "step": 52420
    },
    {
      "epoch": 0.0003199462890625,
      "model_forward_time": 0.1146697998046875,
      "step": 52420
    },
    {
      "epoch": 0.0003199462890625,
      "step": 52420,
      "training_step_time": 0.4004545211791992
    },
    {
      "epoch": 0.000319952392578125,
      "model_forward_time": 0.11399197578430176,
      "step": 52421
    },
    {
      "epoch": 0.000319952392578125,
      "step": 52421,
      "training_step_time": 0.3842341899871826
    },
    {
      "epoch": 0.00031995849609375,
      "model_forward_time": 0.11467623710632324,
      "step": 52422
    },
    {
      "epoch": 0.00031995849609375,
      "step": 52422,
      "training_step_time": 0.39502978324890137
    },
    {
      "epoch": 0.000319964599609375,
      "model_forward_time": 0.11468029022216797,
      "step": 52423
    },
    {
      "epoch": 0.000319964599609375,
      "step": 52423,
      "training_step_time": 0.4679679870605469
    },
    {
      "epoch": 0.000319970703125,
      "model_forward_time": 0.11473536491394043,
      "step": 52424
    },
    {
      "epoch": 0.000319970703125,
      "step": 52424,
      "training_step_time": 0.4171624183654785
    },
    {
      "epoch": 0.000319976806640625,
      "model_forward_time": 0.11529207229614258,
      "step": 52425
    },
    {
      "epoch": 0.000319976806640625,
      "step": 52425,
      "training_step_time": 0.5938272476196289
    },
    {
      "epoch": 0.00031998291015625,
      "model_forward_time": 0.1149148941040039,
      "step": 52426
    },
    {
      "epoch": 0.00031998291015625,
      "step": 52426,
      "training_step_time": 0.39190101623535156
    },
    {
      "epoch": 0.000319989013671875,
      "model_forward_time": 0.11414504051208496,
      "step": 52427
    },
    {
      "epoch": 0.000319989013671875,
      "step": 52427,
      "training_step_time": 0.40769410133361816
    },
    {
      "epoch": 0.0003199951171875,
      "model_forward_time": 0.11532354354858398,
      "step": 52428
    },
    {
      "epoch": 0.0003199951171875,
      "step": 52428,
      "training_step_time": 0.40181446075439453
    },
    {
      "epoch": 0.000320001220703125,
      "model_forward_time": 0.11529803276062012,
      "step": 52429
    },
    {
      "epoch": 0.000320001220703125,
      "step": 52429,
      "training_step_time": 0.39672398567199707
    },
    {
      "epoch": 0.00032000732421875,
      "grad_norm": 0.09409142285585403,
      "learning_rate": 4.289163234998589e-06,
      "loss": 0.0335,
      "step": 52430
    },
    {
      "epoch": 0.00032000732421875,
      "model_forward_time": 0.11513853073120117,
      "step": 52430
    },
    {
      "epoch": 0.00032000732421875,
      "step": 52430,
      "training_step_time": 0.4693777561187744
    },
    {
      "epoch": 0.000320013427734375,
      "model_forward_time": 0.11514449119567871,
      "step": 52431
    },
    {
      "epoch": 0.000320013427734375,
      "step": 52431,
      "training_step_time": 0.6108019351959229
    },
    {
      "epoch": 0.00032001953125,
      "model_forward_time": 0.11534500122070312,
      "step": 52432
    },
    {
      "epoch": 0.00032001953125,
      "step": 52432,
      "training_step_time": 0.4585914611816406
    },
    {
      "epoch": 0.000320025634765625,
      "model_forward_time": 0.1147303581237793,
      "step": 52433
    },
    {
      "epoch": 0.000320025634765625,
      "step": 52433,
      "training_step_time": 0.40357327461242676
    },
    {
      "epoch": 0.00032003173828125,
      "model_forward_time": 0.11431479454040527,
      "step": 52434
    },
    {
      "epoch": 0.00032003173828125,
      "step": 52434,
      "training_step_time": 0.38101649284362793
    },
    {
      "epoch": 0.000320037841796875,
      "model_forward_time": 0.114776611328125,
      "step": 52435
    },
    {
      "epoch": 0.000320037841796875,
      "step": 52435,
      "training_step_time": 0.3904073238372803
    },
    {
      "epoch": 0.0003200439453125,
      "model_forward_time": 0.11575651168823242,
      "step": 52436
    },
    {
      "epoch": 0.0003200439453125,
      "step": 52436,
      "training_step_time": 0.42843031883239746
    },
    {
      "epoch": 0.000320050048828125,
      "model_forward_time": 0.11606931686401367,
      "step": 52437
    },
    {
      "epoch": 0.000320050048828125,
      "step": 52437,
      "training_step_time": 0.5369679927825928
    },
    {
      "epoch": 0.00032005615234375,
      "model_forward_time": 0.11606311798095703,
      "step": 52438
    },
    {
      "epoch": 0.00032005615234375,
      "step": 52438,
      "training_step_time": 0.39992594718933105
    },
    {
      "epoch": 0.000320062255859375,
      "model_forward_time": 0.11546063423156738,
      "step": 52439
    },
    {
      "epoch": 0.000320062255859375,
      "step": 52439,
      "training_step_time": 0.45058417320251465
    },
    {
      "epoch": 0.000320068359375,
      "grad_norm": 0.08973030000925064,
      "learning_rate": 4.27800304100478e-06,
      "loss": 0.0394,
      "step": 52440
    },
    {
      "epoch": 0.000320068359375,
      "model_forward_time": 0.11454653739929199,
      "step": 52440
    },
    {
      "epoch": 0.000320068359375,
      "step": 52440,
      "training_step_time": 0.4355649948120117
    },
    {
      "epoch": 0.000320074462890625,
      "model_forward_time": 0.11536622047424316,
      "step": 52441
    },
    {
      "epoch": 0.000320074462890625,
      "step": 52441,
      "training_step_time": 0.41329407691955566
    },
    {
      "epoch": 0.00032008056640625,
      "model_forward_time": 0.11458945274353027,
      "step": 52442
    },
    {
      "epoch": 0.00032008056640625,
      "step": 52442,
      "training_step_time": 0.4100356101989746
    },
    {
      "epoch": 0.000320086669921875,
      "model_forward_time": 0.11546659469604492,
      "step": 52443
    },
    {
      "epoch": 0.000320086669921875,
      "step": 52443,
      "training_step_time": 0.5139391422271729
    },
    {
      "epoch": 0.0003200927734375,
      "model_forward_time": 0.1144704818725586,
      "step": 52444
    },
    {
      "epoch": 0.0003200927734375,
      "step": 52444,
      "training_step_time": 0.4028961658477783
    },
    {
      "epoch": 0.000320098876953125,
      "model_forward_time": 0.11480927467346191,
      "step": 52445
    },
    {
      "epoch": 0.000320098876953125,
      "step": 52445,
      "training_step_time": 0.4278726577758789
    },
    {
      "epoch": 0.00032010498046875,
      "model_forward_time": 0.11461114883422852,
      "step": 52446
    },
    {
      "epoch": 0.00032010498046875,
      "step": 52446,
      "training_step_time": 0.39603710174560547
    },
    {
      "epoch": 0.000320111083984375,
      "model_forward_time": 0.11533594131469727,
      "step": 52447
    },
    {
      "epoch": 0.000320111083984375,
      "step": 52447,
      "training_step_time": 0.39517712593078613
    },
    {
      "epoch": 0.0003201171875,
      "model_forward_time": 0.1158757209777832,
      "step": 52448
    },
    {
      "epoch": 0.0003201171875,
      "step": 52448,
      "training_step_time": 0.3986389636993408
    },
    {
      "epoch": 0.000320123291015625,
      "model_forward_time": 0.11550354957580566,
      "step": 52449
    },
    {
      "epoch": 0.000320123291015625,
      "step": 52449,
      "training_step_time": 0.8847482204437256
    },
    {
      "epoch": 0.00032012939453125,
      "grad_norm": 0.09904865175485611,
      "learning_rate": 4.266856736146424e-06,
      "loss": 0.0363,
      "step": 52450
    },
    {
      "epoch": 0.00032012939453125,
      "model_forward_time": 0.11450934410095215,
      "step": 52450
    },
    {
      "epoch": 0.00032012939453125,
      "step": 52450,
      "training_step_time": 0.40200138092041016
    },
    {
      "epoch": 0.000320135498046875,
      "model_forward_time": 0.1160891056060791,
      "step": 52451
    },
    {
      "epoch": 0.000320135498046875,
      "step": 52451,
      "training_step_time": 0.47034287452697754
    },
    {
      "epoch": 0.0003201416015625,
      "model_forward_time": 0.11435341835021973,
      "step": 52452
    },
    {
      "epoch": 0.0003201416015625,
      "step": 52452,
      "training_step_time": 0.4409213066101074
    },
    {
      "epoch": 0.000320147705078125,
      "model_forward_time": 0.11611413955688477,
      "step": 52453
    },
    {
      "epoch": 0.000320147705078125,
      "step": 52453,
      "training_step_time": 0.39533376693725586
    },
    {
      "epoch": 0.00032015380859375,
      "model_forward_time": 0.1149134635925293,
      "step": 52454
    },
    {
      "epoch": 0.00032015380859375,
      "step": 52454,
      "training_step_time": 0.46364283561706543
    },
    {
      "epoch": 0.000320159912109375,
      "model_forward_time": 0.11471986770629883,
      "step": 52455
    },
    {
      "epoch": 0.000320159912109375,
      "step": 52455,
      "training_step_time": 0.5726699829101562
    },
    {
      "epoch": 0.000320166015625,
      "model_forward_time": 0.11506819725036621,
      "step": 52456
    },
    {
      "epoch": 0.000320166015625,
      "step": 52456,
      "training_step_time": 0.3966357707977295
    },
    {
      "epoch": 0.000320172119140625,
      "model_forward_time": 0.11521625518798828,
      "step": 52457
    },
    {
      "epoch": 0.000320172119140625,
      "step": 52457,
      "training_step_time": 0.3880188465118408
    },
    {
      "epoch": 0.00032017822265625,
      "model_forward_time": 0.11516809463500977,
      "step": 52458
    },
    {
      "epoch": 0.00032017822265625,
      "step": 52458,
      "training_step_time": 0.4588279724121094
    },
    {
      "epoch": 0.000320184326171875,
      "model_forward_time": 0.11519050598144531,
      "step": 52459
    },
    {
      "epoch": 0.000320184326171875,
      "step": 52459,
      "training_step_time": 0.4907660484313965
    },
    {
      "epoch": 0.0003201904296875,
      "grad_norm": 0.08657754957675934,
      "learning_rate": 4.255724323809479e-06,
      "loss": 0.0331,
      "step": 52460
    },
    {
      "epoch": 0.0003201904296875,
      "model_forward_time": 0.11425256729125977,
      "step": 52460
    },
    {
      "epoch": 0.0003201904296875,
      "step": 52460,
      "training_step_time": 0.3815786838531494
    },
    {
      "epoch": 0.000320196533203125,
      "model_forward_time": 0.1151888370513916,
      "step": 52461
    },
    {
      "epoch": 0.000320196533203125,
      "step": 52461,
      "training_step_time": 0.6485946178436279
    },
    {
      "epoch": 0.00032020263671875,
      "model_forward_time": 0.11437821388244629,
      "step": 52462
    },
    {
      "epoch": 0.00032020263671875,
      "step": 52462,
      "training_step_time": 0.396378755569458
    },
    {
      "epoch": 0.000320208740234375,
      "model_forward_time": 0.11441159248352051,
      "step": 52463
    },
    {
      "epoch": 0.000320208740234375,
      "step": 52463,
      "training_step_time": 0.363741397857666
    },
    {
      "epoch": 0.00032021484375,
      "model_forward_time": 0.11462712287902832,
      "step": 52464
    },
    {
      "epoch": 0.00032021484375,
      "step": 52464,
      "training_step_time": 0.42333531379699707
    },
    {
      "epoch": 0.000320220947265625,
      "model_forward_time": 0.11472058296203613,
      "step": 52465
    },
    {
      "epoch": 0.000320220947265625,
      "step": 52465,
      "training_step_time": 0.46366357803344727
    },
    {
      "epoch": 0.00032022705078125,
      "model_forward_time": 0.1146688461303711,
      "step": 52466
    },
    {
      "epoch": 0.00032022705078125,
      "step": 52466,
      "training_step_time": 0.3936455249786377
    },
    {
      "epoch": 0.000320233154296875,
      "model_forward_time": 0.11541342735290527,
      "step": 52467
    },
    {
      "epoch": 0.000320233154296875,
      "step": 52467,
      "training_step_time": 0.4662504196166992
    },
    {
      "epoch": 0.0003202392578125,
      "model_forward_time": 0.11522507667541504,
      "step": 52468
    },
    {
      "epoch": 0.0003202392578125,
      "step": 52468,
      "training_step_time": 0.3870389461517334
    },
    {
      "epoch": 0.000320245361328125,
      "model_forward_time": 0.11561155319213867,
      "step": 52469
    },
    {
      "epoch": 0.000320245361328125,
      "step": 52469,
      "training_step_time": 0.39293432235717773
    },
    {
      "epoch": 0.00032025146484375,
      "grad_norm": 0.0964878648519516,
      "learning_rate": 4.244605807375679e-06,
      "loss": 0.0372,
      "step": 52470
    },
    {
      "epoch": 0.00032025146484375,
      "model_forward_time": 0.11525082588195801,
      "step": 52470
    },
    {
      "epoch": 0.00032025146484375,
      "step": 52470,
      "training_step_time": 0.39948105812072754
    },
    {
      "epoch": 0.000320257568359375,
      "model_forward_time": 0.11536121368408203,
      "step": 52471
    },
    {
      "epoch": 0.000320257568359375,
      "step": 52471,
      "training_step_time": 0.40696167945861816
    },
    {
      "epoch": 0.000320263671875,
      "model_forward_time": 0.11580657958984375,
      "step": 52472
    },
    {
      "epoch": 0.000320263671875,
      "step": 52472,
      "training_step_time": 0.44601917266845703
    },
    {
      "epoch": 0.000320269775390625,
      "model_forward_time": 0.11570930480957031,
      "step": 52473
    },
    {
      "epoch": 0.000320269775390625,
      "step": 52473,
      "training_step_time": 0.5302410125732422
    },
    {
      "epoch": 0.00032027587890625,
      "model_forward_time": 0.1155557632446289,
      "step": 52474
    },
    {
      "epoch": 0.00032027587890625,
      "step": 52474,
      "training_step_time": 0.3889272212982178
    },
    {
      "epoch": 0.000320281982421875,
      "model_forward_time": 0.11539244651794434,
      "step": 52475
    },
    {
      "epoch": 0.000320281982421875,
      "step": 52475,
      "training_step_time": 0.3892381191253662
    },
    {
      "epoch": 0.0003202880859375,
      "model_forward_time": 0.11594772338867188,
      "step": 52476
    },
    {
      "epoch": 0.0003202880859375,
      "step": 52476,
      "training_step_time": 0.40307116508483887
    },
    {
      "epoch": 0.000320294189453125,
      "model_forward_time": 0.1147608757019043,
      "step": 52477
    },
    {
      "epoch": 0.000320294189453125,
      "step": 52477,
      "training_step_time": 0.39000511169433594
    },
    {
      "epoch": 0.00032030029296875,
      "model_forward_time": 0.11514115333557129,
      "step": 52478
    },
    {
      "epoch": 0.00032030029296875,
      "step": 52478,
      "training_step_time": 0.39031434059143066
    },
    {
      "epoch": 0.000320306396484375,
      "model_forward_time": 0.1151587963104248,
      "step": 52479
    },
    {
      "epoch": 0.000320306396484375,
      "step": 52479,
      "training_step_time": 0.49670863151550293
    },
    {
      "epoch": 0.0003203125,
      "grad_norm": 0.11344140768051147,
      "learning_rate": 4.233501190222533e-06,
      "loss": 0.0357,
      "step": 52480
    },
    {
      "epoch": 0.0003203125,
      "model_forward_time": 0.11518478393554688,
      "step": 52480
    },
    {
      "epoch": 0.0003203125,
      "step": 52480,
      "training_step_time": 0.4650459289550781
    },
    {
      "epoch": 0.000320318603515625,
      "model_forward_time": 0.11520600318908691,
      "step": 52481
    },
    {
      "epoch": 0.000320318603515625,
      "step": 52481,
      "training_step_time": 0.403822660446167
    },
    {
      "epoch": 0.00032032470703125,
      "model_forward_time": 0.11504960060119629,
      "step": 52482
    },
    {
      "epoch": 0.00032032470703125,
      "step": 52482,
      "training_step_time": 0.4014313220977783
    },
    {
      "epoch": 0.000320330810546875,
      "model_forward_time": 0.11462616920471191,
      "step": 52483
    },
    {
      "epoch": 0.000320330810546875,
      "step": 52483,
      "training_step_time": 0.39098572731018066
    },
    {
      "epoch": 0.0003203369140625,
      "model_forward_time": 0.11575913429260254,
      "step": 52484
    },
    {
      "epoch": 0.0003203369140625,
      "step": 52484,
      "training_step_time": 0.39646410942077637
    },
    {
      "epoch": 0.000320343017578125,
      "model_forward_time": 0.11498141288757324,
      "step": 52485
    },
    {
      "epoch": 0.000320343017578125,
      "step": 52485,
      "training_step_time": 0.4955179691314697
    },
    {
      "epoch": 0.00032034912109375,
      "model_forward_time": 0.11507010459899902,
      "step": 52486
    },
    {
      "epoch": 0.00032034912109375,
      "step": 52486,
      "training_step_time": 0.39074206352233887
    },
    {
      "epoch": 0.000320355224609375,
      "model_forward_time": 0.11498641967773438,
      "step": 52487
    },
    {
      "epoch": 0.000320355224609375,
      "step": 52487,
      "training_step_time": 0.40563297271728516
    },
    {
      "epoch": 0.000320361328125,
      "model_forward_time": 0.11568307876586914,
      "step": 52488
    },
    {
      "epoch": 0.000320361328125,
      "step": 52488,
      "training_step_time": 0.48825883865356445
    },
    {
      "epoch": 0.000320367431640625,
      "model_forward_time": 0.11649656295776367,
      "step": 52489
    },
    {
      "epoch": 0.000320367431640625,
      "step": 52489,
      "training_step_time": 0.4694535732269287
    },
    {
      "epoch": 0.00032037353515625,
      "grad_norm": 0.09480055421590805,
      "learning_rate": 4.222410475723326e-06,
      "loss": 0.0324,
      "step": 52490
    },
    {
      "epoch": 0.00032037353515625,
      "model_forward_time": 0.11834096908569336,
      "step": 52490
    },
    {
      "epoch": 0.00032037353515625,
      "step": 52490,
      "training_step_time": 0.554957389831543
    },
    {
      "epoch": 0.000320379638671875,
      "model_forward_time": 0.11557483673095703,
      "step": 52491
    },
    {
      "epoch": 0.000320379638671875,
      "step": 52491,
      "training_step_time": 0.6099767684936523
    },
    {
      "epoch": 0.0003203857421875,
      "model_forward_time": 0.1290297508239746,
      "step": 52492
    },
    {
      "epoch": 0.0003203857421875,
      "step": 52492,
      "training_step_time": 0.7437937259674072
    },
    {
      "epoch": 0.000320391845703125,
      "model_forward_time": 0.11816740036010742,
      "step": 52493
    },
    {
      "epoch": 0.000320391845703125,
      "step": 52493,
      "training_step_time": 0.6905069351196289
    },
    {
      "epoch": 0.00032039794921875,
      "model_forward_time": 0.12326526641845703,
      "step": 52494
    },
    {
      "epoch": 0.00032039794921875,
      "step": 52494,
      "training_step_time": 0.6323971748352051
    },
    {
      "epoch": 0.000320404052734375,
      "model_forward_time": 0.11952757835388184,
      "step": 52495
    },
    {
      "epoch": 0.000320404052734375,
      "step": 52495,
      "training_step_time": 0.6986064910888672
    },
    {
      "epoch": 0.00032041015625,
      "model_forward_time": 0.12384438514709473,
      "step": 52496
    },
    {
      "epoch": 0.00032041015625,
      "step": 52496,
      "training_step_time": 0.7067840099334717
    },
    {
      "epoch": 0.000320416259765625,
      "model_forward_time": 0.12331008911132812,
      "step": 52497
    },
    {
      "epoch": 0.000320416259765625,
      "step": 52497,
      "training_step_time": 0.6839511394500732
    },
    {
      "epoch": 0.00032042236328125,
      "model_forward_time": 0.12490606307983398,
      "step": 52498
    },
    {
      "epoch": 0.00032042236328125,
      "step": 52498,
      "training_step_time": 0.6554484367370605
    },
    {
      "epoch": 0.000320428466796875,
      "model_forward_time": 0.12300634384155273,
      "step": 52499
    },
    {
      "epoch": 0.000320428466796875,
      "step": 52499,
      "training_step_time": 0.7455167770385742
    },
    {
      "epoch": 0.0003204345703125,
      "grad_norm": 0.10024581104516983,
      "learning_rate": 4.2113336672471245e-06,
      "loss": 0.0351,
      "step": 52500
    },
    {
      "epoch": 0.0003204345703125,
      "model_forward_time": 0.12493395805358887,
      "step": 52500
    },
    {
      "epoch": 0.0003204345703125,
      "step": 52500,
      "training_step_time": 0.6584293842315674
    },
    {
      "epoch": 0.000320440673828125,
      "model_forward_time": 0.12331485748291016,
      "step": 52501
    },
    {
      "epoch": 0.000320440673828125,
      "step": 52501,
      "training_step_time": 0.5822553634643555
    },
    {
      "epoch": 0.00032044677734375,
      "model_forward_time": 0.11694717407226562,
      "step": 52502
    },
    {
      "epoch": 0.00032044677734375,
      "step": 52502,
      "training_step_time": 0.7139663696289062
    },
    {
      "epoch": 0.000320452880859375,
      "model_forward_time": 0.1659989356994629,
      "step": 52503
    },
    {
      "epoch": 0.000320452880859375,
      "step": 52503,
      "training_step_time": 0.6858179569244385
    },
    {
      "epoch": 0.000320458984375,
      "model_forward_time": 0.1266636848449707,
      "step": 52504
    },
    {
      "epoch": 0.000320458984375,
      "step": 52504,
      "training_step_time": 0.7203381061553955
    },
    {
      "epoch": 0.000320465087890625,
      "model_forward_time": 0.11982297897338867,
      "step": 52505
    },
    {
      "epoch": 0.000320465087890625,
      "step": 52505,
      "training_step_time": 0.7330498695373535
    },
    {
      "epoch": 0.00032047119140625,
      "model_forward_time": 0.12138652801513672,
      "step": 52506
    },
    {
      "epoch": 0.00032047119140625,
      "step": 52506,
      "training_step_time": 0.6330337524414062
    },
    {
      "epoch": 0.000320477294921875,
      "model_forward_time": 0.12056565284729004,
      "step": 52507
    },
    {
      "epoch": 0.000320477294921875,
      "step": 52507,
      "training_step_time": 0.7130506038665771
    },
    {
      "epoch": 0.0003204833984375,
      "model_forward_time": 0.13397002220153809,
      "step": 52508
    },
    {
      "epoch": 0.0003204833984375,
      "step": 52508,
      "training_step_time": 0.693915843963623
    },
    {
      "epoch": 0.000320489501953125,
      "model_forward_time": 0.12442302703857422,
      "step": 52509
    },
    {
      "epoch": 0.000320489501953125,
      "step": 52509,
      "training_step_time": 0.6650750637054443
    },
    {
      "epoch": 0.00032049560546875,
      "grad_norm": 0.07816847413778305,
      "learning_rate": 4.2002707681587874e-06,
      "loss": 0.0423,
      "step": 52510
    },
    {
      "epoch": 0.00032049560546875,
      "model_forward_time": 0.12792110443115234,
      "step": 52510
    },
    {
      "epoch": 0.00032049560546875,
      "step": 52510,
      "training_step_time": 0.664499044418335
    },
    {
      "epoch": 0.000320501708984375,
      "model_forward_time": 0.11979889869689941,
      "step": 52511
    },
    {
      "epoch": 0.000320501708984375,
      "step": 52511,
      "training_step_time": 0.7319991588592529
    },
    {
      "epoch": 0.0003205078125,
      "model_forward_time": 0.11866426467895508,
      "step": 52512
    },
    {
      "epoch": 0.0003205078125,
      "step": 52512,
      "training_step_time": 0.6878876686096191
    },
    {
      "epoch": 0.000320513916015625,
      "model_forward_time": 0.11681985855102539,
      "step": 52513
    },
    {
      "epoch": 0.000320513916015625,
      "step": 52513,
      "training_step_time": 0.6925992965698242
    },
    {
      "epoch": 0.00032052001953125,
      "model_forward_time": 0.11852836608886719,
      "step": 52514
    },
    {
      "epoch": 0.00032052001953125,
      "step": 52514,
      "training_step_time": 0.7122228145599365
    },
    {
      "epoch": 0.000320526123046875,
      "model_forward_time": 0.13945412635803223,
      "step": 52515
    },
    {
      "epoch": 0.000320526123046875,
      "step": 52515,
      "training_step_time": 0.669292688369751
    },
    {
      "epoch": 0.0003205322265625,
      "model_forward_time": 0.11682295799255371,
      "step": 52516
    },
    {
      "epoch": 0.0003205322265625,
      "step": 52516,
      "training_step_time": 0.6507692337036133
    },
    {
      "epoch": 0.000320538330078125,
      "model_forward_time": 0.13688039779663086,
      "step": 52517
    },
    {
      "epoch": 0.000320538330078125,
      "step": 52517,
      "training_step_time": 0.7036421298980713
    },
    {
      "epoch": 0.00032054443359375,
      "model_forward_time": 0.11836624145507812,
      "step": 52518
    },
    {
      "epoch": 0.00032054443359375,
      "step": 52518,
      "training_step_time": 0.6592130661010742
    },
    {
      "epoch": 0.000320550537109375,
      "model_forward_time": 0.12237787246704102,
      "step": 52519
    },
    {
      "epoch": 0.000320550537109375,
      "step": 52519,
      "training_step_time": 0.7125914096832275
    },
    {
      "epoch": 0.000320556640625,
      "grad_norm": 0.07928669452667236,
      "learning_rate": 4.189221781818914e-06,
      "loss": 0.0414,
      "step": 52520
    },
    {
      "epoch": 0.000320556640625,
      "model_forward_time": 0.11900997161865234,
      "step": 52520
    },
    {
      "epoch": 0.000320556640625,
      "step": 52520,
      "training_step_time": 0.5751686096191406
    },
    {
      "epoch": 0.000320562744140625,
      "model_forward_time": 0.11760163307189941,
      "step": 52521
    },
    {
      "epoch": 0.000320562744140625,
      "step": 52521,
      "training_step_time": 0.69199538230896
    },
    {
      "epoch": 0.00032056884765625,
      "model_forward_time": 0.12084460258483887,
      "step": 52522
    },
    {
      "epoch": 0.00032056884765625,
      "step": 52522,
      "training_step_time": 0.6833846569061279
    },
    {
      "epoch": 0.000320574951171875,
      "model_forward_time": 0.12202811241149902,
      "step": 52523
    },
    {
      "epoch": 0.000320574951171875,
      "step": 52523,
      "training_step_time": 0.6658642292022705
    },
    {
      "epoch": 0.0003205810546875,
      "model_forward_time": 0.11787772178649902,
      "step": 52524
    },
    {
      "epoch": 0.0003205810546875,
      "step": 52524,
      "training_step_time": 0.6161129474639893
    },
    {
      "epoch": 0.000320587158203125,
      "model_forward_time": 0.1180868148803711,
      "step": 52525
    },
    {
      "epoch": 0.000320587158203125,
      "step": 52525,
      "training_step_time": 0.6599986553192139
    },
    {
      "epoch": 0.00032059326171875,
      "model_forward_time": 0.11923980712890625,
      "step": 52526
    },
    {
      "epoch": 0.00032059326171875,
      "step": 52526,
      "training_step_time": 0.6726799011230469
    },
    {
      "epoch": 0.000320599365234375,
      "model_forward_time": 0.11559200286865234,
      "step": 52527
    },
    {
      "epoch": 0.000320599365234375,
      "step": 52527,
      "training_step_time": 0.7466444969177246
    },
    {
      "epoch": 0.00032060546875,
      "model_forward_time": 0.11932063102722168,
      "step": 52528
    },
    {
      "epoch": 0.00032060546875,
      "step": 52528,
      "training_step_time": 0.6352558135986328
    },
    {
      "epoch": 0.000320611572265625,
      "model_forward_time": 0.12061381340026855,
      "step": 52529
    },
    {
      "epoch": 0.000320611572265625,
      "step": 52529,
      "training_step_time": 0.6692647933959961
    },
    {
      "epoch": 0.00032061767578125,
      "grad_norm": 0.11054804176092148,
      "learning_rate": 4.178186711583904e-06,
      "loss": 0.0358,
      "step": 52530
    },
    {
      "epoch": 0.00032061767578125,
      "model_forward_time": 0.11888670921325684,
      "step": 52530
    },
    {
      "epoch": 0.00032061767578125,
      "step": 52530,
      "training_step_time": 0.7531144618988037
    },
    {
      "epoch": 0.000320623779296875,
      "model_forward_time": 0.1256241798400879,
      "step": 52531
    },
    {
      "epoch": 0.000320623779296875,
      "step": 52531,
      "training_step_time": 0.7436795234680176
    },
    {
      "epoch": 0.0003206298828125,
      "model_forward_time": 0.11802887916564941,
      "step": 52532
    },
    {
      "epoch": 0.0003206298828125,
      "step": 52532,
      "training_step_time": 0.6316466331481934
    },
    {
      "epoch": 0.000320635986328125,
      "model_forward_time": 0.11752653121948242,
      "step": 52533
    },
    {
      "epoch": 0.000320635986328125,
      "step": 52533,
      "training_step_time": 0.7015571594238281
    },
    {
      "epoch": 0.00032064208984375,
      "model_forward_time": 0.11898493766784668,
      "step": 52534
    },
    {
      "epoch": 0.00032064208984375,
      "step": 52534,
      "training_step_time": 0.6989622116088867
    },
    {
      "epoch": 0.000320648193359375,
      "model_forward_time": 0.11670589447021484,
      "step": 52535
    },
    {
      "epoch": 0.000320648193359375,
      "step": 52535,
      "training_step_time": 0.703284502029419
    },
    {
      "epoch": 0.000320654296875,
      "model_forward_time": 0.11748909950256348,
      "step": 52536
    },
    {
      "epoch": 0.000320654296875,
      "step": 52536,
      "training_step_time": 0.6715981960296631
    },
    {
      "epoch": 0.000320660400390625,
      "model_forward_time": 0.11888623237609863,
      "step": 52537
    },
    {
      "epoch": 0.000320660400390625,
      "step": 52537,
      "training_step_time": 0.6788022518157959
    },
    {
      "epoch": 0.00032066650390625,
      "model_forward_time": 0.12010765075683594,
      "step": 52538
    },
    {
      "epoch": 0.00032066650390625,
      "step": 52538,
      "training_step_time": 0.6619656085968018
    },
    {
      "epoch": 0.000320672607421875,
      "model_forward_time": 0.1229393482208252,
      "step": 52539
    },
    {
      "epoch": 0.000320672607421875,
      "step": 52539,
      "training_step_time": 0.6747300624847412
    },
    {
      "epoch": 0.0003206787109375,
      "grad_norm": 0.11176566779613495,
      "learning_rate": 4.167165560805914e-06,
      "loss": 0.0385,
      "step": 52540
    },
    {
      "epoch": 0.0003206787109375,
      "model_forward_time": 0.11646342277526855,
      "step": 52540
    },
    {
      "epoch": 0.0003206787109375,
      "step": 52540,
      "training_step_time": 0.7131154537200928
    },
    {
      "epoch": 0.000320684814453125,
      "model_forward_time": 0.11698484420776367,
      "step": 52541
    },
    {
      "epoch": 0.000320684814453125,
      "step": 52541,
      "training_step_time": 0.7236466407775879
    },
    {
      "epoch": 0.00032069091796875,
      "model_forward_time": 0.11924123764038086,
      "step": 52542
    },
    {
      "epoch": 0.00032069091796875,
      "step": 52542,
      "training_step_time": 0.6374988555908203
    },
    {
      "epoch": 0.000320697021484375,
      "model_forward_time": 0.12136650085449219,
      "step": 52543
    },
    {
      "epoch": 0.000320697021484375,
      "step": 52543,
      "training_step_time": 0.635138750076294
    },
    {
      "epoch": 0.000320703125,
      "model_forward_time": 0.1251847743988037,
      "step": 52544
    },
    {
      "epoch": 0.000320703125,
      "step": 52544,
      "training_step_time": 0.7654836177825928
    },
    {
      "epoch": 0.000320709228515625,
      "model_forward_time": 0.12176942825317383,
      "step": 52545
    },
    {
      "epoch": 0.000320709228515625,
      "step": 52545,
      "training_step_time": 0.705275297164917
    },
    {
      "epoch": 0.00032071533203125,
      "model_forward_time": 0.11894011497497559,
      "step": 52546
    },
    {
      "epoch": 0.00032071533203125,
      "step": 52546,
      "training_step_time": 0.6596546173095703
    },
    {
      "epoch": 0.000320721435546875,
      "model_forward_time": 0.11813664436340332,
      "step": 52547
    },
    {
      "epoch": 0.000320721435546875,
      "step": 52547,
      "training_step_time": 0.6837232112884521
    },
    {
      "epoch": 0.0003207275390625,
      "model_forward_time": 0.1196441650390625,
      "step": 52548
    },
    {
      "epoch": 0.0003207275390625,
      "step": 52548,
      "training_step_time": 0.7208054065704346
    },
    {
      "epoch": 0.000320733642578125,
      "model_forward_time": 0.11834144592285156,
      "step": 52549
    },
    {
      "epoch": 0.000320733642578125,
      "step": 52549,
      "training_step_time": 0.7018511295318604
    },
    {
      "epoch": 0.00032073974609375,
      "grad_norm": 0.09481567144393921,
      "learning_rate": 4.1561583328328716e-06,
      "loss": 0.0389,
      "step": 52550
    },
    {
      "epoch": 0.00032073974609375,
      "model_forward_time": 0.11820554733276367,
      "step": 52550
    },
    {
      "epoch": 0.00032073974609375,
      "step": 52550,
      "training_step_time": 0.580777645111084
    },
    {
      "epoch": 0.000320745849609375,
      "model_forward_time": 0.11971378326416016,
      "step": 52551
    },
    {
      "epoch": 0.000320745849609375,
      "step": 52551,
      "training_step_time": 0.6346426010131836
    },
    {
      "epoch": 0.000320751953125,
      "model_forward_time": 0.12128663063049316,
      "step": 52552
    },
    {
      "epoch": 0.000320751953125,
      "step": 52552,
      "training_step_time": 0.5995171070098877
    },
    {
      "epoch": 0.000320758056640625,
      "model_forward_time": 0.12627220153808594,
      "step": 52553
    },
    {
      "epoch": 0.000320758056640625,
      "step": 52553,
      "training_step_time": 0.598275899887085
    },
    {
      "epoch": 0.00032076416015625,
      "model_forward_time": 0.12255239486694336,
      "step": 52554
    },
    {
      "epoch": 0.00032076416015625,
      "step": 52554,
      "training_step_time": 0.6782803535461426
    },
    {
      "epoch": 0.000320770263671875,
      "model_forward_time": 0.11957573890686035,
      "step": 52555
    },
    {
      "epoch": 0.000320770263671875,
      "step": 52555,
      "training_step_time": 0.6783292293548584
    },
    {
      "epoch": 0.0003207763671875,
      "model_forward_time": 0.1272740364074707,
      "step": 52556
    },
    {
      "epoch": 0.0003207763671875,
      "step": 52556,
      "training_step_time": 0.6066675186157227
    },
    {
      "epoch": 0.000320782470703125,
      "model_forward_time": 0.12057137489318848,
      "step": 52557
    },
    {
      "epoch": 0.000320782470703125,
      "step": 52557,
      "training_step_time": 0.6176204681396484
    },
    {
      "epoch": 0.00032078857421875,
      "model_forward_time": 0.12581634521484375,
      "step": 52558
    },
    {
      "epoch": 0.00032078857421875,
      "step": 52558,
      "training_step_time": 0.6359610557556152
    },
    {
      "epoch": 0.000320794677734375,
      "model_forward_time": 0.12296199798583984,
      "step": 52559
    },
    {
      "epoch": 0.000320794677734375,
      "step": 52559,
      "training_step_time": 0.591120719909668
    },
    {
      "epoch": 0.00032080078125,
      "grad_norm": 0.10350082814693451,
      "learning_rate": 4.145165031008508e-06,
      "loss": 0.0368,
      "step": 52560
    },
    {
      "epoch": 0.00032080078125,
      "model_forward_time": 0.11884427070617676,
      "step": 52560
    },
    {
      "epoch": 0.00032080078125,
      "step": 52560,
      "training_step_time": 0.6274616718292236
    },
    {
      "epoch": 0.000320806884765625,
      "model_forward_time": 0.11797022819519043,
      "step": 52561
    },
    {
      "epoch": 0.000320806884765625,
      "step": 52561,
      "training_step_time": 0.46579837799072266
    },
    {
      "epoch": 0.00032081298828125,
      "model_forward_time": 0.11649060249328613,
      "step": 52562
    },
    {
      "epoch": 0.00032081298828125,
      "step": 52562,
      "training_step_time": 0.4308643341064453
    },
    {
      "epoch": 0.000320819091796875,
      "model_forward_time": 0.11734676361083984,
      "step": 52563
    },
    {
      "epoch": 0.000320819091796875,
      "step": 52563,
      "training_step_time": 0.41460585594177246
    },
    {
      "epoch": 0.0003208251953125,
      "model_forward_time": 0.11634612083435059,
      "step": 52564
    },
    {
      "epoch": 0.0003208251953125,
      "step": 52564,
      "training_step_time": 0.40462303161621094
    },
    {
      "epoch": 0.000320831298828125,
      "model_forward_time": 0.11736345291137695,
      "step": 52565
    },
    {
      "epoch": 0.000320831298828125,
      "step": 52565,
      "training_step_time": 0.537736177444458
    },
    {
      "epoch": 0.00032083740234375,
      "model_forward_time": 0.1156313419342041,
      "step": 52566
    },
    {
      "epoch": 0.00032083740234375,
      "step": 52566,
      "training_step_time": 0.5115017890930176
    },
    {
      "epoch": 0.000320843505859375,
      "model_forward_time": 0.11508750915527344,
      "step": 52567
    },
    {
      "epoch": 0.000320843505859375,
      "step": 52567,
      "training_step_time": 0.4307219982147217
    },
    {
      "epoch": 0.000320849609375,
      "model_forward_time": 0.11541247367858887,
      "step": 52568
    },
    {
      "epoch": 0.000320849609375,
      "step": 52568,
      "training_step_time": 0.4069814682006836
    },
    {
      "epoch": 0.000320855712890625,
      "model_forward_time": 0.11499333381652832,
      "step": 52569
    },
    {
      "epoch": 0.000320855712890625,
      "step": 52569,
      "training_step_time": 0.4096369743347168
    },
    {
      "epoch": 0.00032086181640625,
      "grad_norm": 0.08617778867483139,
      "learning_rate": 4.1341856586722625e-06,
      "loss": 0.0354,
      "step": 52570
    },
    {
      "epoch": 0.00032086181640625,
      "model_forward_time": 0.11489510536193848,
      "step": 52570
    },
    {
      "epoch": 0.00032086181640625,
      "step": 52570,
      "training_step_time": 0.39129066467285156
    },
    {
      "epoch": 0.000320867919921875,
      "model_forward_time": 0.11507606506347656,
      "step": 52571
    },
    {
      "epoch": 0.000320867919921875,
      "step": 52571,
      "training_step_time": 0.41280603408813477
    },
    {
      "epoch": 0.0003208740234375,
      "model_forward_time": 0.11474061012268066,
      "step": 52572
    },
    {
      "epoch": 0.0003208740234375,
      "step": 52572,
      "training_step_time": 0.38791537284851074
    },
    {
      "epoch": 0.000320880126953125,
      "model_forward_time": 0.11507749557495117,
      "step": 52573
    },
    {
      "epoch": 0.000320880126953125,
      "step": 52573,
      "training_step_time": 0.4290332794189453
    },
    {
      "epoch": 0.00032088623046875,
      "model_forward_time": 0.1153266429901123,
      "step": 52574
    },
    {
      "epoch": 0.00032088623046875,
      "step": 52574,
      "training_step_time": 0.4220714569091797
    },
    {
      "epoch": 0.000320892333984375,
      "model_forward_time": 0.1157827377319336,
      "step": 52575
    },
    {
      "epoch": 0.000320892333984375,
      "step": 52575,
      "training_step_time": 0.39307665824890137
    },
    {
      "epoch": 0.0003208984375,
      "model_forward_time": 0.11528205871582031,
      "step": 52576
    },
    {
      "epoch": 0.0003208984375,
      "step": 52576,
      "training_step_time": 0.3987858295440674
    },
    {
      "epoch": 0.000320904541015625,
      "model_forward_time": 0.1155233383178711,
      "step": 52577
    },
    {
      "epoch": 0.000320904541015625,
      "step": 52577,
      "training_step_time": 0.3939990997314453
    },
    {
      "epoch": 0.00032091064453125,
      "model_forward_time": 0.11499810218811035,
      "step": 52578
    },
    {
      "epoch": 0.00032091064453125,
      "step": 52578,
      "training_step_time": 0.37680768966674805
    },
    {
      "epoch": 0.000320916748046875,
      "model_forward_time": 0.11509108543395996,
      "step": 52579
    },
    {
      "epoch": 0.000320916748046875,
      "step": 52579,
      "training_step_time": 0.4006223678588867
    },
    {
      "epoch": 0.0003209228515625,
      "grad_norm": 0.12074059993028641,
      "learning_rate": 4.123220219159418e-06,
      "loss": 0.0374,
      "step": 52580
    },
    {
      "epoch": 0.0003209228515625,
      "model_forward_time": 0.11622858047485352,
      "step": 52580
    },
    {
      "epoch": 0.0003209228515625,
      "step": 52580,
      "training_step_time": 0.43619751930236816
    },
    {
      "epoch": 0.000320928955078125,
      "model_forward_time": 0.11552166938781738,
      "step": 52581
    },
    {
      "epoch": 0.000320928955078125,
      "step": 52581,
      "training_step_time": 0.4326596260070801
    },
    {
      "epoch": 0.00032093505859375,
      "model_forward_time": 0.11478590965270996,
      "step": 52582
    },
    {
      "epoch": 0.00032093505859375,
      "step": 52582,
      "training_step_time": 0.4118509292602539
    },
    {
      "epoch": 0.000320941162109375,
      "model_forward_time": 0.11527729034423828,
      "step": 52583
    },
    {
      "epoch": 0.000320941162109375,
      "step": 52583,
      "training_step_time": 0.41782164573669434
    },
    {
      "epoch": 0.000320947265625,
      "model_forward_time": 0.11684656143188477,
      "step": 52584
    },
    {
      "epoch": 0.000320947265625,
      "step": 52584,
      "training_step_time": 0.395733118057251
    },
    {
      "epoch": 0.000320953369140625,
      "model_forward_time": 0.11524343490600586,
      "step": 52585
    },
    {
      "epoch": 0.000320953369140625,
      "step": 52585,
      "training_step_time": 0.4003441333770752
    },
    {
      "epoch": 0.00032095947265625,
      "model_forward_time": 0.1149742603302002,
      "step": 52586
    },
    {
      "epoch": 0.00032095947265625,
      "step": 52586,
      "training_step_time": 0.4616281986236572
    },
    {
      "epoch": 0.000320965576171875,
      "model_forward_time": 0.11593389511108398,
      "step": 52587
    },
    {
      "epoch": 0.000320965576171875,
      "step": 52587,
      "training_step_time": 0.46680307388305664
    },
    {
      "epoch": 0.0003209716796875,
      "model_forward_time": 0.11536908149719238,
      "step": 52588
    },
    {
      "epoch": 0.0003209716796875,
      "step": 52588,
      "training_step_time": 0.4151287078857422
    },
    {
      "epoch": 0.000320977783203125,
      "model_forward_time": 0.1151587963104248,
      "step": 52589
    },
    {
      "epoch": 0.000320977783203125,
      "step": 52589,
      "training_step_time": 0.4627206325531006
    },
    {
      "epoch": 0.00032098388671875,
      "grad_norm": 0.10137521475553513,
      "learning_rate": 4.112268715800943e-06,
      "loss": 0.032,
      "step": 52590
    },
    {
      "epoch": 0.00032098388671875,
      "model_forward_time": 0.11493945121765137,
      "step": 52590
    },
    {
      "epoch": 0.00032098388671875,
      "step": 52590,
      "training_step_time": 0.41882824897766113
    },
    {
      "epoch": 0.000320989990234375,
      "model_forward_time": 0.11483955383300781,
      "step": 52591
    },
    {
      "epoch": 0.000320989990234375,
      "step": 52591,
      "training_step_time": 0.3942530155181885
    },
    {
      "epoch": 0.00032099609375,
      "model_forward_time": 0.11525249481201172,
      "step": 52592
    },
    {
      "epoch": 0.00032099609375,
      "step": 52592,
      "training_step_time": 0.39299821853637695
    },
    {
      "epoch": 0.000321002197265625,
      "model_forward_time": 0.11519575119018555,
      "step": 52593
    },
    {
      "epoch": 0.000321002197265625,
      "step": 52593,
      "training_step_time": 0.39826321601867676
    },
    {
      "epoch": 0.00032100830078125,
      "model_forward_time": 0.11534857749938965,
      "step": 52594
    },
    {
      "epoch": 0.00032100830078125,
      "step": 52594,
      "training_step_time": 0.39769840240478516
    },
    {
      "epoch": 0.000321014404296875,
      "model_forward_time": 0.11573195457458496,
      "step": 52595
    },
    {
      "epoch": 0.000321014404296875,
      "step": 52595,
      "training_step_time": 0.46027040481567383
    },
    {
      "epoch": 0.0003210205078125,
      "model_forward_time": 0.11547517776489258,
      "step": 52596
    },
    {
      "epoch": 0.0003210205078125,
      "step": 52596,
      "training_step_time": 0.49107956886291504
    },
    {
      "epoch": 0.000321026611328125,
      "model_forward_time": 0.11477041244506836,
      "step": 52597
    },
    {
      "epoch": 0.000321026611328125,
      "step": 52597,
      "training_step_time": 0.4143054485321045
    },
    {
      "epoch": 0.00032103271484375,
      "model_forward_time": 0.11498641967773438,
      "step": 52598
    },
    {
      "epoch": 0.00032103271484375,
      "step": 52598,
      "training_step_time": 0.38486409187316895
    },
    {
      "epoch": 0.000321038818359375,
      "model_forward_time": 0.11483335494995117,
      "step": 52599
    },
    {
      "epoch": 0.000321038818359375,
      "step": 52599,
      "training_step_time": 0.3980119228363037
    },
    {
      "epoch": 0.000321044921875,
      "grad_norm": 0.07980792224407196,
      "learning_rate": 4.101331151923649e-06,
      "loss": 0.0354,
      "step": 52600
    },
    {
      "epoch": 0.000321044921875,
      "model_forward_time": 0.11443805694580078,
      "step": 52600
    },
    {
      "epoch": 0.000321044921875,
      "step": 52600,
      "training_step_time": 0.4184749126434326
    },
    {
      "epoch": 0.000321051025390625,
      "model_forward_time": 0.1148843765258789,
      "step": 52601
    },
    {
      "epoch": 0.000321051025390625,
      "step": 52601,
      "training_step_time": 0.45532774925231934
    },
    {
      "epoch": 0.00032105712890625,
      "model_forward_time": 0.11450767517089844,
      "step": 52602
    },
    {
      "epoch": 0.00032105712890625,
      "step": 52602,
      "training_step_time": 0.39875125885009766
    },
    {
      "epoch": 0.000321063232421875,
      "model_forward_time": 0.11676478385925293,
      "step": 52603
    },
    {
      "epoch": 0.000321063232421875,
      "step": 52603,
      "training_step_time": 0.39938783645629883
    },
    {
      "epoch": 0.0003210693359375,
      "model_forward_time": 0.11528849601745605,
      "step": 52604
    },
    {
      "epoch": 0.0003210693359375,
      "step": 52604,
      "training_step_time": 0.44417643547058105
    },
    {
      "epoch": 0.000321075439453125,
      "model_forward_time": 0.11501240730285645,
      "step": 52605
    },
    {
      "epoch": 0.000321075439453125,
      "step": 52605,
      "training_step_time": 0.4156026840209961
    },
    {
      "epoch": 0.00032108154296875,
      "model_forward_time": 0.1147618293762207,
      "step": 52606
    },
    {
      "epoch": 0.00032108154296875,
      "step": 52606,
      "training_step_time": 0.40088677406311035
    },
    {
      "epoch": 0.000321087646484375,
      "model_forward_time": 0.11562514305114746,
      "step": 52607
    },
    {
      "epoch": 0.000321087646484375,
      "step": 52607,
      "training_step_time": 0.3891615867614746
    },
    {
      "epoch": 0.00032109375,
      "model_forward_time": 0.11547040939331055,
      "step": 52608
    },
    {
      "epoch": 0.00032109375,
      "step": 52608,
      "training_step_time": 0.4045839309692383
    },
    {
      "epoch": 0.000321099853515625,
      "model_forward_time": 0.11512517929077148,
      "step": 52609
    },
    {
      "epoch": 0.000321099853515625,
      "step": 52609,
      "training_step_time": 0.4773547649383545
    },
    {
      "epoch": 0.00032110595703125,
      "grad_norm": 0.08349266648292542,
      "learning_rate": 4.090407530850065e-06,
      "loss": 0.0401,
      "step": 52610
    },
    {
      "epoch": 0.00032110595703125,
      "model_forward_time": 0.11502933502197266,
      "step": 52610
    },
    {
      "epoch": 0.00032110595703125,
      "step": 52610,
      "training_step_time": 0.43048763275146484
    },
    {
      "epoch": 0.000321112060546875,
      "model_forward_time": 0.11500382423400879,
      "step": 52611
    },
    {
      "epoch": 0.000321112060546875,
      "step": 52611,
      "training_step_time": 0.39823174476623535
    },
    {
      "epoch": 0.0003211181640625,
      "model_forward_time": 0.11506485939025879,
      "step": 52612
    },
    {
      "epoch": 0.0003211181640625,
      "step": 52612,
      "training_step_time": 0.426257848739624
    },
    {
      "epoch": 0.000321124267578125,
      "model_forward_time": 0.11438226699829102,
      "step": 52613
    },
    {
      "epoch": 0.000321124267578125,
      "step": 52613,
      "training_step_time": 0.3926234245300293
    },
    {
      "epoch": 0.00032113037109375,
      "model_forward_time": 0.11542105674743652,
      "step": 52614
    },
    {
      "epoch": 0.00032113037109375,
      "step": 52614,
      "training_step_time": 0.5019781589508057
    },
    {
      "epoch": 0.000321136474609375,
      "model_forward_time": 0.11416435241699219,
      "step": 52615
    },
    {
      "epoch": 0.000321136474609375,
      "step": 52615,
      "training_step_time": 0.43486642837524414
    },
    {
      "epoch": 0.000321142578125,
      "model_forward_time": 0.11448431015014648,
      "step": 52616
    },
    {
      "epoch": 0.000321142578125,
      "step": 52616,
      "training_step_time": 0.39319419860839844
    },
    {
      "epoch": 0.000321148681640625,
      "model_forward_time": 0.1147618293762207,
      "step": 52617
    },
    {
      "epoch": 0.000321148681640625,
      "step": 52617,
      "training_step_time": 0.36454343795776367
    },
    {
      "epoch": 0.00032115478515625,
      "model_forward_time": 0.11493563652038574,
      "step": 52618
    },
    {
      "epoch": 0.00032115478515625,
      "step": 52618,
      "training_step_time": 0.45806097984313965
    },
    {
      "epoch": 0.000321160888671875,
      "model_forward_time": 0.1143636703491211,
      "step": 52619
    },
    {
      "epoch": 0.000321160888671875,
      "step": 52619,
      "training_step_time": 0.4067361354827881
    },
    {
      "epoch": 0.0003211669921875,
      "grad_norm": 0.09943535178899765,
      "learning_rate": 4.079497855898501e-06,
      "loss": 0.0407,
      "step": 52620
    },
    {
      "epoch": 0.0003211669921875,
      "model_forward_time": 0.11504387855529785,
      "step": 52620
    },
    {
      "epoch": 0.0003211669921875,
      "step": 52620,
      "training_step_time": 0.4081532955169678
    },
    {
      "epoch": 0.000321173095703125,
      "model_forward_time": 0.11489748954772949,
      "step": 52621
    },
    {
      "epoch": 0.000321173095703125,
      "step": 52621,
      "training_step_time": 0.3907806873321533
    },
    {
      "epoch": 0.00032117919921875,
      "model_forward_time": 0.11459755897521973,
      "step": 52622
    },
    {
      "epoch": 0.00032117919921875,
      "step": 52622,
      "training_step_time": 0.38898611068725586
    },
    {
      "epoch": 0.000321185302734375,
      "model_forward_time": 0.11458802223205566,
      "step": 52623
    },
    {
      "epoch": 0.000321185302734375,
      "step": 52623,
      "training_step_time": 0.3935880661010742
    },
    {
      "epoch": 0.00032119140625,
      "model_forward_time": 0.11515569686889648,
      "step": 52624
    },
    {
      "epoch": 0.00032119140625,
      "step": 52624,
      "training_step_time": 0.47588515281677246
    },
    {
      "epoch": 0.000321197509765625,
      "model_forward_time": 0.11484742164611816,
      "step": 52625
    },
    {
      "epoch": 0.000321197509765625,
      "step": 52625,
      "training_step_time": 0.5006802082061768
    },
    {
      "epoch": 0.00032120361328125,
      "model_forward_time": 0.11416888236999512,
      "step": 52626
    },
    {
      "epoch": 0.00032120361328125,
      "step": 52626,
      "training_step_time": 0.3987925052642822
    },
    {
      "epoch": 0.000321209716796875,
      "model_forward_time": 0.1146092414855957,
      "step": 52627
    },
    {
      "epoch": 0.000321209716796875,
      "step": 52627,
      "training_step_time": 0.3900752067565918
    },
    {
      "epoch": 0.0003212158203125,
      "model_forward_time": 0.11443638801574707,
      "step": 52628
    },
    {
      "epoch": 0.0003212158203125,
      "step": 52628,
      "training_step_time": 0.39852261543273926
    },
    {
      "epoch": 0.000321221923828125,
      "model_forward_time": 0.11528277397155762,
      "step": 52629
    },
    {
      "epoch": 0.000321221923828125,
      "step": 52629,
      "training_step_time": 0.43735218048095703
    },
    {
      "epoch": 0.00032122802734375,
      "grad_norm": 0.08376751840114594,
      "learning_rate": 4.068602130383031e-06,
      "loss": 0.0381,
      "step": 52630
    },
    {
      "epoch": 0.00032122802734375,
      "model_forward_time": 0.11585640907287598,
      "step": 52630
    },
    {
      "epoch": 0.00032122802734375,
      "step": 52630,
      "training_step_time": 0.491532564163208
    },
    {
      "epoch": 0.000321234130859375,
      "model_forward_time": 0.11499452590942383,
      "step": 52631
    },
    {
      "epoch": 0.000321234130859375,
      "step": 52631,
      "training_step_time": 0.3965301513671875
    },
    {
      "epoch": 0.000321240234375,
      "model_forward_time": 0.11596822738647461,
      "step": 52632
    },
    {
      "epoch": 0.000321240234375,
      "step": 52632,
      "training_step_time": 0.4751298427581787
    },
    {
      "epoch": 0.000321246337890625,
      "model_forward_time": 0.11484313011169434,
      "step": 52633
    },
    {
      "epoch": 0.000321246337890625,
      "step": 52633,
      "training_step_time": 0.4980316162109375
    },
    {
      "epoch": 0.00032125244140625,
      "model_forward_time": 0.11434316635131836,
      "step": 52634
    },
    {
      "epoch": 0.00032125244140625,
      "step": 52634,
      "training_step_time": 0.3952775001525879
    },
    {
      "epoch": 0.000321258544921875,
      "model_forward_time": 0.11445403099060059,
      "step": 52635
    },
    {
      "epoch": 0.000321258544921875,
      "step": 52635,
      "training_step_time": 0.38547277450561523
    },
    {
      "epoch": 0.0003212646484375,
      "model_forward_time": 0.11542916297912598,
      "step": 52636
    },
    {
      "epoch": 0.0003212646484375,
      "step": 52636,
      "training_step_time": 0.39004945755004883
    },
    {
      "epoch": 0.000321270751953125,
      "model_forward_time": 0.11486029624938965,
      "step": 52637
    },
    {
      "epoch": 0.000321270751953125,
      "step": 52637,
      "training_step_time": 0.3988351821899414
    },
    {
      "epoch": 0.00032127685546875,
      "model_forward_time": 0.11495471000671387,
      "step": 52638
    },
    {
      "epoch": 0.00032127685546875,
      "step": 52638,
      "training_step_time": 0.4509127140045166
    },
    {
      "epoch": 0.000321282958984375,
      "model_forward_time": 0.11484289169311523,
      "step": 52639
    },
    {
      "epoch": 0.000321282958984375,
      "step": 52639,
      "training_step_time": 0.4374806880950928
    },
    {
      "epoch": 0.0003212890625,
      "grad_norm": 0.09932374954223633,
      "learning_rate": 4.057720357613482e-06,
      "loss": 0.0369,
      "step": 52640
    },
    {
      "epoch": 0.0003212890625,
      "model_forward_time": 0.11525583267211914,
      "step": 52640
    },
    {
      "epoch": 0.0003212890625,
      "step": 52640,
      "training_step_time": 0.4893221855163574
    },
    {
      "epoch": 0.000321295166015625,
      "model_forward_time": 0.11505484580993652,
      "step": 52641
    },
    {
      "epoch": 0.000321295166015625,
      "step": 52641,
      "training_step_time": 0.39914655685424805
    },
    {
      "epoch": 0.00032130126953125,
      "model_forward_time": 0.11501216888427734,
      "step": 52642
    },
    {
      "epoch": 0.00032130126953125,
      "step": 52642,
      "training_step_time": 0.3954899311065674
    },
    {
      "epoch": 0.000321307373046875,
      "model_forward_time": 0.11535382270812988,
      "step": 52643
    },
    {
      "epoch": 0.000321307373046875,
      "step": 52643,
      "training_step_time": 0.45752763748168945
    },
    {
      "epoch": 0.0003213134765625,
      "model_forward_time": 0.11530256271362305,
      "step": 52644
    },
    {
      "epoch": 0.0003213134765625,
      "step": 52644,
      "training_step_time": 0.43876028060913086
    },
    {
      "epoch": 0.000321319580078125,
      "model_forward_time": 0.11474442481994629,
      "step": 52645
    },
    {
      "epoch": 0.000321319580078125,
      "step": 52645,
      "training_step_time": 0.4333679676055908
    },
    {
      "epoch": 0.00032132568359375,
      "model_forward_time": 0.11504721641540527,
      "step": 52646
    },
    {
      "epoch": 0.00032132568359375,
      "step": 52646,
      "training_step_time": 0.3935399055480957
    },
    {
      "epoch": 0.000321331787109375,
      "model_forward_time": 0.11575841903686523,
      "step": 52647
    },
    {
      "epoch": 0.000321331787109375,
      "step": 52647,
      "training_step_time": 0.40389204025268555
    },
    {
      "epoch": 0.000321337890625,
      "model_forward_time": 0.1148829460144043,
      "step": 52648
    },
    {
      "epoch": 0.000321337890625,
      "step": 52648,
      "training_step_time": 0.40112757682800293
    },
    {
      "epoch": 0.000321343994140625,
      "model_forward_time": 0.11492109298706055,
      "step": 52649
    },
    {
      "epoch": 0.000321343994140625,
      "step": 52649,
      "training_step_time": 0.39495134353637695
    },
    {
      "epoch": 0.00032135009765625,
      "grad_norm": 0.09294924139976501,
      "learning_rate": 4.046852540895446e-06,
      "loss": 0.0408,
      "step": 52650
    },
    {
      "epoch": 0.00032135009765625,
      "model_forward_time": 0.11541247367858887,
      "step": 52650
    },
    {
      "epoch": 0.00032135009765625,
      "step": 52650,
      "training_step_time": 0.409832239151001
    },
    {
      "epoch": 0.000321356201171875,
      "model_forward_time": 0.11484718322753906,
      "step": 52651
    },
    {
      "epoch": 0.000321356201171875,
      "step": 52651,
      "training_step_time": 0.4005887508392334
    },
    {
      "epoch": 0.0003213623046875,
      "model_forward_time": 0.11573195457458496,
      "step": 52652
    },
    {
      "epoch": 0.0003213623046875,
      "step": 52652,
      "training_step_time": 0.41106247901916504
    },
    {
      "epoch": 0.000321368408203125,
      "model_forward_time": 0.11611318588256836,
      "step": 52653
    },
    {
      "epoch": 0.000321368408203125,
      "step": 52653,
      "training_step_time": 0.4093911647796631
    },
    {
      "epoch": 0.00032137451171875,
      "model_forward_time": 0.11537599563598633,
      "step": 52654
    },
    {
      "epoch": 0.00032137451171875,
      "step": 52654,
      "training_step_time": 0.466015100479126
    },
    {
      "epoch": 0.000321380615234375,
      "model_forward_time": 0.11570429801940918,
      "step": 52655
    },
    {
      "epoch": 0.000321380615234375,
      "step": 52655,
      "training_step_time": 0.5006453990936279
    },
    {
      "epoch": 0.00032138671875,
      "model_forward_time": 0.11557674407958984,
      "step": 52656
    },
    {
      "epoch": 0.00032138671875,
      "step": 52656,
      "training_step_time": 0.3917245864868164
    },
    {
      "epoch": 0.000321392822265625,
      "model_forward_time": 0.11507058143615723,
      "step": 52657
    },
    {
      "epoch": 0.000321392822265625,
      "step": 52657,
      "training_step_time": 0.4060366153717041
    },
    {
      "epoch": 0.00032139892578125,
      "model_forward_time": 0.11513972282409668,
      "step": 52658
    },
    {
      "epoch": 0.00032139892578125,
      "step": 52658,
      "training_step_time": 0.3940012454986572
    },
    {
      "epoch": 0.000321405029296875,
      "model_forward_time": 0.11476898193359375,
      "step": 52659
    },
    {
      "epoch": 0.000321405029296875,
      "step": 52659,
      "training_step_time": 0.4789259433746338
    },
    {
      "epoch": 0.0003214111328125,
      "grad_norm": 0.07326627522706985,
      "learning_rate": 4.03599868353029e-06,
      "loss": 0.034,
      "step": 52660
    },
    {
      "epoch": 0.0003214111328125,
      "model_forward_time": 0.11483120918273926,
      "step": 52660
    },
    {
      "epoch": 0.0003214111328125,
      "step": 52660,
      "training_step_time": 0.3810458183288574
    },
    {
      "epoch": 0.000321417236328125,
      "model_forward_time": 0.11554384231567383,
      "step": 52661
    },
    {
      "epoch": 0.000321417236328125,
      "step": 52661,
      "training_step_time": 0.40695858001708984
    },
    {
      "epoch": 0.00032142333984375,
      "model_forward_time": 0.11468863487243652,
      "step": 52662
    },
    {
      "epoch": 0.00032142333984375,
      "step": 52662,
      "training_step_time": 0.5017795562744141
    },
    {
      "epoch": 0.000321429443359375,
      "model_forward_time": 0.11554765701293945,
      "step": 52663
    },
    {
      "epoch": 0.000321429443359375,
      "step": 52663,
      "training_step_time": 0.39432382583618164
    },
    {
      "epoch": 0.000321435546875,
      "model_forward_time": 0.11479973793029785,
      "step": 52664
    },
    {
      "epoch": 0.000321435546875,
      "step": 52664,
      "training_step_time": 0.3915283679962158
    },
    {
      "epoch": 0.000321441650390625,
      "model_forward_time": 0.11503982543945312,
      "step": 52665
    },
    {
      "epoch": 0.000321441650390625,
      "step": 52665,
      "training_step_time": 0.4310884475708008
    },
    {
      "epoch": 0.00032144775390625,
      "model_forward_time": 0.11475992202758789,
      "step": 52666
    },
    {
      "epoch": 0.00032144775390625,
      "step": 52666,
      "training_step_time": 0.3998231887817383
    },
    {
      "epoch": 0.000321453857421875,
      "model_forward_time": 0.11675190925598145,
      "step": 52667
    },
    {
      "epoch": 0.000321453857421875,
      "step": 52667,
      "training_step_time": 0.40467357635498047
    },
    {
      "epoch": 0.0003214599609375,
      "model_forward_time": 0.11573171615600586,
      "step": 52668
    },
    {
      "epoch": 0.0003214599609375,
      "step": 52668,
      "training_step_time": 0.39821505546569824
    },
    {
      "epoch": 0.000321466064453125,
      "model_forward_time": 0.11490821838378906,
      "step": 52669
    },
    {
      "epoch": 0.000321466064453125,
      "step": 52669,
      "training_step_time": 0.4191579818725586
    },
    {
      "epoch": 0.00032147216796875,
      "grad_norm": 0.10207297652959824,
      "learning_rate": 4.025158788815131e-06,
      "loss": 0.0463,
      "step": 52670
    },
    {
      "epoch": 0.00032147216796875,
      "model_forward_time": 0.11484265327453613,
      "step": 52670
    },
    {
      "epoch": 0.00032147216796875,
      "step": 52670,
      "training_step_time": 0.48479533195495605
    },
    {
      "epoch": 0.000321478271484375,
      "model_forward_time": 0.11445045471191406,
      "step": 52671
    },
    {
      "epoch": 0.000321478271484375,
      "step": 52671,
      "training_step_time": 0.3922855854034424
    },
    {
      "epoch": 0.000321484375,
      "model_forward_time": 0.11871671676635742,
      "step": 52672
    },
    {
      "epoch": 0.000321484375,
      "step": 52672,
      "training_step_time": 0.4426901340484619
    },
    {
      "epoch": 0.000321490478515625,
      "model_forward_time": 0.11532330513000488,
      "step": 52673
    },
    {
      "epoch": 0.000321490478515625,
      "step": 52673,
      "training_step_time": 0.4223294258117676
    },
    {
      "epoch": 0.00032149658203125,
      "model_forward_time": 0.11534571647644043,
      "step": 52674
    },
    {
      "epoch": 0.00032149658203125,
      "step": 52674,
      "training_step_time": 0.48720240592956543
    },
    {
      "epoch": 0.000321502685546875,
      "model_forward_time": 0.11487317085266113,
      "step": 52675
    },
    {
      "epoch": 0.000321502685546875,
      "step": 52675,
      "training_step_time": 0.3973712921142578
    },
    {
      "epoch": 0.0003215087890625,
      "model_forward_time": 0.11567568778991699,
      "step": 52676
    },
    {
      "epoch": 0.0003215087890625,
      "step": 52676,
      "training_step_time": 0.4055790901184082
    },
    {
      "epoch": 0.000321514892578125,
      "model_forward_time": 0.1150667667388916,
      "step": 52677
    },
    {
      "epoch": 0.000321514892578125,
      "step": 52677,
      "training_step_time": 0.4175679683685303
    },
    {
      "epoch": 0.00032152099609375,
      "model_forward_time": 0.11541199684143066,
      "step": 52678
    },
    {
      "epoch": 0.00032152099609375,
      "step": 52678,
      "training_step_time": 0.3969123363494873
    },
    {
      "epoch": 0.000321527099609375,
      "model_forward_time": 0.11528182029724121,
      "step": 52679
    },
    {
      "epoch": 0.000321527099609375,
      "step": 52679,
      "training_step_time": 0.4517643451690674
    },
    {
      "epoch": 0.000321533203125,
      "grad_norm": 0.11566608399152756,
      "learning_rate": 4.01433286004283e-06,
      "loss": 0.0382,
      "step": 52680
    },
    {
      "epoch": 0.000321533203125,
      "model_forward_time": 0.11498832702636719,
      "step": 52680
    },
    {
      "epoch": 0.000321533203125,
      "step": 52680,
      "training_step_time": 0.4113638401031494
    },
    {
      "epoch": 0.000321539306640625,
      "model_forward_time": 0.11564445495605469,
      "step": 52681
    },
    {
      "epoch": 0.000321539306640625,
      "step": 52681,
      "training_step_time": 0.5900115966796875
    },
    {
      "epoch": 0.00032154541015625,
      "model_forward_time": 0.11486673355102539,
      "step": 52682
    },
    {
      "epoch": 0.00032154541015625,
      "step": 52682,
      "training_step_time": 0.3990478515625
    },
    {
      "epoch": 0.000321551513671875,
      "model_forward_time": 0.11499214172363281,
      "step": 52683
    },
    {
      "epoch": 0.000321551513671875,
      "step": 52683,
      "training_step_time": 0.46172070503234863
    },
    {
      "epoch": 0.0003215576171875,
      "model_forward_time": 0.11470603942871094,
      "step": 52684
    },
    {
      "epoch": 0.0003215576171875,
      "step": 52684,
      "training_step_time": 0.4855360984802246
    },
    {
      "epoch": 0.000321563720703125,
      "model_forward_time": 0.11444735527038574,
      "step": 52685
    },
    {
      "epoch": 0.000321563720703125,
      "step": 52685,
      "training_step_time": 0.3870511054992676
    },
    {
      "epoch": 0.00032156982421875,
      "model_forward_time": 0.11532211303710938,
      "step": 52686
    },
    {
      "epoch": 0.00032156982421875,
      "step": 52686,
      "training_step_time": 0.39884328842163086
    },
    {
      "epoch": 0.000321575927734375,
      "model_forward_time": 0.11515045166015625,
      "step": 52687
    },
    {
      "epoch": 0.000321575927734375,
      "step": 52687,
      "training_step_time": 0.6913392543792725
    },
    {
      "epoch": 0.00032158203125,
      "model_forward_time": 0.11527562141418457,
      "step": 52688
    },
    {
      "epoch": 0.00032158203125,
      "step": 52688,
      "training_step_time": 0.38954806327819824
    },
    {
      "epoch": 0.000321588134765625,
      "model_forward_time": 0.11463308334350586,
      "step": 52689
    },
    {
      "epoch": 0.000321588134765625,
      "step": 52689,
      "training_step_time": 0.4671299457550049
    },
    {
      "epoch": 0.00032159423828125,
      "grad_norm": 0.0827859491109848,
      "learning_rate": 4.003520900502028e-06,
      "loss": 0.0372,
      "step": 52690
    },
    {
      "epoch": 0.00032159423828125,
      "model_forward_time": 0.11443686485290527,
      "step": 52690
    },
    {
      "epoch": 0.00032159423828125,
      "step": 52690,
      "training_step_time": 0.4937140941619873
    },
    {
      "epoch": 0.000321600341796875,
      "model_forward_time": 0.11446070671081543,
      "step": 52691
    },
    {
      "epoch": 0.000321600341796875,
      "step": 52691,
      "training_step_time": 0.4438505172729492
    },
    {
      "epoch": 0.0003216064453125,
      "model_forward_time": 0.11469292640686035,
      "step": 52692
    },
    {
      "epoch": 0.0003216064453125,
      "step": 52692,
      "training_step_time": 0.4504115581512451
    },
    {
      "epoch": 0.000321612548828125,
      "model_forward_time": 0.11495280265808105,
      "step": 52693
    },
    {
      "epoch": 0.000321612548828125,
      "step": 52693,
      "training_step_time": 0.39585113525390625
    },
    {
      "epoch": 0.00032161865234375,
      "model_forward_time": 0.1146082878112793,
      "step": 52694
    },
    {
      "epoch": 0.00032161865234375,
      "step": 52694,
      "training_step_time": 0.39594030380249023
    },
    {
      "epoch": 0.000321624755859375,
      "model_forward_time": 0.11562824249267578,
      "step": 52695
    },
    {
      "epoch": 0.000321624755859375,
      "step": 52695,
      "training_step_time": 0.39762377738952637
    },
    {
      "epoch": 0.000321630859375,
      "model_forward_time": 0.1149744987487793,
      "step": 52696
    },
    {
      "epoch": 0.000321630859375,
      "step": 52696,
      "training_step_time": 0.42235708236694336
    },
    {
      "epoch": 0.000321636962890625,
      "model_forward_time": 0.11491942405700684,
      "step": 52697
    },
    {
      "epoch": 0.000321636962890625,
      "step": 52697,
      "training_step_time": 0.3993544578552246
    },
    {
      "epoch": 0.00032164306640625,
      "model_forward_time": 0.1158301830291748,
      "step": 52698
    },
    {
      "epoch": 0.00032164306640625,
      "step": 52698,
      "training_step_time": 0.48869848251342773
    },
    {
      "epoch": 0.000321649169921875,
      "model_forward_time": 0.11432838439941406,
      "step": 52699
    },
    {
      "epoch": 0.000321649169921875,
      "step": 52699,
      "training_step_time": 0.40036869049072266
    },
    {
      "epoch": 0.0003216552734375,
      "grad_norm": 0.0955771878361702,
      "learning_rate": 3.9927229134771035e-06,
      "loss": 0.0365,
      "step": 52700
    },
    {
      "epoch": 0.0003216552734375,
      "model_forward_time": 0.1141963005065918,
      "step": 52700
    },
    {
      "epoch": 0.0003216552734375,
      "step": 52700,
      "training_step_time": 0.5021810531616211
    },
    {
      "epoch": 0.000321661376953125,
      "model_forward_time": 0.11433577537536621,
      "step": 52701
    },
    {
      "epoch": 0.000321661376953125,
      "step": 52701,
      "training_step_time": 0.38887906074523926
    },
    {
      "epoch": 0.00032166748046875,
      "model_forward_time": 0.11534547805786133,
      "step": 52702
    },
    {
      "epoch": 0.00032166748046875,
      "step": 52702,
      "training_step_time": 0.39743852615356445
    },
    {
      "epoch": 0.000321673583984375,
      "model_forward_time": 0.11518549919128418,
      "step": 52703
    },
    {
      "epoch": 0.000321673583984375,
      "step": 52703,
      "training_step_time": 0.40047550201416016
    },
    {
      "epoch": 0.0003216796875,
      "model_forward_time": 0.11486005783081055,
      "step": 52704
    },
    {
      "epoch": 0.0003216796875,
      "step": 52704,
      "training_step_time": 0.4344198703765869
    },
    {
      "epoch": 0.000321685791015625,
      "model_forward_time": 0.11587190628051758,
      "step": 52705
    },
    {
      "epoch": 0.000321685791015625,
      "step": 52705,
      "training_step_time": 0.45148515701293945
    },
    {
      "epoch": 0.00032169189453125,
      "model_forward_time": 0.11489248275756836,
      "step": 52706
    },
    {
      "epoch": 0.00032169189453125,
      "step": 52706,
      "training_step_time": 0.4551658630371094
    },
    {
      "epoch": 0.000321697998046875,
      "model_forward_time": 0.11478519439697266,
      "step": 52707
    },
    {
      "epoch": 0.000321697998046875,
      "step": 52707,
      "training_step_time": 0.39046812057495117
    },
    {
      "epoch": 0.0003217041015625,
      "model_forward_time": 0.11517143249511719,
      "step": 52708
    },
    {
      "epoch": 0.0003217041015625,
      "step": 52708,
      "training_step_time": 0.3918910026550293
    },
    {
      "epoch": 0.000321710205078125,
      "model_forward_time": 0.11530876159667969,
      "step": 52709
    },
    {
      "epoch": 0.000321710205078125,
      "step": 52709,
      "training_step_time": 0.3971729278564453
    },
    {
      "epoch": 0.00032171630859375,
      "grad_norm": 0.11111433058977127,
      "learning_rate": 3.981938902248222e-06,
      "loss": 0.0337,
      "step": 52710
    },
    {
      "epoch": 0.00032171630859375,
      "model_forward_time": 0.11450004577636719,
      "step": 52710
    },
    {
      "epoch": 0.00032171630859375,
      "step": 52710,
      "training_step_time": 0.3945481777191162
    },
    {
      "epoch": 0.000321722412109375,
      "model_forward_time": 0.11519503593444824,
      "step": 52711
    },
    {
      "epoch": 0.000321722412109375,
      "step": 52711,
      "training_step_time": 0.44881176948547363
    },
    {
      "epoch": 0.000321728515625,
      "model_forward_time": 0.11443233489990234,
      "step": 52712
    },
    {
      "epoch": 0.000321728515625,
      "step": 52712,
      "training_step_time": 0.4521195888519287
    },
    {
      "epoch": 0.000321734619140625,
      "model_forward_time": 0.1147620677947998,
      "step": 52713
    },
    {
      "epoch": 0.000321734619140625,
      "step": 52713,
      "training_step_time": 0.5236024856567383
    },
    {
      "epoch": 0.00032174072265625,
      "model_forward_time": 0.11520266532897949,
      "step": 52714
    },
    {
      "epoch": 0.00032174072265625,
      "step": 52714,
      "training_step_time": 0.4324769973754883
    },
    {
      "epoch": 0.000321746826171875,
      "model_forward_time": 0.11524295806884766,
      "step": 52715
    },
    {
      "epoch": 0.000321746826171875,
      "step": 52715,
      "training_step_time": 0.42890477180480957
    },
    {
      "epoch": 0.0003217529296875,
      "model_forward_time": 0.11554622650146484,
      "step": 52716
    },
    {
      "epoch": 0.0003217529296875,
      "step": 52716,
      "training_step_time": 0.4047861099243164
    },
    {
      "epoch": 0.000321759033203125,
      "model_forward_time": 0.1144711971282959,
      "step": 52717
    },
    {
      "epoch": 0.000321759033203125,
      "step": 52717,
      "training_step_time": 0.3659031391143799
    },
    {
      "epoch": 0.00032176513671875,
      "model_forward_time": 0.11522960662841797,
      "step": 52718
    },
    {
      "epoch": 0.00032176513671875,
      "step": 52718,
      "training_step_time": 0.40924525260925293
    },
    {
      "epoch": 0.000321771240234375,
      "model_forward_time": 0.1147317886352539,
      "step": 52719
    },
    {
      "epoch": 0.000321771240234375,
      "step": 52719,
      "training_step_time": 0.5021791458129883
    },
    {
      "epoch": 0.00032177734375,
      "grad_norm": 0.10086405277252197,
      "learning_rate": 3.971168870091247e-06,
      "loss": 0.0352,
      "step": 52720
    },
    {
      "epoch": 0.00032177734375,
      "model_forward_time": 0.11494255065917969,
      "step": 52720
    },
    {
      "epoch": 0.00032177734375,
      "step": 52720,
      "training_step_time": 0.3836092948913574
    },
    {
      "epoch": 0.000321783447265625,
      "model_forward_time": 0.11463332176208496,
      "step": 52721
    },
    {
      "epoch": 0.000321783447265625,
      "step": 52721,
      "training_step_time": 0.3918435573577881
    },
    {
      "epoch": 0.00032178955078125,
      "model_forward_time": 0.11456704139709473,
      "step": 52722
    },
    {
      "epoch": 0.00032178955078125,
      "step": 52722,
      "training_step_time": 0.3957536220550537
    },
    {
      "epoch": 0.000321795654296875,
      "model_forward_time": 0.11475872993469238,
      "step": 52723
    },
    {
      "epoch": 0.000321795654296875,
      "step": 52723,
      "training_step_time": 0.40241312980651855
    },
    {
      "epoch": 0.0003218017578125,
      "model_forward_time": 0.11485934257507324,
      "step": 52724
    },
    {
      "epoch": 0.0003218017578125,
      "step": 52724,
      "training_step_time": 0.3967311382293701
    },
    {
      "epoch": 0.000321807861328125,
      "model_forward_time": 0.1148691177368164,
      "step": 52725
    },
    {
      "epoch": 0.000321807861328125,
      "step": 52725,
      "training_step_time": 0.39524126052856445
    },
    {
      "epoch": 0.00032181396484375,
      "model_forward_time": 0.11516332626342773,
      "step": 52726
    },
    {
      "epoch": 0.00032181396484375,
      "step": 52726,
      "training_step_time": 0.4361412525177002
    },
    {
      "epoch": 0.000321820068359375,
      "model_forward_time": 0.1157386302947998,
      "step": 52727
    },
    {
      "epoch": 0.000321820068359375,
      "step": 52727,
      "training_step_time": 0.4378316402435303
    },
    {
      "epoch": 0.000321826171875,
      "model_forward_time": 0.11664462089538574,
      "step": 52728
    },
    {
      "epoch": 0.000321826171875,
      "step": 52728,
      "training_step_time": 0.4669654369354248
    },
    {
      "epoch": 0.000321832275390625,
      "model_forward_time": 0.11687254905700684,
      "step": 52729
    },
    {
      "epoch": 0.000321832275390625,
      "step": 52729,
      "training_step_time": 0.42056941986083984
    },
    {
      "epoch": 0.00032183837890625,
      "grad_norm": 0.07908368110656738,
      "learning_rate": 3.960412820277865e-06,
      "loss": 0.0372,
      "step": 52730
    },
    {
      "epoch": 0.00032183837890625,
      "model_forward_time": 0.11536669731140137,
      "step": 52730
    },
    {
      "epoch": 0.00032183837890625,
      "step": 52730,
      "training_step_time": 0.4347259998321533
    },
    {
      "epoch": 0.000321844482421875,
      "model_forward_time": 0.11471772193908691,
      "step": 52731
    },
    {
      "epoch": 0.000321844482421875,
      "step": 52731,
      "training_step_time": 0.3963603973388672
    },
    {
      "epoch": 0.0003218505859375,
      "model_forward_time": 0.11546897888183594,
      "step": 52732
    },
    {
      "epoch": 0.0003218505859375,
      "step": 52732,
      "training_step_time": 0.46018075942993164
    },
    {
      "epoch": 0.000321856689453125,
      "model_forward_time": 0.1150963306427002,
      "step": 52733
    },
    {
      "epoch": 0.000321856689453125,
      "step": 52733,
      "training_step_time": 0.4147026538848877
    },
    {
      "epoch": 0.00032186279296875,
      "model_forward_time": 0.11528992652893066,
      "step": 52734
    },
    {
      "epoch": 0.00032186279296875,
      "step": 52734,
      "training_step_time": 0.40021228790283203
    },
    {
      "epoch": 0.000321868896484375,
      "model_forward_time": 0.11559772491455078,
      "step": 52735
    },
    {
      "epoch": 0.000321868896484375,
      "step": 52735,
      "training_step_time": 0.3938138484954834
    },
    {
      "epoch": 0.000321875,
      "model_forward_time": 0.11509442329406738,
      "step": 52736
    },
    {
      "epoch": 0.000321875,
      "step": 52736,
      "training_step_time": 0.3958714008331299
    },
    {
      "epoch": 0.000321881103515625,
      "model_forward_time": 0.11519789695739746,
      "step": 52737
    },
    {
      "epoch": 0.000321881103515625,
      "step": 52737,
      "training_step_time": 0.40223264694213867
    },
    {
      "epoch": 0.00032188720703125,
      "model_forward_time": 0.1148829460144043,
      "step": 52738
    },
    {
      "epoch": 0.00032188720703125,
      "step": 52738,
      "training_step_time": 0.3948955535888672
    },
    {
      "epoch": 0.000321893310546875,
      "model_forward_time": 0.11535429954528809,
      "step": 52739
    },
    {
      "epoch": 0.000321893310546875,
      "step": 52739,
      "training_step_time": 0.39110493659973145
    },
    {
      "epoch": 0.0003218994140625,
      "grad_norm": 0.09718424826860428,
      "learning_rate": 3.949670756075447e-06,
      "loss": 0.0403,
      "step": 52740
    },
    {
      "epoch": 0.0003218994140625,
      "model_forward_time": 0.11540889739990234,
      "step": 52740
    },
    {
      "epoch": 0.0003218994140625,
      "step": 52740,
      "training_step_time": 0.39172911643981934
    },
    {
      "epoch": 0.000321905517578125,
      "model_forward_time": 0.11554503440856934,
      "step": 52741
    },
    {
      "epoch": 0.000321905517578125,
      "step": 52741,
      "training_step_time": 0.5301306247711182
    },
    {
      "epoch": 0.00032191162109375,
      "model_forward_time": 0.1156473159790039,
      "step": 52742
    },
    {
      "epoch": 0.00032191162109375,
      "step": 52742,
      "training_step_time": 0.5250320434570312
    },
    {
      "epoch": 0.000321917724609375,
      "model_forward_time": 0.11496472358703613,
      "step": 52743
    },
    {
      "epoch": 0.000321917724609375,
      "step": 52743,
      "training_step_time": 0.44295406341552734
    },
    {
      "epoch": 0.000321923828125,
      "model_forward_time": 0.11475729942321777,
      "step": 52744
    },
    {
      "epoch": 0.000321923828125,
      "step": 52744,
      "training_step_time": 0.4411885738372803
    },
    {
      "epoch": 0.000321929931640625,
      "model_forward_time": 0.11443948745727539,
      "step": 52745
    },
    {
      "epoch": 0.000321929931640625,
      "step": 52745,
      "training_step_time": 0.38555026054382324
    },
    {
      "epoch": 0.00032193603515625,
      "model_forward_time": 0.11464071273803711,
      "step": 52746
    },
    {
      "epoch": 0.00032193603515625,
      "step": 52746,
      "training_step_time": 0.36530256271362305
    },
    {
      "epoch": 0.000321942138671875,
      "model_forward_time": 0.11579608917236328,
      "step": 52747
    },
    {
      "epoch": 0.000321942138671875,
      "step": 52747,
      "training_step_time": 0.44158387184143066
    },
    {
      "epoch": 0.0003219482421875,
      "model_forward_time": 0.11503911018371582,
      "step": 52748
    },
    {
      "epoch": 0.0003219482421875,
      "step": 52748,
      "training_step_time": 0.40595245361328125
    },
    {
      "epoch": 0.000321954345703125,
      "model_forward_time": 0.11566829681396484,
      "step": 52749
    },
    {
      "epoch": 0.000321954345703125,
      "step": 52749,
      "training_step_time": 0.38705015182495117
    },
    {
      "epoch": 0.00032196044921875,
      "grad_norm": 0.07859167456626892,
      "learning_rate": 3.9389426807471766e-06,
      "loss": 0.0351,
      "step": 52750
    },
    {
      "epoch": 0.00032196044921875,
      "model_forward_time": 0.11485886573791504,
      "step": 52750
    },
    {
      "epoch": 0.00032196044921875,
      "step": 52750,
      "training_step_time": 0.3958115577697754
    },
    {
      "epoch": 0.000321966552734375,
      "model_forward_time": 0.11507105827331543,
      "step": 52751
    },
    {
      "epoch": 0.000321966552734375,
      "step": 52751,
      "training_step_time": 0.38834571838378906
    },
    {
      "epoch": 0.00032197265625,
      "model_forward_time": 0.11509513854980469,
      "step": 52752
    },
    {
      "epoch": 0.00032197265625,
      "step": 52752,
      "training_step_time": 0.40348052978515625
    },
    {
      "epoch": 0.000321978759765625,
      "model_forward_time": 0.11505722999572754,
      "step": 52753
    },
    {
      "epoch": 0.000321978759765625,
      "step": 52753,
      "training_step_time": 0.47068262100219727
    },
    {
      "epoch": 0.00032198486328125,
      "model_forward_time": 0.11479330062866211,
      "step": 52754
    },
    {
      "epoch": 0.00032198486328125,
      "step": 52754,
      "training_step_time": 0.40121006965637207
    },
    {
      "epoch": 0.000321990966796875,
      "model_forward_time": 0.1152808666229248,
      "step": 52755
    },
    {
      "epoch": 0.000321990966796875,
      "step": 52755,
      "training_step_time": 0.40818047523498535
    },
    {
      "epoch": 0.0003219970703125,
      "model_forward_time": 0.11521291732788086,
      "step": 52756
    },
    {
      "epoch": 0.0003219970703125,
      "step": 52756,
      "training_step_time": 0.4477424621582031
    },
    {
      "epoch": 0.000322003173828125,
      "model_forward_time": 0.11435437202453613,
      "step": 52757
    },
    {
      "epoch": 0.000322003173828125,
      "step": 52757,
      "training_step_time": 0.49288177490234375
    },
    {
      "epoch": 0.00032200927734375,
      "model_forward_time": 0.11577248573303223,
      "step": 52758
    },
    {
      "epoch": 0.00032200927734375,
      "step": 52758,
      "training_step_time": 0.4825582504272461
    },
    {
      "epoch": 0.000322015380859375,
      "model_forward_time": 0.11466002464294434,
      "step": 52759
    },
    {
      "epoch": 0.000322015380859375,
      "step": 52759,
      "training_step_time": 0.4408538341522217
    },
    {
      "epoch": 0.000322021484375,
      "grad_norm": 0.1416614055633545,
      "learning_rate": 3.928228597551947e-06,
      "loss": 0.0424,
      "step": 52760
    },
    {
      "epoch": 0.000322021484375,
      "model_forward_time": 0.11492371559143066,
      "step": 52760
    },
    {
      "epoch": 0.000322021484375,
      "step": 52760,
      "training_step_time": 0.4229567050933838
    },
    {
      "epoch": 0.000322027587890625,
      "model_forward_time": 0.11448407173156738,
      "step": 52761
    },
    {
      "epoch": 0.000322027587890625,
      "step": 52761,
      "training_step_time": 0.4583604335784912
    },
    {
      "epoch": 0.00032203369140625,
      "model_forward_time": 0.11493182182312012,
      "step": 52762
    },
    {
      "epoch": 0.00032203369140625,
      "step": 52762,
      "training_step_time": 0.4869711399078369
    },
    {
      "epoch": 0.000322039794921875,
      "model_forward_time": 0.11452913284301758,
      "step": 52763
    },
    {
      "epoch": 0.000322039794921875,
      "step": 52763,
      "training_step_time": 0.3946104049682617
    },
    {
      "epoch": 0.0003220458984375,
      "model_forward_time": 0.11563968658447266,
      "step": 52764
    },
    {
      "epoch": 0.0003220458984375,
      "step": 52764,
      "training_step_time": 0.38689088821411133
    },
    {
      "epoch": 0.000322052001953125,
      "model_forward_time": 0.11522221565246582,
      "step": 52765
    },
    {
      "epoch": 0.000322052001953125,
      "step": 52765,
      "training_step_time": 0.3925938606262207
    },
    {
      "epoch": 0.00032205810546875,
      "model_forward_time": 0.11636495590209961,
      "step": 52766
    },
    {
      "epoch": 0.00032205810546875,
      "step": 52766,
      "training_step_time": 0.39757728576660156
    },
    {
      "epoch": 0.000322064208984375,
      "model_forward_time": 0.11516952514648438,
      "step": 52767
    },
    {
      "epoch": 0.000322064208984375,
      "step": 52767,
      "training_step_time": 0.3857455253601074
    },
    {
      "epoch": 0.0003220703125,
      "model_forward_time": 0.11459922790527344,
      "step": 52768
    },
    {
      "epoch": 0.0003220703125,
      "step": 52768,
      "training_step_time": 0.39380526542663574
    },
    {
      "epoch": 0.000322076416015625,
      "model_forward_time": 0.115814208984375,
      "step": 52769
    },
    {
      "epoch": 0.000322076416015625,
      "step": 52769,
      "training_step_time": 0.4117920398712158
    },
    {
      "epoch": 0.00032208251953125,
      "grad_norm": 0.10183534026145935,
      "learning_rate": 3.917528509744412e-06,
      "loss": 0.0386,
      "step": 52770
    },
    {
      "epoch": 0.00032208251953125,
      "model_forward_time": 0.11500954627990723,
      "step": 52770
    },
    {
      "epoch": 0.00032208251953125,
      "step": 52770,
      "training_step_time": 0.4154930114746094
    },
    {
      "epoch": 0.000322088623046875,
      "model_forward_time": 0.1150357723236084,
      "step": 52771
    },
    {
      "epoch": 0.000322088623046875,
      "step": 52771,
      "training_step_time": 0.5201001167297363
    },
    {
      "epoch": 0.0003220947265625,
      "model_forward_time": 0.11600351333618164,
      "step": 52772
    },
    {
      "epoch": 0.0003220947265625,
      "step": 52772,
      "training_step_time": 0.4199352264404297
    },
    {
      "epoch": 0.000322100830078125,
      "model_forward_time": 0.11510062217712402,
      "step": 52773
    },
    {
      "epoch": 0.000322100830078125,
      "step": 52773,
      "training_step_time": 0.48731422424316406
    },
    {
      "epoch": 0.00032210693359375,
      "model_forward_time": 0.11475610733032227,
      "step": 52774
    },
    {
      "epoch": 0.00032210693359375,
      "step": 52774,
      "training_step_time": 0.39069604873657227
    },
    {
      "epoch": 0.000322113037109375,
      "model_forward_time": 0.11523747444152832,
      "step": 52775
    },
    {
      "epoch": 0.000322113037109375,
      "step": 52775,
      "training_step_time": 0.39099812507629395
    },
    {
      "epoch": 0.000322119140625,
      "model_forward_time": 0.11517095565795898,
      "step": 52776
    },
    {
      "epoch": 0.000322119140625,
      "step": 52776,
      "training_step_time": 0.4319779872894287
    },
    {
      "epoch": 0.000322125244140625,
      "model_forward_time": 0.11535239219665527,
      "step": 52777
    },
    {
      "epoch": 0.000322125244140625,
      "step": 52777,
      "training_step_time": 0.3994925022125244
    },
    {
      "epoch": 0.00032213134765625,
      "model_forward_time": 0.11504364013671875,
      "step": 52778
    },
    {
      "epoch": 0.00032213134765625,
      "step": 52778,
      "training_step_time": 0.3887507915496826
    },
    {
      "epoch": 0.000322137451171875,
      "model_forward_time": 0.11455416679382324,
      "step": 52779
    },
    {
      "epoch": 0.000322137451171875,
      "step": 52779,
      "training_step_time": 0.3926823139190674
    },
    {
      "epoch": 0.0003221435546875,
      "grad_norm": 0.14399658143520355,
      "learning_rate": 3.90684242057498e-06,
      "loss": 0.0403,
      "step": 52780
    },
    {
      "epoch": 0.0003221435546875,
      "model_forward_time": 0.1145334243774414,
      "step": 52780
    },
    {
      "epoch": 0.0003221435546875,
      "step": 52780,
      "training_step_time": 0.3909735679626465
    },
    {
      "epoch": 0.000322149658203125,
      "model_forward_time": 0.11498332023620605,
      "step": 52781
    },
    {
      "epoch": 0.000322149658203125,
      "step": 52781,
      "training_step_time": 0.40122079849243164
    },
    {
      "epoch": 0.00032215576171875,
      "model_forward_time": 0.11500382423400879,
      "step": 52782
    },
    {
      "epoch": 0.00032215576171875,
      "step": 52782,
      "training_step_time": 0.3979613780975342
    },
    {
      "epoch": 0.000322161865234375,
      "model_forward_time": 0.11565136909484863,
      "step": 52783
    },
    {
      "epoch": 0.000322161865234375,
      "step": 52783,
      "training_step_time": 1.025156021118164
    },
    {
      "epoch": 0.00032216796875,
      "model_forward_time": 0.11465787887573242,
      "step": 52784
    },
    {
      "epoch": 0.00032216796875,
      "step": 52784,
      "training_step_time": 0.41652417182922363
    },
    {
      "epoch": 0.000322174072265625,
      "model_forward_time": 0.1145014762878418,
      "step": 52785
    },
    {
      "epoch": 0.000322174072265625,
      "step": 52785,
      "training_step_time": 0.4464104175567627
    },
    {
      "epoch": 0.00032218017578125,
      "model_forward_time": 0.11417126655578613,
      "step": 52786
    },
    {
      "epoch": 0.00032218017578125,
      "step": 52786,
      "training_step_time": 0.4305877685546875
    },
    {
      "epoch": 0.000322186279296875,
      "model_forward_time": 0.11415481567382812,
      "step": 52787
    },
    {
      "epoch": 0.000322186279296875,
      "step": 52787,
      "training_step_time": 0.3858451843261719
    },
    {
      "epoch": 0.0003221923828125,
      "model_forward_time": 0.11420154571533203,
      "step": 52788
    },
    {
      "epoch": 0.0003221923828125,
      "step": 52788,
      "training_step_time": 0.3927276134490967
    },
    {
      "epoch": 0.000322198486328125,
      "model_forward_time": 0.11559152603149414,
      "step": 52789
    },
    {
      "epoch": 0.000322198486328125,
      "step": 52789,
      "training_step_time": 0.582322359085083
    },
    {
      "epoch": 0.00032220458984375,
      "grad_norm": 0.11968669295310974,
      "learning_rate": 3.8961703332898e-06,
      "loss": 0.033,
      "step": 52790
    },
    {
      "epoch": 0.00032220458984375,
      "model_forward_time": 0.11441779136657715,
      "step": 52790
    },
    {
      "epoch": 0.00032220458984375,
      "step": 52790,
      "training_step_time": 0.4242236614227295
    },
    {
      "epoch": 0.000322210693359375,
      "model_forward_time": 0.11522436141967773,
      "step": 52791
    },
    {
      "epoch": 0.000322210693359375,
      "step": 52791,
      "training_step_time": 0.3859281539916992
    },
    {
      "epoch": 0.000322216796875,
      "model_forward_time": 0.11496853828430176,
      "step": 52792
    },
    {
      "epoch": 0.000322216796875,
      "step": 52792,
      "training_step_time": 0.3899691104888916
    },
    {
      "epoch": 0.000322222900390625,
      "model_forward_time": 0.11503171920776367,
      "step": 52793
    },
    {
      "epoch": 0.000322222900390625,
      "step": 52793,
      "training_step_time": 0.38613295555114746
    },
    {
      "epoch": 0.00032222900390625,
      "model_forward_time": 0.11548328399658203,
      "step": 52794
    },
    {
      "epoch": 0.00032222900390625,
      "step": 52794,
      "training_step_time": 0.3883082866668701
    },
    {
      "epoch": 0.000322235107421875,
      "model_forward_time": 0.11513304710388184,
      "step": 52795
    },
    {
      "epoch": 0.000322235107421875,
      "step": 52795,
      "training_step_time": 0.6948490142822266
    },
    {
      "epoch": 0.0003222412109375,
      "model_forward_time": 0.11487960815429688,
      "step": 52796
    },
    {
      "epoch": 0.0003222412109375,
      "step": 52796,
      "training_step_time": 0.41658949851989746
    },
    {
      "epoch": 0.000322247314453125,
      "model_forward_time": 0.11448359489440918,
      "step": 52797
    },
    {
      "epoch": 0.000322247314453125,
      "step": 52797,
      "training_step_time": 0.41104984283447266
    },
    {
      "epoch": 0.00032225341796875,
      "model_forward_time": 0.11456751823425293,
      "step": 52798
    },
    {
      "epoch": 0.00032225341796875,
      "step": 52798,
      "training_step_time": 0.44356703758239746
    },
    {
      "epoch": 0.000322259521484375,
      "model_forward_time": 0.11411476135253906,
      "step": 52799
    },
    {
      "epoch": 0.000322259521484375,
      "step": 52799,
      "training_step_time": 0.48691582679748535
    },
    {
      "epoch": 0.000322265625,
      "grad_norm": 0.0929255485534668,
      "learning_rate": 3.885512251130763e-06,
      "loss": 0.0421,
      "step": 52800
    },
    {
      "epoch": 0.000322265625,
      "model_forward_time": 0.11494731903076172,
      "step": 52800
    },
    {
      "epoch": 0.000322265625,
      "step": 52800,
      "training_step_time": 0.3985903263092041
    },
    {
      "epoch": 0.000322271728515625,
      "model_forward_time": 0.11489105224609375,
      "step": 52801
    },
    {
      "epoch": 0.000322271728515625,
      "step": 52801,
      "training_step_time": 0.5437238216400146
    },
    {
      "epoch": 0.00032227783203125,
      "model_forward_time": 0.11456441879272461,
      "step": 52802
    },
    {
      "epoch": 0.00032227783203125,
      "step": 52802,
      "training_step_time": 0.3652536869049072
    },
    {
      "epoch": 0.000322283935546875,
      "model_forward_time": 0.11453700065612793,
      "step": 52803
    },
    {
      "epoch": 0.000322283935546875,
      "step": 52803,
      "training_step_time": 0.4333920478820801
    },
    {
      "epoch": 0.0003222900390625,
      "model_forward_time": 0.11505436897277832,
      "step": 52804
    },
    {
      "epoch": 0.0003222900390625,
      "step": 52804,
      "training_step_time": 0.4080016613006592
    },
    {
      "epoch": 0.000322296142578125,
      "model_forward_time": 0.1143808364868164,
      "step": 52805
    },
    {
      "epoch": 0.000322296142578125,
      "step": 52805,
      "training_step_time": 0.38092875480651855
    },
    {
      "epoch": 0.00032230224609375,
      "model_forward_time": 0.11471343040466309,
      "step": 52806
    },
    {
      "epoch": 0.00032230224609375,
      "step": 52806,
      "training_step_time": 0.38904333114624023
    },
    {
      "epoch": 0.000322308349609375,
      "model_forward_time": 0.11570429801940918,
      "step": 52807
    },
    {
      "epoch": 0.000322308349609375,
      "step": 52807,
      "training_step_time": 0.7826282978057861
    },
    {
      "epoch": 0.000322314453125,
      "model_forward_time": 0.11398768424987793,
      "step": 52808
    },
    {
      "epoch": 0.000322314453125,
      "step": 52808,
      "training_step_time": 0.4016413688659668
    },
    {
      "epoch": 0.000322320556640625,
      "model_forward_time": 0.11465573310852051,
      "step": 52809
    },
    {
      "epoch": 0.000322320556640625,
      "step": 52809,
      "training_step_time": 0.3892385959625244
    },
    {
      "epoch": 0.00032232666015625,
      "grad_norm": 0.07790480554103851,
      "learning_rate": 3.8748681773355335e-06,
      "loss": 0.033,
      "step": 52810
    },
    {
      "epoch": 0.00032232666015625,
      "model_forward_time": 0.11446356773376465,
      "step": 52810
    },
    {
      "epoch": 0.00032232666015625,
      "step": 52810,
      "training_step_time": 0.3869004249572754
    },
    {
      "epoch": 0.000322332763671875,
      "model_forward_time": 0.11411356925964355,
      "step": 52811
    },
    {
      "epoch": 0.000322332763671875,
      "step": 52811,
      "training_step_time": 0.42575907707214355
    },
    {
      "epoch": 0.0003223388671875,
      "model_forward_time": 0.11474490165710449,
      "step": 52812
    },
    {
      "epoch": 0.0003223388671875,
      "step": 52812,
      "training_step_time": 0.482572078704834
    },
    {
      "epoch": 0.000322344970703125,
      "model_forward_time": 0.11497998237609863,
      "step": 52813
    },
    {
      "epoch": 0.000322344970703125,
      "step": 52813,
      "training_step_time": 0.5790348052978516
    },
    {
      "epoch": 0.00032235107421875,
      "model_forward_time": 0.11465954780578613,
      "step": 52814
    },
    {
      "epoch": 0.00032235107421875,
      "step": 52814,
      "training_step_time": 0.4051322937011719
    },
    {
      "epoch": 0.000322357177734375,
      "model_forward_time": 0.11465573310852051,
      "step": 52815
    },
    {
      "epoch": 0.000322357177734375,
      "step": 52815,
      "training_step_time": 0.39423489570617676
    },
    {
      "epoch": 0.00032236328125,
      "model_forward_time": 0.11451268196105957,
      "step": 52816
    },
    {
      "epoch": 0.00032236328125,
      "step": 52816,
      "training_step_time": 0.39671850204467773
    },
    {
      "epoch": 0.000322369384765625,
      "model_forward_time": 0.11508321762084961,
      "step": 52817
    },
    {
      "epoch": 0.000322369384765625,
      "step": 52817,
      "training_step_time": 0.3966655731201172
    },
    {
      "epoch": 0.00032237548828125,
      "model_forward_time": 0.11558413505554199,
      "step": 52818
    },
    {
      "epoch": 0.00032237548828125,
      "step": 52818,
      "training_step_time": 0.4116511344909668
    },
    {
      "epoch": 0.000322381591796875,
      "model_forward_time": 0.11511373519897461,
      "step": 52819
    },
    {
      "epoch": 0.000322381591796875,
      "step": 52819,
      "training_step_time": 0.8319149017333984
    },
    {
      "epoch": 0.0003223876953125,
      "grad_norm": 0.10694177448749542,
      "learning_rate": 3.864238115137481e-06,
      "loss": 0.0352,
      "step": 52820
    },
    {
      "epoch": 0.0003223876953125,
      "model_forward_time": 0.11402535438537598,
      "step": 52820
    },
    {
      "epoch": 0.0003223876953125,
      "step": 52820,
      "training_step_time": 0.4061164855957031
    },
    {
      "epoch": 0.000322393798828125,
      "model_forward_time": 0.1137237548828125,
      "step": 52821
    },
    {
      "epoch": 0.000322393798828125,
      "step": 52821,
      "training_step_time": 0.3900635242462158
    },
    {
      "epoch": 0.00032239990234375,
      "model_forward_time": 0.11363554000854492,
      "step": 52822
    },
    {
      "epoch": 0.00032239990234375,
      "step": 52822,
      "training_step_time": 0.39046382904052734
    },
    {
      "epoch": 0.000322406005859375,
      "model_forward_time": 0.11438798904418945,
      "step": 52823
    },
    {
      "epoch": 0.000322406005859375,
      "step": 52823,
      "training_step_time": 0.39600276947021484
    },
    {
      "epoch": 0.000322412109375,
      "model_forward_time": 0.11411643028259277,
      "step": 52824
    },
    {
      "epoch": 0.000322412109375,
      "step": 52824,
      "training_step_time": 0.44324541091918945
    },
    {
      "epoch": 0.000322418212890625,
      "model_forward_time": 0.11475896835327148,
      "step": 52825
    },
    {
      "epoch": 0.000322418212890625,
      "step": 52825,
      "training_step_time": 0.6906495094299316
    },
    {
      "epoch": 0.00032242431640625,
      "model_forward_time": 0.11459684371948242,
      "step": 52826
    },
    {
      "epoch": 0.00032242431640625,
      "step": 52826,
      "training_step_time": 0.4536702632904053
    },
    {
      "epoch": 0.000322430419921875,
      "model_forward_time": 0.11428070068359375,
      "step": 52827
    },
    {
      "epoch": 0.000322430419921875,
      "step": 52827,
      "training_step_time": 0.3980522155761719
    },
    {
      "epoch": 0.0003224365234375,
      "model_forward_time": 0.11442303657531738,
      "step": 52828
    },
    {
      "epoch": 0.0003224365234375,
      "step": 52828,
      "training_step_time": 0.40125441551208496
    },
    {
      "epoch": 0.000322442626953125,
      "model_forward_time": 0.11445069313049316,
      "step": 52829
    },
    {
      "epoch": 0.000322442626953125,
      "step": 52829,
      "training_step_time": 0.3640155792236328
    },
    {
      "epoch": 0.00032244873046875,
      "grad_norm": 0.14579126238822937,
      "learning_rate": 3.8536220677657495e-06,
      "loss": 0.0346,
      "step": 52830
    },
    {
      "epoch": 0.00032244873046875,
      "model_forward_time": 0.1144266128540039,
      "step": 52830
    },
    {
      "epoch": 0.00032244873046875,
      "step": 52830,
      "training_step_time": 0.428455114364624
    },
    {
      "epoch": 0.000322454833984375,
      "model_forward_time": 0.11498594284057617,
      "step": 52831
    },
    {
      "epoch": 0.000322454833984375,
      "step": 52831,
      "training_step_time": 0.7724041938781738
    },
    {
      "epoch": 0.0003224609375,
      "model_forward_time": 0.11415457725524902,
      "step": 52832
    },
    {
      "epoch": 0.0003224609375,
      "step": 52832,
      "training_step_time": 0.3846724033355713
    },
    {
      "epoch": 0.000322467041015625,
      "model_forward_time": 0.1140449047088623,
      "step": 52833
    },
    {
      "epoch": 0.000322467041015625,
      "step": 52833,
      "training_step_time": 0.3895244598388672
    },
    {
      "epoch": 0.00032247314453125,
      "model_forward_time": 0.11414051055908203,
      "step": 52834
    },
    {
      "epoch": 0.00032247314453125,
      "step": 52834,
      "training_step_time": 0.38866543769836426
    },
    {
      "epoch": 0.000322479248046875,
      "model_forward_time": 0.1142425537109375,
      "step": 52835
    },
    {
      "epoch": 0.000322479248046875,
      "step": 52835,
      "training_step_time": 0.3869965076446533
    },
    {
      "epoch": 0.0003224853515625,
      "model_forward_time": 0.11534595489501953,
      "step": 52836
    },
    {
      "epoch": 0.0003224853515625,
      "step": 52836,
      "training_step_time": 0.4095134735107422
    },
    {
      "epoch": 0.000322491455078125,
      "model_forward_time": 0.11475539207458496,
      "step": 52837
    },
    {
      "epoch": 0.000322491455078125,
      "step": 52837,
      "training_step_time": 1.1406826972961426
    },
    {
      "epoch": 0.00032249755859375,
      "model_forward_time": 0.11407160758972168,
      "step": 52838
    },
    {
      "epoch": 0.00032249755859375,
      "step": 52838,
      "training_step_time": 0.4937324523925781
    },
    {
      "epoch": 0.000322503662109375,
      "model_forward_time": 0.11396670341491699,
      "step": 52839
    },
    {
      "epoch": 0.000322503662109375,
      "step": 52839,
      "training_step_time": 0.416201114654541
    },
    {
      "epoch": 0.000322509765625,
      "grad_norm": 0.07578834891319275,
      "learning_rate": 3.843020038445211e-06,
      "loss": 0.0328,
      "step": 52840
    },
    {
      "epoch": 0.000322509765625,
      "model_forward_time": 0.11430501937866211,
      "step": 52840
    },
    {
      "epoch": 0.000322509765625,
      "step": 52840,
      "training_step_time": 0.3884847164154053
    },
    {
      "epoch": 0.000322515869140625,
      "model_forward_time": 0.11461138725280762,
      "step": 52841
    },
    {
      "epoch": 0.000322515869140625,
      "step": 52841,
      "training_step_time": 0.35997772216796875
    },
    {
      "epoch": 0.00032252197265625,
      "model_forward_time": 0.1146543025970459,
      "step": 52842
    },
    {
      "epoch": 0.00032252197265625,
      "step": 52842,
      "training_step_time": 0.39333415031433105
    },
    {
      "epoch": 0.000322528076171875,
      "model_forward_time": 0.11453104019165039,
      "step": 52843
    },
    {
      "epoch": 0.000322528076171875,
      "step": 52843,
      "training_step_time": 0.49572110176086426
    },
    {
      "epoch": 0.0003225341796875,
      "model_forward_time": 0.11488008499145508,
      "step": 52844
    },
    {
      "epoch": 0.0003225341796875,
      "step": 52844,
      "training_step_time": 0.41037631034851074
    },
    {
      "epoch": 0.000322540283203125,
      "model_forward_time": 0.11662626266479492,
      "step": 52845
    },
    {
      "epoch": 0.000322540283203125,
      "step": 52845,
      "training_step_time": 0.3866705894470215
    },
    {
      "epoch": 0.00032254638671875,
      "model_forward_time": 0.11512899398803711,
      "step": 52846
    },
    {
      "epoch": 0.00032254638671875,
      "step": 52846,
      "training_step_time": 0.3954787254333496
    },
    {
      "epoch": 0.000322552490234375,
      "model_forward_time": 0.11512422561645508,
      "step": 52847
    },
    {
      "epoch": 0.000322552490234375,
      "step": 52847,
      "training_step_time": 0.3890225887298584
    },
    {
      "epoch": 0.00032255859375,
      "model_forward_time": 0.11493730545043945,
      "step": 52848
    },
    {
      "epoch": 0.00032255859375,
      "step": 52848,
      "training_step_time": 0.4741342067718506
    },
    {
      "epoch": 0.000322564697265625,
      "model_forward_time": 0.11512160301208496,
      "step": 52849
    },
    {
      "epoch": 0.000322564697265625,
      "step": 52849,
      "training_step_time": 0.4111614227294922
    },
    {
      "epoch": 0.00032257080078125,
      "grad_norm": 0.07307067513465881,
      "learning_rate": 3.832432030396471e-06,
      "loss": 0.0362,
      "step": 52850
    },
    {
      "epoch": 0.00032257080078125,
      "model_forward_time": 0.11477088928222656,
      "step": 52850
    },
    {
      "epoch": 0.00032257080078125,
      "step": 52850,
      "training_step_time": 0.39238810539245605
    },
    {
      "epoch": 0.000322576904296875,
      "model_forward_time": 0.11535477638244629,
      "step": 52851
    },
    {
      "epoch": 0.000322576904296875,
      "step": 52851,
      "training_step_time": 0.4723362922668457
    },
    {
      "epoch": 0.0003225830078125,
      "model_forward_time": 0.11482858657836914,
      "step": 52852
    },
    {
      "epoch": 0.0003225830078125,
      "step": 52852,
      "training_step_time": 0.4377143383026123
    },
    {
      "epoch": 0.000322589111328125,
      "model_forward_time": 0.11524724960327148,
      "step": 52853
    },
    {
      "epoch": 0.000322589111328125,
      "step": 52853,
      "training_step_time": 0.5042142868041992
    },
    {
      "epoch": 0.00032259521484375,
      "model_forward_time": 0.11490058898925781,
      "step": 52854
    },
    {
      "epoch": 0.00032259521484375,
      "step": 52854,
      "training_step_time": 0.3994405269622803
    },
    {
      "epoch": 0.000322601318359375,
      "model_forward_time": 0.11504197120666504,
      "step": 52855
    },
    {
      "epoch": 0.000322601318359375,
      "step": 52855,
      "training_step_time": 0.3957033157348633
    },
    {
      "epoch": 0.000322607421875,
      "model_forward_time": 0.11606574058532715,
      "step": 52856
    },
    {
      "epoch": 0.000322607421875,
      "step": 52856,
      "training_step_time": 0.40847277641296387
    },
    {
      "epoch": 0.000322613525390625,
      "model_forward_time": 0.11600637435913086,
      "step": 52857
    },
    {
      "epoch": 0.000322613525390625,
      "step": 52857,
      "training_step_time": 0.4318273067474365
    },
    {
      "epoch": 0.00032261962890625,
      "model_forward_time": 0.1148383617401123,
      "step": 52858
    },
    {
      "epoch": 0.00032261962890625,
      "step": 52858,
      "training_step_time": 0.40809202194213867
    },
    {
      "epoch": 0.000322625732421875,
      "model_forward_time": 0.11449956893920898,
      "step": 52859
    },
    {
      "epoch": 0.000322625732421875,
      "step": 52859,
      "training_step_time": 0.3983595371246338
    },
    {
      "epoch": 0.0003226318359375,
      "grad_norm": 0.09315869212150574,
      "learning_rate": 3.821858046835913e-06,
      "loss": 0.0366,
      "step": 52860
    },
    {
      "epoch": 0.0003226318359375,
      "model_forward_time": 0.11498308181762695,
      "step": 52860
    },
    {
      "epoch": 0.0003226318359375,
      "step": 52860,
      "training_step_time": 0.39688897132873535
    },
    {
      "epoch": 0.000322637939453125,
      "model_forward_time": 0.11527180671691895,
      "step": 52861
    },
    {
      "epoch": 0.000322637939453125,
      "step": 52861,
      "training_step_time": 0.6032922267913818
    },
    {
      "epoch": 0.00032264404296875,
      "model_forward_time": 0.11479997634887695,
      "step": 52862
    },
    {
      "epoch": 0.00032264404296875,
      "step": 52862,
      "training_step_time": 0.4045743942260742
    },
    {
      "epoch": 0.000322650146484375,
      "model_forward_time": 0.11492753028869629,
      "step": 52863
    },
    {
      "epoch": 0.000322650146484375,
      "step": 52863,
      "training_step_time": 0.3932974338531494
    },
    {
      "epoch": 0.00032265625,
      "model_forward_time": 0.11578249931335449,
      "step": 52864
    },
    {
      "epoch": 0.00032265625,
      "step": 52864,
      "training_step_time": 0.38552021980285645
    },
    {
      "epoch": 0.000322662353515625,
      "model_forward_time": 0.11512517929077148,
      "step": 52865
    },
    {
      "epoch": 0.000322662353515625,
      "step": 52865,
      "training_step_time": 0.38863134384155273
    },
    {
      "epoch": 0.00032266845703125,
      "model_forward_time": 0.11465167999267578,
      "step": 52866
    },
    {
      "epoch": 0.00032266845703125,
      "step": 52866,
      "training_step_time": 0.41097378730773926
    },
    {
      "epoch": 0.000322674560546875,
      "model_forward_time": 0.11499476432800293,
      "step": 52867
    },
    {
      "epoch": 0.000322674560546875,
      "step": 52867,
      "training_step_time": 0.8654358386993408
    },
    {
      "epoch": 0.0003226806640625,
      "model_forward_time": 0.1146082878112793,
      "step": 52868
    },
    {
      "epoch": 0.0003226806640625,
      "step": 52868,
      "training_step_time": 0.38846731185913086
    },
    {
      "epoch": 0.000322686767578125,
      "model_forward_time": 0.11418461799621582,
      "step": 52869
    },
    {
      "epoch": 0.000322686767578125,
      "step": 52869,
      "training_step_time": 0.38666629791259766
    },
    {
      "epoch": 0.00032269287109375,
      "grad_norm": 0.12820357084274292,
      "learning_rate": 3.8112980909756014e-06,
      "loss": 0.0366,
      "step": 52870
    },
    {
      "epoch": 0.00032269287109375,
      "model_forward_time": 0.11439728736877441,
      "step": 52870
    },
    {
      "epoch": 0.00032269287109375,
      "step": 52870,
      "training_step_time": 0.4260263442993164
    },
    {
      "epoch": 0.000322698974609375,
      "model_forward_time": 0.11425089836120605,
      "step": 52871
    },
    {
      "epoch": 0.000322698974609375,
      "step": 52871,
      "training_step_time": 0.3976283073425293
    },
    {
      "epoch": 0.000322705078125,
      "model_forward_time": 0.11479735374450684,
      "step": 52872
    },
    {
      "epoch": 0.000322705078125,
      "step": 52872,
      "training_step_time": 0.4197685718536377
    },
    {
      "epoch": 0.000322711181640625,
      "model_forward_time": 0.11606788635253906,
      "step": 52873
    },
    {
      "epoch": 0.000322711181640625,
      "step": 52873,
      "training_step_time": 0.4129493236541748
    },
    {
      "epoch": 0.00032271728515625,
      "model_forward_time": 0.11498618125915527,
      "step": 52874
    },
    {
      "epoch": 0.00032271728515625,
      "step": 52874,
      "training_step_time": 0.38831210136413574
    },
    {
      "epoch": 0.000322723388671875,
      "model_forward_time": 0.11492347717285156,
      "step": 52875
    },
    {
      "epoch": 0.000322723388671875,
      "step": 52875,
      "training_step_time": 0.404923677444458
    },
    {
      "epoch": 0.0003227294921875,
      "model_forward_time": 0.11502265930175781,
      "step": 52876
    },
    {
      "epoch": 0.0003227294921875,
      "step": 52876,
      "training_step_time": 0.39214158058166504
    },
    {
      "epoch": 0.000322735595703125,
      "model_forward_time": 0.11586570739746094,
      "step": 52877
    },
    {
      "epoch": 0.000322735595703125,
      "step": 52877,
      "training_step_time": 0.39301204681396484
    },
    {
      "epoch": 0.00032274169921875,
      "model_forward_time": 0.11536502838134766,
      "step": 52878
    },
    {
      "epoch": 0.00032274169921875,
      "step": 52878,
      "training_step_time": 0.4055511951446533
    },
    {
      "epoch": 0.000322747802734375,
      "model_forward_time": 0.1153860092163086,
      "step": 52879
    },
    {
      "epoch": 0.000322747802734375,
      "step": 52879,
      "training_step_time": 0.6446013450622559
    },
    {
      "epoch": 0.00032275390625,
      "grad_norm": 0.09200619161128998,
      "learning_rate": 3.8007521660234023e-06,
      "loss": 0.0367,
      "step": 52880
    },
    {
      "epoch": 0.00032275390625,
      "model_forward_time": 0.11458230018615723,
      "step": 52880
    },
    {
      "epoch": 0.00032275390625,
      "step": 52880,
      "training_step_time": 0.4366178512573242
    },
    {
      "epoch": 0.000322760009765625,
      "model_forward_time": 0.11429071426391602,
      "step": 52881
    },
    {
      "epoch": 0.000322760009765625,
      "step": 52881,
      "training_step_time": 0.4068148136138916
    },
    {
      "epoch": 0.00032276611328125,
      "model_forward_time": 0.11422014236450195,
      "step": 52882
    },
    {
      "epoch": 0.00032276611328125,
      "step": 52882,
      "training_step_time": 0.38768887519836426
    },
    {
      "epoch": 0.000322772216796875,
      "model_forward_time": 0.11435890197753906,
      "step": 52883
    },
    {
      "epoch": 0.000322772216796875,
      "step": 52883,
      "training_step_time": 0.39079976081848145
    },
    {
      "epoch": 0.0003227783203125,
      "model_forward_time": 0.11459803581237793,
      "step": 52884
    },
    {
      "epoch": 0.0003227783203125,
      "step": 52884,
      "training_step_time": 0.36385512351989746
    },
    {
      "epoch": 0.000322784423828125,
      "model_forward_time": 0.11515331268310547,
      "step": 52885
    },
    {
      "epoch": 0.000322784423828125,
      "step": 52885,
      "training_step_time": 0.4561934471130371
    },
    {
      "epoch": 0.00032279052734375,
      "model_forward_time": 0.11472415924072266,
      "step": 52886
    },
    {
      "epoch": 0.00032279052734375,
      "step": 52886,
      "training_step_time": 0.40886783599853516
    },
    {
      "epoch": 0.000322796630859375,
      "model_forward_time": 0.11611628532409668,
      "step": 52887
    },
    {
      "epoch": 0.000322796630859375,
      "step": 52887,
      "training_step_time": 0.37985825538635254
    },
    {
      "epoch": 0.000322802734375,
      "model_forward_time": 0.11481666564941406,
      "step": 52888
    },
    {
      "epoch": 0.000322802734375,
      "step": 52888,
      "training_step_time": 0.4463765621185303
    },
    {
      "epoch": 0.000322808837890625,
      "model_forward_time": 0.11460995674133301,
      "step": 52889
    },
    {
      "epoch": 0.000322808837890625,
      "step": 52889,
      "training_step_time": 0.4062356948852539
    },
    {
      "epoch": 0.00032281494140625,
      "grad_norm": 0.11110426485538483,
      "learning_rate": 3.790220275182854e-06,
      "loss": 0.0342,
      "step": 52890
    },
    {
      "epoch": 0.00032281494140625,
      "model_forward_time": 0.11463308334350586,
      "step": 52890
    },
    {
      "epoch": 0.00032281494140625,
      "step": 52890,
      "training_step_time": 0.4079909324645996
    },
    {
      "epoch": 0.000322821044921875,
      "model_forward_time": 0.11518716812133789,
      "step": 52891
    },
    {
      "epoch": 0.000322821044921875,
      "step": 52891,
      "training_step_time": 0.4284210205078125
    },
    {
      "epoch": 0.0003228271484375,
      "model_forward_time": 0.1151113510131836,
      "step": 52892
    },
    {
      "epoch": 0.0003228271484375,
      "step": 52892,
      "training_step_time": 0.39601826667785645
    },
    {
      "epoch": 0.000322833251953125,
      "model_forward_time": 0.11595368385314941,
      "step": 52893
    },
    {
      "epoch": 0.000322833251953125,
      "step": 52893,
      "training_step_time": 0.38892531394958496
    },
    {
      "epoch": 0.00032283935546875,
      "model_forward_time": 0.11536788940429688,
      "step": 52894
    },
    {
      "epoch": 0.00032283935546875,
      "step": 52894,
      "training_step_time": 0.47877979278564453
    },
    {
      "epoch": 0.000322845458984375,
      "model_forward_time": 0.11547541618347168,
      "step": 52895
    },
    {
      "epoch": 0.000322845458984375,
      "step": 52895,
      "training_step_time": 0.4351072311401367
    },
    {
      "epoch": 0.0003228515625,
      "model_forward_time": 0.11487078666687012,
      "step": 52896
    },
    {
      "epoch": 0.0003228515625,
      "step": 52896,
      "training_step_time": 0.43526363372802734
    },
    {
      "epoch": 0.000322857666015625,
      "model_forward_time": 0.11509370803833008,
      "step": 52897
    },
    {
      "epoch": 0.000322857666015625,
      "step": 52897,
      "training_step_time": 0.4031696319580078
    },
    {
      "epoch": 0.00032286376953125,
      "model_forward_time": 0.11475324630737305,
      "step": 52898
    },
    {
      "epoch": 0.00032286376953125,
      "step": 52898,
      "training_step_time": 0.3955361843109131
    },
    {
      "epoch": 0.000322869873046875,
      "model_forward_time": 0.11530542373657227,
      "step": 52899
    },
    {
      "epoch": 0.000322869873046875,
      "step": 52899,
      "training_step_time": 0.4157991409301758
    },
    {
      "epoch": 0.0003228759765625,
      "grad_norm": 0.1287880539894104,
      "learning_rate": 3.7797024216533138e-06,
      "loss": 0.0335,
      "step": 52900
    },
    {
      "epoch": 0.0003228759765625,
      "model_forward_time": 0.11527156829833984,
      "step": 52900
    },
    {
      "epoch": 0.0003228759765625,
      "step": 52900,
      "training_step_time": 0.40204811096191406
    },
    {
      "epoch": 0.000322882080078125,
      "model_forward_time": 0.11522889137268066,
      "step": 52901
    },
    {
      "epoch": 0.000322882080078125,
      "step": 52901,
      "training_step_time": 0.3946504592895508
    },
    {
      "epoch": 0.00032288818359375,
      "model_forward_time": 0.11512994766235352,
      "step": 52902
    },
    {
      "epoch": 0.00032288818359375,
      "step": 52902,
      "training_step_time": 0.40489816665649414
    },
    {
      "epoch": 0.000322894287109375,
      "model_forward_time": 0.1152658462524414,
      "step": 52903
    },
    {
      "epoch": 0.000322894287109375,
      "step": 52903,
      "training_step_time": 0.5615599155426025
    },
    {
      "epoch": 0.000322900390625,
      "model_forward_time": 0.11400246620178223,
      "step": 52904
    },
    {
      "epoch": 0.000322900390625,
      "step": 52904,
      "training_step_time": 0.39183807373046875
    },
    {
      "epoch": 0.000322906494140625,
      "model_forward_time": 0.11542367935180664,
      "step": 52905
    },
    {
      "epoch": 0.000322906494140625,
      "step": 52905,
      "training_step_time": 0.3954353332519531
    },
    {
      "epoch": 0.00032291259765625,
      "model_forward_time": 0.11481261253356934,
      "step": 52906
    },
    {
      "epoch": 0.00032291259765625,
      "step": 52906,
      "training_step_time": 0.4080188274383545
    },
    {
      "epoch": 0.000322918701171875,
      "model_forward_time": 0.11561012268066406,
      "step": 52907
    },
    {
      "epoch": 0.000322918701171875,
      "step": 52907,
      "training_step_time": 0.3905651569366455
    },
    {
      "epoch": 0.0003229248046875,
      "model_forward_time": 0.11488962173461914,
      "step": 52908
    },
    {
      "epoch": 0.0003229248046875,
      "step": 52908,
      "training_step_time": 0.46325039863586426
    },
    {
      "epoch": 0.000322930908203125,
      "model_forward_time": 0.11546778678894043,
      "step": 52909
    },
    {
      "epoch": 0.000322930908203125,
      "step": 52909,
      "training_step_time": 0.5158188343048096
    },
    {
      "epoch": 0.00032293701171875,
      "grad_norm": 0.07772223651409149,
      "learning_rate": 3.769198608629776e-06,
      "loss": 0.0378,
      "step": 52910
    },
    {
      "epoch": 0.00032293701171875,
      "model_forward_time": 0.11493229866027832,
      "step": 52910
    },
    {
      "epoch": 0.00032293701171875,
      "step": 52910,
      "training_step_time": 0.4114110469818115
    },
    {
      "epoch": 0.000322943115234375,
      "model_forward_time": 0.11452221870422363,
      "step": 52911
    },
    {
      "epoch": 0.000322943115234375,
      "step": 52911,
      "training_step_time": 0.47897839546203613
    },
    {
      "epoch": 0.00032294921875,
      "model_forward_time": 0.11484360694885254,
      "step": 52912
    },
    {
      "epoch": 0.00032294921875,
      "step": 52912,
      "training_step_time": 0.42441654205322266
    },
    {
      "epoch": 0.000322955322265625,
      "model_forward_time": 0.11526846885681152,
      "step": 52913
    },
    {
      "epoch": 0.000322955322265625,
      "step": 52913,
      "training_step_time": 0.4030168056488037
    },
    {
      "epoch": 0.00032296142578125,
      "model_forward_time": 0.11487245559692383,
      "step": 52914
    },
    {
      "epoch": 0.00032296142578125,
      "step": 52914,
      "training_step_time": 0.4053363800048828
    },
    {
      "epoch": 0.000322967529296875,
      "model_forward_time": 0.11546874046325684,
      "step": 52915
    },
    {
      "epoch": 0.000322967529296875,
      "step": 52915,
      "training_step_time": 0.5512149333953857
    },
    {
      "epoch": 0.0003229736328125,
      "model_forward_time": 0.11431360244750977,
      "step": 52916
    },
    {
      "epoch": 0.0003229736328125,
      "step": 52916,
      "training_step_time": 0.41200947761535645
    },
    {
      "epoch": 0.000322979736328125,
      "model_forward_time": 0.11454963684082031,
      "step": 52917
    },
    {
      "epoch": 0.000322979736328125,
      "step": 52917,
      "training_step_time": 0.3985445499420166
    },
    {
      "epoch": 0.00032298583984375,
      "model_forward_time": 0.11463522911071777,
      "step": 52918
    },
    {
      "epoch": 0.00032298583984375,
      "step": 52918,
      "training_step_time": 0.3965878486633301
    },
    {
      "epoch": 0.000322991943359375,
      "model_forward_time": 0.11490178108215332,
      "step": 52919
    },
    {
      "epoch": 0.000322991943359375,
      "step": 52919,
      "training_step_time": 0.3931756019592285
    },
    {
      "epoch": 0.000322998046875,
      "grad_norm": 0.07669857144355774,
      "learning_rate": 3.75870883930306e-06,
      "loss": 0.0302,
      "step": 52920
    },
    {
      "epoch": 0.000322998046875,
      "model_forward_time": 0.11434364318847656,
      "step": 52920
    },
    {
      "epoch": 0.000322998046875,
      "step": 52920,
      "training_step_time": 0.3988833427429199
    },
    {
      "epoch": 0.000323004150390625,
      "model_forward_time": 0.11450362205505371,
      "step": 52921
    },
    {
      "epoch": 0.000323004150390625,
      "step": 52921,
      "training_step_time": 1.0083532333374023
    },
    {
      "epoch": 0.00032301025390625,
      "model_forward_time": 0.11414217948913574,
      "step": 52922
    },
    {
      "epoch": 0.00032301025390625,
      "step": 52922,
      "training_step_time": 0.4195544719696045
    },
    {
      "epoch": 0.000323016357421875,
      "model_forward_time": 0.11432147026062012,
      "step": 52923
    },
    {
      "epoch": 0.000323016357421875,
      "step": 52923,
      "training_step_time": 0.4148392677307129
    },
    {
      "epoch": 0.0003230224609375,
      "model_forward_time": 0.11402368545532227,
      "step": 52924
    },
    {
      "epoch": 0.0003230224609375,
      "step": 52924,
      "training_step_time": 0.4521026611328125
    },
    {
      "epoch": 0.000323028564453125,
      "model_forward_time": 0.11367630958557129,
      "step": 52925
    },
    {
      "epoch": 0.000323028564453125,
      "step": 52925,
      "training_step_time": 0.3761143684387207
    },
    {
      "epoch": 0.00032303466796875,
      "model_forward_time": 0.11449146270751953,
      "step": 52926
    },
    {
      "epoch": 0.00032303466796875,
      "step": 52926,
      "training_step_time": 0.38179492950439453
    },
    {
      "epoch": 0.000323040771484375,
      "model_forward_time": 0.1152963638305664,
      "step": 52927
    },
    {
      "epoch": 0.000323040771484375,
      "step": 52927,
      "training_step_time": 0.4998915195465088
    },
    {
      "epoch": 0.000323046875,
      "model_forward_time": 0.11436700820922852,
      "step": 52928
    },
    {
      "epoch": 0.000323046875,
      "step": 52928,
      "training_step_time": 0.4935276508331299
    },
    {
      "epoch": 0.000323052978515625,
      "model_forward_time": 0.11513113975524902,
      "step": 52929
    },
    {
      "epoch": 0.000323052978515625,
      "step": 52929,
      "training_step_time": 0.38237714767456055
    },
    {
      "epoch": 0.00032305908203125,
      "grad_norm": 0.09160090237855911,
      "learning_rate": 3.7482331168596675e-06,
      "loss": 0.037,
      "step": 52930
    },
    {
      "epoch": 0.00032305908203125,
      "model_forward_time": 0.11496424674987793,
      "step": 52930
    },
    {
      "epoch": 0.00032305908203125,
      "step": 52930,
      "training_step_time": 0.3867156505584717
    },
    {
      "epoch": 0.000323065185546875,
      "model_forward_time": 0.1145639419555664,
      "step": 52931
    },
    {
      "epoch": 0.000323065185546875,
      "step": 52931,
      "training_step_time": 0.39218902587890625
    },
    {
      "epoch": 0.0003230712890625,
      "model_forward_time": 0.11500096321105957,
      "step": 52932
    },
    {
      "epoch": 0.0003230712890625,
      "step": 52932,
      "training_step_time": 0.39961767196655273
    },
    {
      "epoch": 0.000323077392578125,
      "model_forward_time": 0.11506199836730957,
      "step": 52933
    },
    {
      "epoch": 0.000323077392578125,
      "step": 52933,
      "training_step_time": 0.3934297561645508
    },
    {
      "epoch": 0.00032308349609375,
      "model_forward_time": 0.11486482620239258,
      "step": 52934
    },
    {
      "epoch": 0.00032308349609375,
      "step": 52934,
      "training_step_time": 0.41532087326049805
    },
    {
      "epoch": 0.000323089599609375,
      "model_forward_time": 0.11519265174865723,
      "step": 52935
    },
    {
      "epoch": 0.000323089599609375,
      "step": 52935,
      "training_step_time": 0.404552698135376
    },
    {
      "epoch": 0.000323095703125,
      "model_forward_time": 0.11550211906433105,
      "step": 52936
    },
    {
      "epoch": 0.000323095703125,
      "step": 52936,
      "training_step_time": 0.46269774436950684
    },
    {
      "epoch": 0.000323101806640625,
      "model_forward_time": 0.11520195007324219,
      "step": 52937
    },
    {
      "epoch": 0.000323101806640625,
      "step": 52937,
      "training_step_time": 0.45868754386901855
    },
    {
      "epoch": 0.00032310791015625,
      "model_forward_time": 0.11442685127258301,
      "step": 52938
    },
    {
      "epoch": 0.00032310791015625,
      "step": 52938,
      "training_step_time": 0.49359965324401855
    },
    {
      "epoch": 0.000323114013671875,
      "model_forward_time": 0.1141214370727539,
      "step": 52939
    },
    {
      "epoch": 0.000323114013671875,
      "step": 52939,
      "training_step_time": 0.46405839920043945
    },
    {
      "epoch": 0.0003231201171875,
      "grad_norm": 0.12717613577842712,
      "learning_rate": 3.7377714444818468e-06,
      "loss": 0.0407,
      "step": 52940
    },
    {
      "epoch": 0.0003231201171875,
      "model_forward_time": 0.11489534378051758,
      "step": 52940
    },
    {
      "epoch": 0.0003231201171875,
      "step": 52940,
      "training_step_time": 0.3930683135986328
    },
    {
      "epoch": 0.000323126220703125,
      "model_forward_time": 0.11488795280456543,
      "step": 52941
    },
    {
      "epoch": 0.000323126220703125,
      "step": 52941,
      "training_step_time": 0.3649106025695801
    },
    {
      "epoch": 0.00032313232421875,
      "model_forward_time": 0.11487841606140137,
      "step": 52942
    },
    {
      "epoch": 0.00032313232421875,
      "step": 52942,
      "training_step_time": 0.4100790023803711
    },
    {
      "epoch": 0.000323138427734375,
      "model_forward_time": 0.11523318290710449,
      "step": 52943
    },
    {
      "epoch": 0.000323138427734375,
      "step": 52943,
      "training_step_time": 0.4962129592895508
    },
    {
      "epoch": 0.00032314453125,
      "model_forward_time": 0.11478900909423828,
      "step": 52944
    },
    {
      "epoch": 0.00032314453125,
      "step": 52944,
      "training_step_time": 0.41051673889160156
    },
    {
      "epoch": 0.000323150634765625,
      "model_forward_time": 0.11486029624938965,
      "step": 52945
    },
    {
      "epoch": 0.000323150634765625,
      "step": 52945,
      "training_step_time": 0.40204524993896484
    },
    {
      "epoch": 0.00032315673828125,
      "model_forward_time": 0.11460137367248535,
      "step": 52946
    },
    {
      "epoch": 0.00032315673828125,
      "step": 52946,
      "training_step_time": 0.3957982063293457
    },
    {
      "epoch": 0.000323162841796875,
      "model_forward_time": 0.11565804481506348,
      "step": 52947
    },
    {
      "epoch": 0.000323162841796875,
      "step": 52947,
      "training_step_time": 0.3936452865600586
    },
    {
      "epoch": 0.0003231689453125,
      "model_forward_time": 0.1151585578918457,
      "step": 52948
    },
    {
      "epoch": 0.0003231689453125,
      "step": 52948,
      "training_step_time": 0.39615631103515625
    },
    {
      "epoch": 0.000323175048828125,
      "model_forward_time": 0.11462163925170898,
      "step": 52949
    },
    {
      "epoch": 0.000323175048828125,
      "step": 52949,
      "training_step_time": 0.45665931701660156
    },
    {
      "epoch": 0.00032318115234375,
      "grad_norm": 0.08520842343568802,
      "learning_rate": 3.7273238253475785e-06,
      "loss": 0.0363,
      "step": 52950
    },
    {
      "epoch": 0.00032318115234375,
      "model_forward_time": 0.11479067802429199,
      "step": 52950
    },
    {
      "epoch": 0.00032318115234375,
      "step": 52950,
      "training_step_time": 0.432938814163208
    },
    {
      "epoch": 0.000323187255859375,
      "model_forward_time": 0.11541891098022461,
      "step": 52951
    },
    {
      "epoch": 0.000323187255859375,
      "step": 52951,
      "training_step_time": 0.5053207874298096
    },
    {
      "epoch": 0.000323193359375,
      "model_forward_time": 0.11504030227661133,
      "step": 52952
    },
    {
      "epoch": 0.000323193359375,
      "step": 52952,
      "training_step_time": 0.4056229591369629
    },
    {
      "epoch": 0.000323199462890625,
      "model_forward_time": 0.11542892456054688,
      "step": 52953
    },
    {
      "epoch": 0.000323199462890625,
      "step": 52953,
      "training_step_time": 0.4322826862335205
    },
    {
      "epoch": 0.00032320556640625,
      "model_forward_time": 0.11487460136413574,
      "step": 52954
    },
    {
      "epoch": 0.00032320556640625,
      "step": 52954,
      "training_step_time": 0.4100351333618164
    },
    {
      "epoch": 0.000323211669921875,
      "model_forward_time": 0.11476016044616699,
      "step": 52955
    },
    {
      "epoch": 0.000323211669921875,
      "step": 52955,
      "training_step_time": 0.4171016216278076
    },
    {
      "epoch": 0.0003232177734375,
      "model_forward_time": 0.11500239372253418,
      "step": 52956
    },
    {
      "epoch": 0.0003232177734375,
      "step": 52956,
      "training_step_time": 0.40155577659606934
    },
    {
      "epoch": 0.000323223876953125,
      "model_forward_time": 0.11584258079528809,
      "step": 52957
    },
    {
      "epoch": 0.000323223876953125,
      "step": 52957,
      "training_step_time": 0.540778398513794
    },
    {
      "epoch": 0.00032322998046875,
      "model_forward_time": 0.11545467376708984,
      "step": 52958
    },
    {
      "epoch": 0.00032322998046875,
      "step": 52958,
      "training_step_time": 0.44945311546325684
    },
    {
      "epoch": 0.000323236083984375,
      "model_forward_time": 0.114959716796875,
      "step": 52959
    },
    {
      "epoch": 0.000323236083984375,
      "step": 52959,
      "training_step_time": 0.39267849922180176
    },
    {
      "epoch": 0.0003232421875,
      "grad_norm": 0.0684560239315033,
      "learning_rate": 3.7168902626305622e-06,
      "loss": 0.0399,
      "step": 52960
    },
    {
      "epoch": 0.0003232421875,
      "model_forward_time": 0.11569547653198242,
      "step": 52960
    },
    {
      "epoch": 0.0003232421875,
      "step": 52960,
      "training_step_time": 0.39028120040893555
    },
    {
      "epoch": 0.000323248291015625,
      "model_forward_time": 0.11464452743530273,
      "step": 52961
    },
    {
      "epoch": 0.000323248291015625,
      "step": 52961,
      "training_step_time": 0.3897390365600586
    },
    {
      "epoch": 0.00032325439453125,
      "model_forward_time": 0.11455893516540527,
      "step": 52962
    },
    {
      "epoch": 0.00032325439453125,
      "step": 52962,
      "training_step_time": 0.41263794898986816
    },
    {
      "epoch": 0.000323260498046875,
      "model_forward_time": 0.1150057315826416,
      "step": 52963
    },
    {
      "epoch": 0.000323260498046875,
      "step": 52963,
      "training_step_time": 0.4002068042755127
    },
    {
      "epoch": 0.0003232666015625,
      "model_forward_time": 0.11525273323059082,
      "step": 52964
    },
    {
      "epoch": 0.0003232666015625,
      "step": 52964,
      "training_step_time": 0.4005262851715088
    },
    {
      "epoch": 0.000323272705078125,
      "model_forward_time": 0.11546874046325684,
      "step": 52965
    },
    {
      "epoch": 0.000323272705078125,
      "step": 52965,
      "training_step_time": 0.5033102035522461
    },
    {
      "epoch": 0.00032327880859375,
      "model_forward_time": 0.11490082740783691,
      "step": 52966
    },
    {
      "epoch": 0.00032327880859375,
      "step": 52966,
      "training_step_time": 0.4392051696777344
    },
    {
      "epoch": 0.000323284912109375,
      "model_forward_time": 0.11548185348510742,
      "step": 52967
    },
    {
      "epoch": 0.000323284912109375,
      "step": 52967,
      "training_step_time": 0.4930875301361084
    },
    {
      "epoch": 0.000323291015625,
      "model_forward_time": 0.11505556106567383,
      "step": 52968
    },
    {
      "epoch": 0.000323291015625,
      "step": 52968,
      "training_step_time": 0.4340019226074219
    },
    {
      "epoch": 0.000323297119140625,
      "model_forward_time": 0.1145622730255127,
      "step": 52969
    },
    {
      "epoch": 0.000323297119140625,
      "step": 52969,
      "training_step_time": 0.4280261993408203
    },
    {
      "epoch": 0.00032330322265625,
      "grad_norm": 0.09040287137031555,
      "learning_rate": 3.7064707595002635e-06,
      "loss": 0.0378,
      "step": 52970
    },
    {
      "epoch": 0.00032330322265625,
      "model_forward_time": 0.11440324783325195,
      "step": 52970
    },
    {
      "epoch": 0.00032330322265625,
      "step": 52970,
      "training_step_time": 0.363832950592041
    },
    {
      "epoch": 0.000323309326171875,
      "model_forward_time": 0.11508488655090332,
      "step": 52971
    },
    {
      "epoch": 0.000323309326171875,
      "step": 52971,
      "training_step_time": 0.4458043575286865
    },
    {
      "epoch": 0.0003233154296875,
      "model_forward_time": 0.11560773849487305,
      "step": 52972
    },
    {
      "epoch": 0.0003233154296875,
      "step": 52972,
      "training_step_time": 0.40219640731811523
    },
    {
      "epoch": 0.000323321533203125,
      "model_forward_time": 0.11451601982116699,
      "step": 52973
    },
    {
      "epoch": 0.000323321533203125,
      "step": 52973,
      "training_step_time": 0.3857598304748535
    },
    {
      "epoch": 0.00032332763671875,
      "model_forward_time": 0.11494779586791992,
      "step": 52974
    },
    {
      "epoch": 0.00032332763671875,
      "step": 52974,
      "training_step_time": 0.3887364864349365
    },
    {
      "epoch": 0.000323333740234375,
      "model_forward_time": 0.11522912979125977,
      "step": 52975
    },
    {
      "epoch": 0.000323333740234375,
      "step": 52975,
      "training_step_time": 0.3969447612762451
    },
    {
      "epoch": 0.00032333984375,
      "model_forward_time": 0.11485624313354492,
      "step": 52976
    },
    {
      "epoch": 0.00032333984375,
      "step": 52976,
      "training_step_time": 0.39042234420776367
    },
    {
      "epoch": 0.000323345947265625,
      "model_forward_time": 0.11511993408203125,
      "step": 52977
    },
    {
      "epoch": 0.000323345947265625,
      "step": 52977,
      "training_step_time": 0.3981757164001465
    },
    {
      "epoch": 0.00032335205078125,
      "model_forward_time": 0.11611294746398926,
      "step": 52978
    },
    {
      "epoch": 0.00032335205078125,
      "step": 52978,
      "training_step_time": 0.43532562255859375
    },
    {
      "epoch": 0.000323358154296875,
      "model_forward_time": 0.11543822288513184,
      "step": 52979
    },
    {
      "epoch": 0.000323358154296875,
      "step": 52979,
      "training_step_time": 0.42807960510253906
    },
    {
      "epoch": 0.0003233642578125,
      "grad_norm": 0.09325719624757767,
      "learning_rate": 3.696065319121833e-06,
      "loss": 0.0336,
      "step": 52980
    },
    {
      "epoch": 0.0003233642578125,
      "model_forward_time": 0.11527109146118164,
      "step": 52980
    },
    {
      "epoch": 0.0003233642578125,
      "step": 52980,
      "training_step_time": 0.4971306324005127
    },
    {
      "epoch": 0.000323370361328125,
      "model_forward_time": 0.11535167694091797,
      "step": 52981
    },
    {
      "epoch": 0.000323370361328125,
      "step": 52981,
      "training_step_time": 0.43280577659606934
    },
    {
      "epoch": 0.00032337646484375,
      "model_forward_time": 0.1150979995727539,
      "step": 52982
    },
    {
      "epoch": 0.00032337646484375,
      "step": 52982,
      "training_step_time": 0.4625060558319092
    },
    {
      "epoch": 0.000323382568359375,
      "model_forward_time": 0.1155083179473877,
      "step": 52983
    },
    {
      "epoch": 0.000323382568359375,
      "step": 52983,
      "training_step_time": 0.38123321533203125
    },
    {
      "epoch": 0.000323388671875,
      "model_forward_time": 0.11477780342102051,
      "step": 52984
    },
    {
      "epoch": 0.000323388671875,
      "step": 52984,
      "training_step_time": 0.38712072372436523
    },
    {
      "epoch": 0.000323394775390625,
      "model_forward_time": 0.11529254913330078,
      "step": 52985
    },
    {
      "epoch": 0.000323394775390625,
      "step": 52985,
      "training_step_time": 0.5023288726806641
    },
    {
      "epoch": 0.00032340087890625,
      "model_forward_time": 0.11521077156066895,
      "step": 52986
    },
    {
      "epoch": 0.00032340087890625,
      "step": 52986,
      "training_step_time": 0.49133968353271484
    },
    {
      "epoch": 0.000323406982421875,
      "model_forward_time": 0.11518716812133789,
      "step": 52987
    },
    {
      "epoch": 0.000323406982421875,
      "step": 52987,
      "training_step_time": 0.3963353633880615
    },
    {
      "epoch": 0.0003234130859375,
      "model_forward_time": 0.11500191688537598,
      "step": 52988
    },
    {
      "epoch": 0.0003234130859375,
      "step": 52988,
      "training_step_time": 0.4026339054107666
    },
    {
      "epoch": 0.000323419189453125,
      "model_forward_time": 0.11520123481750488,
      "step": 52989
    },
    {
      "epoch": 0.000323419189453125,
      "step": 52989,
      "training_step_time": 0.38577771186828613
    },
    {
      "epoch": 0.00032342529296875,
      "grad_norm": 0.10678029805421829,
      "learning_rate": 3.685673944656176e-06,
      "loss": 0.038,
      "step": 52990
    },
    {
      "epoch": 0.00032342529296875,
      "model_forward_time": 0.1141510009765625,
      "step": 52990
    },
    {
      "epoch": 0.00032342529296875,
      "step": 52990,
      "training_step_time": 0.38895559310913086
    },
    {
      "epoch": 0.000323431396484375,
      "model_forward_time": 0.1152961254119873,
      "step": 52991
    },
    {
      "epoch": 0.000323431396484375,
      "step": 52991,
      "training_step_time": 0.3837153911590576
    },
    {
      "epoch": 0.0003234375,
      "model_forward_time": 0.11527490615844727,
      "step": 52992
    },
    {
      "epoch": 0.0003234375,
      "step": 52992,
      "training_step_time": 0.40282678604125977
    },
    {
      "epoch": 0.000323443603515625,
      "model_forward_time": 0.1154031753540039,
      "step": 52993
    },
    {
      "epoch": 0.000323443603515625,
      "step": 52993,
      "training_step_time": 0.5114645957946777
    },
    {
      "epoch": 0.00032344970703125,
      "model_forward_time": 0.11500406265258789,
      "step": 52994
    },
    {
      "epoch": 0.00032344970703125,
      "step": 52994,
      "training_step_time": 0.4439234733581543
    },
    {
      "epoch": 0.000323455810546875,
      "model_forward_time": 0.11510396003723145,
      "step": 52995
    },
    {
      "epoch": 0.000323455810546875,
      "step": 52995,
      "training_step_time": 0.46009373664855957
    },
    {
      "epoch": 0.0003234619140625,
      "model_forward_time": 0.1146688461303711,
      "step": 52996
    },
    {
      "epoch": 0.0003234619140625,
      "step": 52996,
      "training_step_time": 0.42633938789367676
    },
    {
      "epoch": 0.000323468017578125,
      "model_forward_time": 0.11532902717590332,
      "step": 52997
    },
    {
      "epoch": 0.000323468017578125,
      "step": 52997,
      "training_step_time": 0.3975801467895508
    },
    {
      "epoch": 0.00032347412109375,
      "model_forward_time": 0.11532926559448242,
      "step": 52998
    },
    {
      "epoch": 0.00032347412109375,
      "step": 52998,
      "training_step_time": 0.3976256847381592
    },
    {
      "epoch": 0.000323480224609375,
      "model_forward_time": 0.11636209487915039,
      "step": 52999
    },
    {
      "epoch": 0.000323480224609375,
      "step": 52999,
      "training_step_time": 0.450026273727417
    },
    {
      "epoch": 0.000323486328125,
      "grad_norm": 0.0943969190120697,
      "learning_rate": 3.675296639259912e-06,
      "loss": 0.04,
      "step": 53000
    },
    {
      "epoch": 0.000323486328125,
      "model_forward_time": 0.11343026161193848,
      "step": 53000
    },
    {
      "epoch": 0.000323486328125,
      "step": 53000,
      "training_step_time": 0.3568003177642822
    },
    {
      "epoch": 0.000323492431640625,
      "model_forward_time": 0.11242461204528809,
      "step": 53001
    },
    {
      "epoch": 0.000323492431640625,
      "step": 53001,
      "training_step_time": 0.3730659484863281
    },
    {
      "epoch": 0.00032349853515625,
      "model_forward_time": 0.11294960975646973,
      "step": 53002
    },
    {
      "epoch": 0.00032349853515625,
      "step": 53002,
      "training_step_time": 0.37393712997436523
    },
    {
      "epoch": 0.000323504638671875,
      "model_forward_time": 0.11379408836364746,
      "step": 53003
    },
    {
      "epoch": 0.000323504638671875,
      "step": 53003,
      "training_step_time": 0.38394808769226074
    },
    {
      "epoch": 0.0003235107421875,
      "model_forward_time": 0.1144561767578125,
      "step": 53004
    },
    {
      "epoch": 0.0003235107421875,
      "step": 53004,
      "training_step_time": 0.38239455223083496
    },
    {
      "epoch": 0.000323516845703125,
      "model_forward_time": 0.11410355567932129,
      "step": 53005
    },
    {
      "epoch": 0.000323516845703125,
      "step": 53005,
      "training_step_time": 0.38947200775146484
    },
    {
      "epoch": 0.00032352294921875,
      "model_forward_time": 0.11474204063415527,
      "step": 53006
    },
    {
      "epoch": 0.00032352294921875,
      "step": 53006,
      "training_step_time": 0.44603919982910156
    },
    {
      "epoch": 0.000323529052734375,
      "model_forward_time": 0.11597657203674316,
      "step": 53007
    },
    {
      "epoch": 0.000323529052734375,
      "step": 53007,
      "training_step_time": 0.4343223571777344
    },
    {
      "epoch": 0.00032353515625,
      "model_forward_time": 0.11454486846923828,
      "step": 53008
    },
    {
      "epoch": 0.00032353515625,
      "step": 53008,
      "training_step_time": 0.41542673110961914
    },
    {
      "epoch": 0.000323541259765625,
      "model_forward_time": 0.11575675010681152,
      "step": 53009
    },
    {
      "epoch": 0.000323541259765625,
      "step": 53009,
      "training_step_time": 0.3803863525390625
    },
    {
      "epoch": 0.00032354736328125,
      "grad_norm": 0.07369697093963623,
      "learning_rate": 3.664933406085402e-06,
      "loss": 0.0407,
      "step": 53010
    },
    {
      "epoch": 0.00032354736328125,
      "model_forward_time": 0.11486291885375977,
      "step": 53010
    },
    {
      "epoch": 0.00032354736328125,
      "step": 53010,
      "training_step_time": 0.48108363151550293
    },
    {
      "epoch": 0.000323553466796875,
      "model_forward_time": 0.11533904075622559,
      "step": 53011
    },
    {
      "epoch": 0.000323553466796875,
      "step": 53011,
      "training_step_time": 0.39675402641296387
    },
    {
      "epoch": 0.0003235595703125,
      "model_forward_time": 0.11553430557250977,
      "step": 53012
    },
    {
      "epoch": 0.0003235595703125,
      "step": 53012,
      "training_step_time": 0.3854348659515381
    },
    {
      "epoch": 0.000323565673828125,
      "model_forward_time": 0.11578559875488281,
      "step": 53013
    },
    {
      "epoch": 0.000323565673828125,
      "step": 53013,
      "training_step_time": 0.4127535820007324
    },
    {
      "epoch": 0.00032357177734375,
      "model_forward_time": 0.11544108390808105,
      "step": 53014
    },
    {
      "epoch": 0.00032357177734375,
      "step": 53014,
      "training_step_time": 0.4295947551727295
    },
    {
      "epoch": 0.000323577880859375,
      "model_forward_time": 0.11487627029418945,
      "step": 53015
    },
    {
      "epoch": 0.000323577880859375,
      "step": 53015,
      "training_step_time": 0.48358964920043945
    },
    {
      "epoch": 0.000323583984375,
      "model_forward_time": 0.11462974548339844,
      "step": 53016
    },
    {
      "epoch": 0.000323583984375,
      "step": 53016,
      "training_step_time": 0.39756155014038086
    },
    {
      "epoch": 0.000323590087890625,
      "model_forward_time": 0.11506795883178711,
      "step": 53017
    },
    {
      "epoch": 0.000323590087890625,
      "step": 53017,
      "training_step_time": 0.4058663845062256
    },
    {
      "epoch": 0.00032359619140625,
      "model_forward_time": 0.11552715301513672,
      "step": 53018
    },
    {
      "epoch": 0.00032359619140625,
      "step": 53018,
      "training_step_time": 0.3998243808746338
    },
    {
      "epoch": 0.000323602294921875,
      "model_forward_time": 0.11590051651000977,
      "step": 53019
    },
    {
      "epoch": 0.000323602294921875,
      "step": 53019,
      "training_step_time": 0.39660048484802246
    },
    {
      "epoch": 0.0003236083984375,
      "grad_norm": 0.07693588733673096,
      "learning_rate": 3.654584248280707e-06,
      "loss": 0.0354,
      "step": 53020
    },
    {
      "epoch": 0.0003236083984375,
      "model_forward_time": 0.11487579345703125,
      "step": 53020
    },
    {
      "epoch": 0.0003236083984375,
      "step": 53020,
      "training_step_time": 0.39809656143188477
    },
    {
      "epoch": 0.000323614501953125,
      "model_forward_time": 0.1154794692993164,
      "step": 53021
    },
    {
      "epoch": 0.000323614501953125,
      "step": 53021,
      "training_step_time": 0.4654557704925537
    },
    {
      "epoch": 0.00032362060546875,
      "model_forward_time": 0.11532831192016602,
      "step": 53022
    },
    {
      "epoch": 0.00032362060546875,
      "step": 53022,
      "training_step_time": 0.4325883388519287
    },
    {
      "epoch": 0.000323626708984375,
      "model_forward_time": 0.11630487442016602,
      "step": 53023
    },
    {
      "epoch": 0.000323626708984375,
      "step": 53023,
      "training_step_time": 0.4433119297027588
    },
    {
      "epoch": 0.0003236328125,
      "model_forward_time": 0.11604690551757812,
      "step": 53024
    },
    {
      "epoch": 0.0003236328125,
      "step": 53024,
      "training_step_time": 0.4870874881744385
    },
    {
      "epoch": 0.000323638916015625,
      "model_forward_time": 0.1148993968963623,
      "step": 53025
    },
    {
      "epoch": 0.000323638916015625,
      "step": 53025,
      "training_step_time": 0.38678407669067383
    },
    {
      "epoch": 0.00032364501953125,
      "model_forward_time": 0.11554479598999023,
      "step": 53026
    },
    {
      "epoch": 0.00032364501953125,
      "step": 53026,
      "training_step_time": 0.3976290225982666
    },
    {
      "epoch": 0.000323651123046875,
      "model_forward_time": 0.11803293228149414,
      "step": 53027
    },
    {
      "epoch": 0.000323651123046875,
      "step": 53027,
      "training_step_time": 0.403658390045166
    },
    {
      "epoch": 0.0003236572265625,
      "model_forward_time": 0.11519289016723633,
      "step": 53028
    },
    {
      "epoch": 0.0003236572265625,
      "step": 53028,
      "training_step_time": 0.4768667221069336
    },
    {
      "epoch": 0.000323663330078125,
      "model_forward_time": 0.11605143547058105,
      "step": 53029
    },
    {
      "epoch": 0.000323663330078125,
      "step": 53029,
      "training_step_time": 0.41947150230407715
    },
    {
      "epoch": 0.00032366943359375,
      "grad_norm": 0.096642404794693,
      "learning_rate": 3.64424916898965e-06,
      "loss": 0.0353,
      "step": 53030
    },
    {
      "epoch": 0.00032366943359375,
      "model_forward_time": 0.11519813537597656,
      "step": 53030
    },
    {
      "epoch": 0.00032366943359375,
      "step": 53030,
      "training_step_time": 0.38394641876220703
    },
    {
      "epoch": 0.000323675537109375,
      "model_forward_time": 0.11441874504089355,
      "step": 53031
    },
    {
      "epoch": 0.000323675537109375,
      "step": 53031,
      "training_step_time": 0.39425039291381836
    },
    {
      "epoch": 0.000323681640625,
      "model_forward_time": 0.11459064483642578,
      "step": 53032
    },
    {
      "epoch": 0.000323681640625,
      "step": 53032,
      "training_step_time": 0.39945483207702637
    },
    {
      "epoch": 0.000323687744140625,
      "model_forward_time": 0.11448907852172852,
      "step": 53033
    },
    {
      "epoch": 0.000323687744140625,
      "step": 53033,
      "training_step_time": 0.39257216453552246
    },
    {
      "epoch": 0.00032369384765625,
      "model_forward_time": 0.11500358581542969,
      "step": 53034
    },
    {
      "epoch": 0.00032369384765625,
      "step": 53034,
      "training_step_time": 0.4003162384033203
    },
    {
      "epoch": 0.000323699951171875,
      "model_forward_time": 0.11520648002624512,
      "step": 53035
    },
    {
      "epoch": 0.000323699951171875,
      "step": 53035,
      "training_step_time": 0.42752718925476074
    },
    {
      "epoch": 0.0003237060546875,
      "model_forward_time": 0.11540365219116211,
      "step": 53036
    },
    {
      "epoch": 0.0003237060546875,
      "step": 53036,
      "training_step_time": 0.41785597801208496
    },
    {
      "epoch": 0.000323712158203125,
      "model_forward_time": 0.11518263816833496,
      "step": 53037
    },
    {
      "epoch": 0.000323712158203125,
      "step": 53037,
      "training_step_time": 0.5361685752868652
    },
    {
      "epoch": 0.00032371826171875,
      "model_forward_time": 0.11517477035522461,
      "step": 53038
    },
    {
      "epoch": 0.00032371826171875,
      "step": 53038,
      "training_step_time": 0.3917982578277588
    },
    {
      "epoch": 0.000323724365234375,
      "model_forward_time": 0.11458563804626465,
      "step": 53039
    },
    {
      "epoch": 0.000323724365234375,
      "step": 53039,
      "training_step_time": 0.4634397029876709
    },
    {
      "epoch": 0.00032373046875,
      "grad_norm": 0.09722968190908432,
      "learning_rate": 3.6339281713517303e-06,
      "loss": 0.0315,
      "step": 53040
    },
    {
      "epoch": 0.00032373046875,
      "model_forward_time": 0.11444830894470215,
      "step": 53040
    },
    {
      "epoch": 0.00032373046875,
      "step": 53040,
      "training_step_time": 0.39859485626220703
    },
    {
      "epoch": 0.000323736572265625,
      "model_forward_time": 0.11598587036132812,
      "step": 53041
    },
    {
      "epoch": 0.000323736572265625,
      "step": 53041,
      "training_step_time": 0.3679051399230957
    },
    {
      "epoch": 0.00032374267578125,
      "model_forward_time": 0.11455988883972168,
      "step": 53042
    },
    {
      "epoch": 0.00032374267578125,
      "step": 53042,
      "training_step_time": 0.4305126667022705
    },
    {
      "epoch": 0.000323748779296875,
      "model_forward_time": 0.1152963638305664,
      "step": 53043
    },
    {
      "epoch": 0.000323748779296875,
      "step": 53043,
      "training_step_time": 0.40074658393859863
    },
    {
      "epoch": 0.0003237548828125,
      "model_forward_time": 0.11643409729003906,
      "step": 53044
    },
    {
      "epoch": 0.0003237548828125,
      "step": 53044,
      "training_step_time": 0.3964874744415283
    },
    {
      "epoch": 0.000323760986328125,
      "model_forward_time": 0.11510372161865234,
      "step": 53045
    },
    {
      "epoch": 0.000323760986328125,
      "step": 53045,
      "training_step_time": 0.38866758346557617
    },
    {
      "epoch": 0.00032376708984375,
      "model_forward_time": 0.11474990844726562,
      "step": 53046
    },
    {
      "epoch": 0.00032376708984375,
      "step": 53046,
      "training_step_time": 0.393538236618042
    },
    {
      "epoch": 0.000323773193359375,
      "model_forward_time": 0.11535239219665527,
      "step": 53047
    },
    {
      "epoch": 0.000323773193359375,
      "step": 53047,
      "training_step_time": 0.3973989486694336
    },
    {
      "epoch": 0.000323779296875,
      "model_forward_time": 0.11610603332519531,
      "step": 53048
    },
    {
      "epoch": 0.000323779296875,
      "step": 53048,
      "training_step_time": 0.40958094596862793
    },
    {
      "epoch": 0.000323785400390625,
      "model_forward_time": 0.1153879165649414,
      "step": 53049
    },
    {
      "epoch": 0.000323785400390625,
      "step": 53049,
      "training_step_time": 0.4496152400970459
    },
    {
      "epoch": 0.00032379150390625,
      "grad_norm": 0.07246877253055573,
      "learning_rate": 3.6236212585022187e-06,
      "loss": 0.0392,
      "step": 53050
    },
    {
      "epoch": 0.00032379150390625,
      "model_forward_time": 0.11535906791687012,
      "step": 53050
    },
    {
      "epoch": 0.00032379150390625,
      "step": 53050,
      "training_step_time": 0.4526944160461426
    },
    {
      "epoch": 0.000323797607421875,
      "model_forward_time": 0.11533832550048828,
      "step": 53051
    },
    {
      "epoch": 0.000323797607421875,
      "step": 53051,
      "training_step_time": 0.3893287181854248
    },
    {
      "epoch": 0.0003238037109375,
      "model_forward_time": 0.11437368392944336,
      "step": 53052
    },
    {
      "epoch": 0.0003238037109375,
      "step": 53052,
      "training_step_time": 0.46632933616638184
    },
    {
      "epoch": 0.000323809814453125,
      "model_forward_time": 0.11545157432556152,
      "step": 53053
    },
    {
      "epoch": 0.000323809814453125,
      "step": 53053,
      "training_step_time": 0.44135427474975586
    },
    {
      "epoch": 0.00032381591796875,
      "model_forward_time": 0.11536049842834473,
      "step": 53054
    },
    {
      "epoch": 0.00032381591796875,
      "step": 53054,
      "training_step_time": 0.43828749656677246
    },
    {
      "epoch": 0.000323822021484375,
      "model_forward_time": 0.11508059501647949,
      "step": 53055
    },
    {
      "epoch": 0.000323822021484375,
      "step": 53055,
      "training_step_time": 0.39796972274780273
    },
    {
      "epoch": 0.000323828125,
      "model_forward_time": 0.11523628234863281,
      "step": 53056
    },
    {
      "epoch": 0.000323828125,
      "step": 53056,
      "training_step_time": 0.4571239948272705
    },
    {
      "epoch": 0.000323834228515625,
      "model_forward_time": 0.11459183692932129,
      "step": 53057
    },
    {
      "epoch": 0.000323834228515625,
      "step": 53057,
      "training_step_time": 0.4590752124786377
    },
    {
      "epoch": 0.00032384033203125,
      "model_forward_time": 0.1142432689666748,
      "step": 53058
    },
    {
      "epoch": 0.00032384033203125,
      "step": 53058,
      "training_step_time": 0.41855645179748535
    },
    {
      "epoch": 0.000323846435546875,
      "model_forward_time": 0.11465167999267578,
      "step": 53059
    },
    {
      "epoch": 0.000323846435546875,
      "step": 53059,
      "training_step_time": 0.3928682804107666
    },
    {
      "epoch": 0.0003238525390625,
      "grad_norm": 0.07279528677463531,
      "learning_rate": 3.6133284335720605e-06,
      "loss": 0.0349,
      "step": 53060
    },
    {
      "epoch": 0.0003238525390625,
      "model_forward_time": 0.11510801315307617,
      "step": 53060
    },
    {
      "epoch": 0.0003238525390625,
      "step": 53060,
      "training_step_time": 0.39077305793762207
    },
    {
      "epoch": 0.000323858642578125,
      "model_forward_time": 0.11437058448791504,
      "step": 53061
    },
    {
      "epoch": 0.000323858642578125,
      "step": 53061,
      "training_step_time": 0.395627498626709
    },
    {
      "epoch": 0.00032386474609375,
      "model_forward_time": 0.11494708061218262,
      "step": 53062
    },
    {
      "epoch": 0.00032386474609375,
      "step": 53062,
      "training_step_time": 0.40394115447998047
    },
    {
      "epoch": 0.000323870849609375,
      "model_forward_time": 0.11501908302307129,
      "step": 53063
    },
    {
      "epoch": 0.000323870849609375,
      "step": 53063,
      "training_step_time": 0.3920724391937256
    },
    {
      "epoch": 0.000323876953125,
      "model_forward_time": 0.11562538146972656,
      "step": 53064
    },
    {
      "epoch": 0.000323876953125,
      "step": 53064,
      "training_step_time": 0.3976776599884033
    },
    {
      "epoch": 0.000323883056640625,
      "model_forward_time": 0.11566901206970215,
      "step": 53065
    },
    {
      "epoch": 0.000323883056640625,
      "step": 53065,
      "training_step_time": 0.42052745819091797
    },
    {
      "epoch": 0.00032388916015625,
      "model_forward_time": 0.11570525169372559,
      "step": 53066
    },
    {
      "epoch": 0.00032388916015625,
      "step": 53066,
      "training_step_time": 0.39029717445373535
    },
    {
      "epoch": 0.000323895263671875,
      "model_forward_time": 0.11559820175170898,
      "step": 53067
    },
    {
      "epoch": 0.000323895263671875,
      "step": 53067,
      "training_step_time": 0.42499566078186035
    },
    {
      "epoch": 0.0003239013671875,
      "model_forward_time": 0.11458969116210938,
      "step": 53068
    },
    {
      "epoch": 0.0003239013671875,
      "step": 53068,
      "training_step_time": 0.4814035892486572
    },
    {
      "epoch": 0.000323907470703125,
      "model_forward_time": 0.11588573455810547,
      "step": 53069
    },
    {
      "epoch": 0.000323907470703125,
      "step": 53069,
      "training_step_time": 0.45751285552978516
    },
    {
      "epoch": 0.00032391357421875,
      "grad_norm": 0.12807802855968475,
      "learning_rate": 3.60304969968796e-06,
      "loss": 0.0291,
      "step": 53070
    },
    {
      "epoch": 0.00032391357421875,
      "model_forward_time": 0.11488723754882812,
      "step": 53070
    },
    {
      "epoch": 0.00032391357421875,
      "step": 53070,
      "training_step_time": 0.39691925048828125
    },
    {
      "epoch": 0.000323919677734375,
      "model_forward_time": 0.11449265480041504,
      "step": 53071
    },
    {
      "epoch": 0.000323919677734375,
      "step": 53071,
      "training_step_time": 0.47139883041381836
    },
    {
      "epoch": 0.00032392578125,
      "model_forward_time": 0.11478137969970703,
      "step": 53072
    },
    {
      "epoch": 0.00032392578125,
      "step": 53072,
      "training_step_time": 0.38985204696655273
    },
    {
      "epoch": 0.000323931884765625,
      "model_forward_time": 0.11493039131164551,
      "step": 53073
    },
    {
      "epoch": 0.000323931884765625,
      "step": 53073,
      "training_step_time": 0.40991997718811035
    },
    {
      "epoch": 0.00032393798828125,
      "model_forward_time": 0.11515641212463379,
      "step": 53074
    },
    {
      "epoch": 0.00032393798828125,
      "step": 53074,
      "training_step_time": 0.3915214538574219
    },
    {
      "epoch": 0.000323944091796875,
      "model_forward_time": 0.11503767967224121,
      "step": 53075
    },
    {
      "epoch": 0.000323944091796875,
      "step": 53075,
      "training_step_time": 0.38063669204711914
    },
    {
      "epoch": 0.0003239501953125,
      "model_forward_time": 0.11481738090515137,
      "step": 53076
    },
    {
      "epoch": 0.0003239501953125,
      "step": 53076,
      "training_step_time": 0.39600443840026855
    },
    {
      "epoch": 0.000323956298828125,
      "model_forward_time": 0.1155557632446289,
      "step": 53077
    },
    {
      "epoch": 0.000323956298828125,
      "step": 53077,
      "training_step_time": 0.4199540615081787
    },
    {
      "epoch": 0.00032396240234375,
      "model_forward_time": 0.11535501480102539,
      "step": 53078
    },
    {
      "epoch": 0.00032396240234375,
      "step": 53078,
      "training_step_time": 0.4015674591064453
    },
    {
      "epoch": 0.000323968505859375,
      "model_forward_time": 0.1145169734954834,
      "step": 53079
    },
    {
      "epoch": 0.000323968505859375,
      "step": 53079,
      "training_step_time": 0.43830323219299316
    },
    {
      "epoch": 0.000323974609375,
      "grad_norm": 0.0797940343618393,
      "learning_rate": 3.59278505997232e-06,
      "loss": 0.0389,
      "step": 53080
    },
    {
      "epoch": 0.000323974609375,
      "model_forward_time": 0.11469578742980957,
      "step": 53080
    },
    {
      "epoch": 0.000323974609375,
      "step": 53080,
      "training_step_time": 0.39527320861816406
    },
    {
      "epoch": 0.000323980712890625,
      "model_forward_time": 0.11580395698547363,
      "step": 53081
    },
    {
      "epoch": 0.000323980712890625,
      "step": 53081,
      "training_step_time": 0.4313168525695801
    },
    {
      "epoch": 0.00032398681640625,
      "model_forward_time": 0.11538934707641602,
      "step": 53082
    },
    {
      "epoch": 0.00032398681640625,
      "step": 53082,
      "training_step_time": 0.4451737403869629
    },
    {
      "epoch": 0.000323992919921875,
      "model_forward_time": 0.11463451385498047,
      "step": 53083
    },
    {
      "epoch": 0.000323992919921875,
      "step": 53083,
      "training_step_time": 0.48479652404785156
    },
    {
      "epoch": 0.0003239990234375,
      "model_forward_time": 0.11544346809387207,
      "step": 53084
    },
    {
      "epoch": 0.0003239990234375,
      "step": 53084,
      "training_step_time": 0.3880746364593506
    },
    {
      "epoch": 0.000324005126953125,
      "model_forward_time": 0.11546492576599121,
      "step": 53085
    },
    {
      "epoch": 0.000324005126953125,
      "step": 53085,
      "training_step_time": 0.38756346702575684
    },
    {
      "epoch": 0.00032401123046875,
      "model_forward_time": 0.11475706100463867,
      "step": 53086
    },
    {
      "epoch": 0.00032401123046875,
      "step": 53086,
      "training_step_time": 0.4314250946044922
    },
    {
      "epoch": 0.000324017333984375,
      "model_forward_time": 0.11636829376220703,
      "step": 53087
    },
    {
      "epoch": 0.000324017333984375,
      "step": 53087,
      "training_step_time": 0.4972875118255615
    },
    {
      "epoch": 0.0003240234375,
      "model_forward_time": 0.1157076358795166,
      "step": 53088
    },
    {
      "epoch": 0.0003240234375,
      "step": 53088,
      "training_step_time": 0.4229249954223633
    },
    {
      "epoch": 0.000324029541015625,
      "model_forward_time": 0.11483573913574219,
      "step": 53089
    },
    {
      "epoch": 0.000324029541015625,
      "step": 53089,
      "training_step_time": 0.3989682197570801
    },
    {
      "epoch": 0.00032403564453125,
      "grad_norm": 0.10058867186307907,
      "learning_rate": 3.582534517543268e-06,
      "loss": 0.0327,
      "step": 53090
    },
    {
      "epoch": 0.00032403564453125,
      "model_forward_time": 0.1145787239074707,
      "step": 53090
    },
    {
      "epoch": 0.00032403564453125,
      "step": 53090,
      "training_step_time": 0.4024031162261963
    },
    {
      "epoch": 0.000324041748046875,
      "model_forward_time": 0.11445856094360352,
      "step": 53091
    },
    {
      "epoch": 0.000324041748046875,
      "step": 53091,
      "training_step_time": 0.413330078125
    },
    {
      "epoch": 0.0003240478515625,
      "model_forward_time": 0.11485624313354492,
      "step": 53092
    },
    {
      "epoch": 0.0003240478515625,
      "step": 53092,
      "training_step_time": 0.392345666885376
    },
    {
      "epoch": 0.000324053955078125,
      "model_forward_time": 0.11565661430358887,
      "step": 53093
    },
    {
      "epoch": 0.000324053955078125,
      "step": 53093,
      "training_step_time": 0.38855838775634766
    },
    {
      "epoch": 0.00032406005859375,
      "model_forward_time": 0.11491894721984863,
      "step": 53094
    },
    {
      "epoch": 0.00032406005859375,
      "step": 53094,
      "training_step_time": 0.45868587493896484
    },
    {
      "epoch": 0.000324066162109375,
      "model_forward_time": 0.11500430107116699,
      "step": 53095
    },
    {
      "epoch": 0.000324066162109375,
      "step": 53095,
      "training_step_time": 0.49195098876953125
    },
    {
      "epoch": 0.000324072265625,
      "model_forward_time": 0.1147301197052002,
      "step": 53096
    },
    {
      "epoch": 0.000324072265625,
      "step": 53096,
      "training_step_time": 0.4434335231781006
    },
    {
      "epoch": 0.000324078369140625,
      "model_forward_time": 0.11455798149108887,
      "step": 53097
    },
    {
      "epoch": 0.000324078369140625,
      "step": 53097,
      "training_step_time": 0.4819655418395996
    },
    {
      "epoch": 0.00032408447265625,
      "model_forward_time": 0.1140592098236084,
      "step": 53098
    },
    {
      "epoch": 0.00032408447265625,
      "step": 53098,
      "training_step_time": 0.394608736038208
    },
    {
      "epoch": 0.000324090576171875,
      "model_forward_time": 0.11415433883666992,
      "step": 53099
    },
    {
      "epoch": 0.000324090576171875,
      "step": 53099,
      "training_step_time": 0.4003727436065674
    },
    {
      "epoch": 0.0003240966796875,
      "grad_norm": 0.13500875234603882,
      "learning_rate": 3.5722980755146517e-06,
      "loss": 0.043,
      "step": 53100
    },
    {
      "epoch": 0.0003240966796875,
      "model_forward_time": 0.11583280563354492,
      "step": 53100
    },
    {
      "epoch": 0.0003240966796875,
      "step": 53100,
      "training_step_time": 0.36568236351013184
    },
    {
      "epoch": 0.000324102783203125,
      "model_forward_time": 0.11537790298461914,
      "step": 53101
    },
    {
      "epoch": 0.000324102783203125,
      "step": 53101,
      "training_step_time": 0.4027700424194336
    },
    {
      "epoch": 0.00032410888671875,
      "model_forward_time": 0.11501884460449219,
      "step": 53102
    },
    {
      "epoch": 0.00032410888671875,
      "step": 53102,
      "training_step_time": 0.49852633476257324
    },
    {
      "epoch": 0.000324114990234375,
      "model_forward_time": 0.11487936973571777,
      "step": 53103
    },
    {
      "epoch": 0.000324114990234375,
      "step": 53103,
      "training_step_time": 0.3835580348968506
    },
    {
      "epoch": 0.00032412109375,
      "model_forward_time": 0.11530423164367676,
      "step": 53104
    },
    {
      "epoch": 0.00032412109375,
      "step": 53104,
      "training_step_time": 0.3992924690246582
    },
    {
      "epoch": 0.000324127197265625,
      "model_forward_time": 0.11489081382751465,
      "step": 53105
    },
    {
      "epoch": 0.000324127197265625,
      "step": 53105,
      "training_step_time": 0.4030141830444336
    },
    {
      "epoch": 0.00032413330078125,
      "model_forward_time": 0.11597108840942383,
      "step": 53106
    },
    {
      "epoch": 0.00032413330078125,
      "step": 53106,
      "training_step_time": 0.39718103408813477
    },
    {
      "epoch": 0.000324139404296875,
      "model_forward_time": 0.11527156829833984,
      "step": 53107
    },
    {
      "epoch": 0.000324139404296875,
      "step": 53107,
      "training_step_time": 0.3912007808685303
    },
    {
      "epoch": 0.0003241455078125,
      "model_forward_time": 0.1187441349029541,
      "step": 53108
    },
    {
      "epoch": 0.0003241455078125,
      "step": 53108,
      "training_step_time": 0.4412829875946045
    },
    {
      "epoch": 0.000324151611328125,
      "model_forward_time": 0.1154637336730957,
      "step": 53109
    },
    {
      "epoch": 0.000324151611328125,
      "step": 53109,
      "training_step_time": 0.4349067211151123
    },
    {
      "epoch": 0.00032415771484375,
      "grad_norm": 0.07547765225172043,
      "learning_rate": 3.5620757369960123e-06,
      "loss": 0.0361,
      "step": 53110
    },
    {
      "epoch": 0.00032415771484375,
      "model_forward_time": 0.11567926406860352,
      "step": 53110
    },
    {
      "epoch": 0.00032415771484375,
      "step": 53110,
      "training_step_time": 0.4266324043273926
    },
    {
      "epoch": 0.000324163818359375,
      "model_forward_time": 0.11476969718933105,
      "step": 53111
    },
    {
      "epoch": 0.000324163818359375,
      "step": 53111,
      "training_step_time": 0.40162086486816406
    },
    {
      "epoch": 0.000324169921875,
      "model_forward_time": 0.11526703834533691,
      "step": 53112
    },
    {
      "epoch": 0.000324169921875,
      "step": 53112,
      "training_step_time": 0.5038213729858398
    },
    {
      "epoch": 0.000324176025390625,
      "model_forward_time": 0.1149141788482666,
      "step": 53113
    },
    {
      "epoch": 0.000324176025390625,
      "step": 53113,
      "training_step_time": 0.3875133991241455
    },
    {
      "epoch": 0.00032418212890625,
      "model_forward_time": 0.11441540718078613,
      "step": 53114
    },
    {
      "epoch": 0.00032418212890625,
      "step": 53114,
      "training_step_time": 0.3889155387878418
    },
    {
      "epoch": 0.000324188232421875,
      "model_forward_time": 0.11500406265258789,
      "step": 53115
    },
    {
      "epoch": 0.000324188232421875,
      "step": 53115,
      "training_step_time": 0.43373799324035645
    },
    {
      "epoch": 0.0003241943359375,
      "model_forward_time": 0.11518073081970215,
      "step": 53116
    },
    {
      "epoch": 0.0003241943359375,
      "step": 53116,
      "training_step_time": 0.5129103660583496
    },
    {
      "epoch": 0.000324200439453125,
      "model_forward_time": 0.11427497863769531,
      "step": 53117
    },
    {
      "epoch": 0.000324200439453125,
      "step": 53117,
      "training_step_time": 0.4428112506866455
    },
    {
      "epoch": 0.00032420654296875,
      "model_forward_time": 0.1154787540435791,
      "step": 53118
    },
    {
      "epoch": 0.00032420654296875,
      "step": 53118,
      "training_step_time": 0.4116806983947754
    },
    {
      "epoch": 0.000324212646484375,
      "model_forward_time": 0.11437368392944336,
      "step": 53119
    },
    {
      "epoch": 0.000324212646484375,
      "step": 53119,
      "training_step_time": 0.40901756286621094
    },
    {
      "epoch": 0.00032421875,
      "grad_norm": 0.10898613184690475,
      "learning_rate": 3.5518675050926544e-06,
      "loss": 0.0358,
      "step": 53120
    },
    {
      "epoch": 0.00032421875,
      "model_forward_time": 0.11692595481872559,
      "step": 53120
    },
    {
      "epoch": 0.00032421875,
      "step": 53120,
      "training_step_time": 0.3889737129211426
    },
    {
      "epoch": 0.000324224853515625,
      "model_forward_time": 0.11444282531738281,
      "step": 53121
    },
    {
      "epoch": 0.000324224853515625,
      "step": 53121,
      "training_step_time": 0.38785862922668457
    },
    {
      "epoch": 0.00032423095703125,
      "model_forward_time": 0.11533093452453613,
      "step": 53122
    },
    {
      "epoch": 0.00032423095703125,
      "step": 53122,
      "training_step_time": 0.4004063606262207
    },
    {
      "epoch": 0.000324237060546875,
      "model_forward_time": 0.11489105224609375,
      "step": 53123
    },
    {
      "epoch": 0.000324237060546875,
      "step": 53123,
      "training_step_time": 0.4849081039428711
    },
    {
      "epoch": 0.0003242431640625,
      "model_forward_time": 0.1146693229675293,
      "step": 53124
    },
    {
      "epoch": 0.0003242431640625,
      "step": 53124,
      "training_step_time": 0.4471926689147949
    },
    {
      "epoch": 0.000324249267578125,
      "model_forward_time": 0.11538219451904297,
      "step": 53125
    },
    {
      "epoch": 0.000324249267578125,
      "step": 53125,
      "training_step_time": 0.4038658142089844
    },
    {
      "epoch": 0.00032425537109375,
      "model_forward_time": 0.11478161811828613,
      "step": 53126
    },
    {
      "epoch": 0.00032425537109375,
      "step": 53126,
      "training_step_time": 0.48590779304504395
    },
    {
      "epoch": 0.000324261474609375,
      "model_forward_time": 0.11491155624389648,
      "step": 53127
    },
    {
      "epoch": 0.000324261474609375,
      "step": 53127,
      "training_step_time": 0.3772282600402832
    },
    {
      "epoch": 0.000324267578125,
      "model_forward_time": 0.11542177200317383,
      "step": 53128
    },
    {
      "epoch": 0.000324267578125,
      "step": 53128,
      "training_step_time": 0.3830115795135498
    },
    {
      "epoch": 0.000324273681640625,
      "model_forward_time": 0.11489033699035645,
      "step": 53129
    },
    {
      "epoch": 0.000324273681640625,
      "step": 53129,
      "training_step_time": 0.3675723075866699
    },
    {
      "epoch": 0.00032427978515625,
      "grad_norm": 0.11118007451295853,
      "learning_rate": 3.541673382905558e-06,
      "loss": 0.0419,
      "step": 53130
    },
    {
      "epoch": 0.00032427978515625,
      "model_forward_time": 0.11479306221008301,
      "step": 53130
    },
    {
      "epoch": 0.00032427978515625,
      "step": 53130,
      "training_step_time": 0.4445793628692627
    },
    {
      "epoch": 0.000324285888671875,
      "model_forward_time": 0.11661219596862793,
      "step": 53131
    },
    {
      "epoch": 0.000324285888671875,
      "step": 53131,
      "training_step_time": 0.4330132007598877
    },
    {
      "epoch": 0.0003242919921875,
      "model_forward_time": 0.11472201347351074,
      "step": 53132
    },
    {
      "epoch": 0.0003242919921875,
      "step": 53132,
      "training_step_time": 0.4013223648071289
    },
    {
      "epoch": 0.000324298095703125,
      "model_forward_time": 0.11497163772583008,
      "step": 53133
    },
    {
      "epoch": 0.000324298095703125,
      "step": 53133,
      "training_step_time": 0.37788915634155273
    },
    {
      "epoch": 0.00032430419921875,
      "model_forward_time": 0.11486268043518066,
      "step": 53134
    },
    {
      "epoch": 0.00032430419921875,
      "step": 53134,
      "training_step_time": 0.3913092613220215
    },
    {
      "epoch": 0.000324310302734375,
      "model_forward_time": 0.11507463455200195,
      "step": 53135
    },
    {
      "epoch": 0.000324310302734375,
      "step": 53135,
      "training_step_time": 0.40163683891296387
    },
    {
      "epoch": 0.00032431640625,
      "model_forward_time": 0.11529159545898438,
      "step": 53136
    },
    {
      "epoch": 0.00032431640625,
      "step": 53136,
      "training_step_time": 0.3996419906616211
    },
    {
      "epoch": 0.000324322509765625,
      "model_forward_time": 0.11512422561645508,
      "step": 53137
    },
    {
      "epoch": 0.000324322509765625,
      "step": 53137,
      "training_step_time": 0.46436643600463867
    },
    {
      "epoch": 0.00032432861328125,
      "model_forward_time": 0.11632633209228516,
      "step": 53138
    },
    {
      "epoch": 0.00032432861328125,
      "step": 53138,
      "training_step_time": 0.39525938034057617
    },
    {
      "epoch": 0.000324334716796875,
      "model_forward_time": 0.11548638343811035,
      "step": 53139
    },
    {
      "epoch": 0.000324334716796875,
      "step": 53139,
      "training_step_time": 0.45043230056762695
    },
    {
      "epoch": 0.0003243408203125,
      "grad_norm": 0.09202496707439423,
      "learning_rate": 3.531493373531419e-06,
      "loss": 0.0423,
      "step": 53140
    },
    {
      "epoch": 0.0003243408203125,
      "model_forward_time": 0.11509060859680176,
      "step": 53140
    },
    {
      "epoch": 0.0003243408203125,
      "step": 53140,
      "training_step_time": 0.4390709400177002
    },
    {
      "epoch": 0.000324346923828125,
      "model_forward_time": 0.11493206024169922,
      "step": 53141
    },
    {
      "epoch": 0.000324346923828125,
      "step": 53141,
      "training_step_time": 0.4902775287628174
    },
    {
      "epoch": 0.00032435302734375,
      "model_forward_time": 0.11474490165710449,
      "step": 53142
    },
    {
      "epoch": 0.00032435302734375,
      "step": 53142,
      "training_step_time": 0.3841829299926758
    },
    {
      "epoch": 0.000324359130859375,
      "model_forward_time": 0.11527442932128906,
      "step": 53143
    },
    {
      "epoch": 0.000324359130859375,
      "step": 53143,
      "training_step_time": 0.38914918899536133
    },
    {
      "epoch": 0.000324365234375,
      "model_forward_time": 0.11497330665588379,
      "step": 53144
    },
    {
      "epoch": 0.000324365234375,
      "step": 53144,
      "training_step_time": 0.4290947914123535
    },
    {
      "epoch": 0.000324371337890625,
      "model_forward_time": 0.11517572402954102,
      "step": 53145
    },
    {
      "epoch": 0.000324371337890625,
      "step": 53145,
      "training_step_time": 0.4153308868408203
    },
    {
      "epoch": 0.00032437744140625,
      "model_forward_time": 0.11588048934936523,
      "step": 53146
    },
    {
      "epoch": 0.00032437744140625,
      "step": 53146,
      "training_step_time": 0.44016456604003906
    },
    {
      "epoch": 0.000324383544921875,
      "model_forward_time": 0.11481547355651855,
      "step": 53147
    },
    {
      "epoch": 0.000324383544921875,
      "step": 53147,
      "training_step_time": 0.39235472679138184
    },
    {
      "epoch": 0.0003243896484375,
      "model_forward_time": 0.11519265174865723,
      "step": 53148
    },
    {
      "epoch": 0.0003243896484375,
      "step": 53148,
      "training_step_time": 0.40256285667419434
    },
    {
      "epoch": 0.000324395751953125,
      "model_forward_time": 0.11514735221862793,
      "step": 53149
    },
    {
      "epoch": 0.000324395751953125,
      "step": 53149,
      "training_step_time": 0.3912181854248047
    },
    {
      "epoch": 0.00032440185546875,
      "grad_norm": 0.09135041385889053,
      "learning_rate": 3.5213274800626692e-06,
      "loss": 0.0403,
      "step": 53150
    },
    {
      "epoch": 0.00032440185546875,
      "model_forward_time": 0.11513161659240723,
      "step": 53150
    },
    {
      "epoch": 0.00032440185546875,
      "step": 53150,
      "training_step_time": 0.39903759956359863
    },
    {
      "epoch": 0.000324407958984375,
      "model_forward_time": 0.1151435375213623,
      "step": 53151
    },
    {
      "epoch": 0.000324407958984375,
      "step": 53151,
      "training_step_time": 0.4011564254760742
    },
    {
      "epoch": 0.0003244140625,
      "model_forward_time": 0.11537981033325195,
      "step": 53152
    },
    {
      "epoch": 0.0003244140625,
      "step": 53152,
      "training_step_time": 0.4293205738067627
    },
    {
      "epoch": 0.000324420166015625,
      "model_forward_time": 0.11559081077575684,
      "step": 53153
    },
    {
      "epoch": 0.000324420166015625,
      "step": 53153,
      "training_step_time": 0.4613680839538574
    },
    {
      "epoch": 0.00032442626953125,
      "model_forward_time": 0.11490797996520996,
      "step": 53154
    },
    {
      "epoch": 0.00032442626953125,
      "step": 53154,
      "training_step_time": 0.44597721099853516
    },
    {
      "epoch": 0.000324432373046875,
      "model_forward_time": 0.11559510231018066,
      "step": 53155
    },
    {
      "epoch": 0.000324432373046875,
      "step": 53155,
      "training_step_time": 0.417691707611084
    },
    {
      "epoch": 0.0003244384765625,
      "model_forward_time": 0.11576318740844727,
      "step": 53156
    },
    {
      "epoch": 0.0003244384765625,
      "step": 53156,
      "training_step_time": 0.43385887145996094
    },
    {
      "epoch": 0.000324444580078125,
      "model_forward_time": 0.11521196365356445,
      "step": 53157
    },
    {
      "epoch": 0.000324444580078125,
      "step": 53157,
      "training_step_time": 0.3961973190307617
    },
    {
      "epoch": 0.00032445068359375,
      "model_forward_time": 0.11491799354553223,
      "step": 53158
    },
    {
      "epoch": 0.00032445068359375,
      "step": 53158,
      "training_step_time": 0.36533021926879883
    },
    {
      "epoch": 0.000324456787109375,
      "model_forward_time": 0.11495375633239746,
      "step": 53159
    },
    {
      "epoch": 0.000324456787109375,
      "step": 53159,
      "training_step_time": 0.45627498626708984
    },
    {
      "epoch": 0.000324462890625,
      "grad_norm": 0.08931808173656464,
      "learning_rate": 3.511175705587433e-06,
      "loss": 0.0378,
      "step": 53160
    },
    {
      "epoch": 0.000324462890625,
      "model_forward_time": 0.11492776870727539,
      "step": 53160
    },
    {
      "epoch": 0.000324462890625,
      "step": 53160,
      "training_step_time": 0.4038534164428711
    },
    {
      "epoch": 0.000324468994140625,
      "model_forward_time": 0.11522150039672852,
      "step": 53161
    },
    {
      "epoch": 0.000324468994140625,
      "step": 53161,
      "training_step_time": 0.39054369926452637
    },
    {
      "epoch": 0.00032447509765625,
      "model_forward_time": 0.11474442481994629,
      "step": 53162
    },
    {
      "epoch": 0.00032447509765625,
      "step": 53162,
      "training_step_time": 0.392988920211792
    },
    {
      "epoch": 0.000324481201171875,
      "model_forward_time": 0.11513233184814453,
      "step": 53163
    },
    {
      "epoch": 0.000324481201171875,
      "step": 53163,
      "training_step_time": 0.3943450450897217
    },
    {
      "epoch": 0.0003244873046875,
      "model_forward_time": 0.11526703834533691,
      "step": 53164
    },
    {
      "epoch": 0.0003244873046875,
      "step": 53164,
      "training_step_time": 0.3933525085449219
    },
    {
      "epoch": 0.000324493408203125,
      "model_forward_time": 0.11523270606994629,
      "step": 53165
    },
    {
      "epoch": 0.000324493408203125,
      "step": 53165,
      "training_step_time": 0.40090060234069824
    },
    {
      "epoch": 0.00032449951171875,
      "model_forward_time": 0.11488890647888184,
      "step": 53166
    },
    {
      "epoch": 0.00032449951171875,
      "step": 53166,
      "training_step_time": 0.41893720626831055
    },
    {
      "epoch": 0.000324505615234375,
      "model_forward_time": 0.11519455909729004,
      "step": 53167
    },
    {
      "epoch": 0.000324505615234375,
      "step": 53167,
      "training_step_time": 0.42086172103881836
    },
    {
      "epoch": 0.00032451171875,
      "model_forward_time": 0.1152806282043457,
      "step": 53168
    },
    {
      "epoch": 0.00032451171875,
      "step": 53168,
      "training_step_time": 0.3948547840118408
    },
    {
      "epoch": 0.000324517822265625,
      "model_forward_time": 0.11462688446044922,
      "step": 53169
    },
    {
      "epoch": 0.000324517822265625,
      "step": 53169,
      "training_step_time": 0.45999670028686523
    },
    {
      "epoch": 0.00032452392578125,
      "grad_norm": 0.08750428259372711,
      "learning_rate": 3.5010380531895426e-06,
      "loss": 0.0354,
      "step": 53170
    },
    {
      "epoch": 0.00032452392578125,
      "model_forward_time": 0.11521577835083008,
      "step": 53170
    },
    {
      "epoch": 0.00032452392578125,
      "step": 53170,
      "training_step_time": 0.5050725936889648
    },
    {
      "epoch": 0.000324530029296875,
      "model_forward_time": 0.11550378799438477,
      "step": 53171
    },
    {
      "epoch": 0.000324530029296875,
      "step": 53171,
      "training_step_time": 0.38556361198425293
    },
    {
      "epoch": 0.0003245361328125,
      "model_forward_time": 0.11564970016479492,
      "step": 53172
    },
    {
      "epoch": 0.0003245361328125,
      "step": 53172,
      "training_step_time": 0.3879358768463135
    },
    {
      "epoch": 0.000324542236328125,
      "model_forward_time": 0.11525511741638184,
      "step": 53173
    },
    {
      "epoch": 0.000324542236328125,
      "step": 53173,
      "training_step_time": 0.4322395324707031
    },
    {
      "epoch": 0.00032454833984375,
      "model_forward_time": 0.1157679557800293,
      "step": 53174
    },
    {
      "epoch": 0.00032454833984375,
      "step": 53174,
      "training_step_time": 0.4089071750640869
    },
    {
      "epoch": 0.000324554443359375,
      "model_forward_time": 0.11538124084472656,
      "step": 53175
    },
    {
      "epoch": 0.000324554443359375,
      "step": 53175,
      "training_step_time": 0.4033927917480469
    },
    {
      "epoch": 0.000324560546875,
      "model_forward_time": 0.11476492881774902,
      "step": 53176
    },
    {
      "epoch": 0.000324560546875,
      "step": 53176,
      "training_step_time": 0.39727163314819336
    },
    {
      "epoch": 0.000324566650390625,
      "model_forward_time": 0.1159355640411377,
      "step": 53177
    },
    {
      "epoch": 0.000324566650390625,
      "step": 53177,
      "training_step_time": 0.3896486759185791
    },
    {
      "epoch": 0.00032457275390625,
      "model_forward_time": 0.11510396003723145,
      "step": 53178
    },
    {
      "epoch": 0.00032457275390625,
      "step": 53178,
      "training_step_time": 0.3868286609649658
    },
    {
      "epoch": 0.000324578857421875,
      "model_forward_time": 0.11501049995422363,
      "step": 53179
    },
    {
      "epoch": 0.000324578857421875,
      "step": 53179,
      "training_step_time": 0.3926975727081299
    },
    {
      "epoch": 0.0003245849609375,
      "grad_norm": 0.0785825178027153,
      "learning_rate": 3.4909145259485744e-06,
      "loss": 0.0315,
      "step": 53180
    },
    {
      "epoch": 0.0003245849609375,
      "model_forward_time": 0.11551332473754883,
      "step": 53180
    },
    {
      "epoch": 0.0003245849609375,
      "step": 53180,
      "training_step_time": 0.3905761241912842
    },
    {
      "epoch": 0.000324591064453125,
      "model_forward_time": 0.11603140830993652,
      "step": 53181
    },
    {
      "epoch": 0.000324591064453125,
      "step": 53181,
      "training_step_time": 0.5646626949310303
    },
    {
      "epoch": 0.00032459716796875,
      "model_forward_time": 0.1144247055053711,
      "step": 53182
    },
    {
      "epoch": 0.00032459716796875,
      "step": 53182,
      "training_step_time": 0.42539501190185547
    },
    {
      "epoch": 0.000324603271484375,
      "model_forward_time": 0.11446523666381836,
      "step": 53183
    },
    {
      "epoch": 0.000324603271484375,
      "step": 53183,
      "training_step_time": 0.3982102870941162
    },
    {
      "epoch": 0.000324609375,
      "model_forward_time": 0.11517691612243652,
      "step": 53184
    },
    {
      "epoch": 0.000324609375,
      "step": 53184,
      "training_step_time": 0.4059429168701172
    },
    {
      "epoch": 0.000324615478515625,
      "model_forward_time": 0.1144247055053711,
      "step": 53185
    },
    {
      "epoch": 0.000324615478515625,
      "step": 53185,
      "training_step_time": 0.49245166778564453
    },
    {
      "epoch": 0.00032462158203125,
      "model_forward_time": 0.1151728630065918,
      "step": 53186
    },
    {
      "epoch": 0.00032462158203125,
      "step": 53186,
      "training_step_time": 0.38815808296203613
    },
    {
      "epoch": 0.000324627685546875,
      "model_forward_time": 0.11500430107116699,
      "step": 53187
    },
    {
      "epoch": 0.000324627685546875,
      "step": 53187,
      "training_step_time": 0.3649775981903076
    },
    {
      "epoch": 0.0003246337890625,
      "model_forward_time": 0.11502408981323242,
      "step": 53188
    },
    {
      "epoch": 0.0003246337890625,
      "step": 53188,
      "training_step_time": 0.44726037979125977
    },
    {
      "epoch": 0.000324639892578125,
      "model_forward_time": 0.1154782772064209,
      "step": 53189
    },
    {
      "epoch": 0.000324639892578125,
      "step": 53189,
      "training_step_time": 0.4141542911529541
    },
    {
      "epoch": 0.00032464599609375,
      "grad_norm": 0.09716594964265823,
      "learning_rate": 3.4808051269397512e-06,
      "loss": 0.0409,
      "step": 53190
    },
    {
      "epoch": 0.00032464599609375,
      "model_forward_time": 0.1150665283203125,
      "step": 53190
    },
    {
      "epoch": 0.00032464599609375,
      "step": 53190,
      "training_step_time": 0.39267587661743164
    },
    {
      "epoch": 0.000324652099609375,
      "model_forward_time": 0.11491894721984863,
      "step": 53191
    },
    {
      "epoch": 0.000324652099609375,
      "step": 53191,
      "training_step_time": 0.38671207427978516
    },
    {
      "epoch": 0.000324658203125,
      "model_forward_time": 0.11496615409851074,
      "step": 53192
    },
    {
      "epoch": 0.000324658203125,
      "step": 53192,
      "training_step_time": 0.3901185989379883
    },
    {
      "epoch": 0.000324664306640625,
      "model_forward_time": 0.11496376991271973,
      "step": 53193
    },
    {
      "epoch": 0.000324664306640625,
      "step": 53193,
      "training_step_time": 0.3960452079772949
    },
    {
      "epoch": 0.00032467041015625,
      "model_forward_time": 0.1148226261138916,
      "step": 53194
    },
    {
      "epoch": 0.00032467041015625,
      "step": 53194,
      "training_step_time": 0.3909792900085449
    },
    {
      "epoch": 0.000324676513671875,
      "model_forward_time": 0.11550378799438477,
      "step": 53195
    },
    {
      "epoch": 0.000324676513671875,
      "step": 53195,
      "training_step_time": 0.4620943069458008
    },
    {
      "epoch": 0.0003246826171875,
      "model_forward_time": 0.11599922180175781,
      "step": 53196
    },
    {
      "epoch": 0.0003246826171875,
      "step": 53196,
      "training_step_time": 0.3985898494720459
    },
    {
      "epoch": 0.000324688720703125,
      "model_forward_time": 0.11544275283813477,
      "step": 53197
    },
    {
      "epoch": 0.000324688720703125,
      "step": 53197,
      "training_step_time": 0.47182226181030273
    },
    {
      "epoch": 0.00032469482421875,
      "model_forward_time": 0.11530113220214844,
      "step": 53198
    },
    {
      "epoch": 0.00032469482421875,
      "step": 53198,
      "training_step_time": 0.41672277450561523
    },
    {
      "epoch": 0.000324700927734375,
      "model_forward_time": 0.11471676826477051,
      "step": 53199
    },
    {
      "epoch": 0.000324700927734375,
      "step": 53199,
      "training_step_time": 0.41007423400878906
    },
    {
      "epoch": 0.00032470703125,
      "grad_norm": 0.12105392664670944,
      "learning_rate": 3.470709859234084e-06,
      "loss": 0.0379,
      "step": 53200
    },
    {
      "epoch": 0.00032470703125,
      "model_forward_time": 0.11453843116760254,
      "step": 53200
    },
    {
      "epoch": 0.00032470703125,
      "step": 53200,
      "training_step_time": 0.3971860408782959
    },
    {
      "epoch": 0.000324713134765625,
      "model_forward_time": 0.11566829681396484,
      "step": 53201
    },
    {
      "epoch": 0.000324713134765625,
      "step": 53201,
      "training_step_time": 0.39774584770202637
    },
    {
      "epoch": 0.00032471923828125,
      "model_forward_time": 0.11530041694641113,
      "step": 53202
    },
    {
      "epoch": 0.00032471923828125,
      "step": 53202,
      "training_step_time": 0.367279052734375
    },
    {
      "epoch": 0.000324725341796875,
      "model_forward_time": 0.11503744125366211,
      "step": 53203
    },
    {
      "epoch": 0.000324725341796875,
      "step": 53203,
      "training_step_time": 0.4557218551635742
    },
    {
      "epoch": 0.0003247314453125,
      "model_forward_time": 0.1146385669708252,
      "step": 53204
    },
    {
      "epoch": 0.0003247314453125,
      "step": 53204,
      "training_step_time": 0.4005582332611084
    },
    {
      "epoch": 0.000324737548828125,
      "model_forward_time": 0.11520600318908691,
      "step": 53205
    },
    {
      "epoch": 0.000324737548828125,
      "step": 53205,
      "training_step_time": 0.3910670280456543
    },
    {
      "epoch": 0.00032474365234375,
      "model_forward_time": 0.1150355339050293,
      "step": 53206
    },
    {
      "epoch": 0.00032474365234375,
      "step": 53206,
      "training_step_time": 0.39011645317077637
    },
    {
      "epoch": 0.000324749755859375,
      "model_forward_time": 0.11489319801330566,
      "step": 53207
    },
    {
      "epoch": 0.000324749755859375,
      "step": 53207,
      "training_step_time": 0.3935115337371826
    },
    {
      "epoch": 0.000324755859375,
      "model_forward_time": 0.11466073989868164,
      "step": 53208
    },
    {
      "epoch": 0.000324755859375,
      "step": 53208,
      "training_step_time": 0.39128661155700684
    },
    {
      "epoch": 0.000324761962890625,
      "model_forward_time": 0.11585664749145508,
      "step": 53209
    },
    {
      "epoch": 0.000324761962890625,
      "step": 53209,
      "training_step_time": 0.5025877952575684
    },
    {
      "epoch": 0.00032476806640625,
      "grad_norm": 0.08355782926082611,
      "learning_rate": 3.460628725898207e-06,
      "loss": 0.0357,
      "step": 53210
    },
    {
      "epoch": 0.00032476806640625,
      "model_forward_time": 0.11554884910583496,
      "step": 53210
    },
    {
      "epoch": 0.00032476806640625,
      "step": 53210,
      "training_step_time": 0.4351530075073242
    },
    {
      "epoch": 0.000324774169921875,
      "model_forward_time": 0.11500406265258789,
      "step": 53211
    },
    {
      "epoch": 0.000324774169921875,
      "step": 53211,
      "training_step_time": 0.4705495834350586
    },
    {
      "epoch": 0.0003247802734375,
      "model_forward_time": 0.11547255516052246,
      "step": 53212
    },
    {
      "epoch": 0.0003247802734375,
      "step": 53212,
      "training_step_time": 0.4114091396331787
    },
    {
      "epoch": 0.000324786376953125,
      "model_forward_time": 0.11502432823181152,
      "step": 53213
    },
    {
      "epoch": 0.000324786376953125,
      "step": 53213,
      "training_step_time": 0.45938611030578613
    },
    {
      "epoch": 0.00032479248046875,
      "model_forward_time": 0.11490511894226074,
      "step": 53214
    },
    {
      "epoch": 0.00032479248046875,
      "step": 53214,
      "training_step_time": 0.4640994071960449
    },
    {
      "epoch": 0.000324798583984375,
      "model_forward_time": 0.1149907112121582,
      "step": 53215
    },
    {
      "epoch": 0.000324798583984375,
      "step": 53215,
      "training_step_time": 0.40586066246032715
    },
    {
      "epoch": 0.0003248046875,
      "model_forward_time": 0.11468625068664551,
      "step": 53216
    },
    {
      "epoch": 0.0003248046875,
      "step": 53216,
      "training_step_time": 0.36255741119384766
    },
    {
      "epoch": 0.000324810791015625,
      "model_forward_time": 0.11466741561889648,
      "step": 53217
    },
    {
      "epoch": 0.000324810791015625,
      "step": 53217,
      "training_step_time": 0.45395994186401367
    },
    {
      "epoch": 0.00032481689453125,
      "model_forward_time": 0.11481571197509766,
      "step": 53218
    },
    {
      "epoch": 0.00032481689453125,
      "step": 53218,
      "training_step_time": 0.40526580810546875
    },
    {
      "epoch": 0.000324822998046875,
      "model_forward_time": 0.11426639556884766,
      "step": 53219
    },
    {
      "epoch": 0.000324822998046875,
      "step": 53219,
      "training_step_time": 0.39028072357177734
    },
    {
      "epoch": 0.0003248291015625,
      "grad_norm": 0.09845968335866928,
      "learning_rate": 3.4505617299945336e-06,
      "loss": 0.0348,
      "step": 53220
    },
    {
      "epoch": 0.0003248291015625,
      "model_forward_time": 0.11535835266113281,
      "step": 53220
    },
    {
      "epoch": 0.0003248291015625,
      "step": 53220,
      "training_step_time": 0.38484907150268555
    },
    {
      "epoch": 0.000324835205078125,
      "model_forward_time": 0.11447310447692871,
      "step": 53221
    },
    {
      "epoch": 0.000324835205078125,
      "step": 53221,
      "training_step_time": 0.46113157272338867
    },
    {
      "epoch": 0.00032484130859375,
      "model_forward_time": 0.11517715454101562,
      "step": 53222
    },
    {
      "epoch": 0.00032484130859375,
      "step": 53222,
      "training_step_time": 0.37648797035217285
    },
    {
      "epoch": 0.000324847412109375,
      "model_forward_time": 0.11538529396057129,
      "step": 53223
    },
    {
      "epoch": 0.000324847412109375,
      "step": 53223,
      "training_step_time": 0.4332132339477539
    },
    {
      "epoch": 0.000324853515625,
      "model_forward_time": 0.1145470142364502,
      "step": 53224
    },
    {
      "epoch": 0.000324853515625,
      "step": 53224,
      "training_step_time": 0.39841580390930176
    },
    {
      "epoch": 0.000324859619140625,
      "model_forward_time": 0.11716341972351074,
      "step": 53225
    },
    {
      "epoch": 0.000324859619140625,
      "step": 53225,
      "training_step_time": 0.4459505081176758
    },
    {
      "epoch": 0.00032486572265625,
      "model_forward_time": 0.11504364013671875,
      "step": 53226
    },
    {
      "epoch": 0.00032486572265625,
      "step": 53226,
      "training_step_time": 0.3946053981781006
    },
    {
      "epoch": 0.000324871826171875,
      "model_forward_time": 0.11522531509399414,
      "step": 53227
    },
    {
      "epoch": 0.000324871826171875,
      "step": 53227,
      "training_step_time": 0.46686530113220215
    },
    {
      "epoch": 0.0003248779296875,
      "model_forward_time": 0.11561465263366699,
      "step": 53228
    },
    {
      "epoch": 0.0003248779296875,
      "step": 53228,
      "training_step_time": 0.46492433547973633
    },
    {
      "epoch": 0.000324884033203125,
      "model_forward_time": 0.11513423919677734,
      "step": 53229
    },
    {
      "epoch": 0.000324884033203125,
      "step": 53229,
      "training_step_time": 0.3928842544555664
    },
    {
      "epoch": 0.00032489013671875,
      "grad_norm": 0.08504866063594818,
      "learning_rate": 3.440508874581139e-06,
      "loss": 0.0309,
      "step": 53230
    },
    {
      "epoch": 0.00032489013671875,
      "model_forward_time": 0.1155703067779541,
      "step": 53230
    },
    {
      "epoch": 0.00032489013671875,
      "step": 53230,
      "training_step_time": 0.38759279251098633
    },
    {
      "epoch": 0.000324896240234375,
      "model_forward_time": 0.11487579345703125,
      "step": 53231
    },
    {
      "epoch": 0.000324896240234375,
      "step": 53231,
      "training_step_time": 0.4577314853668213
    },
    {
      "epoch": 0.00032490234375,
      "model_forward_time": 0.11561799049377441,
      "step": 53232
    },
    {
      "epoch": 0.00032490234375,
      "step": 53232,
      "training_step_time": 0.3933708667755127
    },
    {
      "epoch": 0.000324908447265625,
      "model_forward_time": 0.11691045761108398,
      "step": 53233
    },
    {
      "epoch": 0.000324908447265625,
      "step": 53233,
      "training_step_time": 0.5645878314971924
    },
    {
      "epoch": 0.00032491455078125,
      "model_forward_time": 0.11493372917175293,
      "step": 53234
    },
    {
      "epoch": 0.00032491455078125,
      "step": 53234,
      "training_step_time": 0.37810373306274414
    },
    {
      "epoch": 0.000324920654296875,
      "model_forward_time": 0.11511516571044922,
      "step": 53235
    },
    {
      "epoch": 0.000324920654296875,
      "step": 53235,
      "training_step_time": 0.39011287689208984
    },
    {
      "epoch": 0.0003249267578125,
      "model_forward_time": 0.11465311050415039,
      "step": 53236
    },
    {
      "epoch": 0.0003249267578125,
      "step": 53236,
      "training_step_time": 0.40010619163513184
    },
    {
      "epoch": 0.000324932861328125,
      "model_forward_time": 0.11557245254516602,
      "step": 53237
    },
    {
      "epoch": 0.000324932861328125,
      "step": 53237,
      "training_step_time": 0.39965033531188965
    },
    {
      "epoch": 0.00032493896484375,
      "model_forward_time": 0.11565852165222168,
      "step": 53238
    },
    {
      "epoch": 0.00032493896484375,
      "step": 53238,
      "training_step_time": 0.409548282623291
    },
    {
      "epoch": 0.000324945068359375,
      "model_forward_time": 0.1150965690612793,
      "step": 53239
    },
    {
      "epoch": 0.000324945068359375,
      "step": 53239,
      "training_step_time": 0.570826530456543
    },
    {
      "epoch": 0.000324951171875,
      "grad_norm": 0.12452591955661774,
      "learning_rate": 3.430470162711813e-06,
      "loss": 0.0373,
      "step": 53240
    },
    {
      "epoch": 0.000324951171875,
      "model_forward_time": 0.11461567878723145,
      "step": 53240
    },
    {
      "epoch": 0.000324951171875,
      "step": 53240,
      "training_step_time": 0.395679235458374
    },
    {
      "epoch": 0.000324957275390625,
      "model_forward_time": 0.11683011054992676,
      "step": 53241
    },
    {
      "epoch": 0.000324957275390625,
      "step": 53241,
      "training_step_time": 0.39388132095336914
    },
    {
      "epoch": 0.00032496337890625,
      "model_forward_time": 0.11541938781738281,
      "step": 53242
    },
    {
      "epoch": 0.00032496337890625,
      "step": 53242,
      "training_step_time": 0.48020505905151367
    },
    {
      "epoch": 0.000324969482421875,
      "model_forward_time": 0.1149148941040039,
      "step": 53243
    },
    {
      "epoch": 0.000324969482421875,
      "step": 53243,
      "training_step_time": 0.4076249599456787
    },
    {
      "epoch": 0.0003249755859375,
      "model_forward_time": 0.1144108772277832,
      "step": 53244
    },
    {
      "epoch": 0.0003249755859375,
      "step": 53244,
      "training_step_time": 0.3898122310638428
    },
    {
      "epoch": 0.000324981689453125,
      "model_forward_time": 0.11700153350830078,
      "step": 53245
    },
    {
      "epoch": 0.000324981689453125,
      "step": 53245,
      "training_step_time": 0.8525688648223877
    },
    {
      "epoch": 0.00032498779296875,
      "model_forward_time": 0.11390495300292969,
      "step": 53246
    },
    {
      "epoch": 0.00032498779296875,
      "step": 53246,
      "training_step_time": 0.4364151954650879
    },
    {
      "epoch": 0.000324993896484375,
      "model_forward_time": 0.11461162567138672,
      "step": 53247
    },
    {
      "epoch": 0.000324993896484375,
      "step": 53247,
      "training_step_time": 0.3900871276855469
    },
    {
      "epoch": 0.000325,
      "model_forward_time": 0.11443018913269043,
      "step": 53248
    },
    {
      "epoch": 0.000325,
      "step": 53248,
      "training_step_time": 0.38011956214904785
    },
    {
      "epoch": 0.000325006103515625,
      "model_forward_time": 0.11428141593933105,
      "step": 53249
    },
    {
      "epoch": 0.000325006103515625,
      "step": 53249,
      "training_step_time": 0.3896305561065674
    },
    {
      "epoch": 0.00032501220703125,
      "grad_norm": 0.09439364075660706,
      "learning_rate": 3.420445597436056e-06,
      "loss": 0.0333,
      "step": 53250
    },
    {
      "epoch": 0.00032501220703125,
      "model_forward_time": 0.11458468437194824,
      "step": 53250
    },
    {
      "epoch": 0.00032501220703125,
      "step": 53250,
      "training_step_time": 0.39164161682128906
    },
    {
      "epoch": 0.000325018310546875,
      "model_forward_time": 0.11498069763183594,
      "step": 53251
    },
    {
      "epoch": 0.000325018310546875,
      "step": 53251,
      "training_step_time": 0.45021891593933105
    },
    {
      "epoch": 0.0003250244140625,
      "model_forward_time": 0.11577296257019043,
      "step": 53252
    },
    {
      "epoch": 0.0003250244140625,
      "step": 53252,
      "training_step_time": 0.3966789245605469
    },
    {
      "epoch": 0.000325030517578125,
      "model_forward_time": 0.11487102508544922,
      "step": 53253
    },
    {
      "epoch": 0.000325030517578125,
      "step": 53253,
      "training_step_time": 0.44207191467285156
    },
    {
      "epoch": 0.00032503662109375,
      "model_forward_time": 0.11476588249206543,
      "step": 53254
    },
    {
      "epoch": 0.00032503662109375,
      "step": 53254,
      "training_step_time": 0.4286625385284424
    },
    {
      "epoch": 0.000325042724609375,
      "model_forward_time": 0.11451148986816406,
      "step": 53255
    },
    {
      "epoch": 0.000325042724609375,
      "step": 53255,
      "training_step_time": 0.4086885452270508
    },
    {
      "epoch": 0.000325048828125,
      "model_forward_time": 0.11500978469848633,
      "step": 53256
    },
    {
      "epoch": 0.000325048828125,
      "step": 53256,
      "training_step_time": 0.48436522483825684
    },
    {
      "epoch": 0.000325054931640625,
      "model_forward_time": 0.1149897575378418,
      "step": 53257
    },
    {
      "epoch": 0.000325054931640625,
      "step": 53257,
      "training_step_time": 0.389066219329834
    },
    {
      "epoch": 0.00032506103515625,
      "model_forward_time": 0.11496329307556152,
      "step": 53258
    },
    {
      "epoch": 0.00032506103515625,
      "step": 53258,
      "training_step_time": 0.39420485496520996
    },
    {
      "epoch": 0.000325067138671875,
      "model_forward_time": 0.11480355262756348,
      "step": 53259
    },
    {
      "epoch": 0.000325067138671875,
      "step": 53259,
      "training_step_time": 0.43094849586486816
    },
    {
      "epoch": 0.0003250732421875,
      "grad_norm": 0.10327082872390747,
      "learning_rate": 3.41043518179906e-06,
      "loss": 0.0419,
      "step": 53260
    },
    {
      "epoch": 0.0003250732421875,
      "model_forward_time": 0.11499762535095215,
      "step": 53260
    },
    {
      "epoch": 0.0003250732421875,
      "step": 53260,
      "training_step_time": 0.3995518684387207
    },
    {
      "epoch": 0.000325079345703125,
      "model_forward_time": 0.11552929878234863,
      "step": 53261
    },
    {
      "epoch": 0.000325079345703125,
      "step": 53261,
      "training_step_time": 0.39783263206481934
    },
    {
      "epoch": 0.00032508544921875,
      "model_forward_time": 0.11496829986572266,
      "step": 53262
    },
    {
      "epoch": 0.00032508544921875,
      "step": 53262,
      "training_step_time": 0.40044426918029785
    },
    {
      "epoch": 0.000325091552734375,
      "model_forward_time": 0.11507701873779297,
      "step": 53263
    },
    {
      "epoch": 0.000325091552734375,
      "step": 53263,
      "training_step_time": 0.46922993659973145
    },
    {
      "epoch": 0.00032509765625,
      "model_forward_time": 0.11478376388549805,
      "step": 53264
    },
    {
      "epoch": 0.00032509765625,
      "step": 53264,
      "training_step_time": 0.38284969329833984
    },
    {
      "epoch": 0.000325103759765625,
      "model_forward_time": 0.1148684024810791,
      "step": 53265
    },
    {
      "epoch": 0.000325103759765625,
      "step": 53265,
      "training_step_time": 0.4074440002441406
    },
    {
      "epoch": 0.00032510986328125,
      "model_forward_time": 0.11460208892822266,
      "step": 53266
    },
    {
      "epoch": 0.00032510986328125,
      "step": 53266,
      "training_step_time": 0.4282352924346924
    },
    {
      "epoch": 0.000325115966796875,
      "model_forward_time": 0.11515188217163086,
      "step": 53267
    },
    {
      "epoch": 0.000325115966796875,
      "step": 53267,
      "training_step_time": 0.39063572883605957
    },
    {
      "epoch": 0.0003251220703125,
      "model_forward_time": 0.1151282787322998,
      "step": 53268
    },
    {
      "epoch": 0.0003251220703125,
      "step": 53268,
      "training_step_time": 0.454639196395874
    },
    {
      "epoch": 0.000325128173828125,
      "model_forward_time": 0.11573123931884766,
      "step": 53269
    },
    {
      "epoch": 0.000325128173828125,
      "step": 53269,
      "training_step_time": 0.47558116912841797
    },
    {
      "epoch": 0.00032513427734375,
      "grad_norm": 0.14505203068256378,
      "learning_rate": 3.4004389188417305e-06,
      "loss": 0.039,
      "step": 53270
    },
    {
      "epoch": 0.00032513427734375,
      "model_forward_time": 0.1148073673248291,
      "step": 53270
    },
    {
      "epoch": 0.00032513427734375,
      "step": 53270,
      "training_step_time": 0.39508748054504395
    },
    {
      "epoch": 0.000325140380859375,
      "model_forward_time": 0.11471319198608398,
      "step": 53271
    },
    {
      "epoch": 0.000325140380859375,
      "step": 53271,
      "training_step_time": 0.40819621086120605
    },
    {
      "epoch": 0.000325146484375,
      "model_forward_time": 0.11474895477294922,
      "step": 53272
    },
    {
      "epoch": 0.000325146484375,
      "step": 53272,
      "training_step_time": 0.3930084705352783
    },
    {
      "epoch": 0.000325152587890625,
      "model_forward_time": 0.11458396911621094,
      "step": 53273
    },
    {
      "epoch": 0.000325152587890625,
      "step": 53273,
      "training_step_time": 0.38333773612976074
    },
    {
      "epoch": 0.00032515869140625,
      "model_forward_time": 0.11526274681091309,
      "step": 53274
    },
    {
      "epoch": 0.00032515869140625,
      "step": 53274,
      "training_step_time": 0.5195720195770264
    },
    {
      "epoch": 0.000325164794921875,
      "model_forward_time": 0.11422872543334961,
      "step": 53275
    },
    {
      "epoch": 0.000325164794921875,
      "step": 53275,
      "training_step_time": 0.5012564659118652
    },
    {
      "epoch": 0.0003251708984375,
      "model_forward_time": 0.11519026756286621,
      "step": 53276
    },
    {
      "epoch": 0.0003251708984375,
      "step": 53276,
      "training_step_time": 0.3687400817871094
    },
    {
      "epoch": 0.000325177001953125,
      "model_forward_time": 0.11592650413513184,
      "step": 53277
    },
    {
      "epoch": 0.000325177001953125,
      "step": 53277,
      "training_step_time": 0.38115525245666504
    },
    {
      "epoch": 0.00032518310546875,
      "model_forward_time": 0.11501026153564453,
      "step": 53278
    },
    {
      "epoch": 0.00032518310546875,
      "step": 53278,
      "training_step_time": 0.3906888961791992
    },
    {
      "epoch": 0.000325189208984375,
      "model_forward_time": 0.11448407173156738,
      "step": 53279
    },
    {
      "epoch": 0.000325189208984375,
      "step": 53279,
      "training_step_time": 0.39420342445373535
    },
    {
      "epoch": 0.0003251953125,
      "grad_norm": 0.1140894889831543,
      "learning_rate": 3.390456811600673e-06,
      "loss": 0.0352,
      "step": 53280
    },
    {
      "epoch": 0.0003251953125,
      "model_forward_time": 0.11493277549743652,
      "step": 53280
    },
    {
      "epoch": 0.0003251953125,
      "step": 53280,
      "training_step_time": 0.4050159454345703
    },
    {
      "epoch": 0.000325201416015625,
      "model_forward_time": 0.11514163017272949,
      "step": 53281
    },
    {
      "epoch": 0.000325201416015625,
      "step": 53281,
      "training_step_time": 0.5383949279785156
    },
    {
      "epoch": 0.00032520751953125,
      "model_forward_time": 0.11512255668640137,
      "step": 53282
    },
    {
      "epoch": 0.00032520751953125,
      "step": 53282,
      "training_step_time": 0.4013550281524658
    },
    {
      "epoch": 0.000325213623046875,
      "model_forward_time": 0.11451840400695801,
      "step": 53283
    },
    {
      "epoch": 0.000325213623046875,
      "step": 53283,
      "training_step_time": 0.40263843536376953
    },
    {
      "epoch": 0.0003252197265625,
      "model_forward_time": 0.11482858657836914,
      "step": 53284
    },
    {
      "epoch": 0.0003252197265625,
      "step": 53284,
      "training_step_time": 0.3965573310852051
    },
    {
      "epoch": 0.000325225830078125,
      "model_forward_time": 0.11492419242858887,
      "step": 53285
    },
    {
      "epoch": 0.000325225830078125,
      "step": 53285,
      "training_step_time": 0.42707371711730957
    },
    {
      "epoch": 0.00032523193359375,
      "model_forward_time": 0.11488199234008789,
      "step": 53286
    },
    {
      "epoch": 0.00032523193359375,
      "step": 53286,
      "training_step_time": 0.41103172302246094
    },
    {
      "epoch": 0.000325238037109375,
      "model_forward_time": 0.11557960510253906,
      "step": 53287
    },
    {
      "epoch": 0.000325238037109375,
      "step": 53287,
      "training_step_time": 0.5087606906890869
    },
    {
      "epoch": 0.000325244140625,
      "model_forward_time": 0.11629152297973633,
      "step": 53288
    },
    {
      "epoch": 0.000325244140625,
      "step": 53288,
      "training_step_time": 0.4000532627105713
    },
    {
      "epoch": 0.000325250244140625,
      "model_forward_time": 0.11507987976074219,
      "step": 53289
    },
    {
      "epoch": 0.000325250244140625,
      "step": 53289,
      "training_step_time": 0.42708444595336914
    },
    {
      "epoch": 0.00032525634765625,
      "grad_norm": 0.115136057138443,
      "learning_rate": 3.380488863108183e-06,
      "loss": 0.0367,
      "step": 53290
    },
    {
      "epoch": 0.00032525634765625,
      "model_forward_time": 0.11564397811889648,
      "step": 53290
    },
    {
      "epoch": 0.00032525634765625,
      "step": 53290,
      "training_step_time": 0.4072744846343994
    },
    {
      "epoch": 0.000325262451171875,
      "model_forward_time": 0.11470985412597656,
      "step": 53291
    },
    {
      "epoch": 0.000325262451171875,
      "step": 53291,
      "training_step_time": 0.39160776138305664
    },
    {
      "epoch": 0.0003252685546875,
      "model_forward_time": 0.11489510536193848,
      "step": 53292
    },
    {
      "epoch": 0.0003252685546875,
      "step": 53292,
      "training_step_time": 0.3876793384552002
    },
    {
      "epoch": 0.000325274658203125,
      "model_forward_time": 0.11547327041625977,
      "step": 53293
    },
    {
      "epoch": 0.000325274658203125,
      "step": 53293,
      "training_step_time": 0.4239518642425537
    },
    {
      "epoch": 0.00032528076171875,
      "model_forward_time": 0.11481142044067383,
      "step": 53294
    },
    {
      "epoch": 0.00032528076171875,
      "step": 53294,
      "training_step_time": 0.4139831066131592
    },
    {
      "epoch": 0.000325286865234375,
      "model_forward_time": 0.11470794677734375,
      "step": 53295
    },
    {
      "epoch": 0.000325286865234375,
      "step": 53295,
      "training_step_time": 0.4457883834838867
    },
    {
      "epoch": 0.00032529296875,
      "model_forward_time": 0.11580443382263184,
      "step": 53296
    },
    {
      "epoch": 0.00032529296875,
      "step": 53296,
      "training_step_time": 0.4302504062652588
    },
    {
      "epoch": 0.000325299072265625,
      "model_forward_time": 0.114654541015625,
      "step": 53297
    },
    {
      "epoch": 0.000325299072265625,
      "step": 53297,
      "training_step_time": 0.3998422622680664
    },
    {
      "epoch": 0.00032530517578125,
      "model_forward_time": 0.1148524284362793,
      "step": 53298
    },
    {
      "epoch": 0.00032530517578125,
      "step": 53298,
      "training_step_time": 0.3941774368286133
    },
    {
      "epoch": 0.000325311279296875,
      "model_forward_time": 0.1152963638305664,
      "step": 53299
    },
    {
      "epoch": 0.000325311279296875,
      "step": 53299,
      "training_step_time": 0.47074460983276367
    },
    {
      "epoch": 0.0003253173828125,
      "grad_norm": 0.08971312642097473,
      "learning_rate": 3.3705350763922562e-06,
      "loss": 0.0357,
      "step": 53300
    },
    {
      "epoch": 0.0003253173828125,
      "model_forward_time": 0.11477804183959961,
      "step": 53300
    },
    {
      "epoch": 0.0003253173828125,
      "step": 53300,
      "training_step_time": 0.46826744079589844
    },
    {
      "epoch": 0.000325323486328125,
      "model_forward_time": 0.11524844169616699,
      "step": 53301
    },
    {
      "epoch": 0.000325323486328125,
      "step": 53301,
      "training_step_time": 0.38729214668273926
    },
    {
      "epoch": 0.00032532958984375,
      "model_forward_time": 0.11544156074523926,
      "step": 53302
    },
    {
      "epoch": 0.00032532958984375,
      "step": 53302,
      "training_step_time": 0.3647618293762207
    },
    {
      "epoch": 0.000325335693359375,
      "model_forward_time": 0.11477231979370117,
      "step": 53303
    },
    {
      "epoch": 0.000325335693359375,
      "step": 53303,
      "training_step_time": 0.43283939361572266
    },
    {
      "epoch": 0.000325341796875,
      "model_forward_time": 0.11541938781738281,
      "step": 53304
    },
    {
      "epoch": 0.000325341796875,
      "step": 53304,
      "training_step_time": 0.4868192672729492
    },
    {
      "epoch": 0.000325347900390625,
      "model_forward_time": 0.11470627784729004,
      "step": 53305
    },
    {
      "epoch": 0.000325347900390625,
      "step": 53305,
      "training_step_time": 0.3985753059387207
    },
    {
      "epoch": 0.00032535400390625,
      "model_forward_time": 0.11505508422851562,
      "step": 53306
    },
    {
      "epoch": 0.00032535400390625,
      "step": 53306,
      "training_step_time": 0.38593530654907227
    },
    {
      "epoch": 0.000325360107421875,
      "model_forward_time": 0.11437797546386719,
      "step": 53307
    },
    {
      "epoch": 0.000325360107421875,
      "step": 53307,
      "training_step_time": 0.3894624710083008
    },
    {
      "epoch": 0.0003253662109375,
      "model_forward_time": 0.1149287223815918,
      "step": 53308
    },
    {
      "epoch": 0.0003253662109375,
      "step": 53308,
      "training_step_time": 0.4000840187072754
    },
    {
      "epoch": 0.000325372314453125,
      "model_forward_time": 0.11517667770385742,
      "step": 53309
    },
    {
      "epoch": 0.000325372314453125,
      "step": 53309,
      "training_step_time": 0.41701793670654297
    },
    {
      "epoch": 0.00032537841796875,
      "grad_norm": 0.07618088275194168,
      "learning_rate": 3.360595454476595e-06,
      "loss": 0.0364,
      "step": 53310
    },
    {
      "epoch": 0.00032537841796875,
      "model_forward_time": 0.11437749862670898,
      "step": 53310
    },
    {
      "epoch": 0.00032537841796875,
      "step": 53310,
      "training_step_time": 0.4213376045227051
    },
    {
      "epoch": 0.000325384521484375,
      "model_forward_time": 0.11477208137512207,
      "step": 53311
    },
    {
      "epoch": 0.000325384521484375,
      "step": 53311,
      "training_step_time": 0.41449713706970215
    },
    {
      "epoch": 0.000325390625,
      "model_forward_time": 0.11533164978027344,
      "step": 53312
    },
    {
      "epoch": 0.000325390625,
      "step": 53312,
      "training_step_time": 0.3969693183898926
    },
    {
      "epoch": 0.000325396728515625,
      "model_forward_time": 0.1148219108581543,
      "step": 53313
    },
    {
      "epoch": 0.000325396728515625,
      "step": 53313,
      "training_step_time": 0.4495365619659424
    },
    {
      "epoch": 0.00032540283203125,
      "model_forward_time": 0.11473321914672852,
      "step": 53314
    },
    {
      "epoch": 0.00032540283203125,
      "step": 53314,
      "training_step_time": 0.44667768478393555
    },
    {
      "epoch": 0.000325408935546875,
      "model_forward_time": 0.11484074592590332,
      "step": 53315
    },
    {
      "epoch": 0.000325408935546875,
      "step": 53315,
      "training_step_time": 0.497448205947876
    },
    {
      "epoch": 0.0003254150390625,
      "model_forward_time": 0.11444902420043945,
      "step": 53316
    },
    {
      "epoch": 0.0003254150390625,
      "step": 53316,
      "training_step_time": 0.39958953857421875
    },
    {
      "epoch": 0.000325421142578125,
      "model_forward_time": 0.11545538902282715,
      "step": 53317
    },
    {
      "epoch": 0.000325421142578125,
      "step": 53317,
      "training_step_time": 0.4261512756347656
    },
    {
      "epoch": 0.00032542724609375,
      "model_forward_time": 0.11462116241455078,
      "step": 53318
    },
    {
      "epoch": 0.00032542724609375,
      "step": 53318,
      "training_step_time": 0.43190431594848633
    },
    {
      "epoch": 0.000325433349609375,
      "model_forward_time": 0.11544585227966309,
      "step": 53319
    },
    {
      "epoch": 0.000325433349609375,
      "step": 53319,
      "training_step_time": 0.4124484062194824
    },
    {
      "epoch": 0.000325439453125,
      "grad_norm": 0.0970863476395607,
      "learning_rate": 3.35067000038059e-06,
      "loss": 0.0353,
      "step": 53320
    },
    {
      "epoch": 0.000325439453125,
      "model_forward_time": 0.11506509780883789,
      "step": 53320
    },
    {
      "epoch": 0.000325439453125,
      "step": 53320,
      "training_step_time": 0.3995842933654785
    },
    {
      "epoch": 0.000325445556640625,
      "model_forward_time": 0.11489486694335938,
      "step": 53321
    },
    {
      "epoch": 0.000325445556640625,
      "step": 53321,
      "training_step_time": 0.39725637435913086
    },
    {
      "epoch": 0.00032545166015625,
      "model_forward_time": 0.11487579345703125,
      "step": 53322
    },
    {
      "epoch": 0.00032545166015625,
      "step": 53322,
      "training_step_time": 0.4026150703430176
    },
    {
      "epoch": 0.000325457763671875,
      "model_forward_time": 0.11559724807739258,
      "step": 53323
    },
    {
      "epoch": 0.000325457763671875,
      "step": 53323,
      "training_step_time": 0.458812952041626
    },
    {
      "epoch": 0.0003254638671875,
      "model_forward_time": 0.11431622505187988,
      "step": 53324
    },
    {
      "epoch": 0.0003254638671875,
      "step": 53324,
      "training_step_time": 0.40315794944763184
    },
    {
      "epoch": 0.000325469970703125,
      "model_forward_time": 0.11561393737792969,
      "step": 53325
    },
    {
      "epoch": 0.000325469970703125,
      "step": 53325,
      "training_step_time": 0.47048377990722656
    },
    {
      "epoch": 0.00032547607421875,
      "model_forward_time": 0.11500072479248047,
      "step": 53326
    },
    {
      "epoch": 0.00032547607421875,
      "step": 53326,
      "training_step_time": 0.39812779426574707
    },
    {
      "epoch": 0.000325482177734375,
      "model_forward_time": 0.11488747596740723,
      "step": 53327
    },
    {
      "epoch": 0.000325482177734375,
      "step": 53327,
      "training_step_time": 0.39533209800720215
    },
    {
      "epoch": 0.00032548828125,
      "model_forward_time": 0.11504006385803223,
      "step": 53328
    },
    {
      "epoch": 0.00032548828125,
      "step": 53328,
      "training_step_time": 0.41939473152160645
    },
    {
      "epoch": 0.000325494384765625,
      "model_forward_time": 0.11498212814331055,
      "step": 53329
    },
    {
      "epoch": 0.000325494384765625,
      "step": 53329,
      "training_step_time": 0.455751895904541
    },
    {
      "epoch": 0.00032550048828125,
      "grad_norm": 0.08646872639656067,
      "learning_rate": 3.3407587171193354e-06,
      "loss": 0.0367,
      "step": 53330
    },
    {
      "epoch": 0.00032550048828125,
      "model_forward_time": 0.11612319946289062,
      "step": 53330
    },
    {
      "epoch": 0.00032550048828125,
      "step": 53330,
      "training_step_time": 0.4624135494232178
    },
    {
      "epoch": 0.000325506591796875,
      "model_forward_time": 0.11512875556945801,
      "step": 53331
    },
    {
      "epoch": 0.000325506591796875,
      "step": 53331,
      "training_step_time": 0.36641740798950195
    },
    {
      "epoch": 0.0003255126953125,
      "model_forward_time": 0.1158761978149414,
      "step": 53332
    },
    {
      "epoch": 0.0003255126953125,
      "step": 53332,
      "training_step_time": 0.4634723663330078
    },
    {
      "epoch": 0.000325518798828125,
      "model_forward_time": 0.11477136611938477,
      "step": 53333
    },
    {
      "epoch": 0.000325518798828125,
      "step": 53333,
      "training_step_time": 0.44057250022888184
    },
    {
      "epoch": 0.00032552490234375,
      "model_forward_time": 0.11513137817382812,
      "step": 53334
    },
    {
      "epoch": 0.00032552490234375,
      "step": 53334,
      "training_step_time": 0.38707947731018066
    },
    {
      "epoch": 0.000325531005859375,
      "model_forward_time": 0.11510300636291504,
      "step": 53335
    },
    {
      "epoch": 0.000325531005859375,
      "step": 53335,
      "training_step_time": 0.39514660835266113
    },
    {
      "epoch": 0.000325537109375,
      "model_forward_time": 0.11464357376098633,
      "step": 53336
    },
    {
      "epoch": 0.000325537109375,
      "step": 53336,
      "training_step_time": 0.405245304107666
    },
    {
      "epoch": 0.000325543212890625,
      "model_forward_time": 0.11530017852783203,
      "step": 53337
    },
    {
      "epoch": 0.000325543212890625,
      "step": 53337,
      "training_step_time": 0.49761104583740234
    },
    {
      "epoch": 0.00032554931640625,
      "model_forward_time": 0.11551880836486816,
      "step": 53338
    },
    {
      "epoch": 0.00032554931640625,
      "step": 53338,
      "training_step_time": 0.40239882469177246
    },
    {
      "epoch": 0.000325555419921875,
      "model_forward_time": 0.11579465866088867,
      "step": 53339
    },
    {
      "epoch": 0.000325555419921875,
      "step": 53339,
      "training_step_time": 0.43673062324523926
    },
    {
      "epoch": 0.0003255615234375,
      "grad_norm": 0.08692994713783264,
      "learning_rate": 3.3308616077036115e-06,
      "loss": 0.0321,
      "step": 53340
    },
    {
      "epoch": 0.0003255615234375,
      "model_forward_time": 0.11471986770629883,
      "step": 53340
    },
    {
      "epoch": 0.0003255615234375,
      "step": 53340,
      "training_step_time": 0.4080801010131836
    },
    {
      "epoch": 0.000325567626953125,
      "model_forward_time": 0.11506819725036621,
      "step": 53341
    },
    {
      "epoch": 0.000325567626953125,
      "step": 53341,
      "training_step_time": 0.3954315185546875
    },
    {
      "epoch": 0.00032557373046875,
      "model_forward_time": 0.1144101619720459,
      "step": 53342
    },
    {
      "epoch": 0.00032557373046875,
      "step": 53342,
      "training_step_time": 0.4163215160369873
    },
    {
      "epoch": 0.000325579833984375,
      "model_forward_time": 0.11449027061462402,
      "step": 53343
    },
    {
      "epoch": 0.000325579833984375,
      "step": 53343,
      "training_step_time": 0.38484740257263184
    },
    {
      "epoch": 0.0003255859375,
      "model_forward_time": 0.11560320854187012,
      "step": 53344
    },
    {
      "epoch": 0.0003255859375,
      "step": 53344,
      "training_step_time": 0.40079665184020996
    },
    {
      "epoch": 0.000325592041015625,
      "model_forward_time": 0.1154325008392334,
      "step": 53345
    },
    {
      "epoch": 0.000325592041015625,
      "step": 53345,
      "training_step_time": 0.39394116401672363
    },
    {
      "epoch": 0.00032559814453125,
      "model_forward_time": 0.11480188369750977,
      "step": 53346
    },
    {
      "epoch": 0.00032559814453125,
      "step": 53346,
      "training_step_time": 0.4419980049133301
    },
    {
      "epoch": 0.000325604248046875,
      "model_forward_time": 0.11507606506347656,
      "step": 53347
    },
    {
      "epoch": 0.000325604248046875,
      "step": 53347,
      "training_step_time": 0.474564790725708
    },
    {
      "epoch": 0.0003256103515625,
      "model_forward_time": 0.11515235900878906,
      "step": 53348
    },
    {
      "epoch": 0.0003256103515625,
      "step": 53348,
      "training_step_time": 0.4606287479400635
    },
    {
      "epoch": 0.000325616455078125,
      "model_forward_time": 0.11461639404296875,
      "step": 53349
    },
    {
      "epoch": 0.000325616455078125,
      "step": 53349,
      "training_step_time": 0.3909337520599365
    },
    {
      "epoch": 0.00032562255859375,
      "grad_norm": 0.12449445575475693,
      "learning_rate": 3.3209786751399187e-06,
      "loss": 0.0395,
      "step": 53350
    },
    {
      "epoch": 0.00032562255859375,
      "model_forward_time": 0.11463046073913574,
      "step": 53350
    },
    {
      "epoch": 0.00032562255859375,
      "step": 53350,
      "training_step_time": 0.3892083168029785
    },
    {
      "epoch": 0.000325628662109375,
      "model_forward_time": 0.11618566513061523,
      "step": 53351
    },
    {
      "epoch": 0.000325628662109375,
      "step": 53351,
      "training_step_time": 0.411182165145874
    },
    {
      "epoch": 0.000325634765625,
      "model_forward_time": 0.11432695388793945,
      "step": 53352
    },
    {
      "epoch": 0.000325634765625,
      "step": 53352,
      "training_step_time": 0.4712967872619629
    },
    {
      "epoch": 0.000325640869140625,
      "model_forward_time": 0.11516594886779785,
      "step": 53353
    },
    {
      "epoch": 0.000325640869140625,
      "step": 53353,
      "training_step_time": 0.4351210594177246
    },
    {
      "epoch": 0.00032564697265625,
      "model_forward_time": 0.1150209903717041,
      "step": 53354
    },
    {
      "epoch": 0.00032564697265625,
      "step": 53354,
      "training_step_time": 0.402925968170166
    },
    {
      "epoch": 0.000325653076171875,
      "model_forward_time": 0.11495304107666016,
      "step": 53355
    },
    {
      "epoch": 0.000325653076171875,
      "step": 53355,
      "training_step_time": 0.40230393409729004
    },
    {
      "epoch": 0.0003256591796875,
      "model_forward_time": 0.11550498008728027,
      "step": 53356
    },
    {
      "epoch": 0.0003256591796875,
      "step": 53356,
      "training_step_time": 0.3903324604034424
    },
    {
      "epoch": 0.000325665283203125,
      "model_forward_time": 0.11500144004821777,
      "step": 53357
    },
    {
      "epoch": 0.000325665283203125,
      "step": 53357,
      "training_step_time": 0.4179048538208008
    },
    {
      "epoch": 0.00032567138671875,
      "model_forward_time": 0.11533951759338379,
      "step": 53358
    },
    {
      "epoch": 0.00032567138671875,
      "step": 53358,
      "training_step_time": 0.44568943977355957
    },
    {
      "epoch": 0.000325677490234375,
      "model_forward_time": 0.11486506462097168,
      "step": 53359
    },
    {
      "epoch": 0.000325677490234375,
      "step": 53359,
      "training_step_time": 0.4179868698120117
    },
    {
      "epoch": 0.00032568359375,
      "grad_norm": 0.06997397541999817,
      "learning_rate": 3.3111099224304e-06,
      "loss": 0.0334,
      "step": 53360
    },
    {
      "epoch": 0.00032568359375,
      "model_forward_time": 0.11474156379699707,
      "step": 53360
    },
    {
      "epoch": 0.00032568359375,
      "step": 53360,
      "training_step_time": 0.365811824798584
    },
    {
      "epoch": 0.000325689697265625,
      "model_forward_time": 0.11489677429199219,
      "step": 53361
    },
    {
      "epoch": 0.000325689697265625,
      "step": 53361,
      "training_step_time": 0.45783019065856934
    },
    {
      "epoch": 0.00032569580078125,
      "model_forward_time": 0.11574912071228027,
      "step": 53362
    },
    {
      "epoch": 0.00032569580078125,
      "step": 53362,
      "training_step_time": 0.4071958065032959
    },
    {
      "epoch": 0.000325701904296875,
      "model_forward_time": 0.11478972434997559,
      "step": 53363
    },
    {
      "epoch": 0.000325701904296875,
      "step": 53363,
      "training_step_time": 0.40663886070251465
    },
    {
      "epoch": 0.0003257080078125,
      "model_forward_time": 0.11489152908325195,
      "step": 53364
    },
    {
      "epoch": 0.0003257080078125,
      "step": 53364,
      "training_step_time": 0.4335005283355713
    },
    {
      "epoch": 0.000325714111328125,
      "model_forward_time": 0.11444568634033203,
      "step": 53365
    },
    {
      "epoch": 0.000325714111328125,
      "step": 53365,
      "training_step_time": 0.39612436294555664
    },
    {
      "epoch": 0.00032572021484375,
      "model_forward_time": 0.11602234840393066,
      "step": 53366
    },
    {
      "epoch": 0.00032572021484375,
      "step": 53366,
      "training_step_time": 0.43529558181762695
    },
    {
      "epoch": 0.000325726318359375,
      "model_forward_time": 0.11484217643737793,
      "step": 53367
    },
    {
      "epoch": 0.000325726318359375,
      "step": 53367,
      "training_step_time": 0.39203953742980957
    },
    {
      "epoch": 0.000325732421875,
      "model_forward_time": 0.11538457870483398,
      "step": 53368
    },
    {
      "epoch": 0.000325732421875,
      "step": 53368,
      "training_step_time": 0.4186384677886963
    },
    {
      "epoch": 0.000325738525390625,
      "model_forward_time": 0.11467885971069336,
      "step": 53369
    },
    {
      "epoch": 0.000325738525390625,
      "step": 53369,
      "training_step_time": 0.3894338607788086
    },
    {
      "epoch": 0.00032574462890625,
      "grad_norm": 0.10300110280513763,
      "learning_rate": 3.301255352572946e-06,
      "loss": 0.0354,
      "step": 53370
    },
    {
      "epoch": 0.00032574462890625,
      "model_forward_time": 0.11553287506103516,
      "step": 53370
    },
    {
      "epoch": 0.00032574462890625,
      "step": 53370,
      "training_step_time": 0.40317797660827637
    },
    {
      "epoch": 0.000325750732421875,
      "model_forward_time": 0.11511111259460449,
      "step": 53371
    },
    {
      "epoch": 0.000325750732421875,
      "step": 53371,
      "training_step_time": 0.5031490325927734
    },
    {
      "epoch": 0.0003257568359375,
      "model_forward_time": 0.11472654342651367,
      "step": 53372
    },
    {
      "epoch": 0.0003257568359375,
      "step": 53372,
      "training_step_time": 0.4127042293548584
    },
    {
      "epoch": 0.000325762939453125,
      "model_forward_time": 0.1147909164428711,
      "step": 53373
    },
    {
      "epoch": 0.000325762939453125,
      "step": 53373,
      "training_step_time": 0.4149355888366699
    },
    {
      "epoch": 0.00032576904296875,
      "model_forward_time": 0.11472845077514648,
      "step": 53374
    },
    {
      "epoch": 0.00032576904296875,
      "step": 53374,
      "training_step_time": 0.3921620845794678
    },
    {
      "epoch": 0.000325775146484375,
      "model_forward_time": 0.11539888381958008,
      "step": 53375
    },
    {
      "epoch": 0.000325775146484375,
      "step": 53375,
      "training_step_time": 0.40895700454711914
    },
    {
      "epoch": 0.00032578125,
      "model_forward_time": 0.1143944263458252,
      "step": 53376
    },
    {
      "epoch": 0.00032578125,
      "step": 53376,
      "training_step_time": 0.49768519401550293
    },
    {
      "epoch": 0.000325787353515625,
      "model_forward_time": 0.1148526668548584,
      "step": 53377
    },
    {
      "epoch": 0.000325787353515625,
      "step": 53377,
      "training_step_time": 0.4997842311859131
    },
    {
      "epoch": 0.00032579345703125,
      "model_forward_time": 0.11454486846923828,
      "step": 53378
    },
    {
      "epoch": 0.00032579345703125,
      "step": 53378,
      "training_step_time": 0.4604759216308594
    },
    {
      "epoch": 0.000325799560546875,
      "model_forward_time": 0.1143345832824707,
      "step": 53379
    },
    {
      "epoch": 0.000325799560546875,
      "step": 53379,
      "training_step_time": 0.3908407688140869
    },
    {
      "epoch": 0.0003258056640625,
      "grad_norm": 0.09801886975765228,
      "learning_rate": 3.2914149685611073e-06,
      "loss": 0.0345,
      "step": 53380
    },
    {
      "epoch": 0.0003258056640625,
      "model_forward_time": 0.11502671241760254,
      "step": 53380
    },
    {
      "epoch": 0.0003258056640625,
      "step": 53380,
      "training_step_time": 0.3981351852416992
    },
    {
      "epoch": 0.000325811767578125,
      "model_forward_time": 0.11488485336303711,
      "step": 53381
    },
    {
      "epoch": 0.000325811767578125,
      "step": 53381,
      "training_step_time": 0.39019179344177246
    },
    {
      "epoch": 0.00032581787109375,
      "model_forward_time": 0.11472225189208984,
      "step": 53382
    },
    {
      "epoch": 0.00032581787109375,
      "step": 53382,
      "training_step_time": 0.49048829078674316
    },
    {
      "epoch": 0.000325823974609375,
      "model_forward_time": 0.11449432373046875,
      "step": 53383
    },
    {
      "epoch": 0.000325823974609375,
      "step": 53383,
      "training_step_time": 0.39441418647766113
    },
    {
      "epoch": 0.000325830078125,
      "model_forward_time": 0.11499190330505371,
      "step": 53384
    },
    {
      "epoch": 0.000325830078125,
      "step": 53384,
      "training_step_time": 0.39244747161865234
    },
    {
      "epoch": 0.000325836181640625,
      "model_forward_time": 0.11516213417053223,
      "step": 53385
    },
    {
      "epoch": 0.000325836181640625,
      "step": 53385,
      "training_step_time": 0.4463322162628174
    },
    {
      "epoch": 0.00032584228515625,
      "model_forward_time": 0.11519312858581543,
      "step": 53386
    },
    {
      "epoch": 0.00032584228515625,
      "step": 53386,
      "training_step_time": 0.39859724044799805
    },
    {
      "epoch": 0.000325848388671875,
      "model_forward_time": 0.1153419017791748,
      "step": 53387
    },
    {
      "epoch": 0.000325848388671875,
      "step": 53387,
      "training_step_time": 0.4840257167816162
    },
    {
      "epoch": 0.0003258544921875,
      "model_forward_time": 0.11465573310852051,
      "step": 53388
    },
    {
      "epoch": 0.0003258544921875,
      "step": 53388,
      "training_step_time": 0.39017748832702637
    },
    {
      "epoch": 0.000325860595703125,
      "model_forward_time": 0.11533784866333008,
      "step": 53389
    },
    {
      "epoch": 0.000325860595703125,
      "step": 53389,
      "training_step_time": 0.39349937438964844
    },
    {
      "epoch": 0.00032586669921875,
      "grad_norm": 0.07699429988861084,
      "learning_rate": 3.2815887733841365e-06,
      "loss": 0.0365,
      "step": 53390
    },
    {
      "epoch": 0.00032586669921875,
      "model_forward_time": 0.1150350570678711,
      "step": 53390
    },
    {
      "epoch": 0.00032586669921875,
      "step": 53390,
      "training_step_time": 0.41118955612182617
    },
    {
      "epoch": 0.000325872802734375,
      "model_forward_time": 0.11470651626586914,
      "step": 53391
    },
    {
      "epoch": 0.000325872802734375,
      "step": 53391,
      "training_step_time": 0.41605114936828613
    },
    {
      "epoch": 0.00032587890625,
      "model_forward_time": 0.11510324478149414,
      "step": 53392
    },
    {
      "epoch": 0.00032587890625,
      "step": 53392,
      "training_step_time": 0.4138510227203369
    },
    {
      "epoch": 0.000325885009765625,
      "model_forward_time": 0.1160435676574707,
      "step": 53393
    },
    {
      "epoch": 0.000325885009765625,
      "step": 53393,
      "training_step_time": 0.39463233947753906
    },
    {
      "epoch": 0.00032589111328125,
      "model_forward_time": 0.11475062370300293,
      "step": 53394
    },
    {
      "epoch": 0.00032589111328125,
      "step": 53394,
      "training_step_time": 0.3956131935119629
    },
    {
      "epoch": 0.000325897216796875,
      "model_forward_time": 0.11510157585144043,
      "step": 53395
    },
    {
      "epoch": 0.000325897216796875,
      "step": 53395,
      "training_step_time": 0.45098233222961426
    },
    {
      "epoch": 0.0003259033203125,
      "model_forward_time": 0.11496710777282715,
      "step": 53396
    },
    {
      "epoch": 0.0003259033203125,
      "step": 53396,
      "training_step_time": 0.41991758346557617
    },
    {
      "epoch": 0.000325909423828125,
      "model_forward_time": 0.11483645439147949,
      "step": 53397
    },
    {
      "epoch": 0.000325909423828125,
      "step": 53397,
      "training_step_time": 0.48725461959838867
    },
    {
      "epoch": 0.00032591552734375,
      "model_forward_time": 0.11527705192565918,
      "step": 53398
    },
    {
      "epoch": 0.00032591552734375,
      "step": 53398,
      "training_step_time": 0.388228178024292
    },
    {
      "epoch": 0.000325921630859375,
      "model_forward_time": 0.1145334243774414,
      "step": 53399
    },
    {
      "epoch": 0.000325921630859375,
      "step": 53399,
      "training_step_time": 0.3905363082885742
    },
    {
      "epoch": 0.000325927734375,
      "grad_norm": 0.08452556282281876,
      "learning_rate": 3.271776770026963e-06,
      "loss": 0.0368,
      "step": 53400
    },
    {
      "epoch": 0.000325927734375,
      "model_forward_time": 0.11493897438049316,
      "step": 53400
    },
    {
      "epoch": 0.000325927734375,
      "step": 53400,
      "training_step_time": 0.4158341884613037
    },
    {
      "epoch": 0.000325933837890625,
      "model_forward_time": 0.1144723892211914,
      "step": 53401
    },
    {
      "epoch": 0.000325933837890625,
      "step": 53401,
      "training_step_time": 0.44837522506713867
    },
    {
      "epoch": 0.00032593994140625,
      "model_forward_time": 0.11496305465698242,
      "step": 53402
    },
    {
      "epoch": 0.00032593994140625,
      "step": 53402,
      "training_step_time": 0.49103307723999023
    },
    {
      "epoch": 0.000325946044921875,
      "model_forward_time": 0.11467456817626953,
      "step": 53403
    },
    {
      "epoch": 0.000325946044921875,
      "step": 53403,
      "training_step_time": 0.39075469970703125
    },
    {
      "epoch": 0.0003259521484375,
      "model_forward_time": 0.1156008243560791,
      "step": 53404
    },
    {
      "epoch": 0.0003259521484375,
      "step": 53404,
      "training_step_time": 0.4138641357421875
    },
    {
      "epoch": 0.000325958251953125,
      "model_forward_time": 0.11473941802978516,
      "step": 53405
    },
    {
      "epoch": 0.000325958251953125,
      "step": 53405,
      "training_step_time": 0.4596364498138428
    },
    {
      "epoch": 0.00032596435546875,
      "model_forward_time": 0.11492586135864258,
      "step": 53406
    },
    {
      "epoch": 0.00032596435546875,
      "step": 53406,
      "training_step_time": 0.4783935546875
    },
    {
      "epoch": 0.000325970458984375,
      "model_forward_time": 0.11466526985168457,
      "step": 53407
    },
    {
      "epoch": 0.000325970458984375,
      "step": 53407,
      "training_step_time": 0.39418506622314453
    },
    {
      "epoch": 0.0003259765625,
      "model_forward_time": 0.11519980430603027,
      "step": 53408
    },
    {
      "epoch": 0.0003259765625,
      "step": 53408,
      "training_step_time": 0.3893008232116699
    },
    {
      "epoch": 0.000325982666015625,
      "model_forward_time": 0.11560225486755371,
      "step": 53409
    },
    {
      "epoch": 0.000325982666015625,
      "step": 53409,
      "training_step_time": 0.4542405605316162
    },
    {
      "epoch": 0.00032598876953125,
      "grad_norm": 0.09328333288431168,
      "learning_rate": 3.2619789614702135e-06,
      "loss": 0.0323,
      "step": 53410
    },
    {
      "epoch": 0.00032598876953125,
      "model_forward_time": 0.11456632614135742,
      "step": 53410
    },
    {
      "epoch": 0.00032598876953125,
      "step": 53410,
      "training_step_time": 0.39534997940063477
    },
    {
      "epoch": 0.000325994873046875,
      "model_forward_time": 0.11497163772583008,
      "step": 53411
    },
    {
      "epoch": 0.000325994873046875,
      "step": 53411,
      "training_step_time": 0.41532158851623535
    },
    {
      "epoch": 0.0003260009765625,
      "model_forward_time": 0.11466574668884277,
      "step": 53412
    },
    {
      "epoch": 0.0003260009765625,
      "step": 53412,
      "training_step_time": 0.4004671573638916
    },
    {
      "epoch": 0.000326007080078125,
      "model_forward_time": 0.11525940895080566,
      "step": 53413
    },
    {
      "epoch": 0.000326007080078125,
      "step": 53413,
      "training_step_time": 0.3966801166534424
    },
    {
      "epoch": 0.00032601318359375,
      "model_forward_time": 0.11542129516601562,
      "step": 53414
    },
    {
      "epoch": 0.00032601318359375,
      "step": 53414,
      "training_step_time": 0.3942685127258301
    },
    {
      "epoch": 0.000326019287109375,
      "model_forward_time": 0.11448931694030762,
      "step": 53415
    },
    {
      "epoch": 0.000326019287109375,
      "step": 53415,
      "training_step_time": 0.4108285903930664
    },
    {
      "epoch": 0.000326025390625,
      "model_forward_time": 0.11563944816589355,
      "step": 53416
    },
    {
      "epoch": 0.000326025390625,
      "step": 53416,
      "training_step_time": 0.42166972160339355
    },
    {
      "epoch": 0.000326031494140625,
      "model_forward_time": 0.11493325233459473,
      "step": 53417
    },
    {
      "epoch": 0.000326031494140625,
      "step": 53417,
      "training_step_time": 0.4473142623901367
    },
    {
      "epoch": 0.00032603759765625,
      "model_forward_time": 0.11536431312561035,
      "step": 53418
    },
    {
      "epoch": 0.00032603759765625,
      "step": 53418,
      "training_step_time": 0.4417886734008789
    },
    {
      "epoch": 0.000326043701171875,
      "model_forward_time": 0.11522388458251953,
      "step": 53419
    },
    {
      "epoch": 0.000326043701171875,
      "step": 53419,
      "training_step_time": 0.40226054191589355
    },
    {
      "epoch": 0.0003260498046875,
      "grad_norm": 0.0979536846280098,
      "learning_rate": 3.2521953506902237e-06,
      "loss": 0.0371,
      "step": 53420
    },
    {
      "epoch": 0.0003260498046875,
      "model_forward_time": 0.11507868766784668,
      "step": 53420
    },
    {
      "epoch": 0.0003260498046875,
      "step": 53420,
      "training_step_time": 0.4259145259857178
    },
    {
      "epoch": 0.000326055908203125,
      "model_forward_time": 0.1160438060760498,
      "step": 53421
    },
    {
      "epoch": 0.000326055908203125,
      "step": 53421,
      "training_step_time": 0.4134960174560547
    },
    {
      "epoch": 0.00032606201171875,
      "model_forward_time": 0.11488223075866699,
      "step": 53422
    },
    {
      "epoch": 0.00032606201171875,
      "step": 53422,
      "training_step_time": 0.39632678031921387
    },
    {
      "epoch": 0.000326068115234375,
      "model_forward_time": 0.11571955680847168,
      "step": 53423
    },
    {
      "epoch": 0.000326068115234375,
      "step": 53423,
      "training_step_time": 0.39963698387145996
    },
    {
      "epoch": 0.00032607421875,
      "model_forward_time": 0.11507129669189453,
      "step": 53424
    },
    {
      "epoch": 0.00032607421875,
      "step": 53424,
      "training_step_time": 0.48894619941711426
    },
    {
      "epoch": 0.000326080322265625,
      "model_forward_time": 0.1152811050415039,
      "step": 53425
    },
    {
      "epoch": 0.000326080322265625,
      "step": 53425,
      "training_step_time": 0.48570966720581055
    },
    {
      "epoch": 0.00032608642578125,
      "model_forward_time": 0.11591434478759766,
      "step": 53426
    },
    {
      "epoch": 0.00032608642578125,
      "step": 53426,
      "training_step_time": 0.5706534385681152
    },
    {
      "epoch": 0.000326092529296875,
      "model_forward_time": 0.11661958694458008,
      "step": 53427
    },
    {
      "epoch": 0.000326092529296875,
      "step": 53427,
      "training_step_time": 0.5589334964752197
    },
    {
      "epoch": 0.0003260986328125,
      "model_forward_time": 0.11664175987243652,
      "step": 53428
    },
    {
      "epoch": 0.0003260986328125,
      "step": 53428,
      "training_step_time": 0.6319477558135986
    },
    {
      "epoch": 0.000326104736328125,
      "model_forward_time": 0.11854052543640137,
      "step": 53429
    },
    {
      "epoch": 0.000326104736328125,
      "step": 53429,
      "training_step_time": 0.7476816177368164
    },
    {
      "epoch": 0.00032611083984375,
      "grad_norm": 0.11097162216901779,
      "learning_rate": 3.2424259406589664e-06,
      "loss": 0.0346,
      "step": 53430
    },
    {
      "epoch": 0.00032611083984375,
      "model_forward_time": 0.11957311630249023,
      "step": 53430
    },
    {
      "epoch": 0.00032611083984375,
      "step": 53430,
      "training_step_time": 0.7207365036010742
    },
    {
      "epoch": 0.000326116943359375,
      "model_forward_time": 0.11768054962158203,
      "step": 53431
    },
    {
      "epoch": 0.000326116943359375,
      "step": 53431,
      "training_step_time": 0.6742539405822754
    },
    {
      "epoch": 0.000326123046875,
      "model_forward_time": 0.11805462837219238,
      "step": 53432
    },
    {
      "epoch": 0.000326123046875,
      "step": 53432,
      "training_step_time": 0.6452577114105225
    },
    {
      "epoch": 0.000326129150390625,
      "model_forward_time": 0.1167757511138916,
      "step": 53433
    },
    {
      "epoch": 0.000326129150390625,
      "step": 53433,
      "training_step_time": 0.6997826099395752
    },
    {
      "epoch": 0.00032613525390625,
      "model_forward_time": 0.13346600532531738,
      "step": 53434
    },
    {
      "epoch": 0.00032613525390625,
      "step": 53434,
      "training_step_time": 0.6213219165802002
    },
    {
      "epoch": 0.000326141357421875,
      "model_forward_time": 0.12056946754455566,
      "step": 53435
    },
    {
      "epoch": 0.000326141357421875,
      "step": 53435,
      "training_step_time": 0.6763393878936768
    },
    {
      "epoch": 0.0003261474609375,
      "model_forward_time": 0.11818051338195801,
      "step": 53436
    },
    {
      "epoch": 0.0003261474609375,
      "step": 53436,
      "training_step_time": 0.6606073379516602
    },
    {
      "epoch": 0.000326153564453125,
      "model_forward_time": 0.12466859817504883,
      "step": 53437
    },
    {
      "epoch": 0.000326153564453125,
      "step": 53437,
      "training_step_time": 0.6706933975219727
    },
    {
      "epoch": 0.00032615966796875,
      "model_forward_time": 0.12983131408691406,
      "step": 53438
    },
    {
      "epoch": 0.00032615966796875,
      "step": 53438,
      "training_step_time": 0.6895711421966553
    },
    {
      "epoch": 0.000326165771484375,
      "model_forward_time": 0.11807417869567871,
      "step": 53439
    },
    {
      "epoch": 0.000326165771484375,
      "step": 53439,
      "training_step_time": 0.7182705402374268
    },
    {
      "epoch": 0.000326171875,
      "grad_norm": 0.09144371747970581,
      "learning_rate": 3.2326707343441566e-06,
      "loss": 0.0335,
      "step": 53440
    },
    {
      "epoch": 0.000326171875,
      "model_forward_time": 0.12149691581726074,
      "step": 53440
    },
    {
      "epoch": 0.000326171875,
      "step": 53440,
      "training_step_time": 0.6542882919311523
    },
    {
      "epoch": 0.000326177978515625,
      "model_forward_time": 0.12686800956726074,
      "step": 53441
    },
    {
      "epoch": 0.000326177978515625,
      "step": 53441,
      "training_step_time": 0.7795584201812744
    },
    {
      "epoch": 0.00032618408203125,
      "model_forward_time": 0.11783170700073242,
      "step": 53442
    },
    {
      "epoch": 0.00032618408203125,
      "step": 53442,
      "training_step_time": 0.6549339294433594
    },
    {
      "epoch": 0.000326190185546875,
      "model_forward_time": 0.12065362930297852,
      "step": 53443
    },
    {
      "epoch": 0.000326190185546875,
      "step": 53443,
      "training_step_time": 0.6273446083068848
    },
    {
      "epoch": 0.0003261962890625,
      "model_forward_time": 0.12045025825500488,
      "step": 53444
    },
    {
      "epoch": 0.0003261962890625,
      "step": 53444,
      "training_step_time": 0.7240078449249268
    },
    {
      "epoch": 0.000326202392578125,
      "model_forward_time": 0.13099122047424316,
      "step": 53445
    },
    {
      "epoch": 0.000326202392578125,
      "step": 53445,
      "training_step_time": 0.6941542625427246
    },
    {
      "epoch": 0.00032620849609375,
      "model_forward_time": 0.11766433715820312,
      "step": 53446
    },
    {
      "epoch": 0.00032620849609375,
      "step": 53446,
      "training_step_time": 0.662499189376831
    },
    {
      "epoch": 0.000326214599609375,
      "model_forward_time": 0.11861920356750488,
      "step": 53447
    },
    {
      "epoch": 0.000326214599609375,
      "step": 53447,
      "training_step_time": 0.6827578544616699
    },
    {
      "epoch": 0.000326220703125,
      "model_forward_time": 0.12318968772888184,
      "step": 53448
    },
    {
      "epoch": 0.000326220703125,
      "step": 53448,
      "training_step_time": 0.7364816665649414
    },
    {
      "epoch": 0.000326226806640625,
      "model_forward_time": 0.11719799041748047,
      "step": 53449
    },
    {
      "epoch": 0.000326226806640625,
      "step": 53449,
      "training_step_time": 0.6985938549041748
    },
    {
      "epoch": 0.00032623291015625,
      "grad_norm": 0.10730592161417007,
      "learning_rate": 3.2229297347091514e-06,
      "loss": 0.0399,
      "step": 53450
    },
    {
      "epoch": 0.00032623291015625,
      "model_forward_time": 0.11865067481994629,
      "step": 53450
    },
    {
      "epoch": 0.00032623291015625,
      "step": 53450,
      "training_step_time": 0.548551082611084
    },
    {
      "epoch": 0.000326239013671875,
      "model_forward_time": 0.1215212345123291,
      "step": 53451
    },
    {
      "epoch": 0.000326239013671875,
      "step": 53451,
      "training_step_time": 0.7118425369262695
    },
    {
      "epoch": 0.0003262451171875,
      "model_forward_time": 0.11893749237060547,
      "step": 53452
    },
    {
      "epoch": 0.0003262451171875,
      "step": 53452,
      "training_step_time": 0.6396219730377197
    },
    {
      "epoch": 0.000326251220703125,
      "model_forward_time": 0.1168820858001709,
      "step": 53453
    },
    {
      "epoch": 0.000326251220703125,
      "step": 53453,
      "training_step_time": 0.7690372467041016
    },
    {
      "epoch": 0.00032625732421875,
      "model_forward_time": 0.11997675895690918,
      "step": 53454
    },
    {
      "epoch": 0.00032625732421875,
      "step": 53454,
      "training_step_time": 0.7626721858978271
    },
    {
      "epoch": 0.000326263427734375,
      "model_forward_time": 0.1174929141998291,
      "step": 53455
    },
    {
      "epoch": 0.000326263427734375,
      "step": 53455,
      "training_step_time": 0.6664316654205322
    },
    {
      "epoch": 0.00032626953125,
      "model_forward_time": 0.11635279655456543,
      "step": 53456
    },
    {
      "epoch": 0.00032626953125,
      "step": 53456,
      "training_step_time": 0.6340701580047607
    },
    {
      "epoch": 0.000326275634765625,
      "model_forward_time": 0.1154170036315918,
      "step": 53457
    },
    {
      "epoch": 0.000326275634765625,
      "step": 53457,
      "training_step_time": 0.7009463310241699
    },
    {
      "epoch": 0.00032628173828125,
      "model_forward_time": 0.12286996841430664,
      "step": 53458
    },
    {
      "epoch": 0.00032628173828125,
      "step": 53458,
      "training_step_time": 0.7027828693389893
    },
    {
      "epoch": 0.000326287841796875,
      "model_forward_time": 0.12074565887451172,
      "step": 53459
    },
    {
      "epoch": 0.000326287841796875,
      "step": 53459,
      "training_step_time": 0.6905286312103271
    },
    {
      "epoch": 0.0003262939453125,
      "grad_norm": 0.13487808406352997,
      "learning_rate": 3.213202944713023e-06,
      "loss": 0.0461,
      "step": 53460
    },
    {
      "epoch": 0.0003262939453125,
      "model_forward_time": 0.12321782112121582,
      "step": 53460
    },
    {
      "epoch": 0.0003262939453125,
      "step": 53460,
      "training_step_time": 0.7327220439910889
    },
    {
      "epoch": 0.000326300048828125,
      "model_forward_time": 0.11814641952514648,
      "step": 53461
    },
    {
      "epoch": 0.000326300048828125,
      "step": 53461,
      "training_step_time": 0.676483154296875
    },
    {
      "epoch": 0.00032630615234375,
      "model_forward_time": 0.12400269508361816,
      "step": 53462
    },
    {
      "epoch": 0.00032630615234375,
      "step": 53462,
      "training_step_time": 0.7203731536865234
    },
    {
      "epoch": 0.000326312255859375,
      "model_forward_time": 0.11696267127990723,
      "step": 53463
    },
    {
      "epoch": 0.000326312255859375,
      "step": 53463,
      "training_step_time": 0.7002029418945312
    },
    {
      "epoch": 0.000326318359375,
      "model_forward_time": 0.11724090576171875,
      "step": 53464
    },
    {
      "epoch": 0.000326318359375,
      "step": 53464,
      "training_step_time": 0.7462050914764404
    },
    {
      "epoch": 0.000326324462890625,
      "model_forward_time": 0.11642599105834961,
      "step": 53465
    },
    {
      "epoch": 0.000326324462890625,
      "step": 53465,
      "training_step_time": 0.6556310653686523
    },
    {
      "epoch": 0.00032633056640625,
      "model_forward_time": 0.1156313419342041,
      "step": 53466
    },
    {
      "epoch": 0.00032633056640625,
      "step": 53466,
      "training_step_time": 0.6594667434692383
    },
    {
      "epoch": 0.000326336669921875,
      "model_forward_time": 0.12563061714172363,
      "step": 53467
    },
    {
      "epoch": 0.000326336669921875,
      "step": 53467,
      "training_step_time": 0.6739084720611572
    },
    {
      "epoch": 0.0003263427734375,
      "model_forward_time": 0.11846041679382324,
      "step": 53468
    },
    {
      "epoch": 0.0003263427734375,
      "step": 53468,
      "training_step_time": 0.7392330169677734
    },
    {
      "epoch": 0.000326348876953125,
      "model_forward_time": 0.12112665176391602,
      "step": 53469
    },
    {
      "epoch": 0.000326348876953125,
      "step": 53469,
      "training_step_time": 0.6949372291564941
    },
    {
      "epoch": 0.00032635498046875,
      "grad_norm": 0.08697409182786942,
      "learning_rate": 3.203490367310502e-06,
      "loss": 0.0344,
      "step": 53470
    },
    {
      "epoch": 0.00032635498046875,
      "model_forward_time": 0.12025046348571777,
      "step": 53470
    },
    {
      "epoch": 0.00032635498046875,
      "step": 53470,
      "training_step_time": 0.6883268356323242
    },
    {
      "epoch": 0.000326361083984375,
      "model_forward_time": 0.1234903335571289,
      "step": 53471
    },
    {
      "epoch": 0.000326361083984375,
      "step": 53471,
      "training_step_time": 0.6292374134063721
    },
    {
      "epoch": 0.0003263671875,
      "model_forward_time": 0.1192622184753418,
      "step": 53472
    },
    {
      "epoch": 0.0003263671875,
      "step": 53472,
      "training_step_time": 0.6686444282531738
    },
    {
      "epoch": 0.000326373291015625,
      "model_forward_time": 0.11869668960571289,
      "step": 53473
    },
    {
      "epoch": 0.000326373291015625,
      "step": 53473,
      "training_step_time": 0.6888449192047119
    },
    {
      "epoch": 0.00032637939453125,
      "model_forward_time": 0.11959075927734375,
      "step": 53474
    },
    {
      "epoch": 0.00032637939453125,
      "step": 53474,
      "training_step_time": 0.6319615840911865
    },
    {
      "epoch": 0.000326385498046875,
      "model_forward_time": 0.12205076217651367,
      "step": 53475
    },
    {
      "epoch": 0.000326385498046875,
      "step": 53475,
      "training_step_time": 0.7548542022705078
    },
    {
      "epoch": 0.0003263916015625,
      "model_forward_time": 0.12317872047424316,
      "step": 53476
    },
    {
      "epoch": 0.0003263916015625,
      "step": 53476,
      "training_step_time": 0.7318477630615234
    },
    {
      "epoch": 0.000326397705078125,
      "model_forward_time": 0.12163472175598145,
      "step": 53477
    },
    {
      "epoch": 0.000326397705078125,
      "step": 53477,
      "training_step_time": 0.6365866661071777
    },
    {
      "epoch": 0.00032640380859375,
      "model_forward_time": 0.1159830093383789,
      "step": 53478
    },
    {
      "epoch": 0.00032640380859375,
      "step": 53478,
      "training_step_time": 0.5439379215240479
    },
    {
      "epoch": 0.000326409912109375,
      "model_forward_time": 0.12143182754516602,
      "step": 53479
    },
    {
      "epoch": 0.000326409912109375,
      "step": 53479,
      "training_step_time": 0.7434403896331787
    },
    {
      "epoch": 0.000326416015625,
      "grad_norm": 0.09700743854045868,
      "learning_rate": 3.193792005452018e-06,
      "loss": 0.0389,
      "step": 53480
    },
    {
      "epoch": 0.000326416015625,
      "model_forward_time": 0.11844730377197266,
      "step": 53480
    },
    {
      "epoch": 0.000326416015625,
      "step": 53480,
      "training_step_time": 0.7586605548858643
    },
    {
      "epoch": 0.000326422119140625,
      "model_forward_time": 0.1239771842956543,
      "step": 53481
    },
    {
      "epoch": 0.000326422119140625,
      "step": 53481,
      "training_step_time": 0.7383294105529785
    },
    {
      "epoch": 0.00032642822265625,
      "model_forward_time": 0.11530303955078125,
      "step": 53482
    },
    {
      "epoch": 0.00032642822265625,
      "step": 53482,
      "training_step_time": 0.6452102661132812
    },
    {
      "epoch": 0.000326434326171875,
      "model_forward_time": 0.11781191825866699,
      "step": 53483
    },
    {
      "epoch": 0.000326434326171875,
      "step": 53483,
      "training_step_time": 0.6559109687805176
    },
    {
      "epoch": 0.0003264404296875,
      "model_forward_time": 0.11679220199584961,
      "step": 53484
    },
    {
      "epoch": 0.0003264404296875,
      "step": 53484,
      "training_step_time": 0.6583199501037598
    },
    {
      "epoch": 0.000326446533203125,
      "model_forward_time": 0.11655330657958984,
      "step": 53485
    },
    {
      "epoch": 0.000326446533203125,
      "step": 53485,
      "training_step_time": 0.6612071990966797
    },
    {
      "epoch": 0.00032645263671875,
      "model_forward_time": 0.11910438537597656,
      "step": 53486
    },
    {
      "epoch": 0.00032645263671875,
      "step": 53486,
      "training_step_time": 0.7231945991516113
    },
    {
      "epoch": 0.000326458740234375,
      "model_forward_time": 0.1199040412902832,
      "step": 53487
    },
    {
      "epoch": 0.000326458740234375,
      "step": 53487,
      "training_step_time": 0.6406090259552002
    },
    {
      "epoch": 0.00032646484375,
      "model_forward_time": 0.1186668872833252,
      "step": 53488
    },
    {
      "epoch": 0.00032646484375,
      "step": 53488,
      "training_step_time": 0.6722848415374756
    },
    {
      "epoch": 0.000326470947265625,
      "model_forward_time": 0.12274980545043945,
      "step": 53489
    },
    {
      "epoch": 0.000326470947265625,
      "step": 53489,
      "training_step_time": 0.627629280090332
    },
    {
      "epoch": 0.00032647705078125,
      "grad_norm": 0.10909593850374222,
      "learning_rate": 3.1841078620836683e-06,
      "loss": 0.0392,
      "step": 53490
    },
    {
      "epoch": 0.00032647705078125,
      "model_forward_time": 0.11809277534484863,
      "step": 53490
    },
    {
      "epoch": 0.00032647705078125,
      "step": 53490,
      "training_step_time": 0.6004457473754883
    },
    {
      "epoch": 0.000326483154296875,
      "model_forward_time": 0.1201622486114502,
      "step": 53491
    },
    {
      "epoch": 0.000326483154296875,
      "step": 53491,
      "training_step_time": 0.7515559196472168
    },
    {
      "epoch": 0.0003264892578125,
      "model_forward_time": 0.11757874488830566,
      "step": 53492
    },
    {
      "epoch": 0.0003264892578125,
      "step": 53492,
      "training_step_time": 0.6116776466369629
    },
    {
      "epoch": 0.000326495361328125,
      "model_forward_time": 0.11977076530456543,
      "step": 53493
    },
    {
      "epoch": 0.000326495361328125,
      "step": 53493,
      "training_step_time": 0.6409294605255127
    },
    {
      "epoch": 0.00032650146484375,
      "model_forward_time": 0.11613154411315918,
      "step": 53494
    },
    {
      "epoch": 0.00032650146484375,
      "step": 53494,
      "training_step_time": 0.6332833766937256
    },
    {
      "epoch": 0.000326507568359375,
      "model_forward_time": 0.11864805221557617,
      "step": 53495
    },
    {
      "epoch": 0.000326507568359375,
      "step": 53495,
      "training_step_time": 0.6356668472290039
    },
    {
      "epoch": 0.000326513671875,
      "model_forward_time": 0.11609792709350586,
      "step": 53496
    },
    {
      "epoch": 0.000326513671875,
      "step": 53496,
      "training_step_time": 0.5256035327911377
    },
    {
      "epoch": 0.000326519775390625,
      "model_forward_time": 0.12320446968078613,
      "step": 53497
    },
    {
      "epoch": 0.000326519775390625,
      "step": 53497,
      "training_step_time": 0.45044612884521484
    },
    {
      "epoch": 0.00032652587890625,
      "model_forward_time": 0.11726641654968262,
      "step": 53498
    },
    {
      "epoch": 0.00032652587890625,
      "step": 53498,
      "training_step_time": 0.3989884853363037
    },
    {
      "epoch": 0.000326531982421875,
      "model_forward_time": 0.11743521690368652,
      "step": 53499
    },
    {
      "epoch": 0.000326531982421875,
      "step": 53499,
      "training_step_time": 0.5485267639160156
    },
    {
      "epoch": 0.0003265380859375,
      "grad_norm": 0.08807535469532013,
      "learning_rate": 3.1744379401472677e-06,
      "loss": 0.0364,
      "step": 53500
    },
    {
      "epoch": 0.0003265380859375,
      "model_forward_time": 0.11692523956298828,
      "step": 53500
    },
    {
      "epoch": 0.0003265380859375,
      "step": 53500,
      "training_step_time": 0.5935556888580322
    },
    {
      "epoch": 0.000326544189453125,
      "model_forward_time": 0.11559534072875977,
      "step": 53501
    },
    {
      "epoch": 0.000326544189453125,
      "step": 53501,
      "training_step_time": 0.4194002151489258
    },
    {
      "epoch": 0.00032655029296875,
      "model_forward_time": 0.11485958099365234,
      "step": 53502
    },
    {
      "epoch": 0.00032655029296875,
      "step": 53502,
      "training_step_time": 0.503892183303833
    },
    {
      "epoch": 0.000326556396484375,
      "model_forward_time": 0.11502671241760254,
      "step": 53503
    },
    {
      "epoch": 0.000326556396484375,
      "step": 53503,
      "training_step_time": 0.41492581367492676
    },
    {
      "epoch": 0.0003265625,
      "model_forward_time": 0.11480093002319336,
      "step": 53504
    },
    {
      "epoch": 0.0003265625,
      "step": 53504,
      "training_step_time": 0.3898892402648926
    },
    {
      "epoch": 0.000326568603515625,
      "model_forward_time": 0.11578702926635742,
      "step": 53505
    },
    {
      "epoch": 0.000326568603515625,
      "step": 53505,
      "training_step_time": 0.38273024559020996
    },
    {
      "epoch": 0.00032657470703125,
      "model_forward_time": 0.11551713943481445,
      "step": 53506
    },
    {
      "epoch": 0.00032657470703125,
      "step": 53506,
      "training_step_time": 0.4085421562194824
    },
    {
      "epoch": 0.000326580810546875,
      "model_forward_time": 0.11532306671142578,
      "step": 53507
    },
    {
      "epoch": 0.000326580810546875,
      "step": 53507,
      "training_step_time": 0.43013906478881836
    },
    {
      "epoch": 0.0003265869140625,
      "model_forward_time": 0.11520528793334961,
      "step": 53508
    },
    {
      "epoch": 0.0003265869140625,
      "step": 53508,
      "training_step_time": 0.40583348274230957
    },
    {
      "epoch": 0.000326593017578125,
      "model_forward_time": 0.11619305610656738,
      "step": 53509
    },
    {
      "epoch": 0.000326593017578125,
      "step": 53509,
      "training_step_time": 0.47583532333374023
    },
    {
      "epoch": 0.00032659912109375,
      "grad_norm": 0.09425590187311172,
      "learning_rate": 3.164782242580244e-06,
      "loss": 0.0419,
      "step": 53510
    },
    {
      "epoch": 0.00032659912109375,
      "model_forward_time": 0.11516118049621582,
      "step": 53510
    },
    {
      "epoch": 0.00032659912109375,
      "step": 53510,
      "training_step_time": 0.40132808685302734
    },
    {
      "epoch": 0.000326605224609375,
      "model_forward_time": 0.11662626266479492,
      "step": 53511
    },
    {
      "epoch": 0.000326605224609375,
      "step": 53511,
      "training_step_time": 0.3829348087310791
    },
    {
      "epoch": 0.000326611328125,
      "model_forward_time": 0.11584591865539551,
      "step": 53512
    },
    {
      "epoch": 0.000326611328125,
      "step": 53512,
      "training_step_time": 0.4030616283416748
    },
    {
      "epoch": 0.000326617431640625,
      "model_forward_time": 0.1154947280883789,
      "step": 53513
    },
    {
      "epoch": 0.000326617431640625,
      "step": 53513,
      "training_step_time": 0.457134485244751
    },
    {
      "epoch": 0.00032662353515625,
      "model_forward_time": 0.11509394645690918,
      "step": 53514
    },
    {
      "epoch": 0.00032662353515625,
      "step": 53514,
      "training_step_time": 0.49977540969848633
    },
    {
      "epoch": 0.000326629638671875,
      "model_forward_time": 0.11640691757202148,
      "step": 53515
    },
    {
      "epoch": 0.000326629638671875,
      "step": 53515,
      "training_step_time": 0.474625825881958
    },
    {
      "epoch": 0.0003266357421875,
      "model_forward_time": 0.11498785018920898,
      "step": 53516
    },
    {
      "epoch": 0.0003266357421875,
      "step": 53516,
      "training_step_time": 0.4287436008453369
    },
    {
      "epoch": 0.000326641845703125,
      "model_forward_time": 0.11475229263305664,
      "step": 53517
    },
    {
      "epoch": 0.000326641845703125,
      "step": 53517,
      "training_step_time": 0.4407765865325928
    },
    {
      "epoch": 0.00032664794921875,
      "model_forward_time": 0.11461281776428223,
      "step": 53518
    },
    {
      "epoch": 0.00032664794921875,
      "step": 53518,
      "training_step_time": 0.4027078151702881
    },
    {
      "epoch": 0.000326654052734375,
      "model_forward_time": 0.1149892807006836,
      "step": 53519
    },
    {
      "epoch": 0.000326654052734375,
      "step": 53519,
      "training_step_time": 0.39204955101013184
    },
    {
      "epoch": 0.00032666015625,
      "grad_norm": 0.10440850257873535,
      "learning_rate": 3.155140772315773e-06,
      "loss": 0.0381,
      "step": 53520
    },
    {
      "epoch": 0.00032666015625,
      "model_forward_time": 0.1147465705871582,
      "step": 53520
    },
    {
      "epoch": 0.00032666015625,
      "step": 53520,
      "training_step_time": 0.39691996574401855
    },
    {
      "epoch": 0.000326666259765625,
      "model_forward_time": 0.11517620086669922,
      "step": 53521
    },
    {
      "epoch": 0.000326666259765625,
      "step": 53521,
      "training_step_time": 0.37877964973449707
    },
    {
      "epoch": 0.00032667236328125,
      "model_forward_time": 0.11511969566345215,
      "step": 53522
    },
    {
      "epoch": 0.00032667236328125,
      "step": 53522,
      "training_step_time": 0.3870124816894531
    },
    {
      "epoch": 0.000326678466796875,
      "model_forward_time": 0.11528921127319336,
      "step": 53523
    },
    {
      "epoch": 0.000326678466796875,
      "step": 53523,
      "training_step_time": 0.3777158260345459
    },
    {
      "epoch": 0.0003266845703125,
      "model_forward_time": 0.11512613296508789,
      "step": 53524
    },
    {
      "epoch": 0.0003266845703125,
      "step": 53524,
      "training_step_time": 0.4424002170562744
    },
    {
      "epoch": 0.000326690673828125,
      "model_forward_time": 0.11534714698791504,
      "step": 53525
    },
    {
      "epoch": 0.000326690673828125,
      "step": 53525,
      "training_step_time": 0.39246129989624023
    },
    {
      "epoch": 0.00032669677734375,
      "model_forward_time": 0.1148073673248291,
      "step": 53526
    },
    {
      "epoch": 0.00032669677734375,
      "step": 53526,
      "training_step_time": 0.4092395305633545
    },
    {
      "epoch": 0.000326702880859375,
      "model_forward_time": 0.1153414249420166,
      "step": 53527
    },
    {
      "epoch": 0.000326702880859375,
      "step": 53527,
      "training_step_time": 0.3971982002258301
    },
    {
      "epoch": 0.000326708984375,
      "model_forward_time": 0.11489653587341309,
      "step": 53528
    },
    {
      "epoch": 0.000326708984375,
      "step": 53528,
      "training_step_time": 0.4720778465270996
    },
    {
      "epoch": 0.000326715087890625,
      "model_forward_time": 0.11599493026733398,
      "step": 53529
    },
    {
      "epoch": 0.000326715087890625,
      "step": 53529,
      "training_step_time": 0.4935626983642578
    },
    {
      "epoch": 0.00032672119140625,
      "grad_norm": 0.12542732059955597,
      "learning_rate": 3.1455135322826678e-06,
      "loss": 0.0403,
      "step": 53530
    },
    {
      "epoch": 0.00032672119140625,
      "model_forward_time": 0.11554288864135742,
      "step": 53530
    },
    {
      "epoch": 0.00032672119140625,
      "step": 53530,
      "training_step_time": 0.43263840675354004
    },
    {
      "epoch": 0.000326727294921875,
      "model_forward_time": 0.11438608169555664,
      "step": 53531
    },
    {
      "epoch": 0.000326727294921875,
      "step": 53531,
      "training_step_time": 0.4089653491973877
    },
    {
      "epoch": 0.0003267333984375,
      "model_forward_time": 0.11552262306213379,
      "step": 53532
    },
    {
      "epoch": 0.0003267333984375,
      "step": 53532,
      "training_step_time": 0.3993818759918213
    },
    {
      "epoch": 0.000326739501953125,
      "model_forward_time": 0.11419510841369629,
      "step": 53533
    },
    {
      "epoch": 0.000326739501953125,
      "step": 53533,
      "training_step_time": 0.3920619487762451
    },
    {
      "epoch": 0.00032674560546875,
      "model_forward_time": 0.1148843765258789,
      "step": 53534
    },
    {
      "epoch": 0.00032674560546875,
      "step": 53534,
      "training_step_time": 0.3946564197540283
    },
    {
      "epoch": 0.000326751708984375,
      "model_forward_time": 0.11533331871032715,
      "step": 53535
    },
    {
      "epoch": 0.000326751708984375,
      "step": 53535,
      "training_step_time": 0.3789806365966797
    },
    {
      "epoch": 0.0003267578125,
      "model_forward_time": 0.1151580810546875,
      "step": 53536
    },
    {
      "epoch": 0.0003267578125,
      "step": 53536,
      "training_step_time": 0.4070453643798828
    },
    {
      "epoch": 0.000326763916015625,
      "model_forward_time": 0.11519837379455566,
      "step": 53537
    },
    {
      "epoch": 0.000326763916015625,
      "step": 53537,
      "training_step_time": 0.39035868644714355
    },
    {
      "epoch": 0.00032677001953125,
      "model_forward_time": 0.11531257629394531,
      "step": 53538
    },
    {
      "epoch": 0.00032677001953125,
      "step": 53538,
      "training_step_time": 0.4236338138580322
    },
    {
      "epoch": 0.000326776123046875,
      "model_forward_time": 0.11582374572753906,
      "step": 53539
    },
    {
      "epoch": 0.000326776123046875,
      "step": 53539,
      "training_step_time": 0.3951876163482666
    },
    {
      "epoch": 0.0003267822265625,
      "grad_norm": 0.0852222591638565,
      "learning_rate": 3.1359005254054273e-06,
      "loss": 0.0364,
      "step": 53540
    },
    {
      "epoch": 0.0003267822265625,
      "model_forward_time": 0.11462593078613281,
      "step": 53540
    },
    {
      "epoch": 0.0003267822265625,
      "step": 53540,
      "training_step_time": 0.41237616539001465
    },
    {
      "epoch": 0.000326788330078125,
      "model_forward_time": 0.11497950553894043,
      "step": 53541
    },
    {
      "epoch": 0.000326788330078125,
      "step": 53541,
      "training_step_time": 0.3936913013458252
    },
    {
      "epoch": 0.00032679443359375,
      "model_forward_time": 0.11464405059814453,
      "step": 53542
    },
    {
      "epoch": 0.00032679443359375,
      "step": 53542,
      "training_step_time": 0.38916444778442383
    },
    {
      "epoch": 0.000326800537109375,
      "model_forward_time": 0.11569762229919434,
      "step": 53543
    },
    {
      "epoch": 0.000326800537109375,
      "step": 53543,
      "training_step_time": 0.42780351638793945
    },
    {
      "epoch": 0.000326806640625,
      "model_forward_time": 0.11577200889587402,
      "step": 53544
    },
    {
      "epoch": 0.000326806640625,
      "step": 53544,
      "training_step_time": 0.44901180267333984
    },
    {
      "epoch": 0.000326812744140625,
      "model_forward_time": 0.11504650115966797,
      "step": 53545
    },
    {
      "epoch": 0.000326812744140625,
      "step": 53545,
      "training_step_time": 0.43100571632385254
    },
    {
      "epoch": 0.00032681884765625,
      "model_forward_time": 0.1146993637084961,
      "step": 53546
    },
    {
      "epoch": 0.00032681884765625,
      "step": 53546,
      "training_step_time": 0.46266984939575195
    },
    {
      "epoch": 0.000326824951171875,
      "model_forward_time": 0.11460351943969727,
      "step": 53547
    },
    {
      "epoch": 0.000326824951171875,
      "step": 53547,
      "training_step_time": 0.394024133682251
    },
    {
      "epoch": 0.0003268310546875,
      "model_forward_time": 0.11549210548400879,
      "step": 53548
    },
    {
      "epoch": 0.0003268310546875,
      "step": 53548,
      "training_step_time": 0.3963139057159424
    },
    {
      "epoch": 0.000326837158203125,
      "model_forward_time": 0.11528658866882324,
      "step": 53549
    },
    {
      "epoch": 0.000326837158203125,
      "step": 53549,
      "training_step_time": 0.4054253101348877
    },
    {
      "epoch": 0.00032684326171875,
      "grad_norm": 0.08572214096784592,
      "learning_rate": 3.126301754604233e-06,
      "loss": 0.0398,
      "step": 53550
    },
    {
      "epoch": 0.00032684326171875,
      "model_forward_time": 0.11496853828430176,
      "step": 53550
    },
    {
      "epoch": 0.00032684326171875,
      "step": 53550,
      "training_step_time": 0.4046940803527832
    },
    {
      "epoch": 0.000326849365234375,
      "model_forward_time": 0.11531496047973633,
      "step": 53551
    },
    {
      "epoch": 0.000326849365234375,
      "step": 53551,
      "training_step_time": 0.39792513847351074
    },
    {
      "epoch": 0.00032685546875,
      "model_forward_time": 0.11524033546447754,
      "step": 53552
    },
    {
      "epoch": 0.00032685546875,
      "step": 53552,
      "training_step_time": 0.4566304683685303
    },
    {
      "epoch": 0.000326861572265625,
      "model_forward_time": 0.11476325988769531,
      "step": 53553
    },
    {
      "epoch": 0.000326861572265625,
      "step": 53553,
      "training_step_time": 0.496457576751709
    },
    {
      "epoch": 0.00032686767578125,
      "model_forward_time": 0.11505436897277832,
      "step": 53554
    },
    {
      "epoch": 0.00032686767578125,
      "step": 53554,
      "training_step_time": 0.39186954498291016
    },
    {
      "epoch": 0.000326873779296875,
      "model_forward_time": 0.11546564102172852,
      "step": 53555
    },
    {
      "epoch": 0.000326873779296875,
      "step": 53555,
      "training_step_time": 0.39129066467285156
    },
    {
      "epoch": 0.0003268798828125,
      "model_forward_time": 0.11471843719482422,
      "step": 53556
    },
    {
      "epoch": 0.0003268798828125,
      "step": 53556,
      "training_step_time": 0.3922712802886963
    },
    {
      "epoch": 0.000326885986328125,
      "model_forward_time": 0.11480426788330078,
      "step": 53557
    },
    {
      "epoch": 0.000326885986328125,
      "step": 53557,
      "training_step_time": 0.40316033363342285
    },
    {
      "epoch": 0.00032689208984375,
      "model_forward_time": 0.11514043807983398,
      "step": 53558
    },
    {
      "epoch": 0.00032689208984375,
      "step": 53558,
      "training_step_time": 0.4639885425567627
    },
    {
      "epoch": 0.000326898193359375,
      "model_forward_time": 0.11492371559143066,
      "step": 53559
    },
    {
      "epoch": 0.000326898193359375,
      "step": 53559,
      "training_step_time": 0.4806544780731201
    },
    {
      "epoch": 0.000326904296875,
      "grad_norm": 0.0876580998301506,
      "learning_rate": 3.1167172227949347e-06,
      "loss": 0.0309,
      "step": 53560
    },
    {
      "epoch": 0.000326904296875,
      "model_forward_time": 0.11601710319519043,
      "step": 53560
    },
    {
      "epoch": 0.000326904296875,
      "step": 53560,
      "training_step_time": 0.4154038429260254
    },
    {
      "epoch": 0.000326910400390625,
      "model_forward_time": 0.11565303802490234,
      "step": 53561
    },
    {
      "epoch": 0.000326910400390625,
      "step": 53561,
      "training_step_time": 0.3959059715270996
    },
    {
      "epoch": 0.00032691650390625,
      "model_forward_time": 0.11663627624511719,
      "step": 53562
    },
    {
      "epoch": 0.00032691650390625,
      "step": 53562,
      "training_step_time": 0.4018833637237549
    },
    {
      "epoch": 0.000326922607421875,
      "model_forward_time": 0.11513519287109375,
      "step": 53563
    },
    {
      "epoch": 0.000326922607421875,
      "step": 53563,
      "training_step_time": 0.39200425148010254
    },
    {
      "epoch": 0.0003269287109375,
      "model_forward_time": 0.11583924293518066,
      "step": 53564
    },
    {
      "epoch": 0.0003269287109375,
      "step": 53564,
      "training_step_time": 0.4005563259124756
    },
    {
      "epoch": 0.000326934814453125,
      "model_forward_time": 0.11537528038024902,
      "step": 53565
    },
    {
      "epoch": 0.000326934814453125,
      "step": 53565,
      "training_step_time": 0.41155242919921875
    },
    {
      "epoch": 0.00032694091796875,
      "model_forward_time": 0.1152353286743164,
      "step": 53566
    },
    {
      "epoch": 0.00032694091796875,
      "step": 53566,
      "training_step_time": 0.39682626724243164
    },
    {
      "epoch": 0.000326947021484375,
      "model_forward_time": 0.11529374122619629,
      "step": 53567
    },
    {
      "epoch": 0.000326947021484375,
      "step": 53567,
      "training_step_time": 0.4704437255859375
    },
    {
      "epoch": 0.000326953125,
      "model_forward_time": 0.1152045726776123,
      "step": 53568
    },
    {
      "epoch": 0.000326953125,
      "step": 53568,
      "training_step_time": 0.47329187393188477
    },
    {
      "epoch": 0.000326959228515625,
      "model_forward_time": 0.11465215682983398,
      "step": 53569
    },
    {
      "epoch": 0.000326959228515625,
      "step": 53569,
      "training_step_time": 0.46616578102111816
    },
    {
      "epoch": 0.00032696533203125,
      "grad_norm": 0.0877372995018959,
      "learning_rate": 3.107146932889071e-06,
      "loss": 0.0467,
      "step": 53570
    },
    {
      "epoch": 0.00032696533203125,
      "model_forward_time": 0.11488890647888184,
      "step": 53570
    },
    {
      "epoch": 0.00032696533203125,
      "step": 53570,
      "training_step_time": 0.390899658203125
    },
    {
      "epoch": 0.000326971435546875,
      "model_forward_time": 0.11502623558044434,
      "step": 53571
    },
    {
      "epoch": 0.000326971435546875,
      "step": 53571,
      "training_step_time": 0.42111968994140625
    },
    {
      "epoch": 0.0003269775390625,
      "model_forward_time": 0.11478447914123535,
      "step": 53572
    },
    {
      "epoch": 0.0003269775390625,
      "step": 53572,
      "training_step_time": 0.39185023307800293
    },
    {
      "epoch": 0.000326983642578125,
      "model_forward_time": 0.1149287223815918,
      "step": 53573
    },
    {
      "epoch": 0.000326983642578125,
      "step": 53573,
      "training_step_time": 0.4264206886291504
    },
    {
      "epoch": 0.00032698974609375,
      "model_forward_time": 0.11535906791687012,
      "step": 53574
    },
    {
      "epoch": 0.00032698974609375,
      "step": 53574,
      "training_step_time": 0.5011498928070068
    },
    {
      "epoch": 0.000326995849609375,
      "model_forward_time": 0.11549854278564453,
      "step": 53575
    },
    {
      "epoch": 0.000326995849609375,
      "step": 53575,
      "training_step_time": 0.42686009407043457
    },
    {
      "epoch": 0.000327001953125,
      "model_forward_time": 0.11503362655639648,
      "step": 53576
    },
    {
      "epoch": 0.000327001953125,
      "step": 53576,
      "training_step_time": 0.39508962631225586
    },
    {
      "epoch": 0.000327008056640625,
      "model_forward_time": 0.11563277244567871,
      "step": 53577
    },
    {
      "epoch": 0.000327008056640625,
      "step": 53577,
      "training_step_time": 0.3972742557525635
    },
    {
      "epoch": 0.00032701416015625,
      "model_forward_time": 0.11429405212402344,
      "step": 53578
    },
    {
      "epoch": 0.00032701416015625,
      "step": 53578,
      "training_step_time": 0.4054527282714844
    },
    {
      "epoch": 0.000327020263671875,
      "model_forward_time": 0.11520242691040039,
      "step": 53579
    },
    {
      "epoch": 0.000327020263671875,
      "step": 53579,
      "training_step_time": 0.391754150390625
    },
    {
      "epoch": 0.0003270263671875,
      "grad_norm": 0.09132910519838333,
      "learning_rate": 3.0975908877938277e-06,
      "loss": 0.0376,
      "step": 53580
    },
    {
      "epoch": 0.0003270263671875,
      "model_forward_time": 0.11504626274108887,
      "step": 53580
    },
    {
      "epoch": 0.0003270263671875,
      "step": 53580,
      "training_step_time": 0.3948225975036621
    },
    {
      "epoch": 0.000327032470703125,
      "model_forward_time": 0.11468839645385742,
      "step": 53581
    },
    {
      "epoch": 0.000327032470703125,
      "step": 53581,
      "training_step_time": 0.39204955101013184
    },
    {
      "epoch": 0.00032703857421875,
      "model_forward_time": 0.1149606704711914,
      "step": 53582
    },
    {
      "epoch": 0.00032703857421875,
      "step": 53582,
      "training_step_time": 0.8473069667816162
    },
    {
      "epoch": 0.000327044677734375,
      "model_forward_time": 0.11429190635681152,
      "step": 53583
    },
    {
      "epoch": 0.000327044677734375,
      "step": 53583,
      "training_step_time": 0.42242980003356934
    },
    {
      "epoch": 0.00032705078125,
      "model_forward_time": 0.11464810371398926,
      "step": 53584
    },
    {
      "epoch": 0.00032705078125,
      "step": 53584,
      "training_step_time": 0.387819766998291
    },
    {
      "epoch": 0.000327056884765625,
      "model_forward_time": 0.11374878883361816,
      "step": 53585
    },
    {
      "epoch": 0.000327056884765625,
      "step": 53585,
      "training_step_time": 0.4431920051574707
    },
    {
      "epoch": 0.00032706298828125,
      "model_forward_time": 0.11393523216247559,
      "step": 53586
    },
    {
      "epoch": 0.00032706298828125,
      "step": 53586,
      "training_step_time": 0.4139397144317627
    },
    {
      "epoch": 0.000327069091796875,
      "model_forward_time": 0.11432528495788574,
      "step": 53587
    },
    {
      "epoch": 0.000327069091796875,
      "step": 53587,
      "training_step_time": 0.45803022384643555
    },
    {
      "epoch": 0.0003270751953125,
      "model_forward_time": 0.1153864860534668,
      "step": 53588
    },
    {
      "epoch": 0.0003270751953125,
      "step": 53588,
      "training_step_time": 0.4956948757171631
    },
    {
      "epoch": 0.000327081298828125,
      "model_forward_time": 0.1160881519317627,
      "step": 53589
    },
    {
      "epoch": 0.000327081298828125,
      "step": 53589,
      "training_step_time": 0.41971635818481445
    },
    {
      "epoch": 0.00032708740234375,
      "grad_norm": 0.08884026110172272,
      "learning_rate": 3.0880490904120874e-06,
      "loss": 0.04,
      "step": 53590
    },
    {
      "epoch": 0.00032708740234375,
      "model_forward_time": 0.11467432975769043,
      "step": 53590
    },
    {
      "epoch": 0.00032708740234375,
      "step": 53590,
      "training_step_time": 0.3785526752471924
    },
    {
      "epoch": 0.000327093505859375,
      "model_forward_time": 0.11554265022277832,
      "step": 53591
    },
    {
      "epoch": 0.000327093505859375,
      "step": 53591,
      "training_step_time": 0.38268256187438965
    },
    {
      "epoch": 0.000327099609375,
      "model_forward_time": 0.11525201797485352,
      "step": 53592
    },
    {
      "epoch": 0.000327099609375,
      "step": 53592,
      "training_step_time": 0.39414143562316895
    },
    {
      "epoch": 0.000327105712890625,
      "model_forward_time": 0.11548185348510742,
      "step": 53593
    },
    {
      "epoch": 0.000327105712890625,
      "step": 53593,
      "training_step_time": 0.392031192779541
    },
    {
      "epoch": 0.00032711181640625,
      "model_forward_time": 0.11469650268554688,
      "step": 53594
    },
    {
      "epoch": 0.00032711181640625,
      "step": 53594,
      "training_step_time": 0.480649471282959
    },
    {
      "epoch": 0.000327117919921875,
      "model_forward_time": 0.11470222473144531,
      "step": 53595
    },
    {
      "epoch": 0.000327117919921875,
      "step": 53595,
      "training_step_time": 0.4952702522277832
    },
    {
      "epoch": 0.0003271240234375,
      "model_forward_time": 0.1147310733795166,
      "step": 53596
    },
    {
      "epoch": 0.0003271240234375,
      "step": 53596,
      "training_step_time": 0.4691352844238281
    },
    {
      "epoch": 0.000327130126953125,
      "model_forward_time": 0.11545753479003906,
      "step": 53597
    },
    {
      "epoch": 0.000327130126953125,
      "step": 53597,
      "training_step_time": 0.4845912456512451
    },
    {
      "epoch": 0.00032713623046875,
      "model_forward_time": 0.11491608619689941,
      "step": 53598
    },
    {
      "epoch": 0.00032713623046875,
      "step": 53598,
      "training_step_time": 0.43567657470703125
    },
    {
      "epoch": 0.000327142333984375,
      "model_forward_time": 0.11515259742736816,
      "step": 53599
    },
    {
      "epoch": 0.000327142333984375,
      "step": 53599,
      "training_step_time": 0.4608585834503174
    },
    {
      "epoch": 0.0003271484375,
      "grad_norm": 0.09779947996139526,
      "learning_rate": 3.078521543642399e-06,
      "loss": 0.0406,
      "step": 53600
    },
    {
      "epoch": 0.0003271484375,
      "model_forward_time": 0.11461210250854492,
      "step": 53600
    },
    {
      "epoch": 0.0003271484375,
      "step": 53600,
      "training_step_time": 0.40173768997192383
    },
    {
      "epoch": 0.000327154541015625,
      "model_forward_time": 0.11513161659240723,
      "step": 53601
    },
    {
      "epoch": 0.000327154541015625,
      "step": 53601,
      "training_step_time": 0.3735015392303467
    },
    {
      "epoch": 0.00032716064453125,
      "model_forward_time": 0.1146078109741211,
      "step": 53602
    },
    {
      "epoch": 0.00032716064453125,
      "step": 53602,
      "training_step_time": 0.43437671661376953
    },
    {
      "epoch": 0.000327166748046875,
      "model_forward_time": 0.11566853523254395,
      "step": 53603
    },
    {
      "epoch": 0.000327166748046875,
      "step": 53603,
      "training_step_time": 0.4201626777648926
    },
    {
      "epoch": 0.0003271728515625,
      "model_forward_time": 0.11532068252563477,
      "step": 53604
    },
    {
      "epoch": 0.0003271728515625,
      "step": 53604,
      "training_step_time": 0.3918275833129883
    },
    {
      "epoch": 0.000327178955078125,
      "model_forward_time": 0.11492204666137695,
      "step": 53605
    },
    {
      "epoch": 0.000327178955078125,
      "step": 53605,
      "training_step_time": 0.38845205307006836
    },
    {
      "epoch": 0.00032718505859375,
      "model_forward_time": 0.11490797996520996,
      "step": 53606
    },
    {
      "epoch": 0.00032718505859375,
      "step": 53606,
      "training_step_time": 0.39391374588012695
    },
    {
      "epoch": 0.000327191162109375,
      "model_forward_time": 0.11493396759033203,
      "step": 53607
    },
    {
      "epoch": 0.000327191162109375,
      "step": 53607,
      "training_step_time": 0.3980264663696289
    },
    {
      "epoch": 0.000327197265625,
      "model_forward_time": 0.1151437759399414,
      "step": 53608
    },
    {
      "epoch": 0.000327197265625,
      "step": 53608,
      "training_step_time": 0.38782358169555664
    },
    {
      "epoch": 0.000327203369140625,
      "model_forward_time": 0.11495447158813477,
      "step": 53609
    },
    {
      "epoch": 0.000327203369140625,
      "step": 53609,
      "training_step_time": 0.4050724506378174
    },
    {
      "epoch": 0.00032720947265625,
      "grad_norm": 0.10311092436313629,
      "learning_rate": 3.0690082503789742e-06,
      "loss": 0.0338,
      "step": 53610
    },
    {
      "epoch": 0.00032720947265625,
      "model_forward_time": 0.11478877067565918,
      "step": 53610
    },
    {
      "epoch": 0.00032720947265625,
      "step": 53610,
      "training_step_time": 0.4021475315093994
    },
    {
      "epoch": 0.000327215576171875,
      "model_forward_time": 0.11647462844848633,
      "step": 53611
    },
    {
      "epoch": 0.000327215576171875,
      "step": 53611,
      "training_step_time": 0.4167165756225586
    },
    {
      "epoch": 0.0003272216796875,
      "model_forward_time": 0.1146397590637207,
      "step": 53612
    },
    {
      "epoch": 0.0003272216796875,
      "step": 53612,
      "training_step_time": 0.486846923828125
    },
    {
      "epoch": 0.000327227783203125,
      "model_forward_time": 0.11533474922180176,
      "step": 53613
    },
    {
      "epoch": 0.000327227783203125,
      "step": 53613,
      "training_step_time": 0.47443151473999023
    },
    {
      "epoch": 0.00032723388671875,
      "model_forward_time": 0.11551403999328613,
      "step": 53614
    },
    {
      "epoch": 0.00032723388671875,
      "step": 53614,
      "training_step_time": 0.39156413078308105
    },
    {
      "epoch": 0.000327239990234375,
      "model_forward_time": 0.11534810066223145,
      "step": 53615
    },
    {
      "epoch": 0.000327239990234375,
      "step": 53615,
      "training_step_time": 0.463076114654541
    },
    {
      "epoch": 0.00032724609375,
      "model_forward_time": 0.11476635932922363,
      "step": 53616
    },
    {
      "epoch": 0.00032724609375,
      "step": 53616,
      "training_step_time": 0.4985847473144531
    },
    {
      "epoch": 0.000327252197265625,
      "model_forward_time": 0.11440777778625488,
      "step": 53617
    },
    {
      "epoch": 0.000327252197265625,
      "step": 53617,
      "training_step_time": 0.46408677101135254
    },
    {
      "epoch": 0.00032725830078125,
      "model_forward_time": 0.11492276191711426,
      "step": 53618
    },
    {
      "epoch": 0.00032725830078125,
      "step": 53618,
      "training_step_time": 0.4189188480377197
    },
    {
      "epoch": 0.000327264404296875,
      "model_forward_time": 0.11551666259765625,
      "step": 53619
    },
    {
      "epoch": 0.000327264404296875,
      "step": 53619,
      "training_step_time": 0.3831362724304199
    },
    {
      "epoch": 0.0003272705078125,
      "grad_norm": 0.07673704624176025,
      "learning_rate": 3.059509213511702e-06,
      "loss": 0.0355,
      "step": 53620
    },
    {
      "epoch": 0.0003272705078125,
      "model_forward_time": 0.11423087120056152,
      "step": 53620
    },
    {
      "epoch": 0.0003272705078125,
      "step": 53620,
      "training_step_time": 0.3851633071899414
    },
    {
      "epoch": 0.000327276611328125,
      "model_forward_time": 0.11497378349304199,
      "step": 53621
    },
    {
      "epoch": 0.000327276611328125,
      "step": 53621,
      "training_step_time": 0.39203739166259766
    },
    {
      "epoch": 0.00032728271484375,
      "model_forward_time": 0.1150059700012207,
      "step": 53622
    },
    {
      "epoch": 0.00032728271484375,
      "step": 53622,
      "training_step_time": 0.38721227645874023
    },
    {
      "epoch": 0.000327288818359375,
      "model_forward_time": 0.11506485939025879,
      "step": 53623
    },
    {
      "epoch": 0.000327288818359375,
      "step": 53623,
      "training_step_time": 0.38878703117370605
    },
    {
      "epoch": 0.000327294921875,
      "model_forward_time": 0.11517214775085449,
      "step": 53624
    },
    {
      "epoch": 0.000327294921875,
      "step": 53624,
      "training_step_time": 0.677161455154419
    },
    {
      "epoch": 0.000327301025390625,
      "model_forward_time": 0.1151125431060791,
      "step": 53625
    },
    {
      "epoch": 0.000327301025390625,
      "step": 53625,
      "training_step_time": 0.43863821029663086
    },
    {
      "epoch": 0.00032730712890625,
      "model_forward_time": 0.11405563354492188,
      "step": 53626
    },
    {
      "epoch": 0.00032730712890625,
      "step": 53626,
      "training_step_time": 0.43244099617004395
    },
    {
      "epoch": 0.000327313232421875,
      "model_forward_time": 0.11426377296447754,
      "step": 53627
    },
    {
      "epoch": 0.000327313232421875,
      "step": 53627,
      "training_step_time": 0.3917078971862793
    },
    {
      "epoch": 0.0003273193359375,
      "model_forward_time": 0.11486124992370605,
      "step": 53628
    },
    {
      "epoch": 0.0003273193359375,
      "step": 53628,
      "training_step_time": 0.3951115608215332
    },
    {
      "epoch": 0.000327325439453125,
      "model_forward_time": 0.11466646194458008,
      "step": 53629
    },
    {
      "epoch": 0.000327325439453125,
      "step": 53629,
      "training_step_time": 0.40046215057373047
    },
    {
      "epoch": 0.00032733154296875,
      "grad_norm": 0.0989251583814621,
      "learning_rate": 3.0500244359261355e-06,
      "loss": 0.0369,
      "step": 53630
    },
    {
      "epoch": 0.00032733154296875,
      "model_forward_time": 0.11488628387451172,
      "step": 53630
    },
    {
      "epoch": 0.00032733154296875,
      "step": 53630,
      "training_step_time": 0.5210309028625488
    },
    {
      "epoch": 0.000327337646484375,
      "model_forward_time": 0.1146535873413086,
      "step": 53631
    },
    {
      "epoch": 0.000327337646484375,
      "step": 53631,
      "training_step_time": 0.4821622371673584
    },
    {
      "epoch": 0.00032734375,
      "model_forward_time": 0.1148066520690918,
      "step": 53632
    },
    {
      "epoch": 0.00032734375,
      "step": 53632,
      "training_step_time": 0.41615843772888184
    },
    {
      "epoch": 0.000327349853515625,
      "model_forward_time": 0.11544537544250488,
      "step": 53633
    },
    {
      "epoch": 0.000327349853515625,
      "step": 53633,
      "training_step_time": 0.38982725143432617
    },
    {
      "epoch": 0.00032735595703125,
      "model_forward_time": 0.11490678787231445,
      "step": 53634
    },
    {
      "epoch": 0.00032735595703125,
      "step": 53634,
      "training_step_time": 0.38823699951171875
    },
    {
      "epoch": 0.000327362060546875,
      "model_forward_time": 0.11424422264099121,
      "step": 53635
    },
    {
      "epoch": 0.000327362060546875,
      "step": 53635,
      "training_step_time": 0.3859517574310303
    },
    {
      "epoch": 0.0003273681640625,
      "model_forward_time": 0.11516165733337402,
      "step": 53636
    },
    {
      "epoch": 0.0003273681640625,
      "step": 53636,
      "training_step_time": 0.7107088565826416
    },
    {
      "epoch": 0.000327374267578125,
      "model_forward_time": 0.11500191688537598,
      "step": 53637
    },
    {
      "epoch": 0.000327374267578125,
      "step": 53637,
      "training_step_time": 0.3890669345855713
    },
    {
      "epoch": 0.00032738037109375,
      "model_forward_time": 0.11435937881469727,
      "step": 53638
    },
    {
      "epoch": 0.00032738037109375,
      "step": 53638,
      "training_step_time": 0.447066068649292
    },
    {
      "epoch": 0.000327386474609375,
      "model_forward_time": 0.11437773704528809,
      "step": 53639
    },
    {
      "epoch": 0.000327386474609375,
      "step": 53639,
      "training_step_time": 0.43772101402282715
    },
    {
      "epoch": 0.000327392578125,
      "grad_norm": 0.09038541465997696,
      "learning_rate": 3.040553920503503e-06,
      "loss": 0.0357,
      "step": 53640
    },
    {
      "epoch": 0.000327392578125,
      "model_forward_time": 0.11463069915771484,
      "step": 53640
    },
    {
      "epoch": 0.000327392578125,
      "step": 53640,
      "training_step_time": 0.4314107894897461
    },
    {
      "epoch": 0.000327398681640625,
      "model_forward_time": 0.11433219909667969,
      "step": 53641
    },
    {
      "epoch": 0.000327398681640625,
      "step": 53641,
      "training_step_time": 0.39902400970458984
    },
    {
      "epoch": 0.00032740478515625,
      "model_forward_time": 0.1146397590637207,
      "step": 53642
    },
    {
      "epoch": 0.00032740478515625,
      "step": 53642,
      "training_step_time": 0.6383523941040039
    },
    {
      "epoch": 0.000327410888671875,
      "model_forward_time": 0.11417555809020996,
      "step": 53643
    },
    {
      "epoch": 0.000327410888671875,
      "step": 53643,
      "training_step_time": 0.5135254859924316
    },
    {
      "epoch": 0.0003274169921875,
      "model_forward_time": 0.11398196220397949,
      "step": 53644
    },
    {
      "epoch": 0.0003274169921875,
      "step": 53644,
      "training_step_time": 0.4315063953399658
    },
    {
      "epoch": 0.000327423095703125,
      "model_forward_time": 0.11456441879272461,
      "step": 53645
    },
    {
      "epoch": 0.000327423095703125,
      "step": 53645,
      "training_step_time": 0.47870612144470215
    },
    {
      "epoch": 0.00032742919921875,
      "model_forward_time": 0.11442852020263672,
      "step": 53646
    },
    {
      "epoch": 0.00032742919921875,
      "step": 53646,
      "training_step_time": 0.39542174339294434
    },
    {
      "epoch": 0.000327435302734375,
      "model_forward_time": 0.11481642723083496,
      "step": 53647
    },
    {
      "epoch": 0.000327435302734375,
      "step": 53647,
      "training_step_time": 0.3835868835449219
    },
    {
      "epoch": 0.00032744140625,
      "model_forward_time": 0.11431050300598145,
      "step": 53648
    },
    {
      "epoch": 0.00032744140625,
      "step": 53648,
      "training_step_time": 0.48768043518066406
    },
    {
      "epoch": 0.000327447509765625,
      "model_forward_time": 0.11544132232666016,
      "step": 53649
    },
    {
      "epoch": 0.000327447509765625,
      "step": 53649,
      "training_step_time": 0.3964376449584961
    },
    {
      "epoch": 0.00032745361328125,
      "grad_norm": 0.1533590704202652,
      "learning_rate": 3.031097670120703e-06,
      "loss": 0.04,
      "step": 53650
    },
    {
      "epoch": 0.00032745361328125,
      "model_forward_time": 0.1147768497467041,
      "step": 53650
    },
    {
      "epoch": 0.00032745361328125,
      "step": 53650,
      "training_step_time": 0.3949916362762451
    },
    {
      "epoch": 0.000327459716796875,
      "model_forward_time": 0.11530494689941406,
      "step": 53651
    },
    {
      "epoch": 0.000327459716796875,
      "step": 53651,
      "training_step_time": 0.4129602909088135
    },
    {
      "epoch": 0.0003274658203125,
      "model_forward_time": 0.11535191535949707,
      "step": 53652
    },
    {
      "epoch": 0.0003274658203125,
      "step": 53652,
      "training_step_time": 0.40555715560913086
    },
    {
      "epoch": 0.000327471923828125,
      "model_forward_time": 0.1152040958404541,
      "step": 53653
    },
    {
      "epoch": 0.000327471923828125,
      "step": 53653,
      "training_step_time": 0.4135422706604004
    },
    {
      "epoch": 0.00032747802734375,
      "model_forward_time": 0.11528635025024414,
      "step": 53654
    },
    {
      "epoch": 0.00032747802734375,
      "step": 53654,
      "training_step_time": 0.5983574390411377
    },
    {
      "epoch": 0.000327484130859375,
      "model_forward_time": 0.11522126197814941,
      "step": 53655
    },
    {
      "epoch": 0.000327484130859375,
      "step": 53655,
      "training_step_time": 0.395277738571167
    },
    {
      "epoch": 0.000327490234375,
      "model_forward_time": 0.11454391479492188,
      "step": 53656
    },
    {
      "epoch": 0.000327490234375,
      "step": 53656,
      "training_step_time": 0.3909947872161865
    },
    {
      "epoch": 0.000327496337890625,
      "model_forward_time": 0.11476755142211914,
      "step": 53657
    },
    {
      "epoch": 0.000327496337890625,
      "step": 53657,
      "training_step_time": 0.43688130378723145
    },
    {
      "epoch": 0.00032750244140625,
      "model_forward_time": 0.11472845077514648,
      "step": 53658
    },
    {
      "epoch": 0.00032750244140625,
      "step": 53658,
      "training_step_time": 0.4424264430999756
    },
    {
      "epoch": 0.000327508544921875,
      "model_forward_time": 0.11441373825073242,
      "step": 53659
    },
    {
      "epoch": 0.000327508544921875,
      "step": 53659,
      "training_step_time": 0.391063928604126
    },
    {
      "epoch": 0.0003275146484375,
      "grad_norm": 0.07746267318725586,
      "learning_rate": 3.021655687650282e-06,
      "loss": 0.0363,
      "step": 53660
    },
    {
      "epoch": 0.0003275146484375,
      "model_forward_time": 0.11492705345153809,
      "step": 53660
    },
    {
      "epoch": 0.0003275146484375,
      "step": 53660,
      "training_step_time": 0.4741029739379883
    },
    {
      "epoch": 0.000327520751953125,
      "model_forward_time": 0.11548185348510742,
      "step": 53661
    },
    {
      "epoch": 0.000327520751953125,
      "step": 53661,
      "training_step_time": 0.39081335067749023
    },
    {
      "epoch": 0.00032752685546875,
      "model_forward_time": 0.11507821083068848,
      "step": 53662
    },
    {
      "epoch": 0.00032752685546875,
      "step": 53662,
      "training_step_time": 0.39926815032958984
    },
    {
      "epoch": 0.000327532958984375,
      "model_forward_time": 0.11455464363098145,
      "step": 53663
    },
    {
      "epoch": 0.000327532958984375,
      "step": 53663,
      "training_step_time": 0.40601468086242676
    },
    {
      "epoch": 0.0003275390625,
      "model_forward_time": 0.11476874351501465,
      "step": 53664
    },
    {
      "epoch": 0.0003275390625,
      "step": 53664,
      "training_step_time": 0.4130706787109375
    },
    {
      "epoch": 0.000327545166015625,
      "model_forward_time": 0.11533713340759277,
      "step": 53665
    },
    {
      "epoch": 0.000327545166015625,
      "step": 53665,
      "training_step_time": 0.42267560958862305
    },
    {
      "epoch": 0.00032755126953125,
      "model_forward_time": 0.11493730545043945,
      "step": 53666
    },
    {
      "epoch": 0.00032755126953125,
      "step": 53666,
      "training_step_time": 0.47443175315856934
    },
    {
      "epoch": 0.000327557373046875,
      "model_forward_time": 0.1156618595123291,
      "step": 53667
    },
    {
      "epoch": 0.000327557373046875,
      "step": 53667,
      "training_step_time": 0.41359758377075195
    },
    {
      "epoch": 0.0003275634765625,
      "model_forward_time": 0.11456704139709473,
      "step": 53668
    },
    {
      "epoch": 0.0003275634765625,
      "step": 53668,
      "training_step_time": 0.4506356716156006
    },
    {
      "epoch": 0.000327569580078125,
      "model_forward_time": 0.11533331871032715,
      "step": 53669
    },
    {
      "epoch": 0.000327569580078125,
      "step": 53669,
      "training_step_time": 0.39966583251953125
    },
    {
      "epoch": 0.00032757568359375,
      "grad_norm": 0.0829734280705452,
      "learning_rate": 3.0122279759604745e-06,
      "loss": 0.0322,
      "step": 53670
    },
    {
      "epoch": 0.00032757568359375,
      "model_forward_time": 0.11454987525939941,
      "step": 53670
    },
    {
      "epoch": 0.00032757568359375,
      "step": 53670,
      "training_step_time": 0.41606569290161133
    },
    {
      "epoch": 0.000327581787109375,
      "model_forward_time": 0.11552071571350098,
      "step": 53671
    },
    {
      "epoch": 0.000327581787109375,
      "step": 53671,
      "training_step_time": 0.44977474212646484
    },
    {
      "epoch": 0.000327587890625,
      "model_forward_time": 0.11558389663696289,
      "step": 53672
    },
    {
      "epoch": 0.000327587890625,
      "step": 53672,
      "training_step_time": 0.4454820156097412
    },
    {
      "epoch": 0.000327593994140625,
      "model_forward_time": 0.11494302749633789,
      "step": 53673
    },
    {
      "epoch": 0.000327593994140625,
      "step": 53673,
      "training_step_time": 0.41573548316955566
    },
    {
      "epoch": 0.00032760009765625,
      "model_forward_time": 0.11517763137817383,
      "step": 53674
    },
    {
      "epoch": 0.00032760009765625,
      "step": 53674,
      "training_step_time": 0.4611546993255615
    },
    {
      "epoch": 0.000327606201171875,
      "model_forward_time": 0.11523032188415527,
      "step": 53675
    },
    {
      "epoch": 0.000327606201171875,
      "step": 53675,
      "training_step_time": 0.40042781829833984
    },
    {
      "epoch": 0.0003276123046875,
      "model_forward_time": 0.1154015064239502,
      "step": 53676
    },
    {
      "epoch": 0.0003276123046875,
      "step": 53676,
      "training_step_time": 0.3954324722290039
    },
    {
      "epoch": 0.000327618408203125,
      "model_forward_time": 0.11493182182312012,
      "step": 53677
    },
    {
      "epoch": 0.000327618408203125,
      "step": 53677,
      "training_step_time": 0.39404797554016113
    },
    {
      "epoch": 0.00032762451171875,
      "model_forward_time": 0.11518335342407227,
      "step": 53678
    },
    {
      "epoch": 0.00032762451171875,
      "step": 53678,
      "training_step_time": 0.4499335289001465
    },
    {
      "epoch": 0.000327630615234375,
      "model_forward_time": 0.11488795280456543,
      "step": 53679
    },
    {
      "epoch": 0.000327630615234375,
      "step": 53679,
      "training_step_time": 0.4015040397644043
    },
    {
      "epoch": 0.00032763671875,
      "grad_norm": 0.10405188053846359,
      "learning_rate": 3.0028145379151716e-06,
      "loss": 0.0355,
      "step": 53680
    },
    {
      "epoch": 0.00032763671875,
      "model_forward_time": 0.11508440971374512,
      "step": 53680
    },
    {
      "epoch": 0.00032763671875,
      "step": 53680,
      "training_step_time": 0.43401026725769043
    },
    {
      "epoch": 0.000327642822265625,
      "model_forward_time": 0.11484932899475098,
      "step": 53681
    },
    {
      "epoch": 0.000327642822265625,
      "step": 53681,
      "training_step_time": 0.3961751461029053
    },
    {
      "epoch": 0.00032764892578125,
      "model_forward_time": 0.11477422714233398,
      "step": 53682
    },
    {
      "epoch": 0.00032764892578125,
      "step": 53682,
      "training_step_time": 0.4800078868865967
    },
    {
      "epoch": 0.000327655029296875,
      "model_forward_time": 0.11475133895874023,
      "step": 53683
    },
    {
      "epoch": 0.000327655029296875,
      "step": 53683,
      "training_step_time": 0.38469696044921875
    },
    {
      "epoch": 0.0003276611328125,
      "model_forward_time": 0.11573314666748047,
      "step": 53684
    },
    {
      "epoch": 0.0003276611328125,
      "step": 53684,
      "training_step_time": 0.48281002044677734
    },
    {
      "epoch": 0.000327667236328125,
      "model_forward_time": 0.11601018905639648,
      "step": 53685
    },
    {
      "epoch": 0.000327667236328125,
      "step": 53685,
      "training_step_time": 0.39887118339538574
    },
    {
      "epoch": 0.00032767333984375,
      "model_forward_time": 0.11479449272155762,
      "step": 53686
    },
    {
      "epoch": 0.00032767333984375,
      "step": 53686,
      "training_step_time": 0.36664867401123047
    },
    {
      "epoch": 0.000327679443359375,
      "model_forward_time": 0.11465597152709961,
      "step": 53687
    },
    {
      "epoch": 0.000327679443359375,
      "step": 53687,
      "training_step_time": 0.4474985599517822
    },
    {
      "epoch": 0.000327685546875,
      "model_forward_time": 0.11521601676940918,
      "step": 53688
    },
    {
      "epoch": 0.000327685546875,
      "step": 53688,
      "training_step_time": 0.43964552879333496
    },
    {
      "epoch": 0.000327691650390625,
      "model_forward_time": 0.11493253707885742,
      "step": 53689
    },
    {
      "epoch": 0.000327691650390625,
      "step": 53689,
      "training_step_time": 0.3941664695739746
    },
    {
      "epoch": 0.00032769775390625,
      "grad_norm": 0.13405340909957886,
      "learning_rate": 2.9934153763739205e-06,
      "loss": 0.0375,
      "step": 53690
    },
    {
      "epoch": 0.00032769775390625,
      "model_forward_time": 0.11542487144470215,
      "step": 53690
    },
    {
      "epoch": 0.00032769775390625,
      "step": 53690,
      "training_step_time": 0.5498464107513428
    },
    {
      "epoch": 0.000327703857421875,
      "model_forward_time": 0.11455368995666504,
      "step": 53691
    },
    {
      "epoch": 0.000327703857421875,
      "step": 53691,
      "training_step_time": 0.4018833637237549
    },
    {
      "epoch": 0.0003277099609375,
      "model_forward_time": 0.11480879783630371,
      "step": 53692
    },
    {
      "epoch": 0.0003277099609375,
      "step": 53692,
      "training_step_time": 0.39377784729003906
    },
    {
      "epoch": 0.000327716064453125,
      "model_forward_time": 0.11461305618286133,
      "step": 53693
    },
    {
      "epoch": 0.000327716064453125,
      "step": 53693,
      "training_step_time": 0.38767218589782715
    },
    {
      "epoch": 0.00032772216796875,
      "model_forward_time": 0.11504292488098145,
      "step": 53694
    },
    {
      "epoch": 0.00032772216796875,
      "step": 53694,
      "training_step_time": 0.39756035804748535
    },
    {
      "epoch": 0.000327728271484375,
      "model_forward_time": 0.1145181655883789,
      "step": 53695
    },
    {
      "epoch": 0.000327728271484375,
      "step": 53695,
      "training_step_time": 0.4685204029083252
    },
    {
      "epoch": 0.000327734375,
      "model_forward_time": 0.11459040641784668,
      "step": 53696
    },
    {
      "epoch": 0.000327734375,
      "step": 53696,
      "training_step_time": 0.41585826873779297
    },
    {
      "epoch": 0.000327740478515625,
      "model_forward_time": 0.11508774757385254,
      "step": 53697
    },
    {
      "epoch": 0.000327740478515625,
      "step": 53697,
      "training_step_time": 0.4093208312988281
    },
    {
      "epoch": 0.00032774658203125,
      "model_forward_time": 0.11473441123962402,
      "step": 53698
    },
    {
      "epoch": 0.00032774658203125,
      "step": 53698,
      "training_step_time": 0.4519619941711426
    },
    {
      "epoch": 0.000327752685546875,
      "model_forward_time": 0.11472105979919434,
      "step": 53699
    },
    {
      "epoch": 0.000327752685546875,
      "step": 53699,
      "training_step_time": 0.42696356773376465
    },
    {
      "epoch": 0.0003277587890625,
      "grad_norm": 0.12964534759521484,
      "learning_rate": 2.9840304941919415e-06,
      "loss": 0.037,
      "step": 53700
    },
    {
      "epoch": 0.0003277587890625,
      "model_forward_time": 0.11461234092712402,
      "step": 53700
    },
    {
      "epoch": 0.0003277587890625,
      "step": 53700,
      "training_step_time": 0.39818525314331055
    },
    {
      "epoch": 0.000327764892578125,
      "model_forward_time": 0.11499309539794922,
      "step": 53701
    },
    {
      "epoch": 0.000327764892578125,
      "step": 53701,
      "training_step_time": 0.3894004821777344
    },
    {
      "epoch": 0.00032777099609375,
      "model_forward_time": 0.11462712287902832,
      "step": 53702
    },
    {
      "epoch": 0.00032777099609375,
      "step": 53702,
      "training_step_time": 0.43511104583740234
    },
    {
      "epoch": 0.000327777099609375,
      "model_forward_time": 0.11510300636291504,
      "step": 53703
    },
    {
      "epoch": 0.000327777099609375,
      "step": 53703,
      "training_step_time": 0.4201834201812744
    },
    {
      "epoch": 0.000327783203125,
      "model_forward_time": 0.11457562446594238,
      "step": 53704
    },
    {
      "epoch": 0.000327783203125,
      "step": 53704,
      "training_step_time": 0.4293630123138428
    },
    {
      "epoch": 0.000327789306640625,
      "model_forward_time": 0.11474347114562988,
      "step": 53705
    },
    {
      "epoch": 0.000327789306640625,
      "step": 53705,
      "training_step_time": 0.4150354862213135
    },
    {
      "epoch": 0.00032779541015625,
      "model_forward_time": 0.11496090888977051,
      "step": 53706
    },
    {
      "epoch": 0.00032779541015625,
      "step": 53706,
      "training_step_time": 0.39533162117004395
    },
    {
      "epoch": 0.000327801513671875,
      "model_forward_time": 0.11612391471862793,
      "step": 53707
    },
    {
      "epoch": 0.000327801513671875,
      "step": 53707,
      "training_step_time": 0.39561939239501953
    },
    {
      "epoch": 0.0003278076171875,
      "model_forward_time": 0.11519289016723633,
      "step": 53708
    },
    {
      "epoch": 0.0003278076171875,
      "step": 53708,
      "training_step_time": 0.3938312530517578
    },
    {
      "epoch": 0.000327813720703125,
      "model_forward_time": 0.11538147926330566,
      "step": 53709
    },
    {
      "epoch": 0.000327813720703125,
      "step": 53709,
      "training_step_time": 0.40154147148132324
    },
    {
      "epoch": 0.00032781982421875,
      "grad_norm": 0.09069132804870605,
      "learning_rate": 2.9746598942201054e-06,
      "loss": 0.0392,
      "step": 53710
    },
    {
      "epoch": 0.00032781982421875,
      "model_forward_time": 0.11518430709838867,
      "step": 53710
    },
    {
      "epoch": 0.00032781982421875,
      "step": 53710,
      "training_step_time": 0.41425585746765137
    },
    {
      "epoch": 0.000327825927734375,
      "model_forward_time": 0.1148836612701416,
      "step": 53711
    },
    {
      "epoch": 0.000327825927734375,
      "step": 53711,
      "training_step_time": 0.443742036819458
    },
    {
      "epoch": 0.00032783203125,
      "model_forward_time": 0.1160585880279541,
      "step": 53712
    },
    {
      "epoch": 0.00032783203125,
      "step": 53712,
      "training_step_time": 0.5132126808166504
    },
    {
      "epoch": 0.000327838134765625,
      "model_forward_time": 0.11509037017822266,
      "step": 53713
    },
    {
      "epoch": 0.000327838134765625,
      "step": 53713,
      "training_step_time": 0.44130659103393555
    },
    {
      "epoch": 0.00032784423828125,
      "model_forward_time": 0.11544227600097656,
      "step": 53714
    },
    {
      "epoch": 0.00032784423828125,
      "step": 53714,
      "training_step_time": 0.44177770614624023
    },
    {
      "epoch": 0.000327850341796875,
      "model_forward_time": 0.11461234092712402,
      "step": 53715
    },
    {
      "epoch": 0.000327850341796875,
      "step": 53715,
      "training_step_time": 0.3654139041900635
    },
    {
      "epoch": 0.0003278564453125,
      "model_forward_time": 0.11524438858032227,
      "step": 53716
    },
    {
      "epoch": 0.0003278564453125,
      "step": 53716,
      "training_step_time": 0.4278428554534912
    },
    {
      "epoch": 0.000327862548828125,
      "model_forward_time": 0.1148834228515625,
      "step": 53717
    },
    {
      "epoch": 0.000327862548828125,
      "step": 53717,
      "training_step_time": 0.37975001335144043
    },
    {
      "epoch": 0.00032786865234375,
      "model_forward_time": 0.11525487899780273,
      "step": 53718
    },
    {
      "epoch": 0.00032786865234375,
      "step": 53718,
      "training_step_time": 0.3799121379852295
    },
    {
      "epoch": 0.000327874755859375,
      "model_forward_time": 0.11486244201660156,
      "step": 53719
    },
    {
      "epoch": 0.000327874755859375,
      "step": 53719,
      "training_step_time": 0.41501450538635254
    },
    {
      "epoch": 0.000327880859375,
      "grad_norm": 0.07817196846008301,
      "learning_rate": 2.965303579304973e-06,
      "loss": 0.0402,
      "step": 53720
    },
    {
      "epoch": 0.000327880859375,
      "model_forward_time": 0.11480832099914551,
      "step": 53720
    },
    {
      "epoch": 0.000327880859375,
      "step": 53720,
      "training_step_time": 0.3881494998931885
    },
    {
      "epoch": 0.000327886962890625,
      "model_forward_time": 0.11578941345214844,
      "step": 53721
    },
    {
      "epoch": 0.000327886962890625,
      "step": 53721,
      "training_step_time": 0.3931772708892822
    },
    {
      "epoch": 0.00032789306640625,
      "model_forward_time": 0.11513423919677734,
      "step": 53722
    },
    {
      "epoch": 0.00032789306640625,
      "step": 53722,
      "training_step_time": 0.3962724208831787
    },
    {
      "epoch": 0.000327899169921875,
      "model_forward_time": 0.11513519287109375,
      "step": 53723
    },
    {
      "epoch": 0.000327899169921875,
      "step": 53723,
      "training_step_time": 0.39534711837768555
    },
    {
      "epoch": 0.0003279052734375,
      "model_forward_time": 0.11532831192016602,
      "step": 53724
    },
    {
      "epoch": 0.0003279052734375,
      "step": 53724,
      "training_step_time": 0.3828606605529785
    },
    {
      "epoch": 0.000327911376953125,
      "model_forward_time": 0.11505913734436035,
      "step": 53725
    },
    {
      "epoch": 0.000327911376953125,
      "step": 53725,
      "training_step_time": 0.4665865898132324
    },
    {
      "epoch": 0.00032791748046875,
      "model_forward_time": 0.11511015892028809,
      "step": 53726
    },
    {
      "epoch": 0.00032791748046875,
      "step": 53726,
      "training_step_time": 0.47450804710388184
    },
    {
      "epoch": 0.000327923583984375,
      "model_forward_time": 0.11458778381347656,
      "step": 53727
    },
    {
      "epoch": 0.000327923583984375,
      "step": 53727,
      "training_step_time": 0.4490644931793213
    },
    {
      "epoch": 0.0003279296875,
      "model_forward_time": 0.1148996353149414,
      "step": 53728
    },
    {
      "epoch": 0.0003279296875,
      "step": 53728,
      "training_step_time": 0.49595022201538086
    },
    {
      "epoch": 0.000327935791015625,
      "model_forward_time": 0.11454510688781738,
      "step": 53729
    },
    {
      "epoch": 0.000327935791015625,
      "step": 53729,
      "training_step_time": 0.36635565757751465
    },
    {
      "epoch": 0.00032794189453125,
      "grad_norm": 0.09680895507335663,
      "learning_rate": 2.9559615522887273e-06,
      "loss": 0.0356,
      "step": 53730
    },
    {
      "epoch": 0.00032794189453125,
      "model_forward_time": 0.11511445045471191,
      "step": 53730
    },
    {
      "epoch": 0.00032794189453125,
      "step": 53730,
      "training_step_time": 0.41563844680786133
    },
    {
      "epoch": 0.000327947998046875,
      "model_forward_time": 0.11630892753601074,
      "step": 53731
    },
    {
      "epoch": 0.000327947998046875,
      "step": 53731,
      "training_step_time": 0.4257686138153076
    },
    {
      "epoch": 0.0003279541015625,
      "model_forward_time": 0.11516690254211426,
      "step": 53732
    },
    {
      "epoch": 0.0003279541015625,
      "step": 53732,
      "training_step_time": 0.388277530670166
    },
    {
      "epoch": 0.000327960205078125,
      "model_forward_time": 0.11496758460998535,
      "step": 53733
    },
    {
      "epoch": 0.000327960205078125,
      "step": 53733,
      "training_step_time": 0.3750028610229492
    },
    {
      "epoch": 0.00032796630859375,
      "model_forward_time": 0.11557960510253906,
      "step": 53734
    },
    {
      "epoch": 0.00032796630859375,
      "step": 53734,
      "training_step_time": 0.37519097328186035
    },
    {
      "epoch": 0.000327972412109375,
      "model_forward_time": 0.1153874397277832,
      "step": 53735
    },
    {
      "epoch": 0.000327972412109375,
      "step": 53735,
      "training_step_time": 0.400465726852417
    },
    {
      "epoch": 0.000327978515625,
      "model_forward_time": 0.11452078819274902,
      "step": 53736
    },
    {
      "epoch": 0.000327978515625,
      "step": 53736,
      "training_step_time": 0.5995244979858398
    },
    {
      "epoch": 0.000327984619140625,
      "model_forward_time": 0.1147611141204834,
      "step": 53737
    },
    {
      "epoch": 0.000327984619140625,
      "step": 53737,
      "training_step_time": 0.39078783988952637
    },
    {
      "epoch": 0.00032799072265625,
      "model_forward_time": 0.11504483222961426,
      "step": 53738
    },
    {
      "epoch": 0.00032799072265625,
      "step": 53738,
      "training_step_time": 0.37911200523376465
    },
    {
      "epoch": 0.000327996826171875,
      "model_forward_time": 0.1144869327545166,
      "step": 53739
    },
    {
      "epoch": 0.000327996826171875,
      "step": 53739,
      "training_step_time": 0.4072716236114502
    },
    {
      "epoch": 0.0003280029296875,
      "grad_norm": 0.09656180441379547,
      "learning_rate": 2.946633816009242e-06,
      "loss": 0.0362,
      "step": 53740
    },
    {
      "epoch": 0.0003280029296875,
      "model_forward_time": 0.11515951156616211,
      "step": 53740
    },
    {
      "epoch": 0.0003280029296875,
      "step": 53740,
      "training_step_time": 0.40826869010925293
    },
    {
      "epoch": 0.000328009033203125,
      "model_forward_time": 0.11496686935424805,
      "step": 53741
    },
    {
      "epoch": 0.000328009033203125,
      "step": 53741,
      "training_step_time": 0.44370484352111816
    },
    {
      "epoch": 0.00032801513671875,
      "model_forward_time": 0.11496496200561523,
      "step": 53742
    },
    {
      "epoch": 0.00032801513671875,
      "step": 53742,
      "training_step_time": 0.5980513095855713
    },
    {
      "epoch": 0.000328021240234375,
      "model_forward_time": 0.11453509330749512,
      "step": 53743
    },
    {
      "epoch": 0.000328021240234375,
      "step": 53743,
      "training_step_time": 0.41306447982788086
    },
    {
      "epoch": 0.00032802734375,
      "model_forward_time": 0.11497902870178223,
      "step": 53744
    },
    {
      "epoch": 0.00032802734375,
      "step": 53744,
      "training_step_time": 0.46966004371643066
    },
    {
      "epoch": 0.000328033447265625,
      "model_forward_time": 0.1144721508026123,
      "step": 53745
    },
    {
      "epoch": 0.000328033447265625,
      "step": 53745,
      "training_step_time": 0.4095280170440674
    },
    {
      "epoch": 0.00032803955078125,
      "model_forward_time": 0.11410164833068848,
      "step": 53746
    },
    {
      "epoch": 0.00032803955078125,
      "step": 53746,
      "training_step_time": 0.38465023040771484
    },
    {
      "epoch": 0.000328045654296875,
      "model_forward_time": 0.11583113670349121,
      "step": 53747
    },
    {
      "epoch": 0.000328045654296875,
      "step": 53747,
      "training_step_time": 0.3831040859222412
    },
    {
      "epoch": 0.0003280517578125,
      "model_forward_time": 0.11527419090270996,
      "step": 53748
    },
    {
      "epoch": 0.0003280517578125,
      "step": 53748,
      "training_step_time": 0.6275308132171631
    },
    {
      "epoch": 0.000328057861328125,
      "model_forward_time": 0.11469721794128418,
      "step": 53749
    },
    {
      "epoch": 0.000328057861328125,
      "step": 53749,
      "training_step_time": 0.39374208450317383
    },
    {
      "epoch": 0.00032806396484375,
      "grad_norm": 0.07744479924440384,
      "learning_rate": 2.9373203733000232e-06,
      "loss": 0.0307,
      "step": 53750
    },
    {
      "epoch": 0.00032806396484375,
      "model_forward_time": 0.11414456367492676,
      "step": 53750
    },
    {
      "epoch": 0.00032806396484375,
      "step": 53750,
      "training_step_time": 0.38606834411621094
    },
    {
      "epoch": 0.000328070068359375,
      "model_forward_time": 0.11486625671386719,
      "step": 53751
    },
    {
      "epoch": 0.000328070068359375,
      "step": 53751,
      "training_step_time": 0.3881962299346924
    },
    {
      "epoch": 0.000328076171875,
      "model_forward_time": 0.1150977611541748,
      "step": 53752
    },
    {
      "epoch": 0.000328076171875,
      "step": 53752,
      "training_step_time": 0.38315868377685547
    },
    {
      "epoch": 0.000328082275390625,
      "model_forward_time": 0.11469364166259766,
      "step": 53753
    },
    {
      "epoch": 0.000328082275390625,
      "step": 53753,
      "training_step_time": 0.4321260452270508
    },
    {
      "epoch": 0.00032808837890625,
      "model_forward_time": 0.11540532112121582,
      "step": 53754
    },
    {
      "epoch": 0.00032808837890625,
      "step": 53754,
      "training_step_time": 0.5786018371582031
    },
    {
      "epoch": 0.000328094482421875,
      "model_forward_time": 0.11465334892272949,
      "step": 53755
    },
    {
      "epoch": 0.000328094482421875,
      "step": 53755,
      "training_step_time": 0.42604780197143555
    },
    {
      "epoch": 0.0003281005859375,
      "model_forward_time": 0.11506319046020508,
      "step": 53756
    },
    {
      "epoch": 0.0003281005859375,
      "step": 53756,
      "training_step_time": 0.49654507637023926
    },
    {
      "epoch": 0.000328106689453125,
      "model_forward_time": 0.11471676826477051,
      "step": 53757
    },
    {
      "epoch": 0.000328106689453125,
      "step": 53757,
      "training_step_time": 0.38806891441345215
    },
    {
      "epoch": 0.00032811279296875,
      "model_forward_time": 0.11481523513793945,
      "step": 53758
    },
    {
      "epoch": 0.00032811279296875,
      "step": 53758,
      "training_step_time": 0.4034910202026367
    },
    {
      "epoch": 0.000328118896484375,
      "model_forward_time": 0.1143026351928711,
      "step": 53759
    },
    {
      "epoch": 0.000328118896484375,
      "step": 53759,
      "training_step_time": 0.4301316738128662
    },
    {
      "epoch": 0.000328125,
      "grad_norm": 0.11631546914577484,
      "learning_rate": 2.928021226990263e-06,
      "loss": 0.0403,
      "step": 53760
    },
    {
      "epoch": 0.000328125,
      "model_forward_time": 0.11438465118408203,
      "step": 53760
    },
    {
      "epoch": 0.000328125,
      "step": 53760,
      "training_step_time": 0.45850610733032227
    },
    {
      "epoch": 0.000328131103515625,
      "model_forward_time": 0.11461520195007324,
      "step": 53761
    },
    {
      "epoch": 0.000328131103515625,
      "step": 53761,
      "training_step_time": 0.38840174674987793
    },
    {
      "epoch": 0.00032813720703125,
      "model_forward_time": 0.11492085456848145,
      "step": 53762
    },
    {
      "epoch": 0.00032813720703125,
      "step": 53762,
      "training_step_time": 0.39575958251953125
    },
    {
      "epoch": 0.000328143310546875,
      "model_forward_time": 0.1150362491607666,
      "step": 53763
    },
    {
      "epoch": 0.000328143310546875,
      "step": 53763,
      "training_step_time": 0.3974447250366211
    },
    {
      "epoch": 0.0003281494140625,
      "model_forward_time": 0.11485910415649414,
      "step": 53764
    },
    {
      "epoch": 0.0003281494140625,
      "step": 53764,
      "training_step_time": 0.3894345760345459
    },
    {
      "epoch": 0.000328155517578125,
      "model_forward_time": 0.11663508415222168,
      "step": 53765
    },
    {
      "epoch": 0.000328155517578125,
      "step": 53765,
      "training_step_time": 0.39285802841186523
    },
    {
      "epoch": 0.00032816162109375,
      "model_forward_time": 0.11517333984375,
      "step": 53766
    },
    {
      "epoch": 0.00032816162109375,
      "step": 53766,
      "training_step_time": 0.850853681564331
    },
    {
      "epoch": 0.000328167724609375,
      "model_forward_time": 0.11442756652832031,
      "step": 53767
    },
    {
      "epoch": 0.000328167724609375,
      "step": 53767,
      "training_step_time": 0.40985679626464844
    },
    {
      "epoch": 0.000328173828125,
      "model_forward_time": 0.11428236961364746,
      "step": 53768
    },
    {
      "epoch": 0.000328173828125,
      "step": 53768,
      "training_step_time": 0.45636582374572754
    },
    {
      "epoch": 0.000328179931640625,
      "model_forward_time": 0.11441636085510254,
      "step": 53769
    },
    {
      "epoch": 0.000328179931640625,
      "step": 53769,
      "training_step_time": 0.4610781669616699
    },
    {
      "epoch": 0.00032818603515625,
      "grad_norm": 0.08885249495506287,
      "learning_rate": 2.9187363799047905e-06,
      "loss": 0.0369,
      "step": 53770
    },
    {
      "epoch": 0.00032818603515625,
      "model_forward_time": 0.11393189430236816,
      "step": 53770
    },
    {
      "epoch": 0.00032818603515625,
      "step": 53770,
      "training_step_time": 0.4020063877105713
    },
    {
      "epoch": 0.000328192138671875,
      "model_forward_time": 0.11475396156311035,
      "step": 53771
    },
    {
      "epoch": 0.000328192138671875,
      "step": 53771,
      "training_step_time": 0.4138021469116211
    },
    {
      "epoch": 0.0003281982421875,
      "model_forward_time": 0.11481380462646484,
      "step": 53772
    },
    {
      "epoch": 0.0003281982421875,
      "step": 53772,
      "training_step_time": 0.5424103736877441
    },
    {
      "epoch": 0.000328204345703125,
      "model_forward_time": 0.11595416069030762,
      "step": 53773
    },
    {
      "epoch": 0.000328204345703125,
      "step": 53773,
      "training_step_time": 0.40029358863830566
    },
    {
      "epoch": 0.00032821044921875,
      "model_forward_time": 0.11455345153808594,
      "step": 53774
    },
    {
      "epoch": 0.00032821044921875,
      "step": 53774,
      "training_step_time": 0.38881850242614746
    },
    {
      "epoch": 0.000328216552734375,
      "model_forward_time": 0.11481499671936035,
      "step": 53775
    },
    {
      "epoch": 0.000328216552734375,
      "step": 53775,
      "training_step_time": 0.38539814949035645
    },
    {
      "epoch": 0.00032822265625,
      "model_forward_time": 0.11530756950378418,
      "step": 53776
    },
    {
      "epoch": 0.00032822265625,
      "step": 53776,
      "training_step_time": 0.3962852954864502
    },
    {
      "epoch": 0.000328228759765625,
      "model_forward_time": 0.11478686332702637,
      "step": 53777
    },
    {
      "epoch": 0.000328228759765625,
      "step": 53777,
      "training_step_time": 0.39271092414855957
    },
    {
      "epoch": 0.00032823486328125,
      "model_forward_time": 0.11472177505493164,
      "step": 53778
    },
    {
      "epoch": 0.00032823486328125,
      "step": 53778,
      "training_step_time": 0.7196230888366699
    },
    {
      "epoch": 0.000328240966796875,
      "model_forward_time": 0.11472535133361816,
      "step": 53779
    },
    {
      "epoch": 0.000328240966796875,
      "step": 53779,
      "training_step_time": 0.388883113861084
    },
    {
      "epoch": 0.0003282470703125,
      "grad_norm": 0.07706218957901001,
      "learning_rate": 2.9094658348640945e-06,
      "loss": 0.0343,
      "step": 53780
    },
    {
      "epoch": 0.0003282470703125,
      "model_forward_time": 0.11406135559082031,
      "step": 53780
    },
    {
      "epoch": 0.0003282470703125,
      "step": 53780,
      "training_step_time": 0.38428354263305664
    },
    {
      "epoch": 0.000328253173828125,
      "model_forward_time": 0.11506080627441406,
      "step": 53781
    },
    {
      "epoch": 0.000328253173828125,
      "step": 53781,
      "training_step_time": 0.3786044120788574
    },
    {
      "epoch": 0.00032825927734375,
      "model_forward_time": 0.11439704895019531,
      "step": 53782
    },
    {
      "epoch": 0.00032825927734375,
      "step": 53782,
      "training_step_time": 0.49598121643066406
    },
    {
      "epoch": 0.000328265380859375,
      "model_forward_time": 0.11437201499938965,
      "step": 53783
    },
    {
      "epoch": 0.000328265380859375,
      "step": 53783,
      "training_step_time": 0.4688689708709717
    },
    {
      "epoch": 0.000328271484375,
      "model_forward_time": 0.11548852920532227,
      "step": 53784
    },
    {
      "epoch": 0.000328271484375,
      "step": 53784,
      "training_step_time": 0.6508517265319824
    },
    {
      "epoch": 0.000328277587890625,
      "model_forward_time": 0.11435818672180176,
      "step": 53785
    },
    {
      "epoch": 0.000328277587890625,
      "step": 53785,
      "training_step_time": 0.42865419387817383
    },
    {
      "epoch": 0.00032828369140625,
      "model_forward_time": 0.11484074592590332,
      "step": 53786
    },
    {
      "epoch": 0.00032828369140625,
      "step": 53786,
      "training_step_time": 0.39704108238220215
    },
    {
      "epoch": 0.000328289794921875,
      "model_forward_time": 0.11445093154907227,
      "step": 53787
    },
    {
      "epoch": 0.000328289794921875,
      "step": 53787,
      "training_step_time": 0.389514684677124
    },
    {
      "epoch": 0.0003282958984375,
      "model_forward_time": 0.11440753936767578,
      "step": 53788
    },
    {
      "epoch": 0.0003282958984375,
      "step": 53788,
      "training_step_time": 0.38735270500183105
    },
    {
      "epoch": 0.000328302001953125,
      "model_forward_time": 0.11464619636535645,
      "step": 53789
    },
    {
      "epoch": 0.000328302001953125,
      "step": 53789,
      "training_step_time": 0.38997435569763184
    },
    {
      "epoch": 0.00032830810546875,
      "grad_norm": 0.09385591745376587,
      "learning_rate": 2.9002095946843277e-06,
      "loss": 0.0335,
      "step": 53790
    },
    {
      "epoch": 0.00032830810546875,
      "model_forward_time": 0.11509966850280762,
      "step": 53790
    },
    {
      "epoch": 0.00032830810546875,
      "step": 53790,
      "training_step_time": 0.6005773544311523
    },
    {
      "epoch": 0.000328314208984375,
      "model_forward_time": 0.11484122276306152,
      "step": 53791
    },
    {
      "epoch": 0.000328314208984375,
      "step": 53791,
      "training_step_time": 0.4367361068725586
    },
    {
      "epoch": 0.0003283203125,
      "model_forward_time": 0.11484718322753906,
      "step": 53792
    },
    {
      "epoch": 0.0003283203125,
      "step": 53792,
      "training_step_time": 0.3973255157470703
    },
    {
      "epoch": 0.000328326416015625,
      "model_forward_time": 0.11503458023071289,
      "step": 53793
    },
    {
      "epoch": 0.000328326416015625,
      "step": 53793,
      "training_step_time": 0.39179301261901855
    },
    {
      "epoch": 0.00032833251953125,
      "model_forward_time": 0.11516642570495605,
      "step": 53794
    },
    {
      "epoch": 0.00032833251953125,
      "step": 53794,
      "training_step_time": 0.38730788230895996
    },
    {
      "epoch": 0.000328338623046875,
      "model_forward_time": 0.11455774307250977,
      "step": 53795
    },
    {
      "epoch": 0.000328338623046875,
      "step": 53795,
      "training_step_time": 0.39641714096069336
    },
    {
      "epoch": 0.0003283447265625,
      "model_forward_time": 0.1147463321685791,
      "step": 53796
    },
    {
      "epoch": 0.0003283447265625,
      "step": 53796,
      "training_step_time": 0.779221773147583
    },
    {
      "epoch": 0.000328350830078125,
      "model_forward_time": 0.11449861526489258,
      "step": 53797
    },
    {
      "epoch": 0.000328350830078125,
      "step": 53797,
      "training_step_time": 0.41168785095214844
    },
    {
      "epoch": 0.00032835693359375,
      "model_forward_time": 0.11482620239257812,
      "step": 53798
    },
    {
      "epoch": 0.00032835693359375,
      "step": 53798,
      "training_step_time": 0.42961716651916504
    },
    {
      "epoch": 0.000328363037109375,
      "model_forward_time": 0.11502909660339355,
      "step": 53799
    },
    {
      "epoch": 0.000328363037109375,
      "step": 53799,
      "training_step_time": 0.4860086441040039
    },
    {
      "epoch": 0.000328369140625,
      "grad_norm": 0.08744032680988312,
      "learning_rate": 2.890967662177285e-06,
      "loss": 0.0391,
      "step": 53800
    },
    {
      "epoch": 0.000328369140625,
      "model_forward_time": 0.11478495597839355,
      "step": 53800
    },
    {
      "epoch": 0.000328369140625,
      "step": 53800,
      "training_step_time": 0.39771389961242676
    },
    {
      "epoch": 0.000328375244140625,
      "model_forward_time": 0.11431598663330078,
      "step": 53801
    },
    {
      "epoch": 0.000328375244140625,
      "step": 53801,
      "training_step_time": 0.38464879989624023
    },
    {
      "epoch": 0.00032838134765625,
      "model_forward_time": 0.11495351791381836,
      "step": 53802
    },
    {
      "epoch": 0.00032838134765625,
      "step": 53802,
      "training_step_time": 0.5111565589904785
    },
    {
      "epoch": 0.000328387451171875,
      "model_forward_time": 0.11491608619689941,
      "step": 53803
    },
    {
      "epoch": 0.000328387451171875,
      "step": 53803,
      "training_step_time": 0.38300442695617676
    },
    {
      "epoch": 0.0003283935546875,
      "model_forward_time": 0.11454439163208008,
      "step": 53804
    },
    {
      "epoch": 0.0003283935546875,
      "step": 53804,
      "training_step_time": 0.38895082473754883
    },
    {
      "epoch": 0.000328399658203125,
      "model_forward_time": 0.1151113510131836,
      "step": 53805
    },
    {
      "epoch": 0.000328399658203125,
      "step": 53805,
      "training_step_time": 0.39703869819641113
    },
    {
      "epoch": 0.00032840576171875,
      "model_forward_time": 0.11537289619445801,
      "step": 53806
    },
    {
      "epoch": 0.00032840576171875,
      "step": 53806,
      "training_step_time": 0.38793325424194336
    },
    {
      "epoch": 0.000328411865234375,
      "model_forward_time": 0.11502575874328613,
      "step": 53807
    },
    {
      "epoch": 0.000328411865234375,
      "step": 53807,
      "training_step_time": 0.39559054374694824
    },
    {
      "epoch": 0.00032841796875,
      "model_forward_time": 0.11459159851074219,
      "step": 53808
    },
    {
      "epoch": 0.00032841796875,
      "step": 53808,
      "training_step_time": 0.7322640419006348
    },
    {
      "epoch": 0.000328424072265625,
      "model_forward_time": 0.11455607414245605,
      "step": 53809
    },
    {
      "epoch": 0.000328424072265625,
      "step": 53809,
      "training_step_time": 0.44606828689575195
    },
    {
      "epoch": 0.00032843017578125,
      "grad_norm": 0.08393354713916779,
      "learning_rate": 2.881740040150416e-06,
      "loss": 0.038,
      "step": 53810
    },
    {
      "epoch": 0.00032843017578125,
      "model_forward_time": 0.11462235450744629,
      "step": 53810
    },
    {
      "epoch": 0.00032843017578125,
      "step": 53810,
      "training_step_time": 0.47516322135925293
    },
    {
      "epoch": 0.000328436279296875,
      "model_forward_time": 0.11468219757080078,
      "step": 53811
    },
    {
      "epoch": 0.000328436279296875,
      "step": 53811,
      "training_step_time": 0.4478647708892822
    },
    {
      "epoch": 0.0003284423828125,
      "model_forward_time": 0.1147770881652832,
      "step": 53812
    },
    {
      "epoch": 0.0003284423828125,
      "step": 53812,
      "training_step_time": 0.49952077865600586
    },
    {
      "epoch": 0.000328448486328125,
      "model_forward_time": 0.11422371864318848,
      "step": 53813
    },
    {
      "epoch": 0.000328448486328125,
      "step": 53813,
      "training_step_time": 0.482926607131958
    },
    {
      "epoch": 0.00032845458984375,
      "model_forward_time": 0.11461043357849121,
      "step": 53814
    },
    {
      "epoch": 0.00032845458984375,
      "step": 53814,
      "training_step_time": 0.40569186210632324
    },
    {
      "epoch": 0.000328460693359375,
      "model_forward_time": 0.11472058296203613,
      "step": 53815
    },
    {
      "epoch": 0.000328460693359375,
      "step": 53815,
      "training_step_time": 0.3906378746032715
    },
    {
      "epoch": 0.000328466796875,
      "model_forward_time": 0.11549687385559082,
      "step": 53816
    },
    {
      "epoch": 0.000328466796875,
      "step": 53816,
      "training_step_time": 0.39896345138549805
    },
    {
      "epoch": 0.000328472900390625,
      "model_forward_time": 0.11484527587890625,
      "step": 53817
    },
    {
      "epoch": 0.000328472900390625,
      "step": 53817,
      "training_step_time": 0.4018118381500244
    },
    {
      "epoch": 0.00032847900390625,
      "model_forward_time": 0.11599087715148926,
      "step": 53818
    },
    {
      "epoch": 0.00032847900390625,
      "step": 53818,
      "training_step_time": 0.41179895401000977
    },
    {
      "epoch": 0.000328485107421875,
      "model_forward_time": 0.11463022232055664,
      "step": 53819
    },
    {
      "epoch": 0.000328485107421875,
      "step": 53819,
      "training_step_time": 0.4041297435760498
    },
    {
      "epoch": 0.0003284912109375,
      "grad_norm": 0.0850774496793747,
      "learning_rate": 2.8725267314068495e-06,
      "loss": 0.0333,
      "step": 53820
    },
    {
      "epoch": 0.0003284912109375,
      "model_forward_time": 0.11519193649291992,
      "step": 53820
    },
    {
      "epoch": 0.0003284912109375,
      "step": 53820,
      "training_step_time": 0.5478866100311279
    },
    {
      "epoch": 0.000328497314453125,
      "model_forward_time": 0.1145024299621582,
      "step": 53821
    },
    {
      "epoch": 0.000328497314453125,
      "step": 53821,
      "training_step_time": 0.3933570384979248
    },
    {
      "epoch": 0.00032850341796875,
      "model_forward_time": 0.11426234245300293,
      "step": 53822
    },
    {
      "epoch": 0.00032850341796875,
      "step": 53822,
      "training_step_time": 0.3955042362213135
    },
    {
      "epoch": 0.000328509521484375,
      "model_forward_time": 0.11525797843933105,
      "step": 53823
    },
    {
      "epoch": 0.000328509521484375,
      "step": 53823,
      "training_step_time": 0.4359300136566162
    },
    {
      "epoch": 0.000328515625,
      "model_forward_time": 0.1148366928100586,
      "step": 53824
    },
    {
      "epoch": 0.000328515625,
      "step": 53824,
      "training_step_time": 0.4993557929992676
    },
    {
      "epoch": 0.000328521728515625,
      "model_forward_time": 0.11422038078308105,
      "step": 53825
    },
    {
      "epoch": 0.000328521728515625,
      "step": 53825,
      "training_step_time": 0.40039849281311035
    },
    {
      "epoch": 0.00032852783203125,
      "model_forward_time": 0.11496615409851074,
      "step": 53826
    },
    {
      "epoch": 0.00032852783203125,
      "step": 53826,
      "training_step_time": 0.580435037612915
    },
    {
      "epoch": 0.000328533935546875,
      "model_forward_time": 0.11445951461791992,
      "step": 53827
    },
    {
      "epoch": 0.000328533935546875,
      "step": 53827,
      "training_step_time": 0.4384608268737793
    },
    {
      "epoch": 0.0003285400390625,
      "model_forward_time": 0.1140902042388916,
      "step": 53828
    },
    {
      "epoch": 0.0003285400390625,
      "step": 53828,
      "training_step_time": 0.3871033191680908
    },
    {
      "epoch": 0.000328546142578125,
      "model_forward_time": 0.11515307426452637,
      "step": 53829
    },
    {
      "epoch": 0.000328546142578125,
      "step": 53829,
      "training_step_time": 0.38629579544067383
    },
    {
      "epoch": 0.00032855224609375,
      "grad_norm": 0.07513968646526337,
      "learning_rate": 2.8633277387453308e-06,
      "loss": 0.034,
      "step": 53830
    },
    {
      "epoch": 0.00032855224609375,
      "model_forward_time": 0.11430096626281738,
      "step": 53830
    },
    {
      "epoch": 0.00032855224609375,
      "step": 53830,
      "training_step_time": 0.38855409622192383
    },
    {
      "epoch": 0.000328558349609375,
      "model_forward_time": 0.11477875709533691,
      "step": 53831
    },
    {
      "epoch": 0.000328558349609375,
      "step": 53831,
      "training_step_time": 0.38101935386657715
    },
    {
      "epoch": 0.000328564453125,
      "model_forward_time": 0.11541485786437988,
      "step": 53832
    },
    {
      "epoch": 0.000328564453125,
      "step": 53832,
      "training_step_time": 0.6697070598602295
    },
    {
      "epoch": 0.000328570556640625,
      "model_forward_time": 0.11516571044921875,
      "step": 53833
    },
    {
      "epoch": 0.000328570556640625,
      "step": 53833,
      "training_step_time": 0.38996315002441406
    },
    {
      "epoch": 0.00032857666015625,
      "model_forward_time": 0.11521458625793457,
      "step": 53834
    },
    {
      "epoch": 0.00032857666015625,
      "step": 53834,
      "training_step_time": 0.38567042350769043
    },
    {
      "epoch": 0.000328582763671875,
      "model_forward_time": 0.11464095115661621,
      "step": 53835
    },
    {
      "epoch": 0.000328582763671875,
      "step": 53835,
      "training_step_time": 0.3892946243286133
    },
    {
      "epoch": 0.0003285888671875,
      "model_forward_time": 0.11508607864379883,
      "step": 53836
    },
    {
      "epoch": 0.0003285888671875,
      "step": 53836,
      "training_step_time": 0.4107851982116699
    },
    {
      "epoch": 0.000328594970703125,
      "model_forward_time": 0.11407017707824707,
      "step": 53837
    },
    {
      "epoch": 0.000328594970703125,
      "step": 53837,
      "training_step_time": 0.45860838890075684
    },
    {
      "epoch": 0.00032860107421875,
      "model_forward_time": 0.11550140380859375,
      "step": 53838
    },
    {
      "epoch": 0.00032860107421875,
      "step": 53838,
      "training_step_time": 0.6811666488647461
    },
    {
      "epoch": 0.000328607177734375,
      "model_forward_time": 0.1148836612701416,
      "step": 53839
    },
    {
      "epoch": 0.000328607177734375,
      "step": 53839,
      "training_step_time": 0.40529322624206543
    },
    {
      "epoch": 0.00032861328125,
      "grad_norm": 0.10956671833992004,
      "learning_rate": 2.854143064960274e-06,
      "loss": 0.0365,
      "step": 53840
    },
    {
      "epoch": 0.00032861328125,
      "model_forward_time": 0.11441683769226074,
      "step": 53840
    },
    {
      "epoch": 0.00032861328125,
      "step": 53840,
      "training_step_time": 0.4568216800689697
    },
    {
      "epoch": 0.000328619384765625,
      "model_forward_time": 0.11426544189453125,
      "step": 53841
    },
    {
      "epoch": 0.000328619384765625,
      "step": 53841,
      "training_step_time": 0.48704028129577637
    },
    {
      "epoch": 0.00032862548828125,
      "model_forward_time": 0.11477065086364746,
      "step": 53842
    },
    {
      "epoch": 0.00032862548828125,
      "step": 53842,
      "training_step_time": 0.3826625347137451
    },
    {
      "epoch": 0.000328631591796875,
      "model_forward_time": 0.11422991752624512,
      "step": 53843
    },
    {
      "epoch": 0.000328631591796875,
      "step": 53843,
      "training_step_time": 0.38253068923950195
    },
    {
      "epoch": 0.0003286376953125,
      "model_forward_time": 0.11496353149414062,
      "step": 53844
    },
    {
      "epoch": 0.0003286376953125,
      "step": 53844,
      "training_step_time": 0.5572006702423096
    },
    {
      "epoch": 0.000328643798828125,
      "model_forward_time": 0.11453437805175781,
      "step": 53845
    },
    {
      "epoch": 0.000328643798828125,
      "step": 53845,
      "training_step_time": 0.3966553211212158
    },
    {
      "epoch": 0.00032864990234375,
      "model_forward_time": 0.11415266990661621,
      "step": 53846
    },
    {
      "epoch": 0.00032864990234375,
      "step": 53846,
      "training_step_time": 0.3928706645965576
    },
    {
      "epoch": 0.000328656005859375,
      "model_forward_time": 0.11478614807128906,
      "step": 53847
    },
    {
      "epoch": 0.000328656005859375,
      "step": 53847,
      "training_step_time": 0.39774012565612793
    },
    {
      "epoch": 0.000328662109375,
      "model_forward_time": 0.1149909496307373,
      "step": 53848
    },
    {
      "epoch": 0.000328662109375,
      "step": 53848,
      "training_step_time": 0.4013357162475586
    },
    {
      "epoch": 0.000328668212890625,
      "model_forward_time": 0.11528897285461426,
      "step": 53849
    },
    {
      "epoch": 0.000328668212890625,
      "step": 53849,
      "training_step_time": 0.398435115814209
    },
    {
      "epoch": 0.00032867431640625,
      "grad_norm": 0.11353171616792679,
      "learning_rate": 2.844972712841737e-06,
      "loss": 0.0336,
      "step": 53850
    },
    {
      "epoch": 0.00032867431640625,
      "model_forward_time": 0.11510872840881348,
      "step": 53850
    },
    {
      "epoch": 0.00032867431640625,
      "step": 53850,
      "training_step_time": 0.7803757190704346
    },
    {
      "epoch": 0.000328680419921875,
      "model_forward_time": 0.1147606372833252,
      "step": 53851
    },
    {
      "epoch": 0.000328680419921875,
      "step": 53851,
      "training_step_time": 0.44078493118286133
    },
    {
      "epoch": 0.0003286865234375,
      "model_forward_time": 0.11516618728637695,
      "step": 53852
    },
    {
      "epoch": 0.0003286865234375,
      "step": 53852,
      "training_step_time": 0.4780142307281494
    },
    {
      "epoch": 0.000328692626953125,
      "model_forward_time": 0.11413788795471191,
      "step": 53853
    },
    {
      "epoch": 0.000328692626953125,
      "step": 53853,
      "training_step_time": 0.4264678955078125
    },
    {
      "epoch": 0.00032869873046875,
      "model_forward_time": 0.1144876480102539,
      "step": 53854
    },
    {
      "epoch": 0.00032869873046875,
      "step": 53854,
      "training_step_time": 0.47264766693115234
    },
    {
      "epoch": 0.000328704833984375,
      "model_forward_time": 0.11406993865966797,
      "step": 53855
    },
    {
      "epoch": 0.000328704833984375,
      "step": 53855,
      "training_step_time": 0.43045544624328613
    },
    {
      "epoch": 0.0003287109375,
      "model_forward_time": 0.11440563201904297,
      "step": 53856
    },
    {
      "epoch": 0.0003287109375,
      "step": 53856,
      "training_step_time": 0.41750597953796387
    },
    {
      "epoch": 0.000328717041015625,
      "model_forward_time": 0.11437177658081055,
      "step": 53857
    },
    {
      "epoch": 0.000328717041015625,
      "step": 53857,
      "training_step_time": 0.3991100788116455
    },
    {
      "epoch": 0.00032872314453125,
      "model_forward_time": 0.11435437202453613,
      "step": 53858
    },
    {
      "epoch": 0.00032872314453125,
      "step": 53858,
      "training_step_time": 0.3950486183166504
    },
    {
      "epoch": 0.000328729248046875,
      "model_forward_time": 0.11438918113708496,
      "step": 53859
    },
    {
      "epoch": 0.000328729248046875,
      "step": 53859,
      "training_step_time": 0.3885006904602051
    },
    {
      "epoch": 0.0003287353515625,
      "grad_norm": 0.14083217084407806,
      "learning_rate": 2.8358166851754297e-06,
      "loss": 0.0383,
      "step": 53860
    },
    {
      "epoch": 0.0003287353515625,
      "model_forward_time": 0.11484217643737793,
      "step": 53860
    },
    {
      "epoch": 0.0003287353515625,
      "step": 53860,
      "training_step_time": 0.391554594039917
    },
    {
      "epoch": 0.000328741455078125,
      "model_forward_time": 0.11499929428100586,
      "step": 53861
    },
    {
      "epoch": 0.000328741455078125,
      "step": 53861,
      "training_step_time": 0.3952360153198242
    },
    {
      "epoch": 0.00032874755859375,
      "model_forward_time": 0.11469173431396484,
      "step": 53862
    },
    {
      "epoch": 0.00032874755859375,
      "step": 53862,
      "training_step_time": 0.6934185028076172
    },
    {
      "epoch": 0.000328753662109375,
      "model_forward_time": 0.11502671241760254,
      "step": 53863
    },
    {
      "epoch": 0.000328753662109375,
      "step": 53863,
      "training_step_time": 0.4401838779449463
    },
    {
      "epoch": 0.000328759765625,
      "model_forward_time": 0.11458897590637207,
      "step": 53864
    },
    {
      "epoch": 0.000328759765625,
      "step": 53864,
      "training_step_time": 0.3945314884185791
    },
    {
      "epoch": 0.000328765869140625,
      "model_forward_time": 0.11432147026062012,
      "step": 53865
    },
    {
      "epoch": 0.000328765869140625,
      "step": 53865,
      "training_step_time": 0.4818592071533203
    },
    {
      "epoch": 0.00032877197265625,
      "model_forward_time": 0.11415243148803711,
      "step": 53866
    },
    {
      "epoch": 0.00032877197265625,
      "step": 53866,
      "training_step_time": 0.4432852268218994
    },
    {
      "epoch": 0.000328778076171875,
      "model_forward_time": 0.11563730239868164,
      "step": 53867
    },
    {
      "epoch": 0.000328778076171875,
      "step": 53867,
      "training_step_time": 0.431027889251709
    },
    {
      "epoch": 0.0003287841796875,
      "model_forward_time": 0.11459207534790039,
      "step": 53868
    },
    {
      "epoch": 0.0003287841796875,
      "step": 53868,
      "training_step_time": 0.4749898910522461
    },
    {
      "epoch": 0.000328790283203125,
      "model_forward_time": 0.11487650871276855,
      "step": 53869
    },
    {
      "epoch": 0.000328790283203125,
      "step": 53869,
      "training_step_time": 0.4055156707763672
    },
    {
      "epoch": 0.00032879638671875,
      "grad_norm": 0.08636055141687393,
      "learning_rate": 2.8266749847427275e-06,
      "loss": 0.0341,
      "step": 53870
    },
    {
      "epoch": 0.00032879638671875,
      "model_forward_time": 0.114990234375,
      "step": 53870
    },
    {
      "epoch": 0.00032879638671875,
      "step": 53870,
      "training_step_time": 0.3941059112548828
    },
    {
      "epoch": 0.000328802490234375,
      "model_forward_time": 0.11479949951171875,
      "step": 53871
    },
    {
      "epoch": 0.000328802490234375,
      "step": 53871,
      "training_step_time": 0.38631701469421387
    },
    {
      "epoch": 0.00032880859375,
      "model_forward_time": 0.1153860092163086,
      "step": 53872
    },
    {
      "epoch": 0.00032880859375,
      "step": 53872,
      "training_step_time": 0.39528846740722656
    },
    {
      "epoch": 0.000328814697265625,
      "model_forward_time": 0.1150503158569336,
      "step": 53873
    },
    {
      "epoch": 0.000328814697265625,
      "step": 53873,
      "training_step_time": 0.38381266593933105
    },
    {
      "epoch": 0.00032882080078125,
      "model_forward_time": 0.11475157737731934,
      "step": 53874
    },
    {
      "epoch": 0.00032882080078125,
      "step": 53874,
      "training_step_time": 0.670802116394043
    },
    {
      "epoch": 0.000328826904296875,
      "model_forward_time": 0.11490011215209961,
      "step": 53875
    },
    {
      "epoch": 0.000328826904296875,
      "step": 53875,
      "training_step_time": 0.44975733757019043
    },
    {
      "epoch": 0.0003288330078125,
      "model_forward_time": 0.11505937576293945,
      "step": 53876
    },
    {
      "epoch": 0.0003288330078125,
      "step": 53876,
      "training_step_time": 0.4084198474884033
    },
    {
      "epoch": 0.000328839111328125,
      "model_forward_time": 0.11486291885375977,
      "step": 53877
    },
    {
      "epoch": 0.000328839111328125,
      "step": 53877,
      "training_step_time": 0.44968152046203613
    },
    {
      "epoch": 0.00032884521484375,
      "model_forward_time": 0.11491012573242188,
      "step": 53878
    },
    {
      "epoch": 0.00032884521484375,
      "step": 53878,
      "training_step_time": 0.4728879928588867
    },
    {
      "epoch": 0.000328851318359375,
      "model_forward_time": 0.1142127513885498,
      "step": 53879
    },
    {
      "epoch": 0.000328851318359375,
      "step": 53879,
      "training_step_time": 0.42656826972961426
    },
    {
      "epoch": 0.000328857421875,
      "grad_norm": 0.09844644367694855,
      "learning_rate": 2.817547614320615e-06,
      "loss": 0.0328,
      "step": 53880
    },
    {
      "epoch": 0.000328857421875,
      "model_forward_time": 0.11506533622741699,
      "step": 53880
    },
    {
      "epoch": 0.000328857421875,
      "step": 53880,
      "training_step_time": 0.5224015712738037
    },
    {
      "epoch": 0.000328863525390625,
      "model_forward_time": 0.1146852970123291,
      "step": 53881
    },
    {
      "epoch": 0.000328863525390625,
      "step": 53881,
      "training_step_time": 0.3993496894836426
    },
    {
      "epoch": 0.00032886962890625,
      "model_forward_time": 0.11513614654541016,
      "step": 53882
    },
    {
      "epoch": 0.00032886962890625,
      "step": 53882,
      "training_step_time": 0.4758334159851074
    },
    {
      "epoch": 0.000328875732421875,
      "model_forward_time": 0.11454081535339355,
      "step": 53883
    },
    {
      "epoch": 0.000328875732421875,
      "step": 53883,
      "training_step_time": 0.39504408836364746
    },
    {
      "epoch": 0.0003288818359375,
      "model_forward_time": 0.11487388610839844,
      "step": 53884
    },
    {
      "epoch": 0.0003288818359375,
      "step": 53884,
      "training_step_time": 0.3911855220794678
    },
    {
      "epoch": 0.000328887939453125,
      "model_forward_time": 0.11489272117614746,
      "step": 53885
    },
    {
      "epoch": 0.000328887939453125,
      "step": 53885,
      "training_step_time": 0.39908456802368164
    },
    {
      "epoch": 0.00032889404296875,
      "model_forward_time": 0.11465239524841309,
      "step": 53886
    },
    {
      "epoch": 0.00032889404296875,
      "step": 53886,
      "training_step_time": 0.7156374454498291
    },
    {
      "epoch": 0.000328900146484375,
      "model_forward_time": 0.11358785629272461,
      "step": 53887
    },
    {
      "epoch": 0.000328900146484375,
      "step": 53887,
      "training_step_time": 0.39984774589538574
    },
    {
      "epoch": 0.00032890625,
      "model_forward_time": 0.1147465705871582,
      "step": 53888
    },
    {
      "epoch": 0.00032890625,
      "step": 53888,
      "training_step_time": 0.4481971263885498
    },
    {
      "epoch": 0.000328912353515625,
      "model_forward_time": 0.11466670036315918,
      "step": 53889
    },
    {
      "epoch": 0.000328912353515625,
      "step": 53889,
      "training_step_time": 0.4068613052368164
    },
    {
      "epoch": 0.00032891845703125,
      "grad_norm": 0.08012066781520844,
      "learning_rate": 2.8084345766817676e-06,
      "loss": 0.0366,
      "step": 53890
    },
    {
      "epoch": 0.00032891845703125,
      "model_forward_time": 0.11408615112304688,
      "step": 53890
    },
    {
      "epoch": 0.00032891845703125,
      "step": 53890,
      "training_step_time": 0.4457881450653076
    },
    {
      "epoch": 0.000328924560546875,
      "model_forward_time": 0.11442947387695312,
      "step": 53891
    },
    {
      "epoch": 0.000328924560546875,
      "step": 53891,
      "training_step_time": 0.4251251220703125
    },
    {
      "epoch": 0.0003289306640625,
      "model_forward_time": 0.11545920372009277,
      "step": 53892
    },
    {
      "epoch": 0.0003289306640625,
      "step": 53892,
      "training_step_time": 0.5993232727050781
    },
    {
      "epoch": 0.000328936767578125,
      "model_forward_time": 0.11529135704040527,
      "step": 53893
    },
    {
      "epoch": 0.000328936767578125,
      "step": 53893,
      "training_step_time": 0.4381089210510254
    },
    {
      "epoch": 0.00032894287109375,
      "model_forward_time": 0.11474013328552246,
      "step": 53894
    },
    {
      "epoch": 0.00032894287109375,
      "step": 53894,
      "training_step_time": 0.3903355598449707
    },
    {
      "epoch": 0.000328948974609375,
      "model_forward_time": 0.1148531436920166,
      "step": 53895
    },
    {
      "epoch": 0.000328948974609375,
      "step": 53895,
      "training_step_time": 0.48697805404663086
    },
    {
      "epoch": 0.000328955078125,
      "model_forward_time": 0.11462044715881348,
      "step": 53896
    },
    {
      "epoch": 0.000328955078125,
      "step": 53896,
      "training_step_time": 0.4851408004760742
    },
    {
      "epoch": 0.000328961181640625,
      "model_forward_time": 0.11405658721923828,
      "step": 53897
    },
    {
      "epoch": 0.000328961181640625,
      "step": 53897,
      "training_step_time": 0.39575648307800293
    },
    {
      "epoch": 0.00032896728515625,
      "model_forward_time": 0.11563277244567871,
      "step": 53898
    },
    {
      "epoch": 0.00032896728515625,
      "step": 53898,
      "training_step_time": 0.5643496513366699
    },
    {
      "epoch": 0.000328973388671875,
      "model_forward_time": 0.11444425582885742,
      "step": 53899
    },
    {
      "epoch": 0.000328973388671875,
      "step": 53899,
      "training_step_time": 0.40061378479003906
    },
    {
      "epoch": 0.0003289794921875,
      "grad_norm": 0.09560509771108627,
      "learning_rate": 2.7993358745944608e-06,
      "loss": 0.0349,
      "step": 53900
    },
    {
      "epoch": 0.0003289794921875,
      "model_forward_time": 0.11434769630432129,
      "step": 53900
    },
    {
      "epoch": 0.0003289794921875,
      "step": 53900,
      "training_step_time": 0.39693284034729004
    },
    {
      "epoch": 0.000328985595703125,
      "model_forward_time": 0.11481404304504395,
      "step": 53901
    },
    {
      "epoch": 0.000328985595703125,
      "step": 53901,
      "training_step_time": 0.41971516609191895
    },
    {
      "epoch": 0.00032899169921875,
      "model_forward_time": 0.11440610885620117,
      "step": 53902
    },
    {
      "epoch": 0.00032899169921875,
      "step": 53902,
      "training_step_time": 0.3915731906890869
    },
    {
      "epoch": 0.000328997802734375,
      "model_forward_time": 0.11555624008178711,
      "step": 53903
    },
    {
      "epoch": 0.000328997802734375,
      "step": 53903,
      "training_step_time": 0.39086270332336426
    },
    {
      "epoch": 0.00032900390625,
      "model_forward_time": 0.1145944595336914,
      "step": 53904
    },
    {
      "epoch": 0.00032900390625,
      "step": 53904,
      "training_step_time": 0.6369082927703857
    },
    {
      "epoch": 0.000329010009765625,
      "model_forward_time": 0.11493539810180664,
      "step": 53905
    },
    {
      "epoch": 0.000329010009765625,
      "step": 53905,
      "training_step_time": 0.3997836112976074
    },
    {
      "epoch": 0.00032901611328125,
      "model_forward_time": 0.11441779136657715,
      "step": 53906
    },
    {
      "epoch": 0.00032901611328125,
      "step": 53906,
      "training_step_time": 0.39969944953918457
    },
    {
      "epoch": 0.000329022216796875,
      "model_forward_time": 0.11562561988830566,
      "step": 53907
    },
    {
      "epoch": 0.000329022216796875,
      "step": 53907,
      "training_step_time": 0.43122339248657227
    },
    {
      "epoch": 0.0003290283203125,
      "model_forward_time": 0.11472964286804199,
      "step": 53908
    },
    {
      "epoch": 0.0003290283203125,
      "step": 53908,
      "training_step_time": 0.38346219062805176
    },
    {
      "epoch": 0.000329034423828125,
      "model_forward_time": 0.1147146224975586,
      "step": 53909
    },
    {
      "epoch": 0.000329034423828125,
      "step": 53909,
      "training_step_time": 0.42514991760253906
    },
    {
      "epoch": 0.00032904052734375,
      "grad_norm": 0.10090946406126022,
      "learning_rate": 2.790251510822661e-06,
      "loss": 0.0365,
      "step": 53910
    },
    {
      "epoch": 0.00032904052734375,
      "model_forward_time": 0.11483025550842285,
      "step": 53910
    },
    {
      "epoch": 0.00032904052734375,
      "step": 53910,
      "training_step_time": 0.7823662757873535
    },
    {
      "epoch": 0.000329046630859375,
      "model_forward_time": 0.11464834213256836,
      "step": 53911
    },
    {
      "epoch": 0.000329046630859375,
      "step": 53911,
      "training_step_time": 0.39431285858154297
    },
    {
      "epoch": 0.000329052734375,
      "model_forward_time": 0.11469745635986328,
      "step": 53912
    },
    {
      "epoch": 0.000329052734375,
      "step": 53912,
      "training_step_time": 0.39275360107421875
    },
    {
      "epoch": 0.000329058837890625,
      "model_forward_time": 0.11428451538085938,
      "step": 53913
    },
    {
      "epoch": 0.000329058837890625,
      "step": 53913,
      "training_step_time": 0.4397428035736084
    },
    {
      "epoch": 0.00032906494140625,
      "model_forward_time": 0.1143953800201416,
      "step": 53914
    },
    {
      "epoch": 0.00032906494140625,
      "step": 53914,
      "training_step_time": 0.39014220237731934
    },
    {
      "epoch": 0.000329071044921875,
      "model_forward_time": 0.11412787437438965,
      "step": 53915
    },
    {
      "epoch": 0.000329071044921875,
      "step": 53915,
      "training_step_time": 0.390211820602417
    },
    {
      "epoch": 0.0003290771484375,
      "model_forward_time": 0.11589360237121582,
      "step": 53916
    },
    {
      "epoch": 0.0003290771484375,
      "step": 53916,
      "training_step_time": 0.642916202545166
    },
    {
      "epoch": 0.000329083251953125,
      "model_forward_time": 0.11542177200317383,
      "step": 53917
    },
    {
      "epoch": 0.000329083251953125,
      "step": 53917,
      "training_step_time": 0.38001561164855957
    },
    {
      "epoch": 0.00032908935546875,
      "model_forward_time": 0.11471676826477051,
      "step": 53918
    },
    {
      "epoch": 0.00032908935546875,
      "step": 53918,
      "training_step_time": 0.4301743507385254
    },
    {
      "epoch": 0.000329095458984375,
      "model_forward_time": 0.11455988883972168,
      "step": 53919
    },
    {
      "epoch": 0.000329095458984375,
      "step": 53919,
      "training_step_time": 0.43172121047973633
    },
    {
      "epoch": 0.0003291015625,
      "grad_norm": 0.0953906998038292,
      "learning_rate": 2.7811814881259503e-06,
      "loss": 0.0371,
      "step": 53920
    },
    {
      "epoch": 0.0003291015625,
      "model_forward_time": 0.11482977867126465,
      "step": 53920
    },
    {
      "epoch": 0.0003291015625,
      "step": 53920,
      "training_step_time": 0.45849156379699707
    },
    {
      "epoch": 0.000329107666015625,
      "model_forward_time": 0.11438870429992676,
      "step": 53921
    },
    {
      "epoch": 0.000329107666015625,
      "step": 53921,
      "training_step_time": 0.3631448745727539
    },
    {
      "epoch": 0.00032911376953125,
      "model_forward_time": 0.11454367637634277,
      "step": 53922
    },
    {
      "epoch": 0.00032911376953125,
      "step": 53922,
      "training_step_time": 0.44350123405456543
    },
    {
      "epoch": 0.000329119873046875,
      "model_forward_time": 0.11539578437805176,
      "step": 53923
    },
    {
      "epoch": 0.000329119873046875,
      "step": 53923,
      "training_step_time": 0.4233744144439697
    },
    {
      "epoch": 0.0003291259765625,
      "model_forward_time": 0.11515617370605469,
      "step": 53924
    },
    {
      "epoch": 0.0003291259765625,
      "step": 53924,
      "training_step_time": 0.38260483741760254
    },
    {
      "epoch": 0.000329132080078125,
      "model_forward_time": 0.11471891403198242,
      "step": 53925
    },
    {
      "epoch": 0.000329132080078125,
      "step": 53925,
      "training_step_time": 0.3889191150665283
    },
    {
      "epoch": 0.00032913818359375,
      "model_forward_time": 0.11534357070922852,
      "step": 53926
    },
    {
      "epoch": 0.00032913818359375,
      "step": 53926,
      "training_step_time": 0.40675950050354004
    },
    {
      "epoch": 0.000329144287109375,
      "model_forward_time": 0.11537313461303711,
      "step": 53927
    },
    {
      "epoch": 0.000329144287109375,
      "step": 53927,
      "training_step_time": 0.4694671630859375
    },
    {
      "epoch": 0.000329150390625,
      "model_forward_time": 0.11532425880432129,
      "step": 53928
    },
    {
      "epoch": 0.000329150390625,
      "step": 53928,
      "training_step_time": 0.5193231105804443
    },
    {
      "epoch": 0.000329156494140625,
      "model_forward_time": 0.11490702629089355,
      "step": 53929
    },
    {
      "epoch": 0.000329156494140625,
      "step": 53929,
      "training_step_time": 0.3942556381225586
    },
    {
      "epoch": 0.00032916259765625,
      "grad_norm": 0.09247192740440369,
      "learning_rate": 2.7721258092595627e-06,
      "loss": 0.0345,
      "step": 53930
    },
    {
      "epoch": 0.00032916259765625,
      "model_forward_time": 0.11520242691040039,
      "step": 53930
    },
    {
      "epoch": 0.00032916259765625,
      "step": 53930,
      "training_step_time": 0.4872732162475586
    },
    {
      "epoch": 0.000329168701171875,
      "model_forward_time": 0.1142265796661377,
      "step": 53931
    },
    {
      "epoch": 0.000329168701171875,
      "step": 53931,
      "training_step_time": 0.4200623035430908
    },
    {
      "epoch": 0.0003291748046875,
      "model_forward_time": 0.11511754989624023,
      "step": 53932
    },
    {
      "epoch": 0.0003291748046875,
      "step": 53932,
      "training_step_time": 0.47455644607543945
    },
    {
      "epoch": 0.000329180908203125,
      "model_forward_time": 0.1145164966583252,
      "step": 53933
    },
    {
      "epoch": 0.000329180908203125,
      "step": 53933,
      "training_step_time": 0.41176652908325195
    },
    {
      "epoch": 0.00032918701171875,
      "model_forward_time": 0.11505866050720215,
      "step": 53934
    },
    {
      "epoch": 0.00032918701171875,
      "step": 53934,
      "training_step_time": 0.5853703022003174
    },
    {
      "epoch": 0.000329193115234375,
      "model_forward_time": 0.1148369312286377,
      "step": 53935
    },
    {
      "epoch": 0.000329193115234375,
      "step": 53935,
      "training_step_time": 0.40599751472473145
    },
    {
      "epoch": 0.00032919921875,
      "model_forward_time": 0.11498475074768066,
      "step": 53936
    },
    {
      "epoch": 0.00032919921875,
      "step": 53936,
      "training_step_time": 0.3947482109069824
    },
    {
      "epoch": 0.000329205322265625,
      "model_forward_time": 0.11484408378601074,
      "step": 53937
    },
    {
      "epoch": 0.000329205322265625,
      "step": 53937,
      "training_step_time": 0.46982288360595703
    },
    {
      "epoch": 0.00032921142578125,
      "model_forward_time": 0.11509132385253906,
      "step": 53938
    },
    {
      "epoch": 0.00032921142578125,
      "step": 53938,
      "training_step_time": 0.3878934383392334
    },
    {
      "epoch": 0.000329217529296875,
      "model_forward_time": 0.11540865898132324,
      "step": 53939
    },
    {
      "epoch": 0.000329217529296875,
      "step": 53939,
      "training_step_time": 0.40920281410217285
    },
    {
      "epoch": 0.0003292236328125,
      "grad_norm": 0.09319320321083069,
      "learning_rate": 2.7630844769743757e-06,
      "loss": 0.0326,
      "step": 53940
    },
    {
      "epoch": 0.0003292236328125,
      "model_forward_time": 0.11496210098266602,
      "step": 53940
    },
    {
      "epoch": 0.0003292236328125,
      "step": 53940,
      "training_step_time": 0.3982267379760742
    },
    {
      "epoch": 0.000329229736328125,
      "model_forward_time": 0.1154623031616211,
      "step": 53941
    },
    {
      "epoch": 0.000329229736328125,
      "step": 53941,
      "training_step_time": 0.40544915199279785
    },
    {
      "epoch": 0.00032923583984375,
      "model_forward_time": 0.1147303581237793,
      "step": 53942
    },
    {
      "epoch": 0.00032923583984375,
      "step": 53942,
      "training_step_time": 0.40032315254211426
    },
    {
      "epoch": 0.000329241943359375,
      "model_forward_time": 0.11547017097473145,
      "step": 53943
    },
    {
      "epoch": 0.000329241943359375,
      "step": 53943,
      "training_step_time": 0.3992884159088135
    },
    {
      "epoch": 0.000329248046875,
      "model_forward_time": 0.11466789245605469,
      "step": 53944
    },
    {
      "epoch": 0.000329248046875,
      "step": 53944,
      "training_step_time": 0.4395456314086914
    },
    {
      "epoch": 0.000329254150390625,
      "model_forward_time": 0.11442732810974121,
      "step": 53945
    },
    {
      "epoch": 0.000329254150390625,
      "step": 53945,
      "training_step_time": 0.4274466037750244
    },
    {
      "epoch": 0.00032926025390625,
      "model_forward_time": 0.11467099189758301,
      "step": 53946
    },
    {
      "epoch": 0.00032926025390625,
      "step": 53946,
      "training_step_time": 0.8521013259887695
    },
    {
      "epoch": 0.000329266357421875,
      "model_forward_time": 0.11410737037658691,
      "step": 53947
    },
    {
      "epoch": 0.000329266357421875,
      "step": 53947,
      "training_step_time": 0.3986682891845703
    },
    {
      "epoch": 0.0003292724609375,
      "model_forward_time": 0.11372876167297363,
      "step": 53948
    },
    {
      "epoch": 0.0003292724609375,
      "step": 53948,
      "training_step_time": 0.4208180904388428
    },
    {
      "epoch": 0.000329278564453125,
      "model_forward_time": 0.11449909210205078,
      "step": 53949
    },
    {
      "epoch": 0.000329278564453125,
      "step": 53949,
      "training_step_time": 0.4359252452850342
    },
    {
      "epoch": 0.00032928466796875,
      "grad_norm": 0.08112301677465439,
      "learning_rate": 2.754057494016904e-06,
      "loss": 0.0357,
      "step": 53950
    },
    {
      "epoch": 0.00032928466796875,
      "model_forward_time": 0.11487364768981934,
      "step": 53950
    },
    {
      "epoch": 0.00032928466796875,
      "step": 53950,
      "training_step_time": 0.4844553470611572
    },
    {
      "epoch": 0.000329290771484375,
      "model_forward_time": 0.11456012725830078,
      "step": 53951
    },
    {
      "epoch": 0.000329290771484375,
      "step": 53951,
      "training_step_time": 0.45403289794921875
    },
    {
      "epoch": 0.000329296875,
      "model_forward_time": 0.11479663848876953,
      "step": 53952
    },
    {
      "epoch": 0.000329296875,
      "step": 53952,
      "training_step_time": 0.4467332363128662
    },
    {
      "epoch": 0.000329302978515625,
      "model_forward_time": 0.11424779891967773,
      "step": 53953
    },
    {
      "epoch": 0.000329302978515625,
      "step": 53953,
      "training_step_time": 0.3894186019897461
    },
    {
      "epoch": 0.00032930908203125,
      "model_forward_time": 0.11470508575439453,
      "step": 53954
    },
    {
      "epoch": 0.00032930908203125,
      "step": 53954,
      "training_step_time": 0.3845846652984619
    },
    {
      "epoch": 0.000329315185546875,
      "model_forward_time": 0.1149141788482666,
      "step": 53955
    },
    {
      "epoch": 0.000329315185546875,
      "step": 53955,
      "training_step_time": 0.38520312309265137
    },
    {
      "epoch": 0.0003293212890625,
      "model_forward_time": 0.11534523963928223,
      "step": 53956
    },
    {
      "epoch": 0.0003293212890625,
      "step": 53956,
      "training_step_time": 0.3972022533416748
    },
    {
      "epoch": 0.000329327392578125,
      "model_forward_time": 0.11498165130615234,
      "step": 53957
    },
    {
      "epoch": 0.000329327392578125,
      "step": 53957,
      "training_step_time": 0.39005327224731445
    },
    {
      "epoch": 0.00032933349609375,
      "model_forward_time": 0.11489272117614746,
      "step": 53958
    },
    {
      "epoch": 0.00032933349609375,
      "step": 53958,
      "training_step_time": 1.028503179550171
    },
    {
      "epoch": 0.000329339599609375,
      "model_forward_time": 0.11482119560241699,
      "step": 53959
    },
    {
      "epoch": 0.000329339599609375,
      "step": 53959,
      "training_step_time": 0.4631519317626953
    },
    {
      "epoch": 0.000329345703125,
      "grad_norm": 0.06572617590427399,
      "learning_rate": 2.7450448631293036e-06,
      "loss": 0.0404,
      "step": 53960
    },
    {
      "epoch": 0.000329345703125,
      "model_forward_time": 0.11371874809265137,
      "step": 53960
    },
    {
      "epoch": 0.000329345703125,
      "step": 53960,
      "training_step_time": 0.42601799964904785
    },
    {
      "epoch": 0.000329351806640625,
      "model_forward_time": 0.11403989791870117,
      "step": 53961
    },
    {
      "epoch": 0.000329351806640625,
      "step": 53961,
      "training_step_time": 0.4694240093231201
    },
    {
      "epoch": 0.00032935791015625,
      "model_forward_time": 0.11394786834716797,
      "step": 53962
    },
    {
      "epoch": 0.00032935791015625,
      "step": 53962,
      "training_step_time": 0.37716174125671387
    },
    {
      "epoch": 0.000329364013671875,
      "model_forward_time": 0.11433601379394531,
      "step": 53963
    },
    {
      "epoch": 0.000329364013671875,
      "step": 53963,
      "training_step_time": 0.4470851421356201
    },
    {
      "epoch": 0.0003293701171875,
      "model_forward_time": 0.1146848201751709,
      "step": 53964
    },
    {
      "epoch": 0.0003293701171875,
      "step": 53964,
      "training_step_time": 0.5218665599822998
    },
    {
      "epoch": 0.000329376220703125,
      "model_forward_time": 0.11454987525939941,
      "step": 53965
    },
    {
      "epoch": 0.000329376220703125,
      "step": 53965,
      "training_step_time": 0.39250850677490234
    },
    {
      "epoch": 0.00032938232421875,
      "model_forward_time": 0.11695265769958496,
      "step": 53966
    },
    {
      "epoch": 0.00032938232421875,
      "step": 53966,
      "training_step_time": 0.386944055557251
    },
    {
      "epoch": 0.000329388427734375,
      "model_forward_time": 0.11538457870483398,
      "step": 53967
    },
    {
      "epoch": 0.000329388427734375,
      "step": 53967,
      "training_step_time": 0.39232945442199707
    },
    {
      "epoch": 0.00032939453125,
      "model_forward_time": 0.11536860466003418,
      "step": 53968
    },
    {
      "epoch": 0.00032939453125,
      "step": 53968,
      "training_step_time": 0.38715338706970215
    },
    {
      "epoch": 0.000329400634765625,
      "model_forward_time": 0.11520123481750488,
      "step": 53969
    },
    {
      "epoch": 0.000329400634765625,
      "step": 53969,
      "training_step_time": 0.3925642967224121
    },
    {
      "epoch": 0.00032940673828125,
      "grad_norm": 0.11743292212486267,
      "learning_rate": 2.73604658704939e-06,
      "loss": 0.0338,
      "step": 53970
    },
    {
      "epoch": 0.00032940673828125,
      "model_forward_time": 0.11562395095825195,
      "step": 53970
    },
    {
      "epoch": 0.00032940673828125,
      "step": 53970,
      "training_step_time": 0.7758922576904297
    },
    {
      "epoch": 0.000329412841796875,
      "model_forward_time": 0.114501953125,
      "step": 53971
    },
    {
      "epoch": 0.000329412841796875,
      "step": 53971,
      "training_step_time": 0.45507311820983887
    },
    {
      "epoch": 0.0003294189453125,
      "model_forward_time": 0.11495447158813477,
      "step": 53972
    },
    {
      "epoch": 0.0003294189453125,
      "step": 53972,
      "training_step_time": 0.4678530693054199
    },
    {
      "epoch": 0.000329425048828125,
      "model_forward_time": 0.11405611038208008,
      "step": 53973
    },
    {
      "epoch": 0.000329425048828125,
      "step": 53973,
      "training_step_time": 0.3742837905883789
    },
    {
      "epoch": 0.00032943115234375,
      "model_forward_time": 0.11463308334350586,
      "step": 53974
    },
    {
      "epoch": 0.00032943115234375,
      "step": 53974,
      "training_step_time": 0.43245434761047363
    },
    {
      "epoch": 0.000329437255859375,
      "model_forward_time": 0.11446928977966309,
      "step": 53975
    },
    {
      "epoch": 0.000329437255859375,
      "step": 53975,
      "training_step_time": 0.4813969135284424
    },
    {
      "epoch": 0.000329443359375,
      "model_forward_time": 0.11463522911071777,
      "step": 53976
    },
    {
      "epoch": 0.000329443359375,
      "step": 53976,
      "training_step_time": 0.4636080265045166
    },
    {
      "epoch": 0.000329449462890625,
      "model_forward_time": 0.1146230697631836,
      "step": 53977
    },
    {
      "epoch": 0.000329449462890625,
      "step": 53977,
      "training_step_time": 0.48883938789367676
    },
    {
      "epoch": 0.00032945556640625,
      "model_forward_time": 0.11480879783630371,
      "step": 53978
    },
    {
      "epoch": 0.00032945556640625,
      "step": 53978,
      "training_step_time": 0.4118027687072754
    },
    {
      "epoch": 0.000329461669921875,
      "model_forward_time": 0.11436605453491211,
      "step": 53979
    },
    {
      "epoch": 0.000329461669921875,
      "step": 53979,
      "training_step_time": 0.39339351654052734
    },
    {
      "epoch": 0.0003294677734375,
      "grad_norm": 0.11192954331636429,
      "learning_rate": 2.7270626685105828e-06,
      "loss": 0.039,
      "step": 53980
    },
    {
      "epoch": 0.0003294677734375,
      "model_forward_time": 0.11435508728027344,
      "step": 53980
    },
    {
      "epoch": 0.0003294677734375,
      "step": 53980,
      "training_step_time": 0.3881947994232178
    },
    {
      "epoch": 0.000329473876953125,
      "model_forward_time": 0.11472797393798828,
      "step": 53981
    },
    {
      "epoch": 0.000329473876953125,
      "step": 53981,
      "training_step_time": 0.3886902332305908
    },
    {
      "epoch": 0.00032947998046875,
      "model_forward_time": 0.11528563499450684,
      "step": 53982
    },
    {
      "epoch": 0.00032947998046875,
      "step": 53982,
      "training_step_time": 0.4911212921142578
    },
    {
      "epoch": 0.000329486083984375,
      "model_forward_time": 0.11569380760192871,
      "step": 53983
    },
    {
      "epoch": 0.000329486083984375,
      "step": 53983,
      "training_step_time": 0.39141321182250977
    },
    {
      "epoch": 0.0003294921875,
      "model_forward_time": 0.11511778831481934,
      "step": 53984
    },
    {
      "epoch": 0.0003294921875,
      "step": 53984,
      "training_step_time": 0.5049443244934082
    },
    {
      "epoch": 0.000329498291015625,
      "model_forward_time": 0.11514425277709961,
      "step": 53985
    },
    {
      "epoch": 0.000329498291015625,
      "step": 53985,
      "training_step_time": 0.4270970821380615
    },
    {
      "epoch": 0.00032950439453125,
      "model_forward_time": 0.11574220657348633,
      "step": 53986
    },
    {
      "epoch": 0.00032950439453125,
      "step": 53986,
      "training_step_time": 0.48216962814331055
    },
    {
      "epoch": 0.000329510498046875,
      "model_forward_time": 0.1140291690826416,
      "step": 53987
    },
    {
      "epoch": 0.000329510498046875,
      "step": 53987,
      "training_step_time": 0.45145463943481445
    },
    {
      "epoch": 0.0003295166015625,
      "model_forward_time": 0.11467123031616211,
      "step": 53988
    },
    {
      "epoch": 0.0003295166015625,
      "step": 53988,
      "training_step_time": 0.493084192276001
    },
    {
      "epoch": 0.000329522705078125,
      "model_forward_time": 0.1142127513885498,
      "step": 53989
    },
    {
      "epoch": 0.000329522705078125,
      "step": 53989,
      "training_step_time": 0.38861989974975586
    },
    {
      "epoch": 0.00032952880859375,
      "grad_norm": 0.10166477411985397,
      "learning_rate": 2.718093110241976e-06,
      "loss": 0.0372,
      "step": 53990
    },
    {
      "epoch": 0.00032952880859375,
      "model_forward_time": 0.11478018760681152,
      "step": 53990
    },
    {
      "epoch": 0.00032952880859375,
      "step": 53990,
      "training_step_time": 0.47409939765930176
    },
    {
      "epoch": 0.000329534912109375,
      "model_forward_time": 0.11447954177856445,
      "step": 53991
    },
    {
      "epoch": 0.000329534912109375,
      "step": 53991,
      "training_step_time": 0.48859119415283203
    },
    {
      "epoch": 0.000329541015625,
      "model_forward_time": 0.11447310447692871,
      "step": 53992
    },
    {
      "epoch": 0.000329541015625,
      "step": 53992,
      "training_step_time": 0.41851139068603516
    },
    {
      "epoch": 0.000329547119140625,
      "model_forward_time": 0.11383318901062012,
      "step": 53993
    },
    {
      "epoch": 0.000329547119140625,
      "step": 53993,
      "training_step_time": 0.38489556312561035
    },
    {
      "epoch": 0.00032955322265625,
      "model_forward_time": 0.11520528793334961,
      "step": 53994
    },
    {
      "epoch": 0.00032955322265625,
      "step": 53994,
      "training_step_time": 0.5111103057861328
    },
    {
      "epoch": 0.000329559326171875,
      "model_forward_time": 0.11474084854125977,
      "step": 53995
    },
    {
      "epoch": 0.000329559326171875,
      "step": 53995,
      "training_step_time": 0.38996386528015137
    },
    {
      "epoch": 0.0003295654296875,
      "model_forward_time": 0.11487269401550293,
      "step": 53996
    },
    {
      "epoch": 0.0003295654296875,
      "step": 53996,
      "training_step_time": 0.39586520195007324
    },
    {
      "epoch": 0.000329571533203125,
      "model_forward_time": 0.11508750915527344,
      "step": 53997
    },
    {
      "epoch": 0.000329571533203125,
      "step": 53997,
      "training_step_time": 0.38546061515808105
    },
    {
      "epoch": 0.00032957763671875,
      "model_forward_time": 0.11510133743286133,
      "step": 53998
    },
    {
      "epoch": 0.00032957763671875,
      "step": 53998,
      "training_step_time": 0.40821099281311035
    },
    {
      "epoch": 0.000329583740234375,
      "model_forward_time": 0.11437654495239258,
      "step": 53999
    },
    {
      "epoch": 0.000329583740234375,
      "step": 53999,
      "training_step_time": 0.41501665115356445
    },
    {
      "epoch": 0.00032958984375,
      "grad_norm": 0.08391240239143372,
      "learning_rate": 2.7091379149682685e-06,
      "loss": 0.035,
      "step": 54000
    },
    {
      "epoch": 0.00032958984375,
      "model_forward_time": 0.11300420761108398,
      "step": 54000
    },
    {
      "epoch": 0.00032958984375,
      "step": 54000,
      "training_step_time": 0.3536393642425537
    },
    {
      "epoch": 0.000329595947265625,
      "model_forward_time": 0.11225080490112305,
      "step": 54001
    },
    {
      "epoch": 0.000329595947265625,
      "step": 54001,
      "training_step_time": 0.4166994094848633
    },
    {
      "epoch": 0.00032960205078125,
      "model_forward_time": 0.11315083503723145,
      "step": 54002
    },
    {
      "epoch": 0.00032960205078125,
      "step": 54002,
      "training_step_time": 0.4364149570465088
    },
    {
      "epoch": 0.000329608154296875,
      "model_forward_time": 0.11351203918457031,
      "step": 54003
    },
    {
      "epoch": 0.000329608154296875,
      "step": 54003,
      "training_step_time": 0.4215359687805176
    },
    {
      "epoch": 0.0003296142578125,
      "model_forward_time": 0.1139068603515625,
      "step": 54004
    },
    {
      "epoch": 0.0003296142578125,
      "step": 54004,
      "training_step_time": 0.4034426212310791
    },
    {
      "epoch": 0.000329620361328125,
      "model_forward_time": 0.11435174942016602,
      "step": 54005
    },
    {
      "epoch": 0.000329620361328125,
      "step": 54005,
      "training_step_time": 0.4826986789703369
    },
    {
      "epoch": 0.00032962646484375,
      "model_forward_time": 0.11813521385192871,
      "step": 54006
    },
    {
      "epoch": 0.00032962646484375,
      "step": 54006,
      "training_step_time": 0.47576332092285156
    },
    {
      "epoch": 0.000329632568359375,
      "model_forward_time": 0.11702966690063477,
      "step": 54007
    },
    {
      "epoch": 0.000329632568359375,
      "step": 54007,
      "training_step_time": 0.3755769729614258
    },
    {
      "epoch": 0.000329638671875,
      "model_forward_time": 0.1172628402709961,
      "step": 54008
    },
    {
      "epoch": 0.000329638671875,
      "step": 54008,
      "training_step_time": 0.3790304660797119
    },
    {
      "epoch": 0.000329644775390625,
      "model_forward_time": 0.11792540550231934,
      "step": 54009
    },
    {
      "epoch": 0.000329644775390625,
      "step": 54009,
      "training_step_time": 0.3810112476348877
    },
    {
      "epoch": 0.00032965087890625,
      "grad_norm": 0.09477302432060242,
      "learning_rate": 2.700197085409817e-06,
      "loss": 0.0313,
      "step": 54010
    },
    {
      "epoch": 0.00032965087890625,
      "model_forward_time": 0.11791491508483887,
      "step": 54010
    },
    {
      "epoch": 0.00032965087890625,
      "step": 54010,
      "training_step_time": 0.3832740783691406
    },
    {
      "epoch": 0.000329656982421875,
      "model_forward_time": 0.11821722984313965,
      "step": 54011
    },
    {
      "epoch": 0.000329656982421875,
      "step": 54011,
      "training_step_time": 0.3779597282409668
    },
    {
      "epoch": 0.0003296630859375,
      "model_forward_time": 0.11797475814819336,
      "step": 54012
    },
    {
      "epoch": 0.0003296630859375,
      "step": 54012,
      "training_step_time": 0.4023163318634033
    },
    {
      "epoch": 0.000329669189453125,
      "model_forward_time": 0.11799263954162598,
      "step": 54013
    },
    {
      "epoch": 0.000329669189453125,
      "step": 54013,
      "training_step_time": 0.41675639152526855
    },
    {
      "epoch": 0.00032967529296875,
      "model_forward_time": 0.11782240867614746,
      "step": 54014
    },
    {
      "epoch": 0.00032967529296875,
      "step": 54014,
      "training_step_time": 0.38547730445861816
    },
    {
      "epoch": 0.000329681396484375,
      "model_forward_time": 0.11840319633483887,
      "step": 54015
    },
    {
      "epoch": 0.000329681396484375,
      "step": 54015,
      "training_step_time": 0.4437685012817383
    },
    {
      "epoch": 0.0003296875,
      "model_forward_time": 0.11531472206115723,
      "step": 54016
    },
    {
      "epoch": 0.0003296875,
      "step": 54016,
      "training_step_time": 0.45735859870910645
    },
    {
      "epoch": 0.000329693603515625,
      "model_forward_time": 0.11551547050476074,
      "step": 54017
    },
    {
      "epoch": 0.000329693603515625,
      "step": 54017,
      "training_step_time": 0.4360687732696533
    },
    {
      "epoch": 0.00032969970703125,
      "model_forward_time": 0.11757779121398926,
      "step": 54018
    },
    {
      "epoch": 0.00032969970703125,
      "step": 54018,
      "training_step_time": 0.4202127456665039
    },
    {
      "epoch": 0.000329705810546875,
      "model_forward_time": 0.11935734748840332,
      "step": 54019
    },
    {
      "epoch": 0.000329705810546875,
      "step": 54019,
      "training_step_time": 0.38011837005615234
    },
    {
      "epoch": 0.0003297119140625,
      "grad_norm": 0.08446383476257324,
      "learning_rate": 2.691270624282621e-06,
      "loss": 0.0366,
      "step": 54020
    },
    {
      "epoch": 0.0003297119140625,
      "model_forward_time": 0.11824274063110352,
      "step": 54020
    },
    {
      "epoch": 0.0003297119140625,
      "step": 54020,
      "training_step_time": 0.4681828022003174
    },
    {
      "epoch": 0.000329718017578125,
      "model_forward_time": 0.11866474151611328,
      "step": 54021
    },
    {
      "epoch": 0.000329718017578125,
      "step": 54021,
      "training_step_time": 0.41843533515930176
    },
    {
      "epoch": 0.00032972412109375,
      "model_forward_time": 0.11908268928527832,
      "step": 54022
    },
    {
      "epoch": 0.00032972412109375,
      "step": 54022,
      "training_step_time": 0.3796994686126709
    },
    {
      "epoch": 0.000329730224609375,
      "model_forward_time": 0.11810159683227539,
      "step": 54023
    },
    {
      "epoch": 0.000329730224609375,
      "step": 54023,
      "training_step_time": 0.38330960273742676
    },
    {
      "epoch": 0.000329736328125,
      "model_forward_time": 0.11799144744873047,
      "step": 54024
    },
    {
      "epoch": 0.000329736328125,
      "step": 54024,
      "training_step_time": 0.38327598571777344
    },
    {
      "epoch": 0.000329742431640625,
      "model_forward_time": 0.11913013458251953,
      "step": 54025
    },
    {
      "epoch": 0.000329742431640625,
      "step": 54025,
      "training_step_time": 0.3847367763519287
    },
    {
      "epoch": 0.00032974853515625,
      "model_forward_time": 0.11584997177124023,
      "step": 54026
    },
    {
      "epoch": 0.00032974853515625,
      "step": 54026,
      "training_step_time": 0.39551448822021484
    },
    {
      "epoch": 0.000329754638671875,
      "model_forward_time": 0.11586594581604004,
      "step": 54027
    },
    {
      "epoch": 0.000329754638671875,
      "step": 54027,
      "training_step_time": 0.392087459564209
    },
    {
      "epoch": 0.0003297607421875,
      "model_forward_time": 0.11516165733337402,
      "step": 54028
    },
    {
      "epoch": 0.0003297607421875,
      "step": 54028,
      "training_step_time": 0.40262651443481445
    },
    {
      "epoch": 0.000329766845703125,
      "model_forward_time": 0.1152951717376709,
      "step": 54029
    },
    {
      "epoch": 0.000329766845703125,
      "step": 54029,
      "training_step_time": 0.39464378356933594
    },
    {
      "epoch": 0.00032977294921875,
      "grad_norm": 0.12748350203037262,
      "learning_rate": 2.682358534298285e-06,
      "loss": 0.0376,
      "step": 54030
    },
    {
      "epoch": 0.00032977294921875,
      "model_forward_time": 0.1152794361114502,
      "step": 54030
    },
    {
      "epoch": 0.00032977294921875,
      "step": 54030,
      "training_step_time": 0.4039273262023926
    },
    {
      "epoch": 0.000329779052734375,
      "model_forward_time": 0.11553168296813965,
      "step": 54031
    },
    {
      "epoch": 0.000329779052734375,
      "step": 54031,
      "training_step_time": 0.4224522113800049
    },
    {
      "epoch": 0.00032978515625,
      "model_forward_time": 0.11464071273803711,
      "step": 54032
    },
    {
      "epoch": 0.00032978515625,
      "step": 54032,
      "training_step_time": 0.42539334297180176
    },
    {
      "epoch": 0.000329791259765625,
      "model_forward_time": 0.11452674865722656,
      "step": 54033
    },
    {
      "epoch": 0.000329791259765625,
      "step": 54033,
      "training_step_time": 0.43753695487976074
    },
    {
      "epoch": 0.00032979736328125,
      "model_forward_time": 0.11583614349365234,
      "step": 54034
    },
    {
      "epoch": 0.00032979736328125,
      "step": 54034,
      "training_step_time": 0.41141724586486816
    },
    {
      "epoch": 0.000329803466796875,
      "model_forward_time": 0.11532330513000488,
      "step": 54035
    },
    {
      "epoch": 0.000329803466796875,
      "step": 54035,
      "training_step_time": 0.401874303817749
    },
    {
      "epoch": 0.0003298095703125,
      "model_forward_time": 0.11504578590393066,
      "step": 54036
    },
    {
      "epoch": 0.0003298095703125,
      "step": 54036,
      "training_step_time": 0.4035179615020752
    },
    {
      "epoch": 0.000329815673828125,
      "model_forward_time": 0.11558699607849121,
      "step": 54037
    },
    {
      "epoch": 0.000329815673828125,
      "step": 54037,
      "training_step_time": 0.40124034881591797
    },
    {
      "epoch": 0.00032982177734375,
      "model_forward_time": 0.11470174789428711,
      "step": 54038
    },
    {
      "epoch": 0.00032982177734375,
      "step": 54038,
      "training_step_time": 0.40236377716064453
    },
    {
      "epoch": 0.000329827880859375,
      "model_forward_time": 0.11565160751342773,
      "step": 54039
    },
    {
      "epoch": 0.000329827880859375,
      "step": 54039,
      "training_step_time": 0.40041470527648926
    },
    {
      "epoch": 0.000329833984375,
      "grad_norm": 0.0727512463927269,
      "learning_rate": 2.6734608181640917e-06,
      "loss": 0.0367,
      "step": 54040
    },
    {
      "epoch": 0.000329833984375,
      "model_forward_time": 0.11496710777282715,
      "step": 54040
    },
    {
      "epoch": 0.000329833984375,
      "step": 54040,
      "training_step_time": 0.39505720138549805
    },
    {
      "epoch": 0.000329840087890625,
      "model_forward_time": 0.11513137817382812,
      "step": 54041
    },
    {
      "epoch": 0.000329840087890625,
      "step": 54041,
      "training_step_time": 0.410306453704834
    },
    {
      "epoch": 0.00032984619140625,
      "model_forward_time": 0.11471271514892578,
      "step": 54042
    },
    {
      "epoch": 0.00032984619140625,
      "step": 54042,
      "training_step_time": 0.3963596820831299
    },
    {
      "epoch": 0.000329852294921875,
      "model_forward_time": 0.11583757400512695,
      "step": 54043
    },
    {
      "epoch": 0.000329852294921875,
      "step": 54043,
      "training_step_time": 0.4340999126434326
    },
    {
      "epoch": 0.0003298583984375,
      "model_forward_time": 0.11569857597351074,
      "step": 54044
    },
    {
      "epoch": 0.0003298583984375,
      "step": 54044,
      "training_step_time": 0.5005216598510742
    },
    {
      "epoch": 0.000329864501953125,
      "model_forward_time": 0.1150503158569336,
      "step": 54045
    },
    {
      "epoch": 0.000329864501953125,
      "step": 54045,
      "training_step_time": 0.416532039642334
    },
    {
      "epoch": 0.00032987060546875,
      "model_forward_time": 0.11440730094909668,
      "step": 54046
    },
    {
      "epoch": 0.00032987060546875,
      "step": 54046,
      "training_step_time": 0.3946197032928467
    },
    {
      "epoch": 0.000329876708984375,
      "model_forward_time": 0.11496186256408691,
      "step": 54047
    },
    {
      "epoch": 0.000329876708984375,
      "step": 54047,
      "training_step_time": 0.4649219512939453
    },
    {
      "epoch": 0.0003298828125,
      "model_forward_time": 0.1150064468383789,
      "step": 54048
    },
    {
      "epoch": 0.0003298828125,
      "step": 54048,
      "training_step_time": 0.46455931663513184
    },
    {
      "epoch": 0.000329888916015625,
      "model_forward_time": 0.11485886573791504,
      "step": 54049
    },
    {
      "epoch": 0.000329888916015625,
      "step": 54049,
      "training_step_time": 0.4679698944091797
    },
    {
      "epoch": 0.00032989501953125,
      "grad_norm": 0.10109157860279083,
      "learning_rate": 2.664577478582897e-06,
      "loss": 0.0368,
      "step": 54050
    },
    {
      "epoch": 0.00032989501953125,
      "model_forward_time": 0.11560678482055664,
      "step": 54050
    },
    {
      "epoch": 0.00032989501953125,
      "step": 54050,
      "training_step_time": 0.4941396713256836
    },
    {
      "epoch": 0.000329901123046875,
      "model_forward_time": 0.11476635932922363,
      "step": 54051
    },
    {
      "epoch": 0.000329901123046875,
      "step": 54051,
      "training_step_time": 0.38320136070251465
    },
    {
      "epoch": 0.0003299072265625,
      "model_forward_time": 0.11489701271057129,
      "step": 54052
    },
    {
      "epoch": 0.0003299072265625,
      "step": 54052,
      "training_step_time": 0.38743162155151367
    },
    {
      "epoch": 0.000329913330078125,
      "model_forward_time": 0.11502742767333984,
      "step": 54053
    },
    {
      "epoch": 0.000329913330078125,
      "step": 54053,
      "training_step_time": 0.3917989730834961
    },
    {
      "epoch": 0.00032991943359375,
      "model_forward_time": 0.1146399974822998,
      "step": 54054
    },
    {
      "epoch": 0.00032991943359375,
      "step": 54054,
      "training_step_time": 0.39986705780029297
    },
    {
      "epoch": 0.000329925537109375,
      "model_forward_time": 0.11466383934020996,
      "step": 54055
    },
    {
      "epoch": 0.000329925537109375,
      "step": 54055,
      "training_step_time": 0.39880895614624023
    },
    {
      "epoch": 0.000329931640625,
      "model_forward_time": 0.11517977714538574,
      "step": 54056
    },
    {
      "epoch": 0.000329931640625,
      "step": 54056,
      "training_step_time": 0.3886139392852783
    },
    {
      "epoch": 0.000329937744140625,
      "model_forward_time": 0.11456060409545898,
      "step": 54057
    },
    {
      "epoch": 0.000329937744140625,
      "step": 54057,
      "training_step_time": 0.3965024948120117
    },
    {
      "epoch": 0.00032994384765625,
      "model_forward_time": 0.11617684364318848,
      "step": 54058
    },
    {
      "epoch": 0.00032994384765625,
      "step": 54058,
      "training_step_time": 0.4406750202178955
    },
    {
      "epoch": 0.000329949951171875,
      "model_forward_time": 0.11531615257263184,
      "step": 54059
    },
    {
      "epoch": 0.000329949951171875,
      "step": 54059,
      "training_step_time": 0.49033379554748535
    },
    {
      "epoch": 0.0003299560546875,
      "grad_norm": 0.06828882545232773,
      "learning_rate": 2.6557085182532582e-06,
      "loss": 0.035,
      "step": 54060
    },
    {
      "epoch": 0.0003299560546875,
      "model_forward_time": 0.11482524871826172,
      "step": 54060
    },
    {
      "epoch": 0.0003299560546875,
      "step": 54060,
      "training_step_time": 0.5214664936065674
    },
    {
      "epoch": 0.000329962158203125,
      "model_forward_time": 0.11407160758972168,
      "step": 54061
    },
    {
      "epoch": 0.000329962158203125,
      "step": 54061,
      "training_step_time": 0.3943178653717041
    },
    {
      "epoch": 0.00032996826171875,
      "model_forward_time": 0.11549520492553711,
      "step": 54062
    },
    {
      "epoch": 0.00032996826171875,
      "step": 54062,
      "training_step_time": 0.43213367462158203
    },
    {
      "epoch": 0.000329974365234375,
      "model_forward_time": 0.11526870727539062,
      "step": 54063
    },
    {
      "epoch": 0.000329974365234375,
      "step": 54063,
      "training_step_time": 0.4085073471069336
    },
    {
      "epoch": 0.00032998046875,
      "model_forward_time": 0.1146080493927002,
      "step": 54064
    },
    {
      "epoch": 0.00032998046875,
      "step": 54064,
      "training_step_time": 0.4246866703033447
    },
    {
      "epoch": 0.000329986572265625,
      "model_forward_time": 0.11525368690490723,
      "step": 54065
    },
    {
      "epoch": 0.000329986572265625,
      "step": 54065,
      "training_step_time": 0.39790844917297363
    },
    {
      "epoch": 0.00032999267578125,
      "model_forward_time": 0.11474418640136719,
      "step": 54066
    },
    {
      "epoch": 0.00032999267578125,
      "step": 54066,
      "training_step_time": 0.39924073219299316
    },
    {
      "epoch": 0.000329998779296875,
      "model_forward_time": 0.11509323120117188,
      "step": 54067
    },
    {
      "epoch": 0.000329998779296875,
      "step": 54067,
      "training_step_time": 0.39705514907836914
    },
    {
      "epoch": 0.0003300048828125,
      "model_forward_time": 0.11461234092712402,
      "step": 54068
    },
    {
      "epoch": 0.0003300048828125,
      "step": 54068,
      "training_step_time": 0.39346766471862793
    },
    {
      "epoch": 0.000330010986328125,
      "model_forward_time": 0.1151113510131836,
      "step": 54069
    },
    {
      "epoch": 0.000330010986328125,
      "step": 54069,
      "training_step_time": 0.3946080207824707
    },
    {
      "epoch": 0.00033001708984375,
      "grad_norm": 0.08358479291200638,
      "learning_rate": 2.646853939869315e-06,
      "loss": 0.0378,
      "step": 54070
    },
    {
      "epoch": 0.00033001708984375,
      "model_forward_time": 0.1165156364440918,
      "step": 54070
    },
    {
      "epoch": 0.00033001708984375,
      "step": 54070,
      "training_step_time": 0.4031360149383545
    },
    {
      "epoch": 0.000330023193359375,
      "model_forward_time": 0.11565351486206055,
      "step": 54071
    },
    {
      "epoch": 0.000330023193359375,
      "step": 54071,
      "training_step_time": 0.3933675289154053
    },
    {
      "epoch": 0.000330029296875,
      "model_forward_time": 0.11520814895629883,
      "step": 54072
    },
    {
      "epoch": 0.000330029296875,
      "step": 54072,
      "training_step_time": 0.4316999912261963
    },
    {
      "epoch": 0.000330035400390625,
      "model_forward_time": 0.1149601936340332,
      "step": 54073
    },
    {
      "epoch": 0.000330035400390625,
      "step": 54073,
      "training_step_time": 0.3936431407928467
    },
    {
      "epoch": 0.00033004150390625,
      "model_forward_time": 0.11517214775085449,
      "step": 54074
    },
    {
      "epoch": 0.00033004150390625,
      "step": 54074,
      "training_step_time": 0.4125807285308838
    },
    {
      "epoch": 0.000330047607421875,
      "model_forward_time": 0.11474347114562988,
      "step": 54075
    },
    {
      "epoch": 0.000330047607421875,
      "step": 54075,
      "training_step_time": 0.4382896423339844
    },
    {
      "epoch": 0.0003300537109375,
      "model_forward_time": 0.11474776268005371,
      "step": 54076
    },
    {
      "epoch": 0.0003300537109375,
      "step": 54076,
      "training_step_time": 0.4494056701660156
    },
    {
      "epoch": 0.000330059814453125,
      "model_forward_time": 0.11562728881835938,
      "step": 54077
    },
    {
      "epoch": 0.000330059814453125,
      "step": 54077,
      "training_step_time": 0.4690711498260498
    },
    {
      "epoch": 0.00033006591796875,
      "model_forward_time": 0.11489391326904297,
      "step": 54078
    },
    {
      "epoch": 0.00033006591796875,
      "step": 54078,
      "training_step_time": 0.6594669818878174
    },
    {
      "epoch": 0.000330072021484375,
      "model_forward_time": 0.11440420150756836,
      "step": 54079
    },
    {
      "epoch": 0.000330072021484375,
      "step": 54079,
      "training_step_time": 0.40187788009643555
    },
    {
      "epoch": 0.000330078125,
      "grad_norm": 0.07687905430793762,
      "learning_rate": 2.63801374612086e-06,
      "loss": 0.0334,
      "step": 54080
    },
    {
      "epoch": 0.000330078125,
      "model_forward_time": 0.1146402359008789,
      "step": 54080
    },
    {
      "epoch": 0.000330078125,
      "step": 54080,
      "training_step_time": 0.3784506320953369
    },
    {
      "epoch": 0.000330084228515625,
      "model_forward_time": 0.11439013481140137,
      "step": 54081
    },
    {
      "epoch": 0.000330084228515625,
      "step": 54081,
      "training_step_time": 0.3877699375152588
    },
    {
      "epoch": 0.00033009033203125,
      "model_forward_time": 0.11444330215454102,
      "step": 54082
    },
    {
      "epoch": 0.00033009033203125,
      "step": 54082,
      "training_step_time": 0.3901050090789795
    },
    {
      "epoch": 0.000330096435546875,
      "model_forward_time": 0.1145474910736084,
      "step": 54083
    },
    {
      "epoch": 0.000330096435546875,
      "step": 54083,
      "training_step_time": 0.3840658664703369
    },
    {
      "epoch": 0.0003301025390625,
      "model_forward_time": 0.11472845077514648,
      "step": 54084
    },
    {
      "epoch": 0.0003301025390625,
      "step": 54084,
      "training_step_time": 0.8732702732086182
    },
    {
      "epoch": 0.000330108642578125,
      "model_forward_time": 0.11489272117614746,
      "step": 54085
    },
    {
      "epoch": 0.000330108642578125,
      "step": 54085,
      "training_step_time": 0.46563100814819336
    },
    {
      "epoch": 0.00033011474609375,
      "model_forward_time": 0.11386990547180176,
      "step": 54086
    },
    {
      "epoch": 0.00033011474609375,
      "step": 54086,
      "training_step_time": 0.4220457077026367
    },
    {
      "epoch": 0.000330120849609375,
      "model_forward_time": 0.11457538604736328,
      "step": 54087
    },
    {
      "epoch": 0.000330120849609375,
      "step": 54087,
      "training_step_time": 0.41767072677612305
    },
    {
      "epoch": 0.000330126953125,
      "model_forward_time": 0.11537671089172363,
      "step": 54088
    },
    {
      "epoch": 0.000330126953125,
      "step": 54088,
      "training_step_time": 0.3905162811279297
    },
    {
      "epoch": 0.000330133056640625,
      "model_forward_time": 0.1145317554473877,
      "step": 54089
    },
    {
      "epoch": 0.000330133056640625,
      "step": 54089,
      "training_step_time": 0.43806934356689453
    },
    {
      "epoch": 0.00033013916015625,
      "grad_norm": 0.07294870167970657,
      "learning_rate": 2.6291879396933004e-06,
      "loss": 0.0354,
      "step": 54090
    },
    {
      "epoch": 0.00033013916015625,
      "model_forward_time": 0.11421775817871094,
      "step": 54090
    },
    {
      "epoch": 0.00033013916015625,
      "step": 54090,
      "training_step_time": 0.5552206039428711
    },
    {
      "epoch": 0.000330145263671875,
      "model_forward_time": 0.11437535285949707,
      "step": 54091
    },
    {
      "epoch": 0.000330145263671875,
      "step": 54091,
      "training_step_time": 0.4748694896697998
    },
    {
      "epoch": 0.0003301513671875,
      "model_forward_time": 0.11453914642333984,
      "step": 54092
    },
    {
      "epoch": 0.0003301513671875,
      "step": 54092,
      "training_step_time": 0.48343515396118164
    },
    {
      "epoch": 0.000330157470703125,
      "model_forward_time": 0.1141963005065918,
      "step": 54093
    },
    {
      "epoch": 0.000330157470703125,
      "step": 54093,
      "training_step_time": 0.3854482173919678
    },
    {
      "epoch": 0.00033016357421875,
      "model_forward_time": 0.11481714248657227,
      "step": 54094
    },
    {
      "epoch": 0.00033016357421875,
      "step": 54094,
      "training_step_time": 0.37553954124450684
    },
    {
      "epoch": 0.000330169677734375,
      "model_forward_time": 0.11440658569335938,
      "step": 54095
    },
    {
      "epoch": 0.000330169677734375,
      "step": 54095,
      "training_step_time": 0.3885054588317871
    },
    {
      "epoch": 0.00033017578125,
      "model_forward_time": 0.11500740051269531,
      "step": 54096
    },
    {
      "epoch": 0.00033017578125,
      "step": 54096,
      "training_step_time": 0.578284740447998
    },
    {
      "epoch": 0.000330181884765625,
      "model_forward_time": 0.11454224586486816,
      "step": 54097
    },
    {
      "epoch": 0.000330181884765625,
      "step": 54097,
      "training_step_time": 0.38973021507263184
    },
    {
      "epoch": 0.00033018798828125,
      "model_forward_time": 0.11545419692993164,
      "step": 54098
    },
    {
      "epoch": 0.00033018798828125,
      "step": 54098,
      "training_step_time": 0.39269042015075684
    },
    {
      "epoch": 0.000330194091796875,
      "model_forward_time": 0.1149446964263916,
      "step": 54099
    },
    {
      "epoch": 0.000330194091796875,
      "step": 54099,
      "training_step_time": 0.44780945777893066
    },
    {
      "epoch": 0.0003302001953125,
      "grad_norm": 0.07864183932542801,
      "learning_rate": 2.620376523267698e-06,
      "loss": 0.0368,
      "step": 54100
    },
    {
      "epoch": 0.0003302001953125,
      "model_forward_time": 0.11472129821777344,
      "step": 54100
    },
    {
      "epoch": 0.0003302001953125,
      "step": 54100,
      "training_step_time": 0.4587435722351074
    },
    {
      "epoch": 0.000330206298828125,
      "model_forward_time": 0.11518073081970215,
      "step": 54101
    },
    {
      "epoch": 0.000330206298828125,
      "step": 54101,
      "training_step_time": 0.3973655700683594
    },
    {
      "epoch": 0.00033021240234375,
      "model_forward_time": 0.11489081382751465,
      "step": 54102
    },
    {
      "epoch": 0.00033021240234375,
      "step": 54102,
      "training_step_time": 0.7144885063171387
    },
    {
      "epoch": 0.000330218505859375,
      "model_forward_time": 0.11483168601989746,
      "step": 54103
    },
    {
      "epoch": 0.000330218505859375,
      "step": 54103,
      "training_step_time": 0.48790597915649414
    },
    {
      "epoch": 0.000330224609375,
      "model_forward_time": 0.11407709121704102,
      "step": 54104
    },
    {
      "epoch": 0.000330224609375,
      "step": 54104,
      "training_step_time": 0.3911309242248535
    },
    {
      "epoch": 0.000330230712890625,
      "model_forward_time": 0.1142423152923584,
      "step": 54105
    },
    {
      "epoch": 0.000330230712890625,
      "step": 54105,
      "training_step_time": 0.489565372467041
    },
    {
      "epoch": 0.00033023681640625,
      "model_forward_time": 0.1148216724395752,
      "step": 54106
    },
    {
      "epoch": 0.00033023681640625,
      "step": 54106,
      "training_step_time": 0.51731276512146
    },
    {
      "epoch": 0.000330242919921875,
      "model_forward_time": 0.11472105979919434,
      "step": 54107
    },
    {
      "epoch": 0.000330242919921875,
      "step": 54107,
      "training_step_time": 0.3791074752807617
    },
    {
      "epoch": 0.0003302490234375,
      "model_forward_time": 0.1149282455444336,
      "step": 54108
    },
    {
      "epoch": 0.0003302490234375,
      "step": 54108,
      "training_step_time": 0.43486499786376953
    },
    {
      "epoch": 0.000330255126953125,
      "model_forward_time": 0.11584687232971191,
      "step": 54109
    },
    {
      "epoch": 0.000330255126953125,
      "step": 54109,
      "training_step_time": 0.4153175354003906
    },
    {
      "epoch": 0.00033026123046875,
      "grad_norm": 0.0878453403711319,
      "learning_rate": 2.611579499520722e-06,
      "loss": 0.0373,
      "step": 54110
    },
    {
      "epoch": 0.00033026123046875,
      "model_forward_time": 0.11519551277160645,
      "step": 54110
    },
    {
      "epoch": 0.00033026123046875,
      "step": 54110,
      "training_step_time": 0.39567017555236816
    },
    {
      "epoch": 0.000330267333984375,
      "model_forward_time": 0.11441874504089355,
      "step": 54111
    },
    {
      "epoch": 0.000330267333984375,
      "step": 54111,
      "training_step_time": 0.39516329765319824
    },
    {
      "epoch": 0.0003302734375,
      "model_forward_time": 0.11517572402954102,
      "step": 54112
    },
    {
      "epoch": 0.0003302734375,
      "step": 54112,
      "training_step_time": 0.4538075923919678
    },
    {
      "epoch": 0.000330279541015625,
      "model_forward_time": 0.11493277549743652,
      "step": 54113
    },
    {
      "epoch": 0.000330279541015625,
      "step": 54113,
      "training_step_time": 0.45793724060058594
    },
    {
      "epoch": 0.00033028564453125,
      "model_forward_time": 0.1144723892211914,
      "step": 54114
    },
    {
      "epoch": 0.00033028564453125,
      "step": 54114,
      "training_step_time": 0.6357004642486572
    },
    {
      "epoch": 0.000330291748046875,
      "model_forward_time": 0.11425495147705078,
      "step": 54115
    },
    {
      "epoch": 0.000330291748046875,
      "step": 54115,
      "training_step_time": 0.3920440673828125
    },
    {
      "epoch": 0.0003302978515625,
      "model_forward_time": 0.11521244049072266,
      "step": 54116
    },
    {
      "epoch": 0.0003302978515625,
      "step": 54116,
      "training_step_time": 0.39821743965148926
    },
    {
      "epoch": 0.000330303955078125,
      "model_forward_time": 0.1147918701171875,
      "step": 54117
    },
    {
      "epoch": 0.000330303955078125,
      "step": 54117,
      "training_step_time": 0.4297916889190674
    },
    {
      "epoch": 0.00033031005859375,
      "model_forward_time": 0.11468338966369629,
      "step": 54118
    },
    {
      "epoch": 0.00033031005859375,
      "step": 54118,
      "training_step_time": 0.39780497550964355
    },
    {
      "epoch": 0.000330316162109375,
      "model_forward_time": 0.11442852020263672,
      "step": 54119
    },
    {
      "epoch": 0.000330316162109375,
      "step": 54119,
      "training_step_time": 0.5146999359130859
    },
    {
      "epoch": 0.000330322265625,
      "grad_norm": 0.08582321554422379,
      "learning_rate": 2.602796871124663e-06,
      "loss": 0.0377,
      "step": 54120
    },
    {
      "epoch": 0.000330322265625,
      "model_forward_time": 0.11500430107116699,
      "step": 54120
    },
    {
      "epoch": 0.000330322265625,
      "step": 54120,
      "training_step_time": 0.5486688613891602
    },
    {
      "epoch": 0.000330328369140625,
      "model_forward_time": 0.11431717872619629,
      "step": 54121
    },
    {
      "epoch": 0.000330328369140625,
      "step": 54121,
      "training_step_time": 0.3903789520263672
    },
    {
      "epoch": 0.00033033447265625,
      "model_forward_time": 0.11590218544006348,
      "step": 54122
    },
    {
      "epoch": 0.00033033447265625,
      "step": 54122,
      "training_step_time": 0.3906521797180176
    },
    {
      "epoch": 0.000330340576171875,
      "model_forward_time": 0.11427712440490723,
      "step": 54123
    },
    {
      "epoch": 0.000330340576171875,
      "step": 54123,
      "training_step_time": 0.38776159286499023
    },
    {
      "epoch": 0.0003303466796875,
      "model_forward_time": 0.11453437805175781,
      "step": 54124
    },
    {
      "epoch": 0.0003303466796875,
      "step": 54124,
      "training_step_time": 0.390427827835083
    },
    {
      "epoch": 0.000330352783203125,
      "model_forward_time": 0.1148691177368164,
      "step": 54125
    },
    {
      "epoch": 0.000330352783203125,
      "step": 54125,
      "training_step_time": 0.38564419746398926
    },
    {
      "epoch": 0.00033035888671875,
      "model_forward_time": 0.1147468090057373,
      "step": 54126
    },
    {
      "epoch": 0.00033035888671875,
      "step": 54126,
      "training_step_time": 0.6394050121307373
    },
    {
      "epoch": 0.000330364990234375,
      "model_forward_time": 0.11500239372253418,
      "step": 54127
    },
    {
      "epoch": 0.000330364990234375,
      "step": 54127,
      "training_step_time": 0.46777868270874023
    },
    {
      "epoch": 0.00033037109375,
      "model_forward_time": 0.1146693229675293,
      "step": 54128
    },
    {
      "epoch": 0.00033037109375,
      "step": 54128,
      "training_step_time": 0.38901376724243164
    },
    {
      "epoch": 0.000330377197265625,
      "model_forward_time": 0.11472010612487793,
      "step": 54129
    },
    {
      "epoch": 0.000330377197265625,
      "step": 54129,
      "training_step_time": 0.39864587783813477
    },
    {
      "epoch": 0.00033038330078125,
      "grad_norm": 0.15913686156272888,
      "learning_rate": 2.594028640747476e-06,
      "loss": 0.0408,
      "step": 54130
    },
    {
      "epoch": 0.00033038330078125,
      "model_forward_time": 0.11539864540100098,
      "step": 54130
    },
    {
      "epoch": 0.00033038330078125,
      "step": 54130,
      "training_step_time": 0.41573596000671387
    },
    {
      "epoch": 0.000330389404296875,
      "model_forward_time": 0.11425185203552246,
      "step": 54131
    },
    {
      "epoch": 0.000330389404296875,
      "step": 54131,
      "training_step_time": 0.4586515426635742
    },
    {
      "epoch": 0.0003303955078125,
      "model_forward_time": 0.1158294677734375,
      "step": 54132
    },
    {
      "epoch": 0.0003303955078125,
      "step": 54132,
      "training_step_time": 0.5808577537536621
    },
    {
      "epoch": 0.000330401611328125,
      "model_forward_time": 0.11520886421203613,
      "step": 54133
    },
    {
      "epoch": 0.000330401611328125,
      "step": 54133,
      "training_step_time": 0.485198974609375
    },
    {
      "epoch": 0.00033040771484375,
      "model_forward_time": 0.11480259895324707,
      "step": 54134
    },
    {
      "epoch": 0.00033040771484375,
      "step": 54134,
      "training_step_time": 0.40454816818237305
    },
    {
      "epoch": 0.000330413818359375,
      "model_forward_time": 0.11483573913574219,
      "step": 54135
    },
    {
      "epoch": 0.000330413818359375,
      "step": 54135,
      "training_step_time": 0.373645544052124
    },
    {
      "epoch": 0.000330419921875,
      "model_forward_time": 0.11458730697631836,
      "step": 54136
    },
    {
      "epoch": 0.000330419921875,
      "step": 54136,
      "training_step_time": 0.3821907043457031
    },
    {
      "epoch": 0.000330426025390625,
      "model_forward_time": 0.11443781852722168,
      "step": 54137
    },
    {
      "epoch": 0.000330426025390625,
      "step": 54137,
      "training_step_time": 0.3850998878479004
    },
    {
      "epoch": 0.00033043212890625,
      "model_forward_time": 0.11559462547302246,
      "step": 54138
    },
    {
      "epoch": 0.00033043212890625,
      "step": 54138,
      "training_step_time": 0.5349001884460449
    },
    {
      "epoch": 0.000330438232421875,
      "model_forward_time": 0.11467480659484863,
      "step": 54139
    },
    {
      "epoch": 0.000330438232421875,
      "step": 54139,
      "training_step_time": 0.4812602996826172
    },
    {
      "epoch": 0.0003304443359375,
      "grad_norm": 0.14677061140537262,
      "learning_rate": 2.5852748110527014e-06,
      "loss": 0.0392,
      "step": 54140
    },
    {
      "epoch": 0.0003304443359375,
      "model_forward_time": 0.11430001258850098,
      "step": 54140
    },
    {
      "epoch": 0.0003304443359375,
      "step": 54140,
      "training_step_time": 0.4172546863555908
    },
    {
      "epoch": 0.000330450439453125,
      "model_forward_time": 0.11555194854736328,
      "step": 54141
    },
    {
      "epoch": 0.000330450439453125,
      "step": 54141,
      "training_step_time": 0.47794651985168457
    },
    {
      "epoch": 0.00033045654296875,
      "model_forward_time": 0.11436963081359863,
      "step": 54142
    },
    {
      "epoch": 0.00033045654296875,
      "step": 54142,
      "training_step_time": 0.39084649085998535
    },
    {
      "epoch": 0.000330462646484375,
      "model_forward_time": 0.11455273628234863,
      "step": 54143
    },
    {
      "epoch": 0.000330462646484375,
      "step": 54143,
      "training_step_time": 0.4320094585418701
    },
    {
      "epoch": 0.00033046875,
      "model_forward_time": 0.11514997482299805,
      "step": 54144
    },
    {
      "epoch": 0.00033046875,
      "step": 54144,
      "training_step_time": 0.5262346267700195
    },
    {
      "epoch": 0.000330474853515625,
      "model_forward_time": 0.1158454418182373,
      "step": 54145
    },
    {
      "epoch": 0.000330474853515625,
      "step": 54145,
      "training_step_time": 0.5025718212127686
    },
    {
      "epoch": 0.00033048095703125,
      "model_forward_time": 0.11424112319946289,
      "step": 54146
    },
    {
      "epoch": 0.00033048095703125,
      "step": 54146,
      "training_step_time": 0.42317843437194824
    },
    {
      "epoch": 0.000330487060546875,
      "model_forward_time": 0.11524176597595215,
      "step": 54147
    },
    {
      "epoch": 0.000330487060546875,
      "step": 54147,
      "training_step_time": 0.3919506072998047
    },
    {
      "epoch": 0.0003304931640625,
      "model_forward_time": 0.11468076705932617,
      "step": 54148
    },
    {
      "epoch": 0.0003304931640625,
      "step": 54148,
      "training_step_time": 0.4715285301208496
    },
    {
      "epoch": 0.000330499267578125,
      "model_forward_time": 0.11388874053955078,
      "step": 54149
    },
    {
      "epoch": 0.000330499267578125,
      "step": 54149,
      "training_step_time": 0.3916785717010498
    },
    {
      "epoch": 0.00033050537109375,
      "grad_norm": 0.07493195682764053,
      "learning_rate": 2.57653538469953e-06,
      "loss": 0.0388,
      "step": 54150
    },
    {
      "epoch": 0.00033050537109375,
      "model_forward_time": 0.11449384689331055,
      "step": 54150
    },
    {
      "epoch": 0.00033050537109375,
      "step": 54150,
      "training_step_time": 0.4840857982635498
    },
    {
      "epoch": 0.000330511474609375,
      "model_forward_time": 0.11413145065307617,
      "step": 54151
    },
    {
      "epoch": 0.000330511474609375,
      "step": 54151,
      "training_step_time": 0.38909006118774414
    },
    {
      "epoch": 0.000330517578125,
      "model_forward_time": 0.11504316329956055,
      "step": 54152
    },
    {
      "epoch": 0.000330517578125,
      "step": 54152,
      "training_step_time": 0.4313502311706543
    },
    {
      "epoch": 0.000330523681640625,
      "model_forward_time": 0.11560630798339844,
      "step": 54153
    },
    {
      "epoch": 0.000330523681640625,
      "step": 54153,
      "training_step_time": 0.4550900459289551
    },
    {
      "epoch": 0.00033052978515625,
      "model_forward_time": 0.11516237258911133,
      "step": 54154
    },
    {
      "epoch": 0.00033052978515625,
      "step": 54154,
      "training_step_time": 0.4853484630584717
    },
    {
      "epoch": 0.000330535888671875,
      "model_forward_time": 0.11457300186157227,
      "step": 54155
    },
    {
      "epoch": 0.000330535888671875,
      "step": 54155,
      "training_step_time": 0.3953211307525635
    },
    {
      "epoch": 0.0003305419921875,
      "model_forward_time": 0.11539745330810547,
      "step": 54156
    },
    {
      "epoch": 0.0003305419921875,
      "step": 54156,
      "training_step_time": 0.514554500579834
    },
    {
      "epoch": 0.000330548095703125,
      "model_forward_time": 0.11472702026367188,
      "step": 54157
    },
    {
      "epoch": 0.000330548095703125,
      "step": 54157,
      "training_step_time": 0.4363863468170166
    },
    {
      "epoch": 0.00033055419921875,
      "model_forward_time": 0.11421060562133789,
      "step": 54158
    },
    {
      "epoch": 0.00033055419921875,
      "step": 54158,
      "training_step_time": 0.4556162357330322
    },
    {
      "epoch": 0.000330560302734375,
      "model_forward_time": 0.11375784873962402,
      "step": 54159
    },
    {
      "epoch": 0.000330560302734375,
      "step": 54159,
      "training_step_time": 0.3880038261413574
    },
    {
      "epoch": 0.00033056640625,
      "grad_norm": 0.08808329701423645,
      "learning_rate": 2.567810364342765e-06,
      "loss": 0.0379,
      "step": 54160
    },
    {
      "epoch": 0.00033056640625,
      "model_forward_time": 0.1148064136505127,
      "step": 54160
    },
    {
      "epoch": 0.00033056640625,
      "step": 54160,
      "training_step_time": 0.4428367614746094
    },
    {
      "epoch": 0.000330572509765625,
      "model_forward_time": 0.11472892761230469,
      "step": 54161
    },
    {
      "epoch": 0.000330572509765625,
      "step": 54161,
      "training_step_time": 0.49716782569885254
    },
    {
      "epoch": 0.00033057861328125,
      "model_forward_time": 0.11484193801879883,
      "step": 54162
    },
    {
      "epoch": 0.00033057861328125,
      "step": 54162,
      "training_step_time": 0.4497978687286377
    },
    {
      "epoch": 0.000330584716796875,
      "model_forward_time": 0.11502885818481445,
      "step": 54163
    },
    {
      "epoch": 0.000330584716796875,
      "step": 54163,
      "training_step_time": 0.3901987075805664
    },
    {
      "epoch": 0.0003305908203125,
      "model_forward_time": 0.1150810718536377,
      "step": 54164
    },
    {
      "epoch": 0.0003305908203125,
      "step": 54164,
      "training_step_time": 0.4409830570220947
    },
    {
      "epoch": 0.000330596923828125,
      "model_forward_time": 0.11462688446044922,
      "step": 54165
    },
    {
      "epoch": 0.000330596923828125,
      "step": 54165,
      "training_step_time": 0.42138242721557617
    },
    {
      "epoch": 0.00033060302734375,
      "model_forward_time": 0.1148526668548584,
      "step": 54166
    },
    {
      "epoch": 0.00033060302734375,
      "step": 54166,
      "training_step_time": 0.37938857078552246
    },
    {
      "epoch": 0.000330609130859375,
      "model_forward_time": 0.11549615859985352,
      "step": 54167
    },
    {
      "epoch": 0.000330609130859375,
      "step": 54167,
      "training_step_time": 0.44658923149108887
    },
    {
      "epoch": 0.000330615234375,
      "model_forward_time": 0.11475706100463867,
      "step": 54168
    },
    {
      "epoch": 0.000330615234375,
      "step": 54168,
      "training_step_time": 0.6289706230163574
    },
    {
      "epoch": 0.000330621337890625,
      "model_forward_time": 0.11452150344848633,
      "step": 54169
    },
    {
      "epoch": 0.000330621337890625,
      "step": 54169,
      "training_step_time": 0.39115357398986816
    },
    {
      "epoch": 0.00033062744140625,
      "grad_norm": 0.09628558158874512,
      "learning_rate": 2.559099752632832e-06,
      "loss": 0.033,
      "step": 54170
    },
    {
      "epoch": 0.00033062744140625,
      "model_forward_time": 0.11427545547485352,
      "step": 54170
    },
    {
      "epoch": 0.00033062744140625,
      "step": 54170,
      "training_step_time": 0.5061872005462646
    },
    {
      "epoch": 0.000330633544921875,
      "model_forward_time": 0.11463761329650879,
      "step": 54171
    },
    {
      "epoch": 0.000330633544921875,
      "step": 54171,
      "training_step_time": 0.4400460720062256
    },
    {
      "epoch": 0.0003306396484375,
      "model_forward_time": 0.1146543025970459,
      "step": 54172
    },
    {
      "epoch": 0.0003306396484375,
      "step": 54172,
      "training_step_time": 0.48963212966918945
    },
    {
      "epoch": 0.000330645751953125,
      "model_forward_time": 0.11425542831420898,
      "step": 54173
    },
    {
      "epoch": 0.000330645751953125,
      "step": 54173,
      "training_step_time": 0.38317322731018066
    },
    {
      "epoch": 0.00033065185546875,
      "model_forward_time": 0.11539888381958008,
      "step": 54174
    },
    {
      "epoch": 0.00033065185546875,
      "step": 54174,
      "training_step_time": 0.50614333152771
    },
    {
      "epoch": 0.000330657958984375,
      "model_forward_time": 0.11459708213806152,
      "step": 54175
    },
    {
      "epoch": 0.000330657958984375,
      "step": 54175,
      "training_step_time": 0.3932352066040039
    },
    {
      "epoch": 0.0003306640625,
      "model_forward_time": 0.11548113822937012,
      "step": 54176
    },
    {
      "epoch": 0.0003306640625,
      "step": 54176,
      "training_step_time": 0.3966085910797119
    },
    {
      "epoch": 0.000330670166015625,
      "model_forward_time": 0.11448478698730469,
      "step": 54177
    },
    {
      "epoch": 0.000330670166015625,
      "step": 54177,
      "training_step_time": 0.4155750274658203
    },
    {
      "epoch": 0.00033067626953125,
      "model_forward_time": 0.1150979995727539,
      "step": 54178
    },
    {
      "epoch": 0.00033067626953125,
      "step": 54178,
      "training_step_time": 0.41765522956848145
    },
    {
      "epoch": 0.000330682373046875,
      "model_forward_time": 0.11484217643737793,
      "step": 54179
    },
    {
      "epoch": 0.000330682373046875,
      "step": 54179,
      "training_step_time": 0.3792386054992676
    },
    {
      "epoch": 0.0003306884765625,
      "grad_norm": 0.10475482791662216,
      "learning_rate": 2.5504035522157854e-06,
      "loss": 0.0347,
      "step": 54180
    },
    {
      "epoch": 0.0003306884765625,
      "model_forward_time": 0.11470818519592285,
      "step": 54180
    },
    {
      "epoch": 0.0003306884765625,
      "step": 54180,
      "training_step_time": 0.6928791999816895
    },
    {
      "epoch": 0.000330694580078125,
      "model_forward_time": 0.11471724510192871,
      "step": 54181
    },
    {
      "epoch": 0.000330694580078125,
      "step": 54181,
      "training_step_time": 0.3989720344543457
    },
    {
      "epoch": 0.00033070068359375,
      "model_forward_time": 0.11423158645629883,
      "step": 54182
    },
    {
      "epoch": 0.00033070068359375,
      "step": 54182,
      "training_step_time": 0.4806392192840576
    },
    {
      "epoch": 0.000330706787109375,
      "model_forward_time": 0.11498785018920898,
      "step": 54183
    },
    {
      "epoch": 0.000330706787109375,
      "step": 54183,
      "training_step_time": 0.37829041481018066
    },
    {
      "epoch": 0.000330712890625,
      "model_forward_time": 0.11463427543640137,
      "step": 54184
    },
    {
      "epoch": 0.000330712890625,
      "step": 54184,
      "training_step_time": 0.4079585075378418
    },
    {
      "epoch": 0.000330718994140625,
      "model_forward_time": 0.11408281326293945,
      "step": 54185
    },
    {
      "epoch": 0.000330718994140625,
      "step": 54185,
      "training_step_time": 0.42794227600097656
    },
    {
      "epoch": 0.00033072509765625,
      "model_forward_time": 0.11472749710083008,
      "step": 54186
    },
    {
      "epoch": 0.00033072509765625,
      "step": 54186,
      "training_step_time": 0.48538804054260254
    },
    {
      "epoch": 0.000330731201171875,
      "model_forward_time": 0.11490845680236816,
      "step": 54187
    },
    {
      "epoch": 0.000330731201171875,
      "step": 54187,
      "training_step_time": 0.36360764503479004
    },
    {
      "epoch": 0.0003307373046875,
      "model_forward_time": 0.11481904983520508,
      "step": 54188
    },
    {
      "epoch": 0.0003307373046875,
      "step": 54188,
      "training_step_time": 0.39796900749206543
    },
    {
      "epoch": 0.000330743408203125,
      "model_forward_time": 0.11528563499450684,
      "step": 54189
    },
    {
      "epoch": 0.000330743408203125,
      "step": 54189,
      "training_step_time": 0.4615168571472168
    },
    {
      "epoch": 0.00033074951171875,
      "grad_norm": 0.07997509092092514,
      "learning_rate": 2.541721765733318e-06,
      "loss": 0.0357,
      "step": 54190
    },
    {
      "epoch": 0.00033074951171875,
      "model_forward_time": 0.11470746994018555,
      "step": 54190
    },
    {
      "epoch": 0.00033074951171875,
      "step": 54190,
      "training_step_time": 0.3971874713897705
    },
    {
      "epoch": 0.000330755615234375,
      "model_forward_time": 0.11462545394897461,
      "step": 54191
    },
    {
      "epoch": 0.000330755615234375,
      "step": 54191,
      "training_step_time": 0.42130064964294434
    },
    {
      "epoch": 0.00033076171875,
      "model_forward_time": 0.11536312103271484,
      "step": 54192
    },
    {
      "epoch": 0.00033076171875,
      "step": 54192,
      "training_step_time": 0.5229778289794922
    },
    {
      "epoch": 0.000330767822265625,
      "model_forward_time": 0.1148531436920166,
      "step": 54193
    },
    {
      "epoch": 0.000330767822265625,
      "step": 54193,
      "training_step_time": 0.39476680755615234
    },
    {
      "epoch": 0.00033077392578125,
      "model_forward_time": 0.11432957649230957,
      "step": 54194
    },
    {
      "epoch": 0.00033077392578125,
      "step": 54194,
      "training_step_time": 0.46243906021118164
    },
    {
      "epoch": 0.000330780029296875,
      "model_forward_time": 0.11467576026916504,
      "step": 54195
    },
    {
      "epoch": 0.000330780029296875,
      "step": 54195,
      "training_step_time": 0.4192659854888916
    },
    {
      "epoch": 0.0003307861328125,
      "model_forward_time": 0.11480545997619629,
      "step": 54196
    },
    {
      "epoch": 0.0003307861328125,
      "step": 54196,
      "training_step_time": 0.49408483505249023
    },
    {
      "epoch": 0.000330792236328125,
      "model_forward_time": 0.11430644989013672,
      "step": 54197
    },
    {
      "epoch": 0.000330792236328125,
      "step": 54197,
      "training_step_time": 0.38818860054016113
    },
    {
      "epoch": 0.00033079833984375,
      "model_forward_time": 0.1154181957244873,
      "step": 54198
    },
    {
      "epoch": 0.00033079833984375,
      "step": 54198,
      "training_step_time": 0.6270794868469238
    },
    {
      "epoch": 0.000330804443359375,
      "model_forward_time": 0.11427140235900879,
      "step": 54199
    },
    {
      "epoch": 0.000330804443359375,
      "step": 54199,
      "training_step_time": 0.38719773292541504
    },
    {
      "epoch": 0.000330810546875,
      "grad_norm": 0.06914173066616058,
      "learning_rate": 2.533054395822704e-06,
      "loss": 0.0359,
      "step": 54200
    },
    {
      "epoch": 0.000330810546875,
      "model_forward_time": 0.11446619033813477,
      "step": 54200
    },
    {
      "epoch": 0.000330810546875,
      "step": 54200,
      "training_step_time": 0.4275999069213867
    },
    {
      "epoch": 0.000330816650390625,
      "model_forward_time": 0.11475539207458496,
      "step": 54201
    },
    {
      "epoch": 0.000330816650390625,
      "step": 54201,
      "training_step_time": 0.39753246307373047
    },
    {
      "epoch": 0.00033082275390625,
      "model_forward_time": 0.11585307121276855,
      "step": 54202
    },
    {
      "epoch": 0.00033082275390625,
      "step": 54202,
      "training_step_time": 0.40579938888549805
    },
    {
      "epoch": 0.000330828857421875,
      "model_forward_time": 0.11500096321105957,
      "step": 54203
    },
    {
      "epoch": 0.000330828857421875,
      "step": 54203,
      "training_step_time": 0.4199180603027344
    },
    {
      "epoch": 0.0003308349609375,
      "model_forward_time": 0.11572146415710449,
      "step": 54204
    },
    {
      "epoch": 0.0003308349609375,
      "step": 54204,
      "training_step_time": 0.6335210800170898
    },
    {
      "epoch": 0.000330841064453125,
      "model_forward_time": 0.11420845985412598,
      "step": 54205
    },
    {
      "epoch": 0.000330841064453125,
      "step": 54205,
      "training_step_time": 0.3919820785522461
    },
    {
      "epoch": 0.00033084716796875,
      "model_forward_time": 0.11436867713928223,
      "step": 54206
    },
    {
      "epoch": 0.00033084716796875,
      "step": 54206,
      "training_step_time": 0.3933234214782715
    },
    {
      "epoch": 0.000330853271484375,
      "model_forward_time": 0.11509370803833008,
      "step": 54207
    },
    {
      "epoch": 0.000330853271484375,
      "step": 54207,
      "training_step_time": 0.3882162570953369
    },
    {
      "epoch": 0.000330859375,
      "model_forward_time": 0.11415266990661621,
      "step": 54208
    },
    {
      "epoch": 0.000330859375,
      "step": 54208,
      "training_step_time": 0.3864908218383789
    },
    {
      "epoch": 0.000330865478515625,
      "model_forward_time": 0.11470150947570801,
      "step": 54209
    },
    {
      "epoch": 0.000330865478515625,
      "step": 54209,
      "training_step_time": 0.41962528228759766
    },
    {
      "epoch": 0.00033087158203125,
      "grad_norm": 0.09930229932069778,
      "learning_rate": 2.5244014451168863e-06,
      "loss": 0.0345,
      "step": 54210
    },
    {
      "epoch": 0.00033087158203125,
      "model_forward_time": 0.11622262001037598,
      "step": 54210
    },
    {
      "epoch": 0.00033087158203125,
      "step": 54210,
      "training_step_time": 0.7491559982299805
    },
    {
      "epoch": 0.000330877685546875,
      "model_forward_time": 0.1144108772277832,
      "step": 54211
    },
    {
      "epoch": 0.000330877685546875,
      "step": 54211,
      "training_step_time": 0.38828587532043457
    },
    {
      "epoch": 0.0003308837890625,
      "model_forward_time": 0.11446070671081543,
      "step": 54212
    },
    {
      "epoch": 0.0003308837890625,
      "step": 54212,
      "training_step_time": 0.3991241455078125
    },
    {
      "epoch": 0.000330889892578125,
      "model_forward_time": 0.11461162567138672,
      "step": 54213
    },
    {
      "epoch": 0.000330889892578125,
      "step": 54213,
      "training_step_time": 0.4452178478240967
    },
    {
      "epoch": 0.00033089599609375,
      "model_forward_time": 0.11428451538085938,
      "step": 54214
    },
    {
      "epoch": 0.00033089599609375,
      "step": 54214,
      "training_step_time": 0.46829724311828613
    },
    {
      "epoch": 0.000330902099609375,
      "model_forward_time": 0.11412811279296875,
      "step": 54215
    },
    {
      "epoch": 0.000330902099609375,
      "step": 54215,
      "training_step_time": 0.44484758377075195
    },
    {
      "epoch": 0.000330908203125,
      "model_forward_time": 0.1157064437866211,
      "step": 54216
    },
    {
      "epoch": 0.000330908203125,
      "step": 54216,
      "training_step_time": 0.45580458641052246
    },
    {
      "epoch": 0.000330914306640625,
      "model_forward_time": 0.11550545692443848,
      "step": 54217
    },
    {
      "epoch": 0.000330914306640625,
      "step": 54217,
      "training_step_time": 0.40282607078552246
    },
    {
      "epoch": 0.00033092041015625,
      "model_forward_time": 0.11545085906982422,
      "step": 54218
    },
    {
      "epoch": 0.00033092041015625,
      "step": 54218,
      "training_step_time": 0.3835866451263428
    },
    {
      "epoch": 0.000330926513671875,
      "model_forward_time": 0.11512470245361328,
      "step": 54219
    },
    {
      "epoch": 0.000330926513671875,
      "step": 54219,
      "training_step_time": 0.3863067626953125
    },
    {
      "epoch": 0.0003309326171875,
      "grad_norm": 0.10447190701961517,
      "learning_rate": 2.515762916244374e-06,
      "loss": 0.0373,
      "step": 54220
    },
    {
      "epoch": 0.0003309326171875,
      "model_forward_time": 0.11395525932312012,
      "step": 54220
    },
    {
      "epoch": 0.0003309326171875,
      "step": 54220,
      "training_step_time": 0.39905333518981934
    },
    {
      "epoch": 0.000330938720703125,
      "model_forward_time": 0.11523318290710449,
      "step": 54221
    },
    {
      "epoch": 0.000330938720703125,
      "step": 54221,
      "training_step_time": 0.4013676643371582
    },
    {
      "epoch": 0.00033094482421875,
      "model_forward_time": 0.11526036262512207,
      "step": 54222
    },
    {
      "epoch": 0.00033094482421875,
      "step": 54222,
      "training_step_time": 0.620455265045166
    },
    {
      "epoch": 0.000330950927734375,
      "model_forward_time": 0.11480069160461426,
      "step": 54223
    },
    {
      "epoch": 0.000330950927734375,
      "step": 54223,
      "training_step_time": 0.4100062847137451
    },
    {
      "epoch": 0.00033095703125,
      "model_forward_time": 0.11469817161560059,
      "step": 54224
    },
    {
      "epoch": 0.00033095703125,
      "step": 54224,
      "training_step_time": 0.39051151275634766
    },
    {
      "epoch": 0.000330963134765625,
      "model_forward_time": 0.11523818969726562,
      "step": 54225
    },
    {
      "epoch": 0.000330963134765625,
      "step": 54225,
      "training_step_time": 0.3986201286315918
    },
    {
      "epoch": 0.00033096923828125,
      "model_forward_time": 0.1143796443939209,
      "step": 54226
    },
    {
      "epoch": 0.00033096923828125,
      "step": 54226,
      "training_step_time": 0.4073359966278076
    },
    {
      "epoch": 0.000330975341796875,
      "model_forward_time": 0.1151127815246582,
      "step": 54227
    },
    {
      "epoch": 0.000330975341796875,
      "step": 54227,
      "training_step_time": 0.3944211006164551
    },
    {
      "epoch": 0.0003309814453125,
      "model_forward_time": 0.11521458625793457,
      "step": 54228
    },
    {
      "epoch": 0.0003309814453125,
      "step": 54228,
      "training_step_time": 0.6027839183807373
    },
    {
      "epoch": 0.000330987548828125,
      "model_forward_time": 0.11479330062866211,
      "step": 54229
    },
    {
      "epoch": 0.000330987548828125,
      "step": 54229,
      "training_step_time": 0.4269430637359619
    },
    {
      "epoch": 0.00033099365234375,
      "grad_norm": 0.07206534594297409,
      "learning_rate": 2.507138811829346e-06,
      "loss": 0.0362,
      "step": 54230
    },
    {
      "epoch": 0.00033099365234375,
      "model_forward_time": 0.11467313766479492,
      "step": 54230
    },
    {
      "epoch": 0.00033099365234375,
      "step": 54230,
      "training_step_time": 0.4556455612182617
    },
    {
      "epoch": 0.000330999755859375,
      "model_forward_time": 0.11371898651123047,
      "step": 54231
    },
    {
      "epoch": 0.000330999755859375,
      "step": 54231,
      "training_step_time": 0.39280152320861816
    },
    {
      "epoch": 0.000331005859375,
      "model_forward_time": 0.11435985565185547,
      "step": 54232
    },
    {
      "epoch": 0.000331005859375,
      "step": 54232,
      "training_step_time": 0.38335466384887695
    },
    {
      "epoch": 0.000331011962890625,
      "model_forward_time": 0.11443662643432617,
      "step": 54233
    },
    {
      "epoch": 0.000331011962890625,
      "step": 54233,
      "training_step_time": 0.39545607566833496
    },
    {
      "epoch": 0.00033101806640625,
      "model_forward_time": 0.11528944969177246,
      "step": 54234
    },
    {
      "epoch": 0.00033101806640625,
      "step": 54234,
      "training_step_time": 0.6152589321136475
    },
    {
      "epoch": 0.000331024169921875,
      "model_forward_time": 0.11441946029663086,
      "step": 54235
    },
    {
      "epoch": 0.000331024169921875,
      "step": 54235,
      "training_step_time": 0.4169950485229492
    },
    {
      "epoch": 0.0003310302734375,
      "model_forward_time": 0.1162559986114502,
      "step": 54236
    },
    {
      "epoch": 0.0003310302734375,
      "step": 54236,
      "training_step_time": 0.3875751495361328
    },
    {
      "epoch": 0.000331036376953125,
      "model_forward_time": 0.11473369598388672,
      "step": 54237
    },
    {
      "epoch": 0.000331036376953125,
      "step": 54237,
      "training_step_time": 0.431659460067749
    },
    {
      "epoch": 0.00033104248046875,
      "model_forward_time": 0.11567902565002441,
      "step": 54238
    },
    {
      "epoch": 0.00033104248046875,
      "step": 54238,
      "training_step_time": 0.3912656307220459
    },
    {
      "epoch": 0.000331048583984375,
      "model_forward_time": 0.11474823951721191,
      "step": 54239
    },
    {
      "epoch": 0.000331048583984375,
      "step": 54239,
      "training_step_time": 0.4719235897064209
    },
    {
      "epoch": 0.0003310546875,
      "grad_norm": 0.07422634214162827,
      "learning_rate": 2.4985291344915674e-06,
      "loss": 0.0308,
      "step": 54240
    },
    {
      "epoch": 0.0003310546875,
      "model_forward_time": 0.1146540641784668,
      "step": 54240
    },
    {
      "epoch": 0.0003310546875,
      "step": 54240,
      "training_step_time": 0.5837154388427734
    },
    {
      "epoch": 0.000331060791015625,
      "model_forward_time": 0.11470365524291992,
      "step": 54241
    },
    {
      "epoch": 0.000331060791015625,
      "step": 54241,
      "training_step_time": 0.40422844886779785
    },
    {
      "epoch": 0.00033106689453125,
      "model_forward_time": 0.11494851112365723,
      "step": 54242
    },
    {
      "epoch": 0.00033106689453125,
      "step": 54242,
      "training_step_time": 0.4573535919189453
    },
    {
      "epoch": 0.000331072998046875,
      "model_forward_time": 0.11468696594238281,
      "step": 54243
    },
    {
      "epoch": 0.000331072998046875,
      "step": 54243,
      "training_step_time": 0.5072169303894043
    },
    {
      "epoch": 0.0003310791015625,
      "model_forward_time": 0.11442852020263672,
      "step": 54244
    },
    {
      "epoch": 0.0003310791015625,
      "step": 54244,
      "training_step_time": 0.38661813735961914
    },
    {
      "epoch": 0.000331085205078125,
      "model_forward_time": 0.1144416332244873,
      "step": 54245
    },
    {
      "epoch": 0.000331085205078125,
      "step": 54245,
      "training_step_time": 0.4075896739959717
    },
    {
      "epoch": 0.00033109130859375,
      "model_forward_time": 0.1149284839630127,
      "step": 54246
    },
    {
      "epoch": 0.00033109130859375,
      "step": 54246,
      "training_step_time": 0.4877910614013672
    },
    {
      "epoch": 0.000331097412109375,
      "model_forward_time": 0.11495709419250488,
      "step": 54247
    },
    {
      "epoch": 0.000331097412109375,
      "step": 54247,
      "training_step_time": 0.3939957618713379
    },
    {
      "epoch": 0.000331103515625,
      "model_forward_time": 0.11463451385498047,
      "step": 54248
    },
    {
      "epoch": 0.000331103515625,
      "step": 54248,
      "training_step_time": 0.3871791362762451
    },
    {
      "epoch": 0.000331109619140625,
      "model_forward_time": 0.1145329475402832,
      "step": 54249
    },
    {
      "epoch": 0.000331109619140625,
      "step": 54249,
      "training_step_time": 0.46611690521240234
    },
    {
      "epoch": 0.00033111572265625,
      "grad_norm": 0.08835043758153915,
      "learning_rate": 2.4899338868464404e-06,
      "loss": 0.0389,
      "step": 54250
    },
    {
      "epoch": 0.00033111572265625,
      "model_forward_time": 0.11486649513244629,
      "step": 54250
    },
    {
      "epoch": 0.00033111572265625,
      "step": 54250,
      "training_step_time": 0.41902732849121094
    },
    {
      "epoch": 0.000331121826171875,
      "model_forward_time": 0.1150970458984375,
      "step": 54251
    },
    {
      "epoch": 0.000331121826171875,
      "step": 54251,
      "training_step_time": 0.473980188369751
    },
    {
      "epoch": 0.0003311279296875,
      "model_forward_time": 0.11571311950683594,
      "step": 54252
    },
    {
      "epoch": 0.0003311279296875,
      "step": 54252,
      "training_step_time": 0.4656867980957031
    },
    {
      "epoch": 0.000331134033203125,
      "model_forward_time": 0.11458277702331543,
      "step": 54253
    },
    {
      "epoch": 0.000331134033203125,
      "step": 54253,
      "training_step_time": 0.39772534370422363
    },
    {
      "epoch": 0.00033114013671875,
      "model_forward_time": 0.11485695838928223,
      "step": 54254
    },
    {
      "epoch": 0.00033114013671875,
      "step": 54254,
      "training_step_time": 0.4431130886077881
    },
    {
      "epoch": 0.000331146240234375,
      "model_forward_time": 0.11525774002075195,
      "step": 54255
    },
    {
      "epoch": 0.000331146240234375,
      "step": 54255,
      "training_step_time": 0.4109375476837158
    },
    {
      "epoch": 0.00033115234375,
      "model_forward_time": 0.11467504501342773,
      "step": 54256
    },
    {
      "epoch": 0.00033115234375,
      "step": 54256,
      "training_step_time": 0.4220297336578369
    },
    {
      "epoch": 0.000331158447265625,
      "model_forward_time": 0.11499953269958496,
      "step": 54257
    },
    {
      "epoch": 0.000331158447265625,
      "step": 54257,
      "training_step_time": 0.4257853031158447
    },
    {
      "epoch": 0.00033116455078125,
      "model_forward_time": 0.11498165130615234,
      "step": 54258
    },
    {
      "epoch": 0.00033116455078125,
      "step": 54258,
      "training_step_time": 0.5468673706054688
    },
    {
      "epoch": 0.000331170654296875,
      "model_forward_time": 0.1144874095916748,
      "step": 54259
    },
    {
      "epoch": 0.000331170654296875,
      "step": 54259,
      "training_step_time": 0.3812136650085449
    },
    {
      "epoch": 0.0003311767578125,
      "grad_norm": 0.07989475131034851,
      "learning_rate": 2.481353071504966e-06,
      "loss": 0.0337,
      "step": 54260
    },
    {
      "epoch": 0.0003311767578125,
      "model_forward_time": 0.11493277549743652,
      "step": 54260
    },
    {
      "epoch": 0.0003311767578125,
      "step": 54260,
      "training_step_time": 0.38420844078063965
    },
    {
      "epoch": 0.000331182861328125,
      "model_forward_time": 0.11504316329956055,
      "step": 54261
    },
    {
      "epoch": 0.000331182861328125,
      "step": 54261,
      "training_step_time": 0.378997802734375
    },
    {
      "epoch": 0.00033118896484375,
      "model_forward_time": 0.115234375,
      "step": 54262
    },
    {
      "epoch": 0.00033118896484375,
      "step": 54262,
      "training_step_time": 0.37836122512817383
    },
    {
      "epoch": 0.000331195068359375,
      "model_forward_time": 0.11501693725585938,
      "step": 54263
    },
    {
      "epoch": 0.000331195068359375,
      "step": 54263,
      "training_step_time": 0.38719606399536133
    },
    {
      "epoch": 0.000331201171875,
      "model_forward_time": 0.11472845077514648,
      "step": 54264
    },
    {
      "epoch": 0.000331201171875,
      "step": 54264,
      "training_step_time": 0.8141937255859375
    },
    {
      "epoch": 0.000331207275390625,
      "model_forward_time": 0.11409211158752441,
      "step": 54265
    },
    {
      "epoch": 0.000331207275390625,
      "step": 54265,
      "training_step_time": 0.38461828231811523
    },
    {
      "epoch": 0.00033121337890625,
      "model_forward_time": 0.11407470703125,
      "step": 54266
    },
    {
      "epoch": 0.00033121337890625,
      "step": 54266,
      "training_step_time": 0.39046335220336914
    },
    {
      "epoch": 0.000331219482421875,
      "model_forward_time": 0.114227294921875,
      "step": 54267
    },
    {
      "epoch": 0.000331219482421875,
      "step": 54267,
      "training_step_time": 0.3861408233642578
    },
    {
      "epoch": 0.0003312255859375,
      "model_forward_time": 0.11435866355895996,
      "step": 54268
    },
    {
      "epoch": 0.0003312255859375,
      "step": 54268,
      "training_step_time": 0.41613125801086426
    },
    {
      "epoch": 0.000331231689453125,
      "model_forward_time": 0.11513209342956543,
      "step": 54269
    },
    {
      "epoch": 0.000331231689453125,
      "step": 54269,
      "training_step_time": 0.4545862674713135
    },
    {
      "epoch": 0.00033123779296875,
      "grad_norm": 0.08425801992416382,
      "learning_rate": 2.4727866910737583e-06,
      "loss": 0.0343,
      "step": 54270
    },
    {
      "epoch": 0.00033123779296875,
      "model_forward_time": 0.11510467529296875,
      "step": 54270
    },
    {
      "epoch": 0.00033123779296875,
      "step": 54270,
      "training_step_time": 0.5800566673278809
    },
    {
      "epoch": 0.000331243896484375,
      "model_forward_time": 0.11528897285461426,
      "step": 54271
    },
    {
      "epoch": 0.000331243896484375,
      "step": 54271,
      "training_step_time": 0.450275182723999
    },
    {
      "epoch": 0.00033125,
      "model_forward_time": 0.11486029624938965,
      "step": 54272
    },
    {
      "epoch": 0.00033125,
      "step": 54272,
      "training_step_time": 0.3838989734649658
    },
    {
      "epoch": 0.000331256103515625,
      "model_forward_time": 0.11448049545288086,
      "step": 54273
    },
    {
      "epoch": 0.000331256103515625,
      "step": 54273,
      "training_step_time": 0.39706993103027344
    },
    {
      "epoch": 0.00033126220703125,
      "model_forward_time": 0.11499738693237305,
      "step": 54274
    },
    {
      "epoch": 0.00033126220703125,
      "step": 54274,
      "training_step_time": 0.39368319511413574
    },
    {
      "epoch": 0.000331268310546875,
      "model_forward_time": 0.1145639419555664,
      "step": 54275
    },
    {
      "epoch": 0.000331268310546875,
      "step": 54275,
      "training_step_time": 0.40180468559265137
    },
    {
      "epoch": 0.0003312744140625,
      "model_forward_time": 0.11475157737731934,
      "step": 54276
    },
    {
      "epoch": 0.0003312744140625,
      "step": 54276,
      "training_step_time": 0.7314178943634033
    },
    {
      "epoch": 0.000331280517578125,
      "model_forward_time": 0.11437511444091797,
      "step": 54277
    },
    {
      "epoch": 0.000331280517578125,
      "step": 54277,
      "training_step_time": 0.4297902584075928
    },
    {
      "epoch": 0.00033128662109375,
      "model_forward_time": 0.11471199989318848,
      "step": 54278
    },
    {
      "epoch": 0.00033128662109375,
      "step": 54278,
      "training_step_time": 0.3872792720794678
    },
    {
      "epoch": 0.000331292724609375,
      "model_forward_time": 0.11494708061218262,
      "step": 54279
    },
    {
      "epoch": 0.000331292724609375,
      "step": 54279,
      "training_step_time": 0.3852102756500244
    },
    {
      "epoch": 0.000331298828125,
      "grad_norm": 0.08973760902881622,
      "learning_rate": 2.4642347481550865e-06,
      "loss": 0.0344,
      "step": 54280
    },
    {
      "epoch": 0.000331298828125,
      "model_forward_time": 0.11437416076660156,
      "step": 54280
    },
    {
      "epoch": 0.000331298828125,
      "step": 54280,
      "training_step_time": 0.38523244857788086
    },
    {
      "epoch": 0.000331304931640625,
      "model_forward_time": 0.11476397514343262,
      "step": 54281
    },
    {
      "epoch": 0.000331304931640625,
      "step": 54281,
      "training_step_time": 0.43799400329589844
    },
    {
      "epoch": 0.00033131103515625,
      "model_forward_time": 0.1149454116821289,
      "step": 54282
    },
    {
      "epoch": 0.00033131103515625,
      "step": 54282,
      "training_step_time": 0.6403374671936035
    },
    {
      "epoch": 0.000331317138671875,
      "model_forward_time": 0.11470651626586914,
      "step": 54283
    },
    {
      "epoch": 0.000331317138671875,
      "step": 54283,
      "training_step_time": 0.48821425437927246
    },
    {
      "epoch": 0.0003313232421875,
      "model_forward_time": 0.1141822338104248,
      "step": 54284
    },
    {
      "epoch": 0.0003313232421875,
      "step": 54284,
      "training_step_time": 0.49132800102233887
    },
    {
      "epoch": 0.000331329345703125,
      "model_forward_time": 0.11416983604431152,
      "step": 54285
    },
    {
      "epoch": 0.000331329345703125,
      "step": 54285,
      "training_step_time": 0.3873152732849121
    },
    {
      "epoch": 0.00033133544921875,
      "model_forward_time": 0.11404013633728027,
      "step": 54286
    },
    {
      "epoch": 0.00033133544921875,
      "step": 54286,
      "training_step_time": 0.38287949562072754
    },
    {
      "epoch": 0.000331341552734375,
      "model_forward_time": 0.11486554145812988,
      "step": 54287
    },
    {
      "epoch": 0.000331341552734375,
      "step": 54287,
      "training_step_time": 0.39299631118774414
    },
    {
      "epoch": 0.00033134765625,
      "model_forward_time": 0.11600756645202637,
      "step": 54288
    },
    {
      "epoch": 0.00033134765625,
      "step": 54288,
      "training_step_time": 0.6207995414733887
    },
    {
      "epoch": 0.000331353759765625,
      "model_forward_time": 0.11522936820983887,
      "step": 54289
    },
    {
      "epoch": 0.000331353759765625,
      "step": 54289,
      "training_step_time": 0.39061522483825684
    },
    {
      "epoch": 0.00033135986328125,
      "grad_norm": 0.07221797853708267,
      "learning_rate": 2.455697245346783e-06,
      "loss": 0.0339,
      "step": 54290
    },
    {
      "epoch": 0.00033135986328125,
      "model_forward_time": 0.11464381217956543,
      "step": 54290
    },
    {
      "epoch": 0.00033135986328125,
      "step": 54290,
      "training_step_time": 0.3930845260620117
    },
    {
      "epoch": 0.000331365966796875,
      "model_forward_time": 0.11467909812927246,
      "step": 54291
    },
    {
      "epoch": 0.000331365966796875,
      "step": 54291,
      "training_step_time": 0.5017430782318115
    },
    {
      "epoch": 0.0003313720703125,
      "model_forward_time": 0.1154029369354248,
      "step": 54292
    },
    {
      "epoch": 0.0003313720703125,
      "step": 54292,
      "training_step_time": 0.4015028476715088
    },
    {
      "epoch": 0.000331378173828125,
      "model_forward_time": 0.11414551734924316,
      "step": 54293
    },
    {
      "epoch": 0.000331378173828125,
      "step": 54293,
      "training_step_time": 0.3924448490142822
    },
    {
      "epoch": 0.00033138427734375,
      "model_forward_time": 0.11538815498352051,
      "step": 54294
    },
    {
      "epoch": 0.00033138427734375,
      "step": 54294,
      "training_step_time": 0.684807300567627
    },
    {
      "epoch": 0.000331390380859375,
      "model_forward_time": 0.11487340927124023,
      "step": 54295
    },
    {
      "epoch": 0.000331390380859375,
      "step": 54295,
      "training_step_time": 0.40643954277038574
    },
    {
      "epoch": 0.000331396484375,
      "model_forward_time": 0.11472845077514648,
      "step": 54296
    },
    {
      "epoch": 0.000331396484375,
      "step": 54296,
      "training_step_time": 0.4521517753601074
    },
    {
      "epoch": 0.000331402587890625,
      "model_forward_time": 0.11492228507995605,
      "step": 54297
    },
    {
      "epoch": 0.000331402587890625,
      "step": 54297,
      "training_step_time": 0.44475507736206055
    },
    {
      "epoch": 0.00033140869140625,
      "model_forward_time": 0.11487483978271484,
      "step": 54298
    },
    {
      "epoch": 0.00033140869140625,
      "step": 54298,
      "training_step_time": 0.4061460494995117
    },
    {
      "epoch": 0.000331414794921875,
      "model_forward_time": 0.11457157135009766,
      "step": 54299
    },
    {
      "epoch": 0.000331414794921875,
      "step": 54299,
      "training_step_time": 0.39488816261291504
    },
    {
      "epoch": 0.0003314208984375,
      "grad_norm": 0.0803341194987297,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 0.0348,
      "step": 54300
    },
    {
      "epoch": 0.0003314208984375,
      "model_forward_time": 0.11422181129455566,
      "step": 54300
    },
    {
      "epoch": 0.0003314208984375,
      "step": 54300,
      "training_step_time": 0.5309574604034424
    },
    {
      "epoch": 0.000331427001953125,
      "model_forward_time": 0.11611223220825195,
      "step": 54301
    },
    {
      "epoch": 0.000331427001953125,
      "step": 54301,
      "training_step_time": 0.39260268211364746
    },
    {
      "epoch": 0.00033143310546875,
      "model_forward_time": 0.1151266098022461,
      "step": 54302
    },
    {
      "epoch": 0.00033143310546875,
      "step": 54302,
      "training_step_time": 0.38752245903015137
    },
    {
      "epoch": 0.000331439208984375,
      "model_forward_time": 0.11534237861633301,
      "step": 54303
    },
    {
      "epoch": 0.000331439208984375,
      "step": 54303,
      "training_step_time": 0.3904101848602295
    },
    {
      "epoch": 0.0003314453125,
      "model_forward_time": 0.11503982543945312,
      "step": 54304
    },
    {
      "epoch": 0.0003314453125,
      "step": 54304,
      "training_step_time": 0.4272327423095703
    },
    {
      "epoch": 0.000331451416015625,
      "model_forward_time": 0.11472535133361816,
      "step": 54305
    },
    {
      "epoch": 0.000331451416015625,
      "step": 54305,
      "training_step_time": 0.4470202922821045
    },
    {
      "epoch": 0.00033145751953125,
      "model_forward_time": 0.1152040958404541,
      "step": 54306
    },
    {
      "epoch": 0.00033145751953125,
      "step": 54306,
      "training_step_time": 0.6744887828826904
    },
    {
      "epoch": 0.000331463623046875,
      "model_forward_time": 0.11536335945129395,
      "step": 54307
    },
    {
      "epoch": 0.000331463623046875,
      "step": 54307,
      "training_step_time": 0.39984989166259766
    },
    {
      "epoch": 0.0003314697265625,
      "model_forward_time": 0.11467933654785156,
      "step": 54308
    },
    {
      "epoch": 0.0003314697265625,
      "step": 54308,
      "training_step_time": 0.42229151725769043
    },
    {
      "epoch": 0.000331475830078125,
      "model_forward_time": 0.11575460433959961,
      "step": 54309
    },
    {
      "epoch": 0.000331475830078125,
      "step": 54309,
      "training_step_time": 0.3909468650817871
    },
    {
      "epoch": 0.00033148193359375,
      "grad_norm": 0.12012381106615067,
      "learning_rate": 2.438665570430787e-06,
      "loss": 0.0327,
      "step": 54310
    },
    {
      "epoch": 0.00033148193359375,
      "model_forward_time": 0.11457443237304688,
      "step": 54310
    },
    {
      "epoch": 0.00033148193359375,
      "step": 54310,
      "training_step_time": 0.4521651268005371
    },
    {
      "epoch": 0.000331488037109375,
      "model_forward_time": 0.1148078441619873,
      "step": 54311
    },
    {
      "epoch": 0.000331488037109375,
      "step": 54311,
      "training_step_time": 0.4801902770996094
    },
    {
      "epoch": 0.000331494140625,
      "model_forward_time": 0.11537361145019531,
      "step": 54312
    },
    {
      "epoch": 0.000331494140625,
      "step": 54312,
      "training_step_time": 0.42455291748046875
    },
    {
      "epoch": 0.000331500244140625,
      "model_forward_time": 0.11519408226013184,
      "step": 54313
    },
    {
      "epoch": 0.000331500244140625,
      "step": 54313,
      "training_step_time": 0.3910377025604248
    },
    {
      "epoch": 0.00033150634765625,
      "model_forward_time": 0.11536478996276855,
      "step": 54314
    },
    {
      "epoch": 0.00033150634765625,
      "step": 54314,
      "training_step_time": 0.39202880859375
    },
    {
      "epoch": 0.000331512451171875,
      "model_forward_time": 0.11496281623840332,
      "step": 54315
    },
    {
      "epoch": 0.000331512451171875,
      "step": 54315,
      "training_step_time": 0.3969111442565918
    },
    {
      "epoch": 0.0003315185546875,
      "model_forward_time": 0.11539363861083984,
      "step": 54316
    },
    {
      "epoch": 0.0003315185546875,
      "step": 54316,
      "training_step_time": 0.39209437370300293
    },
    {
      "epoch": 0.000331524658203125,
      "model_forward_time": 0.11571025848388672,
      "step": 54317
    },
    {
      "epoch": 0.000331524658203125,
      "step": 54317,
      "training_step_time": 0.39777398109436035
    },
    {
      "epoch": 0.00033153076171875,
      "model_forward_time": 0.11634135246276855,
      "step": 54318
    },
    {
      "epoch": 0.00033153076171875,
      "step": 54318,
      "training_step_time": 0.8360347747802734
    },
    {
      "epoch": 0.000331536865234375,
      "model_forward_time": 0.11464691162109375,
      "step": 54319
    },
    {
      "epoch": 0.000331536865234375,
      "step": 54319,
      "training_step_time": 0.40394043922424316
    },
    {
      "epoch": 0.00033154296875,
      "grad_norm": 0.08968021720647812,
      "learning_rate": 2.430171403496867e-06,
      "loss": 0.0308,
      "step": 54320
    },
    {
      "epoch": 0.00033154296875,
      "model_forward_time": 0.11440396308898926,
      "step": 54320
    },
    {
      "epoch": 0.00033154296875,
      "step": 54320,
      "training_step_time": 0.41045355796813965
    },
    {
      "epoch": 0.000331549072265625,
      "model_forward_time": 0.11472201347351074,
      "step": 54321
    },
    {
      "epoch": 0.000331549072265625,
      "step": 54321,
      "training_step_time": 0.3797729015350342
    },
    {
      "epoch": 0.00033155517578125,
      "model_forward_time": 0.11493420600891113,
      "step": 54322
    },
    {
      "epoch": 0.00033155517578125,
      "step": 54322,
      "training_step_time": 0.39839768409729004
    },
    {
      "epoch": 0.000331561279296875,
      "model_forward_time": 0.1145787239074707,
      "step": 54323
    },
    {
      "epoch": 0.000331561279296875,
      "step": 54323,
      "training_step_time": 0.43094420433044434
    },
    {
      "epoch": 0.0003315673828125,
      "model_forward_time": 0.11451172828674316,
      "step": 54324
    },
    {
      "epoch": 0.0003315673828125,
      "step": 54324,
      "training_step_time": 0.7566463947296143
    },
    {
      "epoch": 0.000331573486328125,
      "model_forward_time": 0.11454415321350098,
      "step": 54325
    },
    {
      "epoch": 0.000331573486328125,
      "step": 54325,
      "training_step_time": 0.4435098171234131
    },
    {
      "epoch": 0.00033157958984375,
      "model_forward_time": 0.11493206024169922,
      "step": 54326
    },
    {
      "epoch": 0.00033157958984375,
      "step": 54326,
      "training_step_time": 0.37474608421325684
    },
    {
      "epoch": 0.000331585693359375,
      "model_forward_time": 0.11448407173156738,
      "step": 54327
    },
    {
      "epoch": 0.000331585693359375,
      "step": 54327,
      "training_step_time": 0.38521289825439453
    },
    {
      "epoch": 0.000331591796875,
      "model_forward_time": 0.1143496036529541,
      "step": 54328
    },
    {
      "epoch": 0.000331591796875,
      "step": 54328,
      "training_step_time": 0.38565778732299805
    },
    {
      "epoch": 0.000331597900390625,
      "model_forward_time": 0.11424922943115234,
      "step": 54329
    },
    {
      "epoch": 0.000331597900390625,
      "step": 54329,
      "training_step_time": 0.3893461227416992
    },
    {
      "epoch": 0.00033160400390625,
      "grad_norm": 0.09513828903436661,
      "learning_rate": 2.421691687020855e-06,
      "loss": 0.0344,
      "step": 54330
    },
    {
      "epoch": 0.00033160400390625,
      "model_forward_time": 0.11444425582885742,
      "step": 54330
    },
    {
      "epoch": 0.00033160400390625,
      "step": 54330,
      "training_step_time": 0.5279490947723389
    },
    {
      "epoch": 0.000331610107421875,
      "model_forward_time": 0.11460614204406738,
      "step": 54331
    },
    {
      "epoch": 0.000331610107421875,
      "step": 54331,
      "training_step_time": 0.4208559989929199
    },
    {
      "epoch": 0.0003316162109375,
      "model_forward_time": 0.11601448059082031,
      "step": 54332
    },
    {
      "epoch": 0.0003316162109375,
      "step": 54332,
      "training_step_time": 0.4029817581176758
    },
    {
      "epoch": 0.000331622314453125,
      "model_forward_time": 0.11517572402954102,
      "step": 54333
    },
    {
      "epoch": 0.000331622314453125,
      "step": 54333,
      "training_step_time": 0.44051265716552734
    },
    {
      "epoch": 0.00033162841796875,
      "model_forward_time": 0.11506772041320801,
      "step": 54334
    },
    {
      "epoch": 0.00033162841796875,
      "step": 54334,
      "training_step_time": 0.388580322265625
    },
    {
      "epoch": 0.000331634521484375,
      "model_forward_time": 0.11561083793640137,
      "step": 54335
    },
    {
      "epoch": 0.000331634521484375,
      "step": 54335,
      "training_step_time": 0.3936753273010254
    },
    {
      "epoch": 0.000331640625,
      "model_forward_time": 0.11537551879882812,
      "step": 54336
    },
    {
      "epoch": 0.000331640625,
      "step": 54336,
      "training_step_time": 0.7011468410491943
    },
    {
      "epoch": 0.000331646728515625,
      "model_forward_time": 0.1148993968963623,
      "step": 54337
    },
    {
      "epoch": 0.000331646728515625,
      "step": 54337,
      "training_step_time": 0.4651947021484375
    },
    {
      "epoch": 0.00033165283203125,
      "model_forward_time": 0.11472821235656738,
      "step": 54338
    },
    {
      "epoch": 0.00033165283203125,
      "step": 54338,
      "training_step_time": 0.45278048515319824
    },
    {
      "epoch": 0.000331658935546875,
      "model_forward_time": 0.11429667472839355,
      "step": 54339
    },
    {
      "epoch": 0.000331658935546875,
      "step": 54339,
      "training_step_time": 0.48331165313720703
    },
    {
      "epoch": 0.0003316650390625,
      "grad_norm": 0.08312874287366867,
      "learning_rate": 2.413226423578696e-06,
      "loss": 0.04,
      "step": 54340
    },
    {
      "epoch": 0.0003316650390625,
      "model_forward_time": 0.11440515518188477,
      "step": 54340
    },
    {
      "epoch": 0.0003316650390625,
      "step": 54340,
      "training_step_time": 0.38464951515197754
    },
    {
      "epoch": 0.000331671142578125,
      "model_forward_time": 0.11391830444335938,
      "step": 54341
    },
    {
      "epoch": 0.000331671142578125,
      "step": 54341,
      "training_step_time": 0.3906087875366211
    },
    {
      "epoch": 0.00033167724609375,
      "model_forward_time": 0.1153874397277832,
      "step": 54342
    },
    {
      "epoch": 0.00033167724609375,
      "step": 54342,
      "training_step_time": 0.40601086616516113
    },
    {
      "epoch": 0.000331683349609375,
      "model_forward_time": 0.11450552940368652,
      "step": 54343
    },
    {
      "epoch": 0.000331683349609375,
      "step": 54343,
      "training_step_time": 0.4010903835296631
    },
    {
      "epoch": 0.000331689453125,
      "model_forward_time": 0.11515212059020996,
      "step": 54344
    },
    {
      "epoch": 0.000331689453125,
      "step": 54344,
      "training_step_time": 0.39937376976013184
    },
    {
      "epoch": 0.000331695556640625,
      "model_forward_time": 0.11564111709594727,
      "step": 54345
    },
    {
      "epoch": 0.000331695556640625,
      "step": 54345,
      "training_step_time": 0.4710044860839844
    },
    {
      "epoch": 0.00033170166015625,
      "model_forward_time": 0.11481761932373047,
      "step": 54346
    },
    {
      "epoch": 0.00033170166015625,
      "step": 54346,
      "training_step_time": 0.46075940132141113
    },
    {
      "epoch": 0.000331707763671875,
      "model_forward_time": 0.11585402488708496,
      "step": 54347
    },
    {
      "epoch": 0.000331707763671875,
      "step": 54347,
      "training_step_time": 0.4525306224822998
    },
    {
      "epoch": 0.0003317138671875,
      "model_forward_time": 0.1148524284362793,
      "step": 54348
    },
    {
      "epoch": 0.0003317138671875,
      "step": 54348,
      "training_step_time": 0.5443682670593262
    },
    {
      "epoch": 0.000331719970703125,
      "model_forward_time": 0.11405134201049805,
      "step": 54349
    },
    {
      "epoch": 0.000331719970703125,
      "step": 54349,
      "training_step_time": 0.4122314453125
    },
    {
      "epoch": 0.00033172607421875,
      "grad_norm": 0.08384940773248672,
      "learning_rate": 2.404775615741872e-06,
      "loss": 0.0368,
      "step": 54350
    },
    {
      "epoch": 0.00033172607421875,
      "model_forward_time": 0.11457324028015137,
      "step": 54350
    },
    {
      "epoch": 0.00033172607421875,
      "step": 54350,
      "training_step_time": 0.4368300437927246
    },
    {
      "epoch": 0.000331732177734375,
      "model_forward_time": 0.11514139175415039,
      "step": 54351
    },
    {
      "epoch": 0.000331732177734375,
      "step": 54351,
      "training_step_time": 0.42607545852661133
    },
    {
      "epoch": 0.00033173828125,
      "model_forward_time": 0.11463260650634766,
      "step": 54352
    },
    {
      "epoch": 0.00033173828125,
      "step": 54352,
      "training_step_time": 0.433290958404541
    },
    {
      "epoch": 0.000331744384765625,
      "model_forward_time": 0.11462926864624023,
      "step": 54353
    },
    {
      "epoch": 0.000331744384765625,
      "step": 54353,
      "training_step_time": 0.3975062370300293
    },
    {
      "epoch": 0.00033175048828125,
      "model_forward_time": 0.11454606056213379,
      "step": 54354
    },
    {
      "epoch": 0.00033175048828125,
      "step": 54354,
      "training_step_time": 0.5034575462341309
    },
    {
      "epoch": 0.000331756591796875,
      "model_forward_time": 0.11473989486694336,
      "step": 54355
    },
    {
      "epoch": 0.000331756591796875,
      "step": 54355,
      "training_step_time": 0.38785839080810547
    },
    {
      "epoch": 0.0003317626953125,
      "model_forward_time": 0.1148521900177002,
      "step": 54356
    },
    {
      "epoch": 0.0003317626953125,
      "step": 54356,
      "training_step_time": 0.39226865768432617
    },
    {
      "epoch": 0.000331768798828125,
      "model_forward_time": 0.11482024192810059,
      "step": 54357
    },
    {
      "epoch": 0.000331768798828125,
      "step": 54357,
      "training_step_time": 0.3893752098083496
    },
    {
      "epoch": 0.00033177490234375,
      "model_forward_time": 0.11496949195861816,
      "step": 54358
    },
    {
      "epoch": 0.00033177490234375,
      "step": 54358,
      "training_step_time": 0.39313435554504395
    },
    {
      "epoch": 0.000331781005859375,
      "model_forward_time": 0.1149895191192627,
      "step": 54359
    },
    {
      "epoch": 0.000331781005859375,
      "step": 54359,
      "training_step_time": 0.4333462715148926
    },
    {
      "epoch": 0.000331787109375,
      "grad_norm": 0.11314329504966736,
      "learning_rate": 2.3963392660775575e-06,
      "loss": 0.0367,
      "step": 54360
    },
    {
      "epoch": 0.000331787109375,
      "model_forward_time": 0.1155252456665039,
      "step": 54360
    },
    {
      "epoch": 0.000331787109375,
      "step": 54360,
      "training_step_time": 0.597583532333374
    },
    {
      "epoch": 0.000331793212890625,
      "model_forward_time": 0.11490488052368164,
      "step": 54361
    },
    {
      "epoch": 0.000331793212890625,
      "step": 54361,
      "training_step_time": 0.45210790634155273
    },
    {
      "epoch": 0.00033179931640625,
      "model_forward_time": 0.11505365371704102,
      "step": 54362
    },
    {
      "epoch": 0.00033179931640625,
      "step": 54362,
      "training_step_time": 0.38335227966308594
    },
    {
      "epoch": 0.000331805419921875,
      "model_forward_time": 0.11583304405212402,
      "step": 54363
    },
    {
      "epoch": 0.000331805419921875,
      "step": 54363,
      "training_step_time": 0.42768406867980957
    },
    {
      "epoch": 0.0003318115234375,
      "model_forward_time": 0.11684632301330566,
      "step": 54364
    },
    {
      "epoch": 0.0003318115234375,
      "step": 54364,
      "training_step_time": 0.47820425033569336
    },
    {
      "epoch": 0.000331817626953125,
      "model_forward_time": 0.11583089828491211,
      "step": 54365
    },
    {
      "epoch": 0.000331817626953125,
      "step": 54365,
      "training_step_time": 0.5566556453704834
    },
    {
      "epoch": 0.00033182373046875,
      "model_forward_time": 0.11706709861755371,
      "step": 54366
    },
    {
      "epoch": 0.00033182373046875,
      "step": 54366,
      "training_step_time": 0.9228777885437012
    },
    {
      "epoch": 0.000331829833984375,
      "model_forward_time": 0.11919641494750977,
      "step": 54367
    },
    {
      "epoch": 0.000331829833984375,
      "step": 54367,
      "training_step_time": 0.6963932514190674
    },
    {
      "epoch": 0.0003318359375,
      "model_forward_time": 0.11799049377441406,
      "step": 54368
    },
    {
      "epoch": 0.0003318359375,
      "step": 54368,
      "training_step_time": 0.6521925926208496
    },
    {
      "epoch": 0.000331842041015625,
      "model_forward_time": 0.11692404747009277,
      "step": 54369
    },
    {
      "epoch": 0.000331842041015625,
      "step": 54369,
      "training_step_time": 0.7130947113037109
    },
    {
      "epoch": 0.00033184814453125,
      "grad_norm": 0.09490927308797836,
      "learning_rate": 2.3879173771484576e-06,
      "loss": 0.0374,
      "step": 54370
    },
    {
      "epoch": 0.00033184814453125,
      "model_forward_time": 0.11810517311096191,
      "step": 54370
    },
    {
      "epoch": 0.00033184814453125,
      "step": 54370,
      "training_step_time": 0.7079203128814697
    },
    {
      "epoch": 0.000331854248046875,
      "model_forward_time": 0.12295293807983398,
      "step": 54371
    },
    {
      "epoch": 0.000331854248046875,
      "step": 54371,
      "training_step_time": 0.6479923725128174
    },
    {
      "epoch": 0.0003318603515625,
      "model_forward_time": 0.11722183227539062,
      "step": 54372
    },
    {
      "epoch": 0.0003318603515625,
      "step": 54372,
      "training_step_time": 0.712425708770752
    },
    {
      "epoch": 0.000331866455078125,
      "model_forward_time": 0.11830973625183105,
      "step": 54373
    },
    {
      "epoch": 0.000331866455078125,
      "step": 54373,
      "training_step_time": 0.757145881652832
    },
    {
      "epoch": 0.00033187255859375,
      "model_forward_time": 0.11970114707946777,
      "step": 54374
    },
    {
      "epoch": 0.00033187255859375,
      "step": 54374,
      "training_step_time": 0.620314359664917
    },
    {
      "epoch": 0.000331878662109375,
      "model_forward_time": 0.11725449562072754,
      "step": 54375
    },
    {
      "epoch": 0.000331878662109375,
      "step": 54375,
      "training_step_time": 0.6505851745605469
    },
    {
      "epoch": 0.000331884765625,
      "model_forward_time": 0.1224374771118164,
      "step": 54376
    },
    {
      "epoch": 0.000331884765625,
      "step": 54376,
      "training_step_time": 0.7147829532623291
    },
    {
      "epoch": 0.000331890869140625,
      "model_forward_time": 0.11677098274230957,
      "step": 54377
    },
    {
      "epoch": 0.000331890869140625,
      "step": 54377,
      "training_step_time": 0.6851866245269775
    },
    {
      "epoch": 0.00033189697265625,
      "model_forward_time": 0.12128996849060059,
      "step": 54378
    },
    {
      "epoch": 0.00033189697265625,
      "step": 54378,
      "training_step_time": 0.6652581691741943
    },
    {
      "epoch": 0.000331903076171875,
      "model_forward_time": 0.12117695808410645,
      "step": 54379
    },
    {
      "epoch": 0.000331903076171875,
      "step": 54379,
      "training_step_time": 0.6565878391265869
    },
    {
      "epoch": 0.0003319091796875,
      "grad_norm": 0.12210240215063095,
      "learning_rate": 2.379509951512937e-06,
      "loss": 0.0358,
      "step": 54380
    },
    {
      "epoch": 0.0003319091796875,
      "model_forward_time": 0.1172480583190918,
      "step": 54380
    },
    {
      "epoch": 0.0003319091796875,
      "step": 54380,
      "training_step_time": 0.6428680419921875
    },
    {
      "epoch": 0.000331915283203125,
      "model_forward_time": 0.1182565689086914,
      "step": 54381
    },
    {
      "epoch": 0.000331915283203125,
      "step": 54381,
      "training_step_time": 0.6717269420623779
    },
    {
      "epoch": 0.00033192138671875,
      "model_forward_time": 0.1169290542602539,
      "step": 54382
    },
    {
      "epoch": 0.00033192138671875,
      "step": 54382,
      "training_step_time": 0.684363842010498
    },
    {
      "epoch": 0.000331927490234375,
      "model_forward_time": 0.12386775016784668,
      "step": 54383
    },
    {
      "epoch": 0.000331927490234375,
      "step": 54383,
      "training_step_time": 0.7201147079467773
    },
    {
      "epoch": 0.00033193359375,
      "model_forward_time": 0.11602973937988281,
      "step": 54384
    },
    {
      "epoch": 0.00033193359375,
      "step": 54384,
      "training_step_time": 0.743715763092041
    },
    {
      "epoch": 0.000331939697265625,
      "model_forward_time": 0.11780381202697754,
      "step": 54385
    },
    {
      "epoch": 0.000331939697265625,
      "step": 54385,
      "training_step_time": 0.7815215587615967
    },
    {
      "epoch": 0.00033194580078125,
      "model_forward_time": 0.11607551574707031,
      "step": 54386
    },
    {
      "epoch": 0.00033194580078125,
      "step": 54386,
      "training_step_time": 0.6863236427307129
    },
    {
      "epoch": 0.000331951904296875,
      "model_forward_time": 0.1236565113067627,
      "step": 54387
    },
    {
      "epoch": 0.000331951904296875,
      "step": 54387,
      "training_step_time": 0.6870925426483154
    },
    {
      "epoch": 0.0003319580078125,
      "model_forward_time": 0.12289929389953613,
      "step": 54388
    },
    {
      "epoch": 0.0003319580078125,
      "step": 54388,
      "training_step_time": 0.5934844017028809
    },
    {
      "epoch": 0.000331964111328125,
      "model_forward_time": 0.12326455116271973,
      "step": 54389
    },
    {
      "epoch": 0.000331964111328125,
      "step": 54389,
      "training_step_time": 0.7718408107757568
    },
    {
      "epoch": 0.00033197021484375,
      "grad_norm": 0.11942119151353836,
      "learning_rate": 2.371116991724953e-06,
      "loss": 0.0393,
      "step": 54390
    },
    {
      "epoch": 0.00033197021484375,
      "model_forward_time": 0.12630224227905273,
      "step": 54390
    },
    {
      "epoch": 0.00033197021484375,
      "step": 54390,
      "training_step_time": 0.6478180885314941
    },
    {
      "epoch": 0.000331976318359375,
      "model_forward_time": 0.11972641944885254,
      "step": 54391
    },
    {
      "epoch": 0.000331976318359375,
      "step": 54391,
      "training_step_time": 0.705582857131958
    },
    {
      "epoch": 0.000331982421875,
      "model_forward_time": 0.11688733100891113,
      "step": 54392
    },
    {
      "epoch": 0.000331982421875,
      "step": 54392,
      "training_step_time": 0.7229509353637695
    },
    {
      "epoch": 0.000331988525390625,
      "model_forward_time": 0.12025976181030273,
      "step": 54393
    },
    {
      "epoch": 0.000331988525390625,
      "step": 54393,
      "training_step_time": 0.7122609615325928
    },
    {
      "epoch": 0.00033199462890625,
      "model_forward_time": 0.11851668357849121,
      "step": 54394
    },
    {
      "epoch": 0.00033199462890625,
      "step": 54394,
      "training_step_time": 0.7775363922119141
    },
    {
      "epoch": 0.000332000732421875,
      "model_forward_time": 0.11652946472167969,
      "step": 54395
    },
    {
      "epoch": 0.000332000732421875,
      "step": 54395,
      "training_step_time": 0.6700015068054199
    },
    {
      "epoch": 0.0003320068359375,
      "model_forward_time": 0.1265583038330078,
      "step": 54396
    },
    {
      "epoch": 0.0003320068359375,
      "step": 54396,
      "training_step_time": 0.6407864093780518
    },
    {
      "epoch": 0.000332012939453125,
      "model_forward_time": 0.11856412887573242,
      "step": 54397
    },
    {
      "epoch": 0.000332012939453125,
      "step": 54397,
      "training_step_time": 0.6844451427459717
    },
    {
      "epoch": 0.00033201904296875,
      "model_forward_time": 0.11962366104125977,
      "step": 54398
    },
    {
      "epoch": 0.00033201904296875,
      "step": 54398,
      "training_step_time": 0.6792488098144531
    },
    {
      "epoch": 0.000332025146484375,
      "model_forward_time": 0.1258225440979004,
      "step": 54399
    },
    {
      "epoch": 0.000332025146484375,
      "step": 54399,
      "training_step_time": 0.6876611709594727
    },
    {
      "epoch": 0.00033203125,
      "grad_norm": 0.09026531875133514,
      "learning_rate": 2.362738500334055e-06,
      "loss": 0.0368,
      "step": 54400
    },
    {
      "epoch": 0.00033203125,
      "model_forward_time": 0.12464070320129395,
      "step": 54400
    },
    {
      "epoch": 0.00033203125,
      "step": 54400,
      "training_step_time": 0.7032737731933594
    },
    {
      "epoch": 0.000332037353515625,
      "model_forward_time": 0.11878085136413574,
      "step": 54401
    },
    {
      "epoch": 0.000332037353515625,
      "step": 54401,
      "training_step_time": 0.73663330078125
    },
    {
      "epoch": 0.00033204345703125,
      "model_forward_time": 0.1179194450378418,
      "step": 54402
    },
    {
      "epoch": 0.00033204345703125,
      "step": 54402,
      "training_step_time": 0.5668725967407227
    },
    {
      "epoch": 0.000332049560546875,
      "model_forward_time": 0.11673688888549805,
      "step": 54403
    },
    {
      "epoch": 0.000332049560546875,
      "step": 54403,
      "training_step_time": 0.7700181007385254
    },
    {
      "epoch": 0.0003320556640625,
      "model_forward_time": 0.11869692802429199,
      "step": 54404
    },
    {
      "epoch": 0.0003320556640625,
      "step": 54404,
      "training_step_time": 0.5970878601074219
    },
    {
      "epoch": 0.000332061767578125,
      "model_forward_time": 0.12238049507141113,
      "step": 54405
    },
    {
      "epoch": 0.000332061767578125,
      "step": 54405,
      "training_step_time": 0.6161472797393799
    },
    {
      "epoch": 0.00033206787109375,
      "model_forward_time": 0.12237715721130371,
      "step": 54406
    },
    {
      "epoch": 0.00033206787109375,
      "step": 54406,
      "training_step_time": 0.6813492774963379
    },
    {
      "epoch": 0.000332073974609375,
      "model_forward_time": 0.12040424346923828,
      "step": 54407
    },
    {
      "epoch": 0.000332073974609375,
      "step": 54407,
      "training_step_time": 0.6818568706512451
    },
    {
      "epoch": 0.000332080078125,
      "model_forward_time": 0.12175607681274414,
      "step": 54408
    },
    {
      "epoch": 0.000332080078125,
      "step": 54408,
      "training_step_time": 0.6774718761444092
    },
    {
      "epoch": 0.000332086181640625,
      "model_forward_time": 0.12195849418640137,
      "step": 54409
    },
    {
      "epoch": 0.000332086181640625,
      "step": 54409,
      "training_step_time": 0.6177141666412354
    },
    {
      "epoch": 0.00033209228515625,
      "grad_norm": 0.1016841009259224,
      "learning_rate": 2.354374479885413e-06,
      "loss": 0.0384,
      "step": 54410
    },
    {
      "epoch": 0.00033209228515625,
      "model_forward_time": 0.1349341869354248,
      "step": 54410
    },
    {
      "epoch": 0.00033209228515625,
      "step": 54410,
      "training_step_time": 0.7543485164642334
    },
    {
      "epoch": 0.000332098388671875,
      "model_forward_time": 0.12102985382080078,
      "step": 54411
    },
    {
      "epoch": 0.000332098388671875,
      "step": 54411,
      "training_step_time": 0.7092270851135254
    },
    {
      "epoch": 0.0003321044921875,
      "model_forward_time": 0.11890053749084473,
      "step": 54412
    },
    {
      "epoch": 0.0003321044921875,
      "step": 54412,
      "training_step_time": 0.6715137958526611
    },
    {
      "epoch": 0.000332110595703125,
      "model_forward_time": 0.11684441566467285,
      "step": 54413
    },
    {
      "epoch": 0.000332110595703125,
      "step": 54413,
      "training_step_time": 0.7128582000732422
    },
    {
      "epoch": 0.00033211669921875,
      "model_forward_time": 0.12541675567626953,
      "step": 54414
    },
    {
      "epoch": 0.00033211669921875,
      "step": 54414,
      "training_step_time": 0.6526949405670166
    },
    {
      "epoch": 0.000332122802734375,
      "model_forward_time": 0.12172389030456543,
      "step": 54415
    },
    {
      "epoch": 0.000332122802734375,
      "step": 54415,
      "training_step_time": 0.6657288074493408
    },
    {
      "epoch": 0.00033212890625,
      "model_forward_time": 0.12232065200805664,
      "step": 54416
    },
    {
      "epoch": 0.00033212890625,
      "step": 54416,
      "training_step_time": 0.7868633270263672
    },
    {
      "epoch": 0.000332135009765625,
      "model_forward_time": 0.12029075622558594,
      "step": 54417
    },
    {
      "epoch": 0.000332135009765625,
      "step": 54417,
      "training_step_time": 0.6394283771514893
    },
    {
      "epoch": 0.00033214111328125,
      "model_forward_time": 0.1206350326538086,
      "step": 54418
    },
    {
      "epoch": 0.00033214111328125,
      "step": 54418,
      "training_step_time": 0.604320764541626
    },
    {
      "epoch": 0.000332147216796875,
      "model_forward_time": 0.11911582946777344,
      "step": 54419
    },
    {
      "epoch": 0.000332147216796875,
      "step": 54419,
      "training_step_time": 0.6664173603057861
    },
    {
      "epoch": 0.0003321533203125,
      "grad_norm": 0.11105295270681381,
      "learning_rate": 2.3460249329197824e-06,
      "loss": 0.039,
      "step": 54420
    },
    {
      "epoch": 0.0003321533203125,
      "model_forward_time": 0.11845231056213379,
      "step": 54420
    },
    {
      "epoch": 0.0003321533203125,
      "step": 54420,
      "training_step_time": 0.737445592880249
    },
    {
      "epoch": 0.000332159423828125,
      "model_forward_time": 0.11747312545776367,
      "step": 54421
    },
    {
      "epoch": 0.000332159423828125,
      "step": 54421,
      "training_step_time": 0.7137060165405273
    },
    {
      "epoch": 0.00033216552734375,
      "model_forward_time": 0.11964797973632812,
      "step": 54422
    },
    {
      "epoch": 0.00033216552734375,
      "step": 54422,
      "training_step_time": 0.7841446399688721
    },
    {
      "epoch": 0.000332171630859375,
      "model_forward_time": 0.11697578430175781,
      "step": 54423
    },
    {
      "epoch": 0.000332171630859375,
      "step": 54423,
      "training_step_time": 0.5986168384552002
    },
    {
      "epoch": 0.000332177734375,
      "model_forward_time": 0.1209859848022461,
      "step": 54424
    },
    {
      "epoch": 0.000332177734375,
      "step": 54424,
      "training_step_time": 0.6494996547698975
    },
    {
      "epoch": 0.000332183837890625,
      "model_forward_time": 0.12136292457580566,
      "step": 54425
    },
    {
      "epoch": 0.000332183837890625,
      "step": 54425,
      "training_step_time": 0.6687240600585938
    },
    {
      "epoch": 0.00033218994140625,
      "model_forward_time": 0.1215968132019043,
      "step": 54426
    },
    {
      "epoch": 0.00033218994140625,
      "step": 54426,
      "training_step_time": 0.6377115249633789
    },
    {
      "epoch": 0.000332196044921875,
      "model_forward_time": 0.12540245056152344,
      "step": 54427
    },
    {
      "epoch": 0.000332196044921875,
      "step": 54427,
      "training_step_time": 0.6835277080535889
    },
    {
      "epoch": 0.0003322021484375,
      "model_forward_time": 0.11960458755493164,
      "step": 54428
    },
    {
      "epoch": 0.0003322021484375,
      "step": 54428,
      "training_step_time": 0.5630719661712646
    },
    {
      "epoch": 0.000332208251953125,
      "model_forward_time": 0.1203145980834961,
      "step": 54429
    },
    {
      "epoch": 0.000332208251953125,
      "step": 54429,
      "training_step_time": 0.6201515197753906
    },
    {
      "epoch": 0.00033221435546875,
      "grad_norm": 0.1311911791563034,
      "learning_rate": 2.3376898619735577e-06,
      "loss": 0.0364,
      "step": 54430
    },
    {
      "epoch": 0.00033221435546875,
      "model_forward_time": 0.1200876235961914,
      "step": 54430
    },
    {
      "epoch": 0.00033221435546875,
      "step": 54430,
      "training_step_time": 0.6552855968475342
    },
    {
      "epoch": 0.000332220458984375,
      "model_forward_time": 0.12152934074401855,
      "step": 54431
    },
    {
      "epoch": 0.000332220458984375,
      "step": 54431,
      "training_step_time": 0.7235457897186279
    },
    {
      "epoch": 0.0003322265625,
      "model_forward_time": 0.11828756332397461,
      "step": 54432
    },
    {
      "epoch": 0.0003322265625,
      "step": 54432,
      "training_step_time": 0.6286964416503906
    },
    {
      "epoch": 0.000332232666015625,
      "model_forward_time": 0.11697912216186523,
      "step": 54433
    },
    {
      "epoch": 0.000332232666015625,
      "step": 54433,
      "training_step_time": 0.5532631874084473
    },
    {
      "epoch": 0.00033223876953125,
      "model_forward_time": 0.11647391319274902,
      "step": 54434
    },
    {
      "epoch": 0.00033223876953125,
      "step": 54434,
      "training_step_time": 0.4807248115539551
    },
    {
      "epoch": 0.000332244873046875,
      "model_forward_time": 0.11647486686706543,
      "step": 54435
    },
    {
      "epoch": 0.000332244873046875,
      "step": 54435,
      "training_step_time": 0.42694759368896484
    },
    {
      "epoch": 0.0003322509765625,
      "model_forward_time": 0.1158292293548584,
      "step": 54436
    },
    {
      "epoch": 0.0003322509765625,
      "step": 54436,
      "training_step_time": 0.592327356338501
    },
    {
      "epoch": 0.000332257080078125,
      "model_forward_time": 0.11504030227661133,
      "step": 54437
    },
    {
      "epoch": 0.000332257080078125,
      "step": 54437,
      "training_step_time": 0.42154622077941895
    },
    {
      "epoch": 0.00033226318359375,
      "model_forward_time": 0.11551213264465332,
      "step": 54438
    },
    {
      "epoch": 0.00033226318359375,
      "step": 54438,
      "training_step_time": 0.5236728191375732
    },
    {
      "epoch": 0.000332269287109375,
      "model_forward_time": 0.11464357376098633,
      "step": 54439
    },
    {
      "epoch": 0.000332269287109375,
      "step": 54439,
      "training_step_time": 0.40369248390197754
    },
    {
      "epoch": 0.000332275390625,
      "grad_norm": 0.09890604764223099,
      "learning_rate": 2.3293692695787017e-06,
      "loss": 0.0379,
      "step": 54440
    },
    {
      "epoch": 0.000332275390625,
      "model_forward_time": 0.1159203052520752,
      "step": 54440
    },
    {
      "epoch": 0.000332275390625,
      "step": 54440,
      "training_step_time": 0.38647913932800293
    },
    {
      "epoch": 0.000332281494140625,
      "model_forward_time": 0.11516928672790527,
      "step": 54441
    },
    {
      "epoch": 0.000332281494140625,
      "step": 54441,
      "training_step_time": 0.37930941581726074
    },
    {
      "epoch": 0.00033228759765625,
      "model_forward_time": 0.11491727828979492,
      "step": 54442
    },
    {
      "epoch": 0.00033228759765625,
      "step": 54442,
      "training_step_time": 0.3830094337463379
    },
    {
      "epoch": 0.000332293701171875,
      "model_forward_time": 0.11580657958984375,
      "step": 54443
    },
    {
      "epoch": 0.000332293701171875,
      "step": 54443,
      "training_step_time": 0.4142429828643799
    },
    {
      "epoch": 0.0003322998046875,
      "model_forward_time": 0.11725854873657227,
      "step": 54444
    },
    {
      "epoch": 0.0003322998046875,
      "step": 54444,
      "training_step_time": 0.4000670909881592
    },
    {
      "epoch": 0.000332305908203125,
      "model_forward_time": 0.11529946327209473,
      "step": 54445
    },
    {
      "epoch": 0.000332305908203125,
      "step": 54445,
      "training_step_time": 0.5019321441650391
    },
    {
      "epoch": 0.00033231201171875,
      "model_forward_time": 0.11552786827087402,
      "step": 54446
    },
    {
      "epoch": 0.00033231201171875,
      "step": 54446,
      "training_step_time": 0.47623157501220703
    },
    {
      "epoch": 0.000332318115234375,
      "model_forward_time": 0.11583971977233887,
      "step": 54447
    },
    {
      "epoch": 0.000332318115234375,
      "step": 54447,
      "training_step_time": 0.43155717849731445
    },
    {
      "epoch": 0.00033232421875,
      "model_forward_time": 0.1152191162109375,
      "step": 54448
    },
    {
      "epoch": 0.00033232421875,
      "step": 54448,
      "training_step_time": 0.37784266471862793
    },
    {
      "epoch": 0.000332330322265625,
      "model_forward_time": 0.1150665283203125,
      "step": 54449
    },
    {
      "epoch": 0.000332330322265625,
      "step": 54449,
      "training_step_time": 0.3973217010498047
    },
    {
      "epoch": 0.00033233642578125,
      "grad_norm": 0.11196189373731613,
      "learning_rate": 2.321063158262793e-06,
      "loss": 0.0331,
      "step": 54450
    },
    {
      "epoch": 0.00033233642578125,
      "model_forward_time": 0.11530852317810059,
      "step": 54450
    },
    {
      "epoch": 0.00033233642578125,
      "step": 54450,
      "training_step_time": 0.4102976322174072
    },
    {
      "epoch": 0.000332342529296875,
      "model_forward_time": 0.11474347114562988,
      "step": 54451
    },
    {
      "epoch": 0.000332342529296875,
      "step": 54451,
      "training_step_time": 0.39783644676208496
    },
    {
      "epoch": 0.0003323486328125,
      "model_forward_time": 0.11572909355163574,
      "step": 54452
    },
    {
      "epoch": 0.0003323486328125,
      "step": 54452,
      "training_step_time": 0.4346139430999756
    },
    {
      "epoch": 0.000332354736328125,
      "model_forward_time": 0.11461901664733887,
      "step": 54453
    },
    {
      "epoch": 0.000332354736328125,
      "step": 54453,
      "training_step_time": 0.38460850715637207
    },
    {
      "epoch": 0.00033236083984375,
      "model_forward_time": 0.11487150192260742,
      "step": 54454
    },
    {
      "epoch": 0.00033236083984375,
      "step": 54454,
      "training_step_time": 0.38107895851135254
    },
    {
      "epoch": 0.000332366943359375,
      "model_forward_time": 0.11518239974975586,
      "step": 54455
    },
    {
      "epoch": 0.000332366943359375,
      "step": 54455,
      "training_step_time": 0.39905810356140137
    },
    {
      "epoch": 0.000332373046875,
      "model_forward_time": 0.11554694175720215,
      "step": 54456
    },
    {
      "epoch": 0.000332373046875,
      "step": 54456,
      "training_step_time": 0.4017295837402344
    },
    {
      "epoch": 0.000332379150390625,
      "model_forward_time": 0.11509442329406738,
      "step": 54457
    },
    {
      "epoch": 0.000332379150390625,
      "step": 54457,
      "training_step_time": 0.4066312313079834
    },
    {
      "epoch": 0.00033238525390625,
      "model_forward_time": 0.11533975601196289,
      "step": 54458
    },
    {
      "epoch": 0.00033238525390625,
      "step": 54458,
      "training_step_time": 0.44090700149536133
    },
    {
      "epoch": 0.000332391357421875,
      "model_forward_time": 0.11574554443359375,
      "step": 54459
    },
    {
      "epoch": 0.000332391357421875,
      "step": 54459,
      "training_step_time": 0.41724467277526855
    },
    {
      "epoch": 0.0003323974609375,
      "grad_norm": 0.09556321054697037,
      "learning_rate": 2.3127715305490073e-06,
      "loss": 0.0348,
      "step": 54460
    },
    {
      "epoch": 0.0003323974609375,
      "model_forward_time": 0.1152501106262207,
      "step": 54460
    },
    {
      "epoch": 0.0003323974609375,
      "step": 54460,
      "training_step_time": 0.4707498550415039
    },
    {
      "epoch": 0.000332403564453125,
      "model_forward_time": 0.11547517776489258,
      "step": 54461
    },
    {
      "epoch": 0.000332403564453125,
      "step": 54461,
      "training_step_time": 0.5183992385864258
    },
    {
      "epoch": 0.00033240966796875,
      "model_forward_time": 0.11516237258911133,
      "step": 54462
    },
    {
      "epoch": 0.00033240966796875,
      "step": 54462,
      "training_step_time": 0.3818213939666748
    },
    {
      "epoch": 0.000332415771484375,
      "model_forward_time": 0.11662030220031738,
      "step": 54463
    },
    {
      "epoch": 0.000332415771484375,
      "step": 54463,
      "training_step_time": 0.3775484561920166
    },
    {
      "epoch": 0.000332421875,
      "model_forward_time": 0.11501240730285645,
      "step": 54464
    },
    {
      "epoch": 0.000332421875,
      "step": 54464,
      "training_step_time": 0.3838965892791748
    },
    {
      "epoch": 0.000332427978515625,
      "model_forward_time": 0.11584591865539551,
      "step": 54465
    },
    {
      "epoch": 0.000332427978515625,
      "step": 54465,
      "training_step_time": 0.39925646781921387
    },
    {
      "epoch": 0.00033243408203125,
      "model_forward_time": 0.11530351638793945,
      "step": 54466
    },
    {
      "epoch": 0.00033243408203125,
      "step": 54466,
      "training_step_time": 0.4735262393951416
    },
    {
      "epoch": 0.000332440185546875,
      "model_forward_time": 0.11480021476745605,
      "step": 54467
    },
    {
      "epoch": 0.000332440185546875,
      "step": 54467,
      "training_step_time": 0.43753528594970703
    },
    {
      "epoch": 0.0003324462890625,
      "model_forward_time": 0.11496853828430176,
      "step": 54468
    },
    {
      "epoch": 0.0003324462890625,
      "step": 54468,
      "training_step_time": 0.3923768997192383
    },
    {
      "epoch": 0.000332452392578125,
      "model_forward_time": 0.11542558670043945,
      "step": 54469
    },
    {
      "epoch": 0.000332452392578125,
      "step": 54469,
      "training_step_time": 0.38864707946777344
    },
    {
      "epoch": 0.00033245849609375,
      "grad_norm": 0.11160345375537872,
      "learning_rate": 2.3044943889561245e-06,
      "loss": 0.0385,
      "step": 54470
    },
    {
      "epoch": 0.00033245849609375,
      "model_forward_time": 0.1152801513671875,
      "step": 54470
    },
    {
      "epoch": 0.00033245849609375,
      "step": 54470,
      "training_step_time": 0.3880653381347656
    },
    {
      "epoch": 0.000332464599609375,
      "model_forward_time": 0.1140601634979248,
      "step": 54471
    },
    {
      "epoch": 0.000332464599609375,
      "step": 54471,
      "training_step_time": 0.44861817359924316
    },
    {
      "epoch": 0.000332470703125,
      "model_forward_time": 0.11463379859924316,
      "step": 54472
    },
    {
      "epoch": 0.000332470703125,
      "step": 54472,
      "training_step_time": 0.44722580909729004
    },
    {
      "epoch": 0.000332476806640625,
      "model_forward_time": 0.11521434783935547,
      "step": 54473
    },
    {
      "epoch": 0.000332476806640625,
      "step": 54473,
      "training_step_time": 0.418839693069458
    },
    {
      "epoch": 0.00033248291015625,
      "model_forward_time": 0.11465835571289062,
      "step": 54474
    },
    {
      "epoch": 0.00033248291015625,
      "step": 54474,
      "training_step_time": 0.4164283275604248
    },
    {
      "epoch": 0.000332489013671875,
      "model_forward_time": 0.11579608917236328,
      "step": 54475
    },
    {
      "epoch": 0.000332489013671875,
      "step": 54475,
      "training_step_time": 0.504955530166626
    },
    {
      "epoch": 0.0003324951171875,
      "model_forward_time": 0.1148688793182373,
      "step": 54476
    },
    {
      "epoch": 0.0003324951171875,
      "step": 54476,
      "training_step_time": 0.4214608669281006
    },
    {
      "epoch": 0.000332501220703125,
      "model_forward_time": 0.11529135704040527,
      "step": 54477
    },
    {
      "epoch": 0.000332501220703125,
      "step": 54477,
      "training_step_time": 0.378448486328125
    },
    {
      "epoch": 0.00033250732421875,
      "model_forward_time": 0.11456918716430664,
      "step": 54478
    },
    {
      "epoch": 0.00033250732421875,
      "step": 54478,
      "training_step_time": 0.39876627922058105
    },
    {
      "epoch": 0.000332513427734375,
      "model_forward_time": 0.11468958854675293,
      "step": 54479
    },
    {
      "epoch": 0.000332513427734375,
      "step": 54479,
      "training_step_time": 0.4011862277984619
    },
    {
      "epoch": 0.00033251953125,
      "grad_norm": 0.08504834771156311,
      "learning_rate": 2.296231735998511e-06,
      "loss": 0.0346,
      "step": 54480
    },
    {
      "epoch": 0.00033251953125,
      "model_forward_time": 0.11478686332702637,
      "step": 54480
    },
    {
      "epoch": 0.00033251953125,
      "step": 54480,
      "training_step_time": 0.4064056873321533
    },
    {
      "epoch": 0.000332525634765625,
      "model_forward_time": 0.11487579345703125,
      "step": 54481
    },
    {
      "epoch": 0.000332525634765625,
      "step": 54481,
      "training_step_time": 0.42308521270751953
    },
    {
      "epoch": 0.00033253173828125,
      "model_forward_time": 0.11512637138366699,
      "step": 54482
    },
    {
      "epoch": 0.00033253173828125,
      "step": 54482,
      "training_step_time": 0.3893163204193115
    },
    {
      "epoch": 0.000332537841796875,
      "model_forward_time": 0.11480331420898438,
      "step": 54483
    },
    {
      "epoch": 0.000332537841796875,
      "step": 54483,
      "training_step_time": 0.3766205310821533
    },
    {
      "epoch": 0.0003325439453125,
      "model_forward_time": 0.11478614807128906,
      "step": 54484
    },
    {
      "epoch": 0.0003325439453125,
      "step": 54484,
      "training_step_time": 0.38606977462768555
    },
    {
      "epoch": 0.000332550048828125,
      "model_forward_time": 0.11525416374206543,
      "step": 54485
    },
    {
      "epoch": 0.000332550048828125,
      "step": 54485,
      "training_step_time": 0.43198299407958984
    },
    {
      "epoch": 0.00033255615234375,
      "model_forward_time": 0.1143953800201416,
      "step": 54486
    },
    {
      "epoch": 0.00033255615234375,
      "step": 54486,
      "training_step_time": 0.4363594055175781
    },
    {
      "epoch": 0.000332562255859375,
      "model_forward_time": 0.11472535133361816,
      "step": 54487
    },
    {
      "epoch": 0.000332562255859375,
      "step": 54487,
      "training_step_time": 0.4563605785369873
    },
    {
      "epoch": 0.000332568359375,
      "model_forward_time": 0.11516976356506348,
      "step": 54488
    },
    {
      "epoch": 0.000332568359375,
      "step": 54488,
      "training_step_time": 0.39733219146728516
    },
    {
      "epoch": 0.000332574462890625,
      "model_forward_time": 0.1152031421661377,
      "step": 54489
    },
    {
      "epoch": 0.000332574462890625,
      "step": 54489,
      "training_step_time": 0.3979320526123047
    },
    {
      "epoch": 0.00033258056640625,
      "grad_norm": 0.11469731479883194,
      "learning_rate": 2.2879835741861586e-06,
      "loss": 0.0382,
      "step": 54490
    },
    {
      "epoch": 0.00033258056640625,
      "model_forward_time": 0.11524343490600586,
      "step": 54490
    },
    {
      "epoch": 0.00033258056640625,
      "step": 54490,
      "training_step_time": 0.4377439022064209
    },
    {
      "epoch": 0.000332586669921875,
      "model_forward_time": 0.11495518684387207,
      "step": 54491
    },
    {
      "epoch": 0.000332586669921875,
      "step": 54491,
      "training_step_time": 0.42115187644958496
    },
    {
      "epoch": 0.0003325927734375,
      "model_forward_time": 0.1163339614868164,
      "step": 54492
    },
    {
      "epoch": 0.0003325927734375,
      "step": 54492,
      "training_step_time": 0.3933415412902832
    },
    {
      "epoch": 0.000332598876953125,
      "model_forward_time": 0.11473822593688965,
      "step": 54493
    },
    {
      "epoch": 0.000332598876953125,
      "step": 54493,
      "training_step_time": 0.3929286003112793
    },
    {
      "epoch": 0.00033260498046875,
      "model_forward_time": 0.11525440216064453,
      "step": 54494
    },
    {
      "epoch": 0.00033260498046875,
      "step": 54494,
      "training_step_time": 0.5002343654632568
    },
    {
      "epoch": 0.000332611083984375,
      "model_forward_time": 0.11461210250854492,
      "step": 54495
    },
    {
      "epoch": 0.000332611083984375,
      "step": 54495,
      "training_step_time": 0.380037784576416
    },
    {
      "epoch": 0.0003326171875,
      "model_forward_time": 0.11519122123718262,
      "step": 54496
    },
    {
      "epoch": 0.0003326171875,
      "step": 54496,
      "training_step_time": 0.40962815284729004
    },
    {
      "epoch": 0.000332623291015625,
      "model_forward_time": 0.11482787132263184,
      "step": 54497
    },
    {
      "epoch": 0.000332623291015625,
      "step": 54497,
      "training_step_time": 0.3943912982940674
    },
    {
      "epoch": 0.00033262939453125,
      "model_forward_time": 0.1147468090057373,
      "step": 54498
    },
    {
      "epoch": 0.00033262939453125,
      "step": 54498,
      "training_step_time": 0.40272998809814453
    },
    {
      "epoch": 0.000332635498046875,
      "model_forward_time": 0.11489391326904297,
      "step": 54499
    },
    {
      "epoch": 0.000332635498046875,
      "step": 54499,
      "training_step_time": 0.40704345703125
    },
    {
      "epoch": 0.0003326416015625,
      "grad_norm": 0.13264665007591248,
      "learning_rate": 2.2797499060246253e-06,
      "loss": 0.0405,
      "step": 54500
    },
    {
      "epoch": 0.0003326416015625,
      "model_forward_time": 0.11478614807128906,
      "step": 54500
    },
    {
      "epoch": 0.0003326416015625,
      "step": 54500,
      "training_step_time": 0.3956911563873291
    },
    {
      "epoch": 0.000332647705078125,
      "model_forward_time": 0.1153097152709961,
      "step": 54501
    },
    {
      "epoch": 0.000332647705078125,
      "step": 54501,
      "training_step_time": 0.39211058616638184
    },
    {
      "epoch": 0.00033265380859375,
      "model_forward_time": 0.11437273025512695,
      "step": 54502
    },
    {
      "epoch": 0.00033265380859375,
      "step": 54502,
      "training_step_time": 0.39849305152893066
    },
    {
      "epoch": 0.000332659912109375,
      "model_forward_time": 0.11526799201965332,
      "step": 54503
    },
    {
      "epoch": 0.000332659912109375,
      "step": 54503,
      "training_step_time": 0.4105806350708008
    },
    {
      "epoch": 0.000332666015625,
      "model_forward_time": 0.1154489517211914,
      "step": 54504
    },
    {
      "epoch": 0.000332666015625,
      "step": 54504,
      "training_step_time": 0.4493396282196045
    },
    {
      "epoch": 0.000332672119140625,
      "model_forward_time": 0.11546707153320312,
      "step": 54505
    },
    {
      "epoch": 0.000332672119140625,
      "step": 54505,
      "training_step_time": 0.4591047763824463
    },
    {
      "epoch": 0.00033267822265625,
      "model_forward_time": 0.11566329002380371,
      "step": 54506
    },
    {
      "epoch": 0.00033267822265625,
      "step": 54506,
      "training_step_time": 0.42922115325927734
    },
    {
      "epoch": 0.000332684326171875,
      "model_forward_time": 0.11543607711791992,
      "step": 54507
    },
    {
      "epoch": 0.000332684326171875,
      "step": 54507,
      "training_step_time": 0.3765676021575928
    },
    {
      "epoch": 0.0003326904296875,
      "model_forward_time": 0.11507678031921387,
      "step": 54508
    },
    {
      "epoch": 0.0003326904296875,
      "step": 54508,
      "training_step_time": 0.3976140022277832
    },
    {
      "epoch": 0.000332696533203125,
      "model_forward_time": 0.11556601524353027,
      "step": 54509
    },
    {
      "epoch": 0.000332696533203125,
      "step": 54509,
      "training_step_time": 0.3894805908203125
    },
    {
      "epoch": 0.00033270263671875,
      "grad_norm": 0.10991058498620987,
      "learning_rate": 2.271530734015104e-06,
      "loss": 0.0398,
      "step": 54510
    },
    {
      "epoch": 0.00033270263671875,
      "model_forward_time": 0.11539578437805176,
      "step": 54510
    },
    {
      "epoch": 0.00033270263671875,
      "step": 54510,
      "training_step_time": 0.494143009185791
    },
    {
      "epoch": 0.000332708740234375,
      "model_forward_time": 0.11665201187133789,
      "step": 54511
    },
    {
      "epoch": 0.000332708740234375,
      "step": 54511,
      "training_step_time": 0.39866089820861816
    },
    {
      "epoch": 0.00033271484375,
      "model_forward_time": 0.11595940589904785,
      "step": 54512
    },
    {
      "epoch": 0.00033271484375,
      "step": 54512,
      "training_step_time": 0.4077951908111572
    },
    {
      "epoch": 0.000332720947265625,
      "model_forward_time": 0.11507201194763184,
      "step": 54513
    },
    {
      "epoch": 0.000332720947265625,
      "step": 54513,
      "training_step_time": 0.38593101501464844
    },
    {
      "epoch": 0.00033272705078125,
      "model_forward_time": 0.11495733261108398,
      "step": 54514
    },
    {
      "epoch": 0.00033272705078125,
      "step": 54514,
      "training_step_time": 0.3878469467163086
    },
    {
      "epoch": 0.000332733154296875,
      "model_forward_time": 0.11556386947631836,
      "step": 54515
    },
    {
      "epoch": 0.000332733154296875,
      "step": 54515,
      "training_step_time": 0.40113377571105957
    },
    {
      "epoch": 0.0003327392578125,
      "model_forward_time": 0.11522078514099121,
      "step": 54516
    },
    {
      "epoch": 0.0003327392578125,
      "step": 54516,
      "training_step_time": 0.42107367515563965
    },
    {
      "epoch": 0.000332745361328125,
      "model_forward_time": 0.1153707504272461,
      "step": 54517
    },
    {
      "epoch": 0.000332745361328125,
      "step": 54517,
      "training_step_time": 0.4077270030975342
    },
    {
      "epoch": 0.00033275146484375,
      "model_forward_time": 0.1150369644165039,
      "step": 54518
    },
    {
      "epoch": 0.00033275146484375,
      "step": 54518,
      "training_step_time": 0.4817342758178711
    },
    {
      "epoch": 0.000332757568359375,
      "model_forward_time": 0.11484169960021973,
      "step": 54519
    },
    {
      "epoch": 0.000332757568359375,
      "step": 54519,
      "training_step_time": 0.47772908210754395
    },
    {
      "epoch": 0.000332763671875,
      "grad_norm": 0.08564408123493195,
      "learning_rate": 2.263326060654336e-06,
      "loss": 0.0344,
      "step": 54520
    },
    {
      "epoch": 0.000332763671875,
      "model_forward_time": 0.11474609375,
      "step": 54520
    },
    {
      "epoch": 0.000332763671875,
      "step": 54520,
      "training_step_time": 0.5016617774963379
    },
    {
      "epoch": 0.000332769775390625,
      "model_forward_time": 0.11471724510192871,
      "step": 54521
    },
    {
      "epoch": 0.000332769775390625,
      "step": 54521,
      "training_step_time": 0.3956000804901123
    },
    {
      "epoch": 0.00033277587890625,
      "model_forward_time": 0.11495018005371094,
      "step": 54522
    },
    {
      "epoch": 0.00033277587890625,
      "step": 54522,
      "training_step_time": 0.3921170234680176
    },
    {
      "epoch": 0.000332781982421875,
      "model_forward_time": 0.11487102508544922,
      "step": 54523
    },
    {
      "epoch": 0.000332781982421875,
      "step": 54523,
      "training_step_time": 0.4883148670196533
    },
    {
      "epoch": 0.0003327880859375,
      "model_forward_time": 0.11423683166503906,
      "step": 54524
    },
    {
      "epoch": 0.0003327880859375,
      "step": 54524,
      "training_step_time": 0.4289858341217041
    },
    {
      "epoch": 0.000332794189453125,
      "model_forward_time": 0.11466026306152344,
      "step": 54525
    },
    {
      "epoch": 0.000332794189453125,
      "step": 54525,
      "training_step_time": 0.4128603935241699
    },
    {
      "epoch": 0.00033280029296875,
      "model_forward_time": 0.1152334213256836,
      "step": 54526
    },
    {
      "epoch": 0.00033280029296875,
      "step": 54526,
      "training_step_time": 0.3893132209777832
    },
    {
      "epoch": 0.000332806396484375,
      "model_forward_time": 0.11518597602844238,
      "step": 54527
    },
    {
      "epoch": 0.000332806396484375,
      "step": 54527,
      "training_step_time": 0.42716336250305176
    },
    {
      "epoch": 0.0003328125,
      "model_forward_time": 0.11535120010375977,
      "step": 54528
    },
    {
      "epoch": 0.0003328125,
      "step": 54528,
      "training_step_time": 0.41747069358825684
    },
    {
      "epoch": 0.000332818603515625,
      "model_forward_time": 0.11521172523498535,
      "step": 54529
    },
    {
      "epoch": 0.000332818603515625,
      "step": 54529,
      "training_step_time": 0.3955960273742676
    },
    {
      "epoch": 0.00033282470703125,
      "grad_norm": 0.09809549897909164,
      "learning_rate": 2.2551358884347007e-06,
      "loss": 0.0367,
      "step": 54530
    },
    {
      "epoch": 0.00033282470703125,
      "model_forward_time": 0.11523938179016113,
      "step": 54530
    },
    {
      "epoch": 0.00033282470703125,
      "step": 54530,
      "training_step_time": 0.392261266708374
    },
    {
      "epoch": 0.000332830810546875,
      "model_forward_time": 0.11577939987182617,
      "step": 54531
    },
    {
      "epoch": 0.000332830810546875,
      "step": 54531,
      "training_step_time": 0.3968067169189453
    },
    {
      "epoch": 0.0003328369140625,
      "model_forward_time": 0.11525702476501465,
      "step": 54532
    },
    {
      "epoch": 0.0003328369140625,
      "step": 54532,
      "training_step_time": 0.4324467182159424
    },
    {
      "epoch": 0.000332843017578125,
      "model_forward_time": 0.11539506912231445,
      "step": 54533
    },
    {
      "epoch": 0.000332843017578125,
      "step": 54533,
      "training_step_time": 0.70371413230896
    },
    {
      "epoch": 0.00033284912109375,
      "model_forward_time": 0.11517190933227539,
      "step": 54534
    },
    {
      "epoch": 0.00033284912109375,
      "step": 54534,
      "training_step_time": 0.4787273406982422
    },
    {
      "epoch": 0.000332855224609375,
      "model_forward_time": 0.1146399974822998,
      "step": 54535
    },
    {
      "epoch": 0.000332855224609375,
      "step": 54535,
      "training_step_time": 0.3900318145751953
    },
    {
      "epoch": 0.000332861328125,
      "model_forward_time": 0.11407065391540527,
      "step": 54536
    },
    {
      "epoch": 0.000332861328125,
      "step": 54536,
      "training_step_time": 0.39069366455078125
    },
    {
      "epoch": 0.000332867431640625,
      "model_forward_time": 0.1147150993347168,
      "step": 54537
    },
    {
      "epoch": 0.000332867431640625,
      "step": 54537,
      "training_step_time": 0.39595985412597656
    },
    {
      "epoch": 0.00033287353515625,
      "model_forward_time": 0.11365866661071777,
      "step": 54538
    },
    {
      "epoch": 0.00033287353515625,
      "step": 54538,
      "training_step_time": 0.4511387348175049
    },
    {
      "epoch": 0.000332879638671875,
      "model_forward_time": 0.11478853225708008,
      "step": 54539
    },
    {
      "epoch": 0.000332879638671875,
      "step": 54539,
      "training_step_time": 0.7449262142181396
    },
    {
      "epoch": 0.0003328857421875,
      "grad_norm": 0.0789179727435112,
      "learning_rate": 2.2469602198441573e-06,
      "loss": 0.032,
      "step": 54540
    },
    {
      "epoch": 0.0003328857421875,
      "model_forward_time": 0.11462640762329102,
      "step": 54540
    },
    {
      "epoch": 0.0003328857421875,
      "step": 54540,
      "training_step_time": 0.38576316833496094
    },
    {
      "epoch": 0.000332891845703125,
      "model_forward_time": 0.11429166793823242,
      "step": 54541
    },
    {
      "epoch": 0.000332891845703125,
      "step": 54541,
      "training_step_time": 0.3879685401916504
    },
    {
      "epoch": 0.00033289794921875,
      "model_forward_time": 0.11666393280029297,
      "step": 54542
    },
    {
      "epoch": 0.00033289794921875,
      "step": 54542,
      "training_step_time": 0.3891923427581787
    },
    {
      "epoch": 0.000332904052734375,
      "model_forward_time": 0.11458492279052734,
      "step": 54543
    },
    {
      "epoch": 0.000332904052734375,
      "step": 54543,
      "training_step_time": 0.37926316261291504
    },
    {
      "epoch": 0.00033291015625,
      "model_forward_time": 0.11435055732727051,
      "step": 54544
    },
    {
      "epoch": 0.00033291015625,
      "step": 54544,
      "training_step_time": 0.41993260383605957
    },
    {
      "epoch": 0.000332916259765625,
      "model_forward_time": 0.11519312858581543,
      "step": 54545
    },
    {
      "epoch": 0.000332916259765625,
      "step": 54545,
      "training_step_time": 0.6894078254699707
    },
    {
      "epoch": 0.00033292236328125,
      "model_forward_time": 0.11547994613647461,
      "step": 54546
    },
    {
      "epoch": 0.00033292236328125,
      "step": 54546,
      "training_step_time": 0.44118762016296387
    },
    {
      "epoch": 0.000332928466796875,
      "model_forward_time": 0.11548304557800293,
      "step": 54547
    },
    {
      "epoch": 0.000332928466796875,
      "step": 54547,
      "training_step_time": 0.3945956230163574
    },
    {
      "epoch": 0.0003329345703125,
      "model_forward_time": 0.1158914566040039,
      "step": 54548
    },
    {
      "epoch": 0.0003329345703125,
      "step": 54548,
      "training_step_time": 0.3971538543701172
    },
    {
      "epoch": 0.000332940673828125,
      "model_forward_time": 0.11442327499389648,
      "step": 54549
    },
    {
      "epoch": 0.000332940673828125,
      "step": 54549,
      "training_step_time": 0.3939673900604248
    },
    {
      "epoch": 0.00033294677734375,
      "grad_norm": 0.08155395090579987,
      "learning_rate": 2.2387990573662477e-06,
      "loss": 0.0373,
      "step": 54550
    },
    {
      "epoch": 0.00033294677734375,
      "model_forward_time": 0.11393237113952637,
      "step": 54550
    },
    {
      "epoch": 0.00033294677734375,
      "step": 54550,
      "training_step_time": 0.38747549057006836
    },
    {
      "epoch": 0.000332952880859375,
      "model_forward_time": 0.1156470775604248,
      "step": 54551
    },
    {
      "epoch": 0.000332952880859375,
      "step": 54551,
      "training_step_time": 0.5673954486846924
    },
    {
      "epoch": 0.000332958984375,
      "model_forward_time": 0.11529994010925293,
      "step": 54552
    },
    {
      "epoch": 0.000332958984375,
      "step": 54552,
      "training_step_time": 0.4526216983795166
    },
    {
      "epoch": 0.000332965087890625,
      "model_forward_time": 0.11484336853027344,
      "step": 54553
    },
    {
      "epoch": 0.000332965087890625,
      "step": 54553,
      "training_step_time": 0.3932633399963379
    },
    {
      "epoch": 0.00033297119140625,
      "model_forward_time": 0.11455011367797852,
      "step": 54554
    },
    {
      "epoch": 0.00033297119140625,
      "step": 54554,
      "training_step_time": 0.38466763496398926
    },
    {
      "epoch": 0.000332977294921875,
      "model_forward_time": 0.11500024795532227,
      "step": 54555
    },
    {
      "epoch": 0.000332977294921875,
      "step": 54555,
      "training_step_time": 0.3871912956237793
    },
    {
      "epoch": 0.0003329833984375,
      "model_forward_time": 0.11520695686340332,
      "step": 54556
    },
    {
      "epoch": 0.0003329833984375,
      "step": 54556,
      "training_step_time": 0.39275217056274414
    },
    {
      "epoch": 0.000332989501953125,
      "model_forward_time": 0.11528229713439941,
      "step": 54557
    },
    {
      "epoch": 0.000332989501953125,
      "step": 54557,
      "training_step_time": 0.7517104148864746
    },
    {
      "epoch": 0.00033299560546875,
      "model_forward_time": 0.11515688896179199,
      "step": 54558
    },
    {
      "epoch": 0.00033299560546875,
      "step": 54558,
      "training_step_time": 0.41237688064575195
    },
    {
      "epoch": 0.000333001708984375,
      "model_forward_time": 0.11504793167114258,
      "step": 54559
    },
    {
      "epoch": 0.000333001708984375,
      "step": 54559,
      "training_step_time": 0.36379551887512207
    },
    {
      "epoch": 0.0003330078125,
      "grad_norm": 0.0813325047492981,
      "learning_rate": 2.230652403480127e-06,
      "loss": 0.0379,
      "step": 54560
    },
    {
      "epoch": 0.0003330078125,
      "model_forward_time": 0.11448550224304199,
      "step": 54560
    },
    {
      "epoch": 0.0003330078125,
      "step": 54560,
      "training_step_time": 0.4423048496246338
    },
    {
      "epoch": 0.000333013916015625,
      "model_forward_time": 0.11436986923217773,
      "step": 54561
    },
    {
      "epoch": 0.000333013916015625,
      "step": 54561,
      "training_step_time": 0.4421725273132324
    },
    {
      "epoch": 0.00033302001953125,
      "model_forward_time": 0.11432337760925293,
      "step": 54562
    },
    {
      "epoch": 0.00033302001953125,
      "step": 54562,
      "training_step_time": 0.3958604335784912
    },
    {
      "epoch": 0.000333026123046875,
      "model_forward_time": 0.11471796035766602,
      "step": 54563
    },
    {
      "epoch": 0.000333026123046875,
      "step": 54563,
      "training_step_time": 0.48418760299682617
    },
    {
      "epoch": 0.0003330322265625,
      "model_forward_time": 0.1150813102722168,
      "step": 54564
    },
    {
      "epoch": 0.0003330322265625,
      "step": 54564,
      "training_step_time": 0.3978259563446045
    },
    {
      "epoch": 0.000333038330078125,
      "model_forward_time": 0.11501717567443848,
      "step": 54565
    },
    {
      "epoch": 0.000333038330078125,
      "step": 54565,
      "training_step_time": 0.48279261589050293
    },
    {
      "epoch": 0.00033304443359375,
      "model_forward_time": 0.11425566673278809,
      "step": 54566
    },
    {
      "epoch": 0.00033304443359375,
      "step": 54566,
      "training_step_time": 0.39267587661743164
    },
    {
      "epoch": 0.000333050537109375,
      "model_forward_time": 0.11486291885375977,
      "step": 54567
    },
    {
      "epoch": 0.000333050537109375,
      "step": 54567,
      "training_step_time": 0.3940119743347168
    },
    {
      "epoch": 0.000333056640625,
      "model_forward_time": 0.11501002311706543,
      "step": 54568
    },
    {
      "epoch": 0.000333056640625,
      "step": 54568,
      "training_step_time": 0.38742876052856445
    },
    {
      "epoch": 0.000333062744140625,
      "model_forward_time": 0.11490869522094727,
      "step": 54569
    },
    {
      "epoch": 0.000333062744140625,
      "step": 54569,
      "training_step_time": 0.39829230308532715
    },
    {
      "epoch": 0.00033306884765625,
      "grad_norm": 0.08710884302854538,
      "learning_rate": 2.222520260660521e-06,
      "loss": 0.0354,
      "step": 54570
    },
    {
      "epoch": 0.00033306884765625,
      "model_forward_time": 0.11544346809387207,
      "step": 54570
    },
    {
      "epoch": 0.00033306884765625,
      "step": 54570,
      "training_step_time": 0.3944401741027832
    },
    {
      "epoch": 0.000333074951171875,
      "model_forward_time": 0.11535048484802246,
      "step": 54571
    },
    {
      "epoch": 0.000333074951171875,
      "step": 54571,
      "training_step_time": 0.399799108505249
    },
    {
      "epoch": 0.0003330810546875,
      "model_forward_time": 0.11593818664550781,
      "step": 54572
    },
    {
      "epoch": 0.0003330810546875,
      "step": 54572,
      "training_step_time": 0.45928215980529785
    },
    {
      "epoch": 0.000333087158203125,
      "model_forward_time": 0.1151742935180664,
      "step": 54573
    },
    {
      "epoch": 0.000333087158203125,
      "step": 54573,
      "training_step_time": 0.46188974380493164
    },
    {
      "epoch": 0.00033309326171875,
      "model_forward_time": 0.11471414566040039,
      "step": 54574
    },
    {
      "epoch": 0.00033309326171875,
      "step": 54574,
      "training_step_time": 0.501253604888916
    },
    {
      "epoch": 0.000333099365234375,
      "model_forward_time": 0.114837646484375,
      "step": 54575
    },
    {
      "epoch": 0.000333099365234375,
      "step": 54575,
      "training_step_time": 0.4067871570587158
    },
    {
      "epoch": 0.00033310546875,
      "model_forward_time": 0.11504840850830078,
      "step": 54576
    },
    {
      "epoch": 0.00033310546875,
      "step": 54576,
      "training_step_time": 0.41767072677612305
    },
    {
      "epoch": 0.000333111572265625,
      "model_forward_time": 0.11499500274658203,
      "step": 54577
    },
    {
      "epoch": 0.000333111572265625,
      "step": 54577,
      "training_step_time": 0.38904285430908203
    },
    {
      "epoch": 0.00033311767578125,
      "model_forward_time": 0.11547636985778809,
      "step": 54578
    },
    {
      "epoch": 0.00033311767578125,
      "step": 54578,
      "training_step_time": 0.38974857330322266
    },
    {
      "epoch": 0.000333123779296875,
      "model_forward_time": 0.11552619934082031,
      "step": 54579
    },
    {
      "epoch": 0.000333123779296875,
      "step": 54579,
      "training_step_time": 0.41060757637023926
    },
    {
      "epoch": 0.0003331298828125,
      "grad_norm": 0.0794723704457283,
      "learning_rate": 2.214402631377782e-06,
      "loss": 0.0362,
      "step": 54580
    },
    {
      "epoch": 0.0003331298828125,
      "model_forward_time": 0.11559700965881348,
      "step": 54580
    },
    {
      "epoch": 0.0003331298828125,
      "step": 54580,
      "training_step_time": 0.46981072425842285
    },
    {
      "epoch": 0.000333135986328125,
      "model_forward_time": 0.11617016792297363,
      "step": 54581
    },
    {
      "epoch": 0.000333135986328125,
      "step": 54581,
      "training_step_time": 0.39031124114990234
    },
    {
      "epoch": 0.00033314208984375,
      "model_forward_time": 0.11456918716430664,
      "step": 54582
    },
    {
      "epoch": 0.00033314208984375,
      "step": 54582,
      "training_step_time": 0.3816056251525879
    },
    {
      "epoch": 0.000333148193359375,
      "model_forward_time": 0.11527609825134277,
      "step": 54583
    },
    {
      "epoch": 0.000333148193359375,
      "step": 54583,
      "training_step_time": 0.39513182640075684
    },
    {
      "epoch": 0.000333154296875,
      "model_forward_time": 0.11493420600891113,
      "step": 54584
    },
    {
      "epoch": 0.000333154296875,
      "step": 54584,
      "training_step_time": 0.38861536979675293
    },
    {
      "epoch": 0.000333160400390625,
      "model_forward_time": 0.11509227752685547,
      "step": 54585
    },
    {
      "epoch": 0.000333160400390625,
      "step": 54585,
      "training_step_time": 0.40497589111328125
    },
    {
      "epoch": 0.00033316650390625,
      "model_forward_time": 0.11469292640686035,
      "step": 54586
    },
    {
      "epoch": 0.00033316650390625,
      "step": 54586,
      "training_step_time": 0.37865424156188965
    },
    {
      "epoch": 0.000333172607421875,
      "model_forward_time": 0.11588501930236816,
      "step": 54587
    },
    {
      "epoch": 0.000333172607421875,
      "step": 54587,
      "training_step_time": 0.4379291534423828
    },
    {
      "epoch": 0.0003331787109375,
      "model_forward_time": 0.11558270454406738,
      "step": 54588
    },
    {
      "epoch": 0.0003331787109375,
      "step": 54588,
      "training_step_time": 0.4164109230041504
    },
    {
      "epoch": 0.000333184814453125,
      "model_forward_time": 0.11463618278503418,
      "step": 54589
    },
    {
      "epoch": 0.000333184814453125,
      "step": 54589,
      "training_step_time": 0.43877315521240234
    },
    {
      "epoch": 0.00033319091796875,
      "grad_norm": 0.0733485296368599,
      "learning_rate": 2.206299518097804e-06,
      "loss": 0.0384,
      "step": 54590
    },
    {
      "epoch": 0.00033319091796875,
      "model_forward_time": 0.11460685729980469,
      "step": 54590
    },
    {
      "epoch": 0.00033319091796875,
      "step": 54590,
      "training_step_time": 0.4819214344024658
    },
    {
      "epoch": 0.000333197021484375,
      "model_forward_time": 0.11502337455749512,
      "step": 54591
    },
    {
      "epoch": 0.000333197021484375,
      "step": 54591,
      "training_step_time": 0.39007115364074707
    },
    {
      "epoch": 0.000333203125,
      "model_forward_time": 0.11514425277709961,
      "step": 54592
    },
    {
      "epoch": 0.000333203125,
      "step": 54592,
      "training_step_time": 0.384291410446167
    },
    {
      "epoch": 0.000333209228515625,
      "model_forward_time": 0.11532330513000488,
      "step": 54593
    },
    {
      "epoch": 0.000333209228515625,
      "step": 54593,
      "training_step_time": 0.40495729446411133
    },
    {
      "epoch": 0.00033321533203125,
      "model_forward_time": 0.11524486541748047,
      "step": 54594
    },
    {
      "epoch": 0.00033321533203125,
      "step": 54594,
      "training_step_time": 0.4491865634918213
    },
    {
      "epoch": 0.000333221435546875,
      "model_forward_time": 0.11536264419555664,
      "step": 54595
    },
    {
      "epoch": 0.000333221435546875,
      "step": 54595,
      "training_step_time": 0.39591479301452637
    },
    {
      "epoch": 0.0003332275390625,
      "model_forward_time": 0.11580801010131836,
      "step": 54596
    },
    {
      "epoch": 0.0003332275390625,
      "step": 54596,
      "training_step_time": 0.3863697052001953
    },
    {
      "epoch": 0.000333233642578125,
      "model_forward_time": 0.11431765556335449,
      "step": 54597
    },
    {
      "epoch": 0.000333233642578125,
      "step": 54597,
      "training_step_time": 0.38713693618774414
    },
    {
      "epoch": 0.00033323974609375,
      "model_forward_time": 0.1155860424041748,
      "step": 54598
    },
    {
      "epoch": 0.00033323974609375,
      "step": 54598,
      "training_step_time": 0.3815939426422119
    },
    {
      "epoch": 0.000333245849609375,
      "model_forward_time": 0.11528372764587402,
      "step": 54599
    },
    {
      "epoch": 0.000333245849609375,
      "step": 54599,
      "training_step_time": 0.3936789035797119
    },
    {
      "epoch": 0.000333251953125,
      "grad_norm": 0.07378057390451431,
      "learning_rate": 2.1982109232821178e-06,
      "loss": 0.0345,
      "step": 54600
    },
    {
      "epoch": 0.000333251953125,
      "model_forward_time": 0.11478948593139648,
      "step": 54600
    },
    {
      "epoch": 0.000333251953125,
      "step": 54600,
      "training_step_time": 0.4038081169128418
    },
    {
      "epoch": 0.000333258056640625,
      "model_forward_time": 0.11496424674987793,
      "step": 54601
    },
    {
      "epoch": 0.000333258056640625,
      "step": 54601,
      "training_step_time": 0.3980751037597656
    },
    {
      "epoch": 0.00033326416015625,
      "model_forward_time": 0.11502861976623535,
      "step": 54602
    },
    {
      "epoch": 0.00033326416015625,
      "step": 54602,
      "training_step_time": 0.420971155166626
    },
    {
      "epoch": 0.000333270263671875,
      "model_forward_time": 0.1156625747680664,
      "step": 54603
    },
    {
      "epoch": 0.000333270263671875,
      "step": 54603,
      "training_step_time": 0.45659828186035156
    },
    {
      "epoch": 0.0003332763671875,
      "model_forward_time": 0.11583232879638672,
      "step": 54604
    },
    {
      "epoch": 0.0003332763671875,
      "step": 54604,
      "training_step_time": 0.5082712173461914
    },
    {
      "epoch": 0.000333282470703125,
      "model_forward_time": 0.11659026145935059,
      "step": 54605
    },
    {
      "epoch": 0.000333282470703125,
      "step": 54605,
      "training_step_time": 0.40393972396850586
    },
    {
      "epoch": 0.00033328857421875,
      "model_forward_time": 0.11473774909973145,
      "step": 54606
    },
    {
      "epoch": 0.00033328857421875,
      "step": 54606,
      "training_step_time": 0.39781904220581055
    },
    {
      "epoch": 0.000333294677734375,
      "model_forward_time": 0.1150820255279541,
      "step": 54607
    },
    {
      "epoch": 0.000333294677734375,
      "step": 54607,
      "training_step_time": 0.4023323059082031
    },
    {
      "epoch": 0.00033330078125,
      "model_forward_time": 0.1148233413696289,
      "step": 54608
    },
    {
      "epoch": 0.00033330078125,
      "step": 54608,
      "training_step_time": 0.45203280448913574
    },
    {
      "epoch": 0.000333306884765625,
      "model_forward_time": 0.11505293846130371,
      "step": 54609
    },
    {
      "epoch": 0.000333306884765625,
      "step": 54609,
      "training_step_time": 0.4825611114501953
    },
    {
      "epoch": 0.00033331298828125,
      "grad_norm": 0.1305622011423111,
      "learning_rate": 2.1901368493878248e-06,
      "loss": 0.0345,
      "step": 54610
    },
    {
      "epoch": 0.00033331298828125,
      "model_forward_time": 0.11461019515991211,
      "step": 54610
    },
    {
      "epoch": 0.00033331298828125,
      "step": 54610,
      "training_step_time": 0.37670230865478516
    },
    {
      "epoch": 0.000333319091796875,
      "model_forward_time": 0.11532139778137207,
      "step": 54611
    },
    {
      "epoch": 0.000333319091796875,
      "step": 54611,
      "training_step_time": 0.39127540588378906
    },
    {
      "epoch": 0.0003333251953125,
      "model_forward_time": 0.11505699157714844,
      "step": 54612
    },
    {
      "epoch": 0.0003333251953125,
      "step": 54612,
      "training_step_time": 0.396801233291626
    },
    {
      "epoch": 0.000333331298828125,
      "model_forward_time": 0.11441183090209961,
      "step": 54613
    },
    {
      "epoch": 0.000333331298828125,
      "step": 54613,
      "training_step_time": 0.4119694232940674
    },
    {
      "epoch": 0.00033333740234375,
      "model_forward_time": 0.11509943008422852,
      "step": 54614
    },
    {
      "epoch": 0.00033333740234375,
      "step": 54614,
      "training_step_time": 0.38382983207702637
    },
    {
      "epoch": 0.000333343505859375,
      "model_forward_time": 0.1147148609161377,
      "step": 54615
    },
    {
      "epoch": 0.000333343505859375,
      "step": 54615,
      "training_step_time": 0.3929781913757324
    },
    {
      "epoch": 0.000333349609375,
      "model_forward_time": 0.11518740653991699,
      "step": 54616
    },
    {
      "epoch": 0.000333349609375,
      "step": 54616,
      "training_step_time": 0.4923393726348877
    },
    {
      "epoch": 0.000333355712890625,
      "model_forward_time": 0.11556172370910645,
      "step": 54617
    },
    {
      "epoch": 0.000333355712890625,
      "step": 54617,
      "training_step_time": 0.4612405300140381
    },
    {
      "epoch": 0.00033336181640625,
      "model_forward_time": 0.11538887023925781,
      "step": 54618
    },
    {
      "epoch": 0.00033336181640625,
      "step": 54618,
      "training_step_time": 0.4919703006744385
    },
    {
      "epoch": 0.000333367919921875,
      "model_forward_time": 0.11530756950378418,
      "step": 54619
    },
    {
      "epoch": 0.000333367919921875,
      "step": 54619,
      "training_step_time": 0.43732285499572754
    },
    {
      "epoch": 0.0003333740234375,
      "grad_norm": 0.08511630445718765,
      "learning_rate": 2.1820772988676076e-06,
      "loss": 0.0382,
      "step": 54620
    },
    {
      "epoch": 0.0003333740234375,
      "model_forward_time": 0.11574482917785645,
      "step": 54620
    },
    {
      "epoch": 0.0003333740234375,
      "step": 54620,
      "training_step_time": 0.4104125499725342
    },
    {
      "epoch": 0.000333380126953125,
      "model_forward_time": 0.11469507217407227,
      "step": 54621
    },
    {
      "epoch": 0.000333380126953125,
      "step": 54621,
      "training_step_time": 0.4413418769836426
    },
    {
      "epoch": 0.00033338623046875,
      "model_forward_time": 0.1149137020111084,
      "step": 54622
    },
    {
      "epoch": 0.00033338623046875,
      "step": 54622,
      "training_step_time": 0.3961646556854248
    },
    {
      "epoch": 0.000333392333984375,
      "model_forward_time": 0.1145782470703125,
      "step": 54623
    },
    {
      "epoch": 0.000333392333984375,
      "step": 54623,
      "training_step_time": 0.4424574375152588
    },
    {
      "epoch": 0.0003333984375,
      "model_forward_time": 0.1147773265838623,
      "step": 54624
    },
    {
      "epoch": 0.0003333984375,
      "step": 54624,
      "training_step_time": 0.39372682571411133
    },
    {
      "epoch": 0.000333404541015625,
      "model_forward_time": 0.11552929878234863,
      "step": 54625
    },
    {
      "epoch": 0.000333404541015625,
      "step": 54625,
      "training_step_time": 0.3969135284423828
    },
    {
      "epoch": 0.00033341064453125,
      "model_forward_time": 0.11567306518554688,
      "step": 54626
    },
    {
      "epoch": 0.00033341064453125,
      "step": 54626,
      "training_step_time": 0.4051649570465088
    },
    {
      "epoch": 0.000333416748046875,
      "model_forward_time": 0.11580038070678711,
      "step": 54627
    },
    {
      "epoch": 0.000333416748046875,
      "step": 54627,
      "training_step_time": 0.39219045639038086
    },
    {
      "epoch": 0.0003334228515625,
      "model_forward_time": 0.11464357376098633,
      "step": 54628
    },
    {
      "epoch": 0.0003334228515625,
      "step": 54628,
      "training_step_time": 0.39132142066955566
    },
    {
      "epoch": 0.000333428955078125,
      "model_forward_time": 0.11475491523742676,
      "step": 54629
    },
    {
      "epoch": 0.000333428955078125,
      "step": 54629,
      "training_step_time": 0.4044790267944336
    },
    {
      "epoch": 0.00033343505859375,
      "grad_norm": 0.0803721621632576,
      "learning_rate": 2.174032274169746e-06,
      "loss": 0.0353,
      "step": 54630
    },
    {
      "epoch": 0.00033343505859375,
      "model_forward_time": 0.11646175384521484,
      "step": 54630
    },
    {
      "epoch": 0.00033343505859375,
      "step": 54630,
      "training_step_time": 0.40023231506347656
    },
    {
      "epoch": 0.000333441162109375,
      "model_forward_time": 0.11501646041870117,
      "step": 54631
    },
    {
      "epoch": 0.000333441162109375,
      "step": 54631,
      "training_step_time": 0.464768648147583
    },
    {
      "epoch": 0.000333447265625,
      "model_forward_time": 0.11553025245666504,
      "step": 54632
    },
    {
      "epoch": 0.000333447265625,
      "step": 54632,
      "training_step_time": 0.37967681884765625
    },
    {
      "epoch": 0.000333453369140625,
      "model_forward_time": 0.11537504196166992,
      "step": 54633
    },
    {
      "epoch": 0.000333453369140625,
      "step": 54633,
      "training_step_time": 0.4450497627258301
    },
    {
      "epoch": 0.00033345947265625,
      "model_forward_time": 0.12104511260986328,
      "step": 54634
    },
    {
      "epoch": 0.00033345947265625,
      "step": 54634,
      "training_step_time": 0.3881521224975586
    },
    {
      "epoch": 0.000333465576171875,
      "model_forward_time": 0.11562967300415039,
      "step": 54635
    },
    {
      "epoch": 0.000333465576171875,
      "step": 54635,
      "training_step_time": 0.38814210891723633
    },
    {
      "epoch": 0.0003334716796875,
      "model_forward_time": 0.11610102653503418,
      "step": 54636
    },
    {
      "epoch": 0.0003334716796875,
      "step": 54636,
      "training_step_time": 0.3922739028930664
    },
    {
      "epoch": 0.000333477783203125,
      "model_forward_time": 0.11548709869384766,
      "step": 54637
    },
    {
      "epoch": 0.000333477783203125,
      "step": 54637,
      "training_step_time": 0.4660680294036865
    },
    {
      "epoch": 0.00033348388671875,
      "model_forward_time": 0.11520957946777344,
      "step": 54638
    },
    {
      "epoch": 0.00033348388671875,
      "step": 54638,
      "training_step_time": 0.44391918182373047
    },
    {
      "epoch": 0.000333489990234375,
      "model_forward_time": 0.11570596694946289,
      "step": 54639
    },
    {
      "epoch": 0.000333489990234375,
      "step": 54639,
      "training_step_time": 0.38294553756713867
    },
    {
      "epoch": 0.00033349609375,
      "grad_norm": 0.10246093571186066,
      "learning_rate": 2.1660017777381135e-06,
      "loss": 0.0395,
      "step": 54640
    },
    {
      "epoch": 0.00033349609375,
      "model_forward_time": 0.11537504196166992,
      "step": 54640
    },
    {
      "epoch": 0.00033349609375,
      "step": 54640,
      "training_step_time": 0.38864779472351074
    },
    {
      "epoch": 0.000333502197265625,
      "model_forward_time": 0.11521434783935547,
      "step": 54641
    },
    {
      "epoch": 0.000333502197265625,
      "step": 54641,
      "training_step_time": 0.39510607719421387
    },
    {
      "epoch": 0.00033350830078125,
      "model_forward_time": 0.11522698402404785,
      "step": 54642
    },
    {
      "epoch": 0.00033350830078125,
      "step": 54642,
      "training_step_time": 0.3991830348968506
    },
    {
      "epoch": 0.000333514404296875,
      "model_forward_time": 0.11600089073181152,
      "step": 54643
    },
    {
      "epoch": 0.000333514404296875,
      "step": 54643,
      "training_step_time": 0.3942387104034424
    },
    {
      "epoch": 0.0003335205078125,
      "model_forward_time": 0.11573052406311035,
      "step": 54644
    },
    {
      "epoch": 0.0003335205078125,
      "step": 54644,
      "training_step_time": 0.4083216190338135
    },
    {
      "epoch": 0.000333526611328125,
      "model_forward_time": 0.11516785621643066,
      "step": 54645
    },
    {
      "epoch": 0.000333526611328125,
      "step": 54645,
      "training_step_time": 0.3931710720062256
    },
    {
      "epoch": 0.00033353271484375,
      "model_forward_time": 0.11567425727844238,
      "step": 54646
    },
    {
      "epoch": 0.00033353271484375,
      "step": 54646,
      "training_step_time": 0.42502522468566895
    },
    {
      "epoch": 0.000333538818359375,
      "model_forward_time": 0.11520624160766602,
      "step": 54647
    },
    {
      "epoch": 0.000333538818359375,
      "step": 54647,
      "training_step_time": 0.42000889778137207
    },
    {
      "epoch": 0.000333544921875,
      "model_forward_time": 0.11487340927124023,
      "step": 54648
    },
    {
      "epoch": 0.000333544921875,
      "step": 54648,
      "training_step_time": 0.46587586402893066
    },
    {
      "epoch": 0.000333551025390625,
      "model_forward_time": 0.11556482315063477,
      "step": 54649
    },
    {
      "epoch": 0.000333551025390625,
      "step": 54649,
      "training_step_time": 0.4230775833129883
    },
    {
      "epoch": 0.00033355712890625,
      "grad_norm": 0.09496437013149261,
      "learning_rate": 2.157985812012148e-06,
      "loss": 0.0351,
      "step": 54650
    },
    {
      "epoch": 0.00033355712890625,
      "model_forward_time": 0.11580324172973633,
      "step": 54650
    },
    {
      "epoch": 0.00033355712890625,
      "step": 54650,
      "training_step_time": 0.418778657913208
    },
    {
      "epoch": 0.000333563232421875,
      "model_forward_time": 0.11529254913330078,
      "step": 54651
    },
    {
      "epoch": 0.000333563232421875,
      "step": 54651,
      "training_step_time": 0.4071230888366699
    },
    {
      "epoch": 0.0003335693359375,
      "model_forward_time": 0.11509585380554199,
      "step": 54652
    },
    {
      "epoch": 0.0003335693359375,
      "step": 54652,
      "training_step_time": 0.41472840309143066
    },
    {
      "epoch": 0.000333575439453125,
      "model_forward_time": 0.11484575271606445,
      "step": 54653
    },
    {
      "epoch": 0.000333575439453125,
      "step": 54653,
      "training_step_time": 0.4892401695251465
    },
    {
      "epoch": 0.00033358154296875,
      "model_forward_time": 0.11516356468200684,
      "step": 54654
    },
    {
      "epoch": 0.00033358154296875,
      "step": 54654,
      "training_step_time": 0.3970353603363037
    },
    {
      "epoch": 0.000333587646484375,
      "model_forward_time": 0.11518692970275879,
      "step": 54655
    },
    {
      "epoch": 0.000333587646484375,
      "step": 54655,
      "training_step_time": 0.39553046226501465
    },
    {
      "epoch": 0.00033359375,
      "model_forward_time": 0.11554360389709473,
      "step": 54656
    },
    {
      "epoch": 0.00033359375,
      "step": 54656,
      "training_step_time": 0.39058542251586914
    },
    {
      "epoch": 0.000333599853515625,
      "model_forward_time": 0.11557888984680176,
      "step": 54657
    },
    {
      "epoch": 0.000333599853515625,
      "step": 54657,
      "training_step_time": 0.38392186164855957
    },
    {
      "epoch": 0.00033360595703125,
      "model_forward_time": 0.11506080627441406,
      "step": 54658
    },
    {
      "epoch": 0.00033360595703125,
      "step": 54658,
      "training_step_time": 0.4041135311126709
    },
    {
      "epoch": 0.000333612060546875,
      "model_forward_time": 0.11532711982727051,
      "step": 54659
    },
    {
      "epoch": 0.000333612060546875,
      "step": 54659,
      "training_step_time": 0.38686513900756836
    },
    {
      "epoch": 0.0003336181640625,
      "grad_norm": 0.09796739369630814,
      "learning_rate": 2.149984379426906e-06,
      "loss": 0.0381,
      "step": 54660
    },
    {
      "epoch": 0.0003336181640625,
      "model_forward_time": 0.11530828475952148,
      "step": 54660
    },
    {
      "epoch": 0.0003336181640625,
      "step": 54660,
      "training_step_time": 0.4086322784423828
    },
    {
      "epoch": 0.000333624267578125,
      "model_forward_time": 0.11601614952087402,
      "step": 54661
    },
    {
      "epoch": 0.000333624267578125,
      "step": 54661,
      "training_step_time": 0.4271364212036133
    },
    {
      "epoch": 0.00033363037109375,
      "model_forward_time": 0.1155083179473877,
      "step": 54662
    },
    {
      "epoch": 0.00033363037109375,
      "step": 54662,
      "training_step_time": 0.4656999111175537
    },
    {
      "epoch": 0.000333636474609375,
      "model_forward_time": 0.11559939384460449,
      "step": 54663
    },
    {
      "epoch": 0.000333636474609375,
      "step": 54663,
      "training_step_time": 0.5099701881408691
    },
    {
      "epoch": 0.000333642578125,
      "model_forward_time": 0.11481261253356934,
      "step": 54664
    },
    {
      "epoch": 0.000333642578125,
      "step": 54664,
      "training_step_time": 0.3994598388671875
    },
    {
      "epoch": 0.000333648681640625,
      "model_forward_time": 0.11602950096130371,
      "step": 54665
    },
    {
      "epoch": 0.000333648681640625,
      "step": 54665,
      "training_step_time": 0.38156604766845703
    },
    {
      "epoch": 0.00033365478515625,
      "model_forward_time": 0.11530303955078125,
      "step": 54666
    },
    {
      "epoch": 0.00033365478515625,
      "step": 54666,
      "training_step_time": 0.3961753845214844
    },
    {
      "epoch": 0.000333660888671875,
      "model_forward_time": 0.11558675765991211,
      "step": 54667
    },
    {
      "epoch": 0.000333660888671875,
      "step": 54667,
      "training_step_time": 0.429274320602417
    },
    {
      "epoch": 0.0003336669921875,
      "model_forward_time": 0.11679911613464355,
      "step": 54668
    },
    {
      "epoch": 0.0003336669921875,
      "step": 54668,
      "training_step_time": 0.38959288597106934
    },
    {
      "epoch": 0.000333673095703125,
      "model_forward_time": 0.11603283882141113,
      "step": 54669
    },
    {
      "epoch": 0.000333673095703125,
      "step": 54669,
      "training_step_time": 0.4020507335662842
    },
    {
      "epoch": 0.00033367919921875,
      "grad_norm": 0.11553824692964554,
      "learning_rate": 2.141997482412994e-06,
      "loss": 0.0342,
      "step": 54670
    },
    {
      "epoch": 0.00033367919921875,
      "model_forward_time": 0.11566901206970215,
      "step": 54670
    },
    {
      "epoch": 0.00033367919921875,
      "step": 54670,
      "training_step_time": 0.3977344036102295
    },
    {
      "epoch": 0.000333685302734375,
      "model_forward_time": 0.11516165733337402,
      "step": 54671
    },
    {
      "epoch": 0.000333685302734375,
      "step": 54671,
      "training_step_time": 0.3987133502960205
    },
    {
      "epoch": 0.00033369140625,
      "model_forward_time": 0.1155397891998291,
      "step": 54672
    },
    {
      "epoch": 0.00033369140625,
      "step": 54672,
      "training_step_time": 0.3932948112487793
    },
    {
      "epoch": 0.000333697509765625,
      "model_forward_time": 0.11557674407958984,
      "step": 54673
    },
    {
      "epoch": 0.000333697509765625,
      "step": 54673,
      "training_step_time": 0.39844274520874023
    },
    {
      "epoch": 0.00033370361328125,
      "model_forward_time": 0.11599540710449219,
      "step": 54674
    },
    {
      "epoch": 0.00033370361328125,
      "step": 54674,
      "training_step_time": 0.40158820152282715
    },
    {
      "epoch": 0.000333709716796875,
      "model_forward_time": 0.1152336597442627,
      "step": 54675
    },
    {
      "epoch": 0.000333709716796875,
      "step": 54675,
      "training_step_time": 0.4228241443634033
    },
    {
      "epoch": 0.0003337158203125,
      "model_forward_time": 0.11472272872924805,
      "step": 54676
    },
    {
      "epoch": 0.0003337158203125,
      "step": 54676,
      "training_step_time": 0.36733150482177734
    },
    {
      "epoch": 0.000333721923828125,
      "model_forward_time": 0.11543917655944824,
      "step": 54677
    },
    {
      "epoch": 0.000333721923828125,
      "step": 54677,
      "training_step_time": 0.4705691337585449
    },
    {
      "epoch": 0.00033372802734375,
      "model_forward_time": 0.11620664596557617,
      "step": 54678
    },
    {
      "epoch": 0.00033372802734375,
      "step": 54678,
      "training_step_time": 0.43451952934265137
    },
    {
      "epoch": 0.000333734130859375,
      "model_forward_time": 0.11518192291259766,
      "step": 54679
    },
    {
      "epoch": 0.000333734130859375,
      "step": 54679,
      "training_step_time": 0.39042115211486816
    },
    {
      "epoch": 0.000333740234375,
      "grad_norm": 0.10560236871242523,
      "learning_rate": 2.134025123396638e-06,
      "loss": 0.0414,
      "step": 54680
    },
    {
      "epoch": 0.000333740234375,
      "model_forward_time": 0.11533164978027344,
      "step": 54680
    },
    {
      "epoch": 0.000333740234375,
      "step": 54680,
      "training_step_time": 0.4344825744628906
    },
    {
      "epoch": 0.000333746337890625,
      "model_forward_time": 0.1151423454284668,
      "step": 54681
    },
    {
      "epoch": 0.000333746337890625,
      "step": 54681,
      "training_step_time": 0.4258434772491455
    },
    {
      "epoch": 0.00033375244140625,
      "model_forward_time": 0.11483097076416016,
      "step": 54682
    },
    {
      "epoch": 0.00033375244140625,
      "step": 54682,
      "training_step_time": 0.40947675704956055
    },
    {
      "epoch": 0.000333758544921875,
      "model_forward_time": 0.1151282787322998,
      "step": 54683
    },
    {
      "epoch": 0.000333758544921875,
      "step": 54683,
      "training_step_time": 0.39984822273254395
    },
    {
      "epoch": 0.0003337646484375,
      "model_forward_time": 0.11483621597290039,
      "step": 54684
    },
    {
      "epoch": 0.0003337646484375,
      "step": 54684,
      "training_step_time": 0.39423179626464844
    },
    {
      "epoch": 0.000333770751953125,
      "model_forward_time": 0.11541438102722168,
      "step": 54685
    },
    {
      "epoch": 0.000333770751953125,
      "step": 54685,
      "training_step_time": 0.39643120765686035
    },
    {
      "epoch": 0.00033377685546875,
      "model_forward_time": 0.11516427993774414,
      "step": 54686
    },
    {
      "epoch": 0.00033377685546875,
      "step": 54686,
      "training_step_time": 0.37865471839904785
    },
    {
      "epoch": 0.000333782958984375,
      "model_forward_time": 0.11535501480102539,
      "step": 54687
    },
    {
      "epoch": 0.000333782958984375,
      "step": 54687,
      "training_step_time": 0.3825843334197998
    },
    {
      "epoch": 0.0003337890625,
      "model_forward_time": 0.11627364158630371,
      "step": 54688
    },
    {
      "epoch": 0.0003337890625,
      "step": 54688,
      "training_step_time": 0.3929448127746582
    },
    {
      "epoch": 0.000333795166015625,
      "model_forward_time": 0.11580657958984375,
      "step": 54689
    },
    {
      "epoch": 0.000333795166015625,
      "step": 54689,
      "training_step_time": 0.44783878326416016
    },
    {
      "epoch": 0.00033380126953125,
      "grad_norm": 0.09529385715723038,
      "learning_rate": 2.1260673047996227e-06,
      "loss": 0.0339,
      "step": 54690
    },
    {
      "epoch": 0.00033380126953125,
      "model_forward_time": 0.11531829833984375,
      "step": 54690
    },
    {
      "epoch": 0.00033380126953125,
      "step": 54690,
      "training_step_time": 0.5014805793762207
    },
    {
      "epoch": 0.000333807373046875,
      "model_forward_time": 0.11467838287353516,
      "step": 54691
    },
    {
      "epoch": 0.000333807373046875,
      "step": 54691,
      "training_step_time": 0.45753979682922363
    },
    {
      "epoch": 0.0003338134765625,
      "model_forward_time": 0.1161191463470459,
      "step": 54692
    },
    {
      "epoch": 0.0003338134765625,
      "step": 54692,
      "training_step_time": 0.4833383560180664
    },
    {
      "epoch": 0.000333819580078125,
      "model_forward_time": 0.1155691146850586,
      "step": 54693
    },
    {
      "epoch": 0.000333819580078125,
      "step": 54693,
      "training_step_time": 0.39449310302734375
    },
    {
      "epoch": 0.00033382568359375,
      "model_forward_time": 0.1157526969909668,
      "step": 54694
    },
    {
      "epoch": 0.00033382568359375,
      "step": 54694,
      "training_step_time": 0.3954813480377197
    },
    {
      "epoch": 0.000333831787109375,
      "model_forward_time": 0.11469578742980957,
      "step": 54695
    },
    {
      "epoch": 0.000333831787109375,
      "step": 54695,
      "training_step_time": 0.434906005859375
    },
    {
      "epoch": 0.000333837890625,
      "model_forward_time": 0.11506056785583496,
      "step": 54696
    },
    {
      "epoch": 0.000333837890625,
      "step": 54696,
      "training_step_time": 0.4042394161224365
    },
    {
      "epoch": 0.000333843994140625,
      "model_forward_time": 0.1151127815246582,
      "step": 54697
    },
    {
      "epoch": 0.000333843994140625,
      "step": 54697,
      "training_step_time": 0.4130516052246094
    },
    {
      "epoch": 0.00033385009765625,
      "model_forward_time": 0.1158151626586914,
      "step": 54698
    },
    {
      "epoch": 0.00033385009765625,
      "step": 54698,
      "training_step_time": 0.3886280059814453
    },
    {
      "epoch": 0.000333856201171875,
      "model_forward_time": 0.11571979522705078,
      "step": 54699
    },
    {
      "epoch": 0.000333856201171875,
      "step": 54699,
      "training_step_time": 0.40126538276672363
    },
    {
      "epoch": 0.0003338623046875,
      "grad_norm": 0.08836711943149567,
      "learning_rate": 2.118124029039309e-06,
      "loss": 0.0379,
      "step": 54700
    },
    {
      "epoch": 0.0003338623046875,
      "model_forward_time": 0.11539363861083984,
      "step": 54700
    },
    {
      "epoch": 0.0003338623046875,
      "step": 54700,
      "training_step_time": 0.39469051361083984
    },
    {
      "epoch": 0.000333868408203125,
      "model_forward_time": 0.11592841148376465,
      "step": 54701
    },
    {
      "epoch": 0.000333868408203125,
      "step": 54701,
      "training_step_time": 0.3919792175292969
    },
    {
      "epoch": 0.00033387451171875,
      "model_forward_time": 0.11574101448059082,
      "step": 54702
    },
    {
      "epoch": 0.00033387451171875,
      "step": 54702,
      "training_step_time": 0.4262361526489258
    },
    {
      "epoch": 0.000333880615234375,
      "model_forward_time": 0.11574459075927734,
      "step": 54703
    },
    {
      "epoch": 0.000333880615234375,
      "step": 54703,
      "training_step_time": 0.44255971908569336
    },
    {
      "epoch": 0.00033388671875,
      "model_forward_time": 0.11660003662109375,
      "step": 54704
    },
    {
      "epoch": 0.00033388671875,
      "step": 54704,
      "training_step_time": 0.3987083435058594
    },
    {
      "epoch": 0.000333892822265625,
      "model_forward_time": 0.11458134651184082,
      "step": 54705
    },
    {
      "epoch": 0.000333892822265625,
      "step": 54705,
      "training_step_time": 0.3625943660736084
    },
    {
      "epoch": 0.00033389892578125,
      "model_forward_time": 0.11519336700439453,
      "step": 54706
    },
    {
      "epoch": 0.00033389892578125,
      "step": 54706,
      "training_step_time": 0.4523947238922119
    },
    {
      "epoch": 0.000333905029296875,
      "model_forward_time": 0.11532044410705566,
      "step": 54707
    },
    {
      "epoch": 0.000333905029296875,
      "step": 54707,
      "training_step_time": 0.3943502902984619
    },
    {
      "epoch": 0.0003339111328125,
      "model_forward_time": 0.11510848999023438,
      "step": 54708
    },
    {
      "epoch": 0.0003339111328125,
      "step": 54708,
      "training_step_time": 0.38525867462158203
    },
    {
      "epoch": 0.000333917236328125,
      "model_forward_time": 0.11595439910888672,
      "step": 54709
    },
    {
      "epoch": 0.000333917236328125,
      "step": 54709,
      "training_step_time": 0.37656211853027344
    },
    {
      "epoch": 0.00033392333984375,
      "grad_norm": 0.08584272861480713,
      "learning_rate": 2.110195298528683e-06,
      "loss": 0.0324,
      "step": 54710
    },
    {
      "epoch": 0.00033392333984375,
      "model_forward_time": 0.1149289608001709,
      "step": 54710
    },
    {
      "epoch": 0.00033392333984375,
      "step": 54710,
      "training_step_time": 0.4176633358001709
    },
    {
      "epoch": 0.000333929443359375,
      "model_forward_time": 0.11523747444152832,
      "step": 54711
    },
    {
      "epoch": 0.000333929443359375,
      "step": 54711,
      "training_step_time": 0.45209431648254395
    },
    {
      "epoch": 0.000333935546875,
      "model_forward_time": 0.11570048332214355,
      "step": 54712
    },
    {
      "epoch": 0.000333935546875,
      "step": 54712,
      "training_step_time": 0.38712620735168457
    },
    {
      "epoch": 0.000333941650390625,
      "model_forward_time": 0.11557197570800781,
      "step": 54713
    },
    {
      "epoch": 0.000333941650390625,
      "step": 54713,
      "training_step_time": 0.38622140884399414
    },
    {
      "epoch": 0.00033394775390625,
      "model_forward_time": 0.11492252349853516,
      "step": 54714
    },
    {
      "epoch": 0.00033394775390625,
      "step": 54714,
      "training_step_time": 0.3887193202972412
    },
    {
      "epoch": 0.000333953857421875,
      "model_forward_time": 0.1153414249420166,
      "step": 54715
    },
    {
      "epoch": 0.000333953857421875,
      "step": 54715,
      "training_step_time": 0.37690091133117676
    },
    {
      "epoch": 0.0003339599609375,
      "model_forward_time": 0.11592411994934082,
      "step": 54716
    },
    {
      "epoch": 0.0003339599609375,
      "step": 54716,
      "training_step_time": 0.43547868728637695
    },
    {
      "epoch": 0.000333966064453125,
      "model_forward_time": 0.11530041694641113,
      "step": 54717
    },
    {
      "epoch": 0.000333966064453125,
      "step": 54717,
      "training_step_time": 0.41358518600463867
    },
    {
      "epoch": 0.00033397216796875,
      "model_forward_time": 0.11510419845581055,
      "step": 54718
    },
    {
      "epoch": 0.00033397216796875,
      "step": 54718,
      "training_step_time": 0.4173007011413574
    },
    {
      "epoch": 0.000333978271484375,
      "model_forward_time": 0.1150825023651123,
      "step": 54719
    },
    {
      "epoch": 0.000333978271484375,
      "step": 54719,
      "training_step_time": 0.44855761528015137
    },
    {
      "epoch": 0.000333984375,
      "grad_norm": 0.10213367640972137,
      "learning_rate": 2.102281115676258e-06,
      "loss": 0.0394,
      "step": 54720
    },
    {
      "epoch": 0.000333984375,
      "model_forward_time": 0.1147606372833252,
      "step": 54720
    },
    {
      "epoch": 0.000333984375,
      "step": 54720,
      "training_step_time": 0.48340773582458496
    },
    {
      "epoch": 0.000333990478515625,
      "model_forward_time": 0.11547303199768066,
      "step": 54721
    },
    {
      "epoch": 0.000333990478515625,
      "step": 54721,
      "training_step_time": 0.42134761810302734
    },
    {
      "epoch": 0.00033399658203125,
      "model_forward_time": 0.1159663200378418,
      "step": 54722
    },
    {
      "epoch": 0.00033399658203125,
      "step": 54722,
      "training_step_time": 0.4333817958831787
    },
    {
      "epoch": 0.000334002685546875,
      "model_forward_time": 0.11563777923583984,
      "step": 54723
    },
    {
      "epoch": 0.000334002685546875,
      "step": 54723,
      "training_step_time": 0.39275383949279785
    },
    {
      "epoch": 0.0003340087890625,
      "model_forward_time": 0.11494255065917969,
      "step": 54724
    },
    {
      "epoch": 0.0003340087890625,
      "step": 54724,
      "training_step_time": 0.4369173049926758
    },
    {
      "epoch": 0.000334014892578125,
      "model_forward_time": 0.11495757102966309,
      "step": 54725
    },
    {
      "epoch": 0.000334014892578125,
      "step": 54725,
      "training_step_time": 0.4136381149291992
    },
    {
      "epoch": 0.00033402099609375,
      "model_forward_time": 0.11514949798583984,
      "step": 54726
    },
    {
      "epoch": 0.00033402099609375,
      "step": 54726,
      "training_step_time": 0.4408128261566162
    },
    {
      "epoch": 0.000334027099609375,
      "model_forward_time": 0.11553692817687988,
      "step": 54727
    },
    {
      "epoch": 0.000334027099609375,
      "step": 54727,
      "training_step_time": 0.4014871120452881
    },
    {
      "epoch": 0.000334033203125,
      "model_forward_time": 0.11511063575744629,
      "step": 54728
    },
    {
      "epoch": 0.000334033203125,
      "step": 54728,
      "training_step_time": 0.39668941497802734
    },
    {
      "epoch": 0.000334039306640625,
      "model_forward_time": 0.11551213264465332,
      "step": 54729
    },
    {
      "epoch": 0.000334039306640625,
      "step": 54729,
      "training_step_time": 0.41913890838623047
    },
    {
      "epoch": 0.00033404541015625,
      "grad_norm": 0.07634830474853516,
      "learning_rate": 2.0943814828861762e-06,
      "loss": 0.0361,
      "step": 54730
    },
    {
      "epoch": 0.00033404541015625,
      "model_forward_time": 0.11594009399414062,
      "step": 54730
    },
    {
      "epoch": 0.00033404541015625,
      "step": 54730,
      "training_step_time": 0.39675283432006836
    },
    {
      "epoch": 0.000334051513671875,
      "model_forward_time": 0.11513233184814453,
      "step": 54731
    },
    {
      "epoch": 0.000334051513671875,
      "step": 54731,
      "training_step_time": 0.4223489761352539
    },
    {
      "epoch": 0.0003340576171875,
      "model_forward_time": 0.11526799201965332,
      "step": 54732
    },
    {
      "epoch": 0.0003340576171875,
      "step": 54732,
      "training_step_time": 0.40207362174987793
    },
    {
      "epoch": 0.000334063720703125,
      "model_forward_time": 0.11591267585754395,
      "step": 54733
    },
    {
      "epoch": 0.000334063720703125,
      "step": 54733,
      "training_step_time": 0.3783113956451416
    },
    {
      "epoch": 0.00033406982421875,
      "model_forward_time": 0.11587643623352051,
      "step": 54734
    },
    {
      "epoch": 0.00033406982421875,
      "step": 54734,
      "training_step_time": 0.3893108367919922
    },
    {
      "epoch": 0.000334075927734375,
      "model_forward_time": 0.11510968208312988,
      "step": 54735
    },
    {
      "epoch": 0.000334075927734375,
      "step": 54735,
      "training_step_time": 0.4996378421783447
    },
    {
      "epoch": 0.00033408203125,
      "model_forward_time": 0.11545395851135254,
      "step": 54736
    },
    {
      "epoch": 0.00033408203125,
      "step": 54736,
      "training_step_time": 0.4426116943359375
    },
    {
      "epoch": 0.000334088134765625,
      "model_forward_time": 0.11545300483703613,
      "step": 54737
    },
    {
      "epoch": 0.000334088134765625,
      "step": 54737,
      "training_step_time": 0.41244053840637207
    },
    {
      "epoch": 0.00033409423828125,
      "model_forward_time": 0.1153264045715332,
      "step": 54738
    },
    {
      "epoch": 0.00033409423828125,
      "step": 54738,
      "training_step_time": 0.4097747802734375
    },
    {
      "epoch": 0.000334100341796875,
      "model_forward_time": 0.11495709419250488,
      "step": 54739
    },
    {
      "epoch": 0.000334100341796875,
      "step": 54739,
      "training_step_time": 0.37955784797668457
    },
    {
      "epoch": 0.0003341064453125,
      "grad_norm": 0.11536569893360138,
      "learning_rate": 2.0864964025581135e-06,
      "loss": 0.0334,
      "step": 54740
    },
    {
      "epoch": 0.0003341064453125,
      "model_forward_time": 0.11489605903625488,
      "step": 54740
    },
    {
      "epoch": 0.0003341064453125,
      "step": 54740,
      "training_step_time": 0.3803229331970215
    },
    {
      "epoch": 0.000334112548828125,
      "model_forward_time": 0.11680340766906738,
      "step": 54741
    },
    {
      "epoch": 0.000334112548828125,
      "step": 54741,
      "training_step_time": 0.39856958389282227
    },
    {
      "epoch": 0.00033411865234375,
      "model_forward_time": 0.1151895523071289,
      "step": 54742
    },
    {
      "epoch": 0.00033411865234375,
      "step": 54742,
      "training_step_time": 0.38101673126220703
    },
    {
      "epoch": 0.000334124755859375,
      "model_forward_time": 0.11493945121765137,
      "step": 54743
    },
    {
      "epoch": 0.000334124755859375,
      "step": 54743,
      "training_step_time": 0.4410831928253174
    },
    {
      "epoch": 0.000334130859375,
      "model_forward_time": 0.11506104469299316,
      "step": 54744
    },
    {
      "epoch": 0.000334130859375,
      "step": 54744,
      "training_step_time": 0.42853808403015137
    },
    {
      "epoch": 0.000334136962890625,
      "model_forward_time": 0.11562848091125488,
      "step": 54745
    },
    {
      "epoch": 0.000334136962890625,
      "step": 54745,
      "training_step_time": 0.3945937156677246
    },
    {
      "epoch": 0.00033414306640625,
      "model_forward_time": 0.11510944366455078,
      "step": 54746
    },
    {
      "epoch": 0.00033414306640625,
      "step": 54746,
      "training_step_time": 0.3932018280029297
    },
    {
      "epoch": 0.000334149169921875,
      "model_forward_time": 0.11680102348327637,
      "step": 54747
    },
    {
      "epoch": 0.000334149169921875,
      "step": 54747,
      "training_step_time": 0.38429951667785645
    },
    {
      "epoch": 0.0003341552734375,
      "model_forward_time": 0.11533665657043457,
      "step": 54748
    },
    {
      "epoch": 0.0003341552734375,
      "step": 54748,
      "training_step_time": 0.497089147567749
    },
    {
      "epoch": 0.000334161376953125,
      "model_forward_time": 0.11549544334411621,
      "step": 54749
    },
    {
      "epoch": 0.000334161376953125,
      "step": 54749,
      "training_step_time": 0.4141206741333008
    },
    {
      "epoch": 0.00033416748046875,
      "grad_norm": 0.15636476874351501,
      "learning_rate": 2.0786258770873647e-06,
      "loss": 0.0377,
      "step": 54750
    },
    {
      "epoch": 0.00033416748046875,
      "model_forward_time": 0.11514663696289062,
      "step": 54750
    },
    {
      "epoch": 0.00033416748046875,
      "step": 54750,
      "training_step_time": 0.47844791412353516
    },
    {
      "epoch": 0.000334173583984375,
      "model_forward_time": 0.11474037170410156,
      "step": 54751
    },
    {
      "epoch": 0.000334173583984375,
      "step": 54751,
      "training_step_time": 0.3944385051727295
    },
    {
      "epoch": 0.0003341796875,
      "model_forward_time": 0.11615967750549316,
      "step": 54752
    },
    {
      "epoch": 0.0003341796875,
      "step": 54752,
      "training_step_time": 0.38361644744873047
    },
    {
      "epoch": 0.000334185791015625,
      "model_forward_time": 0.11460041999816895,
      "step": 54753
    },
    {
      "epoch": 0.000334185791015625,
      "step": 54753,
      "training_step_time": 0.3826272487640381
    },
    {
      "epoch": 0.00033419189453125,
      "model_forward_time": 0.11589789390563965,
      "step": 54754
    },
    {
      "epoch": 0.00033419189453125,
      "step": 54754,
      "training_step_time": 0.4383533000946045
    },
    {
      "epoch": 0.000334197998046875,
      "model_forward_time": 0.11583209037780762,
      "step": 54755
    },
    {
      "epoch": 0.000334197998046875,
      "step": 54755,
      "training_step_time": 0.3888843059539795
    },
    {
      "epoch": 0.0003342041015625,
      "model_forward_time": 0.11573505401611328,
      "step": 54756
    },
    {
      "epoch": 0.0003342041015625,
      "step": 54756,
      "training_step_time": 0.3839111328125
    },
    {
      "epoch": 0.000334210205078125,
      "model_forward_time": 0.11563420295715332,
      "step": 54757
    },
    {
      "epoch": 0.000334210205078125,
      "step": 54757,
      "training_step_time": 0.40146946907043457
    },
    {
      "epoch": 0.00033421630859375,
      "model_forward_time": 0.11559724807739258,
      "step": 54758
    },
    {
      "epoch": 0.00033421630859375,
      "step": 54758,
      "training_step_time": 0.4218604564666748
    },
    {
      "epoch": 0.000334222412109375,
      "model_forward_time": 0.11545562744140625,
      "step": 54759
    },
    {
      "epoch": 0.000334222412109375,
      "step": 54759,
      "training_step_time": 0.40349531173706055
    },
    {
      "epoch": 0.000334228515625,
      "grad_norm": 0.09960633516311646,
      "learning_rate": 2.0707699088647836e-06,
      "loss": 0.0327,
      "step": 54760
    },
    {
      "epoch": 0.000334228515625,
      "model_forward_time": 0.11512541770935059,
      "step": 54760
    },
    {
      "epoch": 0.000334228515625,
      "step": 54760,
      "training_step_time": 0.5052940845489502
    },
    {
      "epoch": 0.000334234619140625,
      "model_forward_time": 0.11512613296508789,
      "step": 54761
    },
    {
      "epoch": 0.000334234619140625,
      "step": 54761,
      "training_step_time": 0.39270472526550293
    },
    {
      "epoch": 0.00033424072265625,
      "model_forward_time": 0.11549592018127441,
      "step": 54762
    },
    {
      "epoch": 0.00033424072265625,
      "step": 54762,
      "training_step_time": 0.3901839256286621
    },
    {
      "epoch": 0.000334246826171875,
      "model_forward_time": 0.11531209945678711,
      "step": 54763
    },
    {
      "epoch": 0.000334246826171875,
      "step": 54763,
      "training_step_time": 0.39185667037963867
    },
    {
      "epoch": 0.0003342529296875,
      "model_forward_time": 0.11457180976867676,
      "step": 54764
    },
    {
      "epoch": 0.0003342529296875,
      "step": 54764,
      "training_step_time": 0.4457252025604248
    },
    {
      "epoch": 0.000334259033203125,
      "model_forward_time": 0.11561822891235352,
      "step": 54765
    },
    {
      "epoch": 0.000334259033203125,
      "step": 54765,
      "training_step_time": 0.48369383811950684
    },
    {
      "epoch": 0.00033426513671875,
      "model_forward_time": 0.11530041694641113,
      "step": 54766
    },
    {
      "epoch": 0.00033426513671875,
      "step": 54766,
      "training_step_time": 0.4604361057281494
    },
    {
      "epoch": 0.000334271240234375,
      "model_forward_time": 0.11586809158325195,
      "step": 54767
    },
    {
      "epoch": 0.000334271240234375,
      "step": 54767,
      "training_step_time": 0.41437625885009766
    },
    {
      "epoch": 0.00033427734375,
      "model_forward_time": 0.11457395553588867,
      "step": 54768
    },
    {
      "epoch": 0.00033427734375,
      "step": 54768,
      "training_step_time": 0.4465911388397217
    },
    {
      "epoch": 0.000334283447265625,
      "model_forward_time": 0.11544370651245117,
      "step": 54769
    },
    {
      "epoch": 0.000334283447265625,
      "step": 54769,
      "training_step_time": 0.41817331314086914
    },
    {
      "epoch": 0.00033428955078125,
      "grad_norm": 0.07846242934465408,
      "learning_rate": 2.062928500276812e-06,
      "loss": 0.0356,
      "step": 54770
    },
    {
      "epoch": 0.00033428955078125,
      "model_forward_time": 0.11489701271057129,
      "step": 54770
    },
    {
      "epoch": 0.00033428955078125,
      "step": 54770,
      "training_step_time": 0.3845696449279785
    },
    {
      "epoch": 0.000334295654296875,
      "model_forward_time": 0.1154170036315918,
      "step": 54771
    },
    {
      "epoch": 0.000334295654296875,
      "step": 54771,
      "training_step_time": 0.41143274307250977
    },
    {
      "epoch": 0.0003343017578125,
      "model_forward_time": 0.11538434028625488,
      "step": 54772
    },
    {
      "epoch": 0.0003343017578125,
      "step": 54772,
      "training_step_time": 0.49760866165161133
    },
    {
      "epoch": 0.000334307861328125,
      "model_forward_time": 0.11491250991821289,
      "step": 54773
    },
    {
      "epoch": 0.000334307861328125,
      "step": 54773,
      "training_step_time": 0.3839092254638672
    },
    {
      "epoch": 0.00033431396484375,
      "model_forward_time": 0.11587262153625488,
      "step": 54774
    },
    {
      "epoch": 0.00033431396484375,
      "step": 54774,
      "training_step_time": 0.38928818702697754
    },
    {
      "epoch": 0.000334320068359375,
      "model_forward_time": 0.1160578727722168,
      "step": 54775
    },
    {
      "epoch": 0.000334320068359375,
      "step": 54775,
      "training_step_time": 0.39475154876708984
    },
    {
      "epoch": 0.000334326171875,
      "model_forward_time": 0.11526918411254883,
      "step": 54776
    },
    {
      "epoch": 0.000334326171875,
      "step": 54776,
      "training_step_time": 0.38536953926086426
    },
    {
      "epoch": 0.000334332275390625,
      "model_forward_time": 0.11479926109313965,
      "step": 54777
    },
    {
      "epoch": 0.000334332275390625,
      "step": 54777,
      "training_step_time": 0.38700175285339355
    },
    {
      "epoch": 0.00033433837890625,
      "model_forward_time": 0.11527681350708008,
      "step": 54778
    },
    {
      "epoch": 0.00033433837890625,
      "step": 54778,
      "training_step_time": 0.6286585330963135
    },
    {
      "epoch": 0.000334344482421875,
      "model_forward_time": 0.11508059501647949,
      "step": 54779
    },
    {
      "epoch": 0.000334344482421875,
      "step": 54779,
      "training_step_time": 0.501673698425293
    },
    {
      "epoch": 0.0003343505859375,
      "grad_norm": 0.07748234272003174,
      "learning_rate": 2.0551016537054493e-06,
      "loss": 0.0317,
      "step": 54780
    },
    {
      "epoch": 0.0003343505859375,
      "model_forward_time": 0.11527395248413086,
      "step": 54780
    },
    {
      "epoch": 0.0003343505859375,
      "step": 54780,
      "training_step_time": 0.39607882499694824
    },
    {
      "epoch": 0.000334356689453125,
      "model_forward_time": 0.11442947387695312,
      "step": 54781
    },
    {
      "epoch": 0.000334356689453125,
      "step": 54781,
      "training_step_time": 0.40950846672058105
    },
    {
      "epoch": 0.00033436279296875,
      "model_forward_time": 0.11456918716430664,
      "step": 54782
    },
    {
      "epoch": 0.00033436279296875,
      "step": 54782,
      "training_step_time": 0.41147470474243164
    },
    {
      "epoch": 0.000334368896484375,
      "model_forward_time": 0.1146233081817627,
      "step": 54783
    },
    {
      "epoch": 0.000334368896484375,
      "step": 54783,
      "training_step_time": 0.39969706535339355
    },
    {
      "epoch": 0.000334375,
      "model_forward_time": 0.1150364875793457,
      "step": 54784
    },
    {
      "epoch": 0.000334375,
      "step": 54784,
      "training_step_time": 0.5233292579650879
    },
    {
      "epoch": 0.000334381103515625,
      "model_forward_time": 0.11501216888427734,
      "step": 54785
    },
    {
      "epoch": 0.000334381103515625,
      "step": 54785,
      "training_step_time": 0.39194488525390625
    },
    {
      "epoch": 0.00033438720703125,
      "model_forward_time": 0.11501836776733398,
      "step": 54786
    },
    {
      "epoch": 0.00033438720703125,
      "step": 54786,
      "training_step_time": 0.4005861282348633
    },
    {
      "epoch": 0.000334393310546875,
      "model_forward_time": 0.11509370803833008,
      "step": 54787
    },
    {
      "epoch": 0.000334393310546875,
      "step": 54787,
      "training_step_time": 0.39014267921447754
    },
    {
      "epoch": 0.0003343994140625,
      "model_forward_time": 0.11458635330200195,
      "step": 54788
    },
    {
      "epoch": 0.0003343994140625,
      "step": 54788,
      "training_step_time": 0.40378236770629883
    },
    {
      "epoch": 0.000334405517578125,
      "model_forward_time": 0.1157228946685791,
      "step": 54789
    },
    {
      "epoch": 0.000334405517578125,
      "step": 54789,
      "training_step_time": 0.3835182189941406
    },
    {
      "epoch": 0.00033441162109375,
      "grad_norm": 0.09169542789459229,
      "learning_rate": 2.047289371528299e-06,
      "loss": 0.0356,
      "step": 54790
    },
    {
      "epoch": 0.00033441162109375,
      "model_forward_time": 0.1153724193572998,
      "step": 54790
    },
    {
      "epoch": 0.00033441162109375,
      "step": 54790,
      "training_step_time": 0.5827713012695312
    },
    {
      "epoch": 0.000334417724609375,
      "model_forward_time": 0.11469507217407227,
      "step": 54791
    },
    {
      "epoch": 0.000334417724609375,
      "step": 54791,
      "training_step_time": 0.5054240226745605
    },
    {
      "epoch": 0.000334423828125,
      "model_forward_time": 0.1150662899017334,
      "step": 54792
    },
    {
      "epoch": 0.000334423828125,
      "step": 54792,
      "training_step_time": 0.4271721839904785
    },
    {
      "epoch": 0.000334429931640625,
      "model_forward_time": 0.11442923545837402,
      "step": 54793
    },
    {
      "epoch": 0.000334429931640625,
      "step": 54793,
      "training_step_time": 0.49779295921325684
    },
    {
      "epoch": 0.00033443603515625,
      "model_forward_time": 0.11446428298950195,
      "step": 54794
    },
    {
      "epoch": 0.00033443603515625,
      "step": 54794,
      "training_step_time": 0.41538023948669434
    },
    {
      "epoch": 0.000334442138671875,
      "model_forward_time": 0.11440563201904297,
      "step": 54795
    },
    {
      "epoch": 0.000334442138671875,
      "step": 54795,
      "training_step_time": 0.471146821975708
    },
    {
      "epoch": 0.0003344482421875,
      "model_forward_time": 0.11436748504638672,
      "step": 54796
    },
    {
      "epoch": 0.0003344482421875,
      "step": 54796,
      "training_step_time": 0.42981815338134766
    },
    {
      "epoch": 0.000334454345703125,
      "model_forward_time": 0.11439728736877441,
      "step": 54797
    },
    {
      "epoch": 0.000334454345703125,
      "step": 54797,
      "training_step_time": 0.4029254913330078
    },
    {
      "epoch": 0.00033446044921875,
      "model_forward_time": 0.11510467529296875,
      "step": 54798
    },
    {
      "epoch": 0.00033446044921875,
      "step": 54798,
      "training_step_time": 0.400815486907959
    },
    {
      "epoch": 0.000334466552734375,
      "model_forward_time": 0.11463260650634766,
      "step": 54799
    },
    {
      "epoch": 0.000334466552734375,
      "step": 54799,
      "training_step_time": 0.39593958854675293
    },
    {
      "epoch": 0.00033447265625,
      "grad_norm": 0.09561388939619064,
      "learning_rate": 2.0394916561185083e-06,
      "loss": 0.0395,
      "step": 54800
    },
    {
      "epoch": 0.00033447265625,
      "model_forward_time": 0.11459565162658691,
      "step": 54800
    },
    {
      "epoch": 0.00033447265625,
      "step": 54800,
      "training_step_time": 0.40740442276000977
    },
    {
      "epoch": 0.000334478759765625,
      "model_forward_time": 0.11457204818725586,
      "step": 54801
    },
    {
      "epoch": 0.000334478759765625,
      "step": 54801,
      "training_step_time": 0.39763307571411133
    },
    {
      "epoch": 0.00033448486328125,
      "model_forward_time": 0.11533117294311523,
      "step": 54802
    },
    {
      "epoch": 0.00033448486328125,
      "step": 54802,
      "training_step_time": 0.40337657928466797
    },
    {
      "epoch": 0.000334490966796875,
      "model_forward_time": 0.11626791954040527,
      "step": 54803
    },
    {
      "epoch": 0.000334490966796875,
      "step": 54803,
      "training_step_time": 0.41910219192504883
    },
    {
      "epoch": 0.0003344970703125,
      "model_forward_time": 0.11514401435852051,
      "step": 54804
    },
    {
      "epoch": 0.0003344970703125,
      "step": 54804,
      "training_step_time": 0.4046008586883545
    },
    {
      "epoch": 0.000334503173828125,
      "model_forward_time": 0.11591482162475586,
      "step": 54805
    },
    {
      "epoch": 0.000334503173828125,
      "step": 54805,
      "training_step_time": 0.39582014083862305
    },
    {
      "epoch": 0.00033450927734375,
      "model_forward_time": 0.11533188819885254,
      "step": 54806
    },
    {
      "epoch": 0.00033450927734375,
      "step": 54806,
      "training_step_time": 0.42070460319519043
    },
    {
      "epoch": 0.000334515380859375,
      "model_forward_time": 0.11558890342712402,
      "step": 54807
    },
    {
      "epoch": 0.000334515380859375,
      "step": 54807,
      "training_step_time": 0.4441382884979248
    },
    {
      "epoch": 0.000334521484375,
      "model_forward_time": 0.11472249031066895,
      "step": 54808
    },
    {
      "epoch": 0.000334521484375,
      "step": 54808,
      "training_step_time": 0.5088872909545898
    },
    {
      "epoch": 0.000334527587890625,
      "model_forward_time": 0.11652827262878418,
      "step": 54809
    },
    {
      "epoch": 0.000334527587890625,
      "step": 54809,
      "training_step_time": 0.49683260917663574
    },
    {
      "epoch": 0.00033453369140625,
      "grad_norm": 0.13614411652088165,
      "learning_rate": 2.0317085098448372e-06,
      "loss": 0.0375,
      "step": 54810
    },
    {
      "epoch": 0.00033453369140625,
      "model_forward_time": 0.11479902267456055,
      "step": 54810
    },
    {
      "epoch": 0.00033453369140625,
      "step": 54810,
      "training_step_time": 0.4647088050842285
    },
    {
      "epoch": 0.000334539794921875,
      "model_forward_time": 0.1149301528930664,
      "step": 54811
    },
    {
      "epoch": 0.000334539794921875,
      "step": 54811,
      "training_step_time": 0.4902627468109131
    },
    {
      "epoch": 0.0003345458984375,
      "model_forward_time": 0.11479544639587402,
      "step": 54812
    },
    {
      "epoch": 0.0003345458984375,
      "step": 54812,
      "training_step_time": 0.3901686668395996
    },
    {
      "epoch": 0.000334552001953125,
      "model_forward_time": 0.11454010009765625,
      "step": 54813
    },
    {
      "epoch": 0.000334552001953125,
      "step": 54813,
      "training_step_time": 0.3903629779815674
    },
    {
      "epoch": 0.00033455810546875,
      "model_forward_time": 0.11474466323852539,
      "step": 54814
    },
    {
      "epoch": 0.00033455810546875,
      "step": 54814,
      "training_step_time": 0.3966789245605469
    },
    {
      "epoch": 0.000334564208984375,
      "model_forward_time": 0.11472320556640625,
      "step": 54815
    },
    {
      "epoch": 0.000334564208984375,
      "step": 54815,
      "training_step_time": 0.38986921310424805
    },
    {
      "epoch": 0.0003345703125,
      "model_forward_time": 0.11526298522949219,
      "step": 54816
    },
    {
      "epoch": 0.0003345703125,
      "step": 54816,
      "training_step_time": 0.399120569229126
    },
    {
      "epoch": 0.000334576416015625,
      "model_forward_time": 0.11501312255859375,
      "step": 54817
    },
    {
      "epoch": 0.000334576416015625,
      "step": 54817,
      "training_step_time": 0.40081787109375
    },
    {
      "epoch": 0.00033458251953125,
      "model_forward_time": 0.11499905586242676,
      "step": 54818
    },
    {
      "epoch": 0.00033458251953125,
      "step": 54818,
      "training_step_time": 0.39925622940063477
    },
    {
      "epoch": 0.000334588623046875,
      "model_forward_time": 0.11467361450195312,
      "step": 54819
    },
    {
      "epoch": 0.000334588623046875,
      "step": 54819,
      "training_step_time": 0.39313316345214844
    },
    {
      "epoch": 0.0003345947265625,
      "grad_norm": 0.10847268253564835,
      "learning_rate": 2.0239399350715895e-06,
      "loss": 0.0343,
      "step": 54820
    },
    {
      "epoch": 0.0003345947265625,
      "model_forward_time": 0.1157538890838623,
      "step": 54820
    },
    {
      "epoch": 0.0003345947265625,
      "step": 54820,
      "training_step_time": 0.5196683406829834
    },
    {
      "epoch": 0.000334600830078125,
      "model_forward_time": 0.11519908905029297,
      "step": 54821
    },
    {
      "epoch": 0.000334600830078125,
      "step": 54821,
      "training_step_time": 0.4199793338775635
    },
    {
      "epoch": 0.00033460693359375,
      "model_forward_time": 0.11548614501953125,
      "step": 54822
    },
    {
      "epoch": 0.00033460693359375,
      "step": 54822,
      "training_step_time": 0.41931867599487305
    },
    {
      "epoch": 0.000334613037109375,
      "model_forward_time": 0.11536526679992676,
      "step": 54823
    },
    {
      "epoch": 0.000334613037109375,
      "step": 54823,
      "training_step_time": 0.48316335678100586
    },
    {
      "epoch": 0.000334619140625,
      "model_forward_time": 0.11515283584594727,
      "step": 54824
    },
    {
      "epoch": 0.000334619140625,
      "step": 54824,
      "training_step_time": 0.43454432487487793
    },
    {
      "epoch": 0.000334625244140625,
      "model_forward_time": 0.11485433578491211,
      "step": 54825
    },
    {
      "epoch": 0.000334625244140625,
      "step": 54825,
      "training_step_time": 0.4209010601043701
    },
    {
      "epoch": 0.00033463134765625,
      "model_forward_time": 0.1180722713470459,
      "step": 54826
    },
    {
      "epoch": 0.00033463134765625,
      "step": 54826,
      "training_step_time": 0.43802428245544434
    },
    {
      "epoch": 0.000334637451171875,
      "model_forward_time": 0.11516261100769043,
      "step": 54827
    },
    {
      "epoch": 0.000334637451171875,
      "step": 54827,
      "training_step_time": 0.40070033073425293
    },
    {
      "epoch": 0.0003346435546875,
      "model_forward_time": 0.11506533622741699,
      "step": 54828
    },
    {
      "epoch": 0.0003346435546875,
      "step": 54828,
      "training_step_time": 0.3958148956298828
    },
    {
      "epoch": 0.000334649658203125,
      "model_forward_time": 0.11461830139160156,
      "step": 54829
    },
    {
      "epoch": 0.000334649658203125,
      "step": 54829,
      "training_step_time": 0.39296412467956543
    },
    {
      "epoch": 0.00033465576171875,
      "grad_norm": 0.07935141772031784,
      "learning_rate": 2.0161859341586597e-06,
      "loss": 0.0361,
      "step": 54830
    },
    {
      "epoch": 0.00033465576171875,
      "model_forward_time": 0.11487722396850586,
      "step": 54830
    },
    {
      "epoch": 0.00033465576171875,
      "step": 54830,
      "training_step_time": 0.3887062072753906
    },
    {
      "epoch": 0.000334661865234375,
      "model_forward_time": 0.11510920524597168,
      "step": 54831
    },
    {
      "epoch": 0.000334661865234375,
      "step": 54831,
      "training_step_time": 0.39830875396728516
    },
    {
      "epoch": 0.00033466796875,
      "model_forward_time": 0.11477351188659668,
      "step": 54832
    },
    {
      "epoch": 0.00033466796875,
      "step": 54832,
      "training_step_time": 0.6114418506622314
    },
    {
      "epoch": 0.000334674072265625,
      "model_forward_time": 0.11463046073913574,
      "step": 54833
    },
    {
      "epoch": 0.000334674072265625,
      "step": 54833,
      "training_step_time": 0.3936948776245117
    },
    {
      "epoch": 0.00033468017578125,
      "model_forward_time": 0.11553525924682617,
      "step": 54834
    },
    {
      "epoch": 0.00033468017578125,
      "step": 54834,
      "training_step_time": 0.45060205459594727
    },
    {
      "epoch": 0.000334686279296875,
      "model_forward_time": 0.11490321159362793,
      "step": 54835
    },
    {
      "epoch": 0.000334686279296875,
      "step": 54835,
      "training_step_time": 0.3673582077026367
    },
    {
      "epoch": 0.0003346923828125,
      "model_forward_time": 0.1153097152709961,
      "step": 54836
    },
    {
      "epoch": 0.0003346923828125,
      "step": 54836,
      "training_step_time": 0.4738321304321289
    },
    {
      "epoch": 0.000334698486328125,
      "model_forward_time": 0.11460232734680176,
      "step": 54837
    },
    {
      "epoch": 0.000334698486328125,
      "step": 54837,
      "training_step_time": 0.47684812545776367
    },
    {
      "epoch": 0.00033470458984375,
      "model_forward_time": 0.11526298522949219,
      "step": 54838
    },
    {
      "epoch": 0.00033470458984375,
      "step": 54838,
      "training_step_time": 0.4415445327758789
    },
    {
      "epoch": 0.000334710693359375,
      "model_forward_time": 0.11468243598937988,
      "step": 54839
    },
    {
      "epoch": 0.000334710693359375,
      "step": 54839,
      "training_step_time": 0.440061092376709
    },
    {
      "epoch": 0.000334716796875,
      "grad_norm": 0.09239395707845688,
      "learning_rate": 2.008446509461498e-06,
      "loss": 0.0356,
      "step": 54840
    },
    {
      "epoch": 0.000334716796875,
      "model_forward_time": 0.11539316177368164,
      "step": 54840
    },
    {
      "epoch": 0.000334716796875,
      "step": 54840,
      "training_step_time": 0.3830091953277588
    },
    {
      "epoch": 0.000334722900390625,
      "model_forward_time": 0.11508035659790039,
      "step": 54841
    },
    {
      "epoch": 0.000334722900390625,
      "step": 54841,
      "training_step_time": 0.38152551651000977
    },
    {
      "epoch": 0.00033472900390625,
      "model_forward_time": 0.1143498420715332,
      "step": 54842
    },
    {
      "epoch": 0.00033472900390625,
      "step": 54842,
      "training_step_time": 0.3807697296142578
    },
    {
      "epoch": 0.000334735107421875,
      "model_forward_time": 0.11530566215515137,
      "step": 54843
    },
    {
      "epoch": 0.000334735107421875,
      "step": 54843,
      "training_step_time": 0.3843045234680176
    },
    {
      "epoch": 0.0003347412109375,
      "model_forward_time": 0.11467528343200684,
      "step": 54844
    },
    {
      "epoch": 0.0003347412109375,
      "step": 54844,
      "training_step_time": 0.38581037521362305
    },
    {
      "epoch": 0.000334747314453125,
      "model_forward_time": 0.11543750762939453,
      "step": 54845
    },
    {
      "epoch": 0.000334747314453125,
      "step": 54845,
      "training_step_time": 0.39990806579589844
    },
    {
      "epoch": 0.00033475341796875,
      "model_forward_time": 0.11513471603393555,
      "step": 54846
    },
    {
      "epoch": 0.00033475341796875,
      "step": 54846,
      "training_step_time": 0.4000735282897949
    },
    {
      "epoch": 0.000334759521484375,
      "model_forward_time": 0.11544060707092285,
      "step": 54847
    },
    {
      "epoch": 0.000334759521484375,
      "step": 54847,
      "training_step_time": 0.3933391571044922
    },
    {
      "epoch": 0.000334765625,
      "model_forward_time": 0.11559152603149414,
      "step": 54848
    },
    {
      "epoch": 0.000334765625,
      "step": 54848,
      "training_step_time": 0.39441728591918945
    },
    {
      "epoch": 0.000334771728515625,
      "model_forward_time": 0.11558103561401367,
      "step": 54849
    },
    {
      "epoch": 0.000334771728515625,
      "step": 54849,
      "training_step_time": 0.44933104515075684
    },
    {
      "epoch": 0.00033477783203125,
      "grad_norm": 0.08398333191871643,
      "learning_rate": 2.00072166333114e-06,
      "loss": 0.0381,
      "step": 54850
    },
    {
      "epoch": 0.00033477783203125,
      "model_forward_time": 0.1151895523071289,
      "step": 54850
    },
    {
      "epoch": 0.00033477783203125,
      "step": 54850,
      "training_step_time": 0.5623059272766113
    },
    {
      "epoch": 0.000334783935546875,
      "model_forward_time": 0.11448502540588379,
      "step": 54851
    },
    {
      "epoch": 0.000334783935546875,
      "step": 54851,
      "training_step_time": 0.4994540214538574
    },
    {
      "epoch": 0.0003347900390625,
      "model_forward_time": 0.11473441123962402,
      "step": 54852
    },
    {
      "epoch": 0.0003347900390625,
      "step": 54852,
      "training_step_time": 0.4149494171142578
    },
    {
      "epoch": 0.000334796142578125,
      "model_forward_time": 0.1146242618560791,
      "step": 54853
    },
    {
      "epoch": 0.000334796142578125,
      "step": 54853,
      "training_step_time": 0.45427632331848145
    },
    {
      "epoch": 0.00033480224609375,
      "model_forward_time": 0.11462736129760742,
      "step": 54854
    },
    {
      "epoch": 0.00033480224609375,
      "step": 54854,
      "training_step_time": 0.39476990699768066
    },
    {
      "epoch": 0.000334808349609375,
      "model_forward_time": 0.11489486694335938,
      "step": 54855
    },
    {
      "epoch": 0.000334808349609375,
      "step": 54855,
      "training_step_time": 0.39135146141052246
    },
    {
      "epoch": 0.000334814453125,
      "model_forward_time": 0.11497259140014648,
      "step": 54856
    },
    {
      "epoch": 0.000334814453125,
      "step": 54856,
      "training_step_time": 0.38434290885925293
    },
    {
      "epoch": 0.000334820556640625,
      "model_forward_time": 0.1146690845489502,
      "step": 54857
    },
    {
      "epoch": 0.000334820556640625,
      "step": 54857,
      "training_step_time": 0.3861069679260254
    },
    {
      "epoch": 0.00033482666015625,
      "model_forward_time": 0.11534309387207031,
      "step": 54858
    },
    {
      "epoch": 0.00033482666015625,
      "step": 54858,
      "training_step_time": 0.40335750579833984
    },
    {
      "epoch": 0.000334832763671875,
      "model_forward_time": 0.11534571647644043,
      "step": 54859
    },
    {
      "epoch": 0.000334832763671875,
      "step": 54859,
      "training_step_time": 0.3969864845275879
    },
    {
      "epoch": 0.0003348388671875,
      "grad_norm": 0.11548636108636856,
      "learning_rate": 1.9930113981142028e-06,
      "loss": 0.0373,
      "step": 54860
    },
    {
      "epoch": 0.0003348388671875,
      "model_forward_time": 0.1165151596069336,
      "step": 54860
    },
    {
      "epoch": 0.0003348388671875,
      "step": 54860,
      "training_step_time": 0.4019958972930908
    },
    {
      "epoch": 0.000334844970703125,
      "model_forward_time": 0.11516618728637695,
      "step": 54861
    },
    {
      "epoch": 0.000334844970703125,
      "step": 54861,
      "training_step_time": 0.38986897468566895
    },
    {
      "epoch": 0.00033485107421875,
      "model_forward_time": 0.11465835571289062,
      "step": 54862
    },
    {
      "epoch": 0.00033485107421875,
      "step": 54862,
      "training_step_time": 0.6510646343231201
    },
    {
      "epoch": 0.000334857177734375,
      "model_forward_time": 0.11415791511535645,
      "step": 54863
    },
    {
      "epoch": 0.000334857177734375,
      "step": 54863,
      "training_step_time": 0.47521138191223145
    },
    {
      "epoch": 0.00033486328125,
      "model_forward_time": 0.1150515079498291,
      "step": 54864
    },
    {
      "epoch": 0.00033486328125,
      "step": 54864,
      "training_step_time": 0.4293668270111084
    },
    {
      "epoch": 0.000334869384765625,
      "model_forward_time": 0.11479473114013672,
      "step": 54865
    },
    {
      "epoch": 0.000334869384765625,
      "step": 54865,
      "training_step_time": 0.4552476406097412
    },
    {
      "epoch": 0.00033487548828125,
      "model_forward_time": 0.11442136764526367,
      "step": 54866
    },
    {
      "epoch": 0.00033487548828125,
      "step": 54866,
      "training_step_time": 0.401259183883667
    },
    {
      "epoch": 0.000334881591796875,
      "model_forward_time": 0.11398553848266602,
      "step": 54867
    },
    {
      "epoch": 0.000334881591796875,
      "step": 54867,
      "training_step_time": 0.41852712631225586
    },
    {
      "epoch": 0.0003348876953125,
      "model_forward_time": 0.11398172378540039,
      "step": 54868
    },
    {
      "epoch": 0.0003348876953125,
      "step": 54868,
      "training_step_time": 0.4239816665649414
    },
    {
      "epoch": 0.000334893798828125,
      "model_forward_time": 0.11480140686035156,
      "step": 54869
    },
    {
      "epoch": 0.000334893798828125,
      "step": 54869,
      "training_step_time": 0.3945443630218506
    },
    {
      "epoch": 0.00033489990234375,
      "grad_norm": 0.0898079052567482,
      "learning_rate": 1.985315716152847e-06,
      "loss": 0.0395,
      "step": 54870
    },
    {
      "epoch": 0.00033489990234375,
      "model_forward_time": 0.1142282485961914,
      "step": 54870
    },
    {
      "epoch": 0.00033489990234375,
      "step": 54870,
      "training_step_time": 0.39949917793273926
    },
    {
      "epoch": 0.000334906005859375,
      "model_forward_time": 0.11623096466064453,
      "step": 54871
    },
    {
      "epoch": 0.000334906005859375,
      "step": 54871,
      "training_step_time": 0.3966963291168213
    },
    {
      "epoch": 0.000334912109375,
      "model_forward_time": 0.11510848999023438,
      "step": 54872
    },
    {
      "epoch": 0.000334912109375,
      "step": 54872,
      "training_step_time": 0.39417171478271484
    },
    {
      "epoch": 0.000334918212890625,
      "model_forward_time": 0.1150355339050293,
      "step": 54873
    },
    {
      "epoch": 0.000334918212890625,
      "step": 54873,
      "training_step_time": 0.3958113193511963
    },
    {
      "epoch": 0.00033492431640625,
      "model_forward_time": 0.1154634952545166,
      "step": 54874
    },
    {
      "epoch": 0.00033492431640625,
      "step": 54874,
      "training_step_time": 0.642056941986084
    },
    {
      "epoch": 0.000334930419921875,
      "model_forward_time": 0.11452579498291016,
      "step": 54875
    },
    {
      "epoch": 0.000334930419921875,
      "step": 54875,
      "training_step_time": 0.4126310348510742
    },
    {
      "epoch": 0.0003349365234375,
      "model_forward_time": 0.11514878273010254,
      "step": 54876
    },
    {
      "epoch": 0.0003349365234375,
      "step": 54876,
      "training_step_time": 0.3961367607116699
    },
    {
      "epoch": 0.000334942626953125,
      "model_forward_time": 0.11462044715881348,
      "step": 54877
    },
    {
      "epoch": 0.000334942626953125,
      "step": 54877,
      "training_step_time": 0.44356608390808105
    },
    {
      "epoch": 0.00033494873046875,
      "model_forward_time": 0.11450529098510742,
      "step": 54878
    },
    {
      "epoch": 0.00033494873046875,
      "step": 54878,
      "training_step_time": 0.3979835510253906
    },
    {
      "epoch": 0.000334954833984375,
      "model_forward_time": 0.11501860618591309,
      "step": 54879
    },
    {
      "epoch": 0.000334954833984375,
      "step": 54879,
      "training_step_time": 0.5030031204223633
    },
    {
      "epoch": 0.0003349609375,
      "grad_norm": 0.0972793847322464,
      "learning_rate": 1.9776346197848296e-06,
      "loss": 0.0367,
      "step": 54880
    },
    {
      "epoch": 0.0003349609375,
      "model_forward_time": 0.11464047431945801,
      "step": 54880
    },
    {
      "epoch": 0.0003349609375,
      "step": 54880,
      "training_step_time": 0.43610334396362305
    },
    {
      "epoch": 0.000334967041015625,
      "model_forward_time": 0.11463117599487305,
      "step": 54881
    },
    {
      "epoch": 0.000334967041015625,
      "step": 54881,
      "training_step_time": 0.4507129192352295
    },
    {
      "epoch": 0.00033497314453125,
      "model_forward_time": 0.11455512046813965,
      "step": 54882
    },
    {
      "epoch": 0.00033497314453125,
      "step": 54882,
      "training_step_time": 0.39269256591796875
    },
    {
      "epoch": 0.000334979248046875,
      "model_forward_time": 0.1147010326385498,
      "step": 54883
    },
    {
      "epoch": 0.000334979248046875,
      "step": 54883,
      "training_step_time": 0.41170239448547363
    },
    {
      "epoch": 0.0003349853515625,
      "model_forward_time": 0.11490821838378906,
      "step": 54884
    },
    {
      "epoch": 0.0003349853515625,
      "step": 54884,
      "training_step_time": 0.3875002861022949
    },
    {
      "epoch": 0.000334991455078125,
      "model_forward_time": 0.11491966247558594,
      "step": 54885
    },
    {
      "epoch": 0.000334991455078125,
      "step": 54885,
      "training_step_time": 0.400958776473999
    },
    {
      "epoch": 0.00033499755859375,
      "model_forward_time": 0.11522126197814941,
      "step": 54886
    },
    {
      "epoch": 0.00033499755859375,
      "step": 54886,
      "training_step_time": 0.47286200523376465
    },
    {
      "epoch": 0.000335003662109375,
      "model_forward_time": 0.11464953422546387,
      "step": 54887
    },
    {
      "epoch": 0.000335003662109375,
      "step": 54887,
      "training_step_time": 0.39266443252563477
    },
    {
      "epoch": 0.000335009765625,
      "model_forward_time": 0.11522674560546875,
      "step": 54888
    },
    {
      "epoch": 0.000335009765625,
      "step": 54888,
      "training_step_time": 0.3997020721435547
    },
    {
      "epoch": 0.000335015869140625,
      "model_forward_time": 0.11536383628845215,
      "step": 54889
    },
    {
      "epoch": 0.000335015869140625,
      "step": 54889,
      "training_step_time": 0.39738893508911133
    },
    {
      "epoch": 0.00033502197265625,
      "grad_norm": 0.10673695057630539,
      "learning_rate": 1.96996811134344e-06,
      "loss": 0.0448,
      "step": 54890
    },
    {
      "epoch": 0.00033502197265625,
      "model_forward_time": 0.11546158790588379,
      "step": 54890
    },
    {
      "epoch": 0.00033502197265625,
      "step": 54890,
      "training_step_time": 0.45142674446105957
    },
    {
      "epoch": 0.000335028076171875,
      "model_forward_time": 0.11460399627685547,
      "step": 54891
    },
    {
      "epoch": 0.000335028076171875,
      "step": 54891,
      "training_step_time": 0.39339566230773926
    },
    {
      "epoch": 0.0003350341796875,
      "model_forward_time": 0.11591100692749023,
      "step": 54892
    },
    {
      "epoch": 0.0003350341796875,
      "step": 54892,
      "training_step_time": 0.5069253444671631
    },
    {
      "epoch": 0.000335040283203125,
      "model_forward_time": 0.11467409133911133,
      "step": 54893
    },
    {
      "epoch": 0.000335040283203125,
      "step": 54893,
      "training_step_time": 0.4685328006744385
    },
    {
      "epoch": 0.00033504638671875,
      "model_forward_time": 0.11428570747375488,
      "step": 54894
    },
    {
      "epoch": 0.00033504638671875,
      "step": 54894,
      "training_step_time": 0.45320582389831543
    },
    {
      "epoch": 0.000335052490234375,
      "model_forward_time": 0.11485815048217773,
      "step": 54895
    },
    {
      "epoch": 0.000335052490234375,
      "step": 54895,
      "training_step_time": 0.4382483959197998
    },
    {
      "epoch": 0.00033505859375,
      "model_forward_time": 0.11469340324401855,
      "step": 54896
    },
    {
      "epoch": 0.00033505859375,
      "step": 54896,
      "training_step_time": 0.3897867202758789
    },
    {
      "epoch": 0.000335064697265625,
      "model_forward_time": 0.11423802375793457,
      "step": 54897
    },
    {
      "epoch": 0.000335064697265625,
      "step": 54897,
      "training_step_time": 0.39458298683166504
    },
    {
      "epoch": 0.00033507080078125,
      "model_forward_time": 0.11493754386901855,
      "step": 54898
    },
    {
      "epoch": 0.00033507080078125,
      "step": 54898,
      "training_step_time": 0.46408939361572266
    },
    {
      "epoch": 0.000335076904296875,
      "model_forward_time": 0.11470222473144531,
      "step": 54899
    },
    {
      "epoch": 0.000335076904296875,
      "step": 54899,
      "training_step_time": 0.41733574867248535
    },
    {
      "epoch": 0.0003350830078125,
      "grad_norm": 0.09414652734994888,
      "learning_rate": 1.962316193157593e-06,
      "loss": 0.0349,
      "step": 54900
    },
    {
      "epoch": 0.0003350830078125,
      "model_forward_time": 0.11473488807678223,
      "step": 54900
    },
    {
      "epoch": 0.0003350830078125,
      "step": 54900,
      "training_step_time": 0.3947305679321289
    },
    {
      "epoch": 0.000335089111328125,
      "model_forward_time": 0.11536931991577148,
      "step": 54901
    },
    {
      "epoch": 0.000335089111328125,
      "step": 54901,
      "training_step_time": 0.39354658126831055
    },
    {
      "epoch": 0.00033509521484375,
      "model_forward_time": 0.11468195915222168,
      "step": 54902
    },
    {
      "epoch": 0.00033509521484375,
      "step": 54902,
      "training_step_time": 0.4062621593475342
    },
    {
      "epoch": 0.000335101318359375,
      "model_forward_time": 0.11562442779541016,
      "step": 54903
    },
    {
      "epoch": 0.000335101318359375,
      "step": 54903,
      "training_step_time": 0.3898625373840332
    },
    {
      "epoch": 0.000335107421875,
      "model_forward_time": 0.11517882347106934,
      "step": 54904
    },
    {
      "epoch": 0.000335107421875,
      "step": 54904,
      "training_step_time": 0.6011853218078613
    },
    {
      "epoch": 0.000335113525390625,
      "model_forward_time": 0.11454319953918457,
      "step": 54905
    },
    {
      "epoch": 0.000335113525390625,
      "step": 54905,
      "training_step_time": 0.4177851676940918
    },
    {
      "epoch": 0.00033511962890625,
      "model_forward_time": 0.11494159698486328,
      "step": 54906
    },
    {
      "epoch": 0.00033511962890625,
      "step": 54906,
      "training_step_time": 0.4141669273376465
    },
    {
      "epoch": 0.000335125732421875,
      "model_forward_time": 0.11578130722045898,
      "step": 54907
    },
    {
      "epoch": 0.000335125732421875,
      "step": 54907,
      "training_step_time": 0.4275510311126709
    },
    {
      "epoch": 0.0003351318359375,
      "model_forward_time": 0.11521029472351074,
      "step": 54908
    },
    {
      "epoch": 0.0003351318359375,
      "step": 54908,
      "training_step_time": 0.4713613986968994
    },
    {
      "epoch": 0.000335137939453125,
      "model_forward_time": 0.11528229713439941,
      "step": 54909
    },
    {
      "epoch": 0.000335137939453125,
      "step": 54909,
      "training_step_time": 0.46448779106140137
    },
    {
      "epoch": 0.00033514404296875,
      "grad_norm": 0.08187404274940491,
      "learning_rate": 1.9546788675517236e-06,
      "loss": 0.0371,
      "step": 54910
    },
    {
      "epoch": 0.00033514404296875,
      "model_forward_time": 0.11505246162414551,
      "step": 54910
    },
    {
      "epoch": 0.00033514404296875,
      "step": 54910,
      "training_step_time": 0.39913487434387207
    },
    {
      "epoch": 0.000335150146484375,
      "model_forward_time": 0.11513757705688477,
      "step": 54911
    },
    {
      "epoch": 0.000335150146484375,
      "step": 54911,
      "training_step_time": 0.38787221908569336
    },
    {
      "epoch": 0.00033515625,
      "model_forward_time": 0.11507415771484375,
      "step": 54912
    },
    {
      "epoch": 0.00033515625,
      "step": 54912,
      "training_step_time": 0.4001483917236328
    },
    {
      "epoch": 0.000335162353515625,
      "model_forward_time": 0.11553573608398438,
      "step": 54913
    },
    {
      "epoch": 0.000335162353515625,
      "step": 54913,
      "training_step_time": 0.3948404788970947
    },
    {
      "epoch": 0.00033516845703125,
      "model_forward_time": 0.11499357223510742,
      "step": 54914
    },
    {
      "epoch": 0.00033516845703125,
      "step": 54914,
      "training_step_time": 0.40272021293640137
    },
    {
      "epoch": 0.000335174560546875,
      "model_forward_time": 0.1144721508026123,
      "step": 54915
    },
    {
      "epoch": 0.000335174560546875,
      "step": 54915,
      "training_step_time": 0.3994109630584717
    },
    {
      "epoch": 0.0003351806640625,
      "model_forward_time": 0.1159980297088623,
      "step": 54916
    },
    {
      "epoch": 0.0003351806640625,
      "step": 54916,
      "training_step_time": 0.6725959777832031
    },
    {
      "epoch": 0.000335186767578125,
      "model_forward_time": 0.11502718925476074,
      "step": 54917
    },
    {
      "epoch": 0.000335186767578125,
      "step": 54917,
      "training_step_time": 0.39535975456237793
    },
    {
      "epoch": 0.00033519287109375,
      "model_forward_time": 0.11528253555297852,
      "step": 54918
    },
    {
      "epoch": 0.00033519287109375,
      "step": 54918,
      "training_step_time": 0.39484214782714844
    },
    {
      "epoch": 0.000335198974609375,
      "model_forward_time": 0.11538171768188477,
      "step": 54919
    },
    {
      "epoch": 0.000335198974609375,
      "step": 54919,
      "training_step_time": 0.39905214309692383
    },
    {
      "epoch": 0.000335205078125,
      "grad_norm": 0.09363963454961777,
      "learning_rate": 1.9470561368458485e-06,
      "loss": 0.0418,
      "step": 54920
    },
    {
      "epoch": 0.000335205078125,
      "model_forward_time": 0.11466240882873535,
      "step": 54920
    },
    {
      "epoch": 0.000335205078125,
      "step": 54920,
      "training_step_time": 0.395427942276001
    },
    {
      "epoch": 0.000335211181640625,
      "model_forward_time": 0.11418914794921875,
      "step": 54921
    },
    {
      "epoch": 0.000335211181640625,
      "step": 54921,
      "training_step_time": 0.3611621856689453
    },
    {
      "epoch": 0.00033521728515625,
      "model_forward_time": 0.11641621589660645,
      "step": 54922
    },
    {
      "epoch": 0.00033521728515625,
      "step": 54922,
      "training_step_time": 0.5584611892700195
    },
    {
      "epoch": 0.000335223388671875,
      "model_forward_time": 0.11444711685180664,
      "step": 54923
    },
    {
      "epoch": 0.000335223388671875,
      "step": 54923,
      "training_step_time": 0.4294407367706299
    },
    {
      "epoch": 0.0003352294921875,
      "model_forward_time": 0.11532974243164062,
      "step": 54924
    },
    {
      "epoch": 0.0003352294921875,
      "step": 54924,
      "training_step_time": 0.4132730960845947
    },
    {
      "epoch": 0.000335235595703125,
      "model_forward_time": 0.11475253105163574,
      "step": 54925
    },
    {
      "epoch": 0.000335235595703125,
      "step": 54925,
      "training_step_time": 0.3924293518066406
    },
    {
      "epoch": 0.00033524169921875,
      "model_forward_time": 0.1149146556854248,
      "step": 54926
    },
    {
      "epoch": 0.00033524169921875,
      "step": 54926,
      "training_step_time": 0.3910188674926758
    },
    {
      "epoch": 0.000335247802734375,
      "model_forward_time": 0.11515092849731445,
      "step": 54927
    },
    {
      "epoch": 0.000335247802734375,
      "step": 54927,
      "training_step_time": 0.3989522457122803
    },
    {
      "epoch": 0.00033525390625,
      "model_forward_time": 0.1145944595336914,
      "step": 54928
    },
    {
      "epoch": 0.00033525390625,
      "step": 54928,
      "training_step_time": 0.5733804702758789
    },
    {
      "epoch": 0.000335260009765625,
      "model_forward_time": 0.11503171920776367,
      "step": 54929
    },
    {
      "epoch": 0.000335260009765625,
      "step": 54929,
      "training_step_time": 0.40389084815979004
    },
    {
      "epoch": 0.00033526611328125,
      "grad_norm": 0.06453215330839157,
      "learning_rate": 1.939448003355554e-06,
      "loss": 0.0359,
      "step": 54930
    },
    {
      "epoch": 0.00033526611328125,
      "model_forward_time": 0.1143805980682373,
      "step": 54930
    },
    {
      "epoch": 0.00033526611328125,
      "step": 54930,
      "training_step_time": 0.43322110176086426
    },
    {
      "epoch": 0.000335272216796875,
      "model_forward_time": 0.11488556861877441,
      "step": 54931
    },
    {
      "epoch": 0.000335272216796875,
      "step": 54931,
      "training_step_time": 0.4287989139556885
    },
    {
      "epoch": 0.0003352783203125,
      "model_forward_time": 0.11490893363952637,
      "step": 54932
    },
    {
      "epoch": 0.0003352783203125,
      "step": 54932,
      "training_step_time": 0.3962829113006592
    },
    {
      "epoch": 0.000335284423828125,
      "model_forward_time": 0.11505699157714844,
      "step": 54933
    },
    {
      "epoch": 0.000335284423828125,
      "step": 54933,
      "training_step_time": 0.39667820930480957
    },
    {
      "epoch": 0.00033529052734375,
      "model_forward_time": 0.11533403396606445,
      "step": 54934
    },
    {
      "epoch": 0.00033529052734375,
      "step": 54934,
      "training_step_time": 0.5381958484649658
    },
    {
      "epoch": 0.000335296630859375,
      "model_forward_time": 0.11505007743835449,
      "step": 54935
    },
    {
      "epoch": 0.000335296630859375,
      "step": 54935,
      "training_step_time": 0.42606449127197266
    },
    {
      "epoch": 0.000335302734375,
      "model_forward_time": 0.11547994613647461,
      "step": 54936
    },
    {
      "epoch": 0.000335302734375,
      "step": 54936,
      "training_step_time": 0.43932175636291504
    },
    {
      "epoch": 0.000335308837890625,
      "model_forward_time": 0.11534309387207031,
      "step": 54937
    },
    {
      "epoch": 0.000335308837890625,
      "step": 54937,
      "training_step_time": 0.49070048332214355
    },
    {
      "epoch": 0.00033531494140625,
      "model_forward_time": 0.11496734619140625,
      "step": 54938
    },
    {
      "epoch": 0.00033531494140625,
      "step": 54938,
      "training_step_time": 0.399517297744751
    },
    {
      "epoch": 0.000335321044921875,
      "model_forward_time": 0.11490631103515625,
      "step": 54939
    },
    {
      "epoch": 0.000335321044921875,
      "step": 54939,
      "training_step_time": 0.3820610046386719
    },
    {
      "epoch": 0.0003353271484375,
      "grad_norm": 0.06982424855232239,
      "learning_rate": 1.9318544693919916e-06,
      "loss": 0.0334,
      "step": 54940
    },
    {
      "epoch": 0.0003353271484375,
      "model_forward_time": 0.11519455909729004,
      "step": 54940
    },
    {
      "epoch": 0.0003353271484375,
      "step": 54940,
      "training_step_time": 0.4266369342803955
    },
    {
      "epoch": 0.000335333251953125,
      "model_forward_time": 0.11486935615539551,
      "step": 54941
    },
    {
      "epoch": 0.000335333251953125,
      "step": 54941,
      "training_step_time": 0.3915715217590332
    },
    {
      "epoch": 0.00033533935546875,
      "model_forward_time": 0.11481094360351562,
      "step": 54942
    },
    {
      "epoch": 0.00033533935546875,
      "step": 54942,
      "training_step_time": 0.3983643054962158
    },
    {
      "epoch": 0.000335345458984375,
      "model_forward_time": 0.11565589904785156,
      "step": 54943
    },
    {
      "epoch": 0.000335345458984375,
      "step": 54943,
      "training_step_time": 0.4132204055786133
    },
    {
      "epoch": 0.0003353515625,
      "model_forward_time": 0.11499404907226562,
      "step": 54944
    },
    {
      "epoch": 0.0003353515625,
      "step": 54944,
      "training_step_time": 0.402862548828125
    },
    {
      "epoch": 0.000335357666015625,
      "model_forward_time": 0.11518549919128418,
      "step": 54945
    },
    {
      "epoch": 0.000335357666015625,
      "step": 54945,
      "training_step_time": 0.412914514541626
    },
    {
      "epoch": 0.00033536376953125,
      "model_forward_time": 0.11470270156860352,
      "step": 54946
    },
    {
      "epoch": 0.00033536376953125,
      "step": 54946,
      "training_step_time": 0.649848461151123
    },
    {
      "epoch": 0.000335369873046875,
      "model_forward_time": 0.11527895927429199,
      "step": 54947
    },
    {
      "epoch": 0.000335369873046875,
      "step": 54947,
      "training_step_time": 0.39105916023254395
    },
    {
      "epoch": 0.0003353759765625,
      "model_forward_time": 0.11522412300109863,
      "step": 54948
    },
    {
      "epoch": 0.0003353759765625,
      "step": 54948,
      "training_step_time": 0.4078669548034668
    },
    {
      "epoch": 0.000335382080078125,
      "model_forward_time": 0.11502599716186523,
      "step": 54949
    },
    {
      "epoch": 0.000335382080078125,
      "step": 54949,
      "training_step_time": 0.3651578426361084
    },
    {
      "epoch": 0.00033538818359375,
      "grad_norm": 0.088741734623909,
      "learning_rate": 1.924275537261877e-06,
      "loss": 0.0341,
      "step": 54950
    },
    {
      "epoch": 0.00033538818359375,
      "model_forward_time": 0.11434626579284668,
      "step": 54950
    },
    {
      "epoch": 0.00033538818359375,
      "step": 54950,
      "training_step_time": 0.4654073715209961
    },
    {
      "epoch": 0.000335394287109375,
      "model_forward_time": 0.1146538257598877,
      "step": 54951
    },
    {
      "epoch": 0.000335394287109375,
      "step": 54951,
      "training_step_time": 0.4740927219390869
    },
    {
      "epoch": 0.000335400390625,
      "model_forward_time": 0.11460542678833008,
      "step": 54952
    },
    {
      "epoch": 0.000335400390625,
      "step": 54952,
      "training_step_time": 0.4695448875427246
    },
    {
      "epoch": 0.000335406494140625,
      "model_forward_time": 0.11448812484741211,
      "step": 54953
    },
    {
      "epoch": 0.000335406494140625,
      "step": 54953,
      "training_step_time": 0.3929715156555176
    },
    {
      "epoch": 0.00033541259765625,
      "model_forward_time": 0.11479783058166504,
      "step": 54954
    },
    {
      "epoch": 0.00033541259765625,
      "step": 54954,
      "training_step_time": 0.3963468074798584
    },
    {
      "epoch": 0.000335418701171875,
      "model_forward_time": 0.1147928237915039,
      "step": 54955
    },
    {
      "epoch": 0.000335418701171875,
      "step": 54955,
      "training_step_time": 0.3987281322479248
    },
    {
      "epoch": 0.0003354248046875,
      "model_forward_time": 0.11485838890075684,
      "step": 54956
    },
    {
      "epoch": 0.0003354248046875,
      "step": 54956,
      "training_step_time": 0.392287015914917
    },
    {
      "epoch": 0.000335430908203125,
      "model_forward_time": 0.11510682106018066,
      "step": 54957
    },
    {
      "epoch": 0.000335430908203125,
      "step": 54957,
      "training_step_time": 0.42684459686279297
    },
    {
      "epoch": 0.00033543701171875,
      "model_forward_time": 0.11510348320007324,
      "step": 54958
    },
    {
      "epoch": 0.00033543701171875,
      "step": 54958,
      "training_step_time": 0.5352025032043457
    },
    {
      "epoch": 0.000335443115234375,
      "model_forward_time": 0.11514067649841309,
      "step": 54959
    },
    {
      "epoch": 0.000335443115234375,
      "step": 54959,
      "training_step_time": 0.406008243560791
    },
    {
      "epoch": 0.00033544921875,
      "grad_norm": 0.07967808097600937,
      "learning_rate": 1.91671120926748e-06,
      "loss": 0.0376,
      "step": 54960
    },
    {
      "epoch": 0.00033544921875,
      "model_forward_time": 0.11470842361450195,
      "step": 54960
    },
    {
      "epoch": 0.00033544921875,
      "step": 54960,
      "training_step_time": 0.4018828868865967
    },
    {
      "epoch": 0.000335455322265625,
      "model_forward_time": 0.11463356018066406,
      "step": 54961
    },
    {
      "epoch": 0.000335455322265625,
      "step": 54961,
      "training_step_time": 0.41364073753356934
    },
    {
      "epoch": 0.00033546142578125,
      "model_forward_time": 0.11458301544189453,
      "step": 54962
    },
    {
      "epoch": 0.00033546142578125,
      "step": 54962,
      "training_step_time": 0.38602495193481445
    },
    {
      "epoch": 0.000335467529296875,
      "model_forward_time": 0.11472678184509277,
      "step": 54963
    },
    {
      "epoch": 0.000335467529296875,
      "step": 54963,
      "training_step_time": 0.3992574214935303
    },
    {
      "epoch": 0.0003354736328125,
      "model_forward_time": 0.11491131782531738,
      "step": 54964
    },
    {
      "epoch": 0.0003354736328125,
      "step": 54964,
      "training_step_time": 0.6656966209411621
    },
    {
      "epoch": 0.000335479736328125,
      "model_forward_time": 0.1144251823425293,
      "step": 54965
    },
    {
      "epoch": 0.000335479736328125,
      "step": 54965,
      "training_step_time": 0.5053153038024902
    },
    {
      "epoch": 0.00033548583984375,
      "model_forward_time": 0.1148078441619873,
      "step": 54966
    },
    {
      "epoch": 0.00033548583984375,
      "step": 54966,
      "training_step_time": 0.38901543617248535
    },
    {
      "epoch": 0.000335491943359375,
      "model_forward_time": 0.1148223876953125,
      "step": 54967
    },
    {
      "epoch": 0.000335491943359375,
      "step": 54967,
      "training_step_time": 0.391526460647583
    },
    {
      "epoch": 0.000335498046875,
      "model_forward_time": 0.11467123031616211,
      "step": 54968
    },
    {
      "epoch": 0.000335498046875,
      "step": 54968,
      "training_step_time": 0.40688490867614746
    },
    {
      "epoch": 0.000335504150390625,
      "model_forward_time": 0.11486411094665527,
      "step": 54969
    },
    {
      "epoch": 0.000335504150390625,
      "step": 54969,
      "training_step_time": 0.39153027534484863
    },
    {
      "epoch": 0.00033551025390625,
      "grad_norm": 0.07546915858983994,
      "learning_rate": 1.9091614877066622e-06,
      "loss": 0.0419,
      "step": 54970
    },
    {
      "epoch": 0.00033551025390625,
      "model_forward_time": 0.1148829460144043,
      "step": 54970
    },
    {
      "epoch": 0.00033551025390625,
      "step": 54970,
      "training_step_time": 0.42873072624206543
    },
    {
      "epoch": 0.000335516357421875,
      "model_forward_time": 0.11475324630737305,
      "step": 54971
    },
    {
      "epoch": 0.000335516357421875,
      "step": 54971,
      "training_step_time": 0.4045126438140869
    },
    {
      "epoch": 0.0003355224609375,
      "model_forward_time": 0.1162722110748291,
      "step": 54972
    },
    {
      "epoch": 0.0003355224609375,
      "step": 54972,
      "training_step_time": 0.3970208168029785
    },
    {
      "epoch": 0.000335528564453125,
      "model_forward_time": 0.11503219604492188,
      "step": 54973
    },
    {
      "epoch": 0.000335528564453125,
      "step": 54973,
      "training_step_time": 0.3970508575439453
    },
    {
      "epoch": 0.00033553466796875,
      "model_forward_time": 0.11560249328613281,
      "step": 54974
    },
    {
      "epoch": 0.00033553466796875,
      "step": 54974,
      "training_step_time": 0.3869595527648926
    },
    {
      "epoch": 0.000335540771484375,
      "model_forward_time": 0.11464357376098633,
      "step": 54975
    },
    {
      "epoch": 0.000335540771484375,
      "step": 54975,
      "training_step_time": 0.40371179580688477
    },
    {
      "epoch": 0.000335546875,
      "model_forward_time": 0.11498212814331055,
      "step": 54976
    },
    {
      "epoch": 0.000335546875,
      "step": 54976,
      "training_step_time": 0.6737534999847412
    },
    {
      "epoch": 0.000335552978515625,
      "model_forward_time": 0.11523914337158203,
      "step": 54977
    },
    {
      "epoch": 0.000335552978515625,
      "step": 54977,
      "training_step_time": 0.3970296382904053
    },
    {
      "epoch": 0.00033555908203125,
      "model_forward_time": 0.11512064933776855,
      "step": 54978
    },
    {
      "epoch": 0.00033555908203125,
      "step": 54978,
      "training_step_time": 0.47058939933776855
    },
    {
      "epoch": 0.000335565185546875,
      "model_forward_time": 0.11583280563354492,
      "step": 54979
    },
    {
      "epoch": 0.000335565185546875,
      "step": 54979,
      "training_step_time": 0.48569440841674805
    },
    {
      "epoch": 0.0003355712890625,
      "grad_norm": 0.06233121454715729,
      "learning_rate": 1.9016263748728114e-06,
      "loss": 0.0314,
      "step": 54980
    },
    {
      "epoch": 0.0003355712890625,
      "model_forward_time": 0.11455583572387695,
      "step": 54980
    },
    {
      "epoch": 0.0003355712890625,
      "step": 54980,
      "training_step_time": 0.4094858169555664
    },
    {
      "epoch": 0.000335577392578125,
      "model_forward_time": 0.11473608016967773,
      "step": 54981
    },
    {
      "epoch": 0.000335577392578125,
      "step": 54981,
      "training_step_time": 0.38421630859375
    },
    {
      "epoch": 0.00033558349609375,
      "model_forward_time": 0.11507797241210938,
      "step": 54982
    },
    {
      "epoch": 0.00033558349609375,
      "step": 54982,
      "training_step_time": 0.4683365821838379
    },
    {
      "epoch": 0.000335589599609375,
      "model_forward_time": 0.11490130424499512,
      "step": 54983
    },
    {
      "epoch": 0.000335589599609375,
      "step": 54983,
      "training_step_time": 0.4012303352355957
    },
    {
      "epoch": 0.000335595703125,
      "model_forward_time": 0.11540555953979492,
      "step": 54984
    },
    {
      "epoch": 0.000335595703125,
      "step": 54984,
      "training_step_time": 0.4353654384613037
    },
    {
      "epoch": 0.000335601806640625,
      "model_forward_time": 0.11493325233459473,
      "step": 54985
    },
    {
      "epoch": 0.000335601806640625,
      "step": 54985,
      "training_step_time": 0.39063239097595215
    },
    {
      "epoch": 0.00033560791015625,
      "model_forward_time": 0.11503219604492188,
      "step": 54986
    },
    {
      "epoch": 0.00033560791015625,
      "step": 54986,
      "training_step_time": 0.4006960391998291
    },
    {
      "epoch": 0.000335614013671875,
      "model_forward_time": 0.11473822593688965,
      "step": 54987
    },
    {
      "epoch": 0.000335614013671875,
      "step": 54987,
      "training_step_time": 0.3920886516571045
    },
    {
      "epoch": 0.0003356201171875,
      "model_forward_time": 0.11521124839782715,
      "step": 54988
    },
    {
      "epoch": 0.0003356201171875,
      "step": 54988,
      "training_step_time": 0.6123006343841553
    },
    {
      "epoch": 0.000335626220703125,
      "model_forward_time": 0.11465573310852051,
      "step": 54989
    },
    {
      "epoch": 0.000335626220703125,
      "step": 54989,
      "training_step_time": 0.3901560306549072
    },
    {
      "epoch": 0.00033563232421875,
      "grad_norm": 0.06599266082048416,
      "learning_rate": 1.8941058730549132e-06,
      "loss": 0.0336,
      "step": 54990
    },
    {
      "epoch": 0.00033563232421875,
      "model_forward_time": 0.11488842964172363,
      "step": 54990
    },
    {
      "epoch": 0.00033563232421875,
      "step": 54990,
      "training_step_time": 0.40027570724487305
    },
    {
      "epoch": 0.000335638427734375,
      "model_forward_time": 0.11486625671386719,
      "step": 54991
    },
    {
      "epoch": 0.000335638427734375,
      "step": 54991,
      "training_step_time": 0.39061570167541504
    },
    {
      "epoch": 0.00033564453125,
      "model_forward_time": 0.11740899085998535,
      "step": 54992
    },
    {
      "epoch": 0.00033564453125,
      "step": 54992,
      "training_step_time": 0.47797656059265137
    },
    {
      "epoch": 0.000335650634765625,
      "model_forward_time": 0.11506271362304688,
      "step": 54993
    },
    {
      "epoch": 0.000335650634765625,
      "step": 54993,
      "training_step_time": 0.435260534286499
    },
    {
      "epoch": 0.00033565673828125,
      "model_forward_time": 0.11591053009033203,
      "step": 54994
    },
    {
      "epoch": 0.00033565673828125,
      "step": 54994,
      "training_step_time": 0.5763671398162842
    },
    {
      "epoch": 0.000335662841796875,
      "model_forward_time": 0.11456179618835449,
      "step": 54995
    },
    {
      "epoch": 0.000335662841796875,
      "step": 54995,
      "training_step_time": 0.38619327545166016
    },
    {
      "epoch": 0.0003356689453125,
      "model_forward_time": 0.11486363410949707,
      "step": 54996
    },
    {
      "epoch": 0.0003356689453125,
      "step": 54996,
      "training_step_time": 0.3970911502838135
    },
    {
      "epoch": 0.000335675048828125,
      "model_forward_time": 0.11470508575439453,
      "step": 54997
    },
    {
      "epoch": 0.000335675048828125,
      "step": 54997,
      "training_step_time": 0.41591334342956543
    },
    {
      "epoch": 0.00033568115234375,
      "model_forward_time": 0.11554622650146484,
      "step": 54998
    },
    {
      "epoch": 0.00033568115234375,
      "step": 54998,
      "training_step_time": 0.41475605964660645
    },
    {
      "epoch": 0.000335687255859375,
      "model_forward_time": 0.11471843719482422,
      "step": 54999
    },
    {
      "epoch": 0.000335687255859375,
      "step": 54999,
      "training_step_time": 0.3902242183685303
    },
    {
      "epoch": 0.000335693359375,
      "grad_norm": 0.07994025945663452,
      "learning_rate": 1.8865999845374793e-06,
      "loss": 0.0329,
      "step": 55000
    },
    {
      "epoch": 0.000335693359375,
      "model_forward_time": 0.11277556419372559,
      "step": 55000
    },
    {
      "epoch": 0.000335693359375,
      "step": 55000,
      "training_step_time": 0.35321831703186035
    },
    {
      "epoch": 0.000335699462890625,
      "model_forward_time": 0.1122736930847168,
      "step": 55001
    },
    {
      "epoch": 0.000335699462890625,
      "step": 55001,
      "training_step_time": 0.3758354187011719
    },
    {
      "epoch": 0.00033570556640625,
      "model_forward_time": 0.11252117156982422,
      "step": 55002
    },
    {
      "epoch": 0.00033570556640625,
      "step": 55002,
      "training_step_time": 0.3923666477203369
    },
    {
      "epoch": 0.000335711669921875,
      "model_forward_time": 0.11310076713562012,
      "step": 55003
    },
    {
      "epoch": 0.000335711669921875,
      "step": 55003,
      "training_step_time": 0.3790316581726074
    },
    {
      "epoch": 0.0003357177734375,
      "model_forward_time": 0.1136636734008789,
      "step": 55004
    },
    {
      "epoch": 0.0003357177734375,
      "step": 55004,
      "training_step_time": 0.38169407844543457
    },
    {
      "epoch": 0.000335723876953125,
      "model_forward_time": 0.11406683921813965,
      "step": 55005
    },
    {
      "epoch": 0.000335723876953125,
      "step": 55005,
      "training_step_time": 0.44872403144836426
    },
    {
      "epoch": 0.00033572998046875,
      "model_forward_time": 0.1144406795501709,
      "step": 55006
    },
    {
      "epoch": 0.00033572998046875,
      "step": 55006,
      "training_step_time": 0.47614121437072754
    },
    {
      "epoch": 0.000335736083984375,
      "model_forward_time": 0.11433839797973633,
      "step": 55007
    },
    {
      "epoch": 0.000335736083984375,
      "step": 55007,
      "training_step_time": 0.48525023460388184
    },
    {
      "epoch": 0.0003357421875,
      "model_forward_time": 0.11478257179260254,
      "step": 55008
    },
    {
      "epoch": 0.0003357421875,
      "step": 55008,
      "training_step_time": 0.4442424774169922
    },
    {
      "epoch": 0.000335748291015625,
      "model_forward_time": 0.11475563049316406,
      "step": 55009
    },
    {
      "epoch": 0.000335748291015625,
      "step": 55009,
      "training_step_time": 0.394561767578125
    },
    {
      "epoch": 0.00033575439453125,
      "grad_norm": 0.09633877873420715,
      "learning_rate": 1.8791087116006189e-06,
      "loss": 0.0409,
      "step": 55010
    },
    {
      "epoch": 0.00033575439453125,
      "model_forward_time": 0.11430954933166504,
      "step": 55010
    },
    {
      "epoch": 0.00033575439453125,
      "step": 55010,
      "training_step_time": 0.45449233055114746
    },
    {
      "epoch": 0.000335760498046875,
      "model_forward_time": 0.11439371109008789,
      "step": 55011
    },
    {
      "epoch": 0.000335760498046875,
      "step": 55011,
      "training_step_time": 0.37709856033325195
    },
    {
      "epoch": 0.0003357666015625,
      "model_forward_time": 0.11574006080627441,
      "step": 55012
    },
    {
      "epoch": 0.0003357666015625,
      "step": 55012,
      "training_step_time": 0.37907862663269043
    },
    {
      "epoch": 0.000335772705078125,
      "model_forward_time": 0.11480116844177246,
      "step": 55013
    },
    {
      "epoch": 0.000335772705078125,
      "step": 55013,
      "training_step_time": 0.3761746883392334
    },
    {
      "epoch": 0.00033577880859375,
      "model_forward_time": 0.1152029037475586,
      "step": 55014
    },
    {
      "epoch": 0.00033577880859375,
      "step": 55014,
      "training_step_time": 0.4018571376800537
    },
    {
      "epoch": 0.000335784912109375,
      "model_forward_time": 0.11643505096435547,
      "step": 55015
    },
    {
      "epoch": 0.000335784912109375,
      "step": 55015,
      "training_step_time": 0.3979017734527588
    },
    {
      "epoch": 0.000335791015625,
      "model_forward_time": 0.11508393287658691,
      "step": 55016
    },
    {
      "epoch": 0.000335791015625,
      "step": 55016,
      "training_step_time": 0.40674376487731934
    },
    {
      "epoch": 0.000335797119140625,
      "model_forward_time": 0.11501121520996094,
      "step": 55017
    },
    {
      "epoch": 0.000335797119140625,
      "step": 55017,
      "training_step_time": 0.4072835445404053
    },
    {
      "epoch": 0.00033580322265625,
      "model_forward_time": 0.11526131629943848,
      "step": 55018
    },
    {
      "epoch": 0.00033580322265625,
      "step": 55018,
      "training_step_time": 0.39475345611572266
    },
    {
      "epoch": 0.000335809326171875,
      "model_forward_time": 0.11592817306518555,
      "step": 55019
    },
    {
      "epoch": 0.000335809326171875,
      "step": 55019,
      "training_step_time": 0.3938760757446289
    },
    {
      "epoch": 0.0003358154296875,
      "grad_norm": 0.07988865673542023,
      "learning_rate": 1.8716320565199618e-06,
      "loss": 0.0343,
      "step": 55020
    },
    {
      "epoch": 0.0003358154296875,
      "model_forward_time": 0.11514139175415039,
      "step": 55020
    },
    {
      "epoch": 0.0003358154296875,
      "step": 55020,
      "training_step_time": 0.37790799140930176
    },
    {
      "epoch": 0.000335821533203125,
      "model_forward_time": 0.11533474922180176,
      "step": 55021
    },
    {
      "epoch": 0.000335821533203125,
      "step": 55021,
      "training_step_time": 0.4335963726043701
    },
    {
      "epoch": 0.00033582763671875,
      "model_forward_time": 0.1148827075958252,
      "step": 55022
    },
    {
      "epoch": 0.00033582763671875,
      "step": 55022,
      "training_step_time": 0.4959428310394287
    },
    {
      "epoch": 0.000335833740234375,
      "model_forward_time": 0.11493968963623047,
      "step": 55023
    },
    {
      "epoch": 0.000335833740234375,
      "step": 55023,
      "training_step_time": 0.4256246089935303
    },
    {
      "epoch": 0.00033583984375,
      "model_forward_time": 0.11433815956115723,
      "step": 55024
    },
    {
      "epoch": 0.00033583984375,
      "step": 55024,
      "training_step_time": 0.4284379482269287
    },
    {
      "epoch": 0.000335845947265625,
      "model_forward_time": 0.11482477188110352,
      "step": 55025
    },
    {
      "epoch": 0.000335845947265625,
      "step": 55025,
      "training_step_time": 0.39111924171447754
    },
    {
      "epoch": 0.00033585205078125,
      "model_forward_time": 0.11493039131164551,
      "step": 55026
    },
    {
      "epoch": 0.00033585205078125,
      "step": 55026,
      "training_step_time": 0.40360021591186523
    },
    {
      "epoch": 0.000335858154296875,
      "model_forward_time": 0.11479020118713379,
      "step": 55027
    },
    {
      "epoch": 0.000335858154296875,
      "step": 55027,
      "training_step_time": 0.3861262798309326
    },
    {
      "epoch": 0.0003358642578125,
      "model_forward_time": 0.11453890800476074,
      "step": 55028
    },
    {
      "epoch": 0.0003358642578125,
      "step": 55028,
      "training_step_time": 0.3922262191772461
    },
    {
      "epoch": 0.000335870361328125,
      "model_forward_time": 0.11528778076171875,
      "step": 55029
    },
    {
      "epoch": 0.000335870361328125,
      "step": 55029,
      "training_step_time": 0.40259623527526855
    },
    {
      "epoch": 0.00033587646484375,
      "grad_norm": 0.10287868976593018,
      "learning_rate": 1.8641700215667413e-06,
      "loss": 0.0392,
      "step": 55030
    },
    {
      "epoch": 0.00033587646484375,
      "model_forward_time": 0.11493444442749023,
      "step": 55030
    },
    {
      "epoch": 0.00033587646484375,
      "step": 55030,
      "training_step_time": 0.4048480987548828
    },
    {
      "epoch": 0.000335882568359375,
      "model_forward_time": 0.11522221565246582,
      "step": 55031
    },
    {
      "epoch": 0.000335882568359375,
      "step": 55031,
      "training_step_time": 0.3953738212585449
    },
    {
      "epoch": 0.000335888671875,
      "model_forward_time": 0.11546897888183594,
      "step": 55032
    },
    {
      "epoch": 0.000335888671875,
      "step": 55032,
      "training_step_time": 0.4119737148284912
    },
    {
      "epoch": 0.000335894775390625,
      "model_forward_time": 0.1152029037475586,
      "step": 55033
    },
    {
      "epoch": 0.000335894775390625,
      "step": 55033,
      "training_step_time": 0.3823416233062744
    },
    {
      "epoch": 0.00033590087890625,
      "model_forward_time": 0.1154623031616211,
      "step": 55034
    },
    {
      "epoch": 0.00033590087890625,
      "step": 55034,
      "training_step_time": 0.3937561511993408
    },
    {
      "epoch": 0.000335906982421875,
      "model_forward_time": 0.11543798446655273,
      "step": 55035
    },
    {
      "epoch": 0.000335906982421875,
      "step": 55035,
      "training_step_time": 0.4354667663574219
    },
    {
      "epoch": 0.0003359130859375,
      "model_forward_time": 0.11451292037963867,
      "step": 55036
    },
    {
      "epoch": 0.0003359130859375,
      "step": 55036,
      "training_step_time": 0.5231075286865234
    },
    {
      "epoch": 0.000335919189453125,
      "model_forward_time": 0.114776611328125,
      "step": 55037
    },
    {
      "epoch": 0.000335919189453125,
      "step": 55037,
      "training_step_time": 0.42026829719543457
    },
    {
      "epoch": 0.00033592529296875,
      "model_forward_time": 0.11497020721435547,
      "step": 55038
    },
    {
      "epoch": 0.00033592529296875,
      "step": 55038,
      "training_step_time": 0.39609241485595703
    },
    {
      "epoch": 0.000335931396484375,
      "model_forward_time": 0.11543726921081543,
      "step": 55039
    },
    {
      "epoch": 0.000335931396484375,
      "step": 55039,
      "training_step_time": 0.37830138206481934
    },
    {
      "epoch": 0.0003359375,
      "grad_norm": 0.09216681122779846,
      "learning_rate": 1.856722609007705e-06,
      "loss": 0.0408,
      "step": 55040
    },
    {
      "epoch": 0.0003359375,
      "model_forward_time": 0.11466193199157715,
      "step": 55040
    },
    {
      "epoch": 0.0003359375,
      "step": 55040,
      "training_step_time": 0.39098620414733887
    },
    {
      "epoch": 0.000335943603515625,
      "model_forward_time": 0.11483979225158691,
      "step": 55041
    },
    {
      "epoch": 0.000335943603515625,
      "step": 55041,
      "training_step_time": 0.3966231346130371
    },
    {
      "epoch": 0.00033594970703125,
      "model_forward_time": 0.11468815803527832,
      "step": 55042
    },
    {
      "epoch": 0.00033594970703125,
      "step": 55042,
      "training_step_time": 0.4038379192352295
    },
    {
      "epoch": 0.000335955810546875,
      "model_forward_time": 0.11513495445251465,
      "step": 55043
    },
    {
      "epoch": 0.000335955810546875,
      "step": 55043,
      "training_step_time": 0.4066028594970703
    },
    {
      "epoch": 0.0003359619140625,
      "model_forward_time": 0.11448121070861816,
      "step": 55044
    },
    {
      "epoch": 0.0003359619140625,
      "step": 55044,
      "training_step_time": 0.42395448684692383
    },
    {
      "epoch": 0.000335968017578125,
      "model_forward_time": 0.11520838737487793,
      "step": 55045
    },
    {
      "epoch": 0.000335968017578125,
      "step": 55045,
      "training_step_time": 0.3838646411895752
    },
    {
      "epoch": 0.00033597412109375,
      "model_forward_time": 0.11517572402954102,
      "step": 55046
    },
    {
      "epoch": 0.00033597412109375,
      "step": 55046,
      "training_step_time": 0.38742971420288086
    },
    {
      "epoch": 0.000335980224609375,
      "model_forward_time": 0.11535239219665527,
      "step": 55047
    },
    {
      "epoch": 0.000335980224609375,
      "step": 55047,
      "training_step_time": 0.3897242546081543
    },
    {
      "epoch": 0.000335986328125,
      "model_forward_time": 0.11509299278259277,
      "step": 55048
    },
    {
      "epoch": 0.000335986328125,
      "step": 55048,
      "training_step_time": 0.4140896797180176
    },
    {
      "epoch": 0.000335992431640625,
      "model_forward_time": 0.11488485336303711,
      "step": 55049
    },
    {
      "epoch": 0.000335992431640625,
      "step": 55049,
      "training_step_time": 0.47872161865234375
    },
    {
      "epoch": 0.00033599853515625,
      "grad_norm": 0.09386888891458511,
      "learning_rate": 1.849289821105199e-06,
      "loss": 0.0325,
      "step": 55050
    },
    {
      "epoch": 0.00033599853515625,
      "model_forward_time": 0.11602377891540527,
      "step": 55050
    },
    {
      "epoch": 0.00033599853515625,
      "step": 55050,
      "training_step_time": 0.43271899223327637
    },
    {
      "epoch": 0.000336004638671875,
      "model_forward_time": 0.11536693572998047,
      "step": 55051
    },
    {
      "epoch": 0.000336004638671875,
      "step": 55051,
      "training_step_time": 0.46918201446533203
    },
    {
      "epoch": 0.0003360107421875,
      "model_forward_time": 0.1149744987487793,
      "step": 55052
    },
    {
      "epoch": 0.0003360107421875,
      "step": 55052,
      "training_step_time": 0.4462761878967285
    },
    {
      "epoch": 0.000336016845703125,
      "model_forward_time": 0.11507916450500488,
      "step": 55053
    },
    {
      "epoch": 0.000336016845703125,
      "step": 55053,
      "training_step_time": 0.38037991523742676
    },
    {
      "epoch": 0.00033602294921875,
      "model_forward_time": 0.1143958568572998,
      "step": 55054
    },
    {
      "epoch": 0.00033602294921875,
      "step": 55054,
      "training_step_time": 0.3932454586029053
    },
    {
      "epoch": 0.000336029052734375,
      "model_forward_time": 0.11485576629638672,
      "step": 55055
    },
    {
      "epoch": 0.000336029052734375,
      "step": 55055,
      "training_step_time": 0.38358116149902344
    },
    {
      "epoch": 0.00033603515625,
      "model_forward_time": 0.1148219108581543,
      "step": 55056
    },
    {
      "epoch": 0.00033603515625,
      "step": 55056,
      "training_step_time": 0.40871691703796387
    },
    {
      "epoch": 0.000336041259765625,
      "model_forward_time": 0.11524629592895508,
      "step": 55057
    },
    {
      "epoch": 0.000336041259765625,
      "step": 55057,
      "training_step_time": 0.38991570472717285
    },
    {
      "epoch": 0.00033604736328125,
      "model_forward_time": 0.11518597602844238,
      "step": 55058
    },
    {
      "epoch": 0.00033604736328125,
      "step": 55058,
      "training_step_time": 0.39336419105529785
    },
    {
      "epoch": 0.000336053466796875,
      "model_forward_time": 0.11514639854431152,
      "step": 55059
    },
    {
      "epoch": 0.000336053466796875,
      "step": 55059,
      "training_step_time": 0.3869929313659668
    },
    {
      "epoch": 0.0003360595703125,
      "grad_norm": 0.08692585676908493,
      "learning_rate": 1.841871660117095e-06,
      "loss": 0.0341,
      "step": 55060
    },
    {
      "epoch": 0.0003360595703125,
      "model_forward_time": 0.11606812477111816,
      "step": 55060
    },
    {
      "epoch": 0.0003360595703125,
      "step": 55060,
      "training_step_time": 0.39940595626831055
    },
    {
      "epoch": 0.000336065673828125,
      "model_forward_time": 0.11510944366455078,
      "step": 55061
    },
    {
      "epoch": 0.000336065673828125,
      "step": 55061,
      "training_step_time": 0.39932703971862793
    },
    {
      "epoch": 0.00033607177734375,
      "model_forward_time": 0.11486506462097168,
      "step": 55062
    },
    {
      "epoch": 0.00033607177734375,
      "step": 55062,
      "training_step_time": 0.4268186092376709
    },
    {
      "epoch": 0.000336077880859375,
      "model_forward_time": 0.11455321311950684,
      "step": 55063
    },
    {
      "epoch": 0.000336077880859375,
      "step": 55063,
      "training_step_time": 0.3962268829345703
    },
    {
      "epoch": 0.000336083984375,
      "model_forward_time": 0.11536955833435059,
      "step": 55064
    },
    {
      "epoch": 0.000336083984375,
      "step": 55064,
      "training_step_time": 0.4860222339630127
    },
    {
      "epoch": 0.000336090087890625,
      "model_forward_time": 0.11470222473144531,
      "step": 55065
    },
    {
      "epoch": 0.000336090087890625,
      "step": 55065,
      "training_step_time": 0.4163661003112793
    },
    {
      "epoch": 0.00033609619140625,
      "model_forward_time": 0.11563515663146973,
      "step": 55066
    },
    {
      "epoch": 0.00033609619140625,
      "step": 55066,
      "training_step_time": 0.504662036895752
    },
    {
      "epoch": 0.000336102294921875,
      "model_forward_time": 0.11467123031616211,
      "step": 55067
    },
    {
      "epoch": 0.000336102294921875,
      "step": 55067,
      "training_step_time": 0.40964174270629883
    },
    {
      "epoch": 0.0003361083984375,
      "model_forward_time": 0.11537384986877441,
      "step": 55068
    },
    {
      "epoch": 0.0003361083984375,
      "step": 55068,
      "training_step_time": 0.39638209342956543
    },
    {
      "epoch": 0.000336114501953125,
      "model_forward_time": 0.11560440063476562,
      "step": 55069
    },
    {
      "epoch": 0.000336114501953125,
      "step": 55069,
      "training_step_time": 0.40070128440856934
    },
    {
      "epoch": 0.00033612060546875,
      "grad_norm": 0.1040436252951622,
      "learning_rate": 1.8344681282968401e-06,
      "loss": 0.0378,
      "step": 55070
    },
    {
      "epoch": 0.00033612060546875,
      "model_forward_time": 0.11735367774963379,
      "step": 55070
    },
    {
      "epoch": 0.00033612060546875,
      "step": 55070,
      "training_step_time": 0.39235377311706543
    },
    {
      "epoch": 0.000336126708984375,
      "model_forward_time": 0.11649274826049805,
      "step": 55071
    },
    {
      "epoch": 0.000336126708984375,
      "step": 55071,
      "training_step_time": 0.38071322441101074
    },
    {
      "epoch": 0.0003361328125,
      "model_forward_time": 0.11521029472351074,
      "step": 55072
    },
    {
      "epoch": 0.0003361328125,
      "step": 55072,
      "training_step_time": 0.4031493663787842
    },
    {
      "epoch": 0.000336138916015625,
      "model_forward_time": 0.11447525024414062,
      "step": 55073
    },
    {
      "epoch": 0.000336138916015625,
      "step": 55073,
      "training_step_time": 0.3930392265319824
    },
    {
      "epoch": 0.00033614501953125,
      "model_forward_time": 0.11496901512145996,
      "step": 55074
    },
    {
      "epoch": 0.00033614501953125,
      "step": 55074,
      "training_step_time": 0.39260125160217285
    },
    {
      "epoch": 0.000336151123046875,
      "model_forward_time": 0.11549854278564453,
      "step": 55075
    },
    {
      "epoch": 0.000336151123046875,
      "step": 55075,
      "training_step_time": 0.3982391357421875
    },
    {
      "epoch": 0.0003361572265625,
      "model_forward_time": 0.11558675765991211,
      "step": 55076
    },
    {
      "epoch": 0.0003361572265625,
      "step": 55076,
      "training_step_time": 0.390214204788208
    },
    {
      "epoch": 0.000336163330078125,
      "model_forward_time": 0.11513972282409668,
      "step": 55077
    },
    {
      "epoch": 0.000336163330078125,
      "step": 55077,
      "training_step_time": 0.389082670211792
    },
    {
      "epoch": 0.00033616943359375,
      "model_forward_time": 0.11503410339355469,
      "step": 55078
    },
    {
      "epoch": 0.00033616943359375,
      "step": 55078,
      "training_step_time": 0.3950204849243164
    },
    {
      "epoch": 0.000336175537109375,
      "model_forward_time": 0.11543703079223633,
      "step": 55079
    },
    {
      "epoch": 0.000336175537109375,
      "step": 55079,
      "training_step_time": 0.4438819885253906
    },
    {
      "epoch": 0.000336181640625,
      "grad_norm": 0.09259229153394699,
      "learning_rate": 1.8270792278934302e-06,
      "loss": 0.0358,
      "step": 55080
    },
    {
      "epoch": 0.000336181640625,
      "model_forward_time": 0.11513161659240723,
      "step": 55080
    },
    {
      "epoch": 0.000336181640625,
      "step": 55080,
      "training_step_time": 0.513401985168457
    },
    {
      "epoch": 0.000336187744140625,
      "model_forward_time": 0.1145784854888916,
      "step": 55081
    },
    {
      "epoch": 0.000336187744140625,
      "step": 55081,
      "training_step_time": 0.5149266719818115
    },
    {
      "epoch": 0.00033619384765625,
      "model_forward_time": 0.1141510009765625,
      "step": 55082
    },
    {
      "epoch": 0.00033619384765625,
      "step": 55082,
      "training_step_time": 0.38860440254211426
    },
    {
      "epoch": 0.000336199951171875,
      "model_forward_time": 0.11479020118713379,
      "step": 55083
    },
    {
      "epoch": 0.000336199951171875,
      "step": 55083,
      "training_step_time": 0.4152240753173828
    },
    {
      "epoch": 0.0003362060546875,
      "model_forward_time": 0.11447930335998535,
      "step": 55084
    },
    {
      "epoch": 0.0003362060546875,
      "step": 55084,
      "training_step_time": 0.39360976219177246
    },
    {
      "epoch": 0.000336212158203125,
      "model_forward_time": 0.1154179573059082,
      "step": 55085
    },
    {
      "epoch": 0.000336212158203125,
      "step": 55085,
      "training_step_time": 0.3914296627044678
    },
    {
      "epoch": 0.00033621826171875,
      "model_forward_time": 0.11532258987426758,
      "step": 55086
    },
    {
      "epoch": 0.00033621826171875,
      "step": 55086,
      "training_step_time": 0.4019317626953125
    },
    {
      "epoch": 0.000336224365234375,
      "model_forward_time": 0.11441397666931152,
      "step": 55087
    },
    {
      "epoch": 0.000336224365234375,
      "step": 55087,
      "training_step_time": 0.39465832710266113
    },
    {
      "epoch": 0.00033623046875,
      "model_forward_time": 0.11544132232666016,
      "step": 55088
    },
    {
      "epoch": 0.00033623046875,
      "step": 55088,
      "training_step_time": 0.4066767692565918
    },
    {
      "epoch": 0.000336236572265625,
      "model_forward_time": 0.11515069007873535,
      "step": 55089
    },
    {
      "epoch": 0.000336236572265625,
      "step": 55089,
      "training_step_time": 0.7951619625091553
    },
    {
      "epoch": 0.00033624267578125,
      "grad_norm": 0.07115286588668823,
      "learning_rate": 1.8197049611514194e-06,
      "loss": 0.0328,
      "step": 55090
    },
    {
      "epoch": 0.00033624267578125,
      "model_forward_time": 0.1146090030670166,
      "step": 55090
    },
    {
      "epoch": 0.00033624267578125,
      "step": 55090,
      "training_step_time": 0.38327455520629883
    },
    {
      "epoch": 0.000336248779296875,
      "model_forward_time": 0.1136622428894043,
      "step": 55091
    },
    {
      "epoch": 0.000336248779296875,
      "step": 55091,
      "training_step_time": 0.3812375068664551
    },
    {
      "epoch": 0.0003362548828125,
      "model_forward_time": 0.11423277854919434,
      "step": 55092
    },
    {
      "epoch": 0.0003362548828125,
      "step": 55092,
      "training_step_time": 0.4712095260620117
    },
    {
      "epoch": 0.000336260986328125,
      "model_forward_time": 0.11435890197753906,
      "step": 55093
    },
    {
      "epoch": 0.000336260986328125,
      "step": 55093,
      "training_step_time": 0.4716908931732178
    },
    {
      "epoch": 0.00033626708984375,
      "model_forward_time": 0.11426424980163574,
      "step": 55094
    },
    {
      "epoch": 0.00033626708984375,
      "step": 55094,
      "training_step_time": 0.3942434787750244
    },
    {
      "epoch": 0.000336273193359375,
      "model_forward_time": 0.11544990539550781,
      "step": 55095
    },
    {
      "epoch": 0.000336273193359375,
      "step": 55095,
      "training_step_time": 0.5156211853027344
    },
    {
      "epoch": 0.000336279296875,
      "model_forward_time": 0.11478900909423828,
      "step": 55096
    },
    {
      "epoch": 0.000336279296875,
      "step": 55096,
      "training_step_time": 0.38478755950927734
    },
    {
      "epoch": 0.000336285400390625,
      "model_forward_time": 0.11514639854431152,
      "step": 55097
    },
    {
      "epoch": 0.000336285400390625,
      "step": 55097,
      "training_step_time": 0.3903038501739502
    },
    {
      "epoch": 0.00033629150390625,
      "model_forward_time": 0.11511707305908203,
      "step": 55098
    },
    {
      "epoch": 0.00033629150390625,
      "step": 55098,
      "training_step_time": 0.3924582004547119
    },
    {
      "epoch": 0.000336297607421875,
      "model_forward_time": 0.115692138671875,
      "step": 55099
    },
    {
      "epoch": 0.000336297607421875,
      "step": 55099,
      "training_step_time": 0.39232492446899414
    },
    {
      "epoch": 0.0003363037109375,
      "grad_norm": 0.08703792095184326,
      "learning_rate": 1.812345330310916e-06,
      "loss": 0.0349,
      "step": 55100
    },
    {
      "epoch": 0.0003363037109375,
      "model_forward_time": 0.11466646194458008,
      "step": 55100
    },
    {
      "epoch": 0.0003363037109375,
      "step": 55100,
      "training_step_time": 0.3977997303009033
    },
    {
      "epoch": 0.000336309814453125,
      "model_forward_time": 0.11493945121765137,
      "step": 55101
    },
    {
      "epoch": 0.000336309814453125,
      "step": 55101,
      "training_step_time": 0.6495611667633057
    },
    {
      "epoch": 0.00033631591796875,
      "model_forward_time": 0.11541318893432617,
      "step": 55102
    },
    {
      "epoch": 0.00033631591796875,
      "step": 55102,
      "training_step_time": 0.40663838386535645
    },
    {
      "epoch": 0.000336322021484375,
      "model_forward_time": 0.11523246765136719,
      "step": 55103
    },
    {
      "epoch": 0.000336322021484375,
      "step": 55103,
      "training_step_time": 0.39574170112609863
    },
    {
      "epoch": 0.000336328125,
      "model_forward_time": 0.11626076698303223,
      "step": 55104
    },
    {
      "epoch": 0.000336328125,
      "step": 55104,
      "training_step_time": 0.38953375816345215
    },
    {
      "epoch": 0.000336334228515625,
      "model_forward_time": 0.1149759292602539,
      "step": 55105
    },
    {
      "epoch": 0.000336334228515625,
      "step": 55105,
      "training_step_time": 0.4233391284942627
    },
    {
      "epoch": 0.00033634033203125,
      "model_forward_time": 0.11517715454101562,
      "step": 55106
    },
    {
      "epoch": 0.00033634033203125,
      "step": 55106,
      "training_step_time": 0.45824599266052246
    },
    {
      "epoch": 0.000336346435546875,
      "model_forward_time": 0.11540746688842773,
      "step": 55107
    },
    {
      "epoch": 0.000336346435546875,
      "step": 55107,
      "training_step_time": 0.5645220279693604
    },
    {
      "epoch": 0.0003363525390625,
      "model_forward_time": 0.11445236206054688,
      "step": 55108
    },
    {
      "epoch": 0.0003363525390625,
      "step": 55108,
      "training_step_time": 0.47482872009277344
    },
    {
      "epoch": 0.000336358642578125,
      "model_forward_time": 0.11437463760375977,
      "step": 55109
    },
    {
      "epoch": 0.000336358642578125,
      "step": 55109,
      "training_step_time": 0.437237024307251
    },
    {
      "epoch": 0.00033636474609375,
      "grad_norm": 0.09051992744207382,
      "learning_rate": 1.8050003376075707e-06,
      "loss": 0.0375,
      "step": 55110
    },
    {
      "epoch": 0.00033636474609375,
      "model_forward_time": 0.11477923393249512,
      "step": 55110
    },
    {
      "epoch": 0.00033636474609375,
      "step": 55110,
      "training_step_time": 0.3896665573120117
    },
    {
      "epoch": 0.000336370849609375,
      "model_forward_time": 0.11415600776672363,
      "step": 55111
    },
    {
      "epoch": 0.000336370849609375,
      "step": 55111,
      "training_step_time": 0.39777421951293945
    },
    {
      "epoch": 0.000336376953125,
      "model_forward_time": 0.11467742919921875,
      "step": 55112
    },
    {
      "epoch": 0.000336376953125,
      "step": 55112,
      "training_step_time": 0.39024782180786133
    },
    {
      "epoch": 0.000336383056640625,
      "model_forward_time": 0.1153411865234375,
      "step": 55113
    },
    {
      "epoch": 0.000336383056640625,
      "step": 55113,
      "training_step_time": 0.5597312450408936
    },
    {
      "epoch": 0.00033638916015625,
      "model_forward_time": 0.11529946327209473,
      "step": 55114
    },
    {
      "epoch": 0.00033638916015625,
      "step": 55114,
      "training_step_time": 0.3898594379425049
    },
    {
      "epoch": 0.000336395263671875,
      "model_forward_time": 0.11473751068115234,
      "step": 55115
    },
    {
      "epoch": 0.000336395263671875,
      "step": 55115,
      "training_step_time": 0.38159751892089844
    },
    {
      "epoch": 0.0003364013671875,
      "model_forward_time": 0.11470651626586914,
      "step": 55116
    },
    {
      "epoch": 0.0003364013671875,
      "step": 55116,
      "training_step_time": 0.3917577266693115
    },
    {
      "epoch": 0.000336407470703125,
      "model_forward_time": 0.11577916145324707,
      "step": 55117
    },
    {
      "epoch": 0.000336407470703125,
      "step": 55117,
      "training_step_time": 0.39322733879089355
    },
    {
      "epoch": 0.00033641357421875,
      "model_forward_time": 0.1156930923461914,
      "step": 55118
    },
    {
      "epoch": 0.00033641357421875,
      "step": 55118,
      "training_step_time": 0.40295863151550293
    },
    {
      "epoch": 0.000336419677734375,
      "model_forward_time": 0.11530184745788574,
      "step": 55119
    },
    {
      "epoch": 0.000336419677734375,
      "step": 55119,
      "training_step_time": 0.5885167121887207
    },
    {
      "epoch": 0.00033642578125,
      "grad_norm": 0.10893790423870087,
      "learning_rate": 1.7976699852726153e-06,
      "loss": 0.0417,
      "step": 55120
    },
    {
      "epoch": 0.00033642578125,
      "model_forward_time": 0.11478996276855469,
      "step": 55120
    },
    {
      "epoch": 0.00033642578125,
      "step": 55120,
      "training_step_time": 0.41211438179016113
    },
    {
      "epoch": 0.000336431884765625,
      "model_forward_time": 0.11455512046813965,
      "step": 55121
    },
    {
      "epoch": 0.000336431884765625,
      "step": 55121,
      "training_step_time": 0.3713052272796631
    },
    {
      "epoch": 0.00033643798828125,
      "model_forward_time": 0.11544036865234375,
      "step": 55122
    },
    {
      "epoch": 0.00033643798828125,
      "step": 55122,
      "training_step_time": 0.40667247772216797
    },
    {
      "epoch": 0.000336444091796875,
      "model_forward_time": 0.11518096923828125,
      "step": 55123
    },
    {
      "epoch": 0.000336444091796875,
      "step": 55123,
      "training_step_time": 0.48775529861450195
    },
    {
      "epoch": 0.0003364501953125,
      "model_forward_time": 0.11439299583435059,
      "step": 55124
    },
    {
      "epoch": 0.0003364501953125,
      "step": 55124,
      "training_step_time": 0.3900735378265381
    },
    {
      "epoch": 0.000336456298828125,
      "model_forward_time": 0.11531543731689453,
      "step": 55125
    },
    {
      "epoch": 0.000336456298828125,
      "step": 55125,
      "training_step_time": 0.39571309089660645
    },
    {
      "epoch": 0.00033646240234375,
      "model_forward_time": 0.11432623863220215,
      "step": 55126
    },
    {
      "epoch": 0.00033646240234375,
      "step": 55126,
      "training_step_time": 0.39102840423583984
    },
    {
      "epoch": 0.000336468505859375,
      "model_forward_time": 0.11533355712890625,
      "step": 55127
    },
    {
      "epoch": 0.000336468505859375,
      "step": 55127,
      "training_step_time": 0.3920426368713379
    },
    {
      "epoch": 0.000336474609375,
      "model_forward_time": 0.11459136009216309,
      "step": 55128
    },
    {
      "epoch": 0.000336474609375,
      "step": 55128,
      "training_step_time": 0.3955550193786621
    },
    {
      "epoch": 0.000336480712890625,
      "model_forward_time": 0.11547589302062988,
      "step": 55129
    },
    {
      "epoch": 0.000336480712890625,
      "step": 55129,
      "training_step_time": 0.3928096294403076
    },
    {
      "epoch": 0.00033648681640625,
      "grad_norm": 0.14860060811042786,
      "learning_rate": 1.7903542755328073e-06,
      "loss": 0.0357,
      "step": 55130
    },
    {
      "epoch": 0.00033648681640625,
      "model_forward_time": 0.11521363258361816,
      "step": 55130
    },
    {
      "epoch": 0.00033648681640625,
      "step": 55130,
      "training_step_time": 0.40017199516296387
    },
    {
      "epoch": 0.000336492919921875,
      "model_forward_time": 0.11567425727844238,
      "step": 55131
    },
    {
      "epoch": 0.000336492919921875,
      "step": 55131,
      "training_step_time": 0.7630984783172607
    },
    {
      "epoch": 0.0003364990234375,
      "model_forward_time": 0.11438202857971191,
      "step": 55132
    },
    {
      "epoch": 0.0003364990234375,
      "step": 55132,
      "training_step_time": 0.40389370918273926
    },
    {
      "epoch": 0.000336505126953125,
      "model_forward_time": 0.11404919624328613,
      "step": 55133
    },
    {
      "epoch": 0.000336505126953125,
      "step": 55133,
      "training_step_time": 0.4463965892791748
    },
    {
      "epoch": 0.00033651123046875,
      "model_forward_time": 0.11440491676330566,
      "step": 55134
    },
    {
      "epoch": 0.00033651123046875,
      "step": 55134,
      "training_step_time": 0.427304744720459
    },
    {
      "epoch": 0.000336517333984375,
      "model_forward_time": 0.11438655853271484,
      "step": 55135
    },
    {
      "epoch": 0.000336517333984375,
      "step": 55135,
      "training_step_time": 0.5063023567199707
    },
    {
      "epoch": 0.0003365234375,
      "model_forward_time": 0.11463809013366699,
      "step": 55136
    },
    {
      "epoch": 0.0003365234375,
      "step": 55136,
      "training_step_time": 0.4165630340576172
    },
    {
      "epoch": 0.000336529541015625,
      "model_forward_time": 0.11551737785339355,
      "step": 55137
    },
    {
      "epoch": 0.000336529541015625,
      "step": 55137,
      "training_step_time": 0.5189940929412842
    },
    {
      "epoch": 0.00033653564453125,
      "model_forward_time": 0.11494135856628418,
      "step": 55138
    },
    {
      "epoch": 0.00033653564453125,
      "step": 55138,
      "training_step_time": 0.3782501220703125
    },
    {
      "epoch": 0.000336541748046875,
      "model_forward_time": 0.11439943313598633,
      "step": 55139
    },
    {
      "epoch": 0.000336541748046875,
      "step": 55139,
      "training_step_time": 0.3729276657104492
    },
    {
      "epoch": 0.0003365478515625,
      "grad_norm": 0.12835641205310822,
      "learning_rate": 1.7830532106104747e-06,
      "loss": 0.0371,
      "step": 55140
    },
    {
      "epoch": 0.0003365478515625,
      "model_forward_time": 0.11389017105102539,
      "step": 55140
    },
    {
      "epoch": 0.0003365478515625,
      "step": 55140,
      "training_step_time": 0.395108699798584
    },
    {
      "epoch": 0.000336553955078125,
      "model_forward_time": 0.11500096321105957,
      "step": 55141
    },
    {
      "epoch": 0.000336553955078125,
      "step": 55141,
      "training_step_time": 0.3914175033569336
    },
    {
      "epoch": 0.00033656005859375,
      "model_forward_time": 0.11441612243652344,
      "step": 55142
    },
    {
      "epoch": 0.00033656005859375,
      "step": 55142,
      "training_step_time": 0.38674497604370117
    },
    {
      "epoch": 0.000336566162109375,
      "model_forward_time": 0.11580061912536621,
      "step": 55143
    },
    {
      "epoch": 0.000336566162109375,
      "step": 55143,
      "training_step_time": 1.1132190227508545
    },
    {
      "epoch": 0.000336572265625,
      "model_forward_time": 0.11394166946411133,
      "step": 55144
    },
    {
      "epoch": 0.000336572265625,
      "step": 55144,
      "training_step_time": 0.4136073589324951
    },
    {
      "epoch": 0.000336578369140625,
      "model_forward_time": 0.11471962928771973,
      "step": 55145
    },
    {
      "epoch": 0.000336578369140625,
      "step": 55145,
      "training_step_time": 0.3852388858795166
    },
    {
      "epoch": 0.00033658447265625,
      "model_forward_time": 0.11419558525085449,
      "step": 55146
    },
    {
      "epoch": 0.00033658447265625,
      "step": 55146,
      "training_step_time": 0.3904440402984619
    },
    {
      "epoch": 0.000336590576171875,
      "model_forward_time": 0.11391735076904297,
      "step": 55147
    },
    {
      "epoch": 0.000336590576171875,
      "step": 55147,
      "training_step_time": 0.39664554595947266
    },
    {
      "epoch": 0.0003365966796875,
      "model_forward_time": 0.1140291690826416,
      "step": 55148
    },
    {
      "epoch": 0.0003365966796875,
      "step": 55148,
      "training_step_time": 0.4690542221069336
    },
    {
      "epoch": 0.000336602783203125,
      "model_forward_time": 0.11521601676940918,
      "step": 55149
    },
    {
      "epoch": 0.000336602783203125,
      "step": 55149,
      "training_step_time": 0.724700927734375
    },
    {
      "epoch": 0.00033660888671875,
      "grad_norm": 0.06059817969799042,
      "learning_rate": 1.7757667927234767e-06,
      "loss": 0.0306,
      "step": 55150
    },
    {
      "epoch": 0.00033660888671875,
      "model_forward_time": 0.11401629447937012,
      "step": 55150
    },
    {
      "epoch": 0.00033660888671875,
      "step": 55150,
      "training_step_time": 0.4762551784515381
    },
    {
      "epoch": 0.000336614990234375,
      "model_forward_time": 0.11490345001220703,
      "step": 55151
    },
    {
      "epoch": 0.000336614990234375,
      "step": 55151,
      "training_step_time": 0.39208483695983887
    },
    {
      "epoch": 0.00033662109375,
      "model_forward_time": 0.11492776870727539,
      "step": 55152
    },
    {
      "epoch": 0.00033662109375,
      "step": 55152,
      "training_step_time": 0.39264583587646484
    },
    {
      "epoch": 0.000336627197265625,
      "model_forward_time": 0.11419939994812012,
      "step": 55153
    },
    {
      "epoch": 0.000336627197265625,
      "step": 55153,
      "training_step_time": 0.39041876792907715
    },
    {
      "epoch": 0.00033663330078125,
      "model_forward_time": 0.11494898796081543,
      "step": 55154
    },
    {
      "epoch": 0.00033663330078125,
      "step": 55154,
      "training_step_time": 0.38031482696533203
    },
    {
      "epoch": 0.000336639404296875,
      "model_forward_time": 0.11444783210754395,
      "step": 55155
    },
    {
      "epoch": 0.000336639404296875,
      "step": 55155,
      "training_step_time": 0.8159980773925781
    },
    {
      "epoch": 0.0003366455078125,
      "model_forward_time": 0.11519789695739746,
      "step": 55156
    },
    {
      "epoch": 0.0003366455078125,
      "step": 55156,
      "training_step_time": 0.44141316413879395
    },
    {
      "epoch": 0.000336651611328125,
      "model_forward_time": 0.11425018310546875,
      "step": 55157
    },
    {
      "epoch": 0.000336651611328125,
      "step": 55157,
      "training_step_time": 0.394939661026001
    },
    {
      "epoch": 0.00033665771484375,
      "model_forward_time": 0.11478400230407715,
      "step": 55158
    },
    {
      "epoch": 0.00033665771484375,
      "step": 55158,
      "training_step_time": 0.39243030548095703
    },
    {
      "epoch": 0.000336663818359375,
      "model_forward_time": 0.11426305770874023,
      "step": 55159
    },
    {
      "epoch": 0.000336663818359375,
      "step": 55159,
      "training_step_time": 0.39392781257629395
    },
    {
      "epoch": 0.000336669921875,
      "grad_norm": 0.10614456981420517,
      "learning_rate": 1.7684950240852372e-06,
      "loss": 0.0344,
      "step": 55160
    },
    {
      "epoch": 0.000336669921875,
      "model_forward_time": 0.11497020721435547,
      "step": 55160
    },
    {
      "epoch": 0.000336669921875,
      "step": 55160,
      "training_step_time": 0.4370131492614746
    },
    {
      "epoch": 0.000336676025390625,
      "model_forward_time": 0.11530804634094238,
      "step": 55161
    },
    {
      "epoch": 0.000336676025390625,
      "step": 55161,
      "training_step_time": 0.9179511070251465
    },
    {
      "epoch": 0.00033668212890625,
      "model_forward_time": 0.11445903778076172,
      "step": 55162
    },
    {
      "epoch": 0.00033668212890625,
      "step": 55162,
      "training_step_time": 0.4829826354980469
    },
    {
      "epoch": 0.000336688232421875,
      "model_forward_time": 0.11446475982666016,
      "step": 55163
    },
    {
      "epoch": 0.000336688232421875,
      "step": 55163,
      "training_step_time": 0.41492199897766113
    },
    {
      "epoch": 0.0003366943359375,
      "model_forward_time": 0.11428093910217285,
      "step": 55164
    },
    {
      "epoch": 0.0003366943359375,
      "step": 55164,
      "training_step_time": 0.38236546516418457
    },
    {
      "epoch": 0.000336700439453125,
      "model_forward_time": 0.1142122745513916,
      "step": 55165
    },
    {
      "epoch": 0.000336700439453125,
      "step": 55165,
      "training_step_time": 0.3865070343017578
    },
    {
      "epoch": 0.00033670654296875,
      "model_forward_time": 0.11360883712768555,
      "step": 55166
    },
    {
      "epoch": 0.00033670654296875,
      "step": 55166,
      "training_step_time": 0.3790311813354492
    },
    {
      "epoch": 0.000336712646484375,
      "model_forward_time": 0.11582756042480469,
      "step": 55167
    },
    {
      "epoch": 0.000336712646484375,
      "step": 55167,
      "training_step_time": 0.6610162258148193
    },
    {
      "epoch": 0.00033671875,
      "model_forward_time": 0.1148672103881836,
      "step": 55168
    },
    {
      "epoch": 0.00033671875,
      "step": 55168,
      "training_step_time": 0.42063236236572266
    },
    {
      "epoch": 0.000336724853515625,
      "model_forward_time": 0.114837646484375,
      "step": 55169
    },
    {
      "epoch": 0.000336724853515625,
      "step": 55169,
      "training_step_time": 0.39754819869995117
    },
    {
      "epoch": 0.00033673095703125,
      "grad_norm": 0.09230949729681015,
      "learning_rate": 1.7612379069047335e-06,
      "loss": 0.0346,
      "step": 55170
    },
    {
      "epoch": 0.00033673095703125,
      "model_forward_time": 0.11464405059814453,
      "step": 55170
    },
    {
      "epoch": 0.00033673095703125,
      "step": 55170,
      "training_step_time": 0.405214786529541
    },
    {
      "epoch": 0.000336737060546875,
      "model_forward_time": 0.11492156982421875,
      "step": 55171
    },
    {
      "epoch": 0.000336737060546875,
      "step": 55171,
      "training_step_time": 0.39975905418395996
    },
    {
      "epoch": 0.0003367431640625,
      "model_forward_time": 0.1151888370513916,
      "step": 55172
    },
    {
      "epoch": 0.0003367431640625,
      "step": 55172,
      "training_step_time": 0.4849662780761719
    },
    {
      "epoch": 0.000336749267578125,
      "model_forward_time": 0.11489462852478027,
      "step": 55173
    },
    {
      "epoch": 0.000336749267578125,
      "step": 55173,
      "training_step_time": 0.8603770732879639
    },
    {
      "epoch": 0.00033675537109375,
      "model_forward_time": 0.11528182029724121,
      "step": 55174
    },
    {
      "epoch": 0.00033675537109375,
      "step": 55174,
      "training_step_time": 0.42514920234680176
    },
    {
      "epoch": 0.000336761474609375,
      "model_forward_time": 0.11484384536743164,
      "step": 55175
    },
    {
      "epoch": 0.000336761474609375,
      "step": 55175,
      "training_step_time": 0.5213522911071777
    },
    {
      "epoch": 0.000336767578125,
      "model_forward_time": 0.11472749710083008,
      "step": 55176
    },
    {
      "epoch": 0.000336767578125,
      "step": 55176,
      "training_step_time": 0.4143855571746826
    },
    {
      "epoch": 0.000336773681640625,
      "model_forward_time": 0.11434292793273926,
      "step": 55177
    },
    {
      "epoch": 0.000336773681640625,
      "step": 55177,
      "training_step_time": 0.3850278854370117
    },
    {
      "epoch": 0.00033677978515625,
      "model_forward_time": 0.11434030532836914,
      "step": 55178
    },
    {
      "epoch": 0.00033677978515625,
      "step": 55178,
      "training_step_time": 0.3831968307495117
    },
    {
      "epoch": 0.000336785888671875,
      "model_forward_time": 0.11469268798828125,
      "step": 55179
    },
    {
      "epoch": 0.000336785888671875,
      "step": 55179,
      "training_step_time": 0.6347367763519287
    },
    {
      "epoch": 0.0003367919921875,
      "grad_norm": 0.07678885012865067,
      "learning_rate": 1.7539954433864858e-06,
      "loss": 0.0343,
      "step": 55180
    },
    {
      "epoch": 0.0003367919921875,
      "model_forward_time": 0.11523914337158203,
      "step": 55180
    },
    {
      "epoch": 0.0003367919921875,
      "step": 55180,
      "training_step_time": 0.39695310592651367
    },
    {
      "epoch": 0.000336798095703125,
      "model_forward_time": 0.11481332778930664,
      "step": 55181
    },
    {
      "epoch": 0.000336798095703125,
      "step": 55181,
      "training_step_time": 0.3999147415161133
    },
    {
      "epoch": 0.00033680419921875,
      "model_forward_time": 0.11438584327697754,
      "step": 55182
    },
    {
      "epoch": 0.00033680419921875,
      "step": 55182,
      "training_step_time": 0.3891568183898926
    },
    {
      "epoch": 0.000336810302734375,
      "model_forward_time": 0.11523675918579102,
      "step": 55183
    },
    {
      "epoch": 0.000336810302734375,
      "step": 55183,
      "training_step_time": 0.3983733654022217
    },
    {
      "epoch": 0.00033681640625,
      "model_forward_time": 0.11496400833129883,
      "step": 55184
    },
    {
      "epoch": 0.00033681640625,
      "step": 55184,
      "training_step_time": 0.3987143039703369
    },
    {
      "epoch": 0.000336822509765625,
      "model_forward_time": 0.1155233383178711,
      "step": 55185
    },
    {
      "epoch": 0.000336822509765625,
      "step": 55185,
      "training_step_time": 1.084519386291504
    },
    {
      "epoch": 0.00033682861328125,
      "model_forward_time": 0.11407184600830078,
      "step": 55186
    },
    {
      "epoch": 0.00033682861328125,
      "step": 55186,
      "training_step_time": 0.41884756088256836
    },
    {
      "epoch": 0.000336834716796875,
      "model_forward_time": 0.11435103416442871,
      "step": 55187
    },
    {
      "epoch": 0.000336834716796875,
      "step": 55187,
      "training_step_time": 0.4443495273590088
    },
    {
      "epoch": 0.0003368408203125,
      "model_forward_time": 0.11460590362548828,
      "step": 55188
    },
    {
      "epoch": 0.0003368408203125,
      "step": 55188,
      "training_step_time": 0.4780611991882324
    },
    {
      "epoch": 0.000336846923828125,
      "model_forward_time": 0.11456704139709473,
      "step": 55189
    },
    {
      "epoch": 0.000336846923828125,
      "step": 55189,
      "training_step_time": 0.45191431045532227
    },
    {
      "epoch": 0.00033685302734375,
      "grad_norm": 0.13521958887577057,
      "learning_rate": 1.7467676357305561e-06,
      "loss": 0.0375,
      "step": 55190
    },
    {
      "epoch": 0.00033685302734375,
      "model_forward_time": 0.11450505256652832,
      "step": 55190
    },
    {
      "epoch": 0.00033685302734375,
      "step": 55190,
      "training_step_time": 0.39930033683776855
    },
    {
      "epoch": 0.000336859130859375,
      "model_forward_time": 0.11435580253601074,
      "step": 55191
    },
    {
      "epoch": 0.000336859130859375,
      "step": 55191,
      "training_step_time": 0.5887711048126221
    },
    {
      "epoch": 0.000336865234375,
      "model_forward_time": 0.11456918716430664,
      "step": 55192
    },
    {
      "epoch": 0.000336865234375,
      "step": 55192,
      "training_step_time": 0.42751622200012207
    },
    {
      "epoch": 0.000336871337890625,
      "model_forward_time": 0.1150510311126709,
      "step": 55193
    },
    {
      "epoch": 0.000336871337890625,
      "step": 55193,
      "training_step_time": 0.4129903316497803
    },
    {
      "epoch": 0.00033687744140625,
      "model_forward_time": 0.11509466171264648,
      "step": 55194
    },
    {
      "epoch": 0.00033687744140625,
      "step": 55194,
      "training_step_time": 0.3905344009399414
    },
    {
      "epoch": 0.000336883544921875,
      "model_forward_time": 0.11449909210205078,
      "step": 55195
    },
    {
      "epoch": 0.000336883544921875,
      "step": 55195,
      "training_step_time": 0.403395414352417
    },
    {
      "epoch": 0.0003368896484375,
      "model_forward_time": 0.11496496200561523,
      "step": 55196
    },
    {
      "epoch": 0.0003368896484375,
      "step": 55196,
      "training_step_time": 0.3864095211029053
    },
    {
      "epoch": 0.000336895751953125,
      "model_forward_time": 0.1154165267944336,
      "step": 55197
    },
    {
      "epoch": 0.000336895751953125,
      "step": 55197,
      "training_step_time": 0.46436166763305664
    },
    {
      "epoch": 0.00033690185546875,
      "model_forward_time": 0.11513209342956543,
      "step": 55198
    },
    {
      "epoch": 0.00033690185546875,
      "step": 55198,
      "training_step_time": 0.49294590950012207
    },
    {
      "epoch": 0.000336907958984375,
      "model_forward_time": 0.11478304862976074,
      "step": 55199
    },
    {
      "epoch": 0.000336907958984375,
      "step": 55199,
      "training_step_time": 0.3918135166168213
    },
    {
      "epoch": 0.0003369140625,
      "grad_norm": 0.12023141235113144,
      "learning_rate": 1.7395544861325718e-06,
      "loss": 0.0384,
      "step": 55200
    },
    {
      "epoch": 0.0003369140625,
      "model_forward_time": 0.11504364013671875,
      "step": 55200
    },
    {
      "epoch": 0.0003369140625,
      "step": 55200,
      "training_step_time": 0.42235326766967773
    },
    {
      "epoch": 0.000336920166015625,
      "model_forward_time": 0.11463761329650879,
      "step": 55201
    },
    {
      "epoch": 0.000336920166015625,
      "step": 55201,
      "training_step_time": 0.4567737579345703
    },
    {
      "epoch": 0.00033692626953125,
      "model_forward_time": 0.11480832099914551,
      "step": 55202
    },
    {
      "epoch": 0.00033692626953125,
      "step": 55202,
      "training_step_time": 0.4966471195220947
    },
    {
      "epoch": 0.000336932373046875,
      "model_forward_time": 0.11497807502746582,
      "step": 55203
    },
    {
      "epoch": 0.000336932373046875,
      "step": 55203,
      "training_step_time": 0.43381524085998535
    },
    {
      "epoch": 0.0003369384765625,
      "model_forward_time": 0.11486601829528809,
      "step": 55204
    },
    {
      "epoch": 0.0003369384765625,
      "step": 55204,
      "training_step_time": 0.3965644836425781
    },
    {
      "epoch": 0.000336944580078125,
      "model_forward_time": 0.11445307731628418,
      "step": 55205
    },
    {
      "epoch": 0.000336944580078125,
      "step": 55205,
      "training_step_time": 0.38809633255004883
    },
    {
      "epoch": 0.00033695068359375,
      "model_forward_time": 0.11482667922973633,
      "step": 55206
    },
    {
      "epoch": 0.00033695068359375,
      "step": 55206,
      "training_step_time": 0.4601724147796631
    },
    {
      "epoch": 0.000336956787109375,
      "model_forward_time": 0.11531615257263184,
      "step": 55207
    },
    {
      "epoch": 0.000336956787109375,
      "step": 55207,
      "training_step_time": 0.3908884525299072
    },
    {
      "epoch": 0.000336962890625,
      "model_forward_time": 0.11553096771240234,
      "step": 55208
    },
    {
      "epoch": 0.000336962890625,
      "step": 55208,
      "training_step_time": 0.3865227699279785
    },
    {
      "epoch": 0.000336968994140625,
      "model_forward_time": 0.11519169807434082,
      "step": 55209
    },
    {
      "epoch": 0.000336968994140625,
      "step": 55209,
      "training_step_time": 0.3952908515930176
    },
    {
      "epoch": 0.00033697509765625,
      "grad_norm": 0.10492812097072601,
      "learning_rate": 1.73235599678368e-06,
      "loss": 0.0335,
      "step": 55210
    },
    {
      "epoch": 0.00033697509765625,
      "model_forward_time": 0.11479616165161133,
      "step": 55210
    },
    {
      "epoch": 0.00033697509765625,
      "step": 55210,
      "training_step_time": 0.39056921005249023
    },
    {
      "epoch": 0.000336981201171875,
      "model_forward_time": 0.11438274383544922,
      "step": 55211
    },
    {
      "epoch": 0.000336981201171875,
      "step": 55211,
      "training_step_time": 0.40579986572265625
    },
    {
      "epoch": 0.0003369873046875,
      "model_forward_time": 0.11532449722290039,
      "step": 55212
    },
    {
      "epoch": 0.0003369873046875,
      "step": 55212,
      "training_step_time": 0.47214436531066895
    },
    {
      "epoch": 0.000336993408203125,
      "model_forward_time": 0.1148691177368164,
      "step": 55213
    },
    {
      "epoch": 0.000336993408203125,
      "step": 55213,
      "training_step_time": 0.41379547119140625
    },
    {
      "epoch": 0.00033699951171875,
      "model_forward_time": 0.1151893138885498,
      "step": 55214
    },
    {
      "epoch": 0.00033699951171875,
      "step": 55214,
      "training_step_time": 0.4257354736328125
    },
    {
      "epoch": 0.000337005615234375,
      "model_forward_time": 0.11506247520446777,
      "step": 55215
    },
    {
      "epoch": 0.000337005615234375,
      "step": 55215,
      "training_step_time": 0.4129934310913086
    },
    {
      "epoch": 0.00033701171875,
      "model_forward_time": 0.11515378952026367,
      "step": 55216
    },
    {
      "epoch": 0.00033701171875,
      "step": 55216,
      "training_step_time": 0.5153415203094482
    },
    {
      "epoch": 0.000337017822265625,
      "model_forward_time": 0.11493945121765137,
      "step": 55217
    },
    {
      "epoch": 0.000337017822265625,
      "step": 55217,
      "training_step_time": 0.5006983280181885
    },
    {
      "epoch": 0.00033702392578125,
      "model_forward_time": 0.11477971076965332,
      "step": 55218
    },
    {
      "epoch": 0.00033702392578125,
      "step": 55218,
      "training_step_time": 0.3993372917175293
    },
    {
      "epoch": 0.000337030029296875,
      "model_forward_time": 0.11527037620544434,
      "step": 55219
    },
    {
      "epoch": 0.000337030029296875,
      "step": 55219,
      "training_step_time": 0.38250136375427246
    },
    {
      "epoch": 0.0003370361328125,
      "grad_norm": 0.08845877647399902,
      "learning_rate": 1.7251721698706147e-06,
      "loss": 0.0349,
      "step": 55220
    },
    {
      "epoch": 0.0003370361328125,
      "model_forward_time": 0.11504507064819336,
      "step": 55220
    },
    {
      "epoch": 0.0003370361328125,
      "step": 55220,
      "training_step_time": 0.4184134006500244
    },
    {
      "epoch": 0.000337042236328125,
      "model_forward_time": 0.11466765403747559,
      "step": 55221
    },
    {
      "epoch": 0.000337042236328125,
      "step": 55221,
      "training_step_time": 0.41903233528137207
    },
    {
      "epoch": 0.00033704833984375,
      "model_forward_time": 0.11444735527038574,
      "step": 55222
    },
    {
      "epoch": 0.00033704833984375,
      "step": 55222,
      "training_step_time": 0.395643949508667
    },
    {
      "epoch": 0.000337054443359375,
      "model_forward_time": 0.11560940742492676,
      "step": 55223
    },
    {
      "epoch": 0.000337054443359375,
      "step": 55223,
      "training_step_time": 0.3963484764099121
    },
    {
      "epoch": 0.000337060546875,
      "model_forward_time": 0.11449813842773438,
      "step": 55224
    },
    {
      "epoch": 0.000337060546875,
      "step": 55224,
      "training_step_time": 0.4209911823272705
    },
    {
      "epoch": 0.000337066650390625,
      "model_forward_time": 0.11507248878479004,
      "step": 55225
    },
    {
      "epoch": 0.000337066650390625,
      "step": 55225,
      "training_step_time": 0.39974260330200195
    },
    {
      "epoch": 0.00033707275390625,
      "model_forward_time": 0.11531257629394531,
      "step": 55226
    },
    {
      "epoch": 0.00033707275390625,
      "step": 55226,
      "training_step_time": 0.4153866767883301
    },
    {
      "epoch": 0.000337078857421875,
      "model_forward_time": 0.11497855186462402,
      "step": 55227
    },
    {
      "epoch": 0.000337078857421875,
      "step": 55227,
      "training_step_time": 0.42067646980285645
    },
    {
      "epoch": 0.0003370849609375,
      "model_forward_time": 0.11513447761535645,
      "step": 55228
    },
    {
      "epoch": 0.0003370849609375,
      "step": 55228,
      "training_step_time": 0.392772912979126
    },
    {
      "epoch": 0.000337091064453125,
      "model_forward_time": 0.11534285545349121,
      "step": 55229
    },
    {
      "epoch": 0.000337091064453125,
      "step": 55229,
      "training_step_time": 0.5390799045562744
    },
    {
      "epoch": 0.00033709716796875,
      "grad_norm": 0.11270442605018616,
      "learning_rate": 1.7180030075756136e-06,
      "loss": 0.0376,
      "step": 55230
    },
    {
      "epoch": 0.00033709716796875,
      "model_forward_time": 0.11588025093078613,
      "step": 55230
    },
    {
      "epoch": 0.00033709716796875,
      "step": 55230,
      "training_step_time": 0.4748356342315674
    },
    {
      "epoch": 0.000337103271484375,
      "model_forward_time": 0.11516118049621582,
      "step": 55231
    },
    {
      "epoch": 0.000337103271484375,
      "step": 55231,
      "training_step_time": 0.4695582389831543
    },
    {
      "epoch": 0.000337109375,
      "model_forward_time": 0.11495018005371094,
      "step": 55232
    },
    {
      "epoch": 0.000337109375,
      "step": 55232,
      "training_step_time": 0.41132473945617676
    },
    {
      "epoch": 0.000337115478515625,
      "model_forward_time": 0.1158607006072998,
      "step": 55233
    },
    {
      "epoch": 0.000337115478515625,
      "step": 55233,
      "training_step_time": 0.38493847846984863
    },
    {
      "epoch": 0.00033712158203125,
      "model_forward_time": 0.11484026908874512,
      "step": 55234
    },
    {
      "epoch": 0.00033712158203125,
      "step": 55234,
      "training_step_time": 0.37651896476745605
    },
    {
      "epoch": 0.000337127685546875,
      "model_forward_time": 0.11482572555541992,
      "step": 55235
    },
    {
      "epoch": 0.000337127685546875,
      "step": 55235,
      "training_step_time": 0.38036274909973145
    },
    {
      "epoch": 0.0003371337890625,
      "model_forward_time": 0.11566638946533203,
      "step": 55236
    },
    {
      "epoch": 0.0003371337890625,
      "step": 55236,
      "training_step_time": 0.40363430976867676
    },
    {
      "epoch": 0.000337139892578125,
      "model_forward_time": 0.11542582511901855,
      "step": 55237
    },
    {
      "epoch": 0.000337139892578125,
      "step": 55237,
      "training_step_time": 0.38387131690979004
    },
    {
      "epoch": 0.00033714599609375,
      "model_forward_time": 0.11527633666992188,
      "step": 55238
    },
    {
      "epoch": 0.00033714599609375,
      "step": 55238,
      "training_step_time": 0.38341546058654785
    },
    {
      "epoch": 0.000337152099609375,
      "model_forward_time": 0.11575722694396973,
      "step": 55239
    },
    {
      "epoch": 0.000337152099609375,
      "step": 55239,
      "training_step_time": 0.8676345348358154
    },
    {
      "epoch": 0.000337158203125,
      "grad_norm": 0.10512810200452805,
      "learning_rate": 1.7108485120764905e-06,
      "loss": 0.0378,
      "step": 55240
    },
    {
      "epoch": 0.000337158203125,
      "model_forward_time": 0.11479353904724121,
      "step": 55240
    },
    {
      "epoch": 0.000337158203125,
      "step": 55240,
      "training_step_time": 0.3875846862792969
    },
    {
      "epoch": 0.000337164306640625,
      "model_forward_time": 0.11385703086853027,
      "step": 55241
    },
    {
      "epoch": 0.000337164306640625,
      "step": 55241,
      "training_step_time": 0.39826130867004395
    },
    {
      "epoch": 0.00033717041015625,
      "model_forward_time": 0.11357545852661133,
      "step": 55242
    },
    {
      "epoch": 0.00033717041015625,
      "step": 55242,
      "training_step_time": 0.42266416549682617
    },
    {
      "epoch": 0.000337176513671875,
      "model_forward_time": 0.11437106132507324,
      "step": 55243
    },
    {
      "epoch": 0.000337176513671875,
      "step": 55243,
      "training_step_time": 0.41672444343566895
    },
    {
      "epoch": 0.0003371826171875,
      "model_forward_time": 0.11410403251647949,
      "step": 55244
    },
    {
      "epoch": 0.0003371826171875,
      "step": 55244,
      "training_step_time": 0.4245460033416748
    },
    {
      "epoch": 0.000337188720703125,
      "model_forward_time": 0.11541175842285156,
      "step": 55245
    },
    {
      "epoch": 0.000337188720703125,
      "step": 55245,
      "training_step_time": 0.8547604084014893
    },
    {
      "epoch": 0.00033719482421875,
      "model_forward_time": 0.11420607566833496,
      "step": 55246
    },
    {
      "epoch": 0.00033719482421875,
      "step": 55246,
      "training_step_time": 0.39733195304870605
    },
    {
      "epoch": 0.000337200927734375,
      "model_forward_time": 0.11456179618835449,
      "step": 55247
    },
    {
      "epoch": 0.000337200927734375,
      "step": 55247,
      "training_step_time": 0.38483452796936035
    },
    {
      "epoch": 0.00033720703125,
      "model_forward_time": 0.11407065391540527,
      "step": 55248
    },
    {
      "epoch": 0.00033720703125,
      "step": 55248,
      "training_step_time": 0.3895294666290283
    },
    {
      "epoch": 0.000337213134765625,
      "model_forward_time": 0.11410284042358398,
      "step": 55249
    },
    {
      "epoch": 0.000337213134765625,
      "step": 55249,
      "training_step_time": 0.38622212409973145
    },
    {
      "epoch": 0.00033721923828125,
      "grad_norm": 0.09956254810094833,
      "learning_rate": 1.70370868554659e-06,
      "loss": 0.0393,
      "step": 55250
    },
    {
      "epoch": 0.00033721923828125,
      "model_forward_time": 0.11467409133911133,
      "step": 55250
    },
    {
      "epoch": 0.00033721923828125,
      "step": 55250,
      "training_step_time": 0.38590383529663086
    },
    {
      "epoch": 0.000337225341796875,
      "model_forward_time": 0.11443614959716797,
      "step": 55251
    },
    {
      "epoch": 0.000337225341796875,
      "step": 55251,
      "training_step_time": 0.8899717330932617
    },
    {
      "epoch": 0.0003372314453125,
      "model_forward_time": 0.11417460441589355,
      "step": 55252
    },
    {
      "epoch": 0.0003372314453125,
      "step": 55252,
      "training_step_time": 0.38663530349731445
    },
    {
      "epoch": 0.000337237548828125,
      "model_forward_time": 0.11390972137451172,
      "step": 55253
    },
    {
      "epoch": 0.000337237548828125,
      "step": 55253,
      "training_step_time": 0.38634300231933594
    },
    {
      "epoch": 0.00033724365234375,
      "model_forward_time": 0.11419105529785156,
      "step": 55254
    },
    {
      "epoch": 0.00033724365234375,
      "step": 55254,
      "training_step_time": 0.48327136039733887
    },
    {
      "epoch": 0.000337249755859375,
      "model_forward_time": 0.11431455612182617,
      "step": 55255
    },
    {
      "epoch": 0.000337249755859375,
      "step": 55255,
      "training_step_time": 0.4763448238372803
    },
    {
      "epoch": 0.000337255859375,
      "model_forward_time": 0.11379837989807129,
      "step": 55256
    },
    {
      "epoch": 0.000337255859375,
      "step": 55256,
      "training_step_time": 0.374983549118042
    },
    {
      "epoch": 0.000337261962890625,
      "model_forward_time": 0.1145627498626709,
      "step": 55257
    },
    {
      "epoch": 0.000337261962890625,
      "step": 55257,
      "training_step_time": 0.7592258453369141
    },
    {
      "epoch": 0.00033726806640625,
      "model_forward_time": 0.11437773704528809,
      "step": 55258
    },
    {
      "epoch": 0.00033726806640625,
      "step": 55258,
      "training_step_time": 0.4025886058807373
    },
    {
      "epoch": 0.000337274169921875,
      "model_forward_time": 0.11411476135253906,
      "step": 55259
    },
    {
      "epoch": 0.000337274169921875,
      "step": 55259,
      "training_step_time": 0.3898158073425293
    },
    {
      "epoch": 0.0003372802734375,
      "grad_norm": 0.09362930059432983,
      "learning_rate": 1.696583530154794e-06,
      "loss": 0.0377,
      "step": 55260
    },
    {
      "epoch": 0.0003372802734375,
      "model_forward_time": 0.11427021026611328,
      "step": 55260
    },
    {
      "epoch": 0.0003372802734375,
      "step": 55260,
      "training_step_time": 0.3877999782562256
    },
    {
      "epoch": 0.000337286376953125,
      "model_forward_time": 0.11496448516845703,
      "step": 55261
    },
    {
      "epoch": 0.000337286376953125,
      "step": 55261,
      "training_step_time": 0.38827013969421387
    },
    {
      "epoch": 0.00033729248046875,
      "model_forward_time": 0.1146390438079834,
      "step": 55262
    },
    {
      "epoch": 0.00033729248046875,
      "step": 55262,
      "training_step_time": 0.38535380363464355
    },
    {
      "epoch": 0.000337298583984375,
      "model_forward_time": 0.11469006538391113,
      "step": 55263
    },
    {
      "epoch": 0.000337298583984375,
      "step": 55263,
      "training_step_time": 0.8887100219726562
    },
    {
      "epoch": 0.0003373046875,
      "model_forward_time": 0.11492228507995605,
      "step": 55264
    },
    {
      "epoch": 0.0003373046875,
      "step": 55264,
      "training_step_time": 0.38380885124206543
    },
    {
      "epoch": 0.000337310791015625,
      "model_forward_time": 0.11412358283996582,
      "step": 55265
    },
    {
      "epoch": 0.000337310791015625,
      "step": 55265,
      "training_step_time": 0.3842153549194336
    },
    {
      "epoch": 0.00033731689453125,
      "model_forward_time": 0.11379194259643555,
      "step": 55266
    },
    {
      "epoch": 0.00033731689453125,
      "step": 55266,
      "training_step_time": 0.3868443965911865
    },
    {
      "epoch": 0.000337322998046875,
      "model_forward_time": 0.11412668228149414,
      "step": 55267
    },
    {
      "epoch": 0.000337322998046875,
      "step": 55267,
      "training_step_time": 0.4576733112335205
    },
    {
      "epoch": 0.0003373291015625,
      "model_forward_time": 0.11441779136657715,
      "step": 55268
    },
    {
      "epoch": 0.0003373291015625,
      "step": 55268,
      "training_step_time": 0.4758129119873047
    },
    {
      "epoch": 0.000337335205078125,
      "model_forward_time": 0.11488795280456543,
      "step": 55269
    },
    {
      "epoch": 0.000337335205078125,
      "step": 55269,
      "training_step_time": 0.4205794334411621
    },
    {
      "epoch": 0.00033734130859375,
      "grad_norm": 0.07802275568246841,
      "learning_rate": 1.6894730480655484e-06,
      "loss": 0.0384,
      "step": 55270
    },
    {
      "epoch": 0.00033734130859375,
      "model_forward_time": 0.11517906188964844,
      "step": 55270
    },
    {
      "epoch": 0.00033734130859375,
      "step": 55270,
      "training_step_time": 0.403674840927124
    },
    {
      "epoch": 0.000337347412109375,
      "model_forward_time": 0.11424112319946289,
      "step": 55271
    },
    {
      "epoch": 0.000337347412109375,
      "step": 55271,
      "training_step_time": 0.4927196502685547
    },
    {
      "epoch": 0.000337353515625,
      "model_forward_time": 0.11419892311096191,
      "step": 55272
    },
    {
      "epoch": 0.000337353515625,
      "step": 55272,
      "training_step_time": 0.47907090187072754
    },
    {
      "epoch": 0.000337359619140625,
      "model_forward_time": 0.11443161964416504,
      "step": 55273
    },
    {
      "epoch": 0.000337359619140625,
      "step": 55273,
      "training_step_time": 0.3992581367492676
    },
    {
      "epoch": 0.00033736572265625,
      "model_forward_time": 0.11549973487854004,
      "step": 55274
    },
    {
      "epoch": 0.00033736572265625,
      "step": 55274,
      "training_step_time": 0.38073039054870605
    },
    {
      "epoch": 0.000337371826171875,
      "model_forward_time": 0.114471435546875,
      "step": 55275
    },
    {
      "epoch": 0.000337371826171875,
      "step": 55275,
      "training_step_time": 0.6081109046936035
    },
    {
      "epoch": 0.0003373779296875,
      "model_forward_time": 0.11458206176757812,
      "step": 55276
    },
    {
      "epoch": 0.0003373779296875,
      "step": 55276,
      "training_step_time": 0.38630175590515137
    },
    {
      "epoch": 0.000337384033203125,
      "model_forward_time": 0.11481904983520508,
      "step": 55277
    },
    {
      "epoch": 0.000337384033203125,
      "step": 55277,
      "training_step_time": 0.3936784267425537
    },
    {
      "epoch": 0.00033739013671875,
      "model_forward_time": 0.11490941047668457,
      "step": 55278
    },
    {
      "epoch": 0.00033739013671875,
      "step": 55278,
      "training_step_time": 0.4109973907470703
    },
    {
      "epoch": 0.000337396240234375,
      "model_forward_time": 0.11528873443603516,
      "step": 55279
    },
    {
      "epoch": 0.000337396240234375,
      "step": 55279,
      "training_step_time": 0.40944409370422363
    },
    {
      "epoch": 0.00033740234375,
      "grad_norm": 0.08323632180690765,
      "learning_rate": 1.682377241438826e-06,
      "loss": 0.0374,
      "step": 55280
    },
    {
      "epoch": 0.00033740234375,
      "model_forward_time": 0.11455869674682617,
      "step": 55280
    },
    {
      "epoch": 0.00033740234375,
      "step": 55280,
      "training_step_time": 0.3998603820800781
    },
    {
      "epoch": 0.000337408447265625,
      "model_forward_time": 0.11551427841186523,
      "step": 55281
    },
    {
      "epoch": 0.000337408447265625,
      "step": 55281,
      "training_step_time": 0.6251027584075928
    },
    {
      "epoch": 0.00033741455078125,
      "model_forward_time": 0.11501169204711914,
      "step": 55282
    },
    {
      "epoch": 0.00033741455078125,
      "step": 55282,
      "training_step_time": 0.4606664180755615
    },
    {
      "epoch": 0.000337420654296875,
      "model_forward_time": 0.11433625221252441,
      "step": 55283
    },
    {
      "epoch": 0.000337420654296875,
      "step": 55283,
      "training_step_time": 0.4003431797027588
    },
    {
      "epoch": 0.0003374267578125,
      "model_forward_time": 0.11459755897521973,
      "step": 55284
    },
    {
      "epoch": 0.0003374267578125,
      "step": 55284,
      "training_step_time": 0.4271543025970459
    },
    {
      "epoch": 0.000337432861328125,
      "model_forward_time": 0.1143503189086914,
      "step": 55285
    },
    {
      "epoch": 0.000337432861328125,
      "step": 55285,
      "training_step_time": 0.4942746162414551
    },
    {
      "epoch": 0.00033743896484375,
      "model_forward_time": 0.1141045093536377,
      "step": 55286
    },
    {
      "epoch": 0.00033743896484375,
      "step": 55286,
      "training_step_time": 0.4872713088989258
    },
    {
      "epoch": 0.000337445068359375,
      "model_forward_time": 0.11500287055969238,
      "step": 55287
    },
    {
      "epoch": 0.000337445068359375,
      "step": 55287,
      "training_step_time": 0.39336609840393066
    },
    {
      "epoch": 0.000337451171875,
      "model_forward_time": 0.11448097229003906,
      "step": 55288
    },
    {
      "epoch": 0.000337451171875,
      "step": 55288,
      "training_step_time": 0.39136266708374023
    },
    {
      "epoch": 0.000337457275390625,
      "model_forward_time": 0.11488604545593262,
      "step": 55289
    },
    {
      "epoch": 0.000337457275390625,
      "step": 55289,
      "training_step_time": 0.40410876274108887
    },
    {
      "epoch": 0.00033746337890625,
      "grad_norm": 0.08342410624027252,
      "learning_rate": 1.6752961124301415e-06,
      "loss": 0.0366,
      "step": 55290
    },
    {
      "epoch": 0.00033746337890625,
      "model_forward_time": 0.11539864540100098,
      "step": 55290
    },
    {
      "epoch": 0.00033746337890625,
      "step": 55290,
      "training_step_time": 0.3980443477630615
    },
    {
      "epoch": 0.000337469482421875,
      "model_forward_time": 0.1149594783782959,
      "step": 55291
    },
    {
      "epoch": 0.000337469482421875,
      "step": 55291,
      "training_step_time": 0.39118361473083496
    },
    {
      "epoch": 0.0003374755859375,
      "model_forward_time": 0.11482930183410645,
      "step": 55292
    },
    {
      "epoch": 0.0003374755859375,
      "step": 55292,
      "training_step_time": 0.3902275562286377
    },
    {
      "epoch": 0.000337481689453125,
      "model_forward_time": 0.11492061614990234,
      "step": 55293
    },
    {
      "epoch": 0.000337481689453125,
      "step": 55293,
      "training_step_time": 0.41814279556274414
    },
    {
      "epoch": 0.00033748779296875,
      "model_forward_time": 0.11488485336303711,
      "step": 55294
    },
    {
      "epoch": 0.00033748779296875,
      "step": 55294,
      "training_step_time": 0.4035050868988037
    },
    {
      "epoch": 0.000337493896484375,
      "model_forward_time": 0.11508560180664062,
      "step": 55295
    },
    {
      "epoch": 0.000337493896484375,
      "step": 55295,
      "training_step_time": 0.40470337867736816
    },
    {
      "epoch": 0.0003375,
      "model_forward_time": 0.11557149887084961,
      "step": 55296
    },
    {
      "epoch": 0.0003375,
      "step": 55296,
      "training_step_time": 0.4431121349334717
    },
    {
      "epoch": 0.000337506103515625,
      "model_forward_time": 0.11534333229064941,
      "step": 55297
    },
    {
      "epoch": 0.000337506103515625,
      "step": 55297,
      "training_step_time": 0.49234652519226074
    },
    {
      "epoch": 0.00033751220703125,
      "model_forward_time": 0.11571717262268066,
      "step": 55298
    },
    {
      "epoch": 0.00033751220703125,
      "step": 55298,
      "training_step_time": 0.4677424430847168
    },
    {
      "epoch": 0.000337518310546875,
      "model_forward_time": 0.11451148986816406,
      "step": 55299
    },
    {
      "epoch": 0.000337518310546875,
      "step": 55299,
      "training_step_time": 0.48911237716674805
    },
    {
      "epoch": 0.0003375244140625,
      "grad_norm": 0.07588104158639908,
      "learning_rate": 1.6682296631905626e-06,
      "loss": 0.0351,
      "step": 55300
    },
    {
      "epoch": 0.0003375244140625,
      "model_forward_time": 0.1157524585723877,
      "step": 55300
    },
    {
      "epoch": 0.0003375244140625,
      "step": 55300,
      "training_step_time": 0.44678807258605957
    },
    {
      "epoch": 0.000337530517578125,
      "model_forward_time": 0.12059807777404785,
      "step": 55301
    },
    {
      "epoch": 0.000337530517578125,
      "step": 55301,
      "training_step_time": 0.5279688835144043
    },
    {
      "epoch": 0.00033753662109375,
      "model_forward_time": 0.11853480339050293,
      "step": 55302
    },
    {
      "epoch": 0.00033753662109375,
      "step": 55302,
      "training_step_time": 0.5855467319488525
    },
    {
      "epoch": 0.000337542724609375,
      "model_forward_time": 0.12823987007141113,
      "step": 55303
    },
    {
      "epoch": 0.000337542724609375,
      "step": 55303,
      "training_step_time": 0.6401395797729492
    },
    {
      "epoch": 0.000337548828125,
      "model_forward_time": 0.11701822280883789,
      "step": 55304
    },
    {
      "epoch": 0.000337548828125,
      "step": 55304,
      "training_step_time": 0.7246987819671631
    },
    {
      "epoch": 0.000337554931640625,
      "model_forward_time": 0.11822819709777832,
      "step": 55305
    },
    {
      "epoch": 0.000337554931640625,
      "step": 55305,
      "training_step_time": 0.6578712463378906
    },
    {
      "epoch": 0.00033756103515625,
      "model_forward_time": 0.1194770336151123,
      "step": 55306
    },
    {
      "epoch": 0.00033756103515625,
      "step": 55306,
      "training_step_time": 0.6070389747619629
    },
    {
      "epoch": 0.000337567138671875,
      "model_forward_time": 0.12181782722473145,
      "step": 55307
    },
    {
      "epoch": 0.000337567138671875,
      "step": 55307,
      "training_step_time": 0.6441726684570312
    },
    {
      "epoch": 0.0003375732421875,
      "model_forward_time": 0.1379852294921875,
      "step": 55308
    },
    {
      "epoch": 0.0003375732421875,
      "step": 55308,
      "training_step_time": 0.7556750774383545
    },
    {
      "epoch": 0.000337579345703125,
      "model_forward_time": 0.12073779106140137,
      "step": 55309
    },
    {
      "epoch": 0.000337579345703125,
      "step": 55309,
      "training_step_time": 0.7305407524108887
    },
    {
      "epoch": 0.00033758544921875,
      "grad_norm": 0.11978653818368912,
      "learning_rate": 1.6611778958666835e-06,
      "loss": 0.036,
      "step": 55310
    },
    {
      "epoch": 0.00033758544921875,
      "model_forward_time": 0.12023353576660156,
      "step": 55310
    },
    {
      "epoch": 0.00033758544921875,
      "step": 55310,
      "training_step_time": 0.7008545398712158
    },
    {
      "epoch": 0.000337591552734375,
      "model_forward_time": 0.11772942543029785,
      "step": 55311
    },
    {
      "epoch": 0.000337591552734375,
      "step": 55311,
      "training_step_time": 0.6767916679382324
    },
    {
      "epoch": 0.00033759765625,
      "model_forward_time": 0.11579060554504395,
      "step": 55312
    },
    {
      "epoch": 0.00033759765625,
      "step": 55312,
      "training_step_time": 0.705937385559082
    },
    {
      "epoch": 0.000337603759765625,
      "model_forward_time": 0.11806964874267578,
      "step": 55313
    },
    {
      "epoch": 0.000337603759765625,
      "step": 55313,
      "training_step_time": 0.6497311592102051
    },
    {
      "epoch": 0.00033760986328125,
      "model_forward_time": 0.1209266185760498,
      "step": 55314
    },
    {
      "epoch": 0.00033760986328125,
      "step": 55314,
      "training_step_time": 0.6634011268615723
    },
    {
      "epoch": 0.000337615966796875,
      "model_forward_time": 0.11821246147155762,
      "step": 55315
    },
    {
      "epoch": 0.000337615966796875,
      "step": 55315,
      "training_step_time": 0.636336088180542
    },
    {
      "epoch": 0.0003376220703125,
      "model_forward_time": 0.12309026718139648,
      "step": 55316
    },
    {
      "epoch": 0.0003376220703125,
      "step": 55316,
      "training_step_time": 0.7811367511749268
    },
    {
      "epoch": 0.000337628173828125,
      "model_forward_time": 0.11750125885009766,
      "step": 55317
    },
    {
      "epoch": 0.000337628173828125,
      "step": 55317,
      "training_step_time": 0.7073729038238525
    },
    {
      "epoch": 0.00033763427734375,
      "model_forward_time": 0.11657094955444336,
      "step": 55318
    },
    {
      "epoch": 0.00033763427734375,
      "step": 55318,
      "training_step_time": 0.6979482173919678
    },
    {
      "epoch": 0.000337640380859375,
      "model_forward_time": 0.1324319839477539,
      "step": 55319
    },
    {
      "epoch": 0.000337640380859375,
      "step": 55319,
      "training_step_time": 0.7334837913513184
    },
    {
      "epoch": 0.000337646484375,
      "grad_norm": 0.10257621854543686,
      "learning_rate": 1.6541408126006463e-06,
      "loss": 0.0393,
      "step": 55320
    },
    {
      "epoch": 0.000337646484375,
      "model_forward_time": 0.12112879753112793,
      "step": 55320
    },
    {
      "epoch": 0.000337646484375,
      "step": 55320,
      "training_step_time": 0.7303788661956787
    },
    {
      "epoch": 0.000337652587890625,
      "model_forward_time": 0.11867284774780273,
      "step": 55321
    },
    {
      "epoch": 0.000337652587890625,
      "step": 55321,
      "training_step_time": 0.6982405185699463
    },
    {
      "epoch": 0.00033765869140625,
      "model_forward_time": 0.11882519721984863,
      "step": 55322
    },
    {
      "epoch": 0.00033765869140625,
      "step": 55322,
      "training_step_time": 0.6351220607757568
    },
    {
      "epoch": 0.000337664794921875,
      "model_forward_time": 0.12188363075256348,
      "step": 55323
    },
    {
      "epoch": 0.000337664794921875,
      "step": 55323,
      "training_step_time": 0.6769945621490479
    },
    {
      "epoch": 0.0003376708984375,
      "model_forward_time": 0.11542677879333496,
      "step": 55324
    },
    {
      "epoch": 0.0003376708984375,
      "step": 55324,
      "training_step_time": 0.6444590091705322
    },
    {
      "epoch": 0.000337677001953125,
      "model_forward_time": 0.12480854988098145,
      "step": 55325
    },
    {
      "epoch": 0.000337677001953125,
      "step": 55325,
      "training_step_time": 0.8388638496398926
    },
    {
      "epoch": 0.00033768310546875,
      "model_forward_time": 0.12027430534362793,
      "step": 55326
    },
    {
      "epoch": 0.00033768310546875,
      "step": 55326,
      "training_step_time": 0.786588191986084
    },
    {
      "epoch": 0.000337689208984375,
      "model_forward_time": 0.11848235130310059,
      "step": 55327
    },
    {
      "epoch": 0.000337689208984375,
      "step": 55327,
      "training_step_time": 0.6542553901672363
    },
    {
      "epoch": 0.0003376953125,
      "model_forward_time": 0.12798738479614258,
      "step": 55328
    },
    {
      "epoch": 0.0003376953125,
      "step": 55328,
      "training_step_time": 0.699408769607544
    },
    {
      "epoch": 0.000337701416015625,
      "model_forward_time": 0.11647844314575195,
      "step": 55329
    },
    {
      "epoch": 0.000337701416015625,
      "step": 55329,
      "training_step_time": 0.7416281700134277
    },
    {
      "epoch": 0.00033770751953125,
      "grad_norm": 0.093777135014534,
      "learning_rate": 1.6471184155301355e-06,
      "loss": 0.0357,
      "step": 55330
    },
    {
      "epoch": 0.00033770751953125,
      "model_forward_time": 0.11689996719360352,
      "step": 55330
    },
    {
      "epoch": 0.00033770751953125,
      "step": 55330,
      "training_step_time": 0.7323379516601562
    },
    {
      "epoch": 0.000337713623046875,
      "model_forward_time": 0.11582803726196289,
      "step": 55331
    },
    {
      "epoch": 0.000337713623046875,
      "step": 55331,
      "training_step_time": 0.6515822410583496
    },
    {
      "epoch": 0.0003377197265625,
      "model_forward_time": 0.12439990043640137,
      "step": 55332
    },
    {
      "epoch": 0.0003377197265625,
      "step": 55332,
      "training_step_time": 0.6273789405822754
    },
    {
      "epoch": 0.000337725830078125,
      "model_forward_time": 0.1323075294494629,
      "step": 55333
    },
    {
      "epoch": 0.000337725830078125,
      "step": 55333,
      "training_step_time": 0.6867260932922363
    },
    {
      "epoch": 0.00033773193359375,
      "model_forward_time": 0.12014222145080566,
      "step": 55334
    },
    {
      "epoch": 0.00033773193359375,
      "step": 55334,
      "training_step_time": 0.6856794357299805
    },
    {
      "epoch": 0.000337738037109375,
      "model_forward_time": 0.11687827110290527,
      "step": 55335
    },
    {
      "epoch": 0.000337738037109375,
      "step": 55335,
      "training_step_time": 0.6471030712127686
    },
    {
      "epoch": 0.000337744140625,
      "model_forward_time": 0.12252354621887207,
      "step": 55336
    },
    {
      "epoch": 0.000337744140625,
      "step": 55336,
      "training_step_time": 0.6993129253387451
    },
    {
      "epoch": 0.000337750244140625,
      "model_forward_time": 0.1157071590423584,
      "step": 55337
    },
    {
      "epoch": 0.000337750244140625,
      "step": 55337,
      "training_step_time": 0.7141854763031006
    },
    {
      "epoch": 0.00033775634765625,
      "model_forward_time": 0.12761664390563965,
      "step": 55338
    },
    {
      "epoch": 0.00033775634765625,
      "step": 55338,
      "training_step_time": 0.8437426090240479
    },
    {
      "epoch": 0.000337762451171875,
      "model_forward_time": 0.14246416091918945,
      "step": 55339
    },
    {
      "epoch": 0.000337762451171875,
      "step": 55339,
      "training_step_time": 0.6357548236846924
    },
    {
      "epoch": 0.0003377685546875,
      "grad_norm": 0.11636169999837875,
      "learning_rate": 1.6401107067883559e-06,
      "loss": 0.0363,
      "step": 55340
    },
    {
      "epoch": 0.0003377685546875,
      "model_forward_time": 0.12116312980651855,
      "step": 55340
    },
    {
      "epoch": 0.0003377685546875,
      "step": 55340,
      "training_step_time": 0.6800704002380371
    },
    {
      "epoch": 0.000337774658203125,
      "model_forward_time": 0.11683535575866699,
      "step": 55341
    },
    {
      "epoch": 0.000337774658203125,
      "step": 55341,
      "training_step_time": 0.6617820262908936
    },
    {
      "epoch": 0.00033778076171875,
      "model_forward_time": 0.12005758285522461,
      "step": 55342
    },
    {
      "epoch": 0.00033778076171875,
      "step": 55342,
      "training_step_time": 0.6669323444366455
    },
    {
      "epoch": 0.000337786865234375,
      "model_forward_time": 0.12400221824645996,
      "step": 55343
    },
    {
      "epoch": 0.000337786865234375,
      "step": 55343,
      "training_step_time": 0.7060818672180176
    },
    {
      "epoch": 0.00033779296875,
      "model_forward_time": 0.11715579032897949,
      "step": 55344
    },
    {
      "epoch": 0.00033779296875,
      "step": 55344,
      "training_step_time": 0.6948208808898926
    },
    {
      "epoch": 0.000337799072265625,
      "model_forward_time": 0.12561249732971191,
      "step": 55345
    },
    {
      "epoch": 0.000337799072265625,
      "step": 55345,
      "training_step_time": 0.7623965740203857
    },
    {
      "epoch": 0.00033780517578125,
      "model_forward_time": 0.1192014217376709,
      "step": 55346
    },
    {
      "epoch": 0.00033780517578125,
      "step": 55346,
      "training_step_time": 0.661057710647583
    },
    {
      "epoch": 0.000337811279296875,
      "model_forward_time": 0.11861777305603027,
      "step": 55347
    },
    {
      "epoch": 0.000337811279296875,
      "step": 55347,
      "training_step_time": 0.7663905620574951
    },
    {
      "epoch": 0.0003378173828125,
      "model_forward_time": 0.11971068382263184,
      "step": 55348
    },
    {
      "epoch": 0.0003378173828125,
      "step": 55348,
      "training_step_time": 0.6514670848846436
    },
    {
      "epoch": 0.000337823486328125,
      "model_forward_time": 0.11878561973571777,
      "step": 55349
    },
    {
      "epoch": 0.000337823486328125,
      "step": 55349,
      "training_step_time": 0.6282069683074951
    },
    {
      "epoch": 0.00033782958984375,
      "grad_norm": 0.12034381926059723,
      "learning_rate": 1.6331176885040878e-06,
      "loss": 0.0364,
      "step": 55350
    },
    {
      "epoch": 0.00033782958984375,
      "model_forward_time": 0.13718223571777344,
      "step": 55350
    },
    {
      "epoch": 0.00033782958984375,
      "step": 55350,
      "training_step_time": 0.716118574142456
    },
    {
      "epoch": 0.000337835693359375,
      "model_forward_time": 0.11877560615539551,
      "step": 55351
    },
    {
      "epoch": 0.000337835693359375,
      "step": 55351,
      "training_step_time": 0.6661093235015869
    },
    {
      "epoch": 0.000337841796875,
      "model_forward_time": 0.12974977493286133,
      "step": 55352
    },
    {
      "epoch": 0.000337841796875,
      "step": 55352,
      "training_step_time": 0.6718270778656006
    },
    {
      "epoch": 0.000337847900390625,
      "model_forward_time": 0.1244504451751709,
      "step": 55353
    },
    {
      "epoch": 0.000337847900390625,
      "step": 55353,
      "training_step_time": 0.6977424621582031
    },
    {
      "epoch": 0.00033785400390625,
      "model_forward_time": 0.13337254524230957,
      "step": 55354
    },
    {
      "epoch": 0.00033785400390625,
      "step": 55354,
      "training_step_time": 0.6395416259765625
    },
    {
      "epoch": 0.000337860107421875,
      "model_forward_time": 0.12180399894714355,
      "step": 55355
    },
    {
      "epoch": 0.000337860107421875,
      "step": 55355,
      "training_step_time": 0.684619665145874
    },
    {
      "epoch": 0.0003378662109375,
      "model_forward_time": 0.11686348915100098,
      "step": 55356
    },
    {
      "epoch": 0.0003378662109375,
      "step": 55356,
      "training_step_time": 0.6702718734741211
    },
    {
      "epoch": 0.000337872314453125,
      "model_forward_time": 0.1193685531616211,
      "step": 55357
    },
    {
      "epoch": 0.000337872314453125,
      "step": 55357,
      "training_step_time": 0.6790580749511719
    },
    {
      "epoch": 0.00033787841796875,
      "model_forward_time": 0.11810564994812012,
      "step": 55358
    },
    {
      "epoch": 0.00033787841796875,
      "step": 55358,
      "training_step_time": 0.6710278987884521
    },
    {
      "epoch": 0.000337884521484375,
      "model_forward_time": 0.11790132522583008,
      "step": 55359
    },
    {
      "epoch": 0.000337884521484375,
      "step": 55359,
      "training_step_time": 0.6484003067016602
    },
    {
      "epoch": 0.000337890625,
      "grad_norm": 0.08884226530790329,
      "learning_rate": 1.626139362801604e-06,
      "loss": 0.035,
      "step": 55360
    },
    {
      "epoch": 0.000337890625,
      "model_forward_time": 0.12225008010864258,
      "step": 55360
    },
    {
      "epoch": 0.000337890625,
      "step": 55360,
      "training_step_time": 0.628669023513794
    },
    {
      "epoch": 0.000337896728515625,
      "model_forward_time": 0.1202239990234375,
      "step": 55361
    },
    {
      "epoch": 0.000337896728515625,
      "step": 55361,
      "training_step_time": 0.6840660572052002
    },
    {
      "epoch": 0.00033790283203125,
      "model_forward_time": 0.11950159072875977,
      "step": 55362
    },
    {
      "epoch": 0.00033790283203125,
      "step": 55362,
      "training_step_time": 0.7214937210083008
    },
    {
      "epoch": 0.000337908935546875,
      "model_forward_time": 0.12309718132019043,
      "step": 55363
    },
    {
      "epoch": 0.000337908935546875,
      "step": 55363,
      "training_step_time": 0.6806998252868652
    },
    {
      "epoch": 0.0003379150390625,
      "model_forward_time": 0.11884689331054688,
      "step": 55364
    },
    {
      "epoch": 0.0003379150390625,
      "step": 55364,
      "training_step_time": 0.6482334136962891
    },
    {
      "epoch": 0.000337921142578125,
      "model_forward_time": 0.12640881538391113,
      "step": 55365
    },
    {
      "epoch": 0.000337921142578125,
      "step": 55365,
      "training_step_time": 0.6805260181427002
    },
    {
      "epoch": 0.00033792724609375,
      "model_forward_time": 0.11992287635803223,
      "step": 55366
    },
    {
      "epoch": 0.00033792724609375,
      "step": 55366,
      "training_step_time": 0.7049548625946045
    },
    {
      "epoch": 0.000337933349609375,
      "model_forward_time": 0.11922264099121094,
      "step": 55367
    },
    {
      "epoch": 0.000337933349609375,
      "step": 55367,
      "training_step_time": 0.6478664875030518
    },
    {
      "epoch": 0.000337939453125,
      "model_forward_time": 0.1244513988494873,
      "step": 55368
    },
    {
      "epoch": 0.000337939453125,
      "step": 55368,
      "training_step_time": 0.5178141593933105
    },
    {
      "epoch": 0.000337945556640625,
      "model_forward_time": 0.12703895568847656,
      "step": 55369
    },
    {
      "epoch": 0.000337945556640625,
      "step": 55369,
      "training_step_time": 0.5308895111083984
    },
    {
      "epoch": 0.00033795166015625,
      "grad_norm": 0.09993131458759308,
      "learning_rate": 1.6191757318007429e-06,
      "loss": 0.0413,
      "step": 55370
    },
    {
      "epoch": 0.00033795166015625,
      "model_forward_time": 0.11818456649780273,
      "step": 55370
    },
    {
      "epoch": 0.00033795166015625,
      "step": 55370,
      "training_step_time": 0.5279116630554199
    },
    {
      "epoch": 0.000337957763671875,
      "model_forward_time": 0.12055706977844238,
      "step": 55371
    },
    {
      "epoch": 0.000337957763671875,
      "step": 55371,
      "training_step_time": 0.4988846778869629
    },
    {
      "epoch": 0.0003379638671875,
      "model_forward_time": 0.11824512481689453,
      "step": 55372
    },
    {
      "epoch": 0.0003379638671875,
      "step": 55372,
      "training_step_time": 0.6264035701751709
    },
    {
      "epoch": 0.000337969970703125,
      "model_forward_time": 0.11674785614013672,
      "step": 55373
    },
    {
      "epoch": 0.000337969970703125,
      "step": 55373,
      "training_step_time": 0.45266103744506836
    },
    {
      "epoch": 0.00033797607421875,
      "model_forward_time": 0.11683893203735352,
      "step": 55374
    },
    {
      "epoch": 0.00033797607421875,
      "step": 55374,
      "training_step_time": 0.4670732021331787
    },
    {
      "epoch": 0.000337982177734375,
      "model_forward_time": 0.11530351638793945,
      "step": 55375
    },
    {
      "epoch": 0.000337982177734375,
      "step": 55375,
      "training_step_time": 0.40795207023620605
    },
    {
      "epoch": 0.00033798828125,
      "model_forward_time": 0.11506009101867676,
      "step": 55376
    },
    {
      "epoch": 0.00033798828125,
      "step": 55376,
      "training_step_time": 0.4078223705291748
    },
    {
      "epoch": 0.000337994384765625,
      "model_forward_time": 0.11530327796936035,
      "step": 55377
    },
    {
      "epoch": 0.000337994384765625,
      "step": 55377,
      "training_step_time": 0.3718717098236084
    },
    {
      "epoch": 0.00033800048828125,
      "model_forward_time": 0.11497187614440918,
      "step": 55378
    },
    {
      "epoch": 0.00033800048828125,
      "step": 55378,
      "training_step_time": 0.48261404037475586
    },
    {
      "epoch": 0.000338006591796875,
      "model_forward_time": 0.1148383617401123,
      "step": 55379
    },
    {
      "epoch": 0.000338006591796875,
      "step": 55379,
      "training_step_time": 0.4729952812194824
    },
    {
      "epoch": 0.0003380126953125,
      "grad_norm": 0.06923098862171173,
      "learning_rate": 1.6122267976168781e-06,
      "loss": 0.0338,
      "step": 55380
    },
    {
      "epoch": 0.0003380126953125,
      "model_forward_time": 0.11452198028564453,
      "step": 55380
    },
    {
      "epoch": 0.0003380126953125,
      "step": 55380,
      "training_step_time": 0.43671607971191406
    },
    {
      "epoch": 0.000338018798828125,
      "model_forward_time": 0.11498165130615234,
      "step": 55381
    },
    {
      "epoch": 0.000338018798828125,
      "step": 55381,
      "training_step_time": 0.4043731689453125
    },
    {
      "epoch": 0.00033802490234375,
      "model_forward_time": 0.11672258377075195,
      "step": 55382
    },
    {
      "epoch": 0.00033802490234375,
      "step": 55382,
      "training_step_time": 0.41397714614868164
    },
    {
      "epoch": 0.000338031005859375,
      "model_forward_time": 0.11513948440551758,
      "step": 55383
    },
    {
      "epoch": 0.000338031005859375,
      "step": 55383,
      "training_step_time": 0.41622066497802734
    },
    {
      "epoch": 0.000338037109375,
      "model_forward_time": 0.11552953720092773,
      "step": 55384
    },
    {
      "epoch": 0.000338037109375,
      "step": 55384,
      "training_step_time": 0.39071202278137207
    },
    {
      "epoch": 0.000338043212890625,
      "model_forward_time": 0.11515116691589355,
      "step": 55385
    },
    {
      "epoch": 0.000338043212890625,
      "step": 55385,
      "training_step_time": 0.403972864151001
    },
    {
      "epoch": 0.00033804931640625,
      "model_forward_time": 0.11444211006164551,
      "step": 55386
    },
    {
      "epoch": 0.00033804931640625,
      "step": 55386,
      "training_step_time": 0.3962595462799072
    },
    {
      "epoch": 0.000338055419921875,
      "model_forward_time": 0.11497044563293457,
      "step": 55387
    },
    {
      "epoch": 0.000338055419921875,
      "step": 55387,
      "training_step_time": 0.4692270755767822
    },
    {
      "epoch": 0.0003380615234375,
      "model_forward_time": 0.11509323120117188,
      "step": 55388
    },
    {
      "epoch": 0.0003380615234375,
      "step": 55388,
      "training_step_time": 0.3914926052093506
    },
    {
      "epoch": 0.000338067626953125,
      "model_forward_time": 0.11494565010070801,
      "step": 55389
    },
    {
      "epoch": 0.000338067626953125,
      "step": 55389,
      "training_step_time": 0.40688395500183105
    },
    {
      "epoch": 0.00033807373046875,
      "grad_norm": 0.09383952617645264,
      "learning_rate": 1.6052925623609049e-06,
      "loss": 0.0368,
      "step": 55390
    },
    {
      "epoch": 0.00033807373046875,
      "model_forward_time": 0.11540794372558594,
      "step": 55390
    },
    {
      "epoch": 0.00033807373046875,
      "step": 55390,
      "training_step_time": 0.3847792148590088
    },
    {
      "epoch": 0.000338079833984375,
      "model_forward_time": 0.11500763893127441,
      "step": 55391
    },
    {
      "epoch": 0.000338079833984375,
      "step": 55391,
      "training_step_time": 0.38638949394226074
    },
    {
      "epoch": 0.0003380859375,
      "model_forward_time": 0.11512327194213867,
      "step": 55392
    },
    {
      "epoch": 0.0003380859375,
      "step": 55392,
      "training_step_time": 0.366412878036499
    },
    {
      "epoch": 0.000338092041015625,
      "model_forward_time": 0.1151895523071289,
      "step": 55393
    },
    {
      "epoch": 0.000338092041015625,
      "step": 55393,
      "training_step_time": 0.46616101264953613
    },
    {
      "epoch": 0.00033809814453125,
      "model_forward_time": 0.1149907112121582,
      "step": 55394
    },
    {
      "epoch": 0.00033809814453125,
      "step": 55394,
      "training_step_time": 0.4522421360015869
    },
    {
      "epoch": 0.000338104248046875,
      "model_forward_time": 0.11607956886291504,
      "step": 55395
    },
    {
      "epoch": 0.000338104248046875,
      "step": 55395,
      "training_step_time": 0.49514245986938477
    },
    {
      "epoch": 0.0003381103515625,
      "model_forward_time": 0.11452293395996094,
      "step": 55396
    },
    {
      "epoch": 0.0003381103515625,
      "step": 55396,
      "training_step_time": 0.42761754989624023
    },
    {
      "epoch": 0.000338116455078125,
      "model_forward_time": 0.11422085762023926,
      "step": 55397
    },
    {
      "epoch": 0.000338116455078125,
      "step": 55397,
      "training_step_time": 0.4183189868927002
    },
    {
      "epoch": 0.00033812255859375,
      "model_forward_time": 0.11476373672485352,
      "step": 55398
    },
    {
      "epoch": 0.00033812255859375,
      "step": 55398,
      "training_step_time": 0.3969855308532715
    },
    {
      "epoch": 0.000338128662109375,
      "model_forward_time": 0.11485123634338379,
      "step": 55399
    },
    {
      "epoch": 0.000338128662109375,
      "step": 55399,
      "training_step_time": 0.40050220489501953
    },
    {
      "epoch": 0.000338134765625,
      "grad_norm": 0.09185374528169632,
      "learning_rate": 1.5983730281392662e-06,
      "loss": 0.0344,
      "step": 55400
    },
    {
      "epoch": 0.000338134765625,
      "model_forward_time": 0.11459183692932129,
      "step": 55400
    },
    {
      "epoch": 0.000338134765625,
      "step": 55400,
      "training_step_time": 0.3924872875213623
    },
    {
      "epoch": 0.000338140869140625,
      "model_forward_time": 0.11517810821533203,
      "step": 55401
    },
    {
      "epoch": 0.000338140869140625,
      "step": 55401,
      "training_step_time": 0.3995015621185303
    },
    {
      "epoch": 0.00033814697265625,
      "model_forward_time": 0.11518001556396484,
      "step": 55402
    },
    {
      "epoch": 0.00033814697265625,
      "step": 55402,
      "training_step_time": 0.45008397102355957
    },
    {
      "epoch": 0.000338153076171875,
      "model_forward_time": 0.11519360542297363,
      "step": 55403
    },
    {
      "epoch": 0.000338153076171875,
      "step": 55403,
      "training_step_time": 0.4948878288269043
    },
    {
      "epoch": 0.0003381591796875,
      "model_forward_time": 0.11629080772399902,
      "step": 55404
    },
    {
      "epoch": 0.0003381591796875,
      "step": 55404,
      "training_step_time": 0.3925347328186035
    },
    {
      "epoch": 0.000338165283203125,
      "model_forward_time": 0.11500096321105957,
      "step": 55405
    },
    {
      "epoch": 0.000338165283203125,
      "step": 55405,
      "training_step_time": 0.38958191871643066
    },
    {
      "epoch": 0.00033817138671875,
      "model_forward_time": 0.11457538604736328,
      "step": 55406
    },
    {
      "epoch": 0.00033817138671875,
      "step": 55406,
      "training_step_time": 0.3977363109588623
    },
    {
      "epoch": 0.000338177490234375,
      "model_forward_time": 0.11519384384155273,
      "step": 55407
    },
    {
      "epoch": 0.000338177490234375,
      "step": 55407,
      "training_step_time": 0.5468096733093262
    },
    {
      "epoch": 0.00033818359375,
      "model_forward_time": 0.11461806297302246,
      "step": 55408
    },
    {
      "epoch": 0.00033818359375,
      "step": 55408,
      "training_step_time": 0.47148847579956055
    },
    {
      "epoch": 0.000338189697265625,
      "model_forward_time": 0.11579179763793945,
      "step": 55409
    },
    {
      "epoch": 0.000338189697265625,
      "step": 55409,
      "training_step_time": 0.42162418365478516
    },
    {
      "epoch": 0.00033819580078125,
      "grad_norm": 0.107457235455513,
      "learning_rate": 1.5914681970539192e-06,
      "loss": 0.0368,
      "step": 55410
    },
    {
      "epoch": 0.00033819580078125,
      "model_forward_time": 0.11479353904724121,
      "step": 55410
    },
    {
      "epoch": 0.00033819580078125,
      "step": 55410,
      "training_step_time": 0.4171769618988037
    },
    {
      "epoch": 0.000338201904296875,
      "model_forward_time": 0.11439752578735352,
      "step": 55411
    },
    {
      "epoch": 0.000338201904296875,
      "step": 55411,
      "training_step_time": 0.42365288734436035
    },
    {
      "epoch": 0.0003382080078125,
      "model_forward_time": 0.11451601982116699,
      "step": 55412
    },
    {
      "epoch": 0.0003382080078125,
      "step": 55412,
      "training_step_time": 0.39723849296569824
    },
    {
      "epoch": 0.000338214111328125,
      "model_forward_time": 0.11545467376708984,
      "step": 55413
    },
    {
      "epoch": 0.000338214111328125,
      "step": 55413,
      "training_step_time": 0.3901023864746094
    },
    {
      "epoch": 0.00033822021484375,
      "model_forward_time": 0.11512565612792969,
      "step": 55414
    },
    {
      "epoch": 0.00033822021484375,
      "step": 55414,
      "training_step_time": 0.3912529945373535
    },
    {
      "epoch": 0.000338226318359375,
      "model_forward_time": 0.11516046524047852,
      "step": 55415
    },
    {
      "epoch": 0.000338226318359375,
      "step": 55415,
      "training_step_time": 0.39197826385498047
    },
    {
      "epoch": 0.000338232421875,
      "model_forward_time": 0.11530113220214844,
      "step": 55416
    },
    {
      "epoch": 0.000338232421875,
      "step": 55416,
      "training_step_time": 0.4001138210296631
    },
    {
      "epoch": 0.000338238525390625,
      "model_forward_time": 0.11469316482543945,
      "step": 55417
    },
    {
      "epoch": 0.000338238525390625,
      "step": 55417,
      "training_step_time": 0.49840617179870605
    },
    {
      "epoch": 0.00033824462890625,
      "model_forward_time": 0.11488723754882812,
      "step": 55418
    },
    {
      "epoch": 0.00033824462890625,
      "step": 55418,
      "training_step_time": 0.3888070583343506
    },
    {
      "epoch": 0.000338250732421875,
      "model_forward_time": 0.11472463607788086,
      "step": 55419
    },
    {
      "epoch": 0.000338250732421875,
      "step": 55419,
      "training_step_time": 0.39325809478759766
    },
    {
      "epoch": 0.0003382568359375,
      "grad_norm": 0.09507634490728378,
      "learning_rate": 1.5845780712023973e-06,
      "loss": 0.0389,
      "step": 55420
    },
    {
      "epoch": 0.0003382568359375,
      "model_forward_time": 0.1148073673248291,
      "step": 55420
    },
    {
      "epoch": 0.0003382568359375,
      "step": 55420,
      "training_step_time": 0.3889310359954834
    },
    {
      "epoch": 0.000338262939453125,
      "model_forward_time": 0.11510515213012695,
      "step": 55421
    },
    {
      "epoch": 0.000338262939453125,
      "step": 55421,
      "training_step_time": 0.3675222396850586
    },
    {
      "epoch": 0.00033826904296875,
      "model_forward_time": 0.11502981185913086,
      "step": 55422
    },
    {
      "epoch": 0.00033826904296875,
      "step": 55422,
      "training_step_time": 0.4311103820800781
    },
    {
      "epoch": 0.000338275146484375,
      "model_forward_time": 0.115020751953125,
      "step": 55423
    },
    {
      "epoch": 0.000338275146484375,
      "step": 55423,
      "training_step_time": 0.450181245803833
    },
    {
      "epoch": 0.00033828125,
      "model_forward_time": 0.11489677429199219,
      "step": 55424
    },
    {
      "epoch": 0.00033828125,
      "step": 55424,
      "training_step_time": 0.49983954429626465
    },
    {
      "epoch": 0.000338287353515625,
      "model_forward_time": 0.11554503440856934,
      "step": 55425
    },
    {
      "epoch": 0.000338287353515625,
      "step": 55425,
      "training_step_time": 0.3876497745513916
    },
    {
      "epoch": 0.00033829345703125,
      "model_forward_time": 0.11503958702087402,
      "step": 55426
    },
    {
      "epoch": 0.00033829345703125,
      "step": 55426,
      "training_step_time": 0.3861973285675049
    },
    {
      "epoch": 0.000338299560546875,
      "model_forward_time": 0.11539340019226074,
      "step": 55427
    },
    {
      "epoch": 0.000338299560546875,
      "step": 55427,
      "training_step_time": 0.4019649028778076
    },
    {
      "epoch": 0.0003383056640625,
      "model_forward_time": 0.11532807350158691,
      "step": 55428
    },
    {
      "epoch": 0.0003383056640625,
      "step": 55428,
      "training_step_time": 0.4025876522064209
    },
    {
      "epoch": 0.000338311767578125,
      "model_forward_time": 0.11644458770751953,
      "step": 55429
    },
    {
      "epoch": 0.000338311767578125,
      "step": 55429,
      "training_step_time": 0.38565564155578613
    },
    {
      "epoch": 0.00033831787109375,
      "grad_norm": 0.09032736718654633,
      "learning_rate": 1.5777026526777094e-06,
      "loss": 0.0413,
      "step": 55430
    },
    {
      "epoch": 0.00033831787109375,
      "model_forward_time": 0.11626458168029785,
      "step": 55430
    },
    {
      "epoch": 0.00033831787109375,
      "step": 55430,
      "training_step_time": 0.40854883193969727
    },
    {
      "epoch": 0.000338323974609375,
      "model_forward_time": 0.11498475074768066,
      "step": 55431
    },
    {
      "epoch": 0.000338323974609375,
      "step": 55431,
      "training_step_time": 0.412581205368042
    },
    {
      "epoch": 0.000338330078125,
      "model_forward_time": 0.11586308479309082,
      "step": 55432
    },
    {
      "epoch": 0.000338330078125,
      "step": 55432,
      "training_step_time": 0.436082124710083
    },
    {
      "epoch": 0.000338336181640625,
      "model_forward_time": 0.11452722549438477,
      "step": 55433
    },
    {
      "epoch": 0.000338336181640625,
      "step": 55433,
      "training_step_time": 0.39768218994140625
    },
    {
      "epoch": 0.00033834228515625,
      "model_forward_time": 0.11513447761535645,
      "step": 55434
    },
    {
      "epoch": 0.00033834228515625,
      "step": 55434,
      "training_step_time": 0.40411949157714844
    },
    {
      "epoch": 0.000338348388671875,
      "model_forward_time": 0.1155996322631836,
      "step": 55435
    },
    {
      "epoch": 0.000338348388671875,
      "step": 55435,
      "training_step_time": 0.39077138900756836
    },
    {
      "epoch": 0.0003383544921875,
      "model_forward_time": 0.11522531509399414,
      "step": 55436
    },
    {
      "epoch": 0.0003383544921875,
      "step": 55436,
      "training_step_time": 0.47074031829833984
    },
    {
      "epoch": 0.000338360595703125,
      "model_forward_time": 0.11534261703491211,
      "step": 55437
    },
    {
      "epoch": 0.000338360595703125,
      "step": 55437,
      "training_step_time": 0.499678373336792
    },
    {
      "epoch": 0.00033836669921875,
      "model_forward_time": 0.11579465866088867,
      "step": 55438
    },
    {
      "epoch": 0.00033836669921875,
      "step": 55438,
      "training_step_time": 0.4137568473815918
    },
    {
      "epoch": 0.000338372802734375,
      "model_forward_time": 0.11475110054016113,
      "step": 55439
    },
    {
      "epoch": 0.000338372802734375,
      "step": 55439,
      "training_step_time": 0.3972632884979248
    },
    {
      "epoch": 0.00033837890625,
      "grad_norm": 0.08643089979887009,
      "learning_rate": 1.5708419435684462e-06,
      "loss": 0.0427,
      "step": 55440
    },
    {
      "epoch": 0.00033837890625,
      "model_forward_time": 0.11579227447509766,
      "step": 55440
    },
    {
      "epoch": 0.00033837890625,
      "step": 55440,
      "training_step_time": 0.38607358932495117
    },
    {
      "epoch": 0.000338385009765625,
      "model_forward_time": 0.11556124687194824,
      "step": 55441
    },
    {
      "epoch": 0.000338385009765625,
      "step": 55441,
      "training_step_time": 0.37700724601745605
    },
    {
      "epoch": 0.00033839111328125,
      "model_forward_time": 0.11558175086975098,
      "step": 55442
    },
    {
      "epoch": 0.00033839111328125,
      "step": 55442,
      "training_step_time": 0.379457950592041
    },
    {
      "epoch": 0.000338397216796875,
      "model_forward_time": 0.11559367179870605,
      "step": 55443
    },
    {
      "epoch": 0.000338397216796875,
      "step": 55443,
      "training_step_time": 0.38227248191833496
    },
    {
      "epoch": 0.0003384033203125,
      "model_forward_time": 0.1146538257598877,
      "step": 55444
    },
    {
      "epoch": 0.0003384033203125,
      "step": 55444,
      "training_step_time": 0.39223527908325195
    },
    {
      "epoch": 0.000338409423828125,
      "model_forward_time": 0.11601090431213379,
      "step": 55445
    },
    {
      "epoch": 0.000338409423828125,
      "step": 55445,
      "training_step_time": 0.4690587520599365
    },
    {
      "epoch": 0.00033841552734375,
      "model_forward_time": 0.11564135551452637,
      "step": 55446
    },
    {
      "epoch": 0.00033841552734375,
      "step": 55446,
      "training_step_time": 0.5035054683685303
    },
    {
      "epoch": 0.000338421630859375,
      "model_forward_time": 0.11488556861877441,
      "step": 55447
    },
    {
      "epoch": 0.000338421630859375,
      "step": 55447,
      "training_step_time": 0.39864468574523926
    },
    {
      "epoch": 0.000338427734375,
      "model_forward_time": 0.11526703834533691,
      "step": 55448
    },
    {
      "epoch": 0.000338427734375,
      "step": 55448,
      "training_step_time": 0.40033388137817383
    },
    {
      "epoch": 0.000338433837890625,
      "model_forward_time": 0.11492419242858887,
      "step": 55449
    },
    {
      "epoch": 0.000338433837890625,
      "step": 55449,
      "training_step_time": 0.3935234546661377
    },
    {
      "epoch": 0.00033843994140625,
      "grad_norm": 0.08254694938659668,
      "learning_rate": 1.5639959459586962e-06,
      "loss": 0.0363,
      "step": 55450
    },
    {
      "epoch": 0.00033843994140625,
      "model_forward_time": 0.11496114730834961,
      "step": 55450
    },
    {
      "epoch": 0.00033843994140625,
      "step": 55450,
      "training_step_time": 0.4061777591705322
    },
    {
      "epoch": 0.000338446044921875,
      "model_forward_time": 0.11516070365905762,
      "step": 55451
    },
    {
      "epoch": 0.000338446044921875,
      "step": 55451,
      "training_step_time": 0.46399855613708496
    },
    {
      "epoch": 0.0003384521484375,
      "model_forward_time": 0.1153414249420166,
      "step": 55452
    },
    {
      "epoch": 0.0003384521484375,
      "step": 55452,
      "training_step_time": 0.4302830696105957
    },
    {
      "epoch": 0.000338458251953125,
      "model_forward_time": 0.11512398719787598,
      "step": 55453
    },
    {
      "epoch": 0.000338458251953125,
      "step": 55453,
      "training_step_time": 0.48993659019470215
    },
    {
      "epoch": 0.00033846435546875,
      "model_forward_time": 0.11606574058532715,
      "step": 55454
    },
    {
      "epoch": 0.00033846435546875,
      "step": 55454,
      "training_step_time": 0.3941061496734619
    },
    {
      "epoch": 0.000338470458984375,
      "model_forward_time": 0.11527729034423828,
      "step": 55455
    },
    {
      "epoch": 0.000338470458984375,
      "step": 55455,
      "training_step_time": 0.3891940116882324
    },
    {
      "epoch": 0.0003384765625,
      "model_forward_time": 0.11522054672241211,
      "step": 55456
    },
    {
      "epoch": 0.0003384765625,
      "step": 55456,
      "training_step_time": 0.39830827713012695
    },
    {
      "epoch": 0.000338482666015625,
      "model_forward_time": 0.11503982543945312,
      "step": 55457
    },
    {
      "epoch": 0.000338482666015625,
      "step": 55457,
      "training_step_time": 0.4130828380584717
    },
    {
      "epoch": 0.00033848876953125,
      "model_forward_time": 0.1154477596282959,
      "step": 55458
    },
    {
      "epoch": 0.00033848876953125,
      "step": 55458,
      "training_step_time": 0.3958415985107422
    },
    {
      "epoch": 0.000338494873046875,
      "model_forward_time": 0.11538529396057129,
      "step": 55459
    },
    {
      "epoch": 0.000338494873046875,
      "step": 55459,
      "training_step_time": 0.47165417671203613
    },
    {
      "epoch": 0.0003385009765625,
      "grad_norm": 0.09543344378471375,
      "learning_rate": 1.5571646619281066e-06,
      "loss": 0.036,
      "step": 55460
    },
    {
      "epoch": 0.0003385009765625,
      "model_forward_time": 0.11477541923522949,
      "step": 55460
    },
    {
      "epoch": 0.0003385009765625,
      "step": 55460,
      "training_step_time": 0.4292778968811035
    },
    {
      "epoch": 0.000338507080078125,
      "model_forward_time": 0.11508846282958984,
      "step": 55461
    },
    {
      "epoch": 0.000338507080078125,
      "step": 55461,
      "training_step_time": 0.47995877265930176
    },
    {
      "epoch": 0.00033851318359375,
      "model_forward_time": 0.11486148834228516,
      "step": 55462
    },
    {
      "epoch": 0.00033851318359375,
      "step": 55462,
      "training_step_time": 0.4038212299346924
    },
    {
      "epoch": 0.000338519287109375,
      "model_forward_time": 0.11483025550842285,
      "step": 55463
    },
    {
      "epoch": 0.000338519287109375,
      "step": 55463,
      "training_step_time": 0.39989733695983887
    },
    {
      "epoch": 0.000338525390625,
      "model_forward_time": 0.11597824096679688,
      "step": 55464
    },
    {
      "epoch": 0.000338525390625,
      "step": 55464,
      "training_step_time": 0.4079322814941406
    },
    {
      "epoch": 0.000338531494140625,
      "model_forward_time": 0.11539316177368164,
      "step": 55465
    },
    {
      "epoch": 0.000338531494140625,
      "step": 55465,
      "training_step_time": 0.4163975715637207
    },
    {
      "epoch": 0.00033853759765625,
      "model_forward_time": 0.11484837532043457,
      "step": 55466
    },
    {
      "epoch": 0.00033853759765625,
      "step": 55466,
      "training_step_time": 0.4574103355407715
    },
    {
      "epoch": 0.000338543701171875,
      "model_forward_time": 0.11478161811828613,
      "step": 55467
    },
    {
      "epoch": 0.000338543701171875,
      "step": 55467,
      "training_step_time": 0.5212607383728027
    },
    {
      "epoch": 0.0003385498046875,
      "model_forward_time": 0.1154477596282959,
      "step": 55468
    },
    {
      "epoch": 0.0003385498046875,
      "step": 55468,
      "training_step_time": 0.39050936698913574
    },
    {
      "epoch": 0.000338555908203125,
      "model_forward_time": 0.11464238166809082,
      "step": 55469
    },
    {
      "epoch": 0.000338555908203125,
      "step": 55469,
      "training_step_time": 0.4079930782318115
    },
    {
      "epoch": 0.00033856201171875,
      "grad_norm": 0.07919027656316757,
      "learning_rate": 1.550348093551829e-06,
      "loss": 0.0363,
      "step": 55470
    },
    {
      "epoch": 0.00033856201171875,
      "model_forward_time": 0.11548638343811035,
      "step": 55470
    },
    {
      "epoch": 0.00033856201171875,
      "step": 55470,
      "training_step_time": 0.39336514472961426
    },
    {
      "epoch": 0.000338568115234375,
      "model_forward_time": 0.11555051803588867,
      "step": 55471
    },
    {
      "epoch": 0.000338568115234375,
      "step": 55471,
      "training_step_time": 0.3855173587799072
    },
    {
      "epoch": 0.00033857421875,
      "model_forward_time": 0.11552691459655762,
      "step": 55472
    },
    {
      "epoch": 0.00033857421875,
      "step": 55472,
      "training_step_time": 0.3987424373626709
    },
    {
      "epoch": 0.000338580322265625,
      "model_forward_time": 0.11548113822937012,
      "step": 55473
    },
    {
      "epoch": 0.000338580322265625,
      "step": 55473,
      "training_step_time": 0.39658546447753906
    },
    {
      "epoch": 0.00033858642578125,
      "model_forward_time": 0.11506128311157227,
      "step": 55474
    },
    {
      "epoch": 0.00033858642578125,
      "step": 55474,
      "training_step_time": 0.4005417823791504
    },
    {
      "epoch": 0.000338592529296875,
      "model_forward_time": 0.11475443840026855,
      "step": 55475
    },
    {
      "epoch": 0.000338592529296875,
      "step": 55475,
      "training_step_time": 0.3974025249481201
    },
    {
      "epoch": 0.0003385986328125,
      "model_forward_time": 0.11521434783935547,
      "step": 55476
    },
    {
      "epoch": 0.0003385986328125,
      "step": 55476,
      "training_step_time": 0.4050025939941406
    },
    {
      "epoch": 0.000338604736328125,
      "model_forward_time": 0.11522698402404785,
      "step": 55477
    },
    {
      "epoch": 0.000338604736328125,
      "step": 55477,
      "training_step_time": 0.39772486686706543
    },
    {
      "epoch": 0.00033861083984375,
      "model_forward_time": 0.11481142044067383,
      "step": 55478
    },
    {
      "epoch": 0.00033861083984375,
      "step": 55478,
      "training_step_time": 0.3913078308105469
    },
    {
      "epoch": 0.000338616943359375,
      "model_forward_time": 0.11580538749694824,
      "step": 55479
    },
    {
      "epoch": 0.000338616943359375,
      "step": 55479,
      "training_step_time": 0.4119894504547119
    },
    {
      "epoch": 0.000338623046875,
      "grad_norm": 0.13620610535144806,
      "learning_rate": 1.5435462429005675e-06,
      "loss": 0.0383,
      "step": 55480
    },
    {
      "epoch": 0.000338623046875,
      "model_forward_time": 0.1149454116821289,
      "step": 55480
    },
    {
      "epoch": 0.000338623046875,
      "step": 55480,
      "training_step_time": 0.36753344535827637
    },
    {
      "epoch": 0.000338629150390625,
      "model_forward_time": 0.11502480506896973,
      "step": 55481
    },
    {
      "epoch": 0.000338629150390625,
      "step": 55481,
      "training_step_time": 0.4863755702972412
    },
    {
      "epoch": 0.00033863525390625,
      "model_forward_time": 0.1150979995727539,
      "step": 55482
    },
    {
      "epoch": 0.00033863525390625,
      "step": 55482,
      "training_step_time": 0.4717378616333008
    },
    {
      "epoch": 0.000338641357421875,
      "model_forward_time": 0.11421608924865723,
      "step": 55483
    },
    {
      "epoch": 0.000338641357421875,
      "step": 55483,
      "training_step_time": 0.39385080337524414
    },
    {
      "epoch": 0.0003386474609375,
      "model_forward_time": 0.11510491371154785,
      "step": 55484
    },
    {
      "epoch": 0.0003386474609375,
      "step": 55484,
      "training_step_time": 0.39486145973205566
    },
    {
      "epoch": 0.000338653564453125,
      "model_forward_time": 0.11550641059875488,
      "step": 55485
    },
    {
      "epoch": 0.000338653564453125,
      "step": 55485,
      "training_step_time": 0.3856782913208008
    },
    {
      "epoch": 0.00033865966796875,
      "model_forward_time": 0.1150665283203125,
      "step": 55486
    },
    {
      "epoch": 0.00033865966796875,
      "step": 55486,
      "training_step_time": 0.3937532901763916
    },
    {
      "epoch": 0.000338665771484375,
      "model_forward_time": 0.11502432823181152,
      "step": 55487
    },
    {
      "epoch": 0.000338665771484375,
      "step": 55487,
      "training_step_time": 0.3967905044555664
    },
    {
      "epoch": 0.000338671875,
      "model_forward_time": 0.11510944366455078,
      "step": 55488
    },
    {
      "epoch": 0.000338671875,
      "step": 55488,
      "training_step_time": 0.49119114875793457
    },
    {
      "epoch": 0.000338677978515625,
      "model_forward_time": 0.11534309387207031,
      "step": 55489
    },
    {
      "epoch": 0.000338677978515625,
      "step": 55489,
      "training_step_time": 0.4161672592163086
    },
    {
      "epoch": 0.00033868408203125,
      "grad_norm": 0.09398194402456284,
      "learning_rate": 1.5367591120405256e-06,
      "loss": 0.0358,
      "step": 55490
    },
    {
      "epoch": 0.00033868408203125,
      "model_forward_time": 0.11502933502197266,
      "step": 55490
    },
    {
      "epoch": 0.00033868408203125,
      "step": 55490,
      "training_step_time": 0.5149374008178711
    },
    {
      "epoch": 0.000338690185546875,
      "model_forward_time": 0.11534690856933594,
      "step": 55491
    },
    {
      "epoch": 0.000338690185546875,
      "step": 55491,
      "training_step_time": 0.3944549560546875
    },
    {
      "epoch": 0.0003386962890625,
      "model_forward_time": 0.1150214672088623,
      "step": 55492
    },
    {
      "epoch": 0.0003386962890625,
      "step": 55492,
      "training_step_time": 0.4438920021057129
    },
    {
      "epoch": 0.000338702392578125,
      "model_forward_time": 0.11444640159606934,
      "step": 55493
    },
    {
      "epoch": 0.000338702392578125,
      "step": 55493,
      "training_step_time": 0.4092428684234619
    },
    {
      "epoch": 0.00033870849609375,
      "model_forward_time": 0.11511516571044922,
      "step": 55494
    },
    {
      "epoch": 0.00033870849609375,
      "step": 55494,
      "training_step_time": 0.39175891876220703
    },
    {
      "epoch": 0.000338714599609375,
      "model_forward_time": 0.11507248878479004,
      "step": 55495
    },
    {
      "epoch": 0.000338714599609375,
      "step": 55495,
      "training_step_time": 0.43555188179016113
    },
    {
      "epoch": 0.000338720703125,
      "model_forward_time": 0.1149759292602539,
      "step": 55496
    },
    {
      "epoch": 0.000338720703125,
      "step": 55496,
      "training_step_time": 0.430509090423584
    },
    {
      "epoch": 0.000338726806640625,
      "model_forward_time": 0.1144876480102539,
      "step": 55497
    },
    {
      "epoch": 0.000338726806640625,
      "step": 55497,
      "training_step_time": 0.478351354598999
    },
    {
      "epoch": 0.00033873291015625,
      "model_forward_time": 0.11508488655090332,
      "step": 55498
    },
    {
      "epoch": 0.00033873291015625,
      "step": 55498,
      "training_step_time": 0.4052557945251465
    },
    {
      "epoch": 0.000338739013671875,
      "model_forward_time": 0.11555862426757812,
      "step": 55499
    },
    {
      "epoch": 0.000338739013671875,
      "step": 55499,
      "training_step_time": 0.3842625617980957
    },
    {
      "epoch": 0.0003387451171875,
      "grad_norm": 0.10227742046117783,
      "learning_rate": 1.5299867030334814e-06,
      "loss": 0.0379,
      "step": 55500
    },
    {
      "epoch": 0.0003387451171875,
      "model_forward_time": 0.11497354507446289,
      "step": 55500
    },
    {
      "epoch": 0.0003387451171875,
      "step": 55500,
      "training_step_time": 0.4001188278198242
    },
    {
      "epoch": 0.000338751220703125,
      "model_forward_time": 0.11520004272460938,
      "step": 55501
    },
    {
      "epoch": 0.000338751220703125,
      "step": 55501,
      "training_step_time": 0.4008195400238037
    },
    {
      "epoch": 0.00033875732421875,
      "model_forward_time": 0.11493372917175293,
      "step": 55502
    },
    {
      "epoch": 0.00033875732421875,
      "step": 55502,
      "training_step_time": 0.39807868003845215
    },
    {
      "epoch": 0.000338763427734375,
      "model_forward_time": 0.11491203308105469,
      "step": 55503
    },
    {
      "epoch": 0.000338763427734375,
      "step": 55503,
      "training_step_time": 0.4080486297607422
    },
    {
      "epoch": 0.00033876953125,
      "model_forward_time": 0.1153254508972168,
      "step": 55504
    },
    {
      "epoch": 0.00033876953125,
      "step": 55504,
      "training_step_time": 0.5229759216308594
    },
    {
      "epoch": 0.000338775634765625,
      "model_forward_time": 0.11589574813842773,
      "step": 55505
    },
    {
      "epoch": 0.000338775634765625,
      "step": 55505,
      "training_step_time": 0.3852803707122803
    },
    {
      "epoch": 0.00033878173828125,
      "model_forward_time": 0.11599612236022949,
      "step": 55506
    },
    {
      "epoch": 0.00033878173828125,
      "step": 55506,
      "training_step_time": 0.38735318183898926
    },
    {
      "epoch": 0.000338787841796875,
      "model_forward_time": 0.11501336097717285,
      "step": 55507
    },
    {
      "epoch": 0.000338787841796875,
      "step": 55507,
      "training_step_time": 0.4074978828430176
    },
    {
      "epoch": 0.0003387939453125,
      "model_forward_time": 0.11517000198364258,
      "step": 55508
    },
    {
      "epoch": 0.0003387939453125,
      "step": 55508,
      "training_step_time": 0.3801558017730713
    },
    {
      "epoch": 0.000338800048828125,
      "model_forward_time": 0.11523985862731934,
      "step": 55509
    },
    {
      "epoch": 0.000338800048828125,
      "step": 55509,
      "training_step_time": 0.38319921493530273
    },
    {
      "epoch": 0.00033880615234375,
      "grad_norm": 0.07515067607164383,
      "learning_rate": 1.5232290179366948e-06,
      "loss": 0.036,
      "step": 55510
    },
    {
      "epoch": 0.00033880615234375,
      "model_forward_time": 0.11618280410766602,
      "step": 55510
    },
    {
      "epoch": 0.00033880615234375,
      "step": 55510,
      "training_step_time": 0.6249020099639893
    },
    {
      "epoch": 0.000338812255859375,
      "model_forward_time": 0.11543512344360352,
      "step": 55511
    },
    {
      "epoch": 0.000338812255859375,
      "step": 55511,
      "training_step_time": 0.4946773052215576
    },
    {
      "epoch": 0.000338818359375,
      "model_forward_time": 0.11541509628295898,
      "step": 55512
    },
    {
      "epoch": 0.000338818359375,
      "step": 55512,
      "training_step_time": 0.45826268196105957
    },
    {
      "epoch": 0.000338824462890625,
      "model_forward_time": 0.11477017402648926,
      "step": 55513
    },
    {
      "epoch": 0.000338824462890625,
      "step": 55513,
      "training_step_time": 0.37966036796569824
    },
    {
      "epoch": 0.00033883056640625,
      "model_forward_time": 0.11500120162963867,
      "step": 55514
    },
    {
      "epoch": 0.00033883056640625,
      "step": 55514,
      "training_step_time": 0.38715362548828125
    },
    {
      "epoch": 0.000338836669921875,
      "model_forward_time": 0.11469411849975586,
      "step": 55515
    },
    {
      "epoch": 0.000338836669921875,
      "step": 55515,
      "training_step_time": 0.38873934745788574
    },
    {
      "epoch": 0.0003388427734375,
      "model_forward_time": 0.11496186256408691,
      "step": 55516
    },
    {
      "epoch": 0.0003388427734375,
      "step": 55516,
      "training_step_time": 0.5348992347717285
    },
    {
      "epoch": 0.000338848876953125,
      "model_forward_time": 0.11592721939086914,
      "step": 55517
    },
    {
      "epoch": 0.000338848876953125,
      "step": 55517,
      "training_step_time": 0.4252970218658447
    },
    {
      "epoch": 0.00033885498046875,
      "model_forward_time": 0.11600661277770996,
      "step": 55518
    },
    {
      "epoch": 0.00033885498046875,
      "step": 55518,
      "training_step_time": 0.43659257888793945
    },
    {
      "epoch": 0.000338861083984375,
      "model_forward_time": 0.11523890495300293,
      "step": 55519
    },
    {
      "epoch": 0.000338861083984375,
      "step": 55519,
      "training_step_time": 0.41376829147338867
    },
    {
      "epoch": 0.0003388671875,
      "grad_norm": 0.12040960043668747,
      "learning_rate": 1.516486058802974e-06,
      "loss": 0.0377,
      "step": 55520
    },
    {
      "epoch": 0.0003388671875,
      "model_forward_time": 0.1150505542755127,
      "step": 55520
    },
    {
      "epoch": 0.0003388671875,
      "step": 55520,
      "training_step_time": 0.3986167907714844
    },
    {
      "epoch": 0.000338873291015625,
      "model_forward_time": 0.11559391021728516,
      "step": 55521
    },
    {
      "epoch": 0.000338873291015625,
      "step": 55521,
      "training_step_time": 0.3921785354614258
    },
    {
      "epoch": 0.00033887939453125,
      "model_forward_time": 0.11569428443908691,
      "step": 55522
    },
    {
      "epoch": 0.00033887939453125,
      "step": 55522,
      "training_step_time": 0.4401111602783203
    },
    {
      "epoch": 0.000338885498046875,
      "model_forward_time": 0.11501073837280273,
      "step": 55523
    },
    {
      "epoch": 0.000338885498046875,
      "step": 55523,
      "training_step_time": 0.3950645923614502
    },
    {
      "epoch": 0.0003388916015625,
      "model_forward_time": 0.11714816093444824,
      "step": 55524
    },
    {
      "epoch": 0.0003388916015625,
      "step": 55524,
      "training_step_time": 0.4644310474395752
    },
    {
      "epoch": 0.000338897705078125,
      "model_forward_time": 0.11546158790588379,
      "step": 55525
    },
    {
      "epoch": 0.000338897705078125,
      "step": 55525,
      "training_step_time": 0.5133802890777588
    },
    {
      "epoch": 0.00033890380859375,
      "model_forward_time": 0.11538028717041016,
      "step": 55526
    },
    {
      "epoch": 0.00033890380859375,
      "step": 55526,
      "training_step_time": 0.4614291191101074
    },
    {
      "epoch": 0.000338909912109375,
      "model_forward_time": 0.11479520797729492,
      "step": 55527
    },
    {
      "epoch": 0.000338909912109375,
      "step": 55527,
      "training_step_time": 0.39096736907958984
    },
    {
      "epoch": 0.000338916015625,
      "model_forward_time": 0.11479544639587402,
      "step": 55528
    },
    {
      "epoch": 0.000338916015625,
      "step": 55528,
      "training_step_time": 0.3875596523284912
    },
    {
      "epoch": 0.000338922119140625,
      "model_forward_time": 0.11603975296020508,
      "step": 55529
    },
    {
      "epoch": 0.000338922119140625,
      "step": 55529,
      "training_step_time": 0.40473389625549316
    },
    {
      "epoch": 0.00033892822265625,
      "grad_norm": 0.07588671147823334,
      "learning_rate": 1.5097578276806633e-06,
      "loss": 0.0318,
      "step": 55530
    },
    {
      "epoch": 0.00033892822265625,
      "model_forward_time": 0.11470723152160645,
      "step": 55530
    },
    {
      "epoch": 0.00033892822265625,
      "step": 55530,
      "training_step_time": 0.3964543342590332
    },
    {
      "epoch": 0.000338934326171875,
      "model_forward_time": 0.11588120460510254,
      "step": 55531
    },
    {
      "epoch": 0.000338934326171875,
      "step": 55531,
      "training_step_time": 0.4469482898712158
    },
    {
      "epoch": 0.0003389404296875,
      "model_forward_time": 0.1157369613647461,
      "step": 55532
    },
    {
      "epoch": 0.0003389404296875,
      "step": 55532,
      "training_step_time": 0.4430880546569824
    },
    {
      "epoch": 0.000338946533203125,
      "model_forward_time": 0.11557722091674805,
      "step": 55533
    },
    {
      "epoch": 0.000338946533203125,
      "step": 55533,
      "training_step_time": 0.430997371673584
    },
    {
      "epoch": 0.00033895263671875,
      "model_forward_time": 0.1154642105102539,
      "step": 55534
    },
    {
      "epoch": 0.00033895263671875,
      "step": 55534,
      "training_step_time": 0.4219093322753906
    },
    {
      "epoch": 0.000338958740234375,
      "model_forward_time": 0.11526989936828613,
      "step": 55535
    },
    {
      "epoch": 0.000338958740234375,
      "step": 55535,
      "training_step_time": 0.4049685001373291
    },
    {
      "epoch": 0.00033896484375,
      "model_forward_time": 0.116455078125,
      "step": 55536
    },
    {
      "epoch": 0.00033896484375,
      "step": 55536,
      "training_step_time": 0.40002894401550293
    },
    {
      "epoch": 0.000338970947265625,
      "model_forward_time": 0.11452817916870117,
      "step": 55537
    },
    {
      "epoch": 0.000338970947265625,
      "step": 55537,
      "training_step_time": 0.3973701000213623
    },
    {
      "epoch": 0.00033897705078125,
      "model_forward_time": 0.11494803428649902,
      "step": 55538
    },
    {
      "epoch": 0.00033897705078125,
      "step": 55538,
      "training_step_time": 0.3669898509979248
    },
    {
      "epoch": 0.000338983154296875,
      "model_forward_time": 0.11535215377807617,
      "step": 55539
    },
    {
      "epoch": 0.000338983154296875,
      "step": 55539,
      "training_step_time": 0.45877671241760254
    },
    {
      "epoch": 0.0003389892578125,
      "grad_norm": 0.11309117078781128,
      "learning_rate": 1.5030443266136118e-06,
      "loss": 0.0332,
      "step": 55540
    },
    {
      "epoch": 0.0003389892578125,
      "model_forward_time": 0.11502289772033691,
      "step": 55540
    },
    {
      "epoch": 0.0003389892578125,
      "step": 55540,
      "training_step_time": 0.5116829872131348
    },
    {
      "epoch": 0.000338995361328125,
      "model_forward_time": 0.11592721939086914,
      "step": 55541
    },
    {
      "epoch": 0.000338995361328125,
      "step": 55541,
      "training_step_time": 0.39736509323120117
    },
    {
      "epoch": 0.00033900146484375,
      "model_forward_time": 0.1157979965209961,
      "step": 55542
    },
    {
      "epoch": 0.00033900146484375,
      "step": 55542,
      "training_step_time": 0.396395206451416
    },
    {
      "epoch": 0.000339007568359375,
      "model_forward_time": 0.1150355339050293,
      "step": 55543
    },
    {
      "epoch": 0.000339007568359375,
      "step": 55543,
      "training_step_time": 0.3850290775299072
    },
    {
      "epoch": 0.000339013671875,
      "model_forward_time": 0.1149759292602539,
      "step": 55544
    },
    {
      "epoch": 0.000339013671875,
      "step": 55544,
      "training_step_time": 0.3848757743835449
    },
    {
      "epoch": 0.000339019775390625,
      "model_forward_time": 0.11518669128417969,
      "step": 55545
    },
    {
      "epoch": 0.000339019775390625,
      "step": 55545,
      "training_step_time": 0.4547858238220215
    },
    {
      "epoch": 0.00033902587890625,
      "model_forward_time": 0.11500382423400879,
      "step": 55546
    },
    {
      "epoch": 0.00033902587890625,
      "step": 55546,
      "training_step_time": 0.574371337890625
    },
    {
      "epoch": 0.000339031982421875,
      "model_forward_time": 0.1151740550994873,
      "step": 55547
    },
    {
      "epoch": 0.000339031982421875,
      "step": 55547,
      "training_step_time": 0.41845083236694336
    },
    {
      "epoch": 0.0003390380859375,
      "model_forward_time": 0.1145782470703125,
      "step": 55548
    },
    {
      "epoch": 0.0003390380859375,
      "step": 55548,
      "training_step_time": 0.3991234302520752
    },
    {
      "epoch": 0.000339044189453125,
      "model_forward_time": 0.11449718475341797,
      "step": 55549
    },
    {
      "epoch": 0.000339044189453125,
      "step": 55549,
      "training_step_time": 0.39412426948547363
    },
    {
      "epoch": 0.00033905029296875,
      "grad_norm": 0.12286201864480972,
      "learning_rate": 1.4963455576412045e-06,
      "loss": 0.042,
      "step": 55550
    },
    {
      "epoch": 0.00033905029296875,
      "model_forward_time": 0.11450362205505371,
      "step": 55550
    },
    {
      "epoch": 0.00033905029296875,
      "step": 55550,
      "training_step_time": 0.3948233127593994
    },
    {
      "epoch": 0.000339056396484375,
      "model_forward_time": 0.11545228958129883,
      "step": 55551
    },
    {
      "epoch": 0.000339056396484375,
      "step": 55551,
      "training_step_time": 0.3973207473754883
    },
    {
      "epoch": 0.0003390625,
      "model_forward_time": 0.11482453346252441,
      "step": 55552
    },
    {
      "epoch": 0.0003390625,
      "step": 55552,
      "training_step_time": 0.6788773536682129
    },
    {
      "epoch": 0.000339068603515625,
      "model_forward_time": 0.11424779891967773,
      "step": 55553
    },
    {
      "epoch": 0.000339068603515625,
      "step": 55553,
      "training_step_time": 0.44848155975341797
    },
    {
      "epoch": 0.00033907470703125,
      "model_forward_time": 0.1146402359008789,
      "step": 55554
    },
    {
      "epoch": 0.00033907470703125,
      "step": 55554,
      "training_step_time": 0.46062779426574707
    },
    {
      "epoch": 0.000339080810546875,
      "model_forward_time": 0.11452770233154297,
      "step": 55555
    },
    {
      "epoch": 0.000339080810546875,
      "step": 55555,
      "training_step_time": 0.39285969734191895
    },
    {
      "epoch": 0.0003390869140625,
      "model_forward_time": 0.11411333084106445,
      "step": 55556
    },
    {
      "epoch": 0.0003390869140625,
      "step": 55556,
      "training_step_time": 0.3950154781341553
    },
    {
      "epoch": 0.000339093017578125,
      "model_forward_time": 0.11420631408691406,
      "step": 55557
    },
    {
      "epoch": 0.000339093017578125,
      "step": 55557,
      "training_step_time": 0.3881192207336426
    },
    {
      "epoch": 0.00033909912109375,
      "model_forward_time": 0.11428689956665039,
      "step": 55558
    },
    {
      "epoch": 0.00033909912109375,
      "step": 55558,
      "training_step_time": 0.5739023685455322
    },
    {
      "epoch": 0.000339105224609375,
      "model_forward_time": 0.11462020874023438,
      "step": 55559
    },
    {
      "epoch": 0.000339105224609375,
      "step": 55559,
      "training_step_time": 0.4308912754058838
    },
    {
      "epoch": 0.000339111328125,
      "grad_norm": 0.08669138699769974,
      "learning_rate": 1.4896615227983468e-06,
      "loss": 0.0357,
      "step": 55560
    },
    {
      "epoch": 0.000339111328125,
      "model_forward_time": 0.11489701271057129,
      "step": 55560
    },
    {
      "epoch": 0.000339111328125,
      "step": 55560,
      "training_step_time": 0.4231541156768799
    },
    {
      "epoch": 0.000339117431640625,
      "model_forward_time": 0.11407303810119629,
      "step": 55561
    },
    {
      "epoch": 0.000339117431640625,
      "step": 55561,
      "training_step_time": 0.3880033493041992
    },
    {
      "epoch": 0.00033912353515625,
      "model_forward_time": 0.11501502990722656,
      "step": 55562
    },
    {
      "epoch": 0.00033912353515625,
      "step": 55562,
      "training_step_time": 0.3883543014526367
    },
    {
      "epoch": 0.000339129638671875,
      "model_forward_time": 0.11444854736328125,
      "step": 55563
    },
    {
      "epoch": 0.000339129638671875,
      "step": 55563,
      "training_step_time": 0.38776159286499023
    },
    {
      "epoch": 0.0003391357421875,
      "model_forward_time": 0.11527037620544434,
      "step": 55564
    },
    {
      "epoch": 0.0003391357421875,
      "step": 55564,
      "training_step_time": 0.5359911918640137
    },
    {
      "epoch": 0.000339141845703125,
      "model_forward_time": 0.11483407020568848,
      "step": 55565
    },
    {
      "epoch": 0.000339141845703125,
      "step": 55565,
      "training_step_time": 0.3935072422027588
    },
    {
      "epoch": 0.00033914794921875,
      "model_forward_time": 0.11461257934570312,
      "step": 55566
    },
    {
      "epoch": 0.00033914794921875,
      "step": 55566,
      "training_step_time": 0.36696720123291016
    },
    {
      "epoch": 0.000339154052734375,
      "model_forward_time": 0.11482524871826172,
      "step": 55567
    },
    {
      "epoch": 0.000339154052734375,
      "step": 55567,
      "training_step_time": 0.4656982421875
    },
    {
      "epoch": 0.00033916015625,
      "model_forward_time": 0.11503863334655762,
      "step": 55568
    },
    {
      "epoch": 0.00033916015625,
      "step": 55568,
      "training_step_time": 0.43572115898132324
    },
    {
      "epoch": 0.000339166259765625,
      "model_forward_time": 0.11455726623535156,
      "step": 55569
    },
    {
      "epoch": 0.000339166259765625,
      "step": 55569,
      "training_step_time": 0.4055297374725342
    },
    {
      "epoch": 0.00033917236328125,
      "grad_norm": 0.0795736089348793,
      "learning_rate": 1.482992224115487e-06,
      "loss": 0.0368,
      "step": 55570
    },
    {
      "epoch": 0.00033917236328125,
      "model_forward_time": 0.11536693572998047,
      "step": 55570
    },
    {
      "epoch": 0.00033917236328125,
      "step": 55570,
      "training_step_time": 0.39908313751220703
    },
    {
      "epoch": 0.000339178466796875,
      "model_forward_time": 0.1155693531036377,
      "step": 55571
    },
    {
      "epoch": 0.000339178466796875,
      "step": 55571,
      "training_step_time": 0.39591288566589355
    },
    {
      "epoch": 0.0003391845703125,
      "model_forward_time": 0.11519980430603027,
      "step": 55572
    },
    {
      "epoch": 0.0003391845703125,
      "step": 55572,
      "training_step_time": 0.4150724411010742
    },
    {
      "epoch": 0.000339190673828125,
      "model_forward_time": 0.11455440521240234,
      "step": 55573
    },
    {
      "epoch": 0.000339190673828125,
      "step": 55573,
      "training_step_time": 0.4249610900878906
    },
    {
      "epoch": 0.00033919677734375,
      "model_forward_time": 0.11483979225158691,
      "step": 55574
    },
    {
      "epoch": 0.00033919677734375,
      "step": 55574,
      "training_step_time": 0.46145129203796387
    },
    {
      "epoch": 0.000339202880859375,
      "model_forward_time": 0.11528468132019043,
      "step": 55575
    },
    {
      "epoch": 0.000339202880859375,
      "step": 55575,
      "training_step_time": 0.46222853660583496
    },
    {
      "epoch": 0.000339208984375,
      "model_forward_time": 0.11572623252868652,
      "step": 55576
    },
    {
      "epoch": 0.000339208984375,
      "step": 55576,
      "training_step_time": 0.5001907348632812
    },
    {
      "epoch": 0.000339215087890625,
      "model_forward_time": 0.1141805648803711,
      "step": 55577
    },
    {
      "epoch": 0.000339215087890625,
      "step": 55577,
      "training_step_time": 0.39841556549072266
    },
    {
      "epoch": 0.00033922119140625,
      "model_forward_time": 0.11484980583190918,
      "step": 55578
    },
    {
      "epoch": 0.00033922119140625,
      "step": 55578,
      "training_step_time": 0.39339661598205566
    },
    {
      "epoch": 0.000339227294921875,
      "model_forward_time": 0.11459469795227051,
      "step": 55579
    },
    {
      "epoch": 0.000339227294921875,
      "step": 55579,
      "training_step_time": 0.3992612361907959
    },
    {
      "epoch": 0.0003392333984375,
      "grad_norm": 0.08142907917499542,
      "learning_rate": 1.4763376636185599e-06,
      "loss": 0.0354,
      "step": 55580
    },
    {
      "epoch": 0.0003392333984375,
      "model_forward_time": 0.11428260803222656,
      "step": 55580
    },
    {
      "epoch": 0.0003392333984375,
      "step": 55580,
      "training_step_time": 0.39180946350097656
    },
    {
      "epoch": 0.000339239501953125,
      "model_forward_time": 0.11614155769348145,
      "step": 55581
    },
    {
      "epoch": 0.000339239501953125,
      "step": 55581,
      "training_step_time": 0.43494081497192383
    },
    {
      "epoch": 0.00033924560546875,
      "model_forward_time": 0.11549139022827148,
      "step": 55582
    },
    {
      "epoch": 0.00033924560546875,
      "step": 55582,
      "training_step_time": 0.5261411666870117
    },
    {
      "epoch": 0.000339251708984375,
      "model_forward_time": 0.11534500122070312,
      "step": 55583
    },
    {
      "epoch": 0.000339251708984375,
      "step": 55583,
      "training_step_time": 0.421527624130249
    },
    {
      "epoch": 0.0003392578125,
      "model_forward_time": 0.11479711532592773,
      "step": 55584
    },
    {
      "epoch": 0.0003392578125,
      "step": 55584,
      "training_step_time": 0.3794136047363281
    },
    {
      "epoch": 0.000339263916015625,
      "model_forward_time": 0.11497211456298828,
      "step": 55585
    },
    {
      "epoch": 0.000339263916015625,
      "step": 55585,
      "training_step_time": 0.3999297618865967
    },
    {
      "epoch": 0.00033927001953125,
      "model_forward_time": 0.1154179573059082,
      "step": 55586
    },
    {
      "epoch": 0.00033927001953125,
      "step": 55586,
      "training_step_time": 0.4095437526702881
    },
    {
      "epoch": 0.000339276123046875,
      "model_forward_time": 0.11477112770080566,
      "step": 55587
    },
    {
      "epoch": 0.000339276123046875,
      "step": 55587,
      "training_step_time": 0.39835596084594727
    },
    {
      "epoch": 0.0003392822265625,
      "model_forward_time": 0.11527085304260254,
      "step": 55588
    },
    {
      "epoch": 0.0003392822265625,
      "step": 55588,
      "training_step_time": 0.4814157485961914
    },
    {
      "epoch": 0.000339288330078125,
      "model_forward_time": 0.11545944213867188,
      "step": 55589
    },
    {
      "epoch": 0.000339288330078125,
      "step": 55589,
      "training_step_time": 0.41954994201660156
    },
    {
      "epoch": 0.00033929443359375,
      "grad_norm": 0.09471739828586578,
      "learning_rate": 1.4696978433290653e-06,
      "loss": 0.0419,
      "step": 55590
    },
    {
      "epoch": 0.00033929443359375,
      "model_forward_time": 0.11499810218811035,
      "step": 55590
    },
    {
      "epoch": 0.00033929443359375,
      "step": 55590,
      "training_step_time": 0.3960599899291992
    },
    {
      "epoch": 0.000339300537109375,
      "model_forward_time": 0.11522412300109863,
      "step": 55591
    },
    {
      "epoch": 0.000339300537109375,
      "step": 55591,
      "training_step_time": 0.39333581924438477
    },
    {
      "epoch": 0.000339306640625,
      "model_forward_time": 0.11505246162414551,
      "step": 55592
    },
    {
      "epoch": 0.000339306640625,
      "step": 55592,
      "training_step_time": 0.3858063220977783
    },
    {
      "epoch": 0.000339312744140625,
      "model_forward_time": 0.11577510833740234,
      "step": 55593
    },
    {
      "epoch": 0.000339312744140625,
      "step": 55593,
      "training_step_time": 0.3858318328857422
    },
    {
      "epoch": 0.00033931884765625,
      "model_forward_time": 0.1146087646484375,
      "step": 55594
    },
    {
      "epoch": 0.00033931884765625,
      "step": 55594,
      "training_step_time": 0.5863778591156006
    },
    {
      "epoch": 0.000339324951171875,
      "model_forward_time": 0.11434006690979004,
      "step": 55595
    },
    {
      "epoch": 0.000339324951171875,
      "step": 55595,
      "training_step_time": 0.3650524616241455
    },
    {
      "epoch": 0.0003393310546875,
      "model_forward_time": 0.11525130271911621,
      "step": 55596
    },
    {
      "epoch": 0.0003393310546875,
      "step": 55596,
      "training_step_time": 0.44967103004455566
    },
    {
      "epoch": 0.000339337158203125,
      "model_forward_time": 0.1145639419555664,
      "step": 55597
    },
    {
      "epoch": 0.000339337158203125,
      "step": 55597,
      "training_step_time": 0.40309643745422363
    },
    {
      "epoch": 0.00033934326171875,
      "model_forward_time": 0.11494755744934082,
      "step": 55598
    },
    {
      "epoch": 0.00033934326171875,
      "step": 55598,
      "training_step_time": 0.39504528045654297
    },
    {
      "epoch": 0.000339349365234375,
      "model_forward_time": 0.11429953575134277,
      "step": 55599
    },
    {
      "epoch": 0.000339349365234375,
      "step": 55599,
      "training_step_time": 0.40719032287597656
    },
    {
      "epoch": 0.00033935546875,
      "grad_norm": 0.1083005964756012,
      "learning_rate": 1.463072765264001e-06,
      "loss": 0.0359,
      "step": 55600
    },
    {
      "epoch": 0.00033935546875,
      "model_forward_time": 0.11444950103759766,
      "step": 55600
    },
    {
      "epoch": 0.00033935546875,
      "step": 55600,
      "training_step_time": 0.46912455558776855
    },
    {
      "epoch": 0.000339361572265625,
      "model_forward_time": 0.11535358428955078,
      "step": 55601
    },
    {
      "epoch": 0.000339361572265625,
      "step": 55601,
      "training_step_time": 0.39516758918762207
    },
    {
      "epoch": 0.00033936767578125,
      "model_forward_time": 0.11452031135559082,
      "step": 55602
    },
    {
      "epoch": 0.00033936767578125,
      "step": 55602,
      "training_step_time": 0.42434167861938477
    },
    {
      "epoch": 0.000339373779296875,
      "model_forward_time": 0.11559271812438965,
      "step": 55603
    },
    {
      "epoch": 0.000339373779296875,
      "step": 55603,
      "training_step_time": 0.45870399475097656
    },
    {
      "epoch": 0.0003393798828125,
      "model_forward_time": 0.1149449348449707,
      "step": 55604
    },
    {
      "epoch": 0.0003393798828125,
      "step": 55604,
      "training_step_time": 0.38237452507019043
    },
    {
      "epoch": 0.000339385986328125,
      "model_forward_time": 0.11526918411254883,
      "step": 55605
    },
    {
      "epoch": 0.000339385986328125,
      "step": 55605,
      "training_step_time": 0.39045143127441406
    },
    {
      "epoch": 0.00033939208984375,
      "model_forward_time": 0.11441588401794434,
      "step": 55606
    },
    {
      "epoch": 0.00033939208984375,
      "step": 55606,
      "training_step_time": 0.6156892776489258
    },
    {
      "epoch": 0.000339398193359375,
      "model_forward_time": 0.11437344551086426,
      "step": 55607
    },
    {
      "epoch": 0.000339398193359375,
      "step": 55607,
      "training_step_time": 0.4029514789581299
    },
    {
      "epoch": 0.000339404296875,
      "model_forward_time": 0.1152505874633789,
      "step": 55608
    },
    {
      "epoch": 0.000339404296875,
      "step": 55608,
      "training_step_time": 0.3932161331176758
    },
    {
      "epoch": 0.000339410400390625,
      "model_forward_time": 0.11459755897521973,
      "step": 55609
    },
    {
      "epoch": 0.000339410400390625,
      "step": 55609,
      "training_step_time": 0.36728358268737793
    },
    {
      "epoch": 0.00033941650390625,
      "grad_norm": 0.11768142879009247,
      "learning_rate": 1.4564624314358844e-06,
      "loss": 0.0344,
      "step": 55610
    },
    {
      "epoch": 0.00033941650390625,
      "model_forward_time": 0.11474776268005371,
      "step": 55610
    },
    {
      "epoch": 0.00033941650390625,
      "step": 55610,
      "training_step_time": 0.40346622467041016
    },
    {
      "epoch": 0.000339422607421875,
      "model_forward_time": 0.11629176139831543,
      "step": 55611
    },
    {
      "epoch": 0.000339422607421875,
      "step": 55611,
      "training_step_time": 0.4974668025970459
    },
    {
      "epoch": 0.0003394287109375,
      "model_forward_time": 0.11524844169616699,
      "step": 55612
    },
    {
      "epoch": 0.0003394287109375,
      "step": 55612,
      "training_step_time": 0.5263416767120361
    },
    {
      "epoch": 0.000339434814453125,
      "model_forward_time": 0.11548900604248047,
      "step": 55613
    },
    {
      "epoch": 0.000339434814453125,
      "step": 55613,
      "training_step_time": 0.3948497772216797
    },
    {
      "epoch": 0.00033944091796875,
      "model_forward_time": 0.11519432067871094,
      "step": 55614
    },
    {
      "epoch": 0.00033944091796875,
      "step": 55614,
      "training_step_time": 0.39186978340148926
    },
    {
      "epoch": 0.000339447021484375,
      "model_forward_time": 0.11491203308105469,
      "step": 55615
    },
    {
      "epoch": 0.000339447021484375,
      "step": 55615,
      "training_step_time": 0.39136385917663574
    },
    {
      "epoch": 0.000339453125,
      "model_forward_time": 0.11478376388549805,
      "step": 55616
    },
    {
      "epoch": 0.000339453125,
      "step": 55616,
      "training_step_time": 0.42591357231140137
    },
    {
      "epoch": 0.000339459228515625,
      "model_forward_time": 0.11462259292602539,
      "step": 55617
    },
    {
      "epoch": 0.000339459228515625,
      "step": 55617,
      "training_step_time": 0.4804189205169678
    },
    {
      "epoch": 0.00033946533203125,
      "model_forward_time": 0.11559009552001953,
      "step": 55618
    },
    {
      "epoch": 0.00033946533203125,
      "step": 55618,
      "training_step_time": 0.4035980701446533
    },
    {
      "epoch": 0.000339471435546875,
      "model_forward_time": 0.11520814895629883,
      "step": 55619
    },
    {
      "epoch": 0.000339471435546875,
      "step": 55619,
      "training_step_time": 0.4000742435455322
    },
    {
      "epoch": 0.0003394775390625,
      "grad_norm": 0.09810145944356918,
      "learning_rate": 1.4498668438527597e-06,
      "loss": 0.0387,
      "step": 55620
    },
    {
      "epoch": 0.0003394775390625,
      "model_forward_time": 0.11543464660644531,
      "step": 55620
    },
    {
      "epoch": 0.0003394775390625,
      "step": 55620,
      "training_step_time": 0.4005923271179199
    },
    {
      "epoch": 0.000339483642578125,
      "model_forward_time": 0.11531400680541992,
      "step": 55621
    },
    {
      "epoch": 0.000339483642578125,
      "step": 55621,
      "training_step_time": 0.39391613006591797
    },
    {
      "epoch": 0.00033948974609375,
      "model_forward_time": 0.11571907997131348,
      "step": 55622
    },
    {
      "epoch": 0.00033948974609375,
      "step": 55622,
      "training_step_time": 0.3933122158050537
    },
    {
      "epoch": 0.000339495849609375,
      "model_forward_time": 0.11512494087219238,
      "step": 55623
    },
    {
      "epoch": 0.000339495849609375,
      "step": 55623,
      "training_step_time": 0.39389777183532715
    },
    {
      "epoch": 0.000339501953125,
      "model_forward_time": 0.11682510375976562,
      "step": 55624
    },
    {
      "epoch": 0.000339501953125,
      "step": 55624,
      "training_step_time": 0.6243176460266113
    },
    {
      "epoch": 0.000339508056640625,
      "model_forward_time": 0.11468505859375,
      "step": 55625
    },
    {
      "epoch": 0.000339508056640625,
      "step": 55625,
      "training_step_time": 0.5236372947692871
    },
    {
      "epoch": 0.00033951416015625,
      "model_forward_time": 0.11529350280761719,
      "step": 55626
    },
    {
      "epoch": 0.00033951416015625,
      "step": 55626,
      "training_step_time": 0.4459352493286133
    },
    {
      "epoch": 0.000339520263671875,
      "model_forward_time": 0.11443185806274414,
      "step": 55627
    },
    {
      "epoch": 0.000339520263671875,
      "step": 55627,
      "training_step_time": 0.3887150287628174
    },
    {
      "epoch": 0.0003395263671875,
      "model_forward_time": 0.1139216423034668,
      "step": 55628
    },
    {
      "epoch": 0.0003395263671875,
      "step": 55628,
      "training_step_time": 0.39485907554626465
    },
    {
      "epoch": 0.000339532470703125,
      "model_forward_time": 0.11509847640991211,
      "step": 55629
    },
    {
      "epoch": 0.000339532470703125,
      "step": 55629,
      "training_step_time": 0.3774378299713135
    },
    {
      "epoch": 0.00033953857421875,
      "grad_norm": 0.10864006727933884,
      "learning_rate": 1.4432860045182017e-06,
      "loss": 0.0362,
      "step": 55630
    },
    {
      "epoch": 0.00033953857421875,
      "model_forward_time": 0.11513447761535645,
      "step": 55630
    },
    {
      "epoch": 0.00033953857421875,
      "step": 55630,
      "training_step_time": 0.41695237159729004
    },
    {
      "epoch": 0.000339544677734375,
      "model_forward_time": 0.11465001106262207,
      "step": 55631
    },
    {
      "epoch": 0.000339544677734375,
      "step": 55631,
      "training_step_time": 0.39633774757385254
    },
    {
      "epoch": 0.00033955078125,
      "model_forward_time": 0.11533904075622559,
      "step": 55632
    },
    {
      "epoch": 0.00033955078125,
      "step": 55632,
      "training_step_time": 0.41419458389282227
    },
    {
      "epoch": 0.000339556884765625,
      "model_forward_time": 0.11500740051269531,
      "step": 55633
    },
    {
      "epoch": 0.000339556884765625,
      "step": 55633,
      "training_step_time": 0.40168190002441406
    },
    {
      "epoch": 0.00033956298828125,
      "model_forward_time": 0.11500692367553711,
      "step": 55634
    },
    {
      "epoch": 0.00033956298828125,
      "step": 55634,
      "training_step_time": 0.3956584930419922
    },
    {
      "epoch": 0.000339569091796875,
      "model_forward_time": 0.1153268814086914,
      "step": 55635
    },
    {
      "epoch": 0.000339569091796875,
      "step": 55635,
      "training_step_time": 0.39199376106262207
    },
    {
      "epoch": 0.0003395751953125,
      "model_forward_time": 0.115936279296875,
      "step": 55636
    },
    {
      "epoch": 0.0003395751953125,
      "step": 55636,
      "training_step_time": 0.5212118625640869
    },
    {
      "epoch": 0.000339581298828125,
      "model_forward_time": 0.11493945121765137,
      "step": 55637
    },
    {
      "epoch": 0.000339581298828125,
      "step": 55637,
      "training_step_time": 0.397000789642334
    },
    {
      "epoch": 0.00033958740234375,
      "model_forward_time": 0.11529874801635742,
      "step": 55638
    },
    {
      "epoch": 0.00033958740234375,
      "step": 55638,
      "training_step_time": 0.4063107967376709
    },
    {
      "epoch": 0.000339593505859375,
      "model_forward_time": 0.11537528038024902,
      "step": 55639
    },
    {
      "epoch": 0.000339593505859375,
      "step": 55639,
      "training_step_time": 0.4383571147918701
    },
    {
      "epoch": 0.000339599609375,
      "grad_norm": 0.10850080847740173,
      "learning_rate": 1.4367199154312783e-06,
      "loss": 0.0414,
      "step": 55640
    },
    {
      "epoch": 0.000339599609375,
      "model_forward_time": 0.1149284839630127,
      "step": 55640
    },
    {
      "epoch": 0.000339599609375,
      "step": 55640,
      "training_step_time": 0.5071365833282471
    },
    {
      "epoch": 0.000339605712890625,
      "model_forward_time": 0.11547183990478516,
      "step": 55641
    },
    {
      "epoch": 0.000339605712890625,
      "step": 55641,
      "training_step_time": 0.3860015869140625
    },
    {
      "epoch": 0.00033961181640625,
      "model_forward_time": 0.11544656753540039,
      "step": 55642
    },
    {
      "epoch": 0.00033961181640625,
      "step": 55642,
      "training_step_time": 0.39763975143432617
    },
    {
      "epoch": 0.000339617919921875,
      "model_forward_time": 0.11476254463195801,
      "step": 55643
    },
    {
      "epoch": 0.000339617919921875,
      "step": 55643,
      "training_step_time": 0.38675880432128906
    },
    {
      "epoch": 0.0003396240234375,
      "model_forward_time": 0.11533999443054199,
      "step": 55644
    },
    {
      "epoch": 0.0003396240234375,
      "step": 55644,
      "training_step_time": 0.3998570442199707
    },
    {
      "epoch": 0.000339630126953125,
      "model_forward_time": 0.1150813102722168,
      "step": 55645
    },
    {
      "epoch": 0.000339630126953125,
      "step": 55645,
      "training_step_time": 0.4216346740722656
    },
    {
      "epoch": 0.00033963623046875,
      "model_forward_time": 0.11497235298156738,
      "step": 55646
    },
    {
      "epoch": 0.00033963623046875,
      "step": 55646,
      "training_step_time": 0.48726844787597656
    },
    {
      "epoch": 0.000339642333984375,
      "model_forward_time": 0.11697196960449219,
      "step": 55647
    },
    {
      "epoch": 0.000339642333984375,
      "step": 55647,
      "training_step_time": 0.3851289749145508
    },
    {
      "epoch": 0.0003396484375,
      "model_forward_time": 0.11541938781738281,
      "step": 55648
    },
    {
      "epoch": 0.0003396484375,
      "step": 55648,
      "training_step_time": 0.43410801887512207
    },
    {
      "epoch": 0.000339654541015625,
      "model_forward_time": 0.11454916000366211,
      "step": 55649
    },
    {
      "epoch": 0.000339654541015625,
      "step": 55649,
      "training_step_time": 0.3980076313018799
    },
    {
      "epoch": 0.00033966064453125,
      "grad_norm": 0.0888679027557373,
      "learning_rate": 1.4301685785866214e-06,
      "loss": 0.0381,
      "step": 55650
    },
    {
      "epoch": 0.00033966064453125,
      "model_forward_time": 0.11521100997924805,
      "step": 55650
    },
    {
      "epoch": 0.00033966064453125,
      "step": 55650,
      "training_step_time": 0.4086451530456543
    },
    {
      "epoch": 0.000339666748046875,
      "model_forward_time": 0.11449217796325684,
      "step": 55651
    },
    {
      "epoch": 0.000339666748046875,
      "step": 55651,
      "training_step_time": 0.393435001373291
    },
    {
      "epoch": 0.0003396728515625,
      "model_forward_time": 0.1145925521850586,
      "step": 55652
    },
    {
      "epoch": 0.0003396728515625,
      "step": 55652,
      "training_step_time": 0.39624834060668945
    },
    {
      "epoch": 0.000339678955078125,
      "model_forward_time": 0.11446356773376465,
      "step": 55653
    },
    {
      "epoch": 0.000339678955078125,
      "step": 55653,
      "training_step_time": 0.4151275157928467
    },
    {
      "epoch": 0.00033968505859375,
      "model_forward_time": 0.1144857406616211,
      "step": 55654
    },
    {
      "epoch": 0.00033968505859375,
      "step": 55654,
      "training_step_time": 0.5819849967956543
    },
    {
      "epoch": 0.000339691162109375,
      "model_forward_time": 0.11899232864379883,
      "step": 55655
    },
    {
      "epoch": 0.000339691162109375,
      "step": 55655,
      "training_step_time": 0.45289063453674316
    },
    {
      "epoch": 0.000339697265625,
      "model_forward_time": 0.11933279037475586,
      "step": 55656
    },
    {
      "epoch": 0.000339697265625,
      "step": 55656,
      "training_step_time": 0.41558027267456055
    },
    {
      "epoch": 0.000339703369140625,
      "model_forward_time": 0.11674261093139648,
      "step": 55657
    },
    {
      "epoch": 0.000339703369140625,
      "step": 55657,
      "training_step_time": 0.3985321521759033
    },
    {
      "epoch": 0.00033970947265625,
      "model_forward_time": 0.11575198173522949,
      "step": 55658
    },
    {
      "epoch": 0.00033970947265625,
      "step": 55658,
      "training_step_time": 0.42501354217529297
    },
    {
      "epoch": 0.000339715576171875,
      "model_forward_time": 0.1146397590637207,
      "step": 55659
    },
    {
      "epoch": 0.000339715576171875,
      "step": 55659,
      "training_step_time": 0.41495513916015625
    },
    {
      "epoch": 0.0003397216796875,
      "grad_norm": 0.10128951817750931,
      "learning_rate": 1.4236319959743227e-06,
      "loss": 0.0366,
      "step": 55660
    },
    {
      "epoch": 0.0003397216796875,
      "model_forward_time": 0.11824584007263184,
      "step": 55660
    },
    {
      "epoch": 0.0003397216796875,
      "step": 55660,
      "training_step_time": 0.46599435806274414
    },
    {
      "epoch": 0.000339727783203125,
      "model_forward_time": 0.1177985668182373,
      "step": 55661
    },
    {
      "epoch": 0.000339727783203125,
      "step": 55661,
      "training_step_time": 0.38802242279052734
    },
    {
      "epoch": 0.00033973388671875,
      "model_forward_time": 0.11813068389892578,
      "step": 55662
    },
    {
      "epoch": 0.00033973388671875,
      "step": 55662,
      "training_step_time": 0.3958001136779785
    },
    {
      "epoch": 0.000339739990234375,
      "model_forward_time": 0.11939668655395508,
      "step": 55663
    },
    {
      "epoch": 0.000339739990234375,
      "step": 55663,
      "training_step_time": 0.38610339164733887
    },
    {
      "epoch": 0.00033974609375,
      "model_forward_time": 0.1152653694152832,
      "step": 55664
    },
    {
      "epoch": 0.00033974609375,
      "step": 55664,
      "training_step_time": 0.40007972717285156
    },
    {
      "epoch": 0.000339752197265625,
      "model_forward_time": 0.1155693531036377,
      "step": 55665
    },
    {
      "epoch": 0.000339752197265625,
      "step": 55665,
      "training_step_time": 0.4014301300048828
    },
    {
      "epoch": 0.00033975830078125,
      "model_forward_time": 0.11465716361999512,
      "step": 55666
    },
    {
      "epoch": 0.00033975830078125,
      "step": 55666,
      "training_step_time": 0.5605456829071045
    },
    {
      "epoch": 0.000339764404296875,
      "model_forward_time": 0.11595487594604492,
      "step": 55667
    },
    {
      "epoch": 0.000339764404296875,
      "step": 55667,
      "training_step_time": 0.431286096572876
    },
    {
      "epoch": 0.0003397705078125,
      "model_forward_time": 0.11478638648986816,
      "step": 55668
    },
    {
      "epoch": 0.0003397705078125,
      "step": 55668,
      "training_step_time": 0.39772558212280273
    },
    {
      "epoch": 0.000339776611328125,
      "model_forward_time": 0.11522674560546875,
      "step": 55669
    },
    {
      "epoch": 0.000339776611328125,
      "step": 55669,
      "training_step_time": 0.47333478927612305
    },
    {
      "epoch": 0.00033978271484375,
      "grad_norm": 0.12916229665279388,
      "learning_rate": 1.4171101695800493e-06,
      "loss": 0.0298,
      "step": 55670
    },
    {
      "epoch": 0.00033978271484375,
      "model_forward_time": 0.11435461044311523,
      "step": 55670
    },
    {
      "epoch": 0.00033978271484375,
      "step": 55670,
      "training_step_time": 0.39658594131469727
    },
    {
      "epoch": 0.000339788818359375,
      "model_forward_time": 0.11508512496948242,
      "step": 55671
    },
    {
      "epoch": 0.000339788818359375,
      "step": 55671,
      "training_step_time": 0.38349437713623047
    },
    {
      "epoch": 0.000339794921875,
      "model_forward_time": 0.11545610427856445,
      "step": 55672
    },
    {
      "epoch": 0.000339794921875,
      "step": 55672,
      "training_step_time": 0.6035959720611572
    },
    {
      "epoch": 0.000339801025390625,
      "model_forward_time": 0.11518621444702148,
      "step": 55673
    },
    {
      "epoch": 0.000339801025390625,
      "step": 55673,
      "training_step_time": 0.39130711555480957
    },
    {
      "epoch": 0.00033980712890625,
      "model_forward_time": 0.11531305313110352,
      "step": 55674
    },
    {
      "epoch": 0.00033980712890625,
      "step": 55674,
      "training_step_time": 0.49959683418273926
    },
    {
      "epoch": 0.000339813232421875,
      "model_forward_time": 0.11439871788024902,
      "step": 55675
    },
    {
      "epoch": 0.000339813232421875,
      "step": 55675,
      "training_step_time": 0.38878417015075684
    },
    {
      "epoch": 0.0003398193359375,
      "model_forward_time": 0.11475777626037598,
      "step": 55676
    },
    {
      "epoch": 0.0003398193359375,
      "step": 55676,
      "training_step_time": 0.39438652992248535
    },
    {
      "epoch": 0.000339825439453125,
      "model_forward_time": 0.11456060409545898,
      "step": 55677
    },
    {
      "epoch": 0.000339825439453125,
      "step": 55677,
      "training_step_time": 0.3916456699371338
    },
    {
      "epoch": 0.00033983154296875,
      "model_forward_time": 0.11471128463745117,
      "step": 55678
    },
    {
      "epoch": 0.00033983154296875,
      "step": 55678,
      "training_step_time": 0.5800025463104248
    },
    {
      "epoch": 0.000339837646484375,
      "model_forward_time": 0.114593505859375,
      "step": 55679
    },
    {
      "epoch": 0.000339837646484375,
      "step": 55679,
      "training_step_time": 0.412477970123291
    },
    {
      "epoch": 0.00033984375,
      "grad_norm": 0.08553898334503174,
      "learning_rate": 1.4106031013849496e-06,
      "loss": 0.0331,
      "step": 55680
    },
    {
      "epoch": 0.00033984375,
      "model_forward_time": 0.11537623405456543,
      "step": 55680
    },
    {
      "epoch": 0.00033984375,
      "step": 55680,
      "training_step_time": 0.40212345123291016
    },
    {
      "epoch": 0.000339849853515625,
      "model_forward_time": 0.11542296409606934,
      "step": 55681
    },
    {
      "epoch": 0.000339849853515625,
      "step": 55681,
      "training_step_time": 0.4387478828430176
    },
    {
      "epoch": 0.00033985595703125,
      "model_forward_time": 0.11513495445251465,
      "step": 55682
    },
    {
      "epoch": 0.00033985595703125,
      "step": 55682,
      "training_step_time": 0.41097283363342285
    },
    {
      "epoch": 0.000339862060546875,
      "model_forward_time": 0.11483502388000488,
      "step": 55683
    },
    {
      "epoch": 0.000339862060546875,
      "step": 55683,
      "training_step_time": 0.48890113830566406
    },
    {
      "epoch": 0.0003398681640625,
      "model_forward_time": 0.11521387100219727,
      "step": 55684
    },
    {
      "epoch": 0.0003398681640625,
      "step": 55684,
      "training_step_time": 0.460676908493042
    },
    {
      "epoch": 0.000339874267578125,
      "model_forward_time": 0.11547231674194336,
      "step": 55685
    },
    {
      "epoch": 0.000339874267578125,
      "step": 55685,
      "training_step_time": 0.4047403335571289
    },
    {
      "epoch": 0.00033988037109375,
      "model_forward_time": 0.11541867256164551,
      "step": 55686
    },
    {
      "epoch": 0.00033988037109375,
      "step": 55686,
      "training_step_time": 0.4971911907196045
    },
    {
      "epoch": 0.000339886474609375,
      "model_forward_time": 0.11507987976074219,
      "step": 55687
    },
    {
      "epoch": 0.000339886474609375,
      "step": 55687,
      "training_step_time": 0.4172031879425049
    },
    {
      "epoch": 0.000339892578125,
      "model_forward_time": 0.1151735782623291,
      "step": 55688
    },
    {
      "epoch": 0.000339892578125,
      "step": 55688,
      "training_step_time": 0.4776346683502197
    },
    {
      "epoch": 0.000339898681640625,
      "model_forward_time": 0.11536312103271484,
      "step": 55689
    },
    {
      "epoch": 0.000339898681640625,
      "step": 55689,
      "training_step_time": 0.3917524814605713
    },
    {
      "epoch": 0.00033990478515625,
      "grad_norm": 0.10048120468854904,
      "learning_rate": 1.4041107933656928e-06,
      "loss": 0.0361,
      "step": 55690
    },
    {
      "epoch": 0.00033990478515625,
      "model_forward_time": 0.11560940742492676,
      "step": 55690
    },
    {
      "epoch": 0.00033990478515625,
      "step": 55690,
      "training_step_time": 0.3997008800506592
    },
    {
      "epoch": 0.000339910888671875,
      "model_forward_time": 0.11525106430053711,
      "step": 55691
    },
    {
      "epoch": 0.000339910888671875,
      "step": 55691,
      "training_step_time": 0.4002821445465088
    },
    {
      "epoch": 0.0003399169921875,
      "model_forward_time": 0.11628365516662598,
      "step": 55692
    },
    {
      "epoch": 0.0003399169921875,
      "step": 55692,
      "training_step_time": 0.4028911590576172
    },
    {
      "epoch": 0.000339923095703125,
      "model_forward_time": 0.11500000953674316,
      "step": 55693
    },
    {
      "epoch": 0.000339923095703125,
      "step": 55693,
      "training_step_time": 0.4121439456939697
    },
    {
      "epoch": 0.00033992919921875,
      "model_forward_time": 0.11513137817382812,
      "step": 55694
    },
    {
      "epoch": 0.00033992919921875,
      "step": 55694,
      "training_step_time": 0.3913588523864746
    },
    {
      "epoch": 0.000339935302734375,
      "model_forward_time": 0.11513566970825195,
      "step": 55695
    },
    {
      "epoch": 0.000339935302734375,
      "step": 55695,
      "training_step_time": 0.3937709331512451
    },
    {
      "epoch": 0.00033994140625,
      "model_forward_time": 0.11556673049926758,
      "step": 55696
    },
    {
      "epoch": 0.00033994140625,
      "step": 55696,
      "training_step_time": 0.5350704193115234
    },
    {
      "epoch": 0.000339947509765625,
      "model_forward_time": 0.11513519287109375,
      "step": 55697
    },
    {
      "epoch": 0.000339947509765625,
      "step": 55697,
      "training_step_time": 0.48171377182006836
    },
    {
      "epoch": 0.00033995361328125,
      "model_forward_time": 0.11456584930419922,
      "step": 55698
    },
    {
      "epoch": 0.00033995361328125,
      "step": 55698,
      "training_step_time": 0.43047475814819336
    },
    {
      "epoch": 0.000339959716796875,
      "model_forward_time": 0.11541604995727539,
      "step": 55699
    },
    {
      "epoch": 0.000339959716796875,
      "step": 55699,
      "training_step_time": 0.39116597175598145
    },
    {
      "epoch": 0.0003399658203125,
      "grad_norm": 0.07558384537696838,
      "learning_rate": 1.3976332474944843e-06,
      "loss": 0.0338,
      "step": 55700
    },
    {
      "epoch": 0.0003399658203125,
      "model_forward_time": 0.11502385139465332,
      "step": 55700
    },
    {
      "epoch": 0.0003399658203125,
      "step": 55700,
      "training_step_time": 0.42107295989990234
    },
    {
      "epoch": 0.000339971923828125,
      "model_forward_time": 0.11469030380249023,
      "step": 55701
    },
    {
      "epoch": 0.000339971923828125,
      "step": 55701,
      "training_step_time": 0.390627384185791
    },
    {
      "epoch": 0.00033997802734375,
      "model_forward_time": 0.11515450477600098,
      "step": 55702
    },
    {
      "epoch": 0.00033997802734375,
      "step": 55702,
      "training_step_time": 0.4381999969482422
    },
    {
      "epoch": 0.000339984130859375,
      "model_forward_time": 0.11522364616394043,
      "step": 55703
    },
    {
      "epoch": 0.000339984130859375,
      "step": 55703,
      "training_step_time": 0.3936290740966797
    },
    {
      "epoch": 0.000339990234375,
      "model_forward_time": 0.11533808708190918,
      "step": 55704
    },
    {
      "epoch": 0.000339990234375,
      "step": 55704,
      "training_step_time": 0.3953578472137451
    },
    {
      "epoch": 0.000339996337890625,
      "model_forward_time": 0.11515522003173828,
      "step": 55705
    },
    {
      "epoch": 0.000339996337890625,
      "step": 55705,
      "training_step_time": 0.39879894256591797
    },
    {
      "epoch": 0.00034000244140625,
      "model_forward_time": 0.11539649963378906,
      "step": 55706
    },
    {
      "epoch": 0.00034000244140625,
      "step": 55706,
      "training_step_time": 0.40177464485168457
    },
    {
      "epoch": 0.000340008544921875,
      "model_forward_time": 0.11526727676391602,
      "step": 55707
    },
    {
      "epoch": 0.000340008544921875,
      "step": 55707,
      "training_step_time": 0.42013025283813477
    },
    {
      "epoch": 0.0003400146484375,
      "model_forward_time": 0.11525678634643555,
      "step": 55708
    },
    {
      "epoch": 0.0003400146484375,
      "step": 55708,
      "training_step_time": 0.5441467761993408
    },
    {
      "epoch": 0.000340020751953125,
      "model_forward_time": 0.11533117294311523,
      "step": 55709
    },
    {
      "epoch": 0.000340020751953125,
      "step": 55709,
      "training_step_time": 0.3841817378997803
    },
    {
      "epoch": 0.00034002685546875,
      "grad_norm": 0.06573397666215897,
      "learning_rate": 1.3911704657390113e-06,
      "loss": 0.0331,
      "step": 55710
    },
    {
      "epoch": 0.00034002685546875,
      "model_forward_time": 0.11542153358459473,
      "step": 55710
    },
    {
      "epoch": 0.00034002685546875,
      "step": 55710,
      "training_step_time": 0.3730590343475342
    },
    {
      "epoch": 0.000340032958984375,
      "model_forward_time": 0.11518192291259766,
      "step": 55711
    },
    {
      "epoch": 0.000340032958984375,
      "step": 55711,
      "training_step_time": 0.44593381881713867
    },
    {
      "epoch": 0.0003400390625,
      "model_forward_time": 0.11489582061767578,
      "step": 55712
    },
    {
      "epoch": 0.0003400390625,
      "step": 55712,
      "training_step_time": 0.48615479469299316
    },
    {
      "epoch": 0.000340045166015625,
      "model_forward_time": 0.11418938636779785,
      "step": 55713
    },
    {
      "epoch": 0.000340045166015625,
      "step": 55713,
      "training_step_time": 0.4000678062438965
    },
    {
      "epoch": 0.00034005126953125,
      "model_forward_time": 0.11516547203063965,
      "step": 55714
    },
    {
      "epoch": 0.00034005126953125,
      "step": 55714,
      "training_step_time": 0.5076367855072021
    },
    {
      "epoch": 0.000340057373046875,
      "model_forward_time": 0.11500167846679688,
      "step": 55715
    },
    {
      "epoch": 0.000340057373046875,
      "step": 55715,
      "training_step_time": 0.40253424644470215
    },
    {
      "epoch": 0.0003400634765625,
      "model_forward_time": 0.11506223678588867,
      "step": 55716
    },
    {
      "epoch": 0.0003400634765625,
      "step": 55716,
      "training_step_time": 0.3993692398071289
    },
    {
      "epoch": 0.000340069580078125,
      "model_forward_time": 0.1150047779083252,
      "step": 55717
    },
    {
      "epoch": 0.000340069580078125,
      "step": 55717,
      "training_step_time": 0.392425537109375
    },
    {
      "epoch": 0.00034007568359375,
      "model_forward_time": 0.11508440971374512,
      "step": 55718
    },
    {
      "epoch": 0.00034007568359375,
      "step": 55718,
      "training_step_time": 0.39286041259765625
    },
    {
      "epoch": 0.000340081787109375,
      "model_forward_time": 0.11509466171264648,
      "step": 55719
    },
    {
      "epoch": 0.000340081787109375,
      "step": 55719,
      "training_step_time": 0.4006693363189697
    },
    {
      "epoch": 0.000340087890625,
      "grad_norm": 0.09370775520801544,
      "learning_rate": 1.3847224500625256e-06,
      "loss": 0.0359,
      "step": 55720
    },
    {
      "epoch": 0.000340087890625,
      "model_forward_time": 0.11448979377746582,
      "step": 55720
    },
    {
      "epoch": 0.000340087890625,
      "step": 55720,
      "training_step_time": 0.5772659778594971
    },
    {
      "epoch": 0.000340093994140625,
      "model_forward_time": 0.11548352241516113,
      "step": 55721
    },
    {
      "epoch": 0.000340093994140625,
      "step": 55721,
      "training_step_time": 0.4310448169708252
    },
    {
      "epoch": 0.00034010009765625,
      "model_forward_time": 0.11470580101013184,
      "step": 55722
    },
    {
      "epoch": 0.00034010009765625,
      "step": 55722,
      "training_step_time": 0.39842844009399414
    },
    {
      "epoch": 0.000340106201171875,
      "model_forward_time": 0.11484956741333008,
      "step": 55723
    },
    {
      "epoch": 0.000340106201171875,
      "step": 55723,
      "training_step_time": 0.3916897773742676
    },
    {
      "epoch": 0.0003401123046875,
      "model_forward_time": 0.11536812782287598,
      "step": 55724
    },
    {
      "epoch": 0.0003401123046875,
      "step": 55724,
      "training_step_time": 0.38389134407043457
    },
    {
      "epoch": 0.000340118408203125,
      "model_forward_time": 0.11643552780151367,
      "step": 55725
    },
    {
      "epoch": 0.000340118408203125,
      "step": 55725,
      "training_step_time": 0.5054671764373779
    },
    {
      "epoch": 0.00034012451171875,
      "model_forward_time": 0.1155095100402832,
      "step": 55726
    },
    {
      "epoch": 0.00034012451171875,
      "step": 55726,
      "training_step_time": 0.5086233615875244
    },
    {
      "epoch": 0.000340130615234375,
      "model_forward_time": 0.1149759292602539,
      "step": 55727
    },
    {
      "epoch": 0.000340130615234375,
      "step": 55727,
      "training_step_time": 0.38280606269836426
    },
    {
      "epoch": 0.00034013671875,
      "model_forward_time": 0.11498785018920898,
      "step": 55728
    },
    {
      "epoch": 0.00034013671875,
      "step": 55728,
      "training_step_time": 0.4099292755126953
    },
    {
      "epoch": 0.000340142822265625,
      "model_forward_time": 0.11455535888671875,
      "step": 55729
    },
    {
      "epoch": 0.000340142822265625,
      "step": 55729,
      "training_step_time": 0.3936045169830322
    },
    {
      "epoch": 0.00034014892578125,
      "grad_norm": 0.07676169276237488,
      "learning_rate": 1.3782892024237327e-06,
      "loss": 0.0342,
      "step": 55730
    },
    {
      "epoch": 0.00034014892578125,
      "model_forward_time": 0.11500406265258789,
      "step": 55730
    },
    {
      "epoch": 0.00034014892578125,
      "step": 55730,
      "training_step_time": 0.41524410247802734
    },
    {
      "epoch": 0.000340155029296875,
      "model_forward_time": 0.11510086059570312,
      "step": 55731
    },
    {
      "epoch": 0.000340155029296875,
      "step": 55731,
      "training_step_time": 0.38626599311828613
    },
    {
      "epoch": 0.0003401611328125,
      "model_forward_time": 0.11517167091369629,
      "step": 55732
    },
    {
      "epoch": 0.0003401611328125,
      "step": 55732,
      "training_step_time": 0.5506815910339355
    },
    {
      "epoch": 0.000340167236328125,
      "model_forward_time": 0.1150214672088623,
      "step": 55733
    },
    {
      "epoch": 0.000340167236328125,
      "step": 55733,
      "training_step_time": 0.43242931365966797
    },
    {
      "epoch": 0.00034017333984375,
      "model_forward_time": 0.11530399322509766,
      "step": 55734
    },
    {
      "epoch": 0.00034017333984375,
      "step": 55734,
      "training_step_time": 0.41498541831970215
    },
    {
      "epoch": 0.000340179443359375,
      "model_forward_time": 0.11460757255554199,
      "step": 55735
    },
    {
      "epoch": 0.000340179443359375,
      "step": 55735,
      "training_step_time": 0.3943769931793213
    },
    {
      "epoch": 0.000340185546875,
      "model_forward_time": 0.11539554595947266,
      "step": 55736
    },
    {
      "epoch": 0.000340185546875,
      "step": 55736,
      "training_step_time": 0.3852550983428955
    },
    {
      "epoch": 0.000340191650390625,
      "model_forward_time": 0.1148979663848877,
      "step": 55737
    },
    {
      "epoch": 0.000340191650390625,
      "step": 55737,
      "training_step_time": 0.3933885097503662
    },
    {
      "epoch": 0.00034019775390625,
      "model_forward_time": 0.11487555503845215,
      "step": 55738
    },
    {
      "epoch": 0.00034019775390625,
      "step": 55738,
      "training_step_time": 0.5920794010162354
    },
    {
      "epoch": 0.000340203857421875,
      "model_forward_time": 0.11459183692932129,
      "step": 55739
    },
    {
      "epoch": 0.000340203857421875,
      "step": 55739,
      "training_step_time": 0.46259069442749023
    },
    {
      "epoch": 0.0003402099609375,
      "grad_norm": 0.09625359624624252,
      "learning_rate": 1.3718707247769135e-06,
      "loss": 0.0343,
      "step": 55740
    },
    {
      "epoch": 0.0003402099609375,
      "model_forward_time": 0.11433887481689453,
      "step": 55740
    },
    {
      "epoch": 0.0003402099609375,
      "step": 55740,
      "training_step_time": 0.4968111515045166
    },
    {
      "epoch": 0.000340216064453125,
      "model_forward_time": 0.11448407173156738,
      "step": 55741
    },
    {
      "epoch": 0.000340216064453125,
      "step": 55741,
      "training_step_time": 0.3988151550292969
    },
    {
      "epoch": 0.00034022216796875,
      "model_forward_time": 0.11409115791320801,
      "step": 55742
    },
    {
      "epoch": 0.00034022216796875,
      "step": 55742,
      "training_step_time": 0.45809388160705566
    },
    {
      "epoch": 0.000340228271484375,
      "model_forward_time": 0.11420559883117676,
      "step": 55743
    },
    {
      "epoch": 0.000340228271484375,
      "step": 55743,
      "training_step_time": 0.418658971786499
    },
    {
      "epoch": 0.000340234375,
      "model_forward_time": 0.11458730697631836,
      "step": 55744
    },
    {
      "epoch": 0.000340234375,
      "step": 55744,
      "training_step_time": 0.40341877937316895
    },
    {
      "epoch": 0.000340240478515625,
      "model_forward_time": 0.11446595191955566,
      "step": 55745
    },
    {
      "epoch": 0.000340240478515625,
      "step": 55745,
      "training_step_time": 0.40196728706359863
    },
    {
      "epoch": 0.00034024658203125,
      "model_forward_time": 0.11474800109863281,
      "step": 55746
    },
    {
      "epoch": 0.00034024658203125,
      "step": 55746,
      "training_step_time": 0.4043920040130615
    },
    {
      "epoch": 0.000340252685546875,
      "model_forward_time": 0.11542320251464844,
      "step": 55747
    },
    {
      "epoch": 0.000340252685546875,
      "step": 55747,
      "training_step_time": 0.4241626262664795
    },
    {
      "epoch": 0.0003402587890625,
      "model_forward_time": 0.11572384834289551,
      "step": 55748
    },
    {
      "epoch": 0.0003402587890625,
      "step": 55748,
      "training_step_time": 0.40307164192199707
    },
    {
      "epoch": 0.000340264892578125,
      "model_forward_time": 0.1145021915435791,
      "step": 55749
    },
    {
      "epoch": 0.000340264892578125,
      "step": 55749,
      "training_step_time": 0.40564751625061035
    },
    {
      "epoch": 0.00034027099609375,
      "grad_norm": 0.07209862023591995,
      "learning_rate": 1.3654670190718034e-06,
      "loss": 0.0381,
      "step": 55750
    },
    {
      "epoch": 0.00034027099609375,
      "model_forward_time": 0.1155099868774414,
      "step": 55750
    },
    {
      "epoch": 0.00034027099609375,
      "step": 55750,
      "training_step_time": 0.41734957695007324
    },
    {
      "epoch": 0.000340277099609375,
      "model_forward_time": 0.11451911926269531,
      "step": 55751
    },
    {
      "epoch": 0.000340277099609375,
      "step": 55751,
      "training_step_time": 0.3999311923980713
    },
    {
      "epoch": 0.000340283203125,
      "model_forward_time": 0.11544108390808105,
      "step": 55752
    },
    {
      "epoch": 0.000340283203125,
      "step": 55752,
      "training_step_time": 0.3927004337310791
    },
    {
      "epoch": 0.000340289306640625,
      "model_forward_time": 0.11534428596496582,
      "step": 55753
    },
    {
      "epoch": 0.000340289306640625,
      "step": 55753,
      "training_step_time": 0.39417147636413574
    },
    {
      "epoch": 0.00034029541015625,
      "model_forward_time": 0.11478781700134277,
      "step": 55754
    },
    {
      "epoch": 0.00034029541015625,
      "step": 55754,
      "training_step_time": 0.4422781467437744
    },
    {
      "epoch": 0.000340301513671875,
      "model_forward_time": 0.11464762687683105,
      "step": 55755
    },
    {
      "epoch": 0.000340301513671875,
      "step": 55755,
      "training_step_time": 0.504288911819458
    },
    {
      "epoch": 0.0003403076171875,
      "model_forward_time": 0.11472916603088379,
      "step": 55756
    },
    {
      "epoch": 0.0003403076171875,
      "step": 55756,
      "training_step_time": 0.4981956481933594
    },
    {
      "epoch": 0.000340313720703125,
      "model_forward_time": 0.11491918563842773,
      "step": 55757
    },
    {
      "epoch": 0.000340313720703125,
      "step": 55757,
      "training_step_time": 0.4106154441833496
    },
    {
      "epoch": 0.00034031982421875,
      "model_forward_time": 0.11503458023071289,
      "step": 55758
    },
    {
      "epoch": 0.00034031982421875,
      "step": 55758,
      "training_step_time": 0.4271252155303955
    },
    {
      "epoch": 0.000340325927734375,
      "model_forward_time": 0.11418581008911133,
      "step": 55759
    },
    {
      "epoch": 0.000340325927734375,
      "step": 55759,
      "training_step_time": 0.39926910400390625
    },
    {
      "epoch": 0.00034033203125,
      "grad_norm": 0.0799146294593811,
      "learning_rate": 1.3590780872536958e-06,
      "loss": 0.037,
      "step": 55760
    },
    {
      "epoch": 0.00034033203125,
      "model_forward_time": 0.11454033851623535,
      "step": 55760
    },
    {
      "epoch": 0.00034033203125,
      "step": 55760,
      "training_step_time": 0.42063403129577637
    },
    {
      "epoch": 0.000340338134765625,
      "model_forward_time": 0.11564278602600098,
      "step": 55761
    },
    {
      "epoch": 0.000340338134765625,
      "step": 55761,
      "training_step_time": 0.4221322536468506
    },
    {
      "epoch": 0.00034034423828125,
      "model_forward_time": 0.11518597602844238,
      "step": 55762
    },
    {
      "epoch": 0.00034034423828125,
      "step": 55762,
      "training_step_time": 0.5060892105102539
    },
    {
      "epoch": 0.000340350341796875,
      "model_forward_time": 0.1144704818725586,
      "step": 55763
    },
    {
      "epoch": 0.000340350341796875,
      "step": 55763,
      "training_step_time": 0.38761019706726074
    },
    {
      "epoch": 0.0003403564453125,
      "model_forward_time": 0.11504483222961426,
      "step": 55764
    },
    {
      "epoch": 0.0003403564453125,
      "step": 55764,
      "training_step_time": 0.3905351161956787
    },
    {
      "epoch": 0.000340362548828125,
      "model_forward_time": 0.11522865295410156,
      "step": 55765
    },
    {
      "epoch": 0.000340362548828125,
      "step": 55765,
      "training_step_time": 0.3960001468658447
    },
    {
      "epoch": 0.00034036865234375,
      "model_forward_time": 0.11476373672485352,
      "step": 55766
    },
    {
      "epoch": 0.00034036865234375,
      "step": 55766,
      "training_step_time": 0.39453840255737305
    },
    {
      "epoch": 0.000340374755859375,
      "model_forward_time": 0.11470484733581543,
      "step": 55767
    },
    {
      "epoch": 0.000340374755859375,
      "step": 55767,
      "training_step_time": 0.3980693817138672
    },
    {
      "epoch": 0.000340380859375,
      "model_forward_time": 0.11443972587585449,
      "step": 55768
    },
    {
      "epoch": 0.000340380859375,
      "step": 55768,
      "training_step_time": 0.6573858261108398
    },
    {
      "epoch": 0.000340386962890625,
      "model_forward_time": 0.11767911911010742,
      "step": 55769
    },
    {
      "epoch": 0.000340386962890625,
      "step": 55769,
      "training_step_time": 0.41878724098205566
    },
    {
      "epoch": 0.00034039306640625,
      "grad_norm": 0.07372672110795975,
      "learning_rate": 1.3527039312633827e-06,
      "loss": 0.0323,
      "step": 55770
    },
    {
      "epoch": 0.00034039306640625,
      "model_forward_time": 0.11807465553283691,
      "step": 55770
    },
    {
      "epoch": 0.00034039306640625,
      "step": 55770,
      "training_step_time": 0.37969136238098145
    },
    {
      "epoch": 0.000340399169921875,
      "model_forward_time": 0.11503958702087402,
      "step": 55771
    },
    {
      "epoch": 0.000340399169921875,
      "step": 55771,
      "training_step_time": 0.40779948234558105
    },
    {
      "epoch": 0.0003404052734375,
      "model_forward_time": 0.11408114433288574,
      "step": 55772
    },
    {
      "epoch": 0.0003404052734375,
      "step": 55772,
      "training_step_time": 0.5176815986633301
    },
    {
      "epoch": 0.000340411376953125,
      "model_forward_time": 0.1143801212310791,
      "step": 55773
    },
    {
      "epoch": 0.000340411376953125,
      "step": 55773,
      "training_step_time": 0.40857362747192383
    },
    {
      "epoch": 0.00034041748046875,
      "model_forward_time": 0.1151881217956543,
      "step": 55774
    },
    {
      "epoch": 0.00034041748046875,
      "step": 55774,
      "training_step_time": 0.38474607467651367
    },
    {
      "epoch": 0.000340423583984375,
      "model_forward_time": 0.11526608467102051,
      "step": 55775
    },
    {
      "epoch": 0.000340423583984375,
      "step": 55775,
      "training_step_time": 0.39797234535217285
    },
    {
      "epoch": 0.0003404296875,
      "model_forward_time": 0.11483931541442871,
      "step": 55776
    },
    {
      "epoch": 0.0003404296875,
      "step": 55776,
      "training_step_time": 0.394550085067749
    },
    {
      "epoch": 0.000340435791015625,
      "model_forward_time": 0.11486577987670898,
      "step": 55777
    },
    {
      "epoch": 0.000340435791015625,
      "step": 55777,
      "training_step_time": 0.3877863883972168
    },
    {
      "epoch": 0.00034044189453125,
      "model_forward_time": 0.11552095413208008,
      "step": 55778
    },
    {
      "epoch": 0.00034044189453125,
      "step": 55778,
      "training_step_time": 0.4009370803833008
    },
    {
      "epoch": 0.000340447998046875,
      "model_forward_time": 0.11538052558898926,
      "step": 55779
    },
    {
      "epoch": 0.000340447998046875,
      "step": 55779,
      "training_step_time": 0.4010298252105713
    },
    {
      "epoch": 0.0003404541015625,
      "grad_norm": 0.07754497975111008,
      "learning_rate": 1.3463445530371488e-06,
      "loss": 0.0388,
      "step": 55780
    },
    {
      "epoch": 0.0003404541015625,
      "model_forward_time": 0.11551499366760254,
      "step": 55780
    },
    {
      "epoch": 0.0003404541015625,
      "step": 55780,
      "training_step_time": 0.562387228012085
    },
    {
      "epoch": 0.000340460205078125,
      "model_forward_time": 0.11462068557739258,
      "step": 55781
    },
    {
      "epoch": 0.000340460205078125,
      "step": 55781,
      "training_step_time": 0.36730241775512695
    },
    {
      "epoch": 0.00034046630859375,
      "model_forward_time": 0.11448812484741211,
      "step": 55782
    },
    {
      "epoch": 0.00034046630859375,
      "step": 55782,
      "training_step_time": 0.460526704788208
    },
    {
      "epoch": 0.000340472412109375,
      "model_forward_time": 0.11479783058166504,
      "step": 55783
    },
    {
      "epoch": 0.000340472412109375,
      "step": 55783,
      "training_step_time": 0.45752954483032227
    },
    {
      "epoch": 0.000340478515625,
      "model_forward_time": 0.11546993255615234,
      "step": 55784
    },
    {
      "epoch": 0.000340478515625,
      "step": 55784,
      "training_step_time": 0.37859320640563965
    },
    {
      "epoch": 0.000340484619140625,
      "model_forward_time": 0.11469459533691406,
      "step": 55785
    },
    {
      "epoch": 0.000340484619140625,
      "step": 55785,
      "training_step_time": 0.4527781009674072
    },
    {
      "epoch": 0.00034049072265625,
      "model_forward_time": 0.11508345603942871,
      "step": 55786
    },
    {
      "epoch": 0.00034049072265625,
      "step": 55786,
      "training_step_time": 0.41192626953125
    },
    {
      "epoch": 0.000340496826171875,
      "model_forward_time": 0.11465001106262207,
      "step": 55787
    },
    {
      "epoch": 0.000340496826171875,
      "step": 55787,
      "training_step_time": 0.4444284439086914
    },
    {
      "epoch": 0.0003405029296875,
      "model_forward_time": 0.1150357723236084,
      "step": 55788
    },
    {
      "epoch": 0.0003405029296875,
      "step": 55788,
      "training_step_time": 0.3976783752441406
    },
    {
      "epoch": 0.000340509033203125,
      "model_forward_time": 0.11515402793884277,
      "step": 55789
    },
    {
      "epoch": 0.000340509033203125,
      "step": 55789,
      "training_step_time": 0.39423322677612305
    },
    {
      "epoch": 0.00034051513671875,
      "grad_norm": 0.07693841308355331,
      "learning_rate": 1.339999954506821e-06,
      "loss": 0.0426,
      "step": 55790
    },
    {
      "epoch": 0.00034051513671875,
      "model_forward_time": 0.13286137580871582,
      "step": 55790
    },
    {
      "epoch": 0.00034051513671875,
      "step": 55790,
      "training_step_time": 0.39374256134033203
    },
    {
      "epoch": 0.000340521240234375,
      "model_forward_time": 0.11519956588745117,
      "step": 55791
    },
    {
      "epoch": 0.000340521240234375,
      "step": 55791,
      "training_step_time": 0.3916594982147217
    },
    {
      "epoch": 0.00034052734375,
      "model_forward_time": 0.11505722999572754,
      "step": 55792
    },
    {
      "epoch": 0.00034052734375,
      "step": 55792,
      "training_step_time": 0.3959376811981201
    },
    {
      "epoch": 0.000340533447265625,
      "model_forward_time": 0.11491823196411133,
      "step": 55793
    },
    {
      "epoch": 0.000340533447265625,
      "step": 55793,
      "training_step_time": 0.40254688262939453
    },
    {
      "epoch": 0.00034053955078125,
      "model_forward_time": 0.11531662940979004,
      "step": 55794
    },
    {
      "epoch": 0.00034053955078125,
      "step": 55794,
      "training_step_time": 0.4061441421508789
    },
    {
      "epoch": 0.000340545654296875,
      "model_forward_time": 0.11504817008972168,
      "step": 55795
    },
    {
      "epoch": 0.000340545654296875,
      "step": 55795,
      "training_step_time": 0.39324474334716797
    },
    {
      "epoch": 0.0003405517578125,
      "model_forward_time": 0.11545991897583008,
      "step": 55796
    },
    {
      "epoch": 0.0003405517578125,
      "step": 55796,
      "training_step_time": 0.4668087959289551
    },
    {
      "epoch": 0.000340557861328125,
      "model_forward_time": 0.11507582664489746,
      "step": 55797
    },
    {
      "epoch": 0.000340557861328125,
      "step": 55797,
      "training_step_time": 0.48465657234191895
    },
    {
      "epoch": 0.00034056396484375,
      "model_forward_time": 0.11504650115966797,
      "step": 55798
    },
    {
      "epoch": 0.00034056396484375,
      "step": 55798,
      "training_step_time": 0.47345924377441406
    },
    {
      "epoch": 0.000340570068359375,
      "model_forward_time": 0.11532759666442871,
      "step": 55799
    },
    {
      "epoch": 0.000340570068359375,
      "step": 55799,
      "training_step_time": 0.4264049530029297
    },
    {
      "epoch": 0.000340576171875,
      "grad_norm": 0.06993478536605835,
      "learning_rate": 1.333670137599713e-06,
      "loss": 0.0349,
      "step": 55800
    },
    {
      "epoch": 0.000340576171875,
      "model_forward_time": 0.11501216888427734,
      "step": 55800
    },
    {
      "epoch": 0.000340576171875,
      "step": 55800,
      "training_step_time": 0.4909646511077881
    },
    {
      "epoch": 0.000340582275390625,
      "model_forward_time": 0.11499166488647461,
      "step": 55801
    },
    {
      "epoch": 0.000340582275390625,
      "step": 55801,
      "training_step_time": 0.39748334884643555
    },
    {
      "epoch": 0.00034058837890625,
      "model_forward_time": 0.11489439010620117,
      "step": 55802
    },
    {
      "epoch": 0.00034058837890625,
      "step": 55802,
      "training_step_time": 0.38742923736572266
    },
    {
      "epoch": 0.000340594482421875,
      "model_forward_time": 0.11516356468200684,
      "step": 55803
    },
    {
      "epoch": 0.000340594482421875,
      "step": 55803,
      "training_step_time": 0.3939847946166992
    },
    {
      "epoch": 0.0003406005859375,
      "model_forward_time": 0.11513614654541016,
      "step": 55804
    },
    {
      "epoch": 0.0003406005859375,
      "step": 55804,
      "training_step_time": 0.5091695785522461
    },
    {
      "epoch": 0.000340606689453125,
      "model_forward_time": 0.11470746994018555,
      "step": 55805
    },
    {
      "epoch": 0.000340606689453125,
      "step": 55805,
      "training_step_time": 0.4005870819091797
    },
    {
      "epoch": 0.00034061279296875,
      "model_forward_time": 0.11512207984924316,
      "step": 55806
    },
    {
      "epoch": 0.00034061279296875,
      "step": 55806,
      "training_step_time": 0.39751124382019043
    },
    {
      "epoch": 0.000340618896484375,
      "model_forward_time": 0.11592674255371094,
      "step": 55807
    },
    {
      "epoch": 0.000340618896484375,
      "step": 55807,
      "training_step_time": 0.3914988040924072
    },
    {
      "epoch": 0.000340625,
      "model_forward_time": 0.11539554595947266,
      "step": 55808
    },
    {
      "epoch": 0.000340625,
      "step": 55808,
      "training_step_time": 0.3934807777404785
    },
    {
      "epoch": 0.000340631103515625,
      "model_forward_time": 0.11528563499450684,
      "step": 55809
    },
    {
      "epoch": 0.000340631103515625,
      "step": 55809,
      "training_step_time": 0.3944265842437744
    },
    {
      "epoch": 0.00034063720703125,
      "grad_norm": 0.07401490956544876,
      "learning_rate": 1.3273551042386534e-06,
      "loss": 0.0358,
      "step": 55810
    },
    {
      "epoch": 0.00034063720703125,
      "model_forward_time": 0.11507844924926758,
      "step": 55810
    },
    {
      "epoch": 0.00034063720703125,
      "step": 55810,
      "training_step_time": 0.5065133571624756
    },
    {
      "epoch": 0.000340643310546875,
      "model_forward_time": 0.11502408981323242,
      "step": 55811
    },
    {
      "epoch": 0.000340643310546875,
      "step": 55811,
      "training_step_time": 0.46122169494628906
    },
    {
      "epoch": 0.0003406494140625,
      "model_forward_time": 0.11747002601623535,
      "step": 55812
    },
    {
      "epoch": 0.0003406494140625,
      "step": 55812,
      "training_step_time": 0.47727441787719727
    },
    {
      "epoch": 0.000340655517578125,
      "model_forward_time": 0.11445093154907227,
      "step": 55813
    },
    {
      "epoch": 0.000340655517578125,
      "step": 55813,
      "training_step_time": 0.4221951961517334
    },
    {
      "epoch": 0.00034066162109375,
      "model_forward_time": 0.11503839492797852,
      "step": 55814
    },
    {
      "epoch": 0.00034066162109375,
      "step": 55814,
      "training_step_time": 0.42287778854370117
    },
    {
      "epoch": 0.000340667724609375,
      "model_forward_time": 0.11478567123413086,
      "step": 55815
    },
    {
      "epoch": 0.000340667724609375,
      "step": 55815,
      "training_step_time": 0.38510608673095703
    },
    {
      "epoch": 0.000340673828125,
      "model_forward_time": 0.11472845077514648,
      "step": 55816
    },
    {
      "epoch": 0.000340673828125,
      "step": 55816,
      "training_step_time": 0.38931798934936523
    },
    {
      "epoch": 0.000340679931640625,
      "model_forward_time": 0.11525464057922363,
      "step": 55817
    },
    {
      "epoch": 0.000340679931640625,
      "step": 55817,
      "training_step_time": 0.39525341987609863
    },
    {
      "epoch": 0.00034068603515625,
      "model_forward_time": 0.11521744728088379,
      "step": 55818
    },
    {
      "epoch": 0.00034068603515625,
      "step": 55818,
      "training_step_time": 0.40849804878234863
    },
    {
      "epoch": 0.000340692138671875,
      "model_forward_time": 0.1146993637084961,
      "step": 55819
    },
    {
      "epoch": 0.000340692138671875,
      "step": 55819,
      "training_step_time": 0.3908882141113281
    },
    {
      "epoch": 0.0003406982421875,
      "grad_norm": 0.07427913695573807,
      "learning_rate": 1.3210548563419856e-06,
      "loss": 0.0403,
      "step": 55820
    },
    {
      "epoch": 0.0003406982421875,
      "model_forward_time": 0.11501336097717285,
      "step": 55820
    },
    {
      "epoch": 0.0003406982421875,
      "step": 55820,
      "training_step_time": 0.39287400245666504
    },
    {
      "epoch": 0.000340704345703125,
      "model_forward_time": 0.11545395851135254,
      "step": 55821
    },
    {
      "epoch": 0.000340704345703125,
      "step": 55821,
      "training_step_time": 0.3859598636627197
    },
    {
      "epoch": 0.00034071044921875,
      "model_forward_time": 0.11587953567504883,
      "step": 55822
    },
    {
      "epoch": 0.00034071044921875,
      "step": 55822,
      "training_step_time": 0.5199809074401855
    },
    {
      "epoch": 0.000340716552734375,
      "model_forward_time": 0.11544442176818848,
      "step": 55823
    },
    {
      "epoch": 0.000340716552734375,
      "step": 55823,
      "training_step_time": 0.39391613006591797
    },
    {
      "epoch": 0.00034072265625,
      "model_forward_time": 0.11516165733337402,
      "step": 55824
    },
    {
      "epoch": 0.00034072265625,
      "step": 55824,
      "training_step_time": 0.36809253692626953
    },
    {
      "epoch": 0.000340728759765625,
      "model_forward_time": 0.1150362491607666,
      "step": 55825
    },
    {
      "epoch": 0.000340728759765625,
      "step": 55825,
      "training_step_time": 0.4578211307525635
    },
    {
      "epoch": 0.00034073486328125,
      "model_forward_time": 0.11540722846984863,
      "step": 55826
    },
    {
      "epoch": 0.00034073486328125,
      "step": 55826,
      "training_step_time": 0.4627816677093506
    },
    {
      "epoch": 0.000340740966796875,
      "model_forward_time": 0.11504316329956055,
      "step": 55827
    },
    {
      "epoch": 0.000340740966796875,
      "step": 55827,
      "training_step_time": 0.4786489009857178
    },
    {
      "epoch": 0.0003407470703125,
      "model_forward_time": 0.11468648910522461,
      "step": 55828
    },
    {
      "epoch": 0.0003407470703125,
      "step": 55828,
      "training_step_time": 0.5041429996490479
    },
    {
      "epoch": 0.000340753173828125,
      "model_forward_time": 0.11492395401000977,
      "step": 55829
    },
    {
      "epoch": 0.000340753173828125,
      "step": 55829,
      "training_step_time": 0.3855106830596924
    },
    {
      "epoch": 0.00034075927734375,
      "grad_norm": 0.08258812874555588,
      "learning_rate": 1.3147693958235618e-06,
      "loss": 0.0394,
      "step": 55830
    },
    {
      "epoch": 0.00034075927734375,
      "model_forward_time": 0.1152043342590332,
      "step": 55830
    },
    {
      "epoch": 0.00034075927734375,
      "step": 55830,
      "training_step_time": 0.3897416591644287
    },
    {
      "epoch": 0.000340765380859375,
      "model_forward_time": 0.11485838890075684,
      "step": 55831
    },
    {
      "epoch": 0.000340765380859375,
      "step": 55831,
      "training_step_time": 0.390303373336792
    },
    {
      "epoch": 0.000340771484375,
      "model_forward_time": 0.11473417282104492,
      "step": 55832
    },
    {
      "epoch": 0.000340771484375,
      "step": 55832,
      "training_step_time": 0.3937385082244873
    },
    {
      "epoch": 0.000340777587890625,
      "model_forward_time": 0.11604642868041992,
      "step": 55833
    },
    {
      "epoch": 0.000340777587890625,
      "step": 55833,
      "training_step_time": 0.3922743797302246
    },
    {
      "epoch": 0.00034078369140625,
      "model_forward_time": 0.11570167541503906,
      "step": 55834
    },
    {
      "epoch": 0.00034078369140625,
      "step": 55834,
      "training_step_time": 0.5037293434143066
    },
    {
      "epoch": 0.000340789794921875,
      "model_forward_time": 0.11517333984375,
      "step": 55835
    },
    {
      "epoch": 0.000340789794921875,
      "step": 55835,
      "training_step_time": 0.39920806884765625
    },
    {
      "epoch": 0.0003407958984375,
      "model_forward_time": 0.11529135704040527,
      "step": 55836
    },
    {
      "epoch": 0.0003407958984375,
      "step": 55836,
      "training_step_time": 0.393141508102417
    },
    {
      "epoch": 0.000340802001953125,
      "model_forward_time": 0.11480474472045898,
      "step": 55837
    },
    {
      "epoch": 0.000340802001953125,
      "step": 55837,
      "training_step_time": 0.38994693756103516
    },
    {
      "epoch": 0.00034080810546875,
      "model_forward_time": 0.11471891403198242,
      "step": 55838
    },
    {
      "epoch": 0.00034080810546875,
      "step": 55838,
      "training_step_time": 0.39050817489624023
    },
    {
      "epoch": 0.000340814208984375,
      "model_forward_time": 0.11487340927124023,
      "step": 55839
    },
    {
      "epoch": 0.000340814208984375,
      "step": 55839,
      "training_step_time": 0.5262002944946289
    },
    {
      "epoch": 0.0003408203125,
      "grad_norm": 0.09597021341323853,
      "learning_rate": 1.3084987245927383e-06,
      "loss": 0.0409,
      "step": 55840
    },
    {
      "epoch": 0.0003408203125,
      "model_forward_time": 0.11501693725585938,
      "step": 55840
    },
    {
      "epoch": 0.0003408203125,
      "step": 55840,
      "training_step_time": 0.49256229400634766
    },
    {
      "epoch": 0.000340826416015625,
      "model_forward_time": 0.11673092842102051,
      "step": 55841
    },
    {
      "epoch": 0.000340826416015625,
      "step": 55841,
      "training_step_time": 0.4470250606536865
    },
    {
      "epoch": 0.00034083251953125,
      "model_forward_time": 0.11525511741638184,
      "step": 55842
    },
    {
      "epoch": 0.00034083251953125,
      "step": 55842,
      "training_step_time": 0.49219608306884766
    },
    {
      "epoch": 0.000340838623046875,
      "model_forward_time": 0.11447381973266602,
      "step": 55843
    },
    {
      "epoch": 0.000340838623046875,
      "step": 55843,
      "training_step_time": 0.3915393352508545
    },
    {
      "epoch": 0.0003408447265625,
      "model_forward_time": 0.1147453784942627,
      "step": 55844
    },
    {
      "epoch": 0.0003408447265625,
      "step": 55844,
      "training_step_time": 0.394350528717041
    },
    {
      "epoch": 0.000340850830078125,
      "model_forward_time": 0.11516642570495605,
      "step": 55845
    },
    {
      "epoch": 0.000340850830078125,
      "step": 55845,
      "training_step_time": 0.390505313873291
    },
    {
      "epoch": 0.00034085693359375,
      "model_forward_time": 0.1151578426361084,
      "step": 55846
    },
    {
      "epoch": 0.00034085693359375,
      "step": 55846,
      "training_step_time": 0.41576576232910156
    },
    {
      "epoch": 0.000340863037109375,
      "model_forward_time": 0.11512303352355957,
      "step": 55847
    },
    {
      "epoch": 0.000340863037109375,
      "step": 55847,
      "training_step_time": 0.39423441886901855
    },
    {
      "epoch": 0.000340869140625,
      "model_forward_time": 0.1153719425201416,
      "step": 55848
    },
    {
      "epoch": 0.000340869140625,
      "step": 55848,
      "training_step_time": 0.4055783748626709
    },
    {
      "epoch": 0.000340875244140625,
      "model_forward_time": 0.11526083946228027,
      "step": 55849
    },
    {
      "epoch": 0.000340875244140625,
      "step": 55849,
      "training_step_time": 0.3933072090148926
    },
    {
      "epoch": 0.00034088134765625,
      "grad_norm": 0.10597016662359238,
      "learning_rate": 1.3022428445543799e-06,
      "loss": 0.0325,
      "step": 55850
    },
    {
      "epoch": 0.00034088134765625,
      "model_forward_time": 0.11466145515441895,
      "step": 55850
    },
    {
      "epoch": 0.00034088134765625,
      "step": 55850,
      "training_step_time": 0.3914041519165039
    },
    {
      "epoch": 0.000340887451171875,
      "model_forward_time": 0.11528682708740234,
      "step": 55851
    },
    {
      "epoch": 0.000340887451171875,
      "step": 55851,
      "training_step_time": 0.3914146423339844
    },
    {
      "epoch": 0.0003408935546875,
      "model_forward_time": 0.11529088020324707,
      "step": 55852
    },
    {
      "epoch": 0.0003408935546875,
      "step": 55852,
      "training_step_time": 0.6728687286376953
    },
    {
      "epoch": 0.000340899658203125,
      "model_forward_time": 0.1145787239074707,
      "step": 55853
    },
    {
      "epoch": 0.000340899658203125,
      "step": 55853,
      "training_step_time": 0.3995325565338135
    },
    {
      "epoch": 0.00034090576171875,
      "model_forward_time": 0.11477947235107422,
      "step": 55854
    },
    {
      "epoch": 0.00034090576171875,
      "step": 55854,
      "training_step_time": 0.4662895202636719
    },
    {
      "epoch": 0.000340911865234375,
      "model_forward_time": 0.1147758960723877,
      "step": 55855
    },
    {
      "epoch": 0.000340911865234375,
      "step": 55855,
      "training_step_time": 0.4797403812408447
    },
    {
      "epoch": 0.00034091796875,
      "model_forward_time": 0.11510753631591797,
      "step": 55856
    },
    {
      "epoch": 0.00034091796875,
      "step": 55856,
      "training_step_time": 0.44306135177612305
    },
    {
      "epoch": 0.000340924072265625,
      "model_forward_time": 0.11425471305847168,
      "step": 55857
    },
    {
      "epoch": 0.000340924072265625,
      "step": 55857,
      "training_step_time": 0.37816691398620605
    },
    {
      "epoch": 0.00034093017578125,
      "model_forward_time": 0.11497330665588379,
      "step": 55858
    },
    {
      "epoch": 0.00034093017578125,
      "step": 55858,
      "training_step_time": 0.41030073165893555
    },
    {
      "epoch": 0.000340936279296875,
      "model_forward_time": 0.1148991584777832,
      "step": 55859
    },
    {
      "epoch": 0.000340936279296875,
      "step": 55859,
      "training_step_time": 0.40284061431884766
    },
    {
      "epoch": 0.0003409423828125,
      "grad_norm": 0.10442201793193817,
      "learning_rate": 1.2960017576088446e-06,
      "loss": 0.0335,
      "step": 55860
    },
    {
      "epoch": 0.0003409423828125,
      "model_forward_time": 0.11492753028869629,
      "step": 55860
    },
    {
      "epoch": 0.0003409423828125,
      "step": 55860,
      "training_step_time": 0.39403295516967773
    },
    {
      "epoch": 0.000340948486328125,
      "model_forward_time": 0.11503863334655762,
      "step": 55861
    },
    {
      "epoch": 0.000340948486328125,
      "step": 55861,
      "training_step_time": 0.3956031799316406
    },
    {
      "epoch": 0.00034095458984375,
      "model_forward_time": 0.11530923843383789,
      "step": 55862
    },
    {
      "epoch": 0.00034095458984375,
      "step": 55862,
      "training_step_time": 0.393718957901001
    },
    {
      "epoch": 0.000340960693359375,
      "model_forward_time": 0.11626648902893066,
      "step": 55863
    },
    {
      "epoch": 0.000340960693359375,
      "step": 55863,
      "training_step_time": 0.39394187927246094
    },
    {
      "epoch": 0.000340966796875,
      "model_forward_time": 0.1155695915222168,
      "step": 55864
    },
    {
      "epoch": 0.000340966796875,
      "step": 55864,
      "training_step_time": 0.6155388355255127
    },
    {
      "epoch": 0.000340972900390625,
      "model_forward_time": 0.1148231029510498,
      "step": 55865
    },
    {
      "epoch": 0.000340972900390625,
      "step": 55865,
      "training_step_time": 0.3971977233886719
    },
    {
      "epoch": 0.00034097900390625,
      "model_forward_time": 0.11476659774780273,
      "step": 55866
    },
    {
      "epoch": 0.00034097900390625,
      "step": 55866,
      "training_step_time": 0.39357662200927734
    },
    {
      "epoch": 0.000340985107421875,
      "model_forward_time": 0.1151418685913086,
      "step": 55867
    },
    {
      "epoch": 0.000340985107421875,
      "step": 55867,
      "training_step_time": 0.4547581672668457
    },
    {
      "epoch": 0.0003409912109375,
      "model_forward_time": 0.11442708969116211,
      "step": 55868
    },
    {
      "epoch": 0.0003409912109375,
      "step": 55868,
      "training_step_time": 0.47541022300720215
    },
    {
      "epoch": 0.000340997314453125,
      "model_forward_time": 0.11463236808776855,
      "step": 55869
    },
    {
      "epoch": 0.000340997314453125,
      "step": 55869,
      "training_step_time": 0.42845773696899414
    },
    {
      "epoch": 0.00034100341796875,
      "grad_norm": 0.07884877920150757,
      "learning_rate": 1.2897754656520379e-06,
      "loss": 0.0335,
      "step": 55870
    },
    {
      "epoch": 0.00034100341796875,
      "model_forward_time": 0.11535072326660156,
      "step": 55870
    },
    {
      "epoch": 0.00034100341796875,
      "step": 55870,
      "training_step_time": 0.5002961158752441
    },
    {
      "epoch": 0.000341009521484375,
      "model_forward_time": 0.11539244651794434,
      "step": 55871
    },
    {
      "epoch": 0.000341009521484375,
      "step": 55871,
      "training_step_time": 0.3894050121307373
    },
    {
      "epoch": 0.000341015625,
      "model_forward_time": 0.1152799129486084,
      "step": 55872
    },
    {
      "epoch": 0.000341015625,
      "step": 55872,
      "training_step_time": 0.395932674407959
    },
    {
      "epoch": 0.000341021728515625,
      "model_forward_time": 0.11477899551391602,
      "step": 55873
    },
    {
      "epoch": 0.000341021728515625,
      "step": 55873,
      "training_step_time": 0.40012145042419434
    },
    {
      "epoch": 0.00034102783203125,
      "model_forward_time": 0.11494803428649902,
      "step": 55874
    },
    {
      "epoch": 0.00034102783203125,
      "step": 55874,
      "training_step_time": 0.38152623176574707
    },
    {
      "epoch": 0.000341033935546875,
      "model_forward_time": 0.1149299144744873,
      "step": 55875
    },
    {
      "epoch": 0.000341033935546875,
      "step": 55875,
      "training_step_time": 0.3933894634246826
    },
    {
      "epoch": 0.0003410400390625,
      "model_forward_time": 0.11495113372802734,
      "step": 55876
    },
    {
      "epoch": 0.0003410400390625,
      "step": 55876,
      "training_step_time": 0.5303733348846436
    },
    {
      "epoch": 0.000341046142578125,
      "model_forward_time": 0.11540365219116211,
      "step": 55877
    },
    {
      "epoch": 0.000341046142578125,
      "step": 55877,
      "training_step_time": 0.39255452156066895
    },
    {
      "epoch": 0.00034105224609375,
      "model_forward_time": 0.11488747596740723,
      "step": 55878
    },
    {
      "epoch": 0.00034105224609375,
      "step": 55878,
      "training_step_time": 0.40064096450805664
    },
    {
      "epoch": 0.000341058349609375,
      "model_forward_time": 0.11509180068969727,
      "step": 55879
    },
    {
      "epoch": 0.000341058349609375,
      "step": 55879,
      "training_step_time": 0.40827441215515137
    },
    {
      "epoch": 0.000341064453125,
      "grad_norm": 0.09369780868291855,
      "learning_rate": 1.2835639705753078e-06,
      "loss": 0.0352,
      "step": 55880
    },
    {
      "epoch": 0.000341064453125,
      "model_forward_time": 0.11466312408447266,
      "step": 55880
    },
    {
      "epoch": 0.000341064453125,
      "step": 55880,
      "training_step_time": 0.3887040615081787
    },
    {
      "epoch": 0.000341070556640625,
      "model_forward_time": 0.11535334587097168,
      "step": 55881
    },
    {
      "epoch": 0.000341070556640625,
      "step": 55881,
      "training_step_time": 0.3905043601989746
    },
    {
      "epoch": 0.00034107666015625,
      "model_forward_time": 0.11487221717834473,
      "step": 55882
    },
    {
      "epoch": 0.00034107666015625,
      "step": 55882,
      "training_step_time": 0.6639223098754883
    },
    {
      "epoch": 0.000341082763671875,
      "model_forward_time": 0.11493182182312012,
      "step": 55883
    },
    {
      "epoch": 0.000341082763671875,
      "step": 55883,
      "training_step_time": 0.47016143798828125
    },
    {
      "epoch": 0.0003410888671875,
      "model_forward_time": 0.1150815486907959,
      "step": 55884
    },
    {
      "epoch": 0.0003410888671875,
      "step": 55884,
      "training_step_time": 0.4491767883300781
    },
    {
      "epoch": 0.000341094970703125,
      "model_forward_time": 0.11465883255004883,
      "step": 55885
    },
    {
      "epoch": 0.000341094970703125,
      "step": 55885,
      "training_step_time": 0.4027256965637207
    },
    {
      "epoch": 0.00034110107421875,
      "model_forward_time": 0.11452817916870117,
      "step": 55886
    },
    {
      "epoch": 0.00034110107421875,
      "step": 55886,
      "training_step_time": 0.3885350227355957
    },
    {
      "epoch": 0.000341107177734375,
      "model_forward_time": 0.11446309089660645,
      "step": 55887
    },
    {
      "epoch": 0.000341107177734375,
      "step": 55887,
      "training_step_time": 0.3817403316497803
    },
    {
      "epoch": 0.00034111328125,
      "model_forward_time": 0.11511492729187012,
      "step": 55888
    },
    {
      "epoch": 0.00034111328125,
      "step": 55888,
      "training_step_time": 0.4115476608276367
    },
    {
      "epoch": 0.000341119384765625,
      "model_forward_time": 0.11445927619934082,
      "step": 55889
    },
    {
      "epoch": 0.000341119384765625,
      "step": 55889,
      "training_step_time": 0.3976771831512451
    },
    {
      "epoch": 0.00034112548828125,
      "grad_norm": 0.09435376524925232,
      "learning_rate": 1.2773672742655784e-06,
      "loss": 0.0362,
      "step": 55890
    },
    {
      "epoch": 0.00034112548828125,
      "model_forward_time": 0.11425566673278809,
      "step": 55890
    },
    {
      "epoch": 0.00034112548828125,
      "step": 55890,
      "training_step_time": 0.39446592330932617
    },
    {
      "epoch": 0.000341131591796875,
      "model_forward_time": 0.11435174942016602,
      "step": 55891
    },
    {
      "epoch": 0.000341131591796875,
      "step": 55891,
      "training_step_time": 0.38695597648620605
    },
    {
      "epoch": 0.0003411376953125,
      "model_forward_time": 0.11475396156311035,
      "step": 55892
    },
    {
      "epoch": 0.0003411376953125,
      "step": 55892,
      "training_step_time": 0.39550065994262695
    },
    {
      "epoch": 0.000341143798828125,
      "model_forward_time": 0.11518335342407227,
      "step": 55893
    },
    {
      "epoch": 0.000341143798828125,
      "step": 55893,
      "training_step_time": 0.3970775604248047
    },
    {
      "epoch": 0.00034114990234375,
      "model_forward_time": 0.11500835418701172,
      "step": 55894
    },
    {
      "epoch": 0.00034114990234375,
      "step": 55894,
      "training_step_time": 0.6264846324920654
    },
    {
      "epoch": 0.000341156005859375,
      "model_forward_time": 0.11505675315856934,
      "step": 55895
    },
    {
      "epoch": 0.000341156005859375,
      "step": 55895,
      "training_step_time": 0.39006662368774414
    },
    {
      "epoch": 0.000341162109375,
      "model_forward_time": 0.11470270156860352,
      "step": 55896
    },
    {
      "epoch": 0.000341162109375,
      "step": 55896,
      "training_step_time": 0.45566391944885254
    },
    {
      "epoch": 0.000341168212890625,
      "model_forward_time": 0.1148381233215332,
      "step": 55897
    },
    {
      "epoch": 0.000341168212890625,
      "step": 55897,
      "training_step_time": 0.49700260162353516
    },
    {
      "epoch": 0.00034117431640625,
      "model_forward_time": 0.11447811126708984,
      "step": 55898
    },
    {
      "epoch": 0.00034117431640625,
      "step": 55898,
      "training_step_time": 0.44386792182922363
    },
    {
      "epoch": 0.000341180419921875,
      "model_forward_time": 0.11416387557983398,
      "step": 55899
    },
    {
      "epoch": 0.000341180419921875,
      "step": 55899,
      "training_step_time": 0.40360260009765625
    },
    {
      "epoch": 0.0003411865234375,
      "grad_norm": 0.13255034387111664,
      "learning_rate": 1.2711853786052109e-06,
      "loss": 0.0362,
      "step": 55900
    },
    {
      "epoch": 0.0003411865234375,
      "model_forward_time": 0.11449432373046875,
      "step": 55900
    },
    {
      "epoch": 0.0003411865234375,
      "step": 55900,
      "training_step_time": 0.402146577835083
    },
    {
      "epoch": 0.000341192626953125,
      "model_forward_time": 0.11514997482299805,
      "step": 55901
    },
    {
      "epoch": 0.000341192626953125,
      "step": 55901,
      "training_step_time": 0.4134213924407959
    },
    {
      "epoch": 0.00034119873046875,
      "model_forward_time": 0.1151435375213623,
      "step": 55902
    },
    {
      "epoch": 0.00034119873046875,
      "step": 55902,
      "training_step_time": 0.39840149879455566
    },
    {
      "epoch": 0.000341204833984375,
      "model_forward_time": 0.11475181579589844,
      "step": 55903
    },
    {
      "epoch": 0.000341204833984375,
      "step": 55903,
      "training_step_time": 0.3896138668060303
    },
    {
      "epoch": 0.0003412109375,
      "model_forward_time": 0.11507534980773926,
      "step": 55904
    },
    {
      "epoch": 0.0003412109375,
      "step": 55904,
      "training_step_time": 0.38707637786865234
    },
    {
      "epoch": 0.000341217041015625,
      "model_forward_time": 0.11487030982971191,
      "step": 55905
    },
    {
      "epoch": 0.000341217041015625,
      "step": 55905,
      "training_step_time": 0.386167049407959
    },
    {
      "epoch": 0.00034122314453125,
      "model_forward_time": 0.11503934860229492,
      "step": 55906
    },
    {
      "epoch": 0.00034122314453125,
      "step": 55906,
      "training_step_time": 0.3976860046386719
    },
    {
      "epoch": 0.000341229248046875,
      "model_forward_time": 0.11569929122924805,
      "step": 55907
    },
    {
      "epoch": 0.000341229248046875,
      "step": 55907,
      "training_step_time": 0.38619136810302734
    },
    {
      "epoch": 0.0003412353515625,
      "model_forward_time": 0.11571264266967773,
      "step": 55908
    },
    {
      "epoch": 0.0003412353515625,
      "step": 55908,
      "training_step_time": 0.4764559268951416
    },
    {
      "epoch": 0.000341241455078125,
      "model_forward_time": 0.11497330665588379,
      "step": 55909
    },
    {
      "epoch": 0.000341241455078125,
      "step": 55909,
      "training_step_time": 0.4021604061126709
    },
    {
      "epoch": 0.00034124755859375,
      "grad_norm": 0.09700901061296463,
      "learning_rate": 1.2650182854721193e-06,
      "loss": 0.0375,
      "step": 55910
    },
    {
      "epoch": 0.00034124755859375,
      "model_forward_time": 0.11498093605041504,
      "step": 55910
    },
    {
      "epoch": 0.00034124755859375,
      "step": 55910,
      "training_step_time": 0.3656036853790283
    },
    {
      "epoch": 0.000341253662109375,
      "model_forward_time": 0.11498498916625977,
      "step": 55911
    },
    {
      "epoch": 0.000341253662109375,
      "step": 55911,
      "training_step_time": 0.4640934467315674
    },
    {
      "epoch": 0.000341259765625,
      "model_forward_time": 0.11475539207458496,
      "step": 55912
    },
    {
      "epoch": 0.000341259765625,
      "step": 55912,
      "training_step_time": 0.42634034156799316
    },
    {
      "epoch": 0.000341265869140625,
      "model_forward_time": 0.11436104774475098,
      "step": 55913
    },
    {
      "epoch": 0.000341265869140625,
      "step": 55913,
      "training_step_time": 0.48129892349243164
    },
    {
      "epoch": 0.00034127197265625,
      "model_forward_time": 0.1145775318145752,
      "step": 55914
    },
    {
      "epoch": 0.00034127197265625,
      "step": 55914,
      "training_step_time": 0.39654016494750977
    },
    {
      "epoch": 0.000341278076171875,
      "model_forward_time": 0.1148068904876709,
      "step": 55915
    },
    {
      "epoch": 0.000341278076171875,
      "step": 55915,
      "training_step_time": 0.3920724391937256
    },
    {
      "epoch": 0.0003412841796875,
      "model_forward_time": 0.11604714393615723,
      "step": 55916
    },
    {
      "epoch": 0.0003412841796875,
      "step": 55916,
      "training_step_time": 0.3989858627319336
    },
    {
      "epoch": 0.000341290283203125,
      "model_forward_time": 0.11501097679138184,
      "step": 55917
    },
    {
      "epoch": 0.000341290283203125,
      "step": 55917,
      "training_step_time": 0.4187281131744385
    },
    {
      "epoch": 0.00034129638671875,
      "model_forward_time": 0.11502337455749512,
      "step": 55918
    },
    {
      "epoch": 0.00034129638671875,
      "step": 55918,
      "training_step_time": 0.39960741996765137
    },
    {
      "epoch": 0.000341302490234375,
      "model_forward_time": 0.11501502990722656,
      "step": 55919
    },
    {
      "epoch": 0.000341302490234375,
      "step": 55919,
      "training_step_time": 0.40665578842163086
    },
    {
      "epoch": 0.00034130859375,
      "grad_norm": 0.1106349378824234,
      "learning_rate": 1.2588659967397e-06,
      "loss": 0.0403,
      "step": 55920
    },
    {
      "epoch": 0.00034130859375,
      "model_forward_time": 0.11499929428100586,
      "step": 55920
    },
    {
      "epoch": 0.00034130859375,
      "step": 55920,
      "training_step_time": 0.39876389503479004
    },
    {
      "epoch": 0.000341314697265625,
      "model_forward_time": 0.11524844169616699,
      "step": 55921
    },
    {
      "epoch": 0.000341314697265625,
      "step": 55921,
      "training_step_time": 0.4348742961883545
    },
    {
      "epoch": 0.00034132080078125,
      "model_forward_time": 0.11493778228759766,
      "step": 55922
    },
    {
      "epoch": 0.00034132080078125,
      "step": 55922,
      "training_step_time": 0.4248812198638916
    },
    {
      "epoch": 0.000341326904296875,
      "model_forward_time": 0.11525559425354004,
      "step": 55923
    },
    {
      "epoch": 0.000341326904296875,
      "step": 55923,
      "training_step_time": 0.39095044136047363
    },
    {
      "epoch": 0.0003413330078125,
      "model_forward_time": 0.11543631553649902,
      "step": 55924
    },
    {
      "epoch": 0.0003413330078125,
      "step": 55924,
      "training_step_time": 0.3996469974517822
    },
    {
      "epoch": 0.000341339111328125,
      "model_forward_time": 0.11566495895385742,
      "step": 55925
    },
    {
      "epoch": 0.000341339111328125,
      "step": 55925,
      "training_step_time": 0.45227527618408203
    },
    {
      "epoch": 0.00034134521484375,
      "model_forward_time": 0.11508607864379883,
      "step": 55926
    },
    {
      "epoch": 0.00034134521484375,
      "step": 55926,
      "training_step_time": 0.4734508991241455
    },
    {
      "epoch": 0.000341351318359375,
      "model_forward_time": 0.11495804786682129,
      "step": 55927
    },
    {
      "epoch": 0.000341351318359375,
      "step": 55927,
      "training_step_time": 0.4432845115661621
    },
    {
      "epoch": 0.000341357421875,
      "model_forward_time": 0.11530351638793945,
      "step": 55928
    },
    {
      "epoch": 0.000341357421875,
      "step": 55928,
      "training_step_time": 0.398362398147583
    },
    {
      "epoch": 0.000341363525390625,
      "model_forward_time": 0.11565136909484863,
      "step": 55929
    },
    {
      "epoch": 0.000341363525390625,
      "step": 55929,
      "training_step_time": 0.38988590240478516
    },
    {
      "epoch": 0.00034136962890625,
      "grad_norm": 0.08366627991199493,
      "learning_rate": 1.2527285142768574e-06,
      "loss": 0.0362,
      "step": 55930
    },
    {
      "epoch": 0.00034136962890625,
      "model_forward_time": 0.11500930786132812,
      "step": 55930
    },
    {
      "epoch": 0.00034136962890625,
      "step": 55930,
      "training_step_time": 0.3883795738220215
    },
    {
      "epoch": 0.000341375732421875,
      "model_forward_time": 0.11478877067565918,
      "step": 55931
    },
    {
      "epoch": 0.000341375732421875,
      "step": 55931,
      "training_step_time": 0.40636134147644043
    },
    {
      "epoch": 0.0003413818359375,
      "model_forward_time": 0.11455273628234863,
      "step": 55932
    },
    {
      "epoch": 0.0003413818359375,
      "step": 55932,
      "training_step_time": 0.3969099521636963
    },
    {
      "epoch": 0.000341387939453125,
      "model_forward_time": 0.1147603988647461,
      "step": 55933
    },
    {
      "epoch": 0.000341387939453125,
      "step": 55933,
      "training_step_time": 0.40177297592163086
    },
    {
      "epoch": 0.00034139404296875,
      "model_forward_time": 0.11632585525512695,
      "step": 55934
    },
    {
      "epoch": 0.00034139404296875,
      "step": 55934,
      "training_step_time": 0.38631677627563477
    },
    {
      "epoch": 0.000341400146484375,
      "model_forward_time": 0.11564016342163086,
      "step": 55935
    },
    {
      "epoch": 0.000341400146484375,
      "step": 55935,
      "training_step_time": 0.39408087730407715
    },
    {
      "epoch": 0.00034140625,
      "model_forward_time": 0.11550068855285645,
      "step": 55936
    },
    {
      "epoch": 0.00034140625,
      "step": 55936,
      "training_step_time": 0.42856407165527344
    },
    {
      "epoch": 0.000341412353515625,
      "model_forward_time": 0.11507606506347656,
      "step": 55937
    },
    {
      "epoch": 0.000341412353515625,
      "step": 55937,
      "training_step_time": 0.3960258960723877
    },
    {
      "epoch": 0.00034141845703125,
      "model_forward_time": 0.11502885818481445,
      "step": 55938
    },
    {
      "epoch": 0.00034141845703125,
      "step": 55938,
      "training_step_time": 0.3982534408569336
    },
    {
      "epoch": 0.000341424560546875,
      "model_forward_time": 0.11470627784729004,
      "step": 55939
    },
    {
      "epoch": 0.000341424560546875,
      "step": 55939,
      "training_step_time": 0.3956015110015869
    },
    {
      "epoch": 0.0003414306640625,
      "grad_norm": 0.07859236001968384,
      "learning_rate": 1.2466058399479952e-06,
      "loss": 0.0341,
      "step": 55940
    },
    {
      "epoch": 0.0003414306640625,
      "model_forward_time": 0.11515021324157715,
      "step": 55940
    },
    {
      "epoch": 0.0003414306640625,
      "step": 55940,
      "training_step_time": 0.36704230308532715
    },
    {
      "epoch": 0.000341436767578125,
      "model_forward_time": 0.11461520195007324,
      "step": 55941
    },
    {
      "epoch": 0.000341436767578125,
      "step": 55941,
      "training_step_time": 0.46643519401550293
    },
    {
      "epoch": 0.00034144287109375,
      "model_forward_time": 0.11475872993469238,
      "step": 55942
    },
    {
      "epoch": 0.00034144287109375,
      "step": 55942,
      "training_step_time": 0.4905991554260254
    },
    {
      "epoch": 0.000341448974609375,
      "model_forward_time": 0.11492061614990234,
      "step": 55943
    },
    {
      "epoch": 0.000341448974609375,
      "step": 55943,
      "training_step_time": 0.3969700336456299
    },
    {
      "epoch": 0.000341455078125,
      "model_forward_time": 0.11521625518798828,
      "step": 55944
    },
    {
      "epoch": 0.000341455078125,
      "step": 55944,
      "training_step_time": 0.3911755084991455
    },
    {
      "epoch": 0.000341461181640625,
      "model_forward_time": 0.11457157135009766,
      "step": 55945
    },
    {
      "epoch": 0.000341461181640625,
      "step": 55945,
      "training_step_time": 0.39166736602783203
    },
    {
      "epoch": 0.00034146728515625,
      "model_forward_time": 0.11553359031677246,
      "step": 55946
    },
    {
      "epoch": 0.00034146728515625,
      "step": 55946,
      "training_step_time": 0.39399099349975586
    },
    {
      "epoch": 0.000341473388671875,
      "model_forward_time": 0.11487913131713867,
      "step": 55947
    },
    {
      "epoch": 0.000341473388671875,
      "step": 55947,
      "training_step_time": 0.39082837104797363
    },
    {
      "epoch": 0.0003414794921875,
      "model_forward_time": 0.11483216285705566,
      "step": 55948
    },
    {
      "epoch": 0.0003414794921875,
      "step": 55948,
      "training_step_time": 0.38378381729125977
    },
    {
      "epoch": 0.000341485595703125,
      "model_forward_time": 0.1154477596282959,
      "step": 55949
    },
    {
      "epoch": 0.000341485595703125,
      "step": 55949,
      "training_step_time": 0.4011855125427246
    },
    {
      "epoch": 0.00034149169921875,
      "grad_norm": 0.09331945329904556,
      "learning_rate": 1.2404979756130142e-06,
      "loss": 0.0359,
      "step": 55950
    },
    {
      "epoch": 0.00034149169921875,
      "model_forward_time": 0.11530947685241699,
      "step": 55950
    },
    {
      "epoch": 0.00034149169921875,
      "step": 55950,
      "training_step_time": 0.41583704948425293
    },
    {
      "epoch": 0.000341497802734375,
      "model_forward_time": 0.11535787582397461,
      "step": 55951
    },
    {
      "epoch": 0.000341497802734375,
      "step": 55951,
      "training_step_time": 0.43822383880615234
    },
    {
      "epoch": 0.00034150390625,
      "model_forward_time": 0.11552238464355469,
      "step": 55952
    },
    {
      "epoch": 0.00034150390625,
      "step": 55952,
      "training_step_time": 0.38979434967041016
    },
    {
      "epoch": 0.000341510009765625,
      "model_forward_time": 0.1148080825805664,
      "step": 55953
    },
    {
      "epoch": 0.000341510009765625,
      "step": 55953,
      "training_step_time": 0.3897557258605957
    },
    {
      "epoch": 0.00034151611328125,
      "model_forward_time": 0.11541295051574707,
      "step": 55954
    },
    {
      "epoch": 0.00034151611328125,
      "step": 55954,
      "training_step_time": 0.3952820301055908
    },
    {
      "epoch": 0.000341522216796875,
      "model_forward_time": 0.11557579040527344,
      "step": 55955
    },
    {
      "epoch": 0.000341522216796875,
      "step": 55955,
      "training_step_time": 0.46385717391967773
    },
    {
      "epoch": 0.0003415283203125,
      "model_forward_time": 0.11531996726989746,
      "step": 55956
    },
    {
      "epoch": 0.0003415283203125,
      "step": 55956,
      "training_step_time": 0.5076518058776855
    },
    {
      "epoch": 0.000341534423828125,
      "model_forward_time": 0.11520671844482422,
      "step": 55957
    },
    {
      "epoch": 0.000341534423828125,
      "step": 55957,
      "training_step_time": 0.48903822898864746
    },
    {
      "epoch": 0.00034154052734375,
      "model_forward_time": 0.11571288108825684,
      "step": 55958
    },
    {
      "epoch": 0.00034154052734375,
      "step": 55958,
      "training_step_time": 0.3840970993041992
    },
    {
      "epoch": 0.000341546630859375,
      "model_forward_time": 0.1150050163269043,
      "step": 55959
    },
    {
      "epoch": 0.000341546630859375,
      "step": 55959,
      "training_step_time": 0.40446996688842773
    },
    {
      "epoch": 0.000341552734375,
      "grad_norm": 0.07292113453149796,
      "learning_rate": 1.2344049231273302e-06,
      "loss": 0.0305,
      "step": 55960
    },
    {
      "epoch": 0.000341552734375,
      "model_forward_time": 0.1155850887298584,
      "step": 55960
    },
    {
      "epoch": 0.000341552734375,
      "step": 55960,
      "training_step_time": 0.39571380615234375
    },
    {
      "epoch": 0.000341558837890625,
      "model_forward_time": 0.11443209648132324,
      "step": 55961
    },
    {
      "epoch": 0.000341558837890625,
      "step": 55961,
      "training_step_time": 0.3935122489929199
    },
    {
      "epoch": 0.00034156494140625,
      "model_forward_time": 0.11522221565246582,
      "step": 55962
    },
    {
      "epoch": 0.00034156494140625,
      "step": 55962,
      "training_step_time": 0.3957791328430176
    },
    {
      "epoch": 0.000341571044921875,
      "model_forward_time": 0.11495351791381836,
      "step": 55963
    },
    {
      "epoch": 0.000341571044921875,
      "step": 55963,
      "training_step_time": 0.3955659866333008
    },
    {
      "epoch": 0.0003415771484375,
      "model_forward_time": 0.11579775810241699,
      "step": 55964
    },
    {
      "epoch": 0.0003415771484375,
      "step": 55964,
      "training_step_time": 0.3993644714355469
    },
    {
      "epoch": 0.000341583251953125,
      "model_forward_time": 0.11471819877624512,
      "step": 55965
    },
    {
      "epoch": 0.000341583251953125,
      "step": 55965,
      "training_step_time": 0.4049856662750244
    },
    {
      "epoch": 0.00034158935546875,
      "model_forward_time": 0.11566901206970215,
      "step": 55966
    },
    {
      "epoch": 0.00034158935546875,
      "step": 55966,
      "training_step_time": 0.38599371910095215
    },
    {
      "epoch": 0.000341595458984375,
      "model_forward_time": 0.11568522453308105,
      "step": 55967
    },
    {
      "epoch": 0.000341595458984375,
      "step": 55967,
      "training_step_time": 0.4086747169494629
    },
    {
      "epoch": 0.0003416015625,
      "model_forward_time": 0.11570549011230469,
      "step": 55968
    },
    {
      "epoch": 0.0003416015625,
      "step": 55968,
      "training_step_time": 0.40059423446655273
    },
    {
      "epoch": 0.000341607666015625,
      "model_forward_time": 0.11505937576293945,
      "step": 55969
    },
    {
      "epoch": 0.000341607666015625,
      "step": 55969,
      "training_step_time": 0.39548730850219727
    },
    {
      "epoch": 0.00034161376953125,
      "grad_norm": 0.06349474936723709,
      "learning_rate": 1.2283266843418517e-06,
      "loss": 0.0356,
      "step": 55970
    },
    {
      "epoch": 0.00034161376953125,
      "model_forward_time": 0.11649012565612793,
      "step": 55970
    },
    {
      "epoch": 0.00034161376953125,
      "step": 55970,
      "training_step_time": 0.47650909423828125
    },
    {
      "epoch": 0.000341619873046875,
      "model_forward_time": 0.11443972587585449,
      "step": 55971
    },
    {
      "epoch": 0.000341619873046875,
      "step": 55971,
      "training_step_time": 0.4627852439880371
    },
    {
      "epoch": 0.0003416259765625,
      "model_forward_time": 0.11547398567199707,
      "step": 55972
    },
    {
      "epoch": 0.0003416259765625,
      "step": 55972,
      "training_step_time": 0.4689326286315918
    },
    {
      "epoch": 0.000341632080078125,
      "model_forward_time": 0.1148843765258789,
      "step": 55973
    },
    {
      "epoch": 0.000341632080078125,
      "step": 55973,
      "training_step_time": 0.3943483829498291
    },
    {
      "epoch": 0.00034163818359375,
      "model_forward_time": 0.11524844169616699,
      "step": 55974
    },
    {
      "epoch": 0.00034163818359375,
      "step": 55974,
      "training_step_time": 0.3903465270996094
    },
    {
      "epoch": 0.000341644287109375,
      "model_forward_time": 0.1145486831665039,
      "step": 55975
    },
    {
      "epoch": 0.000341644287109375,
      "step": 55975,
      "training_step_time": 0.3900587558746338
    },
    {
      "epoch": 0.000341650390625,
      "model_forward_time": 0.11523771286010742,
      "step": 55976
    },
    {
      "epoch": 0.000341650390625,
      "step": 55976,
      "training_step_time": 0.39612746238708496
    },
    {
      "epoch": 0.000341656494140625,
      "model_forward_time": 0.11530494689941406,
      "step": 55977
    },
    {
      "epoch": 0.000341656494140625,
      "step": 55977,
      "training_step_time": 0.3947737216949463
    },
    {
      "epoch": 0.00034166259765625,
      "model_forward_time": 0.1154024600982666,
      "step": 55978
    },
    {
      "epoch": 0.00034166259765625,
      "step": 55978,
      "training_step_time": 0.3893258571624756
    },
    {
      "epoch": 0.000341668701171875,
      "model_forward_time": 0.11487317085266113,
      "step": 55979
    },
    {
      "epoch": 0.000341668701171875,
      "step": 55979,
      "training_step_time": 0.41289448738098145
    },
    {
      "epoch": 0.0003416748046875,
      "grad_norm": 0.0695226714015007,
      "learning_rate": 1.222263261102985e-06,
      "loss": 0.0402,
      "step": 55980
    },
    {
      "epoch": 0.0003416748046875,
      "model_forward_time": 0.1149601936340332,
      "step": 55980
    },
    {
      "epoch": 0.0003416748046875,
      "step": 55980,
      "training_step_time": 0.3957078456878662
    },
    {
      "epoch": 0.000341680908203125,
      "model_forward_time": 0.11573672294616699,
      "step": 55981
    },
    {
      "epoch": 0.000341680908203125,
      "step": 55981,
      "training_step_time": 0.3968055248260498
    },
    {
      "epoch": 0.00034168701171875,
      "model_forward_time": 0.11546468734741211,
      "step": 55982
    },
    {
      "epoch": 0.00034168701171875,
      "step": 55982,
      "training_step_time": 0.3984248638153076
    },
    {
      "epoch": 0.000341693115234375,
      "model_forward_time": 0.11530947685241699,
      "step": 55983
    },
    {
      "epoch": 0.000341693115234375,
      "step": 55983,
      "training_step_time": 0.3907325267791748
    },
    {
      "epoch": 0.00034169921875,
      "model_forward_time": 0.11507511138916016,
      "step": 55984
    },
    {
      "epoch": 0.00034169921875,
      "step": 55984,
      "training_step_time": 0.38973522186279297
    },
    {
      "epoch": 0.000341705322265625,
      "model_forward_time": 0.11512207984924316,
      "step": 55985
    },
    {
      "epoch": 0.000341705322265625,
      "step": 55985,
      "training_step_time": 0.4213905334472656
    },
    {
      "epoch": 0.00034171142578125,
      "model_forward_time": 0.11561131477355957,
      "step": 55986
    },
    {
      "epoch": 0.00034171142578125,
      "step": 55986,
      "training_step_time": 0.5422301292419434
    },
    {
      "epoch": 0.000341717529296875,
      "model_forward_time": 0.11493730545043945,
      "step": 55987
    },
    {
      "epoch": 0.000341717529296875,
      "step": 55987,
      "training_step_time": 0.4105362892150879
    },
    {
      "epoch": 0.0003417236328125,
      "model_forward_time": 0.1148386001586914,
      "step": 55988
    },
    {
      "epoch": 0.0003417236328125,
      "step": 55988,
      "training_step_time": 0.39402341842651367
    },
    {
      "epoch": 0.000341729736328125,
      "model_forward_time": 0.11459875106811523,
      "step": 55989
    },
    {
      "epoch": 0.000341729736328125,
      "step": 55989,
      "training_step_time": 0.40228939056396484
    },
    {
      "epoch": 0.00034173583984375,
      "grad_norm": 0.09726224839687347,
      "learning_rate": 1.2162146552526399e-06,
      "loss": 0.0392,
      "step": 55990
    },
    {
      "epoch": 0.00034173583984375,
      "model_forward_time": 0.11537933349609375,
      "step": 55990
    },
    {
      "epoch": 0.00034173583984375,
      "step": 55990,
      "training_step_time": 0.3928682804107666
    },
    {
      "epoch": 0.000341741943359375,
      "model_forward_time": 0.11502242088317871,
      "step": 55991
    },
    {
      "epoch": 0.000341741943359375,
      "step": 55991,
      "training_step_time": 0.3942525386810303
    },
    {
      "epoch": 0.000341748046875,
      "model_forward_time": 0.11518549919128418,
      "step": 55992
    },
    {
      "epoch": 0.000341748046875,
      "step": 55992,
      "training_step_time": 0.4740133285522461
    },
    {
      "epoch": 0.000341754150390625,
      "model_forward_time": 0.11539435386657715,
      "step": 55993
    },
    {
      "epoch": 0.000341754150390625,
      "step": 55993,
      "training_step_time": 0.42475056648254395
    },
    {
      "epoch": 0.00034176025390625,
      "model_forward_time": 0.11450362205505371,
      "step": 55994
    },
    {
      "epoch": 0.00034176025390625,
      "step": 55994,
      "training_step_time": 0.40354466438293457
    },
    {
      "epoch": 0.000341766357421875,
      "model_forward_time": 0.11490702629089355,
      "step": 55995
    },
    {
      "epoch": 0.000341766357421875,
      "step": 55995,
      "training_step_time": 0.438584566116333
    },
    {
      "epoch": 0.0003417724609375,
      "model_forward_time": 0.11530518531799316,
      "step": 55996
    },
    {
      "epoch": 0.0003417724609375,
      "step": 55996,
      "training_step_time": 0.4015159606933594
    },
    {
      "epoch": 0.000341778564453125,
      "model_forward_time": 0.11463522911071777,
      "step": 55997
    },
    {
      "epoch": 0.000341778564453125,
      "step": 55997,
      "training_step_time": 0.39459753036499023
    },
    {
      "epoch": 0.00034178466796875,
      "model_forward_time": 0.11497998237609863,
      "step": 55998
    },
    {
      "epoch": 0.00034178466796875,
      "step": 55998,
      "training_step_time": 0.38840436935424805
    },
    {
      "epoch": 0.000341790771484375,
      "model_forward_time": 0.1146383285522461,
      "step": 55999
    },
    {
      "epoch": 0.000341790771484375,
      "step": 55999,
      "training_step_time": 0.3810560703277588
    },
    {
      "epoch": 0.000341796875,
      "grad_norm": 0.0914275199174881,
      "learning_rate": 1.210180868628219e-06,
      "loss": 0.0368,
      "step": 56000
    },
    {
      "epoch": 0.000341796875,
      "model_forward_time": 0.11350870132446289,
      "step": 56000
    },
    {
      "epoch": 0.000341796875,
      "step": 56000,
      "training_step_time": 0.3563883304595947
    },
    {
      "epoch": 0.000341802978515625,
      "model_forward_time": 0.1127619743347168,
      "step": 56001
    },
    {
      "epoch": 0.000341802978515625,
      "step": 56001,
      "training_step_time": 0.38617682456970215
    },
    {
      "epoch": 0.00034180908203125,
      "model_forward_time": 0.11238741874694824,
      "step": 56002
    },
    {
      "epoch": 0.00034180908203125,
      "step": 56002,
      "training_step_time": 0.47435760498046875
    },
    {
      "epoch": 0.000341815185546875,
      "model_forward_time": 0.11341977119445801,
      "step": 56003
    },
    {
      "epoch": 0.000341815185546875,
      "step": 56003,
      "training_step_time": 0.38643693923950195
    },
    {
      "epoch": 0.0003418212890625,
      "model_forward_time": 0.11373782157897949,
      "step": 56004
    },
    {
      "epoch": 0.0003418212890625,
      "step": 56004,
      "training_step_time": 0.3802971839904785
    },
    {
      "epoch": 0.000341827392578125,
      "model_forward_time": 0.11410403251647949,
      "step": 56005
    },
    {
      "epoch": 0.000341827392578125,
      "step": 56005,
      "training_step_time": 0.3887333869934082
    },
    {
      "epoch": 0.00034183349609375,
      "model_forward_time": 0.1149899959564209,
      "step": 56006
    },
    {
      "epoch": 0.00034183349609375,
      "step": 56006,
      "training_step_time": 0.4333217144012451
    },
    {
      "epoch": 0.000341839599609375,
      "model_forward_time": 0.11514115333557129,
      "step": 56007
    },
    {
      "epoch": 0.000341839599609375,
      "step": 56007,
      "training_step_time": 0.41420674324035645
    },
    {
      "epoch": 0.000341845703125,
      "model_forward_time": 0.11387014389038086,
      "step": 56008
    },
    {
      "epoch": 0.000341845703125,
      "step": 56008,
      "training_step_time": 0.40317630767822266
    },
    {
      "epoch": 0.000341851806640625,
      "model_forward_time": 0.11498403549194336,
      "step": 56009
    },
    {
      "epoch": 0.000341851806640625,
      "step": 56009,
      "training_step_time": 0.38891100883483887
    },
    {
      "epoch": 0.00034185791015625,
      "grad_norm": 0.08693566918373108,
      "learning_rate": 1.2041619030626284e-06,
      "loss": 0.0382,
      "step": 56010
    },
    {
      "epoch": 0.00034185791015625,
      "model_forward_time": 0.11524128913879395,
      "step": 56010
    },
    {
      "epoch": 0.00034185791015625,
      "step": 56010,
      "training_step_time": 0.39911413192749023
    },
    {
      "epoch": 0.000341864013671875,
      "model_forward_time": 0.11459755897521973,
      "step": 56011
    },
    {
      "epoch": 0.000341864013671875,
      "step": 56011,
      "training_step_time": 0.3934974670410156
    },
    {
      "epoch": 0.0003418701171875,
      "model_forward_time": 0.11549615859985352,
      "step": 56012
    },
    {
      "epoch": 0.0003418701171875,
      "step": 56012,
      "training_step_time": 0.3816976547241211
    },
    {
      "epoch": 0.000341876220703125,
      "model_forward_time": 0.11523795127868652,
      "step": 56013
    },
    {
      "epoch": 0.000341876220703125,
      "step": 56013,
      "training_step_time": 0.39420509338378906
    },
    {
      "epoch": 0.00034188232421875,
      "model_forward_time": 0.11530065536499023,
      "step": 56014
    },
    {
      "epoch": 0.00034188232421875,
      "step": 56014,
      "training_step_time": 0.40239882469177246
    },
    {
      "epoch": 0.000341888427734375,
      "model_forward_time": 0.11439204216003418,
      "step": 56015
    },
    {
      "epoch": 0.000341888427734375,
      "step": 56015,
      "training_step_time": 0.3956410884857178
    },
    {
      "epoch": 0.00034189453125,
      "model_forward_time": 0.11533021926879883,
      "step": 56016
    },
    {
      "epoch": 0.00034189453125,
      "step": 56016,
      "training_step_time": 0.47728490829467773
    },
    {
      "epoch": 0.000341900634765625,
      "model_forward_time": 0.11485886573791504,
      "step": 56017
    },
    {
      "epoch": 0.000341900634765625,
      "step": 56017,
      "training_step_time": 0.4785912036895752
    },
    {
      "epoch": 0.00034190673828125,
      "model_forward_time": 0.11534571647644043,
      "step": 56018
    },
    {
      "epoch": 0.00034190673828125,
      "step": 56018,
      "training_step_time": 0.39117431640625
    },
    {
      "epoch": 0.000341912841796875,
      "model_forward_time": 0.1150655746459961,
      "step": 56019
    },
    {
      "epoch": 0.000341912841796875,
      "step": 56019,
      "training_step_time": 0.40109896659851074
    },
    {
      "epoch": 0.0003419189453125,
      "grad_norm": 0.07523184269666672,
      "learning_rate": 1.1981577603842776e-06,
      "loss": 0.0352,
      "step": 56020
    },
    {
      "epoch": 0.0003419189453125,
      "model_forward_time": 0.11490583419799805,
      "step": 56020
    },
    {
      "epoch": 0.0003419189453125,
      "step": 56020,
      "training_step_time": 0.4185450077056885
    },
    {
      "epoch": 0.000341925048828125,
      "model_forward_time": 0.11507773399353027,
      "step": 56021
    },
    {
      "epoch": 0.000341925048828125,
      "step": 56021,
      "training_step_time": 0.4079000949859619
    },
    {
      "epoch": 0.00034193115234375,
      "model_forward_time": 0.11510515213012695,
      "step": 56022
    },
    {
      "epoch": 0.00034193115234375,
      "step": 56022,
      "training_step_time": 0.40958619117736816
    },
    {
      "epoch": 0.000341937255859375,
      "model_forward_time": 0.11461496353149414,
      "step": 56023
    },
    {
      "epoch": 0.000341937255859375,
      "step": 56023,
      "training_step_time": 0.39499425888061523
    },
    {
      "epoch": 0.000341943359375,
      "model_forward_time": 0.11477780342102051,
      "step": 56024
    },
    {
      "epoch": 0.000341943359375,
      "step": 56024,
      "training_step_time": 0.38620996475219727
    },
    {
      "epoch": 0.000341949462890625,
      "model_forward_time": 0.11502480506896973,
      "step": 56025
    },
    {
      "epoch": 0.000341949462890625,
      "step": 56025,
      "training_step_time": 0.3895232677459717
    },
    {
      "epoch": 0.00034195556640625,
      "model_forward_time": 0.11539030075073242,
      "step": 56026
    },
    {
      "epoch": 0.00034195556640625,
      "step": 56026,
      "training_step_time": 0.3892335891723633
    },
    {
      "epoch": 0.000341961669921875,
      "model_forward_time": 0.11523866653442383,
      "step": 56027
    },
    {
      "epoch": 0.000341961669921875,
      "step": 56027,
      "training_step_time": 0.39552831649780273
    },
    {
      "epoch": 0.0003419677734375,
      "model_forward_time": 0.11517071723937988,
      "step": 56028
    },
    {
      "epoch": 0.0003419677734375,
      "step": 56028,
      "training_step_time": 0.3965578079223633
    },
    {
      "epoch": 0.000341973876953125,
      "model_forward_time": 0.1150050163269043,
      "step": 56029
    },
    {
      "epoch": 0.000341973876953125,
      "step": 56029,
      "training_step_time": 0.39718079566955566
    },
    {
      "epoch": 0.00034197998046875,
      "grad_norm": 0.07037372887134552,
      "learning_rate": 1.192168442417052e-06,
      "loss": 0.0351,
      "step": 56030
    },
    {
      "epoch": 0.00034197998046875,
      "model_forward_time": 0.11571049690246582,
      "step": 56030
    },
    {
      "epoch": 0.00034197998046875,
      "step": 56030,
      "training_step_time": 0.44629383087158203
    },
    {
      "epoch": 0.000341986083984375,
      "model_forward_time": 0.11465835571289062,
      "step": 56031
    },
    {
      "epoch": 0.000341986083984375,
      "step": 56031,
      "training_step_time": 0.46669554710388184
    },
    {
      "epoch": 0.0003419921875,
      "model_forward_time": 0.11508059501647949,
      "step": 56032
    },
    {
      "epoch": 0.0003419921875,
      "step": 56032,
      "training_step_time": 0.40985655784606934
    },
    {
      "epoch": 0.000341998291015625,
      "model_forward_time": 0.11447930335998535,
      "step": 56033
    },
    {
      "epoch": 0.000341998291015625,
      "step": 56033,
      "training_step_time": 0.3890564441680908
    },
    {
      "epoch": 0.00034200439453125,
      "model_forward_time": 0.11544108390808105,
      "step": 56034
    },
    {
      "epoch": 0.00034200439453125,
      "step": 56034,
      "training_step_time": 0.42646145820617676
    },
    {
      "epoch": 0.000342010498046875,
      "model_forward_time": 0.11546182632446289,
      "step": 56035
    },
    {
      "epoch": 0.000342010498046875,
      "step": 56035,
      "training_step_time": 0.3976097106933594
    },
    {
      "epoch": 0.0003420166015625,
      "model_forward_time": 0.11550617218017578,
      "step": 56036
    },
    {
      "epoch": 0.0003420166015625,
      "step": 56036,
      "training_step_time": 0.3883960247039795
    },
    {
      "epoch": 0.000342022705078125,
      "model_forward_time": 0.11443686485290527,
      "step": 56037
    },
    {
      "epoch": 0.000342022705078125,
      "step": 56037,
      "training_step_time": 0.39827895164489746
    },
    {
      "epoch": 0.00034202880859375,
      "model_forward_time": 0.11533331871032715,
      "step": 56038
    },
    {
      "epoch": 0.00034202880859375,
      "step": 56038,
      "training_step_time": 0.3952465057373047
    },
    {
      "epoch": 0.000342034912109375,
      "model_forward_time": 0.11484885215759277,
      "step": 56039
    },
    {
      "epoch": 0.000342034912109375,
      "step": 56039,
      "training_step_time": 0.39916253089904785
    },
    {
      "epoch": 0.000342041015625,
      "grad_norm": 0.06926878541707993,
      "learning_rate": 1.1861939509803687e-06,
      "loss": 0.0355,
      "step": 56040
    },
    {
      "epoch": 0.000342041015625,
      "model_forward_time": 0.11494088172912598,
      "step": 56040
    },
    {
      "epoch": 0.000342041015625,
      "step": 56040,
      "training_step_time": 0.397174596786499
    },
    {
      "epoch": 0.000342047119140625,
      "model_forward_time": 0.11553335189819336,
      "step": 56041
    },
    {
      "epoch": 0.000342047119140625,
      "step": 56041,
      "training_step_time": 0.4130432605743408
    },
    {
      "epoch": 0.00034205322265625,
      "model_forward_time": 0.1148831844329834,
      "step": 56042
    },
    {
      "epoch": 0.00034205322265625,
      "step": 56042,
      "training_step_time": 0.39768028259277344
    },
    {
      "epoch": 0.000342059326171875,
      "model_forward_time": 0.11498689651489258,
      "step": 56043
    },
    {
      "epoch": 0.000342059326171875,
      "step": 56043,
      "training_step_time": 0.40224647521972656
    },
    {
      "epoch": 0.0003420654296875,
      "model_forward_time": 0.11550259590148926,
      "step": 56044
    },
    {
      "epoch": 0.0003420654296875,
      "step": 56044,
      "training_step_time": 0.44049715995788574
    },
    {
      "epoch": 0.000342071533203125,
      "model_forward_time": 0.1149587631225586,
      "step": 56045
    },
    {
      "epoch": 0.000342071533203125,
      "step": 56045,
      "training_step_time": 0.4550197124481201
    },
    {
      "epoch": 0.00034207763671875,
      "model_forward_time": 0.11472225189208984,
      "step": 56046
    },
    {
      "epoch": 0.00034207763671875,
      "step": 56046,
      "training_step_time": 0.4442293643951416
    },
    {
      "epoch": 0.000342083740234375,
      "model_forward_time": 0.1148214340209961,
      "step": 56047
    },
    {
      "epoch": 0.000342083740234375,
      "step": 56047,
      "training_step_time": 0.434798002243042
    },
    {
      "epoch": 0.00034208984375,
      "model_forward_time": 0.11419868469238281,
      "step": 56048
    },
    {
      "epoch": 0.00034208984375,
      "step": 56048,
      "training_step_time": 0.3914368152618408
    },
    {
      "epoch": 0.000342095947265625,
      "model_forward_time": 0.11600255966186523,
      "step": 56049
    },
    {
      "epoch": 0.000342095947265625,
      "step": 56049,
      "training_step_time": 0.39760279655456543
    },
    {
      "epoch": 0.00034210205078125,
      "grad_norm": 0.07463879883289337,
      "learning_rate": 1.1802342878891037e-06,
      "loss": 0.0345,
      "step": 56050
    },
    {
      "epoch": 0.00034210205078125,
      "model_forward_time": 0.11542320251464844,
      "step": 56050
    },
    {
      "epoch": 0.00034210205078125,
      "step": 56050,
      "training_step_time": 0.4012627601623535
    },
    {
      "epoch": 0.000342108154296875,
      "model_forward_time": 0.1140446662902832,
      "step": 56051
    },
    {
      "epoch": 0.000342108154296875,
      "step": 56051,
      "training_step_time": 0.39836931228637695
    },
    {
      "epoch": 0.0003421142578125,
      "model_forward_time": 0.115325927734375,
      "step": 56052
    },
    {
      "epoch": 0.0003421142578125,
      "step": 56052,
      "training_step_time": 0.3997969627380371
    },
    {
      "epoch": 0.000342120361328125,
      "model_forward_time": 0.11455178260803223,
      "step": 56053
    },
    {
      "epoch": 0.000342120361328125,
      "step": 56053,
      "training_step_time": 0.3886263370513916
    },
    {
      "epoch": 0.00034212646484375,
      "model_forward_time": 0.11571073532104492,
      "step": 56054
    },
    {
      "epoch": 0.00034212646484375,
      "step": 56054,
      "training_step_time": 0.39789819717407227
    },
    {
      "epoch": 0.000342132568359375,
      "model_forward_time": 0.1147761344909668,
      "step": 56055
    },
    {
      "epoch": 0.000342132568359375,
      "step": 56055,
      "training_step_time": 0.4270815849304199
    },
    {
      "epoch": 0.000342138671875,
      "model_forward_time": 0.11495757102966309,
      "step": 56056
    },
    {
      "epoch": 0.000342138671875,
      "step": 56056,
      "training_step_time": 0.39705777168273926
    },
    {
      "epoch": 0.000342144775390625,
      "model_forward_time": 0.11531901359558105,
      "step": 56057
    },
    {
      "epoch": 0.000342144775390625,
      "step": 56057,
      "training_step_time": 0.3903965950012207
    },
    {
      "epoch": 0.00034215087890625,
      "model_forward_time": 0.1146383285522461,
      "step": 56058
    },
    {
      "epoch": 0.00034215087890625,
      "step": 56058,
      "training_step_time": 0.40092968940734863
    },
    {
      "epoch": 0.000342156982421875,
      "model_forward_time": 0.11536097526550293,
      "step": 56059
    },
    {
      "epoch": 0.000342156982421875,
      "step": 56059,
      "training_step_time": 0.5196759700775146
    },
    {
      "epoch": 0.0003421630859375,
      "grad_norm": 0.09247972816228867,
      "learning_rate": 1.1742894549536477e-06,
      "loss": 0.0313,
      "step": 56060
    },
    {
      "epoch": 0.0003421630859375,
      "model_forward_time": 0.11516141891479492,
      "step": 56060
    },
    {
      "epoch": 0.0003421630859375,
      "step": 56060,
      "training_step_time": 0.39760923385620117
    },
    {
      "epoch": 0.000342169189453125,
      "model_forward_time": 0.11532878875732422,
      "step": 56061
    },
    {
      "epoch": 0.000342169189453125,
      "step": 56061,
      "training_step_time": 0.6277031898498535
    },
    {
      "epoch": 0.00034217529296875,
      "model_forward_time": 0.11432552337646484,
      "step": 56062
    },
    {
      "epoch": 0.00034217529296875,
      "step": 56062,
      "training_step_time": 0.39626049995422363
    },
    {
      "epoch": 0.000342181396484375,
      "model_forward_time": 0.11408019065856934,
      "step": 56063
    },
    {
      "epoch": 0.000342181396484375,
      "step": 56063,
      "training_step_time": 0.3928205966949463
    },
    {
      "epoch": 0.0003421875,
      "model_forward_time": 0.11501121520996094,
      "step": 56064
    },
    {
      "epoch": 0.0003421875,
      "step": 56064,
      "training_step_time": 0.39482545852661133
    },
    {
      "epoch": 0.000342193603515625,
      "model_forward_time": 0.11511826515197754,
      "step": 56065
    },
    {
      "epoch": 0.000342193603515625,
      "step": 56065,
      "training_step_time": 0.40380048751831055
    },
    {
      "epoch": 0.00034219970703125,
      "model_forward_time": 0.11489582061767578,
      "step": 56066
    },
    {
      "epoch": 0.00034219970703125,
      "step": 56066,
      "training_step_time": 0.38564181327819824
    },
    {
      "epoch": 0.000342205810546875,
      "model_forward_time": 0.1152336597442627,
      "step": 56067
    },
    {
      "epoch": 0.000342205810546875,
      "step": 56067,
      "training_step_time": 0.5467925071716309
    },
    {
      "epoch": 0.0003422119140625,
      "model_forward_time": 0.11476445198059082,
      "step": 56068
    },
    {
      "epoch": 0.0003422119140625,
      "step": 56068,
      "training_step_time": 0.39641690254211426
    },
    {
      "epoch": 0.000342218017578125,
      "model_forward_time": 0.11505579948425293,
      "step": 56069
    },
    {
      "epoch": 0.000342218017578125,
      "step": 56069,
      "training_step_time": 0.3904149532318115
    },
    {
      "epoch": 0.00034222412109375,
      "grad_norm": 0.09461737424135208,
      "learning_rate": 1.1683594539798893e-06,
      "loss": 0.0375,
      "step": 56070
    },
    {
      "epoch": 0.00034222412109375,
      "model_forward_time": 0.11499977111816406,
      "step": 56070
    },
    {
      "epoch": 0.00034222412109375,
      "step": 56070,
      "training_step_time": 0.39945268630981445
    },
    {
      "epoch": 0.000342230224609375,
      "model_forward_time": 0.1150197982788086,
      "step": 56071
    },
    {
      "epoch": 0.000342230224609375,
      "step": 56071,
      "training_step_time": 0.3924374580383301
    },
    {
      "epoch": 0.000342236328125,
      "model_forward_time": 0.11504101753234863,
      "step": 56072
    },
    {
      "epoch": 0.000342236328125,
      "step": 56072,
      "training_step_time": 0.3932032585144043
    },
    {
      "epoch": 0.000342242431640625,
      "model_forward_time": 0.11468982696533203,
      "step": 56073
    },
    {
      "epoch": 0.000342242431640625,
      "step": 56073,
      "training_step_time": 0.5173249244689941
    },
    {
      "epoch": 0.00034224853515625,
      "model_forward_time": 0.11581230163574219,
      "step": 56074
    },
    {
      "epoch": 0.00034224853515625,
      "step": 56074,
      "training_step_time": 0.46550869941711426
    },
    {
      "epoch": 0.000342254638671875,
      "model_forward_time": 0.11498212814331055,
      "step": 56075
    },
    {
      "epoch": 0.000342254638671875,
      "step": 56075,
      "training_step_time": 0.4615046977996826
    },
    {
      "epoch": 0.0003422607421875,
      "model_forward_time": 0.1151883602142334,
      "step": 56076
    },
    {
      "epoch": 0.0003422607421875,
      "step": 56076,
      "training_step_time": 0.4187886714935303
    },
    {
      "epoch": 0.000342266845703125,
      "model_forward_time": 0.11486601829528809,
      "step": 56077
    },
    {
      "epoch": 0.000342266845703125,
      "step": 56077,
      "training_step_time": 0.39226388931274414
    },
    {
      "epoch": 0.00034227294921875,
      "model_forward_time": 0.11488819122314453,
      "step": 56078
    },
    {
      "epoch": 0.00034227294921875,
      "step": 56078,
      "training_step_time": 0.39386749267578125
    },
    {
      "epoch": 0.000342279052734375,
      "model_forward_time": 0.11661887168884277,
      "step": 56079
    },
    {
      "epoch": 0.000342279052734375,
      "step": 56079,
      "training_step_time": 0.3902616500854492
    },
    {
      "epoch": 0.00034228515625,
      "grad_norm": 0.10725134611129761,
      "learning_rate": 1.16244428676921e-06,
      "loss": 0.0405,
      "step": 56080
    },
    {
      "epoch": 0.00034228515625,
      "model_forward_time": 0.1145930290222168,
      "step": 56080
    },
    {
      "epoch": 0.00034228515625,
      "step": 56080,
      "training_step_time": 0.38976216316223145
    },
    {
      "epoch": 0.000342291259765625,
      "model_forward_time": 0.11556839942932129,
      "step": 56081
    },
    {
      "epoch": 0.000342291259765625,
      "step": 56081,
      "training_step_time": 0.39089512825012207
    },
    {
      "epoch": 0.00034229736328125,
      "model_forward_time": 0.11482596397399902,
      "step": 56082
    },
    {
      "epoch": 0.00034229736328125,
      "step": 56082,
      "training_step_time": 0.3918306827545166
    },
    {
      "epoch": 0.000342303466796875,
      "model_forward_time": 0.11536502838134766,
      "step": 56083
    },
    {
      "epoch": 0.000342303466796875,
      "step": 56083,
      "training_step_time": 0.3894336223602295
    },
    {
      "epoch": 0.0003423095703125,
      "model_forward_time": 0.11581087112426758,
      "step": 56084
    },
    {
      "epoch": 0.0003423095703125,
      "step": 56084,
      "training_step_time": 0.3839836120605469
    },
    {
      "epoch": 0.000342315673828125,
      "model_forward_time": 0.11532473564147949,
      "step": 56085
    },
    {
      "epoch": 0.000342315673828125,
      "step": 56085,
      "training_step_time": 0.660283088684082
    },
    {
      "epoch": 0.00034232177734375,
      "model_forward_time": 0.11493706703186035,
      "step": 56086
    },
    {
      "epoch": 0.00034232177734375,
      "step": 56086,
      "training_step_time": 0.4007246494293213
    },
    {
      "epoch": 0.000342327880859375,
      "model_forward_time": 0.1156468391418457,
      "step": 56087
    },
    {
      "epoch": 0.000342327880859375,
      "step": 56087,
      "training_step_time": 0.4387021064758301
    },
    {
      "epoch": 0.000342333984375,
      "model_forward_time": 0.11456942558288574,
      "step": 56088
    },
    {
      "epoch": 0.000342333984375,
      "step": 56088,
      "training_step_time": 0.40930628776550293
    },
    {
      "epoch": 0.000342340087890625,
      "model_forward_time": 0.11460065841674805,
      "step": 56089
    },
    {
      "epoch": 0.000342340087890625,
      "step": 56089,
      "training_step_time": 0.49822068214416504
    },
    {
      "epoch": 0.00034234619140625,
      "grad_norm": 0.10083076357841492,
      "learning_rate": 1.1565439551184664e-06,
      "loss": 0.0334,
      "step": 56090
    },
    {
      "epoch": 0.00034234619140625,
      "model_forward_time": 0.11446642875671387,
      "step": 56090
    },
    {
      "epoch": 0.00034234619140625,
      "step": 56090,
      "training_step_time": 0.48801565170288086
    },
    {
      "epoch": 0.000342352294921875,
      "model_forward_time": 0.1145787239074707,
      "step": 56091
    },
    {
      "epoch": 0.000342352294921875,
      "step": 56091,
      "training_step_time": 0.3896808624267578
    },
    {
      "epoch": 0.0003423583984375,
      "model_forward_time": 0.11496472358703613,
      "step": 56092
    },
    {
      "epoch": 0.0003423583984375,
      "step": 56092,
      "training_step_time": 0.4062817096710205
    },
    {
      "epoch": 0.000342364501953125,
      "model_forward_time": 0.11478209495544434,
      "step": 56093
    },
    {
      "epoch": 0.000342364501953125,
      "step": 56093,
      "training_step_time": 0.3954908847808838
    },
    {
      "epoch": 0.00034237060546875,
      "model_forward_time": 0.1154015064239502,
      "step": 56094
    },
    {
      "epoch": 0.00034237060546875,
      "step": 56094,
      "training_step_time": 0.394331693649292
    },
    {
      "epoch": 0.000342376708984375,
      "model_forward_time": 0.11434745788574219,
      "step": 56095
    },
    {
      "epoch": 0.000342376708984375,
      "step": 56095,
      "training_step_time": 0.39505958557128906
    },
    {
      "epoch": 0.0003423828125,
      "model_forward_time": 0.11516165733337402,
      "step": 56096
    },
    {
      "epoch": 0.0003423828125,
      "step": 56096,
      "training_step_time": 0.3884727954864502
    },
    {
      "epoch": 0.000342388916015625,
      "model_forward_time": 0.11541628837585449,
      "step": 56097
    },
    {
      "epoch": 0.000342388916015625,
      "step": 56097,
      "training_step_time": 0.40288209915161133
    },
    {
      "epoch": 0.00034239501953125,
      "model_forward_time": 0.11550378799438477,
      "step": 56098
    },
    {
      "epoch": 0.00034239501953125,
      "step": 56098,
      "training_step_time": 0.3928794860839844
    },
    {
      "epoch": 0.000342401123046875,
      "model_forward_time": 0.11531686782836914,
      "step": 56099
    },
    {
      "epoch": 0.000342401123046875,
      "step": 56099,
      "training_step_time": 0.3942420482635498
    },
    {
      "epoch": 0.0003424072265625,
      "grad_norm": 0.08269441872835159,
      "learning_rate": 1.1506584608200367e-06,
      "loss": 0.0398,
      "step": 56100
    },
    {
      "epoch": 0.0003424072265625,
      "model_forward_time": 0.11558938026428223,
      "step": 56100
    },
    {
      "epoch": 0.0003424072265625,
      "step": 56100,
      "training_step_time": 0.3962829113006592
    },
    {
      "epoch": 0.000342413330078125,
      "model_forward_time": 0.11534738540649414,
      "step": 56101
    },
    {
      "epoch": 0.000342413330078125,
      "step": 56101,
      "training_step_time": 0.39901185035705566
    },
    {
      "epoch": 0.00034241943359375,
      "model_forward_time": 0.11559438705444336,
      "step": 56102
    },
    {
      "epoch": 0.00034241943359375,
      "step": 56102,
      "training_step_time": 0.41465258598327637
    },
    {
      "epoch": 0.000342425537109375,
      "model_forward_time": 0.11499738693237305,
      "step": 56103
    },
    {
      "epoch": 0.000342425537109375,
      "step": 56103,
      "training_step_time": 0.5353641510009766
    },
    {
      "epoch": 0.000342431640625,
      "model_forward_time": 0.11597418785095215,
      "step": 56104
    },
    {
      "epoch": 0.000342431640625,
      "step": 56104,
      "training_step_time": 0.5309317111968994
    },
    {
      "epoch": 0.000342437744140625,
      "model_forward_time": 0.11464118957519531,
      "step": 56105
    },
    {
      "epoch": 0.000342437744140625,
      "step": 56105,
      "training_step_time": 0.44843411445617676
    },
    {
      "epoch": 0.00034244384765625,
      "model_forward_time": 0.114593505859375,
      "step": 56106
    },
    {
      "epoch": 0.00034244384765625,
      "step": 56106,
      "training_step_time": 0.39234280586242676
    },
    {
      "epoch": 0.000342449951171875,
      "model_forward_time": 0.11423444747924805,
      "step": 56107
    },
    {
      "epoch": 0.000342449951171875,
      "step": 56107,
      "training_step_time": 0.3862776756286621
    },
    {
      "epoch": 0.0003424560546875,
      "model_forward_time": 0.11417269706726074,
      "step": 56108
    },
    {
      "epoch": 0.0003424560546875,
      "step": 56108,
      "training_step_time": 0.4051177501678467
    },
    {
      "epoch": 0.000342462158203125,
      "model_forward_time": 0.11451387405395508,
      "step": 56109
    },
    {
      "epoch": 0.000342462158203125,
      "step": 56109,
      "training_step_time": 0.38474178314208984
    },
    {
      "epoch": 0.00034246826171875,
      "grad_norm": 0.11301089078187943,
      "learning_rate": 1.1447878056617734e-06,
      "loss": 0.0421,
      "step": 56110
    },
    {
      "epoch": 0.00034246826171875,
      "model_forward_time": 0.1146085262298584,
      "step": 56110
    },
    {
      "epoch": 0.00034246826171875,
      "step": 56110,
      "training_step_time": 0.3951294422149658
    },
    {
      "epoch": 0.000342474365234375,
      "model_forward_time": 0.11456799507141113,
      "step": 56111
    },
    {
      "epoch": 0.000342474365234375,
      "step": 56111,
      "training_step_time": 0.386685848236084
    },
    {
      "epoch": 0.00034248046875,
      "model_forward_time": 0.11506986618041992,
      "step": 56112
    },
    {
      "epoch": 0.00034248046875,
      "step": 56112,
      "training_step_time": 0.4206085205078125
    },
    {
      "epoch": 0.000342486572265625,
      "model_forward_time": 0.11451315879821777,
      "step": 56113
    },
    {
      "epoch": 0.000342486572265625,
      "step": 56113,
      "training_step_time": 0.4048619270324707
    },
    {
      "epoch": 0.00034249267578125,
      "model_forward_time": 0.11556529998779297,
      "step": 56114
    },
    {
      "epoch": 0.00034249267578125,
      "step": 56114,
      "training_step_time": 0.3985886573791504
    },
    {
      "epoch": 0.000342498779296875,
      "model_forward_time": 0.11514401435852051,
      "step": 56115
    },
    {
      "epoch": 0.000342498779296875,
      "step": 56115,
      "training_step_time": 0.5226843357086182
    },
    {
      "epoch": 0.0003425048828125,
      "model_forward_time": 0.1161496639251709,
      "step": 56116
    },
    {
      "epoch": 0.0003425048828125,
      "step": 56116,
      "training_step_time": 0.43839430809020996
    },
    {
      "epoch": 0.000342510986328125,
      "model_forward_time": 0.11583781242370605,
      "step": 56117
    },
    {
      "epoch": 0.000342510986328125,
      "step": 56117,
      "training_step_time": 0.42032289505004883
    },
    {
      "epoch": 0.00034251708984375,
      "model_forward_time": 0.11759114265441895,
      "step": 56118
    },
    {
      "epoch": 0.00034251708984375,
      "step": 56118,
      "training_step_time": 0.5124456882476807
    },
    {
      "epoch": 0.000342523193359375,
      "model_forward_time": 0.118438720703125,
      "step": 56119
    },
    {
      "epoch": 0.000342523193359375,
      "step": 56119,
      "training_step_time": 0.4820230007171631
    },
    {
      "epoch": 0.000342529296875,
      "grad_norm": 0.10519001632928848,
      "learning_rate": 1.138931991427028e-06,
      "loss": 0.0353,
      "step": 56120
    },
    {
      "epoch": 0.000342529296875,
      "model_forward_time": 0.1183171272277832,
      "step": 56120
    },
    {
      "epoch": 0.000342529296875,
      "step": 56120,
      "training_step_time": 0.3873879909515381
    },
    {
      "epoch": 0.000342535400390625,
      "model_forward_time": 0.11519408226013184,
      "step": 56121
    },
    {
      "epoch": 0.000342535400390625,
      "step": 56121,
      "training_step_time": 0.3780951499938965
    },
    {
      "epoch": 0.00034254150390625,
      "model_forward_time": 0.11571455001831055,
      "step": 56122
    },
    {
      "epoch": 0.00034254150390625,
      "step": 56122,
      "training_step_time": 0.39423131942749023
    },
    {
      "epoch": 0.000342547607421875,
      "model_forward_time": 0.11500978469848633,
      "step": 56123
    },
    {
      "epoch": 0.000342547607421875,
      "step": 56123,
      "training_step_time": 0.39662861824035645
    },
    {
      "epoch": 0.0003425537109375,
      "model_forward_time": 0.11531186103820801,
      "step": 56124
    },
    {
      "epoch": 0.0003425537109375,
      "step": 56124,
      "training_step_time": 0.3970465660095215
    },
    {
      "epoch": 0.000342559814453125,
      "model_forward_time": 0.11497974395751953,
      "step": 56125
    },
    {
      "epoch": 0.000342559814453125,
      "step": 56125,
      "training_step_time": 0.39777183532714844
    },
    {
      "epoch": 0.00034256591796875,
      "model_forward_time": 0.11533594131469727,
      "step": 56126
    },
    {
      "epoch": 0.00034256591796875,
      "step": 56126,
      "training_step_time": 0.3970959186553955
    },
    {
      "epoch": 0.000342572021484375,
      "model_forward_time": 0.11543130874633789,
      "step": 56127
    },
    {
      "epoch": 0.000342572021484375,
      "step": 56127,
      "training_step_time": 0.5257186889648438
    },
    {
      "epoch": 0.000342578125,
      "model_forward_time": 0.1149148941040039,
      "step": 56128
    },
    {
      "epoch": 0.000342578125,
      "step": 56128,
      "training_step_time": 0.39479923248291016
    },
    {
      "epoch": 0.000342584228515625,
      "model_forward_time": 0.11528730392456055,
      "step": 56129
    },
    {
      "epoch": 0.000342584228515625,
      "step": 56129,
      "training_step_time": 0.4010016918182373
    },
    {
      "epoch": 0.00034259033203125,
      "grad_norm": 0.08111681789159775,
      "learning_rate": 1.1330910198946442e-06,
      "loss": 0.0339,
      "step": 56130
    },
    {
      "epoch": 0.00034259033203125,
      "model_forward_time": 0.11510562896728516,
      "step": 56130
    },
    {
      "epoch": 0.00034259033203125,
      "step": 56130,
      "training_step_time": 0.4435403347015381
    },
    {
      "epoch": 0.000342596435546875,
      "model_forward_time": 0.114532470703125,
      "step": 56131
    },
    {
      "epoch": 0.000342596435546875,
      "step": 56131,
      "training_step_time": 0.4308328628540039
    },
    {
      "epoch": 0.0003426025390625,
      "model_forward_time": 0.11503291130065918,
      "step": 56132
    },
    {
      "epoch": 0.0003426025390625,
      "step": 56132,
      "training_step_time": 0.4660797119140625
    },
    {
      "epoch": 0.000342608642578125,
      "model_forward_time": 0.11539483070373535,
      "step": 56133
    },
    {
      "epoch": 0.000342608642578125,
      "step": 56133,
      "training_step_time": 0.4775221347808838
    },
    {
      "epoch": 0.00034261474609375,
      "model_forward_time": 0.11475062370300293,
      "step": 56134
    },
    {
      "epoch": 0.00034261474609375,
      "step": 56134,
      "training_step_time": 0.3888518810272217
    },
    {
      "epoch": 0.000342620849609375,
      "model_forward_time": 0.11510777473449707,
      "step": 56135
    },
    {
      "epoch": 0.000342620849609375,
      "step": 56135,
      "training_step_time": 0.39313769340515137
    },
    {
      "epoch": 0.000342626953125,
      "model_forward_time": 0.1149759292602539,
      "step": 56136
    },
    {
      "epoch": 0.000342626953125,
      "step": 56136,
      "training_step_time": 0.3891904354095459
    },
    {
      "epoch": 0.000342633056640625,
      "model_forward_time": 0.11565637588500977,
      "step": 56137
    },
    {
      "epoch": 0.000342633056640625,
      "step": 56137,
      "training_step_time": 0.3878822326660156
    },
    {
      "epoch": 0.00034263916015625,
      "model_forward_time": 0.11471152305603027,
      "step": 56138
    },
    {
      "epoch": 0.00034263916015625,
      "step": 56138,
      "training_step_time": 0.38441896438598633
    },
    {
      "epoch": 0.000342645263671875,
      "model_forward_time": 0.11551880836486816,
      "step": 56139
    },
    {
      "epoch": 0.000342645263671875,
      "step": 56139,
      "training_step_time": 0.48927760124206543
    },
    {
      "epoch": 0.0003426513671875,
      "grad_norm": 0.0761442482471466,
      "learning_rate": 1.1272648928389473e-06,
      "loss": 0.0333,
      "step": 56140
    },
    {
      "epoch": 0.0003426513671875,
      "model_forward_time": 0.11440515518188477,
      "step": 56140
    },
    {
      "epoch": 0.0003426513671875,
      "step": 56140,
      "training_step_time": 0.3948795795440674
    },
    {
      "epoch": 0.000342657470703125,
      "model_forward_time": 0.11527156829833984,
      "step": 56141
    },
    {
      "epoch": 0.000342657470703125,
      "step": 56141,
      "training_step_time": 0.3910486698150635
    },
    {
      "epoch": 0.00034266357421875,
      "model_forward_time": 0.11480140686035156,
      "step": 56142
    },
    {
      "epoch": 0.00034266357421875,
      "step": 56142,
      "training_step_time": 0.39846348762512207
    },
    {
      "epoch": 0.000342669677734375,
      "model_forward_time": 0.11532926559448242,
      "step": 56143
    },
    {
      "epoch": 0.000342669677734375,
      "step": 56143,
      "training_step_time": 0.4043278694152832
    },
    {
      "epoch": 0.00034267578125,
      "model_forward_time": 0.11541533470153809,
      "step": 56144
    },
    {
      "epoch": 0.00034267578125,
      "step": 56144,
      "training_step_time": 0.4161865711212158
    },
    {
      "epoch": 0.000342681884765625,
      "model_forward_time": 0.1147313117980957,
      "step": 56145
    },
    {
      "epoch": 0.000342681884765625,
      "step": 56145,
      "training_step_time": 0.45743751525878906
    },
    {
      "epoch": 0.00034268798828125,
      "model_forward_time": 0.11470389366149902,
      "step": 56146
    },
    {
      "epoch": 0.00034268798828125,
      "step": 56146,
      "training_step_time": 0.4092538356781006
    },
    {
      "epoch": 0.000342694091796875,
      "model_forward_time": 0.11504507064819336,
      "step": 56147
    },
    {
      "epoch": 0.000342694091796875,
      "step": 56147,
      "training_step_time": 0.4495062828063965
    },
    {
      "epoch": 0.0003427001953125,
      "model_forward_time": 0.11514091491699219,
      "step": 56148
    },
    {
      "epoch": 0.0003427001953125,
      "step": 56148,
      "training_step_time": 0.4294869899749756
    },
    {
      "epoch": 0.000342706298828125,
      "model_forward_time": 0.11541914939880371,
      "step": 56149
    },
    {
      "epoch": 0.000342706298828125,
      "step": 56149,
      "training_step_time": 0.4503638744354248
    },
    {
      "epoch": 0.00034271240234375,
      "grad_norm": 0.08369632810354233,
      "learning_rate": 1.1214536120297714e-06,
      "loss": 0.0372,
      "step": 56150
    },
    {
      "epoch": 0.00034271240234375,
      "model_forward_time": 0.11423778533935547,
      "step": 56150
    },
    {
      "epoch": 0.00034271240234375,
      "step": 56150,
      "training_step_time": 0.3873002529144287
    },
    {
      "epoch": 0.000342718505859375,
      "model_forward_time": 0.1146399974822998,
      "step": 56151
    },
    {
      "epoch": 0.000342718505859375,
      "step": 56151,
      "training_step_time": 0.38413214683532715
    },
    {
      "epoch": 0.000342724609375,
      "model_forward_time": 0.1164095401763916,
      "step": 56152
    },
    {
      "epoch": 0.000342724609375,
      "step": 56152,
      "training_step_time": 0.38731813430786133
    },
    {
      "epoch": 0.000342730712890625,
      "model_forward_time": 0.11481213569641113,
      "step": 56153
    },
    {
      "epoch": 0.000342730712890625,
      "step": 56153,
      "training_step_time": 0.40335893630981445
    },
    {
      "epoch": 0.00034273681640625,
      "model_forward_time": 0.1149454116821289,
      "step": 56154
    },
    {
      "epoch": 0.00034273681640625,
      "step": 56154,
      "training_step_time": 0.3956880569458008
    },
    {
      "epoch": 0.000342742919921875,
      "model_forward_time": 0.11570215225219727,
      "step": 56155
    },
    {
      "epoch": 0.000342742919921875,
      "step": 56155,
      "training_step_time": 0.3896307945251465
    },
    {
      "epoch": 0.0003427490234375,
      "model_forward_time": 0.11562657356262207,
      "step": 56156
    },
    {
      "epoch": 0.0003427490234375,
      "step": 56156,
      "training_step_time": 0.3946988582611084
    },
    {
      "epoch": 0.000342755126953125,
      "model_forward_time": 0.11562228202819824,
      "step": 56157
    },
    {
      "epoch": 0.000342755126953125,
      "step": 56157,
      "training_step_time": 0.5317649841308594
    },
    {
      "epoch": 0.00034276123046875,
      "model_forward_time": 0.11510419845581055,
      "step": 56158
    },
    {
      "epoch": 0.00034276123046875,
      "step": 56158,
      "training_step_time": 0.3885767459869385
    },
    {
      "epoch": 0.000342767333984375,
      "model_forward_time": 0.11571502685546875,
      "step": 56159
    },
    {
      "epoch": 0.000342767333984375,
      "step": 56159,
      "training_step_time": 0.4378092288970947
    },
    {
      "epoch": 0.0003427734375,
      "grad_norm": 0.08218678086996078,
      "learning_rate": 1.1156571792324211e-06,
      "loss": 0.0379,
      "step": 56160
    },
    {
      "epoch": 0.0003427734375,
      "model_forward_time": 0.11498022079467773,
      "step": 56160
    },
    {
      "epoch": 0.0003427734375,
      "step": 56160,
      "training_step_time": 0.47083568572998047
    },
    {
      "epoch": 0.000342779541015625,
      "model_forward_time": 0.1146397590637207,
      "step": 56161
    },
    {
      "epoch": 0.000342779541015625,
      "step": 56161,
      "training_step_time": 0.367753267288208
    },
    {
      "epoch": 0.00034278564453125,
      "model_forward_time": 0.11536931991577148,
      "step": 56162
    },
    {
      "epoch": 0.00034278564453125,
      "step": 56162,
      "training_step_time": 0.44793152809143066
    },
    {
      "epoch": 0.000342791748046875,
      "model_forward_time": 0.11513161659240723,
      "step": 56163
    },
    {
      "epoch": 0.000342791748046875,
      "step": 56163,
      "training_step_time": 0.43357348442077637
    },
    {
      "epoch": 0.0003427978515625,
      "model_forward_time": 0.11472535133361816,
      "step": 56164
    },
    {
      "epoch": 0.0003427978515625,
      "step": 56164,
      "training_step_time": 0.38832569122314453
    },
    {
      "epoch": 0.000342803955078125,
      "model_forward_time": 0.11489081382751465,
      "step": 56165
    },
    {
      "epoch": 0.000342803955078125,
      "step": 56165,
      "training_step_time": 0.3941013813018799
    },
    {
      "epoch": 0.00034281005859375,
      "model_forward_time": 0.11440467834472656,
      "step": 56166
    },
    {
      "epoch": 0.00034281005859375,
      "step": 56166,
      "training_step_time": 0.38744091987609863
    },
    {
      "epoch": 0.000342816162109375,
      "model_forward_time": 0.11524534225463867,
      "step": 56167
    },
    {
      "epoch": 0.000342816162109375,
      "step": 56167,
      "training_step_time": 0.38782787322998047
    },
    {
      "epoch": 0.000342822265625,
      "model_forward_time": 0.11507320404052734,
      "step": 56168
    },
    {
      "epoch": 0.000342822265625,
      "step": 56168,
      "training_step_time": 0.3877074718475342
    },
    {
      "epoch": 0.000342828369140625,
      "model_forward_time": 0.11483120918273926,
      "step": 56169
    },
    {
      "epoch": 0.000342828369140625,
      "step": 56169,
      "training_step_time": 0.39727354049682617
    },
    {
      "epoch": 0.00034283447265625,
      "grad_norm": 0.08731292188167572,
      "learning_rate": 1.10987559620771e-06,
      "loss": 0.0344,
      "step": 56170
    },
    {
      "epoch": 0.00034283447265625,
      "model_forward_time": 0.11605596542358398,
      "step": 56170
    },
    {
      "epoch": 0.00034283447265625,
      "step": 56170,
      "training_step_time": 0.3937230110168457
    },
    {
      "epoch": 0.000342840576171875,
      "model_forward_time": 0.11548423767089844,
      "step": 56171
    },
    {
      "epoch": 0.000342840576171875,
      "step": 56171,
      "training_step_time": 0.40137791633605957
    },
    {
      "epoch": 0.0003428466796875,
      "model_forward_time": 0.11561059951782227,
      "step": 56172
    },
    {
      "epoch": 0.0003428466796875,
      "step": 56172,
      "training_step_time": 0.4603695869445801
    },
    {
      "epoch": 0.000342852783203125,
      "model_forward_time": 0.11482620239257812,
      "step": 56173
    },
    {
      "epoch": 0.000342852783203125,
      "step": 56173,
      "training_step_time": 0.44678306579589844
    },
    {
      "epoch": 0.00034285888671875,
      "model_forward_time": 0.11445260047912598,
      "step": 56174
    },
    {
      "epoch": 0.00034285888671875,
      "step": 56174,
      "training_step_time": 0.41336488723754883
    },
    {
      "epoch": 0.000342864990234375,
      "model_forward_time": 0.1147775650024414,
      "step": 56175
    },
    {
      "epoch": 0.000342864990234375,
      "step": 56175,
      "training_step_time": 0.46589136123657227
    },
    {
      "epoch": 0.00034287109375,
      "model_forward_time": 0.11481785774230957,
      "step": 56176
    },
    {
      "epoch": 0.00034287109375,
      "step": 56176,
      "training_step_time": 0.4032294750213623
    },
    {
      "epoch": 0.000342877197265625,
      "model_forward_time": 0.11545634269714355,
      "step": 56177
    },
    {
      "epoch": 0.000342877197265625,
      "step": 56177,
      "training_step_time": 0.470902681350708
    },
    {
      "epoch": 0.00034288330078125,
      "model_forward_time": 0.1154778003692627,
      "step": 56178
    },
    {
      "epoch": 0.00034288330078125,
      "step": 56178,
      "training_step_time": 0.4664580821990967
    },
    {
      "epoch": 0.000342889404296875,
      "model_forward_time": 0.11443662643432617,
      "step": 56179
    },
    {
      "epoch": 0.000342889404296875,
      "step": 56179,
      "training_step_time": 0.3908689022064209
    },
    {
      "epoch": 0.0003428955078125,
      "grad_norm": 0.1412598192691803,
      "learning_rate": 1.1041088647119114e-06,
      "loss": 0.0429,
      "step": 56180
    },
    {
      "epoch": 0.0003428955078125,
      "model_forward_time": 0.11478924751281738,
      "step": 56180
    },
    {
      "epoch": 0.0003428955078125,
      "step": 56180,
      "training_step_time": 0.3911149501800537
    },
    {
      "epoch": 0.000342901611328125,
      "model_forward_time": 0.1157219409942627,
      "step": 56181
    },
    {
      "epoch": 0.000342901611328125,
      "step": 56181,
      "training_step_time": 0.3846757411956787
    },
    {
      "epoch": 0.00034290771484375,
      "model_forward_time": 0.11491775512695312,
      "step": 56182
    },
    {
      "epoch": 0.00034290771484375,
      "step": 56182,
      "training_step_time": 0.3871438503265381
    },
    {
      "epoch": 0.000342913818359375,
      "model_forward_time": 0.11465239524841309,
      "step": 56183
    },
    {
      "epoch": 0.000342913818359375,
      "step": 56183,
      "training_step_time": 0.39911603927612305
    },
    {
      "epoch": 0.000342919921875,
      "model_forward_time": 0.11496281623840332,
      "step": 56184
    },
    {
      "epoch": 0.000342919921875,
      "step": 56184,
      "training_step_time": 0.3933584690093994
    },
    {
      "epoch": 0.000342926025390625,
      "model_forward_time": 0.11483383178710938,
      "step": 56185
    },
    {
      "epoch": 0.000342926025390625,
      "step": 56185,
      "training_step_time": 0.39553189277648926
    },
    {
      "epoch": 0.00034293212890625,
      "model_forward_time": 0.11617159843444824,
      "step": 56186
    },
    {
      "epoch": 0.00034293212890625,
      "step": 56186,
      "training_step_time": 0.42682385444641113
    },
    {
      "epoch": 0.000342938232421875,
      "model_forward_time": 0.11486315727233887,
      "step": 56187
    },
    {
      "epoch": 0.000342938232421875,
      "step": 56187,
      "training_step_time": 0.39391517639160156
    },
    {
      "epoch": 0.0003429443359375,
      "model_forward_time": 0.11562180519104004,
      "step": 56188
    },
    {
      "epoch": 0.0003429443359375,
      "step": 56188,
      "training_step_time": 0.48969388008117676
    },
    {
      "epoch": 0.000342950439453125,
      "model_forward_time": 0.11560678482055664,
      "step": 56189
    },
    {
      "epoch": 0.000342950439453125,
      "step": 56189,
      "training_step_time": 0.3968198299407959
    },
    {
      "epoch": 0.00034295654296875,
      "grad_norm": 0.07608577609062195,
      "learning_rate": 1.0983569864968346e-06,
      "loss": 0.0381,
      "step": 56190
    },
    {
      "epoch": 0.00034295654296875,
      "model_forward_time": 0.11551046371459961,
      "step": 56190
    },
    {
      "epoch": 0.00034295654296875,
      "step": 56190,
      "training_step_time": 0.4259469509124756
    },
    {
      "epoch": 0.000342962646484375,
      "model_forward_time": 0.11465334892272949,
      "step": 56191
    },
    {
      "epoch": 0.000342962646484375,
      "step": 56191,
      "training_step_time": 0.4512171745300293
    },
    {
      "epoch": 0.00034296875,
      "model_forward_time": 0.11597990989685059,
      "step": 56192
    },
    {
      "epoch": 0.00034296875,
      "step": 56192,
      "training_step_time": 0.4354093074798584
    },
    {
      "epoch": 0.000342974853515625,
      "model_forward_time": 0.11498498916625977,
      "step": 56193
    },
    {
      "epoch": 0.000342974853515625,
      "step": 56193,
      "training_step_time": 0.4421203136444092
    },
    {
      "epoch": 0.00034298095703125,
      "model_forward_time": 0.11567902565002441,
      "step": 56194
    },
    {
      "epoch": 0.00034298095703125,
      "step": 56194,
      "training_step_time": 0.3926107883453369
    },
    {
      "epoch": 0.000342987060546875,
      "model_forward_time": 0.11391115188598633,
      "step": 56195
    },
    {
      "epoch": 0.000342987060546875,
      "step": 56195,
      "training_step_time": 0.39185333251953125
    },
    {
      "epoch": 0.0003429931640625,
      "model_forward_time": 0.1155080795288086,
      "step": 56196
    },
    {
      "epoch": 0.0003429931640625,
      "step": 56196,
      "training_step_time": 0.3925151824951172
    },
    {
      "epoch": 0.000342999267578125,
      "model_forward_time": 0.11471891403198242,
      "step": 56197
    },
    {
      "epoch": 0.000342999267578125,
      "step": 56197,
      "training_step_time": 0.3932609558105469
    },
    {
      "epoch": 0.00034300537109375,
      "model_forward_time": 0.11519503593444824,
      "step": 56198
    },
    {
      "epoch": 0.00034300537109375,
      "step": 56198,
      "training_step_time": 0.39679813385009766
    },
    {
      "epoch": 0.000343011474609375,
      "model_forward_time": 0.11535859107971191,
      "step": 56199
    },
    {
      "epoch": 0.000343011474609375,
      "step": 56199,
      "training_step_time": 0.4120059013366699
    },
    {
      "epoch": 0.000343017578125,
      "grad_norm": 0.09366486221551895,
      "learning_rate": 1.0926199633097157e-06,
      "loss": 0.0317,
      "step": 56200
    },
    {
      "epoch": 0.000343017578125,
      "model_forward_time": 0.1154792308807373,
      "step": 56200
    },
    {
      "epoch": 0.000343017578125,
      "step": 56200,
      "training_step_time": 0.41536808013916016
    },
    {
      "epoch": 0.000343023681640625,
      "model_forward_time": 0.11425542831420898,
      "step": 56201
    },
    {
      "epoch": 0.000343023681640625,
      "step": 56201,
      "training_step_time": 0.43396830558776855
    },
    {
      "epoch": 0.00034302978515625,
      "model_forward_time": 0.11423110961914062,
      "step": 56202
    },
    {
      "epoch": 0.00034302978515625,
      "step": 56202,
      "training_step_time": 0.40726470947265625
    },
    {
      "epoch": 0.000343035888671875,
      "model_forward_time": 0.11553263664245605,
      "step": 56203
    },
    {
      "epoch": 0.000343035888671875,
      "step": 56203,
      "training_step_time": 0.40183138847351074
    },
    {
      "epoch": 0.0003430419921875,
      "model_forward_time": 0.11600685119628906,
      "step": 56204
    },
    {
      "epoch": 0.0003430419921875,
      "step": 56204,
      "training_step_time": 0.4805314540863037
    },
    {
      "epoch": 0.000343048095703125,
      "model_forward_time": 0.11530661582946777,
      "step": 56205
    },
    {
      "epoch": 0.000343048095703125,
      "step": 56205,
      "training_step_time": 0.44005465507507324
    },
    {
      "epoch": 0.00034305419921875,
      "model_forward_time": 0.11534500122070312,
      "step": 56206
    },
    {
      "epoch": 0.00034305419921875,
      "step": 56206,
      "training_step_time": 0.42397212982177734
    },
    {
      "epoch": 0.000343060302734375,
      "model_forward_time": 0.11520719528198242,
      "step": 56207
    },
    {
      "epoch": 0.000343060302734375,
      "step": 56207,
      "training_step_time": 0.4218418598175049
    },
    {
      "epoch": 0.00034306640625,
      "model_forward_time": 0.11455750465393066,
      "step": 56208
    },
    {
      "epoch": 0.00034306640625,
      "step": 56208,
      "training_step_time": 0.4798591136932373
    },
    {
      "epoch": 0.000343072509765625,
      "model_forward_time": 0.1143341064453125,
      "step": 56209
    },
    {
      "epoch": 0.000343072509765625,
      "step": 56209,
      "training_step_time": 0.4026350975036621
    },
    {
      "epoch": 0.00034307861328125,
      "grad_norm": 0.07517603784799576,
      "learning_rate": 1.0868977968933436e-06,
      "loss": 0.0403,
      "step": 56210
    },
    {
      "epoch": 0.00034307861328125,
      "model_forward_time": 0.1145484447479248,
      "step": 56210
    },
    {
      "epoch": 0.00034307861328125,
      "step": 56210,
      "training_step_time": 0.3956148624420166
    },
    {
      "epoch": 0.000343084716796875,
      "model_forward_time": 0.11528611183166504,
      "step": 56211
    },
    {
      "epoch": 0.000343084716796875,
      "step": 56211,
      "training_step_time": 0.40006017684936523
    },
    {
      "epoch": 0.0003430908203125,
      "model_forward_time": 0.11456418037414551,
      "step": 56212
    },
    {
      "epoch": 0.0003430908203125,
      "step": 56212,
      "training_step_time": 0.3964359760284424
    },
    {
      "epoch": 0.000343096923828125,
      "model_forward_time": 0.1145787239074707,
      "step": 56213
    },
    {
      "epoch": 0.000343096923828125,
      "step": 56213,
      "training_step_time": 0.38121700286865234
    },
    {
      "epoch": 0.00034310302734375,
      "model_forward_time": 0.11582303047180176,
      "step": 56214
    },
    {
      "epoch": 0.00034310302734375,
      "step": 56214,
      "training_step_time": 0.39316415786743164
    },
    {
      "epoch": 0.000343109130859375,
      "model_forward_time": 0.11559438705444336,
      "step": 56215
    },
    {
      "epoch": 0.000343109130859375,
      "step": 56215,
      "training_step_time": 0.39963841438293457
    },
    {
      "epoch": 0.000343115234375,
      "model_forward_time": 0.11494159698486328,
      "step": 56216
    },
    {
      "epoch": 0.000343115234375,
      "step": 56216,
      "training_step_time": 0.38883185386657715
    },
    {
      "epoch": 0.000343121337890625,
      "model_forward_time": 0.11514663696289062,
      "step": 56217
    },
    {
      "epoch": 0.000343121337890625,
      "step": 56217,
      "training_step_time": 0.5810198783874512
    },
    {
      "epoch": 0.00034312744140625,
      "model_forward_time": 0.11497259140014648,
      "step": 56218
    },
    {
      "epoch": 0.00034312744140625,
      "step": 56218,
      "training_step_time": 0.39820432662963867
    },
    {
      "epoch": 0.000343133544921875,
      "model_forward_time": 0.11459660530090332,
      "step": 56219
    },
    {
      "epoch": 0.000343133544921875,
      "step": 56219,
      "training_step_time": 0.48288965225219727
    },
    {
      "epoch": 0.0003431396484375,
      "grad_norm": 0.08230380713939667,
      "learning_rate": 1.0811904889859336e-06,
      "loss": 0.0363,
      "step": 56220
    },
    {
      "epoch": 0.0003431396484375,
      "model_forward_time": 0.11500406265258789,
      "step": 56220
    },
    {
      "epoch": 0.0003431396484375,
      "step": 56220,
      "training_step_time": 0.36292290687561035
    },
    {
      "epoch": 0.000343145751953125,
      "model_forward_time": 0.11429452896118164,
      "step": 56221
    },
    {
      "epoch": 0.000343145751953125,
      "step": 56221,
      "training_step_time": 0.4520132541656494
    },
    {
      "epoch": 0.00034315185546875,
      "model_forward_time": 0.11583399772644043,
      "step": 56222
    },
    {
      "epoch": 0.00034315185546875,
      "step": 56222,
      "training_step_time": 0.3950462341308594
    },
    {
      "epoch": 0.000343157958984375,
      "model_forward_time": 0.11525630950927734,
      "step": 56223
    },
    {
      "epoch": 0.000343157958984375,
      "step": 56223,
      "training_step_time": 0.3874471187591553
    },
    {
      "epoch": 0.0003431640625,
      "model_forward_time": 0.11467981338500977,
      "step": 56224
    },
    {
      "epoch": 0.0003431640625,
      "step": 56224,
      "training_step_time": 0.39305806159973145
    },
    {
      "epoch": 0.000343170166015625,
      "model_forward_time": 0.11527466773986816,
      "step": 56225
    },
    {
      "epoch": 0.000343170166015625,
      "step": 56225,
      "training_step_time": 0.39536333084106445
    },
    {
      "epoch": 0.00034317626953125,
      "model_forward_time": 0.11520242691040039,
      "step": 56226
    },
    {
      "epoch": 0.00034317626953125,
      "step": 56226,
      "training_step_time": 0.39822983741760254
    },
    {
      "epoch": 0.000343182373046875,
      "model_forward_time": 0.11527729034423828,
      "step": 56227
    },
    {
      "epoch": 0.000343182373046875,
      "step": 56227,
      "training_step_time": 0.3971102237701416
    },
    {
      "epoch": 0.0003431884765625,
      "model_forward_time": 0.1153724193572998,
      "step": 56228
    },
    {
      "epoch": 0.0003431884765625,
      "step": 56228,
      "training_step_time": 0.44922423362731934
    },
    {
      "epoch": 0.000343194580078125,
      "model_forward_time": 0.1149287223815918,
      "step": 56229
    },
    {
      "epoch": 0.000343194580078125,
      "step": 56229,
      "training_step_time": 0.5387506484985352
    },
    {
      "epoch": 0.00034320068359375,
      "grad_norm": 0.09081742912530899,
      "learning_rate": 1.0754980413212268e-06,
      "loss": 0.0338,
      "step": 56230
    },
    {
      "epoch": 0.00034320068359375,
      "model_forward_time": 0.1155545711517334,
      "step": 56230
    },
    {
      "epoch": 0.00034320068359375,
      "step": 56230,
      "training_step_time": 0.39804768562316895
    },
    {
      "epoch": 0.000343206787109375,
      "model_forward_time": 0.11467623710632324,
      "step": 56231
    },
    {
      "epoch": 0.000343206787109375,
      "step": 56231,
      "training_step_time": 0.38837504386901855
    },
    {
      "epoch": 0.000343212890625,
      "model_forward_time": 0.11535000801086426,
      "step": 56232
    },
    {
      "epoch": 0.000343212890625,
      "step": 56232,
      "training_step_time": 0.4257547855377197
    },
    {
      "epoch": 0.000343218994140625,
      "model_forward_time": 0.11457538604736328,
      "step": 56233
    },
    {
      "epoch": 0.000343218994140625,
      "step": 56233,
      "training_step_time": 0.4701416492462158
    },
    {
      "epoch": 0.00034322509765625,
      "model_forward_time": 0.11525225639343262,
      "step": 56234
    },
    {
      "epoch": 0.00034322509765625,
      "step": 56234,
      "training_step_time": 0.45088744163513184
    },
    {
      "epoch": 0.000343231201171875,
      "model_forward_time": 0.11513495445251465,
      "step": 56235
    },
    {
      "epoch": 0.000343231201171875,
      "step": 56235,
      "training_step_time": 0.495635986328125
    },
    {
      "epoch": 0.0003432373046875,
      "model_forward_time": 0.11523079872131348,
      "step": 56236
    },
    {
      "epoch": 0.0003432373046875,
      "step": 56236,
      "training_step_time": 0.4547395706176758
    },
    {
      "epoch": 0.000343243408203125,
      "model_forward_time": 0.11468052864074707,
      "step": 56237
    },
    {
      "epoch": 0.000343243408203125,
      "step": 56237,
      "training_step_time": 0.37708187103271484
    },
    {
      "epoch": 0.00034324951171875,
      "model_forward_time": 0.11545205116271973,
      "step": 56238
    },
    {
      "epoch": 0.00034324951171875,
      "step": 56238,
      "training_step_time": 0.40502285957336426
    },
    {
      "epoch": 0.000343255615234375,
      "model_forward_time": 0.11637997627258301,
      "step": 56239
    },
    {
      "epoch": 0.000343255615234375,
      "step": 56239,
      "training_step_time": 0.47416257858276367
    },
    {
      "epoch": 0.00034326171875,
      "grad_norm": 0.10301005840301514,
      "learning_rate": 1.0698204556284452e-06,
      "loss": 0.0394,
      "step": 56240
    },
    {
      "epoch": 0.00034326171875,
      "model_forward_time": 0.12061548233032227,
      "step": 56240
    },
    {
      "epoch": 0.00034326171875,
      "step": 56240,
      "training_step_time": 0.5277717113494873
    },
    {
      "epoch": 0.000343267822265625,
      "model_forward_time": 0.11794686317443848,
      "step": 56241
    },
    {
      "epoch": 0.000343267822265625,
      "step": 56241,
      "training_step_time": 0.8599169254302979
    },
    {
      "epoch": 0.00034327392578125,
      "model_forward_time": 0.1166844367980957,
      "step": 56242
    },
    {
      "epoch": 0.00034327392578125,
      "step": 56242,
      "training_step_time": 0.6780076026916504
    },
    {
      "epoch": 0.000343280029296875,
      "model_forward_time": 0.13112258911132812,
      "step": 56243
    },
    {
      "epoch": 0.000343280029296875,
      "step": 56243,
      "training_step_time": 0.701991081237793
    },
    {
      "epoch": 0.0003432861328125,
      "model_forward_time": 0.12074971199035645,
      "step": 56244
    },
    {
      "epoch": 0.0003432861328125,
      "step": 56244,
      "training_step_time": 0.7871658802032471
    },
    {
      "epoch": 0.000343292236328125,
      "model_forward_time": 0.11772823333740234,
      "step": 56245
    },
    {
      "epoch": 0.000343292236328125,
      "step": 56245,
      "training_step_time": 0.6603405475616455
    },
    {
      "epoch": 0.00034329833984375,
      "model_forward_time": 0.11965751647949219,
      "step": 56246
    },
    {
      "epoch": 0.00034329833984375,
      "step": 56246,
      "training_step_time": 0.7475435733795166
    },
    {
      "epoch": 0.000343304443359375,
      "model_forward_time": 0.12178182601928711,
      "step": 56247
    },
    {
      "epoch": 0.000343304443359375,
      "step": 56247,
      "training_step_time": 0.6823430061340332
    },
    {
      "epoch": 0.000343310546875,
      "model_forward_time": 0.1163783073425293,
      "step": 56248
    },
    {
      "epoch": 0.000343310546875,
      "step": 56248,
      "training_step_time": 0.6624479293823242
    },
    {
      "epoch": 0.000343316650390625,
      "model_forward_time": 0.1152961254119873,
      "step": 56249
    },
    {
      "epoch": 0.000343316650390625,
      "step": 56249,
      "training_step_time": 0.7041127681732178
    },
    {
      "epoch": 0.00034332275390625,
      "grad_norm": 0.07875359803438187,
      "learning_rate": 1.064157733632276e-06,
      "loss": 0.0361,
      "step": 56250
    },
    {
      "epoch": 0.00034332275390625,
      "model_forward_time": 0.11753106117248535,
      "step": 56250
    },
    {
      "epoch": 0.00034332275390625,
      "step": 56250,
      "training_step_time": 0.668590784072876
    },
    {
      "epoch": 0.000343328857421875,
      "model_forward_time": 0.11716556549072266,
      "step": 56251
    },
    {
      "epoch": 0.000343328857421875,
      "step": 56251,
      "training_step_time": 0.6731913089752197
    },
    {
      "epoch": 0.0003433349609375,
      "model_forward_time": 0.11816883087158203,
      "step": 56252
    },
    {
      "epoch": 0.0003433349609375,
      "step": 56252,
      "training_step_time": 0.6770210266113281
    },
    {
      "epoch": 0.000343341064453125,
      "model_forward_time": 0.12180089950561523,
      "step": 56253
    },
    {
      "epoch": 0.000343341064453125,
      "step": 56253,
      "training_step_time": 0.6918487548828125
    },
    {
      "epoch": 0.00034334716796875,
      "model_forward_time": 0.12438726425170898,
      "step": 56254
    },
    {
      "epoch": 0.00034334716796875,
      "step": 56254,
      "training_step_time": 0.6812152862548828
    },
    {
      "epoch": 0.000343353271484375,
      "model_forward_time": 0.1273043155670166,
      "step": 56255
    },
    {
      "epoch": 0.000343353271484375,
      "step": 56255,
      "training_step_time": 0.6858558654785156
    },
    {
      "epoch": 0.000343359375,
      "model_forward_time": 0.1194760799407959,
      "step": 56256
    },
    {
      "epoch": 0.000343359375,
      "step": 56256,
      "training_step_time": 0.6893243789672852
    },
    {
      "epoch": 0.000343365478515625,
      "model_forward_time": 0.11742234230041504,
      "step": 56257
    },
    {
      "epoch": 0.000343365478515625,
      "step": 56257,
      "training_step_time": 0.6206619739532471
    },
    {
      "epoch": 0.00034337158203125,
      "model_forward_time": 0.1160426139831543,
      "step": 56258
    },
    {
      "epoch": 0.00034337158203125,
      "step": 56258,
      "training_step_time": 0.7431209087371826
    },
    {
      "epoch": 0.000343377685546875,
      "model_forward_time": 0.12280607223510742,
      "step": 56259
    },
    {
      "epoch": 0.000343377685546875,
      "step": 56259,
      "training_step_time": 0.6039690971374512
    },
    {
      "epoch": 0.0003433837890625,
      "grad_norm": 0.08864682167768478,
      "learning_rate": 1.0585098770529157e-06,
      "loss": 0.038,
      "step": 56260
    },
    {
      "epoch": 0.0003433837890625,
      "model_forward_time": 0.12341189384460449,
      "step": 56260
    },
    {
      "epoch": 0.0003433837890625,
      "step": 56260,
      "training_step_time": 0.7443320751190186
    },
    {
      "epoch": 0.000343389892578125,
      "model_forward_time": 0.1184089183807373,
      "step": 56261
    },
    {
      "epoch": 0.000343389892578125,
      "step": 56261,
      "training_step_time": 0.6921625137329102
    },
    {
      "epoch": 0.00034339599609375,
      "model_forward_time": 0.11727786064147949,
      "step": 56262
    },
    {
      "epoch": 0.00034339599609375,
      "step": 56262,
      "training_step_time": 0.7503657341003418
    },
    {
      "epoch": 0.000343402099609375,
      "model_forward_time": 0.11861801147460938,
      "step": 56263
    },
    {
      "epoch": 0.000343402099609375,
      "step": 56263,
      "training_step_time": 0.7339663505554199
    },
    {
      "epoch": 0.000343408203125,
      "model_forward_time": 0.11745095252990723,
      "step": 56264
    },
    {
      "epoch": 0.000343408203125,
      "step": 56264,
      "training_step_time": 0.6822659969329834
    },
    {
      "epoch": 0.000343414306640625,
      "model_forward_time": 0.11537551879882812,
      "step": 56265
    },
    {
      "epoch": 0.000343414306640625,
      "step": 56265,
      "training_step_time": 0.7475697994232178
    },
    {
      "epoch": 0.00034342041015625,
      "model_forward_time": 0.1216437816619873,
      "step": 56266
    },
    {
      "epoch": 0.00034342041015625,
      "step": 56266,
      "training_step_time": 0.7206466197967529
    },
    {
      "epoch": 0.000343426513671875,
      "model_forward_time": 0.11708307266235352,
      "step": 56267
    },
    {
      "epoch": 0.000343426513671875,
      "step": 56267,
      "training_step_time": 0.709674596786499
    },
    {
      "epoch": 0.0003434326171875,
      "model_forward_time": 0.12242436408996582,
      "step": 56268
    },
    {
      "epoch": 0.0003434326171875,
      "step": 56268,
      "training_step_time": 0.6778421401977539
    },
    {
      "epoch": 0.000343438720703125,
      "model_forward_time": 0.1199955940246582,
      "step": 56269
    },
    {
      "epoch": 0.000343438720703125,
      "step": 56269,
      "training_step_time": 0.605118989944458
    },
    {
      "epoch": 0.00034344482421875,
      "grad_norm": 0.1351432502269745,
      "learning_rate": 1.0528768876060246e-06,
      "loss": 0.0415,
      "step": 56270
    },
    {
      "epoch": 0.00034344482421875,
      "model_forward_time": 0.12045097351074219,
      "step": 56270
    },
    {
      "epoch": 0.00034344482421875,
      "step": 56270,
      "training_step_time": 0.6584410667419434
    },
    {
      "epoch": 0.000343450927734375,
      "model_forward_time": 0.11575794219970703,
      "step": 56271
    },
    {
      "epoch": 0.000343450927734375,
      "step": 56271,
      "training_step_time": 0.7449040412902832
    },
    {
      "epoch": 0.00034345703125,
      "model_forward_time": 0.12561607360839844,
      "step": 56272
    },
    {
      "epoch": 0.00034345703125,
      "step": 56272,
      "training_step_time": 0.6910066604614258
    },
    {
      "epoch": 0.000343463134765625,
      "model_forward_time": 0.11667394638061523,
      "step": 56273
    },
    {
      "epoch": 0.000343463134765625,
      "step": 56273,
      "training_step_time": 0.7168457508087158
    },
    {
      "epoch": 0.00034346923828125,
      "model_forward_time": 0.11659097671508789,
      "step": 56274
    },
    {
      "epoch": 0.00034346923828125,
      "step": 56274,
      "training_step_time": 0.7209467887878418
    },
    {
      "epoch": 0.000343475341796875,
      "model_forward_time": 0.11789822578430176,
      "step": 56275
    },
    {
      "epoch": 0.000343475341796875,
      "step": 56275,
      "training_step_time": 0.6433815956115723
    },
    {
      "epoch": 0.0003434814453125,
      "model_forward_time": 0.12022638320922852,
      "step": 56276
    },
    {
      "epoch": 0.0003434814453125,
      "step": 56276,
      "training_step_time": 0.6704840660095215
    },
    {
      "epoch": 0.000343487548828125,
      "model_forward_time": 0.11789202690124512,
      "step": 56277
    },
    {
      "epoch": 0.000343487548828125,
      "step": 56277,
      "training_step_time": 0.6697194576263428
    },
    {
      "epoch": 0.00034349365234375,
      "model_forward_time": 0.11582469940185547,
      "step": 56278
    },
    {
      "epoch": 0.00034349365234375,
      "step": 56278,
      "training_step_time": 0.63043212890625
    },
    {
      "epoch": 0.000343499755859375,
      "model_forward_time": 0.12103915214538574,
      "step": 56279
    },
    {
      "epoch": 0.000343499755859375,
      "step": 56279,
      "training_step_time": 0.6504690647125244
    },
    {
      "epoch": 0.000343505859375,
      "grad_norm": 0.08134658634662628,
      "learning_rate": 1.0472587670027678e-06,
      "loss": 0.0347,
      "step": 56280
    },
    {
      "epoch": 0.000343505859375,
      "model_forward_time": 0.12034273147583008,
      "step": 56280
    },
    {
      "epoch": 0.000343505859375,
      "step": 56280,
      "training_step_time": 0.6667649745941162
    },
    {
      "epoch": 0.000343511962890625,
      "model_forward_time": 0.11670708656311035,
      "step": 56281
    },
    {
      "epoch": 0.000343511962890625,
      "step": 56281,
      "training_step_time": 0.6600925922393799
    },
    {
      "epoch": 0.00034351806640625,
      "model_forward_time": 0.12288689613342285,
      "step": 56282
    },
    {
      "epoch": 0.00034351806640625,
      "step": 56282,
      "training_step_time": 0.6299388408660889
    },
    {
      "epoch": 0.000343524169921875,
      "model_forward_time": 0.12428879737854004,
      "step": 56283
    },
    {
      "epoch": 0.000343524169921875,
      "step": 56283,
      "training_step_time": 0.6790721416473389
    },
    {
      "epoch": 0.0003435302734375,
      "model_forward_time": 0.11915946006774902,
      "step": 56284
    },
    {
      "epoch": 0.0003435302734375,
      "step": 56284,
      "training_step_time": 0.7333540916442871
    },
    {
      "epoch": 0.000343536376953125,
      "model_forward_time": 0.11728715896606445,
      "step": 56285
    },
    {
      "epoch": 0.000343536376953125,
      "step": 56285,
      "training_step_time": 0.6607935428619385
    },
    {
      "epoch": 0.00034354248046875,
      "model_forward_time": 0.11835956573486328,
      "step": 56286
    },
    {
      "epoch": 0.00034354248046875,
      "step": 56286,
      "training_step_time": 0.6300253868103027
    },
    {
      "epoch": 0.000343548583984375,
      "model_forward_time": 0.12535309791564941,
      "step": 56287
    },
    {
      "epoch": 0.000343548583984375,
      "step": 56287,
      "training_step_time": 0.649014949798584
    },
    {
      "epoch": 0.0003435546875,
      "model_forward_time": 0.11613225936889648,
      "step": 56288
    },
    {
      "epoch": 0.0003435546875,
      "step": 56288,
      "training_step_time": 0.6790175437927246
    },
    {
      "epoch": 0.000343560791015625,
      "model_forward_time": 0.11894559860229492,
      "step": 56289
    },
    {
      "epoch": 0.000343560791015625,
      "step": 56289,
      "training_step_time": 0.7033591270446777
    },
    {
      "epoch": 0.00034356689453125,
      "grad_norm": 0.10234387218952179,
      "learning_rate": 1.0416555169497688e-06,
      "loss": 0.034,
      "step": 56290
    },
    {
      "epoch": 0.00034356689453125,
      "model_forward_time": 0.12104201316833496,
      "step": 56290
    },
    {
      "epoch": 0.00034356689453125,
      "step": 56290,
      "training_step_time": 0.659905195236206
    },
    {
      "epoch": 0.000343572998046875,
      "model_forward_time": 0.12027621269226074,
      "step": 56291
    },
    {
      "epoch": 0.000343572998046875,
      "step": 56291,
      "training_step_time": 0.6734147071838379
    },
    {
      "epoch": 0.0003435791015625,
      "model_forward_time": 0.11752748489379883,
      "step": 56292
    },
    {
      "epoch": 0.0003435791015625,
      "step": 56292,
      "training_step_time": 0.7549121379852295
    },
    {
      "epoch": 0.000343585205078125,
      "model_forward_time": 0.11819720268249512,
      "step": 56293
    },
    {
      "epoch": 0.000343585205078125,
      "step": 56293,
      "training_step_time": 0.7558112144470215
    },
    {
      "epoch": 0.00034359130859375,
      "model_forward_time": 0.11801815032958984,
      "step": 56294
    },
    {
      "epoch": 0.00034359130859375,
      "step": 56294,
      "training_step_time": 0.6742103099822998
    },
    {
      "epoch": 0.000343597412109375,
      "model_forward_time": 0.12025070190429688,
      "step": 56295
    },
    {
      "epoch": 0.000343597412109375,
      "step": 56295,
      "training_step_time": 0.6563034057617188
    },
    {
      "epoch": 0.000343603515625,
      "model_forward_time": 0.12134623527526855,
      "step": 56296
    },
    {
      "epoch": 0.000343603515625,
      "step": 56296,
      "training_step_time": 0.670297384262085
    },
    {
      "epoch": 0.000343609619140625,
      "model_forward_time": 0.11811041831970215,
      "step": 56297
    },
    {
      "epoch": 0.000343609619140625,
      "step": 56297,
      "training_step_time": 0.6382787227630615
    },
    {
      "epoch": 0.00034361572265625,
      "model_forward_time": 0.12203860282897949,
      "step": 56298
    },
    {
      "epoch": 0.00034361572265625,
      "step": 56298,
      "training_step_time": 0.672205924987793
    },
    {
      "epoch": 0.000343621826171875,
      "model_forward_time": 0.1165015697479248,
      "step": 56299
    },
    {
      "epoch": 0.000343621826171875,
      "step": 56299,
      "training_step_time": 0.6909351348876953
    },
    {
      "epoch": 0.0003436279296875,
      "grad_norm": 0.06450933963060379,
      "learning_rate": 1.0360671391491606e-06,
      "loss": 0.0396,
      "step": 56300
    },
    {
      "epoch": 0.0003436279296875,
      "model_forward_time": 0.12099933624267578,
      "step": 56300
    },
    {
      "epoch": 0.0003436279296875,
      "step": 56300,
      "training_step_time": 0.6894781589508057
    },
    {
      "epoch": 0.000343634033203125,
      "model_forward_time": 0.11877751350402832,
      "step": 56301
    },
    {
      "epoch": 0.000343634033203125,
      "step": 56301,
      "training_step_time": 0.618922233581543
    },
    {
      "epoch": 0.00034364013671875,
      "model_forward_time": 0.1170802116394043,
      "step": 56302
    },
    {
      "epoch": 0.00034364013671875,
      "step": 56302,
      "training_step_time": 0.7234187126159668
    },
    {
      "epoch": 0.000343646240234375,
      "model_forward_time": 0.12029242515563965,
      "step": 56303
    },
    {
      "epoch": 0.000343646240234375,
      "step": 56303,
      "training_step_time": 0.6576247215270996
    },
    {
      "epoch": 0.00034365234375,
      "model_forward_time": 0.12358331680297852,
      "step": 56304
    },
    {
      "epoch": 0.00034365234375,
      "step": 56304,
      "training_step_time": 0.5983390808105469
    },
    {
      "epoch": 0.000343658447265625,
      "model_forward_time": 0.12161707878112793,
      "step": 56305
    },
    {
      "epoch": 0.000343658447265625,
      "step": 56305,
      "training_step_time": 0.5613300800323486
    },
    {
      "epoch": 0.00034366455078125,
      "model_forward_time": 0.12797164916992188,
      "step": 56306
    },
    {
      "epoch": 0.00034366455078125,
      "step": 56306,
      "training_step_time": 0.602297306060791
    },
    {
      "epoch": 0.000343670654296875,
      "model_forward_time": 0.11935615539550781,
      "step": 56307
    },
    {
      "epoch": 0.000343670654296875,
      "step": 56307,
      "training_step_time": 0.5484702587127686
    },
    {
      "epoch": 0.0003436767578125,
      "model_forward_time": 0.11826276779174805,
      "step": 56308
    },
    {
      "epoch": 0.0003436767578125,
      "step": 56308,
      "training_step_time": 0.5824260711669922
    },
    {
      "epoch": 0.000343682861328125,
      "model_forward_time": 0.1194908618927002,
      "step": 56309
    },
    {
      "epoch": 0.000343682861328125,
      "step": 56309,
      "training_step_time": 0.5959212779998779
    },
    {
      "epoch": 0.00034368896484375,
      "grad_norm": 0.0953722819685936,
      "learning_rate": 1.030493635298535e-06,
      "loss": 0.0367,
      "step": 56310
    },
    {
      "epoch": 0.00034368896484375,
      "model_forward_time": 0.11848759651184082,
      "step": 56310
    },
    {
      "epoch": 0.00034368896484375,
      "step": 56310,
      "training_step_time": 0.5724456310272217
    },
    {
      "epoch": 0.000343695068359375,
      "model_forward_time": 0.11704730987548828,
      "step": 56311
    },
    {
      "epoch": 0.000343695068359375,
      "step": 56311,
      "training_step_time": 0.5619025230407715
    },
    {
      "epoch": 0.000343701171875,
      "model_forward_time": 0.11730718612670898,
      "step": 56312
    },
    {
      "epoch": 0.000343701171875,
      "step": 56312,
      "training_step_time": 0.5051789283752441
    },
    {
      "epoch": 0.000343707275390625,
      "model_forward_time": 0.1168818473815918,
      "step": 56313
    },
    {
      "epoch": 0.000343707275390625,
      "step": 56313,
      "training_step_time": 0.43973803520202637
    },
    {
      "epoch": 0.00034371337890625,
      "model_forward_time": 0.11594319343566895,
      "step": 56314
    },
    {
      "epoch": 0.00034371337890625,
      "step": 56314,
      "training_step_time": 0.5039010047912598
    },
    {
      "epoch": 0.000343719482421875,
      "model_forward_time": 0.11588096618652344,
      "step": 56315
    },
    {
      "epoch": 0.000343719482421875,
      "step": 56315,
      "training_step_time": 0.41358494758605957
    },
    {
      "epoch": 0.0003437255859375,
      "model_forward_time": 0.11528420448303223,
      "step": 56316
    },
    {
      "epoch": 0.0003437255859375,
      "step": 56316,
      "training_step_time": 0.3955504894256592
    },
    {
      "epoch": 0.000343731689453125,
      "model_forward_time": 0.11498451232910156,
      "step": 56317
    },
    {
      "epoch": 0.000343731689453125,
      "step": 56317,
      "training_step_time": 0.4143381118774414
    },
    {
      "epoch": 0.00034373779296875,
      "model_forward_time": 0.11567068099975586,
      "step": 56318
    },
    {
      "epoch": 0.00034373779296875,
      "step": 56318,
      "training_step_time": 0.40257883071899414
    },
    {
      "epoch": 0.000343743896484375,
      "model_forward_time": 0.11522412300109863,
      "step": 56319
    },
    {
      "epoch": 0.000343743896484375,
      "step": 56319,
      "training_step_time": 0.40139245986938477
    },
    {
      "epoch": 0.00034375,
      "grad_norm": 0.08432119339704514,
      "learning_rate": 1.0249350070909768e-06,
      "loss": 0.0372,
      "step": 56320
    },
    {
      "epoch": 0.00034375,
      "model_forward_time": 0.11430239677429199,
      "step": 56320
    },
    {
      "epoch": 0.00034375,
      "step": 56320,
      "training_step_time": 0.39227986335754395
    },
    {
      "epoch": 0.000343756103515625,
      "model_forward_time": 0.11505722999572754,
      "step": 56321
    },
    {
      "epoch": 0.000343756103515625,
      "step": 56321,
      "training_step_time": 0.396777868270874
    },
    {
      "epoch": 0.00034376220703125,
      "model_forward_time": 0.11622786521911621,
      "step": 56322
    },
    {
      "epoch": 0.00034376220703125,
      "step": 56322,
      "training_step_time": 0.40675806999206543
    },
    {
      "epoch": 0.000343768310546875,
      "model_forward_time": 0.1146845817565918,
      "step": 56323
    },
    {
      "epoch": 0.000343768310546875,
      "step": 56323,
      "training_step_time": 0.488903284072876
    },
    {
      "epoch": 0.0003437744140625,
      "model_forward_time": 0.11523556709289551,
      "step": 56324
    },
    {
      "epoch": 0.0003437744140625,
      "step": 56324,
      "training_step_time": 0.41344475746154785
    },
    {
      "epoch": 0.000343780517578125,
      "model_forward_time": 0.11482787132263184,
      "step": 56325
    },
    {
      "epoch": 0.000343780517578125,
      "step": 56325,
      "training_step_time": 0.41454410552978516
    },
    {
      "epoch": 0.00034378662109375,
      "model_forward_time": 0.11484789848327637,
      "step": 56326
    },
    {
      "epoch": 0.00034378662109375,
      "step": 56326,
      "training_step_time": 0.4446382522583008
    },
    {
      "epoch": 0.000343792724609375,
      "model_forward_time": 0.1154942512512207,
      "step": 56327
    },
    {
      "epoch": 0.000343792724609375,
      "step": 56327,
      "training_step_time": 0.4216947555541992
    },
    {
      "epoch": 0.000343798828125,
      "model_forward_time": 0.11530685424804688,
      "step": 56328
    },
    {
      "epoch": 0.000343798828125,
      "step": 56328,
      "training_step_time": 0.4419572353363037
    },
    {
      "epoch": 0.000343804931640625,
      "model_forward_time": 0.11533522605895996,
      "step": 56329
    },
    {
      "epoch": 0.000343804931640625,
      "step": 56329,
      "training_step_time": 0.46540403366088867
    },
    {
      "epoch": 0.00034381103515625,
      "grad_norm": 0.10447806864976883,
      "learning_rate": 1.0193912562150464e-06,
      "loss": 0.0369,
      "step": 56330
    },
    {
      "epoch": 0.00034381103515625,
      "model_forward_time": 0.11481380462646484,
      "step": 56330
    },
    {
      "epoch": 0.00034381103515625,
      "step": 56330,
      "training_step_time": 0.3970808982849121
    },
    {
      "epoch": 0.000343817138671875,
      "model_forward_time": 0.11519789695739746,
      "step": 56331
    },
    {
      "epoch": 0.000343817138671875,
      "step": 56331,
      "training_step_time": 0.3913121223449707
    },
    {
      "epoch": 0.0003438232421875,
      "model_forward_time": 0.1150202751159668,
      "step": 56332
    },
    {
      "epoch": 0.0003438232421875,
      "step": 56332,
      "training_step_time": 0.3948636054992676
    },
    {
      "epoch": 0.000343829345703125,
      "model_forward_time": 0.11464190483093262,
      "step": 56333
    },
    {
      "epoch": 0.000343829345703125,
      "step": 56333,
      "training_step_time": 0.38692474365234375
    },
    {
      "epoch": 0.00034383544921875,
      "model_forward_time": 0.11513829231262207,
      "step": 56334
    },
    {
      "epoch": 0.00034383544921875,
      "step": 56334,
      "training_step_time": 0.39016103744506836
    },
    {
      "epoch": 0.000343841552734375,
      "model_forward_time": 0.11537837982177734,
      "step": 56335
    },
    {
      "epoch": 0.000343841552734375,
      "step": 56335,
      "training_step_time": 0.3929321765899658
    },
    {
      "epoch": 0.00034384765625,
      "model_forward_time": 0.11513137817382812,
      "step": 56336
    },
    {
      "epoch": 0.00034384765625,
      "step": 56336,
      "training_step_time": 0.39150404930114746
    },
    {
      "epoch": 0.000343853759765625,
      "model_forward_time": 0.11501264572143555,
      "step": 56337
    },
    {
      "epoch": 0.000343853759765625,
      "step": 56337,
      "training_step_time": 0.3945009708404541
    },
    {
      "epoch": 0.00034385986328125,
      "model_forward_time": 0.1159219741821289,
      "step": 56338
    },
    {
      "epoch": 0.00034385986328125,
      "step": 56338,
      "training_step_time": 0.5041472911834717
    },
    {
      "epoch": 0.000343865966796875,
      "model_forward_time": 0.11514878273010254,
      "step": 56339
    },
    {
      "epoch": 0.000343865966796875,
      "step": 56339,
      "training_step_time": 0.4746873378753662
    },
    {
      "epoch": 0.0003438720703125,
      "grad_norm": 0.07285416126251221,
      "learning_rate": 1.0138623843548078e-06,
      "loss": 0.0354,
      "step": 56340
    },
    {
      "epoch": 0.0003438720703125,
      "model_forward_time": 0.1145625114440918,
      "step": 56340
    },
    {
      "epoch": 0.0003438720703125,
      "step": 56340,
      "training_step_time": 0.4597315788269043
    },
    {
      "epoch": 0.000343878173828125,
      "model_forward_time": 0.11534929275512695,
      "step": 56341
    },
    {
      "epoch": 0.000343878173828125,
      "step": 56341,
      "training_step_time": 0.42278003692626953
    },
    {
      "epoch": 0.00034388427734375,
      "model_forward_time": 0.1147315502166748,
      "step": 56342
    },
    {
      "epoch": 0.00034388427734375,
      "step": 56342,
      "training_step_time": 0.48404526710510254
    },
    {
      "epoch": 0.000343890380859375,
      "model_forward_time": 0.11475253105163574,
      "step": 56343
    },
    {
      "epoch": 0.000343890380859375,
      "step": 56343,
      "training_step_time": 0.4670114517211914
    },
    {
      "epoch": 0.000343896484375,
      "model_forward_time": 0.11558151245117188,
      "step": 56344
    },
    {
      "epoch": 0.000343896484375,
      "step": 56344,
      "training_step_time": 0.39317774772644043
    },
    {
      "epoch": 0.000343902587890625,
      "model_forward_time": 0.11519169807434082,
      "step": 56345
    },
    {
      "epoch": 0.000343902587890625,
      "step": 56345,
      "training_step_time": 0.3981659412384033
    },
    {
      "epoch": 0.00034390869140625,
      "model_forward_time": 0.11468338966369629,
      "step": 56346
    },
    {
      "epoch": 0.00034390869140625,
      "step": 56346,
      "training_step_time": 0.4010045528411865
    },
    {
      "epoch": 0.000343914794921875,
      "model_forward_time": 0.11553621292114258,
      "step": 56347
    },
    {
      "epoch": 0.000343914794921875,
      "step": 56347,
      "training_step_time": 0.3964879512786865
    },
    {
      "epoch": 0.0003439208984375,
      "model_forward_time": 0.11543107032775879,
      "step": 56348
    },
    {
      "epoch": 0.0003439208984375,
      "step": 56348,
      "training_step_time": 0.4077596664428711
    },
    {
      "epoch": 0.000343927001953125,
      "model_forward_time": 0.11591124534606934,
      "step": 56349
    },
    {
      "epoch": 0.000343927001953125,
      "step": 56349,
      "training_step_time": 0.411954402923584
    },
    {
      "epoch": 0.00034393310546875,
      "grad_norm": 0.07165487110614777,
      "learning_rate": 1.008348393189762e-06,
      "loss": 0.0382,
      "step": 56350
    },
    {
      "epoch": 0.00034393310546875,
      "model_forward_time": 0.11529898643493652,
      "step": 56350
    },
    {
      "epoch": 0.00034393310546875,
      "step": 56350,
      "training_step_time": 0.3949315547943115
    },
    {
      "epoch": 0.000343939208984375,
      "model_forward_time": 0.1156771183013916,
      "step": 56351
    },
    {
      "epoch": 0.000343939208984375,
      "step": 56351,
      "training_step_time": 0.396686315536499
    },
    {
      "epoch": 0.0003439453125,
      "model_forward_time": 0.11516475677490234,
      "step": 56352
    },
    {
      "epoch": 0.0003439453125,
      "step": 56352,
      "training_step_time": 0.4031682014465332
    },
    {
      "epoch": 0.000343951416015625,
      "model_forward_time": 0.11499261856079102,
      "step": 56353
    },
    {
      "epoch": 0.000343951416015625,
      "step": 56353,
      "training_step_time": 0.4807271957397461
    },
    {
      "epoch": 0.00034395751953125,
      "model_forward_time": 0.11492276191711426,
      "step": 56354
    },
    {
      "epoch": 0.00034395751953125,
      "step": 56354,
      "training_step_time": 0.4333817958831787
    },
    {
      "epoch": 0.000343963623046875,
      "model_forward_time": 0.1153876781463623,
      "step": 56355
    },
    {
      "epoch": 0.000343963623046875,
      "step": 56355,
      "training_step_time": 0.46538448333740234
    },
    {
      "epoch": 0.0003439697265625,
      "model_forward_time": 0.11505293846130371,
      "step": 56356
    },
    {
      "epoch": 0.0003439697265625,
      "step": 56356,
      "training_step_time": 0.3749561309814453
    },
    {
      "epoch": 0.000343975830078125,
      "model_forward_time": 0.11566543579101562,
      "step": 56357
    },
    {
      "epoch": 0.000343975830078125,
      "step": 56357,
      "training_step_time": 0.45012378692626953
    },
    {
      "epoch": 0.00034398193359375,
      "model_forward_time": 0.11509084701538086,
      "step": 56358
    },
    {
      "epoch": 0.00034398193359375,
      "step": 56358,
      "training_step_time": 0.41107797622680664
    },
    {
      "epoch": 0.000343988037109375,
      "model_forward_time": 0.11496496200561523,
      "step": 56359
    },
    {
      "epoch": 0.000343988037109375,
      "step": 56359,
      "training_step_time": 0.38762331008911133
    },
    {
      "epoch": 0.000343994140625,
      "grad_norm": 0.08601225167512894,
      "learning_rate": 1.00284928439493e-06,
      "loss": 0.0368,
      "step": 56360
    },
    {
      "epoch": 0.000343994140625,
      "model_forward_time": 0.1150827407836914,
      "step": 56360
    },
    {
      "epoch": 0.000343994140625,
      "step": 56360,
      "training_step_time": 0.3992934226989746
    },
    {
      "epoch": 0.000344000244140625,
      "model_forward_time": 0.11480903625488281,
      "step": 56361
    },
    {
      "epoch": 0.000344000244140625,
      "step": 56361,
      "training_step_time": 0.4389674663543701
    },
    {
      "epoch": 0.00034400634765625,
      "model_forward_time": 0.11530566215515137,
      "step": 56362
    },
    {
      "epoch": 0.00034400634765625,
      "step": 56362,
      "training_step_time": 0.39594244956970215
    },
    {
      "epoch": 0.000344012451171875,
      "model_forward_time": 0.1148231029510498,
      "step": 56363
    },
    {
      "epoch": 0.000344012451171875,
      "step": 56363,
      "training_step_time": 0.40346479415893555
    },
    {
      "epoch": 0.0003440185546875,
      "model_forward_time": 0.11495566368103027,
      "step": 56364
    },
    {
      "epoch": 0.0003440185546875,
      "step": 56364,
      "training_step_time": 0.41602253913879395
    },
    {
      "epoch": 0.000344024658203125,
      "model_forward_time": 0.11471819877624512,
      "step": 56365
    },
    {
      "epoch": 0.000344024658203125,
      "step": 56365,
      "training_step_time": 0.3946866989135742
    },
    {
      "epoch": 0.00034403076171875,
      "model_forward_time": 0.1145787239074707,
      "step": 56366
    },
    {
      "epoch": 0.00034403076171875,
      "step": 56366,
      "training_step_time": 0.3966648578643799
    },
    {
      "epoch": 0.000344036865234375,
      "model_forward_time": 0.11616921424865723,
      "step": 56367
    },
    {
      "epoch": 0.000344036865234375,
      "step": 56367,
      "training_step_time": 0.49500274658203125
    },
    {
      "epoch": 0.00034404296875,
      "model_forward_time": 0.11523866653442383,
      "step": 56368
    },
    {
      "epoch": 0.00034404296875,
      "step": 56368,
      "training_step_time": 0.42166900634765625
    },
    {
      "epoch": 0.000344049072265625,
      "model_forward_time": 0.11507797241210938,
      "step": 56369
    },
    {
      "epoch": 0.000344049072265625,
      "step": 56369,
      "training_step_time": 0.4884834289550781
    },
    {
      "epoch": 0.00034405517578125,
      "grad_norm": 0.08931553363800049,
      "learning_rate": 9.97365059640787e-07,
      "loss": 0.0354,
      "step": 56370
    },
    {
      "epoch": 0.00034405517578125,
      "model_forward_time": 0.1155233383178711,
      "step": 56370
    },
    {
      "epoch": 0.00034405517578125,
      "step": 56370,
      "training_step_time": 0.3996467590332031
    },
    {
      "epoch": 0.000344061279296875,
      "model_forward_time": 0.11413431167602539,
      "step": 56371
    },
    {
      "epoch": 0.000344061279296875,
      "step": 56371,
      "training_step_time": 0.49468088150024414
    },
    {
      "epoch": 0.0003440673828125,
      "model_forward_time": 0.11509180068969727,
      "step": 56372
    },
    {
      "epoch": 0.0003440673828125,
      "step": 56372,
      "training_step_time": 0.4529275894165039
    },
    {
      "epoch": 0.000344073486328125,
      "model_forward_time": 0.11491894721984863,
      "step": 56373
    },
    {
      "epoch": 0.000344073486328125,
      "step": 56373,
      "training_step_time": 0.38829803466796875
    },
    {
      "epoch": 0.00034407958984375,
      "model_forward_time": 0.1148381233215332,
      "step": 56374
    },
    {
      "epoch": 0.00034407958984375,
      "step": 56374,
      "training_step_time": 0.39635467529296875
    },
    {
      "epoch": 0.000344085693359375,
      "model_forward_time": 0.11568474769592285,
      "step": 56375
    },
    {
      "epoch": 0.000344085693359375,
      "step": 56375,
      "training_step_time": 0.3907337188720703
    },
    {
      "epoch": 0.000344091796875,
      "model_forward_time": 0.11499762535095215,
      "step": 56376
    },
    {
      "epoch": 0.000344091796875,
      "step": 56376,
      "training_step_time": 0.3968484401702881
    },
    {
      "epoch": 0.000344097900390625,
      "model_forward_time": 0.11450529098510742,
      "step": 56377
    },
    {
      "epoch": 0.000344097900390625,
      "step": 56377,
      "training_step_time": 0.4034237861633301
    },
    {
      "epoch": 0.00034410400390625,
      "model_forward_time": 0.11514449119567871,
      "step": 56378
    },
    {
      "epoch": 0.00034410400390625,
      "step": 56378,
      "training_step_time": 0.40793800354003906
    },
    {
      "epoch": 0.000344110107421875,
      "model_forward_time": 0.1153252124786377,
      "step": 56379
    },
    {
      "epoch": 0.000344110107421875,
      "step": 56379,
      "training_step_time": 0.3932492733001709
    },
    {
      "epoch": 0.0003441162109375,
      "grad_norm": 0.13914097845554352,
      "learning_rate": 9.918957205933e-07,
      "loss": 0.0366,
      "step": 56380
    },
    {
      "epoch": 0.0003441162109375,
      "model_forward_time": 0.1147775650024414,
      "step": 56380
    },
    {
      "epoch": 0.0003441162109375,
      "step": 56380,
      "training_step_time": 0.3997361660003662
    },
    {
      "epoch": 0.000344122314453125,
      "model_forward_time": 0.11522912979125977,
      "step": 56381
    },
    {
      "epoch": 0.000344122314453125,
      "step": 56381,
      "training_step_time": 0.41435837745666504
    },
    {
      "epoch": 0.00034412841796875,
      "model_forward_time": 0.11551713943481445,
      "step": 56382
    },
    {
      "epoch": 0.00034412841796875,
      "step": 56382,
      "training_step_time": 0.437758207321167
    },
    {
      "epoch": 0.000344134521484375,
      "model_forward_time": 0.11592459678649902,
      "step": 56383
    },
    {
      "epoch": 0.000344134521484375,
      "step": 56383,
      "training_step_time": 0.43369054794311523
    },
    {
      "epoch": 0.000344140625,
      "model_forward_time": 0.11478281021118164,
      "step": 56384
    },
    {
      "epoch": 0.000344140625,
      "step": 56384,
      "training_step_time": 0.4047513008117676
    },
    {
      "epoch": 0.000344146728515625,
      "model_forward_time": 0.1150665283203125,
      "step": 56385
    },
    {
      "epoch": 0.000344146728515625,
      "step": 56385,
      "training_step_time": 0.417818546295166
    },
    {
      "epoch": 0.00034415283203125,
      "model_forward_time": 0.11426973342895508,
      "step": 56386
    },
    {
      "epoch": 0.00034415283203125,
      "step": 56386,
      "training_step_time": 0.41875696182250977
    },
    {
      "epoch": 0.000344158935546875,
      "model_forward_time": 0.11526823043823242,
      "step": 56387
    },
    {
      "epoch": 0.000344158935546875,
      "step": 56387,
      "training_step_time": 0.41531825065612793
    },
    {
      "epoch": 0.0003441650390625,
      "model_forward_time": 0.11492657661437988,
      "step": 56388
    },
    {
      "epoch": 0.0003441650390625,
      "step": 56388,
      "training_step_time": 0.3806321620941162
    },
    {
      "epoch": 0.000344171142578125,
      "model_forward_time": 0.11620926856994629,
      "step": 56389
    },
    {
      "epoch": 0.000344171142578125,
      "step": 56389,
      "training_step_time": 0.40079236030578613
    },
    {
      "epoch": 0.00034417724609375,
      "grad_norm": 0.07329167425632477,
      "learning_rate": 9.864412689139123e-07,
      "loss": 0.0338,
      "step": 56390
    },
    {
      "epoch": 0.00034417724609375,
      "model_forward_time": 0.11473846435546875,
      "step": 56390
    },
    {
      "epoch": 0.00034417724609375,
      "step": 56390,
      "training_step_time": 0.4022862911224365
    },
    {
      "epoch": 0.000344183349609375,
      "model_forward_time": 0.11522889137268066,
      "step": 56391
    },
    {
      "epoch": 0.000344183349609375,
      "step": 56391,
      "training_step_time": 0.4051475524902344
    },
    {
      "epoch": 0.000344189453125,
      "model_forward_time": 0.11521768569946289,
      "step": 56392
    },
    {
      "epoch": 0.000344189453125,
      "step": 56392,
      "training_step_time": 0.40381503105163574
    },
    {
      "epoch": 0.000344195556640625,
      "model_forward_time": 0.11578106880187988,
      "step": 56393
    },
    {
      "epoch": 0.000344195556640625,
      "step": 56393,
      "training_step_time": 0.394855260848999
    },
    {
      "epoch": 0.00034420166015625,
      "model_forward_time": 0.11529731750488281,
      "step": 56394
    },
    {
      "epoch": 0.00034420166015625,
      "step": 56394,
      "training_step_time": 0.39701056480407715
    },
    {
      "epoch": 0.000344207763671875,
      "model_forward_time": 0.1148078441619873,
      "step": 56395
    },
    {
      "epoch": 0.000344207763671875,
      "step": 56395,
      "training_step_time": 0.3954193592071533
    },
    {
      "epoch": 0.0003442138671875,
      "model_forward_time": 0.11503028869628906,
      "step": 56396
    },
    {
      "epoch": 0.0003442138671875,
      "step": 56396,
      "training_step_time": 0.4162178039550781
    },
    {
      "epoch": 0.000344219970703125,
      "model_forward_time": 0.11497163772583008,
      "step": 56397
    },
    {
      "epoch": 0.000344219970703125,
      "step": 56397,
      "training_step_time": 0.3923978805541992
    },
    {
      "epoch": 0.00034422607421875,
      "model_forward_time": 0.11529541015625,
      "step": 56398
    },
    {
      "epoch": 0.00034422607421875,
      "step": 56398,
      "training_step_time": 0.449493408203125
    },
    {
      "epoch": 0.000344232177734375,
      "model_forward_time": 0.11493229866027832,
      "step": 56399
    },
    {
      "epoch": 0.000344232177734375,
      "step": 56399,
      "training_step_time": 0.39282798767089844
    },
    {
      "epoch": 0.00034423828125,
      "grad_norm": 0.08085841685533524,
      "learning_rate": 9.810017062595322e-07,
      "loss": 0.0315,
      "step": 56400
    },
    {
      "epoch": 0.00034423828125,
      "model_forward_time": 0.11583781242370605,
      "step": 56400
    },
    {
      "epoch": 0.00034423828125,
      "step": 56400,
      "training_step_time": 0.45460987091064453
    },
    {
      "epoch": 0.000344244384765625,
      "model_forward_time": 0.11511993408203125,
      "step": 56401
    },
    {
      "epoch": 0.000344244384765625,
      "step": 56401,
      "training_step_time": 0.4756472110748291
    },
    {
      "epoch": 0.00034425048828125,
      "model_forward_time": 0.11519432067871094,
      "step": 56402
    },
    {
      "epoch": 0.00034425048828125,
      "step": 56402,
      "training_step_time": 0.4451158046722412
    },
    {
      "epoch": 0.000344256591796875,
      "model_forward_time": 0.11469268798828125,
      "step": 56403
    },
    {
      "epoch": 0.000344256591796875,
      "step": 56403,
      "training_step_time": 0.395613431930542
    },
    {
      "epoch": 0.0003442626953125,
      "model_forward_time": 0.11491060256958008,
      "step": 56404
    },
    {
      "epoch": 0.0003442626953125,
      "step": 56404,
      "training_step_time": 0.39136672019958496
    },
    {
      "epoch": 0.000344268798828125,
      "model_forward_time": 0.11478972434997559,
      "step": 56405
    },
    {
      "epoch": 0.000344268798828125,
      "step": 56405,
      "training_step_time": 0.39650750160217285
    },
    {
      "epoch": 0.00034427490234375,
      "model_forward_time": 0.11514568328857422,
      "step": 56406
    },
    {
      "epoch": 0.00034427490234375,
      "step": 56406,
      "training_step_time": 0.555452823638916
    },
    {
      "epoch": 0.000344281005859375,
      "model_forward_time": 0.11460208892822266,
      "step": 56407
    },
    {
      "epoch": 0.000344281005859375,
      "step": 56407,
      "training_step_time": 0.3928194046020508
    },
    {
      "epoch": 0.000344287109375,
      "model_forward_time": 0.11559176445007324,
      "step": 56408
    },
    {
      "epoch": 0.000344287109375,
      "step": 56408,
      "training_step_time": 0.39251708984375
    },
    {
      "epoch": 0.000344293212890625,
      "model_forward_time": 0.11497879028320312,
      "step": 56409
    },
    {
      "epoch": 0.000344293212890625,
      "step": 56409,
      "training_step_time": 0.43309688568115234
    },
    {
      "epoch": 0.00034429931640625,
      "grad_norm": 0.09761805087327957,
      "learning_rate": 9.75577034282571e-07,
      "loss": 0.035,
      "step": 56410
    },
    {
      "epoch": 0.00034429931640625,
      "model_forward_time": 0.11492753028869629,
      "step": 56410
    },
    {
      "epoch": 0.00034429931640625,
      "step": 56410,
      "training_step_time": 0.4472010135650635
    },
    {
      "epoch": 0.000344305419921875,
      "model_forward_time": 0.11490559577941895,
      "step": 56411
    },
    {
      "epoch": 0.000344305419921875,
      "step": 56411,
      "training_step_time": 0.39933347702026367
    },
    {
      "epoch": 0.0003443115234375,
      "model_forward_time": 0.11486005783081055,
      "step": 56412
    },
    {
      "epoch": 0.0003443115234375,
      "step": 56412,
      "training_step_time": 0.7427661418914795
    },
    {
      "epoch": 0.000344317626953125,
      "model_forward_time": 0.11428403854370117,
      "step": 56413
    },
    {
      "epoch": 0.000344317626953125,
      "step": 56413,
      "training_step_time": 0.3792891502380371
    },
    {
      "epoch": 0.00034432373046875,
      "model_forward_time": 0.11435317993164062,
      "step": 56414
    },
    {
      "epoch": 0.00034432373046875,
      "step": 56414,
      "training_step_time": 0.49416232109069824
    },
    {
      "epoch": 0.000344329833984375,
      "model_forward_time": 0.11421489715576172,
      "step": 56415
    },
    {
      "epoch": 0.000344329833984375,
      "step": 56415,
      "training_step_time": 0.4848673343658447
    },
    {
      "epoch": 0.0003443359375,
      "model_forward_time": 0.11426687240600586,
      "step": 56416
    },
    {
      "epoch": 0.0003443359375,
      "step": 56416,
      "training_step_time": 0.3822755813598633
    },
    {
      "epoch": 0.000344342041015625,
      "model_forward_time": 0.11448287963867188,
      "step": 56417
    },
    {
      "epoch": 0.000344342041015625,
      "step": 56417,
      "training_step_time": 0.38193559646606445
    },
    {
      "epoch": 0.00034434814453125,
      "model_forward_time": 0.11481475830078125,
      "step": 56418
    },
    {
      "epoch": 0.00034434814453125,
      "step": 56418,
      "training_step_time": 0.3993821144104004
    },
    {
      "epoch": 0.000344354248046875,
      "model_forward_time": 0.1149909496307373,
      "step": 56419
    },
    {
      "epoch": 0.000344354248046875,
      "step": 56419,
      "training_step_time": 0.38633108139038086
    },
    {
      "epoch": 0.0003443603515625,
      "grad_norm": 0.08979687839746475,
      "learning_rate": 9.701672546308827e-07,
      "loss": 0.0352,
      "step": 56420
    },
    {
      "epoch": 0.0003443603515625,
      "model_forward_time": 0.11492490768432617,
      "step": 56420
    },
    {
      "epoch": 0.0003443603515625,
      "step": 56420,
      "training_step_time": 0.3935081958770752
    },
    {
      "epoch": 0.000344366455078125,
      "model_forward_time": 0.11461162567138672,
      "step": 56421
    },
    {
      "epoch": 0.000344366455078125,
      "step": 56421,
      "training_step_time": 0.3909034729003906
    },
    {
      "epoch": 0.00034437255859375,
      "model_forward_time": 0.11559534072875977,
      "step": 56422
    },
    {
      "epoch": 0.00034437255859375,
      "step": 56422,
      "training_step_time": 0.4133632183074951
    },
    {
      "epoch": 0.000344378662109375,
      "model_forward_time": 0.11503744125366211,
      "step": 56423
    },
    {
      "epoch": 0.000344378662109375,
      "step": 56423,
      "training_step_time": 0.39546871185302734
    },
    {
      "epoch": 0.000344384765625,
      "model_forward_time": 0.11575508117675781,
      "step": 56424
    },
    {
      "epoch": 0.000344384765625,
      "step": 56424,
      "training_step_time": 0.6715555191040039
    },
    {
      "epoch": 0.000344390869140625,
      "model_forward_time": 0.11511659622192383,
      "step": 56425
    },
    {
      "epoch": 0.000344390869140625,
      "step": 56425,
      "training_step_time": 0.39741039276123047
    },
    {
      "epoch": 0.00034439697265625,
      "model_forward_time": 0.11490154266357422,
      "step": 56426
    },
    {
      "epoch": 0.00034439697265625,
      "step": 56426,
      "training_step_time": 0.39583826065063477
    },
    {
      "epoch": 0.000344403076171875,
      "model_forward_time": 0.11474084854125977,
      "step": 56427
    },
    {
      "epoch": 0.000344403076171875,
      "step": 56427,
      "training_step_time": 0.41835761070251465
    },
    {
      "epoch": 0.0003444091796875,
      "model_forward_time": 0.11504769325256348,
      "step": 56428
    },
    {
      "epoch": 0.0003444091796875,
      "step": 56428,
      "training_step_time": 0.3985564708709717
    },
    {
      "epoch": 0.000344415283203125,
      "model_forward_time": 0.1150197982788086,
      "step": 56429
    },
    {
      "epoch": 0.000344415283203125,
      "step": 56429,
      "training_step_time": 0.48935890197753906
    },
    {
      "epoch": 0.00034442138671875,
      "grad_norm": 0.09866763651371002,
      "learning_rate": 9.647723689478305e-07,
      "loss": 0.036,
      "step": 56430
    },
    {
      "epoch": 0.00034442138671875,
      "model_forward_time": 0.11507058143615723,
      "step": 56430
    },
    {
      "epoch": 0.00034442138671875,
      "step": 56430,
      "training_step_time": 0.568650484085083
    },
    {
      "epoch": 0.000344427490234375,
      "model_forward_time": 0.11480331420898438,
      "step": 56431
    },
    {
      "epoch": 0.000344427490234375,
      "step": 56431,
      "training_step_time": 0.3966689109802246
    },
    {
      "epoch": 0.00034443359375,
      "model_forward_time": 0.11450648307800293,
      "step": 56432
    },
    {
      "epoch": 0.00034443359375,
      "step": 56432,
      "training_step_time": 0.3903543949127197
    },
    {
      "epoch": 0.000344439697265625,
      "model_forward_time": 0.11493515968322754,
      "step": 56433
    },
    {
      "epoch": 0.000344439697265625,
      "step": 56433,
      "training_step_time": 0.41136789321899414
    },
    {
      "epoch": 0.00034444580078125,
      "model_forward_time": 0.1147773265838623,
      "step": 56434
    },
    {
      "epoch": 0.00034444580078125,
      "step": 56434,
      "training_step_time": 0.392042875289917
    },
    {
      "epoch": 0.000344451904296875,
      "model_forward_time": 0.11476731300354004,
      "step": 56435
    },
    {
      "epoch": 0.000344451904296875,
      "step": 56435,
      "training_step_time": 0.4028792381286621
    },
    {
      "epoch": 0.0003444580078125,
      "model_forward_time": 0.11543965339660645,
      "step": 56436
    },
    {
      "epoch": 0.0003444580078125,
      "step": 56436,
      "training_step_time": 0.710885763168335
    },
    {
      "epoch": 0.000344464111328125,
      "model_forward_time": 0.11426258087158203,
      "step": 56437
    },
    {
      "epoch": 0.000344464111328125,
      "step": 56437,
      "training_step_time": 0.3941638469696045
    },
    {
      "epoch": 0.00034447021484375,
      "model_forward_time": 0.11458587646484375,
      "step": 56438
    },
    {
      "epoch": 0.00034447021484375,
      "step": 56438,
      "training_step_time": 0.4746384620666504
    },
    {
      "epoch": 0.000344476318359375,
      "model_forward_time": 0.11416363716125488,
      "step": 56439
    },
    {
      "epoch": 0.000344476318359375,
      "step": 56439,
      "training_step_time": 0.3949863910675049
    },
    {
      "epoch": 0.000344482421875,
      "grad_norm": 0.12016908824443817,
      "learning_rate": 9.593923788722315e-07,
      "loss": 0.0317,
      "step": 56440
    },
    {
      "epoch": 0.000344482421875,
      "model_forward_time": 0.11478900909423828,
      "step": 56440
    },
    {
      "epoch": 0.000344482421875,
      "step": 56440,
      "training_step_time": 0.445192813873291
    },
    {
      "epoch": 0.000344488525390625,
      "model_forward_time": 0.11440587043762207,
      "step": 56441
    },
    {
      "epoch": 0.000344488525390625,
      "step": 56441,
      "training_step_time": 0.49861693382263184
    },
    {
      "epoch": 0.00034449462890625,
      "model_forward_time": 0.11473226547241211,
      "step": 56442
    },
    {
      "epoch": 0.00034449462890625,
      "step": 56442,
      "training_step_time": 0.44243359565734863
    },
    {
      "epoch": 0.000344500732421875,
      "model_forward_time": 0.11484289169311523,
      "step": 56443
    },
    {
      "epoch": 0.000344500732421875,
      "step": 56443,
      "training_step_time": 0.4761362075805664
    },
    {
      "epoch": 0.0003445068359375,
      "model_forward_time": 0.11412787437438965,
      "step": 56444
    },
    {
      "epoch": 0.0003445068359375,
      "step": 56444,
      "training_step_time": 0.39818882942199707
    },
    {
      "epoch": 0.000344512939453125,
      "model_forward_time": 0.11464166641235352,
      "step": 56445
    },
    {
      "epoch": 0.000344512939453125,
      "step": 56445,
      "training_step_time": 0.383897066116333
    },
    {
      "epoch": 0.00034451904296875,
      "model_forward_time": 0.11437368392944336,
      "step": 56446
    },
    {
      "epoch": 0.00034451904296875,
      "step": 56446,
      "training_step_time": 0.39902591705322266
    },
    {
      "epoch": 0.000344525146484375,
      "model_forward_time": 0.1149909496307373,
      "step": 56447
    },
    {
      "epoch": 0.000344525146484375,
      "step": 56447,
      "training_step_time": 0.39610958099365234
    },
    {
      "epoch": 0.00034453125,
      "model_forward_time": 0.11457228660583496,
      "step": 56448
    },
    {
      "epoch": 0.00034453125,
      "step": 56448,
      "training_step_time": 0.4866163730621338
    },
    {
      "epoch": 0.000344537353515625,
      "model_forward_time": 0.11499309539794922,
      "step": 56449
    },
    {
      "epoch": 0.000344537353515625,
      "step": 56449,
      "training_step_time": 0.43543100357055664
    },
    {
      "epoch": 0.00034454345703125,
      "grad_norm": 0.0932324156165123,
      "learning_rate": 9.540272860383837e-07,
      "loss": 0.0335,
      "step": 56450
    },
    {
      "epoch": 0.00034454345703125,
      "model_forward_time": 0.11529207229614258,
      "step": 56450
    },
    {
      "epoch": 0.00034454345703125,
      "step": 56450,
      "training_step_time": 0.40472865104675293
    },
    {
      "epoch": 0.000344549560546875,
      "model_forward_time": 0.11472058296203613,
      "step": 56451
    },
    {
      "epoch": 0.000344549560546875,
      "step": 56451,
      "training_step_time": 0.397247314453125
    },
    {
      "epoch": 0.0003445556640625,
      "model_forward_time": 0.11703944206237793,
      "step": 56452
    },
    {
      "epoch": 0.0003445556640625,
      "step": 56452,
      "training_step_time": 0.45697951316833496
    },
    {
      "epoch": 0.000344561767578125,
      "model_forward_time": 0.11442804336547852,
      "step": 56453
    },
    {
      "epoch": 0.000344561767578125,
      "step": 56453,
      "training_step_time": 0.39487218856811523
    },
    {
      "epoch": 0.00034456787109375,
      "model_forward_time": 0.1148977279663086,
      "step": 56454
    },
    {
      "epoch": 0.00034456787109375,
      "step": 56454,
      "training_step_time": 0.5375995635986328
    },
    {
      "epoch": 0.000344573974609375,
      "model_forward_time": 0.11481380462646484,
      "step": 56455
    },
    {
      "epoch": 0.000344573974609375,
      "step": 56455,
      "training_step_time": 0.43762707710266113
    },
    {
      "epoch": 0.000344580078125,
      "model_forward_time": 0.11460494995117188,
      "step": 56456
    },
    {
      "epoch": 0.000344580078125,
      "step": 56456,
      "training_step_time": 0.4533565044403076
    },
    {
      "epoch": 0.000344586181640625,
      "model_forward_time": 0.11515283584594727,
      "step": 56457
    },
    {
      "epoch": 0.000344586181640625,
      "step": 56457,
      "training_step_time": 0.49214696884155273
    },
    {
      "epoch": 0.00034459228515625,
      "model_forward_time": 0.1147925853729248,
      "step": 56458
    },
    {
      "epoch": 0.00034459228515625,
      "step": 56458,
      "training_step_time": 0.4047520160675049
    },
    {
      "epoch": 0.000344598388671875,
      "model_forward_time": 0.11461400985717773,
      "step": 56459
    },
    {
      "epoch": 0.000344598388671875,
      "step": 56459,
      "training_step_time": 0.39800214767456055
    },
    {
      "epoch": 0.0003446044921875,
      "grad_norm": 0.07236398756504059,
      "learning_rate": 9.486770920760668e-07,
      "loss": 0.0331,
      "step": 56460
    },
    {
      "epoch": 0.0003446044921875,
      "model_forward_time": 0.11458349227905273,
      "step": 56460
    },
    {
      "epoch": 0.0003446044921875,
      "step": 56460,
      "training_step_time": 0.4238615036010742
    },
    {
      "epoch": 0.000344610595703125,
      "model_forward_time": 0.11512613296508789,
      "step": 56461
    },
    {
      "epoch": 0.000344610595703125,
      "step": 56461,
      "training_step_time": 0.3963639736175537
    },
    {
      "epoch": 0.00034461669921875,
      "model_forward_time": 0.1150217056274414,
      "step": 56462
    },
    {
      "epoch": 0.00034461669921875,
      "step": 56462,
      "training_step_time": 0.4264512062072754
    },
    {
      "epoch": 0.000344622802734375,
      "model_forward_time": 0.11463618278503418,
      "step": 56463
    },
    {
      "epoch": 0.000344622802734375,
      "step": 56463,
      "training_step_time": 0.40010929107666016
    },
    {
      "epoch": 0.00034462890625,
      "model_forward_time": 0.11603617668151855,
      "step": 56464
    },
    {
      "epoch": 0.00034462890625,
      "step": 56464,
      "training_step_time": 0.40447092056274414
    },
    {
      "epoch": 0.000344635009765625,
      "model_forward_time": 0.11587667465209961,
      "step": 56465
    },
    {
      "epoch": 0.000344635009765625,
      "step": 56465,
      "training_step_time": 0.4018571376800537
    },
    {
      "epoch": 0.00034464111328125,
      "model_forward_time": 0.11482644081115723,
      "step": 56466
    },
    {
      "epoch": 0.00034464111328125,
      "step": 56466,
      "training_step_time": 0.6540684700012207
    },
    {
      "epoch": 0.000344647216796875,
      "model_forward_time": 0.11501288414001465,
      "step": 56467
    },
    {
      "epoch": 0.000344647216796875,
      "step": 56467,
      "training_step_time": 0.4050264358520508
    },
    {
      "epoch": 0.0003446533203125,
      "model_forward_time": 0.11426711082458496,
      "step": 56468
    },
    {
      "epoch": 0.0003446533203125,
      "step": 56468,
      "training_step_time": 0.4142594337463379
    },
    {
      "epoch": 0.000344659423828125,
      "model_forward_time": 0.1146237850189209,
      "step": 56469
    },
    {
      "epoch": 0.000344659423828125,
      "step": 56469,
      "training_step_time": 0.3936147689819336
    },
    {
      "epoch": 0.00034466552734375,
      "grad_norm": 0.074351966381073,
      "learning_rate": 9.433417986105198e-07,
      "loss": 0.0329,
      "step": 56470
    },
    {
      "epoch": 0.00034466552734375,
      "model_forward_time": 0.11502194404602051,
      "step": 56470
    },
    {
      "epoch": 0.00034466552734375,
      "step": 56470,
      "training_step_time": 0.37924718856811523
    },
    {
      "epoch": 0.000344671630859375,
      "model_forward_time": 0.11429333686828613,
      "step": 56471
    },
    {
      "epoch": 0.000344671630859375,
      "step": 56471,
      "training_step_time": 0.48602819442749023
    },
    {
      "epoch": 0.000344677734375,
      "model_forward_time": 0.11555290222167969,
      "step": 56472
    },
    {
      "epoch": 0.000344677734375,
      "step": 56472,
      "training_step_time": 0.4725074768066406
    },
    {
      "epoch": 0.000344683837890625,
      "model_forward_time": 0.11445927619934082,
      "step": 56473
    },
    {
      "epoch": 0.000344683837890625,
      "step": 56473,
      "training_step_time": 0.39438891410827637
    },
    {
      "epoch": 0.00034468994140625,
      "model_forward_time": 0.11503243446350098,
      "step": 56474
    },
    {
      "epoch": 0.00034468994140625,
      "step": 56474,
      "training_step_time": 0.38997626304626465
    },
    {
      "epoch": 0.000344696044921875,
      "model_forward_time": 0.11532711982727051,
      "step": 56475
    },
    {
      "epoch": 0.000344696044921875,
      "step": 56475,
      "training_step_time": 0.40366363525390625
    },
    {
      "epoch": 0.0003447021484375,
      "model_forward_time": 0.11503839492797852,
      "step": 56476
    },
    {
      "epoch": 0.0003447021484375,
      "step": 56476,
      "training_step_time": 0.39902424812316895
    },
    {
      "epoch": 0.000344708251953125,
      "model_forward_time": 0.11551141738891602,
      "step": 56477
    },
    {
      "epoch": 0.000344708251953125,
      "step": 56477,
      "training_step_time": 0.39865565299987793
    },
    {
      "epoch": 0.00034471435546875,
      "model_forward_time": 0.11533880233764648,
      "step": 56478
    },
    {
      "epoch": 0.00034471435546875,
      "step": 56478,
      "training_step_time": 0.6076247692108154
    },
    {
      "epoch": 0.000344720458984375,
      "model_forward_time": 0.11528658866882324,
      "step": 56479
    },
    {
      "epoch": 0.000344720458984375,
      "step": 56479,
      "training_step_time": 0.4200704097747803
    },
    {
      "epoch": 0.0003447265625,
      "grad_norm": 0.08046532422304153,
      "learning_rate": 9.380214072624682e-07,
      "loss": 0.0376,
      "step": 56480
    },
    {
      "epoch": 0.0003447265625,
      "model_forward_time": 0.11508965492248535,
      "step": 56480
    },
    {
      "epoch": 0.0003447265625,
      "step": 56480,
      "training_step_time": 0.408740758895874
    },
    {
      "epoch": 0.000344732666015625,
      "model_forward_time": 0.11449575424194336,
      "step": 56481
    },
    {
      "epoch": 0.000344732666015625,
      "step": 56481,
      "training_step_time": 0.41248154640197754
    },
    {
      "epoch": 0.00034473876953125,
      "model_forward_time": 0.11491680145263672,
      "step": 56482
    },
    {
      "epoch": 0.00034473876953125,
      "step": 56482,
      "training_step_time": 0.3999457359313965
    },
    {
      "epoch": 0.000344744873046875,
      "model_forward_time": 0.11486148834228516,
      "step": 56483
    },
    {
      "epoch": 0.000344744873046875,
      "step": 56483,
      "training_step_time": 0.3978390693664551
    },
    {
      "epoch": 0.0003447509765625,
      "model_forward_time": 0.11522173881530762,
      "step": 56484
    },
    {
      "epoch": 0.0003447509765625,
      "step": 56484,
      "training_step_time": 0.6395382881164551
    },
    {
      "epoch": 0.000344757080078125,
      "model_forward_time": 0.11535477638244629,
      "step": 56485
    },
    {
      "epoch": 0.000344757080078125,
      "step": 56485,
      "training_step_time": 0.5049653053283691
    },
    {
      "epoch": 0.00034476318359375,
      "model_forward_time": 0.1151113510131836,
      "step": 56486
    },
    {
      "epoch": 0.00034476318359375,
      "step": 56486,
      "training_step_time": 0.44099879264831543
    },
    {
      "epoch": 0.000344769287109375,
      "model_forward_time": 0.1149601936340332,
      "step": 56487
    },
    {
      "epoch": 0.000344769287109375,
      "step": 56487,
      "training_step_time": 0.39413952827453613
    },
    {
      "epoch": 0.000344775390625,
      "model_forward_time": 0.11442923545837402,
      "step": 56488
    },
    {
      "epoch": 0.000344775390625,
      "step": 56488,
      "training_step_time": 0.4145524501800537
    },
    {
      "epoch": 0.000344781494140625,
      "model_forward_time": 0.11430740356445312,
      "step": 56489
    },
    {
      "epoch": 0.000344781494140625,
      "step": 56489,
      "training_step_time": 0.3994476795196533
    },
    {
      "epoch": 0.00034478759765625,
      "grad_norm": 0.08446195721626282,
      "learning_rate": 9.327159196481138e-07,
      "loss": 0.0388,
      "step": 56490
    },
    {
      "epoch": 0.00034478759765625,
      "model_forward_time": 0.11406064033508301,
      "step": 56490
    },
    {
      "epoch": 0.00034478759765625,
      "step": 56490,
      "training_step_time": 0.466139554977417
    },
    {
      "epoch": 0.000344793701171875,
      "model_forward_time": 0.1165158748626709,
      "step": 56491
    },
    {
      "epoch": 0.000344793701171875,
      "step": 56491,
      "training_step_time": 0.41135334968566895
    },
    {
      "epoch": 0.0003447998046875,
      "model_forward_time": 0.11483430862426758,
      "step": 56492
    },
    {
      "epoch": 0.0003447998046875,
      "step": 56492,
      "training_step_time": 0.3959474563598633
    },
    {
      "epoch": 0.000344805908203125,
      "model_forward_time": 0.1146857738494873,
      "step": 56493
    },
    {
      "epoch": 0.000344805908203125,
      "step": 56493,
      "training_step_time": 0.4701721668243408
    },
    {
      "epoch": 0.00034481201171875,
      "model_forward_time": 0.11517667770385742,
      "step": 56494
    },
    {
      "epoch": 0.00034481201171875,
      "step": 56494,
      "training_step_time": 0.4236109256744385
    },
    {
      "epoch": 0.000344818115234375,
      "model_forward_time": 0.11515617370605469,
      "step": 56495
    },
    {
      "epoch": 0.000344818115234375,
      "step": 56495,
      "training_step_time": 0.4212460517883301
    },
    {
      "epoch": 0.00034482421875,
      "model_forward_time": 0.11477208137512207,
      "step": 56496
    },
    {
      "epoch": 0.00034482421875,
      "step": 56496,
      "training_step_time": 0.5348720550537109
    },
    {
      "epoch": 0.000344830322265625,
      "model_forward_time": 0.11497306823730469,
      "step": 56497
    },
    {
      "epoch": 0.000344830322265625,
      "step": 56497,
      "training_step_time": 0.39690160751342773
    },
    {
      "epoch": 0.00034483642578125,
      "model_forward_time": 0.1153566837310791,
      "step": 56498
    },
    {
      "epoch": 0.00034483642578125,
      "step": 56498,
      "training_step_time": 0.41638851165771484
    },
    {
      "epoch": 0.000344842529296875,
      "model_forward_time": 0.11506485939025879,
      "step": 56499
    },
    {
      "epoch": 0.000344842529296875,
      "step": 56499,
      "training_step_time": 0.4113771915435791
    },
    {
      "epoch": 0.0003448486328125,
      "grad_norm": 0.11139001697301865,
      "learning_rate": 9.274253373791064e-07,
      "loss": 0.037,
      "step": 56500
    },
    {
      "epoch": 0.0003448486328125,
      "model_forward_time": 0.11503076553344727,
      "step": 56500
    },
    {
      "epoch": 0.0003448486328125,
      "step": 56500,
      "training_step_time": 0.4651060104370117
    },
    {
      "epoch": 0.000344854736328125,
      "model_forward_time": 0.11546540260314941,
      "step": 56501
    },
    {
      "epoch": 0.000344854736328125,
      "step": 56501,
      "training_step_time": 0.48480772972106934
    },
    {
      "epoch": 0.00034486083984375,
      "model_forward_time": 0.11520600318908691,
      "step": 56502
    },
    {
      "epoch": 0.00034486083984375,
      "step": 56502,
      "training_step_time": 0.4765315055847168
    },
    {
      "epoch": 0.000344866943359375,
      "model_forward_time": 0.11483621597290039,
      "step": 56503
    },
    {
      "epoch": 0.000344866943359375,
      "step": 56503,
      "training_step_time": 0.3902416229248047
    },
    {
      "epoch": 0.000344873046875,
      "model_forward_time": 0.11430859565734863,
      "step": 56504
    },
    {
      "epoch": 0.000344873046875,
      "step": 56504,
      "training_step_time": 0.3947713375091553
    },
    {
      "epoch": 0.000344879150390625,
      "model_forward_time": 0.11606192588806152,
      "step": 56505
    },
    {
      "epoch": 0.000344879150390625,
      "step": 56505,
      "training_step_time": 0.3942263126373291
    },
    {
      "epoch": 0.00034488525390625,
      "model_forward_time": 0.11529779434204102,
      "step": 56506
    },
    {
      "epoch": 0.00034488525390625,
      "step": 56506,
      "training_step_time": 0.3918633460998535
    },
    {
      "epoch": 0.000344891357421875,
      "model_forward_time": 0.11541032791137695,
      "step": 56507
    },
    {
      "epoch": 0.000344891357421875,
      "step": 56507,
      "training_step_time": 0.3948802947998047
    },
    {
      "epoch": 0.0003448974609375,
      "model_forward_time": 0.11495399475097656,
      "step": 56508
    },
    {
      "epoch": 0.0003448974609375,
      "step": 56508,
      "training_step_time": 0.593883752822876
    },
    {
      "epoch": 0.000344903564453125,
      "model_forward_time": 0.1151113510131836,
      "step": 56509
    },
    {
      "epoch": 0.000344903564453125,
      "step": 56509,
      "training_step_time": 0.4138507843017578
    },
    {
      "epoch": 0.00034490966796875,
      "grad_norm": 0.10432688146829605,
      "learning_rate": 9.221496620626047e-07,
      "loss": 0.036,
      "step": 56510
    },
    {
      "epoch": 0.00034490966796875,
      "model_forward_time": 0.11482501029968262,
      "step": 56510
    },
    {
      "epoch": 0.00034490966796875,
      "step": 56510,
      "training_step_time": 0.40299105644226074
    },
    {
      "epoch": 0.000344915771484375,
      "model_forward_time": 0.11487793922424316,
      "step": 56511
    },
    {
      "epoch": 0.000344915771484375,
      "step": 56511,
      "training_step_time": 0.396512508392334
    },
    {
      "epoch": 0.000344921875,
      "model_forward_time": 0.11479902267456055,
      "step": 56512
    },
    {
      "epoch": 0.000344921875,
      "step": 56512,
      "training_step_time": 0.477064847946167
    },
    {
      "epoch": 0.000344927978515625,
      "model_forward_time": 0.1150815486907959,
      "step": 56513
    },
    {
      "epoch": 0.000344927978515625,
      "step": 56513,
      "training_step_time": 0.47977757453918457
    },
    {
      "epoch": 0.00034493408203125,
      "model_forward_time": 0.11457991600036621,
      "step": 56514
    },
    {
      "epoch": 0.00034493408203125,
      "step": 56514,
      "training_step_time": 0.507941484451294
    },
    {
      "epoch": 0.000344940185546875,
      "model_forward_time": 0.1149449348449707,
      "step": 56515
    },
    {
      "epoch": 0.000344940185546875,
      "step": 56515,
      "training_step_time": 0.4284534454345703
    },
    {
      "epoch": 0.0003449462890625,
      "model_forward_time": 0.11482787132263184,
      "step": 56516
    },
    {
      "epoch": 0.0003449462890625,
      "step": 56516,
      "training_step_time": 0.3923056125640869
    },
    {
      "epoch": 0.000344952392578125,
      "model_forward_time": 0.1147611141204834,
      "step": 56517
    },
    {
      "epoch": 0.000344952392578125,
      "step": 56517,
      "training_step_time": 0.3934502601623535
    },
    {
      "epoch": 0.00034495849609375,
      "model_forward_time": 0.11476516723632812,
      "step": 56518
    },
    {
      "epoch": 0.00034495849609375,
      "step": 56518,
      "training_step_time": 0.40198278427124023
    },
    {
      "epoch": 0.000344964599609375,
      "model_forward_time": 0.11523103713989258,
      "step": 56519
    },
    {
      "epoch": 0.000344964599609375,
      "step": 56519,
      "training_step_time": 0.4001121520996094
    },
    {
      "epoch": 0.000344970703125,
      "grad_norm": 0.10439755022525787,
      "learning_rate": 9.168888953011989e-07,
      "loss": 0.0432,
      "step": 56520
    },
    {
      "epoch": 0.000344970703125,
      "model_forward_time": 0.11505579948425293,
      "step": 56520
    },
    {
      "epoch": 0.000344970703125,
      "step": 56520,
      "training_step_time": 0.629554033279419
    },
    {
      "epoch": 0.000344976806640625,
      "model_forward_time": 0.11443161964416504,
      "step": 56521
    },
    {
      "epoch": 0.000344976806640625,
      "step": 56521,
      "training_step_time": 0.4799363613128662
    },
    {
      "epoch": 0.00034498291015625,
      "model_forward_time": 0.11400151252746582,
      "step": 56522
    },
    {
      "epoch": 0.00034498291015625,
      "step": 56522,
      "training_step_time": 0.4135568141937256
    },
    {
      "epoch": 0.000344989013671875,
      "model_forward_time": 0.11525702476501465,
      "step": 56523
    },
    {
      "epoch": 0.000344989013671875,
      "step": 56523,
      "training_step_time": 0.40976619720458984
    },
    {
      "epoch": 0.0003449951171875,
      "model_forward_time": 0.11437416076660156,
      "step": 56524
    },
    {
      "epoch": 0.0003449951171875,
      "step": 56524,
      "training_step_time": 0.39461517333984375
    },
    {
      "epoch": 0.000345001220703125,
      "model_forward_time": 0.11479449272155762,
      "step": 56525
    },
    {
      "epoch": 0.000345001220703125,
      "step": 56525,
      "training_step_time": 0.3956747055053711
    },
    {
      "epoch": 0.00034500732421875,
      "model_forward_time": 0.11503958702087402,
      "step": 56526
    },
    {
      "epoch": 0.00034500732421875,
      "step": 56526,
      "training_step_time": 0.4734072685241699
    },
    {
      "epoch": 0.000345013427734375,
      "model_forward_time": 0.11487627029418945,
      "step": 56527
    },
    {
      "epoch": 0.000345013427734375,
      "step": 56527,
      "training_step_time": 0.3657679557800293
    },
    {
      "epoch": 0.00034501953125,
      "model_forward_time": 0.11486124992370605,
      "step": 56528
    },
    {
      "epoch": 0.00034501953125,
      "step": 56528,
      "training_step_time": 0.4441554546356201
    },
    {
      "epoch": 0.000345025634765625,
      "model_forward_time": 0.11584973335266113,
      "step": 56529
    },
    {
      "epoch": 0.000345025634765625,
      "step": 56529,
      "training_step_time": 0.44698548316955566
    },
    {
      "epoch": 0.00034503173828125,
      "grad_norm": 0.1251520961523056,
      "learning_rate": 9.116430386929886e-07,
      "loss": 0.04,
      "step": 56530
    },
    {
      "epoch": 0.00034503173828125,
      "model_forward_time": 0.1157071590423584,
      "step": 56530
    },
    {
      "epoch": 0.00034503173828125,
      "step": 56530,
      "training_step_time": 0.3925151824951172
    },
    {
      "epoch": 0.000345037841796875,
      "model_forward_time": 0.11520910263061523,
      "step": 56531
    },
    {
      "epoch": 0.000345037841796875,
      "step": 56531,
      "training_step_time": 0.4030582904815674
    },
    {
      "epoch": 0.0003450439453125,
      "model_forward_time": 0.11548352241516113,
      "step": 56532
    },
    {
      "epoch": 0.0003450439453125,
      "step": 56532,
      "training_step_time": 0.5054850578308105
    },
    {
      "epoch": 0.000345050048828125,
      "model_forward_time": 0.11500978469848633,
      "step": 56533
    },
    {
      "epoch": 0.000345050048828125,
      "step": 56533,
      "training_step_time": 0.3955667018890381
    },
    {
      "epoch": 0.00034505615234375,
      "model_forward_time": 0.11534690856933594,
      "step": 56534
    },
    {
      "epoch": 0.00034505615234375,
      "step": 56534,
      "training_step_time": 0.3945963382720947
    },
    {
      "epoch": 0.000345062255859375,
      "model_forward_time": 0.11493158340454102,
      "step": 56535
    },
    {
      "epoch": 0.000345062255859375,
      "step": 56535,
      "training_step_time": 0.4465153217315674
    },
    {
      "epoch": 0.000345068359375,
      "model_forward_time": 0.11541485786437988,
      "step": 56536
    },
    {
      "epoch": 0.000345068359375,
      "step": 56536,
      "training_step_time": 0.3992617130279541
    },
    {
      "epoch": 0.000345074462890625,
      "model_forward_time": 0.11488199234008789,
      "step": 56537
    },
    {
      "epoch": 0.000345074462890625,
      "step": 56537,
      "training_step_time": 0.42719364166259766
    },
    {
      "epoch": 0.00034508056640625,
      "model_forward_time": 0.11506009101867676,
      "step": 56538
    },
    {
      "epoch": 0.00034508056640625,
      "step": 56538,
      "training_step_time": 0.5935728549957275
    },
    {
      "epoch": 0.000345086669921875,
      "model_forward_time": 0.11505627632141113,
      "step": 56539
    },
    {
      "epoch": 0.000345086669921875,
      "step": 56539,
      "training_step_time": 0.3880913257598877
    },
    {
      "epoch": 0.0003450927734375,
      "grad_norm": 0.07554011791944504,
      "learning_rate": 9.064120938315213e-07,
      "loss": 0.0355,
      "step": 56540
    },
    {
      "epoch": 0.0003450927734375,
      "model_forward_time": 0.1151437759399414,
      "step": 56540
    },
    {
      "epoch": 0.0003450927734375,
      "step": 56540,
      "training_step_time": 0.4607059955596924
    },
    {
      "epoch": 0.000345098876953125,
      "model_forward_time": 0.11515235900878906,
      "step": 56541
    },
    {
      "epoch": 0.000345098876953125,
      "step": 56541,
      "training_step_time": 0.42136406898498535
    },
    {
      "epoch": 0.00034510498046875,
      "model_forward_time": 0.11470985412597656,
      "step": 56542
    },
    {
      "epoch": 0.00034510498046875,
      "step": 56542,
      "training_step_time": 0.506542444229126
    },
    {
      "epoch": 0.000345111083984375,
      "model_forward_time": 0.11411619186401367,
      "step": 56543
    },
    {
      "epoch": 0.000345111083984375,
      "step": 56543,
      "training_step_time": 0.5095651149749756
    },
    {
      "epoch": 0.0003451171875,
      "model_forward_time": 0.11434483528137207,
      "step": 56544
    },
    {
      "epoch": 0.0003451171875,
      "step": 56544,
      "training_step_time": 0.38825297355651855
    },
    {
      "epoch": 0.000345123291015625,
      "model_forward_time": 0.11468839645385742,
      "step": 56545
    },
    {
      "epoch": 0.000345123291015625,
      "step": 56545,
      "training_step_time": 0.38564014434814453
    },
    {
      "epoch": 0.00034512939453125,
      "model_forward_time": 0.11440825462341309,
      "step": 56546
    },
    {
      "epoch": 0.00034512939453125,
      "step": 56546,
      "training_step_time": 0.38448214530944824
    },
    {
      "epoch": 0.000345135498046875,
      "model_forward_time": 0.11480855941772461,
      "step": 56547
    },
    {
      "epoch": 0.000345135498046875,
      "step": 56547,
      "training_step_time": 0.3993411064147949
    },
    {
      "epoch": 0.0003451416015625,
      "model_forward_time": 0.11448454856872559,
      "step": 56548
    },
    {
      "epoch": 0.0003451416015625,
      "step": 56548,
      "training_step_time": 0.3910515308380127
    },
    {
      "epoch": 0.000345147705078125,
      "model_forward_time": 0.11540699005126953,
      "step": 56549
    },
    {
      "epoch": 0.000345147705078125,
      "step": 56549,
      "training_step_time": 0.4241635799407959
    },
    {
      "epoch": 0.00034515380859375,
      "grad_norm": 0.09022071957588196,
      "learning_rate": 9.011960623058202e-07,
      "loss": 0.0345,
      "step": 56550
    },
    {
      "epoch": 0.00034515380859375,
      "model_forward_time": 0.11461758613586426,
      "step": 56550
    },
    {
      "epoch": 0.00034515380859375,
      "step": 56550,
      "training_step_time": 0.594414234161377
    },
    {
      "epoch": 0.000345159912109375,
      "model_forward_time": 0.11481714248657227,
      "step": 56551
    },
    {
      "epoch": 0.000345159912109375,
      "step": 56551,
      "training_step_time": 0.40787172317504883
    },
    {
      "epoch": 0.000345166015625,
      "model_forward_time": 0.11472058296203613,
      "step": 56552
    },
    {
      "epoch": 0.000345166015625,
      "step": 56552,
      "training_step_time": 0.39505958557128906
    },
    {
      "epoch": 0.000345172119140625,
      "model_forward_time": 0.11449933052062988,
      "step": 56553
    },
    {
      "epoch": 0.000345172119140625,
      "step": 56553,
      "training_step_time": 0.3932492733001709
    },
    {
      "epoch": 0.00034517822265625,
      "model_forward_time": 0.11490321159362793,
      "step": 56554
    },
    {
      "epoch": 0.00034517822265625,
      "step": 56554,
      "training_step_time": 0.45406341552734375
    },
    {
      "epoch": 0.000345184326171875,
      "model_forward_time": 0.11496949195861816,
      "step": 56555
    },
    {
      "epoch": 0.000345184326171875,
      "step": 56555,
      "training_step_time": 0.4428689479827881
    },
    {
      "epoch": 0.0003451904296875,
      "model_forward_time": 0.11500239372253418,
      "step": 56556
    },
    {
      "epoch": 0.0003451904296875,
      "step": 56556,
      "training_step_time": 0.5356957912445068
    },
    {
      "epoch": 0.000345196533203125,
      "model_forward_time": 0.11463332176208496,
      "step": 56557
    },
    {
      "epoch": 0.000345196533203125,
      "step": 56557,
      "training_step_time": 0.3966634273529053
    },
    {
      "epoch": 0.00034520263671875,
      "model_forward_time": 0.11459994316101074,
      "step": 56558
    },
    {
      "epoch": 0.00034520263671875,
      "step": 56558,
      "training_step_time": 0.4508857727050781
    },
    {
      "epoch": 0.000345208740234375,
      "model_forward_time": 0.11461639404296875,
      "step": 56559
    },
    {
      "epoch": 0.000345208740234375,
      "step": 56559,
      "training_step_time": 0.3960227966308594
    },
    {
      "epoch": 0.00034521484375,
      "grad_norm": 0.08987460285425186,
      "learning_rate": 8.959949457003736e-07,
      "loss": 0.0357,
      "step": 56560
    },
    {
      "epoch": 0.00034521484375,
      "model_forward_time": 0.11451578140258789,
      "step": 56560
    },
    {
      "epoch": 0.00034521484375,
      "step": 56560,
      "training_step_time": 0.404557466506958
    },
    {
      "epoch": 0.000345220947265625,
      "model_forward_time": 0.11513543128967285,
      "step": 56561
    },
    {
      "epoch": 0.000345220947265625,
      "step": 56561,
      "training_step_time": 0.5027015209197998
    },
    {
      "epoch": 0.00034522705078125,
      "model_forward_time": 0.11486029624938965,
      "step": 56562
    },
    {
      "epoch": 0.00034522705078125,
      "step": 56562,
      "training_step_time": 0.46734118461608887
    },
    {
      "epoch": 0.000345233154296875,
      "model_forward_time": 0.1143486499786377,
      "step": 56563
    },
    {
      "epoch": 0.000345233154296875,
      "step": 56563,
      "training_step_time": 0.4649012088775635
    },
    {
      "epoch": 0.0003452392578125,
      "model_forward_time": 0.11458110809326172,
      "step": 56564
    },
    {
      "epoch": 0.0003452392578125,
      "step": 56564,
      "training_step_time": 0.4143695831298828
    },
    {
      "epoch": 0.000345245361328125,
      "model_forward_time": 0.11452269554138184,
      "step": 56565
    },
    {
      "epoch": 0.000345245361328125,
      "step": 56565,
      "training_step_time": 0.3918476104736328
    },
    {
      "epoch": 0.00034525146484375,
      "model_forward_time": 0.11429882049560547,
      "step": 56566
    },
    {
      "epoch": 0.00034525146484375,
      "step": 56566,
      "training_step_time": 0.39272618293762207
    },
    {
      "epoch": 0.000345257568359375,
      "model_forward_time": 0.11501812934875488,
      "step": 56567
    },
    {
      "epoch": 0.000345257568359375,
      "step": 56567,
      "training_step_time": 0.3941988945007324
    },
    {
      "epoch": 0.000345263671875,
      "model_forward_time": 0.11526036262512207,
      "step": 56568
    },
    {
      "epoch": 0.000345263671875,
      "step": 56568,
      "training_step_time": 0.5745174884796143
    },
    {
      "epoch": 0.000345269775390625,
      "model_forward_time": 0.11446237564086914,
      "step": 56569
    },
    {
      "epoch": 0.000345269775390625,
      "step": 56569,
      "training_step_time": 0.3953375816345215
    },
    {
      "epoch": 0.00034527587890625,
      "grad_norm": 0.10036646574735641,
      "learning_rate": 8.908087455951397e-07,
      "loss": 0.038,
      "step": 56570
    },
    {
      "epoch": 0.00034527587890625,
      "model_forward_time": 0.11540579795837402,
      "step": 56570
    },
    {
      "epoch": 0.00034527587890625,
      "step": 56570,
      "training_step_time": 0.4670445919036865
    },
    {
      "epoch": 0.000345281982421875,
      "model_forward_time": 0.11504197120666504,
      "step": 56571
    },
    {
      "epoch": 0.000345281982421875,
      "step": 56571,
      "training_step_time": 0.4109690189361572
    },
    {
      "epoch": 0.0003452880859375,
      "model_forward_time": 0.11519289016723633,
      "step": 56572
    },
    {
      "epoch": 0.0003452880859375,
      "step": 56572,
      "training_step_time": 0.4550209045410156
    },
    {
      "epoch": 0.000345294189453125,
      "model_forward_time": 0.11497020721435547,
      "step": 56573
    },
    {
      "epoch": 0.000345294189453125,
      "step": 56573,
      "training_step_time": 0.3749821186065674
    },
    {
      "epoch": 0.00034530029296875,
      "model_forward_time": 0.11489415168762207,
      "step": 56574
    },
    {
      "epoch": 0.00034530029296875,
      "step": 56574,
      "training_step_time": 0.5554730892181396
    },
    {
      "epoch": 0.000345306396484375,
      "model_forward_time": 0.1143636703491211,
      "step": 56575
    },
    {
      "epoch": 0.000345306396484375,
      "step": 56575,
      "training_step_time": 0.3888132572174072
    },
    {
      "epoch": 0.0003453125,
      "model_forward_time": 0.11498594284057617,
      "step": 56576
    },
    {
      "epoch": 0.0003453125,
      "step": 56576,
      "training_step_time": 0.4101414680480957
    },
    {
      "epoch": 0.000345318603515625,
      "model_forward_time": 0.11439776420593262,
      "step": 56577
    },
    {
      "epoch": 0.000345318603515625,
      "step": 56577,
      "training_step_time": 0.4600534439086914
    },
    {
      "epoch": 0.00034532470703125,
      "model_forward_time": 0.11476922035217285,
      "step": 56578
    },
    {
      "epoch": 0.00034532470703125,
      "step": 56578,
      "training_step_time": 0.44455504417419434
    },
    {
      "epoch": 0.000345330810546875,
      "model_forward_time": 0.11495566368103027,
      "step": 56579
    },
    {
      "epoch": 0.000345330810546875,
      "step": 56579,
      "training_step_time": 0.4169578552246094
    },
    {
      "epoch": 0.0003453369140625,
      "grad_norm": 0.09122985601425171,
      "learning_rate": 8.856374635655695e-07,
      "loss": 0.0353,
      "step": 56580
    },
    {
      "epoch": 0.0003453369140625,
      "model_forward_time": 0.11504650115966797,
      "step": 56580
    },
    {
      "epoch": 0.0003453369140625,
      "step": 56580,
      "training_step_time": 0.5575594902038574
    },
    {
      "epoch": 0.000345343017578125,
      "model_forward_time": 0.11509561538696289,
      "step": 56581
    },
    {
      "epoch": 0.000345343017578125,
      "step": 56581,
      "training_step_time": 0.39194369316101074
    },
    {
      "epoch": 0.00034534912109375,
      "model_forward_time": 0.11419868469238281,
      "step": 56582
    },
    {
      "epoch": 0.00034534912109375,
      "step": 56582,
      "training_step_time": 0.4755430221557617
    },
    {
      "epoch": 0.000345355224609375,
      "model_forward_time": 0.11447763442993164,
      "step": 56583
    },
    {
      "epoch": 0.000345355224609375,
      "step": 56583,
      "training_step_time": 0.4155995845794678
    },
    {
      "epoch": 0.000345361328125,
      "model_forward_time": 0.11479711532592773,
      "step": 56584
    },
    {
      "epoch": 0.000345361328125,
      "step": 56584,
      "training_step_time": 0.43883562088012695
    },
    {
      "epoch": 0.000345367431640625,
      "model_forward_time": 0.11495280265808105,
      "step": 56585
    },
    {
      "epoch": 0.000345367431640625,
      "step": 56585,
      "training_step_time": 0.49462461471557617
    },
    {
      "epoch": 0.00034537353515625,
      "model_forward_time": 0.11490917205810547,
      "step": 56586
    },
    {
      "epoch": 0.00034537353515625,
      "step": 56586,
      "training_step_time": 0.4222230911254883
    },
    {
      "epoch": 0.000345379638671875,
      "model_forward_time": 0.1153419017791748,
      "step": 56587
    },
    {
      "epoch": 0.000345379638671875,
      "step": 56587,
      "training_step_time": 0.3908071517944336
    },
    {
      "epoch": 0.0003453857421875,
      "model_forward_time": 0.11444687843322754,
      "step": 56588
    },
    {
      "epoch": 0.0003453857421875,
      "step": 56588,
      "training_step_time": 0.3960685729980469
    },
    {
      "epoch": 0.000345391845703125,
      "model_forward_time": 0.115234375,
      "step": 56589
    },
    {
      "epoch": 0.000345391845703125,
      "step": 56589,
      "training_step_time": 0.3967254161834717
    },
    {
      "epoch": 0.00034539794921875,
      "grad_norm": 0.09650103747844696,
      "learning_rate": 8.804811011825398e-07,
      "loss": 0.0333,
      "step": 56590
    },
    {
      "epoch": 0.00034539794921875,
      "model_forward_time": 0.11499762535095215,
      "step": 56590
    },
    {
      "epoch": 0.00034539794921875,
      "step": 56590,
      "training_step_time": 0.3962826728820801
    },
    {
      "epoch": 0.000345404052734375,
      "model_forward_time": 0.11529707908630371,
      "step": 56591
    },
    {
      "epoch": 0.000345404052734375,
      "step": 56591,
      "training_step_time": 0.418018102645874
    },
    {
      "epoch": 0.00034541015625,
      "model_forward_time": 0.11536240577697754,
      "step": 56592
    },
    {
      "epoch": 0.00034541015625,
      "step": 56592,
      "training_step_time": 0.5212020874023438
    },
    {
      "epoch": 0.000345416259765625,
      "model_forward_time": 0.11508655548095703,
      "step": 56593
    },
    {
      "epoch": 0.000345416259765625,
      "step": 56593,
      "training_step_time": 0.4211723804473877
    },
    {
      "epoch": 0.00034542236328125,
      "model_forward_time": 0.11528539657592773,
      "step": 56594
    },
    {
      "epoch": 0.00034542236328125,
      "step": 56594,
      "training_step_time": 0.4127986431121826
    },
    {
      "epoch": 0.000345428466796875,
      "model_forward_time": 0.11520838737487793,
      "step": 56595
    },
    {
      "epoch": 0.000345428466796875,
      "step": 56595,
      "training_step_time": 0.3962399959564209
    },
    {
      "epoch": 0.0003454345703125,
      "model_forward_time": 0.11527466773986816,
      "step": 56596
    },
    {
      "epoch": 0.0003454345703125,
      "step": 56596,
      "training_step_time": 0.5110812187194824
    },
    {
      "epoch": 0.000345440673828125,
      "model_forward_time": 0.11481809616088867,
      "step": 56597
    },
    {
      "epoch": 0.000345440673828125,
      "step": 56597,
      "training_step_time": 0.4351804256439209
    },
    {
      "epoch": 0.00034544677734375,
      "model_forward_time": 0.11462783813476562,
      "step": 56598
    },
    {
      "epoch": 0.00034544677734375,
      "step": 56598,
      "training_step_time": 0.4340250492095947
    },
    {
      "epoch": 0.000345452880859375,
      "model_forward_time": 0.1151270866394043,
      "step": 56599
    },
    {
      "epoch": 0.000345452880859375,
      "step": 56599,
      "training_step_time": 0.3868529796600342
    },
    {
      "epoch": 0.000345458984375,
      "grad_norm": 0.06462883949279785,
      "learning_rate": 8.753396600124253e-07,
      "loss": 0.0363,
      "step": 56600
    },
    {
      "epoch": 0.000345458984375,
      "model_forward_time": 0.1156606674194336,
      "step": 56600
    },
    {
      "epoch": 0.000345458984375,
      "step": 56600,
      "training_step_time": 0.43196535110473633
    },
    {
      "epoch": 0.000345465087890625,
      "model_forward_time": 0.11500716209411621,
      "step": 56601
    },
    {
      "epoch": 0.000345465087890625,
      "step": 56601,
      "training_step_time": 0.3986325263977051
    },
    {
      "epoch": 0.00034547119140625,
      "model_forward_time": 0.11486291885375977,
      "step": 56602
    },
    {
      "epoch": 0.00034547119140625,
      "step": 56602,
      "training_step_time": 0.395998477935791
    },
    {
      "epoch": 0.000345477294921875,
      "model_forward_time": 0.11557698249816895,
      "step": 56603
    },
    {
      "epoch": 0.000345477294921875,
      "step": 56603,
      "training_step_time": 0.3885648250579834
    },
    {
      "epoch": 0.0003454833984375,
      "model_forward_time": 0.11445355415344238,
      "step": 56604
    },
    {
      "epoch": 0.0003454833984375,
      "step": 56604,
      "training_step_time": 0.3942739963531494
    },
    {
      "epoch": 0.000345489501953125,
      "model_forward_time": 0.11492300033569336,
      "step": 56605
    },
    {
      "epoch": 0.000345489501953125,
      "step": 56605,
      "training_step_time": 0.46447157859802246
    },
    {
      "epoch": 0.00034549560546875,
      "model_forward_time": 0.11467647552490234,
      "step": 56606
    },
    {
      "epoch": 0.00034549560546875,
      "step": 56606,
      "training_step_time": 0.42742371559143066
    },
    {
      "epoch": 0.000345501708984375,
      "model_forward_time": 0.11490225791931152,
      "step": 56607
    },
    {
      "epoch": 0.000345501708984375,
      "step": 56607,
      "training_step_time": 0.3989129066467285
    },
    {
      "epoch": 0.0003455078125,
      "model_forward_time": 0.11492729187011719,
      "step": 56608
    },
    {
      "epoch": 0.0003455078125,
      "step": 56608,
      "training_step_time": 0.43108630180358887
    },
    {
      "epoch": 0.000345513916015625,
      "model_forward_time": 0.11473608016967773,
      "step": 56609
    },
    {
      "epoch": 0.000345513916015625,
      "step": 56609,
      "training_step_time": 0.40419936180114746
    },
    {
      "epoch": 0.00034552001953125,
      "grad_norm": 0.1201511025428772,
      "learning_rate": 8.702131416170656e-07,
      "loss": 0.0388,
      "step": 56610
    },
    {
      "epoch": 0.00034552001953125,
      "model_forward_time": 0.11510705947875977,
      "step": 56610
    },
    {
      "epoch": 0.00034552001953125,
      "step": 56610,
      "training_step_time": 0.3996114730834961
    },
    {
      "epoch": 0.000345526123046875,
      "model_forward_time": 0.11521387100219727,
      "step": 56611
    },
    {
      "epoch": 0.000345526123046875,
      "step": 56611,
      "training_step_time": 0.3986954689025879
    },
    {
      "epoch": 0.0003455322265625,
      "model_forward_time": 0.11488890647888184,
      "step": 56612
    },
    {
      "epoch": 0.0003455322265625,
      "step": 56612,
      "training_step_time": 0.42542076110839844
    },
    {
      "epoch": 0.000345538330078125,
      "model_forward_time": 0.11510920524597168,
      "step": 56613
    },
    {
      "epoch": 0.000345538330078125,
      "step": 56613,
      "training_step_time": 0.4141113758087158
    },
    {
      "epoch": 0.00034554443359375,
      "model_forward_time": 0.11477088928222656,
      "step": 56614
    },
    {
      "epoch": 0.00034554443359375,
      "step": 56614,
      "training_step_time": 0.4194173812866211
    },
    {
      "epoch": 0.000345550537109375,
      "model_forward_time": 0.1149148941040039,
      "step": 56615
    },
    {
      "epoch": 0.000345550537109375,
      "step": 56615,
      "training_step_time": 0.4183783531188965
    },
    {
      "epoch": 0.000345556640625,
      "model_forward_time": 0.11487150192260742,
      "step": 56616
    },
    {
      "epoch": 0.000345556640625,
      "step": 56616,
      "training_step_time": 0.41384220123291016
    },
    {
      "epoch": 0.000345562744140625,
      "model_forward_time": 0.11449265480041504,
      "step": 56617
    },
    {
      "epoch": 0.000345562744140625,
      "step": 56617,
      "training_step_time": 0.3918788433074951
    },
    {
      "epoch": 0.00034556884765625,
      "model_forward_time": 0.11546540260314941,
      "step": 56618
    },
    {
      "epoch": 0.00034556884765625,
      "step": 56618,
      "training_step_time": 0.3916471004486084
    },
    {
      "epoch": 0.000345574951171875,
      "model_forward_time": 0.1148686408996582,
      "step": 56619
    },
    {
      "epoch": 0.000345574951171875,
      "step": 56619,
      "training_step_time": 0.40595412254333496
    },
    {
      "epoch": 0.0003455810546875,
      "grad_norm": 0.06919699162244797,
      "learning_rate": 8.651015475537538e-07,
      "loss": 0.0407,
      "step": 56620
    },
    {
      "epoch": 0.0003455810546875,
      "model_forward_time": 0.11484146118164062,
      "step": 56620
    },
    {
      "epoch": 0.0003455810546875,
      "step": 56620,
      "training_step_time": 0.440826416015625
    },
    {
      "epoch": 0.000345587158203125,
      "model_forward_time": 0.11517572402954102,
      "step": 56621
    },
    {
      "epoch": 0.000345587158203125,
      "step": 56621,
      "training_step_time": 0.4833545684814453
    },
    {
      "epoch": 0.00034559326171875,
      "model_forward_time": 0.11490440368652344,
      "step": 56622
    },
    {
      "epoch": 0.00034559326171875,
      "step": 56622,
      "training_step_time": 0.4100675582885742
    },
    {
      "epoch": 0.000345599365234375,
      "model_forward_time": 0.11551380157470703,
      "step": 56623
    },
    {
      "epoch": 0.000345599365234375,
      "step": 56623,
      "training_step_time": 0.3963017463684082
    },
    {
      "epoch": 0.00034560546875,
      "model_forward_time": 0.11513900756835938,
      "step": 56624
    },
    {
      "epoch": 0.00034560546875,
      "step": 56624,
      "training_step_time": 0.39768099784851074
    },
    {
      "epoch": 0.000345611572265625,
      "model_forward_time": 0.11534738540649414,
      "step": 56625
    },
    {
      "epoch": 0.000345611572265625,
      "step": 56625,
      "training_step_time": 0.4493231773376465
    },
    {
      "epoch": 0.00034561767578125,
      "model_forward_time": 0.11473369598388672,
      "step": 56626
    },
    {
      "epoch": 0.00034561767578125,
      "step": 56626,
      "training_step_time": 0.39693403244018555
    },
    {
      "epoch": 0.000345623779296875,
      "model_forward_time": 0.11490631103515625,
      "step": 56627
    },
    {
      "epoch": 0.000345623779296875,
      "step": 56627,
      "training_step_time": 0.47567081451416016
    },
    {
      "epoch": 0.0003456298828125,
      "model_forward_time": 0.11473464965820312,
      "step": 56628
    },
    {
      "epoch": 0.0003456298828125,
      "step": 56628,
      "training_step_time": 0.4947397708892822
    },
    {
      "epoch": 0.000345635986328125,
      "model_forward_time": 0.11527705192565918,
      "step": 56629
    },
    {
      "epoch": 0.000345635986328125,
      "step": 56629,
      "training_step_time": 0.4848897457122803
    },
    {
      "epoch": 0.00034564208984375,
      "grad_norm": 0.0866861417889595,
      "learning_rate": 8.60004879375259e-07,
      "loss": 0.0347,
      "step": 56630
    },
    {
      "epoch": 0.00034564208984375,
      "model_forward_time": 0.11425113677978516,
      "step": 56630
    },
    {
      "epoch": 0.00034564208984375,
      "step": 56630,
      "training_step_time": 0.42536377906799316
    },
    {
      "epoch": 0.000345648193359375,
      "model_forward_time": 0.11496090888977051,
      "step": 56631
    },
    {
      "epoch": 0.000345648193359375,
      "step": 56631,
      "training_step_time": 0.3848147392272949
    },
    {
      "epoch": 0.000345654296875,
      "model_forward_time": 0.11524128913879395,
      "step": 56632
    },
    {
      "epoch": 0.000345654296875,
      "step": 56632,
      "training_step_time": 0.3992443084716797
    },
    {
      "epoch": 0.000345660400390625,
      "model_forward_time": 0.11451005935668945,
      "step": 56633
    },
    {
      "epoch": 0.000345660400390625,
      "step": 56633,
      "training_step_time": 0.3961212635040283
    },
    {
      "epoch": 0.00034566650390625,
      "model_forward_time": 0.11519742012023926,
      "step": 56634
    },
    {
      "epoch": 0.00034566650390625,
      "step": 56634,
      "training_step_time": 0.47696638107299805
    },
    {
      "epoch": 0.000345672607421875,
      "model_forward_time": 0.11478376388549805,
      "step": 56635
    },
    {
      "epoch": 0.000345672607421875,
      "step": 56635,
      "training_step_time": 0.4326164722442627
    },
    {
      "epoch": 0.0003456787109375,
      "model_forward_time": 0.11570549011230469,
      "step": 56636
    },
    {
      "epoch": 0.0003456787109375,
      "step": 56636,
      "training_step_time": 0.40119242668151855
    },
    {
      "epoch": 0.000345684814453125,
      "model_forward_time": 0.11556029319763184,
      "step": 56637
    },
    {
      "epoch": 0.000345684814453125,
      "step": 56637,
      "training_step_time": 0.3825812339782715
    },
    {
      "epoch": 0.00034569091796875,
      "model_forward_time": 0.11515545845031738,
      "step": 56638
    },
    {
      "epoch": 0.00034569091796875,
      "step": 56638,
      "training_step_time": 0.3978743553161621
    },
    {
      "epoch": 0.000345697021484375,
      "model_forward_time": 0.11505794525146484,
      "step": 56639
    },
    {
      "epoch": 0.000345697021484375,
      "step": 56639,
      "training_step_time": 0.392139196395874
    },
    {
      "epoch": 0.000345703125,
      "grad_norm": 0.07414799183607101,
      "learning_rate": 8.549231386298151e-07,
      "loss": 0.0361,
      "step": 56640
    },
    {
      "epoch": 0.000345703125,
      "model_forward_time": 0.11444568634033203,
      "step": 56640
    },
    {
      "epoch": 0.000345703125,
      "step": 56640,
      "training_step_time": 0.39106178283691406
    },
    {
      "epoch": 0.000345709228515625,
      "model_forward_time": 0.11564469337463379,
      "step": 56641
    },
    {
      "epoch": 0.000345709228515625,
      "step": 56641,
      "training_step_time": 0.39815521240234375
    },
    {
      "epoch": 0.00034571533203125,
      "model_forward_time": 0.1147451400756836,
      "step": 56642
    },
    {
      "epoch": 0.00034571533203125,
      "step": 56642,
      "training_step_time": 0.37560153007507324
    },
    {
      "epoch": 0.000345721435546875,
      "model_forward_time": 0.11505413055419922,
      "step": 56643
    },
    {
      "epoch": 0.000345721435546875,
      "step": 56643,
      "training_step_time": 0.3980751037597656
    },
    {
      "epoch": 0.0003457275390625,
      "model_forward_time": 0.11502218246459961,
      "step": 56644
    },
    {
      "epoch": 0.0003457275390625,
      "step": 56644,
      "training_step_time": 0.4892079830169678
    },
    {
      "epoch": 0.000345733642578125,
      "model_forward_time": 0.1149284839630127,
      "step": 56645
    },
    {
      "epoch": 0.000345733642578125,
      "step": 56645,
      "training_step_time": 0.38948559761047363
    },
    {
      "epoch": 0.00034573974609375,
      "model_forward_time": 0.11416435241699219,
      "step": 56646
    },
    {
      "epoch": 0.00034573974609375,
      "step": 56646,
      "training_step_time": 0.4240531921386719
    },
    {
      "epoch": 0.000345745849609375,
      "model_forward_time": 0.11420607566833496,
      "step": 56647
    },
    {
      "epoch": 0.000345745849609375,
      "step": 56647,
      "training_step_time": 0.39072680473327637
    },
    {
      "epoch": 0.000345751953125,
      "model_forward_time": 0.11471676826477051,
      "step": 56648
    },
    {
      "epoch": 0.000345751953125,
      "step": 56648,
      "training_step_time": 0.42728471755981445
    },
    {
      "epoch": 0.000345758056640625,
      "model_forward_time": 0.11479043960571289,
      "step": 56649
    },
    {
      "epoch": 0.000345758056640625,
      "step": 56649,
      "training_step_time": 0.42531514167785645
    },
    {
      "epoch": 0.00034576416015625,
      "grad_norm": 0.0896848738193512,
      "learning_rate": 8.498563268611204e-07,
      "loss": 0.0338,
      "step": 56650
    },
    {
      "epoch": 0.00034576416015625,
      "model_forward_time": 0.11457586288452148,
      "step": 56650
    },
    {
      "epoch": 0.00034576416015625,
      "step": 56650,
      "training_step_time": 0.44747161865234375
    },
    {
      "epoch": 0.000345770263671875,
      "model_forward_time": 0.11496591567993164,
      "step": 56651
    },
    {
      "epoch": 0.000345770263671875,
      "step": 56651,
      "training_step_time": 0.3991708755493164
    },
    {
      "epoch": 0.0003457763671875,
      "model_forward_time": 0.11546182632446289,
      "step": 56652
    },
    {
      "epoch": 0.0003457763671875,
      "step": 56652,
      "training_step_time": 0.39448094367980957
    },
    {
      "epoch": 0.000345782470703125,
      "model_forward_time": 0.11618900299072266,
      "step": 56653
    },
    {
      "epoch": 0.000345782470703125,
      "step": 56653,
      "training_step_time": 0.39098191261291504
    },
    {
      "epoch": 0.00034578857421875,
      "model_forward_time": 0.11499142646789551,
      "step": 56654
    },
    {
      "epoch": 0.00034578857421875,
      "step": 56654,
      "training_step_time": 0.40149950981140137
    },
    {
      "epoch": 0.000345794677734375,
      "model_forward_time": 0.11451840400695801,
      "step": 56655
    },
    {
      "epoch": 0.000345794677734375,
      "step": 56655,
      "training_step_time": 0.410538911819458
    },
    {
      "epoch": 0.00034580078125,
      "model_forward_time": 0.11529970169067383,
      "step": 56656
    },
    {
      "epoch": 0.00034580078125,
      "step": 56656,
      "training_step_time": 0.42386484146118164
    },
    {
      "epoch": 0.000345806884765625,
      "model_forward_time": 0.11458802223205566,
      "step": 56657
    },
    {
      "epoch": 0.000345806884765625,
      "step": 56657,
      "training_step_time": 0.3668992519378662
    },
    {
      "epoch": 0.00034581298828125,
      "model_forward_time": 0.11510491371154785,
      "step": 56658
    },
    {
      "epoch": 0.00034581298828125,
      "step": 56658,
      "training_step_time": 0.4141669273376465
    },
    {
      "epoch": 0.000345819091796875,
      "model_forward_time": 0.11485600471496582,
      "step": 56659
    },
    {
      "epoch": 0.000345819091796875,
      "step": 56659,
      "training_step_time": 0.4571712017059326
    },
    {
      "epoch": 0.0003458251953125,
      "grad_norm": 0.08838954567909241,
      "learning_rate": 8.448044456083493e-07,
      "loss": 0.0365,
      "step": 56660
    },
    {
      "epoch": 0.0003458251953125,
      "model_forward_time": 0.1150655746459961,
      "step": 56660
    },
    {
      "epoch": 0.0003458251953125,
      "step": 56660,
      "training_step_time": 0.39696812629699707
    },
    {
      "epoch": 0.000345831298828125,
      "model_forward_time": 0.11540436744689941,
      "step": 56661
    },
    {
      "epoch": 0.000345831298828125,
      "step": 56661,
      "training_step_time": 0.39043521881103516
    },
    {
      "epoch": 0.00034583740234375,
      "model_forward_time": 0.11512589454650879,
      "step": 56662
    },
    {
      "epoch": 0.00034583740234375,
      "step": 56662,
      "training_step_time": 0.40041232109069824
    },
    {
      "epoch": 0.000345843505859375,
      "model_forward_time": 0.11476254463195801,
      "step": 56663
    },
    {
      "epoch": 0.000345843505859375,
      "step": 56663,
      "training_step_time": 0.4681103229522705
    },
    {
      "epoch": 0.000345849609375,
      "model_forward_time": 0.1149148941040039,
      "step": 56664
    },
    {
      "epoch": 0.000345849609375,
      "step": 56664,
      "training_step_time": 0.4342691898345947
    },
    {
      "epoch": 0.000345855712890625,
      "model_forward_time": 0.11506390571594238,
      "step": 56665
    },
    {
      "epoch": 0.000345855712890625,
      "step": 56665,
      "training_step_time": 0.4542374610900879
    },
    {
      "epoch": 0.00034586181640625,
      "model_forward_time": 0.11509108543395996,
      "step": 56666
    },
    {
      "epoch": 0.00034586181640625,
      "step": 56666,
      "training_step_time": 0.39994192123413086
    },
    {
      "epoch": 0.000345867919921875,
      "model_forward_time": 0.11782670021057129,
      "step": 56667
    },
    {
      "epoch": 0.000345867919921875,
      "step": 56667,
      "training_step_time": 0.392411470413208
    },
    {
      "epoch": 0.0003458740234375,
      "model_forward_time": 0.11560416221618652,
      "step": 56668
    },
    {
      "epoch": 0.0003458740234375,
      "step": 56668,
      "training_step_time": 0.39427971839904785
    },
    {
      "epoch": 0.000345880126953125,
      "model_forward_time": 0.11517953872680664,
      "step": 56669
    },
    {
      "epoch": 0.000345880126953125,
      "step": 56669,
      "training_step_time": 0.4146111011505127
    },
    {
      "epoch": 0.00034588623046875,
      "grad_norm": 0.10406443476676941,
      "learning_rate": 8.397674964061075e-07,
      "loss": 0.0394,
      "step": 56670
    },
    {
      "epoch": 0.00034588623046875,
      "model_forward_time": 0.115020751953125,
      "step": 56670
    },
    {
      "epoch": 0.00034588623046875,
      "step": 56670,
      "training_step_time": 0.3994178771972656
    },
    {
      "epoch": 0.000345892333984375,
      "model_forward_time": 0.11526918411254883,
      "step": 56671
    },
    {
      "epoch": 0.000345892333984375,
      "step": 56671,
      "training_step_time": 0.47980451583862305
    },
    {
      "epoch": 0.0003458984375,
      "model_forward_time": 0.11514902114868164,
      "step": 56672
    },
    {
      "epoch": 0.0003458984375,
      "step": 56672,
      "training_step_time": 0.36578869819641113
    },
    {
      "epoch": 0.000345904541015625,
      "model_forward_time": 0.11507606506347656,
      "step": 56673
    },
    {
      "epoch": 0.000345904541015625,
      "step": 56673,
      "training_step_time": 0.40533876419067383
    },
    {
      "epoch": 0.00034591064453125,
      "model_forward_time": 0.1150205135345459,
      "step": 56674
    },
    {
      "epoch": 0.00034591064453125,
      "step": 56674,
      "training_step_time": 0.46356940269470215
    },
    {
      "epoch": 0.000345916748046875,
      "model_forward_time": 0.11536240577697754,
      "step": 56675
    },
    {
      "epoch": 0.000345916748046875,
      "step": 56675,
      "training_step_time": 0.4310131072998047
    },
    {
      "epoch": 0.0003459228515625,
      "model_forward_time": 0.11461281776428223,
      "step": 56676
    },
    {
      "epoch": 0.0003459228515625,
      "step": 56676,
      "training_step_time": 0.45268940925598145
    },
    {
      "epoch": 0.000345928955078125,
      "model_forward_time": 0.11539125442504883,
      "step": 56677
    },
    {
      "epoch": 0.000345928955078125,
      "step": 56677,
      "training_step_time": 0.4778921604156494
    },
    {
      "epoch": 0.00034593505859375,
      "model_forward_time": 0.11488199234008789,
      "step": 56678
    },
    {
      "epoch": 0.00034593505859375,
      "step": 56678,
      "training_step_time": 0.41019487380981445
    },
    {
      "epoch": 0.000345941162109375,
      "model_forward_time": 0.1145939826965332,
      "step": 56679
    },
    {
      "epoch": 0.000345941162109375,
      "step": 56679,
      "training_step_time": 0.4103684425354004
    },
    {
      "epoch": 0.000345947265625,
      "grad_norm": 0.09161914139986038,
      "learning_rate": 8.347454807845045e-07,
      "loss": 0.0343,
      "step": 56680
    },
    {
      "epoch": 0.000345947265625,
      "model_forward_time": 0.11499333381652832,
      "step": 56680
    },
    {
      "epoch": 0.000345947265625,
      "step": 56680,
      "training_step_time": 0.39827799797058105
    },
    {
      "epoch": 0.000345953369140625,
      "model_forward_time": 0.11515116691589355,
      "step": 56681
    },
    {
      "epoch": 0.000345953369140625,
      "step": 56681,
      "training_step_time": 0.39107489585876465
    },
    {
      "epoch": 0.00034595947265625,
      "model_forward_time": 0.1151576042175293,
      "step": 56682
    },
    {
      "epoch": 0.00034595947265625,
      "step": 56682,
      "training_step_time": 0.4025297164916992
    },
    {
      "epoch": 0.000345965576171875,
      "model_forward_time": 0.1149909496307373,
      "step": 56683
    },
    {
      "epoch": 0.000345965576171875,
      "step": 56683,
      "training_step_time": 0.388735294342041
    },
    {
      "epoch": 0.0003459716796875,
      "model_forward_time": 0.11557602882385254,
      "step": 56684
    },
    {
      "epoch": 0.0003459716796875,
      "step": 56684,
      "training_step_time": 0.4285004138946533
    },
    {
      "epoch": 0.000345977783203125,
      "model_forward_time": 0.11553001403808594,
      "step": 56685
    },
    {
      "epoch": 0.000345977783203125,
      "step": 56685,
      "training_step_time": 0.43689727783203125
    },
    {
      "epoch": 0.00034598388671875,
      "model_forward_time": 0.11455059051513672,
      "step": 56686
    },
    {
      "epoch": 0.00034598388671875,
      "step": 56686,
      "training_step_time": 0.48851633071899414
    },
    {
      "epoch": 0.000345989990234375,
      "model_forward_time": 0.11533355712890625,
      "step": 56687
    },
    {
      "epoch": 0.000345989990234375,
      "step": 56687,
      "training_step_time": 0.36463451385498047
    },
    {
      "epoch": 0.00034599609375,
      "model_forward_time": 0.11514925956726074,
      "step": 56688
    },
    {
      "epoch": 0.00034599609375,
      "step": 56688,
      "training_step_time": 0.41315507888793945
    },
    {
      "epoch": 0.000346002197265625,
      "model_forward_time": 0.11484694480895996,
      "step": 56689
    },
    {
      "epoch": 0.000346002197265625,
      "step": 56689,
      "training_step_time": 0.43336963653564453
    },
    {
      "epoch": 0.00034600830078125,
      "grad_norm": 0.08039252460002899,
      "learning_rate": 8.297384002690866e-07,
      "loss": 0.0333,
      "step": 56690
    },
    {
      "epoch": 0.00034600830078125,
      "model_forward_time": 0.11545395851135254,
      "step": 56690
    },
    {
      "epoch": 0.00034600830078125,
      "step": 56690,
      "training_step_time": 0.3967921733856201
    },
    {
      "epoch": 0.000346014404296875,
      "model_forward_time": 0.11543917655944824,
      "step": 56691
    },
    {
      "epoch": 0.000346014404296875,
      "step": 56691,
      "training_step_time": 0.39751124382019043
    },
    {
      "epoch": 0.0003460205078125,
      "model_forward_time": 0.11500883102416992,
      "step": 56692
    },
    {
      "epoch": 0.0003460205078125,
      "step": 56692,
      "training_step_time": 0.44583988189697266
    },
    {
      "epoch": 0.000346026611328125,
      "model_forward_time": 0.11513924598693848,
      "step": 56693
    },
    {
      "epoch": 0.000346026611328125,
      "step": 56693,
      "training_step_time": 0.45664310455322266
    },
    {
      "epoch": 0.00034603271484375,
      "model_forward_time": 0.11537981033325195,
      "step": 56694
    },
    {
      "epoch": 0.00034603271484375,
      "step": 56694,
      "training_step_time": 0.3946645259857178
    },
    {
      "epoch": 0.000346038818359375,
      "model_forward_time": 0.11500287055969238,
      "step": 56695
    },
    {
      "epoch": 0.000346038818359375,
      "step": 56695,
      "training_step_time": 0.3936018943786621
    },
    {
      "epoch": 0.000346044921875,
      "model_forward_time": 0.11513876914978027,
      "step": 56696
    },
    {
      "epoch": 0.000346044921875,
      "step": 56696,
      "training_step_time": 0.39643073081970215
    },
    {
      "epoch": 0.000346051025390625,
      "model_forward_time": 0.11572456359863281,
      "step": 56697
    },
    {
      "epoch": 0.000346051025390625,
      "step": 56697,
      "training_step_time": 0.39643406867980957
    },
    {
      "epoch": 0.00034605712890625,
      "model_forward_time": 0.11492562294006348,
      "step": 56698
    },
    {
      "epoch": 0.00034605712890625,
      "step": 56698,
      "training_step_time": 0.400388240814209
    },
    {
      "epoch": 0.000346063232421875,
      "model_forward_time": 0.11533832550048828,
      "step": 56699
    },
    {
      "epoch": 0.000346063232421875,
      "step": 56699,
      "training_step_time": 0.45392727851867676
    },
    {
      "epoch": 0.0003460693359375,
      "grad_norm": 0.10974062979221344,
      "learning_rate": 8.247462563808817e-07,
      "loss": 0.0319,
      "step": 56700
    },
    {
      "epoch": 0.0003460693359375,
      "model_forward_time": 0.11530351638793945,
      "step": 56700
    },
    {
      "epoch": 0.0003460693359375,
      "step": 56700,
      "training_step_time": 0.4334678649902344
    },
    {
      "epoch": 0.000346075439453125,
      "model_forward_time": 0.11507463455200195,
      "step": 56701
    },
    {
      "epoch": 0.000346075439453125,
      "step": 56701,
      "training_step_time": 0.41976261138916016
    },
    {
      "epoch": 0.00034608154296875,
      "model_forward_time": 0.11591291427612305,
      "step": 56702
    },
    {
      "epoch": 0.00034608154296875,
      "step": 56702,
      "training_step_time": 0.3678007125854492
    },
    {
      "epoch": 0.000346087646484375,
      "model_forward_time": 0.11542510986328125,
      "step": 56703
    },
    {
      "epoch": 0.000346087646484375,
      "step": 56703,
      "training_step_time": 0.4534316062927246
    },
    {
      "epoch": 0.00034609375,
      "model_forward_time": 0.115509033203125,
      "step": 56704
    },
    {
      "epoch": 0.00034609375,
      "step": 56704,
      "training_step_time": 0.45441746711730957
    },
    {
      "epoch": 0.000346099853515625,
      "model_forward_time": 0.11542010307312012,
      "step": 56705
    },
    {
      "epoch": 0.000346099853515625,
      "step": 56705,
      "training_step_time": 0.39528679847717285
    },
    {
      "epoch": 0.00034610595703125,
      "model_forward_time": 0.11546993255615234,
      "step": 56706
    },
    {
      "epoch": 0.00034610595703125,
      "step": 56706,
      "training_step_time": 0.5799620151519775
    },
    {
      "epoch": 0.000346112060546875,
      "model_forward_time": 0.11458039283752441,
      "step": 56707
    },
    {
      "epoch": 0.000346112060546875,
      "step": 56707,
      "training_step_time": 0.44478535652160645
    },
    {
      "epoch": 0.0003461181640625,
      "model_forward_time": 0.11481308937072754,
      "step": 56708
    },
    {
      "epoch": 0.0003461181640625,
      "step": 56708,
      "training_step_time": 0.38518309593200684
    },
    {
      "epoch": 0.000346124267578125,
      "model_forward_time": 0.11473369598388672,
      "step": 56709
    },
    {
      "epoch": 0.000346124267578125,
      "step": 56709,
      "training_step_time": 0.39156651496887207
    },
    {
      "epoch": 0.00034613037109375,
      "grad_norm": 0.10188528150320053,
      "learning_rate": 8.197690506363709e-07,
      "loss": 0.0377,
      "step": 56710
    },
    {
      "epoch": 0.00034613037109375,
      "model_forward_time": 0.1150367259979248,
      "step": 56710
    },
    {
      "epoch": 0.00034613037109375,
      "step": 56710,
      "training_step_time": 0.3989269733428955
    },
    {
      "epoch": 0.000346136474609375,
      "model_forward_time": 0.11481976509094238,
      "step": 56711
    },
    {
      "epoch": 0.000346136474609375,
      "step": 56711,
      "training_step_time": 0.39694738388061523
    },
    {
      "epoch": 0.000346142578125,
      "model_forward_time": 0.11513280868530273,
      "step": 56712
    },
    {
      "epoch": 0.000346142578125,
      "step": 56712,
      "training_step_time": 0.5740518569946289
    },
    {
      "epoch": 0.000346148681640625,
      "model_forward_time": 0.11552691459655762,
      "step": 56713
    },
    {
      "epoch": 0.000346148681640625,
      "step": 56713,
      "training_step_time": 0.3965575695037842
    },
    {
      "epoch": 0.00034615478515625,
      "model_forward_time": 0.11490082740783691,
      "step": 56714
    },
    {
      "epoch": 0.00034615478515625,
      "step": 56714,
      "training_step_time": 0.3999350070953369
    },
    {
      "epoch": 0.000346160888671875,
      "model_forward_time": 0.1148233413696289,
      "step": 56715
    },
    {
      "epoch": 0.000346160888671875,
      "step": 56715,
      "training_step_time": 0.4564945697784424
    },
    {
      "epoch": 0.0003461669921875,
      "model_forward_time": 0.1147465705871582,
      "step": 56716
    },
    {
      "epoch": 0.0003461669921875,
      "step": 56716,
      "training_step_time": 0.3740675449371338
    },
    {
      "epoch": 0.000346173095703125,
      "model_forward_time": 0.11470365524291992,
      "step": 56717
    },
    {
      "epoch": 0.000346173095703125,
      "step": 56717,
      "training_step_time": 0.4096963405609131
    },
    {
      "epoch": 0.00034617919921875,
      "model_forward_time": 0.11479806900024414,
      "step": 56718
    },
    {
      "epoch": 0.00034617919921875,
      "step": 56718,
      "training_step_time": 0.5601022243499756
    },
    {
      "epoch": 0.000346185302734375,
      "model_forward_time": 0.11499309539794922,
      "step": 56719
    },
    {
      "epoch": 0.000346185302734375,
      "step": 56719,
      "training_step_time": 0.3979489803314209
    },
    {
      "epoch": 0.00034619140625,
      "grad_norm": 0.10328376293182373,
      "learning_rate": 8.148067845474838e-07,
      "loss": 0.0362,
      "step": 56720
    },
    {
      "epoch": 0.00034619140625,
      "model_forward_time": 0.11444354057312012,
      "step": 56720
    },
    {
      "epoch": 0.00034619140625,
      "step": 56720,
      "training_step_time": 0.4160339832305908
    },
    {
      "epoch": 0.000346197509765625,
      "model_forward_time": 0.11475086212158203,
      "step": 56721
    },
    {
      "epoch": 0.000346197509765625,
      "step": 56721,
      "training_step_time": 0.4837064743041992
    },
    {
      "epoch": 0.00034620361328125,
      "model_forward_time": 0.11530232429504395,
      "step": 56722
    },
    {
      "epoch": 0.00034620361328125,
      "step": 56722,
      "training_step_time": 0.40230250358581543
    },
    {
      "epoch": 0.000346209716796875,
      "model_forward_time": 0.11415266990661621,
      "step": 56723
    },
    {
      "epoch": 0.000346209716796875,
      "step": 56723,
      "training_step_time": 0.3835279941558838
    },
    {
      "epoch": 0.0003462158203125,
      "model_forward_time": 0.11630535125732422,
      "step": 56724
    },
    {
      "epoch": 0.0003462158203125,
      "step": 56724,
      "training_step_time": 0.5819127559661865
    },
    {
      "epoch": 0.000346221923828125,
      "model_forward_time": 0.11508035659790039,
      "step": 56725
    },
    {
      "epoch": 0.000346221923828125,
      "step": 56725,
      "training_step_time": 0.3863518238067627
    },
    {
      "epoch": 0.00034622802734375,
      "model_forward_time": 0.11481690406799316,
      "step": 56726
    },
    {
      "epoch": 0.00034622802734375,
      "step": 56726,
      "training_step_time": 0.397998571395874
    },
    {
      "epoch": 0.000346234130859375,
      "model_forward_time": 0.11482429504394531,
      "step": 56727
    },
    {
      "epoch": 0.000346234130859375,
      "step": 56727,
      "training_step_time": 0.39632582664489746
    },
    {
      "epoch": 0.000346240234375,
      "model_forward_time": 0.11510038375854492,
      "step": 56728
    },
    {
      "epoch": 0.000346240234375,
      "step": 56728,
      "training_step_time": 0.4095020294189453
    },
    {
      "epoch": 0.000346246337890625,
      "model_forward_time": 0.11481332778930664,
      "step": 56729
    },
    {
      "epoch": 0.000346246337890625,
      "step": 56729,
      "training_step_time": 0.4609403610229492
    },
    {
      "epoch": 0.00034625244140625,
      "grad_norm": 0.09597739577293396,
      "learning_rate": 8.098594596216424e-07,
      "loss": 0.0361,
      "step": 56730
    },
    {
      "epoch": 0.00034625244140625,
      "model_forward_time": 0.11514735221862793,
      "step": 56730
    },
    {
      "epoch": 0.00034625244140625,
      "step": 56730,
      "training_step_time": 0.5775406360626221
    },
    {
      "epoch": 0.000346258544921875,
      "model_forward_time": 0.11478257179260254,
      "step": 56731
    },
    {
      "epoch": 0.000346258544921875,
      "step": 56731,
      "training_step_time": 0.4074704647064209
    },
    {
      "epoch": 0.0003462646484375,
      "model_forward_time": 0.11440563201904297,
      "step": 56732
    },
    {
      "epoch": 0.0003462646484375,
      "step": 56732,
      "training_step_time": 0.43446803092956543
    },
    {
      "epoch": 0.000346270751953125,
      "model_forward_time": 0.11448788642883301,
      "step": 56733
    },
    {
      "epoch": 0.000346270751953125,
      "step": 56733,
      "training_step_time": 0.436901330947876
    },
    {
      "epoch": 0.00034627685546875,
      "model_forward_time": 0.11441206932067871,
      "step": 56734
    },
    {
      "epoch": 0.00034627685546875,
      "step": 56734,
      "training_step_time": 0.3934764862060547
    },
    {
      "epoch": 0.000346282958984375,
      "model_forward_time": 0.11416006088256836,
      "step": 56735
    },
    {
      "epoch": 0.000346282958984375,
      "step": 56735,
      "training_step_time": 0.43854260444641113
    },
    {
      "epoch": 0.0003462890625,
      "model_forward_time": 0.11426639556884766,
      "step": 56736
    },
    {
      "epoch": 0.0003462890625,
      "step": 56736,
      "training_step_time": 0.533149242401123
    },
    {
      "epoch": 0.000346295166015625,
      "model_forward_time": 0.11507868766784668,
      "step": 56737
    },
    {
      "epoch": 0.000346295166015625,
      "step": 56737,
      "training_step_time": 0.40624380111694336
    },
    {
      "epoch": 0.00034630126953125,
      "model_forward_time": 0.11457943916320801,
      "step": 56738
    },
    {
      "epoch": 0.00034630126953125,
      "step": 56738,
      "training_step_time": 0.3996727466583252
    },
    {
      "epoch": 0.000346307373046875,
      "model_forward_time": 0.1145169734954834,
      "step": 56739
    },
    {
      "epoch": 0.000346307373046875,
      "step": 56739,
      "training_step_time": 0.4043252468109131
    },
    {
      "epoch": 0.0003463134765625,
      "grad_norm": 0.08229923993349075,
      "learning_rate": 8.049270773617057e-07,
      "loss": 0.0312,
      "step": 56740
    },
    {
      "epoch": 0.0003463134765625,
      "model_forward_time": 0.11429286003112793,
      "step": 56740
    },
    {
      "epoch": 0.0003463134765625,
      "step": 56740,
      "training_step_time": 0.38695788383483887
    },
    {
      "epoch": 0.000346319580078125,
      "model_forward_time": 0.11484313011169434,
      "step": 56741
    },
    {
      "epoch": 0.000346319580078125,
      "step": 56741,
      "training_step_time": 0.4030873775482178
    },
    {
      "epoch": 0.00034632568359375,
      "model_forward_time": 0.1146090030670166,
      "step": 56742
    },
    {
      "epoch": 0.00034632568359375,
      "step": 56742,
      "training_step_time": 0.7111895084381104
    },
    {
      "epoch": 0.000346331787109375,
      "model_forward_time": 0.1146078109741211,
      "step": 56743
    },
    {
      "epoch": 0.000346331787109375,
      "step": 56743,
      "training_step_time": 0.43536972999572754
    },
    {
      "epoch": 0.000346337890625,
      "model_forward_time": 0.11490821838378906,
      "step": 56744
    },
    {
      "epoch": 0.000346337890625,
      "step": 56744,
      "training_step_time": 0.39951539039611816
    },
    {
      "epoch": 0.000346343994140625,
      "model_forward_time": 0.11513280868530273,
      "step": 56745
    },
    {
      "epoch": 0.000346343994140625,
      "step": 56745,
      "training_step_time": 0.4802851676940918
    },
    {
      "epoch": 0.00034635009765625,
      "model_forward_time": 0.11459589004516602,
      "step": 56746
    },
    {
      "epoch": 0.00034635009765625,
      "step": 56746,
      "training_step_time": 0.40940141677856445
    },
    {
      "epoch": 0.000346356201171875,
      "model_forward_time": 0.1147007942199707,
      "step": 56747
    },
    {
      "epoch": 0.000346356201171875,
      "step": 56747,
      "training_step_time": 0.4700469970703125
    },
    {
      "epoch": 0.0003463623046875,
      "model_forward_time": 0.11442112922668457,
      "step": 56748
    },
    {
      "epoch": 0.0003463623046875,
      "step": 56748,
      "training_step_time": 0.5034716129302979
    },
    {
      "epoch": 0.000346368408203125,
      "model_forward_time": 0.11455082893371582,
      "step": 56749
    },
    {
      "epoch": 0.000346368408203125,
      "step": 56749,
      "training_step_time": 0.4004485607147217
    },
    {
      "epoch": 0.00034637451171875,
      "grad_norm": 0.10166660696268082,
      "learning_rate": 8.000096392660029e-07,
      "loss": 0.0349,
      "step": 56750
    },
    {
      "epoch": 0.00034637451171875,
      "model_forward_time": 0.11462163925170898,
      "step": 56750
    },
    {
      "epoch": 0.00034637451171875,
      "step": 56750,
      "training_step_time": 0.4000740051269531
    },
    {
      "epoch": 0.000346380615234375,
      "model_forward_time": 0.11442995071411133,
      "step": 56751
    },
    {
      "epoch": 0.000346380615234375,
      "step": 56751,
      "training_step_time": 0.393618106842041
    },
    {
      "epoch": 0.00034638671875,
      "model_forward_time": 0.11461091041564941,
      "step": 56752
    },
    {
      "epoch": 0.00034638671875,
      "step": 56752,
      "training_step_time": 0.3909637928009033
    },
    {
      "epoch": 0.000346392822265625,
      "model_forward_time": 0.11536550521850586,
      "step": 56753
    },
    {
      "epoch": 0.000346392822265625,
      "step": 56753,
      "training_step_time": 0.401461124420166
    },
    {
      "epoch": 0.00034639892578125,
      "model_forward_time": 0.11460518836975098,
      "step": 56754
    },
    {
      "epoch": 0.00034639892578125,
      "step": 56754,
      "training_step_time": 0.6096251010894775
    },
    {
      "epoch": 0.000346405029296875,
      "model_forward_time": 0.11493206024169922,
      "step": 56755
    },
    {
      "epoch": 0.000346405029296875,
      "step": 56755,
      "training_step_time": 0.4078369140625
    },
    {
      "epoch": 0.0003464111328125,
      "model_forward_time": 0.11450695991516113,
      "step": 56756
    },
    {
      "epoch": 0.0003464111328125,
      "step": 56756,
      "training_step_time": 0.452639102935791
    },
    {
      "epoch": 0.000346417236328125,
      "model_forward_time": 0.11495137214660645,
      "step": 56757
    },
    {
      "epoch": 0.000346417236328125,
      "step": 56757,
      "training_step_time": 0.47705817222595215
    },
    {
      "epoch": 0.00034642333984375,
      "model_forward_time": 0.11409211158752441,
      "step": 56758
    },
    {
      "epoch": 0.00034642333984375,
      "step": 56758,
      "training_step_time": 0.4504971504211426
    },
    {
      "epoch": 0.000346429443359375,
      "model_forward_time": 0.11449623107910156,
      "step": 56759
    },
    {
      "epoch": 0.000346429443359375,
      "step": 56759,
      "training_step_time": 0.4793553352355957
    },
    {
      "epoch": 0.000346435546875,
      "grad_norm": 0.09008081257343292,
      "learning_rate": 7.951071468283167e-07,
      "loss": 0.0363,
      "step": 56760
    },
    {
      "epoch": 0.000346435546875,
      "model_forward_time": 0.11492490768432617,
      "step": 56760
    },
    {
      "epoch": 0.000346435546875,
      "step": 56760,
      "training_step_time": 0.39826202392578125
    },
    {
      "epoch": 0.000346441650390625,
      "model_forward_time": 0.11413884162902832,
      "step": 56761
    },
    {
      "epoch": 0.000346441650390625,
      "step": 56761,
      "training_step_time": 0.4057936668395996
    },
    {
      "epoch": 0.00034644775390625,
      "model_forward_time": 0.11451077461242676,
      "step": 56762
    },
    {
      "epoch": 0.00034644775390625,
      "step": 56762,
      "training_step_time": 0.4955258369445801
    },
    {
      "epoch": 0.000346453857421875,
      "model_forward_time": 0.11469697952270508,
      "step": 56763
    },
    {
      "epoch": 0.000346453857421875,
      "step": 56763,
      "training_step_time": 0.393817663192749
    },
    {
      "epoch": 0.0003464599609375,
      "model_forward_time": 0.11444568634033203,
      "step": 56764
    },
    {
      "epoch": 0.0003464599609375,
      "step": 56764,
      "training_step_time": 0.4041421413421631
    },
    {
      "epoch": 0.000346466064453125,
      "model_forward_time": 0.11496186256408691,
      "step": 56765
    },
    {
      "epoch": 0.000346466064453125,
      "step": 56765,
      "training_step_time": 0.3829007148742676
    },
    {
      "epoch": 0.00034647216796875,
      "model_forward_time": 0.11457467079162598,
      "step": 56766
    },
    {
      "epoch": 0.00034647216796875,
      "step": 56766,
      "training_step_time": 0.43230271339416504
    },
    {
      "epoch": 0.000346478271484375,
      "model_forward_time": 0.11480832099914551,
      "step": 56767
    },
    {
      "epoch": 0.000346478271484375,
      "step": 56767,
      "training_step_time": 0.3992741107940674
    },
    {
      "epoch": 0.000346484375,
      "model_forward_time": 0.11458396911621094,
      "step": 56768
    },
    {
      "epoch": 0.000346484375,
      "step": 56768,
      "training_step_time": 0.4115891456604004
    },
    {
      "epoch": 0.000346490478515625,
      "model_forward_time": 0.114898681640625,
      "step": 56769
    },
    {
      "epoch": 0.000346490478515625,
      "step": 56769,
      "training_step_time": 0.4793260097503662
    },
    {
      "epoch": 0.00034649658203125,
      "grad_norm": 0.07777867466211319,
      "learning_rate": 7.90219601537906e-07,
      "loss": 0.0381,
      "step": 56770
    },
    {
      "epoch": 0.00034649658203125,
      "model_forward_time": 0.1148521900177002,
      "step": 56770
    },
    {
      "epoch": 0.00034649658203125,
      "step": 56770,
      "training_step_time": 0.4172828197479248
    },
    {
      "epoch": 0.000346502685546875,
      "model_forward_time": 0.11510944366455078,
      "step": 56771
    },
    {
      "epoch": 0.000346502685546875,
      "step": 56771,
      "training_step_time": 0.42430925369262695
    },
    {
      "epoch": 0.0003465087890625,
      "model_forward_time": 0.11478734016418457,
      "step": 56772
    },
    {
      "epoch": 0.0003465087890625,
      "step": 56772,
      "training_step_time": 0.4640476703643799
    },
    {
      "epoch": 0.000346514892578125,
      "model_forward_time": 0.1145329475402832,
      "step": 56773
    },
    {
      "epoch": 0.000346514892578125,
      "step": 56773,
      "training_step_time": 0.46256566047668457
    },
    {
      "epoch": 0.00034652099609375,
      "model_forward_time": 0.11530375480651855,
      "step": 56774
    },
    {
      "epoch": 0.00034652099609375,
      "step": 56774,
      "training_step_time": 0.4230496883392334
    },
    {
      "epoch": 0.000346527099609375,
      "model_forward_time": 0.1147918701171875,
      "step": 56775
    },
    {
      "epoch": 0.000346527099609375,
      "step": 56775,
      "training_step_time": 0.4396936893463135
    },
    {
      "epoch": 0.000346533203125,
      "model_forward_time": 0.11465597152709961,
      "step": 56776
    },
    {
      "epoch": 0.000346533203125,
      "step": 56776,
      "training_step_time": 0.4850912094116211
    },
    {
      "epoch": 0.000346539306640625,
      "model_forward_time": 0.11473560333251953,
      "step": 56777
    },
    {
      "epoch": 0.000346539306640625,
      "step": 56777,
      "training_step_time": 0.3931615352630615
    },
    {
      "epoch": 0.00034654541015625,
      "model_forward_time": 0.11447358131408691,
      "step": 56778
    },
    {
      "epoch": 0.00034654541015625,
      "step": 56778,
      "training_step_time": 0.40724754333496094
    },
    {
      "epoch": 0.000346551513671875,
      "model_forward_time": 0.11456823348999023,
      "step": 56779
    },
    {
      "epoch": 0.000346551513671875,
      "step": 56779,
      "training_step_time": 0.3989887237548828
    },
    {
      "epoch": 0.0003465576171875,
      "grad_norm": 0.10353805869817734,
      "learning_rate": 7.853470048794664e-07,
      "loss": 0.0359,
      "step": 56780
    },
    {
      "epoch": 0.0003465576171875,
      "model_forward_time": 0.11448478698730469,
      "step": 56780
    },
    {
      "epoch": 0.0003465576171875,
      "step": 56780,
      "training_step_time": 0.39197516441345215
    },
    {
      "epoch": 0.000346563720703125,
      "model_forward_time": 0.11491775512695312,
      "step": 56781
    },
    {
      "epoch": 0.000346563720703125,
      "step": 56781,
      "training_step_time": 0.3981297016143799
    },
    {
      "epoch": 0.00034656982421875,
      "model_forward_time": 0.11583113670349121,
      "step": 56782
    },
    {
      "epoch": 0.00034656982421875,
      "step": 56782,
      "training_step_time": 0.40277791023254395
    },
    {
      "epoch": 0.000346575927734375,
      "model_forward_time": 0.11499905586242676,
      "step": 56783
    },
    {
      "epoch": 0.000346575927734375,
      "step": 56783,
      "training_step_time": 0.4026470184326172
    },
    {
      "epoch": 0.00034658203125,
      "model_forward_time": 0.11524772644042969,
      "step": 56784
    },
    {
      "epoch": 0.00034658203125,
      "step": 56784,
      "training_step_time": 0.6310327053070068
    },
    {
      "epoch": 0.000346588134765625,
      "model_forward_time": 0.11575818061828613,
      "step": 56785
    },
    {
      "epoch": 0.000346588134765625,
      "step": 56785,
      "training_step_time": 0.40630483627319336
    },
    {
      "epoch": 0.00034659423828125,
      "model_forward_time": 0.11560249328613281,
      "step": 56786
    },
    {
      "epoch": 0.00034659423828125,
      "step": 56786,
      "training_step_time": 0.45744848251342773
    },
    {
      "epoch": 0.000346600341796875,
      "model_forward_time": 0.115081787109375,
      "step": 56787
    },
    {
      "epoch": 0.000346600341796875,
      "step": 56787,
      "training_step_time": 0.43081116676330566
    },
    {
      "epoch": 0.0003466064453125,
      "model_forward_time": 0.11459708213806152,
      "step": 56788
    },
    {
      "epoch": 0.0003466064453125,
      "step": 56788,
      "training_step_time": 0.41088342666625977
    },
    {
      "epoch": 0.000346612548828125,
      "model_forward_time": 0.1149744987487793,
      "step": 56789
    },
    {
      "epoch": 0.000346612548828125,
      "step": 56789,
      "training_step_time": 0.49673986434936523
    },
    {
      "epoch": 0.00034661865234375,
      "grad_norm": 0.07671986520290375,
      "learning_rate": 7.804893583331696e-07,
      "loss": 0.0326,
      "step": 56790
    },
    {
      "epoch": 0.00034661865234375,
      "model_forward_time": 0.11529850959777832,
      "step": 56790
    },
    {
      "epoch": 0.00034661865234375,
      "step": 56790,
      "training_step_time": 0.404094934463501
    },
    {
      "epoch": 0.000346624755859375,
      "model_forward_time": 0.11478853225708008,
      "step": 56791
    },
    {
      "epoch": 0.000346624755859375,
      "step": 56791,
      "training_step_time": 0.39906764030456543
    },
    {
      "epoch": 0.000346630859375,
      "model_forward_time": 0.11522221565246582,
      "step": 56792
    },
    {
      "epoch": 0.000346630859375,
      "step": 56792,
      "training_step_time": 0.39261722564697266
    },
    {
      "epoch": 0.000346636962890625,
      "model_forward_time": 0.1148672103881836,
      "step": 56793
    },
    {
      "epoch": 0.000346636962890625,
      "step": 56793,
      "training_step_time": 0.39580249786376953
    },
    {
      "epoch": 0.00034664306640625,
      "model_forward_time": 0.11509442329406738,
      "step": 56794
    },
    {
      "epoch": 0.00034664306640625,
      "step": 56794,
      "training_step_time": 0.3966848850250244
    },
    {
      "epoch": 0.000346649169921875,
      "model_forward_time": 0.11462974548339844,
      "step": 56795
    },
    {
      "epoch": 0.000346649169921875,
      "step": 56795,
      "training_step_time": 0.41747093200683594
    },
    {
      "epoch": 0.0003466552734375,
      "model_forward_time": 0.11555600166320801,
      "step": 56796
    },
    {
      "epoch": 0.0003466552734375,
      "step": 56796,
      "training_step_time": 0.5831491947174072
    },
    {
      "epoch": 0.000346661376953125,
      "model_forward_time": 0.11434173583984375,
      "step": 56797
    },
    {
      "epoch": 0.000346661376953125,
      "step": 56797,
      "training_step_time": 0.3778054714202881
    },
    {
      "epoch": 0.00034666748046875,
      "model_forward_time": 0.11543798446655273,
      "step": 56798
    },
    {
      "epoch": 0.00034666748046875,
      "step": 56798,
      "training_step_time": 0.4139266014099121
    },
    {
      "epoch": 0.000346673583984375,
      "model_forward_time": 0.11496853828430176,
      "step": 56799
    },
    {
      "epoch": 0.000346673583984375,
      "step": 56799,
      "training_step_time": 0.3882899284362793
    },
    {
      "epoch": 0.0003466796875,
      "grad_norm": 0.12046927958726883,
      "learning_rate": 7.756466633746407e-07,
      "loss": 0.033,
      "step": 56800
    },
    {
      "epoch": 0.0003466796875,
      "model_forward_time": 0.1150510311126709,
      "step": 56800
    },
    {
      "epoch": 0.0003466796875,
      "step": 56800,
      "training_step_time": 0.46048641204833984
    },
    {
      "epoch": 0.000346685791015625,
      "model_forward_time": 0.1154775619506836,
      "step": 56801
    },
    {
      "epoch": 0.000346685791015625,
      "step": 56801,
      "training_step_time": 0.38356828689575195
    },
    {
      "epoch": 0.00034669189453125,
      "model_forward_time": 0.11476874351501465,
      "step": 56802
    },
    {
      "epoch": 0.00034669189453125,
      "step": 56802,
      "training_step_time": 0.7068443298339844
    },
    {
      "epoch": 0.000346697998046875,
      "model_forward_time": 0.1143953800201416,
      "step": 56803
    },
    {
      "epoch": 0.000346697998046875,
      "step": 56803,
      "training_step_time": 0.43952155113220215
    },
    {
      "epoch": 0.0003467041015625,
      "model_forward_time": 0.11443519592285156,
      "step": 56804
    },
    {
      "epoch": 0.0003467041015625,
      "step": 56804,
      "training_step_time": 0.43656301498413086
    },
    {
      "epoch": 0.000346710205078125,
      "model_forward_time": 0.11423921585083008,
      "step": 56805
    },
    {
      "epoch": 0.000346710205078125,
      "step": 56805,
      "training_step_time": 0.397869348526001
    },
    {
      "epoch": 0.00034671630859375,
      "model_forward_time": 0.11385750770568848,
      "step": 56806
    },
    {
      "epoch": 0.00034671630859375,
      "step": 56806,
      "training_step_time": 0.39085912704467773
    },
    {
      "epoch": 0.000346722412109375,
      "model_forward_time": 0.11422061920166016,
      "step": 56807
    },
    {
      "epoch": 0.000346722412109375,
      "step": 56807,
      "training_step_time": 0.38783812522888184
    },
    {
      "epoch": 0.000346728515625,
      "model_forward_time": 0.1148533821105957,
      "step": 56808
    },
    {
      "epoch": 0.000346728515625,
      "step": 56808,
      "training_step_time": 0.4153625965118408
    },
    {
      "epoch": 0.000346734619140625,
      "model_forward_time": 0.11500692367553711,
      "step": 56809
    },
    {
      "epoch": 0.000346734619140625,
      "step": 56809,
      "training_step_time": 0.4008457660675049
    },
    {
      "epoch": 0.00034674072265625,
      "grad_norm": 0.09775466471910477,
      "learning_rate": 7.708189214749639e-07,
      "loss": 0.035,
      "step": 56810
    },
    {
      "epoch": 0.00034674072265625,
      "model_forward_time": 0.11527180671691895,
      "step": 56810
    },
    {
      "epoch": 0.00034674072265625,
      "step": 56810,
      "training_step_time": 0.4119760990142822
    },
    {
      "epoch": 0.000346746826171875,
      "model_forward_time": 0.11488199234008789,
      "step": 56811
    },
    {
      "epoch": 0.000346746826171875,
      "step": 56811,
      "training_step_time": 0.4117393493652344
    },
    {
      "epoch": 0.0003467529296875,
      "model_forward_time": 0.11499214172363281,
      "step": 56812
    },
    {
      "epoch": 0.0003467529296875,
      "step": 56812,
      "training_step_time": 0.3984658718109131
    },
    {
      "epoch": 0.000346759033203125,
      "model_forward_time": 0.11469411849975586,
      "step": 56813
    },
    {
      "epoch": 0.000346759033203125,
      "step": 56813,
      "training_step_time": 0.4117250442504883
    },
    {
      "epoch": 0.00034676513671875,
      "model_forward_time": 0.11455059051513672,
      "step": 56814
    },
    {
      "epoch": 0.00034676513671875,
      "step": 56814,
      "training_step_time": 0.5850179195404053
    },
    {
      "epoch": 0.000346771240234375,
      "model_forward_time": 0.11462020874023438,
      "step": 56815
    },
    {
      "epoch": 0.000346771240234375,
      "step": 56815,
      "training_step_time": 0.40787315368652344
    },
    {
      "epoch": 0.00034677734375,
      "model_forward_time": 0.11498594284057617,
      "step": 56816
    },
    {
      "epoch": 0.00034677734375,
      "step": 56816,
      "training_step_time": 0.42020106315612793
    },
    {
      "epoch": 0.000346783447265625,
      "model_forward_time": 0.11502218246459961,
      "step": 56817
    },
    {
      "epoch": 0.000346783447265625,
      "step": 56817,
      "training_step_time": 0.4172070026397705
    },
    {
      "epoch": 0.00034678955078125,
      "model_forward_time": 0.1145482063293457,
      "step": 56818
    },
    {
      "epoch": 0.00034678955078125,
      "step": 56818,
      "training_step_time": 0.42496728897094727
    },
    {
      "epoch": 0.000346795654296875,
      "model_forward_time": 0.11424994468688965,
      "step": 56819
    },
    {
      "epoch": 0.000346795654296875,
      "step": 56819,
      "training_step_time": 0.40486884117126465
    },
    {
      "epoch": 0.0003468017578125,
      "grad_norm": 0.0785447433590889,
      "learning_rate": 7.66006134100672e-07,
      "loss": 0.0345,
      "step": 56820
    },
    {
      "epoch": 0.0003468017578125,
      "model_forward_time": 0.11492466926574707,
      "step": 56820
    },
    {
      "epoch": 0.0003468017578125,
      "step": 56820,
      "training_step_time": 0.5222592353820801
    },
    {
      "epoch": 0.000346807861328125,
      "model_forward_time": 0.11415266990661621,
      "step": 56821
    },
    {
      "epoch": 0.000346807861328125,
      "step": 56821,
      "training_step_time": 0.44095540046691895
    },
    {
      "epoch": 0.00034681396484375,
      "model_forward_time": 0.11453747749328613,
      "step": 56822
    },
    {
      "epoch": 0.00034681396484375,
      "step": 56822,
      "training_step_time": 0.4164893627166748
    },
    {
      "epoch": 0.000346820068359375,
      "model_forward_time": 0.11410093307495117,
      "step": 56823
    },
    {
      "epoch": 0.000346820068359375,
      "step": 56823,
      "training_step_time": 0.3950350284576416
    },
    {
      "epoch": 0.000346826171875,
      "model_forward_time": 0.11490654945373535,
      "step": 56824
    },
    {
      "epoch": 0.000346826171875,
      "step": 56824,
      "training_step_time": 0.38496899604797363
    },
    {
      "epoch": 0.000346832275390625,
      "model_forward_time": 0.11553645133972168,
      "step": 56825
    },
    {
      "epoch": 0.000346832275390625,
      "step": 56825,
      "training_step_time": 0.40451622009277344
    },
    {
      "epoch": 0.00034683837890625,
      "model_forward_time": 0.1146397590637207,
      "step": 56826
    },
    {
      "epoch": 0.00034683837890625,
      "step": 56826,
      "training_step_time": 0.6061322689056396
    },
    {
      "epoch": 0.000346844482421875,
      "model_forward_time": 0.11478662490844727,
      "step": 56827
    },
    {
      "epoch": 0.000346844482421875,
      "step": 56827,
      "training_step_time": 0.39513540267944336
    },
    {
      "epoch": 0.0003468505859375,
      "model_forward_time": 0.11512327194213867,
      "step": 56828
    },
    {
      "epoch": 0.0003468505859375,
      "step": 56828,
      "training_step_time": 0.467975378036499
    },
    {
      "epoch": 0.000346856689453125,
      "model_forward_time": 0.115142822265625,
      "step": 56829
    },
    {
      "epoch": 0.000346856689453125,
      "step": 56829,
      "training_step_time": 0.42128515243530273
    },
    {
      "epoch": 0.00034686279296875,
      "grad_norm": 0.07626574486494064,
      "learning_rate": 7.612083027137728e-07,
      "loss": 0.0301,
      "step": 56830
    },
    {
      "epoch": 0.00034686279296875,
      "model_forward_time": 0.1146392822265625,
      "step": 56830
    },
    {
      "epoch": 0.00034686279296875,
      "step": 56830,
      "training_step_time": 0.4730994701385498
    },
    {
      "epoch": 0.000346868896484375,
      "model_forward_time": 0.1141510009765625,
      "step": 56831
    },
    {
      "epoch": 0.000346868896484375,
      "step": 56831,
      "training_step_time": 0.3915259838104248
    },
    {
      "epoch": 0.000346875,
      "model_forward_time": 0.11562156677246094,
      "step": 56832
    },
    {
      "epoch": 0.000346875,
      "step": 56832,
      "training_step_time": 0.4716322422027588
    },
    {
      "epoch": 0.000346881103515625,
      "model_forward_time": 0.1141047477722168,
      "step": 56833
    },
    {
      "epoch": 0.000346881103515625,
      "step": 56833,
      "training_step_time": 0.3791370391845703
    },
    {
      "epoch": 0.00034688720703125,
      "model_forward_time": 0.11495780944824219,
      "step": 56834
    },
    {
      "epoch": 0.00034688720703125,
      "step": 56834,
      "training_step_time": 0.3978312015533447
    },
    {
      "epoch": 0.000346893310546875,
      "model_forward_time": 0.11477088928222656,
      "step": 56835
    },
    {
      "epoch": 0.000346893310546875,
      "step": 56835,
      "training_step_time": 0.4673330783843994
    },
    {
      "epoch": 0.0003468994140625,
      "model_forward_time": 0.11482524871826172,
      "step": 56836
    },
    {
      "epoch": 0.0003468994140625,
      "step": 56836,
      "training_step_time": 0.4587128162384033
    },
    {
      "epoch": 0.000346905517578125,
      "model_forward_time": 0.11481952667236328,
      "step": 56837
    },
    {
      "epoch": 0.000346905517578125,
      "step": 56837,
      "training_step_time": 0.3961160182952881
    },
    {
      "epoch": 0.00034691162109375,
      "model_forward_time": 0.11511826515197754,
      "step": 56838
    },
    {
      "epoch": 0.00034691162109375,
      "step": 56838,
      "training_step_time": 0.3960742950439453
    },
    {
      "epoch": 0.000346917724609375,
      "model_forward_time": 0.11506009101867676,
      "step": 56839
    },
    {
      "epoch": 0.000346917724609375,
      "step": 56839,
      "training_step_time": 0.3833591938018799
    },
    {
      "epoch": 0.000346923828125,
      "grad_norm": 0.10892035812139511,
      "learning_rate": 7.564254287717176e-07,
      "loss": 0.0339,
      "step": 56840
    },
    {
      "epoch": 0.000346923828125,
      "model_forward_time": 0.11576342582702637,
      "step": 56840
    },
    {
      "epoch": 0.000346923828125,
      "step": 56840,
      "training_step_time": 0.40453171730041504
    },
    {
      "epoch": 0.000346929931640625,
      "model_forward_time": 0.11510944366455078,
      "step": 56841
    },
    {
      "epoch": 0.000346929931640625,
      "step": 56841,
      "training_step_time": 0.40813732147216797
    },
    {
      "epoch": 0.00034693603515625,
      "model_forward_time": 0.11536073684692383,
      "step": 56842
    },
    {
      "epoch": 0.00034693603515625,
      "step": 56842,
      "training_step_time": 0.3659706115722656
    },
    {
      "epoch": 0.000346942138671875,
      "model_forward_time": 0.11519408226013184,
      "step": 56843
    },
    {
      "epoch": 0.000346942138671875,
      "step": 56843,
      "training_step_time": 0.44319820404052734
    },
    {
      "epoch": 0.0003469482421875,
      "model_forward_time": 0.11478972434997559,
      "step": 56844
    },
    {
      "epoch": 0.0003469482421875,
      "step": 56844,
      "training_step_time": 0.6939163208007812
    },
    {
      "epoch": 0.000346954345703125,
      "model_forward_time": 0.11416888236999512,
      "step": 56845
    },
    {
      "epoch": 0.000346954345703125,
      "step": 56845,
      "training_step_time": 0.3957514762878418
    },
    {
      "epoch": 0.00034696044921875,
      "model_forward_time": 0.11419320106506348,
      "step": 56846
    },
    {
      "epoch": 0.00034696044921875,
      "step": 56846,
      "training_step_time": 0.386798620223999
    },
    {
      "epoch": 0.000346966552734375,
      "model_forward_time": 0.11493229866027832,
      "step": 56847
    },
    {
      "epoch": 0.000346966552734375,
      "step": 56847,
      "training_step_time": 0.3978099822998047
    },
    {
      "epoch": 0.00034697265625,
      "model_forward_time": 0.11449718475341797,
      "step": 56848
    },
    {
      "epoch": 0.00034697265625,
      "step": 56848,
      "training_step_time": 0.3940720558166504
    },
    {
      "epoch": 0.000346978759765625,
      "model_forward_time": 0.11439871788024902,
      "step": 56849
    },
    {
      "epoch": 0.000346978759765625,
      "step": 56849,
      "training_step_time": 0.4047818183898926
    },
    {
      "epoch": 0.00034698486328125,
      "grad_norm": 0.09928825497627258,
      "learning_rate": 7.516575137274162e-07,
      "loss": 0.0346,
      "step": 56850
    },
    {
      "epoch": 0.00034698486328125,
      "model_forward_time": 0.1161186695098877,
      "step": 56850
    },
    {
      "epoch": 0.00034698486328125,
      "step": 56850,
      "training_step_time": 0.6285288333892822
    },
    {
      "epoch": 0.000346990966796875,
      "model_forward_time": 0.11529946327209473,
      "step": 56851
    },
    {
      "epoch": 0.000346990966796875,
      "step": 56851,
      "training_step_time": 0.40960025787353516
    },
    {
      "epoch": 0.0003469970703125,
      "model_forward_time": 0.11378788948059082,
      "step": 56852
    },
    {
      "epoch": 0.0003469970703125,
      "step": 56852,
      "training_step_time": 0.3826751708984375
    },
    {
      "epoch": 0.000347003173828125,
      "model_forward_time": 0.11465573310852051,
      "step": 56853
    },
    {
      "epoch": 0.000347003173828125,
      "step": 56853,
      "training_step_time": 0.40334486961364746
    },
    {
      "epoch": 0.00034700927734375,
      "model_forward_time": 0.11516499519348145,
      "step": 56854
    },
    {
      "epoch": 0.00034700927734375,
      "step": 56854,
      "training_step_time": 0.3899803161621094
    },
    {
      "epoch": 0.000347015380859375,
      "model_forward_time": 0.11484074592590332,
      "step": 56855
    },
    {
      "epoch": 0.000347015380859375,
      "step": 56855,
      "training_step_time": 0.40836095809936523
    },
    {
      "epoch": 0.000347021484375,
      "model_forward_time": 0.11498355865478516,
      "step": 56856
    },
    {
      "epoch": 0.000347021484375,
      "step": 56856,
      "training_step_time": 0.6306219100952148
    },
    {
      "epoch": 0.000347027587890625,
      "model_forward_time": 0.11476874351501465,
      "step": 56857
    },
    {
      "epoch": 0.000347027587890625,
      "step": 56857,
      "training_step_time": 0.43633127212524414
    },
    {
      "epoch": 0.00034703369140625,
      "model_forward_time": 0.11495637893676758,
      "step": 56858
    },
    {
      "epoch": 0.00034703369140625,
      "step": 56858,
      "training_step_time": 0.44123053550720215
    },
    {
      "epoch": 0.000347039794921875,
      "model_forward_time": 0.11445164680480957,
      "step": 56859
    },
    {
      "epoch": 0.000347039794921875,
      "step": 56859,
      "training_step_time": 0.3886289596557617
    },
    {
      "epoch": 0.0003470458984375,
      "grad_norm": 0.05962057411670685,
      "learning_rate": 7.469045590292323e-07,
      "loss": 0.0334,
      "step": 56860
    },
    {
      "epoch": 0.0003470458984375,
      "model_forward_time": 0.11431694030761719,
      "step": 56860
    },
    {
      "epoch": 0.0003470458984375,
      "step": 56860,
      "training_step_time": 0.39720797538757324
    },
    {
      "epoch": 0.000347052001953125,
      "model_forward_time": 0.11478304862976074,
      "step": 56861
    },
    {
      "epoch": 0.000347052001953125,
      "step": 56861,
      "training_step_time": 0.41278910636901855
    },
    {
      "epoch": 0.00034705810546875,
      "model_forward_time": 0.11513447761535645,
      "step": 56862
    },
    {
      "epoch": 0.00034705810546875,
      "step": 56862,
      "training_step_time": 0.5870640277862549
    },
    {
      "epoch": 0.000347064208984375,
      "model_forward_time": 0.11582732200622559,
      "step": 56863
    },
    {
      "epoch": 0.000347064208984375,
      "step": 56863,
      "training_step_time": 0.40662503242492676
    },
    {
      "epoch": 0.0003470703125,
      "model_forward_time": 0.11501169204711914,
      "step": 56864
    },
    {
      "epoch": 0.0003470703125,
      "step": 56864,
      "training_step_time": 0.38734865188598633
    },
    {
      "epoch": 0.000347076416015625,
      "model_forward_time": 0.11503458023071289,
      "step": 56865
    },
    {
      "epoch": 0.000347076416015625,
      "step": 56865,
      "training_step_time": 0.3967273235321045
    },
    {
      "epoch": 0.00034708251953125,
      "model_forward_time": 0.11461067199707031,
      "step": 56866
    },
    {
      "epoch": 0.00034708251953125,
      "step": 56866,
      "training_step_time": 0.40300488471984863
    },
    {
      "epoch": 0.000347088623046875,
      "model_forward_time": 0.1152946949005127,
      "step": 56867
    },
    {
      "epoch": 0.000347088623046875,
      "step": 56867,
      "training_step_time": 0.3999772071838379
    },
    {
      "epoch": 0.0003470947265625,
      "model_forward_time": 0.11476683616638184,
      "step": 56868
    },
    {
      "epoch": 0.0003470947265625,
      "step": 56868,
      "training_step_time": 0.7395741939544678
    },
    {
      "epoch": 0.000347100830078125,
      "model_forward_time": 0.11537551879882812,
      "step": 56869
    },
    {
      "epoch": 0.000347100830078125,
      "step": 56869,
      "training_step_time": 0.39607715606689453
    },
    {
      "epoch": 0.00034710693359375,
      "grad_norm": 0.09276147931814194,
      "learning_rate": 7.421665661209887e-07,
      "loss": 0.0302,
      "step": 56870
    },
    {
      "epoch": 0.00034710693359375,
      "model_forward_time": 0.11450600624084473,
      "step": 56870
    },
    {
      "epoch": 0.00034710693359375,
      "step": 56870,
      "training_step_time": 0.47867774963378906
    },
    {
      "epoch": 0.000347113037109375,
      "model_forward_time": 0.11404705047607422,
      "step": 56871
    },
    {
      "epoch": 0.000347113037109375,
      "step": 56871,
      "training_step_time": 0.4889185428619385
    },
    {
      "epoch": 0.000347119140625,
      "model_forward_time": 0.1139531135559082,
      "step": 56872
    },
    {
      "epoch": 0.000347119140625,
      "step": 56872,
      "training_step_time": 0.39207935333251953
    },
    {
      "epoch": 0.000347125244140625,
      "model_forward_time": 0.11392068862915039,
      "step": 56873
    },
    {
      "epoch": 0.000347125244140625,
      "step": 56873,
      "training_step_time": 0.422149658203125
    },
    {
      "epoch": 0.00034713134765625,
      "model_forward_time": 0.11485767364501953,
      "step": 56874
    },
    {
      "epoch": 0.00034713134765625,
      "step": 56874,
      "training_step_time": 0.5160188674926758
    },
    {
      "epoch": 0.000347137451171875,
      "model_forward_time": 0.11459112167358398,
      "step": 56875
    },
    {
      "epoch": 0.000347137451171875,
      "step": 56875,
      "training_step_time": 0.42246437072753906
    },
    {
      "epoch": 0.0003471435546875,
      "model_forward_time": 0.1147303581237793,
      "step": 56876
    },
    {
      "epoch": 0.0003471435546875,
      "step": 56876,
      "training_step_time": 0.398268461227417
    },
    {
      "epoch": 0.000347149658203125,
      "model_forward_time": 0.1145784854888916,
      "step": 56877
    },
    {
      "epoch": 0.000347149658203125,
      "step": 56877,
      "training_step_time": 0.3939237594604492
    },
    {
      "epoch": 0.00034715576171875,
      "model_forward_time": 0.11480474472045898,
      "step": 56878
    },
    {
      "epoch": 0.00034715576171875,
      "step": 56878,
      "training_step_time": 0.39418530464172363
    },
    {
      "epoch": 0.000347161865234375,
      "model_forward_time": 0.1144571304321289,
      "step": 56879
    },
    {
      "epoch": 0.000347161865234375,
      "step": 56879,
      "training_step_time": 0.39741063117980957
    },
    {
      "epoch": 0.00034716796875,
      "grad_norm": 0.09543479979038239,
      "learning_rate": 7.374435364419674e-07,
      "loss": 0.0374,
      "step": 56880
    },
    {
      "epoch": 0.00034716796875,
      "model_forward_time": 0.11484026908874512,
      "step": 56880
    },
    {
      "epoch": 0.00034716796875,
      "step": 56880,
      "training_step_time": 0.5533721446990967
    },
    {
      "epoch": 0.000347174072265625,
      "model_forward_time": 0.11512875556945801,
      "step": 56881
    },
    {
      "epoch": 0.000347174072265625,
      "step": 56881,
      "training_step_time": 0.39437198638916016
    },
    {
      "epoch": 0.00034718017578125,
      "model_forward_time": 0.11476588249206543,
      "step": 56882
    },
    {
      "epoch": 0.00034718017578125,
      "step": 56882,
      "training_step_time": 0.39784741401672363
    },
    {
      "epoch": 0.000347186279296875,
      "model_forward_time": 0.11513590812683105,
      "step": 56883
    },
    {
      "epoch": 0.000347186279296875,
      "step": 56883,
      "training_step_time": 0.4814002513885498
    },
    {
      "epoch": 0.0003471923828125,
      "model_forward_time": 0.11529397964477539,
      "step": 56884
    },
    {
      "epoch": 0.0003471923828125,
      "step": 56884,
      "training_step_time": 0.45081114768981934
    },
    {
      "epoch": 0.000347198486328125,
      "model_forward_time": 0.1150963306427002,
      "step": 56885
    },
    {
      "epoch": 0.000347198486328125,
      "step": 56885,
      "training_step_time": 0.4803915023803711
    },
    {
      "epoch": 0.00034720458984375,
      "model_forward_time": 0.11510157585144043,
      "step": 56886
    },
    {
      "epoch": 0.00034720458984375,
      "step": 56886,
      "training_step_time": 0.4523439407348633
    },
    {
      "epoch": 0.000347210693359375,
      "model_forward_time": 0.11500167846679688,
      "step": 56887
    },
    {
      "epoch": 0.000347210693359375,
      "step": 56887,
      "training_step_time": 0.4516148567199707
    },
    {
      "epoch": 0.000347216796875,
      "model_forward_time": 0.11512541770935059,
      "step": 56888
    },
    {
      "epoch": 0.000347216796875,
      "step": 56888,
      "training_step_time": 0.4035911560058594
    },
    {
      "epoch": 0.000347222900390625,
      "model_forward_time": 0.11590290069580078,
      "step": 56889
    },
    {
      "epoch": 0.000347222900390625,
      "step": 56889,
      "training_step_time": 0.3974745273590088
    },
    {
      "epoch": 0.00034722900390625,
      "grad_norm": 0.08645786345005035,
      "learning_rate": 7.32735471426893e-07,
      "loss": 0.0362,
      "step": 56890
    },
    {
      "epoch": 0.00034722900390625,
      "model_forward_time": 0.11478090286254883,
      "step": 56890
    },
    {
      "epoch": 0.00034722900390625,
      "step": 56890,
      "training_step_time": 0.39557838439941406
    },
    {
      "epoch": 0.000347235107421875,
      "model_forward_time": 0.1150503158569336,
      "step": 56891
    },
    {
      "epoch": 0.000347235107421875,
      "step": 56891,
      "training_step_time": 0.3894531726837158
    },
    {
      "epoch": 0.0003472412109375,
      "model_forward_time": 0.11486601829528809,
      "step": 56892
    },
    {
      "epoch": 0.0003472412109375,
      "step": 56892,
      "training_step_time": 0.5898592472076416
    },
    {
      "epoch": 0.000347247314453125,
      "model_forward_time": 0.11415457725524902,
      "step": 56893
    },
    {
      "epoch": 0.000347247314453125,
      "step": 56893,
      "training_step_time": 0.4004857540130615
    },
    {
      "epoch": 0.00034725341796875,
      "model_forward_time": 0.11487889289855957,
      "step": 56894
    },
    {
      "epoch": 0.00034725341796875,
      "step": 56894,
      "training_step_time": 0.39727044105529785
    },
    {
      "epoch": 0.000347259521484375,
      "model_forward_time": 0.11492347717285156,
      "step": 56895
    },
    {
      "epoch": 0.000347259521484375,
      "step": 56895,
      "training_step_time": 0.4085347652435303
    },
    {
      "epoch": 0.000347265625,
      "model_forward_time": 0.11452865600585938,
      "step": 56896
    },
    {
      "epoch": 0.000347265625,
      "step": 56896,
      "training_step_time": 0.404374361038208
    },
    {
      "epoch": 0.000347271728515625,
      "model_forward_time": 0.11631536483764648,
      "step": 56897
    },
    {
      "epoch": 0.000347271728515625,
      "step": 56897,
      "training_step_time": 0.4965384006500244
    },
    {
      "epoch": 0.00034727783203125,
      "model_forward_time": 0.11492753028869629,
      "step": 56898
    },
    {
      "epoch": 0.00034727783203125,
      "step": 56898,
      "training_step_time": 0.5448408126831055
    },
    {
      "epoch": 0.000347283935546875,
      "model_forward_time": 0.1147756576538086,
      "step": 56899
    },
    {
      "epoch": 0.000347283935546875,
      "step": 56899,
      "training_step_time": 0.46456217765808105
    },
    {
      "epoch": 0.0003472900390625,
      "grad_norm": 0.12411276251077652,
      "learning_rate": 7.280423725059604e-07,
      "loss": 0.0344,
      "step": 56900
    },
    {
      "epoch": 0.0003472900390625,
      "model_forward_time": 0.11534476280212402,
      "step": 56900
    },
    {
      "epoch": 0.0003472900390625,
      "step": 56900,
      "training_step_time": 0.44185519218444824
    },
    {
      "epoch": 0.000347296142578125,
      "model_forward_time": 0.1150522232055664,
      "step": 56901
    },
    {
      "epoch": 0.000347296142578125,
      "step": 56901,
      "training_step_time": 0.39919567108154297
    },
    {
      "epoch": 0.00034730224609375,
      "model_forward_time": 0.11459159851074219,
      "step": 56902
    },
    {
      "epoch": 0.00034730224609375,
      "step": 56902,
      "training_step_time": 0.4003756046295166
    },
    {
      "epoch": 0.000347308349609375,
      "model_forward_time": 0.11460638046264648,
      "step": 56903
    },
    {
      "epoch": 0.000347308349609375,
      "step": 56903,
      "training_step_time": 0.401766300201416
    },
    {
      "epoch": 0.000347314453125,
      "model_forward_time": 0.11465024948120117,
      "step": 56904
    },
    {
      "epoch": 0.000347314453125,
      "step": 56904,
      "training_step_time": 0.5111465454101562
    },
    {
      "epoch": 0.000347320556640625,
      "model_forward_time": 0.11484694480895996,
      "step": 56905
    },
    {
      "epoch": 0.000347320556640625,
      "step": 56905,
      "training_step_time": 0.39603090286254883
    },
    {
      "epoch": 0.00034732666015625,
      "model_forward_time": 0.11554527282714844,
      "step": 56906
    },
    {
      "epoch": 0.00034732666015625,
      "step": 56906,
      "training_step_time": 0.38869786262512207
    },
    {
      "epoch": 0.000347332763671875,
      "model_forward_time": 0.11556529998779297,
      "step": 56907
    },
    {
      "epoch": 0.000347332763671875,
      "step": 56907,
      "training_step_time": 0.3994412422180176
    },
    {
      "epoch": 0.0003473388671875,
      "model_forward_time": 0.11489319801330566,
      "step": 56908
    },
    {
      "epoch": 0.0003473388671875,
      "step": 56908,
      "training_step_time": 0.39798760414123535
    },
    {
      "epoch": 0.000347344970703125,
      "model_forward_time": 0.11523604393005371,
      "step": 56909
    },
    {
      "epoch": 0.000347344970703125,
      "step": 56909,
      "training_step_time": 0.40191149711608887
    },
    {
      "epoch": 0.00034735107421875,
      "grad_norm": 0.08068206161260605,
      "learning_rate": 7.233642411048014e-07,
      "loss": 0.034,
      "step": 56910
    },
    {
      "epoch": 0.00034735107421875,
      "model_forward_time": 0.11487078666687012,
      "step": 56910
    },
    {
      "epoch": 0.00034735107421875,
      "step": 56910,
      "training_step_time": 0.8077304363250732
    },
    {
      "epoch": 0.000347357177734375,
      "model_forward_time": 0.11489033699035645,
      "step": 56911
    },
    {
      "epoch": 0.000347357177734375,
      "step": 56911,
      "training_step_time": 0.3987703323364258
    },
    {
      "epoch": 0.00034736328125,
      "model_forward_time": 0.11530184745788574,
      "step": 56912
    },
    {
      "epoch": 0.00034736328125,
      "step": 56912,
      "training_step_time": 0.4435608386993408
    },
    {
      "epoch": 0.000347369384765625,
      "model_forward_time": 0.11487030982971191,
      "step": 56913
    },
    {
      "epoch": 0.000347369384765625,
      "step": 56913,
      "training_step_time": 0.46368837356567383
    },
    {
      "epoch": 0.00034737548828125,
      "model_forward_time": 0.1145477294921875,
      "step": 56914
    },
    {
      "epoch": 0.00034737548828125,
      "step": 56914,
      "training_step_time": 0.4638400077819824
    },
    {
      "epoch": 0.000347381591796875,
      "model_forward_time": 0.11455321311950684,
      "step": 56915
    },
    {
      "epoch": 0.000347381591796875,
      "step": 56915,
      "training_step_time": 0.4190666675567627
    },
    {
      "epoch": 0.0003473876953125,
      "model_forward_time": 0.11526870727539062,
      "step": 56916
    },
    {
      "epoch": 0.0003473876953125,
      "step": 56916,
      "training_step_time": 0.47459912300109863
    },
    {
      "epoch": 0.000347393798828125,
      "model_forward_time": 0.11463618278503418,
      "step": 56917
    },
    {
      "epoch": 0.000347393798828125,
      "step": 56917,
      "training_step_time": 0.4105677604675293
    },
    {
      "epoch": 0.00034739990234375,
      "model_forward_time": 0.11619830131530762,
      "step": 56918
    },
    {
      "epoch": 0.00034739990234375,
      "step": 56918,
      "training_step_time": 0.4096341133117676
    },
    {
      "epoch": 0.000347406005859375,
      "model_forward_time": 0.11487746238708496,
      "step": 56919
    },
    {
      "epoch": 0.000347406005859375,
      "step": 56919,
      "training_step_time": 0.3947138786315918
    },
    {
      "epoch": 0.000347412109375,
      "grad_norm": 0.07715852558612823,
      "learning_rate": 7.187010786445181e-07,
      "loss": 0.0323,
      "step": 56920
    },
    {
      "epoch": 0.000347412109375,
      "model_forward_time": 0.11517620086669922,
      "step": 56920
    },
    {
      "epoch": 0.000347412109375,
      "step": 56920,
      "training_step_time": 0.40180373191833496
    },
    {
      "epoch": 0.000347418212890625,
      "model_forward_time": 0.11486172676086426,
      "step": 56921
    },
    {
      "epoch": 0.000347418212890625,
      "step": 56921,
      "training_step_time": 0.4031367301940918
    },
    {
      "epoch": 0.00034742431640625,
      "model_forward_time": 0.11548042297363281,
      "step": 56922
    },
    {
      "epoch": 0.00034742431640625,
      "step": 56922,
      "training_step_time": 0.5703985691070557
    },
    {
      "epoch": 0.000347430419921875,
      "model_forward_time": 0.1145627498626709,
      "step": 56923
    },
    {
      "epoch": 0.000347430419921875,
      "step": 56923,
      "training_step_time": 0.3937504291534424
    },
    {
      "epoch": 0.0003474365234375,
      "model_forward_time": 0.1145942211151123,
      "step": 56924
    },
    {
      "epoch": 0.0003474365234375,
      "step": 56924,
      "training_step_time": 0.3887190818786621
    },
    {
      "epoch": 0.000347442626953125,
      "model_forward_time": 0.11508440971374512,
      "step": 56925
    },
    {
      "epoch": 0.000347442626953125,
      "step": 56925,
      "training_step_time": 0.4172780513763428
    },
    {
      "epoch": 0.00034744873046875,
      "model_forward_time": 0.11609959602355957,
      "step": 56926
    },
    {
      "epoch": 0.00034744873046875,
      "step": 56926,
      "training_step_time": 0.39110589027404785
    },
    {
      "epoch": 0.000347454833984375,
      "model_forward_time": 0.11487054824829102,
      "step": 56927
    },
    {
      "epoch": 0.000347454833984375,
      "step": 56927,
      "training_step_time": 0.4794321060180664
    },
    {
      "epoch": 0.0003474609375,
      "model_forward_time": 0.11507463455200195,
      "step": 56928
    },
    {
      "epoch": 0.0003474609375,
      "step": 56928,
      "training_step_time": 0.589259147644043
    },
    {
      "epoch": 0.000347467041015625,
      "model_forward_time": 0.11494803428649902,
      "step": 56929
    },
    {
      "epoch": 0.000347467041015625,
      "step": 56929,
      "training_step_time": 0.4000861644744873
    },
    {
      "epoch": 0.00034747314453125,
      "grad_norm": 0.07327836751937866,
      "learning_rate": 7.140528865416441e-07,
      "loss": 0.0382,
      "step": 56930
    },
    {
      "epoch": 0.00034747314453125,
      "model_forward_time": 0.11487627029418945,
      "step": 56930
    },
    {
      "epoch": 0.00034747314453125,
      "step": 56930,
      "training_step_time": 0.3941326141357422
    },
    {
      "epoch": 0.000347479248046875,
      "model_forward_time": 0.11507439613342285,
      "step": 56931
    },
    {
      "epoch": 0.000347479248046875,
      "step": 56931,
      "training_step_time": 0.3949456214904785
    },
    {
      "epoch": 0.0003474853515625,
      "model_forward_time": 0.11490631103515625,
      "step": 56932
    },
    {
      "epoch": 0.0003474853515625,
      "step": 56932,
      "training_step_time": 0.3984811305999756
    },
    {
      "epoch": 0.000347491455078125,
      "model_forward_time": 0.11563682556152344,
      "step": 56933
    },
    {
      "epoch": 0.000347491455078125,
      "step": 56933,
      "training_step_time": 0.39386844635009766
    },
    {
      "epoch": 0.00034749755859375,
      "model_forward_time": 0.11471199989318848,
      "step": 56934
    },
    {
      "epoch": 0.00034749755859375,
      "step": 56934,
      "training_step_time": 0.6697642803192139
    },
    {
      "epoch": 0.000347503662109375,
      "model_forward_time": 0.11444878578186035,
      "step": 56935
    },
    {
      "epoch": 0.000347503662109375,
      "step": 56935,
      "training_step_time": 0.3962724208831787
    },
    {
      "epoch": 0.000347509765625,
      "model_forward_time": 0.11515212059020996,
      "step": 56936
    },
    {
      "epoch": 0.000347509765625,
      "step": 56936,
      "training_step_time": 0.385617733001709
    },
    {
      "epoch": 0.000347515869140625,
      "model_forward_time": 0.11460566520690918,
      "step": 56937
    },
    {
      "epoch": 0.000347515869140625,
      "step": 56937,
      "training_step_time": 0.3831746578216553
    },
    {
      "epoch": 0.00034752197265625,
      "model_forward_time": 0.11471009254455566,
      "step": 56938
    },
    {
      "epoch": 0.00034752197265625,
      "step": 56938,
      "training_step_time": 0.40126538276672363
    },
    {
      "epoch": 0.000347528076171875,
      "model_forward_time": 0.11502480506896973,
      "step": 56939
    },
    {
      "epoch": 0.000347528076171875,
      "step": 56939,
      "training_step_time": 0.5004267692565918
    },
    {
      "epoch": 0.0003475341796875,
      "grad_norm": 0.07879100739955902,
      "learning_rate": 7.094196662081831e-07,
      "loss": 0.0334,
      "step": 56940
    },
    {
      "epoch": 0.0003475341796875,
      "model_forward_time": 0.1146397590637207,
      "step": 56940
    },
    {
      "epoch": 0.0003475341796875,
      "step": 56940,
      "training_step_time": 0.7001421451568604
    },
    {
      "epoch": 0.000347540283203125,
      "model_forward_time": 0.1147150993347168,
      "step": 56941
    },
    {
      "epoch": 0.000347540283203125,
      "step": 56941,
      "training_step_time": 0.4741475582122803
    },
    {
      "epoch": 0.00034754638671875,
      "model_forward_time": 0.11388802528381348,
      "step": 56942
    },
    {
      "epoch": 0.00034754638671875,
      "step": 56942,
      "training_step_time": 0.42267870903015137
    },
    {
      "epoch": 0.000347552490234375,
      "model_forward_time": 0.11460638046264648,
      "step": 56943
    },
    {
      "epoch": 0.000347552490234375,
      "step": 56943,
      "training_step_time": 0.3898496627807617
    },
    {
      "epoch": 0.00034755859375,
      "model_forward_time": 0.11359977722167969,
      "step": 56944
    },
    {
      "epoch": 0.00034755859375,
      "step": 56944,
      "training_step_time": 0.4047365188598633
    },
    {
      "epoch": 0.000347564697265625,
      "model_forward_time": 0.11429858207702637,
      "step": 56945
    },
    {
      "epoch": 0.000347564697265625,
      "step": 56945,
      "training_step_time": 0.3951292037963867
    },
    {
      "epoch": 0.00034757080078125,
      "model_forward_time": 0.11497807502746582,
      "step": 56946
    },
    {
      "epoch": 0.00034757080078125,
      "step": 56946,
      "training_step_time": 0.3989698886871338
    },
    {
      "epoch": 0.000347576904296875,
      "model_forward_time": 0.11562204360961914,
      "step": 56947
    },
    {
      "epoch": 0.000347576904296875,
      "step": 56947,
      "training_step_time": 0.39379096031188965
    },
    {
      "epoch": 0.0003475830078125,
      "model_forward_time": 0.11517739295959473,
      "step": 56948
    },
    {
      "epoch": 0.0003475830078125,
      "step": 56948,
      "training_step_time": 0.3906741142272949
    },
    {
      "epoch": 0.000347589111328125,
      "model_forward_time": 0.11519670486450195,
      "step": 56949
    },
    {
      "epoch": 0.000347589111328125,
      "step": 56949,
      "training_step_time": 0.4002518653869629
    },
    {
      "epoch": 0.00034759521484375,
      "grad_norm": 0.09640682488679886,
      "learning_rate": 7.048014190515872e-07,
      "loss": 0.0378,
      "step": 56950
    },
    {
      "epoch": 0.00034759521484375,
      "model_forward_time": 0.11559915542602539,
      "step": 56950
    },
    {
      "epoch": 0.00034759521484375,
      "step": 56950,
      "training_step_time": 0.39610886573791504
    },
    {
      "epoch": 0.000347601318359375,
      "model_forward_time": 0.11474275588989258,
      "step": 56951
    },
    {
      "epoch": 0.000347601318359375,
      "step": 56951,
      "training_step_time": 0.38829946517944336
    },
    {
      "epoch": 0.000347607421875,
      "model_forward_time": 0.11549520492553711,
      "step": 56952
    },
    {
      "epoch": 0.000347607421875,
      "step": 56952,
      "training_step_time": 0.6770830154418945
    },
    {
      "epoch": 0.000347613525390625,
      "model_forward_time": 0.11489748954772949,
      "step": 56953
    },
    {
      "epoch": 0.000347613525390625,
      "step": 56953,
      "training_step_time": 0.435971736907959
    },
    {
      "epoch": 0.00034761962890625,
      "model_forward_time": 0.11565113067626953,
      "step": 56954
    },
    {
      "epoch": 0.00034761962890625,
      "step": 56954,
      "training_step_time": 0.4644908905029297
    },
    {
      "epoch": 0.000347625732421875,
      "model_forward_time": 0.1145017147064209,
      "step": 56955
    },
    {
      "epoch": 0.000347625732421875,
      "step": 56955,
      "training_step_time": 0.48878002166748047
    },
    {
      "epoch": 0.0003476318359375,
      "model_forward_time": 0.11481022834777832,
      "step": 56956
    },
    {
      "epoch": 0.0003476318359375,
      "step": 56956,
      "training_step_time": 0.40236353874206543
    },
    {
      "epoch": 0.000347637939453125,
      "model_forward_time": 0.11423873901367188,
      "step": 56957
    },
    {
      "epoch": 0.000347637939453125,
      "step": 56957,
      "training_step_time": 0.40189313888549805
    },
    {
      "epoch": 0.00034764404296875,
      "model_forward_time": 0.1145782470703125,
      "step": 56958
    },
    {
      "epoch": 0.00034764404296875,
      "step": 56958,
      "training_step_time": 0.5005800724029541
    },
    {
      "epoch": 0.000347650146484375,
      "model_forward_time": 0.11406898498535156,
      "step": 56959
    },
    {
      "epoch": 0.000347650146484375,
      "step": 56959,
      "training_step_time": 0.39783191680908203
    },
    {
      "epoch": 0.00034765625,
      "grad_norm": 0.07010643184185028,
      "learning_rate": 7.001981464747565e-07,
      "loss": 0.035,
      "step": 56960
    },
    {
      "epoch": 0.00034765625,
      "model_forward_time": 0.11464571952819824,
      "step": 56960
    },
    {
      "epoch": 0.00034765625,
      "step": 56960,
      "training_step_time": 0.3878438472747803
    },
    {
      "epoch": 0.000347662353515625,
      "model_forward_time": 0.11542487144470215,
      "step": 56961
    },
    {
      "epoch": 0.000347662353515625,
      "step": 56961,
      "training_step_time": 0.392866849899292
    },
    {
      "epoch": 0.00034766845703125,
      "model_forward_time": 0.11553215980529785,
      "step": 56962
    },
    {
      "epoch": 0.00034766845703125,
      "step": 56962,
      "training_step_time": 0.390106201171875
    },
    {
      "epoch": 0.000347674560546875,
      "model_forward_time": 0.11529183387756348,
      "step": 56963
    },
    {
      "epoch": 0.000347674560546875,
      "step": 56963,
      "training_step_time": 0.40846872329711914
    },
    {
      "epoch": 0.0003476806640625,
      "model_forward_time": 0.11490774154663086,
      "step": 56964
    },
    {
      "epoch": 0.0003476806640625,
      "step": 56964,
      "training_step_time": 0.5948605537414551
    },
    {
      "epoch": 0.000347686767578125,
      "model_forward_time": 0.11444759368896484,
      "step": 56965
    },
    {
      "epoch": 0.000347686767578125,
      "step": 56965,
      "training_step_time": 0.44178199768066406
    },
    {
      "epoch": 0.00034769287109375,
      "model_forward_time": 0.11457180976867676,
      "step": 56966
    },
    {
      "epoch": 0.00034769287109375,
      "step": 56966,
      "training_step_time": 0.43521738052368164
    },
    {
      "epoch": 0.000347698974609375,
      "model_forward_time": 0.11480879783630371,
      "step": 56967
    },
    {
      "epoch": 0.000347698974609375,
      "step": 56967,
      "training_step_time": 0.3871190547943115
    },
    {
      "epoch": 0.000347705078125,
      "model_forward_time": 0.11517906188964844,
      "step": 56968
    },
    {
      "epoch": 0.000347705078125,
      "step": 56968,
      "training_step_time": 0.4857652187347412
    },
    {
      "epoch": 0.000347711181640625,
      "model_forward_time": 0.11453104019165039,
      "step": 56969
    },
    {
      "epoch": 0.000347711181640625,
      "step": 56969,
      "training_step_time": 0.49866604804992676
    },
    {
      "epoch": 0.00034771728515625,
      "grad_norm": 0.10867926478385925,
      "learning_rate": 6.956098498760389e-07,
      "loss": 0.0368,
      "step": 56970
    },
    {
      "epoch": 0.00034771728515625,
      "model_forward_time": 0.11494660377502441,
      "step": 56970
    },
    {
      "epoch": 0.00034771728515625,
      "step": 56970,
      "training_step_time": 0.5254778861999512
    },
    {
      "epoch": 0.000347723388671875,
      "model_forward_time": 0.11470484733581543,
      "step": 56971
    },
    {
      "epoch": 0.000347723388671875,
      "step": 56971,
      "training_step_time": 0.3922560214996338
    },
    {
      "epoch": 0.0003477294921875,
      "model_forward_time": 0.11513590812683105,
      "step": 56972
    },
    {
      "epoch": 0.0003477294921875,
      "step": 56972,
      "training_step_time": 0.3941075801849365
    },
    {
      "epoch": 0.000347735595703125,
      "model_forward_time": 0.11455202102661133,
      "step": 56973
    },
    {
      "epoch": 0.000347735595703125,
      "step": 56973,
      "training_step_time": 0.3938257694244385
    },
    {
      "epoch": 0.00034774169921875,
      "model_forward_time": 0.11448979377746582,
      "step": 56974
    },
    {
      "epoch": 0.00034774169921875,
      "step": 56974,
      "training_step_time": 0.3906989097595215
    },
    {
      "epoch": 0.000347747802734375,
      "model_forward_time": 0.11458015441894531,
      "step": 56975
    },
    {
      "epoch": 0.000347747802734375,
      "step": 56975,
      "training_step_time": 0.39976954460144043
    },
    {
      "epoch": 0.00034775390625,
      "model_forward_time": 0.11515307426452637,
      "step": 56976
    },
    {
      "epoch": 0.00034775390625,
      "step": 56976,
      "training_step_time": 0.526604413986206
    },
    {
      "epoch": 0.000347760009765625,
      "model_forward_time": 0.11486220359802246,
      "step": 56977
    },
    {
      "epoch": 0.000347760009765625,
      "step": 56977,
      "training_step_time": 0.3972029685974121
    },
    {
      "epoch": 0.00034776611328125,
      "model_forward_time": 0.11505627632141113,
      "step": 56978
    },
    {
      "epoch": 0.00034776611328125,
      "step": 56978,
      "training_step_time": 0.39858317375183105
    },
    {
      "epoch": 0.000347772216796875,
      "model_forward_time": 0.11571455001831055,
      "step": 56979
    },
    {
      "epoch": 0.000347772216796875,
      "step": 56979,
      "training_step_time": 0.45241880416870117
    },
    {
      "epoch": 0.0003477783203125,
      "grad_norm": 0.10118627548217773,
      "learning_rate": 6.910365306492416e-07,
      "loss": 0.0302,
      "step": 56980
    },
    {
      "epoch": 0.0003477783203125,
      "model_forward_time": 0.11475777626037598,
      "step": 56980
    },
    {
      "epoch": 0.0003477783203125,
      "step": 56980,
      "training_step_time": 0.41548824310302734
    },
    {
      "epoch": 0.000347784423828125,
      "model_forward_time": 0.11435770988464355,
      "step": 56981
    },
    {
      "epoch": 0.000347784423828125,
      "step": 56981,
      "training_step_time": 0.4146454334259033
    },
    {
      "epoch": 0.00034779052734375,
      "model_forward_time": 0.11553573608398438,
      "step": 56982
    },
    {
      "epoch": 0.00034779052734375,
      "step": 56982,
      "training_step_time": 0.5959930419921875
    },
    {
      "epoch": 0.000347796630859375,
      "model_forward_time": 0.11471343040466309,
      "step": 56983
    },
    {
      "epoch": 0.000347796630859375,
      "step": 56983,
      "training_step_time": 0.4053323268890381
    },
    {
      "epoch": 0.000347802734375,
      "model_forward_time": 0.11490273475646973,
      "step": 56984
    },
    {
      "epoch": 0.000347802734375,
      "step": 56984,
      "training_step_time": 0.4699094295501709
    },
    {
      "epoch": 0.000347808837890625,
      "model_forward_time": 0.11477136611938477,
      "step": 56985
    },
    {
      "epoch": 0.000347808837890625,
      "step": 56985,
      "training_step_time": 0.38116931915283203
    },
    {
      "epoch": 0.00034781494140625,
      "model_forward_time": 0.11495184898376465,
      "step": 56986
    },
    {
      "epoch": 0.00034781494140625,
      "step": 56986,
      "training_step_time": 0.38776111602783203
    },
    {
      "epoch": 0.000347821044921875,
      "model_forward_time": 0.11496949195861816,
      "step": 56987
    },
    {
      "epoch": 0.000347821044921875,
      "step": 56987,
      "training_step_time": 0.39044809341430664
    },
    {
      "epoch": 0.0003478271484375,
      "model_forward_time": 0.1149744987487793,
      "step": 56988
    },
    {
      "epoch": 0.0003478271484375,
      "step": 56988,
      "training_step_time": 0.6012136936187744
    },
    {
      "epoch": 0.000347833251953125,
      "model_forward_time": 0.11530041694641113,
      "step": 56989
    },
    {
      "epoch": 0.000347833251953125,
      "step": 56989,
      "training_step_time": 0.39444828033447266
    },
    {
      "epoch": 0.00034783935546875,
      "grad_norm": 0.08962622284889221,
      "learning_rate": 6.864781901836259e-07,
      "loss": 0.0355,
      "step": 56990
    },
    {
      "epoch": 0.00034783935546875,
      "model_forward_time": 0.11513352394104004,
      "step": 56990
    },
    {
      "epoch": 0.00034783935546875,
      "step": 56990,
      "training_step_time": 0.4070720672607422
    },
    {
      "epoch": 0.000347845458984375,
      "model_forward_time": 0.11521506309509277,
      "step": 56991
    },
    {
      "epoch": 0.000347845458984375,
      "step": 56991,
      "training_step_time": 0.3917717933654785
    },
    {
      "epoch": 0.0003478515625,
      "model_forward_time": 0.11513996124267578,
      "step": 56992
    },
    {
      "epoch": 0.0003478515625,
      "step": 56992,
      "training_step_time": 0.3864612579345703
    },
    {
      "epoch": 0.000347857666015625,
      "model_forward_time": 0.11438584327697754,
      "step": 56993
    },
    {
      "epoch": 0.000347857666015625,
      "step": 56993,
      "training_step_time": 0.40303826332092285
    },
    {
      "epoch": 0.00034786376953125,
      "model_forward_time": 0.11487889289855957,
      "step": 56994
    },
    {
      "epoch": 0.00034786376953125,
      "step": 56994,
      "training_step_time": 0.7446694374084473
    },
    {
      "epoch": 0.000347869873046875,
      "model_forward_time": 0.11453390121459961,
      "step": 56995
    },
    {
      "epoch": 0.000347869873046875,
      "step": 56995,
      "training_step_time": 0.39855408668518066
    },
    {
      "epoch": 0.0003478759765625,
      "model_forward_time": 0.11419486999511719,
      "step": 56996
    },
    {
      "epoch": 0.0003478759765625,
      "step": 56996,
      "training_step_time": 0.4802060127258301
    },
    {
      "epoch": 0.000347882080078125,
      "model_forward_time": 0.11440587043762207,
      "step": 56997
    },
    {
      "epoch": 0.000347882080078125,
      "step": 56997,
      "training_step_time": 0.5045289993286133
    },
    {
      "epoch": 0.00034788818359375,
      "model_forward_time": 0.11445426940917969,
      "step": 56998
    },
    {
      "epoch": 0.00034788818359375,
      "step": 56998,
      "training_step_time": 0.38059353828430176
    },
    {
      "epoch": 0.000347894287109375,
      "model_forward_time": 0.11451911926269531,
      "step": 56999
    },
    {
      "epoch": 0.000347894287109375,
      "step": 56999,
      "training_step_time": 0.38537144660949707
    },
    {
      "epoch": 0.000347900390625,
      "grad_norm": 0.10835392773151398,
      "learning_rate": 6.819348298638839e-07,
      "loss": 0.0417,
      "step": 57000
    },
    {
      "epoch": 0.000347900390625,
      "model_forward_time": 0.11293292045593262,
      "step": 57000
    },
    {
      "epoch": 0.000347900390625,
      "step": 57000,
      "training_step_time": 0.3560822010040283
    },
    {
      "epoch": 0.000347906494140625,
      "model_forward_time": 0.11297774314880371,
      "step": 57001
    },
    {
      "epoch": 0.000347906494140625,
      "step": 57001,
      "training_step_time": 0.3796682357788086
    },
    {
      "epoch": 0.00034791259765625,
      "model_forward_time": 0.11237883567810059,
      "step": 57002
    },
    {
      "epoch": 0.00034791259765625,
      "step": 57002,
      "training_step_time": 0.3783683776855469
    },
    {
      "epoch": 0.000347918701171875,
      "model_forward_time": 0.11295437812805176,
      "step": 57003
    },
    {
      "epoch": 0.000347918701171875,
      "step": 57003,
      "training_step_time": 0.37888121604919434
    },
    {
      "epoch": 0.0003479248046875,
      "model_forward_time": 0.11359572410583496,
      "step": 57004
    },
    {
      "epoch": 0.0003479248046875,
      "step": 57004,
      "training_step_time": 0.4418628215789795
    },
    {
      "epoch": 0.000347930908203125,
      "model_forward_time": 0.11436676979064941,
      "step": 57005
    },
    {
      "epoch": 0.000347930908203125,
      "step": 57005,
      "training_step_time": 0.44597649574279785
    },
    {
      "epoch": 0.00034793701171875,
      "model_forward_time": 0.11446547508239746,
      "step": 57006
    },
    {
      "epoch": 0.00034793701171875,
      "step": 57006,
      "training_step_time": 0.3877685070037842
    },
    {
      "epoch": 0.000347943115234375,
      "model_forward_time": 0.11475920677185059,
      "step": 57007
    },
    {
      "epoch": 0.000347943115234375,
      "step": 57007,
      "training_step_time": 0.3943216800689697
    },
    {
      "epoch": 0.00034794921875,
      "model_forward_time": 0.11427593231201172,
      "step": 57008
    },
    {
      "epoch": 0.00034794921875,
      "step": 57008,
      "training_step_time": 0.3937397003173828
    },
    {
      "epoch": 0.000347955322265625,
      "model_forward_time": 0.11466550827026367,
      "step": 57009
    },
    {
      "epoch": 0.000347955322265625,
      "step": 57009,
      "training_step_time": 0.4410409927368164
    },
    {
      "epoch": 0.00034796142578125,
      "grad_norm": 0.0847688615322113,
      "learning_rate": 6.774064510701727e-07,
      "loss": 0.0367,
      "step": 57010
    },
    {
      "epoch": 0.00034796142578125,
      "model_forward_time": 0.11530351638793945,
      "step": 57010
    },
    {
      "epoch": 0.00034796142578125,
      "step": 57010,
      "training_step_time": 0.4446218013763428
    },
    {
      "epoch": 0.000347967529296875,
      "model_forward_time": 0.11491250991821289,
      "step": 57011
    },
    {
      "epoch": 0.000347967529296875,
      "step": 57011,
      "training_step_time": 0.5099713802337646
    },
    {
      "epoch": 0.0003479736328125,
      "model_forward_time": 0.11462044715881348,
      "step": 57012
    },
    {
      "epoch": 0.0003479736328125,
      "step": 57012,
      "training_step_time": 0.46605372428894043
    },
    {
      "epoch": 0.000347979736328125,
      "model_forward_time": 0.1150517463684082,
      "step": 57013
    },
    {
      "epoch": 0.000347979736328125,
      "step": 57013,
      "training_step_time": 0.39858555793762207
    },
    {
      "epoch": 0.00034798583984375,
      "model_forward_time": 0.11430835723876953,
      "step": 57014
    },
    {
      "epoch": 0.00034798583984375,
      "step": 57014,
      "training_step_time": 0.3945600986480713
    },
    {
      "epoch": 0.000347991943359375,
      "model_forward_time": 0.11511945724487305,
      "step": 57015
    },
    {
      "epoch": 0.000347991943359375,
      "step": 57015,
      "training_step_time": 0.3948371410369873
    },
    {
      "epoch": 0.000347998046875,
      "model_forward_time": 0.11439037322998047,
      "step": 57016
    },
    {
      "epoch": 0.000347998046875,
      "step": 57016,
      "training_step_time": 0.4003772735595703
    },
    {
      "epoch": 0.000348004150390625,
      "model_forward_time": 0.11528730392456055,
      "step": 57017
    },
    {
      "epoch": 0.000348004150390625,
      "step": 57017,
      "training_step_time": 0.3981926441192627
    },
    {
      "epoch": 0.00034801025390625,
      "model_forward_time": 0.11542272567749023,
      "step": 57018
    },
    {
      "epoch": 0.00034801025390625,
      "step": 57018,
      "training_step_time": 0.42878150939941406
    },
    {
      "epoch": 0.000348016357421875,
      "model_forward_time": 0.11535167694091797,
      "step": 57019
    },
    {
      "epoch": 0.000348016357421875,
      "step": 57019,
      "training_step_time": 0.42142581939697266
    },
    {
      "epoch": 0.0003480224609375,
      "grad_norm": 0.05838225781917572,
      "learning_rate": 6.728930551780865e-07,
      "loss": 0.034,
      "step": 57020
    },
    {
      "epoch": 0.0003480224609375,
      "model_forward_time": 0.1149137020111084,
      "step": 57020
    },
    {
      "epoch": 0.0003480224609375,
      "step": 57020,
      "training_step_time": 0.39511895179748535
    },
    {
      "epoch": 0.000348028564453125,
      "model_forward_time": 0.11459875106811523,
      "step": 57021
    },
    {
      "epoch": 0.000348028564453125,
      "step": 57021,
      "training_step_time": 0.40027451515197754
    },
    {
      "epoch": 0.00034803466796875,
      "model_forward_time": 0.11480855941772461,
      "step": 57022
    },
    {
      "epoch": 0.00034803466796875,
      "step": 57022,
      "training_step_time": 0.4395568370819092
    },
    {
      "epoch": 0.000348040771484375,
      "model_forward_time": 0.11492800712585449,
      "step": 57023
    },
    {
      "epoch": 0.000348040771484375,
      "step": 57023,
      "training_step_time": 0.4104776382446289
    },
    {
      "epoch": 0.000348046875,
      "model_forward_time": 0.11441302299499512,
      "step": 57024
    },
    {
      "epoch": 0.000348046875,
      "step": 57024,
      "training_step_time": 0.42958974838256836
    },
    {
      "epoch": 0.000348052978515625,
      "model_forward_time": 0.11498904228210449,
      "step": 57025
    },
    {
      "epoch": 0.000348052978515625,
      "step": 57025,
      "training_step_time": 0.37469053268432617
    },
    {
      "epoch": 0.00034805908203125,
      "model_forward_time": 0.11527013778686523,
      "step": 57026
    },
    {
      "epoch": 0.00034805908203125,
      "step": 57026,
      "training_step_time": 0.47408056259155273
    },
    {
      "epoch": 0.000348065185546875,
      "model_forward_time": 0.11496305465698242,
      "step": 57027
    },
    {
      "epoch": 0.000348065185546875,
      "step": 57027,
      "training_step_time": 0.44009828567504883
    },
    {
      "epoch": 0.0003480712890625,
      "model_forward_time": 0.11490392684936523,
      "step": 57028
    },
    {
      "epoch": 0.0003480712890625,
      "step": 57028,
      "training_step_time": 0.3957028388977051
    },
    {
      "epoch": 0.000348077392578125,
      "model_forward_time": 0.11536026000976562,
      "step": 57029
    },
    {
      "epoch": 0.000348077392578125,
      "step": 57029,
      "training_step_time": 0.40999698638916016
    },
    {
      "epoch": 0.00034808349609375,
      "grad_norm": 0.09487217664718628,
      "learning_rate": 6.683946435586952e-07,
      "loss": 0.0381,
      "step": 57030
    },
    {
      "epoch": 0.00034808349609375,
      "model_forward_time": 0.11517000198364258,
      "step": 57030
    },
    {
      "epoch": 0.00034808349609375,
      "step": 57030,
      "training_step_time": 0.39612245559692383
    },
    {
      "epoch": 0.000348089599609375,
      "model_forward_time": 0.11487483978271484,
      "step": 57031
    },
    {
      "epoch": 0.000348089599609375,
      "step": 57031,
      "training_step_time": 0.3903195858001709
    },
    {
      "epoch": 0.000348095703125,
      "model_forward_time": 0.11501598358154297,
      "step": 57032
    },
    {
      "epoch": 0.000348095703125,
      "step": 57032,
      "training_step_time": 0.39199304580688477
    },
    {
      "epoch": 0.000348101806640625,
      "model_forward_time": 0.11524081230163574,
      "step": 57033
    },
    {
      "epoch": 0.000348101806640625,
      "step": 57033,
      "training_step_time": 0.40300512313842773
    },
    {
      "epoch": 0.00034810791015625,
      "model_forward_time": 0.1151726245880127,
      "step": 57034
    },
    {
      "epoch": 0.00034810791015625,
      "step": 57034,
      "training_step_time": 0.38471364974975586
    },
    {
      "epoch": 0.000348114013671875,
      "model_forward_time": 0.11458253860473633,
      "step": 57035
    },
    {
      "epoch": 0.000348114013671875,
      "step": 57035,
      "training_step_time": 0.3987457752227783
    },
    {
      "epoch": 0.0003481201171875,
      "model_forward_time": 0.1154322624206543,
      "step": 57036
    },
    {
      "epoch": 0.0003481201171875,
      "step": 57036,
      "training_step_time": 0.40991878509521484
    },
    {
      "epoch": 0.000348126220703125,
      "model_forward_time": 0.11466693878173828,
      "step": 57037
    },
    {
      "epoch": 0.000348126220703125,
      "step": 57037,
      "training_step_time": 0.39678263664245605
    },
    {
      "epoch": 0.00034813232421875,
      "model_forward_time": 0.11557769775390625,
      "step": 57038
    },
    {
      "epoch": 0.00034813232421875,
      "step": 57038,
      "training_step_time": 0.4028174877166748
    },
    {
      "epoch": 0.000348138427734375,
      "model_forward_time": 0.11489534378051758,
      "step": 57039
    },
    {
      "epoch": 0.000348138427734375,
      "step": 57039,
      "training_step_time": 0.428572416305542
    },
    {
      "epoch": 0.00034814453125,
      "grad_norm": 0.07409413158893585,
      "learning_rate": 6.639112175784778e-07,
      "loss": 0.0294,
      "step": 57040
    },
    {
      "epoch": 0.00034814453125,
      "model_forward_time": 0.11513805389404297,
      "step": 57040
    },
    {
      "epoch": 0.00034814453125,
      "step": 57040,
      "training_step_time": 0.45533013343811035
    },
    {
      "epoch": 0.000348150634765625,
      "model_forward_time": 0.11597228050231934,
      "step": 57041
    },
    {
      "epoch": 0.000348150634765625,
      "step": 57041,
      "training_step_time": 0.4930422306060791
    },
    {
      "epoch": 0.00034815673828125,
      "model_forward_time": 0.11477041244506836,
      "step": 57042
    },
    {
      "epoch": 0.00034815673828125,
      "step": 57042,
      "training_step_time": 0.41916489601135254
    },
    {
      "epoch": 0.000348162841796875,
      "model_forward_time": 0.11524486541748047,
      "step": 57043
    },
    {
      "epoch": 0.000348162841796875,
      "step": 57043,
      "training_step_time": 0.39530372619628906
    },
    {
      "epoch": 0.0003481689453125,
      "model_forward_time": 0.11450600624084473,
      "step": 57044
    },
    {
      "epoch": 0.0003481689453125,
      "step": 57044,
      "training_step_time": 0.40019869804382324
    },
    {
      "epoch": 0.000348175048828125,
      "model_forward_time": 0.11490893363952637,
      "step": 57045
    },
    {
      "epoch": 0.000348175048828125,
      "step": 57045,
      "training_step_time": 0.4008324146270752
    },
    {
      "epoch": 0.00034818115234375,
      "model_forward_time": 0.11558127403259277,
      "step": 57046
    },
    {
      "epoch": 0.00034818115234375,
      "step": 57046,
      "training_step_time": 0.39814305305480957
    },
    {
      "epoch": 0.000348187255859375,
      "model_forward_time": 0.114715576171875,
      "step": 57047
    },
    {
      "epoch": 0.000348187255859375,
      "step": 57047,
      "training_step_time": 0.4140288829803467
    },
    {
      "epoch": 0.000348193359375,
      "model_forward_time": 0.11554956436157227,
      "step": 57048
    },
    {
      "epoch": 0.000348193359375,
      "step": 57048,
      "training_step_time": 0.764289140701294
    },
    {
      "epoch": 0.000348199462890625,
      "model_forward_time": 0.11475801467895508,
      "step": 57049
    },
    {
      "epoch": 0.000348199462890625,
      "step": 57049,
      "training_step_time": 0.393251895904541
    },
    {
      "epoch": 0.00034820556640625,
      "grad_norm": 0.1085435301065445,
      "learning_rate": 6.594427785993951e-07,
      "loss": 0.0336,
      "step": 57050
    },
    {
      "epoch": 0.00034820556640625,
      "model_forward_time": 0.1141512393951416,
      "step": 57050
    },
    {
      "epoch": 0.00034820556640625,
      "step": 57050,
      "training_step_time": 0.39032602310180664
    },
    {
      "epoch": 0.000348211669921875,
      "model_forward_time": 0.11441850662231445,
      "step": 57051
    },
    {
      "epoch": 0.000348211669921875,
      "step": 57051,
      "training_step_time": 0.40748047828674316
    },
    {
      "epoch": 0.0003482177734375,
      "model_forward_time": 0.11447739601135254,
      "step": 57052
    },
    {
      "epoch": 0.0003482177734375,
      "step": 57052,
      "training_step_time": 0.3837463855743408
    },
    {
      "epoch": 0.000348223876953125,
      "model_forward_time": 0.11490011215209961,
      "step": 57053
    },
    {
      "epoch": 0.000348223876953125,
      "step": 57053,
      "training_step_time": 0.4602670669555664
    },
    {
      "epoch": 0.00034822998046875,
      "model_forward_time": 0.11490607261657715,
      "step": 57054
    },
    {
      "epoch": 0.00034822998046875,
      "step": 57054,
      "training_step_time": 0.5927004814147949
    },
    {
      "epoch": 0.000348236083984375,
      "model_forward_time": 0.1143195629119873,
      "step": 57055
    },
    {
      "epoch": 0.000348236083984375,
      "step": 57055,
      "training_step_time": 0.447664737701416
    },
    {
      "epoch": 0.0003482421875,
      "model_forward_time": 0.11432719230651855,
      "step": 57056
    },
    {
      "epoch": 0.0003482421875,
      "step": 57056,
      "training_step_time": 0.41955041885375977
    },
    {
      "epoch": 0.000348248291015625,
      "model_forward_time": 0.11480236053466797,
      "step": 57057
    },
    {
      "epoch": 0.000348248291015625,
      "step": 57057,
      "training_step_time": 0.40317368507385254
    },
    {
      "epoch": 0.00034825439453125,
      "model_forward_time": 0.11442184448242188,
      "step": 57058
    },
    {
      "epoch": 0.00034825439453125,
      "step": 57058,
      "training_step_time": 0.3976166248321533
    },
    {
      "epoch": 0.000348260498046875,
      "model_forward_time": 0.11467385292053223,
      "step": 57059
    },
    {
      "epoch": 0.000348260498046875,
      "step": 57059,
      "training_step_time": 0.4441182613372803
    },
    {
      "epoch": 0.0003482666015625,
      "grad_norm": 0.0826130136847496,
      "learning_rate": 6.549893279788277e-07,
      "loss": 0.0311,
      "step": 57060
    },
    {
      "epoch": 0.0003482666015625,
      "model_forward_time": 0.1149139404296875,
      "step": 57060
    },
    {
      "epoch": 0.0003482666015625,
      "step": 57060,
      "training_step_time": 0.5841553211212158
    },
    {
      "epoch": 0.000348272705078125,
      "model_forward_time": 0.11493182182312012,
      "step": 57061
    },
    {
      "epoch": 0.000348272705078125,
      "step": 57061,
      "training_step_time": 0.3925588130950928
    },
    {
      "epoch": 0.00034827880859375,
      "model_forward_time": 0.11498069763183594,
      "step": 57062
    },
    {
      "epoch": 0.00034827880859375,
      "step": 57062,
      "training_step_time": 0.38886523246765137
    },
    {
      "epoch": 0.000348284912109375,
      "model_forward_time": 0.1148383617401123,
      "step": 57063
    },
    {
      "epoch": 0.000348284912109375,
      "step": 57063,
      "training_step_time": 0.4097449779510498
    },
    {
      "epoch": 0.000348291015625,
      "model_forward_time": 0.11510968208312988,
      "step": 57064
    },
    {
      "epoch": 0.000348291015625,
      "step": 57064,
      "training_step_time": 0.3846902847290039
    },
    {
      "epoch": 0.000348297119140625,
      "model_forward_time": 0.1141972541809082,
      "step": 57065
    },
    {
      "epoch": 0.000348297119140625,
      "step": 57065,
      "training_step_time": 0.38725781440734863
    },
    {
      "epoch": 0.00034830322265625,
      "model_forward_time": 0.11526679992675781,
      "step": 57066
    },
    {
      "epoch": 0.00034830322265625,
      "step": 57066,
      "training_step_time": 0.6750409603118896
    },
    {
      "epoch": 0.000348309326171875,
      "model_forward_time": 0.11501407623291016,
      "step": 57067
    },
    {
      "epoch": 0.000348309326171875,
      "step": 57067,
      "training_step_time": 0.4320662021636963
    },
    {
      "epoch": 0.0003483154296875,
      "model_forward_time": 0.11402392387390137,
      "step": 57068
    },
    {
      "epoch": 0.0003483154296875,
      "step": 57068,
      "training_step_time": 0.3678457736968994
    },
    {
      "epoch": 0.000348321533203125,
      "model_forward_time": 0.11429381370544434,
      "step": 57069
    },
    {
      "epoch": 0.000348321533203125,
      "step": 57069,
      "training_step_time": 0.4638712406158447
    },
    {
      "epoch": 0.00034832763671875,
      "grad_norm": 0.12315244972705841,
      "learning_rate": 6.50550867069627e-07,
      "loss": 0.0352,
      "step": 57070
    },
    {
      "epoch": 0.00034832763671875,
      "model_forward_time": 0.11409997940063477,
      "step": 57070
    },
    {
      "epoch": 0.00034832763671875,
      "step": 57070,
      "training_step_time": 0.44931626319885254
    },
    {
      "epoch": 0.000348333740234375,
      "model_forward_time": 0.11490988731384277,
      "step": 57071
    },
    {
      "epoch": 0.000348333740234375,
      "step": 57071,
      "training_step_time": 0.398942232131958
    },
    {
      "epoch": 0.00034833984375,
      "model_forward_time": 0.11457943916320801,
      "step": 57072
    },
    {
      "epoch": 0.00034833984375,
      "step": 57072,
      "training_step_time": 0.46774744987487793
    },
    {
      "epoch": 0.000348345947265625,
      "model_forward_time": 0.11401247978210449,
      "step": 57073
    },
    {
      "epoch": 0.000348345947265625,
      "step": 57073,
      "training_step_time": 0.3920412063598633
    },
    {
      "epoch": 0.00034835205078125,
      "model_forward_time": 0.11521625518798828,
      "step": 57074
    },
    {
      "epoch": 0.00034835205078125,
      "step": 57074,
      "training_step_time": 0.38790154457092285
    },
    {
      "epoch": 0.000348358154296875,
      "model_forward_time": 0.11523199081420898,
      "step": 57075
    },
    {
      "epoch": 0.000348358154296875,
      "step": 57075,
      "training_step_time": 0.3882405757904053
    },
    {
      "epoch": 0.0003483642578125,
      "model_forward_time": 0.11513018608093262,
      "step": 57076
    },
    {
      "epoch": 0.0003483642578125,
      "step": 57076,
      "training_step_time": 0.40377044677734375
    },
    {
      "epoch": 0.000348370361328125,
      "model_forward_time": 0.11497640609741211,
      "step": 57077
    },
    {
      "epoch": 0.000348370361328125,
      "step": 57077,
      "training_step_time": 0.4053232669830322
    },
    {
      "epoch": 0.00034837646484375,
      "model_forward_time": 0.11569809913635254,
      "step": 57078
    },
    {
      "epoch": 0.00034837646484375,
      "step": 57078,
      "training_step_time": 0.7067010402679443
    },
    {
      "epoch": 0.000348382568359375,
      "model_forward_time": 0.11466813087463379,
      "step": 57079
    },
    {
      "epoch": 0.000348382568359375,
      "step": 57079,
      "training_step_time": 0.4024174213409424
    },
    {
      "epoch": 0.000348388671875,
      "grad_norm": 0.08861324936151505,
      "learning_rate": 6.461273972200755e-07,
      "loss": 0.0354,
      "step": 57080
    },
    {
      "epoch": 0.000348388671875,
      "model_forward_time": 0.11480212211608887,
      "step": 57080
    },
    {
      "epoch": 0.000348388671875,
      "step": 57080,
      "training_step_time": 0.433962345123291
    },
    {
      "epoch": 0.000348394775390625,
      "model_forward_time": 0.11466741561889648,
      "step": 57081
    },
    {
      "epoch": 0.000348394775390625,
      "step": 57081,
      "training_step_time": 0.38808417320251465
    },
    {
      "epoch": 0.00034840087890625,
      "model_forward_time": 0.11402702331542969,
      "step": 57082
    },
    {
      "epoch": 0.00034840087890625,
      "step": 57082,
      "training_step_time": 0.38074302673339844
    },
    {
      "epoch": 0.000348406982421875,
      "model_forward_time": 0.11450481414794922,
      "step": 57083
    },
    {
      "epoch": 0.000348406982421875,
      "step": 57083,
      "training_step_time": 0.42118263244628906
    },
    {
      "epoch": 0.0003484130859375,
      "model_forward_time": 0.11501884460449219,
      "step": 57084
    },
    {
      "epoch": 0.0003484130859375,
      "step": 57084,
      "training_step_time": 0.5566146373748779
    },
    {
      "epoch": 0.000348419189453125,
      "model_forward_time": 0.11454248428344727,
      "step": 57085
    },
    {
      "epoch": 0.000348419189453125,
      "step": 57085,
      "training_step_time": 0.4109489917755127
    },
    {
      "epoch": 0.00034842529296875,
      "model_forward_time": 0.11498427391052246,
      "step": 57086
    },
    {
      "epoch": 0.00034842529296875,
      "step": 57086,
      "training_step_time": 0.47047996520996094
    },
    {
      "epoch": 0.000348431396484375,
      "model_forward_time": 0.11462068557739258,
      "step": 57087
    },
    {
      "epoch": 0.000348431396484375,
      "step": 57087,
      "training_step_time": 0.38431549072265625
    },
    {
      "epoch": 0.0003484375,
      "model_forward_time": 0.11414241790771484,
      "step": 57088
    },
    {
      "epoch": 0.0003484375,
      "step": 57088,
      "training_step_time": 0.38106656074523926
    },
    {
      "epoch": 0.000348443603515625,
      "model_forward_time": 0.11463332176208496,
      "step": 57089
    },
    {
      "epoch": 0.000348443603515625,
      "step": 57089,
      "training_step_time": 0.39854979515075684
    },
    {
      "epoch": 0.00034844970703125,
      "grad_norm": 0.09450287371873856,
      "learning_rate": 6.417189197739093e-07,
      "loss": 0.0364,
      "step": 57090
    },
    {
      "epoch": 0.00034844970703125,
      "model_forward_time": 0.11418771743774414,
      "step": 57090
    },
    {
      "epoch": 0.00034844970703125,
      "step": 57090,
      "training_step_time": 0.6161861419677734
    },
    {
      "epoch": 0.000348455810546875,
      "model_forward_time": 0.11428546905517578,
      "step": 57091
    },
    {
      "epoch": 0.000348455810546875,
      "step": 57091,
      "training_step_time": 0.40782666206359863
    },
    {
      "epoch": 0.0003484619140625,
      "model_forward_time": 0.1150200366973877,
      "step": 57092
    },
    {
      "epoch": 0.0003484619140625,
      "step": 57092,
      "training_step_time": 0.39403581619262695
    },
    {
      "epoch": 0.000348468017578125,
      "model_forward_time": 0.11392068862915039,
      "step": 57093
    },
    {
      "epoch": 0.000348468017578125,
      "step": 57093,
      "training_step_time": 0.3823976516723633
    },
    {
      "epoch": 0.00034847412109375,
      "model_forward_time": 0.11511707305908203,
      "step": 57094
    },
    {
      "epoch": 0.00034847412109375,
      "step": 57094,
      "training_step_time": 0.382457971572876
    },
    {
      "epoch": 0.000348480224609375,
      "model_forward_time": 0.11392664909362793,
      "step": 57095
    },
    {
      "epoch": 0.000348480224609375,
      "step": 57095,
      "training_step_time": 0.4000377655029297
    },
    {
      "epoch": 0.000348486328125,
      "model_forward_time": 0.1147623062133789,
      "step": 57096
    },
    {
      "epoch": 0.000348486328125,
      "step": 57096,
      "training_step_time": 0.7852153778076172
    },
    {
      "epoch": 0.000348492431640625,
      "model_forward_time": 0.11745953559875488,
      "step": 57097
    },
    {
      "epoch": 0.000348492431640625,
      "step": 57097,
      "training_step_time": 0.4891393184661865
    },
    {
      "epoch": 0.00034849853515625,
      "model_forward_time": 0.11597323417663574,
      "step": 57098
    },
    {
      "epoch": 0.00034849853515625,
      "step": 57098,
      "training_step_time": 0.42170023918151855
    },
    {
      "epoch": 0.000348504638671875,
      "model_forward_time": 0.11421680450439453,
      "step": 57099
    },
    {
      "epoch": 0.000348504638671875,
      "step": 57099,
      "training_step_time": 0.38643765449523926
    },
    {
      "epoch": 0.0003485107421875,
      "grad_norm": 0.09453712403774261,
      "learning_rate": 6.373254360703018e-07,
      "loss": 0.0306,
      "step": 57100
    },
    {
      "epoch": 0.0003485107421875,
      "model_forward_time": 0.1141512393951416,
      "step": 57100
    },
    {
      "epoch": 0.0003485107421875,
      "step": 57100,
      "training_step_time": 0.39577412605285645
    },
    {
      "epoch": 0.000348516845703125,
      "model_forward_time": 0.11391115188598633,
      "step": 57101
    },
    {
      "epoch": 0.000348516845703125,
      "step": 57101,
      "training_step_time": 0.3925144672393799
    },
    {
      "epoch": 0.00034852294921875,
      "model_forward_time": 0.11484694480895996,
      "step": 57102
    },
    {
      "epoch": 0.00034852294921875,
      "step": 57102,
      "training_step_time": 0.43297791481018066
    },
    {
      "epoch": 0.000348529052734375,
      "model_forward_time": 0.1148068904876709,
      "step": 57103
    },
    {
      "epoch": 0.000348529052734375,
      "step": 57103,
      "training_step_time": 0.39462947845458984
    },
    {
      "epoch": 0.00034853515625,
      "model_forward_time": 0.11498022079467773,
      "step": 57104
    },
    {
      "epoch": 0.00034853515625,
      "step": 57104,
      "training_step_time": 0.3950469493865967
    },
    {
      "epoch": 0.000348541259765625,
      "model_forward_time": 0.11510205268859863,
      "step": 57105
    },
    {
      "epoch": 0.000348541259765625,
      "step": 57105,
      "training_step_time": 0.40526580810546875
    },
    {
      "epoch": 0.00034854736328125,
      "model_forward_time": 0.11484861373901367,
      "step": 57106
    },
    {
      "epoch": 0.00034854736328125,
      "step": 57106,
      "training_step_time": 0.3994784355163574
    },
    {
      "epoch": 0.000348553466796875,
      "model_forward_time": 0.1147456169128418,
      "step": 57107
    },
    {
      "epoch": 0.000348553466796875,
      "step": 57107,
      "training_step_time": 0.39785099029541016
    },
    {
      "epoch": 0.0003485595703125,
      "model_forward_time": 0.11482977867126465,
      "step": 57108
    },
    {
      "epoch": 0.0003485595703125,
      "step": 57108,
      "training_step_time": 0.7032210826873779
    },
    {
      "epoch": 0.000348565673828125,
      "model_forward_time": 0.11394596099853516,
      "step": 57109
    },
    {
      "epoch": 0.000348565673828125,
      "step": 57109,
      "training_step_time": 0.40061426162719727
    },
    {
      "epoch": 0.00034857177734375,
      "grad_norm": 0.08130079507827759,
      "learning_rate": 6.329469474438798e-07,
      "loss": 0.0356,
      "step": 57110
    },
    {
      "epoch": 0.00034857177734375,
      "model_forward_time": 0.11525917053222656,
      "step": 57110
    },
    {
      "epoch": 0.00034857177734375,
      "step": 57110,
      "training_step_time": 0.47817564010620117
    },
    {
      "epoch": 0.000348577880859375,
      "model_forward_time": 0.11441373825073242,
      "step": 57111
    },
    {
      "epoch": 0.000348577880859375,
      "step": 57111,
      "training_step_time": 0.43727946281433105
    },
    {
      "epoch": 0.000348583984375,
      "model_forward_time": 0.11475062370300293,
      "step": 57112
    },
    {
      "epoch": 0.000348583984375,
      "step": 57112,
      "training_step_time": 0.4055521488189697
    },
    {
      "epoch": 0.000348590087890625,
      "model_forward_time": 0.11421084403991699,
      "step": 57113
    },
    {
      "epoch": 0.000348590087890625,
      "step": 57113,
      "training_step_time": 0.4016859531402588
    },
    {
      "epoch": 0.00034859619140625,
      "model_forward_time": 0.11530613899230957,
      "step": 57114
    },
    {
      "epoch": 0.00034859619140625,
      "step": 57114,
      "training_step_time": 0.47847890853881836
    },
    {
      "epoch": 0.000348602294921875,
      "model_forward_time": 0.11459660530090332,
      "step": 57115
    },
    {
      "epoch": 0.000348602294921875,
      "step": 57115,
      "training_step_time": 0.39254093170166016
    },
    {
      "epoch": 0.0003486083984375,
      "model_forward_time": 0.11547660827636719,
      "step": 57116
    },
    {
      "epoch": 0.0003486083984375,
      "step": 57116,
      "training_step_time": 0.39435505867004395
    },
    {
      "epoch": 0.000348614501953125,
      "model_forward_time": 0.11485695838928223,
      "step": 57117
    },
    {
      "epoch": 0.000348614501953125,
      "step": 57117,
      "training_step_time": 0.3890647888183594
    },
    {
      "epoch": 0.00034862060546875,
      "model_forward_time": 0.11510777473449707,
      "step": 57118
    },
    {
      "epoch": 0.00034862060546875,
      "step": 57118,
      "training_step_time": 0.386199951171875
    },
    {
      "epoch": 0.000348626708984375,
      "model_forward_time": 0.11537742614746094,
      "step": 57119
    },
    {
      "epoch": 0.000348626708984375,
      "step": 57119,
      "training_step_time": 0.3997018337249756
    },
    {
      "epoch": 0.0003486328125,
      "grad_norm": 0.09129857271909714,
      "learning_rate": 6.285834552247128e-07,
      "loss": 0.0349,
      "step": 57120
    },
    {
      "epoch": 0.0003486328125,
      "model_forward_time": 0.11515164375305176,
      "step": 57120
    },
    {
      "epoch": 0.0003486328125,
      "step": 57120,
      "training_step_time": 0.6911618709564209
    },
    {
      "epoch": 0.000348638916015625,
      "model_forward_time": 0.11503791809082031,
      "step": 57121
    },
    {
      "epoch": 0.000348638916015625,
      "step": 57121,
      "training_step_time": 0.3922138214111328
    },
    {
      "epoch": 0.00034864501953125,
      "model_forward_time": 0.11490702629089355,
      "step": 57122
    },
    {
      "epoch": 0.00034864501953125,
      "step": 57122,
      "training_step_time": 0.4668242931365967
    },
    {
      "epoch": 0.000348651123046875,
      "model_forward_time": 0.11473703384399414,
      "step": 57123
    },
    {
      "epoch": 0.000348651123046875,
      "step": 57123,
      "training_step_time": 0.443371057510376
    },
    {
      "epoch": 0.0003486572265625,
      "model_forward_time": 0.11462640762329102,
      "step": 57124
    },
    {
      "epoch": 0.0003486572265625,
      "step": 57124,
      "training_step_time": 0.4483029842376709
    },
    {
      "epoch": 0.000348663330078125,
      "model_forward_time": 0.11455750465393066,
      "step": 57125
    },
    {
      "epoch": 0.000348663330078125,
      "step": 57125,
      "training_step_time": 0.44341111183166504
    },
    {
      "epoch": 0.00034866943359375,
      "model_forward_time": 0.1150050163269043,
      "step": 57126
    },
    {
      "epoch": 0.00034866943359375,
      "step": 57126,
      "training_step_time": 0.4303770065307617
    },
    {
      "epoch": 0.000348675537109375,
      "model_forward_time": 0.11385226249694824,
      "step": 57127
    },
    {
      "epoch": 0.000348675537109375,
      "step": 57127,
      "training_step_time": 0.38736891746520996
    },
    {
      "epoch": 0.000348681640625,
      "model_forward_time": 0.11465620994567871,
      "step": 57128
    },
    {
      "epoch": 0.000348681640625,
      "step": 57128,
      "training_step_time": 0.3923976421356201
    },
    {
      "epoch": 0.000348687744140625,
      "model_forward_time": 0.11492085456848145,
      "step": 57129
    },
    {
      "epoch": 0.000348687744140625,
      "step": 57129,
      "training_step_time": 0.3938920497894287
    },
    {
      "epoch": 0.00034869384765625,
      "grad_norm": 0.10850264132022858,
      "learning_rate": 6.24234960738318e-07,
      "loss": 0.0411,
      "step": 57130
    },
    {
      "epoch": 0.00034869384765625,
      "model_forward_time": 0.11492276191711426,
      "step": 57130
    },
    {
      "epoch": 0.00034869384765625,
      "step": 57130,
      "training_step_time": 0.3891623020172119
    },
    {
      "epoch": 0.000348699951171875,
      "model_forward_time": 0.11662030220031738,
      "step": 57131
    },
    {
      "epoch": 0.000348699951171875,
      "step": 57131,
      "training_step_time": 0.40079474449157715
    },
    {
      "epoch": 0.0003487060546875,
      "model_forward_time": 0.11521530151367188,
      "step": 57132
    },
    {
      "epoch": 0.0003487060546875,
      "step": 57132,
      "training_step_time": 0.6541104316711426
    },
    {
      "epoch": 0.000348712158203125,
      "model_forward_time": 0.11468744277954102,
      "step": 57133
    },
    {
      "epoch": 0.000348712158203125,
      "step": 57133,
      "training_step_time": 0.3994414806365967
    },
    {
      "epoch": 0.00034871826171875,
      "model_forward_time": 0.11536955833435059,
      "step": 57134
    },
    {
      "epoch": 0.00034871826171875,
      "step": 57134,
      "training_step_time": 0.38960695266723633
    },
    {
      "epoch": 0.000348724365234375,
      "model_forward_time": 0.11468696594238281,
      "step": 57135
    },
    {
      "epoch": 0.000348724365234375,
      "step": 57135,
      "training_step_time": 0.38885951042175293
    },
    {
      "epoch": 0.00034873046875,
      "model_forward_time": 0.11536192893981934,
      "step": 57136
    },
    {
      "epoch": 0.00034873046875,
      "step": 57136,
      "training_step_time": 0.41803669929504395
    },
    {
      "epoch": 0.000348736572265625,
      "model_forward_time": 0.11511468887329102,
      "step": 57137
    },
    {
      "epoch": 0.000348736572265625,
      "step": 57137,
      "training_step_time": 0.4363057613372803
    },
    {
      "epoch": 0.00034874267578125,
      "model_forward_time": 0.11487364768981934,
      "step": 57138
    },
    {
      "epoch": 0.00034874267578125,
      "step": 57138,
      "training_step_time": 0.5931692123413086
    },
    {
      "epoch": 0.000348748779296875,
      "model_forward_time": 0.11429238319396973,
      "step": 57139
    },
    {
      "epoch": 0.000348748779296875,
      "step": 57139,
      "training_step_time": 0.4740145206451416
    },
    {
      "epoch": 0.0003487548828125,
      "grad_norm": 0.0938158929347992,
      "learning_rate": 6.1990146530565e-07,
      "loss": 0.0344,
      "step": 57140
    },
    {
      "epoch": 0.0003487548828125,
      "model_forward_time": 0.11456561088562012,
      "step": 57140
    },
    {
      "epoch": 0.0003487548828125,
      "step": 57140,
      "training_step_time": 0.3955230712890625
    },
    {
      "epoch": 0.000348760986328125,
      "model_forward_time": 0.11500668525695801,
      "step": 57141
    },
    {
      "epoch": 0.000348760986328125,
      "step": 57141,
      "training_step_time": 0.38203859329223633
    },
    {
      "epoch": 0.00034876708984375,
      "model_forward_time": 0.11439633369445801,
      "step": 57142
    },
    {
      "epoch": 0.00034876708984375,
      "step": 57142,
      "training_step_time": 0.38831377029418945
    },
    {
      "epoch": 0.000348773193359375,
      "model_forward_time": 0.1149141788482666,
      "step": 57143
    },
    {
      "epoch": 0.000348773193359375,
      "step": 57143,
      "training_step_time": 0.3918895721435547
    },
    {
      "epoch": 0.000348779296875,
      "model_forward_time": 0.11508631706237793,
      "step": 57144
    },
    {
      "epoch": 0.000348779296875,
      "step": 57144,
      "training_step_time": 0.6081225872039795
    },
    {
      "epoch": 0.000348785400390625,
      "model_forward_time": 0.11564183235168457,
      "step": 57145
    },
    {
      "epoch": 0.000348785400390625,
      "step": 57145,
      "training_step_time": 0.4101572036743164
    },
    {
      "epoch": 0.00034879150390625,
      "model_forward_time": 0.11458492279052734,
      "step": 57146
    },
    {
      "epoch": 0.00034879150390625,
      "step": 57146,
      "training_step_time": 0.38904428482055664
    },
    {
      "epoch": 0.000348797607421875,
      "model_forward_time": 0.11530017852783203,
      "step": 57147
    },
    {
      "epoch": 0.000348797607421875,
      "step": 57147,
      "training_step_time": 0.3834493160247803
    },
    {
      "epoch": 0.0003488037109375,
      "model_forward_time": 0.11519765853881836,
      "step": 57148
    },
    {
      "epoch": 0.0003488037109375,
      "step": 57148,
      "training_step_time": 0.3940255641937256
    },
    {
      "epoch": 0.000348809814453125,
      "model_forward_time": 0.1153874397277832,
      "step": 57149
    },
    {
      "epoch": 0.000348809814453125,
      "step": 57149,
      "training_step_time": 0.40093159675598145
    },
    {
      "epoch": 0.00034881591796875,
      "grad_norm": 0.1373368799686432,
      "learning_rate": 6.15582970243117e-07,
      "loss": 0.0396,
      "step": 57150
    },
    {
      "epoch": 0.00034881591796875,
      "model_forward_time": 0.1143941879272461,
      "step": 57150
    },
    {
      "epoch": 0.00034881591796875,
      "step": 57150,
      "training_step_time": 0.8103771209716797
    },
    {
      "epoch": 0.000348822021484375,
      "model_forward_time": 0.11409997940063477,
      "step": 57151
    },
    {
      "epoch": 0.000348822021484375,
      "step": 57151,
      "training_step_time": 0.45329737663269043
    },
    {
      "epoch": 0.000348828125,
      "model_forward_time": 0.11424732208251953,
      "step": 57152
    },
    {
      "epoch": 0.000348828125,
      "step": 57152,
      "training_step_time": 0.4719552993774414
    },
    {
      "epoch": 0.000348834228515625,
      "model_forward_time": 0.11430740356445312,
      "step": 57153
    },
    {
      "epoch": 0.000348834228515625,
      "step": 57153,
      "training_step_time": 0.43682265281677246
    },
    {
      "epoch": 0.00034884033203125,
      "model_forward_time": 0.11400580406188965,
      "step": 57154
    },
    {
      "epoch": 0.00034884033203125,
      "step": 57154,
      "training_step_time": 0.37923574447631836
    },
    {
      "epoch": 0.000348846435546875,
      "model_forward_time": 0.11431384086608887,
      "step": 57155
    },
    {
      "epoch": 0.000348846435546875,
      "step": 57155,
      "training_step_time": 0.3851473331451416
    },
    {
      "epoch": 0.0003488525390625,
      "model_forward_time": 0.11491084098815918,
      "step": 57156
    },
    {
      "epoch": 0.0003488525390625,
      "step": 57156,
      "training_step_time": 0.47700047492980957
    },
    {
      "epoch": 0.000348858642578125,
      "model_forward_time": 0.11453580856323242,
      "step": 57157
    },
    {
      "epoch": 0.000348858642578125,
      "step": 57157,
      "training_step_time": 0.3933703899383545
    },
    {
      "epoch": 0.00034886474609375,
      "model_forward_time": 0.11487889289855957,
      "step": 57158
    },
    {
      "epoch": 0.00034886474609375,
      "step": 57158,
      "training_step_time": 0.3938922882080078
    },
    {
      "epoch": 0.000348870849609375,
      "model_forward_time": 0.1152193546295166,
      "step": 57159
    },
    {
      "epoch": 0.000348870849609375,
      "step": 57159,
      "training_step_time": 0.38916468620300293
    },
    {
      "epoch": 0.000348876953125,
      "grad_norm": 0.09622761607170105,
      "learning_rate": 6.11279476862553e-07,
      "loss": 0.0386,
      "step": 57160
    },
    {
      "epoch": 0.000348876953125,
      "model_forward_time": 0.11500310897827148,
      "step": 57160
    },
    {
      "epoch": 0.000348876953125,
      "step": 57160,
      "training_step_time": 0.4006516933441162
    },
    {
      "epoch": 0.000348883056640625,
      "model_forward_time": 0.11606168746948242,
      "step": 57161
    },
    {
      "epoch": 0.000348883056640625,
      "step": 57161,
      "training_step_time": 0.39235973358154297
    },
    {
      "epoch": 0.00034888916015625,
      "model_forward_time": 0.11484456062316895,
      "step": 57162
    },
    {
      "epoch": 0.00034888916015625,
      "step": 57162,
      "training_step_time": 0.7613437175750732
    },
    {
      "epoch": 0.000348895263671875,
      "model_forward_time": 0.11457586288452148,
      "step": 57163
    },
    {
      "epoch": 0.000348895263671875,
      "step": 57163,
      "training_step_time": 0.45506978034973145
    },
    {
      "epoch": 0.0003489013671875,
      "model_forward_time": 0.11422228813171387,
      "step": 57164
    },
    {
      "epoch": 0.0003489013671875,
      "step": 57164,
      "training_step_time": 0.4510228633880615
    },
    {
      "epoch": 0.000348907470703125,
      "model_forward_time": 0.11424541473388672,
      "step": 57165
    },
    {
      "epoch": 0.000348907470703125,
      "step": 57165,
      "training_step_time": 0.45735788345336914
    },
    {
      "epoch": 0.00034891357421875,
      "model_forward_time": 0.11375999450683594,
      "step": 57166
    },
    {
      "epoch": 0.00034891357421875,
      "step": 57166,
      "training_step_time": 0.4473130702972412
    },
    {
      "epoch": 0.000348919677734375,
      "model_forward_time": 0.11439824104309082,
      "step": 57167
    },
    {
      "epoch": 0.000348919677734375,
      "step": 57167,
      "training_step_time": 0.46848082542419434
    },
    {
      "epoch": 0.00034892578125,
      "model_forward_time": 0.11526894569396973,
      "step": 57168
    },
    {
      "epoch": 0.00034892578125,
      "step": 57168,
      "training_step_time": 0.3906095027923584
    },
    {
      "epoch": 0.000348931884765625,
      "model_forward_time": 0.11404895782470703,
      "step": 57169
    },
    {
      "epoch": 0.000348931884765625,
      "step": 57169,
      "training_step_time": 0.3946211338043213
    },
    {
      "epoch": 0.00034893798828125,
      "grad_norm": 0.06579649448394775,
      "learning_rate": 6.069909864712453e-07,
      "loss": 0.0303,
      "step": 57170
    },
    {
      "epoch": 0.00034893798828125,
      "model_forward_time": 0.11481094360351562,
      "step": 57170
    },
    {
      "epoch": 0.00034893798828125,
      "step": 57170,
      "training_step_time": 0.40006566047668457
    },
    {
      "epoch": 0.000348944091796875,
      "model_forward_time": 0.11477851867675781,
      "step": 57171
    },
    {
      "epoch": 0.000348944091796875,
      "step": 57171,
      "training_step_time": 0.3868265151977539
    },
    {
      "epoch": 0.0003489501953125,
      "model_forward_time": 0.11529707908630371,
      "step": 57172
    },
    {
      "epoch": 0.0003489501953125,
      "step": 57172,
      "training_step_time": 0.3863847255706787
    },
    {
      "epoch": 0.000348956298828125,
      "model_forward_time": 0.11480855941772461,
      "step": 57173
    },
    {
      "epoch": 0.000348956298828125,
      "step": 57173,
      "training_step_time": 0.3911116123199463
    },
    {
      "epoch": 0.00034896240234375,
      "model_forward_time": 0.11545252799987793,
      "step": 57174
    },
    {
      "epoch": 0.00034896240234375,
      "step": 57174,
      "training_step_time": 0.6873254776000977
    },
    {
      "epoch": 0.000348968505859375,
      "model_forward_time": 0.11468648910522461,
      "step": 57175
    },
    {
      "epoch": 0.000348968505859375,
      "step": 57175,
      "training_step_time": 0.4344208240509033
    },
    {
      "epoch": 0.000348974609375,
      "model_forward_time": 0.11514949798583984,
      "step": 57176
    },
    {
      "epoch": 0.000348974609375,
      "step": 57176,
      "training_step_time": 0.47516965866088867
    },
    {
      "epoch": 0.000348980712890625,
      "model_forward_time": 0.11567044258117676,
      "step": 57177
    },
    {
      "epoch": 0.000348980712890625,
      "step": 57177,
      "training_step_time": 0.6249239444732666
    },
    {
      "epoch": 0.00034898681640625,
      "model_forward_time": 0.11970949172973633,
      "step": 57178
    },
    {
      "epoch": 0.00034898681640625,
      "step": 57178,
      "training_step_time": 0.7736916542053223
    },
    {
      "epoch": 0.000348992919921875,
      "model_forward_time": 0.11735916137695312,
      "step": 57179
    },
    {
      "epoch": 0.000348992919921875,
      "step": 57179,
      "training_step_time": 0.8220157623291016
    },
    {
      "epoch": 0.0003489990234375,
      "grad_norm": 0.11106568574905396,
      "learning_rate": 6.027175003719354e-07,
      "loss": 0.0371,
      "step": 57180
    },
    {
      "epoch": 0.0003489990234375,
      "model_forward_time": 0.11688804626464844,
      "step": 57180
    },
    {
      "epoch": 0.0003489990234375,
      "step": 57180,
      "training_step_time": 0.7013907432556152
    },
    {
      "epoch": 0.000349005126953125,
      "model_forward_time": 0.11755990982055664,
      "step": 57181
    },
    {
      "epoch": 0.000349005126953125,
      "step": 57181,
      "training_step_time": 0.712775707244873
    },
    {
      "epoch": 0.00034901123046875,
      "model_forward_time": 0.12017011642456055,
      "step": 57182
    },
    {
      "epoch": 0.00034901123046875,
      "step": 57182,
      "training_step_time": 0.7136693000793457
    },
    {
      "epoch": 0.000349017333984375,
      "model_forward_time": 0.1195378303527832,
      "step": 57183
    },
    {
      "epoch": 0.000349017333984375,
      "step": 57183,
      "training_step_time": 0.6783628463745117
    },
    {
      "epoch": 0.0003490234375,
      "model_forward_time": 0.12027263641357422,
      "step": 57184
    },
    {
      "epoch": 0.0003490234375,
      "step": 57184,
      "training_step_time": 0.6495730876922607
    },
    {
      "epoch": 0.000349029541015625,
      "model_forward_time": 0.12624096870422363,
      "step": 57185
    },
    {
      "epoch": 0.000349029541015625,
      "step": 57185,
      "training_step_time": 0.6799590587615967
    },
    {
      "epoch": 0.00034903564453125,
      "model_forward_time": 0.11984372138977051,
      "step": 57186
    },
    {
      "epoch": 0.00034903564453125,
      "step": 57186,
      "training_step_time": 0.7034952640533447
    },
    {
      "epoch": 0.000349041748046875,
      "model_forward_time": 0.12018060684204102,
      "step": 57187
    },
    {
      "epoch": 0.000349041748046875,
      "step": 57187,
      "training_step_time": 0.7005078792572021
    },
    {
      "epoch": 0.0003490478515625,
      "model_forward_time": 0.11784124374389648,
      "step": 57188
    },
    {
      "epoch": 0.0003490478515625,
      "step": 57188,
      "training_step_time": 0.7166163921356201
    },
    {
      "epoch": 0.000349053955078125,
      "model_forward_time": 0.11898326873779297,
      "step": 57189
    },
    {
      "epoch": 0.000349053955078125,
      "step": 57189,
      "training_step_time": 0.6585428714752197
    },
    {
      "epoch": 0.00034906005859375,
      "grad_norm": 0.08088172972202301,
      "learning_rate": 5.984590198627849e-07,
      "loss": 0.0356,
      "step": 57190
    },
    {
      "epoch": 0.00034906005859375,
      "model_forward_time": 0.11778783798217773,
      "step": 57190
    },
    {
      "epoch": 0.00034906005859375,
      "step": 57190,
      "training_step_time": 0.8088529109954834
    },
    {
      "epoch": 0.000349066162109375,
      "model_forward_time": 0.11734962463378906,
      "step": 57191
    },
    {
      "epoch": 0.000349066162109375,
      "step": 57191,
      "training_step_time": 0.6852872371673584
    },
    {
      "epoch": 0.000349072265625,
      "model_forward_time": 0.12246227264404297,
      "step": 57192
    },
    {
      "epoch": 0.000349072265625,
      "step": 57192,
      "training_step_time": 0.6793043613433838
    },
    {
      "epoch": 0.000349078369140625,
      "model_forward_time": 0.11773276329040527,
      "step": 57193
    },
    {
      "epoch": 0.000349078369140625,
      "step": 57193,
      "training_step_time": 0.6980082988739014
    },
    {
      "epoch": 0.00034908447265625,
      "model_forward_time": 0.11698055267333984,
      "step": 57194
    },
    {
      "epoch": 0.00034908447265625,
      "step": 57194,
      "training_step_time": 0.6594305038452148
    },
    {
      "epoch": 0.000349090576171875,
      "model_forward_time": 0.1193997859954834,
      "step": 57195
    },
    {
      "epoch": 0.000349090576171875,
      "step": 57195,
      "training_step_time": 0.6971821784973145
    },
    {
      "epoch": 0.0003490966796875,
      "model_forward_time": 0.12034225463867188,
      "step": 57196
    },
    {
      "epoch": 0.0003490966796875,
      "step": 57196,
      "training_step_time": 0.657353401184082
    },
    {
      "epoch": 0.000349102783203125,
      "model_forward_time": 0.12044382095336914,
      "step": 57197
    },
    {
      "epoch": 0.000349102783203125,
      "step": 57197,
      "training_step_time": 0.7381107807159424
    },
    {
      "epoch": 0.00034910888671875,
      "model_forward_time": 0.1249537467956543,
      "step": 57198
    },
    {
      "epoch": 0.00034910888671875,
      "step": 57198,
      "training_step_time": 0.7586915493011475
    },
    {
      "epoch": 0.000349114990234375,
      "model_forward_time": 0.1235818862915039,
      "step": 57199
    },
    {
      "epoch": 0.000349114990234375,
      "step": 57199,
      "training_step_time": 0.6585314273834229
    },
    {
      "epoch": 0.00034912109375,
      "grad_norm": 0.07692605257034302,
      "learning_rate": 5.9421554623742e-07,
      "loss": 0.0376,
      "step": 57200
    },
    {
      "epoch": 0.00034912109375,
      "model_forward_time": 0.11747002601623535,
      "step": 57200
    },
    {
      "epoch": 0.00034912109375,
      "step": 57200,
      "training_step_time": 0.605107307434082
    },
    {
      "epoch": 0.000349127197265625,
      "model_forward_time": 0.12032914161682129,
      "step": 57201
    },
    {
      "epoch": 0.000349127197265625,
      "step": 57201,
      "training_step_time": 0.6953961849212646
    },
    {
      "epoch": 0.00034913330078125,
      "model_forward_time": 0.11748933792114258,
      "step": 57202
    },
    {
      "epoch": 0.00034913330078125,
      "step": 57202,
      "training_step_time": 0.7274749279022217
    },
    {
      "epoch": 0.000349139404296875,
      "model_forward_time": 0.12911772727966309,
      "step": 57203
    },
    {
      "epoch": 0.000349139404296875,
      "step": 57203,
      "training_step_time": 0.6313037872314453
    },
    {
      "epoch": 0.0003491455078125,
      "model_forward_time": 0.12227416038513184,
      "step": 57204
    },
    {
      "epoch": 0.0003491455078125,
      "step": 57204,
      "training_step_time": 0.6720857620239258
    },
    {
      "epoch": 0.000349151611328125,
      "model_forward_time": 0.12005496025085449,
      "step": 57205
    },
    {
      "epoch": 0.000349151611328125,
      "step": 57205,
      "training_step_time": 0.6955506801605225
    },
    {
      "epoch": 0.00034915771484375,
      "model_forward_time": 0.11665844917297363,
      "step": 57206
    },
    {
      "epoch": 0.00034915771484375,
      "step": 57206,
      "training_step_time": 0.7098524570465088
    },
    {
      "epoch": 0.000349163818359375,
      "model_forward_time": 0.12061595916748047,
      "step": 57207
    },
    {
      "epoch": 0.000349163818359375,
      "step": 57207,
      "training_step_time": 0.751460075378418
    },
    {
      "epoch": 0.000349169921875,
      "model_forward_time": 0.1161036491394043,
      "step": 57208
    },
    {
      "epoch": 0.000349169921875,
      "step": 57208,
      "training_step_time": 0.7573277950286865
    },
    {
      "epoch": 0.000349176025390625,
      "model_forward_time": 0.1210787296295166,
      "step": 57209
    },
    {
      "epoch": 0.000349176025390625,
      "step": 57209,
      "training_step_time": 0.6351397037506104
    },
    {
      "epoch": 0.00034918212890625,
      "grad_norm": 0.09297726303339005,
      "learning_rate": 5.899870807848762e-07,
      "loss": 0.0342,
      "step": 57210
    },
    {
      "epoch": 0.00034918212890625,
      "model_forward_time": 0.11892127990722656,
      "step": 57210
    },
    {
      "epoch": 0.00034918212890625,
      "step": 57210,
      "training_step_time": 0.6553599834442139
    },
    {
      "epoch": 0.000349188232421875,
      "model_forward_time": 0.11628437042236328,
      "step": 57211
    },
    {
      "epoch": 0.000349188232421875,
      "step": 57211,
      "training_step_time": 0.6848568916320801
    },
    {
      "epoch": 0.0003491943359375,
      "model_forward_time": 0.1173405647277832,
      "step": 57212
    },
    {
      "epoch": 0.0003491943359375,
      "step": 57212,
      "training_step_time": 0.7346596717834473
    },
    {
      "epoch": 0.000349200439453125,
      "model_forward_time": 0.11815452575683594,
      "step": 57213
    },
    {
      "epoch": 0.000349200439453125,
      "step": 57213,
      "training_step_time": 0.6646897792816162
    },
    {
      "epoch": 0.00034920654296875,
      "model_forward_time": 0.11985445022583008,
      "step": 57214
    },
    {
      "epoch": 0.00034920654296875,
      "step": 57214,
      "training_step_time": 0.8121659755706787
    },
    {
      "epoch": 0.000349212646484375,
      "model_forward_time": 0.1258552074432373,
      "step": 57215
    },
    {
      "epoch": 0.000349212646484375,
      "step": 57215,
      "training_step_time": 0.6888530254364014
    },
    {
      "epoch": 0.00034921875,
      "model_forward_time": 0.11768913269042969,
      "step": 57216
    },
    {
      "epoch": 0.00034921875,
      "step": 57216,
      "training_step_time": 0.6671974658966064
    },
    {
      "epoch": 0.000349224853515625,
      "model_forward_time": 0.11770439147949219,
      "step": 57217
    },
    {
      "epoch": 0.000349224853515625,
      "step": 57217,
      "training_step_time": 0.6999809741973877
    },
    {
      "epoch": 0.00034923095703125,
      "model_forward_time": 0.1239924430847168,
      "step": 57218
    },
    {
      "epoch": 0.00034923095703125,
      "step": 57218,
      "training_step_time": 0.6849465370178223
    },
    {
      "epoch": 0.000349237060546875,
      "model_forward_time": 0.11810588836669922,
      "step": 57219
    },
    {
      "epoch": 0.000349237060546875,
      "step": 57219,
      "training_step_time": 0.6918337345123291
    },
    {
      "epoch": 0.0003492431640625,
      "grad_norm": 0.08514755219221115,
      "learning_rate": 5.857736247896706e-07,
      "loss": 0.0368,
      "step": 57220
    },
    {
      "epoch": 0.0003492431640625,
      "model_forward_time": 0.12424778938293457,
      "step": 57220
    },
    {
      "epoch": 0.0003492431640625,
      "step": 57220,
      "training_step_time": 0.7137331962585449
    },
    {
      "epoch": 0.000349249267578125,
      "model_forward_time": 0.11683082580566406,
      "step": 57221
    },
    {
      "epoch": 0.000349249267578125,
      "step": 57221,
      "training_step_time": 0.7118802070617676
    },
    {
      "epoch": 0.00034925537109375,
      "model_forward_time": 0.11879611015319824,
      "step": 57222
    },
    {
      "epoch": 0.00034925537109375,
      "step": 57222,
      "training_step_time": 0.6652767658233643
    },
    {
      "epoch": 0.000349261474609375,
      "model_forward_time": 0.12618637084960938,
      "step": 57223
    },
    {
      "epoch": 0.000349261474609375,
      "step": 57223,
      "training_step_time": 0.7100448608398438
    },
    {
      "epoch": 0.000349267578125,
      "model_forward_time": 0.12110495567321777,
      "step": 57224
    },
    {
      "epoch": 0.000349267578125,
      "step": 57224,
      "training_step_time": 0.7010650634765625
    },
    {
      "epoch": 0.000349273681640625,
      "model_forward_time": 0.12612700462341309,
      "step": 57225
    },
    {
      "epoch": 0.000349273681640625,
      "step": 57225,
      "training_step_time": 0.7361023426055908
    },
    {
      "epoch": 0.00034927978515625,
      "model_forward_time": 0.11800098419189453,
      "step": 57226
    },
    {
      "epoch": 0.00034927978515625,
      "step": 57226,
      "training_step_time": 0.6496009826660156
    },
    {
      "epoch": 0.000349285888671875,
      "model_forward_time": 0.12078380584716797,
      "step": 57227
    },
    {
      "epoch": 0.000349285888671875,
      "step": 57227,
      "training_step_time": 0.659135103225708
    },
    {
      "epoch": 0.0003492919921875,
      "model_forward_time": 0.1198117733001709,
      "step": 57228
    },
    {
      "epoch": 0.0003492919921875,
      "step": 57228,
      "training_step_time": 0.6687314510345459
    },
    {
      "epoch": 0.000349298095703125,
      "model_forward_time": 0.1183159351348877,
      "step": 57229
    },
    {
      "epoch": 0.000349298095703125,
      "step": 57229,
      "training_step_time": 0.631183385848999
    },
    {
      "epoch": 0.00034930419921875,
      "grad_norm": 0.0800003707408905,
      "learning_rate": 5.815751795317237e-07,
      "loss": 0.0345,
      "step": 57230
    },
    {
      "epoch": 0.00034930419921875,
      "model_forward_time": 0.1238565444946289,
      "step": 57230
    },
    {
      "epoch": 0.00034930419921875,
      "step": 57230,
      "training_step_time": 0.6278202533721924
    },
    {
      "epoch": 0.000349310302734375,
      "model_forward_time": 0.1175374984741211,
      "step": 57231
    },
    {
      "epoch": 0.000349310302734375,
      "step": 57231,
      "training_step_time": 0.7269887924194336
    },
    {
      "epoch": 0.00034931640625,
      "model_forward_time": 0.12526178359985352,
      "step": 57232
    },
    {
      "epoch": 0.00034931640625,
      "step": 57232,
      "training_step_time": 0.6964240074157715
    },
    {
      "epoch": 0.000349322509765625,
      "model_forward_time": 0.11749911308288574,
      "step": 57233
    },
    {
      "epoch": 0.000349322509765625,
      "step": 57233,
      "training_step_time": 0.7216663360595703
    },
    {
      "epoch": 0.00034932861328125,
      "model_forward_time": 0.12125968933105469,
      "step": 57234
    },
    {
      "epoch": 0.00034932861328125,
      "step": 57234,
      "training_step_time": 0.5459401607513428
    },
    {
      "epoch": 0.000349334716796875,
      "model_forward_time": 0.11986660957336426,
      "step": 57235
    },
    {
      "epoch": 0.000349334716796875,
      "step": 57235,
      "training_step_time": 0.6260759830474854
    },
    {
      "epoch": 0.0003493408203125,
      "model_forward_time": 0.11965370178222656,
      "step": 57236
    },
    {
      "epoch": 0.0003493408203125,
      "step": 57236,
      "training_step_time": 0.6998424530029297
    },
    {
      "epoch": 0.000349346923828125,
      "model_forward_time": 0.12238931655883789,
      "step": 57237
    },
    {
      "epoch": 0.000349346923828125,
      "step": 57237,
      "training_step_time": 0.5809853076934814
    },
    {
      "epoch": 0.00034935302734375,
      "model_forward_time": 0.1181950569152832,
      "step": 57238
    },
    {
      "epoch": 0.00034935302734375,
      "step": 57238,
      "training_step_time": 0.620621919631958
    },
    {
      "epoch": 0.000349359130859375,
      "model_forward_time": 0.11958599090576172,
      "step": 57239
    },
    {
      "epoch": 0.000349359130859375,
      "step": 57239,
      "training_step_time": 0.6420562267303467
    },
    {
      "epoch": 0.000349365234375,
      "grad_norm": 0.09981335699558258,
      "learning_rate": 5.773917462864264e-07,
      "loss": 0.0395,
      "step": 57240
    },
    {
      "epoch": 0.000349365234375,
      "model_forward_time": 0.12543392181396484,
      "step": 57240
    },
    {
      "epoch": 0.000349365234375,
      "step": 57240,
      "training_step_time": 0.5739874839782715
    },
    {
      "epoch": 0.000349371337890625,
      "model_forward_time": 0.1211709976196289,
      "step": 57241
    },
    {
      "epoch": 0.000349371337890625,
      "step": 57241,
      "training_step_time": 0.717451810836792
    },
    {
      "epoch": 0.00034937744140625,
      "model_forward_time": 0.11837053298950195,
      "step": 57242
    },
    {
      "epoch": 0.00034937744140625,
      "step": 57242,
      "training_step_time": 0.6409821510314941
    },
    {
      "epoch": 0.000349383544921875,
      "model_forward_time": 0.1199042797088623,
      "step": 57243
    },
    {
      "epoch": 0.000349383544921875,
      "step": 57243,
      "training_step_time": 0.7036397457122803
    },
    {
      "epoch": 0.0003493896484375,
      "model_forward_time": 0.11918973922729492,
      "step": 57244
    },
    {
      "epoch": 0.0003493896484375,
      "step": 57244,
      "training_step_time": 0.45862603187561035
    },
    {
      "epoch": 0.000349395751953125,
      "model_forward_time": 0.11714816093444824,
      "step": 57245
    },
    {
      "epoch": 0.000349395751953125,
      "step": 57245,
      "training_step_time": 0.6429572105407715
    },
    {
      "epoch": 0.00034940185546875,
      "model_forward_time": 0.11569046974182129,
      "step": 57246
    },
    {
      "epoch": 0.00034940185546875,
      "step": 57246,
      "training_step_time": 0.49992918968200684
    },
    {
      "epoch": 0.000349407958984375,
      "model_forward_time": 0.11601567268371582,
      "step": 57247
    },
    {
      "epoch": 0.000349407958984375,
      "step": 57247,
      "training_step_time": 0.42856574058532715
    },
    {
      "epoch": 0.0003494140625,
      "model_forward_time": 0.11585330963134766,
      "step": 57248
    },
    {
      "epoch": 0.0003494140625,
      "step": 57248,
      "training_step_time": 0.4232490062713623
    },
    {
      "epoch": 0.000349420166015625,
      "model_forward_time": 0.11526632308959961,
      "step": 57249
    },
    {
      "epoch": 0.000349420166015625,
      "step": 57249,
      "training_step_time": 0.4078083038330078
    },
    {
      "epoch": 0.00034942626953125,
      "grad_norm": 0.0774758905172348,
      "learning_rate": 5.732233263245845e-07,
      "loss": 0.039,
      "step": 57250
    },
    {
      "epoch": 0.00034942626953125,
      "model_forward_time": 0.11533260345458984,
      "step": 57250
    },
    {
      "epoch": 0.00034942626953125,
      "step": 57250,
      "training_step_time": 0.4140329360961914
    },
    {
      "epoch": 0.000349432373046875,
      "model_forward_time": 0.11508989334106445,
      "step": 57251
    },
    {
      "epoch": 0.000349432373046875,
      "step": 57251,
      "training_step_time": 0.41434431076049805
    },
    {
      "epoch": 0.0003494384765625,
      "model_forward_time": 0.11565518379211426,
      "step": 57252
    },
    {
      "epoch": 0.0003494384765625,
      "step": 57252,
      "training_step_time": 0.40683841705322266
    },
    {
      "epoch": 0.000349444580078125,
      "model_forward_time": 0.11479949951171875,
      "step": 57253
    },
    {
      "epoch": 0.000349444580078125,
      "step": 57253,
      "training_step_time": 0.38157105445861816
    },
    {
      "epoch": 0.00034945068359375,
      "model_forward_time": 0.11558699607849121,
      "step": 57254
    },
    {
      "epoch": 0.00034945068359375,
      "step": 57254,
      "training_step_time": 0.3973541259765625
    },
    {
      "epoch": 0.000349456787109375,
      "model_forward_time": 0.11518001556396484,
      "step": 57255
    },
    {
      "epoch": 0.000349456787109375,
      "step": 57255,
      "training_step_time": 0.3944976329803467
    },
    {
      "epoch": 0.000349462890625,
      "model_forward_time": 0.115325927734375,
      "step": 57256
    },
    {
      "epoch": 0.000349462890625,
      "step": 57256,
      "training_step_time": 0.48748278617858887
    },
    {
      "epoch": 0.000349468994140625,
      "model_forward_time": 0.11508440971374512,
      "step": 57257
    },
    {
      "epoch": 0.000349468994140625,
      "step": 57257,
      "training_step_time": 0.38953089714050293
    },
    {
      "epoch": 0.00034947509765625,
      "model_forward_time": 0.11553788185119629,
      "step": 57258
    },
    {
      "epoch": 0.00034947509765625,
      "step": 57258,
      "training_step_time": 0.3967440128326416
    },
    {
      "epoch": 0.000349481201171875,
      "model_forward_time": 0.11501789093017578,
      "step": 57259
    },
    {
      "epoch": 0.000349481201171875,
      "step": 57259,
      "training_step_time": 0.37181758880615234
    },
    {
      "epoch": 0.0003494873046875,
      "grad_norm": 0.1089121550321579,
      "learning_rate": 5.690699209124573e-07,
      "loss": 0.0371,
      "step": 57260
    },
    {
      "epoch": 0.0003494873046875,
      "model_forward_time": 0.1148843765258789,
      "step": 57260
    },
    {
      "epoch": 0.0003494873046875,
      "step": 57260,
      "training_step_time": 0.4573242664337158
    },
    {
      "epoch": 0.000349493408203125,
      "model_forward_time": 0.11609315872192383,
      "step": 57261
    },
    {
      "epoch": 0.000349493408203125,
      "step": 57261,
      "training_step_time": 0.4001350402832031
    },
    {
      "epoch": 0.00034949951171875,
      "model_forward_time": 0.1151731014251709,
      "step": 57262
    },
    {
      "epoch": 0.00034949951171875,
      "step": 57262,
      "training_step_time": 0.39420104026794434
    },
    {
      "epoch": 0.000349505615234375,
      "model_forward_time": 0.11517882347106934,
      "step": 57263
    },
    {
      "epoch": 0.000349505615234375,
      "step": 57263,
      "training_step_time": 0.3940145969390869
    },
    {
      "epoch": 0.00034951171875,
      "model_forward_time": 0.1150519847869873,
      "step": 57264
    },
    {
      "epoch": 0.00034951171875,
      "step": 57264,
      "training_step_time": 0.3912949562072754
    },
    {
      "epoch": 0.000349517822265625,
      "model_forward_time": 0.11525464057922363,
      "step": 57265
    },
    {
      "epoch": 0.000349517822265625,
      "step": 57265,
      "training_step_time": 0.4032883644104004
    },
    {
      "epoch": 0.00034952392578125,
      "model_forward_time": 0.11515378952026367,
      "step": 57266
    },
    {
      "epoch": 0.00034952392578125,
      "step": 57266,
      "training_step_time": 0.376767635345459
    },
    {
      "epoch": 0.000349530029296875,
      "model_forward_time": 0.11558699607849121,
      "step": 57267
    },
    {
      "epoch": 0.000349530029296875,
      "step": 57267,
      "training_step_time": 0.3877553939819336
    },
    {
      "epoch": 0.0003495361328125,
      "model_forward_time": 0.11513900756835938,
      "step": 57268
    },
    {
      "epoch": 0.0003495361328125,
      "step": 57268,
      "training_step_time": 0.3844263553619385
    },
    {
      "epoch": 0.000349542236328125,
      "model_forward_time": 0.1155235767364502,
      "step": 57269
    },
    {
      "epoch": 0.000349542236328125,
      "step": 57269,
      "training_step_time": 0.40482473373413086
    },
    {
      "epoch": 0.00034954833984375,
      "grad_norm": 0.0966913253068924,
      "learning_rate": 5.64931531311741e-07,
      "loss": 0.0354,
      "step": 57270
    },
    {
      "epoch": 0.00034954833984375,
      "model_forward_time": 0.11676144599914551,
      "step": 57270
    },
    {
      "epoch": 0.00034954833984375,
      "step": 57270,
      "training_step_time": 0.3990771770477295
    },
    {
      "epoch": 0.000349554443359375,
      "model_forward_time": 0.11528658866882324,
      "step": 57271
    },
    {
      "epoch": 0.000349554443359375,
      "step": 57271,
      "training_step_time": 0.5234966278076172
    },
    {
      "epoch": 0.000349560546875,
      "model_forward_time": 0.11532402038574219,
      "step": 57272
    },
    {
      "epoch": 0.000349560546875,
      "step": 57272,
      "training_step_time": 0.3937079906463623
    },
    {
      "epoch": 0.000349566650390625,
      "model_forward_time": 0.11391353607177734,
      "step": 57273
    },
    {
      "epoch": 0.000349566650390625,
      "step": 57273,
      "training_step_time": 0.49399399757385254
    },
    {
      "epoch": 0.00034957275390625,
      "model_forward_time": 0.11536455154418945,
      "step": 57274
    },
    {
      "epoch": 0.00034957275390625,
      "step": 57274,
      "training_step_time": 0.477907657623291
    },
    {
      "epoch": 0.000349578857421875,
      "model_forward_time": 0.1146242618560791,
      "step": 57275
    },
    {
      "epoch": 0.000349578857421875,
      "step": 57275,
      "training_step_time": 0.4107847213745117
    },
    {
      "epoch": 0.0003495849609375,
      "model_forward_time": 0.1144571304321289,
      "step": 57276
    },
    {
      "epoch": 0.0003495849609375,
      "step": 57276,
      "training_step_time": 0.4886665344238281
    },
    {
      "epoch": 0.000349591064453125,
      "model_forward_time": 0.11464381217956543,
      "step": 57277
    },
    {
      "epoch": 0.000349591064453125,
      "step": 57277,
      "training_step_time": 0.3913841247558594
    },
    {
      "epoch": 0.00034959716796875,
      "model_forward_time": 0.11474299430847168,
      "step": 57278
    },
    {
      "epoch": 0.00034959716796875,
      "step": 57278,
      "training_step_time": 0.3742804527282715
    },
    {
      "epoch": 0.000349603271484375,
      "model_forward_time": 0.11574363708496094,
      "step": 57279
    },
    {
      "epoch": 0.000349603271484375,
      "step": 57279,
      "training_step_time": 0.3899686336517334
    },
    {
      "epoch": 0.000349609375,
      "grad_norm": 0.08379381150007248,
      "learning_rate": 5.608081587795688e-07,
      "loss": 0.0391,
      "step": 57280
    },
    {
      "epoch": 0.000349609375,
      "model_forward_time": 0.1148378849029541,
      "step": 57280
    },
    {
      "epoch": 0.000349609375,
      "step": 57280,
      "training_step_time": 0.38525891304016113
    },
    {
      "epoch": 0.000349615478515625,
      "model_forward_time": 0.1153419017791748,
      "step": 57281
    },
    {
      "epoch": 0.000349615478515625,
      "step": 57281,
      "training_step_time": 0.403456449508667
    },
    {
      "epoch": 0.00034962158203125,
      "model_forward_time": 0.11441564559936523,
      "step": 57282
    },
    {
      "epoch": 0.00034962158203125,
      "step": 57282,
      "training_step_time": 0.40182948112487793
    },
    {
      "epoch": 0.000349627685546875,
      "model_forward_time": 0.11517500877380371,
      "step": 57283
    },
    {
      "epoch": 0.000349627685546875,
      "step": 57283,
      "training_step_time": 0.3957688808441162
    },
    {
      "epoch": 0.0003496337890625,
      "model_forward_time": 0.11597228050231934,
      "step": 57284
    },
    {
      "epoch": 0.0003496337890625,
      "step": 57284,
      "training_step_time": 0.41269588470458984
    },
    {
      "epoch": 0.000349639892578125,
      "model_forward_time": 0.11540341377258301,
      "step": 57285
    },
    {
      "epoch": 0.000349639892578125,
      "step": 57285,
      "training_step_time": 0.40134549140930176
    },
    {
      "epoch": 0.00034964599609375,
      "model_forward_time": 0.11520051956176758,
      "step": 57286
    },
    {
      "epoch": 0.00034964599609375,
      "step": 57286,
      "training_step_time": 0.4599490165710449
    },
    {
      "epoch": 0.000349652099609375,
      "model_forward_time": 0.11507201194763184,
      "step": 57287
    },
    {
      "epoch": 0.000349652099609375,
      "step": 57287,
      "training_step_time": 0.45385289192199707
    },
    {
      "epoch": 0.000349658203125,
      "model_forward_time": 0.11488652229309082,
      "step": 57288
    },
    {
      "epoch": 0.000349658203125,
      "step": 57288,
      "training_step_time": 0.3711731433868408
    },
    {
      "epoch": 0.000349664306640625,
      "model_forward_time": 0.11522388458251953,
      "step": 57289
    },
    {
      "epoch": 0.000349664306640625,
      "step": 57289,
      "training_step_time": 0.468719482421875
    },
    {
      "epoch": 0.00034967041015625,
      "grad_norm": 0.07335519045591354,
      "learning_rate": 5.566998045685112e-07,
      "loss": 0.0339,
      "step": 57290
    },
    {
      "epoch": 0.00034967041015625,
      "model_forward_time": 0.11497116088867188,
      "step": 57290
    },
    {
      "epoch": 0.00034967041015625,
      "step": 57290,
      "training_step_time": 0.49327969551086426
    },
    {
      "epoch": 0.000349676513671875,
      "model_forward_time": 0.11481857299804688,
      "step": 57291
    },
    {
      "epoch": 0.000349676513671875,
      "step": 57291,
      "training_step_time": 0.3842027187347412
    },
    {
      "epoch": 0.0003496826171875,
      "model_forward_time": 0.11435627937316895,
      "step": 57292
    },
    {
      "epoch": 0.0003496826171875,
      "step": 57292,
      "training_step_time": 0.39470744132995605
    },
    {
      "epoch": 0.000349688720703125,
      "model_forward_time": 0.11475491523742676,
      "step": 57293
    },
    {
      "epoch": 0.000349688720703125,
      "step": 57293,
      "training_step_time": 0.3882410526275635
    },
    {
      "epoch": 0.00034969482421875,
      "model_forward_time": 0.11491703987121582,
      "step": 57294
    },
    {
      "epoch": 0.00034969482421875,
      "step": 57294,
      "training_step_time": 0.38386988639831543
    },
    {
      "epoch": 0.000349700927734375,
      "model_forward_time": 0.11496162414550781,
      "step": 57295
    },
    {
      "epoch": 0.000349700927734375,
      "step": 57295,
      "training_step_time": 0.3963282108306885
    },
    {
      "epoch": 0.00034970703125,
      "model_forward_time": 0.11502504348754883,
      "step": 57296
    },
    {
      "epoch": 0.00034970703125,
      "step": 57296,
      "training_step_time": 0.37413525581359863
    },
    {
      "epoch": 0.000349713134765625,
      "model_forward_time": 0.11551046371459961,
      "step": 57297
    },
    {
      "epoch": 0.000349713134765625,
      "step": 57297,
      "training_step_time": 0.39356398582458496
    },
    {
      "epoch": 0.00034971923828125,
      "model_forward_time": 0.11510133743286133,
      "step": 57298
    },
    {
      "epoch": 0.00034971923828125,
      "step": 57298,
      "training_step_time": 0.3972616195678711
    },
    {
      "epoch": 0.000349725341796875,
      "model_forward_time": 0.11589598655700684,
      "step": 57299
    },
    {
      "epoch": 0.000349725341796875,
      "step": 57299,
      "training_step_time": 0.4522209167480469
    },
    {
      "epoch": 0.0003497314453125,
      "grad_norm": 0.10003644227981567,
      "learning_rate": 5.526064699265753e-07,
      "loss": 0.0373,
      "step": 57300
    },
    {
      "epoch": 0.0003497314453125,
      "model_forward_time": 0.11497950553894043,
      "step": 57300
    },
    {
      "epoch": 0.0003497314453125,
      "step": 57300,
      "training_step_time": 0.4891026020050049
    },
    {
      "epoch": 0.000349737548828125,
      "model_forward_time": 0.11550092697143555,
      "step": 57301
    },
    {
      "epoch": 0.000349737548828125,
      "step": 57301,
      "training_step_time": 0.4071238040924072
    },
    {
      "epoch": 0.00034974365234375,
      "model_forward_time": 0.11498069763183594,
      "step": 57302
    },
    {
      "epoch": 0.00034974365234375,
      "step": 57302,
      "training_step_time": 0.3874790668487549
    },
    {
      "epoch": 0.000349749755859375,
      "model_forward_time": 0.1151123046875,
      "step": 57303
    },
    {
      "epoch": 0.000349749755859375,
      "step": 57303,
      "training_step_time": 0.4742739200592041
    },
    {
      "epoch": 0.000349755859375,
      "model_forward_time": 0.11553835868835449,
      "step": 57304
    },
    {
      "epoch": 0.000349755859375,
      "step": 57304,
      "training_step_time": 0.48836660385131836
    },
    {
      "epoch": 0.000349761962890625,
      "model_forward_time": 0.1149897575378418,
      "step": 57305
    },
    {
      "epoch": 0.000349761962890625,
      "step": 57305,
      "training_step_time": 0.41577887535095215
    },
    {
      "epoch": 0.00034976806640625,
      "model_forward_time": 0.11493229866027832,
      "step": 57306
    },
    {
      "epoch": 0.00034976806640625,
      "step": 57306,
      "training_step_time": 0.3852217197418213
    },
    {
      "epoch": 0.000349774169921875,
      "model_forward_time": 0.11533379554748535,
      "step": 57307
    },
    {
      "epoch": 0.000349774169921875,
      "step": 57307,
      "training_step_time": 0.3965790271759033
    },
    {
      "epoch": 0.0003497802734375,
      "model_forward_time": 0.11465048789978027,
      "step": 57308
    },
    {
      "epoch": 0.0003497802734375,
      "step": 57308,
      "training_step_time": 0.3808596134185791
    },
    {
      "epoch": 0.000349786376953125,
      "model_forward_time": 0.11521744728088379,
      "step": 57309
    },
    {
      "epoch": 0.000349786376953125,
      "step": 57309,
      "training_step_time": 0.3953206539154053
    },
    {
      "epoch": 0.00034979248046875,
      "grad_norm": 0.08398283272981644,
      "learning_rate": 5.485281560972166e-07,
      "loss": 0.0371,
      "step": 57310
    },
    {
      "epoch": 0.00034979248046875,
      "model_forward_time": 0.1160268783569336,
      "step": 57310
    },
    {
      "epoch": 0.00034979248046875,
      "step": 57310,
      "training_step_time": 0.39269137382507324
    },
    {
      "epoch": 0.000349798583984375,
      "model_forward_time": 0.11543512344360352,
      "step": 57311
    },
    {
      "epoch": 0.000349798583984375,
      "step": 57311,
      "training_step_time": 0.3883516788482666
    },
    {
      "epoch": 0.0003498046875,
      "model_forward_time": 0.11551666259765625,
      "step": 57312
    },
    {
      "epoch": 0.0003498046875,
      "step": 57312,
      "training_step_time": 0.39690518379211426
    },
    {
      "epoch": 0.000349810791015625,
      "model_forward_time": 0.1149287223815918,
      "step": 57313
    },
    {
      "epoch": 0.000349810791015625,
      "step": 57313,
      "training_step_time": 0.44537997245788574
    },
    {
      "epoch": 0.00034981689453125,
      "model_forward_time": 0.11579012870788574,
      "step": 57314
    },
    {
      "epoch": 0.00034981689453125,
      "step": 57314,
      "training_step_time": 0.43366169929504395
    },
    {
      "epoch": 0.000349822998046875,
      "model_forward_time": 0.11481046676635742,
      "step": 57315
    },
    {
      "epoch": 0.000349822998046875,
      "step": 57315,
      "training_step_time": 0.45729851722717285
    },
    {
      "epoch": 0.0003498291015625,
      "model_forward_time": 0.11541056632995605,
      "step": 57316
    },
    {
      "epoch": 0.0003498291015625,
      "step": 57316,
      "training_step_time": 0.3946101665496826
    },
    {
      "epoch": 0.000349835205078125,
      "model_forward_time": 0.11577415466308594,
      "step": 57317
    },
    {
      "epoch": 0.000349835205078125,
      "step": 57317,
      "training_step_time": 0.4315357208251953
    },
    {
      "epoch": 0.00034984130859375,
      "model_forward_time": 0.11498332023620605,
      "step": 57318
    },
    {
      "epoch": 0.00034984130859375,
      "step": 57318,
      "training_step_time": 0.43842649459838867
    },
    {
      "epoch": 0.000349847412109375,
      "model_forward_time": 0.11653017997741699,
      "step": 57319
    },
    {
      "epoch": 0.000349847412109375,
      "step": 57319,
      "training_step_time": 0.4437289237976074
    },
    {
      "epoch": 0.000349853515625,
      "grad_norm": 0.09256327897310257,
      "learning_rate": 5.444648643193051e-07,
      "loss": 0.0353,
      "step": 57320
    },
    {
      "epoch": 0.000349853515625,
      "model_forward_time": 0.11543798446655273,
      "step": 57320
    },
    {
      "epoch": 0.000349853515625,
      "step": 57320,
      "training_step_time": 0.3877279758453369
    },
    {
      "epoch": 0.000349859619140625,
      "model_forward_time": 0.11538839340209961,
      "step": 57321
    },
    {
      "epoch": 0.000349859619140625,
      "step": 57321,
      "training_step_time": 0.38719677925109863
    },
    {
      "epoch": 0.00034986572265625,
      "model_forward_time": 0.11511969566345215,
      "step": 57322
    },
    {
      "epoch": 0.00034986572265625,
      "step": 57322,
      "training_step_time": 0.3905677795410156
    },
    {
      "epoch": 0.000349871826171875,
      "model_forward_time": 0.11612224578857422,
      "step": 57323
    },
    {
      "epoch": 0.000349871826171875,
      "step": 57323,
      "training_step_time": 0.39584922790527344
    },
    {
      "epoch": 0.0003498779296875,
      "model_forward_time": 0.11546206474304199,
      "step": 57324
    },
    {
      "epoch": 0.0003498779296875,
      "step": 57324,
      "training_step_time": 0.3905513286590576
    },
    {
      "epoch": 0.000349884033203125,
      "model_forward_time": 0.11645364761352539,
      "step": 57325
    },
    {
      "epoch": 0.000349884033203125,
      "step": 57325,
      "training_step_time": 0.39542460441589355
    },
    {
      "epoch": 0.00034989013671875,
      "model_forward_time": 0.11634993553161621,
      "step": 57326
    },
    {
      "epoch": 0.00034989013671875,
      "step": 57326,
      "training_step_time": 0.3935239315032959
    },
    {
      "epoch": 0.000349896240234375,
      "model_forward_time": 0.1156764030456543,
      "step": 57327
    },
    {
      "epoch": 0.000349896240234375,
      "step": 57327,
      "training_step_time": 0.43668603897094727
    },
    {
      "epoch": 0.00034990234375,
      "model_forward_time": 0.11638498306274414,
      "step": 57328
    },
    {
      "epoch": 0.00034990234375,
      "step": 57328,
      "training_step_time": 0.40566420555114746
    },
    {
      "epoch": 0.000349908447265625,
      "model_forward_time": 0.11592960357666016,
      "step": 57329
    },
    {
      "epoch": 0.000349908447265625,
      "step": 57329,
      "training_step_time": 0.4257779121398926
    },
    {
      "epoch": 0.00034991455078125,
      "grad_norm": 0.10379333049058914,
      "learning_rate": 5.404165958271811e-07,
      "loss": 0.0377,
      "step": 57330
    },
    {
      "epoch": 0.00034991455078125,
      "model_forward_time": 0.11510586738586426,
      "step": 57330
    },
    {
      "epoch": 0.00034991455078125,
      "step": 57330,
      "training_step_time": 0.43326687812805176
    },
    {
      "epoch": 0.000349920654296875,
      "model_forward_time": 0.1153261661529541,
      "step": 57331
    },
    {
      "epoch": 0.000349920654296875,
      "step": 57331,
      "training_step_time": 0.39858198165893555
    },
    {
      "epoch": 0.0003499267578125,
      "model_forward_time": 0.11556577682495117,
      "step": 57332
    },
    {
      "epoch": 0.0003499267578125,
      "step": 57332,
      "training_step_time": 0.42458057403564453
    },
    {
      "epoch": 0.000349932861328125,
      "model_forward_time": 0.11520576477050781,
      "step": 57333
    },
    {
      "epoch": 0.000349932861328125,
      "step": 57333,
      "training_step_time": 0.4056708812713623
    },
    {
      "epoch": 0.00034993896484375,
      "model_forward_time": 0.11593747138977051,
      "step": 57334
    },
    {
      "epoch": 0.00034993896484375,
      "step": 57334,
      "training_step_time": 0.41524386405944824
    },
    {
      "epoch": 0.000349945068359375,
      "model_forward_time": 0.11510562896728516,
      "step": 57335
    },
    {
      "epoch": 0.000349945068359375,
      "step": 57335,
      "training_step_time": 0.47644782066345215
    },
    {
      "epoch": 0.000349951171875,
      "model_forward_time": 0.11557817459106445,
      "step": 57336
    },
    {
      "epoch": 0.000349951171875,
      "step": 57336,
      "training_step_time": 0.3926858901977539
    },
    {
      "epoch": 0.000349957275390625,
      "model_forward_time": 0.11459946632385254,
      "step": 57337
    },
    {
      "epoch": 0.000349957275390625,
      "step": 57337,
      "training_step_time": 0.39621615409851074
    },
    {
      "epoch": 0.00034996337890625,
      "model_forward_time": 0.1157381534576416,
      "step": 57338
    },
    {
      "epoch": 0.00034996337890625,
      "step": 57338,
      "training_step_time": 0.3994712829589844
    },
    {
      "epoch": 0.000349969482421875,
      "model_forward_time": 0.11533141136169434,
      "step": 57339
    },
    {
      "epoch": 0.000349969482421875,
      "step": 57339,
      "training_step_time": 0.39904117584228516
    },
    {
      "epoch": 0.0003499755859375,
      "grad_norm": 0.11140622198581696,
      "learning_rate": 5.363833518505834e-07,
      "loss": 0.0419,
      "step": 57340
    },
    {
      "epoch": 0.0003499755859375,
      "model_forward_time": 0.11484789848327637,
      "step": 57340
    },
    {
      "epoch": 0.0003499755859375,
      "step": 57340,
      "training_step_time": 0.40508437156677246
    },
    {
      "epoch": 0.000349981689453125,
      "model_forward_time": 0.1156153678894043,
      "step": 57341
    },
    {
      "epoch": 0.000349981689453125,
      "step": 57341,
      "training_step_time": 0.3958110809326172
    },
    {
      "epoch": 0.00034998779296875,
      "model_forward_time": 0.11531925201416016,
      "step": 57342
    },
    {
      "epoch": 0.00034998779296875,
      "step": 57342,
      "training_step_time": 0.4410400390625
    },
    {
      "epoch": 0.000349993896484375,
      "model_forward_time": 0.11475896835327148,
      "step": 57343
    },
    {
      "epoch": 0.000349993896484375,
      "step": 57343,
      "training_step_time": 0.46848320960998535
    },
    {
      "epoch": 0.00035,
      "model_forward_time": 0.11482834815979004,
      "step": 57344
    },
    {
      "epoch": 0.00035,
      "step": 57344,
      "training_step_time": 0.40715622901916504
    },
    {
      "epoch": 0.000350006103515625,
      "model_forward_time": 0.11531615257263184,
      "step": 57345
    },
    {
      "epoch": 0.000350006103515625,
      "step": 57345,
      "training_step_time": 0.3835916519165039
    },
    {
      "epoch": 0.00035001220703125,
      "model_forward_time": 0.1152811050415039,
      "step": 57346
    },
    {
      "epoch": 0.00035001220703125,
      "step": 57346,
      "training_step_time": 0.3956921100616455
    },
    {
      "epoch": 0.000350018310546875,
      "model_forward_time": 0.11542940139770508,
      "step": 57347
    },
    {
      "epoch": 0.000350018310546875,
      "step": 57347,
      "training_step_time": 0.44992494583129883
    },
    {
      "epoch": 0.0003500244140625,
      "model_forward_time": 0.11528396606445312,
      "step": 57348
    },
    {
      "epoch": 0.0003500244140625,
      "step": 57348,
      "training_step_time": 0.45048069953918457
    },
    {
      "epoch": 0.000350030517578125,
      "model_forward_time": 0.11575794219970703,
      "step": 57349
    },
    {
      "epoch": 0.000350030517578125,
      "step": 57349,
      "training_step_time": 0.4945063591003418
    },
    {
      "epoch": 0.00035003662109375,
      "grad_norm": 0.11301245540380478,
      "learning_rate": 5.323651336147151e-07,
      "loss": 0.0344,
      "step": 57350
    },
    {
      "epoch": 0.00035003662109375,
      "model_forward_time": 0.11489748954772949,
      "step": 57350
    },
    {
      "epoch": 0.00035003662109375,
      "step": 57350,
      "training_step_time": 0.44063735008239746
    },
    {
      "epoch": 0.000350042724609375,
      "model_forward_time": 0.11527347564697266,
      "step": 57351
    },
    {
      "epoch": 0.000350042724609375,
      "step": 57351,
      "training_step_time": 0.3892350196838379
    },
    {
      "epoch": 0.000350048828125,
      "model_forward_time": 0.11532735824584961,
      "step": 57352
    },
    {
      "epoch": 0.000350048828125,
      "step": 57352,
      "training_step_time": 0.4006218910217285
    },
    {
      "epoch": 0.000350054931640625,
      "model_forward_time": 0.11507773399353027,
      "step": 57353
    },
    {
      "epoch": 0.000350054931640625,
      "step": 57353,
      "training_step_time": 0.3926513195037842
    },
    {
      "epoch": 0.00035006103515625,
      "model_forward_time": 0.1159675121307373,
      "step": 57354
    },
    {
      "epoch": 0.00035006103515625,
      "step": 57354,
      "training_step_time": 0.38941073417663574
    },
    {
      "epoch": 0.000350067138671875,
      "model_forward_time": 0.1143796443939209,
      "step": 57355
    },
    {
      "epoch": 0.000350067138671875,
      "step": 57355,
      "training_step_time": 0.4028759002685547
    },
    {
      "epoch": 0.0003500732421875,
      "model_forward_time": 0.11533117294311523,
      "step": 57356
    },
    {
      "epoch": 0.0003500732421875,
      "step": 57356,
      "training_step_time": 0.4340529441833496
    },
    {
      "epoch": 0.000350079345703125,
      "model_forward_time": 0.1157386302947998,
      "step": 57357
    },
    {
      "epoch": 0.000350079345703125,
      "step": 57357,
      "training_step_time": 0.6212239265441895
    },
    {
      "epoch": 0.00035008544921875,
      "model_forward_time": 0.11474418640136719,
      "step": 57358
    },
    {
      "epoch": 0.00035008544921875,
      "step": 57358,
      "training_step_time": 0.4151148796081543
    },
    {
      "epoch": 0.000350091552734375,
      "model_forward_time": 0.11509037017822266,
      "step": 57359
    },
    {
      "epoch": 0.000350091552734375,
      "step": 57359,
      "training_step_time": 0.39270544052124023
    },
    {
      "epoch": 0.00035009765625,
      "grad_norm": 0.13378150761127472,
      "learning_rate": 5.283619423401998e-07,
      "loss": 0.0326,
      "step": 57360
    },
    {
      "epoch": 0.00035009765625,
      "model_forward_time": 0.11463260650634766,
      "step": 57360
    },
    {
      "epoch": 0.00035009765625,
      "step": 57360,
      "training_step_time": 0.38553714752197266
    },
    {
      "epoch": 0.000350103759765625,
      "model_forward_time": 0.11497330665588379,
      "step": 57361
    },
    {
      "epoch": 0.000350103759765625,
      "step": 57361,
      "training_step_time": 0.3975560665130615
    },
    {
      "epoch": 0.00035010986328125,
      "model_forward_time": 0.11497378349304199,
      "step": 57362
    },
    {
      "epoch": 0.00035010986328125,
      "step": 57362,
      "training_step_time": 0.3643505573272705
    },
    {
      "epoch": 0.000350115966796875,
      "model_forward_time": 0.11523270606994629,
      "step": 57363
    },
    {
      "epoch": 0.000350115966796875,
      "step": 57363,
      "training_step_time": 0.5234663486480713
    },
    {
      "epoch": 0.0003501220703125,
      "model_forward_time": 0.11621999740600586,
      "step": 57364
    },
    {
      "epoch": 0.0003501220703125,
      "step": 57364,
      "training_step_time": 0.40798425674438477
    },
    {
      "epoch": 0.000350128173828125,
      "model_forward_time": 0.11458587646484375,
      "step": 57365
    },
    {
      "epoch": 0.000350128173828125,
      "step": 57365,
      "training_step_time": 0.3946964740753174
    },
    {
      "epoch": 0.00035013427734375,
      "model_forward_time": 0.11570000648498535,
      "step": 57366
    },
    {
      "epoch": 0.00035013427734375,
      "step": 57366,
      "training_step_time": 0.3818051815032959
    },
    {
      "epoch": 0.000350140380859375,
      "model_forward_time": 0.11533212661743164,
      "step": 57367
    },
    {
      "epoch": 0.000350140380859375,
      "step": 57367,
      "training_step_time": 0.3927185535430908
    },
    {
      "epoch": 0.000350146484375,
      "model_forward_time": 0.11489391326904297,
      "step": 57368
    },
    {
      "epoch": 0.000350146484375,
      "step": 57368,
      "training_step_time": 0.3967783451080322
    },
    {
      "epoch": 0.000350152587890625,
      "model_forward_time": 0.11485004425048828,
      "step": 57369
    },
    {
      "epoch": 0.000350152587890625,
      "step": 57369,
      "training_step_time": 0.6556785106658936
    },
    {
      "epoch": 0.00035015869140625,
      "grad_norm": 0.11439134925603867,
      "learning_rate": 5.243737792431091e-07,
      "loss": 0.0402,
      "step": 57370
    },
    {
      "epoch": 0.00035015869140625,
      "model_forward_time": 0.11524033546447754,
      "step": 57370
    },
    {
      "epoch": 0.00035015869140625,
      "step": 57370,
      "training_step_time": 0.3865213394165039
    },
    {
      "epoch": 0.000350164794921875,
      "model_forward_time": 0.11511564254760742,
      "step": 57371
    },
    {
      "epoch": 0.000350164794921875,
      "step": 57371,
      "training_step_time": 0.4056675434112549
    },
    {
      "epoch": 0.0003501708984375,
      "model_forward_time": 0.11476802825927734,
      "step": 57372
    },
    {
      "epoch": 0.0003501708984375,
      "step": 57372,
      "training_step_time": 0.4445919990539551
    },
    {
      "epoch": 0.000350177001953125,
      "model_forward_time": 0.11508512496948242,
      "step": 57373
    },
    {
      "epoch": 0.000350177001953125,
      "step": 57373,
      "training_step_time": 0.39252161979675293
    },
    {
      "epoch": 0.00035018310546875,
      "model_forward_time": 0.1149134635925293,
      "step": 57374
    },
    {
      "epoch": 0.00035018310546875,
      "step": 57374,
      "training_step_time": 0.3879554271697998
    },
    {
      "epoch": 0.000350189208984375,
      "model_forward_time": 0.11566329002380371,
      "step": 57375
    },
    {
      "epoch": 0.000350189208984375,
      "step": 57375,
      "training_step_time": 0.5485935211181641
    },
    {
      "epoch": 0.0003501953125,
      "model_forward_time": 0.11510705947875977,
      "step": 57376
    },
    {
      "epoch": 0.0003501953125,
      "step": 57376,
      "training_step_time": 0.4070396423339844
    },
    {
      "epoch": 0.000350201416015625,
      "model_forward_time": 0.11512088775634766,
      "step": 57377
    },
    {
      "epoch": 0.000350201416015625,
      "step": 57377,
      "training_step_time": 0.47380948066711426
    },
    {
      "epoch": 0.00035020751953125,
      "model_forward_time": 0.11542201042175293,
      "step": 57378
    },
    {
      "epoch": 0.00035020751953125,
      "step": 57378,
      "training_step_time": 0.4043419361114502
    },
    {
      "epoch": 0.000350213623046875,
      "model_forward_time": 0.11512207984924316,
      "step": 57379
    },
    {
      "epoch": 0.000350213623046875,
      "step": 57379,
      "training_step_time": 0.49607419967651367
    },
    {
      "epoch": 0.0003502197265625,
      "grad_norm": 0.08875321596860886,
      "learning_rate": 5.204006455349297e-07,
      "loss": 0.0347,
      "step": 57380
    },
    {
      "epoch": 0.0003502197265625,
      "model_forward_time": 0.11472296714782715,
      "step": 57380
    },
    {
      "epoch": 0.0003502197265625,
      "step": 57380,
      "training_step_time": 0.3979530334472656
    },
    {
      "epoch": 0.000350225830078125,
      "model_forward_time": 0.11535453796386719,
      "step": 57381
    },
    {
      "epoch": 0.000350225830078125,
      "step": 57381,
      "training_step_time": 0.4083137512207031
    },
    {
      "epoch": 0.00035023193359375,
      "model_forward_time": 0.1148521900177002,
      "step": 57382
    },
    {
      "epoch": 0.00035023193359375,
      "step": 57382,
      "training_step_time": 0.38573694229125977
    },
    {
      "epoch": 0.000350238037109375,
      "model_forward_time": 0.11497187614440918,
      "step": 57383
    },
    {
      "epoch": 0.000350238037109375,
      "step": 57383,
      "training_step_time": 0.4150717258453369
    },
    {
      "epoch": 0.000350244140625,
      "model_forward_time": 0.11489582061767578,
      "step": 57384
    },
    {
      "epoch": 0.000350244140625,
      "step": 57384,
      "training_step_time": 0.37971925735473633
    },
    {
      "epoch": 0.000350250244140625,
      "model_forward_time": 0.11501216888427734,
      "step": 57385
    },
    {
      "epoch": 0.000350250244140625,
      "step": 57385,
      "training_step_time": 0.4613664150238037
    },
    {
      "epoch": 0.00035025634765625,
      "model_forward_time": 0.1151425838470459,
      "step": 57386
    },
    {
      "epoch": 0.00035025634765625,
      "step": 57386,
      "training_step_time": 0.49550652503967285
    },
    {
      "epoch": 0.000350262451171875,
      "model_forward_time": 0.11500668525695801,
      "step": 57387
    },
    {
      "epoch": 0.000350262451171875,
      "step": 57387,
      "training_step_time": 0.5231304168701172
    },
    {
      "epoch": 0.0003502685546875,
      "model_forward_time": 0.11458897590637207,
      "step": 57388
    },
    {
      "epoch": 0.0003502685546875,
      "step": 57388,
      "training_step_time": 0.38140249252319336
    },
    {
      "epoch": 0.000350274658203125,
      "model_forward_time": 0.11536216735839844,
      "step": 57389
    },
    {
      "epoch": 0.000350274658203125,
      "step": 57389,
      "training_step_time": 0.3886704444885254
    },
    {
      "epoch": 0.00035028076171875,
      "grad_norm": 0.09250828623771667,
      "learning_rate": 5.164425424226016e-07,
      "loss": 0.0375,
      "step": 57390
    },
    {
      "epoch": 0.00035028076171875,
      "model_forward_time": 0.11508631706237793,
      "step": 57390
    },
    {
      "epoch": 0.00035028076171875,
      "step": 57390,
      "training_step_time": 0.3981211185455322
    },
    {
      "epoch": 0.000350286865234375,
      "model_forward_time": 0.11495780944824219,
      "step": 57391
    },
    {
      "epoch": 0.000350286865234375,
      "step": 57391,
      "training_step_time": 0.41323041915893555
    },
    {
      "epoch": 0.00035029296875,
      "model_forward_time": 0.11510944366455078,
      "step": 57392
    },
    {
      "epoch": 0.00035029296875,
      "step": 57392,
      "training_step_time": 0.4836738109588623
    },
    {
      "epoch": 0.000350299072265625,
      "model_forward_time": 0.1147148609161377,
      "step": 57393
    },
    {
      "epoch": 0.000350299072265625,
      "step": 57393,
      "training_step_time": 0.5945148468017578
    },
    {
      "epoch": 0.00035030517578125,
      "model_forward_time": 0.1148366928100586,
      "step": 57394
    },
    {
      "epoch": 0.00035030517578125,
      "step": 57394,
      "training_step_time": 0.38718366622924805
    },
    {
      "epoch": 0.000350311279296875,
      "model_forward_time": 0.11500430107116699,
      "step": 57395
    },
    {
      "epoch": 0.000350311279296875,
      "step": 57395,
      "training_step_time": 0.41477227210998535
    },
    {
      "epoch": 0.0003503173828125,
      "model_forward_time": 0.11414408683776855,
      "step": 57396
    },
    {
      "epoch": 0.0003503173828125,
      "step": 57396,
      "training_step_time": 0.385408878326416
    },
    {
      "epoch": 0.000350323486328125,
      "model_forward_time": 0.1146547794342041,
      "step": 57397
    },
    {
      "epoch": 0.000350323486328125,
      "step": 57397,
      "training_step_time": 0.3971579074859619
    },
    {
      "epoch": 0.00035032958984375,
      "model_forward_time": 0.11560249328613281,
      "step": 57398
    },
    {
      "epoch": 0.00035032958984375,
      "step": 57398,
      "training_step_time": 0.4039759635925293
    },
    {
      "epoch": 0.000350335693359375,
      "model_forward_time": 0.11517643928527832,
      "step": 57399
    },
    {
      "epoch": 0.000350335693359375,
      "step": 57399,
      "training_step_time": 0.5394816398620605
    },
    {
      "epoch": 0.000350341796875,
      "grad_norm": 0.09056976437568665,
      "learning_rate": 5.124994711084963e-07,
      "loss": 0.0369,
      "step": 57400
    },
    {
      "epoch": 0.000350341796875,
      "model_forward_time": 0.11479306221008301,
      "step": 57400
    },
    {
      "epoch": 0.000350341796875,
      "step": 57400,
      "training_step_time": 0.42613768577575684
    },
    {
      "epoch": 0.000350347900390625,
      "model_forward_time": 0.11438655853271484,
      "step": 57401
    },
    {
      "epoch": 0.000350347900390625,
      "step": 57401,
      "training_step_time": 0.39780759811401367
    },
    {
      "epoch": 0.00035035400390625,
      "model_forward_time": 0.11478424072265625,
      "step": 57402
    },
    {
      "epoch": 0.00035035400390625,
      "step": 57402,
      "training_step_time": 0.38451123237609863
    },
    {
      "epoch": 0.000350360107421875,
      "model_forward_time": 0.11469602584838867,
      "step": 57403
    },
    {
      "epoch": 0.000350360107421875,
      "step": 57403,
      "training_step_time": 0.3939337730407715
    },
    {
      "epoch": 0.0003503662109375,
      "model_forward_time": 0.1149294376373291,
      "step": 57404
    },
    {
      "epoch": 0.0003503662109375,
      "step": 57404,
      "training_step_time": 0.4332151412963867
    },
    {
      "epoch": 0.000350372314453125,
      "model_forward_time": 0.11632657051086426,
      "step": 57405
    },
    {
      "epoch": 0.000350372314453125,
      "step": 57405,
      "training_step_time": 0.47512030601501465
    },
    {
      "epoch": 0.00035037841796875,
      "model_forward_time": 0.11479353904724121,
      "step": 57406
    },
    {
      "epoch": 0.00035037841796875,
      "step": 57406,
      "training_step_time": 0.47359371185302734
    },
    {
      "epoch": 0.000350384521484375,
      "model_forward_time": 0.11470174789428711,
      "step": 57407
    },
    {
      "epoch": 0.000350384521484375,
      "step": 57407,
      "training_step_time": 0.41399526596069336
    },
    {
      "epoch": 0.000350390625,
      "model_forward_time": 0.11470627784729004,
      "step": 57408
    },
    {
      "epoch": 0.000350390625,
      "step": 57408,
      "training_step_time": 0.39333295822143555
    },
    {
      "epoch": 0.000350396728515625,
      "model_forward_time": 0.11499238014221191,
      "step": 57409
    },
    {
      "epoch": 0.000350396728515625,
      "step": 57409,
      "training_step_time": 0.4301769733428955
    },
    {
      "epoch": 0.00035040283203125,
      "grad_norm": 0.09947581589221954,
      "learning_rate": 5.085714327904056e-07,
      "loss": 0.0375,
      "step": 57410
    },
    {
      "epoch": 0.00035040283203125,
      "model_forward_time": 0.11469411849975586,
      "step": 57410
    },
    {
      "epoch": 0.00035040283203125,
      "step": 57410,
      "training_step_time": 0.39542460441589355
    },
    {
      "epoch": 0.000350408935546875,
      "model_forward_time": 0.11493563652038574,
      "step": 57411
    },
    {
      "epoch": 0.000350408935546875,
      "step": 57411,
      "training_step_time": 0.40521717071533203
    },
    {
      "epoch": 0.0003504150390625,
      "model_forward_time": 0.11480307579040527,
      "step": 57412
    },
    {
      "epoch": 0.0003504150390625,
      "step": 57412,
      "training_step_time": 0.3910553455352783
    },
    {
      "epoch": 0.000350421142578125,
      "model_forward_time": 0.11486649513244629,
      "step": 57413
    },
    {
      "epoch": 0.000350421142578125,
      "step": 57413,
      "training_step_time": 0.49164867401123047
    },
    {
      "epoch": 0.00035042724609375,
      "model_forward_time": 0.11538934707641602,
      "step": 57414
    },
    {
      "epoch": 0.00035042724609375,
      "step": 57414,
      "training_step_time": 0.4751322269439697
    },
    {
      "epoch": 0.000350433349609375,
      "model_forward_time": 0.11478543281555176,
      "step": 57415
    },
    {
      "epoch": 0.000350433349609375,
      "step": 57415,
      "training_step_time": 0.41161322593688965
    },
    {
      "epoch": 0.000350439453125,
      "model_forward_time": 0.11533975601196289,
      "step": 57416
    },
    {
      "epoch": 0.000350439453125,
      "step": 57416,
      "training_step_time": 0.4048347473144531
    },
    {
      "epoch": 0.000350445556640625,
      "model_forward_time": 0.11520266532897949,
      "step": 57417
    },
    {
      "epoch": 0.000350445556640625,
      "step": 57417,
      "training_step_time": 0.39485764503479004
    },
    {
      "epoch": 0.00035045166015625,
      "model_forward_time": 0.11458516120910645,
      "step": 57418
    },
    {
      "epoch": 0.00035045166015625,
      "step": 57418,
      "training_step_time": 0.4044911861419678
    },
    {
      "epoch": 0.000350457763671875,
      "model_forward_time": 0.11501646041870117,
      "step": 57419
    },
    {
      "epoch": 0.000350457763671875,
      "step": 57419,
      "training_step_time": 0.3994319438934326
    },
    {
      "epoch": 0.0003504638671875,
      "grad_norm": 0.09875110536813736,
      "learning_rate": 5.046584286615697e-07,
      "loss": 0.0355,
      "step": 57420
    },
    {
      "epoch": 0.0003504638671875,
      "model_forward_time": 0.11505603790283203,
      "step": 57420
    },
    {
      "epoch": 0.0003504638671875,
      "step": 57420,
      "training_step_time": 0.4275217056274414
    },
    {
      "epoch": 0.000350469970703125,
      "model_forward_time": 0.11469244956970215,
      "step": 57421
    },
    {
      "epoch": 0.000350469970703125,
      "step": 57421,
      "training_step_time": 0.4913508892059326
    },
    {
      "epoch": 0.00035047607421875,
      "model_forward_time": 0.11466312408447266,
      "step": 57422
    },
    {
      "epoch": 0.00035047607421875,
      "step": 57422,
      "training_step_time": 0.4647257328033447
    },
    {
      "epoch": 0.000350482177734375,
      "model_forward_time": 0.11537814140319824,
      "step": 57423
    },
    {
      "epoch": 0.000350482177734375,
      "step": 57423,
      "training_step_time": 0.41579461097717285
    },
    {
      "epoch": 0.00035048828125,
      "model_forward_time": 0.11490488052368164,
      "step": 57424
    },
    {
      "epoch": 0.00035048828125,
      "step": 57424,
      "training_step_time": 0.4050307273864746
    },
    {
      "epoch": 0.000350494384765625,
      "model_forward_time": 0.11449384689331055,
      "step": 57425
    },
    {
      "epoch": 0.000350494384765625,
      "step": 57425,
      "training_step_time": 0.38765859603881836
    },
    {
      "epoch": 0.00035050048828125,
      "model_forward_time": 0.11557912826538086,
      "step": 57426
    },
    {
      "epoch": 0.00035050048828125,
      "step": 57426,
      "training_step_time": 0.3850722312927246
    },
    {
      "epoch": 0.000350506591796875,
      "model_forward_time": 0.11457252502441406,
      "step": 57427
    },
    {
      "epoch": 0.000350506591796875,
      "step": 57427,
      "training_step_time": 0.41899871826171875
    },
    {
      "epoch": 0.0003505126953125,
      "model_forward_time": 0.11486434936523438,
      "step": 57428
    },
    {
      "epoch": 0.0003505126953125,
      "step": 57428,
      "training_step_time": 0.4291214942932129
    },
    {
      "epoch": 0.000350518798828125,
      "model_forward_time": 0.1175987720489502,
      "step": 57429
    },
    {
      "epoch": 0.000350518798828125,
      "step": 57429,
      "training_step_time": 0.5411267280578613
    },
    {
      "epoch": 0.00035052490234375,
      "grad_norm": 0.09751838445663452,
      "learning_rate": 5.007604599106486e-07,
      "loss": 0.0374,
      "step": 57430
    },
    {
      "epoch": 0.00035052490234375,
      "model_forward_time": 0.1151587963104248,
      "step": 57430
    },
    {
      "epoch": 0.00035052490234375,
      "step": 57430,
      "training_step_time": 0.3873481750488281
    },
    {
      "epoch": 0.000350531005859375,
      "model_forward_time": 0.11507034301757812,
      "step": 57431
    },
    {
      "epoch": 0.000350531005859375,
      "step": 57431,
      "training_step_time": 0.39113903045654297
    },
    {
      "epoch": 0.000350537109375,
      "model_forward_time": 0.11562466621398926,
      "step": 57432
    },
    {
      "epoch": 0.000350537109375,
      "step": 57432,
      "training_step_time": 0.3876028060913086
    },
    {
      "epoch": 0.000350543212890625,
      "model_forward_time": 0.11455917358398438,
      "step": 57433
    },
    {
      "epoch": 0.000350543212890625,
      "step": 57433,
      "training_step_time": 0.39777541160583496
    },
    {
      "epoch": 0.00035054931640625,
      "model_forward_time": 0.11522173881530762,
      "step": 57434
    },
    {
      "epoch": 0.00035054931640625,
      "step": 57434,
      "training_step_time": 0.4026522636413574
    },
    {
      "epoch": 0.000350555419921875,
      "model_forward_time": 0.11545062065124512,
      "step": 57435
    },
    {
      "epoch": 0.000350555419921875,
      "step": 57435,
      "training_step_time": 0.6432759761810303
    },
    {
      "epoch": 0.0003505615234375,
      "model_forward_time": 0.11493921279907227,
      "step": 57436
    },
    {
      "epoch": 0.0003505615234375,
      "step": 57436,
      "training_step_time": 0.4990386962890625
    },
    {
      "epoch": 0.000350567626953125,
      "model_forward_time": 0.11495661735534668,
      "step": 57437
    },
    {
      "epoch": 0.000350567626953125,
      "step": 57437,
      "training_step_time": 0.3852963447570801
    },
    {
      "epoch": 0.00035057373046875,
      "model_forward_time": 0.11540555953979492,
      "step": 57438
    },
    {
      "epoch": 0.00035057373046875,
      "step": 57438,
      "training_step_time": 0.3935081958770752
    },
    {
      "epoch": 0.000350579833984375,
      "model_forward_time": 0.11452555656433105,
      "step": 57439
    },
    {
      "epoch": 0.000350579833984375,
      "step": 57439,
      "training_step_time": 0.40692830085754395
    },
    {
      "epoch": 0.0003505859375,
      "grad_norm": 0.08581733703613281,
      "learning_rate": 4.968775277217563e-07,
      "loss": 0.0362,
      "step": 57440
    },
    {
      "epoch": 0.0003505859375,
      "model_forward_time": 0.1148383617401123,
      "step": 57440
    },
    {
      "epoch": 0.0003505859375,
      "step": 57440,
      "training_step_time": 0.3864285945892334
    },
    {
      "epoch": 0.000350592041015625,
      "model_forward_time": 0.11528635025024414,
      "step": 57441
    },
    {
      "epoch": 0.000350592041015625,
      "step": 57441,
      "training_step_time": 0.5139565467834473
    },
    {
      "epoch": 0.00035059814453125,
      "model_forward_time": 0.1151881217956543,
      "step": 57442
    },
    {
      "epoch": 0.00035059814453125,
      "step": 57442,
      "training_step_time": 0.4467501640319824
    },
    {
      "epoch": 0.000350604248046875,
      "model_forward_time": 0.11517047882080078,
      "step": 57443
    },
    {
      "epoch": 0.000350604248046875,
      "step": 57443,
      "training_step_time": 0.41068148612976074
    },
    {
      "epoch": 0.0003506103515625,
      "model_forward_time": 0.11492395401000977,
      "step": 57444
    },
    {
      "epoch": 0.0003506103515625,
      "step": 57444,
      "training_step_time": 0.39592742919921875
    },
    {
      "epoch": 0.000350616455078125,
      "model_forward_time": 0.11470937728881836,
      "step": 57445
    },
    {
      "epoch": 0.000350616455078125,
      "step": 57445,
      "training_step_time": 0.40174412727355957
    },
    {
      "epoch": 0.00035062255859375,
      "model_forward_time": 0.11479067802429199,
      "step": 57446
    },
    {
      "epoch": 0.00035062255859375,
      "step": 57446,
      "training_step_time": 0.3871185779571533
    },
    {
      "epoch": 0.000350628662109375,
      "model_forward_time": 0.11544632911682129,
      "step": 57447
    },
    {
      "epoch": 0.000350628662109375,
      "step": 57447,
      "training_step_time": 0.5947751998901367
    },
    {
      "epoch": 0.000350634765625,
      "model_forward_time": 0.11513090133666992,
      "step": 57448
    },
    {
      "epoch": 0.000350634765625,
      "step": 57448,
      "training_step_time": 0.4004065990447998
    },
    {
      "epoch": 0.000350640869140625,
      "model_forward_time": 0.11481356620788574,
      "step": 57449
    },
    {
      "epoch": 0.000350640869140625,
      "step": 57449,
      "training_step_time": 0.41528797149658203
    },
    {
      "epoch": 0.00035064697265625,
      "grad_norm": 0.08204318583011627,
      "learning_rate": 4.930096332744105e-07,
      "loss": 0.0365,
      "step": 57450
    },
    {
      "epoch": 0.00035064697265625,
      "model_forward_time": 0.115264892578125,
      "step": 57450
    },
    {
      "epoch": 0.00035064697265625,
      "step": 57450,
      "training_step_time": 0.4660959243774414
    },
    {
      "epoch": 0.000350653076171875,
      "model_forward_time": 0.11527562141418457,
      "step": 57451
    },
    {
      "epoch": 0.000350653076171875,
      "step": 57451,
      "training_step_time": 0.45067715644836426
    },
    {
      "epoch": 0.0003506591796875,
      "model_forward_time": 0.1151728630065918,
      "step": 57452
    },
    {
      "epoch": 0.0003506591796875,
      "step": 57452,
      "training_step_time": 0.39276123046875
    },
    {
      "epoch": 0.000350665283203125,
      "model_forward_time": 0.11518359184265137,
      "step": 57453
    },
    {
      "epoch": 0.000350665283203125,
      "step": 57453,
      "training_step_time": 0.5200417041778564
    },
    {
      "epoch": 0.00035067138671875,
      "model_forward_time": 0.11561226844787598,
      "step": 57454
    },
    {
      "epoch": 0.00035067138671875,
      "step": 57454,
      "training_step_time": 0.3829531669616699
    },
    {
      "epoch": 0.000350677490234375,
      "model_forward_time": 0.11439824104309082,
      "step": 57455
    },
    {
      "epoch": 0.000350677490234375,
      "step": 57455,
      "training_step_time": 0.44347214698791504
    },
    {
      "epoch": 0.00035068359375,
      "model_forward_time": 0.11490821838378906,
      "step": 57456
    },
    {
      "epoch": 0.00035068359375,
      "step": 57456,
      "training_step_time": 0.4403653144836426
    },
    {
      "epoch": 0.000350689697265625,
      "model_forward_time": 0.11471366882324219,
      "step": 57457
    },
    {
      "epoch": 0.000350689697265625,
      "step": 57457,
      "training_step_time": 0.442965030670166
    },
    {
      "epoch": 0.00035069580078125,
      "model_forward_time": 0.11477112770080566,
      "step": 57458
    },
    {
      "epoch": 0.00035069580078125,
      "step": 57458,
      "training_step_time": 0.3943912982940674
    },
    {
      "epoch": 0.000350701904296875,
      "model_forward_time": 0.11532044410705566,
      "step": 57459
    },
    {
      "epoch": 0.000350701904296875,
      "step": 57459,
      "training_step_time": 0.4811420440673828
    },
    {
      "epoch": 0.0003507080078125,
      "grad_norm": 0.06952648609876633,
      "learning_rate": 4.891567777435879e-07,
      "loss": 0.0336,
      "step": 57460
    },
    {
      "epoch": 0.0003507080078125,
      "model_forward_time": 0.11427545547485352,
      "step": 57460
    },
    {
      "epoch": 0.0003507080078125,
      "step": 57460,
      "training_step_time": 0.3882908821105957
    },
    {
      "epoch": 0.000350714111328125,
      "model_forward_time": 0.11583352088928223,
      "step": 57461
    },
    {
      "epoch": 0.000350714111328125,
      "step": 57461,
      "training_step_time": 0.3986856937408447
    },
    {
      "epoch": 0.00035072021484375,
      "model_forward_time": 0.11503767967224121,
      "step": 57462
    },
    {
      "epoch": 0.00035072021484375,
      "step": 57462,
      "training_step_time": 0.4810793399810791
    },
    {
      "epoch": 0.000350726318359375,
      "model_forward_time": 0.11493754386901855,
      "step": 57463
    },
    {
      "epoch": 0.000350726318359375,
      "step": 57463,
      "training_step_time": 0.47937941551208496
    },
    {
      "epoch": 0.000350732421875,
      "model_forward_time": 0.11463713645935059,
      "step": 57464
    },
    {
      "epoch": 0.000350732421875,
      "step": 57464,
      "training_step_time": 0.3948018550872803
    },
    {
      "epoch": 0.000350738525390625,
      "model_forward_time": 0.11556315422058105,
      "step": 57465
    },
    {
      "epoch": 0.000350738525390625,
      "step": 57465,
      "training_step_time": 0.4137904644012451
    },
    {
      "epoch": 0.00035074462890625,
      "model_forward_time": 0.11464285850524902,
      "step": 57466
    },
    {
      "epoch": 0.00035074462890625,
      "step": 57466,
      "training_step_time": 0.4024348258972168
    },
    {
      "epoch": 0.000350750732421875,
      "model_forward_time": 0.11521148681640625,
      "step": 57467
    },
    {
      "epoch": 0.000350750732421875,
      "step": 57467,
      "training_step_time": 0.39452075958251953
    },
    {
      "epoch": 0.0003507568359375,
      "model_forward_time": 0.11540937423706055,
      "step": 57468
    },
    {
      "epoch": 0.0003507568359375,
      "step": 57468,
      "training_step_time": 0.41712522506713867
    },
    {
      "epoch": 0.000350762939453125,
      "model_forward_time": 0.11563420295715332,
      "step": 57469
    },
    {
      "epoch": 0.000350762939453125,
      "step": 57469,
      "training_step_time": 0.43590879440307617
    },
    {
      "epoch": 0.00035076904296875,
      "grad_norm": 0.09793736785650253,
      "learning_rate": 4.853189622996745e-07,
      "loss": 0.0391,
      "step": 57470
    },
    {
      "epoch": 0.00035076904296875,
      "model_forward_time": 0.11538076400756836,
      "step": 57470
    },
    {
      "epoch": 0.00035076904296875,
      "step": 57470,
      "training_step_time": 0.39418888092041016
    },
    {
      "epoch": 0.000350775146484375,
      "model_forward_time": 0.11566781997680664,
      "step": 57471
    },
    {
      "epoch": 0.000350775146484375,
      "step": 57471,
      "training_step_time": 0.6045806407928467
    },
    {
      "epoch": 0.00035078125,
      "model_forward_time": 0.11572432518005371,
      "step": 57472
    },
    {
      "epoch": 0.00035078125,
      "step": 57472,
      "training_step_time": 0.3913230895996094
    },
    {
      "epoch": 0.000350787353515625,
      "model_forward_time": 0.11518144607543945,
      "step": 57473
    },
    {
      "epoch": 0.000350787353515625,
      "step": 57473,
      "training_step_time": 0.3946106433868408
    },
    {
      "epoch": 0.00035079345703125,
      "model_forward_time": 0.11560559272766113,
      "step": 57474
    },
    {
      "epoch": 0.00035079345703125,
      "step": 57474,
      "training_step_time": 0.39745092391967773
    },
    {
      "epoch": 0.000350799560546875,
      "model_forward_time": 0.11474108695983887,
      "step": 57475
    },
    {
      "epoch": 0.000350799560546875,
      "step": 57475,
      "training_step_time": 0.39218616485595703
    },
    {
      "epoch": 0.0003508056640625,
      "model_forward_time": 0.11513304710388184,
      "step": 57476
    },
    {
      "epoch": 0.0003508056640625,
      "step": 57476,
      "training_step_time": 0.4452219009399414
    },
    {
      "epoch": 0.000350811767578125,
      "model_forward_time": 0.1150660514831543,
      "step": 57477
    },
    {
      "epoch": 0.000350811767578125,
      "step": 57477,
      "training_step_time": 0.459061861038208
    },
    {
      "epoch": 0.00035081787109375,
      "model_forward_time": 0.11524796485900879,
      "step": 57478
    },
    {
      "epoch": 0.00035081787109375,
      "step": 57478,
      "training_step_time": 0.4043598175048828
    },
    {
      "epoch": 0.000350823974609375,
      "model_forward_time": 0.11475014686584473,
      "step": 57479
    },
    {
      "epoch": 0.000350823974609375,
      "step": 57479,
      "training_step_time": 0.49506688117980957
    },
    {
      "epoch": 0.000350830078125,
      "grad_norm": 0.09051065891981125,
      "learning_rate": 4.814961881085045e-07,
      "loss": 0.032,
      "step": 57480
    },
    {
      "epoch": 0.000350830078125,
      "model_forward_time": 0.1138606071472168,
      "step": 57480
    },
    {
      "epoch": 0.000350830078125,
      "step": 57480,
      "training_step_time": 0.3932931423187256
    },
    {
      "epoch": 0.000350836181640625,
      "model_forward_time": 0.11409831047058105,
      "step": 57481
    },
    {
      "epoch": 0.000350836181640625,
      "step": 57481,
      "training_step_time": 0.3909428119659424
    },
    {
      "epoch": 0.00035084228515625,
      "model_forward_time": 0.11486935615539551,
      "step": 57482
    },
    {
      "epoch": 0.00035084228515625,
      "step": 57482,
      "training_step_time": 0.38901782035827637
    },
    {
      "epoch": 0.000350848388671875,
      "model_forward_time": 0.11557888984680176,
      "step": 57483
    },
    {
      "epoch": 0.000350848388671875,
      "step": 57483,
      "training_step_time": 0.46503424644470215
    },
    {
      "epoch": 0.0003508544921875,
      "model_forward_time": 0.11475515365600586,
      "step": 57484
    },
    {
      "epoch": 0.0003508544921875,
      "step": 57484,
      "training_step_time": 0.43324923515319824
    },
    {
      "epoch": 0.000350860595703125,
      "model_forward_time": 0.11485123634338379,
      "step": 57485
    },
    {
      "epoch": 0.000350860595703125,
      "step": 57485,
      "training_step_time": 0.4940152168273926
    },
    {
      "epoch": 0.00035086669921875,
      "model_forward_time": 0.1145927906036377,
      "step": 57486
    },
    {
      "epoch": 0.00035086669921875,
      "step": 57486,
      "training_step_time": 0.3955197334289551
    },
    {
      "epoch": 0.000350872802734375,
      "model_forward_time": 0.11465930938720703,
      "step": 57487
    },
    {
      "epoch": 0.000350872802734375,
      "step": 57487,
      "training_step_time": 0.3922538757324219
    },
    {
      "epoch": 0.00035087890625,
      "model_forward_time": 0.11443972587585449,
      "step": 57488
    },
    {
      "epoch": 0.00035087890625,
      "step": 57488,
      "training_step_time": 0.38672590255737305
    },
    {
      "epoch": 0.000350885009765625,
      "model_forward_time": 0.11514067649841309,
      "step": 57489
    },
    {
      "epoch": 0.000350885009765625,
      "step": 57489,
      "training_step_time": 0.4356226921081543
    },
    {
      "epoch": 0.00035089111328125,
      "grad_norm": 0.09840279072523117,
      "learning_rate": 4.776884563313266e-07,
      "loss": 0.04,
      "step": 57490
    },
    {
      "epoch": 0.00035089111328125,
      "model_forward_time": 0.11501598358154297,
      "step": 57490
    },
    {
      "epoch": 0.00035089111328125,
      "step": 57490,
      "training_step_time": 0.38878321647644043
    },
    {
      "epoch": 0.000350897216796875,
      "model_forward_time": 0.1153097152709961,
      "step": 57491
    },
    {
      "epoch": 0.000350897216796875,
      "step": 57491,
      "training_step_time": 0.3919551372528076
    },
    {
      "epoch": 0.0003509033203125,
      "model_forward_time": 0.11541962623596191,
      "step": 57492
    },
    {
      "epoch": 0.0003509033203125,
      "step": 57492,
      "training_step_time": 0.42256903648376465
    },
    {
      "epoch": 0.000350909423828125,
      "model_forward_time": 0.11455631256103516,
      "step": 57493
    },
    {
      "epoch": 0.000350909423828125,
      "step": 57493,
      "training_step_time": 0.4104440212249756
    },
    {
      "epoch": 0.00035091552734375,
      "model_forward_time": 0.11509871482849121,
      "step": 57494
    },
    {
      "epoch": 0.00035091552734375,
      "step": 57494,
      "training_step_time": 0.48108673095703125
    },
    {
      "epoch": 0.000350921630859375,
      "model_forward_time": 0.11564135551452637,
      "step": 57495
    },
    {
      "epoch": 0.000350921630859375,
      "step": 57495,
      "training_step_time": 0.5332789421081543
    },
    {
      "epoch": 0.000350927734375,
      "model_forward_time": 0.11493349075317383,
      "step": 57496
    },
    {
      "epoch": 0.000350927734375,
      "step": 57496,
      "training_step_time": 0.387148380279541
    },
    {
      "epoch": 0.000350933837890625,
      "model_forward_time": 0.11564445495605469,
      "step": 57497
    },
    {
      "epoch": 0.000350933837890625,
      "step": 57497,
      "training_step_time": 0.38834118843078613
    },
    {
      "epoch": 0.00035093994140625,
      "model_forward_time": 0.11516976356506348,
      "step": 57498
    },
    {
      "epoch": 0.00035093994140625,
      "step": 57498,
      "training_step_time": 0.4635658264160156
    },
    {
      "epoch": 0.000350946044921875,
      "model_forward_time": 0.11441802978515625,
      "step": 57499
    },
    {
      "epoch": 0.000350946044921875,
      "step": 57499,
      "training_step_time": 0.44327449798583984
    },
    {
      "epoch": 0.0003509521484375,
      "grad_norm": 0.11026041209697723,
      "learning_rate": 4.738957681248379e-07,
      "loss": 0.0399,
      "step": 57500
    },
    {
      "epoch": 0.0003509521484375,
      "model_forward_time": 0.11461615562438965,
      "step": 57500
    },
    {
      "epoch": 0.0003509521484375,
      "step": 57500,
      "training_step_time": 0.3958144187927246
    },
    {
      "epoch": 0.000350958251953125,
      "model_forward_time": 0.1153726577758789,
      "step": 57501
    },
    {
      "epoch": 0.000350958251953125,
      "step": 57501,
      "training_step_time": 0.5301699638366699
    },
    {
      "epoch": 0.00035096435546875,
      "model_forward_time": 0.11525082588195801,
      "step": 57502
    },
    {
      "epoch": 0.00035096435546875,
      "step": 57502,
      "training_step_time": 0.40006232261657715
    },
    {
      "epoch": 0.000350970458984375,
      "model_forward_time": 0.11471128463745117,
      "step": 57503
    },
    {
      "epoch": 0.000350970458984375,
      "step": 57503,
      "training_step_time": 0.39475202560424805
    },
    {
      "epoch": 0.0003509765625,
      "model_forward_time": 0.11520504951477051,
      "step": 57504
    },
    {
      "epoch": 0.0003509765625,
      "step": 57504,
      "training_step_time": 0.4025108814239502
    },
    {
      "epoch": 0.000350982666015625,
      "model_forward_time": 0.11468625068664551,
      "step": 57505
    },
    {
      "epoch": 0.000350982666015625,
      "step": 57505,
      "training_step_time": 0.3993196487426758
    },
    {
      "epoch": 0.00035098876953125,
      "model_forward_time": 0.11506533622741699,
      "step": 57506
    },
    {
      "epoch": 0.00035098876953125,
      "step": 57506,
      "training_step_time": 0.46674394607543945
    },
    {
      "epoch": 0.000350994873046875,
      "model_forward_time": 0.11540484428405762,
      "step": 57507
    },
    {
      "epoch": 0.000350994873046875,
      "step": 57507,
      "training_step_time": 0.5898091793060303
    },
    {
      "epoch": 0.0003510009765625,
      "model_forward_time": 0.11412882804870605,
      "step": 57508
    },
    {
      "epoch": 0.0003510009765625,
      "step": 57508,
      "training_step_time": 0.47144222259521484
    },
    {
      "epoch": 0.000351007080078125,
      "model_forward_time": 0.1153559684753418,
      "step": 57509
    },
    {
      "epoch": 0.000351007080078125,
      "step": 57509,
      "training_step_time": 0.389052152633667
    },
    {
      "epoch": 0.00035101318359375,
      "grad_norm": 0.08603258430957794,
      "learning_rate": 4.701181246411501e-07,
      "loss": 0.0423,
      "step": 57510
    },
    {
      "epoch": 0.00035101318359375,
      "model_forward_time": 0.11502480506896973,
      "step": 57510
    },
    {
      "epoch": 0.00035101318359375,
      "step": 57510,
      "training_step_time": 0.39622068405151367
    },
    {
      "epoch": 0.000351019287109375,
      "model_forward_time": 0.11485147476196289,
      "step": 57511
    },
    {
      "epoch": 0.000351019287109375,
      "step": 57511,
      "training_step_time": 0.49288320541381836
    },
    {
      "epoch": 0.000351025390625,
      "model_forward_time": 0.11474442481994629,
      "step": 57512
    },
    {
      "epoch": 0.000351025390625,
      "step": 57512,
      "training_step_time": 0.4342927932739258
    },
    {
      "epoch": 0.000351031494140625,
      "model_forward_time": 0.11456799507141113,
      "step": 57513
    },
    {
      "epoch": 0.000351031494140625,
      "step": 57513,
      "training_step_time": 0.40096259117126465
    },
    {
      "epoch": 0.00035103759765625,
      "model_forward_time": 0.11495471000671387,
      "step": 57514
    },
    {
      "epoch": 0.00035103759765625,
      "step": 57514,
      "training_step_time": 0.3884458541870117
    },
    {
      "epoch": 0.000351043701171875,
      "model_forward_time": 0.1150979995727539,
      "step": 57515
    },
    {
      "epoch": 0.000351043701171875,
      "step": 57515,
      "training_step_time": 0.39862966537475586
    },
    {
      "epoch": 0.0003510498046875,
      "model_forward_time": 0.11511921882629395,
      "step": 57516
    },
    {
      "epoch": 0.0003510498046875,
      "step": 57516,
      "training_step_time": 0.40213942527770996
    },
    {
      "epoch": 0.000351055908203125,
      "model_forward_time": 0.11522173881530762,
      "step": 57517
    },
    {
      "epoch": 0.000351055908203125,
      "step": 57517,
      "training_step_time": 0.39766788482666016
    },
    {
      "epoch": 0.00035106201171875,
      "model_forward_time": 0.11484265327453613,
      "step": 57518
    },
    {
      "epoch": 0.00035106201171875,
      "step": 57518,
      "training_step_time": 0.3981137275695801
    },
    {
      "epoch": 0.000351068115234375,
      "model_forward_time": 0.11548328399658203,
      "step": 57519
    },
    {
      "epoch": 0.000351068115234375,
      "step": 57519,
      "training_step_time": 0.58939528465271
    },
    {
      "epoch": 0.00035107421875,
      "grad_norm": 0.09188023954629898,
      "learning_rate": 4.6635552702782305e-07,
      "loss": 0.0377,
      "step": 57520
    },
    {
      "epoch": 0.00035107421875,
      "model_forward_time": 0.11468172073364258,
      "step": 57520
    },
    {
      "epoch": 0.00035107421875,
      "step": 57520,
      "training_step_time": 0.41026782989501953
    },
    {
      "epoch": 0.000351080322265625,
      "model_forward_time": 0.1150963306427002,
      "step": 57521
    },
    {
      "epoch": 0.000351080322265625,
      "step": 57521,
      "training_step_time": 0.4002089500427246
    },
    {
      "epoch": 0.00035108642578125,
      "model_forward_time": 0.11552810668945312,
      "step": 57522
    },
    {
      "epoch": 0.00035108642578125,
      "step": 57522,
      "training_step_time": 0.5089178085327148
    },
    {
      "epoch": 0.000351092529296875,
      "model_forward_time": 0.11531877517700195,
      "step": 57523
    },
    {
      "epoch": 0.000351092529296875,
      "step": 57523,
      "training_step_time": 0.40738749504089355
    },
    {
      "epoch": 0.0003510986328125,
      "model_forward_time": 0.11489653587341309,
      "step": 57524
    },
    {
      "epoch": 0.0003510986328125,
      "step": 57524,
      "training_step_time": 0.3862948417663574
    },
    {
      "epoch": 0.000351104736328125,
      "model_forward_time": 0.11521005630493164,
      "step": 57525
    },
    {
      "epoch": 0.000351104736328125,
      "step": 57525,
      "training_step_time": 0.42711615562438965
    },
    {
      "epoch": 0.00035111083984375,
      "model_forward_time": 0.11462211608886719,
      "step": 57526
    },
    {
      "epoch": 0.00035111083984375,
      "step": 57526,
      "training_step_time": 0.42110729217529297
    },
    {
      "epoch": 0.000351116943359375,
      "model_forward_time": 0.11495447158813477,
      "step": 57527
    },
    {
      "epoch": 0.000351116943359375,
      "step": 57527,
      "training_step_time": 0.4793369770050049
    },
    {
      "epoch": 0.000351123046875,
      "model_forward_time": 0.11481475830078125,
      "step": 57528
    },
    {
      "epoch": 0.000351123046875,
      "step": 57528,
      "training_step_time": 0.39460110664367676
    },
    {
      "epoch": 0.000351129150390625,
      "model_forward_time": 0.11506056785583496,
      "step": 57529
    },
    {
      "epoch": 0.000351129150390625,
      "step": 57529,
      "training_step_time": 0.40311503410339355
    },
    {
      "epoch": 0.00035113525390625,
      "grad_norm": 0.08076190948486328,
      "learning_rate": 4.6260797642782014e-07,
      "loss": 0.0356,
      "step": 57530
    },
    {
      "epoch": 0.00035113525390625,
      "model_forward_time": 0.11498165130615234,
      "step": 57530
    },
    {
      "epoch": 0.00035113525390625,
      "step": 57530,
      "training_step_time": 0.415539026260376
    },
    {
      "epoch": 0.000351141357421875,
      "model_forward_time": 0.11439847946166992,
      "step": 57531
    },
    {
      "epoch": 0.000351141357421875,
      "step": 57531,
      "training_step_time": 0.46208810806274414
    },
    {
      "epoch": 0.0003511474609375,
      "model_forward_time": 0.11511421203613281,
      "step": 57532
    },
    {
      "epoch": 0.0003511474609375,
      "step": 57532,
      "training_step_time": 0.40402746200561523
    },
    {
      "epoch": 0.000351153564453125,
      "model_forward_time": 0.11487436294555664,
      "step": 57533
    },
    {
      "epoch": 0.000351153564453125,
      "step": 57533,
      "training_step_time": 0.39006638526916504
    },
    {
      "epoch": 0.00035115966796875,
      "model_forward_time": 0.11535763740539551,
      "step": 57534
    },
    {
      "epoch": 0.00035115966796875,
      "step": 57534,
      "training_step_time": 0.39640092849731445
    },
    {
      "epoch": 0.000351165771484375,
      "model_forward_time": 0.11555719375610352,
      "step": 57535
    },
    {
      "epoch": 0.000351165771484375,
      "step": 57535,
      "training_step_time": 0.4756166934967041
    },
    {
      "epoch": 0.000351171875,
      "model_forward_time": 0.11534547805786133,
      "step": 57536
    },
    {
      "epoch": 0.000351171875,
      "step": 57536,
      "training_step_time": 0.4486513137817383
    },
    {
      "epoch": 0.000351177978515625,
      "model_forward_time": 0.11481356620788574,
      "step": 57537
    },
    {
      "epoch": 0.000351177978515625,
      "step": 57537,
      "training_step_time": 0.5369434356689453
    },
    {
      "epoch": 0.00035118408203125,
      "model_forward_time": 0.1155548095703125,
      "step": 57538
    },
    {
      "epoch": 0.00035118408203125,
      "step": 57538,
      "training_step_time": 0.4010913372039795
    },
    {
      "epoch": 0.000351190185546875,
      "model_forward_time": 0.11453866958618164,
      "step": 57539
    },
    {
      "epoch": 0.000351190185546875,
      "step": 57539,
      "training_step_time": 0.5019750595092773
    },
    {
      "epoch": 0.0003511962890625,
      "grad_norm": 0.11212585866451263,
      "learning_rate": 4.5887547397955864e-07,
      "loss": 0.0374,
      "step": 57540
    },
    {
      "epoch": 0.0003511962890625,
      "model_forward_time": 0.11420631408691406,
      "step": 57540
    },
    {
      "epoch": 0.0003511962890625,
      "step": 57540,
      "training_step_time": 0.4157536029815674
    },
    {
      "epoch": 0.000351202392578125,
      "model_forward_time": 0.11520957946777344,
      "step": 57541
    },
    {
      "epoch": 0.000351202392578125,
      "step": 57541,
      "training_step_time": 0.4970834255218506
    },
    {
      "epoch": 0.00035120849609375,
      "model_forward_time": 0.11415839195251465,
      "step": 57542
    },
    {
      "epoch": 0.00035120849609375,
      "step": 57542,
      "training_step_time": 0.39563918113708496
    },
    {
      "epoch": 0.000351214599609375,
      "model_forward_time": 0.11462759971618652,
      "step": 57543
    },
    {
      "epoch": 0.000351214599609375,
      "step": 57543,
      "training_step_time": 0.40503978729248047
    },
    {
      "epoch": 0.000351220703125,
      "model_forward_time": 0.11427903175354004,
      "step": 57544
    },
    {
      "epoch": 0.000351220703125,
      "step": 57544,
      "training_step_time": 0.4048483371734619
    },
    {
      "epoch": 0.000351226806640625,
      "model_forward_time": 0.1146397590637207,
      "step": 57545
    },
    {
      "epoch": 0.000351226806640625,
      "step": 57545,
      "training_step_time": 0.3970942497253418
    },
    {
      "epoch": 0.00035123291015625,
      "model_forward_time": 0.11525130271911621,
      "step": 57546
    },
    {
      "epoch": 0.00035123291015625,
      "step": 57546,
      "training_step_time": 0.3976171016693115
    },
    {
      "epoch": 0.000351239013671875,
      "model_forward_time": 0.11489176750183105,
      "step": 57547
    },
    {
      "epoch": 0.000351239013671875,
      "step": 57547,
      "training_step_time": 0.40236926078796387
    },
    {
      "epoch": 0.0003512451171875,
      "model_forward_time": 0.11491584777832031,
      "step": 57548
    },
    {
      "epoch": 0.0003512451171875,
      "step": 57548,
      "training_step_time": 0.3973121643066406
    },
    {
      "epoch": 0.000351251220703125,
      "model_forward_time": 0.11526322364807129,
      "step": 57549
    },
    {
      "epoch": 0.000351251220703125,
      "step": 57549,
      "training_step_time": 0.47403740882873535
    },
    {
      "epoch": 0.00035125732421875,
      "grad_norm": 0.1293140947818756,
      "learning_rate": 4.5515802081687045e-07,
      "loss": 0.0359,
      "step": 57550
    },
    {
      "epoch": 0.00035125732421875,
      "model_forward_time": 0.11566829681396484,
      "step": 57550
    },
    {
      "epoch": 0.00035125732421875,
      "step": 57550,
      "training_step_time": 0.4221179485321045
    },
    {
      "epoch": 0.000351263427734375,
      "model_forward_time": 0.11495018005371094,
      "step": 57551
    },
    {
      "epoch": 0.000351263427734375,
      "step": 57551,
      "training_step_time": 0.46765708923339844
    },
    {
      "epoch": 0.00035126953125,
      "model_forward_time": 0.11514687538146973,
      "step": 57552
    },
    {
      "epoch": 0.00035126953125,
      "step": 57552,
      "training_step_time": 0.39681529998779297
    },
    {
      "epoch": 0.000351275634765625,
      "model_forward_time": 0.11499238014221191,
      "step": 57553
    },
    {
      "epoch": 0.000351275634765625,
      "step": 57553,
      "training_step_time": 0.43048954010009766
    },
    {
      "epoch": 0.00035128173828125,
      "model_forward_time": 0.11448192596435547,
      "step": 57554
    },
    {
      "epoch": 0.00035128173828125,
      "step": 57554,
      "training_step_time": 0.39199185371398926
    },
    {
      "epoch": 0.000351287841796875,
      "model_forward_time": 0.1147012710571289,
      "step": 57555
    },
    {
      "epoch": 0.000351287841796875,
      "step": 57555,
      "training_step_time": 0.49091291427612305
    },
    {
      "epoch": 0.0003512939453125,
      "model_forward_time": 0.11594152450561523,
      "step": 57556
    },
    {
      "epoch": 0.0003512939453125,
      "step": 57556,
      "training_step_time": 0.371368408203125
    },
    {
      "epoch": 0.000351300048828125,
      "model_forward_time": 0.11475539207458496,
      "step": 57557
    },
    {
      "epoch": 0.000351300048828125,
      "step": 57557,
      "training_step_time": 0.40900421142578125
    },
    {
      "epoch": 0.00035130615234375,
      "model_forward_time": 0.11583948135375977,
      "step": 57558
    },
    {
      "epoch": 0.00035130615234375,
      "step": 57558,
      "training_step_time": 0.39962029457092285
    },
    {
      "epoch": 0.000351312255859375,
      "model_forward_time": 0.11498594284057617,
      "step": 57559
    },
    {
      "epoch": 0.000351312255859375,
      "step": 57559,
      "training_step_time": 0.3953056335449219
    },
    {
      "epoch": 0.000351318359375,
      "grad_norm": 0.09358225017786026,
      "learning_rate": 4.514556180690188e-07,
      "loss": 0.0402,
      "step": 57560
    },
    {
      "epoch": 0.000351318359375,
      "model_forward_time": 0.11506414413452148,
      "step": 57560
    },
    {
      "epoch": 0.000351318359375,
      "step": 57560,
      "training_step_time": 0.3863942623138428
    },
    {
      "epoch": 0.000351324462890625,
      "model_forward_time": 0.11509466171264648,
      "step": 57561
    },
    {
      "epoch": 0.000351324462890625,
      "step": 57561,
      "training_step_time": 0.6157553195953369
    },
    {
      "epoch": 0.00035133056640625,
      "model_forward_time": 0.1152653694152832,
      "step": 57562
    },
    {
      "epoch": 0.00035133056640625,
      "step": 57562,
      "training_step_time": 0.3848145008087158
    },
    {
      "epoch": 0.000351336669921875,
      "model_forward_time": 0.1143655776977539,
      "step": 57563
    },
    {
      "epoch": 0.000351336669921875,
      "step": 57563,
      "training_step_time": 0.36267995834350586
    },
    {
      "epoch": 0.0003513427734375,
      "model_forward_time": 0.11479592323303223,
      "step": 57564
    },
    {
      "epoch": 0.0003513427734375,
      "step": 57564,
      "training_step_time": 0.42895054817199707
    },
    {
      "epoch": 0.000351348876953125,
      "model_forward_time": 0.11436152458190918,
      "step": 57565
    },
    {
      "epoch": 0.000351348876953125,
      "step": 57565,
      "training_step_time": 0.4343388080596924
    },
    {
      "epoch": 0.00035135498046875,
      "model_forward_time": 0.11483979225158691,
      "step": 57566
    },
    {
      "epoch": 0.00035135498046875,
      "step": 57566,
      "training_step_time": 0.39545559883117676
    },
    {
      "epoch": 0.000351361083984375,
      "model_forward_time": 0.11488604545593262,
      "step": 57567
    },
    {
      "epoch": 0.000351361083984375,
      "step": 57567,
      "training_step_time": 0.49120068550109863
    },
    {
      "epoch": 0.0003513671875,
      "model_forward_time": 0.11511468887329102,
      "step": 57568
    },
    {
      "epoch": 0.0003513671875,
      "step": 57568,
      "training_step_time": 0.4192233085632324
    },
    {
      "epoch": 0.000351373291015625,
      "model_forward_time": 0.11506175994873047,
      "step": 57569
    },
    {
      "epoch": 0.000351373291015625,
      "step": 57569,
      "training_step_time": 0.47421956062316895
    },
    {
      "epoch": 0.00035137939453125,
      "grad_norm": 0.11526063829660416,
      "learning_rate": 4.4776826686069305e-07,
      "loss": 0.0397,
      "step": 57570
    },
    {
      "epoch": 0.00035137939453125,
      "model_forward_time": 0.11476349830627441,
      "step": 57570
    },
    {
      "epoch": 0.00035137939453125,
      "step": 57570,
      "training_step_time": 0.39548707008361816
    },
    {
      "epoch": 0.000351385498046875,
      "model_forward_time": 0.11477947235107422,
      "step": 57571
    },
    {
      "epoch": 0.000351385498046875,
      "step": 57571,
      "training_step_time": 0.3915538787841797
    },
    {
      "epoch": 0.0003513916015625,
      "model_forward_time": 0.11516904830932617,
      "step": 57572
    },
    {
      "epoch": 0.0003513916015625,
      "step": 57572,
      "training_step_time": 0.3862943649291992
    },
    {
      "epoch": 0.000351397705078125,
      "model_forward_time": 0.11501765251159668,
      "step": 57573
    },
    {
      "epoch": 0.000351397705078125,
      "step": 57573,
      "training_step_time": 0.5538887977600098
    },
    {
      "epoch": 0.00035140380859375,
      "model_forward_time": 0.11512517929077148,
      "step": 57574
    },
    {
      "epoch": 0.00035140380859375,
      "step": 57574,
      "training_step_time": 0.3847179412841797
    },
    {
      "epoch": 0.000351409912109375,
      "model_forward_time": 0.11444997787475586,
      "step": 57575
    },
    {
      "epoch": 0.000351409912109375,
      "step": 57575,
      "training_step_time": 0.3892812728881836
    },
    {
      "epoch": 0.000351416015625,
      "model_forward_time": 0.11507463455200195,
      "step": 57576
    },
    {
      "epoch": 0.000351416015625,
      "step": 57576,
      "training_step_time": 0.39702510833740234
    },
    {
      "epoch": 0.000351422119140625,
      "model_forward_time": 0.11484241485595703,
      "step": 57577
    },
    {
      "epoch": 0.000351422119140625,
      "step": 57577,
      "training_step_time": 0.4005889892578125
    },
    {
      "epoch": 0.00035142822265625,
      "model_forward_time": 0.11502337455749512,
      "step": 57578
    },
    {
      "epoch": 0.00035142822265625,
      "step": 57578,
      "training_step_time": 0.40562939643859863
    },
    {
      "epoch": 0.000351434326171875,
      "model_forward_time": 0.11468172073364258,
      "step": 57579
    },
    {
      "epoch": 0.000351434326171875,
      "step": 57579,
      "training_step_time": 0.6284589767456055
    },
    {
      "epoch": 0.0003514404296875,
      "grad_norm": 0.085938960313797,
      "learning_rate": 4.440959683120194e-07,
      "loss": 0.0333,
      "step": 57580
    },
    {
      "epoch": 0.0003514404296875,
      "model_forward_time": 0.1150660514831543,
      "step": 57580
    },
    {
      "epoch": 0.0003514404296875,
      "step": 57580,
      "training_step_time": 0.42761659622192383
    },
    {
      "epoch": 0.000351446533203125,
      "model_forward_time": 0.11450672149658203,
      "step": 57581
    },
    {
      "epoch": 0.000351446533203125,
      "step": 57581,
      "training_step_time": 0.3989875316619873
    },
    {
      "epoch": 0.00035145263671875,
      "model_forward_time": 0.11381149291992188,
      "step": 57582
    },
    {
      "epoch": 0.00035145263671875,
      "step": 57582,
      "training_step_time": 0.40653514862060547
    },
    {
      "epoch": 0.000351458740234375,
      "model_forward_time": 0.11511993408203125,
      "step": 57583
    },
    {
      "epoch": 0.000351458740234375,
      "step": 57583,
      "training_step_time": 0.49133944511413574
    },
    {
      "epoch": 0.00035146484375,
      "model_forward_time": 0.11463093757629395,
      "step": 57584
    },
    {
      "epoch": 0.00035146484375,
      "step": 57584,
      "training_step_time": 0.3860054016113281
    },
    {
      "epoch": 0.000351470947265625,
      "model_forward_time": 0.11552596092224121,
      "step": 57585
    },
    {
      "epoch": 0.000351470947265625,
      "step": 57585,
      "training_step_time": 0.39824867248535156
    },
    {
      "epoch": 0.00035147705078125,
      "model_forward_time": 0.11535334587097168,
      "step": 57586
    },
    {
      "epoch": 0.00035147705078125,
      "step": 57586,
      "training_step_time": 0.3817582130432129
    },
    {
      "epoch": 0.000351483154296875,
      "model_forward_time": 0.11535048484802246,
      "step": 57587
    },
    {
      "epoch": 0.000351483154296875,
      "step": 57587,
      "training_step_time": 0.3965461254119873
    },
    {
      "epoch": 0.0003514892578125,
      "model_forward_time": 0.11485958099365234,
      "step": 57588
    },
    {
      "epoch": 0.0003514892578125,
      "step": 57588,
      "training_step_time": 0.39493441581726074
    },
    {
      "epoch": 0.000351495361328125,
      "model_forward_time": 0.11491560935974121,
      "step": 57589
    },
    {
      "epoch": 0.000351495361328125,
      "step": 57589,
      "training_step_time": 0.40384936332702637
    },
    {
      "epoch": 0.00035150146484375,
      "grad_norm": 0.07918470352888107,
      "learning_rate": 4.404387235385443e-07,
      "loss": 0.0368,
      "step": 57590
    },
    {
      "epoch": 0.00035150146484375,
      "model_forward_time": 0.11496758460998535,
      "step": 57590
    },
    {
      "epoch": 0.00035150146484375,
      "step": 57590,
      "training_step_time": 0.39113903045654297
    },
    {
      "epoch": 0.000351507568359375,
      "model_forward_time": 0.11491727828979492,
      "step": 57591
    },
    {
      "epoch": 0.000351507568359375,
      "step": 57591,
      "training_step_time": 0.6435940265655518
    },
    {
      "epoch": 0.000351513671875,
      "model_forward_time": 0.11447405815124512,
      "step": 57592
    },
    {
      "epoch": 0.000351513671875,
      "step": 57592,
      "training_step_time": 0.39019203186035156
    },
    {
      "epoch": 0.000351519775390625,
      "model_forward_time": 0.11473774909973145,
      "step": 57593
    },
    {
      "epoch": 0.000351519775390625,
      "step": 57593,
      "training_step_time": 0.4103364944458008
    },
    {
      "epoch": 0.00035152587890625,
      "model_forward_time": 0.11492252349853516,
      "step": 57594
    },
    {
      "epoch": 0.00035152587890625,
      "step": 57594,
      "training_step_time": 0.4729583263397217
    },
    {
      "epoch": 0.000351531982421875,
      "model_forward_time": 0.1140298843383789,
      "step": 57595
    },
    {
      "epoch": 0.000351531982421875,
      "step": 57595,
      "training_step_time": 0.4806804656982422
    },
    {
      "epoch": 0.0003515380859375,
      "model_forward_time": 0.11418819427490234,
      "step": 57596
    },
    {
      "epoch": 0.0003515380859375,
      "step": 57596,
      "training_step_time": 0.4387400150299072
    },
    {
      "epoch": 0.000351544189453125,
      "model_forward_time": 0.11463189125061035,
      "step": 57597
    },
    {
      "epoch": 0.000351544189453125,
      "step": 57597,
      "training_step_time": 0.3865385055541992
    },
    {
      "epoch": 0.00035155029296875,
      "model_forward_time": 0.11439776420593262,
      "step": 57598
    },
    {
      "epoch": 0.00035155029296875,
      "step": 57598,
      "training_step_time": 0.38573408126831055
    },
    {
      "epoch": 0.000351556396484375,
      "model_forward_time": 0.11466097831726074,
      "step": 57599
    },
    {
      "epoch": 0.000351556396484375,
      "step": 57599,
      "training_step_time": 0.3852667808532715
    },
    {
      "epoch": 0.0003515625,
      "grad_norm": 0.09131254255771637,
      "learning_rate": 4.367965336512403e-07,
      "loss": 0.0363,
      "step": 57600
    },
    {
      "epoch": 0.0003515625,
      "model_forward_time": 0.11616325378417969,
      "step": 57600
    },
    {
      "epoch": 0.0003515625,
      "step": 57600,
      "training_step_time": 0.38963890075683594
    },
    {
      "epoch": 0.000351568603515625,
      "model_forward_time": 0.11496710777282715,
      "step": 57601
    },
    {
      "epoch": 0.000351568603515625,
      "step": 57601,
      "training_step_time": 0.3997805118560791
    },
    {
      "epoch": 0.00035157470703125,
      "model_forward_time": 0.11466670036315918,
      "step": 57602
    },
    {
      "epoch": 0.00035157470703125,
      "step": 57602,
      "training_step_time": 0.40137720108032227
    },
    {
      "epoch": 0.000351580810546875,
      "model_forward_time": 0.11524677276611328,
      "step": 57603
    },
    {
      "epoch": 0.000351580810546875,
      "step": 57603,
      "training_step_time": 0.44742274284362793
    },
    {
      "epoch": 0.0003515869140625,
      "model_forward_time": 0.11588668823242188,
      "step": 57604
    },
    {
      "epoch": 0.0003515869140625,
      "step": 57604,
      "training_step_time": 0.3973565101623535
    },
    {
      "epoch": 0.000351593017578125,
      "model_forward_time": 0.11530613899230957,
      "step": 57605
    },
    {
      "epoch": 0.000351593017578125,
      "step": 57605,
      "training_step_time": 0.3997769355773926
    },
    {
      "epoch": 0.00035159912109375,
      "model_forward_time": 0.1150519847869873,
      "step": 57606
    },
    {
      "epoch": 0.00035159912109375,
      "step": 57606,
      "training_step_time": 0.3909759521484375
    },
    {
      "epoch": 0.000351605224609375,
      "model_forward_time": 0.11560940742492676,
      "step": 57607
    },
    {
      "epoch": 0.000351605224609375,
      "step": 57607,
      "training_step_time": 0.4949827194213867
    },
    {
      "epoch": 0.000351611328125,
      "model_forward_time": 0.11505818367004395,
      "step": 57608
    },
    {
      "epoch": 0.000351611328125,
      "step": 57608,
      "training_step_time": 0.4589712619781494
    },
    {
      "epoch": 0.000351617431640625,
      "model_forward_time": 0.11534714698791504,
      "step": 57609
    },
    {
      "epoch": 0.000351617431640625,
      "step": 57609,
      "training_step_time": 0.51318359375
    },
    {
      "epoch": 0.00035162353515625,
      "grad_norm": 0.09778743237257004,
      "learning_rate": 4.331693997565056e-07,
      "loss": 0.0389,
      "step": 57610
    },
    {
      "epoch": 0.00035162353515625,
      "model_forward_time": 0.11481237411499023,
      "step": 57610
    },
    {
      "epoch": 0.00035162353515625,
      "step": 57610,
      "training_step_time": 0.4115617275238037
    },
    {
      "epoch": 0.000351629638671875,
      "model_forward_time": 0.11450600624084473,
      "step": 57611
    },
    {
      "epoch": 0.000351629638671875,
      "step": 57611,
      "training_step_time": 0.48859643936157227
    },
    {
      "epoch": 0.0003516357421875,
      "model_forward_time": 0.11415243148803711,
      "step": 57612
    },
    {
      "epoch": 0.0003516357421875,
      "step": 57612,
      "training_step_time": 0.38924193382263184
    },
    {
      "epoch": 0.000351641845703125,
      "model_forward_time": 0.11451864242553711,
      "step": 57613
    },
    {
      "epoch": 0.000351641845703125,
      "step": 57613,
      "training_step_time": 0.40169405937194824
    },
    {
      "epoch": 0.00035164794921875,
      "model_forward_time": 0.11539554595947266,
      "step": 57614
    },
    {
      "epoch": 0.00035164794921875,
      "step": 57614,
      "training_step_time": 0.4041709899902344
    },
    {
      "epoch": 0.000351654052734375,
      "model_forward_time": 0.1152639389038086,
      "step": 57615
    },
    {
      "epoch": 0.000351654052734375,
      "step": 57615,
      "training_step_time": 0.38596582412719727
    },
    {
      "epoch": 0.00035166015625,
      "model_forward_time": 0.11464762687683105,
      "step": 57616
    },
    {
      "epoch": 0.00035166015625,
      "step": 57616,
      "training_step_time": 0.3857150077819824
    },
    {
      "epoch": 0.000351666259765625,
      "model_forward_time": 0.11453723907470703,
      "step": 57617
    },
    {
      "epoch": 0.000351666259765625,
      "step": 57617,
      "training_step_time": 0.3896777629852295
    },
    {
      "epoch": 0.00035167236328125,
      "model_forward_time": 0.11557149887084961,
      "step": 57618
    },
    {
      "epoch": 0.00035167236328125,
      "step": 57618,
      "training_step_time": 0.39577651023864746
    },
    {
      "epoch": 0.000351678466796875,
      "model_forward_time": 0.11546087265014648,
      "step": 57619
    },
    {
      "epoch": 0.000351678466796875,
      "step": 57619,
      "training_step_time": 0.39792728424072266
    },
    {
      "epoch": 0.0003516845703125,
      "grad_norm": 0.08242034167051315,
      "learning_rate": 4.2955732295617554e-07,
      "loss": 0.038,
      "step": 57620
    },
    {
      "epoch": 0.0003516845703125,
      "model_forward_time": 0.11501383781433105,
      "step": 57620
    },
    {
      "epoch": 0.0003516845703125,
      "step": 57620,
      "training_step_time": 0.3913590908050537
    },
    {
      "epoch": 0.000351690673828125,
      "model_forward_time": 0.11530423164367676,
      "step": 57621
    },
    {
      "epoch": 0.000351690673828125,
      "step": 57621,
      "training_step_time": 0.5920195579528809
    },
    {
      "epoch": 0.00035169677734375,
      "model_forward_time": 0.11544084548950195,
      "step": 57622
    },
    {
      "epoch": 0.00035169677734375,
      "step": 57622,
      "training_step_time": 0.45798349380493164
    },
    {
      "epoch": 0.000351702880859375,
      "model_forward_time": 0.11467432975769043,
      "step": 57623
    },
    {
      "epoch": 0.000351702880859375,
      "step": 57623,
      "training_step_time": 0.431943416595459
    },
    {
      "epoch": 0.000351708984375,
      "model_forward_time": 0.11658906936645508,
      "step": 57624
    },
    {
      "epoch": 0.000351708984375,
      "step": 57624,
      "training_step_time": 0.4630570411682129
    },
    {
      "epoch": 0.000351715087890625,
      "model_forward_time": 0.11458587646484375,
      "step": 57625
    },
    {
      "epoch": 0.000351715087890625,
      "step": 57625,
      "training_step_time": 0.41306591033935547
    },
    {
      "epoch": 0.00035172119140625,
      "model_forward_time": 0.11414957046508789,
      "step": 57626
    },
    {
      "epoch": 0.00035172119140625,
      "step": 57626,
      "training_step_time": 0.39359235763549805
    },
    {
      "epoch": 0.000351727294921875,
      "model_forward_time": 0.1149606704711914,
      "step": 57627
    },
    {
      "epoch": 0.000351727294921875,
      "step": 57627,
      "training_step_time": 0.433945894241333
    },
    {
      "epoch": 0.0003517333984375,
      "model_forward_time": 0.11492156982421875,
      "step": 57628
    },
    {
      "epoch": 0.0003517333984375,
      "step": 57628,
      "training_step_time": 0.38753271102905273
    },
    {
      "epoch": 0.000351739501953125,
      "model_forward_time": 0.11536717414855957,
      "step": 57629
    },
    {
      "epoch": 0.000351739501953125,
      "step": 57629,
      "training_step_time": 0.41464924812316895
    },
    {
      "epoch": 0.00035174560546875,
      "grad_norm": 0.10896969586610794,
      "learning_rate": 4.259603043475002e-07,
      "loss": 0.0372,
      "step": 57630
    },
    {
      "epoch": 0.00035174560546875,
      "model_forward_time": 0.1152806282043457,
      "step": 57630
    },
    {
      "epoch": 0.00035174560546875,
      "step": 57630,
      "training_step_time": 0.39830493927001953
    },
    {
      "epoch": 0.000351751708984375,
      "model_forward_time": 0.11461353302001953,
      "step": 57631
    },
    {
      "epoch": 0.000351751708984375,
      "step": 57631,
      "training_step_time": 0.41489243507385254
    },
    {
      "epoch": 0.0003517578125,
      "model_forward_time": 0.11511611938476562,
      "step": 57632
    },
    {
      "epoch": 0.0003517578125,
      "step": 57632,
      "training_step_time": 0.39346790313720703
    },
    {
      "epoch": 0.000351763916015625,
      "model_forward_time": 0.11467480659484863,
      "step": 57633
    },
    {
      "epoch": 0.000351763916015625,
      "step": 57633,
      "training_step_time": 0.5674102306365967
    },
    {
      "epoch": 0.00035177001953125,
      "model_forward_time": 0.11487197875976562,
      "step": 57634
    },
    {
      "epoch": 0.00035177001953125,
      "step": 57634,
      "training_step_time": 0.39458775520324707
    },
    {
      "epoch": 0.000351776123046875,
      "model_forward_time": 0.1152341365814209,
      "step": 57635
    },
    {
      "epoch": 0.000351776123046875,
      "step": 57635,
      "training_step_time": 0.3843264579772949
    },
    {
      "epoch": 0.0003517822265625,
      "model_forward_time": 0.11533498764038086,
      "step": 57636
    },
    {
      "epoch": 0.0003517822265625,
      "step": 57636,
      "training_step_time": 0.42325925827026367
    },
    {
      "epoch": 0.000351788330078125,
      "model_forward_time": 0.11476707458496094,
      "step": 57637
    },
    {
      "epoch": 0.000351788330078125,
      "step": 57637,
      "training_step_time": 0.4257848262786865
    },
    {
      "epoch": 0.00035179443359375,
      "model_forward_time": 0.11515426635742188,
      "step": 57638
    },
    {
      "epoch": 0.00035179443359375,
      "step": 57638,
      "training_step_time": 0.4782125949859619
    },
    {
      "epoch": 0.000351800537109375,
      "model_forward_time": 0.11537742614746094,
      "step": 57639
    },
    {
      "epoch": 0.000351800537109375,
      "step": 57639,
      "training_step_time": 0.5157821178436279
    },
    {
      "epoch": 0.000351806640625,
      "grad_norm": 0.0804734155535698,
      "learning_rate": 4.2237834502314997e-07,
      "loss": 0.0384,
      "step": 57640
    },
    {
      "epoch": 0.000351806640625,
      "model_forward_time": 0.11472511291503906,
      "step": 57640
    },
    {
      "epoch": 0.000351806640625,
      "step": 57640,
      "training_step_time": 0.39269256591796875
    },
    {
      "epoch": 0.000351812744140625,
      "model_forward_time": 0.11515331268310547,
      "step": 57641
    },
    {
      "epoch": 0.000351812744140625,
      "step": 57641,
      "training_step_time": 0.37711215019226074
    },
    {
      "epoch": 0.00035181884765625,
      "model_forward_time": 0.1150655746459961,
      "step": 57642
    },
    {
      "epoch": 0.00035181884765625,
      "step": 57642,
      "training_step_time": 0.40210580825805664
    },
    {
      "epoch": 0.000351824951171875,
      "model_forward_time": 0.1155691146850586,
      "step": 57643
    },
    {
      "epoch": 0.000351824951171875,
      "step": 57643,
      "training_step_time": 0.3972454071044922
    },
    {
      "epoch": 0.0003518310546875,
      "model_forward_time": 0.11506533622741699,
      "step": 57644
    },
    {
      "epoch": 0.0003518310546875,
      "step": 57644,
      "training_step_time": 0.39217686653137207
    },
    {
      "epoch": 0.000351837158203125,
      "model_forward_time": 0.11468124389648438,
      "step": 57645
    },
    {
      "epoch": 0.000351837158203125,
      "step": 57645,
      "training_step_time": 0.5903327465057373
    },
    {
      "epoch": 0.00035184326171875,
      "model_forward_time": 0.11466765403747559,
      "step": 57646
    },
    {
      "epoch": 0.00035184326171875,
      "step": 57646,
      "training_step_time": 0.3836188316345215
    },
    {
      "epoch": 0.000351849365234375,
      "model_forward_time": 0.1145472526550293,
      "step": 57647
    },
    {
      "epoch": 0.000351849365234375,
      "step": 57647,
      "training_step_time": 0.40940213203430176
    },
    {
      "epoch": 0.00035185546875,
      "model_forward_time": 0.11467266082763672,
      "step": 57648
    },
    {
      "epoch": 0.00035185546875,
      "step": 57648,
      "training_step_time": 0.3870539665222168
    },
    {
      "epoch": 0.000351861572265625,
      "model_forward_time": 0.11474776268005371,
      "step": 57649
    },
    {
      "epoch": 0.000351861572265625,
      "step": 57649,
      "training_step_time": 0.3937504291534424
    },
    {
      "epoch": 0.00035186767578125,
      "grad_norm": 0.11858430504798889,
      "learning_rate": 4.1881144607124334e-07,
      "loss": 0.0387,
      "step": 57650
    },
    {
      "epoch": 0.00035186767578125,
      "model_forward_time": 0.11478137969970703,
      "step": 57650
    },
    {
      "epoch": 0.00035186767578125,
      "step": 57650,
      "training_step_time": 0.36469340324401855
    },
    {
      "epoch": 0.000351873779296875,
      "model_forward_time": 0.11568069458007812,
      "step": 57651
    },
    {
      "epoch": 0.000351873779296875,
      "step": 57651,
      "training_step_time": 0.6017520427703857
    },
    {
      "epoch": 0.0003518798828125,
      "model_forward_time": 0.11434173583984375,
      "step": 57652
    },
    {
      "epoch": 0.0003518798828125,
      "step": 57652,
      "training_step_time": 0.4351615905761719
    },
    {
      "epoch": 0.000351885986328125,
      "model_forward_time": 0.11507654190063477,
      "step": 57653
    },
    {
      "epoch": 0.000351885986328125,
      "step": 57653,
      "training_step_time": 0.4535694122314453
    },
    {
      "epoch": 0.00035189208984375,
      "model_forward_time": 0.11526036262512207,
      "step": 57654
    },
    {
      "epoch": 0.00035189208984375,
      "step": 57654,
      "training_step_time": 0.3884420394897461
    },
    {
      "epoch": 0.000351898193359375,
      "model_forward_time": 0.11469721794128418,
      "step": 57655
    },
    {
      "epoch": 0.000351898193359375,
      "step": 57655,
      "training_step_time": 0.39675331115722656
    },
    {
      "epoch": 0.000351904296875,
      "model_forward_time": 0.1148827075958252,
      "step": 57656
    },
    {
      "epoch": 0.000351904296875,
      "step": 57656,
      "training_step_time": 0.3927798271179199
    },
    {
      "epoch": 0.000351910400390625,
      "model_forward_time": 0.11460566520690918,
      "step": 57657
    },
    {
      "epoch": 0.000351910400390625,
      "step": 57657,
      "training_step_time": 0.519756555557251
    },
    {
      "epoch": 0.00035191650390625,
      "model_forward_time": 0.1149141788482666,
      "step": 57658
    },
    {
      "epoch": 0.00035191650390625,
      "step": 57658,
      "training_step_time": 0.38778066635131836
    },
    {
      "epoch": 0.000351922607421875,
      "model_forward_time": 0.11507701873779297,
      "step": 57659
    },
    {
      "epoch": 0.000351922607421875,
      "step": 57659,
      "training_step_time": 0.4146440029144287
    },
    {
      "epoch": 0.0003519287109375,
      "grad_norm": 0.08879891037940979,
      "learning_rate": 4.1525960857530243e-07,
      "loss": 0.0335,
      "step": 57660
    },
    {
      "epoch": 0.0003519287109375,
      "model_forward_time": 0.1150517463684082,
      "step": 57660
    },
    {
      "epoch": 0.0003519287109375,
      "step": 57660,
      "training_step_time": 0.38480615615844727
    },
    {
      "epoch": 0.000351934814453125,
      "model_forward_time": 0.11536288261413574,
      "step": 57661
    },
    {
      "epoch": 0.000351934814453125,
      "step": 57661,
      "training_step_time": 0.40521788597106934
    },
    {
      "epoch": 0.00035194091796875,
      "model_forward_time": 0.11411118507385254,
      "step": 57662
    },
    {
      "epoch": 0.00035194091796875,
      "step": 57662,
      "training_step_time": 0.3941967487335205
    },
    {
      "epoch": 0.000351947021484375,
      "model_forward_time": 0.116424560546875,
      "step": 57663
    },
    {
      "epoch": 0.000351947021484375,
      "step": 57663,
      "training_step_time": 0.6057181358337402
    },
    {
      "epoch": 0.000351953125,
      "model_forward_time": 0.11504507064819336,
      "step": 57664
    },
    {
      "epoch": 0.000351953125,
      "step": 57664,
      "training_step_time": 0.4516310691833496
    },
    {
      "epoch": 0.000351959228515625,
      "model_forward_time": 0.11472773551940918,
      "step": 57665
    },
    {
      "epoch": 0.000351959228515625,
      "step": 57665,
      "training_step_time": 0.4988212585449219
    },
    {
      "epoch": 0.00035196533203125,
      "model_forward_time": 0.11508631706237793,
      "step": 57666
    },
    {
      "epoch": 0.00035196533203125,
      "step": 57666,
      "training_step_time": 0.45809149742126465
    },
    {
      "epoch": 0.000351971435546875,
      "model_forward_time": 0.1146395206451416,
      "step": 57667
    },
    {
      "epoch": 0.000351971435546875,
      "step": 57667,
      "training_step_time": 0.40779638290405273
    },
    {
      "epoch": 0.0003519775390625,
      "model_forward_time": 0.11428976058959961,
      "step": 57668
    },
    {
      "epoch": 0.0003519775390625,
      "step": 57668,
      "training_step_time": 0.38378095626831055
    },
    {
      "epoch": 0.000351983642578125,
      "model_forward_time": 0.11456942558288574,
      "step": 57669
    },
    {
      "epoch": 0.000351983642578125,
      "step": 57669,
      "training_step_time": 0.3913562297821045
    },
    {
      "epoch": 0.00035198974609375,
      "grad_norm": 0.08912325650453568,
      "learning_rate": 4.1172283361428644e-07,
      "loss": 0.0352,
      "step": 57670
    },
    {
      "epoch": 0.00035198974609375,
      "model_forward_time": 0.11481380462646484,
      "step": 57670
    },
    {
      "epoch": 0.00035198974609375,
      "step": 57670,
      "training_step_time": 0.3870992660522461
    },
    {
      "epoch": 0.000351995849609375,
      "model_forward_time": 0.11433577537536621,
      "step": 57671
    },
    {
      "epoch": 0.000351995849609375,
      "step": 57671,
      "training_step_time": 0.40996241569519043
    },
    {
      "epoch": 0.000352001953125,
      "model_forward_time": 0.11547183990478516,
      "step": 57672
    },
    {
      "epoch": 0.000352001953125,
      "step": 57672,
      "training_step_time": 0.3962569236755371
    },
    {
      "epoch": 0.000352008056640625,
      "model_forward_time": 0.11490178108215332,
      "step": 57673
    },
    {
      "epoch": 0.000352008056640625,
      "step": 57673,
      "training_step_time": 0.410186767578125
    },
    {
      "epoch": 0.00035201416015625,
      "model_forward_time": 0.11507058143615723,
      "step": 57674
    },
    {
      "epoch": 0.00035201416015625,
      "step": 57674,
      "training_step_time": 0.40430736541748047
    },
    {
      "epoch": 0.000352020263671875,
      "model_forward_time": 0.11507391929626465,
      "step": 57675
    },
    {
      "epoch": 0.000352020263671875,
      "step": 57675,
      "training_step_time": 0.5378191471099854
    },
    {
      "epoch": 0.0003520263671875,
      "model_forward_time": 0.1154782772064209,
      "step": 57676
    },
    {
      "epoch": 0.0003520263671875,
      "step": 57676,
      "training_step_time": 0.39821672439575195
    },
    {
      "epoch": 0.000352032470703125,
      "model_forward_time": 0.11515593528747559,
      "step": 57677
    },
    {
      "epoch": 0.000352032470703125,
      "step": 57677,
      "training_step_time": 0.4111137390136719
    },
    {
      "epoch": 0.00035203857421875,
      "model_forward_time": 0.11536598205566406,
      "step": 57678
    },
    {
      "epoch": 0.00035203857421875,
      "step": 57678,
      "training_step_time": 0.4035639762878418
    },
    {
      "epoch": 0.000352044677734375,
      "model_forward_time": 0.11450862884521484,
      "step": 57679
    },
    {
      "epoch": 0.000352044677734375,
      "step": 57679,
      "training_step_time": 0.5035262107849121
    },
    {
      "epoch": 0.00035205078125,
      "grad_norm": 0.096779465675354,
      "learning_rate": 4.082011222625637e-07,
      "loss": 0.0371,
      "step": 57680
    },
    {
      "epoch": 0.00035205078125,
      "model_forward_time": 0.114959716796875,
      "step": 57680
    },
    {
      "epoch": 0.00035205078125,
      "step": 57680,
      "training_step_time": 0.485872745513916
    },
    {
      "epoch": 0.000352056884765625,
      "model_forward_time": 0.11440157890319824,
      "step": 57681
    },
    {
      "epoch": 0.000352056884765625,
      "step": 57681,
      "training_step_time": 0.4518411159515381
    },
    {
      "epoch": 0.00035206298828125,
      "model_forward_time": 0.11455917358398438,
      "step": 57682
    },
    {
      "epoch": 0.00035206298828125,
      "step": 57682,
      "training_step_time": 0.39066290855407715
    },
    {
      "epoch": 0.000352069091796875,
      "model_forward_time": 0.11475706100463867,
      "step": 57683
    },
    {
      "epoch": 0.000352069091796875,
      "step": 57683,
      "training_step_time": 0.39106225967407227
    },
    {
      "epoch": 0.0003520751953125,
      "model_forward_time": 0.11549973487854004,
      "step": 57684
    },
    {
      "epoch": 0.0003520751953125,
      "step": 57684,
      "training_step_time": 0.39076995849609375
    },
    {
      "epoch": 0.000352081298828125,
      "model_forward_time": 0.11547970771789551,
      "step": 57685
    },
    {
      "epoch": 0.000352081298828125,
      "step": 57685,
      "training_step_time": 0.3912696838378906
    },
    {
      "epoch": 0.00035208740234375,
      "model_forward_time": 0.11463332176208496,
      "step": 57686
    },
    {
      "epoch": 0.00035208740234375,
      "step": 57686,
      "training_step_time": 0.38876914978027344
    },
    {
      "epoch": 0.000352093505859375,
      "model_forward_time": 0.11533021926879883,
      "step": 57687
    },
    {
      "epoch": 0.000352093505859375,
      "step": 57687,
      "training_step_time": 0.6310625076293945
    },
    {
      "epoch": 0.000352099609375,
      "model_forward_time": 0.11463499069213867,
      "step": 57688
    },
    {
      "epoch": 0.000352099609375,
      "step": 57688,
      "training_step_time": 0.38753628730773926
    },
    {
      "epoch": 0.000352105712890625,
      "model_forward_time": 0.1152658462524414,
      "step": 57689
    },
    {
      "epoch": 0.000352105712890625,
      "step": 57689,
      "training_step_time": 0.3901703357696533
    },
    {
      "epoch": 0.00035211181640625,
      "grad_norm": 0.09773746877908707,
      "learning_rate": 4.0469447558995065e-07,
      "loss": 0.0393,
      "step": 57690
    },
    {
      "epoch": 0.00035211181640625,
      "model_forward_time": 0.11507534980773926,
      "step": 57690
    },
    {
      "epoch": 0.00035211181640625,
      "step": 57690,
      "training_step_time": 0.39896297454833984
    },
    {
      "epoch": 0.000352117919921875,
      "model_forward_time": 0.11473488807678223,
      "step": 57691
    },
    {
      "epoch": 0.000352117919921875,
      "step": 57691,
      "training_step_time": 0.4172224998474121
    },
    {
      "epoch": 0.0003521240234375,
      "model_forward_time": 0.11466169357299805,
      "step": 57692
    },
    {
      "epoch": 0.0003521240234375,
      "step": 57692,
      "training_step_time": 0.38576221466064453
    },
    {
      "epoch": 0.000352130126953125,
      "model_forward_time": 0.11520791053771973,
      "step": 57693
    },
    {
      "epoch": 0.000352130126953125,
      "step": 57693,
      "training_step_time": 0.7529561519622803
    },
    {
      "epoch": 0.00035213623046875,
      "model_forward_time": 0.11428284645080566,
      "step": 57694
    },
    {
      "epoch": 0.00035213623046875,
      "step": 57694,
      "training_step_time": 0.4730377197265625
    },
    {
      "epoch": 0.000352142333984375,
      "model_forward_time": 0.11506271362304688,
      "step": 57695
    },
    {
      "epoch": 0.000352142333984375,
      "step": 57695,
      "training_step_time": 0.45601820945739746
    },
    {
      "epoch": 0.0003521484375,
      "model_forward_time": 0.11446261405944824,
      "step": 57696
    },
    {
      "epoch": 0.0003521484375,
      "step": 57696,
      "training_step_time": 0.3839268684387207
    },
    {
      "epoch": 0.000352154541015625,
      "model_forward_time": 0.1140131950378418,
      "step": 57697
    },
    {
      "epoch": 0.000352154541015625,
      "step": 57697,
      "training_step_time": 0.38581180572509766
    },
    {
      "epoch": 0.00035216064453125,
      "model_forward_time": 0.11361837387084961,
      "step": 57698
    },
    {
      "epoch": 0.00035216064453125,
      "step": 57698,
      "training_step_time": 0.3826444149017334
    },
    {
      "epoch": 0.000352166748046875,
      "model_forward_time": 0.11513900756835938,
      "step": 57699
    },
    {
      "epoch": 0.000352166748046875,
      "step": 57699,
      "training_step_time": 0.40166306495666504
    },
    {
      "epoch": 0.0003521728515625,
      "grad_norm": 0.06401416659355164,
      "learning_rate": 4.012028946616675e-07,
      "loss": 0.0363,
      "step": 57700
    },
    {
      "epoch": 0.0003521728515625,
      "model_forward_time": 0.11434054374694824,
      "step": 57700
    },
    {
      "epoch": 0.0003521728515625,
      "step": 57700,
      "training_step_time": 0.3997688293457031
    },
    {
      "epoch": 0.000352178955078125,
      "model_forward_time": 0.1155555248260498,
      "step": 57701
    },
    {
      "epoch": 0.000352178955078125,
      "step": 57701,
      "training_step_time": 0.38625597953796387
    },
    {
      "epoch": 0.00035218505859375,
      "model_forward_time": 0.11505603790283203,
      "step": 57702
    },
    {
      "epoch": 0.00035218505859375,
      "step": 57702,
      "training_step_time": 0.39533114433288574
    },
    {
      "epoch": 0.000352191162109375,
      "model_forward_time": 0.11515069007873535,
      "step": 57703
    },
    {
      "epoch": 0.000352191162109375,
      "step": 57703,
      "training_step_time": 0.39859580993652344
    },
    {
      "epoch": 0.000352197265625,
      "model_forward_time": 0.11569857597351074,
      "step": 57704
    },
    {
      "epoch": 0.000352197265625,
      "step": 57704,
      "training_step_time": 0.4809589385986328
    },
    {
      "epoch": 0.000352203369140625,
      "model_forward_time": 0.11446666717529297,
      "step": 57705
    },
    {
      "epoch": 0.000352203369140625,
      "step": 57705,
      "training_step_time": 0.410414457321167
    },
    {
      "epoch": 0.00035220947265625,
      "model_forward_time": 0.11514902114868164,
      "step": 57706
    },
    {
      "epoch": 0.00035220947265625,
      "step": 57706,
      "training_step_time": 0.3908231258392334
    },
    {
      "epoch": 0.000352215576171875,
      "model_forward_time": 0.11537694931030273,
      "step": 57707
    },
    {
      "epoch": 0.000352215576171875,
      "step": 57707,
      "training_step_time": 0.46744585037231445
    },
    {
      "epoch": 0.0003522216796875,
      "model_forward_time": 0.11546134948730469,
      "step": 57708
    },
    {
      "epoch": 0.0003522216796875,
      "step": 57708,
      "training_step_time": 0.46685266494750977
    },
    {
      "epoch": 0.000352227783203125,
      "model_forward_time": 0.11523199081420898,
      "step": 57709
    },
    {
      "epoch": 0.000352227783203125,
      "step": 57709,
      "training_step_time": 0.508122444152832
    },
    {
      "epoch": 0.00035223388671875,
      "grad_norm": 0.09551063179969788,
      "learning_rate": 3.977263805383602e-07,
      "loss": 0.0411,
      "step": 57710
    },
    {
      "epoch": 0.00035223388671875,
      "model_forward_time": 0.11505842208862305,
      "step": 57710
    },
    {
      "epoch": 0.00035223388671875,
      "step": 57710,
      "training_step_time": 0.45427918434143066
    },
    {
      "epoch": 0.000352239990234375,
      "model_forward_time": 0.11493444442749023,
      "step": 57711
    },
    {
      "epoch": 0.000352239990234375,
      "step": 57711,
      "training_step_time": 0.3907194137573242
    },
    {
      "epoch": 0.00035224609375,
      "model_forward_time": 0.11486458778381348,
      "step": 57712
    },
    {
      "epoch": 0.00035224609375,
      "step": 57712,
      "training_step_time": 0.40071797370910645
    },
    {
      "epoch": 0.000352252197265625,
      "model_forward_time": 0.11443042755126953,
      "step": 57713
    },
    {
      "epoch": 0.000352252197265625,
      "step": 57713,
      "training_step_time": 0.3877382278442383
    },
    {
      "epoch": 0.00035225830078125,
      "model_forward_time": 0.11537289619445801,
      "step": 57714
    },
    {
      "epoch": 0.00035225830078125,
      "step": 57714,
      "training_step_time": 0.40815281867980957
    },
    {
      "epoch": 0.000352264404296875,
      "model_forward_time": 0.11471319198608398,
      "step": 57715
    },
    {
      "epoch": 0.000352264404296875,
      "step": 57715,
      "training_step_time": 0.39446282386779785
    },
    {
      "epoch": 0.0003522705078125,
      "model_forward_time": 0.11550617218017578,
      "step": 57716
    },
    {
      "epoch": 0.0003522705078125,
      "step": 57716,
      "training_step_time": 0.38820552825927734
    },
    {
      "epoch": 0.000352276611328125,
      "model_forward_time": 0.11569929122924805,
      "step": 57717
    },
    {
      "epoch": 0.000352276611328125,
      "step": 57717,
      "training_step_time": 0.40848731994628906
    },
    {
      "epoch": 0.00035228271484375,
      "model_forward_time": 0.11680984497070312,
      "step": 57718
    },
    {
      "epoch": 0.00035228271484375,
      "step": 57718,
      "training_step_time": 0.3915364742279053
    },
    {
      "epoch": 0.000352288818359375,
      "model_forward_time": 0.1156606674194336,
      "step": 57719
    },
    {
      "epoch": 0.000352288818359375,
      "step": 57719,
      "training_step_time": 0.40506982803344727
    },
    {
      "epoch": 0.000352294921875,
      "grad_norm": 0.09729945659637451,
      "learning_rate": 3.9426493427611177e-07,
      "loss": 0.0345,
      "step": 57720
    },
    {
      "epoch": 0.000352294921875,
      "model_forward_time": 0.11486697196960449,
      "step": 57720
    },
    {
      "epoch": 0.000352294921875,
      "step": 57720,
      "training_step_time": 0.4082484245300293
    },
    {
      "epoch": 0.000352301025390625,
      "model_forward_time": 0.1148381233215332,
      "step": 57721
    },
    {
      "epoch": 0.000352301025390625,
      "step": 57721,
      "training_step_time": 0.41108107566833496
    },
    {
      "epoch": 0.00035230712890625,
      "model_forward_time": 0.11617445945739746,
      "step": 57722
    },
    {
      "epoch": 0.00035230712890625,
      "step": 57722,
      "training_step_time": 0.4290151596069336
    },
    {
      "epoch": 0.000352313232421875,
      "model_forward_time": 0.11502480506896973,
      "step": 57723
    },
    {
      "epoch": 0.000352313232421875,
      "step": 57723,
      "training_step_time": 0.4712679386138916
    },
    {
      "epoch": 0.0003523193359375,
      "model_forward_time": 0.11484122276306152,
      "step": 57724
    },
    {
      "epoch": 0.0003523193359375,
      "step": 57724,
      "training_step_time": 0.41758275032043457
    },
    {
      "epoch": 0.000352325439453125,
      "model_forward_time": 0.11520099639892578,
      "step": 57725
    },
    {
      "epoch": 0.000352325439453125,
      "step": 57725,
      "training_step_time": 0.44108033180236816
    },
    {
      "epoch": 0.00035233154296875,
      "model_forward_time": 0.1154472827911377,
      "step": 57726
    },
    {
      "epoch": 0.00035233154296875,
      "step": 57726,
      "training_step_time": 0.38671326637268066
    },
    {
      "epoch": 0.000352337646484375,
      "model_forward_time": 0.11497354507446289,
      "step": 57727
    },
    {
      "epoch": 0.000352337646484375,
      "step": 57727,
      "training_step_time": 0.39480018615722656
    },
    {
      "epoch": 0.00035234375,
      "model_forward_time": 0.11563730239868164,
      "step": 57728
    },
    {
      "epoch": 0.00035234375,
      "step": 57728,
      "training_step_time": 0.4088866710662842
    },
    {
      "epoch": 0.000352349853515625,
      "model_forward_time": 0.11542034149169922,
      "step": 57729
    },
    {
      "epoch": 0.000352349853515625,
      "step": 57729,
      "training_step_time": 0.39597201347351074
    },
    {
      "epoch": 0.00035235595703125,
      "grad_norm": 0.09884767979383469,
      "learning_rate": 3.9081855692640333e-07,
      "loss": 0.041,
      "step": 57730
    },
    {
      "epoch": 0.00035235595703125,
      "model_forward_time": 0.11490130424499512,
      "step": 57730
    },
    {
      "epoch": 0.00035235595703125,
      "step": 57730,
      "training_step_time": 0.39671802520751953
    },
    {
      "epoch": 0.000352362060546875,
      "model_forward_time": 0.11550784111022949,
      "step": 57731
    },
    {
      "epoch": 0.000352362060546875,
      "step": 57731,
      "training_step_time": 0.42814016342163086
    },
    {
      "epoch": 0.0003523681640625,
      "model_forward_time": 0.1151573657989502,
      "step": 57732
    },
    {
      "epoch": 0.0003523681640625,
      "step": 57732,
      "training_step_time": 0.3956120014190674
    },
    {
      "epoch": 0.000352374267578125,
      "model_forward_time": 0.11513471603393555,
      "step": 57733
    },
    {
      "epoch": 0.000352374267578125,
      "step": 57733,
      "training_step_time": 0.3970332145690918
    },
    {
      "epoch": 0.00035238037109375,
      "model_forward_time": 0.1154015064239502,
      "step": 57734
    },
    {
      "epoch": 0.00035238037109375,
      "step": 57734,
      "training_step_time": 0.39860987663269043
    },
    {
      "epoch": 0.000352386474609375,
      "model_forward_time": 0.11519145965576172,
      "step": 57735
    },
    {
      "epoch": 0.000352386474609375,
      "step": 57735,
      "training_step_time": 0.39435553550720215
    },
    {
      "epoch": 0.000352392578125,
      "model_forward_time": 0.11582565307617188,
      "step": 57736
    },
    {
      "epoch": 0.000352392578125,
      "step": 57736,
      "training_step_time": 0.4199562072753906
    },
    {
      "epoch": 0.000352398681640625,
      "model_forward_time": 0.11562633514404297,
      "step": 57737
    },
    {
      "epoch": 0.000352398681640625,
      "step": 57737,
      "training_step_time": 0.3982865810394287
    },
    {
      "epoch": 0.00035240478515625,
      "model_forward_time": 0.11509513854980469,
      "step": 57738
    },
    {
      "epoch": 0.00035240478515625,
      "step": 57738,
      "training_step_time": 0.3878171443939209
    },
    {
      "epoch": 0.000352410888671875,
      "model_forward_time": 0.11520600318908691,
      "step": 57739
    },
    {
      "epoch": 0.000352410888671875,
      "step": 57739,
      "training_step_time": 0.48934340476989746
    },
    {
      "epoch": 0.0003524169921875,
      "grad_norm": 0.0892530232667923,
      "learning_rate": 3.873872495361697e-07,
      "loss": 0.0448,
      "step": 57740
    },
    {
      "epoch": 0.0003524169921875,
      "model_forward_time": 0.1141514778137207,
      "step": 57740
    },
    {
      "epoch": 0.0003524169921875,
      "step": 57740,
      "training_step_time": 0.4060544967651367
    },
    {
      "epoch": 0.000352423095703125,
      "model_forward_time": 0.11491584777832031,
      "step": 57741
    },
    {
      "epoch": 0.000352423095703125,
      "step": 57741,
      "training_step_time": 0.4165060520172119
    },
    {
      "epoch": 0.00035242919921875,
      "model_forward_time": 0.11533522605895996,
      "step": 57742
    },
    {
      "epoch": 0.00035242919921875,
      "step": 57742,
      "training_step_time": 0.39540624618530273
    },
    {
      "epoch": 0.000352435302734375,
      "model_forward_time": 0.1152036190032959,
      "step": 57743
    },
    {
      "epoch": 0.000352435302734375,
      "step": 57743,
      "training_step_time": 0.39464902877807617
    },
    {
      "epoch": 0.00035244140625,
      "model_forward_time": 0.11502718925476074,
      "step": 57744
    },
    {
      "epoch": 0.00035244140625,
      "step": 57744,
      "training_step_time": 0.39839768409729004
    },
    {
      "epoch": 0.000352447509765625,
      "model_forward_time": 0.1148536205291748,
      "step": 57745
    },
    {
      "epoch": 0.000352447509765625,
      "step": 57745,
      "training_step_time": 0.4113328456878662
    },
    {
      "epoch": 0.00035245361328125,
      "model_forward_time": 0.1146092414855957,
      "step": 57746
    },
    {
      "epoch": 0.00035245361328125,
      "step": 57746,
      "training_step_time": 0.43190765380859375
    },
    {
      "epoch": 0.000352459716796875,
      "model_forward_time": 0.11464118957519531,
      "step": 57747
    },
    {
      "epoch": 0.000352459716796875,
      "step": 57747,
      "training_step_time": 0.39852070808410645
    },
    {
      "epoch": 0.0003524658203125,
      "model_forward_time": 0.11505937576293945,
      "step": 57748
    },
    {
      "epoch": 0.0003524658203125,
      "step": 57748,
      "training_step_time": 0.39385342597961426
    },
    {
      "epoch": 0.000352471923828125,
      "model_forward_time": 0.11567974090576172,
      "step": 57749
    },
    {
      "epoch": 0.000352471923828125,
      "step": 57749,
      "training_step_time": 0.4037322998046875
    },
    {
      "epoch": 0.00035247802734375,
      "grad_norm": 0.08370713144540787,
      "learning_rate": 3.839710131477492e-07,
      "loss": 0.0354,
      "step": 57750
    },
    {
      "epoch": 0.00035247802734375,
      "model_forward_time": 0.11542034149169922,
      "step": 57750
    },
    {
      "epoch": 0.00035247802734375,
      "step": 57750,
      "training_step_time": 0.3903467655181885
    },
    {
      "epoch": 0.000352484130859375,
      "model_forward_time": 0.11486291885375977,
      "step": 57751
    },
    {
      "epoch": 0.000352484130859375,
      "step": 57751,
      "training_step_time": 0.44441652297973633
    },
    {
      "epoch": 0.000352490234375,
      "model_forward_time": 0.1148233413696289,
      "step": 57752
    },
    {
      "epoch": 0.000352490234375,
      "step": 57752,
      "training_step_time": 0.44993114471435547
    },
    {
      "epoch": 0.000352496337890625,
      "model_forward_time": 0.11544609069824219,
      "step": 57753
    },
    {
      "epoch": 0.000352496337890625,
      "step": 57753,
      "training_step_time": 0.503331184387207
    },
    {
      "epoch": 0.00035250244140625,
      "model_forward_time": 0.11527562141418457,
      "step": 57754
    },
    {
      "epoch": 0.00035250244140625,
      "step": 57754,
      "training_step_time": 0.4002695083618164
    },
    {
      "epoch": 0.000352508544921875,
      "model_forward_time": 0.1147465705871582,
      "step": 57755
    },
    {
      "epoch": 0.000352508544921875,
      "step": 57755,
      "training_step_time": 0.4218878746032715
    },
    {
      "epoch": 0.0003525146484375,
      "model_forward_time": 0.11455178260803223,
      "step": 57756
    },
    {
      "epoch": 0.0003525146484375,
      "step": 57756,
      "training_step_time": 0.384082555770874
    },
    {
      "epoch": 0.000352520751953125,
      "model_forward_time": 0.1147608757019043,
      "step": 57757
    },
    {
      "epoch": 0.000352520751953125,
      "step": 57757,
      "training_step_time": 0.38855957984924316
    },
    {
      "epoch": 0.00035252685546875,
      "model_forward_time": 0.11480855941772461,
      "step": 57758
    },
    {
      "epoch": 0.00035252685546875,
      "step": 57758,
      "training_step_time": 0.40024399757385254
    },
    {
      "epoch": 0.000352532958984375,
      "model_forward_time": 0.11568927764892578,
      "step": 57759
    },
    {
      "epoch": 0.000352532958984375,
      "step": 57759,
      "training_step_time": 0.41119384765625
    },
    {
      "epoch": 0.0003525390625,
      "grad_norm": 0.09552349895238876,
      "learning_rate": 3.805698487988951e-07,
      "loss": 0.0434,
      "step": 57760
    },
    {
      "epoch": 0.0003525390625,
      "model_forward_time": 0.11510658264160156,
      "step": 57760
    },
    {
      "epoch": 0.0003525390625,
      "step": 57760,
      "training_step_time": 0.410754919052124
    },
    {
      "epoch": 0.000352545166015625,
      "model_forward_time": 0.11495351791381836,
      "step": 57761
    },
    {
      "epoch": 0.000352545166015625,
      "step": 57761,
      "training_step_time": 0.38387417793273926
    },
    {
      "epoch": 0.00035255126953125,
      "model_forward_time": 0.11535525321960449,
      "step": 57762
    },
    {
      "epoch": 0.00035255126953125,
      "step": 57762,
      "training_step_time": 0.41414475440979004
    },
    {
      "epoch": 0.000352557373046875,
      "model_forward_time": 0.11536574363708496,
      "step": 57763
    },
    {
      "epoch": 0.000352557373046875,
      "step": 57763,
      "training_step_time": 0.39782047271728516
    },
    {
      "epoch": 0.0003525634765625,
      "model_forward_time": 0.11622500419616699,
      "step": 57764
    },
    {
      "epoch": 0.0003525634765625,
      "step": 57764,
      "training_step_time": 0.40346670150756836
    },
    {
      "epoch": 0.000352569580078125,
      "model_forward_time": 0.11501908302307129,
      "step": 57765
    },
    {
      "epoch": 0.000352569580078125,
      "step": 57765,
      "training_step_time": 0.3970303535461426
    },
    {
      "epoch": 0.00035257568359375,
      "model_forward_time": 0.11487054824829102,
      "step": 57766
    },
    {
      "epoch": 0.00035257568359375,
      "step": 57766,
      "training_step_time": 0.39299631118774414
    },
    {
      "epoch": 0.000352581787109375,
      "model_forward_time": 0.11571288108825684,
      "step": 57767
    },
    {
      "epoch": 0.000352581787109375,
      "step": 57767,
      "training_step_time": 0.45502662658691406
    },
    {
      "epoch": 0.000352587890625,
      "model_forward_time": 0.11607933044433594,
      "step": 57768
    },
    {
      "epoch": 0.000352587890625,
      "step": 57768,
      "training_step_time": 0.43374061584472656
    },
    {
      "epoch": 0.000352593994140625,
      "model_forward_time": 0.11555171012878418,
      "step": 57769
    },
    {
      "epoch": 0.000352593994140625,
      "step": 57769,
      "training_step_time": 0.4813525676727295
    },
    {
      "epoch": 0.00035260009765625,
      "grad_norm": 0.06844060122966766,
      "learning_rate": 3.7718375752279214e-07,
      "loss": 0.0314,
      "step": 57770
    },
    {
      "epoch": 0.00035260009765625,
      "model_forward_time": 0.11549735069274902,
      "step": 57770
    },
    {
      "epoch": 0.00035260009765625,
      "step": 57770,
      "training_step_time": 0.422849178314209
    },
    {
      "epoch": 0.000352606201171875,
      "model_forward_time": 0.11531972885131836,
      "step": 57771
    },
    {
      "epoch": 0.000352606201171875,
      "step": 57771,
      "training_step_time": 0.3946211338043213
    },
    {
      "epoch": 0.0003526123046875,
      "model_forward_time": 0.1146240234375,
      "step": 57772
    },
    {
      "epoch": 0.0003526123046875,
      "step": 57772,
      "training_step_time": 0.40634989738464355
    },
    {
      "epoch": 0.000352618408203125,
      "model_forward_time": 0.11499619483947754,
      "step": 57773
    },
    {
      "epoch": 0.000352618408203125,
      "step": 57773,
      "training_step_time": 0.43013882637023926
    },
    {
      "epoch": 0.00035262451171875,
      "model_forward_time": 0.11534380912780762,
      "step": 57774
    },
    {
      "epoch": 0.00035262451171875,
      "step": 57774,
      "training_step_time": 0.4130585193634033
    },
    {
      "epoch": 0.000352630615234375,
      "model_forward_time": 0.11463165283203125,
      "step": 57775
    },
    {
      "epoch": 0.000352630615234375,
      "step": 57775,
      "training_step_time": 0.3770866394042969
    },
    {
      "epoch": 0.00035263671875,
      "model_forward_time": 0.11502695083618164,
      "step": 57776
    },
    {
      "epoch": 0.00035263671875,
      "step": 57776,
      "training_step_time": 0.4123697280883789
    },
    {
      "epoch": 0.000352642822265625,
      "model_forward_time": 0.11481475830078125,
      "step": 57777
    },
    {
      "epoch": 0.000352642822265625,
      "step": 57777,
      "training_step_time": 0.39154624938964844
    },
    {
      "epoch": 0.00035264892578125,
      "model_forward_time": 0.11554884910583496,
      "step": 57778
    },
    {
      "epoch": 0.00035264892578125,
      "step": 57778,
      "training_step_time": 0.39579033851623535
    },
    {
      "epoch": 0.000352655029296875,
      "model_forward_time": 0.11503100395202637,
      "step": 57779
    },
    {
      "epoch": 0.000352655029296875,
      "step": 57779,
      "training_step_time": 0.3925788402557373
    },
    {
      "epoch": 0.0003526611328125,
      "grad_norm": 0.08720196038484573,
      "learning_rate": 3.738127403480507e-07,
      "loss": 0.0387,
      "step": 57780
    },
    {
      "epoch": 0.0003526611328125,
      "model_forward_time": 0.11592960357666016,
      "step": 57780
    },
    {
      "epoch": 0.0003526611328125,
      "step": 57780,
      "training_step_time": 0.38634729385375977
    },
    {
      "epoch": 0.000352667236328125,
      "model_forward_time": 0.11539030075073242,
      "step": 57781
    },
    {
      "epoch": 0.000352667236328125,
      "step": 57781,
      "training_step_time": 0.4016549587249756
    },
    {
      "epoch": 0.00035267333984375,
      "model_forward_time": 0.11539292335510254,
      "step": 57782
    },
    {
      "epoch": 0.00035267333984375,
      "step": 57782,
      "training_step_time": 0.4011201858520508
    },
    {
      "epoch": 0.000352679443359375,
      "model_forward_time": 0.11516213417053223,
      "step": 57783
    },
    {
      "epoch": 0.000352679443359375,
      "step": 57783,
      "training_step_time": 0.503990888595581
    },
    {
      "epoch": 0.000352685546875,
      "model_forward_time": 0.11573004722595215,
      "step": 57784
    },
    {
      "epoch": 0.000352685546875,
      "step": 57784,
      "training_step_time": 0.45600247383117676
    },
    {
      "epoch": 0.000352691650390625,
      "model_forward_time": 0.1152031421661377,
      "step": 57785
    },
    {
      "epoch": 0.000352691650390625,
      "step": 57785,
      "training_step_time": 0.4240100383758545
    },
    {
      "epoch": 0.00035269775390625,
      "model_forward_time": 0.11455368995666504,
      "step": 57786
    },
    {
      "epoch": 0.00035269775390625,
      "step": 57786,
      "training_step_time": 0.38445067405700684
    },
    {
      "epoch": 0.000352703857421875,
      "model_forward_time": 0.1156153678894043,
      "step": 57787
    },
    {
      "epoch": 0.000352703857421875,
      "step": 57787,
      "training_step_time": 0.4141061305999756
    },
    {
      "epoch": 0.0003527099609375,
      "model_forward_time": 0.11463618278503418,
      "step": 57788
    },
    {
      "epoch": 0.0003527099609375,
      "step": 57788,
      "training_step_time": 0.40024328231811523
    },
    {
      "epoch": 0.000352716064453125,
      "model_forward_time": 0.11445856094360352,
      "step": 57789
    },
    {
      "epoch": 0.000352716064453125,
      "step": 57789,
      "training_step_time": 0.39292454719543457
    },
    {
      "epoch": 0.00035272216796875,
      "grad_norm": 0.07269027084112167,
      "learning_rate": 3.7045679829870175e-07,
      "loss": 0.036,
      "step": 57790
    },
    {
      "epoch": 0.00035272216796875,
      "model_forward_time": 0.1148681640625,
      "step": 57790
    },
    {
      "epoch": 0.00035272216796875,
      "step": 57790,
      "training_step_time": 0.3952014446258545
    },
    {
      "epoch": 0.000352728271484375,
      "model_forward_time": 0.11521792411804199,
      "step": 57791
    },
    {
      "epoch": 0.000352728271484375,
      "step": 57791,
      "training_step_time": 0.3978447914123535
    },
    {
      "epoch": 0.000352734375,
      "model_forward_time": 0.11529326438903809,
      "step": 57792
    },
    {
      "epoch": 0.000352734375,
      "step": 57792,
      "training_step_time": 0.48169565200805664
    },
    {
      "epoch": 0.000352740478515625,
      "model_forward_time": 0.11513805389404297,
      "step": 57793
    },
    {
      "epoch": 0.000352740478515625,
      "step": 57793,
      "training_step_time": 0.37686872482299805
    },
    {
      "epoch": 0.00035274658203125,
      "model_forward_time": 0.11537694931030273,
      "step": 57794
    },
    {
      "epoch": 0.00035274658203125,
      "step": 57794,
      "training_step_time": 0.40077805519104004
    },
    {
      "epoch": 0.000352752685546875,
      "model_forward_time": 0.1146845817565918,
      "step": 57795
    },
    {
      "epoch": 0.000352752685546875,
      "step": 57795,
      "training_step_time": 0.3834962844848633
    },
    {
      "epoch": 0.0003527587890625,
      "model_forward_time": 0.11511969566345215,
      "step": 57796
    },
    {
      "epoch": 0.0003527587890625,
      "step": 57796,
      "training_step_time": 0.49260592460632324
    },
    {
      "epoch": 0.000352764892578125,
      "model_forward_time": 0.11491656303405762,
      "step": 57797
    },
    {
      "epoch": 0.000352764892578125,
      "step": 57797,
      "training_step_time": 0.3677082061767578
    },
    {
      "epoch": 0.00035277099609375,
      "model_forward_time": 0.11490821838378906,
      "step": 57798
    },
    {
      "epoch": 0.00035277099609375,
      "step": 57798,
      "training_step_time": 0.52712082862854
    },
    {
      "epoch": 0.000352777099609375,
      "model_forward_time": 0.11461400985717773,
      "step": 57799
    },
    {
      "epoch": 0.000352777099609375,
      "step": 57799,
      "training_step_time": 0.38332581520080566
    },
    {
      "epoch": 0.000352783203125,
      "grad_norm": 0.09306327998638153,
      "learning_rate": 3.6711593239417973e-07,
      "loss": 0.0393,
      "step": 57800
    },
    {
      "epoch": 0.000352783203125,
      "model_forward_time": 0.11475944519042969,
      "step": 57800
    },
    {
      "epoch": 0.000352783203125,
      "step": 57800,
      "training_step_time": 0.4055490493774414
    },
    {
      "epoch": 0.000352789306640625,
      "model_forward_time": 0.11491870880126953,
      "step": 57801
    },
    {
      "epoch": 0.000352789306640625,
      "step": 57801,
      "training_step_time": 0.41898512840270996
    },
    {
      "epoch": 0.00035279541015625,
      "model_forward_time": 0.11484026908874512,
      "step": 57802
    },
    {
      "epoch": 0.00035279541015625,
      "step": 57802,
      "training_step_time": 0.4317507743835449
    },
    {
      "epoch": 0.000352801513671875,
      "model_forward_time": 0.11451244354248047,
      "step": 57803
    },
    {
      "epoch": 0.000352801513671875,
      "step": 57803,
      "training_step_time": 0.3911926746368408
    },
    {
      "epoch": 0.0003528076171875,
      "model_forward_time": 0.11520528793334961,
      "step": 57804
    },
    {
      "epoch": 0.0003528076171875,
      "step": 57804,
      "training_step_time": 0.5996901988983154
    },
    {
      "epoch": 0.000352813720703125,
      "model_forward_time": 0.11530256271362305,
      "step": 57805
    },
    {
      "epoch": 0.000352813720703125,
      "step": 57805,
      "training_step_time": 0.3835418224334717
    },
    {
      "epoch": 0.00035281982421875,
      "model_forward_time": 0.114410400390625,
      "step": 57806
    },
    {
      "epoch": 0.00035281982421875,
      "step": 57806,
      "training_step_time": 0.4080939292907715
    },
    {
      "epoch": 0.000352825927734375,
      "model_forward_time": 0.115081787109375,
      "step": 57807
    },
    {
      "epoch": 0.000352825927734375,
      "step": 57807,
      "training_step_time": 0.38871002197265625
    },
    {
      "epoch": 0.00035283203125,
      "model_forward_time": 0.11477875709533691,
      "step": 57808
    },
    {
      "epoch": 0.00035283203125,
      "step": 57808,
      "training_step_time": 0.3841218948364258
    },
    {
      "epoch": 0.000352838134765625,
      "model_forward_time": 0.11504697799682617,
      "step": 57809
    },
    {
      "epoch": 0.000352838134765625,
      "step": 57809,
      "training_step_time": 0.3958308696746826
    },
    {
      "epoch": 0.00035284423828125,
      "grad_norm": 0.08750642836093903,
      "learning_rate": 3.637901436493507e-07,
      "loss": 0.0374,
      "step": 57810
    },
    {
      "epoch": 0.00035284423828125,
      "model_forward_time": 0.11535191535949707,
      "step": 57810
    },
    {
      "epoch": 0.00035284423828125,
      "step": 57810,
      "training_step_time": 0.5897212028503418
    },
    {
      "epoch": 0.000352850341796875,
      "model_forward_time": 0.11559319496154785,
      "step": 57811
    },
    {
      "epoch": 0.000352850341796875,
      "step": 57811,
      "training_step_time": 0.43346166610717773
    },
    {
      "epoch": 0.0003528564453125,
      "model_forward_time": 0.11480045318603516,
      "step": 57812
    },
    {
      "epoch": 0.0003528564453125,
      "step": 57812,
      "training_step_time": 0.4231424331665039
    },
    {
      "epoch": 0.000352862548828125,
      "model_forward_time": 0.1157991886138916,
      "step": 57813
    },
    {
      "epoch": 0.000352862548828125,
      "step": 57813,
      "training_step_time": 0.4815409183502197
    },
    {
      "epoch": 0.00035286865234375,
      "model_forward_time": 0.11458873748779297,
      "step": 57814
    },
    {
      "epoch": 0.00035286865234375,
      "step": 57814,
      "training_step_time": 0.39732956886291504
    },
    {
      "epoch": 0.000352874755859375,
      "model_forward_time": 0.11435389518737793,
      "step": 57815
    },
    {
      "epoch": 0.000352874755859375,
      "step": 57815,
      "training_step_time": 0.40847039222717285
    },
    {
      "epoch": 0.000352880859375,
      "model_forward_time": 0.11513710021972656,
      "step": 57816
    },
    {
      "epoch": 0.000352880859375,
      "step": 57816,
      "training_step_time": 0.42694807052612305
    },
    {
      "epoch": 0.000352886962890625,
      "model_forward_time": 0.1151270866394043,
      "step": 57817
    },
    {
      "epoch": 0.000352886962890625,
      "step": 57817,
      "training_step_time": 0.3801748752593994
    },
    {
      "epoch": 0.00035289306640625,
      "model_forward_time": 0.11503720283508301,
      "step": 57818
    },
    {
      "epoch": 0.00035289306640625,
      "step": 57818,
      "training_step_time": 0.395111083984375
    },
    {
      "epoch": 0.000352899169921875,
      "model_forward_time": 0.11549186706542969,
      "step": 57819
    },
    {
      "epoch": 0.000352899169921875,
      "step": 57819,
      "training_step_time": 0.3810112476348877
    },
    {
      "epoch": 0.0003529052734375,
      "grad_norm": 0.09974828362464905,
      "learning_rate": 3.604794330745176e-07,
      "loss": 0.0393,
      "step": 57820
    },
    {
      "epoch": 0.0003529052734375,
      "model_forward_time": 0.11520528793334961,
      "step": 57820
    },
    {
      "epoch": 0.0003529052734375,
      "step": 57820,
      "training_step_time": 0.38599610328674316
    },
    {
      "epoch": 0.000352911376953125,
      "model_forward_time": 0.11534833908081055,
      "step": 57821
    },
    {
      "epoch": 0.000352911376953125,
      "step": 57821,
      "training_step_time": 0.39455747604370117
    },
    {
      "epoch": 0.00035291748046875,
      "model_forward_time": 0.11498689651489258,
      "step": 57822
    },
    {
      "epoch": 0.00035291748046875,
      "step": 57822,
      "training_step_time": 0.7112278938293457
    },
    {
      "epoch": 0.000352923583984375,
      "model_forward_time": 0.11422204971313477,
      "step": 57823
    },
    {
      "epoch": 0.000352923583984375,
      "step": 57823,
      "training_step_time": 0.37431883811950684
    },
    {
      "epoch": 0.0003529296875,
      "model_forward_time": 0.11497020721435547,
      "step": 57824
    },
    {
      "epoch": 0.0003529296875,
      "step": 57824,
      "training_step_time": 0.39406871795654297
    },
    {
      "epoch": 0.000352935791015625,
      "model_forward_time": 0.11466860771179199,
      "step": 57825
    },
    {
      "epoch": 0.000352935791015625,
      "step": 57825,
      "training_step_time": 0.4189012050628662
    },
    {
      "epoch": 0.00035294189453125,
      "model_forward_time": 0.11481308937072754,
      "step": 57826
    },
    {
      "epoch": 0.00035294189453125,
      "step": 57826,
      "training_step_time": 0.49864959716796875
    },
    {
      "epoch": 0.000352947998046875,
      "model_forward_time": 0.11525702476501465,
      "step": 57827
    },
    {
      "epoch": 0.000352947998046875,
      "step": 57827,
      "training_step_time": 0.4327383041381836
    },
    {
      "epoch": 0.0003529541015625,
      "model_forward_time": 0.11635327339172363,
      "step": 57828
    },
    {
      "epoch": 0.0003529541015625,
      "step": 57828,
      "training_step_time": 0.4406559467315674
    },
    {
      "epoch": 0.000352960205078125,
      "model_forward_time": 0.11496901512145996,
      "step": 57829
    },
    {
      "epoch": 0.000352960205078125,
      "step": 57829,
      "training_step_time": 0.3922553062438965
    },
    {
      "epoch": 0.00035296630859375,
      "grad_norm": 0.09692786633968353,
      "learning_rate": 3.571838016753759e-07,
      "loss": 0.0353,
      "step": 57830
    },
    {
      "epoch": 0.00035296630859375,
      "model_forward_time": 0.11529827117919922,
      "step": 57830
    },
    {
      "epoch": 0.00035296630859375,
      "step": 57830,
      "training_step_time": 0.40024828910827637
    },
    {
      "epoch": 0.000352972412109375,
      "model_forward_time": 0.11533975601196289,
      "step": 57831
    },
    {
      "epoch": 0.000352972412109375,
      "step": 57831,
      "training_step_time": 0.3834712505340576
    },
    {
      "epoch": 0.000352978515625,
      "model_forward_time": 0.11489439010620117,
      "step": 57832
    },
    {
      "epoch": 0.000352978515625,
      "step": 57832,
      "training_step_time": 0.3892052173614502
    },
    {
      "epoch": 0.000352984619140625,
      "model_forward_time": 0.11479711532592773,
      "step": 57833
    },
    {
      "epoch": 0.000352984619140625,
      "step": 57833,
      "training_step_time": 0.40575742721557617
    },
    {
      "epoch": 0.00035299072265625,
      "model_forward_time": 0.11548089981079102,
      "step": 57834
    },
    {
      "epoch": 0.00035299072265625,
      "step": 57834,
      "training_step_time": 0.596604585647583
    },
    {
      "epoch": 0.000352996826171875,
      "model_forward_time": 0.1147916316986084,
      "step": 57835
    },
    {
      "epoch": 0.000352996826171875,
      "step": 57835,
      "training_step_time": 0.3955509662628174
    },
    {
      "epoch": 0.0003530029296875,
      "model_forward_time": 0.11438560485839844,
      "step": 57836
    },
    {
      "epoch": 0.0003530029296875,
      "step": 57836,
      "training_step_time": 0.40195536613464355
    },
    {
      "epoch": 0.000353009033203125,
      "model_forward_time": 0.11450815200805664,
      "step": 57837
    },
    {
      "epoch": 0.000353009033203125,
      "step": 57837,
      "training_step_time": 0.3967399597167969
    },
    {
      "epoch": 0.00035301513671875,
      "model_forward_time": 0.11470460891723633,
      "step": 57838
    },
    {
      "epoch": 0.00035301513671875,
      "step": 57838,
      "training_step_time": 0.43007540702819824
    },
    {
      "epoch": 0.000353021240234375,
      "model_forward_time": 0.11512374877929688,
      "step": 57839
    },
    {
      "epoch": 0.000353021240234375,
      "step": 57839,
      "training_step_time": 0.41020917892456055
    },
    {
      "epoch": 0.00035302734375,
      "grad_norm": 0.09480278193950653,
      "learning_rate": 3.5390325045304706e-07,
      "loss": 0.037,
      "step": 57840
    },
    {
      "epoch": 0.00035302734375,
      "model_forward_time": 0.11467981338500977,
      "step": 57840
    },
    {
      "epoch": 0.00035302734375,
      "step": 57840,
      "training_step_time": 0.4984855651855469
    },
    {
      "epoch": 0.000353033447265625,
      "model_forward_time": 0.1148073673248291,
      "step": 57841
    },
    {
      "epoch": 0.000353033447265625,
      "step": 57841,
      "training_step_time": 0.437330961227417
    },
    {
      "epoch": 0.00035303955078125,
      "model_forward_time": 0.11467194557189941,
      "step": 57842
    },
    {
      "epoch": 0.00035303955078125,
      "step": 57842,
      "training_step_time": 0.40277624130249023
    },
    {
      "epoch": 0.000353045654296875,
      "model_forward_time": 0.11466836929321289,
      "step": 57843
    },
    {
      "epoch": 0.000353045654296875,
      "step": 57843,
      "training_step_time": 0.38323473930358887
    },
    {
      "epoch": 0.0003530517578125,
      "model_forward_time": 0.1150968074798584,
      "step": 57844
    },
    {
      "epoch": 0.0003530517578125,
      "step": 57844,
      "training_step_time": 0.37928342819213867
    },
    {
      "epoch": 0.000353057861328125,
      "model_forward_time": 0.11506414413452148,
      "step": 57845
    },
    {
      "epoch": 0.000353057861328125,
      "step": 57845,
      "training_step_time": 0.38428783416748047
    },
    {
      "epoch": 0.00035306396484375,
      "model_forward_time": 0.11632752418518066,
      "step": 57846
    },
    {
      "epoch": 0.00035306396484375,
      "step": 57846,
      "training_step_time": 0.48911023139953613
    },
    {
      "epoch": 0.000353070068359375,
      "model_forward_time": 0.1150970458984375,
      "step": 57847
    },
    {
      "epoch": 0.000353070068359375,
      "step": 57847,
      "training_step_time": 0.3877289295196533
    },
    {
      "epoch": 0.000353076171875,
      "model_forward_time": 0.11449170112609863,
      "step": 57848
    },
    {
      "epoch": 0.000353076171875,
      "step": 57848,
      "training_step_time": 0.39125609397888184
    },
    {
      "epoch": 0.000353082275390625,
      "model_forward_time": 0.11594319343566895,
      "step": 57849
    },
    {
      "epoch": 0.000353082275390625,
      "step": 57849,
      "training_step_time": 0.3947603702545166
    },
    {
      "epoch": 0.00035308837890625,
      "grad_norm": 0.07968000322580338,
      "learning_rate": 3.5063778040408947e-07,
      "loss": 0.0361,
      "step": 57850
    },
    {
      "epoch": 0.00035308837890625,
      "model_forward_time": 0.11528229713439941,
      "step": 57850
    },
    {
      "epoch": 0.00035308837890625,
      "step": 57850,
      "training_step_time": 0.4033515453338623
    },
    {
      "epoch": 0.000353094482421875,
      "model_forward_time": 0.11497211456298828,
      "step": 57851
    },
    {
      "epoch": 0.000353094482421875,
      "step": 57851,
      "training_step_time": 0.4018867015838623
    },
    {
      "epoch": 0.0003531005859375,
      "model_forward_time": 0.11556506156921387,
      "step": 57852
    },
    {
      "epoch": 0.0003531005859375,
      "step": 57852,
      "training_step_time": 0.6359922885894775
    },
    {
      "epoch": 0.000353106689453125,
      "model_forward_time": 0.1152801513671875,
      "step": 57853
    },
    {
      "epoch": 0.000353106689453125,
      "step": 57853,
      "training_step_time": 0.4106101989746094
    },
    {
      "epoch": 0.00035311279296875,
      "model_forward_time": 0.1148521900177002,
      "step": 57854
    },
    {
      "epoch": 0.00035311279296875,
      "step": 57854,
      "training_step_time": 0.4391000270843506
    },
    {
      "epoch": 0.000353118896484375,
      "model_forward_time": 0.11479759216308594,
      "step": 57855
    },
    {
      "epoch": 0.000353118896484375,
      "step": 57855,
      "training_step_time": 0.418332576751709
    },
    {
      "epoch": 0.000353125,
      "model_forward_time": 0.11450815200805664,
      "step": 57856
    },
    {
      "epoch": 0.000353125,
      "step": 57856,
      "training_step_time": 0.48625922203063965
    },
    {
      "epoch": 0.000353131103515625,
      "model_forward_time": 0.11458396911621094,
      "step": 57857
    },
    {
      "epoch": 0.000353131103515625,
      "step": 57857,
      "training_step_time": 0.39075207710266113
    },
    {
      "epoch": 0.00035313720703125,
      "model_forward_time": 0.11517548561096191,
      "step": 57858
    },
    {
      "epoch": 0.00035313720703125,
      "step": 57858,
      "training_step_time": 0.39290881156921387
    },
    {
      "epoch": 0.000353143310546875,
      "model_forward_time": 0.11542987823486328,
      "step": 57859
    },
    {
      "epoch": 0.000353143310546875,
      "step": 57859,
      "training_step_time": 0.39252758026123047
    },
    {
      "epoch": 0.0003531494140625,
      "grad_norm": 0.08841383457183838,
      "learning_rate": 3.4738739252045405e-07,
      "loss": 0.0388,
      "step": 57860
    },
    {
      "epoch": 0.0003531494140625,
      "model_forward_time": 0.11507749557495117,
      "step": 57860
    },
    {
      "epoch": 0.0003531494140625,
      "step": 57860,
      "training_step_time": 0.402909517288208
    },
    {
      "epoch": 0.000353155517578125,
      "model_forward_time": 0.11508560180664062,
      "step": 57861
    },
    {
      "epoch": 0.000353155517578125,
      "step": 57861,
      "training_step_time": 0.38814544677734375
    },
    {
      "epoch": 0.00035316162109375,
      "model_forward_time": 0.11535763740539551,
      "step": 57862
    },
    {
      "epoch": 0.00035316162109375,
      "step": 57862,
      "training_step_time": 0.399233341217041
    },
    {
      "epoch": 0.000353167724609375,
      "model_forward_time": 0.11932659149169922,
      "step": 57863
    },
    {
      "epoch": 0.000353167724609375,
      "step": 57863,
      "training_step_time": 0.4010615348815918
    },
    {
      "epoch": 0.000353173828125,
      "model_forward_time": 0.11639595031738281,
      "step": 57864
    },
    {
      "epoch": 0.000353173828125,
      "step": 57864,
      "training_step_time": 0.5064499378204346
    },
    {
      "epoch": 0.000353179931640625,
      "model_forward_time": 0.11574769020080566,
      "step": 57865
    },
    {
      "epoch": 0.000353179931640625,
      "step": 57865,
      "training_step_time": 0.40218424797058105
    },
    {
      "epoch": 0.00035318603515625,
      "model_forward_time": 0.11542606353759766,
      "step": 57866
    },
    {
      "epoch": 0.00035318603515625,
      "step": 57866,
      "training_step_time": 0.40561914443969727
    },
    {
      "epoch": 0.000353192138671875,
      "model_forward_time": 0.11538147926330566,
      "step": 57867
    },
    {
      "epoch": 0.000353192138671875,
      "step": 57867,
      "training_step_time": 0.410219669342041
    },
    {
      "epoch": 0.0003531982421875,
      "model_forward_time": 0.1153249740600586,
      "step": 57868
    },
    {
      "epoch": 0.0003531982421875,
      "step": 57868,
      "training_step_time": 0.4325098991394043
    },
    {
      "epoch": 0.000353204345703125,
      "model_forward_time": 0.11547422409057617,
      "step": 57869
    },
    {
      "epoch": 0.000353204345703125,
      "step": 57869,
      "training_step_time": 0.48371052742004395
    },
    {
      "epoch": 0.00035321044921875,
      "grad_norm": 0.10686192661523819,
      "learning_rate": 3.441520877895288e-07,
      "loss": 0.037,
      "step": 57870
    },
    {
      "epoch": 0.00035321044921875,
      "model_forward_time": 0.1157987117767334,
      "step": 57870
    },
    {
      "epoch": 0.00035321044921875,
      "step": 57870,
      "training_step_time": 0.5024917125701904
    },
    {
      "epoch": 0.000353216552734375,
      "model_forward_time": 0.11499738693237305,
      "step": 57871
    },
    {
      "epoch": 0.000353216552734375,
      "step": 57871,
      "training_step_time": 0.39649391174316406
    },
    {
      "epoch": 0.00035322265625,
      "model_forward_time": 0.11559391021728516,
      "step": 57872
    },
    {
      "epoch": 0.00035322265625,
      "step": 57872,
      "training_step_time": 0.391803503036499
    },
    {
      "epoch": 0.000353228759765625,
      "model_forward_time": 0.11586999893188477,
      "step": 57873
    },
    {
      "epoch": 0.000353228759765625,
      "step": 57873,
      "training_step_time": 0.3899509906768799
    },
    {
      "epoch": 0.00035323486328125,
      "model_forward_time": 0.11539006233215332,
      "step": 57874
    },
    {
      "epoch": 0.00035323486328125,
      "step": 57874,
      "training_step_time": 0.3925635814666748
    },
    {
      "epoch": 0.000353240966796875,
      "model_forward_time": 0.1159827709197998,
      "step": 57875
    },
    {
      "epoch": 0.000353240966796875,
      "step": 57875,
      "training_step_time": 0.3985297679901123
    },
    {
      "epoch": 0.0003532470703125,
      "model_forward_time": 0.11544632911682129,
      "step": 57876
    },
    {
      "epoch": 0.0003532470703125,
      "step": 57876,
      "training_step_time": 0.6335248947143555
    },
    {
      "epoch": 0.000353253173828125,
      "model_forward_time": 0.11530756950378418,
      "step": 57877
    },
    {
      "epoch": 0.000353253173828125,
      "step": 57877,
      "training_step_time": 0.8045268058776855
    },
    {
      "epoch": 0.00035325927734375,
      "model_forward_time": 0.11485791206359863,
      "step": 57878
    },
    {
      "epoch": 0.00035325927734375,
      "step": 57878,
      "training_step_time": 0.3752570152282715
    },
    {
      "epoch": 0.000353265380859375,
      "model_forward_time": 0.11455011367797852,
      "step": 57879
    },
    {
      "epoch": 0.000353265380859375,
      "step": 57879,
      "training_step_time": 0.38256072998046875
    },
    {
      "epoch": 0.000353271484375,
      "grad_norm": 0.1101677417755127,
      "learning_rate": 3.4093186719411085e-07,
      "loss": 0.0357,
      "step": 57880
    },
    {
      "epoch": 0.000353271484375,
      "model_forward_time": 0.11435413360595703,
      "step": 57880
    },
    {
      "epoch": 0.000353271484375,
      "step": 57880,
      "training_step_time": 0.4339008331298828
    },
    {
      "epoch": 0.000353277587890625,
      "model_forward_time": 0.11482620239257812,
      "step": 57881
    },
    {
      "epoch": 0.000353277587890625,
      "step": 57881,
      "training_step_time": 0.5119650363922119
    },
    {
      "epoch": 0.00035328369140625,
      "model_forward_time": 0.11445116996765137,
      "step": 57882
    },
    {
      "epoch": 0.00035328369140625,
      "step": 57882,
      "training_step_time": 0.36432313919067383
    },
    {
      "epoch": 0.000353289794921875,
      "model_forward_time": 0.11531329154968262,
      "step": 57883
    },
    {
      "epoch": 0.000353289794921875,
      "step": 57883,
      "training_step_time": 0.6409626007080078
    },
    {
      "epoch": 0.0003532958984375,
      "model_forward_time": 0.11576366424560547,
      "step": 57884
    },
    {
      "epoch": 0.0003532958984375,
      "step": 57884,
      "training_step_time": 0.453967809677124
    },
    {
      "epoch": 0.000353302001953125,
      "model_forward_time": 0.11447334289550781,
      "step": 57885
    },
    {
      "epoch": 0.000353302001953125,
      "step": 57885,
      "training_step_time": 0.37797069549560547
    },
    {
      "epoch": 0.00035330810546875,
      "model_forward_time": 0.1144709587097168,
      "step": 57886
    },
    {
      "epoch": 0.00035330810546875,
      "step": 57886,
      "training_step_time": 0.3726012706756592
    },
    {
      "epoch": 0.000353314208984375,
      "model_forward_time": 0.11487746238708496,
      "step": 57887
    },
    {
      "epoch": 0.000353314208984375,
      "step": 57887,
      "training_step_time": 0.39061760902404785
    },
    {
      "epoch": 0.0003533203125,
      "model_forward_time": 0.11536550521850586,
      "step": 57888
    },
    {
      "epoch": 0.0003533203125,
      "step": 57888,
      "training_step_time": 0.3943359851837158
    },
    {
      "epoch": 0.000353326416015625,
      "model_forward_time": 0.11547732353210449,
      "step": 57889
    },
    {
      "epoch": 0.000353326416015625,
      "step": 57889,
      "training_step_time": 1.0263559818267822
    },
    {
      "epoch": 0.00035333251953125,
      "grad_norm": 0.09985626488924026,
      "learning_rate": 3.377267317124233e-07,
      "loss": 0.0388,
      "step": 57890
    },
    {
      "epoch": 0.00035333251953125,
      "model_forward_time": 0.11434149742126465,
      "step": 57890
    },
    {
      "epoch": 0.00035333251953125,
      "step": 57890,
      "training_step_time": 0.3790476322174072
    },
    {
      "epoch": 0.000353338623046875,
      "model_forward_time": 0.11415553092956543,
      "step": 57891
    },
    {
      "epoch": 0.000353338623046875,
      "step": 57891,
      "training_step_time": 0.37447428703308105
    },
    {
      "epoch": 0.0003533447265625,
      "model_forward_time": 0.11451482772827148,
      "step": 57892
    },
    {
      "epoch": 0.0003533447265625,
      "step": 57892,
      "training_step_time": 0.3773665428161621
    },
    {
      "epoch": 0.000353350830078125,
      "model_forward_time": 0.11395573616027832,
      "step": 57893
    },
    {
      "epoch": 0.000353350830078125,
      "step": 57893,
      "training_step_time": 0.4443070888519287
    },
    {
      "epoch": 0.00035335693359375,
      "model_forward_time": 0.1138155460357666,
      "step": 57894
    },
    {
      "epoch": 0.00035335693359375,
      "step": 57894,
      "training_step_time": 0.4449021816253662
    },
    {
      "epoch": 0.000353363037109375,
      "model_forward_time": 0.11539697647094727,
      "step": 57895
    },
    {
      "epoch": 0.000353363037109375,
      "step": 57895,
      "training_step_time": 0.36658549308776855
    },
    {
      "epoch": 0.000353369140625,
      "model_forward_time": 0.11534595489501953,
      "step": 57896
    },
    {
      "epoch": 0.000353369140625,
      "step": 57896,
      "training_step_time": 0.4683098793029785
    },
    {
      "epoch": 0.000353375244140625,
      "model_forward_time": 0.11502432823181152,
      "step": 57897
    },
    {
      "epoch": 0.000353375244140625,
      "step": 57897,
      "training_step_time": 0.48552823066711426
    },
    {
      "epoch": 0.00035338134765625,
      "model_forward_time": 0.1147150993347168,
      "step": 57898
    },
    {
      "epoch": 0.00035338134765625,
      "step": 57898,
      "training_step_time": 0.38514232635498047
    },
    {
      "epoch": 0.000353387451171875,
      "model_forward_time": 0.11484718322753906,
      "step": 57899
    },
    {
      "epoch": 0.000353387451171875,
      "step": 57899,
      "training_step_time": 0.39792418479919434
    },
    {
      "epoch": 0.0003533935546875,
      "grad_norm": 0.07197228074073792,
      "learning_rate": 3.3453668231809286e-07,
      "loss": 0.0407,
      "step": 57900
    },
    {
      "epoch": 0.0003533935546875,
      "model_forward_time": 0.11526250839233398,
      "step": 57900
    },
    {
      "epoch": 0.0003533935546875,
      "step": 57900,
      "training_step_time": 0.4070298671722412
    },
    {
      "epoch": 0.000353399658203125,
      "model_forward_time": 0.11499714851379395,
      "step": 57901
    },
    {
      "epoch": 0.000353399658203125,
      "step": 57901,
      "training_step_time": 0.8457391262054443
    },
    {
      "epoch": 0.00035340576171875,
      "model_forward_time": 0.11454486846923828,
      "step": 57902
    },
    {
      "epoch": 0.00035340576171875,
      "step": 57902,
      "training_step_time": 0.3793976306915283
    },
    {
      "epoch": 0.000353411865234375,
      "model_forward_time": 0.11437416076660156,
      "step": 57903
    },
    {
      "epoch": 0.000353411865234375,
      "step": 57903,
      "training_step_time": 0.38660287857055664
    },
    {
      "epoch": 0.00035341796875,
      "model_forward_time": 0.11468315124511719,
      "step": 57904
    },
    {
      "epoch": 0.00035341796875,
      "step": 57904,
      "training_step_time": 0.3867788314819336
    },
    {
      "epoch": 0.000353424072265625,
      "model_forward_time": 0.11420464515686035,
      "step": 57905
    },
    {
      "epoch": 0.000353424072265625,
      "step": 57905,
      "training_step_time": 0.3886592388153076
    },
    {
      "epoch": 0.00035343017578125,
      "model_forward_time": 0.11476659774780273,
      "step": 57906
    },
    {
      "epoch": 0.00035343017578125,
      "step": 57906,
      "training_step_time": 0.39417147636413574
    },
    {
      "epoch": 0.000353436279296875,
      "model_forward_time": 0.11532974243164062,
      "step": 57907
    },
    {
      "epoch": 0.000353436279296875,
      "step": 57907,
      "training_step_time": 0.8851392269134521
    },
    {
      "epoch": 0.0003534423828125,
      "model_forward_time": 0.11513996124267578,
      "step": 57908
    },
    {
      "epoch": 0.0003534423828125,
      "step": 57908,
      "training_step_time": 0.3857080936431885
    },
    {
      "epoch": 0.000353448486328125,
      "model_forward_time": 0.11467814445495605,
      "step": 57909
    },
    {
      "epoch": 0.000353448486328125,
      "step": 57909,
      "training_step_time": 0.5036933422088623
    },
    {
      "epoch": 0.00035345458984375,
      "grad_norm": 0.07425064593553543,
      "learning_rate": 3.3136171998017775e-07,
      "loss": 0.0382,
      "step": 57910
    },
    {
      "epoch": 0.00035345458984375,
      "model_forward_time": 0.11400580406188965,
      "step": 57910
    },
    {
      "epoch": 0.00035345458984375,
      "step": 57910,
      "training_step_time": 0.4853644371032715
    },
    {
      "epoch": 0.000353460693359375,
      "model_forward_time": 0.11394715309143066,
      "step": 57911
    },
    {
      "epoch": 0.000353460693359375,
      "step": 57911,
      "training_step_time": 0.3872413635253906
    },
    {
      "epoch": 0.000353466796875,
      "model_forward_time": 0.11444282531738281,
      "step": 57912
    },
    {
      "epoch": 0.000353466796875,
      "step": 57912,
      "training_step_time": 0.38455820083618164
    },
    {
      "epoch": 0.000353472900390625,
      "model_forward_time": 0.11497855186462402,
      "step": 57913
    },
    {
      "epoch": 0.000353472900390625,
      "step": 57913,
      "training_step_time": 0.4387857913970947
    },
    {
      "epoch": 0.00035347900390625,
      "model_forward_time": 0.11541223526000977,
      "step": 57914
    },
    {
      "epoch": 0.00035347900390625,
      "step": 57914,
      "training_step_time": 0.38857483863830566
    },
    {
      "epoch": 0.000353485107421875,
      "model_forward_time": 0.11471748352050781,
      "step": 57915
    },
    {
      "epoch": 0.000353485107421875,
      "step": 57915,
      "training_step_time": 0.38518595695495605
    },
    {
      "epoch": 0.0003534912109375,
      "model_forward_time": 0.11449480056762695,
      "step": 57916
    },
    {
      "epoch": 0.0003534912109375,
      "step": 57916,
      "training_step_time": 0.40402936935424805
    },
    {
      "epoch": 0.000353497314453125,
      "model_forward_time": 0.11531734466552734,
      "step": 57917
    },
    {
      "epoch": 0.000353497314453125,
      "step": 57917,
      "training_step_time": 0.38393306732177734
    },
    {
      "epoch": 0.00035350341796875,
      "model_forward_time": 0.11519598960876465,
      "step": 57918
    },
    {
      "epoch": 0.00035350341796875,
      "step": 57918,
      "training_step_time": 0.3889329433441162
    },
    {
      "epoch": 0.000353509521484375,
      "model_forward_time": 0.11504197120666504,
      "step": 57919
    },
    {
      "epoch": 0.000353509521484375,
      "step": 57919,
      "training_step_time": 1.073519229888916
    },
    {
      "epoch": 0.000353515625,
      "grad_norm": 0.08790221065282822,
      "learning_rate": 3.2820184566315084e-07,
      "loss": 0.0374,
      "step": 57920
    },
    {
      "epoch": 0.000353515625,
      "model_forward_time": 0.11443901062011719,
      "step": 57920
    },
    {
      "epoch": 0.000353515625,
      "step": 57920,
      "training_step_time": 0.4957849979400635
    },
    {
      "epoch": 0.000353521728515625,
      "model_forward_time": 0.11452937126159668,
      "step": 57921
    },
    {
      "epoch": 0.000353521728515625,
      "step": 57921,
      "training_step_time": 0.42246103286743164
    },
    {
      "epoch": 0.00035352783203125,
      "model_forward_time": 0.11449599266052246,
      "step": 57922
    },
    {
      "epoch": 0.00035352783203125,
      "step": 57922,
      "training_step_time": 0.4996821880340576
    },
    {
      "epoch": 0.000353533935546875,
      "model_forward_time": 0.11419510841369629,
      "step": 57923
    },
    {
      "epoch": 0.000353533935546875,
      "step": 57923,
      "training_step_time": 0.4704413414001465
    },
    {
      "epoch": 0.0003535400390625,
      "model_forward_time": 0.11481404304504395,
      "step": 57924
    },
    {
      "epoch": 0.0003535400390625,
      "step": 57924,
      "training_step_time": 0.3880598545074463
    },
    {
      "epoch": 0.000353546142578125,
      "model_forward_time": 0.11463093757629395,
      "step": 57925
    },
    {
      "epoch": 0.000353546142578125,
      "step": 57925,
      "training_step_time": 0.4177703857421875
    },
    {
      "epoch": 0.00035355224609375,
      "model_forward_time": 0.11519956588745117,
      "step": 57926
    },
    {
      "epoch": 0.00035355224609375,
      "step": 57926,
      "training_step_time": 0.3810555934906006
    },
    {
      "epoch": 0.000353558349609375,
      "model_forward_time": 0.11505842208862305,
      "step": 57927
    },
    {
      "epoch": 0.000353558349609375,
      "step": 57927,
      "training_step_time": 0.3876008987426758
    },
    {
      "epoch": 0.000353564453125,
      "model_forward_time": 0.11459827423095703,
      "step": 57928
    },
    {
      "epoch": 0.000353564453125,
      "step": 57928,
      "training_step_time": 0.3900794982910156
    },
    {
      "epoch": 0.000353570556640625,
      "model_forward_time": 0.11487126350402832,
      "step": 57929
    },
    {
      "epoch": 0.000353570556640625,
      "step": 57929,
      "training_step_time": 0.39327549934387207
    },
    {
      "epoch": 0.00035357666015625,
      "grad_norm": 0.09674908965826035,
      "learning_rate": 3.250570603268943e-07,
      "loss": 0.0361,
      "step": 57930
    },
    {
      "epoch": 0.00035357666015625,
      "model_forward_time": 0.1157388687133789,
      "step": 57930
    },
    {
      "epoch": 0.00035357666015625,
      "step": 57930,
      "training_step_time": 0.3947911262512207
    },
    {
      "epoch": 0.000353582763671875,
      "model_forward_time": 0.1151285171508789,
      "step": 57931
    },
    {
      "epoch": 0.000353582763671875,
      "step": 57931,
      "training_step_time": 0.6305644512176514
    },
    {
      "epoch": 0.0003535888671875,
      "model_forward_time": 0.11508917808532715,
      "step": 57932
    },
    {
      "epoch": 0.0003535888671875,
      "step": 57932,
      "training_step_time": 0.45960140228271484
    },
    {
      "epoch": 0.000353594970703125,
      "model_forward_time": 0.11541604995727539,
      "step": 57933
    },
    {
      "epoch": 0.000353594970703125,
      "step": 57933,
      "training_step_time": 0.46277594566345215
    },
    {
      "epoch": 0.00035360107421875,
      "model_forward_time": 0.11453962326049805,
      "step": 57934
    },
    {
      "epoch": 0.00035360107421875,
      "step": 57934,
      "training_step_time": 0.4416024684906006
    },
    {
      "epoch": 0.000353607177734375,
      "model_forward_time": 0.11430788040161133,
      "step": 57935
    },
    {
      "epoch": 0.000353607177734375,
      "step": 57935,
      "training_step_time": 0.36576223373413086
    },
    {
      "epoch": 0.00035361328125,
      "model_forward_time": 0.1144411563873291,
      "step": 57936
    },
    {
      "epoch": 0.00035361328125,
      "step": 57936,
      "training_step_time": 0.4382905960083008
    },
    {
      "epoch": 0.000353619384765625,
      "model_forward_time": 0.11454200744628906,
      "step": 57937
    },
    {
      "epoch": 0.000353619384765625,
      "step": 57937,
      "training_step_time": 0.8914167881011963
    },
    {
      "epoch": 0.00035362548828125,
      "model_forward_time": 0.11425518989562988,
      "step": 57938
    },
    {
      "epoch": 0.00035362548828125,
      "step": 57938,
      "training_step_time": 0.38824939727783203
    },
    {
      "epoch": 0.000353631591796875,
      "model_forward_time": 0.1137688159942627,
      "step": 57939
    },
    {
      "epoch": 0.000353631591796875,
      "step": 57939,
      "training_step_time": 0.38265371322631836
    },
    {
      "epoch": 0.0003536376953125,
      "grad_norm": 0.08231036365032196,
      "learning_rate": 3.219273649267163e-07,
      "loss": 0.0367,
      "step": 57940
    },
    {
      "epoch": 0.0003536376953125,
      "model_forward_time": 0.1147608757019043,
      "step": 57940
    },
    {
      "epoch": 0.0003536376953125,
      "step": 57940,
      "training_step_time": 0.39348292350769043
    },
    {
      "epoch": 0.000353643798828125,
      "model_forward_time": 0.11425042152404785,
      "step": 57941
    },
    {
      "epoch": 0.000353643798828125,
      "step": 57941,
      "training_step_time": 0.39731884002685547
    },
    {
      "epoch": 0.00035364990234375,
      "model_forward_time": 0.11494326591491699,
      "step": 57942
    },
    {
      "epoch": 0.00035364990234375,
      "step": 57942,
      "training_step_time": 0.38543152809143066
    },
    {
      "epoch": 0.000353656005859375,
      "model_forward_time": 0.11554837226867676,
      "step": 57943
    },
    {
      "epoch": 0.000353656005859375,
      "step": 57943,
      "training_step_time": 1.0381593704223633
    },
    {
      "epoch": 0.000353662109375,
      "model_forward_time": 0.1144418716430664,
      "step": 57944
    },
    {
      "epoch": 0.000353662109375,
      "step": 57944,
      "training_step_time": 0.38561177253723145
    },
    {
      "epoch": 0.000353668212890625,
      "model_forward_time": 0.11428141593933105,
      "step": 57945
    },
    {
      "epoch": 0.000353668212890625,
      "step": 57945,
      "training_step_time": 0.502861499786377
    },
    {
      "epoch": 0.00035367431640625,
      "model_forward_time": 0.1136171817779541,
      "step": 57946
    },
    {
      "epoch": 0.00035367431640625,
      "step": 57946,
      "training_step_time": 0.4536557197570801
    },
    {
      "epoch": 0.000353680419921875,
      "model_forward_time": 0.11420273780822754,
      "step": 57947
    },
    {
      "epoch": 0.000353680419921875,
      "step": 57947,
      "training_step_time": 0.39066171646118164
    },
    {
      "epoch": 0.0003536865234375,
      "model_forward_time": 0.1148991584777832,
      "step": 57948
    },
    {
      "epoch": 0.0003536865234375,
      "step": 57948,
      "training_step_time": 0.4281594753265381
    },
    {
      "epoch": 0.000353692626953125,
      "model_forward_time": 0.11484503746032715,
      "step": 57949
    },
    {
      "epoch": 0.000353692626953125,
      "step": 57949,
      "training_step_time": 0.6226327419281006
    },
    {
      "epoch": 0.00035369873046875,
      "grad_norm": 0.09428826719522476,
      "learning_rate": 3.188127604133284e-07,
      "loss": 0.0375,
      "step": 57950
    },
    {
      "epoch": 0.00035369873046875,
      "model_forward_time": 0.11458539962768555,
      "step": 57950
    },
    {
      "epoch": 0.00035369873046875,
      "step": 57950,
      "training_step_time": 0.3915824890136719
    },
    {
      "epoch": 0.000353704833984375,
      "model_forward_time": 0.1139061450958252,
      "step": 57951
    },
    {
      "epoch": 0.000353704833984375,
      "step": 57951,
      "training_step_time": 0.4048027992248535
    },
    {
      "epoch": 0.0003537109375,
      "model_forward_time": 0.11470150947570801,
      "step": 57952
    },
    {
      "epoch": 0.0003537109375,
      "step": 57952,
      "training_step_time": 0.3852839469909668
    },
    {
      "epoch": 0.000353717041015625,
      "model_forward_time": 0.11468005180358887,
      "step": 57953
    },
    {
      "epoch": 0.000353717041015625,
      "step": 57953,
      "training_step_time": 0.4055495262145996
    },
    {
      "epoch": 0.00035372314453125,
      "model_forward_time": 0.1148521900177002,
      "step": 57954
    },
    {
      "epoch": 0.00035372314453125,
      "step": 57954,
      "training_step_time": 0.3927333354949951
    },
    {
      "epoch": 0.000353729248046875,
      "model_forward_time": 0.11510753631591797,
      "step": 57955
    },
    {
      "epoch": 0.000353729248046875,
      "step": 57955,
      "training_step_time": 0.8971757888793945
    },
    {
      "epoch": 0.0003537353515625,
      "model_forward_time": 0.11509537696838379,
      "step": 57956
    },
    {
      "epoch": 0.0003537353515625,
      "step": 57956,
      "training_step_time": 0.38161301612854004
    },
    {
      "epoch": 0.000353741455078125,
      "model_forward_time": 0.11464905738830566,
      "step": 57957
    },
    {
      "epoch": 0.000353741455078125,
      "step": 57957,
      "training_step_time": 0.37735486030578613
    },
    {
      "epoch": 0.00035374755859375,
      "model_forward_time": 0.11441946029663086,
      "step": 57958
    },
    {
      "epoch": 0.00035374755859375,
      "step": 57958,
      "training_step_time": 0.49259161949157715
    },
    {
      "epoch": 0.000353753662109375,
      "model_forward_time": 0.114105224609375,
      "step": 57959
    },
    {
      "epoch": 0.000353753662109375,
      "step": 57959,
      "training_step_time": 0.4657933712005615
    },
    {
      "epoch": 0.000353759765625,
      "grad_norm": 0.08682823181152344,
      "learning_rate": 3.157132477328628e-07,
      "loss": 0.0386,
      "step": 57960
    },
    {
      "epoch": 0.000353759765625,
      "model_forward_time": 0.11439275741577148,
      "step": 57960
    },
    {
      "epoch": 0.000353759765625,
      "step": 57960,
      "training_step_time": 0.37786102294921875
    },
    {
      "epoch": 0.000353765869140625,
      "model_forward_time": 0.11486172676086426,
      "step": 57961
    },
    {
      "epoch": 0.000353765869140625,
      "step": 57961,
      "training_step_time": 0.8665256500244141
    },
    {
      "epoch": 0.00035377197265625,
      "model_forward_time": 0.11408805847167969,
      "step": 57962
    },
    {
      "epoch": 0.00035377197265625,
      "step": 57962,
      "training_step_time": 0.38112592697143555
    },
    {
      "epoch": 0.000353778076171875,
      "model_forward_time": 0.11462759971618652,
      "step": 57963
    },
    {
      "epoch": 0.000353778076171875,
      "step": 57963,
      "training_step_time": 0.379976749420166
    },
    {
      "epoch": 0.0003537841796875,
      "model_forward_time": 0.1143488883972168,
      "step": 57964
    },
    {
      "epoch": 0.0003537841796875,
      "step": 57964,
      "training_step_time": 0.3885364532470703
    },
    {
      "epoch": 0.000353790283203125,
      "model_forward_time": 0.11524605751037598,
      "step": 57965
    },
    {
      "epoch": 0.000353790283203125,
      "step": 57965,
      "training_step_time": 0.3932929039001465
    },
    {
      "epoch": 0.00035379638671875,
      "model_forward_time": 0.11456298828125,
      "step": 57966
    },
    {
      "epoch": 0.00035379638671875,
      "step": 57966,
      "training_step_time": 0.39010047912597656
    },
    {
      "epoch": 0.000353802490234375,
      "model_forward_time": 0.11484360694885254,
      "step": 57967
    },
    {
      "epoch": 0.000353802490234375,
      "step": 57967,
      "training_step_time": 0.7680904865264893
    },
    {
      "epoch": 0.00035380859375,
      "model_forward_time": 0.1146693229675293,
      "step": 57968
    },
    {
      "epoch": 0.00035380859375,
      "step": 57968,
      "training_step_time": 0.4461832046508789
    },
    {
      "epoch": 0.000353814697265625,
      "model_forward_time": 0.11471939086914062,
      "step": 57969
    },
    {
      "epoch": 0.000353814697265625,
      "step": 57969,
      "training_step_time": 0.3790092468261719
    },
    {
      "epoch": 0.00035382080078125,
      "grad_norm": 0.09250719100236893,
      "learning_rate": 3.12628827826883e-07,
      "loss": 0.0349,
      "step": 57970
    },
    {
      "epoch": 0.00035382080078125,
      "model_forward_time": 0.11464118957519531,
      "step": 57970
    },
    {
      "epoch": 0.00035382080078125,
      "step": 57970,
      "training_step_time": 0.3779151439666748
    },
    {
      "epoch": 0.000353826904296875,
      "model_forward_time": 0.11492180824279785,
      "step": 57971
    },
    {
      "epoch": 0.000353826904296875,
      "step": 57971,
      "training_step_time": 0.43409013748168945
    },
    {
      "epoch": 0.0003538330078125,
      "model_forward_time": 0.11479520797729492,
      "step": 57972
    },
    {
      "epoch": 0.0003538330078125,
      "step": 57972,
      "training_step_time": 0.477130651473999
    },
    {
      "epoch": 0.000353839111328125,
      "model_forward_time": 0.11442232131958008,
      "step": 57973
    },
    {
      "epoch": 0.000353839111328125,
      "step": 57973,
      "training_step_time": 0.39851832389831543
    },
    {
      "epoch": 0.00035384521484375,
      "model_forward_time": 0.11486625671386719,
      "step": 57974
    },
    {
      "epoch": 0.00035384521484375,
      "step": 57974,
      "training_step_time": 0.4169490337371826
    },
    {
      "epoch": 0.000353851318359375,
      "model_forward_time": 0.11543989181518555,
      "step": 57975
    },
    {
      "epoch": 0.000353851318359375,
      "step": 57975,
      "training_step_time": 0.39600419998168945
    },
    {
      "epoch": 0.000353857421875,
      "model_forward_time": 0.11510682106018066,
      "step": 57976
    },
    {
      "epoch": 0.000353857421875,
      "step": 57976,
      "training_step_time": 0.37746262550354004
    },
    {
      "epoch": 0.000353863525390625,
      "model_forward_time": 0.11451935768127441,
      "step": 57977
    },
    {
      "epoch": 0.000353863525390625,
      "step": 57977,
      "training_step_time": 0.3879830837249756
    },
    {
      "epoch": 0.00035386962890625,
      "model_forward_time": 0.11525964736938477,
      "step": 57978
    },
    {
      "epoch": 0.00035386962890625,
      "step": 57978,
      "training_step_time": 0.3879523277282715
    },
    {
      "epoch": 0.000353875732421875,
      "model_forward_time": 0.1147923469543457,
      "step": 57979
    },
    {
      "epoch": 0.000353875732421875,
      "step": 57979,
      "training_step_time": 1.1068525314331055
    },
    {
      "epoch": 0.0003538818359375,
      "grad_norm": 0.12260481715202332,
      "learning_rate": 3.095595016323394e-07,
      "loss": 0.0331,
      "step": 57980
    },
    {
      "epoch": 0.0003538818359375,
      "model_forward_time": 0.11426138877868652,
      "step": 57980
    },
    {
      "epoch": 0.0003538818359375,
      "step": 57980,
      "training_step_time": 0.38326001167297363
    },
    {
      "epoch": 0.000353887939453125,
      "model_forward_time": 0.11489272117614746,
      "step": 57981
    },
    {
      "epoch": 0.000353887939453125,
      "step": 57981,
      "training_step_time": 0.3970160484313965
    },
    {
      "epoch": 0.00035389404296875,
      "model_forward_time": 0.11396527290344238,
      "step": 57982
    },
    {
      "epoch": 0.00035389404296875,
      "step": 57982,
      "training_step_time": 0.3872840404510498
    },
    {
      "epoch": 0.000353900146484375,
      "model_forward_time": 0.11377573013305664,
      "step": 57983
    },
    {
      "epoch": 0.000353900146484375,
      "step": 57983,
      "training_step_time": 0.38170385360717773
    },
    {
      "epoch": 0.00035390625,
      "model_forward_time": 0.11388182640075684,
      "step": 57984
    },
    {
      "epoch": 0.00035390625,
      "step": 57984,
      "training_step_time": 0.44805407524108887
    },
    {
      "epoch": 0.000353912353515625,
      "model_forward_time": 0.1151585578918457,
      "step": 57985
    },
    {
      "epoch": 0.000353912353515625,
      "step": 57985,
      "training_step_time": 0.46453046798706055
    },
    {
      "epoch": 0.00035391845703125,
      "model_forward_time": 0.1151430606842041,
      "step": 57986
    },
    {
      "epoch": 0.00035391845703125,
      "step": 57986,
      "training_step_time": 0.4311356544494629
    },
    {
      "epoch": 0.000353924560546875,
      "model_forward_time": 0.11518621444702148,
      "step": 57987
    },
    {
      "epoch": 0.000353924560546875,
      "step": 57987,
      "training_step_time": 0.4230012893676758
    },
    {
      "epoch": 0.0003539306640625,
      "model_forward_time": 0.11497616767883301,
      "step": 57988
    },
    {
      "epoch": 0.0003539306640625,
      "step": 57988,
      "training_step_time": 0.50777268409729
    },
    {
      "epoch": 0.000353936767578125,
      "model_forward_time": 0.11440539360046387,
      "step": 57989
    },
    {
      "epoch": 0.000353936767578125,
      "step": 57989,
      "training_step_time": 0.39379119873046875
    },
    {
      "epoch": 0.00035394287109375,
      "grad_norm": 0.12981122732162476,
      "learning_rate": 3.0650527008162513e-07,
      "loss": 0.039,
      "step": 57990
    },
    {
      "epoch": 0.00035394287109375,
      "model_forward_time": 0.1150052547454834,
      "step": 57990
    },
    {
      "epoch": 0.00035394287109375,
      "step": 57990,
      "training_step_time": 0.3964242935180664
    },
    {
      "epoch": 0.000353948974609375,
      "model_forward_time": 0.11468744277954102,
      "step": 57991
    },
    {
      "epoch": 0.000353948974609375,
      "step": 57991,
      "training_step_time": 0.5253317356109619
    },
    {
      "epoch": 0.000353955078125,
      "model_forward_time": 0.11550092697143555,
      "step": 57992
    },
    {
      "epoch": 0.000353955078125,
      "step": 57992,
      "training_step_time": 0.387692928314209
    },
    {
      "epoch": 0.000353961181640625,
      "model_forward_time": 0.1146242618560791,
      "step": 57993
    },
    {
      "epoch": 0.000353961181640625,
      "step": 57993,
      "training_step_time": 0.4573335647583008
    },
    {
      "epoch": 0.00035396728515625,
      "model_forward_time": 0.11466646194458008,
      "step": 57994
    },
    {
      "epoch": 0.00035396728515625,
      "step": 57994,
      "training_step_time": 0.4116237163543701
    },
    {
      "epoch": 0.000353973388671875,
      "model_forward_time": 0.11435770988464355,
      "step": 57995
    },
    {
      "epoch": 0.000353973388671875,
      "step": 57995,
      "training_step_time": 0.3902294635772705
    },
    {
      "epoch": 0.0003539794921875,
      "model_forward_time": 0.11504101753234863,
      "step": 57996
    },
    {
      "epoch": 0.0003539794921875,
      "step": 57996,
      "training_step_time": 0.3934659957885742
    },
    {
      "epoch": 0.000353985595703125,
      "model_forward_time": 0.11467623710632324,
      "step": 57997
    },
    {
      "epoch": 0.000353985595703125,
      "step": 57997,
      "training_step_time": 0.9436545372009277
    },
    {
      "epoch": 0.00035399169921875,
      "model_forward_time": 0.11497712135314941,
      "step": 57998
    },
    {
      "epoch": 0.00035399169921875,
      "step": 57998,
      "training_step_time": 0.41169118881225586
    },
    {
      "epoch": 0.000353997802734375,
      "model_forward_time": 0.11492180824279785,
      "step": 57999
    },
    {
      "epoch": 0.000353997802734375,
      "step": 57999,
      "training_step_time": 0.4039320945739746
    },
    {
      "epoch": 0.00035400390625,
      "grad_norm": 0.07864077389240265,
      "learning_rate": 3.034661341025258e-07,
      "loss": 0.0337,
      "step": 58000
    },
    {
      "epoch": 0.00035400390625,
      "model_forward_time": 0.11319208145141602,
      "step": 58000
    },
    {
      "epoch": 0.00035400390625,
      "step": 58000,
      "training_step_time": 0.3523116111755371
    },
    {
      "epoch": 0.000354010009765625,
      "model_forward_time": 0.11284828186035156,
      "step": 58001
    },
    {
      "epoch": 0.000354010009765625,
      "step": 58001,
      "training_step_time": 0.47813892364501953
    },
    {
      "epoch": 0.00035401611328125,
      "model_forward_time": 0.11284422874450684,
      "step": 58002
    },
    {
      "epoch": 0.00035401611328125,
      "step": 58002,
      "training_step_time": 0.39302992820739746
    },
    {
      "epoch": 0.000354022216796875,
      "model_forward_time": 0.11293268203735352,
      "step": 58003
    },
    {
      "epoch": 0.000354022216796875,
      "step": 58003,
      "training_step_time": 0.3777732849121094
    },
    {
      "epoch": 0.0003540283203125,
      "model_forward_time": 0.11351275444030762,
      "step": 58004
    },
    {
      "epoch": 0.0003540283203125,
      "step": 58004,
      "training_step_time": 0.3757658004760742
    },
    {
      "epoch": 0.000354034423828125,
      "model_forward_time": 0.11447978019714355,
      "step": 58005
    },
    {
      "epoch": 0.000354034423828125,
      "step": 58005,
      "training_step_time": 0.39275598526000977
    },
    {
      "epoch": 0.00035404052734375,
      "model_forward_time": 0.11499738693237305,
      "step": 58006
    },
    {
      "epoch": 0.00035404052734375,
      "step": 58006,
      "training_step_time": 0.3773655891418457
    },
    {
      "epoch": 0.000354046630859375,
      "model_forward_time": 0.115264892578125,
      "step": 58007
    },
    {
      "epoch": 0.000354046630859375,
      "step": 58007,
      "training_step_time": 0.3921663761138916
    },
    {
      "epoch": 0.000354052734375,
      "model_forward_time": 0.11543893814086914,
      "step": 58008
    },
    {
      "epoch": 0.000354052734375,
      "step": 58008,
      "training_step_time": 0.3901100158691406
    },
    {
      "epoch": 0.000354058837890625,
      "model_forward_time": 0.11542344093322754,
      "step": 58009
    },
    {
      "epoch": 0.000354058837890625,
      "step": 58009,
      "training_step_time": 0.39099907875061035
    },
    {
      "epoch": 0.00035406494140625,
      "grad_norm": 0.05972515791654587,
      "learning_rate": 3.0044209461825844e-07,
      "loss": 0.0383,
      "step": 58010
    },
    {
      "epoch": 0.00035406494140625,
      "model_forward_time": 0.11598777770996094,
      "step": 58010
    },
    {
      "epoch": 0.00035406494140625,
      "step": 58010,
      "training_step_time": 0.38167595863342285
    },
    {
      "epoch": 0.000354071044921875,
      "model_forward_time": 0.11455368995666504,
      "step": 58011
    },
    {
      "epoch": 0.000354071044921875,
      "step": 58011,
      "training_step_time": 0.3956644535064697
    },
    {
      "epoch": 0.0003540771484375,
      "model_forward_time": 0.11460614204406738,
      "step": 58012
    },
    {
      "epoch": 0.0003540771484375,
      "step": 58012,
      "training_step_time": 0.4413297176361084
    },
    {
      "epoch": 0.000354083251953125,
      "model_forward_time": 0.11549854278564453,
      "step": 58013
    },
    {
      "epoch": 0.000354083251953125,
      "step": 58013,
      "training_step_time": 0.4360225200653076
    },
    {
      "epoch": 0.00035408935546875,
      "model_forward_time": 0.11516952514648438,
      "step": 58014
    },
    {
      "epoch": 0.00035408935546875,
      "step": 58014,
      "training_step_time": 0.4792320728302002
    },
    {
      "epoch": 0.000354095458984375,
      "model_forward_time": 0.1144566535949707,
      "step": 58015
    },
    {
      "epoch": 0.000354095458984375,
      "step": 58015,
      "training_step_time": 0.41634488105773926
    },
    {
      "epoch": 0.0003541015625,
      "model_forward_time": 0.11502361297607422,
      "step": 58016
    },
    {
      "epoch": 0.0003541015625,
      "step": 58016,
      "training_step_time": 0.5070281028747559
    },
    {
      "epoch": 0.000354107666015625,
      "model_forward_time": 0.11633777618408203,
      "step": 58017
    },
    {
      "epoch": 0.000354107666015625,
      "step": 58017,
      "training_step_time": 0.40731382369995117
    },
    {
      "epoch": 0.00035411376953125,
      "model_forward_time": 0.11539793014526367,
      "step": 58018
    },
    {
      "epoch": 0.00035411376953125,
      "step": 58018,
      "training_step_time": 0.37972521781921387
    },
    {
      "epoch": 0.000354119873046875,
      "model_forward_time": 0.11568593978881836,
      "step": 58019
    },
    {
      "epoch": 0.000354119873046875,
      "step": 58019,
      "training_step_time": 0.41561150550842285
    },
    {
      "epoch": 0.0003541259765625,
      "grad_norm": 0.12631504237651825,
      "learning_rate": 2.9743315254743833e-07,
      "loss": 0.0321,
      "step": 58020
    },
    {
      "epoch": 0.0003541259765625,
      "model_forward_time": 0.11403274536132812,
      "step": 58020
    },
    {
      "epoch": 0.0003541259765625,
      "step": 58020,
      "training_step_time": 0.45991015434265137
    },
    {
      "epoch": 0.000354132080078125,
      "model_forward_time": 0.11495184898376465,
      "step": 58021
    },
    {
      "epoch": 0.000354132080078125,
      "step": 58021,
      "training_step_time": 0.396578311920166
    },
    {
      "epoch": 0.00035413818359375,
      "model_forward_time": 0.11455011367797852,
      "step": 58022
    },
    {
      "epoch": 0.00035413818359375,
      "step": 58022,
      "training_step_time": 0.38970112800598145
    },
    {
      "epoch": 0.000354144287109375,
      "model_forward_time": 0.11481261253356934,
      "step": 58023
    },
    {
      "epoch": 0.000354144287109375,
      "step": 58023,
      "training_step_time": 0.40531206130981445
    },
    {
      "epoch": 0.000354150390625,
      "model_forward_time": 0.11462926864624023,
      "step": 58024
    },
    {
      "epoch": 0.000354150390625,
      "step": 58024,
      "training_step_time": 0.3983430862426758
    },
    {
      "epoch": 0.000354156494140625,
      "model_forward_time": 0.11510539054870605,
      "step": 58025
    },
    {
      "epoch": 0.000354156494140625,
      "step": 58025,
      "training_step_time": 0.3786191940307617
    },
    {
      "epoch": 0.00035416259765625,
      "model_forward_time": 0.11572122573852539,
      "step": 58026
    },
    {
      "epoch": 0.00035416259765625,
      "step": 58026,
      "training_step_time": 0.4538872241973877
    },
    {
      "epoch": 0.000354168701171875,
      "model_forward_time": 0.11476683616638184,
      "step": 58027
    },
    {
      "epoch": 0.000354168701171875,
      "step": 58027,
      "training_step_time": 0.3975553512573242
    },
    {
      "epoch": 0.0003541748046875,
      "model_forward_time": 0.11501646041870117,
      "step": 58028
    },
    {
      "epoch": 0.0003541748046875,
      "step": 58028,
      "training_step_time": 0.4494941234588623
    },
    {
      "epoch": 0.000354180908203125,
      "model_forward_time": 0.11560940742492676,
      "step": 58029
    },
    {
      "epoch": 0.000354180908203125,
      "step": 58029,
      "training_step_time": 0.4392201900482178
    },
    {
      "epoch": 0.00035418701171875,
      "grad_norm": 0.08784379810094833,
      "learning_rate": 2.944393088041009e-07,
      "loss": 0.0379,
      "step": 58030
    },
    {
      "epoch": 0.00035418701171875,
      "model_forward_time": 0.11520004272460938,
      "step": 58030
    },
    {
      "epoch": 0.00035418701171875,
      "step": 58030,
      "training_step_time": 0.4540684223175049
    },
    {
      "epoch": 0.000354193115234375,
      "model_forward_time": 0.11444664001464844,
      "step": 58031
    },
    {
      "epoch": 0.000354193115234375,
      "step": 58031,
      "training_step_time": 0.49610233306884766
    },
    {
      "epoch": 0.00035419921875,
      "model_forward_time": 0.11480522155761719,
      "step": 58032
    },
    {
      "epoch": 0.00035419921875,
      "step": 58032,
      "training_step_time": 0.40235209465026855
    },
    {
      "epoch": 0.000354205322265625,
      "model_forward_time": 0.11515045166015625,
      "step": 58033
    },
    {
      "epoch": 0.000354205322265625,
      "step": 58033,
      "training_step_time": 0.40290188789367676
    },
    {
      "epoch": 0.00035421142578125,
      "model_forward_time": 0.11462950706481934,
      "step": 58034
    },
    {
      "epoch": 0.00035421142578125,
      "step": 58034,
      "training_step_time": 0.4117755889892578
    },
    {
      "epoch": 0.000354217529296875,
      "model_forward_time": 0.11476492881774902,
      "step": 58035
    },
    {
      "epoch": 0.000354217529296875,
      "step": 58035,
      "training_step_time": 0.39205479621887207
    },
    {
      "epoch": 0.0003542236328125,
      "model_forward_time": 0.1151266098022461,
      "step": 58036
    },
    {
      "epoch": 0.0003542236328125,
      "step": 58036,
      "training_step_time": 0.38818788528442383
    },
    {
      "epoch": 0.000354229736328125,
      "model_forward_time": 0.11439824104309082,
      "step": 58037
    },
    {
      "epoch": 0.000354229736328125,
      "step": 58037,
      "training_step_time": 0.3914351463317871
    },
    {
      "epoch": 0.00035423583984375,
      "model_forward_time": 0.11528468132019043,
      "step": 58038
    },
    {
      "epoch": 0.00035423583984375,
      "step": 58038,
      "training_step_time": 0.4024546146392822
    },
    {
      "epoch": 0.000354241943359375,
      "model_forward_time": 0.11457085609436035,
      "step": 58039
    },
    {
      "epoch": 0.000354241943359375,
      "step": 58039,
      "training_step_time": 0.3995041847229004
    },
    {
      "epoch": 0.000354248046875,
      "grad_norm": 0.09964385628700256,
      "learning_rate": 2.9146056429771305e-07,
      "loss": 0.0378,
      "step": 58040
    },
    {
      "epoch": 0.000354248046875,
      "model_forward_time": 0.11513423919677734,
      "step": 58040
    },
    {
      "epoch": 0.000354248046875,
      "step": 58040,
      "training_step_time": 0.3925809860229492
    },
    {
      "epoch": 0.000354254150390625,
      "model_forward_time": 0.1151432991027832,
      "step": 58041
    },
    {
      "epoch": 0.000354254150390625,
      "step": 58041,
      "training_step_time": 0.40158510208129883
    },
    {
      "epoch": 0.00035426025390625,
      "model_forward_time": 0.1150045394897461,
      "step": 58042
    },
    {
      "epoch": 0.00035426025390625,
      "step": 58042,
      "training_step_time": 0.4589254856109619
    },
    {
      "epoch": 0.000354266357421875,
      "model_forward_time": 0.11562156677246094,
      "step": 58043
    },
    {
      "epoch": 0.000354266357421875,
      "step": 58043,
      "training_step_time": 0.42362332344055176
    },
    {
      "epoch": 0.0003542724609375,
      "model_forward_time": 0.11505818367004395,
      "step": 58044
    },
    {
      "epoch": 0.0003542724609375,
      "step": 58044,
      "training_step_time": 0.36568212509155273
    },
    {
      "epoch": 0.000354278564453125,
      "model_forward_time": 0.11465716361999512,
      "step": 58045
    },
    {
      "epoch": 0.000354278564453125,
      "step": 58045,
      "training_step_time": 0.45652270317077637
    },
    {
      "epoch": 0.00035428466796875,
      "model_forward_time": 0.11425590515136719,
      "step": 58046
    },
    {
      "epoch": 0.00035428466796875,
      "step": 58046,
      "training_step_time": 0.40993189811706543
    },
    {
      "epoch": 0.000354290771484375,
      "model_forward_time": 0.11483430862426758,
      "step": 58047
    },
    {
      "epoch": 0.000354290771484375,
      "step": 58047,
      "training_step_time": 0.3949928283691406
    },
    {
      "epoch": 0.000354296875,
      "model_forward_time": 0.1146550178527832,
      "step": 58048
    },
    {
      "epoch": 0.000354296875,
      "step": 58048,
      "training_step_time": 0.3916287422180176
    },
    {
      "epoch": 0.000354302978515625,
      "model_forward_time": 0.11517763137817383,
      "step": 58049
    },
    {
      "epoch": 0.000354302978515625,
      "step": 58049,
      "training_step_time": 0.39920711517333984
    },
    {
      "epoch": 0.00035430908203125,
      "grad_norm": 0.08608684688806534,
      "learning_rate": 2.8849691993311777e-07,
      "loss": 0.0358,
      "step": 58050
    },
    {
      "epoch": 0.00035430908203125,
      "model_forward_time": 0.11496782302856445,
      "step": 58050
    },
    {
      "epoch": 0.00035430908203125,
      "step": 58050,
      "training_step_time": 0.4107046127319336
    },
    {
      "epoch": 0.000354315185546875,
      "model_forward_time": 0.11466193199157715,
      "step": 58051
    },
    {
      "epoch": 0.000354315185546875,
      "step": 58051,
      "training_step_time": 0.3966636657714844
    },
    {
      "epoch": 0.0003543212890625,
      "model_forward_time": 0.11562895774841309,
      "step": 58052
    },
    {
      "epoch": 0.0003543212890625,
      "step": 58052,
      "training_step_time": 0.40276598930358887
    },
    {
      "epoch": 0.000354327392578125,
      "model_forward_time": 0.11467838287353516,
      "step": 58053
    },
    {
      "epoch": 0.000354327392578125,
      "step": 58053,
      "training_step_time": 0.39890480041503906
    },
    {
      "epoch": 0.00035433349609375,
      "model_forward_time": 0.1152033805847168,
      "step": 58054
    },
    {
      "epoch": 0.00035433349609375,
      "step": 58054,
      "training_step_time": 0.3906056880950928
    },
    {
      "epoch": 0.000354339599609375,
      "model_forward_time": 0.11500883102416992,
      "step": 58055
    },
    {
      "epoch": 0.000354339599609375,
      "step": 58055,
      "training_step_time": 0.39105677604675293
    },
    {
      "epoch": 0.000354345703125,
      "model_forward_time": 0.11536455154418945,
      "step": 58056
    },
    {
      "epoch": 0.000354345703125,
      "step": 58056,
      "training_step_time": 0.40984320640563965
    },
    {
      "epoch": 0.000354351806640625,
      "model_forward_time": 0.11469674110412598,
      "step": 58057
    },
    {
      "epoch": 0.000354351806640625,
      "step": 58057,
      "training_step_time": 0.3926045894622803
    },
    {
      "epoch": 0.00035435791015625,
      "model_forward_time": 0.11527204513549805,
      "step": 58058
    },
    {
      "epoch": 0.00035435791015625,
      "step": 58058,
      "training_step_time": 0.41015076637268066
    },
    {
      "epoch": 0.000354364013671875,
      "model_forward_time": 0.11463785171508789,
      "step": 58059
    },
    {
      "epoch": 0.000354364013671875,
      "step": 58059,
      "training_step_time": 0.3647885322570801
    },
    {
      "epoch": 0.0003543701171875,
      "grad_norm": 0.10568711161613464,
      "learning_rate": 2.85548376610606e-07,
      "loss": 0.034,
      "step": 58060
    },
    {
      "epoch": 0.0003543701171875,
      "model_forward_time": 0.11574697494506836,
      "step": 58060
    },
    {
      "epoch": 0.0003543701171875,
      "step": 58060,
      "training_step_time": 0.45490503311157227
    },
    {
      "epoch": 0.000354376220703125,
      "model_forward_time": 0.11536526679992676,
      "step": 58061
    },
    {
      "epoch": 0.000354376220703125,
      "step": 58061,
      "training_step_time": 0.42395544052124023
    },
    {
      "epoch": 0.00035438232421875,
      "model_forward_time": 0.11473321914672852,
      "step": 58062
    },
    {
      "epoch": 0.00035438232421875,
      "step": 58062,
      "training_step_time": 0.38723134994506836
    },
    {
      "epoch": 0.000354388427734375,
      "model_forward_time": 0.11501407623291016,
      "step": 58063
    },
    {
      "epoch": 0.000354388427734375,
      "step": 58063,
      "training_step_time": 0.38837528228759766
    },
    {
      "epoch": 0.00035439453125,
      "model_forward_time": 0.11524820327758789,
      "step": 58064
    },
    {
      "epoch": 0.00035439453125,
      "step": 58064,
      "training_step_time": 0.39170265197753906
    },
    {
      "epoch": 0.000354400634765625,
      "model_forward_time": 0.11523818969726562,
      "step": 58065
    },
    {
      "epoch": 0.000354400634765625,
      "step": 58065,
      "training_step_time": 0.4213392734527588
    },
    {
      "epoch": 0.00035440673828125,
      "model_forward_time": 0.1149148941040039,
      "step": 58066
    },
    {
      "epoch": 0.00035440673828125,
      "step": 58066,
      "training_step_time": 0.39479684829711914
    },
    {
      "epoch": 0.000354412841796875,
      "model_forward_time": 0.11482977867126465,
      "step": 58067
    },
    {
      "epoch": 0.000354412841796875,
      "step": 58067,
      "training_step_time": 0.39077281951904297
    },
    {
      "epoch": 0.0003544189453125,
      "model_forward_time": 0.11574530601501465,
      "step": 58068
    },
    {
      "epoch": 0.0003544189453125,
      "step": 58068,
      "training_step_time": 0.3927347660064697
    },
    {
      "epoch": 0.000354425048828125,
      "model_forward_time": 0.11497020721435547,
      "step": 58069
    },
    {
      "epoch": 0.000354425048828125,
      "step": 58069,
      "training_step_time": 0.39049315452575684
    },
    {
      "epoch": 0.00035443115234375,
      "grad_norm": 0.09467411041259766,
      "learning_rate": 2.826149352258667e-07,
      "loss": 0.0387,
      "step": 58070
    },
    {
      "epoch": 0.00035443115234375,
      "model_forward_time": 0.11492466926574707,
      "step": 58070
    },
    {
      "epoch": 0.00035443115234375,
      "step": 58070,
      "training_step_time": 0.3951146602630615
    },
    {
      "epoch": 0.000354437255859375,
      "model_forward_time": 0.11550760269165039,
      "step": 58071
    },
    {
      "epoch": 0.000354437255859375,
      "step": 58071,
      "training_step_time": 0.5625088214874268
    },
    {
      "epoch": 0.000354443359375,
      "model_forward_time": 0.1147925853729248,
      "step": 58072
    },
    {
      "epoch": 0.000354443359375,
      "step": 58072,
      "training_step_time": 0.424724817276001
    },
    {
      "epoch": 0.000354449462890625,
      "model_forward_time": 0.11472845077514648,
      "step": 58073
    },
    {
      "epoch": 0.000354449462890625,
      "step": 58073,
      "training_step_time": 0.4547872543334961
    },
    {
      "epoch": 0.00035445556640625,
      "model_forward_time": 0.11521148681640625,
      "step": 58074
    },
    {
      "epoch": 0.00035445556640625,
      "step": 58074,
      "training_step_time": 0.42633676528930664
    },
    {
      "epoch": 0.000354461669921875,
      "model_forward_time": 0.11527752876281738,
      "step": 58075
    },
    {
      "epoch": 0.000354461669921875,
      "step": 58075,
      "training_step_time": 0.4629957675933838
    },
    {
      "epoch": 0.0003544677734375,
      "model_forward_time": 0.11527633666992188,
      "step": 58076
    },
    {
      "epoch": 0.0003544677734375,
      "step": 58076,
      "training_step_time": 0.436903715133667
    },
    {
      "epoch": 0.000354473876953125,
      "model_forward_time": 0.1145620346069336,
      "step": 58077
    },
    {
      "epoch": 0.000354473876953125,
      "step": 58077,
      "training_step_time": 0.409393310546875
    },
    {
      "epoch": 0.00035447998046875,
      "model_forward_time": 0.11543130874633789,
      "step": 58078
    },
    {
      "epoch": 0.00035447998046875,
      "step": 58078,
      "training_step_time": 0.3857741355895996
    },
    {
      "epoch": 0.000354486083984375,
      "model_forward_time": 0.11505961418151855,
      "step": 58079
    },
    {
      "epoch": 0.000354486083984375,
      "step": 58079,
      "training_step_time": 0.7504236698150635
    },
    {
      "epoch": 0.0003544921875,
      "grad_norm": 0.09116142988204956,
      "learning_rate": 2.796965966699927e-07,
      "loss": 0.0369,
      "step": 58080
    },
    {
      "epoch": 0.0003544921875,
      "model_forward_time": 0.11415243148803711,
      "step": 58080
    },
    {
      "epoch": 0.0003544921875,
      "step": 58080,
      "training_step_time": 0.3823995590209961
    },
    {
      "epoch": 0.000354498291015625,
      "model_forward_time": 0.11457085609436035,
      "step": 58081
    },
    {
      "epoch": 0.000354498291015625,
      "step": 58081,
      "training_step_time": 0.39365482330322266
    },
    {
      "epoch": 0.00035450439453125,
      "model_forward_time": 0.11488485336303711,
      "step": 58082
    },
    {
      "epoch": 0.00035450439453125,
      "step": 58082,
      "training_step_time": 0.40761613845825195
    },
    {
      "epoch": 0.000354510498046875,
      "model_forward_time": 0.11439847946166992,
      "step": 58083
    },
    {
      "epoch": 0.000354510498046875,
      "step": 58083,
      "training_step_time": 0.40077733993530273
    },
    {
      "epoch": 0.0003545166015625,
      "model_forward_time": 0.11421656608581543,
      "step": 58084
    },
    {
      "epoch": 0.0003545166015625,
      "step": 58084,
      "training_step_time": 0.382551908493042
    },
    {
      "epoch": 0.000354522705078125,
      "model_forward_time": 0.11445045471191406,
      "step": 58085
    },
    {
      "epoch": 0.000354522705078125,
      "step": 58085,
      "training_step_time": 0.48362159729003906
    },
    {
      "epoch": 0.00035452880859375,
      "model_forward_time": 0.11553072929382324,
      "step": 58086
    },
    {
      "epoch": 0.00035452880859375,
      "step": 58086,
      "training_step_time": 0.4672560691833496
    },
    {
      "epoch": 0.000354534912109375,
      "model_forward_time": 0.11443209648132324,
      "step": 58087
    },
    {
      "epoch": 0.000354534912109375,
      "step": 58087,
      "training_step_time": 0.41736578941345215
    },
    {
      "epoch": 0.000354541015625,
      "model_forward_time": 0.11499643325805664,
      "step": 58088
    },
    {
      "epoch": 0.000354541015625,
      "step": 58088,
      "training_step_time": 0.4862360954284668
    },
    {
      "epoch": 0.000354547119140625,
      "model_forward_time": 0.11543512344360352,
      "step": 58089
    },
    {
      "epoch": 0.000354547119140625,
      "step": 58089,
      "training_step_time": 0.49275803565979004
    },
    {
      "epoch": 0.00035455322265625,
      "grad_norm": 0.07381538301706314,
      "learning_rate": 2.767933618295082e-07,
      "loss": 0.0358,
      "step": 58090
    },
    {
      "epoch": 0.00035455322265625,
      "model_forward_time": 0.11450505256652832,
      "step": 58090
    },
    {
      "epoch": 0.00035455322265625,
      "step": 58090,
      "training_step_time": 0.3905043601989746
    },
    {
      "epoch": 0.000354559326171875,
      "model_forward_time": 0.1142878532409668,
      "step": 58091
    },
    {
      "epoch": 0.000354559326171875,
      "step": 58091,
      "training_step_time": 0.4053373336791992
    },
    {
      "epoch": 0.0003545654296875,
      "model_forward_time": 0.1148076057434082,
      "step": 58092
    },
    {
      "epoch": 0.0003545654296875,
      "step": 58092,
      "training_step_time": 0.3894367218017578
    },
    {
      "epoch": 0.000354571533203125,
      "model_forward_time": 0.11467170715332031,
      "step": 58093
    },
    {
      "epoch": 0.000354571533203125,
      "step": 58093,
      "training_step_time": 0.4030017852783203
    },
    {
      "epoch": 0.00035457763671875,
      "model_forward_time": 0.1157693862915039,
      "step": 58094
    },
    {
      "epoch": 0.00035457763671875,
      "step": 58094,
      "training_step_time": 0.4014856815338135
    },
    {
      "epoch": 0.000354583740234375,
      "model_forward_time": 0.11515307426452637,
      "step": 58095
    },
    {
      "epoch": 0.000354583740234375,
      "step": 58095,
      "training_step_time": 0.3903346061706543
    },
    {
      "epoch": 0.00035458984375,
      "model_forward_time": 0.11508059501647949,
      "step": 58096
    },
    {
      "epoch": 0.00035458984375,
      "step": 58096,
      "training_step_time": 0.3814888000488281
    },
    {
      "epoch": 0.000354595947265625,
      "model_forward_time": 0.11559748649597168,
      "step": 58097
    },
    {
      "epoch": 0.000354595947265625,
      "step": 58097,
      "training_step_time": 0.3997030258178711
    },
    {
      "epoch": 0.00035460205078125,
      "model_forward_time": 0.11519145965576172,
      "step": 58098
    },
    {
      "epoch": 0.00035460205078125,
      "step": 58098,
      "training_step_time": 0.4153313636779785
    },
    {
      "epoch": 0.000354608154296875,
      "model_forward_time": 0.11499786376953125,
      "step": 58099
    },
    {
      "epoch": 0.000354608154296875,
      "step": 58099,
      "training_step_time": 0.41379261016845703
    },
    {
      "epoch": 0.0003546142578125,
      "grad_norm": 0.08936069905757904,
      "learning_rate": 2.7390523158633554e-07,
      "loss": 0.044,
      "step": 58100
    },
    {
      "epoch": 0.0003546142578125,
      "model_forward_time": 0.11523747444152832,
      "step": 58100
    },
    {
      "epoch": 0.0003546142578125,
      "step": 58100,
      "training_step_time": 0.48968982696533203
    },
    {
      "epoch": 0.000354620361328125,
      "model_forward_time": 0.1162576675415039,
      "step": 58101
    },
    {
      "epoch": 0.000354620361328125,
      "step": 58101,
      "training_step_time": 0.4045107364654541
    },
    {
      "epoch": 0.00035462646484375,
      "model_forward_time": 0.11568522453308105,
      "step": 58102
    },
    {
      "epoch": 0.00035462646484375,
      "step": 58102,
      "training_step_time": 0.44296836853027344
    },
    {
      "epoch": 0.000354632568359375,
      "model_forward_time": 0.11561989784240723,
      "step": 58103
    },
    {
      "epoch": 0.000354632568359375,
      "step": 58103,
      "training_step_time": 0.456373929977417
    },
    {
      "epoch": 0.000354638671875,
      "model_forward_time": 0.1150367259979248,
      "step": 58104
    },
    {
      "epoch": 0.000354638671875,
      "step": 58104,
      "training_step_time": 0.44666528701782227
    },
    {
      "epoch": 0.000354644775390625,
      "model_forward_time": 0.11481761932373047,
      "step": 58105
    },
    {
      "epoch": 0.000354644775390625,
      "step": 58105,
      "training_step_time": 0.40933704376220703
    },
    {
      "epoch": 0.00035465087890625,
      "model_forward_time": 0.11497926712036133,
      "step": 58106
    },
    {
      "epoch": 0.00035465087890625,
      "step": 58106,
      "training_step_time": 0.40511441230773926
    },
    {
      "epoch": 0.000354656982421875,
      "model_forward_time": 0.1149599552154541,
      "step": 58107
    },
    {
      "epoch": 0.000354656982421875,
      "step": 58107,
      "training_step_time": 0.3917820453643799
    },
    {
      "epoch": 0.0003546630859375,
      "model_forward_time": 0.11539435386657715,
      "step": 58108
    },
    {
      "epoch": 0.0003546630859375,
      "step": 58108,
      "training_step_time": 0.395688533782959
    },
    {
      "epoch": 0.000354669189453125,
      "model_forward_time": 0.11524629592895508,
      "step": 58109
    },
    {
      "epoch": 0.000354669189453125,
      "step": 58109,
      "training_step_time": 0.4134962558746338
    },
    {
      "epoch": 0.00035467529296875,
      "grad_norm": 0.09495451301336288,
      "learning_rate": 2.7103220681780615e-07,
      "loss": 0.0386,
      "step": 58110
    },
    {
      "epoch": 0.00035467529296875,
      "model_forward_time": 0.11539578437805176,
      "step": 58110
    },
    {
      "epoch": 0.00035467529296875,
      "step": 58110,
      "training_step_time": 0.3983151912689209
    },
    {
      "epoch": 0.000354681396484375,
      "model_forward_time": 0.11441946029663086,
      "step": 58111
    },
    {
      "epoch": 0.000354681396484375,
      "step": 58111,
      "training_step_time": 0.3936338424682617
    },
    {
      "epoch": 0.0003546875,
      "model_forward_time": 0.11706662178039551,
      "step": 58112
    },
    {
      "epoch": 0.0003546875,
      "step": 58112,
      "training_step_time": 0.3933601379394531
    },
    {
      "epoch": 0.000354693603515625,
      "model_forward_time": 0.11519408226013184,
      "step": 58113
    },
    {
      "epoch": 0.000354693603515625,
      "step": 58113,
      "training_step_time": 0.4248654842376709
    },
    {
      "epoch": 0.00035469970703125,
      "model_forward_time": 0.11605167388916016,
      "step": 58114
    },
    {
      "epoch": 0.00035469970703125,
      "step": 58114,
      "training_step_time": 0.4872877597808838
    },
    {
      "epoch": 0.000354705810546875,
      "model_forward_time": 0.11887359619140625,
      "step": 58115
    },
    {
      "epoch": 0.000354705810546875,
      "step": 58115,
      "training_step_time": 0.7094740867614746
    },
    {
      "epoch": 0.0003547119140625,
      "model_forward_time": 0.1169428825378418,
      "step": 58116
    },
    {
      "epoch": 0.0003547119140625,
      "step": 58116,
      "training_step_time": 0.6200635433197021
    },
    {
      "epoch": 0.000354718017578125,
      "model_forward_time": 0.12423539161682129,
      "step": 58117
    },
    {
      "epoch": 0.000354718017578125,
      "step": 58117,
      "training_step_time": 0.7421751022338867
    },
    {
      "epoch": 0.00035472412109375,
      "model_forward_time": 0.12680745124816895,
      "step": 58118
    },
    {
      "epoch": 0.00035472412109375,
      "step": 58118,
      "training_step_time": 0.6800670623779297
    },
    {
      "epoch": 0.000354730224609375,
      "model_forward_time": 0.12338590621948242,
      "step": 58119
    },
    {
      "epoch": 0.000354730224609375,
      "step": 58119,
      "training_step_time": 0.6440737247467041
    },
    {
      "epoch": 0.000354736328125,
      "grad_norm": 0.09571340680122375,
      "learning_rate": 2.6817428839668315e-07,
      "loss": 0.0367,
      "step": 58120
    },
    {
      "epoch": 0.000354736328125,
      "model_forward_time": 0.11871886253356934,
      "step": 58120
    },
    {
      "epoch": 0.000354736328125,
      "step": 58120,
      "training_step_time": 0.6357324123382568
    },
    {
      "epoch": 0.000354742431640625,
      "model_forward_time": 0.13340306282043457,
      "step": 58121
    },
    {
      "epoch": 0.000354742431640625,
      "step": 58121,
      "training_step_time": 0.6803643703460693
    },
    {
      "epoch": 0.00035474853515625,
      "model_forward_time": 0.14615678787231445,
      "step": 58122
    },
    {
      "epoch": 0.00035474853515625,
      "step": 58122,
      "training_step_time": 0.6598300933837891
    },
    {
      "epoch": 0.000354754638671875,
      "model_forward_time": 0.12079167366027832,
      "step": 58123
    },
    {
      "epoch": 0.000354754638671875,
      "step": 58123,
      "training_step_time": 0.6807448863983154
    },
    {
      "epoch": 0.0003547607421875,
      "model_forward_time": 0.12039065361022949,
      "step": 58124
    },
    {
      "epoch": 0.0003547607421875,
      "step": 58124,
      "training_step_time": 0.6500911712646484
    },
    {
      "epoch": 0.000354766845703125,
      "model_forward_time": 0.12084269523620605,
      "step": 58125
    },
    {
      "epoch": 0.000354766845703125,
      "step": 58125,
      "training_step_time": 0.6324174404144287
    },
    {
      "epoch": 0.00035477294921875,
      "model_forward_time": 0.12130522727966309,
      "step": 58126
    },
    {
      "epoch": 0.00035477294921875,
      "step": 58126,
      "training_step_time": 0.7225444316864014
    },
    {
      "epoch": 0.000354779052734375,
      "model_forward_time": 0.11986923217773438,
      "step": 58127
    },
    {
      "epoch": 0.000354779052734375,
      "step": 58127,
      "training_step_time": 0.7134840488433838
    },
    {
      "epoch": 0.00035478515625,
      "model_forward_time": 0.13875842094421387,
      "step": 58128
    },
    {
      "epoch": 0.00035478515625,
      "step": 58128,
      "training_step_time": 0.6599254608154297
    },
    {
      "epoch": 0.000354791259765625,
      "model_forward_time": 0.12283015251159668,
      "step": 58129
    },
    {
      "epoch": 0.000354791259765625,
      "step": 58129,
      "training_step_time": 0.6934020519256592
    },
    {
      "epoch": 0.00035479736328125,
      "grad_norm": 0.10357961058616638,
      "learning_rate": 2.653314771911108e-07,
      "loss": 0.0406,
      "step": 58130
    },
    {
      "epoch": 0.00035479736328125,
      "model_forward_time": 0.11990714073181152,
      "step": 58130
    },
    {
      "epoch": 0.00035479736328125,
      "step": 58130,
      "training_step_time": 0.6704208850860596
    },
    {
      "epoch": 0.000354803466796875,
      "model_forward_time": 0.11867833137512207,
      "step": 58131
    },
    {
      "epoch": 0.000354803466796875,
      "step": 58131,
      "training_step_time": 0.6753401756286621
    },
    {
      "epoch": 0.0003548095703125,
      "model_forward_time": 0.12021470069885254,
      "step": 58132
    },
    {
      "epoch": 0.0003548095703125,
      "step": 58132,
      "training_step_time": 0.8134346008300781
    },
    {
      "epoch": 0.000354815673828125,
      "model_forward_time": 0.11985540390014648,
      "step": 58133
    },
    {
      "epoch": 0.000354815673828125,
      "step": 58133,
      "training_step_time": 0.7102324962615967
    },
    {
      "epoch": 0.00035482177734375,
      "model_forward_time": 0.11799240112304688,
      "step": 58134
    },
    {
      "epoch": 0.00035482177734375,
      "step": 58134,
      "training_step_time": 0.7898044586181641
    },
    {
      "epoch": 0.000354827880859375,
      "model_forward_time": 0.1206820011138916,
      "step": 58135
    },
    {
      "epoch": 0.000354827880859375,
      "step": 58135,
      "training_step_time": 0.6758837699890137
    },
    {
      "epoch": 0.000354833984375,
      "model_forward_time": 0.11855340003967285,
      "step": 58136
    },
    {
      "epoch": 0.000354833984375,
      "step": 58136,
      "training_step_time": 0.693425178527832
    },
    {
      "epoch": 0.000354840087890625,
      "model_forward_time": 0.11617159843444824,
      "step": 58137
    },
    {
      "epoch": 0.000354840087890625,
      "step": 58137,
      "training_step_time": 0.6720421314239502
    },
    {
      "epoch": 0.00035484619140625,
      "model_forward_time": 0.1196739673614502,
      "step": 58138
    },
    {
      "epoch": 0.00035484619140625,
      "step": 58138,
      "training_step_time": 0.65557861328125
    },
    {
      "epoch": 0.000354852294921875,
      "model_forward_time": 0.12371325492858887,
      "step": 58139
    },
    {
      "epoch": 0.000354852294921875,
      "step": 58139,
      "training_step_time": 0.6372790336608887
    },
    {
      "epoch": 0.0003548583984375,
      "grad_norm": 0.09723302721977234,
      "learning_rate": 2.625037740646763e-07,
      "loss": 0.0372,
      "step": 58140
    },
    {
      "epoch": 0.0003548583984375,
      "model_forward_time": 0.12436127662658691,
      "step": 58140
    },
    {
      "epoch": 0.0003548583984375,
      "step": 58140,
      "training_step_time": 0.6794390678405762
    },
    {
      "epoch": 0.000354864501953125,
      "model_forward_time": 0.12003183364868164,
      "step": 58141
    },
    {
      "epoch": 0.000354864501953125,
      "step": 58141,
      "training_step_time": 0.6303095817565918
    },
    {
      "epoch": 0.00035487060546875,
      "model_forward_time": 0.11576986312866211,
      "step": 58142
    },
    {
      "epoch": 0.00035487060546875,
      "step": 58142,
      "training_step_time": 0.6858983039855957
    },
    {
      "epoch": 0.000354876708984375,
      "model_forward_time": 0.11666321754455566,
      "step": 58143
    },
    {
      "epoch": 0.000354876708984375,
      "step": 58143,
      "training_step_time": 0.6452615261077881
    },
    {
      "epoch": 0.0003548828125,
      "model_forward_time": 0.11795973777770996,
      "step": 58144
    },
    {
      "epoch": 0.0003548828125,
      "step": 58144,
      "training_step_time": 0.5586869716644287
    },
    {
      "epoch": 0.000354888916015625,
      "model_forward_time": 0.11618924140930176,
      "step": 58145
    },
    {
      "epoch": 0.000354888916015625,
      "step": 58145,
      "training_step_time": 0.7898142337799072
    },
    {
      "epoch": 0.00035489501953125,
      "model_forward_time": 0.1162269115447998,
      "step": 58146
    },
    {
      "epoch": 0.00035489501953125,
      "step": 58146,
      "training_step_time": 0.6275002956390381
    },
    {
      "epoch": 0.000354901123046875,
      "model_forward_time": 0.11871695518493652,
      "step": 58147
    },
    {
      "epoch": 0.000354901123046875,
      "step": 58147,
      "training_step_time": 0.6207747459411621
    },
    {
      "epoch": 0.0003549072265625,
      "model_forward_time": 0.11800885200500488,
      "step": 58148
    },
    {
      "epoch": 0.0003549072265625,
      "step": 58148,
      "training_step_time": 0.6347212791442871
    },
    {
      "epoch": 0.000354913330078125,
      "model_forward_time": 0.13059139251708984,
      "step": 58149
    },
    {
      "epoch": 0.000354913330078125,
      "step": 58149,
      "training_step_time": 0.6638612747192383
    },
    {
      "epoch": 0.00035491943359375,
      "grad_norm": 0.08738407492637634,
      "learning_rate": 2.5969117987634797e-07,
      "loss": 0.035,
      "step": 58150
    },
    {
      "epoch": 0.00035491943359375,
      "model_forward_time": 0.12143301963806152,
      "step": 58150
    },
    {
      "epoch": 0.00035491943359375,
      "step": 58150,
      "training_step_time": 0.6417849063873291
    },
    {
      "epoch": 0.000354925537109375,
      "model_forward_time": 0.11702251434326172,
      "step": 58151
    },
    {
      "epoch": 0.000354925537109375,
      "step": 58151,
      "training_step_time": 0.6926112174987793
    },
    {
      "epoch": 0.000354931640625,
      "model_forward_time": 0.11985087394714355,
      "step": 58152
    },
    {
      "epoch": 0.000354931640625,
      "step": 58152,
      "training_step_time": 0.7718508243560791
    },
    {
      "epoch": 0.000354937744140625,
      "model_forward_time": 0.11944222450256348,
      "step": 58153
    },
    {
      "epoch": 0.000354937744140625,
      "step": 58153,
      "training_step_time": 0.7116672992706299
    },
    {
      "epoch": 0.00035494384765625,
      "model_forward_time": 0.11852455139160156,
      "step": 58154
    },
    {
      "epoch": 0.00035494384765625,
      "step": 58154,
      "training_step_time": 0.6798710823059082
    },
    {
      "epoch": 0.000354949951171875,
      "model_forward_time": 0.11716341972351074,
      "step": 58155
    },
    {
      "epoch": 0.000354949951171875,
      "step": 58155,
      "training_step_time": 0.6417782306671143
    },
    {
      "epoch": 0.0003549560546875,
      "model_forward_time": 0.11702156066894531,
      "step": 58156
    },
    {
      "epoch": 0.0003549560546875,
      "step": 58156,
      "training_step_time": 0.6574406623840332
    },
    {
      "epoch": 0.000354962158203125,
      "model_forward_time": 0.12082123756408691,
      "step": 58157
    },
    {
      "epoch": 0.000354962158203125,
      "step": 58157,
      "training_step_time": 0.6253712177276611
    },
    {
      "epoch": 0.00035496826171875,
      "model_forward_time": 0.12140202522277832,
      "step": 58158
    },
    {
      "epoch": 0.00035496826171875,
      "step": 58158,
      "training_step_time": 0.6648907661437988
    },
    {
      "epoch": 0.000354974365234375,
      "model_forward_time": 0.11991477012634277,
      "step": 58159
    },
    {
      "epoch": 0.000354974365234375,
      "step": 58159,
      "training_step_time": 0.6710937023162842
    },
    {
      "epoch": 0.00035498046875,
      "grad_norm": 0.11119929701089859,
      "learning_rate": 2.568936954805201e-07,
      "loss": 0.0392,
      "step": 58160
    },
    {
      "epoch": 0.00035498046875,
      "model_forward_time": 0.11908602714538574,
      "step": 58160
    },
    {
      "epoch": 0.00035498046875,
      "step": 58160,
      "training_step_time": 0.753300666809082
    },
    {
      "epoch": 0.000354986572265625,
      "model_forward_time": 0.1258842945098877,
      "step": 58161
    },
    {
      "epoch": 0.000354986572265625,
      "step": 58161,
      "training_step_time": 0.6963090896606445
    },
    {
      "epoch": 0.00035499267578125,
      "model_forward_time": 0.11948657035827637,
      "step": 58162
    },
    {
      "epoch": 0.00035499267578125,
      "step": 58162,
      "training_step_time": 0.6753382682800293
    },
    {
      "epoch": 0.000354998779296875,
      "model_forward_time": 0.1196596622467041,
      "step": 58163
    },
    {
      "epoch": 0.000354998779296875,
      "step": 58163,
      "training_step_time": 0.6372787952423096
    },
    {
      "epoch": 0.0003550048828125,
      "model_forward_time": 0.11976027488708496,
      "step": 58164
    },
    {
      "epoch": 0.0003550048828125,
      "step": 58164,
      "training_step_time": 0.6900320053100586
    },
    {
      "epoch": 0.000355010986328125,
      "model_forward_time": 0.11796736717224121,
      "step": 58165
    },
    {
      "epoch": 0.000355010986328125,
      "step": 58165,
      "training_step_time": 0.6934220790863037
    },
    {
      "epoch": 0.00035501708984375,
      "model_forward_time": 0.11812472343444824,
      "step": 58166
    },
    {
      "epoch": 0.00035501708984375,
      "step": 58166,
      "training_step_time": 0.6286382675170898
    },
    {
      "epoch": 0.000355023193359375,
      "model_forward_time": 0.11950016021728516,
      "step": 58167
    },
    {
      "epoch": 0.000355023193359375,
      "step": 58167,
      "training_step_time": 0.6736674308776855
    },
    {
      "epoch": 0.000355029296875,
      "model_forward_time": 0.1151573657989502,
      "step": 58168
    },
    {
      "epoch": 0.000355029296875,
      "step": 58168,
      "training_step_time": 0.7112586498260498
    },
    {
      "epoch": 0.000355035400390625,
      "model_forward_time": 0.11980915069580078,
      "step": 58169
    },
    {
      "epoch": 0.000355035400390625,
      "step": 58169,
      "training_step_time": 0.6888511180877686
    },
    {
      "epoch": 0.00035504150390625,
      "grad_norm": 0.0920775979757309,
      "learning_rate": 2.5411132172700194e-07,
      "loss": 0.0347,
      "step": 58170
    },
    {
      "epoch": 0.00035504150390625,
      "model_forward_time": 0.11954903602600098,
      "step": 58170
    },
    {
      "epoch": 0.00035504150390625,
      "step": 58170,
      "training_step_time": 0.6871836185455322
    },
    {
      "epoch": 0.000355047607421875,
      "model_forward_time": 0.11941313743591309,
      "step": 58171
    },
    {
      "epoch": 0.000355047607421875,
      "step": 58171,
      "training_step_time": 0.6617414951324463
    },
    {
      "epoch": 0.0003550537109375,
      "model_forward_time": 0.11607909202575684,
      "step": 58172
    },
    {
      "epoch": 0.0003550537109375,
      "step": 58172,
      "training_step_time": 0.682633638381958
    },
    {
      "epoch": 0.000355059814453125,
      "model_forward_time": 0.1194298267364502,
      "step": 58173
    },
    {
      "epoch": 0.000355059814453125,
      "step": 58173,
      "training_step_time": 0.6681325435638428
    },
    {
      "epoch": 0.00035506591796875,
      "model_forward_time": 0.11849308013916016,
      "step": 58174
    },
    {
      "epoch": 0.00035506591796875,
      "step": 58174,
      "training_step_time": 0.738720178604126
    },
    {
      "epoch": 0.000355072021484375,
      "model_forward_time": 0.11929631233215332,
      "step": 58175
    },
    {
      "epoch": 0.000355072021484375,
      "step": 58175,
      "training_step_time": 0.6514716148376465
    },
    {
      "epoch": 0.000355078125,
      "model_forward_time": 0.12314653396606445,
      "step": 58176
    },
    {
      "epoch": 0.000355078125,
      "step": 58176,
      "training_step_time": 0.6664943695068359
    },
    {
      "epoch": 0.000355084228515625,
      "model_forward_time": 0.11682343482971191,
      "step": 58177
    },
    {
      "epoch": 0.000355084228515625,
      "step": 58177,
      "training_step_time": 0.605879545211792
    },
    {
      "epoch": 0.00035509033203125,
      "model_forward_time": 0.12072181701660156,
      "step": 58178
    },
    {
      "epoch": 0.00035509033203125,
      "step": 58178,
      "training_step_time": 0.6795496940612793
    },
    {
      "epoch": 0.000355096435546875,
      "model_forward_time": 0.1221311092376709,
      "step": 58179
    },
    {
      "epoch": 0.000355096435546875,
      "step": 58179,
      "training_step_time": 0.6414964199066162
    },
    {
      "epoch": 0.0003551025390625,
      "grad_norm": 0.10283609479665756,
      "learning_rate": 2.51344059460995e-07,
      "loss": 0.0342,
      "step": 58180
    },
    {
      "epoch": 0.0003551025390625,
      "model_forward_time": 0.12194228172302246,
      "step": 58180
    },
    {
      "epoch": 0.0003551025390625,
      "step": 58180,
      "training_step_time": 0.632582426071167
    },
    {
      "epoch": 0.000355108642578125,
      "model_forward_time": 0.11966395378112793,
      "step": 58181
    },
    {
      "epoch": 0.000355108642578125,
      "step": 58181,
      "training_step_time": 0.633995771408081
    },
    {
      "epoch": 0.00035511474609375,
      "model_forward_time": 0.12927794456481934,
      "step": 58182
    },
    {
      "epoch": 0.00035511474609375,
      "step": 58182,
      "training_step_time": 0.6692969799041748
    },
    {
      "epoch": 0.000355120849609375,
      "model_forward_time": 0.1222374439239502,
      "step": 58183
    },
    {
      "epoch": 0.000355120849609375,
      "step": 58183,
      "training_step_time": 0.6713547706604004
    },
    {
      "epoch": 0.000355126953125,
      "model_forward_time": 0.11918377876281738,
      "step": 58184
    },
    {
      "epoch": 0.000355126953125,
      "step": 58184,
      "training_step_time": 0.6924099922180176
    },
    {
      "epoch": 0.000355133056640625,
      "model_forward_time": 0.12442636489868164,
      "step": 58185
    },
    {
      "epoch": 0.000355133056640625,
      "step": 58185,
      "training_step_time": 0.49886250495910645
    },
    {
      "epoch": 0.00035513916015625,
      "model_forward_time": 0.11769843101501465,
      "step": 58186
    },
    {
      "epoch": 0.00035513916015625,
      "step": 58186,
      "training_step_time": 0.44905853271484375
    },
    {
      "epoch": 0.000355145263671875,
      "model_forward_time": 0.11875486373901367,
      "step": 58187
    },
    {
      "epoch": 0.000355145263671875,
      "step": 58187,
      "training_step_time": 0.4732675552368164
    },
    {
      "epoch": 0.0003551513671875,
      "model_forward_time": 0.11778116226196289,
      "step": 58188
    },
    {
      "epoch": 0.0003551513671875,
      "step": 58188,
      "training_step_time": 0.4302232265472412
    },
    {
      "epoch": 0.000355157470703125,
      "model_forward_time": 0.11699557304382324,
      "step": 58189
    },
    {
      "epoch": 0.000355157470703125,
      "step": 58189,
      "training_step_time": 0.44591522216796875
    },
    {
      "epoch": 0.00035516357421875,
      "grad_norm": 0.1222366914153099,
      "learning_rate": 2.485919095231326e-07,
      "loss": 0.039,
      "step": 58190
    },
    {
      "epoch": 0.00035516357421875,
      "model_forward_time": 0.11678791046142578,
      "step": 58190
    },
    {
      "epoch": 0.00035516357421875,
      "step": 58190,
      "training_step_time": 0.39618992805480957
    },
    {
      "epoch": 0.000355169677734375,
      "model_forward_time": 0.11572718620300293,
      "step": 58191
    },
    {
      "epoch": 0.000355169677734375,
      "step": 58191,
      "training_step_time": 0.40927958488464355
    },
    {
      "epoch": 0.00035517578125,
      "model_forward_time": 0.11570858955383301,
      "step": 58192
    },
    {
      "epoch": 0.00035517578125,
      "step": 58192,
      "training_step_time": 0.4168403148651123
    },
    {
      "epoch": 0.000355181884765625,
      "model_forward_time": 0.1151731014251709,
      "step": 58193
    },
    {
      "epoch": 0.000355181884765625,
      "step": 58193,
      "training_step_time": 0.4104008674621582
    },
    {
      "epoch": 0.00035518798828125,
      "model_forward_time": 0.11542272567749023,
      "step": 58194
    },
    {
      "epoch": 0.00035518798828125,
      "step": 58194,
      "training_step_time": 0.4107472896575928
    },
    {
      "epoch": 0.000355194091796875,
      "model_forward_time": 0.1156620979309082,
      "step": 58195
    },
    {
      "epoch": 0.000355194091796875,
      "step": 58195,
      "training_step_time": 0.40779781341552734
    },
    {
      "epoch": 0.0003552001953125,
      "model_forward_time": 0.1151266098022461,
      "step": 58196
    },
    {
      "epoch": 0.0003552001953125,
      "step": 58196,
      "training_step_time": 0.43799519538879395
    },
    {
      "epoch": 0.000355206298828125,
      "model_forward_time": 0.1158895492553711,
      "step": 58197
    },
    {
      "epoch": 0.000355206298828125,
      "step": 58197,
      "training_step_time": 0.4564075469970703
    },
    {
      "epoch": 0.00035521240234375,
      "model_forward_time": 0.11492443084716797,
      "step": 58198
    },
    {
      "epoch": 0.00035521240234375,
      "step": 58198,
      "training_step_time": 0.5137770175933838
    },
    {
      "epoch": 0.000355218505859375,
      "model_forward_time": 0.11519193649291992,
      "step": 58199
    },
    {
      "epoch": 0.000355218505859375,
      "step": 58199,
      "training_step_time": 0.4615354537963867
    },
    {
      "epoch": 0.000355224609375,
      "grad_norm": 0.10028932988643646,
      "learning_rate": 2.458548727494292e-07,
      "loss": 0.0361,
      "step": 58200
    },
    {
      "epoch": 0.000355224609375,
      "model_forward_time": 0.11516356468200684,
      "step": 58200
    },
    {
      "epoch": 0.000355224609375,
      "step": 58200,
      "training_step_time": 0.3922755718231201
    },
    {
      "epoch": 0.000355230712890625,
      "model_forward_time": 0.11533641815185547,
      "step": 58201
    },
    {
      "epoch": 0.000355230712890625,
      "step": 58201,
      "training_step_time": 0.41597557067871094
    },
    {
      "epoch": 0.00035523681640625,
      "model_forward_time": 0.11565947532653809,
      "step": 58202
    },
    {
      "epoch": 0.00035523681640625,
      "step": 58202,
      "training_step_time": 0.3906991481781006
    },
    {
      "epoch": 0.000355242919921875,
      "model_forward_time": 0.11513113975524902,
      "step": 58203
    },
    {
      "epoch": 0.000355242919921875,
      "step": 58203,
      "training_step_time": 0.38420891761779785
    },
    {
      "epoch": 0.0003552490234375,
      "model_forward_time": 0.11505341529846191,
      "step": 58204
    },
    {
      "epoch": 0.0003552490234375,
      "step": 58204,
      "training_step_time": 0.4138474464416504
    },
    {
      "epoch": 0.000355255126953125,
      "model_forward_time": 0.11497044563293457,
      "step": 58205
    },
    {
      "epoch": 0.000355255126953125,
      "step": 58205,
      "training_step_time": 0.45864295959472656
    },
    {
      "epoch": 0.00035526123046875,
      "model_forward_time": 0.11470532417297363,
      "step": 58206
    },
    {
      "epoch": 0.00035526123046875,
      "step": 58206,
      "training_step_time": 0.4026038646697998
    },
    {
      "epoch": 0.000355267333984375,
      "model_forward_time": 0.11594867706298828,
      "step": 58207
    },
    {
      "epoch": 0.000355267333984375,
      "step": 58207,
      "training_step_time": 0.487626314163208
    },
    {
      "epoch": 0.0003552734375,
      "model_forward_time": 0.11532473564147949,
      "step": 58208
    },
    {
      "epoch": 0.0003552734375,
      "step": 58208,
      "training_step_time": 0.38896894454956055
    },
    {
      "epoch": 0.000355279541015625,
      "model_forward_time": 0.11532998085021973,
      "step": 58209
    },
    {
      "epoch": 0.000355279541015625,
      "step": 58209,
      "training_step_time": 0.4017825126647949
    },
    {
      "epoch": 0.00035528564453125,
      "grad_norm": 0.08018165826797485,
      "learning_rate": 2.4313294997134195e-07,
      "loss": 0.0354,
      "step": 58210
    },
    {
      "epoch": 0.00035528564453125,
      "model_forward_time": 0.11765861511230469,
      "step": 58210
    },
    {
      "epoch": 0.00035528564453125,
      "step": 58210,
      "training_step_time": 0.44931602478027344
    },
    {
      "epoch": 0.000355291748046875,
      "model_forward_time": 0.11466193199157715,
      "step": 58211
    },
    {
      "epoch": 0.000355291748046875,
      "step": 58211,
      "training_step_time": 0.3949294090270996
    },
    {
      "epoch": 0.0003552978515625,
      "model_forward_time": 0.11508297920227051,
      "step": 58212
    },
    {
      "epoch": 0.0003552978515625,
      "step": 58212,
      "training_step_time": 0.48802971839904785
    },
    {
      "epoch": 0.000355303955078125,
      "model_forward_time": 0.11630463600158691,
      "step": 58213
    },
    {
      "epoch": 0.000355303955078125,
      "step": 58213,
      "training_step_time": 0.396878719329834
    },
    {
      "epoch": 0.00035531005859375,
      "model_forward_time": 0.1154012680053711,
      "step": 58214
    },
    {
      "epoch": 0.00035531005859375,
      "step": 58214,
      "training_step_time": 0.4320945739746094
    },
    {
      "epoch": 0.000355316162109375,
      "model_forward_time": 0.11505794525146484,
      "step": 58215
    },
    {
      "epoch": 0.000355316162109375,
      "step": 58215,
      "training_step_time": 0.40862059593200684
    },
    {
      "epoch": 0.000355322265625,
      "model_forward_time": 0.11548924446105957,
      "step": 58216
    },
    {
      "epoch": 0.000355322265625,
      "step": 58216,
      "training_step_time": 0.46393418312072754
    },
    {
      "epoch": 0.000355328369140625,
      "model_forward_time": 0.11484336853027344,
      "step": 58217
    },
    {
      "epoch": 0.000355328369140625,
      "step": 58217,
      "training_step_time": 0.3997790813446045
    },
    {
      "epoch": 0.00035533447265625,
      "model_forward_time": 0.11467981338500977,
      "step": 58218
    },
    {
      "epoch": 0.00035533447265625,
      "step": 58218,
      "training_step_time": 0.402057409286499
    },
    {
      "epoch": 0.000355340576171875,
      "model_forward_time": 0.11537957191467285,
      "step": 58219
    },
    {
      "epoch": 0.000355340576171875,
      "step": 58219,
      "training_step_time": 0.39676928520202637
    },
    {
      "epoch": 0.0003553466796875,
      "grad_norm": 0.10210760682821274,
      "learning_rate": 2.404261420157039e-07,
      "loss": 0.0381,
      "step": 58220
    },
    {
      "epoch": 0.0003553466796875,
      "model_forward_time": 0.11461615562438965,
      "step": 58220
    },
    {
      "epoch": 0.0003553466796875,
      "step": 58220,
      "training_step_time": 0.5073902606964111
    },
    {
      "epoch": 0.000355352783203125,
      "model_forward_time": 0.11472892761230469,
      "step": 58221
    },
    {
      "epoch": 0.000355352783203125,
      "step": 58221,
      "training_step_time": 0.42126917839050293
    },
    {
      "epoch": 0.00035535888671875,
      "model_forward_time": 0.1146097183227539,
      "step": 58222
    },
    {
      "epoch": 0.00035535888671875,
      "step": 58222,
      "training_step_time": 0.3949470520019531
    },
    {
      "epoch": 0.000355364990234375,
      "model_forward_time": 0.11529755592346191,
      "step": 58223
    },
    {
      "epoch": 0.000355364990234375,
      "step": 58223,
      "training_step_time": 0.39528965950012207
    },
    {
      "epoch": 0.00035537109375,
      "model_forward_time": 0.11453962326049805,
      "step": 58224
    },
    {
      "epoch": 0.00035537109375,
      "step": 58224,
      "training_step_time": 0.3966639041900635
    },
    {
      "epoch": 0.000355377197265625,
      "model_forward_time": 0.11538457870483398,
      "step": 58225
    },
    {
      "epoch": 0.000355377197265625,
      "step": 58225,
      "training_step_time": 0.43910813331604004
    },
    {
      "epoch": 0.00035538330078125,
      "model_forward_time": 0.11528611183166504,
      "step": 58226
    },
    {
      "epoch": 0.00035538330078125,
      "step": 58226,
      "training_step_time": 0.3700873851776123
    },
    {
      "epoch": 0.000355389404296875,
      "model_forward_time": 0.11496758460998535,
      "step": 58227
    },
    {
      "epoch": 0.000355389404296875,
      "step": 58227,
      "training_step_time": 0.4870617389678955
    },
    {
      "epoch": 0.0003553955078125,
      "model_forward_time": 0.11497879028320312,
      "step": 58228
    },
    {
      "epoch": 0.0003553955078125,
      "step": 58228,
      "training_step_time": 0.39955997467041016
    },
    {
      "epoch": 0.000355401611328125,
      "model_forward_time": 0.11639094352722168,
      "step": 58229
    },
    {
      "epoch": 0.000355401611328125,
      "step": 58229,
      "training_step_time": 0.4266653060913086
    },
    {
      "epoch": 0.00035540771484375,
      "grad_norm": 0.06851174682378769,
      "learning_rate": 2.3773444970477955e-07,
      "loss": 0.0395,
      "step": 58230
    },
    {
      "epoch": 0.00035540771484375,
      "model_forward_time": 0.11450910568237305,
      "step": 58230
    },
    {
      "epoch": 0.00035540771484375,
      "step": 58230,
      "training_step_time": 0.3998558521270752
    },
    {
      "epoch": 0.000355413818359375,
      "model_forward_time": 0.11493277549743652,
      "step": 58231
    },
    {
      "epoch": 0.000355413818359375,
      "step": 58231,
      "training_step_time": 0.39293599128723145
    },
    {
      "epoch": 0.000355419921875,
      "model_forward_time": 0.11517071723937988,
      "step": 58232
    },
    {
      "epoch": 0.000355419921875,
      "step": 58232,
      "training_step_time": 0.4066283702850342
    },
    {
      "epoch": 0.000355426025390625,
      "model_forward_time": 0.11431193351745605,
      "step": 58233
    },
    {
      "epoch": 0.000355426025390625,
      "step": 58233,
      "training_step_time": 0.40505361557006836
    },
    {
      "epoch": 0.00035543212890625,
      "model_forward_time": 0.11533665657043457,
      "step": 58234
    },
    {
      "epoch": 0.00035543212890625,
      "step": 58234,
      "training_step_time": 0.4241156578063965
    },
    {
      "epoch": 0.000355438232421875,
      "model_forward_time": 0.11491250991821289,
      "step": 58235
    },
    {
      "epoch": 0.000355438232421875,
      "step": 58235,
      "training_step_time": 0.41194629669189453
    },
    {
      "epoch": 0.0003554443359375,
      "model_forward_time": 0.1154170036315918,
      "step": 58236
    },
    {
      "epoch": 0.0003554443359375,
      "step": 58236,
      "training_step_time": 0.44558024406433105
    },
    {
      "epoch": 0.000355450439453125,
      "model_forward_time": 0.1149754524230957,
      "step": 58237
    },
    {
      "epoch": 0.000355450439453125,
      "step": 58237,
      "training_step_time": 0.39723968505859375
    },
    {
      "epoch": 0.00035545654296875,
      "model_forward_time": 0.11537528038024902,
      "step": 58238
    },
    {
      "epoch": 0.00035545654296875,
      "step": 58238,
      "training_step_time": 0.3976588249206543
    },
    {
      "epoch": 0.000355462646484375,
      "model_forward_time": 0.11493802070617676,
      "step": 58239
    },
    {
      "epoch": 0.000355462646484375,
      "step": 58239,
      "training_step_time": 0.392711877822876
    },
    {
      "epoch": 0.00035546875,
      "grad_norm": 0.08876363933086395,
      "learning_rate": 2.3505787385623702e-07,
      "loss": 0.0406,
      "step": 58240
    },
    {
      "epoch": 0.00035546875,
      "model_forward_time": 0.11500787734985352,
      "step": 58240
    },
    {
      "epoch": 0.00035546875,
      "step": 58240,
      "training_step_time": 0.40993618965148926
    },
    {
      "epoch": 0.000355474853515625,
      "model_forward_time": 0.11545562744140625,
      "step": 58241
    },
    {
      "epoch": 0.000355474853515625,
      "step": 58241,
      "training_step_time": 0.38598012924194336
    },
    {
      "epoch": 0.00035548095703125,
      "model_forward_time": 0.11617040634155273,
      "step": 58242
    },
    {
      "epoch": 0.00035548095703125,
      "step": 58242,
      "training_step_time": 0.49459171295166016
    },
    {
      "epoch": 0.000355487060546875,
      "model_forward_time": 0.11468338966369629,
      "step": 58243
    },
    {
      "epoch": 0.000355487060546875,
      "step": 58243,
      "training_step_time": 0.4555976390838623
    },
    {
      "epoch": 0.0003554931640625,
      "model_forward_time": 0.11484193801879883,
      "step": 58244
    },
    {
      "epoch": 0.0003554931640625,
      "step": 58244,
      "training_step_time": 0.3940918445587158
    },
    {
      "epoch": 0.000355499267578125,
      "model_forward_time": 0.11531281471252441,
      "step": 58245
    },
    {
      "epoch": 0.000355499267578125,
      "step": 58245,
      "training_step_time": 0.39820146560668945
    },
    {
      "epoch": 0.00035550537109375,
      "model_forward_time": 0.11522579193115234,
      "step": 58246
    },
    {
      "epoch": 0.00035550537109375,
      "step": 58246,
      "training_step_time": 0.3923318386077881
    },
    {
      "epoch": 0.000355511474609375,
      "model_forward_time": 0.11496973037719727,
      "step": 58247
    },
    {
      "epoch": 0.000355511474609375,
      "step": 58247,
      "training_step_time": 0.39545249938964844
    },
    {
      "epoch": 0.000355517578125,
      "model_forward_time": 0.11512899398803711,
      "step": 58248
    },
    {
      "epoch": 0.000355517578125,
      "step": 58248,
      "training_step_time": 0.4033520221710205
    },
    {
      "epoch": 0.000355523681640625,
      "model_forward_time": 0.11475443840026855,
      "step": 58249
    },
    {
      "epoch": 0.000355523681640625,
      "step": 58249,
      "training_step_time": 0.4657166004180908
    },
    {
      "epoch": 0.00035552978515625,
      "grad_norm": 0.09831558912992477,
      "learning_rate": 2.323964152831426e-07,
      "loss": 0.0394,
      "step": 58250
    },
    {
      "epoch": 0.00035552978515625,
      "model_forward_time": 0.11477947235107422,
      "step": 58250
    },
    {
      "epoch": 0.00035552978515625,
      "step": 58250,
      "training_step_time": 0.41705989837646484
    },
    {
      "epoch": 0.000355535888671875,
      "model_forward_time": 0.11457037925720215,
      "step": 58251
    },
    {
      "epoch": 0.000355535888671875,
      "step": 58251,
      "training_step_time": 0.5265049934387207
    },
    {
      "epoch": 0.0003555419921875,
      "model_forward_time": 0.11563467979431152,
      "step": 58252
    },
    {
      "epoch": 0.0003555419921875,
      "step": 58252,
      "training_step_time": 0.3922562599182129
    },
    {
      "epoch": 0.000355548095703125,
      "model_forward_time": 0.11490058898925781,
      "step": 58253
    },
    {
      "epoch": 0.000355548095703125,
      "step": 58253,
      "training_step_time": 0.40525150299072266
    },
    {
      "epoch": 0.00035555419921875,
      "model_forward_time": 0.11516404151916504,
      "step": 58254
    },
    {
      "epoch": 0.00035555419921875,
      "step": 58254,
      "training_step_time": 0.41590070724487305
    },
    {
      "epoch": 0.000355560302734375,
      "model_forward_time": 0.1153879165649414,
      "step": 58255
    },
    {
      "epoch": 0.000355560302734375,
      "step": 58255,
      "training_step_time": 0.4725182056427002
    },
    {
      "epoch": 0.00035556640625,
      "model_forward_time": 0.11486649513244629,
      "step": 58256
    },
    {
      "epoch": 0.00035556640625,
      "step": 58256,
      "training_step_time": 0.41724729537963867
    },
    {
      "epoch": 0.000355572509765625,
      "model_forward_time": 0.11468696594238281,
      "step": 58257
    },
    {
      "epoch": 0.000355572509765625,
      "step": 58257,
      "training_step_time": 0.4961376190185547
    },
    {
      "epoch": 0.00035557861328125,
      "model_forward_time": 0.1152336597442627,
      "step": 58258
    },
    {
      "epoch": 0.00035557861328125,
      "step": 58258,
      "training_step_time": 0.3848073482513428
    },
    {
      "epoch": 0.000355584716796875,
      "model_forward_time": 0.1144254207611084,
      "step": 58259
    },
    {
      "epoch": 0.000355584716796875,
      "step": 58259,
      "training_step_time": 0.39507222175598145
    },
    {
      "epoch": 0.0003555908203125,
      "grad_norm": 0.09538467973470688,
      "learning_rate": 2.2975007479397738e-07,
      "loss": 0.0355,
      "step": 58260
    },
    {
      "epoch": 0.0003555908203125,
      "model_forward_time": 0.11452150344848633,
      "step": 58260
    },
    {
      "epoch": 0.0003555908203125,
      "step": 58260,
      "training_step_time": 0.39711976051330566
    },
    {
      "epoch": 0.000355596923828125,
      "model_forward_time": 0.11475777626037598,
      "step": 58261
    },
    {
      "epoch": 0.000355596923828125,
      "step": 58261,
      "training_step_time": 0.3957555294036865
    },
    {
      "epoch": 0.00035560302734375,
      "model_forward_time": 0.11451864242553711,
      "step": 58262
    },
    {
      "epoch": 0.00035560302734375,
      "step": 58262,
      "training_step_time": 0.39136266708374023
    },
    {
      "epoch": 0.000355609130859375,
      "model_forward_time": 0.11548924446105957,
      "step": 58263
    },
    {
      "epoch": 0.000355609130859375,
      "step": 58263,
      "training_step_time": 0.39408016204833984
    },
    {
      "epoch": 0.000355615234375,
      "model_forward_time": 0.11481833457946777,
      "step": 58264
    },
    {
      "epoch": 0.000355615234375,
      "step": 58264,
      "training_step_time": 0.41973066329956055
    },
    {
      "epoch": 0.000355621337890625,
      "model_forward_time": 0.11550569534301758,
      "step": 58265
    },
    {
      "epoch": 0.000355621337890625,
      "step": 58265,
      "training_step_time": 0.6353623867034912
    },
    {
      "epoch": 0.00035562744140625,
      "model_forward_time": 0.11468005180358887,
      "step": 58266
    },
    {
      "epoch": 0.00035562744140625,
      "step": 58266,
      "training_step_time": 0.39957165718078613
    },
    {
      "epoch": 0.000355633544921875,
      "model_forward_time": 0.11516523361206055,
      "step": 58267
    },
    {
      "epoch": 0.000355633544921875,
      "step": 58267,
      "training_step_time": 0.4004087448120117
    },
    {
      "epoch": 0.0003556396484375,
      "model_forward_time": 0.11480522155761719,
      "step": 58268
    },
    {
      "epoch": 0.0003556396484375,
      "step": 58268,
      "training_step_time": 0.3896040916442871
    },
    {
      "epoch": 0.000355645751953125,
      "model_forward_time": 0.11645245552062988,
      "step": 58269
    },
    {
      "epoch": 0.000355645751953125,
      "step": 58269,
      "training_step_time": 0.49596667289733887
    },
    {
      "epoch": 0.00035565185546875,
      "grad_norm": 0.09309439361095428,
      "learning_rate": 2.2711885319263714e-07,
      "loss": 0.0382,
      "step": 58270
    },
    {
      "epoch": 0.00035565185546875,
      "model_forward_time": 0.1151280403137207,
      "step": 58270
    },
    {
      "epoch": 0.00035565185546875,
      "step": 58270,
      "training_step_time": 0.4693636894226074
    },
    {
      "epoch": 0.000355657958984375,
      "model_forward_time": 0.11506247520446777,
      "step": 58271
    },
    {
      "epoch": 0.000355657958984375,
      "step": 58271,
      "training_step_time": 0.6687803268432617
    },
    {
      "epoch": 0.0003556640625,
      "model_forward_time": 0.11394405364990234,
      "step": 58272
    },
    {
      "epoch": 0.0003556640625,
      "step": 58272,
      "training_step_time": 0.3949146270751953
    },
    {
      "epoch": 0.000355670166015625,
      "model_forward_time": 0.11408758163452148,
      "step": 58273
    },
    {
      "epoch": 0.000355670166015625,
      "step": 58273,
      "training_step_time": 0.407196044921875
    },
    {
      "epoch": 0.00035567626953125,
      "model_forward_time": 0.11435794830322266,
      "step": 58274
    },
    {
      "epoch": 0.00035567626953125,
      "step": 58274,
      "training_step_time": 0.3993513584136963
    },
    {
      "epoch": 0.000355682373046875,
      "model_forward_time": 0.11417198181152344,
      "step": 58275
    },
    {
      "epoch": 0.000355682373046875,
      "step": 58275,
      "training_step_time": 0.38419532775878906
    },
    {
      "epoch": 0.0003556884765625,
      "model_forward_time": 0.11458659172058105,
      "step": 58276
    },
    {
      "epoch": 0.0003556884765625,
      "step": 58276,
      "training_step_time": 0.39353203773498535
    },
    {
      "epoch": 0.000355694580078125,
      "model_forward_time": 0.11492490768432617,
      "step": 58277
    },
    {
      "epoch": 0.000355694580078125,
      "step": 58277,
      "training_step_time": 0.5513615608215332
    },
    {
      "epoch": 0.00035570068359375,
      "model_forward_time": 0.1151878833770752,
      "step": 58278
    },
    {
      "epoch": 0.00035570068359375,
      "step": 58278,
      "training_step_time": 0.4078357219696045
    },
    {
      "epoch": 0.000355706787109375,
      "model_forward_time": 0.11491894721984863,
      "step": 58279
    },
    {
      "epoch": 0.000355706787109375,
      "step": 58279,
      "training_step_time": 0.46450042724609375
    },
    {
      "epoch": 0.000355712890625,
      "grad_norm": 0.10600805282592773,
      "learning_rate": 2.2450275127841036e-07,
      "loss": 0.0381,
      "step": 58280
    },
    {
      "epoch": 0.000355712890625,
      "model_forward_time": 0.1146385669708252,
      "step": 58280
    },
    {
      "epoch": 0.000355712890625,
      "step": 58280,
      "training_step_time": 0.38553380966186523
    },
    {
      "epoch": 0.000355718994140625,
      "model_forward_time": 0.11533069610595703,
      "step": 58281
    },
    {
      "epoch": 0.000355718994140625,
      "step": 58281,
      "training_step_time": 0.3894462585449219
    },
    {
      "epoch": 0.00035572509765625,
      "model_forward_time": 0.11589241027832031,
      "step": 58282
    },
    {
      "epoch": 0.00035572509765625,
      "step": 58282,
      "training_step_time": 0.39667844772338867
    },
    {
      "epoch": 0.000355731201171875,
      "model_forward_time": 0.11441659927368164,
      "step": 58283
    },
    {
      "epoch": 0.000355731201171875,
      "step": 58283,
      "training_step_time": 0.6084332466125488
    },
    {
      "epoch": 0.0003557373046875,
      "model_forward_time": 0.11505603790283203,
      "step": 58284
    },
    {
      "epoch": 0.0003557373046875,
      "step": 58284,
      "training_step_time": 0.4846668243408203
    },
    {
      "epoch": 0.000355743408203125,
      "model_forward_time": 0.11474800109863281,
      "step": 58285
    },
    {
      "epoch": 0.000355743408203125,
      "step": 58285,
      "training_step_time": 0.48958301544189453
    },
    {
      "epoch": 0.00035574951171875,
      "model_forward_time": 0.11439132690429688,
      "step": 58286
    },
    {
      "epoch": 0.00035574951171875,
      "step": 58286,
      "training_step_time": 0.3884444236755371
    },
    {
      "epoch": 0.000355755615234375,
      "model_forward_time": 0.11494922637939453,
      "step": 58287
    },
    {
      "epoch": 0.000355755615234375,
      "step": 58287,
      "training_step_time": 0.3869965076446533
    },
    {
      "epoch": 0.00035576171875,
      "model_forward_time": 0.11396002769470215,
      "step": 58288
    },
    {
      "epoch": 0.00035576171875,
      "step": 58288,
      "training_step_time": 0.3837556838989258
    },
    {
      "epoch": 0.000355767822265625,
      "model_forward_time": 0.1151273250579834,
      "step": 58289
    },
    {
      "epoch": 0.000355767822265625,
      "step": 58289,
      "training_step_time": 0.4266483783721924
    },
    {
      "epoch": 0.00035577392578125,
      "grad_norm": 0.11777837574481964,
      "learning_rate": 2.219017698460002e-07,
      "loss": 0.041,
      "step": 58290
    },
    {
      "epoch": 0.00035577392578125,
      "model_forward_time": 0.1153569221496582,
      "step": 58290
    },
    {
      "epoch": 0.00035577392578125,
      "step": 58290,
      "training_step_time": 0.4075760841369629
    },
    {
      "epoch": 0.000355780029296875,
      "model_forward_time": 0.11608409881591797,
      "step": 58291
    },
    {
      "epoch": 0.000355780029296875,
      "step": 58291,
      "training_step_time": 0.45417118072509766
    },
    {
      "epoch": 0.0003557861328125,
      "model_forward_time": 0.11467671394348145,
      "step": 58292
    },
    {
      "epoch": 0.0003557861328125,
      "step": 58292,
      "training_step_time": 0.43347787857055664
    },
    {
      "epoch": 0.000355792236328125,
      "model_forward_time": 0.11437869071960449,
      "step": 58293
    },
    {
      "epoch": 0.000355792236328125,
      "step": 58293,
      "training_step_time": 0.38929224014282227
    },
    {
      "epoch": 0.00035579833984375,
      "model_forward_time": 0.11511397361755371,
      "step": 58294
    },
    {
      "epoch": 0.00035579833984375,
      "step": 58294,
      "training_step_time": 0.40057945251464844
    },
    {
      "epoch": 0.000355804443359375,
      "model_forward_time": 0.11475682258605957,
      "step": 58295
    },
    {
      "epoch": 0.000355804443359375,
      "step": 58295,
      "training_step_time": 0.5271942615509033
    },
    {
      "epoch": 0.000355810546875,
      "model_forward_time": 0.11490631103515625,
      "step": 58296
    },
    {
      "epoch": 0.000355810546875,
      "step": 58296,
      "training_step_time": 0.42742276191711426
    },
    {
      "epoch": 0.000355816650390625,
      "model_forward_time": 0.11428070068359375,
      "step": 58297
    },
    {
      "epoch": 0.000355816650390625,
      "step": 58297,
      "training_step_time": 0.43962645530700684
    },
    {
      "epoch": 0.00035582275390625,
      "model_forward_time": 0.11464500427246094,
      "step": 58298
    },
    {
      "epoch": 0.00035582275390625,
      "step": 58298,
      "training_step_time": 0.45533227920532227
    },
    {
      "epoch": 0.000355828857421875,
      "model_forward_time": 0.11473679542541504,
      "step": 58299
    },
    {
      "epoch": 0.000355828857421875,
      "step": 58299,
      "training_step_time": 0.610051155090332
    },
    {
      "epoch": 0.0003558349609375,
      "grad_norm": 0.07969437539577484,
      "learning_rate": 2.1931590968551908e-07,
      "loss": 0.034,
      "step": 58300
    },
    {
      "epoch": 0.0003558349609375,
      "model_forward_time": 0.11470246315002441,
      "step": 58300
    },
    {
      "epoch": 0.0003558349609375,
      "step": 58300,
      "training_step_time": 0.38521575927734375
    },
    {
      "epoch": 0.000355841064453125,
      "model_forward_time": 0.11443424224853516,
      "step": 58301
    },
    {
      "epoch": 0.000355841064453125,
      "step": 58301,
      "training_step_time": 0.3848702907562256
    },
    {
      "epoch": 0.00035584716796875,
      "model_forward_time": 0.11421036720275879,
      "step": 58302
    },
    {
      "epoch": 0.00035584716796875,
      "step": 58302,
      "training_step_time": 0.39331912994384766
    },
    {
      "epoch": 0.000355853271484375,
      "model_forward_time": 0.11528348922729492,
      "step": 58303
    },
    {
      "epoch": 0.000355853271484375,
      "step": 58303,
      "training_step_time": 0.39644718170166016
    },
    {
      "epoch": 0.000355859375,
      "model_forward_time": 0.11458134651184082,
      "step": 58304
    },
    {
      "epoch": 0.000355859375,
      "step": 58304,
      "training_step_time": 0.3989682197570801
    },
    {
      "epoch": 0.000355865478515625,
      "model_forward_time": 0.11559414863586426,
      "step": 58305
    },
    {
      "epoch": 0.000355865478515625,
      "step": 58305,
      "training_step_time": 0.4718918800354004
    },
    {
      "epoch": 0.00035587158203125,
      "model_forward_time": 0.11484766006469727,
      "step": 58306
    },
    {
      "epoch": 0.00035587158203125,
      "step": 58306,
      "training_step_time": 0.396801233291626
    },
    {
      "epoch": 0.000355877685546875,
      "model_forward_time": 0.11509561538696289,
      "step": 58307
    },
    {
      "epoch": 0.000355877685546875,
      "step": 58307,
      "training_step_time": 0.5524873733520508
    },
    {
      "epoch": 0.0003558837890625,
      "model_forward_time": 0.11460471153259277,
      "step": 58308
    },
    {
      "epoch": 0.0003558837890625,
      "step": 58308,
      "training_step_time": 0.4441871643066406
    },
    {
      "epoch": 0.000355889892578125,
      "model_forward_time": 0.11478638648986816,
      "step": 58309
    },
    {
      "epoch": 0.000355889892578125,
      "step": 58309,
      "training_step_time": 0.41127610206604004
    },
    {
      "epoch": 0.00035589599609375,
      "grad_norm": 0.10979656875133514,
      "learning_rate": 2.1674517158248308e-07,
      "loss": 0.0415,
      "step": 58310
    },
    {
      "epoch": 0.00035589599609375,
      "model_forward_time": 0.11503386497497559,
      "step": 58310
    },
    {
      "epoch": 0.00035589599609375,
      "step": 58310,
      "training_step_time": 0.39307379722595215
    },
    {
      "epoch": 0.000355902099609375,
      "model_forward_time": 0.11464238166809082,
      "step": 58311
    },
    {
      "epoch": 0.000355902099609375,
      "step": 58311,
      "training_step_time": 0.3652536869049072
    },
    {
      "epoch": 0.000355908203125,
      "model_forward_time": 0.11428093910217285,
      "step": 58312
    },
    {
      "epoch": 0.000355908203125,
      "step": 58312,
      "training_step_time": 0.4154384136199951
    },
    {
      "epoch": 0.000355914306640625,
      "model_forward_time": 0.11515331268310547,
      "step": 58313
    },
    {
      "epoch": 0.000355914306640625,
      "step": 58313,
      "training_step_time": 0.5430057048797607
    },
    {
      "epoch": 0.00035592041015625,
      "model_forward_time": 0.11493158340454102,
      "step": 58314
    },
    {
      "epoch": 0.00035592041015625,
      "step": 58314,
      "training_step_time": 0.40190577507019043
    },
    {
      "epoch": 0.000355926513671875,
      "model_forward_time": 0.11494755744934082,
      "step": 58315
    },
    {
      "epoch": 0.000355926513671875,
      "step": 58315,
      "training_step_time": 0.3933241367340088
    },
    {
      "epoch": 0.0003559326171875,
      "model_forward_time": 0.11451244354248047,
      "step": 58316
    },
    {
      "epoch": 0.0003559326171875,
      "step": 58316,
      "training_step_time": 0.39215755462646484
    },
    {
      "epoch": 0.000355938720703125,
      "model_forward_time": 0.11475992202758789,
      "step": 58317
    },
    {
      "epoch": 0.000355938720703125,
      "step": 58317,
      "training_step_time": 0.3842322826385498
    },
    {
      "epoch": 0.00035594482421875,
      "model_forward_time": 0.11524248123168945,
      "step": 58318
    },
    {
      "epoch": 0.00035594482421875,
      "step": 58318,
      "training_step_time": 0.38289952278137207
    },
    {
      "epoch": 0.000355950927734375,
      "model_forward_time": 0.11441898345947266,
      "step": 58319
    },
    {
      "epoch": 0.000355950927734375,
      "step": 58319,
      "training_step_time": 0.6383423805236816
    },
    {
      "epoch": 0.00035595703125,
      "grad_norm": 0.09702575951814651,
      "learning_rate": 2.1418955631781202e-07,
      "loss": 0.038,
      "step": 58320
    },
    {
      "epoch": 0.00035595703125,
      "model_forward_time": 0.11467909812927246,
      "step": 58320
    },
    {
      "epoch": 0.00035595703125,
      "step": 58320,
      "training_step_time": 0.40464234352111816
    },
    {
      "epoch": 0.000355963134765625,
      "model_forward_time": 0.11405754089355469,
      "step": 58321
    },
    {
      "epoch": 0.000355963134765625,
      "step": 58321,
      "training_step_time": 0.44594621658325195
    },
    {
      "epoch": 0.00035596923828125,
      "model_forward_time": 0.1143043041229248,
      "step": 58322
    },
    {
      "epoch": 0.00035596923828125,
      "step": 58322,
      "training_step_time": 0.43195080757141113
    },
    {
      "epoch": 0.000355975341796875,
      "model_forward_time": 0.11478018760681152,
      "step": 58323
    },
    {
      "epoch": 0.000355975341796875,
      "step": 58323,
      "training_step_time": 0.39289116859436035
    },
    {
      "epoch": 0.0003559814453125,
      "model_forward_time": 0.11429286003112793,
      "step": 58324
    },
    {
      "epoch": 0.0003559814453125,
      "step": 58324,
      "training_step_time": 0.3975107669830322
    },
    {
      "epoch": 0.000355987548828125,
      "model_forward_time": 0.11539888381958008,
      "step": 58325
    },
    {
      "epoch": 0.000355987548828125,
      "step": 58325,
      "training_step_time": 0.5813095569610596
    },
    {
      "epoch": 0.00035599365234375,
      "model_forward_time": 0.11495471000671387,
      "step": 58326
    },
    {
      "epoch": 0.00035599365234375,
      "step": 58326,
      "training_step_time": 0.4183189868927002
    },
    {
      "epoch": 0.000355999755859375,
      "model_forward_time": 0.11489486694335938,
      "step": 58327
    },
    {
      "epoch": 0.000355999755859375,
      "step": 58327,
      "training_step_time": 0.42357945442199707
    },
    {
      "epoch": 0.000356005859375,
      "model_forward_time": 0.11460518836975098,
      "step": 58328
    },
    {
      "epoch": 0.000356005859375,
      "step": 58328,
      "training_step_time": 0.4017910957336426
    },
    {
      "epoch": 0.000356011962890625,
      "model_forward_time": 0.11522531509399414,
      "step": 58329
    },
    {
      "epoch": 0.000356011962890625,
      "step": 58329,
      "training_step_time": 0.39151835441589355
    },
    {
      "epoch": 0.00035601806640625,
      "grad_norm": 0.10598193109035492,
      "learning_rate": 2.1164906466783485e-07,
      "loss": 0.0359,
      "step": 58330
    },
    {
      "epoch": 0.00035601806640625,
      "model_forward_time": 0.11470413208007812,
      "step": 58330
    },
    {
      "epoch": 0.00035601806640625,
      "step": 58330,
      "training_step_time": 0.3896169662475586
    },
    {
      "epoch": 0.000356024169921875,
      "model_forward_time": 0.11450409889221191,
      "step": 58331
    },
    {
      "epoch": 0.000356024169921875,
      "step": 58331,
      "training_step_time": 0.6919982433319092
    },
    {
      "epoch": 0.0003560302734375,
      "model_forward_time": 0.11449551582336426,
      "step": 58332
    },
    {
      "epoch": 0.0003560302734375,
      "step": 58332,
      "training_step_time": 0.38663363456726074
    },
    {
      "epoch": 0.000356036376953125,
      "model_forward_time": 0.11414718627929688,
      "step": 58333
    },
    {
      "epoch": 0.000356036376953125,
      "step": 58333,
      "training_step_time": 0.4326155185699463
    },
    {
      "epoch": 0.00035604248046875,
      "model_forward_time": 0.11468338966369629,
      "step": 58334
    },
    {
      "epoch": 0.00035604248046875,
      "step": 58334,
      "training_step_time": 0.4856588840484619
    },
    {
      "epoch": 0.000356048583984375,
      "model_forward_time": 0.11425924301147461,
      "step": 58335
    },
    {
      "epoch": 0.000356048583984375,
      "step": 58335,
      "training_step_time": 0.4170646667480469
    },
    {
      "epoch": 0.0003560546875,
      "model_forward_time": 0.11457657814025879,
      "step": 58336
    },
    {
      "epoch": 0.0003560546875,
      "step": 58336,
      "training_step_time": 0.3945026397705078
    },
    {
      "epoch": 0.000356060791015625,
      "model_forward_time": 0.11456036567687988,
      "step": 58337
    },
    {
      "epoch": 0.000356060791015625,
      "step": 58337,
      "training_step_time": 0.46280932426452637
    },
    {
      "epoch": 0.00035606689453125,
      "model_forward_time": 0.11493253707885742,
      "step": 58338
    },
    {
      "epoch": 0.00035606689453125,
      "step": 58338,
      "training_step_time": 0.3911170959472656
    },
    {
      "epoch": 0.000356072998046875,
      "model_forward_time": 0.11442303657531738,
      "step": 58339
    },
    {
      "epoch": 0.000356072998046875,
      "step": 58339,
      "training_step_time": 0.3650825023651123
    },
    {
      "epoch": 0.0003560791015625,
      "grad_norm": 0.09509184956550598,
      "learning_rate": 2.0912369740428983e-07,
      "loss": 0.0365,
      "step": 58340
    },
    {
      "epoch": 0.0003560791015625,
      "model_forward_time": 0.1147468090057373,
      "step": 58340
    },
    {
      "epoch": 0.0003560791015625,
      "step": 58340,
      "training_step_time": 0.4515979290008545
    },
    {
      "epoch": 0.000356085205078125,
      "model_forward_time": 0.11520147323608398,
      "step": 58341
    },
    {
      "epoch": 0.000356085205078125,
      "step": 58341,
      "training_step_time": 0.3996467590332031
    },
    {
      "epoch": 0.00035609130859375,
      "model_forward_time": 0.11495423316955566,
      "step": 58342
    },
    {
      "epoch": 0.00035609130859375,
      "step": 58342,
      "training_step_time": 0.38642263412475586
    },
    {
      "epoch": 0.000356097412109375,
      "model_forward_time": 0.11585187911987305,
      "step": 58343
    },
    {
      "epoch": 0.000356097412109375,
      "step": 58343,
      "training_step_time": 0.5215318202972412
    },
    {
      "epoch": 0.000356103515625,
      "model_forward_time": 0.11502647399902344,
      "step": 58344
    },
    {
      "epoch": 0.000356103515625,
      "step": 58344,
      "training_step_time": 0.4025123119354248
    },
    {
      "epoch": 0.000356109619140625,
      "model_forward_time": 0.11477160453796387,
      "step": 58345
    },
    {
      "epoch": 0.000356109619140625,
      "step": 58345,
      "training_step_time": 0.40073680877685547
    },
    {
      "epoch": 0.00035611572265625,
      "model_forward_time": 0.11604881286621094,
      "step": 58346
    },
    {
      "epoch": 0.00035611572265625,
      "step": 58346,
      "training_step_time": 0.38782453536987305
    },
    {
      "epoch": 0.000356121826171875,
      "model_forward_time": 0.11535954475402832,
      "step": 58347
    },
    {
      "epoch": 0.000356121826171875,
      "step": 58347,
      "training_step_time": 0.48664021492004395
    },
    {
      "epoch": 0.0003561279296875,
      "model_forward_time": 0.1148686408996582,
      "step": 58348
    },
    {
      "epoch": 0.0003561279296875,
      "step": 58348,
      "training_step_time": 0.38866472244262695
    },
    {
      "epoch": 0.000356134033203125,
      "model_forward_time": 0.11475634574890137,
      "step": 58349
    },
    {
      "epoch": 0.000356134033203125,
      "step": 58349,
      "training_step_time": 0.559293270111084
    },
    {
      "epoch": 0.00035614013671875,
      "grad_norm": 0.08494256436824799,
      "learning_rate": 2.0661345529430775e-07,
      "loss": 0.0393,
      "step": 58350
    },
    {
      "epoch": 0.00035614013671875,
      "model_forward_time": 0.1143808364868164,
      "step": 58350
    },
    {
      "epoch": 0.00035614013671875,
      "step": 58350,
      "training_step_time": 0.41841554641723633
    },
    {
      "epoch": 0.000356146240234375,
      "model_forward_time": 0.11484503746032715,
      "step": 58351
    },
    {
      "epoch": 0.000356146240234375,
      "step": 58351,
      "training_step_time": 0.4011046886444092
    },
    {
      "epoch": 0.00035615234375,
      "model_forward_time": 0.1146383285522461,
      "step": 58352
    },
    {
      "epoch": 0.00035615234375,
      "step": 58352,
      "training_step_time": 0.39670705795288086
    },
    {
      "epoch": 0.000356158447265625,
      "model_forward_time": 0.11450433731079102,
      "step": 58353
    },
    {
      "epoch": 0.000356158447265625,
      "step": 58353,
      "training_step_time": 0.46302151679992676
    },
    {
      "epoch": 0.00035616455078125,
      "model_forward_time": 0.1153569221496582,
      "step": 58354
    },
    {
      "epoch": 0.00035616455078125,
      "step": 58354,
      "training_step_time": 0.4329526424407959
    },
    {
      "epoch": 0.000356170654296875,
      "model_forward_time": 0.11558341979980469,
      "step": 58355
    },
    {
      "epoch": 0.000356170654296875,
      "step": 58355,
      "training_step_time": 0.5347611904144287
    },
    {
      "epoch": 0.0003561767578125,
      "model_forward_time": 0.11561846733093262,
      "step": 58356
    },
    {
      "epoch": 0.0003561767578125,
      "step": 58356,
      "training_step_time": 0.4540681838989258
    },
    {
      "epoch": 0.000356182861328125,
      "model_forward_time": 0.1147298812866211,
      "step": 58357
    },
    {
      "epoch": 0.000356182861328125,
      "step": 58357,
      "training_step_time": 0.3887803554534912
    },
    {
      "epoch": 0.00035618896484375,
      "model_forward_time": 0.11468982696533203,
      "step": 58358
    },
    {
      "epoch": 0.00035618896484375,
      "step": 58358,
      "training_step_time": 0.3844485282897949
    },
    {
      "epoch": 0.000356195068359375,
      "model_forward_time": 0.11518549919128418,
      "step": 58359
    },
    {
      "epoch": 0.000356195068359375,
      "step": 58359,
      "training_step_time": 0.3931562900543213
    },
    {
      "epoch": 0.000356201171875,
      "grad_norm": 0.07970720529556274,
      "learning_rate": 2.041183391004453e-07,
      "loss": 0.0406,
      "step": 58360
    },
    {
      "epoch": 0.000356201171875,
      "model_forward_time": 0.11475801467895508,
      "step": 58360
    },
    {
      "epoch": 0.000356201171875,
      "step": 58360,
      "training_step_time": 0.3848123550415039
    },
    {
      "epoch": 0.000356207275390625,
      "model_forward_time": 0.11521482467651367,
      "step": 58361
    },
    {
      "epoch": 0.000356207275390625,
      "step": 58361,
      "training_step_time": 0.5271015167236328
    },
    {
      "epoch": 0.00035621337890625,
      "model_forward_time": 0.11551809310913086,
      "step": 58362
    },
    {
      "epoch": 0.00035621337890625,
      "step": 58362,
      "training_step_time": 0.40489959716796875
    },
    {
      "epoch": 0.000356219482421875,
      "model_forward_time": 0.11501622200012207,
      "step": 58363
    },
    {
      "epoch": 0.000356219482421875,
      "step": 58363,
      "training_step_time": 0.42449951171875
    },
    {
      "epoch": 0.0003562255859375,
      "model_forward_time": 0.11605668067932129,
      "step": 58364
    },
    {
      "epoch": 0.0003562255859375,
      "step": 58364,
      "training_step_time": 0.38831591606140137
    },
    {
      "epoch": 0.000356231689453125,
      "model_forward_time": 0.11548852920532227,
      "step": 58365
    },
    {
      "epoch": 0.000356231689453125,
      "step": 58365,
      "training_step_time": 0.3903043270111084
    },
    {
      "epoch": 0.00035623779296875,
      "model_forward_time": 0.11573171615600586,
      "step": 58366
    },
    {
      "epoch": 0.00035623779296875,
      "step": 58366,
      "training_step_time": 0.3904435634613037
    },
    {
      "epoch": 0.000356243896484375,
      "model_forward_time": 0.11501193046569824,
      "step": 58367
    },
    {
      "epoch": 0.000356243896484375,
      "step": 58367,
      "training_step_time": 0.6499841213226318
    },
    {
      "epoch": 0.00035625,
      "model_forward_time": 0.11519432067871094,
      "step": 58368
    },
    {
      "epoch": 0.00035625,
      "step": 58368,
      "training_step_time": 0.44068384170532227
    },
    {
      "epoch": 0.000356256103515625,
      "model_forward_time": 0.11493277549743652,
      "step": 58369
    },
    {
      "epoch": 0.000356256103515625,
      "step": 58369,
      "training_step_time": 0.467543363571167
    },
    {
      "epoch": 0.00035626220703125,
      "grad_norm": 0.11402639746665955,
      "learning_rate": 2.0163834958064065e-07,
      "loss": 0.0344,
      "step": 58370
    },
    {
      "epoch": 0.00035626220703125,
      "model_forward_time": 0.11452198028564453,
      "step": 58370
    },
    {
      "epoch": 0.00035626220703125,
      "step": 58370,
      "training_step_time": 0.41477441787719727
    },
    {
      "epoch": 0.000356268310546875,
      "model_forward_time": 0.11443495750427246,
      "step": 58371
    },
    {
      "epoch": 0.000356268310546875,
      "step": 58371,
      "training_step_time": 0.3829684257507324
    },
    {
      "epoch": 0.0003562744140625,
      "model_forward_time": 0.11580705642700195,
      "step": 58372
    },
    {
      "epoch": 0.0003562744140625,
      "step": 58372,
      "training_step_time": 0.37879133224487305
    },
    {
      "epoch": 0.000356280517578125,
      "model_forward_time": 0.11480045318603516,
      "step": 58373
    },
    {
      "epoch": 0.000356280517578125,
      "step": 58373,
      "training_step_time": 0.4845585823059082
    },
    {
      "epoch": 0.00035628662109375,
      "model_forward_time": 0.11512446403503418,
      "step": 58374
    },
    {
      "epoch": 0.00035628662109375,
      "step": 58374,
      "training_step_time": 0.40444397926330566
    },
    {
      "epoch": 0.000356292724609375,
      "model_forward_time": 0.11487603187561035,
      "step": 58375
    },
    {
      "epoch": 0.000356292724609375,
      "step": 58375,
      "training_step_time": 0.45534610748291016
    },
    {
      "epoch": 0.000356298828125,
      "model_forward_time": 0.11578512191772461,
      "step": 58376
    },
    {
      "epoch": 0.000356298828125,
      "step": 58376,
      "training_step_time": 0.47204113006591797
    },
    {
      "epoch": 0.000356304931640625,
      "model_forward_time": 0.11471843719482422,
      "step": 58377
    },
    {
      "epoch": 0.000356304931640625,
      "step": 58377,
      "training_step_time": 0.4031980037689209
    },
    {
      "epoch": 0.00035631103515625,
      "model_forward_time": 0.11437034606933594,
      "step": 58378
    },
    {
      "epoch": 0.00035631103515625,
      "step": 58378,
      "training_step_time": 0.3992033004760742
    },
    {
      "epoch": 0.000356317138671875,
      "model_forward_time": 0.1155397891998291,
      "step": 58379
    },
    {
      "epoch": 0.000356317138671875,
      "step": 58379,
      "training_step_time": 0.4975011348724365
    },
    {
      "epoch": 0.0003563232421875,
      "grad_norm": 0.10826215893030167,
      "learning_rate": 1.9917348748826335e-07,
      "loss": 0.0359,
      "step": 58380
    },
    {
      "epoch": 0.0003563232421875,
      "model_forward_time": 0.11466574668884277,
      "step": 58380
    },
    {
      "epoch": 0.0003563232421875,
      "step": 58380,
      "training_step_time": 0.4005429744720459
    },
    {
      "epoch": 0.000356329345703125,
      "model_forward_time": 0.11515426635742188,
      "step": 58381
    },
    {
      "epoch": 0.000356329345703125,
      "step": 58381,
      "training_step_time": 0.40920424461364746
    },
    {
      "epoch": 0.00035633544921875,
      "model_forward_time": 0.1149141788482666,
      "step": 58382
    },
    {
      "epoch": 0.00035633544921875,
      "step": 58382,
      "training_step_time": 0.42040371894836426
    },
    {
      "epoch": 0.000356341552734375,
      "model_forward_time": 0.11478137969970703,
      "step": 58383
    },
    {
      "epoch": 0.000356341552734375,
      "step": 58383,
      "training_step_time": 0.47484421730041504
    },
    {
      "epoch": 0.00035634765625,
      "model_forward_time": 0.1146843433380127,
      "step": 58384
    },
    {
      "epoch": 0.00035634765625,
      "step": 58384,
      "training_step_time": 0.43833065032958984
    },
    {
      "epoch": 0.000356353759765625,
      "model_forward_time": 0.11517143249511719,
      "step": 58385
    },
    {
      "epoch": 0.000356353759765625,
      "step": 58385,
      "training_step_time": 0.40450024604797363
    },
    {
      "epoch": 0.00035635986328125,
      "model_forward_time": 0.11478304862976074,
      "step": 58386
    },
    {
      "epoch": 0.00035635986328125,
      "step": 58386,
      "training_step_time": 0.3957645893096924
    },
    {
      "epoch": 0.000356365966796875,
      "model_forward_time": 0.11526751518249512,
      "step": 58387
    },
    {
      "epoch": 0.000356365966796875,
      "step": 58387,
      "training_step_time": 0.3913438320159912
    },
    {
      "epoch": 0.0003563720703125,
      "model_forward_time": 0.11485958099365234,
      "step": 58388
    },
    {
      "epoch": 0.0003563720703125,
      "step": 58388,
      "training_step_time": 0.4437077045440674
    },
    {
      "epoch": 0.000356378173828125,
      "model_forward_time": 0.1150655746459961,
      "step": 58389
    },
    {
      "epoch": 0.000356378173828125,
      "step": 58389,
      "training_step_time": 0.41114115715026855
    },
    {
      "epoch": 0.00035638427734375,
      "grad_norm": 0.09497760981321335,
      "learning_rate": 1.9672375357206452e-07,
      "loss": 0.0342,
      "step": 58390
    },
    {
      "epoch": 0.00035638427734375,
      "model_forward_time": 0.11487698554992676,
      "step": 58390
    },
    {
      "epoch": 0.00035638427734375,
      "step": 58390,
      "training_step_time": 0.43360352516174316
    },
    {
      "epoch": 0.000356390380859375,
      "model_forward_time": 0.11475372314453125,
      "step": 58391
    },
    {
      "epoch": 0.000356390380859375,
      "step": 58391,
      "training_step_time": 0.6558878421783447
    },
    {
      "epoch": 0.000356396484375,
      "model_forward_time": 0.1151423454284668,
      "step": 58392
    },
    {
      "epoch": 0.000356396484375,
      "step": 58392,
      "training_step_time": 0.3965919017791748
    },
    {
      "epoch": 0.000356402587890625,
      "model_forward_time": 0.11397051811218262,
      "step": 58393
    },
    {
      "epoch": 0.000356402587890625,
      "step": 58393,
      "training_step_time": 0.395449161529541
    },
    {
      "epoch": 0.00035640869140625,
      "model_forward_time": 0.11456418037414551,
      "step": 58394
    },
    {
      "epoch": 0.00035640869140625,
      "step": 58394,
      "training_step_time": 0.3931691646575928
    },
    {
      "epoch": 0.000356414794921875,
      "model_forward_time": 0.1149599552154541,
      "step": 58395
    },
    {
      "epoch": 0.000356414794921875,
      "step": 58395,
      "training_step_time": 0.42385101318359375
    },
    {
      "epoch": 0.0003564208984375,
      "model_forward_time": 0.11449694633483887,
      "step": 58396
    },
    {
      "epoch": 0.0003564208984375,
      "step": 58396,
      "training_step_time": 0.36287665367126465
    },
    {
      "epoch": 0.000356427001953125,
      "model_forward_time": 0.11510777473449707,
      "step": 58397
    },
    {
      "epoch": 0.000356427001953125,
      "step": 58397,
      "training_step_time": 0.46268677711486816
    },
    {
      "epoch": 0.00035643310546875,
      "model_forward_time": 0.11549854278564453,
      "step": 58398
    },
    {
      "epoch": 0.00035643310546875,
      "step": 58398,
      "training_step_time": 0.3999326229095459
    },
    {
      "epoch": 0.000356439208984375,
      "model_forward_time": 0.11463356018066406,
      "step": 58399
    },
    {
      "epoch": 0.000356439208984375,
      "step": 58399,
      "training_step_time": 0.4082152843475342
    },
    {
      "epoch": 0.0003564453125,
      "grad_norm": 0.09371323138475418,
      "learning_rate": 1.942891485762044e-07,
      "loss": 0.0345,
      "step": 58400
    },
    {
      "epoch": 0.0003564453125,
      "model_forward_time": 0.11519217491149902,
      "step": 58400
    },
    {
      "epoch": 0.0003564453125,
      "step": 58400,
      "training_step_time": 0.40091991424560547
    },
    {
      "epoch": 0.000356451416015625,
      "model_forward_time": 0.11488986015319824,
      "step": 58401
    },
    {
      "epoch": 0.000356451416015625,
      "step": 58401,
      "training_step_time": 0.38918185234069824
    },
    {
      "epoch": 0.00035645751953125,
      "model_forward_time": 0.11552691459655762,
      "step": 58402
    },
    {
      "epoch": 0.00035645751953125,
      "step": 58402,
      "training_step_time": 0.40493011474609375
    },
    {
      "epoch": 0.000356463623046875,
      "model_forward_time": 0.11501455307006836,
      "step": 58403
    },
    {
      "epoch": 0.000356463623046875,
      "step": 58403,
      "training_step_time": 0.5741968154907227
    },
    {
      "epoch": 0.0003564697265625,
      "model_forward_time": 0.11492562294006348,
      "step": 58404
    },
    {
      "epoch": 0.0003564697265625,
      "step": 58404,
      "training_step_time": 0.493802547454834
    },
    {
      "epoch": 0.000356475830078125,
      "model_forward_time": 0.11487841606140137,
      "step": 58405
    },
    {
      "epoch": 0.000356475830078125,
      "step": 58405,
      "training_step_time": 0.3991711139678955
    },
    {
      "epoch": 0.00035648193359375,
      "model_forward_time": 0.11434817314147949,
      "step": 58406
    },
    {
      "epoch": 0.00035648193359375,
      "step": 58406,
      "training_step_time": 0.3941035270690918
    },
    {
      "epoch": 0.000356488037109375,
      "model_forward_time": 0.11474227905273438,
      "step": 58407
    },
    {
      "epoch": 0.000356488037109375,
      "step": 58407,
      "training_step_time": 0.3866996765136719
    },
    {
      "epoch": 0.000356494140625,
      "model_forward_time": 0.11523914337158203,
      "step": 58408
    },
    {
      "epoch": 0.000356494140625,
      "step": 58408,
      "training_step_time": 0.3938000202178955
    },
    {
      "epoch": 0.000356500244140625,
      "model_forward_time": 0.11512517929077148,
      "step": 58409
    },
    {
      "epoch": 0.000356500244140625,
      "step": 58409,
      "training_step_time": 0.5477707386016846
    },
    {
      "epoch": 0.00035650634765625,
      "grad_norm": 0.06743822246789932,
      "learning_rate": 1.918696732402636e-07,
      "loss": 0.0365,
      "step": 58410
    },
    {
      "epoch": 0.00035650634765625,
      "model_forward_time": 0.11541390419006348,
      "step": 58410
    },
    {
      "epoch": 0.00035650634765625,
      "step": 58410,
      "training_step_time": 0.4712216854095459
    },
    {
      "epoch": 0.000356512451171875,
      "model_forward_time": 0.11599445343017578,
      "step": 58411
    },
    {
      "epoch": 0.000356512451171875,
      "step": 58411,
      "training_step_time": 0.47621846199035645
    },
    {
      "epoch": 0.0003565185546875,
      "model_forward_time": 0.11467242240905762,
      "step": 58412
    },
    {
      "epoch": 0.0003565185546875,
      "step": 58412,
      "training_step_time": 0.4162275791168213
    },
    {
      "epoch": 0.000356524658203125,
      "model_forward_time": 0.11465334892272949,
      "step": 58413
    },
    {
      "epoch": 0.000356524658203125,
      "step": 58413,
      "training_step_time": 0.4280734062194824
    },
    {
      "epoch": 0.00035653076171875,
      "model_forward_time": 0.11498165130615234,
      "step": 58414
    },
    {
      "epoch": 0.00035653076171875,
      "step": 58414,
      "training_step_time": 0.41362905502319336
    },
    {
      "epoch": 0.000356536865234375,
      "model_forward_time": 0.1144859790802002,
      "step": 58415
    },
    {
      "epoch": 0.000356536865234375,
      "step": 58415,
      "training_step_time": 0.44635438919067383
    },
    {
      "epoch": 0.00035654296875,
      "model_forward_time": 0.11492466926574707,
      "step": 58416
    },
    {
      "epoch": 0.00035654296875,
      "step": 58416,
      "training_step_time": 0.46288418769836426
    },
    {
      "epoch": 0.000356549072265625,
      "model_forward_time": 0.11572027206420898,
      "step": 58417
    },
    {
      "epoch": 0.000356549072265625,
      "step": 58417,
      "training_step_time": 0.43668150901794434
    },
    {
      "epoch": 0.00035655517578125,
      "model_forward_time": 0.11526823043823242,
      "step": 58418
    },
    {
      "epoch": 0.00035655517578125,
      "step": 58418,
      "training_step_time": 0.39984893798828125
    },
    {
      "epoch": 0.000356561279296875,
      "model_forward_time": 0.11550474166870117,
      "step": 58419
    },
    {
      "epoch": 0.000356561279296875,
      "step": 58419,
      "training_step_time": 0.39195942878723145
    },
    {
      "epoch": 0.0003565673828125,
      "grad_norm": 0.09706535190343857,
      "learning_rate": 1.8946532829920426e-07,
      "loss": 0.0378,
      "step": 58420
    },
    {
      "epoch": 0.0003565673828125,
      "model_forward_time": 0.11563897132873535,
      "step": 58420
    },
    {
      "epoch": 0.0003565673828125,
      "step": 58420,
      "training_step_time": 0.39940643310546875
    },
    {
      "epoch": 0.000356573486328125,
      "model_forward_time": 0.11504435539245605,
      "step": 58421
    },
    {
      "epoch": 0.000356573486328125,
      "step": 58421,
      "training_step_time": 0.3898134231567383
    },
    {
      "epoch": 0.00035657958984375,
      "model_forward_time": 0.11518573760986328,
      "step": 58422
    },
    {
      "epoch": 0.00035657958984375,
      "step": 58422,
      "training_step_time": 0.40466809272766113
    },
    {
      "epoch": 0.000356585693359375,
      "model_forward_time": 0.11526989936828613,
      "step": 58423
    },
    {
      "epoch": 0.000356585693359375,
      "step": 58423,
      "training_step_time": 0.3967711925506592
    },
    {
      "epoch": 0.000356591796875,
      "model_forward_time": 0.11475968360900879,
      "step": 58424
    },
    {
      "epoch": 0.000356591796875,
      "step": 58424,
      "training_step_time": 0.38765597343444824
    },
    {
      "epoch": 0.000356597900390625,
      "model_forward_time": 0.11499667167663574,
      "step": 58425
    },
    {
      "epoch": 0.000356597900390625,
      "step": 58425,
      "training_step_time": 0.4028444290161133
    },
    {
      "epoch": 0.00035660400390625,
      "model_forward_time": 0.11533308029174805,
      "step": 58426
    },
    {
      "epoch": 0.00035660400390625,
      "step": 58426,
      "training_step_time": 0.39743638038635254
    },
    {
      "epoch": 0.000356610107421875,
      "model_forward_time": 0.11614727973937988,
      "step": 58427
    },
    {
      "epoch": 0.000356610107421875,
      "step": 58427,
      "training_step_time": 0.6184232234954834
    },
    {
      "epoch": 0.0003566162109375,
      "model_forward_time": 0.11466598510742188,
      "step": 58428
    },
    {
      "epoch": 0.0003566162109375,
      "step": 58428,
      "training_step_time": 0.4201850891113281
    },
    {
      "epoch": 0.000356622314453125,
      "model_forward_time": 0.1142885684967041,
      "step": 58429
    },
    {
      "epoch": 0.000356622314453125,
      "step": 58429,
      "training_step_time": 0.43108630180358887
    },
    {
      "epoch": 0.00035662841796875,
      "grad_norm": 0.09656776487827301,
      "learning_rate": 1.870761144834088e-07,
      "loss": 0.0376,
      "step": 58430
    },
    {
      "epoch": 0.00035662841796875,
      "model_forward_time": 0.11513113975524902,
      "step": 58430
    },
    {
      "epoch": 0.00035662841796875,
      "step": 58430,
      "training_step_time": 0.49291181564331055
    },
    {
      "epoch": 0.000356634521484375,
      "model_forward_time": 0.11455655097961426,
      "step": 58431
    },
    {
      "epoch": 0.000356634521484375,
      "step": 58431,
      "training_step_time": 0.41754984855651855
    },
    {
      "epoch": 0.000356640625,
      "model_forward_time": 0.11396479606628418,
      "step": 58432
    },
    {
      "epoch": 0.000356640625,
      "step": 58432,
      "training_step_time": 0.463198184967041
    },
    {
      "epoch": 0.000356646728515625,
      "model_forward_time": 0.11473250389099121,
      "step": 58433
    },
    {
      "epoch": 0.000356646728515625,
      "step": 58433,
      "training_step_time": 0.444028377532959
    },
    {
      "epoch": 0.00035665283203125,
      "model_forward_time": 0.11629676818847656,
      "step": 58434
    },
    {
      "epoch": 0.00035665283203125,
      "step": 58434,
      "training_step_time": 0.38863086700439453
    },
    {
      "epoch": 0.000356658935546875,
      "model_forward_time": 0.11407661437988281,
      "step": 58435
    },
    {
      "epoch": 0.000356658935546875,
      "step": 58435,
      "training_step_time": 0.3840780258178711
    },
    {
      "epoch": 0.0003566650390625,
      "model_forward_time": 0.11647653579711914,
      "step": 58436
    },
    {
      "epoch": 0.0003566650390625,
      "step": 58436,
      "training_step_time": 0.38070178031921387
    },
    {
      "epoch": 0.000356671142578125,
      "model_forward_time": 0.11529159545898438,
      "step": 58437
    },
    {
      "epoch": 0.000356671142578125,
      "step": 58437,
      "training_step_time": 0.3913278579711914
    },
    {
      "epoch": 0.00035667724609375,
      "model_forward_time": 0.11472845077514648,
      "step": 58438
    },
    {
      "epoch": 0.00035667724609375,
      "step": 58438,
      "training_step_time": 0.4692349433898926
    },
    {
      "epoch": 0.000356683349609375,
      "model_forward_time": 0.11467647552490234,
      "step": 58439
    },
    {
      "epoch": 0.000356683349609375,
      "step": 58439,
      "training_step_time": 0.6009504795074463
    },
    {
      "epoch": 0.000356689453125,
      "grad_norm": 0.08008630573749542,
      "learning_rate": 1.847020325186577e-07,
      "loss": 0.0381,
      "step": 58440
    },
    {
      "epoch": 0.000356689453125,
      "model_forward_time": 0.11449027061462402,
      "step": 58440
    },
    {
      "epoch": 0.000356689453125,
      "step": 58440,
      "training_step_time": 0.44390296936035156
    },
    {
      "epoch": 0.000356695556640625,
      "model_forward_time": 0.11466217041015625,
      "step": 58441
    },
    {
      "epoch": 0.000356695556640625,
      "step": 58441,
      "training_step_time": 0.49109983444213867
    },
    {
      "epoch": 0.00035670166015625,
      "model_forward_time": 0.11444902420043945,
      "step": 58442
    },
    {
      "epoch": 0.00035670166015625,
      "step": 58442,
      "training_step_time": 0.4045710563659668
    },
    {
      "epoch": 0.000356707763671875,
      "model_forward_time": 0.114898681640625,
      "step": 58443
    },
    {
      "epoch": 0.000356707763671875,
      "step": 58443,
      "training_step_time": 0.3992645740509033
    },
    {
      "epoch": 0.0003567138671875,
      "model_forward_time": 0.11449432373046875,
      "step": 58444
    },
    {
      "epoch": 0.0003567138671875,
      "step": 58444,
      "training_step_time": 0.4053828716278076
    },
    {
      "epoch": 0.000356719970703125,
      "model_forward_time": 0.11451339721679688,
      "step": 58445
    },
    {
      "epoch": 0.000356719970703125,
      "step": 58445,
      "training_step_time": 0.493227481842041
    },
    {
      "epoch": 0.00035672607421875,
      "model_forward_time": 0.11478257179260254,
      "step": 58446
    },
    {
      "epoch": 0.00035672607421875,
      "step": 58446,
      "training_step_time": 0.41309523582458496
    },
    {
      "epoch": 0.000356732177734375,
      "model_forward_time": 0.11486959457397461,
      "step": 58447
    },
    {
      "epoch": 0.000356732177734375,
      "step": 58447,
      "training_step_time": 0.39252758026123047
    },
    {
      "epoch": 0.00035673828125,
      "model_forward_time": 0.11567568778991699,
      "step": 58448
    },
    {
      "epoch": 0.00035673828125,
      "step": 58448,
      "training_step_time": 0.3900895118713379
    },
    {
      "epoch": 0.000356744384765625,
      "model_forward_time": 0.11553478240966797,
      "step": 58449
    },
    {
      "epoch": 0.000356744384765625,
      "step": 58449,
      "training_step_time": 0.3908686637878418
    },
    {
      "epoch": 0.00035675048828125,
      "grad_norm": 0.1101391464471817,
      "learning_rate": 1.8234308312612968e-07,
      "loss": 0.0351,
      "step": 58450
    },
    {
      "epoch": 0.00035675048828125,
      "model_forward_time": 0.1148519515991211,
      "step": 58450
    },
    {
      "epoch": 0.00035675048828125,
      "step": 58450,
      "training_step_time": 0.3985445499420166
    },
    {
      "epoch": 0.000356756591796875,
      "model_forward_time": 0.11522674560546875,
      "step": 58451
    },
    {
      "epoch": 0.000356756591796875,
      "step": 58451,
      "training_step_time": 0.7847681045532227
    },
    {
      "epoch": 0.0003567626953125,
      "model_forward_time": 0.11462020874023438,
      "step": 58452
    },
    {
      "epoch": 0.0003567626953125,
      "step": 58452,
      "training_step_time": 0.40991640090942383
    },
    {
      "epoch": 0.000356768798828125,
      "model_forward_time": 0.11467313766479492,
      "step": 58453
    },
    {
      "epoch": 0.000356768798828125,
      "step": 58453,
      "training_step_time": 0.4102177619934082
    },
    {
      "epoch": 0.00035677490234375,
      "model_forward_time": 0.1143798828125,
      "step": 58454
    },
    {
      "epoch": 0.00035677490234375,
      "step": 58454,
      "training_step_time": 0.4194149971008301
    },
    {
      "epoch": 0.000356781005859375,
      "model_forward_time": 0.11402559280395508,
      "step": 58455
    },
    {
      "epoch": 0.000356781005859375,
      "step": 58455,
      "training_step_time": 2.872727870941162
    },
    {
      "epoch": 0.000356787109375,
      "model_forward_time": 0.11223340034484863,
      "step": 58456
    },
    {
      "epoch": 0.000356787109375,
      "step": 58456,
      "training_step_time": 0.3669466972351074
    },
    {
      "epoch": 0.000356793212890625,
      "model_forward_time": 0.11227941513061523,
      "step": 58457
    },
    {
      "epoch": 0.000356793212890625,
      "step": 58457,
      "training_step_time": 0.37082862854003906
    },
    {
      "epoch": 0.00035679931640625,
      "model_forward_time": 0.1132349967956543,
      "step": 58458
    },
    {
      "epoch": 0.00035679931640625,
      "step": 58458,
      "training_step_time": 0.38182640075683594
    },
    {
      "epoch": 0.000356805419921875,
      "model_forward_time": 0.11328339576721191,
      "step": 58459
    },
    {
      "epoch": 0.000356805419921875,
      "step": 58459,
      "training_step_time": 0.3822309970855713
    },
    {
      "epoch": 0.0003568115234375,
      "grad_norm": 0.09550106525421143,
      "learning_rate": 1.799992670224182e-07,
      "loss": 0.041,
      "step": 58460
    },
    {
      "epoch": 0.0003568115234375,
      "model_forward_time": 0.11450505256652832,
      "step": 58460
    },
    {
      "epoch": 0.0003568115234375,
      "step": 58460,
      "training_step_time": 0.3836061954498291
    },
    {
      "epoch": 0.000356817626953125,
      "model_forward_time": 0.11508631706237793,
      "step": 58461
    },
    {
      "epoch": 0.000356817626953125,
      "step": 58461,
      "training_step_time": 0.43123531341552734
    },
    {
      "epoch": 0.00035682373046875,
      "model_forward_time": 0.11489987373352051,
      "step": 58462
    },
    {
      "epoch": 0.00035682373046875,
      "step": 58462,
      "training_step_time": 0.40460872650146484
    },
    {
      "epoch": 0.000356829833984375,
      "model_forward_time": 0.11496901512145996,
      "step": 58463
    },
    {
      "epoch": 0.000356829833984375,
      "step": 58463,
      "training_step_time": 0.4454319477081299
    },
    {
      "epoch": 0.0003568359375,
      "model_forward_time": 0.1157073974609375,
      "step": 58464
    },
    {
      "epoch": 0.0003568359375,
      "step": 58464,
      "training_step_time": 0.48517942428588867
    },
    {
      "epoch": 0.000356842041015625,
      "model_forward_time": 0.11510014533996582,
      "step": 58465
    },
    {
      "epoch": 0.000356842041015625,
      "step": 58465,
      "training_step_time": 0.3971841335296631
    },
    {
      "epoch": 0.00035684814453125,
      "model_forward_time": 0.11495661735534668,
      "step": 58466
    },
    {
      "epoch": 0.00035684814453125,
      "step": 58466,
      "training_step_time": 0.39946413040161133
    },
    {
      "epoch": 0.000356854248046875,
      "model_forward_time": 0.11541390419006348,
      "step": 58467
    },
    {
      "epoch": 0.000356854248046875,
      "step": 58467,
      "training_step_time": 0.41190409660339355
    },
    {
      "epoch": 0.0003568603515625,
      "model_forward_time": 0.11486530303955078,
      "step": 58468
    },
    {
      "epoch": 0.0003568603515625,
      "step": 58468,
      "training_step_time": 0.46573638916015625
    },
    {
      "epoch": 0.000356866455078125,
      "model_forward_time": 0.11451029777526855,
      "step": 58469
    },
    {
      "epoch": 0.000356866455078125,
      "step": 58469,
      "training_step_time": 0.4024944305419922
    },
    {
      "epoch": 0.00035687255859375,
      "grad_norm": 0.08789686113595963,
      "learning_rate": 1.776705849195037e-07,
      "loss": 0.0396,
      "step": 58470
    },
    {
      "epoch": 0.00035687255859375,
      "model_forward_time": 0.11514711380004883,
      "step": 58470
    },
    {
      "epoch": 0.00035687255859375,
      "step": 58470,
      "training_step_time": 0.4233379364013672
    },
    {
      "epoch": 0.000356878662109375,
      "model_forward_time": 0.11509513854980469,
      "step": 58471
    },
    {
      "epoch": 0.000356878662109375,
      "step": 58471,
      "training_step_time": 0.39430737495422363
    },
    {
      "epoch": 0.000356884765625,
      "model_forward_time": 0.11490035057067871,
      "step": 58472
    },
    {
      "epoch": 0.000356884765625,
      "step": 58472,
      "training_step_time": 0.3987700939178467
    },
    {
      "epoch": 0.000356890869140625,
      "model_forward_time": 0.1153874397277832,
      "step": 58473
    },
    {
      "epoch": 0.000356890869140625,
      "step": 58473,
      "training_step_time": 0.3959836959838867
    },
    {
      "epoch": 0.00035689697265625,
      "model_forward_time": 0.11519718170166016,
      "step": 58474
    },
    {
      "epoch": 0.00035689697265625,
      "step": 58474,
      "training_step_time": 0.39435815811157227
    },
    {
      "epoch": 0.000356903076171875,
      "model_forward_time": 0.11489605903625488,
      "step": 58475
    },
    {
      "epoch": 0.000356903076171875,
      "step": 58475,
      "training_step_time": 0.3973422050476074
    },
    {
      "epoch": 0.0003569091796875,
      "model_forward_time": 0.11522459983825684,
      "step": 58476
    },
    {
      "epoch": 0.0003569091796875,
      "step": 58476,
      "training_step_time": 0.4322021007537842
    },
    {
      "epoch": 0.000356915283203125,
      "model_forward_time": 0.11552238464355469,
      "step": 58477
    },
    {
      "epoch": 0.000356915283203125,
      "step": 58477,
      "training_step_time": 0.42313170433044434
    },
    {
      "epoch": 0.00035692138671875,
      "model_forward_time": 0.11495590209960938,
      "step": 58478
    },
    {
      "epoch": 0.00035692138671875,
      "step": 58478,
      "training_step_time": 0.46688175201416016
    },
    {
      "epoch": 0.000356927490234375,
      "model_forward_time": 0.11481356620788574,
      "step": 58479
    },
    {
      "epoch": 0.000356927490234375,
      "step": 58479,
      "training_step_time": 0.41211795806884766
    },
    {
      "epoch": 0.00035693359375,
      "grad_norm": 0.05408144369721413,
      "learning_rate": 1.753570375247815e-07,
      "loss": 0.0388,
      "step": 58480
    },
    {
      "epoch": 0.00035693359375,
      "model_forward_time": 0.11481666564941406,
      "step": 58480
    },
    {
      "epoch": 0.00035693359375,
      "step": 58480,
      "training_step_time": 0.4199683666229248
    },
    {
      "epoch": 0.000356939697265625,
      "model_forward_time": 0.11540770530700684,
      "step": 58481
    },
    {
      "epoch": 0.000356939697265625,
      "step": 58481,
      "training_step_time": 0.43222808837890625
    },
    {
      "epoch": 0.00035694580078125,
      "model_forward_time": 0.11544156074523926,
      "step": 58482
    },
    {
      "epoch": 0.00035694580078125,
      "step": 58482,
      "training_step_time": 0.42329835891723633
    },
    {
      "epoch": 0.000356951904296875,
      "model_forward_time": 0.11501908302307129,
      "step": 58483
    },
    {
      "epoch": 0.000356951904296875,
      "step": 58483,
      "training_step_time": 0.41936755180358887
    },
    {
      "epoch": 0.0003569580078125,
      "model_forward_time": 0.11533784866333008,
      "step": 58484
    },
    {
      "epoch": 0.0003569580078125,
      "step": 58484,
      "training_step_time": 0.39588308334350586
    },
    {
      "epoch": 0.000356964111328125,
      "model_forward_time": 0.11509847640991211,
      "step": 58485
    },
    {
      "epoch": 0.000356964111328125,
      "step": 58485,
      "training_step_time": 0.38906073570251465
    },
    {
      "epoch": 0.00035697021484375,
      "model_forward_time": 0.11526703834533691,
      "step": 58486
    },
    {
      "epoch": 0.00035697021484375,
      "step": 58486,
      "training_step_time": 0.4028501510620117
    },
    {
      "epoch": 0.000356976318359375,
      "model_forward_time": 0.11461448669433594,
      "step": 58487
    },
    {
      "epoch": 0.000356976318359375,
      "step": 58487,
      "training_step_time": 0.4013214111328125
    },
    {
      "epoch": 0.000356982421875,
      "model_forward_time": 0.11513543128967285,
      "step": 58488
    },
    {
      "epoch": 0.000356982421875,
      "step": 58488,
      "training_step_time": 0.4048421382904053
    },
    {
      "epoch": 0.000356988525390625,
      "model_forward_time": 0.11482071876525879,
      "step": 58489
    },
    {
      "epoch": 0.000356988525390625,
      "step": 58489,
      "training_step_time": 0.394991397857666
    },
    {
      "epoch": 0.00035699462890625,
      "grad_norm": 0.10644907504320145,
      "learning_rate": 1.73058625541056e-07,
      "loss": 0.0329,
      "step": 58490
    },
    {
      "epoch": 0.00035699462890625,
      "model_forward_time": 0.11504793167114258,
      "step": 58490
    },
    {
      "epoch": 0.00035699462890625,
      "step": 58490,
      "training_step_time": 0.4086759090423584
    },
    {
      "epoch": 0.000357000732421875,
      "model_forward_time": 0.11525487899780273,
      "step": 58491
    },
    {
      "epoch": 0.000357000732421875,
      "step": 58491,
      "training_step_time": 0.4278874397277832
    },
    {
      "epoch": 0.0003570068359375,
      "model_forward_time": 0.11539578437805176,
      "step": 58492
    },
    {
      "epoch": 0.0003570068359375,
      "step": 58492,
      "training_step_time": 0.5426812171936035
    },
    {
      "epoch": 0.000357012939453125,
      "model_forward_time": 0.11507177352905273,
      "step": 58493
    },
    {
      "epoch": 0.000357012939453125,
      "step": 58493,
      "training_step_time": 0.42699122428894043
    },
    {
      "epoch": 0.00035701904296875,
      "model_forward_time": 0.11541581153869629,
      "step": 58494
    },
    {
      "epoch": 0.00035701904296875,
      "step": 58494,
      "training_step_time": 0.4985346794128418
    },
    {
      "epoch": 0.000357025146484375,
      "model_forward_time": 0.11430644989013672,
      "step": 58495
    },
    {
      "epoch": 0.000357025146484375,
      "step": 58495,
      "training_step_time": 0.4141674041748047
    },
    {
      "epoch": 0.00035703125,
      "model_forward_time": 0.11477541923522949,
      "step": 58496
    },
    {
      "epoch": 0.00035703125,
      "step": 58496,
      "training_step_time": 0.395488977432251
    },
    {
      "epoch": 0.000357037353515625,
      "model_forward_time": 0.11446285247802734,
      "step": 58497
    },
    {
      "epoch": 0.000357037353515625,
      "step": 58497,
      "training_step_time": 0.46128249168395996
    },
    {
      "epoch": 0.00035704345703125,
      "model_forward_time": 0.11597585678100586,
      "step": 58498
    },
    {
      "epoch": 0.00035704345703125,
      "step": 58498,
      "training_step_time": 0.39934492111206055
    },
    {
      "epoch": 0.000357049560546875,
      "model_forward_time": 0.1147003173828125,
      "step": 58499
    },
    {
      "epoch": 0.000357049560546875,
      "step": 58499,
      "training_step_time": 0.408083438873291
    },
    {
      "epoch": 0.0003570556640625,
      "grad_norm": 0.08362559229135513,
      "learning_rate": 1.7077534966650766e-07,
      "loss": 0.0327,
      "step": 58500
    },
    {
      "epoch": 0.0003570556640625,
      "model_forward_time": 0.1146233081817627,
      "step": 58500
    },
    {
      "epoch": 0.0003570556640625,
      "step": 58500,
      "training_step_time": 0.3964231014251709
    },
    {
      "epoch": 0.000357061767578125,
      "model_forward_time": 0.1153421401977539,
      "step": 58501
    },
    {
      "epoch": 0.000357061767578125,
      "step": 58501,
      "training_step_time": 0.39710021018981934
    },
    {
      "epoch": 0.00035706787109375,
      "model_forward_time": 0.11569905281066895,
      "step": 58502
    },
    {
      "epoch": 0.00035706787109375,
      "step": 58502,
      "training_step_time": 0.39789843559265137
    },
    {
      "epoch": 0.000357073974609375,
      "model_forward_time": 0.1149446964263916,
      "step": 58503
    },
    {
      "epoch": 0.000357073974609375,
      "step": 58503,
      "training_step_time": 0.4269254207611084
    },
    {
      "epoch": 0.000357080078125,
      "model_forward_time": 0.11486148834228516,
      "step": 58504
    },
    {
      "epoch": 0.000357080078125,
      "step": 58504,
      "training_step_time": 0.3977186679840088
    },
    {
      "epoch": 0.000357086181640625,
      "model_forward_time": 0.11517977714538574,
      "step": 58505
    },
    {
      "epoch": 0.000357086181640625,
      "step": 58505,
      "training_step_time": 0.4095158576965332
    },
    {
      "epoch": 0.00035709228515625,
      "model_forward_time": 0.11448860168457031,
      "step": 58506
    },
    {
      "epoch": 0.00035709228515625,
      "step": 58506,
      "training_step_time": 0.44767332077026367
    },
    {
      "epoch": 0.000357098388671875,
      "model_forward_time": 0.11528611183166504,
      "step": 58507
    },
    {
      "epoch": 0.000357098388671875,
      "step": 58507,
      "training_step_time": 0.36552000045776367
    },
    {
      "epoch": 0.0003571044921875,
      "model_forward_time": 0.11449360847473145,
      "step": 58508
    },
    {
      "epoch": 0.0003571044921875,
      "step": 58508,
      "training_step_time": 0.3971083164215088
    },
    {
      "epoch": 0.000357110595703125,
      "model_forward_time": 0.11528968811035156,
      "step": 58509
    },
    {
      "epoch": 0.000357110595703125,
      "step": 58509,
      "training_step_time": 0.44452381134033203
    },
    {
      "epoch": 0.00035711669921875,
      "grad_norm": 0.07749295234680176,
      "learning_rate": 1.6850721059474827e-07,
      "loss": 0.041,
      "step": 58510
    },
    {
      "epoch": 0.00035711669921875,
      "model_forward_time": 0.11530208587646484,
      "step": 58510
    },
    {
      "epoch": 0.00035711669921875,
      "step": 58510,
      "training_step_time": 0.41033387184143066
    },
    {
      "epoch": 0.000357122802734375,
      "model_forward_time": 0.11506819725036621,
      "step": 58511
    },
    {
      "epoch": 0.000357122802734375,
      "step": 58511,
      "training_step_time": 0.4581718444824219
    },
    {
      "epoch": 0.00035712890625,
      "model_forward_time": 0.1151740550994873,
      "step": 58512
    },
    {
      "epoch": 0.00035712890625,
      "step": 58512,
      "training_step_time": 0.3973813056945801
    },
    {
      "epoch": 0.000357135009765625,
      "model_forward_time": 0.11575484275817871,
      "step": 58513
    },
    {
      "epoch": 0.000357135009765625,
      "step": 58513,
      "training_step_time": 0.38431310653686523
    },
    {
      "epoch": 0.00035714111328125,
      "model_forward_time": 0.1149590015411377,
      "step": 58514
    },
    {
      "epoch": 0.00035714111328125,
      "step": 58514,
      "training_step_time": 0.4084923267364502
    },
    {
      "epoch": 0.000357147216796875,
      "model_forward_time": 0.11506485939025879,
      "step": 58515
    },
    {
      "epoch": 0.000357147216796875,
      "step": 58515,
      "training_step_time": 0.398984432220459
    },
    {
      "epoch": 0.0003571533203125,
      "model_forward_time": 0.11587071418762207,
      "step": 58516
    },
    {
      "epoch": 0.0003571533203125,
      "step": 58516,
      "training_step_time": 0.39777398109436035
    },
    {
      "epoch": 0.000357159423828125,
      "model_forward_time": 0.11500716209411621,
      "step": 58517
    },
    {
      "epoch": 0.000357159423828125,
      "step": 58517,
      "training_step_time": 0.40048646926879883
    },
    {
      "epoch": 0.00035716552734375,
      "model_forward_time": 0.11502575874328613,
      "step": 58518
    },
    {
      "epoch": 0.00035716552734375,
      "step": 58518,
      "training_step_time": 0.41913318634033203
    },
    {
      "epoch": 0.000357171630859375,
      "model_forward_time": 0.11536192893981934,
      "step": 58519
    },
    {
      "epoch": 0.000357171630859375,
      "step": 58519,
      "training_step_time": 0.3916761875152588
    },
    {
      "epoch": 0.000357177734375,
      "grad_norm": 0.08951672911643982,
      "learning_rate": 1.662542090147712e-07,
      "loss": 0.0346,
      "step": 58520
    },
    {
      "epoch": 0.000357177734375,
      "model_forward_time": 0.1157069206237793,
      "step": 58520
    },
    {
      "epoch": 0.000357177734375,
      "step": 58520,
      "training_step_time": 0.440096378326416
    },
    {
      "epoch": 0.000357183837890625,
      "model_forward_time": 0.11539316177368164,
      "step": 58521
    },
    {
      "epoch": 0.000357183837890625,
      "step": 58521,
      "training_step_time": 0.48433923721313477
    },
    {
      "epoch": 0.00035718994140625,
      "model_forward_time": 0.11547136306762695,
      "step": 58522
    },
    {
      "epoch": 0.00035718994140625,
      "step": 58522,
      "training_step_time": 0.4859476089477539
    },
    {
      "epoch": 0.000357196044921875,
      "model_forward_time": 0.11472702026367188,
      "step": 58523
    },
    {
      "epoch": 0.000357196044921875,
      "step": 58523,
      "training_step_time": 0.47872304916381836
    },
    {
      "epoch": 0.0003572021484375,
      "model_forward_time": 0.11491703987121582,
      "step": 58524
    },
    {
      "epoch": 0.0003572021484375,
      "step": 58524,
      "training_step_time": 0.4851360321044922
    },
    {
      "epoch": 0.000357208251953125,
      "model_forward_time": 0.11445856094360352,
      "step": 58525
    },
    {
      "epoch": 0.000357208251953125,
      "step": 58525,
      "training_step_time": 0.47690391540527344
    },
    {
      "epoch": 0.00035721435546875,
      "model_forward_time": 0.1147623062133789,
      "step": 58526
    },
    {
      "epoch": 0.00035721435546875,
      "step": 58526,
      "training_step_time": 0.3797121047973633
    },
    {
      "epoch": 0.000357220458984375,
      "model_forward_time": 0.11549544334411621,
      "step": 58527
    },
    {
      "epoch": 0.000357220458984375,
      "step": 58527,
      "training_step_time": 0.38109254837036133
    },
    {
      "epoch": 0.0003572265625,
      "model_forward_time": 0.11528706550598145,
      "step": 58528
    },
    {
      "epoch": 0.0003572265625,
      "step": 58528,
      "training_step_time": 0.38999080657958984
    },
    {
      "epoch": 0.000357232666015625,
      "model_forward_time": 0.11549735069274902,
      "step": 58529
    },
    {
      "epoch": 0.000357232666015625,
      "step": 58529,
      "training_step_time": 0.3775815963745117
    },
    {
      "epoch": 0.00035723876953125,
      "grad_norm": 0.10440338402986526,
      "learning_rate": 1.6401634561098444e-07,
      "loss": 0.0368,
      "step": 58530
    },
    {
      "epoch": 0.00035723876953125,
      "model_forward_time": 0.11439728736877441,
      "step": 58530
    },
    {
      "epoch": 0.00035723876953125,
      "step": 58530,
      "training_step_time": 0.4158046245574951
    },
    {
      "epoch": 0.000357244873046875,
      "model_forward_time": 0.11543965339660645,
      "step": 58531
    },
    {
      "epoch": 0.000357244873046875,
      "step": 58531,
      "training_step_time": 0.4393136501312256
    },
    {
      "epoch": 0.0003572509765625,
      "model_forward_time": 0.11559104919433594,
      "step": 58532
    },
    {
      "epoch": 0.0003572509765625,
      "step": 58532,
      "training_step_time": 0.38338375091552734
    },
    {
      "epoch": 0.000357257080078125,
      "model_forward_time": 0.11522698402404785,
      "step": 58533
    },
    {
      "epoch": 0.000357257080078125,
      "step": 58533,
      "training_step_time": 0.3952653408050537
    },
    {
      "epoch": 0.00035726318359375,
      "model_forward_time": 0.11493158340454102,
      "step": 58534
    },
    {
      "epoch": 0.00035726318359375,
      "step": 58534,
      "training_step_time": 0.38416528701782227
    },
    {
      "epoch": 0.000357269287109375,
      "model_forward_time": 0.11543035507202148,
      "step": 58535
    },
    {
      "epoch": 0.000357269287109375,
      "step": 58535,
      "training_step_time": 0.4842522144317627
    },
    {
      "epoch": 0.000357275390625,
      "model_forward_time": 0.11578989028930664,
      "step": 58536
    },
    {
      "epoch": 0.000357275390625,
      "step": 58536,
      "training_step_time": 0.4037308692932129
    },
    {
      "epoch": 0.000357281494140625,
      "model_forward_time": 0.11516094207763672,
      "step": 58537
    },
    {
      "epoch": 0.000357281494140625,
      "step": 58537,
      "training_step_time": 0.43166518211364746
    },
    {
      "epoch": 0.00035728759765625,
      "model_forward_time": 0.11530089378356934,
      "step": 58538
    },
    {
      "epoch": 0.00035728759765625,
      "step": 58538,
      "training_step_time": 0.4149510860443115
    },
    {
      "epoch": 0.000357293701171875,
      "model_forward_time": 0.11545944213867188,
      "step": 58539
    },
    {
      "epoch": 0.000357293701171875,
      "step": 58539,
      "training_step_time": 0.4237990379333496
    },
    {
      "epoch": 0.0003572998046875,
      "grad_norm": 0.11123732477426529,
      "learning_rate": 1.6179362106318874e-07,
      "loss": 0.0378,
      "step": 58540
    },
    {
      "epoch": 0.0003572998046875,
      "model_forward_time": 0.11485671997070312,
      "step": 58540
    },
    {
      "epoch": 0.0003572998046875,
      "step": 58540,
      "training_step_time": 0.4787940979003906
    },
    {
      "epoch": 0.000357305908203125,
      "model_forward_time": 0.11520910263061523,
      "step": 58541
    },
    {
      "epoch": 0.000357305908203125,
      "step": 58541,
      "training_step_time": 0.39800143241882324
    },
    {
      "epoch": 0.00035731201171875,
      "model_forward_time": 0.1149742603302002,
      "step": 58542
    },
    {
      "epoch": 0.00035731201171875,
      "step": 58542,
      "training_step_time": 0.40329742431640625
    },
    {
      "epoch": 0.000357318115234375,
      "model_forward_time": 0.11527228355407715,
      "step": 58543
    },
    {
      "epoch": 0.000357318115234375,
      "step": 58543,
      "training_step_time": 0.4067997932434082
    },
    {
      "epoch": 0.00035732421875,
      "model_forward_time": 0.1149148941040039,
      "step": 58544
    },
    {
      "epoch": 0.00035732421875,
      "step": 58544,
      "training_step_time": 0.44930148124694824
    },
    {
      "epoch": 0.000357330322265625,
      "model_forward_time": 0.11560606956481934,
      "step": 58545
    },
    {
      "epoch": 0.000357330322265625,
      "step": 58545,
      "training_step_time": 0.38519835472106934
    },
    {
      "epoch": 0.00035733642578125,
      "model_forward_time": 0.11441707611083984,
      "step": 58546
    },
    {
      "epoch": 0.00035733642578125,
      "step": 58546,
      "training_step_time": 0.3956644535064697
    },
    {
      "epoch": 0.000357342529296875,
      "model_forward_time": 0.11522984504699707,
      "step": 58547
    },
    {
      "epoch": 0.000357342529296875,
      "step": 58547,
      "training_step_time": 0.4015047550201416
    },
    {
      "epoch": 0.0003573486328125,
      "model_forward_time": 0.11496639251708984,
      "step": 58548
    },
    {
      "epoch": 0.0003573486328125,
      "step": 58548,
      "training_step_time": 0.4004180431365967
    },
    {
      "epoch": 0.000357354736328125,
      "model_forward_time": 0.1149754524230957,
      "step": 58549
    },
    {
      "epoch": 0.000357354736328125,
      "step": 58549,
      "training_step_time": 0.3925015926361084
    },
    {
      "epoch": 0.00035736083984375,
      "grad_norm": 0.09325556457042694,
      "learning_rate": 1.5958603604658838e-07,
      "loss": 0.0345,
      "step": 58550
    },
    {
      "epoch": 0.00035736083984375,
      "model_forward_time": 0.11473345756530762,
      "step": 58550
    },
    {
      "epoch": 0.00035736083984375,
      "step": 58550,
      "training_step_time": 0.5095579624176025
    },
    {
      "epoch": 0.000357366943359375,
      "model_forward_time": 0.11461925506591797,
      "step": 58551
    },
    {
      "epoch": 0.000357366943359375,
      "step": 58551,
      "training_step_time": 0.4962306022644043
    },
    {
      "epoch": 0.000357373046875,
      "model_forward_time": 0.11486554145812988,
      "step": 58552
    },
    {
      "epoch": 0.000357373046875,
      "step": 58552,
      "training_step_time": 0.3932781219482422
    },
    {
      "epoch": 0.000357379150390625,
      "model_forward_time": 0.11455440521240234,
      "step": 58553
    },
    {
      "epoch": 0.000357379150390625,
      "step": 58553,
      "training_step_time": 0.44107675552368164
    },
    {
      "epoch": 0.00035738525390625,
      "model_forward_time": 0.11556196212768555,
      "step": 58554
    },
    {
      "epoch": 0.00035738525390625,
      "step": 58554,
      "training_step_time": 0.4396505355834961
    },
    {
      "epoch": 0.000357391357421875,
      "model_forward_time": 0.1145164966583252,
      "step": 58555
    },
    {
      "epoch": 0.000357391357421875,
      "step": 58555,
      "training_step_time": 0.3889157772064209
    },
    {
      "epoch": 0.0003573974609375,
      "model_forward_time": 0.11510515213012695,
      "step": 58556
    },
    {
      "epoch": 0.0003573974609375,
      "step": 58556,
      "training_step_time": 0.38679957389831543
    },
    {
      "epoch": 0.000357403564453125,
      "model_forward_time": 0.11653780937194824,
      "step": 58557
    },
    {
      "epoch": 0.000357403564453125,
      "step": 58557,
      "training_step_time": 0.4500424861907959
    },
    {
      "epoch": 0.00035740966796875,
      "model_forward_time": 0.11507153511047363,
      "step": 58558
    },
    {
      "epoch": 0.00035740966796875,
      "step": 58558,
      "training_step_time": 0.3912684917449951
    },
    {
      "epoch": 0.000357415771484375,
      "model_forward_time": 0.11472392082214355,
      "step": 58559
    },
    {
      "epoch": 0.000357415771484375,
      "step": 58559,
      "training_step_time": 0.39491963386535645
    },
    {
      "epoch": 0.000357421875,
      "grad_norm": 0.1441255658864975,
      "learning_rate": 1.5739359123178587e-07,
      "loss": 0.0407,
      "step": 58560
    },
    {
      "epoch": 0.000357421875,
      "model_forward_time": 0.11554193496704102,
      "step": 58560
    },
    {
      "epoch": 0.000357421875,
      "step": 58560,
      "training_step_time": 0.41721439361572266
    },
    {
      "epoch": 0.000357427978515625,
      "model_forward_time": 0.11513662338256836,
      "step": 58561
    },
    {
      "epoch": 0.000357427978515625,
      "step": 58561,
      "training_step_time": 0.3981292247772217
    },
    {
      "epoch": 0.00035743408203125,
      "model_forward_time": 0.11511826515197754,
      "step": 58562
    },
    {
      "epoch": 0.00035743408203125,
      "step": 58562,
      "training_step_time": 0.3914170265197754
    },
    {
      "epoch": 0.000357440185546875,
      "model_forward_time": 0.1157691478729248,
      "step": 58563
    },
    {
      "epoch": 0.000357440185546875,
      "step": 58563,
      "training_step_time": 0.3986337184906006
    },
    {
      "epoch": 0.0003574462890625,
      "model_forward_time": 0.11506867408752441,
      "step": 58564
    },
    {
      "epoch": 0.0003574462890625,
      "step": 58564,
      "training_step_time": 0.37247180938720703
    },
    {
      "epoch": 0.000357452392578125,
      "model_forward_time": 0.1152505874633789,
      "step": 58565
    },
    {
      "epoch": 0.000357452392578125,
      "step": 58565,
      "training_step_time": 0.45395684242248535
    },
    {
      "epoch": 0.00035745849609375,
      "model_forward_time": 0.1158442497253418,
      "step": 58566
    },
    {
      "epoch": 0.00035745849609375,
      "step": 58566,
      "training_step_time": 0.4122037887573242
    },
    {
      "epoch": 0.000357464599609375,
      "model_forward_time": 0.11472105979919434,
      "step": 58567
    },
    {
      "epoch": 0.000357464599609375,
      "step": 58567,
      "training_step_time": 0.454282283782959
    },
    {
      "epoch": 0.000357470703125,
      "model_forward_time": 0.11541962623596191,
      "step": 58568
    },
    {
      "epoch": 0.000357470703125,
      "step": 58568,
      "training_step_time": 0.39281773567199707
    },
    {
      "epoch": 0.000357476806640625,
      "model_forward_time": 0.1154787540435791,
      "step": 58569
    },
    {
      "epoch": 0.000357476806640625,
      "step": 58569,
      "training_step_time": 0.4596543312072754
    },
    {
      "epoch": 0.00035748291015625,
      "grad_norm": 0.07259058952331543,
      "learning_rate": 1.5521628728479843e-07,
      "loss": 0.0358,
      "step": 58570
    },
    {
      "epoch": 0.00035748291015625,
      "model_forward_time": 0.11546516418457031,
      "step": 58570
    },
    {
      "epoch": 0.00035748291015625,
      "step": 58570,
      "training_step_time": 0.3889889717102051
    },
    {
      "epoch": 0.000357489013671875,
      "model_forward_time": 0.11506867408752441,
      "step": 58571
    },
    {
      "epoch": 0.000357489013671875,
      "step": 58571,
      "training_step_time": 0.4175128936767578
    },
    {
      "epoch": 0.0003574951171875,
      "model_forward_time": 0.11538219451904297,
      "step": 58572
    },
    {
      "epoch": 0.0003574951171875,
      "step": 58572,
      "training_step_time": 0.4479548931121826
    },
    {
      "epoch": 0.000357501220703125,
      "model_forward_time": 0.1151731014251709,
      "step": 58573
    },
    {
      "epoch": 0.000357501220703125,
      "step": 58573,
      "training_step_time": 0.38789963722229004
    },
    {
      "epoch": 0.00035750732421875,
      "model_forward_time": 0.11514449119567871,
      "step": 58574
    },
    {
      "epoch": 0.00035750732421875,
      "step": 58574,
      "training_step_time": 0.3982722759246826
    },
    {
      "epoch": 0.000357513427734375,
      "model_forward_time": 0.11507487297058105,
      "step": 58575
    },
    {
      "epoch": 0.000357513427734375,
      "step": 58575,
      "training_step_time": 0.40010976791381836
    },
    {
      "epoch": 0.00035751953125,
      "model_forward_time": 0.11512589454650879,
      "step": 58576
    },
    {
      "epoch": 0.00035751953125,
      "step": 58576,
      "training_step_time": 0.38448190689086914
    },
    {
      "epoch": 0.000357525634765625,
      "model_forward_time": 0.11536264419555664,
      "step": 58577
    },
    {
      "epoch": 0.000357525634765625,
      "step": 58577,
      "training_step_time": 0.39132189750671387
    },
    {
      "epoch": 0.00035753173828125,
      "model_forward_time": 0.11507987976074219,
      "step": 58578
    },
    {
      "epoch": 0.00035753173828125,
      "step": 58578,
      "training_step_time": 0.39569735527038574
    },
    {
      "epoch": 0.000357537841796875,
      "model_forward_time": 0.11561083793640137,
      "step": 58579
    },
    {
      "epoch": 0.000357537841796875,
      "step": 58579,
      "training_step_time": 0.46210765838623047
    },
    {
      "epoch": 0.0003575439453125,
      "grad_norm": 0.10535389184951782,
      "learning_rate": 1.5305412486702474e-07,
      "loss": 0.0382,
      "step": 58580
    },
    {
      "epoch": 0.0003575439453125,
      "model_forward_time": 0.11556410789489746,
      "step": 58580
    },
    {
      "epoch": 0.0003575439453125,
      "step": 58580,
      "training_step_time": 0.5231530666351318
    },
    {
      "epoch": 0.000357550048828125,
      "model_forward_time": 0.11583590507507324,
      "step": 58581
    },
    {
      "epoch": 0.000357550048828125,
      "step": 58581,
      "training_step_time": 0.5003607273101807
    },
    {
      "epoch": 0.00035755615234375,
      "model_forward_time": 0.11447739601135254,
      "step": 58582
    },
    {
      "epoch": 0.00035755615234375,
      "step": 58582,
      "training_step_time": 0.39841127395629883
    },
    {
      "epoch": 0.000357562255859375,
      "model_forward_time": 0.11453938484191895,
      "step": 58583
    },
    {
      "epoch": 0.000357562255859375,
      "step": 58583,
      "training_step_time": 0.4825890064239502
    },
    {
      "epoch": 0.000357568359375,
      "model_forward_time": 0.11494064331054688,
      "step": 58584
    },
    {
      "epoch": 0.000357568359375,
      "step": 58584,
      "training_step_time": 0.41220784187316895
    },
    {
      "epoch": 0.000357574462890625,
      "model_forward_time": 0.11460399627685547,
      "step": 58585
    },
    {
      "epoch": 0.000357574462890625,
      "step": 58585,
      "training_step_time": 0.42104673385620117
    },
    {
      "epoch": 0.00035758056640625,
      "model_forward_time": 0.11502218246459961,
      "step": 58586
    },
    {
      "epoch": 0.00035758056640625,
      "step": 58586,
      "training_step_time": 0.4066505432128906
    },
    {
      "epoch": 0.000357586669921875,
      "model_forward_time": 0.11544585227966309,
      "step": 58587
    },
    {
      "epoch": 0.000357586669921875,
      "step": 58587,
      "training_step_time": 0.38091254234313965
    },
    {
      "epoch": 0.0003575927734375,
      "model_forward_time": 0.11542987823486328,
      "step": 58588
    },
    {
      "epoch": 0.0003575927734375,
      "step": 58588,
      "training_step_time": 0.40563058853149414
    },
    {
      "epoch": 0.000357598876953125,
      "model_forward_time": 0.1149899959564209,
      "step": 58589
    },
    {
      "epoch": 0.000357598876953125,
      "step": 58589,
      "training_step_time": 0.41790223121643066
    },
    {
      "epoch": 0.00035760498046875,
      "grad_norm": 0.0985630676150322,
      "learning_rate": 1.5090710463527836e-07,
      "loss": 0.0348,
      "step": 58590
    },
    {
      "epoch": 0.00035760498046875,
      "model_forward_time": 0.11512231826782227,
      "step": 58590
    },
    {
      "epoch": 0.00035760498046875,
      "step": 58590,
      "training_step_time": 0.39588284492492676
    },
    {
      "epoch": 0.000357611083984375,
      "model_forward_time": 0.1147153377532959,
      "step": 58591
    },
    {
      "epoch": 0.000357611083984375,
      "step": 58591,
      "training_step_time": 0.39586424827575684
    },
    {
      "epoch": 0.0003576171875,
      "model_forward_time": 0.11601090431213379,
      "step": 58592
    },
    {
      "epoch": 0.0003576171875,
      "step": 58592,
      "training_step_time": 0.3873870372772217
    },
    {
      "epoch": 0.000357623291015625,
      "model_forward_time": 0.11470270156860352,
      "step": 58593
    },
    {
      "epoch": 0.000357623291015625,
      "step": 58593,
      "training_step_time": 0.367323637008667
    },
    {
      "epoch": 0.00035762939453125,
      "model_forward_time": 0.1147465705871582,
      "step": 58594
    },
    {
      "epoch": 0.00035762939453125,
      "step": 58594,
      "training_step_time": 0.4193570613861084
    },
    {
      "epoch": 0.000357635498046875,
      "model_forward_time": 0.11419367790222168,
      "step": 58595
    },
    {
      "epoch": 0.000357635498046875,
      "step": 58595,
      "training_step_time": 0.4584774971008301
    },
    {
      "epoch": 0.0003576416015625,
      "model_forward_time": 0.11451458930969238,
      "step": 58596
    },
    {
      "epoch": 0.0003576416015625,
      "step": 58596,
      "training_step_time": 0.4171304702758789
    },
    {
      "epoch": 0.000357647705078125,
      "model_forward_time": 0.11464619636535645,
      "step": 58597
    },
    {
      "epoch": 0.000357647705078125,
      "step": 58597,
      "training_step_time": 0.43691539764404297
    },
    {
      "epoch": 0.00035765380859375,
      "model_forward_time": 0.11398673057556152,
      "step": 58598
    },
    {
      "epoch": 0.00035765380859375,
      "step": 58598,
      "training_step_time": 0.40442538261413574
    },
    {
      "epoch": 0.000357659912109375,
      "model_forward_time": 0.11436271667480469,
      "step": 58599
    },
    {
      "epoch": 0.000357659912109375,
      "step": 58599,
      "training_step_time": 0.4178314208984375
    },
    {
      "epoch": 0.000357666015625,
      "grad_norm": 0.1147506907582283,
      "learning_rate": 1.4877522724175973e-07,
      "loss": 0.0374,
      "step": 58600
    },
    {
      "epoch": 0.000357666015625,
      "model_forward_time": 0.11414599418640137,
      "step": 58600
    },
    {
      "epoch": 0.000357666015625,
      "step": 58600,
      "training_step_time": 0.4189460277557373
    },
    {
      "epoch": 0.000357672119140625,
      "model_forward_time": 0.11552834510803223,
      "step": 58601
    },
    {
      "epoch": 0.000357672119140625,
      "step": 58601,
      "training_step_time": 0.395949125289917
    },
    {
      "epoch": 0.00035767822265625,
      "model_forward_time": 0.11515235900878906,
      "step": 58602
    },
    {
      "epoch": 0.00035767822265625,
      "step": 58602,
      "training_step_time": 0.40108275413513184
    },
    {
      "epoch": 0.000357684326171875,
      "model_forward_time": 0.11577057838439941,
      "step": 58603
    },
    {
      "epoch": 0.000357684326171875,
      "step": 58603,
      "training_step_time": 0.40044736862182617
    },
    {
      "epoch": 0.0003576904296875,
      "model_forward_time": 0.11490058898925781,
      "step": 58604
    },
    {
      "epoch": 0.0003576904296875,
      "step": 58604,
      "training_step_time": 0.39814329147338867
    },
    {
      "epoch": 0.000357696533203125,
      "model_forward_time": 0.11513137817382812,
      "step": 58605
    },
    {
      "epoch": 0.000357696533203125,
      "step": 58605,
      "training_step_time": 0.4022347927093506
    },
    {
      "epoch": 0.00035770263671875,
      "model_forward_time": 0.11520028114318848,
      "step": 58606
    },
    {
      "epoch": 0.00035770263671875,
      "step": 58606,
      "training_step_time": 0.39386892318725586
    },
    {
      "epoch": 0.000357708740234375,
      "model_forward_time": 0.11600160598754883,
      "step": 58607
    },
    {
      "epoch": 0.000357708740234375,
      "step": 58607,
      "training_step_time": 0.4776890277862549
    },
    {
      "epoch": 0.00035771484375,
      "model_forward_time": 0.11597967147827148,
      "step": 58608
    },
    {
      "epoch": 0.00035771484375,
      "step": 58608,
      "training_step_time": 0.43935275077819824
    },
    {
      "epoch": 0.000357720947265625,
      "model_forward_time": 0.11492061614990234,
      "step": 58609
    },
    {
      "epoch": 0.000357720947265625,
      "step": 58609,
      "training_step_time": 0.4417147636413574
    },
    {
      "epoch": 0.00035772705078125,
      "grad_norm": 0.08439471572637558,
      "learning_rate": 1.4665849333408976e-07,
      "loss": 0.0377,
      "step": 58610
    },
    {
      "epoch": 0.00035772705078125,
      "model_forward_time": 0.11673593521118164,
      "step": 58610
    },
    {
      "epoch": 0.00035772705078125,
      "step": 58610,
      "training_step_time": 0.48791980743408203
    },
    {
      "epoch": 0.000357733154296875,
      "model_forward_time": 0.1159353256225586,
      "step": 58611
    },
    {
      "epoch": 0.000357733154296875,
      "step": 58611,
      "training_step_time": 0.3981611728668213
    },
    {
      "epoch": 0.0003577392578125,
      "model_forward_time": 0.11523842811584473,
      "step": 58612
    },
    {
      "epoch": 0.0003577392578125,
      "step": 58612,
      "training_step_time": 0.48679065704345703
    },
    {
      "epoch": 0.000357745361328125,
      "model_forward_time": 0.11482453346252441,
      "step": 58613
    },
    {
      "epoch": 0.000357745361328125,
      "step": 58613,
      "training_step_time": 0.4586830139160156
    },
    {
      "epoch": 0.00035775146484375,
      "model_forward_time": 0.11447453498840332,
      "step": 58614
    },
    {
      "epoch": 0.00035775146484375,
      "step": 58614,
      "training_step_time": 0.39252233505249023
    },
    {
      "epoch": 0.000357757568359375,
      "model_forward_time": 0.11463713645935059,
      "step": 58615
    },
    {
      "epoch": 0.000357757568359375,
      "step": 58615,
      "training_step_time": 0.3938789367675781
    },
    {
      "epoch": 0.000357763671875,
      "model_forward_time": 0.11567091941833496,
      "step": 58616
    },
    {
      "epoch": 0.000357763671875,
      "step": 58616,
      "training_step_time": 0.3927888870239258
    },
    {
      "epoch": 0.000357769775390625,
      "model_forward_time": 0.11479687690734863,
      "step": 58617
    },
    {
      "epoch": 0.000357769775390625,
      "step": 58617,
      "training_step_time": 0.3883943557739258
    },
    {
      "epoch": 0.00035777587890625,
      "model_forward_time": 0.11532974243164062,
      "step": 58618
    },
    {
      "epoch": 0.00035777587890625,
      "step": 58618,
      "training_step_time": 0.39063405990600586
    },
    {
      "epoch": 0.000357781982421875,
      "model_forward_time": 0.11545085906982422,
      "step": 58619
    },
    {
      "epoch": 0.000357781982421875,
      "step": 58619,
      "training_step_time": 0.6370754241943359
    },
    {
      "epoch": 0.0003577880859375,
      "grad_norm": 0.10208870470523834,
      "learning_rate": 1.4455690355525964e-07,
      "loss": 0.035,
      "step": 58620
    },
    {
      "epoch": 0.0003577880859375,
      "model_forward_time": 0.11500072479248047,
      "step": 58620
    },
    {
      "epoch": 0.0003577880859375,
      "step": 58620,
      "training_step_time": 0.40056705474853516
    },
    {
      "epoch": 0.000357794189453125,
      "model_forward_time": 0.11500048637390137,
      "step": 58621
    },
    {
      "epoch": 0.000357794189453125,
      "step": 58621,
      "training_step_time": 0.3916049003601074
    },
    {
      "epoch": 0.00035780029296875,
      "model_forward_time": 0.1157381534576416,
      "step": 58622
    },
    {
      "epoch": 0.00035780029296875,
      "step": 58622,
      "training_step_time": 0.4001615047454834
    },
    {
      "epoch": 0.000357806396484375,
      "model_forward_time": 0.11646628379821777,
      "step": 58623
    },
    {
      "epoch": 0.000357806396484375,
      "step": 58623,
      "training_step_time": 0.48314523696899414
    },
    {
      "epoch": 0.0003578125,
      "model_forward_time": 0.11501049995422363,
      "step": 58624
    },
    {
      "epoch": 0.0003578125,
      "step": 58624,
      "training_step_time": 0.4360392093658447
    },
    {
      "epoch": 0.000357818603515625,
      "model_forward_time": 0.11510944366455078,
      "step": 58625
    },
    {
      "epoch": 0.000357818603515625,
      "step": 58625,
      "training_step_time": 0.47459840774536133
    },
    {
      "epoch": 0.00035782470703125,
      "model_forward_time": 0.11470842361450195,
      "step": 58626
    },
    {
      "epoch": 0.00035782470703125,
      "step": 58626,
      "training_step_time": 0.40914011001586914
    },
    {
      "epoch": 0.000357830810546875,
      "model_forward_time": 0.11531972885131836,
      "step": 58627
    },
    {
      "epoch": 0.000357830810546875,
      "step": 58627,
      "training_step_time": 0.4688379764556885
    },
    {
      "epoch": 0.0003578369140625,
      "model_forward_time": 0.11483383178710938,
      "step": 58628
    },
    {
      "epoch": 0.0003578369140625,
      "step": 58628,
      "training_step_time": 0.39437127113342285
    },
    {
      "epoch": 0.000357843017578125,
      "model_forward_time": 0.11619019508361816,
      "step": 58629
    },
    {
      "epoch": 0.000357843017578125,
      "step": 58629,
      "training_step_time": 0.3889327049255371
    },
    {
      "epoch": 0.00035784912109375,
      "grad_norm": 0.11578793823719025,
      "learning_rate": 1.42470458543692e-07,
      "loss": 0.0388,
      "step": 58630
    },
    {
      "epoch": 0.00035784912109375,
      "model_forward_time": 0.11530566215515137,
      "step": 58630
    },
    {
      "epoch": 0.00035784912109375,
      "step": 58630,
      "training_step_time": 0.38591551780700684
    },
    {
      "epoch": 0.000357855224609375,
      "model_forward_time": 0.11507081985473633,
      "step": 58631
    },
    {
      "epoch": 0.000357855224609375,
      "step": 58631,
      "training_step_time": 0.4943084716796875
    },
    {
      "epoch": 0.000357861328125,
      "model_forward_time": 0.1148533821105957,
      "step": 58632
    },
    {
      "epoch": 0.000357861328125,
      "step": 58632,
      "training_step_time": 0.39418673515319824
    },
    {
      "epoch": 0.000357867431640625,
      "model_forward_time": 0.11539769172668457,
      "step": 58633
    },
    {
      "epoch": 0.000357867431640625,
      "step": 58633,
      "training_step_time": 0.40706682205200195
    },
    {
      "epoch": 0.00035787353515625,
      "model_forward_time": 0.11557936668395996,
      "step": 58634
    },
    {
      "epoch": 0.00035787353515625,
      "step": 58634,
      "training_step_time": 0.3876330852508545
    },
    {
      "epoch": 0.000357879638671875,
      "model_forward_time": 0.1153557300567627,
      "step": 58635
    },
    {
      "epoch": 0.000357879638671875,
      "step": 58635,
      "training_step_time": 0.3950934410095215
    },
    {
      "epoch": 0.0003578857421875,
      "model_forward_time": 0.11472177505493164,
      "step": 58636
    },
    {
      "epoch": 0.0003578857421875,
      "step": 58636,
      "training_step_time": 0.42322278022766113
    },
    {
      "epoch": 0.000357891845703125,
      "model_forward_time": 0.11488962173461914,
      "step": 58637
    },
    {
      "epoch": 0.000357891845703125,
      "step": 58637,
      "training_step_time": 0.590205192565918
    },
    {
      "epoch": 0.00035789794921875,
      "model_forward_time": 0.11548614501953125,
      "step": 58638
    },
    {
      "epoch": 0.00035789794921875,
      "step": 58638,
      "training_step_time": 0.45438456535339355
    },
    {
      "epoch": 0.000357904052734375,
      "model_forward_time": 0.11486315727233887,
      "step": 58639
    },
    {
      "epoch": 0.000357904052734375,
      "step": 58639,
      "training_step_time": 0.46462297439575195
    },
    {
      "epoch": 0.00035791015625,
      "grad_norm": 0.10596496611833572,
      "learning_rate": 1.4039915893318544e-07,
      "loss": 0.0395,
      "step": 58640
    },
    {
      "epoch": 0.00035791015625,
      "model_forward_time": 0.11438369750976562,
      "step": 58640
    },
    {
      "epoch": 0.00035791015625,
      "step": 58640,
      "training_step_time": 0.4086623191833496
    },
    {
      "epoch": 0.000357916259765625,
      "model_forward_time": 0.11411142349243164,
      "step": 58641
    },
    {
      "epoch": 0.000357916259765625,
      "step": 58641,
      "training_step_time": 0.41678857803344727
    },
    {
      "epoch": 0.00035792236328125,
      "model_forward_time": 0.11437654495239258,
      "step": 58642
    },
    {
      "epoch": 0.00035792236328125,
      "step": 58642,
      "training_step_time": 0.3844270706176758
    },
    {
      "epoch": 0.000357928466796875,
      "model_forward_time": 0.11498665809631348,
      "step": 58643
    },
    {
      "epoch": 0.000357928466796875,
      "step": 58643,
      "training_step_time": 0.5233163833618164
    },
    {
      "epoch": 0.0003579345703125,
      "model_forward_time": 0.11463642120361328,
      "step": 58644
    },
    {
      "epoch": 0.0003579345703125,
      "step": 58644,
      "training_step_time": 0.39269471168518066
    },
    {
      "epoch": 0.000357940673828125,
      "model_forward_time": 0.1148383617401123,
      "step": 58645
    },
    {
      "epoch": 0.000357940673828125,
      "step": 58645,
      "training_step_time": 0.3925457000732422
    },
    {
      "epoch": 0.00035794677734375,
      "model_forward_time": 0.11535120010375977,
      "step": 58646
    },
    {
      "epoch": 0.00035794677734375,
      "step": 58646,
      "training_step_time": 0.3896346092224121
    },
    {
      "epoch": 0.000357952880859375,
      "model_forward_time": 0.11543750762939453,
      "step": 58647
    },
    {
      "epoch": 0.000357952880859375,
      "step": 58647,
      "training_step_time": 0.39202070236206055
    },
    {
      "epoch": 0.000357958984375,
      "model_forward_time": 0.11507010459899902,
      "step": 58648
    },
    {
      "epoch": 0.000357958984375,
      "step": 58648,
      "training_step_time": 0.4029052257537842
    },
    {
      "epoch": 0.000357965087890625,
      "model_forward_time": 0.11499238014221191,
      "step": 58649
    },
    {
      "epoch": 0.000357965087890625,
      "step": 58649,
      "training_step_time": 0.6643481254577637
    },
    {
      "epoch": 0.00035797119140625,
      "grad_norm": 0.07581507414579391,
      "learning_rate": 1.383430053529422e-07,
      "loss": 0.0342,
      "step": 58650
    },
    {
      "epoch": 0.00035797119140625,
      "model_forward_time": 0.11469507217407227,
      "step": 58650
    },
    {
      "epoch": 0.00035797119140625,
      "step": 58650,
      "training_step_time": 0.47103261947631836
    },
    {
      "epoch": 0.000357977294921875,
      "model_forward_time": 0.11448478698730469,
      "step": 58651
    },
    {
      "epoch": 0.000357977294921875,
      "step": 58651,
      "training_step_time": 0.458815336227417
    },
    {
      "epoch": 0.0003579833984375,
      "model_forward_time": 0.11560463905334473,
      "step": 58652
    },
    {
      "epoch": 0.0003579833984375,
      "step": 58652,
      "training_step_time": 0.41703033447265625
    },
    {
      "epoch": 0.000357989501953125,
      "model_forward_time": 0.11488938331604004,
      "step": 58653
    },
    {
      "epoch": 0.000357989501953125,
      "step": 58653,
      "training_step_time": 0.4618654251098633
    },
    {
      "epoch": 0.00035799560546875,
      "model_forward_time": 0.1146695613861084,
      "step": 58654
    },
    {
      "epoch": 0.00035799560546875,
      "step": 58654,
      "training_step_time": 0.39446067810058594
    },
    {
      "epoch": 0.000358001708984375,
      "model_forward_time": 0.11433076858520508,
      "step": 58655
    },
    {
      "epoch": 0.000358001708984375,
      "step": 58655,
      "training_step_time": 0.4083836078643799
    },
    {
      "epoch": 0.0003580078125,
      "model_forward_time": 0.11534667015075684,
      "step": 58656
    },
    {
      "epoch": 0.0003580078125,
      "step": 58656,
      "training_step_time": 0.39915895462036133
    },
    {
      "epoch": 0.000358013916015625,
      "model_forward_time": 0.11513113975524902,
      "step": 58657
    },
    {
      "epoch": 0.000358013916015625,
      "step": 58657,
      "training_step_time": 0.40948963165283203
    },
    {
      "epoch": 0.00035802001953125,
      "model_forward_time": 0.11599397659301758,
      "step": 58658
    },
    {
      "epoch": 0.00035802001953125,
      "step": 58658,
      "training_step_time": 0.3845665454864502
    },
    {
      "epoch": 0.000358026123046875,
      "model_forward_time": 0.11580538749694824,
      "step": 58659
    },
    {
      "epoch": 0.000358026123046875,
      "step": 58659,
      "training_step_time": 0.3917574882507324
    },
    {
      "epoch": 0.0003580322265625,
      "grad_norm": 0.08708269894123077,
      "learning_rate": 1.3630199842758484e-07,
      "loss": 0.0337,
      "step": 58660
    },
    {
      "epoch": 0.0003580322265625,
      "model_forward_time": 0.11525440216064453,
      "step": 58660
    },
    {
      "epoch": 0.0003580322265625,
      "step": 58660,
      "training_step_time": 0.405994176864624
    },
    {
      "epoch": 0.000358038330078125,
      "model_forward_time": 0.11518049240112305,
      "step": 58661
    },
    {
      "epoch": 0.000358038330078125,
      "step": 58661,
      "training_step_time": 0.6725480556488037
    },
    {
      "epoch": 0.00035804443359375,
      "model_forward_time": 0.11468720436096191,
      "step": 58662
    },
    {
      "epoch": 0.00035804443359375,
      "step": 58662,
      "training_step_time": 0.39966392517089844
    },
    {
      "epoch": 0.000358050537109375,
      "model_forward_time": 0.11446571350097656,
      "step": 58663
    },
    {
      "epoch": 0.000358050537109375,
      "step": 58663,
      "training_step_time": 0.39844512939453125
    },
    {
      "epoch": 0.000358056640625,
      "model_forward_time": 0.1151430606842041,
      "step": 58664
    },
    {
      "epoch": 0.000358056640625,
      "step": 58664,
      "training_step_time": 0.5004434585571289
    },
    {
      "epoch": 0.000358062744140625,
      "model_forward_time": 0.11578917503356934,
      "step": 58665
    },
    {
      "epoch": 0.000358062744140625,
      "step": 58665,
      "training_step_time": 0.4990513324737549
    },
    {
      "epoch": 0.00035806884765625,
      "model_forward_time": 0.11538982391357422,
      "step": 58666
    },
    {
      "epoch": 0.00035806884765625,
      "step": 58666,
      "training_step_time": 0.4138169288635254
    },
    {
      "epoch": 0.000358074951171875,
      "model_forward_time": 0.1147456169128418,
      "step": 58667
    },
    {
      "epoch": 0.000358074951171875,
      "step": 58667,
      "training_step_time": 0.45160531997680664
    },
    {
      "epoch": 0.0003580810546875,
      "model_forward_time": 0.11446499824523926,
      "step": 58668
    },
    {
      "epoch": 0.0003580810546875,
      "step": 58668,
      "training_step_time": 0.383620023727417
    },
    {
      "epoch": 0.000358087158203125,
      "model_forward_time": 0.1145331859588623,
      "step": 58669
    },
    {
      "epoch": 0.000358087158203125,
      "step": 58669,
      "training_step_time": 0.38699793815612793
    },
    {
      "epoch": 0.00035809326171875,
      "grad_norm": 0.09071248024702072,
      "learning_rate": 1.342761387770952e-07,
      "loss": 0.0385,
      "step": 58670
    },
    {
      "epoch": 0.00035809326171875,
      "model_forward_time": 0.11444282531738281,
      "step": 58670
    },
    {
      "epoch": 0.00035809326171875,
      "step": 58670,
      "training_step_time": 0.3887035846710205
    },
    {
      "epoch": 0.000358099365234375,
      "model_forward_time": 0.11525607109069824,
      "step": 58671
    },
    {
      "epoch": 0.000358099365234375,
      "step": 58671,
      "training_step_time": 0.3923468589782715
    },
    {
      "epoch": 0.00035810546875,
      "model_forward_time": 0.1150510311126709,
      "step": 58672
    },
    {
      "epoch": 0.00035810546875,
      "step": 58672,
      "training_step_time": 0.39043259620666504
    },
    {
      "epoch": 0.000358111572265625,
      "model_forward_time": 0.1148684024810791,
      "step": 58673
    },
    {
      "epoch": 0.000358111572265625,
      "step": 58673,
      "training_step_time": 0.5652141571044922
    },
    {
      "epoch": 0.00035811767578125,
      "model_forward_time": 0.11517643928527832,
      "step": 58674
    },
    {
      "epoch": 0.00035811767578125,
      "step": 58674,
      "training_step_time": 0.3972508907318115
    },
    {
      "epoch": 0.000358123779296875,
      "model_forward_time": 0.11514663696289062,
      "step": 58675
    },
    {
      "epoch": 0.000358123779296875,
      "step": 58675,
      "training_step_time": 0.4002201557159424
    },
    {
      "epoch": 0.0003581298828125,
      "model_forward_time": 0.11492013931274414,
      "step": 58676
    },
    {
      "epoch": 0.0003581298828125,
      "step": 58676,
      "training_step_time": 0.3924112319946289
    },
    {
      "epoch": 0.000358135986328125,
      "model_forward_time": 0.11439275741577148,
      "step": 58677
    },
    {
      "epoch": 0.000358135986328125,
      "step": 58677,
      "training_step_time": 0.40523648262023926
    },
    {
      "epoch": 0.00035814208984375,
      "model_forward_time": 0.11489605903625488,
      "step": 58678
    },
    {
      "epoch": 0.00035814208984375,
      "step": 58678,
      "training_step_time": 0.48394346237182617
    },
    {
      "epoch": 0.000358148193359375,
      "model_forward_time": 0.1148984432220459,
      "step": 58679
    },
    {
      "epoch": 0.000358148193359375,
      "step": 58679,
      "training_step_time": 0.5609331130981445
    },
    {
      "epoch": 0.000358154296875,
      "grad_norm": 0.0804762914776802,
      "learning_rate": 1.3226542701689215e-07,
      "loss": 0.0363,
      "step": 58680
    },
    {
      "epoch": 0.000358154296875,
      "model_forward_time": 0.11534261703491211,
      "step": 58680
    },
    {
      "epoch": 0.000358154296875,
      "step": 58680,
      "training_step_time": 0.4583449363708496
    },
    {
      "epoch": 0.000358160400390625,
      "model_forward_time": 0.1150517463684082,
      "step": 58681
    },
    {
      "epoch": 0.000358160400390625,
      "step": 58681,
      "training_step_time": 0.4958319664001465
    },
    {
      "epoch": 0.00035816650390625,
      "model_forward_time": 0.11445450782775879,
      "step": 58682
    },
    {
      "epoch": 0.00035816650390625,
      "step": 58682,
      "training_step_time": 0.3908877372741699
    },
    {
      "epoch": 0.000358172607421875,
      "model_forward_time": 0.11435413360595703,
      "step": 58683
    },
    {
      "epoch": 0.000358172607421875,
      "step": 58683,
      "training_step_time": 0.3958871364593506
    },
    {
      "epoch": 0.0003581787109375,
      "model_forward_time": 0.11442756652832031,
      "step": 58684
    },
    {
      "epoch": 0.0003581787109375,
      "step": 58684,
      "training_step_time": 0.3963160514831543
    },
    {
      "epoch": 0.000358184814453125,
      "model_forward_time": 0.1157538890838623,
      "step": 58685
    },
    {
      "epoch": 0.000358184814453125,
      "step": 58685,
      "training_step_time": 0.4724884033203125
    },
    {
      "epoch": 0.00035819091796875,
      "model_forward_time": 0.11472058296203613,
      "step": 58686
    },
    {
      "epoch": 0.00035819091796875,
      "step": 58686,
      "training_step_time": 0.3923349380493164
    },
    {
      "epoch": 0.000358197021484375,
      "model_forward_time": 0.11530351638793945,
      "step": 58687
    },
    {
      "epoch": 0.000358197021484375,
      "step": 58687,
      "training_step_time": 0.3990604877471924
    },
    {
      "epoch": 0.000358203125,
      "model_forward_time": 0.11507558822631836,
      "step": 58688
    },
    {
      "epoch": 0.000358203125,
      "step": 58688,
      "training_step_time": 0.38149333000183105
    },
    {
      "epoch": 0.000358209228515625,
      "model_forward_time": 0.11765003204345703,
      "step": 58689
    },
    {
      "epoch": 0.000358209228515625,
      "step": 58689,
      "training_step_time": 0.3905982971191406
    },
    {
      "epoch": 0.00035821533203125,
      "grad_norm": 0.06949611008167267,
      "learning_rate": 1.3026986375776485e-07,
      "loss": 0.0414,
      "step": 58690
    },
    {
      "epoch": 0.00035821533203125,
      "model_forward_time": 0.11570382118225098,
      "step": 58690
    },
    {
      "epoch": 0.00035821533203125,
      "step": 58690,
      "training_step_time": 0.3976554870605469
    },
    {
      "epoch": 0.000358221435546875,
      "model_forward_time": 0.11522960662841797,
      "step": 58691
    },
    {
      "epoch": 0.000358221435546875,
      "step": 58691,
      "training_step_time": 0.5655913352966309
    },
    {
      "epoch": 0.0003582275390625,
      "model_forward_time": 0.11518621444702148,
      "step": 58692
    },
    {
      "epoch": 0.0003582275390625,
      "step": 58692,
      "training_step_time": 0.3684828281402588
    },
    {
      "epoch": 0.000358233642578125,
      "model_forward_time": 0.11504912376403809,
      "step": 58693
    },
    {
      "epoch": 0.000358233642578125,
      "step": 58693,
      "training_step_time": 0.45691704750061035
    },
    {
      "epoch": 0.00035823974609375,
      "model_forward_time": 0.11498498916625977,
      "step": 58694
    },
    {
      "epoch": 0.00035823974609375,
      "step": 58694,
      "training_step_time": 0.4550440311431885
    },
    {
      "epoch": 0.000358245849609375,
      "model_forward_time": 0.11494874954223633,
      "step": 58695
    },
    {
      "epoch": 0.000358245849609375,
      "step": 58695,
      "training_step_time": 0.441709041595459
    },
    {
      "epoch": 0.000358251953125,
      "model_forward_time": 0.11521625518798828,
      "step": 58696
    },
    {
      "epoch": 0.000358251953125,
      "step": 58696,
      "training_step_time": 0.3860290050506592
    },
    {
      "epoch": 0.000358258056640625,
      "model_forward_time": 0.1148686408996582,
      "step": 58697
    },
    {
      "epoch": 0.000358258056640625,
      "step": 58697,
      "training_step_time": 0.40100622177124023
    },
    {
      "epoch": 0.00035826416015625,
      "model_forward_time": 0.11513543128967285,
      "step": 58698
    },
    {
      "epoch": 0.00035826416015625,
      "step": 58698,
      "training_step_time": 0.3967742919921875
    },
    {
      "epoch": 0.000358270263671875,
      "model_forward_time": 0.11471056938171387,
      "step": 58699
    },
    {
      "epoch": 0.000358270263671875,
      "step": 58699,
      "training_step_time": 0.3921809196472168
    },
    {
      "epoch": 0.0003582763671875,
      "grad_norm": 0.08738614618778229,
      "learning_rate": 1.2828944960592836e-07,
      "loss": 0.0394,
      "step": 58700
    },
    {
      "epoch": 0.0003582763671875,
      "model_forward_time": 0.1146700382232666,
      "step": 58700
    },
    {
      "epoch": 0.0003582763671875,
      "step": 58700,
      "training_step_time": 0.39917993545532227
    },
    {
      "epoch": 0.000358282470703125,
      "model_forward_time": 0.11502909660339355,
      "step": 58701
    },
    {
      "epoch": 0.000358282470703125,
      "step": 58701,
      "training_step_time": 0.4064335823059082
    },
    {
      "epoch": 0.00035828857421875,
      "model_forward_time": 0.11534452438354492,
      "step": 58702
    },
    {
      "epoch": 0.00035828857421875,
      "step": 58702,
      "training_step_time": 0.3888564109802246
    },
    {
      "epoch": 0.000358294677734375,
      "model_forward_time": 0.1150214672088623,
      "step": 58703
    },
    {
      "epoch": 0.000358294677734375,
      "step": 58703,
      "training_step_time": 0.5203835964202881
    },
    {
      "epoch": 0.00035830078125,
      "model_forward_time": 0.11470937728881836,
      "step": 58704
    },
    {
      "epoch": 0.00035830078125,
      "step": 58704,
      "training_step_time": 0.40992259979248047
    },
    {
      "epoch": 0.000358306884765625,
      "model_forward_time": 0.11495757102966309,
      "step": 58705
    },
    {
      "epoch": 0.000358306884765625,
      "step": 58705,
      "training_step_time": 0.3999457359313965
    },
    {
      "epoch": 0.00035831298828125,
      "model_forward_time": 0.11486101150512695,
      "step": 58706
    },
    {
      "epoch": 0.00035831298828125,
      "step": 58706,
      "training_step_time": 0.42444276809692383
    },
    {
      "epoch": 0.000358319091796875,
      "model_forward_time": 0.11501312255859375,
      "step": 58707
    },
    {
      "epoch": 0.000358319091796875,
      "step": 58707,
      "training_step_time": 0.42121195793151855
    },
    {
      "epoch": 0.0003583251953125,
      "model_forward_time": 0.11557888984680176,
      "step": 58708
    },
    {
      "epoch": 0.0003583251953125,
      "step": 58708,
      "training_step_time": 0.4898800849914551
    },
    {
      "epoch": 0.000358331298828125,
      "model_forward_time": 0.11545085906982422,
      "step": 58709
    },
    {
      "epoch": 0.000358331298828125,
      "step": 58709,
      "training_step_time": 0.48581671714782715
    },
    {
      "epoch": 0.00035833740234375,
      "grad_norm": 0.13540290296077728,
      "learning_rate": 1.2632418516296262e-07,
      "loss": 0.0389,
      "step": 58710
    },
    {
      "epoch": 0.00035833740234375,
      "model_forward_time": 0.11508703231811523,
      "step": 58710
    },
    {
      "epoch": 0.00035833740234375,
      "step": 58710,
      "training_step_time": 0.39844441413879395
    },
    {
      "epoch": 0.000358343505859375,
      "model_forward_time": 0.11491799354553223,
      "step": 58711
    },
    {
      "epoch": 0.000358343505859375,
      "step": 58711,
      "training_step_time": 0.3994591236114502
    },
    {
      "epoch": 0.000358349609375,
      "model_forward_time": 0.11483263969421387,
      "step": 58712
    },
    {
      "epoch": 0.000358349609375,
      "step": 58712,
      "training_step_time": 0.39415454864501953
    },
    {
      "epoch": 0.000358355712890625,
      "model_forward_time": 0.1149282455444336,
      "step": 58713
    },
    {
      "epoch": 0.000358355712890625,
      "step": 58713,
      "training_step_time": 0.38009071350097656
    },
    {
      "epoch": 0.00035836181640625,
      "model_forward_time": 0.11507678031921387,
      "step": 58714
    },
    {
      "epoch": 0.00035836181640625,
      "step": 58714,
      "training_step_time": 0.3917989730834961
    },
    {
      "epoch": 0.000358367919921875,
      "model_forward_time": 0.11492538452148438,
      "step": 58715
    },
    {
      "epoch": 0.000358367919921875,
      "step": 58715,
      "training_step_time": 0.6240766048431396
    },
    {
      "epoch": 0.0003583740234375,
      "model_forward_time": 0.11474180221557617,
      "step": 58716
    },
    {
      "epoch": 0.0003583740234375,
      "step": 58716,
      "training_step_time": 0.395463228225708
    },
    {
      "epoch": 0.000358380126953125,
      "model_forward_time": 0.11494588851928711,
      "step": 58717
    },
    {
      "epoch": 0.000358380126953125,
      "step": 58717,
      "training_step_time": 0.3945295810699463
    },
    {
      "epoch": 0.00035838623046875,
      "model_forward_time": 0.11563396453857422,
      "step": 58718
    },
    {
      "epoch": 0.00035838623046875,
      "step": 58718,
      "training_step_time": 0.38654303550720215
    },
    {
      "epoch": 0.000358392333984375,
      "model_forward_time": 0.11505722999572754,
      "step": 58719
    },
    {
      "epoch": 0.000358392333984375,
      "step": 58719,
      "training_step_time": 0.38931822776794434
    },
    {
      "epoch": 0.0003583984375,
      "grad_norm": 0.09175106883049011,
      "learning_rate": 1.243740710258734e-07,
      "loss": 0.035,
      "step": 58720
    },
    {
      "epoch": 0.0003583984375,
      "model_forward_time": 0.11432170867919922,
      "step": 58720
    },
    {
      "epoch": 0.0003583984375,
      "step": 58720,
      "training_step_time": 0.42006587982177734
    },
    {
      "epoch": 0.000358404541015625,
      "model_forward_time": 0.11540579795837402,
      "step": 58721
    },
    {
      "epoch": 0.000358404541015625,
      "step": 58721,
      "training_step_time": 0.6089956760406494
    },
    {
      "epoch": 0.00035841064453125,
      "model_forward_time": 0.11447381973266602,
      "step": 58722
    },
    {
      "epoch": 0.00035841064453125,
      "step": 58722,
      "training_step_time": 0.4500911235809326
    },
    {
      "epoch": 0.000358416748046875,
      "model_forward_time": 0.11482405662536621,
      "step": 58723
    },
    {
      "epoch": 0.000358416748046875,
      "step": 58723,
      "training_step_time": 0.4232752323150635
    },
    {
      "epoch": 0.0003584228515625,
      "model_forward_time": 0.11485552787780762,
      "step": 58724
    },
    {
      "epoch": 0.0003584228515625,
      "step": 58724,
      "training_step_time": 0.4265923500061035
    },
    {
      "epoch": 0.000358428955078125,
      "model_forward_time": 0.11426591873168945,
      "step": 58725
    },
    {
      "epoch": 0.000358428955078125,
      "step": 58725,
      "training_step_time": 0.3931903839111328
    },
    {
      "epoch": 0.00035843505859375,
      "model_forward_time": 0.11440348625183105,
      "step": 58726
    },
    {
      "epoch": 0.00035843505859375,
      "step": 58726,
      "training_step_time": 0.38440465927124023
    },
    {
      "epoch": 0.000358441162109375,
      "model_forward_time": 0.11492800712585449,
      "step": 58727
    },
    {
      "epoch": 0.000358441162109375,
      "step": 58727,
      "training_step_time": 0.43502378463745117
    },
    {
      "epoch": 0.000358447265625,
      "model_forward_time": 0.11526703834533691,
      "step": 58728
    },
    {
      "epoch": 0.000358447265625,
      "step": 58728,
      "training_step_time": 0.40522336959838867
    },
    {
      "epoch": 0.000358453369140625,
      "model_forward_time": 0.11506080627441406,
      "step": 58729
    },
    {
      "epoch": 0.000358453369140625,
      "step": 58729,
      "training_step_time": 0.4008762836456299
    },
    {
      "epoch": 0.00035845947265625,
      "grad_norm": 0.07465770840644836,
      "learning_rate": 1.2243910778705348e-07,
      "loss": 0.0341,
      "step": 58730
    },
    {
      "epoch": 0.00035845947265625,
      "model_forward_time": 0.11478590965270996,
      "step": 58730
    },
    {
      "epoch": 0.00035845947265625,
      "step": 58730,
      "training_step_time": 0.38562965393066406
    },
    {
      "epoch": 0.000358465576171875,
      "model_forward_time": 0.11485552787780762,
      "step": 58731
    },
    {
      "epoch": 0.000358465576171875,
      "step": 58731,
      "training_step_time": 0.3841972351074219
    },
    {
      "epoch": 0.0003584716796875,
      "model_forward_time": 0.11544227600097656,
      "step": 58732
    },
    {
      "epoch": 0.0003584716796875,
      "step": 58732,
      "training_step_time": 0.3864560127258301
    },
    {
      "epoch": 0.000358477783203125,
      "model_forward_time": 0.11477208137512207,
      "step": 58733
    },
    {
      "epoch": 0.000358477783203125,
      "step": 58733,
      "training_step_time": 0.6392004489898682
    },
    {
      "epoch": 0.00035848388671875,
      "model_forward_time": 0.11562132835388184,
      "step": 58734
    },
    {
      "epoch": 0.00035848388671875,
      "step": 58734,
      "training_step_time": 0.3916287422180176
    },
    {
      "epoch": 0.000358489990234375,
      "model_forward_time": 0.11461186408996582,
      "step": 58735
    },
    {
      "epoch": 0.000358489990234375,
      "step": 58735,
      "training_step_time": 0.36834096908569336
    },
    {
      "epoch": 0.00035849609375,
      "model_forward_time": 0.11552739143371582,
      "step": 58736
    },
    {
      "epoch": 0.00035849609375,
      "step": 58736,
      "training_step_time": 0.4266481399536133
    },
    {
      "epoch": 0.000358502197265625,
      "model_forward_time": 0.11471891403198242,
      "step": 58737
    },
    {
      "epoch": 0.000358502197265625,
      "step": 58737,
      "training_step_time": 0.4851701259613037
    },
    {
      "epoch": 0.00035850830078125,
      "model_forward_time": 0.1141970157623291,
      "step": 58738
    },
    {
      "epoch": 0.00035850830078125,
      "step": 58738,
      "training_step_time": 0.44706296920776367
    },
    {
      "epoch": 0.000358514404296875,
      "model_forward_time": 0.1147611141204834,
      "step": 58739
    },
    {
      "epoch": 0.000358514404296875,
      "step": 58739,
      "training_step_time": 0.4033060073852539
    },
    {
      "epoch": 0.0003585205078125,
      "grad_norm": 0.10035357624292374,
      "learning_rate": 1.2051929603428825e-07,
      "loss": 0.0379,
      "step": 58740
    },
    {
      "epoch": 0.0003585205078125,
      "model_forward_time": 0.11447000503540039,
      "step": 58740
    },
    {
      "epoch": 0.0003585205078125,
      "step": 58740,
      "training_step_time": 0.3875594139099121
    },
    {
      "epoch": 0.000358526611328125,
      "model_forward_time": 0.11510014533996582,
      "step": 58741
    },
    {
      "epoch": 0.000358526611328125,
      "step": 58741,
      "training_step_time": 0.3954794406890869
    },
    {
      "epoch": 0.00035853271484375,
      "model_forward_time": 0.11518526077270508,
      "step": 58742
    },
    {
      "epoch": 0.00035853271484375,
      "step": 58742,
      "training_step_time": 0.38517308235168457
    },
    {
      "epoch": 0.000358538818359375,
      "model_forward_time": 0.11517047882080078,
      "step": 58743
    },
    {
      "epoch": 0.000358538818359375,
      "step": 58743,
      "training_step_time": 0.3881876468658447
    },
    {
      "epoch": 0.000358544921875,
      "model_forward_time": 0.11484575271606445,
      "step": 58744
    },
    {
      "epoch": 0.000358544921875,
      "step": 58744,
      "training_step_time": 0.3908731937408447
    },
    {
      "epoch": 0.000358551025390625,
      "model_forward_time": 0.11601543426513672,
      "step": 58745
    },
    {
      "epoch": 0.000358551025390625,
      "step": 58745,
      "training_step_time": 0.7297022342681885
    },
    {
      "epoch": 0.00035855712890625,
      "model_forward_time": 0.11466240882873535,
      "step": 58746
    },
    {
      "epoch": 0.00035855712890625,
      "step": 58746,
      "training_step_time": 0.41373467445373535
    },
    {
      "epoch": 0.000358563232421875,
      "model_forward_time": 0.11499643325805664,
      "step": 58747
    },
    {
      "epoch": 0.000358563232421875,
      "step": 58747,
      "training_step_time": 0.395709753036499
    },
    {
      "epoch": 0.0003585693359375,
      "model_forward_time": 0.11455965042114258,
      "step": 58748
    },
    {
      "epoch": 0.0003585693359375,
      "step": 58748,
      "training_step_time": 0.38863635063171387
    },
    {
      "epoch": 0.000358575439453125,
      "model_forward_time": 0.11439776420593262,
      "step": 58749
    },
    {
      "epoch": 0.000358575439453125,
      "step": 58749,
      "training_step_time": 0.36141085624694824
    },
    {
      "epoch": 0.00035858154296875,
      "grad_norm": 0.07841940969228745,
      "learning_rate": 1.1861463635077785e-07,
      "loss": 0.038,
      "step": 58750
    },
    {
      "epoch": 0.00035858154296875,
      "model_forward_time": 0.11544561386108398,
      "step": 58750
    },
    {
      "epoch": 0.00035858154296875,
      "step": 58750,
      "training_step_time": 0.408771276473999
    },
    {
      "epoch": 0.000358587646484375,
      "model_forward_time": 0.11464047431945801,
      "step": 58751
    },
    {
      "epoch": 0.000358587646484375,
      "step": 58751,
      "training_step_time": 0.4438192844390869
    },
    {
      "epoch": 0.00035859375,
      "model_forward_time": 0.11565184593200684,
      "step": 58752
    },
    {
      "epoch": 0.00035859375,
      "step": 58752,
      "training_step_time": 0.5229854583740234
    },
    {
      "epoch": 0.000358599853515625,
      "model_forward_time": 0.11411142349243164,
      "step": 58753
    },
    {
      "epoch": 0.000358599853515625,
      "step": 58753,
      "training_step_time": 0.39211511611938477
    },
    {
      "epoch": 0.00035860595703125,
      "model_forward_time": 0.11525273323059082,
      "step": 58754
    },
    {
      "epoch": 0.00035860595703125,
      "step": 58754,
      "training_step_time": 0.38284826278686523
    },
    {
      "epoch": 0.000358612060546875,
      "model_forward_time": 0.1149909496307373,
      "step": 58755
    },
    {
      "epoch": 0.000358612060546875,
      "step": 58755,
      "training_step_time": 0.39377713203430176
    },
    {
      "epoch": 0.0003586181640625,
      "model_forward_time": 0.11521530151367188,
      "step": 58756
    },
    {
      "epoch": 0.0003586181640625,
      "step": 58756,
      "training_step_time": 0.3876638412475586
    },
    {
      "epoch": 0.000358624267578125,
      "model_forward_time": 0.11452126502990723,
      "step": 58757
    },
    {
      "epoch": 0.000358624267578125,
      "step": 58757,
      "training_step_time": 0.39809083938598633
    },
    {
      "epoch": 0.00035863037109375,
      "model_forward_time": 0.11533093452453613,
      "step": 58758
    },
    {
      "epoch": 0.00035863037109375,
      "step": 58758,
      "training_step_time": 0.3945322036743164
    },
    {
      "epoch": 0.000358636474609375,
      "model_forward_time": 0.11521744728088379,
      "step": 58759
    },
    {
      "epoch": 0.000358636474609375,
      "step": 58759,
      "training_step_time": 0.4222884178161621
    },
    {
      "epoch": 0.000358642578125,
      "grad_norm": 0.08698319643735886,
      "learning_rate": 1.1672512931509283e-07,
      "loss": 0.0331,
      "step": 58760
    },
    {
      "epoch": 0.000358642578125,
      "model_forward_time": 0.11550688743591309,
      "step": 58760
    },
    {
      "epoch": 0.000358642578125,
      "step": 58760,
      "training_step_time": 0.4182703495025635
    },
    {
      "epoch": 0.000358648681640625,
      "model_forward_time": 0.1146693229675293,
      "step": 58761
    },
    {
      "epoch": 0.000358648681640625,
      "step": 58761,
      "training_step_time": 0.41893529891967773
    },
    {
      "epoch": 0.00035865478515625,
      "model_forward_time": 0.11508798599243164,
      "step": 58762
    },
    {
      "epoch": 0.00035865478515625,
      "step": 58762,
      "training_step_time": 0.3873448371887207
    },
    {
      "epoch": 0.000358660888671875,
      "model_forward_time": 0.11551976203918457,
      "step": 58763
    },
    {
      "epoch": 0.000358660888671875,
      "step": 58763,
      "training_step_time": 0.3942742347717285
    },
    {
      "epoch": 0.0003586669921875,
      "model_forward_time": 0.11545729637145996,
      "step": 58764
    },
    {
      "epoch": 0.0003586669921875,
      "step": 58764,
      "training_step_time": 0.4423861503601074
    },
    {
      "epoch": 0.000358673095703125,
      "model_forward_time": 0.11545467376708984,
      "step": 58765
    },
    {
      "epoch": 0.000358673095703125,
      "step": 58765,
      "training_step_time": 0.4626646041870117
    },
    {
      "epoch": 0.00035867919921875,
      "model_forward_time": 0.11502885818481445,
      "step": 58766
    },
    {
      "epoch": 0.00035867919921875,
      "step": 58766,
      "training_step_time": 0.506744384765625
    },
    {
      "epoch": 0.000358685302734375,
      "model_forward_time": 0.11539387702941895,
      "step": 58767
    },
    {
      "epoch": 0.000358685302734375,
      "step": 58767,
      "training_step_time": 0.42898082733154297
    },
    {
      "epoch": 0.00035869140625,
      "model_forward_time": 0.11505961418151855,
      "step": 58768
    },
    {
      "epoch": 0.00035869140625,
      "step": 58768,
      "training_step_time": 0.38901853561401367
    },
    {
      "epoch": 0.000358697509765625,
      "model_forward_time": 0.11483979225158691,
      "step": 58769
    },
    {
      "epoch": 0.000358697509765625,
      "step": 58769,
      "training_step_time": 0.3972959518432617
    },
    {
      "epoch": 0.00035870361328125,
      "grad_norm": 0.08184050768613815,
      "learning_rate": 1.1485077550122402e-07,
      "loss": 0.0336,
      "step": 58770
    },
    {
      "epoch": 0.00035870361328125,
      "model_forward_time": 0.11499452590942383,
      "step": 58770
    },
    {
      "epoch": 0.00035870361328125,
      "step": 58770,
      "training_step_time": 0.39159107208251953
    },
    {
      "epoch": 0.000358709716796875,
      "model_forward_time": 0.11426806449890137,
      "step": 58771
    },
    {
      "epoch": 0.000358709716796875,
      "step": 58771,
      "training_step_time": 0.39202022552490234
    },
    {
      "epoch": 0.0003587158203125,
      "model_forward_time": 0.11567258834838867,
      "step": 58772
    },
    {
      "epoch": 0.0003587158203125,
      "step": 58772,
      "training_step_time": 0.38130784034729004
    },
    {
      "epoch": 0.000358721923828125,
      "model_forward_time": 0.11538839340209961,
      "step": 58773
    },
    {
      "epoch": 0.000358721923828125,
      "step": 58773,
      "training_step_time": 0.4245898723602295
    },
    {
      "epoch": 0.00035872802734375,
      "model_forward_time": 0.11573362350463867,
      "step": 58774
    },
    {
      "epoch": 0.00035872802734375,
      "step": 58774,
      "training_step_time": 0.43151354789733887
    },
    {
      "epoch": 0.000358734130859375,
      "model_forward_time": 0.11520528793334961,
      "step": 58775
    },
    {
      "epoch": 0.000358734130859375,
      "step": 58775,
      "training_step_time": 0.39546895027160645
    },
    {
      "epoch": 0.000358740234375,
      "model_forward_time": 0.11570358276367188,
      "step": 58776
    },
    {
      "epoch": 0.000358740234375,
      "step": 58776,
      "training_step_time": 0.39779233932495117
    },
    {
      "epoch": 0.000358746337890625,
      "model_forward_time": 0.11478495597839355,
      "step": 58777
    },
    {
      "epoch": 0.000358746337890625,
      "step": 58777,
      "training_step_time": 0.3947608470916748
    },
    {
      "epoch": 0.00035875244140625,
      "model_forward_time": 0.11506795883178711,
      "step": 58778
    },
    {
      "epoch": 0.00035875244140625,
      "step": 58778,
      "training_step_time": 0.39822888374328613
    },
    {
      "epoch": 0.000358758544921875,
      "model_forward_time": 0.11521673202514648,
      "step": 58779
    },
    {
      "epoch": 0.000358758544921875,
      "step": 58779,
      "training_step_time": 0.4322030544281006
    },
    {
      "epoch": 0.0003587646484375,
      "grad_norm": 0.10667982697486877,
      "learning_rate": 1.1299157547854377e-07,
      "loss": 0.0413,
      "step": 58780
    },
    {
      "epoch": 0.0003587646484375,
      "model_forward_time": 0.11621761322021484,
      "step": 58780
    },
    {
      "epoch": 0.0003587646484375,
      "step": 58780,
      "training_step_time": 0.4598050117492676
    },
    {
      "epoch": 0.000358770751953125,
      "model_forward_time": 0.11556339263916016,
      "step": 58781
    },
    {
      "epoch": 0.000358770751953125,
      "step": 58781,
      "training_step_time": 0.46341919898986816
    },
    {
      "epoch": 0.00035877685546875,
      "model_forward_time": 0.11549615859985352,
      "step": 58782
    },
    {
      "epoch": 0.00035877685546875,
      "step": 58782,
      "training_step_time": 0.42685914039611816
    },
    {
      "epoch": 0.000358782958984375,
      "model_forward_time": 0.11471796035766602,
      "step": 58783
    },
    {
      "epoch": 0.000358782958984375,
      "step": 58783,
      "training_step_time": 0.391995906829834
    },
    {
      "epoch": 0.0003587890625,
      "model_forward_time": 0.11569333076477051,
      "step": 58784
    },
    {
      "epoch": 0.0003587890625,
      "step": 58784,
      "training_step_time": 0.3858466148376465
    },
    {
      "epoch": 0.000358795166015625,
      "model_forward_time": 0.11632394790649414,
      "step": 58785
    },
    {
      "epoch": 0.000358795166015625,
      "step": 58785,
      "training_step_time": 0.3889346122741699
    },
    {
      "epoch": 0.00035880126953125,
      "model_forward_time": 0.11485862731933594,
      "step": 58786
    },
    {
      "epoch": 0.00035880126953125,
      "step": 58786,
      "training_step_time": 0.3983447551727295
    },
    {
      "epoch": 0.000358807373046875,
      "model_forward_time": 0.11567497253417969,
      "step": 58787
    },
    {
      "epoch": 0.000358807373046875,
      "step": 58787,
      "training_step_time": 0.39935898780822754
    },
    {
      "epoch": 0.0003588134765625,
      "model_forward_time": 0.11590337753295898,
      "step": 58788
    },
    {
      "epoch": 0.0003588134765625,
      "step": 58788,
      "training_step_time": 0.42186856269836426
    },
    {
      "epoch": 0.000358819580078125,
      "model_forward_time": 0.11532998085021973,
      "step": 58789
    },
    {
      "epoch": 0.000358819580078125,
      "step": 58789,
      "training_step_time": 0.39980053901672363
    },
    {
      "epoch": 0.00035882568359375,
      "grad_norm": 0.09308912605047226,
      "learning_rate": 1.1114752981183917e-07,
      "loss": 0.037,
      "step": 58790
    },
    {
      "epoch": 0.00035882568359375,
      "model_forward_time": 0.11546111106872559,
      "step": 58790
    },
    {
      "epoch": 0.00035882568359375,
      "step": 58790,
      "training_step_time": 0.384366512298584
    },
    {
      "epoch": 0.000358831787109375,
      "model_forward_time": 0.11523127555847168,
      "step": 58791
    },
    {
      "epoch": 0.000358831787109375,
      "step": 58791,
      "training_step_time": 0.3828117847442627
    },
    {
      "epoch": 0.000358837890625,
      "model_forward_time": 0.11489677429199219,
      "step": 58792
    },
    {
      "epoch": 0.000358837890625,
      "step": 58792,
      "training_step_time": 0.3890655040740967
    },
    {
      "epoch": 0.000358843994140625,
      "model_forward_time": 0.11563467979431152,
      "step": 58793
    },
    {
      "epoch": 0.000358843994140625,
      "step": 58793,
      "training_step_time": 0.39138078689575195
    },
    {
      "epoch": 0.00035885009765625,
      "model_forward_time": 0.11533427238464355,
      "step": 58794
    },
    {
      "epoch": 0.00035885009765625,
      "step": 58794,
      "training_step_time": 0.4743218421936035
    },
    {
      "epoch": 0.000358856201171875,
      "model_forward_time": 0.1161031723022461,
      "step": 58795
    },
    {
      "epoch": 0.000358856201171875,
      "step": 58795,
      "training_step_time": 0.416975736618042
    },
    {
      "epoch": 0.0003588623046875,
      "model_forward_time": 0.11555314064025879,
      "step": 58796
    },
    {
      "epoch": 0.0003588623046875,
      "step": 58796,
      "training_step_time": 0.4441218376159668
    },
    {
      "epoch": 0.000358868408203125,
      "model_forward_time": 0.11591958999633789,
      "step": 58797
    },
    {
      "epoch": 0.000358868408203125,
      "step": 58797,
      "training_step_time": 0.42288684844970703
    },
    {
      "epoch": 0.00035887451171875,
      "model_forward_time": 0.11536622047424316,
      "step": 58798
    },
    {
      "epoch": 0.00035887451171875,
      "step": 58798,
      "training_step_time": 0.395432710647583
    },
    {
      "epoch": 0.000358880615234375,
      "model_forward_time": 0.11487674713134766,
      "step": 58799
    },
    {
      "epoch": 0.000358880615234375,
      "step": 58799,
      "training_step_time": 0.40311098098754883
    },
    {
      "epoch": 0.00035888671875,
      "grad_norm": 0.09721041470766068,
      "learning_rate": 1.0931863906127327e-07,
      "loss": 0.0357,
      "step": 58800
    },
    {
      "epoch": 0.00035888671875,
      "model_forward_time": 0.11497187614440918,
      "step": 58800
    },
    {
      "epoch": 0.00035888671875,
      "step": 58800,
      "training_step_time": 0.39301419258117676
    },
    {
      "epoch": 0.000358892822265625,
      "model_forward_time": 0.11565279960632324,
      "step": 58801
    },
    {
      "epoch": 0.000358892822265625,
      "step": 58801,
      "training_step_time": 0.3968527317047119
    },
    {
      "epoch": 0.00035889892578125,
      "model_forward_time": 0.11454439163208008,
      "step": 58802
    },
    {
      "epoch": 0.00035889892578125,
      "step": 58802,
      "training_step_time": 0.4035794734954834
    },
    {
      "epoch": 0.000358905029296875,
      "model_forward_time": 0.11461162567138672,
      "step": 58803
    },
    {
      "epoch": 0.000358905029296875,
      "step": 58803,
      "training_step_time": 0.4052867889404297
    },
    {
      "epoch": 0.0003589111328125,
      "model_forward_time": 0.11554765701293945,
      "step": 58804
    },
    {
      "epoch": 0.0003589111328125,
      "step": 58804,
      "training_step_time": 0.39594483375549316
    },
    {
      "epoch": 0.000358917236328125,
      "model_forward_time": 0.11572265625,
      "step": 58805
    },
    {
      "epoch": 0.000358917236328125,
      "step": 58805,
      "training_step_time": 0.3945043087005615
    },
    {
      "epoch": 0.00035892333984375,
      "model_forward_time": 0.11522269248962402,
      "step": 58806
    },
    {
      "epoch": 0.00035892333984375,
      "step": 58806,
      "training_step_time": 0.39855241775512695
    },
    {
      "epoch": 0.000358929443359375,
      "model_forward_time": 0.11488485336303711,
      "step": 58807
    },
    {
      "epoch": 0.000358929443359375,
      "step": 58807,
      "training_step_time": 0.3971412181854248
    },
    {
      "epoch": 0.000358935546875,
      "model_forward_time": 0.1152048110961914,
      "step": 58808
    },
    {
      "epoch": 0.000358935546875,
      "step": 58808,
      "training_step_time": 0.3925745487213135
    },
    {
      "epoch": 0.000358941650390625,
      "model_forward_time": 0.11631989479064941,
      "step": 58809
    },
    {
      "epoch": 0.000358941650390625,
      "step": 58809,
      "training_step_time": 0.4741353988647461
    },
    {
      "epoch": 0.00035894775390625,
      "grad_norm": 0.07798004895448685,
      "learning_rate": 1.0750490378241828e-07,
      "loss": 0.033,
      "step": 58810
    },
    {
      "epoch": 0.00035894775390625,
      "model_forward_time": 0.11582064628601074,
      "step": 58810
    },
    {
      "epoch": 0.00035894775390625,
      "step": 58810,
      "training_step_time": 0.5069091320037842
    },
    {
      "epoch": 0.000358953857421875,
      "model_forward_time": 0.11575460433959961,
      "step": 58811
    },
    {
      "epoch": 0.000358953857421875,
      "step": 58811,
      "training_step_time": 0.5122742652893066
    },
    {
      "epoch": 0.0003589599609375,
      "model_forward_time": 0.1155557632446289,
      "step": 58812
    },
    {
      "epoch": 0.0003589599609375,
      "step": 58812,
      "training_step_time": 0.3899359703063965
    },
    {
      "epoch": 0.000358966064453125,
      "model_forward_time": 0.11514425277709961,
      "step": 58813
    },
    {
      "epoch": 0.000358966064453125,
      "step": 58813,
      "training_step_time": 0.39528679847717285
    },
    {
      "epoch": 0.00035897216796875,
      "model_forward_time": 0.11514496803283691,
      "step": 58814
    },
    {
      "epoch": 0.00035897216796875,
      "step": 58814,
      "training_step_time": 0.4262387752532959
    },
    {
      "epoch": 0.000358978271484375,
      "model_forward_time": 0.11513376235961914,
      "step": 58815
    },
    {
      "epoch": 0.000358978271484375,
      "step": 58815,
      "training_step_time": 0.4191124439239502
    },
    {
      "epoch": 0.000358984375,
      "model_forward_time": 0.11591839790344238,
      "step": 58816
    },
    {
      "epoch": 0.000358984375,
      "step": 58816,
      "training_step_time": 0.416797399520874
    },
    {
      "epoch": 0.000358990478515625,
      "model_forward_time": 0.11533570289611816,
      "step": 58817
    },
    {
      "epoch": 0.000358990478515625,
      "step": 58817,
      "training_step_time": 0.389387845993042
    },
    {
      "epoch": 0.00035899658203125,
      "model_forward_time": 0.1154947280883789,
      "step": 58818
    },
    {
      "epoch": 0.00035899658203125,
      "step": 58818,
      "training_step_time": 0.40783238410949707
    },
    {
      "epoch": 0.000359002685546875,
      "model_forward_time": 0.11498093605041504,
      "step": 58819
    },
    {
      "epoch": 0.000359002685546875,
      "step": 58819,
      "training_step_time": 0.39760565757751465
    },
    {
      "epoch": 0.0003590087890625,
      "grad_norm": 0.07447408884763718,
      "learning_rate": 1.0570632452623353e-07,
      "loss": 0.0388,
      "step": 58820
    },
    {
      "epoch": 0.0003590087890625,
      "model_forward_time": 0.11578893661499023,
      "step": 58820
    },
    {
      "epoch": 0.0003590087890625,
      "step": 58820,
      "training_step_time": 0.38721346855163574
    },
    {
      "epoch": 0.000359014892578125,
      "model_forward_time": 0.1153407096862793,
      "step": 58821
    },
    {
      "epoch": 0.000359014892578125,
      "step": 58821,
      "training_step_time": 0.3894083499908447
    },
    {
      "epoch": 0.00035902099609375,
      "model_forward_time": 0.11492252349853516,
      "step": 58822
    },
    {
      "epoch": 0.00035902099609375,
      "step": 58822,
      "training_step_time": 0.3921809196472168
    },
    {
      "epoch": 0.000359027099609375,
      "model_forward_time": 0.11622810363769531,
      "step": 58823
    },
    {
      "epoch": 0.000359027099609375,
      "step": 58823,
      "training_step_time": 0.44193267822265625
    },
    {
      "epoch": 0.000359033203125,
      "model_forward_time": 0.1156005859375,
      "step": 58824
    },
    {
      "epoch": 0.000359033203125,
      "step": 58824,
      "training_step_time": 0.45495128631591797
    },
    {
      "epoch": 0.000359039306640625,
      "model_forward_time": 0.11549997329711914,
      "step": 58825
    },
    {
      "epoch": 0.000359039306640625,
      "step": 58825,
      "training_step_time": 0.4788203239440918
    },
    {
      "epoch": 0.00035904541015625,
      "model_forward_time": 0.11566662788391113,
      "step": 58826
    },
    {
      "epoch": 0.00035904541015625,
      "step": 58826,
      "training_step_time": 0.396834135055542
    },
    {
      "epoch": 0.000359051513671875,
      "model_forward_time": 0.11517620086669922,
      "step": 58827
    },
    {
      "epoch": 0.000359051513671875,
      "step": 58827,
      "training_step_time": 0.3807520866394043
    },
    {
      "epoch": 0.0003590576171875,
      "model_forward_time": 0.11534690856933594,
      "step": 58828
    },
    {
      "epoch": 0.0003590576171875,
      "step": 58828,
      "training_step_time": 0.39638686180114746
    },
    {
      "epoch": 0.000359063720703125,
      "model_forward_time": 0.11537647247314453,
      "step": 58829
    },
    {
      "epoch": 0.000359063720703125,
      "step": 58829,
      "training_step_time": 0.38916015625
    },
    {
      "epoch": 0.00035906982421875,
      "grad_norm": 0.09165116399526596,
      "learning_rate": 1.0392290183909304e-07,
      "loss": 0.0389,
      "step": 58830
    },
    {
      "epoch": 0.00035906982421875,
      "model_forward_time": 0.11573004722595215,
      "step": 58830
    },
    {
      "epoch": 0.00035906982421875,
      "step": 58830,
      "training_step_time": 0.39435410499572754
    },
    {
      "epoch": 0.000359075927734375,
      "model_forward_time": 0.11474728584289551,
      "step": 58831
    },
    {
      "epoch": 0.000359075927734375,
      "step": 58831,
      "training_step_time": 0.3996436595916748
    },
    {
      "epoch": 0.00035908203125,
      "model_forward_time": 0.11531639099121094,
      "step": 58832
    },
    {
      "epoch": 0.00035908203125,
      "step": 58832,
      "training_step_time": 0.3893873691558838
    },
    {
      "epoch": 0.000359088134765625,
      "model_forward_time": 0.1164698600769043,
      "step": 58833
    },
    {
      "epoch": 0.000359088134765625,
      "step": 58833,
      "training_step_time": 0.4058384895324707
    },
    {
      "epoch": 0.00035909423828125,
      "model_forward_time": 0.11748170852661133,
      "step": 58834
    },
    {
      "epoch": 0.00035909423828125,
      "step": 58834,
      "training_step_time": 0.400498628616333
    },
    {
      "epoch": 0.000359100341796875,
      "model_forward_time": 0.11520266532897949,
      "step": 58835
    },
    {
      "epoch": 0.000359100341796875,
      "step": 58835,
      "training_step_time": 0.38827085494995117
    },
    {
      "epoch": 0.0003591064453125,
      "model_forward_time": 0.11532330513000488,
      "step": 58836
    },
    {
      "epoch": 0.0003591064453125,
      "step": 58836,
      "training_step_time": 0.4026482105255127
    },
    {
      "epoch": 0.000359112548828125,
      "model_forward_time": 0.11522221565246582,
      "step": 58837
    },
    {
      "epoch": 0.000359112548828125,
      "step": 58837,
      "training_step_time": 0.39388060569763184
    },
    {
      "epoch": 0.00035911865234375,
      "model_forward_time": 0.11574316024780273,
      "step": 58838
    },
    {
      "epoch": 0.00035911865234375,
      "step": 58838,
      "training_step_time": 0.43032097816467285
    },
    {
      "epoch": 0.000359124755859375,
      "model_forward_time": 0.11541461944580078,
      "step": 58839
    },
    {
      "epoch": 0.000359124755859375,
      "step": 58839,
      "training_step_time": 0.4076049327850342
    },
    {
      "epoch": 0.000359130859375,
      "grad_norm": 0.10294201225042343,
      "learning_rate": 1.0215463626274125e-07,
      "loss": 0.0385,
      "step": 58840
    },
    {
      "epoch": 0.000359130859375,
      "model_forward_time": 0.11567926406860352,
      "step": 58840
    },
    {
      "epoch": 0.000359130859375,
      "step": 58840,
      "training_step_time": 0.43224120140075684
    },
    {
      "epoch": 0.000359136962890625,
      "model_forward_time": 0.11535191535949707,
      "step": 58841
    },
    {
      "epoch": 0.000359136962890625,
      "step": 58841,
      "training_step_time": 0.5480725765228271
    },
    {
      "epoch": 0.00035914306640625,
      "model_forward_time": 0.11568188667297363,
      "step": 58842
    },
    {
      "epoch": 0.00035914306640625,
      "step": 58842,
      "training_step_time": 0.43661022186279297
    },
    {
      "epoch": 0.000359149169921875,
      "model_forward_time": 0.11533117294311523,
      "step": 58843
    },
    {
      "epoch": 0.000359149169921875,
      "step": 58843,
      "training_step_time": 0.4342319965362549
    },
    {
      "epoch": 0.0003591552734375,
      "model_forward_time": 0.11541366577148438,
      "step": 58844
    },
    {
      "epoch": 0.0003591552734375,
      "step": 58844,
      "training_step_time": 0.39838171005249023
    },
    {
      "epoch": 0.000359161376953125,
      "model_forward_time": 0.11542892456054688,
      "step": 58845
    },
    {
      "epoch": 0.000359161376953125,
      "step": 58845,
      "training_step_time": 0.39794278144836426
    },
    {
      "epoch": 0.00035916748046875,
      "model_forward_time": 0.11503314971923828,
      "step": 58846
    },
    {
      "epoch": 0.00035916748046875,
      "step": 58846,
      "training_step_time": 0.39023804664611816
    },
    {
      "epoch": 0.000359173583984375,
      "model_forward_time": 0.11498498916625977,
      "step": 58847
    },
    {
      "epoch": 0.000359173583984375,
      "step": 58847,
      "training_step_time": 0.3943309783935547
    },
    {
      "epoch": 0.0003591796875,
      "model_forward_time": 0.11438107490539551,
      "step": 58848
    },
    {
      "epoch": 0.0003591796875,
      "step": 58848,
      "training_step_time": 0.3948049545288086
    },
    {
      "epoch": 0.000359185791015625,
      "model_forward_time": 0.11518311500549316,
      "step": 58849
    },
    {
      "epoch": 0.000359185791015625,
      "step": 58849,
      "training_step_time": 0.3901345729827881
    },
    {
      "epoch": 0.00035919189453125,
      "grad_norm": 0.0701407641172409,
      "learning_rate": 1.0040152833433736e-07,
      "loss": 0.0385,
      "step": 58850
    },
    {
      "epoch": 0.00035919189453125,
      "model_forward_time": 0.1153709888458252,
      "step": 58850
    },
    {
      "epoch": 0.00035919189453125,
      "step": 58850,
      "training_step_time": 0.38413429260253906
    },
    {
      "epoch": 0.000359197998046875,
      "model_forward_time": 0.1150822639465332,
      "step": 58851
    },
    {
      "epoch": 0.000359197998046875,
      "step": 58851,
      "training_step_time": 0.38986849784851074
    },
    {
      "epoch": 0.0003592041015625,
      "model_forward_time": 0.11649060249328613,
      "step": 58852
    },
    {
      "epoch": 0.0003592041015625,
      "step": 58852,
      "training_step_time": 0.39589381217956543
    },
    {
      "epoch": 0.000359210205078125,
      "model_forward_time": 0.11572456359863281,
      "step": 58853
    },
    {
      "epoch": 0.000359210205078125,
      "step": 58853,
      "training_step_time": 0.4455392360687256
    },
    {
      "epoch": 0.00035921630859375,
      "model_forward_time": 0.11550116539001465,
      "step": 58854
    },
    {
      "epoch": 0.00035921630859375,
      "step": 58854,
      "training_step_time": 0.49332189559936523
    },
    {
      "epoch": 0.000359222412109375,
      "model_forward_time": 0.11583876609802246,
      "step": 58855
    },
    {
      "epoch": 0.000359222412109375,
      "step": 58855,
      "training_step_time": 0.46560025215148926
    },
    {
      "epoch": 0.000359228515625,
      "model_forward_time": 0.1146841049194336,
      "step": 58856
    },
    {
      "epoch": 0.000359228515625,
      "step": 58856,
      "training_step_time": 0.4454786777496338
    },
    {
      "epoch": 0.000359234619140625,
      "model_forward_time": 0.11461305618286133,
      "step": 58857
    },
    {
      "epoch": 0.000359234619140625,
      "step": 58857,
      "training_step_time": 0.39177393913269043
    },
    {
      "epoch": 0.00035924072265625,
      "model_forward_time": 0.11585187911987305,
      "step": 58858
    },
    {
      "epoch": 0.00035924072265625,
      "step": 58858,
      "training_step_time": 0.3857910633087158
    },
    {
      "epoch": 0.000359246826171875,
      "model_forward_time": 0.11475753784179688,
      "step": 58859
    },
    {
      "epoch": 0.000359246826171875,
      "step": 58859,
      "training_step_time": 0.4033787250518799
    },
    {
      "epoch": 0.0003592529296875,
      "grad_norm": 0.14507807791233063,
      "learning_rate": 9.866357858642205e-08,
      "loss": 0.048,
      "step": 58860
    },
    {
      "epoch": 0.0003592529296875,
      "model_forward_time": 0.11484479904174805,
      "step": 58860
    },
    {
      "epoch": 0.0003592529296875,
      "step": 58860,
      "training_step_time": 0.3987112045288086
    },
    {
      "epoch": 0.000359259033203125,
      "model_forward_time": 0.11524534225463867,
      "step": 58861
    },
    {
      "epoch": 0.000359259033203125,
      "step": 58861,
      "training_step_time": 0.550433874130249
    },
    {
      "epoch": 0.00035926513671875,
      "model_forward_time": 0.11488652229309082,
      "step": 58862
    },
    {
      "epoch": 0.00035926513671875,
      "step": 58862,
      "training_step_time": 0.39792561531066895
    },
    {
      "epoch": 0.000359271240234375,
      "model_forward_time": 0.11516475677490234,
      "step": 58863
    },
    {
      "epoch": 0.000359271240234375,
      "step": 58863,
      "training_step_time": 0.4099583625793457
    },
    {
      "epoch": 0.00035927734375,
      "model_forward_time": 0.11496782302856445,
      "step": 58864
    },
    {
      "epoch": 0.00035927734375,
      "step": 58864,
      "training_step_time": 0.39884185791015625
    },
    {
      "epoch": 0.000359283447265625,
      "model_forward_time": 0.11480498313903809,
      "step": 58865
    },
    {
      "epoch": 0.000359283447265625,
      "step": 58865,
      "training_step_time": 0.3970944881439209
    },
    {
      "epoch": 0.00035928955078125,
      "model_forward_time": 0.11447024345397949,
      "step": 58866
    },
    {
      "epoch": 0.00035928955078125,
      "step": 58866,
      "training_step_time": 0.39316368103027344
    },
    {
      "epoch": 0.000359295654296875,
      "model_forward_time": 0.11462116241455078,
      "step": 58867
    },
    {
      "epoch": 0.000359295654296875,
      "step": 58867,
      "training_step_time": 0.5646600723266602
    },
    {
      "epoch": 0.0003593017578125,
      "model_forward_time": 0.11419415473937988,
      "step": 58868
    },
    {
      "epoch": 0.0003593017578125,
      "step": 58868,
      "training_step_time": 0.5186362266540527
    },
    {
      "epoch": 0.000359307861328125,
      "model_forward_time": 0.11454463005065918,
      "step": 58869
    },
    {
      "epoch": 0.000359307861328125,
      "step": 58869,
      "training_step_time": 0.4460773468017578
    },
    {
      "epoch": 0.00035931396484375,
      "grad_norm": 0.10639330744743347,
      "learning_rate": 9.694078754694524e-08,
      "loss": 0.0331,
      "step": 58870
    },
    {
      "epoch": 0.00035931396484375,
      "model_forward_time": 0.11390995979309082,
      "step": 58870
    },
    {
      "epoch": 0.00035931396484375,
      "step": 58870,
      "training_step_time": 0.41312694549560547
    },
    {
      "epoch": 0.000359320068359375,
      "model_forward_time": 0.11417651176452637,
      "step": 58871
    },
    {
      "epoch": 0.000359320068359375,
      "step": 58871,
      "training_step_time": 0.38216280937194824
    },
    {
      "epoch": 0.000359326171875,
      "model_forward_time": 0.11515641212463379,
      "step": 58872
    },
    {
      "epoch": 0.000359326171875,
      "step": 58872,
      "training_step_time": 0.4080326557159424
    },
    {
      "epoch": 0.000359332275390625,
      "model_forward_time": 0.1151890754699707,
      "step": 58873
    },
    {
      "epoch": 0.000359332275390625,
      "step": 58873,
      "training_step_time": 0.41969966888427734
    },
    {
      "epoch": 0.00035933837890625,
      "model_forward_time": 0.11540865898132324,
      "step": 58874
    },
    {
      "epoch": 0.00035933837890625,
      "step": 58874,
      "training_step_time": 0.3875596523284912
    },
    {
      "epoch": 0.000359344482421875,
      "model_forward_time": 0.11457037925720215,
      "step": 58875
    },
    {
      "epoch": 0.000359344482421875,
      "step": 58875,
      "training_step_time": 0.38774585723876953
    },
    {
      "epoch": 0.0003593505859375,
      "model_forward_time": 0.11463189125061035,
      "step": 58876
    },
    {
      "epoch": 0.0003593505859375,
      "step": 58876,
      "training_step_time": 0.3956904411315918
    },
    {
      "epoch": 0.000359356689453125,
      "model_forward_time": 0.11538505554199219,
      "step": 58877
    },
    {
      "epoch": 0.000359356689453125,
      "step": 58877,
      "training_step_time": 0.40282273292541504
    },
    {
      "epoch": 0.00035936279296875,
      "model_forward_time": 0.11540699005126953,
      "step": 58878
    },
    {
      "epoch": 0.00035936279296875,
      "step": 58878,
      "training_step_time": 0.4051835536956787
    },
    {
      "epoch": 0.000359368896484375,
      "model_forward_time": 0.11481261253356934,
      "step": 58879
    },
    {
      "epoch": 0.000359368896484375,
      "step": 58879,
      "training_step_time": 0.5878267288208008
    },
    {
      "epoch": 0.000359375,
      "grad_norm": 0.07066666334867477,
      "learning_rate": 9.523315573924385e-08,
      "loss": 0.0318,
      "step": 58880
    },
    {
      "epoch": 0.000359375,
      "model_forward_time": 0.11519384384155273,
      "step": 58880
    },
    {
      "epoch": 0.000359375,
      "step": 58880,
      "training_step_time": 0.38727879524230957
    },
    {
      "epoch": 0.000359381103515625,
      "model_forward_time": 0.11470842361450195,
      "step": 58881
    },
    {
      "epoch": 0.000359381103515625,
      "step": 58881,
      "training_step_time": 0.36727094650268555
    },
    {
      "epoch": 0.00035938720703125,
      "model_forward_time": 0.11460757255554199,
      "step": 58882
    },
    {
      "epoch": 0.00035938720703125,
      "step": 58882,
      "training_step_time": 0.486950159072876
    },
    {
      "epoch": 0.000359393310546875,
      "model_forward_time": 0.11466598510742188,
      "step": 58883
    },
    {
      "epoch": 0.000359393310546875,
      "step": 58883,
      "training_step_time": 0.48852014541625977
    },
    {
      "epoch": 0.0003593994140625,
      "model_forward_time": 0.11468505859375,
      "step": 58884
    },
    {
      "epoch": 0.0003593994140625,
      "step": 58884,
      "training_step_time": 0.4433102607727051
    },
    {
      "epoch": 0.000359405517578125,
      "model_forward_time": 0.11541223526000977,
      "step": 58885
    },
    {
      "epoch": 0.000359405517578125,
      "step": 58885,
      "training_step_time": 0.41551685333251953
    },
    {
      "epoch": 0.00035941162109375,
      "model_forward_time": 0.11533689498901367,
      "step": 58886
    },
    {
      "epoch": 0.00035941162109375,
      "step": 58886,
      "training_step_time": 0.39958667755126953
    },
    {
      "epoch": 0.000359417724609375,
      "model_forward_time": 0.11437845230102539,
      "step": 58887
    },
    {
      "epoch": 0.000359417724609375,
      "step": 58887,
      "training_step_time": 0.38953065872192383
    },
    {
      "epoch": 0.000359423828125,
      "model_forward_time": 0.11512279510498047,
      "step": 58888
    },
    {
      "epoch": 0.000359423828125,
      "step": 58888,
      "training_step_time": 0.3983299732208252
    },
    {
      "epoch": 0.000359429931640625,
      "model_forward_time": 0.11535191535949707,
      "step": 58889
    },
    {
      "epoch": 0.000359429931640625,
      "step": 58889,
      "training_step_time": 0.39827632904052734
    },
    {
      "epoch": 0.00035943603515625,
      "grad_norm": 0.08828867226839066,
      "learning_rate": 9.354068368204739e-08,
      "loss": 0.0374,
      "step": 58890
    },
    {
      "epoch": 0.00035943603515625,
      "model_forward_time": 0.1153573989868164,
      "step": 58890
    },
    {
      "epoch": 0.00035943603515625,
      "step": 58890,
      "training_step_time": 0.4065825939178467
    },
    {
      "epoch": 0.000359442138671875,
      "model_forward_time": 0.1146094799041748,
      "step": 58891
    },
    {
      "epoch": 0.000359442138671875,
      "step": 58891,
      "training_step_time": 0.6322641372680664
    },
    {
      "epoch": 0.0003594482421875,
      "model_forward_time": 0.11550283432006836,
      "step": 58892
    },
    {
      "epoch": 0.0003594482421875,
      "step": 58892,
      "training_step_time": 0.38594913482666016
    },
    {
      "epoch": 0.000359454345703125,
      "model_forward_time": 0.11498427391052246,
      "step": 58893
    },
    {
      "epoch": 0.000359454345703125,
      "step": 58893,
      "training_step_time": 0.3859717845916748
    },
    {
      "epoch": 0.00035946044921875,
      "model_forward_time": 0.11491894721984863,
      "step": 58894
    },
    {
      "epoch": 0.00035946044921875,
      "step": 58894,
      "training_step_time": 0.38251757621765137
    },
    {
      "epoch": 0.000359466552734375,
      "model_forward_time": 0.11444997787475586,
      "step": 58895
    },
    {
      "epoch": 0.000359466552734375,
      "step": 58895,
      "training_step_time": 0.36173367500305176
    },
    {
      "epoch": 0.00035947265625,
      "model_forward_time": 0.11538863182067871,
      "step": 58896
    },
    {
      "epoch": 0.00035947265625,
      "step": 58896,
      "training_step_time": 0.48996496200561523
    },
    {
      "epoch": 0.000359478759765625,
      "model_forward_time": 0.1151275634765625,
      "step": 58897
    },
    {
      "epoch": 0.000359478759765625,
      "step": 58897,
      "training_step_time": 0.472853422164917
    },
    {
      "epoch": 0.00035948486328125,
      "model_forward_time": 0.11465930938720703,
      "step": 58898
    },
    {
      "epoch": 0.00035948486328125,
      "step": 58898,
      "training_step_time": 0.46251654624938965
    },
    {
      "epoch": 0.000359490966796875,
      "model_forward_time": 0.11503124237060547,
      "step": 58899
    },
    {
      "epoch": 0.000359490966796875,
      "step": 58899,
      "training_step_time": 0.386538028717041
    },
    {
      "epoch": 0.0003594970703125,
      "grad_norm": 0.10357885807752609,
      "learning_rate": 9.186337188949457e-08,
      "loss": 0.0334,
      "step": 58900
    },
    {
      "epoch": 0.0003594970703125,
      "model_forward_time": 0.11517810821533203,
      "step": 58900
    },
    {
      "epoch": 0.0003594970703125,
      "step": 58900,
      "training_step_time": 0.398984432220459
    },
    {
      "epoch": 0.000359503173828125,
      "model_forward_time": 0.11506819725036621,
      "step": 58901
    },
    {
      "epoch": 0.000359503173828125,
      "step": 58901,
      "training_step_time": 0.38432979583740234
    },
    {
      "epoch": 0.00035950927734375,
      "model_forward_time": 0.1143794059753418,
      "step": 58902
    },
    {
      "epoch": 0.00035950927734375,
      "step": 58902,
      "training_step_time": 0.394697904586792
    },
    {
      "epoch": 0.000359515380859375,
      "model_forward_time": 0.11520147323608398,
      "step": 58903
    },
    {
      "epoch": 0.000359515380859375,
      "step": 58903,
      "training_step_time": 0.4116227626800537
    },
    {
      "epoch": 0.000359521484375,
      "model_forward_time": 0.11453104019165039,
      "step": 58904
    },
    {
      "epoch": 0.000359521484375,
      "step": 58904,
      "training_step_time": 0.39258813858032227
    },
    {
      "epoch": 0.000359527587890625,
      "model_forward_time": 0.11580109596252441,
      "step": 58905
    },
    {
      "epoch": 0.000359527587890625,
      "step": 58905,
      "training_step_time": 0.39815306663513184
    },
    {
      "epoch": 0.00035953369140625,
      "model_forward_time": 0.11487841606140137,
      "step": 58906
    },
    {
      "epoch": 0.00035953369140625,
      "step": 58906,
      "training_step_time": 0.39990949630737305
    },
    {
      "epoch": 0.000359539794921875,
      "model_forward_time": 0.11545538902282715,
      "step": 58907
    },
    {
      "epoch": 0.000359539794921875,
      "step": 58907,
      "training_step_time": 0.39740633964538574
    },
    {
      "epoch": 0.0003595458984375,
      "model_forward_time": 0.11466026306152344,
      "step": 58908
    },
    {
      "epoch": 0.0003595458984375,
      "step": 58908,
      "training_step_time": 0.40690159797668457
    },
    {
      "epoch": 0.000359552001953125,
      "model_forward_time": 0.11544394493103027,
      "step": 58909
    },
    {
      "epoch": 0.000359552001953125,
      "step": 58909,
      "training_step_time": 0.6092450618743896
    },
    {
      "epoch": 0.00035955810546875,
      "grad_norm": 0.08207803219556808,
      "learning_rate": 9.02012208710945e-08,
      "loss": 0.0361,
      "step": 58910
    },
    {
      "epoch": 0.00035955810546875,
      "model_forward_time": 0.11481070518493652,
      "step": 58910
    },
    {
      "epoch": 0.00035955810546875,
      "step": 58910,
      "training_step_time": 0.5036849975585938
    },
    {
      "epoch": 0.000359564208984375,
      "model_forward_time": 0.11483097076416016,
      "step": 58911
    },
    {
      "epoch": 0.000359564208984375,
      "step": 58911,
      "training_step_time": 0.4925351142883301
    },
    {
      "epoch": 0.0003595703125,
      "model_forward_time": 0.11446547508239746,
      "step": 58912
    },
    {
      "epoch": 0.0003595703125,
      "step": 58912,
      "training_step_time": 0.3852086067199707
    },
    {
      "epoch": 0.000359576416015625,
      "model_forward_time": 0.11453127861022949,
      "step": 58913
    },
    {
      "epoch": 0.000359576416015625,
      "step": 58913,
      "training_step_time": 0.3887784481048584
    },
    {
      "epoch": 0.00035958251953125,
      "model_forward_time": 0.11451077461242676,
      "step": 58914
    },
    {
      "epoch": 0.00035958251953125,
      "step": 58914,
      "training_step_time": 0.3902320861816406
    },
    {
      "epoch": 0.000359588623046875,
      "model_forward_time": 0.11514902114868164,
      "step": 58915
    },
    {
      "epoch": 0.000359588623046875,
      "step": 58915,
      "training_step_time": 0.40143251419067383
    },
    {
      "epoch": 0.0003595947265625,
      "model_forward_time": 0.11443090438842773,
      "step": 58916
    },
    {
      "epoch": 0.0003595947265625,
      "step": 58916,
      "training_step_time": 0.39399099349975586
    },
    {
      "epoch": 0.000359600830078125,
      "model_forward_time": 0.11496520042419434,
      "step": 58917
    },
    {
      "epoch": 0.000359600830078125,
      "step": 58917,
      "training_step_time": 0.39342164993286133
    },
    {
      "epoch": 0.00035960693359375,
      "model_forward_time": 0.11611557006835938,
      "step": 58918
    },
    {
      "epoch": 0.00035960693359375,
      "step": 58918,
      "training_step_time": 0.3905515670776367
    },
    {
      "epoch": 0.000359613037109375,
      "model_forward_time": 0.11573529243469238,
      "step": 58919
    },
    {
      "epoch": 0.000359613037109375,
      "step": 58919,
      "training_step_time": 0.39662718772888184
    },
    {
      "epoch": 0.000359619140625,
      "grad_norm": 0.10458040237426758,
      "learning_rate": 8.855423113177664e-08,
      "loss": 0.0418,
      "step": 58920
    },
    {
      "epoch": 0.000359619140625,
      "model_forward_time": 0.11467146873474121,
      "step": 58920
    },
    {
      "epoch": 0.000359619140625,
      "step": 58920,
      "training_step_time": 0.4090259075164795
    },
    {
      "epoch": 0.000359625244140625,
      "model_forward_time": 0.11519336700439453,
      "step": 58921
    },
    {
      "epoch": 0.000359625244140625,
      "step": 58921,
      "training_step_time": 0.6379449367523193
    },
    {
      "epoch": 0.00035963134765625,
      "model_forward_time": 0.11442351341247559,
      "step": 58922
    },
    {
      "epoch": 0.00035963134765625,
      "step": 58922,
      "training_step_time": 0.43094563484191895
    },
    {
      "epoch": 0.000359637451171875,
      "model_forward_time": 0.1146690845489502,
      "step": 58923
    },
    {
      "epoch": 0.000359637451171875,
      "step": 58923,
      "training_step_time": 0.4117457866668701
    },
    {
      "epoch": 0.0003596435546875,
      "model_forward_time": 0.11550235748291016,
      "step": 58924
    },
    {
      "epoch": 0.0003596435546875,
      "step": 58924,
      "training_step_time": 0.4190988540649414
    },
    {
      "epoch": 0.000359649658203125,
      "model_forward_time": 0.11478805541992188,
      "step": 58925
    },
    {
      "epoch": 0.000359649658203125,
      "step": 58925,
      "training_step_time": 0.45737195014953613
    },
    {
      "epoch": 0.00035965576171875,
      "model_forward_time": 0.11460065841674805,
      "step": 58926
    },
    {
      "epoch": 0.00035965576171875,
      "step": 58926,
      "training_step_time": 0.4370429515838623
    },
    {
      "epoch": 0.000359661865234375,
      "model_forward_time": 0.11524581909179688,
      "step": 58927
    },
    {
      "epoch": 0.000359661865234375,
      "step": 58927,
      "training_step_time": 0.5139248371124268
    },
    {
      "epoch": 0.00035966796875,
      "model_forward_time": 0.11489677429199219,
      "step": 58928
    },
    {
      "epoch": 0.00035966796875,
      "step": 58928,
      "training_step_time": 0.3861427307128906
    },
    {
      "epoch": 0.000359674072265625,
      "model_forward_time": 0.1154632568359375,
      "step": 58929
    },
    {
      "epoch": 0.000359674072265625,
      "step": 58929,
      "training_step_time": 0.3894646167755127
    },
    {
      "epoch": 0.00035968017578125,
      "grad_norm": 0.08992578834295273,
      "learning_rate": 8.69224031718463e-08,
      "loss": 0.0331,
      "step": 58930
    },
    {
      "epoch": 0.00035968017578125,
      "model_forward_time": 0.1153724193572998,
      "step": 58930
    },
    {
      "epoch": 0.00035968017578125,
      "step": 58930,
      "training_step_time": 0.39156651496887207
    },
    {
      "epoch": 0.000359686279296875,
      "model_forward_time": 0.1147313117980957,
      "step": 58931
    },
    {
      "epoch": 0.000359686279296875,
      "step": 58931,
      "training_step_time": 0.39600157737731934
    },
    {
      "epoch": 0.0003596923828125,
      "model_forward_time": 0.11496329307556152,
      "step": 58932
    },
    {
      "epoch": 0.0003596923828125,
      "step": 58932,
      "training_step_time": 0.4030263423919678
    },
    {
      "epoch": 0.000359698486328125,
      "model_forward_time": 0.11506104469299316,
      "step": 58933
    },
    {
      "epoch": 0.000359698486328125,
      "step": 58933,
      "training_step_time": 0.6071031093597412
    },
    {
      "epoch": 0.00035970458984375,
      "model_forward_time": 0.11467504501342773,
      "step": 58934
    },
    {
      "epoch": 0.00035970458984375,
      "step": 58934,
      "training_step_time": 0.38985323905944824
    },
    {
      "epoch": 0.000359710693359375,
      "model_forward_time": 0.1149904727935791,
      "step": 58935
    },
    {
      "epoch": 0.000359710693359375,
      "step": 58935,
      "training_step_time": 0.4005556106567383
    },
    {
      "epoch": 0.000359716796875,
      "model_forward_time": 0.11473488807678223,
      "step": 58936
    },
    {
      "epoch": 0.000359716796875,
      "step": 58936,
      "training_step_time": 0.5033164024353027
    },
    {
      "epoch": 0.000359722900390625,
      "model_forward_time": 0.11454153060913086,
      "step": 58937
    },
    {
      "epoch": 0.000359722900390625,
      "step": 58937,
      "training_step_time": 0.41025304794311523
    },
    {
      "epoch": 0.00035972900390625,
      "model_forward_time": 0.11432647705078125,
      "step": 58938
    },
    {
      "epoch": 0.00035972900390625,
      "step": 58938,
      "training_step_time": 0.3708505630493164
    },
    {
      "epoch": 0.000359735107421875,
      "model_forward_time": 0.11496400833129883,
      "step": 58939
    },
    {
      "epoch": 0.000359735107421875,
      "step": 58939,
      "training_step_time": 0.4871203899383545
    },
    {
      "epoch": 0.0003597412109375,
      "grad_norm": 0.06278225034475327,
      "learning_rate": 8.530573748701253e-08,
      "loss": 0.0379,
      "step": 58940
    },
    {
      "epoch": 0.0003597412109375,
      "model_forward_time": 0.1140584945678711,
      "step": 58940
    },
    {
      "epoch": 0.0003597412109375,
      "step": 58940,
      "training_step_time": 0.5272581577301025
    },
    {
      "epoch": 0.000359747314453125,
      "model_forward_time": 0.11451554298400879,
      "step": 58941
    },
    {
      "epoch": 0.000359747314453125,
      "step": 58941,
      "training_step_time": 0.404163122177124
    },
    {
      "epoch": 0.00035975341796875,
      "model_forward_time": 0.11619782447814941,
      "step": 58942
    },
    {
      "epoch": 0.00035975341796875,
      "step": 58942,
      "training_step_time": 0.3982706069946289
    },
    {
      "epoch": 0.000359759521484375,
      "model_forward_time": 0.11467337608337402,
      "step": 58943
    },
    {
      "epoch": 0.000359759521484375,
      "step": 58943,
      "training_step_time": 0.3986232280731201
    },
    {
      "epoch": 0.000359765625,
      "model_forward_time": 0.11541271209716797,
      "step": 58944
    },
    {
      "epoch": 0.000359765625,
      "step": 58944,
      "training_step_time": 0.3983142375946045
    },
    {
      "epoch": 0.000359771728515625,
      "model_forward_time": 0.11410856246948242,
      "step": 58945
    },
    {
      "epoch": 0.000359771728515625,
      "step": 58945,
      "training_step_time": 0.3959081172943115
    },
    {
      "epoch": 0.00035977783203125,
      "model_forward_time": 0.11533021926879883,
      "step": 58946
    },
    {
      "epoch": 0.00035977783203125,
      "step": 58946,
      "training_step_time": 0.3947126865386963
    },
    {
      "epoch": 0.000359783935546875,
      "model_forward_time": 0.1154794692993164,
      "step": 58947
    },
    {
      "epoch": 0.000359783935546875,
      "step": 58947,
      "training_step_time": 0.4011349678039551
    },
    {
      "epoch": 0.0003597900390625,
      "model_forward_time": 0.11546969413757324,
      "step": 58948
    },
    {
      "epoch": 0.0003597900390625,
      "step": 58948,
      "training_step_time": 0.38782691955566406
    },
    {
      "epoch": 0.000359796142578125,
      "model_forward_time": 0.11477994918823242,
      "step": 58949
    },
    {
      "epoch": 0.000359796142578125,
      "step": 58949,
      "training_step_time": 0.4038069248199463
    },
    {
      "epoch": 0.00035980224609375,
      "grad_norm": 0.08966150879859924,
      "learning_rate": 8.37042345683714e-08,
      "loss": 0.0386,
      "step": 58950
    },
    {
      "epoch": 0.00035980224609375,
      "model_forward_time": 0.11517620086669922,
      "step": 58950
    },
    {
      "epoch": 0.00035980224609375,
      "step": 58950,
      "training_step_time": 0.47849082946777344
    },
    {
      "epoch": 0.000359808349609375,
      "model_forward_time": 0.11491680145263672,
      "step": 58951
    },
    {
      "epoch": 0.000359808349609375,
      "step": 58951,
      "training_step_time": 0.4627721309661865
    },
    {
      "epoch": 0.000359814453125,
      "model_forward_time": 0.11525511741638184,
      "step": 58952
    },
    {
      "epoch": 0.000359814453125,
      "step": 58952,
      "training_step_time": 0.453369140625
    },
    {
      "epoch": 0.000359820556640625,
      "model_forward_time": 0.11512637138366699,
      "step": 58953
    },
    {
      "epoch": 0.000359820556640625,
      "step": 58953,
      "training_step_time": 0.42905759811401367
    },
    {
      "epoch": 0.00035982666015625,
      "model_forward_time": 0.11464452743530273,
      "step": 58954
    },
    {
      "epoch": 0.00035982666015625,
      "step": 58954,
      "training_step_time": 0.43410825729370117
    },
    {
      "epoch": 0.000359832763671875,
      "model_forward_time": 0.11475205421447754,
      "step": 58955
    },
    {
      "epoch": 0.000359832763671875,
      "step": 58955,
      "training_step_time": 0.4997594356536865
    },
    {
      "epoch": 0.0003598388671875,
      "model_forward_time": 0.11390066146850586,
      "step": 58956
    },
    {
      "epoch": 0.0003598388671875,
      "step": 58956,
      "training_step_time": 0.39741969108581543
    },
    {
      "epoch": 0.000359844970703125,
      "model_forward_time": 0.11384010314941406,
      "step": 58957
    },
    {
      "epoch": 0.000359844970703125,
      "step": 58957,
      "training_step_time": 0.3950355052947998
    },
    {
      "epoch": 0.00035985107421875,
      "model_forward_time": 0.11441826820373535,
      "step": 58958
    },
    {
      "epoch": 0.00035985107421875,
      "step": 58958,
      "training_step_time": 0.39719343185424805
    },
    {
      "epoch": 0.000359857177734375,
      "model_forward_time": 0.11493492126464844,
      "step": 58959
    },
    {
      "epoch": 0.000359857177734375,
      "step": 58959,
      "training_step_time": 0.3908112049102783
    },
    {
      "epoch": 0.00035986328125,
      "grad_norm": 0.08922586590051651,
      "learning_rate": 8.211789490242261e-08,
      "loss": 0.0351,
      "step": 58960
    },
    {
      "epoch": 0.00035986328125,
      "model_forward_time": 0.11506295204162598,
      "step": 58960
    },
    {
      "epoch": 0.00035986328125,
      "step": 58960,
      "training_step_time": 0.3901824951171875
    },
    {
      "epoch": 0.000359869384765625,
      "model_forward_time": 0.11481165885925293,
      "step": 58961
    },
    {
      "epoch": 0.000359869384765625,
      "step": 58961,
      "training_step_time": 0.41210341453552246
    },
    {
      "epoch": 0.00035987548828125,
      "model_forward_time": 0.11494874954223633,
      "step": 58962
    },
    {
      "epoch": 0.00035987548828125,
      "step": 58962,
      "training_step_time": 0.3977797031402588
    },
    {
      "epoch": 0.000359881591796875,
      "model_forward_time": 0.11530184745788574,
      "step": 58963
    },
    {
      "epoch": 0.000359881591796875,
      "step": 58963,
      "training_step_time": 0.40793728828430176
    },
    {
      "epoch": 0.0003598876953125,
      "model_forward_time": 0.1152036190032959,
      "step": 58964
    },
    {
      "epoch": 0.0003598876953125,
      "step": 58964,
      "training_step_time": 0.41175413131713867
    },
    {
      "epoch": 0.000359893798828125,
      "model_forward_time": 0.11509394645690918,
      "step": 58965
    },
    {
      "epoch": 0.000359893798828125,
      "step": 58965,
      "training_step_time": 0.39939045906066895
    },
    {
      "epoch": 0.00035989990234375,
      "model_forward_time": 0.11518383026123047,
      "step": 58966
    },
    {
      "epoch": 0.00035989990234375,
      "step": 58966,
      "training_step_time": 0.45305728912353516
    },
    {
      "epoch": 0.000359906005859375,
      "model_forward_time": 0.11552572250366211,
      "step": 58967
    },
    {
      "epoch": 0.000359906005859375,
      "step": 58967,
      "training_step_time": 0.4676680564880371
    },
    {
      "epoch": 0.000359912109375,
      "model_forward_time": 0.11592268943786621,
      "step": 58968
    },
    {
      "epoch": 0.000359912109375,
      "step": 58968,
      "training_step_time": 0.4562199115753174
    },
    {
      "epoch": 0.000359918212890625,
      "model_forward_time": 0.11525249481201172,
      "step": 58969
    },
    {
      "epoch": 0.000359918212890625,
      "step": 58969,
      "training_step_time": 0.4216041564941406
    },
    {
      "epoch": 0.00035992431640625,
      "grad_norm": 0.08989136666059494,
      "learning_rate": 8.054671897104738e-08,
      "loss": 0.0354,
      "step": 58970
    },
    {
      "epoch": 0.00035992431640625,
      "model_forward_time": 0.11498808860778809,
      "step": 58970
    },
    {
      "epoch": 0.00035992431640625,
      "step": 58970,
      "training_step_time": 0.44767189025878906
    },
    {
      "epoch": 0.000359930419921875,
      "model_forward_time": 0.11410760879516602,
      "step": 58971
    },
    {
      "epoch": 0.000359930419921875,
      "step": 58971,
      "training_step_time": 0.3912050724029541
    },
    {
      "epoch": 0.0003599365234375,
      "model_forward_time": 0.11500978469848633,
      "step": 58972
    },
    {
      "epoch": 0.0003599365234375,
      "step": 58972,
      "training_step_time": 0.3805427551269531
    },
    {
      "epoch": 0.000359942626953125,
      "model_forward_time": 0.11564040184020996,
      "step": 58973
    },
    {
      "epoch": 0.000359942626953125,
      "step": 58973,
      "training_step_time": 0.3931241035461426
    },
    {
      "epoch": 0.00035994873046875,
      "model_forward_time": 0.1149752140045166,
      "step": 58974
    },
    {
      "epoch": 0.00035994873046875,
      "step": 58974,
      "training_step_time": 0.3962576389312744
    },
    {
      "epoch": 0.000359954833984375,
      "model_forward_time": 0.11551904678344727,
      "step": 58975
    },
    {
      "epoch": 0.000359954833984375,
      "step": 58975,
      "training_step_time": 0.3997032642364502
    },
    {
      "epoch": 0.0003599609375,
      "model_forward_time": 0.11533927917480469,
      "step": 58976
    },
    {
      "epoch": 0.0003599609375,
      "step": 58976,
      "training_step_time": 0.3930337429046631
    },
    {
      "epoch": 0.000359967041015625,
      "model_forward_time": 0.11575031280517578,
      "step": 58977
    },
    {
      "epoch": 0.000359967041015625,
      "step": 58977,
      "training_step_time": 0.4163784980773926
    },
    {
      "epoch": 0.00035997314453125,
      "model_forward_time": 0.11504530906677246,
      "step": 58978
    },
    {
      "epoch": 0.00035997314453125,
      "step": 58978,
      "training_step_time": 0.4319319725036621
    },
    {
      "epoch": 0.000359979248046875,
      "model_forward_time": 0.1149756908416748,
      "step": 58979
    },
    {
      "epoch": 0.000359979248046875,
      "step": 58979,
      "training_step_time": 0.43708109855651855
    },
    {
      "epoch": 0.0003599853515625,
      "grad_norm": 0.11839447915554047,
      "learning_rate": 7.899070725153613e-08,
      "loss": 0.0377,
      "step": 58980
    },
    {
      "epoch": 0.0003599853515625,
      "model_forward_time": 0.11516690254211426,
      "step": 58980
    },
    {
      "epoch": 0.0003599853515625,
      "step": 58980,
      "training_step_time": 0.4000108242034912
    },
    {
      "epoch": 0.000359991455078125,
      "model_forward_time": 0.11559128761291504,
      "step": 58981
    },
    {
      "epoch": 0.000359991455078125,
      "step": 58981,
      "training_step_time": 0.4227423667907715
    },
    {
      "epoch": 0.00035999755859375,
      "model_forward_time": 0.11462593078613281,
      "step": 58982
    },
    {
      "epoch": 0.00035999755859375,
      "step": 58982,
      "training_step_time": 0.5062894821166992
    },
    {
      "epoch": 0.000360003662109375,
      "model_forward_time": 0.11547970771789551,
      "step": 58983
    },
    {
      "epoch": 0.000360003662109375,
      "step": 58983,
      "training_step_time": 0.49686551094055176
    },
    {
      "epoch": 0.000360009765625,
      "model_forward_time": 0.1150825023651123,
      "step": 58984
    },
    {
      "epoch": 0.000360009765625,
      "step": 58984,
      "training_step_time": 0.49413251876831055
    },
    {
      "epoch": 0.000360015869140625,
      "model_forward_time": 0.11445140838623047,
      "step": 58985
    },
    {
      "epoch": 0.000360015869140625,
      "step": 58985,
      "training_step_time": 0.3868088722229004
    },
    {
      "epoch": 0.00036002197265625,
      "model_forward_time": 0.1146094799041748,
      "step": 58986
    },
    {
      "epoch": 0.00036002197265625,
      "step": 58986,
      "training_step_time": 0.3882148265838623
    },
    {
      "epoch": 0.000360028076171875,
      "model_forward_time": 0.11453413963317871,
      "step": 58987
    },
    {
      "epoch": 0.000360028076171875,
      "step": 58987,
      "training_step_time": 0.39324021339416504
    },
    {
      "epoch": 0.0003600341796875,
      "model_forward_time": 0.1148061752319336,
      "step": 58988
    },
    {
      "epoch": 0.0003600341796875,
      "step": 58988,
      "training_step_time": 0.3854990005493164
    },
    {
      "epoch": 0.000360040283203125,
      "model_forward_time": 0.11505341529846191,
      "step": 58989
    },
    {
      "epoch": 0.000360040283203125,
      "step": 58989,
      "training_step_time": 0.39539170265197754
    },
    {
      "epoch": 0.00036004638671875,
      "grad_norm": 0.07848986983299255,
      "learning_rate": 7.744986021656076e-08,
      "loss": 0.0371,
      "step": 58990
    },
    {
      "epoch": 0.00036004638671875,
      "model_forward_time": 0.11531352996826172,
      "step": 58990
    },
    {
      "epoch": 0.00036004638671875,
      "step": 58990,
      "training_step_time": 0.38185763359069824
    },
    {
      "epoch": 0.000360052490234375,
      "model_forward_time": 0.11478328704833984,
      "step": 58991
    },
    {
      "epoch": 0.000360052490234375,
      "step": 58991,
      "training_step_time": 0.4719991683959961
    },
    {
      "epoch": 0.00036005859375,
      "model_forward_time": 0.11491155624389648,
      "step": 58992
    },
    {
      "epoch": 0.00036005859375,
      "step": 58992,
      "training_step_time": 0.4030611515045166
    },
    {
      "epoch": 0.000360064697265625,
      "model_forward_time": 0.11514687538146973,
      "step": 58993
    },
    {
      "epoch": 0.000360064697265625,
      "step": 58993,
      "training_step_time": 0.39813709259033203
    },
    {
      "epoch": 0.00036007080078125,
      "model_forward_time": 0.11504387855529785,
      "step": 58994
    },
    {
      "epoch": 0.00036007080078125,
      "step": 58994,
      "training_step_time": 0.41119813919067383
    },
    {
      "epoch": 0.000360076904296875,
      "model_forward_time": 0.11482095718383789,
      "step": 58995
    },
    {
      "epoch": 0.000360076904296875,
      "step": 58995,
      "training_step_time": 0.5302104949951172
    },
    {
      "epoch": 0.0003600830078125,
      "model_forward_time": 0.11526370048522949,
      "step": 58996
    },
    {
      "epoch": 0.0003600830078125,
      "step": 58996,
      "training_step_time": 0.4125220775604248
    },
    {
      "epoch": 0.000360089111328125,
      "model_forward_time": 0.11568045616149902,
      "step": 58997
    },
    {
      "epoch": 0.000360089111328125,
      "step": 58997,
      "training_step_time": 0.40445399284362793
    },
    {
      "epoch": 0.00036009521484375,
      "model_forward_time": 0.11605215072631836,
      "step": 58998
    },
    {
      "epoch": 0.00036009521484375,
      "step": 58998,
      "training_step_time": 0.44370436668395996
    },
    {
      "epoch": 0.000360101318359375,
      "model_forward_time": 0.11517453193664551,
      "step": 58999
    },
    {
      "epoch": 0.000360101318359375,
      "step": 58999,
      "training_step_time": 0.47286105155944824
    },
    {
      "epoch": 0.000360107421875,
      "grad_norm": 0.0933973416686058,
      "learning_rate": 7.59241783341913e-08,
      "loss": 0.0338,
      "step": 59000
    },
    {
      "epoch": 0.000360107421875,
      "model_forward_time": 0.1124715805053711,
      "step": 59000
    },
    {
      "epoch": 0.000360107421875,
      "step": 59000,
      "training_step_time": 0.3506457805633545
    },
    {
      "epoch": 0.000360113525390625,
      "model_forward_time": 0.11176443099975586,
      "step": 59001
    },
    {
      "epoch": 0.000360113525390625,
      "step": 59001,
      "training_step_time": 0.3725249767303467
    },
    {
      "epoch": 0.00036011962890625,
      "model_forward_time": 0.11331892013549805,
      "step": 59002
    },
    {
      "epoch": 0.00036011962890625,
      "step": 59002,
      "training_step_time": 0.37418603897094727
    },
    {
      "epoch": 0.000360125732421875,
      "model_forward_time": 0.11291170120239258,
      "step": 59003
    },
    {
      "epoch": 0.000360125732421875,
      "step": 59003,
      "training_step_time": 0.37752723693847656
    },
    {
      "epoch": 0.0003601318359375,
      "model_forward_time": 0.1137847900390625,
      "step": 59004
    },
    {
      "epoch": 0.0003601318359375,
      "step": 59004,
      "training_step_time": 0.40666651725769043
    },
    {
      "epoch": 0.000360137939453125,
      "model_forward_time": 0.11451244354248047,
      "step": 59005
    },
    {
      "epoch": 0.000360137939453125,
      "step": 59005,
      "training_step_time": 0.3988621234893799
    },
    {
      "epoch": 0.00036014404296875,
      "model_forward_time": 0.11463189125061035,
      "step": 59006
    },
    {
      "epoch": 0.00036014404296875,
      "step": 59006,
      "training_step_time": 0.3887500762939453
    },
    {
      "epoch": 0.000360150146484375,
      "model_forward_time": 0.11484456062316895,
      "step": 59007
    },
    {
      "epoch": 0.000360150146484375,
      "step": 59007,
      "training_step_time": 0.38249802589416504
    },
    {
      "epoch": 0.00036015625,
      "model_forward_time": 0.1149909496307373,
      "step": 59008
    },
    {
      "epoch": 0.00036015625,
      "step": 59008,
      "training_step_time": 0.3923962116241455
    },
    {
      "epoch": 0.000360162353515625,
      "model_forward_time": 0.11546802520751953,
      "step": 59009
    },
    {
      "epoch": 0.000360162353515625,
      "step": 59009,
      "training_step_time": 0.38473033905029297
    },
    {
      "epoch": 0.00036016845703125,
      "grad_norm": 0.07810863852500916,
      "learning_rate": 7.44136620678848e-08,
      "loss": 0.0339,
      "step": 59010
    },
    {
      "epoch": 0.00036016845703125,
      "model_forward_time": 0.11550235748291016,
      "step": 59010
    },
    {
      "epoch": 0.00036016845703125,
      "step": 59010,
      "training_step_time": 0.4030036926269531
    },
    {
      "epoch": 0.000360174560546875,
      "model_forward_time": 0.1147150993347168,
      "step": 59011
    },
    {
      "epoch": 0.000360174560546875,
      "step": 59011,
      "training_step_time": 0.4101088047027588
    },
    {
      "epoch": 0.0003601806640625,
      "model_forward_time": 0.11513519287109375,
      "step": 59012
    },
    {
      "epoch": 0.0003601806640625,
      "step": 59012,
      "training_step_time": 0.47708940505981445
    },
    {
      "epoch": 0.000360186767578125,
      "model_forward_time": 0.1153714656829834,
      "step": 59013
    },
    {
      "epoch": 0.000360186767578125,
      "step": 59013,
      "training_step_time": 0.49891066551208496
    },
    {
      "epoch": 0.00036019287109375,
      "model_forward_time": 0.11601638793945312,
      "step": 59014
    },
    {
      "epoch": 0.00036019287109375,
      "step": 59014,
      "training_step_time": 0.44925761222839355
    },
    {
      "epoch": 0.000360198974609375,
      "model_forward_time": 0.11446166038513184,
      "step": 59015
    },
    {
      "epoch": 0.000360198974609375,
      "step": 59015,
      "training_step_time": 0.5093598365783691
    },
    {
      "epoch": 0.000360205078125,
      "model_forward_time": 0.11490654945373535,
      "step": 59016
    },
    {
      "epoch": 0.000360205078125,
      "step": 59016,
      "training_step_time": 0.39790964126586914
    },
    {
      "epoch": 0.000360211181640625,
      "model_forward_time": 0.11487817764282227,
      "step": 59017
    },
    {
      "epoch": 0.000360211181640625,
      "step": 59017,
      "training_step_time": 0.41523265838623047
    },
    {
      "epoch": 0.00036021728515625,
      "model_forward_time": 0.11461114883422852,
      "step": 59018
    },
    {
      "epoch": 0.00036021728515625,
      "step": 59018,
      "training_step_time": 0.408550500869751
    },
    {
      "epoch": 0.000360223388671875,
      "model_forward_time": 0.11455535888671875,
      "step": 59019
    },
    {
      "epoch": 0.000360223388671875,
      "step": 59019,
      "training_step_time": 0.39615797996520996
    },
    {
      "epoch": 0.0003602294921875,
      "grad_norm": 0.07057741284370422,
      "learning_rate": 7.291831187649645e-08,
      "loss": 0.0421,
      "step": 59020
    },
    {
      "epoch": 0.0003602294921875,
      "model_forward_time": 0.11507582664489746,
      "step": 59020
    },
    {
      "epoch": 0.0003602294921875,
      "step": 59020,
      "training_step_time": 0.4183516502380371
    },
    {
      "epoch": 0.000360235595703125,
      "model_forward_time": 0.11530041694641113,
      "step": 59021
    },
    {
      "epoch": 0.000360235595703125,
      "step": 59021,
      "training_step_time": 0.3965294361114502
    },
    {
      "epoch": 0.00036024169921875,
      "model_forward_time": 0.11489343643188477,
      "step": 59022
    },
    {
      "epoch": 0.00036024169921875,
      "step": 59022,
      "training_step_time": 0.4550013542175293
    },
    {
      "epoch": 0.000360247802734375,
      "model_forward_time": 0.11436867713928223,
      "step": 59023
    },
    {
      "epoch": 0.000360247802734375,
      "step": 59023,
      "training_step_time": 0.40274691581726074
    },
    {
      "epoch": 0.00036025390625,
      "model_forward_time": 0.1158301830291748,
      "step": 59024
    },
    {
      "epoch": 0.00036025390625,
      "step": 59024,
      "training_step_time": 0.44241857528686523
    },
    {
      "epoch": 0.000360260009765625,
      "model_forward_time": 0.11569976806640625,
      "step": 59025
    },
    {
      "epoch": 0.000360260009765625,
      "step": 59025,
      "training_step_time": 0.3998534679412842
    },
    {
      "epoch": 0.00036026611328125,
      "model_forward_time": 0.11479425430297852,
      "step": 59026
    },
    {
      "epoch": 0.00036026611328125,
      "step": 59026,
      "training_step_time": 0.45775938034057617
    },
    {
      "epoch": 0.000360272216796875,
      "model_forward_time": 0.11478900909423828,
      "step": 59027
    },
    {
      "epoch": 0.000360272216796875,
      "step": 59027,
      "training_step_time": 0.5124735832214355
    },
    {
      "epoch": 0.0003602783203125,
      "model_forward_time": 0.11505579948425293,
      "step": 59028
    },
    {
      "epoch": 0.0003602783203125,
      "step": 59028,
      "training_step_time": 0.4290647506713867
    },
    {
      "epoch": 0.000360284423828125,
      "model_forward_time": 0.11539673805236816,
      "step": 59029
    },
    {
      "epoch": 0.000360284423828125,
      "step": 59029,
      "training_step_time": 0.40275073051452637
    },
    {
      "epoch": 0.00036029052734375,
      "grad_norm": 0.07181375473737717,
      "learning_rate": 7.143812821427953e-08,
      "loss": 0.0405,
      "step": 59030
    },
    {
      "epoch": 0.00036029052734375,
      "model_forward_time": 0.11479306221008301,
      "step": 59030
    },
    {
      "epoch": 0.00036029052734375,
      "step": 59030,
      "training_step_time": 0.39499664306640625
    },
    {
      "epoch": 0.000360296630859375,
      "model_forward_time": 0.11464953422546387,
      "step": 59031
    },
    {
      "epoch": 0.000360296630859375,
      "step": 59031,
      "training_step_time": 0.39518260955810547
    },
    {
      "epoch": 0.000360302734375,
      "model_forward_time": 0.11522936820983887,
      "step": 59032
    },
    {
      "epoch": 0.000360302734375,
      "step": 59032,
      "training_step_time": 0.4213378429412842
    },
    {
      "epoch": 0.000360308837890625,
      "model_forward_time": 0.1148383617401123,
      "step": 59033
    },
    {
      "epoch": 0.000360308837890625,
      "step": 59033,
      "training_step_time": 0.3968665599822998
    },
    {
      "epoch": 0.00036031494140625,
      "model_forward_time": 0.11501932144165039,
      "step": 59034
    },
    {
      "epoch": 0.00036031494140625,
      "step": 59034,
      "training_step_time": 0.41910719871520996
    },
    {
      "epoch": 0.000360321044921875,
      "model_forward_time": 0.11487364768981934,
      "step": 59035
    },
    {
      "epoch": 0.000360321044921875,
      "step": 59035,
      "training_step_time": 0.39379239082336426
    },
    {
      "epoch": 0.0003603271484375,
      "model_forward_time": 0.11461973190307617,
      "step": 59036
    },
    {
      "epoch": 0.0003603271484375,
      "step": 59036,
      "training_step_time": 0.3968188762664795
    },
    {
      "epoch": 0.000360333251953125,
      "model_forward_time": 0.1153559684753418,
      "step": 59037
    },
    {
      "epoch": 0.000360333251953125,
      "step": 59037,
      "training_step_time": 0.4962301254272461
    },
    {
      "epoch": 0.00036033935546875,
      "model_forward_time": 0.11573600769042969,
      "step": 59038
    },
    {
      "epoch": 0.00036033935546875,
      "step": 59038,
      "training_step_time": 0.3902444839477539
    },
    {
      "epoch": 0.000360345458984375,
      "model_forward_time": 0.11446404457092285,
      "step": 59039
    },
    {
      "epoch": 0.000360345458984375,
      "step": 59039,
      "training_step_time": 0.38485050201416016
    },
    {
      "epoch": 0.0003603515625,
      "grad_norm": 0.10388034582138062,
      "learning_rate": 6.997311153086883e-08,
      "loss": 0.0351,
      "step": 59040
    },
    {
      "epoch": 0.0003603515625,
      "model_forward_time": 0.11474204063415527,
      "step": 59040
    },
    {
      "epoch": 0.0003603515625,
      "step": 59040,
      "training_step_time": 0.40132927894592285
    },
    {
      "epoch": 0.000360357666015625,
      "model_forward_time": 0.11492514610290527,
      "step": 59041
    },
    {
      "epoch": 0.000360357666015625,
      "step": 59041,
      "training_step_time": 0.40232181549072266
    },
    {
      "epoch": 0.00036036376953125,
      "model_forward_time": 0.11513972282409668,
      "step": 59042
    },
    {
      "epoch": 0.00036036376953125,
      "step": 59042,
      "training_step_time": 0.49842238426208496
    },
    {
      "epoch": 0.000360369873046875,
      "model_forward_time": 0.11499857902526855,
      "step": 59043
    },
    {
      "epoch": 0.000360369873046875,
      "step": 59043,
      "training_step_time": 0.42496323585510254
    },
    {
      "epoch": 0.0003603759765625,
      "model_forward_time": 0.11524653434753418,
      "step": 59044
    },
    {
      "epoch": 0.0003603759765625,
      "step": 59044,
      "training_step_time": 0.4051694869995117
    },
    {
      "epoch": 0.000360382080078125,
      "model_forward_time": 0.11502361297607422,
      "step": 59045
    },
    {
      "epoch": 0.000360382080078125,
      "step": 59045,
      "training_step_time": 0.4106895923614502
    },
    {
      "epoch": 0.00036038818359375,
      "model_forward_time": 0.11473441123962402,
      "step": 59046
    },
    {
      "epoch": 0.00036038818359375,
      "step": 59046,
      "training_step_time": 0.3923683166503906
    },
    {
      "epoch": 0.000360394287109375,
      "model_forward_time": 0.1148233413696289,
      "step": 59047
    },
    {
      "epoch": 0.000360394287109375,
      "step": 59047,
      "training_step_time": 0.3941614627838135
    },
    {
      "epoch": 0.000360400390625,
      "model_forward_time": 0.11552238464355469,
      "step": 59048
    },
    {
      "epoch": 0.000360400390625,
      "step": 59048,
      "training_step_time": 0.3917045593261719
    },
    {
      "epoch": 0.000360406494140625,
      "model_forward_time": 0.11568427085876465,
      "step": 59049
    },
    {
      "epoch": 0.000360406494140625,
      "step": 59049,
      "training_step_time": 0.3838624954223633
    },
    {
      "epoch": 0.00036041259765625,
      "grad_norm": 0.07893284410238266,
      "learning_rate": 6.852326227130834e-08,
      "loss": 0.0373,
      "step": 59050
    },
    {
      "epoch": 0.00036041259765625,
      "model_forward_time": 0.11504197120666504,
      "step": 59050
    },
    {
      "epoch": 0.00036041259765625,
      "step": 59050,
      "training_step_time": 0.3886117935180664
    },
    {
      "epoch": 0.000360418701171875,
      "model_forward_time": 0.11586499214172363,
      "step": 59051
    },
    {
      "epoch": 0.000360418701171875,
      "step": 59051,
      "training_step_time": 0.5073196887969971
    },
    {
      "epoch": 0.0003604248046875,
      "model_forward_time": 0.11612582206726074,
      "step": 59052
    },
    {
      "epoch": 0.0003604248046875,
      "step": 59052,
      "training_step_time": 0.5716278553009033
    },
    {
      "epoch": 0.000360430908203125,
      "model_forward_time": 0.12222838401794434,
      "step": 59053
    },
    {
      "epoch": 0.000360430908203125,
      "step": 59053,
      "training_step_time": 0.6711318492889404
    },
    {
      "epoch": 0.00036043701171875,
      "model_forward_time": 0.11900472640991211,
      "step": 59054
    },
    {
      "epoch": 0.00036043701171875,
      "step": 59054,
      "training_step_time": 0.7294826507568359
    },
    {
      "epoch": 0.000360443115234375,
      "model_forward_time": 0.11879611015319824,
      "step": 59055
    },
    {
      "epoch": 0.000360443115234375,
      "step": 59055,
      "training_step_time": 0.6509988307952881
    },
    {
      "epoch": 0.00036044921875,
      "model_forward_time": 0.1193232536315918,
      "step": 59056
    },
    {
      "epoch": 0.00036044921875,
      "step": 59056,
      "training_step_time": 0.6928207874298096
    },
    {
      "epoch": 0.000360455322265625,
      "model_forward_time": 0.12655258178710938,
      "step": 59057
    },
    {
      "epoch": 0.000360455322265625,
      "step": 59057,
      "training_step_time": 0.8819930553436279
    },
    {
      "epoch": 0.00036046142578125,
      "model_forward_time": 0.11992192268371582,
      "step": 59058
    },
    {
      "epoch": 0.00036046142578125,
      "step": 59058,
      "training_step_time": 0.5830926895141602
    },
    {
      "epoch": 0.000360467529296875,
      "model_forward_time": 0.11605691909790039,
      "step": 59059
    },
    {
      "epoch": 0.000360467529296875,
      "step": 59059,
      "training_step_time": 0.6474814414978027
    },
    {
      "epoch": 0.0003604736328125,
      "grad_norm": 0.07566532492637634,
      "learning_rate": 6.708858087601244e-08,
      "loss": 0.0348,
      "step": 59060
    },
    {
      "epoch": 0.0003604736328125,
      "model_forward_time": 0.12210226058959961,
      "step": 59060
    },
    {
      "epoch": 0.0003604736328125,
      "step": 59060,
      "training_step_time": 0.6831662654876709
    },
    {
      "epoch": 0.000360479736328125,
      "model_forward_time": 0.12169814109802246,
      "step": 59061
    },
    {
      "epoch": 0.000360479736328125,
      "step": 59061,
      "training_step_time": 0.7017486095428467
    },
    {
      "epoch": 0.00036048583984375,
      "model_forward_time": 0.12740468978881836,
      "step": 59062
    },
    {
      "epoch": 0.00036048583984375,
      "step": 59062,
      "training_step_time": 0.8272721767425537
    },
    {
      "epoch": 0.000360491943359375,
      "model_forward_time": 0.12224483489990234,
      "step": 59063
    },
    {
      "epoch": 0.000360491943359375,
      "step": 59063,
      "training_step_time": 0.6528170108795166
    },
    {
      "epoch": 0.000360498046875,
      "model_forward_time": 0.11903572082519531,
      "step": 59064
    },
    {
      "epoch": 0.000360498046875,
      "step": 59064,
      "training_step_time": 0.6684727668762207
    },
    {
      "epoch": 0.000360504150390625,
      "model_forward_time": 0.1162877082824707,
      "step": 59065
    },
    {
      "epoch": 0.000360504150390625,
      "step": 59065,
      "training_step_time": 0.7377979755401611
    },
    {
      "epoch": 0.00036051025390625,
      "model_forward_time": 0.11846685409545898,
      "step": 59066
    },
    {
      "epoch": 0.00036051025390625,
      "step": 59066,
      "training_step_time": 0.6628482341766357
    },
    {
      "epoch": 0.000360516357421875,
      "model_forward_time": 0.11956310272216797,
      "step": 59067
    },
    {
      "epoch": 0.000360516357421875,
      "step": 59067,
      "training_step_time": 0.6728157997131348
    },
    {
      "epoch": 0.0003605224609375,
      "model_forward_time": 0.11865592002868652,
      "step": 59068
    },
    {
      "epoch": 0.0003605224609375,
      "step": 59068,
      "training_step_time": 0.6722133159637451
    },
    {
      "epoch": 0.000360528564453125,
      "model_forward_time": 0.11821699142456055,
      "step": 59069
    },
    {
      "epoch": 0.000360528564453125,
      "step": 59069,
      "training_step_time": 0.666553258895874
    },
    {
      "epoch": 0.00036053466796875,
      "grad_norm": 0.12603600323200226,
      "learning_rate": 6.566906778079917e-08,
      "loss": 0.0375,
      "step": 59070
    },
    {
      "epoch": 0.00036053466796875,
      "model_forward_time": 0.11804747581481934,
      "step": 59070
    },
    {
      "epoch": 0.00036053466796875,
      "step": 59070,
      "training_step_time": 0.7310965061187744
    },
    {
      "epoch": 0.000360540771484375,
      "model_forward_time": 0.12339091300964355,
      "step": 59071
    },
    {
      "epoch": 0.000360540771484375,
      "step": 59071,
      "training_step_time": 0.6756737232208252
    },
    {
      "epoch": 0.000360546875,
      "model_forward_time": 0.12410593032836914,
      "step": 59072
    },
    {
      "epoch": 0.000360546875,
      "step": 59072,
      "training_step_time": 0.6861867904663086
    },
    {
      "epoch": 0.000360552978515625,
      "model_forward_time": 0.11797618865966797,
      "step": 59073
    },
    {
      "epoch": 0.000360552978515625,
      "step": 59073,
      "training_step_time": 0.7510547637939453
    },
    {
      "epoch": 0.00036055908203125,
      "model_forward_time": 0.11786127090454102,
      "step": 59074
    },
    {
      "epoch": 0.00036055908203125,
      "step": 59074,
      "training_step_time": 0.7033250331878662
    },
    {
      "epoch": 0.000360565185546875,
      "model_forward_time": 0.12164974212646484,
      "step": 59075
    },
    {
      "epoch": 0.000360565185546875,
      "step": 59075,
      "training_step_time": 0.69512939453125
    },
    {
      "epoch": 0.0003605712890625,
      "model_forward_time": 0.11640310287475586,
      "step": 59076
    },
    {
      "epoch": 0.0003605712890625,
      "step": 59076,
      "training_step_time": 0.6602396965026855
    },
    {
      "epoch": 0.000360577392578125,
      "model_forward_time": 0.12189364433288574,
      "step": 59077
    },
    {
      "epoch": 0.000360577392578125,
      "step": 59077,
      "training_step_time": 0.6568245887756348
    },
    {
      "epoch": 0.00036058349609375,
      "model_forward_time": 0.12515974044799805,
      "step": 59078
    },
    {
      "epoch": 0.00036058349609375,
      "step": 59078,
      "training_step_time": 0.6854572296142578
    },
    {
      "epoch": 0.000360589599609375,
      "model_forward_time": 0.12123727798461914,
      "step": 59079
    },
    {
      "epoch": 0.000360589599609375,
      "step": 59079,
      "training_step_time": 0.7367198467254639
    },
    {
      "epoch": 0.000360595703125,
      "grad_norm": 0.07878495007753372,
      "learning_rate": 6.426472341689027e-08,
      "loss": 0.0371,
      "step": 59080
    },
    {
      "epoch": 0.000360595703125,
      "model_forward_time": 0.11944341659545898,
      "step": 59080
    },
    {
      "epoch": 0.000360595703125,
      "step": 59080,
      "training_step_time": 0.6368234157562256
    },
    {
      "epoch": 0.000360601806640625,
      "model_forward_time": 0.11635470390319824,
      "step": 59081
    },
    {
      "epoch": 0.000360601806640625,
      "step": 59081,
      "training_step_time": 0.7081291675567627
    },
    {
      "epoch": 0.00036060791015625,
      "model_forward_time": 0.1295156478881836,
      "step": 59082
    },
    {
      "epoch": 0.00036060791015625,
      "step": 59082,
      "training_step_time": 0.5850458145141602
    },
    {
      "epoch": 0.000360614013671875,
      "model_forward_time": 0.1191401481628418,
      "step": 59083
    },
    {
      "epoch": 0.000360614013671875,
      "step": 59083,
      "training_step_time": 0.7156567573547363
    },
    {
      "epoch": 0.0003606201171875,
      "model_forward_time": 0.12144756317138672,
      "step": 59084
    },
    {
      "epoch": 0.0003606201171875,
      "step": 59084,
      "training_step_time": 0.7294108867645264
    },
    {
      "epoch": 0.000360626220703125,
      "model_forward_time": 0.11960721015930176,
      "step": 59085
    },
    {
      "epoch": 0.000360626220703125,
      "step": 59085,
      "training_step_time": 0.6777031421661377
    },
    {
      "epoch": 0.00036063232421875,
      "model_forward_time": 0.11978697776794434,
      "step": 59086
    },
    {
      "epoch": 0.00036063232421875,
      "step": 59086,
      "training_step_time": 0.6325812339782715
    },
    {
      "epoch": 0.000360638427734375,
      "model_forward_time": 0.11552023887634277,
      "step": 59087
    },
    {
      "epoch": 0.000360638427734375,
      "step": 59087,
      "training_step_time": 0.5944771766662598
    },
    {
      "epoch": 0.00036064453125,
      "model_forward_time": 0.11749005317687988,
      "step": 59088
    },
    {
      "epoch": 0.00036064453125,
      "step": 59088,
      "training_step_time": 0.7085332870483398
    },
    {
      "epoch": 0.000360650634765625,
      "model_forward_time": 0.11729192733764648,
      "step": 59089
    },
    {
      "epoch": 0.000360650634765625,
      "step": 59089,
      "training_step_time": 0.6623141765594482
    },
    {
      "epoch": 0.00036065673828125,
      "grad_norm": 0.06667390465736389,
      "learning_rate": 6.287554821087783e-08,
      "loss": 0.0335,
      "step": 59090
    },
    {
      "epoch": 0.00036065673828125,
      "model_forward_time": 0.1165473461151123,
      "step": 59090
    },
    {
      "epoch": 0.00036065673828125,
      "step": 59090,
      "training_step_time": 0.6429169178009033
    },
    {
      "epoch": 0.000360662841796875,
      "model_forward_time": 0.11727762222290039,
      "step": 59091
    },
    {
      "epoch": 0.000360662841796875,
      "step": 59091,
      "training_step_time": 0.7400405406951904
    },
    {
      "epoch": 0.0003606689453125,
      "model_forward_time": 0.11760210990905762,
      "step": 59092
    },
    {
      "epoch": 0.0003606689453125,
      "step": 59092,
      "training_step_time": 0.6888830661773682
    },
    {
      "epoch": 0.000360675048828125,
      "model_forward_time": 0.12488961219787598,
      "step": 59093
    },
    {
      "epoch": 0.000360675048828125,
      "step": 59093,
      "training_step_time": 0.7537107467651367
    },
    {
      "epoch": 0.00036068115234375,
      "model_forward_time": 0.1208183765411377,
      "step": 59094
    },
    {
      "epoch": 0.00036068115234375,
      "step": 59094,
      "training_step_time": 0.6223821640014648
    },
    {
      "epoch": 0.000360687255859375,
      "model_forward_time": 0.1202235221862793,
      "step": 59095
    },
    {
      "epoch": 0.000360687255859375,
      "step": 59095,
      "training_step_time": 0.7504041194915771
    },
    {
      "epoch": 0.000360693359375,
      "model_forward_time": 0.11887764930725098,
      "step": 59096
    },
    {
      "epoch": 0.000360693359375,
      "step": 59096,
      "training_step_time": 0.7286171913146973
    },
    {
      "epoch": 0.000360699462890625,
      "model_forward_time": 0.11821246147155762,
      "step": 59097
    },
    {
      "epoch": 0.000360699462890625,
      "step": 59097,
      "training_step_time": 0.7021622657775879
    },
    {
      "epoch": 0.00036070556640625,
      "model_forward_time": 0.11751270294189453,
      "step": 59098
    },
    {
      "epoch": 0.00036070556640625,
      "step": 59098,
      "training_step_time": 0.6324613094329834
    },
    {
      "epoch": 0.000360711669921875,
      "model_forward_time": 0.11971831321716309,
      "step": 59099
    },
    {
      "epoch": 0.000360711669921875,
      "step": 59099,
      "training_step_time": 0.648550271987915
    },
    {
      "epoch": 0.0003607177734375,
      "grad_norm": 0.10766042023897171,
      "learning_rate": 6.150154258476315e-08,
      "loss": 0.0342,
      "step": 59100
    },
    {
      "epoch": 0.0003607177734375,
      "model_forward_time": 0.11971902847290039,
      "step": 59100
    },
    {
      "epoch": 0.0003607177734375,
      "step": 59100,
      "training_step_time": 0.7016663551330566
    },
    {
      "epoch": 0.000360723876953125,
      "model_forward_time": 0.12136363983154297,
      "step": 59101
    },
    {
      "epoch": 0.000360723876953125,
      "step": 59101,
      "training_step_time": 0.668372392654419
    },
    {
      "epoch": 0.00036072998046875,
      "model_forward_time": 0.12180137634277344,
      "step": 59102
    },
    {
      "epoch": 0.00036072998046875,
      "step": 59102,
      "training_step_time": 0.6767628192901611
    },
    {
      "epoch": 0.000360736083984375,
      "model_forward_time": 0.12831997871398926,
      "step": 59103
    },
    {
      "epoch": 0.000360736083984375,
      "step": 59103,
      "training_step_time": 0.7044486999511719
    },
    {
      "epoch": 0.0003607421875,
      "model_forward_time": 0.11994528770446777,
      "step": 59104
    },
    {
      "epoch": 0.0003607421875,
      "step": 59104,
      "training_step_time": 0.6789391040802002
    },
    {
      "epoch": 0.000360748291015625,
      "model_forward_time": 0.11560606956481934,
      "step": 59105
    },
    {
      "epoch": 0.000360748291015625,
      "step": 59105,
      "training_step_time": 0.710258960723877
    },
    {
      "epoch": 0.00036075439453125,
      "model_forward_time": 0.12342596054077148,
      "step": 59106
    },
    {
      "epoch": 0.00036075439453125,
      "step": 59106,
      "training_step_time": 0.6412384510040283
    },
    {
      "epoch": 0.000360760498046875,
      "model_forward_time": 0.12335085868835449,
      "step": 59107
    },
    {
      "epoch": 0.000360760498046875,
      "step": 59107,
      "training_step_time": 0.703528881072998
    },
    {
      "epoch": 0.0003607666015625,
      "model_forward_time": 0.12620854377746582,
      "step": 59108
    },
    {
      "epoch": 0.0003607666015625,
      "step": 59108,
      "training_step_time": 0.713001012802124
    },
    {
      "epoch": 0.000360772705078125,
      "model_forward_time": 0.11644172668457031,
      "step": 59109
    },
    {
      "epoch": 0.000360772705078125,
      "step": 59109,
      "training_step_time": 0.6740257740020752
    },
    {
      "epoch": 0.00036077880859375,
      "grad_norm": 0.1141815185546875,
      "learning_rate": 6.014270695592905e-08,
      "loss": 0.0367,
      "step": 59110
    },
    {
      "epoch": 0.00036077880859375,
      "model_forward_time": 0.11881136894226074,
      "step": 59110
    },
    {
      "epoch": 0.00036077880859375,
      "step": 59110,
      "training_step_time": 0.5819802284240723
    },
    {
      "epoch": 0.000360784912109375,
      "model_forward_time": 0.11983609199523926,
      "step": 59111
    },
    {
      "epoch": 0.000360784912109375,
      "step": 59111,
      "training_step_time": 0.7018170356750488
    },
    {
      "epoch": 0.000360791015625,
      "model_forward_time": 0.12333917617797852,
      "step": 59112
    },
    {
      "epoch": 0.000360791015625,
      "step": 59112,
      "training_step_time": 0.6835529804229736
    },
    {
      "epoch": 0.000360797119140625,
      "model_forward_time": 0.12380719184875488,
      "step": 59113
    },
    {
      "epoch": 0.000360797119140625,
      "step": 59113,
      "training_step_time": 0.651165246963501
    },
    {
      "epoch": 0.00036080322265625,
      "model_forward_time": 0.12081074714660645,
      "step": 59114
    },
    {
      "epoch": 0.00036080322265625,
      "step": 59114,
      "training_step_time": 0.6535956859588623
    },
    {
      "epoch": 0.000360809326171875,
      "model_forward_time": 0.11879324913024902,
      "step": 59115
    },
    {
      "epoch": 0.000360809326171875,
      "step": 59115,
      "training_step_time": 0.6407008171081543
    },
    {
      "epoch": 0.0003608154296875,
      "model_forward_time": 0.11830449104309082,
      "step": 59116
    },
    {
      "epoch": 0.0003608154296875,
      "step": 59116,
      "training_step_time": 0.6447441577911377
    },
    {
      "epoch": 0.000360821533203125,
      "model_forward_time": 0.12005233764648438,
      "step": 59117
    },
    {
      "epoch": 0.000360821533203125,
      "step": 59117,
      "training_step_time": 0.6501569747924805
    },
    {
      "epoch": 0.00036082763671875,
      "model_forward_time": 0.13602638244628906,
      "step": 59118
    },
    {
      "epoch": 0.00036082763671875,
      "step": 59118,
      "training_step_time": 0.6712729930877686
    },
    {
      "epoch": 0.000360833740234375,
      "model_forward_time": 0.12032175064086914,
      "step": 59119
    },
    {
      "epoch": 0.000360833740234375,
      "step": 59119,
      "training_step_time": 0.5720388889312744
    },
    {
      "epoch": 0.00036083984375,
      "grad_norm": 0.08776924014091492,
      "learning_rate": 5.8799041737150896e-08,
      "loss": 0.0358,
      "step": 59120
    },
    {
      "epoch": 0.00036083984375,
      "model_forward_time": 0.11755633354187012,
      "step": 59120
    },
    {
      "epoch": 0.00036083984375,
      "step": 59120,
      "training_step_time": 0.4382150173187256
    },
    {
      "epoch": 0.000360845947265625,
      "model_forward_time": 0.11819291114807129,
      "step": 59121
    },
    {
      "epoch": 0.000360845947265625,
      "step": 59121,
      "training_step_time": 0.5468318462371826
    },
    {
      "epoch": 0.00036085205078125,
      "model_forward_time": 0.11722183227539062,
      "step": 59122
    },
    {
      "epoch": 0.00036085205078125,
      "step": 59122,
      "training_step_time": 0.6130387783050537
    },
    {
      "epoch": 0.000360858154296875,
      "model_forward_time": 0.11700057983398438,
      "step": 59123
    },
    {
      "epoch": 0.000360858154296875,
      "step": 59123,
      "training_step_time": 0.5341856479644775
    },
    {
      "epoch": 0.0003608642578125,
      "model_forward_time": 0.11646437644958496,
      "step": 59124
    },
    {
      "epoch": 0.0003608642578125,
      "step": 59124,
      "training_step_time": 0.5252037048339844
    },
    {
      "epoch": 0.000360870361328125,
      "model_forward_time": 0.11550784111022949,
      "step": 59125
    },
    {
      "epoch": 0.000360870361328125,
      "step": 59125,
      "training_step_time": 0.4258885383605957
    },
    {
      "epoch": 0.00036087646484375,
      "model_forward_time": 0.11494636535644531,
      "step": 59126
    },
    {
      "epoch": 0.00036087646484375,
      "step": 59126,
      "training_step_time": 0.4056265354156494
    },
    {
      "epoch": 0.000360882568359375,
      "model_forward_time": 0.11512136459350586,
      "step": 59127
    },
    {
      "epoch": 0.000360882568359375,
      "step": 59127,
      "training_step_time": 0.40424299240112305
    },
    {
      "epoch": 0.000360888671875,
      "model_forward_time": 0.11495709419250488,
      "step": 59128
    },
    {
      "epoch": 0.000360888671875,
      "step": 59128,
      "training_step_time": 0.39284610748291016
    },
    {
      "epoch": 0.000360894775390625,
      "model_forward_time": 0.11585879325866699,
      "step": 59129
    },
    {
      "epoch": 0.000360894775390625,
      "step": 59129,
      "training_step_time": 0.3925492763519287
    },
    {
      "epoch": 0.00036090087890625,
      "grad_norm": 0.07828228920698166,
      "learning_rate": 5.747054733660773e-08,
      "loss": 0.036,
      "step": 59130
    },
    {
      "epoch": 0.00036090087890625,
      "model_forward_time": 0.115325927734375,
      "step": 59130
    },
    {
      "epoch": 0.00036090087890625,
      "step": 59130,
      "training_step_time": 0.41188645362854004
    },
    {
      "epoch": 0.000360906982421875,
      "model_forward_time": 0.11449360847473145,
      "step": 59131
    },
    {
      "epoch": 0.000360906982421875,
      "step": 59131,
      "training_step_time": 0.4963524341583252
    },
    {
      "epoch": 0.0003609130859375,
      "model_forward_time": 0.1148836612701416,
      "step": 59132
    },
    {
      "epoch": 0.0003609130859375,
      "step": 59132,
      "training_step_time": 0.4402170181274414
    },
    {
      "epoch": 0.000360919189453125,
      "model_forward_time": 0.11482787132263184,
      "step": 59133
    },
    {
      "epoch": 0.000360919189453125,
      "step": 59133,
      "training_step_time": 0.3937349319458008
    },
    {
      "epoch": 0.00036092529296875,
      "model_forward_time": 0.11641621589660645,
      "step": 59134
    },
    {
      "epoch": 0.00036092529296875,
      "step": 59134,
      "training_step_time": 0.3962976932525635
    },
    {
      "epoch": 0.000360931396484375,
      "model_forward_time": 0.11484479904174805,
      "step": 59135
    },
    {
      "epoch": 0.000360931396484375,
      "step": 59135,
      "training_step_time": 0.46774721145629883
    },
    {
      "epoch": 0.0003609375,
      "model_forward_time": 0.11458754539489746,
      "step": 59136
    },
    {
      "epoch": 0.0003609375,
      "step": 59136,
      "training_step_time": 0.5059769153594971
    },
    {
      "epoch": 0.000360943603515625,
      "model_forward_time": 0.11462831497192383,
      "step": 59137
    },
    {
      "epoch": 0.000360943603515625,
      "step": 59137,
      "training_step_time": 0.4641566276550293
    },
    {
      "epoch": 0.00036094970703125,
      "model_forward_time": 0.11494970321655273,
      "step": 59138
    },
    {
      "epoch": 0.00036094970703125,
      "step": 59138,
      "training_step_time": 0.49601292610168457
    },
    {
      "epoch": 0.000360955810546875,
      "model_forward_time": 0.11436343193054199,
      "step": 59139
    },
    {
      "epoch": 0.000360955810546875,
      "step": 59139,
      "training_step_time": 0.3942074775695801
    },
    {
      "epoch": 0.0003609619140625,
      "grad_norm": 0.08278775215148926,
      "learning_rate": 5.615722415785451e-08,
      "loss": 0.0335,
      "step": 59140
    },
    {
      "epoch": 0.0003609619140625,
      "model_forward_time": 0.11524081230163574,
      "step": 59140
    },
    {
      "epoch": 0.0003609619140625,
      "step": 59140,
      "training_step_time": 0.3919496536254883
    },
    {
      "epoch": 0.000360968017578125,
      "model_forward_time": 0.11553406715393066,
      "step": 59141
    },
    {
      "epoch": 0.000360968017578125,
      "step": 59141,
      "training_step_time": 0.3957176208496094
    },
    {
      "epoch": 0.00036097412109375,
      "model_forward_time": 0.11565351486206055,
      "step": 59142
    },
    {
      "epoch": 0.00036097412109375,
      "step": 59142,
      "training_step_time": 0.39467406272888184
    },
    {
      "epoch": 0.000360980224609375,
      "model_forward_time": 0.11511373519897461,
      "step": 59143
    },
    {
      "epoch": 0.000360980224609375,
      "step": 59143,
      "training_step_time": 0.3942594528198242
    },
    {
      "epoch": 0.000360986328125,
      "model_forward_time": 0.11532402038574219,
      "step": 59144
    },
    {
      "epoch": 0.000360986328125,
      "step": 59144,
      "training_step_time": 0.4948272705078125
    },
    {
      "epoch": 0.000360992431640625,
      "model_forward_time": 0.1147010326385498,
      "step": 59145
    },
    {
      "epoch": 0.000360992431640625,
      "step": 59145,
      "training_step_time": 0.4136512279510498
    },
    {
      "epoch": 0.00036099853515625,
      "model_forward_time": 0.11473989486694336,
      "step": 59146
    },
    {
      "epoch": 0.00036099853515625,
      "step": 59146,
      "training_step_time": 0.4480767250061035
    },
    {
      "epoch": 0.000361004638671875,
      "model_forward_time": 0.11483287811279297,
      "step": 59147
    },
    {
      "epoch": 0.000361004638671875,
      "step": 59147,
      "training_step_time": 0.4205935001373291
    },
    {
      "epoch": 0.0003610107421875,
      "model_forward_time": 0.11597180366516113,
      "step": 59148
    },
    {
      "epoch": 0.0003610107421875,
      "step": 59148,
      "training_step_time": 0.39664220809936523
    },
    {
      "epoch": 0.000361016845703125,
      "model_forward_time": 0.11534905433654785,
      "step": 59149
    },
    {
      "epoch": 0.000361016845703125,
      "step": 59149,
      "training_step_time": 0.4662628173828125
    },
    {
      "epoch": 0.00036102294921875,
      "grad_norm": 0.09165303409099579,
      "learning_rate": 5.485907259984435e-08,
      "loss": 0.0434,
      "step": 59150
    },
    {
      "epoch": 0.00036102294921875,
      "model_forward_time": 0.11524701118469238,
      "step": 59150
    },
    {
      "epoch": 0.00036102294921875,
      "step": 59150,
      "training_step_time": 0.41844964027404785
    },
    {
      "epoch": 0.000361029052734375,
      "model_forward_time": 0.1147603988647461,
      "step": 59151
    },
    {
      "epoch": 0.000361029052734375,
      "step": 59151,
      "training_step_time": 0.48423218727111816
    },
    {
      "epoch": 0.00036103515625,
      "model_forward_time": 0.1150970458984375,
      "step": 59152
    },
    {
      "epoch": 0.00036103515625,
      "step": 59152,
      "training_step_time": 0.41306185722351074
    },
    {
      "epoch": 0.000361041259765625,
      "model_forward_time": 0.11440253257751465,
      "step": 59153
    },
    {
      "epoch": 0.000361041259765625,
      "step": 59153,
      "training_step_time": 0.43691182136535645
    },
    {
      "epoch": 0.00036104736328125,
      "model_forward_time": 0.11429476737976074,
      "step": 59154
    },
    {
      "epoch": 0.00036104736328125,
      "step": 59154,
      "training_step_time": 0.39466333389282227
    },
    {
      "epoch": 0.000361053466796875,
      "model_forward_time": 0.11493802070617676,
      "step": 59155
    },
    {
      "epoch": 0.000361053466796875,
      "step": 59155,
      "training_step_time": 0.3948936462402344
    },
    {
      "epoch": 0.0003610595703125,
      "model_forward_time": 0.11476635932922363,
      "step": 59156
    },
    {
      "epoch": 0.0003610595703125,
      "step": 59156,
      "training_step_time": 0.388047456741333
    },
    {
      "epoch": 0.000361065673828125,
      "model_forward_time": 0.11505842208862305,
      "step": 59157
    },
    {
      "epoch": 0.000361065673828125,
      "step": 59157,
      "training_step_time": 0.44165921211242676
    },
    {
      "epoch": 0.00036107177734375,
      "model_forward_time": 0.11496734619140625,
      "step": 59158
    },
    {
      "epoch": 0.00036107177734375,
      "step": 59158,
      "training_step_time": 0.44861412048339844
    },
    {
      "epoch": 0.000361077880859375,
      "model_forward_time": 0.11575007438659668,
      "step": 59159
    },
    {
      "epoch": 0.000361077880859375,
      "step": 59159,
      "training_step_time": 0.3729128837585449
    },
    {
      "epoch": 0.000361083984375,
      "grad_norm": 0.10613316297531128,
      "learning_rate": 5.3576093056922906e-08,
      "loss": 0.0389,
      "step": 59160
    },
    {
      "epoch": 0.000361083984375,
      "model_forward_time": 0.11507153511047363,
      "step": 59160
    },
    {
      "epoch": 0.000361083984375,
      "step": 59160,
      "training_step_time": 0.456897497177124
    },
    {
      "epoch": 0.000361090087890625,
      "model_forward_time": 0.11544513702392578,
      "step": 59161
    },
    {
      "epoch": 0.000361090087890625,
      "step": 59161,
      "training_step_time": 0.4204845428466797
    },
    {
      "epoch": 0.00036109619140625,
      "model_forward_time": 0.11542439460754395,
      "step": 59162
    },
    {
      "epoch": 0.00036109619140625,
      "step": 59162,
      "training_step_time": 0.3652529716491699
    },
    {
      "epoch": 0.000361102294921875,
      "model_forward_time": 0.11459040641784668,
      "step": 59163
    },
    {
      "epoch": 0.000361102294921875,
      "step": 59163,
      "training_step_time": 0.42577290534973145
    },
    {
      "epoch": 0.0003611083984375,
      "model_forward_time": 0.1152346134185791,
      "step": 59164
    },
    {
      "epoch": 0.0003611083984375,
      "step": 59164,
      "training_step_time": 0.48703622817993164
    },
    {
      "epoch": 0.000361114501953125,
      "model_forward_time": 0.11463236808776855,
      "step": 59165
    },
    {
      "epoch": 0.000361114501953125,
      "step": 59165,
      "training_step_time": 0.4084131717681885
    },
    {
      "epoch": 0.00036112060546875,
      "model_forward_time": 0.11554741859436035,
      "step": 59166
    },
    {
      "epoch": 0.00036112060546875,
      "step": 59166,
      "training_step_time": 0.4066274166107178
    },
    {
      "epoch": 0.000361126708984375,
      "model_forward_time": 0.1154630184173584,
      "step": 59167
    },
    {
      "epoch": 0.000361126708984375,
      "step": 59167,
      "training_step_time": 0.47185230255126953
    },
    {
      "epoch": 0.0003611328125,
      "model_forward_time": 0.11475110054016113,
      "step": 59168
    },
    {
      "epoch": 0.0003611328125,
      "step": 59168,
      "training_step_time": 0.3954300880432129
    },
    {
      "epoch": 0.000361138916015625,
      "model_forward_time": 0.1159517765045166,
      "step": 59169
    },
    {
      "epoch": 0.000361138916015625,
      "step": 59169,
      "training_step_time": 0.3835334777832031
    },
    {
      "epoch": 0.00036114501953125,
      "grad_norm": 0.10012800991535187,
      "learning_rate": 5.2308285918828415e-08,
      "loss": 0.0393,
      "step": 59170
    },
    {
      "epoch": 0.00036114501953125,
      "model_forward_time": 0.11518692970275879,
      "step": 59170
    },
    {
      "epoch": 0.00036114501953125,
      "step": 59170,
      "training_step_time": 0.4016892910003662
    },
    {
      "epoch": 0.000361151123046875,
      "model_forward_time": 0.11576461791992188,
      "step": 59171
    },
    {
      "epoch": 0.000361151123046875,
      "step": 59171,
      "training_step_time": 0.4515049457550049
    },
    {
      "epoch": 0.0003611572265625,
      "model_forward_time": 0.11485958099365234,
      "step": 59172
    },
    {
      "epoch": 0.0003611572265625,
      "step": 59172,
      "training_step_time": 0.4189717769622803
    },
    {
      "epoch": 0.000361163330078125,
      "model_forward_time": 0.11452388763427734,
      "step": 59173
    },
    {
      "epoch": 0.000361163330078125,
      "step": 59173,
      "training_step_time": 0.45134568214416504
    },
    {
      "epoch": 0.00036116943359375,
      "model_forward_time": 0.11504244804382324,
      "step": 59174
    },
    {
      "epoch": 0.00036116943359375,
      "step": 59174,
      "training_step_time": 0.45954084396362305
    },
    {
      "epoch": 0.000361175537109375,
      "model_forward_time": 0.11466836929321289,
      "step": 59175
    },
    {
      "epoch": 0.000361175537109375,
      "step": 59175,
      "training_step_time": 0.3951702117919922
    },
    {
      "epoch": 0.000361181640625,
      "model_forward_time": 0.11527371406555176,
      "step": 59176
    },
    {
      "epoch": 0.000361181640625,
      "step": 59176,
      "training_step_time": 0.3995707035064697
    },
    {
      "epoch": 0.000361187744140625,
      "model_forward_time": 0.11552071571350098,
      "step": 59177
    },
    {
      "epoch": 0.000361187744140625,
      "step": 59177,
      "training_step_time": 0.4192638397216797
    },
    {
      "epoch": 0.00036119384765625,
      "model_forward_time": 0.11617255210876465,
      "step": 59178
    },
    {
      "epoch": 0.00036119384765625,
      "step": 59178,
      "training_step_time": 0.4336130619049072
    },
    {
      "epoch": 0.000361199951171875,
      "model_forward_time": 0.11542129516601562,
      "step": 59179
    },
    {
      "epoch": 0.000361199951171875,
      "step": 59179,
      "training_step_time": 0.41694211959838867
    },
    {
      "epoch": 0.0003612060546875,
      "grad_norm": 0.07299037277698517,
      "learning_rate": 5.105565157068615e-08,
      "loss": 0.0306,
      "step": 59180
    },
    {
      "epoch": 0.0003612060546875,
      "model_forward_time": 0.11458253860473633,
      "step": 59180
    },
    {
      "epoch": 0.0003612060546875,
      "step": 59180,
      "training_step_time": 0.3866751194000244
    },
    {
      "epoch": 0.000361212158203125,
      "model_forward_time": 0.11524057388305664,
      "step": 59181
    },
    {
      "epoch": 0.000361212158203125,
      "step": 59181,
      "training_step_time": 0.39760446548461914
    },
    {
      "epoch": 0.00036121826171875,
      "model_forward_time": 0.11489462852478027,
      "step": 59182
    },
    {
      "epoch": 0.00036121826171875,
      "step": 59182,
      "training_step_time": 0.456453800201416
    },
    {
      "epoch": 0.000361224365234375,
      "model_forward_time": 0.11569762229919434,
      "step": 59183
    },
    {
      "epoch": 0.000361224365234375,
      "step": 59183,
      "training_step_time": 0.4026951789855957
    },
    {
      "epoch": 0.00036123046875,
      "model_forward_time": 0.11517620086669922,
      "step": 59184
    },
    {
      "epoch": 0.00036123046875,
      "step": 59184,
      "training_step_time": 0.397174596786499
    },
    {
      "epoch": 0.000361236572265625,
      "model_forward_time": 0.11546087265014648,
      "step": 59185
    },
    {
      "epoch": 0.000361236572265625,
      "step": 59185,
      "training_step_time": 0.43717002868652344
    },
    {
      "epoch": 0.00036124267578125,
      "model_forward_time": 0.1149899959564209,
      "step": 59186
    },
    {
      "epoch": 0.00036124267578125,
      "step": 59186,
      "training_step_time": 0.4189906120300293
    },
    {
      "epoch": 0.000361248779296875,
      "model_forward_time": 0.11489415168762207,
      "step": 59187
    },
    {
      "epoch": 0.000361248779296875,
      "step": 59187,
      "training_step_time": 0.4316558837890625
    },
    {
      "epoch": 0.0003612548828125,
      "model_forward_time": 0.11463403701782227,
      "step": 59188
    },
    {
      "epoch": 0.0003612548828125,
      "step": 59188,
      "training_step_time": 0.7001338005065918
    },
    {
      "epoch": 0.000361260986328125,
      "model_forward_time": 0.11464548110961914,
      "step": 59189
    },
    {
      "epoch": 0.000361260986328125,
      "step": 59189,
      "training_step_time": 0.3856942653656006
    },
    {
      "epoch": 0.00036126708984375,
      "grad_norm": 0.10044877231121063,
      "learning_rate": 4.981819039300284e-08,
      "loss": 0.0373,
      "step": 59190
    },
    {
      "epoch": 0.00036126708984375,
      "model_forward_time": 0.11450743675231934,
      "step": 59190
    },
    {
      "epoch": 0.00036126708984375,
      "step": 59190,
      "training_step_time": 0.39902257919311523
    },
    {
      "epoch": 0.000361273193359375,
      "model_forward_time": 0.11475348472595215,
      "step": 59191
    },
    {
      "epoch": 0.000361273193359375,
      "step": 59191,
      "training_step_time": 0.4435765743255615
    },
    {
      "epoch": 0.000361279296875,
      "model_forward_time": 0.11458921432495117,
      "step": 59192
    },
    {
      "epoch": 0.000361279296875,
      "step": 59192,
      "training_step_time": 0.48594021797180176
    },
    {
      "epoch": 0.000361285400390625,
      "model_forward_time": 0.1146993637084961,
      "step": 59193
    },
    {
      "epoch": 0.000361285400390625,
      "step": 59193,
      "training_step_time": 0.4041938781738281
    },
    {
      "epoch": 0.00036129150390625,
      "model_forward_time": 0.11462020874023438,
      "step": 59194
    },
    {
      "epoch": 0.00036129150390625,
      "step": 59194,
      "training_step_time": 0.39909815788269043
    },
    {
      "epoch": 0.000361297607421875,
      "model_forward_time": 0.11490273475646973,
      "step": 59195
    },
    {
      "epoch": 0.000361297607421875,
      "step": 59195,
      "training_step_time": 0.4181478023529053
    },
    {
      "epoch": 0.0003613037109375,
      "model_forward_time": 0.11532735824584961,
      "step": 59196
    },
    {
      "epoch": 0.0003613037109375,
      "step": 59196,
      "training_step_time": 0.44672250747680664
    },
    {
      "epoch": 0.000361309814453125,
      "model_forward_time": 0.11469221115112305,
      "step": 59197
    },
    {
      "epoch": 0.000361309814453125,
      "step": 59197,
      "training_step_time": 0.390643835067749
    },
    {
      "epoch": 0.00036131591796875,
      "model_forward_time": 0.11539554595947266,
      "step": 59198
    },
    {
      "epoch": 0.00036131591796875,
      "step": 59198,
      "training_step_time": 0.40578484535217285
    },
    {
      "epoch": 0.000361322021484375,
      "model_forward_time": 0.1145939826965332,
      "step": 59199
    },
    {
      "epoch": 0.000361322021484375,
      "step": 59199,
      "training_step_time": 0.40085577964782715
    },
    {
      "epoch": 0.000361328125,
      "grad_norm": 0.07774262875318527,
      "learning_rate": 4.859590276170556e-08,
      "loss": 0.0352,
      "step": 59200
    },
    {
      "epoch": 0.000361328125,
      "model_forward_time": 0.11481857299804688,
      "step": 59200
    },
    {
      "epoch": 0.000361328125,
      "step": 59200,
      "training_step_time": 0.6668705940246582
    },
    {
      "epoch": 0.000361334228515625,
      "model_forward_time": 0.1146385669708252,
      "step": 59201
    },
    {
      "epoch": 0.000361334228515625,
      "step": 59201,
      "training_step_time": 0.3862333297729492
    },
    {
      "epoch": 0.00036134033203125,
      "model_forward_time": 0.11448955535888672,
      "step": 59202
    },
    {
      "epoch": 0.00036134033203125,
      "step": 59202,
      "training_step_time": 0.4007091522216797
    },
    {
      "epoch": 0.000361346435546875,
      "model_forward_time": 0.11507606506347656,
      "step": 59203
    },
    {
      "epoch": 0.000361346435546875,
      "step": 59203,
      "training_step_time": 0.39568161964416504
    },
    {
      "epoch": 0.0003613525390625,
      "model_forward_time": 0.1150972843170166,
      "step": 59204
    },
    {
      "epoch": 0.0003613525390625,
      "step": 59204,
      "training_step_time": 0.396651029586792
    },
    {
      "epoch": 0.000361358642578125,
      "model_forward_time": 0.11463427543640137,
      "step": 59205
    },
    {
      "epoch": 0.000361358642578125,
      "step": 59205,
      "training_step_time": 0.36368632316589355
    },
    {
      "epoch": 0.00036136474609375,
      "model_forward_time": 0.11536836624145508,
      "step": 59206
    },
    {
      "epoch": 0.00036136474609375,
      "step": 59206,
      "training_step_time": 0.5880603790283203
    },
    {
      "epoch": 0.000361370849609375,
      "model_forward_time": 0.11447954177856445,
      "step": 59207
    },
    {
      "epoch": 0.000361370849609375,
      "step": 59207,
      "training_step_time": 0.3976764678955078
    },
    {
      "epoch": 0.000361376953125,
      "model_forward_time": 0.1154029369354248,
      "step": 59208
    },
    {
      "epoch": 0.000361376953125,
      "step": 59208,
      "training_step_time": 0.39534425735473633
    },
    {
      "epoch": 0.000361383056640625,
      "model_forward_time": 0.11552143096923828,
      "step": 59209
    },
    {
      "epoch": 0.000361383056640625,
      "step": 59209,
      "training_step_time": 0.4079558849334717
    },
    {
      "epoch": 0.00036138916015625,
      "grad_norm": 0.10779518634080887,
      "learning_rate": 4.7388789048075086e-08,
      "loss": 0.0376,
      "step": 59210
    },
    {
      "epoch": 0.00036138916015625,
      "model_forward_time": 0.11525225639343262,
      "step": 59210
    },
    {
      "epoch": 0.00036138916015625,
      "step": 59210,
      "training_step_time": 0.4373202323913574
    },
    {
      "epoch": 0.000361395263671875,
      "model_forward_time": 0.11465597152709961,
      "step": 59211
    },
    {
      "epoch": 0.000361395263671875,
      "step": 59211,
      "training_step_time": 0.3958613872528076
    },
    {
      "epoch": 0.0003614013671875,
      "model_forward_time": 0.11489319801330566,
      "step": 59212
    },
    {
      "epoch": 0.0003614013671875,
      "step": 59212,
      "training_step_time": 0.6101658344268799
    },
    {
      "epoch": 0.000361407470703125,
      "model_forward_time": 0.11564207077026367,
      "step": 59213
    },
    {
      "epoch": 0.000361407470703125,
      "step": 59213,
      "training_step_time": 0.3985440731048584
    },
    {
      "epoch": 0.00036141357421875,
      "model_forward_time": 0.11459732055664062,
      "step": 59214
    },
    {
      "epoch": 0.00036141357421875,
      "step": 59214,
      "training_step_time": 0.41109251976013184
    },
    {
      "epoch": 0.000361419677734375,
      "model_forward_time": 0.1146085262298584,
      "step": 59215
    },
    {
      "epoch": 0.000361419677734375,
      "step": 59215,
      "training_step_time": 0.418057918548584
    },
    {
      "epoch": 0.00036142578125,
      "model_forward_time": 0.11540579795837402,
      "step": 59216
    },
    {
      "epoch": 0.00036142578125,
      "step": 59216,
      "training_step_time": 0.48668718338012695
    },
    {
      "epoch": 0.000361431884765625,
      "model_forward_time": 0.11465120315551758,
      "step": 59217
    },
    {
      "epoch": 0.000361431884765625,
      "step": 59217,
      "training_step_time": 0.3896503448486328
    },
    {
      "epoch": 0.00036143798828125,
      "model_forward_time": 0.11549186706542969,
      "step": 59218
    },
    {
      "epoch": 0.00036143798828125,
      "step": 59218,
      "training_step_time": 0.47271251678466797
    },
    {
      "epoch": 0.000361444091796875,
      "model_forward_time": 0.11521363258361816,
      "step": 59219
    },
    {
      "epoch": 0.000361444091796875,
      "step": 59219,
      "training_step_time": 0.36524415016174316
    },
    {
      "epoch": 0.0003614501953125,
      "grad_norm": 0.07705046236515045,
      "learning_rate": 4.619684961881254e-08,
      "loss": 0.0342,
      "step": 59220
    },
    {
      "epoch": 0.0003614501953125,
      "model_forward_time": 0.11471939086914062,
      "step": 59220
    },
    {
      "epoch": 0.0003614501953125,
      "step": 59220,
      "training_step_time": 0.41552042961120605
    },
    {
      "epoch": 0.000361456298828125,
      "model_forward_time": 0.11448240280151367,
      "step": 59221
    },
    {
      "epoch": 0.000361456298828125,
      "step": 59221,
      "training_step_time": 0.39644813537597656
    },
    {
      "epoch": 0.00036146240234375,
      "model_forward_time": 0.11616373062133789,
      "step": 59222
    },
    {
      "epoch": 0.00036146240234375,
      "step": 59222,
      "training_step_time": 0.37093019485473633
    },
    {
      "epoch": 0.000361468505859375,
      "model_forward_time": 0.11540055274963379,
      "step": 59223
    },
    {
      "epoch": 0.000361468505859375,
      "step": 59223,
      "training_step_time": 0.42339563369750977
    },
    {
      "epoch": 0.000361474609375,
      "model_forward_time": 0.11549592018127441,
      "step": 59224
    },
    {
      "epoch": 0.000361474609375,
      "step": 59224,
      "training_step_time": 0.6420285701751709
    },
    {
      "epoch": 0.000361480712890625,
      "model_forward_time": 0.11478495597839355,
      "step": 59225
    },
    {
      "epoch": 0.000361480712890625,
      "step": 59225,
      "training_step_time": 0.41693639755249023
    },
    {
      "epoch": 0.00036148681640625,
      "model_forward_time": 0.11465120315551758,
      "step": 59226
    },
    {
      "epoch": 0.00036148681640625,
      "step": 59226,
      "training_step_time": 0.39409756660461426
    },
    {
      "epoch": 0.000361492919921875,
      "model_forward_time": 0.11481571197509766,
      "step": 59227
    },
    {
      "epoch": 0.000361492919921875,
      "step": 59227,
      "training_step_time": 0.47562360763549805
    },
    {
      "epoch": 0.0003614990234375,
      "model_forward_time": 0.11444282531738281,
      "step": 59228
    },
    {
      "epoch": 0.0003614990234375,
      "step": 59228,
      "training_step_time": 0.42534303665161133
    },
    {
      "epoch": 0.000361505126953125,
      "model_forward_time": 0.11456489562988281,
      "step": 59229
    },
    {
      "epoch": 0.000361505126953125,
      "step": 59229,
      "training_step_time": 0.43375158309936523
    },
    {
      "epoch": 0.00036151123046875,
      "grad_norm": 0.09837386757135391,
      "learning_rate": 4.502008483598941e-08,
      "loss": 0.036,
      "step": 59230
    },
    {
      "epoch": 0.00036151123046875,
      "model_forward_time": 0.11454892158508301,
      "step": 59230
    },
    {
      "epoch": 0.00036151123046875,
      "step": 59230,
      "training_step_time": 0.49352264404296875
    },
    {
      "epoch": 0.000361517333984375,
      "model_forward_time": 0.1145772933959961,
      "step": 59231
    },
    {
      "epoch": 0.000361517333984375,
      "step": 59231,
      "training_step_time": 0.3982388973236084
    },
    {
      "epoch": 0.0003615234375,
      "model_forward_time": 0.1148519515991211,
      "step": 59232
    },
    {
      "epoch": 0.0003615234375,
      "step": 59232,
      "training_step_time": 0.3925936222076416
    },
    {
      "epoch": 0.000361529541015625,
      "model_forward_time": 0.11537790298461914,
      "step": 59233
    },
    {
      "epoch": 0.000361529541015625,
      "step": 59233,
      "training_step_time": 0.4122624397277832
    },
    {
      "epoch": 0.00036153564453125,
      "model_forward_time": 0.11507320404052734,
      "step": 59234
    },
    {
      "epoch": 0.00036153564453125,
      "step": 59234,
      "training_step_time": 0.38790369033813477
    },
    {
      "epoch": 0.000361541748046875,
      "model_forward_time": 0.11495137214660645,
      "step": 59235
    },
    {
      "epoch": 0.000361541748046875,
      "step": 59235,
      "training_step_time": 0.42264699935913086
    },
    {
      "epoch": 0.0003615478515625,
      "model_forward_time": 0.11541295051574707,
      "step": 59236
    },
    {
      "epoch": 0.0003615478515625,
      "step": 59236,
      "training_step_time": 0.7628669738769531
    },
    {
      "epoch": 0.000361553955078125,
      "model_forward_time": 0.11493206024169922,
      "step": 59237
    },
    {
      "epoch": 0.000361553955078125,
      "step": 59237,
      "training_step_time": 0.48301124572753906
    },
    {
      "epoch": 0.00036156005859375,
      "model_forward_time": 0.11482405662536621,
      "step": 59238
    },
    {
      "epoch": 0.00036156005859375,
      "step": 59238,
      "training_step_time": 0.41139721870422363
    },
    {
      "epoch": 0.000361566162109375,
      "model_forward_time": 0.11486124992370605,
      "step": 59239
    },
    {
      "epoch": 0.000361566162109375,
      "step": 59239,
      "training_step_time": 0.38695311546325684
    },
    {
      "epoch": 0.000361572265625,
      "grad_norm": 0.09802620112895966,
      "learning_rate": 4.385849505708084e-08,
      "loss": 0.0391,
      "step": 59240
    },
    {
      "epoch": 0.000361572265625,
      "model_forward_time": 0.11411023139953613,
      "step": 59240
    },
    {
      "epoch": 0.000361572265625,
      "step": 59240,
      "training_step_time": 0.39273858070373535
    },
    {
      "epoch": 0.000361578369140625,
      "model_forward_time": 0.11521434783935547,
      "step": 59241
    },
    {
      "epoch": 0.000361578369140625,
      "step": 59241,
      "training_step_time": 0.3822934627532959
    },
    {
      "epoch": 0.00036158447265625,
      "model_forward_time": 0.11534571647644043,
      "step": 59242
    },
    {
      "epoch": 0.00036158447265625,
      "step": 59242,
      "training_step_time": 0.4680454730987549
    },
    {
      "epoch": 0.000361590576171875,
      "model_forward_time": 0.11465072631835938,
      "step": 59243
    },
    {
      "epoch": 0.000361590576171875,
      "step": 59243,
      "training_step_time": 0.5419542789459229
    },
    {
      "epoch": 0.0003615966796875,
      "model_forward_time": 0.11660408973693848,
      "step": 59244
    },
    {
      "epoch": 0.0003615966796875,
      "step": 59244,
      "training_step_time": 0.3934180736541748
    },
    {
      "epoch": 0.000361602783203125,
      "model_forward_time": 0.11504220962524414,
      "step": 59245
    },
    {
      "epoch": 0.000361602783203125,
      "step": 59245,
      "training_step_time": 0.40124082565307617
    },
    {
      "epoch": 0.00036160888671875,
      "model_forward_time": 0.1151278018951416,
      "step": 59246
    },
    {
      "epoch": 0.00036160888671875,
      "step": 59246,
      "training_step_time": 0.3908536434173584
    },
    {
      "epoch": 0.000361614990234375,
      "model_forward_time": 0.11554431915283203,
      "step": 59247
    },
    {
      "epoch": 0.000361614990234375,
      "step": 59247,
      "training_step_time": 0.44429707527160645
    },
    {
      "epoch": 0.00036162109375,
      "model_forward_time": 0.11525321006774902,
      "step": 59248
    },
    {
      "epoch": 0.00036162109375,
      "step": 59248,
      "training_step_time": 0.5449016094207764
    },
    {
      "epoch": 0.000361627197265625,
      "model_forward_time": 0.11521267890930176,
      "step": 59249
    },
    {
      "epoch": 0.000361627197265625,
      "step": 59249,
      "training_step_time": 0.4360320568084717
    },
    {
      "epoch": 0.00036163330078125,
      "grad_norm": 0.08105651289224625,
      "learning_rate": 4.2712080634949024e-08,
      "loss": 0.0348,
      "step": 59250
    },
    {
      "epoch": 0.00036163330078125,
      "model_forward_time": 0.1144866943359375,
      "step": 59250
    },
    {
      "epoch": 0.00036163330078125,
      "step": 59250,
      "training_step_time": 0.4485340118408203
    },
    {
      "epoch": 0.000361639404296875,
      "model_forward_time": 0.11485838890075684,
      "step": 59251
    },
    {
      "epoch": 0.000361639404296875,
      "step": 59251,
      "training_step_time": 0.5115997791290283
    },
    {
      "epoch": 0.0003616455078125,
      "model_forward_time": 0.1149754524230957,
      "step": 59252
    },
    {
      "epoch": 0.0003616455078125,
      "step": 59252,
      "training_step_time": 0.38678669929504395
    },
    {
      "epoch": 0.000361651611328125,
      "model_forward_time": 0.11416244506835938,
      "step": 59253
    },
    {
      "epoch": 0.000361651611328125,
      "step": 59253,
      "training_step_time": 0.3867228031158447
    },
    {
      "epoch": 0.00036165771484375,
      "model_forward_time": 0.11500310897827148,
      "step": 59254
    },
    {
      "epoch": 0.00036165771484375,
      "step": 59254,
      "training_step_time": 0.46388697624206543
    },
    {
      "epoch": 0.000361663818359375,
      "model_forward_time": 0.11467504501342773,
      "step": 59255
    },
    {
      "epoch": 0.000361663818359375,
      "step": 59255,
      "training_step_time": 0.3951530456542969
    },
    {
      "epoch": 0.000361669921875,
      "model_forward_time": 0.11518239974975586,
      "step": 59256
    },
    {
      "epoch": 0.000361669921875,
      "step": 59256,
      "training_step_time": 0.38963890075683594
    },
    {
      "epoch": 0.000361676025390625,
      "model_forward_time": 0.11442255973815918,
      "step": 59257
    },
    {
      "epoch": 0.000361676025390625,
      "step": 59257,
      "training_step_time": 0.4791252613067627
    },
    {
      "epoch": 0.00036168212890625,
      "model_forward_time": 0.11454892158508301,
      "step": 59258
    },
    {
      "epoch": 0.00036168212890625,
      "step": 59258,
      "training_step_time": 0.4094088077545166
    },
    {
      "epoch": 0.000361688232421875,
      "model_forward_time": 0.11431026458740234,
      "step": 59259
    },
    {
      "epoch": 0.000361688232421875,
      "step": 59259,
      "training_step_time": 0.3935515880584717
    },
    {
      "epoch": 0.0003616943359375,
      "grad_norm": 0.20379847288131714,
      "learning_rate": 4.158084191783762e-08,
      "loss": 0.0414,
      "step": 59260
    },
    {
      "epoch": 0.0003616943359375,
      "model_forward_time": 0.11533975601196289,
      "step": 59260
    },
    {
      "epoch": 0.0003616943359375,
      "step": 59260,
      "training_step_time": 0.4576880931854248
    },
    {
      "epoch": 0.000361700439453125,
      "model_forward_time": 0.11498141288757324,
      "step": 59261
    },
    {
      "epoch": 0.000361700439453125,
      "step": 59261,
      "training_step_time": 0.43039751052856445
    },
    {
      "epoch": 0.00036170654296875,
      "model_forward_time": 0.11554598808288574,
      "step": 59262
    },
    {
      "epoch": 0.00036170654296875,
      "step": 59262,
      "training_step_time": 0.4923732280731201
    },
    {
      "epoch": 0.000361712646484375,
      "model_forward_time": 0.11519813537597656,
      "step": 59263
    },
    {
      "epoch": 0.000361712646484375,
      "step": 59263,
      "training_step_time": 0.4275531768798828
    },
    {
      "epoch": 0.00036171875,
      "model_forward_time": 0.11516976356506348,
      "step": 59264
    },
    {
      "epoch": 0.00036171875,
      "step": 59264,
      "training_step_time": 0.44296741485595703
    },
    {
      "epoch": 0.000361724853515625,
      "model_forward_time": 0.11521220207214355,
      "step": 59265
    },
    {
      "epoch": 0.000361724853515625,
      "step": 59265,
      "training_step_time": 0.42046546936035156
    },
    {
      "epoch": 0.00036173095703125,
      "model_forward_time": 0.11522412300109863,
      "step": 59266
    },
    {
      "epoch": 0.00036173095703125,
      "step": 59266,
      "training_step_time": 0.4165217876434326
    },
    {
      "epoch": 0.000361737060546875,
      "model_forward_time": 0.11481285095214844,
      "step": 59267
    },
    {
      "epoch": 0.000361737060546875,
      "step": 59267,
      "training_step_time": 0.3956131935119629
    },
    {
      "epoch": 0.0003617431640625,
      "model_forward_time": 0.11542439460754395,
      "step": 59268
    },
    {
      "epoch": 0.0003617431640625,
      "step": 59268,
      "training_step_time": 0.3994290828704834
    },
    {
      "epoch": 0.000361749267578125,
      "model_forward_time": 0.11459827423095703,
      "step": 59269
    },
    {
      "epoch": 0.000361749267578125,
      "step": 59269,
      "training_step_time": 0.39115214347839355
    },
    {
      "epoch": 0.00036175537109375,
      "grad_norm": 0.07593008875846863,
      "learning_rate": 4.046477924939396e-08,
      "loss": 0.036,
      "step": 59270
    },
    {
      "epoch": 0.00036175537109375,
      "model_forward_time": 0.11485433578491211,
      "step": 59270
    },
    {
      "epoch": 0.00036175537109375,
      "step": 59270,
      "training_step_time": 0.48321986198425293
    },
    {
      "epoch": 0.000361761474609375,
      "model_forward_time": 0.1148526668548584,
      "step": 59271
    },
    {
      "epoch": 0.000361761474609375,
      "step": 59271,
      "training_step_time": 0.3866767883300781
    },
    {
      "epoch": 0.000361767578125,
      "model_forward_time": 0.11456990242004395,
      "step": 59272
    },
    {
      "epoch": 0.000361767578125,
      "step": 59272,
      "training_step_time": 0.45467305183410645
    },
    {
      "epoch": 0.000361773681640625,
      "model_forward_time": 0.11523294448852539,
      "step": 59273
    },
    {
      "epoch": 0.000361773681640625,
      "step": 59273,
      "training_step_time": 0.3902547359466553
    },
    {
      "epoch": 0.00036177978515625,
      "model_forward_time": 0.11480975151062012,
      "step": 59274
    },
    {
      "epoch": 0.00036177978515625,
      "step": 59274,
      "training_step_time": 0.39498066902160645
    },
    {
      "epoch": 0.000361785888671875,
      "model_forward_time": 0.1143946647644043,
      "step": 59275
    },
    {
      "epoch": 0.000361785888671875,
      "step": 59275,
      "training_step_time": 0.36650800704956055
    },
    {
      "epoch": 0.0003617919921875,
      "model_forward_time": 0.11578583717346191,
      "step": 59276
    },
    {
      "epoch": 0.0003617919921875,
      "step": 59276,
      "training_step_time": 0.3860926628112793
    },
    {
      "epoch": 0.000361798095703125,
      "model_forward_time": 0.11470866203308105,
      "step": 59277
    },
    {
      "epoch": 0.000361798095703125,
      "step": 59277,
      "training_step_time": 0.436708927154541
    },
    {
      "epoch": 0.00036180419921875,
      "model_forward_time": 0.11500430107116699,
      "step": 59278
    },
    {
      "epoch": 0.00036180419921875,
      "step": 59278,
      "training_step_time": 0.5713565349578857
    },
    {
      "epoch": 0.000361810302734375,
      "model_forward_time": 0.11519694328308105,
      "step": 59279
    },
    {
      "epoch": 0.000361810302734375,
      "step": 59279,
      "training_step_time": 0.4451582431793213
    },
    {
      "epoch": 0.00036181640625,
      "grad_norm": 0.14878857135772705,
      "learning_rate": 3.936389296864129e-08,
      "loss": 0.041,
      "step": 59280
    },
    {
      "epoch": 0.00036181640625,
      "model_forward_time": 0.11478066444396973,
      "step": 59280
    },
    {
      "epoch": 0.00036181640625,
      "step": 59280,
      "training_step_time": 0.5068917274475098
    },
    {
      "epoch": 0.000361822509765625,
      "model_forward_time": 0.11504650115966797,
      "step": 59281
    },
    {
      "epoch": 0.000361822509765625,
      "step": 59281,
      "training_step_time": 0.38956451416015625
    },
    {
      "epoch": 0.00036182861328125,
      "model_forward_time": 0.11417341232299805,
      "step": 59282
    },
    {
      "epoch": 0.00036182861328125,
      "step": 59282,
      "training_step_time": 0.40426135063171387
    },
    {
      "epoch": 0.000361834716796875,
      "model_forward_time": 0.11677312850952148,
      "step": 59283
    },
    {
      "epoch": 0.000361834716796875,
      "step": 59283,
      "training_step_time": 0.3948190212249756
    },
    {
      "epoch": 0.0003618408203125,
      "model_forward_time": 0.11496090888977051,
      "step": 59284
    },
    {
      "epoch": 0.0003618408203125,
      "step": 59284,
      "training_step_time": 0.4545927047729492
    },
    {
      "epoch": 0.000361846923828125,
      "model_forward_time": 0.11469697952270508,
      "step": 59285
    },
    {
      "epoch": 0.000361846923828125,
      "step": 59285,
      "training_step_time": 0.5165009498596191
    },
    {
      "epoch": 0.00036185302734375,
      "model_forward_time": 0.11459040641784668,
      "step": 59286
    },
    {
      "epoch": 0.00036185302734375,
      "step": 59286,
      "training_step_time": 0.39392614364624023
    },
    {
      "epoch": 0.000361859130859375,
      "model_forward_time": 0.11497688293457031,
      "step": 59287
    },
    {
      "epoch": 0.000361859130859375,
      "step": 59287,
      "training_step_time": 0.39405202865600586
    },
    {
      "epoch": 0.000361865234375,
      "model_forward_time": 0.11502933502197266,
      "step": 59288
    },
    {
      "epoch": 0.000361865234375,
      "step": 59288,
      "training_step_time": 0.3949108123779297
    },
    {
      "epoch": 0.000361871337890625,
      "model_forward_time": 0.11485648155212402,
      "step": 59289
    },
    {
      "epoch": 0.000361871337890625,
      "step": 59289,
      "training_step_time": 0.36545586585998535
    },
    {
      "epoch": 0.00036187744140625,
      "grad_norm": 0.06616082042455673,
      "learning_rate": 3.827818341000655e-08,
      "loss": 0.0318,
      "step": 59290
    },
    {
      "epoch": 0.00036187744140625,
      "model_forward_time": 0.11518192291259766,
      "step": 59290
    },
    {
      "epoch": 0.00036187744140625,
      "step": 59290,
      "training_step_time": 0.5179300308227539
    },
    {
      "epoch": 0.000361883544921875,
      "model_forward_time": 0.11458539962768555,
      "step": 59291
    },
    {
      "epoch": 0.000361883544921875,
      "step": 59291,
      "training_step_time": 0.3973536491394043
    },
    {
      "epoch": 0.0003618896484375,
      "model_forward_time": 0.11492514610290527,
      "step": 59292
    },
    {
      "epoch": 0.0003618896484375,
      "step": 59292,
      "training_step_time": 0.4136836528778076
    },
    {
      "epoch": 0.000361895751953125,
      "model_forward_time": 0.11492276191711426,
      "step": 59293
    },
    {
      "epoch": 0.000361895751953125,
      "step": 59293,
      "training_step_time": 0.3949565887451172
    },
    {
      "epoch": 0.00036190185546875,
      "model_forward_time": 0.11479854583740234,
      "step": 59294
    },
    {
      "epoch": 0.00036190185546875,
      "step": 59294,
      "training_step_time": 0.46575403213500977
    },
    {
      "epoch": 0.000361907958984375,
      "model_forward_time": 0.11459898948669434,
      "step": 59295
    },
    {
      "epoch": 0.000361907958984375,
      "step": 59295,
      "training_step_time": 0.3879354000091553
    },
    {
      "epoch": 0.0003619140625,
      "model_forward_time": 0.11550045013427734,
      "step": 59296
    },
    {
      "epoch": 0.0003619140625,
      "step": 59296,
      "training_step_time": 0.6173396110534668
    },
    {
      "epoch": 0.000361920166015625,
      "model_forward_time": 0.11442899703979492,
      "step": 59297
    },
    {
      "epoch": 0.000361920166015625,
      "step": 59297,
      "training_step_time": 0.3739809989929199
    },
    {
      "epoch": 0.00036192626953125,
      "model_forward_time": 0.11467480659484863,
      "step": 59298
    },
    {
      "epoch": 0.00036192626953125,
      "step": 59298,
      "training_step_time": 0.4712977409362793
    },
    {
      "epoch": 0.000361932373046875,
      "model_forward_time": 0.11467957496643066,
      "step": 59299
    },
    {
      "epoch": 0.000361932373046875,
      "step": 59299,
      "training_step_time": 0.4494435787200928
    },
    {
      "epoch": 0.0003619384765625,
      "grad_norm": 0.11762392520904541,
      "learning_rate": 3.720765090329814e-08,
      "loss": 0.0383,
      "step": 59300
    },
    {
      "epoch": 0.0003619384765625,
      "model_forward_time": 0.1152186393737793,
      "step": 59300
    },
    {
      "epoch": 0.0003619384765625,
      "step": 59300,
      "training_step_time": 0.3897733688354492
    },
    {
      "epoch": 0.000361944580078125,
      "model_forward_time": 0.11454963684082031,
      "step": 59301
    },
    {
      "epoch": 0.000361944580078125,
      "step": 59301,
      "training_step_time": 0.38871264457702637
    },
    {
      "epoch": 0.00036195068359375,
      "model_forward_time": 0.11438989639282227,
      "step": 59302
    },
    {
      "epoch": 0.00036195068359375,
      "step": 59302,
      "training_step_time": 0.46663737297058105
    },
    {
      "epoch": 0.000361956787109375,
      "model_forward_time": 0.11445856094360352,
      "step": 59303
    },
    {
      "epoch": 0.000361956787109375,
      "step": 59303,
      "training_step_time": 0.36348462104797363
    },
    {
      "epoch": 0.000361962890625,
      "model_forward_time": 0.11491250991821289,
      "step": 59304
    },
    {
      "epoch": 0.000361962890625,
      "step": 59304,
      "training_step_time": 0.419309139251709
    },
    {
      "epoch": 0.000361968994140625,
      "model_forward_time": 0.1149289608001709,
      "step": 59305
    },
    {
      "epoch": 0.000361968994140625,
      "step": 59305,
      "training_step_time": 0.4096648693084717
    },
    {
      "epoch": 0.00036197509765625,
      "model_forward_time": 0.11520147323608398,
      "step": 59306
    },
    {
      "epoch": 0.00036197509765625,
      "step": 59306,
      "training_step_time": 0.3891754150390625
    },
    {
      "epoch": 0.000361981201171875,
      "model_forward_time": 0.11557173728942871,
      "step": 59307
    },
    {
      "epoch": 0.000361981201171875,
      "step": 59307,
      "training_step_time": 0.3988685607910156
    },
    {
      "epoch": 0.0003619873046875,
      "model_forward_time": 0.11474823951721191,
      "step": 59308
    },
    {
      "epoch": 0.0003619873046875,
      "step": 59308,
      "training_step_time": 0.7707455158233643
    },
    {
      "epoch": 0.000361993408203125,
      "model_forward_time": 0.11453032493591309,
      "step": 59309
    },
    {
      "epoch": 0.000361993408203125,
      "step": 59309,
      "training_step_time": 0.39370083808898926
    },
    {
      "epoch": 0.00036199951171875,
      "grad_norm": 0.08978526294231415,
      "learning_rate": 3.615229577371149e-08,
      "loss": 0.0357,
      "step": 59310
    },
    {
      "epoch": 0.00036199951171875,
      "model_forward_time": 0.1148982048034668,
      "step": 59310
    },
    {
      "epoch": 0.00036199951171875,
      "step": 59310,
      "training_step_time": 0.3990139961242676
    },
    {
      "epoch": 0.000362005615234375,
      "model_forward_time": 0.1139836311340332,
      "step": 59311
    },
    {
      "epoch": 0.000362005615234375,
      "step": 59311,
      "training_step_time": 0.4544336795806885
    },
    {
      "epoch": 0.00036201171875,
      "model_forward_time": 0.11462616920471191,
      "step": 59312
    },
    {
      "epoch": 0.00036201171875,
      "step": 59312,
      "training_step_time": 0.43784403800964355
    },
    {
      "epoch": 0.000362017822265625,
      "model_forward_time": 0.11417388916015625,
      "step": 59313
    },
    {
      "epoch": 0.000362017822265625,
      "step": 59313,
      "training_step_time": 0.4609096050262451
    },
    {
      "epoch": 0.00036202392578125,
      "model_forward_time": 0.11469841003417969,
      "step": 59314
    },
    {
      "epoch": 0.00036202392578125,
      "step": 59314,
      "training_step_time": 0.3865377902984619
    },
    {
      "epoch": 0.000362030029296875,
      "model_forward_time": 0.11442780494689941,
      "step": 59315
    },
    {
      "epoch": 0.000362030029296875,
      "step": 59315,
      "training_step_time": 0.3958272933959961
    },
    {
      "epoch": 0.0003620361328125,
      "model_forward_time": 0.11571145057678223,
      "step": 59316
    },
    {
      "epoch": 0.0003620361328125,
      "step": 59316,
      "training_step_time": 0.40393972396850586
    },
    {
      "epoch": 0.000362042236328125,
      "model_forward_time": 0.1150977611541748,
      "step": 59317
    },
    {
      "epoch": 0.000362042236328125,
      "step": 59317,
      "training_step_time": 0.36654138565063477
    },
    {
      "epoch": 0.00036204833984375,
      "model_forward_time": 0.1156148910522461,
      "step": 59318
    },
    {
      "epoch": 0.00036204833984375,
      "step": 59318,
      "training_step_time": 0.40541553497314453
    },
    {
      "epoch": 0.000362054443359375,
      "model_forward_time": 0.1151432991027832,
      "step": 59319
    },
    {
      "epoch": 0.000362054443359375,
      "step": 59319,
      "training_step_time": 0.40524864196777344
    },
    {
      "epoch": 0.000362060546875,
      "grad_norm": 0.09100840240716934,
      "learning_rate": 3.511211834184014e-08,
      "loss": 0.0338,
      "step": 59320
    },
    {
      "epoch": 0.000362060546875,
      "model_forward_time": 0.11528539657592773,
      "step": 59320
    },
    {
      "epoch": 0.000362060546875,
      "step": 59320,
      "training_step_time": 0.6067144870758057
    },
    {
      "epoch": 0.000362066650390625,
      "model_forward_time": 0.1151423454284668,
      "step": 59321
    },
    {
      "epoch": 0.000362066650390625,
      "step": 59321,
      "training_step_time": 0.4948842525482178
    },
    {
      "epoch": 0.00036207275390625,
      "model_forward_time": 0.11438918113708496,
      "step": 59322
    },
    {
      "epoch": 0.00036207275390625,
      "step": 59322,
      "training_step_time": 0.4923837184906006
    },
    {
      "epoch": 0.000362078857421875,
      "model_forward_time": 0.11431455612182617,
      "step": 59323
    },
    {
      "epoch": 0.000362078857421875,
      "step": 59323,
      "training_step_time": 0.3961923122406006
    },
    {
      "epoch": 0.0003620849609375,
      "model_forward_time": 0.11423897743225098,
      "step": 59324
    },
    {
      "epoch": 0.0003620849609375,
      "step": 59324,
      "training_step_time": 0.3984544277191162
    },
    {
      "epoch": 0.000362091064453125,
      "model_forward_time": 0.11456108093261719,
      "step": 59325
    },
    {
      "epoch": 0.000362091064453125,
      "step": 59325,
      "training_step_time": 0.4506258964538574
    },
    {
      "epoch": 0.00036209716796875,
      "model_forward_time": 0.11515235900878906,
      "step": 59326
    },
    {
      "epoch": 0.00036209716796875,
      "step": 59326,
      "training_step_time": 0.48097991943359375
    },
    {
      "epoch": 0.000362103271484375,
      "model_forward_time": 0.1140890121459961,
      "step": 59327
    },
    {
      "epoch": 0.000362103271484375,
      "step": 59327,
      "training_step_time": 0.4194052219390869
    },
    {
      "epoch": 0.000362109375,
      "model_forward_time": 0.11509037017822266,
      "step": 59328
    },
    {
      "epoch": 0.000362109375,
      "step": 59328,
      "training_step_time": 0.3913297653198242
    },
    {
      "epoch": 0.000362115478515625,
      "model_forward_time": 0.11485600471496582,
      "step": 59329
    },
    {
      "epoch": 0.000362115478515625,
      "step": 59329,
      "training_step_time": 0.4058718681335449
    },
    {
      "epoch": 0.00036212158203125,
      "grad_norm": 0.09018909186124802,
      "learning_rate": 3.4087118923659125e-08,
      "loss": 0.0355,
      "step": 59330
    },
    {
      "epoch": 0.00036212158203125,
      "model_forward_time": 0.11552214622497559,
      "step": 59330
    },
    {
      "epoch": 0.00036212158203125,
      "step": 59330,
      "training_step_time": 0.4251747131347656
    },
    {
      "epoch": 0.000362127685546875,
      "model_forward_time": 0.11551880836486816,
      "step": 59331
    },
    {
      "epoch": 0.000362127685546875,
      "step": 59331,
      "training_step_time": 0.36614346504211426
    },
    {
      "epoch": 0.0003621337890625,
      "model_forward_time": 0.11630702018737793,
      "step": 59332
    },
    {
      "epoch": 0.0003621337890625,
      "step": 59332,
      "training_step_time": 0.5770204067230225
    },
    {
      "epoch": 0.000362139892578125,
      "model_forward_time": 0.11434149742126465,
      "step": 59333
    },
    {
      "epoch": 0.000362139892578125,
      "step": 59333,
      "training_step_time": 0.4547085762023926
    },
    {
      "epoch": 0.00036214599609375,
      "model_forward_time": 0.11595368385314941,
      "step": 59334
    },
    {
      "epoch": 0.00036214599609375,
      "step": 59334,
      "training_step_time": 0.41580891609191895
    },
    {
      "epoch": 0.000362152099609375,
      "model_forward_time": 0.11504411697387695,
      "step": 59335
    },
    {
      "epoch": 0.000362152099609375,
      "step": 59335,
      "training_step_time": 0.3995833396911621
    },
    {
      "epoch": 0.000362158203125,
      "model_forward_time": 0.11484003067016602,
      "step": 59336
    },
    {
      "epoch": 0.000362158203125,
      "step": 59336,
      "training_step_time": 0.4413759708404541
    },
    {
      "epoch": 0.000362164306640625,
      "model_forward_time": 0.11421060562133789,
      "step": 59337
    },
    {
      "epoch": 0.000362164306640625,
      "step": 59337,
      "training_step_time": 0.3958146572113037
    },
    {
      "epoch": 0.00036217041015625,
      "model_forward_time": 0.1149294376373291,
      "step": 59338
    },
    {
      "epoch": 0.00036217041015625,
      "step": 59338,
      "training_step_time": 0.48128724098205566
    },
    {
      "epoch": 0.000362176513671875,
      "model_forward_time": 0.11468219757080078,
      "step": 59339
    },
    {
      "epoch": 0.000362176513671875,
      "step": 59339,
      "training_step_time": 0.46401524543762207
    },
    {
      "epoch": 0.0003621826171875,
      "grad_norm": 0.07345244288444519,
      "learning_rate": 3.3077297830541584e-08,
      "loss": 0.033,
      "step": 59340
    },
    {
      "epoch": 0.0003621826171875,
      "model_forward_time": 0.11458396911621094,
      "step": 59340
    },
    {
      "epoch": 0.0003621826171875,
      "step": 59340,
      "training_step_time": 0.39271974563598633
    },
    {
      "epoch": 0.000362188720703125,
      "model_forward_time": 0.11465001106262207,
      "step": 59341
    },
    {
      "epoch": 0.000362188720703125,
      "step": 59341,
      "training_step_time": 0.41582822799682617
    },
    {
      "epoch": 0.00036219482421875,
      "model_forward_time": 0.11508917808532715,
      "step": 59342
    },
    {
      "epoch": 0.00036219482421875,
      "step": 59342,
      "training_step_time": 0.3903951644897461
    },
    {
      "epoch": 0.000362200927734375,
      "model_forward_time": 0.11487030982971191,
      "step": 59343
    },
    {
      "epoch": 0.000362200927734375,
      "step": 59343,
      "training_step_time": 0.3935110569000244
    },
    {
      "epoch": 0.00036220703125,
      "model_forward_time": 0.1160275936126709,
      "step": 59344
    },
    {
      "epoch": 0.00036220703125,
      "step": 59344,
      "training_step_time": 0.6808605194091797
    },
    {
      "epoch": 0.000362213134765625,
      "model_forward_time": 0.11591625213623047,
      "step": 59345
    },
    {
      "epoch": 0.000362213134765625,
      "step": 59345,
      "training_step_time": 0.3841683864593506
    },
    {
      "epoch": 0.00036221923828125,
      "model_forward_time": 0.11521291732788086,
      "step": 59346
    },
    {
      "epoch": 0.00036221923828125,
      "step": 59346,
      "training_step_time": 0.4048938751220703
    },
    {
      "epoch": 0.000362225341796875,
      "model_forward_time": 0.11508750915527344,
      "step": 59347
    },
    {
      "epoch": 0.000362225341796875,
      "step": 59347,
      "training_step_time": 0.4232165813446045
    },
    {
      "epoch": 0.0003622314453125,
      "model_forward_time": 0.11578679084777832,
      "step": 59348
    },
    {
      "epoch": 0.0003622314453125,
      "step": 59348,
      "training_step_time": 0.42248082160949707
    },
    {
      "epoch": 0.000362237548828125,
      "model_forward_time": 0.11483931541442871,
      "step": 59349
    },
    {
      "epoch": 0.000362237548828125,
      "step": 59349,
      "training_step_time": 0.39891958236694336
    },
    {
      "epoch": 0.00036224365234375,
      "grad_norm": 0.1317698210477829,
      "learning_rate": 3.20826553692366e-08,
      "loss": 0.0326,
      "step": 59350
    },
    {
      "epoch": 0.00036224365234375,
      "model_forward_time": 0.11630129814147949,
      "step": 59350
    },
    {
      "epoch": 0.00036224365234375,
      "step": 59350,
      "training_step_time": 0.5485107898712158
    },
    {
      "epoch": 0.000362249755859375,
      "model_forward_time": 0.11456608772277832,
      "step": 59351
    },
    {
      "epoch": 0.000362249755859375,
      "step": 59351,
      "training_step_time": 0.3990771770477295
    },
    {
      "epoch": 0.000362255859375,
      "model_forward_time": 0.11496806144714355,
      "step": 59352
    },
    {
      "epoch": 0.000362255859375,
      "step": 59352,
      "training_step_time": 0.3939385414123535
    },
    {
      "epoch": 0.000362261962890625,
      "model_forward_time": 0.11463618278503418,
      "step": 59353
    },
    {
      "epoch": 0.000362261962890625,
      "step": 59353,
      "training_step_time": 0.43145298957824707
    },
    {
      "epoch": 0.00036226806640625,
      "model_forward_time": 0.11459970474243164,
      "step": 59354
    },
    {
      "epoch": 0.00036226806640625,
      "step": 59354,
      "training_step_time": 0.5037980079650879
    },
    {
      "epoch": 0.000362274169921875,
      "model_forward_time": 0.11452484130859375,
      "step": 59355
    },
    {
      "epoch": 0.000362274169921875,
      "step": 59355,
      "training_step_time": 0.3875133991241455
    },
    {
      "epoch": 0.0003622802734375,
      "model_forward_time": 0.11494588851928711,
      "step": 59356
    },
    {
      "epoch": 0.0003622802734375,
      "step": 59356,
      "training_step_time": 0.6418812274932861
    },
    {
      "epoch": 0.000362286376953125,
      "model_forward_time": 0.11444425582885742,
      "step": 59357
    },
    {
      "epoch": 0.000362286376953125,
      "step": 59357,
      "training_step_time": 0.40111422538757324
    },
    {
      "epoch": 0.00036229248046875,
      "model_forward_time": 0.11443972587585449,
      "step": 59358
    },
    {
      "epoch": 0.00036229248046875,
      "step": 59358,
      "training_step_time": 0.38862085342407227
    },
    {
      "epoch": 0.000362298583984375,
      "model_forward_time": 0.1150662899017334,
      "step": 59359
    },
    {
      "epoch": 0.000362298583984375,
      "step": 59359,
      "training_step_time": 0.43300867080688477
    },
    {
      "epoch": 0.0003623046875,
      "grad_norm": 0.11675858497619629,
      "learning_rate": 3.110319184189692e-08,
      "loss": 0.0366,
      "step": 59360
    },
    {
      "epoch": 0.0003623046875,
      "model_forward_time": 0.11451268196105957,
      "step": 59360
    },
    {
      "epoch": 0.0003623046875,
      "step": 59360,
      "training_step_time": 0.5054700374603271
    },
    {
      "epoch": 0.000362310791015625,
      "model_forward_time": 0.11491131782531738,
      "step": 59361
    },
    {
      "epoch": 0.000362310791015625,
      "step": 59361,
      "training_step_time": 0.49961066246032715
    },
    {
      "epoch": 0.00036231689453125,
      "model_forward_time": 0.11456751823425293,
      "step": 59362
    },
    {
      "epoch": 0.00036231689453125,
      "step": 59362,
      "training_step_time": 0.4298574924468994
    },
    {
      "epoch": 0.000362322998046875,
      "model_forward_time": 0.1144099235534668,
      "step": 59363
    },
    {
      "epoch": 0.000362322998046875,
      "step": 59363,
      "training_step_time": 0.48465752601623535
    },
    {
      "epoch": 0.0003623291015625,
      "model_forward_time": 0.11395621299743652,
      "step": 59364
    },
    {
      "epoch": 0.0003623291015625,
      "step": 59364,
      "training_step_time": 0.3886220455169678
    },
    {
      "epoch": 0.000362335205078125,
      "model_forward_time": 0.11515283584594727,
      "step": 59365
    },
    {
      "epoch": 0.000362335205078125,
      "step": 59365,
      "training_step_time": 0.38913822174072266
    },
    {
      "epoch": 0.00036234130859375,
      "model_forward_time": 0.11483216285705566,
      "step": 59366
    },
    {
      "epoch": 0.00036234130859375,
      "step": 59366,
      "training_step_time": 0.3870975971221924
    },
    {
      "epoch": 0.000362347412109375,
      "model_forward_time": 0.11517071723937988,
      "step": 59367
    },
    {
      "epoch": 0.000362347412109375,
      "step": 59367,
      "training_step_time": 0.41390371322631836
    },
    {
      "epoch": 0.000362353515625,
      "model_forward_time": 0.11537528038024902,
      "step": 59368
    },
    {
      "epoch": 0.000362353515625,
      "step": 59368,
      "training_step_time": 0.48256826400756836
    },
    {
      "epoch": 0.000362359619140625,
      "model_forward_time": 0.11535954475402832,
      "step": 59369
    },
    {
      "epoch": 0.000362359619140625,
      "step": 59369,
      "training_step_time": 0.408740758895874
    },
    {
      "epoch": 0.00036236572265625,
      "grad_norm": 0.08070679754018784,
      "learning_rate": 3.01389075460512e-08,
      "loss": 0.037,
      "step": 59370
    },
    {
      "epoch": 0.00036236572265625,
      "model_forward_time": 0.11535072326660156,
      "step": 59370
    },
    {
      "epoch": 0.00036236572265625,
      "step": 59370,
      "training_step_time": 0.39818572998046875
    },
    {
      "epoch": 0.000362371826171875,
      "model_forward_time": 0.11491894721984863,
      "step": 59371
    },
    {
      "epoch": 0.000362371826171875,
      "step": 59371,
      "training_step_time": 0.396578311920166
    },
    {
      "epoch": 0.0003623779296875,
      "model_forward_time": 0.11475157737731934,
      "step": 59372
    },
    {
      "epoch": 0.0003623779296875,
      "step": 59372,
      "training_step_time": 0.3902733325958252
    },
    {
      "epoch": 0.000362384033203125,
      "model_forward_time": 0.11490488052368164,
      "step": 59373
    },
    {
      "epoch": 0.000362384033203125,
      "step": 59373,
      "training_step_time": 0.36652493476867676
    },
    {
      "epoch": 0.00036239013671875,
      "model_forward_time": 0.11509323120117188,
      "step": 59374
    },
    {
      "epoch": 0.00036239013671875,
      "step": 59374,
      "training_step_time": 0.4973263740539551
    },
    {
      "epoch": 0.000362396240234375,
      "model_forward_time": 0.11484265327453613,
      "step": 59375
    },
    {
      "epoch": 0.000362396240234375,
      "step": 59375,
      "training_step_time": 0.4992637634277344
    },
    {
      "epoch": 0.00036240234375,
      "model_forward_time": 0.11552238464355469,
      "step": 59376
    },
    {
      "epoch": 0.00036240234375,
      "step": 59376,
      "training_step_time": 0.4016096591949463
    },
    {
      "epoch": 0.000362408447265625,
      "model_forward_time": 0.11471152305603027,
      "step": 59377
    },
    {
      "epoch": 0.000362408447265625,
      "step": 59377,
      "training_step_time": 0.4742701053619385
    },
    {
      "epoch": 0.00036241455078125,
      "model_forward_time": 0.11445379257202148,
      "step": 59378
    },
    {
      "epoch": 0.00036241455078125,
      "step": 59378,
      "training_step_time": 0.3933522701263428
    },
    {
      "epoch": 0.000362420654296875,
      "model_forward_time": 0.11483502388000488,
      "step": 59379
    },
    {
      "epoch": 0.000362420654296875,
      "step": 59379,
      "training_step_time": 0.3875606060028076
    },
    {
      "epoch": 0.0003624267578125,
      "grad_norm": 0.07575277239084244,
      "learning_rate": 2.9189802774631792e-08,
      "loss": 0.0336,
      "step": 59380
    },
    {
      "epoch": 0.0003624267578125,
      "model_forward_time": 0.11517906188964844,
      "step": 59380
    },
    {
      "epoch": 0.0003624267578125,
      "step": 59380,
      "training_step_time": 0.439899206161499
    },
    {
      "epoch": 0.000362432861328125,
      "model_forward_time": 0.11493635177612305,
      "step": 59381
    },
    {
      "epoch": 0.000362432861328125,
      "step": 59381,
      "training_step_time": 0.4216032028198242
    },
    {
      "epoch": 0.00036243896484375,
      "model_forward_time": 0.11498093605041504,
      "step": 59382
    },
    {
      "epoch": 0.00036243896484375,
      "step": 59382,
      "training_step_time": 0.4848475456237793
    },
    {
      "epoch": 0.000362445068359375,
      "model_forward_time": 0.11552286148071289,
      "step": 59383
    },
    {
      "epoch": 0.000362445068359375,
      "step": 59383,
      "training_step_time": 0.39565134048461914
    },
    {
      "epoch": 0.000362451171875,
      "model_forward_time": 0.11495280265808105,
      "step": 59384
    },
    {
      "epoch": 0.000362451171875,
      "step": 59384,
      "training_step_time": 0.38343381881713867
    },
    {
      "epoch": 0.000362457275390625,
      "model_forward_time": 0.11598849296569824,
      "step": 59385
    },
    {
      "epoch": 0.000362457275390625,
      "step": 59385,
      "training_step_time": 0.39568662643432617
    },
    {
      "epoch": 0.00036246337890625,
      "model_forward_time": 0.11498546600341797,
      "step": 59386
    },
    {
      "epoch": 0.00036246337890625,
      "step": 59386,
      "training_step_time": 0.40149521827697754
    },
    {
      "epoch": 0.000362469482421875,
      "model_forward_time": 0.11558771133422852,
      "step": 59387
    },
    {
      "epoch": 0.000362469482421875,
      "step": 59387,
      "training_step_time": 0.419513463973999
    },
    {
      "epoch": 0.0003624755859375,
      "model_forward_time": 0.11459875106811523,
      "step": 59388
    },
    {
      "epoch": 0.0003624755859375,
      "step": 59388,
      "training_step_time": 0.4697425365447998
    },
    {
      "epoch": 0.000362481689453125,
      "model_forward_time": 0.11499357223510742,
      "step": 59389
    },
    {
      "epoch": 0.000362481689453125,
      "step": 59389,
      "training_step_time": 0.49421024322509766
    },
    {
      "epoch": 0.00036248779296875,
      "grad_norm": 0.09622447192668915,
      "learning_rate": 2.8255877815946963e-08,
      "loss": 0.038,
      "step": 59390
    },
    {
      "epoch": 0.00036248779296875,
      "model_forward_time": 0.11503338813781738,
      "step": 59390
    },
    {
      "epoch": 0.00036248779296875,
      "step": 59390,
      "training_step_time": 0.3960843086242676
    },
    {
      "epoch": 0.000362493896484375,
      "model_forward_time": 0.11522960662841797,
      "step": 59391
    },
    {
      "epoch": 0.000362493896484375,
      "step": 59391,
      "training_step_time": 0.41887593269348145
    },
    {
      "epoch": 0.0003625,
      "model_forward_time": 0.11477231979370117,
      "step": 59392
    },
    {
      "epoch": 0.0003625,
      "step": 59392,
      "training_step_time": 0.5252861976623535
    },
    {
      "epoch": 0.000362506103515625,
      "model_forward_time": 0.11490559577941895,
      "step": 59393
    },
    {
      "epoch": 0.000362506103515625,
      "step": 59393,
      "training_step_time": 0.3960726261138916
    },
    {
      "epoch": 0.00036251220703125,
      "model_forward_time": 0.11555647850036621,
      "step": 59394
    },
    {
      "epoch": 0.00036251220703125,
      "step": 59394,
      "training_step_time": 0.3910231590270996
    },
    {
      "epoch": 0.000362518310546875,
      "model_forward_time": 0.1156151294708252,
      "step": 59395
    },
    {
      "epoch": 0.000362518310546875,
      "step": 59395,
      "training_step_time": 0.45755648612976074
    },
    {
      "epoch": 0.0003625244140625,
      "model_forward_time": 0.11512422561645508,
      "step": 59396
    },
    {
      "epoch": 0.0003625244140625,
      "step": 59396,
      "training_step_time": 0.49805164337158203
    },
    {
      "epoch": 0.000362530517578125,
      "model_forward_time": 0.11426639556884766,
      "step": 59397
    },
    {
      "epoch": 0.000362530517578125,
      "step": 59397,
      "training_step_time": 0.41954755783081055
    },
    {
      "epoch": 0.00036253662109375,
      "model_forward_time": 0.11529803276062012,
      "step": 59398
    },
    {
      "epoch": 0.00036253662109375,
      "step": 59398,
      "training_step_time": 0.5540156364440918
    },
    {
      "epoch": 0.000362542724609375,
      "model_forward_time": 0.11466360092163086,
      "step": 59399
    },
    {
      "epoch": 0.000362542724609375,
      "step": 59399,
      "training_step_time": 0.39254260063171387
    },
    {
      "epoch": 0.000362548828125,
      "grad_norm": 0.10826050490140915,
      "learning_rate": 2.7337132953697554e-08,
      "loss": 0.0344,
      "step": 59400
    },
    {
      "epoch": 0.000362548828125,
      "model_forward_time": 0.11416387557983398,
      "step": 59400
    },
    {
      "epoch": 0.000362548828125,
      "step": 59400,
      "training_step_time": 0.3902244567871094
    },
    {
      "epoch": 0.000362554931640625,
      "model_forward_time": 0.11472868919372559,
      "step": 59401
    },
    {
      "epoch": 0.000362554931640625,
      "step": 59401,
      "training_step_time": 0.3937509059906006
    },
    {
      "epoch": 0.00036256103515625,
      "model_forward_time": 0.1160879135131836,
      "step": 59402
    },
    {
      "epoch": 0.00036256103515625,
      "step": 59402,
      "training_step_time": 0.40854716300964355
    },
    {
      "epoch": 0.000362567138671875,
      "model_forward_time": 0.11437487602233887,
      "step": 59403
    },
    {
      "epoch": 0.000362567138671875,
      "step": 59403,
      "training_step_time": 0.4510021209716797
    },
    {
      "epoch": 0.0003625732421875,
      "model_forward_time": 0.11502480506896973,
      "step": 59404
    },
    {
      "epoch": 0.0003625732421875,
      "step": 59404,
      "training_step_time": 0.6192677021026611
    },
    {
      "epoch": 0.000362579345703125,
      "model_forward_time": 0.11482596397399902,
      "step": 59405
    },
    {
      "epoch": 0.000362579345703125,
      "step": 59405,
      "training_step_time": 0.4805748462677002
    },
    {
      "epoch": 0.00036258544921875,
      "model_forward_time": 0.11523222923278809,
      "step": 59406
    },
    {
      "epoch": 0.00036258544921875,
      "step": 59406,
      "training_step_time": 0.3903682231903076
    },
    {
      "epoch": 0.000362591552734375,
      "model_forward_time": 0.1147005558013916,
      "step": 59407
    },
    {
      "epoch": 0.000362591552734375,
      "step": 59407,
      "training_step_time": 0.3926863670349121
    },
    {
      "epoch": 0.00036259765625,
      "model_forward_time": 0.11441922187805176,
      "step": 59408
    },
    {
      "epoch": 0.00036259765625,
      "step": 59408,
      "training_step_time": 0.40160608291625977
    },
    {
      "epoch": 0.000362603759765625,
      "model_forward_time": 0.11501669883728027,
      "step": 59409
    },
    {
      "epoch": 0.000362603759765625,
      "step": 59409,
      "training_step_time": 0.4560580253601074
    },
    {
      "epoch": 0.00036260986328125,
      "grad_norm": 0.08488702774047852,
      "learning_rate": 2.6433568466976978e-08,
      "loss": 0.0391,
      "step": 59410
    },
    {
      "epoch": 0.00036260986328125,
      "model_forward_time": 0.11539673805236816,
      "step": 59410
    },
    {
      "epoch": 0.00036260986328125,
      "step": 59410,
      "training_step_time": 0.5962929725646973
    },
    {
      "epoch": 0.000362615966796875,
      "model_forward_time": 0.1143651008605957,
      "step": 59411
    },
    {
      "epoch": 0.000362615966796875,
      "step": 59411,
      "training_step_time": 0.39402151107788086
    },
    {
      "epoch": 0.0003626220703125,
      "model_forward_time": 0.11505460739135742,
      "step": 59412
    },
    {
      "epoch": 0.0003626220703125,
      "step": 59412,
      "training_step_time": 0.4043998718261719
    },
    {
      "epoch": 0.000362628173828125,
      "model_forward_time": 0.11474204063415527,
      "step": 59413
    },
    {
      "epoch": 0.000362628173828125,
      "step": 59413,
      "training_step_time": 0.3943769931793213
    },
    {
      "epoch": 0.00036263427734375,
      "model_forward_time": 0.11527395248413086,
      "step": 59414
    },
    {
      "epoch": 0.00036263427734375,
      "step": 59414,
      "training_step_time": 0.4055938720703125
    },
    {
      "epoch": 0.000362640380859375,
      "model_forward_time": 0.11408782005310059,
      "step": 59415
    },
    {
      "epoch": 0.000362640380859375,
      "step": 59415,
      "training_step_time": 0.3896963596343994
    },
    {
      "epoch": 0.000362646484375,
      "model_forward_time": 0.11563873291015625,
      "step": 59416
    },
    {
      "epoch": 0.000362646484375,
      "step": 59416,
      "training_step_time": 0.6262743473052979
    },
    {
      "epoch": 0.000362652587890625,
      "model_forward_time": 0.11447834968566895,
      "step": 59417
    },
    {
      "epoch": 0.000362652587890625,
      "step": 59417,
      "training_step_time": 0.45456910133361816
    },
    {
      "epoch": 0.00036265869140625,
      "model_forward_time": 0.11494779586791992,
      "step": 59418
    },
    {
      "epoch": 0.00036265869140625,
      "step": 59418,
      "training_step_time": 0.40114760398864746
    },
    {
      "epoch": 0.000362664794921875,
      "model_forward_time": 0.11523175239562988,
      "step": 59419
    },
    {
      "epoch": 0.000362664794921875,
      "step": 59419,
      "training_step_time": 0.46803808212280273
    },
    {
      "epoch": 0.0003626708984375,
      "grad_norm": 0.09593652933835983,
      "learning_rate": 2.5545184630265672e-08,
      "loss": 0.0326,
      "step": 59420
    },
    {
      "epoch": 0.0003626708984375,
      "model_forward_time": 0.11439776420593262,
      "step": 59420
    },
    {
      "epoch": 0.0003626708984375,
      "step": 59420,
      "training_step_time": 0.39670276641845703
    },
    {
      "epoch": 0.000362677001953125,
      "model_forward_time": 0.11492514610290527,
      "step": 59421
    },
    {
      "epoch": 0.000362677001953125,
      "step": 59421,
      "training_step_time": 0.3868985176086426
    },
    {
      "epoch": 0.00036268310546875,
      "model_forward_time": 0.11515307426452637,
      "step": 59422
    },
    {
      "epoch": 0.00036268310546875,
      "step": 59422,
      "training_step_time": 0.5046103000640869
    },
    {
      "epoch": 0.000362689208984375,
      "model_forward_time": 0.11489200592041016,
      "step": 59423
    },
    {
      "epoch": 0.000362689208984375,
      "step": 59423,
      "training_step_time": 0.40163421630859375
    },
    {
      "epoch": 0.0003626953125,
      "model_forward_time": 0.11504149436950684,
      "step": 59424
    },
    {
      "epoch": 0.0003626953125,
      "step": 59424,
      "training_step_time": 0.41799330711364746
    },
    {
      "epoch": 0.000362701416015625,
      "model_forward_time": 0.1153097152709961,
      "step": 59425
    },
    {
      "epoch": 0.000362701416015625,
      "step": 59425,
      "training_step_time": 0.38889479637145996
    },
    {
      "epoch": 0.00036270751953125,
      "model_forward_time": 0.11534953117370605,
      "step": 59426
    },
    {
      "epoch": 0.00036270751953125,
      "step": 59426,
      "training_step_time": 0.3917222023010254
    },
    {
      "epoch": 0.000362713623046875,
      "model_forward_time": 0.1149451732635498,
      "step": 59427
    },
    {
      "epoch": 0.000362713623046875,
      "step": 59427,
      "training_step_time": 0.39908480644226074
    },
    {
      "epoch": 0.0003627197265625,
      "model_forward_time": 0.11531853675842285,
      "step": 59428
    },
    {
      "epoch": 0.0003627197265625,
      "step": 59428,
      "training_step_time": 0.5523436069488525
    },
    {
      "epoch": 0.000362725830078125,
      "model_forward_time": 0.11462712287902832,
      "step": 59429
    },
    {
      "epoch": 0.000362725830078125,
      "step": 59429,
      "training_step_time": 0.39289236068725586
    },
    {
      "epoch": 0.00036273193359375,
      "grad_norm": 0.12567231059074402,
      "learning_rate": 2.467198171342e-08,
      "loss": 0.0339,
      "step": 59430
    },
    {
      "epoch": 0.00036273193359375,
      "model_forward_time": 0.11488628387451172,
      "step": 59430
    },
    {
      "epoch": 0.00036273193359375,
      "step": 59430,
      "training_step_time": 0.3901956081390381
    },
    {
      "epoch": 0.000362738037109375,
      "model_forward_time": 0.11483597755432129,
      "step": 59431
    },
    {
      "epoch": 0.000362738037109375,
      "step": 59431,
      "training_step_time": 0.4491753578186035
    },
    {
      "epoch": 0.000362744140625,
      "model_forward_time": 0.11547517776489258,
      "step": 59432
    },
    {
      "epoch": 0.000362744140625,
      "step": 59432,
      "training_step_time": 0.4965693950653076
    },
    {
      "epoch": 0.000362750244140625,
      "model_forward_time": 0.11475753784179688,
      "step": 59433
    },
    {
      "epoch": 0.000362750244140625,
      "step": 59433,
      "training_step_time": 0.5166738033294678
    },
    {
      "epoch": 0.00036275634765625,
      "model_forward_time": 0.11456847190856934,
      "step": 59434
    },
    {
      "epoch": 0.00036275634765625,
      "step": 59434,
      "training_step_time": 0.39429259300231934
    },
    {
      "epoch": 0.000362762451171875,
      "model_forward_time": 0.11443209648132324,
      "step": 59435
    },
    {
      "epoch": 0.000362762451171875,
      "step": 59435,
      "training_step_time": 0.4507791996002197
    },
    {
      "epoch": 0.0003627685546875,
      "model_forward_time": 0.11456871032714844,
      "step": 59436
    },
    {
      "epoch": 0.0003627685546875,
      "step": 59436,
      "training_step_time": 0.417935848236084
    },
    {
      "epoch": 0.000362774658203125,
      "model_forward_time": 0.11626315116882324,
      "step": 59437
    },
    {
      "epoch": 0.000362774658203125,
      "step": 59437,
      "training_step_time": 0.48446226119995117
    },
    {
      "epoch": 0.00036278076171875,
      "model_forward_time": 0.11457633972167969,
      "step": 59438
    },
    {
      "epoch": 0.00036278076171875,
      "step": 59438,
      "training_step_time": 0.3820343017578125
    },
    {
      "epoch": 0.000362786865234375,
      "model_forward_time": 0.11531543731689453,
      "step": 59439
    },
    {
      "epoch": 0.000362786865234375,
      "step": 59439,
      "training_step_time": 0.3898475170135498
    },
    {
      "epoch": 0.00036279296875,
      "grad_norm": 0.07220903784036636,
      "learning_rate": 2.3813959981711097e-08,
      "loss": 0.0377,
      "step": 59440
    },
    {
      "epoch": 0.00036279296875,
      "model_forward_time": 0.11533188819885254,
      "step": 59440
    },
    {
      "epoch": 0.00036279296875,
      "step": 59440,
      "training_step_time": 0.4947381019592285
    },
    {
      "epoch": 0.000362799072265625,
      "model_forward_time": 0.115325927734375,
      "step": 59441
    },
    {
      "epoch": 0.000362799072265625,
      "step": 59441,
      "training_step_time": 0.38739705085754395
    },
    {
      "epoch": 0.00036280517578125,
      "model_forward_time": 0.11530160903930664,
      "step": 59442
    },
    {
      "epoch": 0.00036280517578125,
      "step": 59442,
      "training_step_time": 0.393481969833374
    },
    {
      "epoch": 0.000362811279296875,
      "model_forward_time": 0.11478114128112793,
      "step": 59443
    },
    {
      "epoch": 0.000362811279296875,
      "step": 59443,
      "training_step_time": 0.4055664539337158
    },
    {
      "epoch": 0.0003628173828125,
      "model_forward_time": 0.11494779586791992,
      "step": 59444
    },
    {
      "epoch": 0.0003628173828125,
      "step": 59444,
      "training_step_time": 0.39072275161743164
    },
    {
      "epoch": 0.000362823486328125,
      "model_forward_time": 0.11525321006774902,
      "step": 59445
    },
    {
      "epoch": 0.000362823486328125,
      "step": 59445,
      "training_step_time": 0.40799808502197266
    },
    {
      "epoch": 0.00036282958984375,
      "model_forward_time": 0.11558747291564941,
      "step": 59446
    },
    {
      "epoch": 0.00036282958984375,
      "step": 59446,
      "training_step_time": 0.5849294662475586
    },
    {
      "epoch": 0.000362835693359375,
      "model_forward_time": 0.1151590347290039,
      "step": 59447
    },
    {
      "epoch": 0.000362835693359375,
      "step": 59447,
      "training_step_time": 0.4401569366455078
    },
    {
      "epoch": 0.000362841796875,
      "model_forward_time": 0.11428570747375488,
      "step": 59448
    },
    {
      "epoch": 0.000362841796875,
      "step": 59448,
      "training_step_time": 0.47258663177490234
    },
    {
      "epoch": 0.000362847900390625,
      "model_forward_time": 0.11469793319702148,
      "step": 59449
    },
    {
      "epoch": 0.000362847900390625,
      "step": 59449,
      "training_step_time": 0.40052223205566406
    },
    {
      "epoch": 0.00036285400390625,
      "grad_norm": 0.12021230161190033,
      "learning_rate": 2.2971119695774925e-08,
      "loss": 0.0346,
      "step": 59450
    },
    {
      "epoch": 0.00036285400390625,
      "model_forward_time": 0.11455202102661133,
      "step": 59450
    },
    {
      "epoch": 0.00036285400390625,
      "step": 59450,
      "training_step_time": 0.3850560188293457
    },
    {
      "epoch": 0.000362860107421875,
      "model_forward_time": 0.1148383617401123,
      "step": 59451
    },
    {
      "epoch": 0.000362860107421875,
      "step": 59451,
      "training_step_time": 0.44794416427612305
    },
    {
      "epoch": 0.0003628662109375,
      "model_forward_time": 0.11498713493347168,
      "step": 59452
    },
    {
      "epoch": 0.0003628662109375,
      "step": 59452,
      "training_step_time": 0.3898625373840332
    },
    {
      "epoch": 0.000362872314453125,
      "model_forward_time": 0.11525130271911621,
      "step": 59453
    },
    {
      "epoch": 0.000362872314453125,
      "step": 59453,
      "training_step_time": 0.39124011993408203
    },
    {
      "epoch": 0.00036287841796875,
      "model_forward_time": 0.11555957794189453,
      "step": 59454
    },
    {
      "epoch": 0.00036287841796875,
      "step": 59454,
      "training_step_time": 0.4099748134613037
    },
    {
      "epoch": 0.000362884521484375,
      "model_forward_time": 0.1155252456665039,
      "step": 59455
    },
    {
      "epoch": 0.000362884521484375,
      "step": 59455,
      "training_step_time": 0.4010467529296875
    },
    {
      "epoch": 0.000362890625,
      "model_forward_time": 0.11566567420959473,
      "step": 59456
    },
    {
      "epoch": 0.000362890625,
      "step": 59456,
      "training_step_time": 0.3945953845977783
    },
    {
      "epoch": 0.000362896728515625,
      "model_forward_time": 0.11528921127319336,
      "step": 59457
    },
    {
      "epoch": 0.000362896728515625,
      "step": 59457,
      "training_step_time": 0.39070892333984375
    },
    {
      "epoch": 0.00036290283203125,
      "model_forward_time": 0.11537361145019531,
      "step": 59458
    },
    {
      "epoch": 0.00036290283203125,
      "step": 59458,
      "training_step_time": 0.6352112293243408
    },
    {
      "epoch": 0.000362908935546875,
      "model_forward_time": 0.11492252349853516,
      "step": 59459
    },
    {
      "epoch": 0.000362908935546875,
      "step": 59459,
      "training_step_time": 0.36997389793395996
    },
    {
      "epoch": 0.0003629150390625,
      "grad_norm": 0.11413741856813431,
      "learning_rate": 2.214346111164556e-08,
      "loss": 0.0359,
      "step": 59460
    },
    {
      "epoch": 0.0003629150390625,
      "model_forward_time": 0.11483120918273926,
      "step": 59460
    },
    {
      "epoch": 0.0003629150390625,
      "step": 59460,
      "training_step_time": 0.4794750213623047
    },
    {
      "epoch": 0.000362921142578125,
      "model_forward_time": 0.11519074440002441,
      "step": 59461
    },
    {
      "epoch": 0.000362921142578125,
      "step": 59461,
      "training_step_time": 0.485990047454834
    },
    {
      "epoch": 0.00036292724609375,
      "model_forward_time": 0.11458182334899902,
      "step": 59462
    },
    {
      "epoch": 0.00036292724609375,
      "step": 59462,
      "training_step_time": 0.44872593879699707
    },
    {
      "epoch": 0.000362933349609375,
      "model_forward_time": 0.11409378051757812,
      "step": 59463
    },
    {
      "epoch": 0.000362933349609375,
      "step": 59463,
      "training_step_time": 0.3831808567047119
    },
    {
      "epoch": 0.000362939453125,
      "model_forward_time": 0.11517810821533203,
      "step": 59464
    },
    {
      "epoch": 0.000362939453125,
      "step": 59464,
      "training_step_time": 0.42053675651550293
    },
    {
      "epoch": 0.000362945556640625,
      "model_forward_time": 0.11502218246459961,
      "step": 59465
    },
    {
      "epoch": 0.000362945556640625,
      "step": 59465,
      "training_step_time": 0.5035052299499512
    },
    {
      "epoch": 0.00036295166015625,
      "model_forward_time": 0.11452078819274902,
      "step": 59466
    },
    {
      "epoch": 0.00036295166015625,
      "step": 59466,
      "training_step_time": 0.3986508846282959
    },
    {
      "epoch": 0.000362957763671875,
      "model_forward_time": 0.11493635177612305,
      "step": 59467
    },
    {
      "epoch": 0.000362957763671875,
      "step": 59467,
      "training_step_time": 0.40099453926086426
    },
    {
      "epoch": 0.0003629638671875,
      "model_forward_time": 0.11502575874328613,
      "step": 59468
    },
    {
      "epoch": 0.0003629638671875,
      "step": 59468,
      "training_step_time": 0.38431501388549805
    },
    {
      "epoch": 0.000362969970703125,
      "model_forward_time": 0.11525964736938477,
      "step": 59469
    },
    {
      "epoch": 0.000362969970703125,
      "step": 59469,
      "training_step_time": 0.3944857120513916
    },
    {
      "epoch": 0.00036297607421875,
      "grad_norm": 0.07765862345695496,
      "learning_rate": 2.133098448074411e-08,
      "loss": 0.0328,
      "step": 59470
    },
    {
      "epoch": 0.00036297607421875,
      "model_forward_time": 0.1162872314453125,
      "step": 59470
    },
    {
      "epoch": 0.00036297607421875,
      "step": 59470,
      "training_step_time": 0.4542238712310791
    },
    {
      "epoch": 0.000362982177734375,
      "model_forward_time": 0.11482691764831543,
      "step": 59471
    },
    {
      "epoch": 0.000362982177734375,
      "step": 59471,
      "training_step_time": 0.43640685081481934
    },
    {
      "epoch": 0.00036298828125,
      "model_forward_time": 0.11486649513244629,
      "step": 59472
    },
    {
      "epoch": 0.00036298828125,
      "step": 59472,
      "training_step_time": 0.4101130962371826
    },
    {
      "epoch": 0.000362994384765625,
      "model_forward_time": 0.11553549766540527,
      "step": 59473
    },
    {
      "epoch": 0.000362994384765625,
      "step": 59473,
      "training_step_time": 0.42070555686950684
    },
    {
      "epoch": 0.00036300048828125,
      "model_forward_time": 0.11577701568603516,
      "step": 59474
    },
    {
      "epoch": 0.00036300048828125,
      "step": 59474,
      "training_step_time": 0.5207056999206543
    },
    {
      "epoch": 0.000363006591796875,
      "model_forward_time": 0.115020751953125,
      "step": 59475
    },
    {
      "epoch": 0.000363006591796875,
      "step": 59475,
      "training_step_time": 0.48937249183654785
    },
    {
      "epoch": 0.0003630126953125,
      "model_forward_time": 0.11570072174072266,
      "step": 59476
    },
    {
      "epoch": 0.0003630126953125,
      "step": 59476,
      "training_step_time": 0.4765160083770752
    },
    {
      "epoch": 0.000363018798828125,
      "model_forward_time": 0.11438107490539551,
      "step": 59477
    },
    {
      "epoch": 0.000363018798828125,
      "step": 59477,
      "training_step_time": 0.4045255184173584
    },
    {
      "epoch": 0.00036302490234375,
      "model_forward_time": 0.11462068557739258,
      "step": 59478
    },
    {
      "epoch": 0.00036302490234375,
      "step": 59478,
      "training_step_time": 0.40035247802734375
    },
    {
      "epoch": 0.000363031005859375,
      "model_forward_time": 0.11464667320251465,
      "step": 59479
    },
    {
      "epoch": 0.000363031005859375,
      "step": 59479,
      "training_step_time": 0.5233948230743408
    },
    {
      "epoch": 0.000363037109375,
      "grad_norm": 0.09127894043922424,
      "learning_rate": 2.0533690049878707e-08,
      "loss": 0.0361,
      "step": 59480
    },
    {
      "epoch": 0.000363037109375,
      "model_forward_time": 0.11454463005065918,
      "step": 59480
    },
    {
      "epoch": 0.000363037109375,
      "step": 59480,
      "training_step_time": 0.3894233703613281
    },
    {
      "epoch": 0.000363043212890625,
      "model_forward_time": 0.11449980735778809,
      "step": 59481
    },
    {
      "epoch": 0.000363043212890625,
      "step": 59481,
      "training_step_time": 0.39336490631103516
    },
    {
      "epoch": 0.00036304931640625,
      "model_forward_time": 0.11500787734985352,
      "step": 59482
    },
    {
      "epoch": 0.00036304931640625,
      "step": 59482,
      "training_step_time": 0.3915243148803711
    },
    {
      "epoch": 0.000363055419921875,
      "model_forward_time": 0.11485934257507324,
      "step": 59483
    },
    {
      "epoch": 0.000363055419921875,
      "step": 59483,
      "training_step_time": 0.40164923667907715
    },
    {
      "epoch": 0.0003630615234375,
      "model_forward_time": 0.11551094055175781,
      "step": 59484
    },
    {
      "epoch": 0.0003630615234375,
      "step": 59484,
      "training_step_time": 0.3938484191894531
    },
    {
      "epoch": 0.000363067626953125,
      "model_forward_time": 0.11505317687988281,
      "step": 59485
    },
    {
      "epoch": 0.000363067626953125,
      "step": 59485,
      "training_step_time": 0.3925786018371582
    },
    {
      "epoch": 0.00036307373046875,
      "model_forward_time": 0.11464905738830566,
      "step": 59486
    },
    {
      "epoch": 0.00036307373046875,
      "step": 59486,
      "training_step_time": 0.3842902183532715
    },
    {
      "epoch": 0.000363079833984375,
      "model_forward_time": 0.11650657653808594,
      "step": 59487
    },
    {
      "epoch": 0.000363079833984375,
      "step": 59487,
      "training_step_time": 0.39893579483032227
    },
    {
      "epoch": 0.0003630859375,
      "model_forward_time": 0.1151583194732666,
      "step": 59488
    },
    {
      "epoch": 0.0003630859375,
      "step": 59488,
      "training_step_time": 0.5170810222625732
    },
    {
      "epoch": 0.000363092041015625,
      "model_forward_time": 0.1151118278503418,
      "step": 59489
    },
    {
      "epoch": 0.000363092041015625,
      "step": 59489,
      "training_step_time": 0.49741053581237793
    },
    {
      "epoch": 0.00036309814453125,
      "grad_norm": 0.08572088181972504,
      "learning_rate": 1.9751578061244504e-08,
      "loss": 0.0379,
      "step": 59490
    },
    {
      "epoch": 0.00036309814453125,
      "model_forward_time": 0.11564278602600098,
      "step": 59490
    },
    {
      "epoch": 0.00036309814453125,
      "step": 59490,
      "training_step_time": 0.4457855224609375
    },
    {
      "epoch": 0.000363104248046875,
      "model_forward_time": 0.11465930938720703,
      "step": 59491
    },
    {
      "epoch": 0.000363104248046875,
      "step": 59491,
      "training_step_time": 0.46506190299987793
    },
    {
      "epoch": 0.0003631103515625,
      "model_forward_time": 0.11550283432006836,
      "step": 59492
    },
    {
      "epoch": 0.0003631103515625,
      "step": 59492,
      "training_step_time": 0.4788053035736084
    },
    {
      "epoch": 0.000363116455078125,
      "model_forward_time": 0.11437106132507324,
      "step": 59493
    },
    {
      "epoch": 0.000363116455078125,
      "step": 59493,
      "training_step_time": 0.3813145160675049
    },
    {
      "epoch": 0.00036312255859375,
      "model_forward_time": 0.11509227752685547,
      "step": 59494
    },
    {
      "epoch": 0.00036312255859375,
      "step": 59494,
      "training_step_time": 0.3977847099304199
    },
    {
      "epoch": 0.000363128662109375,
      "model_forward_time": 0.1149454116821289,
      "step": 59495
    },
    {
      "epoch": 0.000363128662109375,
      "step": 59495,
      "training_step_time": 0.37729883193969727
    },
    {
      "epoch": 0.000363134765625,
      "model_forward_time": 0.11539864540100098,
      "step": 59496
    },
    {
      "epoch": 0.000363134765625,
      "step": 59496,
      "training_step_time": 0.39514780044555664
    },
    {
      "epoch": 0.000363140869140625,
      "model_forward_time": 0.11541104316711426,
      "step": 59497
    },
    {
      "epoch": 0.000363140869140625,
      "step": 59497,
      "training_step_time": 0.39560842514038086
    },
    {
      "epoch": 0.00036314697265625,
      "model_forward_time": 0.11503291130065918,
      "step": 59498
    },
    {
      "epoch": 0.00036314697265625,
      "step": 59498,
      "training_step_time": 0.38949012756347656
    },
    {
      "epoch": 0.000363153076171875,
      "model_forward_time": 0.11552882194519043,
      "step": 59499
    },
    {
      "epoch": 0.000363153076171875,
      "step": 59499,
      "training_step_time": 0.3962886333465576
    },
    {
      "epoch": 0.0003631591796875,
      "grad_norm": 0.09474994242191315,
      "learning_rate": 1.8984648752429225e-08,
      "loss": 0.038,
      "step": 59500
    },
    {
      "epoch": 0.0003631591796875,
      "model_forward_time": 0.1152806282043457,
      "step": 59500
    },
    {
      "epoch": 0.0003631591796875,
      "step": 59500,
      "training_step_time": 0.484722375869751
    },
    {
      "epoch": 0.000363165283203125,
      "model_forward_time": 0.11547708511352539,
      "step": 59501
    },
    {
      "epoch": 0.000363165283203125,
      "step": 59501,
      "training_step_time": 0.4003176689147949
    },
    {
      "epoch": 0.00036317138671875,
      "model_forward_time": 0.11505651473999023,
      "step": 59502
    },
    {
      "epoch": 0.00036317138671875,
      "step": 59502,
      "training_step_time": 0.4349813461303711
    },
    {
      "epoch": 0.000363177490234375,
      "model_forward_time": 0.11485624313354492,
      "step": 59503
    },
    {
      "epoch": 0.000363177490234375,
      "step": 59503,
      "training_step_time": 0.42338085174560547
    },
    {
      "epoch": 0.00036318359375,
      "model_forward_time": 0.1151742935180664,
      "step": 59504
    },
    {
      "epoch": 0.00036318359375,
      "step": 59504,
      "training_step_time": 0.42484045028686523
    },
    {
      "epoch": 0.000363189697265625,
      "model_forward_time": 0.11540675163269043,
      "step": 59505
    },
    {
      "epoch": 0.000363189697265625,
      "step": 59505,
      "training_step_time": 0.4561173915863037
    },
    {
      "epoch": 0.00036319580078125,
      "model_forward_time": 0.11498188972473145,
      "step": 59506
    },
    {
      "epoch": 0.00036319580078125,
      "step": 59506,
      "training_step_time": 0.6031725406646729
    },
    {
      "epoch": 0.000363201904296875,
      "model_forward_time": 0.11425662040710449,
      "step": 59507
    },
    {
      "epoch": 0.000363201904296875,
      "step": 59507,
      "training_step_time": 0.3894214630126953
    },
    {
      "epoch": 0.0003632080078125,
      "model_forward_time": 0.1144862174987793,
      "step": 59508
    },
    {
      "epoch": 0.0003632080078125,
      "step": 59508,
      "training_step_time": 0.43175315856933594
    },
    {
      "epoch": 0.000363214111328125,
      "model_forward_time": 0.11460399627685547,
      "step": 59509
    },
    {
      "epoch": 0.000363214111328125,
      "step": 59509,
      "training_step_time": 0.40114474296569824
    },
    {
      "epoch": 0.00036322021484375,
      "grad_norm": 0.07119841128587723,
      "learning_rate": 1.8232902356407622e-08,
      "loss": 0.0372,
      "step": 59510
    },
    {
      "epoch": 0.00036322021484375,
      "model_forward_time": 0.11496114730834961,
      "step": 59510
    },
    {
      "epoch": 0.00036322021484375,
      "step": 59510,
      "training_step_time": 0.3858678340911865
    },
    {
      "epoch": 0.000363226318359375,
      "model_forward_time": 0.11463809013366699,
      "step": 59511
    },
    {
      "epoch": 0.000363226318359375,
      "step": 59511,
      "training_step_time": 0.3919188976287842
    },
    {
      "epoch": 0.000363232421875,
      "model_forward_time": 0.11507439613342285,
      "step": 59512
    },
    {
      "epoch": 0.000363232421875,
      "step": 59512,
      "training_step_time": 0.5299420356750488
    },
    {
      "epoch": 0.000363238525390625,
      "model_forward_time": 0.11485719680786133,
      "step": 59513
    },
    {
      "epoch": 0.000363238525390625,
      "step": 59513,
      "training_step_time": 0.4018526077270508
    },
    {
      "epoch": 0.00036324462890625,
      "model_forward_time": 0.11489582061767578,
      "step": 59514
    },
    {
      "epoch": 0.00036324462890625,
      "step": 59514,
      "training_step_time": 0.40416836738586426
    },
    {
      "epoch": 0.000363250732421875,
      "model_forward_time": 0.11472225189208984,
      "step": 59515
    },
    {
      "epoch": 0.000363250732421875,
      "step": 59515,
      "training_step_time": 0.4336390495300293
    },
    {
      "epoch": 0.0003632568359375,
      "model_forward_time": 0.11501574516296387,
      "step": 59516
    },
    {
      "epoch": 0.0003632568359375,
      "step": 59516,
      "training_step_time": 0.41210222244262695
    },
    {
      "epoch": 0.000363262939453125,
      "model_forward_time": 0.1150977611541748,
      "step": 59517
    },
    {
      "epoch": 0.000363262939453125,
      "step": 59517,
      "training_step_time": 0.3701801300048828
    },
    {
      "epoch": 0.00036326904296875,
      "model_forward_time": 0.1149437427520752,
      "step": 59518
    },
    {
      "epoch": 0.00036326904296875,
      "step": 59518,
      "training_step_time": 0.5912201404571533
    },
    {
      "epoch": 0.000363275146484375,
      "model_forward_time": 0.11433076858520508,
      "step": 59519
    },
    {
      "epoch": 0.000363275146484375,
      "step": 59519,
      "training_step_time": 0.46805834770202637
    },
    {
      "epoch": 0.00036328125,
      "grad_norm": 0.09553023427724838,
      "learning_rate": 1.749633910153592e-08,
      "loss": 0.0361,
      "step": 59520
    },
    {
      "epoch": 0.00036328125,
      "model_forward_time": 0.11431550979614258,
      "step": 59520
    },
    {
      "epoch": 0.00036328125,
      "step": 59520,
      "training_step_time": 0.46240901947021484
    },
    {
      "epoch": 0.000363287353515625,
      "model_forward_time": 0.11452293395996094,
      "step": 59521
    },
    {
      "epoch": 0.000363287353515625,
      "step": 59521,
      "training_step_time": 0.4421238899230957
    },
    {
      "epoch": 0.00036329345703125,
      "model_forward_time": 0.11429643630981445,
      "step": 59522
    },
    {
      "epoch": 0.00036329345703125,
      "step": 59522,
      "training_step_time": 0.409679651260376
    },
    {
      "epoch": 0.000363299560546875,
      "model_forward_time": 0.11436748504638672,
      "step": 59523
    },
    {
      "epoch": 0.000363299560546875,
      "step": 59523,
      "training_step_time": 0.3841252326965332
    },
    {
      "epoch": 0.0003633056640625,
      "model_forward_time": 0.11525511741638184,
      "step": 59524
    },
    {
      "epoch": 0.0003633056640625,
      "step": 59524,
      "training_step_time": 0.3949289321899414
    },
    {
      "epoch": 0.000363311767578125,
      "model_forward_time": 0.11555218696594238,
      "step": 59525
    },
    {
      "epoch": 0.000363311767578125,
      "step": 59525,
      "training_step_time": 0.3852684497833252
    },
    {
      "epoch": 0.00036331787109375,
      "model_forward_time": 0.11544060707092285,
      "step": 59526
    },
    {
      "epoch": 0.00036331787109375,
      "step": 59526,
      "training_step_time": 0.39341235160827637
    },
    {
      "epoch": 0.000363323974609375,
      "model_forward_time": 0.1150209903717041,
      "step": 59527
    },
    {
      "epoch": 0.000363323974609375,
      "step": 59527,
      "training_step_time": 0.3969459533691406
    },
    {
      "epoch": 0.000363330078125,
      "model_forward_time": 0.11499762535095215,
      "step": 59528
    },
    {
      "epoch": 0.000363330078125,
      "step": 59528,
      "training_step_time": 0.41222596168518066
    },
    {
      "epoch": 0.000363336181640625,
      "model_forward_time": 0.11519861221313477,
      "step": 59529
    },
    {
      "epoch": 0.000363336181640625,
      "step": 59529,
      "training_step_time": 0.42798471450805664
    },
    {
      "epoch": 0.00036334228515625,
      "grad_norm": 0.07746852189302444,
      "learning_rate": 1.6774959211568465e-08,
      "loss": 0.0414,
      "step": 59530
    },
    {
      "epoch": 0.00036334228515625,
      "model_forward_time": 0.11519384384155273,
      "step": 59530
    },
    {
      "epoch": 0.00036334228515625,
      "step": 59530,
      "training_step_time": 0.5729105472564697
    },
    {
      "epoch": 0.000363348388671875,
      "model_forward_time": 0.11473393440246582,
      "step": 59531
    },
    {
      "epoch": 0.000363348388671875,
      "step": 59531,
      "training_step_time": 0.439772367477417
    },
    {
      "epoch": 0.0003633544921875,
      "model_forward_time": 0.11553478240966797,
      "step": 59532
    },
    {
      "epoch": 0.0003633544921875,
      "step": 59532,
      "training_step_time": 0.46092677116394043
    },
    {
      "epoch": 0.000363360595703125,
      "model_forward_time": 0.11565852165222168,
      "step": 59533
    },
    {
      "epoch": 0.000363360595703125,
      "step": 59533,
      "training_step_time": 0.3936958312988281
    },
    {
      "epoch": 0.00036336669921875,
      "model_forward_time": 0.1151132583618164,
      "step": 59534
    },
    {
      "epoch": 0.00036336669921875,
      "step": 59534,
      "training_step_time": 0.4673280715942383
    },
    {
      "epoch": 0.000363372802734375,
      "model_forward_time": 0.1143190860748291,
      "step": 59535
    },
    {
      "epoch": 0.000363372802734375,
      "step": 59535,
      "training_step_time": 0.47057676315307617
    },
    {
      "epoch": 0.00036337890625,
      "model_forward_time": 0.1152200698852539,
      "step": 59536
    },
    {
      "epoch": 0.00036337890625,
      "step": 59536,
      "training_step_time": 0.38146376609802246
    },
    {
      "epoch": 0.000363385009765625,
      "model_forward_time": 0.11411499977111816,
      "step": 59537
    },
    {
      "epoch": 0.000363385009765625,
      "step": 59537,
      "training_step_time": 0.3868553638458252
    },
    {
      "epoch": 0.00036339111328125,
      "model_forward_time": 0.11522793769836426,
      "step": 59538
    },
    {
      "epoch": 0.00036339111328125,
      "step": 59538,
      "training_step_time": 0.3940906524658203
    },
    {
      "epoch": 0.000363397216796875,
      "model_forward_time": 0.11588597297668457,
      "step": 59539
    },
    {
      "epoch": 0.000363397216796875,
      "step": 59539,
      "training_step_time": 0.3901541233062744
    },
    {
      "epoch": 0.0003634033203125,
      "grad_norm": 0.0913148894906044,
      "learning_rate": 1.6068762905635527e-08,
      "loss": 0.0349,
      "step": 59540
    },
    {
      "epoch": 0.0003634033203125,
      "model_forward_time": 0.11488866806030273,
      "step": 59540
    },
    {
      "epoch": 0.0003634033203125,
      "step": 59540,
      "training_step_time": 0.3871004581451416
    },
    {
      "epoch": 0.000363409423828125,
      "model_forward_time": 0.11543083190917969,
      "step": 59541
    },
    {
      "epoch": 0.000363409423828125,
      "step": 59541,
      "training_step_time": 0.38240575790405273
    },
    {
      "epoch": 0.00036341552734375,
      "model_forward_time": 0.11496233940124512,
      "step": 59542
    },
    {
      "epoch": 0.00036341552734375,
      "step": 59542,
      "training_step_time": 0.47162342071533203
    },
    {
      "epoch": 0.000363421630859375,
      "model_forward_time": 0.11548805236816406,
      "step": 59543
    },
    {
      "epoch": 0.000363421630859375,
      "step": 59543,
      "training_step_time": 0.3858180046081543
    },
    {
      "epoch": 0.000363427734375,
      "model_forward_time": 0.11613798141479492,
      "step": 59544
    },
    {
      "epoch": 0.000363427734375,
      "step": 59544,
      "training_step_time": 0.41060495376586914
    },
    {
      "epoch": 0.000363433837890625,
      "model_forward_time": 0.1152808666229248,
      "step": 59545
    },
    {
      "epoch": 0.000363433837890625,
      "step": 59545,
      "training_step_time": 0.3926215171813965
    },
    {
      "epoch": 0.00036343994140625,
      "model_forward_time": 0.11537599563598633,
      "step": 59546
    },
    {
      "epoch": 0.00036343994140625,
      "step": 59546,
      "training_step_time": 0.41981053352355957
    },
    {
      "epoch": 0.000363446044921875,
      "model_forward_time": 0.11537837982177734,
      "step": 59547
    },
    {
      "epoch": 0.000363446044921875,
      "step": 59547,
      "training_step_time": 0.5053541660308838
    },
    {
      "epoch": 0.0003634521484375,
      "model_forward_time": 0.11491942405700684,
      "step": 59548
    },
    {
      "epoch": 0.0003634521484375,
      "step": 59548,
      "training_step_time": 0.5062878131866455
    },
    {
      "epoch": 0.000363458251953125,
      "model_forward_time": 0.1149132251739502,
      "step": 59549
    },
    {
      "epoch": 0.000363458251953125,
      "step": 59549,
      "training_step_time": 0.4027848243713379
    },
    {
      "epoch": 0.00036346435546875,
      "grad_norm": 0.0858369991183281,
      "learning_rate": 1.5377750398265502e-08,
      "loss": 0.0359,
      "step": 59550
    },
    {
      "epoch": 0.00036346435546875,
      "model_forward_time": 0.11525535583496094,
      "step": 59550
    },
    {
      "epoch": 0.00036346435546875,
      "step": 59550,
      "training_step_time": 0.4970378875732422
    },
    {
      "epoch": 0.000363470458984375,
      "model_forward_time": 0.11441659927368164,
      "step": 59551
    },
    {
      "epoch": 0.000363470458984375,
      "step": 59551,
      "training_step_time": 0.39755892753601074
    },
    {
      "epoch": 0.0003634765625,
      "model_forward_time": 0.11458921432495117,
      "step": 59552
    },
    {
      "epoch": 0.0003634765625,
      "step": 59552,
      "training_step_time": 0.38160276412963867
    },
    {
      "epoch": 0.000363482666015625,
      "model_forward_time": 0.11408090591430664,
      "step": 59553
    },
    {
      "epoch": 0.000363482666015625,
      "step": 59553,
      "training_step_time": 0.392681360244751
    },
    {
      "epoch": 0.00036348876953125,
      "model_forward_time": 0.11494731903076172,
      "step": 59554
    },
    {
      "epoch": 0.00036348876953125,
      "step": 59554,
      "training_step_time": 0.4692258834838867
    },
    {
      "epoch": 0.000363494873046875,
      "model_forward_time": 0.11440682411193848,
      "step": 59555
    },
    {
      "epoch": 0.000363494873046875,
      "step": 59555,
      "training_step_time": 0.42329835891723633
    },
    {
      "epoch": 0.0003635009765625,
      "model_forward_time": 0.11602377891540527,
      "step": 59556
    },
    {
      "epoch": 0.0003635009765625,
      "step": 59556,
      "training_step_time": 0.404510498046875
    },
    {
      "epoch": 0.000363507080078125,
      "model_forward_time": 0.11527037620544434,
      "step": 59557
    },
    {
      "epoch": 0.000363507080078125,
      "step": 59557,
      "training_step_time": 0.3916921615600586
    },
    {
      "epoch": 0.00036351318359375,
      "model_forward_time": 0.11520957946777344,
      "step": 59558
    },
    {
      "epoch": 0.00036351318359375,
      "step": 59558,
      "training_step_time": 0.3843531608581543
    },
    {
      "epoch": 0.000363519287109375,
      "model_forward_time": 0.11483383178710938,
      "step": 59559
    },
    {
      "epoch": 0.000363519287109375,
      "step": 59559,
      "training_step_time": 0.38579463958740234
    },
    {
      "epoch": 0.000363525390625,
      "grad_norm": 0.16516074538230896,
      "learning_rate": 1.4701921899362703e-08,
      "loss": 0.0385,
      "step": 59560
    },
    {
      "epoch": 0.000363525390625,
      "model_forward_time": 0.11481833457946777,
      "step": 59560
    },
    {
      "epoch": 0.000363525390625,
      "step": 59560,
      "training_step_time": 0.3948187828063965
    },
    {
      "epoch": 0.000363531494140625,
      "model_forward_time": 0.11481809616088867,
      "step": 59561
    },
    {
      "epoch": 0.000363531494140625,
      "step": 59561,
      "training_step_time": 0.403139591217041
    },
    {
      "epoch": 0.00036353759765625,
      "model_forward_time": 0.11526346206665039,
      "step": 59562
    },
    {
      "epoch": 0.00036353759765625,
      "step": 59562,
      "training_step_time": 0.4736063480377197
    },
    {
      "epoch": 0.000363543701171875,
      "model_forward_time": 0.11514663696289062,
      "step": 59563
    },
    {
      "epoch": 0.000363543701171875,
      "step": 59563,
      "training_step_time": 0.477553129196167
    },
    {
      "epoch": 0.0003635498046875,
      "model_forward_time": 0.11476349830627441,
      "step": 59564
    },
    {
      "epoch": 0.0003635498046875,
      "step": 59564,
      "training_step_time": 0.42824578285217285
    },
    {
      "epoch": 0.000363555908203125,
      "model_forward_time": 0.11443090438842773,
      "step": 59565
    },
    {
      "epoch": 0.000363555908203125,
      "step": 59565,
      "training_step_time": 0.3973217010498047
    },
    {
      "epoch": 0.00036356201171875,
      "model_forward_time": 0.11513352394104004,
      "step": 59566
    },
    {
      "epoch": 0.00036356201171875,
      "step": 59566,
      "training_step_time": 0.3862423896789551
    },
    {
      "epoch": 0.000363568115234375,
      "model_forward_time": 0.11499404907226562,
      "step": 59567
    },
    {
      "epoch": 0.000363568115234375,
      "step": 59567,
      "training_step_time": 0.4076194763183594
    },
    {
      "epoch": 0.00036357421875,
      "model_forward_time": 0.1144559383392334,
      "step": 59568
    },
    {
      "epoch": 0.00036357421875,
      "step": 59568,
      "training_step_time": 0.4002072811126709
    },
    {
      "epoch": 0.000363580322265625,
      "model_forward_time": 0.11499857902526855,
      "step": 59569
    },
    {
      "epoch": 0.000363580322265625,
      "step": 59569,
      "training_step_time": 0.4469268321990967
    },
    {
      "epoch": 0.00036358642578125,
      "grad_norm": 0.10572318732738495,
      "learning_rate": 1.4041277614235127e-08,
      "loss": 0.0385,
      "step": 59570
    },
    {
      "epoch": 0.00036358642578125,
      "model_forward_time": 0.1147458553314209,
      "step": 59570
    },
    {
      "epoch": 0.00036358642578125,
      "step": 59570,
      "training_step_time": 0.4335794448852539
    },
    {
      "epoch": 0.000363592529296875,
      "model_forward_time": 0.11501336097717285,
      "step": 59571
    },
    {
      "epoch": 0.000363592529296875,
      "step": 59571,
      "training_step_time": 0.3945126533508301
    },
    {
      "epoch": 0.0003635986328125,
      "model_forward_time": 0.11491155624389648,
      "step": 59572
    },
    {
      "epoch": 0.0003635986328125,
      "step": 59572,
      "training_step_time": 0.43885159492492676
    },
    {
      "epoch": 0.000363604736328125,
      "model_forward_time": 0.11532163619995117,
      "step": 59573
    },
    {
      "epoch": 0.000363604736328125,
      "step": 59573,
      "training_step_time": 0.39720964431762695
    },
    {
      "epoch": 0.00036361083984375,
      "model_forward_time": 0.11483645439147949,
      "step": 59574
    },
    {
      "epoch": 0.00036361083984375,
      "step": 59574,
      "training_step_time": 0.3887777328491211
    },
    {
      "epoch": 0.000363616943359375,
      "model_forward_time": 0.11430811882019043,
      "step": 59575
    },
    {
      "epoch": 0.000363616943359375,
      "step": 59575,
      "training_step_time": 0.5194807052612305
    },
    {
      "epoch": 0.000363623046875,
      "model_forward_time": 0.11576986312866211,
      "step": 59576
    },
    {
      "epoch": 0.000363623046875,
      "step": 59576,
      "training_step_time": 0.47978925704956055
    },
    {
      "epoch": 0.000363629150390625,
      "model_forward_time": 0.11440801620483398,
      "step": 59577
    },
    {
      "epoch": 0.000363629150390625,
      "step": 59577,
      "training_step_time": 0.49951910972595215
    },
    {
      "epoch": 0.00036363525390625,
      "model_forward_time": 0.11534261703491211,
      "step": 59578
    },
    {
      "epoch": 0.00036363525390625,
      "step": 59578,
      "training_step_time": 0.4677090644836426
    },
    {
      "epoch": 0.000363641357421875,
      "model_forward_time": 0.11471676826477051,
      "step": 59579
    },
    {
      "epoch": 0.000363641357421875,
      "step": 59579,
      "training_step_time": 0.3874220848083496
    },
    {
      "epoch": 0.0003636474609375,
      "grad_norm": 0.07330934703350067,
      "learning_rate": 1.3395817743561134e-08,
      "loss": 0.0329,
      "step": 59580
    },
    {
      "epoch": 0.0003636474609375,
      "model_forward_time": 0.11429882049560547,
      "step": 59580
    },
    {
      "epoch": 0.0003636474609375,
      "step": 59580,
      "training_step_time": 0.38710999488830566
    },
    {
      "epoch": 0.000363653564453125,
      "model_forward_time": 0.11400842666625977,
      "step": 59581
    },
    {
      "epoch": 0.000363653564453125,
      "step": 59581,
      "training_step_time": 0.40274548530578613
    },
    {
      "epoch": 0.00036365966796875,
      "model_forward_time": 0.11488127708435059,
      "step": 59582
    },
    {
      "epoch": 0.00036365966796875,
      "step": 59582,
      "training_step_time": 0.406940221786499
    },
    {
      "epoch": 0.000363665771484375,
      "model_forward_time": 0.11447787284851074,
      "step": 59583
    },
    {
      "epoch": 0.000363665771484375,
      "step": 59583,
      "training_step_time": 0.4058976173400879
    },
    {
      "epoch": 0.000363671875,
      "model_forward_time": 0.11472511291503906,
      "step": 59584
    },
    {
      "epoch": 0.000363671875,
      "step": 59584,
      "training_step_time": 0.44694089889526367
    },
    {
      "epoch": 0.000363677978515625,
      "model_forward_time": 0.1149897575378418,
      "step": 59585
    },
    {
      "epoch": 0.000363677978515625,
      "step": 59585,
      "training_step_time": 0.38612890243530273
    },
    {
      "epoch": 0.00036368408203125,
      "model_forward_time": 0.11481976509094238,
      "step": 59586
    },
    {
      "epoch": 0.00036368408203125,
      "step": 59586,
      "training_step_time": 0.40087461471557617
    },
    {
      "epoch": 0.000363690185546875,
      "model_forward_time": 0.11496710777282715,
      "step": 59587
    },
    {
      "epoch": 0.000363690185546875,
      "step": 59587,
      "training_step_time": 0.399794340133667
    },
    {
      "epoch": 0.0003636962890625,
      "model_forward_time": 0.11524629592895508,
      "step": 59588
    },
    {
      "epoch": 0.0003636962890625,
      "step": 59588,
      "training_step_time": 0.3939070701599121
    },
    {
      "epoch": 0.000363702392578125,
      "model_forward_time": 0.11555671691894531,
      "step": 59589
    },
    {
      "epoch": 0.000363702392578125,
      "step": 59589,
      "training_step_time": 0.3682708740234375
    },
    {
      "epoch": 0.00036370849609375,
      "grad_norm": 0.09660891443490982,
      "learning_rate": 1.2765542483417214e-08,
      "loss": 0.0396,
      "step": 59590
    },
    {
      "epoch": 0.00036370849609375,
      "model_forward_time": 0.11456751823425293,
      "step": 59590
    },
    {
      "epoch": 0.00036370849609375,
      "step": 59590,
      "training_step_time": 0.6556544303894043
    },
    {
      "epoch": 0.000363714599609375,
      "model_forward_time": 0.1150670051574707,
      "step": 59591
    },
    {
      "epoch": 0.000363714599609375,
      "step": 59591,
      "training_step_time": 0.4708442687988281
    },
    {
      "epoch": 0.000363720703125,
      "model_forward_time": 0.11536693572998047,
      "step": 59592
    },
    {
      "epoch": 0.000363720703125,
      "step": 59592,
      "training_step_time": 0.45418620109558105
    },
    {
      "epoch": 0.000363726806640625,
      "model_forward_time": 0.11408019065856934,
      "step": 59593
    },
    {
      "epoch": 0.000363726806640625,
      "step": 59593,
      "training_step_time": 0.3906264305114746
    },
    {
      "epoch": 0.00036373291015625,
      "model_forward_time": 0.11473608016967773,
      "step": 59594
    },
    {
      "epoch": 0.00036373291015625,
      "step": 59594,
      "training_step_time": 0.38355231285095215
    },
    {
      "epoch": 0.000363739013671875,
      "model_forward_time": 0.1159965991973877,
      "step": 59595
    },
    {
      "epoch": 0.000363739013671875,
      "step": 59595,
      "training_step_time": 0.4199240207672119
    },
    {
      "epoch": 0.0003637451171875,
      "model_forward_time": 0.11495018005371094,
      "step": 59596
    },
    {
      "epoch": 0.0003637451171875,
      "step": 59596,
      "training_step_time": 0.42009997367858887
    },
    {
      "epoch": 0.000363751220703125,
      "model_forward_time": 0.11465883255004883,
      "step": 59597
    },
    {
      "epoch": 0.000363751220703125,
      "step": 59597,
      "training_step_time": 0.4000508785247803
    },
    {
      "epoch": 0.00036375732421875,
      "model_forward_time": 0.11529135704040527,
      "step": 59598
    },
    {
      "epoch": 0.00036375732421875,
      "step": 59598,
      "training_step_time": 0.39427995681762695
    },
    {
      "epoch": 0.000363763427734375,
      "model_forward_time": 0.11517810821533203,
      "step": 59599
    },
    {
      "epoch": 0.000363763427734375,
      "step": 59599,
      "training_step_time": 0.39838099479675293
    },
    {
      "epoch": 0.00036376953125,
      "grad_norm": 0.08158218115568161,
      "learning_rate": 1.215045202527243e-08,
      "loss": 0.0389,
      "step": 59600
    },
    {
      "epoch": 0.00036376953125,
      "model_forward_time": 0.11493849754333496,
      "step": 59600
    },
    {
      "epoch": 0.00036376953125,
      "step": 59600,
      "training_step_time": 0.42499804496765137
    },
    {
      "epoch": 0.000363775634765625,
      "model_forward_time": 0.11473798751831055,
      "step": 59601
    },
    {
      "epoch": 0.000363775634765625,
      "step": 59601,
      "training_step_time": 0.3949544429779053
    },
    {
      "epoch": 0.00036378173828125,
      "model_forward_time": 0.11486577987670898,
      "step": 59602
    },
    {
      "epoch": 0.00036378173828125,
      "step": 59602,
      "training_step_time": 0.5784704685211182
    },
    {
      "epoch": 0.000363787841796875,
      "model_forward_time": 0.11487555503845215,
      "step": 59603
    },
    {
      "epoch": 0.000363787841796875,
      "step": 59603,
      "training_step_time": 0.44270920753479004
    },
    {
      "epoch": 0.0003637939453125,
      "model_forward_time": 0.11502695083618164,
      "step": 59604
    },
    {
      "epoch": 0.0003637939453125,
      "step": 59604,
      "training_step_time": 0.5017588138580322
    },
    {
      "epoch": 0.000363800048828125,
      "model_forward_time": 0.11530590057373047,
      "step": 59605
    },
    {
      "epoch": 0.000363800048828125,
      "step": 59605,
      "training_step_time": 0.4018397331237793
    },
    {
      "epoch": 0.00036380615234375,
      "model_forward_time": 0.11438655853271484,
      "step": 59606
    },
    {
      "epoch": 0.00036380615234375,
      "step": 59606,
      "training_step_time": 0.5185871124267578
    },
    {
      "epoch": 0.000363812255859375,
      "model_forward_time": 0.11396193504333496,
      "step": 59607
    },
    {
      "epoch": 0.000363812255859375,
      "step": 59607,
      "training_step_time": 0.3836507797241211
    },
    {
      "epoch": 0.000363818359375,
      "model_forward_time": 0.11436057090759277,
      "step": 59608
    },
    {
      "epoch": 0.000363818359375,
      "step": 59608,
      "training_step_time": 0.38674402236938477
    },
    {
      "epoch": 0.000363824462890625,
      "model_forward_time": 0.11518001556396484,
      "step": 59609
    },
    {
      "epoch": 0.000363824462890625,
      "step": 59609,
      "training_step_time": 0.4054841995239258
    },
    {
      "epoch": 0.00036383056640625,
      "grad_norm": 0.07443755120038986,
      "learning_rate": 1.1550546555960662e-08,
      "loss": 0.0405,
      "step": 59610
    },
    {
      "epoch": 0.00036383056640625,
      "model_forward_time": 0.11475396156311035,
      "step": 59610
    },
    {
      "epoch": 0.00036383056640625,
      "step": 59610,
      "training_step_time": 0.3954124450683594
    },
    {
      "epoch": 0.000363836669921875,
      "model_forward_time": 0.11499285697937012,
      "step": 59611
    },
    {
      "epoch": 0.000363836669921875,
      "step": 59611,
      "training_step_time": 0.3938412666320801
    },
    {
      "epoch": 0.0003638427734375,
      "model_forward_time": 0.1145775318145752,
      "step": 59612
    },
    {
      "epoch": 0.0003638427734375,
      "step": 59612,
      "training_step_time": 0.38616275787353516
    },
    {
      "epoch": 0.000363848876953125,
      "model_forward_time": 0.11541891098022461,
      "step": 59613
    },
    {
      "epoch": 0.000363848876953125,
      "step": 59613,
      "training_step_time": 0.3913249969482422
    },
    {
      "epoch": 0.00036385498046875,
      "model_forward_time": 0.11513328552246094,
      "step": 59614
    },
    {
      "epoch": 0.00036385498046875,
      "step": 59614,
      "training_step_time": 0.5785109996795654
    },
    {
      "epoch": 0.000363861083984375,
      "model_forward_time": 0.11541152000427246,
      "step": 59615
    },
    {
      "epoch": 0.000363861083984375,
      "step": 59615,
      "training_step_time": 0.3899879455566406
    },
    {
      "epoch": 0.0003638671875,
      "model_forward_time": 0.11522197723388672,
      "step": 59616
    },
    {
      "epoch": 0.0003638671875,
      "step": 59616,
      "training_step_time": 0.39533305168151855
    },
    {
      "epoch": 0.000363873291015625,
      "model_forward_time": 0.11551380157470703,
      "step": 59617
    },
    {
      "epoch": 0.000363873291015625,
      "step": 59617,
      "training_step_time": 0.39191532135009766
    },
    {
      "epoch": 0.00036387939453125,
      "model_forward_time": 0.11562585830688477,
      "step": 59618
    },
    {
      "epoch": 0.00036387939453125,
      "step": 59618,
      "training_step_time": 0.42441439628601074
    },
    {
      "epoch": 0.000363885498046875,
      "model_forward_time": 0.11475634574890137,
      "step": 59619
    },
    {
      "epoch": 0.000363885498046875,
      "step": 59619,
      "training_step_time": 0.4630277156829834
    },
    {
      "epoch": 0.0003638916015625,
      "grad_norm": 0.09415633976459503,
      "learning_rate": 1.096582625772502e-08,
      "loss": 0.0327,
      "step": 59620
    },
    {
      "epoch": 0.0003638916015625,
      "model_forward_time": 0.11564874649047852,
      "step": 59620
    },
    {
      "epoch": 0.0003638916015625,
      "step": 59620,
      "training_step_time": 0.5561671257019043
    },
    {
      "epoch": 0.000363897705078125,
      "model_forward_time": 0.11477851867675781,
      "step": 59621
    },
    {
      "epoch": 0.000363897705078125,
      "step": 59621,
      "training_step_time": 0.3840949535369873
    },
    {
      "epoch": 0.00036390380859375,
      "model_forward_time": 0.11468648910522461,
      "step": 59622
    },
    {
      "epoch": 0.00036390380859375,
      "step": 59622,
      "training_step_time": 0.40755534172058105
    },
    {
      "epoch": 0.000363909912109375,
      "model_forward_time": 0.11588621139526367,
      "step": 59623
    },
    {
      "epoch": 0.000363909912109375,
      "step": 59623,
      "training_step_time": 0.4068410396575928
    },
    {
      "epoch": 0.000363916015625,
      "model_forward_time": 0.11484622955322266,
      "step": 59624
    },
    {
      "epoch": 0.000363916015625,
      "step": 59624,
      "training_step_time": 0.38463878631591797
    },
    {
      "epoch": 0.000363922119140625,
      "model_forward_time": 0.11541223526000977,
      "step": 59625
    },
    {
      "epoch": 0.000363922119140625,
      "step": 59625,
      "training_step_time": 0.38717150688171387
    },
    {
      "epoch": 0.00036392822265625,
      "model_forward_time": 0.11519217491149902,
      "step": 59626
    },
    {
      "epoch": 0.00036392822265625,
      "step": 59626,
      "training_step_time": 0.5216374397277832
    },
    {
      "epoch": 0.000363934326171875,
      "model_forward_time": 0.1146695613861084,
      "step": 59627
    },
    {
      "epoch": 0.000363934326171875,
      "step": 59627,
      "training_step_time": 0.3901557922363281
    },
    {
      "epoch": 0.0003639404296875,
      "model_forward_time": 0.11498498916625977,
      "step": 59628
    },
    {
      "epoch": 0.0003639404296875,
      "step": 59628,
      "training_step_time": 0.39578771591186523
    },
    {
      "epoch": 0.000363946533203125,
      "model_forward_time": 0.1157538890838623,
      "step": 59629
    },
    {
      "epoch": 0.000363946533203125,
      "step": 59629,
      "training_step_time": 0.3959205150604248
    },
    {
      "epoch": 0.00036395263671875,
      "grad_norm": 0.09312769025564194,
      "learning_rate": 1.0396291308190087e-08,
      "loss": 0.0347,
      "step": 59630
    },
    {
      "epoch": 0.00036395263671875,
      "model_forward_time": 0.11565613746643066,
      "step": 59630
    },
    {
      "epoch": 0.00036395263671875,
      "step": 59630,
      "training_step_time": 0.3888223171234131
    },
    {
      "epoch": 0.000363958740234375,
      "model_forward_time": 0.11500263214111328,
      "step": 59631
    },
    {
      "epoch": 0.000363958740234375,
      "step": 59631,
      "training_step_time": 0.38669586181640625
    },
    {
      "epoch": 0.00036396484375,
      "model_forward_time": 0.1149747371673584,
      "step": 59632
    },
    {
      "epoch": 0.00036396484375,
      "step": 59632,
      "training_step_time": 0.6159865856170654
    },
    {
      "epoch": 0.000363970947265625,
      "model_forward_time": 0.1149454116821289,
      "step": 59633
    },
    {
      "epoch": 0.000363970947265625,
      "step": 59633,
      "training_step_time": 0.44519996643066406
    },
    {
      "epoch": 0.00036397705078125,
      "model_forward_time": 0.11445832252502441,
      "step": 59634
    },
    {
      "epoch": 0.00036397705078125,
      "step": 59634,
      "training_step_time": 0.504981279373169
    },
    {
      "epoch": 0.000363983154296875,
      "model_forward_time": 0.11456680297851562,
      "step": 59635
    },
    {
      "epoch": 0.000363983154296875,
      "step": 59635,
      "training_step_time": 0.4450056552886963
    },
    {
      "epoch": 0.0003639892578125,
      "model_forward_time": 0.11461257934570312,
      "step": 59636
    },
    {
      "epoch": 0.0003639892578125,
      "step": 59636,
      "training_step_time": 0.4038517475128174
    },
    {
      "epoch": 0.000363995361328125,
      "model_forward_time": 0.11465835571289062,
      "step": 59637
    },
    {
      "epoch": 0.000363995361328125,
      "step": 59637,
      "training_step_time": 0.38834595680236816
    },
    {
      "epoch": 0.00036400146484375,
      "model_forward_time": 0.11464285850524902,
      "step": 59638
    },
    {
      "epoch": 0.00036400146484375,
      "step": 59638,
      "training_step_time": 0.38941502571105957
    },
    {
      "epoch": 0.000364007568359375,
      "model_forward_time": 0.11486935615539551,
      "step": 59639
    },
    {
      "epoch": 0.000364007568359375,
      "step": 59639,
      "training_step_time": 0.39161157608032227
    },
    {
      "epoch": 0.000364013671875,
      "grad_norm": 0.21238192915916443,
      "learning_rate": 9.841941880361916e-09,
      "loss": 0.0357,
      "step": 59640
    },
    {
      "epoch": 0.000364013671875,
      "model_forward_time": 0.11469769477844238,
      "step": 59640
    },
    {
      "epoch": 0.000364013671875,
      "step": 59640,
      "training_step_time": 0.40160655975341797
    },
    {
      "epoch": 0.000364019775390625,
      "model_forward_time": 0.1151885986328125,
      "step": 59641
    },
    {
      "epoch": 0.000364019775390625,
      "step": 59641,
      "training_step_time": 0.4004526138305664
    },
    {
      "epoch": 0.00036402587890625,
      "model_forward_time": 0.11451601982116699,
      "step": 59642
    },
    {
      "epoch": 0.00036402587890625,
      "step": 59642,
      "training_step_time": 0.3912038803100586
    },
    {
      "epoch": 0.000364031982421875,
      "model_forward_time": 0.11515450477600098,
      "step": 59643
    },
    {
      "epoch": 0.000364031982421875,
      "step": 59643,
      "training_step_time": 0.3883044719696045
    },
    {
      "epoch": 0.0003640380859375,
      "model_forward_time": 0.11493611335754395,
      "step": 59644
    },
    {
      "epoch": 0.0003640380859375,
      "step": 59644,
      "training_step_time": 0.6240909099578857
    },
    {
      "epoch": 0.000364044189453125,
      "model_forward_time": 0.11473608016967773,
      "step": 59645
    },
    {
      "epoch": 0.000364044189453125,
      "step": 59645,
      "training_step_time": 0.39234256744384766
    },
    {
      "epoch": 0.00036405029296875,
      "model_forward_time": 0.11471891403198242,
      "step": 59646
    },
    {
      "epoch": 0.00036405029296875,
      "step": 59646,
      "training_step_time": 0.38361144065856934
    },
    {
      "epoch": 0.000364056396484375,
      "model_forward_time": 0.11481046676635742,
      "step": 59647
    },
    {
      "epoch": 0.000364056396484375,
      "step": 59647,
      "training_step_time": 0.41441965103149414
    },
    {
      "epoch": 0.0003640625,
      "model_forward_time": 0.11471176147460938,
      "step": 59648
    },
    {
      "epoch": 0.0003640625,
      "step": 59648,
      "training_step_time": 0.4799952507019043
    },
    {
      "epoch": 0.000364068603515625,
      "model_forward_time": 0.11472225189208984,
      "step": 59649
    },
    {
      "epoch": 0.000364068603515625,
      "step": 59649,
      "training_step_time": 0.45793700218200684
    },
    {
      "epoch": 0.00036407470703125,
      "grad_norm": 0.0878923088312149,
      "learning_rate": 9.302778142639135e-09,
      "loss": 0.0366,
      "step": 59650
    },
    {
      "epoch": 0.00036407470703125,
      "model_forward_time": 0.11555886268615723,
      "step": 59650
    },
    {
      "epoch": 0.00036407470703125,
      "step": 59650,
      "training_step_time": 0.4820365905761719
    },
    {
      "epoch": 0.000364080810546875,
      "model_forward_time": 0.11470675468444824,
      "step": 59651
    },
    {
      "epoch": 0.000364080810546875,
      "step": 59651,
      "training_step_time": 0.40177178382873535
    },
    {
      "epoch": 0.0003640869140625,
      "model_forward_time": 0.11460542678833008,
      "step": 59652
    },
    {
      "epoch": 0.0003640869140625,
      "step": 59652,
      "training_step_time": 0.39821839332580566
    },
    {
      "epoch": 0.000364093017578125,
      "model_forward_time": 0.11451053619384766,
      "step": 59653
    },
    {
      "epoch": 0.000364093017578125,
      "step": 59653,
      "training_step_time": 0.39690709114074707
    },
    {
      "epoch": 0.00036409912109375,
      "model_forward_time": 0.11489725112915039,
      "step": 59654
    },
    {
      "epoch": 0.00036409912109375,
      "step": 59654,
      "training_step_time": 0.39958810806274414
    },
    {
      "epoch": 0.000364105224609375,
      "model_forward_time": 0.1145164966583252,
      "step": 59655
    },
    {
      "epoch": 0.000364105224609375,
      "step": 59655,
      "training_step_time": 0.388286828994751
    },
    {
      "epoch": 0.000364111328125,
      "model_forward_time": 0.11471891403198242,
      "step": 59656
    },
    {
      "epoch": 0.000364111328125,
      "step": 59656,
      "training_step_time": 0.5151245594024658
    },
    {
      "epoch": 0.000364117431640625,
      "model_forward_time": 0.11503934860229492,
      "step": 59657
    },
    {
      "epoch": 0.000364117431640625,
      "step": 59657,
      "training_step_time": 0.38849329948425293
    },
    {
      "epoch": 0.00036412353515625,
      "model_forward_time": 0.1150355339050293,
      "step": 59658
    },
    {
      "epoch": 0.00036412353515625,
      "step": 59658,
      "training_step_time": 0.4064500331878662
    },
    {
      "epoch": 0.000364129638671875,
      "model_forward_time": 0.11470413208007812,
      "step": 59659
    },
    {
      "epoch": 0.000364129638671875,
      "step": 59659,
      "training_step_time": 0.39647388458251953
    },
    {
      "epoch": 0.0003641357421875,
      "grad_norm": 0.07044094800949097,
      "learning_rate": 8.778800258801844e-09,
      "loss": 0.0356,
      "step": 59660
    },
    {
      "epoch": 0.0003641357421875,
      "model_forward_time": 0.11485815048217773,
      "step": 59660
    },
    {
      "epoch": 0.0003641357421875,
      "step": 59660,
      "training_step_time": 0.40523672103881836
    },
    {
      "epoch": 0.000364141845703125,
      "model_forward_time": 0.11491203308105469,
      "step": 59661
    },
    {
      "epoch": 0.000364141845703125,
      "step": 59661,
      "training_step_time": 0.41577768325805664
    },
    {
      "epoch": 0.00036414794921875,
      "model_forward_time": 0.11497664451599121,
      "step": 59662
    },
    {
      "epoch": 0.00036414794921875,
      "step": 59662,
      "training_step_time": 0.5836570262908936
    },
    {
      "epoch": 0.000364154052734375,
      "model_forward_time": 0.11427545547485352,
      "step": 59663
    },
    {
      "epoch": 0.000364154052734375,
      "step": 59663,
      "training_step_time": 0.5090210437774658
    },
    {
      "epoch": 0.00036416015625,
      "model_forward_time": 0.11453866958618164,
      "step": 59664
    },
    {
      "epoch": 0.00036416015625,
      "step": 59664,
      "training_step_time": 0.39616894721984863
    },
    {
      "epoch": 0.000364166259765625,
      "model_forward_time": 0.11476349830627441,
      "step": 59665
    },
    {
      "epoch": 0.000364166259765625,
      "step": 59665,
      "training_step_time": 0.39948534965515137
    },
    {
      "epoch": 0.00036417236328125,
      "model_forward_time": 0.11511611938476562,
      "step": 59666
    },
    {
      "epoch": 0.00036417236328125,
      "step": 59666,
      "training_step_time": 0.3942420482635498
    },
    {
      "epoch": 0.000364178466796875,
      "model_forward_time": 0.11464476585388184,
      "step": 59667
    },
    {
      "epoch": 0.000364178466796875,
      "step": 59667,
      "training_step_time": 0.39800453186035156
    },
    {
      "epoch": 0.0003641845703125,
      "model_forward_time": 0.11522936820983887,
      "step": 59668
    },
    {
      "epoch": 0.0003641845703125,
      "step": 59668,
      "training_step_time": 0.4915945529937744
    },
    {
      "epoch": 0.000364190673828125,
      "model_forward_time": 0.1151885986328125,
      "step": 59669
    },
    {
      "epoch": 0.000364190673828125,
      "step": 59669,
      "training_step_time": 0.39658498764038086
    },
    {
      "epoch": 0.00036419677734375,
      "grad_norm": 0.0770314410328865,
      "learning_rate": 8.270008388022721e-09,
      "loss": 0.0358,
      "step": 59670
    },
    {
      "epoch": 0.00036419677734375,
      "model_forward_time": 0.11539530754089355,
      "step": 59670
    },
    {
      "epoch": 0.00036419677734375,
      "step": 59670,
      "training_step_time": 0.39933300018310547
    },
    {
      "epoch": 0.000364202880859375,
      "model_forward_time": 0.11461806297302246,
      "step": 59671
    },
    {
      "epoch": 0.000364202880859375,
      "step": 59671,
      "training_step_time": 0.3908517360687256
    },
    {
      "epoch": 0.000364208984375,
      "model_forward_time": 0.11502504348754883,
      "step": 59672
    },
    {
      "epoch": 0.000364208984375,
      "step": 59672,
      "training_step_time": 0.3951530456542969
    },
    {
      "epoch": 0.000364215087890625,
      "model_forward_time": 0.11565899848937988,
      "step": 59673
    },
    {
      "epoch": 0.000364215087890625,
      "step": 59673,
      "training_step_time": 0.39519667625427246
    },
    {
      "epoch": 0.00036422119140625,
      "model_forward_time": 0.11551356315612793,
      "step": 59674
    },
    {
      "epoch": 0.00036422119140625,
      "step": 59674,
      "training_step_time": 0.6097216606140137
    },
    {
      "epoch": 0.000364227294921875,
      "model_forward_time": 0.11527466773986816,
      "step": 59675
    },
    {
      "epoch": 0.000364227294921875,
      "step": 59675,
      "training_step_time": 0.4994494915008545
    },
    {
      "epoch": 0.0003642333984375,
      "model_forward_time": 0.11451411247253418,
      "step": 59676
    },
    {
      "epoch": 0.0003642333984375,
      "step": 59676,
      "training_step_time": 0.49139881134033203
    },
    {
      "epoch": 0.000364239501953125,
      "model_forward_time": 0.11384224891662598,
      "step": 59677
    },
    {
      "epoch": 0.000364239501953125,
      "step": 59677,
      "training_step_time": 0.4230482578277588
    },
    {
      "epoch": 0.00036424560546875,
      "model_forward_time": 0.11461329460144043,
      "step": 59678
    },
    {
      "epoch": 0.00036424560546875,
      "step": 59678,
      "training_step_time": 0.42488670349121094
    },
    {
      "epoch": 0.000364251708984375,
      "model_forward_time": 0.1141359806060791,
      "step": 59679
    },
    {
      "epoch": 0.000364251708984375,
      "step": 59679,
      "training_step_time": 0.3833928108215332
    },
    {
      "epoch": 0.0003642578125,
      "grad_norm": 0.06884808093309402,
      "learning_rate": 7.77640268486146e-09,
      "loss": 0.0367,
      "step": 59680
    },
    {
      "epoch": 0.0003642578125,
      "model_forward_time": 0.11417937278747559,
      "step": 59680
    },
    {
      "epoch": 0.0003642578125,
      "step": 59680,
      "training_step_time": 0.3675222396850586
    },
    {
      "epoch": 0.000364263916015625,
      "model_forward_time": 0.1159055233001709,
      "step": 59681
    },
    {
      "epoch": 0.000364263916015625,
      "step": 59681,
      "training_step_time": 0.39376401901245117
    },
    {
      "epoch": 0.00036427001953125,
      "model_forward_time": 0.11482381820678711,
      "step": 59682
    },
    {
      "epoch": 0.00036427001953125,
      "step": 59682,
      "training_step_time": 0.393831729888916
    },
    {
      "epoch": 0.000364276123046875,
      "model_forward_time": 0.11516022682189941,
      "step": 59683
    },
    {
      "epoch": 0.000364276123046875,
      "step": 59683,
      "training_step_time": 0.3905761241912842
    },
    {
      "epoch": 0.0003642822265625,
      "model_forward_time": 0.11488938331604004,
      "step": 59684
    },
    {
      "epoch": 0.0003642822265625,
      "step": 59684,
      "training_step_time": 0.39212775230407715
    },
    {
      "epoch": 0.000364288330078125,
      "model_forward_time": 0.11530590057373047,
      "step": 59685
    },
    {
      "epoch": 0.000364288330078125,
      "step": 59685,
      "training_step_time": 0.3939666748046875
    },
    {
      "epoch": 0.00036429443359375,
      "model_forward_time": 0.11532020568847656,
      "step": 59686
    },
    {
      "epoch": 0.00036429443359375,
      "step": 59686,
      "training_step_time": 0.6058754920959473
    },
    {
      "epoch": 0.000364300537109375,
      "model_forward_time": 0.11451458930969238,
      "step": 59687
    },
    {
      "epoch": 0.000364300537109375,
      "step": 59687,
      "training_step_time": 0.39568114280700684
    },
    {
      "epoch": 0.000364306640625,
      "model_forward_time": 0.11504793167114258,
      "step": 59688
    },
    {
      "epoch": 0.000364306640625,
      "step": 59688,
      "training_step_time": 0.40218591690063477
    },
    {
      "epoch": 0.000364312744140625,
      "model_forward_time": 0.11548829078674316,
      "step": 59689
    },
    {
      "epoch": 0.000364312744140625,
      "step": 59689,
      "training_step_time": 0.4613380432128906
    },
    {
      "epoch": 0.00036431884765625,
      "grad_norm": 0.08317875117063522,
      "learning_rate": 7.2979832992592365e-09,
      "loss": 0.0344,
      "step": 59690
    },
    {
      "epoch": 0.00036431884765625,
      "model_forward_time": 0.11447668075561523,
      "step": 59690
    },
    {
      "epoch": 0.00036431884765625,
      "step": 59690,
      "training_step_time": 0.46042919158935547
    },
    {
      "epoch": 0.000364324951171875,
      "model_forward_time": 0.11548185348510742,
      "step": 59691
    },
    {
      "epoch": 0.000364324951171875,
      "step": 59691,
      "training_step_time": 0.42441678047180176
    },
    {
      "epoch": 0.0003643310546875,
      "model_forward_time": 0.11489582061767578,
      "step": 59692
    },
    {
      "epoch": 0.0003643310546875,
      "step": 59692,
      "training_step_time": 0.42567920684814453
    },
    {
      "epoch": 0.000364337158203125,
      "model_forward_time": 0.11542105674743652,
      "step": 59693
    },
    {
      "epoch": 0.000364337158203125,
      "step": 59693,
      "training_step_time": 0.38800644874572754
    },
    {
      "epoch": 0.00036434326171875,
      "model_forward_time": 0.11526942253112793,
      "step": 59694
    },
    {
      "epoch": 0.00036434326171875,
      "step": 59694,
      "training_step_time": 0.3949010372161865
    },
    {
      "epoch": 0.000364349365234375,
      "model_forward_time": 0.11523938179016113,
      "step": 59695
    },
    {
      "epoch": 0.000364349365234375,
      "step": 59695,
      "training_step_time": 0.39426660537719727
    },
    {
      "epoch": 0.00036435546875,
      "model_forward_time": 0.11497259140014648,
      "step": 59696
    },
    {
      "epoch": 0.00036435546875,
      "step": 59696,
      "training_step_time": 0.3870508670806885
    },
    {
      "epoch": 0.000364361572265625,
      "model_forward_time": 0.1153402328491211,
      "step": 59697
    },
    {
      "epoch": 0.000364361572265625,
      "step": 59697,
      "training_step_time": 0.39510035514831543
    },
    {
      "epoch": 0.00036436767578125,
      "model_forward_time": 0.11513400077819824,
      "step": 59698
    },
    {
      "epoch": 0.00036436767578125,
      "step": 59698,
      "training_step_time": 0.662147045135498
    },
    {
      "epoch": 0.000364373779296875,
      "model_forward_time": 0.11442327499389648,
      "step": 59699
    },
    {
      "epoch": 0.000364373779296875,
      "step": 59699,
      "training_step_time": 0.39496827125549316
    },
    {
      "epoch": 0.0003643798828125,
      "grad_norm": 0.08328697085380554,
      "learning_rate": 6.834750376549792e-09,
      "loss": 0.0382,
      "step": 59700
    },
    {
      "epoch": 0.0003643798828125,
      "model_forward_time": 0.11425423622131348,
      "step": 59700
    },
    {
      "epoch": 0.0003643798828125,
      "step": 59700,
      "training_step_time": 0.3918778896331787
    },
    {
      "epoch": 0.000364385986328125,
      "model_forward_time": 0.11454939842224121,
      "step": 59701
    },
    {
      "epoch": 0.000364385986328125,
      "step": 59701,
      "training_step_time": 0.3905024528503418
    },
    {
      "epoch": 0.00036439208984375,
      "model_forward_time": 0.11539888381958008,
      "step": 59702
    },
    {
      "epoch": 0.00036439208984375,
      "step": 59702,
      "training_step_time": 0.40789794921875
    },
    {
      "epoch": 0.000364398193359375,
      "model_forward_time": 0.11518979072570801,
      "step": 59703
    },
    {
      "epoch": 0.000364398193359375,
      "step": 59703,
      "training_step_time": 0.36593079566955566
    },
    {
      "epoch": 0.000364404296875,
      "model_forward_time": 0.11515498161315918,
      "step": 59704
    },
    {
      "epoch": 0.000364404296875,
      "step": 59704,
      "training_step_time": 0.6565065383911133
    },
    {
      "epoch": 0.000364410400390625,
      "model_forward_time": 0.1146700382232666,
      "step": 59705
    },
    {
      "epoch": 0.000364410400390625,
      "step": 59705,
      "training_step_time": 0.47760009765625
    },
    {
      "epoch": 0.00036441650390625,
      "model_forward_time": 0.11464858055114746,
      "step": 59706
    },
    {
      "epoch": 0.00036441650390625,
      "step": 59706,
      "training_step_time": 0.4586942195892334
    },
    {
      "epoch": 0.000364422607421875,
      "model_forward_time": 0.11454987525939941,
      "step": 59707
    },
    {
      "epoch": 0.000364422607421875,
      "step": 59707,
      "training_step_time": 0.382948637008667
    },
    {
      "epoch": 0.0003644287109375,
      "model_forward_time": 0.11484432220458984,
      "step": 59708
    },
    {
      "epoch": 0.0003644287109375,
      "step": 59708,
      "training_step_time": 0.38213562965393066
    },
    {
      "epoch": 0.000364434814453125,
      "model_forward_time": 0.11464500427246094,
      "step": 59709
    },
    {
      "epoch": 0.000364434814453125,
      "step": 59709,
      "training_step_time": 0.3810615539550781
    },
    {
      "epoch": 0.00036444091796875,
      "grad_norm": 0.08372386544942856,
      "learning_rate": 6.386704057453896e-09,
      "loss": 0.0404,
      "step": 59710
    },
    {
      "epoch": 0.00036444091796875,
      "model_forward_time": 0.11558890342712402,
      "step": 59710
    },
    {
      "epoch": 0.00036444091796875,
      "step": 59710,
      "training_step_time": 0.4527761936187744
    },
    {
      "epoch": 0.000364447021484375,
      "model_forward_time": 0.11493444442749023,
      "step": 59711
    },
    {
      "epoch": 0.000364447021484375,
      "step": 59711,
      "training_step_time": 0.3816673755645752
    },
    {
      "epoch": 0.000364453125,
      "model_forward_time": 0.11560320854187012,
      "step": 59712
    },
    {
      "epoch": 0.000364453125,
      "step": 59712,
      "training_step_time": 0.3972177505493164
    },
    {
      "epoch": 0.000364459228515625,
      "model_forward_time": 0.11506867408752441,
      "step": 59713
    },
    {
      "epoch": 0.000364459228515625,
      "step": 59713,
      "training_step_time": 0.3909187316894531
    },
    {
      "epoch": 0.00036446533203125,
      "model_forward_time": 0.11513566970825195,
      "step": 59714
    },
    {
      "epoch": 0.00036446533203125,
      "step": 59714,
      "training_step_time": 0.38887906074523926
    },
    {
      "epoch": 0.000364471435546875,
      "model_forward_time": 0.11541891098022461,
      "step": 59715
    },
    {
      "epoch": 0.000364471435546875,
      "step": 59715,
      "training_step_time": 0.39586353302001953
    },
    {
      "epoch": 0.0003644775390625,
      "model_forward_time": 0.11465883255004883,
      "step": 59716
    },
    {
      "epoch": 0.0003644775390625,
      "step": 59716,
      "training_step_time": 0.6833064556121826
    },
    {
      "epoch": 0.000364483642578125,
      "model_forward_time": 0.11434412002563477,
      "step": 59717
    },
    {
      "epoch": 0.000364483642578125,
      "step": 59717,
      "training_step_time": 0.516986608505249
    },
    {
      "epoch": 0.00036448974609375,
      "model_forward_time": 0.11436223983764648,
      "step": 59718
    },
    {
      "epoch": 0.00036448974609375,
      "step": 59718,
      "training_step_time": 0.5023653507232666
    },
    {
      "epoch": 0.000364495849609375,
      "model_forward_time": 0.11407947540283203,
      "step": 59719
    },
    {
      "epoch": 0.000364495849609375,
      "step": 59719,
      "training_step_time": 0.45442676544189453
    },
    {
      "epoch": 0.000364501953125,
      "grad_norm": 0.09631120413541794,
      "learning_rate": 5.953844478068238e-09,
      "loss": 0.037,
      "step": 59720
    },
    {
      "epoch": 0.000364501953125,
      "model_forward_time": 0.11365389823913574,
      "step": 59720
    },
    {
      "epoch": 0.000364501953125,
      "step": 59720,
      "training_step_time": 0.38316893577575684
    },
    {
      "epoch": 0.000364508056640625,
      "model_forward_time": 0.11443543434143066,
      "step": 59721
    },
    {
      "epoch": 0.000364508056640625,
      "step": 59721,
      "training_step_time": 0.37654542922973633
    },
    {
      "epoch": 0.00036451416015625,
      "model_forward_time": 0.11455225944519043,
      "step": 59722
    },
    {
      "epoch": 0.00036451416015625,
      "step": 59722,
      "training_step_time": 0.3701975345611572
    },
    {
      "epoch": 0.000364520263671875,
      "model_forward_time": 0.11486220359802246,
      "step": 59723
    },
    {
      "epoch": 0.000364520263671875,
      "step": 59723,
      "training_step_time": 0.37528204917907715
    },
    {
      "epoch": 0.0003645263671875,
      "model_forward_time": 0.1150655746459961,
      "step": 59724
    },
    {
      "epoch": 0.0003645263671875,
      "step": 59724,
      "training_step_time": 0.3938155174255371
    },
    {
      "epoch": 0.000364532470703125,
      "model_forward_time": 0.1151432991027832,
      "step": 59725
    },
    {
      "epoch": 0.000364532470703125,
      "step": 59725,
      "training_step_time": 0.4140305519104004
    },
    {
      "epoch": 0.00036453857421875,
      "model_forward_time": 0.11532831192016602,
      "step": 59726
    },
    {
      "epoch": 0.00036453857421875,
      "step": 59726,
      "training_step_time": 0.38170957565307617
    },
    {
      "epoch": 0.000364544677734375,
      "model_forward_time": 0.1156625747680664,
      "step": 59727
    },
    {
      "epoch": 0.000364544677734375,
      "step": 59727,
      "training_step_time": 0.3899993896484375
    },
    {
      "epoch": 0.00036455078125,
      "model_forward_time": 0.11504912376403809,
      "step": 59728
    },
    {
      "epoch": 0.00036455078125,
      "step": 59728,
      "training_step_time": 0.38780999183654785
    },
    {
      "epoch": 0.000364556884765625,
      "model_forward_time": 0.11501431465148926,
      "step": 59729
    },
    {
      "epoch": 0.000364556884765625,
      "step": 59729,
      "training_step_time": 0.4133443832397461
    },
    {
      "epoch": 0.00036456298828125,
      "grad_norm": 0.08357225358486176,
      "learning_rate": 5.536171769887632e-09,
      "loss": 0.0381,
      "step": 59730
    },
    {
      "epoch": 0.00036456298828125,
      "model_forward_time": 0.11486434936523438,
      "step": 59730
    },
    {
      "epoch": 0.00036456298828125,
      "step": 59730,
      "training_step_time": 0.42632508277893066
    },
    {
      "epoch": 0.000364569091796875,
      "model_forward_time": 0.1146385669708252,
      "step": 59731
    },
    {
      "epoch": 0.000364569091796875,
      "step": 59731,
      "training_step_time": 0.3658926486968994
    },
    {
      "epoch": 0.0003645751953125,
      "model_forward_time": 0.11504054069519043,
      "step": 59732
    },
    {
      "epoch": 0.0003645751953125,
      "step": 59732,
      "training_step_time": 0.5131361484527588
    },
    {
      "epoch": 0.000364581298828125,
      "model_forward_time": 0.11481642723083496,
      "step": 59733
    },
    {
      "epoch": 0.000364581298828125,
      "step": 59733,
      "training_step_time": 0.4677090644836426
    },
    {
      "epoch": 0.00036458740234375,
      "model_forward_time": 0.11486458778381348,
      "step": 59734
    },
    {
      "epoch": 0.00036458740234375,
      "step": 59734,
      "training_step_time": 0.4695589542388916
    },
    {
      "epoch": 0.000364593505859375,
      "model_forward_time": 0.11512398719787598,
      "step": 59735
    },
    {
      "epoch": 0.000364593505859375,
      "step": 59735,
      "training_step_time": 0.39272260665893555
    },
    {
      "epoch": 0.000364599609375,
      "model_forward_time": 0.11452126502990723,
      "step": 59736
    },
    {
      "epoch": 0.000364599609375,
      "step": 59736,
      "training_step_time": 0.4013364315032959
    },
    {
      "epoch": 0.000364605712890625,
      "model_forward_time": 0.11487889289855957,
      "step": 59737
    },
    {
      "epoch": 0.000364605712890625,
      "step": 59737,
      "training_step_time": 0.407271146774292
    },
    {
      "epoch": 0.00036461181640625,
      "model_forward_time": 0.11487889289855957,
      "step": 59738
    },
    {
      "epoch": 0.00036461181640625,
      "step": 59738,
      "training_step_time": 0.38136887550354004
    },
    {
      "epoch": 0.000364617919921875,
      "model_forward_time": 0.11514616012573242,
      "step": 59739
    },
    {
      "epoch": 0.000364617919921875,
      "step": 59739,
      "training_step_time": 0.3874976634979248
    },
    {
      "epoch": 0.0003646240234375,
      "grad_norm": 0.07146671414375305,
      "learning_rate": 5.133686059793918e-09,
      "loss": 0.0334,
      "step": 59740
    },
    {
      "epoch": 0.0003646240234375,
      "model_forward_time": 0.11490607261657715,
      "step": 59740
    },
    {
      "epoch": 0.0003646240234375,
      "step": 59740,
      "training_step_time": 0.384946346282959
    },
    {
      "epoch": 0.000364630126953125,
      "model_forward_time": 0.11525154113769531,
      "step": 59741
    },
    {
      "epoch": 0.000364630126953125,
      "step": 59741,
      "training_step_time": 0.396223783493042
    },
    {
      "epoch": 0.00036463623046875,
      "model_forward_time": 0.11501598358154297,
      "step": 59742
    },
    {
      "epoch": 0.00036463623046875,
      "step": 59742,
      "training_step_time": 0.3921835422515869
    },
    {
      "epoch": 0.000364642333984375,
      "model_forward_time": 0.11511802673339844,
      "step": 59743
    },
    {
      "epoch": 0.000364642333984375,
      "step": 59743,
      "training_step_time": 0.4183056354522705
    },
    {
      "epoch": 0.0003646484375,
      "model_forward_time": 0.11507225036621094,
      "step": 59744
    },
    {
      "epoch": 0.0003646484375,
      "step": 59744,
      "training_step_time": 0.42208385467529297
    },
    {
      "epoch": 0.000364654541015625,
      "model_forward_time": 0.11602306365966797,
      "step": 59745
    },
    {
      "epoch": 0.000364654541015625,
      "step": 59745,
      "training_step_time": 0.3913297653198242
    },
    {
      "epoch": 0.00036466064453125,
      "model_forward_time": 0.11504793167114258,
      "step": 59746
    },
    {
      "epoch": 0.00036466064453125,
      "step": 59746,
      "training_step_time": 0.4821486473083496
    },
    {
      "epoch": 0.000364666748046875,
      "model_forward_time": 0.11447024345397949,
      "step": 59747
    },
    {
      "epoch": 0.000364666748046875,
      "step": 59747,
      "training_step_time": 0.4642794132232666
    },
    {
      "epoch": 0.0003646728515625,
      "model_forward_time": 0.11519384384155273,
      "step": 59748
    },
    {
      "epoch": 0.0003646728515625,
      "step": 59748,
      "training_step_time": 0.39414381980895996
    },
    {
      "epoch": 0.000364678955078125,
      "model_forward_time": 0.1158144474029541,
      "step": 59749
    },
    {
      "epoch": 0.000364678955078125,
      "step": 59749,
      "training_step_time": 0.43560147285461426
    },
    {
      "epoch": 0.00036468505859375,
      "grad_norm": 0.09674806892871857,
      "learning_rate": 4.746387470044855e-09,
      "loss": 0.0402,
      "step": 59750
    },
    {
      "epoch": 0.00036468505859375,
      "model_forward_time": 0.11605644226074219,
      "step": 59750
    },
    {
      "epoch": 0.00036468505859375,
      "step": 59750,
      "training_step_time": 0.38596534729003906
    },
    {
      "epoch": 0.000364691162109375,
      "model_forward_time": 0.11521196365356445,
      "step": 59751
    },
    {
      "epoch": 0.000364691162109375,
      "step": 59751,
      "training_step_time": 0.3830723762512207
    },
    {
      "epoch": 0.000364697265625,
      "model_forward_time": 0.11509108543395996,
      "step": 59752
    },
    {
      "epoch": 0.000364697265625,
      "step": 59752,
      "training_step_time": 0.38378357887268066
    },
    {
      "epoch": 0.000364703369140625,
      "model_forward_time": 0.11558294296264648,
      "step": 59753
    },
    {
      "epoch": 0.000364703369140625,
      "step": 59753,
      "training_step_time": 0.3782496452331543
    },
    {
      "epoch": 0.00036470947265625,
      "model_forward_time": 0.1158447265625,
      "step": 59754
    },
    {
      "epoch": 0.00036470947265625,
      "step": 59754,
      "training_step_time": 0.394695520401001
    },
    {
      "epoch": 0.000364715576171875,
      "model_forward_time": 0.11476898193359375,
      "step": 59755
    },
    {
      "epoch": 0.000364715576171875,
      "step": 59755,
      "training_step_time": 0.39893484115600586
    },
    {
      "epoch": 0.0003647216796875,
      "model_forward_time": 0.11533594131469727,
      "step": 59756
    },
    {
      "epoch": 0.0003647216796875,
      "step": 59756,
      "training_step_time": 0.38510918617248535
    },
    {
      "epoch": 0.000364727783203125,
      "model_forward_time": 0.1154332160949707,
      "step": 59757
    },
    {
      "epoch": 0.000364727783203125,
      "step": 59757,
      "training_step_time": 0.45751094818115234
    },
    {
      "epoch": 0.00036473388671875,
      "model_forward_time": 0.11509156227111816,
      "step": 59758
    },
    {
      "epoch": 0.00036473388671875,
      "step": 59758,
      "training_step_time": 0.4693276882171631
    },
    {
      "epoch": 0.000364739990234375,
      "model_forward_time": 0.11460661888122559,
      "step": 59759
    },
    {
      "epoch": 0.000364739990234375,
      "step": 59759,
      "training_step_time": 0.384105920791626
    },
    {
      "epoch": 0.00036474609375,
      "grad_norm": 0.09815746545791626,
      "learning_rate": 4.3742761183018784e-09,
      "loss": 0.0335,
      "step": 59760
    },
    {
      "epoch": 0.00036474609375,
      "model_forward_time": 0.11531567573547363,
      "step": 59760
    },
    {
      "epoch": 0.00036474609375,
      "step": 59760,
      "training_step_time": 0.4551856517791748
    },
    {
      "epoch": 0.000364752197265625,
      "model_forward_time": 0.11514854431152344,
      "step": 59761
    },
    {
      "epoch": 0.000364752197265625,
      "step": 59761,
      "training_step_time": 0.4127042293548584
    },
    {
      "epoch": 0.00036475830078125,
      "model_forward_time": 0.1148684024810791,
      "step": 59762
    },
    {
      "epoch": 0.00036475830078125,
      "step": 59762,
      "training_step_time": 0.47696471214294434
    },
    {
      "epoch": 0.000364764404296875,
      "model_forward_time": 0.11458373069763184,
      "step": 59763
    },
    {
      "epoch": 0.000364764404296875,
      "step": 59763,
      "training_step_time": 0.4947514533996582
    },
    {
      "epoch": 0.0003647705078125,
      "model_forward_time": 0.11485958099365234,
      "step": 59764
    },
    {
      "epoch": 0.0003647705078125,
      "step": 59764,
      "training_step_time": 0.4446110725402832
    },
    {
      "epoch": 0.000364776611328125,
      "model_forward_time": 0.11502671241760254,
      "step": 59765
    },
    {
      "epoch": 0.000364776611328125,
      "step": 59765,
      "training_step_time": 0.37770581245422363
    },
    {
      "epoch": 0.00036478271484375,
      "model_forward_time": 0.1149899959564209,
      "step": 59766
    },
    {
      "epoch": 0.00036478271484375,
      "step": 59766,
      "training_step_time": 0.39141106605529785
    },
    {
      "epoch": 0.000364788818359375,
      "model_forward_time": 0.11557722091674805,
      "step": 59767
    },
    {
      "epoch": 0.000364788818359375,
      "step": 59767,
      "training_step_time": 0.39123058319091797
    },
    {
      "epoch": 0.000364794921875,
      "model_forward_time": 0.11455821990966797,
      "step": 59768
    },
    {
      "epoch": 0.000364794921875,
      "step": 59768,
      "training_step_time": 0.385789155960083
    },
    {
      "epoch": 0.000364801025390625,
      "model_forward_time": 0.1150670051574707,
      "step": 59769
    },
    {
      "epoch": 0.000364801025390625,
      "step": 59769,
      "training_step_time": 0.4031703472137451
    },
    {
      "epoch": 0.00036480712890625,
      "grad_norm": 0.09320042282342911,
      "learning_rate": 4.017352117591244e-09,
      "loss": 0.037,
      "step": 59770
    },
    {
      "epoch": 0.00036480712890625,
      "model_forward_time": 0.11498737335205078,
      "step": 59770
    },
    {
      "epoch": 0.00036480712890625,
      "step": 59770,
      "training_step_time": 0.5986542701721191
    },
    {
      "epoch": 0.000364813232421875,
      "model_forward_time": 0.1154029369354248,
      "step": 59771
    },
    {
      "epoch": 0.000364813232421875,
      "step": 59771,
      "training_step_time": 0.45355749130249023
    },
    {
      "epoch": 0.0003648193359375,
      "model_forward_time": 0.11461710929870605,
      "step": 59772
    },
    {
      "epoch": 0.0003648193359375,
      "step": 59772,
      "training_step_time": 0.3991096019744873
    },
    {
      "epoch": 0.000364825439453125,
      "model_forward_time": 0.11420202255249023,
      "step": 59773
    },
    {
      "epoch": 0.000364825439453125,
      "step": 59773,
      "training_step_time": 0.3920598030090332
    },
    {
      "epoch": 0.00036483154296875,
      "model_forward_time": 0.11446881294250488,
      "step": 59774
    },
    {
      "epoch": 0.00036483154296875,
      "step": 59774,
      "training_step_time": 0.3642094135284424
    },
    {
      "epoch": 0.000364837646484375,
      "model_forward_time": 0.1144411563873291,
      "step": 59775
    },
    {
      "epoch": 0.000364837646484375,
      "step": 59775,
      "training_step_time": 0.4181790351867676
    },
    {
      "epoch": 0.00036484375,
      "model_forward_time": 0.11456513404846191,
      "step": 59776
    },
    {
      "epoch": 0.00036484375,
      "step": 59776,
      "training_step_time": 0.4178037643432617
    },
    {
      "epoch": 0.000364849853515625,
      "model_forward_time": 0.11472964286804199,
      "step": 59777
    },
    {
      "epoch": 0.000364849853515625,
      "step": 59777,
      "training_step_time": 0.4337637424468994
    },
    {
      "epoch": 0.00036485595703125,
      "model_forward_time": 0.11448287963867188,
      "step": 59778
    },
    {
      "epoch": 0.00036485595703125,
      "step": 59778,
      "training_step_time": 0.47237467765808105
    },
    {
      "epoch": 0.000364862060546875,
      "model_forward_time": 0.11474418640136719,
      "step": 59779
    },
    {
      "epoch": 0.000364862060546875,
      "step": 59779,
      "training_step_time": 0.3914332389831543
    },
    {
      "epoch": 0.0003648681640625,
      "grad_norm": 0.11782075464725494,
      "learning_rate": 3.6756155763373323e-09,
      "loss": 0.0407,
      "step": 59780
    },
    {
      "epoch": 0.0003648681640625,
      "model_forward_time": 0.1140890121459961,
      "step": 59780
    },
    {
      "epoch": 0.0003648681640625,
      "step": 59780,
      "training_step_time": 0.3988332748413086
    },
    {
      "epoch": 0.000364874267578125,
      "model_forward_time": 0.11504149436950684,
      "step": 59781
    },
    {
      "epoch": 0.000364874267578125,
      "step": 59781,
      "training_step_time": 0.3881568908691406
    },
    {
      "epoch": 0.00036488037109375,
      "model_forward_time": 0.11493492126464844,
      "step": 59782
    },
    {
      "epoch": 0.00036488037109375,
      "step": 59782,
      "training_step_time": 0.5748229026794434
    },
    {
      "epoch": 0.000364886474609375,
      "model_forward_time": 0.11431646347045898,
      "step": 59783
    },
    {
      "epoch": 0.000364886474609375,
      "step": 59783,
      "training_step_time": 0.40391063690185547
    },
    {
      "epoch": 0.000364892578125,
      "model_forward_time": 0.11482644081115723,
      "step": 59784
    },
    {
      "epoch": 0.000364892578125,
      "step": 59784,
      "training_step_time": 0.4139091968536377
    },
    {
      "epoch": 0.000364898681640625,
      "model_forward_time": 0.11515116691589355,
      "step": 59785
    },
    {
      "epoch": 0.000364898681640625,
      "step": 59785,
      "training_step_time": 0.3940095901489258
    },
    {
      "epoch": 0.00036490478515625,
      "model_forward_time": 0.11559200286865234,
      "step": 59786
    },
    {
      "epoch": 0.00036490478515625,
      "step": 59786,
      "training_step_time": 0.3913576602935791
    },
    {
      "epoch": 0.000364910888671875,
      "model_forward_time": 0.11490201950073242,
      "step": 59787
    },
    {
      "epoch": 0.000364910888671875,
      "step": 59787,
      "training_step_time": 0.39899611473083496
    },
    {
      "epoch": 0.0003649169921875,
      "model_forward_time": 0.11451506614685059,
      "step": 59788
    },
    {
      "epoch": 0.0003649169921875,
      "step": 59788,
      "training_step_time": 0.6194355487823486
    },
    {
      "epoch": 0.000364923095703125,
      "model_forward_time": 0.11474967002868652,
      "step": 59789
    },
    {
      "epoch": 0.000364923095703125,
      "step": 59789,
      "training_step_time": 0.4785187244415283
    },
    {
      "epoch": 0.00036492919921875,
      "grad_norm": 0.0738392025232315,
      "learning_rate": 3.349066598362649e-09,
      "loss": 0.036,
      "step": 59790
    },
    {
      "epoch": 0.00036492919921875,
      "model_forward_time": 0.11473417282104492,
      "step": 59790
    },
    {
      "epoch": 0.00036492919921875,
      "step": 59790,
      "training_step_time": 0.4891796112060547
    },
    {
      "epoch": 0.000364935302734375,
      "model_forward_time": 0.11431360244750977,
      "step": 59791
    },
    {
      "epoch": 0.000364935302734375,
      "step": 59791,
      "training_step_time": 0.39017486572265625
    },
    {
      "epoch": 0.00036494140625,
      "model_forward_time": 0.11389470100402832,
      "step": 59792
    },
    {
      "epoch": 0.00036494140625,
      "step": 59792,
      "training_step_time": 0.4179575443267822
    },
    {
      "epoch": 0.000364947509765625,
      "model_forward_time": 0.11399507522583008,
      "step": 59793
    },
    {
      "epoch": 0.000364947509765625,
      "step": 59793,
      "training_step_time": 0.3864469528198242
    },
    {
      "epoch": 0.00036495361328125,
      "model_forward_time": 0.11511993408203125,
      "step": 59794
    },
    {
      "epoch": 0.00036495361328125,
      "step": 59794,
      "training_step_time": 0.3905298709869385
    },
    {
      "epoch": 0.000364959716796875,
      "model_forward_time": 0.11394667625427246,
      "step": 59795
    },
    {
      "epoch": 0.000364959716796875,
      "step": 59795,
      "training_step_time": 0.38520193099975586
    },
    {
      "epoch": 0.0003649658203125,
      "model_forward_time": 0.1158599853515625,
      "step": 59796
    },
    {
      "epoch": 0.0003649658203125,
      "step": 59796,
      "training_step_time": 0.4228861331939697
    },
    {
      "epoch": 0.000364971923828125,
      "model_forward_time": 0.11452198028564453,
      "step": 59797
    },
    {
      "epoch": 0.000364971923828125,
      "step": 59797,
      "training_step_time": 0.4260983467102051
    },
    {
      "epoch": 0.00036497802734375,
      "model_forward_time": 0.11530923843383789,
      "step": 59798
    },
    {
      "epoch": 0.00036497802734375,
      "step": 59798,
      "training_step_time": 0.3944876194000244
    },
    {
      "epoch": 0.000364984130859375,
      "model_forward_time": 0.11529898643493652,
      "step": 59799
    },
    {
      "epoch": 0.000364984130859375,
      "step": 59799,
      "training_step_time": 0.38863086700439453
    },
    {
      "epoch": 0.000364990234375,
      "grad_norm": 0.07287616282701492,
      "learning_rate": 3.0377052828489683e-09,
      "loss": 0.0356,
      "step": 59800
    },
    {
      "epoch": 0.000364990234375,
      "model_forward_time": 0.11505317687988281,
      "step": 59800
    },
    {
      "epoch": 0.000364990234375,
      "step": 59800,
      "training_step_time": 0.4357459545135498
    },
    {
      "epoch": 0.000364996337890625,
      "model_forward_time": 0.11571002006530762,
      "step": 59801
    },
    {
      "epoch": 0.000364996337890625,
      "step": 59801,
      "training_step_time": 0.3906397819519043
    },
    {
      "epoch": 0.00036500244140625,
      "model_forward_time": 0.11561012268066406,
      "step": 59802
    },
    {
      "epoch": 0.00036500244140625,
      "step": 59802,
      "training_step_time": 0.4829220771789551
    },
    {
      "epoch": 0.000365008544921875,
      "model_forward_time": 0.1153411865234375,
      "step": 59803
    },
    {
      "epoch": 0.000365008544921875,
      "step": 59803,
      "training_step_time": 0.435854434967041
    },
    {
      "epoch": 0.0003650146484375,
      "model_forward_time": 0.11562347412109375,
      "step": 59804
    },
    {
      "epoch": 0.0003650146484375,
      "step": 59804,
      "training_step_time": 0.5010690689086914
    },
    {
      "epoch": 0.000365020751953125,
      "model_forward_time": 0.11540555953979492,
      "step": 59805
    },
    {
      "epoch": 0.000365020751953125,
      "step": 59805,
      "training_step_time": 0.4077718257904053
    },
    {
      "epoch": 0.00036502685546875,
      "model_forward_time": 0.11464095115661621,
      "step": 59806
    },
    {
      "epoch": 0.00036502685546875,
      "step": 59806,
      "training_step_time": 0.4435396194458008
    },
    {
      "epoch": 0.000365032958984375,
      "model_forward_time": 0.11512899398803711,
      "step": 59807
    },
    {
      "epoch": 0.000365032958984375,
      "step": 59807,
      "training_step_time": 0.474409818649292
    },
    {
      "epoch": 0.0003650390625,
      "model_forward_time": 0.11424660682678223,
      "step": 59808
    },
    {
      "epoch": 0.0003650390625,
      "step": 59808,
      "training_step_time": 0.4028956890106201
    },
    {
      "epoch": 0.000365045166015625,
      "model_forward_time": 0.11523771286010742,
      "step": 59809
    },
    {
      "epoch": 0.000365045166015625,
      "step": 59809,
      "training_step_time": 0.3905601501464844
    },
    {
      "epoch": 0.00036505126953125,
      "grad_norm": 0.1052832081913948,
      "learning_rate": 2.741531724392843e-09,
      "loss": 0.0397,
      "step": 59810
    },
    {
      "epoch": 0.00036505126953125,
      "model_forward_time": 0.11535239219665527,
      "step": 59810
    },
    {
      "epoch": 0.00036505126953125,
      "step": 59810,
      "training_step_time": 0.3909318447113037
    },
    {
      "epoch": 0.000365057373046875,
      "model_forward_time": 0.11484098434448242,
      "step": 59811
    },
    {
      "epoch": 0.000365057373046875,
      "step": 59811,
      "training_step_time": 0.4013478755950928
    },
    {
      "epoch": 0.0003650634765625,
      "model_forward_time": 0.11599564552307129,
      "step": 59812
    },
    {
      "epoch": 0.0003650634765625,
      "step": 59812,
      "training_step_time": 0.5020368099212646
    },
    {
      "epoch": 0.000365069580078125,
      "model_forward_time": 0.11528706550598145,
      "step": 59813
    },
    {
      "epoch": 0.000365069580078125,
      "step": 59813,
      "training_step_time": 0.38849425315856934
    },
    {
      "epoch": 0.00036507568359375,
      "model_forward_time": 0.11601614952087402,
      "step": 59814
    },
    {
      "epoch": 0.00036507568359375,
      "step": 59814,
      "training_step_time": 0.39531826972961426
    },
    {
      "epoch": 0.000365081787109375,
      "model_forward_time": 0.11492252349853516,
      "step": 59815
    },
    {
      "epoch": 0.000365081787109375,
      "step": 59815,
      "training_step_time": 0.39258623123168945
    },
    {
      "epoch": 0.000365087890625,
      "model_forward_time": 0.11589503288269043,
      "step": 59816
    },
    {
      "epoch": 0.000365087890625,
      "step": 59816,
      "training_step_time": 0.3846101760864258
    },
    {
      "epoch": 0.000365093994140625,
      "model_forward_time": 0.1152794361114502,
      "step": 59817
    },
    {
      "epoch": 0.000365093994140625,
      "step": 59817,
      "training_step_time": 0.3666386604309082
    },
    {
      "epoch": 0.00036510009765625,
      "model_forward_time": 0.11530852317810059,
      "step": 59818
    },
    {
      "epoch": 0.00036510009765625,
      "step": 59818,
      "training_step_time": 0.6735789775848389
    },
    {
      "epoch": 0.000365106201171875,
      "model_forward_time": 0.11451888084411621,
      "step": 59819
    },
    {
      "epoch": 0.000365106201171875,
      "step": 59819,
      "training_step_time": 0.47202014923095703
    },
    {
      "epoch": 0.0003651123046875,
      "grad_norm": 0.08560403436422348,
      "learning_rate": 2.4605460129556445e-09,
      "loss": 0.0359,
      "step": 59820
    },
    {
      "epoch": 0.0003651123046875,
      "model_forward_time": 0.1147923469543457,
      "step": 59820
    },
    {
      "epoch": 0.0003651123046875,
      "step": 59820,
      "training_step_time": 0.4503793716430664
    },
    {
      "epoch": 0.000365118408203125,
      "model_forward_time": 0.11427545547485352,
      "step": 59821
    },
    {
      "epoch": 0.000365118408203125,
      "step": 59821,
      "training_step_time": 0.45385098457336426
    },
    {
      "epoch": 0.00036512451171875,
      "model_forward_time": 0.11401224136352539,
      "step": 59822
    },
    {
      "epoch": 0.00036512451171875,
      "step": 59822,
      "training_step_time": 0.3860788345336914
    },
    {
      "epoch": 0.000365130615234375,
      "model_forward_time": 0.11417913436889648,
      "step": 59823
    },
    {
      "epoch": 0.000365130615234375,
      "step": 59823,
      "training_step_time": 0.4148709774017334
    },
    {
      "epoch": 0.00036513671875,
      "model_forward_time": 0.11486268043518066,
      "step": 59824
    },
    {
      "epoch": 0.00036513671875,
      "step": 59824,
      "training_step_time": 0.4399123191833496
    },
    {
      "epoch": 0.000365142822265625,
      "model_forward_time": 0.11522603034973145,
      "step": 59825
    },
    {
      "epoch": 0.000365142822265625,
      "step": 59825,
      "training_step_time": 0.38982558250427246
    },
    {
      "epoch": 0.00036514892578125,
      "model_forward_time": 0.11441993713378906,
      "step": 59826
    },
    {
      "epoch": 0.00036514892578125,
      "step": 59826,
      "training_step_time": 0.3949127197265625
    },
    {
      "epoch": 0.000365155029296875,
      "model_forward_time": 0.11588001251220703,
      "step": 59827
    },
    {
      "epoch": 0.000365155029296875,
      "step": 59827,
      "training_step_time": 0.39301252365112305
    },
    {
      "epoch": 0.0003651611328125,
      "model_forward_time": 0.11491680145263672,
      "step": 59828
    },
    {
      "epoch": 0.0003651611328125,
      "step": 59828,
      "training_step_time": 0.3904390335083008
    },
    {
      "epoch": 0.000365167236328125,
      "model_forward_time": 0.11559462547302246,
      "step": 59829
    },
    {
      "epoch": 0.000365167236328125,
      "step": 59829,
      "training_step_time": 0.38397741317749023
    },
    {
      "epoch": 0.00036517333984375,
      "grad_norm": 0.13629768788814545,
      "learning_rate": 2.1947482338968705e-09,
      "loss": 0.0412,
      "step": 59830
    },
    {
      "epoch": 0.00036517333984375,
      "model_forward_time": 0.11557555198669434,
      "step": 59830
    },
    {
      "epoch": 0.00036517333984375,
      "step": 59830,
      "training_step_time": 0.6217999458312988
    },
    {
      "epoch": 0.000365179443359375,
      "model_forward_time": 0.11480975151062012,
      "step": 59831
    },
    {
      "epoch": 0.000365179443359375,
      "step": 59831,
      "training_step_time": 0.3672807216644287
    },
    {
      "epoch": 0.000365185546875,
      "model_forward_time": 0.11509013175964355,
      "step": 59832
    },
    {
      "epoch": 0.000365185546875,
      "step": 59832,
      "training_step_time": 0.4623093605041504
    },
    {
      "epoch": 0.000365191650390625,
      "model_forward_time": 0.11491584777832031,
      "step": 59833
    },
    {
      "epoch": 0.000365191650390625,
      "step": 59833,
      "training_step_time": 0.4568455219268799
    },
    {
      "epoch": 0.00036519775390625,
      "model_forward_time": 0.11493396759033203,
      "step": 59834
    },
    {
      "epoch": 0.00036519775390625,
      "step": 59834,
      "training_step_time": 0.390608549118042
    },
    {
      "epoch": 0.000365203857421875,
      "model_forward_time": 0.11385583877563477,
      "step": 59835
    },
    {
      "epoch": 0.000365203857421875,
      "step": 59835,
      "training_step_time": 0.3989381790161133
    },
    {
      "epoch": 0.0003652099609375,
      "model_forward_time": 0.11424612998962402,
      "step": 59836
    },
    {
      "epoch": 0.0003652099609375,
      "step": 59836,
      "training_step_time": 0.42856574058532715
    },
    {
      "epoch": 0.000365216064453125,
      "model_forward_time": 0.11555933952331543,
      "step": 59837
    },
    {
      "epoch": 0.000365216064453125,
      "step": 59837,
      "training_step_time": 0.4016890525817871
    },
    {
      "epoch": 0.00036522216796875,
      "model_forward_time": 0.11470651626586914,
      "step": 59838
    },
    {
      "epoch": 0.00036522216796875,
      "step": 59838,
      "training_step_time": 0.3930015563964844
    },
    {
      "epoch": 0.000365228271484375,
      "model_forward_time": 0.11477088928222656,
      "step": 59839
    },
    {
      "epoch": 0.000365228271484375,
      "step": 59839,
      "training_step_time": 0.40892934799194336
    },
    {
      "epoch": 0.000365234375,
      "grad_norm": 0.06513973325490952,
      "learning_rate": 1.9441384679574903e-09,
      "loss": 0.0367,
      "step": 59840
    },
    {
      "epoch": 0.000365234375,
      "model_forward_time": 0.11526894569396973,
      "step": 59840
    },
    {
      "epoch": 0.000365234375,
      "step": 59840,
      "training_step_time": 0.3855469226837158
    },
    {
      "epoch": 0.000365240478515625,
      "model_forward_time": 0.11525130271911621,
      "step": 59841
    },
    {
      "epoch": 0.000365240478515625,
      "step": 59841,
      "training_step_time": 0.38723063468933105
    },
    {
      "epoch": 0.00036524658203125,
      "model_forward_time": 0.11527180671691895,
      "step": 59842
    },
    {
      "epoch": 0.00036524658203125,
      "step": 59842,
      "training_step_time": 0.397519588470459
    },
    {
      "epoch": 0.000365252685546875,
      "model_forward_time": 0.11564803123474121,
      "step": 59843
    },
    {
      "epoch": 0.000365252685546875,
      "step": 59843,
      "training_step_time": 0.3860921859741211
    },
    {
      "epoch": 0.0003652587890625,
      "model_forward_time": 0.11513209342956543,
      "step": 59844
    },
    {
      "epoch": 0.0003652587890625,
      "step": 59844,
      "training_step_time": 0.4190373420715332
    },
    {
      "epoch": 0.000365264892578125,
      "model_forward_time": 0.11431717872619629,
      "step": 59845
    },
    {
      "epoch": 0.000365264892578125,
      "step": 59845,
      "training_step_time": 0.402695894241333
    },
    {
      "epoch": 0.00036527099609375,
      "model_forward_time": 0.11603045463562012,
      "step": 59846
    },
    {
      "epoch": 0.00036527099609375,
      "step": 59846,
      "training_step_time": 0.4468197822570801
    },
    {
      "epoch": 0.000365277099609375,
      "model_forward_time": 0.11547684669494629,
      "step": 59847
    },
    {
      "epoch": 0.000365277099609375,
      "step": 59847,
      "training_step_time": 0.4418935775756836
    },
    {
      "epoch": 0.000365283203125,
      "model_forward_time": 0.11522293090820312,
      "step": 59848
    },
    {
      "epoch": 0.000365283203125,
      "step": 59848,
      "training_step_time": 0.5146255493164062
    },
    {
      "epoch": 0.000365289306640625,
      "model_forward_time": 0.11480164527893066,
      "step": 59849
    },
    {
      "epoch": 0.000365289306640625,
      "step": 59849,
      "training_step_time": 0.5025928020477295
    },
    {
      "epoch": 0.00036529541015625,
      "grad_norm": 0.09166125953197479,
      "learning_rate": 1.7087167912710478e-09,
      "loss": 0.0398,
      "step": 59850
    },
    {
      "epoch": 0.00036529541015625,
      "model_forward_time": 0.11435842514038086,
      "step": 59850
    },
    {
      "epoch": 0.00036529541015625,
      "step": 59850,
      "training_step_time": 0.39604735374450684
    },
    {
      "epoch": 0.000365301513671875,
      "model_forward_time": 0.1144707202911377,
      "step": 59851
    },
    {
      "epoch": 0.000365301513671875,
      "step": 59851,
      "training_step_time": 0.4059031009674072
    },
    {
      "epoch": 0.0003653076171875,
      "model_forward_time": 0.11466002464294434,
      "step": 59852
    },
    {
      "epoch": 0.0003653076171875,
      "step": 59852,
      "training_step_time": 0.39702844619750977
    },
    {
      "epoch": 0.000365313720703125,
      "model_forward_time": 0.1138157844543457,
      "step": 59853
    },
    {
      "epoch": 0.000365313720703125,
      "step": 59853,
      "training_step_time": 0.39061832427978516
    },
    {
      "epoch": 0.00036531982421875,
      "model_forward_time": 0.11573314666748047,
      "step": 59854
    },
    {
      "epoch": 0.00036531982421875,
      "step": 59854,
      "training_step_time": 0.4247322082519531
    },
    {
      "epoch": 0.000365325927734375,
      "model_forward_time": 0.11465048789978027,
      "step": 59855
    },
    {
      "epoch": 0.000365325927734375,
      "step": 59855,
      "training_step_time": 0.39089465141296387
    },
    {
      "epoch": 0.00036533203125,
      "model_forward_time": 0.11555218696594238,
      "step": 59856
    },
    {
      "epoch": 0.00036533203125,
      "step": 59856,
      "training_step_time": 0.43808865547180176
    },
    {
      "epoch": 0.000365338134765625,
      "model_forward_time": 0.11439895629882812,
      "step": 59857
    },
    {
      "epoch": 0.000365338134765625,
      "step": 59857,
      "training_step_time": 0.3882412910461426
    },
    {
      "epoch": 0.00036534423828125,
      "model_forward_time": 0.11563396453857422,
      "step": 59858
    },
    {
      "epoch": 0.00036534423828125,
      "step": 59858,
      "training_step_time": 0.3788492679595947
    },
    {
      "epoch": 0.000365350341796875,
      "model_forward_time": 0.11525368690490723,
      "step": 59859
    },
    {
      "epoch": 0.000365350341796875,
      "step": 59859,
      "training_step_time": 0.4063839912414551
    },
    {
      "epoch": 0.0003653564453125,
      "grad_norm": 0.08613108098506927,
      "learning_rate": 1.4884832753414569e-09,
      "loss": 0.0381,
      "step": 59860
    },
    {
      "epoch": 0.0003653564453125,
      "model_forward_time": 0.1154482364654541,
      "step": 59860
    },
    {
      "epoch": 0.0003653564453125,
      "step": 59860,
      "training_step_time": 0.632601261138916
    },
    {
      "epoch": 0.000365362548828125,
      "model_forward_time": 0.11481475830078125,
      "step": 59861
    },
    {
      "epoch": 0.000365362548828125,
      "step": 59861,
      "training_step_time": 0.38764381408691406
    },
    {
      "epoch": 0.00036536865234375,
      "model_forward_time": 0.11520719528198242,
      "step": 59862
    },
    {
      "epoch": 0.00036536865234375,
      "step": 59862,
      "training_step_time": 0.4018869400024414
    },
    {
      "epoch": 0.000365374755859375,
      "model_forward_time": 0.11478638648986816,
      "step": 59863
    },
    {
      "epoch": 0.000365374755859375,
      "step": 59863,
      "training_step_time": 0.398237943649292
    },
    {
      "epoch": 0.000365380859375,
      "model_forward_time": 0.1144568920135498,
      "step": 59864
    },
    {
      "epoch": 0.000365380859375,
      "step": 59864,
      "training_step_time": 0.49369239807128906
    },
    {
      "epoch": 0.000365386962890625,
      "model_forward_time": 0.11474895477294922,
      "step": 59865
    },
    {
      "epoch": 0.000365386962890625,
      "step": 59865,
      "training_step_time": 0.44510936737060547
    },
    {
      "epoch": 0.00036539306640625,
      "model_forward_time": 0.11488890647888184,
      "step": 59866
    },
    {
      "epoch": 0.00036539306640625,
      "step": 59866,
      "training_step_time": 0.5509331226348877
    },
    {
      "epoch": 0.000365399169921875,
      "model_forward_time": 0.11517453193664551,
      "step": 59867
    },
    {
      "epoch": 0.000365399169921875,
      "step": 59867,
      "training_step_time": 0.3899707794189453
    },
    {
      "epoch": 0.0003654052734375,
      "model_forward_time": 0.11452531814575195,
      "step": 59868
    },
    {
      "epoch": 0.0003654052734375,
      "step": 59868,
      "training_step_time": 0.3932006359100342
    },
    {
      "epoch": 0.000365411376953125,
      "model_forward_time": 0.1141653060913086,
      "step": 59869
    },
    {
      "epoch": 0.000365411376953125,
      "step": 59869,
      "training_step_time": 0.3916304111480713
    },
    {
      "epoch": 0.00036541748046875,
      "grad_norm": 0.10822374373674393,
      "learning_rate": 1.2834379870818592e-09,
      "loss": 0.0374,
      "step": 59870
    },
    {
      "epoch": 0.00036541748046875,
      "model_forward_time": 0.11503219604492188,
      "step": 59870
    },
    {
      "epoch": 0.00036541748046875,
      "step": 59870,
      "training_step_time": 0.3846898078918457
    },
    {
      "epoch": 0.000365423583984375,
      "model_forward_time": 0.11526274681091309,
      "step": 59871
    },
    {
      "epoch": 0.000365423583984375,
      "step": 59871,
      "training_step_time": 0.3910377025604248
    },
    {
      "epoch": 0.0003654296875,
      "model_forward_time": 0.11509084701538086,
      "step": 59872
    },
    {
      "epoch": 0.0003654296875,
      "step": 59872,
      "training_step_time": 0.5847682952880859
    },
    {
      "epoch": 0.000365435791015625,
      "model_forward_time": 0.11512470245361328,
      "step": 59873
    },
    {
      "epoch": 0.000365435791015625,
      "step": 59873,
      "training_step_time": 0.3885207176208496
    },
    {
      "epoch": 0.00036544189453125,
      "model_forward_time": 0.11439919471740723,
      "step": 59874
    },
    {
      "epoch": 0.00036544189453125,
      "step": 59874,
      "training_step_time": 0.3645358085632324
    },
    {
      "epoch": 0.000365447998046875,
      "model_forward_time": 0.11467576026916504,
      "step": 59875
    },
    {
      "epoch": 0.000365447998046875,
      "step": 59875,
      "training_step_time": 0.4280116558074951
    },
    {
      "epoch": 0.0003654541015625,
      "model_forward_time": 0.11508893966674805,
      "step": 59876
    },
    {
      "epoch": 0.0003654541015625,
      "step": 59876,
      "training_step_time": 0.42690134048461914
    },
    {
      "epoch": 0.000365460205078125,
      "model_forward_time": 0.11454129219055176,
      "step": 59877
    },
    {
      "epoch": 0.000365460205078125,
      "step": 59877,
      "training_step_time": 0.3984718322753906
    },
    {
      "epoch": 0.00036546630859375,
      "model_forward_time": 0.11436891555786133,
      "step": 59878
    },
    {
      "epoch": 0.00036546630859375,
      "step": 59878,
      "training_step_time": 0.529515266418457
    },
    {
      "epoch": 0.000365472412109375,
      "model_forward_time": 0.11423730850219727,
      "step": 59879
    },
    {
      "epoch": 0.000365472412109375,
      "step": 59879,
      "training_step_time": 0.37939882278442383
    },
    {
      "epoch": 0.000365478515625,
      "grad_norm": 0.07955948263406754,
      "learning_rate": 1.0935809887702154e-09,
      "loss": 0.0377,
      "step": 59880
    },
    {
      "epoch": 0.000365478515625,
      "model_forward_time": 0.11505484580993652,
      "step": 59880
    },
    {
      "epoch": 0.000365478515625,
      "step": 59880,
      "training_step_time": 0.39686107635498047
    },
    {
      "epoch": 0.000365484619140625,
      "model_forward_time": 0.11497235298156738,
      "step": 59881
    },
    {
      "epoch": 0.000365484619140625,
      "step": 59881,
      "training_step_time": 0.4007141590118408
    },
    {
      "epoch": 0.00036549072265625,
      "model_forward_time": 0.11604928970336914,
      "step": 59882
    },
    {
      "epoch": 0.00036549072265625,
      "step": 59882,
      "training_step_time": 0.3889169692993164
    },
    {
      "epoch": 0.000365496826171875,
      "model_forward_time": 0.11492609977722168,
      "step": 59883
    },
    {
      "epoch": 0.000365496826171875,
      "step": 59883,
      "training_step_time": 0.3946194648742676
    },
    {
      "epoch": 0.0003655029296875,
      "model_forward_time": 0.11431026458740234,
      "step": 59884
    },
    {
      "epoch": 0.0003655029296875,
      "step": 59884,
      "training_step_time": 0.7051270008087158
    },
    {
      "epoch": 0.000365509033203125,
      "model_forward_time": 0.11476635932922363,
      "step": 59885
    },
    {
      "epoch": 0.000365509033203125,
      "step": 59885,
      "training_step_time": 0.38424086570739746
    },
    {
      "epoch": 0.00036551513671875,
      "model_forward_time": 0.11440277099609375,
      "step": 59886
    },
    {
      "epoch": 0.00036551513671875,
      "step": 59886,
      "training_step_time": 0.4008455276489258
    },
    {
      "epoch": 0.000365521240234375,
      "model_forward_time": 0.11499595642089844,
      "step": 59887
    },
    {
      "epoch": 0.000365521240234375,
      "step": 59887,
      "training_step_time": 0.42658162117004395
    },
    {
      "epoch": 0.00036552734375,
      "model_forward_time": 0.11521267890930176,
      "step": 59888
    },
    {
      "epoch": 0.00036552734375,
      "step": 59888,
      "training_step_time": 0.4889214038848877
    },
    {
      "epoch": 0.000365533447265625,
      "model_forward_time": 0.1146395206451416,
      "step": 59889
    },
    {
      "epoch": 0.000365533447265625,
      "step": 59889,
      "training_step_time": 0.3747239112854004
    },
    {
      "epoch": 0.00036553955078125,
      "grad_norm": 0.07379768788814545,
      "learning_rate": 9.189123380826114e-10,
      "loss": 0.0364,
      "step": 59890
    },
    {
      "epoch": 0.00036553955078125,
      "model_forward_time": 0.1143651008605957,
      "step": 59890
    },
    {
      "epoch": 0.00036553955078125,
      "step": 59890,
      "training_step_time": 0.4775066375732422
    },
    {
      "epoch": 0.000365545654296875,
      "model_forward_time": 0.11467933654785156,
      "step": 59891
    },
    {
      "epoch": 0.000365545654296875,
      "step": 59891,
      "training_step_time": 0.42749905586242676
    },
    {
      "epoch": 0.0003655517578125,
      "model_forward_time": 0.11519575119018555,
      "step": 59892
    },
    {
      "epoch": 0.0003655517578125,
      "step": 59892,
      "training_step_time": 0.400850772857666
    },
    {
      "epoch": 0.000365557861328125,
      "model_forward_time": 0.11479616165161133,
      "step": 59893
    },
    {
      "epoch": 0.000365557861328125,
      "step": 59893,
      "training_step_time": 0.3876688480377197
    },
    {
      "epoch": 0.00036556396484375,
      "model_forward_time": 0.11559700965881348,
      "step": 59894
    },
    {
      "epoch": 0.00036556396484375,
      "step": 59894,
      "training_step_time": 0.38655567169189453
    },
    {
      "epoch": 0.000365570068359375,
      "model_forward_time": 0.1147618293762207,
      "step": 59895
    },
    {
      "epoch": 0.000365570068359375,
      "step": 59895,
      "training_step_time": 0.3832590579986572
    },
    {
      "epoch": 0.000365576171875,
      "model_forward_time": 0.11473774909973145,
      "step": 59896
    },
    {
      "epoch": 0.000365576171875,
      "step": 59896,
      "training_step_time": 0.5022616386413574
    },
    {
      "epoch": 0.000365582275390625,
      "model_forward_time": 0.11475872993469238,
      "step": 59897
    },
    {
      "epoch": 0.000365582275390625,
      "step": 59897,
      "training_step_time": 0.3955085277557373
    },
    {
      "epoch": 0.00036558837890625,
      "model_forward_time": 0.11517882347106934,
      "step": 59898
    },
    {
      "epoch": 0.00036558837890625,
      "step": 59898,
      "training_step_time": 0.44665050506591797
    },
    {
      "epoch": 0.000365594482421875,
      "model_forward_time": 0.11482739448547363,
      "step": 59899
    },
    {
      "epoch": 0.000365594482421875,
      "step": 59899,
      "training_step_time": 0.3964269161224365
    },
    {
      "epoch": 0.0003656005859375,
      "grad_norm": 0.12477221339941025,
      "learning_rate": 7.594320880821571e-10,
      "loss": 0.0385,
      "step": 59900
    },
    {
      "epoch": 0.0003656005859375,
      "model_forward_time": 0.11470603942871094,
      "step": 59900
    },
    {
      "epoch": 0.0003656005859375,
      "step": 59900,
      "training_step_time": 0.51023268699646
    },
    {
      "epoch": 0.000365606689453125,
      "model_forward_time": 0.11415243148803711,
      "step": 59901
    },
    {
      "epoch": 0.000365606689453125,
      "step": 59901,
      "training_step_time": 0.41609716415405273
    },
    {
      "epoch": 0.00036561279296875,
      "model_forward_time": 0.1150200366973877,
      "step": 59902
    },
    {
      "epoch": 0.00036561279296875,
      "step": 59902,
      "training_step_time": 0.5285828113555908
    },
    {
      "epoch": 0.000365618896484375,
      "model_forward_time": 0.11477279663085938,
      "step": 59903
    },
    {
      "epoch": 0.000365618896484375,
      "step": 59903,
      "training_step_time": 0.4284853935241699
    },
    {
      "epoch": 0.000365625,
      "model_forward_time": 0.11492538452148438,
      "step": 59904
    },
    {
      "epoch": 0.000365625,
      "step": 59904,
      "training_step_time": 0.4896202087402344
    },
    {
      "epoch": 0.000365631103515625,
      "model_forward_time": 0.11497354507446289,
      "step": 59905
    },
    {
      "epoch": 0.000365631103515625,
      "step": 59905,
      "training_step_time": 0.49611520767211914
    },
    {
      "epoch": 0.00036563720703125,
      "model_forward_time": 0.1143808364868164,
      "step": 59906
    },
    {
      "epoch": 0.00036563720703125,
      "step": 59906,
      "training_step_time": 0.3828763961791992
    },
    {
      "epoch": 0.000365643310546875,
      "model_forward_time": 0.11468291282653809,
      "step": 59907
    },
    {
      "epoch": 0.000365643310546875,
      "step": 59907,
      "training_step_time": 0.3842203617095947
    },
    {
      "epoch": 0.0003656494140625,
      "model_forward_time": 0.11490035057067871,
      "step": 59908
    },
    {
      "epoch": 0.0003656494140625,
      "step": 59908,
      "training_step_time": 0.396747350692749
    },
    {
      "epoch": 0.000365655517578125,
      "model_forward_time": 0.11468505859375,
      "step": 59909
    },
    {
      "epoch": 0.000365655517578125,
      "step": 59909,
      "training_step_time": 0.40188121795654297
    },
    {
      "epoch": 0.00036566162109375,
      "grad_norm": 0.06885875016450882,
      "learning_rate": 6.151402872134337e-10,
      "loss": 0.0361,
      "step": 59910
    },
    {
      "epoch": 0.00036566162109375,
      "model_forward_time": 0.11506438255310059,
      "step": 59910
    },
    {
      "epoch": 0.00036566162109375,
      "step": 59910,
      "training_step_time": 0.40451741218566895
    },
    {
      "epoch": 0.000365667724609375,
      "model_forward_time": 0.11489248275756836,
      "step": 59911
    },
    {
      "epoch": 0.000365667724609375,
      "step": 59911,
      "training_step_time": 0.3915383815765381
    },
    {
      "epoch": 0.000365673828125,
      "model_forward_time": 0.11565065383911133,
      "step": 59912
    },
    {
      "epoch": 0.000365673828125,
      "step": 59912,
      "training_step_time": 0.3846738338470459
    },
    {
      "epoch": 0.000365679931640625,
      "model_forward_time": 0.1160120964050293,
      "step": 59913
    },
    {
      "epoch": 0.000365679931640625,
      "step": 59913,
      "training_step_time": 0.384033203125
    },
    {
      "epoch": 0.00036568603515625,
      "model_forward_time": 0.11607480049133301,
      "step": 59914
    },
    {
      "epoch": 0.00036568603515625,
      "step": 59914,
      "training_step_time": 0.6973361968994141
    },
    {
      "epoch": 0.000365692138671875,
      "model_forward_time": 0.11431336402893066,
      "step": 59915
    },
    {
      "epoch": 0.000365692138671875,
      "step": 59915,
      "training_step_time": 0.4195442199707031
    },
    {
      "epoch": 0.0003656982421875,
      "model_forward_time": 0.11425161361694336,
      "step": 59916
    },
    {
      "epoch": 0.0003656982421875,
      "step": 59916,
      "training_step_time": 0.40181970596313477
    },
    {
      "epoch": 0.000365704345703125,
      "model_forward_time": 0.1146688461303711,
      "step": 59917
    },
    {
      "epoch": 0.000365704345703125,
      "step": 59917,
      "training_step_time": 0.3943924903869629
    },
    {
      "epoch": 0.00036571044921875,
      "model_forward_time": 0.1147463321685791,
      "step": 59918
    },
    {
      "epoch": 0.00036571044921875,
      "step": 59918,
      "training_step_time": 0.4141068458557129
    },
    {
      "epoch": 0.000365716552734375,
      "model_forward_time": 0.11437773704528809,
      "step": 59919
    },
    {
      "epoch": 0.000365716552734375,
      "step": 59919,
      "training_step_time": 0.4560127258300781
    },
    {
      "epoch": 0.00036572265625,
      "grad_norm": 0.07191029191017151,
      "learning_rate": 4.860369793080466e-10,
      "loss": 0.0328,
      "step": 59920
    },
    {
      "epoch": 0.00036572265625,
      "model_forward_time": 0.11568403244018555,
      "step": 59920
    },
    {
      "epoch": 0.00036572265625,
      "step": 59920,
      "training_step_time": 0.4700908660888672
    },
    {
      "epoch": 0.000365728759765625,
      "model_forward_time": 0.11512637138366699,
      "step": 59921
    },
    {
      "epoch": 0.000365728759765625,
      "step": 59921,
      "training_step_time": 0.39547181129455566
    },
    {
      "epoch": 0.00036573486328125,
      "model_forward_time": 0.11527609825134277,
      "step": 59922
    },
    {
      "epoch": 0.00036573486328125,
      "step": 59922,
      "training_step_time": 0.3914616107940674
    },
    {
      "epoch": 0.000365740966796875,
      "model_forward_time": 0.11528372764587402,
      "step": 59923
    },
    {
      "epoch": 0.000365740966796875,
      "step": 59923,
      "training_step_time": 0.3958885669708252
    },
    {
      "epoch": 0.0003657470703125,
      "model_forward_time": 0.1151278018951416,
      "step": 59924
    },
    {
      "epoch": 0.0003657470703125,
      "step": 59924,
      "training_step_time": 0.391693115234375
    },
    {
      "epoch": 0.000365753173828125,
      "model_forward_time": 0.11530256271362305,
      "step": 59925
    },
    {
      "epoch": 0.000365753173828125,
      "step": 59925,
      "training_step_time": 0.38034939765930176
    },
    {
      "epoch": 0.00036575927734375,
      "model_forward_time": 0.11434268951416016,
      "step": 59926
    },
    {
      "epoch": 0.00036575927734375,
      "step": 59926,
      "training_step_time": 0.7730741500854492
    },
    {
      "epoch": 0.000365765380859375,
      "model_forward_time": 0.11476564407348633,
      "step": 59927
    },
    {
      "epoch": 0.000365765380859375,
      "step": 59927,
      "training_step_time": 0.3773469924926758
    },
    {
      "epoch": 0.000365771484375,
      "model_forward_time": 0.1145780086517334,
      "step": 59928
    },
    {
      "epoch": 0.000365771484375,
      "step": 59928,
      "training_step_time": 0.42476916313171387
    },
    {
      "epoch": 0.000365777587890625,
      "model_forward_time": 0.11520171165466309,
      "step": 59929
    },
    {
      "epoch": 0.000365777587890625,
      "step": 59929,
      "training_step_time": 0.4035501480102539
    },
    {
      "epoch": 0.00036578369140625,
      "grad_norm": 0.08539421111345291,
      "learning_rate": 3.721222035846239e-10,
      "loss": 0.0386,
      "step": 59930
    },
    {
      "epoch": 0.00036578369140625,
      "model_forward_time": 0.11415290832519531,
      "step": 59930
    },
    {
      "epoch": 0.00036578369140625,
      "step": 59930,
      "training_step_time": 0.4664483070373535
    },
    {
      "epoch": 0.000365789794921875,
      "model_forward_time": 0.11416769027709961,
      "step": 59931
    },
    {
      "epoch": 0.000365789794921875,
      "step": 59931,
      "training_step_time": 0.3650062084197998
    },
    {
      "epoch": 0.0003657958984375,
      "model_forward_time": 0.1150972843170166,
      "step": 59932
    },
    {
      "epoch": 0.0003657958984375,
      "step": 59932,
      "training_step_time": 0.5743684768676758
    },
    {
      "epoch": 0.000365802001953125,
      "model_forward_time": 0.11445212364196777,
      "step": 59933
    },
    {
      "epoch": 0.000365802001953125,
      "step": 59933,
      "training_step_time": 0.4387819766998291
    },
    {
      "epoch": 0.00036580810546875,
      "model_forward_time": 0.11468005180358887,
      "step": 59934
    },
    {
      "epoch": 0.00036580810546875,
      "step": 59934,
      "training_step_time": 0.47689390182495117
    },
    {
      "epoch": 0.000365814208984375,
      "model_forward_time": 0.11436200141906738,
      "step": 59935
    },
    {
      "epoch": 0.000365814208984375,
      "step": 59935,
      "training_step_time": 0.38782238960266113
    },
    {
      "epoch": 0.0003658203125,
      "model_forward_time": 0.11483287811279297,
      "step": 59936
    },
    {
      "epoch": 0.0003658203125,
      "step": 59936,
      "training_step_time": 0.40340471267700195
    },
    {
      "epoch": 0.000365826416015625,
      "model_forward_time": 0.11432409286499023,
      "step": 59937
    },
    {
      "epoch": 0.000365826416015625,
      "step": 59937,
      "training_step_time": 0.3955252170562744
    },
    {
      "epoch": 0.00036583251953125,
      "model_forward_time": 0.11541914939880371,
      "step": 59938
    },
    {
      "epoch": 0.00036583251953125,
      "step": 59938,
      "training_step_time": 0.6336309909820557
    },
    {
      "epoch": 0.000365838623046875,
      "model_forward_time": 0.11415553092956543,
      "step": 59939
    },
    {
      "epoch": 0.000365838623046875,
      "step": 59939,
      "training_step_time": 0.3874030113220215
    },
    {
      "epoch": 0.0003658447265625,
      "grad_norm": 0.11951497197151184,
      "learning_rate": 2.7339599464326627e-10,
      "loss": 0.0373,
      "step": 59940
    },
    {
      "epoch": 0.0003658447265625,
      "model_forward_time": 0.11429286003112793,
      "step": 59940
    },
    {
      "epoch": 0.0003658447265625,
      "step": 59940,
      "training_step_time": 0.38887643814086914
    },
    {
      "epoch": 0.000365850830078125,
      "model_forward_time": 0.11448121070861816,
      "step": 59941
    },
    {
      "epoch": 0.000365850830078125,
      "step": 59941,
      "training_step_time": 0.3883323669433594
    },
    {
      "epoch": 0.00036585693359375,
      "model_forward_time": 0.11492753028869629,
      "step": 59942
    },
    {
      "epoch": 0.00036585693359375,
      "step": 59942,
      "training_step_time": 0.4407663345336914
    },
    {
      "epoch": 0.000365863037109375,
      "model_forward_time": 0.11481285095214844,
      "step": 59943
    },
    {
      "epoch": 0.000365863037109375,
      "step": 59943,
      "training_step_time": 0.44823575019836426
    },
    {
      "epoch": 0.000365869140625,
      "model_forward_time": 0.11530613899230957,
      "step": 59944
    },
    {
      "epoch": 0.000365869140625,
      "step": 59944,
      "training_step_time": 0.5533931255340576
    },
    {
      "epoch": 0.000365875244140625,
      "model_forward_time": 0.11548447608947754,
      "step": 59945
    },
    {
      "epoch": 0.000365875244140625,
      "step": 59945,
      "training_step_time": 0.418842077255249
    },
    {
      "epoch": 0.00036588134765625,
      "model_forward_time": 0.11469602584838867,
      "step": 59946
    },
    {
      "epoch": 0.00036588134765625,
      "step": 59946,
      "training_step_time": 0.40502309799194336
    },
    {
      "epoch": 0.000365887451171875,
      "model_forward_time": 0.11483240127563477,
      "step": 59947
    },
    {
      "epoch": 0.000365887451171875,
      "step": 59947,
      "training_step_time": 0.4944441318511963
    },
    {
      "epoch": 0.0003658935546875,
      "model_forward_time": 0.1151437759399414,
      "step": 59948
    },
    {
      "epoch": 0.0003658935546875,
      "step": 59948,
      "training_step_time": 0.3993709087371826
    },
    {
      "epoch": 0.000365899658203125,
      "model_forward_time": 0.11504173278808594,
      "step": 59949
    },
    {
      "epoch": 0.000365899658203125,
      "step": 59949,
      "training_step_time": 0.3989450931549072
    },
    {
      "epoch": 0.00036590576171875,
      "grad_norm": 0.07962308824062347,
      "learning_rate": 1.8985838248219978e-10,
      "loss": 0.0362,
      "step": 59950
    },
    {
      "epoch": 0.00036590576171875,
      "model_forward_time": 0.11446452140808105,
      "step": 59950
    },
    {
      "epoch": 0.00036590576171875,
      "step": 59950,
      "training_step_time": 0.6267826557159424
    },
    {
      "epoch": 0.000365911865234375,
      "model_forward_time": 0.11444830894470215,
      "step": 59951
    },
    {
      "epoch": 0.000365911865234375,
      "step": 59951,
      "training_step_time": 0.39108705520629883
    },
    {
      "epoch": 0.00036591796875,
      "model_forward_time": 0.1141669750213623,
      "step": 59952
    },
    {
      "epoch": 0.00036591796875,
      "step": 59952,
      "training_step_time": 0.3945953845977783
    },
    {
      "epoch": 0.000365924072265625,
      "model_forward_time": 0.11438584327697754,
      "step": 59953
    },
    {
      "epoch": 0.000365924072265625,
      "step": 59953,
      "training_step_time": 0.3953721523284912
    },
    {
      "epoch": 0.00036593017578125,
      "model_forward_time": 0.11495804786682129,
      "step": 59954
    },
    {
      "epoch": 0.00036593017578125,
      "step": 59954,
      "training_step_time": 0.38762497901916504
    },
    {
      "epoch": 0.000365936279296875,
      "model_forward_time": 0.11496329307556152,
      "step": 59955
    },
    {
      "epoch": 0.000365936279296875,
      "step": 59955,
      "training_step_time": 0.38388609886169434
    },
    {
      "epoch": 0.0003659423828125,
      "model_forward_time": 0.11421680450439453,
      "step": 59956
    },
    {
      "epoch": 0.0003659423828125,
      "step": 59956,
      "training_step_time": 0.5870592594146729
    },
    {
      "epoch": 0.000365948486328125,
      "model_forward_time": 0.11511421203613281,
      "step": 59957
    },
    {
      "epoch": 0.000365948486328125,
      "step": 59957,
      "training_step_time": 0.44005703926086426
    },
    {
      "epoch": 0.00036595458984375,
      "model_forward_time": 0.1154775619506836,
      "step": 59958
    },
    {
      "epoch": 0.00036595458984375,
      "step": 59958,
      "training_step_time": 0.39577341079711914
    },
    {
      "epoch": 0.000365960693359375,
      "model_forward_time": 0.11521029472351074,
      "step": 59959
    },
    {
      "epoch": 0.000365960693359375,
      "step": 59959,
      "training_step_time": 0.3792552947998047
    },
    {
      "epoch": 0.000365966796875,
      "grad_norm": 0.10685218870639801,
      "learning_rate": 1.2150939247002058e-10,
      "loss": 0.0334,
      "step": 59960
    },
    {
      "epoch": 0.000365966796875,
      "model_forward_time": 0.1147310733795166,
      "step": 59960
    },
    {
      "epoch": 0.000365966796875,
      "step": 59960,
      "training_step_time": 0.4083719253540039
    },
    {
      "epoch": 0.000365972900390625,
      "model_forward_time": 0.11480093002319336,
      "step": 59961
    },
    {
      "epoch": 0.000365972900390625,
      "step": 59961,
      "training_step_time": 0.41300010681152344
    },
    {
      "epoch": 0.00036597900390625,
      "model_forward_time": 0.1153421401977539,
      "step": 59962
    },
    {
      "epoch": 0.00036597900390625,
      "step": 59962,
      "training_step_time": 0.5309298038482666
    },
    {
      "epoch": 0.000365985107421875,
      "model_forward_time": 0.11543941497802734,
      "step": 59963
    },
    {
      "epoch": 0.000365985107421875,
      "step": 59963,
      "training_step_time": 0.3816962242126465
    },
    {
      "epoch": 0.0003659912109375,
      "model_forward_time": 0.11460304260253906,
      "step": 59964
    },
    {
      "epoch": 0.0003659912109375,
      "step": 59964,
      "training_step_time": 0.39263057708740234
    },
    {
      "epoch": 0.000365997314453125,
      "model_forward_time": 0.1150970458984375,
      "step": 59965
    },
    {
      "epoch": 0.000365997314453125,
      "step": 59965,
      "training_step_time": 0.3917672634124756
    },
    {
      "epoch": 0.00036600341796875,
      "model_forward_time": 0.11482572555541992,
      "step": 59966
    },
    {
      "epoch": 0.00036600341796875,
      "step": 59966,
      "training_step_time": 0.38524794578552246
    },
    {
      "epoch": 0.000366009521484375,
      "model_forward_time": 0.11536788940429688,
      "step": 59967
    },
    {
      "epoch": 0.000366009521484375,
      "step": 59967,
      "training_step_time": 0.38700056076049805
    },
    {
      "epoch": 0.000366015625,
      "model_forward_time": 0.11467480659484863,
      "step": 59968
    },
    {
      "epoch": 0.000366015625,
      "step": 59968,
      "training_step_time": 0.784912109375
    },
    {
      "epoch": 0.000366021728515625,
      "model_forward_time": 0.1141209602355957,
      "step": 59969
    },
    {
      "epoch": 0.000366021728515625,
      "step": 59969,
      "training_step_time": 0.4288475513458252
    },
    {
      "epoch": 0.00036602783203125,
      "grad_norm": 0.07514119148254395,
      "learning_rate": 6.834904537900144e-11,
      "loss": 0.0333,
      "step": 59970
    },
    {
      "epoch": 0.00036602783203125,
      "model_forward_time": 0.11429286003112793,
      "step": 59970
    },
    {
      "epoch": 0.00036602783203125,
      "step": 59970,
      "training_step_time": 0.4174377918243408
    },
    {
      "epoch": 0.000366033935546875,
      "model_forward_time": 0.11429595947265625,
      "step": 59971
    },
    {
      "epoch": 0.000366033935546875,
      "step": 59971,
      "training_step_time": 0.39843320846557617
    },
    {
      "epoch": 0.0003660400390625,
      "model_forward_time": 0.11437439918518066,
      "step": 59972
    },
    {
      "epoch": 0.0003660400390625,
      "step": 59972,
      "training_step_time": 0.3823835849761963
    },
    {
      "epoch": 0.000366046142578125,
      "model_forward_time": 0.11422586441040039,
      "step": 59973
    },
    {
      "epoch": 0.000366046142578125,
      "step": 59973,
      "training_step_time": 0.39687323570251465
    },
    {
      "epoch": 0.00036605224609375,
      "model_forward_time": 0.11523127555847168,
      "step": 59974
    },
    {
      "epoch": 0.00036605224609375,
      "step": 59974,
      "training_step_time": 0.5252127647399902
    },
    {
      "epoch": 0.000366058349609375,
      "model_forward_time": 0.11539363861083984,
      "step": 59975
    },
    {
      "epoch": 0.000366058349609375,
      "step": 59975,
      "training_step_time": 0.4297504425048828
    },
    {
      "epoch": 0.000366064453125,
      "model_forward_time": 0.11529111862182617,
      "step": 59976
    },
    {
      "epoch": 0.000366064453125,
      "step": 59976,
      "training_step_time": 0.5116150379180908
    },
    {
      "epoch": 0.000366070556640625,
      "model_forward_time": 0.11482691764831543,
      "step": 59977
    },
    {
      "epoch": 0.000366070556640625,
      "step": 59977,
      "training_step_time": 0.3840363025665283
    },
    {
      "epoch": 0.00036607666015625,
      "model_forward_time": 0.11527705192565918,
      "step": 59978
    },
    {
      "epoch": 0.00036607666015625,
      "step": 59978,
      "training_step_time": 0.37909722328186035
    },
    {
      "epoch": 0.000366082763671875,
      "model_forward_time": 0.11547040939331055,
      "step": 59979
    },
    {
      "epoch": 0.000366082763671875,
      "step": 59979,
      "training_step_time": 0.3865234851837158
    },
    {
      "epoch": 0.0003660888671875,
      "grad_norm": 0.08741237223148346,
      "learning_rate": 3.037735734623404e-11,
      "loss": 0.0334,
      "step": 59980
    },
    {
      "epoch": 0.0003660888671875,
      "model_forward_time": 0.11460447311401367,
      "step": 59980
    },
    {
      "epoch": 0.0003660888671875,
      "step": 59980,
      "training_step_time": 0.5316932201385498
    },
    {
      "epoch": 0.000366094970703125,
      "model_forward_time": 0.11455965042114258,
      "step": 59981
    },
    {
      "epoch": 0.000366094970703125,
      "step": 59981,
      "training_step_time": 0.3875882625579834
    },
    {
      "epoch": 0.00036610107421875,
      "model_forward_time": 0.11505937576293945,
      "step": 59982
    },
    {
      "epoch": 0.00036610107421875,
      "step": 59982,
      "training_step_time": 0.388225793838501
    },
    {
      "epoch": 0.000366107177734375,
      "model_forward_time": 0.11508440971374512,
      "step": 59983
    },
    {
      "epoch": 0.000366107177734375,
      "step": 59983,
      "training_step_time": 0.3877718448638916
    },
    {
      "epoch": 0.00036611328125,
      "model_forward_time": 0.1152346134185791,
      "step": 59984
    },
    {
      "epoch": 0.00036611328125,
      "step": 59984,
      "training_step_time": 0.44518089294433594
    },
    {
      "epoch": 0.000366119384765625,
      "model_forward_time": 0.11485171318054199,
      "step": 59985
    },
    {
      "epoch": 0.000366119384765625,
      "step": 59985,
      "training_step_time": 0.4923126697540283
    },
    {
      "epoch": 0.00036612548828125,
      "model_forward_time": 0.11509251594543457,
      "step": 59986
    },
    {
      "epoch": 0.00036612548828125,
      "step": 59986,
      "training_step_time": 0.4940955638885498
    },
    {
      "epoch": 0.000366131591796875,
      "model_forward_time": 0.11492085456848145,
      "step": 59987
    },
    {
      "epoch": 0.000366131591796875,
      "step": 59987,
      "training_step_time": 0.3678574562072754
    },
    {
      "epoch": 0.0003661376953125,
      "model_forward_time": 0.11634588241577148,
      "step": 59988
    },
    {
      "epoch": 0.0003661376953125,
      "step": 59988,
      "training_step_time": 0.44639039039611816
    },
    {
      "epoch": 0.000366143798828125,
      "model_forward_time": 0.11652541160583496,
      "step": 59989
    },
    {
      "epoch": 0.000366143798828125,
      "step": 59989,
      "training_step_time": 0.5073235034942627
    },
    {
      "epoch": 0.00036614990234375,
      "grad_norm": 0.08541887253522873,
      "learning_rate": 7.594339912486703e-12,
      "loss": 0.0343,
      "step": 59990
    },
    {
      "epoch": 0.00036614990234375,
      "model_forward_time": 0.11916232109069824,
      "step": 59990
    },
    {
      "epoch": 0.00036614990234375,
      "step": 59990,
      "training_step_time": 0.6251339912414551
    },
    {
      "epoch": 0.000366156005859375,
      "model_forward_time": 0.1197965145111084,
      "step": 59991
    },
    {
      "epoch": 0.000366156005859375,
      "step": 59991,
      "training_step_time": 0.5600154399871826
    },
    {
      "epoch": 0.000366162109375,
      "model_forward_time": 0.11943197250366211,
      "step": 59992
    },
    {
      "epoch": 0.000366162109375,
      "step": 59992,
      "training_step_time": 0.6533265113830566
    },
    {
      "epoch": 0.000366168212890625,
      "model_forward_time": 0.11741757392883301,
      "step": 59993
    },
    {
      "epoch": 0.000366168212890625,
      "step": 59993,
      "training_step_time": 0.6567552089691162
    },
    {
      "epoch": 0.00036617431640625,
      "model_forward_time": 0.12022042274475098,
      "step": 59994
    },
    {
      "epoch": 0.00036617431640625,
      "step": 59994,
      "training_step_time": 0.694582462310791
    },
    {
      "epoch": 0.000366180419921875,
      "model_forward_time": 0.11565876007080078,
      "step": 59995
    },
    {
      "epoch": 0.000366180419921875,
      "step": 59995,
      "training_step_time": 0.7343173027038574
    },
    {
      "epoch": 0.0003661865234375,
      "model_forward_time": 0.11886286735534668,
      "step": 59996
    },
    {
      "epoch": 0.0003661865234375,
      "step": 59996,
      "training_step_time": 0.7074971199035645
    },
    {
      "epoch": 0.000366192626953125,
      "model_forward_time": 0.1197054386138916,
      "step": 59997
    },
    {
      "epoch": 0.000366192626953125,
      "step": 59997,
      "training_step_time": 0.6200370788574219
    },
    {
      "epoch": 0.00036619873046875,
      "model_forward_time": 0.12000799179077148,
      "step": 59998
    },
    {
      "epoch": 0.00036619873046875,
      "step": 59998,
      "training_step_time": 0.6936919689178467
    },
    {
      "epoch": 0.000366204833984375,
      "model_forward_time": 0.11726641654968262,
      "step": 59999
    },
    {
      "epoch": 0.000366204833984375,
      "step": 59999,
      "training_step_time": 0.7790775299072266
    },
    {
      "epoch": 0.0003662109375,
      "grad_norm": 0.09518105536699295,
      "learning_rate": 0.0,
      "loss": 0.0327,
      "step": 60000
    }
  ],
  "logging_steps": 10,
  "max_steps": 60000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}
